From dd58ef019b700900793a1eb48b52123db01b654e Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Wed, 30 Dec 2015 11:46:15 +0000
Subject: [PATCH 01/10] Vendor import of llvm trunk r256633:
 https://llvm.org/svn/llvm-project/llvm/trunk@256633

---
 .clang-tidy | 14 +-
 CMakeLists.txt | 110 +-
 CODE_OWNERS.TXT | 32 +-
 CREDITS.TXT | 51 -
 Makefile.config.in | 4 +-
 Makefile.rules | 7 +-
 README.txt | 2 +-
 autoconf/configure.ac | 81 +-
 autoconf/m4/rand48.m4 | 12 -
 bindings/go/llvm/DIBuilderBindings.cpp | 32 +-
 bindings/go/llvm/DIBuilderBindings.h | 14 +-
 bindings/go/llvm/IRBindings.cpp | 4 +
 bindings/go/llvm/IRBindings.h | 2 +
 bindings/go/llvm/analysis.go | 1 +
 bindings/go/llvm/bitreader.go | 3 +-
 bindings/go/llvm/dibuilder.go | 45 +-
 bindings/go/llvm/executionengine.go | 1 +
 bindings/go/llvm/ir.go | 10 +-
 bindings/go/llvm/linker.go | 7 +-
 bindings/go/llvm/target.go | 1 +
 bindings/ocaml/Makefile.ocaml | 2 +
 bindings/ocaml/bitreader/bitreader_ocaml.c | 10 +-
 bindings/ocaml/linker/linker_ocaml.c | 6 +-
 bindings/ocaml/linker/llvm_linker.ml | 4 +-
 bindings/ocaml/linker/llvm_linker.mli | 6 +-
 bindings/ocaml/llvm/llvm.ml | 2 +
 bindings/ocaml/llvm/llvm.mli | 10 +
 bindings/ocaml/llvm/llvm_ocaml.c | 11 +
 bindings/python/llvm/bit_reader.py | 9 +-
 bindings/python/llvm/core.py | 4 -
 cmake/config-ix.cmake | 13 +-
 cmake/dummy.cpp | 1 +
 cmake/modules/AddLLVM.cmake | 432 +-
 cmake/modules/AddLLVMDefinitions.cmake | 6 +-
 cmake/modules/CrossCompile.cmake | 2 +
 cmake/modules/DetermineGCCCompatible.cmake | 11 +
 cmake/modules/HandleLLVMOptions.cmake | 125 +-
 cmake/modules/HandleLLVMStdlib.cmake | 10 +-
 cmake/modules/LLVM-Config.cmake | 18 +-
 cmake/modules/LLVMConfig.cmake.in | 5 +-
 cmake/modules/LLVMExternalProjectUtils.cmake | 195 +
 cmake/modules/LLVMInstallSymlink.cmake | 21 +
 cmake/modules/Makefile | 19 +-
 cmake/modules/TableGen.cmake | 24 +-
 configure | 524 +-
 docs/AliasAnalysis.rst | 9 +-
 docs/Atomics.rst | 2 +-
 docs/BitCodeFormat.rst | 5 +-
 docs/BitSets.rst | 65 +-
 docs/BranchWeightMetadata.rst | 6 +-
 docs/BuildingLLVMWithAutotools.rst | 6 +
 docs/CMake.rst | 214 +-
 docs/CMakeLists.txt | 4 +-
 docs/CodeGenerator.rst | 2 +-
 docs/CodingStandards.rst | 4 +-
 docs/CommandGuide/index.rst | 1 +
 docs/CommandGuide/lit.rst | 5 +
 docs/CommandGuide/llc.rst | 6 +
 docs/CommandGuide/lli.rst | 168 +-
 docs/CommandGuide/llvm-lib.rst | 31 +
 docs/CommandGuide/llvm-profdata.rst | 62 +-
 docs/CommandGuide/llvm-symbolizer.rst | 16 +
 docs/CommandLine.rst | 1 +
 docs/CompileCudaWithLLVM.rst | 169 +
 docs/CompilerWriterInfo.rst | 6 +-
 docs/CoverageMappingFormat.rst | 2 +-
 docs/DeveloperPolicy.rst | 40 +-
 docs/ExceptionHandling.rst | 436 +-
 docs/ExtendingLLVM.rst | 6 +-
 docs/Frontend/PerformanceTips.rst | 227 +-
 docs/GettingStarted.rst | 88 +-
 docs/HowToBuildOnARM.rst | 64 +-
 docs/HowToReleaseLLVM.rst | 57 +-
 docs/LangRef.rst | 1202 +-
 docs/LibFuzzer.rst | 144 +-
 docs/MIRLangRef.rst | 495 +
 docs/Phabricator.rst | 26 +-
 docs/ProgrammersManual.rst | 28 +-
 docs/README.txt | 2 +-
 docs/ReleaseNotes.rst | 491 +-
 docs/ReleaseProcess.rst | 2 +-
 docs/SourceLevelDebugging.rst | 34 +-
 docs/StackMaps.rst | 10 +
 docs/Statepoints.rst | 115 +-
 docs/TestingGuide.rst | 4 +
 docs/WritingAnLLVMPass.rst | 30 +-
 docs/_ocamldoc/style.css | 97 +
 docs/conf.py | 4 +-
 docs/doxygen.cfg.in | 2 +-
 docs/index.rst | 14 +
 docs/tutorial/LangImpl1.rst | 25 +-
 docs/tutorial/LangImpl2.rst | 185 +-
 docs/tutorial/LangImpl3.rst | 277 +-
 docs/tutorial/LangImpl4.rst | 356 +-
 docs/tutorial/LangImpl5.rst | 201 +-
 docs/tutorial/LangImpl6.rst | 120 +-
 docs/tutorial/LangImpl7.rst | 119 +-
 docs/tutorial/LangImpl8.rst | 43 +-
 docs/tutorial/LangImpl9.rst | 2 +-
 docs/tutorial/OCamlLangImpl1.rst | 4 +-
 docs/tutorial/OCamlLangImpl2.rst | 6 +-
 docs/tutorial/OCamlLangImpl3.rst | 24 +-
 docs/tutorial/OCamlLangImpl4.rst | 2 +-
 docs/tutorial/OCamlLangImpl5.rst | 2 +-
 docs/tutorial/OCamlLangImpl6.rst | 4 +-
 docs/tutorial/OCamlLangImpl7.rst | 8 +-
 docs/tutorial/OCamlLangImpl8.rst | 2 +-
 docs/yaml2obj.rst | 1 +
 examples/BrainF/BrainF.cpp | 10 +-
 examples/BrainF/BrainFDriver.cpp | 4 +-
 examples/CMakeLists.txt | 2 +-
 examples/ExceptionDemo/CMakeLists.txt | 8 +-
 examples/ExceptionDemo/ExceptionDemo.cpp | 21 +-
 examples/Fibonacci/fibonacci.cpp | 7 +-
 examples/HowToUseJIT/HowToUseJIT.cpp | 7 +-
 examples/Kaleidoscope/Chapter2/CMakeLists.txt | 6 +
 examples/Kaleidoscope/Chapter2/Makefile | 2 +
 examples/Kaleidoscope/Chapter2/toy.cpp | 282 +-
 examples/Kaleidoscope/Chapter3/toy.cpp | 460 +-
 examples/Kaleidoscope/Chapter4/CMakeLists.txt | 5 +-
 examples/Kaleidoscope/Chapter4/toy.cpp | 620 +-
 examples/Kaleidoscope/Chapter5/CMakeLists.txt | 4 +-
 examples/Kaleidoscope/Chapter5/toy.cpp | 561 +-
 examples/Kaleidoscope/Chapter6/CMakeLists.txt | 4 +-
 examples/Kaleidoscope/Chapter6/toy.cpp | 608 +-
 examples/Kaleidoscope/Chapter7/CMakeLists.txt | 5 +-
 examples/Kaleidoscope/Chapter7/toy.cpp | 704 +-
 examples/Kaleidoscope/Chapter8/CMakeLists.txt | 5 +-
 examples/Kaleidoscope/Chapter8/toy.cpp | 776 +-
 examples/Kaleidoscope/Orc/fully_lazy/toy.cpp | 248 +-
 examples/Kaleidoscope/Orc/initial/toy.cpp | 239 +-
 .../Kaleidoscope/Orc/lazy_codegen/toy.cpp | 239 +-
 examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp | 239 +-
 .../Kaleidoscope/include/KaleidoscopeJIT.h | 114 +
 examples/ParallelJIT/ParallelJIT.cpp | 19 +-
 include/llvm-c/Analysis.h | 2 +-
 include/llvm-c/BitReader.h | 45 +-
 include/llvm-c/BitWriter.h | 2 +-
 include/llvm-c/Core.h | 135 +-
 include/llvm-c/ErrorHandling.h | 51 +
 include/llvm-c/ExecutionEngine.h | 26 +-
 include/llvm-c/IRReader.h | 2 +-
 include/llvm-c/Initialization.h | 2 +-
 include/llvm-c/Linker.h | 24 +-
 include/llvm-c/Object.h | 2 +-
 include/llvm-c/OrcBindings.h | 134 +
 include/llvm-c/Support.h | 20 +-
 include/llvm-c/Target.h | 2 +-
 include/llvm-c/TargetMachine.h | 4 +-
 include/llvm-c/Transforms/IPO.h | 2 +-
 .../llvm-c/Transforms/PassManagerBuilder.h | 2 +-
 include/llvm-c/Transforms/Scalar.h | 2 +-
 include/llvm-c/Transforms/Vectorize.h | 3 +-
 include/llvm-c/Types.h | 124 +
 include/llvm-c/lto.h | 4 +-
 include/llvm/ADT/APFloat.h | 8 +-
 include/llvm/ADT/APInt.h | 7 +-
 include/llvm/ADT/APSInt.h | 6 +-
 include/llvm/ADT/ArrayRef.h | 21 +-
 include/llvm/ADT/BitVector.h | 10 +-
 include/llvm/ADT/DeltaAlgorithm.h | 2 +-
 include/llvm/ADT/DenseMap.h | 26 +-
 include/llvm/ADT/DenseMapInfo.h | 28 +-
 include/llvm/ADT/DenseSet.h | 5 +-
 include/llvm/ADT/DepthFirstIterator.h | 22 +-
 include/llvm/ADT/FoldingSet.h | 37 +-
 include/llvm/ADT/ImmutableList.h | 12 +-
 include/llvm/ADT/ImmutableMap.h | 101 +-
 include/llvm/ADT/IntrusiveRefCntPtr.h | 4 +-
 include/llvm/ADT/Optional.h | 19 +
 include/llvm/ADT/PackedVector.h | 28 +-
 include/llvm/ADT/PointerIntPair.h | 178 +-
 include/llvm/ADT/PointerUnion.h | 870 +-
 include/llvm/ADT/PostOrderIterator.h | 8 +-
 include/llvm/ADT/STLExtras.h | 52 +-
 include/llvm/ADT/ScopedHashTable.h | 40 +-
 include/llvm/ADT/SetOperations.h | 2 +-
 include/llvm/ADT/SetVector.h | 30 +-
 include/llvm/ADT/SmallBitVector.h | 23 +-
 include/llvm/ADT/SmallPtrSet.h | 13 +-
 include/llvm/ADT/SmallSet.h | 4 +-
 include/llvm/ADT/SmallVector.h | 7 +
 include/llvm/ADT/SparseBitVector.h | 48 +-
 include/llvm/ADT/Statistic.h | 5 +
 include/llvm/ADT/StringMap.h | 22 +-
 include/llvm/ADT/StringRef.h | 37 +-
 include/llvm/ADT/StringSet.h | 5 +
 include/llvm/ADT/StringSwitch.h | 50 +-
 include/llvm/ADT/TinyPtrVector.h | 3 +-
 include/llvm/ADT/Triple.h | 97 +-
 include/llvm/ADT/UniqueVector.h | 1 +
 include/llvm/ADT/ilist.h | 152 +-
 include/llvm/ADT/ilist_node.h | 89 +-
 include/llvm/ADT/iterator_range.h | 12 +
 include/llvm/Analysis/AliasAnalysis.h | 1147 +-
 include/llvm/Analysis/AliasSetTracker.h | 15 +-
 include/llvm/Analysis/AssumptionCache.h | 6 +-
 include/llvm/Analysis/BasicAliasAnalysis.h | 223 +
 include/llvm/Analysis/BlockFrequencyInfo.h | 42 +-
 .../llvm/Analysis/BlockFrequencyInfoImpl.h | 88 +-
 include/llvm/Analysis/BranchProbabilityInfo.h | 91 +-
 include/llvm/Analysis/CFG.h | 2 +-
 include/llvm/Analysis/CFLAliasAnalysis.h | 158 +
 include/llvm/Analysis/CGSCCPassManager.h | 2 +-
 include/llvm/Analysis/CallGraph.h | 33 +-
 include/llvm/Analysis/CallGraphSCCPass.h | 17 +-
 include/llvm/Analysis/CaptureTracking.h | 7 +-
 include/llvm/Analysis/DOTGraphTraitsPass.h | 34 +-
 include/llvm/Analysis/DemandedBits.h | 75 +
 include/llvm/Analysis/DependenceAnalysis.h | 61 +-
 include/llvm/Analysis/DivergenceAnalysis.h | 48 +
 include/llvm/Analysis/EHPersonalities.h | 94 +
 include/llvm/Analysis/GlobalsModRef.h | 160 +
 include/llvm/Analysis/IVUsers.h | 2 +-
 include/llvm/Analysis/InlineCost.h | 63 +-
 include/llvm/Analysis/InstructionSimplify.h | 2 +-
 .../llvm/Analysis/IteratedDominanceFrontier.h | 6 +-
 include/llvm/Analysis/LazyCallGraph.h | 113 +-
 include/llvm/Analysis/LazyValueInfo.h | 17 +-
 include/llvm/Analysis/LibCallAliasAnalysis.h | 71 -
 include/llvm/Analysis/LibCallSemantics.h | 225 -
 include/llvm/Analysis/Loads.h | 13 +-
 include/llvm/Analysis/LoopAccessAnalysis.h | 226 +-
 include/llvm/Analysis/LoopInfo.h | 111 +-
 include/llvm/Analysis/LoopInfoImpl.h | 8 +-
 include/llvm/Analysis/LoopPass.h | 19 +-
 include/llvm/Analysis/MemoryBuiltins.h | 5 -
 .../llvm/Analysis/MemoryDependenceAnalysis.h | 39 +-
 include/llvm/Analysis/ObjCARCAliasAnalysis.h | 102 +
 include/llvm/Analysis/ObjCARCAnalysisUtils.h | 287 +
 .../llvm/Analysis/ObjCARCInstKind.h | 8 +-
 include/llvm/Analysis/OrderedBasicBlock.h | 66 +
 include/llvm/Analysis/PHITransAddr.h | 23 +-
 include/llvm/Analysis/Passes.h | 74 +-
 include/llvm/Analysis/RegionInfo.h | 57 +-
 include/llvm/Analysis/RegionInfoImpl.h | 68 +-
 include/llvm/Analysis/RegionPrinter.h | 45 +
 include/llvm/Analysis/ScalarEvolution.h | 1005 +-
 .../Analysis/ScalarEvolutionAliasAnalysis.h | 79 +
 .../llvm/Analysis/ScalarEvolutionExpander.h | 36 +-
 .../Analysis/ScalarEvolutionExpressions.h | 227 +-
 include/llvm/Analysis/ScopedNoAliasAA.h | 92 +
 include/llvm/Analysis/SparsePropagation.h | 93 +-
 include/llvm/Analysis/TargetLibraryInfo.def | 90 +
 include/llvm/Analysis/TargetLibraryInfo.h | 8 +-
 include/llvm/Analysis/TargetTransformInfo.h | 376 +-
 .../llvm/Analysis/TargetTransformInfoImpl.h | 101 +-
 .../llvm/Analysis/TypeBasedAliasAnalysis.h | 93 +
 include/llvm/Analysis/ValueTracking.h | 180 +-
 include/llvm/Analysis/VectorUtils.h | 52 +-
 include/llvm/AsmParser/Parser.h | 12 +
 include/llvm/AsmParser/SlotMapping.h | 12 +-
 include/llvm/Bitcode/BitcodeWriterPass.h | 16 +-
 include/llvm/Bitcode/BitstreamReader.h | 2 +
 include/llvm/Bitcode/BitstreamWriter.h | 125 +-
 include/llvm/Bitcode/LLVMBitCodes.h | 122 +-
 include/llvm/Bitcode/ReaderWriter.h | 64 +-
 include/llvm/CodeGen/Analysis.h | 8 +-
 include/llvm/CodeGen/AsmPrinter.h | 33 +-
 include/llvm/CodeGen/AtomicExpandUtils.h | 57 +
 include/llvm/CodeGen/BasicTTIImpl.h | 77 +-
 include/llvm/CodeGen/CalcSpillWeights.h | 7 +-
 include/llvm/CodeGen/CallingConvLower.h | 19 +-
 include/llvm/CodeGen/CommandFlags.h | 36 +
 include/llvm/CodeGen/DFAPacketizer.h | 70 +-
 include/llvm/CodeGen/DIE.h | 126 +-
 include/llvm/CodeGen/FastISel.h | 28 +-
 include/llvm/CodeGen/FunctionLoweringInfo.h | 25 +-
 include/llvm/CodeGen/GCMetadata.h | 4 +-
 include/llvm/CodeGen/GCStrategy.h | 4 +-
 include/llvm/CodeGen/ISDOpcodes.h | 49 +-
 include/llvm/CodeGen/IntrinsicLowering.h | 59 +-
 include/llvm/CodeGen/LiveInterval.h | 30 +-
 include/llvm/CodeGen/LiveIntervalAnalysis.h | 28 +-
 include/llvm/CodeGen/LivePhysRegs.h | 8 +-
 include/llvm/CodeGen/LiveRangeEdit.h | 2 +-
 include/llvm/CodeGen/LiveRegMatrix.h | 2 -
 include/llvm/CodeGen/LiveStackAnalysis.h | 110 +-
 include/llvm/CodeGen/MIRParser/MIRParser.h | 6 +-
 include/llvm/CodeGen/MIRYamlMapping.h | 202 +-
 include/llvm/CodeGen/MachineBasicBlock.h | 460 +-
 .../CodeGen/MachineBranchProbabilityInfo.h | 31 +-
 include/llvm/CodeGen/MachineCombinerPattern.h | 31 +-
 include/llvm/CodeGen/MachineConstantPool.h | 47 +-
 include/llvm/CodeGen/MachineDominators.h | 32 +-
 include/llvm/CodeGen/MachineFrameInfo.h | 46 +-
 include/llvm/CodeGen/MachineFunction.h | 61 +-
 include/llvm/CodeGen/MachineInstr.h | 92 +-
 include/llvm/CodeGen/MachineInstrBuilder.h | 85 +-
 include/llvm/CodeGen/MachineInstrBundle.h | 53 +-
 include/llvm/CodeGen/MachineMemOperand.h | 12 +-
 include/llvm/CodeGen/MachineModuleInfo.h | 42 +-
 include/llvm/CodeGen/MachineModuleInfoImpls.h | 120 +-
 include/llvm/CodeGen/MachineRegisterInfo.h | 118 +-
 include/llvm/CodeGen/MachineScheduler.h | 30 +-
 include/llvm/CodeGen/MachineValueType.h | 248 +-
 include/llvm/CodeGen/ParallelCG.h | 43 +
 include/llvm/CodeGen/Passes.h | 14 +-
 include/llvm/CodeGen/PseudoSourceValue.h | 229 +-
 include/llvm/CodeGen/RegAllocPBQP.h | 2 +-
 include/llvm/CodeGen/RegAllocRegistry.h | 7 +-
 include/llvm/CodeGen/RegisterPressure.h | 89 +-
 include/llvm/CodeGen/RegisterScavenging.h | 19 +-
 include/llvm/CodeGen/RuntimeLibcalls.h | 8 -
 include/llvm/CodeGen/ScheduleDAG.h | 53 +-
 include/llvm/CodeGen/ScheduleDAGInstrs.h | 56 +-
 include/llvm/CodeGen/SchedulerRegistry.h | 6 -
 include/llvm/CodeGen/SelectionDAG.h | 39 +-
 include/llvm/CodeGen/SelectionDAGNodes.h | 210 +-
 include/llvm/CodeGen/SlotIndexes.h | 93 +-
 include/llvm/CodeGen/StackMaps.h | 1 +
 .../CodeGen/TargetLoweringObjectFileImpl.h | 12 +-
 include/llvm/CodeGen/TargetSchedule.h | 6 +
 include/llvm/CodeGen/ValueTypes.h | 20 +
 include/llvm/CodeGen/ValueTypes.td | 107 +-
 include/llvm/CodeGen/WinEHFuncInfo.h | 177 +-
 include/llvm/Config/config.h.cmake | 9 +-
 include/llvm/Config/config.h.in | 18 +-
 include/llvm/DebugInfo/CodeView/CodeView.h | 367 +
 .../llvm/DebugInfo/CodeView/CodeViewOStream.h | 39 +
 .../CodeView/FieldListRecordBuilder.h | 78 +
 include/llvm/DebugInfo/CodeView/FunctionId.h | 56 +
 include/llvm/DebugInfo/CodeView/Line.h | 124 +
 .../DebugInfo/CodeView/ListRecordBuilder.h | 43 +
 .../CodeView/MemoryTypeTableBuilder.h | 68 +
 .../CodeView/MethodListRecordBuilder.h | 35 +
 include/llvm/DebugInfo/CodeView/TypeIndex.h | 176 +
 include/llvm/DebugInfo/CodeView/TypeRecord.h | 270 +
 .../DebugInfo/CodeView/TypeRecordBuilder.h | 57 +
 .../DebugInfo/CodeView/TypeSymbolEmitter.h | 37 +
 .../DebugInfo/CodeView/TypeTableBuilder.h | 60 +
 include/llvm/DebugInfo/DIContext.h | 33 +-
 .../llvm/DebugInfo/DWARF/DWARFCompileUnit.h | 9 +-
 include/llvm/DebugInfo/DWARF/DWARFContext.h | 21 +-
 include/llvm/DebugInfo/DWARF/DWARFDebugLine.h | 3 +-
 .../llvm/DebugInfo/DWARF/DWARFDebugMacro.h | 59 +
 include/llvm/DebugInfo/DWARF/DWARFFormValue.h | 3 +
 include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h | 10 +-
 include/llvm/DebugInfo/DWARF/DWARFUnit.h | 49 +-
 include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h | 81 +
 include/llvm/DebugInfo/PDB/PDBContext.h | 3 +-
 include/llvm/DebugInfo/PDB/PDBTypes.h | 31 +
 include/llvm/DebugInfo/Symbolize/DIPrinter.h | 47 +
 .../DebugInfo/Symbolize/SymbolizableModule.h | 53 +
 include/llvm/DebugInfo/Symbolize/Symbolize.h | 105 +
 .../llvm/ExecutionEngine/ExecutionEngine.h | 17 +-
 include/llvm/ExecutionEngine/Interpreter.h | 12 +-
 .../Orc/CompileOnDemandLayer.h | 375 +-
 .../llvm/ExecutionEngine/Orc/CompileUtils.h | 1 -
 .../ExecutionEngine/Orc/GlobalMappingLayer.h | 108 +
 .../llvm/ExecutionEngine/Orc/IRCompileLayer.h | 2 -
 .../ExecutionEngine/Orc/IndirectionUtils.h | 326 +-
 .../ExecutionEngine/Orc/LazyEmittingLayer.h | 8 +-
 .../llvm/ExecutionEngine/Orc/LogicalDylib.h | 40 +-
 .../ExecutionEngine/Orc/ObjectLinkingLayer.h | 57 +-
 .../Orc/ObjectTransformLayer.h | 8 -
 .../ExecutionEngine/Orc/OrcTargetSupport.h | 90 +-
 include/llvm/ExecutionEngine/RuntimeDyld.h | 31 +-
 .../ExecutionEngine/SectionMemoryManager.h | 25 +-
 include/llvm/IR/Argument.h | 9 +-
 include/llvm/IR/AssemblyAnnotationWriter.h | 3 +-
 include/llvm/IR/Attributes.h | 89 +-
 include/llvm/IR/Attributes.td | 192 +
 include/llvm/IR/BasicBlock.h | 44 +-
 include/llvm/IR/CFG.h | 150 +-
 include/llvm/IR/CMakeLists.txt | 5 +-
 include/llvm/IR/CallSite.h | 197 +-
 include/llvm/IR/CallingConv.h | 27 +-
 include/llvm/IR/Comdat.h | 2 +-
 include/llvm/IR/Constant.h | 51 +-
 include/llvm/IR/ConstantRange.h | 15 +-
 include/llvm/IR/Constants.h | 30 +-
 include/llvm/IR/DIBuilder.h | 107 +-
 include/llvm/IR/DataLayout.h | 7 +-
 include/llvm/IR/DebugInfo.h | 15 +-
 include/llvm/IR/DebugInfoFlags.def | 1 +
 include/llvm/IR/DebugInfoMetadata.h | 564 +-
 include/llvm/IR/DerivedTypes.h | 100 +-
 include/llvm/IR/DiagnosticInfo.h | 133 +-
 include/llvm/IR/DiagnosticPrinter.h | 2 +-
 include/llvm/IR/Dominators.h | 49 +-
 include/llvm/IR/Function.h | 156 +-
 include/llvm/IR/FunctionInfo.h | 241 +
 include/llvm/IR/GVMaterializer.h | 23 +-
 include/llvm/IR/GetElementPtrTypeIterator.h | 2 +-
 include/llvm/IR/GlobalAlias.h | 25 +-
 include/llvm/IR/GlobalObject.h | 8 +-
 include/llvm/IR/GlobalValue.h | 26 +-
 include/llvm/IR/GlobalVariable.h | 24 +-
 include/llvm/IR/IRBuilder.h | 159 +-
 include/llvm/IR/IRPrintingPasses.h | 6 +
 include/llvm/IR/InlineAsm.h | 65 +-
 include/llvm/IR/InstIterator.h | 29 +-
 include/llvm/IR/InstVisitor.h | 6 +
 include/llvm/IR/InstrTypes.h | 838 +-
 include/llvm/IR/Instruction.def | 173 +-
 include/llvm/IR/Instruction.h | 88 +-
 include/llvm/IR/Instructions.h | 948 +-
 include/llvm/IR/IntrinsicInst.h | 35 +-
 include/llvm/IR/Intrinsics.h | 2 +-
 include/llvm/IR/Intrinsics.td | 74 +-
 include/llvm/IR/IntrinsicsAArch64.td | 3 +
 include/llvm/IR/IntrinsicsAMDGPU.td | 72 +
 include/llvm/IR/IntrinsicsARM.td | 44 +-
 include/llvm/IR/IntrinsicsHexagon.td | 4411 ++++-
 include/llvm/IR/IntrinsicsPowerPC.td | 18 +
 include/llvm/IR/IntrinsicsWebAssembly.td | 6 +
 include/llvm/IR/IntrinsicsX86.td | 2508 ++-
 include/llvm/IR/LLVMContext.h | 25 +-
 include/llvm/IR/LegacyPassManagers.h | 75 +-
 include/llvm/IR/MDBuilder.h | 3 +
 include/llvm/IR/Mangler.h | 2 +-
 include/llvm/IR/Metadata.def | 72 +-
 include/llvm/IR/Metadata.h | 115 +-
 include/llvm/IR/MetadataTracking.h | 99 -
 include/llvm/IR/Module.h | 117 +-
 include/llvm/IR/ModuleSlotTracker.h | 8 +
 include/llvm/IR/PassManager.h | 3 +-
 include/llvm/IR/PatternMatch.h | 40 +
 include/llvm/IR/Statepoint.h | 29 +-
 include/llvm/IR/SymbolTableListTraits.h | 64 +-
 include/llvm/IR/TrackingMDRef.h | 6 +-
 include/llvm/IR/Type.h | 115 +-
 include/llvm/IR/TypeFinder.h | 2 +-
 include/llvm/IR/Use.h | 1 -
 include/llvm/IR/UseListOrder.h | 4 +-
 include/llvm/IR/User.h | 37 +-
 include/llvm/IR/Value.def | 3 +-
 include/llvm/IR/Value.h | 142 +-
 include/llvm/IR/ValueHandle.h | 33 +-
 include/llvm/IR/ValueMap.h | 4 +-
 include/llvm/IR/ValueSymbolTable.h | 44 +-
 include/llvm/IRReader/IRReader.h | 9 +-
 include/llvm/InitializePasses.h | 47 +-
 include/llvm/LTO/LTOCodeGenerator.h | 141 +-
 include/llvm/LTO/LTOModule.h | 49 +-
 include/llvm/LibDriver/LibDriver.h | 2 +-
 include/llvm/LinkAllPasses.h | 29 +-
 include/llvm/Linker/IRMover.h | 76 +
 include/llvm/Linker/Linker.h | 96 +-
 include/llvm/MC/ConstantPools.h | 10 +-
 include/llvm/MC/MCAsmBackend.h | 5 +
 include/llvm/MC/MCAsmInfo.h | 9 +
 include/llvm/MC/MCAssembler.h | 512 +-
 include/llvm/MC/MCContext.h | 36 +-
 include/llvm/MC/MCDirectives.h | 4 +-
 include/llvm/MC/MCDwarf.h | 44 +-
 include/llvm/MC/MCELFObjectWriter.h | 2 -
 include/llvm/MC/MCELFStreamer.h | 13 +-
 include/llvm/MC/MCExpr.h | 6 +-
 include/llvm/MC/MCFixedLenDisassembler.h | 2 +
 include/llvm/MC/MCFragment.h | 506 +
 include/llvm/MC/MCInstrDesc.h | 32 +-
 include/llvm/MC/MCInstrItineraries.h | 2 +-
 include/llvm/MC/MCLinkerOptimizationHint.h | 2 +-
 include/llvm/MC/MCMachObjectWriter.h | 22 +-
 include/llvm/MC/MCObjectFileInfo.h | 36 +-
 include/llvm/MC/MCObjectStreamer.h | 7 +-
 include/llvm/MC/MCObjectWriter.h | 38 +-
 include/llvm/MC/MCParser/AsmLexer.h | 3 +-
 include/llvm/MC/MCParser/MCAsmLexer.h | 40 +-
 .../llvm/MC/MCParser/MCAsmParserExtension.h | 3 +
 include/llvm/MC/MCParser/MCParsedAsmOperand.h | 10 +-
 include/llvm/MC/MCRegisterInfo.h | 8 +-
 include/llvm/MC/MCSchedule.h | 5 +-
 include/llvm/MC/MCSection.h | 16 +-
 include/llvm/MC/MCSectionCOFF.h | 89 +-
 include/llvm/MC/MCSectionELF.h | 27 +-
 include/llvm/MC/MCSectionMachO.h | 27 +-
 include/llvm/MC/MCStreamer.h | 31 +-
 include/llvm/MC/MCSubtargetInfo.h | 12 +-
 include/llvm/MC/MCSymbol.h | 101 +-
 include/llvm/MC/MCTargetAsmParser.h | 28 +-
 include/llvm/MC/MCTargetOptions.h | 4 +
 include/llvm/MC/MCTargetOptionsCommandFlags.h | 15 +
 include/llvm/MC/MCValue.h | 5 -
 include/llvm/MC/MCWinCOFFStreamer.h | 2 +-
 include/llvm/MC/MachineLocation.h | 4 -
 include/llvm/MC/SectionKind.h | 59 +-
 include/llvm/MC/StringTableBuilder.h | 40 +-
 include/llvm/MC/SubtargetFeature.h | 2 +-
 include/llvm/Object/Archive.h | 67 +-
 include/llvm/Object/ArchiveWriter.h | 13 +-
 include/llvm/Object/Binary.h | 10 +-
 include/llvm/Object/COFF.h | 4 +-
 include/llvm/Object/COFFImportFile.h | 74 +
 include/llvm/Object/ELF.h | 879 +-
 include/llvm/Object/ELFObjectFile.h | 171 +-
 include/llvm/Object/ELFTypes.h | 37 +-
 include/llvm/Object/Error.h | 1 +
 include/llvm/Object/FunctionIndexObjectFile.h | 110 +
 include/llvm/Object/MachO.h | 26 +-
 include/llvm/Object/ObjectFile.h | 13 +-
 include/llvm/Object/SymbolicFile.h | 8 +
 include/llvm/Option/Arg.h | 1 +
 include/llvm/Option/ArgList.h | 6 +
 include/llvm/Option/OptTable.h | 8 +-
 include/llvm/Option/Option.h | 1 +
 include/llvm/PassAnalysisSupport.h | 33 +-
 include/llvm/PassInfo.h | 36 +-
 include/llvm/PassRegistry.h | 1 -
 include/llvm/PassSupport.h | 2 +-
 include/llvm/ProfileData/CoverageMapping.h | 6 +-
 include/llvm/ProfileData/InstrProf.h | 552 +-
 include/llvm/ProfileData/InstrProfData.inc | 735 +
 include/llvm/ProfileData/InstrProfReader.h | 175 +-
 include/llvm/ProfileData/InstrProfWriter.h | 23 +-
 include/llvm/ProfileData/SampleProf.h | 303 +-
 include/llvm/ProfileData/SampleProfReader.h | 245 +-
 include/llvm/ProfileData/SampleProfWriter.h | 114 +-
 include/llvm/Support/ARMTargetParser.def | 223 +
 include/llvm/Support/AlignOf.h | 37 +-
 include/llvm/Support/Allocator.h | 15 +-
 include/llvm/Support/BlockFrequency.h | 26 +-
 include/llvm/Support/BranchProbability.h | 174 +-
 include/llvm/Support/CBindingWrapping.h | 1 +
 include/llvm/Support/COFF.h | 2 +
 include/llvm/Support/CommandLine.h | 32 +-
 include/llvm/Support/Compiler.h | 74 +-
 include/llvm/Support/CrashRecoveryContext.h | 35 +-
 include/llvm/Support/DOTGraphTraits.h | 11 +-
 include/llvm/Support/Debug.h | 2 +-
 include/llvm/Support/Dwarf.def | 10 +-
 include/llvm/Support/Dwarf.h | 54 +-
 include/llvm/Support/ELF.h | 69 +-
 include/llvm/Support/ELFRelocs/AVR.def | 40 +
 include/llvm/Support/ELFRelocs/PowerPC.def | 62 +
 include/llvm/Support/ELFRelocs/PowerPC64.def | 93 +
 include/llvm/Support/Endian.h | 141 +-
 include/llvm/Support/ErrorHandling.h | 32 +-
 include/llvm/Support/ErrorOr.h | 11 +-
 include/llvm/Support/FileOutputBuffer.h | 6 +-
 include/llvm/Support/FileSystem.h | 51 +-
 include/llvm/Support/Format.h | 5 +-
 include/llvm/Support/GCOV.h | 19 +-
 include/llvm/Support/GenericDomTree.h | 14 +-
 .../llvm/Support/GenericDomTreeConstruction.h | 18 +-
 include/llvm/Support/GraphWriter.h | 10 +-
 include/llvm/Support/JamCRC.h | 48 +
 include/llvm/Support/MachO.h | 13 +-
 include/llvm/Support/ManagedStatic.h | 2 +-
 include/llvm/Support/MathExtras.h | 96 +-
 include/llvm/Support/Memory.h | 33 +-
 include/llvm/Support/MemoryBuffer.h | 6 +-
 include/llvm/Support/OnDiskHashTable.h | 216 +-
 include/llvm/Support/Options.h | 4 +-
 include/llvm/Support/OutputBuffer.h | 166 -
 include/llvm/Support/Path.h | 35 +-
 include/llvm/Support/PointerLikeTypeTraits.h | 67 +-
 include/llvm/Support/PrettyStackTrace.h | 12 +
 include/llvm/Support/Printable.h | 52 +
 include/llvm/Support/Program.h | 3 +-
 include/llvm/Support/Recycler.h | 80 +-
 include/llvm/Support/Registry.h | 14 +-
 include/llvm/Support/SMLoc.h | 6 +-
 include/llvm/Support/ScaledNumber.h | 4 +-
 include/llvm/Support/Signals.h | 3 +
 include/llvm/Support/StreamingMemoryObject.h | 4 +-
 include/llvm/Support/StringSaver.h | 16 +-
 include/llvm/Support/TargetParser.h | 278 +-
 include/llvm/Support/TargetRegistry.h | 15 +-
 include/llvm/Support/TargetSelect.h | 27 +-
 include/llvm/Support/ThreadPool.h | 136 +
 include/llvm/Support/Threading.h | 2 +-
 include/llvm/Support/Timer.h | 63 +-
 include/llvm/Support/TrailingObjects.h | 349 +
 include/llvm/Support/UnicodeCharRanges.h | 5 +
 include/llvm/Support/Valgrind.h | 39 -
 include/llvm/Support/YAMLParser.h | 7 +-
 include/llvm/Support/YAMLTraits.h | 49 +-
 include/llvm/Support/circular_raw_ostream.h | 4 +-
 include/llvm/Support/raw_ostream.h | 60 +-
 include/llvm/Support/thread.h | 66 +
 include/llvm/Support/type_traits.h | 9 +
 include/llvm/TableGen/Record.h | 10 +-
 include/llvm/Target/CostTable.h | 60 +-
 include/llvm/Target/Target.td | 38 +-
 include/llvm/Target/TargetCallingConv.h | 5 +
 include/llvm/Target/TargetFrameLowering.h | 39 +-
 include/llvm/Target/TargetInstrInfo.h | 225 +-
 include/llvm/Target/TargetItinerary.td | 16 +
 include/llvm/Target/TargetLowering.h | 304 +-
 .../llvm/Target/TargetLoweringObjectFile.h | 16 +-
 include/llvm/Target/TargetMachine.h | 42 +-
 include/llvm/Target/TargetOpcodes.h | 6 +-
 include/llvm/Target/TargetOptions.h | 64 +-
 include/llvm/Target/TargetRecip.h | 14 +-
 include/llvm/Target/TargetRegisterInfo.h | 439 +-
 include/llvm/Target/TargetSelectionDAG.td | 61 +-
 include/llvm/Target/TargetSelectionDAGInfo.h | 45 +-
 include/llvm/Target/TargetSubtargetInfo.h | 6 +
 include/llvm/Transforms/IPO.h | 15 +
 .../llvm/Transforms/IPO/ForceFunctionAttrs.h | 35 +
 include/llvm/Transforms/IPO/FunctionImport.h | 43 +
 .../llvm/Transforms/IPO/InferFunctionAttrs.h | 38 +
 include/llvm/Transforms/IPO/InlinerPass.h | 13 +-
 include/llvm/Transforms/IPO/LowerBitSets.h | 7 +-
 .../llvm/Transforms/IPO/PassManagerBuilder.h | 10 +
 .../llvm/Transforms/IPO/StripDeadPrototypes.h | 34 +
 .../InstCombine/InstCombineWorklist.h | 12 +-
 include/llvm/Transforms/Instrumentation.h | 42 +-
 include/llvm/Transforms/Scalar.h | 18 +-
 include/llvm/Transforms/Scalar/ADCE.h | 38 +
 include/llvm/Transforms/Scalar/SROA.h | 129 +
 .../llvm/Transforms/Utils/BasicBlockUtils.h | 40 +-
 include/llvm/Transforms/Utils/Cloning.h | 32 +-
 include/llvm/Transforms/Utils/Local.h | 49 +-
 include/llvm/Transforms/Utils/LoopUtils.h | 161 +-
 .../llvm/Transforms/Utils/LoopVersioning.h | 56 +-
 include/llvm/Transforms/Utils/ModuleUtils.h | 4 +-
 .../llvm/Transforms/Utils/SSAUpdaterImpl.h | 2 +-
 .../llvm/Transforms/Utils/SimplifyIndVar.h | 11 +-
 .../llvm/Transforms/Utils/SimplifyLibCalls.h | 3 +
 include/llvm/Transforms/Utils/SplitModule.h | 43 +
 include/llvm/Transforms/Utils/UnrollLoop.h | 9 +-
 include/llvm/Transforms/Utils/ValueMapper.h | 48 +-
 include/llvm/module.modulemap | 12 +-
 lib/Analysis/AliasAnalysis.cpp | 601 +-
 lib/Analysis/AliasAnalysisCounter.cpp | 173 -
 lib/Analysis/AliasAnalysisEvaluator.cpp | 54 +-
 lib/Analysis/AliasDebugger.cpp | 136 -
 lib/Analysis/AliasSetTracker.cpp | 44 +-
 lib/Analysis/Analysis.cpp | 32 +-
 lib/Analysis/BasicAliasAnalysis.cpp | 1094 +-
 lib/Analysis/BlockFrequencyInfo.cpp | 93 +-
 lib/Analysis/BlockFrequencyInfoImpl.cpp | 12 +-
 lib/Analysis/BranchProbabilityInfo.cpp | 397 +-
 lib/Analysis/CFG.cpp | 8 +-
 lib/Analysis/CFLAliasAnalysis.cpp | 253 +-
 lib/Analysis/CMakeLists.txt | 18 +-
 lib/Analysis/{IPA => }/CallGraph.cpp | 42 +-
 lib/Analysis/{IPA => }/CallGraphSCCPass.cpp | 0
 lib/Analysis/{IPA => }/CallPrinter.cpp | 0
 lib/Analysis/CaptureTracking.cpp | 98 +-
 lib/Analysis/CodeMetrics.cpp | 15 +-
 lib/Analysis/ConstantFolding.cpp | 136 +-
 lib/Analysis/CostModel.cpp | 19 +-
 lib/Analysis/Delinearization.cpp | 14 +-
 lib/Analysis/DemandedBits.cpp | 392 +
 lib/Analysis/DependenceAnalysis.cpp | 182 +-
 lib/Analysis/DivergenceAnalysis.cpp | 115 +-
 lib/Analysis/EHPersonalities.cpp | 106 +
 lib/Analysis/GlobalsModRef.cpp | 1002 +
 lib/Analysis/IPA/CMakeLists.txt | 10 -
 lib/Analysis/IPA/GlobalsModRef.cpp | 609 -
 lib/Analysis/IPA/IPA.cpp | 30 -
 lib/Analysis/IVUsers.cpp | 8 +-
 lib/Analysis/{IPA => }/InlineCost.cpp | 102 +-
 lib/Analysis/InstructionSimplify.cpp | 81 +-
 lib/Analysis/LLVMBuild.txt | 3 -
 lib/Analysis/LazyCallGraph.cpp | 8 +-
 lib/Analysis/LazyValueInfo.cpp | 341 +-
 lib/Analysis/LibCallAliasAnalysis.cpp | 141 -
 lib/Analysis/LibCallSemantics.cpp | 89 -
 lib/Analysis/Lint.cpp | 303 +-
 lib/Analysis/Loads.cpp | 24 +-
 lib/Analysis/LoopAccessAnalysis.cpp | 541 +-
 lib/Analysis/LoopInfo.cpp | 69 +-
 lib/Analysis/LoopPass.cpp | 141 +-
 lib/Analysis/Makefile | 1 -
 lib/Analysis/MemDepPrinter.cpp | 6 +-
 lib/Analysis/MemDerefPrinter.cpp | 18 +-
 lib/Analysis/MemoryBuiltins.cpp | 55 +-
 lib/Analysis/MemoryDependenceAnalysis.cpp | 174 +-
 lib/Analysis/NoAliasAnalysis.cpp | 95 -
 .../ObjCARCAliasAnalysis.cpp | 130 +-
 lib/Analysis/ObjCARCAnalysisUtils.cpp | 28 +
 .../ObjCARCInstKind.cpp} | 8 +-
 lib/Analysis/OrderedBasicBlock.cpp | 85 +
 lib/Analysis/RegionInfo.cpp | 9 +
 lib/Analysis/RegionPrinter.cpp | 153 +-
 lib/Analysis/ScalarEvolution.cpp | 3267 +++-
 lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 178 +-
 lib/Analysis/ScalarEvolutionExpander.cpp | 361 +-
 lib/Analysis/ScalarEvolutionNormalization.cpp | 2 +-
 lib/Analysis/ScopedNoAliasAA.cpp | 204 +-
 lib/Analysis/SparsePropagation.cpp | 14 +-
 lib/Analysis/TargetLibraryInfo.cpp | 42 +-
 lib/Analysis/TargetTransformInfo.cpp | 236 +-
 lib/Analysis/TypeBasedAliasAnalysis.cpp | 638 +-
 lib/Analysis/ValueTracking.cpp | 1162 +-
 lib/Analysis/VectorUtils.cpp | 199 +-
 lib/AsmParser/LLLexer.cpp | 21 +-
 lib/AsmParser/LLParser.cpp | 732 +-
 lib/AsmParser/LLParser.h | 57 +-
 lib/AsmParser/LLToken.h | 16 +-
 lib/AsmParser/Parser.cpp | 12 +
 lib/Bitcode/Reader/BitReader.cpp | 105 +-
 lib/Bitcode/Reader/BitcodeReader.cpp | 1927 +-
 lib/Bitcode/Writer/BitcodeWriter.cpp | 741 +-
 lib/Bitcode/Writer/BitcodeWriterPass.cpp | 18 +-
 lib/Bitcode/Writer/ValueEnumerator.cpp | 96 +-
 lib/Bitcode/Writer/ValueEnumerator.h | 7 +-
 lib/CodeGen/AggressiveAntiDepBreaker.cpp | 61 +-
 lib/CodeGen/AllocationOrder.cpp | 5 +-
 lib/CodeGen/AllocationOrder.h | 4 +-
 lib/CodeGen/Analysis.cpp | 98 +-
 lib/CodeGen/AsmPrinter/ARMException.cpp | 5 +-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 459 +-
 lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 42 +-
 lib/CodeGen/AsmPrinter/AsmPrinterHandler.h | 6 +
 .../AsmPrinter/AsmPrinterInlineAsm.cpp | 14 +-
 lib/CodeGen/AsmPrinter/ByteStreamer.h | 13 +-
 lib/CodeGen/AsmPrinter/DIE.cpp | 124 +-
 lib/CodeGen/AsmPrinter/DIEHash.cpp | 32 -
 lib/CodeGen/AsmPrinter/DIEHash.h | 3 -
 lib/CodeGen/AsmPrinter/DebugLocEntry.h | 3 +-
 lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 2 +-
 lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 10 +-
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 59 +-
 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 10 +
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 109 +-
 lib/CodeGen/AsmPrinter/DwarfDebug.h | 84 +-
 lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 15 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 98 +-
 lib/CodeGen/AsmPrinter/DwarfUnit.h | 30 +-
 lib/CodeGen/AsmPrinter/EHStreamer.cpp | 5 +-
 lib/CodeGen/AsmPrinter/EHStreamer.h | 8 +-
 lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 2 +-
 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 2 +-
 .../AsmPrinter/WinCodeViewLineTables.cpp | 13 +-
 .../AsmPrinter/WinCodeViewLineTables.h | 10 +-
 lib/CodeGen/AsmPrinter/WinException.cpp | 1212 +-
 lib/CodeGen/AsmPrinter/WinException.h | 31 +-
 lib/CodeGen/AtomicExpandPass.cpp | 348 +-
 lib/CodeGen/BasicTargetTransformInfo.cpp | 2 +-
 lib/CodeGen/BranchFolding.cpp | 284 +-
 lib/CodeGen/BranchFolding.h | 1 +
 lib/CodeGen/CMakeLists.txt | 10 +
 lib/CodeGen/CalcSpillWeights.cpp | 39 +-
 lib/CodeGen/CallingConvLower.cpp | 3 +
 lib/CodeGen/CodeGen.cpp | 2 +
 lib/CodeGen/CodeGenPrepare.cpp | 1325 +-
 lib/CodeGen/CoreCLRGC.cpp | 4 +-
 lib/CodeGen/CriticalAntiDepBreaker.cpp | 7 +-
 lib/CodeGen/DFAPacketizer.cpp | 87 +-
 lib/CodeGen/DeadMachineInstructionElim.cpp | 20 +-
 lib/CodeGen/DwarfEHPrepare.cpp | 6 +-
 lib/CodeGen/EarlyIfConversion.cpp | 6 +-
 lib/CodeGen/ExecutionDepsFix.cpp | 36 +-
 lib/CodeGen/ExpandISelPseudos.cpp | 4 +-
 lib/CodeGen/FuncletLayout.cpp | 55 +
 lib/CodeGen/GCRootLowering.cpp | 6 +-
 lib/CodeGen/GlobalMerge.cpp | 138 +-
 lib/CodeGen/IfConversion.cpp | 189 +-
 lib/CodeGen/ImplicitNullChecks.cpp | 201 +-
 lib/CodeGen/InlineSpiller.cpp | 76 +-
 lib/CodeGen/InterferenceCache.cpp | 3 +-
 lib/CodeGen/InterleavedAccessPass.cpp | 4 +-
 lib/CodeGen/IntrinsicLowering.cpp | 65 +-
 lib/CodeGen/LLVMBuild.txt | 2 +-
 lib/CodeGen/LLVMTargetMachine.cpp | 7 +-
 lib/CodeGen/LiveDebugValues.cpp | 405 +
 lib/CodeGen/LiveDebugVariables.cpp | 105 +-
 lib/CodeGen/LiveDebugVariables.h | 1 -
 lib/CodeGen/LiveInterval.cpp | 117 +-
 lib/CodeGen/LiveIntervalAnalysis.cpp | 129 +-
 lib/CodeGen/LivePhysRegs.cpp | 18 +-
 lib/CodeGen/LiveRangeCalc.cpp | 15 +-
 lib/CodeGen/LiveRangeCalc.h | 2 +-
 lib/CodeGen/LiveRangeEdit.cpp | 37 +-
 lib/CodeGen/LiveRegMatrix.cpp | 7 +-
 lib/CodeGen/LiveVariables.cpp | 27 +-
 lib/CodeGen/LocalStackSlotAllocation.cpp | 2 +-
 lib/CodeGen/MIRParser/LLVMBuild.txt | 2 +-
 lib/CodeGen/MIRParser/MILexer.cpp | 451 +-
 lib/CodeGen/MIRParser/MILexer.h | 102 +-
 lib/CodeGen/MIRParser/MIParser.cpp | 1599 +-
 lib/CodeGen/MIRParser/MIParser.h | 54 +-
 lib/CodeGen/MIRParser/MIRParser.cpp | 432 +-
 lib/CodeGen/MIRPrinter.cpp | 771 +-
 lib/CodeGen/MIRPrintingPass.cpp | 4 +-
 lib/CodeGen/MachineBasicBlock.cpp | 507 +-
 lib/CodeGen/MachineBlockFrequencyInfo.cpp | 4 +-
 lib/CodeGen/MachineBlockPlacement.cpp | 364 +-
 lib/CodeGen/MachineBranchProbabilityInfo.cpp | 75 +-
 lib/CodeGen/MachineCSE.cpp | 6 +-
 lib/CodeGen/MachineCombiner.cpp | 202 +-
 lib/CodeGen/MachineFunction.cpp | 88 +-
 lib/CodeGen/MachineFunctionPass.cpp | 10 +-
 lib/CodeGen/MachineInstr.cpp | 89 +-
 lib/CodeGen/MachineInstrBundle.cpp | 43 +-
 lib/CodeGen/MachineLICM.cpp | 259 +-
 lib/CodeGen/MachineLoopInfo.cpp | 13 +-
 lib/CodeGen/MachineModuleInfo.cpp | 86 +-
 lib/CodeGen/MachineRegisterInfo.cpp | 32 +-
 lib/CodeGen/MachineScheduler.cpp | 232 +-
 lib/CodeGen/MachineSink.cpp | 11 +-
 lib/CodeGen/MachineTraceMetrics.cpp | 10 +-
 lib/CodeGen/MachineVerifier.cpp | 326 +-
 lib/CodeGen/PHIElimination.cpp | 2 +-
 lib/CodeGen/PHIEliminationUtils.cpp | 2 +-
 lib/CodeGen/ParallelCG.cpp | 96 +
 lib/CodeGen/Passes.cpp | 144 +-
 lib/CodeGen/PeepholeOptimizer.cpp | 1011 +-
 lib/CodeGen/PostRASchedulerList.cpp | 37 +-
 lib/CodeGen/ProcessImplicitDefs.cpp | 6 +-
 lib/CodeGen/PrologEpilogInserter.cpp | 149 +-
 lib/CodeGen/PseudoSourceValue.cpp | 140 +-
 lib/CodeGen/RegAllocBasic.cpp | 8 +-
 lib/CodeGen/RegAllocFast.cpp | 21 +-
 lib/CodeGen/RegAllocGreedy.cpp | 53 +-
 lib/CodeGen/RegAllocPBQP.cpp | 37 +-
 lib/CodeGen/RegisterCoalescer.cpp | 256 +-
 lib/CodeGen/RegisterPressure.cpp | 373 +-
 lib/CodeGen/RegisterScavenging.cpp | 14 +-
 lib/CodeGen/ScheduleDAG.cpp | 1 -
 lib/CodeGen/ScheduleDAGInstrs.cpp | 313 +-
 lib/CodeGen/ScheduleDAGPrinter.cpp | 9 +-
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2674 ++-
 lib/CodeGen/SelectionDAG/FastISel.cpp | 142 +-
 .../SelectionDAG/FunctionLoweringInfo.cpp | 192 +-
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1283 +-
 .../SelectionDAG/LegalizeFloatTypes.cpp | 274 +-
 .../SelectionDAG/LegalizeIntegerTypes.cpp | 304 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 124 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.h | 110 +-
 .../SelectionDAG/LegalizeTypesGeneric.cpp | 14 +-
 .../SelectionDAG/LegalizeVectorOps.cpp | 58 +-
 .../SelectionDAG/LegalizeVectorTypes.cpp | 277 +-
 .../SelectionDAG/ResourcePriorityQueue.cpp | 6 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 4 +-
 .../SelectionDAG/ScheduleDAGRRList.cpp | 34 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6 -
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 530 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp | 1052 +-
 .../SelectionDAG/SelectionDAGBuilder.h | 109 +-
 .../SelectionDAG/SelectionDAGDumper.cpp | 133 +-
 lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 263 +-
 .../SelectionDAG/SelectionDAGPrinter.cpp | 13 +-
 .../SelectionDAG/StatepointLowering.cpp | 137 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp | 267 +-
 lib/CodeGen/ShadowStackGCLowering.cpp | 10 +-
 lib/CodeGen/ShrinkWrap.cpp | 175 +-
 lib/CodeGen/SjLjEHPrepare.cpp | 44 +-
 lib/CodeGen/SlotIndexes.cpp | 4 +-
 lib/CodeGen/SpillPlacement.cpp | 7 +-
 lib/CodeGen/SplitKit.cpp | 49 +-
 lib/CodeGen/StackMaps.cpp | 4 +-
 lib/CodeGen/StackProtector.cpp | 20 +-
 lib/CodeGen/StackSlotColoring.cpp | 2 +-
 lib/CodeGen/StatepointExampleGC.cpp | 4 +-
 lib/CodeGen/TailDuplication.cpp | 115 +-
 lib/CodeGen/TargetFrameLoweringImpl.cpp | 30 +-
 lib/CodeGen/TargetInstrInfo.cpp | 310 +-
 lib/CodeGen/TargetLoweringBase.cpp | 148 +-
 lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 80 +-
 lib/CodeGen/TargetRegisterInfo.cpp | 185 +-
 lib/CodeGen/TargetSchedule.cpp | 8 +-
 lib/CodeGen/TwoAddressInstructionPass.cpp | 302 +-
 lib/CodeGen/UnreachableBlockElim.cpp | 8 +-
 lib/CodeGen/VirtRegMap.cpp | 166 +-
 lib/CodeGen/WinEHPrepare.cpp | 3748 +---
 lib/DebugInfo/CMakeLists.txt | 4 +-
 lib/DebugInfo/CodeView/CMakeLists.txt | 12 +
 .../CodeView/FieldListRecordBuilder.cpp | 165 +
 .../IPA => DebugInfo/CodeView}/LLVMBuild.txt | 9 +-
 lib/DebugInfo/CodeView/Line.cpp | 22 +
 lib/DebugInfo/CodeView/ListRecordBuilder.cpp | 31 +
 lib/DebugInfo/CodeView/Makefile | 14 +
 .../CodeView/MemoryTypeTableBuilder.cpp | 35 +
 .../CodeView/MethodListRecordBuilder.cpp | 49 +
 lib/DebugInfo/CodeView/TypeRecordBuilder.cpp | 113 +
 lib/DebugInfo/CodeView/TypeTableBuilder.cpp | 217 +
 lib/DebugInfo/DWARF/CMakeLists.txt | 2 +
 lib/DebugInfo/DWARF/DWARFContext.cpp | 93 +-
 lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp | 2 +-
 lib/DebugInfo/DWARF/DWARFDebugMacro.cpp | 103 +
 lib/DebugInfo/DWARF/DWARFFormValue.cpp | 18 +-
 lib/DebugInfo/DWARF/DWARFUnit.cpp | 44 +-
 lib/DebugInfo/DWARF/DWARFUnitIndex.cpp | 168 +
 lib/DebugInfo/DWARF/SyntaxHighlighting.cpp | 1 +
 lib/DebugInfo/DWARF/SyntaxHighlighting.h | 2 +-
 lib/DebugInfo/LLVMBuild.txt | 2 +-
 lib/DebugInfo/Makefile | 4 +-
 lib/DebugInfo/PDB/PDB.cpp | 2 +-
 lib/DebugInfo/PDB/PDBContext.cpp | 21 +-
 lib/DebugInfo/Symbolize/CMakeLists.txt | 8 +
 lib/DebugInfo/Symbolize/DIPrinter.cpp | 69 +
 lib/DebugInfo/Symbolize/LLVMBuild.txt | 22 +
 .../IPA => DebugInfo/Symbolize}/Makefile | 6 +-
 .../Symbolize/SymbolizableObjectFile.cpp | 254 +
 .../Symbolize/SymbolizableObjectFile.h | 82 +
 lib/DebugInfo/Symbolize/Symbolize.cpp | 456 +
 lib/ExecutionEngine/ExecutionEngine.cpp | 69 +-
 .../ExecutionEngineBindings.cpp | 46 +-
 lib/ExecutionEngine/Interpreter/Execution.cpp | 76 +-
 .../Interpreter/ExternalFunctions.cpp | 10 +-
 .../Interpreter/Interpreter.cpp | 7 +-
 lib/ExecutionEngine/Interpreter/Interpreter.h | 2 -
 lib/ExecutionEngine/MCJIT/MCJIT.cpp | 23 +-
 lib/ExecutionEngine/MCJIT/MCJIT.h | 17 +-
 lib/ExecutionEngine/Orc/CMakeLists.txt | 2 +
 lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 25 +-
 lib/ExecutionEngine/Orc/OrcCBindings.cpp | 97 +
 lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp | 43 +
 lib/ExecutionEngine/Orc/OrcCBindingsStack.h | 282 +
 lib/ExecutionEngine/Orc/OrcMCJITReplacement.h | 44 +-
 lib/ExecutionEngine/Orc/OrcTargetSupport.cpp | 271 +-
 .../RuntimeDyld/RuntimeDyld.cpp | 163 +-
 .../RuntimeDyld/RuntimeDyldCOFF.cpp | 15 +-
 .../RuntimeDyld/RuntimeDyldChecker.cpp | 25 +-
 .../RuntimeDyld/RuntimeDyldELF.cpp | 460 +-
 .../RuntimeDyld/RuntimeDyldELF.h | 13 +
 .../RuntimeDyld/RuntimeDyldImpl.h | 69 +-
 .../RuntimeDyld/RuntimeDyldMachO.cpp | 80 +-
 .../RuntimeDyld/RuntimeDyldMachO.h | 8 +-
 .../RuntimeDyld/Targets/RuntimeDyldCOFFI386.h | 201 +
 .../Targets/RuntimeDyldCOFFX86_64.h | 32 +-
 .../Targets/RuntimeDyldMachOAArch64.h | 23 +-
 .../RuntimeDyld/Targets/RuntimeDyldMachOARM.h | 34 +-
 .../Targets/RuntimeDyldMachOI386.h | 48 +-
 .../Targets/RuntimeDyldMachOX86_64.h | 69 +-
 lib/ExecutionEngine/SectionMemoryManager.cpp | 103 +-
 lib/Fuzzer/CMakeLists.txt | 6 +
 lib/Fuzzer/FuzzerCrossOver.cpp | 10 +-
 lib/Fuzzer/FuzzerDFSan.h | 61 +
 lib/Fuzzer/FuzzerDriver.cpp | 168 +-
 lib/Fuzzer/FuzzerFlags.def | 46 +-
 lib/Fuzzer/FuzzerIO.cpp | 27 +-
 lib/Fuzzer/FuzzerInterface.cpp | 19 +-
 lib/Fuzzer/FuzzerInterface.h | 117 +-
 lib/Fuzzer/FuzzerInternal.h | 93 +-
 lib/Fuzzer/FuzzerLoop.cpp | 464 +-
 lib/Fuzzer/FuzzerMain.cpp | 2 +-
 lib/Fuzzer/FuzzerMutate.cpp | 208 +-
 lib/Fuzzer/FuzzerTraceState.cpp | 220 +-
 lib/Fuzzer/FuzzerUtil.cpp | 129 +-
 lib/Fuzzer/cxx.dict | 122 +
 lib/Fuzzer/cxx_fuzzer_tokens.txt | 218 -
 lib/Fuzzer/test/CMakeLists.txt | 36 +-
 lib/Fuzzer/test/CallerCalleeTest.cpp | 56 +
 lib/Fuzzer/test/CounterTest.cpp | 3 +-
 lib/Fuzzer/test/CxxTokensTest.cpp | 24 -
 lib/Fuzzer/test/DFSanMemcmpTest.cpp | 12 -
 .../test/FourIndependentBranchesTest.cpp | 3 +-
 lib/Fuzzer/test/FullCoverageSetTest.cpp | 3 +-
 lib/Fuzzer/test/FuzzerUnittest.cpp | 308 +-
 lib/Fuzzer/test/InfiniteTest.cpp | 24 -
 lib/Fuzzer/test/MemcmpTest.cpp | 20 +
 lib/Fuzzer/test/NullDerefTest.cpp | 3 +-
 ...SanSimpleCmpTest.cpp => SimpleCmpTest.cpp} | 5 +-
 lib/Fuzzer/test/SimpleDictionaryTest.cpp | 26 +
 lib/Fuzzer/test/SimpleHashTest.cpp | 37 +
 lib/Fuzzer/test/SimpleTest.cpp | 5 +-
 lib/Fuzzer/test/StrcmpTest.cpp | 29 +
 lib/Fuzzer/test/StrncmpTest.cpp | 25 +
 lib/Fuzzer/test/SwitchTest.cpp | 55 +
 lib/Fuzzer/test/TimeoutTest.cpp | 3 +-
 lib/Fuzzer/test/UninstrumentedTest.cpp | 8 +
 lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp | 16 +-
 lib/Fuzzer/test/dict1.txt | 4 +
 lib/Fuzzer/test/fuzzer-dfsan.test | 22 +
 lib/Fuzzer/test/fuzzer-drill.test | 8 +
 lib/Fuzzer/test/fuzzer-timeout.test | 13 +
 lib/Fuzzer/test/fuzzer-traces.test | 19 +
 lib/Fuzzer/test/fuzzer.test | 40 +-
 lib/Fuzzer/test/hi.txt | 1 +
 lib/Fuzzer/test/lit.cfg | 5 +-
 lib/Fuzzer/test/merge.test | 29 +
 lib/Fuzzer/test/trace-bb/CMakeLists.txt | 14 +
 lib/Fuzzer/test/uninstrumented/CMakeLists.txt | 14 +
 lib/IR/AsmWriter.cpp | 407 +-
 lib/IR/AttributeImpl.h | 37 +-
 lib/IR/Attributes.cpp | 213 +-
 lib/IR/AttributesCompatFunc.td | 1 +
 lib/IR/AutoUpgrade.cpp | 103 +-
 lib/IR/BasicBlock.cpp | 42 +-
 lib/IR/CMakeLists.txt | 14 +-
 lib/IR/ConstantFold.cpp | 35 +-
 lib/IR/ConstantRange.cpp | 53 +
 lib/IR/Constants.cpp | 326 +-
 lib/IR/ConstantsContext.h | 47 +-
 lib/IR/Core.cpp | 113 +-
 lib/IR/DIBuilder.cpp | 120 +-
 lib/IR/DataLayout.cpp | 13 +-
 lib/IR/DebugInfo.cpp | 55 +-
 lib/IR/DebugInfoMetadata.cpp | 90 +-
 lib/IR/DiagnosticInfo.cpp | 41 +-
 lib/IR/Dominators.cpp | 32 +-
 lib/IR/Function.cpp | 211 +-
 lib/IR/FunctionInfo.cpp | 67 +
 lib/IR/GCOV.cpp | 4 +-
 lib/IR/Globals.cpp | 78 +-
 lib/IR/IRBuilder.cpp | 115 +-
 lib/IR/InlineAsm.cpp | 27 +-
 lib/IR/Instruction.cpp | 40 +-
 lib/IR/Instructions.cpp | 418 +-
 lib/IR/LLVMContext.cpp | 48 +-
 lib/IR/LLVMContextImpl.cpp | 24 +-
 lib/IR/LLVMContextImpl.h | 170 +-
 lib/IR/LegacyPassManager.cpp | 159 +-
 lib/IR/MDBuilder.cpp | 48 +-
 lib/IR/Makefile | 30 +-
 lib/IR/Metadata.cpp | 89 +-
 lib/IR/MetadataImpl.h | 13 +
 lib/IR/MetadataTracking.cpp | 55 -
 lib/IR/Module.cpp | 60 +-
 lib/IR/Statepoint.cpp | 5 +-
 lib/IR/SymbolTableListTraitsImpl.h | 50 +-
 lib/IR/Type.cpp | 154 +-
 lib/IR/TypeFinder.cpp | 14 +-
 lib/IR/User.cpp | 64 +-
 lib/IR/Value.cpp | 13 +-
 lib/IR/ValueSymbolTable.cpp | 50 +-
 lib/IR/ValueTypes.cpp | 27 +
 lib/IR/Verifier.cpp | 682 +-
 lib/IRReader/IRReader.cpp | 12 +-
 lib/LTO/LLVMBuild.txt | 1 -
 lib/LTO/LTOCodeGenerator.cpp | 505 +-
 lib/LTO/LTOModule.cpp | 155 +-
 lib/LibDriver/LibDriver.cpp | 11 +-
 lib/LibDriver/Options.td | 2 +
 lib/Linker/CMakeLists.txt | 1 +
 lib/Linker/IRMover.cpp | 1657 ++
 lib/Linker/LinkDiagnosticInfo.h | 25 +
 lib/Linker/LinkModules.cpp | 1906 +-
 lib/MC/CMakeLists.txt | 1 +
 lib/MC/ConstantPools.cpp | 10 +-
 lib/MC/ELFObjectWriter.cpp | 144 +-
 lib/MC/MCAsmBackend.cpp | 4 +
 lib/MC/MCAsmInfo.cpp | 6 +
 lib/MC/MCAsmInfoCOFF.cpp | 3 +-
 lib/MC/MCAsmInfoDarwin.cpp | 5 -
 lib/MC/MCAsmStreamer.cpp | 95 +-
 lib/MC/MCAssembler.cpp | 527 +-
 lib/MC/MCContext.cpp | 56 +-
 lib/MC/MCDisassembler/Disassembler.cpp | 4 -
 lib/MC/MCDwarf.cpp | 393 +-
 lib/MC/MCELFObjectTargetWriter.cpp | 16 -
 lib/MC/MCELFStreamer.cpp | 60 +-
 lib/MC/MCExpr.cpp | 74 +-
 lib/MC/MCFragment.cpp | 458 +
 lib/MC/MCInst.cpp | 2 +
 lib/MC/MCInstrDesc.cpp | 2 +-
 lib/MC/MCMachOStreamer.cpp | 47 +-
 lib/MC/MCObjectFileInfo.cpp | 230 +-
 lib/MC/MCObjectStreamer.cpp | 96 +-
 lib/MC/MCObjectWriter.cpp | 10 +-
 lib/MC/MCParser/AsmLexer.cpp | 16 +-
 lib/MC/MCParser/AsmParser.cpp | 270 +-
 lib/MC/MCParser/COFFAsmParser.cpp | 11 +-
 lib/MC/MCParser/DarwinAsmParser.cpp | 78 +-
 lib/MC/MCParser/ELFAsmParser.cpp | 22 +-
 lib/MC/MCParser/MCAsmLexer.cpp | 4 +-
 lib/MC/MCParser/MCTargetAsmParser.cpp | 17 +-
 lib/MC/MCSection.cpp | 4 +-
 lib/MC/MCSectionCOFF.cpp | 1 +
 lib/MC/MCSectionELF.cpp | 9 +-
 lib/MC/MCSectionMachO.cpp | 4 +-
 lib/MC/MCStreamer.cpp | 31 +-
 lib/MC/MCSubtargetInfo.cpp | 17 +-
 lib/MC/MCSymbol.cpp | 7 +-
 lib/MC/MCTargetOptions.cpp | 7 +-
 lib/MC/MCWinEH.cpp | 8 +-
 lib/MC/MachObjectWriter.cpp | 135 +-
 lib/MC/StringTableBuilder.cpp | 118 +-
 lib/MC/SubtargetFeature.cpp | 2 +-
 lib/MC/WinCOFFObjectWriter.cpp | 193 +-
 lib/MC/WinCOFFStreamer.cpp | 41 +-
 lib/Object/Archive.cpp | 210 +-
 lib/Object/ArchiveWriter.cpp | 117 +-
 lib/Object/CMakeLists.txt | 1 +
 lib/Object/COFFObjectFile.cpp | 56 +-
 lib/Object/COFFYAML.cpp | 2 +
 lib/Object/ELF.cpp | 1 +
 lib/Object/ELFYAML.cpp | 33 +
 lib/Object/Error.cpp | 2 +
 lib/Object/FunctionIndexObjectFile.cpp | 143 +
 lib/Object/IRObjectFile.cpp | 8 +-
 lib/Object/MachOObjectFile.cpp | 175 +-
 lib/Object/MachOUniversal.cpp | 16 +-
 lib/Object/Object.cpp | 4 +-
 lib/Object/ObjectFile.cpp | 6 +-
 lib/Object/SymbolicFile.cpp | 5 +-
 lib/Option/Arg.cpp | 21 +-
 lib/Option/ArgList.cpp | 25 +
 lib/Option/OptTable.cpp | 12 +-
 lib/Option/Option.cpp | 31 +-
 lib/Passes/LLVMBuild.txt | 2 +-
 lib/Passes/PassBuilder.cpp | 6 +
 lib/Passes/PassRegistry.def | 7 +
 lib/ProfileData/CoverageMapping.cpp | 18 +-
 lib/ProfileData/CoverageMappingReader.cpp | 101 +-
 lib/ProfileData/InstrProf.cpp | 434 +-
 lib/ProfileData/InstrProfIndexed.h | 56 -
 lib/ProfileData/InstrProfReader.cpp | 454 +-
 lib/ProfileData/InstrProfWriter.cpp | 175 +-
 lib/ProfileData/SampleProf.cpp | 106 +-
 lib/ProfileData/SampleProfReader.cpp | 762 +-
 lib/ProfileData/SampleProfWriter.cpp | 180 +-
 lib/Support/APFloat.cpp | 46 +-
 lib/Support/BlockFrequency.cpp | 36 +-
 lib/Support/BranchProbability.cpp | 49 +-
 lib/Support/CMakeLists.txt | 5 +-
 lib/Support/CommandLine.cpp | 60 +-
 lib/Support/CrashRecoveryContext.cpp | 26 +-
 lib/Support/Dwarf.cpp | 37 +
 lib/Support/ErrorHandling.cpp | 2 +-
 lib/Support/FileOutputBuffer.cpp | 18 +-
 lib/Support/FoldingSet.cpp | 20 +
 lib/Support/GraphWriter.cpp | 74 +-
 lib/Support/Host.cpp | 43 +-
 lib/Support/JamCRC.cpp | 96 +
 lib/Support/Locale.cpp | 1 +
 lib/Support/ManagedStatic.cpp | 1 +
 lib/Support/MemoryBuffer.cpp | 5 +-
 lib/Support/Path.cpp | 110 +-
 lib/Support/PrettyStackTrace.cpp | 16 +-
 lib/Support/Signals.cpp | 140 +-
 lib/Support/Statistic.cpp | 18 +-
 lib/Support/StringRef.cpp | 101 +-
 lib/Support/StringSaver.cpp | 2 +-
 lib/Support/TargetParser.cpp | 506 +-
 lib/Support/ThreadPool.cpp | 155 +
 lib/Support/TimeValue.cpp | 6 +-
 lib/Support/Timer.cpp | 74 +-
 lib/Support/Triple.cpp | 217 +-
 lib/Support/Unix/Memory.inc | 15 +-
 lib/Support/Unix/Path.inc | 120 +-
 lib/Support/Unix/Process.inc | 13 +-
 lib/Support/Unix/Program.inc | 7 -
 lib/Support/Unix/Signals.inc | 151 +-
 lib/Support/Unix/Unix.h | 13 +-
 lib/Support/Valgrind.cpp | 21 +-
 lib/Support/Windows/COM.inc | 2 +-
 lib/Support/Windows/DynamicLibrary.inc | 4 +-
 lib/Support/Windows/Memory.inc | 4 +-
 lib/Support/Windows/Path.inc | 109 +-
 lib/Support/Windows/Process.inc | 11 +-
 lib/Support/Windows/Program.inc | 25 +-
 lib/Support/Windows/Signals.inc | 131 +-
 lib/Support/Windows/WindowsSupport.h | 18 +-
 lib/Support/YAMLParser.cpp | 18 +-
 lib/Support/YAMLTraits.cpp | 16 +-
 lib/Support/raw_ostream.cpp | 83 +-
 lib/TableGen/Record.cpp | 10 +-
 lib/TableGen/SetTheory.cpp | 2 +-
 lib/TableGen/TGParser.cpp | 30 +-
 lib/TableGen/TGParser.h | 16 +-
 lib/Target/AArch64/AArch64.td | 43 +-
 lib/Target/AArch64/AArch64A53Fix835769.cpp | 15 +-
 .../AArch64/AArch64A57FPLoadBalancing.cpp | 1 -
 .../AArch64/AArch64AddressTypePromotion.cpp | 8 +-
 .../AArch64/AArch64AdvSIMDScalarPass.cpp | 17 +-
 .../AArch64/AArch64BranchRelaxation.cpp | 27 +-
 lib/Target/AArch64/AArch64CallingConvention.h | 38 +-
 .../AArch64/AArch64CallingConvention.td | 19 +-
 .../AArch64CleanupLocalDynamicTLSPass.cpp | 8 +-
 lib/Target/AArch64/AArch64CollectLOH.cpp | 32 +-
 .../AArch64/AArch64ConditionOptimizer.cpp | 22 +-
 .../AArch64/AArch64ConditionalCompares.cpp | 20 +-
 .../AArch64DeadRegisterDefinitionsPass.cpp | 16 +-
 .../AArch64/AArch64ExpandPseudoInsts.cpp | 15 +-
 lib/Target/AArch64/AArch64FastISel.cpp | 221 +-
 lib/Target/AArch64/AArch64FrameLowering.cpp | 93 +-
 lib/Target/AArch64/AArch64FrameLowering.h | 6 +-
 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 246 +-
 lib/Target/AArch64/AArch64ISelLowering.cpp | 1446 +-
 lib/Target/AArch64/AArch64ISelLowering.h | 110 +-
 lib/Target/AArch64/AArch64InstrFormats.td | 1076 +-
 lib/Target/AArch64/AArch64InstrInfo.cpp | 225 +-
 lib/Target/AArch64/AArch64InstrInfo.h | 12 +-
 lib/Target/AArch64/AArch64InstrInfo.td | 483 +-
 .../AArch64/AArch64LoadStoreOptimizer.cpp | 1310 +-
 lib/Target/AArch64/AArch64MCInstLower.cpp | 4 +-
 .../AArch64/AArch64MachineCombinerPattern.h | 42 -
 .../AArch64/AArch64MachineFunctionInfo.h | 17 +-
 lib/Target/AArch64/AArch64PromoteConstant.cpp | 4 +-
 lib/Target/AArch64/AArch64RegisterInfo.cpp | 56 +-
 lib/Target/AArch64/AArch64RegisterInfo.h | 5 +-
 lib/Target/AArch64/AArch64RegisterInfo.td | 2 +-
 lib/Target/AArch64/AArch64Subtarget.cpp | 31 +-
 lib/Target/AArch64/AArch64Subtarget.h | 27 +-
 lib/Target/AArch64/AArch64TargetMachine.cpp | 2 +-
 .../AArch64/AArch64TargetTransformInfo.cpp | 150 +-
 .../AArch64/AArch64TargetTransformInfo.h | 39 +-
 .../AArch64/AsmParser/AArch64AsmParser.cpp | 201 +-
 .../Disassembler/AArch64Disassembler.cpp | 4 +
 .../InstPrinter/AArch64InstPrinter.cpp | 46 +-
 .../AArch64/InstPrinter/AArch64InstPrinter.h | 10 +-
 .../MCTargetDesc/AArch64AddressingModes.h | 26 +
 .../MCTargetDesc/AArch64ELFStreamer.cpp | 5 +-
 .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 4 -
 .../AArch64/MCTargetDesc/AArch64MCExpr.cpp | 6 +-
 .../AArch64/MCTargetDesc/AArch64MCExpr.h | 8 +-
 .../MCTargetDesc/AArch64MachObjectWriter.cpp | 88 +-
 .../MCTargetDesc/AArch64TargetStreamer.cpp | 5 +-
 .../MCTargetDesc/AArch64TargetStreamer.h | 2 +-
 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 33 +-
 lib/Target/AArch64/Utils/AArch64BaseInfo.h | 41 +-
 lib/Target/AMDGPU/AMDGPU.h | 16 +-
 lib/Target/AMDGPU/AMDGPU.td | 10 +
 .../AMDGPU/AMDGPUAnnotateKernelFeatures.cpp | 126 +
 .../AMDGPU/AMDGPUAnnotateUniformValues.cpp | 84 +
 lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 200 +-
 lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 4 +-
 .../AMDGPUDiagnosticInfoUnsupported.cpp | 26 +
 .../AMDGPU/AMDGPUDiagnosticInfoUnsupported.h | 48 +
 lib/Target/AMDGPU/AMDGPUFrameLowering.cpp | 10 +-
 lib/Target/AMDGPU/AMDGPUFrameLowering.h | 11 +-
 lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 477 +-
 lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 195 +-
 lib/Target/AMDGPU/AMDGPUISelLowering.h | 15 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.cpp | 20 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.h | 6 +-
 lib/Target/AMDGPU/AMDGPUInstrInfo.td | 2 -
 lib/Target/AMDGPU/AMDGPUInstructions.td | 4 +-
 lib/Target/AMDGPU/AMDGPUIntrinsics.td | 4 +-
 lib/Target/AMDGPU/AMDGPUMCInstLower.cpp | 18 +-
 lib/Target/AMDGPU/AMDGPUMachineFunction.cpp | 11 +-
 lib/Target/AMDGPU/AMDGPUMachineFunction.h | 5 +
 .../AMDGPUOpenCLImageTypeLoweringPass.cpp | 373 +
 lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 11 +-
 lib/Target/AMDGPU/AMDGPURegisterInfo.h | 4 -
 lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 25 +-
 lib/Target/AMDGPU/AMDGPUSubtarget.h | 26 +-
 lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 77 +-
 lib/Target/AMDGPU/AMDGPUTargetMachine.h | 4 +-
 lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp | 87 +
 lib/Target/AMDGPU/AMDGPUTargetObjectFile.h | 51 +
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 102 +-
 lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h | 5 +
 lib/Target/AMDGPU/AMDILCFGStructurizer.cpp | 43 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 266 +-
 lib/Target/AMDGPU/CIInstructions.td | 336 +-
 lib/Target/AMDGPU/CMakeLists.txt | 7 +-
 lib/Target/AMDGPU/CaymanInstructions.td | 4 +
 lib/Target/AMDGPU/EvergreenInstructions.td | 11 +
 .../AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp | 16 +-
 .../AMDGPU/InstPrinter/AMDGPUInstPrinter.h | 2 -
 .../AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 27 +-
 .../AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp | 26 +
 .../AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h | 40 +
 .../AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h | 3 -
 .../AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 14 +-
 .../AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h | 3 +-
 .../MCTargetDesc/AMDGPUMCTargetDesc.cpp | 11 +
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 51 +-
 .../MCTargetDesc/AMDGPUTargetStreamer.h | 21 +
 lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt | 1 +
 lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt | 2 +-
 .../AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp | 6 +-
 .../AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 15 +-
 lib/Target/AMDGPU/Processors.td | 4 +
 .../AMDGPU/R600ControlFlowFinalizer.cpp | 4 +-
 lib/Target/AMDGPU/R600ISelLowering.cpp | 21 +-
 lib/Target/AMDGPU/R600InstrInfo.cpp | 6 +-
 lib/Target/AMDGPU/R600InstrInfo.h | 6 +-
 lib/Target/AMDGPU/R600Instructions.td | 2 +-
 .../AMDGPU/R600OptimizeVectorRegisters.cpp | 2 +-
 lib/Target/AMDGPU/R600Packetizer.cpp | 18 +-
 lib/Target/AMDGPU/R600RegisterInfo.h | 2 +-
 lib/Target/AMDGPU/SIAnnotateControlFlow.cpp | 5 +-
 lib/Target/AMDGPU/SIDefines.h | 3 +-
 .../AMDGPU/SIFixControlFlowLiveIntervals.cpp | 6 -
 lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 247 +-
 lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp | 129 +-
 lib/Target/AMDGPU/SIFoldOperands.cpp | 204 +-
 lib/Target/AMDGPU/SIFrameLowering.cpp | 243 +
 lib/Target/AMDGPU/SIFrameLowering.h | 34 +
 lib/Target/AMDGPU/SIISelLowering.cpp | 666 +-
 lib/Target/AMDGPU/SIISelLowering.h | 12 +-
 lib/Target/AMDGPU/SIInsertWaits.cpp | 84 +-
 lib/Target/AMDGPU/SIInstrFormats.td | 44 +-
 lib/Target/AMDGPU/SIInstrInfo.cpp | 1330 +-
 lib/Target/AMDGPU/SIInstrInfo.h | 154 +-
 lib/Target/AMDGPU/SIInstrInfo.td | 839 +-
 lib/Target/AMDGPU/SIInstructions.td | 487 +-
 lib/Target/AMDGPU/SILowerControlFlow.cpp | 36 +-
 lib/Target/AMDGPU/SILowerI1Copies.cpp | 1 +
 lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 109 +-
 lib/Target/AMDGPU/SIMachineFunctionInfo.h | 232 +-
 lib/Target/AMDGPU/SIPrepareScratchRegs.cpp | 193 -
 lib/Target/AMDGPU/SIRegisterInfo.cpp | 271 +-
 lib/Target/AMDGPU/SIRegisterInfo.h | 64 +-
 lib/Target/AMDGPU/SIRegisterInfo.td | 135 +-
 lib/Target/AMDGPU/SISchedule.td | 18 +-
 lib/Target/AMDGPU/SIShrinkInstructions.cpp | 61 +-
 lib/Target/AMDGPU/SITypeRewriter.cpp | 10 +-
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 97 +
 lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 26 +
 lib/Target/AMDGPU/Utils/LLVMBuild.txt | 2 +-
 lib/Target/AMDGPU/VIInstructions.td | 69 +-
 lib/Target/ARM/ARM.h | 1 -
 lib/Target/ARM/ARM.td | 694 +-
 lib/Target/ARM/ARMAsmPrinter.cpp | 119 +-
 lib/Target/ARM/ARMAsmPrinter.h | 9 +-
 lib/Target/ARM/ARMBaseInstrInfo.cpp | 172 +-
 lib/Target/ARM/ARMBaseInstrInfo.h | 23 +-
 lib/Target/ARM/ARMBaseRegisterInfo.cpp | 17 +-
 lib/Target/ARM/ARMBaseRegisterInfo.h | 8 +-
 lib/Target/ARM/ARMCallingConv.h | 22 +-
 lib/Target/ARM/ARMCallingConv.td | 2 +
 lib/Target/ARM/ARMConstantIslandPass.cpp | 44 +-
 lib/Target/ARM/ARMConstantPoolValue.cpp | 3 +-
 lib/Target/ARM/ARMConstantPoolValue.h | 5 +-
 lib/Target/ARM/ARMExpandPseudoInsts.cpp | 64 +-
 lib/Target/ARM/ARMFastISel.cpp | 108 +-
 lib/Target/ARM/ARMFrameLowering.cpp | 202 +-
 lib/Target/ARM/ARMFrameLowering.h | 8 +-
 lib/Target/ARM/ARMISelDAGToDAG.cpp | 275 +-
 lib/Target/ARM/ARMISelLowering.cpp | 2091 ++-
 lib/Target/ARM/ARMISelLowering.h | 49 +-
 lib/Target/ARM/ARMInstrInfo.cpp | 77 +-
 lib/Target/ARM/ARMInstrInfo.td | 73 +-
 lib/Target/ARM/ARMInstrNEON.td | 440 +-
 lib/Target/ARM/ARMInstrThumb.td | 185 +-
 lib/Target/ARM/ARMInstrThumb2.td | 95 +-
 lib/Target/ARM/ARMInstrVFP.td | 55 +-
 lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 514 +-
 lib/Target/ARM/ARMMachineFunctionInfo.cpp | 5 +-
 lib/Target/ARM/ARMMachineFunctionInfo.h | 14 +-
 lib/Target/ARM/ARMRegisterInfo.td | 37 +-
 lib/Target/ARM/ARMScheduleSwift.td | 1046 +-
 lib/Target/ARM/ARMSelectionDAGInfo.cpp | 56 +-
 lib/Target/ARM/ARMSubtarget.cpp | 169 +-
 lib/Target/ARM/ARMSubtarget.h | 72 +-
 lib/Target/ARM/ARMTargetMachine.cpp | 50 +-
 lib/Target/ARM/ARMTargetMachine.h | 3 +-
 lib/Target/ARM/ARMTargetTransformInfo.cpp | 170 +-
 lib/Target/ARM/ARMTargetTransformInfo.h | 33 +-
 lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 204 +-
 .../ARM/Disassembler/ARMDisassembler.cpp | 113 +-
 lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 3 +-
 lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 3 -
 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 389 +-
 lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 7 +
 .../ARM/MCTargetDesc/ARMAsmBackendDarwin.h | 8 +-
 .../ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 4 +-
 .../ARM/MCTargetDesc/ARMELFStreamer.cpp | 54 +-
 lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 4 +-
 lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 47 +-
 lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 4 +-
 .../ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 99 +-
 lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 3 +-
 .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 32 +-
 .../ARM/MCTargetDesc/ARMTargetStreamer.cpp | 4 +-
 .../ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 12 +-
 lib/Target/ARM/README.txt | 1 -
 lib/Target/ARM/Thumb1FrameLowering.cpp | 293 +-
 lib/Target/ARM/Thumb1FrameLowering.h | 36 +
 lib/Target/ARM/Thumb1InstrInfo.cpp | 16 +-
 lib/Target/ARM/Thumb2ITBlockPass.cpp | 4 +-
 lib/Target/ARM/Thumb2InstrInfo.cpp | 16 +-
 lib/Target/ARM/Thumb2SizeReduction.cpp | 47 +-
 lib/Target/AVR/AVR.td | 563 +
 lib/Target/AVR/AVRCallingConv.td | 65 +
 lib/Target/AVR/AVRConfig.h | 15 +
 lib/Target/AVR/AVRMachineFunctionInfo.h | 73 +
 lib/Target/AVR/AVRRegisterInfo.td | 216 +
 lib/Target/AVR/AVRTargetMachine.cpp | 4 +
 lib/Target/AVR/CMakeLists.txt | 14 +
 lib/Target/AVR/LLVMBuild.txt | 33 +
 lib/Target/AVR/Makefile | 19 +
 lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp | 25 +
 lib/Target/AVR/TargetInfo/CMakeLists.txt | 7 +
 lib/Target/AVR/TargetInfo/LLVMBuild.txt | 23 +
 lib/Target/AVR/TargetInfo/Makefile | 16 +
 lib/Target/BPF/BPF.td | 7 +
 lib/Target/BPF/BPFISelLowering.cpp | 3 +-
 lib/Target/BPF/InstPrinter/BPFInstPrinter.h | 2 -
 lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp | 25 +-
 .../BPF/MCTargetDesc/BPFELFObjectWriter.cpp | 4 +
 lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h | 2 +
 lib/Target/CppBackend/CPPBackend.cpp | 91 +-
 lib/Target/Hexagon/AsmParser/CMakeLists.txt | 7 +
 .../Hexagon/AsmParser/HexagonAsmParser.cpp | 2152 +++
 lib/Target/Hexagon/AsmParser/LLVMBuild.txt | 23 +
 lib/Target/Hexagon/AsmParser/Makefile | 15 +
 lib/Target/Hexagon/BitTracker.cpp | 12 +-
 lib/Target/Hexagon/BitTracker.h | 16 +-
 lib/Target/Hexagon/CMakeLists.txt | 10 +-
 .../Disassembler/HexagonDisassembler.cpp | 1010 +-
 lib/Target/Hexagon/Disassembler/LLVMBuild.txt | 2 +-
 lib/Target/Hexagon/Hexagon.h | 7 -
 lib/Target/Hexagon/Hexagon.td | 82 +-
 lib/Target/Hexagon/HexagonAsmPrinter.cpp | 435 +-
 lib/Target/Hexagon/HexagonAsmPrinter.h | 4 +
 lib/Target/Hexagon/HexagonBitSimplify.cpp | 2778 +++
 lib/Target/Hexagon/HexagonBitTracker.cpp | 33 +-
 lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 27 +-
 lib/Target/Hexagon/HexagonCommonGEP.cpp | 43 +-
 lib/Target/Hexagon/HexagonEarlyIfConv.cpp | 1063 ++
 .../Hexagon/HexagonExpandPredSpillCode.cpp | 2 +-
 lib/Target/Hexagon/HexagonFrameLowering.cpp | 435 +-
 lib/Target/Hexagon/HexagonFrameLowering.h | 11 +-
 lib/Target/Hexagon/HexagonGenExtract.cpp | 2 +-
 lib/Target/Hexagon/HexagonGenInsert.cpp | 15 +-
 lib/Target/Hexagon/HexagonGenMux.cpp | 319 +
 lib/Target/Hexagon/HexagonGenPredicate.cpp | 2 +-
 lib/Target/Hexagon/HexagonHardwareLoops.cpp | 16 +-
 lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 160 +-
 lib/Target/Hexagon/HexagonISelLowering.cpp | 774 +-
 lib/Target/Hexagon/HexagonISelLowering.h | 48 +-
 lib/Target/Hexagon/HexagonInstrAlias.td | 462 +
 lib/Target/Hexagon/HexagonInstrEnc.td | 1019 +
 lib/Target/Hexagon/HexagonInstrFormats.td | 46 +-
 lib/Target/Hexagon/HexagonInstrFormatsV4.td | 2 -
 lib/Target/Hexagon/HexagonInstrFormatsV60.td | 238 +
 lib/Target/Hexagon/HexagonInstrInfo.cpp | 4472 +++--
 lib/Target/Hexagon/HexagonInstrInfo.h | 400 +-
 lib/Target/Hexagon/HexagonInstrInfo.td | 65 +-
 lib/Target/Hexagon/HexagonInstrInfoV4.td | 130 +-
 lib/Target/Hexagon/HexagonInstrInfoV5.td | 10 +-
 lib/Target/Hexagon/HexagonInstrInfoV60.td | 2241 +++
 lib/Target/Hexagon/HexagonInstrInfoVector.td | 43 +
 lib/Target/Hexagon/HexagonIntrinsics.td | 24 +-
 lib/Target/Hexagon/HexagonIntrinsicsV60.td | 836 +
 lib/Target/Hexagon/HexagonMCInstLower.cpp | 60 +-
 .../Hexagon/HexagonMachineScheduler.cpp | 6 +-
 lib/Target/Hexagon/HexagonNewValueJump.cpp | 53 +-
 lib/Target/Hexagon/HexagonOperands.td | 378 +-
 .../Hexagon/HexagonOptimizeSZextends.cpp | 150 +
 lib/Target/Hexagon/HexagonPeephole.cpp | 4 +-
 lib/Target/Hexagon/HexagonRegisterInfo.cpp | 94 +-
 lib/Target/Hexagon/HexagonRegisterInfo.h | 2 -
 lib/Target/Hexagon/HexagonRegisterInfo.td | 81 +-
 lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 91 -
 lib/Target/Hexagon/HexagonSchedule.td | 8 +-
 lib/Target/Hexagon/HexagonScheduleV4.td | 5 +-
 lib/Target/Hexagon/HexagonScheduleV55.td | 170 +
lib/Target/Hexagon/HexagonScheduleV60.td | 310 +
.../Hexagon/HexagonSelectionDAGInfo.cpp | 48 +-
.../Hexagon/HexagonSplitConst32AndConst64.cpp | 2 +-
lib/Target/Hexagon/HexagonSplitDouble.cpp | 1209 ++
lib/Target/Hexagon/HexagonStoreWidening.cpp | 616 +
lib/Target/Hexagon/HexagonSubtarget.cpp | 84 +-
lib/Target/Hexagon/HexagonSubtarget.h | 20 +-
lib/Target/Hexagon/HexagonTargetMachine.cpp | 97 +-
lib/Target/Hexagon/HexagonTargetMachine.h | 15 +-
.../Hexagon/HexagonTargetObjectFile.cpp | 7 +-
.../Hexagon/HexagonTargetTransformInfo.cpp | 38 +
.../Hexagon/HexagonTargetTransformInfo.h | 70 +
lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 2021 +-
lib/Target/Hexagon/HexagonVLIWPacketizer.h | 114 +
lib/Target/Hexagon/LLVMBuild.txt | 3 +-
.../Hexagon/MCTargetDesc/CMakeLists.txt | 2 +
.../MCTargetDesc/HexagonAsmBackend.cpp | 52 +-
.../Hexagon/MCTargetDesc/HexagonBaseInfo.h | 65 +-
.../MCTargetDesc/HexagonInstPrinter.cpp | 282 +-
.../Hexagon/MCTargetDesc/HexagonInstPrinter.h | 119 +-
.../Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 13 +-
.../Hexagon/MCTargetDesc/HexagonMCChecker.cpp | 581 +
.../Hexagon/MCTargetDesc/HexagonMCChecker.h | 218 +
.../MCTargetDesc/HexagonMCCodeEmitter.cpp | 24 +-
.../MCTargetDesc/HexagonMCCompound.cpp | 29 +-
.../MCTargetDesc/HexagonMCDuplexInfo.cpp | 126 +-
.../MCTargetDesc/HexagonMCELFStreamer.cpp | 12 +-
.../Hexagon/MCTargetDesc/HexagonMCExpr.cpp | 49 +
.../Hexagon/MCTargetDesc/HexagonMCExpr.h | 35 +
.../MCTargetDesc/HexagonMCInstrInfo.cpp | 228 +-
.../Hexagon/MCTargetDesc/HexagonMCInstrInfo.h | 58 +-
.../MCTargetDesc/HexagonMCTargetDesc.cpp | 52 +-
.../MCTargetDesc/HexagonMCTargetDesc.h | 17 +-
.../Hexagon/MCTargetDesc/HexagonShuffler.cpp | 90 +-
.../Hexagon/MCTargetDesc/HexagonShuffler.h | 52 +-
lib/Target/Hexagon/Makefile | 3 +-
lib/Target/LLVMBuild.txt | 1 +
.../MSP430/InstPrinter/MSP430InstPrinter.h | 2 -
.../MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 13 +-
lib/Target/MSP430/MSP430BranchSelector.cpp | 2 +-
lib/Target/MSP430/MSP430ISelLowering.cpp | 17 +-
lib/Target/MSP430/MSP430InstrInfo.cpp | 18 +-
lib/Target/MSP430/MSP430MCInstLower.cpp | 8 +-
.../MSP430/MSP430MachineFunctionInfo.cpp | 2 +-
lib/Target/MSP430/MSP430MachineFunctionInfo.h | 2 +-
lib/Target/MSP430/README.txt | 2 +-
lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 2242 ++-
.../Mips/Disassembler/MipsDisassembler.cpp | 318 +-
.../Mips/InstPrinter/MipsInstPrinter.cpp | 2 +
lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 2 -
lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp | 13 +-
lib/Target/Mips/MCTargetDesc/MipsABIInfo.h | 7 +-
.../Mips/MCTargetDesc/MipsAsmBackend.cpp | 62 +-
lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 1 +
.../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 17 +-
.../Mips/MCTargetDesc/MipsELFStreamer.cpp | 2 +-
.../Mips/MCTargetDesc/MipsELFStreamer.h | 3 +-
lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 5 +-
lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h | 13 +-
.../Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 73 +-
.../Mips/MCTargetDesc/MipsMCCodeEmitter.h | 25 +-
lib/Target/Mips/MCTargetDesc/MipsMCExpr.h | 4 +-
.../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 82 +-
lib/Target/Mips/MicroMips32r6InstrFormats.td | 579 +-
lib/Target/Mips/MicroMips32r6InstrInfo.td | 887 +-
lib/Target/Mips/MicroMips64r6InstrFormats.td | 86 +
lib/Target/Mips/MicroMips64r6InstrInfo.td | 119 +
lib/Target/Mips/MicroMipsDSPInstrFormats.td | 244 +
lib/Target/Mips/MicroMipsDSPInstrInfo.td | 528 +
lib/Target/Mips/MicroMipsInstrFPU.td | 28 +-
lib/Target/Mips/MicroMipsInstrFormats.td | 81 +-
lib/Target/Mips/MicroMipsInstrInfo.td | 174 +-
lib/Target/Mips/Mips.td | 17 +-
lib/Target/Mips/Mips16FrameLowering.cpp | 8 +-
lib/Target/Mips/Mips16HardFloat.cpp | 200 +-
lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 2 +-
lib/Target/Mips/Mips16ISelLowering.cpp | 9 +-
lib/Target/Mips/Mips16InstrInfo.cpp | 6 +-
lib/Target/Mips/Mips16InstrInfo.td | 120 +-
lib/Target/Mips/Mips32r6InstrInfo.td | 298 +-
lib/Target/Mips/Mips64InstrInfo.td | 91 +-
lib/Target/Mips/Mips64r6InstrInfo.td | 12 +-
lib/Target/Mips/MipsAsmPrinter.cpp | 13 +-
lib/Target/Mips/MipsCCState.cpp | 16 +-
lib/Target/Mips/MipsCallingConv.td | 25 +
lib/Target/Mips/MipsConstantIslandPass.cpp | 31 +-
lib/Target/Mips/MipsDSPInstrFormats.td | 38 +-
lib/Target/Mips/MipsDSPInstrInfo.td | 378 +-
lib/Target/Mips/MipsDelaySlotFiller.cpp | 34 +-
lib/Target/Mips/MipsEVAInstrFormats.td | 84 +
lib/Target/Mips/MipsEVAInstrInfo.td | 192 +
lib/Target/Mips/MipsFastISel.cpp | 59 +-
lib/Target/Mips/MipsISelLowering.cpp | 125 +-
lib/Target/Mips/MipsISelLowering.h | 37 +-
lib/Target/Mips/MipsInstrFPU.td | 42 +-
lib/Target/Mips/MipsInstrFormats.td | 8 +-
lib/Target/Mips/MipsInstrInfo.cpp | 4 +-
lib/Target/Mips/MipsInstrInfo.td | 508 +-
lib/Target/Mips/MipsLongBranch.cpp | 9 +-
lib/Target/Mips/MipsMSAInstrFormats.td | 24 +-
lib/Target/Mips/MipsMSAInstrInfo.td | 208 +-
lib/Target/Mips/MipsMachineFunction.cpp | 69 +-
lib/Target/Mips/MipsMachineFunction.h | 51 +-
lib/Target/Mips/MipsRegisterInfo.cpp | 59 +-
lib/Target/Mips/MipsRegisterInfo.h | 4 +-
lib/Target/Mips/MipsSEFrameLowering.cpp | 241 +-
lib/Target/Mips/MipsSEFrameLowering.h | 10 +-
lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 2 +-
lib/Target/Mips/MipsSEISelLowering.cpp | 22 +-
lib/Target/Mips/MipsSEInstrInfo.cpp | 85 +-
lib/Target/Mips/MipsSEInstrInfo.h | 2 +
lib/Target/Mips/MipsSERegisterInfo.cpp | 6 +-
lib/Target/Mips/MipsSchedule.td | 68 +-
lib/Target/Mips/MipsScheduleP5600.td | 392 +
lib/Target/Mips/MipsSubtarget.cpp | 5 +-
lib/Target/Mips/MipsSubtarget.h | 21 +-
lib/Target/Mips/MipsTargetMachine.cpp | 2 +-
lib/Target/Mips/MipsTargetObjectFile.cpp | 24 +-
lib/Target/Mips/MipsTargetObjectFile.h | 4 +-
lib/Target/Mips/MipsTargetStreamer.h | 19 +-
.../NVPTX/InstPrinter/NVPTXInstPrinter.h | 2 -
.../NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h | 1 +
lib/Target/NVPTX/NVPTX.h | 18 -
lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 132 +-
lib/Target/NVPTX/NVPTXAsmPrinter.h | 15 +-
.../NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 10 +-
lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 18 +-
lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 59 +-
lib/Target/NVPTX/NVPTXISelLowering.cpp | 181 +-
lib/Target/NVPTX/NVPTXISelLowering.h | 17 +-
lib/Target/NVPTX/NVPTXInstrInfo.cpp | 64 +-
lib/Target/NVPTX/NVPTXInstrInfo.h | 1 -
lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 343 +-
lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 2 +-
lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp | 104 +-
lib/Target/NVPTX/NVPTXMCExpr.h | 4 +-
lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 2 +-
lib/Target/NVPTX/NVPTXSection.h | 9 +-
lib/Target/NVPTX/NVPTXTargetMachine.cpp | 60 +-
lib/Target/NVPTX/NVPTXTargetObjectFile.h | 5 +-
lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp | 4 +-
lib/Target/NVPTX/NVPTXTargetTransformInfo.h | 2 +-
lib/Target/NVPTX/NVPTXUtilities.cpp | 105 +-
lib/Target/NVPTX/NVPTXUtilities.h | 21 -
lib/Target/NVPTX/NVPTXVector.td | 58 +-
lib/Target/NVPTX/NVVMReflect.cpp | 4 +-
lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 24 +-
lib/Target/PowerPC/CMakeLists.txt | 2 +
.../PowerPC/Disassembler/PPCDisassembler.cpp | 2 -
.../PowerPC/InstPrinter/PPCInstPrinter.h | 2 -
.../MCTargetDesc/PPCELFObjectWriter.cpp | 16 +-
.../PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 21 +- lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 4 +- .../MCTargetDesc/PPCMachObjectWriter.cpp | 4 +- .../PowerPC/MCTargetDesc/PPCPredicates.h | 8 + lib/Target/PowerPC/PPC.h | 3 + lib/Target/PowerPC/PPC.td | 14 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 265 +- lib/Target/PowerPC/PPCBoolRetToInt.cpp | 253 + lib/Target/PowerPC/PPCBranchSelector.cpp | 2 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 26 +- lib/Target/PowerPC/PPCEarlyReturn.cpp | 35 +- lib/Target/PowerPC/PPCFastISel.cpp | 87 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 238 +- lib/Target/PowerPC/PPCFrameLowering.h | 31 + lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 140 +- lib/Target/PowerPC/PPCISelLowering.cpp | 504 +- lib/Target/PowerPC/PPCISelLowering.h | 22 +- lib/Target/PowerPC/PPCInstr64Bit.td | 2 + lib/Target/PowerPC/PPCInstrInfo.cpp | 131 +- lib/Target/PowerPC/PPCInstrInfo.h | 53 +- lib/Target/PowerPC/PPCInstrInfo.td | 11 +- lib/Target/PowerPC/PPCInstrQPX.td | 20 +- lib/Target/PowerPC/PPCInstrVSX.td | 612 +- lib/Target/PowerPC/PPCLoopDataPrefetch.cpp | 9 +- lib/Target/PowerPC/PPCLoopPreIncPrep.cpp | 131 +- lib/Target/PowerPC/PPCMCInstLower.cpp | 6 +- lib/Target/PowerPC/PPCMIPeephole.cpp | 230 + lib/Target/PowerPC/PPCMachineFunctionInfo.cpp | 4 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 114 +- lib/Target/PowerPC/PPCRegisterInfo.h | 15 +- lib/Target/PowerPC/PPCSubtarget.cpp | 31 + lib/Target/PowerPC/PPCSubtarget.h | 11 + lib/Target/PowerPC/PPCTargetMachine.cpp | 52 +- lib/Target/PowerPC/PPCTargetObjectFile.cpp | 4 +- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 137 +- lib/Target/PowerPC/PPCTargetTransformInfo.h | 32 +- lib/Target/PowerPC/PPCVSXCopy.cpp | 14 +- lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 53 +- lib/Target/PowerPC/PPCVSXSwapRemoval.cpp | 89 +- lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 248 +- lib/Target/Sparc/DelaySlotFiller.cpp | 2 + .../Sparc/Disassembler/SparcDisassembler.cpp | 52 + .../Sparc/InstPrinter/SparcInstPrinter.h | 3 - .../Sparc/MCTargetDesc/SparcMCAsmInfo.h | 1 + lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 4 +- lib/Target/Sparc/SparcAsmPrinter.cpp | 8 +- lib/Target/Sparc/SparcCallingConv.td | 9 +- lib/Target/Sparc/SparcFrameLowering.cpp | 154 +- lib/Target/Sparc/SparcFrameLowering.h | 8 + lib/Target/Sparc/SparcISelDAGToDAG.cpp | 183 + lib/Target/Sparc/SparcISelLowering.cpp | 295 +- lib/Target/Sparc/SparcISelLowering.h | 18 +- lib/Target/Sparc/SparcInstrAliases.td | 9 + lib/Target/Sparc/SparcInstrInfo.cpp | 39 +- lib/Target/Sparc/SparcInstrInfo.td | 146 +- lib/Target/Sparc/SparcRegisterInfo.cpp | 64 +- lib/Target/Sparc/SparcRegisterInfo.h | 4 +- lib/Target/Sparc/SparcRegisterInfo.td | 54 + lib/Target/Sparc/SparcSubtarget.cpp | 6 +- lib/Target/Sparc/SparcSubtarget.h | 3 +- .../SystemZ/AsmParser/SystemZAsmParser.cpp | 19 +- .../InstPrinter/SystemZInstPrinter.cpp | 8 +- .../SystemZ/InstPrinter/SystemZInstPrinter.h | 1 - .../MCTargetDesc/SystemZMCTargetDesc.cpp | 2 +- lib/Target/SystemZ/README.txt | 6 - lib/Target/SystemZ/SystemZAsmPrinter.cpp | 2 +- .../SystemZ/SystemZConstantPoolValue.cpp | 15 - lib/Target/SystemZ/SystemZConstantPoolValue.h | 1 - lib/Target/SystemZ/SystemZElimCompare.cpp | 86 +- lib/Target/SystemZ/SystemZFrameLowering.cpp | 24 +- lib/Target/SystemZ/SystemZFrameLowering.h | 3 +- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 24 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 159 +- lib/Target/SystemZ/SystemZISelLowering.h | 20 +- lib/Target/SystemZ/SystemZInstrBuilder.h | 8 +- lib/Target/SystemZ/SystemZInstrFP.td | 55 +- 
lib/Target/SystemZ/SystemZInstrFormats.td | 1 + lib/Target/SystemZ/SystemZInstrInfo.cpp | 77 +- lib/Target/SystemZ/SystemZInstrInfo.h | 4 +- lib/Target/SystemZ/SystemZInstrInfo.td | 34 +- .../SystemZ/SystemZMachineFunctionInfo.cpp | 2 +- .../SystemZ/SystemZMachineFunctionInfo.h | 2 +- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 4 +- lib/Target/SystemZ/SystemZRegisterInfo.td | 3 +- lib/Target/SystemZ/SystemZShortenInst.cpp | 128 +- lib/Target/SystemZ/SystemZTargetMachine.cpp | 24 +- lib/Target/SystemZ/SystemZTargetMachine.h | 3 + .../SystemZ/SystemZTargetTransformInfo.cpp | 10 +- .../SystemZ/SystemZTargetTransformInfo.h | 11 +- lib/Target/TargetLoweringObjectFile.cpp | 55 +- lib/Target/TargetMachine.cpp | 20 +- lib/Target/TargetMachineC.cpp | 46 +- lib/Target/TargetRecip.cpp | 12 +- lib/Target/WebAssembly/CMakeLists.txt | 21 +- .../InstPrinter/WebAssemblyInstPrinter.cpp | 94 +- .../InstPrinter/WebAssemblyInstPrinter.h | 18 +- lib/Target/WebAssembly/LLVMBuild.txt | 2 +- .../WebAssembly/MCTargetDesc/CMakeLists.txt | 3 + .../MCTargetDesc/WebAssemblyAsmBackend.cpp | 103 + .../WebAssemblyELFObjectWriter.cpp | 54 + .../MCTargetDesc/WebAssemblyMCAsmInfo.cpp | 5 +- .../MCTargetDesc/WebAssemblyMCAsmInfo.h | 4 +- .../MCTargetDesc/WebAssemblyMCCodeEmitter.cpp | 100 + .../MCTargetDesc/WebAssemblyMCTargetDesc.cpp | 41 +- .../MCTargetDesc/WebAssemblyMCTargetDesc.h | 16 +- lib/Target/WebAssembly/Makefile | 10 +- lib/Target/WebAssembly/README.txt | 68 +- lib/Target/WebAssembly/Relooper.cpp | 984 + lib/Target/WebAssembly/Relooper.h | 186 + lib/Target/WebAssembly/WebAssembly.h | 14 + lib/Target/WebAssembly/WebAssembly.td | 12 +- .../WebAssembly/WebAssemblyArgumentMove.cpp | 110 + .../WebAssembly/WebAssemblyAsmPrinter.cpp | 285 + .../WebAssembly/WebAssemblyCFGStackify.cpp | 468 + .../WebAssembly/WebAssemblyFastISel.cpp | 81 + .../WebAssembly/WebAssemblyFrameLowering.cpp | 117 +- .../WebAssembly/WebAssemblyFrameLowering.h | 3 - lib/Target/WebAssembly/WebAssemblyISD.def | 25 + .../WebAssembly/WebAssemblyISelDAGToDAG.cpp | 57 +- .../WebAssembly/WebAssemblyISelLowering.cpp | 602 +- .../WebAssembly/WebAssemblyISelLowering.h | 50 +- .../WebAssembly/WebAssemblyInstrCall.td | 67 +- .../WebAssembly/WebAssemblyInstrControl.td | 82 + .../WebAssembly/WebAssemblyInstrConv.td | 125 +- .../WebAssembly/WebAssemblyInstrFloat.td | 111 +- .../WebAssembly/WebAssemblyInstrFormats.td | 78 +- .../WebAssembly/WebAssemblyInstrInfo.cpp | 133 +- lib/Target/WebAssembly/WebAssemblyInstrInfo.h | 20 +- .../WebAssembly/WebAssemblyInstrInfo.td | 115 +- .../WebAssembly/WebAssemblyInstrInteger.td | 101 +- .../WebAssembly/WebAssemblyInstrMemory.td | 529 +- .../WebAssembly/WebAssemblyLowerBrUnless.cpp | 133 + .../WebAssembly/WebAssemblyMCInstLower.cpp | 106 + .../WebAssembly/WebAssemblyMCInstLower.h | 45 + .../WebAssemblyMachineFunctionInfo.cpp | 6 + .../WebAssemblyMachineFunctionInfo.h | 68 +- .../WebAssemblyOptimizeReturned.cpp | 76 + lib/Target/WebAssembly/WebAssemblyPEI.cpp | 1066 ++ .../WebAssembly/WebAssemblyPeephole.cpp | 86 + .../WebAssembly/WebAssemblyRegColoring.cpp | 175 + .../WebAssembly/WebAssemblyRegNumbering.cpp | 109 + .../WebAssembly/WebAssemblyRegStackify.cpp | 265 + .../WebAssembly/WebAssemblyRegisterInfo.cpp | 60 +- .../WebAssembly/WebAssemblyRegisterInfo.h | 6 +- .../WebAssembly/WebAssemblyRegisterInfo.td | 32 +- .../WebAssembly/WebAssemblyStoreResults.cpp | 124 + .../WebAssembly/WebAssemblySubtarget.cpp | 1 + lib/Target/WebAssembly/WebAssemblySubtarget.h | 8 +- .../WebAssembly/WebAssemblyTargetMachine.cpp | 96 +- 
.../WebAssemblyTargetObjectFile.cpp | 24 + .../WebAssembly/WebAssemblyTargetObjectFile.h | 43 +- .../WebAssemblyTargetTransformInfo.cpp | 5 +- .../WebAssemblyTargetTransformInfo.h | 4 +- .../WebAssembly/known_gcc_test_failures.txt | 311 + lib/Target/X86/AsmParser/CMakeLists.txt | 3 - lib/Target/X86/AsmParser/LLVMBuild.txt | 2 +- lib/Target/X86/AsmParser/Makefile | 2 +- .../X86/AsmParser/X86AsmInstrumentation.cpp | 274 +- .../X86/AsmParser/X86AsmInstrumentation.h | 10 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 319 +- lib/Target/X86/AsmParser/X86AsmParserCommon.h | 19 +- lib/Target/X86/CMakeLists.txt | 11 +- .../X86/Disassembler/X86Disassembler.cpp | 7 + .../Disassembler/X86DisassemblerDecoder.cpp | 47 +- .../X86/Disassembler/X86DisassemblerDecoder.h | 2 - .../X86/InstPrinter/X86ATTInstPrinter.cpp | 1 + .../X86/InstPrinter/X86ATTInstPrinter.h | 3 - .../X86/InstPrinter/X86InstComments.cpp | 735 +- .../X86/InstPrinter/X86IntelInstPrinter.h | 2 - lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 83 +- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 48 +- .../X86/MCTargetDesc/X86ELFObjectWriter.cpp | 11 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h | 57 +- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 11 +- .../X86/MCTargetDesc/X86MCTargetDesc.cpp | 184 +- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 13 +- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 99 +- .../X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 4 +- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 197 +- lib/Target/X86/Utils/X86ShuffleDecode.h | 27 +- lib/Target/X86/X86.h | 59 +- lib/Target/X86/X86.td | 657 +- lib/Target/X86/X86AsmPrinter.cpp | 25 +- lib/Target/X86/X86AsmPrinter.h | 2 - lib/Target/X86/X86CallFrameOptimization.cpp | 109 +- lib/Target/X86/X86CallingConv.h | 59 + lib/Target/X86/X86CallingConv.td | 79 +- .../X86/X86CompilationCallback_Win64.asm | 68 - lib/Target/X86/X86ExpandPseudo.cpp | 21 +- lib/Target/X86/X86FastISel.cpp | 168 +- lib/Target/X86/X86FixupLEAs.cpp | 89 +- lib/Target/X86/X86FloatingPoint.cpp | 76 +- lib/Target/X86/X86FrameLowering.cpp | 1162 +- lib/Target/X86/X86FrameLowering.h | 65 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 503 +- lib/Target/X86/X86ISelLowering.cpp | 6418 +++++-- lib/Target/X86/X86ISelLowering.h | 124 +- lib/Target/X86/X86InstrAVX512.td | 3690 ++-- lib/Target/X86/X86InstrArithmetic.td | 69 +- lib/Target/X86/X86InstrBuilder.h | 7 +- lib/Target/X86/X86InstrCMovSetCC.td | 2 +- lib/Target/X86/X86InstrCompiler.td | 216 +- lib/Target/X86/X86InstrControl.td | 13 + lib/Target/X86/X86InstrFMA.td | 252 +- lib/Target/X86/X86InstrFPStack.td | 136 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 336 +- lib/Target/X86/X86InstrInfo.cpp | 1516 +- lib/Target/X86/X86InstrInfo.h | 142 +- lib/Target/X86/X86InstrInfo.td | 257 +- lib/Target/X86/X86InstrMMX.td | 24 +- lib/Target/X86/X86InstrSSE.td | 771 +- lib/Target/X86/X86InstrShiftRotate.td | 140 +- lib/Target/X86/X86InstrSystem.td | 101 +- lib/Target/X86/X86InstrXOP.td | 119 +- lib/Target/X86/X86IntrinsicsInfo.h | 980 +- lib/Target/X86/X86MCInstLower.cpp | 216 +- lib/Target/X86/X86MachineFunctionInfo.cpp | 2 +- lib/Target/X86/X86MachineFunctionInfo.h | 8 +- lib/Target/X86/X86OptimizeLEAs.cpp | 326 + lib/Target/X86/X86PadShortFunction.cpp | 5 +- lib/Target/X86/X86RegisterInfo.cpp | 318 +- lib/Target/X86/X86RegisterInfo.h | 25 +- lib/Target/X86/X86RegisterInfo.td | 41 +- lib/Target/X86/X86SelectionDAGInfo.cpp | 43 +- lib/Target/X86/X86Subtarget.cpp | 46 +- lib/Target/X86/X86Subtarget.h | 57 +- lib/Target/X86/X86TargetMachine.cpp | 17 +- 
lib/Target/X86/X86TargetObjectFile.cpp | 8 +-
lib/Target/X86/X86TargetObjectFile.h | 2 +-
lib/Target/X86/X86TargetTransformInfo.cpp | 875 +-
lib/Target/X86/X86TargetTransformInfo.h | 56 +-
lib/Target/X86/X86WinEHState.cpp | 232 +-
.../XCore/Disassembler/XCoreDisassembler.cpp | 2 +-
.../XCore/InstPrinter/XCoreInstPrinter.h | 2 -
lib/Target/XCore/XCoreAsmPrinter.cpp | 18 +-
lib/Target/XCore/XCoreFrameLowering.cpp | 38 +-
lib/Target/XCore/XCoreISelDAGToDAG.cpp | 5 +-
lib/Target/XCore/XCoreISelLowering.cpp | 26 +-
lib/Target/XCore/XCoreISelLowering.h | 14 +
lib/Target/XCore/XCoreInstrInfo.cpp | 18 +-
lib/Target/XCore/XCoreLowerThreadLocal.cpp | 9 +-
lib/Target/XCore/XCoreMachineFunctionInfo.cpp | 2 +-
lib/Target/XCore/XCoreMachineFunctionInfo.h | 2 +-
lib/Target/XCore/XCoreTargetMachine.cpp | 2 +-
lib/Target/XCore/XCoreTargetObjectFile.cpp | 14 +-
lib/Target/XCore/XCoreTargetObjectFile.h | 2 +-
lib/Target/XCore/XCoreTargetTransformInfo.h | 2 +-
lib/Transforms/IPO/ArgumentPromotion.cpp | 128 +-
lib/Transforms/IPO/CMakeLists.txt | 5 +
lib/Transforms/IPO/ConstantMerge.cpp | 4 +-
lib/Transforms/IPO/CrossDSOCFI.cpp | 166 +
.../IPO/DeadArgumentElimination.cpp | 83 +-
lib/Transforms/IPO/ElimAvailExtern.cpp | 54 +-
lib/Transforms/IPO/ExtractGV.cpp | 8 +-
lib/Transforms/IPO/ForceFunctionAttrs.cpp | 121 +
lib/Transforms/IPO/FunctionAttrs.cpp | 1889 +-
lib/Transforms/IPO/FunctionImport.cpp | 433 +
lib/Transforms/IPO/GlobalDCE.cpp | 114 +-
lib/Transforms/IPO/GlobalOpt.cpp | 633 +-
lib/Transforms/IPO/IPO.cpp | 11 +-
lib/Transforms/IPO/InferFunctionAttrs.cpp | 937 +
lib/Transforms/IPO/InlineAlways.cpp | 27 +-
lib/Transforms/IPO/InlineSimple.cpp | 25 +-
lib/Transforms/IPO/Inliner.cpp | 150 +-
lib/Transforms/IPO/Internalize.cpp | 103 +-
lib/Transforms/IPO/LLVMBuild.txt | 2 +-
lib/Transforms/IPO/LoopExtractor.cpp | 20 +-
lib/Transforms/IPO/LowerBitSets.cpp | 593 +-
lib/Transforms/IPO/MergeFunctions.cpp | 560 +-
lib/Transforms/IPO/PartialInlining.cpp | 18 +-
lib/Transforms/IPO/PassManagerBuilder.cpp | 125 +-
lib/Transforms/IPO/PruneEH.cpp | 31 +-
.../{Scalar => IPO}/SampleProfile.cpp | 736 +-
lib/Transforms/IPO/StripDeadPrototypes.cpp | 64 +-
lib/Transforms/IPO/StripSymbols.cpp | 12 +-
.../InstCombine/InstCombineAddSub.cpp | 58 +-
.../InstCombine/InstCombineAndOrXor.cpp | 443 +-
.../InstCombine/InstCombineCalls.cpp | 934 +-
.../InstCombine/InstCombineCasts.cpp | 353 +-
.../InstCombine/InstCombineCompares.cpp | 242 +-
.../InstCombine/InstCombineInternal.h | 12 +-
.../InstCombineLoadStoreAlloca.cpp | 136 +-
.../InstCombine/InstCombineMulDivRem.cpp | 26 +-
lib/Transforms/InstCombine/InstCombinePHI.cpp | 113 +-
.../InstCombine/InstCombineSelect.cpp | 78 +-
.../InstCombine/InstCombineShifts.cpp | 4 +-
.../InstCombineSimplifyDemanded.cpp | 90 +-
.../InstCombine/InstCombineVectorOps.cpp | 121 +-
.../InstCombine/InstructionCombining.cpp | 346 +-
.../Instrumentation/AddressSanitizer.cpp | 253 +-
.../Instrumentation/BoundsChecking.cpp | 2 +-
lib/Transforms/Instrumentation/CFGMST.h | 217 +
lib/Transforms/Instrumentation/CMakeLists.txt | 1 +
.../Instrumentation/DataFlowSanitizer.cpp | 84 +-
.../Instrumentation/GCOVProfiling.cpp | 49 +-
.../Instrumentation/InstrProfiling.cpp | 287 +-
.../Instrumentation/Instrumentation.cpp | 39 +-
lib/Transforms/Instrumentation/LLVMBuild.txt | 2 +-
.../Instrumentation/MemorySanitizer.cpp | 434 +-
.../Instrumentation/PGOInstrumentation.cpp | 718 +
lib/Transforms/Instrumentation/SafeStack.cpp | 497 +-
.../Instrumentation/SanitizerCoverage.cpp | 117 +-
.../Instrumentation/ThreadSanitizer.cpp | 40 +-
lib/Transforms/ObjCARC/CMakeLists.txt | 2 -
lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 6 +-
lib/Transforms/ObjCARC/ObjCARC.cpp | 10 +-
lib/Transforms/ObjCARC/ObjCARC.h | 242 +-
lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 13 +-
lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 74 -
lib/Transforms/ObjCARC/ObjCARCContract.cpp | 28 +-
lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 77 +-
lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 2 +-
.../ObjCARC/ProvenanceAnalysisEvaluator.cpp | 6 +-
lib/Transforms/ObjCARC/PtrState.cpp | 8 +-
lib/Transforms/ObjCARC/PtrState.h | 8 +-
lib/Transforms/Scalar/ADCE.cpp | 69 +-
.../Scalar/AlignmentFromAssumptions.cpp | 17 +-
lib/Transforms/Scalar/BDCE.cpp | 350 +-
lib/Transforms/Scalar/CMakeLists.txt | 2 +-
lib/Transforms/Scalar/ConstantHoisting.cpp | 10 +-
.../Scalar/CorrelatedValuePropagation.cpp | 120 +-
lib/Transforms/Scalar/DCE.cpp | 78 +-
.../Scalar/DeadStoreElimination.cpp | 278 +-
lib/Transforms/Scalar/EarlyCSE.cpp | 269 +-
lib/Transforms/Scalar/FlattenCFGPass.cpp | 8 +-
lib/Transforms/Scalar/Float2Int.cpp | 49 +-
lib/Transforms/Scalar/GVN.cpp | 319 +-
lib/Transforms/Scalar/IndVarSimplify.cpp | 712 +-
.../Scalar/InductiveRangeCheckElimination.cpp | 24 +-
lib/Transforms/Scalar/JumpThreading.cpp | 263 +-
lib/Transforms/Scalar/LICM.cpp | 65 +-
lib/Transforms/Scalar/LLVMBuild.txt | 2 +-
lib/Transforms/Scalar/LoadCombine.cpp | 12 +-
lib/Transforms/Scalar/LoopDeletion.cpp | 19 +-
lib/Transforms/Scalar/LoopDistribute.cpp | 113 +-
lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 1391 +-
lib/Transforms/Scalar/LoopInstSimplify.cpp | 4 +-
lib/Transforms/Scalar/LoopInterchange.cpp | 79 +-
lib/Transforms/Scalar/LoopLoadElimination.cpp | 566 +
lib/Transforms/Scalar/LoopRerollPass.cpp | 141 +-
lib/Transforms/Scalar/LoopRotation.cpp | 447 +-
lib/Transforms/Scalar/LoopStrengthReduce.cpp | 828 +-
lib/Transforms/Scalar/LoopUnrollPass.cpp | 278 +-
lib/Transforms/Scalar/LoopUnswitch.cpp | 455 +-
lib/Transforms/Scalar/LowerAtomic.cpp | 6 +-
.../Scalar/LowerExpectIntrinsic.cpp | 2 +-
lib/Transforms/Scalar/MemCpyOptimizer.cpp | 174 +-
.../Scalar/MergedLoadStoreMotion.cpp | 51 +-
lib/Transforms/Scalar/NaryReassociate.cpp | 199 +-
.../Scalar/PartiallyInlineLibCalls.cpp | 2 +-
lib/Transforms/Scalar/PlaceSafepoints.cpp | 104 +-
lib/Transforms/Scalar/Reassociate.cpp | 109 +-
lib/Transforms/Scalar/Reg2Mem.cpp | 9 +-
.../Scalar/RewriteStatepointsForGC.cpp | 1814 +-
lib/Transforms/Scalar/SCCP.cpp | 166 +-
lib/Transforms/Scalar/SROA.cpp | 991 +-
lib/Transforms/Scalar/Scalar.cpp | 15 +-
.../Scalar/ScalarReplAggregates.cpp | 7 +-
lib/Transforms/Scalar/Scalarizer.cpp | 32 +-
.../Scalar/SeparateConstOffsetFromGEP.cpp | 251 +-
lib/Transforms/Scalar/SimplifyCFGPass.cpp | 11 +-
lib/Transforms/Scalar/Sink.cpp | 25 +-
.../Scalar/SpeculativeExecution.cpp | 2 +-
.../Scalar/StraightLineStrengthReduce.cpp | 9 +-
lib/Transforms/Scalar/StructurizeCFG.cpp | 12 +-
.../Scalar/TailRecursionElimination.cpp | 42 +-
lib/Transforms/Utils/ASanStackFrameLayout.cpp | 2 +-
lib/Transforms/Utils/AddDiscriminators.cpp | 147 +-
lib/Transforms/Utils/BasicBlockUtils.cpp | 54 +-
lib/Transforms/Utils/BreakCriticalEdges.cpp | 19 +-
lib/Transforms/Utils/BuildLibCalls.cpp | 28 +-
lib/Transforms/Utils/BypassSlowDivision.cpp | 6 +-
lib/Transforms/Utils/CMakeLists.txt | 1 +
lib/Transforms/Utils/CloneFunction.cpp | 122 +-
lib/Transforms/Utils/CloneModule.cpp | 89 +-
lib/Transforms/Utils/CodeExtractor.cpp | 36 +-
lib/Transforms/Utils/CtorUtils.cpp | 2 +-
lib/Transforms/Utils/DemoteRegToStack.cpp | 21 +-
lib/Transforms/Utils/FlattenCFG.cpp | 34 +- lib/Transforms/Utils/GlobalStatus.cpp | 4 + lib/Transforms/Utils/InlineFunction.cpp | 431 +- lib/Transforms/Utils/IntegerDivision.cpp | 58 +- lib/Transforms/Utils/LCSSA.cpp | 76 +- lib/Transforms/Utils/LLVMBuild.txt | 2 +- lib/Transforms/Utils/Local.cpp | 270 +- lib/Transforms/Utils/LoopSimplify.cpp | 118 +- lib/Transforms/Utils/LoopUnroll.cpp | 99 +- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 36 +- lib/Transforms/Utils/LoopUtils.cpp | 276 +- lib/Transforms/Utils/LoopVersioning.cpp | 79 +- lib/Transforms/Utils/LowerInvoke.cpp | 2 +- lib/Transforms/Utils/LowerSwitch.cpp | 93 +- lib/Transforms/Utils/Mem2Reg.cpp | 3 + lib/Transforms/Utils/MetaRenamer.cpp | 69 +- lib/Transforms/Utils/ModuleUtils.cpp | 14 +- .../Utils/PromoteMemoryToRegister.cpp | 57 +- lib/Transforms/Utils/SimplifyCFG.cpp | 706 +- lib/Transforms/Utils/SimplifyIndVar.cpp | 130 +- lib/Transforms/Utils/SimplifyInstructions.cpp | 4 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 445 +- lib/Transforms/Utils/SplitModule.cpp | 85 + lib/Transforms/Utils/SymbolRewriter.cpp | 1 - .../Utils/UnifyFunctionExitNodes.cpp | 10 +- lib/Transforms/Utils/ValueMapper.cpp | 254 +- lib/Transforms/Vectorize/BBVectorize.cpp | 191 +- lib/Transforms/Vectorize/LoopVectorize.cpp | 2595 +-- lib/Transforms/Vectorize/SLPVectorizer.cpp | 489 +- llvm.spec.in | 1 + projects/CMakeLists.txt | 3 +- test/Analysis/BasicAA/2007-11-05-SizeCrash.ll | 2 +- .../BasicAA/2007-12-08-OutOfBoundsCrash.ll | 2 +- test/Analysis/BasicAA/bug.23540.ll | 17 + test/Analysis/BasicAA/bug.23626.ll | 31 + test/Analysis/BasicAA/cs-cs.ll | 41 +- .../BasicAA/full-store-partial-alias.ll | 4 +- test/Analysis/BasicAA/intrinsics.ll | 27 +- test/Analysis/BasicAA/modref.ll | 37 + test/Analysis/BasicAA/noalias-bugs.ll | 2 +- test/Analysis/BasicAA/phi-aa.ll | 1 + test/Analysis/BasicAA/phi-loop.ll | 75 + test/Analysis/BasicAA/q.bad.ll | 180 + test/Analysis/BasicAA/sequential-gep.ll | 54 + test/Analysis/BasicAA/zext.ll | 231 + test/Analysis/BlockFrequencyInfo/bad_input.ll | 4 +- test/Analysis/BlockFrequencyInfo/basic.ll | 6 +- .../irreducible_loop_crash.ll | 155 + .../loops_with_profile_info.ll | 6 +- test/Analysis/BranchProbabilityInfo/basic.ll | 68 +- test/Analysis/BranchProbabilityInfo/loop.ll | 152 +- .../BranchProbabilityInfo/noreturn.ll | 67 +- .../Analysis/BranchProbabilityInfo/pr18705.ll | 4 +- .../Analysis/BranchProbabilityInfo/pr22718.ll | 4 +- .../CFLAliasAnalysis/arguments-globals.ll | 2 +- .../CFLAliasAnalysis/basic-interproc.ll | 2 +- .../Analysis/CFLAliasAnalysis/branch-alias.ll | 2 +- .../CFLAliasAnalysis/const-expr-gep.ll | 2 +- .../full-store-partial-alias.ll | 4 +- .../CFLAliasAnalysis/gep-signed-arithmetic.ll | 2 +- .../CFLAliasAnalysis/multilevel-combine.ll | 2 +- .../CFLAliasAnalysis/must-and-partial.ll | 2 +- .../CFLAliasAnalysis/opaque-call-alias.ll | 20 + test/Analysis/CFLAliasAnalysis/va.ll | 2 +- .../Analysis/CallGraph/non-leaf-intrinsics.ll | 4 +- test/Analysis/CostModel/AArch64/select.ll | 12 +- test/Analysis/CostModel/AMDGPU/br.ll | 45 + .../CostModel/AMDGPU/extractelement.ll | 110 + test/Analysis/CostModel/AMDGPU/lit.local.cfg | 2 + test/Analysis/CostModel/ARM/cast.ll | 507 +- test/Analysis/CostModel/ARM/gep.ll | 88 +- test/Analysis/CostModel/ARM/select.ll | 6 +- test/Analysis/CostModel/PowerPC/load_store.ll | 4 +- .../CostModel/PowerPC/unal-vec-ldst.ll | 404 + test/Analysis/CostModel/X86/arith.ll | 4 +- test/Analysis/CostModel/X86/cast.ll | 38 +- .../CostModel/X86/masked-intrinsic-cost.ll | 215 +- 
test/Analysis/CostModel/X86/reduction.ll | 2 +- test/Analysis/CostModel/X86/sitofp.ll | 346 +- test/Analysis/CostModel/X86/sse-itoi.ll | 353 + test/Analysis/CostModel/X86/testshiftashr.ll | 32 +- test/Analysis/CostModel/X86/testshiftlshr.ll | 16 +- test/Analysis/CostModel/X86/testshiftshl.ll | 16 +- test/Analysis/CostModel/X86/uitofp.ll | 418 +- test/Analysis/CostModel/X86/vector_gep.ll | 17 + .../CostModel/X86/vshift-ashr-cost.ll | 392 + test/Analysis/CostModel/X86/vshift-cost.ll | 167 - .../CostModel/X86/vshift-lshr-cost.ll | 400 + .../Analysis/CostModel/X86/vshift-shl-cost.ll | 580 + test/Analysis/CostModel/no_info.ll | 22 +- test/Analysis/Delinearization/a.ll | 2 +- .../multidim_ivs_and_integer_offsets_3d.ll | 2 +- ...multidim_ivs_and_integer_offsets_nts_3d.ll | 2 +- ...multidim_ivs_and_parameteric_offsets_3d.ll | 2 +- .../parameter_addrec_product.ll | 56 + test/Analysis/DemandedBits/basic.ll | 34 + test/Analysis/DependenceAnalysis/GCD.ll | 14 +- .../DependenceAnalysis/NonAffineExpr.ll | 36 + test/Analysis/DependenceAnalysis/PR21585.ll | 2 +- .../DivergenceAnalysis/AMDGPU/kernel-args.ll | 16 + .../DivergenceAnalysis/AMDGPU/lit.local.cfg | 2 + .../DivergenceAnalysis/NVPTX/diverge.ll | 23 +- .../GlobalsModRef/2008-09-03-ReadGlobals.ll | 2 +- test/Analysis/GlobalsModRef/aliastest.ll | 5 +- .../GlobalsModRef/argmemonly-escape.ll | 47 + test/Analysis/GlobalsModRef/atomic-instrs.ll | 37 + .../GlobalsModRef/chaining-analysis.ll | 2 +- .../Analysis/GlobalsModRef/indirect-global.ll | 5 +- test/Analysis/GlobalsModRef/memset-escape.ll | 65 + test/Analysis/GlobalsModRef/modreftest.ll | 22 +- test/Analysis/GlobalsModRef/nocapture.ll | 57 + .../GlobalsModRef/nonescaping-noalias.ll | 116 + test/Analysis/GlobalsModRef/pr12351.ll | 2 +- test/Analysis/GlobalsModRef/pr25309.ll | 27 + test/Analysis/GlobalsModRef/purecse.ll | 2 +- .../GlobalsModRef/weak-interposition.ll | 24 + .../LazyCallGraph/non-leaf-intrinsics.ll | 4 +- test/Analysis/Lint/cppeh-catch-intrinsics.ll | 278 - .../forward-loop-carried.ll | 44 + .../forward-loop-independent.ll | 64 + test/Analysis/LoopAccessAnalysis/nullptr.ll | 38 + .../LoopAccessAnalysis/number-of-memchecks.ll | 51 +- .../pointer-with-unknown-bounds.ll | 4 +- .../resort-to-memchecks-only.ll | 4 +- .../reverse-memcheck-bounds.ll | 89 + .../LoopAccessAnalysis/safe-no-checks.ll | 8 + .../stride-access-dependence.ll | 28 +- .../underlying-objects-2.ll | 2 +- .../unsafe-and-rt-checks.ll | 2 +- .../memdep-block-scan-limit.ll | 15 + .../ScalarEvolution/avoid-assume-hang.ll | 139 + .../ScalarEvolution/constant_condition.ll | 51 + .../ScalarEvolution/flags-from-poison.ll | 592 + .../ScalarEvolution/infer-prestart-no-wrap.ll | 10 +- .../Analysis/ScalarEvolution/min-max-exprs.ll | 2 +- .../ScalarEvolution/no-wrap-add-exprs.ll | 122 + test/Analysis/ScalarEvolution/non-IV-phi.ll | 59 + test/Analysis/ScalarEvolution/pr24757.ll | 35 + test/Analysis/ScalarEvolution/pr25369.ll | 78 + test/Analysis/ScalarEvolution/scev-aa.ll | 2 +- test/Analysis/ScalarEvolution/shift-op.ll | 164 + .../ScalarEvolution/smax-br-phi-idioms.ll | 128 + test/Analysis/ScalarEvolution/trip-count.ll | 5 +- test/Analysis/ScalarEvolution/zext-wrap.ll | 2 +- .../TypeBasedAliasAnalysis/functionattrs.ll | 8 +- .../TypeBasedAliasAnalysis/intrinsics.ll | 17 +- test/Analysis/TypeBasedAliasAnalysis/licm.ll | 2 +- .../TypeBasedAliasAnalysis/precedence.ll | 8 +- .../ValueTracking/known-bits-from-range-md.ll | 34 + .../Analysis/ValueTracking/known-non-equal.ll | 21 + .../ValueTracking/knownnonzero-shift.ll | 13 + 
.../Analysis/ValueTracking/knownzero-shift.ll | 14 + .../ValueTracking/memory-dereferenceable.ll | 117 +- test/Analysis/ValueTracking/monotonic-phi.ll | 49 + test/Analysis/ValueTracking/pr24866.ll | 44 + test/Assembler/2007-09-10-AliasFwdRef.ll | 2 +- ...-02-05-FunctionLocalMetadataBecomesNull.ll | 8 +- test/Assembler/ConstantExprFoldCast.ll | 4 + test/Assembler/ConstantExprNoFold.ll | 8 +- test/Assembler/addrspacecast-alias.ll | 4 +- test/Assembler/alias-redefinition.ll | 6 +- test/Assembler/alias-use-list-order.ll | 6 +- test/Assembler/anon-functions.ll | 4 +- test/Assembler/debug-info.ll | 19 +- test/Assembler/dicompileunit.ll | 31 +- test/Assembler/diimportedentity.ll | 4 +- test/Assembler/dilexicalblock.ll | 2 +- test/Assembler/dilocalvariable-arg-large.ll | 4 +- test/Assembler/dilocalvariable.ll | 16 +- test/Assembler/dilocation.ll | 4 +- test/Assembler/disubprogram.ll | 31 +- test/Assembler/drop-debug-info.ll | 6 +- test/Assembler/global-addrspace-forwardref.ll | 9 + .../Assembler/incorrect-tdep-attrs-parsing.ll | 6 + test/Assembler/internal-hidden-alias.ll | 2 +- test/Assembler/internal-protected-alias.ll | 2 +- .../invalid-alias-mismatched-explicit-type.ll | 4 + .../invalid-dicompileunit-language-bad.ll | 6 +- ...invalid-dicompileunit-language-overflow.ll | 10 +- .../invalid-dicompileunit-missing-language.ll | 4 +- .../invalid-dicompileunit-null-file.ll | 4 +- .../invalid-dicompileunit-uniqued.ll | 4 + .../invalid-dilocalvariable-arg-large.ll | 6 +- .../invalid-dilocalvariable-arg-negative.ll | 7 +- .../invalid-dilocalvariable-missing-scope.ll | 4 +- .../invalid-dilocalvariable-missing-tag.ll | 4 - ...invalid-disubprogram-uniqued-definition.ll | 4 + test/Assembler/invalid-fp80hex.ll | 6 + test/Assembler/invalid-fwdref2.ll | 2 +- test/Assembler/invalid-inline-constraint.ll | 7 + test/Assembler/invalid-untyped-metadata.ll | 6 + ...invalid-uselistorder-indexes-duplicated.ll | 6 +- .../invalid-uselistorder-indexes-one.ll | 2 +- .../invalid-uselistorder-indexes-ordered.ll | 6 +- .../invalid-uselistorder-indexes-range.ll | 6 +- .../invalid-uselistorder-indexes-toofew.ll | 6 +- .../invalid-uselistorder-indexes-toomany.ll | 4 +- test/Assembler/metadata.ll | 2 +- test/Assembler/private-hidden-alias.ll | 2 +- test/Assembler/private-protected-alias.ll | 2 +- test/Assembler/token.ll | 11 + test/Assembler/unnamed-alias.ll | 8 +- test/Assembler/uselistorder.ll | 2 +- test/Bindings/Go/go.test | 2 +- test/Bindings/Go/lit.local.cfg | 3 + test/Bindings/OCaml/analysis.ml | 4 +- test/Bindings/OCaml/bitreader.ml | 4 +- test/Bindings/OCaml/bitwriter.ml | 4 +- test/Bindings/OCaml/core.ml | 4 +- test/Bindings/OCaml/executionengine.ml | 4 +- test/Bindings/OCaml/ext_exc.ml | 4 +- test/Bindings/OCaml/ipo.ml | 4 +- test/Bindings/OCaml/irreader.ml | 4 +- test/Bindings/OCaml/linker.ml | 14 +- test/Bindings/OCaml/passmgr_builder.ml | 4 +- test/Bindings/OCaml/scalar_opts.ml | 4 +- test/Bindings/OCaml/target.ml | 4 +- test/Bindings/OCaml/transform_utils.ml | 4 +- test/Bindings/OCaml/vectorize.ml | 4 +- test/Bindings/llvm-c/Inputs/invalid.ll.bc | Bin 332 -> 688 bytes test/Bindings/llvm-c/functions.ll | 11 +- test/Bindings/llvm-c/invalid-bitcode.test | 9 +- test/Bitcode/DICompileUnit-no-DWOId.ll | 2 +- test/Bitcode/DILocalVariable-explicit-tags.ll | 16 + .../DILocalVariable-explicit-tags.ll.bc | Bin 0 -> 500 bytes .../DISubprogram-distinct-definitions.ll | 11 + .../DISubprogram-distinct-definitions.ll.bc | Bin 0 -> 512 bytes test/Bitcode/Inputs/invalid-abbrev.bc | Bin 129 -> 132 bytes 
test/Bitcode/Inputs/invalid-cast.bc | Bin 0 -> 1236 bytes .../Inputs/invalid-name-with-0-byte.bc | Bin 0 -> 1265 bytes .../Inputs/invalid-no-function-block.bc | Bin 0 -> 548 bytes test/Bitcode/anon-functions.ll | 18 + test/Bitcode/attributes.ll | 32 +- test/Bitcode/compatibility-3.6.ll | 1207 ++ test/Bitcode/compatibility-3.6.ll.bc | Bin 0 -> 10192 bytes test/Bitcode/compatibility-3.7.ll | 1280 ++ test/Bitcode/compatibility-3.7.ll.bc | Bin 0 -> 11584 bytes test/Bitcode/compatibility.ll | 1560 ++ test/Bitcode/debug-loc-again.ll | 6 +- test/Bitcode/highLevelStructure.3.2.ll | 20 +- test/Bitcode/identification.ll | 6 + test/Bitcode/invalid.ll | 2 +- test/Bitcode/invalid.ll.bc | Bin 332 -> 688 bytes test/Bitcode/invalid.test | 15 + .../local-linkage-default-visibility.3.4.ll | 24 +- test/Bitcode/old-aliases.ll | 16 +- test/Bitcode/operand-bundles.ll | 152 + test/Bitcode/select.ll | 8 + test/Bitcode/tailcall.ll | 16 +- test/Bitcode/thinlto-function-summary.ll | 45 + test/Bitcode/upgrade-subprogram.ll | 17 + test/Bitcode/upgrade-subprogram.ll.bc | Bin 0 -> 784 bytes test/Bitcode/use-list-order.ll | 14 +- test/Bitcode/use-list-order2.ll | 57 + test/Bitcode/vst-forward-declaration.ll | 29 + test/BugPoint/metadata.ll | 6 +- test/BugPoint/named-md.ll | 39 + test/BugPoint/remove_arguments_test.ll | 6 +- test/BugPoint/replace-funcs-with-null.ll | 2 +- test/CMakeLists.txt | 33 +- ...aarch64-2014-08-11-MachineCombinerCrash.ll | 46 +- test/CodeGen/AArch64/aarch64-addv.ll | 98 + .../AArch64/aarch64-deferred-spilling.ll | 514 + .../AArch64/aarch64-dynamic-stack-layout.ll | 24 +- .../AArch64/aarch64-interleaved-accesses.ll | 147 +- test/CodeGen/AArch64/aarch64-loop-gep-opt.ll | 50 + test/CodeGen/AArch64/aarch64-minmaxv.ll | 511 + .../AArch64/aarch64-smax-constantfold.ll | 12 + test/CodeGen/AArch64/addsub_ext.ll | 146 + test/CodeGen/AArch64/alloca.ll | 4 +- .../arm64-2011-03-17-AsmPrinterCrash.ll | 14 +- test/CodeGen/AArch64/arm64-aapcs-be.ll | 2 +- test/CodeGen/AArch64/arm64-aapcs.ll | 21 +- test/CodeGen/AArch64/arm64-abi_align.ll | 2 +- .../AArch64/arm64-addr-type-promotion.ll | 9 +- .../arm64-alloca-frame-pointer-offset.ll | 6 +- test/CodeGen/AArch64/arm64-arith.ll | 3 +- test/CodeGen/AArch64/arm64-atomic-128.ll | 7 +- test/CodeGen/AArch64/arm64-atomic.ll | 70 +- test/CodeGen/AArch64/arm64-builtins-linux.ll | 11 + test/CodeGen/AArch64/arm64-ccmp-heuristics.ll | 4 +- test/CodeGen/AArch64/arm64-ccmp.ll | 166 +- .../AArch64/arm64-coalescing-MOVi32imm.ll | 17 + test/CodeGen/AArch64/arm64-collect-loh.ll | 604 + test/CodeGen/AArch64/arm64-fast-isel-br.ll | 15 +- test/CodeGen/AArch64/arm64-fmax-safe.ll | 53 + test/CodeGen/AArch64/arm64-fmax.ll | 46 +- test/CodeGen/AArch64/arm64-fp128.ll | 31 +- test/CodeGen/AArch64/arm64-hello.ll | 4 +- test/CodeGen/AArch64/arm64-indexed-memory.ll | 33 + .../AArch64/arm64-indexed-vector-ldst.ll | 26 +- test/CodeGen/AArch64/arm64-inline-asm.ll | 2 +- test/CodeGen/AArch64/arm64-join-reserved.ll | 2 +- test/CodeGen/AArch64/arm64-large-frame.ll | 2 +- test/CodeGen/AArch64/arm64-ld-from-st.ll | 666 + test/CodeGen/AArch64/arm64-ldp.ll | 188 +- test/CodeGen/AArch64/arm64-long-shift.ll | 80 +- .../AArch64/arm64-misaligned-memcpy-inline.ll | 2 +- .../AArch64/arm64-narrow-ldst-merge.ll | 406 + test/CodeGen/AArch64/arm64-neon-2velem.ll | 55 + test/CodeGen/AArch64/arm64-neon-copy.ll | 17 +- .../AArch64/arm64-patchpoint-webkit_jscc.ll | 8 +- test/CodeGen/AArch64/arm64-platform-reg.ll | 4 +- test/CodeGen/AArch64/arm64-popcnt.ll | 8 +- test/CodeGen/AArch64/arm64-rounding.ll | 62 +- 
test/CodeGen/AArch64/arm64-shrink-wrapping.ll | 95 +- test/CodeGen/AArch64/arm64-spill-lr.ll | 6 +- test/CodeGen/AArch64/arm64-stackmap.ll | 4 +- test/CodeGen/AArch64/arm64-stp.ll | 34 +- test/CodeGen/AArch64/arm64-strict-align.ll | 5 +- .../AArch64/arm64-tls-dynamic-together.ll | 43 +- test/CodeGen/AArch64/arm64-trunc-store.ll | 2 +- test/CodeGen/AArch64/arm64-vabs.ll | 66 + test/CodeGen/AArch64/arm64-variadic-aapcs.ll | 2 +- test/CodeGen/AArch64/arm64-vector-ext.ll | 54 +- test/CodeGen/AArch64/arm64-vminmaxnm.ll | 17 +- test/CodeGen/AArch64/arm64-xaluo.ll | 4 +- test/CodeGen/AArch64/atomic-ops.ll | 20 +- test/CodeGen/AArch64/bitcast-v2i8.ll | 2 +- test/CodeGen/AArch64/bitfield-insert.ll | 41 + test/CodeGen/AArch64/bitfield.ll | 46 +- test/CodeGen/AArch64/bitreverse.ll | 87 + .../AArch64/combine-comparisons-by-cse.ll | 26 + test/CodeGen/AArch64/cpus.ll | 1 + test/CodeGen/AArch64/cxx-tlscc.ll | 76 + test/CodeGen/AArch64/dag-combine-select.ll | 47 + test/CodeGen/AArch64/divrem.ll | 22 + test/CodeGen/AArch64/emutls.ll | 116 + test/CodeGen/AArch64/emutls_generic.ll | 59 + test/CodeGen/AArch64/eon.ll | 29 + test/CodeGen/AArch64/f16-instructions.ll | 111 +- .../AArch64/fast-isel-branch-cond-mask.ll | 19 + .../AArch64/fast-isel-branch-cond-split.ll | 52 +- test/CodeGen/AArch64/fast-isel-cmp-vec.ll | 100 + .../CodeGen/AArch64/fast-isel-folded-shift.ll | 125 + test/CodeGen/AArch64/fast-isel-logic-op.ll | 2 +- test/CodeGen/AArch64/fastcc-reserved.ll | 4 +- test/CodeGen/AArch64/fastcc.ll | 8 +- test/CodeGen/AArch64/fcvt_combine.ll | 154 + test/CodeGen/AArch64/fdiv_combine.ll | 115 + test/CodeGen/AArch64/fold-constants.ll | 19 +- test/CodeGen/AArch64/fp16-v4-instructions.ll | 51 +- test/CodeGen/AArch64/fp16-v8-instructions.ll | 63 + test/CodeGen/AArch64/free-zext.ll | 59 +- test/CodeGen/AArch64/func-argpassing.ll | 4 +- test/CodeGen/AArch64/func-calls.ll | 4 +- test/CodeGen/AArch64/global-alignment.ll | 2 +- test/CodeGen/AArch64/global-merge-1.ll | 16 +- test/CodeGen/AArch64/global-merge-2.ll | 34 +- test/CodeGen/AArch64/global-merge-3.ll | 46 +- test/CodeGen/AArch64/global-merge-4.ll | 6 +- .../AArch64/global-merge-group-by-use.ll | 6 +- .../global-merge-ignore-single-use-minsize.ll | 2 +- .../AArch64/global-merge-ignore-single-use.ll | 2 +- test/CodeGen/AArch64/ldst-opt.ll | 477 +- test/CodeGen/AArch64/merge-store.ll | 30 + test/CodeGen/AArch64/misched-fusion.ll | 34 + test/CodeGen/AArch64/mul-lohi.ll | 29 + test/CodeGen/AArch64/nest-register.ll | 2 +- test/CodeGen/AArch64/nontemporal.ll | 339 + test/CodeGen/AArch64/pic-eh-stubs.ll | 2 +- test/CodeGen/AArch64/readcyclecounter.ll | 15 + test/CodeGen/AArch64/regress-tblgen-chains.ll | 4 +- test/CodeGen/AArch64/remat.ll | 1 + test/CodeGen/AArch64/rotate.ll | 14 + test/CodeGen/AArch64/round-conv.ll | 330 + test/CodeGen/AArch64/shrink-wrap.ll | 184 + test/CodeGen/AArch64/stackmap-frame-setup.ll | 20 + test/CodeGen/AArch64/tail-call.ll | 6 +- .../CodeGen/AArch64/tailcall-explicit-sret.ll | 2 +- test/CodeGen/AArch64/tbi.ll | 102 + test/CodeGen/AArch64/vector-fcopysign.ll | 178 + test/CodeGen/AArch64/xbfiz.ll | 30 + test/CodeGen/AMDGPU/add.ll | 14 +- test/CodeGen/AMDGPU/address-space.ll | 6 +- test/CodeGen/AMDGPU/addrspacecast.ll | 66 + test/CodeGen/AMDGPU/and.ll | 101 +- .../AMDGPU/annotate-kernel-features.ll | 193 + test/CodeGen/AMDGPU/array-ptr-calc-i32.ll | 8 +- test/CodeGen/AMDGPU/bitreverse.ll | 115 + test/CodeGen/AMDGPU/calling-conventions.ll | 20 + .../AMDGPU/cgp-addressing-modes-flat.ll | 98 + test/CodeGen/AMDGPU/cgp-addressing-modes.ll | 254 +- 
test/CodeGen/AMDGPU/ci-use-flat-for-global.ll | 15 + test/CodeGen/AMDGPU/ctpop64.ll | 22 +- test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 12 +- .../AMDGPU/drop-mem-operand-move-smrd.ll | 52 + ...ds-negative-offset-addressing-mode-loop.ll | 10 +- test/CodeGen/AMDGPU/ds-sub-offset.ll | 125 + test/CodeGen/AMDGPU/ds_read2.ll | 10 +- test/CodeGen/AMDGPU/ds_read2_superreg.ll | 89 +- test/CodeGen/AMDGPU/ds_read2st64.ll | 8 +- test/CodeGen/AMDGPU/ds_write2.ll | 9 +- test/CodeGen/AMDGPU/ds_write2st64.ll | 4 +- test/CodeGen/AMDGPU/dynamic_stackalloc.ll | 11 + test/CodeGen/AMDGPU/extract-vector-elt-i64.ll | 43 + test/CodeGen/AMDGPU/fadd64.ll | 52 +- test/CodeGen/AMDGPU/fceil64.ll | 12 +- test/CodeGen/AMDGPU/fcmp.ll | 2 +- test/CodeGen/AMDGPU/flat-address-space.ll | 77 +- test/CodeGen/AMDGPU/flat-scratch-reg.ll | 36 + test/CodeGen/AMDGPU/fma-combine.ll | 200 + test/CodeGen/AMDGPU/fmax_legacy.ll | 40 + test/CodeGen/AMDGPU/fmin_legacy.ll | 63 + .../AMDGPU/fmul-2-combine-multi-use.ll | 102 + test/CodeGen/AMDGPU/fneg-fabs.ll | 27 +- test/CodeGen/AMDGPU/ftrunc.f64.ll | 12 +- test/CodeGen/AMDGPU/gep-address-space.ll | 34 +- test/CodeGen/AMDGPU/global-constant.ll | 27 + test/CodeGen/AMDGPU/global-extload-i32.ll | 321 +- test/CodeGen/AMDGPU/global_atomics.ll | 20 +- test/CodeGen/AMDGPU/half.ll | 256 +- test/CodeGen/AMDGPU/hsa-globals.ll | 132 + test/CodeGen/AMDGPU/hsa-group-segment.ll | 14 + test/CodeGen/AMDGPU/hsa.ll | 36 +- test/CodeGen/AMDGPU/image-attributes.ll | 206 + test/CodeGen/AMDGPU/image-resource-id.ll | 409 + test/CodeGen/AMDGPU/imm.ll | 24 +- test/CodeGen/AMDGPU/indirect-addressing-si.ll | 67 +- test/CodeGen/AMDGPU/indirect-private-64.ll | 34 +- test/CodeGen/AMDGPU/inline-constraints.ll | 23 + test/CodeGen/AMDGPU/insert_vector_elt.ll | 103 +- test/CodeGen/AMDGPU/kernel-args.ll | 26 +- test/CodeGen/AMDGPU/large-alloca-compute.ll | 57 + test/CodeGen/AMDGPU/large-alloca-graphics.ll | 47 + test/CodeGen/AMDGPU/large-alloca.ll | 15 - test/CodeGen/AMDGPU/literals.ll | 8 +- test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll | 4 +- test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll | 2 +- test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll | 28 - test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll | 6 +- test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll | 1 - .../AMDGPU/llvm.AMDGPU.read.workdim.ll | 37 + test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll | 2 +- test/CodeGen/AMDGPU/llvm.SI.packf16.ll | 29 + .../AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll | 16 + .../AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll | 14 + .../AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll | 16 + .../AMDGPU/llvm.amdgcn.dispatch.ptr.ll | 16 + test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll | 30 + test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll | 24 + .../AMDGPU/llvm.amdgcn.s.dcache.inv.ll | 29 + .../AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll | 29 + .../CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll | 27 + .../AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll | 27 + test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll | 2 +- test/CodeGen/AMDGPU/llvm.dbg.value.ll | 12 +- test/CodeGen/AMDGPU/llvm.memcpy.ll | 66 +- .../AMDGPU/llvm.r600.read.local.size.ll | 184 + test/CodeGen/AMDGPU/llvm.round.f64.ll | 5 +- test/CodeGen/AMDGPU/load.ll | 34 +- .../AMDGPU/local-memory-two-objects.ll | 4 +- test/CodeGen/AMDGPU/local-memory.ll | 4 +- test/CodeGen/AMDGPU/max.ll | 116 +- test/CodeGen/AMDGPU/merge-stores.ll | 196 +- test/CodeGen/AMDGPU/min.ll | 171 +- .../move-addr64-rsrc-dead-subreg-writes.ll | 36 + test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll | 52 + .../CodeGen/AMDGPU/no-hsa-graphics-shaders.ll | 18 + test/CodeGen/AMDGPU/no-shrink-extloads.ll | 12 + 
test/CodeGen/AMDGPU/opencl-image-metadata.ll | 24 + test/CodeGen/AMDGPU/operand-folding.ll | 2 +- test/CodeGen/AMDGPU/or.ll | 2 +- ...partially-dead-super-register-immediate.ll | 28 + test/CodeGen/AMDGPU/private-memory.ll | 12 + .../CodeGen/AMDGPU/register-count-comments.ll | 3 +- test/CodeGen/AMDGPU/reorder-stores.ll | 58 +- test/CodeGen/AMDGPU/s_movk_i32.ll | 18 +- test/CodeGen/AMDGPU/salu-to-valu.ll | 416 +- test/CodeGen/AMDGPU/sampler-resource-id.ll | 65 + .../schedule-vs-if-nested-loop-failure.ll | 2 +- test/CodeGen/AMDGPU/scratch-buffer.ll | 2 +- test/CodeGen/AMDGPU/select64.ll | 8 +- test/CodeGen/AMDGPU/set-dx10.ll | 48 +- test/CodeGen/AMDGPU/setcc-opt.ll | 22 +- test/CodeGen/AMDGPU/sext-in-reg.ll | 54 +- test/CodeGen/AMDGPU/shl.ll | 15 +- test/CodeGen/AMDGPU/shl_add_constant.ll | 6 +- test/CodeGen/AMDGPU/shl_add_ptr.ll | 2 +- ...si-instr-info-correct-implicit-operands.ll | 16 + test/CodeGen/AMDGPU/si-literal-folding.ll | 17 + test/CodeGen/AMDGPU/si-sgpr-spill.ll | 10 + .../AMDGPU/si-triv-disjoint-mem-access.ll | 7 +- test/CodeGen/AMDGPU/sint_to_fp.f64.ll | 6 +- test/CodeGen/AMDGPU/sminmax.ll | 130 + test/CodeGen/AMDGPU/smrd.ll | 73 +- test/CodeGen/AMDGPU/split-scalar-i64-add.ll | 42 +- .../AMDGPU/split-vector-memoperand-offsets.ll | 104 + test/CodeGen/AMDGPU/sra.ll | 8 +- test/CodeGen/AMDGPU/srl.ll | 13 +- test/CodeGen/AMDGPU/store-barrier.ll | 4 +- test/CodeGen/AMDGPU/store.ll | 25 +- test/CodeGen/AMDGPU/store_typed.ll | 24 + test/CodeGen/AMDGPU/sub.ll | 14 +- test/CodeGen/AMDGPU/trunc.ll | 8 +- test/CodeGen/AMDGPU/udivrem.ll | 130 +- test/CodeGen/AMDGPU/uint_to_fp.f64.ll | 6 +- test/CodeGen/AMDGPU/unsupported-cc.ll | 32 +- .../CodeGen/AMDGPU/use-sgpr-multiple-times.ll | 167 + test/CodeGen/AMDGPU/valu-i1.ll | 16 +- ...vgpr-spill-emergency-stack-slot-compute.ll | 585 + .../AMDGPU/vgpr-spill-emergency-stack-slot.ll | 494 + test/CodeGen/AMDGPU/vop-shrink.ll | 4 +- test/CodeGen/AMDGPU/wait.ll | 59 +- test/CodeGen/AMDGPU/work-item-intrinsics.ll | 263 +- test/CodeGen/AMDGPU/xor.ll | 2 +- test/CodeGen/AMDGPU/zero_extend.ll | 3 +- test/CodeGen/ARM/2007-03-13-InstrSched.ll | 2 +- test/CodeGen/ARM/2009-10-16-Scope.ll | 6 +- .../ARM/2010-04-15-ScavengerDebugValue.ll | 6 +- test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll | 24 +- test/CodeGen/ARM/2010-05-21-BuildVector.ll | 4 +- .../CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll | 4 +- test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll | 2 +- .../ARM/2010-06-25-Thumb2ITInvalidIterator.ll | 12 +- .../ARM/2010-06-29-PartialRedefFastAlloc.ll | 4 +- test/CodeGen/ARM/2010-08-04-StackVariable.ll | 24 +- .../CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll | 42 +- .../ARM/2011-06-29-MergeGlobalsAlign.ll | 2 +- .../CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll | 42 +- .../CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll | 4 +- test/CodeGen/ARM/2011-10-26-memset-inline.ll | 2 +- .../ARM/2012-01-24-RegSequenceLiveRange.ll | 10 +- .../ARM/2012-05-10-PreferVMOVtoVDUP32.ll | 4 +- .../ARM/2012-08-27-CopyPhysRegCrash.ll | 14 +- test/CodeGen/ARM/2012-11-14-subs_carry.ll | 10 +- test/CodeGen/ARM/2013-10-11-select-stalls.ll | 13 +- .../2014-01-09-pseudo_expand_implicit_reg.ll | 4 +- test/CodeGen/ARM/MachO-subtypes.ll | 68 + test/CodeGen/ARM/Windows/division.ll | 38 + ...oating-point-conversion.ll => libcalls.ll} | 3 +- test/CodeGen/ARM/Windows/no-eabi.ll | 10 + test/CodeGen/ARM/Windows/no-frame-register.ll | 22 + test/CodeGen/ARM/Windows/overflow.ll | 77 + test/CodeGen/ARM/adv-copy-opt.ll | 14 +- test/CodeGen/ARM/aliases.ll | 30 +- test/CodeGen/ARM/align-sp-adjustment.ll | 47 + 
test/CodeGen/ARM/apcs-vfp.ll | 153 + test/CodeGen/ARM/arm-eabi.ll | 63 + test/CodeGen/ARM/arm-interleaved-accesses.ll | 190 +- test/CodeGen/ARM/arm-shrink-wrapping-linux.ll | 142 + test/CodeGen/ARM/arm-shrink-wrapping.ll | 683 + test/CodeGen/ARM/atomic-64bit.ll | 6 + test/CodeGen/ARM/atomic-cmp.ll | 4 +- test/CodeGen/ARM/atomic-cmpxchg.ll | 96 +- test/CodeGen/ARM/atomic-op.ll | 46 +- test/CodeGen/ARM/atomic-ops-v8.ll | 38 +- test/CodeGen/ARM/avoid-cpsr-rmw.ll | 16 +- test/CodeGen/ARM/bfi.ll | 95 + .../build-attributes-optimization-minsize.ll | 18 + .../build-attributes-optimization-mixed.ll | 23 + .../build-attributes-optimization-optnone.ll | 18 + .../build-attributes-optimization-optsize.ll | 18 + .../ARM/build-attributes-optimization.ll | 23 + test/CodeGen/ARM/build-attributes.ll | 142 +- test/CodeGen/ARM/call-tc.ll | 8 +- test/CodeGen/ARM/cfi-alignment.ll | 48 + test/CodeGen/ARM/cmpxchg-idioms.ll | 6 +- test/CodeGen/ARM/cmpxchg-weak.ll | 56 +- test/CodeGen/ARM/coalesce-dbgvalue.ll | 10 +- test/CodeGen/ARM/coalesce-subregs.ll | 38 +- test/CodeGen/ARM/combine-vmovdrr.ll | 72 + test/CodeGen/ARM/constants.ll | 6 +- test/CodeGen/ARM/dagcombine-concatvector.ll | 4 +- test/CodeGen/ARM/debug-frame-vararg.ll | 14 +- test/CodeGen/ARM/debug-frame.ll | 28 +- test/CodeGen/ARM/debug-info-arg.ll | 20 +- test/CodeGen/ARM/debug-info-blocks.ll | 40 +- test/CodeGen/ARM/debug-info-branch-folding.ll | 32 +- test/CodeGen/ARM/debug-info-d16-reg.ll | 38 +- test/CodeGen/ARM/debug-info-no-frame.ll | 8 +- test/CodeGen/ARM/debug-info-qreg.ll | 28 +- test/CodeGen/ARM/debug-info-s16-reg.ll | 38 +- test/CodeGen/ARM/debug-info-sreg2.ll | 10 +- test/CodeGen/ARM/debug-segmented-stacks.ll | 14 +- test/CodeGen/ARM/debugtrap.ll | 17 + test/CodeGen/ARM/div.ll | 71 +- test/CodeGen/ARM/divmod-eabi.ll | 4 +- test/CodeGen/ARM/eh-resume-darwin.ll | 8 +- test/CodeGen/ARM/emutls.ll | 258 + test/CodeGen/ARM/emutls1.ll | 31 + test/CodeGen/ARM/emutls_generic.ll | 61 + test/CodeGen/ARM/fast-isel-align.ll | 22 +- test/CodeGen/ARM/fast-isel-ext.ll | 35 - test/CodeGen/ARM/fast-isel-mvn.ll | 10 +- test/CodeGen/ARM/fast-isel-pic.ll | 23 +- test/CodeGen/ARM/fold-stack-adjust.ll | 18 +- test/CodeGen/ARM/fp16-args.ll | 40 + test/CodeGen/ARM/fp16-promote.ll | 471 +- test/CodeGen/ARM/fp16.ll | 62 +- test/CodeGen/ARM/fparith.ll | 4 +- test/CodeGen/ARM/gep-optimization.ll | 77 + test/CodeGen/ARM/global-merge-1.ll | 6 +- test/CodeGen/ARM/global-merge-external.ll | 46 + test/CodeGen/ARM/globals.ll | 9 +- test/CodeGen/ARM/ifcvt-branch-weight-bug.ll | 14 +- test/CodeGen/ARM/ifcvt-branch-weight.ll | 2 +- test/CodeGen/ARM/ifcvt-iter-indbr.ll | 6 + test/CodeGen/ARM/ifcvt4.ll | 6 +- test/CodeGen/ARM/ifcvt5.ll | 4 +- test/CodeGen/ARM/ifcvt6.ll | 2 +- test/CodeGen/ARM/ifcvt8.ll | 4 +- test/CodeGen/ARM/inlineasm-switch-mode.ll | 4 +- .../ARM/ldm-stm-base-materialization.ll | 93 + test/CodeGen/ARM/ldrd.ll | 58 +- test/CodeGen/ARM/legalize-unaligned-load.ll | 35 + test/CodeGen/ARM/load-global.ll | 12 +- test/CodeGen/ARM/load-store-flags.ll | 4 +- test/CodeGen/ARM/load.ll | 571 +- test/CodeGen/ARM/machine-cse-cmp.ll | 2 +- test/CodeGen/ARM/memcpy-inline.ll | 2 +- test/CodeGen/ARM/memcpy-ldm-stm.ll | 94 + test/CodeGen/ARM/memfunc.ll | 255 +- test/CodeGen/ARM/minmax.ll | 193 + test/CodeGen/ARM/neon_minmax.ll | 1 + test/CodeGen/ARM/neon_spill.ll | 6 +- test/CodeGen/ARM/neon_vabs.ll | 38 + test/CodeGen/ARM/neon_vshl_minint.ll | 13 + test/CodeGen/ARM/out-of-registers.ll | 8 +- test/CodeGen/ARM/pr25317.ll | 11 + test/CodeGen/ARM/pr25838.ll | 34 + 
test/CodeGen/ARM/rbit.ll | 11 + test/CodeGen/ARM/reg_sequence.ll | 64 +- test/CodeGen/ARM/rotate.ll | 14 + test/CodeGen/ARM/sat-arith.ll | 63 + test/CodeGen/ARM/sched-it-debug-nodes.ll | 88 - test/CodeGen/ARM/setjmp_longjmp.ll | 113 + test/CodeGen/ARM/shifter_operand.ll | 228 +- .../CodeGen/ARM/sjlj-prepare-critical-edge.ll | 2 +- .../ARM/sjljehprepare-lower-empty-struct.ll | 1 + test/CodeGen/ARM/softfp-fabs-fneg.ll | 41 + test/CodeGen/ARM/special-reg-mcore.ll | 2 +- test/CodeGen/ARM/spill-q.ll | 28 +- test/CodeGen/ARM/ssat-lower.ll | 11 + test/CodeGen/ARM/ssat-upper.ll | 11 + test/CodeGen/ARM/subtarget-no-movt.ll | 45 + test/CodeGen/ARM/tail-merge-branch-weight.ll | 2 +- test/CodeGen/ARM/taildup-branch-weight.ll | 4 +- test/CodeGen/ARM/test-sharedidx.ll | 15 +- test/CodeGen/ARM/thumb-alignment.ll | 2 +- test/CodeGen/ARM/thumb1-ldst-opt.ll | 27 + test/CodeGen/ARM/thumb1_return_sequence.ll | 70 +- test/CodeGen/ARM/thumb2-it-block.ll | 24 +- test/CodeGen/ARM/thumb_indirect_calls.ll | 5 +- test/CodeGen/ARM/tls-models.ll | 74 +- test/CodeGen/ARM/tls3.ll | 29 +- test/CodeGen/ARM/unaligned_load_store.ll | 4 +- test/CodeGen/ARM/unaligned_load_store_vfp.ll | 98 + test/CodeGen/ARM/usat-lower.ll | 11 + test/CodeGen/ARM/usat-upper.ll | 11 + test/CodeGen/ARM/v7k-abi-align.ll | 152 + test/CodeGen/ARM/v7k-libcalls.ll | 154 + test/CodeGen/ARM/v7k-sincos.ll | 16 + test/CodeGen/ARM/vcge.ll | 4 +- test/CodeGen/ARM/vcombine.ll | 64 +- test/CodeGen/ARM/vcvt_combine.ll | 103 +- test/CodeGen/ARM/vdiv_combine.ll | 17 + test/CodeGen/ARM/vdup.ll | 16 + test/CodeGen/ARM/vector-DAGCombine.ll | 4 +- test/CodeGen/ARM/vector-load.ll | 4 +- test/CodeGen/ARM/vector-store.ll | 6 +- test/CodeGen/ARM/vext.ll | 34 +- test/CodeGen/ARM/vfp-reg-stride.ll | 42 + test/CodeGen/ARM/vfp-regs-dwarf.ll | 6 +- test/CodeGen/ARM/vld-vst-upgrade.ll | 139 + test/CodeGen/ARM/vld1.ll | 52 +- test/CodeGen/ARM/vld2.ll | 40 +- test/CodeGen/ARM/vld3.ll | 42 +- test/CodeGen/ARM/vld4.ll | 42 +- test/CodeGen/ARM/vlddup.ll | 30 +- test/CodeGen/ARM/vldlane.ll | 92 +- test/CodeGen/ARM/vminmaxnm-safe.ll | 396 + test/CodeGen/ARM/vminmaxnm.ll | 358 +- test/CodeGen/ARM/vmov.ll | 4 +- test/CodeGen/ARM/vmul.ll | 14 +- test/CodeGen/ARM/vpadd.ll | 2 +- test/CodeGen/ARM/vselect_imax.ll | 26 +- test/CodeGen/ARM/vst1.ll | 48 +- test/CodeGen/ARM/vst2.ll | 44 +- test/CodeGen/ARM/vst3.ll | 42 +- test/CodeGen/ARM/vst4.ll | 42 +- test/CodeGen/ARM/vstlane.ll | 90 +- test/CodeGen/ARM/vtrn.ll | 124 +- test/CodeGen/ARM/vuzp.ll | 136 +- test/CodeGen/ARM/vzip.ll | 82 +- test/CodeGen/BPF/sockex2.ll | 2 +- test/CodeGen/CPP/gep.ll | 10 + test/CodeGen/Generic/2009-03-17-LSR-APInt.ll | 28 +- test/CodeGen/Generic/ForceStackAlign.ll | 27 + test/CodeGen/Generic/MachineBranchProb.ll | 8 +- test/CodeGen/Generic/dbg_value.ll | 5 +- test/CodeGen/Generic/lit.local.cfg | 3 + .../Generic/overloaded-intrinsic-name.ll | 32 +- test/CodeGen/Generic/vector.ll | 6 + test/CodeGen/Hexagon/NVJumpCmp.ll | 89 + test/CodeGen/Hexagon/absaddr-store.ll | 1 + test/CodeGen/Hexagon/adde.ll | 6 +- test/CodeGen/Hexagon/alu64.ll | 134 +- test/CodeGen/Hexagon/bit-eval.ll | 53 + test/CodeGen/Hexagon/bit-loop.ll | 80 + test/CodeGen/Hexagon/cfi-late.ll | 65 + test/CodeGen/Hexagon/clr_set_toggle.ll | 2 +- test/CodeGen/Hexagon/combine.ll | 2 +- test/CodeGen/Hexagon/combine_ir.ll | 16 +- .../Hexagon/early-if-conversion-bug1.ll | 412 + test/CodeGen/Hexagon/early-if-phi-i1.ll | 17 + test/CodeGen/Hexagon/early-if-spare.ll | 57 + test/CodeGen/Hexagon/early-if.ll | 75 + test/CodeGen/Hexagon/extload-combine.ll | 2 +- 
test/CodeGen/Hexagon/hwloop-dbg.ll | 12 +- test/CodeGen/Hexagon/i16_VarArg.ll | 2 +- test/CodeGen/Hexagon/i1_VarArg.ll | 2 +- test/CodeGen/Hexagon/i8_VarArg.ll | 2 +- test/CodeGen/Hexagon/ifcvt-edge-weight.ll | 64 + test/CodeGen/Hexagon/memcpy-likely-aligned.ll | 32 + test/CodeGen/Hexagon/mux-basic.ll | 28 + test/CodeGen/Hexagon/opt-fabs.ll | 2 +- test/CodeGen/Hexagon/pic-jumptables.ll | 48 + test/CodeGen/Hexagon/pic-simple.ll | 22 + test/CodeGen/Hexagon/pic-static.ll | 21 + test/CodeGen/Hexagon/relax.ll | 9 +- test/CodeGen/Hexagon/sdr-basic.ll | 15 + test/CodeGen/Hexagon/sdr-shr32.ll | 22 + test/CodeGen/Hexagon/simple_addend.ll | 2 +- .../Hexagon/store-widen-aliased-load.ll | 21 + test/CodeGen/Hexagon/store-widen-negv.ll | 11 + test/CodeGen/Hexagon/store-widen-negv2.ll | 19 + test/CodeGen/Hexagon/store-widen.ll | 18 + test/CodeGen/Hexagon/struct_args.ll | 2 +- test/CodeGen/Hexagon/sube.ll | 10 +- test/CodeGen/Hexagon/tail-dup-subreg-abort.ll | 28 + test/CodeGen/Hexagon/tfr-to-combine.ll | 2 +- test/CodeGen/Hexagon/union-1.ll | 2 - test/CodeGen/Hexagon/v60Intrins.ll | 2559 +++ test/CodeGen/Hexagon/v60Vasr.ll | 247 + test/CodeGen/Hexagon/v60small.ll | 51 + test/CodeGen/Hexagon/vect/vect-cst-v4i32.ll | 2 +- test/CodeGen/Hexagon/vect/vect-loadv4i16.ll | 2 +- test/CodeGen/Hexagon/vect/vect-shuffle.ll | 2 +- test/CodeGen/Hexagon/vect/vect-splat.ll | 2 +- test/CodeGen/Hexagon/vect/vect-xor.ll | 2 +- test/CodeGen/Inputs/DbgValueOtherTargets.ll | 8 +- test/CodeGen/MIR/AArch64/cfi-def-cfa.mir | 31 + .../MIR/AArch64/expected-target-flag-name.mir | 23 + .../MIR/AArch64/invalid-target-flag-name.mir | 23 + test/CodeGen/MIR/AArch64/lit.local.cfg | 8 + .../MIR/AArch64/multiple-lhs-operands.mir | 28 + .../MIR/AArch64/stack-object-local-offset.mir | 41 + test/CodeGen/MIR/AArch64/target-flags.mir | 39 + .../MIR/AMDGPU/expected-target-index-name.mir | 64 + .../AMDGPU/invalid-target-index-operand.mir | 64 + test/CodeGen/MIR/AMDGPU/lit.local.cfg | 2 + .../MIR/AMDGPU/target-index-operands.mir | 104 + test/CodeGen/MIR/ARM/ARMLoadStoreDBG.mir | 165 + test/CodeGen/MIR/ARM/bundled-instructions.mir | 75 + test/CodeGen/MIR/ARM/cfi-same-value.mir | 80 + .../MIR/ARM/expected-closing-brace.mir | 50 + .../ARM/extraneous-closing-brace-error.mir | 20 + test/CodeGen/MIR/ARM/lit.local.cfg | 2 + .../ARM/nested-instruction-bundle-error.mir | 30 + test/CodeGen/MIR/ARM/sched-it-debug-nodes.mir | 160 + test/CodeGen/MIR/Generic/basic-blocks.mir | 49 + .../expected-colon-after-basic-block.mir} | 7 +- ...pected-mbb-reference-for-successor-mbb.mir | 17 +- test/CodeGen/MIR/{ => Generic}/frame-info.mir | 10 +- .../function-missing-machine-function.mir | 0 .../MIR/Generic/invalid-jump-table-kind.mir | 53 + test/CodeGen/MIR/Generic/lit.local.cfg | 3 + .../{ => Generic}/llvm-ir-error-reported.mir | 0 test/CodeGen/MIR/{ => Generic}/llvmIR.mir | 4 +- .../MIR/{ => Generic}/llvmIRMissing.mir | 4 +- ...machine-basic-block-ir-block-reference.mir | 17 + ...machine-basic-block-redefinition-error.mir | 18 + ...machine-basic-block-undefined-ir-block.mir | 15 + .../machine-basic-block-unknown-name.mir | 7 +- .../machine-function-missing-body-error.mir | 0 .../machine-function-missing-function.mir | 8 +- .../machine-function-missing-name.mir | 8 +- .../machine-function-redefinition-error.mir | 0 .../MIR/{ => Generic}/machine-function.mir | 16 +- .../MIR/{ => Generic}/register-info.mir | 8 +- ...lobal-value-or-symbol-after-call-entry.mir | 41 + test/CodeGen/MIR/Mips/lit.local.cfg | 2 + test/CodeGen/MIR/Mips/memory-operands.mir | 102 + 
.../NVPTX/expected-floating-point-literal.mir | 24 + .../floating-point-immediate-operands.mir | 81 + .../floating-point-invalid-type-error.mir | 24 + test/CodeGen/MIR/NVPTX/lit.local.cfg | 2 + test/CodeGen/MIR/PowerPC/lit.local.cfg | 2 + .../PowerPC/unordered-implicit-registers.mir | 45 + test/CodeGen/MIR/X86/basic-block-liveins.mir | 57 +- ...basic-block-not-at-start-of-line-error.mir | 41 + .../MIR/X86/block-address-operands.mir | 121 + test/CodeGen/MIR/X86/callee-saved-info.mir | 95 + test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir | 29 + test/CodeGen/MIR/X86/cfi-def-cfa-register.mir | 32 + test/CodeGen/MIR/X86/cfi-offset.mir | 47 + .../constant-pool-item-redefinition-error.mir | 25 + test/CodeGen/MIR/X86/constant-pool.mir | 139 + test/CodeGen/MIR/X86/constant-value-error.mir | 25 + test/CodeGen/MIR/X86/dead-register-flag.mir | 14 +- .../X86/def-register-already-tied-error.mir | 25 + .../MIR/X86/duplicate-memory-operand-flag.mir | 27 + .../MIR/X86/duplicate-register-flag-error.mir | 35 + .../MIR/X86/early-clobber-register-flag.mir | 45 + .../X86/expected-align-in-memory-operand.mir | 30 + ...lignment-after-align-in-memory-operand.mir | 30 + .../expected-basic-block-at-start-of-body.mir | 40 + ...pected-block-reference-in-blockaddress.mir | 30 + .../X86/expected-comma-after-cfi-register.mir | 42 + .../expected-comma-after-memory-operand.mir | 25 + .../expected-different-implicit-operand.mir | 28 +- ...ected-different-implicit-register-flag.mir | 28 +- .../X86/expected-from-in-memory-operand.mir | 24 + ...-function-reference-after-blockaddress.mir | 30 + ...pected-global-value-after-blockaddress.mir | 30 + .../expected-integer-after-offset-sign.mir | 24 + .../X86/expected-integer-after-tied-def.mir | 25 + .../expected-integer-in-successor-weight.mir | 38 + ...pected-load-or-store-in-memory-operand.mir | 23 + .../MIR/X86/expected-machine-operand.mir | 12 +- ...ted-metadata-node-after-debug-location.mir | 59 + .../expected-metadata-node-after-exclaim.mir | 59 + ...expected-metadata-node-in-stack-object.mir | 25 + ...cted-named-register-in-allocation-hint.mir | 29 + ...amed-register-in-callee-saved-register.mir | 88 + ...ted-named-register-in-functions-livein.mir | 27 + .../X86/expected-named-register-livein.mir | 15 +- .../X86/expected-newline-at-end-of-list.mir | 41 + .../MIR/X86/expected-number-after-bb.mir | 28 +- .../X86/expected-offset-after-cfi-operand.mir | 27 + ...pected-pointer-value-in-memory-operand.mir | 24 + ...xpected-positive-alignment-after-align.mir | 30 + .../expected-register-after-cfi-operand.mir | 42 + .../MIR/X86/expected-register-after-flags.mir | 12 +- ...ed-size-integer-after-memory-operation.mir | 24 + .../CodeGen/MIR/X86/expected-stack-object.mir | 67 + .../X86/expected-subregister-after-colon.mir | 18 +- .../MIR/X86/expected-target-flag-name.mir | 24 + .../X86/expected-tied-def-after-lparen.mir | 25 + .../X86/expected-value-in-memory-operand.mir | 24 + ...d-virtual-register-in-functions-livein.mir | 27 + .../MIR/X86/external-symbol-operands.mir | 64 + .../MIR/X86/fixed-stack-memory-operands.mir | 39 + .../fixed-stack-object-redefinition-error.mir | 28 + test/CodeGen/MIR/X86/fixed-stack-objects.mir | 12 +- .../X86/frame-info-save-restore-points.mir | 73 + .../MIR/X86/frame-info-stack-references.mir | 79 + .../MIR/X86/frame-setup-instruction-flag.mir | 35 + test/CodeGen/MIR/X86/function-liveins.mir | 37 + .../CodeGen/MIR/X86/global-value-operands.mir | 127 +- test/CodeGen/MIR/X86/immediate-operands.mir | 28 +- .../MIR/X86/implicit-register-flag.mir | 65 +- 
test/CodeGen/MIR/X86/inline-asm-registers.mir | 54 + .../MIR/X86/instructions-debug-location.mir | 98 + .../MIR/X86/invalid-constant-pool-item.mir | 25 + .../MIR/X86/invalid-metadata-node-type.mir | 53 + .../MIR/X86/invalid-target-flag-name.mir | 24 + .../MIR/X86/invalid-tied-def-index-error.mir | 25 + test/CodeGen/MIR/X86/jump-table-info.mir | 150 + .../MIR/X86/jump-table-redefinition-error.mir | 76 + test/CodeGen/MIR/X86/killed-register-flag.mir | 38 +- .../MIR/X86/large-cfi-offset-number-error.mir | 27 + .../MIR/X86/large-immediate-operand-error.mir | 18 + .../MIR/X86/large-index-number-error.mir | 26 +- .../MIR/X86/large-offset-number-error.mir | 24 + .../large-size-in-memory-operand-error.mir | 24 + .../CodeGen/MIR/X86/liveout-register-mask.mir | 42 + .../MIR/X86/machine-basic-block-operands.mir | 68 +- test/CodeGen/MIR/X86/machine-instructions.mir | 14 +- test/CodeGen/MIR/X86/machine-verifier.mir | 22 + test/CodeGen/MIR/X86/memory-operands.mir | 508 + test/CodeGen/MIR/X86/metadata-operands.mir | 63 + .../CodeGen/MIR/X86/missing-closing-quote.mir | 22 + test/CodeGen/MIR/X86/missing-comma.mir | 12 +- .../MIR/X86/missing-implicit-operand.mir | 30 +- test/CodeGen/MIR/X86/missing-instruction.mir | 19 - test/CodeGen/MIR/X86/named-registers.mir | 14 +- test/CodeGen/MIR/X86/newline-handling.mir | 109 + .../MIR/X86/null-register-operands.mir | 14 +- .../MIR/X86/register-mask-operands.mir | 28 +- .../register-operands-target-flag-error.mir | 24 + .../X86/simple-register-allocation-hints.mir | 34 + .../spill-slot-fixed-stack-object-aliased.mir | 12 +- ...pill-slot-fixed-stack-object-immutable.mir | 12 +- .../X86/spill-slot-fixed-stack-objects.mir | 12 +- .../MIR/X86/stack-object-debug-info.mir | 65 + .../MIR/X86/stack-object-invalid-name.mir | 28 + ...ack-object-operand-name-mismatch-error.mir | 33 + .../CodeGen/MIR/X86/stack-object-operands.mir | 45 + .../X86/stack-object-redefinition-error.mir | 37 + test/CodeGen/MIR/X86/stack-objects.mir | 22 +- .../MIR/X86/standalone-register-error.mir | 24 + test/CodeGen/MIR/X86/subregister-operands.mir | 21 +- .../X86/successor-basic-blocks-weights.mir | 42 + .../MIR/X86/successor-basic-blocks.mir | 83 + .../MIR/X86/tied-def-operand-invalid.mir | 25 + test/CodeGen/MIR/X86/undef-register-flag.mir | 26 +- .../MIR/X86/undefined-fixed-stack-object.mir | 38 + .../MIR/X86/undefined-global-value.mir | 16 +- .../undefined-ir-block-in-blockaddress.mir | 30 + ...ndefined-ir-block-slot-in-blockaddress.mir | 29 + .../MIR/X86/undefined-jump-table-id.mir | 73 + .../MIR/X86/undefined-named-global-value.mir | 16 +- .../MIR/X86/undefined-register-class.mir | 8 +- .../MIR/X86/undefined-stack-object.mir | 30 + .../X86/undefined-value-in-memory-operand.mir | 24 + .../MIR/X86/undefined-virtual-register.mir | 14 +- test/CodeGen/MIR/X86/unknown-instruction.mir | 10 +- .../MIR/X86/unknown-machine-basic-block.mir | 26 +- .../MIR/X86/unknown-metadata-keyword.mir | 25 + .../CodeGen/MIR/X86/unknown-metadata-node.mir | 59 + .../X86/unknown-named-machine-basic-block.mir | 28 +- test/CodeGen/MIR/X86/unknown-register.mir | 12 +- .../MIR/X86/unknown-subregister-index.mir | 18 +- .../MIR/X86/unrecognized-character.mir | 10 +- .../MIR/X86/used-physical-register-info.mir | 109 + ...variable-sized-stack-object-size-error.mir | 14 +- .../MIR/X86/variable-sized-stack-objects.mir | 18 +- .../virtual-register-redefinition-error.mir | 27 + test/CodeGen/MIR/X86/virtual-registers.mir | 90 +- test/CodeGen/MIR/basic-blocks.mir | 49 - .../MIR/expected-eof-after-successor-mbb.mir | 29 - 
test/CodeGen/MIR/successor-basic-blocks.mir | 58 - .../Mips/Fast-ISel/check-disabled-mcpus.ll | 27 + test/CodeGen/Mips/addi.ll | 2 +- test/CodeGen/Mips/adjust-callstack-sp.ll | 2 +- test/CodeGen/Mips/align16.ll | 2 +- test/CodeGen/Mips/alloca16.ll | 2 +- test/CodeGen/Mips/and1.ll | 2 +- test/CodeGen/Mips/asm-large-immediate.ll | 3 +- test/CodeGen/Mips/atomicops.ll | 2 +- test/CodeGen/Mips/beqzc.ll | 2 +- test/CodeGen/Mips/beqzc1.ll | 2 +- test/CodeGen/Mips/br-jmp.ll | 4 +- test/CodeGen/Mips/brconeq.ll | 2 +- test/CodeGen/Mips/brconeqk.ll | 2 +- test/CodeGen/Mips/brconeqz.ll | 2 +- test/CodeGen/Mips/brconge.ll | 2 +- test/CodeGen/Mips/brcongt.ll | 2 +- test/CodeGen/Mips/brconle.ll | 2 +- test/CodeGen/Mips/brconlt.ll | 2 +- test/CodeGen/Mips/brconne.ll | 2 +- test/CodeGen/Mips/brconnek.ll | 2 +- test/CodeGen/Mips/brconnez.ll | 2 +- test/CodeGen/Mips/brind.ll | 2 +- test/CodeGen/Mips/brsize3.ll | 4 +- test/CodeGen/Mips/brsize3a.ll | 2 +- test/CodeGen/Mips/cconv/arguments-varargs.ll | 72 +- test/CodeGen/Mips/ci2.ll | 2 +- test/CodeGen/Mips/cmplarge.ll | 2 +- test/CodeGen/Mips/const1.ll | 2 +- test/CodeGen/Mips/const4a.ll | 2 +- test/CodeGen/Mips/const6.ll | 4 +- test/CodeGen/Mips/const6a.ll | 4 +- test/CodeGen/Mips/div.ll | 2 +- test/CodeGen/Mips/div_rem.ll | 2 +- test/CodeGen/Mips/divu.ll | 2 +- test/CodeGen/Mips/divu_remu.ll | 2 +- test/CodeGen/Mips/eh.ll | 2 +- .../Mips/emergency-spill-slot-near-fp.ll | 4 +- test/CodeGen/Mips/emutls_generic.ll | 70 + test/CodeGen/Mips/ex2.ll | 2 +- test/CodeGen/Mips/extins.ll | 2 +- test/CodeGen/Mips/f16abs.ll | 2 +- test/CodeGen/Mips/fixdfsf.ll | 4 +- test/CodeGen/Mips/fp16instrinsmc.ll | 4 +- test/CodeGen/Mips/fp16mix.ll | 6 +- test/CodeGen/Mips/fp16static.ll | 2 +- test/CodeGen/Mips/helloworld.ll | 12 +- test/CodeGen/Mips/hf16_1.ll | 4 +- test/CodeGen/Mips/hf16call32.ll | 408 +- test/CodeGen/Mips/hf16call32_body.ll | 206 +- test/CodeGen/Mips/hf1_body.ll | 18 +- test/CodeGen/Mips/hfptrcall.ll | 2 +- test/CodeGen/Mips/i32k.ll | 2 +- .../Mips/inlineasm-assembler-directives.ll | 4 +- test/CodeGen/Mips/inlineasm-cnstrnt-reg.ll | 36 +- test/CodeGen/Mips/inlineasm-cnstrnt-reg64.ll | 4 +- test/CodeGen/Mips/inlineasm-operand-code.ll | 185 +- test/CodeGen/Mips/inlineasm_constraint.ll | 118 +- test/CodeGen/Mips/inlineasmmemop.ll | 8 +- test/CodeGen/Mips/insn-zero-size-bb.ll | 4 +- test/CodeGen/Mips/interrupt-attr-64-error.ll | 9 + .../CodeGen/Mips/interrupt-attr-args-error.ll | 9 + test/CodeGen/Mips/interrupt-attr-error.ll | 9 + test/CodeGen/Mips/interrupt-attr.ll | 244 + test/CodeGen/Mips/jtstat.ll | 2 +- test/CodeGen/Mips/l3mc.ll | 20 +- test/CodeGen/Mips/lb1.ll | 2 +- test/CodeGen/Mips/lbu1.ll | 2 +- test/CodeGen/Mips/lcb2.ll | 4 +- test/CodeGen/Mips/lcb3c.ll | 2 +- test/CodeGen/Mips/lcb4a.ll | 2 +- test/CodeGen/Mips/lcb5.ll | 2 +- test/CodeGen/Mips/lh1.ll | 2 +- test/CodeGen/Mips/lhu1.ll | 2 +- test/CodeGen/Mips/llcarry.ll | 2 +- test/CodeGen/Mips/llvm-ir/atomicrmx.ll | 26 + test/CodeGen/Mips/llvm-ir/call.ll | 14 + test/CodeGen/Mips/llvm-ir/load-atomic.ll | 42 + test/CodeGen/Mips/llvm-ir/sqrt.ll | 13 + test/CodeGen/Mips/llvm-ir/store-atomic.ll | 42 + test/CodeGen/Mips/madd-msub.ll | 2 +- test/CodeGen/Mips/mbrsize4a.ll | 2 +- test/CodeGen/Mips/mips16-hf-attr-2.ll | 2 +- test/CodeGen/Mips/mips16-hf-attr.ll | 2 +- test/CodeGen/Mips/mips16_32_1.ll | 2 +- test/CodeGen/Mips/mips16_32_10.ll | 2 +- test/CodeGen/Mips/mips16_32_3.ll | 2 +- test/CodeGen/Mips/mips16_32_4.ll | 2 +- test/CodeGen/Mips/mips16_32_5.ll | 2 +- test/CodeGen/Mips/mips16_32_6.ll | 2 +- 
test/CodeGen/Mips/mips16_32_7.ll | 2 +- test/CodeGen/Mips/mips16_fpret.ll | 8 +- test/CodeGen/Mips/mips16ex.ll | 2 +- test/CodeGen/Mips/mips16fpe.ll | 6 +- test/CodeGen/Mips/misha.ll | 2 +- test/CodeGen/Mips/msa/elm_copy.ll | 5 +- test/CodeGen/Mips/mul.ll | 2 +- test/CodeGen/Mips/mulll.ll | 2 +- test/CodeGen/Mips/mulull.ll | 2 +- test/CodeGen/Mips/nacl-align.ll | 7 +- test/CodeGen/Mips/neg1.ll | 2 +- test/CodeGen/Mips/no-odd-spreg-msa.ll | 24 +- test/CodeGen/Mips/nomips16.ll | 2 +- test/CodeGen/Mips/not1.ll | 2 +- test/CodeGen/Mips/null.ll | 2 +- test/CodeGen/Mips/or1.ll | 2 +- test/CodeGen/Mips/powif64_16.ll | 2 +- test/CodeGen/Mips/rem.ll | 2 +- test/CodeGen/Mips/remu.ll | 2 +- test/CodeGen/Mips/s2rem.ll | 4 +- test/CodeGen/Mips/sb1.ll | 2 +- test/CodeGen/Mips/sel1c.ll | 2 +- test/CodeGen/Mips/sel2c.ll | 2 +- test/CodeGen/Mips/selTBteqzCmpi.ll | 2 +- test/CodeGen/Mips/selTBtnezCmpi.ll | 2 +- test/CodeGen/Mips/selTBtnezSlti.ll | 2 +- test/CodeGen/Mips/seleq.ll | 2 +- test/CodeGen/Mips/seleqk.ll | 2 +- test/CodeGen/Mips/selgek.ll | 2 +- test/CodeGen/Mips/selgt.ll | 2 +- test/CodeGen/Mips/selle.ll | 2 +- test/CodeGen/Mips/selltk.ll | 2 +- test/CodeGen/Mips/selne.ll | 2 +- test/CodeGen/Mips/selnek.ll | 2 +- test/CodeGen/Mips/selpat.ll | 2 +- test/CodeGen/Mips/seteq.ll | 2 +- test/CodeGen/Mips/seteqz.ll | 2 +- test/CodeGen/Mips/setge.ll | 2 +- test/CodeGen/Mips/setgek.ll | 2 +- test/CodeGen/Mips/setle.ll | 2 +- test/CodeGen/Mips/setlt.ll | 2 +- test/CodeGen/Mips/setltk.ll | 2 +- test/CodeGen/Mips/setne.ll | 2 +- test/CodeGen/Mips/setuge.ll | 2 +- test/CodeGen/Mips/setugt.ll | 2 +- test/CodeGen/Mips/setule.ll | 2 +- test/CodeGen/Mips/setult.ll | 2 +- test/CodeGen/Mips/setultk.ll | 2 +- test/CodeGen/Mips/sh1.ll | 2 +- test/CodeGen/Mips/simplebr.ll | 2 +- test/CodeGen/Mips/sitofp-selectcc-opt.ll | 3 +- test/CodeGen/Mips/sll1.ll | 2 +- test/CodeGen/Mips/sll2.ll | 2 +- test/CodeGen/Mips/sr1.ll | 4 +- test/CodeGen/Mips/sra1.ll | 2 +- test/CodeGen/Mips/sra2.ll | 2 +- test/CodeGen/Mips/srl1.ll | 2 +- test/CodeGen/Mips/srl2.ll | 2 +- test/CodeGen/Mips/stchar.ll | 4 +- test/CodeGen/Mips/stldst.ll | 2 +- test/CodeGen/Mips/sub1.ll | 2 +- test/CodeGen/Mips/sub2.ll | 2 +- test/CodeGen/Mips/tail16.ll | 2 +- test/CodeGen/Mips/tailcall.ll | 2 +- test/CodeGen/Mips/tls-alias.ll | 2 +- test/CodeGen/Mips/tls16.ll | 2 +- test/CodeGen/Mips/tls16_2.ll | 2 +- test/CodeGen/Mips/trap1.ll | 2 +- test/CodeGen/Mips/ul1.ll | 2 +- test/CodeGen/Mips/xor1.ll | 2 +- test/CodeGen/NVPTX/branch-fold.ll | 40 + test/CodeGen/NVPTX/bypass-div.ll | 80 + test/CodeGen/NVPTX/combine-min-max.ll | 307 + test/CodeGen/NVPTX/fma-assoc.ll | 13 + test/CodeGen/NVPTX/global-addrspace.ll | 12 + .../NVPTX/load-with-non-coherent-cache.ll | 264 + test/CodeGen/NVPTX/lower-aggr-copies.ll | 118 +- test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll | 20 +- test/CodeGen/NVPTX/reg-copy.ll | 224 + test/CodeGen/NVPTX/symbol-naming.ll | 4 +- test/CodeGen/NVPTX/vector-call.ll | 2 +- .../PowerPC/2006-01-20-ShiftPartsCrash.ll | 1 + .../PowerPC/2006-08-15-SelectionCrash.ll | 1 + .../CodeGen/PowerPC/2006-12-07-LargeAlloca.ll | 1 + .../CodeGen/PowerPC/2006-12-07-SelectCrash.ll | 1 + .../PowerPC/2007-11-19-VectorSplitting.ll | 1 + test/CodeGen/PowerPC/BoolRetToIntTest.ll | 203 + .../CodeGen/PowerPC/BreakableToken-reduced.ll | 335 + test/CodeGen/PowerPC/aantidep-def-ec.mir | 117 + .../PowerPC/aantidep-inline-asm-use.ll | 305 + test/CodeGen/PowerPC/addisdtprelha-nonr3.mir | 80 + test/CodeGen/PowerPC/alias.ll | 4 +- test/CodeGen/PowerPC/bitcasts-direct-move.ll | 83 + 
test/CodeGen/PowerPC/bitreverse.ll | 23 + test/CodeGen/PowerPC/branch-hint.ll | 135 + test/CodeGen/PowerPC/coal-sections.ll | 24 + test/CodeGen/PowerPC/crbit-asm-disabled.ll | 16 + test/CodeGen/PowerPC/crbit-asm.ll | 3 +- test/CodeGen/PowerPC/cttz.ll | 2 +- test/CodeGen/PowerPC/dbg.ll | 10 +- test/CodeGen/PowerPC/dyn-alloca-offset.ll | 21 + test/CodeGen/PowerPC/e500-1.ll | 30 + test/CodeGen/PowerPC/emutls_generic.ll | 41 + test/CodeGen/PowerPC/fast-isel-binary.ll | 26 +- test/CodeGen/PowerPC/fast-isel-br-const.ll | 2 +- test/CodeGen/PowerPC/fast-isel-call.ll | 14 +- test/CodeGen/PowerPC/fast-isel-cmp-imm.ll | 34 +- test/CodeGen/PowerPC/fast-isel-const.ll | 2 +- .../PowerPC/fast-isel-conversion-p5.ll | 20 +- test/CodeGen/PowerPC/fast-isel-conversion.ll | 48 +- test/CodeGen/PowerPC/fast-isel-crash.ll | 4 +- test/CodeGen/PowerPC/fast-isel-ext.ll | 20 +- test/CodeGen/PowerPC/fast-isel-fold.ll | 26 +- test/CodeGen/PowerPC/fast-isel-indirectbr.ll | 2 +- test/CodeGen/PowerPC/fast-isel-load-store.ll | 34 +- .../CodeGen/PowerPC/fast-isel-redefinition.ll | 2 +- test/CodeGen/PowerPC/fast-isel-ret.ll | 52 +- test/CodeGen/PowerPC/fast-isel-shifter.ll | 12 +- .../fastisel-gep-promote-before-add.ll | 2 +- .../PowerPC/fma-mutate-register-constraint.ll | 89 + .../fp-int-conversions-direct-moves.ll | 24 +- .../PowerPC/fp128-bitcast-after-operation.ll | 137 + test/CodeGen/PowerPC/load-shift-combine.ll | 1 + test/CodeGen/PowerPC/long-compare.ll | 2 +- test/CodeGen/PowerPC/machine-combiner.ll | 188 + test/CodeGen/PowerPC/mc-instrlat.ll | 25 + test/CodeGen/PowerPC/mcm-13.ll | 27 + test/CodeGen/PowerPC/memcpy-vec.ll | 7 +- test/CodeGen/PowerPC/merge-st-chain-op.ll | 41 + .../PowerPC/p8-scalar_vector_conversions.ll | 1476 ++ test/CodeGen/PowerPC/peephole-align.ll | 335 + test/CodeGen/PowerPC/ppc-shrink-wrapping.ll | 784 + test/CodeGen/PowerPC/ppc32-i1-vaarg.ll | 2 +- test/CodeGen/PowerPC/ppc64-icbt-pwr7.ll | 8 +- test/CodeGen/PowerPC/ppcsoftops.ll | 50 + test/CodeGen/PowerPC/pr17168.ll | 366 +- test/CodeGen/PowerPC/pr24546.ll | 22 +- test/CodeGen/PowerPC/pr24636.ll | 41 + test/CodeGen/PowerPC/pr25157-peephole.ll | 61 + .../PowerPC/preincprep-nontrans-crash.ll | 94 + test/CodeGen/PowerPC/qpx-unal-cons-lds.ll | 217 + test/CodeGen/PowerPC/retaddr2.ll | 6 +- test/CodeGen/PowerPC/rm-zext.ll | 6 +- test/CodeGen/PowerPC/rotl-rotr-crash.ll | 12 + test/CodeGen/PowerPC/sdiv-pow2.ll | 8 +- .../selectiondag-extload-computeknownbits.ll | 12 + test/CodeGen/PowerPC/seteq-0.ll | 2 +- test/CodeGen/PowerPC/sjlj.ll | 20 +- test/CodeGen/PowerPC/stack-realign.ll | 26 +- test/CodeGen/PowerPC/stackmap-frame-setup.ll | 20 + test/CodeGen/PowerPC/swaps-le-5.ll | 4 +- test/CodeGen/PowerPC/swaps-le-6.ll | 42 + test/CodeGen/PowerPC/unal-vec-ldst.ll | 580 + test/CodeGen/PowerPC/unal-vec-negarith.ll | 17 + test/CodeGen/PowerPC/unwind-dw2-g.ll | 6 +- .../PowerPC/variable_elem_vec_extracts.ll | 114 + test/CodeGen/PowerPC/vec-asm-disabled.ll | 14 + test/CodeGen/PowerPC/vec_add_sub_quadword.ll | 6 +- .../vector-merge-store-fp-constants.ll | 28 + test/CodeGen/PowerPC/vsx.ll | 5 +- test/CodeGen/PowerPC/vsx_insert_extract_le.ll | 6 +- test/CodeGen/PowerPC/vsx_scalar_ld_st.ll | 6 +- test/CodeGen/PowerPC/vsx_shuffle_le.ll | 20 +- test/CodeGen/SPARC/2011-01-22-SRet.ll | 2 +- test/CodeGen/SPARC/32abi.ll | 191 + test/CodeGen/SPARC/64abi.ll | 84 +- test/CodeGen/SPARC/basictest.ll | 21 +- test/CodeGen/SPARC/float-constants.ll | 41 + test/CodeGen/SPARC/float.ll | 10 +- test/CodeGen/SPARC/fp128.ll | 4 +- test/CodeGen/SPARC/inlineasm.ll | 53 +- 
test/CodeGen/SPARC/missing-sret.ll | 9 + test/CodeGen/SPARC/reserved-regs.ll | 135 + test/CodeGen/SPARC/select-mask.ll | 17 + test/CodeGen/SPARC/spill.ll | 64 + test/CodeGen/SPARC/stack-align.ll | 22 + test/CodeGen/SPARC/tls.ll | 2 +- test/CodeGen/SPARC/varargs.ll | 2 +- test/CodeGen/SystemZ/alloca-03.ll | 84 + test/CodeGen/SystemZ/alloca-04.ll | 14 + test/CodeGen/SystemZ/args-01.ll | 4 +- test/CodeGen/SystemZ/args-02.ll | 4 +- test/CodeGen/SystemZ/args-03.ll | 4 +- test/CodeGen/SystemZ/args-04.ll | 2 +- test/CodeGen/SystemZ/args-07.ll | 2 +- test/CodeGen/SystemZ/asm-17.ll | 3 +- test/CodeGen/SystemZ/asm-18.ll | 3 +- test/CodeGen/SystemZ/dag-combine-01.ll | 97 + test/CodeGen/SystemZ/fp-abs-01.ll | 4 +- test/CodeGen/SystemZ/fp-abs-02.ll | 4 +- test/CodeGen/SystemZ/fp-add-02.ll | 2 +- test/CodeGen/SystemZ/fp-cmp-02.ll | 5 +- test/CodeGen/SystemZ/fp-cmp-05.ll | 80 + test/CodeGen/SystemZ/fp-const-02.ll | 4 +- test/CodeGen/SystemZ/fp-libcall.ll | 273 + test/CodeGen/SystemZ/fp-move-05.ll | 2 +- test/CodeGen/SystemZ/fp-neg-01.ll | 4 +- test/CodeGen/SystemZ/fp-sincos-01.ll | 56 + test/CodeGen/SystemZ/insert-05.ll | 4 +- test/CodeGen/SystemZ/int-cmp-44.ll | 3 +- test/CodeGen/SystemZ/int-cmp-51.ll | 34 + test/CodeGen/SystemZ/int-cmp-52.ll | 24 + test/CodeGen/SystemZ/memchr-01.ll | 2 +- test/CodeGen/SystemZ/spill-01.ll | 2 +- test/CodeGen/SystemZ/vec-args-04.ll | 26 +- test/CodeGen/SystemZ/vec-args-05.ll | 10 +- test/CodeGen/SystemZ/vec-perm-12.ll | 43 + test/CodeGen/SystemZ/vec-perm-13.ll | 38 + test/CodeGen/SystemZ/xor-01.ll | 2 +- .../CodeGen/Thumb/2010-07-15-debugOrdering.ll | 14 +- .../Thumb/cortex-m0-unaligned-access.ll | 2 +- test/CodeGen/Thumb/large-stack.ll | 20 +- .../ldm-stm-base-materialization-thumb2.ll | 93 + .../Thumb/ldm-stm-base-materialization.ll | 77 +- test/CodeGen/Thumb/pop.ll | 4 +- test/CodeGen/Thumb/segmented-stacks.ll | 24 +- test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll | 36 - test/CodeGen/Thumb/thumb-shrink-wrapping.ll | 691 + test/CodeGen/Thumb/vargs.ll | 6 +- test/CodeGen/Thumb2/crash.ll | 14 +- test/CodeGen/Thumb2/emit-unwinding.ll | 11 + test/CodeGen/Thumb2/float-cmp.ll | 44 +- .../CodeGen/Thumb2/float-intrinsics-double.ll | 11 +- test/CodeGen/Thumb2/float-intrinsics-float.ll | 4 +- test/CodeGen/Thumb2/ifcvt-compare.ll | 6 +- test/CodeGen/Thumb2/machine-licm.ll | 8 +- test/CodeGen/Thumb2/pic-load.ll | 12 +- test/CodeGen/Thumb2/setjmp_longjmp.ll | 89 + test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 14 +- test/CodeGen/Thumb2/thumb2-ifcvt2.ll | 4 +- test/CodeGen/Thumb2/thumb2-mulhi.ll | 2 +- test/CodeGen/Thumb2/thumb2-smla.ll | 4 +- test/CodeGen/Thumb2/thumb2-smul.ll | 2 +- test/CodeGen/Thumb2/thumb2-spill-q.ll | 28 +- test/CodeGen/Thumb2/thumb2-uxt_rot.ll | 8 +- test/CodeGen/Thumb2/v8_IT_1.ll | 4 +- test/CodeGen/Thumb2/v8_IT_3.ll | 5 +- test/CodeGen/Thumb2/v8_IT_5.ll | 4 +- test/CodeGen/WebAssembly/call.ll | 127 + test/CodeGen/WebAssembly/cfg-stackify.ll | 1102 ++ test/CodeGen/WebAssembly/comparisons_f32.ll | 181 + test/CodeGen/WebAssembly/comparisons_f64.ll | 181 + test/CodeGen/WebAssembly/comparisons_i32.ll | 98 + test/CodeGen/WebAssembly/comparisons_i64.ll | 98 + test/CodeGen/WebAssembly/conv.ll | 255 + test/CodeGen/WebAssembly/copysign-casts.ll | 28 + test/CodeGen/WebAssembly/cpus.ll | 17 + test/CodeGen/WebAssembly/dead-vreg.ll | 51 + test/CodeGen/WebAssembly/f32.ll | 154 + test/CodeGen/WebAssembly/f64.ll | 154 + test/CodeGen/WebAssembly/fast-isel.ll | 20 + test/CodeGen/WebAssembly/frem.ll | 26 + test/CodeGen/WebAssembly/func.ll | 62 + test/CodeGen/WebAssembly/global.ll | 177 + 
test/CodeGen/WebAssembly/globl.ll | 10 + test/CodeGen/WebAssembly/i32.ll | 190 + test/CodeGen/WebAssembly/i64.ll | 190 + test/CodeGen/WebAssembly/ident.ll | 12 + test/CodeGen/WebAssembly/immediates.ll | 198 + test/CodeGen/WebAssembly/inline-asm.ll | 94 + test/CodeGen/WebAssembly/legalize.ll | 62 + test/CodeGen/WebAssembly/load-ext.ll | 96 + test/CodeGen/WebAssembly/load-store-i1.ll | 68 + test/CodeGen/WebAssembly/load.ll | 46 + test/CodeGen/WebAssembly/loop-idiom.ll | 53 + test/CodeGen/WebAssembly/memory-addr32.ll | 27 + test/CodeGen/WebAssembly/memory-addr64.ll | 27 + test/CodeGen/WebAssembly/offset-folding.ll | 48 + test/CodeGen/WebAssembly/offset.ll | 185 + test/CodeGen/WebAssembly/phi.ll | 47 + test/CodeGen/WebAssembly/reg-stackify.ll | 126 + test/CodeGen/WebAssembly/return-int32.ll | 10 + test/CodeGen/WebAssembly/return-void.ll | 10 + test/CodeGen/WebAssembly/returned.ll | 49 + test/CodeGen/WebAssembly/select.ll | 135 + test/CodeGen/WebAssembly/signext-zeroext.ll | 60 + test/CodeGen/WebAssembly/store-results.ll | 61 + test/CodeGen/WebAssembly/store-trunc.ll | 46 + test/CodeGen/WebAssembly/store.ll | 42 + test/CodeGen/WebAssembly/switch.ll | 174 + test/CodeGen/WebAssembly/unreachable.ll | 34 + test/CodeGen/WebAssembly/unused-argument.ll | 31 + test/CodeGen/WebAssembly/userstack.ll | 81 + test/CodeGen/WebAssembly/varargs.ll | 123 + test/CodeGen/WebAssembly/vtable.ll | 171 + test/CodeGen/WinEH/cppeh-alloca-sink.ll | 180 - test/CodeGen/WinEH/cppeh-catch-all-win32.ll | 86 - test/CodeGen/WinEH/cppeh-catch-all.ll | 97 - test/CodeGen/WinEH/cppeh-catch-and-throw.ll | 143 - test/CodeGen/WinEH/cppeh-catch-scalar.ll | 126 - test/CodeGen/WinEH/cppeh-catch-unwind.ll | 240 - test/CodeGen/WinEH/cppeh-cleanup-invoke.ll | 91 - test/CodeGen/WinEH/cppeh-demote-liveout.ll | 72 - test/CodeGen/WinEH/cppeh-frame-vars.ll | 272 - test/CodeGen/WinEH/cppeh-inalloca.ll | 194 - test/CodeGen/WinEH/cppeh-min-unwind.ll | 99 - .../WinEH/cppeh-mixed-catch-and-cleanup.ll | 106 - test/CodeGen/WinEH/cppeh-multi-catch.ll | 226 - test/CodeGen/WinEH/cppeh-nested-1.ll | 194 - test/CodeGen/WinEH/cppeh-nested-2.ll | 324 - test/CodeGen/WinEH/cppeh-nested-3.ll | 260 - test/CodeGen/WinEH/cppeh-nested-rethrow.ll | 212 - .../WinEH/cppeh-nonalloca-frame-values.ll | 278 - .../CodeGen/WinEH/cppeh-prepared-catch-all.ll | 47 - .../WinEH/cppeh-prepared-catch-reordered.ll | 165 - test/CodeGen/WinEH/cppeh-prepared-catch.ll | 232 - test/CodeGen/WinEH/cppeh-prepared-cleanups.ll | 245 - .../CodeGen/WinEH/cppeh-shared-empty-catch.ll | 110 - .../WinEH/cppeh-similar-catch-blocks.ll | 394 - test/CodeGen/WinEH/cppeh-state-calc-1.ll | 289 - test/CodeGen/WinEH/seh-catch-all.ll | 59 - test/CodeGen/WinEH/seh-exception-code.ll | 66 - test/CodeGen/WinEH/seh-exception-code2.ll | 91 - test/CodeGen/WinEH/seh-inlined-finally.ll | 83 - .../WinEH/seh-outlined-finally-win32.ll | 172 - test/CodeGen/WinEH/seh-outlined-finally.ll | 155 - test/CodeGen/WinEH/seh-prepared-basic.ll | 83 - test/CodeGen/WinEH/seh-resume-phi.ll | 66 - test/CodeGen/WinEH/seh-simple.ll | 233 - test/CodeGen/WinEH/wineh-cloning.ll | 391 + test/CodeGen/WinEH/wineh-demotion.ll | 356 + .../CodeGen/WinEH/wineh-intrinsics-invalid.ll | 26 + test/CodeGen/WinEH/wineh-intrinsics.ll | 44 + test/CodeGen/WinEH/wineh-no-demotion.ll | 130 + .../WinEH/wineh-statenumbering-cleanups.ll | 62 + test/CodeGen/WinEH/wineh-statenumbering.ll | 148 + test/CodeGen/X86/2006-10-02-BoolRetCrash.ll | 1 + .../2006-10-19-SwitchUnnecessaryBranching.ll | 4 +- test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll | 2 +- 
.../X86/2008-03-12-ThreadLocalAlias.ll | 2 +- test/CodeGen/X86/2008-03-14-SpillerCrash.ll | 2 +- .../X86/2008-06-13-NotVolatileLoadStore.ll | 8 +- test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll | 15 +- test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll | 8 +- .../X86/2009-05-23-dagcombine-shifts.ll | 14 +- test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll | 8 +- .../X86/2009-06-05-VariableIndexInsert.ll | 1 + test/CodeGen/X86/2009-06-06-ConcatVectors.ll | 1 + test/CodeGen/X86/2009-10-16-Scope.ll | 6 +- test/CodeGen/X86/2010-01-18-DbgValue.ll | 8 +- test/CodeGen/X86/2010-02-01-DbgValueCrash.ll | 8 +- test/CodeGen/X86/2010-05-25-DotDebugLoc.ll | 22 +- test/CodeGen/X86/2010-05-26-DotDebugLoc.ll | 20 +- test/CodeGen/X86/2010-05-28-Crash.ll | 18 +- .../CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll | 28 +- test/CodeGen/X86/2010-07-06-DbgCrash.ll | 7 +- test/CodeGen/X86/2010-08-04-StackVariable.ll | 24 +- test/CodeGen/X86/2010-09-16-EmptyFilename.ll | 10 +- test/CodeGen/X86/2010-11-02-DbgParameter.ll | 8 +- .../X86/2011-01-24-DbgValue-Before-Use.ll | 20 +- test/CodeGen/X86/2011-10-21-widen-cmp.ll | 42 +- .../X86/2011-12-06-AVXVectorExtractCombine.ll | 13 +- test/CodeGen/X86/2011-20-21-zext-ui2fp.ll | 14 +- test/CodeGen/X86/2012-01-12-extract-sv.ll | 28 +- .../CodeGen/X86/2012-08-17-legalizer-crash.ll | 3 +- test/CodeGen/X86/2012-1-10-buildvector.ll | 1 + test/CodeGen/X86/2012-11-30-handlemove-dbg.ll | 8 +- test/CodeGen/X86/2012-11-30-misched-dbg.ll | 16 +- test/CodeGen/X86/2012-11-30-regpres-dbg.ll | 8 +- test/CodeGen/X86/3dnow-intrinsics.ll | 4 +- test/CodeGen/X86/GC/alloc_loop.ll | 1 + test/CodeGen/X86/GC/cg-O0.ll | 1 + test/CodeGen/X86/GC/dynamic-frame-size.ll | 10 +- test/CodeGen/X86/GC/lower_gcroot.ll | 1 + test/CodeGen/X86/MachineBranchProb.ll | 4 +- test/CodeGen/X86/MachineSink-DbgValue.ll | 12 +- test/CodeGen/X86/MergeConsecutiveStores.ll | 37 +- test/CodeGen/X86/StackColoring-dbg.ll | 6 +- test/CodeGen/X86/add-nsw-sext.ll | 168 + test/CodeGen/X86/aliases.ll | 26 +- test/CodeGen/X86/and-encoding.ll | 41 + test/CodeGen/X86/atomic-flags.ll | 61 + test/CodeGen/X86/atomic-minmax-i6432.ll | 8 +- test/CodeGen/X86/atomic-non-integer.ll | 108 + test/CodeGen/X86/atomic128.ll | 52 +- test/CodeGen/X86/atomic_mi.ll | 662 +- test/CodeGen/X86/avg.ll | 724 + test/CodeGen/X86/avx-cvt-2.ll | 1 + test/CodeGen/X86/avx-cvt.ll | 6 +- .../CodeGen/X86/avx-intrinsics-x86-upgrade.ll | 66 + test/CodeGen/X86/avx-intrinsics-x86.ll | 685 +- test/CodeGen/X86/avx-isa-check.ll | 570 + test/CodeGen/X86/avx-load-store.ll | 4 +- test/CodeGen/X86/avx-logic.ll | 2 + test/CodeGen/X86/avx-shift.ll | 1 + test/CodeGen/X86/avx-shuffle-x86_32.ll | 26 +- test/CodeGen/X86/avx-splat.ll | 114 +- test/CodeGen/X86/avx-vbroadcast.ll | 261 +- test/CodeGen/X86/avx-vperm2x128.ll | 44 +- test/CodeGen/X86/avx-win64.ll | 2 - test/CodeGen/X86/avx.ll | 6 +- test/CodeGen/X86/avx2-conversions.ll | 131 +- .../X86/avx2-intrinsics-x86-upgrade.ll | 120 + test/CodeGen/X86/avx2-intrinsics-x86.ll | 94 +- test/CodeGen/X86/avx2-nontemporal.ll | 17 +- test/CodeGen/X86/avx2-vbroadcast.ll | 441 +- test/CodeGen/X86/avx512-arith.ll | 358 +- test/CodeGen/X86/avx512-bugfix-25270.ll | 35 + test/CodeGen/X86/avx512-build-vector.ll | 1 + test/CodeGen/X86/avx512-calling-conv.ll | 481 +- test/CodeGen/X86/avx512-cvt.ll | 119 +- test/CodeGen/X86/avx512-ext.ll | 1835 ++ test/CodeGen/X86/avx512-extract-subvector.ll | 56 + test/CodeGen/X86/avx512-fma.ll | 155 +- .../X86/avx512-gather-scatter-intrin.ll | 185 +- test/CodeGen/X86/avx512-insert-extract.ll | 519 +- 
test/CodeGen/X86/avx512-intrinsics.ll | 4955 ++++- test/CodeGen/X86/avx512-logic.ll | 164 +- test/CodeGen/X86/avx512-mask-op.ll | 1474 +- test/CodeGen/X86/avx512-skx-insert-subvec.ll | 135 + test/CodeGen/X86/avx512-trunc-ext.ll | 961 - test/CodeGen/X86/avx512-trunc.ll | 488 + test/CodeGen/X86/avx512-vbroadcast.ll | 262 +- test/CodeGen/X86/avx512-vec-cmp.ll | 27 +- test/CodeGen/X86/avx512bw-intrinsics.ll | 2658 ++- test/CodeGen/X86/avx512bwvl-intrinsics.ll | 748 + test/CodeGen/X86/avx512cd-intrinsics.ll | 18 + test/CodeGen/X86/avx512cdvl-intrinsics.ll | 179 + test/CodeGen/X86/avx512dq-intrinsics.ll | 667 + test/CodeGen/X86/avx512dqvl-intrinsics.ll | 818 +- test/CodeGen/X86/avx512vl-intrinsics.ll | 2977 ++- test/CodeGen/X86/bit-piece-comment.ll | 64 + test/CodeGen/X86/bitreverse.ll | 22 + test/CodeGen/X86/branchfolding-catchpads.ll | 95 + test/CodeGen/X86/buildvec-insertvec.ll | 1 + test/CodeGen/X86/catchpad-realign-savexmm.ll | 53 + test/CodeGen/X86/catchpad-regmask.ll | 144 + test/CodeGen/X86/catchpad-weight.ll | 82 + .../CodeGen/X86/catchret-empty-fallthrough.ll | 53 + test/CodeGen/X86/catchret-fallthrough.ll | 42 + test/CodeGen/X86/cleanuppad-inalloca.ll | 68 + .../CodeGen/X86/cleanuppad-large-codemodel.ll | 27 + test/CodeGen/X86/cleanuppad-realign.ll | 78 + test/CodeGen/X86/clz.ll | 148 +- test/CodeGen/X86/cmp.ll | 44 + test/CodeGen/X86/cmpxchg-clobber-flags.ll | 144 +- test/CodeGen/X86/coal-sections.ll | 23 + test/CodeGen/X86/coalescer-win64.ll | 16 + .../X86/code_placement_cold_loop_blocks.ll | 122 + ...ode_placement_ignore_succ_in_inner_loop.ll | 123 + .../X86/code_placement_loop_rotation.ll | 80 + .../X86/code_placement_loop_rotation2.ll | 122 + test/CodeGen/X86/codegen-prepare-cast.ll | 2 +- test/CodeGen/X86/coff-comdat.ll | 2 +- test/CodeGen/X86/combine-and.ll | 1 + test/CodeGen/X86/combine-avx-intrinsics.ll | 59 - test/CodeGen/X86/combine-avx2-intrinsics.ll | 74 - test/CodeGen/X86/combine-multiplies.ll | 163 + test/CodeGen/X86/combine-or.ll | 1 + test/CodeGen/X86/combine-sse2-intrinsics.ll | 53 - test/CodeGen/X86/combine-sse41-intrinsics.ll | 91 - test/CodeGen/X86/commute-two-addr.ll | 2 +- test/CodeGen/X86/constant-hoisting-and.ll | 19 + test/CodeGen/X86/constant-hoisting-cmp.ll | 25 + .../X86/copysign-constant-magnitude.ll | 24 +- test/CodeGen/X86/cppeh-nounwind.ll | 35 - test/CodeGen/X86/cxx_tlscc64.ll | 71 + test/CodeGen/X86/dag-fmf-cse.ll | 22 + test/CodeGen/X86/dag-merge-fast-accesses.ll | 90 + test/CodeGen/X86/darwin-tls.ll | 28 + .../X86/dbg-changes-codegen-branch-folding.ll | 48 +- test/CodeGen/X86/dbg-changes-codegen.ll | 9 +- test/CodeGen/X86/dbg-combine.ll | 12 +- test/CodeGen/X86/debugloc-argsize.ll | 58 + test/CodeGen/X86/divide-by-constant.ll | 32 + test/CodeGen/X86/dllexport-x86_64.ll | 10 +- test/CodeGen/X86/dllexport.ll | 8 +- test/CodeGen/X86/dwarf-comp-dir.ll | 2 +- test/CodeGen/X86/dynamic-allocas-VLAs.ll | 2 +- test/CodeGen/X86/eh-null-personality.ll | 25 + test/CodeGen/X86/eh_frame.ll | 4 +- test/CodeGen/X86/emutls-pic.ll | 168 + test/CodeGen/X86/emutls-pie.ll | 131 + test/CodeGen/X86/emutls.ll | 347 + test/CodeGen/X86/emutls_generic.ll | 107 + test/CodeGen/X86/exedeps-movq.ll | 19 + test/CodeGen/X86/expand-vr64-gr64-copy.mir | 36 + .../X86/extractelement-legalization-cycle.ll | 21 + test/CodeGen/X86/extractelement-shuffle.ll | 1 + test/CodeGen/X86/fadd-combines.ll | 224 + test/CodeGen/X86/fast-isel-bitcasts-avx.ll | 244 + test/CodeGen/X86/fast-isel-bitcasts.ll | 245 + test/CodeGen/X86/fast-isel-cmp-branch.ll | 17 +- test/CodeGen/X86/fast-isel-deadcode.ll | 
147 + test/CodeGen/X86/fast-isel-emutls.ll | 48 + test/CodeGen/X86/fast-isel-nontemporal.ll | 111 + test/CodeGen/X86/fast-isel-stackcheck.ll | 44 + test/CodeGen/X86/fast-isel-tls.ll | 2 +- test/CodeGen/X86/fdiv-combine.ll | 69 +- test/CodeGen/X86/fdiv.ll | 52 +- test/CodeGen/X86/fixup-lea.ll | 34 + test/CodeGen/X86/float-asmprint.ll | 15 + test/CodeGen/X86/floor-soft-float.ll | 2 +- test/CodeGen/X86/fma-commute-x86.ll | 761 + test/CodeGen/X86/fma-do-not-commute.ll | 2 +- .../X86/fma-intrinsics-phi-213-to-231.ll | 499 +- test/CodeGen/X86/fma-intrinsics-x86.ll | 752 +- test/CodeGen/X86/fma-scalar-memfold.ll | 383 + test/CodeGen/X86/fma_patterns.ll | 1317 +- test/CodeGen/X86/fma_patterns_wide.ll | 849 +- test/CodeGen/X86/fmaxnum.ll | 203 +- test/CodeGen/X86/fminnum.ll | 181 +- test/CodeGen/X86/fmul-combines.ll | 44 +- test/CodeGen/X86/fold-load-binops.ll | 1 + test/CodeGen/X86/fold-load-unops.ll | 1 + test/CodeGen/X86/fold-push.ll | 40 + test/CodeGen/X86/force-align-stack-alloca.ll | 2 +- test/CodeGen/X86/force-align-stack.ll | 2 +- test/CodeGen/X86/fp-fast.ll | 1 + test/CodeGen/X86/fp-logic.ll | 264 + test/CodeGen/X86/fp128-calling-conv.ll | 47 + test/CodeGen/X86/fp128-cast.ll | 279 + test/CodeGen/X86/fp128-compare.ll | 96 + test/CodeGen/X86/fp128-i128.ll | 320 + test/CodeGen/X86/fp128-libcalls.ll | 107 + test/CodeGen/X86/fp128-load.ll | 35 + test/CodeGen/X86/fp128-store.ll | 14 + test/CodeGen/X86/fpcmp-soft-fp.ll | 127 + test/CodeGen/X86/fpstack-debuginstr-kill.ll | 16 +- test/CodeGen/X86/frem-msvc32.ll | 12 + test/CodeGen/X86/funclet-layout.ll | 158 + test/CodeGen/X86/function-alias.ll | 12 + test/CodeGen/X86/gcc_except_table.ll | 2 +- test/CodeGen/X86/global-sections.ll | 7 +- test/CodeGen/X86/h-register-store.ll | 25 +- test/CodeGen/X86/h-registers-0.ll | 1 + test/CodeGen/X86/h-registers-1.ll | 1 + test/CodeGen/X86/h-registers-3.ll | 1 + test/CodeGen/X86/half.ll | 4 +- test/CodeGen/X86/hhvm-cc.ll | 241 + test/CodeGen/X86/i386-shrink-wrapping.ll | 113 + test/CodeGen/X86/immediate_merging.ll | 82 + test/CodeGen/X86/implicit-null-check.ll | 51 +- test/CodeGen/X86/imul.ll | 63 + test/CodeGen/X86/inalloca-stdcall.ll | 5 +- test/CodeGen/X86/inalloca.ll | 15 +- test/CodeGen/X86/inconsistent_landingpad.ll | 30 + test/CodeGen/X86/inline-asm-2addr.ll | 11 +- .../X86/inline-asm-sp-clobber-memcpy.ll | 2 +- test/CodeGen/X86/inline-sse.ll | 34 + .../CodeGen/X86/insertps-from-constantpool.ll | 20 + test/CodeGen/X86/insertps-unfold-load-bug.ll | 33 + test/CodeGen/X86/int-intrinsic.ll | 2 +- test/CodeGen/X86/late-address-taken.ll | 68 + test/CodeGen/X86/lea-opt.ll | 131 + test/CodeGen/X86/lit.local.cfg | 2 +- .../X86/{frameescape.ll => localescape.ll} | 51 +- test/CodeGen/X86/lower-vec-shift-2.ll | 1 + test/CodeGen/X86/lsr-static-addr.ll | 2 +- test/CodeGen/X86/machine-combiner-int-vec.ll | 112 + test/CodeGen/X86/machine-combiner-int.ll | 194 + test/CodeGen/X86/machine-combiner.ll | 467 +- test/CodeGen/X86/machine-cp.ll | 38 +- .../X86/machine-trace-metrics-crash.ll | 4 +- test/CodeGen/X86/masked_gather_scatter.ll | 2012 +- test/CodeGen/X86/masked_memop.ll | 524 +- test/CodeGen/X86/materialize.ll | 184 + test/CodeGen/X86/mcu-abi.ll | 112 + test/CodeGen/X86/memcpy-2.ll | 26 +- test/CodeGen/X86/memcpy.ll | 33 + .../X86/merge-store-partially-alias-loads.ll | 52 + .../X86/misched-code-difference-with-debug.ll | 12 +- test/CodeGen/X86/mmx-arg-passing-x86-64.ll | 1 + test/CodeGen/X86/mmx-arg-passing.ll | 1 + test/CodeGen/X86/mmx-coalescing.ll | 84 + test/CodeGen/X86/mmx-intrinsics.ll | 291 +- 
test/CodeGen/X86/mmx-only.ll | 21 + test/CodeGen/X86/movntdq-no-avx.ll | 2 +- test/CodeGen/X86/movpc32-check.ll | 42 + test/CodeGen/X86/movtopush.ll | 25 +- test/CodeGen/X86/mult-alt-x86.ll | 2 +- test/CodeGen/X86/musttail-varargs.ll | 43 + test/CodeGen/X86/nontemporal-2.ll | 21 +- test/CodeGen/X86/nontemporal.ll | 11 +- test/CodeGen/X86/null-streamer.ll | 4 +- test/CodeGen/X86/opt-ext-uses.ll | 8 +- test/CodeGen/X86/or-branch.ll | 30 +- test/CodeGen/X86/or-lea.ll | 120 + test/CodeGen/X86/palignr.ll | 1 + test/CodeGen/X86/patchpoint-verifiable.mir | 42 + .../X86/peephole-na-phys-copy-folding.ll | 190 + test/CodeGen/X86/pmul.ll | 297 +- test/CodeGen/X86/pop-stack-cleanup.ll | 76 + test/CodeGen/X86/powi.ll | 38 +- test/CodeGen/X86/pr11415.ll | 8 +- test/CodeGen/X86/pr11468.ll | 2 +- test/CodeGen/X86/pr11985.ll | 30 +- test/CodeGen/X86/pr13577.ll | 5 +- test/CodeGen/X86/pr15267.ll | 240 +- test/CodeGen/X86/pr17631.ll | 2 +- test/CodeGen/X86/pr21529.ll | 15 - test/CodeGen/X86/pr22019.ll | 2 +- test/CodeGen/X86/pr23900.ll | 29 - test/CodeGen/X86/pr24139.ll | 148 + test/CodeGen/X86/pr24602.ll | 17 + test/CodeGen/X86/pr25828.ll | 30 + test/CodeGen/X86/prolog-push-seq.ll | 19 + test/CodeGen/X86/pseudo_cmov_lower.ll | 267 + test/CodeGen/X86/pseudo_cmov_lower1.ll | 39 + test/CodeGen/X86/pseudo_cmov_lower2.ll | 100 + test/CodeGen/X86/psubus.ll | 580 +- test/CodeGen/X86/push-cfi-debug.ll | 53 + test/CodeGen/X86/push-cfi-obj.ll | 44 + test/CodeGen/X86/push-cfi.ll | 304 + test/CodeGen/X86/ragreedy-hoist-spill.ll | 2 +- test/CodeGen/X86/rem_crash.ll | 257 + test/CodeGen/X86/remat-invalid-liveness.ll | 85 - test/CodeGen/X86/rodata-relocs.ll | 8 +- test/CodeGen/X86/rounding-ops.ll | 24 +- test/CodeGen/X86/safestack.ll | 32 + test/CodeGen/X86/sar_fold.ll | 37 + test/CodeGen/X86/sar_fold64.ll | 43 + test/CodeGen/X86/scalar-fp-to-i64.ll | 151 + test/CodeGen/X86/scalar-int-to-fp.ll | 132 + test/CodeGen/X86/sdiv-pow2.ll | 33 + test/CodeGen/X86/seh-catch-all-win32.ll | 33 +- test/CodeGen/X86/seh-catch-all.ll | 29 +- test/CodeGen/X86/seh-catchpad.ll | 198 + test/CodeGen/X86/seh-except-finally.ll | 71 +- test/CodeGen/X86/seh-exception-code.ll | 38 + test/CodeGen/X86/seh-filter.ll | 21 - test/CodeGen/X86/seh-finally.ll | 48 +- test/CodeGen/X86/seh-safe-div-win32.ll | 40 +- test/CodeGen/X86/seh-safe-div.ll | 52 +- test/CodeGen/X86/seh-stack-realign-win32.ll | 99 - test/CodeGen/X86/seh-stack-realign.ll | 34 +- test/CodeGen/X86/setcc-lowering.ll | 1 + test/CodeGen/X86/setcc.ll | 20 + test/CodeGen/X86/shift-bmi2.ll | 20 +- test/CodeGen/X86/shrink-wrap-chkstk.ll | 37 + test/CodeGen/X86/slow-div.ll | 15 + test/CodeGen/X86/slow-unaligned-mem.ll | 95 + test/CodeGen/X86/soft-fp.ll | 34 +- test/CodeGen/X86/soft-sitofp.ll | 169 + test/CodeGen/X86/splat-for-size.ll | 197 +- test/CodeGen/X86/sqrt-fastmath.ll | 9 +- test/CodeGen/X86/sse-align-12.ll | 1 + test/CodeGen/X86/sse-minmax.ll | 2 +- test/CodeGen/X86/sse-only.ll | 20 + test/CodeGen/X86/sse-scalar-fp-arith-unary.ll | 1 + test/CodeGen/X86/sse2-vector-shifts.ll | 282 +- test/CodeGen/X86/sse2.ll | 1 + test/CodeGen/X86/sse3-avx-addsub-2.ll | 312 +- test/CodeGen/X86/sse3-avx-addsub.ll | 197 +- test/CodeGen/X86/sse3-intrinsics-fast-isel.ll | 171 + test/CodeGen/X86/sse3.ll | 7 +- .../X86/sse41-intrinsics-x86-upgrade.ll | 47 +- test/CodeGen/X86/sse41-intrinsics-x86.ll | 48 - test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll | 185 +- test/CodeGen/X86/sse41.ll | 65 +- .../CodeGen/X86/sse4a-intrinsics-fast-isel.ll | 98 + test/CodeGen/X86/sse_partial_update.ll | 33 + 
.../CodeGen/X86/ssse3-intrinsics-fast-isel.ll | 290 + test/CodeGen/X86/stack-align-memcpy.ll | 2 +- test/CodeGen/X86/stack-folding-adx-x86_64.ll | 45 + test/CodeGen/X86/stack-folding-fp-avx1.ll | 18 +- test/CodeGen/X86/stack-folding-fp-sse42.ll | 28 +- test/CodeGen/X86/stack-folding-int-avx1.ll | 40 +- test/CodeGen/X86/stack-folding-int-avx2.ll | 55 +- test/CodeGen/X86/stack-folding-int-sse42.ll | 38 +- test/CodeGen/X86/stack-folding-mmx.ll | 148 +- test/CodeGen/X86/stack-folding-x86_64.ll | 2 +- test/CodeGen/X86/stack-folding-xop.ll | 2 +- test/CodeGen/X86/stack-probe-size.ll | 3 +- test/CodeGen/X86/stack-protector-dbginfo.ll | 36 +- test/CodeGen/X86/stack-protector-weight.ll | 4 +- test/CodeGen/X86/stackmap-frame-setup.ll | 20 + test/CodeGen/X86/statepoint-allocas.ll | 10 +- test/CodeGen/X86/statepoint-call-lowering.ll | 103 +- test/CodeGen/X86/statepoint-far-call.ll | 4 +- test/CodeGen/X86/statepoint-forward.ll | 16 +- .../statepoint-gctransition-call-lowering.ll | 66 +- test/CodeGen/X86/statepoint-invoke.ll | 78 +- test/CodeGen/X86/statepoint-stack-usage.ll | 54 +- .../CodeGen/X86/statepoint-stackmap-format.ll | 96 +- test/CodeGen/X86/stdarg.ll | 10 +- test/CodeGen/X86/stores-merging.ll | 46 +- test/CodeGen/X86/switch-bt.ll | 8 +- test/CodeGen/X86/switch-edge-weight.ll | 281 + test/CodeGen/X86/switch-jump-table.ll | 54 +- test/CodeGen/X86/switch-order-weight.ll | 2 +- test/CodeGen/X86/switch.ll | 85 +- test/CodeGen/X86/swizzle-2.ll | 1 + .../CodeGen/X86/system-intrinsics-64-xsave.ll | 41 + .../X86/system-intrinsics-64-xsavec.ll | 21 + .../X86/system-intrinsics-64-xsaveopt.ll | 21 + .../X86/system-intrinsics-64-xsaves.ll | 41 + test/CodeGen/X86/system-intrinsics-64.ll | 2 +- test/CodeGen/X86/system-intrinsics-xsave.ll | 23 + test/CodeGen/X86/system-intrinsics-xsavec.ll | 12 + .../CodeGen/X86/system-intrinsics-xsaveopt.ll | 12 + test/CodeGen/X86/system-intrinsics-xsaves.ll | 23 + test/CodeGen/X86/system-intrinsics.ll | 2 +- test/CodeGen/X86/tail-dup-catchret.ll | 31 + test/CodeGen/X86/tail-merge-wineh.ll | 107 + test/CodeGen/X86/tail-opts.ll | 40 +- test/CodeGen/X86/tailcall-mem-intrinsics.ll | 4 +- test/CodeGen/X86/tailcall-msvc-conventions.ll | 189 + test/CodeGen/X86/tailcall-readnone.ll | 15 + test/CodeGen/X86/tls-android-negative.ll | 65 + test/CodeGen/X86/tls-android.ll | 89 + test/CodeGen/X86/tls-models.ll | 2 + test/CodeGen/X86/tls-pie.ll | 8 + test/CodeGen/X86/token_landingpad.ll | 21 + test/CodeGen/X86/trunc-store.ll | 49 + test/CodeGen/X86/unaligned-32-byte-memops.ll | 7 +- test/CodeGen/X86/unaligned-spill-folding.ll | 2 +- test/CodeGen/X86/unknown-location.ll | 8 +- test/CodeGen/X86/v2f32.ll | 1 + test/CodeGen/X86/vec_cast2.ll | 31 +- test/CodeGen/X86/vec_cmp_sint-128.ll | 722 + test/CodeGen/X86/vec_cmp_uint-128.ll | 860 + test/CodeGen/X86/vec_ctbits.ll | 129 +- test/CodeGen/X86/vec_extract-avx.ll | 116 +- test/CodeGen/X86/vec_fabs.ll | 2 +- test/CodeGen/X86/vec_fp_to_int.ll | 1245 +- test/CodeGen/X86/vec_insert-5.ll | 1 + test/CodeGen/X86/vec_int_to_fp.ll | 1872 +- test/CodeGen/X86/vec_minmax_sint.ll | 2090 +++ test/CodeGen/X86/vec_minmax_uint.ll | 2229 +++ test/CodeGen/X86/vec_sdiv_to_shift.ll | 13 + test/CodeGen/X86/vec_trunc_sext.ll | 31 +- test/CodeGen/X86/vec_uint_to_fp-fastmath.ll | 130 + test/CodeGen/X86/vec_uint_to_fp.ll | 8 +- test/CodeGen/X86/vector-blend.ll | 72 +- test/CodeGen/X86/vector-idiv.ll | 1 + test/CodeGen/X86/vector-lzcnt-128.ll | 472 +- test/CodeGen/X86/vector-lzcnt-256.ll | 257 +- test/CodeGen/X86/vector-lzcnt-512.ll | 219 + 
.../X86/vector-merge-store-fp-constants.ll | 35 + test/CodeGen/X86/vector-popcnt-128.ll | 37 +- test/CodeGen/X86/vector-popcnt-256.ll | 73 +- test/CodeGen/X86/vector-popcnt-512.ll | 161 + test/CodeGen/X86/vector-rotate-128.ll | 1595 ++ test/CodeGen/X86/vector-rotate-256.ll | 1089 ++ test/CodeGen/X86/vector-sext.ll | 4090 +++- test/CodeGen/X86/vector-shift-ashr-128.ll | 917 +- test/CodeGen/X86/vector-shift-ashr-256.ll | 691 +- test/CodeGen/X86/vector-shift-ashr-512.ll | 378 + test/CodeGen/X86/vector-shift-lshr-128.ll | 619 +- test/CodeGen/X86/vector-shift-lshr-256.ll | 444 +- test/CodeGen/X86/vector-shift-lshr-512.ll | 317 + test/CodeGen/X86/vector-shift-shl-128.ll | 501 +- test/CodeGen/X86/vector-shift-shl-256.ll | 403 +- test/CodeGen/X86/vector-shift-shl-512.ll | 293 + test/CodeGen/X86/vector-shuffle-128-v16.ll | 276 + test/CodeGen/X86/vector-shuffle-128-v2.ll | 318 +- test/CodeGen/X86/vector-shuffle-128-v4.ll | 92 + test/CodeGen/X86/vector-shuffle-128-v8.ll | 252 + test/CodeGen/X86/vector-shuffle-256-v16.ll | 249 +- test/CodeGen/X86/vector-shuffle-256-v32.ll | 210 +- test/CodeGen/X86/vector-shuffle-256-v4.ll | 703 +- test/CodeGen/X86/vector-shuffle-256-v8.ll | 221 +- test/CodeGen/X86/vector-shuffle-512-v16.ll | 134 + test/CodeGen/X86/vector-shuffle-512-v32.ll | 44 + test/CodeGen/X86/vector-shuffle-512-v8.ll | 2487 ++- test/CodeGen/X86/vector-shuffle-combining.ll | 1 + test/CodeGen/X86/vector-shuffle-mmx.ll | 1 + test/CodeGen/X86/vector-shuffle-sse1.ll | 1 + test/CodeGen/X86/vector-shuffle-sse4a.ll | 140 + test/CodeGen/X86/vector-shuffle-v1.ll | 439 + test/CodeGen/X86/vector-trunc.ll | 685 +- test/CodeGen/X86/vector-tzcnt-128.ll | 2035 +- test/CodeGen/X86/vector-tzcnt-256.ll | 1455 +- test/CodeGen/X86/vector-tzcnt-512.ll | 271 + test/CodeGen/X86/vector-zext.ll | 1239 +- test/CodeGen/X86/vector-zmov.ll | 1 + ...rs-cleared-in-machine-functions-liveins.ll | 19 + test/CodeGen/X86/vmovq.ll | 28 + test/CodeGen/X86/vselect-2.ll | 1 + test/CodeGen/X86/vselect-avx.ll | 12 +- test/CodeGen/X86/vselect-minmax.ll | 15574 +++++++++++----- test/CodeGen/X86/vselect.ll | 1 + test/CodeGen/X86/vshift_scalar.ll | 1 + test/CodeGen/X86/wide-integer-cmp.ll | 130 + test/CodeGen/X86/widen_load-2.ll | 4 +- test/CodeGen/X86/widen_shuffle-1.ll | 1 + test/CodeGen/X86/win-catchpad-csrs.ll | 268 + test/CodeGen/X86/win-catchpad-nested-cxx.ll | 105 + test/CodeGen/X86/win-catchpad-nested.ll | 42 + test/CodeGen/X86/win-catchpad-varargs.ll | 101 + test/CodeGen/X86/win-catchpad.ll | 353 + test/CodeGen/X86/win-cleanuppad.ll | 199 + test/CodeGen/X86/win-funclet-cfi.ll | 95 + ..._prepare.ll => win-mixed-ehpersonality.ll} | 51 +- test/CodeGen/X86/win32-eh-states.ll | 213 +- test/CodeGen/X86/win32-eh.ll | 49 +- test/CodeGen/X86/win32-pic-jumptable.ll | 8 +- .../CodeGen/X86/win32-seh-catchpad-realign.ll | 77 + test/CodeGen/X86/win32-seh-catchpad.ll | 231 + test/CodeGen/X86/win32-seh-nested-finally.ll | 80 + test/CodeGen/X86/win32-spill-xmm.ll | 40 + test/CodeGen/X86/win64_frame.ll | 70 +- test/CodeGen/X86/win64_sibcall.ll | 38 + test/CodeGen/X86/win_coreclr_chkstk.ll | 143 + test/CodeGen/X86/win_ftol2.ll | 166 - test/CodeGen/X86/wineh-coreclr.ll | 267 + test/CodeGen/X86/wineh-exceptionpointer.ll | 26 + test/CodeGen/X86/wineh-no-ehpads.ll | 20 + test/CodeGen/X86/x32-function_pointer-3.ll | 2 +- test/CodeGen/X86/x32-indirectbr.ll | 26 + test/CodeGen/X86/x32-landingpad.ll | 27 + test/CodeGen/X86/x32-va_start.ll | 99 + test/CodeGen/X86/x86-32-intrcc.ll | 79 + test/CodeGen/X86/x86-64-baseptr.ll | 4 +- 
.../X86/x86-64-double-precision-shift-left.ll | 17 +- .../x86-64-double-precision-shift-right.ll | 9 +- .../X86/x86-64-double-shifts-Oz-Os-O2.ll | 4 +- test/CodeGen/X86/x86-64-intrcc.ll | 86 + test/CodeGen/X86/x86-64-ms_abi-vararg.ll | 108 + test/CodeGen/X86/x86-64-pic-10.ll | 2 +- test/CodeGen/X86/x86-fold-pshufb.ll | 20 +- .../X86/x86-sanitizer-shrink-wrapping.ll | 40 + .../X86/x86-setcc-int-to-fp-combine.ll | 16 +- test/CodeGen/X86/x86-shrink-wrap-unwind.ll | 153 + test/CodeGen/X86/x86-shrink-wrapping.ll | 254 +- test/CodeGen/X86/x86-win64-shrink-wrapping.ll | 126 + test/CodeGen/X86/xop-intrinsics-x86_64.ll | 33 +- test/CodeGen/X86/xop-pcmov.ll | 163 + test/CodeGen/XCore/aliases.ll | 6 +- test/CodeGen/XCore/dwarf_debug.ll | 8 +- test/DebugInfo/AArch64/big-endian.ll | 2 +- test/DebugInfo/AArch64/bitfields.ll | 2 +- test/DebugInfo/AArch64/cfi-eof-prologue.ll | 16 +- test/DebugInfo/AArch64/coalescing.ll | 10 +- test/DebugInfo/AArch64/constant-dbgloc.ll | 6 +- test/DebugInfo/AArch64/dwarfdump.ll | 6 +- test/DebugInfo/AArch64/frameindices.ll | 24 +- test/DebugInfo/AArch64/prologue_end.ll | 43 + test/DebugInfo/AArch64/struct_by_value.ll | 8 +- test/DebugInfo/ARM/PR16736.ll | 18 +- test/DebugInfo/ARM/bitfield.ll | 2 +- test/DebugInfo/ARM/cfi-eof-prologue.ll | 16 +- test/DebugInfo/ARM/constant-dbgloc.ll | 6 +- test/DebugInfo/ARM/float-args.ll | 45 + test/DebugInfo/ARM/header.ll | 6 +- test/DebugInfo/ARM/lowerbdgdeclare_vla.ll | 14 +- .../multiple-constant-uses-drops-dbgloc.ll | 6 +- test/DebugInfo/ARM/prologue_end.ll | 46 + test/DebugInfo/ARM/s-super-register.ll | 10 +- test/DebugInfo/ARM/selectiondag-deadcode.ll | 6 +- .../single-constant-use-preserves-dbgloc.ll | 8 +- test/DebugInfo/ARM/tls.ll | 12 +- test/DebugInfo/COFF/asan-module-ctor.ll | 8 +- .../COFF/asan-module-without-functions.ll | 4 +- test/DebugInfo/COFF/asm.ll | 34 +- test/DebugInfo/COFF/cpp-mangling.ll | 6 +- test/DebugInfo/COFF/multifile.ll | 12 +- test/DebugInfo/COFF/multifunction.ll | 24 +- test/DebugInfo/COFF/simple.ll | 12 +- .../COFF/tail-call-without-lexical-scopes.ll | 8 +- .../DebugInfo/{ => Generic}/2009-10-16-Phi.ll | 0 .../2009-11-03-InsertExtractValue.ll | 2 +- .../2009-11-05-DeadGlobalVariable.ll | 6 +- .../2009-11-06-NamelessGlobalVariable.ll | 2 +- .../{ => Generic}/2009-11-10-CurrentFn.ll | 8 +- .../{ => Generic}/2010-01-05-DbgScope.ll | 4 +- .../{ => Generic}/2010-03-12-llc-crash.ll | 8 +- .../{ => Generic}/2010-03-19-DbgDeclare.ll | 6 +- .../{ => Generic}/2010-03-24-MemberFn.ll | 14 +- .../2010-04-06-NestedFnDbgInfo.ll | 26 +- .../{ => Generic}/2010-04-19-FramePtr.ll | 6 +- .../2010-05-03-DisableFramePtr.ll | 6 +- .../{ => Generic}/2010-05-03-OriginDIE.ll | 18 +- .../{ => Generic}/2010-05-10-MultipleCU.ll | 12 +- .../2010-06-29-InlinedFnLocalVar.ll | 16 +- .../{ => Generic}/2010-07-19-Crash.ll | 10 +- .../{ => Generic}/2010-10-01-crash.ll | 8 +- test/DebugInfo/Generic/Inputs/gmlt.ll | 153 + test/DebugInfo/{ => Generic}/PR20038.ll | 29 +- .../accel-table-hash-collisions.ll | 2 +- test/DebugInfo/{ => Generic}/array.ll | 8 +- test/DebugInfo/{ => Generic}/block-asan.ll | 8 +- .../{ => Generic}/bug_null_debuginfo.ll | 2 +- .../{ => Generic}/constant-pointers.ll | 6 +- .../constant-sdnodes-have-dbg-location.ll | 10 +- .../constantfp-sdnodes-have-dbg-location.ll | 10 +- .../{ => Generic}/cross-cu-inlining.ll | 20 +- .../cross-cu-linkonce-distinct.ll | 12 +- .../{ => Generic}/cross-cu-linkonce.ll | 10 +- test/DebugInfo/{ => Generic}/cu-range-hole.ll | 14 +- test/DebugInfo/{ => Generic}/cu-ranges.ll | 14 +- .../{X86 => 
Generic}/dbg-at-specficiation.ll | 2 +- .../{ => Generic}/dead-argument-order.ll | 10 +- .../{ => Generic}/debug-info-always-inline.ll | 0 .../{ => Generic}/debug-info-qualifiers.ll | 12 +- .../debuginfofinder-forward-declaration.ll | 2 +- .../debuginfofinder-multiple-cu.ll | 12 +- test/DebugInfo/Generic/def-line.ll | 93 + test/DebugInfo/Generic/discriminator.ll | 52 + .../{ => Generic}/dwarf-public-names.ll | 20 +- test/DebugInfo/{ => Generic}/empty.ll | 2 +- test/DebugInfo/{ => Generic}/enum-types.ll | 18 +- test/DebugInfo/{ => Generic}/enum.ll | 8 +- test/DebugInfo/{ => Generic}/global.ll | 6 +- test/DebugInfo/{ => Generic}/gmlt.test | 0 test/DebugInfo/Generic/gvn.ll | 114 + .../incorrect-variable-debugloc.ll | 24 +- .../incorrect-variable-debugloc1.ll | 8 +- test/DebugInfo/{ => Generic}/inheritance.ll | 20 +- .../inline-debug-info-multiret.ll | 16 +- .../{ => Generic}/inline-debug-info.ll | 16 +- .../{ => Generic}/inline-no-debug-info.ll | 8 +- test/DebugInfo/{ => Generic}/inline-scopes.ll | 14 +- .../{ => Generic}/inlined-arguments.ll | 18 +- test/DebugInfo/{ => Generic}/inlined-vars.ll | 16 +- test/DebugInfo/Generic/lit.local.cfg | 3 + .../{ => Generic}/location-verifier.ll | 6 +- test/DebugInfo/{ => Generic}/lto-comp-dir.ll | 12 +- test/DebugInfo/{ => Generic}/member-order.ll | 8 +- .../{ => Generic}/member-pointers.ll | 2 +- .../missing-abstract-variable.ll | 32 +- test/DebugInfo/{ => Generic}/multiline.ll | 6 +- test/DebugInfo/{ => Generic}/namespace.ll | 35 +- .../namespace_function_definition.ll | 8 +- .../namespace_inline_function_definition.ll | 16 +- test/DebugInfo/{ => Generic}/nodebug.ll | 4 +- .../DebugInfo/{ => Generic}/piece-verifier.ll | 12 +- test/DebugInfo/Generic/ptrsize.ll | 47 + .../{X86 => Generic}/recursive_inlining.ll | 42 +- test/DebugInfo/{ => Generic}/restrict.ll | 8 +- test/DebugInfo/Generic/skeletoncu.ll | 16 + .../{ => Generic}/sugared-constants.ll | 12 +- .../{ => Generic}/template-recursive-void.ll | 2 +- test/DebugInfo/{ => Generic}/tu-composite.ll | 24 +- .../{ => Generic}/tu-member-pointer.ll | 2 +- .../{ => Generic}/two-cus-from-same-file.ll | 16 +- test/DebugInfo/{ => Generic}/typedef.ll | 2 +- .../{ => Generic}/unconditional-branch.ll | 8 +- test/DebugInfo/{ => Generic}/varargs.ll | 12 +- test/DebugInfo/{ => Generic}/version.ll | 6 +- test/DebugInfo/Inputs/dwarfdump-dwp.x86_64.o | Bin 0 -> 2000 bytes .../dwarfdump-macho-relocs.macho.x86_64.o | Bin 0 -> 2364 bytes test/DebugInfo/Inputs/dwarfdump-macro-cmd.h | 1 + test/DebugInfo/Inputs/dwarfdump-macro.cc | 11 + test/DebugInfo/Inputs/dwarfdump-macro.h | 5 + test/DebugInfo/Inputs/dwarfdump-macro.o | Bin 0 -> 5616 bytes test/DebugInfo/Inputs/dwarfdump-test.cc | 2 + .../Inputs/dwarfdump-test.macho-i386.o | Bin 0 -> 3620 bytes test/DebugInfo/Inputs/fat-test.o | Bin 0 -> 5000 bytes test/DebugInfo/Inputs/gmlt.ll | 18 +- test/DebugInfo/Inputs/line.ll | 6 +- test/DebugInfo/MIR/X86/lit.local.cfg | 2 + .../MIR/X86/live-debug-values-3preds.mir | 299 + test/DebugInfo/MIR/X86/live-debug-values.mir | 260 + test/DebugInfo/MIR/lit.local.cfg | 2 + test/DebugInfo/Mips/InlinedFnLocalVar.ll | 16 +- test/DebugInfo/Mips/delay-slot.ll | 12 +- test/DebugInfo/Mips/dsr-fixed-objects.ll | 156 + test/DebugInfo/Mips/dsr-non-fixed-objects.ll | 125 + test/DebugInfo/Mips/fn-call-line.ll | 6 +- test/DebugInfo/Mips/prologue_end.ll | 70 + test/DebugInfo/PDB/{ => DIA}/lit.local.cfg | 0 .../PDB/{ => DIA}/pdbdump-flags.test | 8 +- .../PDB/{ => DIA}/pdbdump-symbol-format.test | 6 +- test/DebugInfo/PDB/pdbdump-headers.test | 12 + 
test/DebugInfo/PowerPC/tls-fission.ll | 2 +- test/DebugInfo/PowerPC/tls.ll | 2 +- test/DebugInfo/Sparc/gnu-window-save.ll | 6 +- test/DebugInfo/Sparc/prologue_end.ll | 41 + test/DebugInfo/SystemZ/prologue_end.ll | 42 + test/DebugInfo/SystemZ/variable-loc.ll | 14 +- test/DebugInfo/X86/2010-04-13-PubType.ll | 10 +- .../X86/2011-09-26-GlobalVarContext.ll | 8 +- test/DebugInfo/X86/2011-12-16-BadStructRef.ll | 44 +- test/DebugInfo/X86/DIModuleContext.ll | 30 + test/DebugInfo/X86/DW_AT_byte_size.ll | 8 +- test/DebugInfo/X86/DW_AT_linkage_name.ll | 20 +- .../DebugInfo/X86/DW_AT_location-reference.ll | 8 +- test/DebugInfo/X86/DW_AT_object_pointer.ll | 22 +- test/DebugInfo/X86/DW_AT_specification.ll | 6 +- .../X86/DW_AT_stmt_list_sec_offset.ll | 6 +- test/DebugInfo/X86/DW_TAG_friend.ll | 2 +- test/DebugInfo/X86/InlinedFnLocalVar.ll | 16 +- test/DebugInfo/X86/aligned_stack_var.ll | 8 +- test/DebugInfo/X86/arange-and-stub.ll | 14 +- test/DebugInfo/X86/arange.ll | 2 +- test/DebugInfo/X86/arguments.ll | 10 +- test/DebugInfo/X86/array.ll | 30 +- test/DebugInfo/X86/array2.ll | 20 +- test/DebugInfo/X86/bbjoin.ll | 101 + test/DebugInfo/X86/bitfields.ll | 2 +- test/DebugInfo/X86/block-capture.ll | 10 +- test/DebugInfo/X86/byvalstruct.ll | 16 +- test/DebugInfo/X86/c-type-units.ll | 2 +- test/DebugInfo/X86/coff_debug_info_type.ll | 10 +- test/DebugInfo/X86/coff_relative_names.ll | 6 +- test/DebugInfo/X86/concrete_out_of_line.ll | 26 +- test/DebugInfo/X86/constant-aggregate.ll | 20 +- test/DebugInfo/X86/cu-ranges-odr.ll | 18 +- test/DebugInfo/X86/cu-ranges.ll | 14 +- test/DebugInfo/X86/data_member_location.ll | 2 +- test/DebugInfo/X86/dbg-byval-parameter.ll | 8 +- test/DebugInfo/X86/dbg-const-int.ll | 8 +- test/DebugInfo/X86/dbg-const.ll | 8 +- test/DebugInfo/X86/dbg-declare-arg.ll | 24 +- test/DebugInfo/X86/dbg-declare.ll | 10 +- test/DebugInfo/X86/dbg-file-name.ll | 6 +- test/DebugInfo/X86/dbg-i128-const.ll | 8 +- test/DebugInfo/X86/dbg-merge-loc-entry.ll | 12 +- test/DebugInfo/X86/dbg-prolog-end.ll | 14 +- test/DebugInfo/X86/dbg-subrange.ll | 6 +- test/DebugInfo/X86/dbg-value-const-byref.ll | 10 +- test/DebugInfo/X86/dbg-value-dag-combine.ll | 14 +- .../X86/dbg-value-inlined-parameter.ll | 14 +- test/DebugInfo/X86/dbg-value-isel.ll | 14 +- test/DebugInfo/X86/dbg-value-location.ll | 16 +- test/DebugInfo/X86/dbg-value-range.ll | 10 +- test/DebugInfo/X86/dbg-value-terminator.ll | 12 +- test/DebugInfo/X86/dbg_value_direct.ll | 10 +- test/DebugInfo/X86/debug-dead-local-var.ll | 10 +- test/DebugInfo/X86/debug-info-access.ll | 6 +- .../X86/debug-info-block-captured-self.ll | 14 +- test/DebugInfo/X86/debug-info-blocks.ll | 46 +- .../DebugInfo/X86/debug-info-packed-struct.ll | 2 +- .../DebugInfo/X86/debug-info-static-member.ll | 8 +- test/DebugInfo/X86/debug-loc-asan.ll | 19 +- test/DebugInfo/X86/debug-loc-empty-entries.ll | 8 +- test/DebugInfo/X86/debug-loc-offset.ll | 18 +- test/DebugInfo/X86/debug-ranges-offset.ll | 14 +- test/DebugInfo/X86/debug_frame.ll | 6 +- test/DebugInfo/X86/debugger-tune.ll | 44 + test/DebugInfo/X86/decl-derived-member.ll | 26 +- test/DebugInfo/X86/deleted-bit-piece.ll | 8 +- test/DebugInfo/X86/discriminator.ll | 6 +- test/DebugInfo/X86/dw_op_minus.ll | 84 + .../X86/dwarf-aranges-no-dwarf-labels.ll | 18 +- test/DebugInfo/X86/dwarf-aranges.ll | 6 +- test/DebugInfo/X86/dwarf-linkage-names.ll | 71 + test/DebugInfo/X86/dwarf-public-names.ll | 22 +- test/DebugInfo/X86/dwarf-pubnames-split.ll | 6 +- test/DebugInfo/X86/earlydup-crash.ll | 8 +- test/DebugInfo/X86/elf-names.ll | 20 +- 
.../DebugInfo/X86/empty-and-one-elem-array.ll | 10 +- test/DebugInfo/X86/empty-array.ll | 2 +- test/DebugInfo/X86/empty.ll | 2 +- test/DebugInfo/X86/ending-run.ll | 10 +- test/DebugInfo/X86/enum-class.ll | 2 +- test/DebugInfo/X86/enum-fwd-decl.ll | 2 +- test/DebugInfo/X86/externaltyperef.ll | 51 + test/DebugInfo/X86/fission-cu.ll | 2 +- test/DebugInfo/X86/fission-hash.ll | 2 +- test/DebugInfo/X86/fission-inline.ll | 8 +- test/DebugInfo/X86/fission-ranges.ll | 30 +- test/DebugInfo/X86/float_const.ll | 8 +- test/DebugInfo/X86/formal_parameter.ll | 8 +- test/DebugInfo/X86/frame-register.ll | 12 +- test/DebugInfo/X86/generate-odr-hash.ll | 22 +- test/DebugInfo/X86/ghost-sdnode-dbgvalues.ll | 18 +- test/DebugInfo/X86/gnu-public-names-empty.ll | 2 +- test/DebugInfo/X86/gnu-public-names.ll | 28 +- test/DebugInfo/X86/header.ll | 6 +- test/DebugInfo/X86/inline-member-function.ll | 12 +- test/DebugInfo/X86/inline-seldag-test.ll | 12 +- .../DebugInfo/X86/inlined-formal-parameter.ll | 10 +- test/DebugInfo/X86/inlined-indirect-value.ll | 8 +- test/DebugInfo/X86/instcombine-instrinsics.ll | 8 +- test/DebugInfo/X86/lexical_block.ll | 8 +- test/DebugInfo/X86/line-info.ll | 12 +- test/DebugInfo/X86/linkage-name.ll | 10 +- test/DebugInfo/X86/live-debug-values.ll | 152 + test/DebugInfo/X86/low-pc-cu.ll | 6 +- test/DebugInfo/X86/memberfnptr.ll | 2 +- test/DebugInfo/X86/mi-print.ll | 14 +- test/DebugInfo/X86/misched-dbg-value.ll | 18 +- test/DebugInfo/X86/missing-file-line.ll | 8 +- test/DebugInfo/X86/multiple-aranges.ll | 4 +- test/DebugInfo/X86/multiple-at-const-val.ll | 6 +- test/DebugInfo/X86/nodebug_with_debug_loc.ll | 12 +- .../X86/nondefault-subrange-array.ll | 2 +- test/DebugInfo/X86/nophysreg.ll | 24 +- test/DebugInfo/X86/objc-fwd-decl.ll | 2 +- test/DebugInfo/X86/objc-property-void.ll | 10 +- test/DebugInfo/X86/op_deref.ll | 18 +- test/DebugInfo/X86/parameters.ll | 16 +- test/DebugInfo/X86/pieces-1.ll | 12 +- test/DebugInfo/X86/pieces-2.ll | 10 +- test/DebugInfo/X86/pieces-3.ll | 18 +- test/DebugInfo/X86/pointer-type-size.ll | 2 +- test/DebugInfo/X86/pr11300.ll | 14 +- test/DebugInfo/X86/pr12831.ll | 28 +- test/DebugInfo/X86/pr13303.ll | 6 +- test/DebugInfo/X86/pr19307.ll | 12 +- test/DebugInfo/X86/processes-relocations.ll | 2 +- test/DebugInfo/X86/prologue-stack.ll | 6 +- test/DebugInfo/X86/ref_addr_relocation.ll | 4 +- test/DebugInfo/X86/reference-argument.ll | 47 +- test/DebugInfo/X86/rvalue-ref.ll | 8 +- test/DebugInfo/X86/safestack-byval.ll | 91 + test/DebugInfo/X86/sret.ll | 76 +- test/DebugInfo/X86/sroasplit-1.ll | 12 +- test/DebugInfo/X86/sroasplit-2.ll | 14 +- test/DebugInfo/X86/sroasplit-3.ll | 10 +- test/DebugInfo/X86/sroasplit-4.ll | 10 +- test/DebugInfo/X86/sroasplit-5.ll | 10 +- .../X86/stmt-list-multiple-compile-units.ll | 16 +- test/DebugInfo/X86/stmt-list.ll | 6 +- test/DebugInfo/X86/stringpool.ll | 2 +- test/DebugInfo/X86/struct-loc.ll | 2 +- test/DebugInfo/X86/subrange-type.ll | 8 +- test/DebugInfo/X86/subreg.ll | 8 +- test/DebugInfo/X86/subregisters.ll | 16 +- test/DebugInfo/X86/template.ll | 12 +- test/DebugInfo/X86/tls.ll | 28 +- .../X86/type_units_with_addresses.ll | 2 +- test/DebugInfo/X86/union-const.ll | 8 +- test/DebugInfo/X86/union-template.ll | 10 +- test/DebugInfo/X86/vector.ll | 2 +- test/DebugInfo/X86/vla.ll | 20 +- test/DebugInfo/debugmacinfo.test | 27 + test/DebugInfo/dwarfdump-accel.test | 2 +- test/DebugInfo/dwarfdump-dump-flags.test | 3 + test/DebugInfo/dwarfdump-dwp.test | 53 + test/DebugInfo/dwarfdump-macho-relocs.test | 27 + 
test/DebugInfo/dwarfdump-macho-universal.test | 17 +
test/DebugInfo/dwo.ll | 15 +
test/DebugInfo/gvn.ll | 135 -
test/DebugInfo/llvm-symbolizer.test | 6 +-
test/DebugInfo/skeletoncu.ll | 17 +
test/Examples/Kaleidoscope/Chapter3.test | 17 +
test/Examples/Kaleidoscope/Chapter4.test | 17 +
test/Examples/Kaleidoscope/Chapter5.test | 19 +
test/Examples/Kaleidoscope/Chapter6.test | 15 +
test/Examples/Kaleidoscope/Chapter7.test | 15 +
test/Examples/lit.local.cfg | 1 +
test/ExecutionEngine/MCJIT/eh-lg-pic.ll | 2 +-
test/ExecutionEngine/MCJIT/eh-sm-pic.ll | 2 +-
test/ExecutionEngine/MCJIT/eh.ll | 2 +-
.../MCJIT/multi-module-eh-a.ll | 2 +-
.../ExecutionEngine/OrcLazy/global_aliases.ll | 21 +
test/ExecutionEngine/OrcMCJIT/eh-lg-pic.ll | 2 +-
test/ExecutionEngine/OrcMCJIT/eh-sm-pic.ll | 2 +-
test/ExecutionEngine/OrcMCJIT/eh.ll | 2 +-
.../OrcMCJIT/multi-module-eh-a.ll | 2 +-
.../AArch64/MachO_ARM64_relocations.s | 12 +
.../ARM/MachO_ARM_PIC_relocations.s | 7 +
.../Mips/ELF_Mips64r2N64_PIC_relocations.s | 10 +-
.../RuntimeDyld/Mips/ELF_N64R6_relocations.s | 54 +
.../Mips/ELF_O32_PIC_relocations.s | 13 +-
.../RuntimeDyld/PowerPC/lit.local.cfg | 3 +
.../PowerPC/ppc32_elf_rel_addr16.s | 47 +
.../RuntimeDyld/X86/COFF_i386.s | 66 +
.../X86/{COFF_x86_64 => COFF_x86_64.s} | 9 +
.../RuntimeDyld/X86/ELF_STT_FILE.s | 14 +
.../X86/ELF_x64-64_PC8_relocations.s | 26 +
.../RuntimeDyld/X86/ELF_x86_64_StubBuf.s | 26 +
.../X86/Inputs/ELF_STT_FILE_FILE.s | 3 +
.../X86/Inputs/ELF_STT_FILE_GLOBAL.s | 2 +
.../X86/Inputs/ELF_x86_64_StubBuf.ll | 12 +
.../X86/MachO_x86-64_PIC_relocations.s | 18 +
test/ExecutionEngine/lit.local.cfg | 5 +-
test/Feature/OperandBundles/adce.ll | 49 +
.../OperandBundles/basic-aa-argmemonly.ll | 51 +
test/Feature/OperandBundles/dse.ll | 62 +
test/Feature/OperandBundles/early-cse.ll | 89 +
test/Feature/OperandBundles/function-attrs.ll | 33 +
.../OperandBundles/inliner-conservative.ll | 17 +
test/Feature/OperandBundles/merge-func.ll | 64 +
test/Feature/OperandBundles/special-state.ll | 21 +
test/Feature/alias2.ll | 24 +-
test/Feature/aliases.ll | 18 +-
test/Feature/callingconventions.ll | 7 +
test/Feature/comdat.ll | 4 +-
test/Feature/exception.ll | 109 +
test/Feature/optnone-llc.ll | 10 +-
.../Instrumentation/AddressSanitizer/basic.ll | 2 +-
.../AddressSanitizer/debug_info.ll | 14 +-
.../debug_info_noninstrumented_alloca.ll | 40 +
.../do-not-instrument-cstring.ll | 8 -
...ll => do-not-instrument-globals-darwin.ll} | 8 +-
.../do-not-instrument-globals-linux.ll | 35 +
.../do-not-instrument-llvm-metadata.ll | 15 -
.../do-not-touch-comdat-global.ll | 4 +-
.../instrument-dynamic-allocas.ll | 16 +
.../AddressSanitizer/keep_going.ll | 14 +
.../AddressSanitizer/localescape.ll | 86 +
.../AddressSanitizer/stack_dynamic_alloca.ll | 31 +
.../Instrumentation/AddressSanitizer/twice.ll | 8 +
.../DataFlowSanitizer/abilist.ll | 4 +-
.../DataFlowSanitizer/debug.ll | 9 +-
.../DataFlowSanitizer/external_mask.ll | 14 +
.../DataFlowSanitizer/prefix-rename.ll | 4 +-
.../Instrumentation/InstrProfiling/PR23499.ll | 21 +-
.../Instrumentation/InstrProfiling/linkage.ll | 49 +-
.../InstrProfiling/no-counters.ll | 4 +-
.../InstrProfiling/noruntime.ll | 4 +-
.../InstrProfiling/platform.ll | 31 +-
.../InstrProfiling/profiling.ll | 36 +-
.../MemorySanitizer/AArch64/vararg.ll | 75 +
.../MemorySanitizer/atomics.ll | 24 +-
.../MemorySanitizer/check_access_address.ll | 4 +-
.../MemorySanitizer/msan_basic.ll | 201 +-
.../MemorySanitizer/mul_by_constant.ll | 23 +
.../MemorySanitizer/origin-alignment.ll | 9 +-
.../MemorySanitizer/return_from_main.ll | 2 +-
.../MemorySanitizer/store-origin.ll | 12 +-
.../MemorySanitizer/unreachable.ll | 4 +-
.../MemorySanitizer/vector_cvt.ll | 6 +-
.../MemorySanitizer/vector_shift.ll | 10 +-
.../SanitizerCoverage/coverage-dbg.ll | 8 +-
.../SanitizerCoverage/coverage.ll | 4 +-
.../SanitizerCoverage/coverage2-dbg.ll | 8 +-
test/Instrumentation/SanitizerCoverage/seh.ll | 86 +
.../SanitizerCoverage/switch-tracing.ll | 56 +
.../Instrumentation/ThreadSanitizer/atomic.ll | 2 +-
test/JitListener/multiple.ll | 20 +-
test/JitListener/simple.ll | 8 +-
test/LTO/X86/Inputs/invalid.ll.bc | Bin 332 -> 688 bytes
test/LTO/X86/bcsection.ll | 2 +
test/LTO/X86/current-section.ll | 1 +
test/LTO/X86/diagnostic-handler-noexit.ll | 4 +-
test/LTO/X86/diagnostic-handler-remarks.ll | 9 +-
test/LTO/X86/disable-verify.ll | 18 +
test/LTO/X86/invalid.ll | 2 +-
test/LTO/X86/list-symbols.ll | 1 +
test/LTO/X86/llvm-lto-output.ll | 21 +
test/LTO/X86/parallel.ll | 25 +
test/LibDriver/thin.test | 9 +
test/Linker/2003-01-30-LinkerRename.ll | 4 +-
test/Linker/2003-04-23-LinkOnceLost.ll | 13 +-
test/Linker/2003-05-31-LinkerRename.ll | 12 +-
test/Linker/2008-03-05-AliasReference.ll | 2 +-
test/Linker/2008-07-06-AliasFnDecl.ll | 2 +-
test/Linker/2008-07-06-AliasWeakDest.ll | 4 +-
test/Linker/2009-09-03-mdnode.ll | 4 +-
test/Linker/2009-09-03-mdnode2.ll | 4 +-
test/Linker/2011-08-04-DebugLoc.ll | 6 +-
test/Linker/2011-08-04-DebugLoc2.ll | 6 +-
test/Linker/2011-08-04-Metadata.ll | 6 +-
test/Linker/2011-08-04-Metadata2.ll | 6 +-
test/Linker/2011-08-18-unique-class-type.ll | 8 +-
test/Linker/2011-08-18-unique-class-type2.ll | 8 +-
test/Linker/2011-08-18-unique-debug-type.ll | 6 +-
test/Linker/2011-08-18-unique-debug-type2.ll | 6 +-
test/Linker/ConstantGlobals.ll | 4 +
test/Linker/DbgDeclare.ll | 10 +-
test/Linker/DbgDeclare2.ll | 12 +-
test/Linker/Inputs/PR8300.b.ll | 2 +-
test/Linker/Inputs/alias.ll | 4 +-
.../Inputs/available_externally_over_decl.ll | 10 +
test/Linker/Inputs/comdat11.ll | 9 +
test/Linker/Inputs/comdat13.ll | 9 +
test/Linker/Inputs/comdat14.ll | 12 +
test/Linker/Inputs/comdat15.ll | 6 +
test/Linker/Inputs/comdat5.ll | 2 +-
test/Linker/Inputs/comdat8.ll | 2 +-
test/Linker/Inputs/ctors2.ll | 6 +
test/Linker/Inputs/ctors3.ll | 7 +
test/Linker/Inputs/funcimport.ll | 28 +
.../Inputs/funcimport_appending_global.ll | 6 +
test/Linker/Inputs/internalize-lazy.ll | 8 +
test/Linker/Inputs/linkage.c.ll | 4 +
test/Linker/Inputs/mdlocation.ll | 12 +-
.../Inputs/only-needed-debug-metadata.ll | 27 +
.../Inputs/only-needed-named-metadata.ll | 9 +
test/Linker/Inputs/opaque.ll | 8 +
...laced-function-matches-first-subprogram.ll | 6 +-
.../Inputs/subprogram-linkonce-weak-odr.ll | 15 -
.../Linker/Inputs/subprogram-linkonce-weak.ll | 6 +-
test/Linker/Inputs/testlink.ll | 4 +-
.../Linker/Inputs/thinlto_funcimport_debug.ll | 38 +
test/Linker/Inputs/type-unique-alias.ll | 2 +-
test/Linker/Inputs/type-unique-dst-types2.ll | 4 +
test/Linker/Inputs/type-unique-dst-types3.ll | 4 +
.../Inputs/type-unique-inheritance-a.ll | 10 +-
.../Inputs/type-unique-inheritance-b.ll | 16 +-
test/Linker/Inputs/type-unique-simple2-a.ll | 10 +-
test/Linker/Inputs/type-unique-simple2-b.ll | 14 +-
test/Linker/Inputs/visibility.ll | 6 +-
test/Linker/alias.ll | 39 +-
test/Linker/available_externally_over_decl.ll | 15 +
test/Linker/comdat11.ll | 13 +
test/Linker/comdat12.ll | 8 +
test/Linker/comdat13.ll | 30 +
test/Linker/comdat14.ll | 9 +
test/Linker/comdat15.ll | 9 +
test/Linker/comdat6.ll | 2 +-
test/Linker/comdat8.ll | 2 +-
test/Linker/comdat9.ll | 7 +-
test/Linker/comdat_group.ll | 18 +
test/Linker/constructor-comdat.ll | 4 +-
test/Linker/ctors.ll | 3 +
test/Linker/ctors2.ll | 7 +
test/Linker/ctors3.ll | 8 +
test/Linker/ctors4.ll | 14 +
test/Linker/ctors5.ll | 8 +
test/Linker/debug-info-version-a.ll | 2 +-
test/Linker/debug-info-version-b.ll | 2 +-
test/Linker/distinct.ll | 2 +
test/Linker/drop-debug.ll | 2 +-
test/Linker/funcimport.ll | 195 +
test/Linker/funcimport_appending_global.ll | 20 +
test/Linker/global_ctors.ll | 5 +-
test/Linker/internalize-lazy.ll | 4 +
test/Linker/link-flags.ll | 19 +
test/Linker/mdlocation.ll | 36 +-
test/Linker/only-needed-debug-metadata.ll | 49 +
test/Linker/only-needed-named-metadata.ll | 65 +
test/Linker/opaque.ll | 4 +
test/Linker/override-with-internal-linkage.ll | 4 +-
test/Linker/pr21494.ll | 4 +-
test/Linker/prologuedata.ll | 10 +-
...laced-function-matches-first-subprogram.ll | 27 +-
test/Linker/subprogram-linkonce-weak-odr.ll | 177 -
test/Linker/subprogram-linkonce-weak.ll | 53 +-
test/Linker/testlink.ll | 11 +-
test/Linker/thinlto_funcimport_debug.ll | 80 +
test/Linker/type-unique-alias.ll | 4 +-
test/Linker/type-unique-dst-types.ll | 4 +
test/Linker/type-unique-odr-a.ll | 16 +-
test/Linker/type-unique-odr-b.ll | 16 +-
test/Linker/type-unique-simple-a.ll | 12 +-
test/Linker/type-unique-simple-b.ll | 14 +-
test/Linker/type-unique-simple2-a.ll | 23 +-
test/Linker/type-unique-simple2-b.ll | 14 +-
test/Linker/type-unique-simple2.ll | 1 +
test/Linker/type-unique-src-type.ll | 4 +-
test/Linker/type-unique-type-array-a.ll | 24 +-
test/Linker/type-unique-type-array-b.ll | 18 +-
test/Linker/uniqued-distinct-cycles.ll | 14 +
test/Linker/unnamed-addr1-a.ll | 18 +-
test/Linker/unnamed-addr1-b.ll | 8 +-
test/Linker/visibility.ll | 12 +-
test/Linker/weakextern.ll | 14 +-
test/MC/AArch64/arm64-advsimd.s | 153 +-
test/MC/AArch64/arm64-diags.s | 63 +
test/MC/AArch64/arm64-fp-encoding.s | 424 +-
test/MC/AArch64/arm64-leaf-compact-unwind.s | 1 +
test/MC/AArch64/arm64-small-data-fixups.s | 27 +-
test/MC/AArch64/armv8.1a-pan.s | 10 +-
test/MC/AArch64/armv8.1a-rdma.s | 18 -
test/MC/AArch64/armv8.2a-at.s | 9 +
test/MC/AArch64/armv8.2a-mmfr2.s | 6 +
test/MC/AArch64/armv8.2a-persistent-memory.s | 6 +
.../AArch64/armv8.2a-statistical-profiling.s | 87 +
test/MC/AArch64/armv8.2a-uao.s | 17 +
test/MC/AArch64/basic-a64-diagnostics.s | 4 +-
test/MC/AArch64/elf_osabi_flags.s | 9 +-
test/MC/AArch64/error-location-ldr-pseudo.s | 5 +
test/MC/AArch64/error-location.s | 49 +
test/MC/AArch64/fullfp16-diagnostics.s | 82 +
test/MC/AArch64/fullfp16-neon-neg.s | 382 +
test/MC/AArch64/ldr-pseudo-diagnostics.s | 18 +
test/MC/AArch64/neon-2velem.s | 18 +-
test/MC/AArch64/neon-aba-abd.s | 4 +-
test/MC/AArch64/neon-across.s | 18 +-
test/MC/AArch64/neon-add-pairwise.s | 6 +-
test/MC/AArch64/neon-add-sub-instructions.s | 10 +-
test/MC/AArch64/neon-compare-instructions.s | 62 +-
test/MC/AArch64/neon-diagnostics.s | 900 +-
test/MC/AArch64/neon-facge-facgt.s | 18 +-
test/MC/AArch64/neon-frsqrt-frecp.s | 10 +-
test/MC/AArch64/neon-max-min-pairwise.s | 18 +-
test/MC/AArch64/neon-max-min.s | 18 +-
test/MC/AArch64/neon-mla-mls-instructions.s | 10 +-
test/MC/AArch64/neon-scalar-abs.s | 4 +-
test/MC/AArch64/neon-scalar-by-elem-mla.s | 6 +-
test/MC/AArch64/neon-scalar-by-elem-mul.s | 6 +-
test/MC/AArch64/neon-scalar-cvt.s | 34 +-
test/MC/AArch64/neon-scalar-fp-compare.s | 32 +-
test/MC/AArch64/neon-scalar-mul.s | 4 +-
test/MC/AArch64/neon-scalar-recip.s | 12 +-
test/MC/AArch64/neon-scalar-reduce-pairwise.s | 7 +-
test/MC/AArch64/neon-simd-misc.s | 98 +-
test/MC/AArch64/neon-simd-shift.s | 18 +-
test/MC/AArch64/noneon-diagnostics.s | 15 +
test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s | 7 +
test/MC/AMDGPU/flat-scratch.s | 33 +
test/MC/AMDGPU/flat.s | 516 +-
test/MC/AMDGPU/hsa-text.s | 34 +
test/MC/AMDGPU/hsa.s | 31 +-
test/MC/AMDGPU/mubuf.s | 224 +-
test/MC/AMDGPU/out-of-range-registers.s | 62 +
test/MC/AMDGPU/smem.s | 11 +
test/MC/AMDGPU/smrd-err.s | 15 +
test/MC/AMDGPU/smrd.s | 61 +-
test/MC/AMDGPU/sop1-err.s | 50 +-
test/MC/AMDGPU/sop1.s | 17 +
test/MC/AMDGPU/sop2.s | 3 +
test/MC/AMDGPU/vop1.s | 19 +
test/MC/AMDGPU/vop2-err.s | 27 +
test/MC/AMDGPU/vop2.s | 123 +-
test/MC/AMDGPU/vop3-vop1-nosrc.s | 14 +
test/MC/AMDGPU/vop3.s | 44 +-
test/MC/AMDGPU/vopc-errs.s | 8 +
test/MC/AMDGPU/vopc.s | 21 +-
test/MC/ARM/Windows/invalid-relocation.s | 2 +-
test/MC/ARM/arm-elf-relocation-diagnostics.s | 29 +-
test/MC/ARM/arm-thumb-trustzone.s | 1 +
test/MC/ARM/arm-trustzone.s | 3 +-
test/MC/ARM/arm11-hint-instr.s | 23 +-
test/MC/ARM/basic-arm-instructions-v8.1a.s | 2 +-
test/MC/ARM/basic-arm-instructions.s | 11 +
test/MC/ARM/basic-thumb2-instructions-v8.s | 23 +-
test/MC/ARM/big-endian-thumb2-fixup.s | 4 +-
test/MC/ARM/coff-debugging-secrel.ll | 9 +-
test/MC/ARM/data-in-code.ll | 16 +-
test/MC/ARM/diagnostics.s | 64 +-
test/MC/ARM/directive-arch-armv6j.s | 34 -
test/MC/ARM/directive-arch-armv6z.s | 4 +-
...h-armv6zk.s => directive-arch-armv8.2-a.s} | 24 +-
test/MC/ARM/directive-arch-semantic-action.s | 4 +-
test/MC/ARM/directive-arch_extension-sec.s | 13 +-
.../ARM/dwarf-asm-multiple-sections-dwarf-2.s | 2 +-
test/MC/ARM/dwarf-asm-multiple-sections.s | 23 +-
test/MC/ARM/dwarf-asm-nonstandard-section.s | 2 +-
test/MC/ARM/dwarf-asm-single-section.s | 4 +-
test/MC/ARM/eh-compact-pr0.s | 4 +-
test/MC/ARM/eh-compact-pr1.s | 2 +-
test/MC/ARM/eh-directive-handlerdata.s | 4 +-
test/MC/ARM/eh-directive-personalityindex.s | 12 +-
test/MC/ARM/eh-directive-section-comdat.s | 16 +-
.../ARM/eh-directive-section-multiple-func.s | 4 +-
test/MC/ARM/eh-directive-section.s | 8 +-
test/MC/ARM/eh-directive-text-section.s | 2 +-
test/MC/ARM/eh-link.s | 12 +-
test/MC/ARM/error-location-ldr-pseudo.s | 5 +
test/MC/ARM/error-location.s | 49 +
test/MC/ARM/fullfp16-neon-neg.s | 289 +
test/MC/ARM/fullfp16-neon.s | 404 +
test/MC/ARM/neon-vcvt-fp16.s | 18 +
test/MC/ARM/thumb-branches.s | 25 +
test/MC/ARM/thumb-shift-encoding.s | 16 +-
test/MC/ARM/thumb1-relax.s | 35 +
test/MC/ARM/thumb2-diagnostics.s | 19 +-
test/MC/ARM/v7k-dsp.s | 4 +
test/MC/AsmParser/dot-symbol-non-absolute.s | 2 +-
test/MC/AsmParser/expr-shr.s | 5 +-
test/MC/AsmParser/exprs-invalid.s | 3 +
test/MC/AsmParser/exprs.s | 2 +-
test/MC/AsmParser/macros-darwin-vararg.s | 2 +-
test/MC/AsmParser/reassign.s | 12 +
test/MC/AsmParser/undefined-local-symbol.s | 8 +
test/MC/AsmParser/vararg.s | 2 +-
test/MC/COFF/ARM/directive-type-diagnostics.s | 10 -
test/MC/COFF/alias.s | 4 +-
test/MC/COFF/bad-expr.s | 2 +
test/MC/COFF/basic-coff-64.s | 4 +-
test/MC/COFF/basic-coff.s | 4 +-
test/MC/COFF/invalid-def.s | 5 +-
test/MC/COFF/invalid-endef.s | 5 +-
test/MC/COFF/invalid-scl-range.s | 3 +
test/MC/COFF/invalid-scl.s | 5 +-
test/MC/COFF/invalid-type.s | 5 +-
test/MC/COFF/label-undefined.s | 6 +
test/MC/COFF/secidx-diagnostic.s | 2 +
test/MC/COFF/simple-fixups.s | 4 +-
test/MC/COFF/stdin.s | 3 +
test/MC/COFF/symbol-fragment-offset-64.s | 4 +-
test/MC/COFF/symbol-fragment-offset.s | 4 +-
test/MC/COFF/temporary-alias.s | 21 +
test/MC/COFF/timestamp.s | 6 +-
.../Disassembler/AArch64/arm64-scalar-fp.txt | 69 +
test/MC/Disassembler/AArch64/armv8.1a-pan.txt | 2 +
test/MC/Disassembler/AArch64/armv8.2a-at.txt | 9 +
.../Disassembler/AArch64/armv8.2a-mmfr2.txt | 4 +
.../AArch64/armv8.2a-persistent-memory.txt | 6 +
.../armv8.2a-statistical-profiling.txt | 87 +
test/MC/Disassembler/AArch64/armv8.2a-uao.txt | 19 +
.../AArch64/basic-a64-instructions.txt | 116 +
test/MC/Disassembler/AArch64/fullfp16-neg.txt | 145 +
.../AArch64/fullfp16-neon-neg.txt | 382 +
.../ARM/fullfp16-neon-arm-neg.txt | 274 +
.../MC/Disassembler/ARM/fullfp16-neon-arm.txt | 309 +
.../ARM/fullfp16-neon-thumb-neg.txt | 274 +
.../Disassembler/ARM/fullfp16-neon-thumb.txt | 309 +
test/MC/Disassembler/ARM/invalid-thumbv7.txt | 23 +-
test/MC/Disassembler/ARM/thumb-v8.txt | 10 +-
.../Disassembler/Hexagon/invalid_packet.txt | 4 +
test/MC/Disassembler/Hexagon/j.txt | 148 +-
test/MC/Disassembler/Hexagon/ld.txt | 90 +-
test/MC/Disassembler/Hexagon/lit.local.cfg | 6 +-
test/MC/Disassembler/Hexagon/nv_j.txt | 88 +-
test/MC/Disassembler/Hexagon/nv_st.txt | 127 +-
test/MC/Disassembler/Hexagon/st.txt | 84 +-
.../Hexagon/too_many_instructions.txt | 4 +
.../Hexagon/too_many_loop_ends.txt | 4 +
test/MC/Disassembler/Hexagon/unextendable.txt | 9 +
test/MC/Disassembler/Mips/dsp/valid-el.txt | 12 +
test/MC/Disassembler/Mips/dsp/valid.txt | 125 +
test/MC/Disassembler/Mips/dspr2/valid.txt | 173 +
.../MC/Disassembler/Mips/eva/valid_R6-eva.txt | 38 +
.../Disassembler/Mips/eva/valid_preR6-eva.txt | 54 +
.../Disassembler/Mips/micromips-dsp/valid.txt | 103 +
.../Mips/micromips-dspr2/valid.txt | 125 +
.../Mips/micromips32r3/invalid.txt | 4 +
.../valid-el.txt} | 427 +-
.../valid.txt} | 427 +-
test/MC/Disassembler/Mips/micromips32r6.txt | 114 -
.../Disassembler/Mips/micromips32r6/valid.txt | 258 +
.../Disassembler/Mips/micromips64r6/valid.txt | 171 +
test/MC/Disassembler/Mips/mips-dsp.txt | 22 -
.../Disassembler/Mips/mips1/invalid-xfail.txt | 11 +
test/MC/Disassembler/Mips/mips1/invalid.txt | 45 +
.../Mips/mips1/valid-mips1-el.txt | 2 +
.../Disassembler/Mips/mips1/valid-mips1.txt | 5 +
.../Disassembler/Mips/mips1/valid-xfail.txt | 8 +
.../Disassembler/Mips/mips2/invalid-xfail.txt | 13 +
.../Mips/mips2/valid-mips2-el.txt | 2 +
.../Disassembler/Mips/mips2/valid-mips2.txt | 20 +
.../Disassembler/Mips/mips2/valid-xfail.txt | 13 +
.../Disassembler/Mips/mips3/invalid-xfail.txt | 14 +
.../Mips/mips3/valid-mips3-el.txt | 2 +
.../Disassembler/Mips/mips3/valid-mips3.txt | 25 +
.../Disassembler/Mips/mips3/valid-xfail.txt | 13 +
.../Mips/mips32/invalid-xfail.txt | 13 +
.../Mips/mips32/valid-mips32-el.txt | 11 +
.../Disassembler/Mips/mips32/valid-mips32.txt | 180 +
.../Disassembler/Mips/mips32/valid-xfail.txt | 13 +
test/MC/Disassembler/Mips/mips32_le.txt | 450 -
.../Mips/mips32r2/invalid-xfail.txt | 13 +
.../Mips/mips32r2/valid-mips32r2-el.txt | 2 +
.../Mips/mips32r2/valid-mips32r2.txt | 195 +
.../Mips/mips32r2/valid-xfail.txt | 13 +
test/MC/Disassembler/Mips/mips32r2_le.txt | 459 -
.../Mips/mips32r3/invalid-xfail.txt | 13 +
.../Mips/mips32r3/valid-mips32r3.txt | 197 +
.../Mips/mips32r3/valid-xfail.txt | 13 +
.../Mips/mips32r5/invalid-xfail.txt | 13 +
.../Mips/mips32r5/valid-mips32r5.txt | 198 +
.../Mips/mips32r5/valid-xfail.txt | 13 +
.../Mips/mips32r6/valid-mips32r6-el.txt | 2 +-
.../Mips/mips32r6/valid-mips32r6.txt | 3 +-
.../Disassembler/Mips/mips4/invalid-xfail.txt | 13 +
.../Mips/mips4/valid-mips4-el.txt | 2 +
.../Disassembler/Mips/mips4/valid-mips4.txt | 29 +
.../Disassembler/Mips/mips4/valid-xfail.txt | 16 +
.../Mips/mips64/invalid-xfail.txt | 13 +
.../Mips/mips64/valid-mips64-el.txt | 30 +
.../Disassembler/Mips/mips64/valid-mips64.txt | 193 +
.../Disassembler/Mips/mips64/valid-xfail.txt | 13 +
test/MC/Disassembler/Mips/mips64_le.txt | 84 -
.../Mips/mips64r2/invalid-xfail.txt | 13 +
.../Mips/mips64r2/valid-mips64r2-el.txt | 32 +
.../Mips/mips64r2/valid-mips64r2.txt | 212 +
.../Mips/mips64r2/valid-xfail.txt | 14 +
test/MC/Disassembler/Mips/mips64r2_le.txt | 90 -
.../Mips/mips64r3/invalid-xfail.txt | 13 +
.../Mips/mips64r3/valid-mips64r3-el.txt | 2 +
.../Mips/mips64r3/valid-mips64r3.txt | 243 +
.../Mips/mips64r3/valid-xfail.txt | 14 +
.../Mips/mips64r5/invalid-xfail.txt | 13 +
.../Mips/mips64r5/valid-mips64r5-el.txt | 2 +
.../Mips/mips64r5/valid-mips64r5.txt | 244 +
.../Mips/mips64r5/valid-xfail.txt | 14 +
.../Mips/mips64r6/valid-mips64r6-el.txt | 6 +-
.../Mips/mips64r6/valid-mips64r6.txt | 7 +-
test/MC/Disassembler/Mips/msa/test_elm.txt | 1 -
.../Disassembler/Mips/msa/test_elm_msa64.txt | 5 +-
.../Disassembler/PowerPC/ppc64-encoding.txt | 4 +-
.../Disassembler/PowerPC/ppc64le-encoding.txt | 4 +-
test/MC/Disassembler/PowerPC/vsx.txt | 12 +
test/MC/Disassembler/Sparc/sparc-mem.txt | 24 +
test/MC/Disassembler/Sparc/sparc-v9.txt | 4 +
test/MC/Disassembler/SystemZ/insns.txt | 74 +-
test/MC/Disassembler/X86/x86-64.txt | 111 +
test/MC/ELF/ARM/directive-type-diagnostics.s | 10 +
test/MC/ELF/align-zero.s | 4 +
test/MC/ELF/align.s | 14 +-
test/MC/ELF/cfi-adjust-cfa-offset.s | 2 +-
test/MC/ELF/cfi-advance-loc2.s | 2 +-
test/MC/ELF/cfi-def-cfa-offset.s | 2 +-
test/MC/ELF/cfi-def-cfa-register.s | 2 +-
test/MC/ELF/cfi-def-cfa.s | 2 +-
test/MC/ELF/cfi-escape.s | 2 +-
test/MC/ELF/cfi-large-model.s | 2 +-
test/MC/ELF/cfi-offset.s | 2 +-
test/MC/ELF/cfi-register.s | 2 +-
test/MC/ELF/cfi-rel-offset.s | 2 +-
test/MC/ELF/cfi-rel-offset2.s | 2 +-
test/MC/ELF/cfi-remember.s | 2 +-
test/MC/ELF/cfi-restore.s | 2 +-
test/MC/ELF/cfi-same-value.s | 2 +-
test/MC/ELF/cfi-signal-frame.s | 2 +-
test/MC/ELF/cfi-undefined.s | 2 +-
test/MC/ELF/cfi-version.ll | 8 +-
test/MC/ELF/cfi-window-save.s | 2 +-
test/MC/ELF/cfi-zero-addr-delta.s | 2 +-
test/MC/ELF/cfi.s | 2 +-
test/MC/ELF/comdat-dup-group-name.s | 16 +-
test/MC/ELF/comdat-reloc.s | 6 +-
test/MC/ELF/comdat.s | 14 +-
test/MC/ELF/common-error1.s | 2 +-
test/MC/ELF/common-error2.s | 2 +-
test/MC/ELF/common2.s | 7 +-
test/MC/ELF/debug-loc.s | 2 +-
test/MC/ELF/div-by-zero.s | 6 +
test/MC/ELF/dot-symbol-assignment.s | 5 +-
test/MC/ELF/empty-twice.ll | 6 +
test/MC/ELF/empty.s | 36 +-
test/MC/ELF/many-sections-2.s | 3 +
test/MC/ELF/many-sections-3.s | 2 +
test/MC/ELF/many-sections.s | 3 +-
test/MC/ELF/popsection.s | 4 +-
test/MC/ELF/relax-arith.s | 32 +
test/MC/ELF/relocation-386.s | 5 +-
test/MC/ELF/relocation-pc.s | 8 +-
test/MC/ELF/relocation.s | 8 +-
test/MC/ELF/section-sym.s | 18 +-
test/MC/ELF/section-unique.s | 4 +-
test/MC/ELF/section.s | 2 +-
test/MC/ELF/sleb.s | 16 +-
test/MC/ELF/strtab-suffix-opt.s | 6 +-
test/MC/ELF/uleb.s | 16 +-
test/MC/Hexagon/asmMap.s | 608 +
test/MC/Hexagon/capitalizedEndloop.s | 29 +
test/MC/Hexagon/dcfetch.s | 15 +
test/MC/Hexagon/empty_asm.s | 15 +
test/MC/Hexagon/endloop.s | 19 +
test/MC/Hexagon/got.s | 11 +
test/MC/Hexagon/inst_and64.ll | 2 +-
test/MC/Hexagon/inst_or64.ll | 2 +-
test/MC/Hexagon/inst_xor64.ll | 2 +-
test/MC/Hexagon/instructions/alu32_alu.s | 84 +
test/MC/Hexagon/instructions/alu32_perm.s | 40 +
test/MC/Hexagon/instructions/alu32_pred.s | 222 +
test/MC/Hexagon/instructions/cr.s | 78 +
test/MC/Hexagon/instructions/j.s | 206 +
test/MC/Hexagon/instructions/jr.s | 38 +
test/MC/Hexagon/instructions/ld.s | 493 +
test/MC/Hexagon/instructions/memop.s | 56 +
test/MC/Hexagon/instructions/nv_j.s | 180 +
test/MC/Hexagon/instructions/nv_st.s | 290 +
test/MC/Hexagon/instructions/st.s | 434 +
test/MC/Hexagon/instructions/system_user.s | 26 +
test/MC/Hexagon/instructions/xtype_alu.s | 395 +
test/MC/Hexagon/instructions/xtype_bit.s | 118 +
test/MC/Hexagon/instructions/xtype_complex.s | 128 +
test/MC/Hexagon/instructions/xtype_fp.s | 146 +
test/MC/Hexagon/instructions/xtype_mpy.s | 400 +
test/MC/Hexagon/instructions/xtype_perm.s | 104 +
test/MC/Hexagon/instructions/xtype_pred.s | 136 +
test/MC/Hexagon/instructions/xtype_shift.s | 260 +
test/MC/Hexagon/jumpdoublepound.s | 13 +
test/MC/Hexagon/labels.s | 26 +
test/MC/Hexagon/new-value-check.s | 72 +
test/MC/Hexagon/out_of_range.s | 10 +
test/MC/Hexagon/pcrel.s | 11 +
test/MC/Hexagon/relaxed_newvalue.s | 10 +
test/MC/Hexagon/test.s | 4 +
test/MC/Hexagon/two_ext.s | 12 +
test/MC/Hexagon/v60-alu.s | 312 +
test/MC/Hexagon/v60-permute.s | 51 +
test/MC/Hexagon/v60-shift.s | 39 +
test/MC/Hexagon/v60-vcmp.s | 84 +
test/MC/Hexagon/v60-vmem.s | 424 +
test/MC/Hexagon/v60-vmpy-acc.s | 123 +
test/MC/Hexagon/v60-vmpy1.s | 138 +
test/MC/Hexagon/v60lookup.s | 14 +
.../AArch64/darwin-ARM64-local-label-diff.s | 11 +-
test/MC/MachO/AArch64/reloc-errors.s | 10 +
test/MC/MachO/ARM/bad-darwin-ARM-reloc.s | 6 +
test/MC/MachO/ARM/compact-unwind-armv7k.s | 124 +
test/MC/MachO/ARM/darwin-ARM-reloc.s | 315 +-
test/MC/MachO/ARM/darwin-Thumb-reloc.s | 241 +-
test/MC/MachO/ARM/data-in-code.s | 53 +-
test/MC/MachO/ARM/empty-function-nop.ll | 24 +-
.../MachO/ARM/ios-version-min-load-command.s | 18 +-
.../ARM/long-call-branch-island-relocation.s | 22 +-
test/MC/MachO/ARM/no-subsections-reloc.s | 8 +-
test/MC/MachO/ARM/nop-armv4-padding.s | 7 +-
test/MC/MachO/ARM/nop-armv6t2-padding.s | 7 +-
test/MC/MachO/ARM/nop-thumb-padding.s | 7 +-
test/MC/MachO/ARM/nop-thumb2-padding.s | 7 +-
test/MC/MachO/ARM/relax-thumb-ldr-literal.s | 15 +-
test/MC/MachO/ARM/relax-thumb2-branches.s | 39 +-
test/MC/MachO/ARM/thumb-bl-jbits.s | 12 +-
.../MachO/ARM/thumb2-function-relative-load.s | 6 +-
test/MC/MachO/ARM/thumb2-movt-fixup.s | 32 +-
test/MC/MachO/ARM/thumb2-movw-fixup.s | 90 +-
.../MachO/ARM/tvos-version-min-load-command.s | 13 +
test/MC/MachO/ARM/version-min-diagnostics.s | 40 +
test/MC/MachO/ARM/version-min-diagnostics2.s | 34 +
test/MC/MachO/ARM/version-min.s | 16 +
.../ARM/watchos-version-min-load-command.s | 13 +
test/MC/MachO/PowerPC/coal-sections-powerpc.s | 46 +
test/MC/MachO/PowerPC/lit.local.cfg | 2 +
test/MC/MachO/absolute.s | 297 +-
test/MC/MachO/absolutize.s | 290 +-
test/MC/MachO/bad-darwin-x86_64-diff-relocs.s | 10 +-
test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s | 6 -
test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s | 6 -
test/MC/MachO/coal-sections-x86_64.s | 48 +
test/MC/MachO/comm-1.s | 223 +-
test/MC/MachO/cstexpr-gotpcrel-64.ll | 12 +-
test/MC/MachO/darwin-complex-difference.s | 228 +-
.../MachO/darwin-version-min-load-command.s | 28 +
.../MachO/darwin-x86_64-diff-reloc-assign.s | 16 +-
test/MC/MachO/darwin-x86_64-diff-relocs.s | 400 +-
test/MC/MachO/darwin-x86_64-nobase-relocs.s | 98 +-
test/MC/MachO/darwin-x86_64-reloc-offsets.s | 404 +-
test/MC/MachO/data.s | 115 +-
test/MC/MachO/debug_frame.s | 52 +-
test/MC/MachO/diff-with-two-sections.s | 135 +-
test/MC/MachO/direction_labels.s | 169 +-
test/MC/MachO/empty-twice.ll | 12 +
test/MC/MachO/file.s | 2 +-
test/MC/MachO/gen-dwarf.s | 4 +-
test/MC/MachO/i386-large-relocations.s | 24 +-
test/MC/MachO/indirect-symbols.s | 365 +-
test/MC/MachO/jcc.s | 98 +-
test/MC/MachO/lcomm-attributes.s | 253 +-
test/MC/MachO/linker-option-2.s | 37 +-
test/MC/MachO/linker-options.ll | 47 +-
test/MC/MachO/loc.s | 50 +-
test/MC/MachO/osx-version-min-load-command.s | 18 +-
test/MC/MachO/pcrel-to-other-section.s | 216 +-
test/MC/MachO/relax-jumps.s | 15 +-
test/MC/MachO/relax-recompute-align.s | 35 +-
test/MC/MachO/reloc-diff.s | 41 +-
test/MC/MachO/reloc-pcrel-offset.s | 33 +-
test/MC/MachO/reloc-pcrel.s | 53 +-
test/MC/MachO/section-align-1.s | 164 +-
test/MC/MachO/section-align-2.s | 257 +-
test/MC/MachO/section-attributes.s | 9 +-
test/MC/MachO/section-flags.s | 53 +-
test/MC/MachO/string-table.s | 201 +-
test/MC/MachO/symbol-diff.s | 241 +-
test/MC/MachO/symbol-flags.s | 628 +-
test/MC/MachO/symbol-indirect.s | 444 +-
test/MC/MachO/symbols-1.s | 620 +-
test/MC/MachO/tbss.s | 228 +-
test/MC/MachO/tdata.s | 211 +-
test/MC/MachO/temp-labels.s | 50 +-
test/MC/MachO/thread_init_func.s | 132 +-
test/MC/MachO/tls.s | 468 +-
test/MC/MachO/tlv-bss.ll | 15 +-
test/MC/MachO/tlv-reloc.s | 321 +-
test/MC/MachO/tlv.s | 213 +-
test/MC/MachO/values.s | 247 +-
test/MC/MachO/variable-exprs.s | 814 +-
test/MC/MachO/weakdef.s | 266 +-
test/MC/MachO/x86-data-in-code.ll | 5 +-
test/MC/MachO/x86_32-optimal_nop.s | 229 +-
.../MachO/x86_32-scattered-reloc-fallback.s | 8 +-
test/MC/MachO/x86_32-sections.s | 1190 +-
test/MC/MachO/x86_32-symbols.s | 2004 +-
test/MC/MachO/x86_64-reloc-arithmetic.s | 42 +-
test/MC/MachO/x86_64-sections.s | 1160 +-
test/MC/MachO/zerofill-1.s | 234 +-
test/MC/MachO/zerofill-2.s | 201 +-
test/MC/MachO/zerofill-3.s | 253 +-
test/MC/MachO/zerofill-4.s | 104 +-
test/MC/MachO/zerofill-5.s | 209 +-
test/MC/MachO/zerofill-sect-align.s | 32 +-
test/MC/Mips/branch-pseudos-bad.s | 17 +
test/MC/Mips/branch-pseudos.s | 180 +
test/MC/Mips/cnmips/invalid.s | 15 +
test/MC/Mips/cprestore-bad.s | 23 +
test/MC/Mips/cprestore-noreorder.s | 97 +
test/MC/Mips/cprestore-reorder.s | 98 +
test/MC/Mips/cprestore-warning-unused.s | 10 +
test/MC/Mips/cpsetup.s | 158 +-
test/MC/Mips/directive-ent.s | 50 +
test/MC/Mips/dsp/invalid.s | 25 +
test/MC/Mips/dsp/valid.s | 131 +
test/MC/Mips/dspr2/invalid.s | 20 +
test/MC/Mips/dspr2/valid.s | 179 +
test/MC/Mips/elf_basic.s | 2 +-
test/MC/Mips/eva/invalid-noeva-wrong-error.s | 69 +
test/MC/Mips/eva/invalid-noeva.s | 22 +
test/MC/Mips/eva/invalid.s | 11 +
test/MC/Mips/eva/invalid_R6.s | 20 +
test/MC/Mips/eva/valid_R6.s | 47 +
test/MC/Mips/eva/valid_preR6.s | 62 +
test/MC/Mips/expansion-jal-sym-pic.s | 183 +
test/MC/Mips/instalias-imm-expanding.s | 273 +
test/MC/Mips/macro-bcc-imm-bad.s | 12 +
test/MC/Mips/macro-bcc-imm.s | 69 +
test/MC/Mips/macro-ddiv-bad.s | 18 +
test/MC/Mips/macro-ddiv.s | 85 +
test/MC/Mips/macro-ddivu-bad.s | 18 +
test/MC/Mips/macro-ddivu.s | 59 +
test/MC/Mips/macro-div-bad.s | 18 +
test/MC/Mips/macro-div.s | 64 +
test/MC/Mips/macro-divu-bad.s | 18 +
test/MC/Mips/macro-divu.s | 49 +
test/MC/Mips/macro-dla.s | 707 +
test/MC/Mips/macro-dli.s | 534 +
test/MC/Mips/macro-la-bad.s | 24 +-
test/MC/Mips/macro-la.s | 40 +-
test/MC/Mips/micromips-control-instructions.s | 43 +-
test/MC/Mips/micromips-diagnostic-fixup.s | 7 +-
.../Mips/micromips-dsp/invalid-wrong-error.s | 7 +
test/MC/Mips/micromips-dsp/invalid.s | 23 +
test/MC/Mips/micromips-dsp/valid.s | 105 +
test/MC/Mips/micromips-dspr2/invalid.s | 9 +
test/MC/Mips/micromips-dspr2/valid.s | 127 +
test/MC/Mips/micromips-invalid.s | 27 +-
.../Mips/micromips-loadstore-instructions.s | 24 +
test/MC/Mips/micromips-pc16-fixup.s | 2 +-
test/MC/Mips/micromips/invalid.s | 35 +
test/MC/Mips/micromips32r6/invalid.s | 119 +-
test/MC/Mips/micromips32r6/valid.s | 199 +-
test/MC/Mips/micromips64r6/invalid.s | 145 +
test/MC/Mips/micromips64r6/valid.s | 154 +
test/MC/Mips/mips-alu-instructions.s | 2 +-
test/MC/Mips/mips-diagnostic-fixup.s | 7 +-
test/MC/Mips/mips-dsp-instructions.s | 97 -
test/MC/Mips/mips-expansions-bad.s | 9 +
test/MC/Mips/mips-expansions.s | 215 +-
test/MC/Mips/mips-pc16-fixup.s | 2 +-
test/MC/Mips/mips-pdr.s | 2 +-
test/MC/Mips/mips1/valid.s | 4 +-
test/MC/Mips/mips2/valid.s | 4 +-
test/MC/Mips/mips3/valid.s | 8 +-
test/MC/Mips/mips32/valid.s | 6 +-
test/MC/Mips/mips32r2/invalid-dsp.s | 97 +
test/MC/Mips/mips32r2/invalid-dspr2.s | 134 +
test/MC/Mips/mips32r2/invalid-msa.s | 62 +
test/MC/Mips/mips32r2/invalid.s | 26 +-
test/MC/Mips/mips32r2/valid-xfail.s | 178 -
test/MC/Mips/mips32r2/valid.s | 6 +-
test/MC/Mips/mips32r3/invalid.s | 10 +-
test/MC/Mips/mips32r3/valid-xfail.s | 178 -
test/MC/Mips/mips32r3/valid.s | 6 +-
test/MC/Mips/mips32r5/invalid-mips32.s | 8 +
test/MC/Mips/mips32r5/invalid-mips32r2.s | 8 +
test/MC/Mips/mips32r5/invalid-mips32r3.s | 8 +
test/MC/Mips/mips32r5/invalid.s | 10 +-
test/MC/Mips/mips32r5/valid-xfail.s | 178 -
test/MC/Mips/mips32r5/valid.s | 7 +-
.../Mips/mips32r6/invalid-mips1-wrong-error.s | 9 +-
.../Mips/mips32r6/invalid-mips4-wrong-error.s | 1 -
test/MC/Mips/mips32r6/invalid-mips4.s | 1 +
test/MC/Mips/mips32r6/invalid.s | 43 +-
test/MC/Mips/mips32r6/valid.s | 10 +-
test/MC/Mips/mips4/valid.s | 8 +-
test/MC/Mips/mips5/valid.s | 8 +-
test/MC/Mips/mips64-alu-instructions.s | 6 +-
test/MC/Mips/mips64-expansions.s | 221 +-
test/MC/Mips/mips64/valid.s | 10 +-
test/MC/Mips/mips64r2/invalid.s | 64 +-
test/MC/Mips/mips64r2/valid-xfail.s | 197 +-
test/MC/Mips/mips64r2/valid.s | 10 +-
test/MC/Mips/mips64r3/invalid.s | 12 +-
test/MC/Mips/mips64r3/valid-xfail.s | 194 +-
test/MC/Mips/mips64r3/valid.s | 10 +-
test/MC/Mips/mips64r5/invalid-mips64.s | 8 +
test/MC/Mips/mips64r5/invalid-mips64r2.s | 8 +
test/MC/Mips/mips64r5/invalid-mips64r3.s | 8 +
test/MC/Mips/mips64r5/invalid.s | 12 +-
test/MC/Mips/mips64r5/valid-xfail.s | 194 +-
test/MC/Mips/mips64r5/valid.s | 11 +-
.../Mips/mips64r6/invalid-mips1-wrong-error.s | 8 +-
.../Mips/mips64r6/invalid-mips3-wrong-error.s | 8 +-
.../Mips/mips64r6/invalid-mips4-wrong-error.s | 1 -
test/MC/Mips/mips64r6/invalid-mips4.s | 1 +
test/MC/Mips/mips64r6/invalid.s | 45 +-
test/MC/Mips/mips64r6/valid.s | 11 +-
test/MC/Mips/msa/invalid-64.s | 66 +
test/MC/Mips/msa/invalid.s | 67 +
test/MC/Mips/msa/test_elm.s | 45 +-
test/MC/Mips/msa/test_elm_msa64.s | 6 +-
test/MC/Mips/reloc-directive-bad.s | 6 +
test/MC/Mips/reloc-directive.s | 58 +
test/MC/Mips/rotations32-bad.s | 31 +
test/MC/Mips/rotations32.s | 87 +
test/MC/Mips/rotations64.s | 238 +
test/MC/Mips/set-nomacro.s | 15 +
test/MC/PowerPC/ppc-llong.s | 2 +-
test/MC/PowerPC/ppc-word.s | 2 +-
test/MC/PowerPC/ppc64-encoding.s | 22 +-
test/MC/PowerPC/ppc64-fixup-apply.s | 2 +-
test/MC/PowerPC/pr24686.s | 7 +
test/MC/PowerPC/st-other-crash.s | 2 +-
test/MC/PowerPC/vsx.s | 12 +
test/MC/Sparc/sparc-alu-instructions.s | 6 +
test/MC/Sparc/sparc-asm-errors.s | 8 +
test/MC/Sparc/sparc-assembly-exprs.s | 9 +-
test/MC/Sparc/sparc-atomic-instructions.s | 9 +
test/MC/Sparc/sparc-ctrl-instructions.s | 4 +
test/MC/Sparc/sparc-fp-instructions.s | 9 +
test/MC/Sparc/sparc-mem-instructions.s | 18 +
test/MC/Sparc/sparc-pic.s | 44 +-
test/MC/Sparc/sparc-relocations.s | 10 +-
test/MC/Sparc/sparc-special-registers.s | 18 +
test/MC/Sparc/sparc-synthetic-instructions.s | 72 +-
test/MC/Sparc/sparcv9-instructions.s | 272 +
test/MC/SystemZ/fixups.s | 8 +-
test/MC/SystemZ/insn-good-z13.s | 66 +-
test/MC/SystemZ/insn-good.s | 56 +
test/MC/SystemZ/lit.local.cfg | 4 -
test/MC/X86/X86_64-pku.s | 8 +
test/MC/X86/avx512-encodings.s | 5181 +++++
test/MC/X86/avx512vl-encoding.s | 209 +
test/MC/X86/cfi_def_cfa-crash.s | 24 +-
test/MC/X86/encoder-fail.s | 3 +
test/MC/X86/expand-var.s | 7 +-
test/MC/X86/i386-darwin-frame-register.ll | 2 +-
test/MC/X86/intel-syntax-2.s | 14 +
test/MC/X86/intel-syntax-ambiguous.s | 12 +
test/MC/X86/intel-syntax-avx512.s | 96 +
test/MC/X86/intel-syntax-print.ll | 10 +
test/MC/X86/intel-syntax.s | 72 +
test/MC/X86/large-bss.s | 14 +
test/MC/X86/macho-reloc-errors-x86.s | 15 +
test/MC/X86/macho-reloc-errors-x86_64.s | 19 +
test/MC/X86/validate-inst-att.s | 17 +-
test/MC/X86/validate-inst-intel.s | 8 +-
test/MC/X86/x86-32-coverage.s | 20 +
test/MC/X86/x86-64-avx512bw.s | 1059 ++
test/MC/X86/x86-64-avx512bw_vl.s | 3264 +++-
test/MC/X86/x86-64-avx512cd.s | 450 +
test/MC/X86/x86-64-avx512cd_vl.s | 913 +
test/MC/X86/x86-64-avx512dq.s | 2229 +++
test/MC/X86/x86-64-avx512dq_vl.s | 1720 ++
test/MC/X86/x86-64-avx512f_vl.s | 5560 ++++++
test/MC/X86/x86-64.s | 18 +-
test/MC/X86/x86-evenDirective.s | 47 +
test/MC/X86/x86_nop.s | 8 +-
test/Makefile | 3 +
test/Object/AMDGPU/elf-definitios.yaml | 27 +
test/Object/Inputs/coff-short-import-code | Bin 0 -> 31 bytes
test/Object/Inputs/coff-short-import-data | Bin 0 -> 31 bytes
...pt-invalid-dynamic-table-offset.elf.x86-64 | Bin 0 -> 1688 bytes
...rupt-invalid-dynamic-table-size.elf.x86-64 | Bin 0 -> 1736 bytes
...invalid-dynamic-table-too-large.elf.x86-64 | Bin 0 -> 1688 bytes
.../corrupt-invalid-phentsize.elf.x86-64 | Bin 0 -> 1720 bytes
...corrupt-invalid-relocation-size.elf.x86-64 | Bin 0 -> 2160 bytes
.../Inputs/corrupt-invalid-strtab.elf.x86-64 | Bin 0 -> 1712 bytes
.../corrupt-invalid-virtual-addr.elf.x86-64 | Bin 0 -> 1720 bytes
.../Inputs/invalid-symbol-table-size.elf | Bin 0 -> 536 bytes
test/Object/Inputs/invalid-xindex-size.elf | Bin 0 -> 624 bytes
test/Object/Inputs/main-ret-zero-pe-i386.dll | Bin 0 -> 5120 bytes
test/Object/Inputs/main-ret-zero-pe-i386.exe | Bin 0 -> 5120 bytes
.../no-section-header-string-table.elf-x86-64 | Bin 0 -> 1024 bytes
test/Object/Inputs/pr25877.lib | Bin 0 -> 774 bytes
.../Object/Inputs/rel-no-sec-table.elf-x86-64 | Bin 0 -> 2152 bytes
test/Object/Inputs/shndx.elf | Bin 0 -> 824 bytes
.../Object/Inputs/trivial-object-test.elf-avr | Bin 0 -> 840 bytes
test/Object/X86/nm-ir.ll | 4 +-
test/Object/archive-format.test | 41 +-
test/Object/archive-symtab.test | 23 +-
test/Object/archive-update.test | 17 +-
test/Object/corrupt.test | 58 +-
test/Object/invalid.test | 8 +
test/Object/nm-archive.test | 9 +
test/Object/nm-pe-image.test | 31 +
.../no-section-header-string-table.test | 10 +
test/Object/obj2yaml.test | 65 +
test/Object/objdump-shndx.test | 8 +
test/Object/pr25877.test | 9 +
test/Object/readobj-absent.test | 2 +
test/Object/readobj-shared-object.test | 38 +-
test/Object/relocation-executable.test | 12 +
test/Other/2010-05-06-Printer.ll | 1 +
test/Other/extract-alias.ll | 22 +-
test/Other/llvm-nm-without-aliases.ll | 4 +-
test/Other/opt-twice.ll | 14 +
test/SymbolRewriter/rewrite.ll | 2 +-
test/TableGen/cast-list-initializer.td | 10 +
test/TableGen/intrinsic-varargs.td | 4 +-
test/TableGen/trydecode-emission.td | 43 +
test/TableGen/trydecode-emission2.td | 44 +
test/TableGen/trydecode-emission3.td | 44 +
test/Transforms/ADCE/basictest.ll | 3 +-
test/Transforms/AddDiscriminators/basic.ll | 14 +-
test/Transforms/AddDiscriminators/call.ll | 52 +
.../dbg-declare-discriminator.ll | 30 +
test/Transforms/AddDiscriminators/diamond.ll | 72 +
.../AddDiscriminators/first-only.ll | 14 +-
test/Transforms/AddDiscriminators/multiple.ll | 12 +-
.../AddDiscriminators/no-discriminators.ll | 14 +-
test/Transforms/AddDiscriminators/oneline.ll | 102 +
test/Transforms/ArgumentPromotion/dbg.ll | 12 +-
.../AtomicExpand/ARM/atomic-expansion-v7.ll | 24 +-
.../AtomicExpand/ARM/atomic-expansion-v8.ll | 24 +-
.../AtomicExpand/ARM/cmpxchg-weak.ll | 20 +-
.../X86/expand-atomic-non-integer.ll | 82 +
.../X86/expand-atomic-rmw-initial-load.ll | 11 +
.../Transforms/AtomicExpand/X86/lit.local.cfg | 2 +
test/Transforms/BBVectorize/X86/wr-aliases.ll | 2 +-
test/Transforms/BBVectorize/simple3.ll | 16 +-
.../2007-10-19-InlineAsmDirectives.ll | 9 +-
.../CodeGenPrepare/AArch64/free-zext.ll | 82 +
.../CodeGenPrepare/AArch64/widen_switch.ll | 95 +
.../CodeGenPrepare/X86/catchpad-phi-cast.ll | 118 +
.../CodeGenPrepare/X86/cttz-ctlz.ll | 56 +
test/Transforms/CodeGenPrepare/X86/select.ll | 141 +
.../CodeGenPrepare/X86/widen_switch.ll | 95 +
.../CodeGenPrepare/invariant.group.ll | 23 +
.../CodeGenPrepare/statepoint-relocate.ll | 87 +-
test/Transforms/ConstProp/calls.ll | 240 +-
test/Transforms/ConstProp/insertvalue.ll | 10 +
test/Transforms/ConstProp/loads.ll | 7 +-
test/Transforms/ConstantMerge/merge-both.ll | 2 +-
.../CorrelatedValuePropagation/non-null.ll | 60 +
.../CorrelatedValuePropagation/range.ll | 24 +
.../CorrelatedValuePropagation/select.ll | 2 +-
test/Transforms/CrossDSOCFI/basic.ll | 88 +
.../DeadArgElim/2010-04-30-DbgInfo.ll | 24 +-
test/Transforms/DeadArgElim/aggregates.ll | 26 +-
test/Transforms/DeadArgElim/dbginfo.ll | 15 +-
.../Transforms/DeadArgElim/naked_functions.ll | 31 +
test/Transforms/DeadArgElim/operandbundle.ll | 12 +
.../DeadStoreElimination/calloc-store.ll | 65 +
.../DeadStoreElimination/inst-limits.ll | 8 +-
.../Transforms/DeadStoreElimination/simple.ll | 147 +
test/Transforms/EarlyCSE/AArch64/ldstN.ll | 18 +
test/Transforms/EarlyCSE/atomics.ll | 259 +
test/Transforms/EarlyCSE/basic.ll | 74 +
test/Transforms/EarlyCSE/fence.ll | 86 +
test/Transforms/Float2Int/basic.ll | 10 +
test/Transforms/ForcedFunctionAttrs/forced.ll | 12 +
.../FunctionAttrs/2008-09-03-ReadNone.ll | 5 +-
.../FunctionAttrs/2009-01-04-Annotate.ll | 21 -
.../FunctionAttrs/2010-10-30-volatile.ll | 4 +-
test/Transforms/FunctionAttrs/atomic.ll | 4 +-
test/Transforms/FunctionAttrs/nonnull.ll | 74 +
test/Transforms/FunctionAttrs/norecurse.ll | 57 +
test/Transforms/FunctionAttrs/optnone.ll | 6 +-
.../out-of-bounds-iterator-bug.ll | 30 +
test/Transforms/FunctionAttrs/readattrs.ll | 38 +
.../FunctionImport/Inputs/funcimport.ll | 87 +
.../FunctionImport/Inputs/funcimport_debug.ll | 27 +
test/Transforms/FunctionImport/funcimport.ll | 75 +
.../FunctionImport/funcimport_debug.ll | 45 +
.../GCOVProfiling/function-numbering.ll | 14 +-
test/Transforms/GCOVProfiling/global-ctor.ll | 8 +-
test/Transforms/GCOVProfiling/linezero.ll | 18 +-
test/Transforms/GCOVProfiling/linkagename.ll | 6 +-
test/Transforms/GCOVProfiling/return-block.ll | 6 +-
test/Transforms/GCOVProfiling/version.ll | 6 +-
test/Transforms/GVN/2009-03-10-PREOnVoid.ll | 28 +-
test/Transforms/GVN/assume-equal.ll | 235 +
test/Transforms/GVN/crash-no-aa.ll | 2 +-
test/Transforms/GVN/funclet.ll | 44 +
test/Transforms/GVN/invariant-load.ll | 17 +
test/Transforms/GVN/invariant.group.ll | 337 +
test/Transforms/GVN/load-pre-nonlocal.ll | 4 +-
.../GVN/no_speculative_loads_with_asan.ll | 57 +
test/Transforms/GVN/phi-translate.ll | 4 +-
test/Transforms/GVN/pr14166.ll | 2 +-
test/Transforms/GVN/pr24426.ll | 18 +
test/Transforms/GVN/pr25440.ll | 108 +
test/Transforms/GVN/pre-gep-load.ll | 31 +
test/Transforms/GVN/pre-load.ll | 41 +
test/Transforms/GVN/range.ll | 24 +-
.../GlobalDCE/2009-01-05-DeadAliases.ll | 14 +-
.../GlobalDCE/2009-02-17-AliasUsesAliasee.ll | 2 +-
test/Transforms/GlobalDCE/pr20981.ll | 4 +-
.../GlobalOpt/2009-02-15-BitcastAlias.ll | 2 +-
.../GlobalOpt/2009-02-15-ResolveAlias.ll | 4 +-
test/Transforms/GlobalOpt/2009-03-05-dbg.ll | 8 +-
test/Transforms/GlobalOpt/alias-resolve.ll | 22 +-
.../GlobalOpt/alias-used-address-space.ll | 6 +-
.../GlobalOpt/alias-used-section.ll | 2 +-
test/Transforms/GlobalOpt/alias-used.ll | 16 +-
test/Transforms/GlobalOpt/assume.ll | 21 +
.../available_externally_global_ctors.ll | 22 +
test/Transforms/GlobalOpt/deadglobal.ll | 3 +
.../externally-initialized-aggregate.ll | 50 +
.../GlobalOpt/externally-initialized.ll | 37 +
test/Transforms/GlobalOpt/global-demotion.ll | 80 +
.../GlobalOpt/invariant.group.barrier.ll | 79 +
.../GlobalOpt/localize-constexpr.ll | 28 +
test/Transforms/GlobalOpt/metadata.ll | 2 +-
test/Transforms/GlobalOpt/tls.ll | 1 +
test/Transforms/GlobalOpt/unnamed-addr.ll | 6 +
test/Transforms/IndVarSimplify/bec-cmp.ll | 47 +
test/Transforms/IndVarSimplify/const_phi.ll | 33 +
.../IndVarSimplify/eliminate-comparison.ll | 348 +
test/Transforms/IndVarSimplify/iv-widen.ll | 30 +-
.../loop-invariant-conditions.ll | 279 +
test/Transforms/IndVarSimplify/pr24356.ll | 63 +
test/Transforms/IndVarSimplify/pr24783.ll | 30 +
test/Transforms/IndVarSimplify/pr24804.ll | 25 +
test/Transforms/IndVarSimplify/pr24952.ll | 27 +
test/Transforms/IndVarSimplify/pr24956.ll | 37 +
test/Transforms/IndVarSimplify/pr25047.ll | 49 +
test/Transforms/IndVarSimplify/pr25051.ll | 44 +
test/Transforms/IndVarSimplify/pr25060.ll | 37 +
test/Transforms/IndVarSimplify/pr25360.ll | 33 +
test/Transforms/IndVarSimplify/pr25421.ll | 30 +
test/Transforms/IndVarSimplify/pr25578.ll | 45 +
.../IndVarSimplify/tripcount_infinite.ll | 15 +-
.../IndVarSimplify/widen-loop-comp.ll | 160 +
test/Transforms/IndVarSimplify/zext-nuw.ll | 49 +
.../annotate.ll} | 5 +-
.../Inline/alloca-dbgdeclare-merge.ll | 102 +
test/Transforms/Inline/alloca-dbgdeclare.ll | 16 +-
.../Inline/debug-info-duplicate-calls.ll | 26 +-
test/Transforms/Inline/debug-invoke.ll | 4 +-
test/Transforms/Inline/deopt-bundles.ll | 203 +
test/Transforms/Inline/ignore-debug-info.ll | 16 +-
test/Transforms/Inline/inline-assume.ll | 31 +
test/Transforms/Inline/inline-cold-callee.ll | 39 +
...inline-constexpr-addrspacecast-argument.ll | 30 +
test/Transforms/Inline/inline-hot-callee.ll | 39 +
test/Transforms/Inline/inline-optsize.ll | 2 +-
test/Transforms/Inline/inline_dbg_declare.ll | 26 +-
test/Transforms/Inline/inline_invoke.ll | 3 +-
test/Transforms/Inline/noalias-calls.ll | 19 +-
test/Transforms/Inline/noalias-cs.ll | 12 +-
test/Transforms/Inline/noalias2.ll | 4 +-
test/Transforms/Inline/zero-cost.ll | 17 +
.../InstCombine/2007-09-10-AliasConstFold.ll | 2 +-
.../InstCombine/2007-09-17-AliasConstFold2.ll | 2 +-
.../InstCombine/LandingPadClauses.ll | 11 +-
test/Transforms/InstCombine/add2.ll | 10 +
.../Transforms/InstCombine/alias-recursion.ll | 2 +-
test/Transforms/InstCombine/all-bits-shift.ll | 46 +
test/Transforms/InstCombine/alloca.ll | 11 +
test/Transforms/InstCombine/and-compare.ll | 23 +
test/Transforms/InstCombine/and2.ll | 68 +
test/Transforms/InstCombine/apint-or.ll | 79 +
test/Transforms/InstCombine/apint-or1.ll | 36 -
test/Transforms/InstCombine/apint-or2.ll | 35 -
.../InstCombine/assume-redundant.ll | 26 +
.../InstCombine/bitcast-alias-function.ll | 24 +-
.../Transforms/InstCombine/bitcast-bitcast.ll | 84 +
.../InstCombine/bitcast-vec-canon.ll | 25 +-
test/Transforms/InstCombine/bitcast.ll | 55 +
.../Transforms/InstCombine/bitreverse-fold.ll | 11 +
.../InstCombine/bitreverse-recognize.ll | 114 +
test/Transforms/InstCombine/blend_x86.ll | 102 +-
test/Transforms/InstCombine/bswap-fold.ll | 6 +-
.../InstCombine/bswap-known-bits.ll | 47 +
test/Transforms/InstCombine/bswap.ll | 14 +-
.../InstCombine/call_nonnull_arg.ll | 20 +
.../InstCombine/cast-callee-deopt-bundles.ll | 11 +
.../InstCombine/cast-int-fcmp-eq-0.ll | 108 +-
test/Transforms/InstCombine/cast-set.ll | 4 +-
test/Transforms/InstCombine/cast.ll | 54 +-
test/Transforms/InstCombine/compare-alloca.ll | 97 +
test/Transforms/InstCombine/compare-signs.ll | 40 +
.../InstCombine/constant-fold-alias.ll | 4 +-
test/Transforms/InstCombine/ctpop.ll | 45 +
test/Transforms/InstCombine/debug-line.ll | 6 +-
test/Transforms/InstCombine/debuginfo.ll | 12 +-
test/Transforms/InstCombine/demorgan-zext.ll | 34 +
test/Transforms/InstCombine/div.ll | 24 +-
test/Transforms/InstCombine/exp2-1.ll | 19 +-
test/Transforms/InstCombine/extractvalue.ll | 22 +-
test/Transforms/InstCombine/fabs.ll | 25 +
test/Transforms/InstCombine/fast-math.ll | 141 +-
test/Transforms/InstCombine/ffs-1.ll | 69 +-
.../InstCombine/fold-phi-load-metadata.ll | 69 +
test/Transforms/InstCombine/gc.relocate.ll | 39 +-
test/Transforms/InstCombine/gepphigep.ll | 50 +
test/Transforms/InstCombine/icmp-range.ll | 89 +
test/Transforms/InstCombine/icmp-shr.ll | 9 +
test/Transforms/InstCombine/icmp.ll | 73 +-
.../InstCombine/inline-intrinsic-assert.ll | 2 +-
.../InstCombine/insert-extract-shuffle.ll | 47 +-
test/Transforms/InstCombine/intrinsics.ll | 65 +
test/Transforms/InstCombine/lifetime.ll | 93 +
test/Transforms/InstCombine/load-cmp.ll | 7 +-
.../InstCombine/load-combine-metadata-2.ll | 20 +
.../InstCombine/load-combine-metadata-3.ll | 20 +
.../InstCombine/load-combine-metadata-4.ll | 20 +
.../InstCombine/load-combine-metadata.ll | 6 +-
.../InstCombine/loadstore-metadata.ll | 38 +-
.../InstCombine/log-pow-nofastmath.ll | 30 +
test/Transforms/InstCombine/log-pow.ll | 41 +
.../InstCombine/malloc-free-delete.ll | 11 +
test/Transforms/InstCombine/memcmp-1.ll | 53 +-
test/Transforms/InstCombine/memset_chk-1.ll | 26 +
test/Transforms/InstCombine/minmax-fp.ll | 156 +
.../Transforms/InstCombine/neon-intrinsics.ll | 12 +-
.../Transforms/InstCombine/no_cgscc_assert.ll | 2 +-
.../InstCombine/nonnull-attribute.ll | 19 +
test/Transforms/InstCombine/not.ll | 47 +-
.../InstCombine/objsize-address-space.ll | 2 +-
test/Transforms/InstCombine/objsize.ll | 4 +-
test/Transforms/InstCombine/or.ll | 2 +-
.../InstCombine/phi-load-metadata-2.ll | 30 +
.../InstCombine/phi-load-metadata-3.ll | 30 +
.../InstCombine/phi-load-metadata.ll | 30 +
test/Transforms/InstCombine/phi.ll | 130 +
test/Transforms/InstCombine/pow-1.ll | 2 +
test/Transforms/InstCombine/pow-4.ll | 120 +
.../InstCombine/pow-exp-nofastmath.ll | 17 +
test/Transforms/InstCombine/pow-exp.ll | 28 +
test/Transforms/InstCombine/pow-exp2.ll | 19 +
test/Transforms/InstCombine/pow-sqrt.ll | 15 +
test/Transforms/InstCombine/pr20059.ll | 16 -
test/Transforms/InstCombine/pr24605.ll | 15 +
test/Transforms/InstCombine/pr25745.ll | 20 +
test/Transforms/InstCombine/shift.ll | 4 +-
test/Transforms/InstCombine/sincospi.ll | 9 +
test/Transforms/InstCombine/sqrt-nofast.ll | 25 +
test/Transforms/InstCombine/statepoint.ll | 20 +-
test/Transforms/InstCombine/store.ll | 113 +
test/Transforms/InstCombine/strto-1.ll | 2 +-
test/Transforms/InstCombine/tan-nofastmath.ll | 17 +
test/Transforms/InstCombine/tan.ll | 24 +
test/Transforms/InstCombine/token.ll | 89 +
test/Transforms/InstCombine/trunc.ll | 42 +
test/Transforms/InstCombine/unpack-fca.ll | 168 +-
.../InstCombine/vec_demanded_elts.ll | 359 +-
test/Transforms/InstCombine/vec_shuffle.ll | 27 +-
test/Transforms/InstCombine/vector_gep2.ll | 23 +
test/Transforms/InstCombine/x86-f16c.ll | 61 +
test/Transforms/InstCombine/x86-pmovsx.ll | 136 +
test/Transforms/InstCombine/x86-pmovzx.ll | 136 +
test/Transforms/InstCombine/x86-pshufb.ll | 267 +
test/Transforms/InstCombine/x86-sse4a.ll | 336 +
.../InstCombine/x86-vector-shifts.ll | 1318 ++
test/Transforms/InstCombine/x86-xop.ll | 209 +
test/Transforms/InstCombine/xor.ll | 8 +-
test/Transforms/InstSimplify/add-mask.ll | 65 +
test/Transforms/InstSimplify/apint-or.ll | 36 +-
test/Transforms/InstSimplify/bswap.ll | 41 +
test/Transforms/InstSimplify/compare.ll | 8 +
test/Transforms/InstSimplify/implies.ll | 217 +
test/Transforms/InstSimplify/shift-128-kb.ll | 22 +
test/Transforms/InstSimplify/shr-nop.ll | 12 +-
.../2009-01-05-InternalizeAliases.ll | 8 +-
test/Transforms/Internalize/comdat.ll | 52 +
.../Internalize/local-visibility.ll | 8 +-
test/Transforms/JumpThreading/basic.ll | 34 +-
test/Transforms/JumpThreading/implied-cond.ll | 98 +
test/Transforms/JumpThreading/phi-known.ll | 66 +
test/Transforms/JumpThreading/select.ll | 30 +
.../JumpThreading/update-edge-weight.ll | 43 +
test/Transforms/LCSSA/mixed-catch.ll | 95 +
.../2004-09-14-AliasAnalysisInvalidate.ll | 2 +-
test/Transforms/LICM/argmemonly-call.ll | 69 +
test/Transforms/LICM/debug-value.ll | 12 +-
test/Transforms/LICM/hoist-deref-load.ll | 44 +
test/Transforms/LICM/hoist-invariant-load.ll | 2 +-
test/Transforms/LICM/pr23608.ll | 2 +-
.../LoopDistribute/basic-with-memchecks.ll | 2 +-
.../LoopDistribute/bounds-expansion-bug.ll | 106 +
.../unknown-bounds-for-memchecks.ll | 57 +
test/Transforms/LoopIdiom/basic.ll | 107 +
test/Transforms/LoopIdiom/debug-line.ll | 10 +-
test/Transforms/LoopLoadElim/backward.ll | 32 +
.../LoopLoadElim/def-store-before-load.ll | 35 +
test/Transforms/LoopLoadElim/forward.ll | 47 +
test/Transforms/LoopLoadElim/memcheck.ll | 52 +
.../multiple-stores-same-block.ll | 48 +
test/Transforms/LoopLoadElim/unknown-dep.ll | 54 +
test/Transforms/LoopReroll/negative.ll | 48 +
test/Transforms/LoopReroll/reroll_with_dbg.ll | 139 +
test/Transforms/LoopRotate/dbgvalue.ll | 14 +-
test/Transforms/LoopSimplify/dbg-loc.ll | 4 +-
.../LoopSimplify/single-backedge.ll | 2 +-
...fferent-addrspace-addressing-mode-loops.ll | 156 +
.../LoopStrengthReduce/AMDGPU/lit.local.cfg | 3 +
.../AMDGPU/lsr-postinc-pos-addrspace.ll | 113 +
.../LoopStrengthReduce/ARM/ivchain-ARM.ll | 50 +-
.../LoopStrengthReduce/NVPTX/lit.local.cfg | 2 +
.../LoopStrengthReduce/NVPTX/trunc.ll | 45 +
.../X86/ivchain-stress-X86.ll | 2 +-
test/Transforms/LoopStrengthReduce/funclet.ll | 216 +
test/Transforms/LoopStrengthReduce/pr12018.ll | 5 +-
test/Transforms/LoopStrengthReduce/pr25541.ll | 48 +
.../quadradic-exit-value.ll | 2 +-
.../LoopStrengthReduce/sext-ind-var.ll | 140 +
.../LoopUnroll/AMDGPU/lit.local.cfg | 3 +
.../LoopUnroll/AMDGPU/unroll-barrier.ll | 33 +
test/Transforms/LoopUnroll/X86/partial.ll | 9 +-
.../LoopUnroll/full-unroll-bad-geps.ll | 34 -
.../LoopUnroll/full-unroll-crashers.ll | 102 +
.../LoopUnroll/full-unroll-heuristics-2.ll | 57 +
.../LoopUnroll/full-unroll-heuristics-cast.ll | 97 +
.../LoopUnroll/full-unroll-heuristics-cmp.ll | 207 +
.../full-unroll-heuristics-phi-prop.ll | 23 +
test/Transforms/LoopUnroll/pr18861.ll | 93 +-
test/Transforms/LoopUnroll/rebuild_lcssa.ll | 119 +
test/Transforms/LoopUnroll/runtime-loop1.ll | 4 +-
test/Transforms/LoopUnroll/unroll-pragmas.ll | 66 +
.../LoopUnswitch/2011-11-18-SimpleSwitch.ll | 2 +-
.../LoopUnswitch/2011-11-18-TwoSwitches.ll | 2 +-
.../LoopUnswitch/2015-09-18-Addrspace.ll | 28 +
.../LoopUnswitch/LIV-loop-condtion.ll | 28 +
test/Transforms/LoopUnswitch/basictest.ll | 39 +
test/Transforms/LoopUnswitch/cleanuppad.ll | 44 +
test/Transforms/LoopUnswitch/cold-loop.ll | 52 +
test/Transforms/LoopUnswitch/copy-metadata.ll | 23 +
test/Transforms/LoopUnswitch/infinite-loop.ll | 10 +-
.../LoopUnswitch/trivial-unswitch.ll | 47 +
.../AArch64/arbitrary-induction-step.ll | 4 +-
.../AArch64/deterministic-type-shrinkage.ll | 54 +
.../LoopVectorize/AArch64/interleaved_cost.ll | 39 +
.../AArch64/loop-vectorization-factors.ll | 243 +
.../AArch64/reduction-small-size.ll | 191 +
.../LoopVectorize/ARM/interleaved_cost.ll | 39 +
.../LoopVectorize/ARM/vector_cast.ll | 37 +
.../PowerPC/agg-interleave-a2.ll | 40 +
.../PowerPC/stride-vectorization.ll | 30 +
.../LoopVectorize/X86/masked_load_store.ll | 142 +
.../LoopVectorize/X86/metadata-enable.ll | 6 +-
.../Transforms/LoopVectorize/X86/no_fpmath.ll | 104 +
.../Transforms/LoopVectorize/X86/powof2div.ll | 8 +-
.../LoopVectorize/X86/reduction-crash.ll | 2 +-
.../Transforms/LoopVectorize/X86/reg-usage.ll | 71 +
.../LoopVectorize/X86/vector_max_bandwidth.ll | 46 +
.../X86/vector_ptr_load_store.ll | 8 +-
.../X86/vectorization-remarks-missed.ll | 16 +-
.../X86/vectorization-remarks-profitable.ll | 113 +
.../X86/vectorization-remarks.ll | 8 +-
.../LoopVectorize/conditional-assignment.ll | 6 +-
test/Transforms/LoopVectorize/control-flow.ll | 8 +-
test/Transforms/LoopVectorize/dbg.value.ll | 8 +-
test/Transforms/LoopVectorize/debugloc.ll | 18 +-
.../LoopVectorize/gep_with_bitcast.ll | 40 +
.../LoopVectorize/if-pred-stores.ll | 43 +-
test/Transforms/LoopVectorize/induction.ll | 13 +-
test/Transforms/LoopVectorize/miniters.ll | 45 +
.../LoopVectorize/minmax_reduction.ll | 104 +-
.../LoopVectorize/no_array_bounds.ll | 6 +-
.../LoopVectorize/no_outside_user.ll | 2 +-
test/Transforms/LoopVectorize/no_switch.ll | 18 +-
test/Transforms/LoopVectorize/nontemporal.ll | 47 +
test/Transforms/LoopVectorize/optsize.ll | 43 +-
.../Transforms/LoopVectorize/ptr-induction.ll | 34 +
test/Transforms/LoopVectorize/reduction.ll | 2 +-
.../LoopVectorize/reverse_induction.ll | 9 +-
.../Transforms/LoopVectorize/runtime-check.ll | 6 +-
.../Transforms/LoopVectorize/runtime-limit.ll | 21 +-
test/Transforms/LowerBitSets/function-ext.ll | 22 +
test/Transforms/LowerBitSets/function.ll | 35 +
test/Transforms/LowerBitSets/nonstring.ll | 34 +
test/Transforms/LowerBitSets/pr25902.ll | 21 +
test/Transforms/LowerBitSets/simple.ll | 36 +-
test/Transforms/LowerExpectIntrinsic/basic.ll | 2 +-
.../LowerSwitch/delete-default-block-crash.ll | 27 +
test/Transforms/LowerSwitch/feature.ll | 60 +-
test/Transforms/Mem2Reg/ConvertDebugInfo.ll | 14 +-
test/Transforms/Mem2Reg/ConvertDebugInfo2.ll | 28 +-
test/Transforms/Mem2Reg/optnone.ll | 21 +
test/Transforms/Mem2Reg/pr24179.ll | 44 +
test/Transforms/MemCpyOpt/memcpy.ll | 5 +-
test/Transforms/MemCpyOpt/nontemporal.ll | 49 +
.../MergeFunc/apply_function_attributes.ll | 47 +
.../MergeFunc/call-and-invoke-with-ranges.ll | 18 +-
.../MergeFunc/constant-entire-value.ll | 42 +
test/Transforms/MergeFunc/crash2.ll | 54 +
test/Transforms/MergeFunc/gep-base-type.ll | 46 +
.../MergeFunc/inttoptr-address-space.ll | 2 +-
test/Transforms/MergeFunc/inttoptr.ll | 2 +-
.../merge-block-address-other-function.ll | 49 +
.../MergeFunc/merge-block-address.ll | 91 +
.../MergeFunc/merge-const-ptr-and-int.ll | 20 +
.../MergeFunc/merge-different-vector-types.ll | 18 +
...no-merge-block-address-different-labels.ll | 96 +
.../no-merge-block-address-other-function.ll | 61 +
.../MergeFunc/no-merge-ptr-different-sizes.ll | 24 +
.../no-merge-ptr-int-different-values.ll | 23 +
test/Transforms/MergeFunc/ranges-multiple.ll | 44 +
test/Transforms/MergeFunc/ranges.ll | 8 +-
.../MergeFunc/self-referential-global.ll | 40 +
.../MergeFunc/undef-different-types.ll | 21 +
test/Transforms/MetaRenamer/metarenamer.ll | 2 +-
.../NaryReassociate/NVPTX/nary-gep.ll | 17 +
test/Transforms/NaryReassociate/nary-add.ll | 6 +-
test/Transforms/NaryReassociate/nary-mul.ll | 19 +
test/Transforms/NaryReassociate/pr24301.ll | 14 +
test/Transforms/ObjCARC/basic.ll | 6 +-
...e-that-exception-unwind-path-is-visited.ll | 18 +-
test/Transforms/ObjCARC/nested.ll | 4 +-
test/Transforms/ObjCARC/provenance.ll | 2 +-
.../PGOProfile/Inputs/branch1.proftext | 6 +
.../PGOProfile/Inputs/branch2.proftext | 6 +
.../PGOProfile/Inputs/criticaledge.proftext | 17 +
.../PGOProfile/Inputs/diag.proftext | 5 +
.../PGOProfile/Inputs/landingpad.proftext | 14 +
.../PGOProfile/Inputs/loop1.proftext | 6 +
.../PGOProfile/Inputs/loop2.proftext | 7 +
.../PGOProfile/Inputs/switch.proftext | 8 +
test/Transforms/PGOProfile/branch1.ll | 30 +
test/Transforms/PGOProfile/branch2.ll | 37 +
test/Transforms/PGOProfile/criticaledge.ll | 108 +
test/Transforms/PGOProfile/diag_mismatch.ll | 12 +
.../PGOProfile/diag_no_funcprofdata.ll | 12 +
test/Transforms/PGOProfile/diag_no_profile.ll | 9 +
test/Transforms/PGOProfile/landingpad.ll | 124 +
test/Transforms/PGOProfile/loop1.ll | 42 +
test/Transforms/PGOProfile/loop2.ll | 70 +
test/Transforms/PGOProfile/single_bb.ll | 12 +
test/Transforms/PGOProfile/switch.ll | 47 +
test/Transforms/PlaceSafepoints/basic.ll | 2 +-
.../PlaceSafepoints/call_gc_result.ll | 4 +-
.../PlaceSafepoints/finite-loops.ll | 65 +-
.../PlaceSafepoints/patchable-statepoints.ll | 4 +-
.../statepoint-calling-conventions.ll | 4 +-
.../PlaceSafepoints/statepoint-format.ll | 4 +-
test/Transforms/PruneEH/operand-bundles.ll | 26 +
.../Reassociate/fast-ReassociateVector.ll | 10 +-
test/Transforms/Reassociate/fast-basictest.ll | 2 +-
.../Transforms/Reassociate/fast-fp-commute.ll | 4 +-
test/Transforms/Reassociate/fast-multistep.ll | 6 +-
test/Transforms/Reassociate/fp-expr.ll | 33 +
test/Transforms/Reassociate/multistep.ll | 6 +-
.../Reassociate/reassoc-intermediate-fnegs.ll | 31 +
test/Transforms/Reassociate/secondary.ll | 2 +-
test/Transforms/Reassociate/vaarg_movable.ll | 28 +
test/Transforms/Reassociate/xor_reassoc.ll | 4 +-
.../base-pointers-1.ll | 8 +-
.../base-pointers-10.ll | 11 +-
.../base-pointers-11.ll | 4 +-
.../base-pointers-12.ll | 20 +
.../base-pointers-13.ll | 19 +
.../base-pointers-2.ll | 4 +-
.../base-pointers-3.ll | 4 +-
.../base-pointers-4.ll | 21 +-
.../base-pointers-5.ll | 8 +-
.../base-pointers-6.ll | 8 +-
.../base-pointers-7.ll | 10 +-
.../base-pointers-8.ll | 4 +-
.../base-pointers-9.ll | 4 +-
.../RewriteStatepointsForGC/base-pointers.ll | 73 +-
.../RewriteStatepointsForGC/base-vector.ll | 167 +
.../RewriteStatepointsForGC/basics.ll | 16 +-
.../RewriteStatepointsForGC/codegen-cond.ll | 74 +
.../RewriteStatepointsForGC/constants.ll | 43 +-
.../deopt-bundles/base-pointers-1.ll | 25 +
.../deopt-bundles/base-pointers-10.ll | 35 +
.../deopt-bundles/base-pointers-11.ll | 24 +
.../deopt-bundles/base-pointers-2.ll | 19 +
.../deopt-bundles/base-pointers-3.ll | 19 +
.../deopt-bundles/base-pointers-4.ll | 44 +
.../deopt-bundles/base-pointers-5.ll | 28 +
.../deopt-bundles/base-pointers-6.ll | 37 +
.../deopt-bundles/base-pointers-7.ll | 45 +
.../deopt-bundles/base-pointers-8.ll | 37 +
.../deopt-bundles/base-pointers-9.ll | 20 +
.../deopt-bundles/base-pointers.ll | 151 +
.../deopt-bundles/base-vector.ll | 167 +
.../deopt-bundles/basic.ll | 65 +
.../deopt-bundles/basics.ll | 88 +
.../deopt-bundles/codegen-cond.ll | 81 +
.../deopt-bundles/constants.ll | 51 +
.../deopt-bundles/deref-pointers.ll | 104 +
.../deopt-bundles/gc-relocate-creation.ll | 22 +
.../deopt-bundles/live-vector.ll | 149 +
.../deopt-bundles/liveness-basics.ll | 165 +
.../deopt-bundles/patchable-statepoints.ll | 44 +
.../deopt-bundles/preprocess.ll | 62 +
.../deopt-bundles/relocate-invoke-result.ll | 32 +
.../deopt-bundles/relocation.ll | 279 +
.../rematerialize-derived-pointers.ll | 150 +
.../deopt-bundles/rewrite-invoke.ll | 32 +
.../RewriteStatepointsForGC/deref-pointers.ll | 53 +-
.../gc_relocate_creation.ll | 6 +-
.../RewriteStatepointsForGC/live-vector.ll | 25 +-
.../liveness-basics.ll | 22 +-
.../RewriteStatepointsForGC/preprocess.ll | 8 +-
.../relocate_invoke_result.ll | 4 +-
.../RewriteStatepointsForGC/relocation.ll | 40 +-
.../rematerialize-derived-pointers.ll | 66 +-
.../Transforms/SCCP/global-alias-constprop.ll | 11 +
.../SLPVectorizer/AArch64/commute.ll | 2 +-
.../SLPVectorizer/AArch64/horizontal.ll | 270 +
.../SLPVectorizer/AArch64/nontemporal.ll | 76 +
.../Transforms/SLPVectorizer/X86/bad_types.ll | 26 +
.../SLPVectorizer/X86/commutativity.ll | 78 +
.../SLPVectorizer/X86/debug_info.ll | 16 +-
.../SLPVectorizer/X86/horizontal.ll | 2 +-
test/Transforms/SLPVectorizer/X86/pr23510.ll | 38 +
.../SLPVectorizer/X86/schedule_budget.ll | 93 +
test/Transforms/SROA/basictest.ll | 25 +-
test/Transforms/SROA/big-endian.ll | 1 -
test/Transforms/SROA/fca.ll | 1 -
test/Transforms/SafeStack/AArch64/abi.ll | 20 +
.../SafeStack/AArch64/lit.local.cfg | 3 +
test/Transforms/SafeStack/ARM/abi.ll | 18 +
test/Transforms/SafeStack/ARM/lit.local.cfg | 3 +
test/Transforms/SafeStack/ARM/setjmp.ll | 34 +
test/Transforms/SafeStack/X86/abi.ll | 30 +
test/Transforms/SafeStack/X86/lit.local.cfg | 3 +
test/Transforms/SafeStack/array.ll | 53 +
test/Transforms/SafeStack/byval.ll | 51 +
test/Transforms/SafeStack/call.ll | 160 +-
test/Transforms/SafeStack/cast.ll | 28 +-
test/Transforms/SafeStack/debug-loc.ll | 83 +
test/Transforms/SafeStack/ret.ll | 17 +
test/Transforms/SafeStack/setjmp2.ll | 2 +-
test/Transforms/SafeStack/store.ll | 63 +
.../Inputs/bad_discriminator_value.prof | 2 +-
.../SampleProfile/Inputs/bad_fn_header.prof | 4 +-
.../SampleProfile/Inputs/bad_mangle.prof | 4 +-
.../SampleProfile/Inputs/bad_sample_line.prof | 4 +-
.../SampleProfile/Inputs/bad_samples.prof | 2 +-
.../SampleProfile/Inputs/branch.prof | 16 +-
.../SampleProfile/Inputs/calls.prof | 16 +-
.../Inputs/cov-zero-samples.prof | 10 +
.../Inputs/coverage-warning.prof | 5 +
.../SampleProfile/Inputs/discriminator.prof | 14 +-
.../SampleProfile/Inputs/entry_counts.prof | 4 +-
.../SampleProfile/Inputs/fnptr.binprof | Bin 112 -> 105 bytes .../SampleProfile/Inputs/fnptr.prof | 18 +- .../SampleProfile/Inputs/gcc-simple.afdo | Bin 0 -> 1972 bytes .../SampleProfile/Inputs/inline-coverage.prof | 7 + .../SampleProfile/Inputs/inline-hint.prof | 3 + .../SampleProfile/Inputs/inline.prof | 7 + .../SampleProfile/Inputs/nolocinfo.prof | 3 + .../SampleProfile/Inputs/offset.prof | 4 + .../SampleProfile/Inputs/propagate.prof | 32 +- .../SampleProfile/Inputs/remarks.prof | 7 + .../SampleProfile/Inputs/syntax.prof | 4 +- test/Transforms/SampleProfile/branch.ll | 294 +- test/Transforms/SampleProfile/calls.ll | 18 +- .../SampleProfile/cov-zero-samples.ll | 142 + .../SampleProfile/coverage-warning.ll | 46 + .../Transforms/SampleProfile/discriminator.ll | 14 +- test/Transforms/SampleProfile/entry_counts.ll | 8 +- test/Transforms/SampleProfile/fnptr.ll | 24 +- test/Transforms/SampleProfile/gcc-simple.ll | 218 + .../SampleProfile/inline-coverage.ll | 135 + test/Transforms/SampleProfile/inline-hint.ll | 38 + test/Transforms/SampleProfile/inline.ll | 108 + test/Transforms/SampleProfile/nolocinfo.ll | 38 + test/Transforms/SampleProfile/offset.ll | 82 + test/Transforms/SampleProfile/propagate.ll | 26 +- test/Transforms/SampleProfile/remarks.ll | 185 + test/Transforms/SampleProfile/syntax.ll | 2 +- .../ScalarRepl/debuginfo-preserved.ll | 12 +- test/Transforms/Scalarizer/dbginfo.ll | 12 +- test/Transforms/Scalarizer/store-bug.ll | 25 + ...-gep-and-gvn-addrspace-addressing-modes.ll | 14 +- .../NVPTX/split-gep-and-gvn.ll | 62 +- .../NVPTX/split-gep.ll | 16 +- .../NVPTX/value-tracking-domtree.ll | 2 +- .../SimplifyCFG/AArch64/cttz-ctlz.ll | 43 + test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll | 43 + test/Transforms/SimplifyCFG/ARM/lit.local.cfg | 5 + test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll | 43 + .../Transforms/SimplifyCFG/Mips/lit.local.cfg | 5 + test/Transforms/SimplifyCFG/PR25267.ll | 24 + .../Transforms/SimplifyCFG/SpeculativeExec.ll | 26 + .../SimplifyCFG/X86/speculate-cttz-ctlz.ll | 24 +- test/Transforms/SimplifyCFG/basictest.ll | 14 +- .../Transforms/SimplifyCFG/branch-fold-dbg.ll | 8 +- .../SimplifyCFG/empty-cleanuppad.ll | 415 + test/Transforms/SimplifyCFG/hoist-dbgvalue.ll | 10 +- test/Transforms/SimplifyCFG/implied-cond.ll | 81 + test/Transforms/SimplifyCFG/invoke_unwind.ll | 13 + .../SimplifyCFG/merge-cond-stores-2.ll | 215 + .../SimplifyCFG/merge-cond-stores.ll | 241 + .../no_speculative_loads_with_asan.ll | 40 + .../SimplifyCFG/preserve-load-metadata-2.ll | 32 + .../SimplifyCFG/preserve-load-metadata-3.ll | 32 + .../SimplifyCFG/preserve-load-metadata.ll | 32 + .../preserve-make-implicit-on-switch-to-br.ll | 30 + test/Transforms/SimplifyCFG/speculate-math.ll | 45 +- .../SimplifyCFG/statepoint-invoke-unwind.ll | 6 +- .../SimplifyCFG/switch-dead-default.ll | 179 + test/Transforms/SimplifyCFG/trap-debugloc.ll | 6 +- .../SimplifyCFG/wineh-unreachable.ll | 83 + test/Transforms/Sink/catchswitch.ll | 37 + test/Transforms/Sink/landingpad.ll | 33 + .../reassociate-geps-and-slsr-addrspace.ll | 4 +- .../NVPTX/speculative-slsr.ll | 71 + test/Transforms/StripDeadPrototypes/basic.ll | 12 + .../StripSymbols/2010-06-30-StripDebug.ll | 8 +- .../StripSymbols/2010-08-25-crash.ll | 6 +- .../StripSymbols/strip-dead-debug-info.ll | 16 +- .../StructurizeCFG/nested-loop-order.ll | 2 +- test/Transforms/TailCallElim/basic.ll | 12 +- test/Transforms/TailCallElim/notail.ll | 24 + test/Transforms/Util/lowerswitch.ll | 6 +- .../Util/simplify-dbg-declare-load.ll | 52 + test/Verifier/alias.ll | 18 +- 
test/Verifier/align-md.ll | 59 + test/Verifier/atomics.ll | 14 + test/Verifier/bitcast-alias-address-space.ll | 2 +- test/Verifier/dbg-null-retained-type.ll | 10 + test/Verifier/dbg-typerefs.ll | 2 +- test/Verifier/dbg.ll | 3 +- test/Verifier/dereferenceable-md.ll | 86 + test/Verifier/func-dbg.ll | 25 + test/Verifier/gc_relocate_addrspace.ll | 10 +- test/Verifier/gc_relocate_operand.ll | 8 +- test/Verifier/gc_relocate_return.ll | 8 +- test/Verifier/invalid-eh.ll | 38 + test/Verifier/invalid-patchable-statepoint.ll | 14 - test/Verifier/invalid-statepoint.ll | 8 +- test/Verifier/invalid-statepoint2.ll | 10 +- test/Verifier/invoke.ll | 4 +- test/Verifier/llvm.dbg.declare-address.ll | 4 +- test/Verifier/llvm.dbg.declare-expression.ll | 4 +- test/Verifier/llvm.dbg.declare-variable.ll | 2 +- .../llvm.dbg.intrinsic-dbg-attachment.ll | 20 +- test/Verifier/llvm.dbg.value-expression.ll | 4 +- test/Verifier/llvm.dbg.value-value.ll | 4 +- test/Verifier/llvm.dbg.value-variable.ll | 2 +- test/Verifier/metadata-function-dbg.ll | 23 + test/Verifier/operand-bundles.ll | 49 + test/Verifier/statepoint.ll | 31 +- test/Verifier/token1.ll | 11 + test/Verifier/token2.ll | 11 + test/Verifier/token3.ll | 8 + test/Verifier/token4.ll | 4 + test/Verifier/token5.ll | 7 + test/Verifier/token6.ll | 7 + test/Verifier/token7.ll | 8 + test/lit.cfg | 92 +- test/lit.site.cfg.in | 4 + .../dsymutil/ARM/dummy-debug-map-amr64.map | 15 + test/tools/dsymutil/ARM/empty-map.test | 8 + test/tools/dsymutil/ARM/fat-arch-name.test | 21 + .../dsymutil/ARM/fat-arch-not-found.test | 13 + test/tools/dsymutil/ARM/inlined-low_pc.c | 15 + test/tools/dsymutil/ARM/lit.local.cfg | 7 + .../dsymutil/Inputs/absolute_sym.macho.i386 | Bin 0 -> 8592 bytes .../dsymutil/Inputs/absolute_sym.macho.i386.o | Bin 0 -> 2472 bytes test/tools/dsymutil/Inputs/basic.macho.i386 | Bin 0 -> 9080 bytes .../basic2-custom-linetable.macho.x86_64.o | Bin 0 -> 3144 bytes test/tools/dsymutil/Inputs/basic2.c | 6 + test/tools/dsymutil/Inputs/dead-stripped/1.o | Bin 0 -> 3200 bytes test/tools/dsymutil/Inputs/empty_range/1.o | Bin 0 -> 636 bytes test/tools/dsymutil/Inputs/fat-test.arm.dylib | Bin 0 -> 25180 bytes test/tools/dsymutil/Inputs/fat-test.arm.o | Bin 0 -> 50736 bytes test/tools/dsymutil/Inputs/fat-test.c | 28 + test/tools/dsymutil/Inputs/fat-test.dylib | Bin 0 -> 13012 bytes test/tools/dsymutil/Inputs/fat-test.o | Bin 0 -> 5000 bytes test/tools/dsymutil/Inputs/frame-dw2.ll | 16 +- test/tools/dsymutil/Inputs/frame-dw4.ll | 16 +- test/tools/dsymutil/Inputs/inlined-low_pc/1.o | Bin 0 -> 1960 bytes test/tools/dsymutil/Inputs/libfat-test.a | Bin 0 -> 5136 bytes test/tools/dsymutil/Inputs/mismatch/1.o | Bin 0 -> 1972 bytes .../dsymutil/Inputs/mismatch/mismatch.pcm | Bin 0 -> 24940 bytes test/tools/dsymutil/Inputs/modules/1.o | Bin 0 -> 2444 bytes test/tools/dsymutil/Inputs/modules/Bar.pcm | Bin 0 -> 25636 bytes test/tools/dsymutil/Inputs/modules/Foo.pcm | Bin 0 -> 26060 bytes .../dsymutil/Inputs/odr-anon-namespace/1.o | Bin 0 -> 2084 bytes .../dsymutil/Inputs/odr-anon-namespace/2.o | Bin 0 -> 2084 bytes .../dsymutil/Inputs/odr-member-functions/1.o | Bin 0 -> 2236 bytes .../dsymutil/Inputs/odr-member-functions/2.o | Bin 0 -> 2660 bytes .../dsymutil/Inputs/odr-member-functions/3.o | Bin 0 -> 2832 bytes test/tools/dsymutil/Inputs/odr-uniquing/1.o | Bin 0 -> 2544 bytes test/tools/dsymutil/Inputs/odr-uniquing/2.o | Bin 0 -> 2544 bytes test/tools/dsymutil/Inputs/submodules/1.o | Bin 0 -> 2232 bytes .../dsymutil/Inputs/submodules/Parent.pcm | Bin 0 -> 25260 bytes 
.../dsymutil/X86/basic-linking-bundle.test | 38 + .../tools/dsymutil/X86/basic-linking-x86.test | 13 +- .../X86/basic-lto-dw4-linking-x86.test | 3 +- .../dsymutil/X86/basic-lto-linking-x86.test | 7 +- .../tools/dsymutil/X86/custom-line-table.test | 40 + test/tools/dsymutil/X86/dead-stripped.cpp | 48 + test/tools/dsymutil/X86/dsym-companion.test | 339 + test/tools/dsymutil/X86/dummy-debug-map.map | 22 + test/tools/dsymutil/X86/empty_range.s | 61 + .../dsymutil/X86/fat-archive-input-i386.test | 16 + .../dsymutil/X86/fat-object-input-x86_64.test | 16 + .../X86/fat-object-input-x86_64h.test | 16 + test/tools/dsymutil/X86/frame-1.test | 4 +- test/tools/dsymutil/X86/frame-2.test | 4 +- test/tools/dsymutil/X86/lit.local.cfg | 2 + test/tools/dsymutil/X86/mismatch.m | 23 + test/tools/dsymutil/X86/modules.m | 117 + test/tools/dsymutil/X86/multiple-inputs.test | 31 + .../tools/dsymutil/X86/odr-anon-namespace.cpp | 65 + .../dsymutil/X86/odr-member-functions.cpp | 109 + test/tools/dsymutil/X86/odr-uniquing.cpp | 187 + test/tools/dsymutil/X86/submodules.m | 52 + test/tools/dsymutil/absolute_symbol.test | 16 + test/tools/dsymutil/arch-option.test | 39 + test/tools/dsymutil/archive-timestamp.test | 24 + test/tools/dsymutil/basic-linking.test | 7 +- test/tools/dsymutil/debug-map-parsing.test | 22 +- test/tools/dsymutil/dump-symtab.test | 44 + test/tools/dsymutil/fat-binary-output.test | 32 + .../dsymutil/yaml-object-address-rewrite.test | 10 +- test/tools/gold/Inputs/linkonce-weak.ll | 3 - test/tools/gold/{ => PowerPC}/lit.local.cfg | 1 - test/tools/gold/{ => PowerPC}/mtriple.ll | 0 test/tools/gold/{ => X86}/Inputs/alias-1.ll | 0 .../gold/X86/Inputs/available-externally.ll | 3 + test/tools/gold/{ => X86}/Inputs/bcsection.s | 0 test/tools/gold/{ => X86}/Inputs/comdat.ll | 10 +- test/tools/gold/X86/Inputs/comdat2.ll | 9 + test/tools/gold/{ => X86}/Inputs/common.ll | 0 test/tools/gold/X86/Inputs/ctors2.ll | 5 + .../tools/gold/{ => X86}/Inputs/drop-debug.bc | Bin test/tools/gold/X86/Inputs/drop-linkage.ll | 9 + test/tools/gold/{ => X86}/Inputs/invalid.bc | Bin .../{ => X86}/Inputs/linker-script.export | 0 test/tools/gold/X86/Inputs/linkonce-weak.ll | 19 + test/tools/gold/{ => X86}/Inputs/pr19901-1.ll | 0 .../tools/gold/X86/Inputs/resolve-to-alias.ll | 4 + test/tools/gold/X86/Inputs/thinlto.ll | 4 + test/tools/gold/X86/Inputs/type-merge.ll | 5 + test/tools/gold/X86/Inputs/type-merge2.ll | 5 + test/tools/gold/{ => X86}/Inputs/weak.ll | 0 test/tools/gold/{ => X86}/alias.ll | 2 +- test/tools/gold/X86/alias2.ll | 23 + .../available-externally.ll} | 16 +- test/tools/gold/{ => X86}/bad-alias.ll | 4 +- test/tools/gold/X86/bcsection.ll | 13 + test/tools/gold/{ => X86}/coff.ll | 0 test/tools/gold/X86/comdat.ll | 65 + test/tools/gold/X86/comdat2.ll | 19 + test/tools/gold/{ => X86}/common.ll | 0 test/tools/gold/X86/ctors.ll | 13 + test/tools/gold/X86/ctors2.ll | 14 + test/tools/gold/X86/disable-verify.ll | 25 + test/tools/gold/{ => X86}/drop-debug.ll | 0 test/tools/gold/X86/drop-linkage.ll | 14 + test/tools/gold/{ => X86}/emit-llvm.ll | 25 +- test/tools/gold/{ => X86}/invalid.ll | 0 test/tools/gold/{ => X86}/linker-script.ll | 0 test/tools/gold/X86/linkonce-weak.ll | 39 + test/tools/gold/X86/lit.local.cfg | 3 + .../tools/gold/{ => X86}/no-map-whole-file.ll | 0 test/tools/gold/{ => X86}/opt-level.ll | 0 test/tools/gold/X86/parallel.ll | 22 + test/tools/gold/{ => X86}/pr19901.ll | 0 test/tools/gold/X86/pr25907.ll | 28 + test/tools/gold/X86/pr25915.ll | 17 + test/tools/gold/{ => X86}/remarks.ll | 9 +- 
test/tools/gold/X86/resolve-to-alias.ll | 33 + test/tools/gold/{ => X86}/slp-vectorize.ll | 0 test/tools/gold/{ => X86}/stats.ll | 0 test/tools/gold/X86/thinlto.ll | 34 + test/tools/gold/X86/type-merge.ll | 24 + test/tools/gold/X86/type-merge2.ll | 26 + test/tools/gold/X86/unnamed-addr.ll | 14 + test/tools/gold/{ => X86}/vectorize.ll | 0 test/tools/gold/{ => X86}/weak.ll | 0 test/tools/gold/bcsection.ll | 11 - test/tools/gold/comdat.ll | 65 - test/tools/llvm-cxxdump/trivial.test | 3 + .../llvm-dwp/Inputs/simple/notypes/a.dwo | Bin 0 -> 1193 bytes .../llvm-dwp/Inputs/simple/notypes/b.dwo | Bin 0 -> 1241 bytes test/tools/llvm-dwp/Inputs/simple/types/a.dwo | Bin 0 -> 1369 bytes test/tools/llvm-dwp/Inputs/simple/types/b.dwo | Bin 0 -> 1409 bytes test/tools/llvm-dwp/Inputs/type_dedup/a.dwo | Bin 0 -> 1449 bytes test/tools/llvm-dwp/Inputs/type_dedup/b.dwo | Bin 0 -> 1449 bytes test/tools/llvm-dwp/X86/lit.local.cfg | 4 + test/tools/llvm-dwp/X86/simple.test | 98 + test/tools/llvm-dwp/X86/type_dedup.test | 35 + test/tools/llvm-lto/Inputs/thinlto.ll | 4 + test/tools/llvm-lto/thinlto.ll | 24 + test/tools/llvm-mc/basic.test | 3 + test/tools/llvm-mc/fatal_warnings.test | 4 + test/tools/llvm-mc/line_end_with_space.test | 1 - test/tools/llvm-mc/lit.local.cfg | 4 + test/tools/llvm-mc/no_warnings.test | 4 + test/tools/llvm-nm/X86/IRobj.test | 11 + .../llvm-nm/X86/Inputs/hello.obj.macho-x86_64 | Bin 0 -> 844 bytes .../llvm-nm/X86/Inputs/test.IRobj-x86_64 | Bin 0 -> 1168 bytes test/tools/llvm-nm/X86/externalonly.test | 4 + test/tools/llvm-nm/X86/groupingflags.test | 5 + test/tools/llvm-nm/X86/posixMachO.test | 7 + test/tools/llvm-nm/lit.local.cfg | 2 + .../AArch64/elf-aarch64-mapping-symbols.test | 30 + .../llvm-objdump/Inputs/eh_frame.macho-arm64 | Bin 0 -> 888 bytes test/tools/llvm-objdump/Inputs/libbogus1.a | 13 + test/tools/llvm-objdump/Inputs/libbogus2.a | 13 + test/tools/llvm-objdump/Inputs/libbogus3.a | 16 + .../llvm-objdump/Inputs/section-filter.obj | Bin 0 -> 441 bytes .../X86/Inputs/disassemble-data.obj | Bin 0 -> 254 bytes .../X86/Inputs/disassemble.dll.coff-i386 | Bin 0 -> 1536 bytes .../X86/Inputs/internal.exe.coff-x86_64 | Bin 0 -> 6144 bytes .../X86/Inputs/malformed-machos/00000031.a | Bin 0 -> 2768 bytes .../malformed-machos/mem-crup-0001.macho | Bin 0 -> 9248 bytes .../malformed-machos/mem-crup-0006.macho | Bin 0 -> 9248 bytes .../malformed-machos/mem-crup-0010.macho | Bin 0 -> 9248 bytes .../malformed-machos/mem-crup-0040.macho | Bin 0 -> 9248 bytes .../malformed-machos/mem-crup-0080.macho | Bin 0 -> 9166 bytes .../malformed-machos/mem-crup-0261.macho | Bin 0 -> 8752 bytes .../malformed-machos/mem-crup-0337.macho | Bin 0 -> 9248 bytes .../llvm-objdump/X86/coff-dis-internal.test | 3 + .../X86/coff-disassemble-export.test | 8 + .../llvm-objdump/X86/disassemble-data.test | 4 + .../llvm-objdump/X86/macho-symbol-table.test | 6 +- .../llvm-objdump/X86/malformed-machos.test | 41 + test/tools/llvm-objdump/eh_frame-arm64.test | 23 + .../llvm-objdump/malformed-archives.test | 20 + test/tools/llvm-objdump/section-filter.test | 7 + test/tools/llvm-pdbdump/regex-filter.test | 20 + .../tools/llvm-profdata/Inputs/basic.proftext | 19 + .../llvm-profdata/Inputs/c-general.profraw | Bin 1384 -> 1776 bytes .../llvm-profdata/Inputs/compat.profdata.v2 | Bin 0 -> 712 bytes .../Inputs/gcc-sample-profile.gcov | Bin 0 -> 1960 bytes .../llvm-profdata/Inputs/inline-samples.afdo | 20 + .../Inputs/overflow-instr.proftext | 6 + .../Inputs/overflow-sample.proftext | 7 + .../Inputs/sample-profile.proftext | 18 +- 
.../Inputs/text-format-errors.text.bin | 1 + .../llvm-profdata/Inputs/vp-malform.proftext | 42 + .../llvm-profdata/Inputs/vp-malform2.proftext | 32 + .../llvm-profdata/Inputs/vp-truncate.proftext | 36 + .../Inputs/weight-instr-bar.profdata | Bin 0 -> 1320 bytes .../Inputs/weight-instr-foo.profdata | Bin 0 -> 1320 bytes .../Inputs/weight-sample-bar.proftext | 8 + .../Inputs/weight-sample-foo.proftext | 8 + test/tools/llvm-profdata/c-general.test | 6 +- test/tools/llvm-profdata/compat.proftext | 20 + .../llvm-profdata/count-mismatch.proftext | 3 +- .../gcc-gcov-sample-profile.test | 29 + test/tools/llvm-profdata/inline-samples.test | 30 + test/tools/llvm-profdata/overflow-instr.test | 17 + test/tools/llvm-profdata/overflow-sample.test | 43 + test/tools/llvm-profdata/overflow.proftext | 12 - test/tools/llvm-profdata/raw-32-bits-be.test | 13 +- test/tools/llvm-profdata/raw-32-bits-le.test | 13 +- test/tools/llvm-profdata/raw-64-bits-be.test | 13 +- test/tools/llvm-profdata/raw-64-bits-le.test | 13 +- .../raw-magic-but-no-header.test | 2 +- .../tools/llvm-profdata/raw-two-profiles.test | 31 +- .../llvm-profdata/sample-profile-basic.test | 10 +- test/tools/llvm-profdata/text-dump.test | 21 + .../llvm-profdata/text-format-errors.test | 29 +- test/tools/llvm-profdata/value-prof.proftext | 57 + test/tools/llvm-profdata/weight-instr.test | 69 + test/tools/llvm-profdata/weight-sample.test | 56 + test/tools/llvm-readobj/ARM/attribute-4.s | 7 + .../llvm-readobj/Inputs/gnuhash.so.elf-i386 | Bin 0 -> 1080 bytes .../llvm-readobj/Inputs/gnuhash.so.elf-ppc | Bin 0 -> 1204 bytes .../llvm-readobj/Inputs/gnuhash.so.elf-ppc64 | Bin 0 -> 1784 bytes .../llvm-readobj/Inputs/gnuhash.so.elf-x86_64 | Bin 0 -> 1616 bytes .../Inputs/mips-rld-map-rel.elf-mipsel | Bin 0 -> 2484 bytes .../Inputs/trivial.elf-amdhsa-kaveri | Bin 0 -> 13208 bytes .../llvm-readobj/Inputs/verdef.elf-x86-64 | Bin 0 -> 2256 bytes test/tools/llvm-readobj/amdgpu-elf-defs.test | 28 + test/tools/llvm-readobj/basic.test | 2 + .../llvm-readobj/codeview-linetables.test | 36 +- test/tools/llvm-readobj/elf-gnuhash.test | 63 + test/tools/llvm-readobj/elf-versioninfo.test | 81 + test/tools/llvm-readobj/file-headers.test | 18 +- test/tools/llvm-readobj/mips-rld-map-rel.test | 24 + test/tools/llvm-readobj/sections-ext.test | 7 + test/tools/llvm-readobj/sections.test | 7 + test/tools/llvm-size/basic.test | 2 + test/tools/llvm-split/alias.ll | 19 + test/tools/llvm-split/comdat.ll | 19 + test/tools/llvm-split/function.ll | 17 + test/tools/llvm-split/global.ll | 11 + test/tools/llvm-split/internal.ll | 17 + test/tools/llvm-split/unnamed.ll | 31 + test/tools/llvm-symbolizer/Inputs/addr.exe | Bin 0 -> 10109 bytes test/tools/llvm-symbolizer/Inputs/addr.inp | 1 + .../llvm-symbolizer/Inputs/coff-dwarf.cpp | 19 + .../llvm-symbolizer/Inputs/coff-dwarf.exe | Bin 0 -> 18944 bytes .../llvm-symbolizer/Inputs/coff-exports.cpp | 20 + .../llvm-symbolizer/Inputs/coff-exports.exe | Bin 0 -> 8192 bytes test/tools/llvm-symbolizer/coff-dwarf.test | 16 + test/tools/llvm-symbolizer/coff-exports.test | 20 + .../tools/llvm-symbolizer/pdb/Inputs/test.cpp | 7 + .../tools/llvm-symbolizer/pdb/Inputs/test.exe | Bin 165888 -> 126464 bytes .../llvm-symbolizer/pdb/Inputs/test.exe.input | 4 - .../tools/llvm-symbolizer/pdb/Inputs/test.pdb | Bin 1552384 -> 1626112 bytes test/tools/llvm-symbolizer/pdb/pdb.test | 63 +- test/tools/llvm-symbolizer/sym.test | 30 + test/tools/lto/opt-level.ll | 5 +- test/tools/sancov/Inputs/blacklist.txt | 1 + test/tools/sancov/Inputs/foo.cpp | 5 + 
test/tools/sancov/Inputs/test-linux_x86_64 | Bin 0 -> 2355767 bytes .../sancov/Inputs/test-linux_x86_64-1.sancov | Bin 0 -> 80 bytes .../sancov/Inputs/test-linux_x86_64.sancov | Bin 0 -> 64 bytes test/tools/sancov/Inputs/test.cpp | 19 + test/tools/sancov/blacklist.test | 5 + test/tools/sancov/covered_functions.test | 13 + test/tools/sancov/not_covered_functions.test | 8 + test/tools/sancov/print.test | 11 + tools/CMakeLists.txt | 95 +- tools/LLVMBuild.txt | 3 +- tools/Makefile | 5 +- tools/bugpoint-passes/TestPasses.cpp | 53 +- tools/bugpoint/BugDriver.cpp | 6 +- tools/bugpoint/BugDriver.h | 19 +- tools/bugpoint/CMakeLists.txt | 1 - tools/bugpoint/CrashDebugger.cpp | 265 +- tools/bugpoint/ExecutionDriver.cpp | 41 +- tools/bugpoint/ExtractFunction.cpp | 71 +- tools/bugpoint/ListReducer.h | 12 +- tools/bugpoint/Miscompilation.cpp | 277 +- tools/bugpoint/ToolRunner.cpp | 217 +- tools/bugpoint/ToolRunner.h | 50 +- tools/bugpoint/bugpoint.cpp | 10 +- tools/dsymutil/BinaryHolder.cpp | 172 +- tools/dsymutil/BinaryHolder.h | 98 +- tools/dsymutil/CMakeLists.txt | 1 + tools/dsymutil/DebugMap.cpp | 38 +- tools/dsymutil/DebugMap.h | 19 +- tools/dsymutil/DwarfLinker.cpp | 1700 +- tools/dsymutil/MachODebugMapParser.cpp | 323 +- tools/dsymutil/MachOUtils.cpp | 521 + tools/dsymutil/MachOUtils.h | 39 + tools/dsymutil/NonRelocatableStringpool.h | 68 + tools/dsymutil/dsymutil.cpp | 297 +- tools/dsymutil/dsymutil.h | 25 +- tools/gold/gold-plugin.cpp | 493 +- tools/llc/CMakeLists.txt | 1 + tools/llc/LLVMBuild.txt | 2 +- tools/llc/Makefile | 2 +- tools/llc/llc.cpp | 55 +- tools/lli/OrcLazyJIT.cpp | 62 +- tools/lli/OrcLazyJIT.h | 54 +- tools/lli/RemoteTarget.cpp | 4 +- tools/lli/lli.cpp | 5 +- tools/llvm-ar/CMakeLists.txt | 29 +- tools/llvm-ar/install_symlink.cmake | 31 - tools/llvm-ar/llvm-ar.cpp | 104 +- tools/llvm-as-fuzzer/CMakeLists.txt | 13 + tools/llvm-as-fuzzer/llvm-as-fuzzer.cpp | 76 + tools/llvm-as/llvm-as.cpp | 7 +- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 67 +- tools/llvm-c-test/llvm-c-test.h | 4 +- tools/llvm-c-test/main.c | 18 +- tools/llvm-c-test/module.c | 38 +- tools/llvm-c-test/object.c | 1 + tools/llvm-config/BuildVariables.inc.in | 5 + tools/llvm-config/CMakeLists.txt | 8 + tools/llvm-config/Makefile | 18 + tools/llvm-config/llvm-config.cpp | 285 +- tools/llvm-cov/CoverageReport.cpp | 31 +- tools/llvm-cov/CoverageViewOptions.h | 1 + tools/llvm-cov/gcov.cpp | 6 +- tools/llvm-cxxdump/llvm-cxxdump.cpp | 104 +- tools/llvm-diff/DiffLog.cpp | 3 +- tools/llvm-diff/DiffLog.h | 10 +- tools/llvm-diff/DifferenceEngine.cpp | 4 +- tools/llvm-dis/llvm-dis.cpp | 2 +- tools/llvm-dwarfdump/llvm-dwarfdump.cpp | 107 +- tools/llvm-dwp/CMakeLists.txt | 13 + tools/llvm-dwp/LLVMBuild.txt | 23 + tools/llvm-dwp/Makefile | 18 + tools/llvm-dwp/llvm-dwp.cpp | 420 + tools/llvm-extract/llvm-extract.cpp | 53 +- tools/llvm-go/llvm-go.go | 91 +- tools/llvm-link/CMakeLists.txt | 1 + tools/llvm-link/LLVMBuild.txt | 2 +- tools/llvm-link/Makefile | 2 +- tools/llvm-link/llvm-link.cpp | 194 +- tools/llvm-lto/CMakeLists.txt | 3 + tools/llvm-lto/LLVMBuild.txt | 2 +- tools/llvm-lto/Makefile | 2 - tools/llvm-lto/llvm-lto.cpp | 269 +- tools/llvm-mc-fuzzer/CMakeLists.txt | 18 + tools/llvm-mc-fuzzer/llvm-mc-fuzzer.cpp | 138 + tools/llvm-mc/llvm-mc.cpp | 14 +- tools/llvm-nm/CMakeLists.txt | 4 +- tools/llvm-nm/llvm-nm.cpp | 258 +- tools/llvm-objdump/CMakeLists.txt | 5 +- tools/llvm-objdump/COFFDump.cpp | 85 +- tools/llvm-objdump/ELFDump.cpp | 32 +- tools/llvm-objdump/MachODump.cpp | 802 +- tools/llvm-objdump/llvm-objdump.cpp | 564 +- 
tools/llvm-objdump/llvm-objdump.h | 8 +- tools/llvm-pdbdump/BuiltinDumper.cpp | 66 +- tools/llvm-pdbdump/BuiltinDumper.h | 2 + tools/llvm-pdbdump/LinePrinter.cpp | 104 +- tools/llvm-pdbdump/LinePrinter.h | 13 +- tools/llvm-pdbdump/llvm-pdbdump.cpp | 299 +- tools/llvm-pdbdump/llvm-pdbdump.h | 3 + tools/llvm-profdata/llvm-profdata.cpp | 269 +- tools/llvm-readobj/ARMAttributeParser.cpp | 72 +- tools/llvm-readobj/ARMEHABIPrinter.h | 60 +- tools/llvm-readobj/ARMWinEHPrinter.cpp | 5 +- tools/llvm-readobj/CMakeLists.txt | 1 + tools/llvm-readobj/COFFDumper.cpp | 120 +- tools/llvm-readobj/COFFImportDumper.cpp | 52 + tools/llvm-readobj/ELFDumper.cpp | 903 +- tools/llvm-readobj/MachODumper.cpp | 240 +- tools/llvm-readobj/ObjDumper.h | 19 +- tools/llvm-readobj/StreamWriter.h | 32 +- tools/llvm-readobj/Win64EHDumper.cpp | 21 +- tools/llvm-readobj/llvm-readobj.cpp | 147 +- tools/llvm-readobj/llvm-readobj.h | 4 +- tools/llvm-rtdyld/llvm-rtdyld.cpp | 235 +- tools/llvm-shlib/CMakeLists.txt | 144 +- tools/llvm-shlib/Makefile | 2 +- tools/llvm-size/llvm-size.cpp | 44 +- tools/llvm-split/CMakeLists.txt | 11 + .../{macho-dump => llvm-split}/LLVMBuild.txt | 6 +- tools/{macho-dump => llvm-split}/Makefile | 10 +- tools/llvm-split/llvm-split.cpp | 67 + tools/llvm-stress/CMakeLists.txt | 2 +- tools/llvm-stress/llvm-stress.cpp | 5 +- tools/llvm-symbolizer/CMakeLists.txt | 2 +- tools/llvm-symbolizer/LLVMSymbolize.cpp | 532 - tools/llvm-symbolizer/LLVMSymbolize.h | 144 - tools/llvm-symbolizer/Makefile | 2 +- tools/llvm-symbolizer/llvm-symbolizer.cpp | 50 +- tools/lto/CMakeLists.txt | 10 + tools/lto/lto.cpp | 133 +- tools/macho-dump/CMakeLists.txt | 8 - tools/macho-dump/macho-dump.cpp | 434 - tools/obj2yaml/elf2yaml.cpp | 79 +- tools/obj2yaml/obj2yaml.cpp | 3 - tools/opt/CMakeLists.txt | 1 - tools/opt/opt.cpp | 56 +- tools/sancov/CMakeLists.txt | 17 + tools/sancov/Makefile | 18 + tools/sancov/sancov.cc | 533 + .../verify-uselistorder.cpp | 1 + tools/xcode-toolchain/CMakeLists.txt | 72 + tools/yaml2obj/yaml2elf.cpp | 16 +- unittests/ADT/APFloatTest.cpp | 16 +- unittests/ADT/APIntTest.cpp | 7 + unittests/ADT/ArrayRefTest.cpp | 31 +- unittests/ADT/BitVectorTest.cpp | 4 +- unittests/ADT/CMakeLists.txt | 1 + unittests/ADT/DenseMapTest.cpp | 16 + unittests/ADT/FoldingSet.cpp | 4 +- unittests/ADT/OptionalTest.cpp | 13 + unittests/ADT/PackedVectorTest.cpp | 12 - unittests/ADT/PointerIntPairTest.cpp | 25 +- unittests/ADT/RangeAdapterTest.cpp | 83 + unittests/ADT/SmallStringTest.cpp | 11 + unittests/ADT/SparseBitVectorTest.cpp | 94 + unittests/ADT/StringRefTest.cpp | 53 + unittests/ADT/TripleTest.cpp | 115 +- unittests/ADT/ilistTest.cpp | 14 +- unittests/Analysis/AliasAnalysisTest.cpp | 252 +- unittests/Analysis/CFGTest.cpp | 2 +- unittests/Analysis/CMakeLists.txt | 2 +- unittests/Analysis/Makefile | 2 +- unittests/Analysis/MixedTBAATest.cpp | 3 +- unittests/Analysis/ScalarEvolutionTest.cpp | 35 +- unittests/Analysis/ValueTrackingTest.cpp | 189 + unittests/AsmParser/AsmParserTest.cpp | 88 + unittests/Bitcode/BitReaderTest.cpp | 75 +- .../ExecutionEngine/MCJIT/MCJITCAPITest.cpp | 6 +- .../MCJIT/MCJITMultipleModuleTest.cpp | 10 +- .../MCJIT/MCJITObjectCacheTest.cpp | 7 +- unittests/ExecutionEngine/MCJIT/MCJITTest.cpp | 12 +- .../MCJIT/MCJITTestAPICommon.h | 6 + .../ExecutionEngine/MCJIT/MCJITTestBase.h | 26 +- unittests/ExecutionEngine/Orc/CMakeLists.txt | 9 + .../Orc/CompileOnDemandLayerTest.cpp | 75 + .../Orc/GlobalMappingLayerTest.cpp | 55 + .../Orc/IndirectionUtilsTest.cpp | 2 +- .../Orc/ObjectLinkingLayerTest.cpp | 94 + 
.../Orc/ObjectTransformLayerTest.cpp | 27 - unittests/ExecutionEngine/Orc/OrcCAPITest.cpp | 160 + .../ExecutionEngine/Orc/OrcTestCommon.cpp | 2 + unittests/ExecutionEngine/Orc/OrcTestCommon.h | 171 +- unittests/IR/CMakeLists.txt | 1 - unittests/IR/ConstantRangeTest.cpp | 52 + unittests/IR/ConstantsTest.cpp | 73 +- unittests/IR/DominatorTreeTest.cpp | 28 +- unittests/IR/IRBuilderTest.cpp | 38 +- unittests/IR/LegacyPassManagerTest.cpp | 2 +- unittests/IR/Makefile | 2 +- unittests/IR/MetadataTest.cpp | 323 +- unittests/IR/TypesTest.cpp | 8 + unittests/IR/UserTest.cpp | 24 + unittests/IR/ValueHandleTest.cpp | 12 +- unittests/IR/ValueTest.cpp | 70 +- unittests/IR/VerifierTest.cpp | 47 + unittests/Linker/LinkModulesTest.cpp | 127 +- unittests/MC/StringTableBuilderTest.cpp | 8 +- unittests/Option/OptionParsingTest.cpp | 2 +- unittests/ProfileData/CMakeLists.txt | 1 + unittests/ProfileData/CoverageMappingTest.cpp | 12 +- unittests/ProfileData/InstrProfTest.cpp | 496 +- unittests/ProfileData/SampleProfTest.cpp | 132 + unittests/Support/AlignOfTest.cpp | 20 + unittests/Support/BlockFrequencyTest.cpp | 18 +- unittests/Support/BranchProbabilityTest.cpp | 236 +- unittests/Support/CMakeLists.txt | 4 + unittests/Support/CommandLineTest.cpp | 14 +- unittests/Support/EndianTest.cpp | 132 + unittests/Support/FileOutputBufferTest.cpp | 25 +- unittests/Support/MathExtrasTest.cpp | 119 + unittests/Support/MemoryBufferTest.cpp | 11 +- unittests/Support/Path.cpp | 206 +- unittests/Support/ProgramTest.cpp | 108 +- unittests/Support/ReplaceFileTest.cpp | 113 + unittests/Support/ThreadPool.cpp | 168 + unittests/Support/TimerTest.cpp | 65 + unittests/Support/TrailingObjectsTest.cpp | 195 + unittests/Transforms/Utils/Cloning.cpp | 43 +- .../Transforms/Utils/IntegerDivision.cpp | 32 +- .../Transforms/Utils/ValueMapperTest.cpp | 31 + utils/FileCheck/FileCheck.cpp | 44 +- utils/PerfectShuffle/PerfectShuffle.cpp | 6 +- utils/TableGen/AsmMatcherEmitter.cpp | 372 +- utils/TableGen/AsmWriterEmitter.cpp | 109 +- utils/TableGen/Attributes.cpp | 156 + utils/TableGen/CMakeLists.txt | 1 + utils/TableGen/CallingConvEmitter.cpp | 8 +- utils/TableGen/CodeGenDAGPatterns.cpp | 613 +- utils/TableGen/CodeGenDAGPatterns.h | 16 +- utils/TableGen/CodeGenInstruction.cpp | 1 + utils/TableGen/CodeGenIntrinsics.h | 5 +- utils/TableGen/CodeGenMapTable.cpp | 2 +- utils/TableGen/CodeGenRegisters.cpp | 31 +- utils/TableGen/CodeGenSchedule.cpp | 91 +- utils/TableGen/CodeGenSchedule.h | 49 +- utils/TableGen/CodeGenTarget.cpp | 12 +- utils/TableGen/CodeGenTarget.h | 2 +- utils/TableGen/DAGISelMatcherEmitter.cpp | 36 +- utils/TableGen/DFAPacketizerEmitter.cpp | 710 +- utils/TableGen/DisassemblerEmitter.cpp | 5 +- utils/TableGen/FixedLenDecoderEmitter.cpp | 189 +- utils/TableGen/InstrInfoEmitter.cpp | 50 +- utils/TableGen/IntrinsicEmitter.cpp | 136 +- utils/TableGen/OptParserEmitter.cpp | 20 +- utils/TableGen/RegisterInfoEmitter.cpp | 41 +- utils/TableGen/SubtargetEmitter.cpp | 17 +- utils/TableGen/TableGen.cpp | 8 +- utils/TableGen/TableGenBackends.h | 1 + utils/TableGen/X86RecognizableInstr.cpp | 17 +- utils/emacs/llvm-mode.el | 97 +- utils/lit/TODO | 12 +- utils/lit/lit/LitConfig.py | 36 +- utils/lit/lit/Test.py | 4 +- utils/lit/lit/TestRunner.py | 308 +- utils/lit/lit/TestingConfig.py | 2 +- utils/lit/lit/formats/googletest.py | 11 +- utils/lit/lit/main.py | 72 +- utils/lit/lit/util.py | 100 +- .../googletest-timeout/DummySubDir/OneTest | 35 + .../tests/Inputs/googletest-timeout/lit.cfg | 9 + .../Inputs/shtest-timeout/infinite_loop.py | 10 + 
utils/lit/tests/Inputs/shtest-timeout/lit.cfg | 32 + .../Inputs/shtest-timeout/quick_then_slow.py | 24 + .../lit/tests/Inputs/shtest-timeout/short.py | 6 + utils/lit/tests/Inputs/shtest-timeout/slow.py | 9 + utils/lit/tests/discovery.py | 8 +- utils/lit/tests/googletest-timeout.py | 29 + utils/lit/tests/lit.cfg | 9 + utils/lit/tests/shtest-timeout.py | 116 + utils/llvm-build/llvmbuild/main.py | 98 +- utils/release/build_llvm_package.bat | 93 + utils/release/merge.sh | 23 +- utils/release/test-release.sh | 77 +- utils/schedcover.py | 77 + utils/shuffle_fuzz.py | 4 +- .../googletest/src/gtest-internal-inl.h | 58 +- utils/update_llc_test_checks.py | 8 +- utils/vim/ftplugin/llvm.vim | 1 + utils/vim/syntax/llvm.vim | 21 +- 6088 files changed, 431649 insertions(+), 132043 deletions(-) delete mode 100644 autoconf/m4/rand48.m4 create mode 100644 cmake/dummy.cpp create mode 100644 cmake/modules/DetermineGCCCompatible.cmake create mode 100644 cmake/modules/LLVMExternalProjectUtils.cmake create mode 100644 cmake/modules/LLVMInstallSymlink.cmake create mode 100644 docs/CommandGuide/llvm-lib.rst create mode 100644 docs/CompileCudaWithLLVM.rst create mode 100644 docs/MIRLangRef.rst create mode 100644 docs/_ocamldoc/style.css create mode 100644 examples/Kaleidoscope/include/KaleidoscopeJIT.h create mode 100644 include/llvm-c/ErrorHandling.h create mode 100644 include/llvm-c/OrcBindings.h create mode 100644 include/llvm-c/Types.h create mode 100644 include/llvm/Analysis/BasicAliasAnalysis.h create mode 100644 include/llvm/Analysis/CFLAliasAnalysis.h create mode 100644 include/llvm/Analysis/DemandedBits.h create mode 100644 include/llvm/Analysis/DivergenceAnalysis.h create mode 100644 include/llvm/Analysis/EHPersonalities.h create mode 100644 include/llvm/Analysis/GlobalsModRef.h delete mode 100644 include/llvm/Analysis/LibCallAliasAnalysis.h delete mode 100644 include/llvm/Analysis/LibCallSemantics.h create mode 100644 include/llvm/Analysis/ObjCARCAliasAnalysis.h create mode 100644 include/llvm/Analysis/ObjCARCAnalysisUtils.h rename lib/Transforms/ObjCARC/ARCInstKind.h => include/llvm/Analysis/ObjCARCInstKind.h (94%) create mode 100644 include/llvm/Analysis/OrderedBasicBlock.h create mode 100644 include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h create mode 100644 include/llvm/Analysis/ScopedNoAliasAA.h create mode 100644 include/llvm/Analysis/TypeBasedAliasAnalysis.h create mode 100644 include/llvm/CodeGen/AtomicExpandUtils.h create mode 100644 include/llvm/CodeGen/ParallelCG.h create mode 100644 include/llvm/DebugInfo/CodeView/CodeView.h create mode 100644 include/llvm/DebugInfo/CodeView/CodeViewOStream.h create mode 100644 include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/FunctionId.h create mode 100644 include/llvm/DebugInfo/CodeView/Line.h create mode 100644 include/llvm/DebugInfo/CodeView/ListRecordBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeIndex.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeRecord.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h create mode 100644 include/llvm/DebugInfo/CodeView/TypeTableBuilder.h create mode 100644 include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h create mode 100644 include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h create mode 100644 
include/llvm/DebugInfo/Symbolize/DIPrinter.h create mode 100644 include/llvm/DebugInfo/Symbolize/SymbolizableModule.h create mode 100644 include/llvm/DebugInfo/Symbolize/Symbolize.h create mode 100644 include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h create mode 100644 include/llvm/IR/Attributes.td create mode 100644 include/llvm/IR/FunctionInfo.h delete mode 100644 include/llvm/IR/MetadataTracking.h create mode 100644 include/llvm/Linker/IRMover.h create mode 100644 include/llvm/MC/MCFragment.h create mode 100644 include/llvm/Object/COFFImportFile.h create mode 100644 include/llvm/Object/FunctionIndexObjectFile.h create mode 100644 include/llvm/ProfileData/InstrProfData.inc create mode 100644 include/llvm/Support/ARMTargetParser.def create mode 100644 include/llvm/Support/ELFRelocs/AVR.def create mode 100644 include/llvm/Support/JamCRC.h delete mode 100644 include/llvm/Support/OutputBuffer.h create mode 100644 include/llvm/Support/Printable.h create mode 100644 include/llvm/Support/ThreadPool.h create mode 100644 include/llvm/Support/TrailingObjects.h create mode 100644 include/llvm/Support/thread.h create mode 100644 include/llvm/Transforms/IPO/ForceFunctionAttrs.h create mode 100644 include/llvm/Transforms/IPO/FunctionImport.h create mode 100644 include/llvm/Transforms/IPO/InferFunctionAttrs.h create mode 100644 include/llvm/Transforms/IPO/StripDeadPrototypes.h create mode 100644 include/llvm/Transforms/Scalar/ADCE.h create mode 100644 include/llvm/Transforms/Scalar/SROA.h create mode 100644 include/llvm/Transforms/Utils/SplitModule.h delete mode 100644 lib/Analysis/AliasAnalysisCounter.cpp delete mode 100644 lib/Analysis/AliasDebugger.cpp rename lib/Analysis/{IPA => }/CallGraph.cpp (91%) rename lib/Analysis/{IPA => }/CallGraphSCCPass.cpp (100%) rename lib/Analysis/{IPA => }/CallPrinter.cpp (100%) create mode 100644 lib/Analysis/DemandedBits.cpp create mode 100644 lib/Analysis/EHPersonalities.cpp create mode 100644 lib/Analysis/GlobalsModRef.cpp delete mode 100644 lib/Analysis/IPA/CMakeLists.txt delete mode 100644 lib/Analysis/IPA/GlobalsModRef.cpp delete mode 100644 lib/Analysis/IPA/IPA.cpp rename lib/Analysis/{IPA => }/InlineCost.cpp (95%) delete mode 100644 lib/Analysis/LibCallAliasAnalysis.cpp delete mode 100644 lib/Analysis/LibCallSemantics.cpp delete mode 100644 lib/Analysis/NoAliasAnalysis.cpp rename lib/{Transforms/ObjCARC => Analysis}/ObjCARCAliasAnalysis.cpp (53%) create mode 100644 lib/Analysis/ObjCARCAnalysisUtils.cpp rename lib/{Transforms/ObjCARC/ARCInstKind.cpp => Analysis/ObjCARCInstKind.cpp} (99%) create mode 100644 lib/Analysis/OrderedBasicBlock.cpp create mode 100644 lib/CodeGen/FuncletLayout.cpp create mode 100644 lib/CodeGen/LiveDebugValues.cpp create mode 100644 lib/CodeGen/ParallelCG.cpp create mode 100644 lib/DebugInfo/CodeView/CMakeLists.txt create mode 100644 lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp rename lib/{Analysis/IPA => DebugInfo/CodeView}/LLVMBuild.txt (78%) create mode 100644 lib/DebugInfo/CodeView/Line.cpp create mode 100644 lib/DebugInfo/CodeView/ListRecordBuilder.cpp create mode 100644 lib/DebugInfo/CodeView/Makefile create mode 100644 lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp create mode 100644 lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp create mode 100644 lib/DebugInfo/CodeView/TypeRecordBuilder.cpp create mode 100644 lib/DebugInfo/CodeView/TypeTableBuilder.cpp create mode 100644 lib/DebugInfo/DWARF/DWARFDebugMacro.cpp create mode 100644 lib/DebugInfo/DWARF/DWARFUnitIndex.cpp create mode 100644 
lib/DebugInfo/Symbolize/CMakeLists.txt create mode 100644 lib/DebugInfo/Symbolize/DIPrinter.cpp create mode 100644 lib/DebugInfo/Symbolize/LLVMBuild.txt rename lib/{Analysis/IPA => DebugInfo/Symbolize}/Makefile (74%) create mode 100644 lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp create mode 100644 lib/DebugInfo/Symbolize/SymbolizableObjectFile.h create mode 100644 lib/DebugInfo/Symbolize/Symbolize.cpp create mode 100644 lib/ExecutionEngine/Orc/OrcCBindings.cpp create mode 100644 lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp create mode 100644 lib/ExecutionEngine/Orc/OrcCBindingsStack.h create mode 100644 lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h create mode 100644 lib/Fuzzer/FuzzerDFSan.h create mode 100644 lib/Fuzzer/cxx.dict delete mode 100644 lib/Fuzzer/cxx_fuzzer_tokens.txt create mode 100644 lib/Fuzzer/test/CallerCalleeTest.cpp delete mode 100644 lib/Fuzzer/test/CxxTokensTest.cpp delete mode 100644 lib/Fuzzer/test/DFSanMemcmpTest.cpp delete mode 100644 lib/Fuzzer/test/InfiniteTest.cpp create mode 100644 lib/Fuzzer/test/MemcmpTest.cpp rename lib/Fuzzer/test/{DFSanSimpleCmpTest.cpp => SimpleCmpTest.cpp} (85%) create mode 100644 lib/Fuzzer/test/SimpleDictionaryTest.cpp create mode 100644 lib/Fuzzer/test/SimpleHashTest.cpp create mode 100644 lib/Fuzzer/test/StrcmpTest.cpp create mode 100644 lib/Fuzzer/test/StrncmpTest.cpp create mode 100644 lib/Fuzzer/test/SwitchTest.cpp create mode 100644 lib/Fuzzer/test/UninstrumentedTest.cpp create mode 100644 lib/Fuzzer/test/dict1.txt create mode 100644 lib/Fuzzer/test/fuzzer-dfsan.test create mode 100644 lib/Fuzzer/test/fuzzer-drill.test create mode 100644 lib/Fuzzer/test/fuzzer-timeout.test create mode 100644 lib/Fuzzer/test/fuzzer-traces.test create mode 100644 lib/Fuzzer/test/hi.txt create mode 100644 lib/Fuzzer/test/merge.test create mode 100644 lib/Fuzzer/test/trace-bb/CMakeLists.txt create mode 100644 lib/Fuzzer/test/uninstrumented/CMakeLists.txt create mode 100644 lib/IR/AttributesCompatFunc.td create mode 100644 lib/IR/FunctionInfo.cpp delete mode 100644 lib/IR/MetadataTracking.cpp create mode 100644 lib/Linker/IRMover.cpp create mode 100644 lib/Linker/LinkDiagnosticInfo.h create mode 100644 lib/MC/MCFragment.cpp create mode 100644 lib/Object/FunctionIndexObjectFile.cpp delete mode 100644 lib/ProfileData/InstrProfIndexed.h create mode 100644 lib/Support/JamCRC.cpp create mode 100644 lib/Support/ThreadPool.cpp delete mode 100644 lib/Target/AArch64/AArch64MachineCombinerPattern.h create mode 100644 lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h create mode 100644 lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp create mode 100644 lib/Target/AMDGPU/AMDGPUTargetObjectFile.h create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp create mode 100644 lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h create mode 100644 lib/Target/AMDGPU/SIFrameLowering.cpp create mode 100644 lib/Target/AMDGPU/SIFrameLowering.h delete mode 100644 lib/Target/AMDGPU/SIPrepareScratchRegs.cpp create mode 100644 lib/Target/AVR/AVR.td create mode 100644 lib/Target/AVR/AVRCallingConv.td create mode 100644 lib/Target/AVR/AVRConfig.h create mode 100644 lib/Target/AVR/AVRMachineFunctionInfo.h create mode 100644 lib/Target/AVR/AVRRegisterInfo.td create 
mode 100644 lib/Target/AVR/AVRTargetMachine.cpp create mode 100644 lib/Target/AVR/CMakeLists.txt create mode 100644 lib/Target/AVR/LLVMBuild.txt create mode 100644 lib/Target/AVR/Makefile create mode 100644 lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp create mode 100644 lib/Target/AVR/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/AVR/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/AVR/TargetInfo/Makefile create mode 100644 lib/Target/Hexagon/AsmParser/CMakeLists.txt create mode 100644 lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp create mode 100644 lib/Target/Hexagon/AsmParser/LLVMBuild.txt create mode 100644 lib/Target/Hexagon/AsmParser/Makefile create mode 100644 lib/Target/Hexagon/HexagonBitSimplify.cpp create mode 100644 lib/Target/Hexagon/HexagonEarlyIfConv.cpp create mode 100644 lib/Target/Hexagon/HexagonGenMux.cpp create mode 100644 lib/Target/Hexagon/HexagonInstrAlias.td create mode 100644 lib/Target/Hexagon/HexagonInstrEnc.td create mode 100644 lib/Target/Hexagon/HexagonInstrFormatsV60.td create mode 100644 lib/Target/Hexagon/HexagonInstrInfoV60.td create mode 100644 lib/Target/Hexagon/HexagonIntrinsicsV60.td create mode 100644 lib/Target/Hexagon/HexagonOptimizeSZextends.cpp delete mode 100644 lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp create mode 100644 lib/Target/Hexagon/HexagonScheduleV55.td create mode 100644 lib/Target/Hexagon/HexagonScheduleV60.td create mode 100644 lib/Target/Hexagon/HexagonSplitDouble.cpp create mode 100644 lib/Target/Hexagon/HexagonStoreWidening.cpp create mode 100644 lib/Target/Hexagon/HexagonTargetTransformInfo.cpp create mode 100644 lib/Target/Hexagon/HexagonTargetTransformInfo.h create mode 100644 lib/Target/Hexagon/HexagonVLIWPacketizer.h create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h create mode 100644 lib/Target/Mips/MicroMips64r6InstrFormats.td create mode 100644 lib/Target/Mips/MicroMips64r6InstrInfo.td create mode 100644 lib/Target/Mips/MicroMipsDSPInstrFormats.td create mode 100644 lib/Target/Mips/MicroMipsDSPInstrInfo.td create mode 100644 lib/Target/Mips/MipsEVAInstrFormats.td create mode 100644 lib/Target/Mips/MipsEVAInstrInfo.td create mode 100644 lib/Target/Mips/MipsScheduleP5600.td create mode 100644 lib/Target/PowerPC/PPCBoolRetToInt.cpp create mode 100644 lib/Target/PowerPC/PPCMIPeephole.cpp create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp create mode 100644 lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp create mode 100644 lib/Target/WebAssembly/Relooper.cpp create mode 100644 lib/Target/WebAssembly/Relooper.h create mode 100644 lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyFastISel.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyISD.def create mode 100644 lib/Target/WebAssembly/WebAssemblyInstrControl.td create mode 100644 lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyMCInstLower.h create mode 100644 
lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyPEI.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyPeephole.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyRegColoring.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyRegStackify.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyStoreResults.cpp create mode 100644 lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp create mode 100644 lib/Target/WebAssembly/known_gcc_test_failures.txt delete mode 100644 lib/Target/X86/X86CompilationCallback_Win64.asm create mode 100644 lib/Target/X86/X86OptimizeLEAs.cpp create mode 100644 lib/Transforms/IPO/CrossDSOCFI.cpp create mode 100644 lib/Transforms/IPO/ForceFunctionAttrs.cpp create mode 100644 lib/Transforms/IPO/FunctionImport.cpp create mode 100644 lib/Transforms/IPO/InferFunctionAttrs.cpp rename lib/Transforms/{Scalar => IPO}/SampleProfile.cpp (50%) create mode 100644 lib/Transforms/Instrumentation/CFGMST.h create mode 100644 lib/Transforms/Instrumentation/PGOInstrumentation.cpp delete mode 100644 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h create mode 100644 lib/Transforms/Scalar/LoopLoadElimination.cpp create mode 100644 lib/Transforms/Utils/SplitModule.cpp create mode 100644 test/Analysis/BasicAA/bug.23540.ll create mode 100644 test/Analysis/BasicAA/bug.23626.ll create mode 100644 test/Analysis/BasicAA/phi-loop.ll create mode 100644 test/Analysis/BasicAA/q.bad.ll create mode 100644 test/Analysis/BasicAA/sequential-gep.ll create mode 100644 test/Analysis/BasicAA/zext.ll create mode 100644 test/Analysis/BlockFrequencyInfo/irreducible_loop_crash.ll create mode 100644 test/Analysis/CFLAliasAnalysis/opaque-call-alias.ll create mode 100644 test/Analysis/CostModel/AMDGPU/br.ll create mode 100644 test/Analysis/CostModel/AMDGPU/extractelement.ll create mode 100644 test/Analysis/CostModel/AMDGPU/lit.local.cfg create mode 100644 test/Analysis/CostModel/PowerPC/unal-vec-ldst.ll create mode 100644 test/Analysis/CostModel/X86/sse-itoi.ll create mode 100644 test/Analysis/CostModel/X86/vector_gep.ll create mode 100644 test/Analysis/CostModel/X86/vshift-ashr-cost.ll delete mode 100644 test/Analysis/CostModel/X86/vshift-cost.ll create mode 100644 test/Analysis/CostModel/X86/vshift-lshr-cost.ll create mode 100644 test/Analysis/CostModel/X86/vshift-shl-cost.ll create mode 100644 test/Analysis/Delinearization/parameter_addrec_product.ll create mode 100644 test/Analysis/DemandedBits/basic.ll create mode 100644 test/Analysis/DependenceAnalysis/NonAffineExpr.ll create mode 100644 test/Analysis/DivergenceAnalysis/AMDGPU/kernel-args.ll create mode 100644 test/Analysis/DivergenceAnalysis/AMDGPU/lit.local.cfg create mode 100644 test/Analysis/GlobalsModRef/argmemonly-escape.ll create mode 100644 test/Analysis/GlobalsModRef/atomic-instrs.ll create mode 100644 test/Analysis/GlobalsModRef/memset-escape.ll create mode 100644 test/Analysis/GlobalsModRef/nocapture.ll create mode 100644 test/Analysis/GlobalsModRef/nonescaping-noalias.ll create mode 100644 test/Analysis/GlobalsModRef/pr25309.ll create mode 100644 test/Analysis/GlobalsModRef/weak-interposition.ll delete mode 100644 test/Analysis/Lint/cppeh-catch-intrinsics.ll create mode 100644 test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll create mode 100644 test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll create mode 100644 test/Analysis/LoopAccessAnalysis/nullptr.ll create mode 100644 
test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll create mode 100644 test/Analysis/MemoryDependenceAnalysis/memdep-block-scan-limit.ll create mode 100644 test/Analysis/ScalarEvolution/avoid-assume-hang.ll create mode 100644 test/Analysis/ScalarEvolution/constant_condition.ll create mode 100644 test/Analysis/ScalarEvolution/flags-from-poison.ll create mode 100644 test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll create mode 100644 test/Analysis/ScalarEvolution/non-IV-phi.ll create mode 100644 test/Analysis/ScalarEvolution/pr24757.ll create mode 100644 test/Analysis/ScalarEvolution/pr25369.ll create mode 100644 test/Analysis/ScalarEvolution/shift-op.ll create mode 100644 test/Analysis/ScalarEvolution/smax-br-phi-idioms.ll create mode 100644 test/Analysis/ValueTracking/known-bits-from-range-md.ll create mode 100644 test/Analysis/ValueTracking/known-non-equal.ll create mode 100644 test/Analysis/ValueTracking/knownnonzero-shift.ll create mode 100644 test/Analysis/ValueTracking/knownzero-shift.ll create mode 100644 test/Analysis/ValueTracking/monotonic-phi.ll create mode 100644 test/Analysis/ValueTracking/pr24866.ll create mode 100644 test/Assembler/incorrect-tdep-attrs-parsing.ll create mode 100644 test/Assembler/invalid-alias-mismatched-explicit-type.ll create mode 100644 test/Assembler/invalid-dicompileunit-uniqued.ll delete mode 100644 test/Assembler/invalid-dilocalvariable-missing-tag.ll create mode 100644 test/Assembler/invalid-disubprogram-uniqued-definition.ll create mode 100644 test/Assembler/invalid-fp80hex.ll create mode 100644 test/Assembler/invalid-inline-constraint.ll create mode 100644 test/Assembler/invalid-untyped-metadata.ll create mode 100644 test/Assembler/token.ll create mode 100644 test/Bitcode/DILocalVariable-explicit-tags.ll create mode 100644 test/Bitcode/DILocalVariable-explicit-tags.ll.bc create mode 100644 test/Bitcode/DISubprogram-distinct-definitions.ll create mode 100644 test/Bitcode/DISubprogram-distinct-definitions.ll.bc create mode 100644 test/Bitcode/Inputs/invalid-cast.bc create mode 100644 test/Bitcode/Inputs/invalid-name-with-0-byte.bc create mode 100644 test/Bitcode/Inputs/invalid-no-function-block.bc create mode 100644 test/Bitcode/anon-functions.ll create mode 100644 test/Bitcode/compatibility-3.6.ll create mode 100644 test/Bitcode/compatibility-3.6.ll.bc create mode 100644 test/Bitcode/compatibility-3.7.ll create mode 100644 test/Bitcode/compatibility-3.7.ll.bc create mode 100644 test/Bitcode/compatibility.ll create mode 100644 test/Bitcode/identification.ll create mode 100644 test/Bitcode/operand-bundles.ll create mode 100644 test/Bitcode/thinlto-function-summary.ll create mode 100644 test/Bitcode/upgrade-subprogram.ll create mode 100644 test/Bitcode/upgrade-subprogram.ll.bc create mode 100644 test/Bitcode/use-list-order2.ll create mode 100644 test/Bitcode/vst-forward-declaration.ll create mode 100644 test/BugPoint/named-md.ll create mode 100644 test/CodeGen/AArch64/aarch64-addv.ll create mode 100644 test/CodeGen/AArch64/aarch64-deferred-spilling.ll create mode 100644 test/CodeGen/AArch64/aarch64-loop-gep-opt.ll create mode 100644 test/CodeGen/AArch64/aarch64-minmaxv.ll create mode 100644 test/CodeGen/AArch64/aarch64-smax-constantfold.ll create mode 100644 test/CodeGen/AArch64/arm64-builtins-linux.ll create mode 100644 test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll create mode 100644 test/CodeGen/AArch64/arm64-fmax-safe.ll create mode 100644 test/CodeGen/AArch64/arm64-ld-from-st.ll create mode 100644 
test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll create mode 100644 test/CodeGen/AArch64/bitreverse.ll create mode 100644 test/CodeGen/AArch64/cxx-tlscc.ll create mode 100644 test/CodeGen/AArch64/dag-combine-select.ll create mode 100644 test/CodeGen/AArch64/divrem.ll create mode 100644 test/CodeGen/AArch64/emutls.ll create mode 100644 test/CodeGen/AArch64/emutls_generic.ll create mode 100644 test/CodeGen/AArch64/eon.ll create mode 100644 test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll create mode 100644 test/CodeGen/AArch64/fast-isel-cmp-vec.ll create mode 100644 test/CodeGen/AArch64/fast-isel-folded-shift.ll create mode 100644 test/CodeGen/AArch64/fcvt_combine.ll create mode 100644 test/CodeGen/AArch64/fdiv_combine.ll create mode 100644 test/CodeGen/AArch64/misched-fusion.ll create mode 100644 test/CodeGen/AArch64/nontemporal.ll create mode 100644 test/CodeGen/AArch64/readcyclecounter.ll create mode 100644 test/CodeGen/AArch64/rotate.ll create mode 100644 test/CodeGen/AArch64/round-conv.ll create mode 100755 test/CodeGen/AArch64/shrink-wrap.ll create mode 100644 test/CodeGen/AArch64/stackmap-frame-setup.ll create mode 100644 test/CodeGen/AArch64/tbi.ll create mode 100644 test/CodeGen/AArch64/vector-fcopysign.ll create mode 100644 test/CodeGen/AMDGPU/addrspacecast.ll create mode 100644 test/CodeGen/AMDGPU/annotate-kernel-features.ll create mode 100644 test/CodeGen/AMDGPU/bitreverse.ll create mode 100644 test/CodeGen/AMDGPU/calling-conventions.ll create mode 100644 test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll create mode 100644 test/CodeGen/AMDGPU/ci-use-flat-for-global.ll create mode 100644 test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll create mode 100644 test/CodeGen/AMDGPU/ds-sub-offset.ll create mode 100644 test/CodeGen/AMDGPU/dynamic_stackalloc.ll create mode 100644 test/CodeGen/AMDGPU/extract-vector-elt-i64.ll create mode 100644 test/CodeGen/AMDGPU/flat-scratch-reg.ll create mode 100644 test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll create mode 100644 test/CodeGen/AMDGPU/global-constant.ll create mode 100644 test/CodeGen/AMDGPU/hsa-globals.ll create mode 100644 test/CodeGen/AMDGPU/hsa-group-segment.ll create mode 100644 test/CodeGen/AMDGPU/image-attributes.ll create mode 100644 test/CodeGen/AMDGPU/image-resource-id.ll create mode 100644 test/CodeGen/AMDGPU/inline-constraints.ll create mode 100644 test/CodeGen/AMDGPU/large-alloca-compute.ll create mode 100644 test/CodeGen/AMDGPU/large-alloca-graphics.ll delete mode 100644 test/CodeGen/AMDGPU/large-alloca.ll delete mode 100644 test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll create mode 100644 test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll create mode 100644 test/CodeGen/AMDGPU/llvm.SI.packf16.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll create mode 100644 test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll create mode 100644 test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll create mode 100644 
test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll create mode 100644 test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll create mode 100644 test/CodeGen/AMDGPU/opencl-image-metadata.ll create mode 100644 test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll create mode 100644 test/CodeGen/AMDGPU/sampler-resource-id.ll create mode 100644 test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll create mode 100644 test/CodeGen/AMDGPU/si-literal-folding.ll create mode 100644 test/CodeGen/AMDGPU/sminmax.ll create mode 100644 test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll create mode 100644 test/CodeGen/AMDGPU/store_typed.ll create mode 100644 test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll create mode 100644 test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll create mode 100644 test/CodeGen/ARM/MachO-subtypes.ll create mode 100644 test/CodeGen/ARM/Windows/division.ll rename test/CodeGen/ARM/Windows/{integer-floating-point-conversion.ll => libcalls.ll} (88%) create mode 100644 test/CodeGen/ARM/Windows/no-eabi.ll create mode 100644 test/CodeGen/ARM/Windows/no-frame-register.ll create mode 100644 test/CodeGen/ARM/Windows/overflow.ll create mode 100644 test/CodeGen/ARM/align-sp-adjustment.ll create mode 100644 test/CodeGen/ARM/apcs-vfp.ll create mode 100644 test/CodeGen/ARM/arm-eabi.ll create mode 100644 test/CodeGen/ARM/arm-shrink-wrapping-linux.ll create mode 100644 test/CodeGen/ARM/arm-shrink-wrapping.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-minsize.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-mixed.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-optnone.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization-optsize.ll create mode 100644 test/CodeGen/ARM/build-attributes-optimization.ll create mode 100644 test/CodeGen/ARM/cfi-alignment.ll create mode 100644 test/CodeGen/ARM/combine-vmovdrr.ll create mode 100644 test/CodeGen/ARM/debugtrap.ll create mode 100644 test/CodeGen/ARM/emutls.ll create mode 100644 test/CodeGen/ARM/emutls1.ll create mode 100644 test/CodeGen/ARM/emutls_generic.ll create mode 100644 test/CodeGen/ARM/fp16-args.ll create mode 100644 test/CodeGen/ARM/gep-optimization.ll create mode 100644 test/CodeGen/ARM/global-merge-external.ll create mode 100644 test/CodeGen/ARM/ldm-stm-base-materialization.ll create mode 100644 test/CodeGen/ARM/legalize-unaligned-load.ll create mode 100644 test/CodeGen/ARM/memcpy-ldm-stm.ll create mode 100644 test/CodeGen/ARM/minmax.ll create mode 100644 test/CodeGen/ARM/neon_vshl_minint.ll create mode 100644 test/CodeGen/ARM/pr25317.ll create mode 100644 test/CodeGen/ARM/pr25838.ll create mode 100644 test/CodeGen/ARM/rotate.ll create mode 100644 test/CodeGen/ARM/sat-arith.ll delete mode 100644 test/CodeGen/ARM/sched-it-debug-nodes.ll create mode 100644 test/CodeGen/ARM/setjmp_longjmp.ll create mode 100644 test/CodeGen/ARM/softfp-fabs-fneg.ll create mode 100644 test/CodeGen/ARM/ssat-lower.ll create mode 100644 test/CodeGen/ARM/ssat-upper.ll create mode 100644 test/CodeGen/ARM/subtarget-no-movt.ll create mode 100644 test/CodeGen/ARM/thumb1-ldst-opt.ll create mode 100644 test/CodeGen/ARM/unaligned_load_store_vfp.ll create mode 100644 test/CodeGen/ARM/usat-lower.ll create mode 100644 test/CodeGen/ARM/usat-upper.ll create mode 100644 test/CodeGen/ARM/v7k-abi-align.ll create mode 100644 test/CodeGen/ARM/v7k-libcalls.ll create mode 100644 test/CodeGen/ARM/v7k-sincos.ll create mode 100644 test/CodeGen/ARM/vfp-reg-stride.ll create mode 100644 
test/CodeGen/ARM/vld-vst-upgrade.ll
 create mode 100644 test/CodeGen/ARM/vminmaxnm-safe.ll
 create mode 100644 test/CodeGen/CPP/gep.ll
 create mode 100644 test/CodeGen/Generic/ForceStackAlign.ll
 create mode 100644 test/CodeGen/Generic/lit.local.cfg
 create mode 100644 test/CodeGen/Hexagon/NVJumpCmp.ll
 create mode 100644 test/CodeGen/Hexagon/bit-eval.ll
 create mode 100644 test/CodeGen/Hexagon/bit-loop.ll
 create mode 100644 test/CodeGen/Hexagon/cfi-late.ll
 create mode 100644 test/CodeGen/Hexagon/early-if-conversion-bug1.ll
 create mode 100644 test/CodeGen/Hexagon/early-if-phi-i1.ll
 create mode 100644 test/CodeGen/Hexagon/early-if-spare.ll
 create mode 100644 test/CodeGen/Hexagon/early-if.ll
 create mode 100644 test/CodeGen/Hexagon/ifcvt-edge-weight.ll
 create mode 100644 test/CodeGen/Hexagon/memcpy-likely-aligned.ll
 create mode 100644 test/CodeGen/Hexagon/mux-basic.ll
 create mode 100644 test/CodeGen/Hexagon/pic-jumptables.ll
 create mode 100644 test/CodeGen/Hexagon/pic-simple.ll
 create mode 100644 test/CodeGen/Hexagon/pic-static.ll
 create mode 100644 test/CodeGen/Hexagon/sdr-basic.ll
 create mode 100644 test/CodeGen/Hexagon/sdr-shr32.ll
 create mode 100644 test/CodeGen/Hexagon/store-widen-aliased-load.ll
 create mode 100644 test/CodeGen/Hexagon/store-widen-negv.ll
 create mode 100644 test/CodeGen/Hexagon/store-widen-negv2.ll
 create mode 100644 test/CodeGen/Hexagon/store-widen.ll
 create mode 100644 test/CodeGen/Hexagon/tail-dup-subreg-abort.ll
 create mode 100644 test/CodeGen/Hexagon/v60Intrins.ll
 create mode 100644 test/CodeGen/Hexagon/v60Vasr.ll
 create mode 100644 test/CodeGen/Hexagon/v60small.ll
 create mode 100644 test/CodeGen/MIR/AArch64/cfi-def-cfa.mir
 create mode 100644 test/CodeGen/MIR/AArch64/expected-target-flag-name.mir
 create mode 100644 test/CodeGen/MIR/AArch64/invalid-target-flag-name.mir
 create mode 100644 test/CodeGen/MIR/AArch64/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/AArch64/multiple-lhs-operands.mir
 create mode 100644 test/CodeGen/MIR/AArch64/stack-object-local-offset.mir
 create mode 100644 test/CodeGen/MIR/AArch64/target-flags.mir
 create mode 100644 test/CodeGen/MIR/AMDGPU/expected-target-index-name.mir
 create mode 100644 test/CodeGen/MIR/AMDGPU/invalid-target-index-operand.mir
 create mode 100644 test/CodeGen/MIR/AMDGPU/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/AMDGPU/target-index-operands.mir
 create mode 100644 test/CodeGen/MIR/ARM/ARMLoadStoreDBG.mir
 create mode 100644 test/CodeGen/MIR/ARM/bundled-instructions.mir
 create mode 100644 test/CodeGen/MIR/ARM/cfi-same-value.mir
 create mode 100644 test/CodeGen/MIR/ARM/expected-closing-brace.mir
 create mode 100644 test/CodeGen/MIR/ARM/extraneous-closing-brace-error.mir
 create mode 100644 test/CodeGen/MIR/ARM/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/ARM/nested-instruction-bundle-error.mir
 create mode 100644 test/CodeGen/MIR/ARM/sched-it-debug-nodes.mir
 create mode 100644 test/CodeGen/MIR/Generic/basic-blocks.mir
 rename test/CodeGen/MIR/{machine-basic-block-redefinition-error.mir => Generic/expected-colon-after-basic-block.mir} (67%)
 rename test/CodeGen/MIR/{ => Generic}/expected-mbb-reference-for-successor-mbb.mir (56%)
 rename test/CodeGen/MIR/{ => Generic}/frame-info.mir (95%)
 rename test/CodeGen/MIR/{ => Generic}/function-missing-machine-function.mir (100%)
 create mode 100644 test/CodeGen/MIR/Generic/invalid-jump-table-kind.mir
 create mode 100644 test/CodeGen/MIR/Generic/lit.local.cfg
 rename test/CodeGen/MIR/{ => Generic}/llvm-ir-error-reported.mir (100%)
 rename test/CodeGen/MIR/{ => Generic}/llvmIR.mir (98%)
 rename test/CodeGen/MIR/{ => Generic}/llvmIRMissing.mir (92%)
 create mode 100644 test/CodeGen/MIR/Generic/machine-basic-block-ir-block-reference.mir
 create mode 100644 test/CodeGen/MIR/Generic/machine-basic-block-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/Generic/machine-basic-block-undefined-ir-block.mir
 rename test/CodeGen/MIR/{ => Generic}/machine-basic-block-unknown-name.mir (71%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function-missing-body-error.mir (100%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function-missing-function.mir (93%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function-missing-name.mir (92%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function-redefinition-error.mir (100%)
 rename test/CodeGen/MIR/{ => Generic}/machine-function.mir (94%)
 rename test/CodeGen/MIR/{ => Generic}/register-info.mir (95%)
 create mode 100644 test/CodeGen/MIR/Mips/expected-global-value-or-symbol-after-call-entry.mir
 create mode 100644 test/CodeGen/MIR/Mips/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/Mips/memory-operands.mir
 create mode 100644 test/CodeGen/MIR/NVPTX/expected-floating-point-literal.mir
 create mode 100644 test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
 create mode 100644 test/CodeGen/MIR/NVPTX/floating-point-invalid-type-error.mir
 create mode 100644 test/CodeGen/MIR/NVPTX/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/PowerPC/lit.local.cfg
 create mode 100644 test/CodeGen/MIR/PowerPC/unordered-implicit-registers.mir
 create mode 100644 test/CodeGen/MIR/X86/basic-block-not-at-start-of-line-error.mir
 create mode 100644 test/CodeGen/MIR/X86/block-address-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/callee-saved-info.mir
 create mode 100644 test/CodeGen/MIR/X86/cfi-def-cfa-offset.mir
 create mode 100644 test/CodeGen/MIR/X86/cfi-def-cfa-register.mir
 create mode 100644 test/CodeGen/MIR/X86/cfi-offset.mir
 create mode 100644 test/CodeGen/MIR/X86/constant-pool-item-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/X86/constant-pool.mir
 create mode 100644 test/CodeGen/MIR/X86/constant-value-error.mir
 create mode 100644 test/CodeGen/MIR/X86/def-register-already-tied-error.mir
 create mode 100644 test/CodeGen/MIR/X86/duplicate-memory-operand-flag.mir
 create mode 100644 test/CodeGen/MIR/X86/duplicate-register-flag-error.mir
 create mode 100644 test/CodeGen/MIR/X86/early-clobber-register-flag.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-align-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-alignment-after-align-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-basic-block-at-start-of-body.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-block-reference-in-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-comma-after-cfi-register.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-comma-after-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-from-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-function-reference-after-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-global-value-after-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-integer-after-offset-sign.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-integer-after-tied-def.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-integer-in-successor-weight.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-load-or-store-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-metadata-node-after-debug-location.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-metadata-node-after-exclaim.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-metadata-node-in-stack-object.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-named-register-in-allocation-hint.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-named-register-in-callee-saved-register.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-named-register-in-functions-livein.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-newline-at-end-of-list.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-offset-after-cfi-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-pointer-value-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-positive-alignment-after-align.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-register-after-cfi-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-size-integer-after-memory-operation.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-stack-object.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-target-flag-name.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-tied-def-after-lparen.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-value-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/expected-virtual-register-in-functions-livein.mir
 create mode 100644 test/CodeGen/MIR/X86/external-symbol-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/fixed-stack-memory-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/fixed-stack-object-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/X86/frame-info-save-restore-points.mir
 create mode 100644 test/CodeGen/MIR/X86/frame-info-stack-references.mir
 create mode 100644 test/CodeGen/MIR/X86/frame-setup-instruction-flag.mir
 create mode 100644 test/CodeGen/MIR/X86/function-liveins.mir
 create mode 100644 test/CodeGen/MIR/X86/inline-asm-registers.mir
 create mode 100644 test/CodeGen/MIR/X86/instructions-debug-location.mir
 create mode 100644 test/CodeGen/MIR/X86/invalid-constant-pool-item.mir
 create mode 100644 test/CodeGen/MIR/X86/invalid-metadata-node-type.mir
 create mode 100644 test/CodeGen/MIR/X86/invalid-target-flag-name.mir
 create mode 100644 test/CodeGen/MIR/X86/invalid-tied-def-index-error.mir
 create mode 100644 test/CodeGen/MIR/X86/jump-table-info.mir
 create mode 100644 test/CodeGen/MIR/X86/jump-table-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/X86/large-cfi-offset-number-error.mir
 create mode 100644 test/CodeGen/MIR/X86/large-immediate-operand-error.mir
 create mode 100644 test/CodeGen/MIR/X86/large-offset-number-error.mir
 create mode 100644 test/CodeGen/MIR/X86/large-size-in-memory-operand-error.mir
 create mode 100644 test/CodeGen/MIR/X86/liveout-register-mask.mir
 create mode 100644 test/CodeGen/MIR/X86/machine-verifier.mir
 create mode 100644 test/CodeGen/MIR/X86/memory-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/metadata-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/missing-closing-quote.mir
 delete mode 100644 test/CodeGen/MIR/X86/missing-instruction.mir
 create mode 100644 test/CodeGen/MIR/X86/newline-handling.mir
 create mode 100644 test/CodeGen/MIR/X86/register-operands-target-flag-error.mir
 create mode 100644 test/CodeGen/MIR/X86/simple-register-allocation-hints.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-debug-info.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-invalid-name.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-operand-name-mismatch-error.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-operands.mir
 create mode 100644 test/CodeGen/MIR/X86/stack-object-redefinition-error.mir
 create mode 100644 test/CodeGen/MIR/X86/standalone-register-error.mir
 create mode 100644 test/CodeGen/MIR/X86/successor-basic-blocks-weights.mir
 create mode 100644 test/CodeGen/MIR/X86/successor-basic-blocks.mir
 create mode 100644 test/CodeGen/MIR/X86/tied-def-operand-invalid.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-fixed-stack-object.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-ir-block-in-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-ir-block-slot-in-blockaddress.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-jump-table-id.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-stack-object.mir
 create mode 100644 test/CodeGen/MIR/X86/undefined-value-in-memory-operand.mir
 create mode 100644 test/CodeGen/MIR/X86/unknown-metadata-keyword.mir
 create mode 100644 test/CodeGen/MIR/X86/unknown-metadata-node.mir
 create mode 100644 test/CodeGen/MIR/X86/used-physical-register-info.mir
 create mode 100644 test/CodeGen/MIR/X86/virtual-register-redefinition-error.mir
 delete mode 100644 test/CodeGen/MIR/basic-blocks.mir
 delete mode 100644 test/CodeGen/MIR/expected-eof-after-successor-mbb.mir
 delete mode 100644 test/CodeGen/MIR/successor-basic-blocks.mir
 create mode 100644 test/CodeGen/Mips/Fast-ISel/check-disabled-mcpus.ll
 create mode 100644 test/CodeGen/Mips/emutls_generic.ll
 create mode 100644 test/CodeGen/Mips/interrupt-attr-64-error.ll
 create mode 100644 test/CodeGen/Mips/interrupt-attr-args-error.ll
 create mode 100644 test/CodeGen/Mips/interrupt-attr-error.ll
 create mode 100644 test/CodeGen/Mips/interrupt-attr.ll
 create mode 100644 test/CodeGen/Mips/llvm-ir/atomicrmx.ll
 create mode 100644 test/CodeGen/Mips/llvm-ir/load-atomic.ll
 create mode 100644 test/CodeGen/Mips/llvm-ir/sqrt.ll
 create mode 100644 test/CodeGen/Mips/llvm-ir/store-atomic.ll
 create mode 100644 test/CodeGen/NVPTX/branch-fold.ll
 create mode 100644 test/CodeGen/NVPTX/bypass-div.ll
 create mode 100644 test/CodeGen/NVPTX/combine-min-max.ll
 create mode 100644 test/CodeGen/NVPTX/global-addrspace.ll
 create mode 100644 test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
 create mode 100644 test/CodeGen/NVPTX/reg-copy.ll
 create mode 100644 test/CodeGen/PowerPC/BoolRetToIntTest.ll
 create mode 100644 test/CodeGen/PowerPC/BreakableToken-reduced.ll
 create mode 100644 test/CodeGen/PowerPC/aantidep-def-ec.mir
 create mode 100644 test/CodeGen/PowerPC/aantidep-inline-asm-use.ll
 create mode 100644 test/CodeGen/PowerPC/addisdtprelha-nonr3.mir
 create mode 100644 test/CodeGen/PowerPC/bitcasts-direct-move.ll
 create mode 100644 test/CodeGen/PowerPC/bitreverse.ll
 create mode 100644 test/CodeGen/PowerPC/branch-hint.ll
 create mode 100644 test/CodeGen/PowerPC/coal-sections.ll
 create mode 100644 test/CodeGen/PowerPC/crbit-asm-disabled.ll
 create mode 100644 test/CodeGen/PowerPC/dyn-alloca-offset.ll
 create mode 100644 test/CodeGen/PowerPC/e500-1.ll
 create mode 100644 test/CodeGen/PowerPC/emutls_generic.ll
 create mode 100644 test/CodeGen/PowerPC/fma-mutate-register-constraint.ll
 create mode 100644 test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
 create mode 100644 test/CodeGen/PowerPC/machine-combiner.ll
 create mode 100644 test/CodeGen/PowerPC/mc-instrlat.ll
 create mode 100644 test/CodeGen/PowerPC/mcm-13.ll
 create mode 100644 test/CodeGen/PowerPC/merge-st-chain-op.ll
 create mode 100644 test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
 create mode 100644 test/CodeGen/PowerPC/peephole-align.ll
 create mode 100644 test/CodeGen/PowerPC/ppc-shrink-wrapping.ll
 create mode 100644 test/CodeGen/PowerPC/ppcsoftops.ll
 create mode 100644 test/CodeGen/PowerPC/pr24636.ll
 create mode 100644 test/CodeGen/PowerPC/pr25157-peephole.ll
 create mode 100644 test/CodeGen/PowerPC/preincprep-nontrans-crash.ll
 create mode 100644 test/CodeGen/PowerPC/qpx-unal-cons-lds.ll
 create mode 100644 test/CodeGen/PowerPC/rotl-rotr-crash.ll
 create mode 100644 test/CodeGen/PowerPC/selectiondag-extload-computeknownbits.ll
 create mode 100644 test/CodeGen/PowerPC/stackmap-frame-setup.ll
 create mode 100644 test/CodeGen/PowerPC/swaps-le-6.ll
 create mode 100644 test/CodeGen/PowerPC/unal-vec-ldst.ll
 create mode 100644 test/CodeGen/PowerPC/unal-vec-negarith.ll
 create mode 100644 test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
 create mode 100644 test/CodeGen/PowerPC/vec-asm-disabled.ll
 create mode 100644 test/CodeGen/PowerPC/vector-merge-store-fp-constants.ll
 create mode 100644 test/CodeGen/SPARC/32abi.ll
 create mode 100644 test/CodeGen/SPARC/float-constants.ll
 create mode 100644 test/CodeGen/SPARC/missing-sret.ll
 create mode 100644 test/CodeGen/SPARC/reserved-regs.ll
 create mode 100644 test/CodeGen/SPARC/select-mask.ll
 create mode 100644 test/CodeGen/SPARC/spill.ll
 create mode 100644 test/CodeGen/SPARC/stack-align.ll
 create mode 100644 test/CodeGen/SystemZ/alloca-03.ll
 create mode 100644 test/CodeGen/SystemZ/alloca-04.ll
 create mode 100644 test/CodeGen/SystemZ/dag-combine-01.ll
 create mode 100644 test/CodeGen/SystemZ/fp-cmp-05.ll
 create mode 100644 test/CodeGen/SystemZ/fp-libcall.ll
 create mode 100644 test/CodeGen/SystemZ/fp-sincos-01.ll
 create mode 100644 test/CodeGen/SystemZ/int-cmp-51.ll
 create mode 100644 test/CodeGen/SystemZ/int-cmp-52.ll
 create mode 100644 test/CodeGen/SystemZ/vec-perm-12.ll
 create mode 100644 test/CodeGen/SystemZ/vec-perm-13.ll
 create mode 100644 test/CodeGen/Thumb/ldm-stm-base-materialization-thumb2.ll
 delete mode 100644 test/CodeGen/Thumb/thumb-memcpy-ldm-stm.ll
 create mode 100644 test/CodeGen/Thumb/thumb-shrink-wrapping.ll
 create mode 100644 test/CodeGen/Thumb2/emit-unwinding.ll
 create mode 100644 test/CodeGen/Thumb2/setjmp_longjmp.ll
 create mode 100644 test/CodeGen/WebAssembly/call.ll
 create mode 100644 test/CodeGen/WebAssembly/cfg-stackify.ll
 create mode 100644 test/CodeGen/WebAssembly/comparisons_f32.ll
 create mode 100644 test/CodeGen/WebAssembly/comparisons_f64.ll
 create mode 100644 test/CodeGen/WebAssembly/comparisons_i32.ll
 create mode 100644 test/CodeGen/WebAssembly/comparisons_i64.ll
 create mode 100644 test/CodeGen/WebAssembly/conv.ll
 create mode 100644 test/CodeGen/WebAssembly/copysign-casts.ll
 create mode 100644 test/CodeGen/WebAssembly/cpus.ll
 create mode 100644 test/CodeGen/WebAssembly/dead-vreg.ll
 create mode 100644 test/CodeGen/WebAssembly/f32.ll
 create mode 100644 test/CodeGen/WebAssembly/f64.ll
 create mode 100644 test/CodeGen/WebAssembly/fast-isel.ll
 create mode 100644 test/CodeGen/WebAssembly/frem.ll
 create mode 100644 test/CodeGen/WebAssembly/func.ll
 create mode 100644 test/CodeGen/WebAssembly/global.ll
 create mode 100644 test/CodeGen/WebAssembly/globl.ll
 create mode 100644 test/CodeGen/WebAssembly/i32.ll
 create mode 100644 test/CodeGen/WebAssembly/i64.ll
 create mode 100644 test/CodeGen/WebAssembly/ident.ll
 create mode 100644 test/CodeGen/WebAssembly/immediates.ll
 create mode 100644 test/CodeGen/WebAssembly/inline-asm.ll
 create mode 100644 test/CodeGen/WebAssembly/legalize.ll
 create mode 100644 test/CodeGen/WebAssembly/load-ext.ll
 create mode 100644 test/CodeGen/WebAssembly/load-store-i1.ll
 create mode 100644 test/CodeGen/WebAssembly/load.ll
 create mode 100644 test/CodeGen/WebAssembly/loop-idiom.ll
 create mode 100644 test/CodeGen/WebAssembly/memory-addr32.ll
 create mode 100644 test/CodeGen/WebAssembly/memory-addr64.ll
 create mode 100644 test/CodeGen/WebAssembly/offset-folding.ll
 create mode 100644 test/CodeGen/WebAssembly/offset.ll
 create mode 100644 test/CodeGen/WebAssembly/phi.ll
 create mode 100644 test/CodeGen/WebAssembly/reg-stackify.ll
 create mode 100644 test/CodeGen/WebAssembly/return-int32.ll
 create mode 100644 test/CodeGen/WebAssembly/return-void.ll
 create mode 100644 test/CodeGen/WebAssembly/returned.ll
 create mode 100644 test/CodeGen/WebAssembly/select.ll
 create mode 100644 test/CodeGen/WebAssembly/signext-zeroext.ll
 create mode 100644 test/CodeGen/WebAssembly/store-results.ll
 create mode 100644 test/CodeGen/WebAssembly/store-trunc.ll
 create mode 100644 test/CodeGen/WebAssembly/store.ll
 create mode 100644 test/CodeGen/WebAssembly/switch.ll
 create mode 100644 test/CodeGen/WebAssembly/unreachable.ll
 create mode 100644 test/CodeGen/WebAssembly/unused-argument.ll
 create mode 100644 test/CodeGen/WebAssembly/userstack.ll
 create mode 100644 test/CodeGen/WebAssembly/varargs.ll
 create mode 100644 test/CodeGen/WebAssembly/vtable.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-alloca-sink.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-all-win32.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-all.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-and-throw.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-scalar.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-catch-unwind.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-cleanup-invoke.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-demote-liveout.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-frame-vars.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-inalloca.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-min-unwind.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-mixed-catch-and-cleanup.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-multi-catch.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nested-1.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nested-2.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nested-3.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nested-rethrow.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-nonalloca-frame-values.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-prepared-catch-all.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-prepared-catch-reordered.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-prepared-catch.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-prepared-cleanups.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-shared-empty-catch.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-similar-catch-blocks.ll
 delete mode 100644 test/CodeGen/WinEH/cppeh-state-calc-1.ll
 delete mode 100644 test/CodeGen/WinEH/seh-catch-all.ll
 delete mode 100644 test/CodeGen/WinEH/seh-exception-code.ll
 delete mode 100644 test/CodeGen/WinEH/seh-exception-code2.ll
 delete mode 100644 test/CodeGen/WinEH/seh-inlined-finally.ll
 delete mode 100644 test/CodeGen/WinEH/seh-outlined-finally-win32.ll
 delete mode 100644 test/CodeGen/WinEH/seh-outlined-finally.ll
 delete mode 100644 test/CodeGen/WinEH/seh-prepared-basic.ll
 delete mode 100644 test/CodeGen/WinEH/seh-resume-phi.ll
 delete mode 100644 test/CodeGen/WinEH/seh-simple.ll
 create mode 100644 test/CodeGen/WinEH/wineh-cloning.ll
 create mode 100644 test/CodeGen/WinEH/wineh-demotion.ll
 create mode 100644 test/CodeGen/WinEH/wineh-intrinsics-invalid.ll
 create mode 100644 test/CodeGen/WinEH/wineh-intrinsics.ll
 create mode 100644 test/CodeGen/WinEH/wineh-no-demotion.ll
 create mode 100644 test/CodeGen/WinEH/wineh-statenumbering-cleanups.ll
 create mode 100644 test/CodeGen/WinEH/wineh-statenumbering.ll
 create mode 100644 test/CodeGen/X86/add-nsw-sext.ll
 create mode 100644 test/CodeGen/X86/and-encoding.ll
 create mode 100644 test/CodeGen/X86/atomic-flags.ll
 create mode 100644 test/CodeGen/X86/atomic-non-integer.ll
 create mode 100644 test/CodeGen/X86/avg.ll
 create mode 100644 test/CodeGen/X86/avx-isa-check.ll
 create mode 100644 test/CodeGen/X86/avx512-bugfix-25270.ll
 create mode 100644 test/CodeGen/X86/avx512-ext.ll
 create mode 100644 test/CodeGen/X86/avx512-extract-subvector.ll
 create mode 100644 test/CodeGen/X86/avx512-skx-insert-subvec.ll
 delete mode 100644 test/CodeGen/X86/avx512-trunc-ext.ll
 create mode 100644 test/CodeGen/X86/avx512-trunc.ll
 create mode 100644 test/CodeGen/X86/avx512cd-intrinsics.ll
 create mode 100644 test/CodeGen/X86/avx512cdvl-intrinsics.ll
 create mode 100644 test/CodeGen/X86/avx512dq-intrinsics.ll
 create mode 100644 test/CodeGen/X86/bit-piece-comment.ll
 create mode 100644 test/CodeGen/X86/bitreverse.ll
 create mode 100644 test/CodeGen/X86/branchfolding-catchpads.ll
 create mode 100644 test/CodeGen/X86/catchpad-realign-savexmm.ll
 create mode 100644 test/CodeGen/X86/catchpad-regmask.ll
 create mode 100644 test/CodeGen/X86/catchpad-weight.ll
 create mode 100644 test/CodeGen/X86/catchret-empty-fallthrough.ll
 create mode 100644 test/CodeGen/X86/catchret-fallthrough.ll
 create mode 100644 test/CodeGen/X86/cleanuppad-inalloca.ll
 create mode 100644 test/CodeGen/X86/cleanuppad-large-codemodel.ll
 create mode 100644 test/CodeGen/X86/cleanuppad-realign.ll
 create mode 100644 test/CodeGen/X86/coal-sections.ll
 create mode 100644 test/CodeGen/X86/coalescer-win64.ll
 create mode 100644 test/CodeGen/X86/code_placement_cold_loop_blocks.ll
 create mode 100644 test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
 create mode 100644 test/CodeGen/X86/code_placement_loop_rotation.ll
 create mode 100644 test/CodeGen/X86/code_placement_loop_rotation2.ll
 create mode 100644 test/CodeGen/X86/combine-multiplies.ll
 delete mode 100644 test/CodeGen/X86/combine-sse2-intrinsics.ll
 create mode 100644 test/CodeGen/X86/constant-hoisting-and.ll
 create mode 100644 test/CodeGen/X86/constant-hoisting-cmp.ll
 delete mode 100644 test/CodeGen/X86/cppeh-nounwind.ll
 create mode 100644 test/CodeGen/X86/cxx_tlscc64.ll
 create mode 100644 test/CodeGen/X86/dag-fmf-cse.ll
 create mode 100644 test/CodeGen/X86/dag-merge-fast-accesses.ll
 create mode 100644 test/CodeGen/X86/darwin-tls.ll
 create mode 100644 test/CodeGen/X86/debugloc-argsize.ll
 create mode 100644 test/CodeGen/X86/eh-null-personality.ll
 create mode 100644 test/CodeGen/X86/emutls-pic.ll
 create mode 100644 test/CodeGen/X86/emutls-pie.ll
 create mode 100644 test/CodeGen/X86/emutls.ll
 create mode 100644 test/CodeGen/X86/emutls_generic.ll
 create mode 100644 test/CodeGen/X86/expand-vr64-gr64-copy.mir
 create mode 100644 test/CodeGen/X86/extractelement-legalization-cycle.ll
 create mode 100644 test/CodeGen/X86/fadd-combines.ll
 create mode 100644 test/CodeGen/X86/fast-isel-bitcasts-avx.ll
 create mode 100644 test/CodeGen/X86/fast-isel-bitcasts.ll
 create mode 100644 test/CodeGen/X86/fast-isel-deadcode.ll
 create mode 100644 test/CodeGen/X86/fast-isel-emutls.ll
 create mode 100644 test/CodeGen/X86/fast-isel-nontemporal.ll
 create mode 100644 test/CodeGen/X86/fast-isel-stackcheck.ll
 create mode 100644 test/CodeGen/X86/fixup-lea.ll
 create mode 100644 test/CodeGen/X86/fma-commute-x86.ll
 create mode 100644 test/CodeGen/X86/fma-scalar-memfold.ll
 create mode 100644 test/CodeGen/X86/fold-push.ll
 create mode 100644 test/CodeGen/X86/fp-logic.ll
 create mode 100644 test/CodeGen/X86/fp128-calling-conv.ll
 create mode 100644 test/CodeGen/X86/fp128-cast.ll
 create mode 100644 test/CodeGen/X86/fp128-compare.ll
 create mode 100644 test/CodeGen/X86/fp128-i128.ll
 create mode 100644 test/CodeGen/X86/fp128-libcalls.ll
 create mode 100644 test/CodeGen/X86/fp128-load.ll
 create mode 100644 test/CodeGen/X86/fp128-store.ll
 create mode 100644 test/CodeGen/X86/fpcmp-soft-fp.ll
 create mode 100644 test/CodeGen/X86/frem-msvc32.ll
 create mode 100644 test/CodeGen/X86/funclet-layout.ll
 create mode 100644 test/CodeGen/X86/function-alias.ll
 create mode 100644 test/CodeGen/X86/hhvm-cc.ll
 create mode 100644 test/CodeGen/X86/i386-shrink-wrapping.ll
 create mode 100644 test/CodeGen/X86/immediate_merging.ll
 create mode 100644 test/CodeGen/X86/inconsistent_landingpad.ll
 create mode 100644 test/CodeGen/X86/inline-sse.ll
 create mode 100644 test/CodeGen/X86/insertps-from-constantpool.ll
 create mode 100644 test/CodeGen/X86/insertps-unfold-load-bug.ll
 create mode 100644 test/CodeGen/X86/late-address-taken.ll
 create mode 100644 test/CodeGen/X86/lea-opt.ll
 rename test/CodeGen/X86/{frameescape.ll => localescape.ll} (76%)
 create mode 100644 test/CodeGen/X86/machine-combiner-int-vec.ll
 create mode 100644 test/CodeGen/X86/machine-combiner-int.ll
 create mode 100644 test/CodeGen/X86/materialize.ll
 create mode 100644 test/CodeGen/X86/mcu-abi.ll
 create mode 100644 test/CodeGen/X86/merge-store-partially-alias-loads.ll
 create mode 100644 test/CodeGen/X86/mmx-coalescing.ll
 create mode 100644 test/CodeGen/X86/mmx-only.ll
 create mode 100644 test/CodeGen/X86/movpc32-check.ll
 create mode 100644 test/CodeGen/X86/or-lea.ll
 create mode 100644 test/CodeGen/X86/patchpoint-verifiable.mir
 create mode 100644 test/CodeGen/X86/peephole-na-phys-copy-folding.ll
 create mode 100644 test/CodeGen/X86/pop-stack-cleanup.ll
 delete mode 100644 test/CodeGen/X86/pr21529.ll
 delete mode 100644 test/CodeGen/X86/pr23900.ll
 create mode 100644 test/CodeGen/X86/pr24139.ll
 create mode 100644 test/CodeGen/X86/pr24602.ll
 create mode 100644 test/CodeGen/X86/pr25828.ll
 create mode 100644 test/CodeGen/X86/prolog-push-seq.ll
 create mode 100644 test/CodeGen/X86/pseudo_cmov_lower.ll
 create mode 100644 test/CodeGen/X86/pseudo_cmov_lower1.ll
 create mode 100644 test/CodeGen/X86/pseudo_cmov_lower2.ll
 create mode 100644 test/CodeGen/X86/push-cfi-debug.ll
 create mode 100644 test/CodeGen/X86/push-cfi-obj.ll
 create mode 100644 test/CodeGen/X86/push-cfi.ll
 create mode 100644 test/CodeGen/X86/rem_crash.ll
 delete mode 100644 test/CodeGen/X86/remat-invalid-liveness.ll
 create mode 100644 test/CodeGen/X86/safestack.ll
 create mode 100644 test/CodeGen/X86/sar_fold.ll
 create mode 100644 test/CodeGen/X86/sar_fold64.ll
 create mode 100644 test/CodeGen/X86/scalar-fp-to-i64.ll
 create mode 100644 test/CodeGen/X86/scalar-int-to-fp.ll
 create mode 100644 test/CodeGen/X86/sdiv-pow2.ll
 create mode 100644 test/CodeGen/X86/seh-catchpad.ll
 create mode 100644 test/CodeGen/X86/seh-exception-code.ll
 delete mode 100644 test/CodeGen/X86/seh-filter.ll
 delete mode 100644 test/CodeGen/X86/seh-stack-realign-win32.ll
 create mode 100644 test/CodeGen/X86/shrink-wrap-chkstk.ll
 create mode 100644 test/CodeGen/X86/slow-unaligned-mem.ll
 create mode 100644 test/CodeGen/X86/soft-sitofp.ll
 create mode 100644 test/CodeGen/X86/sse-only.ll
 create mode 100644 test/CodeGen/X86/sse3-intrinsics-fast-isel.ll
 create mode 100644 test/CodeGen/X86/sse4a-intrinsics-fast-isel.ll
 create mode 100644 test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll
 create mode 100644 test/CodeGen/X86/stack-folding-adx-x86_64.ll
 create mode 100644 test/CodeGen/X86/stackmap-frame-setup.ll
 create mode 100644 test/CodeGen/X86/switch-edge-weight.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-64-xsave.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-64-xsavec.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-64-xsaveopt.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-64-xsaves.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-xsave.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-xsavec.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-xsaveopt.ll
 create mode 100644 test/CodeGen/X86/system-intrinsics-xsaves.ll
 create mode 100644 test/CodeGen/X86/tail-dup-catchret.ll
 create mode 100644 test/CodeGen/X86/tail-merge-wineh.ll
 create mode 100644 test/CodeGen/X86/tailcall-msvc-conventions.ll
 create mode 100644 test/CodeGen/X86/tailcall-readnone.ll
 create mode 100644 test/CodeGen/X86/tls-android-negative.ll
 create mode 100644 test/CodeGen/X86/tls-android.ll
 create mode 100644 test/CodeGen/X86/token_landingpad.ll
 create mode 100644 test/CodeGen/X86/trunc-store.ll
 create mode 100644 test/CodeGen/X86/vec_cmp_sint-128.ll
 create mode 100644 test/CodeGen/X86/vec_cmp_uint-128.ll
 create mode 100644 test/CodeGen/X86/vec_minmax_sint.ll
 create mode 100644 test/CodeGen/X86/vec_minmax_uint.ll
 create mode 100644 test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
 create mode 100644 test/CodeGen/X86/vector-lzcnt-512.ll
 create mode 100644 test/CodeGen/X86/vector-merge-store-fp-constants.ll
 create mode 100644 test/CodeGen/X86/vector-popcnt-512.ll
 create mode 100644 test/CodeGen/X86/vector-rotate-128.ll
 create mode 100644 test/CodeGen/X86/vector-rotate-256.ll
 create mode 100644 test/CodeGen/X86/vector-shift-ashr-512.ll
 create mode 100644 test/CodeGen/X86/vector-shift-lshr-512.ll
 create mode 100644 test/CodeGen/X86/vector-shift-shl-512.ll
 create mode 100644 test/CodeGen/X86/vector-shuffle-512-v32.ll
 create mode 100644 test/CodeGen/X86/vector-shuffle-v1.ll
 create mode 100644 test/CodeGen/X86/vector-tzcnt-512.ll
 create mode 100644 test/CodeGen/X86/virtual-registers-cleared-in-machine-functions-liveins.ll
 create mode 100644 test/CodeGen/X86/vmovq.ll
 create mode 100644 test/CodeGen/X86/wide-integer-cmp.ll
 create mode 100644 test/CodeGen/X86/win-catchpad-csrs.ll
 create mode 100644 test/CodeGen/X86/win-catchpad-nested-cxx.ll
 create mode 100644 test/CodeGen/X86/win-catchpad-nested.ll
 create mode 100644 test/CodeGen/X86/win-catchpad-varargs.ll
 create mode 100644 test/CodeGen/X86/win-catchpad.ll
 create mode 100644 test/CodeGen/X86/win-cleanuppad.ll
 create mode 100644 test/CodeGen/X86/win-funclet-cfi.ll
 rename test/CodeGen/X86/{win_eh_prepare.ll => win-mixed-ehpersonality.ll} (55%)
 create mode 100644 test/CodeGen/X86/win32-seh-catchpad-realign.ll
 create mode 100644 test/CodeGen/X86/win32-seh-catchpad.ll
 create mode 100644 test/CodeGen/X86/win32-seh-nested-finally.ll
 create mode 100644 test/CodeGen/X86/win32-spill-xmm.ll
 create mode 100644 test/CodeGen/X86/win64_sibcall.ll
 create mode 100644 test/CodeGen/X86/win_coreclr_chkstk.ll
 delete mode 100644 test/CodeGen/X86/win_ftol2.ll
 create mode 100644 test/CodeGen/X86/wineh-coreclr.ll
 create mode 100644 test/CodeGen/X86/wineh-exceptionpointer.ll
 create mode 100644 test/CodeGen/X86/wineh-no-ehpads.ll
 create mode 100644 test/CodeGen/X86/x32-indirectbr.ll
 create mode 100644 test/CodeGen/X86/x32-landingpad.ll
 create mode 100644 test/CodeGen/X86/x32-va_start.ll
 create mode 100644 test/CodeGen/X86/x86-32-intrcc.ll
 create mode 100644 test/CodeGen/X86/x86-64-intrcc.ll
 create mode 100644 test/CodeGen/X86/x86-64-ms_abi-vararg.ll
 create mode 100644 test/CodeGen/X86/x86-sanitizer-shrink-wrapping.ll
 create mode 100644 test/CodeGen/X86/x86-shrink-wrap-unwind.ll
 create mode 100644 test/CodeGen/X86/x86-win64-shrink-wrapping.ll
 create mode 100644 test/CodeGen/X86/xop-pcmov.ll
 create mode 100644 test/DebugInfo/AArch64/prologue_end.ll
 create mode 100644 test/DebugInfo/ARM/float-args.ll
 create mode 100644 test/DebugInfo/ARM/prologue_end.ll
 rename test/DebugInfo/{ => Generic}/2009-10-16-Phi.ll (100%)
 rename test/DebugInfo/{ => Generic}/2009-11-03-InsertExtractValue.ll (86%)
 rename test/DebugInfo/{ => Generic}/2009-11-05-DeadGlobalVariable.ll (65%)
 rename test/DebugInfo/{ => Generic}/2009-11-06-NamelessGlobalVariable.ll (71%)
 rename test/DebugInfo/{ => Generic}/2009-11-10-CurrentFn.ll (59%)
 rename test/DebugInfo/{ => Generic}/2010-01-05-DbgScope.ll (70%)
 rename test/DebugInfo/{ => Generic}/2010-03-12-llc-crash.ll (58%)
 rename test/DebugInfo/{ => Generic}/2010-03-19-DbgDeclare.ll (65%)
 rename test/DebugInfo/{ => Generic}/2010-03-24-MemberFn.ll (77%)
 rename test/DebugInfo/{ => Generic}/2010-04-06-NestedFnDbgInfo.ll (76%)
 rename test/DebugInfo/{ => Generic}/2010-04-19-FramePtr.ll (73%)
 rename test/DebugInfo/{ => Generic}/2010-05-03-DisableFramePtr.ll (77%)
 rename test/DebugInfo/{ => Generic}/2010-05-03-OriginDIE.ll (80%)
 rename test/DebugInfo/{ => Generic}/2010-05-10-MultipleCU.ll (56%)
 rename test/DebugInfo/{ => Generic}/2010-06-29-InlinedFnLocalVar.ll (67%)
 rename test/DebugInfo/{ => Generic}/2010-07-19-Crash.ll (58%)
 rename test/DebugInfo/{ => Generic}/2010-10-01-crash.ll (59%)
 create mode 100644 test/DebugInfo/Generic/Inputs/gmlt.ll
 rename test/DebugInfo/{ => Generic}/PR20038.ll (77%)
 rename test/DebugInfo/{ => Generic}/accel-table-hash-collisions.ll (96%)
 rename test/DebugInfo/{ => Generic}/array.ll (71%)
 rename test/DebugInfo/{ => Generic}/block-asan.ll (89%)
 rename test/DebugInfo/{ => Generic}/bug_null_debuginfo.ll (57%)
 rename test/DebugInfo/{ => Generic}/constant-pointers.ll (77%)
 rename test/DebugInfo/{ => Generic}/constant-sdnodes-have-dbg-location.ll (53%)
 rename test/DebugInfo/{ => Generic}/constantfp-sdnodes-have-dbg-location.ll (50%)
 rename test/DebugInfo/{ => Generic}/cross-cu-inlining.ll (77%)
 rename test/DebugInfo/{ => Generic}/cross-cu-linkonce-distinct.ll (76%)
 rename test/DebugInfo/{ => Generic}/cross-cu-linkonce.ll (75%)
 rename test/DebugInfo/{ => Generic}/cu-range-hole.ll (72%)
 rename test/DebugInfo/{ => Generic}/cu-ranges.ll (70%)
 rename test/DebugInfo/{X86 => Generic}/dbg-at-specficiation.ll (79%)
 rename test/DebugInfo/{ => Generic}/dead-argument-order.ll (79%)
 rename test/DebugInfo/{ => Generic}/debug-info-always-inline.ll (100%)
 rename test/DebugInfo/{ => Generic}/debug-info-qualifiers.ll (83%)
 rename test/DebugInfo/{ => Generic}/debuginfofinder-forward-declaration.ll (80%)
 rename test/DebugInfo/{ => Generic}/debuginfofinder-multiple-cu.ll (54%)
 create mode 100644 test/DebugInfo/Generic/def-line.ll
 create mode 100644 test/DebugInfo/Generic/discriminator.ll
 rename test/DebugInfo/{ => Generic}/dwarf-public-names.ll (72%)
 rename test/DebugInfo/{ => Generic}/empty.ll (82%)
 rename test/DebugInfo/{ => Generic}/enum-types.ll (65%)
 rename test/DebugInfo/{ => Generic}/enum.ll (83%)
 rename test/DebugInfo/{ => Generic}/global.ll (74%)
 rename test/DebugInfo/{ => Generic}/gmlt.test (100%)
 create mode 100644 test/DebugInfo/Generic/gvn.ll
 rename test/DebugInfo/{ => Generic}/incorrect-variable-debugloc.ll (89%)
 rename test/DebugInfo/{ => Generic}/incorrect-variable-debugloc1.ll (85%)
 rename test/DebugInfo/{ => Generic}/inheritance.ll (84%)
 rename test/DebugInfo/{ => Generic}/inline-debug-info-multiret.ll (84%)
 rename test/DebugInfo/{ => Generic}/inline-debug-info.ll (84%)
 rename test/DebugInfo/{ => Generic}/inline-no-debug-info.ll (77%)
 rename test/DebugInfo/{ => Generic}/inline-scopes.ll (81%)
 rename test/DebugInfo/{ => Generic}/inlined-arguments.ll (67%)
 rename test/DebugInfo/{ => Generic}/inlined-vars.ll (61%)
 create mode 100644 test/DebugInfo/Generic/lit.local.cfg
 rename test/DebugInfo/{ => Generic}/location-verifier.ll (69%)
 rename test/DebugInfo/{ => Generic}/lto-comp-dir.ll (74%)
 rename test/DebugInfo/{ => Generic}/member-order.ll (77%)
 rename test/DebugInfo/{ => Generic}/member-pointers.ll (89%)
 rename test/DebugInfo/{ => Generic}/missing-abstract-variable.ll (76%)
 rename test/DebugInfo/{ => Generic}/multiline.ll (86%)
 rename test/DebugInfo/{ => Generic}/namespace.ll (84%)
 rename test/DebugInfo/{ => Generic}/namespace_function_definition.ll (64%)
 rename test/DebugInfo/{ => Generic}/namespace_inline_function_definition.ll (74%)
 rename test/DebugInfo/{ => Generic}/nodebug.ll (77%)
 rename test/DebugInfo/{ => Generic}/piece-verifier.ll (73%)
 create mode 100755 test/DebugInfo/Generic/ptrsize.ll
 rename test/DebugInfo/{X86 => Generic}/recursive_inlining.ll (79%)
 rename test/DebugInfo/{ => Generic}/restrict.ll (75%)
 create mode 100644 test/DebugInfo/Generic/skeletoncu.ll
 rename test/DebugInfo/{ => Generic}/sugared-constants.ll (79%)
 rename test/DebugInfo/{ => Generic}/template-recursive-void.ll (92%)
 rename test/DebugInfo/{ => Generic}/tu-composite.ll (85%)
 rename test/DebugInfo/{ => Generic}/tu-member-pointer.ll (84%)
 rename test/DebugInfo/{ => Generic}/two-cus-from-same-file.ll (67%)
 rename test/DebugInfo/{ => Generic}/typedef.ll (82%)
 rename test/DebugInfo/{ => Generic}/unconditional-branch.ll (78%)
 rename test/DebugInfo/{ => Generic}/varargs.ll (80%)
 rename test/DebugInfo/{ => Generic}/version.ll (69%)
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-dwp.x86_64.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macho-relocs.macho.x86_64.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macro-cmd.h
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macro.cc
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macro.h
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-macro.o
 create mode 100644 test/DebugInfo/Inputs/dwarfdump-test.macho-i386.o
 create mode 100644 test/DebugInfo/Inputs/fat-test.o
 create mode 100644 test/DebugInfo/MIR/X86/lit.local.cfg
 create mode 100644 test/DebugInfo/MIR/X86/live-debug-values-3preds.mir
 create mode 100644 test/DebugInfo/MIR/X86/live-debug-values.mir
 create mode 100644 test/DebugInfo/MIR/lit.local.cfg
 create mode 100644 test/DebugInfo/Mips/dsr-fixed-objects.ll
 create mode 100644 test/DebugInfo/Mips/dsr-non-fixed-objects.ll
 create mode 100644 test/DebugInfo/Mips/prologue_end.ll
 rename test/DebugInfo/PDB/{ => DIA}/lit.local.cfg (100%)
 rename test/DebugInfo/PDB/{ => DIA}/pdbdump-flags.test (77%)
 rename test/DebugInfo/PDB/{ => DIA}/pdbdump-symbol-format.test (87%)
 create mode 100644 test/DebugInfo/PDB/pdbdump-headers.test
 create mode 100644 test/DebugInfo/Sparc/prologue_end.ll
 create mode 100644 test/DebugInfo/SystemZ/prologue_end.ll
 create mode 100644 test/DebugInfo/X86/DIModuleContext.ll
 create mode 100644 test/DebugInfo/X86/bbjoin.ll
 create mode 100644 test/DebugInfo/X86/debugger-tune.ll
 create mode 100644 test/DebugInfo/X86/dw_op_minus.ll
 create mode 100644 test/DebugInfo/X86/dwarf-linkage-names.ll
 create mode 100644 test/DebugInfo/X86/externaltyperef.ll
 create mode 100644 test/DebugInfo/X86/live-debug-values.ll
 create mode 100644 test/DebugInfo/X86/safestack-byval.ll
 create mode 100644 test/DebugInfo/debugmacinfo.test
 create mode 100644 test/DebugInfo/dwarfdump-dwp.test
 create mode 100644 test/DebugInfo/dwarfdump-macho-relocs.test
 create mode 100644 test/DebugInfo/dwarfdump-macho-universal.test
 create mode 100644 test/DebugInfo/dwo.ll
 delete mode 100644 test/DebugInfo/gvn.ll
 create mode 100644 test/DebugInfo/skeletoncu.ll
 create mode 100644 test/Examples/Kaleidoscope/Chapter3.test
 create mode 100644 test/Examples/Kaleidoscope/Chapter4.test
 create mode 100644 test/Examples/Kaleidoscope/Chapter5.test
 create mode 100644 test/Examples/Kaleidoscope/Chapter6.test
 create mode 100644 test/Examples/Kaleidoscope/Chapter7.test
 create mode 100644 test/Examples/lit.local.cfg
 create mode 100644 test/ExecutionEngine/OrcLazy/global_aliases.ll
 create mode 100644 test/ExecutionEngine/RuntimeDyld/Mips/ELF_N64R6_relocations.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/PowerPC/lit.local.cfg
 create mode 100644 test/ExecutionEngine/RuntimeDyld/PowerPC/ppc32_elf_rel_addr16.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/COFF_i386.s
 rename test/ExecutionEngine/RuntimeDyld/X86/{COFF_x86_64 => COFF_x86_64.s} (80%)
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/ELF_STT_FILE.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/ELF_x64-64_PC8_relocations.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/ELF_x86_64_StubBuf.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_FILE.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_STT_FILE_GLOBAL.s
 create mode 100644 test/ExecutionEngine/RuntimeDyld/X86/Inputs/ELF_x86_64_StubBuf.ll
 create mode 100644 test/Feature/OperandBundles/adce.ll
 create mode 100644 test/Feature/OperandBundles/basic-aa-argmemonly.ll
 create mode 100644 test/Feature/OperandBundles/dse.ll
 create mode 100644 test/Feature/OperandBundles/early-cse.ll
 create mode 100644 test/Feature/OperandBundles/function-attrs.ll
 create mode 100644 test/Feature/OperandBundles/inliner-conservative.ll
 create mode 100644 test/Feature/OperandBundles/merge-func.ll
 create mode 100644 test/Feature/OperandBundles/special-state.ll
 create mode 100644 test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll
 delete mode 100644 test/Instrumentation/AddressSanitizer/do-not-instrument-cstring.ll
 rename test/Instrumentation/AddressSanitizer/{do-not-instrument-llvm-metadata-darwin.ll => do-not-instrument-globals-darwin.ll} (60%)
 create mode 100644 test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll
 delete mode 100644 test/Instrumentation/AddressSanitizer/do-not-instrument-llvm-metadata.ll
 create mode 100644 test/Instrumentation/AddressSanitizer/keep_going.ll
 create mode 100644 test/Instrumentation/AddressSanitizer/localescape.ll
 create mode 100644 test/Instrumentation/AddressSanitizer/twice.ll
 create mode 100644 test/Instrumentation/DataFlowSanitizer/external_mask.ll
 create mode 100644 test/Instrumentation/MemorySanitizer/AArch64/vararg.ll
 create mode 100644 test/Instrumentation/SanitizerCoverage/seh.ll
 create mode 100644 test/Instrumentation/SanitizerCoverage/switch-tracing.ll
 create mode 100644 test/LTO/X86/disable-verify.ll
 create mode 100644 test/LTO/X86/llvm-lto-output.ll
 create mode 100644 test/LTO/X86/parallel.ll
 create mode 100644 test/LibDriver/thin.test
 create mode 100644 test/Linker/Inputs/available_externally_over_decl.ll
 create mode 100644 test/Linker/Inputs/comdat11.ll
 create mode 100644 test/Linker/Inputs/comdat13.ll
 create mode 100644 test/Linker/Inputs/comdat14.ll
 create mode 100644 test/Linker/Inputs/comdat15.ll
 create mode 100644 test/Linker/Inputs/ctors2.ll
 create mode 100644 test/Linker/Inputs/ctors3.ll
 create mode 100644 test/Linker/Inputs/funcimport.ll
 create mode 100644 test/Linker/Inputs/funcimport_appending_global.ll
 create mode 100644 test/Linker/Inputs/internalize-lazy.ll
 create mode 100644 test/Linker/Inputs/linkage.c.ll
 create mode 100644 test/Linker/Inputs/only-needed-debug-metadata.ll
 create mode 100644 test/Linker/Inputs/only-needed-named-metadata.ll
 delete mode 100644 test/Linker/Inputs/subprogram-linkonce-weak-odr.ll
 create mode 100644 test/Linker/Inputs/thinlto_funcimport_debug.ll
 create mode 100644 test/Linker/available_externally_over_decl.ll
 create mode 100644 test/Linker/comdat11.ll
 create mode 100644 test/Linker/comdat12.ll
 create mode 100644 test/Linker/comdat13.ll
 create mode 100644 test/Linker/comdat14.ll
 create mode 100644 test/Linker/comdat15.ll
 create mode 100644 test/Linker/comdat_group.ll
 create mode 100644 test/Linker/ctors2.ll
 create mode 100644 test/Linker/ctors3.ll
 create mode 100644 test/Linker/ctors4.ll
 create mode 100644 test/Linker/ctors5.ll
 create mode 100644 test/Linker/funcimport.ll
 create mode 100644 test/Linker/funcimport_appending_global.ll
 create mode 100644 test/Linker/internalize-lazy.ll
 create mode 100644 test/Linker/link-flags.ll
 create mode 100644 test/Linker/only-needed-debug-metadata.ll
 create mode 100644 test/Linker/only-needed-named-metadata.ll
 delete mode 100644 test/Linker/subprogram-linkonce-weak-odr.ll
 create mode 100644 test/Linker/thinlto_funcimport_debug.ll
 create mode 100644 test/Linker/uniqued-distinct-cycles.ll
 create mode 100644 test/MC/AArch64/armv8.2a-at.s
 create mode 100644 test/MC/AArch64/armv8.2a-mmfr2.s
 create mode 100644 test/MC/AArch64/armv8.2a-persistent-memory.s
 create mode 100644 test/MC/AArch64/armv8.2a-statistical-profiling.s
 create mode 100644 test/MC/AArch64/armv8.2a-uao.s
 create mode 100644 test/MC/AArch64/error-location-ldr-pseudo.s
 create mode 100644 test/MC/AArch64/error-location.s
 create mode 100644 test/MC/AArch64/fullfp16-diagnostics.s
 create mode 100644 test/MC/AArch64/fullfp16-neon-neg.s
 create mode 100644 test/MC/AMDGPU/buffer_wbinv1l_vol_vi.s
 create mode 100644 test/MC/AMDGPU/flat-scratch.s
 create mode 100644 test/MC/AMDGPU/hsa-text.s
 create mode 100644 test/MC/AMDGPU/out-of-range-registers.s
 create mode 100644 test/MC/AMDGPU/smem.s
 create mode 100644 test/MC/AMDGPU/smrd-err.s
 create mode 100644 test/MC/AMDGPU/vop3-vop1-nosrc.s
 create mode 100644 test/MC/AMDGPU/vopc-errs.s
 delete mode 100644 test/MC/ARM/directive-arch-armv6j.s
 rename test/MC/ARM/{directive-arch-armv6zk.s => directive-arch-armv8.2-a.s} (60%)
 create mode 100644 test/MC/ARM/error-location-ldr-pseudo.s
 create mode 100644 test/MC/ARM/error-location.s
 create mode 100644 test/MC/ARM/fullfp16-neon-neg.s
 create mode 100644 test/MC/ARM/fullfp16-neon.s
 create mode 100644 test/MC/ARM/neon-vcvt-fp16.s
 create mode 100644 test/MC/ARM/thumb-branches.s
 create mode 100644 test/MC/ARM/thumb1-relax.s
 create mode 100644 test/MC/ARM/v7k-dsp.s
 create mode 100644 test/MC/AsmParser/reassign.s
 create mode 100644 test/MC/AsmParser/undefined-local-symbol.s
 delete mode 100644 test/MC/COFF/ARM/directive-type-diagnostics.s
 create mode 100644 test/MC/COFF/label-undefined.s
 create mode 100644 test/MC/COFF/stdin.s
 create mode 100644 test/MC/COFF/temporary-alias.s
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-at.txt
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-mmfr2.txt
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-persistent-memory.txt
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-statistical-profiling.txt
 create mode 100644 test/MC/Disassembler/AArch64/armv8.2a-uao.txt
 create mode 100644 test/MC/Disassembler/AArch64/fullfp16-neg.txt
 create mode 100644 test/MC/Disassembler/AArch64/fullfp16-neon-neg.txt
 create mode 100644 test/MC/Disassembler/ARM/fullfp16-neon-arm-neg.txt
 create mode 100644 test/MC/Disassembler/ARM/fullfp16-neon-arm.txt
 create mode 100644 test/MC/Disassembler/ARM/fullfp16-neon-thumb-neg.txt
 create mode 100644 test/MC/Disassembler/ARM/fullfp16-neon-thumb.txt
 create mode 100644 test/MC/Disassembler/Hexagon/invalid_packet.txt
 create mode 100644 test/MC/Disassembler/Hexagon/too_many_instructions.txt
 create mode 100644 test/MC/Disassembler/Hexagon/too_many_loop_ends.txt
 create mode 100644 test/MC/Disassembler/Hexagon/unextendable.txt
 create mode 100644 test/MC/Disassembler/Mips/dsp/valid-el.txt
 create mode 100644 test/MC/Disassembler/Mips/dsp/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/dspr2/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/eva/valid_R6-eva.txt
 create mode 100644 test/MC/Disassembler/Mips/eva/valid_preR6-eva.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips-dsp/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips-dspr2/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips32r3/invalid.txt
 rename test/MC/Disassembler/Mips/{micromips_le.txt => micromips32r3/valid-el.txt} (86%)
 rename test/MC/Disassembler/Mips/{micromips.txt => micromips32r3/valid.txt} (84%)
 delete mode 100644 test/MC/Disassembler/Mips/micromips32r6.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips32r6/valid.txt
 create mode 100644 test/MC/Disassembler/Mips/micromips64r6/valid.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips-dsp.txt
 create mode 100644 test/MC/Disassembler/Mips/mips1/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips1/invalid.txt
 create mode 100644 test/MC/Disassembler/Mips/mips2/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips2/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips3/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips3/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32/valid-xfail.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32_le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r2/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r2/valid-xfail.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips32r2_le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r3/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r3/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r5/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips32r5/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips4/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips4/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64/valid-xfail.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips64_le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r2/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r2/valid-xfail.txt
 delete mode 100644 test/MC/Disassembler/Mips/mips64r2_le.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r3/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r3/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r5/invalid-xfail.txt
 create mode 100644 test/MC/Disassembler/Mips/mips64r5/valid-xfail.txt
 create mode 100644 test/MC/Disassembler/Sparc/sparc-v9.txt
 create mode 100644 test/MC/ELF/ARM/directive-type-diagnostics.s
 create mode 100644 test/MC/ELF/align-zero.s
 create mode 100644 test/MC/ELF/div-by-zero.s
 create mode 100644 test/MC/ELF/empty-twice.ll
 create mode 100644 test/MC/Hexagon/asmMap.s
 create mode 100644 test/MC/Hexagon/capitalizedEndloop.s
 create mode 100644 test/MC/Hexagon/dcfetch.s
 create mode 100644 test/MC/Hexagon/empty_asm.s
 create mode 100644 test/MC/Hexagon/endloop.s
 create mode 100644 test/MC/Hexagon/got.s
 create mode 100644 test/MC/Hexagon/instructions/alu32_alu.s
 create mode 100644 test/MC/Hexagon/instructions/alu32_perm.s
 create mode 100644 test/MC/Hexagon/instructions/alu32_pred.s
 create mode 100644 test/MC/Hexagon/instructions/cr.s
 create mode 100644 test/MC/Hexagon/instructions/j.s
 create mode 100644 test/MC/Hexagon/instructions/jr.s
 create mode 100644 test/MC/Hexagon/instructions/ld.s
 create mode 100644 test/MC/Hexagon/instructions/memop.s
 create mode 100644 test/MC/Hexagon/instructions/nv_j.s
 create mode 100644 test/MC/Hexagon/instructions/nv_st.s
 create mode 100644 test/MC/Hexagon/instructions/st.s
 create mode 100644 test/MC/Hexagon/instructions/system_user.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_alu.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_bit.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_complex.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_fp.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_mpy.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_perm.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_pred.s
 create mode 100644 test/MC/Hexagon/instructions/xtype_shift.s
 create mode 100644 test/MC/Hexagon/jumpdoublepound.s
 create mode 100644 test/MC/Hexagon/labels.s
 create mode 100644 test/MC/Hexagon/new-value-check.s
 create mode 100644 test/MC/Hexagon/out_of_range.s
 create mode 100644 test/MC/Hexagon/pcrel.s
 create mode 100644 test/MC/Hexagon/relaxed_newvalue.s
 create mode 100644 test/MC/Hexagon/test.s
 create mode 100644 test/MC/Hexagon/two_ext.s
 create mode 100644 test/MC/Hexagon/v60-alu.s
 create mode 100644 test/MC/Hexagon/v60-permute.s
 create mode 100644 test/MC/Hexagon/v60-shift.s
 create mode 100644 test/MC/Hexagon/v60-vcmp.s
 create mode 100644 test/MC/Hexagon/v60-vmem.s
 create mode 100644 test/MC/Hexagon/v60-vmpy-acc.s
 create mode 100644 test/MC/Hexagon/v60-vmpy1.s
 create mode 100644 test/MC/Hexagon/v60lookup.s
 create mode 100644 test/MC/MachO/AArch64/reloc-errors.s
 create mode 100644 test/MC/MachO/ARM/compact-unwind-armv7k.s
 create mode 100644 test/MC/MachO/ARM/tvos-version-min-load-command.s
 create mode 100644 test/MC/MachO/ARM/version-min-diagnostics2.s
 create mode 100644 test/MC/MachO/ARM/watchos-version-min-load-command.s
 create mode 100644 test/MC/MachO/PowerPC/coal-sections-powerpc.s
 create mode 100644 test/MC/MachO/PowerPC/lit.local.cfg
 delete mode 100644 test/MC/MachO/bad-darwin-x86_64-reloc-expr1.s
 delete mode 100644 test/MC/MachO/bad-darwin-x86_64-reloc-expr2.s
 create mode 100644 test/MC/MachO/coal-sections-x86_64.s
 create mode 100644 test/MC/MachO/darwin-version-min-load-command.s
 create mode 100644 test/MC/MachO/empty-twice.ll
 create mode 100644 test/MC/Mips/cnmips/invalid.s
 create mode 100644 test/MC/Mips/cprestore-bad.s
 create mode 100644 test/MC/Mips/cprestore-noreorder.s
 create mode 100644 test/MC/Mips/cprestore-reorder.s
 create mode 100644 test/MC/Mips/cprestore-warning-unused.s
 create mode 100644 test/MC/Mips/directive-ent.s
 create mode 100644 test/MC/Mips/dsp/invalid.s
 create mode 100644 test/MC/Mips/dsp/valid.s
 create mode 100644 test/MC/Mips/dspr2/invalid.s
 create mode 100644 test/MC/Mips/dspr2/valid.s
 create mode 100644 test/MC/Mips/eva/invalid-noeva-wrong-error.s
 create mode 100644 test/MC/Mips/eva/invalid-noeva.s
 create mode 100644 test/MC/Mips/eva/invalid.s
 create mode 100644 test/MC/Mips/eva/invalid_R6.s
 create mode 100644 test/MC/Mips/eva/valid_R6.s
 create mode 100644 test/MC/Mips/eva/valid_preR6.s
 create mode 100644 test/MC/Mips/expansion-jal-sym-pic.s
 create mode 100644 test/MC/Mips/instalias-imm-expanding.s
 create mode 100644 test/MC/Mips/macro-bcc-imm-bad.s
 create mode 100644 test/MC/Mips/macro-bcc-imm.s
 create mode 100644 test/MC/Mips/macro-ddiv-bad.s
 create mode 100644 test/MC/Mips/macro-ddiv.s
 create mode 100644 test/MC/Mips/macro-ddivu-bad.s
 create mode 100644 test/MC/Mips/macro-ddivu.s
 create mode 100644 test/MC/Mips/macro-div-bad.s
 create mode 100644 test/MC/Mips/macro-div.s
 create mode 100644 test/MC/Mips/macro-divu-bad.s
 create mode 100644 test/MC/Mips/macro-divu.s
 create mode 100644 test/MC/Mips/macro-dla.s
 create mode 100644 test/MC/Mips/macro-dli.s
 create mode 100644 test/MC/Mips/micromips-dsp/invalid-wrong-error.s
 create mode 100644 test/MC/Mips/micromips-dsp/invalid.s
 create mode 100644 test/MC/Mips/micromips-dsp/valid.s
 create mode 100644 test/MC/Mips/micromips-dspr2/invalid.s
 create mode 100644 test/MC/Mips/micromips-dspr2/valid.s
 create mode 100644 test/MC/Mips/micromips/invalid.s
 create mode 100644 test/MC/Mips/micromips64r6/invalid.s
 create mode 100644 test/MC/Mips/micromips64r6/valid.s
 delete mode 100644 test/MC/Mips/mips-dsp-instructions.s
 create mode 100644 test/MC/Mips/mips32r2/invalid-dsp.s
 create mode 100644 test/MC/Mips/mips32r2/invalid-dspr2.s
 create mode 100644 test/MC/Mips/mips32r2/invalid-msa.s
 create mode 100644 test/MC/Mips/mips32r5/invalid-mips32.s
 create mode 100644 test/MC/Mips/mips32r5/invalid-mips32r2.s
 create mode 100644 test/MC/Mips/mips32r5/invalid-mips32r3.s
 create mode 100644 test/MC/Mips/mips64r5/invalid-mips64.s
 create mode 100644 test/MC/Mips/mips64r5/invalid-mips64r2.s
 create mode 100644 test/MC/Mips/mips64r5/invalid-mips64r3.s
 create mode 100644 test/MC/Mips/msa/invalid-64.s
 create mode 100644 test/MC/Mips/msa/invalid.s
 create mode 100644 test/MC/Mips/reloc-directive-bad.s
 create mode 100644 test/MC/Mips/reloc-directive.s
 create mode 100644 test/MC/Mips/rotations32-bad.s
 create mode 100644 test/MC/Mips/rotations32.s
 create mode 100644 test/MC/Mips/rotations64.s
 create mode 100644 test/MC/PowerPC/pr24686.s
 create mode 100644 test/MC/Sparc/sparc-asm-errors.s
 create mode 100644 test/MC/X86/X86_64-pku.s
 create mode 100644 test/MC/X86/encoder-fail.s
 create mode 100644 test/MC/X86/intel-syntax-print.ll
 create mode 100644 test/MC/X86/large-bss.s
 create mode 100644 test/MC/X86/macho-reloc-errors-x86.s
 create mode 100644 test/MC/X86/macho-reloc-errors-x86_64.s
 create mode 100644 test/MC/X86/x86-64-avx512cd.s
 create mode 100644 test/MC/X86/x86-64-avx512cd_vl.s
 create mode 100644 test/MC/X86/x86-evenDirective.s
 create mode 100644 test/Object/AMDGPU/elf-definitios.yaml
 create mode 100644 test/Object/Inputs/coff-short-import-code
 create mode 100644 test/Object/Inputs/coff-short-import-data
 create mode 100755 test/Object/Inputs/corrupt-invalid-dynamic-table-offset.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-dynamic-table-size.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-dynamic-table-too-large.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-phentsize.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-relocation-size.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-strtab.elf.x86-64
 create mode 100755 test/Object/Inputs/corrupt-invalid-virtual-addr.elf.x86-64
 create mode 100755 test/Object/Inputs/invalid-symbol-table-size.elf
 create mode 100644 test/Object/Inputs/invalid-xindex-size.elf
 create mode 100755 test/Object/Inputs/main-ret-zero-pe-i386.dll
 create mode 100755 test/Object/Inputs/main-ret-zero-pe-i386.exe
 create mode 100644 test/Object/Inputs/no-section-header-string-table.elf-x86-64
 create mode 100644 test/Object/Inputs/pr25877.lib
 create mode 100755 test/Object/Inputs/rel-no-sec-table.elf-x86-64
 create mode 100644 test/Object/Inputs/shndx.elf
 create mode 100755 test/Object/Inputs/trivial-object-test.elf-avr
 create mode 100644 test/Object/nm-pe-image.test
 create mode 100644 test/Object/no-section-header-string-table.test
 create mode 100644 test/Object/objdump-shndx.test
 create mode 100644 test/Object/pr25877.test
 create mode 100644 test/Object/readobj-absent.test
 create mode 100644 test/Other/opt-twice.ll
 create mode 100644 test/TableGen/cast-list-initializer.td
 create mode 100644 test/TableGen/trydecode-emission.td
 create mode 100644 test/TableGen/trydecode-emission2.td
 create mode 100644 test/TableGen/trydecode-emission3.td
 create mode 100644 test/Transforms/AddDiscriminators/call.ll
 create mode 100644 test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll
 create mode 100644 test/Transforms/AddDiscriminators/diamond.ll
 create mode 100644 test/Transforms/AddDiscriminators/oneline.ll
 create mode 100644 test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
 create mode 100644 test/Transforms/AtomicExpand/X86/expand-atomic-rmw-initial-load.ll
 create mode 100644 test/Transforms/AtomicExpand/X86/lit.local.cfg
 create mode 100644 test/Transforms/CodeGenPrepare/AArch64/free-zext.ll
 create mode 100644 test/Transforms/CodeGenPrepare/AArch64/widen_switch.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/select.ll
 create mode 100644 test/Transforms/CodeGenPrepare/X86/widen_switch.ll
 create mode 100644 test/Transforms/CodeGenPrepare/invariant.group.ll
 create mode 100644 test/Transforms/CrossDSOCFI/basic.ll
 create mode 100644 test/Transforms/DeadArgElim/naked_functions.ll
 create mode 100644 test/Transforms/DeadArgElim/operandbundle.ll
 create mode 100644 test/Transforms/DeadStoreElimination/calloc-store.ll
 create mode 100644 test/Transforms/EarlyCSE/AArch64/ldstN.ll
 create mode 100644 test/Transforms/EarlyCSE/atomics.ll
 create mode 100644 test/Transforms/EarlyCSE/fence.ll
 create mode 100644 test/Transforms/ForcedFunctionAttrs/forced.ll
 delete mode 100644 test/Transforms/FunctionAttrs/2009-01-04-Annotate.ll
 create mode 100644 test/Transforms/FunctionAttrs/nonnull.ll
 create mode 100644 test/Transforms/FunctionAttrs/norecurse.ll
 create mode 100644 test/Transforms/FunctionAttrs/out-of-bounds-iterator-bug.ll
 create mode 100644 test/Transforms/FunctionImport/Inputs/funcimport.ll
 create mode 100644 test/Transforms/FunctionImport/Inputs/funcimport_debug.ll
 create mode 100644 test/Transforms/FunctionImport/funcimport.ll
 create mode 100644 test/Transforms/FunctionImport/funcimport_debug.ll
 create mode 100644 test/Transforms/GVN/assume-equal.ll
 create mode 100644 test/Transforms/GVN/funclet.ll
 create mode 100644 test/Transforms/GVN/invariant.group.ll
 create mode 100644 test/Transforms/GVN/no_speculative_loads_with_asan.ll
 create mode 100644 test/Transforms/GVN/pr24426.ll
 create mode 100644 test/Transforms/GVN/pr25440.ll
 create mode 100644 test/Transforms/GlobalOpt/assume.ll
 create mode 100644 test/Transforms/GlobalOpt/available_externally_global_ctors.ll
 create mode 100644 test/Transforms/GlobalOpt/externally-initialized-aggregate.ll
 create mode 100644 test/Transforms/GlobalOpt/externally-initialized.ll
 create mode 100644 test/Transforms/GlobalOpt/global-demotion.ll
 create mode 100644 test/Transforms/GlobalOpt/invariant.group.barrier.ll
 create mode 100644 test/Transforms/GlobalOpt/localize-constexpr.ll
 create mode 100644 test/Transforms/IndVarSimplify/bec-cmp.ll
 create mode 100644 test/Transforms/IndVarSimplify/const_phi.ll
 create mode 100644 test/Transforms/IndVarSimplify/loop-invariant-conditions.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24356.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24783.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24804.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24952.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr24956.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25047.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25051.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25060.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25360.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25421.ll
 create mode 100644 test/Transforms/IndVarSimplify/pr25578.ll
 create mode 100644 test/Transforms/IndVarSimplify/zext-nuw.ll
 rename test/Transforms/{FunctionAttrs/annotate-1.ll => InferFunctionAttrs/annotate.ll} (76%)
 create mode 100644 test/Transforms/Inline/alloca-dbgdeclare-merge.ll
 create mode 100644 test/Transforms/Inline/deopt-bundles.ll
 create mode 100644 test/Transforms/Inline/inline-assume.ll
 create mode 100644 test/Transforms/Inline/inline-cold-callee.ll
 create mode 100644 test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll
 create mode 100644 test/Transforms/Inline/inline-hot-callee.ll
 create mode 100644 test/Transforms/Inline/zero-cost.ll
 create mode 100644 test/Transforms/InstCombine/all-bits-shift.ll
 create mode 100644 test/Transforms/InstCombine/apint-or.ll
 delete mode 100644 test/Transforms/InstCombine/apint-or1.ll
 delete mode 100644 test/Transforms/InstCombine/apint-or2.ll
 create mode 100644 test/Transforms/InstCombine/bitcast-bitcast.ll
 create mode 100644 test/Transforms/InstCombine/bitreverse-fold.ll
 create mode 100644 test/Transforms/InstCombine/bitreverse-recognize.ll
 create mode 100644 test/Transforms/InstCombine/bswap-known-bits.ll
 create mode 100644 test/Transforms/InstCombine/call_nonnull_arg.ll
 create mode 100644 test/Transforms/InstCombine/cast-callee-deopt-bundles.ll
 create mode 100644 test/Transforms/InstCombine/compare-alloca.ll
 create mode 100644 test/Transforms/InstCombine/ctpop.ll
 create mode 100644 test/Transforms/InstCombine/demorgan-zext.ll
 create mode 100644 test/Transforms/InstCombine/fold-phi-load-metadata.ll
 create mode 100644 test/Transforms/InstCombine/lifetime.ll
 create mode 100644 test/Transforms/InstCombine/load-combine-metadata-2.ll
 create mode 100644 test/Transforms/InstCombine/load-combine-metadata-3.ll
 create mode 100644 test/Transforms/InstCombine/load-combine-metadata-4.ll
 create mode 100644 test/Transforms/InstCombine/log-pow-nofastmath.ll
 create mode 100644 test/Transforms/InstCombine/log-pow.ll
 create mode 100644 test/Transforms/InstCombine/minmax-fp.ll
 create mode 100644 test/Transforms/InstCombine/nonnull-attribute.ll
 create mode 100644 test/Transforms/InstCombine/phi-load-metadata-2.ll
 create mode 100644 test/Transforms/InstCombine/phi-load-metadata-3.ll
 create mode 100644 test/Transforms/InstCombine/phi-load-metadata.ll
 create mode 100644 test/Transforms/InstCombine/pow-4.ll
 create mode 100644 test/Transforms/InstCombine/pow-exp-nofastmath.ll
 create mode 100644 test/Transforms/InstCombine/pow-exp.ll
 create mode 100644 test/Transforms/InstCombine/pow-exp2.ll
 create mode 100644 test/Transforms/InstCombine/pow-sqrt.ll
 delete mode 100644 test/Transforms/InstCombine/pr20059.ll
 create mode 100644 test/Transforms/InstCombine/pr24605.ll
 create mode 100644 test/Transforms/InstCombine/pr25745.ll
 create mode 100644 test/Transforms/InstCombine/sqrt-nofast.ll
 create mode 100644 test/Transforms/InstCombine/tan-nofastmath.ll
 create mode 100644 test/Transforms/InstCombine/tan.ll
 create mode 100644 test/Transforms/InstCombine/token.ll
 create mode 100644 test/Transforms/InstCombine/x86-f16c.ll
 create mode 100644 test/Transforms/InstCombine/x86-pmovsx.ll
 create mode 100644 test/Transforms/InstCombine/x86-pmovzx.ll
 create mode 100644 test/Transforms/InstCombine/x86-pshufb.ll
 create mode 100644 test/Transforms/InstCombine/x86-sse4a.ll
 create mode 100644 test/Transforms/InstCombine/x86-vector-shifts.ll
 create mode 100644 test/Transforms/InstCombine/x86-xop.ll
 create mode 100644 test/Transforms/InstSimplify/add-mask.ll
 create mode 100644 test/Transforms/InstSimplify/bswap.ll
 create mode 100644 test/Transforms/InstSimplify/implies.ll
 create mode 100644 test/Transforms/InstSimplify/shift-128-kb.ll
 create mode 100644 test/Transforms/Internalize/comdat.ll
 create mode 100644 test/Transforms/JumpThreading/implied-cond.ll
 create mode 100644 test/Transforms/JumpThreading/phi-known.ll
 create mode 100644 test/Transforms/JumpThreading/update-edge-weight.ll
 create mode 100644 test/Transforms/LCSSA/mixed-catch.ll
 create mode 100644 test/Transforms/LICM/argmemonly-call.ll
 create mode 100644 test/Transforms/LoopDistribute/bounds-expansion-bug.ll
 create mode 100644 test/Transforms/LoopDistribute/unknown-bounds-for-memchecks.ll
 create mode 100644 test/Transforms/LoopLoadElim/backward.ll
 create mode 100644 test/Transforms/LoopLoadElim/def-store-before-load.ll
 create mode 100644 test/Transforms/LoopLoadElim/forward.ll
 create mode 100644 test/Transforms/LoopLoadElim/memcheck.ll
 create mode 100644 test/Transforms/LoopLoadElim/multiple-stores-same-block.ll
 create mode 100644 test/Transforms/LoopLoadElim/unknown-dep.ll
 create mode 100644 test/Transforms/LoopReroll/negative.ll
 create mode 100644 test/Transforms/LoopReroll/reroll_with_dbg.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-addressing-mode-loops.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/AMDGPU/lit.local.cfg
 create mode 100644 test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/NVPTX/lit.local.cfg
 create mode 100644 test/Transforms/LoopStrengthReduce/NVPTX/trunc.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/funclet.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/pr25541.ll
 create mode 100644 test/Transforms/LoopStrengthReduce/sext-ind-var.ll
 create mode 100644 test/Transforms/LoopUnroll/AMDGPU/lit.local.cfg
 create mode 100644 test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
 delete mode 100644 test/Transforms/LoopUnroll/full-unroll-bad-geps.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-crashers.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
 create mode 100644 test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll
 create mode 100644 test/Transforms/LoopUnroll/rebuild_lcssa.ll
 create mode 100644 test/Transforms/LoopUnswitch/2015-09-18-Addrspace.ll
 create mode 100644 test/Transforms/LoopUnswitch/LIV-loop-condtion.ll
 create mode 100644 test/Transforms/LoopUnswitch/cleanuppad.ll
 create mode 100644 test/Transforms/LoopUnswitch/cold-loop.ll
 create mode 100644 test/Transforms/LoopUnswitch/copy-metadata.ll
 create mode 100644 test/Transforms/LoopUnswitch/trivial-unswitch.ll
 create mode 100644 test/Transforms/LoopVectorize/AArch64/deterministic-type-shrinkage.ll
 create mode 100644 test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
 create mode 100644 test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
 create mode 100644 test/Transforms/LoopVectorize/AArch64/reduction-small-size.ll
 create mode 100644 test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
 create mode 100644 test/Transforms/LoopVectorize/ARM/vector_cast.ll
 create mode 100644 test/Transforms/LoopVectorize/PowerPC/agg-interleave-a2.ll
 create mode 100644 test/Transforms/LoopVectorize/PowerPC/stride-vectorization.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/no_fpmath.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/reg-usage.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
 create mode 100644 test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
 create mode 100644 test/Transforms/LoopVectorize/gep_with_bitcast.ll
 create mode 100644 test/Transforms/LoopVectorize/miniters.ll
 create mode 100644 test/Transforms/LoopVectorize/nontemporal.ll
 create mode 100644 test/Transforms/LoopVectorize/ptr-induction.ll
 create mode 100644 test/Transforms/LowerBitSets/function-ext.ll
 create mode 100644 test/Transforms/LowerBitSets/function.ll
 create mode 100644 test/Transforms/LowerBitSets/nonstring.ll
 create mode 100644 test/Transforms/LowerBitSets/pr25902.ll
 create mode 100644 test/Transforms/LowerSwitch/delete-default-block-crash.ll
 create mode 100644 test/Transforms/Mem2Reg/optnone.ll
 create mode 100644 test/Transforms/Mem2Reg/pr24179.ll
 create mode 100644 test/Transforms/MemCpyOpt/nontemporal.ll
 create mode 100644 test/Transforms/MergeFunc/apply_function_attributes.ll
 create mode 100644 test/Transforms/MergeFunc/constant-entire-value.ll
 create mode 100644 test/Transforms/MergeFunc/crash2.ll
 create mode 100644 test/Transforms/MergeFunc/gep-base-type.ll
 create mode 100644
test/Transforms/MergeFunc/merge-block-address-other-function.ll create mode 100644 test/Transforms/MergeFunc/merge-block-address.ll create mode 100644 test/Transforms/MergeFunc/merge-const-ptr-and-int.ll create mode 100644 test/Transforms/MergeFunc/merge-different-vector-types.ll create mode 100644 test/Transforms/MergeFunc/no-merge-block-address-different-labels.ll create mode 100644 test/Transforms/MergeFunc/no-merge-block-address-other-function.ll create mode 100644 test/Transforms/MergeFunc/no-merge-ptr-different-sizes.ll create mode 100644 test/Transforms/MergeFunc/no-merge-ptr-int-different-values.ll create mode 100644 test/Transforms/MergeFunc/ranges-multiple.ll create mode 100644 test/Transforms/MergeFunc/self-referential-global.ll create mode 100644 test/Transforms/MergeFunc/undef-different-types.ll create mode 100644 test/Transforms/NaryReassociate/nary-mul.ll create mode 100644 test/Transforms/NaryReassociate/pr24301.ll create mode 100644 test/Transforms/PGOProfile/Inputs/branch1.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/branch2.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/criticaledge.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/diag.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/landingpad.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/loop1.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/loop2.proftext create mode 100644 test/Transforms/PGOProfile/Inputs/switch.proftext create mode 100644 test/Transforms/PGOProfile/branch1.ll create mode 100644 test/Transforms/PGOProfile/branch2.ll create mode 100644 test/Transforms/PGOProfile/criticaledge.ll create mode 100644 test/Transforms/PGOProfile/diag_mismatch.ll create mode 100644 test/Transforms/PGOProfile/diag_no_funcprofdata.ll create mode 100644 test/Transforms/PGOProfile/diag_no_profile.ll create mode 100644 test/Transforms/PGOProfile/landingpad.ll create mode 100644 test/Transforms/PGOProfile/loop1.ll create mode 100644 test/Transforms/PGOProfile/loop2.ll create mode 100644 test/Transforms/PGOProfile/single_bb.ll create mode 100644 test/Transforms/PGOProfile/switch.ll create mode 100644 test/Transforms/PruneEH/operand-bundles.ll create mode 100644 test/Transforms/Reassociate/fp-expr.ll create mode 100644 test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll create mode 100644 test/Transforms/Reassociate/vaarg_movable.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/base-vector.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/codegen-cond.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll create mode 100644 
test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll create mode 100644 test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll create mode 100644 test/Transforms/SCCP/global-alias-constprop.ll create mode 100644 test/Transforms/SLPVectorizer/AArch64/horizontal.ll create mode 100644 test/Transforms/SLPVectorizer/AArch64/nontemporal.ll create mode 100644 test/Transforms/SLPVectorizer/X86/commutativity.ll create mode 100644 test/Transforms/SLPVectorizer/X86/pr23510.ll create mode 100644 test/Transforms/SLPVectorizer/X86/schedule_budget.ll create mode 100644 test/Transforms/SafeStack/AArch64/abi.ll create mode 100644 test/Transforms/SafeStack/AArch64/lit.local.cfg create mode 100644 test/Transforms/SafeStack/ARM/abi.ll create mode 100644 test/Transforms/SafeStack/ARM/lit.local.cfg create mode 100644 test/Transforms/SafeStack/ARM/setjmp.ll create mode 100644 test/Transforms/SafeStack/X86/abi.ll create mode 100644 test/Transforms/SafeStack/X86/lit.local.cfg create mode 100644 test/Transforms/SafeStack/byval.ll create mode 100644 test/Transforms/SafeStack/debug-loc.ll create mode 100644 test/Transforms/SafeStack/ret.ll create mode 100644 test/Transforms/SafeStack/store.ll create mode 100644 test/Transforms/SampleProfile/Inputs/cov-zero-samples.prof create mode 100644 test/Transforms/SampleProfile/Inputs/coverage-warning.prof create mode 100644 test/Transforms/SampleProfile/Inputs/gcc-simple.afdo create mode 100644 test/Transforms/SampleProfile/Inputs/inline-coverage.prof create mode 100644 test/Transforms/SampleProfile/Inputs/inline-hint.prof create mode 100644 test/Transforms/SampleProfile/Inputs/inline.prof create mode 100644 test/Transforms/SampleProfile/Inputs/nolocinfo.prof create mode 100644 test/Transforms/SampleProfile/Inputs/offset.prof create mode 100644 test/Transforms/SampleProfile/Inputs/remarks.prof create mode 100644 test/Transforms/SampleProfile/cov-zero-samples.ll create mode 100644 
test/Transforms/SampleProfile/coverage-warning.ll create mode 100644 test/Transforms/SampleProfile/gcc-simple.ll create mode 100644 test/Transforms/SampleProfile/inline-coverage.ll create mode 100644 test/Transforms/SampleProfile/inline-hint.ll create mode 100644 test/Transforms/SampleProfile/inline.ll create mode 100644 test/Transforms/SampleProfile/nolocinfo.ll create mode 100644 test/Transforms/SampleProfile/offset.ll create mode 100644 test/Transforms/SampleProfile/remarks.ll create mode 100644 test/Transforms/Scalarizer/store-bug.ll create mode 100644 test/Transforms/SimplifyCFG/AArch64/cttz-ctlz.ll create mode 100644 test/Transforms/SimplifyCFG/ARM/cttz-ctlz.ll create mode 100644 test/Transforms/SimplifyCFG/ARM/lit.local.cfg create mode 100644 test/Transforms/SimplifyCFG/Mips/cttz-ctlz.ll create mode 100644 test/Transforms/SimplifyCFG/Mips/lit.local.cfg create mode 100644 test/Transforms/SimplifyCFG/PR25267.ll create mode 100644 test/Transforms/SimplifyCFG/empty-cleanuppad.ll create mode 100644 test/Transforms/SimplifyCFG/implied-cond.ll create mode 100644 test/Transforms/SimplifyCFG/merge-cond-stores-2.ll create mode 100644 test/Transforms/SimplifyCFG/merge-cond-stores.ll create mode 100644 test/Transforms/SimplifyCFG/no_speculative_loads_with_asan.ll create mode 100644 test/Transforms/SimplifyCFG/preserve-load-metadata-2.ll create mode 100644 test/Transforms/SimplifyCFG/preserve-load-metadata-3.ll create mode 100644 test/Transforms/SimplifyCFG/preserve-load-metadata.ll create mode 100644 test/Transforms/SimplifyCFG/preserve-make-implicit-on-switch-to-br.ll create mode 100644 test/Transforms/SimplifyCFG/switch-dead-default.ll create mode 100644 test/Transforms/SimplifyCFG/wineh-unreachable.ll create mode 100644 test/Transforms/Sink/catchswitch.ll create mode 100644 test/Transforms/Sink/landingpad.ll create mode 100644 test/Transforms/StraightLineStrengthReduce/NVPTX/speculative-slsr.ll create mode 100644 test/Transforms/StripDeadPrototypes/basic.ll create mode 100644 test/Transforms/TailCallElim/notail.ll create mode 100644 test/Transforms/Util/simplify-dbg-declare-load.ll create mode 100644 test/Verifier/align-md.ll create mode 100644 test/Verifier/atomics.ll create mode 100644 test/Verifier/dbg-null-retained-type.ll create mode 100644 test/Verifier/dereferenceable-md.ll create mode 100644 test/Verifier/func-dbg.ll create mode 100644 test/Verifier/invalid-eh.ll delete mode 100644 test/Verifier/invalid-patchable-statepoint.ll create mode 100644 test/Verifier/metadata-function-dbg.ll create mode 100644 test/Verifier/operand-bundles.ll create mode 100644 test/Verifier/token1.ll create mode 100644 test/Verifier/token2.ll create mode 100644 test/Verifier/token3.ll create mode 100644 test/Verifier/token4.ll create mode 100644 test/Verifier/token5.ll create mode 100644 test/Verifier/token6.ll create mode 100644 test/Verifier/token7.ll create mode 100644 test/tools/dsymutil/ARM/dummy-debug-map-amr64.map create mode 100644 test/tools/dsymutil/ARM/empty-map.test create mode 100644 test/tools/dsymutil/ARM/fat-arch-name.test create mode 100644 test/tools/dsymutil/ARM/fat-arch-not-found.test create mode 100644 test/tools/dsymutil/ARM/inlined-low_pc.c create mode 100644 test/tools/dsymutil/ARM/lit.local.cfg create mode 100755 test/tools/dsymutil/Inputs/absolute_sym.macho.i386 create mode 100644 test/tools/dsymutil/Inputs/absolute_sym.macho.i386.o create mode 100755 test/tools/dsymutil/Inputs/basic.macho.i386 create mode 100644 test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.o 
create mode 100644 test/tools/dsymutil/Inputs/dead-stripped/1.o create mode 100644 test/tools/dsymutil/Inputs/empty_range/1.o create mode 100755 test/tools/dsymutil/Inputs/fat-test.arm.dylib create mode 100644 test/tools/dsymutil/Inputs/fat-test.arm.o create mode 100644 test/tools/dsymutil/Inputs/fat-test.c create mode 100755 test/tools/dsymutil/Inputs/fat-test.dylib create mode 100644 test/tools/dsymutil/Inputs/fat-test.o create mode 100644 test/tools/dsymutil/Inputs/inlined-low_pc/1.o create mode 100644 test/tools/dsymutil/Inputs/libfat-test.a create mode 100644 test/tools/dsymutil/Inputs/mismatch/1.o create mode 100644 test/tools/dsymutil/Inputs/mismatch/mismatch.pcm create mode 100644 test/tools/dsymutil/Inputs/modules/1.o create mode 100644 test/tools/dsymutil/Inputs/modules/Bar.pcm create mode 100644 test/tools/dsymutil/Inputs/modules/Foo.pcm create mode 100644 test/tools/dsymutil/Inputs/odr-anon-namespace/1.o create mode 100644 test/tools/dsymutil/Inputs/odr-anon-namespace/2.o create mode 100644 test/tools/dsymutil/Inputs/odr-member-functions/1.o create mode 100644 test/tools/dsymutil/Inputs/odr-member-functions/2.o create mode 100644 test/tools/dsymutil/Inputs/odr-member-functions/3.o create mode 100644 test/tools/dsymutil/Inputs/odr-uniquing/1.o create mode 100644 test/tools/dsymutil/Inputs/odr-uniquing/2.o create mode 100644 test/tools/dsymutil/Inputs/submodules/1.o create mode 100644 test/tools/dsymutil/Inputs/submodules/Parent.pcm create mode 100644 test/tools/dsymutil/X86/basic-linking-bundle.test create mode 100644 test/tools/dsymutil/X86/custom-line-table.test create mode 100644 test/tools/dsymutil/X86/dead-stripped.cpp create mode 100644 test/tools/dsymutil/X86/dsym-companion.test create mode 100644 test/tools/dsymutil/X86/dummy-debug-map.map create mode 100644 test/tools/dsymutil/X86/empty_range.s create mode 100644 test/tools/dsymutil/X86/fat-archive-input-i386.test create mode 100644 test/tools/dsymutil/X86/fat-object-input-x86_64.test create mode 100644 test/tools/dsymutil/X86/fat-object-input-x86_64h.test create mode 100644 test/tools/dsymutil/X86/mismatch.m create mode 100644 test/tools/dsymutil/X86/modules.m create mode 100644 test/tools/dsymutil/X86/multiple-inputs.test create mode 100644 test/tools/dsymutil/X86/odr-anon-namespace.cpp create mode 100644 test/tools/dsymutil/X86/odr-member-functions.cpp create mode 100644 test/tools/dsymutil/X86/odr-uniquing.cpp create mode 100644 test/tools/dsymutil/X86/submodules.m create mode 100644 test/tools/dsymutil/absolute_symbol.test create mode 100644 test/tools/dsymutil/arch-option.test create mode 100644 test/tools/dsymutil/archive-timestamp.test create mode 100644 test/tools/dsymutil/dump-symtab.test create mode 100644 test/tools/dsymutil/fat-binary-output.test delete mode 100644 test/tools/gold/Inputs/linkonce-weak.ll rename test/tools/gold/{ => PowerPC}/lit.local.cfg (75%) rename test/tools/gold/{ => PowerPC}/mtriple.ll (100%) rename test/tools/gold/{ => X86}/Inputs/alias-1.ll (100%) create mode 100644 test/tools/gold/X86/Inputs/available-externally.ll rename test/tools/gold/{ => X86}/Inputs/bcsection.s (100%) rename test/tools/gold/{ => X86}/Inputs/comdat.ll (69%) create mode 100644 test/tools/gold/X86/Inputs/comdat2.ll rename test/tools/gold/{ => X86}/Inputs/common.ll (100%) create mode 100644 test/tools/gold/X86/Inputs/ctors2.ll rename test/tools/gold/{ => X86}/Inputs/drop-debug.bc (100%) create mode 100644 test/tools/gold/X86/Inputs/drop-linkage.ll rename test/tools/gold/{ => X86}/Inputs/invalid.bc (100%) rename 
test/tools/gold/{ => X86}/Inputs/linker-script.export (100%) create mode 100644 test/tools/gold/X86/Inputs/linkonce-weak.ll rename test/tools/gold/{ => X86}/Inputs/pr19901-1.ll (100%) create mode 100644 test/tools/gold/X86/Inputs/resolve-to-alias.ll create mode 100644 test/tools/gold/X86/Inputs/thinlto.ll create mode 100644 test/tools/gold/X86/Inputs/type-merge.ll create mode 100644 test/tools/gold/X86/Inputs/type-merge2.ll rename test/tools/gold/{ => X86}/Inputs/weak.ll (100%) rename test/tools/gold/{ => X86}/alias.ll (92%) create mode 100644 test/tools/gold/X86/alias2.ll rename test/tools/gold/{linkonce-weak.ll => X86/available-externally.ll} (54%) rename test/tools/gold/{ => X86}/bad-alias.ll (64%) create mode 100644 test/tools/gold/X86/bcsection.ll rename test/tools/gold/{ => X86}/coff.ll (100%) create mode 100644 test/tools/gold/X86/comdat.ll create mode 100644 test/tools/gold/X86/comdat2.ll rename test/tools/gold/{ => X86}/common.ll (100%) create mode 100644 test/tools/gold/X86/ctors.ll create mode 100644 test/tools/gold/X86/ctors2.ll create mode 100644 test/tools/gold/X86/disable-verify.ll rename test/tools/gold/{ => X86}/drop-debug.ll (100%) create mode 100644 test/tools/gold/X86/drop-linkage.ll rename test/tools/gold/{ => X86}/emit-llvm.ll (78%) rename test/tools/gold/{ => X86}/invalid.ll (100%) rename test/tools/gold/{ => X86}/linker-script.ll (100%) create mode 100644 test/tools/gold/X86/linkonce-weak.ll create mode 100644 test/tools/gold/X86/lit.local.cfg rename test/tools/gold/{ => X86}/no-map-whole-file.ll (100%) rename test/tools/gold/{ => X86}/opt-level.ll (100%) create mode 100644 test/tools/gold/X86/parallel.ll rename test/tools/gold/{ => X86}/pr19901.ll (100%) create mode 100644 test/tools/gold/X86/pr25907.ll create mode 100644 test/tools/gold/X86/pr25915.ll rename test/tools/gold/{ => X86}/remarks.ll (69%) create mode 100644 test/tools/gold/X86/resolve-to-alias.ll rename test/tools/gold/{ => X86}/slp-vectorize.ll (100%) rename test/tools/gold/{ => X86}/stats.ll (100%) create mode 100644 test/tools/gold/X86/thinlto.ll create mode 100644 test/tools/gold/X86/type-merge.ll create mode 100644 test/tools/gold/X86/type-merge2.ll create mode 100644 test/tools/gold/X86/unnamed-addr.ll rename test/tools/gold/{ => X86}/vectorize.ll (100%) rename test/tools/gold/{ => X86}/weak.ll (100%) delete mode 100644 test/tools/gold/bcsection.ll delete mode 100644 test/tools/gold/comdat.ll create mode 100644 test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo create mode 100644 test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo create mode 100644 test/tools/llvm-dwp/Inputs/simple/types/a.dwo create mode 100644 test/tools/llvm-dwp/Inputs/simple/types/b.dwo create mode 100644 test/tools/llvm-dwp/Inputs/type_dedup/a.dwo create mode 100644 test/tools/llvm-dwp/Inputs/type_dedup/b.dwo create mode 100644 test/tools/llvm-dwp/X86/lit.local.cfg create mode 100644 test/tools/llvm-dwp/X86/simple.test create mode 100644 test/tools/llvm-dwp/X86/type_dedup.test create mode 100644 test/tools/llvm-lto/Inputs/thinlto.ll create mode 100644 test/tools/llvm-lto/thinlto.ll create mode 100644 test/tools/llvm-mc/basic.test create mode 100644 test/tools/llvm-mc/fatal_warnings.test create mode 100644 test/tools/llvm-mc/lit.local.cfg create mode 100644 test/tools/llvm-mc/no_warnings.test create mode 100644 test/tools/llvm-nm/X86/IRobj.test create mode 100644 test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64 create mode 100644 test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64 create mode 100644 
test/tools/llvm-nm/X86/externalonly.test create mode 100644 test/tools/llvm-nm/X86/groupingflags.test create mode 100644 test/tools/llvm-nm/X86/posixMachO.test create mode 100644 test/tools/llvm-nm/lit.local.cfg create mode 100644 test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test create mode 100644 test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64 create mode 100644 test/tools/llvm-objdump/Inputs/libbogus1.a create mode 100644 test/tools/llvm-objdump/Inputs/libbogus2.a create mode 100644 test/tools/llvm-objdump/Inputs/libbogus3.a create mode 100644 test/tools/llvm-objdump/Inputs/section-filter.obj create mode 100644 test/tools/llvm-objdump/X86/Inputs/disassemble-data.obj create mode 100755 test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 create mode 100755 test/tools/llvm-objdump/X86/Inputs/internal.exe.coff-x86_64 create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.a create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0006.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0040.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0080.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0261.macho create mode 100644 test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.macho create mode 100644 test/tools/llvm-objdump/X86/coff-dis-internal.test create mode 100644 test/tools/llvm-objdump/X86/coff-disassemble-export.test create mode 100644 test/tools/llvm-objdump/X86/disassemble-data.test create mode 100644 test/tools/llvm-objdump/X86/malformed-machos.test create mode 100644 test/tools/llvm-objdump/eh_frame-arm64.test create mode 100644 test/tools/llvm-objdump/malformed-archives.test create mode 100644 test/tools/llvm-objdump/section-filter.test create mode 100644 test/tools/llvm-profdata/Inputs/basic.proftext create mode 100644 test/tools/llvm-profdata/Inputs/compat.profdata.v2 create mode 100644 test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov create mode 100644 test/tools/llvm-profdata/Inputs/inline-samples.afdo create mode 100644 test/tools/llvm-profdata/Inputs/overflow-instr.proftext create mode 100644 test/tools/llvm-profdata/Inputs/overflow-sample.proftext create mode 100644 test/tools/llvm-profdata/Inputs/text-format-errors.text.bin create mode 100644 test/tools/llvm-profdata/Inputs/vp-malform.proftext create mode 100644 test/tools/llvm-profdata/Inputs/vp-malform2.proftext create mode 100644 test/tools/llvm-profdata/Inputs/vp-truncate.proftext create mode 100644 test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata create mode 100644 test/tools/llvm-profdata/Inputs/weight-instr-foo.profdata create mode 100644 test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext create mode 100644 test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext create mode 100644 test/tools/llvm-profdata/gcc-gcov-sample-profile.test create mode 100644 test/tools/llvm-profdata/inline-samples.test create mode 100644 test/tools/llvm-profdata/overflow-instr.test create mode 100644 test/tools/llvm-profdata/overflow-sample.test delete mode 100644 test/tools/llvm-profdata/overflow.proftext create mode 100644 test/tools/llvm-profdata/text-dump.test create mode 100644 test/tools/llvm-profdata/value-prof.proftext create mode 
100644 test/tools/llvm-profdata/weight-instr.test create mode 100644 test/tools/llvm-profdata/weight-sample.test create mode 100644 test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386 create mode 100644 test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc create mode 100644 test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64 create mode 100644 test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64 create mode 100755 test/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipsel create mode 100755 test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri create mode 100755 test/tools/llvm-readobj/Inputs/verdef.elf-x86-64 create mode 100644 test/tools/llvm-readobj/amdgpu-elf-defs.test create mode 100644 test/tools/llvm-readobj/basic.test create mode 100644 test/tools/llvm-readobj/elf-gnuhash.test create mode 100644 test/tools/llvm-readobj/elf-versioninfo.test create mode 100644 test/tools/llvm-readobj/mips-rld-map-rel.test create mode 100644 test/tools/llvm-size/basic.test create mode 100644 test/tools/llvm-split/alias.ll create mode 100644 test/tools/llvm-split/comdat.ll create mode 100644 test/tools/llvm-split/function.ll create mode 100644 test/tools/llvm-split/global.ll create mode 100644 test/tools/llvm-split/internal.ll create mode 100644 test/tools/llvm-split/unnamed.ll create mode 100755 test/tools/llvm-symbolizer/Inputs/addr.exe create mode 100644 test/tools/llvm-symbolizer/Inputs/addr.inp create mode 100644 test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp create mode 100644 test/tools/llvm-symbolizer/Inputs/coff-dwarf.exe create mode 100644 test/tools/llvm-symbolizer/Inputs/coff-exports.cpp create mode 100644 test/tools/llvm-symbolizer/Inputs/coff-exports.exe create mode 100644 test/tools/llvm-symbolizer/coff-dwarf.test create mode 100644 test/tools/llvm-symbolizer/coff-exports.test delete mode 100644 test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input create mode 100644 test/tools/llvm-symbolizer/sym.test create mode 100644 test/tools/sancov/Inputs/blacklist.txt create mode 100644 test/tools/sancov/Inputs/foo.cpp create mode 100755 test/tools/sancov/Inputs/test-linux_x86_64 create mode 100644 test/tools/sancov/Inputs/test-linux_x86_64-1.sancov create mode 100644 test/tools/sancov/Inputs/test-linux_x86_64.sancov create mode 100644 test/tools/sancov/Inputs/test.cpp create mode 100644 test/tools/sancov/blacklist.test create mode 100644 test/tools/sancov/covered_functions.test create mode 100644 test/tools/sancov/not_covered_functions.test create mode 100644 test/tools/sancov/print.test create mode 100644 tools/dsymutil/MachOUtils.cpp create mode 100644 tools/dsymutil/MachOUtils.h create mode 100644 tools/dsymutil/NonRelocatableStringpool.h delete mode 100644 tools/llvm-ar/install_symlink.cmake create mode 100644 tools/llvm-as-fuzzer/CMakeLists.txt create mode 100644 tools/llvm-as-fuzzer/llvm-as-fuzzer.cpp create mode 100644 tools/llvm-dwp/CMakeLists.txt create mode 100644 tools/llvm-dwp/LLVMBuild.txt create mode 100644 tools/llvm-dwp/Makefile create mode 100644 tools/llvm-dwp/llvm-dwp.cpp create mode 100644 tools/llvm-mc-fuzzer/CMakeLists.txt create mode 100644 tools/llvm-mc-fuzzer/llvm-mc-fuzzer.cpp create mode 100644 tools/llvm-readobj/COFFImportDumper.cpp create mode 100644 tools/llvm-split/CMakeLists.txt rename tools/{macho-dump => llvm-split}/LLVMBuild.txt (79%) rename tools/{macho-dump => llvm-split}/Makefile (71%) create mode 100644 tools/llvm-split/llvm-split.cpp delete mode 100644 tools/llvm-symbolizer/LLVMSymbolize.cpp delete mode 100644 tools/llvm-symbolizer/LLVMSymbolize.h delete 
mode 100644 tools/macho-dump/CMakeLists.txt
 delete mode 100644 tools/macho-dump/macho-dump.cpp
 create mode 100644 tools/sancov/CMakeLists.txt
 create mode 100644 tools/sancov/Makefile
 create mode 100644 tools/sancov/sancov.cc
 create mode 100644 tools/xcode-toolchain/CMakeLists.txt
 create mode 100644 unittests/ADT/RangeAdapterTest.cpp
 create mode 100644 unittests/Analysis/ValueTrackingTest.cpp
 create mode 100644 unittests/ExecutionEngine/Orc/CompileOnDemandLayerTest.cpp
 create mode 100644 unittests/ExecutionEngine/Orc/GlobalMappingLayerTest.cpp
 create mode 100644 unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp
 create mode 100644 unittests/ExecutionEngine/Orc/OrcCAPITest.cpp
 create mode 100644 unittests/ProfileData/SampleProfTest.cpp
 create mode 100644 unittests/Support/ReplaceFileTest.cpp
 create mode 100644 unittests/Support/ThreadPool.cpp
 create mode 100644 unittests/Support/TimerTest.cpp
 create mode 100644 unittests/Support/TrailingObjectsTest.cpp
 create mode 100644 utils/TableGen/Attributes.cpp
 create mode 100755 utils/lit/tests/Inputs/googletest-timeout/DummySubDir/OneTest
 create mode 100644 utils/lit/tests/Inputs/googletest-timeout/lit.cfg
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/infinite_loop.py
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/lit.cfg
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/quick_then_slow.py
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/short.py
 create mode 100644 utils/lit/tests/Inputs/shtest-timeout/slow.py
 create mode 100644 utils/lit/tests/googletest-timeout.py
 create mode 100644 utils/lit/tests/shtest-timeout.py
 create mode 100755 utils/release/build_llvm_package.bat
 create mode 100644 utils/schedcover.py

diff --git a/.clang-tidy b/.clang-tidy
index 3186da43d43d..97fbe23333bd 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -1 +1,13 @@
-Checks: '-*,clang-diagnostic-*,llvm-*,misc-*'
+Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,readability-identifier-naming'
+CheckOptions:
+  - key: readability-identifier-naming.ClassCase
+    value: CamelCase
+  - key: readability-identifier-naming.EnumCase
+    value: CamelCase
+  - key: readability-identifier-naming.FunctionCase
+    value: lowerCase
+  - key: readability-identifier-naming.UnionCase
+    value: CamelCase
+  - key: readability-identifier-naming.VariableCase
+    value: CamelCase
+
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 78fc78b11781..3d2093fde634 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -26,13 +26,41 @@ else()
   set(cmake_3_2_USES_TERMINAL USES_TERMINAL)
 endif()
 
-project(LLVM)
+if(NOT DEFINED LLVM_VERSION_MAJOR)
+  set(LLVM_VERSION_MAJOR 3)
+endif()
+if(NOT DEFINED LLVM_VERSION_MINOR)
+  set(LLVM_VERSION_MINOR 8)
+endif()
+if(NOT DEFINED LLVM_VERSION_PATCH)
+  set(LLVM_VERSION_PATCH 0)
+endif()
+if(NOT DEFINED LLVM_VERSION_SUFFIX)
+  set(LLVM_VERSION_SUFFIX svn)
+endif()
+
+if (POLICY CMP0048)
+  cmake_policy(SET CMP0048 NEW)
+  set(cmake_3_0_PROJ_VERSION
+    VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH})
+  set(cmake_3_0_LANGUAGES LANGUAGES)
+endif()
+
+if (NOT PACKAGE_VERSION)
+  set(PACKAGE_VERSION
+    "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}")
+endif()
+
+project(LLVM
+  ${cmake_3_0_PROJ_VERSION}
+  ${cmake_3_0_LANGUAGES}
+  C CXX ASM)
 
 # The following only works with the Ninja generator in CMake >= 3.0.
 set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
   "Define the maximum number of concurrent compilation jobs.")
 if(LLVM_PARALLEL_COMPILE_JOBS)
-  if(CMAKE_VERSION VERSION_LESS 3.0 OR NOT CMAKE_MAKE_PROGRAM MATCHES "ninja$")
+  if(CMAKE_VERSION VERSION_LESS 3.0 OR NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
     message(WARNING "Job pooling is only available with Ninja generators and CMake 3.0 and later.")
   else()
     set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
@@ -43,7 +71,7 @@ endif()
 set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
   "Define the maximum number of concurrent link jobs.")
 if(LLVM_PARALLEL_LINK_JOBS)
-  if(CMAKE_VERSION VERSION_LESS 3.0 OR NOT CMAKE_MAKE_PROGRAM MATCHES "ninja$")
+  if(CMAKE_VERSION VERSION_LESS 3.0 OR NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
     message(WARNING "Job pooling is only available with Ninja generators and CMake 3.0 and later.")
   else()
     set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
@@ -58,15 +86,9 @@ set(CMAKE_MODULE_PATH
   "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
   )
 
-set(LLVM_VERSION_MAJOR 3)
-set(LLVM_VERSION_MINOR 7)
-set(LLVM_VERSION_PATCH 1)
-set(LLVM_VERSION_SUFFIX "")
-
-if (NOT PACKAGE_VERSION)
-  set(PACKAGE_VERSION
-    "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}")
-endif()
+# Generate a CompilationDatabase (compile_commands.json file) for our build,
+# for use by clang_complete, YouCompleteMe, etc.
+set(CMAKE_EXPORT_COMPILE_COMMANDS 1)
 
 option(LLVM_INSTALL_UTILS "Include utility binaries in the 'install' target." OFF)
@@ -152,6 +174,11 @@ endif()
 
 string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
 
+if (CMAKE_BUILD_TYPE AND
+    NOT uppercase_CMAKE_BUILD_TYPE MATCHES "^(DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL)$")
+  message(FATAL_ERROR "Invalid value for CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
+endif()
+
 set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" )
 
 # They are used as destination of target generators.
@@ -278,6 +305,9 @@ endif( LLVM_USE_INTEL_JITEVENTS )
 option(LLVM_USE_OPROFILE
   "Use opagent JIT interface to inform OProfile about JIT code" OFF)
 
+option(LLVM_EXTERNALIZE_DEBUGINFO
+  "Generate dSYM files and strip executables and libraries (Darwin Only)" OFF)
+
 # If enabled, verify we are on a platform that supports oprofile.
 if( LLVM_USE_OPROFILE )
   if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
@@ -304,7 +334,7 @@ endif()
 # Define the default arguments to use with 'lit', and an option for the user to
 # override.
 set(LIT_ARGS_DEFAULT "-sv")
-if (MSVC OR XCODE)
+if (MSVC_IDE OR XCODE)
   set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
 endif()
 set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit")
@@ -333,6 +363,7 @@ option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON)
 option(LLVM_BUILD_TESTS
   "Build LLVM unit tests. If OFF, just generate build targets." OFF)
 option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON)
+option(LLVM_INCLUDE_GO_TESTS "Include the Go bindings tests in test build targets." ON)
 
 option (LLVM_BUILD_DOCS "Build the llvm documentation." OFF)
 option (LLVM_INCLUDE_DOCS "Generate build targets for llvm documentation." ON)
@@ -342,9 +373,25 @@ option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF)
 
 option (LLVM_BUILD_EXTERNAL_COMPILER_RT
   "Build compiler-rt as an external project."
   OFF)
 
-option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" OFF)
-option(LLVM_DYLIB_EXPORT_ALL "Export all symbols from libLLVM.dylib (default is C API only" OFF)
-option(LLVM_DISABLE_LLVM_DYLIB_ATEXIT "Disable llvm-shlib's atexit destructors." ON)
+# You can configure which libraries from LLVM you want to include in the
+# shared library by setting LLVM_DYLIB_COMPONENTS to a semi-colon delimited
+# list of LLVM components. All component names handled by llvm-config are valid.
+if(NOT DEFINED LLVM_DYLIB_COMPONENTS)
+  set(LLVM_DYLIB_COMPONENTS "all" CACHE STRING
+    "Semicolon-separated list of components to include in libLLVM, or \"all\".")
+endif()
+option(LLVM_LINK_LLVM_DYLIB "Link tools against the libllvm dynamic library" OFF)
+option(LLVM_BUILD_LLVM_C_DYLIB "Build libllvm-c re-export library (Darwin Only)" OFF)
+set(LLVM_BUILD_LLVM_DYLIB_default OFF)
+if(LLVM_LINK_LLVM_DYLIB OR LLVM_BUILD_LLVM_C_DYLIB)
+  set(LLVM_BUILD_LLVM_DYLIB_default ON)
+endif()
+option(LLVM_BUILD_LLVM_DYLIB "Build libllvm dynamic library" ${LLVM_BUILD_LLVM_DYLIB_default})
+set(LLVM_DISABLE_LLVM_DYLIB_ATEXIT_DEFAULT ON)
+if (LLVM_LINK_LLVM_DYLIB)
+  set(LLVM_DISABLE_LLVM_DYLIB_ATEXIT_DEFAULT OFF)
+endif()
+option(LLVM_DISABLE_LLVM_DYLIB_ATEXIT "Disable llvm-shlib's atexit destructors." ${LLVM_DISABLE_LLVM_DYLIB_ATEXIT_DEFAULT})
 if(LLVM_DISABLE_LLVM_DYLIB_ATEXIT)
   set(DISABLE_LLVM_DYLIB_ATEXIT 1)
 endif()
@@ -525,6 +572,15 @@ else(UNIX)
   endif(NOT DEFINED CMAKE_INSTALL_RPATH)
 endif()
 
+if(APPLE AND DARWIN_LTO_LIBRARY)
+  set(CMAKE_EXE_LINKER_FLAGS
+    "${CMAKE_EXE_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}")
+  set(CMAKE_SHARED_LINKER_FLAGS
+    "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}")
+  set(CMAKE_MODULE_LINKER_FLAGS
+    "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-lto_library -Wl,${DARWIN_LTO_LIBRARY}")
+endif()
+
 # Work around a broken bfd ld behavior. When linking a binary with a
 # foo.so library, it will try to find any library that foo.so uses and
 # check its symbols. This is wasteful (the check was done when foo.so
@@ -543,6 +599,10 @@ include_directories( ${LLVM_INCLUDE_DIR} ${LLVM_MAIN_INCLUDE_DIR})
 if(LLVM_USE_HOST_TOOLS)
   include(CrossCompile)
 endif(LLVM_USE_HOST_TOOLS)
+if(LLVM_TARGET_IS_CROSSCOMPILE_HOST)
+# Dummy use to avoid CMake Warning: Manually-specified variables were not used
+# (this is a variable that CrossCompile sets on recursive invocations)
+endif()
 
 if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
   # On FreeBSD, /usr/local/* is not used by default. In order to build LLVM
@@ -559,6 +619,17 @@ endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS )
 # use export_executable_symbols(target).
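# A minimal usage sketch for the LLVM_DYLIB_COMPONENTS cache variable added
# above; the component names here are illustrative assumptions, not taken
# from this patch. Any component name that llvm-config understands is valid,
# as is the default "all":
#
#   cmake -G Ninja -DLLVM_BUILD_LLVM_DYLIB=ON \
#         -DLLVM_DYLIB_COMPONENTS="core;support;irreader" path/to/llvm
#
# Tools are linked against the resulting libLLVM only when
# LLVM_LINK_LLVM_DYLIB is also turned ON.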
 set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
 
+set(LLVM_PROFDATA_FILE "" CACHE FILEPATH
+  "Profiling data file to use when compiling in order to improve runtime performance.")
+
+if(LLVM_PROFDATA_FILE AND EXISTS ${LLVM_PROFDATA_FILE})
+  if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
+    add_definitions("-fprofile-instr-use=${LLVM_PROFDATA_FILE}")
+  else()
+    message(FATAL_ERROR "LLVM_PROFDATA_FILE can only be specified when compiling with clang")
+  endif()
+endif()
+
 include(AddLLVM)
 include(TableGen)
@@ -619,6 +690,13 @@ if( LLVM_INCLUDE_EXAMPLES )
 endif()
 
 if( LLVM_INCLUDE_TESTS )
+  if(EXISTS ${LLVM_MAIN_SRC_DIR}/projects/test-suite AND TARGET clang)
+    include(LLVMExternalProjectUtils)
+    llvm_ExternalProject_Add(test-suite ${LLVM_MAIN_SRC_DIR}/projects/test-suite
+      USE_TOOLCHAIN
+      EXCLUDE_FROM_ALL
+      NO_INSTALL)
+  endif()
   add_subdirectory(test)
   add_subdirectory(unittests)
   if (MSVC)
diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT
index a15f2919cc78..da0e7a471738 100644
--- a/CODE_OWNERS.TXT
+++ b/CODE_OWNERS.TXT
@@ -31,7 +31,7 @@ D: Config, ADT, Support, inlining & related passes, SROA/mem2reg & related passe
 
 N: Evan Cheng
 E: evan.cheng@apple.com
-D: ARM target, parts of code generator not covered by someone else
+D: parts of code generator not covered by someone else
 
 N: Eric Christopher
 E: echristo@gmail.com
@@ -53,10 +53,6 @@ N: Quentin Colombet
 E: qcolombet@apple.com
 D: Register allocators
 
-N: Anshuman Dasgupta
-E: adasgupt@codeaurora.org
-D: Hexagon Backend
-
 N: Duncan P. N. Exon Smith
 E: dexonsmith@apple.com
 D: Branch weights and BlockFrequencyInfo
@@ -93,10 +89,6 @@ N: Lang Hames
 E: lhames@gmail.com
 D: MCJIT, RuntimeDyld and JIT event listeners
 
-N: David Majnemer
-E: david.majnemer@gmail.com
-D: IR Constant Folder
-
 N: Galina Kistanova
 E: gkistanova@gmail.com
 D: LLVM Buildbot
@@ -118,9 +110,17 @@ E: sabre@nondot.org
 W: http://nondot.org/~sabre/
 D: Everything not covered by someone else
 
+N: David Majnemer
+E: david.majnemer@gmail.com
+D: IR Constant Folder, InstCombine
+
+N: Dylan McKay
+E: dylanmckay34@gmail.com
+D: AVR Backend
+
 N: Tim Northover
 E: t.p.northover@gmail.com
-D: AArch64 backend
+D: AArch64 backend, misc ARM backend
 
 N: Diego Novillo
 E: dnovillo@google.com
@@ -134,14 +134,18 @@ N: Richard Osborne
 E: richard@xmos.com
 D: XCore Backend
 
+N: Krzysztof Parzyszek
+E: kparzysz@codeaurora.org
+D: Hexagon Backend
+
+N: Paul Robinson
+E: paul_robinson@playstation.sony.com
+D: Sony PlayStation®4 support
+
 N: Chad Rosier
 E: mcrosier@codeaurora.org
 D: Fast-Isel
 
-N: Alex Rosenberg
-E: alexr@leftfield.org
-D: Sony PlayStation®4 support
-
 N: Nadav Rotem
 E: nrotem@apple.com
 D: X86 Backend, Loop Vectorizer
diff --git a/CREDITS.TXT b/CREDITS.TXT
index fd5119f01111..da1fb010e35b 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -465,54 +465,3 @@ N: Bob Wilson
 E: bob.wilson@acm.org
 D: Advanced SIMD (NEON) support in the ARM backend.
 
-N: Alexey Bataev
-E: a.bataev@hotmail.com
-D: Clang OpenMP implementation
-
-N: Andrey Bokhanko
-E: andreybokhanko@gmail.com
-D: Clang OpenMP implementation
-
-N: Carlo Bertolli
-E: cbertol@us.ibm.com
-D: Clang OpenMP implementation
-
-N: Eric Stotzer
-E: estotzer@ti.com
-D: Clang OpenMP implementation
-
-N: Kelvin Li
-E: kkwli0@gmail.com
-D: Clang OpenMP implementation
-
-N: Samuel Antao
-E: sfantao@us.ibm.com
-D: Clang OpenMP implementation
-
-N: Sergey Ostanevich
-E: sergos.gnu@gmail.com
-D: Clang OpenMP implementation
-
-N: Alexandre Eichenberger
-E: alexe@us.ibm.com
-D: Clang OpenMP implementation
-
-N: Guansong Zhang
-E: guansong.zhang@amd.com
-D: Clang OpenMP implementation
-
-N: Sunita Chandrasekaran
-E: sunisg123@gmail.com
-D: Clang OpenMP implementation
-
-N: Michael Wong
-E: fraggamuffin@gmail.com
-D: Clang OpenMP implementation
-
-N: Alexander Mussman
-E: alexander.musman@intel.com
-D: Clang OpenMP implementation
-
-N: Kevin O'Brien
-E: caomhin@us.ibm.com
-D: Clang OpenMP implementation
\ No newline at end of file
diff --git a/Makefile.config.in b/Makefile.config.in
index 9df9834f4dbe..5ca264320c51 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -396,8 +396,8 @@ endif
 BINUTILS_INCDIR := @BINUTILS_INCDIR@
 
 # Optional flags supported by the compiler
-# -Wno-missing-field-initializers
-NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@
+# -Wmissing-field-initializers
+MISSING_FIELD_INITIALIZERS = @MISSING_FIELD_INITIALIZERS@
 # -Wno-variadic-macros
 NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@
 # -Wcovered-switch-default
diff --git a/Makefile.rules b/Makefile.rules
index 24cac3b37659..a67aef7c97aa 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -486,6 +486,8 @@ endif
 ObjRootDir := $(PROJ_OBJ_DIR)/$(BuildMode)
 ObjDir     := $(ObjRootDir)
 LibDir     := $(PROJ_OBJ_ROOT)/$(BuildMode)/lib
+LibexecDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/libexec
+ShareDir   := $(PROJ_OBJ_ROOT)/$(BuildMode)/share
 ToolDir    := $(PROJ_OBJ_ROOT)/$(BuildMode)/bin
 ExmplDir   := $(PROJ_OBJ_ROOT)/$(BuildMode)/examples
 LLVMLibDir := $(LLVM_OBJ_ROOT)/$(BuildMode)/lib
@@ -686,7 +688,7 @@ endif
 CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \
                      $(EXTRA_OPTIONS) $(COVERED_SWITCH_DEFAULT) \
                      $(NO_UNINITIALIZED) $(NO_MAYBE_UNINITIALIZED) \
-                     $(NO_MISSING_FIELD_INITIALIZERS) $(NO_COMMENT)
+                     $(MISSING_FIELD_INITIALIZERS) $(NO_COMMENT)
 # Enable cast-qual for C++; the workaround is to use const_cast.
 CXX.Flags += -Wcast-qual
@@ -857,6 +859,7 @@ $(DESTDIR)$(PROJ_bindir) $(DESTDIR)$(PROJ_libdir) $(DESTDIR)$(PROJ_includedir) $
 .PRECIOUS: $(LibDir)/.dir $(ToolDir)/.dir $(ExmplDir)/.dir
 .PRECIOUS: $(LLVMLibDir)/.dir $(LLVMToolDir)/.dir $(LLVMExmplDir)/.dir
+.PRECIOUS: $(LibexecDir)/.dir $(ShareDir)/.dir
 
 #---------------------------------------------------------
 # Collect the object directories (as there may be more
@@ -2144,6 +2147,8 @@ printvars::
 	$(Echo) "SrcMakefiles : " '$(SrcMakefiles)'
 	$(Echo) "ObjDir       : " '$(ObjDir)'
 	$(Echo) "LibDir       : " '$(LibDir)'
+	$(Echo) "LibexecDir   : " '$(LibexecDir)'
+	$(Echo) "ShareDir     : " '$(ShareDir)'
 	$(Echo) "ToolDir      : " '$(ToolDir)'
 	$(Echo) "ExmplDir     : " '$(ExmplDir)'
 	$(Echo) "Sources      : " '$(Sources)'
diff --git a/README.txt b/README.txt
index 6358a0684211..7cfb1640995f 100644
--- a/README.txt
+++ b/README.txt
@@ -13,5 +13,5 @@ assistance with LLVM, and in particular docs/GettingStarted.rst for getting
 started with LLVM and docs/README.txt for an overview of LLVM's
 documentation setup.
 
-If you're writing a package for LLVM, see docs/Packaging.rst for our
+If you are writing a package for LLVM, see docs/Packaging.rst for our
 suggestions.
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index af57712b57c5..02ab161e3b03 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -32,12 +32,12 @@ dnl===-----------------------------------------------------------------------===
 dnl Initialize autoconf and define the package name, version number and
 dnl address for reporting bugs.
-AC_INIT([LLVM],[3.7.1],[http://llvm.org/bugs/])
+AC_INIT([LLVM],[3.8.0svn],[http://llvm.org/bugs/])
 
 LLVM_VERSION_MAJOR=3
-LLVM_VERSION_MINOR=7
-LLVM_VERSION_PATCH=1
-LLVM_VERSION_SUFFIX=
+LLVM_VERSION_MINOR=8
+LLVM_VERSION_PATCH=0
+LLVM_VERSION_SUFFIX=svn
 
 AC_DEFINE_UNQUOTED([LLVM_VERSION_MAJOR], $LLVM_VERSION_MAJOR, [Major version of the LLVM API])
 AC_DEFINE_UNQUOTED([LLVM_VERSION_MINOR], $LLVM_VERSION_MINOR, [Minor version of the LLVM API])
@@ -74,7 +74,7 @@ if test ${srcdir} != "." ; then
 fi
 
 dnl Quit if it is an in-source build
-if test ${srcdir} == "." ; then
+if test ${srcdir} = "." ; then
   AC_MSG_ERROR([In-source builds are not allowed. Please configure from a separate build directory!])
 fi
 
@@ -133,6 +133,7 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([[#if ! __clang__
 llvm_cv_cxx_compiler=gcc, [])])
 AC_LANG_POP([C++])
 AC_MSG_RESULT([${llvm_cv_cxx_compiler}])
+AC_SUBST(CXX_COMPILER,$llvm_cv_cxx_compiler)
 
 dnl Configure all of the projects present in our source tree. While we could
 dnl just AC_CONFIG_SUBDIRS on the set of directories in projects that have a
@@ -430,6 +431,7 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
   arm64*-*)               llvm_cv_target_arch="AArch64" ;;
   arm*-*)                 llvm_cv_target_arch="ARM" ;;
   aarch64*-*)             llvm_cv_target_arch="AArch64" ;;
+  avr-*)                  llvm_cv_target_arch="AVR" ;;
   mips-* | mips64-*)      llvm_cv_target_arch="Mips" ;;
   mipsel-* | mips64el-*)  llvm_cv_target_arch="Mips" ;;
   xcore-*)                llvm_cv_target_arch="XCore" ;;
@@ -467,6 +469,7 @@ case $host in
   arm64*-*)               host_arch="AArch64" ;;
   arm*-*)                 host_arch="ARM" ;;
   aarch64*-*)             host_arch="AArch64" ;;
+  avr-*)                  host_arch="AVR" ;;
   mips-* | mips64-*)      host_arch="Mips" ;;
   mipsel-* | mips64el-*)  host_arch="Mips" ;;
   xcore-*)                host_arch="XCore" ;;
@@ -800,6 +803,7 @@ else
     PowerPC)     AC_SUBST(TARGET_HAS_JIT,1) ;;
     x86_64)      AC_SUBST(TARGET_HAS_JIT,1) ;;
     ARM)         AC_SUBST(TARGET_HAS_JIT,1) ;;
+    AVR)         AC_SUBST(TARGET_HAS_JIT,0) ;;
     Mips)        AC_SUBST(TARGET_HAS_JIT,1) ;;
     XCore)       AC_SUBST(TARGET_HAS_JIT,0) ;;
     MSP430)      AC_SUBST(TARGET_HAS_JIT,0) ;;
@@ -1339,7 +1343,7 @@ AC_DEFINE_UNQUOTED(DEFAULT_SYSROOT,"$withval",
 AC_ARG_WITH(clang-default-openmp-runtime,
   AS_HELP_STRING([--with-clang-default-openmp-runtime],
     [The default OpenMP runtime for Clang.]),,
-    withval="libgomp")
+    withval="libomp")
 AC_DEFINE_UNQUOTED(CLANG_DEFAULT_OPENMP_RUNTIME,"$withval",
                    [Default OpenMP runtime used by -fopenmp.])
@@ -1548,25 +1552,31 @@ AC_MSG_RESULT([ok])
 
 dnl Check optional compiler flags.
 AC_MSG_CHECKING([optional compiler flags])
-CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
-CXX_FLAG_CHECK(NO_MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers])
-CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default])
-
-dnl GCC's potential uninitialized use analysis is weak and presents lots of
-dnl false positives, so disable it.
-NO_UNINITIALIZED=
-NO_MAYBE_UNINITIALIZED=
-if test "$GXX" = "yes"
-then
-  CXX_FLAG_CHECK(NO_MAYBE_UNINITIALIZED, [-Wno-maybe-uninitialized])
-  dnl gcc 4.7 introduced -Wmaybe-uninitialized to distinguish cases which are
-  dnl known to be uninitialized from cases which might be uninitialized.  We
-  dnl still want to catch the first kind of errors.
-  if test -z "$NO_MAYBE_UNINITIALIZED"
-  then
-    CXX_FLAG_CHECK(NO_UNINITIALIZED, [-Wno-uninitialized])
-  fi
-fi
+case "$llvm_cv_cxx_compiler" in
+  clang)
+    CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
+    CXX_FLAG_CHECK(MISSING_FIELD_INITIALIZERS, [-Wmissing-field-initializers])
+    CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default])
+    ;;
+  gcc)
+    dnl If we're using gcc check for -Wno-missing-field-initializers as gcc will warn
+    dnl on plain open brace initializations. clang won't so use -Wmissing-field-initializers
+    dnl there.
+    CXX_FLAG_CHECK(MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers])
+    CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros])
+    CXX_FLAG_CHECK(COVERED_SWITCH_DEFAULT, [-Wcovered-switch-default])
+    CXX_FLAG_CHECK(NO_MAYBE_UNINITIALIZED, [-Wno-maybe-uninitialized])
+    dnl gcc 4.7 introduced -Wmaybe-uninitialized to distinguish cases which are
+    dnl known to be uninitialized from cases which might be uninitialized.  We
+    dnl still want to catch the first kind of errors.
+    if test -z "$NO_MAYBE_UNINITIALIZED"
+    then
+      CXX_FLAG_CHECK(NO_UNINITIALIZED, [-Wno-uninitialized])
+    fi
+    ;;
+  unknown)
+    ;;
+esac
 
 dnl Check for misbehaving -Wcomment (gcc-4.7 has this) and maybe add
 dnl -Wno-comment to the flags.
@@ -1587,7 +1597,7 @@ int main() { return 0; }
 AC_SUBST(NO_COMMENT, [$no_comment])
 CXXFLAGS="$llvm_cv_old_cxxflags"
 
-AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT])
+AC_MSG_RESULT([$NO_VARIADIC_MACROS $MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT])
 
 AC_ARG_WITH([python],
             [AS_HELP_STRING([--with-python], [path to python])],
@@ -1628,9 +1638,12 @@ dnl===-----------------------------------------------------------------------===
 AC_CHECK_LIB(m,sin)
 if test "$llvm_cv_os_type" = "MingW" ; then
+  dnl mingw-gcc's driver doesn't imply -lole32 by default so we may need this
+  dnl when being built with gcc for bootstrapping purposes.
   AC_CHECK_LIB(ole32, main)
   AC_CHECK_LIB(psapi, main)
   AC_CHECK_LIB(shell32, main)
+  AC_CHECK_LIB(uuid,main)
 fi
 
 dnl dlopen() is required for plugin support.
@@ -1786,7 +1799,6 @@ dnl Generally we're looking for POSIX headers.
 AC_HEADER_DIRENT
 AC_HEADER_MMAP_ANONYMOUS
 AC_HEADER_STAT
-AC_HEADER_SYS_WAIT
 AC_HEADER_TIME
 
 AC_LANG_PUSH([C++])
@@ -1798,7 +1810,6 @@ AC_LANG_POP([C++])
 
 AC_CHECK_HEADERS([dlfcn.h execinfo.h fcntl.h inttypes.h link.h])
 AC_CHECK_HEADERS([malloc.h setjmp.h signal.h stdint.h termios.h unistd.h])
-AC_CHECK_HEADERS([utime.h])
 AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h])
 AC_CHECK_HEADERS([sys/ioctl.h malloc/malloc.h mach/mach.h])
 AC_CHECK_HEADERS([valgrind/valgrind.h])
@@ -1876,10 +1887,9 @@ AC_CHECK_FUNCS([isatty mkdtemp mkstemp ])
 AC_CHECK_FUNCS([mktemp posix_spawn pread realpath sbrk setrlimit ])
 AC_CHECK_FUNCS([strerror strerror_r setenv ])
 AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ])
-AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp writev])
+AC_CHECK_FUNCS([setjmp longjmp writev])
 AC_CHECK_FUNCS([futimes futimens])
 AC_C_PRINTF_A
-AC_FUNC_RAND48
 
 dnl Check for arc4random accessible via AC_INCLUDES_DEFAULT.
 AC_CHECK_DECLS([arc4random])
@@ -2232,3 +2242,14 @@ AC_CONFIG_MAKEFILE(bindings/ocaml/Makefile.ocaml)
 
 dnl Finally, crank out the output
 AC_OUTPUT
+echo ""
+echo ""
+echo "################################################################################"
+echo "################################################################################"
+echo "The LLVM project has deprecated building with configure & make."
+echo "The autoconf-based makefile build system will be removed in the 3.9 release."
+echo ""
+echo "Please migrate to the CMake-based build system."
+echo "For more information see: http://llvm.org/docs/CMake.html"
+echo "################################################################################"
+echo "################################################################################"
diff --git a/autoconf/m4/rand48.m4 b/autoconf/m4/rand48.m4
deleted file mode 100644
index 76f08faad284..000000000000
--- a/autoconf/m4/rand48.m4
+++ /dev/null
@@ -1,12 +0,0 @@
-#
-# This function determins if the srand48,drand48,lrand48 functions are
-# available on this platform.
-#
-AC_DEFUN([AC_FUNC_RAND48],[
-AC_SINGLE_CXX_CHECK([ac_cv_func_rand48],
-  [srand48/lrand48/drand48], [],
-  [srand48(0);lrand48();drand48();])
-if test "$ac_cv_func_rand48" = "yes" ; then
-AC_DEFINE([HAVE_RAND48],1,[Define to 1 if srand48/lrand48/drand48 exist in ])
-fi
-])
diff --git a/bindings/go/llvm/DIBuilderBindings.cpp b/bindings/go/llvm/DIBuilderBindings.cpp
index df5885de25c4..e767144bb322 100644
--- a/bindings/go/llvm/DIBuilderBindings.cpp
+++ b/bindings/go/llvm/DIBuilderBindings.cpp
@@ -74,23 +74,34 @@ LLVMMetadataRef LLVMDIBuilderCreateFunction(
     LLVMDIBuilderRef Dref, LLVMMetadataRef Scope, const char *Name,
     const char *LinkageName, LLVMMetadataRef File, unsigned Line,
     LLVMMetadataRef CompositeType, int IsLocalToUnit, int IsDefinition,
-    unsigned ScopeLine, unsigned Flags, int IsOptimized, LLVMValueRef Func) {
+    unsigned ScopeLine, unsigned Flags, int IsOptimized) {
   DIBuilder *D = unwrap(Dref);
   return wrap(D->createFunction(unwrap<DIScope>(Scope), Name, LinkageName, File ?
unwrap<DIFile>(File) : nullptr, Line,
                                 unwrap<DISubroutineType>(CompositeType),
                                 IsLocalToUnit, IsDefinition, ScopeLine, Flags,
-                                IsOptimized, unwrap<Function>(Func)));
+                                IsOptimized));
 }
 
-LLVMMetadataRef LLVMDIBuilderCreateLocalVariable(
-    LLVMDIBuilderRef Dref, unsigned Tag, LLVMMetadataRef Scope,
-    const char *Name, LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty,
-    int AlwaysPreserve, unsigned Flags, unsigned ArgNo) {
+LLVMMetadataRef
+LLVMDIBuilderCreateAutoVariable(LLVMDIBuilderRef Dref, LLVMMetadataRef Scope,
+                                const char *Name, LLVMMetadataRef File,
+                                unsigned Line, LLVMMetadataRef Ty,
+                                int AlwaysPreserve, unsigned Flags) {
   DIBuilder *D = unwrap(Dref);
-  return wrap(D->createLocalVariable(
-      Tag, unwrap<DIScope>(Scope), Name, unwrap<DIFile>(File), Line,
-      unwrap<DIType>(Ty), AlwaysPreserve, Flags, ArgNo));
+  return wrap(D->createAutoVariable(unwrap<DIScope>(Scope), Name,
+                                    unwrap<DIFile>(File), Line,
+                                    unwrap<DIType>(Ty), AlwaysPreserve, Flags));
+}
+
+LLVMMetadataRef LLVMDIBuilderCreateParameterVariable(
+    LLVMDIBuilderRef Dref, LLVMMetadataRef Scope, const char *Name,
+    unsigned ArgNo, LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty,
+    int AlwaysPreserve, unsigned Flags) {
+  DIBuilder *D = unwrap(Dref);
+  return wrap(D->createParameterVariable(
+      unwrap<DIScope>(Scope), Name, ArgNo, unwrap<DIFile>(File), Line,
+      unwrap<DIType>(Ty), AlwaysPreserve, Flags));
 }
 
 LLVMMetadataRef LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef Dref,
@@ -117,8 +128,7 @@ LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Dref,
                                   LLVMMetadataRef File,
                                   LLVMMetadataRef ParameterTypes) {
   DIBuilder *D = unwrap(Dref);
   return wrap(
-      D->createSubroutineType(File ? unwrap<DIFile>(File) : nullptr,
-                              DITypeRefArray(unwrap<MDTuple>(ParameterTypes))));
+      D->createSubroutineType(DITypeRefArray(unwrap<MDTuple>(ParameterTypes))));
 }
 
 LLVMMetadataRef LLVMDIBuilderCreateStructType(
diff --git a/bindings/go/llvm/DIBuilderBindings.h b/bindings/go/llvm/DIBuilderBindings.h
index a4fba2784185..f14fd0f7b5f6 100644
--- a/bindings/go/llvm/DIBuilderBindings.h
+++ b/bindings/go/llvm/DIBuilderBindings.h
@@ -55,12 +55,18 @@ LLVMMetadataRef LLVMDIBuilderCreateFunction(
     LLVMDIBuilderRef D, LLVMMetadataRef Scope, const char *Name,
     const char *LinkageName, LLVMMetadataRef File, unsigned Line,
     LLVMMetadataRef CompositeType, int IsLocalToUnit, int IsDefinition,
-    unsigned ScopeLine, unsigned Flags, int IsOptimized, LLVMValueRef Function);
+    unsigned ScopeLine, unsigned Flags, int IsOptimized);
 
-LLVMMetadataRef LLVMDIBuilderCreateLocalVariable(
-    LLVMDIBuilderRef D, unsigned Tag, LLVMMetadataRef Scope, const char *Name,
+LLVMMetadataRef
+LLVMDIBuilderCreateAutoVariable(LLVMDIBuilderRef D, LLVMMetadataRef Scope,
+                                const char *Name, LLVMMetadataRef File,
+                                unsigned Line, LLVMMetadataRef Ty,
+                                int AlwaysPreserve, unsigned Flags);
+
+LLVMMetadataRef LLVMDIBuilderCreateParameterVariable(
+    LLVMDIBuilderRef D, LLVMMetadataRef Scope, const char *Name, unsigned ArgNo,
     LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty, int AlwaysPreserve,
-    unsigned Flags, unsigned ArgNo);
+    unsigned Flags);
 
 LLVMMetadataRef LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef D,
                                              const char *Name,
diff --git a/bindings/go/llvm/IRBindings.cpp b/bindings/go/llvm/IRBindings.cpp
index fd0cb8006a4f..4308f84cc1be 100644
--- a/bindings/go/llvm/IRBindings.cpp
+++ b/bindings/go/llvm/IRBindings.cpp
@@ -98,3 +98,7 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line,
       DebugLoc::get(Line, Col, Scope ? unwrap<MDNode>(Scope) : nullptr,
                     InlinedAt ?
unwrap(InlinedAt) : nullptr)); } + +void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) { + unwrap(Func)->setSubprogram(unwrap(SP)); +} diff --git a/bindings/go/llvm/IRBindings.h b/bindings/go/llvm/IRBindings.h index a53e178f1e0c..dcdb26eda795 100644 --- a/bindings/go/llvm/IRBindings.h +++ b/bindings/go/llvm/IRBindings.h @@ -55,6 +55,8 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line, unsigned Col, LLVMMetadataRef Scope, LLVMMetadataRef InlinedAt); +void LLVMSetSubprogram(LLVMValueRef Fn, LLVMMetadataRef SP); + #ifdef __cplusplus } diff --git a/bindings/go/llvm/analysis.go b/bindings/go/llvm/analysis.go index 7b0d8e3e8b8a..3ae4b71def7c 100644 --- a/bindings/go/llvm/analysis.go +++ b/bindings/go/llvm/analysis.go @@ -15,6 +15,7 @@ package llvm /* #include "llvm-c/Analysis.h" // If you are getting an error here read bindings/go/README.txt +#include "llvm-c/Core.h" #include */ import "C" diff --git a/bindings/go/llvm/bitreader.go b/bindings/go/llvm/bitreader.go index 98112a99dd3b..c3bf07a19044 100644 --- a/bindings/go/llvm/bitreader.go +++ b/bindings/go/llvm/bitreader.go @@ -15,6 +15,7 @@ package llvm /* #include "llvm-c/BitReader.h" +#include "llvm-c/Core.h" #include */ import "C" @@ -40,7 +41,7 @@ func ParseBitcodeFile(name string) (Module, error) { defer C.LLVMDisposeMemoryBuffer(buf) var m Module - if C.LLVMParseBitcode(buf, &m.C, &errmsg) == 0 { + if C.LLVMParseBitcode2(buf, &m.C) == 0 { return m, nil } diff --git a/bindings/go/llvm/dibuilder.go b/bindings/go/llvm/dibuilder.go index f03f740b7770..778c31785836 100644 --- a/bindings/go/llvm/dibuilder.go +++ b/bindings/go/llvm/dibuilder.go @@ -189,7 +189,6 @@ type DIFunction struct { ScopeLine int Flags int Optimized bool - Function Value } // CreateCompileUnit creates function debug metadata. @@ -211,14 +210,39 @@ func (d *DIBuilder) CreateFunction(diScope Metadata, f DIFunction) Metadata { C.unsigned(f.ScopeLine), C.unsigned(f.Flags), boolToCInt(f.Optimized), - f.Function.C, ) return Metadata{C: result} } -// DILocalVariable holds the values for creating local variable debug metadata. -type DILocalVariable struct { - Tag dwarf.Tag +// DIAutoVariable holds the values for creating auto variable debug metadata. +type DIAutoVariable struct { + Name string + File Metadata + Line int + Type Metadata + AlwaysPreserve bool + Flags int +} + +// CreateAutoVariable creates local variable debug metadata. +func (d *DIBuilder) CreateAutoVariable(scope Metadata, v DIAutoVariable) Metadata { + name := C.CString(v.Name) + defer C.free(unsafe.Pointer(name)) + result := C.LLVMDIBuilderCreateAutoVariable( + d.ref, + scope.C, + name, + v.File.C, + C.unsigned(v.Line), + v.Type.C, + boolToCInt(v.AlwaysPreserve), + C.unsigned(v.Flags), + ) + return Metadata{C: result} +} + +// DIParameterVariable holds the values for creating parameter variable debug metadata. +type DIParameterVariable struct { Name string File Metadata Line int @@ -227,25 +251,24 @@ type DILocalVariable struct { Flags int // ArgNo is the 1-based index of the argument in the function's - // parameter list if it is an argument, or 0 otherwise. + // parameter list. ArgNo int } -// CreateLocalVariable creates local variable debug metadata. -func (d *DIBuilder) CreateLocalVariable(scope Metadata, v DILocalVariable) Metadata { +// CreateParameterVariable creates parameter variable debug metadata. 
+func (d *DIBuilder) CreateParameterVariable(scope Metadata, v DIParameterVariable) Metadata { name := C.CString(v.Name) defer C.free(unsafe.Pointer(name)) - result := C.LLVMDIBuilderCreateLocalVariable( + result := C.LLVMDIBuilderCreateParameterVariable( d.ref, - C.unsigned(v.Tag), scope.C, name, + C.unsigned(v.ArgNo), v.File.C, C.unsigned(v.Line), v.Type.C, boolToCInt(v.AlwaysPreserve), C.unsigned(v.Flags), - C.unsigned(v.ArgNo), ) return Metadata{C: result} } diff --git a/bindings/go/llvm/executionengine.go b/bindings/go/llvm/executionengine.go index 94d4e83b4cf3..91f8366ca75d 100644 --- a/bindings/go/llvm/executionengine.go +++ b/bindings/go/llvm/executionengine.go @@ -14,6 +14,7 @@ package llvm /* +#include "llvm-c/Core.h" #include "llvm-c/ExecutionEngine.h" #include */ diff --git a/bindings/go/llvm/ir.go b/bindings/go/llvm/ir.go index 76f5f06017c8..b8ea28d3a0db 100644 --- a/bindings/go/llvm/ir.go +++ b/bindings/go/llvm/ir.go @@ -1054,6 +1054,12 @@ func (v Value) AddTargetDependentFunctionAttr(attr, value string) { defer C.free(unsafe.Pointer(cvalue)) C.LLVMAddTargetDependentFunctionAttr(v.C, cattr, cvalue) } +func (v Value) SetPersonality(p Value) { + C.LLVMSetPersonalityFn(v.C, p.C) +} +func (v Value) SetSubprogram(sp Metadata) { + C.LLVMSetSubprogram(v.C, sp.C) +} // Operations on parameters func (v Value) ParamsCount() int { return int(C.LLVMCountParams(v.C)) } @@ -1206,7 +1212,7 @@ func (b Builder) Dispose() { C.LLVMDisposeBuilder(b.C) } func (b Builder) SetCurrentDebugLocation(line, col uint, scope, inlinedAt Metadata) { C.LLVMSetCurrentDebugLocation2(b.C, C.unsigned(line), C.unsigned(col), scope.C, inlinedAt.C) } -func (b Builder) SetInstDebugLocation(v Value) { C.LLVMSetInstDebugLocation(b.C, v.C) } +func (b Builder) SetInstDebugLocation(v Value) { C.LLVMSetInstDebugLocation(b.C, v.C) } func (b Builder) InsertDeclare(module Module, storage Value, md Value) Value { f := module.NamedFunction("llvm.dbg.declare") if f.IsNil() { @@ -1725,7 +1731,7 @@ func (b Builder) CreatePtrDiff(lhs, rhs Value, name string) (v Value) { return } -func (b Builder) CreateLandingPad(t Type, personality Value, nclauses int, name string) (l Value) { +func (b Builder) CreateLandingPad(t Type, nclauses int, name string) (l Value) { cname := C.CString(name) defer C.free(unsafe.Pointer(cname)) l.C = C.LLVMBuildLandingPad(b.C, t.C, nil, C.unsigned(nclauses), cname) diff --git a/bindings/go/llvm/linker.go b/bindings/go/llvm/linker.go index f64f66c858e8..ca16f7637b2b 100644 --- a/bindings/go/llvm/linker.go +++ b/bindings/go/llvm/linker.go @@ -14,6 +14,7 @@ package llvm /* +#include "llvm-c/Core.h" #include "llvm-c/Linker.h" #include */ @@ -21,11 +22,9 @@ import "C" import "errors" func LinkModules(Dest, Src Module) error { - var cmsg *C.char - failed := C.LLVMLinkModules(Dest.C, Src.C, C.LLVMLinkerDestroySource, &cmsg) + failed := C.LLVMLinkModules2(Dest.C, Src.C) if failed != 0 { - err := errors.New(C.GoString(cmsg)) - C.LLVMDisposeMessage(cmsg) + err := errors.New("Linking failed") return err } return nil diff --git a/bindings/go/llvm/target.go b/bindings/go/llvm/target.go index bd1d0f3a440d..6b1895b9ac60 100644 --- a/bindings/go/llvm/target.go +++ b/bindings/go/llvm/target.go @@ -14,6 +14,7 @@ package llvm /* +#include "llvm-c/Core.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" #include diff --git a/bindings/ocaml/Makefile.ocaml b/bindings/ocaml/Makefile.ocaml index 1f65a7b8f905..22b96a298eff 100644 --- a/bindings/ocaml/Makefile.ocaml +++ b/bindings/ocaml/Makefile.ocaml @@ -277,6 +277,8 @@ 
uninstall-local:: uninstall-deplibs build-deplibs: $(OutputLibs) +$(OcamlDir)/%.so: $(LibDir)/%.so + $(Verb) ln -sf $< $@ $(OcamlDir)/%.a: $(LibDir)/%.a $(Verb) ln -sf $< $@ diff --git a/bindings/ocaml/bitreader/bitreader_ocaml.c b/bindings/ocaml/bitreader/bitreader_ocaml.c index 15ebd5f635fd..f91b092d9176 100644 --- a/bindings/ocaml/bitreader/bitreader_ocaml.c +++ b/bindings/ocaml/bitreader/bitreader_ocaml.c @@ -23,10 +23,9 @@ void llvm_raise(value Prototype, char *Message); /* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */ CAMLprim LLVMModuleRef llvm_get_module(LLVMContextRef C, LLVMMemoryBufferRef MemBuf) { LLVMModuleRef M; - char *Message; - if (LLVMGetBitcodeModuleInContext(C, MemBuf, &M, &Message)) - llvm_raise(*caml_named_value("Llvm_bitreader.Error"), Message); + if (LLVMGetBitcodeModuleInContext2(C, MemBuf, &M)) + llvm_raise(*caml_named_value("Llvm_bitreader.Error"), ""); return M; } @@ -34,10 +33,9 @@ CAMLprim LLVMModuleRef llvm_get_module(LLVMContextRef C, LLVMMemoryBufferRef Mem /* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */ CAMLprim LLVMModuleRef llvm_parse_bitcode(LLVMContextRef C, LLVMMemoryBufferRef MemBuf) { LLVMModuleRef M; - char *Message; - if (LLVMParseBitcodeInContext(C, MemBuf, &M, &Message)) - llvm_raise(*caml_named_value("Llvm_bitreader.Error"), Message); + if (LLVMParseBitcodeInContext2(C, MemBuf, &M)) + llvm_raise(*caml_named_value("Llvm_bitreader.Error"), ""); return M; } diff --git a/bindings/ocaml/linker/linker_ocaml.c b/bindings/ocaml/linker/linker_ocaml.c index 3b8512aa5953..498a5f0c8453 100644 --- a/bindings/ocaml/linker/linker_ocaml.c +++ b/bindings/ocaml/linker/linker_ocaml.c @@ -25,10 +25,8 @@ void llvm_raise(value Prototype, char *Message); /* llmodule -> llmodule -> unit */ CAMLprim value llvm_link_modules(LLVMModuleRef Dst, LLVMModuleRef Src) { - char* Message; - - if (LLVMLinkModules(Dst, Src, 0, &Message)) - llvm_raise(*caml_named_value("Llvm_linker.Error"), Message); + if (LLVMLinkModules2(Dst, Src)) + llvm_raise(*caml_named_value("Llvm_linker.Error"), "Linking failed"); return Val_unit; } diff --git a/bindings/ocaml/linker/llvm_linker.ml b/bindings/ocaml/linker/llvm_linker.ml index 3044abd8b6cf..f2b64eeee918 100644 --- a/bindings/ocaml/linker/llvm_linker.ml +++ b/bindings/ocaml/linker/llvm_linker.ml @@ -11,5 +11,5 @@ exception Error of string let () = Callback.register_exception "Llvm_linker.Error" (Error "") -external link_modules : Llvm.llmodule -> Llvm.llmodule -> unit - = "llvm_link_modules" +external link_modules' : Llvm.llmodule -> Llvm.llmodule -> unit + = "llvm_link_modules" diff --git a/bindings/ocaml/linker/llvm_linker.mli b/bindings/ocaml/linker/llvm_linker.mli index 06c3b92a577e..5f558ffb1162 100644 --- a/bindings/ocaml/linker/llvm_linker.mli +++ b/bindings/ocaml/linker/llvm_linker.mli @@ -14,6 +14,6 @@ exception Error of string -(** [link_modules dst src mode] links [src] into [dst], raising [Error] - if the linking fails. *) -val link_modules : Llvm.llmodule -> Llvm.llmodule -> unit \ No newline at end of file +(** [link_modules' dst src] links [src] into [dst], raising [Error] + if the linking fails. The src module is destroyed. 
*) +val link_modules' : Llvm.llmodule -> Llvm.llmodule -> unit \ No newline at end of file diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml index 9a3cb1f0de09..259d57bc0680 100644 --- a/bindings/ocaml/llvm/llvm.ml +++ b/bindings/ocaml/llvm/llvm.ml @@ -579,6 +579,8 @@ external global_parent : llvalue -> llmodule = "LLVMGetGlobalParent" external is_declaration : llvalue -> bool = "llvm_is_declaration" external linkage : llvalue -> Linkage.t = "llvm_linkage" external set_linkage : Linkage.t -> llvalue -> unit = "llvm_set_linkage" +external unnamed_addr : llvalue -> bool = "llvm_unnamed_addr" +external set_unnamed_addr : bool -> llvalue -> unit = "llvm_set_unnamed_addr" external section : llvalue -> string = "llvm_section" external set_section : string -> llvalue -> unit = "llvm_set_section" external visibility : llvalue -> Visibility.t = "llvm_visibility" diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli index dcda02764f54..541c35a2a229 100644 --- a/bindings/ocaml/llvm/llvm.mli +++ b/bindings/ocaml/llvm/llvm.mli @@ -1255,6 +1255,16 @@ val linkage : llvalue -> Linkage.t See the method [llvm::GlobalValue::setLinkage]. *) val set_linkage : Linkage.t -> llvalue -> unit +(** [unnamed_addr g] returns [true] if the global value [g] has the unnamed_addr + attribute. Returns [false] otherwise. + See the method [llvm::GlobalValue::getUnnamedAddr]. *) +val unnamed_addr : llvalue -> bool + +(** [set_unnamed_addr b g] sets the unnamed_addr attribute of the global value + [g] if [b] is [true], and unsets it otherwise. + See the method [llvm::GlobalValue::setUnnamedAddr]. *) +val set_unnamed_addr : bool -> llvalue -> unit + (** [section g] returns the linker section of the global value [g]. See the method [llvm::GlobalValue::getSection].
*) val section : llvalue -> string diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c index 3889f9276ccd..b4c47e7475e6 100644 --- a/bindings/ocaml/llvm/llvm_ocaml.c +++ b/bindings/ocaml/llvm/llvm_ocaml.c @@ -940,6 +940,17 @@ CAMLprim value llvm_set_linkage(value Linkage, LLVMValueRef Global) { return Val_unit; } +/* llvalue -> bool */ +CAMLprim value llvm_unnamed_addr(LLVMValueRef Global) { + return Val_bool(LLVMHasUnnamedAddr(Global)); +} + +/* bool -> llvalue -> unit */ +CAMLprim value llvm_set_unnamed_addr(value UseUnnamedAddr, LLVMValueRef Global) { + LLVMSetUnnamedAddr(Global, Bool_val(UseUnnamedAddr)); + return Val_unit; +} + /* llvalue -> string */ CAMLprim value llvm_section(LLVMValueRef Global) { return caml_copy_string(LLVMGetSection(Global)); diff --git a/bindings/python/llvm/bit_reader.py b/bindings/python/llvm/bit_reader.py index 5bf5e22025a3..33b8211076b8 100644 --- a/bindings/python/llvm/bit_reader.py +++ b/bindings/python/llvm/bit_reader.py @@ -16,16 +16,15 @@ lib = get_library() def parse_bitcode(mem_buffer): """Input is .core.MemoryBuffer""" module = c_object_p() - out = c_char_p(None) - result = lib.LLVMParseBitcode(mem_buffer, byref(module), byref(out)) + result = lib.LLVMParseBitcode2(mem_buffer, byref(module)) if result: - raise RuntimeError('LLVM Error: %s' % out.value) + raise RuntimeError('LLVM Error') m = Module(module) m.take_ownership(mem_buffer) return m def register_library(library): - library.LLVMParseBitcode.argtypes = [MemoryBuffer, POINTER(c_object_p), POINTER(c_char_p)] - library.LLVMParseBitcode.restype = bool + library.LLVMParseBitcode2.argtypes = [MemoryBuffer, POINTER(c_object_p)] + library.LLVMParseBitcode2.restype = bool register_library(lib) diff --git a/bindings/python/llvm/core.py b/bindings/python/llvm/core.py index c95952db6fc5..47e81dd1a4f9 100644 --- a/bindings/python/llvm/core.py +++ b/bindings/python/llvm/core.py @@ -465,9 +465,6 @@ def register_library(library): library.LLVMInitializeAnalysis.argtypes = [PassRegistry] library.LLVMInitializeAnalysis.restype = None - library.LLVMInitializeIPA.argtypes = [PassRegistry] - library.LLVMInitializeIPA.restype = None - library.LLVMInitializeCodeGen.argtypes = [PassRegistry] library.LLVMInitializeCodeGen.restype = None @@ -621,7 +618,6 @@ def initialize_llvm(): lib.LLVMInitializeIPO(p) lib.LLVMInitializeInstrumentation(p) lib.LLVMInitializeAnalysis(p) - lib.LLVMInitializeIPA(p) lib.LLVMInitializeCodeGen(p) lib.LLVMInitializeTarget(p) diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 3203d1ea708b..f699211bd5b4 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -67,7 +67,6 @@ check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H) check_include_file(sys/stat.h HAVE_SYS_STAT_H) check_include_file(sys/time.h HAVE_SYS_TIME_H) check_include_file(sys/uio.h HAVE_SYS_UIO_H) -check_include_file(sys/wait.h HAVE_SYS_WAIT_H) check_include_file(termios.h HAVE_TERMIOS_H) check_include_file(unistd.h HAVE_UNISTD_H) check_include_file(utime.h HAVE_UTIME_H) @@ -106,6 +105,12 @@ if( NOT PURE_WINDOWS ) endif() check_library_exists(dl dlopen "" HAVE_LIBDL) check_library_exists(rt clock_gettime "" HAVE_LIBRT) +endif() + +# Don't look for these libraries on Windows. Also don't look for them if we're +# using MSan, since uninstrumented third party code may call MSan interceptors +# like strlen, leading to false positives.
+if( NOT PURE_WINDOWS AND NOT LLVM_USE_SANITIZER MATCHES "Memory.*") if (LLVM_ENABLE_ZLIB) check_library_exists(z compress2 "" HAVE_LIBZ) else() @@ -294,6 +299,10 @@ if( LLVM_ENABLE_PIC ) set(ENABLE_PIC 1) else() set(ENABLE_PIC 0) + check_cxx_compiler_flag("-fno-pie" SUPPORTS_NO_PIE_FLAG) + if(SUPPORTS_NO_PIE_FLAG) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fno-pie") + endif() endif() check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG) @@ -491,7 +500,7 @@ if (LLVM_ENABLE_DOXYGEN) option(LLVM_DOXYGEN_EXTERNAL_SEARCH "Enable doxygen external search." OFF) if (LLVM_DOXYGEN_EXTERNAL_SEARCH) - set(LLVM_DOXYGEN_SEARCHENGINE_URL "" CACHE STRING "URL to use for external searhc.") + set(LLVM_DOXYGEN_SEARCHENGINE_URL "" CACHE STRING "URL to use for external search.") set(LLVM_DOXYGEN_SEARCH_MAPPINGS "" CACHE STRING "Doxygen Search Mappings") endif() endif() diff --git a/cmake/dummy.cpp b/cmake/dummy.cpp new file mode 100644 index 000000000000..c4e7e9536871 --- /dev/null +++ b/cmake/dummy.cpp @@ -0,0 +1 @@ +typedef int dummy; diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 45f6746948d2..bed81b28426e 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -1,5 +1,6 @@ include(LLVMProcessSources) include(LLVM-Config) +include(DetermineGCCCompatible) function(llvm_update_compile_flags name) get_property(sources TARGET ${name} PROPERTY SOURCES) @@ -21,15 +22,13 @@ function(llvm_update_compile_flags name) list(APPEND LLVM_COMPILE_DEFINITIONS _HAS_EXCEPTIONS=0) list(APPEND LLVM_COMPILE_FLAGS "/EHs-c-") endif() - if (CLANG_CL) - # FIXME: Remove this once clang-cl supports SEH - list(APPEND LLVM_COMPILE_DEFINITIONS "GTEST_HAS_SEH=0") - endif() endif() # LLVM_REQUIRES_RTTI is an internal flag that individual # targets can use to force RTTI + set(LLVM_CONFIG_HAS_RTTI YES CACHE INTERNAL "") if(NOT (LLVM_REQUIRES_RTTI OR LLVM_ENABLE_RTTI)) + set(LLVM_CONFIG_HAS_RTTI NO CACHE INTERNAL "") list(APPEND LLVM_COMPILE_DEFINITIONS GTEST_HAS_RTTI=0) if (LLVM_COMPILER_IS_GCC_COMPATIBLE) list(APPEND LLVM_COMPILE_FLAGS "-fno-rtti") @@ -41,7 +40,7 @@ function(llvm_update_compile_flags name) # Assume that; # - LLVM_COMPILE_FLAGS is list. # - PROPERTY COMPILE_FLAGS is string. - string(REPLACE ";" " " target_compile_flags "${LLVM_COMPILE_FLAGS}") + string(REPLACE ";" " " target_compile_flags " ${LLVM_COMPILE_FLAGS}") if(update_src_props) foreach(fn ${sources}) @@ -193,34 +192,43 @@ endfunction(add_link_opts) # Set each output directory according to ${CMAKE_CONFIGURATION_TYPES}. # Note: Don't set variables CMAKE_*_OUTPUT_DIRECTORY any more, # or a certain builder, for example, msbuild.exe, would be confused. -function(set_output_directory target bindir libdir) - # Do nothing if *_OUTPUT_INTDIR is empty. - if("${bindir}" STREQUAL "") - return() - endif() +function(set_output_directory target) + cmake_parse_arguments(ARG "" "BINARY_DIR;LIBRARY_DIR" "" ${ARGN}) - # moddir -- corresponding to LIBRARY_OUTPUT_DIRECTORY. + # module_dir -- corresponding to LIBRARY_OUTPUT_DIRECTORY. # It affects output of add_library(MODULE).
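# A hedged usage sketch of the new keyword-argument style (the target name mytool is hypothetical; the two directory variables are the ones LLVM already passes at the call sites below): # set_output_directory(mytool BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR} LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) # Either keyword may be omitted, and only the directories actually passed are applied to the target's output properties.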
if(WIN32 OR CYGWIN) # DLL platform - set(moddir ${bindir}) + set(module_dir ${ARG_BINARY_DIR}) else() - set(moddir ${libdir}) + set(module_dir ${ARG_LIBRARY_DIR}) endif() if(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") foreach(build_mode ${CMAKE_CONFIGURATION_TYPES}) string(TOUPPER "${build_mode}" CONFIG_SUFFIX) - string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} bi ${bindir}) - string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} li ${libdir}) - string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} mi ${moddir}) - set_target_properties(${target} PROPERTIES "RUNTIME_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${bi}) - set_target_properties(${target} PROPERTIES "ARCHIVE_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${li}) - set_target_properties(${target} PROPERTIES "LIBRARY_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${mi}) + if(ARG_BINARY_DIR) + string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} bi ${ARG_BINARY_DIR}) + set_target_properties(${target} PROPERTIES "RUNTIME_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${bi}) + endif() + if(ARG_LIBRARY_DIR) + string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} li ${ARG_LIBRARY_DIR}) + set_target_properties(${target} PROPERTIES "ARCHIVE_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${li}) + endif() + if(module_dir) + string(REPLACE ${CMAKE_CFG_INTDIR} ${build_mode} mi ${module_dir}) + set_target_properties(${target} PROPERTIES "LIBRARY_OUTPUT_DIRECTORY_${CONFIG_SUFFIX}" ${mi}) + endif() endforeach() else() - set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${bindir}) - set_target_properties(${target} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${libdir}) - set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${moddir}) + if(ARG_BINARY_DIR) + set_target_properties(${target} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${ARG_BINARY_DIR}) + endif() + if(ARG_LIBRARY_DIR) + set_target_properties(${target} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${ARG_LIBRARY_DIR}) + endif() + if(module_dir) + set_target_properties(${target} PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${module_dir}) + endif() endif() endfunction() @@ -303,6 +311,9 @@ endfunction(set_windows_version_resource_properties) # MODULE # Target ${name} might not be created on unsupported platforms. # Check with "if(TARGET ${name})". +# DISABLE_LLVM_LINK_LLVM_DYLIB +# Do not link this library to libLLVM, even if +# LLVM_LINK_LLVM_DYLIB is enabled. # OUTPUT_NAME name # Corresponds to OUTPUT_NAME in target properties. # DEPENDS targets... @@ -313,10 +324,12 @@ endfunction(set_windows_version_resource_properties) # Same semantics as target_link_libraries(). # ADDITIONAL_HEADERS # May specify header files for IDE generators. +# SONAME +# Should set SONAME link flags and create symlinks # ) function(llvm_add_library name) cmake_parse_arguments(ARG - "MODULE;SHARED;STATIC" + "MODULE;SHARED;STATIC;DISABLE_LLVM_LINK_LLVM_DYLIB;SONAME" "OUTPUT_NAME" "ADDITIONAL_HEADERS;DEPENDS;LINK_COMPONENTS;LINK_LIBS;OBJLIBS" ${ARGN}) @@ -395,8 +408,11 @@ function(llvm_add_library name) set(windows_resource_file ${windows_resource_file} PARENT_SCOPE) endif() - set_output_directory(${name} ${LLVM_RUNTIME_OUTPUT_INTDIR} ${LLVM_LIBRARY_OUTPUT_INTDIR}) - llvm_update_compile_flags(${name}) + set_output_directory(${name} BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR} LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) + # $<TARGET_OBJECTS> doesn't require compile flags.
+ if(NOT obj_name) + llvm_update_compile_flags(${name}) + endif() add_link_opts( ${name} ) if(ARG_OUTPUT_NAME) set_target_properties(${name} @@ -418,11 +434,6 @@ function(llvm_add_library name) PREFIX "" ) endif() - - set_target_properties(${name} - PROPERTIES - SOVERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} - VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) endif() if(ARG_MODULE OR ARG_SHARED) @@ -437,6 +448,24 @@ function(llvm_add_library name) endif() endif() + if(ARG_SHARED AND UNIX) + if(NOT APPLE AND ARG_SONAME) + get_target_property(output_name ${name} OUTPUT_NAME) + if(${output_name} STREQUAL "output_name-NOTFOUND") + set(output_name ${name}) + endif() + set(library_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX}) + set(api_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}) + set_target_properties(${name} PROPERTIES OUTPUT_NAME ${library_name}) + llvm_install_library_symlink(${api_name} ${library_name} SHARED + COMPONENT ${name} + ALWAYS_GENERATE) + llvm_install_library_symlink(${output_name} ${library_name} SHARED + COMPONENT ${name} + ALWAYS_GENERATE) + endif() + endif() + # Add the explicit dependency information for this library. # # It would be nice to verify that we have the dependencies for this library @@ -444,10 +473,14 @@ function(llvm_add_library name) # property has been set to an empty value. get_property(lib_deps GLOBAL PROPERTY LLVMBUILD_LIB_DEPS_${name}) - llvm_map_components_to_libnames(llvm_libs - ${ARG_LINK_COMPONENTS} - ${LLVM_LINK_COMPONENTS} - ) + if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_STATIC AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB) + set(llvm_libs LLVM) + else() + llvm_map_components_to_libnames(llvm_libs + ${ARG_LINK_COMPONENTS} + ${LLVM_LINK_COMPONENTS} + ) + endif() if(CMAKE_VERSION VERSION_LESS 2.8.12) # Link libs w/o keywords, assuming PUBLIC. @@ -479,6 +512,10 @@ function(llvm_add_library name) add_dependencies(${objlib} ${LLVM_COMMON_DEPENDS}) endforeach() endif() + + if(ARG_SHARED OR ARG_MODULE) + llvm_externalize_debuginfo(${name}) + endif() endfunction() macro(add_llvm_library name) @@ -504,9 +541,11 @@ macro(add_llvm_library name) set_target_properties( ${name} PROPERTIES EXCLUDE_FROM_ALL ON) elseif(NOT _is_gtest) if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY OR ${name} STREQUAL "LTO") + set(install_dir lib${LLVM_LIBDIR_SUFFIX}) if(ARG_SHARED OR BUILD_SHARED_LIBS) - if(WIN32 OR CYGWIN) + if(WIN32 OR CYGWIN OR MINGW) set(install_type RUNTIME) + set(install_dir bin) else() set(install_type LIBRARY) endif() @@ -516,7 +555,7 @@ macro(add_llvm_library name) install(TARGETS ${name} EXPORT LLVMExports - ${install_type} DESTINATION lib${LLVM_LIBDIR_SUFFIX} + ${install_type} DESTINATION ${install_dir} COMPONENT ${name}) if (NOT CMAKE_CONFIGURATION_TYPES) @@ -562,9 +601,30 @@ endmacro(add_llvm_loadable_module name) macro(add_llvm_executable name) - llvm_process_sources( ALL_FILES ${ARGN} ) + cmake_parse_arguments(ARG "DISABLE_LLVM_LINK_LLVM_DYLIB;IGNORE_EXTERNALIZE_DEBUGINFO" "" "" ${ARGN}) + llvm_process_sources( ALL_FILES ${ARG_UNPARSED_ARGUMENTS} ) + + # Generate objlib + if(LLVM_ENABLE_OBJLIB) + # Generate an obj library for both targets. 
+ set(obj_name "obj.${name}") + add_library(${obj_name} OBJECT EXCLUDE_FROM_ALL + ${ALL_FILES} + ) + llvm_update_compile_flags(${obj_name}) + set(ALL_FILES "$<TARGET_OBJECTS:${obj_name}>") + + set_target_properties(${obj_name} PROPERTIES FOLDER "Object Libraries") + endif() + add_windows_version_resource_file(ALL_FILES ${ALL_FILES}) + if(XCODE) + # Note: the dummy.cpp source file provides no definitions. However, + # it forces Xcode to properly link the static library. + list(APPEND ALL_FILES "${LLVM_MAIN_SRC_DIR}/cmake/dummy.cpp") + endif() + if( EXCLUDE_FROM_ALL ) add_executable(${name} EXCLUDE_FROM_ALL ${ALL_FILES}) else() @@ -575,7 +635,10 @@ macro(add_llvm_executable name) set_windows_version_resource_properties(${name} ${windows_resource_file}) endif() - llvm_update_compile_flags(${name}) + # $<TARGET_OBJECTS> doesn't require compile flags. + if(NOT LLVM_ENABLE_OBJLIB) + llvm_update_compile_flags(${name}) + endif() add_link_opts( ${name} ) # Do not add -Dname_EXPORTS to the command-line when building files in this @@ -588,25 +651,40 @@ macro(add_llvm_executable name) add_llvm_symbol_exports( ${name} ${LLVM_EXPORTED_SYMBOL_FILE} ) endif(LLVM_EXPORTED_SYMBOL_FILE) + if (LLVM_LINK_LLVM_DYLIB AND NOT ARG_DISABLE_LLVM_LINK_LLVM_DYLIB) + set(USE_SHARED USE_SHARED) + endif() + set(EXCLUDE_FROM_ALL OFF) - set_output_directory(${name} ${LLVM_RUNTIME_OUTPUT_INTDIR} ${LLVM_LIBRARY_OUTPUT_INTDIR}) - llvm_config( ${name} ${LLVM_LINK_COMPONENTS} ) + set_output_directory(${name} BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR} LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) + llvm_config( ${name} ${USE_SHARED} ${LLVM_LINK_COMPONENTS} ) if( LLVM_COMMON_DEPENDS ) add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) endif( LLVM_COMMON_DEPENDS ) + + if(NOT ARG_IGNORE_EXTERNALIZE_DEBUGINFO) + llvm_externalize_debuginfo(${name}) + endif() endmacro(add_llvm_executable name) function(export_executable_symbols target) if (NOT MSVC) # MSVC's linker doesn't support exporting all symbols. set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1) + if (APPLE) + set_property(TARGET ${target} APPEND_STRING PROPERTY + LINK_FLAGS " -rdynamic") + endif() endif() endfunction() - -set (LLVM_TOOLCHAIN_TOOLS - llvm-ar - llvm-objdump - ) +if(NOT LLVM_TOOLCHAIN_TOOLS) + set (LLVM_TOOLCHAIN_TOOLS + llvm-ar + llvm-ranlib + llvm-lib + llvm-objdump + ) +endif() macro(add_llvm_tool name) if( NOT LLVM_BUILD_TOOLS ) @@ -651,7 +729,7 @@ endmacro(add_llvm_example name) macro(add_llvm_utility name) - add_llvm_executable(${name} ${ARGN}) + add_llvm_executable(${name} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN}) set_target_properties(${name} PROPERTIES FOLDER "Utils") if( LLVM_INSTALL_UTILS ) install (TARGETS ${name} @@ -676,56 +754,104 @@ macro(add_llvm_target target_name) set( CURRENT_LLVM_TARGET LLVM${target_name} ) endmacro(add_llvm_target) +function(canonicalize_tool_name name output) + string(REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/" "" nameStrip ${name}) + string(REPLACE "-" "_" nameUNDERSCORE ${nameStrip}) + string(TOUPPER ${nameUNDERSCORE} nameUPPER) + set(${output} "${nameUPPER}" PARENT_SCOPE) +endfunction(canonicalize_tool_name) + +# Custom add_subdirectory wrapper +# Takes in a project name (e.g. LLVM), the subdirectory name, and an +# optional path if it differs from the name.
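# A hedged usage sketch of the wrapper defined below (the subproject name foo is hypothetical; the option and variable names follow the pattern the macro constructs): # add_llvm_subdirectory(LLVM TOOL foo) # This defines the option LLVM_TOOL_FOO_BUILD and descends into ${CMAKE_CURRENT_SOURCE_DIR}/foo when that directory has a CMakeLists.txt, otherwise it honors the LLVM_EXTERNAL_FOO_SOURCE_DIR cache variable for out-of-tree sources.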
+macro(add_llvm_subdirectory project type name) + set(add_llvm_external_dir "${ARGN}") + if("${add_llvm_external_dir}" STREQUAL "") + set(add_llvm_external_dir ${name}) + endif() + canonicalize_tool_name(${name} nameUPPER) + if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir}/CMakeLists.txt) + # Treat it as in-tree subproject. + option(${project}_${type}_${nameUPPER}_BUILD + "Whether to build ${name} as part of ${project}" On) + mark_as_advanced(${project}_${type}_${name}_BUILD) + if(${project}_${type}_${nameUPPER}_BUILD) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir} ${add_llvm_external_dir}) + # Don't process it in add_llvm_implicit_projects(). + set(${project}_${type}_${nameUPPER}_BUILD OFF) + endif() + else() + set(LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR + "${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}" + CACHE PATH "Path to ${name} source directory") + set(${project}_${type}_${nameUPPER}_BUILD_DEFAULT ON) + if(NOT LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR OR NOT EXISTS ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}) + set(${project}_${type}_${nameUPPER}_BUILD_DEFAULT OFF) + endif() + if("${LLVM_EXTERNAL_${nameUPPER}_BUILD}" STREQUAL "OFF") + set(${project}_${type}_${nameUPPER}_BUILD_DEFAULT OFF) + endif() + option(${project}_${type}_${nameUPPER}_BUILD + "Whether to build ${name} as part of LLVM" + ${${project}_${type}_${nameUPPER}_BUILD_DEFAULT}) + if (${project}_${type}_${nameUPPER}_BUILD) + if(EXISTS ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}) + add_subdirectory(${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR} ${add_llvm_external_dir}) + elseif(NOT "${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}" STREQUAL "") + message(WARNING "Nonexistent directory for ${name}: ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}") + endif() + # FIXME: It'd be redundant. + set(${project}_${type}_${nameUPPER}_BUILD Off) + endif() + endif() +endmacro() + # Add external project that may want to be built as part of llvm such as Clang, # lld, and Polly. This adds two options. One for the source directory of the # project, which defaults to ${CMAKE_CURRENT_SOURCE_DIR}/${name}. Another to # enable or disable building it with everything else. # Additional parameter can be specified as the name of directory. 
macro(add_llvm_external_project name) - set(add_llvm_external_dir "${ARGN}") - if("${add_llvm_external_dir}" STREQUAL "") - set(add_llvm_external_dir ${name}) - endif() - list(APPEND LLVM_IMPLICIT_PROJECT_IGNORE "${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir}") - string(REPLACE "-" "_" nameUNDERSCORE ${name}) - string(TOUPPER ${nameUNDERSCORE} nameUPPER) - #TODO: Remove this check in a few days once it has circulated through - # buildbots and people's checkouts (cbieneman - July 14, 2015) - if("${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir}") - unset(LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR CACHE) - endif() - if(NOT LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR) - set(LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/${add_llvm_external_dir}") - else() - set(LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR - CACHE PATH "Path to ${name} source directory") - endif() - if (EXISTS ${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR}/CMakeLists.txt) - option(LLVM_EXTERNAL_${nameUPPER}_BUILD - "Whether to build ${name} as part of LLVM" ON) - if (LLVM_EXTERNAL_${nameUPPER}_BUILD) - add_subdirectory(${LLVM_EXTERNAL_${nameUPPER}_SOURCE_DIR} ${add_llvm_external_dir}) - endif() - endif() -endmacro(add_llvm_external_project) + add_llvm_subdirectory(LLVM TOOL ${name} ${ARGN}) +endmacro() macro(add_llvm_tool_subdirectory name) - list(APPEND LLVM_IMPLICIT_PROJECT_IGNORE "${CMAKE_CURRENT_SOURCE_DIR}/${name}") - add_subdirectory(${name}) + add_llvm_external_project(${name}) endmacro(add_llvm_tool_subdirectory) -macro(ignore_llvm_tool_subdirectory name) - list(APPEND LLVM_IMPLICIT_PROJECT_IGNORE "${CMAKE_CURRENT_SOURCE_DIR}/${name}") -endmacro(ignore_llvm_tool_subdirectory) +function(get_project_name_from_src_var var output) + string(REGEX MATCH "LLVM_EXTERNAL_(.*)_SOURCE_DIR" + MACHED_TOOL "${var}") + if(MACHED_TOOL) + set(${output} ${CMAKE_MATCH_1} PARENT_SCOPE) + else() + set(${output} PARENT_SCOPE) + endif() +endfunction() -function(add_llvm_implicit_external_projects) +function(create_subdirectory_options project type) + file(GLOB sub-dirs "${CMAKE_CURRENT_SOURCE_DIR}/*") + foreach(dir ${sub-dirs}) + if(IS_DIRECTORY "${dir}" AND EXISTS "${dir}/CMakeLists.txt") + canonicalize_tool_name(${dir} name) + option(${project}_${type}_${name}_BUILD + "Whether to build ${name} as part of ${project}" On) + mark_as_advanced(${project}_${type}_${name}_BUILD) + endif() + endforeach() +endfunction(create_subdirectory_options) + +function(create_llvm_tool_options) + create_subdirectory_options(LLVM TOOL) +endfunction(create_llvm_tool_options) + +function(add_llvm_implicit_projects) set(list_of_implicit_subdirs "") file(GLOB sub-dirs "${CMAKE_CURRENT_SOURCE_DIR}/*") foreach(dir ${sub-dirs}) - if(IS_DIRECTORY "${dir}") - list(FIND LLVM_IMPLICIT_PROJECT_IGNORE "${dir}" tool_subdir_ignore) - if( tool_subdir_ignore EQUAL -1 - AND EXISTS "${dir}/CMakeLists.txt") + if(IS_DIRECTORY "${dir}" AND EXISTS "${dir}/CMakeLists.txt") + canonicalize_tool_name(${dir} name) + if (LLVM_TOOL_${name}_BUILD) get_filename_component(fn "${dir}" NAME) list(APPEND list_of_implicit_subdirs "${fn}") endif() @@ -735,7 +861,7 @@ function(add_llvm_implicit_external_projects) foreach(external_proj ${list_of_implicit_subdirs}) add_llvm_external_project("${external_proj}") endforeach() -endfunction(add_llvm_implicit_external_projects) +endfunction(add_llvm_implicit_projects) # Generic support for adding a unittest. 
function(add_unittest test_suite test_name) @@ -754,9 +880,9 @@ function(add_unittest test_suite test_name) set(LLVM_REQUIRES_RTTI OFF) - add_llvm_executable(${test_name} ${ARGN}) + add_llvm_executable(${test_name} IGNORE_EXTERNALIZE_DEBUGINFO ${ARGN}) set(outdir ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}) - set_output_directory(${test_name} ${outdir} ${outdir}) + set_output_directory(${test_name} BINARY_DIR ${outdir} LIBRARY_DIR ${outdir}) target_link_libraries(${test_name} gtest gtest_main @@ -785,8 +911,13 @@ function(llvm_add_go_executable binary pkgpath) set(cppflags "${cppflags} -I${d}") endforeach(d) set(ldflags "${CMAKE_EXE_LINKER_FLAGS}") + if (LLVM_LINK_LLVM_DYLIB) + set(linkmode "dylib") + else() + set(linkmode "component-libs") + endif() add_custom_command(OUTPUT ${binpath} - COMMAND ${CMAKE_BINARY_DIR}/bin/llvm-go "cc=${cc}" "cxx=${cxx}" "cppflags=${cppflags}" "ldflags=${ldflags}" + COMMAND ${CMAKE_BINARY_DIR}/bin/llvm-go "go=${GO_EXECUTABLE}" "cc=${cc}" "cxx=${cxx}" "cppflags=${cppflags}" "ldflags=${ldflags}" "linkmode=${linkmode}" ${ARG_GOFLAGS} build -o ${binpath} ${pkgpath} DEPENDS llvm-config ${CMAKE_BINARY_DIR}/bin/llvm-go${CMAKE_EXECUTABLE_SUFFIX} ${llvmlibs} ${ARG_DEPENDS} @@ -939,3 +1070,132 @@ function(add_lit_testsuites project directory) endforeach() endif() endfunction() + +function(llvm_install_library_symlink name dest type) + cmake_parse_arguments(ARG "ALWAYS_GENERATE" "COMPONENT" "" ${ARGN}) + foreach(path ${CMAKE_MODULE_PATH}) + if(EXISTS ${path}/LLVMInstallSymlink.cmake) + set(INSTALL_SYMLINK ${path}/LLVMInstallSymlink.cmake) + break() + endif() + endforeach() + + set(component ${ARG_COMPONENT}) + if(NOT component) + set(component ${name}) + endif() + + set(full_name ${CMAKE_${type}_LIBRARY_PREFIX}${name}${CMAKE_${type}_LIBRARY_SUFFIX}) + set(full_dest ${CMAKE_${type}_LIBRARY_PREFIX}${dest}${CMAKE_${type}_LIBRARY_SUFFIX}) + + set(output_dir lib${LLVM_LIBDIR_SUFFIX}) + if(WIN32 AND "${type}" STREQUAL "SHARED") + set(output_dir bin) + endif() + + install(SCRIPT ${INSTALL_SYMLINK} + CODE "install_symlink(${full_name} ${full_dest} ${output_dir})" + COMPONENT ${component}) + + if (NOT CMAKE_CONFIGURATION_TYPES AND NOT ARG_ALWAYS_GENERATE) + add_custom_target(install-${name} + DEPENDS ${name} ${dest} install-${dest} + COMMAND "${CMAKE_COMMAND}" + -DCMAKE_INSTALL_COMPONENT=${name} + -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + endif() +endfunction() + +function(llvm_install_symlink name dest) + cmake_parse_arguments(ARG "ALWAYS_GENERATE" "" "" ${ARGN}) + foreach(path ${CMAKE_MODULE_PATH}) + if(EXISTS ${path}/LLVMInstallSymlink.cmake) + set(INSTALL_SYMLINK ${path}/LLVMInstallSymlink.cmake) + break() + endif() + endforeach() + + if(ARG_ALWAYS_GENERATE) + set(component ${dest}) + else() + set(component ${name}) + endif() + + set(full_name ${name}${CMAKE_EXECUTABLE_SUFFIX}) + set(full_dest ${dest}${CMAKE_EXECUTABLE_SUFFIX}) + + install(SCRIPT ${INSTALL_SYMLINK} + CODE "install_symlink(${full_name} ${full_dest} bin)" + COMPONENT ${component}) + + if (NOT CMAKE_CONFIGURATION_TYPES AND NOT ARG_ALWAYS_GENERATE) + add_custom_target(install-${name} + DEPENDS ${name} ${dest} install-${dest} + COMMAND "${CMAKE_COMMAND}" + -DCMAKE_INSTALL_COMPONENT=${name} + -P "${CMAKE_BINARY_DIR}/cmake_install.cmake") + endif() +endfunction() + +function(add_llvm_tool_symlink name dest) + cmake_parse_arguments(ARG "ALWAYS_GENERATE" "" "" ${ARGN}) + if(UNIX) + set(LLVM_LINK_OR_COPY create_symlink) + set(dest_binary "${dest}${CMAKE_EXECUTABLE_SUFFIX}") + else() + set(LLVM_LINK_OR_COPY 
copy) + set(dest_binary "${LLVM_RUNTIME_OUTPUT_INTDIR}/${dest}${CMAKE_EXECUTABLE_SUFFIX}") + endif() + + set(output_path "${LLVM_RUNTIME_OUTPUT_INTDIR}/${name}${CMAKE_EXECUTABLE_SUFFIX}") + + if(ARG_ALWAYS_GENERATE) + set_property(DIRECTORY APPEND PROPERTY + ADDITIONAL_MAKE_CLEAN_FILES ${dest_binary}) + add_custom_command(TARGET ${dest} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E ${LLVM_LINK_OR_COPY} "${dest_binary}" "${output_path}") + else() + add_custom_command(OUTPUT ${output_path} + COMMAND ${CMAKE_COMMAND} -E ${LLVM_LINK_OR_COPY} "${dest_binary}" "${output_path}" + DEPENDS ${dest}) + add_custom_target(${name} ALL DEPENDS ${output_path}) + set_target_properties(${name} PROPERTIES FOLDER Tools) + + # Make sure the parent tool is a toolchain tool, otherwise exclude this tool + list(FIND LLVM_TOOLCHAIN_TOOLS ${dest} LLVM_IS_${dest}_TOOLCHAIN_TOOL) + if (NOT LLVM_IS_${dest}_TOOLCHAIN_TOOL GREATER -1) + set(LLVM_IS_${name}_TOOLCHAIN_TOOL ${LLVM_IS_${dest}_TOOLCHAIN_TOOL}) + else() + list(FIND LLVM_TOOLCHAIN_TOOLS ${name} LLVM_IS_${name}_TOOLCHAIN_TOOL) + endif() + + # LLVM_IS_${name}_TOOLCHAIN_TOOL will only be greater than -1 if both this + # tool and its parent tool are in LLVM_TOOLCHAIN_TOOLS + if (LLVM_IS_${name}_TOOLCHAIN_TOOL GREATER -1 OR NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + if( LLVM_BUILD_TOOLS ) + llvm_install_symlink(${name} ${dest}) + endif() + endif() + endif() +endfunction() + +function(llvm_externalize_debuginfo name) + if(NOT LLVM_EXTERNALIZE_DEBUGINFO) + return() + endif() + + if(APPLE) + if(CMAKE_CXX_FLAGS MATCHES "-flto" + OR CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE} MATCHES "-flto") + + set(lto_object ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${name}-lto.o) + set_property(TARGET ${name} APPEND_STRING PROPERTY + LINK_FLAGS " -Wl,-object_path_lto,${lto_object}") + endif() + add_custom_command(TARGET ${name} POST_BUILD + COMMAND xcrun dsymutil $<TARGET_FILE:${name}> + COMMAND xcrun strip -Sl $<TARGET_FILE:${name}>) + else() + message(FATAL_ERROR "LLVM_EXTERNALIZE_DEBUGINFO isn't implemented for non-darwin platforms!") + endif() +endfunction() diff --git a/cmake/modules/AddLLVMDefinitions.cmake b/cmake/modules/AddLLVMDefinitions.cmake index 33ac9731db5d..dab16236d3e9 100644 --- a/cmake/modules/AddLLVMDefinitions.cmake +++ b/cmake/modules/AddLLVMDefinitions.cmake @@ -7,7 +7,11 @@ macro(add_llvm_definitions) # We don't want no semicolons on LLVM_DEFINITIONS: foreach(arg ${ARGN}) - set(LLVM_DEFINITIONS "${LLVM_DEFINITIONS} ${arg}") + if(DEFINED LLVM_DEFINITIONS) + set(LLVM_DEFINITIONS "${LLVM_DEFINITIONS} ${arg}") + else() + set(LLVM_DEFINITIONS ${arg}) + endif() endforeach(arg) add_definitions( ${ARGN} ) endmacro(add_llvm_definitions) diff --git a/cmake/modules/CrossCompile.cmake b/cmake/modules/CrossCompile.cmake index 76a3078a5440..c136dfaa612e 100644 --- a/cmake/modules/CrossCompile.cmake +++ b/cmake/modules/CrossCompile.cmake @@ -19,6 +19,7 @@ function(llvm_create_cross_target_internal target_name toochain buildtype) add_custom_command(OUTPUT ${LLVM_${target_name}_BUILD}/CMakeCache.txt COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR} + -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE WORKING_DIRECTORY ${LLVM_${target_name}_BUILD} DEPENDS ${LLVM_${target_name}_BUILD} COMMENT "Configuring ${target_name} LLVM...") @@ -43,6 +44,7 @@ function(llvm_create_cross_target_internal target_name toochain buildtype) execute_process(COMMAND ${CMAKE_COMMAND} ${build_type_flags} -G "${CMAKE_GENERATOR}" -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR} + -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE WORKING_DIRECTORY ${LLVM_${target_name}_BUILD} ) endif(NOT IS_DIRECTORY ${LLVM_${target_name}_BUILD}) diff --git a/cmake/modules/DetermineGCCCompatible.cmake b/cmake/modules/DetermineGCCCompatible.cmake new file mode 100644 index 000000000000..1bf15fcba72f --- /dev/null +++ b/cmake/modules/DetermineGCCCompatible.cmake @@ -0,0 +1,11 @@ +# Determine if the compiler has GCC-compatible command-line syntax. + +if(NOT DEFINED LLVM_COMPILER_IS_GCC_COMPATIBLE) + if(CMAKE_COMPILER_IS_GNUCXX) + set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) + elseif( MSVC ) + set(LLVM_COMPILER_IS_GCC_COMPATIBLE OFF) + elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) + set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) + endif() +endif() diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 4db27033b203..4c5ffe2f7b28 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -132,7 +132,8 @@ endif() # Pass -Wl,-z,defs. This makes sure all symbols are defined. Otherwise a DSO # build might work on ELF but fail on MachO/COFF. if(NOT (${CMAKE_SYSTEM_NAME} MATCHES "Darwin" OR WIN32 OR CYGWIN OR - ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD") AND + ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR + ${CMAKE_SYSTEM_NAME} MATCHES "OpenBSD") AND NOT LLVM_USE_SANITIZER) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-z,defs") endif() @@ -166,6 +167,7 @@ function(add_flag_or_print_warning flag name) message(STATUS "Building with ${flag}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${flag}" PARENT_SCOPE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${flag}" PARENT_SCOPE) + set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${flag}" PARENT_SCOPE) else() message(WARNING "${flag} is not supported.") endif() @@ -180,16 +182,15 @@ if( LLVM_ENABLE_PIC ) # On Windows all code is PIC. MinGW warns if -fPIC is used. else() add_flag_or_print_warning("-fPIC" FPIC) - - if( WIN32 OR CYGWIN) - # MinGW warns if -fvisibility-inlines-hidden is used. - else() - check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG) - append_if(SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG "-fvisibility-inlines-hidden" CMAKE_CXX_FLAGS) - endif() endif() endif() +if(NOT WIN32 AND NOT CYGWIN) + # MinGW warns if -fvisibility-inlines-hidden is used. + check_cxx_compiler_flag("-fvisibility-inlines-hidden" SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG) + append_if(SUPPORTS_FVISIBILITY_INLINES_HIDDEN_FLAG "-fvisibility-inlines-hidden" CMAKE_CXX_FLAGS) +endif() + if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) # TODO: support other platforms and toolchains. if( LLVM_BUILD_32_BITS ) @@ -246,6 +247,12 @@ if( MSVC_IDE ) endif() if( MSVC ) + if( CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0 ) + # For MSVC 2013, disable iterator null pointer checking in debug mode, + # especially so std::equal(nullptr, nullptr, nullptr) will not assert. + add_llvm_definitions("-D_DEBUG_POINTER_IMPL=") + endif() + include(ChooseMSVCCRT) if( NOT (${CMAKE_VERSION} VERSION_LESS 2.8.11) ) @@ -274,6 +281,7 @@ if( MSVC ) set(msvc_warning_flags # Disabled warnings. 
+ -wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline) -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned' -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored' -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data' @@ -307,6 +315,11 @@ if( MSVC ) -wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable' -wd4805 # Suppress 'unsafe mix of type and type in operation' -wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer' + -wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed' + -wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared' + # C4592 is disabled because of false positives in Visual Studio 2015 + # Update 1. Re-evaluate the usefulness of this diagnostic with Update 2. + -wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation) # Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't # support the 'aligned' attribute in the way that clang sources requires (for @@ -325,7 +338,10 @@ if( MSVC ) # Enable warnings if (LLVM_ENABLE_WARNINGS) - append("/W4" msvc_warning_flags) + # Put /W4 in front of all the -we flags. cl.exe doesn't care, but for + # clang-cl having /W4 after the -we flags will re-enable the warnings + # disabled by -we. + set(msvc_warning_flags "/W4 ${msvc_warning_flags}") # CMake appends /W3 by default, and having /W3 followed by /W4 will result in # cl : Command line warning D9025 : overriding '/W3' with '/W4'. Since this is # a command line warning and not a compiler warning, it cannot be suppressed except @@ -345,6 +361,8 @@ if( MSVC ) append("${flag}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endforeach(flag) + append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + # Disable sized deallocation if the flag is supported. MSVC fails to compile # the operator new overload in User otherwise. check_c_compiler_flag("/WX /Zc:sizedDealloc-" SUPPORTS_SIZED_DEALLOC) @@ -367,7 +385,8 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) endif() endif() - append_if(LLVM_ENABLE_PEDANTIC "-pedantic -Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + append_if(LLVM_ENABLE_PEDANTIC "-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + append_if(LLVM_ENABLE_PEDANTIC "-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG) append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS) append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS) @@ -375,16 +394,23 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) # Check if -Wnon-virtual-dtor warns even though the class is marked final. # If it does, don't add it. So it won't be added on clang 3.4 and older. # This also catches cases when -Wnon-virtual-dtor isn't supported by - # the compiler at all. - set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor") - CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();}; - class derived final : public base { public: ~derived();}; - int main() { return 0; }" - CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR) - set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) - append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR - "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS) + # the compiler at all. 
This flag is not activated for gcc since it will + # incorrectly identify a protected non-virtual base when there is a friend + # declaration. + if (NOT CMAKE_COMPILER_IS_GNUCXX) + set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor") + CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();}; + class derived final : public base { public: ~derived();}; + int main() { return 0; }" + CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR) + set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS}) + append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR + "-Wnon-virtual-dtor" CMAKE_CXX_FLAGS) + endif() + + # Enable -Wdelete-non-virtual-dtor if available. + add_flag_if_supported("-Wdelete-non-virtual-dtor" DELETE_NON_VIRTUAL_DTOR_FLAG) # Check if -Wcomment is OK with an // comment ending with '\' if the next # line is also a // comment. @@ -420,7 +446,7 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) endif() if (LLVM_ENABLE_MODULES) set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) - set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -fmodules -fcxx-modules") + set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -fmodules") # Check that we can build code with modules enabled, and that repeatedly # including still manages to respect NDEBUG properly. CHECK_CXX_SOURCE_COMPILES("#undef NDEBUG @@ -440,16 +466,34 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) endif( MSVC ) macro(append_common_sanitizer_flags) - # Append -fno-omit-frame-pointer and turn on debug info to get better - # stack traces. - add_flag_if_supported("-fno-omit-frame-pointer" FNO_OMIT_FRAME_POINTER) - if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND - NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO") - add_flag_if_supported("-gline-tables-only" GLINE_TABLES_ONLY) - endif() - # Use -O1 even in debug mode, otherwise sanitizers slowdown is too large. - if (uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") - add_flag_if_supported("-O1" O1) + if (NOT MSVC) + # Append -fno-omit-frame-pointer and turn on debug info to get better + # stack traces. + add_flag_if_supported("-fno-omit-frame-pointer" FNO_OMIT_FRAME_POINTER) + if (NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND + NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO") + add_flag_if_supported("-gline-tables-only" GLINE_TABLES_ONLY) + endif() + # Use -O1 even in debug mode, otherwise sanitizers slowdown is too large. + if (uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG") + add_flag_if_supported("-O1" O1) + endif() + elseif (CLANG_CL) + # Keep frame pointers around. + append("/Oy-" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + if (CMAKE_LINKER MATCHES "lld-link.exe") + # Use DWARF debug info with LLD. + append("-gdwarf" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + # Pass /MANIFEST:NO so that CMake doesn't run mt.exe on our binaries. + # Adding manifests with mt.exe breaks LLD's symbol tables. See PR24476. + append("/MANIFEST:NO" + CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) + else() + # Enable codeview otherwise. + append("/Z7" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + endif() + # Always ask the linker to produce symbols with asan. 
+ append("-debug" CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS) endif() endmacro() @@ -478,10 +522,17 @@ if(LLVM_USE_SANITIZER) append("-fsanitize=address,undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) else() - message(WARNING "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}") + message(FATAL_ERROR "Unsupported value of LLVM_USE_SANITIZER: ${LLVM_USE_SANITIZER}") + endif() + elseif(MSVC) + if (LLVM_USE_SANITIZER STREQUAL "Address") + append_common_sanitizer_flags() + append("-fsanitize=address" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) + else() + message(FATAL_ERROR "This sanitizer not yet supported in the MSVC environment: ${LLVM_USE_SANITIZER}") endif() else() - message(WARNING "LLVM_USE_SANITIZER is not supported on this platform.") + message(FATAL_ERROR "LLVM_USE_SANITIZER is not supported on this platform.") endif() if (LLVM_USE_SANITIZE_COVERAGE) append("-fsanitize-coverage=edge,indirect-calls,8bit-counters,trace-cmp" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) @@ -546,6 +597,14 @@ if(LLVM_ENABLE_EH AND NOT LLVM_ENABLE_RTTI) message(FATAL_ERROR "Exception handling requires RTTI. You must set LLVM_ENABLE_RTTI to ON") endif() +option(LLVM_BUILD_INSTRUMENTED "Build LLVM and tools with PGO instrumentation (experimental)" Off) +mark_as_advanced(LLVM_BUILD_INSTRUMENTED) +append_if(LLVM_BUILD_INSTRUMENTED "-fprofile-instr-generate" + CMAKE_CXX_FLAGS + CMAKE_C_FLAGS + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) + # Plugin support # FIXME: Make this configurable. if(WIN32 OR CYGWIN) diff --git a/cmake/modules/HandleLLVMStdlib.cmake b/cmake/modules/HandleLLVMStdlib.cmake index 66ad078fb66e..b07781c3f290 100644 --- a/cmake/modules/HandleLLVMStdlib.cmake +++ b/cmake/modules/HandleLLVMStdlib.cmake @@ -1,17 +1,11 @@ # This CMake module is responsible for setting the standard library to libc++ # if the user has requested it. +include(DetermineGCCCompatible) + if(NOT DEFINED LLVM_STDLIB_HANDLED) set(LLVM_STDLIB_HANDLED ON) - if(CMAKE_COMPILER_IS_GNUCXX) - set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) - elseif( MSVC ) - set(LLVM_COMPILER_IS_GCC_COMPATIBLE OFF) - elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) - set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) - endif() - function(append value) foreach(variable ${ARGN}) set(${variable} "${${variable}} ${value}" PARENT_SCOPE) diff --git a/cmake/modules/LLVM-Config.cmake b/cmake/modules/LLVM-Config.cmake index 22ac71451917..aa68b4007602 100755 --- a/cmake/modules/LLVM-Config.cmake +++ b/cmake/modules/LLVM-Config.cmake @@ -31,7 +31,23 @@ endfunction(is_llvm_target_library) macro(llvm_config executable) - explicit_llvm_config(${executable} ${ARGN}) + cmake_parse_arguments(ARG "USE_SHARED" "" "" ${ARGN}) + set(link_components ${ARG_UNPARSED_ARGUMENTS}) + + if(USE_SHARED) + # If USE_SHARED is specified, then we link against libLLVM, + # but also against the component libraries below. This is + # done in case libLLVM does not contain all of the components + # the target requires. + # + # TODO strip LLVM_DYLIB_COMPONENTS out of link_components. + # To do this, we need special handling for "all", since that + # may imply linking to libraries that are not included in + # libLLVM. 
+ target_link_libraries(${executable} LLVM) + endif() + + explicit_llvm_config(${executable} ${link_components}) endmacro(llvm_config) diff --git a/cmake/modules/LLVMConfig.cmake.in b/cmake/modules/LLVMConfig.cmake.in index 5b7789dbc9a1..6855c4422543 100644 --- a/cmake/modules/LLVMConfig.cmake.in +++ b/cmake/modules/LLVMConfig.cmake.in @@ -39,6 +39,9 @@ set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@) set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@) +set(LLVM_ENABLE_PLUGINS @LLVM_ENABLE_PLUGINS@) +set(LLVM_PLUGIN_EXT @LLVM_PLUGIN_EXT@) + set(LLVM_ON_UNIX @LLVM_ON_UNIX@) set(LLVM_ON_WIN32 @LLVM_ON_WIN32@) @@ -46,7 +49,7 @@ set(LLVM_LIBDIR_SUFFIX @LLVM_LIBDIR_SUFFIX@) set(LLVM_INCLUDE_DIRS "@LLVM_CONFIG_INCLUDE_DIRS@") set(LLVM_LIBRARY_DIRS "@LLVM_CONFIG_LIBRARY_DIRS@") -set(LLVM_DEFINITIONS "-D__STDC_LIMIT_MACROS" "-D__STDC_CONSTANT_MACROS") +set(LLVM_DEFINITIONS "@LLVM_DEFINITIONS@") set(LLVM_CMAKE_DIR "@LLVM_CONFIG_CMAKE_DIR@") set(LLVM_TOOLS_BINARY_DIR "@LLVM_CONFIG_TOOLS_BINARY_DIR@") diff --git a/cmake/modules/LLVMExternalProjectUtils.cmake b/cmake/modules/LLVMExternalProjectUtils.cmake new file mode 100644 index 000000000000..c2d9f530c200 --- /dev/null +++ b/cmake/modules/LLVMExternalProjectUtils.cmake @@ -0,0 +1,195 @@ +include(ExternalProject) + +# llvm_ExternalProject_BuildCmd(out_var target) +# Utility function for constructing command lines for external project targets +function(llvm_ExternalProject_BuildCmd out_var target) + if (CMAKE_GENERATOR MATCHES "Make") + # Use special command for Makefiles to support parallelism. + set(${out_var} "$(MAKE)" "${target}" PARENT_SCOPE) + else() + set(${out_var} ${CMAKE_COMMAND} --build . --target ${target} + --config $<CONFIG> PARENT_SCOPE) + endif() +endfunction() + +# llvm_ExternalProject_Add(name source_dir ... +# USE_TOOLCHAIN +# Use just-built tools (see TOOLCHAIN_TOOLS) +# EXCLUDE_FROM_ALL +# Exclude this project from the all target +# NO_INSTALL +# Don't generate install targets for this project +# CMAKE_ARGS arguments... +# Optional cmake arguments to pass when configuring the project +# TOOLCHAIN_TOOLS targets... +# Targets for toolchain tools (defaults to clang;lld) +# DEPENDS targets... +# Targets that this project depends on +# EXTRA_TARGETS targets...
+# Extra targets in the subproject to generate targets for +# ) +function(llvm_ExternalProject_Add name source_dir) + cmake_parse_arguments(ARG "USE_TOOLCHAIN;EXCLUDE_FROM_ALL;NO_INSTALL" + "SOURCE_DIR" + "CMAKE_ARGS;TOOLCHAIN_TOOLS;RUNTIME_LIBRARIES;DEPENDS;EXTRA_TARGETS" ${ARGN}) + canonicalize_tool_name(${name} nameCanon) + if(NOT ARG_TOOLCHAIN_TOOLS) + set(ARG_TOOLCHAIN_TOOLS clang lld) + endif() + foreach(tool ${ARG_TOOLCHAIN_TOOLS}) + if(TARGET ${tool}) + list(APPEND TOOLCHAIN_TOOLS ${tool}) + list(APPEND TOOLCHAIN_BINS $<TARGET_FILE:${tool}>) + endif() + endforeach() + + if(NOT ARG_RUNTIME_LIBRARIES) + set(ARG_RUNTIME_LIBRARIES compiler-rt libcxx) + endif() + foreach(lib ${ARG_RUNTIME_LIBRARIES}) + if(TARGET ${lib}) + list(APPEND RUNTIME_LIBRARIES ${lib}) + endif() + endforeach() + + list(FIND TOOLCHAIN_TOOLS clang FOUND_CLANG) + if(FOUND_CLANG GREATER -1) + set(CLANG_IN_TOOLCHAIN On) + endif() + + if(RUNTIME_LIBRARIES AND CLANG_IN_TOOLCHAIN) + list(APPEND TOOLCHAIN_BINS ${RUNTIME_LIBRARIES}) + endif() + + if(CMAKE_VERSION VERSION_GREATER 3.1.0) + set(cmake_3_1_EXCLUDE_FROM_ALL EXCLUDE_FROM_ALL 1) + endif() + + if(CMAKE_VERSION VERSION_GREATER 3.3.20150708) + set(cmake_3_4_USES_TERMINAL_OPTIONS + USES_TERMINAL_CONFIGURE 1 + USES_TERMINAL_BUILD 1 + USES_TERMINAL_INSTALL 1 + ) + set(cmake_3_4_USES_TERMINAL USES_TERMINAL 1) + endif() + + if(CMAKE_VERSION VERSION_GREATER 3.1.20141116) + set(cmake_3_2_USES_TERMINAL USES_TERMINAL) + endif() + + set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-stamps/) + set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/${name}-bins/) + + add_custom_target(${name}-clear + COMMAND ${CMAKE_COMMAND} -E remove_directory ${BINARY_DIR} + COMMAND ${CMAKE_COMMAND} -E remove_directory ${STAMP_DIR} + COMMENT "Clobbering ${name} build and stamp directories" + ${cmake_3_2_USES_TERMINAL} + ) + + # Find all variables that start with the canonicalized project name (e.g. + # COMPILER_RT) and populate a variable with them.
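# A hedged illustration of the pass-through below (the cache variable is hypothetical): for name "compiler-rt", nameCanon is COMPILER_RT, so configuring the host build with # -DCOMPILER_RT_EXAMPLE_OPTION=ON # would re-emit that same -D definition on the external project's CMake command line.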
+  get_cmake_property(variableNames VARIABLES)
+  foreach(variableName ${variableNames})
+    if(variableName MATCHES "^${nameCanon}")
+      string(REPLACE ";" "\;" value "${${variableName}}")
+      list(APPEND PASSTHROUGH_VARIABLES
+        -D${variableName}=${value})
+    endif()
+  endforeach()
+
+  if(ARG_USE_TOOLCHAIN)
+    if(CLANG_IN_TOOLCHAIN)
+      set(compiler_args -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang
+                        -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++)
+    endif()
+    list(APPEND ARG_DEPENDS ${TOOLCHAIN_TOOLS})
+  endif()
+
+  add_custom_command(
+    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp
+    DEPENDS ${ARG_DEPENDS}
+    COMMAND ${CMAKE_COMMAND} -E touch ${BINARY_DIR}/CMakeCache.txt
+    COMMAND ${CMAKE_COMMAND} -E touch ${STAMP_DIR}/${name}-mkdir
+    COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp
+    COMMENT "Clobbering bootstrap build and stamp directories"
+    )
+
+  add_custom_target(${name}-clobber
+    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp)
+
+  if(ARG_EXCLUDE_FROM_ALL)
+    set(exclude ${cmake_3_1_EXCLUDE_FROM_ALL})
+  endif()
+
+  ExternalProject_Add(${name}
+    DEPENDS ${ARG_DEPENDS}
+            ${name}-clobber
+    PREFIX ${CMAKE_BINARY_DIR}/projects/${name}
+    SOURCE_DIR ${source_dir}
+    STAMP_DIR ${STAMP_DIR}
+    BINARY_DIR ${BINARY_DIR}
+    ${exclude}
+    CMAKE_ARGS ${${nameCanon}_CMAKE_ARGS}
+               ${compiler_args}
+               -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
+               ${ARG_CMAKE_ARGS}
+               ${PASSTHROUGH_VARIABLES}
+    INSTALL_COMMAND ""
+    STEP_TARGETS configure build
+    ${cmake_3_4_USES_TERMINAL_OPTIONS}
+    )
+
+  if(ARG_USE_TOOLCHAIN)
+    ExternalProject_Add_Step(${name} force-rebuild
+      COMMENT "Forcing rebuild because tools have changed"
+      DEPENDERS configure
+      DEPENDS ${TOOLCHAIN_BINS}
+      ${cmake_3_4_USES_TERMINAL} )
+  endif()
+
+  if(ARG_USE_TOOLCHAIN)
+    set(force_deps DEPENDS ${TOOLCHAIN_BINS})
+  endif()
+
+  llvm_ExternalProject_BuildCmd(run_clean clean)
+  ExternalProject_Add_Step(${name} clean
+    COMMAND ${run_clean}
+    COMMENT "Cleaning ${name}..."
+    DEPENDEES configure
+    ${force_deps}
+    WORKING_DIRECTORY ${BINARY_DIR}
+    ${cmake_3_4_USES_TERMINAL}
+    )
+  ExternalProject_Add_StepTargets(${name} clean)
+
+  if(ARG_USE_TOOLCHAIN)
+    add_dependencies(${name}-clean ${name}-clobber)
+    set_target_properties(${name}-clean PROPERTIES
+      SOURCES ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp)
+  endif()
+
+  if(NOT ARG_NO_INSTALL)
+    install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${BINARY_DIR}/cmake_install.cmake \)"
+      COMPONENT ${name})
+
+    add_custom_target(install-${name}
+      DEPENDS ${name}
+      COMMAND "${CMAKE_COMMAND}"
+              -DCMAKE_INSTALL_COMPONENT=${name}
+              -P "${CMAKE_BINARY_DIR}/cmake_install.cmake"
+      ${cmake_3_2_USES_TERMINAL})
+  endif()
+
+  # Add top-level targets
+  foreach(target ${ARG_EXTRA_TARGETS})
+    llvm_ExternalProject_BuildCmd(build_runtime_cmd ${target})
+    add_custom_target(${target}
+      COMMAND ${build_runtime_cmd}
+      DEPENDS ${name}-configure
+      WORKING_DIRECTORY ${BINARY_DIR}
+      VERBATIM
+      ${cmake_3_2_USES_TERMINAL})
+  endforeach()
+endfunction()
diff --git a/cmake/modules/LLVMInstallSymlink.cmake b/cmake/modules/LLVMInstallSymlink.cmake
new file mode 100644
index 000000000000..482697b06baf
--- /dev/null
+++ b/cmake/modules/LLVMInstallSymlink.cmake
@@ -0,0 +1,21 @@
+# We need to execute this script at installation time because the
+# DESTDIR environment variable may be unset at configuration time.
+# See PR8397.
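Before the install-symlink helper continues below, here is a minimal sketch of how a caller might invoke the ``llvm_ExternalProject_Add`` function added above. The project name, source path, and the ``COMPILER_RT_BUILD_BUILTINS`` cache variable are illustrative assumptions rather than code from this patch; only the option names come from the function's own documentation.

.. code-block:: cmake

   include(LLVMExternalProjectUtils)

   # Hypothetical passthrough variable: it reaches the subproject's CMake
   # cache because its name begins with the canonicalized project name
   # (compiler-rt -> COMPILER_RT), per the variable-matching loop above.
   set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")

   llvm_ExternalProject_Add(compiler-rt ${CMAKE_SOURCE_DIR}/projects/compiler-rt
     USE_TOOLCHAIN                     # configure with the just-built clang/clang++
     CMAKE_ARGS -DCOMPILER_RT_INCLUDE_TESTS=ON
     EXTRA_TARGETS check-compiler-rt   # re-exported as a top-level target
     )

With ``USE_TOOLCHAIN``, the generated ``force-rebuild`` step reconfigures the external project whenever one of the toolchain binaries changes, which is the behavior the stamp and clobber machinery above exists to support.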
+ +function(install_symlink name target outdir) + if(UNIX) + set(LINK_OR_COPY create_symlink) + set(DESTDIR $ENV{DESTDIR}) + else() + set(LINK_OR_COPY copy) + endif() + + set(bindir "${DESTDIR}${CMAKE_INSTALL_PREFIX}/${outdir}/") + + message("Creating ${name}") + + execute_process( + COMMAND "${CMAKE_COMMAND}" -E ${LINK_OR_COPY} "${target}" "${name}" + WORKING_DIRECTORY "${bindir}") + +endfunction() diff --git a/cmake/modules/Makefile b/cmake/modules/Makefile index f644c45dcd0d..abfda93b210f 100644 --- a/cmake/modules/Makefile +++ b/cmake/modules/Makefile @@ -9,8 +9,6 @@ LEVEL = ../.. -LINK_COMPONENTS := all - include $(LEVEL)/Makefile.common PROJ_cmake := $(DESTDIR)$(PROJ_prefix)/share/llvm/cmake @@ -39,24 +37,9 @@ else LLVM_ENABLE_RTTI := 0 endif -# Don't try to run llvm-config during clean because it won't be available -ifneq ($(MAKECMDGOALS),clean) -LLVM_LIBS_TO_EXPORT := $(subst -l,,$(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS) || echo Error)) - -ifeq ($(LLVM_LIBS_TO_EXPORT),Error) -$(error llvm-config --libs failed) -endif - -# Strip out gtest and gtest_main from LLVM_LIBS_TO_EXPORT, these are not -# installed and won't be available from the install tree. -# FIXME: If we used llvm-config from the install tree this wouldn't be -# necessary. -LLVM_LIBS_TO_EXPORT := $(filter-out gtest gtest_main,$(LLVM_LIBS_TO_EXPORT)) - ifndef LLVM_LIBS_TO_EXPORT $(error LLVM_LIBS_TO_EXPORT cannot be empty) endif -endif OBJMODS := LLVMConfig.cmake LLVMConfigVersion.cmake LLVMExports.cmake @@ -135,7 +118,7 @@ $(PROJ_OBJ_DIR)/LLVMExports.cmake: $(LLVMBuildCMakeExportsFrag) Makefile done && \ cat "$(LLVMBuildCMakeExportsFrag)" && \ echo 'set_property(TARGET LLVMSupport APPEND PROPERTY IMPORTED_LINK_INTERFACE_LIBRARIES '"$(subst -l,,$(LIBS))"')' \ - ) | grep -v gtest > $@ + ) > $@ all-local:: $(addprefix $(PROJ_OBJ_DIR)/, $(OBJMODS)) diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index 85d720e91fd7..fca7d1bda4b3 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -70,9 +70,31 @@ function(add_public_tablegen_target target) set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} ${target} PARENT_SCOPE) endfunction() +if(LLVM_USE_HOST_TOOLS) + add_custom_command(OUTPUT LIB_LLVMSUPPORT + COMMAND ${CMAKE_COMMAND} --build . --target LLVMSupport --config Release + DEPENDS CONFIGURE_LLVM_NATIVE + WORKING_DIRECTORY ${LLVM_NATIVE_BUILD} + COMMENT "Building libLLVMSupport for native TableGen...") + add_custom_target(NATIVE_LIB_LLVMSUPPORT DEPENDS LIB_LLVMSUPPORT) + + add_custom_command(OUTPUT LIB_LLVMTABLEGEN + COMMAND ${CMAKE_COMMAND} --build . --target LLVMTableGen --config Release + DEPENDS CONFIGURE_LLVM_NATIVE + WORKING_DIRECTORY ${LLVM_NATIVE_BUILD} + COMMENT "Building libLLVMTableGen for native TableGen...") + add_custom_target(NATIVE_LIB_LLVMTABLEGEN DEPENDS LIB_LLVMTABLEGEN) +endif(LLVM_USE_HOST_TOOLS) + macro(add_tablegen target project) set(${target}_OLD_LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS}) set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen) + + if(NOT XCODE) + # FIXME: It leaks to user, callee of add_tablegen. + set(LLVM_ENABLE_OBJLIB ON) + endif() + add_llvm_utility(${target} ${ARGN}) set(LLVM_LINK_COMPONENTS ${${target}_OLD_LLVM_LINK_COMPONENTS}) @@ -103,7 +125,7 @@ macro(add_tablegen target project) add_custom_command(OUTPUT ${${project}_TABLEGEN_EXE} COMMAND ${CMAKE_COMMAND} --build . 
                       --target ${target} --config Release
-    DEPENDS CONFIGURE_LLVM_NATIVE ${target}
+    DEPENDS ${target} NATIVE_LIB_LLVMSUPPORT NATIVE_LIB_LLVMTABLEGEN
     WORKING_DIRECTORY ${LLVM_NATIVE_BUILD}
     COMMENT "Building native TableGen...")
   add_custom_target(${project}-tablegen-host DEPENDS ${${project}_TABLEGEN_EXE})
diff --git a/configure b/configure
index c192415c24a8..33438c60365e 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.60 for LLVM 3.7.1.
+# Generated by GNU Autoconf 2.60 for LLVM 3.8.0svn.
 #
 # Report bugs to <http://llvm.org/bugs/>.
 #
@@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh}
 # Identity of this package.
 PACKAGE_NAME='LLVM'
 PACKAGE_TARNAME='llvm'
-PACKAGE_VERSION='3.7.1'
-PACKAGE_STRING='LLVM 3.7.1'
+PACKAGE_VERSION='3.8.0svn'
+PACKAGE_STRING='LLVM 3.8.0svn'
 PACKAGE_BUGREPORT='http://llvm.org/bugs/'
 
 ac_unique_file="lib/IR/Module.cpp"
@@ -655,6 +655,7 @@ CXX
 CXXFLAGS
 ac_ct_CXX
 CPP
+CXX_COMPILER
 subdirs
 ENABLE_POLLY
 LLVM_HAS_POLLY
@@ -777,7 +778,7 @@ GAS
 HAVE_LINK_VERSION_SCRIPT
 EGREP
 NO_VARIADIC_MACROS
-NO_MISSING_FIELD_INITIALIZERS
+MISSING_FIELD_INITIALIZERS
 COVERED_SWITCH_DEFAULT
 NO_MAYBE_UNINITIALIZED
 NO_UNINITIALIZED
@@ -1333,7 +1334,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures LLVM 3.7.1 to adapt to many kinds of systems.
+\`configure' configures LLVM 3.8.0svn to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1399,7 +1400,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of LLVM 3.7.1:";;
+     short | recursive ) echo "Configuration of LLVM 3.8.0svn:";;
   esac
   cat <<\_ACEOF
 
@@ -1583,7 +1584,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-LLVM configure 3.7.1
+LLVM configure 3.8.0svn
 generated by GNU Autoconf 2.60
 
 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
@@ -1599,7 +1600,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by LLVM $as_me 3.7.1, which was
+It was created by LLVM $as_me 3.8.0svn, which was
 generated by GNU Autoconf 2.60.  Invocation command line was
 
   $ $0 $@
@@ -1954,9 +1955,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 LLVM_VERSION_MAJOR=3
-LLVM_VERSION_MINOR=7
-LLVM_VERSION_PATCH=1
-LLVM_VERSION_SUFFIX=
+LLVM_VERSION_MINOR=8
+LLVM_VERSION_PATCH=0
+LLVM_VERSION_SUFFIX=svn
 
 
 cat >>confdefs.h <<_ACEOF
@@ -2032,7 +2033,7 @@ echo "$as_me: error: Already configured in ${srcdir}" >&2;}
   fi
 fi
 
-if test ${srcdir} == "." ; then
+if test ${srcdir} = "." ; then
   { { echo "$as_me:$LINENO: error: In-source builds are not allowed. Please configure from a separate build directory!" >&5
echo "$as_me: error: In-source builds are not allowed. Please configure from a separate build directory!"
>&2;} { (exit 1); exit 1; }; } @@ -3752,6 +3753,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu { echo "$as_me:$LINENO: result: ${llvm_cv_cxx_compiler}" >&5 echo "${ECHO_T}${llvm_cv_cxx_compiler}" >&6; } +CXX_COMPILER=$llvm_cv_cxx_compiler + @@ -4185,6 +4188,7 @@ else arm64*-*) llvm_cv_target_arch="AArch64" ;; arm*-*) llvm_cv_target_arch="ARM" ;; aarch64*-*) llvm_cv_target_arch="AArch64" ;; + avr-*) llvm_cv_target_arch="AVR" ;; mips-* | mips64-*) llvm_cv_target_arch="Mips" ;; mipsel-* | mips64el-*) llvm_cv_target_arch="Mips" ;; xcore-*) llvm_cv_target_arch="XCore" ;; @@ -4223,6 +4227,7 @@ case $host in arm64*-*) host_arch="AArch64" ;; arm*-*) host_arch="ARM" ;; aarch64*-*) host_arch="AArch64" ;; + avr-*) host_arch="AVR" ;; mips-* | mips64-*) host_arch="Mips" ;; mipsel-* | mips64el-*) host_arch="Mips" ;; xcore-*) host_arch="XCore" ;; @@ -5144,6 +5149,8 @@ else x86_64) TARGET_HAS_JIT=1 ;; ARM) TARGET_HAS_JIT=1 + ;; + AVR) TARGET_HAS_JIT=0 ;; Mips) TARGET_HAS_JIT=1 ;; @@ -5947,7 +5954,7 @@ _ACEOF if test "${with_clang_default_openmp_runtime+set}" = set; then withval=$with_clang_default_openmp_runtime; else - withval="libgomp" + withval="libomp" fi @@ -8379,25 +8386,33 @@ echo "${ECHO_T}ok" >&6; } { echo "$as_me:$LINENO: checking optional compiler flags" >&5 echo $ECHO_N "checking optional compiler flags... $ECHO_C" >&6; } -NO_VARIADIC_MACROS=`$CXX -Werror -Wvariadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros` +case "$llvm_cv_cxx_compiler" in + clang) + NO_VARIADIC_MACROS=`$CXX -Werror -Wvariadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros` -NO_MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wmissing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers` + MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wmissing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wmissing-field-initializers` -COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default` + COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default` + ;; + gcc) + MISSING_FIELD_INITIALIZERS=`$CXX -Werror -Wmissing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers` -NO_UNINITIALIZED= -NO_MAYBE_UNINITIALIZED= -if test "$GXX" = "yes" -then - NO_MAYBE_UNINITIALIZED=`$CXX -Werror -Wmaybe-uninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-maybe-uninitialized` + NO_VARIADIC_MACROS=`$CXX -Werror -Wvariadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros` - if test -z "$NO_MAYBE_UNINITIALIZED" - then - NO_UNINITIALIZED=`$CXX -Werror -Wuninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-uninitialized` + COVERED_SWITCH_DEFAULT=`$CXX -Werror -Wcovered-switch-default -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wcovered-switch-default` - fi -fi + NO_MAYBE_UNINITIALIZED=`$CXX -Werror -Wmaybe-uninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-maybe-uninitialized` + + if test -z "$NO_MAYBE_UNINITIALIZED" + then + NO_UNINITIALIZED=`$CXX -Werror -Wuninitialized -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-uninitialized` + + fi + ;; + unknown) + ;; +esac no_comment= llvm_cv_old_cxxflags="$CXXFLAGS" @@ -8464,8 +8479,8 @@ NO_COMMENT=$no_comment CXXFLAGS="$llvm_cv_old_cxxflags" -{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS 
$COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT" >&5
-echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT" >&6; }
+{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT" >&5
+echo "${ECHO_T}$NO_VARIADIC_MACROS $MISSING_FIELD_INITIALIZERS $COVERED_SWITCH_DEFAULT $NO_UNINITIALIZED $NO_MAYBE_UNINITIALIZED $NO_COMMENT" >&6; }
 
 
 # Check whether --with-python was given.
@@ -8885,6 +8900,87 @@ _ACEOF
 
 fi
 
+
+{ echo "$as_me:$LINENO: checking for main in -luuid" >&5
+echo $ECHO_N "checking for main in -luuid... $ECHO_C" >&6; }
+if test "${ac_cv_lib_uuid_main+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-luuid $LIBS"
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h. */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h. */
+
+
+int
+main ()
+{
+return main ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_lib_uuid_main=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_lib_uuid_main=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_lib_uuid_main" >&5
+echo "${ECHO_T}$ac_cv_lib_uuid_main" >&6; }
+if test $ac_cv_lib_uuid_main = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define HAVE_LIBUUID 1
+_ACEOF
+
+  LIBS="-luuid $LIBS"
+
+fi
+
 fi
 
 { echo "$as_me:$LINENO: checking for library containing dlopen" >&5
@@ -11043,90 +11139,6 @@ _ACEOF
 
 fi
 
-{ echo "$as_me:$LINENO: checking for sys/wait.h that is POSIX.1 compatible" >&5
-echo $ECHO_N "checking for sys/wait.h that is POSIX.1 compatible... $ECHO_C" >&6; }
-if test "${ac_cv_header_sys_wait_h+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <sys/types.h>
-#include <sys/wait.h>
-#ifndef WEXITSTATUS
-# define WEXITSTATUS(stat_val) ((unsigned int) (stat_val) >> 8)
-#endif
-#ifndef WIFEXITED
-# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
-#endif
-
-int
-main ()
-{
-  int s;
-  wait (&s);
-  s = WIFEXITED (s) ?
WEXITSTATUS (s) : 1; - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest.$ac_objext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_header_sys_wait_h=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_cv_header_sys_wait_h=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -fi -{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_wait_h" >&5 -echo "${ECHO_T}$ac_cv_header_sys_wait_h" >&6; } -if test $ac_cv_header_sys_wait_h = yes; then - -cat >>confdefs.h <<\_ACEOF -#define HAVE_SYS_WAIT_H 1 -_ACEOF - -fi - { echo "$as_me:$LINENO: checking whether time.h and sys/time.h may both be included" >&5 echo $ECHO_N "checking whether time.h and sys/time.h may both be included... $ECHO_C" >&6; } if test "${ac_cv_header_time+set}" = set; then @@ -11643,175 +11655,6 @@ fi done -for ac_header in utime.h -do -as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - { echo "$as_me:$LINENO: checking for $ac_header" >&5 -echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } -if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -fi -ac_res=`eval echo '${'$as_ac_Header'}'` - { echo "$as_me:$LINENO: result: $ac_res" >&5 -echo "${ECHO_T}$ac_res" >&6; } -else - # Is the header compilable? -{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 -echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -$ac_includes_default -#include <$ac_header> -_ACEOF -rm -f conftest.$ac_objext -if { (ac_try="$ac_compile" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_compile") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest.$ac_objext' - { (case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_try") 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - ac_header_compiler=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_compiler=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 -echo "${ECHO_T}$ac_header_compiler" >&6; } - -# Is the header present? -{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 -echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include <$ac_header> -_ACEOF -if { (ac_try="$ac_cpp conftest.$ac_ext" -case "(($ac_try" in - *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; - *) ac_try_echo=$ac_try;; -esac -eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 - (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } >/dev/null; then - if test -s conftest.err; then - ac_cpp_err=$ac_c_preproc_warn_flag - ac_cpp_err=$ac_cpp_err$ac_c_werror_flag - else - ac_cpp_err= - fi -else - ac_cpp_err=yes -fi -if test -z "$ac_cpp_err"; then - ac_header_preproc=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_header_preproc=no -fi - -rm -f conftest.err conftest.$ac_ext -{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 -echo "${ECHO_T}$ac_header_preproc" >&6; } - -# So? What about this header? -case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in - yes:no: ) - { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 -echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 -echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} - ac_header_preproc=yes - ;; - no:yes:* ) - { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 -echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} - { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 -echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;}
-    { echo "$as_me:$LINENO: WARNING: $ac_header:     see the Autoconf documentation" >&5
-echo "$as_me: WARNING: $ac_header:     see the Autoconf documentation" >&2;}
-    { echo "$as_me:$LINENO: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&5
-echo "$as_me: WARNING: $ac_header:     section \"Present But Cannot Be Compiled\"" >&2;}
-    { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5
-echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;}
-    { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5
-echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;}
-    ( cat <<\_ASBOX
-## ------------------------------------ ##
-## Report this to http://llvm.org/bugs/ ##
-## ------------------------------------ ##
-_ASBOX
-     ) | sed "s/^/$as_me: WARNING:     /" >&2
-    ;;
-esac
-{ echo "$as_me:$LINENO: checking for $ac_header" >&5
-echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
-if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-  eval "$as_ac_Header=\$ac_header_preproc"
-fi
-ac_res=`eval echo '${'$as_ac_Header'}'`
-	       { echo "$as_me:$LINENO: result: $ac_res" >&5
-echo "${ECHO_T}$ac_res" >&6; }
-
-fi
-if test `eval echo '${'$as_ac_Header'}'` = yes; then
-  cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
-done
-
-
@@ -14694,9 +14537,7 @@ done
 
 
 
-
-
-for ac_func in setjmp longjmp sigsetjmp siglongjmp writev
+for ac_func in setjmp longjmp writev
 do
 as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
 { echo "$as_me:$LINENO: checking for $ac_func" >&5
@@ -15011,97 +14852,6 @@ _ACEOF
 
 fi
 
-
-
-  { echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in <stdlib.h>" >&5
-echo $ECHO_N "checking for srand48/lrand48/drand48 in <stdlib.h>... $ECHO_C" >&6; }
-if test "${ac_cv_func_rand48+set}" = set; then
-  echo $ECHO_N "(cached) $ECHO_C" >&6
-else
-
-  ac_ext=cpp
-ac_cpp='$CXXCPP $CPPFLAGS'
-ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
-
-    cat >conftest.$ac_ext <<_ACEOF
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <stdlib.h>
-int
-main ()
-{
-srand48(0);lrand48();drand48();
-  ;
-  return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (ac_try="$ac_compile"
-case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_compile") 2>conftest.er1
-  ac_status=$?
-  grep -v '^ *+' conftest.er1 >conftest.err
-  rm -f conftest.er1
-  cat conftest.err >&5
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); } &&
-	 { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err'
-  { (case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_try") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$? = $ac_status" >&5
-  (exit $ac_status); }; } &&
-	 { ac_try='test -s conftest.$ac_objext'
-  { (case "(($ac_try" in
-  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
-  *) ac_try_echo=$ac_try;;
-esac
-eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
-  (eval "$ac_try") 2>&5
-  ac_status=$?
-  echo "$as_me:$LINENO: \$?
= $ac_status" >&5 - (exit $ac_status); }; }; then - ac_cv_func_rand48=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - - ac_cv_func_rand48=no -fi - -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu - - -fi -{ echo "$as_me:$LINENO: result: $ac_cv_func_rand48" >&5 -echo "${ECHO_T}$ac_cv_func_rand48" >&6; } - -if test "$ac_cv_func_rand48" = "yes" ; then - -cat >>confdefs.h <<\_ACEOF -#define HAVE_RAND48 1 -_ACEOF - -fi - - { echo "$as_me:$LINENO: checking whether arc4random is declared" >&5 echo $ECHO_N "checking whether arc4random is declared... $ECHO_C" >&6; } if test "${ac_cv_have_decl_arc4random+set}" = set; then @@ -18529,7 +18279,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by LLVM $as_me 3.7.1, which was +This file was extended by LLVM $as_me 3.8.0svn, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -18582,7 +18332,7 @@ Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -LLVM config.status 3.7.1 +LLVM config.status 3.8.0svn configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" @@ -18836,6 +18586,7 @@ CXX!$CXX$ac_delim CXXFLAGS!$CXXFLAGS$ac_delim ac_ct_CXX!$ac_ct_CXX$ac_delim CPP!$CPP$ac_delim +CXX_COMPILER!$CXX_COMPILER$ac_delim subdirs!$subdirs$ac_delim ENABLE_POLLY!$ENABLE_POLLY$ac_delim LLVM_HAS_POLLY!$LLVM_HAS_POLLY$ac_delim @@ -18879,7 +18630,6 @@ ENABLE_WERROR!$ENABLE_WERROR$ac_delim ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim ENABLE_ABI_BREAKING_CHECKS!$ENABLE_ABI_BREAKING_CHECKS$ac_delim -DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -18921,6 +18671,7 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim KEEP_SYMBOLS!$KEEP_SYMBOLS$ac_delim JIT!$JIT$ac_delim @@ -18999,7 +18750,7 @@ GAS!$GAS$ac_delim HAVE_LINK_VERSION_SCRIPT!$HAVE_LINK_VERSION_SCRIPT$ac_delim EGREP!$EGREP$ac_delim NO_VARIADIC_MACROS!$NO_VARIADIC_MACROS$ac_delim -NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim +MISSING_FIELD_INITIALIZERS!$MISSING_FIELD_INITIALIZERS$ac_delim COVERED_SWITCH_DEFAULT!$COVERED_SWITCH_DEFAULT$ac_delim NO_MAYBE_UNINITIALIZED!$NO_MAYBE_UNINITIALIZED$ac_delim NO_UNINITIALIZED!$NO_UNINITIALIZED$ac_delim @@ -19017,7 +18768,6 @@ HAVE_LIBZ!$HAVE_LIBZ$ac_delim HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim MMAP_FILE!$MMAP_FILE$ac_delim SHLIBEXT!$SHLIBEXT$ac_delim -LLVM_PREFIX!$LLVM_PREFIX$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -19059,6 +18809,7 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF +LLVM_PREFIX!$LLVM_PREFIX$ac_delim LLVM_BINDIR!$LLVM_BINDIR$ac_delim LLVM_DATADIR!$LLVM_DATADIR$ac_delim LLVM_DOCSDIR!$LLVM_DOCSDIR$ac_delim @@ -19079,7 +18830,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" 
conf$$subs.sed | grep -c X` = 18; then
+  if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 19; then
     break
   elif $ac_last_try; then
     { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
@@ -19713,3 +19464,14 @@ echo "$as_me: error: $ac_sub_configure failed for $ac_dir" >&2;}
   done
 fi
 
+echo ""
+echo ""
+echo "################################################################################"
+echo "################################################################################"
+echo "The LLVM project has deprecated building with configure & make."
+echo "The autoconf-based makefile build system will be removed in the 3.9 release."
+echo ""
+echo "Please migrate to the CMake-based build system."
+echo "For more information see: http://llvm.org/docs/CMake.html"
+echo "################################################################################"
+echo "################################################################################"
diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst
index f62cc3fe4d31..fe7fcbd4bc50 100644
--- a/docs/AliasAnalysis.rst
+++ b/docs/AliasAnalysis.rst
@@ -389,11 +389,10 @@ in its ``getAnalysisUsage`` that it does so. Some passes attempt to use
 ``AU.addPreserved``, however this doesn't actually have any effect.
 
-``AliasAnalysisCounter`` (``-count-aa``) and ``AliasDebugger`` (``-debug-aa``)
-are implemented as ``ModulePass`` classes, so if your alias analysis uses
-``FunctionPass``, it won't be able to use these utilities.  If you try to use
-them, the pass manager will silently route alias analysis queries directly to
-``BasicAliasAnalysis`` instead.
+``AliasAnalysisCounter`` (``-count-aa``) is implemented as a ``ModulePass``,
+so if your alias analysis uses ``FunctionPass``, it won't be able to use this
+utility.  If you try to use it, the pass manager will silently route alias
+analysis queries directly to ``BasicAliasAnalysis`` instead.
 
 Similarly, the ``opt -p`` option introduces ``ModulePass`` passes between each
 pass, which prevents the use of ``FunctionPass`` alias analysis passes.
diff --git a/docs/Atomics.rst b/docs/Atomics.rst
index 9068df46b023..79ab74792dd4 100644
--- a/docs/Atomics.rst
+++ b/docs/Atomics.rst
@@ -446,7 +446,7 @@ It is often easiest for backends to use AtomicExpandPass to lower some of the
 atomic constructs. Here are some lowerings it can do:
 
 * cmpxchg -> loop with load-linked/store-conditional
-  by overriding ``hasLoadLinkedStoreConditional()``, ``emitLoadLinked()``,
+  by overriding ``shouldExpandAtomicCmpXchgInIR()``, ``emitLoadLinked()``,
   ``emitStoreConditional()``
 * large loads/stores -> ll-sc/cmpxchg
   by overriding ``shouldExpandAtomicStoreInIR()``/``shouldExpandAtomicLoadInIR()``
diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst
index 25ea421ed083..d6e3099bdb63 100644
--- a/docs/BitCodeFormat.rst
+++ b/docs/BitCodeFormat.rst
@@ -756,6 +756,7 @@ function. The operand fields are:
   * ``anyregcc``: code 13
   * ``preserve_mostcc``: code 14
   * ``preserve_allcc``: code 15
+  * ``cxx_fast_tlscc``: code 17
   * ``x86_stdcallcc``: code 64
   * ``x86_fastcallcc``: code 65
   * ``arm_apcscc``: code 66
@@ -851,7 +852,7 @@ in the *paramattr* field of module block `FUNCTION`_ records, or within the
 *attr* field of function block ``INST_INVOKE`` and ``INST_CALL`` records.
 
 Entries within ``PARAMATTR_BLOCK`` are constructed to ensure that each is unique
-(i.e., no two indicies represent equivalent attribute lists).
+(i.e., no two indices represent equivalent attribute lists).
 
 ..
_PARAMATTR_CODE_ENTRY:
@@ -904,7 +905,7 @@ table entry, which may be referenced by 0-based index from instructions,
 constants, metadata, type symbol table entries, or other type operator records.
 
 Entries within ``TYPE_BLOCK`` are constructed to ensure that each entry is
-unique (i.e., no two indicies represent structurally equivalent types).
+unique (i.e., no two indices represent structurally equivalent types).
 
 .. _TYPE_CODE_NUMENTRY:
 .. _NUMENTRY:
diff --git a/docs/BitSets.rst b/docs/BitSets.rst
index c6ffdbdb8a11..18dbf6df563f 100644
--- a/docs/BitSets.rst
+++ b/docs/BitSets.rst
@@ -10,17 +10,41 @@ for the type of the class or its derived classes.
 
 To use the mechanism, a client creates a global metadata node named
 ``llvm.bitsets``.  Each element is a metadata node with three elements:
-the first is a metadata string containing an identifier for the bitset,
-the second is a global variable and the third is a byte offset into the
-global variable.
+
+1. a metadata object representing an identifier for the bitset
+2. either a global variable or a function
+3. a byte offset into the global (generally zero for functions)
+
+Each bitset must exclusively contain either global variables or functions.
+
+.. admonition:: Limitation
+
+  The current implementation only supports functions as members of bitsets on
+  the x86-32 and x86-64 architectures.
 
 This will cause a link-time optimization pass to generate bitsets from the
-memory addresses referenced from the elements of the bitset metadata. The pass
-will lay out the referenced globals consecutively, so their definitions must
-be available at LTO time. The `GlobalLayoutBuilder`_ class is responsible for
-laying out the globals efficiently to minimize the sizes of the underlying
-bitsets. An intrinsic, :ref:`llvm.bitset.test <bitset.test>`, generates code
-to test whether a given pointer is a member of a bitset.
+memory addresses referenced from the elements of the bitset metadata. The
+pass will lay out referenced global variables consecutively, so their
+definitions must be available at LTO time.
+
+A bit set containing functions is transformed into a jump table: a block of
+code with one branch instruction per function in the bit set, each branching
+to its target function. The pass redirects any taken function addresses to
+the corresponding jump table entry. In the object file's symbol table, the
+jump table entries take the identities of the original functions, so that
+addresses taken outside the module will pass any verification done inside
+the module.
+
+Jump tables may call external functions, so their definitions need not
+be available at LTO time. Note that if an externally defined function is a
+member of a bitset, there is no guarantee that its identity within the module
+will be the same as its identity outside of the module, as the former will
+be the jump table entry if a jump table is necessary.
+
+The `GlobalLayoutBuilder`_ class is responsible for laying out the globals
+efficiently to minimize the sizes of the underlying bitsets. An intrinsic,
+:ref:`llvm.bitset.test <bitset.test>`, generates code to test whether a
+given pointer is a member of a bitset.
 
 :Example:
 
@@ -33,13 +57,25 @@ to test whether a given pointer is a member of a bitset.
   @c = internal global i32 0
   @d = internal global [2 x i32] [i32 0, i32 0]
 
-  !llvm.bitsets = !{!0, !1, !2, !3, !4}
+  define void @e() {
+    ret void
+  }
+
+  define void @f() {
+    ret void
+  }
+
+  declare void @g()
+
+  !llvm.bitsets = !{!0, !1, !2, !3, !4, !5, !6}
 
   !0 = !{!"bitset1", i32* @a, i32 0}
   !1 = !{!"bitset1", i32* @b, i32 0}
   !2 = !{!"bitset2", i32* @b, i32 0}
   !3 = !{!"bitset2", i32* @c, i32 0}
   !4 = !{!"bitset2", i32* @d, i32 4}
+  !5 = !{!"bitset3", void ()* @e, i32 0}
+  !6 = !{!"bitset3", void ()* @g, i32 0}
 
   declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
 
@@ -55,6 +91,12 @@ to test whether a given pointer is a member of a bitset.
     ret i1 %x
   }
 
+  define i1 @baz(void ()* %p) {
+    %pi8 = bitcast void ()* %p to i8*
+    %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
+    ret i1 %x
+  }
+
   define void @main() {
     %a1 = call i1 @foo(i32* @a) ; returns 1
     %b1 = call i1 @foo(i32* @b) ; returns 1
@@ -64,6 +106,9 @@ to test whether a given pointer is a member of a bitset.
     %c2 = call i1 @bar(i32* @c) ; returns 1
     %d02 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 0)) ; returns 0
     %d12 = call i1 @bar(i32* getelementptr ([2 x i32]* @d, i32 0, i32 1)) ; returns 1
+    %e = call i1 @baz(void ()* @e) ; returns 1
+    %f = call i1 @baz(void ()* @f) ; returns 0
+    %g = call i1 @baz(void ()* @g) ; returns 1
 
     ret void
   }
diff --git a/docs/BranchWeightMetadata.rst b/docs/BranchWeightMetadata.rst
index 2ebc6c32416e..6cbcb0f0fb24 100644
--- a/docs/BranchWeightMetadata.rst
+++ b/docs/BranchWeightMetadata.rst
@@ -27,7 +27,7 @@ Supported Instructions
 ^^^^^^^^^^^^^^
 
 Metadata is only assigned to the conditional branches. There are two extra
-operarands for the true and the false branch.
+operands for the true and the false branch.
 
 .. code-block:: llvm
 
@@ -114,12 +114,12 @@ CFG Modifications
 
 Branch Weight Metatada is not proof against CFG changes. If terminator operands'
 are changed some action should be taken. In other case some misoptimizations may
-occur due to incorrent branch prediction information.
+occur due to incorrect branch prediction information.
 
 Function Entry Counts
 =====================
 
-To allow comparing different functions durint inter-procedural analysis and
+To allow comparing different functions during inter-procedural analysis and
 optimization, ``MD_prof`` nodes can also be assigned to a function definition.
 The first operand is a string indicating the name of the associated counter.
diff --git a/docs/BuildingLLVMWithAutotools.rst b/docs/BuildingLLVMWithAutotools.rst
index 6f9a13410555..083ead67ebb6 100644
--- a/docs/BuildingLLVMWithAutotools.rst
+++ b/docs/BuildingLLVMWithAutotools.rst
@@ -5,6 +5,12 @@ Building LLVM With Autotools
 .. contents::
    :local:
 
+.. warning::
+
+  Building LLVM with autoconf is deprecated as of 3.8. The autoconf build
+  system will be removed in 3.9. Please migrate to using CMake. For more
+  information see: `Building LLVM with CMake <CMake.html>`_
+
 Overview
 ========
 
diff --git a/docs/CMake.rst b/docs/CMake.rst
index 909fc04248c7..38199e5cc587 100644
--- a/docs/CMake.rst
+++ b/docs/CMake.rst
@@ -10,11 +10,11 @@ Introduction
 
 `CMake <http://www.cmake.org/>`_ is a cross-platform build-generator tool. CMake
 does not build the project, it generates the files needed by your build tool
-(GNU make, Visual Studio, etc) for building LLVM.
+(GNU make, Visual Studio, etc.) for building LLVM.
 
 If you are really anxious about getting a functional LLVM build, go to the
-`Quick start`_ section.
If you are a CMake novice, start on `Basic CMake usage`_ -and then go back to the `Quick start`_ once you know what you are doing. The +`Quick start`_ section. If you are a CMake novice, start with `Basic CMake usage`_ +and then go back to the `Quick start`_ section once you know what you are doing. The `Options and variables`_ section is a reference for customizing your build. If you already have experience with CMake, this is the recommended starting point. @@ -31,35 +31,35 @@ We use here the command-line, non-interactive CMake interface. #. Open a shell. Your development tools must be reachable from this shell through the PATH environment variable. -#. Create a directory for containing the build. It is not supported to build - LLVM on the source directory. cd to this directory: +#. Create a build directory. Building LLVM in the source + directory is not supported. cd to this directory: .. code-block:: console $ mkdir mybuilddir $ cd mybuilddir -#. Execute this command on the shell replacing `path/to/llvm/source/root` with +#. Execute this command in the shell replacing `path/to/llvm/source/root` with the path to the root of your LLVM source tree: .. code-block:: console $ cmake path/to/llvm/source/root - CMake will detect your development environment, perform a series of test and + CMake will detect your development environment, perform a series of tests, and generate the files required for building LLVM. CMake will use default values for all build parameters. See the `Options and variables`_ section for - fine-tuning your build + a list of build parameters that you can modify. This can fail if CMake can't detect your toolset, or if it thinks that the - environment is not sane enough. On this case make sure that the toolset that - you intend to use is the only one reachable from the shell and that the shell - itself is the correct one for you development environment. CMake will refuse + environment is not sane enough. In this case, make sure that the toolset that + you intend to use is the only one reachable from the shell, and that the shell + itself is the correct one for your development environment. CMake will refuse to build MinGW makefiles if you have a POSIX shell reachable through the PATH environment variable, for instance. You can force CMake to use a given build - tool, see the `Usage`_ section. + tool; for instructions, see the `Usage`_ section, below. -#. After CMake has finished running, proceed to use IDE project files or start +#. After CMake has finished running, proceed to use IDE project files, or start the build from the build directory: .. code-block:: console @@ -67,9 +67,9 @@ We use here the command-line, non-interactive CMake interface. $ cmake --build . The ``--build`` option tells ``cmake`` to invoke the underlying build - tool (``make``, ``ninja``, ``xcodebuild``, ``msbuild``, etc). + tool (``make``, ``ninja``, ``xcodebuild``, ``msbuild``, etc.) - The underlying build tool can be invoked directly either of course, but + The underlying build tool can be invoked directly, of course, but the ``--build`` option is portable. #. After LLVM has finished building, install it from the build directory: @@ -95,33 +95,39 @@ We use here the command-line, non-interactive CMake interface. Basic CMake usage ================= -This section explains basic aspects of CMake, mostly for explaining those -options which you may need on your day-to-day usage. +This section explains basic aspects of CMake +which you may need in your day-to-day usage. 
-CMake comes with extensive documentation in the form of html files and on the -cmake executable itself. Execute ``cmake --help`` for further help options. +CMake comes with extensive documentation, in the form of html files, and as +online help accessible via the ``cmake`` executable itself. Execute ``cmake +--help`` for further help options. -CMake requires to know for which build tool it shall generate files (GNU make, -Visual Studio, Xcode, etc). If not specified on the command line, it tries to -guess it based on you environment. Once identified the build tool, CMake uses -the corresponding *Generator* for creating files for your build tool. You can +CMake allows you to specify a build tool (e.g., GNU make, Visual Studio, +or Xcode). If not specified on the command line, CMake tries to guess which +build tool to use, based on your environment. Once it has identified your +build tool, CMake uses the corresponding *Generator* to create files for your +build tool (e.g., Makefiles or Visual Studio or Xcode project files). You can explicitly specify the generator with the command line option ``-G "Name of the -generator"``. For knowing the available generators on your platform, execute +generator"``. To see a list of the available generators on your system, execute .. code-block:: console $ cmake --help -This will list the generator's names at the end of the help text. Generator's -names are case-sensitive. Example: +This will list the generator names at the end of the help text. + +Generators' names are case-sensitive, and may contain spaces. For this reason, +you should enter them exactly as they are listed in the ``cmake --help`` +output, in quotes. For example, to generate project files specifically for +Visual Studio 12, you can execute: .. code-block:: console - $ cmake -G "Visual Studio 11" path/to/llvm/source/root + $ cmake -G "Visual Studio 12" path/to/llvm/source/root For a given development platform there can be more than one adequate -generator. If you use Visual Studio "NMake Makefiles" is a generator you can use -for building with NMake. By default, CMake chooses the more specific generator +generator. If you use Visual Studio, "NMake Makefiles" is a generator you can use +for building with NMake. By default, CMake chooses the most specific generator supported by your development environment. If you want an alternative generator, you must tell this to CMake with the ``-G`` option. @@ -142,18 +148,20 @@ CMake command line like this: $ cmake -DVARIABLE=value path/to/llvm/source -You can set a variable after the initial CMake invocation for changing its +You can set a variable after the initial CMake invocation to change its value. You can also undefine a variable: .. code-block:: console $ cmake -UVARIABLE path/to/llvm/source -Variables are stored on the CMake cache. This is a file named ``CMakeCache.txt`` -on the root of the build directory. Do not hand-edit it. +Variables are stored in the CMake cache. This is a file named ``CMakeCache.txt`` +stored at the root of your build directory that is generated by ``cmake``. +Editing it yourself is not recommended. -Variables are listed here appending its type after a colon. It is correct to -write the variable and the type on the CMake command line: +Variables are listed in the CMake cache and later in this document with +the variable name and type separated by a colon. You can also specify the +variable and type on the CMake command line: .. 
code-block:: console @@ -163,17 +171,17 @@ Frequently-used CMake variables ------------------------------- Here are some of the CMake variables that are used often, along with a -brief explanation and LLVM-specific notes. For full documentation, check the -CMake docs or execute ``cmake --help-variable VARIABLE_NAME``. +brief explanation and LLVM-specific notes. For full documentation, consult the +CMake manual, or execute ``cmake --help-variable VARIABLE_NAME``. **CMAKE_BUILD_TYPE**:STRING - Sets the build type for ``make`` based generators. Possible values are - Release, Debug, RelWithDebInfo and MinSizeRel. On systems like Visual Studio - the user sets the build type with the IDE settings. + Sets the build type for ``make``-based generators. Possible values are + Release, Debug, RelWithDebInfo and MinSizeRel. If you are using an IDE such as + Visual Studio, you should use the IDE settings to set the build type. **CMAKE_INSTALL_PREFIX**:PATH Path where LLVM will be installed if "make install" is invoked or the - "INSTALL" target is built. + "install" target is built. **LLVM_LIBDIR_SUFFIX**:STRING Extra suffix to append to the directory where libraries are to be @@ -188,8 +196,9 @@ CMake docs or execute ``cmake --help-variable VARIABLE_NAME``. **BUILD_SHARED_LIBS**:BOOL Flag indicating if shared libraries will be built. Its default value is - OFF. Shared libraries are not supported on Windows and not recommended on the - other OSes. + OFF. This option is only recommended for use by LLVM developers. + On Windows, shared libraries may be used when building with MinGW, including + mingw-w64, but not when building with the Microsoft toolchain. .. _LLVM-specific variables: @@ -203,13 +212,13 @@ LLVM-specific variables **LLVM_BUILD_TOOLS**:BOOL Build LLVM tools. Defaults to ON. Targets for building each tool are generated - in any case. You can build an tool separately by invoking its target. For - example, you can build *llvm-as* with a makefile-based system executing *make - llvm-as* on the root of your build directory. + in any case. You can build a tool separately by invoking its target. For + example, you can build *llvm-as* with a Makefile-based system by executing *make + llvm-as* at the root of your build directory. **LLVM_INCLUDE_TOOLS**:BOOL - Generate build targets for the LLVM tools. Defaults to ON. You can use that - option for disabling the generation of build targets for the LLVM tools. + Generate build targets for the LLVM tools. Defaults to ON. You can use this + option to disable the generation of build targets for the LLVM tools. **LLVM_BUILD_EXAMPLES**:BOOL Build LLVM examples. Defaults to OFF. Targets for building each example are @@ -217,20 +226,20 @@ LLVM-specific variables details. **LLVM_INCLUDE_EXAMPLES**:BOOL - Generate build targets for the LLVM examples. Defaults to ON. You can use that - option for disabling the generation of build targets for the LLVM examples. + Generate build targets for the LLVM examples. Defaults to ON. You can use this + option to disable the generation of build targets for the LLVM examples. **LLVM_BUILD_TESTS**:BOOL Build LLVM unit tests. Defaults to OFF. Targets for building each unit test - are generated in any case. You can build a specific unit test with the target - *UnitTestNameTests* (where at this time *UnitTestName* can be ADT, Analysis, - ExecutionEngine, JIT, Support, Transform, VMCore; see the subdirectories of - *unittests* for an updated list.) It is possible to build all unit tests with - the target *UnitTests*. 
+ are generated in any case. You can build a specific unit test using the + targets defined under *unittests*, such as ADTTests, IRTests, SupportTests, + etc. (Search for ``add_llvm_unittest`` in the subdirectories of *unittests* + for a complete list of unit tests.) It is possible to build all unit tests + with the target *UnitTests*. **LLVM_INCLUDE_TESTS**:BOOL Generate build targets for the LLVM unit tests. Defaults to ON. You can use - that option for disabling the generation of build targets for the LLVM unit + this option to disable the generation of build targets for the LLVM unit tests. **LLVM_APPEND_VC_REV**:BOOL @@ -249,39 +258,39 @@ LLVM-specific variables is *Debug*. **LLVM_ENABLE_EH**:BOOL - Build LLVM with exception handling support. This is necessary if you wish to + Build LLVM with exception-handling support. This is necessary if you wish to link against LLVM libraries and make use of C++ exceptions in your own code that need to propagate through LLVM code. Defaults to OFF. **LLVM_ENABLE_PIC**:BOOL - Add the ``-fPIC`` flag for the compiler command-line, if the compiler supports + Add the ``-fPIC`` flag to the compiler command-line, if the compiler supports this flag. Some systems, like Windows, do not need this flag. Defaults to ON. **LLVM_ENABLE_RTTI**:BOOL - Build LLVM with run time type information. Defaults to OFF. + Build LLVM with run-time type information. Defaults to OFF. **LLVM_ENABLE_WARNINGS**:BOOL Enable all compiler warnings. Defaults to ON. **LLVM_ENABLE_PEDANTIC**:BOOL - Enable pedantic mode. This disables compiler specific extensions, if + Enable pedantic mode. This disables compiler-specific extensions, if possible. Defaults to ON. **LLVM_ENABLE_WERROR**:BOOL - Stop and fail build, if a compiler warning is triggered. Defaults to OFF. + Stop and fail the build, if a compiler warning is triggered. Defaults to OFF. **LLVM_ABI_BREAKING_CHECKS**:STRING Used to decide if LLVM should be built with ABI breaking checks or not. Allowed values are `WITH_ASSERTS` (default), `FORCE_ON` and `FORCE_OFF`. `WITH_ASSERTS` turns on ABI breaking checks in an assertion enabled build. `FORCE_ON` (`FORCE_OFF`) turns them on - (off) irrespective of whether normal (`NDEBUG` based) assertions are + (off) irrespective of whether normal (`NDEBUG`-based) assertions are enabled or not. A version of LLVM built with ABI breaking checks is not ABI compatible with a version built without it. **LLVM_BUILD_32_BITS**:BOOL - Build 32-bits executables and libraries on 64-bits systems. This option is - available only on some 64-bits unix systems. Defaults to OFF. + Build 32-bit executables and libraries on 64-bit systems. This option is + available only on some 64-bit Unix systems. Defaults to OFF. **LLVM_TARGET_ARCH**:STRING LLVM target to use for native code generation. This is required for JIT @@ -290,7 +299,7 @@ LLVM-specific variables to the target architecture name. **LLVM_TABLEGEN**:STRING - Full path to a native TableGen executable (usually named ``tblgen``). This is + Full path to a native TableGen executable (usually named ``llvm-tblgen``). This is intended for cross-compiling: if the user sets this variable, no native TableGen will be created. @@ -300,29 +309,40 @@ LLVM-specific variables others. **LLVM_LIT_TOOLS_DIR**:PATH - The path to GnuWin32 tools for tests. Valid on Windows host. Defaults to "", - then Lit seeks tools according to %PATH%. Lit can find tools(eg. grep, sort, - &c) on LLVM_LIT_TOOLS_DIR at first, without specifying GnuWin32 to %PATH%. 
+ The path to GnuWin32 tools for tests. Valid on Windows host. Defaults to + the empty string, in which case lit will look for tools needed for tests + (e.g. ``grep``, ``sort``, etc.) in your %PATH%. If GnuWin32 is not in your + %PATH%, then you can set this variable to the GnuWin32 directory so that + lit can find tools needed for tests in that directory. **LLVM_ENABLE_FFI**:BOOL - Indicates whether LLVM Interpreter will be linked with Foreign Function - Interface library. If the library or its headers are installed on a custom - location, you can set the variables FFI_INCLUDE_DIR and - FFI_LIBRARY_DIR. Defaults to OFF. + Indicates whether the LLVM Interpreter will be linked with the Foreign Function + Interface library (libffi) in order to enable calling external functions. + If the library or its headers are installed in a custom + location, you can also set the variables FFI_INCLUDE_DIR and + FFI_LIBRARY_DIR to the directories where ffi.h and libffi.so can be found, + respectively. Defaults to OFF. **LLVM_EXTERNAL_{CLANG,LLD,POLLY}_SOURCE_DIR**:PATH - Path to ``{Clang,lld,Polly}``\'s source directory. Defaults to - ``tools/{clang,lld,polly}``. ``{Clang,lld,Polly}`` will not be built when it - is empty or it does not point to a valid path. + These variables specify the path to the source directory for the external + LLVM projects Clang, lld, and Polly, respectively, relative to the top-level + source directory. If the in-tree subdirectory for an external project + exists (e.g., llvm/tools/clang for Clang), then the corresponding variable + will not be used. If the variable for an external project does not point + to a valid path, then that project will not be built. **LLVM_USE_OPROFILE**:BOOL - Enable building OProfile JIT support. Defaults to OFF + Enable building OProfile JIT support. Defaults to OFF. + +**LLVM_PROFDATA_FILE**:PATH + Path to a profdata file to pass into clang's -fprofile-instr-use flag. This + can only be specified if you're building with clang. **LLVM_USE_INTEL_JITEVENTS**:BOOL - Enable building support for Intel JIT Events API. Defaults to OFF + Enable building support for Intel JIT Events API. Defaults to OFF. **LLVM_ENABLE_ZLIB**:BOOL - Build with zlib to support compression/uncompression in LLVM tools. + Enable building with zlib to support compression/uncompression in LLVM tools. Defaults to ON. **LLVM_USE_SANITIZER**:STRING @@ -361,14 +381,14 @@ LLVM-specific variables ``org.llvm.qch``. This option is only useful in combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; - otherwise this has no effect. + otherwise it has no effect. **LLVM_DOXYGEN_QHP_NAMESPACE**:STRING Namespace under which the intermediate Qt Help Project file lives. See `Qt Help Project`_ for more information. Defaults to "org.llvm". This option is only useful in combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise - this has no effect. + it has no effect. **LLVM_DOXYGEN_QHP_CUST_FILTER_NAME**:STRING See `Qt Help Project`_ for @@ -377,14 +397,14 @@ LLVM-specific variables be used in Qt Creator to select only documentation from LLVM when browsing through all the help files that you might have loaded. This option is only useful in combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; - otherwise this has no effect. + otherwise it has no effect. .. _Qt Help Project: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters **LLVM_DOXYGEN_QHELPGENERATOR_PATH**:STRING The path to the ``qhelpgenerator`` executable. Defaults to whatever CMake's ``find_program()`` can find. 
This option is only useful in combination with
-  ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise this has no
+  ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise it has no
   effect.
 
 **LLVM_DOXYGEN_SVG**:BOOL
@@ -416,18 +436,24 @@ LLVM-specific variables
   If enabled then sphinx documentation warnings will be treated as
   errors. Defaults to ON.
 
+**LLVM_CREATE_XCODE_TOOLCHAIN**:BOOL
+  OS X Only: If enabled, CMake will generate a target named
+  'install-xcode-toolchain'. This target will create a directory at
+  $CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can
+  be used to override the default system tools.
+
 Executing the test suite
 ========================
 
-Testing is performed when the *check* target is built. For instance, if you are
-using makefiles, execute this command while on the top level of your build
-directory:
+Testing is performed when the *check-all* target is built. For instance, if you are
+using Makefiles, execute this command in the root of your build directory:
 
 .. code-block:: console
 
-  $ make check
+  $ make check-all
 
-On Visual Studio, you may run tests to build the project "check".
+On Visual Studio, you may run tests by building the project "check-all".
+For more information about testing, see the :doc:`TestingGuide`.
 
 Cross compiling
 ===============
@@ -447,10 +473,10 @@ Embedding LLVM in your project
 
 From LLVM 3.5 onwards both the CMake and autoconf/Makefile build systems export
 LLVM libraries as importable CMake targets. This means that clients of LLVM can
-now reliably use CMake to develop their own LLVM based projects against an
+now reliably use CMake to develop their own LLVM-based projects against an
 installed version of LLVM regardless of how it was built.
 
-Here is a simple example of CMakeLists.txt file that imports the LLVM libraries
+Here is a simple example of a CMakeLists.txt file that imports the LLVM libraries
 and uses them to build a simple application ``simple-tool``.
 
 .. code-block:: cmake
 
@@ -495,8 +521,8 @@ This file is available in two different locations.
   On Linux typically this is ``/usr/share/llvm/cmake/LLVMConfig.cmake``.
 
 * ``<LLVM_BUILD_ROOT>/share/llvm/cmake/LLVMConfig.cmake`` where
-  ``<LLVM_BUILD_ROOT>`` is the root of the LLVM build tree. **Note this only
-  available when building LLVM with CMake**
+  ``<LLVM_BUILD_ROOT>`` is the root of the LLVM build tree. **Note: this is only
+  available when building LLVM with CMake.**
 
 If LLVM is installed in your operating system's normal installation prefix (e.g.
 on Linux this is usually ``/usr/``) ``find_package(LLVM ...)`` will
@@ -529,7 +555,7 @@ include
 A list of include paths to directories containing LLVM header files.
 
 ``LLVM_PACKAGE_VERSION``
-  The LLVM version. This string can be used with CMake conditionals. E.g. ``if
+  The LLVM version. This string can be used with CMake conditionals, e.g., ``if
   (${LLVM_PACKAGE_VERSION} VERSION_LESS "3.5")``.
 
 ``LLVM_TOOLS_BINARY_DIR``
@@ -582,7 +608,7 @@ Contents of ``//CMakeLists.txt``:
 
 Note that if you intend for this pass to be merged into the LLVM source tree at
 some point in the future, it might make more sense to use LLVM's internal
-add_llvm_loadable_module function instead by...
+``add_llvm_loadable_module`` function instead by...
 
 
 Adding the following to ``/CMakeLists.txt`` (after
@@ -602,7 +628,7 @@ And then changing ``//CMakeLists.txt`` to
   )
 
 When you are done developing your pass, you may wish to integrate it
-into LLVM source tree. You can achieve it in two easy steps:
+into the LLVM source tree. You can achieve it in two easy steps:
 
 #. Copying ``<pass name>`` folder into ``<LLVM root>/lib/Transform`` directory.
@@ -618,6 +644,6 @@ Microsoft Visual C++
 --------------------
 
 **LLVM_COMPILER_JOBS**:STRING
-  Specifies the maximum number of parallell compiler jobs to use per project
+  Specifies the maximum number of parallel compiler jobs to use per project
   when building with msbuild or Visual Studio. Only supported for the Visual
   Studio 2010 CMake generator. 0 means use all processors. Default is 0.
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index 2388a92d39ef..eaa175062b61 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -147,7 +147,9 @@ if( NOT uses_ocaml LESS 0 )
     COMMAND ${CMAKE_COMMAND} -E remove_directory ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
     COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
     COMMAND ${OCAMLFIND} ocamldoc -d ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html
-                                  -sort -colorize-code -html ${odoc_files})
+                                  -sort -colorize-code -html ${odoc_files}
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/_ocamldoc/style.css
+                                     ${CMAKE_CURRENT_BINARY_DIR}/ocamldoc/html)
 
   add_dependencies(ocaml_doc ${doc_targets})
 
diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst
index 03f5cbd726d8..f3b949c7ad15 100644
--- a/docs/CodeGenerator.rst
+++ b/docs/CodeGenerator.rst
@@ -640,7 +640,7 @@ For target specific directives, the MCStreamer has a MCTargetStreamer instance.
 Each target that needs it defines a class that inherits from it and is a lot
 like MCStreamer itself: It has one method per directive and two classes that
 inherit from it, a target object streamer and a target asm streamer. The target
-asm streamer just prints it (``emitFnStart -> .fnstrart``), and the object
+asm streamer just prints it (``emitFnStart -> .fnstart``), and the object
 streamer implements the assembler logic for it.
 
 To make llvm use these classes, the target initialization must call
diff --git a/docs/CodingStandards.rst b/docs/CodingStandards.rst
index de4f73c546b5..91faadffea62 100644
--- a/docs/CodingStandards.rst
+++ b/docs/CodingStandards.rst
@@ -39,7 +39,7 @@ hand, it is reasonable to rename the methods of a class if you're about to
 change it in some other way. Just do the reformatting as a separate commit from
 the functionality change.
 
-The ultimate goal of these guidelines is the increase readability and
+The ultimate goal of these guidelines is to increase the readability and
 maintainability of our common source base. If you have suggestions for topics to
 be included, please mail them to `Chris `_.
 
@@ -178,8 +178,6 @@ being aware of:
 * While most of the atomics library is well implemented, the fences are
   missing. Fortunately, they are rarely needed.
 * The locale support is incomplete.
-* ``std::equal()`` (and other algorithms) incorrectly assert in MSVC when given
-  ``nullptr`` as an iterator.
 
 Other than these areas you should assume the standard library is available and
 working as expected until some build bot tells you otherwise.
If you're in an
diff --git a/docs/CommandGuide/index.rst b/docs/CommandGuide/index.rst
index ed18cd048aa5..46db57f1c845 100644
--- a/docs/CommandGuide/index.rst
+++ b/docs/CommandGuide/index.rst
@@ -21,6 +21,7 @@ Basic Commands
    lli
    llvm-link
    llvm-ar
+   llvm-lib
    llvm-nm
    llvm-config
    llvm-diff
diff --git a/docs/CommandGuide/lit.rst b/docs/CommandGuide/lit.rst
index e820eef2faff..0ec14bb2236e 100644
--- a/docs/CommandGuide/lit.rst
+++ b/docs/CommandGuide/lit.rst
@@ -80,6 +80,11 @@ OUTPUT OPTIONS
  Show more information on test failures, for example the entire test output
  instead of just the test result.
 
+.. option:: -a, --show-all
+
+ Show more information about all tests, for example the entire test
+ command line and output.
+
 .. option:: --no-progress-bar
 
  Do not use a curses-based progress bar.
 
diff --git a/docs/CommandGuide/llc.rst b/docs/CommandGuide/llc.rst
index 8d5c9ce8f8a1..5094259f9f95 100644
--- a/docs/CommandGuide/llc.rst
+++ b/docs/CommandGuide/llc.rst
@@ -127,6 +127,12 @@ End-user Options
  implements an LLVM target. This will permit the target name to be used with the
  :option:`-march` option so that code can be generated for that target.
 
+.. option:: -meabi=[default|gnu|4|5]
+
+ Specify which EABI version the generated code should conform to. Valid EABI
+ versions are *gnu*, *4* and *5*. The default value (*default*) depends on the
+ triple.
+
+
 Tuning/Configuration Options
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/docs/CommandGuide/lli.rst b/docs/CommandGuide/lli.rst
index 502fbd609353..9da13ee47e0e 100644
--- a/docs/CommandGuide/lli.rst
+++ b/docs/CommandGuide/lli.rst
@@ -1,172 +1,127 @@
 lli - directly execute programs from LLVM bitcode
 =================================================
 
-
 SYNOPSIS
 --------
 
-
-**lli** [*options*] [*filename*] [*program args*]
-
+:program:`lli` [*options*] [*filename*] [*program args*]
 
 DESCRIPTION
 -----------
 
+:program:`lli` directly executes programs in LLVM bitcode format. It takes a program
+in LLVM bitcode format and executes it using a just-in-time compiler or an
+interpreter.
 
-**lli** directly executes programs in LLVM bitcode format. It takes a program
-in LLVM bitcode format and executes it using a just-in-time compiler, if one is
-available for the current architecture, or an interpreter. **lli** takes all of
-the same code generator options as llc|llc, but they are only effective when
-**lli** is using the just-in-time compiler.
+:program:`lli` is *not* an emulator. It will not execute IR of different architectures
+and it can only interpret (or JIT-compile) for the host architecture.
 
-If *filename* is not specified, then **lli** reads the LLVM bitcode for the
+The JIT compiler takes the same arguments as other tools, like :program:`llc`,
+but they don't necessarily work for the interpreter.
+
+If `filename` is not specified, then :program:`lli` reads the LLVM bitcode for the
 program from standard input.
 
 The optional *args* specified on the command line are passed to the program as
 arguments.
 
-
 GENERAL OPTIONS
 ---------------
 
-
-
-**-fake-argv0**\ =\ *executable*
+.. option:: -fake-argv0=executable
 
  Override the ``argv[0]`` value passed into the executing program.
 
-
-
-**-force-interpreter**\ =\ *{false,true}*
+.. option:: -force-interpreter={false,true}
 
  If set to true, use the interpreter even if a just-in-time compiler is available
  for this architecture. Defaults to false.
 
-
-
-**-help**
+.. option:: -help
 
  Print a summary of command line options.
 
+..
option:: -load=pluginfilename - -**-load**\ =\ *pluginfilename* - - Causes **lli** to load the plugin (shared object) named *pluginfilename* and use + Causes :program:`lli` to load the plugin (shared object) named *pluginfilename* and use it for optimization. - - -**-stats** +.. option:: -stats Print statistics from the code-generation passes. This is only meaningful for the just-in-time compiler, at present. - - -**-time-passes** +.. option:: -time-passes Record the amount of time needed for each code-generation pass and print it to standard error. +.. option:: -version - -**-version** - - Print out the version of **lli** and exit without doing anything else. - - - + Print out the version of :program:`lli` and exit without doing anything else. TARGET OPTIONS -------------- - - -**-mtriple**\ =\ *target triple* +.. option:: -mtriple=target triple Override the target triple specified in the input bitcode file with the specified string. This may result in a crash if you pick an architecture which is not compatible with the current system. - - -**-march**\ =\ *arch* +.. option:: -march=arch Specify the architecture for which to generate assembly, overriding the target encoded in the bitcode file. See the output of **llc -help** for a list of valid architectures. By default this is inferred from the target triple or autodetected to the current architecture. - - -**-mcpu**\ =\ *cpuname* +.. option:: -mcpu=cpuname Specify a specific chip in the current architecture to generate code for. By default this is inferred from the target triple and autodetected to the current architecture. For a list of available CPUs, use: **llvm-as < /dev/null | llc -march=xyz -mcpu=help** - - -**-mattr**\ =\ *a1,+a2,-a3,...* +.. option:: -mattr=a1,+a2,-a3,... Override or control specific attributes of the target, such as whether SIMD operations are enabled or not. The default set of attributes is set by the current CPU. For a list of available attributes, use: **llvm-as < /dev/null | llc -march=xyz -mattr=help** - - - FLOATING POINT OPTIONS ---------------------- - - -**-disable-excess-fp-precision** +.. option:: -disable-excess-fp-precision Disable optimizations that may increase floating point precision. - - -**-enable-no-infs-fp-math** +.. option:: -enable-no-infs-fp-math Enable optimizations that assume no Inf values. - - -**-enable-no-nans-fp-math** +.. option:: -enable-no-nans-fp-math Enable optimizations that assume no NAN values. +.. option:: -enable-unsafe-fp-math - -**-enable-unsafe-fp-math** - - Causes **lli** to enable optimizations that may decrease floating point + Causes :program:`lli` to enable optimizations that may decrease floating point precision. +.. option:: -soft-float - -**-soft-float** - - Causes **lli** to generate software floating point library calls instead of + Causes :program:`lli` to generate software floating point library calls instead of equivalent hardware instructions. - - - CODE GENERATION OPTIONS ----------------------- - - -**-code-model**\ =\ *model* +.. option:: -code-model=model Choose the code model from: - .. code-block:: perl default: Target default code model @@ -175,42 +130,30 @@ CODE GENERATION OPTIONS medium: Medium code model large: Large code model - - - -**-disable-post-RA-scheduler** +.. option:: -disable-post-RA-scheduler Disable scheduling after register allocation. - - -**-disable-spill-fusing** +.. option:: -disable-spill-fusing Disable fusing of spill code into instructions. - - -**-jit-enable-eh** +.. 
option:: -jit-enable-eh
 
  Exception handling should be enabled in the just-in-time compiler.
 
-
-
-**-join-liveintervals**
+.. option:: -join-liveintervals
 
  Coalesce copies (default=true).
 
+.. option:: -nozero-initialized-in-bss
+
+ Don't place zero-initialized symbols into the BSS section.
 
-**-nozero-initialized-in-bss**
 
 Don't place zero-initialized symbols into the BSS section.
 
-
-
-**-pre-RA-sched**\ =\ *scheduler*
+.. option:: -pre-RA-sched=scheduler
 
  Instruction schedulers available (before register allocation):
 
-
 .. code-block:: perl
 
       =default: Best scheduler for the target
@@ -221,74 +164,51 @@ CODE GENERATION OPTIONS
       =list-tdrr: Top-down register reduction list scheduling
       =list-td: Top-down list scheduler
 
-print-machineinstrs
-
- Print generated machine code
-
-
-
-**-regalloc**\ =\ *allocator*
+.. option:: -regalloc=allocator
 
  Register allocator to use (default=linearscan)
 
-
 .. code-block:: perl
 
       =bigblock: Big-block register allocator
       =linearscan: linear scan register allocator
       =local -   local register allocator
       =simple: simple register allocator
 
-
-
-**-relocation-model**\ =\ *model*
+.. option:: -relocation-model=model
 
  Choose relocation model from:
 
-
 .. code-block:: perl
 
       =default: Target default relocation model
       =static: Non-relocatable code
       =pic -   Fully relocatable, position independent code
       =dynamic-no-pic: Relocatable external references, non-relocatable code
 
-
-
-**-spiller**
+.. option:: -spiller
 
  Spiller to use (default=local)
 
-
 .. code-block:: perl
 
       =simple: simple spiller
       =local: local spiller
 
-
-
-**-x86-asm-syntax**\ =\ *syntax*
+.. option:: -x86-asm-syntax=syntax
 
  Choose style of code to emit from X86 backend:
 
-
 .. code-block:: perl
 
       =att: Emit AT&T-style assembly
       =intel: Emit Intel-style assembly
 
-
-
-
 EXIT STATUS
 -----------
 
-
-If **lli** fails to load the program, it will exit with an exit code of 1.
+If :program:`lli` fails to load the program, it will exit with an exit code of 1.
 Otherwise, it will return the exit code of the program it executes.
 
-
 SEE ALSO
 --------
 
-
-llc|llc
+:program:`llc`
diff --git a/docs/CommandGuide/llvm-lib.rst b/docs/CommandGuide/llvm-lib.rst
new file mode 100644
index 000000000000..ecd0a7db7e37
--- /dev/null
+++ b/docs/CommandGuide/llvm-lib.rst
@@ -0,0 +1,31 @@
+llvm-lib - LLVM lib.exe compatible library tool
+===============================================
+
+
+SYNOPSIS
+--------
+
+
+**llvm-lib** [/libpath:<path>] [/out:<output>] [/llvmlibthin]
+[/ignore] [/machine] [/nologo] [files...]
+
+
+DESCRIPTION
+-----------
+
+
+The **llvm-lib** command is intended to be a ``lib.exe``-compatible
+tool. See https://msdn.microsoft.com/en-us/library/7ykb2k5f for the
+general description.
+
+**llvm-lib** has the following extensions:
+
+* Bitcode files in symbol tables.
+  **llvm-lib** includes symbols from both bitcode files and regular
+  object files in the symbol table.
+
+* Creating thin archives.
+  The /llvmlibthin option causes **llvm-lib** to create a thin archive
+  that contains only the symbol table and the headers for the various
+  members. These files are much smaller, but are not compatible with
+  link.exe (lld can handle them).
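+
+For illustration, here is a plausible pair of invocations in the two modes
+(``a.obj``, ``b.obj`` and ``foo.lib`` are placeholder file names, not files
+shipped with LLVM):
+
+::
+
+  llvm-lib /out:foo.lib a.obj b.obj
+  llvm-lib /llvmlibthin /out:foo.lib a.obj b.obj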
diff --git a/docs/CommandGuide/llvm-profdata.rst b/docs/CommandGuide/llvm-profdata.rst
index 7053b7fa710e..74fe4ee9d219 100644
--- a/docs/CommandGuide/llvm-profdata.rst
+++ b/docs/CommandGuide/llvm-profdata.rst
@@ -28,7 +28,7 @@ MERGE
 SYNOPSIS
 ^^^^^^^^
 
-:program:`llvm-profdata merge` [*options*] [*filenames...*]
+:program:`llvm-profdata merge` [*options*] [*filename...*]
 
 DESCRIPTION
 ^^^^^^^^^^^
@@ -37,6 +37,14 @@ DESCRIPTION
 generated by PGO instrumentation and merges them together into a single indexed
 profile data file.
 
+By default, profile data is merged without modification. This means that the
+relative importance of each input file is proportional to the number of samples
+or counts it contains. In general, the input from a longer training run will be
+interpreted as relatively more important than a shorter run. Depending on the
+nature of the training runs it may be useful to adjust the weight given to each
+input file by using the ``-weighted-input`` option.
+
+
 OPTIONS
 ^^^^^^^
 
@@ -49,28 +57,63 @@ OPTIONS
 Specify the output file name. *Output* cannot be ``-`` as the resulting
 indexed profile data can't be written to standard output.
 
+.. option:: -weighted-input=weight,filename
+
+ Specify an input file name along with a weight. The profile counts of the input
+ file will be scaled (multiplied) by the supplied ``weight``, where ``weight``
+ is a decimal integer >= 1. Input files specified without using this option are
+ assigned a default weight of 1. Examples are shown below.
+
 .. option:: -instr (default)
 
  Specify that the input profile is an instrumentation-based profile.
 
 .. option:: -sample
 
- Specify that the input profile is a sample-based profile. When using
- sample-based profiles, the format of the generated file can be generated
- in one of three ways:
+ Specify that the input profile is a sample-based profile.
+
+ The format of the generated file can be generated in one of three ways:
 
 .. option:: -binary (default)
 
- Emit the profile using a binary encoding.
+ Emit the profile using a binary encoding. For instrumentation-based profiles,
+ the output format is the indexed binary format.
 
 .. option:: -text
 
- Emit the profile in text mode.
+ Emit the profile in text mode. This option can be used with both
+ sample-based and instrumentation-based profiles. When this option is used,
+ the profile will be dumped in the text format that is parsable by the profile
+ reader.
 
 .. option:: -gcc
 
  Emit the profile using GCC's gcov format (Not yet supported).
 
+EXAMPLES
+^^^^^^^^
+Basic Usage
++++++++++++
+Merge three profiles:
+
+::
+
+ llvm-profdata merge foo.profdata bar.profdata baz.profdata -output merged.profdata
+
+Weighted Input
+++++++++++++++
+The input file `foo.profdata` is especially important; multiply its counts by 10:
+
+::
+
+ llvm-profdata merge -weighted-input=10,foo.profdata bar.profdata baz.profdata -output merged.profdata
+
+Exactly equivalent to the previous invocation (explicit form; useful for programmatic invocation):
+
+::
+
+ llvm-profdata merge -weighted-input=10,foo.profdata -weighted-input=1,bar.profdata -weighted-input=1,baz.profdata -output merged.profdata
+
 .. program:: llvm-profdata show
 
 .. _profdata-show:
 
@@ -121,6 +164,13 @@ OPTIONS
 
  Specify that the input profile is an instrumentation-based profile.
 
+.. option:: -text
+
+ Instruct the profile dumper to show profile counts in the text format of the
+ instrumentation-based profile data representation.
By default, the profile + information is dumped in a more human readable form (also in text) with + annotations. + .. option:: -sample Specify that the input profile is a sample-based profile. diff --git a/docs/CommandGuide/llvm-symbolizer.rst b/docs/CommandGuide/llvm-symbolizer.rst index 96720e633f2f..ec4178e4e7ab 100644 --- a/docs/CommandGuide/llvm-symbolizer.rst +++ b/docs/CommandGuide/llvm-symbolizer.rst @@ -56,6 +56,14 @@ EXAMPLE foo(int) /tmp/a.cc:12 + $cat addr.txt + 0x40054d + $llvm-symbolizer -inlining -print-address -pretty-print -obj=addr.exe < addr.txt + 0x40054d: inc at /tmp/x.c:3:3 + (inlined by) main at /tmp/x.c:9:0 + $llvm-symbolizer -inlining -pretty-print -obj=addr.exe < addr.txt + inc at /tmp/x.c:3:3 + (inlined by) main at /tmp/x.c:9:0 OPTIONS ------- @@ -98,6 +106,14 @@ OPTIONS location, look for the debug info at the .dSYM path provided via the ``-dsym-hint`` flag. This flag can be used multiple times. +.. option:: -print-address + + Print address before the source code location. Defaults to false. + +.. option:: -pretty-print + + Print human readable output. If ``-inlining`` is specified, enclosing scope is + prefixed by (inlined by). Refer to listed examples. EXIT STATUS ----------- diff --git a/docs/CommandLine.rst b/docs/CommandLine.rst index 1d85215f2af3..556c302501e2 100644 --- a/docs/CommandLine.rst +++ b/docs/CommandLine.rst @@ -1737,6 +1737,7 @@ exported by the ``lib/VMCore/PassManager.cpp`` file. .. _dynamically loaded options: Dynamically adding command line options +--------------------------------------- .. todo:: diff --git a/docs/CompileCudaWithLLVM.rst b/docs/CompileCudaWithLLVM.rst new file mode 100644 index 000000000000..a981ffe1e8f5 --- /dev/null +++ b/docs/CompileCudaWithLLVM.rst @@ -0,0 +1,169 @@ +=================================== +Compiling CUDA C/C++ with LLVM +=================================== + +.. contents:: + :local: + +Introduction +============ + +This document contains the user guides and the internals of compiling CUDA +C/C++ with LLVM. It is aimed at both users who want to compile CUDA with LLVM +and developers who want to improve LLVM for GPUs. This document assumes a basic +familiarity with CUDA. Information about CUDA programming can be found in the +`CUDA programming guide +`_. + +How to Build LLVM with CUDA Support +=================================== + +Below is a quick summary of downloading and building LLVM. Consult the `Getting +Started `_ page for more details on +setting up LLVM. + +#. Checkout LLVM + + .. code-block:: console + + $ cd where-you-want-llvm-to-live + $ svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm + +#. Checkout Clang + + .. code-block:: console + + $ cd where-you-want-llvm-to-live + $ cd llvm/tools + $ svn co http://llvm.org/svn/llvm-project/cfe/trunk clang + +#. Configure and build LLVM and Clang + + .. code-block:: console + + $ cd where-you-want-llvm-to-live + $ mkdir build + $ cd build + $ cmake [options] .. + $ make + +How to Compile CUDA C/C++ with LLVM +=================================== + +We assume you have installed the CUDA driver and runtime. Consult the `NVIDIA +CUDA installation Guide +`_ if +you have not. + +Suppose you want to compile and run the following CUDA program (``axpy.cu``) +which multiplies a ``float`` array by a ``float`` scalar (AXPY). + +.. 
code-block:: c++
+
+  #include <helper_cuda.h> // for checkCudaErrors
+
+  #include <iostream>
+
+  __global__ void axpy(float a, float* x, float* y) {
+    y[threadIdx.x] = a * x[threadIdx.x];
+  }
+
+  int main(int argc, char* argv[]) {
+    const int kDataLen = 4;
+
+    float a = 2.0f;
+    float host_x[kDataLen] = {1.0f, 2.0f, 3.0f, 4.0f};
+    float host_y[kDataLen];
+
+    // Copy input data to device.
+    float* device_x;
+    float* device_y;
+    checkCudaErrors(cudaMalloc(&device_x, kDataLen * sizeof(float)));
+    checkCudaErrors(cudaMalloc(&device_y, kDataLen * sizeof(float)));
+    checkCudaErrors(cudaMemcpy(device_x, host_x, kDataLen * sizeof(float),
+                               cudaMemcpyHostToDevice));
+
+    // Launch the kernel.
+    axpy<<<1, kDataLen>>>(a, device_x, device_y);
+
+    // Copy output data to host.
+    checkCudaErrors(cudaDeviceSynchronize());
+    checkCudaErrors(cudaMemcpy(host_y, device_y, kDataLen * sizeof(float),
+                               cudaMemcpyDeviceToHost));
+
+    // Print the results.
+    for (int i = 0; i < kDataLen; ++i) {
+      std::cout << "y[" << i << "] = " << host_y[i] << "\n";
+    }
+
+    checkCudaErrors(cudaDeviceReset());
+    return 0;
+  }
+
+The command line for compilation is similar to what you would use for C++.
+
+.. code-block:: console
+
+  $ clang++ -o axpy -I<CUDA install path>/samples/common/inc -L<CUDA install path>/<lib64 or lib> axpy.cu -lcudart_static -lcuda -ldl -lrt -pthread
+  $ ./axpy
+  y[0] = 2
+  y[1] = 4
+  y[2] = 6
+  y[3] = 8
+
+Note that ``helper_cuda.h`` comes from the CUDA samples, so you need the
+samples installed for this example. ``<CUDA install path>`` is the root
+directory where you installed the CUDA SDK, typically ``/usr/local/cuda``.
+
+Optimizations
+=============
+
+CPUs and GPUs have different design philosophies and architectures. For example, a
+typical CPU has branch prediction, out-of-order execution, and is superscalar,
+whereas a typical GPU has none of these. Due to such differences, an
+optimization pipeline well-tuned for CPUs may not be suitable for GPUs.
+
+LLVM performs several general and CUDA-specific optimizations for GPUs. The
+list below shows some of the more important optimizations for GPUs. Most of
+them have been upstreamed to ``lib/Transforms/Scalar`` and
+``lib/Target/NVPTX``. A few of them have not been upstreamed due to lack of a
+customizable target-independent optimization pipeline.
+
+* **Straight-line scalar optimizations**. These optimizations reduce redundancy
+  in straight-line code. Details can be found in the `design document for
+  straight-line scalar optimizations `_.
+
+* **Inferring memory spaces**. `This optimization
+  `_
+  infers the memory space of an address so that the backend can emit faster
+  special loads and stores from it. Details can be found in the `design
+  document for memory space inference `_.
+
+* **Aggressive loop unrolling and function inlining**. Loop unrolling and
+  function inlining need to be more aggressive for GPUs than for CPUs because
+  control flow transfer on GPUs is more expensive. They also promote other
+  optimizations such as constant propagation and SROA, which sometimes speed up
+  code by over 10x. An empirical inline threshold for GPUs is 1100. This
+  configuration has yet to be upstreamed with a target-specific optimization
+  pipeline. LLVM also provides `loop unrolling pragmas
+  `_
+  and ``__attribute__((always_inline))`` for programmers to force unrolling and
+  inlining.
+
+* **Aggressive speculative execution**. `This transformation
+  `_ is
+  mainly for promoting straight-line scalar optimizations, which are most
+  effective on code along dominator paths.
+
+* **Memory-space alias analysis**.
`This alias analysis + `_ infers that two pointers in different + special memory spaces do not alias. It has yet to be integrated to the new + alias analysis infrastructure; the new infrastructure does not run + target-specific alias analysis. + +* **Bypassing 64-bit divides**. `An existing optimization + `_ + enabled in the NVPTX backend. 64-bit integer divides are much slower than + 32-bit ones on NVIDIA GPUs due to lack of a divide unit. Many of the 64-bit + divides in our benchmarks have a divisor and dividend which fit in 32-bits at + runtime. This optimization provides a fast path for this common case. diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst index 900ba24e230f..6c3ff4b10f1e 100644 --- a/docs/CompilerWriterInfo.rst +++ b/docs/CompilerWriterInfo.rst @@ -22,14 +22,16 @@ ARM * `ABI Addenda and Errata `_ -* `ARM C Language Extensions `_ +* `ARM C Language Extensions `_ AArch64 ------- +* `ARMv8 Architecture Reference Manual `_ + * `ARMv8 Instruction Set Overview `_ -* `ARM C Language Extensions `_ +* `ARM C Language Extensions `_ Itanium (ia64) -------------- diff --git a/docs/CoverageMappingFormat.rst b/docs/CoverageMappingFormat.rst index 8fcffb838a3f..9ac476c88b34 100644 --- a/docs/CoverageMappingFormat.rst +++ b/docs/CoverageMappingFormat.rst @@ -405,7 +405,7 @@ LEB128 is an unsigned interger value that is encoded using DWARF's LEB128 encoding, optimizing for the case where values are small (1 byte for values less than 128). -.. _strings: +.. _Strings: Strings ^^^^^^^ diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst index 9e458559fbcd..17baf2d27b13 100644 --- a/docs/DeveloperPolicy.rst +++ b/docs/DeveloperPolicy.rst @@ -505,8 +505,15 @@ for llvm users and not imposing a big burden on llvm developers: * The textual format is not backwards compatible. We don't change it too often, but there are no specific promises. -* The bitcode format produced by a X.Y release will be readable by all following - X.Z releases and the (X+1).0 release. +* Additions and changes to the IR should be reflected in + ``test/Bitcode/compatibility.ll``. + +* The bitcode format produced by a X.Y release will be readable by all + following X.Z releases and the (X+1).0 release. + +* After each X.Y release, ``compatibility.ll`` must be copied to + ``compatibility-X.Y.ll``. The corresponding bitcode file should be assembled + using the X.Y build and committed as ``compatibility-X.Y.ll.bc``. * Newer releases can ignore features from older releases, but they cannot miscompile them. For example, if nsw is ever replaced with something else, @@ -518,6 +525,33 @@ for llvm users and not imposing a big burden on llvm developers: it is to drop it. That is not very user friendly and a bit more effort is expected, but no promises are made. +C API Changes +---------------- + +* Stability Guarantees: The C API is, in general, a "best effort" for stability. + This means that we make every attempt to keep the C API stable, but that + stability will be limited by the abstractness of the interface and the + stability of the C++ API that it wraps. In practice, this means that things + like "create debug info" or "create this type of instruction" are likely to be + less stable than "take this IR file and JIT it for my current machine". + +* Release stability: We won't break the C API on the release branch with patches + that go on that branch, with the exception that we will fix an unintentional + C API break that will keep the release consistent with both the previous and + next release. 
+ +* Testing: Patches to the C API are expected to come with tests just like any + other patch. + +* Including new things into the API: If an LLVM subcomponent has a C API already + included, then expanding that C API is acceptable. Adding C API for + subcomponents that don't currently have one needs to be discussed on the + mailing list for design and maintainability feedback prior to implementation. + +* Documentation: Any changes to the C API are required to be documented in the + release notes so that it's clear to external users who do not follow the + project how the C API is changing and evolving. + .. _copyright-license-patents: Copyright, License, and Patents @@ -624,5 +658,5 @@ patent-related trouble with their changes (including from third parties). If you or your employer own the rights to a patent and would like to contribute code to LLVM that relies on it, we require that the copyright owner sign an agreement that allows any other user of LLVM to freely use your patent. Please -contact the `oversight group `_ for more +contact the `LLVM Foundation Board of Directors `_ for more details. diff --git a/docs/ExceptionHandling.rst b/docs/ExceptionHandling.rst index 55ffdb45efe9..74827c02a272 100644 --- a/docs/ExceptionHandling.rst +++ b/docs/ExceptionHandling.rst @@ -67,17 +67,10 @@ exception handling is generally preferred to SJLJ. Windows Runtime Exception Handling ----------------------------------- -Windows runtime based exception handling uses the same basic IR structure as -Itanium ABI based exception handling, but it relies on the personality -functions provided by the native Windows runtime library, ``__CxxFrameHandler3`` -for C++ exceptions: ``__C_specific_handler`` for 64-bit SEH or -``_frame_handler3/4`` for 32-bit SEH. This results in a very different -execution model and requires some minor modifications to the initial IR -representation and a significant restructuring just before code generation. - -General information about the Windows x64 exception handling mechanism can be -found at `MSDN Exception Handling (x64) -`_. +LLVM supports handling exceptions produced by the Windows runtime, but it +requires a very different intermediate representation. It is not based on the +":ref:`landingpad `" instruction like the other two models, and is +described later in this document under :ref:`wineh`. Overview -------- @@ -169,11 +162,11 @@ pad to the back end. For C++, the ``landingpad`` instruction returns a pointer and integer pair corresponding to the pointer to the *exception structure* and the *selector value* respectively. -The ``landingpad`` instruction takes a reference to the personality function to -be used for this ``try``/``catch`` sequence. The remainder of the instruction is -a list of *cleanup*, *catch*, and *filter* clauses. The exception is tested -against the clauses sequentially from first to last. The clauses have the -following meanings: +The ``landingpad`` instruction looks for a reference to the personality +function to be used for this ``try``/``catch`` sequence in the parent +function's attribute list. The instruction contains a list of *cleanup*, +*catch*, and *filter* clauses. The exception is tested against the clauses +sequentially from first to last. The clauses have the following meanings: - ``catch @ExcType`` @@ -321,97 +314,6 @@ the selector results they understand and then resume exception propagation with the `resume instruction `_ if none of the conditions match. 
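+
+As a minimal sketch of the clause-matching model just described (the names
+``@demo`` and ``@may_throw``, and the use of ``int``'s RTTI object ``@_ZTIi``,
+are illustrative choices, not required by LLVM):
+
+.. code-block:: llvm
+
+    @_ZTIi = external constant i8*
+    declare void @may_throw()
+    declare i32 @__gxx_personality_v0(...)
+
+    define void @demo() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+    entry:
+      invoke void @may_throw()
+              to label %cont unwind label %lpad
+
+    cont:
+      ret void
+
+    lpad:
+      ; The pointer/selector pair provided by the runtime.
+      %vals = landingpad { i8*, i32 }
+                cleanup
+                catch i8* bitcast (i8** @_ZTIi to i8*)
+      ; A real handler would run its cleanup code and compare the selector
+      ; value against its clauses here, resuming if nothing matched.
+      resume { i8*, i32 } %vals
+    }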
-C++ Exception Handling using the Windows Runtime -================================================= - -(Note: Windows C++ exception handling support is a work in progress and is - not yet fully implemented. The text below describes how it will work - when completed.) - -The Windows runtime function for C++ exception handling uses a multi-phase -approach. When an exception occurs it searches the current callstack for a -frame that has a handler for the exception. If a handler is found, it then -calls the cleanup handler for each frame above the handler which has a -cleanup handler before calling the catch handler. These calls are all made -from a stack context different from the original frame in which the handler -is defined. Therefore, it is necessary to outline these handlers from their -original context before code generation. - -Catch handlers are called with a pointer to the handler itself as the first -argument and a pointer to the parent function's stack frame as the second -argument. The catch handler uses the `llvm.localrecover -`_ to get a -pointer to a frame allocation block that is created in the parent frame using -the `llvm.localescape -`_ intrinsic. -The ``WinEHPrepare`` pass will have created a structure definition for the -contents of this block. The first two members of the structure will always be -(1) a 32-bit integer that the runtime uses to track the exception state of the -parent frame for the purposes of handling chained exceptions and (2) a pointer -to the object associated with the exception (roughly, the parameter of the -catch clause). These two members will be followed by any frame variables from -the parent function which must be accessed in any of the functions unwind or -catch handlers. The catch handler returns the address at which execution -should continue. - -Cleanup handlers perform any cleanup necessary as the frame goes out of scope, -such as calling object destructors. The runtime handles the actual unwinding -of the stack. If an exception occurs in a cleanup handler the runtime manages -termination of the process. Cleanup handlers are called with the same arguments -as catch handlers (a pointer to the handler and a pointer to the parent stack -frame) and use the same mechanism described above to access frame variables -in the parent function. Cleanup handlers do not return a value. - -The IR generated for Windows runtime based C++ exception handling is initially -very similar to the ``landingpad`` mechanism described above. Calls to -libc++abi functions (such as ``__cxa_begin_catch``/``__cxa_end_catch`` and -``__cxa_throw_exception`` are replaced with calls to intrinsics or Windows -runtime functions (such as ``llvm.eh.begincatch``/``llvm.eh.endcatch`` and -``__CxxThrowException``). - -During the WinEHPrepare pass, the handler functions are outlined into handler -functions and the original landing pad code is replaced with a call to the -``llvm.eh.actions`` intrinsic that describes the order in which handlers will -be processed from the logical location of the landing pad and an indirect -branch to the return value of the ``llvm.eh.actions`` intrinsic. The -``llvm.eh.actions`` intrinsic is defined as returning the address at which -execution will continue. This is a temporary construct which will be removed -before code generation, but it allows for the accurate tracking of control -flow until then. - -A typical landing pad will look like this after outlining: - -.. 
code-block:: llvm - - lpad: - %vals = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) - cleanup - catch i8* bitcast (i8** @_ZTIi to i8*) - catch i8* bitcast (i8** @_ZTIf to i8*) - %recover = call i8* (...)* @llvm.eh.actions( - i32 3, i8* bitcast (i8** @_ZTIi to i8*), i8* (i8*, i8*)* @_Z4testb.catch.1) - i32 2, i8* null, void (i8*, i8*)* @_Z4testb.cleanup.1) - i32 1, i8* bitcast (i8** @_ZTIf to i8*), i8* (i8*, i8*)* @_Z4testb.catch.0) - i32 0, i8* null, void (i8*, i8*)* @_Z4testb.cleanup.0) - indirectbr i8* %recover, [label %try.cont1, label %try.cont2] - -In this example, the landing pad represents an exception handling context with -two catch handlers and a cleanup handler that have been outlined. If an -exception is thrown with a type that matches ``_ZTIi``, the ``_Z4testb.catch.1`` -handler will be called an no clean-up is needed. If an exception is thrown -with a type that matches ``_ZTIf``, first the ``_Z4testb.cleanup.1`` handler -will be called to perform unwind-related cleanup, then the ``_Z4testb.catch.1`` -handler will be called. If an exception is throw which does not match either -of these types and the exception is handled by another frame further up the -call stack, first the ``_Z4testb.cleanup.1`` handler will be called, then the -``_Z4testb.cleanup.0`` handler (which corresponds to a different scope) will be -called, and exception handling will continue at the next frame in the call -stack will be called. One of the catch handlers will return the address of -``%try.cont1`` in the parent function and the other will return the address of -``%try.cont2``, meaning that execution continues at one of those blocks after -an exception is caught. - - Exception Handling Intrinsics ============================= @@ -498,50 +400,19 @@ When used in the native Windows C++ exception handling implementation, this intrinsic serves as a placeholder to delimit code before a catch handler is outlined. After the handler is outlined, this intrinsic is simply removed. -.. _llvm.eh.actions: -``llvm.eh.actions`` ----------------------- +.. _llvm.eh.exceptionpointer: + +``llvm.eh.exceptionpointer`` +---------------------------- .. code-block:: llvm - void @llvm.eh.actions() + i8 addrspace(N)* @llvm.eh.padparam.pNi8(token %catchpad) -This intrinsic represents the list of actions to take when an exception is -thrown. It is typically used by Windows exception handling schemes where cleanup -outlining is required by the runtime. The arguments are a sequence of ``i32`` -sentinels indicating the action type followed by some pre-determined number of -arguments required to implement that action. -A code of ``i32 0`` indicates a cleanup action, which expects one additional -argument. The argument is a pointer to a function that implements the cleanup -action. - -A code of ``i32 1`` indicates a catch action, which expects three additional -arguments. Different EH schemes give different meanings to the three arguments, -but the first argument indicates whether the catch should fire, the second is -the localescape index of the exception object, and the third is the code to run -to catch the exception. - -For Windows C++ exception handling, the first argument for a catch handler is a -pointer to the RTTI type descriptor for the object to catch. The second -argument is an index into the argument list of the ``llvm.localescape`` call in -the main function. The exception object will be copied into the provided stack -object. 
If the exception object is not required, this argument should be -1.
-The third argument is a pointer to a function implementing the catch. This
-function returns the address of the basic block where execution should resume
-after handling the exception.
-
-For Windows SEH, the first argument is a pointer to the filter function, which
-indicates if the exception should be caught or not. The second argument is
-typically negative one. The third argument is the address of a basic block
-where the exception will be handled. In other words, catch handlers are not
-outlined in SEH. After running cleanups, execution immediately resumes at this
-PC.
-
-In order to preserve the structure of the CFG, a call to '``llvm.eh.actions``'
-must be followed by an ':ref:`indirectbr `' instruction that
-jumps to the result of the intrinsic call.
+This intrinsic retrieves a pointer to the exception caught by the given
+``catchpad``.
 
 SJLJ Intrinsics
@@ -628,10 +499,279 @@
 an exception handling frame for each function in a compile unit, plus a common
 exception handling frame that defines information common to all functions in
 the unit.
 
+The format of this call frame information (CFI) is often platform-dependent,
+however: ARM, for example, defines its own format, and Apple has its own compact
+unwind info format. On Windows, another format is used for all architectures
+since 32-bit x86. LLVM will emit whatever information is required by the
+target.
+
 Exception Tables
 ----------------
 
 An exception table contains information about what actions to take when an
-exception is thrown in a particular part of a function's code. There is one
-exception table per function, except leaf functions and functions that have
-calls only to non-throwing functions. They do not need an exception table.
+exception is thrown in a particular part of a function's code. This is typically
+referred to as the language-specific data area (LSDA). The format of the LSDA
+table is specific to the personality function, but the majority of personalities
+out there use a variation of the tables consumed by ``__gxx_personality_v0``.
+There is one exception table per function, except leaf functions and functions
+that have calls only to non-throwing functions. They do not need an exception
+table.
+
+.. _wineh:
+
+Exception Handling using the Windows Runtime
+=================================================
+
+Background on Windows exceptions
+---------------------------------
+
+Interacting with exceptions on Windows is significantly more complicated than
+on Itanium C++ ABI platforms. The fundamental difference between the two models
+is that Itanium EH is designed around the idea of "successive unwinding," while
+Windows EH is not.
+
+Under Itanium, throwing an exception typically involves allocating thread-local
+memory to hold the exception, and calling into the EH runtime. The runtime
+identifies frames with appropriate exception handling actions, and successively
+resets the register context of the current thread to the most recently active
+frame with actions to run. In LLVM, execution resumes at a ``landingpad``
+instruction, which produces register values provided by the runtime. If a
+function is only cleaning up allocated resources, the function is responsible
+for calling ``_Unwind_Resume`` to transition to the next most recently active
+frame after it is finished cleaning up.
Eventually, the frame responsible for
+handling the exception calls ``__cxa_end_catch`` to destroy the exception,
+release its memory, and resume normal control flow.
+
+The Windows EH model does not use these successive register context resets.
+Instead, the active exception is typically described by a frame on the stack.
+In the case of C++ exceptions, the exception object is allocated in stack memory
+and its address is passed to ``__CxxThrowException``. General-purpose structured
+exceptions (SEH) are more analogous to Linux signals, and they are dispatched by
+userspace DLLs provided with Windows. Each frame on the stack has an assigned EH
+personality routine, which decides what actions to take to handle the exception.
+There are a few major personalities for C and C++ code: the C++ personality
+(``__CxxFrameHandler3``) and the SEH personalities (``_except_handler3``,
+``_except_handler4``, and ``__C_specific_handler``). All of them implement
+cleanups by calling back into a "funclet" contained in the parent function.
+
+Funclets, in this context, are regions of the parent function that can be called
+as though through a function pointer, with a very special calling convention.
+The frame pointer of the parent frame is passed into the funclet either using
+the standard EBP register or in the first parameter register, depending on the
+architecture. The funclet implements the EH action by accessing local variables
+in memory through the frame pointer, and returning some appropriate value,
+continuing the EH process. No variables that are live into or out of the funclet
+can be allocated in registers.
+
+The C++ personality also uses funclets to contain the code for catch blocks
+(i.e. all user code between the braces in ``catch (Type obj) { ... }``). The
+runtime must use funclets for catch bodies because the C++ exception object is
+allocated in a child stack frame of the function handling the exception. If the
+runtime rewound the stack back to the frame of the catch, the memory holding the
+exception would be overwritten quickly by subsequent function calls. The use of
+funclets also allows ``__CxxFrameHandler3`` to implement rethrow without
+resorting to TLS. Instead, the runtime throws a special exception, and then uses
+SEH (``__try / __except``) to resume execution with new information in the child
+frame.
+
+In other words, the successive unwinding approach is incompatible with Visual
+C++ exceptions and general-purpose Windows exception handling. Because the C++
+exception object lives in stack memory, LLVM cannot provide a custom personality
+function that uses landingpads. Similarly, SEH does not provide any mechanism
+to rethrow an exception or continue unwinding. Therefore, LLVM must use the IR
+constructs described later in this document to implement compatible exception
+handling.
+
+SEH filter expressions
+-----------------------
+
+The SEH personality functions also use funclets to implement filter expressions,
+which allow executing arbitrary user code to decide which exceptions to catch.
+Filter expressions should not be confused with the ``filter`` clause of the LLVM
+``landingpad`` instruction. Typically, filter expressions are used to determine
+if the exception came from a particular DLL or code region, or if code faulted
+while accessing a particular memory address range. LLVM does not currently have
+IR to represent filter expressions because it is difficult to represent their
+control dependencies.
Filter expressions run during the first phase of EH, +before cleanups run, making it very difficult to build a faithful control flow +graph. For now, the new EH instructions cannot represent SEH filter +expressions, and frontends must outline them ahead of time. Local variables of +the parent function can be escaped and accessed using the ``llvm.localescape`` +and ``llvm.localrecover`` intrinsics. + +New exception handling instructions +------------------------------------ + +The primary design goal of the new EH instructions is to support funclet +generation while preserving information about the CFG so that SSA formation +still works. As a secondary goal, they are designed to be generic across MSVC +and Itanium C++ exceptions. They make very few assumptions about the data +required by the personality, so long as it uses the familiar core EH actions: +catch, cleanup, and terminate. However, the new instructions are hard to modify +without knowing details of the EH personality. While they can be used to +represent Itanium EH, the landingpad model is strictly better for optimization +purposes. + +The following new instructions are considered "exception handling pads", in that +they must be the first non-phi instruction of a basic block that may be the +unwind destination of an EH flow edge: +``catchswitch``, ``catchpad``, and ``cleanuppad``. +As with landingpads, when entering a try scope, if the +frontend encounters a call site that may throw an exception, it should emit an +invoke that unwinds to a ``catchswitch`` block. Similarly, inside the scope of a +C++ object with a destructor, invokes should unwind to a ``cleanuppad``. + +New instructions are also used to mark the points where control is transferred +out of a catch/cleanup handler (which will correspond to exits from the +generated funclet). A catch handler which reaches its end by normal execution +executes a ``catchret`` instruction, which is a terminator indicating where in +the function control is returned to. A cleanup handler which reaches its end +by normal execution executes a ``cleanupret`` instruction, which is a terminator +indicating where the active exception will unwind to next. + +Each of these new EH pad instructions has a way to identify which action should +be considered after this action. The ``catchswitch`` instruction is a terminator +and has an unwind destination operand analogous to the unwind destination of an +invoke. The ``cleanuppad`` instruction is not +a terminator, so the unwind destination is stored on the ``cleanupret`` +instruction instead. Successfully executing a catch handler should resume +normal control flow, so neither ``catchpad`` nor ``catchret`` instructions can +unwind. All of these "unwind edges" may refer to a basic block that contains an +EH pad instruction, or they may unwind to the caller. Unwinding to the caller +has roughly the same semantics as the ``resume`` instruction in the landingpad +model. When inlining through an invoke, instructions that unwind to the caller +are hooked up to unwind to the unwind destination of the call site. + +Putting things together, here is a hypothetical lowering of some C++ that uses +all of the new IR instructions: + +.. code-block:: c + + struct Cleanup { + Cleanup(); + ~Cleanup(); + int m; + }; + void may_throw(); + int f() noexcept { + try { + Cleanup obj; + may_throw(); + } catch (int e) { + may_throw(); + return e; + } + return 0; + } + +.. 
code-block:: llvm + + define i32 @f() nounwind personality i32 (...)* @__CxxFrameHandler3 { + entry: + %obj = alloca %struct.Cleanup, align 4 + %e = alloca i32, align 4 + %call = invoke %struct.Cleanup* @"\01??0Cleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) + to label %invoke.cont unwind label %lpad.catch + + invoke.cont: ; preds = %entry + invoke void @"\01?may_throw@@YAXXZ"() + to label %invoke.cont.2 unwind label %lpad.cleanup + + invoke.cont.2: ; preds = %invoke.cont + call void @"\01??_DCleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) nounwind + br label %return + + return: ; preds = %invoke.cont.3, %invoke.cont.2 + %retval.0 = phi i32 [ 0, %invoke.cont.2 ], [ %3, %invoke.cont.3 ] + ret i32 %retval.0 + + lpad.cleanup: ; preds = %invoke.cont.2 + %0 = cleanuppad within none [] + call void @"\01??1Cleanup@@QEAA@XZ"(%struct.Cleanup* nonnull %obj) nounwind + cleanupret %0 unwind label %lpad.catch + + lpad.catch: ; preds = %lpad.cleanup, %entry + %1 = catchswitch within none [label %catch.body] unwind label %lpad.terminate + + catch.body: ; preds = %lpad.catch + %catch = catchpad within %1 [%rtti.TypeDescriptor2* @"\01??_R0H@8", i32 0, i32* %e] + invoke void @"\01?may_throw@@YAXXZ"() + to label %invoke.cont.3 unwind label %lpad.terminate + + invoke.cont.3: ; preds = %catch.body + %3 = load i32, i32* %e, align 4 + catchret from %catch to label %return + + lpad.terminate: ; preds = %catch.body, %lpad.catch + cleanuppad within none [] + call void @"\01?terminate@@YAXXZ" + unreachable + } + +Funclet parent tokens +----------------------- + +In order to produce tables for EH personalities that use funclets, it is +necessary to recover the nesting that was present in the source. This funclet +parent relationship is encoded in the IR using tokens produced by the new "pad" +instructions. The token operand of a "pad" or "ret" instruction indicates which +funclet it is in, or "none" if it is not nested within another funclet. + +The ``catchpad`` and ``cleanuppad`` instructions establish new funclets, and +their tokens are consumed by other "pad" instructions to establish membership. +The ``catchswitch`` instruction does not create a funclet, but it produces a +token that is always consumed by its immediate successor ``catchpad`` +instructions. This ensures that every catch handler modelled by a ``catchpad`` +belongs to exactly one ``catchswitch``, which models the dispatch point after a +C++ try. + +Here is an example of what this nesting looks like using some hypothetical +C++ code: + +.. code-block:: c + + void f() { + try { + throw; + } catch (...) { + try { + throw; + } catch (...) { + } + } + } + +.. 
code-block:: llvm
+
+  define void @f() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) {
+  entry:
+    invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+            to label %unreachable unwind label %catch.dispatch
+
+  catch.dispatch:                                   ; preds = %entry
+    %0 = catchswitch within none [label %catch] unwind to caller
+
+  catch:                                            ; preds = %catch.dispatch
+    %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+    invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+            to label %unreachable unwind label %catch.dispatch2
+
+  catch.dispatch2:                                  ; preds = %catch
+    %2 = catchswitch within %1 [label %catch3] unwind to caller
+
+  catch3:                                           ; preds = %catch.dispatch2
+    %3 = catchpad within %2 [i8* null, i32 64, i8* null]
+    catchret from %3 to label %try.cont
+
+  try.cont:                                         ; preds = %catch3
+    catchret from %1 to label %try.cont6
+
+  try.cont6:                                        ; preds = %try.cont
+    ret void
+
+  unreachable:                                      ; preds = %catch, %entry
+    unreachable
+  }
+
+The "inner" ``catchswitch`` consumes ``%1``, the token produced by the
+``catchpad`` of the outer catchswitch.
diff --git a/docs/ExtendingLLVM.rst b/docs/ExtendingLLVM.rst
index 3fd54c8360e5..87f48c993425 100644
--- a/docs/ExtendingLLVM.rst
+++ b/docs/ExtendingLLVM.rst
@@ -49,9 +49,9 @@ function and then be turned into an instruction if warranted.
 
   Add an entry for your intrinsic. Describe its memory access characteristics
   for optimization (this controls whether it will be DCE'd, CSE'd, etc). Note
-  that any intrinsic using the ``llvm_int_ty`` type for an argument will
-  be deemed by ``tblgen`` as overloaded and the corresponding suffix will
-  be required on the intrinsic's name.
+  that any intrinsic using one of the ``llvm_any*_ty`` types for an argument or
+  return type will be deemed by ``tblgen`` as overloaded and the corresponding
+  suffix will be required on the intrinsic's name.
 
 #. ``llvm/lib/Analysis/ConstantFolding.cpp``:
 
diff --git a/docs/Frontend/PerformanceTips.rst b/docs/Frontend/PerformanceTips.rst
index 8d0abcd1c172..142d262eb657 100644
--- a/docs/Frontend/PerformanceTips.rst
+++ b/docs/Frontend/PerformanceTips.rst
@@ -11,12 +11,60 @@ Abstract
 
 The intended audience of this document is developers of language frontends
 targeting LLVM IR. This document is home to a collection of tips on how to
-generate IR that optimizes well. As with any optimizer, LLVM has its strengths
-and weaknesses. In some cases, surprisingly small changes in the source IR
-can have a large effect on the generated code.
+generate IR that optimizes well.
+
+IR Best Practices
+=================
+
+As with any optimizer, LLVM has its strengths and weaknesses. In some cases,
+surprisingly small changes in the source IR can have a large effect on the
+generated code.
+
+Beyond the specific items on the list below, it's worth noting that the most
+mature frontend for LLVM is Clang. As a result, the further your IR gets from
+what Clang might emit, the less likely it is to be effectively optimized. It
+can often be useful to write a quick C program with the semantics you're trying
+to model and see what decisions Clang's IRGen makes about what IR to emit.
+Studying Clang's CodeGen directory can also be a good source of ideas. Note
+that Clang and LLVM are explicitly version-locked, so you'll need to make sure
+you're using a Clang built from the same svn revision or release as the LLVM
+library you're using. As always, it's *strongly* recommended that you track
+tip of tree development, particularly during bring-up of a new project.
+
+The Basics
+^^^^^^^^^^^
+
+#.
+#. Make sure that your Modules contain both a data layout specification and
+   target triple. Without these pieces, none of the target specific
+   optimizations will be enabled. This can have a major effect on the
+   generated code quality.
+
+#. For each function or global emitted, use the most private linkage type
+   possible (private, internal or linkonce_odr preferably). Doing so will
+   make LLVM's inter-procedural optimizations much more effective.
+
+#. Avoid high in-degree basic blocks (e.g. basic blocks with dozens or hundreds
+   of predecessors). Among other issues, the register allocator is known to
+   perform badly when confronted with such structures. The only exception to
+   this guidance is that a unified return block with high in-degree is fine.
+
+Use of allocas
+^^^^^^^^^^^^^^
+
+An alloca instruction can be used to represent a function scoped stack slot,
+but can also represent dynamic frame expansion. When representing function
+scoped variables or locations, placing alloca instructions at the beginning of
+the entry block should be preferred. In particular, place them before any
+call instructions. Call instructions might get inlined and replaced with
+multiple basic blocks. The end result is that a following alloca instruction
+would no longer be in the entry basic block afterward.
+
+The SROA (Scalar Replacement Of Aggregates) and Mem2Reg passes only attempt
+to eliminate alloca instructions that are in the entry basic block. Since
+SSA is the canonical form expected by much of the optimizer, if allocas cannot
+be eliminated by Mem2Reg or SROA, the optimizer is likely to be less effective
+than it could be.
 
 Avoid loads and stores of large aggregate type
-================================================
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 LLVM currently does not optimize well loads and stores of large
 :ref:`aggregate types ` (i.e. structs and arrays). As an alternative, consider
@@ -27,7 +75,7 @@ instruction supported by the targeted hardware are well supported. These
 can be an effective way to represent collections of small packed fields.
 
 Prefer zext over sext when legal
-==================================
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 On some architectures (X86_64 is one), sign extension can involve an extra
 instruction whereas zero extension can be folded into a load. LLVM will try to
@@ -39,7 +87,7 @@ Alternatively, you can :ref:`specify the range of the value using metadata
 ` and LLVM can do the sext to zext conversion for you.
 
 Zext GEP indices to machine register width
-============================================
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Internally, LLVM often promotes the width of GEP indices to machine
 register width. When it does so, it will default to using sign extension (sext)
@@ -47,47 +95,37 @@ operations for safety. If your source language provides information
 about the range of the index, you may wish to manually extend indices to
 machine register width using a zext instruction.
 
-Other things to consider
-=========================
+When to specify alignment
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+LLVM will always generate correct code if you don’t specify alignment, but may
+generate inefficient code. For example, if you are targeting MIPS (or older
+ARM ISAs) then the hardware does not handle unaligned loads and stores, and
+so you will enter a trap-and-emulate path if you do a load or store with
+lower-than-natural alignment. To avoid this, LLVM will emit a slower
+sequence of loads, shifts and masks (or load-right + load-left on MIPS) for
+all cases where the load / store does not have a sufficiently high alignment
+in the IR.
 
-#. Make sure that a DataLayout is provided (this will likely become required in
-   the near future, but is certainly important for optimization).
+The alignment is used to guarantee the alignment on allocas and globals,
+though in most cases this is unnecessary (most targets have a sufficiently
+high default alignment that they’ll be fine). It is also used to provide a
+contract to the back end saying ‘either this load/store has this alignment, or
+it is undefined behavior’. This means that the back end is free to emit
+instructions that rely on that alignment (and mid-level optimizers are free to
+perform transforms that require that alignment). For x86, it doesn’t make
+much difference, as almost all instructions are alignment-independent. For
+MIPS, it can make a big difference.
 
-#. Add nsw/nuw flags as appropriate. Reasoning about overflow is
-   generally hard for an optimizer so providing these facts from the frontend
-   can be very impactful.
+Note that if your loads and stores are atomic, the backend will be unable to
+lower an under-aligned access into a sequence of natively aligned accesses.
+As a result, alignment is mandatory for atomic loads and stores.
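+
+As a rough sketch of what this looks like at the IR level (the pointers
+``%p`` and ``%q`` here are hypothetical):
+
+.. code-block:: llvm
+
+    ; Only 1-byte alignment is guaranteed, so a strict-alignment target
+    ; (e.g. MIPS) must lower this to several narrower loads plus shifts
+    ; and masks rather than a single word load.
+    %v = load i32, i32* %p, align 1
+
+    ; Atomic accesses must be naturally aligned; the backend cannot split
+    ; an under-aligned atomic access into smaller aligned operations.
+    %w = load atomic i32, i32* %q seq_cst, align 4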
-#. Use fast-math flags on floating point operations if legal. If you don't
-   need strict IEEE floating point semantics, there are a number of additional
-   optimizations that can be performed. This can be highly impactful for
-   floating point intensive computations.
-
-#. Use inbounds on geps. This can help to disambiguate some aliasing queries.
-
-#. Add noalias/align/dereferenceable/nonnull to function arguments and return
-   values as appropriate
-
-#. Mark functions as readnone/readonly or noreturn/nounwind when known. The
-   optimizer will try to infer these flags, but may not always be able to.
-   Manual annotations are particularly important for external functions that
-   the optimizer can not analyze.
+Other Things to Consider
+^^^^^^^^^^^^^^^^^^^^^^^^
 
 #. Use ptrtoint/inttoptr sparingly (they interfere with pointer aliasing
    analysis), prefer GEPs
 
-#. Use the lifetime.start/lifetime.end and invariant.start/invariant.end
-   intrinsics where possible. Common profitable uses are for stack like data
-   structures (thus allowing dead store elimination) and for describing
-   life times of allocas (thus allowing smaller stack sizes).
-
-#. Use pointer aliasing metadata, especially tbaa metadata, to communicate
-   otherwise-non-deducible pointer aliasing facts
-
-#. Use the "most-private" possible linkage types for the functions being defined
-   (private, internal or linkonce_odr preferably)
-
-#. Mark invariant locations using !invariant.load and TBAA's constant flags
-
 #. Prefer globals over inttoptr of a constant address - this gives you
    dereferenceability information. In MCJIT, use getSymbolAddress to provide
    actual address.
@@ -104,15 +142,6 @@ Other things to consider
    desired. This is generally not required because the optimizer will
    convert an invoke with an unreachable unwind destination to a call
    instruction.
 
-#. If you language uses range checks, consider using the IRCE pass. It is not
-   currently part of the standard pass order.
-
-#. For languages with numerous rarely executed guard conditions (e.g. null
-   checks, type checks, range checks) consider adding an extra execution or
-   two of LoopUnswith and LICM to your pass order. The standard pass order,
-   which is tuned for C and C++ applications, may not be sufficient to remove
-   all dischargeable checks from loops.
-
 #. Use profile metadata to indicate statically known cold paths, even if
    dynamic profiling information is not available. This can make a large
    difference in code placement and thus the performance of tight loops.
@@ -136,11 +165,6 @@ Other things to consider
    improvement. Note that this is not always profitable and does involve a
    potentially large increase in code size.
 
-#. Avoid high in-degree basic blocks (e.g. basic blocks with dozens or hundreds
-   of predecessors). Among other issues, the register allocator is known to
-   perform badly with confronted with such structures. The only exception to
-   this guidance is that a unified return block with high in-degree is fine.
-
 #. When checking a value against a constant, emit the check using a consistent
    comparison type. The GVN pass *will* optimize redundant equalities even if
    the type of comparison is inverted, but GVN only runs late in the pipeline.
@@ -164,10 +188,99 @@ Other things to consider
    time and optimization effectiveness. The former is fixable with enough
    effort, but the latter is fairly fundamental to their designed purpose.
 
-p.s. If you want to help improve this document, patches expanding any of the
-above items into standalone sections of their own with a more complete
-discussion would be very welcome.
+Describing Language Specific Properties
+=======================================
+
+When translating a source language to LLVM, finding ways to express concepts
+and guarantees available in your source language which are not natively
+provided by LLVM IR will greatly improve LLVM's ability to optimize your code.
+As an example, C/C++'s ability to mark every add as "no signed wrap (nsw)" goes
+a long way to assisting the optimizer in reasoning about loop induction
+variables and thus generating more optimal code for loops.
+
+The LLVM LangRef includes a number of mechanisms for annotating the IR with
+additional semantic information. It is *strongly* recommended that you become
+highly familiar with this document. The list below is intended to highlight a
+couple of items of particular interest, but is by no means exhaustive.
+
+Restricted Operation Semantics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+#. Add nsw/nuw flags as appropriate. Reasoning about overflow is
+   generally hard for an optimizer so providing these facts from the frontend
+   can be very impactful.
+
+#. Use fast-math flags on floating point operations if legal. If you don't
+   need strict IEEE floating point semantics, there are a number of additional
+   optimizations that can be performed. This can be highly impactful for
+   floating point intensive computations.
+
+Describing Aliasing Properties
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+#. Add noalias/align/dereferenceable/nonnull to function arguments and return
+   values as appropriate.
+
+#. Use pointer aliasing metadata, especially tbaa metadata, to communicate
+   otherwise-non-deducible pointer aliasing facts.
+
+#. Use inbounds on geps. This can help to disambiguate some aliasing queries.
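+
+As a quick illustration, here is a hypothetical function carrying several of
+these annotations (the annotations, not the arithmetic, are the point):
+
+.. code-block:: llvm
+
+    define i32 @sum2(i32* noalias nonnull dereferenceable(8) %a) {
+      ; an inbounds GEP plus the noalias/dereferenceable arguments give
+      ; alias analysis much more to work with
+      %a1 = getelementptr inbounds i32, i32* %a, i64 1
+      %v0 = load i32, i32* %a, align 4
+      %v1 = load i32, i32* %a1, align 4
+      %s = add nsw i32 %v0, %v1
+      ret i32 %s
+    }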
+
+Modeling Memory Effects
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+#. Mark functions as readnone/readonly/argmemonly or noreturn/nounwind when
+   known. The optimizer will try to infer these flags, but may not always be
+   able to. Manual annotations are particularly important for external
+   functions that the optimizer cannot analyze.
+
+#. Use the lifetime.start/lifetime.end and invariant.start/invariant.end
+   intrinsics where possible. Common profitable uses are for stack-like data
+   structures (thus allowing dead store elimination) and for describing
+   lifetimes of allocas (thus allowing smaller stack sizes).
+
+#. Mark invariant locations using !invariant.load and TBAA's constant flags
+
+Pass Ordering
+^^^^^^^^^^^^^
+
+One of the most common mistakes made by new language frontend projects is to
+use the existing -O2 or -O3 pass pipelines as is. These pass pipelines make a
+good starting point for an optimizing compiler for any language, but they have
+been carefully tuned for C and C++, not your target language. You will almost
+certainly need to use a custom pass order to achieve optimal performance. A
+couple of specific suggestions:
+
+#. For languages with numerous rarely executed guard conditions (e.g. null
+   checks, type checks, range checks) consider adding an extra execution or
+   two of LoopUnswitch and LICM to your pass order. The standard pass order,
+   which is tuned for C and C++ applications, may not be sufficient to remove
+   all dischargeable checks from loops.
+
+#. If your language uses range checks, consider using the IRCE pass. It is
+   not currently part of the standard pass order.
+
+#. A useful sanity check is to run your optimized IR back through the -O2
+   pipeline again. If you see noticeable improvement in the resulting IR,
+   you likely need to adjust your pass order.
+
+
+I Still Can't Find What I'm Looking For
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If you didn't find what you were looking for above, consider proposing a piece
+of metadata which provides the optimization hint you need. Such extensions are
+relatively common and are generally well received by the community. You will
+need to ensure that your proposal is sufficiently general so that it benefits
+others if you wish to contribute it upstream.
+
+You should also consider describing the problem you're facing on `llvm-dev
+<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ and asking for advice.
+It's entirely possible someone has encountered your problem before and can
+give good advice. If there are multiple interested parties, that also
+increases the chances that a metadata extension would be well received by the
+community as a whole.
 
 Adding to this document
 =======================
diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst
index df6bd7bc6ba8..2585ce135ba6 100644
--- a/docs/GettingStarted.rst
+++ b/docs/GettingStarted.rst
@@ -1,5 +1,5 @@
 ====================================
-Getting Started with the LLVM System 
+Getting Started with the LLVM System
 ====================================
 
 .. contents::
@@ -49,12 +49,25 @@ Here's the short story for getting up and running quickly with LLVM:
 
   * ``cd llvm/tools``
   * ``svn co http://llvm.org/svn/llvm-project/cfe/trunk clang``
 
-#. Checkout Compiler-RT:
+#. Checkout Compiler-RT (required to build the sanitizers):
 
   * ``cd where-you-want-llvm-to-live``
   * ``cd llvm/projects``
  * ``svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk compiler-rt``
 
+#. Checkout Libomp (required for OpenMP support):
+
+  * ``cd where-you-want-llvm-to-live``
+  * ``cd llvm/projects``
+  * ``svn co http://llvm.org/svn/llvm-project/openmp/trunk openmp``
+
+#. Checkout libcxx and libcxxabi **[Optional]**:
+
+  * ``cd where-you-want-llvm-to-live``
+  * ``cd llvm/projects``
+  * ``svn co http://llvm.org/svn/llvm-project/libcxx/trunk libcxx``
+  * ``svn co http://llvm.org/svn/llvm-project/libcxxabi/trunk libcxxabi``
+
 #. Get the Test Suite Source Code **[Optional]**
 
   * ``cd where-you-want-llvm-to-live``
@@ -62,7 +75,7 @@ Here's the short story for getting up and running quickly with LLVM:
   * ``svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite``
 
 #. Configure and build LLVM and Clang:
-
+
   The usual build uses `CMake <http://www.cmake.org/>`_. If you would rather
   use autotools, see `Building LLVM with autotools <BuildingLLVMWithAutotools.html>`_.
@@ -70,16 +83,16 @@ Here's the short story for getting up and running quickly with LLVM:
 
   * ``mkdir build``
   * ``cd build``
   * ``cmake -G <generator> [options] <path to llvm sources>``
-
+
   Some common generators are:
 
   * ``Unix Makefiles`` --- for generating make-compatible parallel makefiles.
   * ``Ninja`` --- for generating `Ninja `
-    build files.
+    build files. Most llvm developers use Ninja.
   * ``Visual Studio`` --- for generating Visual Studio projects and
     solutions.
   * ``Xcode`` --- for generating Xcode projects.
-
+
   Some Common options:
 
   * ``-DCMAKE_INSTALL_PREFIX=directory`` --- Specify for *directory* the full
@@ -125,20 +138,20 @@ Hardware
 
 LLVM is known to work on the following host platforms:
 
 ================== ===================== =============
-OS                 Arch                  Compilers 
+OS                 Arch                  Compilers
 ================== ===================== =============
-Linux              x86\ :sup:`1`         GCC, Clang 
-Linux              amd64                 GCC, Clang 
-Linux              ARM\ :sup:`4`         GCC, Clang 
-Linux              PowerPC               GCC, Clang 
-Solaris            V9 (Ultrasparc)       GCC 
-FreeBSD            x86\ :sup:`1`         GCC, Clang 
-FreeBSD            amd64                 GCC, Clang 
-MacOS X\ :sup:`2`  PowerPC               GCC 
-MacOS X            x86                   GCC, Clang 
-Cygwin/Win32       x86\ :sup:`1, 3`      GCC 
-Windows            x86\ :sup:`1`         Visual Studio 
-Windows x64        x86-64                Visual Studio 
+Linux              x86\ :sup:`1`         GCC, Clang
+Linux              amd64                 GCC, Clang
+Linux              ARM\ :sup:`4`         GCC, Clang
+Linux              PowerPC               GCC, Clang
+Solaris            V9 (Ultrasparc)       GCC
+FreeBSD            x86\ :sup:`1`         GCC, Clang
+FreeBSD            amd64                 GCC, Clang
+MacOS X\ :sup:`2`  PowerPC               GCC
+MacOS X            x86                   GCC, Clang
+Cygwin/Win32       x86\ :sup:`1, 3`      GCC
+Windows            x86\ :sup:`1`         Visual Studio
+Windows x64        x86-64                Visual Studio
 ================== ===================== =============
 
 .. note::
@@ -207,14 +220,14 @@ Unix utilities. Specifically:
 
 * **chmod** --- change permissions on a file
 * **cat** --- output concatenation utility
 * **cp** --- copy files
-* **date** --- print the current date/time 
+* **date** --- print the current date/time
 * **echo** --- print to standard output
 * **egrep** --- extended regular expression search utility
 * **find** --- find files/dirs in a file system
 * **grep** --- regular expression search utility
 * **gzip** --- gzip command for distribution generation
 * **gunzip** --- gunzip command for distribution checking
-* **install** --- install directories/files 
+* **install** --- install directories/files
 * **mkdir** --- create a directory
 * **mv** --- move (rename) files
 * **ranlib** --- symbol table builder for archive libraries
@@ -521,13 +534,28 @@ If you want to check out clang too, run:
 
   % cd llvm/tools
   % git clone http://llvm.org/git/clang.git
 
-If you want to check out compiler-rt too, run:
+If you want to check out compiler-rt (required to build the sanitizers), run:
 
 .. code-block:: console
 
   % cd llvm/projects
   % git clone http://llvm.org/git/compiler-rt.git
 
+If you want to check out libomp (required for OpenMP support), run:
+
+.. code-block:: console
+
+  % cd llvm/projects
+  % git clone http://llvm.org/git/openmp.git
+
+If you want to check out libcxx and libcxxabi (optional), run:
+
+.. code-block:: console
+
+  % cd llvm/projects
+  % git clone http://llvm.org/git/libcxx.git
+  % git clone http://llvm.org/git/libcxxabi.git
+
 If you want to check out the Test Suite Source Code (optional), run:
 
 .. code-block:: console
@@ -619,7 +647,7 @@ To set up clone from which you can submit code using ``git-svn``, run:
 
   % git config svn-remote.svn.fetch :refs/remotes/origin/master
   % git svn rebase -l
 
-Likewise for compiler-rt and test-suite.
+Likewise for compiler-rt, libomp and test-suite.
 
 To update this clone without generating git-svn tags that conflict with the
 upstream Git repo, run:
@@ -633,7 +661,7 @@ upstream Git repo, run:
       git checkout master &&
       git svn rebase -l)
 
-Likewise for compiler-rt and test-suite.
+Likewise for compiler-rt, libomp and test-suite.
 
 This leaves your working directories on their master branches, so you'll need
 to ``checkout`` each working branch individually and ``rebase`` it on top of
 its
@@ -838,7 +866,7 @@ with the latest Xcode:
 
 .. code-block:: console
 
-  % cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES=“armv7;armv7s;arm64"
+  % cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES="armv7;armv7s;arm64"
     -DCMAKE_TOOLCHAIN_FILE=<PATH_TO_LLVM>/cmake/platforms/iOS.cmake
     -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_RUNTIME=Off -DLLVM_INCLUDE_TESTS=Off
     -DLLVM_INCLUDE_EXAMPLES=Off -DLLVM_ENABLE_BACKTRACES=Off [options]
@@ -881,7 +909,7 @@ Underneath that directory there is another directory with a name ending in
 For example:
 
 .. code-block:: console
-
+
   % cd llvm_build_dir
   % find lib/Support/ -name APFloat*
   lib/Support/CMakeFiles/LLVMSupport.dir/APFloat.cpp.o
@@ -990,7 +1018,7 @@ different `tools`_.
   code generation. For example, the ``llvm/lib/Target/X86`` directory holds the
   X86 machine description while ``llvm/lib/Target/ARM`` implements the ARM
   backend.
-
+
 ``llvm/lib/CodeGen/``
 
   This directory contains the major parts of the code generator: Instruction
@@ -1075,7 +1103,7 @@ the `Command Guide <CommandGuide/index.html>`_.
 
   The archiver produces an archive containing the given LLVM bitcode files,
   optionally with an index for faster lookup.
-
+
 ``llvm-as``
 
   The assembler transforms the human readable LLVM assembly to LLVM bitcode.
@@ -1088,7 +1116,7 @@ the `Command Guide <CommandGuide/index.html>`_.
 
   ``llvm-link``, not surprisingly, links multiple LLVM modules into a single
   program.
-
+
 ``lli``
 
   ``lli`` is the LLVM interpreter, which can directly execute LLVM bitcode
@@ -1219,7 +1247,7 @@ Example with clang
 .. code-block:: console
 
   % ./hello
-
+
 and
 
 .. code-block:: console
diff --git a/docs/HowToBuildOnARM.rst b/docs/HowToBuildOnARM.rst
index 6579d36a72a6..356c846d82bc 100644
--- a/docs/HowToBuildOnARM.rst
+++ b/docs/HowToBuildOnARM.rst
@@ -18,33 +18,44 @@ Here are some notes on building/testing LLVM/Clang on ARM. Note that ARM
 encompasses a wide variety of CPUs; this advice is primarily based
 on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
 
-#. If you are building LLVM/Clang on an ARM board with 1G of memory or less,
-   please use ``gold`` rather then GNU ``ld``.
-   Building LLVM/Clang with ``--enable-optimized``
-   is preferred since it consumes less memory. Otherwise, the building
-   process will very likely fail due to insufficient memory. In any
-   case it is probably a good idea to set up a swap partition.
-
-#. If you want to run ``make check-all`` after building LLVM/Clang, to avoid
-   false alarms (e.g., ARCMT failure) please use at least the following
-   configuration:
-
-   .. code-block:: bash
-
-     $ ../$LLVM_SRC_DIR/configure --with-abi=aapcs-vfp
-
 #. The most popular Linaro/Ubuntu OS's for ARM boards, e.g., the
-   Pandaboard, have become hard-float platforms. The following set
-   of configuration options appears to be a good choice for this
-   platform:
+   Pandaboard, have become hard-float platforms. There are a number of
+   choices when using CMake. Autoconf usage is deprecated as of 3.8.
+
+   Building LLVM/Clang in ``Release`` mode is preferred since it consumes
+   a lot less memory. Otherwise, the building process will very likely
+   fail due to insufficient memory. It's also a lot quicker to only build
+   the relevant back-ends (ARM and AArch64), since it's very unlikely that
+   you'll use an ARM board to cross-compile to other arches. If you're
+   running Compiler-RT tests, also include the x86 back-end, or some tests
+   will fail.
 
    .. code-block:: bash
 
-     ../$LLVM_SRC_DIR/configure --build=armv7l-unknown-linux-gnueabihf \
-     --host=armv7l-unknown-linux-gnueabihf \
-     --target=armv7l-unknown-linux-gnueabihf --with-cpu=cortex-a9 \
-     --with-float=hard --with-abi=aapcs-vfp --with-fpu=neon \
-     --enable-targets=arm --enable-optimized --enable-assertions
+     cmake $LLVM_SRC_DIR -DCMAKE_BUILD_TYPE=Release \
+           -DLLVM_TARGETS_TO_BUILD="ARM;X86;AArch64"
+
+   Other options you can use are:
+
+   .. code-block:: bash
+
+     Use Ninja instead of Make: "-G Ninja"
+     Build with assertions on: "-DLLVM_ENABLE_ASSERTIONS=True"
+     Force Python2: "-DPYTHON_EXECUTABLE=/usr/bin/python2"
+     Local (non-sudo) install path: "-DCMAKE_INSTALL_PREFIX=$HOME/llvm/install"
+     CPU flags: "-DCMAKE_C_FLAGS=-mcpu=cortex-a15" (same for CXX_FLAGS)
+
+   After that, just typing ``make -jN`` or ``ninja`` will build everything.
+   ``make -jN check-all`` or ``ninja check-all`` will run all compiler tests. For
+   running the test suite, please refer to :doc:`TestingGuide`.
+
+#. If you are building LLVM/Clang on an ARM board with 1G of memory or less,
+   please use ``gold`` rather than GNU ``ld``. In any case it is probably a
+   good idea to set up a swap partition, too.
+
+   .. code-block:: bash
+
+     $ sudo ln -sf /usr/bin/ld.gold /usr/bin/ld
 
 #. ARM development boards can be unstable and you may experience that cores
    are disappearing, caches being flushed on every big.LITTLE switch, and
@@ -58,6 +69,10 @@ on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
       sudo cpufreq-set -c $cpu -g performance
    done
 
+   Remember to turn that off after the build, or you may risk burning your
+   CPU. Most modern kernels don't need that, so only use it if you have
+   problems.
+
 #. Running the build on SD cards is ok, but they are more prone to failures
    than good quality USB sticks, and those are more prone to failures than
    external hard-drives (those are also a lot faster). So, at least, you
@@ -66,4 +81,5 @@ on the ARMv6 and ARMv7 architectures and may be inapplicable to older chips.
 
 #. Make sure you have a decent power supply (dozens of dollars worth) that can
    provide *at least* 4 amperes, this is especially important if you use USB
-   devices with your board.
+   devices with your board. Externally powered USB/SATA hard drives are even
+   better than having a good power supply.
diff --git a/docs/HowToReleaseLLVM.rst b/docs/HowToReleaseLLVM.rst
index 26e9f3b2ee87..33c547e97a88 100644
--- a/docs/HowToReleaseLLVM.rst
+++ b/docs/HowToReleaseLLVM.rst
@@ -136,51 +136,24 @@ Regenerate the configure scripts for both ``llvm`` and the ``test-suite``.
 
 In addition, the version numbers of all the Bugzilla components must be updated
 for the next release.
-Build the LLVM Release Candidates -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Tagging the LLVM Release Candidates +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Create release candidates for ``llvm``, ``clang``, ``dragonegg``, and the LLVM -``test-suite`` by tagging the branch with the respective release candidate -number. For instance, to create **Release Candidate 1** you would issue the -following commands: +Tag release candidates using the tag.sh script in utils/release. :: - $ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XYZ - $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \ - https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XYZ/rc1 - - $ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XYZ - $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \ - https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XYZ/rc1 - - $ svn mkdir https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XYZ - $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \ - https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XYZ/rc1 - - $ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XYZ - $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \ - https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XYZ/rc1 - -Similarly, **Release Candidate 2** would be named ``RC2`` and so on. This keeps -a permanent copy of the release candidate around for people to export and build -as they wish. The final released sources will be tagged in the ``RELEASE_XYZ`` -directory as ``Final`` (c.f. :ref:`tag`). + $ ./tag.sh -release X.Y.Z -rc $RC The Release Manager may supply pre-packaged source tarballs for users. This can -be done with the following commands: +be done with the export.sh script in utils/release. :: - $ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XYZ/rc1 llvm-X.Yrc1 - $ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XYZ/rc1 clang-X.Yrc1 - $ svn export https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XYZ/rc1 dragonegg-X.Yrc1 - $ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XYZ/rc1 llvm-test-X.Yrc1 + $ ./export.sh -release X.Y.Z -rc $RC - $ tar -cvf - llvm-X.Yrc1 | gzip > llvm-X.Yrc1.src.tar.gz - $ tar -cvf - clang-X.Yrc1 | gzip > clang-X.Yrc1.src.tar.gz - $ tar -cvf - dragonegg-X.Yrc1 | gzip > dragonegg-X.Yrc1.src.tar.gz - $ tar -cvf - llvm-test-X.Yrc1 | gzip > llvm-test-X.Yrc1.src.tar.gz +This will generate source tarballs for each LLVM project being validated, which +can be uploaded to the website for further testing. Building the Release -------------------- @@ -384,21 +357,11 @@ mainline into the release branch. Tag the LLVM Final Release ^^^^^^^^^^^^^^^^^^^^^^^^^^ -Tag the final release sources using the following procedure: +Tag the final release sources using the tag.sh script in utils/release. 
:: - $ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \ - https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XYZ/Final - - $ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \ - https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XYZ/Final - - $ svn copy https://llvm.org/svn/llvm-project/dragonegg/branches/release_XY \ - https://llvm.org/svn/llvm-project/dragonegg/tags/RELEASE_XYZ/Final - - $ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \ - https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XYZ/Final + $ ./tag.sh -release X.Y.Z -final Update the LLVM Demo Page ------------------------- diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 0039d014275a..103d876b3cef 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -83,7 +83,7 @@ identifiers, for different purposes: can be used on global variables to suppress mangling. #. Unnamed values are represented as an unsigned numeric value with their prefix. For example, ``%12``, ``@2``, ``%44``. -#. Constants, which are described in the section Constants_ below. +#. Constants, which are described in the section Constants_ below. LLVM requires that values start with a prefix for two reasons: Compilers don't need to worry about name clashes with reserved words, and the set @@ -204,14 +204,15 @@ linkage: (``STB_LOCAL`` in the case of ELF) in the object file. This corresponds to the notion of the '``static``' keyword in C. ``available_externally`` - Globals with "``available_externally``" linkage are never emitted - into the object file corresponding to the LLVM module. They exist to - allow inlining and other optimizations to take place given knowledge - of the definition of the global, which is known to be somewhere - outside the module. Globals with ``available_externally`` linkage - are allowed to be discarded at will, and are otherwise the same as - ``linkonce_odr``. This linkage type is only allowed on definitions, - not declarations. + Globals with "``available_externally``" linkage are never emitted into + the object file corresponding to the LLVM module. From the linker's + perspective, an ``available_externally`` global is equivalent to + an external declaration. They exist to allow inlining and other + optimizations to take place given knowledge of the definition of the + global, which is known to be somewhere outside the module. Globals + with ``available_externally`` linkage are allowed to be discarded at + will, and allow inlining and other optimizations. This linkage type is + only allowed on definitions, not declarations. ``linkonce`` Globals with "``linkonce``" linkage are merged with other globals of the same name when linkage occurs. This can be used to implement @@ -257,7 +258,7 @@ linkage: Some languages allow differing globals to be merged, such as two functions with different semantics. Other languages, such as ``C++``, ensure that only equivalent globals are ever merged (the - "one definition rule" --- "ODR"). Such languages can use the + "one definition rule" --- "ODR"). Such languages can use the ``linkonce_odr`` and ``weak_odr`` linkage types to indicate that the global will only be merged with equivalent globals. These linkage types are otherwise the same as their non-``odr`` versions. @@ -406,6 +407,26 @@ added in the future: This calling convention, like the `PreserveMost` calling convention, will be used by a future version of the ObjectiveC runtime and should be considered experimental at this time. 
+"``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions + Clang generates an access function to access C++-style TLS. The access + function generally has an entry block, an exit block and an initialization + block that is run at the first time. The entry and exit blocks can access + a few TLS IR variables, each access will be lowered to a platform-specific + sequence. + + This calling convention aims to minimize overhead in the caller by + preserving as many registers as possible (all the registers that are + perserved on the fast path, composed of the entry and exit blocks). + + This calling convention behaves identical to the `C` calling convention on + how arguments and return values are passed, but it uses a different set of + caller/callee-saved registers. + + Given that each platform has its own lowering sequence, hence its own set + of preserved registers, we can't use the existing `PreserveMost`. + + - On X86-64 the callee preserves all general purpose registers, except for + RDI and RAX. "``cc ``" - Numbered convention Any calling convention may be specified by number, allowing target-specific calling conventions to be used. Target specific @@ -491,26 +512,29 @@ more information on under which circumstances the different models may be used. The target may choose a different TLS model if the specified model is not supported, or if a better choice of model can be made. -A model can also be specified in a alias, but then it only governs how +A model can also be specified in an alias, but then it only governs how the alias is accessed. It will not have any effect in the aliasee. +For platforms without linker support of ELF TLS model, the -femulated-tls +flag can be used to generate GCC compatible emulated TLS code. + .. _namedtypes: Structure Types --------------- LLVM IR allows you to specify both "identified" and "literal" :ref:`structure -types `. Literal types are uniqued structurally, but identified types -are never uniqued. An :ref:`opaque structural type ` can also be used +types `. Literal types are uniqued structurally, but identified types +are never uniqued. An :ref:`opaque structural type ` can also be used to forward declare a type that is not yet available. -An example of a identified structure specification is: +An example of an identified structure specification is: .. code-block:: llvm %mytype = type { %mytype*, i32 } -Prior to the LLVM 3.0 release, identified types were structurally uniqued. Only +Prior to the LLVM 3.0 release, identified types were structurally uniqued. Only literal types are uniqued in recent versions of LLVM. .. _globalvars: @@ -569,7 +593,7 @@ support. By default, global initializers are optimized by assuming that global variables defined within the module are not modified from their -initial values before the start of the global initializer. This is +initial values before the start of the global initializer. This is true even for variables potentially accessible from outside the module, including those with external linkage or appearing in ``@llvm.used`` or dllexported variables. This assumption may be suppressed @@ -637,6 +661,7 @@ an optional :ref:`comdat `, an optional :ref:`garbage collector name `, an optional :ref:`prefix `, an optional :ref:`prologue `, an optional :ref:`personality `, +an optional list of attached :ref:`metadata `, an opening curly brace, a list of basic blocks, and a closing curly brace. 
 LLVM function declarations consist of the "``declare``" keyword, an
@@ -685,10 +710,10 @@ Syntax::
 
     @<FunctionName> ([argument list]) [unnamed_addr] [fn Attrs]
     [section "name"] [comdat [($name)]] [align N] [gc]
     [prefix Constant] [prologue Constant]
-    [personality Constant] { ... }
+    [personality Constant] (!name !N)* { ... }
 
-The argument list is a comma seperated sequence of arguments where each
-argument is of the following form
+The argument list is a comma separated sequence of arguments where each
+argument is of the following form:
 
 Syntax::
 
@@ -712,7 +737,7 @@ Aliases may have an optional :ref:`linkage type `, an optional
 
 Syntax::
 
-    @<Name> = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] [unnamed_addr] alias @<Aliasee>
+    @<Name> = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] [unnamed_addr] alias <AliaseeTy>, <AliaseeTy>* @<Aliasee>
 
 The linkage must be one of ``private``, ``internal``, ``linkonce``, ``weak``,
 ``linkonce_odr``, ``weak_odr``, ``external``. Note that some system linkers
@@ -742,9 +767,9 @@ Comdats
 
 Comdat IR provides access to COFF and ELF object file COMDAT functionality.
 
-Comdats have a name which represents the COMDAT key. All global objects that
+Comdats have a name which represents the COMDAT key. All global objects that
 specify this key will only end up in the final object file if the linker chooses
-that key over some other key. Aliases are placed in the same COMDAT that their
+that key over some other key. Aliases are placed in the same COMDAT that their
 aliasee computes to, if any.
 
 Comdats have a selection kind to provide input on how the linker should
@@ -819,13 +844,13 @@ For example:
 
     @g2 = global i32 42, section "sec", comdat($bar)
 
 From the object file perspective, this requires the creation of two sections
-with the same name. This is necessary because both globals belong to different
+with the same name. This is necessary because both globals belong to different
 COMDAT groups and COMDATs, at the object file level, are represented by
 sections.
 
 Note that certain IR constructs like global variables and functions may
 create COMDATs in the object file in addition to any which are specified using
-COMDAT IR. This arises when the code generator is configured to emit globals
+COMDAT IR. This arises when the code generator is configured to emit globals
 in individual sections (e.g. when `-data-sections` or `-function-sections`
 is supplied to `llc`).
@@ -891,7 +916,7 @@ Currently, only the following parameter attributes are defined:
     the callee (for a return value).
 ``inreg``
     This indicates that this parameter or return value should be treated
-    in a special target-dependent fashion during while emitting code for
+    in a special target-dependent fashion while emitting code for
     a function call or return (usually, by putting it in a register as
     opposed to memory, though some targets use it to distinguish between
     two different kinds of registers). Use of this attribute is
@@ -919,23 +944,23 @@ Currently, only the following parameter attributes are defined:
 
 ``inalloca``
     The ``inalloca`` argument attribute allows the caller to take the
-    address of outgoing stack arguments. An ``inalloca`` argument must
+    address of outgoing stack arguments. An ``inalloca`` argument must
     be a pointer to stack memory produced by an ``alloca`` instruction.
     The alloca, or argument allocation, must also be tagged with the
-    inalloca keyword. Only the last argument may have the ``inalloca``
+    inalloca keyword. Only the last argument may have the ``inalloca``
     attribute, and that argument is guaranteed to be passed in memory.
 
     An argument allocation may be used by a call at most once because
-    the call may deallocate it. The ``inalloca`` attribute cannot be
+    the call may deallocate it. The ``inalloca`` attribute cannot be
    used in conjunction with other attributes that affect argument
-    storage, like ``inreg``, ``nest``, ``sret``, or ``byval``. The
+    storage, like ``inreg``, ``nest``, ``sret``, or ``byval``. The
    ``inalloca`` attribute also disables LLVM's implicit lowering of
    large aggregate return values, which means that frontend authors
    must lower them with ``sret`` pointers.
 
    When the call site is reached, the argument allocation must have
    been the most recent stack allocation that is still live, or the
-    results are undefined. It is possible to allocate additional stack
+    results are undefined. It is possible to allocate additional stack
    space after an argument allocation and before its call site, but it
    must be cleared off with :ref:`llvm.stackrestore `.
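+
+    As a minimal sketch, assuming a hypothetical callee ``@f`` that takes a
+    single ``i32`` through an argument allocation:
+
+    .. code-block:: llvm
+
+        %argmem = alloca inalloca <{ i32 }>
+        %p = getelementptr <{ i32 }>, <{ i32 }>* %argmem, i32 0, i32 0
+        store i32 42, i32* %p
+        call void @f(<{ i32 }>* inalloca %argmem)
+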
@@ -1024,14 +1049,14 @@ Currently, only the following parameter attributes are defined:
 
 ``dereferenceable_or_null(<n>)``
     This indicates that the parameter or return value isn't both
     non-null and non-dereferenceable (up to ``<n>`` bytes) at the same
-    time. All non-null pointers tagged with
+    time. All non-null pointers tagged with
     ``dereferenceable_or_null(<n>)`` are ``dereferenceable(<n>)``. For address
     space 0 ``dereferenceable_or_null(<n>)`` implies that a pointer is exactly
     one of ``dereferenceable(<n>)`` or ``null``, and in other address spaces
     ``dereferenceable_or_null(<n>)`` implies that a pointer is at least one of
     ``dereferenceable(<n>)`` or ``null`` (i.e. it may be both ``null`` and
-    ``dereferenceable(<n>)``). This attribute may only be applied to
+    ``dereferenceable(<n>)``). This attribute may only be applied to
     pointer typed parameters.
 
 .. _gc:
@@ -1047,9 +1072,9 @@ string:
 
     define void @f() gc "name" { ... }
 
 The supported values of *name* include those :ref:`built in to LLVM
-` and any provided by loaded plugins. Specifying a GC
+` and any provided by loaded plugins. Specifying a GC
 strategy will cause the compiler to alter its output in order to support the
-named garbage collection algorithm. Note that LLVM itself does not contain a
+named garbage collection algorithm. Note that LLVM itself does not contain a
 garbage collector; this functionality is restricted to generating machine code
 which can interoperate with a collector provided externally.
@@ -1067,7 +1092,7 @@ function pointer to be called.
 
 To access the data for a given function, a program may bitcast the
 function pointer to a pointer to the constant's type and dereference
-index -1. This implies that the IR symbol points just past the end of
+index -1. This implies that the IR symbol points just past the end of
 the prefix data.
 
 For instance, take the example of a function annotated with a single
 ``i32``,
 
 .. code-block:: llvm
 
     define void @f() prefix i32 123 { ... }
 
 The prefix data can be referenced as,
@@ -1084,14 +1109,14 @@
 
     %b = load i32, i32* %a
 
 Prefix data is laid out as if it were an initializer for a global variable
-of the prefix data's type. The function will be placed such that the
+of the prefix data's type. The function will be placed such that the
 beginning of the prefix data is aligned. This means that if the size of the
 prefix data is not a multiple of the alignment size, the function's
 entrypoint will not be aligned. If alignment of the function's entrypoint
 is desired, padding must be added to the prefix data.
 
-A function may have prefix data but no body. This has similar semantics
+A function may have prefix data but no body. This has similar semantics
 to the ``available_externally`` linkage in that the data may be used by the
 optimizers but will not be emitted in the object file.
@@ -1105,12 +1130,12 @@
 be inserted prior to the function body. This can be used for enabling
 function hot-patching and instrumentation.
 
 To maintain the semantics of ordinary function calls, the prologue data must
-have a particular format. Specifically, it must begin with a sequence of
+have a particular format. Specifically, it must begin with a sequence of
 bytes which decode to a sequence of machine instructions, valid for the
 module's target, which transfer control to the point immediately succeeding
-the prologue data, without performing any other visible action. This allows
+the prologue data, without performing any other visible action. This allows
 the inliner and other passes to reason about the semantics of the function
-definition without needing to reason about the prologue data. Obviously this
+definition without needing to reason about the prologue data. Obviously this
 makes the format of the prologue data highly target dependent.
 
 A trivial example of valid prologue data for the x86 architecture is ``i8 144``,
@@ -1130,7 +1155,7 @@ x86_64 architecture, where the first two bytes encode ``jmp .+10``:
 
     define void @f() prologue %0 <{ i8 235, i8 8, i8* @md}> { ... }
 
-A function may have prologue data but no body. This has similar semantics
+A function may have prologue data but no body. This has similar semantics
 to the ``available_externally`` linkage in that the data may be used by the
 optimizers but will not be emitted in the object file.
@@ -1216,10 +1241,16 @@ example:
 ``convergent``
     This attribute indicates that the callee is dependent on a convergent
     thread execution pattern under certain parallel execution models.
-    Transformations that are execution model agnostic may only move or
-    tranform this call if the final location is control equivalent to its
-    original position in the program, where control equivalence is defined as
-    A dominates B and B post-dominates A, or vice versa.
+    Transformations that are execution model agnostic may not make the execution
+    of a convergent operation control dependent on any additional values.
+``inaccessiblememonly``
+    This attribute indicates that the function may only access memory that
+    is not accessible by the module being compiled. This is a weaker form
+    of ``readnone``.
+``inaccessiblemem_or_argmemonly``
+    This attribute indicates that the function may only access memory that is
+    either not accessible by the module being compiled, or is pointed to
+    by its pointer arguments. This is a weaker form of ``argmemonly``.
 ``inlinehint``
     This attribute indicates that the source code contained a hint that
     inlining this function is desirable (such as the "inline" keyword in
@@ -1275,6 +1306,10 @@ example:
     This function attribute indicates that the function never returns
     normally. This produces undefined behavior at runtime if the
     function ever does dynamically return.
+``norecurse``
+    This function attribute indicates that the function does not call itself
+    either directly or indirectly down any possible call path. This produces
+    undefined behavior at runtime if the function ever does recurse.
 ``nounwind``
     This function attribute indicates that the function never raises an
     exception.
 If the function does raise an exception, its runtime
@@ -1283,9 +1318,9 @@ example:
     that are recognized by LLVM to handle asynchronous exceptions, such
     as SEH, will still provide their implementation defined semantics.
 ``optnone``
-    This function attribute indicates that the function is not optimized
-    by any optimization or code generator passes with the
-    exception of interprocedural optimization passes.
+    This function attribute indicates that most optimization passes will skip
+    this function, with the exception of interprocedural optimization passes.
+    Code generation defaults to the "fast" instruction selector.
     This attribute cannot be used together with the ``alwaysinline``
     attribute; this attribute is also incompatible
     with the ``minsize`` attribute and the ``optsize`` attribute.
@@ -1399,7 +1434,7 @@ example:
 ``sspstrong``
     This attribute indicates that the function should emit a stack smashing
     protector. This attribute causes a strong heuristic to be used when
-    determining if a function needs stack protectors. The strong heuristic
+    determining if a function needs stack protectors. The strong heuristic
     will enable protectors for functions with:
 
     - Arrays of any size and type
@@ -1430,11 +1465,129 @@ example:
     match the thunk target prototype.
 ``uwtable``
     This attribute indicates that the ABI being targeted requires that
-    an unwind table entry be produce for this function even if we can
+    an unwind table entry be produced for this function even if we can
     show that no exceptions pass by it. This is normally the case for
     the ELF x86-64 abi, but it can be disabled for some compilation
     units.
+
+.. _opbundles:
+
+Operand Bundles
+---------------
+
+Note: operand bundles are a work in progress, and they should be
+considered experimental at this time.
+
+Operand bundles are tagged sets of SSA values that can be associated
+with certain LLVM instructions (currently only ``call`` s and
+``invoke`` s). In a way they are like metadata, but dropping them is
+incorrect and will change program semantics.
+
+Syntax::
+
+    operand bundle set ::= '[' operand bundle (, operand bundle )* ']'
+    operand bundle ::= tag '(' [ bundle operand ] (, bundle operand )* ')'
+    bundle operand ::= SSA value
+    tag ::= string constant
+
+Operand bundles are **not** part of a function's signature, and a
+given function may be called from multiple places with different kinds
+of operand bundles. This reflects the fact that the operand bundles
+are conceptually a part of the ``call`` (or ``invoke``), not the
+callee being dispatched to.
+
+Operand bundles are a generic mechanism intended to support
+runtime-introspection-like functionality for managed languages. While
+the exact semantics of an operand bundle depend on the bundle tag,
+there are certain limitations to how much the presence of an operand
+bundle can influence the semantics of a program. These restrictions
+are described as the semantics of an "unknown" operand bundle. As
+long as the behavior of an operand bundle is describable within these
+restrictions, LLVM does not need to have special knowledge of the
+operand bundle to not miscompile programs containing it.
+
+- The bundle operands for an unknown operand bundle escape in unknown
+  ways before control is transferred to the callee or invokee.
+- Calls and invokes with operand bundles have unknown read / write
+  effect on the heap on entry and exit (even if the call target is
+  ``readnone`` or ``readonly``), unless they're overridden with
+  callsite specific attributes.
+- An operand bundle at a call site cannot change the implementation
+  of the called function. Inter-procedural optimizations work as
+  usual as long as they take into account the first two properties.
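+
+To make the syntax concrete, here is a call site carrying a hypothetical
+unknown operand bundle (the tag and operands are purely illustrative):
+
+.. code-block:: llvm
+
+    call void @g(i32 %x) [ "my-tag"(i32 %x, i64 10) ]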
+
+More specific types of operand bundles are described below.
+
+Deoptimization Operand Bundles
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Deoptimization operand bundles are characterized by the ``"deopt"``
+operand bundle tag. These operand bundles represent an alternate
+"safe" continuation for the call site they're attached to, and can be
+used by a suitable runtime to deoptimize the compiled frame at the
+specified call site. There can be at most one ``"deopt"`` operand
+bundle attached to a call site. Exact details of deoptimization are
+out of scope for the language reference, but it usually involves
+rewriting a compiled frame into a set of interpreted frames.
+
+From the compiler's perspective, deoptimization operand bundles make
+the call sites they're attached to at least ``readonly``. They read
+through all of their pointer typed operands (even if they're not
+otherwise escaped) and the entire visible heap. Deoptimization
+operand bundles do not capture their operands except during
+deoptimization, in which case control will not be returned to the
+compiled frame.
+
+The inliner knows how to inline through calls that have deoptimization
+operand bundles. Just like inlining through a normal call site
+involves composing the normal and exceptional continuations, inlining
+through a call site with a deoptimization operand bundle needs to
+appropriately compose the "safe" deoptimization continuation. The
+inliner does this by prepending the parent's deoptimization
+continuation to every deoptimization continuation in the inlined body.
+E.g. inlining ``@f`` into ``@g`` in the following example
+
+.. code-block:: llvm
+
+    define void @f() {
+      call void @x()  ;; no deopt state
+      call void @y() [ "deopt"(i32 10) ]
+      call void @y() [ "deopt"(i32 10), "unknown"(i8* null) ]
+      ret void
+    }
+
+    define void @g() {
+      call void @f() [ "deopt"(i32 20) ]
+      ret void
+    }
+
+will result in
+
+.. code-block:: llvm
+
+    define void @g() {
+      call void @x()  ;; still no deopt state
+      call void @y() [ "deopt"(i32 20, i32 10) ]
+      call void @y() [ "deopt"(i32 20, i32 10), "unknown"(i8* null) ]
+      ret void
+    }
+
+It is the frontend's responsibility to structure or encode the
+deoptimization state in a way that syntactically prepending the
+caller's deoptimization state to the callee's deoptimization state is
+semantically equivalent to composing the caller's deoptimization
+continuation after the callee's deoptimization continuation.
+
+Funclet Operand Bundles
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Funclet operand bundles are characterized by the ``"funclet"``
+operand bundle tag. These operand bundles indicate that a call site
+is within a particular funclet. There can be at most one
+``"funclet"`` operand bundle attached to a call site and it must have
+exactly one bundle operand.
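+
+For example, reusing the ``cleanuppad`` syntax shown earlier, a call to a
+hypothetical destructor from inside a cleanup funclet would be annotated as:
+
+.. code-block:: llvm
+
+    %cp = cleanuppad within none []
+    call void @dtor(i8* %ptr) [ "funclet"(token %cp) ]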
+
 .. _moduleasm:
 
 Module-Level Inline Assembly
@@ -1494,8 +1647,8 @@ as follows:
 ``p[n]:<size>:<abi>:<pref>``
     This specifies the *size* of a pointer and its ``<abi>`` and
     ``<pref>``\erred alignments for address space ``n``. All sizes are in
-    bits. The address space, ``n`` is optional, and if not specified,
-    denotes the default address space 0. The value of ``n`` must be
+    bits. The address space, ``n``, is optional, and if not specified,
+    denotes the default address space 0. The value of ``n`` must be
     in the range [1,2^23).
 ``i<size>:<abi>:<pref>``
     This specifies the alignment for an integer type of a given bit
@@ -1521,6 +1674,8 @@ as follows:
     symbols get a ``_`` prefix.
   * ``w``: Windows COFF prefix: Similar to Mach-O, but stdcall and fastcall
     functions also get a suffix based on the frame size.
+  * ``x``: Windows x86 COFF prefix: Similar to Windows COFF, but use a ``_``
+    prefix for ``__cdecl`` functions.
 ``n<size1>:<size2>:<size3>...``
     This specifies a set of native integer widths for the target CPU in
     bits. For example, it might contain ``n32`` for 32-bit PowerPC,
@@ -1687,7 +1842,7 @@ target-legal volatile load/store instructions.
 this holds for an l-value of volatile primitive type with native hardware
 support, but not necessarily for aggregate types. The frontend upholds these
 expectations, which are intentionally
-unspecified in the IR. The rules above ensure that IR transformation
+unspecified in the IR. The rules above ensure that IR transformations
 do not violate the frontend's contract with the language.
 
 .. _memmodel:
@@ -1877,12 +2032,12 @@ Use-list Order Directives
 -------------------------
 
 Use-list directives encode the in-memory order of each use-list, allowing the
-order to be recreated. ``<order-indexes>`` is a comma-separated list of
-indexes that are assigned to the referenced value's uses. The referenced
+order to be recreated. ``<order-indexes>`` is a comma-separated list of
+indexes that are assigned to the referenced value's uses. The referenced
 value's use-list is immediately sorted by these indexes.
 
-Use-list directives may appear at function scope or global scope. They are not
-instructions, and have no effect on the semantics of the IR. When they're at
+Use-list directives may appear at function scope or global scope. They are not
+instructions, and have no effect on the semantics of the IR. When they're at
 function scope, they must appear after the terminator of the final basic block.
 
 If basic blocks have their address taken via ``blockaddress()`` expressions,
@@ -1969,9 +2124,9 @@ and :ref:`metadata ` types.
 
 ...where '``<parameter list>``' is a comma-separated list of type
 specifiers. Optionally, the parameter list may include a type ``...``, which
-indicates that the function takes a variable number of arguments. Variable
-argument functions can access their arguments with the :ref:`variable argument
-handling intrinsic ` functions. '``<returntype>``' is any type
+indicates that the function takes a variable number of arguments. Variable
+argument functions can access their arguments with the :ref:`variable argument
+handling intrinsic ` functions. '``<returntype>``' is any type
 except :ref:`label ` and :ref:`metadata `.
 
 :Examples:
 
@@ -2165,6 +2320,26 @@ The label type represents code labels.
 
     label
 
+.. _t_token:
+
+Token Type
+^^^^^^^^^^
+
+:Overview:
+
+The token type is used when a value is associated with an instruction
+but all uses of the value must not attempt to introspect or obscure it.
+As such, it is not appropriate to have a :ref:`phi ` or
+:ref:`select ` of type token.
+
+:Syntax:
+
+::
+
+    token
+
+
+
 .. _t_metadata:
 
 Metadata Type
@@ -2338,6 +2513,9 @@ Simple Constants
 
 **Null pointer constants**
     The identifier '``null``' is recognized as a null pointer constant
     and must be of :ref:`pointer type `.
+**Token constants**
+    The identifier '``none``' is recognized as an empty token constant
+    and must be of :ref:`token type `.
 
 The one non-intuitive notation for constants is the hexadecimal form of
 floating point constants. For example, the form
@@ -2406,8 +2584,8 @@ constants and smaller complex constants.
    having to print large zero initializers (e.g. for large arrays) and is
    always exactly equivalent to using explicit zero initializers.
**Metadata node** - A metadata node is a constant tuple without types. For example: - "``!{!0, !{!2, !0}, !"test"}``". Metadata can reference constant values, + A metadata node is a constant tuple without types. For example: + "``!{!0, !{!2, !0}, !"test"}``". Metadata can reference constant values, for example: "``!{!0, i32 0, i8* @global, i64 (i64)* @function, !"str"}``". Unlike other typed constants that are meant to be interpreted as part of the instruction stream, metadata is a place to attach additional @@ -3325,7 +3503,7 @@ and GCC likely indicates a bug in LLVM. Target-independent: -- ``c``: Print an immediate integer constant unadorned, without +- ``c``: Print an immediate integer constant unadorned, without the target-specific immediate punctuation (e.g. no ``$`` prefix). - ``n``: Negate and print immediate integer constant unadorned, without the target-specific immediate punctuation (e.g. no ``$`` prefix). @@ -3505,7 +3683,7 @@ that can convey extra information about the code to the optimizers and code generator. One example application of metadata is source-level debug information. There are two metadata primitives: strings and nodes. -Metadata does not have a type, and is not a value. If referenced from a +Metadata does not have a type, and is not a value. If referenced from a ``call`` instruction, it uses the ``metadata`` type. All metadata are identified in syntax by a exclamation point ('``!``'). @@ -3536,7 +3714,7 @@ Metadata nodes that aren't uniqued use the ``distinct`` keyword. For example: !0 = distinct !{!"test\00", i32 10} ``distinct`` nodes are useful when nodes shouldn't be merged based on their -content. They can also occur when transformations cause uniquing collisions +content. They can also occur when transformations cause uniquing collisions when metadata operands change. A :ref:`named metadata ` is a collection of @@ -3554,13 +3732,22 @@ function is using two metadata arguments: call void @llvm.dbg.value(metadata !24, i64 0, metadata !25) -Metadata can be attached with an instruction. Here metadata ``!21`` is -attached to the ``add`` instruction using the ``!dbg`` identifier: +Metadata can be attached to an instruction. Here metadata ``!21`` is attached +to the ``add`` instruction using the ``!dbg`` identifier: .. code-block:: llvm %indvar.next = add i64 %indvar, 1, !dbg !21 +Metadata can also be attached to a function definition. Here metadata ``!22`` +is attached to the ``foo`` function using the ``!dbg`` identifier: + +.. code-block:: llvm + + define void @foo() !dbg !22 { + ret void + } + More information about specific metadata nodes recognized by the optimizers and code generator is found below. @@ -3570,7 +3757,7 @@ Specialized Metadata Nodes ^^^^^^^^^^^^^^^^^^^^^^^^^^ Specialized metadata nodes are custom data structures in metadata (as opposed -to generic tuples). Their fields are labelled, and can be specified in any +to generic tuples). Their fields are labelled, and can be specified in any order. These aren't inherently debug info centric, but currently all the specialized @@ -3581,10 +3768,10 @@ metadata nodes are related to debug info. DICompileUnit """"""""""""" -``DICompileUnit`` nodes represent a compile unit. The ``enums:``, -``retainedTypes:``, ``subprograms:``, ``globals:`` and ``imports:`` fields are -tuples containing the debug info to be emitted along with the compile unit, -regardless of code optimizations (some nodes are only emitted if there are +``DICompileUnit`` nodes represent a compile unit. 
The ``enums:``, +``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:`` +fields are tuples containing the debug info to be emitted along with the compile +unit, regardless of code optimizations (some nodes are only emitted if there are references to them from instructions). .. code-block:: llvm @@ -3593,11 +3780,11 @@ references to them from instructions). isOptimized: true, flags: "-O2", runtimeVersion: 2, splitDebugFilename: "abc.debug", emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !4, - globals: !5, imports: !6) + globals: !5, imports: !6, macros: !7, dwoId: 0x0abcd) Compile unit descriptors provide the root scope for objects declared in a -specific compilation unit. File descriptors are defined using this scope. -These descriptors are collected by a named metadata ``!llvm.dbg.cu``. They +specific compilation unit. File descriptors are defined using this scope. +These descriptors are collected by a named metadata ``!llvm.dbg.cu``. They keep track of subprograms, global variables, type information, and imported entities (declarations and namespaces). @@ -3606,7 +3793,7 @@ entities (declarations and namespaces). DIFile """""" -``DIFile`` nodes represent files. The ``filename:`` can include slashes. +``DIFile`` nodes represent files. The ``filename:`` can include slashes. .. code-block:: llvm @@ -3621,7 +3808,7 @@ DIBasicType """"""""""" ``DIBasicType`` nodes represent primitive types, such as ``int``, ``bool`` and -``float``. ``tag:`` defaults to ``DW_TAG_base_type``. +``float``. ``tag:`` defaults to ``DW_TAG_base_type``. .. code-block:: llvm @@ -3629,7 +3816,7 @@ DIBasicType encoding: DW_ATE_unsigned_char) !1 = !DIBasicType(tag: DW_TAG_unspecified_type, name: "decltype(nullptr)") -The ``encoding:`` describes the details of the type. Usually it's one of the +The ``encoding:`` describes the details of the type. Usually it's one of the following: .. code-block:: llvm @@ -3647,9 +3834,9 @@ following: DISubroutineType """""""""""""""" -``DISubroutineType`` nodes represent subroutine types. Their ``types:`` field +``DISubroutineType`` nodes represent subroutine types. Their ``types:`` field refers to a tuple; the first operand is the return type, while the rest are the -types of the formal arguments in order. If the first operand is ``null``, that +types of the formal arguments in order. If the first operand is ``null``, that represents a function with no return value (such as ``void foo() {}`` in C++). .. code-block:: llvm @@ -3688,8 +3875,8 @@ The following ``tag:`` values are valid: DW_TAG_restrict_type = 55 ``DW_TAG_member`` is used to define a member of a :ref:`composite type -` or :ref:`subprogram `. The type of the member -is the ``baseType:``. The ``offset:`` is the member's bit offset. +` or :ref:`subprogram `. The type of the member +is the ``baseType:``. The ``offset:`` is the member's bit offset. ``DW_TAG_formal_parameter`` is used to define a member which is a formal argument of a subprogram. @@ -3707,10 +3894,10 @@ DICompositeType """"""""""""""" ``DICompositeType`` nodes represent types composed of other types, like -structures and unions. ``elements:`` points to a tuple of the composed types. +structures and unions. ``elements:`` points to a tuple of the composed types. If the source language supports ODR, the ``identifier:`` field gives the unique -identifier used for type merging between modules. When specified, other types +identifier used for type merging between modules. 
When specified, other types can refer to composite types indirectly via a :ref:`metadata string ` that matches their identifier. @@ -3738,12 +3925,12 @@ The following ``tag:`` values are valid: For ``DW_TAG_array_type``, the ``elements:`` should be :ref:`subrange descriptors `, each representing the range of subscripts at that -level of indexing. The ``DIFlagVector`` flag to ``flags:`` indicates that an +level of indexing. The ``DIFlagVector`` flag to ``flags:`` indicates that an array type is a native packed vector. For ``DW_TAG_enumeration_type``, the ``elements:`` should be :ref:`enumerator descriptors `, each representing the definition of an enumeration -value for the set. All enumeration type descriptors are collected in the +value for the set. All enumeration type descriptors are collected in the ``enums:`` field of the :ref:`compile unit `. For ``DW_TAG_structure_type``, ``DW_TAG_class_type``, and @@ -3756,7 +3943,7 @@ DISubrange """""""""" ``DISubrange`` nodes are the elements for ``DW_TAG_array_type`` variants of -:ref:`DICompositeType`. ``count: -1`` indicates an empty array. +:ref:`DICompositeType`. ``count: -1`` indicates an empty array. .. code-block:: llvm @@ -3782,7 +3969,7 @@ DITemplateTypeParameter """"""""""""""""""""""" ``DITemplateTypeParameter`` nodes represent type parameters to generic source -language constructs. They are used (optionally) in :ref:`DICompositeType` and +language constructs. They are used (optionally) in :ref:`DICompositeType` and :ref:`DISubprogram` ``templateParams:`` fields. .. code-block:: llvm @@ -3793,9 +3980,9 @@ DITemplateValueParameter """""""""""""""""""""""" ``DITemplateValueParameter`` nodes represent value parameters to generic source -language constructs. ``tag:`` defaults to ``DW_TAG_template_value_parameter``, +language constructs. ``tag:`` defaults to ``DW_TAG_template_value_parameter``, but if specified can also be set to ``DW_TAG_GNU_template_template_param`` or -``DW_TAG_GNU_template_param_pack``. They are used (optionally) in +``DW_TAG_GNU_template_param_pack``. They are used (optionally) in :ref:`DICompositeType` and :ref:`DISubprogram` ``templateParams:`` fields. .. code-block:: llvm @@ -3831,20 +4018,26 @@ All global variables should be referenced by the `globals:` field of a DISubprogram """""""""""" -``DISubprogram`` nodes represent functions from the source language. The -``variables:`` field points at :ref:`variables ` that must be -retained, even if their IR counterparts are optimized out of the IR. The -``type:`` field must point at an :ref:`DISubroutineType`. +``DISubprogram`` nodes represent functions from the source language. A +``DISubprogram`` may be attached to a function definition using ``!dbg`` +metadata. The ``variables:`` field points at :ref:`variables ` +that must be retained, even if their IR counterparts are optimized out of +the IR. The ``type:`` field must point at an :ref:`DISubroutineType`. .. code-block:: llvm - !0 = !DISubprogram(name: "foo", linkageName: "_Zfoov", scope: !1, - file: !2, line: 7, type: !3, isLocal: true, - isDefinition: false, scopeLine: 8, containingType: !4, - virtuality: DW_VIRTUALITY_pure_virtual, virtualIndex: 10, - flags: DIFlagPrototyped, isOptimized: true, - function: void ()* @_Z3foov, - templateParams: !5, declaration: !6, variables: !7) + define void @_Z3foov() !dbg !0 { + ... 
+ } + + !0 = distinct !DISubprogram(name: "foo", linkageName: "_Zfoov", scope: !1, + file: !2, line: 7, type: !3, isLocal: true, + isDefinition: false, scopeLine: 8, + containingType: !4, + virtuality: DW_VIRTUALITY_pure_virtual, + virtualIndex: 10, flags: DIFlagPrototyped, + isOptimized: true, templateParams: !5, + declaration: !6, variables: !7) .. _DILexicalBlock: @@ -3852,8 +4045,8 @@ DILexicalBlock """""""""""""" ``DILexicalBlock`` nodes describe nested blocks within a :ref:`subprogram -`. The line number and column numbers are used to dinstinguish -two lexical blocks at same depth. They are valid targets for ``scope:`` +`. The line number and column numbers are used to distinguish +two lexical blocks at same depth. They are valid targets for ``scope:`` fields. .. code-block:: llvm @@ -3869,7 +4062,7 @@ DILexicalBlockFile """""""""""""""""" ``DILexicalBlockFile`` nodes are used to discriminate between sections of a -:ref:`lexical block `. The ``file:`` field can be changed to +:ref:`lexical block `. The ``file:`` field can be changed to indicate textual inclusion, or the ``discriminator:`` field can be used to discriminate between control flow within a single block in the source language. @@ -3884,7 +4077,7 @@ discriminate between control flow within a single block in the source language. DILocation """""""""" -``DILocation`` nodes represent source debug locations. The ``scope:`` field is +``DILocation`` nodes represent source debug locations. The ``scope:`` field is mandatory, and points at an :ref:`DILexicalBlockFile`, an :ref:`DILexicalBlock`, or an :ref:`DISubprogram`. @@ -3897,27 +4090,23 @@ mandatory, and points at an :ref:`DILexicalBlockFile`, an DILocalVariable """"""""""""""" -``DILocalVariable`` nodes represent local variables in the source language. -Instead of ``DW_TAG_variable``, they use LLVM-specific fake tags to -discriminate between local variables (``DW_TAG_auto_variable``) and subprogram -arguments (``DW_TAG_arg_variable``). In the latter case, the ``arg:`` field -specifies the argument position, and this variable will be included in the -``variables:`` field of its :ref:`DISubprogram`. +``DILocalVariable`` nodes represent local variables in the source language. If +the ``arg:`` field is set to non-zero, then this variable is a subprogram +parameter, and it will be included in the ``variables:`` field of its +:ref:`DISubprogram`. .. code-block:: llvm - !0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", arg: 0, - scope: !3, file: !2, line: 7, type: !3, - flags: DIFlagArtificial) - !1 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "x", arg: 1, - scope: !4, file: !2, line: 7, type: !3) - !1 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "y", - scope: !5, file: !2, line: 7, type: !3) + !0 = !DILocalVariable(name: "this", arg: 1, scope: !3, file: !2, line: 7, + type: !3, flags: DIFlagArtificial) + !1 = !DILocalVariable(name: "x", arg: 2, scope: !4, file: !2, line: 7, + type: !3) + !2 = !DILocalVariable(name: "y", scope: !5, file: !2, line: 7, type: !3) DIExpression """""""""""" -``DIExpression`` nodes represent DWARF expression sequences. They are used in +``DIExpression`` nodes represent DWARF expression sequences. They are used in :ref:`debug intrinsics` (such as ``llvm.dbg.declare``) to describe how the referenced LLVM variable relates to the source language variable. @@ -3957,6 +4146,32 @@ compile unit. 
    !2 = !DIImportedEntity(tag: DW_TAG_imported_module, name: "foo", scope: !0,
                           entity: !1, line: 7)
 
+DIMacro
+"""""""
+
+``DIMacro`` nodes represent the definition or undefinition of a macro
+identifier. The ``name:`` field is the macro identifier, followed by macro
+parameters when defining a function-like macro, and the ``value:`` field is
+the token-string used to expand the macro identifier.
+
+.. code-block:: llvm
+
+    !2 = !DIMacro(macinfo: DW_MACINFO_define, line: 7, name: "foo(x)",
+                  value: "((x) + 1)")
+    !3 = !DIMacro(macinfo: DW_MACINFO_undef, line: 30, name: "foo")
+
+DIMacroFile
+"""""""""""
+
+``DIMacroFile`` nodes represent the inclusion of source files.
+The ``nodes:`` field is a list of ``DIMacro`` and ``DIMacroFile`` nodes that
+appear in the included source file.
+
+.. code-block:: llvm
+
+    !2 = !DIMacroFile(macinfo: DW_MACINFO_start_file, line: 7, file: !2,
+                      nodes: !3)
+
 '``tbaa``' Metadata
 ^^^^^^^^^^^^^^^^^^^
@@ -4041,13 +4256,13 @@ alias.
 The metadata identifying each domain is itself a list containing one or two
 entries. The first entry is the name of the domain. Note that if the name is a
-string then it can be combined accross functions and translation units. A
+string then it can be combined across functions and translation units. A
 self-reference can be used to create globally unique domain names. A
 descriptive string may optionally be provided as a second list entry.
 
 The metadata identifying each scope is also itself a list containing two or
 three entries. The first entry is the name of the scope. Note that if the name
-is a string then it can be combined accross functions and translation units. A
+is a string then it can be combined across functions and translation units. A
 self-reference can be used to create globally unique scope names. A metadata
 reference to the scope's domain is the second entry. A descriptive string may
 optionally be provided as a third list entry.
@@ -4144,6 +4359,16 @@ Examples:
     !2 = !{ i8 0, i8 2, i8 3, i8 6 }
     !3 = !{ i8 -2, i8 0, i8 3, i8 6 }
 
+'``unpredictable``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``unpredictable`` metadata may be attached to any branch or switch
+instruction. It can be used to express the unpredictability of control
+flow. Similar to the ``llvm.expect`` intrinsic, it may be used to alter
+optimizations related to compare and branch instructions. The metadata
+is treated as a boolean value; if it exists, it signals that the branch
+or switch that it is attached to is completely unpredictable.
+
 '``llvm.loop``'
 ^^^^^^^^^^^^^^^
@@ -4182,11 +4407,11 @@ suggests an unroll factor to the loop unroller:
 
 Metadata prefixed with ``llvm.loop.vectorize`` or ``llvm.loop.interleave`` are
 used to control per-loop vectorization and interleaving parameters such as
-vectorization width and interleave count.  These metadata should be used in
-conjunction with ``llvm.loop`` loop identification metadata.  The
+vectorization width and interleave count. These metadata should be used in
+conjunction with ``llvm.loop`` loop identification metadata. The
 ``llvm.loop.vectorize`` and ``llvm.loop.interleave`` metadata are only
 optimization hints and the optimizer will only interleave and vectorize loops if
-it believes it is safe to do so.  The ``llvm.mem.parallel_loop_access`` metadata
+it believes it is safe to do so. The ``llvm.mem.parallel_loop_access`` metadata
 which contains information about loop-carried memory dependencies can be
 helpful in determining the safety of these transformations.
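To make these hints concrete, the following minimal sketch (block and value
names are hypothetical) attaches a loop identification node carrying both a
vectorization width and an interleave count to a loop's latch branch:

.. code-block:: llvm

    loop:
      ...
      br i1 %exitcond, label %exit, label %loop, !llvm.loop !0

    !0 = distinct !{!0, !1, !2}
    !1 = !{!"llvm.loop.vectorize.width", i32 4}
    !2 = !{!"llvm.loop.interleave.count", i32 2}

The first operand of ``!0`` refers back to ``!0`` itself, the usual way of
keeping a loop identification node unique.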
@@ -4203,7 +4428,7 @@ example: !0 = !{!"llvm.loop.interleave.count", i32 4} Note that setting ``llvm.loop.interleave.count`` to 1 disables interleaving -multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0 +multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0 then the interleave count will be determined automatically. '``llvm.loop.vectorize.enable``' Metadata @@ -4211,7 +4436,7 @@ then the interleave count will be determined automatically. This metadata selectively enables or disables vectorization for the loop. The first operand is the string ``llvm.loop.vectorize.enable`` and the second operand -is a bit. If the bit operand value is 1 vectorization is enabled. A value of +is a bit. If the bit operand value is 1 vectorization is enabled. A value of 0 disables vectorization: .. code-block:: llvm @@ -4231,7 +4456,7 @@ operand is an integer specifying the width. For example: !0 = !{!"llvm.loop.vectorize.width", i32 4} Note that setting ``llvm.loop.vectorize.width`` to 1 disables -vectorization of the loop. If ``llvm.loop.vectorize.width`` is set to +vectorization of the loop. If ``llvm.loop.vectorize.width`` is set to 0 or if the loop does not have this metadata the width will be determined automatically. @@ -4264,7 +4489,7 @@ will be partially unrolled. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This metadata disables loop unrolling. The metadata has a single operand -which is the string ``llvm.loop.unroll.disable``. For example: +which is the string ``llvm.loop.unroll.disable``. For example: .. code-block:: llvm @@ -4274,12 +4499,24 @@ which is the string ``llvm.loop.unroll.disable``. For example: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This metadata disables runtime loop unrolling. The metadata has a single -operand which is the string ``llvm.loop.unroll.runtime.disable``. For example: +operand which is the string ``llvm.loop.unroll.runtime.disable``. For example: .. code-block:: llvm !0 = !{!"llvm.loop.unroll.runtime.disable"} +'``llvm.loop.unroll.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata suggests that the loop should be fully unrolled if the trip count +is known at compile time and partially unrolled if the trip count is not known +at compile time. The metadata has a single operand which is the string +``llvm.loop.unroll.enable``. For example: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.unroll.enable"} + '``llvm.loop.unroll.full``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -4319,7 +4556,7 @@ loop. Note that if not all memory access instructions have such metadata referring to the loop, then the loop is considered not being trivially parallel. Additional -memory dependence analysis is required to make that determination. As a fail +memory dependence analysis is required to make that determination. As a fail safe mechanism, this causes loops that were originally parallel to be considered sequential (if optimization passes that are unaware of the parallel semantics insert new memory instructions into the loop body). @@ -4380,6 +4617,50 @@ the loop identifier metadata node directly: The ``llvm.bitsets`` global metadata is used to implement :doc:`bitsets `. +'``invariant.group``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``invariant.group`` metadata may be attached to ``load``/``store`` instructions. 
+The existence of the ``invariant.group`` metadata on the instruction tells +the optimizer that every ``load`` and ``store`` to the same pointer operand +within the same invariant group can be assumed to load or store the same +value (but see the ``llvm.invariant.group.barrier`` intrinsic which affects +when two pointers are considered the same). + +Examples: + +.. code-block:: llvm + + @unknownPtr = external global i8 + ... + %ptr = alloca i8 + store i8 42, i8* %ptr, !invariant.group !0 + call void @foo(i8* %ptr) + + %a = load i8, i8* %ptr, !invariant.group !0 ; Can assume that value under %ptr didn't change + call void @foo(i8* %ptr) + %b = load i8, i8* %ptr, !invariant.group !1 ; Can't assume anything, because group changed + + %newPtr = call i8* @getPointer(i8* %ptr) + %c = load i8, i8* %newPtr, !invariant.group !0 ; Can't assume anything, because we only have information about %ptr + + %unknownValue = load i8, i8* @unknownPtr + store i8 %unknownValue, i8* %ptr, !invariant.group !0 ; Can assume that %unknownValue == 42 + + call void @foo(i8* %ptr) + %newPtr2 = call i8* @llvm.invariant.group.barrier(i8* %ptr) + %d = load i8, i8* %newPtr2, !invariant.group !0 ; Can't step through invariant.group.barrier to get value of %ptr + + ... + declare void @foo(i8*) + declare i8* @getPointer(i8*) + declare i8* @llvm.invariant.group.barrier(i8*) + + !0 = !{!"magic ptr"} + !1 = !{!"other ptr"} + + + Module Flags Metadata ===================== @@ -4738,7 +5019,10 @@ control flow, not values (the one exception being the The terminator instructions are: ':ref:`ret `', ':ref:`br `', ':ref:`switch `', ':ref:`indirectbr `', ':ref:`invoke `', -':ref:`resume `', and ':ref:`unreachable `'. +':ref:`resume `', ':ref:`catchswitch `', +':ref:`catchret `', +':ref:`cleanupret `', +and ':ref:`unreachable `'. .. _i_ret: @@ -4970,7 +5254,7 @@ Syntax: :: = invoke [cconv] [ret attrs] () [fn attrs] - to label unwind label + [operand bundles] to label unwind label Overview: """"""""" @@ -5024,6 +5308,7 @@ This instruction requires several arguments: #. The optional :ref:`function attributes ` list. Only '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``' attributes are valid here. +#. The optional :ref:`operand bundles ` list. Semantics: """""""""" @@ -5092,6 +5377,235 @@ Example: resume { i8*, i32 } %exn +.. _i_catchswitch: + +'``catchswitch``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = catchswitch within [ label , label , ... ] unwind to caller + = catchswitch within [ label , label , ... ] unwind label + +Overview: +""""""""" + +The '``catchswitch``' instruction is used by `LLVM's exception handling system +`_ to describe the set of possible catch handlers +that may be executed by the :ref:`EH personality routine `. + +Arguments: +"""""""""" + +The ``parent`` argument is the token of the funclet that contains the +``catchswitch`` instruction. If the ``catchswitch`` is not inside a funclet, +this operand may be the token ``none``. + +The ``default`` argument is the label of another basic block beginning with a +"pad" instruction, one of ``cleanuppad`` or ``catchswitch``. + +The ``handlers`` are a list of successor blocks that each begin with a +:ref:`catchpad ` instruction. + +Semantics: +"""""""""" + +Executing this instruction transfers control to one of the successors in +``handlers``, if appropriate, or continues to unwind via the unwind label if +present. 
+ +The ``catchswitch`` is both a terminator and a "pad" instruction, meaning that +it must be both the first non-phi instruction and last instruction in the basic +block. Therefore, it must be the only non-phi instruction in the block. + +Example: +"""""""" + +.. code-block:: llvm + + dispatch1: + %cs1 = catchswitch within none [label %handler0, label %handler1] unwind to caller + dispatch2: + %cs2 = catchswitch within %parenthandler [label %handler0] unwind label %cleanup + +.. _i_catchpad: + +'``catchpad``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = catchpad within [*] + +Overview: +""""""""" + +The '``catchpad``' instruction is used by `LLVM's exception handling +system `_ to specify that a basic block +begins a catch handler --- one where a personality routine attempts to transfer +control to catch an exception. + +Arguments: +"""""""""" + +The ``catchswitch`` operand must always be a token produced by a +:ref:`catchswitch ` instruction in a predecessor block. This +ensures that each ``catchpad`` has exactly one predecessor block, and it always +terminates in a ``catchswitch``. + +The ``args`` correspond to whatever information the personality routine +requires to know if this is an appropriate handler for the exception. Control +will transfer to the ``catchpad`` if this is the first appropriate handler for +the exception. + +The ``resultval`` has the type :ref:`token ` and is used to match the +``catchpad`` to corresponding :ref:`catchrets ` and other nested EH +pads. + +Semantics: +"""""""""" + +When the call stack is being unwound due to an exception being thrown, the +exception is compared against the ``args``. If it doesn't match, control will +not reach the ``catchpad`` instruction. The representation of ``args`` is +entirely target and personality function-specific. + +Like the :ref:`landingpad ` instruction, the ``catchpad`` +instruction must be the first non-phi of its parent basic block. + +The meaning of the tokens produced and consumed by ``catchpad`` and other "pad" +instructions is described in the +`Windows exception handling documentation `. + +Executing a ``catchpad`` instruction constitutes "entering" that pad. +The pad may then be "exited" in one of three ways: + +1) explicitly via a ``catchret`` that consumes it. Executing such a ``catchret`` + is undefined behavior if any descendant pads have been entered but not yet + exited. +2) implicitly via a call (which unwinds all the way to the current function's caller), + or via a ``catchswitch`` or a ``cleanupret`` that unwinds to caller. +3) implicitly via an unwind edge whose destination EH pad isn't a descendant of + the ``catchpad``. When the ``catchpad`` is exited in this manner, it is + undefined behavior if the destination EH pad has a parent which is not an + ancestor of the ``catchpad`` being exited. + +Example: +"""""""" + +.. code-block:: llvm + + dispatch: + %cs = catchswitch within none [label %handler0] unwind to caller + ;; A catch block which can catch an integer. + handler0: + %tok = catchpad within %cs [i8** @_ZTIi] + +.. _i_catchret: + +'``catchret``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + catchret from to label + +Overview: +""""""""" + +The '``catchret``' instruction is a terminator instruction that has a +single successor. + + +Arguments: +"""""""""" + +The first argument to a '``catchret``' indicates which ``catchpad`` it +exits. It must be a :ref:`catchpad `. 
+The second argument to a '``catchret``' specifies where control will +transfer to next. + +Semantics: +"""""""""" + +The '``catchret``' instruction ends an existing (in-flight) exception whose +unwinding was interrupted with a :ref:`catchpad ` instruction. The +:ref:`personality function ` gets a chance to execute arbitrary +code to, for example, destroy the active exception. Control then transfers to +``normal``. + +The ``token`` argument must be a token produced by a dominating ``catchpad`` +instruction. The ``catchret`` destroys the physical frame established by +``catchpad``, so executing multiple returns on the same token without +re-executing the ``catchpad`` will result in undefined behavior. +See :ref:`catchpad ` for more details. + +Example: +"""""""" + +.. code-block:: llvm + + catchret from %catch label %continue + +.. _i_cleanupret: + +'``cleanupret``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + cleanupret from unwind label + cleanupret from unwind to caller + +Overview: +""""""""" + +The '``cleanupret``' instruction is a terminator instruction that has +an optional successor. + + +Arguments: +"""""""""" + +The '``cleanupret``' instruction requires one argument, which indicates +which ``cleanuppad`` it exits, and must be a :ref:`cleanuppad `. +It also has an optional successor, ``continue``. + +Semantics: +"""""""""" + +The '``cleanupret``' instruction indicates to the +:ref:`personality function ` that one +:ref:`cleanuppad ` it transferred control to has ended. +It transfers control to ``continue`` or unwinds out of the function. + +The unwind destination ``continue``, if present, must be an EH pad +whose parent is either ``none`` or an ancestor of the ``cleanuppad`` +being returned from. This constitutes an exceptional exit from all +ancestors of the completed ``cleanuppad``, up to but not including +the parent of ``continue``. +See :ref:`cleanuppad ` for more details. + +Example: +"""""""" + +.. code-block:: llvm + + cleanupret from %cleanup unwind to caller + cleanupret from %cleanup unwind label %continue + .. _i_unreachable: '``unreachable``' Instruction @@ -6165,7 +6679,7 @@ Arguments: """""""""" The first operand of an '``extractvalue``' instruction is a value of -:ref:`struct ` or :ref:`array ` type. The operands are +:ref:`struct ` or :ref:`array ` type. The other operands are constant indices to specify which value to extract in a similar manner as indices in a '``getelementptr``' instruction. @@ -6312,9 +6826,11 @@ Syntax: :: - = load [volatile] , * [, align ][, !nontemporal !][, !invariant.load !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !] - = load atomic [volatile] * [singlethread] , align + = load [volatile] , * [, align ][, !nontemporal !][, !invariant.load !][, !invariant.group !][, !nonnull !][, !dereferenceable !][, !dereferenceable_or_null !][, !align !] + = load atomic [volatile] * [singlethread] , align [, !invariant.group !] ! = !{ i32 1 } + ! = !{i64 } + ! = !{ i64 } Overview: """"""""" @@ -6331,17 +6847,16 @@ then the optimizer is not allowed to modify the number or order of execution of this ``load`` with other :ref:`volatile operations `. -If the ``load`` is marked as ``atomic``, it takes an extra -:ref:`ordering ` and optional ``singlethread`` argument. The -``release`` and ``acq_rel`` orderings are not valid on ``load`` -instructions. Atomic loads produce :ref:`defined ` results -when they may see multiple atomic stores. 
The type of the pointee must -be an integer type whose bit width is a power of two greater than or -equal to eight and less than or equal to a target-specific size limit. -``align`` must be explicitly specified on atomic loads, and the load has -undefined behavior if the alignment is not set to a value which is at -least the size in bytes of the pointee. ``!nontemporal`` does not have -any defined semantics for atomic loads. +If the ``load`` is marked as ``atomic``, it takes an extra :ref:`ordering +` and optional ``singlethread`` argument. The ``release`` and +``acq_rel`` orderings are not valid on ``load`` instructions. Atomic loads +produce :ref:`defined ` results when they may see multiple atomic +stores. The type of the pointee must be an integer, pointer, or floating-point +type whose bit width is a power of two greater than or equal to eight and less +than or equal to a target-specific size limit. ``align`` must be explicitly +specified on atomic loads, and the load has undefined behavior if the alignment +is not set to a value which is at least the size in bytes of the +pointee. ``!nontemporal`` does not have any defined semantics for atomic loads. The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 @@ -6369,33 +6884,44 @@ Being invariant does not imply that a location is dereferenceable, but it does imply that once the location is known dereferenceable its value is henceforth unchanging. +The optional ``!invariant.group`` metadata must reference a single metadata name + ```` corresponding to a metadata node. See ``invariant.group`` metadata. + The optional ``!nonnull`` metadata must reference a single metadata name ```` corresponding to a metadata node with no entries. The existence of the ``!nonnull`` metadata on the instruction tells the optimizer that the value loaded is known to -never be null. This is analogous to the ''nonnull'' attribute -on parameters and return values. This metadata can only be applied +never be null. This is analogous to the ``nonnull`` attribute +on parameters and return values. This metadata can only be applied to loads of a pointer type. -The optional ``!dereferenceable`` metadata must reference a single -metadata name ```` corresponding to a metadata node with one ``i64`` -entry. The existence of the ``!dereferenceable`` metadata on the instruction +The optional ``!dereferenceable`` metadata must reference a single metadata +name ```` corresponding to a metadata node with one ``i64`` +entry. The existence of the ``!dereferenceable`` metadata on the instruction tells the optimizer that the value loaded is known to be dereferenceable. -The number of bytes known to be dereferenceable is specified by the integer -value in the metadata node. This is analogous to the ''dereferenceable'' -attribute on parameters and return values. This metadata can only be applied +The number of bytes known to be dereferenceable is specified by the integer +value in the metadata node. This is analogous to the ''dereferenceable'' +attribute on parameters and return values. This metadata can only be applied to loads of a pointer type. The optional ``!dereferenceable_or_null`` metadata must reference a single -metadata name ```` corresponding to a metadata node with one ``i64`` -entry. The existence of the ``!dereferenceable_or_null`` metadata on the +metadata name ```` corresponding to a metadata node with one +``i64`` entry. 
The existence of the ``!dereferenceable_or_null`` metadata on the instruction tells the optimizer that the value loaded is known to be either dereferenceable or null. -The number of bytes known to be dereferenceable is specified by the integer -value in the metadata node. This is analogous to the ''dereferenceable_or_null'' -attribute on parameters and return values. This metadata can only be applied +The number of bytes known to be dereferenceable is specified by the integer +value in the metadata node. This is analogous to the ''dereferenceable_or_null'' +attribute on parameters and return values. This metadata can only be applied to loads of a pointer type. +The optional ``!align`` metadata must reference a single metadata name +```` corresponding to a metadata node with one ``i64`` entry. +The existence of the ``!align`` metadata on the instruction tells the +optimizer that the value loaded is known to be aligned to a boundary specified +by the integer value in the metadata node. The alignment must be a power of 2. +This is analogous to the ''align'' attribute on parameters and return values. +This metadata can only be applied to loads of a pointer type. + Semantics: """""""""" @@ -6426,8 +6952,8 @@ Syntax: :: - store [volatile] , * [, align ][, !nontemporal !] ; yields void - store atomic [volatile] , * [singlethread] , align ; yields void + store [volatile] , * [, align ][, !nontemporal !][, !invariant.group !] ; yields void + store atomic [volatile] , * [singlethread] , align [, !invariant.group !] ; yields void Overview: """"""""" @@ -6445,17 +6971,16 @@ then the optimizer is not allowed to modify the number or order of execution of this ``store`` with other :ref:`volatile operations `. -If the ``store`` is marked as ``atomic``, it takes an extra -:ref:`ordering ` and optional ``singlethread`` argument. The -``acquire`` and ``acq_rel`` orderings aren't valid on ``store`` -instructions. Atomic loads produce :ref:`defined ` results -when they may see multiple atomic stores. The type of the pointee must -be an integer type whose bit width is a power of two greater than or -equal to eight and less than or equal to a target-specific size limit. -``align`` must be explicitly specified on atomic stores, and the store -has undefined behavior if the alignment is not set to a value which is -at least the size in bytes of the pointee. ``!nontemporal`` does not -have any defined semantics for atomic stores. +If the ``store`` is marked as ``atomic``, it takes an extra :ref:`ordering +` and optional ``singlethread`` argument. The ``acquire`` and +``acq_rel`` orderings aren't valid on ``store`` instructions. Atomic loads +produce :ref:`defined ` results when they may see multiple atomic +stores. The type of the pointee must be an integer, pointer, or floating-point +type whose bit width is a power of two greater than or equal to eight and less +than or equal to a target-specific size limit. ``align`` must be explicitly +specified on atomic stores, and the store has undefined behavior if the +alignment is not set to a value which is at least the size in bytes of the +pointee. ``!nontemporal`` does not have any defined semantics for atomic stores. The optional constant ``align`` argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 @@ -6474,6 +6999,9 @@ be reused in the cache. The code generator may select special instructions to save cache bandwidth, such as the MOVNT instruction on x86. 
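For example, a frontend could mark a streaming store that should not displace
cache contents as follows (a minimal sketch; ``%val`` and ``%dst`` are
hypothetical):

.. code-block:: llvm

    store <4 x float> %val, <4 x float>* %dst, align 16, !nontemporal !0
    ...
    !0 = !{i32 1}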
+The optional ``!invariant.group`` metadata must reference a +single metadata name ````. See ``invariant.group`` metadata. + Semantics: """""""""" @@ -6869,15 +7397,15 @@ will be effectively broadcast into a vector during address calculation. ; All arguments are vectors: ; A[i] = ptrs[i] + offsets[i]*sizeof(i8) %A = getelementptr i8, <4 x i8*> %ptrs, <4 x i64> %offsets - + ; Add the same scalar offset to each pointer of a vector: ; A[i] = ptrs[i] + offset*sizeof(i8) %A = getelementptr i8, <4 x i8*> %ptrs, i64 %offset - + ; Add distinct offsets to the same pointer: ; A[i] = ptr + offsets[i]*sizeof(i8) %A = getelementptr i8, i8* %ptr, <4 x i64> %offsets - + ; In all cases described above the type of the result is <4 x i8*> The two following instructions are equivalent: @@ -6889,7 +7417,7 @@ The two following instructions are equivalent: <4 x i32> , <4 x i32> %ind4, <4 x i64> - + getelementptr %struct.ST, <4 x %struct.ST*> %s, <4 x i64> %ind1, i32 2, i32 1, <4 x i32> %ind4, i64 13 @@ -7068,10 +7596,12 @@ implies that ``fptrunc`` cannot be used to make a *no-op cast*. Semantics: """""""""" -The '``fptrunc``' instruction truncates a ``value`` from a larger +The '``fptrunc``' instruction casts a ``value`` from a larger :ref:`floating point ` type to a smaller :ref:`floating -point ` type. If the value cannot fit within the -destination type, ``ty2``, then the results are undefined. +point ` type. If the value cannot fit (i.e. overflows) within the +destination type, ``ty2``, then the results are undefined. If the cast produces +an inexact result, how rounding is performed (e.g. truncation, also known as +round to zero) is undefined. Example: """""""" @@ -7403,7 +7933,7 @@ The '``bitcast``' instruction takes a value to cast, which must be a non-aggregate first class value, and a type to cast it to, which must also be a non-aggregate :ref:`first class ` type. The bit sizes of ``value`` and the destination type, ``ty2``, must be -identical. If the source type is a pointer, the destination type must +identical. If the source type is a pointer, the destination type must also be a pointer of the same size. This instruction supports bitwise conversion of vectors to integers and to vectors of other types (as long as they have the same size). @@ -7800,7 +8330,8 @@ Syntax: :: - = [tail | musttail] call [cconv] [ret attrs] [*] () [fn attrs] + = [tail | musttail | notail ] call [fast-math flags] [cconv] [ret attrs] [*] () [fn attrs] + [ operand bundles ] Overview: """"""""" @@ -7813,10 +8344,10 @@ Arguments: This instruction requires several arguments: #. The optional ``tail`` and ``musttail`` markers indicate that the optimizers - should perform tail call optimization. The ``tail`` marker is a hint that - `can be ignored `_. The ``musttail`` marker + should perform tail call optimization. The ``tail`` marker is a hint that + `can be ignored `_. The ``musttail`` marker means that the call must be tail call optimized in order for the program to - be correct. The ``musttail`` marker provides these guarantees: + be correct. The ``musttail`` marker provides these guarantees: #. The call will not cause unbounded stack growth if it is part of a recursive cycle in the call graph. @@ -7824,14 +8355,14 @@ This instruction requires several arguments: forwarded in place. Both markers imply that the callee does not access allocas or varargs from - the caller. Calls marked ``musttail`` must obey the following additional + the caller. 
Calls marked ``musttail`` must obey the following additional rules: - The call must immediately precede a :ref:`ret ` instruction, or a pointer bitcast followed by a ret instruction. - The ret instruction must return the (possibly bitcasted) value produced by the call or void. - - The caller and callee prototypes must match. Pointer types of + - The caller and callee prototypes must match. Pointer types of parameters or return types may differ in pointee type, but not in address space. - The calling conventions of the caller and callee must match. @@ -7852,6 +8383,15 @@ This instruction requires several arguments: - `Platform-specific constraints are met. `_ +#. The optional ``notail`` marker indicates that the optimizers should not add + ``tail`` or ``musttail`` markers to the call. It is used to prevent tail + call optimization from being performed on the call. + +#. The optional ``fast-math flags`` marker indicates that the call has one or more + :ref:`fast-math flags `, which are optimization hints to enable + otherwise unsafe floating-point optimizations. Fast-math flags are only valid + for calls that return a floating-point scalar or vector type. + #. The optional "cconv" marker indicates which :ref:`calling convention ` the call should use. If none is specified, the call defaults to using C calling conventions. The @@ -7880,6 +8420,7 @@ This instruction requires several arguments: #. The optional :ref:`function attributes ` list. Only '``noreturn``', '``nounwind``', '``readonly``' and '``readnone``' attributes are valid here. +#. The optional :ref:`operand bundles ` list. Semantics: """""""""" @@ -8049,6 +8590,84 @@ Example: catch i8** @_ZTIi filter [1 x i8**] [@_ZTId] +.. _i_cleanuppad: + +'``cleanuppad``' Instruction +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + = cleanuppad within [*] + +Overview: +""""""""" + +The '``cleanuppad``' instruction is used by `LLVM's exception handling +system `_ to specify that a basic block +is a cleanup block --- one where a personality routine attempts to +transfer control to run cleanup actions. +The ``args`` correspond to whatever additional +information the :ref:`personality function ` requires to +execute the cleanup. +The ``resultval`` has the type :ref:`token ` and is used to +match the ``cleanuppad`` to corresponding :ref:`cleanuprets `. +The ``parent`` argument is the token of the funclet that contains the +``cleanuppad`` instruction. If the ``cleanuppad`` is not inside a funclet, +this operand may be the token ``none``. + +Arguments: +"""""""""" + +The instruction takes a list of arbitrary values which are interpreted +by the :ref:`personality function `. + +Semantics: +"""""""""" + +When the call stack is being unwound due to an exception being thrown, +the :ref:`personality function ` transfers control to the +``cleanuppad`` with the aid of the personality-specific arguments. +As with calling conventions, how the personality function results are +represented in LLVM IR is target specific. + +The ``cleanuppad`` instruction has several restrictions: + +- A cleanup block is a basic block which is the unwind destination of + an exceptional instruction. +- A cleanup block must have a '``cleanuppad``' instruction as its + first non-PHI instruction. +- There can be only one '``cleanuppad``' instruction within the + cleanup block. +- A basic block that is not a cleanup block may not include a + '``cleanuppad``' instruction. + +Executing a ``cleanuppad`` instruction constitutes "entering" that pad. 
+The pad may then be "exited" in one of three ways:
+
+1) explicitly via a ``cleanupret`` that consumes it. Executing such a ``cleanupret``
+   is undefined behavior if any descendant pads have been entered but not yet
+   exited.
+2) implicitly via a call (which unwinds all the way to the current function's caller),
+   or via a ``catchswitch`` or a ``cleanupret`` that unwinds to caller.
+3) implicitly via an unwind edge whose destination EH pad isn't a descendant of
+   the ``cleanuppad``. When the ``cleanuppad`` is exited in this manner, it is
+   undefined behavior if the destination EH pad has a parent which is not an
+   ancestor of the ``cleanuppad`` being exited.
+
+It is undefined behavior for the ``cleanuppad`` to exit via an unwind edge which
+does not transitively unwind to the same destination as a constituent
+``cleanupret``.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+      %tok = cleanuppad within %cs []
+
 .. _intrinsics:
 
 Intrinsic Functions
@@ -8265,11 +8884,11 @@ Experimental Statepoint Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 LLVM provides a second experimental set of intrinsics for describing garbage
-collection safepoints in compiled code.  These intrinsics are an alternative
+collection safepoints in compiled code. These intrinsics are an alternative
 to the ``llvm.gcroot`` intrinsics, but are compatible with the ones for
-:ref:`read <int_gcread>` and :ref:`write <int_gcwrite>` barriers.  The
+:ref:`read <int_gcread>` and :ref:`write <int_gcwrite>` barriers. The
 differences in approach are covered in the `Garbage Collection with LLVM
-<GarbageCollection.html>`_ documentation.  The intrinsics themselves are
+<GarbageCollection.html>`_ documentation. The intrinsics themselves are
 described in :doc:`Statepoints`.
 
 .. _int_gcroot:
@@ -8613,6 +9232,48 @@ Semantics:
 
 See the description for :ref:`llvm.stacksave <int_stacksave>`.
 
+.. _int_get_dynamic_area_offset:
+
+'``llvm.get.dynamic.area.offset``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i32 @llvm.get.dynamic.area.offset.i32()
+      declare i64 @llvm.get.dynamic.area.offset.i64()
+
+Overview:
+"""""""""
+
+The '``llvm.get.dynamic.area.offset.*``' intrinsic family is used to
+get the offset from the native stack pointer to the address of the most
+recent dynamic alloca on the caller's stack. These intrinsics are
+intended for use in combination with
+:ref:`llvm.stacksave <int_stacksave>` to get a
+pointer to the most recent dynamic alloca. This is useful, for example,
+for AddressSanitizer's stack unpoisoning routines.
+
+Semantics:
+""""""""""
+
+These intrinsics return a non-negative integer value that can be used to
+get the address of the most recent dynamic alloca, allocated by :ref:`alloca <i_alloca>`
+on the caller's stack. In particular, for targets where the stack grows
+downwards, adding this offset to the native stack pointer gives the address of
+the most recent dynamic alloca. For targets where the stack grows upwards, the
+situation is a bit more complicated, because subtracting this value from the
+stack pointer gives the address one past the end of the most recent dynamic
+alloca.
+
+Although for most targets :ref:`llvm.get.dynamic.area.offset <int_get_dynamic_area_offset>`
+returns just zero, for others, such as PowerPC and PowerPC64, it returns a
+compile-time-known constant value.
+
+The return value type of :ref:`llvm.get.dynamic.area.offset <int_get_dynamic_area_offset>`
+must match the target's generic address space's (address space 0) pointer type.
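As a sketch of the intended pairing with ``llvm.stacksave`` (assuming a target
whose stack grows downwards; the value names are hypothetical):

.. code-block:: llvm

    %sp = call i8* @llvm.stacksave()
    %offset = call i64 @llvm.get.dynamic.area.offset.i64()
    ; address of the most recent dynamic alloca
    %dyn = getelementptr i8, i8* %sp, i64 %offset

    declare i8* @llvm.stacksave()
    declare i64 @llvm.get.dynamic.area.offset.i64()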
+
 '``llvm.prefetch``' Intrinsic
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -8791,6 +9452,55 @@ structures and the code to increment the appropriate value, in a format that
 can be written out by a compiler runtime and consumed via the ``llvm-profdata``
 tool.
 
+'``llvm.instrprof_value_profile``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.instrprof_value_profile(i8* <name>, i64 <hash>,
+                                                 i64 <value>, i32 <value_kind>,
+                                                 i32 <index>)
+
+Overview:
+"""""""""
+
+The '``llvm.instrprof_value_profile``' intrinsic can be emitted by a
+frontend for use with instrumentation-based profiling. It will be
+lowered by the ``-instrprof`` pass to record the target values that
+instrumented expressions take on in a program at runtime.
+
+Arguments:
+""""""""""
+
+The first argument is a pointer to a global variable containing the
+name of the entity being instrumented. ``name`` should generally be the
+(mangled) function name for a set of counters.
+
+The second argument is a hash value that can be used by the consumer
+of the profile data to detect changes to the instrumented source. It
+is an error if ``hash`` differs between two instances of
+``llvm.instrprof_*`` that refer to the same name.
+
+The third argument is the value of the expression being profiled. The profiled
+expression's value should be representable as an unsigned 64-bit value. The
+fourth argument represents the kind of value profiling that is being done. The
+supported value profiling kinds are enumerated through the
+``InstrProfValueKind`` type declared in the
+``<include/llvm/ProfileData/InstrProf.h>`` header file. The last argument is the
+index of the instrumented expression within ``name``. It should be >= 0.
+
+Semantics:
+""""""""""
+
+This intrinsic represents the point where a call to a runtime routine
+should be inserted for value profiling of target expressions. The ``-instrprof``
+pass will generate the appropriate data structures and replace the
+``llvm.instrprof_value_profile`` intrinsic with a call to the profile
+runtime library with the proper arguments.
+
 Standard C Library Intrinsics
 -----------------------------
@@ -9734,6 +10444,34 @@ Bit Manipulation Intrinsics
 
 LLVM provides intrinsics for a few important bit manipulation
 operations. These allow efficient code generation for some algorithms.
 
+'``llvm.bitreverse.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic function. You can use bitreverse on any
+integer type.
+
+::
+
+      declare i16 @llvm.bitreverse.i16(i16 <id>)
+      declare i32 @llvm.bitreverse.i32(i32 <id>)
+      declare i64 @llvm.bitreverse.i64(i64 <id>)
+
+Overview:
+"""""""""
+
+The '``llvm.bitreverse``' family of intrinsics is used to reverse the
+bitpattern of an integer value; for example ``0b10110110`` becomes
+``0b01101101``.
+
+Semantics:
+""""""""""
+
+The ``llvm.bitreverse.iN`` intrinsic returns an ``iN`` value that has bit
+``M`` in the input moved to bit ``N-M-1`` in the output.
+
 '``llvm.bswap.*``' Intrinsics
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -10225,23 +10963,23 @@ Overview:
 """""""""
 
 The '``llvm.canonicalize.*``' intrinsic returns the platform specific canonical
-encoding of a floating point number.  This canonicalization is useful for
+encoding of a floating point number. This canonicalization is useful for
 implementing certain numeric primitives such as frexp. The canonical encoding is
 defined by IEEE-754-2008 to be:
 
 ::
 
       2.1.8 canonical encoding: The preferred encoding of a floating-point
-      representation in a format.  Applied to declets, significands of finite
+      representation in a format.
Applied to declets, significands of finite numbers, infinities, and NaNs, especially in decimal formats. This operation can also be considered equivalent to the IEEE-754-2008 -conversion of a floating-point value to the same format. NaNs are handled +conversion of a floating-point value to the same format. NaNs are handled according to section 6.2. Examples of non-canonical encodings: -- x87 pseudo denormals, pseudo NaNs, pseudo Infinity, Unnormals. These are +- x87 pseudo denormals, pseudo NaNs, pseudo Infinity, Unnormals. These are converted to a canonical representation per hardware-specific protocol. - Many normal decimal floating point numbers have non-canonical alternative encodings. @@ -10254,11 +10992,11 @@ default exception handling must signal an invalid exception, and produce a quiet NaN result. This function should always be implementable as multiplication by 1.0, provided -that the compiler does not constant fold the operation. Likewise, division by -1.0 and ``llvm.minnum(x, x)`` are possible implementations. Addition with +that the compiler does not constant fold the operation. Likewise, division by +1.0 and ``llvm.minnum(x, x)`` are possible implementations. Addition with -0.0 is also sufficient provided that the rounding mode is not -Infinity. -``@llvm.canonicalize`` must preserve the equality relation. That is: +``@llvm.canonicalize`` must preserve the equality relation. That is: - ``(@llvm.canonicalize(x) == x)`` is equivalent to ``(x == x)`` - ``(@llvm.canonicalize(x) == @llvm.canonicalize(y))`` is equivalent to @@ -10269,15 +11007,15 @@ Additionally, the sign of zero must be conserved: The payload bits of a NaN must be conserved, with two exceptions. First, environments which use only a single canonical representation of NaN -must perform said canonicalization. Second, SNaNs must be quieted per the +must perform said canonicalization. Second, SNaNs must be quieted per the usual methods. The canonicalization operation may be optimized away if: -- The input is known to be canonical. For example, it was produced by a +- The input is known to be canonical. For example, it was produced by a floating-point operation that is required by the standard to be canonical. - The result is consumed only by (or fused with) other floating-point - operations. That is, the bits of the floating point value are not examined. + operations. That is, the bits of the floating point value are not examined. '``llvm.fmuladd.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -10566,12 +11304,16 @@ LLVM provides intrinsics for predicated vector load and store operations. The pr Syntax: """"""" -This is an overloaded intrinsic. The loaded data is a vector of any integer or floating point data type. +This is an overloaded intrinsic. The loaded data is a vector of any integer, floating point or pointer data type. 
:: - declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* , i32 , <16 x i1> , <16 x float> ) - declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* , i32 , <2 x i1> , <2 x double> ) + declare <16 x float> @llvm.masked.load.v16f32 (<16 x float>* , i32 , <16 x i1> , <16 x float> ) + declare <2 x double> @llvm.masked.load.v2f64 (<2 x double>* , i32 , <2 x i1> , <2 x double> ) + ;; The data is a vector of pointers to double + declare <8 x double*> @llvm.masked.load.v8p0f64 (<8 x double*>* , i32 , <8 x i1> , <8 x double*> ) + ;; The data is a vector of function pointers + declare <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f (<8 x i32 ()*>* , i32 , <8 x i1> , <8 x i32 ()*> ) Overview: """"""""" @@ -10607,12 +11349,16 @@ The result of this operation is equivalent to a regular vector load instruction Syntax: """"""" -This is an overloaded intrinsic. The data stored in memory is a vector of any integer or floating point data type. +This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. :: - declare void @llvm.masked.store.v8i32 (<8 x i32> , <8 x i32> * , i32 , <8 x i1> ) - declare void @llvm.masked.store.v16f32(<16 x i32> , <16 x i32>* , i32 , <16 x i1> ) + declare void @llvm.masked.store.v8i32 (<8 x i32> , <8 x i32>* , i32 , <8 x i1> ) + declare void @llvm.masked.store.v16f32 (<16 x float> , <16 x float>* , i32 , <16 x i1> ) + ;; The data is a vector of pointers to double + declare void @llvm.masked.store.v8p0f64 (<8 x double*> , <8 x double*>* , i32 , <8 x i1> ) + ;; The data is a vector of function pointers + declare void @llvm.masked.store.v4p0f_i32f (<4 x i32 ()*> , <4 x i32 ()*>* , i32 , <4 x i1> ) Overview: """"""""" @@ -10653,12 +11399,13 @@ LLVM provides intrinsics for vector gather and scatter operations. They are simi Syntax: """"""" -This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer or floating point data type gathered together into one vector. +This is an overloaded intrinsic. The loaded data are multiple scalar values of any integer, floating point or pointer data type gathered together into one vector. :: - declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> , i32 , <16 x i1> , <16 x float> ) - declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> , i32 , <2 x i1> , <2 x double> ) + declare <16 x float> @llvm.masked.gather.v16f32 (<16 x float*> , i32 , <16 x i1> , <16 x float> ) + declare <2 x double> @llvm.masked.gather.v2f64 (<2 x double*> , i32 , <2 x i1> , <2 x double> ) + declare <8 x float*> @llvm.masked.gather.v8p0f32 (<8 x float**> , i32 , <8 x i1> , <8 x float*> ) Overview: """"""""" @@ -10706,12 +11453,13 @@ The semantics of this operation are equivalent to a sequence of conditional scal Syntax: """"""" -This is an overloaded intrinsic. The data stored in memory is a vector of any integer or floating point data type. Each vector element is stored in an arbitrary memory addresses. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element. +This is an overloaded intrinsic. The data stored in memory is a vector of any integer, floating point or pointer data type. Each vector element is stored in an arbitrary memory address. Scatter with overlapping addresses is guaranteed to be ordered from least-significant to most-significant element. 
 ::
 
-       declare void @llvm.masked.scatter.v8i32  (<8 x i32>  <value>, <8 x i32*>  <ptrs>, i32 <alignment>, <8 x i1>  <mask>)
-       declare void @llvm.masked.scatter.v16f32 (<16 x i32> <value>, <16 x i32*> <ptrs>, i32 <alignment>, <16 x i1> <mask>)
+       declare void @llvm.masked.scatter.v8i32   (<8 x i32>     <value>, <8 x i32*>     <ptrs>, i32 <alignment>, <8 x i1>  <mask>)
+       declare void @llvm.masked.scatter.v16f32  (<16 x float>  <value>, <16 x float*>  <ptrs>, i32 <alignment>, <16 x i1> <mask>)
+       declare void @llvm.masked.scatter.v4p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1>  <mask>)
 
 Overview:
 """""""""
@@ -10727,7 +11475,7 @@ The first operand is a vector value to be written to memory. The second operand
 
 Semantics:
 """"""""""
 
-The '``llvm.masked.scatter``' intrinsics is designed for writing selected vector elements to arbitrary memory addresses in a single IR operation. The operation may be conditional, when not all bits in the mask are switched on. It is useful for targets that support vector masked scatter and allows vectorizing basic blocks with data and control divergency. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations.
+The '``llvm.masked.scatter``' intrinsic is designed for writing selected vector elements to arbitrary memory addresses in a single IR operation. The operation may be conditional, when not all bits in the mask are switched on. It is useful for targets that support vector masked scatter and allows vectorizing basic blocks with data and control divergence. Other targets may support this intrinsic differently, for example by lowering it into a sequence of branches that guard scalar store operations.
 
 ::
@@ -10881,6 +11629,36 @@ Semantics:
 
 This intrinsic indicates that the memory is mutable again.
 
+'``llvm.invariant.group.barrier``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i8* @llvm.invariant.group.barrier(i8* <ptr>)
+
+Overview:
+"""""""""
+
+The '``llvm.invariant.group.barrier``' intrinsic can be used when an invariant
+established by ``invariant.group`` metadata no longer holds, to obtain a new
+pointer value that does not carry the invariant information.
+
+
+Arguments:
+""""""""""
+
+The ``llvm.invariant.group.barrier`` intrinsic takes only one argument, which is
+the pointer to the memory for which the ``invariant.group`` invariant no longer
+holds.
+
+Semantics:
+""""""""""
+
+Returns another pointer that aliases its argument but which is considered different
+for the purposes of ``load``/``store`` ``invariant.group`` metadata.
+
 General Intrinsics
 ------------------
@@ -11253,7 +12031,7 @@ Arguments:
 """"""""""
 
 The first argument is a pointer to be tested. The second argument is a
-metadata string containing the name of a :doc:`bitset <BitSets>`.
+metadata object representing an identifier for a :doc:`bitset <BitSets>`.
 
 Overview:
 """""""""
diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst
index 1ac75a406985..84adff3616f7 100644
--- a/docs/LibFuzzer.rst
+++ b/docs/LibFuzzer.rst
@@ -21,7 +21,8 @@ This library is intended primarily for in-process coverage-guided fuzz testing
   optimizations options (e.g. -O0, -O1, -O2) to diversify testing.
 * Build a test driver using the same options as the library.
   The test driver is a C/C++ file containing interesting calls to the library
-  inside a single function ``extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);``
+  inside a single function ``extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size);``.
+  Currently, the only expected return value is 0; others are reserved for future use.
* Link the Fuzzer, the library and the driver together into an executable using the same sanitizer options as for the library. * Collect the initial corpus of inputs for the @@ -60,14 +61,18 @@ The most important flags are:: cross_over 1 If 1, cross over inputs. mutate_depth 5 Apply this number of consecutive mutations to each input. timeout 1200 Timeout in seconds (if positive). If one unit runs more than this number of seconds the process will abort. + max_total_time 0 If positive, indicates the maximal total time in seconds to run the fuzzer. help 0 Print help. - save_minimized_corpus 0 If 1, the minimized corpus is saved into the first input directory + merge 0 If 1, the 2nd, 3rd, etc. corpora will be merged into the 1st corpus. Only interesting units will be taken. jobs 0 Number of jobs to run. If jobs >= 1 we spawn this number of jobs in separate worker processes with stdout/stderr redirected to fuzz-JOB.log. workers 0 Number of simultaneous worker processes to run the jobs. If zero, "min(jobs,NumberOfCpuCores()/2)" is used. - tokens 0 Use the file with tokens (one token per line) to fuzz a token based input language. - apply_tokens 0 Read the given input file, substitute bytes with tokens and write the result to stdout. sync_command 0 Execute an external command "<sync_command> <test_corpus>" to synchronize the test corpus. sync_timeout 600 Minimum timeout between syncs. + use_traces 0 Experimental: use instruction traces + only_ascii 0 If 1, generate only ASCII (isprint+isspace) inputs. + test_single_input "" Use specified file content as test input. Test will be run only once. Useful for debugging a particular case. + artifact_prefix "" Write fuzzing artifacts (crash, timeout, or slow inputs) as $(artifact_prefix)file + exact_artifact_path "" Write the single artifact on failure (crash, timeout) as $(exact_artifact_path). This overrides -artifact_prefix and will not use checksum in the file name. Do not use the same path for several parallel processes. For the full list of flags run the fuzzer binary with ``-help=1``. @@ -80,11 +85,14 @@ Toy example A simple function that does something interesting if it receives the input "HI!":: cat << EOF >> test_fuzzer.cc - extern "C" void LLVMFuzzerTestOneInput(const unsigned char *data, unsigned long size) { + #include <stdint.h> + #include <stddef.h> + extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { if (size > 0 && data[0] == 'H') if (size > 1 && data[1] == 'I') if (size > 2 && data[2] == '!') __builtin_trap(); + return 0; } EOF # Get lib/Fuzzer. Assuming that you already have fresh clang in PATH. @@ -115,9 +123,10 @@ Here we show how to use lib/Fuzzer on something real, yet simple: pcre2_:: # Build the actual function that does something interesting with PCRE2.
cat << EOF > pcre_fuzzer.cc #include <string.h> + #include <stdint.h> #include "pcre2posix.h" - extern "C" void LLVMFuzzerTestOneInput(const unsigned char *data, size_t size) { - if (size < 1) return; + extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + if (size < 1) return 0; char *str = new char[size+1]; memcpy(str, data, size); str[size] = 0; @@ -127,6 +136,7 @@ Here we show how to use lib/Fuzzer on something real, yet simple: pcre2_:: regfree(&preg); } delete [] str; + return 0; } EOF clang++ -g -fsanitize=address $COV_FLAGS -c -std=c++11 -I inst/include/ pcre_fuzzer.cc @@ -213,6 +223,9 @@ to find Heartbleed with LibFuzzer:: #include <openssl/ssl.h> #include <openssl/err.h> #include <assert.h> + #include <stdint.h> + #include <stddef.h> + SSL_CTX *sctx; int Init() { SSL_library_init(); @@ -224,7 +237,7 @@ to find Heartbleed with LibFuzzer:: assert (SSL_CTX_use_PrivateKey_file(sctx, "server.key", SSL_FILETYPE_PEM)); return 0; } - extern "C" void LLVMFuzzerTestOneInput(unsigned char *Data, size_t Size) { + extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { static int unused = Init(); SSL *server = SSL_new(sctx); BIO *sinbio = BIO_new(BIO_s_mem()); @@ -234,9 +247,10 @@ to find Heartbleed with LibFuzzer:: BIO_write(sinbio, Data, Size); SSL_do_handshake(server); SSL_free(server); + return 0; } EOF - # Build the fuzzer. + # Build the fuzzer. clang++ -g handshake-fuzz.cc -fsanitize=address \ openssl-1.0.1f/libssl.a openssl-1.0.1f/libcrypto.a Fuzzer*.o # Run 20 independent fuzzer jobs. @@ -252,26 +266,43 @@ Voila:: #1 0x4db504 in tls1_process_heartbeat openssl-1.0.1f/ssl/t1_lib.c:2586:3 #2 0x580be3 in ssl3_read_bytes openssl-1.0.1f/ssl/s3_pkt.c:1092:4 +Note: a `similar fuzzer `_ +is now a part of the boringssl source tree. + Advanced features ================= -Tokens ------- +Dictionaries +------------ +*EXPERIMENTAL*. +LibFuzzer supports user-supplied dictionaries with input language keywords +or other interesting byte sequences (e.g. multi-byte magic values). +Use ``-dict=DICTIONARY_FILE``. For some input languages using a dictionary +may significantly improve the search speed. +The dictionary syntax is similar to that used by AFL_ for its ``-x`` option:: -By default, the fuzzer is not aware of complexities of the input language -and when fuzzing e.g. a C++ parser it will mostly stress the lexer. -It is very hard for the fuzzer to come up with something like ``reinterpret_cast`` -from a test corpus that doesn't have it. -See a detailed discussion of this topic at -http://lcamtuf.blogspot.com/2015/01/afl-fuzz-making-up-grammar-with.html. + # Lines starting with '#' and empty lines are ignored. -lib/Fuzzer implements a simple technique that allows to fuzz input languages with -long tokens. All you need is to prepare a text file containing up to 253 tokens, one token per line, -and pass it to the fuzzer as ``-tokens=TOKENS_FILE.txt``. -Three implicit tokens are added: ``" "``, ``"\t"``, and ``"\n"``. -The fuzzer itself will still be mutating a string of bytes -but before passing this input to the target library it will replace every byte ``b`` with the ``b``-th token. -If there are less than ``b`` tokens, a space will be added instead. + # Adds "blah" (w/o quotes) to the dictionary. + kw1="blah" + # Use \\ for backslash and \" for quotes. + kw2="\"ac\\dc\"" + # Use \xAB for hex values + kw3="\xF7\xF8" + # the name of the keyword followed by '=' may be omitted: + "foo\x0Abar" + +Data-flow-guided fuzzing +------------------------ + +*EXPERIMENTAL*.
+With an additional compiler flag ``-fsanitize-coverage=trace-cmp`` (see SanitizerCoverageTraceDataFlow_) +and extra run-time flag ``-use_traces=1`` the fuzzer will try to apply *data-flow-guided fuzzing*. +That is, the fuzzer will record the inputs to comparison instructions, switch statements, +and several libc functions (``memcmp``, ``strcmp``, ``strncmp``, etc.). +It will later use those recorded inputs during mutations. + +This mode can be combined with DataFlowSanitizer_ to achieve better sensitivity. AFL compatibility ----------------- @@ -321,18 +352,38 @@ Build (make sure to use fresh clang as the host compiler):: Optionally build other kinds of binaries (asan+Debug, msan, ubsan, etc). -TODO: commit the pre-fuzzed corpus to svn (?). - Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=23052 clang-fuzzer ------------ -The default behavior is very similar to ``clang-format-fuzzer``. -Clang can also be fuzzed with Tokens_ using ``-tokens=$LLVM/lib/Fuzzer/cxx_fuzzer_tokens.txt`` option. +The behavior is very similar to ``clang-format-fuzzer``. Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=23057 +llvm-as-fuzzer +-------------- + +Tracking bug: https://llvm.org/bugs/show_bug.cgi?id=24639 + +llvm-mc-fuzzer +-------------- + +This tool fuzzes the MC layer. Currently it is only able to fuzz the +disassembler but it is hoped that assembly and round-trip verification will be +added in the future. + +When run in disassembly mode, the inputs are opcodes to be disassembled. The +fuzzer will consume as many instructions as possible and will stop when it +finds an invalid instruction or runs out of data. + +Please note that the command line interface differs slightly from that of other +fuzzers. The fuzzer arguments should follow ``--fuzzer-args`` and should have +a single dash, while other arguments control the operation mode and target in a +similar manner to ``llvm-mc`` and should have two dashes. For example:: + + llvm-mc-fuzzer --triple=aarch64-linux-gnu --disassemble --fuzzer-args -max_len=4 -jobs=10 + Buildbot -------- @@ -348,7 +399,7 @@ The corpuses are stored in git on github and can be used like this:: git clone https://github.com/kcc/fuzzing-with-sanitizers.git bin/clang-format-fuzzer fuzzing-with-sanitizers/llvm/clang-format/C1 bin/clang-fuzzer fuzzing-with-sanitizers/llvm/clang/C1/ - bin/clang-fuzzer fuzzing-with-sanitizers/llvm/clang/TOK1 -tokens=$LLVM/llvm/lib/Fuzzer/cxx_fuzzer_tokens.txt + bin/llvm-as-fuzzer fuzzing-with-sanitizers/llvm/llvm-as/C1 -only_ascii=1 FAQ @@ -407,11 +458,46 @@ small inputs, each input takes < 1ms to run, and the library code is not expecte to crash on invalid inputs. Examples: regular expression matchers, text or binary format parsers.
+Trophies +======== +* GLIBC: https://sourceware.org/glibc/wiki/FuzzingLibc + +* MUSL LIBC: + + * http://git.musl-libc.org/cgit/musl/commit/?id=39dfd58417ef642307d90306e1c7e50aaec5a35c + * http://www.openwall.com/lists/oss-security/2015/03/30/3 + +* `pugixml `_ + +* PCRE: Search for "LLVM fuzzer" in http://vcs.pcre.org/pcre2/code/trunk/ChangeLog?view=markup; + also in `bugzilla `_ + +* `ICU `_ + +* `Freetype `_ + +* `Harfbuzz `_ + +* `SQLite `_ + +* `Python `_ + +* OpenSSL/BoringSSL: `[1] `_ + +* `Libxml2 + `_ + +* `Linux Kernel's BPF verifier `_ + +* LLVM: `Clang `_, `Clang-format `_, `libc++ `_, `llvm-as `_, Disassembler: http://reviews.llvm.org/rL247405, http://reviews.llvm.org/rL247414, http://reviews.llvm.org/rL247416, http://reviews.llvm.org/rL247417, http://reviews.llvm.org/rL247420, http://reviews.llvm.org/rL247422. + .. _pcre2: http://www.pcre.org/ .. _AFL: http://lcamtuf.coredump.cx/afl/ .. _SanitizerCoverage: http://clang.llvm.org/docs/SanitizerCoverage.html +.. _SanitizerCoverageTraceDataFlow: http://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow +.. _DataFlowSanitizer: http://clang.llvm.org/docs/DataFlowSanitizer.html .. _Heartbleed: http://en.wikipedia.org/wiki/Heartbleed diff --git a/docs/MIRLangRef.rst b/docs/MIRLangRef.rst new file mode 100644 index 000000000000..a5f8c8c743ab --- /dev/null +++ b/docs/MIRLangRef.rst @@ -0,0 +1,495 @@ +======================================== +Machine IR (MIR) Format Reference Manual +======================================== + +.. contents:: + :local: + +.. warning:: + This is a work in progress. + +Introduction +============ + +This document is a reference manual for the Machine IR (MIR) serialization +format. MIR is a human readable serialization format that is used to represent +LLVM's :ref:`machine specific intermediate representation +`. + +The MIR serialization format is designed to be used for testing the code +generation passes in LLVM. + +Overview +======== + +The MIR serialization format uses a YAML container. YAML is a standard +data serialization language, and the full YAML language spec can be read at +`yaml.org +`_. + +A MIR file is split up into a series of `YAML documents`_. The first document +can contain an optional embedded LLVM IR module, and the rest of the documents +contain the serialized machine functions. + +.. _YAML documents: http://www.yaml.org/spec/1.2/spec.html#id2800132 + +MIR Testing Guide +================= + +You can use the MIR format for testing in two different ways: + +- You can write MIR tests that invoke a single code generation pass using the + ``run-pass`` option in llc. + +- You can use llc's ``stop-after`` option with existing or new LLVM assembly + tests and check the MIR output of a specific code generation pass. + +Testing Individual Code Generation Passes +----------------------------------------- + +The ``run-pass`` option in llc allows you to create MIR tests that invoke +just a single code generation pass. When this option is used, llc will parse +an input MIR file, run the specified code generation pass, and print the +resulting MIR to the standard output stream. + +You can generate an input MIR file for the test by using the ``stop-after`` +option in llc. 
For example, if you would like to write a test for the +post register allocation pseudo instruction expansion pass, you can specify +the machine copy propagation pass in the ``stop-after`` option, as it runs +just before the pass that we are trying to test: + + ``llc -stop-after machine-cp bug-trigger.ll > test.mir`` + +After generating the input MIR file, you'll have to add a run line that uses +the ``-run-pass`` option to it. In order to test the post register allocation +pseudo instruction expansion pass on X86-64, a run line like the one shown +below can be used: + + ``# RUN: llc -run-pass postrapseudos -march=x86-64 %s -o /dev/null | FileCheck %s`` + +The MIR files are target dependent, so they have to be placed in the target +specific test directories. They also need to specify a target triple or a +target architecture either in the run line or in the embedded LLVM IR module. + +Limitations +----------- + +Currently the MIR format has several limitations in terms of which state it +can serialize: + +- The target-specific state in the target-specific ``MachineFunctionInfo`` + subclasses isn't serialized at the moment. + +- The target-specific ``MachineConstantPoolValue`` subclasses (in the ARM and + SystemZ backends) aren't serialized at the moment. + +- The ``MCSymbol`` machine operands are only printed; they can't be parsed. + +- A lot of the state in ``MachineModuleInfo`` isn't serialized - only the CFI + instructions and the variable debug information from MMI are serialized right + now. + +These limitations impose restrictions on what you can test with the MIR format. +For now, tests that would like to test some behaviour that depends on the state +of certain ``MCSymbol`` operands or the exception handling state in MMI can't +use the MIR format. Similarly, tests that depend on the state of the target +specific ``MachineFunctionInfo`` or ``MachineConstantPoolValue`` subclasses +can't use the MIR format at the moment. + +High Level Structure +==================== + +.. _embedded-module: + +Embedded Module +--------------- + +When the first YAML document contains a `YAML block literal string`_, the MIR +parser will treat this string as an LLVM assembly language string that +represents an embedded LLVM IR module. +Here is an example of a YAML document that contains an LLVM module: + +.. code-block:: llvm + + --- | + define i32 @inc(i32* %x) { + entry: + %0 = load i32, i32* %x + %1 = add i32 %0, 1 + store i32 %1, i32* %x + ret i32 %1 + } + ... + +.. _YAML block literal string: http://www.yaml.org/spec/1.2/spec.html#id2795688 + +Machine Functions +----------------- + +The remaining YAML documents contain the machine functions. This is an example +of such a YAML document: + +.. code-block:: llvm + + --- + name: inc + tracksRegLiveness: true + liveins: + - { reg: '%rdi' } + body: | + bb.0.entry: + liveins: %rdi + + %eax = MOV32rm %rdi, 1, _, 0, _ + %eax = INC32r killed %eax, implicit-def dead %eflags + MOV32mr killed %rdi, 1, _, 0, _, %eax + RETQ %eax + ... + +The document above consists of attributes that represent the various +properties and data structures in a machine function. + +The attribute ``name`` is required, and its value should be identical to the +name of a function that this machine function is based on. + +The attribute ``body`` is a `YAML block literal string`_. Its value represents +the function's machine basic blocks and their machine instructions.
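+
+Putting these pieces together, a complete MIR test file usually consists of a
+run line, an optional embedded LLVM IR module and a machine function document.
+The sketch below is illustrative only - it reuses the ``inc`` example from
+above, and the ``postrapseudos`` pass and the CHECK line are placeholder
+assumptions rather than parts of a real test:
+
+.. code-block:: llvm
+
+  # RUN: llc -run-pass postrapseudos -march=x86-64 %s -o /dev/null | FileCheck %s
+  --- |
+    define i32 @inc(i32* %x) {
+    entry:
+      %0 = load i32, i32* %x
+      %1 = add i32 %0, 1
+      store i32 %1, i32* %x
+      ret i32 %1
+    }
+  ...
+  ---
+  # CHECK: name: inc
+  name: inc
+  tracksRegLiveness: true
+  liveins:
+    - { reg: '%rdi' }
+  body: |
+    bb.0.entry:
+      liveins: %rdi
+
+      %eax = MOV32rm %rdi, 1, _, 0, _
+      %eax = INC32r killed %eax, implicit-def dead %eflags
+      MOV32mr killed %rdi, 1, _, 0, _, %eax
+      RETQ %eax
+  ...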
+ +Machine Instructions Format Reference +===================================== + +The machine basic blocks and their instructions are represented using a custom, +human readable serialization language. This language is used in the +`YAML block literal string`_ that corresponds to the machine function's body. + +A source string that uses this language contains a list of machine basic +blocks, which are described in the section below. + +Machine Basic Blocks +-------------------- + +A machine basic block is defined in a single block definition source construct +that contains the block's ID. +The example below defines two blocks that have IDs of zero and one: + +.. code-block:: llvm + + bb.0: + + bb.1: + + +A machine basic block can also have a name. It should be specified after the ID +in the block's definition: + +.. code-block:: llvm + + bb.0.entry: ; This block's name is "entry" + + +The block's name should be identical to the name of the IR block that this +machine block is based on. + +Block References +^^^^^^^^^^^^^^^^ + +The machine basic blocks are identified by their ID numbers. Individual +blocks are referenced using the following syntax: + +.. code-block:: llvm + + %bb.<id>[.<name>] + +Examples: + +.. code-block:: llvm + + %bb.0 + %bb.1.then + +Successors +^^^^^^^^^^ + +The machine basic block's successors have to be specified before any of the +instructions: + +.. code-block:: llvm + + bb.0.entry: + successors: %bb.1.then, %bb.2.else + + bb.1.then: + + bb.2.else: + + +The branch weights can be specified in brackets after the successor blocks. +The example below defines a block that has two successors with branch weights +of 32 and 16: + +.. code-block:: llvm + + bb.0.entry: + successors: %bb.1.then(32), %bb.2.else(16) + +.. _bb-liveins: + +Live In Registers +^^^^^^^^^^^^^^^^^ + +The machine basic block's live in registers have to be specified before any of +the instructions: + +.. code-block:: llvm + + bb.0.entry: + liveins: %edi, %esi + +The list of live in registers and successors can be empty. The language also +allows multiple live in register and successor lists - they are combined into +one list by the parser. + +Miscellaneous Attributes +^^^^^^^^^^^^^^^^^^^^^^^^ + +The attributes ``IsAddressTaken``, ``IsLandingPad`` and ``Alignment`` can be +specified in brackets after the block's definition: + +.. code-block:: llvm + + bb.0.entry (address-taken): + + bb.2.else (align 4): + + bb.3(landing-pad, align 4): + + +.. TODO: Describe the way the reference to an unnamed LLVM IR block can be + preserved. + +Machine Instructions +-------------------- + +A machine instruction is composed of a name, +:ref:`machine operands <machine-operands>`, +:ref:`instruction flags <instruction-flags>`, and machine memory operands. + +The instruction's name is usually specified before the operands. The example +below shows an instance of the X86 ``RETQ`` instruction with a single machine +operand: + +.. code-block:: llvm + + RETQ %eax + +However, if the machine instruction has one or more explicitly defined register +operands, the instruction's name has to be specified after them. The example +below shows an instance of the AArch64 ``LDPXpost`` instruction with three +defined register operands: + +.. code-block:: llvm + + %sp, %fp, %lr = LDPXpost %sp, 2 + +The instruction names are serialized using the exact definitions from the +target's ``*InstrInfo.td`` files, and they are case sensitive. This means that +similar instruction names like ``TSTri`` and ``tSTRi`` represent different +machine instructions. + ..
_instruction-flags: + +Instruction Flags +^^^^^^^^^^^^^^^^^ + +The flag ``frame-setup`` can be specified before the instruction's name: + +.. code-block:: llvm + + %fp = frame-setup ADDXri %sp, 0, 0 + +.. _registers: + +Registers +--------- + +Registers are one of the key primitives in the machine instructions +serialization language. They are primarily used in the +:ref:`register machine operands <register-operands>`, +but they can also be used in a number of other places, like the +:ref:`basic block's live in list <bb-liveins>`. + +The physical registers are identified by their name. They use the following +syntax: + +.. code-block:: llvm + + %<name> + +The example below shows three X86 physical registers: + +.. code-block:: llvm + + %eax + %r15 + %eflags + +The virtual registers are identified by their ID number. They use the following +syntax: + +.. code-block:: llvm + + %<id> + +Example: + +.. code-block:: llvm + + %0 + +The null registers are represented using an underscore ('``_``'). They can also be +represented using a '``%noreg``' named register, although the former syntax +is preferred. + +.. _machine-operands: + +Machine Operands +---------------- + +There are seventeen different kinds of machine operands, and all of them, except +the ``MCSymbol`` operand, can be serialized. The ``MCSymbol`` operands are +just printed out - they can't be parsed back yet. + +Immediate Operands +^^^^^^^^^^^^^^^^^^ + +The immediate machine operands are untyped, 64-bit signed integers. The +example below shows an instance of the X86 ``MOV32ri`` instruction that has an +immediate machine operand ``-42``: + +.. code-block:: llvm + + %eax = MOV32ri -42 + +.. TODO: Describe the CIMM (Rare) and FPIMM immediate operands. + +.. _register-operands: + +Register Operands +^^^^^^^^^^^^^^^^^ + +The :ref:`register <registers>` primitive is used to represent the register +machine operands. The register operands can also have optional +:ref:`register flags <register-flags>`, +:ref:`a subregister index <subregister-indices>`, +and a reference to the tied register operand. +The full syntax of a register operand is shown below: + +.. code-block:: llvm + + [<flags>] <register> [ :<subregister-idx-name> ] [ (tied-def <tied-def-operand-index>) ] + +This example shows an instance of the X86 ``XOR32rr`` instruction that has +5 register operands with different register flags: + +.. code-block:: llvm + + dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al + +.. _register-flags: + +Register Flags +~~~~~~~~~~~~~~ + +The table below shows all of the possible register flags along with the +corresponding internal ``llvm::RegState`` representation: + +.. list-table:: + :header-rows: 1 + + * - Flag + - Internal Value + + * - ``implicit`` + - ``RegState::Implicit`` + + * - ``implicit-def`` + - ``RegState::ImplicitDefine`` + + * - ``def`` + - ``RegState::Define`` + + * - ``dead`` + - ``RegState::Dead`` + + * - ``killed`` + - ``RegState::Kill`` + + * - ``undef`` + - ``RegState::Undef`` + + * - ``internal`` + - ``RegState::InternalRead`` + + * - ``early-clobber`` + - ``RegState::EarlyClobber`` + + * - ``debug-use`` + - ``RegState::Debug`` + +.. _subregister-indices: + +Subregister Indices +~~~~~~~~~~~~~~~~~~~ + +The register machine operands can reference a portion of a register by using +the subregister indices. The example below shows an instance of the ``COPY`` +pseudo instruction that uses the X86 ``sub_8bit`` subregister index to copy 8 +lower bits from the 32-bit virtual register 0 to the 8-bit virtual register 1: + ..
code-block:: llvm + + %1 = COPY %0:sub_8bit + +The names of the subregister indices are target specific, and are typically +defined in the target's ``*RegisterInfo.td`` file. + +Global Value Operands +^^^^^^^^^^^^^^^^^^^^^ + +The global value machine operands reference the global values from the +:ref:`embedded LLVM IR module <embedded-module>`. +The example below shows an instance of the X86 ``MOV64rm`` instruction that has +a global value operand named ``G``: + +.. code-block:: llvm + + %rax = MOV64rm %rip, 1, _, @G, _ + +The named global values are represented using an identifier with the '@' prefix. +If the identifier doesn't match the regular expression +`[-a-zA-Z$._][-a-zA-Z$._0-9]*`, then this identifier must be quoted. + +The unnamed global values are represented using an unsigned numeric value with +the '@' prefix, like in the following examples: ``@0``, ``@989``. + +.. TODO: Describe the parser's default behaviour when optional YAML attributes + are missing. +.. TODO: Describe the syntax for the bundled instructions. +.. TODO: Describe the syntax for virtual register YAML definitions. +.. TODO: Describe the machine function's YAML flag attributes. +.. TODO: Describe the syntax for the external symbol and register + mask machine operands. +.. TODO: Describe the frame information YAML mapping. +.. TODO: Describe the syntax of the stack object machine operands and their + YAML definitions. +.. TODO: Describe the syntax of the constant pool machine operands and their + YAML definitions. +.. TODO: Describe the syntax of the jump table machine operands and their + YAML definitions. +.. TODO: Describe the syntax of the block address machine operands. +.. TODO: Describe the syntax of the CFI index machine operands. +.. TODO: Describe the syntax of the metadata machine operands, and the + instruction's debug location attribute. +.. TODO: Describe the syntax of the target index machine operands. +.. TODO: Describe the syntax of the register live out machine operands. +.. TODO: Describe the syntax of the machine memory operands. diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst index 3426bfff164f..73704d9b17d7 100644 --- a/docs/Phabricator.rst +++ b/docs/Phabricator.rst @@ -21,7 +21,7 @@ click the power icon in the top right. You can register with a GitHub account, a Google account, or you can create your own profile. Make *sure* that the email address registered with Phabricator is subscribed -to the relevant -commits mailing list. If your are not subscribed to the commit +to the relevant -commits mailing list. If you are not subscribed to the commit list, all mail sent by Phabricator on your behalf will be held for moderation. Note that if you use your Subversion user name as Phabricator user name, @@ -66,7 +66,7 @@ To upload a new patch: * Leave the drop down on *Create a new Revision...* and click *Continue*. * Enter a descriptive title and summary. The title and summary are usually in the form of a :ref:`commit message `. -* Add reviewers and mailing +* Add reviewers (see below for advice) and subscribe mailing lists that you want to be included in the review. If your patch is for LLVM, add llvm-commits as a Subscriber; if your patch is for Clang, add cfe-commits. @@ -83,6 +83,24 @@ To submit an updated patch: * Leave the Repository and Project fields blank. * Add comments about the changes in the new diff. Click *Save*. +Choosing reviewers: You typically pick one or two people as initial reviewers.
+This choice is not crucial, because you are merely suggesting and not requiring +them to participate. Many people will see the email notification on cfe-commits +or llvm-commits, and if the subject line suggests the patch is something they +should look at, they will. + +Here are a couple of ways to pick the initial reviewer(s): + +* Use ``svn blame`` and the commit log to find names of people who have + recently modified the same area of code that you are modifying. +* Look in CODE_OWNERS.TXT to see who might be responsible for that area. +* If you've discussed the change on a dev list, the people who participated + might be appropriate reviewers. + +Even if you think the code owner is the busiest person in the world, it's still +okay to put them as a reviewer. Being the code owner means they have accepted +responsibility for making sure the review happens. + Reviewing code with Phabricator ------------------------------- @@ -162,6 +180,6 @@ trivially a good fit for an official LLVM project. .. _LLVM's Phabricator: http://reviews.llvm.org .. _`http://reviews.llvm.org`: http://reviews.llvm.org .. _Code Repository Browser: http://reviews.llvm.org/diffusion/ -.. _Arcanist Quick Start: http://www.phabricator.com/docs/phabricator/article/Arcanist_Quick_Start.html -.. _Arcanist User Guide: http://www.phabricator.com/docs/phabricator/article/Arcanist_User_Guide.html +.. _Arcanist Quick Start: https://secure.phabricator.com/book/phabricator/article/arcanist_quick_start/ +.. _Arcanist User Guide: https://secure.phabricator.com/book/phabricator/article/arcanist/ .. _llvm-reviews GitHub project: https://github.com/r4nt/llvm-reviews/ diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index 08cc61a187b5..44f76fef8f1f 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -366,7 +366,7 @@ Then you can run your pass like this: Using the ``DEBUG()`` macro instead of a home-brewed solution allows you to not have to create "yet another" command line option for the debug output for your -pass. Note that ``DEBUG()`` macros are disabled for optimized builds, so they +pass. Note that ``DEBUG()`` macros are disabled for non-asserts builds, so they do not cause a performance impact at all (for the same reason, they should also not contain side-effects!). @@ -383,21 +383,17 @@ Fine grained debug info with ``DEBUG_TYPE`` and the ``-debug-only`` option Sometimes you may find yourself in a situation where enabling ``-debug`` just turns on **too much** information (such as when working on the code generator). If you want to enable debug information with more fine-grained control, you -can define the ``DEBUG_TYPE`` macro and use the ``-debug-only`` option as +should define the ``DEBUG_TYPE`` macro and use the ``-debug-only`` option as follows: .. 
code-block:: c++ - #undef DEBUG_TYPE - DEBUG(errs() << "No debug type\n"); #define DEBUG_TYPE "foo" DEBUG(errs() << "'foo' debug type\n"); #undef DEBUG_TYPE #define DEBUG_TYPE "bar" DEBUG(errs() << "'bar' debug type\n"); #undef DEBUG_TYPE - #define DEBUG_TYPE "" - DEBUG(errs() << "No debug type (2)\n"); Then you can run your pass like this: @@ -406,24 +402,22 @@ Then you can run your pass like this: $ opt < a.bc > /dev/null -mypass $ opt < a.bc > /dev/null -mypass -debug - No debug type 'foo' debug type 'bar' debug type - No debug type (2) $ opt < a.bc > /dev/null -mypass -debug-only=foo 'foo' debug type $ opt < a.bc > /dev/null -mypass -debug-only=bar 'bar' debug type Of course, in practice, you should only set ``DEBUG_TYPE`` at the top of a file, -to specify the debug type for the entire module (if you do this before you -``#include "llvm/Support/Debug.h"``, you don't have to insert the ugly -``#undef``'s). Also, you should use names more meaningful than "foo" and "bar", -because there is no system in place to ensure that names do not conflict. If -two different modules use the same string, they will all be turned on when the -name is specified. This allows, for example, all debug information for -instruction scheduling to be enabled with ``-debug-only=InstrSched``, even if -the source lives in multiple files. +to specify the debug type for the entire module. Be careful that you only do +this after including Debug.h and not around any #include of headers. Also, you +should use names more meaningful than "foo" and "bar", because there is no +system in place to ensure that names do not conflict. If two different modules +use the same string, they will all be turned on when the name is specified. +This allows, for example, all debug information for instruction scheduling to be +enabled with ``-debug-only=InstrSched``, even if the source lives in multiple +files. For performance reasons, -debug-only is not available in optimized builds (``--enable-optimized``) of LLVM. @@ -435,10 +429,8 @@ preceding example could be written as: .. code-block:: c++ - DEBUG_WITH_TYPE("", errs() << "No debug type\n"); DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n"); DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n"); - DEBUG_WITH_TYPE("", errs() << "No debug type (2)\n"); .. _Statistic: diff --git a/docs/README.txt b/docs/README.txt index 3d6342929808..31764b2951b2 100644 --- a/docs/README.txt +++ b/docs/README.txt @@ -44,7 +44,7 @@ viewable online (as noted above) at e.g. Checking links ============== -The reachibility of external links in the documentation can be checked by +The reachability of external links in the documentation can be checked by running: cd docs/ diff --git a/docs/ReleaseNotes.rst b/docs/ReleaseNotes.rst index b68f5ecd493e..b3f7c005ed19 100644 --- a/docs/ReleaseNotes.rst +++ b/docs/ReleaseNotes.rst @@ -1,15 +1,21 @@ ====================== -LLVM 3.7 Release Notes +LLVM 3.8 Release Notes ====================== .. contents:: :local: +.. warning:: + These are in-progress notes for the upcoming LLVM 3.8 release. You may + prefer the `LLVM 3.7 Release Notes `_. + + Introduction ============ This document contains the release notes for the LLVM Compiler Infrastructure, -release 3.7. Here we describe the status of LLVM, including major improvements +release 3.8. Here we describe the status of LLVM, including major improvements from the previous release, improvements in various subprojects of LLVM, and some of the current users of the code.
All LLVM releases may be downloaded from the `LLVM releases web site `_. @@ -25,36 +31,42 @@ LLVM web page, this document applies to the *next* release, not the current one. To see the release notes for a specific release, please see the `releases page `_. -Major changes in 3.7.1 -====================== - -* 3.7.0 was released with an inadvertent change to the signature of the C - API function: LLVMBuildLandingPad, which made the C API incompatible with - prior releases. This has been corrected in LLVM 3.7.1. - - As a result of this change, 3.7.0 is not ABI compatible with 3.7.1. - - +----------------------------------------------------------------------------+ - | History of the LLVMBuildLandingPad() function | - +===========================+================================================+ - | 3.6.2 and prior releases | LLVMBuildLandingPad(LLVMBuilderRef, | - | | LLVMTypeRef, | - | | LLVMValueRef, | - | | unsigned, const char*) | - +---------------------------+------------------------------------------------+ - | 3.7.0 | LLVMBuildLandingPad(LLVMBuilderRef, | - | | LLVMTypeRef, | - | | unsigned, const char*) | - +---------------------------+------------------------------------------------+ - | 3.7.1 and future releases | LLVMBuildLandingPad(LLVMBuilderRef, | - | | LLVMTypeRef, | - | | LLVMValueRef, | - | | unsigned, const char*) | - +---------------------------+------------------------------------------------+ - - -Non-comprehensive list of changes in 3.7.0 +Non-comprehensive list of changes in this release ================================================= +* With this release, the minimum Windows version required for running LLVM is + Windows 7. Earlier versions, including Windows Vista and XP, are no longer + supported. + +* With this release, the autoconf build system is deprecated. It will be removed + in the 3.9 release. Please migrate to using CMake. For more information see: + `Building LLVM with CMake `_ + +* The C API function LLVMLinkModules is deprecated. It will be removed in the + 3.9 release. Please migrate to LLVMLinkModules2. Unlike the old function, the + new one + + * Doesn't take an unused parameter. + * Destroys the source instead of only damaging it. + * Does not record a message. Use the diagnostic handler instead. + +* The C API functions LLVMParseBitcode, LLVMParseBitcodeInContext, + LLVMGetBitcodeModuleInContext and LLVMGetBitcodeModule have been deprecated. + They will be removed in 3.9. Please migrate to the versions with a 2 suffix. + Unlike the old ones the new ones do not record a diagnostic message. Use + the diagnostic handler instead. + +* The deprecated C APIs LLVMGetBitcodeModuleProviderInContext and + LLVMGetBitcodeModuleProvider have been removed. + +* The deprecated C APIs LLVMCreateExecutionEngine, LLVMCreateInterpreter, + LLVMCreateJITCompiler, LLVMAddModuleProvider and LLVMRemoveModuleProvider + have been removed. + +* With this release, the C API headers have been reorganized to improve build + time. Type specific declarations have been moved to Types.h, and error + handling routines have been moved to ErrorHandling.h. Both are included in + Core.h so nothing should change for projects directly including the headers, + but transitive dependencies may be affected. .. NOTE For small 1-3 sentence descriptions, just add an entry at the end of @@ -63,429 +75,61 @@ Non-comprehensive list of changes in 3.7.0 functionality, or simply have a lot to talk about), see the `NOTE` below for adding a new subsection.
-* The minimum required Visual Studio version for building LLVM is now 2013 - Update 4. +* ... next change ... -* A new documentation page, :doc:`Frontend/PerformanceTips`, contains a - collection of tips for frontend authors on how to generate IR which LLVM is - able to effectively optimize. +.. NOTE + If you would like to document a larger change, then you can add a + subsection about it right here. You can copy the following boilerplate + and un-indent it (the indentation causes it to be inside this comment). -* The ``DataLayout`` is no longer optional. All the IR level optimizations expects - it to be present and the API has been changed to use a reference instead of - a pointer to make it explicit. The Module owns the datalayout and it has to - match the one attached to the TargetMachine for generating code. + Special New Feature + ------------------- - In 3.6, a pass was inserted in the pipeline to make the ``DataLayout`` accessible: - ``MyPassManager->add(new DataLayoutPass(MyTargetMachine->getDataLayout()));`` - In 3.7, you don't need a pass, you set the ``DataLayout`` on the ``Module``: - ``MyModule->setDataLayout(MyTargetMachine->createDataLayout());`` + Makes programs 10x faster by doing Special New Thing. - The LLVM C API ``LLVMGetTargetMachineData`` is deprecated to reflect the fact - that it won't be available anymore from ``TargetMachine`` in 3.8. +Changes to the ARM Backend +-------------------------- -* Comdats are now orthogonal to the linkage. LLVM will not create - comdats for weak linkage globals and the frontends are responsible - for explicitly adding them. + During this release ... -* On ELF we now support multiple sections with the same name and - comdat. This allows for smaller object files since multiple - sections can have a simple name (`.text`, `.rodata`, etc). - -* LLVM now lazily loads metadata in some cases. Creating archives - with IR files with debug info is now 25X faster. - -* llvm-ar can create archives in the BSD format used by OS X. - -* LLVM received a backend for the extended Berkely Packet Filter - instruction set that can be dynamically loaded into the Linux kernel via the - `bpf(2) `_ syscall. - - Support for BPF has been present in the kernel for some time, but starting - from 3.18 has been extended with such features as: 64-bit registers, 8 - additional registers registers, conditional backwards jumps, call - instruction, shift instructions, map (hash table, array, etc.), 1-8 byte - load/store from stack, and more. - - Up until now, users of BPF had to write bytecode by hand, or use - custom generators. This release adds a proper LLVM backend target for the BPF - bytecode architecture. - - The BPF target is now available by default, and options exist in both Clang - (-target bpf) or llc (-march=bpf) to pick eBPF as a backend. - -* Switch-case lowering was rewritten to avoid generating unbalanced search trees - (`PR22262 `_) and to exploit profile information - when available. Some lowering strategies are now disabled when optimizations - are turned off, to save compile time. - -* The debug info IR class hierarchy now inherits from ``Metadata`` and has its - own bitcode records and assembly syntax - (`documented in LangRef `_). The debug - info verifier has been merged with the main verifier. - -* LLVM IR and APIs are in a period of transition to aid in the removal of - pointer types (the end goal being that pointers are typeless/opaque - void*, - if you will). 
Some APIs and IR constructs have been modified to take - explicit types that are currently checked to match the target type of their - pre-existing pointer type operands. Further changes are still needed, but the - more you can avoid using ``PointerType::getPointeeType``, the easier the - migration will be. - -* Argument-less ``TargetMachine::getSubtarget`` and - ``TargetMachine::getSubtargetImpl`` have been removed from the tree. Updating - out of tree ports is as simple as implementing a non-virtual version in the - target, but implementing full ``Function`` based ``TargetSubtargetInfo`` - support is recommended. - -* This is expected to be the last major release of LLVM that supports being - run on Windows XP and Windows Vista. For the next major release the minimum - Windows version requirement will be Windows 7. Changes to the MIPS Target -------------------------- -During this release the MIPS target has: + During this release ... -* Added support for MIPS32R3, MIPS32R5, MIPS32R3, MIPS32R5, and microMIPS32. - -* Added support for dynamic stack realignment. This is of particular importance - to MSA on 32-bit subtargets since vectors always exceed the stack alignment on - the O32 ABI. - -* Added support for compiler-rt including: - - * Support for the Address, and Undefined Behaviour Sanitizers for all MIPS - subtargets. - - * Support for the Data Flow, and Memory Sanitizer for 64-bit subtargets. - - * Support for the Profiler for all MIPS subtargets. - -* Added support for libcxx, and libcxxabi. - -* Improved inline assembly support such that memory constraints may now make use - of the appropriate address offsets available to the instructions. Also, added - support for the ``ZC`` constraint. - -* Added support for 128-bit integers on 64-bit subtargets and 16-bit floating - point conversions on all subtargets. - -* Added support for read-only ``.eh_frame`` sections by storing type information - indirectly. - -* Added support for MCJIT on all 64-bit subtargets as well as MIPS32R6. - -* Added support for fast instruction selection on MIPS32 and MIPS32R2 with PIC. - -* Various bug fixes. Including the following notable fixes: - - * Fixed 'jumpy' debug line info around calls where calculation of the address - of the function would inappropriately change the line number. - - * Fixed missing ``__mips_isa_rev`` macro on the MIPS32R6 and MIPS32R6 - subtargets. - - * Fixed representation of NaN when targeting systems using traditional - encodings. Traditionally, MIPS has used NaN encodings that were compatible - with IEEE754-1985 but would later be found incompatible with IEEE754-2008. - - * Fixed multiple segfaults and assertions in the disassembler when - disassembling instructions that have memory operands. - - * Fixed multiple cases of suboptimal code generation involving $zero. - - * Fixed code generation of 128-bit shifts on 64-bit subtargets. - - * Prevented the delay slot filler from filling call delay slots with - instructions that modify or use $ra. - - * Fixed some remaining N32/N64 calling convention bugs when using small - structures on big-endian subtargets. - - * Fixed missing sign-extensions that are required by the N32/N64 calling - convention when generating calls to library functions with 32-bit - parameters. - - * Corrected the ``int64_t`` typedef to be ``long`` for N64. - - * ``-mno-odd-spreg`` is now honoured for vector insertion/extraction - operations when using -mmsa. - - * Fixed vector insertion and extraction for MSA on 64-bit subtargets. 
- - * Corrected the representation of member function pointers. This makes them - usable on microMIPS subtargets. Changes to the PowerPC Target ----------------------------- -There are numerous improvements to the PowerPC target in this release: + During this release ... -* LLVM now supports the ISA 2.07B (POWER8) instruction set, including - direct moves between general registers and vector registers, and - built-in support for hardware transactional memory (HTM). Some missing - instructions from ISA 2.06 (POWER7) were also added. -* Code generation for the local-dynamic and global-dynamic thread-local - storage models has been improved. - -* Loops may be restructured to leverage pre-increment loads and stores. - -* QPX - The vector instruction set used by the IBM Blue Gene/Q supercomputers - is now supported. - -* Loads from the TOC area are now correctly treated as invariant. - -* PowerPC now has support for i128 and v1i128 types. The types differ - in how they are passed in registers for the ELFv2 ABI. - -* Disassembly will now print shorter mnemonic aliases when available. - -* Optional register name prefixes for VSX and QPX registers are now - supported in the assembly parser. - -* The back end now contains a pass to remove unnecessary vector swaps - from POWER8 little-endian code generation. Additional improvements - are planned for release 3.8. - -* The undefined-behavior sanitizer (UBSan) is now supported for PowerPC. - -* Many new vector programming APIs have been added to altivec.h. - Additional ones are planned for release 3.8. - -* PowerPC now supports __builtin_call_with_static_chain. - -* PowerPC now supports the revised -mrecip option that permits finer - control over reciprocal estimates. - -* Many bugs have been identified and fixed. - -Changes to the SystemZ Target +Changes to the X86 Target ----------------------------- -* LLVM no longer attempts to automatically detect the current host CPU when - invoked natively. + During this release ... -* Support for all thread-local storage models. (Previous releases would support - only the local-exec TLS model.) - -* The POPCNT instruction is now used on z196 and above. - -* The RISBGN instruction is now used on zEC12 and above. - -* Support for the transactional-execution facility on zEC12 and above. - -* Support for the z13 processor and its vector facility. +* TLS is enabled for Cygwin as emutls. -Changes to the JIT APIs ------------------------ +Changes to the OCaml bindings +----------------------------- -* Added a new C++ JIT API called On Request Compilation, or ORC. + During this release ... - ORC is a new JIT API inspired by MCJIT but designed to be more testable, and - easier to extend with new features. A key new feature already in tree is lazy, - function-at-a-time compilation for X86. Also included is a reimplementation of - MCJIT's API and behavior (OrcMCJITReplacement). MCJIT itself remains in tree, - and continues to be the default JIT ExecutionEngine, though new users are - encouraged to try ORC out for their projects. (A good place to start is the - new ORC tutorials under llvm/examples/kaleidoscope/orc). +* The ocaml function link_modules has been replaced with link_modules' which + uses LLVMLinkModules2. -Sub-project Status Update -========================= -In addition to the core LLVM 3.7 distribution of production-quality compiler -infrastructure, the LLVM project includes sub-projects that use the LLVM core -and share the same distribution license. This section provides updates on these -sub-projects. 
- -Polly - The Polyhedral Loop Optimizer in LLVM ---------------------------------------------- - -`Polly `_ is a polyhedral loop optimization -infrastructure that provides data-locality optimizations to LLVM-based -compilers. When compiled as part of clang or loaded as a module into clang, -it can perform loop optimizations such as tiling, loop fusion or outer-loop -vectorization. As a generic loop optimization infrastructure it allows -developers to get a per-loop-iteration model of a loop nest on which detailed -analysis and transformations can be performed. - -Changes since the last release: - -* isl imported into Polly distribution - - `isl `_, the math library Polly uses, has been - imported into the source code repository of Polly and is now distributed as part - of Polly. As this was the last external library dependency of Polly, Polly can - now be compiled right after checking out the Polly source code without the need - for any additional libraries to be pre-installed. - -* Small integer optimization of isl - - The MIT licensed imath backend using in `isl `_ for - arbitrary width integer computations has been optimized to use native integer - operations for the common case where the operands of a computation fit into 32 - bit and to only fall back to large arbitrary precision integers for the - remaining cases. This optimization has greatly improved the compile-time - performance of Polly, both due to faster native operations also due to a - reduction in malloc traffic and pointer indirections. As a result, computations - that use arbitrary precision integers heavily have been speed up by almost 6x. - As a result, the compile-time of Polly on the Polybench test kernels in the LNT - suite has been reduced by 20% on average with compile time reductions between - 9-43%. - -* Schedule Trees - - Polly now uses internally so-called > Schedule Trees < to model the loop - structure it optimizes. Schedule trees are an easy to understand tree structure - that describes a loop nest using integer constraint sets to keep track of - execution constraints. It allows the developer to use per-tree-node operations - to modify the loop tree. Programatic analysis that work on the schedule tree - (e.g., as dependence analysis) also show a visible speedup as they can exploit - the tree structure of the schedule and need to fall back to ILP based - optimization problems less often. Section 6 of `Polyhedral AST generation is - more than scanning polyhedra - `_ gives a detailed - explanation of this schedule trees. - -* Scalar and PHI node modeling - Polly as an analysis - - Polly now requires almost no preprocessing to analyse LLVM-IR, which makes it - easier to use Polly as a pure analysis pass e.g. to provide more precise - dependence information to non-polyhedral transformation passes. Originally, - Polly required the input LLVM-IR to be preprocessed such that all scalar and - PHI-node dependences are translated to in-memory operations. Since this release, - Polly has full support for scalar and PHI node dependences and requires no - scalar-to-memory translation for such kind of dependences. - -* Modeling of modulo and non-affine conditions - - Polly can now supports modulo operations such as A[t%2][i][j] as they appear - often in stencil computations and also allows data-dependent conditional - branches as they result e.g. from ternary conditions ala A[i] > 255 ? 255 : - A[i]. 
- -* Delinearization - - Polly now support the analysis of manually linearized multi-dimensional arrays - as they result form macros such as - "#define 2DARRAY(A,i,j) (A.data[(i) * A.size + (j)]". Similar constructs appear - in old C code written before C99, C++ code such as boost::ublas, LLVM exported - from Julia, Matlab generated code and many others. Our work titled - `Optimistic Delinearization of Parametrically Sized Arrays - `_ gives details. - -* Compile time improvements - - Pratik Bahtu worked on compile-time performance tuning of Polly. His work - together with the support for schedule trees and the small integer optimization - in isl notably reduced the compile time. - -* Increased compute timeouts - - As Polly's compile time has been notabily improved, we were able to increase - the compile time saveguards in Polly. As a result, the default configuration - of Polly can now analyze larger loop nests without running into compile time - restrictions. - -* Export Debug Locations via JSCoP file - - Polly's JSCoP import/export format gained support for debug locations that show - to the user the source code location of detected scops. - -* Improved windows support - - The compilation of Polly on windows using cmake has been improved and several - visual studio build issues have been addressed. - -* Many bug fixes - -libunwind ---------- - -The unwind implementation which use to reside in `libc++abi` has been moved into -a separate repository. This implementation can still be used for `libc++abi` by -specifying `-DLIBCXXABI_USE_LLVM_UNWINDER=YES` and -`-DLIBCXXABI_LIBUNWIND_PATH=` when configuring -`libc++abi`, which defaults to `true` when building on ARM. - -The new repository can also be built standalone if just `libunwind` is desired. - -External Open Source Projects Using LLVM 3.7 +External Open Source Projects Using LLVM 3.8 ============================================ An exciting aspect of LLVM is that it is used as an enabling technology for a lot of other language and tools projects. This section lists some of the -projects that have already been updated to work with LLVM 3.7. +projects that have already been updated to work with LLVM 3.8. - -LDC - the LLVM-based D compiler -------------------------------- - -`D `_ is a language with C-like syntax and static typing. It -pragmatically combines efficiency, control, and modeling power, with safety and -programmer productivity. D supports powerful concepts like Compile-Time Function -Execution (CTFE) and Template Meta-Programming, provides an innovative approach -to concurrency and offers many classical paradigms. - -`LDC `_ uses the frontend from the reference compiler -combined with LLVM as backend to produce efficient native code. LDC targets -x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on -PowerPC (32/64 bit). Ports to other architectures like ARM, AArch64 and MIPS64 -are underway. - -Portable Computing Language (pocl) ----------------------------------- - -In addition to producing an easily portable open source OpenCL -implementation, another major goal of `pocl `_ -is improving performance portability of OpenCL programs with -compiler optimizations, reducing the need for target-dependent manual -optimizations. An important part of pocl is a set of LLVM passes used to -statically parallelize multiple work-items with the kernel compiler, even in -the presence of work-group barriers. 
- - -TTA-based Co-design Environment (TCE) -------------------------------------- - -`TCE `_ is a toolset for designing customized -exposed datapath processors based on the Transport triggered -architecture (TTA). - -The toolset provides a complete co-design flow from C/C++ -programs down to synthesizable VHDL/Verilog and parallel program binaries. -Processor customization points include the register files, function units, -supported operations, and the interconnection network. - -TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent -optimizations and also for parts of code generation. It generates -new LLVM-based code generators "on the fly" for the designed processors and -loads them in to the compiler backend as runtime libraries to avoid -per-target recompilation of larger parts of the compiler chain. - -BPF Compiler Collection (BCC) ------------------------------ -`BCC `_ is a Python + C framework for tracing and -networking that is using Clang rewriter + 2nd pass of Clang + BPF backend to -generate eBPF and push it into the kernel. - -LLVMSharp & ClangSharp ----------------------- - -`LLVMSharp `_ and -`ClangSharp `_ are type-safe C# bindings for -Microsoft.NET and Mono that Platform Invoke into the native libraries. -ClangSharp is self-hosted and is used to generated LLVMSharp using the -LLVM-C API. - -`LLVMSharp Kaleidoscope Tutorials `_ -are instructive examples of writing a compiler in C#, with certain improvements -like using the visitor pattern to generate LLVM IR. - -`ClangSharp PInvoke Generator `_ is the -self-hosting mechanism for LLVM/ClangSharp and is demonstrative of using -LibClang to generate Platform Invoke (PInvoke) signatures for C APIs. +* A project Additional Information @@ -500,3 +144,4 @@ going into the ``llvm/docs/`` directory in the LLVM tree. If you have any questions or comments about LLVM, please feel free to contact us via the `mailing lists `_. + diff --git a/docs/ReleaseProcess.rst b/docs/ReleaseProcess.rst index c4bbc91c63ce..d7f703126019 100644 --- a/docs/ReleaseProcess.rst +++ b/docs/ReleaseProcess.rst @@ -53,7 +53,7 @@ test-release.sh --------------- This script will check-out, configure and compile LLVM+Clang (+ most add-ons, like ``compiler-rt``, -``libcxx`` and ``clang-extra-tools``) in three stages, and will test the final stage. +``libcxx``, ``libomp`` and ``clang-extra-tools``) in three stages, and will test the final stage. It'll have installed the final binaries on the Phase3/Releasei(+Asserts) directory, and that's the one you should use for the test-suite and other external tests. diff --git a/docs/SourceLevelDebugging.rst b/docs/SourceLevelDebugging.rst index 99186f581881..270c44eb50ba 100644 --- a/docs/SourceLevelDebugging.rst +++ b/docs/SourceLevelDebugging.rst @@ -231,7 +231,7 @@ Compiled to LLVM, this function would be represented like this: .. 
code-block:: llvm ; Function Attrs: nounwind ssp uwtable - define void @foo() #0 { + define void @foo() #0 !dbg !4 { entry: %X = alloca i32, align 4 %Y = alloca i32, align 4 @@ -263,20 +263,20 @@ Compiled to LLVM, this function would be represented like this: !1 = !DIFile(filename: "/dev/stdin", directory: "/Users/dexonsmith/data/llvm/debug-info") !2 = !{} !3 = !{!4} - !4 = !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, function: void ()* @foo, variables: !2) + !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2) !5 = !DISubroutineType(types: !6) !6 = !{null} !7 = !{i32 2, !"Dwarf Version", i32 2} !8 = !{i32 2, !"Debug Info Version", i32 3} !9 = !{i32 1, !"PIC Level", i32 2} !10 = !{!"clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)"} - !11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "X", scope: !4, file: !1, line: 2, type: !12) + !11 = !DILocalVariable(name: "X", scope: !4, file: !1, line: 2, type: !12) !12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !13 = !DIExpression() !14 = !DILocation(line: 2, column: 9, scope: !4) - !15 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Y", scope: !4, file: !1, line: 3, type: !12) + !15 = !DILocalVariable(name: "Y", scope: !4, file: !1, line: 3, type: !12) !16 = !DILocation(line: 3, column: 9, scope: !4) - !17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "Z", scope: !18, file: !1, line: 5, type: !12) + !17 = !DILocalVariable(name: "Z", scope: !18, file: !1, line: 5, type: !12) !18 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) !19 = !DILocation(line: 5, column: 11, scope: !18) !20 = !DILocation(line: 6, column: 11, scope: !18) @@ -304,10 +304,9 @@ scope information for the variable ``X``. .. code-block:: llvm !14 = !DILocation(line: 2, column: 9, scope: !4) - !4 = !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, - isLocal: false, isDefinition: true, scopeLine: 1, - isOptimized: false, function: void ()* @foo, - variables: !2) + !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, + isLocal: false, isDefinition: true, scopeLine: 1, + isOptimized: false, variables: !2) Here ``!14`` is metadata providing `location information `_. In this example, scope is encoded by ``!4``, a @@ -368,15 +367,14 @@ C/C++ source file information ``llvm::Instruction`` provides easy access to metadata attached with an instruction. One can extract line number information encoded in LLVM IR using -``Instruction::getMetadata()`` and ``DILocation::getLineNumber()``. +``Instruction::getDebugLoc()`` and ``DILocation::getLine()``. .. 
code-block:: c++ - if (MDNode *N = I->getMetadata("dbg")) { // Here I is an LLVM instruction - DILocation Loc(N); // DILocation is in DebugInfo.h - unsigned Line = Loc.getLineNumber(); - StringRef File = Loc.getFilename(); - StringRef Dir = Loc.getDirectory(); + if (DILocation *Loc = I->getDebugLoc()) { // Here I is an LLVM instruction + unsigned Line = Loc->getLine(); + StringRef File = Loc->getFilename(); + StringRef Dir = Loc->getDirectory(); } C/C++ global variable information @@ -464,12 +462,12 @@ a C/C++ front-end would generate the following descriptors: !4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, - function: i32 (i32, i8**)* @main, variables: !2) + variables: !2) ;; ;; Define the subprogram itself. ;; - define i32 @main(i32 %argc, i8** %argv) { + define i32 @main(i32 %argc, i8** %argv) !dbg !4 { ... } @@ -709,7 +707,7 @@ qualified name. Debugger users tend not to enter their search strings as "``a::b::c``". So the name entered in the name table must be demangled in order to chop it up appropriately and additional names must be manually entered into the table to make it effective as a name lookup table for debuggers to -se. +use. All debuggers currently ignore the "``.debug_pubnames``" table as a result of its inconsistent and useless public-only name content making it a waste of diff --git a/docs/StackMaps.rst b/docs/StackMaps.rst index dbdf78f992ca..5bdae38b699d 100644 --- a/docs/StackMaps.rst +++ b/docs/StackMaps.rst @@ -499,3 +499,13 @@ the same requirement imposed by the llvm.gcroot intrinsic.) LLVM transformations must not substitute the alloca with any intervening value. This can be verified by the runtime simply by checking that the stack map's location is a Direct location type. + + +Supported Architectures +======================= + +Support for StackMap generation and the related intrinsics requires +some code for each backend. Today, only a subset of LLVM's backends +are supported. The currently supported architectures are X86_64, +PowerPC, and AArch64. + diff --git a/docs/Statepoints.rst b/docs/Statepoints.rst index eb5866eb552f..442b1c269c47 100644 --- a/docs/Statepoints.rst +++ b/docs/Statepoints.rst @@ -53,7 +53,7 @@ load barriers, store barriers, and safepoints. loads, merely loads of a particular type (in the original source language), or none at all. -#. Analogously, a store barrier is a code fragement that runs +#. Analogously, a store barrier is a code fragment that runs immediately before the machine store instruction, but after the computation of the value stored.
The most common use of a store barrier is to update a 'card table' in a generational garbage @@ -142,8 +142,8 @@ resulting relocation sequence is: define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" { - %0 = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %0, i32 7, i32 7) + %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) + %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 7, i32 7) ret i8 addrspace(1)* %obj.relocated } @@ -160,7 +160,7 @@ of the call, we use the ``gc.result`` intrinsic. To get the relocation of each pointer in turn, we use the ``gc.relocate`` intrinsic with the appropriate index. Note that both the ``gc.relocate`` and ``gc.result`` are tied to the statepoint. The combination forms a "statepoint relocation -sequence" and represents the entitety of a parseable call or 'statepoint'. +sequence" and represents the entirety of a parseable call or 'statepoint'. When lowered, this example would generate the following x86 assembly: @@ -206,6 +206,52 @@ This example was taken from the tests for the :ref:`RewriteStatepointsForGC` uti opt -rewrite-statepoints-for-gc test/Transforms/RewriteStatepointsForGC/basics.ll -S | llc -debug-only=stackmaps +Base & Derived Pointers +^^^^^^^^^^^^^^^^^^^^^^^ + +A "base pointer" is one which points to the starting address of an allocation +(object). A "derived pointer" is one which is offset from a base pointer by +some amount. When relocating objects, a garbage collector needs to be able +to relocate each derived pointer associated with an allocation to the same +offset from the new address. + +"Interior derived pointers" remain within the bounds of the allocation +they're associated with. As a result, the base object can be found at +runtime provided the bounds of allocations are known to the runtime system. + +"Exterior derived pointers" are outside the bounds of the associated object; +they may even fall within *another* allocation's address range. As a result, +there is no way for a garbage collector to determine which allocation they +are associated with at runtime and compiler support is needed. + +The ``gc.relocate`` intrinsic supports an explicit operand for describing the +allocation associated with a derived pointer. This operand is frequently +referred to as the base operand; it does not, strictly speaking, have to be +a base pointer, but it does need to lie within the bounds of the associated +allocation. Some collectors may require that the operand be an actual base +pointer rather than merely an internal derived pointer. Note that during +lowering both the base and derived pointer operands are required to be live +over the associated call safepoint even if the base is otherwise unused +afterwards. + +If we extend our previous example to include a pointless derived pointer, +we get: + +..
code-block:: llvm + + define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) + gc "statepoint-example" { + %gep = getelementptr i8, i8 addrspace(1)* %obj, i64 20000 + %token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj, i8 addrspace(1)* %gep) + %obj.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 7, i32 7) + %gep.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %token, i32 7, i32 8) + %p = getelementptr i8, i8 addrspace(1)* %gep, i64 -20000 + ret i8 addrspace(1)* %p + } + +Note that in this example %p and %obj.relocated are the same address and we +could replace one with the other, potentially removing the derived pointer +from the live set at the safepoint entirely. GC Transitions ^^^^^^^^^^^^^^^^^^ @@ -225,7 +271,7 @@ statepoint. transitions based on the function symbols involved (e.g. a call from a function with GC strategy "foo" to a function with GC strategy "bar"), indirect calls that are also GC transitions must also be supported. This - requirement is the driving force behing the decision to require that GC + requirement is the driving force behind the decision to require that GC transitions are explicitly marked. Let's revisit the sample given above, this time treating the call to ``@foo`` @@ -242,8 +288,8 @@ to unmanaged code. The resulting relocation sequence is: define i8 addrspace(1)* @test1(i8 addrspace(1) *%obj) gc "hypothetical-gc" { - %0 = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 1, i32* @Flag, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %0, i32 7, i32 7) + %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 1, i32* @Flag, i32 0, i8 addrspace(1)* %obj) + %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 7, i32 7) ret i8 addrspace(1)* %obj.relocated } @@ -296,7 +342,7 @@ Syntax: :: - declare i32 + declare token @llvm.experimental.gc.statepoint(i64 <id>, i32 <num patch bytes>, func_type <target>, i64 <#call args>, i64 <flags>, @@ -331,14 +377,16 @@ the user will patch over the 'num patch bytes' bytes of nops with a calling sequence specific to their runtime before executing the generated machine code. There are no guarantees with respect to the alignment of the nop sequence. Unlike :doc:`StackMaps` statepoints do -not have a concept of shadow bytes. +not have a concept of shadow bytes. Note that semantically the +statepoint still represents a call or invoke to 'target', and the nop +sequence after patching is expected to represent an operation +equivalent to a call or invoke to 'target'. The 'target' operand is the function actually being called. The target can be specified as either a symbolic LLVM function, or as an arbitrary Value of appropriate function type. Note that the function type must match the signature of the callee and the types of the 'call -parameters' arguments. If 'num patch bytes' is non-zero then 'target' -has to be the constant pointer null of the appropriate function type. The '#call args' operand is the number of arguments to the actual call.
It must exactly match the number of arguments passed in the @@ -408,7 +456,7 @@ Syntax: :: declare type* - @llvm.experimental.gc.result(i32 %statepoint_token) + @llvm.experimental.gc.result(token %statepoint_token) Overview: """"""""" @@ -424,7 +472,7 @@ Operands: The first and only argument is the ``gc.statepoint`` which starts the safepoint sequence of which this ``gc.result`` is a part. -Despite the typing of this as a generic i32, *only* the value defined +Despite the typing of this as a generic token, *only* the value defined by a ``gc.statepoint`` is legal here. Semantics: @@ -448,7 +496,7 @@ Syntax: :: declare <pointer type> - @llvm.experimental.gc.relocate(i32 %statepoint_token, + @llvm.experimental.gc.relocate(token %statepoint_token, i32 %base_offset, i32 %pointer_offset) @@ -463,13 +511,18 @@ Operands: The first argument is the ``gc.statepoint`` which starts the safepoint sequence of which this ``gc.relocate`` is a part. -Despite the typing of this as a generic i32, *only* the value defined +Despite the typing of this as a generic token, *only* the value defined by a ``gc.statepoint`` is legal here. The second argument is an index into the statepoint's list of arguments -which specifies the base pointer for the pointer being relocated. +which specifies the allocation for the pointer being relocated. This index must land within the 'gc parameter' section of the -statepoint's argument list. +statepoint's argument list. The associated value must be within the +object with which the pointer being relocated is associated. The optimizer +is free to change *which* interior derived pointer is reported, provided that +it does not replace an actual base pointer with another interior derived +pointer. Collectors are allowed to rely on the base pointer operand +remaining an actual base pointer if so constructed. The third argument is an index into the statepoint's list of arguments which specifies the (potentially) derived pointer being relocated. It @@ -590,7 +643,7 @@ As an example, given this code: define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" { - call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) + call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0) ret i8 addrspace(1)* %obj } @@ -600,8 +653,8 @@ The pass would produce this IR: define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" { - %0 = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(i32 %0, i32 12, i32 12) + %0 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0, i8 addrspace(1)* %obj) - %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %0, i32 12, i32 12) ret i8 addrspace(1)* %obj.relocated } @@ -612,8 +665,18 @@ non references. Address space 1 is not globally reserved for this purpose.
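For reference, the rewrite can also be driven from a frontend's own code rather than through ``opt``. The snippet below is only a sketch: it assumes the legacy pass manager and the ``createPlaceSafepointsPass()`` and ``createRewriteStatepointsForGCPass()`` factory functions declared in ``llvm/Transforms/Scalar.h``, and it presumes the module's functions already carry an appropriate ``gc`` attribute.

.. code-block:: c++

    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Transforms/Scalar.h"

    // Insert safepoint polls, then rewrite the calls they contain into
    // statepoints with explicit relocation sequences.
    void makeRelocationsExplicit(llvm::Module &M) {
      llvm::legacy::PassManager PM;
      PM.add(llvm::createPlaceSafepointsPass());
      PM.add(llvm::createRewriteStatepointsForGCPass());
      PM.run(M);
    }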
This pass can be used as a utility function by a language frontend that doesn't want to manually reason about liveness, base pointers, or relocation when constructing IR. As currently implemented, RewriteStatepointsForGC must be -run after SSA construction (i.e. mem2ref). +run after SSA construction (i.e. mem2reg). +RewriteStatepointsForGC will ensure that appropriate base pointers are listed +for every relocation created. It will do so by duplicating code as needed to +propagate the base pointer associated with each pointer being relocated to +the appropriate safepoints. The implementation assumes that the following +IR constructs produce base pointers: loads from the heap, addresses of global +variables, function arguments, function return values. Constant pointers (such +as null) are also assumed to be base pointers. In practice, this constraint +can be relaxed to producing interior derived pointers provided the target +collector can find the associated allocation from an arbitrary interior +derived pointer. In practice, RewriteStatepointsForGC can be run much later in the pass pipeline, after most optimization is already done. This helps to improve @@ -654,8 +717,8 @@ This pass would produce the following IR: .. code-block:: llvm define void @test() gc "statepoint-example" { - %safepoint_token = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) - %safepoint_token1 = call i32 (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0) + %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0) + %safepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0) ret void } @@ -699,6 +762,12 @@ deoptimization or introspection) at safepoints. In that case, ask on the llvm-dev mailing list for suggestions. +Supported Architectures +======================= + +Support for statepoint generation requires some code for each backend. +Today, only X86_64 is supported. + Bugs and Enhancements ===================== diff --git a/docs/TestingGuide.rst b/docs/TestingGuide.rst index adf5f3d4cfbd..134ddd88c87d 100644 --- a/docs/TestingGuide.rst +++ b/docs/TestingGuide.rst @@ -240,6 +240,10 @@ The recommended way to examine output to figure out if the test passes is using the :doc:`FileCheck tool <CommandGuide/FileCheck>`. *[The usage of grep in RUN lines is deprecated - please do not send or commit patches that use it.]* +Put related tests into a single file rather than having a separate file per +test. Check if there are files already covering your feature and consider +adding your code there instead of creating a new file. + Extra files ----------- diff --git a/docs/WritingAnLLVMPass.rst b/docs/WritingAnLLVMPass.rst index 1d5a52f21b3f..241066842b7b 100644 --- a/docs/WritingAnLLVMPass.rst +++ b/docs/WritingAnLLVMPass.rst @@ -47,14 +47,11 @@ source tree in the ``lib/Transforms/Hello`` directory. Setting up the build environment -------------------------------- -.. FIXME: Why does this recommend to build in-tree? - -First, configure and build LLVM. This needs to be done directly inside the -LLVM source tree rather than in a separate objects directory.
Next, you need -to create a new directory somewhere in the LLVM source base. For this example, -we'll assume that you made ``lib/Transforms/Hello``. Finally, you must set up -a build script (``Makefile``) that will compile the source code for the new -pass. To do this, copy the following into ``Makefile``: +First, configure and build LLVM. Next, you need to create a new directory +somewhere in the LLVM source base. For this example, we'll assume that you +made ``lib/Transforms/Hello``. Finally, you must set up a build script +(``Makefile``) that will compile the source code for the new pass. To do this, +copy the following into ``Makefile``: .. code-block:: make @@ -206,9 +203,8 @@ As a whole, the ``.cpp`` file looks like: static RegisterPass X("hello", "Hello World Pass", false, false); Now that it's all together, compile the file with a simple "``gmake``" command -in the local directory and you should get a new file -"``Debug+Asserts/lib/Hello.so``" under the top level directory of the LLVM -source tree (not in the local directory). Note that everything in this file is +from the top level of your build directory and you should get a new file +"``Debug+Asserts/lib/Hello.so``". Note that everything in this file is contained in an anonymous namespace --- this reflects the fact that passes are self contained units that do not need external interfaces (although they can have them) to be useful. @@ -228,7 +224,7 @@ will work): .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -hello < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -hello < hello.bc > /dev/null Hello: __main Hello: puts Hello: main @@ -245,7 +241,7 @@ To see what happened to the other string you registered, try running .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -help + $ opt -load ../../Debug+Asserts/lib/Hello.so -help OVERVIEW: llvm .bc -> .bc modular optimizer USAGE: opt [options] @@ -272,7 +268,7 @@ you queue up. For example: .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -hello -time-passes < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -hello -time-passes < hello.bc > /dev/null Hello: __main Hello: puts Hello: main @@ -1092,7 +1088,7 @@ passes. Lets try it out with the gcse and licm passes: .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null Module Pass Manager Function Pass Manager Dominator Set Construction @@ -1129,7 +1125,7 @@ Lets see how this changes when we run the :ref:`Hello World .. code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null Module Pass Manager Function Pass Manager Dominator Set Construction @@ -1170,7 +1166,7 @@ Now when we run our pass, we get this output: .. 
code-block:: console - $ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null + $ opt -load ../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null Pass Arguments: -gcse -hello -licm Module Pass Manager Function Pass Manager diff --git a/docs/_ocamldoc/style.css b/docs/_ocamldoc/style.css new file mode 100644 index 000000000000..00595d7f2f29 --- /dev/null +++ b/docs/_ocamldoc/style.css @@ -0,0 +1,97 @@ +/* A style for ocamldoc. Daniel C. Buenzli */ + +/* Reset a few things. */ +html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre, +a,abbr,acronym,address,big,cite,code,del,dfn,em,font,img,ins,kbd,q,s,samp, +small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset, +form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td +{ margin: 0; padding: 0; border: 0 none; outline: 0; font-size: 100%; + font-weight: inherit; font-style:inherit; font-family:inherit; + line-height: inherit; vertical-align: baseline; text-align:inherit; + color:inherit; background: transparent; } + +table { border-collapse: collapse; border-spacing: 0; } + +/* Basic page layout */ + +body { font: normal 10pt/1.375em helvetica, arial, sans-serif; text-align:left; + margin: 1.375em 10%; min-width: 40ex; max-width: 72ex; + color: black; background: transparent /* url(line-height-22.gif) */; } + +b { font-weight: bold } +em { font-style: italic } + +tt, code, pre { font-family: WorkAroundWebKitAndMozilla, monospace; + font-size: 1em; } +pre code { font-size : inherit; } +.codepre { margin-bottom:1.375em /* after code example we introduce space. */ } + +.superscript,.subscript +{ font-size : 0.813em; line-height:0; margin-left:0.4ex;} +.superscript { vertical-align: super; } +.subscript { vertical-align: sub; } + +/* ocamldoc markup workaround hacks */ + + + +hr, hr + br, div + br, center + br, span + br, ul + br, ol + br, pre + br +{ display: none } /* annoying */ + +div.info + br { display:block} + +.codepre br + br { display: none } +h1 + pre { margin-bottom:1.375em} /* Toplevel module description */ + +/* Sections and document divisions */ + +/* .navbar { margin-bottom: -1.375em } */ +h1 { font-weight: bold; font-size: 1.5em; /* margin-top:1.833em; */ + margin-top:0.917em; padding-top:0.875em; + border-top-style:solid; border-width:1px; border-color:#AAA; } +h2 { font-weight: bold; font-size: 1.313em; margin-top: 1.048em } +h3 { font-weight: bold; font-size: 1.125em; margin-top: 1.222em } +h3 { font-weight: bold; font-size: 1em; margin-top: 1.375em} +h4 { font-style: italic; } + +/* Used by OCaml's own library documentation. */ + h6 { font-weight: bold; font-size: 1.125em; margin-top: 1.222em } + .h7 { font-weight: bold; font-size: 1em; margin-top: 1.375em } + +p { margin-top: 1.375em } +pre { margin-top: 1.375em } +.info { margin: 0.458em 0em -0.458em 2em;}/* Description of types values etc. 
*/ +td .info { margin:0; padding:0; margin-left: 2em;} /* Description in indexes */ + +ul, ol { margin-top:0.688em; padding-bottom:0.687em; + list-style-position:outside} +ul + p, ol + p { margin-top: 0em } +ul { list-style-type: square } + + +/* h2 + ul, h3 + ul, p + ul { } */ +ul > li { margin-left: 1.375em; } +ol > li { margin-left: 1.7em; } +/* Links */ + +a, a:link, a:visited, a:active, a:hover { color : #00B; text-decoration: none } +a:hover { text-decoration : underline } +*:target {background-color: #FFFF99;} /* anchor highlight */ + +/* Code */ + +.keyword { font-weight: bold; } +.comment { color : red } +.constructor { color : green } +.string { color : brown } +.warning { color : red ; font-weight : bold } + +/* Functors */ + +.paramstable { border-style : hidden ; padding-bottom:1.375em} +.paramstable code { margin-left: 1ex; margin-right: 1ex } +.sig_block {margin-left: 1em} + +/* Images */ + +img { margin-top: 1.375em } diff --git a/docs/conf.py b/docs/conf.py index 27919c20a7a5..6e3f16ceef1a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -48,9 +48,9 @@ copyright = u'2003-%d, LLVM Project' % date.today().year # built documents. # # The short X.Y version. -version = '3.7' +version = '3.8' # The full version, including alpha/beta/rc tags. -release = '3.7' +release = '3.8' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/doxygen.cfg.in b/docs/doxygen.cfg.in index 5c70db0332d5..5a74cecc8aac 100644 --- a/docs/doxygen.cfg.in +++ b/docs/doxygen.cfg.in @@ -1409,7 +1409,7 @@ FORMULA_TRANSPARENT = YES # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. -USE_MATHJAX = NO +USE_MATHJAX = YES # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: diff --git a/docs/index.rst b/docs/index.rst index 66c55758c4db..a69ecfedc580 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,6 +1,11 @@ Overview ======== +.. warning:: + + If you are using a released version of LLVM, see `the download page + `_ to find your documentation. + The LLVM compiler infrastructure supports a wide range of projects, from industrial strength compilers to specialized JIT applications to small research projects. @@ -81,6 +86,7 @@ representation. GetElementPtr Frontend/PerformanceTips MCJITDesignAndImplementation + CompileCudaWithLLVM :doc:`GettingStarted` Discusses how to get up and running quickly with the LLVM infrastructure. @@ -256,6 +262,7 @@ For API clients and LLVM developers. MergeFunctions BitSets FaultMaps + MIRLangRef :doc:`WritingAnLLVMPass` Information on how to write LLVM transformations and analyses. @@ -268,6 +275,10 @@ For API clients and LLVM developers. working on retargetting LLVM to a new architecture, designing a new codegen pass, or enhancing existing components. +:doc:`Machine IR (MIR) Format Reference Manual ` + A reference manual for the MIR serialization format, which is used to test + LLVM's code generation passes. + :doc:`TableGen ` Describes the TableGen tool, which is used heavily by the LLVM code generator. @@ -361,6 +372,9 @@ For API clients and LLVM developers. :doc:`FaultMaps` LLVM support for folding control flow into faulting machine instructions. +:doc:`CompileCudaWithLLVM` + LLVM support for CUDA. 
+ Development Process Documentation ================================= diff --git a/docs/tutorial/LangImpl1.rst b/docs/tutorial/LangImpl1.rst index f4b019166af3..b04cde10274e 100644 --- a/docs/tutorial/LangImpl1.rst +++ b/docs/tutorial/LangImpl1.rst @@ -25,7 +25,7 @@ It is useful to point out ahead of time that this tutorial is really about teaching compiler techniques and LLVM specifically, *not* about teaching modern and sane software engineering principles. In practice, this means that we'll take a number of shortcuts to simplify the -exposition. For example, the code leaks memory, uses global variables +exposition. For example, the code uses global variables all over the place, doesn't use nice design patterns like `visitors `_, etc... but it is very simple. If you dig in and use the code as a basis for future @@ -146,7 +146,7 @@ useful for mutually recursive functions). For example: A more interesting example is included in Chapter 6 where we write a little Kaleidoscope application that `displays a Mandelbrot -Set `_ at various levels of magnification. +Set `_ at various levels of magnification. Lets dive into the implementation of this language! @@ -169,14 +169,16 @@ numeric value of a number). First, we define the possibilities: tok_eof = -1, // commands - tok_def = -2, tok_extern = -3, + tok_def = -2, + tok_extern = -3, // primary - tok_identifier = -4, tok_number = -5, + tok_identifier = -4, + tok_number = -5, }; - static std::string IdentifierStr; // Filled in if tok_identifier - static double NumVal; // Filled in if tok_number + static std::string IdentifierStr; // Filled in if tok_identifier + static double NumVal; // Filled in if tok_number Each token returned by our lexer will either be one of the Token enum values or it will be an 'unknown' character like '+', which is returned @@ -217,8 +219,10 @@ loop: while (isalnum((LastChar = getchar()))) IdentifierStr += LastChar; - if (IdentifierStr == "def") return tok_def; - if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; return tok_identifier; } @@ -250,7 +254,8 @@ extend it :). Next we handle comments: if (LastChar == '#') { // Comment until end of line. - do LastChar = getchar(); + do + LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); if (LastChar != EOF) @@ -275,7 +280,7 @@ file. These are handled with this code: } With this, we have the complete lexer for the basic Kaleidoscope -language (the `full code listing `_ for the Lexer +language (the `full code listing `_ for the Lexer is available in the `next chapter `_ of the tutorial). Next we'll `build a simple parser that uses this to build an Abstract Syntax Tree `_. When we have that, we'll include a diff --git a/docs/tutorial/LangImpl2.rst b/docs/tutorial/LangImpl2.rst index 06b18ff6c239..dab60172b988 100644 --- a/docs/tutorial/LangImpl2.rst +++ b/docs/tutorial/LangImpl2.rst @@ -44,8 +44,9 @@ We'll start with expressions first: /// NumberExprAST - Expression class for numeric literals like "1.0". class NumberExprAST : public ExprAST { double Val; + public: - NumberExprAST(double val) : Val(val) {} + NumberExprAST(double Val) : Val(Val) {} }; The code above shows the definition of the base ExprAST class and one @@ -65,26 +66,31 @@ language: /// VariableExprAST - Expression class for referencing a variable, like "a". 
class VariableExprAST : public ExprAST { std::string Name; + public: - VariableExprAST(const std::string &name) : Name(name) {} + VariableExprAST(const std::string &Name) : Name(Name) {} }; /// BinaryExprAST - Expression class for a binary operator. class BinaryExprAST : public ExprAST { char Op; - ExprAST *LHS, *RHS; + std::unique_ptr LHS, RHS; + public: - BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) - : Op(op), LHS(lhs), RHS(rhs) {} + BinaryExprAST(char op, std::unique_ptr LHS, + std::unique_ptr RHS) + : Op(op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} }; /// CallExprAST - Expression class for function calls. class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; + public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} }; This is all (intentionally) rather straight-forward: variables capture @@ -109,18 +115,21 @@ way to talk about functions themselves: class PrototypeAST { std::string Name; std::vector Args; + public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} + PrototypeAST(const std::string &name, std::vector Args) + : Name(name), Args(std::move(Args)) {} }; /// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; + public: - FunctionAST(PrototypeAST *proto, ExprAST *body) - : Proto(proto), Body(body) {} + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} }; In Kaleidoscope, functions are typed with just a count of their @@ -142,9 +151,10 @@ be generated with calls like this: .. code-block:: c++ - ExprAST *X = new VariableExprAST("x"); - ExprAST *Y = new VariableExprAST("y"); - ExprAST *Result = new BinaryExprAST('+', X, Y); + auto LHS = llvm::make_unique("x"); + auto RHS = llvm::make_unique("y"); + auto Result = std::make_unique('+', std::move(LHS), + std::move(RHS)); In order to do this, we'll start by defining some basic helper routines: @@ -167,9 +177,14 @@ be parsed. /// Error* - These are little helper functions for error handling. - ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} - PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } - FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + std::unique_ptr Error(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; + } + std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; + } The ``Error`` routines are simple helper routines that our parser will use to handle errors. The error recovery in our parser will not be the @@ -190,10 +205,10 @@ which parses that production. For numeric literals, we have: .. code-block:: c++ /// numberexpr ::= number - static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); + static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); getNextToken(); // consume the number - return Result; + return std::move(Result); } This routine is very simple: it expects to be called when the current @@ -211,14 +226,15 @@ the parenthesis operator is defined like this: .. code-block:: c++ /// parenexpr ::= '(' expression ')' - static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. 
- ExprAST *V = ParseExpression(); - if (!V) return 0; + static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; if (CurTok != ')') return Error("expected ')'"); - getNextToken(); // eat ). + getNextToken(); // eat ). return V; } @@ -250,24 +266,26 @@ function calls: /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' - static ExprAST *ParseIdentifierExpr() { + static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); + return llvm::make_unique(IdName); // Call. getNextToken(); // eat ( - std::vector Args; + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; - if (CurTok == ')') break; + if (CurTok == ')') + break; if (CurTok != ',') return Error("Expected ')' or ',' in argument list"); @@ -278,7 +296,7 @@ function calls: // Eat the ')'. getNextToken(); - return new CallExprAST(IdName, Args); + return llvm::make_unique(IdName, std::move(Args)); } This routine follows the same style as the other routines. (It expects @@ -294,7 +312,7 @@ Now that we have all of our simple expression-parsing logic in place, we can define a helper function to wrap it together into one entry point. We call this class of expressions "primary" expressions, for reasons that will become more clear `later in the -tutorial `_. In order to parse an arbitrary +tutorial `_. In order to parse an arbitrary primary expression, we need to determine what sort of expression it is: .. code-block:: c++ @@ -303,12 +321,16 @@ primary expression, we need to determine what sort of expression it is: /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr - static ExprAST *ParsePrimary() { + static std::unique_ptr ParsePrimary() { switch (CurTok) { - default: return Error("unknown token when expecting an expression"); - case tok_identifier: return ParseIdentifierExpr(); - case tok_number: return ParseNumberExpr(); - case '(': return ParseParenExpr(); + default: + return Error("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); } } @@ -374,7 +396,7 @@ would be easy enough to eliminate the map and do the comparisons in the With the helper above defined, we can now start parsing binary expressions. The basic idea of operator precedence parsing is to break down an expression with potentially ambiguous binary operators into -pieces. Consider ,for example, the expression "a+b+(c+d)\*e\*f+g". +pieces. Consider, for example, the expression "a+b+(c+d)\*e\*f+g". Operator precedence parsing considers this as a stream of primary expressions separated by binary operators. 
As such, it will first parse the leading primary expression "a", then it will see the pairs [+, b] @@ -390,11 +412,12 @@ a sequence of [binop,primaryexpr] pairs: /// expression /// ::= primary binoprhs /// - static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); - if (!LHS) return 0; + static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } ``ParseBinOpRHS`` is the function that parses the sequence of pairs for @@ -416,7 +439,8 @@ starts with: /// binoprhs /// ::= ('+' primary)* - static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -440,8 +464,9 @@ expression: getNextToken(); // eat binop // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); - if (!RHS) return 0; + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; As such, this code eats (and remembers) the binary operator and then parses the primary expression that follows. This builds up the whole @@ -474,7 +499,8 @@ then continue parsing: } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = llvm::make_unique(BinOp, std::move(LHS), + std::move(RHS)); } // loop around to the top of the while loop. } @@ -498,11 +524,13 @@ above two blocks duplicated for context): // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec+1, RHS); - if (RHS == 0) return 0; + RHS = ParseBinOpRHS(TokPrec+1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = llvm::make_unique(BinOp, std::move(LHS), + std::move(RHS)); } // loop around to the top of the while loop. } @@ -541,7 +569,7 @@ expressions): /// prototype /// ::= id '(' id* ')' - static PrototypeAST *ParsePrototype() { + static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); @@ -561,7 +589,7 @@ expressions): // success. getNextToken(); // eat ')'. - return new PrototypeAST(FnName, ArgNames); + return llvm::make_unique(FnName, std::move(ArgNames)); } Given this, a function definition is very simple, just a prototype plus @@ -570,14 +598,14 @@ an expression to implement the body: .. code-block:: c++ /// definition ::= 'def' prototype expression - static FunctionAST *ParseDefinition() { + static std::unique_ptr ParseDefinition() { getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) return 0; + auto Proto = ParsePrototype(); + if (!Proto) return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique(std::move(Proto), std::move(E)); + return nullptr; } In addition, we support 'extern' to declare functions like 'sin' and @@ -587,7 +615,7 @@ In addition, we support 'extern' to declare functions like 'sin' and .. code-block:: c++ /// external ::= 'extern' prototype - static PrototypeAST *ParseExtern() { + static std::unique_ptr ParseExtern() { getNextToken(); // eat extern. return ParsePrototype(); } @@ -599,13 +627,13 @@ nullary (zero argument) functions for them: .. 
code-block:: c++ /// toplevelexpr ::= expression - static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { + static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique("", std::vector()); + return llvm::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } Now that we have all the pieces, let's build a little driver that will @@ -616,7 +644,7 @@ The Driver The driver for this simply invokes all of the parsing pieces with a top-level dispatch loop. There isn't much interesting here, so I'll just -include the top-level loop. See `below <#code>`_ for full code in the +include the top-level loop. See `below <#full-code-listing>`_ for full code in the "Top-Level Parsing" section. .. code-block:: c++ @@ -626,11 +654,20 @@ include the top-level loop. See `below <#code>`_ for full code in the while (1) { fprintf(stderr, "ready> "); switch (CurTok) { - case tok_eof: return; - case ';': getNextToken(); break; // ignore top-level semicolons. - case tok_def: HandleDefinition(); break; - case tok_extern: HandleExtern(); break; - default: HandleTopLevelExpression(); break; + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; } } } diff --git a/docs/tutorial/LangImpl3.rst b/docs/tutorial/LangImpl3.rst index 26ba4aae956c..83ad35f14aee 100644 --- a/docs/tutorial/LangImpl3.rst +++ b/docs/tutorial/LangImpl3.rst @@ -15,8 +15,8 @@ LLVM IR. This will teach you a little bit about how LLVM does things, as well as demonstrate how easy it is to use. It's much more work to build a lexer and parser than it is to generate LLVM IR code. :) -**Please note**: the code in this chapter and later require LLVM 2.2 or -later. LLVM 2.1 and before will not work with it. Also note that you +**Please note**: the code in this chapter and later require LLVM 3.7 or +later. LLVM 3.6 and before will not work with it. Also note that you need to use a version of this tutorial that matches your LLVM release: If you are using an official LLVM release, use the version of the documentation included with your release or on the `llvm.org releases @@ -35,19 +35,20 @@ class: class ExprAST { public: virtual ~ExprAST() {} - virtual Value *Codegen() = 0; + virtual Value *codegen() = 0; }; /// NumberExprAST - Expression class for numeric literals like "1.0". class NumberExprAST : public ExprAST { double Val; + public: - NumberExprAST(double val) : Val(val) {} - virtual Value *Codegen(); + NumberExprAST(double Val) : Val(Val) {} + virtual Value *codegen(); }; ... -The Codegen() method says to emit IR for that AST node along with all +The codegen() method says to emit IR for that AST node along with all the things it depends on, and they all return an LLVM Value object. "Value" is the class used to represent a "`Static Single Assignment (SSA) `_ @@ -72,16 +73,20 @@ parser, which will be used to report errors found during code generation .. 
code-block:: c++ - Value *ErrorV(const char *Str) { Error(Str); return 0; } - - static Module *TheModule; + static std::unique_ptr<Module> TheModule; static IRBuilder<> Builder(getGlobalContext()); static std::map<std::string, Value*> NamedValues; + Value *ErrorV(const char *Str) { + Error(Str); + return nullptr; + } + The static variables will be used during code generation. ``TheModule`` -is the LLVM construct that contains all of the functions and global -variables in a chunk of code. In many ways, it is the top-level -structure that the LLVM IR uses to contain code. +is an LLVM construct that contains functions and global variables. In many +ways, it is the top-level structure that the LLVM IR uses to contain code. +It will own the memory for all of the IR that we generate, which is why +the codegen() method returns a raw Value\*, rather than a unique_ptr. The ``Builder`` object is a helper object that makes it easy to generate LLVM instructions. Instances of the @@ -110,7 +115,7 @@ First we'll do numeric literals: .. code-block:: c++ - Value *NumberExprAST::Codegen() { + Value *NumberExprAST::codegen() { return ConstantFP::get(getGlobalContext(), APFloat(Val)); } @@ -124,10 +129,12 @@ are all uniqued together and shared. For this reason, the API uses the .. code-block:: c++ - Value *VariableExprAST::Codegen() { + Value *VariableExprAST::codegen() { // Look this variable up in the function. Value *V = NamedValues[Name]; - return V ? V : ErrorV("Unknown variable name"); + if (!V) + ErrorV("Unknown variable name"); + return V; } References to variables are also quite simple using LLVM. In the simple @@ -137,26 +144,31 @@ values that can be in the ``NamedValues`` map are function arguments. This code simply checks to see that the specified name is in the map (if not, an unknown variable is being referenced) and returns the value for it. In future chapters, we'll add support for `loop induction -variables `_ in the symbol table, and for `local -variables `_. +variables `_ in the symbol table, and for `local +variables `_. .. code-block:: c++ - Value *BinaryExprAST::Codegen() { - Value *L = LHS->Codegen(); - Value *R = RHS->Codegen(); - if (L == 0 || R == 0) return 0; + Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; switch (Op) { - case '+': return Builder.CreateFAdd(L, R, "addtmp"); - case '-': return Builder.CreateFSub(L, R, "subtmp"); - case '*': return Builder.CreateFMul(L, R, "multmp"); + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); case '<': L = Builder.CreateFCmpULT(L, R, "cmptmp"); // Convert bool 0/1 to double 0.0 or 1.0 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), "booltmp"); - default: return ErrorV("invalid binary operator"); + default: + return ErrorV("invalid binary operator"); } } @@ -178,55 +190,55 @@ automatically provide each one with an increasing, unique numeric suffix. Local value names for instructions are purely optional, but they make it much easier to read the IR dumps.
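To see the uniquing in action, consider emitting two instructions with the same name hint. This is an illustrative fragment only; ``emitTwoAdds`` is a hypothetical helper, not part of the tutorial:

.. code-block:: c++

    #include "llvm/IR/IRBuilder.h"

    // Both instructions below ask for the name "addtmp". LLVM keeps the
    // first and renames the second to "addtmp1", so every value name stays
    // unique within the enclosing function.
    llvm::Value *emitTwoAdds(llvm::IRBuilder<> &Builder, llvm::Value *L,
                             llvm::Value *R) {
      llvm::Value *Sum = Builder.CreateFAdd(L, R, "addtmp");
      return Builder.CreateFAdd(Sum, R, "addtmp"); // printed as "addtmp1"
    }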
-`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict +`LLVM instructions <../LangRef.html#instruction-reference>`_ are constrained by strict rules: for example, the Left and Right operators of an `add -instruction <../LangRef.html#i_add>`_ must have the same type, and the +instruction <../LangRef.html#add-instruction>`_ must have the same type, and the result type of the add must match the operand types. Because all values in Kaleidoscope are doubles, this makes for very simple code for add, sub and mul. On the other hand, LLVM specifies that the `fcmp -instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a +instruction <../LangRef.html#fcmp-instruction>`_ always returns an 'i1' value (a one bit integer). The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get these semantics, we combine the fcmp instruction with a `uitofp -instruction <../LangRef.html#i_uitofp>`_. This instruction converts its +instruction <../LangRef.html#uitofp-to-instruction>`_. This instruction converts its input integer into a floating point value by treating the input as an unsigned value. In contrast, if we used the `sitofp -instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator +instruction <../LangRef.html#sitofp-to-instruction>`_, the Kaleidoscope '<' operator would return 0.0 and -1.0, depending on the input value. .. code-block:: c++ - Value *CallExprAST::Codegen() { + Value *CallExprAST::codegen() { // Look up the name in the global module table. Function *CalleeF = TheModule->getFunction(Callee); - if (CalleeF == 0) + if (!CalleeF) return ErrorV("Unknown function referenced"); // If argument mismatch error. if (CalleeF->arg_size() != Args.size()) return ErrorV("Incorrect # arguments passed"); - std::vector ArgsV; + std::vector ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Code generation for function calls is quite straightforward with LLVM. -The code above initially does a function name lookup in the LLVM -Module's symbol table. Recall that the LLVM Module is the container that -holds all of the functions we are JIT'ing. By giving each function the -same name as what the user specifies, we can use the LLVM symbol table -to resolve function names for us. +Code generation for function calls is quite straightforward with LLVM. The code +above initially does a function name lookup in the LLVM Module's symbol table. +Recall that the LLVM Module is the container that holds the functions we are +JIT'ing. By giving each function the same name as what the user specifies, we +can use the LLVM symbol table to resolve function names for us. Once we have the function to call, we recursively codegen each argument that is to be passed in, and create an LLVM `call -instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C +instruction <../LangRef.html#call-instruction>`_. Note that LLVM uses the native C calling conventions by default, allowing these calls to also call into standard library functions like "sin" and "cos", with no additional effort. @@ -249,14 +261,15 @@ with: .. code-block:: c++ - Function *PrototypeAST::Codegen() { + Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. 
std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); - FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), - Doubles, false); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); - Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule); This code packs a lot of power into a few lines. Note first that this function returns a "Function\*" instead of a "Value\*". Because a @@ -273,118 +286,67 @@ double as a result, and that is not vararg (the false parameter indicates this). Note that Types in LLVM are uniqued just like Constants are, so you don't "new" a type, you "get" it. -The final line above actually creates the function that the prototype -will correspond to. This indicates the type, linkage and name to use, as +The final line above actually creates the IR Function corresponding to +the Prototype. This indicates the type, linkage and name to use, as well as which module to insert into. "`external linkage <../LangRef.html#linkage>`_" means that the function may be defined outside the current module and/or that it is callable by functions outside the module. The Name passed in is the name the user specified: since "``TheModule``" is specified, this name is registered -in "``TheModule``"s symbol table, which is used by the function call -code above. +in "``TheModule``"s symbol table. .. code-block:: c++ - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != Name) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = TheModule->getFunction(Name); + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); -The Module symbol table works just like the Function symbol table when -it comes to name conflicts: if a new function is created with a name -that was previously added to the symbol table, the new function will get -implicitly renamed when added to the Module. The code above exploits -this fact to determine if there was a previous definition of this -function. + return F; -In Kaleidoscope, I choose to allow redefinitions of functions in two -cases: first, we want to allow 'extern'ing a function more than once, as -long as the prototypes for the externs match (since all arguments have -the same type, we just have to check that the number of arguments -match). Second, we want to allow 'extern'ing a function and then -defining a body for it. This is useful when defining mutually recursive -functions. +Finally, we set the name of each of the function's arguments according to the +names given in the Prototype. This step isn't strictly necessary, but keeping +the names consistent makes the IR more readable, and allows subsequent code to +refer directly to the arguments for their names, rather than having to look up +them up in the Prototype AST. -In order to implement this, the code above first checks to see if there -is a collision on the name of the function. If so, it deletes the -function we just created (by calling ``eraseFromParent``) and then -calling ``getFunction`` to get the existing function with the specified -name. Note that many APIs in LLVM have "erase" forms and "remove" forms. -The "remove" form unlinks the object from its parent (e.g. a Function -from a Module) and returns it. 
The "erase" form unlinks the object and -then deletes it. +At this point we have a function prototype with no body. This is how LLVM IR +represents function declarations. For extern statements in Kaleidoscope, this +is as far as we need to go. For function definitions however, we need to +codegen and attach a function body. .. code-block:: c++ - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } + Function *FunctionAST::codegen() { + // First, check for an existing function from a previous 'extern' declaration. + Function *TheFunction = TheModule->getFunction(Proto->getName()); - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } + if (!TheFunction) + TheFunction = Proto->codegen(); -In order to verify the logic above, we first check to see if the -pre-existing function is "empty". In this case, empty means that it has -no basic blocks in it, which means it has no body. If it has no body, it -is a forward declaration. Since we don't allow anything after a full -definition of the function, the code rejects this case. If the previous -reference to a function was an 'extern', we simply verify that the -number of arguments for that definition and this one match up. If not, -we emit an error. + if (!TheFunction) + return nullptr; + + if (!TheFunction->empty()) + return (Function*)ErrorV("Function cannot be redefined."); + + +For function definitions, we start by searching TheModule's symbol table for an +existing version of this function, in case one has already been created using an +'extern' statement. If Module::getFunction returns null then no previous version +exists, so we'll codegen one from the Prototype. In either case, we want to +assert that the function is empty (i.e. has no body yet) before we start. .. code-block:: c++ - // Set names for all arguments. - unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) { - AI->setName(Args[Idx]); + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); - // Add arguments to variable symbol table. - NamedValues[Args[Idx]] = AI; - } - return F; - } - -The last bit of code for prototypes loops over all of the arguments in -the function, setting the name of the LLVM Argument objects to match, -and registering the arguments in the ``NamedValues`` map for future use -by the ``VariableExprAST`` AST node. Once this is set up, it returns the -Function object to the caller. Note that we don't check for conflicting -argument names here (e.g. "extern foo(a b a)"). Doing so would be very -straight-forward with the mechanics we have already used above. - -.. code-block:: c++ - - Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; - -Code generation for function definitions starts out simply enough: we -just codegen the prototype (Proto) and verify that it is ok. We then -clear out the ``NamedValues`` map to make sure that there isn't anything -in it from the last function we compiled. Code generation of the -prototype ensures that there is an LLVM Function object that is ready to -go for us. - -.. code-block:: c++ - - // Create a new basic block to start insertion into. 
- BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); - Builder.SetInsertPoint(BB); - - if (Value *RetVal = Body->Codegen()) { + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[Arg.getName()] = &Arg; Now we get to the point where the ``Builder`` is set up. The first line creates a new `basic block `_ @@ -396,9 +358,12 @@ Graph `_. Since we don't have any control flow, our functions will only contain one block at this point. We'll fix this in `Chapter 5 `_ :). +Next we add the function arguments to the NamedValues map (after first clearing +it out) so that they're accessible to ``VariableExprAST`` nodes. + .. code-block:: c++ - if (Value *RetVal = Body->Codegen()) { + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); @@ -408,11 +373,11 @@ at this point. We'll fix this in `Chapter 5 `_ :). return TheFunction; } -Once the insertion point is set up, we call the ``CodeGen()`` method for -the root expression of the function. If no error happens, this emits -code to compute the expression into the entry block and returns the -value that was computed. Assuming no error, we then create an LLVM `ret -instruction <../LangRef.html#i_ret>`_, which completes the function. +Once the insertion point has been set up and the NamedValues map populated, +we call the ``codegen()`` method for the root expression of the function. If no +error happens, this emits code to compute the expression into the entry block +and returns the value that was computed. Assuming no error, we then create an +LLVM `ret instruction <../LangRef.html#ret-instruction>`_, which completes the function. Once the function is built, we call ``verifyFunction``, which is provided by LLVM. This function does a variety of consistency checks on the generated code, to determine if our compiler is doing everything @@ -423,7 +388,7 @@ function is finished and validated, we return it. // Error reading body, remove function. TheFunction->eraseFromParent(); - return 0; + return nullptr; } The only piece left here is handling of the error case. For simplicity, @@ -432,23 +397,25 @@ we handle this by merely deleting the function we produced with the that they incorrectly typed in before: if we didn't delete it, it would live in the symbol table, with a body, preventing future redefinition. -This code does have a bug, though. Since the ``PrototypeAST::Codegen`` -can return a previously defined forward declaration, our code can -actually delete a forward declaration. There are a number of ways to fix -this bug, see what you can come up with! Here is a testcase: +This code does have a bug, though: If the ``FunctionAST::codegen()`` method +finds an existing IR Function, it does not validate its signature against the +definition's own prototype. This means that an earlier 'extern' declaration will +take precedence over the function definition's signature, which can cause +codegen to fail, for instance if the function arguments are named differently. +There are a number of ways to fix this bug, see what you can come up with! Here +is a testcase: :: - extern foo(a b); # ok, defines foo. - def foo(a b) c; # error, 'c' is invalid. - def bar() foo(1, 2); # error, unknown function "foo" + extern foo(a); # ok, defines foo. + def foo(b) b; # Error: Unknown variable name. (decl using 'a' takes precedence). 
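If you want a hint, one possible direction is sketched below. This is an illustration only, not the tutorial's code: it assumes a hypothetical ``getArgNames()`` accessor on ``PrototypeAST`` (the tutorial keeps the argument list private), and it would run in ``FunctionAST::codegen()`` right after the existing ``Function`` is found:

.. code-block:: c++

    // Sketch of one possible fix (assumes a hypothetical getArgNames()
    // accessor on PrototypeAST). First reject declarations whose arity
    // doesn't match this definition, then overwrite the argument names so
    // NamedValues is populated with the names the body actually uses.
    if (TheFunction->arg_size() != Proto->getArgNames().size())
      return (Function *)ErrorV("redefinition of function with different # args");
    unsigned Idx = 0;
    for (auto &Arg : TheFunction->args())
      Arg.setName(Proto->getArgNames()[Idx++]);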
Driver Changes and Closing Thoughts
===================================

For now, code generation to LLVM doesn't really get us much, except
that we can look at the pretty IR calls. The sample code inserts calls to
-Codegen into the "``HandleDefinition``", "``HandleExtern``" etc
+codegen into the "``HandleDefinition``", "``HandleExtern``" etc.
functions, and then dumps out the LLVM IR. This gives a nice way to look
at the LLVM IR for simple functions. For example:

@@ -463,10 +430,10 @@ at the LLVM IR for simple functions. For example:

 Note how the parser turns the top-level expression into anonymous functions
 for us. This will be handy when we add `JIT
-support `_ in the next chapter. Also note that the
+support `_ in the next chapter. Also note that the
 code is very literally transcribed, no optimizations are being performed
 except simple constant folding done by IRBuilder. We will `add
-optimizations `_ explicitly in the next
+optimizations `_ explicitly in the next
 chapter.

 ::

diff --git a/docs/tutorial/LangImpl4.rst b/docs/tutorial/LangImpl4.rst
index cdaac634dd76..a671d0c37f9d 100644
--- a/docs/tutorial/LangImpl4.rst
+++ b/docs/tutorial/LangImpl4.rst
@@ -120,57 +120,53 @@ exactly the code we have now, except that we would defer running the
 optimizer until the entire file has been parsed.

 In order to get per-function optimizations going, we need to set up a
-`FunctionPassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold
+`FunctionPassManager <../WritingAnLLVMPass.html#what-passmanager-does>`_ to hold
 and organize the LLVM optimizations that we want to run. Once we have
-that, we can add a set of optimizations to run. The code looks like
-this:
+that, we can add a set of optimizations to run. We'll need a new
+FunctionPassManager for each module that we want to optimize, so we'll
+write a function to create and initialize both the module and pass manager
+for us:

 .. code-block:: c++

-    FunctionPassManager OurFPM(TheModule);
+  void InitializeModuleAndPassManager(void) {
+    // Open a new module.
+    TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext());
+    TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
+
+    // Create a new pass manager attached to it.
+    TheFPM = llvm::make_unique<FunctionPassManager>(TheModule.get());

-    // Set up the optimizer pipeline.  Start with registering info about how the
-    // target lays out data structures.
-    OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
     // Provide basic AliasAnalysis support for GVN.
-    OurFPM.add(createBasicAliasAnalysisPass());
+    TheFPM->add(createBasicAliasAnalysisPass());
     // Do simple "peephole" optimizations and bit-twiddling optzns.
-    OurFPM.add(createInstructionCombiningPass());
+    TheFPM->add(createInstructionCombiningPass());
     // Reassociate expressions.
-    OurFPM.add(createReassociatePass());
+    TheFPM->add(createReassociatePass());
     // Eliminate Common SubExpressions.
-    OurFPM.add(createGVNPass());
+    TheFPM->add(createGVNPass());
     // Simplify the control flow graph (deleting unreachable blocks, etc).
-    OurFPM.add(createCFGSimplificationPass());
+    TheFPM->add(createCFGSimplificationPass());

-    OurFPM.doInitialization();
+    TheFPM->doInitialization();
+  }

-    // Set the global so the code gen can use this.
-    TheFPM = &OurFPM;
+This code initializes the global module ``TheModule``, and the function pass
+manager ``TheFPM``, which is attached to ``TheModule``. Once the pass manager is
+set up, we use a series of "add" calls to add a bunch of LLVM passes.

-    // Run the main "interpreter loop" now.
-    MainLoop();
-
-This code defines a ``FunctionPassManager``, "``OurFPM``". It requires a
-pointer to the ``Module`` to construct itself. Once it is set up, we use
-a series of "add" calls to add a bunch of LLVM passes. The first pass is
-basically boilerplate, it adds a pass so that later optimizations know
-how the data structures in the program are laid out. The
-"``TheExecutionEngine``" variable is related to the JIT, which we will
-get to in the next section.
-
-In this case, we choose to add 4 optimization passes. The passes we
-chose here are a pretty standard set of "cleanup" optimizations that are
-useful for a wide variety of code. I won't delve into what they do but,
-believe me, they are a good starting place :).
+In this case, we choose to add five passes: one analysis pass (alias analysis),
+and four optimization passes. The passes we choose here are a pretty standard set
+of "cleanup" optimizations that are useful for a wide variety of code. I won't
+delve into what they do but, believe me, they are a good starting place :).

 Once the PassManager is set up, we need to make use of it. We do this by
 running it after our newly created function is constructed (in
-``FunctionAST::Codegen``), but before it is returned to the client:
+``FunctionAST::codegen()``), but before it is returned to the client:

 .. code-block:: c++

-    if (Value *RetVal = Body->Codegen()) {
+    if (Value *RetVal = Body->codegen()) {
       // Finish off the function.
       Builder.CreateRet(RetVal);

@@ -231,55 +227,85 @@ should evaluate and print out 3. If they define a function, they should
 be able to call it from the command line.

 In order to do this, we first declare and initialize the JIT. This is
-done by adding a global variable and a call in ``main``:
+done by adding a global variable ``TheJIT``, and initializing it in
+``main``:

 .. code-block:: c++

-    static ExecutionEngine *TheExecutionEngine;
+    static std::unique_ptr<KaleidoscopeJIT> TheJIT;
     ...
     int main() {
       ..
-      // Create the JIT.  This takes ownership of the module.
-      TheExecutionEngine = EngineBuilder(TheModule).create();
-      ..
+      TheJIT = llvm::make_unique<KaleidoscopeJIT>();
+
+      // Run the main "interpreter loop" now.
+      MainLoop();
+
+      return 0;
     }

-This creates an abstract "Execution Engine" which can be either a JIT
-compiler or the LLVM interpreter. LLVM will automatically pick a JIT
-compiler for you if one is available for your platform, otherwise it
-will fall back to the interpreter.
+The KaleidoscopeJIT class is a simple JIT built specifically for these
+tutorials. In later chapters we will look at how it works and extend it with
+new features, but for now we will take it as given. Its API is very simple:
+``addModule`` adds an LLVM IR module to the JIT, making its functions
+available for execution; ``removeModule`` removes a module, freeing any
+memory associated with the code in that module; and ``findSymbol`` allows us
+to look up pointers to the compiled code.

-Once the ``ExecutionEngine`` is created, the JIT is ready to be used.
-There are a variety of APIs that are useful, but the simplest one is the
-"``getPointerToFunction(F)``" method. This method JIT compiles the
-specified LLVM Function and returns a function pointer to the generated
-machine code. In our case, this means that we can change the code that
-parses a top-level expression to look like this:
+We can take this simple API and change our code that parses top-level expressions
+to look like this:

 .. code-block:: c++

     static void HandleTopLevelExpression() {
      // Evaluate a top-level expression into an anonymous function.
-      if (FunctionAST *F = ParseTopLevelExpr()) {
-        if (Function *LF = F->Codegen()) {
-          LF->dump();  // Dump the function for exposition purposes.
+      if (auto FnAST = ParseTopLevelExpr()) {
+        if (FnAST->codegen()) {

-          // JIT the function, returning a function pointer.
-          void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+          // JIT the module containing the anonymous expression, keeping a handle so
+          // we can free it later.
+          auto H = TheJIT->addModule(std::move(TheModule));
+          InitializeModuleAndPassManager();

-          // Cast it to the right type (takes no arguments, returns a double) so we
-          // can call it as a native function.
-          double (*FP)() = (double (*)())(intptr_t)FPtr;
+          // Search the JIT for the __anon_expr symbol.
+          auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
+          assert(ExprSymbol && "Function not found");
+
+          // Get the symbol's address and cast it to the right type (takes no
+          // arguments, returns a double) so we can call it as a native function.
+          double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
           fprintf(stderr, "Evaluated to %f\n", FP());
+
+          // Delete the anonymous expression module from the JIT.
+          TheJIT->removeModule(H);
         }

-Recall that we compile top-level expressions into a self-contained LLVM
-function that takes no arguments and returns the computed double.
-Because the LLVM JIT compiler matches the native platform ABI, this
-means that you can just cast the result pointer to a function pointer of
-that type and call it directly. This means, there is no difference
-between JIT compiled code and native machine code that is statically
-linked into your application.
+If parsing and codegen succeed, the next step is to add the module containing
+the top-level expression to the JIT. We do this by calling addModule, which
+triggers code generation for all the functions in the module, and returns a
+handle that can be used to remove the module from the JIT later. Once the module
+has been added to the JIT it can no longer be modified, so we also open a new
+module to hold subsequent code by calling ``InitializeModuleAndPassManager()``.
+
+Once we've added the module to the JIT we need to get a pointer to the final
+generated code. We do this by calling the JIT's findSymbol method, and passing
+the name of the top-level expression function: ``__anon_expr``. Since we just
+added this function, we assert that findSymbol returned a result.
+
+Next, we get the in-memory address of the ``__anon_expr`` function by calling
+``getAddress()`` on the symbol. Recall that we compile top-level expressions
+into a self-contained LLVM function that takes no arguments and returns the
+computed double. Because the LLVM JIT compiler matches the native platform ABI,
+this means that you can just cast the result pointer to a function pointer of
+that type and call it directly. This means there is no difference between JIT
+compiled code and native machine code that is statically linked into your
+application.
+
+Finally, since we don't support re-evaluation of top-level expressions, we
+remove the module from the JIT when we're done to free the associated memory.
+Recall, however, that the module we created a few lines earlier (via
+``InitializeModuleAndPassManager``) is still open and waiting for new code to be
+added.

-With just these two changes, lets see how Kaleidoscope works now!
+With just these two changes, let's see how Kaleidoscope works now!

@@ -320,19 +346,161 @@ demonstrates very basic functionality, but can we do more?
 Evaluated to 24.000000

-This illustrates that we can now call user code, but there is something
-a bit subtle going on here. Note that we only invoke the JIT on the
-anonymous functions that *call testfunc*, but we never invoked it on
-*testfunc* itself. What actually happened here is that the JIT scanned
-for all non-JIT'd functions transitively called from the anonymous
-function and compiled all of them before returning from
-``getPointerToFunction()``.
+  ready> testfunc(5, 10);
+  ready> LLVM ERROR: Program used external function 'testfunc' which could not be resolved!

-The JIT provides a number of other more advanced interfaces for things
-like freeing allocated machine code, rejit'ing functions to update them,
-etc. However, even with this simple code, we get some surprisingly
-powerful capabilities - check this out (I removed the dump of the
-anonymous functions, you should get the idea by now :) :
+
+Function definitions and calls also work, but something went very wrong on that
+last line. The call looks valid, so what happened? As you may have guessed from
+the API, a Module is a unit of allocation for the JIT, and testfunc was part
+of the same module that contained the anonymous expression. When we removed that
+module from the JIT to free the memory for the anonymous expression, we deleted
+the definition of ``testfunc`` along with it. Then, when we tried to call
+testfunc a second time, the JIT could no longer find it.
+
+The easiest way to fix this is to put the anonymous expression in a separate
+module from the rest of the function definitions. The JIT will happily resolve
+function calls across module boundaries, as long as each of the functions called
+has a prototype, and is added to the JIT before it is called. By putting the
+anonymous expression in a different module we can delete it without affecting
+the rest of the functions.
+
+In fact, we're going to go a step further and put every function in its own
+module. Doing so allows us to exploit a useful property of the KaleidoscopeJIT
+that will make our environment more REPL-like: Functions can be added to the
+JIT more than once (unlike a module where every function must have a unique
+definition). When you look up a symbol in KaleidoscopeJIT it will always return
+the most recent definition:
+
+::
+
+    ready> def foo(x) x + 1;
+    Read function definition:
+    define double @foo(double %x) {
+    entry:
+      %addtmp = fadd double %x, 1.000000e+00
+      ret double %addtmp
+    }
+
+    ready> foo(2);
+    Evaluated to 3.000000
+
+    ready> def foo(x) x + 2;
+    define double @foo(double %x) {
+    entry:
+      %addtmp = fadd double %x, 2.000000e+00
+      ret double %addtmp
+    }
+
+    ready> foo(2);
+    Evaluated to 4.000000
+
+
+To allow each function to live in its own module we'll need a way to
+re-generate previous function declarations into each new module we open:
+
+.. code-block:: c++
+
+    static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+
+    ...
+
+    Function *getFunction(std::string Name) {
+      // First, see if the function has already been added to the current module.
+      if (auto *F = TheModule->getFunction(Name))
+        return F;
+
+      // If not, check whether we can codegen the declaration from some existing
+      // prototype.
+      auto FI = FunctionProtos.find(Name);
+      if (FI != FunctionProtos.end())
+        return FI->second->codegen();
+
+      // If no existing prototype exists, return null.
+      return nullptr;
+    }
+
+    ...
+
+    Value *CallExprAST::codegen() {
+      // Look up the name in the global module table.
+      Function *CalleeF = getFunction(Callee);
+
+    ...
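+
+    // Editorial sketch (not part of the original patch): getFunction() above
+    // relies on a global FunctionProtos map that the surrounding text names
+    // but never declares. Mirroring the chapter's code, it is assumed to be:
+    static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;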
+
+    Function *FunctionAST::codegen() {
+      // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+      // reference to it for use below.
+      auto &P = *Proto;
+      FunctionProtos[Proto->getName()] = std::move(Proto);
+      Function *TheFunction = getFunction(P.getName());
+      if (!TheFunction)
+        return nullptr;
+
+
+To enable this, we'll start by adding a new global, ``FunctionProtos``, that
+holds the most recent prototype for each function. We'll also add a convenience
+method, ``getFunction()``, to replace calls to ``TheModule->getFunction()``.
+Our convenience method searches ``TheModule`` for an existing function
+declaration, falling back to generating a new declaration from FunctionProtos if
+it doesn't find one. In ``CallExprAST::codegen()`` we just need to replace the
+call to ``TheModule->getFunction()``. In ``FunctionAST::codegen()`` we need to
+update the FunctionProtos map first, then call ``getFunction()``. With this
+done, we can always obtain a function declaration in the current module for any
+previously declared function.
+
+We also need to update HandleDefinition and HandleExtern:
+
+.. code-block:: c++
+
+    static void HandleDefinition() {
+      if (auto FnAST = ParseDefinition()) {
+        if (auto *FnIR = FnAST->codegen()) {
+          fprintf(stderr, "Read function definition:");
+          FnIR->dump();
+          TheJIT->addModule(std::move(TheModule));
+          InitializeModuleAndPassManager();
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+    static void HandleExtern() {
+      if (auto ProtoAST = ParseExtern()) {
+        if (auto *FnIR = ProtoAST->codegen()) {
+          fprintf(stderr, "Read extern: ");
+          FnIR->dump();
+          FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
+        }
+      } else {
+        // Skip token for error recovery.
+        getNextToken();
+      }
+    }
+
+In HandleDefinition, we add two lines to transfer the newly defined function to
+the JIT and open a new module. In HandleExtern, we just need to add one line to
+add the prototype to FunctionProtos.
+
+With these changes made, let's try our REPL again (I removed the dump of the
+anonymous functions this time, you should get the idea by now :) :
+
+::
+
+    ready> def foo(x) x + 1;
+    ready> foo(2);
+    Evaluated to 3.000000
+
+    ready> def foo(x) x + 2;
+    ready> foo(2);
+    Evaluated to 4.000000
+
+It works!
+
+Even with this simple code, we get some surprisingly powerful capabilities -
+check this out:

 ::

@@ -375,34 +543,30 @@ anonymous functions, you should get the idea by now :) :

    Evaluated to 1.000000

-Whoa, how does the JIT know about sin and cos? The answer is
-surprisingly simple: in this example, the JIT started execution of a
-function and got to a function call. It realized that the function was
-not yet JIT compiled and invoked the standard set of routines to resolve
-the function. In this case, there is no body defined for the function,
-so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope
-process itself. Since "``sin``" is defined within the JIT's address
-space, it simply patches up calls in the module to call the libm version
-of ``sin`` directly.
+Whoa, how does the JIT know about sin and cos? The answer is surprisingly
+simple: The KaleidoscopeJIT has a straightforward symbol resolution rule that
+it uses to find symbols that aren't available in any given module: First
+it searches all the modules that have already been added to the JIT, from the
+most recent to the oldest, to find the newest definition.
If no definition is
+found inside the JIT, it falls back to calling "``dlsym("sin")``" on the
+Kaleidoscope process itself. Since "``sin``" is defined within the JIT's
+address space, it simply patches up calls in the module to call the libm
+version of ``sin`` directly.

-The LLVM JIT provides a number of interfaces (look in the
-``ExecutionEngine.h`` file) for controlling how unknown functions get
-resolved. It allows you to establish explicit mappings between IR
-objects and addresses (useful for LLVM global variables that you want to
-map to static tables, for example), allows you to dynamically decide on
-the fly based on the function name, and even allows you to have the JIT
-compile functions lazily the first time they're called.
+In the future we'll see how tweaking this symbol resolution rule can be used to
+enable all sorts of useful features, from security (restricting the set of
+symbols available to JIT'd code), to dynamic code generation based on symbol
+names, and even lazy compilation.

-One interesting application of this is that we can now extend the
-language by writing arbitrary C++ code to implement operations. For
-example, if we add:
+One immediate benefit of the symbol resolution rule is that we can now extend
+the language by writing arbitrary C++ code to implement operations. For example,
+if we add:

 .. code-block:: c++

     /// putchard - putchar that takes a double and returns 0.
-    extern "C"
-    double putchard(double X) {
-      putchar((char)X);
+    extern "C" double putchard(double X) {
+      fputc((char)X, stderr);
       return 0;
     }

diff --git a/docs/tutorial/LangImpl5.rst b/docs/tutorial/LangImpl5.rst
index ca2ffebc19a2..d916f92bf99e 100644
--- a/docs/tutorial/LangImpl5.rst
+++ b/docs/tutorial/LangImpl5.rst
@@ -66,7 +66,9 @@ for the relevant tokens:

 .. code-block:: c++

   // control
-  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_if = -6,
+  tok_then = -7,
+  tok_else = -8,

 Once we have that, we recognize the new keywords in the lexer. This is
 pretty simple stuff:

@@ -74,11 +76,16 @@ pretty simple stuff:

 .. code-block:: c++

     ...
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "def")
+      return tok_def;
+    if (IdentifierStr == "extern")
+      return tok_extern;
+    if (IdentifierStr == "if")
+      return tok_if;
+    if (IdentifierStr == "then")
+      return tok_then;
+    if (IdentifierStr == "else")
+      return tok_else;
     return tok_identifier;

 AST Extensions for If/Then/Else
@@ -90,11 +97,13 @@ To represent the new expression we add a new AST node for it:

     /// IfExprAST - Expression class for if/then/else.
     class IfExprAST : public ExprAST {
-      ExprAST *Cond, *Then, *Else;
+      std::unique_ptr<ExprAST> Cond, Then, Else;
+
     public:
-      IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-        : Cond(cond), Then(then), Else(_else) {}
-      virtual Value *Codegen();
+      IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
+                std::unique_ptr<ExprAST> Else)
+          : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
+      virtual Value *codegen();
     };

 The AST node just has pointers to the various subexpressions.
@@ -109,42 +118,51 @@ First we define a new parsing function:

 .. code-block:: c++

     /// ifexpr ::= 'if' expression 'then' expression 'else' expression
-    static ExprAST *ParseIfExpr() {
+    static std::unique_ptr<ExprAST> ParseIfExpr() {
       getNextToken();  // eat the if.

       // condition.
-      ExprAST *Cond = ParseExpression();
-      if (!Cond) return 0;
+      auto Cond = ParseExpression();
+      if (!Cond)
+        return nullptr;

       if (CurTok != tok_then)
         return Error("expected then");
       getNextToken();  // eat the then

-      ExprAST *Then = ParseExpression();
-      if (Then == 0) return 0;
+      auto Then = ParseExpression();
+      if (!Then)
+        return nullptr;

       if (CurTok != tok_else)
         return Error("expected else");

       getNextToken();

-      ExprAST *Else = ParseExpression();
-      if (!Else) return 0;
+      auto Else = ParseExpression();
+      if (!Else)
+        return nullptr;

-      return new IfExprAST(Cond, Then, Else);
+      return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
+                                          std::move(Else));
     }

 Next we hook it up as a primary expression:

 .. code-block:: c++

-    static ExprAST *ParsePrimary() {
+    static std::unique_ptr<ExprAST> ParsePrimary() {
       switch (CurTok) {
-      default: return Error("unknown token when expecting an expression");
-      case tok_identifier: return ParseIdentifierExpr();
-      case tok_number:     return ParseNumberExpr();
-      case '(':            return ParseParenExpr();
-      case tok_if:         return ParseIfExpr();
+      default:
+        return Error("unknown token when expecting an expression");
+      case tok_identifier:
+        return ParseIdentifierExpr();
+      case tok_number:
+        return ParseNumberExpr();
+      case '(':
+        return ParseParenExpr();
+      case tok_if:
+        return ParseIfExpr();
       }
     }

@@ -196,7 +214,7 @@ Kaleidoscope looks like this:

 To visualize the control flow graph, you can use a nifty feature of the
 LLVM '`opt `_' tool. If you put this LLVM IR into "t.ll" and
 run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a
-window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll
+window will pop up <../ProgrammersManual.html#viewing-graphs-while-debugging-code>`_ and you'll
 see this graph:

 .. figure:: LangImpl5-cfg.png

@@ -262,19 +280,19 @@ Okay, enough of the motivation and overview, let's generate code!

 Code Generation for If/Then/Else
 --------------------------------

-In order to generate code for this, we implement the ``Codegen`` method
+In order to generate code for this, we implement the ``codegen`` method
 for ``IfExprAST``:

 .. code-block:: c++

-    Value *IfExprAST::Codegen() {
-      Value *CondV = Cond->Codegen();
-      if (CondV == 0) return 0;
+    Value *IfExprAST::codegen() {
+      Value *CondV = Cond->codegen();
+      if (!CondV)
+        return nullptr;

       // Convert condition to a bool by comparing equal to 0.0.
-      CondV = Builder.CreateFCmpONE(CondV,
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                    "ifcond");
+      CondV = Builder.CreateFCmpONE(
+          CondV, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "ifcond");

 This code is straightforward and similar to what we saw before. We emit
 the expression for the condition, then compare that value to zero to get
@@ -286,7 +304,8 @@ a truth value as a 1-bit (bool) value.

       // Create blocks for the then and else cases.  Insert the 'then' block at the
       // end of the function.
-      BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+      BasicBlock *ThenBB =
+          BasicBlock::Create(getGlobalContext(), "then", TheFunction);
       BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
       BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");

@@ -318,8 +337,9 @@ that LLVM supports forward references.

       // Emit then value.
       Builder.SetInsertPoint(ThenBB);

-      Value *ThenV = Then->Codegen();
-      if (ThenV == 0) return 0;
+      Value *ThenV = Then->codegen();
+      if (!ThenV)
+        return nullptr;

       Builder.CreateBr(MergeBB);
       // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
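To see why that final re-query matters, here is a compact sketch (illustrative
only, not part of the patch; it reuses ``Builder``, ``ThenBB``, ``ThenV``,
``PN``, and ``MergeBB`` from the listing above):

.. code-block:: c++

    // If Then is itself an if/then/else, Then->codegen() leaves the builder
    // positioned in the nested expression's merge block, not in ThenBB. The
    // unconditional branch we just emitted therefore lives in that *new*
    // block, which is the real predecessor of MergeBB:
    ThenBB = Builder.GetInsertBlock();
    // Using the original ThenBB in PN->addIncoming(ThenV, ThenBB) would name
    // a block that never branches to MergeBB, producing invalid IR.
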
@@ -349,7 +369,7 @@ of the block in the CFG. Why then, are we getting the current block when
 we just set it to ThenBB 5 lines above? The problem is that the "Then"
 expression may actually itself change the block that the Builder is
 emitting into if, for example, it contains a nested "if/then/else"
-expression. Because calling Codegen recursively could arbitrarily change
+expression. Because calling ``codegen()`` recursively could arbitrarily change
 the notion of the current block, we are required to get an up-to-date
 value for code that will set up the Phi node.

@@ -359,11 +379,12 @@ value for code that will set up the Phi node.
       TheFunction->getBasicBlockList().push_back(ElseBB);
       Builder.SetInsertPoint(ElseBB);

-      Value *ElseV = Else->Codegen();
-      if (ElseV == 0) return 0;
+      Value *ElseV = Else->codegen();
+      if (!ElseV)
+        return nullptr;

       Builder.CreateBr(MergeBB);
-      // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+      // codegen of 'Else' can change the current block, update ElseBB for the PHI.
       ElseBB = Builder.GetInsertBlock();

 Code generation for the 'else' block is basically identical to codegen
@@ -378,8 +399,8 @@ code:

       // Emit merge block.
       TheFunction->getBasicBlockList().push_back(MergeBB);
       Builder.SetInsertPoint(MergeBB);
-      PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2,
-                                      "iftmp");
+      PHINode *PN =
+          Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp");

       PN->addIncoming(ThenV, ThenBB);
       PN->addIncoming(ElseV, ElseBB);

@@ -444,13 +465,20 @@ The lexer extensions are the same sort of thing as for if/then/else:

     tok_for = -9, tok_in = -10

     ... in gettok ...
-    if (IdentifierStr == "def") return tok_def;
-    if (IdentifierStr == "extern") return tok_extern;
-    if (IdentifierStr == "if") return tok_if;
-    if (IdentifierStr == "then") return tok_then;
-    if (IdentifierStr == "else") return tok_else;
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "def")
+      return tok_def;
+    if (IdentifierStr == "extern")
+      return tok_extern;
+    if (IdentifierStr == "if")
+      return tok_if;
+    if (IdentifierStr == "then")
+      return tok_then;
+    if (IdentifierStr == "else")
+      return tok_else;
+    if (IdentifierStr == "for")
+      return tok_for;
+    if (IdentifierStr == "in")
+      return tok_in;
     return tok_identifier;

 AST Extensions for the 'for' Loop
@@ -464,12 +492,15 @@ variable name and the constituent expressions in the node.

     /// ForExprAST - Expression class for for/in.
     class ForExprAST : public ExprAST {
       std::string VarName;
-      ExprAST *Start, *End, *Step, *Body;
+      std::unique_ptr<ExprAST> Start, End, Step, Body;
+
     public:
-      ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
-                 ExprAST *step, ExprAST *body)
-        : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-      virtual Value *Codegen();
+      ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
+                 std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
+                 std::unique_ptr<ExprAST> Body)
+          : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
+            Step(std::move(Step)), Body(std::move(Body)) {}
+      virtual Value *codegen();
     };

 Parser Extensions for the 'for' Loop
@@ -483,7 +514,7 @@ value to null in the AST node:

 .. code-block:: c++

     /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-    static ExprAST *ParseForExpr() {
+    static std::unique_ptr<ExprAST> ParseForExpr() {
       getNextToken();  // eat the for.

       if (CurTok != tok_identifier)
@@ -497,31 +528,37 @@ value to null in the AST node:

       getNextToken();  // eat '='.
-      ExprAST *Start = ParseExpression();
-      if (Start == 0) return 0;
+      auto Start = ParseExpression();
+      if (!Start)
+        return nullptr;
       if (CurTok != ',')
         return Error("expected ',' after for start value");
       getNextToken();

-      ExprAST *End = ParseExpression();
-      if (End == 0) return 0;
+      auto End = ParseExpression();
+      if (!End)
+        return nullptr;

       // The step value is optional.
-      ExprAST *Step = 0;
+      std::unique_ptr<ExprAST> Step;
       if (CurTok == ',') {
         getNextToken();
         Step = ParseExpression();
-        if (Step == 0) return 0;
+        if (!Step)
+          return nullptr;
       }

       if (CurTok != tok_in)
         return Error("expected 'in' after for");
       getNextToken();  // eat 'in'.

-      ExprAST *Body = ParseExpression();
-      if (Body == 0) return 0;
+      auto Body = ParseExpression();
+      if (!Body)
+        return nullptr;

-      return new ForExprAST(IdName, Start, End, Step, Body);
+      return llvm::make_unique<ForExprAST>(IdName, std::move(Start),
+                                           std::move(End), std::move(Step),
+                                           std::move(Body));
     }

 LLVM IR for the 'for' Loop
@@ -565,14 +602,14 @@ together.

 Code Generation for the 'for' Loop
 ----------------------------------

-The first part of Codegen is very simple: we just output the start
+The first part of codegen is very simple: we just output the start
 expression for the loop value:

 .. code-block:: c++

-    Value *ForExprAST::Codegen() {
+    Value *ForExprAST::codegen() {
       // Emit the start code first, without 'variable' in scope.
-      Value *StartVal = Start->Codegen();
+      Value *StartVal = Start->codegen();
       if (StartVal == 0) return 0;

 With this out of the way, the next step is to set up the LLVM basic
@@ -587,7 +624,8 @@ expression).

       // block.
       Function *TheFunction = Builder.GetInsertBlock()->getParent();
       BasicBlock *PreheaderBB = Builder.GetInsertBlock();
-      BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+      BasicBlock *LoopBB =
+          BasicBlock::Create(getGlobalContext(), "loop", TheFunction);

       // Insert an explicit fall through from the current block to the LoopBB.
       Builder.CreateBr(LoopBB);
@@ -604,7 +642,8 @@ the two blocks.

       Builder.SetInsertPoint(LoopBB);

       // Start the PHI node with an entry for Start.
-      PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str());
+      PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                            2, VarName.c_str());
       Variable->addIncoming(StartVal, PreheaderBB);

 Now that the "preheader" for the loop is set up, we switch to emitting
@@ -624,8 +663,8 @@ backedge, but we can't set it up yet (because it doesn't exist!).

       // Emit the body of the loop.  This, like any other expr, can change the
       // current BB.  Note that we ignore the value computed by the body, but don't
       // allow an error.
-      if (Body->Codegen() == 0)
-        return 0;
+      if (!Body->codegen())
+        return nullptr;

 Now the code starts to get more interesting. Our 'for' loop introduces a
 new variable to the symbol table. This means that our symbol table can
@@ -647,10 +686,11 @@ table.

 .. code-block:: c++

       // Emit the step value.
-      Value *StepVal;
+      Value *StepVal = nullptr;
       if (Step) {
-        StepVal = Step->Codegen();
-        if (StepVal == 0) return 0;
+        StepVal = Step->codegen();
+        if (!StepVal)
+          return nullptr;
       } else {
         // If not specified, use 1.0.
         StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
@@ -666,13 +706,13 @@ iteration of the loop.

 .. code-block:: c++

       // Compute the end condition.
-      Value *EndCond = End->Codegen();
-      if (EndCond == 0) return EndCond;
+      Value *EndCond = End->codegen();
+      if (!EndCond)
+        return nullptr;

       // Convert condition to a bool by comparing equal to 0.0.
-      EndCond = Builder.CreateFCmpONE(EndCond,
-                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
-                                      "loopcond");
+      EndCond = Builder.CreateFCmpONE(
+          EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond");

 Finally, we evaluate the exit value of the loop, to determine whether
 the loop should exit. This mirrors the condition evaluation for the
@@ -682,7 +722,8 @@ if/then/else statement.

       // Create the "after loop" block and insert it.
       BasicBlock *LoopEndBB = Builder.GetInsertBlock();
-      BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+      BasicBlock *AfterBB =
+          BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);

       // Insert the conditional branch into the end of LoopEndBB.
       Builder.CreateCondBr(EndCond, LoopBB, AfterBB);

@@ -718,7 +759,7 @@ value, we can add the incoming value to the loop PHI node. After that,
 we remove the loop variable from the symbol table, so that it isn't in
 scope after the for loop. Finally, code generation of the for loop
 always returns 0.0, so that is what we return from
-``ForExprAST::Codegen``.
+``ForExprAST::codegen()``.

 With this, we conclude the "adding control flow to Kaleidoscope" chapter
 of the tutorial. In this chapter we added two control flow constructs,
diff --git a/docs/tutorial/LangImpl6.rst b/docs/tutorial/LangImpl6.rst
index bf78bdea74d6..827cd392effb 100644
--- a/docs/tutorial/LangImpl6.rst
+++ b/docs/tutorial/LangImpl6.rst
@@ -24,7 +24,7 @@ is good or bad. In this tutorial we'll assume that it is okay to use
 this as a way to show some interesting parsing techniques.

 At the end of this tutorial, we'll run through an example Kaleidoscope
-application that `renders the Mandelbrot set <#example>`_. This gives an
+application that `renders the Mandelbrot set <#kicking-the-tires>`_. This gives an
 example of what you can build with Kaleidoscope and its feature set.

 User-defined Operators: the Idea
@@ -96,19 +96,24 @@ keywords:

     enum Token {
       ...
       // operators
-      tok_binary = -11, tok_unary = -12
+      tok_binary = -11,
+      tok_unary = -12
     };
     ...
     static int gettok() {
     ...
-    if (IdentifierStr == "for") return tok_for;
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
+    if (IdentifierStr == "for")
+      return tok_for;
+    if (IdentifierStr == "in")
+      return tok_in;
+    if (IdentifierStr == "binary")
+      return tok_binary;
+    if (IdentifierStr == "unary")
+      return tok_unary;
     return tok_identifier;

 This just adds lexer support for the unary and binary keywords, like we
-did in `previous chapters `_. One nice thing
+did in `previous chapters `_. One nice thing
 about our current AST is that we represent binary operators with full
 generalization by using their ASCII code as the opcode. For our extended
 operators, we'll use this same representation, so we don't need any new

@@ -129,15 +134,17 @@ this:

     class PrototypeAST {
       std::string Name;
       std::vector<std::string> Args;
-      bool isOperator;
+      bool IsOperator;
       unsigned Precedence;  // Precedence if a binary op.
-    public:
-      PrototypeAST(const std::string &name, const std::vector<std::string> &args,
-                   bool isoperator = false, unsigned prec = 0)
-      : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-      bool isUnaryOp() const { return isOperator && Args.size() == 1; }
-      bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+    public:
+      PrototypeAST(const std::string &name, std::vector<std::string> Args,
+                   bool IsOperator = false, unsigned Prec = 0)
+          : Name(name), Args(std::move(Args)), IsOperator(IsOperator),
+            Precedence(Prec) {}
+
+      bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
+      bool isBinaryOp() const { return IsOperator && Args.size() == 2; }

       char getOperatorName() const {
         assert(isUnaryOp() || isBinaryOp());
@@ -146,7 +153,7 @@ this:

       unsigned getBinaryPrecedence() const { return Precedence; }

-      Function *Codegen();
+      Function *codegen();
     };

 Basically, in addition to knowing a name for the prototype, we now keep
@@ -161,7 +168,7 @@ user-defined operator, we need to parse it:

     /// prototype
     ///   ::= id '(' id* ')'
     ///   ::= binary LETTER number? (id, id)
-    static PrototypeAST *ParsePrototype() {
+    static std::unique_ptr<PrototypeAST> ParsePrototype() {
       std::string FnName;

       unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
@@ -210,7 +217,8 @@ user-defined operator, we need to parse it:

       if (Kind && ArgNames.size() != Kind)
         return ErrorP("Invalid number of operands for operator");

-      return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+      return llvm::make_unique<PrototypeAST>(FnName, std::move(ArgNames),
+                                             Kind != 0, BinaryPrecedence);
     }

 This is all fairly straightforward parsing code, and we have already
@@ -227,26 +235,31 @@ default case for our existing binary operator node:

 .. code-block:: c++

-    Value *BinaryExprAST::Codegen() {
-      Value *L = LHS->Codegen();
-      Value *R = RHS->Codegen();
-      if (L == 0 || R == 0) return 0;
+    Value *BinaryExprAST::codegen() {
+      Value *L = LHS->codegen();
+      Value *R = RHS->codegen();
+      if (!L || !R)
+        return nullptr;

       switch (Op) {
-      case '+': return Builder.CreateFAdd(L, R, "addtmp");
-      case '-': return Builder.CreateFSub(L, R, "subtmp");
-      case '*': return Builder.CreateFMul(L, R, "multmp");
+      case '+':
+        return Builder.CreateFAdd(L, R, "addtmp");
+      case '-':
+        return Builder.CreateFSub(L, R, "subtmp");
+      case '*':
+        return Builder.CreateFMul(L, R, "multmp");
       case '<':
         L = Builder.CreateFCmpULT(L, R, "cmptmp");
         // Convert bool 0/1 to double 0.0 or 1.0
         return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
                                     "booltmp");
-      default: break;
+      default:
+        break;
       }

       // If it wasn't a builtin binary operator, it must be a user defined one. Emit
       // a call to it.
-      Function *F = TheModule->getFunction(std::string("binary")+Op);
+      Function *F = TheModule->getFunction(std::string("binary") + Op);
       assert(F && "binary operator not found!");

       Value *Ops[2] = { L, R };

@@ -263,12 +276,12 @@ The final piece of code we are missing is a bit of top-level magic:

 .. code-block:: c++

-    Function *FunctionAST::Codegen() {
+    Function *FunctionAST::codegen() {
       NamedValues.clear();

-      Function *TheFunction = Proto->Codegen();
-      if (TheFunction == 0)
-        return 0;
+      Function *TheFunction = Proto->codegen();
+      if (!TheFunction)
+        return nullptr;

       // If this is an operator, install it.
       if (Proto->isBinaryOp())
@@ -278,7 +291,7 @@ The final piece of code we are missing is a bit of top-level magic:

       BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
       Builder.SetInsertPoint(BB);

-      if (Value *RetVal = Body->Codegen()) {
+      if (Value *RetVal = Body->codegen()) {
     ...

 Basically, before codegening a function, if it is a user-defined
@@ -305,11 +318,12 @@ that, we need an AST node:

     /// UnaryExprAST - Expression class for a unary operator.
     class UnaryExprAST : public ExprAST {
       char Opcode;
-      ExprAST *Operand;
+      std::unique_ptr<ExprAST> Operand;
+
     public:
-      UnaryExprAST(char opcode, ExprAST *operand)
-        : Opcode(opcode), Operand(operand) {}
-      virtual Value *Codegen();
+      UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+          : Opcode(Opcode), Operand(std::move(Operand)) {}
+      virtual Value *codegen();
     };

 This AST node is very simple and obvious by now. It directly mirrors the
@@ -322,7 +336,7 @@ simple: we'll add a new function to do it:

     /// unary
     ///   ::= primary
     ///   ::= '!' unary
-    static ExprAST *ParseUnary() {
+    static std::unique_ptr<ExprAST> ParseUnary() {
       // If the current token is not an operator, it must be a primary expr.
       if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
         return ParsePrimary();
@@ -330,9 +344,9 @@ simple: we'll add a new function to do it:

       // If this is a unary operator, read it.
       int Opc = CurTok;
       getNextToken();
-      if (ExprAST *Operand = ParseUnary())
-        return new UnaryExprAST(Opc, Operand);
-      return 0;
+      if (auto Operand = ParseUnary())
+        return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
+      return nullptr;
     }

 The grammar we add is pretty straightforward here. If we see a unary
@@ -350,21 +364,24 @@ call ParseUnary instead:

     /// binoprhs
     ///   ::= ('+' unary)*
-    static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+    static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                                  std::unique_ptr<ExprAST> LHS) {
       ...
         // Parse the unary expression after the binary operator.
-        ExprAST *RHS = ParseUnary();
-        if (!RHS) return 0;
+        auto RHS = ParseUnary();
+        if (!RHS)
+          return nullptr;
       ...
     }

     /// expression
     ///   ::= unary binoprhs
     ///
-    static ExprAST *ParseExpression() {
-      ExprAST *LHS = ParseUnary();
-      if (!LHS) return 0;
+    static std::unique_ptr<ExprAST> ParseExpression() {
+      auto LHS = ParseUnary();
+      if (!LHS)
+        return nullptr;

-      return ParseBinOpRHS(0, LHS);
+      return ParseBinOpRHS(0, std::move(LHS));
     }

 With these two simple changes, we are now able to parse unary operators
@@ -378,7 +395,7 @@ operator code above with:

     /// prototype
     ///   ::= id '(' id* ')'
     ///   ::= binary LETTER number? (id, id)
     ///   ::= unary LETTER (id)
-    static PrototypeAST *ParsePrototype() {
+    static std::unique_ptr<PrototypeAST> ParsePrototype() {
       std::string FnName;

       unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
@@ -411,12 +428,13 @@ unary operators. It looks like this:

 .. code-block:: c++

-    Value *UnaryExprAST::Codegen() {
-      Value *OperandV = Operand->Codegen();
-      if (OperandV == 0) return 0;
+    Value *UnaryExprAST::codegen() {
+      Value *OperandV = Operand->codegen();
+      if (!OperandV)
+        return nullptr;

       Function *F = TheModule->getFunction(std::string("unary")+Opcode);
-      if (F == 0)
+      if (!F)
         return ErrorV("Unknown unary operator");

       return Builder.CreateCall(F, OperandV, "unop");
diff --git a/docs/tutorial/LangImpl7.rst b/docs/tutorial/LangImpl7.rst
index 648940785b09..1cd7d56fddb4 100644
--- a/docs/tutorial/LangImpl7.rst
+++ b/docs/tutorial/LangImpl7.rst
@@ -118,7 +118,7 @@ that @G defines *space* for an i32 in the global data area, but its
 *name* actually refers to the address for that space.
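To make this concrete in terms of the C++ API used throughout these tutorials,
here is a small sketch (illustrative only, not part of the patch; ``TheModule``
and ``Builder`` are the tutorial's globals, and ``NewVal`` is an assumed value
of @G's type):

.. code-block:: c++

    // @G names an address, so reading its current value takes an explicit
    // load, and updating it takes an explicit store.
    GlobalVariable *G = TheModule->getGlobalVariable("G");
    Value *CurVal = Builder.CreateLoad(G, "g.val"); // read @G
    Builder.CreateStore(NewVal, G);                 // write @G
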
 Stack variables work the same way, except that instead of being declared
 with global variable definitions, they are declared with the `LLVM alloca
-instruction <../LangRef.html#i_alloca>`_:
+instruction <../LangRef.html#alloca-instruction>`_:

 .. code-block:: llvm

@@ -221,7 +221,7 @@ variables in certain circumstances:

    funny pointer arithmetic is involved, the alloca will not be
    promoted.

 #. mem2reg only works on allocas of `first
-   class <../LangRef.html#t_classifications>`_ values (such as pointers,
+   class <../LangRef.html#first-class-types>`_ values (such as pointers,
    scalars and vectors), and only if the array size of the allocation is
    1 (or missing in the .ll file). mem2reg is not capable of promoting
    structs or arrays to registers. Note that the "scalarrepl" pass is
@@ -355,10 +355,11 @@ from the stack slot:

 .. code-block:: c++

-    Value *VariableExprAST::Codegen() {
+    Value *VariableExprAST::codegen() {
       // Look this variable up in the function.
       Value *V = NamedValues[Name];
-      if (V == 0) return ErrorV("Unknown variable name");
+      if (!V)
+        return ErrorV("Unknown variable name");

       // Load the value.
       return Builder.CreateLoad(V, Name.c_str());
@@ -366,7 +367,7 @@ from the stack slot:

 As you can see, this is pretty straightforward. Now we need to update
 the things that define the variables to set up the alloca. We'll start
-with ``ForExprAST::Codegen`` (see the `full code listing <#code>`_ for
+with ``ForExprAST::codegen()`` (see the `full code listing <#id1>`_ for
 the unabridged code):

 .. code-block:: c++

@@ -377,16 +378,18 @@ the unabridged code):

       AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);

       // Emit the start code first, without 'variable' in scope.
-      Value *StartVal = Start->Codegen();
-      if (StartVal == 0) return 0;
+      Value *StartVal = Start->codegen();
+      if (!StartVal)
+        return nullptr;

       // Store the value into the alloca.
       Builder.CreateStore(StartVal, Alloca);
       ...

       // Compute the end condition.
-      Value *EndCond = End->Codegen();
-      if (EndCond == 0) return EndCond;
+      Value *EndCond = End->codegen();
+      if (!EndCond)
+        return nullptr;

       // Reload, increment, and restore the alloca.  This handles the case where
       // the body of the loop mutates the variable.
@@ -396,7 +399,7 @@ the unabridged code):
       ...

 This code is virtually identical to the code `before we allowed mutable
-variables `_. The big difference is that we
+variables `_. The big difference is that we
 no longer have to construct a PHI node, and we use load/store to access
 the variable as needed.

@@ -423,7 +426,7 @@ them.

 The code for this is also pretty simple: For each argument, we make an
 alloca, store the input value to the function into the alloca, and
 register the alloca as the memory location
-for the argument. This method gets invoked by ``FunctionAST::Codegen``
+for the argument. This method gets invoked by ``FunctionAST::codegen()``
 right after it sets up the entry block for the function.

 The final missing piece is adding the mem2reg pass, which allows us to
@@ -569,11 +572,11 @@ implement codegen for the assignment operator. This looks like:

 .. code-block:: c++

-    Value *BinaryExprAST::Codegen() {
+    Value *BinaryExprAST::codegen() {
       // Special case '=' because we don't want to emit the LHS as an expression.
       if (Op == '=') {
         // Assignment requires the LHS to be an identifier.
-        VariableExprAST *LHSE = dynamic_cast<VariableExprAST *>(LHS);
+        VariableExprAST *LHSE = dynamic_cast<VariableExprAST *>(LHS.get());
         if (!LHSE)
           return ErrorV("destination of '=' must be a variable");

@@ -587,12 +590,14 @@ allowed.

 .. code-block:: c++

       // Codegen the RHS.
-        Value *Val = RHS->Codegen();
-        if (Val == 0) return 0;
+        Value *Val = RHS->codegen();
+        if (!Val)
+          return nullptr;

         // Look up the name.
         Value *Variable = NamedValues[LHSE->getName()];
-        if (Variable == 0) return ErrorV("Unknown variable name");
+        if (!Variable)
+          return ErrorV("Unknown variable name");

         Builder.CreateStore(Val, Variable);
         return Val;
@@ -649,10 +654,14 @@ this:

     ...
     static int gettok() {
     ...
-    if (IdentifierStr == "in") return tok_in;
-    if (IdentifierStr == "binary") return tok_binary;
-    if (IdentifierStr == "unary") return tok_unary;
-    if (IdentifierStr == "var") return tok_var;
+    if (IdentifierStr == "in")
+      return tok_in;
+    if (IdentifierStr == "binary")
+      return tok_binary;
+    if (IdentifierStr == "unary")
+      return tok_unary;
+    if (IdentifierStr == "var")
+      return tok_var;
     return tok_identifier;
     ...

@@ -663,14 +672,15 @@ var/in, it looks like this:

     /// VarExprAST - Expression class for var/in
     class VarExprAST : public ExprAST {
-      std::vector<std::pair<std::string, ExprAST*> > VarNames;
-      ExprAST *Body;
-    public:
-      VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
-                 ExprAST *body)
-      : VarNames(varnames), Body(body) {}
+      std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
+      std::unique_ptr<ExprAST> Body;

-      virtual Value *Codegen();
+    public:
+      VarExprAST(
+          std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
+          std::unique_ptr<ExprAST> Body)
+          : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
+
+      virtual Value *codegen();
     };

 var/in allows a list of names to be defined all at once, and each name
@@ -690,15 +700,22 @@ do is add it as a primary expression:

     /// primary
     ///   ::= identifierexpr
     ///   ::= numberexpr
     ///   ::= parenexpr
     ///   ::= ifexpr
     ///   ::= forexpr
     ///   ::= varexpr
-    static ExprAST *ParsePrimary() {
+    static std::unique_ptr<ExprAST> ParsePrimary() {
       switch (CurTok) {
-      default: return Error("unknown token when expecting an expression");
-      case tok_identifier: return ParseIdentifierExpr();
-      case tok_number:     return ParseNumberExpr();
-      case '(':            return ParseParenExpr();
-      case tok_if:         return ParseIfExpr();
-      case tok_for:        return ParseForExpr();
-      case tok_var:        return ParseVarExpr();
+      default:
+        return Error("unknown token when expecting an expression");
+      case tok_identifier:
+        return ParseIdentifierExpr();
+      case tok_number:
+        return ParseNumberExpr();
+      case '(':
+        return ParseParenExpr();
+      case tok_if:
+        return ParseIfExpr();
+      case tok_for:
+        return ParseForExpr();
+      case tok_var:
+        return ParseVarExpr();
       }
     }

@@ -708,10 +725,10 @@ Next we define ParseVarExpr:

     /// varexpr ::= 'var' identifier ('=' expression)?
     //                    (',' identifier ('=' expression)?)* 'in' expression
-    static ExprAST *ParseVarExpr() {
+    static std::unique_ptr<ExprAST> ParseVarExpr() {
       getNextToken();  // eat the var.

-      std::vector<std::pair<std::string, ExprAST*> > VarNames;
+      std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;

       // At least one variable name is required.
       if (CurTok != tok_identifier)
@@ -727,15 +744,15 @@ into the local ``VarNames`` vector.

       getNextToken();  // eat identifier.

       // Read the optional initializer.
-      ExprAST *Init = 0;
+      std::unique_ptr<ExprAST> Init;
       if (CurTok == '=') {
         getNextToken(); // eat the '='.

         Init = ParseExpression();
-        if (Init == 0) return 0;
+        if (!Init) return nullptr;
       }

-      VarNames.push_back(std::make_pair(Name, Init));
+      VarNames.push_back(std::make_pair(Name, std::move(Init)));

       // End of var list, exit loop.
       if (CurTok != ',') break;
@@ -755,10 +772,12 @@ AST node:

         return Error("expected 'in' keyword after 'var'");
       getNextToken();  // eat 'in'.
-      ExprAST *Body = ParseExpression();
-      if (Body == 0) return 0;
+      auto Body = ParseExpression();
+      if (!Body)
+        return nullptr;

-      return new VarExprAST(VarNames, Body);
+      return llvm::make_unique<VarExprAST>(std::move(VarNames),
+                                           std::move(Body));
     }

 Now that we can parse and represent the code, we need to support
@@ -766,7 +785,7 @@ emission of LLVM IR for it. This code starts out with:

 .. code-block:: c++

-    Value *VarExprAST::Codegen() {
+    Value *VarExprAST::codegen() {
       std::vector<AllocaInst *> OldBindings;

       Function *TheFunction = Builder.GetInsertBlock()->getParent();
@@ -774,7 +793,7 @@ emission of LLVM IR for it. This code starts out with:

       // Register all variables and emit their initializer.
       for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
         const std::string &VarName = VarNames[i].first;
-        ExprAST *Init = VarNames[i].second;
+        ExprAST *Init = VarNames[i].second.get();

 Basically it loops over all the variables, installing them one at a
 time. For each variable we put into the symbol table, we remember the
@@ -789,8 +808,9 @@ previous value that we replace in OldBindings.

       //    var a = a in ...   # refers to outer 'a'.
       Value *InitVal;
       if (Init) {
-        InitVal = Init->Codegen();
-        if (InitVal == 0) return 0;
+        InitVal = Init->codegen();
+        if (!InitVal)
+          return nullptr;
       } else { // If not specified, use 0.0.
         InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
       }
@@ -814,8 +834,9 @@ we evaluate the body of the var/in expression:

 .. code-block:: c++

       // Codegen the body, now that all vars are in scope.
-      Value *BodyVal = Body->Codegen();
-      if (BodyVal == 0) return 0;
+      Value *BodyVal = Body->codegen();
+      if (!BodyVal)
+        return nullptr;

 Finally, before returning, we restore the previous variable bindings:
diff --git a/docs/tutorial/LangImpl8.rst b/docs/tutorial/LangImpl8.rst
index 0b9b39c84b75..3b0f443f08d5 100644
--- a/docs/tutorial/LangImpl8.rst
+++ b/docs/tutorial/LangImpl8.rst
@@ -75,8 +75,8 @@ statement be our "main":

 .. code-block:: udiff

-  -    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-  +    PrototypeAST *Proto = new PrototypeAST("main", std::vector<std::string>());
+  -    auto Proto = llvm::make_unique<PrototypeAST>("", std::vector<std::string>());
+  +    auto Proto = llvm::make_unique<PrototypeAST>("main", std::vector<std::string>());

 just with the simple change of giving it a name.

@@ -108,19 +108,19 @@ code is that the llvm IR goes to standard error:

 @@ -1108,17 +1108,8 @@ static void HandleExtern() {
  static void HandleTopLevelExpression() {
    // Evaluate a top-level expression into an anonymous function.
 -  if (FunctionAST *F = ParseTopLevelExpr()) {
 -
 -    if (Function *LF = F->Codegen()) {
 +  if (auto FnAST = ParseTopLevelExpr()) {
 +
 -     if (auto *FnIR = FnAST->codegen()) {
 -       // We're just doing this to make sure it executes.
 -       TheExecutionEngine->finalizeObject();
 -       // JIT the function, returning a function pointer.
 -
 -       void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
 +-       void *FPtr = TheExecutionEngine->getPointerToFunction(FnIR);
 -
 -       // Cast it to the right type (takes no arguments, returns a double) so we
 -       // can call it as a native function.
 -       double (*FP)() = (double (*)())(intptr_t)FPtr;
 -       // Ignore the return value for this.
 -       (void)FP;
 -
 +     if (!F->Codegen()) {
 ++     if (!F->codegen()) {
 +       fprintf(stderr, "Error generating code for top level expr");
      }
    } else {
@@ -165,13 +165,13 @@ DWARF Emission Setup
 ====================

 Similar to the ``IRBuilder`` class we have a
-```DIBuilder`` `_ class
+`DIBuilder `_ class
 that helps in constructing debug metadata for an llvm IR file.
It corresponds 1:1 with ``IRBuilder`` and llvm IR, but with nicer names.
Using it does require that you be more familiar with DWARF terminology than
you needed to be with ``IRBuilder`` and ``Instruction`` names, but if you
read through the general documentation on the
-```Metadata Format`` `_ it
+`Metadata Format `_ it
should be a little more clear. We'll be using this class to construct all
of our IR level descriptions. Construction for it takes a module so we
need to construct it shortly after we construct our module. We've left it
@@ -237,7 +237,7 @@ Functions
 =========

 Now that we have our ``Compile Unit`` and our source locations, we can add
-function definitions to the debug info. So in ``PrototypeAST::Codegen`` we
+function definitions to the debug info. So in ``PrototypeAST::codegen()`` we
 add a few lines of code to describe a context for our subprogram, in this
 case the "File", and the actual definition of the function itself.

@@ -261,7 +261,8 @@ information) and construct our function definition:

     DISubprogram *SP = DBuilder->createFunction(
         FContext, Name, StringRef(), Unit, LineNo,
         CreateFunctionType(Args.size(), Unit), false /* internal linkage */,
-        true /* definition */, ScopeLine, DINode::FlagPrototyped, false, F);
+        true /* definition */, ScopeLine, DINode::FlagPrototyped, false);
+    F->setSubprogram(SP);

 and we now have a DISubprogram that contains a reference to all of our
 metadata for the function.

@@ -307,10 +308,12 @@ and then we have added to all of our AST classes a source location:

       SourceLocation Loc;

     public:
+      ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
+      virtual ~ExprAST() {}
+      virtual Value* codegen() = 0;
       int getLine() const { return Loc.Line; }
       int getCol() const { return Loc.Col; }
-      ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
-      virtual std::ostream &dump(std::ostream &out, int ind) {
+      virtual raw_ostream &dump(raw_ostream &out, int ind) {
        return out << ':' << getLine() << ':' << getCol() << '\n';
      }

@@ -318,7 +321,8 @@ that we pass down through when we create a new expression:

 .. code-block:: c++

-    LHS = new BinaryExprAST(BinLoc, BinOp, LHS, RHS);
+    LHS = llvm::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS),
+                                           std::move(RHS));

 giving us locations for each of our expressions and variables.

@@ -395,13 +399,12 @@ argument allocas in ``PrototypeAST::CreateArgumentAllocas``.

     DIScope *Scope = KSDbgInfo.LexicalBlocks.back();
     DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU.getFilename(),
                                         KSDbgInfo.TheCU.getDirectory());
-    DILocalVariable D = DBuilder->createLocalVariable(
-        dwarf::DW_TAG_arg_variable, Scope, Args[Idx], Unit, Line,
-        KSDbgInfo.getDoubleTy(), Idx);
+    DILocalVariable *D = DBuilder->createParameterVariable(
+        Scope, Args[Idx], Idx + 1, Unit, Line, KSDbgInfo.getDoubleTy(), true);

-    Instruction *Call = DBuilder->insertDeclare(
-        Alloca, D, DBuilder->createExpression(), Builder.GetInsertBlock());
-    Call->setDebugLoc(DebugLoc::get(Line, 0, Scope));
+    DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
+                            DebugLoc::get(Line, 0, Scope),
+                            Builder.GetInsertBlock());

 Here we're doing a few things. First, we're grabbing our current scope
 for the variable so we can say what range of code our variable is valid
@@ -409,7 +412,7 @@ through. Second, we're creating the variable, giving it the scope,
 the name, source location, type, and since it's an argument, the argument
 index.
Third, we create an ``llvm.dbg.declare`` call to indicate at the IR level
that we've got a variable in an alloca (and it gives a starting
-location for the variable). Lastly, we set a source location for the
+location for the variable), and we set a source location for the
 beginning of the scope on the declare.

 One interesting thing to note at this point is that various debuggers have
diff --git a/docs/tutorial/LangImpl9.rst b/docs/tutorial/LangImpl9.rst
index 6c43d53f90f9..f02bba857c14 100644
--- a/docs/tutorial/LangImpl9.rst
+++ b/docs/tutorial/LangImpl9.rst
@@ -49,7 +49,7 @@ For example, try adding:

    extending the type system in all sorts of interesting ways. Simple
    arrays are very easy and are quite useful for many different
    applications. Adding them is mostly an exercise in learning how the
-   LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction
+   LLVM `getelementptr <../LangRef.html#getelementptr-instruction>`_ instruction
    works: it is so nifty/unconventional, it `has its own
    FAQ <../GetElementPtr.html>`_! If you add support
    for recursive types (e.g. linked lists), make sure to read the
    `section in the LLVM
diff --git a/docs/tutorial/OCamlLangImpl1.rst b/docs/tutorial/OCamlLangImpl1.rst
index 94ca3a5aa4d3..cf968b5ae89c 100644
--- a/docs/tutorial/OCamlLangImpl1.rst
+++ b/docs/tutorial/OCamlLangImpl1.rst
@@ -139,7 +139,7 @@ useful for mutually recursive functions). For example:

 A more interesting example is included in Chapter 6 where we write a
 little Kaleidoscope application that `displays a Mandelbrot
-Set `_ at various levels of magnification.
+Set `_ at various levels of magnification.

 Let's dive into the implementation of this language!

@@ -275,7 +275,7 @@ file. These are handled with this code:

         | [< >] -> [< >]

 With this, we have the complete lexer for the basic Kaleidoscope
-language (the `full code listing `_ for the
+language (the `full code listing `_ for the
 Lexer is available in the `next chapter `_ of the
 tutorial). Next we'll `build a simple parser that uses this to build
 an Abstract Syntax Tree `_. When we have that, we'll
diff --git a/docs/tutorial/OCamlLangImpl2.rst b/docs/tutorial/OCamlLangImpl2.rst
index 905b306746f1..f5d6cd6822c9 100644
--- a/docs/tutorial/OCamlLangImpl2.rst
+++ b/docs/tutorial/OCamlLangImpl2.rst
@@ -130,7 +130,7 @@ We start with numeric literals, because they are the simplest to process.

 For each production in our grammar, we'll define a function which parses
 that production. We call this class of expressions "primary" expressions,
 for reasons that will become more clear `later in
-the tutorial `_. In order to parse an
+the tutorial `_. In order to parse an
 arbitrary primary expression, we need to determine what sort of
 expression it is. For numeric literals, we have:

@@ -280,7 +280,7 @@ fixed-size array). With the helper above defined, we can now start
 parsing binary expressions. The basic idea of operator precedence
 parsing is to break down an expression with potentially ambiguous binary
 operators into
-pieces. Consider ,for example, the expression "a+b+(c+d)\*e\*f+g".
+pieces. Consider, for example, the expression "a+b+(c+d)\*e\*f+g".
 Operator precedence parsing considers this as a stream of primary
 expressions separated by binary operators. As such, it will first parse
 the leading primary expression "a", then it will see the pairs [+, b]
@@ -505,7 +505,7 @@ The Driver

 The driver for this simply invokes all of the parsing pieces with a
 top-level dispatch loop.
There isn't much interesting here, so I'll just -include the top-level loop. See `below <#code>`_ for full code in the +include the top-level loop. See `below <#full-code-listing>`_ for full code in the "Top-Level Parsing" section. .. code-block:: ocaml diff --git a/docs/tutorial/OCamlLangImpl3.rst b/docs/tutorial/OCamlLangImpl3.rst index 10d463b93ac3..a76b46d1bf6b 100644 --- a/docs/tutorial/OCamlLangImpl3.rst +++ b/docs/tutorial/OCamlLangImpl3.rst @@ -114,8 +114,8 @@ values that can be in the ``Codegen.named_values`` map are function arguments. This code simply checks to see that the specified name is in the map (if not, an unknown variable is being referenced) and returns the value for it. In future chapters, we'll add support for `loop -induction variables `_ in the symbol table, and for -`local variables `_. +induction variables `_ in the symbol table, and for +`local variables `_. .. code-block:: ocaml @@ -152,22 +152,22 @@ automatically provide each one with an increasing, unique numeric suffix. Local value names for instructions are purely optional, but it makes it much easier to read the IR dumps. -`LLVM instructions <../LangRef.html#instref>`_ are constrained by strict +`LLVM instructions <../LangRef.html#instruction-reference>`_ are constrained by strict rules: for example, the Left and Right operators of an `add -instruction <../LangRef.html#i_add>`_ must have the same type, and the +instruction <../LangRef.html#add-instruction>`_ must have the same type, and the result type of the add must match the operand types. Because all values in Kaleidoscope are doubles, this makes for very simple code for add, sub and mul. On the other hand, LLVM specifies that the `fcmp -instruction <../LangRef.html#i_fcmp>`_ always returns an 'i1' value (a +instruction <../LangRef.html#fcmp-instruction>`_ always returns an 'i1' value (a one bit integer). The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get these semantics, we combine the fcmp instruction with a `uitofp -instruction <../LangRef.html#i_uitofp>`_. This instruction converts its +instruction <../LangRef.html#uitofp-to-instruction>`_. This instruction converts its input integer into a floating point value by treating the input as an unsigned value. In contrast, if we used the `sitofp -instruction <../LangRef.html#i_sitofp>`_, the Kaleidoscope '<' operator +instruction <../LangRef.html#sitofp-to-instruction>`_, the Kaleidoscope '<' operator would return 0.0 and -1.0, depending on the input value. .. code-block:: ocaml @@ -196,7 +196,7 @@ to resolve function names for us. Once we have the function to call, we recursively codegen each argument that is to be passed in, and create an LLVM `call -instruction <../LangRef.html#i_call>`_. Note that LLVM uses the native C +instruction <../LangRef.html#call-instruction>`_. Note that LLVM uses the native C calling conventions by default, allowing these calls to also call into standard library functions like "sin" and "cos", with no additional effort. @@ -253,7 +253,7 @@ The final line above checks if the function has already been defined in This indicates the type and name to use, as well as which module to insert into. By default we assume a function has ``Llvm.Linkage.ExternalLinkage``. "`external -linkage `_" means that the function may be defined +linkage <../LangRef.html#linkage>`_" means that the function may be defined outside the current module and/or that it is callable by functions outside the module. 
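For comparison, the C++ chapters create exactly this kind of prototype with ``Function::Create``; the following sketch is the shape the Chapter 3 C++ diff later in this patch converges on, with ``Name``, ``Args`` and ``TheModule`` coming from that chapter:

.. code-block:: c++

    // Build the type double(double, ..., double): one double per argument.
    std::vector<Type *> Doubles(Args.size(),
                                Type::getDoubleTy(getGlobalContext()));
    FunctionType *FT =
        FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);

    // ExternalLinkage: visible to, and callable from, code outside this module.
    Function *F =
        Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());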
The "``name``" passed in is the name the user specified: this name is registered in "``Codegen.the_module``"s symbol @@ -360,7 +360,7 @@ Once the insertion point is set up, we call the ``Codegen.codegen_func`` method for the root expression of the function. If no error happens, this emits code to compute the expression into the entry block and returns the value that was computed. Assuming no error, we then create -an LLVM `ret instruction <../LangRef.html#i_ret>`_, which completes the +an LLVM `ret instruction <../LangRef.html#ret-instruction>`_, which completes the function. Once the function is built, we call ``Llvm_analysis.assert_valid_function``, which is provided by LLVM. This function does a variety of consistency checks on the generated code, to @@ -413,10 +413,10 @@ For example: Note how the parser turns the top-level expression into anonymous functions for us. This will be handy when we add `JIT -support `_ in the next chapter. Also note that +support `_ in the next chapter. Also note that the code is very literally transcribed, no optimizations are being performed. We will `add -optimizations `_ explicitly in the +optimizations `_ explicitly in the next chapter. :: diff --git a/docs/tutorial/OCamlLangImpl4.rst b/docs/tutorial/OCamlLangImpl4.rst index b13b2afa8883..feeba01be24b 100644 --- a/docs/tutorial/OCamlLangImpl4.rst +++ b/docs/tutorial/OCamlLangImpl4.rst @@ -130,7 +130,7 @@ exactly the code we have now, except that we would defer running the optimizer until the entire file has been parsed. In order to get per-function optimizations going, we need to set up a -`Llvm.PassManager <../WritingAnLLVMPass.html#passmanager>`_ to hold and +`Llvm.PassManager <../WritingAnLLVMPass.html#what-passmanager-does>`_ to hold and organize the LLVM optimizations that we want to run. Once we have that, we can add a set of optimizations to run. The code looks like this: diff --git a/docs/tutorial/OCamlLangImpl5.rst b/docs/tutorial/OCamlLangImpl5.rst index 0faecfb9222e..675b9bc1978b 100644 --- a/docs/tutorial/OCamlLangImpl5.rst +++ b/docs/tutorial/OCamlLangImpl5.rst @@ -175,7 +175,7 @@ Kaleidoscope looks like this: To visualize the control flow graph, you can use a nifty feature of the LLVM '`opt `_' tool. If you put this LLVM IR into "t.ll" and run "``llvm-as < t.ll | opt -analyze -view-cfg``", `a -window will pop up <../ProgrammersManual.html#ViewGraph>`_ and you'll +window will pop up <../ProgrammersManual.html#viewing-graphs-while-debugging-code>`_ and you'll see this graph: .. figure:: LangImpl5-cfg.png diff --git a/docs/tutorial/OCamlLangImpl6.rst b/docs/tutorial/OCamlLangImpl6.rst index 36bffa8e9696..a3ae11fd7e54 100644 --- a/docs/tutorial/OCamlLangImpl6.rst +++ b/docs/tutorial/OCamlLangImpl6.rst @@ -24,7 +24,7 @@ is good or bad. In this tutorial we'll assume that it is okay to use this as a way to show some interesting parsing techniques. At the end of this tutorial, we'll run through an example Kaleidoscope -application that `renders the Mandelbrot set <#example>`_. This gives an +application that `renders the Mandelbrot set <#kicking-the-tires>`_. This gives an example of what you can build with Kaleidoscope and its feature set. User-defined Operators: the Idea @@ -108,7 +108,7 @@ keywords: | "unary" -> [< 'Token.Unary; stream >] This just adds lexer support for the unary and binary keywords, like we -did in `previous chapters `_. One nice +did in `previous chapters `_. 
One nice thing about our current AST, is that we represent binary operators with full generalisation by using their ASCII code as the opcode. For our extended operators, we'll use this same representation, so we don't need diff --git a/docs/tutorial/OCamlLangImpl7.rst b/docs/tutorial/OCamlLangImpl7.rst index 98ea93f42f3f..c8c701b91012 100644 --- a/docs/tutorial/OCamlLangImpl7.rst +++ b/docs/tutorial/OCamlLangImpl7.rst @@ -118,7 +118,7 @@ that @G defines *space* for an i32 in the global data area, but its *name* actually refers to the address for that space. Stack variables work the same way, except that instead of being declared with global variable definitions, they are declared with the `LLVM alloca -instruction <../LangRef.html#i_alloca>`_: +instruction <../LangRef.html#alloca-instruction>`_: .. code-block:: llvm @@ -221,7 +221,7 @@ variables in certain circumstances: funny pointer arithmetic is involved, the alloca will not be promoted. #. mem2reg only works on allocas of `first - class <../LangRef.html#t_classifications>`_ values (such as pointers, + class <../LangRef.html#first-class-types>`_ values (such as pointers, scalars and vectors), and only if the array size of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of promoting structs or arrays to registers. Note that the "scalarrepl" pass is @@ -367,7 +367,7 @@ from the stack slot: As you can see, this is pretty straightforward. Now we need to update the things that define the variables to set up the alloca. We'll start -with ``codegen_expr Ast.For ...`` (see the `full code listing <#code>`_ +with ``codegen_expr Ast.For ...`` (see the `full code listing <#id1>`_ for the unabridged code): .. code-block:: ocaml @@ -407,7 +407,7 @@ for the unabridged code): ... This code is virtually identical to the code `before we allowed mutable -variables `_. The big difference is that +variables `_. The big difference is that we no longer have to construct a PHI node, and we use load/store to access the variable as needed. diff --git a/docs/tutorial/OCamlLangImpl8.rst b/docs/tutorial/OCamlLangImpl8.rst index 0346fa9fed14..3ab6db35dfb0 100644 --- a/docs/tutorial/OCamlLangImpl8.rst +++ b/docs/tutorial/OCamlLangImpl8.rst @@ -48,7 +48,7 @@ For example, try adding: extending the type system in all sorts of interesting ways. Simple arrays are very easy and are quite useful for many different applications. Adding them is mostly an exercise in learning how the - LLVM `getelementptr <../LangRef.html#i_getelementptr>`_ instruction + LLVM `getelementptr <../LangRef.html#getelementptr-instruction>`_ instruction works: it is so nifty/unconventional, it `has its own FAQ <../GetElementPtr.html>`_! If you add support for recursive types (e.g. linked lists), make sure to read the `section in the LLVM diff --git a/docs/yaml2obj.rst b/docs/yaml2obj.rst index 1812e58914ae..d18ce02a336c 100644 --- a/docs/yaml2obj.rst +++ b/docs/yaml2obj.rst @@ -65,6 +65,7 @@ Here's a simplified Kwalify_ schema with an extension to allow alternate types. 
, IMAGE_FILE_MACHINE_AMD64 , IMAGE_FILE_MACHINE_ARM , IMAGE_FILE_MACHINE_ARMNT + , IMAGE_FILE_MACHINE_ARM64 , IMAGE_FILE_MACHINE_EBC , IMAGE_FILE_MACHINE_I386 , IMAGE_FILE_MACHINE_IA64 diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp index 8026adc8d075..d8c54b50b854 100644 --- a/examples/BrainF/BrainF.cpp +++ b/examples/BrainF/BrainF.cpp @@ -29,6 +29,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" #include + using namespace llvm; //Set the constants for naming @@ -44,7 +45,7 @@ Module *BrainF::parse(std::istream *in1, int mem, CompileFlags cf, comflag = cf; header(Context); - readloop(0, 0, 0, Context); + readloop(nullptr, nullptr, nullptr, Context); delete builder; return module; } @@ -68,7 +69,6 @@ void BrainF::header(LLVMContext& C) { getOrInsertFunction("putchar", IntegerType::getInt32Ty(C), IntegerType::getInt32Ty(C), NULL)); - //Function header //define void @brainf() @@ -85,7 +85,7 @@ void BrainF::header(LLVMContext& C) { Constant* allocsize = ConstantExpr::getSizeOf(Int8Ty); allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy); ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, Int8Ty, allocsize, val_mem, - NULL, "arr"); + nullptr, "arr"); BB->getInstList().push_back(cast(ptr_arr)); //call void @llvm.memset.p0i8.i32(i8 *%arr, i8 0, i32 %d, i32 1, i1 0) @@ -114,8 +114,6 @@ void BrainF::header(LLVMContext& C) { ConstantInt::get(C, APInt(32, memtotal/2)), headreg); - - //Function footer //brainf.end: @@ -127,8 +125,6 @@ void BrainF::header(LLVMContext& C) { //ret void ReturnInst::Create(C, endbb); - - //Error block for array out of bounds if (comflag & flag_arraybounds) { diff --git a/examples/BrainF/BrainFDriver.cpp b/examples/BrainF/BrainFDriver.cpp index 99c8ff36dc61..1a38c67b0d4a 100644 --- a/examples/BrainF/BrainFDriver.cpp +++ b/examples/BrainF/BrainFDriver.cpp @@ -64,9 +64,9 @@ void addMainFunction(Module *mod) { IntegerType::getInt8Ty(mod->getContext()))), NULL)); { Function::arg_iterator args = main_func->arg_begin(); - Value *arg_0 = args++; + Value *arg_0 = &*args++; arg_0->setName("argc"); - Value *arg_1 = args++; + Value *arg_1 = &*args++; arg_1->setName("argv"); } diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index f98c403deb0b..5727066d6227 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,7 +4,7 @@ add_subdirectory(HowToUseJIT) add_subdirectory(Kaleidoscope) add_subdirectory(ModuleMaker) -if( ( NOT WIN32 ) AND ( NOT "${LLVM_NATIVE_ARCH}" STREQUAL "ARM" ) ) +if(LLVM_ENABLE_EH AND (NOT WIN32) AND (NOT "${LLVM_NATIVE_ARCH}" STREQUAL "ARM")) add_subdirectory(ExceptionDemo) endif() diff --git a/examples/ExceptionDemo/CMakeLists.txt b/examples/ExceptionDemo/CMakeLists.txt index b4354f66f0d6..793cf291ca6f 100644 --- a/examples/ExceptionDemo/CMakeLists.txt +++ b/examples/ExceptionDemo/CMakeLists.txt @@ -5,14 +5,14 @@ set(LLVM_LINK_COMPONENTS MCJIT RuntimeDyld Support + Target nativecodegen ) # Enable EH and RTTI for this demo -set(LLVM_REQUIRES_EH 1) -set(LLVM_REQUIRES_RTTI 1) - -set(LLVM_BUILD_EXAMPLES OFF) +if(NOT LLVM_ENABLE_EH) + message(FATAL_ERROR "ExceptionDemo must require EH.") +endif() add_llvm_example(ExceptionDemo ExceptionDemo.cpp diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index 81337c4823b0..444ee2649fa7 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -77,6 +77,7 @@ #include #include +#include #ifndef USE_GLOBAL_STR_CONSTS #define USE_GLOBAL_STR_CONSTS true @@ 
-319,7 +320,7 @@ void printStr(char *toPrint) { } -/// Deletes the true previosly allocated exception whose address +/// Deletes the true previously allocated exception whose address /// is calculated from the supplied OurBaseException_t::unwindException /// member address. Handles (ignores), NULL pointers. /// @param expToDelete exception to delete @@ -569,8 +570,8 @@ static bool handleActionValue(int64_t *resultAction, fprintf(stderr, "handleActionValue(...): exceptionObject = <%p>, " "excp = <%p>.\n", - exceptionObject, - excp); + (void*)exceptionObject, + (void*)excp); #endif const uint8_t *actionPos = (uint8_t*) actionEntry, @@ -588,8 +589,8 @@ static bool handleActionValue(int64_t *resultAction, #ifdef DEBUG fprintf(stderr, - "handleActionValue(...):typeOffset: <%lld>, " - "actionOffset: <%lld>.\n", + "handleActionValue(...):typeOffset: <%" PRIi64 ">, " + "actionOffset: <%" PRIi64 ">.\n", typeOffset, actionOffset); #endif @@ -848,7 +849,7 @@ _Unwind_Reason_Code ourPersonality(int version, #ifdef DEBUG fprintf(stderr, "ourPersonality(...):lsda = <%p>.\n", - lsda); + (void*)lsda); #endif // The real work of the personality function is captured here @@ -971,7 +972,7 @@ void generateIntegerPrint(llvm::LLVMContext &context, llvm::Value *cast = builder.CreateBitCast(stringVar, builder.getInt8PtrTy()); - builder.CreateCall2(&printFunct, &toPrint, cast); + builder.CreateCall(&printFunct, {&toPrint, cast}); } @@ -1264,10 +1265,10 @@ static llvm::Function *createCatchWrappedInvokeFunction( builder.SetInsertPoint(exceptionBlock); llvm::Function *personality = module.getFunction("ourPersonality"); + ret->setPersonalityFn(personality); llvm::LandingPadInst *caughtResult = builder.CreateLandingPad(ourCaughtResultType, - personality, numExceptionsToCatch, "landingPad"); @@ -1694,7 +1695,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos, #ifdef DEBUG fprintf(stderr, "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset " - "= %lld, sizeof(struct OurBaseException_t) - " + "= %" PRIi64 ", sizeof(struct OurBaseException_t) - " "sizeof(struct _Unwind_Exception) = %lu.\n", ourBaseFromUnwindOffset, sizeof(struct OurBaseException_t) - @@ -1973,7 +1974,7 @@ int main(int argc, char *argv[]) { // Set up the optimizer pipeline. // Start with registering info about how the // target lays out data structures. - module->setDataLayout(*executionEngine->getDataLayout()); + module->setDataLayout(executionEngine->getDataLayout()); // Optimizations turned on #ifdef ADD_OPT_PASSES diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp index 8092e19380dd..ecb49eb92e1a 100644 --- a/examples/Fibonacci/fibonacci.cpp +++ b/examples/Fibonacci/fibonacci.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; static Function *CreateFibFunction(Module *M, LLVMContext &Context) { @@ -41,7 +42,7 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) { Function *FibF = cast(M->getOrInsertFunction("fib", Type::getInt32Ty(Context), Type::getInt32Ty(Context), - (Type *)0)); + nullptr)); // Add a basic block to the function. BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF); @@ -51,7 +52,7 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) { Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2); // Get pointer to the integer argument of the add1 function... - Argument *ArgX = FibF->arg_begin(); // Get the arg. 
+ Argument *ArgX = &*FibF->arg_begin(); // Get the arg. ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. // Create the true_block. @@ -87,7 +88,6 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) { return FibF; } - int main(int argc, char **argv) { int n = argc > 1 ? atol(argv[1]) : 24; @@ -106,7 +106,6 @@ int main(int argc, char **argv) { ExecutionEngine *EE = EngineBuilder(std::move(Owner)) .setErrorStr(&errStr) - .setEngineKind(EngineKind::JIT) .create(); if (!EE) { diff --git a/examples/HowToUseJIT/HowToUseJIT.cpp b/examples/HowToUseJIT/HowToUseJIT.cpp index 91ea17dd22bf..e0bf6a00bf01 100644 --- a/examples/HowToUseJIT/HowToUseJIT.cpp +++ b/examples/HowToUseJIT/HowToUseJIT.cpp @@ -65,7 +65,7 @@ int main() { Function *Add1F = cast(M->getOrInsertFunction("add1", Type::getInt32Ty(Context), Type::getInt32Ty(Context), - (Type *)0)); + nullptr)); // Add a basic block to the function. As before, it automatically inserts // because of the last argument. @@ -80,7 +80,7 @@ int main() { // Get pointers to the integer argument of the add1 function... assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg - Argument *ArgX = Add1F->arg_begin(); // Get the arg + Argument *ArgX = &*Add1F->arg_begin(); // Get the arg ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. // Create the add instruction, inserting it into the end of BB. @@ -91,12 +91,11 @@ int main() { // Now, function add1 is ready. - // Now we're going to create function `foo', which returns an int and takes no // arguments. Function *FooF = cast(M->getOrInsertFunction("foo", Type::getInt32Ty(Context), - (Type *)0)); + nullptr)); // Add a basic block to the FooF function. BB = BasicBlock::Create(Context, "EntryBlock", FooF); diff --git a/examples/Kaleidoscope/Chapter2/CMakeLists.txt b/examples/Kaleidoscope/Chapter2/CMakeLists.txt index fed3f4b78c77..6224d9ac8640 100644 --- a/examples/Kaleidoscope/Chapter2/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter2/CMakeLists.txt @@ -1,3 +1,9 @@ add_kaleidoscope_chapter(Kaleidoscope-Ch2 toy.cpp ) + +if(LLVM_COMPILER_IS_GCC_COMPATIBLE) + target_compile_options(Kaleidoscope-Ch2 PRIVATE + -Wno-unused-private-field + ) +endif() diff --git a/examples/Kaleidoscope/Chapter2/Makefile b/examples/Kaleidoscope/Chapter2/Makefile index 1a9b94ce541e..fa27e6e06687 100644 --- a/examples/Kaleidoscope/Chapter2/Makefile +++ b/examples/Kaleidoscope/Chapter2/Makefile @@ -10,4 +10,6 @@ LEVEL = ../../.. TOOLNAME = Kaleidoscope-Ch2 EXAMPLE_TOOL = 1 +LLVM_CXXFLAGS := -Wno-unused-private-field + include $(LEVEL)/Makefile.common diff --git a/examples/Kaleidoscope/Chapter2/toy.cpp b/examples/Kaleidoscope/Chapter2/toy.cpp index cd901394a524..69f359961293 100644 --- a/examples/Kaleidoscope/Chapter2/toy.cpp +++ b/examples/Kaleidoscope/Chapter2/toy.cpp @@ -1,10 +1,22 @@ #include #include -#include #include +#include #include #include +namespace helper { +// Cloning make_unique here until it's standard in C++14. +// Using a namespace to avoid conflicting with MSVC's std::make_unique (which +// ADL can sometimes find in unqualified calls). +template +static + typename std::enable_if::value, std::unique_ptr>::type + make_unique(Args &&... 
args) { + return std::unique_ptr(new T(std::forward(args)...)); +} +} + //===----------------------------------------------------------------------===// // Lexer //===----------------------------------------------------------------------===// @@ -15,14 +27,16 @@ enum Token { tok_eof = -1, // commands - tok_def = -2, tok_extern = -3, + tok_def = -2, + tok_extern = -3, // primary - tok_identifier = -4, tok_number = -5 + tok_identifier = -4, + tok_number = -5 }; -static std::string IdentifierStr; // Filled in if tok_identifier -static double NumVal; // Filled in if tok_number +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number /// gettok - Return the next token from standard input. static int gettok() { @@ -37,31 +51,34 @@ static int gettok() { while (isalnum((LastChar = getchar()))) IdentifierStr += LastChar; - if (IdentifierStr == "def") return tok_def; - if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; return tok_identifier; } - if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ std::string NumStr; do { NumStr += LastChar; LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } if (LastChar == '#') { // Comment until end of line. - do LastChar = getchar(); + do + LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -84,30 +101,40 @@ public: /// NumberExprAST - Expression class for numeric literals like "1.0". class NumberExprAST : public ExprAST { + double Val; + public: - NumberExprAST(double val) {} + NumberExprAST(double Val) : Val(Val) {} }; /// VariableExprAST - Expression class for referencing a variable, like "a". class VariableExprAST : public ExprAST { std::string Name; + public: - VariableExprAST(const std::string &name) : Name(name) {} + VariableExprAST(const std::string &Name) : Name(Name) {} }; /// BinaryExprAST - Expression class for a binary operator. class BinaryExprAST : public ExprAST { + char Op; + std::unique_ptr LHS, RHS; + public: - BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {} + BinaryExprAST(char Op, std::unique_ptr LHS, + std::unique_ptr RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} }; /// CallExprAST - Expression class for function calls. class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; + public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -116,16 +143,21 @@ public: class PrototypeAST { std::string Name; std::vector Args; + public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} - + PrototypeAST(const std::string &Name, std::vector Args) + : Name(Name), Args(std::move(Args)) {} }; /// FunctionAST - This class represents a function definition itself. 
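// A note on the ownership model above (illustration, not part of the patch):
// because every AST node now holds its children through std::unique_ptr, a
// definition such as
//
//   def foo(a b) a+b
//
// parses into a FunctionAST that uniquely owns its PrototypeAST and its body
// ExprAST tree. Destroying the FunctionAST frees every node, so the
// early-return error paths in the parser no longer leak memory.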
class FunctionAST { + std::unique_ptr Proto; + std::unique_ptr Body; + public: - FunctionAST(PrototypeAST *proto, ExprAST *body) {} + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} }; } // end anonymous namespace @@ -137,9 +169,7 @@ public: /// token the parser is looking at. getNextToken reads another token from the /// lexer and updates CurTok with its results. static int CurTok; -static int getNextToken() { - return CurTok = gettok(); -} +static int getNextToken() { return CurTok = gettok(); } /// BinopPrecedence - This holds the precedence for each binary operator that is /// defined. @@ -149,40 +179,69 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; - if (TokPrec <= 0) return -1; + if (TokPrec <= 0) + return -1; return TokPrec; } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} -PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +std::unique_ptr Error(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} -static ExprAST *ParseExpression(); +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = helper::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - - getNextToken(); // eat identifier. - + + getNextToken(); // eat identifier. + if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); - + return helper::make_unique(IdName); + // Call. - getNextToken(); // eat ( - std::vector Args; + getNextToken(); // eat ( + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; - if (CurTok == ')') break; + if (CurTok == ')') + break; if (CurTok != ',') return Error("Expected ')' or ',' in argument list"); @@ -192,133 +251,125 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - - return new CallExprAST(IdName, Args); -} -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). 
- return V; + return helper::make_unique(IdName, std::move(Args)); } /// primary /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { - default: return Error("unknown token when expecting an expression"); - case tok_identifier: return ParseIdentifierExpr(); - case tok_number: return ParseNumberExpr(); - case '(': return ParseParenExpr(); + default: + return Error("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); } } /// binoprhs /// ::= ('+' primary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; - getNextToken(); // eat binop - + getNextToken(); // eat binop + // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); - if (!RHS) return 0; - + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec+1, RHS); - if (RHS == 0) return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } - + // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = helper::make_unique(BinOp, std::move(LHS), + std::move(RHS)); } } /// expression /// ::= primary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); - if (!LHS) return 0; - - return ParseBinOpRHS(0, LHS); +static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); std::string FnName = IdentifierStr; getNextToken(); - + if (CurTok != '(') return ErrorP("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorP("Expected ')' in prototype"); - + // success. - getNextToken(); // eat ')'. - - return new PrototypeAST(FnName, ArgNames); + getNextToken(); // eat ')'. + + return helper::make_unique(FnName, std::move(ArgNames)); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { - getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) return 0; +static std::unique_ptr ParseDefinition() { + getNextToken(); // eat def. 
+ auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return helper::make_unique(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { +static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = helper::make_unique("__anon_expr", + std::vector()); + return helper::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { - getNextToken(); // eat extern. +static std::unique_ptr ParseExtern() { + getNextToken(); // eat extern. return ParsePrototype(); } @@ -359,11 +410,20 @@ static void MainLoop() { while (1) { fprintf(stderr, "ready> "); switch (CurTok) { - case tok_eof: return; - case ';': getNextToken(); break; // ignore top-level semicolons. - case tok_def: HandleDefinition(); break; - case tok_extern: HandleExtern(); break; - default: HandleTopLevelExpression(); break; + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; } } } @@ -378,7 +438,7 @@ int main() { BinopPrecedence['<'] = 10; BinopPrecedence['+'] = 20; BinopPrecedence['-'] = 20; - BinopPrecedence['*'] = 40; // highest. + BinopPrecedence['*'] = 40; // highest. // Prime the first token. fprintf(stderr, "ready> "); diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp index c60f76725fdb..05697ea70a49 100644 --- a/examples/Kaleidoscope/Chapter3/toy.cpp +++ b/examples/Kaleidoscope/Chapter3/toy.cpp @@ -1,13 +1,14 @@ -#include "llvm/IR/Verifier.h" -#include "llvm/IR/DerivedTypes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" #include #include #include #include #include + using namespace llvm; //===----------------------------------------------------------------------===// @@ -20,14 +21,16 @@ enum Token { tok_eof = -1, // commands - tok_def = -2, tok_extern = -3, + tok_def = -2, + tok_extern = -3, // primary - tok_identifier = -4, tok_number = -5 + tok_identifier = -4, + tok_number = -5 }; -static std::string IdentifierStr; // Filled in if tok_identifier -static double NumVal; // Filled in if tok_number +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number /// gettok - Return the next token from standard input. 
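/// For example, given the input "def fib(x)", successive gettok() calls
/// return tok_def, tok_identifier (IdentifierStr == "fib"), '(',
/// tok_identifier (IdentifierStr == "x") and ')'; characters the lexer does
/// not recognize are returned as their own ASCII value.
/// (Illustrative walk-through, not part of the patch.)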
static int gettok() { @@ -42,31 +45,34 @@ static int gettok() { while (isalnum((LastChar = getchar()))) IdentifierStr += LastChar; - if (IdentifierStr == "def") return tok_def; - if (IdentifierStr == "extern") return tok_extern; + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; return tok_identifier; } - if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ std::string NumStr; do { NumStr += LastChar; LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } if (LastChar == '#') { // Comment until end of line. - do LastChar = getchar(); + do + LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -85,43 +91,49 @@ namespace { class ExprAST { public: virtual ~ExprAST() {} - virtual Value *Codegen() = 0; + virtual Value *codegen() = 0; }; /// NumberExprAST - Expression class for numeric literals like "1.0". class NumberExprAST : public ExprAST { double Val; + public: - NumberExprAST(double val) : Val(val) {} - Value *Codegen() override; + NumberExprAST(double Val) : Val(Val) {} + Value *codegen() override; }; /// VariableExprAST - Expression class for referencing a variable, like "a". class VariableExprAST : public ExprAST { std::string Name; + public: - VariableExprAST(const std::string &name) : Name(name) {} - Value *Codegen() override; + VariableExprAST(const std::string &Name) : Name(Name) {} + Value *codegen() override; }; /// BinaryExprAST - Expression class for a binary operator. class BinaryExprAST : public ExprAST { char Op; - ExprAST *LHS, *RHS; + std::unique_ptr LHS, RHS; + public: - BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) - : Op(op), LHS(lhs), RHS(rhs) {} - Value *Codegen() override; + BinaryExprAST(char Op, std::unique_ptr LHS, + std::unique_ptr RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + Value *codegen() override; }; /// CallExprAST - Expression class for function calls. class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; + public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} - Value *Codegen() override; + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -130,22 +142,24 @@ public: class PrototypeAST { std::string Name; std::vector Args; + public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} - - Function *Codegen(); + PrototypeAST(const std::string &Name, std::vector Args) + : Name(Name), Args(std::move(Args)) {} + Function *codegen(); + const std::string &getName() const { return Name; } }; /// FunctionAST - This class represents a function definition itself. 
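// (Illustration, not part of the patch: each concrete AST class overrides
// "Value *codegen()", so calling codegen() on the root of an expression tree
// recursively emits IR for the whole expression. For instance,
// BinaryExprAST::codegen() later in this file first emits its LHS and RHS
// subtrees and then the instruction that joins them.)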
class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; + public: - FunctionAST(PrototypeAST *proto, ExprAST *body) - : Proto(proto), Body(body) {} - - Function *Codegen(); + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); }; } // end anonymous namespace @@ -157,9 +171,7 @@ public: /// token the parser is looking at. getNextToken reads another token from the /// lexer and updates CurTok with its results. static int CurTok; -static int getNextToken() { - return CurTok = gettok(); -} +static int getNextToken() { return CurTok = gettok(); } /// BinopPrecedence - This holds the precedence for each binary operator that is /// defined. @@ -169,41 +181,70 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; - if (TokPrec <= 0) return -1; + if (TokPrec <= 0) + return -1; return TokPrec; } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} -PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } -FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } +std::unique_ptr Error(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} -static ExprAST *ParseExpression(); +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - - getNextToken(); // eat identifier. - + + getNextToken(); // eat identifier. + if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); - + return llvm::make_unique(IdName); + // Call. - getNextToken(); // eat ( - std::vector Args; + getNextToken(); // eat ( + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; - if (CurTok == ')') break; + if (CurTok == ')') + break; if (CurTok != ',') return Error("Expected ')' or ',' in argument list"); @@ -213,133 +254,125 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - - return new CallExprAST(IdName, Args); -} -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). 
- return V; + return llvm::make_unique(IdName, std::move(Args)); } /// primary /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { - default: return Error("unknown token when expecting an expression"); - case tok_identifier: return ParseIdentifierExpr(); - case tok_number: return ParseNumberExpr(); - case '(': return ParseParenExpr(); + default: + return Error("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); } } /// binoprhs /// ::= ('+' primary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; - getNextToken(); // eat binop - + getNextToken(); // eat binop + // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); - if (!RHS) return 0; - + auto RHS = ParsePrimary(); + if (!RHS) + return nullptr; + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec+1, RHS); - if (RHS == 0) return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } - + // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = + llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } } /// expression /// ::= primary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); - if (!LHS) return 0; - - return ParseBinOpRHS(0, LHS); +static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); std::string FnName = IdentifierStr; getNextToken(); - + if (CurTok != '(') return ErrorP("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorP("Expected ')' in prototype"); - + // success. - getNextToken(); // eat ')'. - - return new PrototypeAST(FnName, ArgNames); + getNextToken(); // eat ')'. + + return llvm::make_unique(FnName, std::move(ArgNames)); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { - getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) return 0; +static std::unique_ptr ParseDefinition() { + getNextToken(); // eat def. 
+ auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { +static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique("__anon_expr", + std::vector()); + return llvm::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { - getNextToken(); // eat extern. +static std::unique_ptr ParseExtern() { + getNextToken(); // eat extern. return ParsePrototype(); } @@ -347,113 +380,108 @@ static PrototypeAST *ParseExtern() { // Code Generation //===----------------------------------------------------------------------===// -static Module *TheModule; +static std::unique_ptr TheModule; static IRBuilder<> Builder(getGlobalContext()); -static std::map NamedValues; +static std::map NamedValues; -Value *ErrorV(const char *Str) { Error(Str); return 0; } +Value *ErrorV(const char *Str) { + Error(Str); + return nullptr; +} -Value *NumberExprAST::Codegen() { +Value *NumberExprAST::codegen() { return ConstantFP::get(getGlobalContext(), APFloat(Val)); } -Value *VariableExprAST::Codegen() { +Value *VariableExprAST::codegen() { // Look this variable up in the function. Value *V = NamedValues[Name]; - return V ? V : ErrorV("Unknown variable name"); + if (!V) + return ErrorV("Unknown variable name"); + return V; } -Value *BinaryExprAST::Codegen() { - Value *L = LHS->Codegen(); - Value *R = RHS->Codegen(); - if (L == 0 || R == 0) return 0; - +Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + switch (Op) { - case '+': return Builder.CreateFAdd(L, R, "addtmp"); - case '-': return Builder.CreateFSub(L, R, "subtmp"); - case '*': return Builder.CreateFMul(L, R, "multmp"); + case '+': + return Builder.CreateFAdd(L, R, "addtmp"); + case '-': + return Builder.CreateFSub(L, R, "subtmp"); + case '*': + return Builder.CreateFMul(L, R, "multmp"); case '<': L = Builder.CreateFCmpULT(L, R, "cmptmp"); // Convert bool 0/1 to double 0.0 or 1.0 return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), "booltmp"); - default: return ErrorV("invalid binary operator"); + default: + return ErrorV("invalid binary operator"); } } -Value *CallExprAST::Codegen() { +Value *CallExprAST::codegen() { // Look up the name in the global module table. Function *CalleeF = TheModule->getFunction(Callee); - if (CalleeF == 0) + if (!CalleeF) return ErrorV("Unknown function referenced"); - + // If argument mismatch error. if (CalleeF->arg_size() != Args.size()) return ErrorV("Incorrect # arguments passed"); - std::vector ArgsV; + std::vector ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } - + return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Function *PrototypeAST::Codegen() { +Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. 
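// (Concretely, for a two-argument prototype the code below builds
// FunctionType::get(DoubleTy, {DoubleTy, DoubleTy}, /*isVarArg=*/false),
// i.e. the IR type "double (double, double)". Illustration only.)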
- std::vector Doubles(Args.size(), - Type::getDoubleTy(getGlobalContext())); - FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), - Doubles, false); - - Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); - - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != Name) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = TheModule->getFunction(Name); - - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } - - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } - + std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + // Set names for all arguments. unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) { - AI->setName(Args[Idx]); - - // Add arguments to variable symbol table. - NamedValues[Args[Idx]] = AI; - } - + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + return F; } -Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; - +Function *FunctionAST::codegen() { + // First, check for an existing function from a previous 'extern' declaration. + Function *TheFunction = TheModule->getFunction(Proto->getName()); + + if (!TheFunction) + TheFunction = Proto->codegen(); + + if (!TheFunction) + return nullptr; + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); Builder.SetInsertPoint(BB); - - if (Value *RetVal = Body->Codegen()) { + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[Arg.getName()] = &Arg; + + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); @@ -462,10 +490,10 @@ Function *FunctionAST::Codegen() { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); - return 0; + return nullptr; } //===----------------------------------------------------------------------===// @@ -473,10 +501,10 @@ Function *FunctionAST::Codegen() { //===----------------------------------------------------------------------===// static void HandleDefinition() { - if (FunctionAST *F = ParseDefinition()) { - if (Function *LF = F->Codegen()) { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { fprintf(stderr, "Read function definition:"); - LF->dump(); + FnIR->dump(); } } else { // Skip token for error recovery. @@ -485,10 +513,10 @@ static void HandleDefinition() { } static void HandleExtern() { - if (PrototypeAST *P = ParseExtern()) { - if (Function *F = P->Codegen()) { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { fprintf(stderr, "Read extern: "); - F->dump(); + FnIR->dump(); } } else { // Skip token for error recovery. @@ -498,10 +526,10 @@ static void HandleExtern() { static void HandleTopLevelExpression() { // Evaluate a top-level expression into an anonymous function. 
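// (For example, entering "1+2;" at the prompt parses into an anonymous
// FunctionAST whose prototype is named "__anon_expr" and takes no arguments;
// codegen() then emits a nullary function computing the expression, which
// the JIT chapters look up by that name and call. Illustration only.)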
- if (FunctionAST *F = ParseTopLevelExpr()) { - if (Function *LF = F->Codegen()) { + if (auto FnAST = ParseTopLevelExpr()) { + if (auto *FnIR = FnAST->codegen()) { fprintf(stderr, "Read top-level expression:"); - LF->dump(); + FnIR->dump(); } } else { // Skip token for error recovery. @@ -514,46 +542,42 @@ static void MainLoop() { while (1) { fprintf(stderr, "ready> "); switch (CurTok) { - case tok_eof: return; - case ';': getNextToken(); break; // ignore top-level semicolons. - case tok_def: HandleDefinition(); break; - case tok_extern: HandleExtern(); break; - default: HandleTopLevelExpression(); break; + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; } } } -//===----------------------------------------------------------------------===// -// "Library" functions that can be "extern'd" from user code. -//===----------------------------------------------------------------------===// - -/// putchard - putchar that takes a double and returns 0. -extern "C" -double putchard(double X) { - putchar((char)X); - return 0; -} - //===----------------------------------------------------------------------===// // Main driver code. //===----------------------------------------------------------------------===// int main() { - LLVMContext &Context = getGlobalContext(); - // Install standard binary operators. // 1 is lowest precedence. BinopPrecedence['<'] = 10; BinopPrecedence['+'] = 20; BinopPrecedence['-'] = 20; - BinopPrecedence['*'] = 40; // highest. + BinopPrecedence['*'] = 40; // highest. // Prime the first token. fprintf(stderr, "ready> "); getNextToken(); // Make the module, which holds all the code. - TheModule = new Module("my cool jit", Context); + TheModule = llvm::make_unique("my cool jit", getGlobalContext()); // Run the main "interpreter loop" now. 
MainLoop(); diff --git a/examples/Kaleidoscope/Chapter4/CMakeLists.txt b/examples/Kaleidoscope/Chapter4/CMakeLists.txt index 2c01e120070a..89feed143adc 100644 --- a/examples/Kaleidoscope/Chapter4/CMakeLists.txt +++ b/examples/Kaleidoscope/Chapter4/CMakeLists.txt @@ -3,14 +3,15 @@ set(LLVM_LINK_COMPONENTS Core ExecutionEngine InstCombine - MCJIT + Object RuntimeDyld ScalarOpts Support - TransformUtils native ) add_kaleidoscope_chapter(Kaleidoscope-Ch4 toy.cpp ) + +export_executable_symbols(Kaleidoscope-Ch4) diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp index ad091e4496b7..4f77ec862b1b 100644 --- a/examples/Kaleidoscope/Chapter4/toy.cpp +++ b/examples/Kaleidoscope/Chapter4/toy.cpp @@ -1,9 +1,5 @@ +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" -#include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/ExecutionEngine/MCJIT.h" -#include "llvm/ExecutionEngine/SectionMemoryManager.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LegacyPassManager.h" @@ -16,7 +12,10 @@ #include #include #include +#include "../include/KaleidoscopeJIT.h" + using namespace llvm; +using namespace llvm::orc; //===----------------------------------------------------------------------===// // Lexer @@ -66,7 +65,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -98,7 +97,7 @@ namespace { class ExprAST { public: virtual ~ExprAST() {} - virtual Value *Codegen() = 0; + virtual Value *codegen() = 0; }; /// NumberExprAST - Expression class for numeric literals like "1.0". @@ -106,8 +105,8 @@ class NumberExprAST : public ExprAST { double Val; public: - NumberExprAST(double val) : Val(val) {} - Value *Codegen() override; + NumberExprAST(double Val) : Val(Val) {} + Value *codegen() override; }; /// VariableExprAST - Expression class for referencing a variable, like "a". @@ -115,30 +114,32 @@ class VariableExprAST : public ExprAST { std::string Name; public: - VariableExprAST(const std::string &name) : Name(name) {} - Value *Codegen() override; + VariableExprAST(const std::string &Name) : Name(Name) {} + Value *codegen() override; }; /// BinaryExprAST - Expression class for a binary operator. class BinaryExprAST : public ExprAST { char Op; - ExprAST *LHS, *RHS; + std::unique_ptr LHS, RHS; public: - BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) - : Op(op), LHS(lhs), RHS(rhs) {} - Value *Codegen() override; + BinaryExprAST(char Op, std::unique_ptr LHS, + std::unique_ptr RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + Value *codegen() override; }; /// CallExprAST - Expression class for function calls. 
class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} - Value *Codegen() override; + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -149,21 +150,22 @@ class PrototypeAST { std::vector Args; public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} - - Function *Codegen(); + PrototypeAST(const std::string &Name, std::vector Args) + : Name(Name), Args(std::move(Args)) {} + Function *codegen(); + const std::string &getName() const { return Name; } }; /// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; public: - FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {} - - Function *Codegen(); + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); }; } // end anonymous namespace @@ -194,41 +196,58 @@ static int GetTokPrecedence() { } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { +std::unique_ptr Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); - return 0; -} -PrototypeAST *ErrorP(const char *Str) { - Error(Str); - return 0; -} -FunctionAST *ErrorF(const char *Str) { - Error(Str); - return 0; + return nullptr; } -static ExprAST *ParseExpression(); +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); + return llvm::make_unique(IdName); // Call. getNextToken(); // eat ( - std::vector Args; + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) - return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; if (CurTok == ')') break; @@ -242,34 +261,14 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - return new CallExprAST(IdName, Args); -} - -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) - return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). 
- return V; + return llvm::make_unique(IdName, std::move(Args)); } /// primary /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { default: return Error("unknown token when expecting an expression"); @@ -284,7 +283,8 @@ static ExprAST *ParsePrimary() { /// binoprhs /// ::= ('+' primary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -299,38 +299,39 @@ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { getNextToken(); // eat binop // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); + auto RHS = ParsePrimary(); if (!RHS) - return 0; + return nullptr; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec + 1, RHS); - if (RHS == 0) - return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = + llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } } /// expression /// ::= primary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); +static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); if (!LHS) - return 0; + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); @@ -349,300 +350,86 @@ static PrototypeAST *ParsePrototype() { // success. getNextToken(); // eat ')'. - return new PrototypeAST(FnName, ArgNames); + return llvm::make_unique(FnName, std::move(ArgNames)); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { +static std::unique_ptr ParseDefinition() { getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) - return 0; + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { +static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique("__anon_expr", + std::vector()); + return llvm::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { +static std::unique_ptr ParseExtern() { getNextToken(); // eat extern. 
return ParsePrototype(); } -//===----------------------------------------------------------------------===// -// Quick and dirty hack -//===----------------------------------------------------------------------===// - -// FIXME: Obviously we can do better than this -std::string GenerateUniqueName(const char *root) { - static int i = 0; - char s[16]; - sprintf(s, "%s%d", root, i++); - std::string S = s; - return S; -} - -std::string MakeLegalFunctionName(std::string Name) { - std::string NewName; - if (!Name.length()) - return GenerateUniqueName("anon_func_"); - - // Start with what we have - NewName = Name; - - // Look for a numberic first character - if (NewName.find_first_of("0123456789") == 0) { - NewName.insert(0, 1, 'n'); - } - - // Replace illegal characters with their ASCII equivalent - std::string legal_elements = - "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - size_t pos; - while ((pos = NewName.find_first_not_of(legal_elements)) != - std::string::npos) { - char old_c = NewName.at(pos); - char new_str[16]; - sprintf(new_str, "%d", (int)old_c); - NewName = NewName.replace(pos, 1, new_str); - } - - return NewName; -} - -//===----------------------------------------------------------------------===// -// MCJIT helper class -//===----------------------------------------------------------------------===// - -class MCJITHelper { -public: - MCJITHelper(LLVMContext &C) : Context(C), OpenModule(NULL) {} - ~MCJITHelper(); - - Function *getFunction(const std::string FnName); - Module *getModuleForNewFunction(); - void *getPointerToFunction(Function *F); - void *getSymbolAddress(const std::string &Name); - void dump(); - -private: - typedef std::vector ModuleVector; - typedef std::vector EngineVector; - - LLVMContext &Context; - Module *OpenModule; - ModuleVector Modules; - EngineVector Engines; -}; - -class HelpingMemoryManager : public SectionMemoryManager { - HelpingMemoryManager(const HelpingMemoryManager &) = delete; - void operator=(const HelpingMemoryManager &) = delete; - -public: - HelpingMemoryManager(MCJITHelper *Helper) : MasterHelper(Helper) {} - ~HelpingMemoryManager() override {} - - /// This method returns the address of the specified symbol. - /// Our implementation will attempt to find symbols in other - /// modules associated with the MCJITHelper to cross link symbols - /// from one generated module to another. 
- uint64_t getSymbolAddress(const std::string &Name) override; - -private: - MCJITHelper *MasterHelper; -}; - -uint64_t HelpingMemoryManager::getSymbolAddress(const std::string &Name) { - uint64_t FnAddr = SectionMemoryManager::getSymbolAddress(Name); - if (FnAddr) - return FnAddr; - - uint64_t HelperFun = (uint64_t)MasterHelper->getSymbolAddress(Name); - if (!HelperFun) - report_fatal_error("Program used extern function '" + Name + - "' which could not be resolved!"); - - return HelperFun; -} - -MCJITHelper::~MCJITHelper() { - if (OpenModule) - delete OpenModule; - EngineVector::iterator begin = Engines.begin(); - EngineVector::iterator end = Engines.end(); - EngineVector::iterator it; - for (it = begin; it != end; ++it) - delete *it; -} - -Function *MCJITHelper::getFunction(const std::string FnName) { - ModuleVector::iterator begin = Modules.begin(); - ModuleVector::iterator end = Modules.end(); - ModuleVector::iterator it; - for (it = begin; it != end; ++it) { - Function *F = (*it)->getFunction(FnName); - if (F) { - if (*it == OpenModule) - return F; - - assert(OpenModule != NULL); - - // This function is in a module that has already been JITed. - // We need to generate a new prototype for external linkage. - Function *PF = OpenModule->getFunction(FnName); - if (PF && !PF->empty()) { - ErrorF("redefinition of function across modules"); - return 0; - } - - // If we don't have a prototype yet, create one. - if (!PF) - PF = Function::Create(F->getFunctionType(), Function::ExternalLinkage, - FnName, OpenModule); - return PF; - } - } - return NULL; -} - -Module *MCJITHelper::getModuleForNewFunction() { - // If we have a Module that hasn't been JITed, use that. - if (OpenModule) - return OpenModule; - - // Otherwise create a new Module. - std::string ModName = GenerateUniqueName("mcjit_module_"); - Module *M = new Module(ModName, Context); - Modules.push_back(M); - OpenModule = M; - return M; -} - -void *MCJITHelper::getPointerToFunction(Function *F) { - // See if an existing instance of MCJIT has this function. - EngineVector::iterator begin = Engines.begin(); - EngineVector::iterator end = Engines.end(); - EngineVector::iterator it; - for (it = begin; it != end; ++it) { - void *P = (*it)->getPointerToFunction(F); - if (P) - return P; - } - - // If we didn't find the function, see if we can generate it. - if (OpenModule) { - std::string ErrStr; - ExecutionEngine *NewEngine = - EngineBuilder(std::unique_ptr(OpenModule)) - .setErrorStr(&ErrStr) - .setMCJITMemoryManager(std::unique_ptr( - new HelpingMemoryManager(this))) - .create(); - if (!NewEngine) { - fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); - exit(1); - } - - // Create a function pass manager for this engine - auto *FPM = new legacy::FunctionPassManager(OpenModule); - - // Set up the optimizer pipeline. Start with registering info about how the - // target lays out data structures. - OpenModule->setDataLayout(*NewEngine->getDataLayout()); - // Provide basic AliasAnalysis support for GVN. - FPM->add(createBasicAliasAnalysisPass()); - // Promote allocas to registers. - FPM->add(createPromoteMemoryToRegisterPass()); - // Do simple "peephole" optimizations and bit-twiddling optzns. - FPM->add(createInstructionCombiningPass()); - // Reassociate expressions. - FPM->add(createReassociatePass()); - // Eliminate Common SubExpressions. - FPM->add(createGVNPass()); - // Simplify the control flow graph (deleting unreachable blocks, etc). 
-    FPM->add(createCFGSimplificationPass());
-    FPM->doInitialization();
-
-    // For each function in the module
-    Module::iterator it;
-    Module::iterator end = OpenModule->end();
-    for (it = OpenModule->begin(); it != end; ++it) {
-      // Run the FPM on this function
-      FPM->run(*it);
-    }
-
-    // We don't need this anymore
-    delete FPM;
-
-    OpenModule = NULL;
-    Engines.push_back(NewEngine);
-    NewEngine->finalizeObject();
-    return NewEngine->getPointerToFunction(F);
-  }
-  return NULL;
-}
-
-void *MCJITHelper::getSymbolAddress(const std::string &Name) {
-  // Look for the symbol in each of our execution engines.
-  EngineVector::iterator begin = Engines.begin();
-  EngineVector::iterator end = Engines.end();
-  EngineVector::iterator it;
-  for (it = begin; it != end; ++it) {
-    uint64_t FAddr = (*it)->getFunctionAddress(Name);
-    if (FAddr) {
-      return (void *)FAddr;
-    }
-  }
-  return NULL;
-}
-
-void MCJITHelper::dump() {
-  ModuleVector::iterator begin = Modules.begin();
-  ModuleVector::iterator end = Modules.end();
-  ModuleVector::iterator it;
-  for (it = begin; it != end; ++it)
-    (*it)->dump();
-}
 //===----------------------------------------------------------------------===//
 // Code Generation
 //===----------------------------------------------------------------------===//

-static MCJITHelper *JITHelper;
+static std::unique_ptr<Module> TheModule;
 static IRBuilder<> Builder(getGlobalContext());
 static std::map<std::string, Value *> NamedValues;
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;

 Value *ErrorV(const char *Str) {
   Error(Str);
-  return 0;
+  return nullptr;
 }

-Value *NumberExprAST::Codegen() {
+Function *getFunction(std::string Name) {
+  // First, see if the function has already been added to the current module.
+  if (auto *F = TheModule->getFunction(Name))
+    return F;
+
+  // If not, check whether we can codegen the declaration from some existing
+  // prototype.
+  auto FI = FunctionProtos.find(Name);
+  if (FI != FunctionProtos.end())
+    return FI->second->codegen();
+
+  // If no existing prototype exists, return null.
+  return nullptr;
+}
+
+Value *NumberExprAST::codegen() {
   return ConstantFP::get(getGlobalContext(), APFloat(Val));
 }

-Value *VariableExprAST::Codegen() {
+Value *VariableExprAST::codegen() {
   // Look this variable up in the function.
   Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
+  if (!V)
+    return ErrorV("Unknown variable name");
+  return V;
 }

-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0)
-    return 0;
+Value *BinaryExprAST::codegen() {
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;

   switch (Op) {
   case '+':
@@ -661,10 +448,10 @@ Value *BinaryExprAST::Codegen() {
   }
 }

-Value *CallExprAST::Codegen() {
+Value *CallExprAST::codegen() {
   // Look up the name in the global module table.
-  Function *CalleeF = JITHelper->getFunction(Callee);
-  if (CalleeF == 0)
+  Function *CalleeF = getFunction(Callee);
+  if (!CalleeF)
     return ErrorV("Unknown function referenced");

   // If argument mismatch error.
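The getFunction/FunctionProtos pair introduced above is the heart of the new per-module design: once a callee has been JIT'd as part of an earlier module, the current module can no longer see its definition, so codegen re-emits a declaration from the saved prototype. A minimal standalone sketch of that cache pattern follows; Fn and Proto are hypothetical stand-ins for llvm::Function and the tutorial's PrototypeAST, not part of the patch.

#include <map>
#include <memory>
#include <string>

struct Fn { std::string Name; };   // stand-in for llvm::Function
struct Proto {                     // stand-in for PrototypeAST
  std::string Name;
  std::unique_ptr<Fn> codegen() const {
    return std::unique_ptr<Fn>(new Fn{Name}); // emit a fresh declaration
  }
};

// One table per open module, cleared whenever a module is handed to the JIT;
// the prototype map outlives every module.
static std::map<std::string, std::unique_ptr<Fn>> CurrentModule;
static std::map<std::string, std::unique_ptr<Proto>> Protos;

Fn *getFn(const std::string &Name) {
  auto MI = CurrentModule.find(Name);
  if (MI != CurrentModule.end())
    return MI->second.get();       // already declared in this module
  auto PI = Protos.find(Name);
  if (PI != Protos.end()) {        // known prototype: re-declare it here
    CurrentModule[Name] = PI->second->codegen();
    return CurrentModule[Name].get();
  }
  return nullptr;                  // truly unknown symbol
}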
@@ -673,94 +460,99 @@ Value *CallExprAST::Codegen() { std::vector ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) - return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Function *PrototypeAST::Codegen() { +Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); - std::string FnName = MakeLegalFunctionName(Name); - - Module *M = JITHelper->getModuleForNewFunction(); - - Function *F = Function::Create(FT, Function::ExternalLinkage, FnName, M); - - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != FnName) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = JITHelper->getFunction(Name); - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } - - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); // Set names for all arguments. unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) { - AI->setName(Args[Idx]); - - // Add arguments to variable symbol table. - NamedValues[Args[Idx]] = AI; - } + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); return F; } -Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); Builder.SetInsertPoint(BB); - if (Value *RetVal = Body->Codegen()) { + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[Arg.getName()] = &Arg; + + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); // Validate the generated code, checking for consistency. verifyFunction(*TheFunction); + // Run the optimizer on the function. + TheFPM->run(*TheFunction); + return TheFunction; } // Error reading body, remove function. TheFunction->eraseFromParent(); - return 0; + return nullptr; } //===----------------------------------------------------------------------===// // Top-Level parsing and JIT Driver //===----------------------------------------------------------------------===// +static void InitializeModuleAndPassManager() { + // Open a new module. + TheModule = llvm::make_unique("my cool jit", getGlobalContext()); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + + // Create a new pass manager attached to it. 
+  TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  TheFPM->doInitialization();
+}
+
 static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
+  if (auto FnAST = ParseDefinition()) {
+    if (auto *FnIR = FnAST->codegen()) {
       fprintf(stderr, "Read function definition:");
-      LF->dump();
+      FnIR->dump();
+      TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
     }
   } else {
     // Skip token for error recovery.
@@ -769,10 +561,11 @@ static void HandleDefinition() {
 }

 static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
+  if (auto ProtoAST = ParseExtern()) {
+    if (auto *FnIR = ProtoAST->codegen()) {
       fprintf(stderr, "Read extern: ");
-      F->dump();
+      FnIR->dump();
+      FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
     }
   } else {
     // Skip token for error recovery.
@@ -782,15 +575,25 @@ static void HandleExtern() {
 }

 static void HandleTopLevelExpression() {
   // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      // JIT the function, returning a function pointer.
-      void *FPtr = JITHelper->getPointerToFunction(LF);
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (FnAST->codegen()) {

-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      // JIT the module containing the anonymous expression, keeping a handle so
+      // we can free it later.
+      auto H = TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
+
+      // Search the JIT for the __anon_expr symbol.
+      auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
+      assert(ExprSymbol && "Function not found");
+
+      // Get the symbol's address and cast it to the right type (takes no
+      // arguments, returns a double) so we can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
       fprintf(stderr, "Evaluated to %f\n", FP());
+
+      // Delete the anonymous expression module from the JIT.
+      TheJIT->removeModule(H);
     }
   } else {
     // Skip token for error recovery.
@@ -805,9 +608,9 @@ static void MainLoop() {
     switch (CurTok) {
     case tok_eof:
       return;
-    case ';':
+    case ';': // ignore top-level semicolons.
       getNextToken();
-      break; // ignore top-level semicolons.
+      break;
     case tok_def:
       HandleDefinition();
       break;
@@ -827,7 +630,13 @@ static void MainLoop() {

 /// putchard - putchar that takes a double and returns 0.
 extern "C" double putchard(double X) {
-  putchar((char)X);
+  fputc((char)X, stderr);
   return 0;
 }

+/// printd - printf that takes a double prints it as "%f\n", returning 0.
+extern "C" double printd(double X) {
+  fprintf(stderr, "%f\n", X);
+  return 0;
+}
+
@@ -839,8 +648,6 @@ int main() {
   InitializeNativeTarget();
   InitializeNativeTargetAsmPrinter();
   InitializeNativeTargetAsmParser();
-  LLVMContext &Context = getGlobalContext();
-  JITHelper = new MCJITHelper(Context);

   // Install standard binary operators.
   // 1 is lowest precedence.
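HandleTopLevelExpression now has a fixed lifecycle: codegen the expression into the open module, hand that module to the JIT, look up __anon_expr and call it, then remove the module again. A self-contained sketch of that lifecycle follows; FakeJIT is a hypothetical stand-in for KaleidoscopeJIT, mimicking only the addModule/findSymbol/removeModule surface the driver relies on.

#include <cstdio>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct Module { std::map<std::string, std::function<double()>> Syms; };

class FakeJIT {
  std::vector<std::unique_ptr<Module>> Mods;

public:
  typedef size_t ModuleHandle;
  ModuleHandle addModule(std::unique_ptr<Module> M) {
    Mods.push_back(std::move(M));
    return Mods.size() - 1;
  }
  std::function<double()> findSymbol(const std::string &Name) {
    // Search the newest module first, as a real JIT symbol lookup would.
    for (auto I = Mods.rbegin(), E = Mods.rend(); I != E; ++I)
      if (*I && (*I)->Syms.count(Name))
        return (*I)->Syms[Name];
    return nullptr;
  }
  void removeModule(ModuleHandle H) { Mods[H].reset(); } // free that code
};

int main() {
  FakeJIT JIT;
  std::unique_ptr<Module> M(new Module());
  M->Syms["__anon_expr"] = [] { return 42.0; }; // "codegen" the expression
  FakeJIT::ModuleHandle H = JIT.addModule(std::move(M));
  if (std::function<double()> FP = JIT.findSymbol("__anon_expr"))
    std::printf("Evaluated to %f\n", FP());     // mirrors the driver's output
  JIT.removeModule(H);                          // module discarded after use
  return 0;
}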
@@ -853,11 +660,12 @@ int main() {
   fprintf(stderr, "ready> ");
   getNextToken();

+  TheJIT = llvm::make_unique<KaleidoscopeJIT>();
+
+  InitializeModuleAndPassManager();
+
   // Run the main "interpreter loop" now.
   MainLoop();

-  // Print out all of the generated code.
-  JITHelper->dump();
-
   return 0;
 }
diff --git a/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
index a938d9731fe8..c0ae70654c36 100644
--- a/examples/Kaleidoscope/Chapter5/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter5/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   Core
   ExecutionEngine
   InstCombine
-  MCJIT
+  Object
   RuntimeDyld
   ScalarOpts
   Support
@@ -13,3 +13,5 @@ set(LLVM_LINK_COMPONENTS
 add_kaleidoscope_chapter(Kaleidoscope-Ch5
   toy.cpp
   )
+
+export_executable_symbols(Kaleidoscope-Ch5)
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
index db9904895739..eeca4775eeb1 100644
--- a/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -1,10 +1,5 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -17,7 +12,10 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "../include/KaleidoscopeJIT.h"
+
 using namespace llvm;
+using namespace llvm::orc;

 //===----------------------------------------------------------------------===//
 // Lexer
@@ -84,7 +82,7 @@ static int gettok() {
       LastChar = getchar();
     } while (isdigit(LastChar) || LastChar == '.');

-    NumVal = strtod(NumStr.c_str(), 0);
+    NumVal = strtod(NumStr.c_str(), nullptr);
     return tok_number;
   }

@@ -116,7 +114,7 @@ namespace {
 class ExprAST {
 public:
   virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
+  virtual Value *codegen() = 0;
 };

 /// NumberExprAST - Expression class for numeric literals like "1.0".
@@ -124,8 +122,8 @@ class NumberExprAST : public ExprAST {
   double Val;

 public:
-  NumberExprAST(double val) : Val(val) {}
-  Value *Codegen() override;
+  NumberExprAST(double Val) : Val(Val) {}
+  Value *codegen() override;
 };

 /// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -133,52 +131,57 @@ class VariableExprAST : public ExprAST {
   std::string Name;

 public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-  Value *Codegen() override;
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+  Value *codegen() override;
 };

 /// BinaryExprAST - Expression class for a binary operator.
 class BinaryExprAST : public ExprAST {
   char Op;
-  ExprAST *LHS, *RHS;
+  std::unique_ptr<ExprAST> LHS, RHS;

 public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
-      : Op(op), LHS(lhs), RHS(rhs) {}
-  Value *Codegen() override;
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+  Value *codegen() override;
 };

 /// CallExprAST - Expression class for function calls.
class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} - Value *Codegen() override; + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; }; /// IfExprAST - Expression class for if/then/else. class IfExprAST : public ExprAST { - ExprAST *Cond, *Then, *Else; + std::unique_ptr Cond, Then, Else; public: - IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) - : Cond(cond), Then(then), Else(_else) {} - Value *Codegen() override; + IfExprAST(std::unique_ptr Cond, std::unique_ptr Then, + std::unique_ptr Else) + : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} + Value *codegen() override; }; /// ForExprAST - Expression class for for/in. class ForExprAST : public ExprAST { std::string VarName; - ExprAST *Start, *End, *Step, *Body; + std::unique_ptr Start, End, Step, Body; public: - ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, - ExprAST *step, ExprAST *body) - : VarName(varname), Start(start), End(end), Step(step), Body(body) {} - Value *Codegen() override; + ForExprAST(const std::string &VarName, std::unique_ptr Start, + std::unique_ptr End, std::unique_ptr Step, + std::unique_ptr Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + Value *codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -189,21 +192,22 @@ class PrototypeAST { std::vector Args; public: - PrototypeAST(const std::string &name, const std::vector &args) - : Name(name), Args(args) {} - - Function *Codegen(); + PrototypeAST(const std::string &Name, std::vector Args) + : Name(Name), Args(std::move(Args)) {} + Function *codegen(); + const std::string &getName() const { return Name; } }; /// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; public: - FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {} - - Function *Codegen(); + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); }; } // end anonymous namespace @@ -234,41 +238,58 @@ static int GetTokPrecedence() { } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { +std::unique_ptr Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); - return 0; -} -PrototypeAST *ErrorP(const char *Str) { - Error(Str); - return 0; -} -FunctionAST *ErrorF(const char *Str) { - Error(Str); - return 0; + return nullptr; } -static ExprAST *ParseExpression(); +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). 
+ return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); + return llvm::make_unique(IdName); // Call. getNextToken(); // eat ( - std::vector Args; + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) - return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; if (CurTok == ')') break; @@ -282,60 +303,41 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - return new CallExprAST(IdName, Args); -} - -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) - return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). - return V; + return llvm::make_unique(IdName, std::move(Args)); } /// ifexpr ::= 'if' expression 'then' expression 'else' expression -static ExprAST *ParseIfExpr() { +static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. // condition. - ExprAST *Cond = ParseExpression(); + auto Cond = ParseExpression(); if (!Cond) - return 0; + return nullptr; if (CurTok != tok_then) return Error("expected then"); getNextToken(); // eat the then - ExprAST *Then = ParseExpression(); - if (Then == 0) - return 0; + auto Then = ParseExpression(); + if (!Then) + return nullptr; if (CurTok != tok_else) return Error("expected else"); getNextToken(); - ExprAST *Else = ParseExpression(); + auto Else = ParseExpression(); if (!Else) - return 0; + return nullptr; - return new IfExprAST(Cond, Then, Else); + return llvm::make_unique(std::move(Cond), std::move(Then), + std::move(Else)); } /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression -static ExprAST *ParseForExpr() { +static std::unique_ptr ParseForExpr() { getNextToken(); // eat the for. if (CurTok != tok_identifier) @@ -348,35 +350,36 @@ static ExprAST *ParseForExpr() { return Error("expected '=' after for"); getNextToken(); // eat '='. - ExprAST *Start = ParseExpression(); - if (Start == 0) - return 0; + auto Start = ParseExpression(); + if (!Start) + return nullptr; if (CurTok != ',') return Error("expected ',' after for start value"); getNextToken(); - ExprAST *End = ParseExpression(); - if (End == 0) - return 0; + auto End = ParseExpression(); + if (!End) + return nullptr; // The step value is optional. - ExprAST *Step = 0; + std::unique_ptr Step; if (CurTok == ',') { getNextToken(); Step = ParseExpression(); - if (Step == 0) - return 0; + if (!Step) + return nullptr; } if (CurTok != tok_in) return Error("expected 'in' after for"); getNextToken(); // eat 'in'. 
- ExprAST *Body = ParseExpression(); - if (Body == 0) - return 0; + auto Body = ParseExpression(); + if (!Body) + return nullptr; - return new ForExprAST(IdName, Start, End, Step, Body); + return llvm::make_unique(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); } /// primary @@ -385,7 +388,7 @@ static ExprAST *ParseForExpr() { /// ::= parenexpr /// ::= ifexpr /// ::= forexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { default: return Error("unknown token when expecting an expression"); @@ -404,7 +407,8 @@ static ExprAST *ParsePrimary() { /// binoprhs /// ::= ('+' primary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -419,38 +423,39 @@ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { getNextToken(); // eat binop // Parse the primary expression after the binary operator. - ExprAST *RHS = ParsePrimary(); + auto RHS = ParsePrimary(); if (!RHS) - return 0; + return nullptr; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec + 1, RHS); - if (RHS == 0) - return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = + llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } } /// expression /// ::= primary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParsePrimary(); +static std::unique_ptr ParseExpression() { + auto LHS = ParsePrimary(); if (!LHS) - return 0; + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { if (CurTok != tok_identifier) return ErrorP("Expected function name in prototype"); @@ -469,33 +474,34 @@ static PrototypeAST *ParsePrototype() { // success. getNextToken(); // eat ')'. - return new PrototypeAST(FnName, ArgNames); + return llvm::make_unique(FnName, std::move(ArgNames)); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { +static std::unique_ptr ParseDefinition() { getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) - return 0; + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { - if (ExprAST *E = ParseExpression()) { +static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { // Make an anonymous proto. - PrototypeAST *Proto = new PrototypeAST("", std::vector()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique("__anon_expr", + std::vector()); + return llvm::make_unique(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { +static std::unique_ptr ParseExtern() { getNextToken(); // eat extern. 
return ParsePrototype(); } @@ -504,31 +510,50 @@ static PrototypeAST *ParseExtern() { // Code Generation //===----------------------------------------------------------------------===// -static Module *TheModule; +static std::unique_ptr TheModule; static IRBuilder<> Builder(getGlobalContext()); static std::map NamedValues; -static legacy::FunctionPassManager *TheFPM; +static std::unique_ptr TheFPM; +static std::unique_ptr TheJIT; +static std::map> FunctionProtos; Value *ErrorV(const char *Str) { Error(Str); - return 0; + return nullptr; } -Value *NumberExprAST::Codegen() { +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; +} + +Value *NumberExprAST::codegen() { return ConstantFP::get(getGlobalContext(), APFloat(Val)); } -Value *VariableExprAST::Codegen() { +Value *VariableExprAST::codegen() { // Look this variable up in the function. Value *V = NamedValues[Name]; - return V ? V : ErrorV("Unknown variable name"); + if (!V) + return ErrorV("Unknown variable name"); + return V; } -Value *BinaryExprAST::Codegen() { - Value *L = LHS->Codegen(); - Value *R = RHS->Codegen(); - if (L == 0 || R == 0) - return 0; +Value *BinaryExprAST::codegen() { + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; switch (Op) { case '+': @@ -547,10 +572,10 @@ Value *BinaryExprAST::Codegen() { } } -Value *CallExprAST::Codegen() { +Value *CallExprAST::codegen() { // Look up the name in the global module table. - Function *CalleeF = TheModule->getFunction(Callee); - if (CalleeF == 0) + Function *CalleeF = getFunction(Callee); + if (!CalleeF) return ErrorV("Unknown function referenced"); // If argument mismatch error. @@ -559,18 +584,18 @@ Value *CallExprAST::Codegen() { std::vector ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) - return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Value *IfExprAST::Codegen() { - Value *CondV = Cond->Codegen(); - if (CondV == 0) - return 0; +Value *IfExprAST::codegen() { + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; // Convert condition to a bool by comparing equal to 0.0. CondV = Builder.CreateFCmpONE( @@ -590,9 +615,9 @@ Value *IfExprAST::Codegen() { // Emit then value. Builder.SetInsertPoint(ThenBB); - Value *ThenV = Then->Codegen(); - if (ThenV == 0) - return 0; + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; Builder.CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. @@ -602,9 +627,9 @@ Value *IfExprAST::Codegen() { TheFunction->getBasicBlockList().push_back(ElseBB); Builder.SetInsertPoint(ElseBB); - Value *ElseV = Else->Codegen(); - if (ElseV == 0) - return 0; + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; Builder.CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. @@ -621,27 +646,26 @@ Value *IfExprAST::Codegen() { return PN; } -Value *ForExprAST::Codegen() { - // Output this as: - // ... 
- // start = startexpr - // goto loop - // loop: - // variable = phi [start, loopheader], [nextvariable, loopend] - // ... - // bodyexpr - // ... - // loopend: - // step = stepexpr - // nextvariable = variable + step - // endcond = endexpr - // br endcond, loop, endloop - // outloop: - +// Output for-loop as: +// ... +// start = startexpr +// goto loop +// loop: +// variable = phi [start, loopheader], [nextvariable, loopend] +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// nextvariable = variable + step +// endcond = endexpr +// br endcond, loop, endloop +// outloop: +Value *ForExprAST::codegen() { // Emit the start code first, without 'variable' in scope. - Value *StartVal = Start->Codegen(); - if (StartVal == 0) - return 0; + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; // Make the new basic block for the loop header, inserting after current // block. @@ -669,15 +693,15 @@ Value *ForExprAST::Codegen() { // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. - if (Body->Codegen() == 0) - return 0; + if (!Body->codegen()) + return nullptr; // Emit the step value. - Value *StepVal; + Value *StepVal = nullptr; if (Step) { - StepVal = Step->Codegen(); - if (StepVal == 0) - return 0; + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; } else { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); @@ -686,9 +710,9 @@ Value *ForExprAST::Codegen() { Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); // Compute the end condition. - Value *EndCond = End->Codegen(); - if (EndCond == 0) - return EndCond; + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; // Convert condition to a bool by comparing equal to 0.0. EndCond = Builder.CreateFCmpONE( @@ -718,7 +742,7 @@ Value *ForExprAST::Codegen() { return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } -Function *PrototypeAST::Codegen() { +Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); @@ -726,60 +750,42 @@ Function *PrototypeAST::Codegen() { FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); Function *F = - Function::Create(FT, Function::ExternalLinkage, Name, TheModule); - - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != Name) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = TheModule->getFunction(Name); - - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } - - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); // Set names for all arguments. unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) { - AI->setName(Args[Idx]); - - // Add arguments to variable symbol table. 
- NamedValues[Args[Idx]] = AI; - } + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); return F; } -Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); Builder.SetInsertPoint(BB); - if (Value *RetVal = Body->Codegen()) { + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) + NamedValues[Arg.getName()] = &Arg; + + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); // Validate the generated code, checking for consistency. verifyFunction(*TheFunction); - // Optimize the function. + // Run the optimizer on the function. TheFPM->run(*TheFunction); return TheFunction; @@ -787,20 +793,40 @@ Function *FunctionAST::Codegen() { // Error reading body, remove function. TheFunction->eraseFromParent(); - return 0; + return nullptr; } //===----------------------------------------------------------------------===// // Top-Level parsing and JIT Driver //===----------------------------------------------------------------------===// -static ExecutionEngine *TheExecutionEngine; +static void InitializeModuleAndPassManager() { + // Open a new module. + TheModule = llvm::make_unique("my cool jit", getGlobalContext()); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); + + // Create a new pass manager attached to it. + TheFPM = llvm::make_unique(TheModule.get()); + + // Do simple "peephole" optimizations and bit-twiddling optzns. + TheFPM->add(createInstructionCombiningPass()); + // Reassociate expressions. + TheFPM->add(createReassociatePass()); + // Eliminate Common SubExpressions. + TheFPM->add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + TheFPM->add(createCFGSimplificationPass()); + + TheFPM->doInitialization(); +} static void HandleDefinition() { - if (FunctionAST *F = ParseDefinition()) { - if (Function *LF = F->Codegen()) { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { fprintf(stderr, "Read function definition:"); - LF->dump(); + FnIR->dump(); + TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); } } else { // Skip token for error recovery. @@ -809,10 +835,11 @@ static void HandleDefinition() { } static void HandleExtern() { - if (PrototypeAST *P = ParseExtern()) { - if (Function *F = P->Codegen()) { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { fprintf(stderr, "Read extern: "); - F->dump(); + FnIR->dump(); + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); } } else { // Skip token for error recovery. @@ -822,16 +849,25 @@ static void HandleExtern() { static void HandleTopLevelExpression() { // Evaluate a top-level expression into an anonymous function. - if (FunctionAST *F = ParseTopLevelExpr()) { - if (Function *LF = F->Codegen()) { - TheExecutionEngine->finalizeObject(); - // JIT the function, returning a function pointer. 
- void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + if (auto FnAST = ParseTopLevelExpr()) { + if (FnAST->codegen()) { - // Cast it to the right type (takes no arguments, returns a double) so we - // can call it as a native function. - double (*FP)() = (double (*)())(intptr_t)FPtr; + // JIT the module containing the anonymous expression, keeping a handle so + // we can free it later. + auto H = TheJIT->addModule(std::move(TheModule)); + InitializeModuleAndPassManager(); + + // Search the JIT for the __anon_expr symbol. + auto ExprSymbol = TheJIT->findSymbol("__anon_expr"); + assert(ExprSymbol && "Function not found"); + + // Get the symbol's address and cast it to the right type (takes no + // arguments, returns a double) so we can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); fprintf(stderr, "Evaluated to %f\n", FP()); + + // Delete the anonymous expression module from the JIT. + TheJIT->removeModule(H); } } else { // Skip token for error recovery. @@ -846,9 +882,9 @@ static void MainLoop() { switch (CurTok) { case tok_eof: return; - case ';': + case ';': // ignore top-level semicolons. getNextToken(); - break; // ignore top-level semicolons. + break; case tok_def: HandleDefinition(); break; @@ -868,7 +904,13 @@ static void MainLoop() { /// putchard - putchar that takes a double and returns 0. extern "C" double putchard(double X) { - putchar((char)X); + fputc((char)X, stderr); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. +extern "C" double printd(double X) { + fprintf(stderr, "%f\n", X); return 0; } @@ -880,7 +922,6 @@ int main() { InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetAsmParser(); - LLVMContext &Context = getGlobalContext(); // Install standard binary operators. // 1 is lowest precedence. @@ -893,50 +934,12 @@ int main() { fprintf(stderr, "ready> "); getNextToken(); - // Make the module, which holds all the code. - std::unique_ptr Owner = make_unique("my cool jit", Context); - TheModule = Owner.get(); + TheJIT = llvm::make_unique(); - // Create the JIT. This takes ownership of the module. - std::string ErrStr; - TheExecutionEngine = - EngineBuilder(std::move(Owner)) - .setErrorStr(&ErrStr) - .setMCJITMemoryManager(llvm::make_unique()) - .create(); - if (!TheExecutionEngine) { - fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); - exit(1); - } - - legacy::FunctionPassManager OurFPM(TheModule); - - // Set up the optimizer pipeline. Start with registering info about how the - // target lays out data structures. - TheModule->setDataLayout(*TheExecutionEngine->getDataLayout()); - // Provide basic AliasAnalysis support for GVN. - OurFPM.add(createBasicAliasAnalysisPass()); - // Do simple "peephole" optimizations and bit-twiddling optzns. - OurFPM.add(createInstructionCombiningPass()); - // Reassociate expressions. - OurFPM.add(createReassociatePass()); - // Eliminate Common SubExpressions. - OurFPM.add(createGVNPass()); - // Simplify the control flow graph (deleting unreachable blocks, etc). - OurFPM.add(createCFGSimplificationPass()); - - OurFPM.doInitialization(); - - // Set the global so the code gen can use this. - TheFPM = &OurFPM; + InitializeModuleAndPassManager(); // Run the main "interpreter loop" now. MainLoop(); - TheFPM = 0; - - // Print out all of the generated code. 
-  TheModule->dump();
-
   return 0;
 }
diff --git a/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
index 7ac1ca49c4f9..49627f07ddf0 100644
--- a/examples/Kaleidoscope/Chapter6/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter6/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   Core
   ExecutionEngine
   InstCombine
-  MCJIT
+  Object
   RuntimeDyld
   ScalarOpts
   Support
@@ -13,3 +13,5 @@ set(LLVM_LINK_COMPONENTS
 add_kaleidoscope_chapter(Kaleidoscope-Ch6
   toy.cpp
   )
+
+export_executable_symbols(Kaleidoscope-Ch6)
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
index e978a3ea3682..4d04f7e888af 100644
--- a/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -1,10 +1,5 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -17,7 +12,10 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "../include/KaleidoscopeJIT.h"
+
 using namespace llvm;
+using namespace llvm::orc;

 //===----------------------------------------------------------------------===//
 // Lexer
@@ -92,7 +90,7 @@ static int gettok() {
       LastChar = getchar();
     } while (isdigit(LastChar) || LastChar == '.');

-    NumVal = strtod(NumStr.c_str(), 0);
+    NumVal = strtod(NumStr.c_str(), nullptr);
     return tok_number;
   }

@@ -124,7 +122,7 @@ namespace {
 class ExprAST {
 public:
   virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
+  virtual Value *codegen() = 0;
 };

 /// NumberExprAST - Expression class for numeric literals like "1.0".
@@ -132,8 +130,8 @@ class NumberExprAST : public ExprAST {
   double Val;

 public:
-  NumberExprAST(double val) : Val(val) {}
-  Value *Codegen() override;
+  NumberExprAST(double Val) : Val(Val) {}
+  Value *codegen() override;
 };

 /// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -141,63 +139,68 @@ class VariableExprAST : public ExprAST {
   std::string Name;

 public:
-  VariableExprAST(const std::string &name) : Name(name) {}
-  Value *Codegen() override;
+  VariableExprAST(const std::string &Name) : Name(Name) {}
+  Value *codegen() override;
 };

 /// UnaryExprAST - Expression class for a unary operator.
 class UnaryExprAST : public ExprAST {
   char Opcode;
-  ExprAST *Operand;
+  std::unique_ptr<ExprAST> Operand;

 public:
-  UnaryExprAST(char opcode, ExprAST *operand)
-      : Opcode(opcode), Operand(operand) {}
-  Value *Codegen() override;
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+      : Opcode(Opcode), Operand(std::move(Operand)) {}
+  Value *codegen() override;
 };

 /// BinaryExprAST - Expression class for a binary operator.
 class BinaryExprAST : public ExprAST {
   char Op;
-  ExprAST *LHS, *RHS;
+  std::unique_ptr<ExprAST> LHS, RHS;

 public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
-      : Op(op), LHS(lhs), RHS(rhs) {}
-  Value *Codegen() override;
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+  Value *codegen() override;
 };

 /// CallExprAST - Expression class for function calls.
class CallExprAST : public ExprAST { std::string Callee; - std::vector Args; + std::vector> Args; public: - CallExprAST(const std::string &callee, std::vector &args) - : Callee(callee), Args(args) {} - Value *Codegen() override; + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; }; /// IfExprAST - Expression class for if/then/else. class IfExprAST : public ExprAST { - ExprAST *Cond, *Then, *Else; + std::unique_ptr Cond, Then, Else; public: - IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) - : Cond(cond), Then(then), Else(_else) {} - Value *Codegen() override; + IfExprAST(std::unique_ptr Cond, std::unique_ptr Then, + std::unique_ptr Else) + : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} + Value *codegen() override; }; /// ForExprAST - Expression class for for/in. class ForExprAST : public ExprAST { std::string VarName; - ExprAST *Start, *End, *Step, *Body; + std::unique_ptr Start, End, Step, Body; public: - ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, - ExprAST *step, ExprAST *body) - : VarName(varname), Start(start), End(end), Step(step), Body(body) {} - Value *Codegen() override; + ForExprAST(const std::string &VarName, std::unique_ptr Start, + std::unique_ptr End, std::unique_ptr Step, + std::unique_ptr Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + Value *codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, @@ -206,15 +209,19 @@ public: class PrototypeAST { std::string Name; std::vector Args; - bool isOperator; + bool IsOperator; unsigned Precedence; // Precedence if a binary op. -public: - PrototypeAST(const std::string &name, const std::vector &args, - bool isoperator = false, unsigned prec = 0) - : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {} - bool isUnaryOp() const { return isOperator && Args.size() == 1; } - bool isBinaryOp() const { return isOperator && Args.size() == 2; } +public: + PrototypeAST(const std::string &Name, std::vector Args, + bool IsOperator = false, unsigned Prec = 0) + : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), + Precedence(Prec) {} + Function *codegen(); + const std::string &getName() const { return Name; } + + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } + bool isBinaryOp() const { return IsOperator && Args.size() == 2; } char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); @@ -222,19 +229,18 @@ public: } unsigned getBinaryPrecedence() const { return Precedence; } - - Function *Codegen(); }; /// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr Proto; + std::unique_ptr Body; public: - FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {} - - Function *Codegen(); + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); }; } // end anonymous namespace @@ -265,41 +271,58 @@ static int GetTokPrecedence() { } /// Error* - These are little helper functions for error handling. 
-ExprAST *Error(const char *Str) { +std::unique_ptr Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); - return 0; -} -PrototypeAST *ErrorP(const char *Str) { - Error(Str); - return 0; -} -FunctionAST *ErrorF(const char *Str) { - Error(Str); - return 0; + return nullptr; } -static ExprAST *ParseExpression(); +std::unique_ptr ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = llvm::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(IdName); + return llvm::make_unique(IdName); // Call. getNextToken(); // eat ( - std::vector Args; + std::vector> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) - return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; if (CurTok == ')') break; @@ -313,60 +336,41 @@ static ExprAST *ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - return new CallExprAST(IdName, Args); -} - -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) - return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). - return V; + return llvm::make_unique(IdName, std::move(Args)); } /// ifexpr ::= 'if' expression 'then' expression 'else' expression -static ExprAST *ParseIfExpr() { +static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. // condition. - ExprAST *Cond = ParseExpression(); + auto Cond = ParseExpression(); if (!Cond) - return 0; + return nullptr; if (CurTok != tok_then) return Error("expected then"); getNextToken(); // eat the then - ExprAST *Then = ParseExpression(); - if (Then == 0) - return 0; + auto Then = ParseExpression(); + if (!Then) + return nullptr; if (CurTok != tok_else) return Error("expected else"); getNextToken(); - ExprAST *Else = ParseExpression(); + auto Else = ParseExpression(); if (!Else) - return 0; + return nullptr; - return new IfExprAST(Cond, Then, Else); + return llvm::make_unique(std::move(Cond), std::move(Then), + std::move(Else)); } /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression -static ExprAST *ParseForExpr() { +static std::unique_ptr ParseForExpr() { getNextToken(); // eat the for. if (CurTok != tok_identifier) @@ -379,35 +383,36 @@ static ExprAST *ParseForExpr() { return Error("expected '=' after for"); getNextToken(); // eat '='. 
- ExprAST *Start = ParseExpression(); - if (Start == 0) - return 0; + auto Start = ParseExpression(); + if (!Start) + return nullptr; if (CurTok != ',') return Error("expected ',' after for start value"); getNextToken(); - ExprAST *End = ParseExpression(); - if (End == 0) - return 0; + auto End = ParseExpression(); + if (!End) + return nullptr; // The step value is optional. - ExprAST *Step = 0; + std::unique_ptr Step; if (CurTok == ',') { getNextToken(); Step = ParseExpression(); - if (Step == 0) - return 0; + if (!Step) + return nullptr; } if (CurTok != tok_in) return Error("expected 'in' after for"); getNextToken(); // eat 'in'. - ExprAST *Body = ParseExpression(); - if (Body == 0) - return 0; + auto Body = ParseExpression(); + if (!Body) + return nullptr; - return new ForExprAST(IdName, Start, End, Step, Body); + return llvm::make_unique(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); } /// primary @@ -416,7 +421,7 @@ static ExprAST *ParseForExpr() { /// ::= parenexpr /// ::= ifexpr /// ::= forexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr ParsePrimary() { switch (CurTok) { default: return Error("unknown token when expecting an expression"); @@ -436,7 +441,7 @@ static ExprAST *ParsePrimary() { /// unary /// ::= primary /// ::= '!' unary -static ExprAST *ParseUnary() { +static std::unique_ptr ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); @@ -444,14 +449,15 @@ static ExprAST *ParseUnary() { // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); - if (ExprAST *Operand = ParseUnary()) - return new UnaryExprAST(Opc, Operand); - return 0; + if (auto Operand = ParseUnary()) + return llvm::make_unique(Opc, std::move(Operand)); + return nullptr; } /// binoprhs /// ::= ('+' unary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -466,40 +472,41 @@ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { getNextToken(); // eat binop // Parse the unary expression after the binary operator. - ExprAST *RHS = ParseUnary(); + auto RHS = ParseUnary(); if (!RHS) - return 0; + return nullptr; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec + 1, RHS); - if (RHS == 0) - return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinOp, LHS, RHS); + LHS = + llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } } /// expression /// ::= unary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParseUnary(); +static std::unique_ptr ParseExpression() { + auto LHS = ParseUnary(); if (!LHS) - return 0; + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' /// ::= binary LETTER number? (id, id) /// ::= unary LETTER (id) -static PrototypeAST *ParsePrototype() { +static std::unique_ptr ParsePrototype() { std::string FnName; unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. 
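For context around this hunk: in ParsePrototype, Kind records whether the prototype is an ordinary identifier (0), a unary operator (1), or a binary operator (2), and the check below enforces that the argument count matches. A user-defined operator then becomes an ordinary function whose name is the operator character pasted onto "unary" or "binary", which is exactly the name UnaryExprAST/BinaryExprAST codegen later looks up for non-builtin operators. A small self-contained sketch of the naming scheme; operatorFnName is an illustrative helper, not a function from the patch.

#include <cassert>
#include <string>

// Kind uses the same encoding as ParsePrototype: 1 = unary, 2 = binary.
static std::string operatorFnName(unsigned Kind, char Opcode) {
  assert(Kind == 1 || Kind == 2);
  return (Kind == 1 ? std::string("unary") : std::string("binary")) + Opcode;
}

int main() {
  assert(operatorFnName(2, '|') == "binary|"); // def binary| 5 (LHS RHS) ...
  assert(operatorFnName(1, '!') == "unary!");  // def unary! (v) ...
  return 0;
}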
@@ -557,33 +564,35 @@ static PrototypeAST *ParsePrototype() {
   if (Kind && ArgNames.size() != Kind)
     return ErrorP("Invalid number of operands for operator");
 
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+  return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
+                                         BinaryPrecedence);
 }
 
 /// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
+static std::unique_ptr<FunctionAST> ParseDefinition() {
   getNextToken(); // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0)
-    return 0;
+  auto Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
 
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
+  if (auto E = ParseExpression())
+    return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
+  return nullptr;
 }
 
 /// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  if (auto E = ParseExpression()) {
     // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
+    auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
+                                                 std::vector<std::string>());
+    return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
   }
-  return 0;
+  return nullptr;
 }
 
 /// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
+static std::unique_ptr<PrototypeAST> ParseExtern() {
   getNextToken(); // eat extern.
   return ParsePrototype();
 }
@@ -592,43 +601,62 @@ static PrototypeAST *ParseExtern() {
 // Code Generation
 //===----------------------------------------------------------------------===//
 
-static Module *TheModule;
+static std::unique_ptr<Module> TheModule;
 static IRBuilder<> Builder(getGlobalContext());
 static std::map<std::string, Value *> NamedValues;
-static legacy::FunctionPassManager *TheFPM;
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
 
 Value *ErrorV(const char *Str) {
   Error(Str);
-  return 0;
+  return nullptr;
 }
 
-Value *NumberExprAST::Codegen() {
+Function *getFunction(std::string Name) {
+  // First, see if the function has already been added to the current module.
+  if (auto *F = TheModule->getFunction(Name))
+    return F;
+
+  // If not, check whether we can codegen the declaration from some existing
+  // prototype.
+  auto FI = FunctionProtos.find(Name);
+  if (FI != FunctionProtos.end())
+    return FI->second->codegen();
+
+  // If no existing prototype exists, return null.
+  return nullptr;
+}
+
+Value *NumberExprAST::codegen() {
   return ConstantFP::get(getGlobalContext(), APFloat(Val));
 }
 
-Value *VariableExprAST::Codegen() {
+Value *VariableExprAST::codegen() {
   // Look this variable up in the function.
   Value *V = NamedValues[Name];
-  return V ? V : ErrorV("Unknown variable name");
+  if (!V)
+    return ErrorV("Unknown variable name");
+  return V;
 }
 
-Value *UnaryExprAST::Codegen() {
-  Value *OperandV = Operand->Codegen();
-  if (OperandV == 0)
-    return 0;
+Value *UnaryExprAST::codegen() {
+  Value *OperandV = Operand->codegen();
+  if (!OperandV)
+    return nullptr;
 
-  Function *F = TheModule->getFunction(std::string("unary") + Opcode);
-  if (F == 0)
+  Function *F = getFunction(std::string("unary") + Opcode);
+  if (!F)
     return ErrorV("Unknown unary operator");
 
   return Builder.CreateCall(F, OperandV, "unop");
 }
 
-Value *BinaryExprAST::Codegen() {
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0)
-    return 0;
+Value *BinaryExprAST::codegen() {
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;
 
   switch (Op) {
   case '+':
@@ -648,17 +676,17 @@ Value *BinaryExprAST::Codegen() {
 
   // If it wasn't a builtin binary operator, it must be a user defined one. Emit
   // a call to it.
-  Function *F = TheModule->getFunction(std::string("binary") + Op);
+  Function *F = getFunction(std::string("binary") + Op);
   assert(F && "binary operator not found!");
 
-  Value *Ops[] = { L, R };
+  Value *Ops[] = {L, R};
   return Builder.CreateCall(F, Ops, "binop");
 }
 
-Value *CallExprAST::Codegen() {
+Value *CallExprAST::codegen() {
   // Look up the name in the global module table.
-  Function *CalleeF = TheModule->getFunction(Callee);
-  if (CalleeF == 0)
+  Function *CalleeF = getFunction(Callee);
+  if (!CalleeF)
     return ErrorV("Unknown function referenced");
 
   // If argument mismatch error.
@@ -667,18 +695,18 @@ Value *CallExprAST::Codegen() {
 
   std::vector<Value *> ArgsV;
   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]->Codegen());
-    if (ArgsV.back() == 0)
-      return 0;
+    ArgsV.push_back(Args[i]->codegen());
+    if (!ArgsV.back())
+      return nullptr;
   }
 
   return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
 }
 
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond->Codegen();
-  if (CondV == 0)
-    return 0;
+Value *IfExprAST::codegen() {
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
 
   // Convert condition to a bool by comparing equal to 0.0.
   CondV = Builder.CreateFCmpONE(
@@ -698,9 +726,9 @@ Value *IfExprAST::Codegen() {
 
   // Emit then value.
   Builder.SetInsertPoint(ThenBB);
 
-  Value *ThenV = Then->Codegen();
-  if (ThenV == 0)
-    return 0;
+  Value *ThenV = Then->codegen();
+  if (!ThenV)
+    return nullptr;
 
   Builder.CreateBr(MergeBB);
   // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
@@ -710,9 +738,9 @@ Value *IfExprAST::Codegen() {
   TheFunction->getBasicBlockList().push_back(ElseBB);
   Builder.SetInsertPoint(ElseBB);
 
-  Value *ElseV = Else->Codegen();
-  if (ElseV == 0)
-    return 0;
+  Value *ElseV = Else->codegen();
+  if (!ElseV)
+    return nullptr;
 
   Builder.CreateBr(MergeBB);
   // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
@@ -729,27 +757,26 @@ Value *IfExprAST::Codegen() {
   return PN;
 }
 
-Value *ForExprAST::Codegen() {
-  // Output this as:
-  //   ...
-  //   start = startexpr
-  //   goto loop
-  // loop:
-  //   variable = phi [start, loopheader], [nextvariable, loopend]
-  //   ...
-  //   bodyexpr
-  //   ...
-  // loopend:
-  //   step = stepexpr
-  //   nextvariable = variable + step
-  //   endcond = endexpr
-  //   br endcond, loop, endloop
-  // outloop:
-
+// Output for-loop as:
+//   ...
+//   start = startexpr
+//   goto loop
+// loop:
+//   variable = phi [start, loopheader], [nextvariable, loopend]
+//   ...
+//   bodyexpr
+//   ...
+// loopend:
+//   step = stepexpr
+//   nextvariable = variable + step
+//   endcond = endexpr
+//   br endcond, loop, endloop
+// outloop:
+Value *ForExprAST::codegen() {
   // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start->Codegen();
-  if (StartVal == 0)
-    return 0;
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
 
   // Make the new basic block for the loop header, inserting after current
   // block.
@@ -777,15 +804,15 @@ Value *ForExprAST::Codegen() {
   // Emit the body of the loop.  This, like any other expr, can change the
   // current BB.  Note that we ignore the value computed by the body, but don't
   // allow an error.
-  if (Body->Codegen() == 0)
-    return 0;
+  if (!Body->codegen())
+    return nullptr;
 
   // Emit the step value.
-  Value *StepVal;
+  Value *StepVal = nullptr;
   if (Step) {
-    StepVal = Step->Codegen();
-    if (StepVal == 0)
-      return 0;
+    StepVal = Step->codegen();
+    if (!StepVal)
+      return nullptr;
   } else {
     // If not specified, use 1.0.
     StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
@@ -794,9 +821,9 @@ Value *ForExprAST::Codegen() {
   Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
 
   // Compute the end condition.
-  Value *EndCond = End->Codegen();
-  if (EndCond == 0)
-    return EndCond;
+  Value *EndCond = End->codegen();
+  if (!EndCond)
+    return nullptr;
 
   // Convert condition to a bool by comparing equal to 0.0.
   EndCond = Builder.CreateFCmpONE(
@@ -826,7 +853,7 @@ Value *ForExprAST::Codegen() {
   return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
 }
 
-Function *PrototypeAST::Codegen() {
+Function *PrototypeAST::codegen() {
   // Make the function type:  double(double,double) etc.
   std::vector<Type *> Doubles(Args.size(),
                               Type::getDoubleTy(getGlobalContext()));
@@ -834,64 +861,46 @@ Function *PrototypeAST::Codegen() {
       FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
 
   Function *F =
-      Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F->getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-
-    // If F already has a body, reject this.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-
-    // If F took a different number of args, reject.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
+      Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
 
   // Set names for all arguments.
   unsigned Idx = 0;
-  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
-       ++AI, ++Idx) {
-    AI->setName(Args[Idx]);
-
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = AI;
-  }
+  for (auto &Arg : F->args())
+    Arg.setName(Args[Idx++]);
 
   return F;
 }
 
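A brief aside, not part of the patch, on why FunctionProtos and getFunction() exist at all. Once each top-level expression is compiled into its own short-lived Module, a hypothetical session (illustrative; the prompt text comes from the tutorial, not this diff) hits the following situation:

    // ready> def add(a b) a + b;   // 'add' is emitted into module M1, and M1
    //                              // is then handed off to the JIT.
    // ready> add(1, 2);            // the call is emitted into a fresh M2.
    //
    // M2 contains no 'add', so TheModule->getFunction("add") alone would fail.
    // getFunction() falls back to FunctionProtos["add"]->codegen(), which puts
    // only a declaration into M2, roughly:
    //
    //   declare double @add(double, double)
    //
    // and the JIT's cross-module symbol resolution binds the call to the
    // definition living in M1.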
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
+Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
 
   // If this is an operator, install it.
-  if (Proto->isBinaryOp())
-    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+  if (P.isBinaryOp())
+    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
 
   // Create a new basic block to start insertion into.
   BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
   Builder.SetInsertPoint(BB);
 
-  if (Value *RetVal = Body->Codegen()) {
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args())
+    NamedValues[Arg.getName()] = &Arg;
+
+  if (Value *RetVal = Body->codegen()) {
     // Finish off the function.
     Builder.CreateRet(RetVal);
 
     // Validate the generated code, checking for consistency.
     verifyFunction(*TheFunction);
 
-    // Optimize the function.
+    // Run the optimizer on the function.
     TheFPM->run(*TheFunction);
 
     return TheFunction;
@@ -900,22 +909,42 @@ Function *FunctionAST::Codegen() {
 
   // Error reading body, remove function.
   TheFunction->eraseFromParent();
 
-  if (Proto->isBinaryOp())
+  if (P.isBinaryOp())
     BinopPrecedence.erase(Proto->getOperatorName());
-  return 0;
+  return nullptr;
 }
 
 //===----------------------------------------------------------------------===//
 // Top-Level parsing and JIT Driver
 //===----------------------------------------------------------------------===//
 
-static ExecutionEngine *TheExecutionEngine;
+static void InitializeModuleAndPassManager() {
+  // Open a new module.
+  TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext());
+  TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
+
+  // Create a new pass manager attached to it.
+  TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  TheFPM->doInitialization();
+}
 
 static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
+  if (auto FnAST = ParseDefinition()) {
+    if (auto *FnIR = FnAST->codegen()) {
       fprintf(stderr, "Read function definition:");
-      LF->dump();
+      FnIR->dump();
+      TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
     }
   } else {
     // Skip token for error recovery.
@@ -924,10 +953,11 @@ static void HandleDefinition() {
 }
 
 static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
+  if (auto ProtoAST = ParseExtern()) {
+    if (auto *FnIR = ProtoAST->codegen()) {
       fprintf(stderr, "Read extern: ");
-      F->dump();
+      FnIR->dump();
+      FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
     }
   } else {
     // Skip token for error recovery.
@@ -937,16 +967,25 @@ static void HandleExtern() {
 
 static void HandleTopLevelExpression() {
   // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      TheExecutionEngine->finalizeObject();
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (FnAST->codegen()) {
 
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      // JIT the module containing the anonymous expression, keeping a handle so
+      // we can free it later.
+      auto H = TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
+
+      // Search the JIT for the __anon_expr symbol.
+      auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
+      assert(ExprSymbol && "Function not found");
+
+      // Get the symbol's address and cast it to the right type (takes no
+      // arguments, returns a double) so we can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
       fprintf(stderr, "Evaluated to %f\n", FP());
+
+      // Delete the anonymous expression module from the JIT.
+      TheJIT->removeModule(H);
     }
   } else {
     // Skip token for error recovery.
@@ -961,9 +1000,9 @@ static void MainLoop() {
     switch (CurTok) {
     case tok_eof:
       return;
-    case ';':
+    case ';': // ignore top-level semicolons.
       getNextToken();
-      break; // ignore top-level semicolons.
+      break;
    case tok_def:
      HandleDefinition();
      break;
@@ -983,13 +1022,13 @@ static void MainLoop() {
 
 /// putchard - putchar that takes a double and returns 0.
 extern "C" double putchard(double X) {
-  putchar((char)X);
+  fputc((char)X, stderr);
   return 0;
 }
 
 /// printd - printf that takes a double prints it as "%f\n", returning 0.
 extern "C" double printd(double X) {
-  printf("%f\n", X);
+  fprintf(stderr, "%f\n", X);
   return 0;
 }
 
@@ -1001,7 +1040,6 @@ int main() {
   InitializeNativeTarget();
   InitializeNativeTargetAsmPrinter();
   InitializeNativeTargetAsmParser();
-  LLVMContext &Context = getGlobalContext();
 
   // Install standard binary operators.
   // 1 is lowest precedence.
@@ -1014,50 +1052,12 @@ int main() {
   fprintf(stderr, "ready> ");
   getNextToken();
 
-  // Make the module, which holds all the code.
-  std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context);
-  TheModule = Owner.get();
+  TheJIT = llvm::make_unique<KaleidoscopeJIT>();
 
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine =
-      EngineBuilder(std::move(Owner))
-          .setErrorStr(&ErrStr)
-          .setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>())
-          .create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  legacy::FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  TheModule->setDataLayout(*TheExecutionEngine->getDataLayout());
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &OurFPM;
+  InitializeModuleAndPassManager();
 
   // Run the main "interpreter loop" now.
   MainLoop();
 
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule->dump();
-
   return 0;
 }
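For orientation before the Chapter 7 diff: the TheJIT->addModule/findSymbol/removeModule calls above target the small ORC wrapper this patch adds at examples/Kaleidoscope/include/KaleidoscopeJIT.h. The interface below is paraphrased and trimmed to the members the tutorial uses; treat the exact typedefs as an assumption, not a quotation of the header:

    class KaleidoscopeJIT {
    public:
      // Opaque handle identifying one added module; pass it back to
      // removeModule() to free that module's compiled code.
      typedef CompileLayerT::ModuleSetHandleT ModuleHandleT; // CompileLayerT is
                                                             // the wrapper's
                                                             // IRCompileLayer
      TargetMachine &getTargetMachine();       // used for setDataLayout() above
      ModuleHandleT addModule(std::unique_ptr<Module> M); // compile + expose
      void removeModule(ModuleHandleT H);      // discard the module's code
      JITSymbol findSymbol(const std::string Name); // e.g. "__anon_expr"
    };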
diff --git a/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
index 8725e4761f78..e67d7928efe7 100644
--- a/examples/Kaleidoscope/Chapter7/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter7/CMakeLists.txt
@@ -3,14 +3,15 @@ set(LLVM_LINK_COMPONENTS
   Core
   ExecutionEngine
   InstCombine
-  MCJIT
+  Object
   RuntimeDyld
   ScalarOpts
   Support
-  TransformUtils
   native
   )
 
 add_kaleidoscope_chapter(Kaleidoscope-Ch7
   toy.cpp
   )
+
+export_executable_symbols(Kaleidoscope-Ch7)
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
index b1a41fa01b76..5c0094013d97 100644
--- a/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -1,10 +1,5 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
@@ -17,7 +12,10 @@
 #include <map>
 #include <string>
 #include <vector>
+#include "../include/KaleidoscopeJIT.h"
+
 using namespace llvm;
+using namespace llvm::orc;
 
 //===----------------------------------------------------------------------===//
 // Lexer
 //===----------------------------------------------------------------------===//
@@ -97,7 +95,7 @@ static int gettok() {
       LastChar = getchar();
     } while (isdigit(LastChar) || LastChar == '.');
 
-    NumVal = strtod(NumStr.c_str(), 0);
+    NumVal = strtod(NumStr.c_str(), nullptr);
     return tok_number;
   }
 
@@ -129,7 +127,7 @@ namespace {
 class ExprAST {
 public:
   virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
+  virtual Value *codegen() = 0;
 };
 
 /// NumberExprAST - Expression class for numeric literals like "1.0".
@@ -137,8 +135,8 @@ class NumberExprAST : public ExprAST {
   double Val;
 
 public:
-  NumberExprAST(double val) : Val(val) {}
-  Value *Codegen() override;
+  NumberExprAST(double Val) : Val(Val) {}
+  Value *codegen() override;
 };
 
 /// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -146,93 +144,103 @@ class VariableExprAST : public ExprAST {
   std::string Name;
 
 public:
-  VariableExprAST(const std::string &name) : Name(name) {}
+  VariableExprAST(const std::string &Name) : Name(Name) {}
   const std::string &getName() const { return Name; }
-  Value *Codegen() override;
+  Value *codegen() override;
 };
 
 /// UnaryExprAST - Expression class for a unary operator.
 class UnaryExprAST : public ExprAST {
   char Opcode;
-  ExprAST *Operand;
+  std::unique_ptr<ExprAST> Operand;
 
 public:
-  UnaryExprAST(char opcode, ExprAST *operand)
-      : Opcode(opcode), Operand(operand) {}
-  Value *Codegen() override;
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+      : Opcode(Opcode), Operand(std::move(Operand)) {}
+  Value *codegen() override;
 };
 
 /// BinaryExprAST - Expression class for a binary operator.
 class BinaryExprAST : public ExprAST {
   char Op;
-  ExprAST *LHS, *RHS;
+  std::unique_ptr<ExprAST> LHS, RHS;
 
 public:
-  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
-      : Op(op), LHS(lhs), RHS(rhs) {}
-  Value *Codegen() override;
+  BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS,
+                std::unique_ptr<ExprAST> RHS)
+      : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {}
+  Value *codegen() override;
 };
 
 /// CallExprAST - Expression class for function calls.
 class CallExprAST : public ExprAST {
   std::string Callee;
-  std::vector<ExprAST *> Args;
+  std::vector<std::unique_ptr<ExprAST>> Args;
 
 public:
-  CallExprAST(const std::string &callee, std::vector<ExprAST *> &args)
-      : Callee(callee), Args(args) {}
-  Value *Codegen() override;
+  CallExprAST(const std::string &Callee,
+              std::vector<std::unique_ptr<ExprAST>> Args)
+      : Callee(Callee), Args(std::move(Args)) {}
+  Value *codegen() override;
 };
 
 /// IfExprAST - Expression class for if/then/else.
 class IfExprAST : public ExprAST {
-  ExprAST *Cond, *Then, *Else;
+  std::unique_ptr<ExprAST> Cond, Then, Else;
 
 public:
-  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
-      : Cond(cond), Then(then), Else(_else) {}
-  Value *Codegen() override;
+  IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
+            std::unique_ptr<ExprAST> Else)
+      : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
+  Value *codegen() override;
 };
 
 /// ForExprAST - Expression class for for/in.
 class ForExprAST : public ExprAST {
   std::string VarName;
-  ExprAST *Start, *End, *Step, *Body;
+  std::unique_ptr<ExprAST> Start, End, Step, Body;
 
 public:
-  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
-             ExprAST *step, ExprAST *body)
-      : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
-  Value *Codegen() override;
+  ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start,
+             std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step,
+             std::unique_ptr<ExprAST> Body)
+      : VarName(VarName), Start(std::move(Start)), End(std::move(End)),
+        Step(std::move(Step)), Body(std::move(Body)) {}
+  Value *codegen() override;
 };
 
 /// VarExprAST - Expression class for var/in
 class VarExprAST : public ExprAST {
-  std::vector<std::pair<std::string, ExprAST *> > VarNames;
-  ExprAST *Body;
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
+  std::unique_ptr<ExprAST> Body;
 
 public:
-  VarExprAST(const std::vector<std::pair<std::string, ExprAST *> > &varnames,
-             ExprAST *body)
-      : VarNames(varnames), Body(body) {}
-
-  Value *Codegen() override;
+  VarExprAST(
+      std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
+      std::unique_ptr<ExprAST> Body)
+      : VarNames(std::move(VarNames)), Body(std::move(Body)) {}
+  Value *codegen() override;
 };
 
 /// PrototypeAST - This class represents the "prototype" for a function,
-/// which captures its argument names as well as if it is an operator.
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as if it is an operator.
 class PrototypeAST {
   std::string Name;
   std::vector<std::string> Args;
-  bool isOperator;
+  bool IsOperator;
   unsigned Precedence; // Precedence if a binary op.
 
-public:
-  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
-               bool isoperator = false, unsigned prec = 0)
-      : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
-
-  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
-  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+public:
+  PrototypeAST(const std::string &Name, std::vector<std::string> Args,
               bool IsOperator = false, unsigned Prec = 0)
+      : Name(Name), Args(std::move(Args)), IsOperator(IsOperator),
+        Precedence(Prec) {}
+  Function *codegen();
+  const std::string &getName() const { return Name; }
+
+  bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
 
   char getOperatorName() const {
     assert(isUnaryOp() || isBinaryOp());
@@ -240,21 +248,18 @@ public:
   }
 
   unsigned getBinaryPrecedence() const { return Precedence; }
-
-  Function *Codegen();
-
-  void CreateArgumentAllocas(Function *F);
 };
 
 /// FunctionAST - This class represents a function definition itself.
 class FunctionAST {
-  PrototypeAST *Proto;
-  ExprAST *Body;
+  std::unique_ptr<PrototypeAST> Proto;
+  std::unique_ptr<ExprAST> Body;
 
 public:
-  FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {}
-
-  Function *Codegen();
+  FunctionAST(std::unique_ptr<PrototypeAST> Proto,
+              std::unique_ptr<ExprAST> Body)
+      : Proto(std::move(Proto)), Body(std::move(Body)) {}
+  Function *codegen();
 };
 } // end anonymous namespace
 
@@ -285,41 +290,58 @@ static int GetTokPrecedence() {
 }
 
 /// Error* - These are little helper functions for error handling.
-ExprAST *Error(const char *Str) {
+std::unique_ptr<ExprAST> Error(const char *Str) {
   fprintf(stderr, "Error: %s\n", Str);
-  return 0;
-}
-PrototypeAST *ErrorP(const char *Str) {
-  Error(Str);
-  return 0;
-}
-FunctionAST *ErrorF(const char *Str) {
-  Error(Str);
-  return 0;
+  return nullptr;
 }
 
-static ExprAST *ParseExpression();
+std::unique_ptr<PrototypeAST> ErrorP(const char *Str) {
+  Error(Str);
+  return nullptr;
+}
+
+static std::unique_ptr<ExprAST> ParseExpression();
+
+/// numberexpr ::= number
+static std::unique_ptr<ExprAST> ParseNumberExpr() {
+  auto Result = llvm::make_unique<NumberExprAST>(NumVal);
+  getNextToken(); // consume the number
+  return std::move(Result);
+}
+
+/// parenexpr ::= '(' expression ')'
+static std::unique_ptr<ExprAST> ParseParenExpr() {
+  getNextToken(); // eat (.
+  auto V = ParseExpression();
+  if (!V)
+    return nullptr;
+
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken(); // eat ).
+  return V;
+}
 
 /// identifierexpr
 ///   ::= identifier
 ///   ::= identifier '(' expression* ')'
-static ExprAST *ParseIdentifierExpr() {
+static std::unique_ptr<ExprAST> ParseIdentifierExpr() {
   std::string IdName = IdentifierStr;
 
   getNextToken(); // eat identifier.
 
   if (CurTok != '(') // Simple variable ref.
-    return new VariableExprAST(IdName);
+    return llvm::make_unique<VariableExprAST>(IdName);
 
   // Call.
   getNextToken(); // eat (
-  std::vector<ExprAST *> Args;
+  std::vector<std::unique_ptr<ExprAST>> Args;
   if (CurTok != ')') {
     while (1) {
-      ExprAST *Arg = ParseExpression();
-      if (!Arg)
-        return 0;
-      Args.push_back(Arg);
+      if (auto Arg = ParseExpression())
+        Args.push_back(std::move(Arg));
+      else
+        return nullptr;
 
       if (CurTok == ')')
         break;
@@ -333,60 +355,41 @@ static ExprAST *ParseIdentifierExpr() {
   // Eat the ')'.
   getNextToken();
 
-  return new CallExprAST(IdName, Args);
-}
-
-/// numberexpr ::= number
-static ExprAST *ParseNumberExpr() {
-  ExprAST *Result = new NumberExprAST(NumVal);
-  getNextToken(); // consume the number
-  return Result;
-}
-
-/// parenexpr ::= '(' expression ')'
-static ExprAST *ParseParenExpr() {
-  getNextToken(); // eat (.
-  ExprAST *V = ParseExpression();
-  if (!V)
-    return 0;
-
-  if (CurTok != ')')
-    return Error("expected ')'");
-  getNextToken(); // eat ).
-  return V;
+  return llvm::make_unique<CallExprAST>(IdName, std::move(Args));
 }
 
 /// ifexpr ::= 'if' expression 'then' expression 'else' expression
-static ExprAST *ParseIfExpr() {
+static std::unique_ptr<ExprAST> ParseIfExpr() {
   getNextToken(); // eat the if.
 
   // condition.
-  ExprAST *Cond = ParseExpression();
+  auto Cond = ParseExpression();
   if (!Cond)
-    return 0;
+    return nullptr;
 
   if (CurTok != tok_then)
     return Error("expected then");
   getNextToken(); // eat the then
 
-  ExprAST *Then = ParseExpression();
-  if (Then == 0)
-    return 0;
+  auto Then = ParseExpression();
+  if (!Then)
+    return nullptr;
 
   if (CurTok != tok_else)
     return Error("expected else");
 
   getNextToken();
 
-  ExprAST *Else = ParseExpression();
+  auto Else = ParseExpression();
   if (!Else)
-    return 0;
+    return nullptr;
 
-  return new IfExprAST(Cond, Then, Else);
+  return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then),
+                                      std::move(Else));
 }
 
 /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
-static ExprAST *ParseForExpr() {
+static std::unique_ptr<ExprAST> ParseForExpr() {
   getNextToken(); // eat the for.
 
   if (CurTok != tok_identifier)
@@ -399,43 +402,44 @@ static ExprAST *ParseForExpr() {
     return Error("expected '=' after for");
   getNextToken(); // eat '='.
 
-  ExprAST *Start = ParseExpression();
-  if (Start == 0)
-    return 0;
+  auto Start = ParseExpression();
+  if (!Start)
+    return nullptr;
 
   if (CurTok != ',')
     return Error("expected ',' after for start value");
   getNextToken();
 
-  ExprAST *End = ParseExpression();
-  if (End == 0)
-    return 0;
+  auto End = ParseExpression();
+  if (!End)
+    return nullptr;
 
   // The step value is optional.
-  ExprAST *Step = 0;
+  std::unique_ptr<ExprAST> Step;
   if (CurTok == ',') {
     getNextToken();
     Step = ParseExpression();
-    if (Step == 0)
-      return 0;
+    if (!Step)
+      return nullptr;
   }
 
   if (CurTok != tok_in)
     return Error("expected 'in' after for");
   getNextToken(); // eat 'in'.
 
-  ExprAST *Body = ParseExpression();
-  if (Body == 0)
-    return 0;
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
 
-  return new ForExprAST(IdName, Start, End, Step, Body);
+  return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End),
+                                       std::move(Step), std::move(Body));
 }
 
 /// varexpr ::= 'var' identifier ('=' expression)?
 //                    (',' identifier ('=' expression)?)* 'in' expression
-static ExprAST *ParseVarExpr() {
+static std::unique_ptr<ExprAST> ParseVarExpr() {
   getNextToken(); // eat the var.
 
-  std::vector<std::pair<std::string, ExprAST *> > VarNames;
+  std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames;
 
   // At least one variable name is required.
   if (CurTok != tok_identifier)
@@ -446,16 +450,16 @@ static ExprAST *ParseVarExpr() {
     getNextToken(); // eat identifier.
 
     // Read the optional initializer.
-    ExprAST *Init = 0;
+    std::unique_ptr<ExprAST> Init = nullptr;
     if (CurTok == '=') {
       getNextToken(); // eat the '='.
 
       Init = ParseExpression();
-      if (Init == 0)
-        return 0;
+      if (!Init)
+        return nullptr;
     }
 
-    VarNames.push_back(std::make_pair(Name, Init));
+    VarNames.push_back(std::make_pair(Name, std::move(Init)));
 
     // End of var list, exit loop.
     if (CurTok != ',')
@@ -471,11 +475,11 @@ static ExprAST *ParseVarExpr() {
     return Error("expected 'in' keyword after 'var'");
   getNextToken(); // eat 'in'.
 
-  ExprAST *Body = ParseExpression();
-  if (Body == 0)
-    return 0;
+  auto Body = ParseExpression();
+  if (!Body)
+    return nullptr;
 
-  return new VarExprAST(VarNames, Body);
+  return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body));
 }
 
 /// primary
@@ -485,7 +489,7 @@ static ExprAST *ParseVarExpr() {
 ///   ::= ifexpr
 ///   ::= forexpr
 ///   ::= varexpr
-static ExprAST *ParsePrimary() {
+static std::unique_ptr<ExprAST> ParsePrimary() {
   switch (CurTok) {
   default:
     return Error("unknown token when expecting an expression");
@@ -507,7 +511,7 @@ static ExprAST *ParsePrimary() {
 
 /// unary
 ///   ::= primary
 ///   ::= '!' unary
-static ExprAST *ParseUnary() {
+static std::unique_ptr<ExprAST> ParseUnary() {
   // If the current token is not an operator, it must be a primary expr.
   if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
     return ParsePrimary();
@@ -515,14 +519,15 @@ static ExprAST *ParseUnary() {
   // If this is a unary operator, read it.
   int Opc = CurTok;
   getNextToken();
-  if (ExprAST *Operand = ParseUnary())
-    return new UnaryExprAST(Opc, Operand);
-  return 0;
+  if (auto Operand = ParseUnary())
+    return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
+  return nullptr;
 }
 
 /// binoprhs
 ///   ::= ('+' unary)*
-static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec,
+                                              std::unique_ptr<ExprAST> LHS) {
   // If this is a binop, find its precedence.
   while (1) {
     int TokPrec = GetTokPrecedence();
@@ -537,40 +542,41 @@ static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
     getNextToken(); // eat binop
 
     // Parse the unary expression after the binary operator.
-    ExprAST *RHS = ParseUnary();
+    auto RHS = ParseUnary();
     if (!RHS)
-      return 0;
+      return nullptr;
 
     // If BinOp binds less tightly with RHS than the operator after RHS, let
     // the pending operator take RHS as its LHS.
     int NextPrec = GetTokPrecedence();
     if (TokPrec < NextPrec) {
-      RHS = ParseBinOpRHS(TokPrec + 1, RHS);
-      if (RHS == 0)
-        return 0;
+      RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS));
+      if (!RHS)
+        return nullptr;
    }
 
     // Merge LHS/RHS.
-    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+    LHS =
+        llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS));
   }
 }
 
 /// expression
 ///   ::= unary binoprhs
 ///
-static ExprAST *ParseExpression() {
-  ExprAST *LHS = ParseUnary();
+static std::unique_ptr<ExprAST> ParseExpression() {
  auto LHS = ParseUnary();
   if (!LHS)
-    return 0;
+    return nullptr;
 
-  return ParseBinOpRHS(0, LHS);
+  return ParseBinOpRHS(0, std::move(LHS));
 }
 
 /// prototype
 ///   ::= id '(' id* ')'
 ///   ::= binary LETTER number? (id, id)
 ///   ::= unary LETTER (id)
-static PrototypeAST *ParsePrototype() {
+static std::unique_ptr<PrototypeAST> ParsePrototype() {
   std::string FnName;
 
   unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
@@ -628,33 +634,35 @@ static PrototypeAST *ParsePrototype() {
   if (Kind && ArgNames.size() != Kind)
     return ErrorP("Invalid number of operands for operator");
 
-  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+  return llvm::make_unique<PrototypeAST>(FnName, ArgNames, Kind != 0,
+                                         BinaryPrecedence);
 }
 
 /// definition ::= 'def' prototype expression
-static FunctionAST *ParseDefinition() {
+static std::unique_ptr<FunctionAST> ParseDefinition() {
   getNextToken(); // eat def.
-  PrototypeAST *Proto = ParsePrototype();
-  if (Proto == 0)
-    return 0;
+  auto Proto = ParsePrototype();
+  if (!Proto)
+    return nullptr;
 
-  if (ExprAST *E = ParseExpression())
-    return new FunctionAST(Proto, E);
-  return 0;
+  if (auto E = ParseExpression())
+    return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
+  return nullptr;
 }
 
 /// toplevelexpr ::= expression
-static FunctionAST *ParseTopLevelExpr() {
-  if (ExprAST *E = ParseExpression()) {
+static std::unique_ptr<FunctionAST> ParseTopLevelExpr() {
+  if (auto E = ParseExpression()) {
     // Make an anonymous proto.
-    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
-    return new FunctionAST(Proto, E);
+    auto Proto = llvm::make_unique<PrototypeAST>("__anon_expr",
+                                                 std::vector<std::string>());
+    return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E));
   }
-  return 0;
+  return nullptr;
 }
 
 /// external ::= 'extern' prototype
-static PrototypeAST *ParseExtern() {
+static std::unique_ptr<PrototypeAST> ParseExtern() {
   getNextToken(); // eat extern.
   return ParsePrototype();
 }
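The code-generation section that follows keeps every Kaleidoscope variable in a stack slot created in the function's entry block. As a rough, hand-written illustration (not output of this code), 'var x = 1.0 in x = x + 1.0' lowers to the load/store pattern below, which LLVM's mem2reg-style promotion can later rewrite back into SSA registers:

    // entry:
    //   %x = alloca double            ; CreateEntryBlockAlloca
    //   store double 1.0, double* %x  ; initializer
    //   %cur = load double, double* %x
    //   %new = fadd double %cur, 1.0
    //   store double %new, double* %x ; assignment via the '=' operator case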
@@ -663,14 +671,31 @@ static PrototypeAST *ParseExtern() {
 // Code Generation
 //===----------------------------------------------------------------------===//
 
-static Module *TheModule;
+static std::unique_ptr<Module> TheModule;
 static IRBuilder<> Builder(getGlobalContext());
 static std::map<std::string, AllocaInst *> NamedValues;
-static legacy::FunctionPassManager *TheFPM;
+static std::unique_ptr<legacy::FunctionPassManager> TheFPM;
+static std::unique_ptr<KaleidoscopeJIT> TheJIT;
+static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos;
 
 Value *ErrorV(const char *Str) {
   Error(Str);
-  return 0;
+  return nullptr;
+}
+
+Function *getFunction(std::string Name) {
+  // First, see if the function has already been added to the current module.
+  if (auto *F = TheModule->getFunction(Name))
+    return F;
+
+  // If not, check whether we can codegen the declaration from some existing
+  // prototype.
+  auto FI = FunctionProtos.find(Name);
+  if (FI != FunctionProtos.end())
+    return FI->second->codegen();
+
+  // If no existing prototype exists, return null.
+  return nullptr;
 }
 
 /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
@@ -679,64 +704,64 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
                                           const std::string &VarName) {
   IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
                    TheFunction->getEntryBlock().begin());
-  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr,
                            VarName.c_str());
 }
 
-Value *NumberExprAST::Codegen() {
+Value *NumberExprAST::codegen() {
   return ConstantFP::get(getGlobalContext(), APFloat(Val));
 }
 
-Value *VariableExprAST::Codegen() {
+Value *VariableExprAST::codegen() {
   // Look this variable up in the function.
   Value *V = NamedValues[Name];
-  if (V == 0)
+  if (!V)
     return ErrorV("Unknown variable name");
 
   // Load the value.
   return Builder.CreateLoad(V, Name.c_str());
 }
 
-Value *UnaryExprAST::Codegen() {
-  Value *OperandV = Operand->Codegen();
-  if (OperandV == 0)
-    return 0;
+Value *UnaryExprAST::codegen() {
  Value *OperandV = Operand->codegen();
+  if (!OperandV)
+    return nullptr;
 
-  Function *F = TheModule->getFunction(std::string("unary") + Opcode);
-  if (F == 0)
+  Function *F = getFunction(std::string("unary") + Opcode);
+  if (!F)
    return ErrorV("Unknown unary operator");
 
   return Builder.CreateCall(F, OperandV, "unop");
 }
 
-Value *BinaryExprAST::Codegen() {
+Value *BinaryExprAST::codegen() {
   // Special case '=' because we don't want to emit the LHS as an expression.
   if (Op == '=') {
     // Assignment requires the LHS to be an identifier.
     // This assume we're building without RTTI because LLVM builds that way by
     // default.  If you build LLVM with RTTI this can be changed to a
     // dynamic_cast for automatic error checking.
-    VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS);
+    VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get());
     if (!LHSE)
       return ErrorV("destination of '=' must be a variable");
 
     // Codegen the RHS.
-    Value *Val = RHS->Codegen();
-    if (Val == 0)
-      return 0;
+    Value *Val = RHS->codegen();
+    if (!Val)
+      return nullptr;
 
     // Look up the name.
     Value *Variable = NamedValues[LHSE->getName()];
-    if (Variable == 0)
+    if (!Variable)
      return ErrorV("Unknown variable name");
 
     Builder.CreateStore(Val, Variable);
     return Val;
   }
 
-  Value *L = LHS->Codegen();
-  Value *R = RHS->Codegen();
-  if (L == 0 || R == 0)
-    return 0;
+  Value *L = LHS->codegen();
+  Value *R = RHS->codegen();
+  if (!L || !R)
+    return nullptr;
 
   switch (Op) {
   case '+':
@@ -756,17 +781,17 @@ Value *BinaryExprAST::Codegen() {
 
   // If it wasn't a builtin binary operator, it must be a user defined one. Emit
   // a call to it.
-  Function *F = TheModule->getFunction(std::string("binary") + Op);
+  Function *F = getFunction(std::string("binary") + Op);
   assert(F && "binary operator not found!");
 
-  Value *Ops[] = { L, R };
+  Value *Ops[] = {L, R};
   return Builder.CreateCall(F, Ops, "binop");
 }
 
-Value *CallExprAST::Codegen() {
+Value *CallExprAST::codegen() {
   // Look up the name in the global module table.
-  Function *CalleeF = TheModule->getFunction(Callee);
-  if (CalleeF == 0)
+  Function *CalleeF = getFunction(Callee);
+  if (!CalleeF)
     return ErrorV("Unknown function referenced");
 
   // If argument mismatch error.
@@ -775,18 +800,18 @@ Value *CallExprAST::Codegen() {
 
   std::vector<Value *> ArgsV;
   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
-    ArgsV.push_back(Args[i]->Codegen());
-    if (ArgsV.back() == 0)
-      return 0;
+    ArgsV.push_back(Args[i]->codegen());
+    if (!ArgsV.back())
+      return nullptr;
   }
 
   return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
 }
 
-Value *IfExprAST::Codegen() {
-  Value *CondV = Cond->Codegen();
-  if (CondV == 0)
-    return 0;
+Value *IfExprAST::codegen() {
+  Value *CondV = Cond->codegen();
+  if (!CondV)
+    return nullptr;
 
   // Convert condition to a bool by comparing equal to 0.0.
   CondV = Builder.CreateFCmpONE(
@@ -806,9 +831,9 @@ Value *IfExprAST::Codegen() {
 
   // Emit then value.
   Builder.SetInsertPoint(ThenBB);
 
-  Value *ThenV = Then->Codegen();
-  if (ThenV == 0)
-    return 0;
+  Value *ThenV = Then->codegen();
+  if (!ThenV)
+    return nullptr;
 
   Builder.CreateBr(MergeBB);
   // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
@@ -818,9 +843,9 @@ Value *IfExprAST::Codegen() {
   TheFunction->getBasicBlockList().push_back(ElseBB);
   Builder.SetInsertPoint(ElseBB);
 
-  Value *ElseV = Else->Codegen();
-  if (ElseV == 0)
-    return 0;
+  Value *ElseV = Else->codegen();
+  if (!ElseV)
+    return nullptr;
 
   Builder.CreateBr(MergeBB);
   // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
@@ -837,36 +862,35 @@ Value *IfExprAST::Codegen() {
   return PN;
 }
 
-Value *ForExprAST::Codegen() {
-  // Output this as:
-  //   var = alloca double
-  //   ...
-  //   start = startexpr
-  //   store start -> var
-  //   goto loop
-  // loop:
-  //   ...
-  //   bodyexpr
-  //   ...
-  // loopend:
-  //   step = stepexpr
-  //   endcond = endexpr
-  //
-  //   curvar = load var
-  //   nextvar = curvar + step
-  //   store nextvar -> var
-  //   br endcond, loop, endloop
-  // outloop:
-
+// Output for-loop as:
+//   var = alloca double
+//   ...
+//   start = startexpr
+//   store start -> var
+//   goto loop
+// loop:
+//   ...
+//   bodyexpr
+//   ...
+// loopend:
+//   step = stepexpr
+//   endcond = endexpr
+//
+//   curvar = load var
+//   nextvar = curvar + step
+//   store nextvar -> var
+//   br endcond, loop, endloop
+// outloop:
+Value *ForExprAST::codegen() {
   Function *TheFunction = Builder.GetInsertBlock()->getParent();
 
   // Create an alloca for the variable in the entry block.
   AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
 
   // Emit the start code first, without 'variable' in scope.
-  Value *StartVal = Start->Codegen();
-  if (StartVal == 0)
-    return 0;
+  Value *StartVal = Start->codegen();
+  if (!StartVal)
+    return nullptr;
 
   // Store the value into the alloca.
   Builder.CreateStore(StartVal, Alloca);
@@ -890,24 +914,24 @@ Value *ForExprAST::Codegen() {
   // Emit the body of the loop.  This, like any other expr, can change the
   // current BB.  Note that we ignore the value computed by the body, but don't
   // allow an error.
-  if (Body->Codegen() == 0)
-    return 0;
+  if (!Body->codegen())
+    return nullptr;
 
   // Emit the step value.
-  Value *StepVal;
+  Value *StepVal = nullptr;
   if (Step) {
-    StepVal = Step->Codegen();
-    if (StepVal == 0)
-      return 0;
+    StepVal = Step->codegen();
+    if (!StepVal)
+      return nullptr;
   } else {
     // If not specified, use 1.0.
     StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
   }
 
   // Compute the end condition.
-  Value *EndCond = End->Codegen();
-  if (EndCond == 0)
-    return EndCond;
+  Value *EndCond = End->codegen();
+  if (!EndCond)
+    return nullptr;
 
   // Reload, increment, and restore the alloca.  This handles the case where
   // the body of the loop mutates the variable.
@@ -939,7 +963,7 @@ Value *ForExprAST::Codegen() {
   return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
 }
 
-Value *VarExprAST::Codegen() {
+Value *VarExprAST::codegen() {
   std::vector<AllocaInst *> OldBindings;
 
   Function *TheFunction = Builder.GetInsertBlock()->getParent();
@@ -947,7 +971,7 @@ Value *VarExprAST::Codegen() {
   // Register all variables and emit their initializer.
   for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
     const std::string &VarName = VarNames[i].first;
-    ExprAST *Init = VarNames[i].second;
+    ExprAST *Init = VarNames[i].second.get();
 
     // Emit the initializer before adding the variable to scope, this prevents
     // the initializer from referencing the variable itself, and permits stuff
@@ -956,9 +980,9 @@ Value *VarExprAST::Codegen() {
     //    var a = a in ...   # refers to outer 'a'.
     Value *InitVal;
     if (Init) {
-      InitVal = Init->Codegen();
-      if (InitVal == 0)
-        return 0;
+      InitVal = Init->codegen();
+      if (!InitVal)
+        return nullptr;
     } else { // If not specified, use 0.0.
       InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
     }
@@ -975,9 +999,9 @@ Value *VarExprAST::Codegen() {
   }
 
   // Codegen the body, now that all vars are in scope.
-  Value *BodyVal = Body->Codegen();
-  if (BodyVal == 0)
-    return 0;
+  Value *BodyVal = Body->codegen();
+  if (!BodyVal)
+    return nullptr;
 
   // Pop all our variables from scope.
   for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
@@ -987,7 +1011,7 @@ Value *VarExprAST::Codegen() {
   return BodyVal;
 }
 
-Function *PrototypeAST::Codegen() {
+Function *PrototypeAST::codegen() {
   // Make the function type:  double(double,double) etc.
   std::vector<Type *> Doubles(Args.size(),
                               Type::getDoubleTy(getGlobalContext()));
@@ -995,79 +1019,54 @@ Function *PrototypeAST::Codegen() {
       FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false);
 
   Function *F =
-      Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
-
-  // If F conflicted, there was already something named 'Name'.  If it has a
-  // body, don't allow redefinition or reextern.
-  if (F->getName() != Name) {
-    // Delete the one we just made and get the existing one.
-    F->eraseFromParent();
-    F = TheModule->getFunction(Name);
-
-    // If F already has a body, reject this.
-    if (!F->empty()) {
-      ErrorF("redefinition of function");
-      return 0;
-    }
-
-    // If F took a different number of args, reject.
-    if (F->arg_size() != Args.size()) {
-      ErrorF("redefinition of function with different # args");
-      return 0;
-    }
-  }
+      Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get());
 
   // Set names for all arguments.
   unsigned Idx = 0;
-  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
-       ++AI, ++Idx)
-    AI->setName(Args[Idx]);
+  for (auto &Arg : F->args())
+    Arg.setName(Args[Idx++]);
 
   return F;
 }
 
-/// CreateArgumentAllocas - Create an alloca for each argument and register the
-/// argument in the symbol table so that references to it will succeed.
-void PrototypeAST::CreateArgumentAllocas(Function *F) {
-  Function::arg_iterator AI = F->arg_begin();
-  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
-    // Create an alloca for this variable.
-    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
-
-    // Store the initial value into the alloca.
-    Builder.CreateStore(AI, Alloca);
-
-    // Add arguments to variable symbol table.
-    NamedValues[Args[Idx]] = Alloca;
-  }
-}
-
-Function *FunctionAST::Codegen() {
-  NamedValues.clear();
-
-  Function *TheFunction = Proto->Codegen();
-  if (TheFunction == 0)
-    return 0;
+Function *FunctionAST::codegen() {
+  // Transfer ownership of the prototype to the FunctionProtos map, but keep a
+  // reference to it for use below.
+  auto &P = *Proto;
+  FunctionProtos[Proto->getName()] = std::move(Proto);
+  Function *TheFunction = getFunction(P.getName());
+  if (!TheFunction)
+    return nullptr;
 
   // If this is an operator, install it.
-  if (Proto->isBinaryOp())
-    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+  if (P.isBinaryOp())
+    BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
 
   // Create a new basic block to start insertion into.
   BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
   Builder.SetInsertPoint(BB);
 
-  // Add all arguments to the symbol table and create their allocas.
-  Proto->CreateArgumentAllocas(TheFunction);
+  // Record the function arguments in the NamedValues map.
+  NamedValues.clear();
+  for (auto &Arg : TheFunction->args()) {
+    // Create an alloca for this variable.
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
 
-  if (Value *RetVal = Body->Codegen()) {
+    // Store the initial value into the alloca.
+    Builder.CreateStore(&Arg, Alloca);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Arg.getName()] = Alloca;
+  }
+
+  if (Value *RetVal = Body->codegen()) {
     // Finish off the function.
     Builder.CreateRet(RetVal);
 
     // Validate the generated code, checking for consistency.
     verifyFunction(*TheFunction);
 
-    // Optimize the function.
+    // Run the optimizer on the function.
     TheFPM->run(*TheFunction);
 
     return TheFunction;
@@ -1076,22 +1075,42 @@ Function *FunctionAST::Codegen() {
 
   // Error reading body, remove function.
   TheFunction->eraseFromParent();
 
-  if (Proto->isBinaryOp())
+  if (P.isBinaryOp())
     BinopPrecedence.erase(Proto->getOperatorName());
-  return 0;
+  return nullptr;
 }
 
 //===----------------------------------------------------------------------===//
 // Top-Level parsing and JIT Driver
 //===----------------------------------------------------------------------===//
 
-static ExecutionEngine *TheExecutionEngine;
+static void InitializeModuleAndPassManager() {
+  // Open a new module.
+  TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext());
+  TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
+
+  // Create a new pass manager attached to it.
+  TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
+
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  TheFPM->add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  TheFPM->add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  TheFPM->add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  TheFPM->add(createCFGSimplificationPass());
+
+  TheFPM->doInitialization();
+}
 
 static void HandleDefinition() {
-  if (FunctionAST *F = ParseDefinition()) {
-    if (Function *LF = F->Codegen()) {
+  if (auto FnAST = ParseDefinition()) {
+    if (auto *FnIR = FnAST->codegen()) {
       fprintf(stderr, "Read function definition:");
-      LF->dump();
+      FnIR->dump();
+      TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
     }
   } else {
     // Skip token for error recovery.
@@ -1100,10 +1119,11 @@ static void HandleDefinition() {
 }
 
 static void HandleExtern() {
-  if (PrototypeAST *P = ParseExtern()) {
-    if (Function *F = P->Codegen()) {
+  if (auto ProtoAST = ParseExtern()) {
+    if (auto *FnIR = ProtoAST->codegen()) {
       fprintf(stderr, "Read extern: ");
-      F->dump();
+      FnIR->dump();
+      FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
     }
   } else {
     // Skip token for error recovery.
@@ -1113,16 +1133,25 @@ static void HandleExtern() {
 
 static void HandleTopLevelExpression() {
   // Evaluate a top-level expression into an anonymous function.
-  if (FunctionAST *F = ParseTopLevelExpr()) {
-    if (Function *LF = F->Codegen()) {
-      TheExecutionEngine->finalizeObject();
-      // JIT the function, returning a function pointer.
-      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+  if (auto FnAST = ParseTopLevelExpr()) {
+    if (FnAST->codegen()) {
 
-      // Cast it to the right type (takes no arguments, returns a double) so we
-      // can call it as a native function.
-      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      // JIT the module containing the anonymous expression, keeping a handle so
+      // we can free it later.
+      auto H = TheJIT->addModule(std::move(TheModule));
+      InitializeModuleAndPassManager();
+
+      // Search the JIT for the __anon_expr symbol.
+      auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
+      assert(ExprSymbol && "Function not found");
+
+      // Get the symbol's address and cast it to the right type (takes no
+      // arguments, returns a double) so we can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
       fprintf(stderr, "Evaluated to %f\n", FP());
+
+      // Delete the anonymous expression module from the JIT.
+      TheJIT->removeModule(H);
     }
   } else {
     // Skip token for error recovery.
@@ -1137,9 +1166,9 @@ static void MainLoop() {
     switch (CurTok) {
     case tok_eof:
       return;
-    case ';':
+    case ';': // ignore top-level semicolons.
       getNextToken();
-      break; // ignore top-level semicolons.
+      break;
    case tok_def:
      HandleDefinition();
      break;
@@ -1159,13 +1188,13 @@ static void MainLoop() {
 
 /// putchard - putchar that takes a double and returns 0.
 extern "C" double putchard(double X) {
-  putchar((char)X);
+  fputc((char)X, stderr);
   return 0;
 }
 
 /// printd - printf that takes a double prints it as "%f\n", returning 0.
 extern "C" double printd(double X) {
-  printf("%f\n", X);
+  fprintf(stderr, "%f\n", X);
   return 0;
 }
 
@@ -1177,7 +1206,6 @@ int main() {
   InitializeNativeTarget();
   InitializeNativeTargetAsmPrinter();
   InitializeNativeTargetAsmParser();
-  LLVMContext &Context = getGlobalContext();
 
   // Install standard binary operators.
   // 1 is lowest precedence.
@@ -1191,52 +1219,12 @@ int main() {
   fprintf(stderr, "ready> ");
   getNextToken();
 
-  // Make the module, which holds all the code.
-  std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context);
-  TheModule = Owner.get();
+  TheJIT = llvm::make_unique<KaleidoscopeJIT>();
 
-  // Create the JIT.  This takes ownership of the module.
-  std::string ErrStr;
-  TheExecutionEngine =
-      EngineBuilder(std::move(Owner))
-          .setErrorStr(&ErrStr)
-          .setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>())
-          .create();
-  if (!TheExecutionEngine) {
-    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
-    exit(1);
-  }
-
-  legacy::FunctionPassManager OurFPM(TheModule);
-
-  // Set up the optimizer pipeline.  Start with registering info about how the
-  // target lays out data structures.
-  TheModule->setDataLayout(*TheExecutionEngine->getDataLayout());
-  // Provide basic AliasAnalysis support for GVN.
-  OurFPM.add(createBasicAliasAnalysisPass());
-  // Promote allocas to registers.
-  OurFPM.add(createPromoteMemoryToRegisterPass());
-  // Do simple "peephole" optimizations and bit-twiddling optzns.
-  OurFPM.add(createInstructionCombiningPass());
-  // Reassociate expressions.
-  OurFPM.add(createReassociatePass());
-  // Eliminate Common SubExpressions.
-  OurFPM.add(createGVNPass());
-  // Simplify the control flow graph (deleting unreachable blocks, etc).
-  OurFPM.add(createCFGSimplificationPass());
-
-  OurFPM.doInitialization();
-
-  // Set the global so the code gen can use this.
-  TheFPM = &OurFPM;
+  InitializeModuleAndPassManager();
 
   // Run the main "interpreter loop" now.
   MainLoop();
 
-  TheFPM = 0;
-
-  // Print out all of the generated code.
-  TheModule->dump();
-
   return 0;
 }
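One detail worth noting before the Chapter 8 diff: the CMakeLists hunks in this patch swap the MCJIT/RuntimeDyld components for Object and add export_executable_symbols() because the ORC-based KaleidoscopeJIT resolves unresolved externals against the running process itself (falling back to RTDyldMemoryManager::getSymbolAddressInProcess). A sketch of what that requires of the host binary; the surrounding claim about the resolver is an assumption about the helper header, not something shown in this diff:

    // The JIT can only bind 'extern putchard(x);' calls if the executable
    // exports its own symbols. Without export_executable_symbols() the linker
    // may strip or hide them, and TheJIT->findSymbol("putchard") would come
    // back null at run time.
    extern "C" double putchard(double X) {
      fputc((char)X, stderr);
      return 0;
    }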
diff --git a/examples/Kaleidoscope/Chapter8/CMakeLists.txt b/examples/Kaleidoscope/Chapter8/CMakeLists.txt
index f94ed7436189..d9b5cc421be3 100644
--- a/examples/Kaleidoscope/Chapter8/CMakeLists.txt
+++ b/examples/Kaleidoscope/Chapter8/CMakeLists.txt
@@ -1,8 +1,7 @@
 set(LLVM_LINK_COMPONENTS
   Core
   ExecutionEngine
-  MCJIT
-  RuntimeDyld
+  Object
   Support
   native
   )
@@ -10,3 +9,5 @@ set(LLVM_LINK_COMPONENTS
 add_kaleidoscope_chapter(Kaleidoscope-Ch8
   toy.cpp
   )
+
+export_executable_symbols(Kaleidoscope-Ch8)
diff --git a/examples/Kaleidoscope/Chapter8/toy.cpp b/examples/Kaleidoscope/Chapter8/toy.cpp
index 71bc2f684027..289209b3df49 100644
--- a/examples/Kaleidoscope/Chapter8/toy.cpp
+++ b/examples/Kaleidoscope/Chapter8/toy.cpp
@@ -1,27 +1,23 @@
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
 #include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Transforms/Scalar.h"
 #include <cctype>
 #include <cstdio>
-#include <iostream>
 #include <map>
 #include <string>
 #include <vector>
+#include "../include/KaleidoscopeJIT.h"
+
 using namespace llvm;
+using namespace llvm::orc;
 
 //===----------------------------------------------------------------------===//
 // Lexer
 //===----------------------------------------------------------------------===//
@@ -96,20 +92,17 @@ struct DebugInfo {
   DICompileUnit *TheCU;
   DIType *DblTy;
   std::vector<DIScope *> LexicalBlocks;
-  std::map<const PrototypeAST *, DIScope *> FnScopeMap;
 
   void emitLocation(ExprAST *AST);
   DIType *getDoubleTy();
 } KSDbgInfo;
 
-static std::string IdentifierStr; // Filled in if tok_identifier
-static double NumVal;             // Filled in if tok_number
 struct SourceLocation {
   int Line;
   int Col;
 };
 static SourceLocation CurLoc;
-static SourceLocation LexLoc = { 1, 0 };
+static SourceLocation LexLoc = {1, 0};
 
 static int advance() {
   int LastChar = getchar();
 
@@ -122,6 +115,9 @@ static int advance() {
   return LastChar;
 }
 
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal;             // Filled in if tok_number
+
 /// gettok - Return the next token from standard input.
 static int gettok() {
   static int LastChar = ' ';
@@ -167,7 +163,7 @@ static int gettok() {
       LastChar = advance();
     } while (isdigit(LastChar) || LastChar == '.');
 
-    NumVal = strtod(NumStr.c_str(), 0);
+    NumVal = strtod(NumStr.c_str(), nullptr);
     return tok_number;
   }
 
@@ -196,7 +192,7 @@ static int gettok() {
 //===----------------------------------------------------------------------===//
 
 namespace {
-std::ostream &indent(std::ostream &O, int size) {
+raw_ostream &indent(raw_ostream &O, int size) {
   return O << std::string(size, ' ');
 }
 
@@ -205,14 +201,14 @@ class ExprAST {
   SourceLocation Loc;
 
 public:
+  ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
+  virtual ~ExprAST() {}
+  virtual Value *codegen() = 0;
   int getLine() const { return Loc.Line; }
   int getCol() const { return Loc.Col; }
-  ExprAST(SourceLocation Loc = CurLoc) : Loc(Loc) {}
-  virtual std::ostream &dump(std::ostream &out, int ind) {
+  virtual raw_ostream &dump(raw_ostream &out, int ind) {
     return out << ':' << getLine() << ':' << getCol() << '\n';
   }
-  virtual ~ExprAST() {}
-  virtual Value *Codegen() = 0;
 };
 
 /// NumberExprAST - Expression class for numeric literals like "1.0".
@@ -220,11 +216,11 @@ class NumberExprAST : public ExprAST {
   double Val;
 
 public:
-  NumberExprAST(double val) : Val(val) {}
-  std::ostream &dump(std::ostream &out, int ind) override {
+  NumberExprAST(double Val) : Val(Val) {}
+  raw_ostream &dump(raw_ostream &out, int ind) override {
     return ExprAST::dump(out << Val, ind);
   }
-  Value *Codegen() override;
+  Value *codegen() override;
 };
 
 /// VariableExprAST - Expression class for referencing a variable, like "a".
@@ -232,93 +228,99 @@ class VariableExprAST : public ExprAST {
   std::string Name;
 
 public:
-  VariableExprAST(SourceLocation Loc, const std::string &name)
-      : ExprAST(Loc), Name(name) {}
+  VariableExprAST(SourceLocation Loc, const std::string &Name)
+      : ExprAST(Loc), Name(Name) {}
   const std::string &getName() const { return Name; }
-  std::ostream &dump(std::ostream &out, int ind) override {
+  Value *codegen() override;
+  raw_ostream &dump(raw_ostream &out, int ind) override {
     return ExprAST::dump(out << Name, ind);
   }
-  Value *Codegen() override;
 };
 
 /// UnaryExprAST - Expression class for a unary operator.
 class UnaryExprAST : public ExprAST {
   char Opcode;
-  ExprAST *Operand;
+  std::unique_ptr<ExprAST> Operand;
 
 public:
-  UnaryExprAST(char opcode, ExprAST *operand)
-      : Opcode(opcode), Operand(operand) {}
-  std::ostream &dump(std::ostream &out, int ind) override {
+  UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
+      : Opcode(Opcode), Operand(std::move(Operand)) {}
+  Value *codegen() override;
+  raw_ostream &dump(raw_ostream &out, int ind) override {
     ExprAST::dump(out << "unary" << Opcode, ind);
     Operand->dump(out, ind + 1);
     return out;
  }
-  Value *Codegen() override;
 };
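The Chapter 8 hunks above switch the AST dump() helpers from std::ostream to LLVM's raw_ostream, which is why <iostream> drops out of the includes. A small usage sketch, assuming only the classes shown above:

    #include "llvm/Support/raw_ostream.h"

    // Print an AST node to stderr; llvm::errs() is the raw_ostream analogue
    // of std::cerr, so the example needs no <iostream>.
    static void debugDump(ExprAST &E) {
      E.dump(llvm::errs(), /*ind=*/0) << '\n'; // dump() returns the stream
    }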
class BinaryExprAST : public ExprAST { char Op; - ExprAST *LHS, *RHS; + std::unique_ptr<ExprAST> LHS, RHS; public: - BinaryExprAST(SourceLocation Loc, char op, ExprAST *lhs, ExprAST *rhs) - : ExprAST(Loc), Op(op), LHS(lhs), RHS(rhs) {} - std::ostream &dump(std::ostream &out, int ind) override { + BinaryExprAST(SourceLocation Loc, char Op, std::unique_ptr<ExprAST> LHS, + std::unique_ptr<ExprAST> RHS) + : ExprAST(Loc), Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "binary" << Op, ind); LHS->dump(indent(out, ind) << "LHS:", ind + 1); RHS->dump(indent(out, ind) << "RHS:", ind + 1); return out; } - Value *Codegen() override; }; /// CallExprAST - Expression class for function calls. class CallExprAST : public ExprAST { std::string Callee; - std::vector<ExprAST *> Args; + std::vector<std::unique_ptr<ExprAST>> Args; public: - CallExprAST(SourceLocation Loc, const std::string &callee, - std::vector<ExprAST *> &args) - : ExprAST(Loc), Callee(callee), Args(args) {} - std::ostream &dump(std::ostream &out, int ind) override { + CallExprAST(SourceLocation Loc, const std::string &Callee, + std::vector<std::unique_ptr<ExprAST>> Args) + : ExprAST(Loc), Callee(Callee), Args(std::move(Args)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "call " << Callee, ind); - for (ExprAST *Arg : Args) + for (const auto &Arg : Args) Arg->dump(indent(out, ind + 1), ind + 1); return out; } - Value *Codegen() override; }; /// IfExprAST - Expression class for if/then/else. class IfExprAST : public ExprAST { - ExprAST *Cond, *Then, *Else; + std::unique_ptr<ExprAST> Cond, Then, Else; public: - IfExprAST(SourceLocation Loc, ExprAST *cond, ExprAST *then, ExprAST *_else) - : ExprAST(Loc), Cond(cond), Then(then), Else(_else) {} - std::ostream &dump(std::ostream &out, int ind) override { + IfExprAST(SourceLocation Loc, std::unique_ptr<ExprAST> Cond, + std::unique_ptr<ExprAST> Then, std::unique_ptr<ExprAST> Else) + : ExprAST(Loc), Cond(std::move(Cond)), Then(std::move(Then)), + Else(std::move(Else)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "if", ind); Cond->dump(indent(out, ind) << "Cond:", ind + 1); Then->dump(indent(out, ind) << "Then:", ind + 1); Else->dump(indent(out, ind) << "Else:", ind + 1); return out; } - Value *Codegen() override; }; /// ForExprAST - Expression class for for/in.
class ForExprAST : public ExprAST { std::string VarName; - ExprAST *Start, *End, *Step, *Body; + std::unique_ptr<ExprAST> Start, End, Step, Body; public: - ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, - ExprAST *step, ExprAST *body) - : VarName(varname), Start(start), End(end), Step(step), Body(body) {} - std::ostream &dump(std::ostream &out, int ind) override { + ForExprAST(const std::string &VarName, std::unique_ptr<ExprAST> Start, + std::unique_ptr<ExprAST> End, std::unique_ptr<ExprAST> Step, + std::unique_ptr<ExprAST> Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "for", ind); Start->dump(indent(out, ind) << "Cond:", ind + 1); End->dump(indent(out, ind) << "End:", ind + 1); @@ -326,47 +328,49 @@ public: Body->dump(indent(out, ind) << "Body:", ind + 1); return out; } - Value *Codegen() override; }; /// VarExprAST - Expression class for var/in class VarExprAST : public ExprAST { - std::vector<std::pair<std::string, ExprAST *> > VarNames; - ExprAST *Body; + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; + std::unique_ptr<ExprAST> Body; public: - VarExprAST(const std::vector<std::pair<std::string, ExprAST *> > &varnames, - ExprAST *body) - : VarNames(varnames), Body(body) {} - - std::ostream &dump(std::ostream &out, int ind) override { + VarExprAST( + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames, + std::unique_ptr<ExprAST> Body) + : VarNames(std::move(VarNames)), Body(std::move(Body)) {} + Value *codegen() override; + raw_ostream &dump(raw_ostream &out, int ind) override { ExprAST::dump(out << "var", ind); for (const auto &NamedVar : VarNames) NamedVar.second->dump(indent(out, ind) << NamedVar.first << ':', ind + 1); Body->dump(indent(out, ind) << "Body:", ind + 1); return out; } - Value *Codegen() override; }; /// PrototypeAST - This class represents the "prototype" for a function, -/// which captures its argument names as well as if it is an operator. +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes), as well as if it is an operator. class PrototypeAST { std::string Name; std::vector<std::string> Args; - bool isOperator; + bool IsOperator; unsigned Precedence; // Precedence if a binary op. int Line; public: - PrototypeAST(SourceLocation Loc, const std::string &name, - const std::vector<std::string> &args, bool isoperator = false, - unsigned prec = 0) - : Name(name), Args(args), isOperator(isoperator), Precedence(prec), - Line(Loc.Line) {} + PrototypeAST(SourceLocation Loc, const std::string &Name, + std::vector<std::string> Args, bool IsOperator = false, + unsigned Prec = 0) + : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), + Precedence(Prec), Line(Loc.Line) {} + Function *codegen(); + const std::string &getName() const { return Name; } - bool isUnaryOp() const { return isOperator && Args.size() == 1; } - bool isBinaryOp() const { return isOperator && Args.size() == 2; } + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } + bool isBinaryOp() const { return IsOperator && Args.size() == 2; } char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); @@ -374,29 +378,25 @@ public: } unsigned getBinaryPrecedence() const { return Precedence; } - - Function *Codegen(); - - void CreateArgumentAllocas(Function *F); - const std::vector<std::string> &getArgs() const { return Args; } + int getLine() const { return Line; } };
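// Illustrative aside (BuildExampleSum is not part of the tutorial sources; it
// only demonstrates the ownership model introduced above): with the AST nodes
// now holding their children through std::unique_ptr, a subtree such as
// "x + 1" is assembled by moving each child into its parent, and destroying
// the root frees the whole expression tree.
static std::unique_ptr<ExprAST> BuildExampleSum() {
  auto LHS = llvm::make_unique<VariableExprAST>(CurLoc, "x");
  auto RHS = llvm::make_unique<NumberExprAST>(1.0);
  return llvm::make_unique<BinaryExprAST>(CurLoc, '+', std::move(LHS),
                                          std::move(RHS));
}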
/// FunctionAST - This class represents a function definition itself. class FunctionAST { - PrototypeAST *Proto; - ExprAST *Body; + std::unique_ptr<PrototypeAST> Proto; + std::unique_ptr<ExprAST> Body; public: - FunctionAST(PrototypeAST *proto, ExprAST *body) : Proto(proto), Body(body) {} - - std::ostream &dump(std::ostream &out, int ind) { + FunctionAST(std::unique_ptr<PrototypeAST> Proto, + std::unique_ptr<ExprAST> Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + Function *codegen(); + raw_ostream &dump(raw_ostream &out, int ind) { indent(out, ind) << "FunctionAST\n"; ++ind; indent(out, ind) << "Body:"; return Body ? Body->dump(out, ind) : out << "null\n"; } - - Function *Codegen(); }; } // end anonymous namespace @@ -427,25 +427,42 @@ static int GetTokPrecedence() { } /// Error* - These are little helper functions for error handling. -ExprAST *Error(const char *Str) { +std::unique_ptr<ExprAST> Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str); - return 0; -} -PrototypeAST *ErrorP(const char *Str) { - Error(Str); - return 0; -} -FunctionAST *ErrorF(const char *Str) { - Error(Str); - return 0; + return nullptr; } -static ExprAST *ParseExpression(); +std::unique_ptr<PrototypeAST> ErrorP(const char *Str) { + Error(Str); + return nullptr; +} + +static std::unique_ptr<ExprAST> ParseExpression(); + +/// numberexpr ::= number +static std::unique_ptr<ExprAST> ParseNumberExpr() { + auto Result = llvm::make_unique<NumberExprAST>(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr<ExprAST> ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} /// identifierexpr /// ::= identifier /// ::= identifier '(' expression* ')' -static ExprAST *ParseIdentifierExpr() { +static std::unique_ptr<ExprAST> ParseIdentifierExpr() { std::string IdName = IdentifierStr; SourceLocation LitLoc = CurLoc; @@ -453,17 +470,17 @@ getNextToken(); // eat identifier. if (CurTok != '(') // Simple variable ref. - return new VariableExprAST(LitLoc, IdName); + return llvm::make_unique<VariableExprAST>(LitLoc, IdName); // Call. getNextToken(); // eat ( - std::vector<ExprAST *> Args; + std::vector<std::unique_ptr<ExprAST>> Args; if (CurTok != ')') { while (1) { - ExprAST *Arg = ParseExpression(); - if (!Arg) - return 0; - Args.push_back(Arg); + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; if (CurTok == ')') break; @@ -477,62 +494,43 @@ // Eat the ')'. getNextToken(); - return new CallExprAST(LitLoc, IdName, Args); -} - -/// numberexpr ::= number -static ExprAST *ParseNumberExpr() { - ExprAST *Result = new NumberExprAST(NumVal); - getNextToken(); // consume the number - return Result; -} - -/// parenexpr ::= '(' expression ')' -static ExprAST *ParseParenExpr() { - getNextToken(); // eat (. - ExprAST *V = ParseExpression(); - if (!V) - return 0; - - if (CurTok != ')') - return Error("expected ')'"); - getNextToken(); // eat ). - return V; + return llvm::make_unique<CallExprAST>(LitLoc, IdName, std::move(Args)); } /// ifexpr ::= 'if' expression 'then' expression 'else' expression -static ExprAST *ParseIfExpr() { +static std::unique_ptr<ExprAST> ParseIfExpr() { SourceLocation IfLoc = CurLoc; getNextToken(); // eat the if. // condition.
- ExprAST *Cond = ParseExpression(); + auto Cond = ParseExpression(); if (!Cond) - return 0; + return nullptr; if (CurTok != tok_then) return Error("expected then"); getNextToken(); // eat the then - ExprAST *Then = ParseExpression(); - if (Then == 0) - return 0; + auto Then = ParseExpression(); + if (!Then) + return nullptr; if (CurTok != tok_else) return Error("expected else"); getNextToken(); - ExprAST *Else = ParseExpression(); + auto Else = ParseExpression(); if (!Else) - return 0; + return nullptr; - return new IfExprAST(IfLoc, Cond, Then, Else); + return llvm::make_unique<IfExprAST>(IfLoc, std::move(Cond), std::move(Then), + std::move(Else)); } /// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression -static ExprAST *ParseForExpr() { +static std::unique_ptr<ExprAST> ParseForExpr() { getNextToken(); // eat the for. if (CurTok != tok_identifier) @@ -545,43 +543,44 @@ return Error("expected '=' after for"); getNextToken(); // eat '='. - ExprAST *Start = ParseExpression(); - if (Start == 0) - return 0; + auto Start = ParseExpression(); + if (!Start) + return nullptr; if (CurTok != ',') return Error("expected ',' after for start value"); getNextToken(); - ExprAST *End = ParseExpression(); - if (End == 0) - return 0; + auto End = ParseExpression(); + if (!End) + return nullptr; // The step value is optional. - ExprAST *Step = 0; + std::unique_ptr<ExprAST> Step; if (CurTok == ',') { getNextToken(); Step = ParseExpression(); - if (Step == 0) - return 0; + if (!Step) + return nullptr; } if (CurTok != tok_in) return Error("expected 'in' after for"); getNextToken(); // eat 'in'. - ExprAST *Body = ParseExpression(); - if (Body == 0) - return 0; + auto Body = ParseExpression(); + if (!Body) + return nullptr; - return new ForExprAST(IdName, Start, End, Step, Body); + return llvm::make_unique<ForExprAST>(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); } /// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression -static ExprAST *ParseVarExpr() { +static std::unique_ptr<ExprAST> ParseVarExpr() { getNextToken(); // eat the var. - std::vector<std::pair<std::string, ExprAST *> > VarNames; + std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames; // At least one variable name is required. if (CurTok != tok_identifier) @@ -592,16 +591,16 @@ getNextToken(); // eat identifier. // Read the optional initializer. - ExprAST *Init = 0; + std::unique_ptr<ExprAST> Init = nullptr; if (CurTok == '=') { getNextToken(); // eat the '='. Init = ParseExpression(); - if (Init == 0) - return 0; + if (!Init) + return nullptr; } - VarNames.push_back(std::make_pair(Name, Init)); + VarNames.push_back(std::make_pair(Name, std::move(Init))); // End of var list, exit loop. if (CurTok != ',') @@ -617,11 +616,11 @@ return Error("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - ExprAST *Body = ParseExpression(); - if (Body == 0) - return 0; + auto Body = ParseExpression(); + if (!Body) + return nullptr; - return new VarExprAST(VarNames, Body); + return llvm::make_unique<VarExprAST>(std::move(VarNames), std::move(Body)); } /// primary /// ::= identifierexpr /// ::= numberexpr /// ::= parenexpr /// ::= ifexpr /// ::= forexpr /// ::= varexpr -static ExprAST *ParsePrimary() { +static std::unique_ptr<ExprAST> ParsePrimary() { switch (CurTok) { default: return Error("unknown token when expecting an expression");
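// Worked example for the unary parser that follows (illustrative, using the
// tutorial's user-defined operators): after "def unary!(v) if v then 0 else 1;"
// the input "!!x" parses as follows. ParseUnary sees '!', consumes it, and
// recurses; the inner call sees '!' again and recurses once more; the
// innermost call falls through to ParsePrimary and yields the variable x.
// Unwinding builds UnaryExprAST('!', UnaryExprAST('!', x)), and each level
// later codegens to a call to the user-supplied function "unary!".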
@@ -653,7 +652,7 @@ /// unary /// ::= primary /// ::= '!' unary -static ExprAST *ParseUnary() { +static std::unique_ptr<ExprAST> ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); @@ -661,14 +660,15 @@ // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); - if (ExprAST *Operand = ParseUnary()) - return new UnaryExprAST(Opc, Operand); - return 0; + if (auto Operand = ParseUnary()) + return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand)); + return nullptr; } /// binoprhs /// ::= ('+' unary)* -static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { +static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, + std::unique_ptr<ExprAST> LHS) { // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); @@ -684,40 +684,41 @@ getNextToken(); // eat binop // Parse the unary expression after the binary operator. - ExprAST *RHS = ParseUnary(); + auto RHS = ParseUnary(); if (!RHS) - return 0; + return nullptr; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); if (TokPrec < NextPrec) { - RHS = ParseBinOpRHS(TokPrec + 1, RHS); - if (RHS == 0) - return 0; + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; } // Merge LHS/RHS. - LHS = new BinaryExprAST(BinLoc, BinOp, LHS, RHS); + LHS = llvm::make_unique<BinaryExprAST>(BinLoc, BinOp, std::move(LHS), + std::move(RHS)); } } /// expression /// ::= unary binoprhs /// -static ExprAST *ParseExpression() { - ExprAST *LHS = ParseUnary(); +static std::unique_ptr<ExprAST> ParseExpression() { + auto LHS = ParseUnary(); if (!LHS) - return 0; + return nullptr; - return ParseBinOpRHS(0, LHS); + return ParseBinOpRHS(0, std::move(LHS)); } /// prototype /// ::= id '(' id* ')' /// ::= binary LETTER number? (id, id) /// ::= unary LETTER (id) -static PrototypeAST *ParsePrototype() { +static std::unique_ptr<PrototypeAST> ParsePrototype() { std::string FnName; SourceLocation FnLoc = CurLoc; @@ -777,35 +778,36 @@ if (Kind && ArgNames.size() != Kind) return ErrorP("Invalid number of operands for operator"); - return new PrototypeAST(FnLoc, FnName, ArgNames, Kind != 0, BinaryPrecedence); + return llvm::make_unique<PrototypeAST>(FnLoc, FnName, ArgNames, Kind != 0, + BinaryPrecedence); } /// definition ::= 'def' prototype expression -static FunctionAST *ParseDefinition() { +static std::unique_ptr<FunctionAST> ParseDefinition() { getNextToken(); // eat def. - PrototypeAST *Proto = ParsePrototype(); - if (Proto == 0) - return 0; + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; - if (ExprAST *E = ParseExpression()) - return new FunctionAST(Proto, E); - return 0; + if (auto E = ParseExpression()) + return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); + return nullptr; } /// toplevelexpr ::= expression -static FunctionAST *ParseTopLevelExpr() { +static std::unique_ptr<FunctionAST> ParseTopLevelExpr() { SourceLocation FnLoc = CurLoc; - if (ExprAST *E = ParseExpression()) { + if (auto E = ParseExpression()) { // Make an anonymous proto.
- PrototypeAST *Proto = - new PrototypeAST(FnLoc, "main", std::vector<std::string>()); - return new FunctionAST(Proto, E); + auto Proto = llvm::make_unique<PrototypeAST>(FnLoc, "__anon_expr", + std::vector<std::string>()); + return llvm::make_unique<FunctionAST>(std::move(Proto), std::move(E)); } - return 0; + return nullptr; } /// external ::= 'extern' prototype -static PrototypeAST *ParseExtern() { +static std::unique_ptr<PrototypeAST> ParseExtern() { getNextToken(); // eat extern. return ParsePrototype(); } @@ -814,7 +816,7 @@ // Debug Info Support //===----------------------------------------------------------------------===// -static DIBuilder *DBuilder; +static std::unique_ptr<DIBuilder> DBuilder; DIType *DebugInfo::getDoubleTy() { if (DblTy) @@ -846,21 +848,36 @@ static DISubroutineType *CreateFunctionType(unsigned NumArgs, DIFile *Unit) { for (unsigned i = 0, e = NumArgs; i != e; ++i) EltTys.push_back(DblTy); - return DBuilder->createSubroutineType(Unit, - DBuilder->getOrCreateTypeArray(EltTys)); + return DBuilder->createSubroutineType(DBuilder->getOrCreateTypeArray(EltTys)); } //===----------------------------------------------------------------------===// // Code Generation //===----------------------------------------------------------------------===// -static Module *TheModule; +static std::unique_ptr<Module> TheModule; static std::map<std::string, AllocaInst *> NamedValues; -static legacy::FunctionPassManager *TheFPM; +static std::unique_ptr<KaleidoscopeJIT> TheJIT; +static std::map<std::string, std::unique_ptr<PrototypeAST>> FunctionProtos; Value *ErrorV(const char *Str) { Error(Str); - return 0; + return nullptr; +} + +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; } /// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of @@ -869,19 +886,19 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } -Value *NumberExprAST::Codegen() { +Value *NumberExprAST::codegen() { KSDbgInfo.emitLocation(this); return ConstantFP::get(getGlobalContext(), APFloat(Val)); }
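// Illustrative sketch (emitCallToSinExample is not part of the tutorial; it
// reuses this file's globals Builder and getFunction()): because prototypes
// now live in FunctionProtos rather than in any single Module, codegen can
// ask getFunction() for "sin" at any time and a fresh declaration is emitted
// into the current module if needed, exactly as CallExprAST::codegen does
// further below.
static Value *emitCallToSinExample(Value *Arg) {
  Function *SinF = getFunction("sin"); // assumes "extern sin(x);" was parsed
  if (!SinF)
    return nullptr;
  return Builder.CreateCall(SinF, Arg, "calltmp");
}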
-Value *VariableExprAST::Codegen() { +Value *VariableExprAST::codegen() { // Look this variable up in the function. Value *V = NamedValues[Name]; - if (V == 0) + if (!V) return ErrorV("Unknown variable name"); KSDbgInfo.emitLocation(this); @@ -889,20 +906,20 @@ return Builder.CreateLoad(V, Name.c_str()); } -Value *UnaryExprAST::Codegen() { - Value *OperandV = Operand->Codegen(); - if (OperandV == 0) - return 0; +Value *UnaryExprAST::codegen() { + Value *OperandV = Operand->codegen(); + if (!OperandV) + return nullptr; - Function *F = TheModule->getFunction(std::string("unary") + Opcode); - if (F == 0) + Function *F = getFunction(std::string("unary") + Opcode); + if (!F) return ErrorV("Unknown unary operator"); KSDbgInfo.emitLocation(this); return Builder.CreateCall(F, OperandV, "unop"); } -Value *BinaryExprAST::Codegen() { +Value *BinaryExprAST::codegen() { KSDbgInfo.emitLocation(this); // Special case '=' because we don't want to emit the LHS as an expression. @@ -911,27 +928,27 @@ // This assume we're building without RTTI because LLVM builds that way by // default. If you build LLVM with RTTI this can be changed to a // dynamic_cast for automatic error checking. - VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS); + VariableExprAST *LHSE = static_cast<VariableExprAST *>(LHS.get()); if (!LHSE) return ErrorV("destination of '=' must be a variable"); // Codegen the RHS. - Value *Val = RHS->Codegen(); - if (Val == 0) - return 0; + Value *Val = RHS->codegen(); + if (!Val) + return nullptr; // Look up the name. Value *Variable = NamedValues[LHSE->getName()]; - if (Variable == 0) + if (!Variable) return ErrorV("Unknown variable name"); Builder.CreateStore(Val, Variable); return Val; } - Value *L = LHS->Codegen(); - Value *R = RHS->Codegen(); - if (L == 0 || R == 0) - return 0; + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; switch (Op) { case '+': @@ -951,19 +968,19 @@ // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. - Function *F = TheModule->getFunction(std::string("binary") + Op); + Function *F = getFunction(std::string("binary") + Op); assert(F && "binary operator not found!"); - Value *Ops[] = { L, R }; + Value *Ops[] = {L, R}; return Builder.CreateCall(F, Ops, "binop"); } -Value *CallExprAST::Codegen() { +Value *CallExprAST::codegen() { KSDbgInfo.emitLocation(this); // Look up the name in the global module table. - Function *CalleeF = TheModule->getFunction(Callee); - if (CalleeF == 0) + Function *CalleeF = getFunction(Callee); + if (!CalleeF) return ErrorV("Unknown function referenced"); // If argument mismatch error. @@ -972,20 +989,20 @@ std::vector<Value *> ArgsV; for (unsigned i = 0, e = Args.size(); i != e; ++i) { - ArgsV.push_back(Args[i]->Codegen()); - if (ArgsV.back() == 0) - return 0; + ArgsV.push_back(Args[i]->codegen()); + if (!ArgsV.back()) + return nullptr; } return Builder.CreateCall(CalleeF, ArgsV, "calltmp"); } -Value *IfExprAST::Codegen() { +Value *IfExprAST::codegen() { KSDbgInfo.emitLocation(this); - Value *CondV = Cond->Codegen(); - if (CondV == 0) - return 0; + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; // Convert condition to a bool by comparing equal to 0.0. CondV = Builder.CreateFCmpONE( @@ -1005,9 +1022,9 @@ // Emit then value.
Builder.SetInsertPoint(ThenBB); - Value *ThenV = Then->Codegen(); - if (ThenV == 0) - return 0; + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; Builder.CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. @@ -1017,9 +1034,9 @@ TheFunction->getBasicBlockList().push_back(ElseBB); Builder.SetInsertPoint(ElseBB); - Value *ElseV = Else->Codegen(); - if (ElseV == 0) - return 0; + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; Builder.CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. @@ -1036,27 +1053,26 @@ return PN; } -Value *ForExprAST::Codegen() { - // Output this as: - // var = alloca double - // ... - // start = startexpr - // store start -> var - // goto loop - // loop: - // ... - // bodyexpr - // ... - // loopend: - // step = stepexpr - // endcond = endexpr - // - // curvar = load var - // nextvar = curvar + step - // store nextvar -> var - // br endcond, loop, endloop - // outloop: - +// Output for-loop as: +// var = alloca double +// ... +// start = startexpr +// store start -> var +// goto loop +// loop: +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// endcond = endexpr +// +// curvar = load var +// nextvar = curvar + step +// store nextvar -> var +// br endcond, loop, endloop +// outloop: +Value *ForExprAST::codegen() { Function *TheFunction = Builder.GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. @@ -1065,9 +1081,9 @@ KSDbgInfo.emitLocation(this); // Emit the start code first, without 'variable' in scope. - Value *StartVal = Start->Codegen(); - if (StartVal == 0) - return 0; + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; // Store the value into the alloca. Builder.CreateStore(StartVal, Alloca); @@ -1091,24 +1107,24 @@ // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. - if (Body->Codegen() == 0) - return 0; + if (!Body->codegen()) + return nullptr; // Emit the step value. - Value *StepVal; + Value *StepVal = nullptr; if (Step) { - StepVal = Step->Codegen(); - if (StepVal == 0) - return 0; + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; } else { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } // Compute the end condition. - Value *EndCond = End->Codegen(); - if (EndCond == 0) - return EndCond; + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. @@ -1140,7 +1156,7 @@ return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } -Value *VarExprAST::Codegen() { +Value *VarExprAST::codegen() { std::vector<AllocaInst *> OldBindings; Function *TheFunction = Builder.GetInsertBlock()->getParent(); @@ -1148,7 +1164,7 @@ // Register all variables and emit their initializer.
for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { const std::string &VarName = VarNames[i].first; - ExprAST *Init = VarNames[i].second; + ExprAST *Init = VarNames[i].second.get(); // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff @@ -1157,9 +1173,9 @@ // var a = a in ... # refers to outer 'a'. Value *InitVal; if (Init) { - InitVal = Init->Codegen(); - if (InitVal == 0) - return 0; + InitVal = Init->codegen(); + if (!InitVal) + return nullptr; } else { // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); } @@ -1178,9 +1194,9 @@ KSDbgInfo.emitLocation(this); // Codegen the body, now that all vars are in scope. - Value *BodyVal = Body->Codegen(); - if (BodyVal == 0) - return 0; + Value *BodyVal = Body->codegen(); + if (!BodyVal) + return nullptr; // Pop all our variables from scope. for (unsigned i = 0, e = VarNames.size(); i != e; ++i) @@ -1190,7 +1206,7 @@ return BodyVal; } -Function *PrototypeAST::Codegen() { +Function *PrototypeAST::codegen() { // Make the function type: double(double,double) etc. std::vector<Type *> Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); @@ -1198,106 +1214,80 @@ FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); Function *F = - Function::Create(FT, Function::ExternalLinkage, Name, TheModule); - - // If F conflicted, there was already something named 'Name'. If it has a - // body, don't allow redefinition or reextern. - if (F->getName() != Name) { - // Delete the one we just made and get the existing one. - F->eraseFromParent(); - F = TheModule->getFunction(Name); - - // If F already has a body, reject this. - if (!F->empty()) { - ErrorF("redefinition of function"); - return 0; - } - - // If F took a different number of args, reject. - if (F->arg_size() != Args.size()) { - ErrorF("redefinition of function with different # args"); - return 0; - } - } + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); // Set names for all arguments. unsigned Idx = 0; - for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); - ++AI, ++Idx) - AI->setName(Args[Idx]); + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. + auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; + + // If this is an operator, install it. + if (P.isBinaryOp()) + BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); // Create a subprogram DIE for this function.
DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(), KSDbgInfo.TheCU->getDirectory()); DIScope *FContext = Unit; - unsigned LineNo = Line; - unsigned ScopeLine = Line; + unsigned LineNo = P.getLine(); + unsigned ScopeLine = LineNo; DISubprogram *SP = DBuilder->createFunction( - FContext, Name, StringRef(), Unit, LineNo, - CreateFunctionType(Args.size(), Unit), false /* internal linkage */, - true /* definition */, ScopeLine, DINode::FlagPrototyped, false, F); - - KSDbgInfo.FnScopeMap[this] = SP; - return F; -} - -/// CreateArgumentAllocas - Create an alloca for each argument and register the -/// argument in the symbol table so that references to it will succeed. -void PrototypeAST::CreateArgumentAllocas(Function *F) { - Function::arg_iterator AI = F->arg_begin(); - for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) { - // Create an alloca for this variable. - AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); - - // Create a debug descriptor for the variable. - DIScope *Scope = KSDbgInfo.LexicalBlocks.back(); - DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU->getFilename(), - KSDbgInfo.TheCU->getDirectory()); - DILocalVariable *D = DBuilder->createLocalVariable( - dwarf::DW_TAG_arg_variable, Scope, Args[Idx], Unit, Line, - KSDbgInfo.getDoubleTy(), Idx); - - DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(), - DebugLoc::get(Line, 0, Scope), - Builder.GetInsertBlock()); - - // Store the initial value into the alloca. - Builder.CreateStore(AI, Alloca); - - // Add arguments to variable symbol table. - NamedValues[Args[Idx]] = Alloca; - } -} - -Function *FunctionAST::Codegen() { - NamedValues.clear(); - - Function *TheFunction = Proto->Codegen(); - if (TheFunction == 0) - return 0; + FContext, P.getName(), StringRef(), Unit, LineNo, + CreateFunctionType(TheFunction->arg_size(), Unit), + false /* internal linkage */, true /* definition */, ScopeLine, + DINode::FlagPrototyped, false); + TheFunction->setSubprogram(SP); // Push the current scope. - KSDbgInfo.LexicalBlocks.push_back(KSDbgInfo.FnScopeMap[Proto]); + KSDbgInfo.LexicalBlocks.push_back(SP); // Unset the location for the prologue emission (leading instructions with no // location in a function are considered part of the prologue and the debugger // will run past them when breaking on a function) KSDbgInfo.emitLocation(nullptr); - // If this is an operator, install it. - if (Proto->isBinaryOp()) - BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence(); + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + unsigned ArgIdx = 0; + for (auto &Arg : TheFunction->args()) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); - // Create a new basic block to start insertion into. - BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); - Builder.SetInsertPoint(BB); + // Create a debug descriptor for the variable. + DILocalVariable *D = DBuilder->createParameterVariable( + SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(), + true); - // Add all arguments to the symbol table and create their allocas. - Proto->CreateArgumentAllocas(TheFunction); + DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(), + DebugLoc::get(LineNo, 0, SP), + Builder.GetInsertBlock()); - KSDbgInfo.emitLocation(Body); + // Store the initial value into the alloca. 
+ Builder.CreateStore(&Arg, Alloca); - if (Value *RetVal = Body->Codegen()) { + // Add arguments to variable symbol table. + NamedValues[Arg.getName()] = Alloca; + } + + KSDbgInfo.emitLocation(Body.get()); + + if (Value *RetVal = Body->codegen()) { // Finish off the function. Builder.CreateRet(RetVal); @@ -1307,36 +1297,36 @@ // Validate the generated code, checking for consistency. verifyFunction(*TheFunction); - // Optimize the function. - TheFPM->run(*TheFunction); - return TheFunction; } // Error reading body, remove function. TheFunction->eraseFromParent(); - if (Proto->isBinaryOp()) + if (P.isBinaryOp()) BinopPrecedence.erase(Proto->getOperatorName()); // Pop off the lexical block for the function since we added it // unconditionally. KSDbgInfo.LexicalBlocks.pop_back(); - return 0; + return nullptr; } //===----------------------------------------------------------------------===// // Top-Level parsing and JIT Driver //===----------------------------------------------------------------------===// -static ExecutionEngine *TheExecutionEngine; +static void InitializeModule() { + // Open a new module. + TheModule = llvm::make_unique<Module>("my cool jit", getGlobalContext()); + TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout()); +} static void HandleDefinition() { - if (FunctionAST *F = ParseDefinition()) { - if (!F->Codegen()) { + if (auto FnAST = ParseDefinition()) { + if (!FnAST->codegen()) fprintf(stderr, "Error reading function definition:"); - } } else { // Skip token for error recovery. getNextToken(); @@ -1344,10 +1334,11 @@ static void HandleDefinition() { } static void HandleExtern() { - if (PrototypeAST *P = ParseExtern()) { - if (!P->Codegen()) { + if (auto ProtoAST = ParseExtern()) { + if (!ProtoAST->codegen()) fprintf(stderr, "Error reading extern"); - } + else + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); } else { // Skip token for error recovery. getNextToken(); @@ -1356,8 +1347,8 @@ static void HandleExtern() { static void HandleTopLevelExpression() { // Evaluate a top-level expression into an anonymous function. - if (FunctionAST *F = ParseTopLevelExpr()) { - if (!F->Codegen()) { + if (auto FnAST = ParseTopLevelExpr()) { + if (!FnAST->codegen()) { fprintf(stderr, "Error generating code for top level expr"); } } else { @@ -1372,9 +1363,9 @@ static void MainLoop() { switch (CurTok) { case tok_eof: return; - case ';': + case ';': // ignore top-level semicolons. getNextToken(); - break; // ignore top-level semicolons. + break; case tok_def: HandleDefinition(); break; @@ -1394,13 +1385,13 @@ /// putchard - putchar that takes a double and returns 0. extern "C" double putchard(double X) { - putchar((char)X); + fputc((char)X, stderr); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. extern "C" double printd(double X) { - printf("%f\n", X); + fprintf(stderr, "%f\n", X); return 0; } @@ -1412,7 +1403,6 @@ int main() { InitializeNativeTarget(); InitializeNativeTargetAsmPrinter(); InitializeNativeTargetAsmParser(); - LLVMContext &Context = getGlobalContext(); // Install standard binary operators. // 1 is lowest precedence. @@ -1425,9 +1415,9 @@ int main() { // Prime the first token. getNextToken(); - // Make the module, which holds all the code. - std::unique_ptr<Module> Owner = make_unique<Module>("my cool jit", Context); - TheModule = Owner.get(); + TheJIT = llvm::make_unique<KaleidoscopeJIT>(); + + InitializeModule();
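// A minimal usage sketch for TheJIT (addModule, findSymbol, and
// getTargetMachine come from the ../include/KaleidoscopeJIT.h header that
// this patch adds; treat the exact signatures as assumptions): after
// MainLoop() fills TheModule, the driver could hand the module to the JIT
// and invoke the anonymous top-level expression, mirroring what the Orc
// examples later in this patch do:
//
//   TheJIT->addModule(std::move(TheModule));
//   auto ExprSymbol = TheJIT->findSymbol("__anon_expr");
//   double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress();
//   fprintf(stderr, "Evaluated to %f\n", FP());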
// Add the current debug info version into the module. TheModule->addModuleFlag(Module::Warning, "Debug Info Version", @@ -1438,7 +1428,7 @@ int main() { TheModule->addModuleFlag(llvm::Module::Warning, "Dwarf Version", 2); // Construct the DIBuilder, we do this here because we need the module. - DBuilder = new DIBuilder(*TheModule); + DBuilder = llvm::make_unique<DIBuilder>(*TheModule); // Create the compile unit for the module. // Currently down as "fib.ks" as a filename since we're redirecting stdin @@ -1446,47 +1436,9 @@ KSDbgInfo.TheCU = DBuilder->createCompileUnit( dwarf::DW_LANG_C, "fib.ks", ".", "Kaleidoscope Compiler", 0, "", 0); - // Create the JIT. This takes ownership of the module. - std::string ErrStr; - TheExecutionEngine = - EngineBuilder(std::move(Owner)) - .setErrorStr(&ErrStr) - .setMCJITMemoryManager(llvm::make_unique<SectionMemoryManager>()) - .create(); - if (!TheExecutionEngine) { - fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); - exit(1); - } - - legacy::FunctionPassManager OurFPM(TheModule); - - // Set up the optimizer pipeline. Start with registering info about how the - // target lays out data structures. - TheModule->setDataLayout(*TheExecutionEngine->getDataLayout()); -#if 0 - // Provide basic AliasAnalysis support for GVN. - OurFPM.add(createBasicAliasAnalysisPass()); - // Promote allocas to registers. - OurFPM.add(createPromoteMemoryToRegisterPass()); - // Do simple "peephole" optimizations and bit-twiddling optzns. - OurFPM.add(createInstructionCombiningPass()); - // Reassociate expressions. - OurFPM.add(createReassociatePass()); - // Eliminate Common SubExpressions. - OurFPM.add(createGVNPass()); - // Simplify the control flow graph (deleting unreachable blocks, etc). - OurFPM.add(createCFGSimplificationPass()); - #endif - OurFPM.doInitialization(); - - // Set the global so the code gen can use this. - TheFPM = &OurFPM; - // Run the main "interpreter loop" now. MainLoop(); - TheFPM = 0; - // Finalize the debug info. DBuilder->finalize(); diff --git a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp index c9b2c6af5658..78184f5d32cd 100644 --- a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp +++ b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp @@ -39,14 +39,14 @@ enum Token { // primary tok_identifier = -4, tok_number = -5, - + // control tok_if = -6, tok_then = -7, tok_else = -8, tok_for = -9, tok_in = -10, - + // operators tok_binary = -11, tok_unary = -12, - + // var definition tok_var = -13 }; @@ -87,7 +87,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -95,11 +95,11 @@ static int gettok() { // Comment until end of line. do LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -140,7 +140,7 @@ struct VariableExprAST : public ExprAST { /// UnaryExprAST - Expression class for a unary operator. struct UnaryExprAST : public ExprAST { - UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) + UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand) : Opcode(std::move(Opcode)), Operand(std::move(Operand)) {} Value *IRGen(IRGenContext &C) const override; @@ -152,7 +152,7 @@ /// BinaryExprAST - Expression class for a binary operator.
struct BinaryExprAST : public ExprAST { BinaryExprAST(char Op, std::unique_ptr<ExprAST> LHS, - std::unique_ptr<ExprAST> RHS) + std::unique_ptr<ExprAST> RHS) : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} Value *IRGen(IRGenContext &C) const override; @@ -224,7 +224,7 @@ struct PrototypeAST { bool isUnaryOp() const { return IsOperator && Args.size() == 1; } bool isBinaryOp() const { return IsOperator && Args.size() == 2; } - + char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); return Name[Name.size()-1]; @@ -268,7 +268,7 @@ static std::map<char, int> BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; if (TokPrec <= 0) return -1; @@ -294,12 +294,12 @@ static std::unique_ptr<ExprAST> ParseExpression(); /// ::= identifier '(' expression* ')' static std::unique_ptr<ExprAST> ParseIdentifierExpr() { std::string IdName = IdentifierStr; - + getNextToken(); // eat identifier. - + if (CurTok != '(') // Simple variable ref. return llvm::make_unique<VariableExprAST>(IdName); - + // Call. getNextToken(); // eat ( std::vector<std::unique_ptr<ExprAST>> Args; @@ -319,7 +319,7 @@ static std::unique_ptr<ExprAST> ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - + return llvm::make_unique<CallExprAST>(IdName, std::move(Args)); } @@ -336,7 +336,7 @@ static std::unique_ptr<ExprAST> ParseParenExpr() { auto V = ParseExpression(); if (!V) return nullptr; - + if (CurTok != ')') return ErrorU("expected ')'"); getNextToken(); // eat ). @@ -346,29 +346,29 @@ /// ifexpr ::= 'if' expression 'then' expression 'else' expression static std::unique_ptr<ExprAST> ParseIfExpr() { getNextToken(); // eat the if. - + // condition. auto Cond = ParseExpression(); if (!Cond) return nullptr; - + if (CurTok != tok_then) return ErrorU("expected then"); getNextToken(); // eat the then - + auto Then = ParseExpression(); if (!Then) return nullptr; - + if (CurTok != tok_else) return ErrorU("expected else"); - + getNextToken(); - + auto Else = ParseExpression(); if (!Else) return nullptr; - + return llvm::make_unique<IfExprAST>(std::move(Cond), std::move(Then), std::move(Else)); } @@ -379,26 +379,25 @@ static std::unique_ptr<ExprAST> ParseForExpr() { if (CurTok != tok_identifier) return ErrorU("expected identifier after for"); - + std::string IdName = IdentifierStr; getNextToken(); // eat identifier. - + if (CurTok != '=') return ErrorU("expected '=' after for"); getNextToken(); // eat '='. - - + auto Start = ParseExpression(); if (!Start) return nullptr; if (CurTok != ',') return ErrorU("expected ',' after for start value"); getNextToken(); - + auto End = ParseExpression(); if (!End) return nullptr; - + // The step value is optional. std::unique_ptr<ExprAST> Step; if (CurTok == ',') { getNextToken(); Step = ParseExpression(); if (!Step) return nullptr; } - + if (CurTok != tok_in) return ErrorU("expected 'in' after for"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; @@ -420,7 +419,7 @@ static std::unique_ptr<ExprAST> ParseForExpr() { std::move(Step), std::move(Body)); } -/// varexpr ::= 'var' identifier ('=' expression)? +/// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression static std::unique_ptr<ExprAST> ParseVarExpr() { getNextToken(); // eat the var. @@ -430,7 +429,7 @@ static std::unique_ptr<ExprAST> ParseVarExpr() { // At least one variable name is required. if (CurTok != tok_identifier) return ErrorU("expected identifier after var"); - + while (1) { std::string Name = IdentifierStr; getNextToken(); // eat identifier.
@@ -439,31 +438,31 @@ static std::unique_ptr<ExprAST> ParseVarExpr() { std::unique_ptr<ExprAST> Init; if (CurTok == '=') { getNextToken(); // eat the '='. - + Init = ParseExpression(); if (!Init) return nullptr; } - + VarBindings.push_back(VarExprAST::Binding(Name, std::move(Init))); - + // End of var list, exit loop. if (CurTok != ',') break; getNextToken(); // eat the ','. - + if (CurTok != tok_identifier) return ErrorU("expected identifier list after var"); } - + // At this point, we have to have 'in'. if (CurTok != tok_in) return ErrorU("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; - + return llvm::make_unique<VarExprAST>(std::move(VarBindings), std::move(Body)); } @@ -493,7 +492,7 @@ static std::unique_ptr<ExprAST> ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); - + // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); @@ -509,21 +508,21 @@ static std::unique_ptr<ExprAST> ParseBinOpRHS(int ExprPrec, // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; getNextToken(); // eat binop - + // Parse the unary expression after the binary operator. auto RHS = ParseUnary(); if (!RHS) return nullptr; - + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. int NextPrec = GetTokPrecedence(); @@ -532,7 +531,7 @@ if (!RHS) return nullptr; } - + // Merge LHS/RHS. LHS = llvm::make_unique<BinaryExprAST>(BinOp, std::move(LHS), std::move(RHS)); } @@ -545,7 +544,7 @@ static std::unique_ptr<ExprAST> ParseExpression() { auto LHS = ParseUnary(); if (!LHS) return nullptr; - + return ParseBinOpRHS(0, std::move(LHS)); } @@ -555,10 +554,10 @@ /// ::= unary LETTER (id) static std::unique_ptr<PrototypeAST> ParsePrototype() { std::string FnName; - + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. unsigned BinaryPrecedence = 30; - + switch (CurTok) { default: return ErrorU("Expected function name in prototype"); @@ -584,7 +583,7 @@ FnName += (char)CurTok; Kind = 2; getNextToken(); - + // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) @@ -594,23 +593,23 @@ } break; } - + if (CurTok != '(') return ErrorU("Expected '(' in prototype"); - + std::vector<std::string> ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorU("Expected ')' in prototype"); - + // success. getNextToken(); // eat ')'. - + // Verify right number of names for operator.
if (Kind && ArgNames.size() != Kind) return ErrorU("Invalid number of operands for operator"); - + return llvm::make_unique<PrototypeAST>(FnName, std::move(ArgNames), Kind != 0, BinaryPrecedence); } @@ -691,10 +690,10 @@ public: PrototypeAST* getPrototypeAST(const std::string &Name); private: typedef std::map<std::string, std::unique_ptr<PrototypeAST>> PrototypeMap; - + LLVMContext &Context; std::unique_ptr<TargetMachine> TM; - + PrototypeMap Prototypes; }; @@ -717,7 +716,7 @@ public: M(new Module(GenerateUniqueName("jit_module_"), Session.getLLVMContext())), Builder(Session.getLLVMContext()) { - M->setDataLayout(*Session.getTarget().getDataLayout()); + M->setDataLayout(Session.getTarget().createDataLayout()); } SessionContext& getSession() { return Session; } @@ -748,7 +747,7 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } @@ -760,7 +759,7 @@ Value *VariableExprAST::IRGen(IRGenContext &C) const { // Look this variable up in the function. Value *V = C.NamedValues[Name]; - if (V == 0) + if (!V) return ErrorP("Unknown variable name '" + Name + "'"); // Load the value. @@ -783,7 +782,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { // Special case '=' because we don't want to emit the LHS as an expression. if (Op == '=') { // Assignment requires the LHS to be an identifier. - auto LHSVar = static_cast<VariableExprAST&>(*LHS); + auto &LHSVar = static_cast<VariableExprAST&>(*LHS); // Codegen the RHS. Value *Val = RHS->IRGen(C); if (!Val) return nullptr; @@ -795,11 +794,11 @@ } return ErrorP("Unknown variable name"); } - + Value *L = LHS->IRGen(C); Value *R = RHS->IRGen(C); if (!L || !R) return nullptr; - + switch (Op) { case '+': return C.getBuilder().CreateFAdd(L, R, "addtmp"); case '-': return C.getBuilder().CreateFSub(L, R, "subtmp"); @@ -812,7 +811,7 @@ "booltmp"); default: break; } - + // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. std::string FnName = MakeLegalFunctionName(std::string("binary")+Op); @@ -820,7 +819,7 @@ Value *Ops[] = { L, R }; return C.getBuilder().CreateCall(F, Ops, "binop"); } - + return ErrorP("Unknown binary operator"); } @@ -836,7 +835,7 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { ArgsV.push_back(Args[i]->IRGen(C)); if (!ArgsV.back()) return nullptr; } - + return C.getBuilder().CreateCall(CalleeF, ArgsV, "calltmp"); } @@ -846,49 +845,49 @@ Value *IfExprAST::IRGen(IRGenContext &C) const { Value *CondV = Cond->IRGen(C); if (!CondV) return nullptr; - + // Convert condition to a bool by comparing equal to 0.0. - ConstantFP *FPZero = + ConstantFP *FPZero = ConstantFP::get(C.getLLVMContext(), APFloat(0.0)); CondV = C.getBuilder().CreateFCmpONE(CondV, FPZero, "ifcond"); - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); - + // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function.
BasicBlock *ThenBB = BasicBlock::Create(C.getLLVMContext(), "then", TheFunction); BasicBlock *ElseBB = BasicBlock::Create(C.getLLVMContext(), "else"); BasicBlock *MergeBB = BasicBlock::Create(C.getLLVMContext(), "ifcont"); - + C.getBuilder().CreateCondBr(CondV, ThenBB, ElseBB); - + // Emit then value. C.getBuilder().SetInsertPoint(ThenBB); - + Value *ThenV = Then->IRGen(C); if (!ThenV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. ThenBB = C.getBuilder().GetInsertBlock(); - + // Emit else block. TheFunction->getBasicBlockList().push_back(ElseBB); C.getBuilder().SetInsertPoint(ElseBB); - + Value *ElseV = Else->IRGen(C); if (!ElseV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. ElseBB = C.getBuilder().GetInsertBlock(); - + // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); C.getBuilder().SetInsertPoint(MergeBB); PHINode *PN = C.getBuilder().CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); - + PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); return PN; @@ -901,7 +900,7 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // start = startexpr // store start -> var // goto loop - // loop: + // loop: // ... // bodyexpr // ... @@ -914,40 +913,40 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // store nextvar -> var // br endcond, loop, endloop // outloop: - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - + // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->IRGen(C); if (!StartVal) return nullptr; - + // Store the value into the alloca. C.getBuilder().CreateStore(StartVal, Alloca); - + // Make the new basic block for the loop header, inserting after current // block. BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); - + // Insert an explicit fall through from the current block to the LoopBB. C.getBuilder().CreateBr(LoopBB); // Start insertion in LoopBB. C.getBuilder().SetInsertPoint(LoopBB); - + // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. AllocaInst *OldVal = C.NamedValues[VarName]; C.NamedValues[VarName] = Alloca; - + // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. if (!Body->IRGen(C)) return nullptr; - + // Emit the step value. Value *StepVal; if (Step) { @@ -957,52 +956,51 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } - + // Compute the end condition. Value *EndCond = End->IRGen(C); - if (EndCond == 0) return EndCond; - + if (!EndCond) return nullptr; + // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. Value *CurVar = C.getBuilder().CreateLoad(Alloca, VarName.c_str()); Value *NextVar = C.getBuilder().CreateFAdd(CurVar, StepVal, "nextvar"); C.getBuilder().CreateStore(NextVar, Alloca); - + // Convert condition to a bool by comparing equal to 0.0. 
- EndCond = C.getBuilder().CreateFCmpONE(EndCond, + EndCond = C.getBuilder().CreateFCmpONE(EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); - + // Create the "after loop" block and insert it. BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); - + // Insert the conditional branch into the end of LoopEndBB. C.getBuilder().CreateCondBr(EndCond, LoopBB, AfterBB); - + // Any new code will be inserted in AfterBB. C.getBuilder().SetInsertPoint(AfterBB); - + // Restore the unshadowed variable. if (OldVal) C.NamedValues[VarName] = OldVal; else C.NamedValues.erase(VarName); - // for expr always returns 0.0. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } Value *VarExprAST::IRGen(IRGenContext &C) const { std::vector<AllocaInst *> OldBindings; - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Register all variables and emit their initializer. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) { auto &VarName = VarBindings[i].first; auto &Init = VarBindings[i].second; - + // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff // like this: @@ -1014,22 +1012,22 @@ Value *VarExprAST::IRGen(IRGenContext &C) const { if (!InitVal) return nullptr; } else // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); - + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); C.getBuilder().CreateStore(InitVal, Alloca); // Remember the old variable binding so that we can restore the binding when // we unrecurse. OldBindings.push_back(C.NamedValues[VarName]); - + // Remember this binding. C.NamedValues[VarName] = Alloca; } - + // Codegen the body, now that all vars are in scope. Value *BodyVal = Body->IRGen(C); if (!BodyVal) return nullptr; - + // Pop all our variables from scope. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) C.NamedValues[VarBindings[i].first] = OldBindings[i]; @@ -1042,7 +1040,7 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { std::string FnName = MakeLegalFunctionName(Name); // Make the function type: double(double,double) etc. - std::vector<Type*> Doubles(Args.size(), + std::vector<Type*> Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); @@ -1055,26 +1053,26 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { // Delete the one we just made and get the existing one. F->eraseFromParent(); F = C.getM().getFunction(Name); - + // If F already has a body, reject this. if (!F->empty()) { ErrorP("redefinition of function"); return nullptr; } - + // If F took a different number of args, reject. if (F->arg_size() != Args.size()) { ErrorP("redefinition of function with different # args"); return nullptr; } } - + // Set names for all arguments. unsigned Idx = 0; for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); ++AI, ++Idx) AI->setName(Args[Idx]); - + return F; } @@ -1087,7 +1085,7 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); // Store the initial value into the alloca. - C.getBuilder().CreateStore(AI, Alloca); + C.getBuilder().CreateStore(&*AI, Alloca); // Add arguments to variable symbol table.
C.NamedValues[Args[Idx]] = Alloca; @@ -1096,19 +1094,19 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { C.NamedValues.clear(); - + Function *TheFunction = Proto->IRGen(C); if (!TheFunction) return nullptr; - + // If this is an operator, install it. if (Proto->isBinaryOp()) BinopPrecedence[Proto->getOperatorName()] = Proto->Precedence; - + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); C.getBuilder().SetInsertPoint(BB); - + // Add all arguments to the symbol table and create their allocas. Proto->CreateArgumentAllocas(TheFunction, C); @@ -1121,7 +1119,7 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); @@ -1170,16 +1168,14 @@ public: : Session(Session), CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())), LazyEmitLayer(CompileLayer), - CompileCallbacks(LazyEmitLayer, CCMgrMemMgr, Session.getLLVMContext(), - reinterpret_cast<TargetAddress>(EarthShatteringKaboom), - 64) {} + CompileCallbacks(reinterpret_cast<TargetAddress>(EarthShatteringKaboom)) {} std::string mangle(const std::string &Name) { std::string MangledName; { raw_string_ostream MangledNameStream(MangledName); Mangler::getNameWithPrefix(MangledNameStream, Name, - *Session.getTarget().getDataLayout()); + Session.getTarget().createDataLayout()); } return MangledName; } @@ -1236,7 +1232,7 @@ private: RuntimeDyld::SymbolInfo searchFunctionASTs(const std::string &Name) { auto DefI = FunctionDefs.find(Name); if (DefI == FunctionDefs.end()) - return 0; + return nullptr; // Return the address of the stub. // Take the FunctionAST out of the map. @@ -1262,8 +1258,7 @@ private: // the function. The resulting CallbackInfo type will let us set the // compile and update actions for the callback, and get a pointer to // the jit trampoline that we need to call to trigger those actions. - auto CallbackInfo = - CompileCallbacks.getCompileCallback(F->getContext()); + auto CallbackInfo = CompileCallbacks.getCompileCallback(); // Step 3) Create a stub that will indirectly call the body of this // function once it is compiled. Initially, set the function @@ -1313,7 +1308,7 @@ private: std::map<std::string, std::unique_ptr<FunctionAST>> FunctionDefs; - JITCompileCallbackManager<decltype(LazyEmitLayer), OrcX86_64> CompileCallbacks; + LocalJITCompileCallbackManager<OrcX86_64> CompileCallbacks; }; static void HandleDefinition(SessionContext &S, KaleidoscopeJIT &J) { @@ -1350,7 +1345,7 @@ static void HandleTopLevelExpression(SessionContext &S, KaleidoscopeJIT &J) { // Get the address of the JIT'd function in memory. auto ExprSymbol = J.findUnmangledSymbol("__anon_expr"); - + // Cast it to the right type (takes no arguments, returns a double) so we // can call it as a native function. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); @@ -1393,20 +1388,20 @@ //===----------------------------------------------------------------------===// /// putchard - putchar that takes a double and returns 0. -extern "C" +extern "C" double putchard(double X) { putchar((char)X); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0.
-extern "C" +extern "C" double printd(double X) { printf("%f", X); return 0; } -extern "C" +extern "C" double printlf() { printf("\n"); return 0; @@ -1443,4 +1438,3 @@ int main() { return 0; } - diff --git a/examples/Kaleidoscope/Orc/initial/toy.cpp b/examples/Kaleidoscope/Orc/initial/toy.cpp index 7e99c0f5ba54..2a6bb92246d0 100644 --- a/examples/Kaleidoscope/Orc/initial/toy.cpp +++ b/examples/Kaleidoscope/Orc/initial/toy.cpp @@ -38,14 +38,14 @@ enum Token { // primary tok_identifier = -4, tok_number = -5, - + // control tok_if = -6, tok_then = -7, tok_else = -8, tok_for = -9, tok_in = -10, - + // operators tok_binary = -11, tok_unary = -12, - + // var definition tok_var = -13 }; @@ -86,7 +86,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -94,11 +94,11 @@ static int gettok() { // Comment until end of line. do LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -139,7 +139,7 @@ struct VariableExprAST : public ExprAST { /// UnaryExprAST - Expression class for a unary operator. struct UnaryExprAST : public ExprAST { - UnaryExprAST(char Opcode, std::unique_ptr Operand) + UnaryExprAST(char Opcode, std::unique_ptr Operand) : Opcode(std::move(Opcode)), Operand(std::move(Operand)) {} Value *IRGen(IRGenContext &C) const override; @@ -151,7 +151,7 @@ struct UnaryExprAST : public ExprAST { /// BinaryExprAST - Expression class for a binary operator. struct BinaryExprAST : public ExprAST { BinaryExprAST(char Op, std::unique_ptr LHS, - std::unique_ptr RHS) + std::unique_ptr RHS) : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} Value *IRGen(IRGenContext &C) const override; @@ -223,7 +223,7 @@ struct PrototypeAST { bool isUnaryOp() const { return IsOperator && Args.size() == 1; } bool isBinaryOp() const { return IsOperator && Args.size() == 2; } - + char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); return Name[Name.size()-1]; @@ -267,7 +267,7 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; if (TokPrec <= 0) return -1; @@ -293,12 +293,12 @@ static std::unique_ptr ParseExpression(); /// ::= identifier '(' expression* ')' static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - + getNextToken(); // eat identifier. - + if (CurTok != '(') // Simple variable ref. return llvm::make_unique(IdName); - + // Call. getNextToken(); // eat ( std::vector> Args; @@ -318,7 +318,7 @@ static std::unique_ptr ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - + return llvm::make_unique(IdName, std::move(Args)); } @@ -335,7 +335,7 @@ static std::unique_ptr ParseParenExpr() { auto V = ParseExpression(); if (!V) return nullptr; - + if (CurTok != ')') return ErrorU("expected ')'"); getNextToken(); // eat ). @@ -345,29 +345,29 @@ static std::unique_ptr ParseParenExpr() { /// ifexpr ::= 'if' expression 'then' expression 'else' expression static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. - + // condition. 
auto Cond = ParseExpression(); if (!Cond) return nullptr; - + if (CurTok != tok_then) return ErrorU("expected then"); getNextToken(); // eat the then - + auto Then = ParseExpression(); if (!Then) return nullptr; - + if (CurTok != tok_else) return ErrorU("expected else"); - + getNextToken(); - + auto Else = ParseExpression(); if (!Else) return nullptr; - + return llvm::make_unique(std::move(Cond), std::move(Then), std::move(Else)); } @@ -378,26 +378,25 @@ static std::unique_ptr ParseForExpr() { if (CurTok != tok_identifier) return ErrorU("expected identifier after for"); - + std::string IdName = IdentifierStr; getNextToken(); // eat identifier. - + if (CurTok != '=') return ErrorU("expected '=' after for"); getNextToken(); // eat '='. - - + auto Start = ParseExpression(); if (!Start) return nullptr; if (CurTok != ',') return ErrorU("expected ',' after for start value"); getNextToken(); - + auto End = ParseExpression(); if (!End) return nullptr; - + // The step value is optional. std::unique_ptr Step; if (CurTok == ',') { @@ -406,11 +405,11 @@ static std::unique_ptr ParseForExpr() { if (!Step) return nullptr; } - + if (CurTok != tok_in) return ErrorU("expected 'in' after for"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (Body) return nullptr; @@ -419,7 +418,7 @@ static std::unique_ptr ParseForExpr() { std::move(Step), std::move(Body)); } -/// varexpr ::= 'var' identifier ('=' expression)? +/// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression static std::unique_ptr ParseVarExpr() { getNextToken(); // eat the var. @@ -429,7 +428,7 @@ static std::unique_ptr ParseVarExpr() { // At least one variable name is required. if (CurTok != tok_identifier) return ErrorU("expected identifier after var"); - + while (1) { std::string Name = IdentifierStr; getNextToken(); // eat identifier. @@ -438,31 +437,31 @@ static std::unique_ptr ParseVarExpr() { std::unique_ptr Init; if (CurTok == '=') { getNextToken(); // eat the '='. - + Init = ParseExpression(); if (!Init) return nullptr; } - + VarBindings.push_back(VarExprAST::Binding(Name, std::move(Init))); - + // End of var list, exit loop. if (CurTok != ',') break; getNextToken(); // eat the ','. - + if (CurTok != tok_identifier) return ErrorU("expected identifier list after var"); } - + // At this point, we have to have 'in'. if (CurTok != tok_in) return ErrorU("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; - + return llvm::make_unique(std::move(VarBindings), std::move(Body)); } @@ -492,7 +491,7 @@ static std::unique_ptr ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); - + // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); @@ -508,21 +507,21 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; getNextToken(); // eat binop - + // Parse the unary expression after the binary operator. auto RHS = ParseUnary(); if (!RHS) return nullptr; - + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. 
int NextPrec = GetTokPrecedence(); @@ -531,7 +530,7 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, if (!RHS) return nullptr; } - + // Merge LHS/RHS. LHS = llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } @@ -544,7 +543,7 @@ static std::unique_ptr ParseExpression() { auto LHS = ParseUnary(); if (!LHS) return nullptr; - + return ParseBinOpRHS(0, std::move(LHS)); } @@ -554,10 +553,10 @@ static std::unique_ptr ParseExpression() { /// ::= unary LETTER (id) static std::unique_ptr ParsePrototype() { std::string FnName; - + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. unsigned BinaryPrecedence = 30; - + switch (CurTok) { default: return ErrorU("Expected function name in prototype"); @@ -583,7 +582,7 @@ static std::unique_ptr ParsePrototype() { FnName += (char)CurTok; Kind = 2; getNextToken(); - + // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) @@ -593,23 +592,23 @@ static std::unique_ptr ParsePrototype() { } break; } - + if (CurTok != '(') return ErrorU("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorU("Expected ')' in prototype"); - + // success. getNextToken(); // eat ')'. - + // Verify right number of names for operator. if (Kind && ArgNames.size() != Kind) return ErrorU("Invalid number of operands for operator"); - + return llvm::make_unique(FnName, std::move(ArgNames), Kind != 0, BinaryPrecedence); } @@ -690,10 +689,10 @@ public: PrototypeAST* getPrototypeAST(const std::string &Name); private: typedef std::map> PrototypeMap; - + LLVMContext &Context; std::unique_ptr TM; - + PrototypeMap Prototypes; }; @@ -716,7 +715,7 @@ public: M(new Module(GenerateUniqueName("jit_module_"), Session.getLLVMContext())), Builder(Session.getLLVMContext()) { - M->setDataLayout(*Session.getTarget().getDataLayout()); + M->setDataLayout(Session.getTarget().createDataLayout()); } SessionContext& getSession() { return Session; } @@ -747,7 +746,7 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } @@ -759,7 +758,7 @@ Value *VariableExprAST::IRGen(IRGenContext &C) const { // Look this variable up in the function. Value *V = C.NamedValues[Name]; - if (V == 0) + if (!V) return ErrorP("Unknown variable name '" + Name + "'"); // Load the value. @@ -782,7 +781,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { // Special case '=' because we don't want to emit the LHS as an expression. if (Op == '=') { // Assignment requires the LHS to be an identifier. - auto LHSVar = static_cast(*LHS); + auto &LHSVar = static_cast(*LHS); // Codegen the RHS. Value *Val = RHS->IRGen(C); if (!Val) return nullptr; @@ -794,11 +793,11 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { } return ErrorP("Unknown variable name"); } - + Value *L = LHS->IRGen(C); Value *R = RHS->IRGen(C); if (!L || !R) return nullptr; - + switch (Op) { case '+': return C.getBuilder().CreateFAdd(L, R, "addtmp"); case '-': return C.getBuilder().CreateFSub(L, R, "subtmp"); @@ -811,7 +810,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { "booltmp"); default: break; } - + // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. 
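Two details in the hunks above deserve a gloss. CreateEntryBlockAlloca always builds the alloca at the top of the function's entry block, which is exactly the placement LLVM's mem2reg pass requires to promote the slot to an SSA register; and the second CreateAlloca argument (changed here from 0 to nullptr) is the optional array-size operand, unset for a scalar slot. A self-contained sketch of the idiom; makeDoubleSlot is a hypothetical name, not part of the patch:

  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include <string>
  using namespace llvm;

  static AllocaInst *makeDoubleSlot(Function &F, LLVMContext &Ctx,
                                    const std::string &Name) {
    // Insert at the very start of the entry block so mem2reg can see it.
    IRBuilder<> TmpB(&F.getEntryBlock(), F.getEntryBlock().begin());
    return TmpB.CreateAlloca(Type::getDoubleTy(Ctx), nullptr, Name);
  }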
std::string FnName = MakeLegalFunctionName(std::string("binary")+Op); @@ -819,7 +818,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { Value *Ops[] = { L, R }; return C.getBuilder().CreateCall(F, Ops, "binop"); } - + return ErrorP("Unknown binary operator"); } @@ -835,7 +834,7 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { ArgsV.push_back(Args[i]->IRGen(C)); if (!ArgsV.back()) return nullptr; } - + return C.getBuilder().CreateCall(CalleeF, ArgsV, "calltmp"); } @@ -845,49 +844,49 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { Value *IfExprAST::IRGen(IRGenContext &C) const { Value *CondV = Cond->IRGen(C); if (!CondV) return nullptr; - + // Convert condition to a bool by comparing equal to 0.0. - ConstantFP *FPZero = + ConstantFP *FPZero = ConstantFP::get(C.getLLVMContext(), APFloat(0.0)); CondV = C.getBuilder().CreateFCmpONE(CondV, FPZero, "ifcond"); - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); - + // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function. BasicBlock *ThenBB = BasicBlock::Create(C.getLLVMContext(), "then", TheFunction); BasicBlock *ElseBB = BasicBlock::Create(C.getLLVMContext(), "else"); BasicBlock *MergeBB = BasicBlock::Create(C.getLLVMContext(), "ifcont"); - + C.getBuilder().CreateCondBr(CondV, ThenBB, ElseBB); - + // Emit then value. C.getBuilder().SetInsertPoint(ThenBB); - + Value *ThenV = Then->IRGen(C); if (!ThenV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. ThenBB = C.getBuilder().GetInsertBlock(); - + // Emit else block. TheFunction->getBasicBlockList().push_back(ElseBB); C.getBuilder().SetInsertPoint(ElseBB); - + Value *ElseV = Else->IRGen(C); if (!ElseV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. ElseBB = C.getBuilder().GetInsertBlock(); - + // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); C.getBuilder().SetInsertPoint(MergeBB); PHINode *PN = C.getBuilder().CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); - + PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); return PN; @@ -900,7 +899,7 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // start = startexpr // store start -> var // goto loop - // loop: + // loop: // ... // bodyexpr // ... @@ -913,40 +912,40 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // store nextvar -> var // br endcond, loop, endloop // outloop: - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - + // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->IRGen(C); if (!StartVal) return nullptr; - + // Store the value into the alloca. C.getBuilder().CreateStore(StartVal, Alloca); - + // Make the new basic block for the loop header, inserting after current // block. BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); - + // Insert an explicit fall through from the current block to the LoopBB. C.getBuilder().CreateBr(LoopBB); // Start insertion in LoopBB. C.getBuilder().SetInsertPoint(LoopBB); - + // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. 
AllocaInst *OldVal = C.NamedValues[VarName]; C.NamedValues[VarName] = Alloca; - + // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. if (!Body->IRGen(C)) return nullptr; - + // Emit the step value. Value *StepVal; if (Step) { @@ -956,52 +955,51 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } - + // Compute the end condition. Value *EndCond = End->IRGen(C); - if (EndCond == 0) return EndCond; - + if (!EndCond) return nullptr; + // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. Value *CurVar = C.getBuilder().CreateLoad(Alloca, VarName.c_str()); Value *NextVar = C.getBuilder().CreateFAdd(CurVar, StepVal, "nextvar"); C.getBuilder().CreateStore(NextVar, Alloca); - + // Convert condition to a bool by comparing equal to 0.0. - EndCond = C.getBuilder().CreateFCmpONE(EndCond, + EndCond = C.getBuilder().CreateFCmpONE(EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); - + // Create the "after loop" block and insert it. BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); - + // Insert the conditional branch into the end of LoopEndBB. C.getBuilder().CreateCondBr(EndCond, LoopBB, AfterBB); - + // Any new code will be inserted in AfterBB. C.getBuilder().SetInsertPoint(AfterBB); - + // Restore the unshadowed variable. if (OldVal) C.NamedValues[VarName] = OldVal; else C.NamedValues.erase(VarName); - // for expr always returns 0.0. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } Value *VarExprAST::IRGen(IRGenContext &C) const { std::vector OldBindings; - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Register all variables and emit their initializer. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) { auto &VarName = VarBindings[i].first; auto &Init = VarBindings[i].second; - + // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff // like this: @@ -1013,22 +1011,22 @@ Value *VarExprAST::IRGen(IRGenContext &C) const { if (!InitVal) return nullptr; } else // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); - + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); C.getBuilder().CreateStore(InitVal, Alloca); // Remember the old variable binding so that we can restore the binding when // we unrecurse. OldBindings.push_back(C.NamedValues[VarName]); - + // Remember this binding. C.NamedValues[VarName] = Alloca; } - + // Codegen the body, now that all vars are in scope. Value *BodyVal = Body->IRGen(C); if (!BodyVal) return nullptr; - + // Pop all our variables from scope. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) C.NamedValues[VarBindings[i].first] = OldBindings[i]; @@ -1041,7 +1039,7 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { std::string FnName = MakeLegalFunctionName(Name); // Make the function type: double(double,double) etc. 
- std::vector Doubles(Args.size(), + std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); @@ -1054,26 +1052,26 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { // Delete the one we just made and get the existing one. F->eraseFromParent(); F = C.getM().getFunction(Name); - + // If F already has a body, reject this. if (!F->empty()) { ErrorP("redefinition of function"); return nullptr; } - + // If F took a different number of args, reject. if (F->arg_size() != Args.size()) { ErrorP("redefinition of function with different # args"); return nullptr; } } - + // Set names for all arguments. unsigned Idx = 0; for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); ++AI, ++Idx) AI->setName(Args[Idx]); - + return F; } @@ -1086,7 +1084,7 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); // Store the initial value into the alloca. - C.getBuilder().CreateStore(AI, Alloca); + C.getBuilder().CreateStore(&*AI, Alloca); // Add arguments to variable symbol table. C.NamedValues[Args[Idx]] = Alloca; @@ -1095,19 +1093,19 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { Function *FunctionAST::IRGen(IRGenContext &C) const { C.NamedValues.clear(); - + Function *TheFunction = Proto->IRGen(C); if (!TheFunction) return nullptr; - + // If this is an operator, install it. if (Proto->isBinaryOp()) BinopPrecedence[Proto->getOperatorName()] = Proto->Precedence; - + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); C.getBuilder().SetInsertPoint(BB); - + // Add all arguments to the symbol table and create their allocas. Proto->CreateArgumentAllocas(TheFunction, C); @@ -1120,7 +1118,7 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); @@ -1160,7 +1158,7 @@ public: typedef CompileLayerT::ModuleSetHandleT ModuleHandleT; KaleidoscopeJIT(SessionContext &Session) - : DL(*Session.getTarget().getDataLayout()), + : DL(Session.getTarget().createDataLayout()), CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())) {} std::string mangle(const std::string &Name) { @@ -1201,7 +1199,7 @@ public: } private: - const DataLayout &DL; + const DataLayout DL; ObjLayerT ObjectLayer; CompileLayerT CompileLayer; }; @@ -1242,7 +1240,7 @@ static void HandleTopLevelExpression(SessionContext &S, KaleidoscopeJIT &J) { // Get the address of the JIT'd function in memory. auto ExprSymbol = J.findUnmangledSymbol("__anon_expr"); - + // Cast it to the right type (takes no arguments, returns a double) so we // can call it as a native function. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); @@ -1285,20 +1283,20 @@ static void MainLoop() { //===----------------------------------------------------------------------===// /// putchard - putchar that takes a double and returns 0. -extern "C" +extern "C" double putchard(double X) { putchar((char)X); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. 
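The member change from 'const DataLayout &DL' to 'const DataLayout DL' in the KaleidoscopeJIT hunk above is a correctness fix, not style: createDataLayout() returns the DataLayout by value rather than handing out a pointer into the TargetMachine, so a reference member would bind to a temporary and dangle. A minimal sketch of the ownership pattern, using a hypothetical LayoutHolder rather than the tutorial's JIT class:

  #include "llvm/IR/DataLayout.h"
  #include "llvm/Target/TargetMachine.h"
  using namespace llvm;

  class LayoutHolder {
  public:
    // Copies the temporary returned by createDataLayout() into the member.
    LayoutHolder(TargetMachine &TM) : DL(TM.createDataLayout()) {}
    const DataLayout &getDataLayout() const { return DL; }

  private:
    const DataLayout DL; // owned by value, so it outlives the constructor call
  };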
-extern "C" +extern "C" double printd(double X) { printf("%f", X); return 0; } -extern "C" +extern "C" double printlf() { printf("\n"); return 0; @@ -1335,4 +1333,3 @@ int main() { return 0; } - diff --git a/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp b/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp index 4b4c191171b4..5205b406ed71 100644 --- a/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp +++ b/examples/Kaleidoscope/Orc/lazy_codegen/toy.cpp @@ -38,14 +38,14 @@ enum Token { // primary tok_identifier = -4, tok_number = -5, - + // control tok_if = -6, tok_then = -7, tok_else = -8, tok_for = -9, tok_in = -10, - + // operators tok_binary = -11, tok_unary = -12, - + // var definition tok_var = -13 }; @@ -86,7 +86,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -94,11 +94,11 @@ static int gettok() { // Comment until end of line. do LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -139,7 +139,7 @@ struct VariableExprAST : public ExprAST { /// UnaryExprAST - Expression class for a unary operator. struct UnaryExprAST : public ExprAST { - UnaryExprAST(char Opcode, std::unique_ptr Operand) + UnaryExprAST(char Opcode, std::unique_ptr Operand) : Opcode(std::move(Opcode)), Operand(std::move(Operand)) {} Value *IRGen(IRGenContext &C) const override; @@ -151,7 +151,7 @@ struct UnaryExprAST : public ExprAST { /// BinaryExprAST - Expression class for a binary operator. struct BinaryExprAST : public ExprAST { BinaryExprAST(char Op, std::unique_ptr LHS, - std::unique_ptr RHS) + std::unique_ptr RHS) : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} Value *IRGen(IRGenContext &C) const override; @@ -223,7 +223,7 @@ struct PrototypeAST { bool isUnaryOp() const { return IsOperator && Args.size() == 1; } bool isBinaryOp() const { return IsOperator && Args.size() == 2; } - + char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); return Name[Name.size()-1]; @@ -267,7 +267,7 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; if (TokPrec <= 0) return -1; @@ -293,12 +293,12 @@ static std::unique_ptr ParseExpression(); /// ::= identifier '(' expression* ')' static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - + getNextToken(); // eat identifier. - + if (CurTok != '(') // Simple variable ref. return llvm::make_unique(IdName); - + // Call. getNextToken(); // eat ( std::vector> Args; @@ -318,7 +318,7 @@ static std::unique_ptr ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - + return llvm::make_unique(IdName, std::move(Args)); } @@ -335,7 +335,7 @@ static std::unique_ptr ParseParenExpr() { auto V = ParseExpression(); if (!V) return nullptr; - + if (CurTok != ')') return ErrorU("expected ')'"); getNextToken(); // eat ). @@ -345,29 +345,29 @@ static std::unique_ptr ParseParenExpr() { /// ifexpr ::= 'if' expression 'then' expression 'else' expression static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. - + // condition. 
auto Cond = ParseExpression(); if (!Cond) return nullptr; - + if (CurTok != tok_then) return ErrorU("expected then"); getNextToken(); // eat the then - + auto Then = ParseExpression(); if (!Then) return nullptr; - + if (CurTok != tok_else) return ErrorU("expected else"); - + getNextToken(); - + auto Else = ParseExpression(); if (!Else) return nullptr; - + return llvm::make_unique(std::move(Cond), std::move(Then), std::move(Else)); } @@ -378,26 +378,25 @@ static std::unique_ptr ParseForExpr() { if (CurTok != tok_identifier) return ErrorU("expected identifier after for"); - + std::string IdName = IdentifierStr; getNextToken(); // eat identifier. - + if (CurTok != '=') return ErrorU("expected '=' after for"); getNextToken(); // eat '='. - - + auto Start = ParseExpression(); if (!Start) return nullptr; if (CurTok != ',') return ErrorU("expected ',' after for start value"); getNextToken(); - + auto End = ParseExpression(); if (!End) return nullptr; - + // The step value is optional. std::unique_ptr Step; if (CurTok == ',') { @@ -406,11 +405,11 @@ static std::unique_ptr ParseForExpr() { if (!Step) return nullptr; } - + if (CurTok != tok_in) return ErrorU("expected 'in' after for"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (Body) return nullptr; @@ -419,7 +418,7 @@ static std::unique_ptr ParseForExpr() { std::move(Step), std::move(Body)); } -/// varexpr ::= 'var' identifier ('=' expression)? +/// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression static std::unique_ptr ParseVarExpr() { getNextToken(); // eat the var. @@ -429,7 +428,7 @@ static std::unique_ptr ParseVarExpr() { // At least one variable name is required. if (CurTok != tok_identifier) return ErrorU("expected identifier after var"); - + while (1) { std::string Name = IdentifierStr; getNextToken(); // eat identifier. @@ -438,31 +437,31 @@ static std::unique_ptr ParseVarExpr() { std::unique_ptr Init; if (CurTok == '=') { getNextToken(); // eat the '='. - + Init = ParseExpression(); if (!Init) return nullptr; } - + VarBindings.push_back(VarExprAST::Binding(Name, std::move(Init))); - + // End of var list, exit loop. if (CurTok != ',') break; getNextToken(); // eat the ','. - + if (CurTok != tok_identifier) return ErrorU("expected identifier list after var"); } - + // At this point, we have to have 'in'. if (CurTok != tok_in) return ErrorU("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; - + return llvm::make_unique(std::move(VarBindings), std::move(Body)); } @@ -492,7 +491,7 @@ static std::unique_ptr ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); - + // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); @@ -508,21 +507,21 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; getNextToken(); // eat binop - + // Parse the unary expression after the binary operator. auto RHS = ParseUnary(); if (!RHS) return nullptr; - + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. 
int NextPrec = GetTokPrecedence(); @@ -531,7 +530,7 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, if (!RHS) return nullptr; } - + // Merge LHS/RHS. LHS = llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } @@ -544,7 +543,7 @@ static std::unique_ptr ParseExpression() { auto LHS = ParseUnary(); if (!LHS) return nullptr; - + return ParseBinOpRHS(0, std::move(LHS)); } @@ -554,10 +553,10 @@ static std::unique_ptr ParseExpression() { /// ::= unary LETTER (id) static std::unique_ptr ParsePrototype() { std::string FnName; - + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. unsigned BinaryPrecedence = 30; - + switch (CurTok) { default: return ErrorU("Expected function name in prototype"); @@ -583,7 +582,7 @@ static std::unique_ptr ParsePrototype() { FnName += (char)CurTok; Kind = 2; getNextToken(); - + // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) @@ -593,23 +592,23 @@ static std::unique_ptr ParsePrototype() { } break; } - + if (CurTok != '(') return ErrorU("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorU("Expected ')' in prototype"); - + // success. getNextToken(); // eat ')'. - + // Verify right number of names for operator. if (Kind && ArgNames.size() != Kind) return ErrorU("Invalid number of operands for operator"); - + return llvm::make_unique(FnName, std::move(ArgNames), Kind != 0, BinaryPrecedence); } @@ -690,10 +689,10 @@ public: PrototypeAST* getPrototypeAST(const std::string &Name); private: typedef std::map> PrototypeMap; - + LLVMContext &Context; std::unique_ptr TM; - + PrototypeMap Prototypes; }; @@ -716,7 +715,7 @@ public: M(new Module(GenerateUniqueName("jit_module_"), Session.getLLVMContext())), Builder(Session.getLLVMContext()) { - M->setDataLayout(*Session.getTarget().getDataLayout()); + M->setDataLayout(Session.getTarget().createDataLayout()); } SessionContext& getSession() { return Session; } @@ -747,7 +746,7 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } @@ -759,7 +758,7 @@ Value *VariableExprAST::IRGen(IRGenContext &C) const { // Look this variable up in the function. Value *V = C.NamedValues[Name]; - if (V == 0) + if (!V) return ErrorP("Unknown variable name '" + Name + "'"); // Load the value. @@ -782,7 +781,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { // Special case '=' because we don't want to emit the LHS as an expression. if (Op == '=') { // Assignment requires the LHS to be an identifier. - auto LHSVar = static_cast(*LHS); + auto &LHSVar = static_cast(*LHS); // Codegen the RHS. Value *Val = RHS->IRGen(C); if (!Val) return nullptr; @@ -794,11 +793,11 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { } return ErrorP("Unknown variable name"); } - + Value *L = LHS->IRGen(C); Value *R = RHS->IRGen(C); if (!L || !R) return nullptr; - + switch (Op) { case '+': return C.getBuilder().CreateFAdd(L, R, "addtmp"); case '-': return C.getBuilder().CreateFSub(L, R, "subtmp"); @@ -811,7 +810,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { "booltmp"); default: break; } - + // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. 
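One subtle fix repeated in these files is 'auto LHSVar' becoming 'auto &LHSVar' in BinaryExprAST::IRGen: plain auto deduces a value type, so the old code copy-constructed an entire VariableExprAST just to read its name, while auto& binds a reference to the node that LHS already owns. A minimal sketch of the deduction difference, with hypothetical Node types:

  struct Node { virtual ~Node() {} };
  struct NamedNode : Node { const char *Name = "x"; };

  void inspect(Node &N) {
    // auto Copy = static_cast<NamedNode &>(N); // deduces NamedNode: copies
    auto &Ref = static_cast<NamedNode &>(N);    // deduces NamedNode &: no copy
    (void)Ref.Name;
  }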
std::string FnName = MakeLegalFunctionName(std::string("binary")+Op); @@ -819,7 +818,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { Value *Ops[] = { L, R }; return C.getBuilder().CreateCall(F, Ops, "binop"); } - + return ErrorP("Unknown binary operator"); } @@ -835,7 +834,7 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { ArgsV.push_back(Args[i]->IRGen(C)); if (!ArgsV.back()) return nullptr; } - + return C.getBuilder().CreateCall(CalleeF, ArgsV, "calltmp"); } @@ -845,49 +844,49 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { Value *IfExprAST::IRGen(IRGenContext &C) const { Value *CondV = Cond->IRGen(C); if (!CondV) return nullptr; - + // Convert condition to a bool by comparing equal to 0.0. - ConstantFP *FPZero = + ConstantFP *FPZero = ConstantFP::get(C.getLLVMContext(), APFloat(0.0)); CondV = C.getBuilder().CreateFCmpONE(CondV, FPZero, "ifcond"); - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); - + // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function. BasicBlock *ThenBB = BasicBlock::Create(C.getLLVMContext(), "then", TheFunction); BasicBlock *ElseBB = BasicBlock::Create(C.getLLVMContext(), "else"); BasicBlock *MergeBB = BasicBlock::Create(C.getLLVMContext(), "ifcont"); - + C.getBuilder().CreateCondBr(CondV, ThenBB, ElseBB); - + // Emit then value. C.getBuilder().SetInsertPoint(ThenBB); - + Value *ThenV = Then->IRGen(C); if (!ThenV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. ThenBB = C.getBuilder().GetInsertBlock(); - + // Emit else block. TheFunction->getBasicBlockList().push_back(ElseBB); C.getBuilder().SetInsertPoint(ElseBB); - + Value *ElseV = Else->IRGen(C); if (!ElseV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. ElseBB = C.getBuilder().GetInsertBlock(); - + // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); C.getBuilder().SetInsertPoint(MergeBB); PHINode *PN = C.getBuilder().CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); - + PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); return PN; @@ -900,7 +899,7 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // start = startexpr // store start -> var // goto loop - // loop: + // loop: // ... // bodyexpr // ... @@ -913,40 +912,40 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // store nextvar -> var // br endcond, loop, endloop // outloop: - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - + // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->IRGen(C); if (!StartVal) return nullptr; - + // Store the value into the alloca. C.getBuilder().CreateStore(StartVal, Alloca); - + // Make the new basic block for the loop header, inserting after current // block. BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); - + // Insert an explicit fall through from the current block to the LoopBB. C.getBuilder().CreateBr(LoopBB); // Start insertion in LoopBB. C.getBuilder().SetInsertPoint(LoopBB); - + // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. 
AllocaInst *OldVal = C.NamedValues[VarName]; C.NamedValues[VarName] = Alloca; - + // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. if (!Body->IRGen(C)) return nullptr; - + // Emit the step value. Value *StepVal; if (Step) { @@ -956,52 +955,51 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } - + // Compute the end condition. Value *EndCond = End->IRGen(C); - if (EndCond == 0) return EndCond; - + if (!EndCond) return nullptr; + // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. Value *CurVar = C.getBuilder().CreateLoad(Alloca, VarName.c_str()); Value *NextVar = C.getBuilder().CreateFAdd(CurVar, StepVal, "nextvar"); C.getBuilder().CreateStore(NextVar, Alloca); - + // Convert condition to a bool by comparing equal to 0.0. - EndCond = C.getBuilder().CreateFCmpONE(EndCond, + EndCond = C.getBuilder().CreateFCmpONE(EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); - + // Create the "after loop" block and insert it. BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); - + // Insert the conditional branch into the end of LoopEndBB. C.getBuilder().CreateCondBr(EndCond, LoopBB, AfterBB); - + // Any new code will be inserted in AfterBB. C.getBuilder().SetInsertPoint(AfterBB); - + // Restore the unshadowed variable. if (OldVal) C.NamedValues[VarName] = OldVal; else C.NamedValues.erase(VarName); - // for expr always returns 0.0. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } Value *VarExprAST::IRGen(IRGenContext &C) const { std::vector OldBindings; - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Register all variables and emit their initializer. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) { auto &VarName = VarBindings[i].first; auto &Init = VarBindings[i].second; - + // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff // like this: @@ -1013,22 +1011,22 @@ Value *VarExprAST::IRGen(IRGenContext &C) const { if (!InitVal) return nullptr; } else // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); - + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); C.getBuilder().CreateStore(InitVal, Alloca); // Remember the old variable binding so that we can restore the binding when // we unrecurse. OldBindings.push_back(C.NamedValues[VarName]); - + // Remember this binding. C.NamedValues[VarName] = Alloca; } - + // Codegen the body, now that all vars are in scope. Value *BodyVal = Body->IRGen(C); if (!BodyVal) return nullptr; - + // Pop all our variables from scope. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) C.NamedValues[VarBindings[i].first] = OldBindings[i]; @@ -1041,7 +1039,7 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { std::string FnName = MakeLegalFunctionName(Name); // Make the function type: double(double,double) etc. 
- std::vector Doubles(Args.size(), + std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); @@ -1054,26 +1052,26 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { // Delete the one we just made and get the existing one. F->eraseFromParent(); F = C.getM().getFunction(Name); - + // If F already has a body, reject this. if (!F->empty()) { ErrorP("redefinition of function"); return nullptr; } - + // If F took a different number of args, reject. if (F->arg_size() != Args.size()) { ErrorP("redefinition of function with different # args"); return nullptr; } } - + // Set names for all arguments. unsigned Idx = 0; for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); ++AI, ++Idx) AI->setName(Args[Idx]); - + return F; } @@ -1086,7 +1084,7 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); // Store the initial value into the alloca. - C.getBuilder().CreateStore(AI, Alloca); + C.getBuilder().CreateStore(&*AI, Alloca); // Add arguments to variable symbol table. C.NamedValues[Args[Idx]] = Alloca; @@ -1095,19 +1093,19 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { Function *FunctionAST::IRGen(IRGenContext &C) const { C.NamedValues.clear(); - + Function *TheFunction = Proto->IRGen(C); if (!TheFunction) return nullptr; - + // If this is an operator, install it. if (Proto->isBinaryOp()) BinopPrecedence[Proto->getOperatorName()] = Proto->Precedence; - + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); C.getBuilder().SetInsertPoint(BB); - + // Add all arguments to the symbol table and create their allocas. Proto->CreateArgumentAllocas(TheFunction, C); @@ -1120,7 +1118,7 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); @@ -1162,7 +1160,7 @@ public: typedef LazyEmitLayerT::ModuleSetHandleT ModuleHandleT; KaleidoscopeJIT(SessionContext &Session) - : DL(*Session.getTarget().getDataLayout()), + : DL(Session.getTarget().createDataLayout()), CompileLayer(ObjectLayer, SimpleCompiler(Session.getTarget())), LazyEmitLayer(CompileLayer) {} @@ -1204,7 +1202,7 @@ public: } private: - const DataLayout &DL; + const DataLayout DL; ObjLayerT ObjectLayer; CompileLayerT CompileLayer; LazyEmitLayerT LazyEmitLayer; @@ -1246,7 +1244,7 @@ static void HandleTopLevelExpression(SessionContext &S, KaleidoscopeJIT &J) { // Get the address of the JIT'd function in memory. auto ExprSymbol = J.findUnmangledSymbol("__anon_expr"); - + // Cast it to the right type (takes no arguments, returns a double) so we // can call it as a native function. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); @@ -1289,20 +1287,20 @@ static void MainLoop() { //===----------------------------------------------------------------------===// /// putchard - putchar that takes a double and returns 0. -extern "C" +extern "C" double putchard(double X) { putchar((char)X); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. 
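The recurring change from CreateStore(AI, Alloca) to CreateStore(&*AI, Alloca) tracks the ilist iterator cleanup in this import: Function::arg_iterator no longer converts implicitly to Argument*, so callers must dereference the iterator and take the address explicitly. A small sketch of the new spelling; nameAllArgs is a hypothetical helper, not part of the patch:

  #include "llvm/IR/Function.h"
  #include <string>
  using namespace llvm;

  static void nameAllArgs(Function &F) {
    unsigned Idx = 0;
    for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end();
         AI != AE; ++AI) {
      Argument *A = &*AI; // iterator -> Argument& -> Argument*
      A->setName("arg" + std::to_string(Idx++));
    }
  }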
-extern "C" +extern "C" double printd(double X) { printf("%f", X); return 0; } -extern "C" +extern "C" double printlf() { printf("\n"); return 0; @@ -1339,4 +1337,3 @@ int main() { return 0; } - diff --git a/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp b/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp index ca34de7e2244..ebaff49e89b2 100644 --- a/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp +++ b/examples/Kaleidoscope/Orc/lazy_irgen/toy.cpp @@ -38,14 +38,14 @@ enum Token { // primary tok_identifier = -4, tok_number = -5, - + // control tok_if = -6, tok_then = -7, tok_else = -8, tok_for = -9, tok_in = -10, - + // operators tok_binary = -11, tok_unary = -12, - + // var definition tok_var = -13 }; @@ -86,7 +86,7 @@ static int gettok() { LastChar = getchar(); } while (isdigit(LastChar) || LastChar == '.'); - NumVal = strtod(NumStr.c_str(), 0); + NumVal = strtod(NumStr.c_str(), nullptr); return tok_number; } @@ -94,11 +94,11 @@ static int gettok() { // Comment until end of line. do LastChar = getchar(); while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); - + if (LastChar != EOF) return gettok(); } - + // Check for end of file. Don't eat the EOF. if (LastChar == EOF) return tok_eof; @@ -139,7 +139,7 @@ struct VariableExprAST : public ExprAST { /// UnaryExprAST - Expression class for a unary operator. struct UnaryExprAST : public ExprAST { - UnaryExprAST(char Opcode, std::unique_ptr Operand) + UnaryExprAST(char Opcode, std::unique_ptr Operand) : Opcode(std::move(Opcode)), Operand(std::move(Operand)) {} Value *IRGen(IRGenContext &C) const override; @@ -151,7 +151,7 @@ struct UnaryExprAST : public ExprAST { /// BinaryExprAST - Expression class for a binary operator. struct BinaryExprAST : public ExprAST { BinaryExprAST(char Op, std::unique_ptr LHS, - std::unique_ptr RHS) + std::unique_ptr RHS) : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} Value *IRGen(IRGenContext &C) const override; @@ -223,7 +223,7 @@ struct PrototypeAST { bool isUnaryOp() const { return IsOperator && Args.size() == 1; } bool isBinaryOp() const { return IsOperator && Args.size() == 2; } - + char getOperatorName() const { assert(isUnaryOp() || isBinaryOp()); return Name[Name.size()-1]; @@ -267,7 +267,7 @@ static std::map BinopPrecedence; static int GetTokPrecedence() { if (!isascii(CurTok)) return -1; - + // Make sure it's a declared binop. int TokPrec = BinopPrecedence[CurTok]; if (TokPrec <= 0) return -1; @@ -293,12 +293,12 @@ static std::unique_ptr ParseExpression(); /// ::= identifier '(' expression* ')' static std::unique_ptr ParseIdentifierExpr() { std::string IdName = IdentifierStr; - + getNextToken(); // eat identifier. - + if (CurTok != '(') // Simple variable ref. return llvm::make_unique(IdName); - + // Call. getNextToken(); // eat ( std::vector> Args; @@ -318,7 +318,7 @@ static std::unique_ptr ParseIdentifierExpr() { // Eat the ')'. getNextToken(); - + return llvm::make_unique(IdName, std::move(Args)); } @@ -335,7 +335,7 @@ static std::unique_ptr ParseParenExpr() { auto V = ParseExpression(); if (!V) return nullptr; - + if (CurTok != ')') return ErrorU("expected ')'"); getNextToken(); // eat ). @@ -345,29 +345,29 @@ static std::unique_ptr ParseParenExpr() { /// ifexpr ::= 'if' expression 'then' expression 'else' expression static std::unique_ptr ParseIfExpr() { getNextToken(); // eat the if. - + // condition. 
auto Cond = ParseExpression(); if (!Cond) return nullptr; - + if (CurTok != tok_then) return ErrorU("expected then"); getNextToken(); // eat the then - + auto Then = ParseExpression(); if (!Then) return nullptr; - + if (CurTok != tok_else) return ErrorU("expected else"); - + getNextToken(); - + auto Else = ParseExpression(); if (!Else) return nullptr; - + return llvm::make_unique(std::move(Cond), std::move(Then), std::move(Else)); } @@ -378,26 +378,25 @@ static std::unique_ptr ParseForExpr() { if (CurTok != tok_identifier) return ErrorU("expected identifier after for"); - + std::string IdName = IdentifierStr; getNextToken(); // eat identifier. - + if (CurTok != '=') return ErrorU("expected '=' after for"); getNextToken(); // eat '='. - - + auto Start = ParseExpression(); if (!Start) return nullptr; if (CurTok != ',') return ErrorU("expected ',' after for start value"); getNextToken(); - + auto End = ParseExpression(); if (!End) return nullptr; - + // The step value is optional. std::unique_ptr Step; if (CurTok == ',') { @@ -406,11 +405,11 @@ static std::unique_ptr ParseForExpr() { if (!Step) return nullptr; } - + if (CurTok != tok_in) return ErrorU("expected 'in' after for"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (Body) return nullptr; @@ -419,7 +418,7 @@ static std::unique_ptr ParseForExpr() { std::move(Step), std::move(Body)); } -/// varexpr ::= 'var' identifier ('=' expression)? +/// varexpr ::= 'var' identifier ('=' expression)? // (',' identifier ('=' expression)?)* 'in' expression static std::unique_ptr ParseVarExpr() { getNextToken(); // eat the var. @@ -429,7 +428,7 @@ static std::unique_ptr ParseVarExpr() { // At least one variable name is required. if (CurTok != tok_identifier) return ErrorU("expected identifier after var"); - + while (1) { std::string Name = IdentifierStr; getNextToken(); // eat identifier. @@ -438,31 +437,31 @@ static std::unique_ptr ParseVarExpr() { std::unique_ptr Init; if (CurTok == '=') { getNextToken(); // eat the '='. - + Init = ParseExpression(); if (!Init) return nullptr; } - + VarBindings.push_back(VarExprAST::Binding(Name, std::move(Init))); - + // End of var list, exit loop. if (CurTok != ',') break; getNextToken(); // eat the ','. - + if (CurTok != tok_identifier) return ErrorU("expected identifier list after var"); } - + // At this point, we have to have 'in'. if (CurTok != tok_in) return ErrorU("expected 'in' keyword after 'var'"); getNextToken(); // eat 'in'. - + auto Body = ParseExpression(); if (!Body) return nullptr; - + return llvm::make_unique(std::move(VarBindings), std::move(Body)); } @@ -492,7 +491,7 @@ static std::unique_ptr ParseUnary() { // If the current token is not an operator, it must be a primary expr. if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') return ParsePrimary(); - + // If this is a unary operator, read it. int Opc = CurTok; getNextToken(); @@ -508,21 +507,21 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, // If this is a binop, find its precedence. while (1) { int TokPrec = GetTokPrecedence(); - + // If this is a binop that binds at least as tightly as the current binop, // consume it, otherwise we are done. if (TokPrec < ExprPrec) return LHS; - + // Okay, we know this is a binop. int BinOp = CurTok; getNextToken(); // eat binop - + // Parse the unary expression after the binary operator. auto RHS = ParseUnary(); if (!RHS) return nullptr; - + // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. 
int NextPrec = GetTokPrecedence(); @@ -531,7 +530,7 @@ static std::unique_ptr ParseBinOpRHS(int ExprPrec, if (!RHS) return nullptr; } - + // Merge LHS/RHS. LHS = llvm::make_unique(BinOp, std::move(LHS), std::move(RHS)); } @@ -544,7 +543,7 @@ static std::unique_ptr ParseExpression() { auto LHS = ParseUnary(); if (!LHS) return nullptr; - + return ParseBinOpRHS(0, std::move(LHS)); } @@ -554,10 +553,10 @@ static std::unique_ptr ParseExpression() { /// ::= unary LETTER (id) static std::unique_ptr ParsePrototype() { std::string FnName; - + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. unsigned BinaryPrecedence = 30; - + switch (CurTok) { default: return ErrorU("Expected function name in prototype"); @@ -583,7 +582,7 @@ static std::unique_ptr ParsePrototype() { FnName += (char)CurTok; Kind = 2; getNextToken(); - + // Read the precedence if present. if (CurTok == tok_number) { if (NumVal < 1 || NumVal > 100) @@ -593,23 +592,23 @@ static std::unique_ptr ParsePrototype() { } break; } - + if (CurTok != '(') return ErrorU("Expected '(' in prototype"); - + std::vector ArgNames; while (getNextToken() == tok_identifier) ArgNames.push_back(IdentifierStr); if (CurTok != ')') return ErrorU("Expected ')' in prototype"); - + // success. getNextToken(); // eat ')'. - + // Verify right number of names for operator. if (Kind && ArgNames.size() != Kind) return ErrorU("Invalid number of operands for operator"); - + return llvm::make_unique(FnName, std::move(ArgNames), Kind != 0, BinaryPrecedence); } @@ -690,10 +689,10 @@ public: PrototypeAST* getPrototypeAST(const std::string &Name); private: typedef std::map> PrototypeMap; - + LLVMContext &Context; std::unique_ptr TM; - + PrototypeMap Prototypes; }; @@ -716,7 +715,7 @@ public: M(new Module(GenerateUniqueName("jit_module_"), Session.getLLVMContext())), Builder(Session.getLLVMContext()) { - M->setDataLayout(*Session.getTarget().getDataLayout()); + M->setDataLayout(Session.getTarget().createDataLayout()); } SessionContext& getSession() { return Session; } @@ -747,7 +746,7 @@ static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, const std::string &VarName) { IRBuilder<> TmpB(&TheFunction->getEntryBlock(), TheFunction->getEntryBlock().begin()); - return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0, + return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), nullptr, VarName.c_str()); } @@ -759,7 +758,7 @@ Value *VariableExprAST::IRGen(IRGenContext &C) const { // Look this variable up in the function. Value *V = C.NamedValues[Name]; - if (V == 0) + if (!V) return ErrorP("Unknown variable name '" + Name + "'"); // Load the value. @@ -782,7 +781,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { // Special case '=' because we don't want to emit the LHS as an expression. if (Op == '=') { // Assignment requires the LHS to be an identifier. - auto LHSVar = static_cast(*LHS); + auto &LHSVar = static_cast(*LHS); // Codegen the RHS. Value *Val = RHS->IRGen(C); if (!Val) return nullptr; @@ -794,11 +793,11 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { } return ErrorP("Unknown variable name"); } - + Value *L = LHS->IRGen(C); Value *R = RHS->IRGen(C); if (!L || !R) return nullptr; - + switch (Op) { case '+': return C.getBuilder().CreateFAdd(L, R, "addtmp"); case '-': return C.getBuilder().CreateFSub(L, R, "subtmp"); @@ -811,7 +810,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { "booltmp"); default: break; } - + // If it wasn't a builtin binary operator, it must be a user defined one. Emit // a call to it. 
std::string FnName = MakeLegalFunctionName(std::string("binary")+Op); @@ -819,7 +818,7 @@ Value *BinaryExprAST::IRGen(IRGenContext &C) const { Value *Ops[] = { L, R }; return C.getBuilder().CreateCall(F, Ops, "binop"); } - + return ErrorP("Unknown binary operator"); } @@ -835,7 +834,7 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { ArgsV.push_back(Args[i]->IRGen(C)); if (!ArgsV.back()) return nullptr; } - + return C.getBuilder().CreateCall(CalleeF, ArgsV, "calltmp"); } @@ -845,49 +844,49 @@ Value *CallExprAST::IRGen(IRGenContext &C) const { Value *IfExprAST::IRGen(IRGenContext &C) const { Value *CondV = Cond->IRGen(C); if (!CondV) return nullptr; - + // Convert condition to a bool by comparing equal to 0.0. - ConstantFP *FPZero = + ConstantFP *FPZero = ConstantFP::get(C.getLLVMContext(), APFloat(0.0)); CondV = C.getBuilder().CreateFCmpONE(CondV, FPZero, "ifcond"); - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); - + // Create blocks for the then and else cases. Insert the 'then' block at the // end of the function. BasicBlock *ThenBB = BasicBlock::Create(C.getLLVMContext(), "then", TheFunction); BasicBlock *ElseBB = BasicBlock::Create(C.getLLVMContext(), "else"); BasicBlock *MergeBB = BasicBlock::Create(C.getLLVMContext(), "ifcont"); - + C.getBuilder().CreateCondBr(CondV, ThenBB, ElseBB); - + // Emit then value. C.getBuilder().SetInsertPoint(ThenBB); - + Value *ThenV = Then->IRGen(C); if (!ThenV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Then' can change the current block, update ThenBB for the PHI. ThenBB = C.getBuilder().GetInsertBlock(); - + // Emit else block. TheFunction->getBasicBlockList().push_back(ElseBB); C.getBuilder().SetInsertPoint(ElseBB); - + Value *ElseV = Else->IRGen(C); if (!ElseV) return nullptr; - + C.getBuilder().CreateBr(MergeBB); // Codegen of 'Else' can change the current block, update ElseBB for the PHI. ElseBB = C.getBuilder().GetInsertBlock(); - + // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); C.getBuilder().SetInsertPoint(MergeBB); PHINode *PN = C.getBuilder().CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); - + PN->addIncoming(ThenV, ThenBB); PN->addIncoming(ElseV, ElseBB); return PN; @@ -900,7 +899,7 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // start = startexpr // store start -> var // goto loop - // loop: + // loop: // ... // bodyexpr // ... @@ -913,40 +912,40 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // store nextvar -> var // br endcond, loop, endloop // outloop: - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Create an alloca for the variable in the entry block. AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); - + // Emit the start code first, without 'variable' in scope. Value *StartVal = Start->IRGen(C); if (!StartVal) return nullptr; - + // Store the value into the alloca. C.getBuilder().CreateStore(StartVal, Alloca); - + // Make the new basic block for the loop header, inserting after current // block. BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); - + // Insert an explicit fall through from the current block to the LoopBB. C.getBuilder().CreateBr(LoopBB); // Start insertion in LoopBB. C.getBuilder().SetInsertPoint(LoopBB); - + // Within the loop, the variable is defined equal to the PHI node. If it // shadows an existing variable, we have to restore it, so save it now. 
AllocaInst *OldVal = C.NamedValues[VarName]; C.NamedValues[VarName] = Alloca; - + // Emit the body of the loop. This, like any other expr, can change the // current BB. Note that we ignore the value computed by the body, but don't // allow an error. if (!Body->IRGen(C)) return nullptr; - + // Emit the step value. Value *StepVal; if (Step) { @@ -956,52 +955,51 @@ Value *ForExprAST::IRGen(IRGenContext &C) const { // If not specified, use 1.0. StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); } - + // Compute the end condition. Value *EndCond = End->IRGen(C); - if (EndCond == 0) return EndCond; - + if (!EndCond) return nullptr; + // Reload, increment, and restore the alloca. This handles the case where // the body of the loop mutates the variable. Value *CurVar = C.getBuilder().CreateLoad(Alloca, VarName.c_str()); Value *NextVar = C.getBuilder().CreateFAdd(CurVar, StepVal, "nextvar"); C.getBuilder().CreateStore(NextVar, Alloca); - + // Convert condition to a bool by comparing equal to 0.0. - EndCond = C.getBuilder().CreateFCmpONE(EndCond, + EndCond = C.getBuilder().CreateFCmpONE(EndCond, ConstantFP::get(getGlobalContext(), APFloat(0.0)), "loopcond"); - + // Create the "after loop" block and insert it. BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); - + // Insert the conditional branch into the end of LoopEndBB. C.getBuilder().CreateCondBr(EndCond, LoopBB, AfterBB); - + // Any new code will be inserted in AfterBB. C.getBuilder().SetInsertPoint(AfterBB); - + // Restore the unshadowed variable. if (OldVal) C.NamedValues[VarName] = OldVal; else C.NamedValues.erase(VarName); - // for expr always returns 0.0. return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); } Value *VarExprAST::IRGen(IRGenContext &C) const { std::vector OldBindings; - + Function *TheFunction = C.getBuilder().GetInsertBlock()->getParent(); // Register all variables and emit their initializer. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) { auto &VarName = VarBindings[i].first; auto &Init = VarBindings[i].second; - + // Emit the initializer before adding the variable to scope, this prevents // the initializer from referencing the variable itself, and permits stuff // like this: @@ -1013,22 +1011,22 @@ Value *VarExprAST::IRGen(IRGenContext &C) const { if (!InitVal) return nullptr; } else // If not specified, use 0.0. InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0)); - + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); C.getBuilder().CreateStore(InitVal, Alloca); // Remember the old variable binding so that we can restore the binding when // we unrecurse. OldBindings.push_back(C.NamedValues[VarName]); - + // Remember this binding. C.NamedValues[VarName] = Alloca; } - + // Codegen the body, now that all vars are in scope. Value *BodyVal = Body->IRGen(C); if (!BodyVal) return nullptr; - + // Pop all our variables from scope. for (unsigned i = 0, e = VarBindings.size(); i != e; ++i) C.NamedValues[VarBindings[i].first] = OldBindings[i]; @@ -1041,7 +1039,7 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { std::string FnName = MakeLegalFunctionName(Name); // Make the function type: double(double,double) etc. 
- std::vector Doubles(Args.size(), + std::vector Doubles(Args.size(), Type::getDoubleTy(getGlobalContext())); FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), Doubles, false); @@ -1054,26 +1052,26 @@ Function *PrototypeAST::IRGen(IRGenContext &C) const { // Delete the one we just made and get the existing one. F->eraseFromParent(); F = C.getM().getFunction(Name); - + // If F already has a body, reject this. if (!F->empty()) { ErrorP("redefinition of function"); return nullptr; } - + // If F took a different number of args, reject. if (F->arg_size() != Args.size()) { ErrorP("redefinition of function with different # args"); return nullptr; } } - + // Set names for all arguments. unsigned Idx = 0; for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); ++AI, ++Idx) AI->setName(Args[Idx]); - + return F; } @@ -1086,7 +1084,7 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]); // Store the initial value into the alloca. - C.getBuilder().CreateStore(AI, Alloca); + C.getBuilder().CreateStore(&*AI, Alloca); // Add arguments to variable symbol table. C.NamedValues[Args[Idx]] = Alloca; @@ -1095,19 +1093,19 @@ void PrototypeAST::CreateArgumentAllocas(Function *F, IRGenContext &C) { Function *FunctionAST::IRGen(IRGenContext &C) const { C.NamedValues.clear(); - + Function *TheFunction = Proto->IRGen(C); if (!TheFunction) return nullptr; - + // If this is an operator, install it. if (Proto->isBinaryOp()) BinopPrecedence[Proto->getOperatorName()] = Proto->Precedence; - + // Create a new basic block to start insertion into. BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); C.getBuilder().SetInsertPoint(BB); - + // Add all arguments to the symbol table and create their allocas. Proto->CreateArgumentAllocas(TheFunction, C); @@ -1120,7 +1118,7 @@ Function *FunctionAST::IRGen(IRGenContext &C) const { return TheFunction; } - + // Error reading body, remove function. TheFunction->eraseFromParent(); @@ -1170,7 +1168,7 @@ public: { raw_string_ostream MangledNameStream(MangledName); Mangler::getNameWithPrefix(MangledNameStream, Name, - *Session.getTarget().getDataLayout()); + Session.getTarget().createDataLayout()); } return MangledName; } @@ -1223,7 +1221,7 @@ private: RuntimeDyld::SymbolInfo searchFunctionASTs(const std::string &Name) { auto DefI = FunctionDefs.find(Name); if (DefI == FunctionDefs.end()) - return 0; + return nullptr; // Take the FunctionAST out of the map. auto FnAST = std::move(DefI->second); @@ -1277,7 +1275,7 @@ static void HandleTopLevelExpression(SessionContext &S, KaleidoscopeJIT &J) { // Get the address of the JIT'd function in memory. auto ExprSymbol = J.findUnmangledSymbol("__anon_expr"); - + // Cast it to the right type (takes no arguments, returns a double) so we // can call it as a native function. double (*FP)() = (double (*)())(intptr_t)ExprSymbol.getAddress(); @@ -1320,20 +1318,20 @@ static void MainLoop() { //===----------------------------------------------------------------------===// /// putchard - putchar that takes a double and returns 0. -extern "C" +extern "C" double putchard(double X) { putchar((char)X); return 0; } /// printd - printf that takes a double prints it as "%f\n", returning 0. 
-extern "C" +extern "C" double printd(double X) { printf("%f", X); return 0; } -extern "C" +extern "C" double printlf() { printf("\n"); return 0; @@ -1370,4 +1368,3 @@ int main() { return 0; } - diff --git a/examples/Kaleidoscope/include/KaleidoscopeJIT.h b/examples/Kaleidoscope/include/KaleidoscopeJIT.h new file mode 100644 index 000000000000..0c825cc94c0e --- /dev/null +++ b/examples/Kaleidoscope/include/KaleidoscopeJIT.h @@ -0,0 +1,114 @@ +//===----- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains a simple JIT definition for use in the kaleidoscope tutorials. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H +#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/LambdaResolver.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/IR/Mangler.h" +#include "llvm/Support/DynamicLibrary.h" + +namespace llvm { +namespace orc { + +class KaleidoscopeJIT { +public: + typedef ObjectLinkingLayer<> ObjLayerT; + typedef IRCompileLayer CompileLayerT; + typedef CompileLayerT::ModuleSetHandleT ModuleHandleT; + + KaleidoscopeJIT() + : TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()), + CompileLayer(ObjectLayer, SimpleCompiler(*TM)) { + llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr); + } + + TargetMachine &getTargetMachine() { return *TM; } + + ModuleHandleT addModule(std::unique_ptr M) { + // We need a memory manager to allocate memory and resolve symbols for this + // new module. Create one that resolves symbols by looking back into the + // JIT. + auto Resolver = createLambdaResolver( + [&](const std::string &Name) { + if (auto Sym = findMangledSymbol(Name)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); + return RuntimeDyld::SymbolInfo(nullptr); + }, + [](const std::string &S) { return nullptr; }); + auto H = CompileLayer.addModuleSet(singletonSet(std::move(M)), + make_unique(), + std::move(Resolver)); + + ModuleHandles.push_back(H); + return H; + } + + void removeModule(ModuleHandleT H) { + ModuleHandles.erase( + std::find(ModuleHandles.begin(), ModuleHandles.end(), H)); + CompileLayer.removeModuleSet(H); + } + + JITSymbol findSymbol(const std::string Name) { + return findMangledSymbol(mangle(Name)); + } + +private: + + std::string mangle(const std::string &Name) { + std::string MangledName; + { + raw_string_ostream MangledNameStream(MangledName); + Mangler::getNameWithPrefix(MangledNameStream, Name, DL); + } + return MangledName; + } + + template static std::vector singletonSet(T t) { + std::vector Vec; + Vec.push_back(std::move(t)); + return Vec; + } + + JITSymbol findMangledSymbol(const std::string &Name) { + // Search modules in reverse order: from last added to first added. + // This is the opposite of the usual search order for dlsym, but makes more + // sense in a REPL where we want to bind to the newest available definition. 
+ for (auto H : make_range(ModuleHandles.rbegin(), ModuleHandles.rend())) + if (auto Sym = CompileLayer.findSymbolIn(H, Name, true)) + return Sym; + + // If we can't find the symbol in the JIT, try looking in the host process. + if (auto SymAddr = RTDyldMemoryManager::getSymbolAddressInProcess(Name)) + return JITSymbol(SymAddr, JITSymbolFlags::Exported); + + return nullptr; + } + + std::unique_ptr TM; + const DataLayout DL; + ObjLayerT ObjectLayer; + CompileLayerT CompileLayer; + std::vector ModuleHandles; +}; + +} // End namespace orc. +} // End namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H diff --git a/examples/ParallelJIT/ParallelJIT.cpp b/examples/ParallelJIT/ParallelJIT.cpp index b2c53a9bb10e..3c485d4c964d 100644 --- a/examples/ParallelJIT/ParallelJIT.cpp +++ b/examples/ParallelJIT/ParallelJIT.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/TargetSelect.h" #include #include + using namespace llvm; static Function* createAdd1(Module *M) { @@ -38,7 +39,7 @@ static Function* createAdd1(Module *M) { cast(M->getOrInsertFunction("add1", Type::getInt32Ty(M->getContext()), Type::getInt32Ty(M->getContext()), - (Type *)0)); + nullptr)); // Add a basic block to the function. As before, it automatically inserts // because of the last argument. @@ -49,7 +50,7 @@ static Function* createAdd1(Module *M) { // Get pointers to the integer argument of the add1 function... assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg - Argument *ArgX = Add1F->arg_begin(); // Get the arg + Argument *ArgX = &*Add1F->arg_begin(); // Get the arg ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. // Create the add instruction, inserting it into the end of BB. @@ -69,7 +70,7 @@ static Function *CreateFibFunction(Module *M) { cast(M->getOrInsertFunction("fib", Type::getInt32Ty(M->getContext()), Type::getInt32Ty(M->getContext()), - (Type *)0)); + nullptr)); // Add a basic block to the function. BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", FibF); @@ -79,7 +80,7 @@ static Function *CreateFibFunction(Module *M) { Value *Two = ConstantInt::get(Type::getInt32Ty(M->getContext()), 2); // Get pointer to the integer argument of the add1 function... - Argument *ArgX = FibF->arg_begin(); // Get the arg. + Argument *ArgX = &*FibF->arg_begin(); // Get the arg. ArgX->setName("AnArg"); // Give it a nice symbolic name for fun. // Create the true_block. 
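[Aside: the KaleidoscopeJIT header added above is easiest to follow from the caller's side. The sketch below is illustrative only and is not part of this patch; the empty module stands in for the tutorial's IRGen output, and any real program would populate it before adding it to the JIT.]

    #include "KaleidoscopeJIT.h"
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/TargetSelect.h"
    #include <cstdint>

    using namespace llvm;

    int main() {
      // The JIT constructor calls EngineBuilder().selectTarget(), so the
      // native target must be registered first.
      InitializeNativeTarget();
      InitializeNativeTargetAsmPrinter();
      InitializeNativeTargetAsmParser();

      orc::KaleidoscopeJIT JIT;

      // A placeholder module; the tutorial's parser/IRGen would fill this in.
      auto M = llvm::make_unique<Module>("demo", getGlobalContext());
      auto H = JIT.addModule(std::move(M));

      // findSymbol mangles the name with the JIT's DataLayout, searches
      // modules newest-first, then falls back to symbols in the host process.
      if (auto Sym = JIT.findSymbol("__anon_expr")) {
        double (*FP)() = (double (*)())(intptr_t)Sym.getAddress();
        FP();
      }

      JIT.removeModule(H); // Frees the module's compiled code.
      return 0;
    }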
@@ -129,10 +130,10 @@ public: n = 0; waitFor = 0; - int result = pthread_cond_init( &condition, NULL ); + int result = pthread_cond_init( &condition, nullptr ); assert( result == 0 ); - result = pthread_mutex_init( &mutex, NULL ); + result = pthread_mutex_init( &mutex, nullptr ); assert( result == 0 ); } @@ -261,21 +262,21 @@ int main() { struct threadParams fib2 = { EE, fibF, 42 }; pthread_t add1Thread; - int result = pthread_create( &add1Thread, NULL, callFunc, &add1 ); + int result = pthread_create( &add1Thread, nullptr, callFunc, &add1 ); if ( result != 0 ) { std::cerr << "Could not create thread" << std::endl; return 1; } pthread_t fibThread1; - result = pthread_create( &fibThread1, NULL, callFunc, &fib1 ); + result = pthread_create( &fibThread1, nullptr, callFunc, &fib1 ); if ( result != 0 ) { std::cerr << "Could not create thread" << std::endl; return 1; } pthread_t fibThread2; - result = pthread_create( &fibThread2, NULL, callFunc, &fib2 ); + result = pthread_create( &fibThread2, nullptr, callFunc, &fib2 ); if ( result != 0 ) { std::cerr << "Could not create thread" << std::endl; return 1; diff --git a/include/llvm-c/Analysis.h b/include/llvm-c/Analysis.h index f0bdddc50ab7..36dcb89e0e08 100644 --- a/include/llvm-c/Analysis.h +++ b/include/llvm-c/Analysis.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_ANALYSIS_H #define LLVM_C_ANALYSIS_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/BitReader.h b/include/llvm-c/BitReader.h index f3b388bc4fb4..d1fc302767ba 100644 --- a/include/llvm-c/BitReader.h +++ b/include/llvm-c/BitReader.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_BITREADER_H #define LLVM_C_BITREADER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { @@ -34,36 +34,45 @@ extern "C" { /* Builds a module from the bitcode in the specified memory buffer, returning a reference to the module via the OutModule parameter. Returns 0 on success. - Optionally returns a human-readable error message via OutMessage. */ -LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutModule, char **OutMessage); + Optionally returns a human-readable error message via OutMessage. + This is deprecated. Use LLVMParseBitcode2. */ +LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, + char **OutMessage); + +/* Builds a module from the bitcode in the specified memory buffer, returning a + reference to the module via the OutModule parameter. Returns 0 on success. */ +LLVMBool LLVMParseBitcode2(LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule); + +/* This is deprecated. Use LLVMParseBitcodeInContext2. */ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule, char **OutMessage); +LLVMBool LLVMParseBitcodeInContext2(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule); + /** Reads a module from the specified path, returning via the OutMP parameter a module provider which performs lazy deserialization. Returns 0 on success. - Optionally returns a human-readable error message via OutMessage. */ + Optionally returns a human-readable error message via OutMessage. + This is deprecated. Use LLVMGetBitcodeModuleInContext2. 
*/ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutM, - char **OutMessage); + LLVMModuleRef *OutM, char **OutMessage); +/** Reads a module from the specified path, returning via the OutMP parameter a + * module provider which performs lazy deserialization. Returns 0 on success. */ +LLVMBool LLVMGetBitcodeModuleInContext2(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM); + +/* This is deprecated. Use LLVMGetBitcodeModule2. */ LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, char **OutMessage); - -/** Deprecated: Use LLVMGetBitcodeModuleInContext instead. */ -LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, - LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage); - -/** Deprecated: Use LLVMGetBitcodeModule instead. */ -LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage); +LLVMBool LLVMGetBitcodeModule2(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM); /** * @} diff --git a/include/llvm-c/BitWriter.h b/include/llvm-c/BitWriter.h index f25ad3a445f5..797d03179ab3 100644 --- a/include/llvm-c/BitWriter.h +++ b/include/llvm-c/BitWriter.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_BITWRITER_H #define LLVM_C_BITWRITER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 9dbcbfea387f..c8fda15c5ed6 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -15,7 +15,8 @@ #ifndef LLVM_C_CORE_H #define LLVM_C_CORE_H -#include "llvm-c/Support.h" +#include "llvm-c/ErrorHandling.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { @@ -40,15 +41,6 @@ extern "C" { * the LLVM intermediate representation as well as other related types * and utilities. * - * LLVM uses a polymorphic type hierarchy which C cannot represent, therefore - * parameters must be passed as base types. Despite the declared types, most - * of the functions provided operate only on branches of the type hierarchy. - * The declared parameter names are descriptive and specify which type is - * required. Additionally, each type hierarchy is documented along with the - * functions that operate upon it. For more detail, refer to LLVM's C++ code. - * If in doubt, refer to Core.cpp, which performs parameter downcasts in the - * form unwrap(Param). - * * Many exotic languages can interoperate with C code but have a harder time * with C++ due to name mangling. So in addition to C, this interface enables * tools written in such languages. @@ -62,74 +54,6 @@ extern "C" { * @{ */ -/* Opaque types. */ - -/** - * The top-level container for all LLVM global data. See the LLVMContext class. - */ -typedef struct LLVMOpaqueContext *LLVMContextRef; - -/** - * The top-level container for all other LLVM Intermediate Representation (IR) - * objects. - * - * @see llvm::Module - */ -typedef struct LLVMOpaqueModule *LLVMModuleRef; - -/** - * Each value in the LLVM IR has a type, an LLVMTypeRef. - * - * @see llvm::Type - */ -typedef struct LLVMOpaqueType *LLVMTypeRef; - -/** - * Represents an individual value in LLVM IR. - * - * This models llvm::Value. - */ -typedef struct LLVMOpaqueValue *LLVMValueRef; - -/** - * Represents a basic block of instructions in LLVM IR. - * - * This models llvm::BasicBlock. - */ -typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef; - -/** - * Represents an LLVM basic block builder. 
- * - * This models llvm::IRBuilder. - */ -typedef struct LLVMOpaqueBuilder *LLVMBuilderRef; - -/** - * Interface used to provide a module to JIT or interpreter. - * This is now just a synonym for llvm::Module, but we have to keep using the - * different type to keep binary compatibility. - */ -typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef; - -/** @see llvm::PassManagerBase */ -typedef struct LLVMOpaquePassManager *LLVMPassManagerRef; - -/** @see llvm::PassRegistry */ -typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef; - -/** - * Used to get the users and usees of a Value. - * - * @see llvm::Use */ -typedef struct LLVMOpaqueUse *LLVMUseRef; - - -/** - * @see llvm::DiagnosticInfo - */ -typedef struct LLVMOpaqueDiagnosticInfo *LLVMDiagnosticInfoRef; - typedef enum { LLVMZExtAttribute = 1<<0, LLVMSExtAttribute = 1<<1, @@ -248,8 +172,12 @@ typedef enum { /* Exception Handling Operators */ LLVMResume = 58, - LLVMLandingPad = 59 - + LLVMLandingPad = 59, + LLVMCleanupRet = 61, + LLVMCatchRet = 62, + LLVMCatchPad = 63, + LLVMCleanupPad = 64, + LLVMCatchSwitch = 65 } LLVMOpcode; typedef enum { @@ -268,7 +196,8 @@ typedef enum { LLVMPointerTypeKind, /**< Pointers */ LLVMVectorTypeKind, /**< SIMD 'packed' format, or other vector type */ LLVMMetadataTypeKind, /**< Metadata */ - LLVMX86_MMXTypeKind /**< X86 MMX */ + LLVMX86_MMXTypeKind, /**< X86 MMX */ + LLVMTokenTypeKind /**< Tokens */ } LLVMTypeKind; typedef enum { @@ -428,36 +357,11 @@ void LLVMInitializeCore(LLVMPassRegistryRef R); @see ManagedStatic */ void LLVMShutdown(void); - /*===-- Error handling ----------------------------------------------------===*/ char *LLVMCreateMessage(const char *Message); void LLVMDisposeMessage(char *Message); -typedef void (*LLVMFatalErrorHandler)(const char *Reason); - -/** - * Install a fatal error handler. By default, if LLVM detects a fatal error, it - * will call exit(1). This may not be appropriate in many contexts. For example, - * doing exit(1) will bypass many crash reporting/tracing system tools. This - * function allows you to install a callback that will be invoked prior to the - * call to exit(1). - */ -void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler); - -/** - * Reset the fatal error handler. This resets LLVM's fatal error handling - * behavior to the default. - */ -void LLVMResetFatalErrorHandler(void); - -/** - * Enable LLVM's built-in stack trace code. This intercepts the OS's crash - * signals and prints which component of LLVM you were in at the time if the - * crash. 
- */ -void LLVMEnablePrettyStackTrace(void); - /** * @defgroup LLVMCCoreContext Contexts * @@ -808,6 +712,7 @@ LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt128TypeInContext(LLVMContextRef C); LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits); /** @@ -819,6 +724,7 @@ LLVMTypeRef LLVMInt8Type(void); LLVMTypeRef LLVMInt16Type(void); LLVMTypeRef LLVMInt32Type(void); LLVMTypeRef LLVMInt64Type(void); +LLVMTypeRef LLVMInt128Type(void); LLVMTypeRef LLVMIntType(unsigned NumBits); unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy); @@ -1022,7 +928,6 @@ LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy); * @} */ - /** * @defgroup LLVMCCoreTypeSequential Sequential Types * @@ -1178,6 +1083,7 @@ LLVMTypeRef LLVMX86MMXType(void); macro(ConstantInt) \ macro(ConstantPointerNull) \ macro(ConstantStruct) \ + macro(ConstantTokenNone) \ macro(ConstantVector) \ macro(GlobalValue) \ macro(GlobalAlias) \ @@ -1215,6 +1121,11 @@ LLVMTypeRef LLVMX86MMXType(void); macro(SwitchInst) \ macro(UnreachableInst) \ macro(ResumeInst) \ + macro(CleanupReturnInst) \ + macro(CatchReturnInst) \ + macro(FuncletPadInst) \ + macro(CatchPadInst) \ + macro(CleanupPadInst) \ macro(UnaryInstruction) \ macro(AllocaInst) \ macro(CastInst) \ @@ -1950,7 +1861,7 @@ void LLVMSetGC(LLVMValueRef Fn, const char *Name); void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); /** - * Add a target-dependent attribute to a fuction + * Add a target-dependent attribute to a function * @see llvm::AttrBuilder::addAttribute() */ void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A, @@ -2427,7 +2338,7 @@ void LLVMInstructionEraseFromParent(LLVMValueRef Inst); * * @see llvm::Instruction::getOpCode() */ -LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst); +LLVMOpcode LLVMGetInstructionOpcode(LLVMValueRef Inst); /** * Obtain the predicate of an instruction. @@ -2780,6 +2691,8 @@ LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, const char *Name); LLVMBool LLVMGetVolatile(LLVMValueRef MemoryAccessInst); void LLVMSetVolatile(LLVMValueRef MemoryAccessInst, LLVMBool IsVolatile); +LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemoryAccessInst); +void LLVMSetOrdering(LLVMValueRef MemoryAccessInst, LLVMAtomicOrdering Ordering); /* Casts */ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val, @@ -3020,6 +2933,6 @@ LLVMBool LLVMIsMultithreaded(void); #ifdef __cplusplus } -#endif /* !defined(__cplusplus) */ +#endif -#endif /* !defined(LLVM_C_CORE_H) */ +#endif /* LLVM_C_CORE_H */ diff --git a/include/llvm-c/ErrorHandling.h b/include/llvm-c/ErrorHandling.h new file mode 100644 index 000000000000..5a80bc5e654f --- /dev/null +++ b/include/llvm-c/ErrorHandling.h @@ -0,0 +1,51 @@ +/*===-- llvm-c/ErrorHandling.h - Error Handling C Interface -------*- C -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file defines the C interface to LLVM's error handling mechanism. 
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_ERROR_HANDLING_H +#define LLVM_C_ERROR_HANDLING_H + +#include "llvm-c/Types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*LLVMFatalErrorHandler)(const char *Reason); + +/** + * Install a fatal error handler. By default, if LLVM detects a fatal error, it + * will call exit(1). This may not be appropriate in many contexts. For example, + * doing exit(1) will bypass many crash reporting/tracing system tools. This + * function allows you to install a callback that will be invoked prior to the + * call to exit(1). + */ +void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler); + +/** + * Reset the fatal error handler. This resets LLVM's fatal error handling + * behavior to the default. + */ +void LLVMResetFatalErrorHandler(void); + +/** + * Enable LLVM's built-in stack trace code. This intercepts the OS's crash + * signals and prints which component of LLVM you were in at the time if the + * crash. + */ +void LLVMEnablePrettyStackTrace(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h index eb3ecabfa8a8..b72a91a8b137 100644 --- a/include/llvm-c/ExecutionEngine.h +++ b/include/llvm-c/ExecutionEngine.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_EXECUTIONENGINE_H #define LLVM_C_EXECUTIONENGINE_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #include "llvm-c/Target.h" #include "llvm-c/TargetMachine.h" @@ -110,22 +110,6 @@ LLVMBool LLVMCreateMCJITCompilerForModule( struct LLVMMCJITCompilerOptions *Options, size_t SizeOfOptions, char **OutError); -/** Deprecated: Use LLVMCreateExecutionEngineForModule instead. */ -LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, - LLVMModuleProviderRef MP, - char **OutError); - -/** Deprecated: Use LLVMCreateInterpreterForModule instead. */ -LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, - LLVMModuleProviderRef MP, - char **OutError); - -/** Deprecated: Use LLVMCreateJITCompilerForModule instead. */ -LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, - LLVMModuleProviderRef MP, - unsigned OptLevel, - char **OutError); - void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE); void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE); @@ -144,17 +128,9 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F); void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M); -/** Deprecated: Use LLVMAddModule instead. */ -void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP); - LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, LLVMModuleRef *OutMod, char **OutError); -/** Deprecated: Use LLVMRemoveModule instead. 
*/ -LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, - LLVMModuleProviderRef MP, - LLVMModuleRef *OutMod, char **OutError); - LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, LLVMValueRef *OutFn); diff --git a/include/llvm-c/IRReader.h b/include/llvm-c/IRReader.h index 5001afb7ed7d..5b58d9921fb0 100644 --- a/include/llvm-c/IRReader.h +++ b/include/llvm-c/IRReader.h @@ -14,7 +14,7 @@ #ifndef LLVM_C_IRREADER_H #define LLVM_C_IRREADER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Initialization.h b/include/llvm-c/Initialization.h index 44194f8ea311..90c8396f7ad3 100644 --- a/include/llvm-c/Initialization.h +++ b/include/llvm-c/Initialization.h @@ -16,7 +16,7 @@ #ifndef LLVM_C_INITIALIZATION_H #define LLVM_C_INITIALIZATION_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Linker.h b/include/llvm-c/Linker.h index 9f98a3342d0b..4d9bd46a259b 100644 --- a/include/llvm-c/Linker.h +++ b/include/llvm-c/Linker.h @@ -14,7 +14,7 @@ #ifndef LLVM_C_LINKER_H #define LLVM_C_LINKER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { @@ -27,17 +27,27 @@ typedef enum { should not be used. */ } LLVMLinkerMode; -/* Links the source module into the destination module, taking ownership - * of the source module away from the caller. Optionally returns a - * human-readable description of any errors that occurred in linking. - * OutMessage must be disposed with LLVMDisposeMessage. The return value - * is true if an error occurred, false otherwise. +/* Links the source module into the destination module. The source module is + * damaged. The only thing that can be done is destroy it. Optionally returns a + * human-readable description of any errors that occurred in linking. OutMessage + * must be disposed with LLVMDisposeMessage. The return value is true if an + * error occurred, false otherwise. * * Note that the linker mode parameter \p Unused is no longer used, and has - * no effect. */ + * no effect. + * + * This function is deprecated. Use LLVMLinkModules2 instead. + */ LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src, LLVMLinkerMode Unused, char **OutMessage); +/* Links the source module into the destination module. The source module is + * destroyed. + * The return value is true if an error occurred, false otherwise. + * Use the diagnostic handler to get any diagnostic message. +*/ +LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src); + #ifdef __cplusplus } #endif diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h index 9cab5c426c45..a2980e89fe3d 100644 --- a/include/llvm-c/Object.h +++ b/include/llvm-c/Object.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_OBJECT_H #define LLVM_C_OBJECT_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #include "llvm/Config/llvm-config.h" #ifdef __cplusplus diff --git a/include/llvm-c/OrcBindings.h b/include/llvm-c/OrcBindings.h new file mode 100644 index 000000000000..f6aff916999a --- /dev/null +++ b/include/llvm-c/OrcBindings.h @@ -0,0 +1,134 @@ +/*===----------- llvm-c/OrcBindings.h - Orc Lib C Iface ---------*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. 
*| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to libLLVMOrcJIT.a, which implements *| +|* JIT compilation of LLVM IR. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +|* Note: This interface is experimental. It is *NOT* stable, and may be *| +|* changed without warning. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_ORCBINDINGS_H +#define LLVM_C_ORCBINDINGS_H + +#include "llvm-c/Object.h" +#include "llvm-c/Support.h" +#include "llvm-c/TargetMachine.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct LLVMOrcOpaqueJITStack *LLVMOrcJITStackRef; +typedef uint32_t LLVMOrcModuleHandle; +typedef uint64_t LLVMOrcTargetAddress; +typedef uint64_t (*LLVMOrcSymbolResolverFn)(const char *Name, + void *LookupCtx); +typedef uint64_t (*LLVMOrcLazyCompileCallbackFn)(LLVMOrcJITStackRef JITStack, + void *CallbackCtx); + +/** + * Create an ORC JIT stack. + * + * The client owns the resulting stack, and must call OrcDisposeInstance(...) + * to destroy it and free its memory. The JIT stack will take ownership of the + * TargetMachine, which will be destroyed when the stack is destroyed. The + * client should not attempt to dispose of the Target Machine, or it will result + * in a double-free. + */ +LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM); + +/** + * Mangle the given symbol. + * Memory will be allocated for MangledSymbol to hold the result. The client + */ +void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledSymbol, + const char *Symbol); + +/** + * Dispose of a mangled symbol. + */ + +void LLVMOrcDisposeMangledSymbol(char *MangledSymbol); + +/** + * Create a lazy compile callback. + */ +LLVMOrcTargetAddress +LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack, + LLVMOrcLazyCompileCallbackFn Callback, + void *CallbackCtx); + +/** + * Create a named indirect call stub. + */ +void LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack, + const char *StubName, + LLVMOrcTargetAddress InitAddr); + +/** + * Set the pointer for the given indirect stub. + */ +void LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, + const char *StubName, + LLVMOrcTargetAddress NewAddr); + +/** + * Add module to be eagerly compiled. + */ +LLVMOrcModuleHandle +LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx); + +/** + * Add module to be lazily compiled one function at a time. + */ +LLVMOrcModuleHandle +LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx); + +/** + * Add an object file. + */ +LLVMOrcModuleHandle +LLVMOrcAddObjectFile(LLVMOrcJITStackRef JITStack, LLVMObjectFileRef Obj, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx); + +/** + * Remove a module set from the JIT. + * + * This works for all modules that can be added via OrcAdd*, including object + * files. + */ +void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H); + +/** + * Get symbol address from JIT instance. 
+ */ +LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, + const char *SymbolName); + +/** + * Dispose of an ORC JIT stack. + */ +void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack); + +#ifdef __cplusplus +} +#endif /* extern "C" */ + +#endif /* LLVM_C_ORCBINDINGS_H */ diff --git a/include/llvm-c/Support.h b/include/llvm-c/Support.h index eca3b7a42037..735d1fbc78cc 100644 --- a/include/llvm-c/Support.h +++ b/include/llvm-c/Support.h @@ -15,30 +15,12 @@ #define LLVM_C_SUPPORT_H #include "llvm/Support/DataTypes.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { #endif -/** - * @defgroup LLVMCSupportTypes Types and Enumerations - * - * @{ - */ - -typedef int LLVMBool; - -/** - * Used to pass regions of memory through LLVM interfaces. - * - * @see llvm::MemoryBuffer - */ -typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef; - -/** - * @} - */ - /** * This function permanently loads the dynamic library at the given path. * It is safe to call this function multiple times for the same library. diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h index b465b4b88db5..24d2cb4c9598 100644 --- a/include/llvm-c/Target.h +++ b/include/llvm-c/Target.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_TARGET_H #define LLVM_C_TARGET_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #include "llvm/Config/llvm-config.h" #if defined(_MSC_VER) && !defined(inline) diff --git a/include/llvm-c/TargetMachine.h b/include/llvm-c/TargetMachine.h index 8cf1f43cb3c5..303708093653 100644 --- a/include/llvm-c/TargetMachine.h +++ b/include/llvm-c/TargetMachine.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_TARGETMACHINE_H #define LLVM_C_TARGETMACHINE_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #include "llvm-c/Target.h" #ifdef __cplusplus @@ -115,7 +115,7 @@ char *LLVMGetTargetMachineCPU(LLVMTargetMachineRef T); LLVMDisposeMessage. */ char *LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T); -/** Deprecated: use LLVMGetDataLayout(LLVMModuleRef M) instead. */ +/** Returns the llvm::DataLayout used for this llvm:TargetMachine. */ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T); /** Set the target machine's ASM verbosity. 
*/ diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h index 448078012eac..3af7425dd268 100644 --- a/include/llvm-c/Transforms/IPO.h +++ b/include/llvm-c/Transforms/IPO.h @@ -15,7 +15,7 @@ #ifndef LLVM_C_TRANSFORMS_IPO_H #define LLVM_C_TRANSFORMS_IPO_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Transforms/PassManagerBuilder.h b/include/llvm-c/Transforms/PassManagerBuilder.h index 3d7a9d677eab..69786b341ab4 100644 --- a/include/llvm-c/Transforms/PassManagerBuilder.h +++ b/include/llvm-c/Transforms/PassManagerBuilder.h @@ -14,7 +14,7 @@ #ifndef LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H #define LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" typedef struct LLVMOpaquePassManagerBuilder *LLVMPassManagerBuilderRef; diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h index 48c19a6e3117..c989ee86b9f7 100644 --- a/include/llvm-c/Transforms/Scalar.h +++ b/include/llvm-c/Transforms/Scalar.h @@ -19,7 +19,7 @@ #ifndef LLVM_C_TRANSFORMS_SCALAR_H #define LLVM_C_TRANSFORMS_SCALAR_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { diff --git a/include/llvm-c/Transforms/Vectorize.h b/include/llvm-c/Transforms/Vectorize.h index c9102da60297..a82ef49cb167 100644 --- a/include/llvm-c/Transforms/Vectorize.h +++ b/include/llvm-c/Transforms/Vectorize.h @@ -20,7 +20,7 @@ #ifndef LLVM_C_TRANSFORMS_VECTORIZE_H #define LLVM_C_TRANSFORMS_VECTORIZE_H -#include "llvm-c/Core.h" +#include "llvm-c/Types.h" #ifdef __cplusplus extern "C" { @@ -51,4 +51,3 @@ void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM); #endif /* defined(__cplusplus) */ #endif - diff --git a/include/llvm-c/Types.h b/include/llvm-c/Types.h new file mode 100644 index 000000000000..19029584efcc --- /dev/null +++ b/include/llvm-c/Types.h @@ -0,0 +1,124 @@ +/*===-- llvm-c/Support.h - C Interface Types declarations ---------*- C -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file defines types used by the the C interface to LLVM. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TYPES_H +#define LLVM_C_TYPES_H + +#include "llvm/Support/DataTypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @defgroup LLVMCSupportTypes Types and Enumerations + * + * @{ + */ + +typedef int LLVMBool; + +/* Opaque types. */ + +/** + * LLVM uses a polymorphic type hierarchy which C cannot represent, therefore + * parameters must be passed as base types. Despite the declared types, most + * of the functions provided operate only on branches of the type hierarchy. + * The declared parameter names are descriptive and specify which type is + * required. Additionally, each type hierarchy is documented along with the + * functions that operate upon it. For more detail, refer to LLVM's C++ code. + * If in doubt, refer to Core.cpp, which performs parameter downcasts in the + * form unwrap(Param). + */ + +/** + * Used to pass regions of memory through LLVM interfaces. + * + * @see llvm::MemoryBuffer + */ +typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef; + +/** + * The top-level container for all LLVM global data. See the LLVMContext class. 
+ */ +typedef struct LLVMOpaqueContext *LLVMContextRef; + +/** + * The top-level container for all other LLVM Intermediate Representation (IR) + * objects. + * + * @see llvm::Module + */ +typedef struct LLVMOpaqueModule *LLVMModuleRef; + +/** + * Each value in the LLVM IR has a type, an LLVMTypeRef. + * + * @see llvm::Type + */ +typedef struct LLVMOpaqueType *LLVMTypeRef; + +/** + * Represents an individual value in LLVM IR. + * + * This models llvm::Value. + */ +typedef struct LLVMOpaqueValue *LLVMValueRef; + +/** + * Represents a basic block of instructions in LLVM IR. + * + * This models llvm::BasicBlock. + */ +typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef; + +/** + * Represents an LLVM basic block builder. + * + * This models llvm::IRBuilder. + */ +typedef struct LLVMOpaqueBuilder *LLVMBuilderRef; + +/** + * Interface used to provide a module to JIT or interpreter. + * This is now just a synonym for llvm::Module, but we have to keep using the + * different type to keep binary compatibility. + */ +typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef; + +/** @see llvm::PassManagerBase */ +typedef struct LLVMOpaquePassManager *LLVMPassManagerRef; + +/** @see llvm::PassRegistry */ +typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef; + +/** + * Used to get the users and usees of a Value. + * + * @see llvm::Use */ +typedef struct LLVMOpaqueUse *LLVMUseRef; + +/** + * @see llvm::DiagnosticInfo + */ +typedef struct LLVMOpaqueDiagnosticInfo *LLVMDiagnosticInfoRef; + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index cb3a69160454..691a0cd3f55c 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -374,8 +374,8 @@ extern lto_bool_t lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod); /** - * Sets the object module for code generation. This will transfer the ownship of - * the module to code generator. + * Sets the object module for code generation. This will transfer the ownership + * of the module to the code generator. * * \c cg and \c mod must both be in the same context. * diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index 76615affb253..3fe04060fd59 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -142,6 +142,9 @@ public: /// @} static unsigned int semanticsPrecision(const fltSemantics &); + static ExponentType semanticsMinExponent(const fltSemantics &); + static ExponentType semanticsMaxExponent(const fltSemantics &); + static unsigned int semanticsSizeInBits(const fltSemantics &); /// IEEE-754R 5.11: Floating Point Comparison Relations. enum cmpResult { @@ -296,7 +299,7 @@ public: /// IEEE remainder. opStatus remainder(const APFloat &); /// C fmod, or llvm frem. - opStatus mod(const APFloat &, roundingMode); + opStatus mod(const APFloat &); opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode); opStatus roundToIntegral(roundingMode); /// IEEE-754R 5.3.1: nextUp/nextDown. @@ -445,6 +448,9 @@ public: /// Returns true if and only if the number has the largest possible finite /// magnitude in the current semantics. bool isLargest() const; + + /// Returns true if and only if the number is an exact integer. 
+ bool isInteger() const; /// @} diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index 5013f295f5c7..e2a0cb5e69dc 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -294,11 +294,12 @@ public: delete[] pVal; } - /// \brief Default constructor that creates an uninitialized APInt. + /// \brief Default constructor that creates an uninteresting APInt + /// representing a 1-bit zero value. /// /// This is useful for object deserialization (pair this with the static /// method Read). - explicit APInt() : BitWidth(1) {} + explicit APInt() : BitWidth(1), VAL(0) {} /// \brief Returns whether this instance allocated memory. bool needsCleanup() const { return !isSingleWord(); } @@ -1528,7 +1529,7 @@ public: /// \returns the nearest log base 2 of this APInt. Ties round up. /// /// NOTE: When we have a BitWidth of 1, we define: - /// + /// /// log2(0) = UINT32_MAX /// log2(1) = 0 /// diff --git a/include/llvm/ADT/APSInt.h b/include/llvm/ADT/APSInt.h index a187515f8592..a6552d0a2f36 100644 --- a/include/llvm/ADT/APSInt.h +++ b/include/llvm/ADT/APSInt.h @@ -21,6 +21,7 @@ namespace llvm { class APSInt : public APInt { bool IsUnsigned; + public: /// Default constructor that creates an uninitialized APInt. explicit APSInt() : IsUnsigned(false) {} @@ -246,8 +247,7 @@ public: return this->operator|(RHS); } - - APSInt operator^(const APSInt& RHS) const { + APSInt operator^(const APSInt &RHS) const { assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!"); return APSInt(static_cast(*this) ^ RHS, IsUnsigned); } @@ -286,7 +286,7 @@ public: } /// \brief Determine if two APSInts have the same value, zero- or - /// sign-extending as needed. + /// sign-extending as needed. static bool isSameValue(const APSInt &I1, const APSInt &I2) { return !compareValues(I1, I2); } diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index c8795fd89e33..517ba39849e1 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -10,6 +10,7 @@ #ifndef LLVM_ADT_ARRAYREF_H #define LLVM_ADT_ARRAYREF_H +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/None.h" #include "llvm/ADT/SmallVector.h" #include @@ -85,7 +86,7 @@ namespace llvm { /// Construct an ArrayRef from a std::initializer_list. /*implicit*/ ArrayRef(const std::initializer_list &Vec) - : Data(Vec.begin() == Vec.end() ? (T*)0 : Vec.begin()), + : Data(Vec.begin() == Vec.end() ? (T*)nullptr : Vec.begin()), Length(Vec.size()) {} /// Construct an ArrayRef from ArrayRef. This uses SFINAE to @@ -148,7 +149,7 @@ namespace llvm { // copy - Allocate copy in Allocator and return ArrayRef to it. template ArrayRef copy(Allocator &A) { T *Buff = A.template Allocate(Length); - std::copy(begin(), end(), Buff); + std::uninitialized_copy(begin(), end(), Buff); return ArrayRef(Buff, Length); } @@ -156,8 +157,6 @@ namespace llvm { bool equals(ArrayRef RHS) const { if (Length != RHS.Length) return false; - if (Length == 0) - return true; return std::equal(begin(), end(), RHS.begin()); } @@ -339,6 +338,16 @@ namespace llvm { return Vec; } + /// Construct an ArrayRef from an ArrayRef (no-op) (const) + template ArrayRef makeArrayRef(const ArrayRef &Vec) { + return Vec; + } + + /// Construct an ArrayRef from an ArrayRef (no-op) + template ArrayRef &makeArrayRef(ArrayRef &Vec) { + return Vec; + } + /// Construct an ArrayRef from a C array. 
template ArrayRef makeArrayRef(const T (&Arr)[N]) { @@ -366,6 +375,10 @@ namespace llvm { template struct isPodLike > { static const bool value = true; }; + + template hash_code hash_value(ArrayRef S) { + return hash_combine_range(S.begin(), S.end()); + } } #endif diff --git a/include/llvm/ADT/BitVector.h b/include/llvm/ADT/BitVector.h index f58dd7356c7d..ad00d51f99e9 100644 --- a/include/llvm/ADT/BitVector.h +++ b/include/llvm/ADT/BitVector.h @@ -34,7 +34,7 @@ class BitVector { BitWord *Bits; // Actual bits. unsigned Size; // Size of bitvector in bits. - unsigned Capacity; // Size of allocated memory in BitWord. + unsigned Capacity; // Number of BitWords allocated in the Bits array. public: typedef unsigned size_type; @@ -566,8 +566,16 @@ private: if (AddBits) clear_unused_bits(); } + +public: + /// Return the size (in bytes) of the bit vector. + size_t getMemorySize() const { return Capacity * sizeof(BitWord); } }; +static inline size_t capacity_in_bytes(const BitVector &X) { + return X.getMemorySize(); +} + } // End llvm namespace namespace std { diff --git a/include/llvm/ADT/DeltaAlgorithm.h b/include/llvm/ADT/DeltaAlgorithm.h index 21bc1e80c9d8..a26f37dfdc7d 100644 --- a/include/llvm/ADT/DeltaAlgorithm.h +++ b/include/llvm/ADT/DeltaAlgorithm.h @@ -68,7 +68,7 @@ private: /// \return - True on success. bool Search(const changeset_ty &Changes, const changesetlist_ty &Sets, changeset_ty &Res); - + protected: /// UpdatedSearchState - Callback used when the search state changes. virtual void UpdatedSearchState(const changeset_ty &Changes, diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index 27f73157a29f..6ee1960b5c82 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -282,7 +282,7 @@ protected: "# initial buckets must be a power of two!"); const KeyT EmptyKey = getEmptyKey(); for (BucketT *B = getBuckets(), *E = getBucketsEnd(); B != E; ++B) - new (&B->getFirst()) KeyT(EmptyKey); + ::new (&B->getFirst()) KeyT(EmptyKey); } void moveFromOldBuckets(BucketT *OldBucketsBegin, BucketT *OldBucketsEnd) { @@ -300,7 +300,7 @@ protected: (void)FoundVal; // silence warning. assert(!FoundVal && "Key already in new map?"); DestBucket->getFirst() = std::move(B->getFirst()); - new (&DestBucket->getSecond()) ValueT(std::move(B->getSecond())); + ::new (&DestBucket->getSecond()) ValueT(std::move(B->getSecond())); incrementNumEntries(); // Free the value. 
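[Aside on the repeated new -> ::new changes in the DenseMap.h hunks above: an unqualified placement new-expression looks up operator new in the scope of the class being constructed first, so a key or value type that declares its own operator new hides the global placement form and the unqualified expression fails to compile. The :: qualifier forces the global placement operator from <new>. A minimal sketch, using a hypothetical Widget type not taken from this patch:]

    #include <cstddef>
    #include <new>

    struct Widget {
      // A class-specific allocation function hides the global placement form
      // for unqualified new-expressions naming Widget.
      static void *operator new(std::size_t N) { return ::operator new(N); }
      int V;
      explicit Widget(int V) : V(V) {}
    };

    void constructAt(void *Storage) {
      // new (Storage) Widget(1);   // Ill-formed: lookup finds only
      //                            // Widget::operator new(size_t).
      ::new (Storage) Widget(1);    // OK: uses ::operator new(size_t, void*).
    }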
@@ -324,11 +324,11 @@ protected: getNumBuckets() * sizeof(BucketT)); else for (size_t i = 0; i < getNumBuckets(); ++i) { - new (&getBuckets()[i].getFirst()) + ::new (&getBuckets()[i].getFirst()) KeyT(other.getBuckets()[i].getFirst()); if (!KeyInfoT::isEqual(getBuckets()[i].getFirst(), getEmptyKey()) && !KeyInfoT::isEqual(getBuckets()[i].getFirst(), getTombstoneKey())) - new (&getBuckets()[i].getSecond()) + ::new (&getBuckets()[i].getSecond()) ValueT(other.getBuckets()[i].getSecond()); } } @@ -402,7 +402,7 @@ private: TheBucket = InsertIntoBucketImpl(Key, TheBucket); TheBucket->getFirst() = Key; - new (&TheBucket->getSecond()) ValueT(Value); + ::new (&TheBucket->getSecond()) ValueT(Value); return TheBucket; } @@ -411,7 +411,7 @@ private: TheBucket = InsertIntoBucketImpl(Key, TheBucket); TheBucket->getFirst() = Key; - new (&TheBucket->getSecond()) ValueT(std::move(Value)); + ::new (&TheBucket->getSecond()) ValueT(std::move(Value)); return TheBucket; } @@ -419,7 +419,7 @@ private: TheBucket = InsertIntoBucketImpl(Key, TheBucket); TheBucket->getFirst() = std::move(Key); - new (&TheBucket->getSecond()) ValueT(std::move(Value)); + ::new (&TheBucket->getSecond()) ValueT(std::move(Value)); return TheBucket; } @@ -766,10 +766,10 @@ public: // Swap separately and handle any assymetry. std::swap(LHSB->getFirst(), RHSB->getFirst()); if (hasLHSValue) { - new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond())); + ::new (&RHSB->getSecond()) ValueT(std::move(LHSB->getSecond())); LHSB->getSecond().~ValueT(); } else if (hasRHSValue) { - new (&LHSB->getSecond()) ValueT(std::move(RHSB->getSecond())); + ::new (&LHSB->getSecond()) ValueT(std::move(RHSB->getSecond())); RHSB->getSecond().~ValueT(); } } @@ -795,11 +795,11 @@ public: for (unsigned i = 0, e = InlineBuckets; i != e; ++i) { BucketT *NewB = &LargeSide.getInlineBuckets()[i], *OldB = &SmallSide.getInlineBuckets()[i]; - new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst())); + ::new (&NewB->getFirst()) KeyT(std::move(OldB->getFirst())); OldB->getFirst().~KeyT(); if (!KeyInfoT::isEqual(NewB->getFirst(), EmptyKey) && !KeyInfoT::isEqual(NewB->getFirst(), TombstoneKey)) { - new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond())); + ::new (&NewB->getSecond()) ValueT(std::move(OldB->getSecond())); OldB->getSecond().~ValueT(); } } @@ -866,8 +866,8 @@ public: !KeyInfoT::isEqual(P->getFirst(), TombstoneKey)) { assert(size_t(TmpEnd - TmpBegin) < InlineBuckets && "Too many inline buckets!"); - new (&TmpEnd->getFirst()) KeyT(std::move(P->getFirst())); - new (&TmpEnd->getSecond()) ValueT(std::move(P->getSecond())); + ::new (&TmpEnd->getFirst()) KeyT(std::move(P->getFirst())); + ::new (&TmpEnd->getSecond()) ValueT(std::move(P->getSecond())); ++TmpEnd; P->getSecond().~ValueT(); } diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h index b0a053072079..a844ebcccf5b 100644 --- a/include/llvm/ADT/DenseMapInfo.h +++ b/include/llvm/ADT/DenseMapInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_ADT_DENSEMAPINFO_H #define LLVM_ADT_DENSEMAPINFO_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/PointerLikeTypeTraits.h" @@ -58,7 +59,7 @@ template<> struct DenseMapInfo { return LHS == RHS; } }; - + // Provide DenseMapInfo for unsigned ints. template<> struct DenseMapInfo { static inline unsigned getEmptyKey() { return ~0U; } @@ -190,6 +191,31 @@ template <> struct DenseMapInfo { } }; +// Provide DenseMapInfo for ArrayRefs. 
+template struct DenseMapInfo> { + static inline ArrayRef getEmptyKey() { + return ArrayRef(reinterpret_cast(~static_cast(0)), + size_t(0)); + } + static inline ArrayRef getTombstoneKey() { + return ArrayRef(reinterpret_cast(~static_cast(1)), + size_t(0)); + } + static unsigned getHashValue(ArrayRef Val) { + assert(Val.data() != getEmptyKey().data() && "Cannot hash the empty key!"); + assert(Val.data() != getTombstoneKey().data() && + "Cannot hash the tombstone key!"); + return (unsigned)(hash_value(Val)); + } + static bool isEqual(ArrayRef LHS, ArrayRef RHS) { + if (RHS.data() == getEmptyKey().data()) + return LHS.data() == getEmptyKey().data(); + if (RHS.data() == getTombstoneKey().data()) + return LHS.data() == getTombstoneKey().data(); + return LHS == RHS; + } +}; + } // end namespace llvm #endif diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h index d34024005dfe..ef09dce37980 100644 --- a/include/llvm/ADT/DenseSet.h +++ b/include/llvm/ADT/DenseSet.h @@ -42,6 +42,7 @@ class DenseSet { static_assert(sizeof(typename MapTy::value_type) == sizeof(ValueT), "DenseMap buckets unexpectedly large!"); MapTy TheMap; + public: typedef ValueT key_type; typedef ValueT value_type; @@ -79,6 +80,7 @@ public: class Iterator { typename MapTy::iterator I; friend class DenseSet; + public: typedef typename MapTy::iterator::difference_type difference_type; typedef ValueT value_type; @@ -99,6 +101,7 @@ public: class ConstIterator { typename MapTy::const_iterator I; friend class DenseSet; + public: typedef typename MapTy::const_iterator::difference_type difference_type; typedef ValueT value_type; @@ -148,7 +151,7 @@ public: detail::DenseSetEmpty Empty; return TheMap.insert(std::make_pair(V, Empty)); } - + // Range insertion of values. template void insert(InputIt I, InputIt E) { diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h index d79b9acacfa9..c9317b8539b3 100644 --- a/include/llvm/ADT/DepthFirstIterator.h +++ b/include/llvm/ADT/DepthFirstIterator.h @@ -58,7 +58,6 @@ public: SetType &Visited; }; - // Generic Depth First Iterator template::NodeType*, 8>, @@ -76,21 +75,22 @@ class df_iterator : public std::iterator > VisitStack; + std::vector> VisitStack; + private: inline df_iterator(NodeType *Node) { this->Visited.insert(Node); - VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0), - GT::child_begin(Node))); + VisitStack.push_back( + std::make_pair(PointerIntTy(Node, 0), GT::child_begin(Node))); } - inline df_iterator() { - // End is when stack is empty + inline df_iterator() { + // End is when stack is empty } inline df_iterator(NodeType *Node, SetType &S) : df_iterator_storage(S) { if (!S.count(Node)) { - VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0), - GT::child_begin(Node))); + VisitStack.push_back( + std::make_pair(PointerIntTy(Node, 0), GT::child_begin(Node))); this->Visited.insert(Node); } } @@ -115,8 +115,8 @@ private: // Has our next sibling been visited? if (Next && this->Visited.insert(Next).second) { // No, do it now. - VisitStack.push_back(std::make_pair(PointerIntTy(Next, 0), - GT::child_begin(Next))); + VisitStack.push_back( + std::make_pair(PointerIntTy(Next, 0), GT::child_begin(Next))); return; } } @@ -195,7 +195,6 @@ public: } }; - // Provide global constructors that automatically figure out correct types... // template @@ -237,7 +236,6 @@ iterator_range> depth_first_ext(const T& G, return make_range(df_ext_begin(G, S), df_ext_end(G, S)); } - // Provide global definitions of inverse depth first iterators... 
template ::NodeType*, 8>, diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h index 52d10c1c1245..c9205396591b 100644 --- a/include/llvm/ADT/FoldingSet.h +++ b/include/llvm/ADT/FoldingSet.h @@ -122,9 +122,10 @@ protected: /// is greater than twice the number of buckets. unsigned NumNodes; - ~FoldingSetImpl(); - explicit FoldingSetImpl(unsigned Log2InitSize = 6); + FoldingSetImpl(FoldingSetImpl &&Arg); + FoldingSetImpl &operator=(FoldingSetImpl &&RHS); + ~FoldingSetImpl(); public: //===--------------------------------------------------------------------===// @@ -137,7 +138,6 @@ public: void *NextInFoldingSetBucket; public: - Node() : NextInFoldingSetBucket(nullptr) {} // Accessors @@ -182,13 +182,11 @@ public: bool empty() const { return NumNodes == 0; } private: - /// GrowHashTable - Double the size of the hash table and rehash everything. /// void GrowHashTable(); protected: - /// GetNodeProfile - Instantiations of the FoldingSet template implement /// this function to gather data bits for the given node. virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const = 0; @@ -269,6 +267,7 @@ template struct ContextualFoldingSetTrait class FoldingSetNodeIDRef { const unsigned *Data; size_t Size; + public: FoldingSetNodeIDRef() : Data(nullptr), Size(0) {} FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {} @@ -393,6 +392,10 @@ DefaultContextualFoldingSetTrait::ComputeHash(T &X, /// implementation of the folding set to the node class T. T must be a /// subclass of FoldingSetNode and implement a Profile function. /// +/// Note that this set type is movable and move-assignable. However, its +/// moved-from state is not a valid state for anything other than +/// move-assigning and destroying. This is primarily to enable movable APIs +/// that incorporate these objects. template class FoldingSet final : public FoldingSetImpl { private: /// GetNodeProfile - Each instantiatation of the FoldingSet needs to provide a @@ -417,8 +420,13 @@ private: public: explicit FoldingSet(unsigned Log2InitSize = 6) - : FoldingSetImpl(Log2InitSize) - {} + : FoldingSetImpl(Log2InitSize) {} + + FoldingSet(FoldingSet &&Arg) : FoldingSetImpl(std::move(Arg)) {} + FoldingSet &operator=(FoldingSet &&RHS) { + (void)FoldingSetImpl::operator=(std::move(RHS)); + return *this; + } typedef FoldingSetIterator iterator; iterator begin() { return iterator(Buckets); } @@ -498,7 +506,6 @@ public: Ctx getContext() const { return Context; } - typedef FoldingSetIterator iterator; iterator begin() { return iterator(Buckets); } iterator end() { return iterator(Buckets+NumBuckets); } @@ -614,9 +621,7 @@ public: } }; - -template -class FoldingSetIterator : public FoldingSetIteratorImpl { +template class FoldingSetIterator : public FoldingSetIteratorImpl { public: explicit FoldingSetIterator(void **Bucket) : FoldingSetIteratorImpl(Bucket) {} @@ -666,8 +671,7 @@ public: } }; - -template +template class FoldingSetBucketIterator : public FoldingSetBucketIteratorImpl { public: explicit FoldingSetBucketIterator(void **Bucket) : @@ -694,6 +698,7 @@ public: template class FoldingSetNodeWrapper : public FoldingSetNode { T data; + public: template explicit FoldingSetNodeWrapper(Ts &&... Args) @@ -716,12 +721,12 @@ public: /// information that would otherwise only be required for recomputing an ID. 
class FastFoldingSetNode : public FoldingSetNode { FoldingSetNodeID FastID; + protected: explicit FastFoldingSetNode(const FoldingSetNodeID &ID) : FastID(ID) {} + public: - void Profile(FoldingSetNodeID &ID) const { - ID.AddNodeID(FastID); - } + void Profile(FoldingSetNodeID &ID) const { ID.AddNodeID(FastID); } }; //===----------------------------------------------------------------------===// diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h index 748d3e4bf9ff..a1d26bd97045 100644 --- a/include/llvm/ADT/ImmutableList.h +++ b/include/llvm/ADT/ImmutableList.h @@ -28,7 +28,7 @@ class ImmutableListImpl : public FoldingSetNode { T Head; const ImmutableListImpl* Tail; - ImmutableListImpl(const T& head, const ImmutableListImpl* tail = 0) + ImmutableListImpl(const T& head, const ImmutableListImpl* tail = nullptr) : Head(head), Tail(tail) {} friend class ImmutableListFactory; @@ -72,7 +72,7 @@ public: // This constructor should normally only be called by ImmutableListFactory. // There may be cases, however, when one needs to extract the internal pointer // and reconstruct a list object from that pointer. - ImmutableList(const ImmutableListImpl* x = 0) : X(x) {} + ImmutableList(const ImmutableListImpl* x = nullptr) : X(x) {} const ImmutableListImpl* getInternalPointer() const { return X; @@ -81,7 +81,7 @@ public: class iterator { const ImmutableListImpl* L; public: - iterator() : L(0) {} + iterator() : L(nullptr) {} iterator(ImmutableList l) : L(l.getInternalPointer()) {} iterator& operator++() { L = L->getTail(); return *this; } @@ -128,7 +128,7 @@ public: /// getTail - Returns the tail of the list, which is another (possibly empty) /// ImmutableList. ImmutableList getTail() { - return X ? X->getTail() : 0; + return X ? X->getTail() : nullptr; } void Profile(FoldingSetNodeID& ID) const { @@ -190,7 +190,7 @@ public: } ImmutableList getEmptyList() const { - return ImmutableList(0); + return ImmutableList(nullptr); } ImmutableList create(const T& X) { @@ -226,4 +226,4 @@ struct isPodLike > { static const bool value = true; }; } // end llvm namespace -#endif +#endif // LLVM_ADT_IMMUTABLELIST_H diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h index 438dec2333c5..7480cd73da61 100644 --- a/include/llvm/ADT/ImmutableMap.h +++ b/include/llvm/ADT/ImmutableMap.h @@ -55,7 +55,6 @@ struct ImutKeyValueInfo { } }; - template > class ImmutableMap { @@ -79,9 +78,11 @@ public: explicit ImmutableMap(const TreeTy* R) : Root(const_cast(R)) { if (Root) { Root->retain(); } } + ImmutableMap(const ImmutableMap &X) : Root(X.Root) { if (Root) { Root->retain(); } } + ImmutableMap &operator=(const ImmutableMap &X) { if (Root != X.Root) { if (X.Root) { X.Root->retain(); } @@ -90,6 +91,7 @@ public: } return *this; } + ~ImmutableMap() { if (Root) { Root->release(); } } @@ -99,11 +101,10 @@ public: const bool Canonicalize; public: - Factory(bool canonicalize = true) - : Canonicalize(canonicalize) {} - - Factory(BumpPtrAllocator& Alloc, bool canonicalize = true) - : F(Alloc), Canonicalize(canonicalize) {} + Factory(bool canonicalize = true) : Canonicalize(canonicalize) {} + + Factory(BumpPtrAllocator &Alloc, bool canonicalize = true) + : F(Alloc), Canonicalize(canonicalize) {} ImmutableMap getEmptyMap() { return ImmutableMap(F.getEmptyTree()); } @@ -143,14 +144,12 @@ public: return Root; } - TreeTy *getRootWithoutRetain() const { - return Root; - } - + TreeTy *getRootWithoutRetain() const { return Root; } + void manualRetain() { if (Root) Root->retain(); } - + void 
manualRelease() { if (Root) Root->release(); } @@ -224,7 +223,7 @@ public: return nullptr; } - + /// getMaxElement - Returns the pair in the ImmutableMap for /// which key is the highest in the ordering of keys in the map. This /// method returns NULL if the map is empty. @@ -260,20 +259,21 @@ public: typedef typename ValInfo::data_type_ref data_type_ref; typedef ImutAVLTree TreeTy; typedef typename TreeTy::Factory FactoryTy; - + protected: TreeTy *Root; FactoryTy *Factory; - + public: /// Constructs a map from a pointer to a tree root. In general one /// should use a Factory object to create maps instead of directly /// invoking the constructor, but there are cases where make this /// constructor public is useful. - explicit ImmutableMapRef(const TreeTy* R, FactoryTy *F) - : Root(const_cast(R)), - Factory(F) { - if (Root) { Root->retain(); } + explicit ImmutableMapRef(const TreeTy *R, FactoryTy *F) + : Root(const_cast(R)), Factory(F) { + if (Root) { + Root->retain(); + } } explicit ImmutableMapRef(const ImmutableMap &X, @@ -282,21 +282,21 @@ public: Factory(F.getTreeFactory()) { if (Root) { Root->retain(); } } - - ImmutableMapRef(const ImmutableMapRef &X) - : Root(X.Root), - Factory(X.Factory) { - if (Root) { Root->retain(); } + + ImmutableMapRef(const ImmutableMapRef &X) : Root(X.Root), Factory(X.Factory) { + if (Root) { + Root->retain(); + } } ImmutableMapRef &operator=(const ImmutableMapRef &X) { if (Root != X.Root) { if (X.Root) X.Root->retain(); - + if (Root) Root->release(); - + Root = X.Root; Factory = X.Factory; } @@ -307,7 +307,7 @@ public: if (Root) Root->release(); } - + static inline ImmutableMapRef getEmptyMap(FactoryTy *F) { return ImmutableMapRef(0, F); } @@ -329,31 +329,34 @@ public: TreeTy *NewT = Factory->remove(Root, K); return ImmutableMapRef(NewT, Factory); } - + bool contains(key_type_ref K) const { return Root ? Root->contains(K) : false; } - + ImmutableMap asImmutableMap() const { return ImmutableMap(Factory->getCanonicalTree(Root)); } - + bool operator==(const ImmutableMapRef &RHS) const { return Root && RHS.Root ? Root->isEqual(*RHS.Root) : Root == RHS.Root; } - + bool operator!=(const ImmutableMapRef &RHS) const { return Root && RHS.Root ? Root->isNotEqual(*RHS.Root) : Root != RHS.Root; } - + bool isEmpty() const { return !Root; } - + //===--------------------------------------------------===// // For testing. //===--------------------------------------------------===// - - void verify() const { if (Root) Root->verify(); } - + + void verify() const { + if (Root) + Root->verify(); + } + //===--------------------------------------------------===// // Iterators. //===--------------------------------------------------===// @@ -370,38 +373,36 @@ public: iterator begin() const { return iterator(Root); } iterator end() const { return iterator(); } - - data_type* lookup(key_type_ref K) const { + + data_type *lookup(key_type_ref K) const { if (Root) { TreeTy* T = Root->find(K); if (T) return &T->getValue().second; } - - return 0; + + return nullptr; } - + /// getMaxElement - Returns the pair in the ImmutableMap for /// which key is the highest in the ordering of keys in the map. This /// method returns NULL if the map is empty. value_type* getMaxElement() const { return Root ? &(Root->getMaxElement()->getValue()) : 0; } - + //===--------------------------------------------------===// // Utility methods. //===--------------------------------------------------===// - + unsigned getHeight() const { return Root ? 
Root->getHeight() : 0; } - - static inline void Profile(FoldingSetNodeID& ID, const ImmutableMapRef &M) { + + static inline void Profile(FoldingSetNodeID &ID, const ImmutableMapRef &M) { ID.AddPointer(M.Root); } - - inline void Profile(FoldingSetNodeID& ID) const { - return Profile(ID, *this); - } + + inline void Profile(FoldingSetNodeID &ID) const { return Profile(ID, *this); } }; - + } // end namespace llvm -#endif +#endif // LLVM_ADT_IMMUTABLEMAP_H diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h index 65b2da793d7c..8057ec10be00 100644 --- a/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -154,7 +154,7 @@ public: template IntrusiveRefCntPtr(IntrusiveRefCntPtr&& S) : Obj(S.get()) { - S.Obj = 0; + S.Obj = nullptr; } template @@ -190,7 +190,7 @@ public: } void resetWithoutRelease() { - Obj = 0; + Obj = nullptr; } private: diff --git a/include/llvm/ADT/Optional.h b/include/llvm/ADT/Optional.h index 855ab890392e..d9acaf6d23b0 100644 --- a/include/llvm/ADT/Optional.h +++ b/include/llvm/ADT/Optional.h @@ -159,6 +159,25 @@ template struct isPodLike > { template void operator==(const Optional &X, const Optional &Y); +template +bool operator==(const Optional &X, NoneType) { + return !X.hasValue(); +} + +template +bool operator==(NoneType, const Optional &X) { + return X == None; +} + +template +bool operator!=(const Optional &X, NoneType) { + return !(X == None); +} + +template +bool operator!=(NoneType, const Optional &X) { + return X != None; +} /// \brief Poison comparison between two \c Optional objects. Clients needs to /// explicitly compare the underlying values and account for empty \c Optional /// objects. diff --git a/include/llvm/ADT/PackedVector.h b/include/llvm/ADT/PackedVector.h index 1ae2a77e7eaf..09267173fd77 100644 --- a/include/llvm/ADT/PackedVector.h +++ b/include/llvm/ADT/PackedVector.h @@ -83,9 +83,9 @@ public: PackedVector &Vec; const unsigned Idx; - reference(); // Undefined + reference(); // Undefined public: - reference(PackedVector &vec, unsigned idx) : Vec(vec), Idx(idx) { } + reference(PackedVector &vec, unsigned idx) : Vec(vec), Idx(idx) {} reference &operator=(T val) { Vec.setValue(Vec.Bits, Idx, val); @@ -96,16 +96,16 @@ public: } }; - PackedVector() { } + PackedVector() = default; explicit PackedVector(unsigned size) : Bits(size << (BitNum-1)) { } bool empty() const { return Bits.empty(); } - unsigned size() const { return Bits.size() >> (BitNum-1); } - + unsigned size() const { return Bits.size() >> (BitNum - 1); } + void clear() { Bits.clear(); } - - void resize(unsigned N) { Bits.resize(N << (BitNum-1)); } + + void resize(unsigned N) { Bits.resize(N << (BitNum - 1)); } void reserve(unsigned N) { Bits.reserve(N << (BitNum-1)); } @@ -135,24 +135,14 @@ public: return Bits != RHS.Bits; } - const PackedVector &operator=(const PackedVector &RHS) { - Bits = RHS.Bits; - return *this; - } - PackedVector &operator|=(const PackedVector &RHS) { Bits |= RHS.Bits; return *this; } - - void swap(PackedVector &RHS) { - Bits.swap(RHS.Bits); - } }; -// Leave BitNum=0 undefined. -template -class PackedVector; +// Leave BitNum=0 undefined. 
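// Usage sketch (illustrative): each PackedVector element occupies BitNum
// bits of the underlying BitVector rather than a full word.
#include "llvm/ADT/PackedVector.h"

void packedVectorDemo() {
  llvm::PackedVector<unsigned, 2> Vals; // unsigned values 0..3, 2 bits each
  Vals.push_back(3);
  Vals.push_back(1);
  unsigned First = Vals[0]; // 3, read back through the reference proxy
  (void)First;
}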
+template class PackedVector; } // end llvm namespace diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h index 45a40db85c04..0058d85d1ae4 100644 --- a/include/llvm/ADT/PointerIntPair.h +++ b/include/llvm/ADT/PointerIntPair.h @@ -21,8 +21,10 @@ namespace llvm { -template -struct DenseMapInfo; +template struct DenseMapInfo; + +template +struct PointerIntPairInfo; /// PointerIntPair - This class implements a pair of a pointer and small /// integer. It is designed to represent this in the space required by one @@ -38,83 +40,35 @@ struct DenseMapInfo; /// PointerIntPair, 1, bool> /// ... and the two bools will land in different bits. /// -template > +template , + typename Info = PointerIntPairInfo> class PointerIntPair { intptr_t Value; - static_assert(PtrTraits::NumLowBitsAvailable < - std::numeric_limits::digits, - "cannot use a pointer type that has all bits free"); - static_assert(IntBits <= PtrTraits::NumLowBitsAvailable, - "PointerIntPair with integer size too large for pointer"); - enum : uintptr_t { - /// PointerBitMask - The bits that come from the pointer. - PointerBitMask = - ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable)-1), - /// IntShift - The number of low bits that we reserve for other uses, and - /// keep zero. - IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable-IntBits, - - /// IntMask - This is the unshifted mask for valid bits of the int type. - IntMask = (uintptr_t)(((intptr_t)1 << IntBits)-1), - - // ShiftedIntMask - This is the bits for the integer shifted in place. - ShiftedIntMask = (uintptr_t)(IntMask << IntShift) - }; public: PointerIntPair() : Value(0) {} PointerIntPair(PointerTy PtrVal, IntType IntVal) { setPointerAndInt(PtrVal, IntVal); } - explicit PointerIntPair(PointerTy PtrVal) { - initWithPointer(PtrVal); - } + explicit PointerIntPair(PointerTy PtrVal) { initWithPointer(PtrVal); } - PointerTy getPointer() const { - return PtrTraits::getFromVoidPointer( - reinterpret_cast(Value & PointerBitMask)); - } + PointerTy getPointer() const { return Info::getPointer(Value); } - IntType getInt() const { - return (IntType)((Value >> IntShift) & IntMask); - } + IntType getInt() const { return (IntType)Info::getInt(Value); } void setPointer(PointerTy PtrVal) { - intptr_t PtrWord - = reinterpret_cast(PtrTraits::getAsVoidPointer(PtrVal)); - assert((PtrWord & ~PointerBitMask) == 0 && - "Pointer is not sufficiently aligned"); - // Preserve all low bits, just update the pointer. - Value = PtrWord | (Value & ~PointerBitMask); + Value = Info::updatePointer(Value, PtrVal); } - void setInt(IntType IntVal) { - intptr_t IntWord = static_cast(IntVal); - assert((IntWord & ~IntMask) == 0 && "Integer too large for field"); - - // Preserve all bits other than the ones we are updating. - Value &= ~ShiftedIntMask; // Remove integer field. - Value |= IntWord << IntShift; // Set new integer. 
- } + void setInt(IntType IntVal) { Value = Info::updateInt(Value, IntVal); } void initWithPointer(PointerTy PtrVal) { - intptr_t PtrWord - = reinterpret_cast(PtrTraits::getAsVoidPointer(PtrVal)); - assert((PtrWord & ~PointerBitMask) == 0 && - "Pointer is not sufficiently aligned"); - Value = PtrWord; + Value = Info::updatePointer(0, PtrVal); } void setPointerAndInt(PointerTy PtrVal, IntType IntVal) { - intptr_t PtrWord - = reinterpret_cast(PtrTraits::getAsVoidPointer(PtrVal)); - assert((PtrWord & ~PointerBitMask) == 0 && - "Pointer is not sufficiently aligned"); - intptr_t IntWord = static_cast(IntVal); - assert((IntWord & ~IntMask) == 0 && "Integer too large for field"); - - Value = PtrWord | (IntWord << IntShift); + Value = Info::updateInt(Info::updatePointer(0, PtrVal), IntVal); } PointerTy const *getAddrOfPointer() const { @@ -128,11 +82,15 @@ public: return reinterpret_cast(&Value); } - void *getOpaqueValue() const { return reinterpret_cast(Value); } - void setFromOpaqueValue(void *Val) { Value = reinterpret_cast(Val);} + void *getOpaqueValue() const { return reinterpret_cast(Value); } + void setFromOpaqueValue(void *Val) { + Value = reinterpret_cast(Val); + } static PointerIntPair getFromOpaqueValue(void *V) { - PointerIntPair P; P.setFromOpaqueValue(V); return P; + PointerIntPair P; + P.setFromOpaqueValue(V); + return P; } // Allow PointerIntPairs to be created from const void * if and only if the @@ -142,23 +100,81 @@ public: return getFromOpaqueValue(const_cast(V)); } - bool operator==(const PointerIntPair &RHS) const {return Value == RHS.Value;} - bool operator!=(const PointerIntPair &RHS) const {return Value != RHS.Value;} - bool operator<(const PointerIntPair &RHS) const {return Value < RHS.Value;} - bool operator>(const PointerIntPair &RHS) const {return Value > RHS.Value;} - bool operator<=(const PointerIntPair &RHS) const {return Value <= RHS.Value;} - bool operator>=(const PointerIntPair &RHS) const {return Value >= RHS.Value;} + bool operator==(const PointerIntPair &RHS) const { + return Value == RHS.Value; + } + bool operator!=(const PointerIntPair &RHS) const { + return Value != RHS.Value; + } + bool operator<(const PointerIntPair &RHS) const { return Value < RHS.Value; } + bool operator>(const PointerIntPair &RHS) const { return Value > RHS.Value; } + bool operator<=(const PointerIntPair &RHS) const { + return Value <= RHS.Value; + } + bool operator>=(const PointerIntPair &RHS) const { + return Value >= RHS.Value; + } +}; + +template +struct PointerIntPairInfo { + static_assert(PtrTraits::NumLowBitsAvailable < + std::numeric_limits::digits, + "cannot use a pointer type that has all bits free"); + static_assert(IntBits <= PtrTraits::NumLowBitsAvailable, + "PointerIntPair with integer size too large for pointer"); + enum : uintptr_t { + /// PointerBitMask - The bits that come from the pointer. + PointerBitMask = + ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable) - 1), + + /// IntShift - The number of low bits that we reserve for other uses, and + /// keep zero. + IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable - IntBits, + + /// IntMask - This is the unshifted mask for valid bits of the int type. + IntMask = (uintptr_t)(((intptr_t)1 << IntBits) - 1), + + // ShiftedIntMask - This is the bits for the integer shifted in place. 
+ ShiftedIntMask = (uintptr_t)(IntMask << IntShift) + }; + + static PointerT getPointer(intptr_t Value) { + return PtrTraits::getFromVoidPointer( + reinterpret_cast(Value & PointerBitMask)); + } + + static intptr_t getInt(intptr_t Value) { + return (Value >> IntShift) & IntMask; + } + + static intptr_t updatePointer(intptr_t OrigValue, PointerT Ptr) { + intptr_t PtrWord = + reinterpret_cast(PtrTraits::getAsVoidPointer(Ptr)); + assert((PtrWord & ~PointerBitMask) == 0 && + "Pointer is not sufficiently aligned"); + // Preserve all low bits, just update the pointer. + return PtrWord | (OrigValue & ~PointerBitMask); + } + + static intptr_t updateInt(intptr_t OrigValue, intptr_t Int) { + intptr_t IntWord = static_cast(Int); + assert((IntWord & ~IntMask) == 0 && "Integer too large for field"); + + // Preserve all bits other than the ones we are updating. + return (OrigValue & ~ShiftedIntMask) | IntWord << IntShift; + } }; template struct isPodLike; -template -struct isPodLike > { - static const bool value = true; +template +struct isPodLike> { + static const bool value = true; }; - + // Provide specialization of DenseMapInfo for PointerIntPair. -template -struct DenseMapInfo > { +template +struct DenseMapInfo> { typedef PointerIntPair Ty; static Ty getEmptyKey() { uintptr_t Val = static_cast(-1); @@ -178,10 +194,10 @@ struct DenseMapInfo > { }; // Teach SmallPtrSet that PointerIntPair is "basically a pointer". -template -class PointerLikeTypeTraits > { +template +class PointerLikeTypeTraits< + PointerIntPair> { public: static inline void * getAsVoidPointer(const PointerIntPair &P) { @@ -195,9 +211,7 @@ public: getFromVoidPointer(const void *P) { return PointerIntPair::getFromOpaqueValue(P); } - enum { - NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits - }; + enum { NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits }; }; } // end namespace llvm diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index f27b81113ec5..6b3fe5749ad5 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -21,492 +21,454 @@ namespace llvm { - template - struct PointerUnionTypeSelectorReturn { - typedef T Return; - }; +template struct PointerUnionTypeSelectorReturn { + typedef T Return; +}; - /// \brief Get a type based on whether two types are the same or not. For: - /// @code - /// typedef typename PointerUnionTypeSelector::Return Ret; - /// @endcode - /// Ret will be EQ type if T1 is same as T2 or NE type otherwise. - template - struct PointerUnionTypeSelector { - typedef typename PointerUnionTypeSelectorReturn::Return Return; - }; +/// Get a type based on whether two types are the same or not. +/// +/// For: +/// +/// \code +/// typedef typename PointerUnionTypeSelector::Return Ret; +/// \endcode +/// +/// Ret will be EQ type if T1 is same as T2 or NE type otherwise. 
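// Usage sketch (illustrative): Ret is int when T is float*, and bool
// otherwise.
#include "llvm/ADT/PointerUnion.h"
#include <type_traits>

template <typename T> struct SelectDemo {
  typedef typename llvm::PointerUnionTypeSelector<float *, T, int,
                                                  bool>::Return Ret;
};
static_assert(std::is_same<SelectDemo<float *>::Ret, int>::value, "");
static_assert(std::is_same<SelectDemo<double *>::Ret, bool>::value, "");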
+template +struct PointerUnionTypeSelector { + typedef typename PointerUnionTypeSelectorReturn::Return Return; +}; - template - struct PointerUnionTypeSelector { - typedef typename PointerUnionTypeSelectorReturn::Return Return; - }; +template +struct PointerUnionTypeSelector { + typedef typename PointerUnionTypeSelectorReturn::Return Return; +}; - template - struct PointerUnionTypeSelectorReturn< - PointerUnionTypeSelector > { - typedef typename PointerUnionTypeSelector::Return - Return; - }; +template +struct PointerUnionTypeSelectorReturn< + PointerUnionTypeSelector> { + typedef + typename PointerUnionTypeSelector::Return Return; +}; - /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion - /// for the two template arguments. - template - class PointerUnionUIntTraits { - public: - static inline void *getAsVoidPointer(void *P) { return P; } - static inline void *getFromVoidPointer(void *P) { return P; } - enum { - PT1BitsAv = (int)(PointerLikeTypeTraits::NumLowBitsAvailable), - PT2BitsAv = (int)(PointerLikeTypeTraits::NumLowBitsAvailable), - NumLowBitsAvailable = PT1BitsAv < PT2BitsAv ? PT1BitsAv : PT2BitsAv - }; +/// Provide PointerLikeTypeTraits for void* that is used by PointerUnion +/// for the two template arguments. +template class PointerUnionUIntTraits { +public: + static inline void *getAsVoidPointer(void *P) { return P; } + static inline void *getFromVoidPointer(void *P) { return P; } + enum { + PT1BitsAv = (int)(PointerLikeTypeTraits::NumLowBitsAvailable), + PT2BitsAv = (int)(PointerLikeTypeTraits::NumLowBitsAvailable), + NumLowBitsAvailable = PT1BitsAv < PT2BitsAv ? PT1BitsAv : PT2BitsAv }; +}; - /// PointerUnion - This implements a discriminated union of two pointer types, - /// and keeps the discriminator bit-mangled into the low bits of the pointer. - /// This allows the implementation to be extremely efficient in space, but - /// permits a very natural and type-safe API. +/// A discriminated union of two pointer types, with the discriminator in the +/// low bit of the pointer. +/// +/// This implementation is extremely efficient in space due to leveraging the +/// low bits of the pointer, while exposing a natural and type-safe API. +/// +/// Common use patterns would be something like this: +/// PointerUnion P; +/// P = (int*)0; +/// printf("%d %d", P.is(), P.is()); // prints "1 0" +/// X = P.get(); // ok. +/// Y = P.get(); // runtime assertion failure. +/// Z = P.get(); // compile time failure. +/// P = (float*)0; +/// Y = P.get(); // ok. +/// X = P.get(); // runtime assertion failure. +template class PointerUnion { +public: + typedef PointerIntPair> + ValTy; + +private: + ValTy Val; + + struct IsPT1 { + static const int Num = 0; + }; + struct IsPT2 { + static const int Num = 1; + }; + template struct UNION_DOESNT_CONTAIN_TYPE {}; + +public: + PointerUnion() {} + + PointerUnion(PT1 V) + : Val(const_cast( + PointerLikeTypeTraits::getAsVoidPointer(V))) {} + PointerUnion(PT2 V) + : Val(const_cast(PointerLikeTypeTraits::getAsVoidPointer(V)), + 1) {} + + /// Test if the pointer held in the union is null, regardless of + /// which type it is. + bool isNull() const { + // Convert from the void* to one of the pointer types, to make sure that + // we recursively strip off low bits if we have a nested PointerUnion. + return !PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); + } + explicit operator bool() const { return !isNull(); } + + /// Test if the Union currently holds the type matching T. 
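// Usage sketch (illustrative), spelling out template arguments of the kind
// the class comment above assumes:
#include "llvm/ADT/PointerUnion.h"

void pointerUnionDemo() {
  llvm::PointerUnion<int *, float *> P;
  int X = 0;
  P = &X;
  bool IsInt = P.is<int *>();        // true
  int *IP = P.get<int *>();          // ok
  float *FP = P.dyn_cast<float *>(); // null: P currently holds an int*
  (void)IsInt; (void)IP; (void)FP;
}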
+ template int is() const { + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, IsPT1, ::llvm::PointerUnionTypeSelector< + PT2, T, IsPT2, UNION_DOESNT_CONTAIN_TYPE>>::Return + Ty; + int TyNo = Ty::Num; + return static_cast(Val.getInt()) == TyNo; + } + + /// Returns the value of the specified pointer type. /// - /// Common use patterns would be something like this: - /// PointerUnion P; - /// P = (int*)0; - /// printf("%d %d", P.is(), P.is()); // prints "1 0" - /// X = P.get(); // ok. - /// Y = P.get(); // runtime assertion failure. - /// Z = P.get(); // compile time failure. - /// P = (float*)0; - /// Y = P.get(); // ok. - /// X = P.get(); // runtime assertion failure. - template - class PointerUnion { - public: - typedef PointerIntPair > ValTy; - private: - ValTy Val; + /// If the specified pointer type is incorrect, assert. + template T get() const { + assert(is() && "Invalid accessor called"); + return PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); + } - struct IsPT1 { - static const int Num = 0; - }; - struct IsPT2 { - static const int Num = 1; - }; - template - struct UNION_DOESNT_CONTAIN_TYPE { }; + /// Returns the current pointer if it is of the specified pointer type, + /// otherwises returns null. + template T dyn_cast() const { + if (is()) + return get(); + return T(); + } - public: - PointerUnion() {} + /// If the union is set to the first pointer type get an address pointing to + /// it. + PT1 const *getAddrOfPtr1() const { + return const_cast(this)->getAddrOfPtr1(); + } - PointerUnion(PT1 V) : Val( - const_cast(PointerLikeTypeTraits::getAsVoidPointer(V))) { - } - PointerUnion(PT2 V) : Val( - const_cast(PointerLikeTypeTraits::getAsVoidPointer(V)), 1) { - } + /// If the union is set to the first pointer type get an address pointing to + /// it. + PT1 *getAddrOfPtr1() { + assert(is() && "Val is not the first pointer"); + assert( + get() == Val.getPointer() && + "Can't get the address because PointerLikeTypeTraits changes the ptr"); + return (PT1 *)Val.getAddrOfPointer(); + } - /// isNull - Return true if the pointer held in the union is null, - /// regardless of which type it is. - bool isNull() const { - // Convert from the void* to one of the pointer types, to make sure that - // we recursively strip off low bits if we have a nested PointerUnion. - return !PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); - } - explicit operator bool() const { return !isNull(); } + /// Assignment from nullptr which just clears the union. + const PointerUnion &operator=(std::nullptr_t) { + Val.initWithPointer(nullptr); + return *this; + } - /// is() return true if the Union currently holds the type matching T. - template - int is() const { - typedef typename - ::llvm::PointerUnionTypeSelector > >::Return Ty; - int TyNo = Ty::Num; - return static_cast(Val.getInt()) == TyNo; - } - - /// get() - Return the value of the specified pointer type. If the - /// specified pointer type is incorrect, assert. - template - T get() const { - assert(is() && "Invalid accessor called"); - return PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); - } - - /// dyn_cast() - If the current value is of the specified pointer type, - /// return it, otherwise return null. - template - T dyn_cast() const { - if (is()) return get(); - return T(); - } - - /// \brief If the union is set to the first pointer type get an address - /// pointing to it. 
- PT1 const *getAddrOfPtr1() const { - return const_cast(this)->getAddrOfPtr1(); - } - - /// \brief If the union is set to the first pointer type get an address - /// pointing to it. - PT1 *getAddrOfPtr1() { - assert(is() && "Val is not the first pointer"); - assert(get() == Val.getPointer() && - "Can't get the address because PointerLikeTypeTraits changes the ptr"); - return (PT1 *)Val.getAddrOfPointer(); - } - - /// \brief Assignment from nullptr which just clears the union. - const PointerUnion &operator=(std::nullptr_t) { - Val.initWithPointer(nullptr); - return *this; - } - - /// Assignment operators - Allow assigning into this union from either - /// pointer type, setting the discriminator to remember what it came from. - const PointerUnion &operator=(const PT1 &RHS) { - Val.initWithPointer( - const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS))); - return *this; - } - const PointerUnion &operator=(const PT2 &RHS) { - Val.setPointerAndInt( + /// Assignment operators - Allow assigning into this union from either + /// pointer type, setting the discriminator to remember what it came from. + const PointerUnion &operator=(const PT1 &RHS) { + Val.initWithPointer( + const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS))); + return *this; + } + const PointerUnion &operator=(const PT2 &RHS) { + Val.setPointerAndInt( const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS)), 1); - return *this; - } + return *this; + } - void *getOpaqueValue() const { return Val.getOpaqueValue(); } - static inline PointerUnion getFromOpaqueValue(void *VP) { - PointerUnion V; - V.Val = ValTy::getFromOpaqueValue(VP); - return V; - } + void *getOpaqueValue() const { return Val.getOpaqueValue(); } + static inline PointerUnion getFromOpaqueValue(void *VP) { + PointerUnion V; + V.Val = ValTy::getFromOpaqueValue(VP); + return V; + } +}; + +template +static bool operator==(PointerUnion lhs, PointerUnion rhs) { + return lhs.getOpaqueValue() == rhs.getOpaqueValue(); +} + +template +static bool operator!=(PointerUnion lhs, PointerUnion rhs) { + return lhs.getOpaqueValue() != rhs.getOpaqueValue(); +} + +template +static bool operator<(PointerUnion lhs, PointerUnion rhs) { + return lhs.getOpaqueValue() < rhs.getOpaqueValue(); +} + +// Teach SmallPtrSet that PointerUnion is "basically a pointer", that has +// # low bits available = min(PT1bits,PT2bits)-1. +template +class PointerLikeTypeTraits> { +public: + static inline void *getAsVoidPointer(const PointerUnion &P) { + return P.getOpaqueValue(); + } + static inline PointerUnion getFromVoidPointer(void *P) { + return PointerUnion::getFromOpaqueValue(P); + } + + // The number of bits available are the min of the two pointer types. + enum { + NumLowBitsAvailable = PointerLikeTypeTraits< + typename PointerUnion::ValTy>::NumLowBitsAvailable }; +}; - template - static bool operator==(PointerUnion lhs, - PointerUnion rhs) { - return lhs.getOpaqueValue() == rhs.getOpaqueValue(); - } +/// A pointer union of three pointer types. See documentation for PointerUnion +/// for usage. 
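// Usage sketch (illustrative, assuming typically aligned pointee types so
// two low bits are free for the nested discriminators):
#include "llvm/ADT/PointerUnion.h"
#include <cassert>

void pointerUnion3Demo() {
  llvm::PointerUnion3<int *, float *, long *> U;
  long L = 1;
  U = &L;
  assert(U.is<long *>() && !U.is<int *>());
  assert(U.get<long *>() == &L);
}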
+template class PointerUnion3 { +public: + typedef PointerUnion InnerUnion; + typedef PointerUnion ValTy; - template - static bool operator!=(PointerUnion lhs, - PointerUnion rhs) { - return lhs.getOpaqueValue() != rhs.getOpaqueValue(); - } +private: + ValTy Val; - template - static bool operator<(PointerUnion lhs, - PointerUnion rhs) { - return lhs.getOpaqueValue() < rhs.getOpaqueValue(); - } - - // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has - // # low bits available = min(PT1bits,PT2bits)-1. - template - class PointerLikeTypeTraits > { - public: - static inline void * - getAsVoidPointer(const PointerUnion &P) { - return P.getOpaqueValue(); - } - static inline PointerUnion - getFromVoidPointer(void *P) { - return PointerUnion::getFromOpaqueValue(P); - } - - // The number of bits available are the min of the two pointer types. - enum { - NumLowBitsAvailable = - PointerLikeTypeTraits::ValTy> - ::NumLowBitsAvailable - }; - }; - - - /// PointerUnion3 - This is a pointer union of three pointer types. See - /// documentation for PointerUnion for usage. - template - class PointerUnion3 { - public: - typedef PointerUnion InnerUnion; - typedef PointerUnion ValTy; - private: + struct IsInnerUnion { ValTy Val; - - struct IsInnerUnion { - ValTy Val; - IsInnerUnion(ValTy val) : Val(val) { } - template - int is() const { - return Val.template is() && - Val.template get().template is(); - } - template - T get() const { - return Val.template get().template get(); - } - }; - - struct IsPT3 { - ValTy Val; - IsPT3(ValTy val) : Val(val) { } - template - int is() const { - return Val.template is(); - } - template - T get() const { - return Val.template get(); - } - }; - - public: - PointerUnion3() {} - - PointerUnion3(PT1 V) { - Val = InnerUnion(V); + IsInnerUnion(ValTy val) : Val(val) {} + template int is() const { + return Val.template is() && + Val.template get().template is(); } - PointerUnion3(PT2 V) { - Val = InnerUnion(V); - } - PointerUnion3(PT3 V) { - Val = V; - } - - /// isNull - Return true if the pointer held in the union is null, - /// regardless of which type it is. - bool isNull() const { return Val.isNull(); } - explicit operator bool() const { return !isNull(); } - - /// is() return true if the Union currently holds the type matching T. - template - int is() const { - // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. - typedef typename - ::llvm::PointerUnionTypeSelector - >::Return Ty; - return Ty(Val).template is(); - } - - /// get() - Return the value of the specified pointer type. If the - /// specified pointer type is incorrect, assert. - template - T get() const { - assert(is() && "Invalid accessor called"); - // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. - typedef typename - ::llvm::PointerUnionTypeSelector - >::Return Ty; - return Ty(Val).template get(); - } - - /// dyn_cast() - If the current value is of the specified pointer type, - /// return it, otherwise return null. - template - T dyn_cast() const { - if (is()) return get(); - return T(); - } - - /// \brief Assignment from nullptr which just clears the union. - const PointerUnion3 &operator=(std::nullptr_t) { - Val = nullptr; - return *this; - } - - /// Assignment operators - Allow assigning into this union from either - /// pointer type, setting the discriminator to remember what it came from. 
- const PointerUnion3 &operator=(const PT1 &RHS) { - Val = InnerUnion(RHS); - return *this; - } - const PointerUnion3 &operator=(const PT2 &RHS) { - Val = InnerUnion(RHS); - return *this; - } - const PointerUnion3 &operator=(const PT3 &RHS) { - Val = RHS; - return *this; - } - - void *getOpaqueValue() const { return Val.getOpaqueValue(); } - static inline PointerUnion3 getFromOpaqueValue(void *VP) { - PointerUnion3 V; - V.Val = ValTy::getFromOpaqueValue(VP); - return V; + template T get() const { + return Val.template get().template get(); } }; - // Teach SmallPtrSet that PointerUnion3 is "basically a pointer", that has - // # low bits available = min(PT1bits,PT2bits,PT2bits)-2. - template - class PointerLikeTypeTraits > { - public: - static inline void * - getAsVoidPointer(const PointerUnion3 &P) { - return P.getOpaqueValue(); - } - static inline PointerUnion3 - getFromVoidPointer(void *P) { - return PointerUnion3::getFromOpaqueValue(P); - } - - // The number of bits available are the min of the two pointer types. - enum { - NumLowBitsAvailable = - PointerLikeTypeTraits::ValTy> - ::NumLowBitsAvailable - }; - }; - - /// PointerUnion4 - This is a pointer union of four pointer types. See - /// documentation for PointerUnion for usage. - template - class PointerUnion4 { - public: - typedef PointerUnion InnerUnion1; - typedef PointerUnion InnerUnion2; - typedef PointerUnion ValTy; - private: + struct IsPT3 { ValTy Val; - public: - PointerUnion4() {} - - PointerUnion4(PT1 V) { - Val = InnerUnion1(V); - } - PointerUnion4(PT2 V) { - Val = InnerUnion1(V); - } - PointerUnion4(PT3 V) { - Val = InnerUnion2(V); - } - PointerUnion4(PT4 V) { - Val = InnerUnion2(V); - } - - /// isNull - Return true if the pointer held in the union is null, - /// regardless of which type it is. - bool isNull() const { return Val.isNull(); } - explicit operator bool() const { return !isNull(); } - - /// is() return true if the Union currently holds the type matching T. - template - int is() const { - // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. - typedef typename - ::llvm::PointerUnionTypeSelector - >::Return Ty; - return Val.template is() && - Val.template get().template is(); - } - - /// get() - Return the value of the specified pointer type. If the - /// specified pointer type is incorrect, assert. - template - T get() const { - assert(is() && "Invalid accessor called"); - // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. - typedef typename - ::llvm::PointerUnionTypeSelector - >::Return Ty; - return Val.template get().template get(); - } - - /// dyn_cast() - If the current value is of the specified pointer type, - /// return it, otherwise return null. - template - T dyn_cast() const { - if (is()) return get(); - return T(); - } - - /// \brief Assignment from nullptr which just clears the union. - const PointerUnion4 &operator=(std::nullptr_t) { - Val = nullptr; - return *this; - } - - /// Assignment operators - Allow assigning into this union from either - /// pointer type, setting the discriminator to remember what it came from. 
- const PointerUnion4 &operator=(const PT1 &RHS) { - Val = InnerUnion1(RHS); - return *this; - } - const PointerUnion4 &operator=(const PT2 &RHS) { - Val = InnerUnion1(RHS); - return *this; - } - const PointerUnion4 &operator=(const PT3 &RHS) { - Val = InnerUnion2(RHS); - return *this; - } - const PointerUnion4 &operator=(const PT4 &RHS) { - Val = InnerUnion2(RHS); - return *this; - } - - void *getOpaqueValue() const { return Val.getOpaqueValue(); } - static inline PointerUnion4 getFromOpaqueValue(void *VP) { - PointerUnion4 V; - V.Val = ValTy::getFromOpaqueValue(VP); - return V; - } + IsPT3(ValTy val) : Val(val) {} + template int is() const { return Val.template is(); } + template T get() const { return Val.template get(); } }; - // Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has - // # low bits available = min(PT1bits,PT2bits,PT2bits)-2. - template - class PointerLikeTypeTraits > { - public: - static inline void * - getAsVoidPointer(const PointerUnion4 &P) { - return P.getOpaqueValue(); - } - static inline PointerUnion4 - getFromVoidPointer(void *P) { - return PointerUnion4::getFromOpaqueValue(P); - } +public: + PointerUnion3() {} - // The number of bits available are the min of the two pointer types. - enum { - NumLowBitsAvailable = - PointerLikeTypeTraits::ValTy> - ::NumLowBitsAvailable - }; + PointerUnion3(PT1 V) { Val = InnerUnion(V); } + PointerUnion3(PT2 V) { Val = InnerUnion(V); } + PointerUnion3(PT3 V) { Val = V; } + + /// Test if the pointer held in the union is null, regardless of + /// which type it is. + bool isNull() const { return Val.isNull(); } + explicit operator bool() const { return !isNull(); } + + /// Test if the Union currently holds the type matching T. + template int is() const { + // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, IsInnerUnion, + ::llvm::PointerUnionTypeSelector>::Return + Ty; + return Ty(Val).template is(); + } + + /// Returns the value of the specified pointer type. + /// + /// If the specified pointer type is incorrect, assert. + template T get() const { + assert(is() && "Invalid accessor called"); + // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, IsInnerUnion, + ::llvm::PointerUnionTypeSelector>::Return + Ty; + return Ty(Val).template get(); + } + + /// Returns the current pointer if it is of the specified pointer type, + /// otherwises returns null. + template T dyn_cast() const { + if (is()) + return get(); + return T(); + } + + /// Assignment from nullptr which just clears the union. + const PointerUnion3 &operator=(std::nullptr_t) { + Val = nullptr; + return *this; + } + + /// Assignment operators - Allow assigning into this union from either + /// pointer type, setting the discriminator to remember what it came from. + const PointerUnion3 &operator=(const PT1 &RHS) { + Val = InnerUnion(RHS); + return *this; + } + const PointerUnion3 &operator=(const PT2 &RHS) { + Val = InnerUnion(RHS); + return *this; + } + const PointerUnion3 &operator=(const PT3 &RHS) { + Val = RHS; + return *this; + } + + void *getOpaqueValue() const { return Val.getOpaqueValue(); } + static inline PointerUnion3 getFromOpaqueValue(void *VP) { + PointerUnion3 V; + V.Val = ValTy::getFromOpaqueValue(VP); + return V; + } +}; + +// Teach SmallPtrSet that PointerUnion3 is "basically a pointer", that has +// # low bits available = min(PT1bits,PT2bits,PT2bits)-2. 
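// Usage sketch (illustrative): these traits let pointer-based containers
// treat the union itself as a pointer, assuming the pointee types leave
// enough free low bits.
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallPtrSet.h"

void unionInSetDemo() {
  llvm::SmallPtrSet<llvm::PointerUnion3<int *, float *, long *>, 4> Set;
  int X = 0;
  Set.insert(&X); // &X is implicitly wrapped into the union
}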
+template +class PointerLikeTypeTraits> { +public: + static inline void *getAsVoidPointer(const PointerUnion3 &P) { + return P.getOpaqueValue(); + } + static inline PointerUnion3 getFromVoidPointer(void *P) { + return PointerUnion3::getFromOpaqueValue(P); + } + + // The number of bits available are the min of the two pointer types. + enum { + NumLowBitsAvailable = PointerLikeTypeTraits< + typename PointerUnion3::ValTy>::NumLowBitsAvailable }; +}; - // Teach DenseMap how to use PointerUnions as keys. - template - struct DenseMapInfo > { - typedef PointerUnion Pair; - typedef DenseMapInfo FirstInfo; - typedef DenseMapInfo SecondInfo; +/// A pointer union of four pointer types. See documentation for PointerUnion +/// for usage. +template +class PointerUnion4 { +public: + typedef PointerUnion InnerUnion1; + typedef PointerUnion InnerUnion2; + typedef PointerUnion ValTy; - static inline Pair getEmptyKey() { - return Pair(FirstInfo::getEmptyKey()); - } - static inline Pair getTombstoneKey() { - return Pair(FirstInfo::getTombstoneKey()); - } - static unsigned getHashValue(const Pair &PairVal) { - intptr_t key = (intptr_t)PairVal.getOpaqueValue(); - return DenseMapInfo::getHashValue(key); - } - static bool isEqual(const Pair &LHS, const Pair &RHS) { - return LHS.template is() == RHS.template is() && - (LHS.template is() ? - FirstInfo::isEqual(LHS.template get(), - RHS.template get()) : - SecondInfo::isEqual(LHS.template get(), - RHS.template get())); - } +private: + ValTy Val; + +public: + PointerUnion4() {} + + PointerUnion4(PT1 V) { Val = InnerUnion1(V); } + PointerUnion4(PT2 V) { Val = InnerUnion1(V); } + PointerUnion4(PT3 V) { Val = InnerUnion2(V); } + PointerUnion4(PT4 V) { Val = InnerUnion2(V); } + + /// Test if the pointer held in the union is null, regardless of + /// which type it is. + bool isNull() const { return Val.isNull(); } + explicit operator bool() const { return !isNull(); } + + /// Test if the Union currently holds the type matching T. + template int is() const { + // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, InnerUnion1, ::llvm::PointerUnionTypeSelector< + PT2, T, InnerUnion1, InnerUnion2>>::Return Ty; + return Val.template is() && Val.template get().template is(); + } + + /// Returns the value of the specified pointer type. + /// + /// If the specified pointer type is incorrect, assert. + template T get() const { + assert(is() && "Invalid accessor called"); + // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. + typedef typename ::llvm::PointerUnionTypeSelector< + PT1, T, InnerUnion1, ::llvm::PointerUnionTypeSelector< + PT2, T, InnerUnion1, InnerUnion2>>::Return Ty; + return Val.template get().template get(); + } + + /// Returns the current pointer if it is of the specified pointer type, + /// otherwises returns null. + template T dyn_cast() const { + if (is()) + return get(); + return T(); + } + + /// Assignment from nullptr which just clears the union. + const PointerUnion4 &operator=(std::nullptr_t) { + Val = nullptr; + return *this; + } + + /// Assignment operators - Allow assigning into this union from either + /// pointer type, setting the discriminator to remember what it came from. 
+ const PointerUnion4 &operator=(const PT1 &RHS) { + Val = InnerUnion1(RHS); + return *this; + } + const PointerUnion4 &operator=(const PT2 &RHS) { + Val = InnerUnion1(RHS); + return *this; + } + const PointerUnion4 &operator=(const PT3 &RHS) { + Val = InnerUnion2(RHS); + return *this; + } + const PointerUnion4 &operator=(const PT4 &RHS) { + Val = InnerUnion2(RHS); + return *this; + } + + void *getOpaqueValue() const { return Val.getOpaqueValue(); } + static inline PointerUnion4 getFromOpaqueValue(void *VP) { + PointerUnion4 V; + V.Val = ValTy::getFromOpaqueValue(VP); + return V; + } +}; + +// Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has +// # low bits available = min(PT1bits,PT2bits,PT2bits)-2. +template +class PointerLikeTypeTraits> { +public: + static inline void * + getAsVoidPointer(const PointerUnion4 &P) { + return P.getOpaqueValue(); + } + static inline PointerUnion4 getFromVoidPointer(void *P) { + return PointerUnion4::getFromOpaqueValue(P); + } + + // The number of bits available are the min of the two pointer types. + enum { + NumLowBitsAvailable = PointerLikeTypeTraits< + typename PointerUnion4::ValTy>::NumLowBitsAvailable }; +}; + +// Teach DenseMap how to use PointerUnions as keys. +template struct DenseMapInfo> { + typedef PointerUnion Pair; + typedef DenseMapInfo FirstInfo; + typedef DenseMapInfo SecondInfo; + + static inline Pair getEmptyKey() { return Pair(FirstInfo::getEmptyKey()); } + static inline Pair getTombstoneKey() { + return Pair(FirstInfo::getTombstoneKey()); + } + static unsigned getHashValue(const Pair &PairVal) { + intptr_t key = (intptr_t)PairVal.getOpaqueValue(); + return DenseMapInfo::getHashValue(key); + } + static bool isEqual(const Pair &LHS, const Pair &RHS) { + return LHS.template is() == RHS.template is() && + (LHS.template is() ? FirstInfo::isEqual(LHS.template get(), + RHS.template get()) + : SecondInfo::isEqual(LHS.template get(), + RHS.template get())); + } +}; + } #endif diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h index 759a2db24f2a..ce343a161b7b 100644 --- a/include/llvm/ADT/PostOrderIterator.h +++ b/include/llvm/ADT/PostOrderIterator.h @@ -215,8 +215,8 @@ struct ipo_iterator : public po_iterator, SetType, External > { }; template -ipo_iterator ipo_begin(const T &G, bool Reverse = false) { - return ipo_iterator::begin(G, Reverse); +ipo_iterator ipo_begin(const T &G) { + return ipo_iterator::begin(G); } template @@ -225,8 +225,8 @@ ipo_iterator ipo_end(const T &G){ } template -iterator_range> inverse_post_order(const T &G, bool Reverse = false) { - return make_range(ipo_begin(G, Reverse), ipo_end(G)); +iterator_range> inverse_post_order(const T &G) { + return make_range(ipo_begin(G), ipo_end(G)); } // Provide global definitions of external inverse postorder iterators... diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index b68345a1dcf6..d4360fa8d218 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -196,6 +196,41 @@ inline mapped_iterator map_iterator(const ItTy &I, FuncTy F) { return mapped_iterator(I, F); } +/// \brief Metafunction to determine if type T has a member called rbegin(). +template struct has_rbegin { + template static char(&f(const U &, decltype(&U::rbegin)))[1]; + static char(&f(...))[2]; + const static bool value = sizeof(f(std::declval(), nullptr)) == 1; +}; + +// Returns an iterator_range over the given container which iterates in reverse. 
+// Note that the container must have rbegin()/rend() methods for this to work. +template +auto reverse(ContainerTy &&C, + typename std::enable_if::value>::type * = + nullptr) -> decltype(make_range(C.rbegin(), C.rend())) { + return make_range(C.rbegin(), C.rend()); +} + +// Returns a std::reverse_iterator wrapped around the given iterator. +template +std::reverse_iterator make_reverse_iterator(IteratorTy It) { + return std::reverse_iterator(It); +} + +// Returns an iterator_range over the given container which iterates in reverse. +// Note that the container must have begin()/end() methods which return +// bidirectional iterators for this to work. +template +auto reverse( + ContainerTy &&C, + typename std::enable_if::value>::type * = nullptr) + -> decltype(make_range(llvm::make_reverse_iterator(std::end(C)), + llvm::make_reverse_iterator(std::begin(C)))) { + return make_range(llvm::make_reverse_iterator(std::end(C)), + llvm::make_reverse_iterator(std::begin(C))); +} + //===----------------------------------------------------------------------===// // Extra additions to //===----------------------------------------------------------------------===// @@ -329,13 +364,28 @@ void DeleteContainerSeconds(Container &C) { } /// Provide wrappers to std::all_of which take ranges instead of having to pass -/// being/end explicitly. +/// begin/end explicitly. template bool all_of(R &&Range, UnaryPredicate &&P) { return std::all_of(Range.begin(), Range.end(), std::forward(P)); } +/// Provide wrappers to std::any_of which take ranges instead of having to pass +/// begin/end explicitly. +template +bool any_of(R &&Range, UnaryPredicate &&P) { + return std::any_of(Range.begin(), Range.end(), + std::forward(P)); +} + +/// Provide wrappers to std::find which take ranges instead of having to pass +/// begin/end explicitly. +template +auto find(R &&Range, const T &val) -> decltype(Range.begin()) { + return std::find(Range.begin(), Range.end(), val); +} + //===----------------------------------------------------------------------===// // Extra additions to //===----------------------------------------------------------------------===// diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h index 5abe76c12259..4af3d6d37e33 100644 --- a/include/llvm/ADT/ScopedHashTable.h +++ b/include/llvm/ADT/ScopedHashTable.h @@ -1,4 +1,4 @@ -//===- ScopedHashTable.h - A simple scoped hash table ---------------------===// +//===- ScopedHashTable.h - A simple scoped hash table -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -47,8 +47,8 @@ class ScopedHashTableVal { K Key; V Val; ScopedHashTableVal(const K &key, const V &val) : Key(key), Val(val) {} -public: +public: const K &getKey() const { return Key; } const V &getValue() const { return Val; } V &getValue() { return Val; } @@ -56,7 +56,7 @@ public: ScopedHashTableVal *getNextForKey() { return NextForKey; } const ScopedHashTableVal *getNextForKey() const { return NextForKey; } ScopedHashTableVal *getNextInScope() { return NextInScope; } - + template static ScopedHashTableVal *Create(ScopedHashTableVal *nextInScope, ScopedHashTableVal *nextForKey, @@ -66,12 +66,11 @@ public: // Set up the value. new (New) ScopedHashTableVal(key, val); New->NextInScope = nextInScope; - New->NextForKey = nextForKey; + New->NextForKey = nextForKey; return New; } - - template - void Destroy(AllocatorTy &Allocator) { + + template void Destroy(AllocatorTy &Allocator) { // Free memory referenced by the item. 
this->~ScopedHashTableVal(); Allocator.Deallocate(this); @@ -90,15 +89,16 @@ class ScopedHashTableScope { /// LastValInScope - This is the last value that was inserted for this scope /// or null if none have been inserted yet. ScopedHashTableVal *LastValInScope; - void operator=(ScopedHashTableScope&) = delete; - ScopedHashTableScope(ScopedHashTableScope&) = delete; + void operator=(ScopedHashTableScope &) = delete; + ScopedHashTableScope(ScopedHashTableScope &) = delete; + public: ScopedHashTableScope(ScopedHashTable &HT); ~ScopedHashTableScope(); ScopedHashTableScope *getParentScope() { return PrevScope; } const ScopedHashTableScope *getParentScope() const { return PrevScope; } - + private: friend class ScopedHashTable; ScopedHashTableVal *getLastValInScope() { @@ -109,10 +109,10 @@ private: } }; - -template > +template > class ScopedHashTableIterator { ScopedHashTableVal *Node; + public: ScopedHashTableIterator(ScopedHashTableVal *node) : Node(node) {} @@ -141,7 +141,6 @@ public: } }; - template class ScopedHashTable { public: @@ -149,23 +148,24 @@ public: /// to the name of the scope for this hash table. typedef ScopedHashTableScope ScopeTy; typedef unsigned size_type; + private: typedef ScopedHashTableVal ValTy; DenseMap TopLevelMap; ScopeTy *CurScope; - + AllocatorTy Allocator; - - ScopedHashTable(const ScopedHashTable&); // NOT YET IMPLEMENTED - void operator=(const ScopedHashTable&); // NOT YET IMPLEMENTED + + ScopedHashTable(const ScopedHashTable &); // NOT YET IMPLEMENTED + void operator=(const ScopedHashTable &); // NOT YET IMPLEMENTED friend class ScopedHashTableScope; + public: ScopedHashTable() : CurScope(nullptr) {} ScopedHashTable(AllocatorTy A) : CurScope(0), Allocator(A) {} ~ScopedHashTable() { assert(!CurScope && TopLevelMap.empty() && "Scope imbalance!"); } - /// Access to the allocator. AllocatorTy &getAllocator() { return Allocator; } @@ -180,7 +180,7 @@ public: typename DenseMap::iterator I = TopLevelMap.find(Key); if (I != TopLevelMap.end()) return I->second->getValue(); - + return V(); } @@ -198,7 +198,7 @@ public: if (I == TopLevelMap.end()) return end(); return iterator(I->second); } - + ScopeTy *getCurScope() { return CurScope; } const ScopeTy *getCurScope() const { return CurScope; } diff --git a/include/llvm/ADT/SetOperations.h b/include/llvm/ADT/SetOperations.h index 71f5db380f6e..7c9f2fbe066e 100644 --- a/include/llvm/ADT/SetOperations.h +++ b/include/llvm/ADT/SetOperations.h @@ -39,7 +39,7 @@ bool set_union(S1Ty &S1, const S2Ty &S2) { template void set_intersect(S1Ty &S1, const S2Ty &S2) { for (typename S1Ty::iterator I = S1.begin(); I != S1.end();) { - const typename S1Ty::key_type &E = *I; + const auto &E = *I; ++I; if (!S2.count(E)) S1.erase(E); // Erase element if not in S2 } diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h index a7fd408c854a..bc563570c203 100644 --- a/include/llvm/ADT/SetVector.h +++ b/include/llvm/ADT/SetVector.h @@ -20,6 +20,7 @@ #ifndef LLVM_ADT_SETVECTOR_H #define LLVM_ADT_SETVECTOR_H +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallSet.h" #include #include @@ -33,7 +34,7 @@ namespace llvm { /// property of a deterministic iteration order. The order of iteration is the /// order of insertion. 
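// Usage sketch (illustrative): membership tests are set-backed, but
// iteration visits elements in insertion order; the reverse iterators added
// in this hunk visit them newest-first.
#include "llvm/ADT/SetVector.h"

void setVectorDemo() {
  llvm::SetVector<int> SV;
  SV.insert(3);
  SV.insert(1);
  SV.insert(3);              // duplicate, ignored
  int Newest = *SV.rbegin(); // 1
  (void)Newest;
}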
template , - typename Set = SmallSet > + typename Set = DenseSet> class SetVector { public: typedef T value_type; @@ -44,6 +45,8 @@ public: typedef Vector vector_type; typedef typename vector_type::const_iterator iterator; typedef typename vector_type::const_iterator const_iterator; + typedef typename vector_type::const_reverse_iterator reverse_iterator; + typedef typename vector_type::const_reverse_iterator const_reverse_iterator; typedef typename vector_type::size_type size_type; /// \brief Construct an empty SetVector @@ -55,6 +58,8 @@ public: insert(Start, End); } + ArrayRef getArrayRef() const { return vector_; } + /// \brief Determine if the SetVector is empty or not. bool empty() const { return vector_.empty(); @@ -85,6 +90,26 @@ public: return vector_.end(); } + /// \brief Get an reverse_iterator to the end of the SetVector. + reverse_iterator rbegin() { + return vector_.rbegin(); + } + + /// \brief Get a const_reverse_iterator to the end of the SetVector. + const_reverse_iterator rbegin() const { + return vector_.rbegin(); + } + + /// \brief Get a reverse_iterator to the beginning of the SetVector. + reverse_iterator rend() { + return vector_.rend(); + } + + /// \brief Get a const_reverse_iterator to the beginning of the SetVector. + const_reverse_iterator rend() const { + return vector_.rend(); + } + /// \brief Return the last element of the SetVector. const T &back() const { assert(!empty() && "Cannot call back() on empty SetVector!"); @@ -150,7 +175,6 @@ public: return true; } - /// \brief Count the number of elements of a given key in the SetVector. /// \returns 0 if the element is not in the SetVector, 1 if it is. size_type count(const key_type &key) const { @@ -169,7 +193,7 @@ public: set_.erase(back()); vector_.pop_back(); } - + T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val() { T Ret = back(); pop_back(); diff --git a/include/llvm/ADT/SmallBitVector.h b/include/llvm/ADT/SmallBitVector.h index ae3d645396fd..4aa3bc217f41 100644 --- a/include/llvm/ADT/SmallBitVector.h +++ b/include/llvm/ADT/SmallBitVector.h @@ -551,19 +551,18 @@ public: } private: - template + template void applyMask(const uint32_t *Mask, unsigned MaskWords) { - if (NumBaseBits == 64 && MaskWords >= 2) { - uint64_t M = Mask[0] | (uint64_t(Mask[1]) << 32); - if (InvertMask) M = ~M; - if (AddBits) setSmallBits(getSmallBits() | M); - else setSmallBits(getSmallBits() & ~M); - } else { - uint32_t M = Mask[0]; - if (InvertMask) M = ~M; - if (AddBits) setSmallBits(getSmallBits() | M); - else setSmallBits(getSmallBits() & ~M); - } + assert(MaskWords <= sizeof(uintptr_t) && "Mask is larger than base!"); + uintptr_t M = Mask[0]; + if (NumBaseBits == 64) + M |= uint64_t(Mask[1]) << 32; + if (InvertMask) + M = ~M; + if (AddBits) + setSmallBits(getSmallBits() | M); + else + setSmallBits(getSmallBits() & ~M); } }; diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index 3e3c9c154ef4..3d98e8fac43b 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -48,6 +48,7 @@ class SmallPtrSetIteratorImpl; /// class SmallPtrSetImplBase { friend class SmallPtrSetIteratorImpl; + protected: /// SmallArray - Points to a fixed size set of buckets, used in 'small mode'. const void **SmallArray; @@ -133,6 +134,7 @@ private: void Grow(unsigned NewSize); void operator=(const SmallPtrSetImplBase &RHS) = delete; + protected: /// swap - Swaps the elements of two sets. /// Note: This method assumes that both sets have the same small size. 
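// Usage sketch (illustrative): SmallPtrSet stays heap-free in "small mode"
// until more than N (here 4) distinct pointers have been inserted.
#include "llvm/ADT/SmallPtrSet.h"

void smallPtrSetDemo() {
  llvm::SmallPtrSet<int *, 4> Ptrs;
  int A = 0, B = 0;
  Ptrs.insert(&A);
  Ptrs.insert(&B);
  bool Inserted = Ptrs.insert(&A).second; // false: &A is already present
  bool HasB = Ptrs.count(&B) != 0;        // true
  (void)Inserted; (void)HasB;
}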
@@ -148,6 +150,7 @@ class SmallPtrSetIteratorImpl { protected: const void *const *Bucket; const void *const *End; + public: explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E) : Bucket(BP), End(E) { @@ -178,14 +181,14 @@ protected: template class SmallPtrSetIterator : public SmallPtrSetIteratorImpl { typedef PointerLikeTypeTraits PtrTraits; - + public: typedef PtrTy value_type; typedef PtrTy reference; typedef PtrTy pointer; typedef std::ptrdiff_t difference_type; typedef std::forward_iterator_tag iterator_category; - + explicit SmallPtrSetIterator(const void *const *BP, const void *const *E) : SmallPtrSetIteratorImpl(BP, E) {} @@ -231,7 +234,6 @@ template struct RoundUpToPowerOfTwo { enum { Val = RoundUpToPowerOfTwoH::Val }; }; - /// \brief A templated base class for \c SmallPtrSet which provides the /// typesafe interface that is common across all small sizes. @@ -242,7 +244,8 @@ template class SmallPtrSetImpl : public SmallPtrSetImplBase { typedef PointerLikeTypeTraits PtrTraits; - SmallPtrSetImpl(const SmallPtrSetImpl&) = delete; + SmallPtrSetImpl(const SmallPtrSetImpl &) = delete; + protected: // Constructors that forward to the base. SmallPtrSetImpl(const void **SmallStorage, const SmallPtrSetImpl &that) @@ -303,6 +306,7 @@ class SmallPtrSet : public SmallPtrSetImpl { enum { SmallSizePowTwo = RoundUpToPowerOfTwo::Val }; /// SmallStorage - Fixed size storage used in 'small mode'. const void *SmallStorage[SmallSizePowTwo]; + public: SmallPtrSet() : BaseT(SmallStorage, SmallSizePowTwo) {} SmallPtrSet(const SmallPtrSet &that) : BaseT(SmallStorage, that) {} @@ -333,7 +337,6 @@ public: SmallPtrSetImplBase::swap(RHS); } }; - } namespace std { diff --git a/include/llvm/ADT/SmallSet.h b/include/llvm/ADT/SmallSet.h index bc6493554c8b..39a57b87b2a7 100644 --- a/include/llvm/ADT/SmallSet.h +++ b/include/llvm/ADT/SmallSet.h @@ -37,6 +37,7 @@ class SmallSet { std::set Set; typedef typename SmallVector::const_iterator VIterator; typedef typename SmallVector::iterator mutable_iterator; + public: typedef size_t size_type; SmallSet() {} @@ -92,7 +93,7 @@ public: for (; I != E; ++I) insert(*I); } - + bool erase(const T &V) { if (!isSmall()) return Set.erase(V); @@ -108,6 +109,7 @@ public: Vector.clear(); Set.clear(); } + private: bool isSmall() const { return Set.empty(); } diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h index b9384702c3ba..d1062acbbb61 100644 --- a/include/llvm/ADT/SmallVector.h +++ b/include/llvm/ADT/SmallVector.h @@ -109,9 +109,13 @@ public: typedef const T *const_pointer; // forward iterator creation methods. + LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin() { return (iterator)this->BeginX; } + LLVM_ATTRIBUTE_ALWAYS_INLINE const_iterator begin() const { return (const_iterator)this->BeginX; } + LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end() { return (iterator)this->EndX; } + LLVM_ATTRIBUTE_ALWAYS_INLINE const_iterator end() const { return (const_iterator)this->EndX; } protected: iterator capacity_ptr() { return (iterator)this->CapacityX; } @@ -124,6 +128,7 @@ public: reverse_iterator rend() { return reverse_iterator(begin()); } const_reverse_iterator rend() const { return const_reverse_iterator(begin());} + LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const { return end()-begin(); } size_type max_size() const { return size_type(-1) / sizeof(T); } @@ -135,10 +140,12 @@ public: /// Return a pointer to the vector's buffer, even if empty(). 
const_pointer data() const { return const_pointer(begin()); } + LLVM_ATTRIBUTE_ALWAYS_INLINE reference operator[](size_type idx) { assert(idx < size()); return begin()[idx]; } + LLVM_ATTRIBUTE_ALWAYS_INLINE const_reference operator[](size_type idx) const { assert(idx < size()); return begin()[idx]; diff --git a/include/llvm/ADT/SparseBitVector.h b/include/llvm/ADT/SparseBitVector.h index 20cbe2cddfc2..e6e72413da4e 100644 --- a/include/llvm/ADT/SparseBitVector.h +++ b/include/llvm/ADT/SparseBitVector.h @@ -39,7 +39,6 @@ namespace llvm { /// etc) do not perform as well in practice as a linked list with this iterator /// kept up to date. They are also significantly more memory intensive. - template struct SparseBitVectorElement : public ilist_node > { @@ -204,6 +203,7 @@ public: BecameZero = allzero; return changed; } + // Intersect this Element with the complement of RHS and return true if this // one changed. BecameZero is set to true if this element became all-zero // bits. @@ -226,6 +226,7 @@ public: BecameZero = allzero; return changed; } + // Three argument version of intersectWithComplement that intersects // RHS1 & ~RHS2 into this element void intersectWithComplement(const SparseBitVectorElement &RHS1, @@ -408,12 +409,13 @@ class SparseBitVector { // bitmap. return AtEnd == RHS.AtEnd && RHS.BitNumber == BitNumber; } + bool operator!=(const SparseBitVectorIterator &RHS) const { return !(*this == RHS); } - SparseBitVectorIterator(): BitVector(NULL) { - } + SparseBitVectorIterator(): BitVector(nullptr) { + } SparseBitVectorIterator(const SparseBitVector *RHS, bool end = false):BitVector(RHS) { @@ -453,6 +455,9 @@ public: // Assignment SparseBitVector& operator=(const SparseBitVector& RHS) { + if (this == &RHS) + return *this; + Elements.clear(); ElementListConstIter ElementIter = RHS.Elements.begin(); @@ -559,6 +564,9 @@ public: // Union our bitmap with the RHS and return true if we changed. bool operator|=(const SparseBitVector &RHS) { + if (this == &RHS) + return false; + bool changed = false; ElementListIter Iter1 = Elements.begin(); ElementListConstIter Iter2 = RHS.Elements.begin(); @@ -587,6 +595,9 @@ public: // Intersect our bitmap with the RHS and return true if ours changed. bool operator&=(const SparseBitVector &RHS) { + if (this == &RHS) + return false; + bool changed = false; ElementListIter Iter1 = Elements.begin(); ElementListConstIter Iter2 = RHS.Elements.begin(); @@ -619,9 +630,13 @@ public: ElementListIter IterTmp = Iter1; ++Iter1; Elements.erase(IterTmp); + changed = true; } } - Elements.erase(Iter1, Elements.end()); + if (Iter1 != Elements.end()) { + Elements.erase(Iter1, Elements.end()); + changed = true; + } CurrElementIter = Elements.begin(); return changed; } @@ -629,6 +644,14 @@ public: // Intersect our bitmap with the complement of the RHS and return true // if ours changed. bool intersectWithComplement(const SparseBitVector &RHS) { + if (this == &RHS) { + if (!empty()) { + clear(); + return true; + } + return false; + } + bool changed = false; ElementListIter Iter1 = Elements.begin(); ElementListConstIter Iter2 = RHS.Elements.begin(); @@ -669,12 +692,20 @@ public: return intersectWithComplement(*RHS); } - // Three argument version of intersectWithComplement. // Result of RHS1 & ~RHS2 is stored into this bitmap. 
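// Usage sketch (illustrative): the aliasing guards added in this hunk make
// self-referential calls like these well-defined.
#include "llvm/ADT/SparseBitVector.h"

void sparseBitVectorDemo() {
  llvm::SparseBitVector<> A, B;
  A.set(1);
  A.set(5);
  B.set(5);
  A.intersectWithComplement(A, B); // A = A & ~B, leaving {1}
  bool Changed = (A &= A);         // false: self-intersection is a no-op
  (void)Changed;
}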
void intersectWithComplement(const SparseBitVector &RHS1, const SparseBitVector &RHS2) { + if (this == &RHS1) { + intersectWithComplement(RHS2); + return; + } else if (this == &RHS2) { + SparseBitVector RHS2Copy(RHS2); + intersectWithComplement(RHS1, RHS2Copy); + return; + } + Elements.clear(); CurrElementIter = Elements.begin(); ElementListConstIter Iter1 = RHS1.Elements.begin(); @@ -719,8 +750,6 @@ public: Elements.push_back(NewElement); ++Iter1; } - - return; } void intersectWithComplement(const SparseBitVector *RHS1, @@ -855,9 +884,6 @@ operator-(const SparseBitVector &LHS, return Result; } - - - // Dump a SparseBitVector to a stream template void dump(const SparseBitVector &LHS, raw_ostream &out) { @@ -875,4 +901,4 @@ void dump(const SparseBitVector &LHS, raw_ostream &out) { } } // end namespace llvm -#endif +#endif // LLVM_ADT_SPARSEBITVECTOR_H diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h index d98abc375e8a..7c84e3ef6b4d 100644 --- a/include/llvm/ADT/Statistic.h +++ b/include/llvm/ADT/Statistic.h @@ -28,9 +28,11 @@ #include "llvm/Support/Atomic.h" #include "llvm/Support/Valgrind.h" +#include namespace llvm { class raw_ostream; +class raw_fd_ostream; class Statistic { public: @@ -170,6 +172,9 @@ void EnableStatistics(); /// \brief Check if statistics are enabled. bool AreStatisticsEnabled(); +/// \brief Return a file stream to print our output on. +std::unique_ptr CreateInfoOutputFile(); + /// \brief Print statistics to the file returned by CreateInfoOutputFile(). void PrintStatistics(); diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index 9d038560bf92..700bb9e10ef7 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -30,6 +30,7 @@ namespace llvm { /// StringMapEntryBase - Shared base class of StringMapEntry instances. class StringMapEntryBase { unsigned StrLen; + public: explicit StringMapEntryBase(unsigned Len) : StrLen(Len) {} @@ -48,6 +49,7 @@ protected: unsigned NumItems; unsigned NumTombstones; unsigned ItemSize; + protected: explicit StringMapImpl(unsigned itemSize) : TheTable(nullptr), @@ -85,8 +87,10 @@ protected: /// RemoveKey - Remove the StringMapEntry for the specified key from the /// table, returning it. If the key is not in the table, this returns null. StringMapEntryBase *RemoveKey(StringRef Key); + private: void init(unsigned Size); + public: static StringMapEntryBase *getTombstoneVal() { return (StringMapEntryBase*)-1; @@ -112,6 +116,7 @@ public: template class StringMapEntry : public StringMapEntryBase { StringMapEntry(StringMapEntry &E) = delete; + public: ValueTy second; @@ -205,7 +210,6 @@ public: } }; - /// StringMap - This is an unconventional map that is specialized for handling /// keys that are "strings", which are basically ranges of bytes. 
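On the Statistic.h hunk above: CreateInfoOutputFile() now returns an owning std::unique_ptr (hence the new include and the raw_fd_ostream forward declaration) instead of a stream the caller must manage by hand. Typical clients are unchanged; a sketch with an invented DEBUG_TYPE and counter name:

#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "demo" // hypothetical component name
STATISTIC(NumWidgets, "Number of widgets processed");

int main() {
  llvm::EnableStatistics(); // normally driven by the -stats flag
  for (int I = 0; I != 3; ++I)
    ++NumWidgets;
  // Internally this writes to the stream obtained from the new
  // CreateInfoOutputFile(), whose lifetime the unique_ptr now manages.
  llvm::PrintStatistics();
  return 0;
}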
This does some /// funky memory allocation and hashing things to make it extremely efficient, @@ -213,9 +217,10 @@ public: template class StringMap : public StringMapImpl { AllocatorTy Allocator; + public: typedef StringMapEntry MapEntryTy; - + StringMap() : StringMapImpl(static_cast(sizeof(MapEntryTy))) {} explicit StringMap(unsigned InitialSize) : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))) {} @@ -227,6 +232,13 @@ public: : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))), Allocator(A) {} + StringMap(std::initializer_list> List) + : StringMapImpl(static_cast(sizeof(MapEntryTy))) { + for (const auto &P : List) { + insert(P); + } + } + StringMap(StringMap &&RHS) : StringMapImpl(std::move(RHS)), Allocator(std::move(RHS.Allocator)) {} @@ -386,11 +398,10 @@ public: } }; - -template -class StringMapConstIterator { +template class StringMapConstIterator { protected: StringMapEntryBase **Ptr; + public: typedef StringMapEntry value_type; @@ -447,7 +458,6 @@ public: return static_cast*>(*this->Ptr); } }; - } #endif diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index 95660a49f1f1..350032b8c4e7 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -10,6 +10,7 @@ #ifndef LLVM_ADT_STRINGREF_H #define LLVM_ADT_STRINGREF_H +#include "llvm/Support/Compiler.h" #include #include #include @@ -53,6 +54,7 @@ namespace llvm { // Workaround memcmp issue with null pointers (undefined behavior) // by providing a specialized version + LLVM_ATTRIBUTE_ALWAYS_INLINE static int compareMemory(const char *Lhs, const char *Rhs, size_t Length) { if (Length == 0) { return 0; } return ::memcmp(Lhs,Rhs,Length); @@ -73,6 +75,7 @@ namespace llvm { } /// Construct a string ref from a pointer and length. + LLVM_ATTRIBUTE_ALWAYS_INLINE /*implicit*/ StringRef(const char *data, size_t length) : Data(data), Length(length) { assert((data || length == 0) && @@ -80,6 +83,7 @@ namespace llvm { } /// Construct a string ref from an std::string. + LLVM_ATTRIBUTE_ALWAYS_INLINE /*implicit*/ StringRef(const std::string &Str) : Data(Str.data()), Length(Str.length()) {} @@ -104,12 +108,15 @@ namespace llvm { /// data - Get a pointer to the start of the string (which may not be null /// terminated). + LLVM_ATTRIBUTE_ALWAYS_INLINE const char *data() const { return Data; } /// empty - Check if the string is empty. + LLVM_ATTRIBUTE_ALWAYS_INLINE bool empty() const { return Length == 0; } /// size - Get the string size. + LLVM_ATTRIBUTE_ALWAYS_INLINE size_t size() const { return Length; } /// front - Get the first character in the string. @@ -133,6 +140,7 @@ namespace llvm { /// equals - Check for string equality, this is more efficient than /// compare() when the relative ordering of inequal strings isn't needed. + LLVM_ATTRIBUTE_ALWAYS_INLINE bool equals(StringRef RHS) const { return (Length == RHS.Length && compareMemory(Data, RHS.Data, RHS.Length) == 0); @@ -145,6 +153,7 @@ namespace llvm { /// compare - Compare two strings; the result is -1, 0, or 1 if this string /// is lexicographically less than, equal to, or greater than the \p RHS. + LLVM_ATTRIBUTE_ALWAYS_INLINE int compare(StringRef RHS) const { // Check the prefix for a mismatch. if (int Res = compareMemory(Data, RHS.Data, std::min(Length, RHS.Length))) @@ -212,6 +221,7 @@ namespace llvm { /// @{ /// Check if this string starts with the given \p Prefix. 
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool startswith(StringRef Prefix) const {
       return Length >= Prefix.Length &&
              compareMemory(Data, Prefix.Data, Prefix.Length) == 0;
     }
@@ -221,6 +231,7 @@
     bool startswith_lower(StringRef Prefix) const;

     /// Check if this string ends with the given \p Suffix.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     bool endswith(StringRef Suffix) const {
       return Length >= Suffix.Length &&
              compareMemory(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
@@ -237,6 +248,7 @@
     ///
     /// \returns The index of the first occurrence of \p C, or npos if not
     /// found.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     size_t find(char C, size_t From = 0) const {
       size_t FindBegin = std::min(From, Length);
       if (FindBegin < Length) { // Avoid calling memchr with nullptr.
@@ -402,6 +414,7 @@
     /// \param N The number of characters to include in the substring. If N
     /// exceeds the number of characters remaining in the string, the string
     /// suffix (starting with \p Start) will be returned.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef substr(size_t Start, size_t N = npos) const {
       Start = std::min(Start, Length);
       return StringRef(Data + Start, std::min(N, Length - Start));
@@ -409,6 +422,7 @@
     /// Return a StringRef equal to 'this' but with the first \p N elements
     /// dropped.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef drop_front(size_t N = 1) const {
       assert(size() >= N && "Dropping more elements than exist");
       return substr(N);
@@ -416,6 +430,7 @@
     /// Return a StringRef equal to 'this' but with the last \p N elements
     /// dropped.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef drop_back(size_t N = 1) const {
       assert(size() >= N && "Dropping more elements than exist");
       return substr(0, size()-N);
@@ -431,6 +446,7 @@
     /// substring. If this is npos, or less than \p Start, or exceeds the
     /// number of characters remaining in the string, the string suffix
     /// (starting with \p Start) will be returned.
+    LLVM_ATTRIBUTE_ALWAYS_INLINE
     StringRef slice(size_t Start, size_t End) const {
       Start = std::min(Start, Length);
       End = std::min(std::max(Start, End), Length);
@@ -474,7 +490,7 @@
     /// Split into substrings around the occurrences of a separator string.
     ///
     /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
-    /// \p MaxSplit splits are done and consequently <= \p MaxSplit
+    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
     /// elements are added to A.
     /// If \p KeepEmpty is false, empty strings are not added to \p A. They
     /// still count when considering \p MaxSplit.
@@ -489,6 +505,23 @@
                StringRef Separator, int MaxSplit = -1,
                bool KeepEmpty = true) const;

+    /// Split into substrings around the occurrences of a separator character.
+    ///
+    /// Each substring is stored in \p A. If \p MaxSplit is >= 0, at most
+    /// \p MaxSplit splits are done and consequently <= \p MaxSplit + 1
+    /// elements are added to A.
+    /// If \p KeepEmpty is false, empty strings are not added to \p A. They
+    /// still count when considering \p MaxSplit.
+    /// A useful invariant is that
+    /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true
+    ///
+    /// \param A - Where to put the substrings.
+    /// \param Separator - The string to split on.
+    /// \param MaxSplit - The maximum number of times the string is split.
+    /// \param KeepEmpty - True if empty substrings should be added.
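The corrected sentence above is the heart of the split() change: MaxSplit bounds the number of splits performed, so up to MaxSplit + 1 pieces can land in the output vector. A sketch (after this patch the same call also accepts a plain char separator):

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>

int main() {
  llvm::StringRef S("a,b,c,d");
  llvm::SmallVector<llvm::StringRef, 4> Parts;

  // Two splits yield at most three elements; the tail stays unsplit.
  S.split(Parts, ",", /*MaxSplit=*/2, /*KeepEmpty=*/true);
  assert(Parts.size() == 3);
  assert(Parts[0] == "a" && Parts[1] == "b" && Parts[2] == "c,d");
  return 0;
}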
+ void split(SmallVectorImpl &A, char Separator, int MaxSplit = -1, + bool KeepEmpty = true) const; + /// Split into two substrings around the last occurrence of a separator /// character. /// @@ -530,10 +563,12 @@ namespace llvm { /// @name StringRef Comparison Operators /// @{ + LLVM_ATTRIBUTE_ALWAYS_INLINE inline bool operator==(StringRef LHS, StringRef RHS) { return LHS.equals(RHS); } + LLVM_ATTRIBUTE_ALWAYS_INLINE inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); } diff --git a/include/llvm/ADT/StringSet.h b/include/llvm/ADT/StringSet.h index 3e0cc200b6dd..08626dc7af84 100644 --- a/include/llvm/ADT/StringSet.h +++ b/include/llvm/ADT/StringSet.h @@ -23,6 +23,11 @@ namespace llvm { class StringSet : public llvm::StringMap { typedef llvm::StringMap base; public: + StringSet() = default; + StringSet(std::initializer_list S) { + for (StringRef X : S) + insert(X); + } std::pair insert(StringRef Key) { assert(!Key.empty()); diff --git a/include/llvm/ADT/StringSwitch.h b/include/llvm/ADT/StringSwitch.h index 0393a0c373ef..42b0fc4bc441 100644 --- a/include/llvm/ADT/StringSwitch.h +++ b/include/llvm/ADT/StringSwitch.h @@ -14,6 +14,7 @@ #define LLVM_ADT_STRINGSWITCH_H #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" #include #include @@ -48,10 +49,12 @@ class StringSwitch { const T *Result; public: + LLVM_ATTRIBUTE_ALWAYS_INLINE explicit StringSwitch(StringRef S) : Str(S), Result(nullptr) { } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& Case(const char (&S)[N], const T& Value) { if (!Result && N-1 == Str.size() && (std::memcmp(S, Str.data(), N-1) == 0)) { @@ -62,6 +65,7 @@ public: } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& EndsWith(const char (&S)[N], const T &Value) { if (!Result && Str.size() >= N-1 && std::memcmp(S, Str.data() + Str.size() + 1 - N, N-1) == 0) { @@ -72,6 +76,7 @@ public: } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& StartsWith(const char (&S)[N], const T &Value) { if (!Result && Str.size() >= N-1 && std::memcmp(S, Str.data(), N-1) == 0) { @@ -82,32 +87,66 @@ public: } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], const T& Value) { - return Case(S0, Value).Case(S1, Value); + if (!Result && ( + (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) || + (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0))) { + Result = &Value; + } + + return *this; } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], const char (&S2)[N2], const T& Value) { - return Case(S0, Value).Case(S1, Value).Case(S2, Value); + if (!Result && ( + (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) || + (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) || + (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0))) { + Result = &Value; + } + + return *this; } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], const char (&S2)[N2], const char (&S3)[N3], const T& Value) { - return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value); + if (!Result && ( + (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) || + (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) || + (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0) || + (N3-1 == Str.size() && std::memcmp(S3, Str.data(), N3-1) == 0))) { + Result = &Value; + } + + return *this; } template + LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch& 
Cases(const char (&S0)[N0], const char (&S1)[N1], const char (&S2)[N2], const char (&S3)[N3], const char (&S4)[N4], const T& Value) { - return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value) - .Case(S4, Value); + if (!Result && ( + (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) || + (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) || + (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0) || + (N3-1 == Str.size() && std::memcmp(S3, Str.data(), N3-1) == 0) || + (N4-1 == Str.size() && std::memcmp(S4, Str.data(), N4-1) == 0))) { + Result = &Value; + } + + return *this; } + LLVM_ATTRIBUTE_ALWAYS_INLINE R Default(const T& Value) const { if (Result) return *Result; @@ -115,6 +154,7 @@ public: return Value; } + LLVM_ATTRIBUTE_ALWAYS_INLINE operator R() const { assert(Result && "Fell off the end of a string-switch"); return *Result; diff --git a/include/llvm/ADT/TinyPtrVector.h b/include/llvm/ADT/TinyPtrVector.h index f29608f3d3d1..487aa46cf642 100644 --- a/include/llvm/ADT/TinyPtrVector.h +++ b/include/llvm/ADT/TinyPtrVector.h @@ -15,7 +15,7 @@ #include "llvm/ADT/SmallVector.h" namespace llvm { - + /// TinyPtrVector - This class is specialized for cases where there are /// normally 0 or 1 element in a vector, but is general enough to go beyond that /// when required. @@ -150,7 +150,6 @@ public: return Val.getAddrOfPtr1(); return Val.template get()->begin(); - } iterator end() { if (Val.template is()) diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index 947812d94ecb..e01db0a61fd5 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -50,6 +50,7 @@ public: armeb, // ARM (big endian): armeb aarch64, // AArch64 (little endian): aarch64 aarch64_be, // AArch64 (big endian): aarch64_be + avr, // AVR: Atmel AVR microcontroller bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) bpfeb, // eBPF or extended BPF or 64-bit BPF (big endian) hexagon, // Hexagon: hexagon @@ -75,8 +76,8 @@ public: xcore, // XCore: xcore nvptx, // NVPTX: 32-bit nvptx64, // NVPTX: 64-bit - le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten) - le64, // le64: generic little-endian 64-bit CPU (PNaCl / Emscripten) + le32, // le32: generic little-endian 32-bit CPU (PNaCl) + le64, // le64: generic little-endian 64-bit CPU (PNaCl) amdil, // AMDIL amdil64, // AMDIL with 64-bit pointers hsail, // AMD HSAIL @@ -92,12 +93,14 @@ public: enum SubArchType { NoSubArch, + ARMSubArch_v8_2a, ARMSubArch_v8_1a, ARMSubArch_v8, ARMSubArch_v7, ARMSubArch_v7em, ARMSubArch_v7m, ARMSubArch_v7s, + ARMSubArch_v7k, ARMSubArch_v6, ARMSubArch_v6m, ARMSubArch_v6k, @@ -124,7 +127,8 @@ public: MipsTechnologies, NVIDIA, CSR, - LastVendorType = CSR + Myriad, + LastVendorType = Myriad }; enum OSType { UnknownOS, @@ -153,7 +157,10 @@ public: NVCL, // NVIDIA OpenCL AMDHSA, // AMD HSA Runtime PS4, - LastOSType = PS4 + ELFIAMCU, + TvOS, // Apple tvOS + WatchOS, // Apple watchOS + LastOSType = WatchOS }; enum EnvironmentType { UnknownEnvironment, @@ -170,7 +177,9 @@ public: MSVC, Itanium, Cygnus, - LastEnvironmentType = Cygnus + AMDOpenCL, + CoreCLR, + LastEnvironmentType = CoreCLR }; enum ObjectFormatType { UnknownObjectFormat, @@ -205,7 +214,7 @@ public: /// @name Constructors /// @{ - /// \brief Default constructor is the same as an empty string and leaves all + /// Default constructor is the same as an empty string and leaves all /// triple fields unknown. 
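Unrolling Cases() into a single comparison chain, as the hunks above do, lets the whole switch collapse into one always-inlined expression instead of a chain of Case() calls. Client code is unchanged; a toy sketch (the color mapping is invented):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

static int colorCode(llvm::StringRef Name) {
  return llvm::StringSwitch<int>(Name)
      .Case("red", 1)
      .Cases("green", "emerald", 2) // now one flattened memcmp chain
      .StartsWith("blue", 3)
      .Default(0);
}

int main() { return colorCode("emerald") == 2 ? 0 : 1; }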
Triple() : Data(), Arch(), Vendor(), OS(), Environment(), ObjectFormat() {} @@ -231,7 +240,7 @@ public: /// common case in which otherwise valid components are in the wrong order. static std::string normalize(StringRef Str); - /// \brief Return the normalized form of this triple's string. + /// Return the normalized form of this triple's string. std::string normalize() const { return normalize(Data); } /// @} @@ -259,7 +268,7 @@ public: /// getEnvironment - Get the parsed environment type of this triple. EnvironmentType getEnvironment() const { return Environment; } - /// \brief Parse the version number from the OS name component of the + /// Parse the version number from the OS name component of the /// triple, if present. /// /// For example, "fooos1.2.3" would return (1, 2, 3). @@ -295,10 +304,15 @@ public: unsigned &Micro) const; /// getiOSVersion - Parse the version number as with getOSVersion. This should - /// only be called with IOS triples. + /// only be called with IOS or generic triples. void getiOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; + /// getWatchOSVersion - Parse the version number as with getOSVersion. This + /// should only be called with WatchOS or generic triples. + void getWatchOSVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const; + /// @} /// @name Direct Component Access /// @{ @@ -331,7 +345,7 @@ public: /// @name Convenience Predicates /// @{ - /// \brief Test whether the architecture is 64-bit + /// Test whether the architecture is 64-bit /// /// Note that this tests for 64-bit pointer width, and nothing else. Note /// that we intentionally expose only three predicates, 64-bit, 32-bit, and @@ -340,12 +354,12 @@ public: /// system is provided. bool isArch64Bit() const; - /// \brief Test whether the architecture is 32-bit + /// Test whether the architecture is 32-bit /// /// Note that this tests for 32-bit pointer width, and nothing else. bool isArch32Bit() const; - /// \brief Test whether the architecture is 16-bit + /// Test whether the architecture is 16-bit /// /// Note that this tests for 16-bit pointer width, and nothing else. bool isArch16Bit() const; @@ -396,13 +410,27 @@ public: } /// Is this an iOS triple. + /// Note: This identifies tvOS as a variant of iOS. If that ever + /// changes, i.e., if the two operating systems diverge or their version + /// numbers get out of sync, that will need to be changed. + /// watchOS has completely different version numbers so it is not included. bool isiOS() const { - return getOS() == Triple::IOS; + return getOS() == Triple::IOS || isTvOS(); } - /// isOSDarwin - Is this a "Darwin" OS (OS X or iOS). + /// Is this an Apple tvOS triple. + bool isTvOS() const { + return getOS() == Triple::TvOS; + } + + /// Is this an Apple watchOS triple. + bool isWatchOS() const { + return getOS() == Triple::WatchOS; + } + + /// isOSDarwin - Is this a "Darwin" OS (OS X, iOS, or watchOS). bool isOSDarwin() const { - return isMacOSX() || isiOS(); + return isMacOSX() || isiOS() || isWatchOS(); } bool isOSNetBSD() const { @@ -427,16 +455,26 @@ public: return getOS() == Triple::Bitrig; } + bool isOSIAMCU() const { + return getOS() == Triple::ELFIAMCU; + } + + /// Checks if the environment could be MSVC. bool isWindowsMSVCEnvironment() const { return getOS() == Triple::Win32 && (getEnvironment() == Triple::UnknownEnvironment || getEnvironment() == Triple::MSVC); } + /// Checks if the environment is MSVC. 
bool isKnownWindowsMSVCEnvironment() const { return getOS() == Triple::Win32 && getEnvironment() == Triple::MSVC; } + bool isWindowsCoreCLREnvironment() const { + return getOS() == Triple::Win32 && getEnvironment() == Triple::CoreCLR; + } + bool isWindowsItaniumEnvironment() const { return getOS() == Triple::Win32 && getEnvironment() == Triple::Itanium; } @@ -449,60 +487,63 @@ public: return getOS() == Triple::Win32 && getEnvironment() == Triple::GNU; } - /// \brief Tests for either Cygwin or MinGW OS + /// Tests for either Cygwin or MinGW OS bool isOSCygMing() const { return isWindowsCygwinEnvironment() || isWindowsGNUEnvironment(); } - /// \brief Is this a "Windows" OS targeting a "MSVCRT.dll" environment. + /// Is this a "Windows" OS targeting a "MSVCRT.dll" environment. bool isOSMSVCRT() const { return isWindowsMSVCEnvironment() || isWindowsGNUEnvironment() || isWindowsItaniumEnvironment(); } - /// \brief Tests whether the OS is Windows. + /// Tests whether the OS is Windows. bool isOSWindows() const { return getOS() == Triple::Win32; } - /// \brief Tests whether the OS is NaCl (Native Client) + /// Tests whether the OS is NaCl (Native Client) bool isOSNaCl() const { return getOS() == Triple::NaCl; } - /// \brief Tests whether the OS is Linux. + /// Tests whether the OS is Linux. bool isOSLinux() const { return getOS() == Triple::Linux; } - /// \brief Tests whether the OS uses the ELF binary format. + /// Tests whether the OS uses the ELF binary format. bool isOSBinFormatELF() const { return getObjectFormat() == Triple::ELF; } - /// \brief Tests whether the OS uses the COFF binary format. + /// Tests whether the OS uses the COFF binary format. bool isOSBinFormatCOFF() const { return getObjectFormat() == Triple::COFF; } - /// \brief Tests whether the environment is MachO. + /// Tests whether the environment is MachO. bool isOSBinFormatMachO() const { return getObjectFormat() == Triple::MachO; } - /// \brief Tests whether the target is the PS4 CPU + /// Tests whether the target is the PS4 CPU bool isPS4CPU() const { return getArch() == Triple::x86_64 && getVendor() == Triple::SCEI && getOS() == Triple::PS4; } - /// \brief Tests whether the target is the PS4 platform + /// Tests whether the target is the PS4 platform bool isPS4() const { return getVendor() == Triple::SCEI && getOS() == Triple::PS4; } + /// Tests whether the target is Android + bool isAndroid() const { return getEnvironment() == Triple::Android; } + /// @} /// @name Mutators /// @{ @@ -553,7 +594,7 @@ public: /// @name Helpers to build variants of a particular triple. /// @{ - /// \brief Form a triple with a 32-bit variant of the current architecture. + /// Form a triple with a 32-bit variant of the current architecture. /// /// This can be used to move across "families" of architectures where useful. /// @@ -561,7 +602,7 @@ public: /// architecture if no such variant can be found. llvm::Triple get32BitArchVariant() const; - /// \brief Form a triple with a 64-bit variant of the current architecture. + /// Form a triple with a 64-bit variant of the current architecture. /// /// This can be used to move across "families" of architectures where useful. /// @@ -589,7 +630,7 @@ public: /// /// \param Arch the architecture name (e.g., "armv7s"). If it is an empty /// string then the triple's arch name is used. - const char* getARMCPUForArch(StringRef Arch = StringRef()) const; + StringRef getARMCPUForArch(StringRef Arch = StringRef()) const; /// @} /// @name Static helpers for IDs. 
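The Triple changes are easiest to check with a concrete query. Per the comments above, tvOS is treated as a variant of iOS while watchOS is kept separate because its version numbers differ, yet both remain Darwin. A sketch (the triple string is a plausible watch target, not taken from the patch):

#include "llvm/ADT/Triple.h"

int main() {
  llvm::Triple T("thumbv7k-apple-watchos2.0");
  bool Darwin = T.isOSDarwin(); // true: watchOS is a Darwin OS
  bool IOS = T.isiOS();         // false: watchOS is excluded on purpose
  bool Watch = T.isWatchOS();   // true
  return (Darwin && !IOS && Watch) ? 0 : 1;
}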
diff --git a/include/llvm/ADT/UniqueVector.h b/include/llvm/ADT/UniqueVector.h
index a9cb2f5709eb..e1ab4b56023f 100644
--- a/include/llvm/ADT/UniqueVector.h
+++ b/include/llvm/ADT/UniqueVector.h
@@ -11,6 +11,7 @@
 #define LLVM_ADT_UNIQUEVECTOR_H

 #include <cassert>
+#include <cstddef>
 #include <map>
 #include <vector>

diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index a7b9306b3a73..3044a6c435f1 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -104,6 +104,53 @@ struct ilist_sentinel_traits {
   }
 };

+template <typename NodeTy> class ilist_half_node;
+template <typename NodeTy> class ilist_node;
+
+/// Traits with an embedded ilist_node as a sentinel.
+///
+/// FIXME: The downcast in createSentinel() is UB.
+template <typename NodeTy> struct ilist_embedded_sentinel_traits {
+  /// Get hold of the node that marks the end of the list.
+  NodeTy *createSentinel() const {
+    // Since i(p)lists always publicly derive from their corresponding traits,
+    // placing a data member in this class will augment the i(p)list. But since
+    // the NodeTy is expected to publicly derive from ilist_node<NodeTy>,
+    // there is a legal viable downcast from it to NodeTy. We use this trick to
+    // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the
+    // sentinel. Dereferencing the sentinel is forbidden (save the
+    // ilist_node<NodeTy>), so no one will ever notice the superposition.
+    return static_cast<NodeTy *>(&Sentinel);
+  }
+  static void destroySentinel(NodeTy *) {}
+
+  NodeTy *provideInitialHead() const { return createSentinel(); }
+  NodeTy *ensureHead(NodeTy *) const { return createSentinel(); }
+  static void noteHead(NodeTy *, NodeTy *) {}
+
+private:
+  mutable ilist_node<NodeTy> Sentinel;
+};
+
+/// Traits with an embedded ilist_half_node as a sentinel.
+///
+/// FIXME: The downcast in createSentinel() is UB.
+template <typename NodeTy> struct ilist_half_embedded_sentinel_traits {
+  /// Get hold of the node that marks the end of the list.
+  NodeTy *createSentinel() const {
+    // See comment in ilist_embedded_sentinel_traits::createSentinel().
+    return static_cast<NodeTy *>(&Sentinel);
+  }
+  static void destroySentinel(NodeTy *) {}
+
+  NodeTy *provideInitialHead() const { return createSentinel(); }
+  NodeTy *ensureHead(NodeTy *) const { return createSentinel(); }
+  static void noteHead(NodeTy *, NodeTy *) {}
+
+private:
+  mutable ilist_half_node<NodeTy> Sentinel;
+};
+
 /// ilist_node_traits - A fragment for template traits for intrusive list
 /// that provides default node related operations.
 ///
@@ -173,8 +220,8 @@ private:
   template <class T> void operator-(T) const;

 public:
-  ilist_iterator(pointer NP) : NodePtr(NP) {}
-  ilist_iterator(reference NR) : NodePtr(&NR) {}
+  explicit ilist_iterator(pointer NP) : NodePtr(NP) {}
+  explicit ilist_iterator(reference NR) : NodePtr(&NR) {}
   ilist_iterator() : NodePtr(nullptr) {}

   // This is templated so that we can allow constructing a const iterator from
@@ -191,8 +238,10 @@ public:
     return *this;
   }

+  void reset(pointer NP) { NodePtr = NP; }
+
   // Accessors...
-  operator pointer() const {
+  explicit operator pointer() const {
     return NodePtr;
   }

@@ -202,11 +251,11 @@ public:
   pointer operator->() const { return &operator*(); }

   // Comparison operators
-  bool operator==(const ilist_iterator &RHS) const {
-    return NodePtr == RHS.NodePtr;
+  template <class Y> bool operator==(const ilist_iterator<Y> &RHS) const {
+    return NodePtr == RHS.getNodePtrUnchecked();
   }
-  bool operator!=(const ilist_iterator &RHS) const {
-    return NodePtr != RHS.NodePtr;
+  template <class Y> bool operator!=(const ilist_iterator<Y> &RHS) const {
+    return NodePtr != RHS.getNodePtrUnchecked();
   }

   // Increment and decrement operators...
@@ -422,7 +471,7 @@ public: this->setPrev(CurNode, New); this->addNodeToList(New); // Notify traits that we added a node... - return New; + return iterator(New); } iterator insertAfter(iterator where, NodeTy *New) { @@ -443,7 +492,7 @@ public: else Head = NextNode; this->setPrev(NextNode, PrevNode); - IT = NextNode; + IT.reset(NextNode); this->removeNodeFromList(Node); // Notify traits that we removed a node... // Set the next/prev pointers of the current node to null. This isn't @@ -461,12 +510,18 @@ public: return remove(MutIt); } + NodeTy *remove(NodeTy *IT) { return remove(iterator(IT)); } + NodeTy *remove(NodeTy &IT) { return remove(iterator(IT)); } + // erase - remove a node from the controlled sequence... and delete it. iterator erase(iterator where) { this->deleteNode(remove(where)); return where; } + iterator erase(NodeTy *IT) { return erase(iterator(IT)); } + iterator erase(NodeTy &IT) { return erase(iterator(IT)); } + /// Remove all nodes from the list like clear(), but do not call /// removeNodeFromList() or deleteNode(). /// @@ -522,7 +577,7 @@ private: this->setNext(Last, PosNext); this->setPrev(PosNext, Last); - this->transferNodesFromList(L2, First, PosNext); + this->transferNodesFromList(L2, iterator(First), iterator(PosNext)); // Now that everything is set, restore the pointers to the list sentinels. L2.setTail(L2Sentinel); @@ -579,6 +634,83 @@ public: void splice(iterator where, iplist &L2, iterator first, iterator last) { if (first != last) transfer(where, L2, first, last); } + void splice(iterator where, iplist &L2, NodeTy &N) { + splice(where, L2, iterator(N)); + } + void splice(iterator where, iplist &L2, NodeTy *N) { + splice(where, L2, iterator(N)); + } + + template + void merge(iplist &Right, Compare comp) { + if (this == &Right) + return; + iterator First1 = begin(), Last1 = end(); + iterator First2 = Right.begin(), Last2 = Right.end(); + while (First1 != Last1 && First2 != Last2) { + if (comp(*First2, *First1)) { + iterator Next = First2; + transfer(First1, Right, First2, ++Next); + First2 = Next; + } else { + ++First1; + } + } + if (First2 != Last2) + transfer(Last1, Right, First2, Last2); + } + void merge(iplist &Right) { return merge(Right, op_less); } + + template + void sort(Compare comp) { + // The list is empty, vacuously sorted. + if (empty()) + return; + // The list has a single element, vacuously sorted. + if (std::next(begin()) == end()) + return; + // Find the split point for the list. + iterator Center = begin(), End = begin(); + while (End != end() && std::next(End) != end()) { + Center = std::next(Center); + End = std::next(std::next(End)); + } + // Split the list into two. + iplist RightHalf; + RightHalf.splice(RightHalf.begin(), *this, Center, end()); + + // Sort the two sublists. + sort(comp); + RightHalf.sort(comp); + + // Merge the two sublists back together. + merge(RightHalf, comp); + } + void sort() { sort(op_less); } + + /// \brief Get the previous node, or \c nullptr for the list head. + NodeTy *getPrevNode(NodeTy &N) const { + auto I = N.getIterator(); + if (I == begin()) + return nullptr; + return &*std::prev(I); + } + /// \brief Get the previous node, or \c nullptr for the list head. + const NodeTy *getPrevNode(const NodeTy &N) const { + return getPrevNode(const_cast(N)); + } + + /// \brief Get the next node, or \c nullptr for the list tail. 
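The merge()/sort() members added above implement a textbook top-down merge sort on the intrusive list: locate the midpoint with a one-step/two-step walk, splice the right half into a scratch list, recurse, then merge by splicing runs back. A sketch with a hypothetical node type (iplist owns and deletes its nodes):

#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"

struct Entry : llvm::ilist_node<Entry> { // invented for illustration
  int Key = 0;
  Entry() = default;
  explicit Entry(int K) : Key(K) {}
};

int main() {
  llvm::iplist<Entry> L;
  for (int K : {3, 1, 2})
    L.push_back(new Entry(K));

  L.sort([](const Entry &A, const Entry &B) { return A.Key < B.Key; });
  return L.front().Key == 1 ? 0 : 1; // list now reads 1, 2, 3
}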
+ NodeTy *getNextNode(NodeTy &N) const { + auto Next = std::next(N.getIterator()); + if (Next == end()) + return nullptr; + return &*Next; + } + /// \brief Get the next node, or \c nullptr for the list tail. + const NodeTy *getNextNode(const NodeTy &N) const { + return getNextNode(const_cast(N)); + } }; diff --git a/include/llvm/ADT/ilist_node.h b/include/llvm/ADT/ilist_node.h index 26d0b55e4093..7e5a0e0e5ad8 100644 --- a/include/llvm/ADT/ilist_node.h +++ b/include/llvm/ADT/ilist_node.h @@ -19,12 +19,15 @@ namespace llvm { template struct ilist_traits; +template struct ilist_embedded_sentinel_traits; +template struct ilist_half_embedded_sentinel_traits; /// ilist_half_node - Base class that provides prev services for sentinels. /// template class ilist_half_node { friend struct ilist_traits; + friend struct ilist_half_embedded_sentinel_traits; NodeTy *Prev; protected: NodeTy *getPrev() { return Prev; } @@ -36,6 +39,8 @@ protected: template struct ilist_nextprev_traits; +template class ilist_iterator; + /// ilist_node - Base class that provides next/prev services for nodes /// that use ilist_nextprev_traits or ilist_default_traits. /// @@ -43,6 +48,8 @@ template class ilist_node : private ilist_half_node { friend struct ilist_nextprev_traits; friend struct ilist_traits; + friend struct ilist_half_embedded_sentinel_traits; + friend struct ilist_embedded_sentinel_traits; NodeTy *Next; NodeTy *getNext() { return Next; } const NodeTy *getNext() const { return Next; } @@ -50,54 +57,64 @@ class ilist_node : private ilist_half_node { protected: ilist_node() : Next(nullptr) {} +public: + ilist_iterator getIterator() { + // FIXME: Stop downcasting to create the iterator (potential UB). + return ilist_iterator(static_cast(this)); + } + ilist_iterator getIterator() const { + // FIXME: Stop downcasting to create the iterator (potential UB). + return ilist_iterator(static_cast(this)); + } +}; + +/// An ilist node that can access its parent list. +/// +/// Requires \c NodeTy to have \a getParent() to find the parent node, and the +/// \c ParentTy to have \a getSublistAccess() to get a reference to the list. +template +class ilist_node_with_parent : public ilist_node { +protected: + ilist_node_with_parent() = default; + +private: + /// Forward to NodeTy::getParent(). + /// + /// Note: do not use the name "getParent()". We want a compile error + /// (instead of recursion) when the subclass fails to implement \a + /// getParent(). + const ParentTy *getNodeParent() const { + return static_cast(this)->getParent(); + } + public: /// @name Adjacent Node Accessors /// @{ - - /// \brief Get the previous node, or 0 for the list head. + /// \brief Get the previous node, or \c nullptr for the list head. NodeTy *getPrevNode() { - NodeTy *Prev = this->getPrev(); - - // Check for sentinel. - if (!Prev->getNext()) - return nullptr; - - return Prev; + // Should be separated to a reused function, but then we couldn't use auto + // (and would need the type of the list). + const auto &List = + getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr)); + return List.getPrevNode(*static_cast(this)); } - - /// \brief Get the previous node, or 0 for the list head. + /// \brief Get the previous node, or \c nullptr for the list head. const NodeTy *getPrevNode() const { - const NodeTy *Prev = this->getPrev(); - - // Check for sentinel. - if (!Prev->getNext()) - return nullptr; - - return Prev; + return const_cast(this)->getPrevNode(); } - /// \brief Get the next node, or 0 for the list tail. 
+ /// \brief Get the next node, or \c nullptr for the list tail. NodeTy *getNextNode() { - NodeTy *Next = getNext(); - - // Check for sentinel. - if (!Next->getNext()) - return nullptr; - - return Next; + // Should be separated to a reused function, but then we couldn't use auto + // (and would need the type of the list). + const auto &List = + getNodeParent()->*(ParentTy::getSublistAccess((NodeTy *)nullptr)); + return List.getNextNode(*static_cast(this)); } - - /// \brief Get the next node, or 0 for the list tail. + /// \brief Get the next node, or \c nullptr for the list tail. const NodeTy *getNextNode() const { - const NodeTy *Next = getNext(); - - // Check for sentinel. - if (!Next->getNext()) - return nullptr; - - return Next; + return const_cast(this)->getNextNode(); } - /// @} }; diff --git a/include/llvm/ADT/iterator_range.h b/include/llvm/ADT/iterator_range.h index 523a86f02e08..3dd679bd9b79 100644 --- a/include/llvm/ADT/iterator_range.h +++ b/include/llvm/ADT/iterator_range.h @@ -20,6 +20,7 @@ #define LLVM_ADT_ITERATOR_RANGE_H #include +#include namespace llvm { @@ -32,6 +33,12 @@ class iterator_range { IteratorT begin_iterator, end_iterator; public: + //TODO: Add SFINAE to test that the Container's iterators match the range's + // iterators. + template + iterator_range(Container &&c) + //TODO: Consider ADL/non-member begin/end calls. + : begin_iterator(c.begin()), end_iterator(c.end()) {} iterator_range(IteratorT begin_iterator, IteratorT end_iterator) : begin_iterator(std::move(begin_iterator)), end_iterator(std::move(end_iterator)) {} @@ -51,6 +58,11 @@ template iterator_range make_range(T x, T y) { template iterator_range make_range(std::pair p) { return iterator_range(std::move(p.first), std::move(p.second)); } + +template +iterator_range()))> drop_begin(T &&t, int n) { + return make_range(std::next(begin(t), n), end(t)); +} } #endif diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 36f8199a0322..5cc840a64a62 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -41,10 +41,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" #include "llvm/Analysis/MemoryLocation.h" namespace llvm { - +class BasicAAResult; class LoadInst; class StoreInst; class VAArgInst; @@ -55,6 +56,7 @@ class AnalysisUsage; class MemTransferInst; class MemIntrinsic; class DominatorTree; +class OrderedBasicBlock; /// The possible results of an alias query. /// @@ -84,261 +86,390 @@ enum AliasResult { MustAlias, }; -class AliasAnalysis { -protected: - const DataLayout *DL; - const TargetLibraryInfo *TLI; +/// Flags indicating whether a memory access modifies or references memory. +/// +/// This is no access at all, a modification, a reference, or both +/// a modification and a reference. These are specifically structured such that +/// they form a two bit matrix and bit-tests for 'mod' or 'ref' work with any +/// of the possible values. +enum ModRefInfo { + /// The access neither references nor modifies the value stored in memory. + MRI_NoModRef = 0, + /// The access references the value stored in memory. + MRI_Ref = 1, + /// The access modifies the value stored in memory. + MRI_Mod = 2, + /// The access both references and modifies the value stored in memory. + MRI_ModRef = MRI_Ref | MRI_Mod +}; -private: - AliasAnalysis *AA; // Previous Alias Analysis to chain to. +/// The locations at which a function might access memory. 
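Two small additions in the iterator_range.h hunk above: a range can now be constructed straight from a container, and drop_begin() yields a view that skips the first n elements. A sketch over a plain std::vector:

#include "llvm/ADT/iterator_range.h"
#include <vector>

int main() {
  std::vector<int> V = {10, 20, 30, 40};

  int Sum = 0;
  for (int X : llvm::drop_begin(V, 1)) // visits 20, 30, 40
    Sum += X;

  // The new converting constructor accepts the container directly.
  llvm::iterator_range<std::vector<int>::iterator> R(V);
  return (Sum == 90 && *R.begin() == 10) ? 0 : 1;
}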
+/// +/// These are primarily used in conjunction with the \c AccessKind bits to +/// describe both the nature of access and the locations of access for a +/// function call. +enum FunctionModRefLocation { + /// Base case is no access to memory. + FMRL_Nowhere = 0, + /// Access to memory via argument pointers. + FMRL_ArgumentPointees = 4, + /// Access to any memory. + FMRL_Anywhere = 8 | FMRL_ArgumentPointees +}; -protected: - /// InitializeAliasAnalysis - Subclasses must call this method to initialize - /// the AliasAnalysis interface before any other methods are called. This is - /// typically called by the run* methods of these subclasses. This may be - /// called multiple times. +/// Summary of how a function affects memory in the program. +/// +/// Loads from constant globals are not considered memory accesses for this +/// interface. Also, functions may freely modify stack space local to their +/// invocation without having to report it through these interfaces. +enum FunctionModRefBehavior { + /// This function does not perform any non-local loads or stores to memory. /// - void InitializeAliasAnalysis(Pass *P, const DataLayout *DL); + /// This property corresponds to the GCC 'const' attribute. + /// This property corresponds to the LLVM IR 'readnone' attribute. + /// This property corresponds to the IntrNoMem LLVM intrinsic flag. + FMRB_DoesNotAccessMemory = FMRL_Nowhere | MRI_NoModRef, - /// getAnalysisUsage - All alias analysis implementations should invoke this - /// directly (using AliasAnalysis::getAnalysisUsage(AU)). - virtual void getAnalysisUsage(AnalysisUsage &AU) const; + /// The only memory references in this function (if it has any) are + /// non-volatile loads from objects pointed to by its pointer-typed + /// arguments, with arbitrary offsets. + /// + /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag. + FMRB_OnlyReadsArgumentPointees = FMRL_ArgumentPointees | MRI_Ref, + /// The only memory references in this function (if it has any) are + /// non-volatile loads and stores from objects pointed to by its + /// pointer-typed arguments, with arbitrary offsets. + /// + /// This property corresponds to the IntrReadWriteArgMem LLVM intrinsic flag. + FMRB_OnlyAccessesArgumentPointees = FMRL_ArgumentPointees | MRI_ModRef, + + /// This function does not perform any non-local stores or volatile loads, + /// but may read from any memory location. + /// + /// This property corresponds to the GCC 'pure' attribute. + /// This property corresponds to the LLVM IR 'readonly' attribute. + /// This property corresponds to the IntrReadMem LLVM intrinsic flag. + FMRB_OnlyReadsMemory = FMRL_Anywhere | MRI_Ref, + + /// This indicates that the function could not be classified into one of the + /// behaviors above. + FMRB_UnknownModRefBehavior = FMRL_Anywhere | MRI_ModRef +}; + +class AAResults { public: - static char ID; // Class identification, replacement for typeinfo - AliasAnalysis() : DL(nullptr), TLI(nullptr), AA(nullptr) {} - virtual ~AliasAnalysis(); // We want to be subclassed + // Make these results default constructable and movable. We have to spell + // these out because MSVC won't synthesize them. + AAResults() {} + AAResults(AAResults &&Arg); + AAResults &operator=(AAResults &&Arg); + ~AAResults(); - /// getTargetLibraryInfo - Return a pointer to the current TargetLibraryInfo - /// object, or null if no TargetLibraryInfo object is available. 
- /// - const TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; } - - /// getTypeStoreSize - Return the DataLayout store size for the given type, - /// if known, or a conservative value otherwise. - /// - uint64_t getTypeStoreSize(Type *Ty); + /// Register a specific AA result. + template void addAAResult(AAResultT &AAResult) { + // FIXME: We should use a much lighter weight system than the usual + // polymorphic pattern because we don't own AAResult. It should + // ideally involve two pointers and no separate allocation. + AAs.emplace_back(new Model(AAResult, *this)); + } //===--------------------------------------------------------------------===// - /// Alias Queries... - /// + /// \name Alias Queries + /// @{ - /// alias - The main low level interface to the alias analysis implementation. + /// The main low level interface to the alias analysis implementation. /// Returns an AliasResult indicating whether the two pointers are aliased to - /// each other. This is the interface that must be implemented by specific + /// each other. This is the interface that must be implemented by specific /// alias analysis implementations. - virtual AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB); + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); - /// alias - A convenience wrapper. - AliasResult alias(const Value *V1, uint64_t V1Size, - const Value *V2, uint64_t V2Size) { + /// A convenience wrapper around the primary \c alias interface. + AliasResult alias(const Value *V1, uint64_t V1Size, const Value *V2, + uint64_t V2Size) { return alias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); } - /// alias - A convenience wrapper. + /// A convenience wrapper around the primary \c alias interface. AliasResult alias(const Value *V1, const Value *V2) { return alias(V1, MemoryLocation::UnknownSize, V2, MemoryLocation::UnknownSize); } - /// isNoAlias - A trivial helper function to check to see if the specified - /// pointers are no-alias. + /// A trivial helper function to check to see if the specified pointers are + /// no-alias. bool isNoAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { return alias(LocA, LocB) == NoAlias; } - /// isNoAlias - A convenience wrapper. - bool isNoAlias(const Value *V1, uint64_t V1Size, - const Value *V2, uint64_t V2Size) { + /// A convenience wrapper around the \c isNoAlias helper interface. + bool isNoAlias(const Value *V1, uint64_t V1Size, const Value *V2, + uint64_t V2Size) { return isNoAlias(MemoryLocation(V1, V1Size), MemoryLocation(V2, V2Size)); } - - /// isNoAlias - A convenience wrapper. + + /// A convenience wrapper around the \c isNoAlias helper interface. bool isNoAlias(const Value *V1, const Value *V2) { return isNoAlias(MemoryLocation(V1), MemoryLocation(V2)); } - - /// isMustAlias - A convenience wrapper. + + /// A trivial helper function to check to see if the specified pointers are + /// must-alias. bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB) { return alias(LocA, LocB) == MustAlias; } - /// isMustAlias - A convenience wrapper. + /// A convenience wrapper around the \c isMustAlias helper interface. bool isMustAlias(const Value *V1, const Value *V2) { return alias(V1, 1, V2, 1) == MustAlias; } - - /// pointsToConstantMemory - If the specified memory location is - /// known to be constant, return true. If OrLocal is true and the - /// specified memory location is known to be "local" (derived from - /// an alloca), return true. Otherwise return false. 
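AAResults, introduced above, fronts a set of registered AA implementations with one query surface. A sketch of a client; it assumes AA was already populated via addAAResult() (for example with a BasicAAResult), which is outside this fragment, and mayOverlap is an invented helper name:

#include "llvm/Analysis/AliasAnalysis.h"

using namespace llvm;

static bool mayOverlap(AAResults &AA, const Value *A, const Value *B) {
  // isNoAlias()/isMustAlias() above are thin wrappers over this call;
  // MemoryLocation without a size means "unknown extent".
  return AA.alias(MemoryLocation(A), MemoryLocation(B)) != NoAlias;
}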
- virtual bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal = false); - /// pointsToConstantMemory - A convenient wrapper. + /// Checks whether the given location points to constant memory, or if + /// \p OrLocal is true whether it points to a local alloca. + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false); + + /// A convenience wrapper around the primary \c pointsToConstantMemory + /// interface. bool pointsToConstantMemory(const Value *P, bool OrLocal = false) { return pointsToConstantMemory(MemoryLocation(P), OrLocal); } + /// @} //===--------------------------------------------------------------------===// - /// Simple mod/ref information... - /// - - /// ModRefResult - Represent the result of a mod/ref query. Mod and Ref are - /// bits which may be or'd together. - /// - enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 }; - - /// These values define additional bits used to define the - /// ModRefBehavior values. - enum { Nowhere = 0, ArgumentPointees = 4, Anywhere = 8 | ArgumentPointees }; - - /// ModRefBehavior - Summary of how a function affects memory in the program. - /// Loads from constant globals are not considered memory accesses for this - /// interface. Also, functions may freely modify stack space local to their - /// invocation without having to report it through these interfaces. - enum ModRefBehavior { - /// DoesNotAccessMemory - This function does not perform any non-local loads - /// or stores to memory. - /// - /// This property corresponds to the GCC 'const' attribute. - /// This property corresponds to the LLVM IR 'readnone' attribute. - /// This property corresponds to the IntrNoMem LLVM intrinsic flag. - DoesNotAccessMemory = Nowhere | NoModRef, - - /// OnlyReadsArgumentPointees - The only memory references in this function - /// (if it has any) are non-volatile loads from objects pointed to by its - /// pointer-typed arguments, with arbitrary offsets. - /// - /// This property corresponds to the LLVM IR 'argmemonly' attribute combined - /// with 'readonly' attribute. - /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag. - OnlyReadsArgumentPointees = ArgumentPointees | Ref, - - /// OnlyAccessesArgumentPointees - The only memory references in this - /// function (if it has any) are non-volatile loads and stores from objects - /// pointed to by its pointer-typed arguments, with arbitrary offsets. - /// - /// This property corresponds to the LLVM IR 'argmemonly' attribute. - /// This property corresponds to the IntrReadWriteArgMem LLVM intrinsic flag. - OnlyAccessesArgumentPointees = ArgumentPointees | ModRef, - - /// OnlyReadsMemory - This function does not perform any non-local stores or - /// volatile loads, but may read from any memory location. - /// - /// This property corresponds to the GCC 'pure' attribute. - /// This property corresponds to the LLVM IR 'readonly' attribute. - /// This property corresponds to the IntrReadMem LLVM intrinsic flag. - OnlyReadsMemory = Anywhere | Ref, - - /// UnknownModRefBehavior - This indicates that the function could not be - /// classified into one of the behaviors above. - UnknownModRefBehavior = Anywhere | ModRef - }; + /// \name Simple mod/ref information + /// @{ /// Get the ModRef info associated with a pointer argument of a callsite. The /// result's bits are set to indicate the allowed aliasing ModRef kinds. 
Note /// that these bits do not necessarily account for the overall behavior of /// the function, but rather only provide additional per-argument /// information. - virtual ModRefResult getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); - /// getModRefBehavior - Return the behavior when calling the given call site. - virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + /// Return the behavior of the given call site. + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); - /// getModRefBehavior - Return the behavior when calling the given function. - /// For use when the call site is not known. - virtual ModRefBehavior getModRefBehavior(const Function *F); + /// Return the behavior when calling the given function. + FunctionModRefBehavior getModRefBehavior(const Function *F); - /// doesNotAccessMemory - If the specified call is known to never read or - /// write memory, return true. If the call only reads from known-constant - /// memory, it is also legal to return true. Calls that unwind the stack - /// are legal for this predicate. + /// Checks if the specified call is known to never read or write memory. + /// + /// Note that if the call only reads from known-constant memory, it is also + /// legal to return true. Also, calls that unwind the stack are legal for + /// this predicate. /// /// Many optimizations (such as CSE and LICM) can be performed on such calls /// without worrying about aliasing properties, and many calls have this /// property (e.g. calls to 'sin' and 'cos'). /// /// This property corresponds to the GCC 'const' attribute. - /// bool doesNotAccessMemory(ImmutableCallSite CS) { - return getModRefBehavior(CS) == DoesNotAccessMemory; + return getModRefBehavior(CS) == FMRB_DoesNotAccessMemory; } - /// doesNotAccessMemory - If the specified function is known to never read or - /// write memory, return true. For use when the call site is not known. + /// Checks if the specified function is known to never read or write memory. /// + /// Note that if the function only reads from known-constant memory, it is + /// also legal to return true. Also, function that unwind the stack are legal + /// for this predicate. + /// + /// Many optimizations (such as CSE and LICM) can be performed on such calls + /// to such functions without worrying about aliasing properties, and many + /// functions have this property (e.g. 'sin' and 'cos'). + /// + /// This property corresponds to the GCC 'const' attribute. bool doesNotAccessMemory(const Function *F) { - return getModRefBehavior(F) == DoesNotAccessMemory; + return getModRefBehavior(F) == FMRB_DoesNotAccessMemory; } - /// onlyReadsMemory - If the specified call is known to only read from - /// non-volatile memory (or not access memory at all), return true. Calls - /// that unwind the stack are legal for this predicate. + /// Checks if the specified call is known to only read from non-volatile + /// memory (or not access memory at all). + /// + /// Calls that unwind the stack are legal for this predicate. /// /// This property allows many common optimizations to be performed in the /// absence of interfering store instructions, such as CSE of strlen calls. /// /// This property corresponds to the GCC 'pure' attribute. 
- /// bool onlyReadsMemory(ImmutableCallSite CS) { return onlyReadsMemory(getModRefBehavior(CS)); } - /// onlyReadsMemory - If the specified function is known to only read from - /// non-volatile memory (or not access memory at all), return true. For use - /// when the call site is not known. + /// Checks if the specified function is known to only read from non-volatile + /// memory (or not access memory at all). /// + /// Functions that unwind the stack are legal for this predicate. + /// + /// This property allows many common optimizations to be performed in the + /// absence of interfering store instructions, such as CSE of strlen calls. + /// + /// This property corresponds to the GCC 'pure' attribute. bool onlyReadsMemory(const Function *F) { return onlyReadsMemory(getModRefBehavior(F)); } - /// onlyReadsMemory - Return true if functions with the specified behavior are - /// known to only read from non-volatile memory (or not access memory at all). - /// - static bool onlyReadsMemory(ModRefBehavior MRB) { - return !(MRB & Mod); + /// Checks if functions with the specified behavior are known to only read + /// from non-volatile memory (or not access memory at all). + static bool onlyReadsMemory(FunctionModRefBehavior MRB) { + return !(MRB & MRI_Mod); } - /// onlyAccessesArgPointees - Return true if functions with the specified - /// behavior are known to read and write at most from objects pointed to by - /// their pointer-typed arguments (with arbitrary offsets). - /// - static bool onlyAccessesArgPointees(ModRefBehavior MRB) { - return !(MRB & Anywhere & ~ArgumentPointees); + /// Checks if functions with the specified behavior are known to read and + /// write at most from objects pointed to by their pointer-typed arguments + /// (with arbitrary offsets). + static bool onlyAccessesArgPointees(FunctionModRefBehavior MRB) { + return !(MRB & FMRL_Anywhere & ~FMRL_ArgumentPointees); } - /// doesAccessArgPointees - Return true if functions with the specified - /// behavior are known to potentially read or write from objects pointed - /// to be their pointer-typed arguments (with arbitrary offsets). - /// - static bool doesAccessArgPointees(ModRefBehavior MRB) { - return (MRB & ModRef) && (MRB & ArgumentPointees); + /// Checks if functions with the specified behavior are known to potentially + /// read or write from objects pointed to be their pointer-typed arguments + /// (with arbitrary offsets). + static bool doesAccessArgPointees(FunctionModRefBehavior MRB) { + return (MRB & MRI_ModRef) && (MRB & FMRL_ArgumentPointees); } - /// getModRefInfo - Return information about whether or not an - /// instruction may read or write memory (without regard to a - /// specific location) - ModRefResult getModRefInfo(const Instruction *I) { + /// getModRefInfo (for call sites) - Return information about whether + /// a particular call site modifies or reads the specified memory location. + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + + /// getModRefInfo (for call sites) - A convenience wrapper. + ModRefInfo getModRefInfo(ImmutableCallSite CS, const Value *P, + uint64_t Size) { + return getModRefInfo(CS, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for calls) - Return information about whether + /// a particular call modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const CallInst *C, const MemoryLocation &Loc) { + return getModRefInfo(ImmutableCallSite(C), Loc); + } + + /// getModRefInfo (for calls) - A convenience wrapper. 
+ ModRefInfo getModRefInfo(const CallInst *C, const Value *P, uint64_t Size) { + return getModRefInfo(C, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for invokes) - Return information about whether + /// a particular invoke modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const InvokeInst *I, const MemoryLocation &Loc) { + return getModRefInfo(ImmutableCallSite(I), Loc); + } + + /// getModRefInfo (for invokes) - A convenience wrapper. + ModRefInfo getModRefInfo(const InvokeInst *I, const Value *P, uint64_t Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for loads) - Return information about whether + /// a particular load modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc); + + /// getModRefInfo (for loads) - A convenience wrapper. + ModRefInfo getModRefInfo(const LoadInst *L, const Value *P, uint64_t Size) { + return getModRefInfo(L, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for stores) - Return information about whether + /// a particular store modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc); + + /// getModRefInfo (for stores) - A convenience wrapper. + ModRefInfo getModRefInfo(const StoreInst *S, const Value *P, uint64_t Size) { + return getModRefInfo(S, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for fences) - Return information about whether + /// a particular store modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) { + // Conservatively correct. (We could possibly be a bit smarter if + // Loc is a alloca that doesn't escape.) + return MRI_ModRef; + } + + /// getModRefInfo (for fences) - A convenience wrapper. + ModRefInfo getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size) { + return getModRefInfo(S, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for cmpxchges) - Return information about whether + /// a particular cmpxchg modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, + const MemoryLocation &Loc); + + /// getModRefInfo (for cmpxchges) - A convenience wrapper. + ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX, const Value *P, + unsigned Size) { + return getModRefInfo(CX, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for atomicrmws) - Return information about whether + /// a particular atomicrmw modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc); + + /// getModRefInfo (for atomicrmws) - A convenience wrapper. + ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const Value *P, + unsigned Size) { + return getModRefInfo(RMW, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for va_args) - Return information about whether + /// a particular va_arg modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc); + + /// getModRefInfo (for va_args) - A convenience wrapper. + ModRefInfo getModRefInfo(const VAArgInst *I, const Value *P, uint64_t Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for catchpads) - Return information about whether + /// a particular catchpad modifies or reads the specified memory location. 
+ ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc); + + /// getModRefInfo (for catchpads) - A convenience wrapper. + ModRefInfo getModRefInfo(const CatchPadInst *I, const Value *P, + uint64_t Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// getModRefInfo (for catchrets) - Return information about whether + /// a particular catchret modifies or reads the specified memory location. + ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc); + + /// getModRefInfo (for catchrets) - A convenience wrapper. + ModRefInfo getModRefInfo(const CatchReturnInst *I, const Value *P, + uint64_t Size) { + return getModRefInfo(I, MemoryLocation(P, Size)); + } + + /// Check whether or not an instruction may read or write memory (without + /// regard to a specific location). + /// + /// For function calls, this delegates to the alias-analysis specific + /// call-site mod-ref behavior queries. Otherwise it delegates to the generic + /// mod ref information query without a location. + ModRefInfo getModRefInfo(const Instruction *I) { if (auto CS = ImmutableCallSite(I)) { auto MRB = getModRefBehavior(CS); - if (MRB & ModRef) - return ModRef; - else if (MRB & Ref) - return Ref; - else if (MRB & Mod) - return Mod; - return NoModRef; + if (MRB & MRI_ModRef) + return MRI_ModRef; + else if (MRB & MRI_Ref) + return MRI_Ref; + else if (MRB & MRI_Mod) + return MRI_Mod; + return MRI_NoModRef; } return getModRefInfo(I, MemoryLocation()); } - /// getModRefInfo - Return information about whether or not an instruction may - /// read or write the specified memory location. An instruction - /// that doesn't read or write memory may be trivially LICM'd for example. - ModRefResult getModRefInfo(const Instruction *I, const MemoryLocation &Loc) { + /// Check whether or not an instruction may read or write the specified + /// memory location. + /// + /// An instruction that doesn't read or write memory may be trivially LICM'd + /// for example. + /// + /// This primarily delegates to specific helpers above. + ModRefInfo getModRefInfo(const Instruction *I, const MemoryLocation &Loc) { switch (I->getOpcode()) { case Instruction::VAArg: return getModRefInfo((const VAArgInst*)I, Loc); case Instruction::Load: return getModRefInfo((const LoadInst*)I, Loc); @@ -350,196 +481,476 @@ public: return getModRefInfo((const AtomicRMWInst*)I, Loc); case Instruction::Call: return getModRefInfo((const CallInst*)I, Loc); case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc); - default: return NoModRef; + case Instruction::CatchPad: + return getModRefInfo((const CatchPadInst *)I, Loc); + case Instruction::CatchRet: + return getModRefInfo((const CatchReturnInst *)I, Loc); + default: + return MRI_NoModRef; } } - /// getModRefInfo - A convenience wrapper. - ModRefResult getModRefInfo(const Instruction *I, - const Value *P, uint64_t Size) { + /// A convenience wrapper for constructing the memory location. + ModRefInfo getModRefInfo(const Instruction *I, const Value *P, + uint64_t Size) { return getModRefInfo(I, MemoryLocation(P, Size)); } - /// getModRefInfo (for call sites) - Return information about whether - /// a particular call site modifies or reads the specified memory location. - virtual ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc); + /// Return information about whether a call and an instruction may refer to + /// the same memory locations. 
+ ModRefInfo getModRefInfo(Instruction *I, ImmutableCallSite Call); - /// getModRefInfo (for call sites) - A convenience wrapper. - ModRefResult getModRefInfo(ImmutableCallSite CS, - const Value *P, uint64_t Size) { - return getModRefInfo(CS, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for calls) - Return information about whether - /// a particular call modifies or reads the specified memory location. - ModRefResult getModRefInfo(const CallInst *C, const MemoryLocation &Loc) { - return getModRefInfo(ImmutableCallSite(C), Loc); - } - - /// getModRefInfo (for calls) - A convenience wrapper. - ModRefResult getModRefInfo(const CallInst *C, const Value *P, uint64_t Size) { - return getModRefInfo(C, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for invokes) - Return information about whether - /// a particular invoke modifies or reads the specified memory location. - ModRefResult getModRefInfo(const InvokeInst *I, const MemoryLocation &Loc) { - return getModRefInfo(ImmutableCallSite(I), Loc); - } - - /// getModRefInfo (for invokes) - A convenience wrapper. - ModRefResult getModRefInfo(const InvokeInst *I, - const Value *P, uint64_t Size) { - return getModRefInfo(I, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for loads) - Return information about whether - /// a particular load modifies or reads the specified memory location. - ModRefResult getModRefInfo(const LoadInst *L, const MemoryLocation &Loc); - - /// getModRefInfo (for loads) - A convenience wrapper. - ModRefResult getModRefInfo(const LoadInst *L, const Value *P, uint64_t Size) { - return getModRefInfo(L, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for stores) - Return information about whether - /// a particular store modifies or reads the specified memory location. - ModRefResult getModRefInfo(const StoreInst *S, const MemoryLocation &Loc); - - /// getModRefInfo (for stores) - A convenience wrapper. - ModRefResult getModRefInfo(const StoreInst *S, const Value *P, uint64_t Size){ - return getModRefInfo(S, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for fences) - Return information about whether - /// a particular store modifies or reads the specified memory location. - ModRefResult getModRefInfo(const FenceInst *S, const MemoryLocation &Loc) { - // Conservatively correct. (We could possibly be a bit smarter if - // Loc is a alloca that doesn't escape.) - return ModRef; - } - - /// getModRefInfo (for fences) - A convenience wrapper. - ModRefResult getModRefInfo(const FenceInst *S, const Value *P, uint64_t Size){ - return getModRefInfo(S, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for cmpxchges) - Return information about whether - /// a particular cmpxchg modifies or reads the specified memory location. - ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX, - const MemoryLocation &Loc); - - /// getModRefInfo (for cmpxchges) - A convenience wrapper. - ModRefResult getModRefInfo(const AtomicCmpXchgInst *CX, - const Value *P, unsigned Size) { - return getModRefInfo(CX, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for atomicrmws) - Return information about whether - /// a particular atomicrmw modifies or reads the specified memory location. - ModRefResult getModRefInfo(const AtomicRMWInst *RMW, - const MemoryLocation &Loc); - - /// getModRefInfo (for atomicrmws) - A convenience wrapper. 
- ModRefResult getModRefInfo(const AtomicRMWInst *RMW, - const Value *P, unsigned Size) { - return getModRefInfo(RMW, MemoryLocation(P, Size)); - } - - /// getModRefInfo (for va_args) - Return information about whether - /// a particular va_arg modifies or reads the specified memory location. - ModRefResult getModRefInfo(const VAArgInst *I, const MemoryLocation &Loc); - - /// getModRefInfo (for va_args) - A convenience wrapper. - ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, uint64_t Size){ - return getModRefInfo(I, MemoryLocation(P, Size)); - } - /// getModRefInfo - Return information about whether a call and an instruction - /// may refer to the same memory locations. - ModRefResult getModRefInfo(Instruction *I, - ImmutableCallSite Call); - - /// getModRefInfo - Return information about whether two call sites may refer - /// to the same set of memory locations. See + /// Return information about whether two call sites may refer to the same set + /// of memory locations. See the AA documentation for details: /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo - /// for details. - virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2); + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); - /// callCapturesBefore - Return information about whether a particular call - /// site modifies or reads the specified memory location. - ModRefResult callCapturesBefore(const Instruction *I, - const MemoryLocation &MemLoc, - DominatorTree *DT); + /// \brief Return information about whether a particular call site modifies + /// or reads the specified memory location \p MemLoc before instruction \p I + /// in a BasicBlock. An ordered basic block \p OBB can be used to speed up + /// instruction ordering queries inside the BasicBlock containing \p I. + ModRefInfo callCapturesBefore(const Instruction *I, + const MemoryLocation &MemLoc, DominatorTree *DT, + OrderedBasicBlock *OBB = nullptr); - /// callCapturesBefore - A convenience wrapper. - ModRefResult callCapturesBefore(const Instruction *I, const Value *P, - uint64_t Size, DominatorTree *DT) { - return callCapturesBefore(I, MemoryLocation(P, Size), DT); + /// \brief A convenience wrapper to synthesize a memory location. + ModRefInfo callCapturesBefore(const Instruction *I, const Value *P, + uint64_t Size, DominatorTree *DT, + OrderedBasicBlock *OBB = nullptr) { + return callCapturesBefore(I, MemoryLocation(P, Size), DT, OBB); } + /// @} //===--------------------------------------------------------------------===// - /// Higher level methods for querying mod/ref information. - /// + /// \name Higher level methods for querying mod/ref information. + /// @{ - /// canBasicBlockModify - Return true if it is possible for execution of the - /// specified basic block to modify the location Loc. + /// Check if it is possible for execution of the specified basic block to + /// modify the location Loc. bool canBasicBlockModify(const BasicBlock &BB, const MemoryLocation &Loc); - /// canBasicBlockModify - A convenience wrapper. - bool canBasicBlockModify(const BasicBlock &BB, const Value *P, uint64_t Size){ + /// A convenience wrapper synthesizing a memory location. + bool canBasicBlockModify(const BasicBlock &BB, const Value *P, + uint64_t Size) { return canBasicBlockModify(BB, MemoryLocation(P, Size)); } - /// canInstructionRangeModRef - Return true if it is possible for the - /// execution of the specified instructions to mod\ref (according to the - /// mode) the location Loc.
The instructions to consider are all - /// of the instructions in the range of [I1,I2] INCLUSIVE. - /// I1 and I2 must be in the same basic block. + /// Check if it is possible for the execution of the specified instructions + /// to mod/ref (according to the mode) the location Loc. + /// + /// The instructions to consider are all of the instructions in the range of + /// [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block. bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, const MemoryLocation &Loc, - const ModRefResult Mode); + const ModRefInfo Mode); - /// canInstructionRangeModRef - A convenience wrapper. - bool canInstructionRangeModRef(const Instruction &I1, - const Instruction &I2, const Value *Ptr, - uint64_t Size, const ModRefResult Mode) { + /// A convenience wrapper synthesizing a memory location. + bool canInstructionRangeModRef(const Instruction &I1, const Instruction &I2, + const Value *Ptr, uint64_t Size, + const ModRefInfo Mode) { return canInstructionRangeModRef(I1, I2, MemoryLocation(Ptr, Size), Mode); } +private: + class Concept; + template <typename AAResultT> class Model; + + template <typename AAResultT> friend class AAResultBase; + + std::vector<std::unique_ptr<Concept>> AAs; +}; + +/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis +/// pointer or reference. +typedef AAResults AliasAnalysis; + +/// A private abstract base class describing the concept of an individual alias +/// analysis implementation. +/// +/// This interface is implemented by any \c Model instantiation. It is also the +/// interface which a type used to instantiate the model must provide. +/// +/// All of these methods model methods by the same name in the \c +/// AAResults class. Only differences and specifics to how the +/// implementations are called are documented here. +class AAResults::Concept { +public: + virtual ~Concept() = 0; + + /// An update API used internally by the AAResults to provide + /// a handle back to the top level aggregation. + virtual void setAAResults(AAResults *NewAAR) = 0; + //===--------------------------------------------------------------------===// - /// Methods that clients should call when they transform the program to allow - /// alias analyses to update their internal data structures. Note that these - /// methods may be called on any instruction, regardless of whether or not - /// they have pointer-analysis implications. - /// + /// \name Alias Queries + /// @{ - /// deleteValue - This method should be called whenever an LLVM Value is - /// deleted from the program, for example when an instruction is found to be - /// redundant and is eliminated. - /// - virtual void deleteValue(Value *V); + /// The main low level interface to the alias analysis implementation. + /// Returns an AliasResult indicating whether the two pointers are aliased to + /// each other. This is the interface that must be implemented by specific + /// alias analysis implementations. + virtual AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) = 0; - /// addEscapingUse - This method should be used whenever an escaping use is - /// added to a pointer value. Analysis implementations may either return - /// conservative responses for that value in the future, or may recompute - /// some or all internal state to continue providing precise responses.
- /// - /// Escaping uses are considered by anything _except_ the following: - /// - GEPs or bitcasts of the pointer - /// - Loads through the pointer - /// - Stores through (but not of) the pointer - virtual void addEscapingUse(Use &U); + /// Checks whether the given location points to constant memory, or if + /// \p OrLocal is true whether it points to a local alloca. + virtual bool pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) = 0; - /// replaceWithNewValue - This method is the obvious combination of the two - /// above, and it provided as a helper to simplify client code. - /// - void replaceWithNewValue(Value *Old, Value *New) { - deleteValue(Old); + /// @} + //===--------------------------------------------------------------------===// + /// \name Simple mod/ref information + /// @{ + + /// Get the ModRef info associated with a pointer argument of a callsite. The + /// result's bits are set to indicate the allowed aliasing ModRef kinds. Note + /// that these bits do not necessarily account for the overall behavior of + /// the function, but rather only provide additional per-argument + /// information. + virtual ModRefInfo getArgModRefInfo(ImmutableCallSite CS, + unsigned ArgIdx) = 0; + + /// Return the behavior of the given call site. + virtual FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) = 0; + + /// Return the behavior when calling the given function. + virtual FunctionModRefBehavior getModRefBehavior(const Function *F) = 0; + + /// getModRefInfo (for call sites) - Return information about whether + /// a particular call site modifies or reads the specified memory location. + virtual ModRefInfo getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) = 0; + + /// Return information about whether two call sites may refer to the same set + /// of memory locations. See the AA documentation for details: + /// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo + virtual ModRefInfo getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) = 0; + + /// @} +}; + +/// A private class template which derives from \c Concept and wraps some other +/// type. +/// +/// This models the concept by directly forwarding each interface point to the +/// wrapped type which must implement a compatible interface. This provides +/// a type erased binding. 
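+///
+/// For illustration only, a concrete result gets wrapped roughly as below;
+/// MyAAResult is a hypothetical type implementing the concept, and TLI, P1,
+/// P2, Size1 and Size2 stand for values the client already has:
+///
+/// \code
+///   AAResults AAR;
+///   MyAAResult MyAA(TLI);
+///   AAR.addAAResult(MyAA); // Constructs a Model<MyAAResult> inside AAR.
+///   AliasResult AR = AAR.alias(MemoryLocation(P1, Size1),
+///                              MemoryLocation(P2, Size2));
+/// \endcode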
+template <typename AAResultT> class AAResults::Model final : public Concept { + AAResultT &Result; + +public: + explicit Model(AAResultT &Result, AAResults &AAR) : Result(Result) { + Result.setAAResults(&AAR); + } + ~Model() override {} + + void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); } + + AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) override { + return Result.alias(LocA, LocB); + } + + bool pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) override { + return Result.pointsToConstantMemory(Loc, OrLocal); + } + + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) override { + return Result.getArgModRefInfo(CS, ArgIdx); + } + + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { + return Result.getModRefBehavior(CS); + } + + FunctionModRefBehavior getModRefBehavior(const Function *F) override { + return Result.getModRefBehavior(F); + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) override { + return Result.getModRefInfo(CS, Loc); + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) override { + return Result.getModRefInfo(CS1, CS2); } }; +/// A CRTP-driven "mixin" base class to help implement the function alias +/// analysis results concept. +/// +/// Because of the nature of many alias analysis implementations, they often +/// only implement a subset of the interface. This base class will attempt to +/// implement the remaining portions of the interface in terms of simpler forms +/// of the interface where possible, and otherwise provide conservatively +/// correct fallback implementations. +/// +/// Implementors of an alias analysis should derive from this CRTP, and then +/// override specific methods that they wish to customize. There is no need to +/// use virtual anywhere; the CRTP base class does static dispatch to the +/// derived type passed into it. +template <typename DerivedT> class AAResultBase { + // Expose some parts of the interface only to the AAResults::Model + // for wrapping. Specifically, this allows the model to call our + // setAAResults method without exposing it as a fully public API. + friend class AAResults::Model<DerivedT>; + + /// A pointer to the AAResults object that this AAResult is + /// aggregated within. May be null if not aggregated. + AAResults *AAR; + + /// Helper to dispatch calls back through the derived type. + DerivedT &derived() { return static_cast<DerivedT &>(*this); } + + /// A setter for the AAResults pointer, which is used to satisfy the + /// AAResults::Model contract. + void setAAResults(AAResults *NewAAR) { AAR = NewAAR; } + +protected: + /// This proxy class models a common pattern where we delegate to either the + /// top-level \c AAResults aggregation if one is registered, or to the + /// current result if none are registered. + class AAResultsProxy { + AAResults *AAR; + DerivedT &CurrentResult; + + public: + AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult) + : AAR(AAR), CurrentResult(CurrentResult) {} + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + return AAR ? AAR->alias(LocA, LocB) : CurrentResult.alias(LocA, LocB); + } + + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) { + return AAR ? AAR->pointsToConstantMemory(Loc, OrLocal) + : CurrentResult.pointsToConstantMemory(Loc, OrLocal); + } + + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { + return AAR ?
AAR->getArgModRefInfo(CS, ArgIdx) : CurrentResult.getArgModRefInfo(CS, ArgIdx); + } + + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + return AAR ? AAR->getModRefBehavior(CS) : CurrentResult.getModRefBehavior(CS); + } + + FunctionModRefBehavior getModRefBehavior(const Function *F) { + return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F); + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { + return AAR ? AAR->getModRefInfo(CS, Loc) + : CurrentResult.getModRefInfo(CS, Loc); + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { + return AAR ? AAR->getModRefInfo(CS1, CS2) : CurrentResult.getModRefInfo(CS1, CS2); + } + }; + + const TargetLibraryInfo &TLI; + + explicit AAResultBase(const TargetLibraryInfo &TLI) : TLI(TLI) {} + + // Provide all the copy and move constructors so that derived types aren't + // constrained. + AAResultBase(const AAResultBase &Arg) : TLI(Arg.TLI) {} + AAResultBase(AAResultBase &&Arg) : TLI(Arg.TLI) {} + + /// Get a proxy for the best AA result set to query at this time. + /// + /// When this result is part of a larger aggregation, this will proxy to that + /// aggregation. When this result is used in isolation, it will just delegate + /// back to the derived class's implementation. + AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); } + +public: + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + return MayAlias; + } + + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) { + return false; + } + + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { + return MRI_ModRef; + } + + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + if (!CS.hasOperandBundles()) + // If CS has operand bundles then aliasing attributes from the function it + // calls do not directly apply to the CallSite. This can be made more + // precise in the future. + if (const Function *F = CS.getCalledFunction()) + return getBestAAResults().getModRefBehavior(F); + + return FMRB_UnknownModRefBehavior; + } + + FunctionModRefBehavior getModRefBehavior(const Function *F) { + return FMRB_UnknownModRefBehavior; + } + + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); +}; + +/// Synthesize \c ModRefInfo for a call site and memory location by examining +/// the general behavior of the call site and any specific information for its +/// arguments. +/// +/// This essentially delegates across the alias analysis interface to collect +/// information which may be enough to (conservatively) fulfill the query.
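+///
+/// For illustration, with a readonly callee that only accesses its argument
+/// pointees (a strlen-like function; the IR below is hypothetical), the query
+/// refines as the comments indicate:
+///
+/// \code
+///   ; %n = call i64 @strlen(i8* %p)  ; readonly, argument pointees only
+///   ; getModRefInfo(CS, Loc) is at most MRI_Ref when Loc may alias (%p, 1),
+///   ; and MRI_NoModRef when Loc is disjoint from every pointer argument.
+/// \endcode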
+template <typename DerivedT> +ModRefInfo AAResultBase<DerivedT>::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + auto MRB = getBestAAResults().getModRefBehavior(CS); + if (MRB == FMRB_DoesNotAccessMemory) + return MRI_NoModRef; + + ModRefInfo Mask = MRI_ModRef; + if (AAResults::onlyReadsMemory(MRB)) + Mask = MRI_Ref; + + if (AAResults::onlyAccessesArgPointees(MRB)) { + bool DoesAlias = false; + ModRefInfo AllArgsMask = MRI_NoModRef; + if (AAResults::doesAccessArgPointees(MRB)) { + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), + AE = CS.arg_end(); + AI != AE; ++AI) { + const Value *Arg = *AI; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned ArgIdx = std::distance(CS.arg_begin(), AI); + MemoryLocation ArgLoc = MemoryLocation::getForArgument(CS, ArgIdx, TLI); + AliasResult ArgAlias = getBestAAResults().alias(ArgLoc, Loc); + if (ArgAlias != NoAlias) { + ModRefInfo ArgMask = getBestAAResults().getArgModRefInfo(CS, ArgIdx); + DoesAlias = true; + AllArgsMask = ModRefInfo(AllArgsMask | ArgMask); + } + } + } + if (!DoesAlias) + return MRI_NoModRef; + Mask = ModRefInfo(Mask & AllArgsMask); + } + + // If Loc is a constant memory location, the call definitely could not + // modify the memory location. + if ((Mask & MRI_Mod) && + getBestAAResults().pointsToConstantMemory(Loc, /*OrLocal*/ false)) + Mask = ModRefInfo(Mask & ~MRI_Mod); + + return Mask; +} + +/// Synthesize \c ModRefInfo for two call sites by examining the general +/// behavior of the call site and any specific information for its arguments. +/// +/// This essentially delegates across the alias analysis interface to collect +/// information which may be enough to (conservatively) fulfill the query. +template <typename DerivedT> +ModRefInfo AAResultBase<DerivedT>::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // If CS1 or CS2 are readnone, they don't interact. + auto CS1B = getBestAAResults().getModRefBehavior(CS1); + if (CS1B == FMRB_DoesNotAccessMemory) + return MRI_NoModRef; + + auto CS2B = getBestAAResults().getModRefBehavior(CS2); + if (CS2B == FMRB_DoesNotAccessMemory) + return MRI_NoModRef; + + // If they both only read from memory, there is no dependence. + if (AAResults::onlyReadsMemory(CS1B) && AAResults::onlyReadsMemory(CS2B)) + return MRI_NoModRef; + + ModRefInfo Mask = MRI_ModRef; + + // If CS1 only reads memory, the only dependence on CS2 can be + // from CS1 reading memory written by CS2. + if (AAResults::onlyReadsMemory(CS1B)) + Mask = ModRefInfo(Mask & MRI_Ref); + + // If CS2 only accesses memory through arguments, accumulate the mod/ref + // information from CS1's references to the memory referenced by + // CS2's arguments. + if (AAResults::onlyAccessesArgPointees(CS2B)) { + ModRefInfo R = MRI_NoModRef; + if (AAResults::doesAccessArgPointees(CS2B)) { + for (ImmutableCallSite::arg_iterator I = CS2.arg_begin(), + E = CS2.arg_end(); + I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); + auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, TLI); + + // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence + // of CS1 on that location is the inverse.
+ ModRefInfo ArgMask = + getBestAAResults().getArgModRefInfo(CS2, CS2ArgIdx); + if (ArgMask == MRI_Mod) + ArgMask = MRI_ModRef; + else if (ArgMask == MRI_Ref) + ArgMask = MRI_Mod; + + ArgMask = ModRefInfo(ArgMask & + getBestAAResults().getModRefInfo(CS1, CS2ArgLoc)); + + R = ModRefInfo((R | ArgMask) & Mask); + if (R == Mask) + break; + } + } + return R; + } + + // If CS1 only accesses memory through arguments, check if CS2 references + // any of the memory referenced by CS1's arguments. If not, return NoModRef. + if (AAResults::onlyAccessesArgPointees(CS1B)) { + ModRefInfo R = MRI_NoModRef; + if (AAResults::doesAccessArgPointees(CS1B)) { + for (ImmutableCallSite::arg_iterator I = CS1.arg_begin(), + E = CS1.arg_end(); + I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); + auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, TLI); + + // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod + // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1 + // might Ref, then we care only about a Mod by CS2. + ModRefInfo ArgMask = getBestAAResults().getArgModRefInfo(CS1, CS1ArgIdx); + ModRefInfo ArgR = getBestAAResults().getModRefInfo(CS2, CS1ArgLoc); + if (((ArgMask & MRI_Mod) != MRI_NoModRef && + (ArgR & MRI_ModRef) != MRI_NoModRef) || + ((ArgMask & MRI_Ref) != MRI_NoModRef && + (ArgR & MRI_Mod) != MRI_NoModRef)) + R = ModRefInfo((R | ArgMask) & Mask); + + if (R == Mask) + break; + } + } + return R; + } + + return Mask; +} + /// isNoAliasCall - Return true if this pointer is returned by a noalias /// function. bool isNoAliasCall(const Value *V); @@ -564,6 +975,98 @@ bool isIdentifiedObject(const Value *V); /// IdentifiedObjects. bool isIdentifiedFunctionLocal(const Value *V); +/// A manager for alias analyses. +/// +/// This class can have analyses registered with it and when run, it will run +/// all of them and aggregate their results into a single AA results interface +/// that dispatches across all of the alias analysis results available. +/// +/// Note that the order in which analyses are registered is very significant. +/// That is the order in which the results will be aggregated and queried. +/// +/// This manager effectively wraps the AnalysisManager for registering alias +/// analyses. When you register your alias analysis with this manager, it will +/// ensure the analysis itself is registered with its AnalysisManager. +class AAManager { +public: + typedef AAResults Result; + + // This type has value semantics. We have to spell these out because MSVC + // won't synthesize them. + AAManager() {} + AAManager(AAManager &&Arg) + : FunctionResultGetters(std::move(Arg.FunctionResultGetters)) {} + AAManager(const AAManager &Arg) + : FunctionResultGetters(Arg.FunctionResultGetters) {} + AAManager &operator=(AAManager &&RHS) { + FunctionResultGetters = std::move(RHS.FunctionResultGetters); + return *this; + } + AAManager &operator=(const AAManager &RHS) { + FunctionResultGetters = RHS.FunctionResultGetters; + return *this; + } + + /// Register a specific AA result.
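+  ///
+  /// For illustration, a new-PM pipeline might register several of the
+  /// analyses declared in this tree (a hypothetical setup):
+  ///
+  /// \code
+  ///   AAManager AA;
+  ///   AA.registerFunctionAnalysis<BasicAA>();
+  ///   AA.registerFunctionAnalysis<CFLAA>();
+  ///   // AA.run(F, AM) then aggregates both results, in registration order.
+  /// \endcode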
+ template <typename AnalysisT> void registerFunctionAnalysis() { + FunctionResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>); + } + + Result run(Function &F, AnalysisManager<Function> &AM) { + Result R; + for (auto &Getter : FunctionResultGetters) + (*Getter)(F, AM, R); + return R; + } + +private: + SmallVector<void (*)(Function &F, AnalysisManager<Function> &AM, + AAResults &AAResults), + 4> FunctionResultGetters; + + template <typename AnalysisT> + static void getFunctionAAResultImpl(Function &F, + AnalysisManager<Function> &AM, + AAResults &AAResults) { + AAResults.addAAResult(AM.template getResult<AnalysisT>(F)); + } +}; + +/// A wrapper pass to provide the legacy pass manager access to a suitably +/// prepared AAResults object. +class AAResultsWrapperPass : public FunctionPass { + std::unique_ptr<AAResults> AAR; + +public: + static char ID; + + AAResultsWrapperPass(); + + AAResults &getAAResults() { return *AAR; } + const AAResults &getAAResults() const { return *AAR; } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +FunctionPass *createAAResultsWrapperPass(); + +/// A wrapper pass around a callback which can be used to populate the +/// AAResults in the AAResultsWrapperPass from an external AA. +/// +/// The callback provided here will be used each time we prepare an AAResults +/// object, and will receive a reference to the function wrapper pass, the +/// function, and the AAResults object to populate. This should be used when +/// setting up a custom pass pipeline to inject a hook into the AA results. +ImmutablePass *createExternalAAWrapperPass( + std::function<void(Pass &, Function &, AAResults &)> Callback); + +/// A helper for the legacy pass manager to create an \c AAResults +/// object populated to the best of our ability for a particular function when +/// inside of a \c ModulePass or a \c CallGraphSCCPass. +AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR); + } // End llvm namespace #endif diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index 881699d09225..37fd69b081cc 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -20,13 +20,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ValueHandle.h" #include <vector> namespace llvm { -class AliasAnalysis; class LoadInst; class StoreInst; class VAArgInst; @@ -42,13 +42,14 @@ class AliasSet : public ilist_node<AliasSet> { AliasSet *AS; uint64_t Size; AAMDNodes AAInfo; + public: PointerRec(Value *V) : Val(V), PrevInList(nullptr), NextInList(nullptr), AS(nullptr), Size(0), AAInfo(DenseMapInfo<AAMDNodes>::getEmptyKey()) {} Value *getValue() const { return Val; } - + PointerRec *getNext() const { return NextInList; } bool hasAliasSet() const { return AS != nullptr; } @@ -156,7 +157,7 @@ class AliasSet : public ilist_node<AliasSet> { assert(i < UnknownInsts.size()); return UnknownInsts[i]; } - + public: /// Accessors... bool isRef() const { return Access & RefAccess; } @@ -190,6 +191,7 @@ public: class iterator : public std::iterator<std::forward_iterator_tag, PointerRec> { PointerRec *CurNode; + public: explicit iterator(PointerRec *CN = nullptr) : CurNode(CN) {} @@ -282,14 +284,14 @@ inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) { return OS; } - class AliasSetTracker { /// CallbackVH - A CallbackVH to arrange for AliasSetTracker to be /// notified whenever a Value is deleted.
- class ASTCallbackVH : public CallbackVH { + class ASTCallbackVH final : public CallbackVH { AliasSetTracker *AST; void deleted() override; void allUsesReplacedWith(Value *) override; + public: ASTCallbackVH(Value *V, AliasSetTracker *AST = nullptr); ASTCallbackVH &operator=(Value *V); @@ -347,7 +349,7 @@ public: bool remove(Instruction *I); void remove(AliasSet &AS); bool removeUnknown(Instruction *I); - + void clear(); /// getAliasSets - Return the alias sets that are active. @@ -398,7 +400,6 @@ public: /// void copyValue(Value *From, Value *To); - typedef ilist<AliasSet>::iterator iterator; typedef ilist<AliasSet>::const_iterator const_iterator; diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index 1f00b691b305..b903f96d55b2 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -66,7 +66,7 @@ public: /// \brief Add an @llvm.assume intrinsic to this function's cache. /// - /// The call passed in must be an instruction within this fuction and must + /// The call passed in must be an instruction within this function and must /// not already be in the cache. void registerAssumption(CallInst *CI); @@ -79,7 +79,7 @@ public: } /// \brief Access the list of assumption handles currently tracked for this - /// fuction. + /// function. /// /// Note that these produce weak handles that may be null. The caller must /// handle that case. @@ -140,7 +140,7 @@ public: class AssumptionCacheTracker : public ImmutablePass { /// A callback value handle applied to function objects, which we use to /// delete our cache of intrinsics for a function when it is deleted. - class FunctionCallbackVH : public CallbackVH { + class FunctionCallbackVH final : public CallbackVH { AssumptionCacheTracker *ACT; void deleted() override; diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h new file mode 100644 index 000000000000..181a9327024c --- /dev/null +++ b/include/llvm/Analysis/BasicAliasAnalysis.h @@ -0,0 +1,223 @@ +//===- BasicAliasAnalysis.h - Stateless, local Alias Analysis ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for LLVM's primary stateless and local alias analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_BASICALIASANALYSIS_H +#define LLVM_ANALYSIS_BASICALIASANALYSIS_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +class AssumptionCache; +class DominatorTree; +class LoopInfo; + +/// This is the AA result object for the basic, local, and stateless alias +/// analysis. It implements the AA query interface in an entirely stateless +/// manner. As one consequence, it is never invalidated. While it does retain +/// some storage, that is used as an optimization and not to preserve +/// information from query to query.
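+///
+/// For illustration, a result can be built directly from per-function
+/// analyses (the wrapper passes below are the usual route; F, TLI, AC, DT and
+/// LI stand for analysis results the client already has):
+///
+/// \code
+///   BasicAAResult BAR(F.getParent()->getDataLayout(), TLI, AC, &DT, &LI);
+///   AliasResult AR = BAR.alias(MemoryLocation(P1, Size1),
+///                              MemoryLocation(P2, Size2));
+/// \endcode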
+class BasicAAResult : public AAResultBase<BasicAAResult> { + friend AAResultBase<BasicAAResult>; + + const DataLayout &DL; + AssumptionCache &AC; + DominatorTree *DT; + LoopInfo *LI; + +public: + BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI, + AssumptionCache &AC, DominatorTree *DT = nullptr, + LoopInfo *LI = nullptr) + : AAResultBase(TLI), DL(DL), AC(AC), DT(DT), LI(LI) {} + + BasicAAResult(const BasicAAResult &Arg) + : AAResultBase(Arg), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {} + BasicAAResult(BasicAAResult &&Arg) + : AAResultBase(std::move(Arg)), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), + LI(Arg.LI) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); + + /// Chases pointers until we find a (constant global) or not. + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + + /// Get the location associated with a pointer argument of a callsite. + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); + + /// Returns the behavior when calling the given call site. + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); + + /// Returns the behavior when calling the given function. For use when the + /// call site is not known. + FunctionModRefBehavior getModRefBehavior(const Function *F); + +private: + // A linear transformation of a Value; this class represents ZExt(SExt(V, + // SExtBits), ZExtBits) * Scale + Offset. + struct VariableGEPIndex { + + // An opaque Value - we can't decompose this further. + const Value *V; + + // We need to track what extensions we've done as we consider the same Value + // with different extensions as different variables in a GEP's linear + // expression; + // e.g.: if V == -1, then sext(x) != zext(x). + unsigned ZExtBits; + unsigned SExtBits; + + int64_t Scale; + + bool operator==(const VariableGEPIndex &Other) const { + return V == Other.V && ZExtBits == Other.ZExtBits && + SExtBits == Other.SExtBits && Scale == Other.Scale; + } + + bool operator!=(const VariableGEPIndex &Other) const { + return !operator==(Other); + } + }; + + /// Track alias queries to guard against recursion. + typedef std::pair<MemoryLocation, MemoryLocation> LocPair; + typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy; + AliasCacheTy AliasCache; + + /// Tracks phi nodes we have visited. + /// + /// When we interpret "Value" pointer equality as value equality we need to + /// make sure that the "Value" is not part of a cycle. Otherwise, two uses + /// could come from different "iterations" of a cycle and see different + /// values for the same "Value" pointer. + /// + /// The following example shows the problem: + /// %p = phi(%alloca1, %addr2) + /// %l = load %ptr + /// %addr1 = gep, %alloca2, 0, %l + /// %addr2 = gep %alloca2, 0, (%l + 1) + /// alias(%p, %addr1) -> MayAlias ! + /// store %l, ... + SmallPtrSet<const BasicBlock *, 8> VisitedPhiBBs; + + /// Tracks instructions visited by pointsToConstantMemory.
+ SmallPtrSet<const Value *, 16> Visited; + + static const Value * + GetLinearExpression(const Value *V, APInt &Scale, APInt &Offset, + unsigned &ZExtBits, unsigned &SExtBits, + const DataLayout &DL, unsigned Depth, AssumptionCache *AC, + DominatorTree *DT, bool &NSW, bool &NUW); + + static const Value * + DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, + SmallVectorImpl<VariableGEPIndex> &VarIndices, + bool &MaxLookupReached, const DataLayout &DL, + AssumptionCache *AC, DominatorTree *DT); + /// \brief A Heuristic for aliasGEP that searches for a constant offset + /// between the variables. + /// + /// GetLinearExpression has some limitations, as generally zext(%x + 1) + /// != zext(%x) + zext(1) if the arithmetic overflows. GetLinearExpression + /// will therefore conservatively refuse to decompose these expressions. + /// However, we know that, for all %x, zext(%x) != zext(%x + 1), even if + /// the addition overflows. + bool + constantOffsetHeuristic(const SmallVectorImpl<VariableGEPIndex> &VarIndices, + uint64_t V1Size, uint64_t V2Size, int64_t BaseOffset, + AssumptionCache *AC, DominatorTree *DT); + + bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2); + + void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, + const SmallVectorImpl<VariableGEPIndex> &Src); + + AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, + const AAMDNodes &V1AAInfo, const Value *V2, + uint64_t V2Size, const AAMDNodes &V2AAInfo, + const Value *UnderlyingV1, const Value *UnderlyingV2); + + AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize, + const AAMDNodes &PNAAInfo, const Value *V2, + uint64_t V2Size, const AAMDNodes &V2AAInfo); + + AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize, + const AAMDNodes &SIAAInfo, const Value *V2, + uint64_t V2Size, const AAMDNodes &V2AAInfo); + + AliasResult aliasCheck(const Value *V1, uint64_t V1Size, AAMDNodes V1AATag, + const Value *V2, uint64_t V2Size, AAMDNodes V2AATag); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class BasicAA { +public: + typedef BasicAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + BasicAAResult run(Function &F, AnalysisManager<Function> *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "BasicAliasAnalysis"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the BasicAAResult object. +class BasicAAWrapperPass : public FunctionPass { + std::unique_ptr<BasicAAResult> Result; + + virtual void anchor(); + +public: + static char ID; + + BasicAAWrapperPass(); + + BasicAAResult &getResult() { return *Result; } + const BasicAAResult &getResult() const { return *Result; } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +FunctionPass *createBasicAAWrapperPass(); + +/// A helper for the legacy pass manager to create a \c BasicAAResult object +/// populated to the best of our ability for a particular function when inside +/// of a \c ModulePass or a \c CallGraphSCCPass.
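+///
+/// For illustration, such a pass might combine this with the aggregation
+/// helper declared in AliasAnalysis.h (hypothetical client code):
+///
+/// \code
+///   BasicAAResult BAR = createLegacyPMBasicAAResult(*this, F);
+///   AAResults AAR = createLegacyPMAAResults(*this, F, BAR);
+/// \endcode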
+BasicAAResult createLegacyPMBasicAAResult(Pass &P, Function &F); +} + +#endif diff --git a/include/llvm/Analysis/BlockFrequencyInfo.h b/include/llvm/Analysis/BlockFrequencyInfo.h index f27c32df9283..6f2a2b522769 100644 --- a/include/llvm/Analysis/BlockFrequencyInfo.h +++ b/include/llvm/Analysis/BlockFrequencyInfo.h @@ -21,26 +21,20 @@ namespace llvm { class BranchProbabilityInfo; +class LoopInfo; template <class BlockT> class BlockFrequencyInfoImpl; /// BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to /// estimate IR basic block frequencies. -class BlockFrequencyInfo : public FunctionPass { +class BlockFrequencyInfo { typedef BlockFrequencyInfoImpl<BasicBlock> ImplType; std::unique_ptr<ImplType> BFI; public: - static char ID; - BlockFrequencyInfo(); + BlockFrequencyInfo(const Function &F, const BranchProbabilityInfo &BPI, + const LoopInfo &LI); - ~BlockFrequencyInfo() override; - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool runOnFunction(Function &F) override; - void releaseMemory() override; - void print(raw_ostream &O, const Module *M) const override; const Function *getFunction() const; void view() const; @@ -51,6 +45,13 @@ public: /// floating points. BlockFrequency getBlockFreq(const BasicBlock *BB) const; + // Set the frequency of the given basic block. + void setBlockFreq(const BasicBlock *BB, uint64_t Freq); + + /// calculate - compute block frequency info for the given function. + void calculate(const Function &F, const BranchProbabilityInfo &BPI, + const LoopInfo &LI); + // Print the block frequency Freq to OS using the current functions entry // frequency to convert freq into a relative decimal form. raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency Freq) const; @@ -60,7 +61,28 @@ public: raw_ostream &printBlockFreq(raw_ostream &OS, const BasicBlock *BB) const; uint64_t getEntryFreq() const; + void releaseMemory(); + void print(raw_ostream &OS) const; +}; +/// \brief Legacy analysis pass which computes \c BlockFrequencyInfo. +class BlockFrequencyInfoWrapperPass : public FunctionPass { + BlockFrequencyInfo BFI; + +public: + static char ID; + + BlockFrequencyInfoWrapperPass(); + ~BlockFrequencyInfoWrapperPass() override; + + BlockFrequencyInfo &getBFI() { return BFI; } + const BlockFrequencyInfo &getBFI() const { return BFI; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M) const override; }; } diff --git a/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/include/llvm/Analysis/BlockFrequencyInfoImpl.h index 32d96090f456..387e9a887d93 100644 --- a/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -84,7 +84,7 @@ public: /// \brief Add another mass. /// /// Adds another mass, saturating at \a isFull() rather than overflowing. - BlockMass &operator+=(const BlockMass &X) { + BlockMass &operator+=(BlockMass X) { uint64_t Sum = Mass + X.Mass; Mass = Sum < Mass ? UINT64_MAX : Sum; return *this; @@ -94,23 +94,23 @@ public: /// /// Subtracts another mass, saturating at \a isEmpty() rather than /// undeflowing. - BlockMass &operator-=(const BlockMass &X) { + BlockMass &operator-=(BlockMass X) { uint64_t Diff = Mass - X.Mass; Mass = Diff > Mass ?
0 : Diff; return *this; } - BlockMass &operator*=(const BranchProbability &P) { + BlockMass &operator*=(BranchProbability P) { Mass = P.scale(Mass); return *this; } - bool operator==(const BlockMass &X) const { return Mass == X.Mass; } - bool operator!=(const BlockMass &X) const { return Mass != X.Mass; } - bool operator<=(const BlockMass &X) const { return Mass <= X.Mass; } - bool operator>=(const BlockMass &X) const { return Mass >= X.Mass; } - bool operator<(const BlockMass &X) const { return Mass < X.Mass; } - bool operator>(const BlockMass &X) const { return Mass > X.Mass; } + bool operator==(BlockMass X) const { return Mass == X.Mass; } + bool operator!=(BlockMass X) const { return Mass != X.Mass; } + bool operator<=(BlockMass X) const { return Mass <= X.Mass; } + bool operator>=(BlockMass X) const { return Mass >= X.Mass; } + bool operator<(BlockMass X) const { return Mass < X.Mass; } + bool operator>(BlockMass X) const { return Mass > X.Mass; } /// \brief Convert to scaled number. /// @@ -122,20 +122,20 @@ public: raw_ostream &print(raw_ostream &OS) const; }; -inline BlockMass operator+(const BlockMass &L, const BlockMass &R) { +inline BlockMass operator+(BlockMass L, BlockMass R) { return BlockMass(L) += R; } -inline BlockMass operator-(const BlockMass &L, const BlockMass &R) { +inline BlockMass operator-(BlockMass L, BlockMass R) { return BlockMass(L) -= R; } -inline BlockMass operator*(const BlockMass &L, const BranchProbability &R) { +inline BlockMass operator*(BlockMass L, BranchProbability R) { return BlockMass(L) *= R; } -inline BlockMass operator*(const BranchProbability &L, const BlockMass &R) { +inline BlockMass operator*(BranchProbability L, BlockMass R) { return BlockMass(R) *= L; } -inline raw_ostream &operator<<(raw_ostream &OS, const BlockMass &X) { +inline raw_ostream &operator<<(raw_ostream &OS, BlockMass X) { return X.print(OS); } @@ -477,6 +477,8 @@ public: BlockFrequency getBlockFreq(const BlockNode &Node) const; + void setBlockFreq(const BlockNode &Node, uint64_t Freq); + raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const; raw_ostream &printBlockFreq(raw_ostream &OS, const BlockFrequency &Freq) const; @@ -905,14 +907,15 @@ template <class BT> class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { public: const FunctionT *getFunction() const { return F; } - void doFunction(const FunctionT *F, const BranchProbabilityInfoT *BPI, - const LoopInfoT *LI); + void calculate(const FunctionT &F, const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI); BlockFrequencyInfoImpl() : BPI(nullptr), LI(nullptr), F(nullptr) {} using BlockFrequencyInfoImplBase::getEntryFreq; BlockFrequency getBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); } + void setBlockFreq(const BlockT *BB, uint64_t Freq); Scaled64 getFloatingBlockFreq(const BlockT *BB) const { return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB)); } @@ -938,13 +941,13 @@ public: }; template <class BT> -void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F, - const BranchProbabilityInfoT *BPI, - const LoopInfoT *LI) { +void BlockFrequencyInfoImpl<BT>::calculate(const FunctionT &F, + const BranchProbabilityInfoT &BPI, + const LoopInfoT &LI) { // Save the parameters. - this->BPI = BPI; - this->LI = LI; - this->F = F; + this->BPI = &BPI; + this->LI = &LI; + this->F = &F; // Clean up left-over data structures. BlockFrequencyInfoImplBase::clear(); @@ -952,8 +955,8 @@ void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F, Nodes.clear(); // Initialize.
- DEBUG(dbgs() << "\nblock-frequency: " << F->getName() << "\n=================" - << std::string(F->getName().size(), '=') << "\n"); + DEBUG(dbgs() << "\nblock-frequency: " << F.getName() << "\n=================" + << std::string(F.getName().size(), '=') << "\n"); initializeRPOT(); initializeLoops(); @@ -965,8 +968,23 @@ void BlockFrequencyInfoImpl<BT>::doFunction(const FunctionT *F, finalizeMetrics(); } +template <class BT> +void BlockFrequencyInfoImpl<BT>::setBlockFreq(const BlockT *BB, uint64_t Freq) { + if (Nodes.count(BB)) + BlockFrequencyInfoImplBase::setBlockFreq(getNode(BB), Freq); + else { + // If BB is a newly added block after BFI is done, we need to create a new + // BlockNode for it assigned with a new index. The index can be determined + // by the size of Freqs. + BlockNode NewNode(Freqs.size()); + Nodes[BB] = NewNode; + Freqs.emplace_back(); + BlockFrequencyInfoImplBase::setBlockFreq(NewNode, Freq); + } +} + template <class BT> void BlockFrequencyInfoImpl<BT>::initializeRPOT() { - const BlockT *Entry = F->begin(); + const BlockT *Entry = &F->front(); RPOT.reserve(F->size()); std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT)); std::reverse(RPOT.begin(), RPOT.end()); @@ -1155,6 +1173,13 @@ void BlockFrequencyInfoImpl<BT>::computeIrreducibleMass( updateLoopWithIrreducible(*OuterLoop); } +namespace { +// A helper function that converts a branch probability into weight. +inline uint32_t getWeightFromBranchProb(const BranchProbability Prob) { + return Prob.getNumerator(); +} +} // namespace + template <class BT> bool BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop, @@ -1171,10 +1196,8 @@ BlockFrequencyInfoImpl<BT>::propagateMassToSuccessors(LoopData *OuterLoop, const BlockT *BB = getBlock(Node); for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB); SI != SE; ++SI) - // Do not dereference SI, or getEdgeWeight() is linear in the number of - // successors. if (!addToDist(Dist, OuterLoop, Node, getNode(*SI), - BPI->getEdgeWeight(BB, SI))) + getWeightFromBranchProb(BPI->getEdgeProbability(BB, SI)))) // Irreducible backedge. return false; } @@ -1190,10 +1213,11 @@ raw_ostream &BlockFrequencyInfoImpl<BT>::print(raw_ostream &OS) const { if (!F) return OS; OS << "block-frequency-info: " << F->getName() << "\n"; - for (const BlockT &BB : *F) - OS << " - " << bfi_detail::getBlockName(&BB) - << ": float = " << getFloatingBlockFreq(&BB) - << ", int = " << getBlockFreq(&BB).getFrequency() << "\n"; + for (const BlockT &BB : *F) { + OS << " - " << bfi_detail::getBlockName(&BB) << ": float = "; + getFloatingBlockFreq(&BB).print(OS, 5) + << ", int = " << getBlockFreq(&BB).getFrequency() << "\n"; + } // Add an extra newline for readability. OS << "\n"; diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h index 9d867567ba29..cfdf218491bd 100644 --- a/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/include/llvm/Analysis/BranchProbabilityInfo.h @@ -25,9 +25,9 @@ namespace llvm { class LoopInfo; class raw_ostream; -/// \brief Analysis pass providing branch probability information. +/// \brief Analysis providing branch probability information. /// -/// This is a function analysis pass which provides information on the relative +/// This is a function analysis which provides information on the relative /// probabilities of each "edge" in the function's CFG where such an edge is /// defined by a pair (PredBlock and an index in the successors).
The /// probability of an edge from one block is always relative to the @@ -37,20 +37,14 @@ class raw_ostream; /// identify an edge, since we can have multiple edges from Src to Dst. /// As an example, we can have a switch which jumps to Dst with value 0 and /// value 10. -class BranchProbabilityInfo : public FunctionPass { +class BranchProbabilityInfo { public: - static char ID; + BranchProbabilityInfo() {} + BranchProbabilityInfo(Function &F, const LoopInfo &LI) { calculate(F, LI); } - BranchProbabilityInfo() : FunctionPass(ID) { - initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); - } + void releaseMemory(); - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnFunction(Function &F) override; - - void releaseMemory() override; - - void print(raw_ostream &OS, const Module *M = nullptr) const override; + void print(raw_ostream &OS) const; /// \brief Get an edge's probability, relative to other out-edges of the Src. /// @@ -67,6 +61,9 @@ public: BranchProbability getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const; + BranchProbability getEdgeProbability(const BasicBlock *Src, + succ_const_iterator Dst) const; + /// \brief Test if an edge is hot relative to other out-edges of the Src. /// /// Check whether this edge out of the source block is 'hot'. We define hot @@ -87,37 +84,22 @@ public: raw_ostream &printEdgeProbability(raw_ostream &OS, const BasicBlock *Src, const BasicBlock *Dst) const; - /// \brief Get the raw edge weight calculated for the edge. + /// \brief Set the raw edge probability for the given edge. /// - /// This returns the raw edge weight. It is guaranteed to fall between 1 and - /// UINT32_MAX. Note that the raw edge weight is not meaningful in isolation. - /// This interface should be very carefully, and primarily by routines that - /// are updating the analysis by later calling setEdgeWeight. - uint32_t getEdgeWeight(const BasicBlock *Src, - unsigned IndexInSuccessors) const; - - /// \brief Get the raw edge weight calculated for the block pair. - /// - /// This returns the sum of all raw edge weights from Src to Dst. - /// It is guaranteed to fall between 1 and UINT32_MAX. - uint32_t getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const; - - uint32_t getEdgeWeight(const BasicBlock *Src, - succ_const_iterator Dst) const; - - /// \brief Set the raw edge weight for a given edge. - /// - /// This allows a pass to explicitly set the edge weight for an edge. It can - /// be used when updating the CFG to update and preserve the branch + /// This allows a pass to explicitly set the edge probability for an edge. It + /// can be used when updating the CFG to update and preserve the branch /// probability information. Read the implementation of how these edge - /// weights are calculated carefully before using! - void setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, - uint32_t Weight); + /// probabilities are calculated carefully before using! + void setEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors, + BranchProbability Prob); - static uint32_t getBranchWeightStackProtector(bool IsLikely) { - return IsLikely ? (1u << 20) - 1 : 1; + static BranchProbability getBranchProbStackProtector(bool IsLikely) { + static const BranchProbability LikelyProb((1u << 20) - 1, 1u << 20); + return IsLikely ? 
LikelyProb : LikelyProb.getCompl(); } + void calculate(Function &F, const LoopInfo& LI); + private: // Since we allow duplicate edges from one basic block to another, we use // a pair (PredBlock and an index in the successors) to specify an edge. @@ -131,10 +113,7 @@ private: // weight to just "inherit" the non-zero weight of an adjacent successor. static const uint32_t DEFAULT_WEIGHT = 16; - DenseMap<Edge, uint32_t> Weights; - - /// \brief Handle to the LoopInfo analysis. - LoopInfo *LI; + DenseMap<Edge, BranchProbability> Probs; /// \brief Track the last function we run over for printing. Function *LastF; @@ -145,19 +124,37 @@ private: /// \brief Track the set of blocks that always lead to a cold call. SmallPtrSet<BasicBlock *, 16> PostDominatedByColdCall; - /// \brief Get sum of the block successors' weights. - uint32_t getSumForBlock(const BasicBlock *BB) const; - bool calcUnreachableHeuristics(BasicBlock *BB); bool calcMetadataWeights(BasicBlock *BB); bool calcColdCallHeuristics(BasicBlock *BB); bool calcPointerHeuristics(BasicBlock *BB); - bool calcLoopBranchHeuristics(BasicBlock *BB); + bool calcLoopBranchHeuristics(BasicBlock *BB, const LoopInfo &LI); bool calcZeroHeuristics(BasicBlock *BB); bool calcFloatingPointHeuristics(BasicBlock *BB); bool calcInvokeHeuristics(BasicBlock *BB); }; +/// \brief Legacy analysis pass which computes \c BranchProbabilityInfo. +class BranchProbabilityInfoWrapperPass : public FunctionPass { + BranchProbabilityInfo BPI; + +public: + static char ID; + + BranchProbabilityInfoWrapperPass() : FunctionPass(ID) { + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + } + + BranchProbabilityInfo &getBPI() { return BPI; } + const BranchProbabilityInfo &getBPI() const { return BPI; } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; +}; + } #endif diff --git a/include/llvm/Analysis/CFG.h b/include/llvm/Analysis/CFG.h index 7c4df780198c..35165f4061f1 100644 --- a/include/llvm/Analysis/CFG.h +++ b/include/llvm/Analysis/CFG.h @@ -40,7 +40,7 @@ void FindFunctionBackedges( /// Search for the specified successor of basic block BB and return its position /// in the terminator instruction's list of successors. It is an error to call /// this with a block that is not a successor. -unsigned GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ); +unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ); /// Return true if the specified edge is a critical edge. Critical edges are /// edges from a block with multiple successors to a block with multiple diff --git a/include/llvm/Analysis/CFLAliasAnalysis.h b/include/llvm/Analysis/CFLAliasAnalysis.h new file mode 100644 index 000000000000..7473a454ab30 --- /dev/null +++ b/include/llvm/Analysis/CFLAliasAnalysis.h @@ -0,0 +1,158 @@ +//===- CFLAliasAnalysis.h - CFL-Based Alias Analysis Interface ---*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for LLVM's primary stateless and local alias analysis.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_CFLALIASANALYSIS_H +#define LLVM_ANALYSIS_CFLALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include <forward_list> + +namespace llvm { + +class CFLAAResult : public AAResultBase<CFLAAResult> { + friend AAResultBase<CFLAAResult>; + + struct FunctionInfo; + +public: + explicit CFLAAResult(const TargetLibraryInfo &TLI); + CFLAAResult(CFLAAResult &&Arg); + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + /// \brief Inserts the given Function into the cache. + void scan(Function *Fn); + + void evict(Function *Fn); + + /// \brief Ensures that the given function is available in the cache. + /// Returns the appropriate entry from the cache. + const Optional<FunctionInfo> &ensureCached(Function *Fn); + + AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB); + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { + if (LocA.Ptr == LocB.Ptr) { + if (LocA.Size == LocB.Size) { + return MustAlias; + } else { + return PartialAlias; + } + } + + // Comparisons between global variables and other constants should be + // handled by BasicAA. + // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing + // a GlobalValue and ConstantExpr, but every query needs to have at least + // one Value tied to a Function, and neither GlobalValues nor ConstantExprs + // are. + if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) { + return AAResultBase::alias(LocA, LocB); + } + + AliasResult QueryResult = query(LocA, LocB); + if (QueryResult == MayAlias) + return AAResultBase::alias(LocA, LocB); + + return QueryResult; + } + +private: + struct FunctionHandle final : public CallbackVH { + FunctionHandle(Function *Fn, CFLAAResult *Result) + : CallbackVH(Fn), Result(Result) { + assert(Fn != nullptr); + assert(Result != nullptr); + } + + void deleted() override { removeSelfFromCache(); } + void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } + + private: + CFLAAResult *Result; + + void removeSelfFromCache() { + assert(Result != nullptr); + auto *Val = getValPtr(); + Result->evict(cast<Function>(Val)); + setValPtr(nullptr); + } + }; + + /// \brief Cached mapping of Functions to their StratifiedSets. + /// If a function's sets are currently being built, it is marked + /// in the cache as an Optional without a value. This way, if we + /// have any kind of recursion, it is discernable from a function + /// that simply has empty sets. + DenseMap<Function *, Optional<FunctionInfo>> Cache; + std::forward_list<FunctionHandle> Handles; + + FunctionInfo buildSetsFrom(Function *F); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +/// +/// FIXME: We really should refactor CFL to use the analysis more heavily, and +/// in particular to leverage invalidation to trigger re-computation of sets. +class CFLAA { +public: + typedef CFLAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + CFLAAResult run(Function &F, AnalysisManager<Function> *AM); + + /// \brief Provide access to a name for this pass for debugging purposes.
+ static StringRef name() { return "CFLAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the CFLAAResult object. +class CFLAAWrapperPass : public ImmutablePass { + std::unique_ptr<CFLAAResult> Result; + +public: + static char ID; + + CFLAAWrapperPass(); + + CFLAAResult &getResult() { return *Result; } + const CFLAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createCFLAAWrapperPass - This pass implements a set-based approach to +// alias analysis. +// +ImmutablePass *createCFLAAWrapperPass(); +} + +#endif diff --git a/include/llvm/Analysis/CGSCCPassManager.h b/include/llvm/Analysis/CGSCCPassManager.h index 6a406cd24402..e7635eb1ab67 100644 --- a/include/llvm/Analysis/CGSCCPassManager.h +++ b/include/llvm/Analysis/CGSCCPassManager.h @@ -358,7 +358,7 @@ private: /// returned PreservedAnalysis set. class CGSCCAnalysisManagerFunctionProxy { public: - /// \brief Result proxy object for \c ModuleAnalysisManagerFunctionProxy. + /// \brief Result proxy object for \c CGSCCAnalysisManagerFunctionProxy. class Result { public: explicit Result(const CGSCCAnalysisManager &CGAM) : CGAM(&CGAM) {} diff --git a/include/llvm/Analysis/CallGraph.h b/include/llvm/Analysis/CallGraph.h index 662ae0e6363c..5562e9b9465f 100644 --- a/include/llvm/Analysis/CallGraph.h +++ b/include/llvm/Analysis/CallGraph.h @@ -75,7 +75,8 @@ class CallGraphNode; class CallGraph { Module &M; - typedef std::map<const Function *, CallGraphNode *> FunctionMapTy; + typedef std::map<const Function *, std::unique_ptr<CallGraphNode>> + FunctionMapTy; /// \brief A map from \c Function* to \c CallGraphNode*. FunctionMapTy FunctionMap; @@ -90,7 +91,7 @@ class CallGraph { /// \brief This node has edges to it from all functions making indirect calls /// or calling an external function. - CallGraphNode *CallsExternalNode; + std::unique_ptr<CallGraphNode> CallsExternalNode; /// \brief Replace the function represented by this node by another. /// @@ -104,7 +105,8 @@ class CallGraph { void addToCallGraph(Function *F); public: - CallGraph(Module &M); + explicit CallGraph(Module &M); + CallGraph(CallGraph &&Arg); ~CallGraph(); void print(raw_ostream &OS) const; @@ -125,21 +127,23 @@ public: inline const CallGraphNode *operator[](const Function *F) const { const_iterator I = FunctionMap.find(F); assert(I != FunctionMap.end() && "Function not in callgraph!"); - return I->second; + return I->second.get(); } /// \brief Returns the call graph node for the provided function. inline CallGraphNode *operator[](const Function *F) { const_iterator I = FunctionMap.find(F); assert(I != FunctionMap.end() && "Function not in callgraph!"); - return I->second; + return I->second.get(); } /// \brief Returns the \c CallGraphNode which is used to represent /// undetermined calls into the callgraph. CallGraphNode *getExternalCallingNode() const { return ExternalCallingNode; } - CallGraphNode *getCallsExternalNode() const { return CallsExternalNode; } + CallGraphNode *getCallsExternalNode() const { + return CallsExternalNode.get(); + } //===--------------------------------------------------------------------- // Functions to keep a call graph up to date with a function that has been @@ -444,8 +448,10 @@ struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { static NodeType *getEntryNode(CallGraph *CGN) { return CGN->getExternalCallingNode(); // Start at the external node!
} - typedef std::pair<const Function *, CallGraphNode *> PairTy; - typedef std::pointer_to_unary_function<PairTy, CallGraphNode &> DerefFun; + typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>> + PairTy; + typedef std::pointer_to_unary_function<const PairTy &, CallGraphNode &> + DerefFun; // nodes_iterator/begin/end - Allow iteration over all nodes in the graph typedef mapped_iterator<CallGraph::iterator, DerefFun> nodes_iterator; @@ -456,7 +462,7 @@ struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> { return map_iterator(CG->end(), DerefFun(CGdereference)); } - static CallGraphNode &CGdereference(PairTy P) { return *P.second; } + static CallGraphNode &CGdereference(const PairTy &P) { return *P.second; } }; template <> @@ -465,8 +471,9 @@ struct GraphTraits<const CallGraph *> : public GraphTraits< static NodeType *getEntryNode(const CallGraph *CGN) { return CGN->getExternalCallingNode(); // Start at the external node! } - typedef std::pair<const Function *, const CallGraphNode *> PairTy; - typedef std::pointer_to_unary_function<PairTy, const CallGraphNode &> + typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>> + PairTy; + typedef std::pointer_to_unary_function<const PairTy &, const CallGraphNode &> DerefFun; // nodes_iterator/begin/end - Allow iteration over all nodes in the graph @@ -478,7 +485,9 @@ struct GraphTraits<const CallGraph *> : public GraphTraits< return map_iterator(CG->end(), DerefFun(CGdereference)); } - static const CallGraphNode &CGdereference(PairTy P) { return *P.second; } + static const CallGraphNode &CGdereference(const PairTy &P) { + return *P.second; + } }; } // End llvm namespace diff --git a/include/llvm/Analysis/CallGraphSCCPass.h b/include/llvm/Analysis/CallGraphSCCPass.h index 667e1715775f..9c7f7bd34cce 100644 --- a/include/llvm/Analysis/CallGraphSCCPass.h +++ b/include/llvm/Analysis/CallGraphSCCPass.h @@ -30,7 +30,7 @@ class CallGraphNode; class CallGraph; class PMStack; class CallGraphSCC; - + class CallGraphSCCPass : public Pass { public: explicit CallGraphSCCPass(char &pid) : Pass(PT_CallGraphSCC, pid) {} @@ -79,25 +79,26 @@ public: void getAnalysisUsage(AnalysisUsage &Info) const override; }; -/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on. +/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on. class CallGraphSCC { void *Context; // The CGPassManager object that is vending this. std::vector<CallGraphNode *> Nodes; + public: CallGraphSCC(void *context) : Context(context) {} - - void initialize(CallGraphNode*const*I, CallGraphNode*const*E) { + + void initialize(CallGraphNode *const *I, CallGraphNode *const *E) { Nodes.assign(I, E); } - + bool isSingular() const { return Nodes.size() == 1; } unsigned size() const { return Nodes.size(); } - + /// ReplaceNode - This informs the SCC and the pass manager that the specified /// Old node has been deleted, and New is to be used in its place. void ReplaceNode(CallGraphNode *Old, CallGraphNode *New); - - typedef std::vector<CallGraphNode*>::const_iterator iterator; + + typedef std::vector<CallGraphNode *>::const_iterator iterator; iterator begin() const { return Nodes.begin(); } iterator end() const { return Nodes.end(); } }; diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h index 8b7c7a90f7c0..8d2c095d8585 100644 --- a/include/llvm/Analysis/CaptureTracking.h +++ b/include/llvm/Analysis/CaptureTracking.h @@ -20,6 +20,7 @@ namespace llvm { class Use; class Instruction; class DominatorTree; + class OrderedBasicBlock; /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can @@ -41,10 +42,12 @@ namespace llvm { /// it or not. The boolean StoreCaptures specified whether storing the value /// (or part of it) into memory anywhere automatically counts as capturing it /// or not.
Captures by the provided instruction are considered if the - /// final parameter is true. + /// final parameter is true. An ordered basic block in \p OBB could be used + /// to speed up capture-tracker queries. bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, - DominatorTree *DT, bool IncludeI = false); + DominatorTree *DT, bool IncludeI = false, + OrderedBasicBlock *OBB = nullptr); /// This callback is used in conjunction with PointerMayBeCaptured. In /// addition to the interface here, you'll need to provide your own getters diff --git a/include/llvm/Analysis/DOTGraphTraitsPass.h b/include/llvm/Analysis/DOTGraphTraitsPass.h index cb74e9f32d3d..ca50ee2f829a 100644 --- a/include/llvm/Analysis/DOTGraphTraitsPass.h +++ b/include/llvm/Analysis/DOTGraphTraitsPass.h @@ -36,8 +36,23 @@ public: DOTGraphTraitsViewer(StringRef GraphName, char &ID) : FunctionPass(ID), Name(GraphName) {} + /// @brief Return true if this function should be processed. + /// + /// An implementation of this class may override this function to indicate that + /// only certain functions should be viewed. + /// + /// @param Analysis The current analysis result for this function. + virtual bool processFunction(Function &F, AnalysisT &Analysis) { + return true; + } + bool runOnFunction(Function &F) override { - GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>()); + auto &Analysis = getAnalysis<AnalysisT>(); + + if (!processFunction(F, Analysis)) + return false; + + GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis); std::string GraphName = DOTGraphTraits<GraphT>::getGraphName(Graph); std::string Title = GraphName + " for '" + F.getName().str() + "' function"; @@ -63,8 +78,23 @@ public: DOTGraphTraitsPrinter(StringRef GraphName, char &ID) : FunctionPass(ID), Name(GraphName) {} + /// @brief Return true if this function should be processed. + /// + /// An implementation of this class may override this function to indicate that + /// only certain functions should be printed. + /// + /// @param Analysis The current analysis result for this function. + virtual bool processFunction(Function &F, AnalysisT &Analysis) { + return true; + } + bool runOnFunction(Function &F) override { - GraphT Graph = AnalysisGraphTraitsT::getGraph(&getAnalysis<AnalysisT>()); + auto &Analysis = getAnalysis<AnalysisT>(); + + if (!processFunction(F, Analysis)) + return false; + + GraphT Graph = AnalysisGraphTraitsT::getGraph(&Analysis); std::string Filename = Name + "." + F.getName().str() + ".dot"; std::error_code EC; diff --git a/include/llvm/Analysis/DemandedBits.h b/include/llvm/Analysis/DemandedBits.h new file mode 100644 index 000000000000..42932bfd3491 --- /dev/null +++ b/include/llvm/Analysis/DemandedBits.h @@ -0,0 +1,75 @@ +//===-- llvm/Analysis/DemandedBits.h - Determine demanded bits --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements a demanded bits analysis. A demanded bit is one that +// contributes to a result; bits that are not demanded can be either zero or +// one without affecting control or data flow. For example in this sequence: +// +// %1 = add i32 %x, %y +// %2 = trunc i32 %1 to i16 +// +// Only the lowest 16 bits of %1 are demanded; the rest are removed by the +// trunc.
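+//
+// A minimal usage sketch (illustrative only; assumes a legacy-pass caller
+// that required this pass, and an integer-typed Instruction *I):
+//
+//   DemandedBits &DB = getAnalysis<DemandedBits>();
+//   APInt Demanded = DB.getDemandedBits(I); // bits of I that affect results
+//   if (DB.isInstructionDead(I))
+//     ;                                     // I feeds nothing demanded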
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DEMANDED_BITS_H +#define LLVM_ANALYSIS_DEMANDED_BITS_H + +#include "llvm/Pass.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" + +namespace llvm { + +class FunctionPass; +class Function; +class Instruction; +class DominatorTree; +class AssumptionCache; + +struct DemandedBits : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + DemandedBits(); + + bool runOnFunction(Function& F) override; + void getAnalysisUsage(AnalysisUsage& AU) const override; + void print(raw_ostream &OS, const Module *M) const override; + + /// Return the bits demanded from instruction I. + APInt getDemandedBits(Instruction *I); + + /// Return true if, during analysis, I could not be reached. + bool isInstructionDead(Instruction *I); + +private: + void performAnalysis(); + void determineLiveOperandBits(const Instruction *UserI, + const Instruction *I, unsigned OperandNo, + const APInt &AOut, APInt &AB, + APInt &KnownZero, APInt &KnownOne, + APInt &KnownZero2, APInt &KnownOne2); + + AssumptionCache *AC; + DominatorTree *DT; + Function *F; + bool Analyzed; + + // The set of visited instructions (non-integer-typed only). + SmallPtrSet<Instruction *, 128> Visited; + DenseMap<Instruction *, APInt> AliveBits; +}; + +/// Create a demanded bits analysis pass. +FunctionPass *createDemandedBitsPass(); + +} // End llvm namespace + +#endif diff --git a/include/llvm/Analysis/DependenceAnalysis.h b/include/llvm/Analysis/DependenceAnalysis.h index a08ce574ea56..5290552b41dc 100644 --- a/include/llvm/Analysis/DependenceAnalysis.h +++ b/include/llvm/Analysis/DependenceAnalysis.h @@ -42,11 +42,11 @@ #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Instructions.h" #include "llvm/Pass.h" namespace llvm { - class AliasAnalysis; class Loop; class LoopInfo; class ScalarEvolution; @@ -69,6 +69,15 @@ namespace llvm { /// as singly-linked lists, with the "next" fields stored in the dependence /// itself. class Dependence { + protected: + Dependence(const Dependence &) = default; + + // FIXME: When we move to MSVC 2015 as the base compiler for Visual Studio + // support, uncomment this line to allow a defaulted move constructor for + // Dependence. Currently, FullDependence relies on the copy constructor, but + // that is acceptable given the triviality of the class. + // Dependence(Dependence &&) = default; + public: Dependence(Instruction *Source, Instruction *Destination) :
- void setNextSuccessor(const Dependence *succ) { - NextSuccessor = succ; - } - + void setNextSuccessor(const Dependence *succ) { NextSuccessor = succ; } + /// dump - For debugging purposes, dumps a dependence to OS. /// void dump(raw_ostream &OS) const; + private: Instruction *Src, *Dst; const Dependence *NextPredecessor, *NextSuccessor; friend class DependenceAnalysis; }; - /// FullDependence - This class represents a dependence between two memory /// references in a function. It contains detailed information about the /// dependence (direction vectors, etc.) and is used when the compiler is @@ -216,11 +217,15 @@ namespace llvm { /// (for output, flow, and anti dependences), the dependence implies an /// ordering, where the source must precede the destination; in contrast, /// input dependences are unordered. - class FullDependence : public Dependence { + class FullDependence final : public Dependence { public: FullDependence(Instruction *Src, Instruction *Dst, bool LoopIndependent, unsigned Levels); - ~FullDependence() override { delete[] DV; } + + FullDependence(FullDependence &&RHS) + : Dependence(std::move(RHS)), Levels(RHS.Levels), + LoopIndependent(RHS.LoopIndependent), Consistent(RHS.Consistent), + DV(std::move(RHS.DV)) {} /// isLoopIndependent - Returns true if this is a loop-independent /// dependence. @@ -268,16 +273,16 @@ namespace llvm { unsigned short Levels; bool LoopIndependent; bool Consistent; // Init to true, then refine. - DVEntry *DV; + std::unique_ptr DV; friend class DependenceAnalysis; }; - /// DependenceAnalysis - This class is the main dependence-analysis driver. /// class DependenceAnalysis : public FunctionPass { void operator=(const DependenceAnalysis &) = delete; DependenceAnalysis(const DependenceAnalysis &) = delete; + public: /// depends - Tests for a dependence between the Src and Dst instructions. /// Returns NULL if no dependence; otherwise, returns a Dependence (or a @@ -387,6 +392,7 @@ namespace llvm { const SCEV *B; const SCEV *C; const Loop *AssociatedLoop; + public: /// isEmpty - Return true if the constraint is of kind Empty. bool isEmpty() const { return Kind == Empty; } @@ -453,7 +459,6 @@ namespace llvm { void dump(raw_ostream &OS) const; }; - /// establishNestingLevels - Examines the loop nesting of the Src and Dst /// instructions and establishes their shared loops. Sets the variables /// CommonLevels, SrcLevels, and MaxLevels. @@ -521,10 +526,10 @@ namespace llvm { /// in LoopNest. bool isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const; - /// Makes sure all subscript pairs share the same integer type by + /// Makes sure all subscript pairs share the same integer type by /// sign-extending as necessary. /// Sign-extending a subscript is safe because getelementptr assumes the - /// array subscripts are signed. + /// array subscripts are signed. void unifySubscriptType(ArrayRef Pairs); /// removeMatchingExtensions - Examines a subscript pair. @@ -806,7 +811,6 @@ namespace llvm { const SCEV *Delta) const; /// testBounds - Returns true iff the current bounds are plausible. 
- /// bool testBounds(unsigned char DirKind, unsigned Level, BoundInfo *Bound, @@ -913,9 +917,8 @@ namespace llvm { void updateDirection(Dependence::DVEntry &Level, const Constraint &CurConstraint) const; - bool tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, - SmallVectorImpl &Pair, - const SCEV *ElementSize); + bool tryDelinearize(Instruction *Src, Instruction *Dst, + SmallVectorImpl &Pair); public: static char ID; // Class identification, replacement for typeinfo diff --git a/include/llvm/Analysis/DivergenceAnalysis.h b/include/llvm/Analysis/DivergenceAnalysis.h new file mode 100644 index 000000000000..aa2de571ba1b --- /dev/null +++ b/include/llvm/Analysis/DivergenceAnalysis.h @@ -0,0 +1,48 @@ +//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The divergence analysis is an LLVM pass which can be used to find out +// if a branch instruction in a GPU program is divergent or not. It can help +// branch optimizations such as jump threading and loop unswitching to make +// better decisions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/Function.h" +#include "llvm/Pass.h" + +namespace llvm { +class Value; +class DivergenceAnalysis : public FunctionPass { +public: + static char ID; + + DivergenceAnalysis() : FunctionPass(ID) { + initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + + // Print all divergent branches in the function. + void print(raw_ostream &OS, const Module *) const override; + + // Returns true if V is divergent. + bool isDivergent(const Value *V) const { return DivergentValues.count(V); } + + // Returns true if V is uniform/non-divergent. + bool isUniform(const Value *V) const { return !isDivergent(V); } + +private: + // Stores all divergent values. + DenseSet DivergentValues; +}; +} // End llvm namespace diff --git a/include/llvm/Analysis/EHPersonalities.h b/include/llvm/Analysis/EHPersonalities.h new file mode 100644 index 000000000000..59e9672b88e5 --- /dev/null +++ b/include/llvm/Analysis/EHPersonalities.h @@ -0,0 +1,94 @@ +//===- EHPersonalities.h - Compute EH-related information -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_EHPERSONALITIES_H +#define LLVM_ANALYSIS_EHPERSONALITIES_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { +class BasicBlock; +class Function; +class Value; + +enum class EHPersonality { + Unknown, + GNU_Ada, + GNU_C, + GNU_CXX, + GNU_ObjC, + MSVC_X86SEH, + MSVC_Win64SEH, + MSVC_CXX, + CoreCLR +}; + +/// \brief See if the given exception handling personality function is one +/// that we understand. If so, return a description of it; otherwise return +/// Unknown. +EHPersonality classifyEHPersonality(const Value *Pers); + +/// \brief Returns true if this personality function catches asynchronous +/// exceptions. 
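+///
+/// For example (a sketch only; F is a hypothetical Function that has a
+/// personality set):
+///
+///   EHPersonality Pers = classifyEHPersonality(F.getPersonalityFn());
+///   if (isAsynchronousEHPersonality(Pers))
+///     ; // faulting instructions may transfer control to a handler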
+inline bool isAsynchronousEHPersonality(EHPersonality Pers) { + // The two SEH personality functions can catch asynch exceptions. We assume + // unknown personalities don't catch asynch exceptions. + switch (Pers) { + case EHPersonality::MSVC_X86SEH: + case EHPersonality::MSVC_Win64SEH: + return true; + default: + return false; + } + llvm_unreachable("invalid enum"); +} + +/// \brief Returns true if this is a personality function that invokes +/// handler funclets (which must return to it). +inline bool isFuncletEHPersonality(EHPersonality Pers) { + switch (Pers) { + case EHPersonality::MSVC_CXX: + case EHPersonality::MSVC_X86SEH: + case EHPersonality::MSVC_Win64SEH: + case EHPersonality::CoreCLR: + return true; + default: + return false; + } + llvm_unreachable("invalid enum"); +} + +/// \brief Return true if this personality may be safely removed if there +/// are no invoke instructions remaining in the current function. +inline bool isNoOpWithoutInvoke(EHPersonality Pers) { + switch (Pers) { + case EHPersonality::Unknown: + return false; + // All known personalities currently have this behavior + default: + return true; + } + llvm_unreachable("invalid enum"); +} + +bool canSimplifyInvokeNoUnwind(const Function *F); + +typedef TinyPtrVector ColorVector; + +/// \brief If an EH funclet personality is in use (see isFuncletEHPersonality), +/// this will recompute which blocks are in which funclet. It is possible that +/// some blocks are in multiple funclets. Consider this analysis to be +/// expensive. +DenseMap colorEHFunclets(Function &F); + +} // end namespace llvm + +#endif diff --git a/include/llvm/Analysis/GlobalsModRef.h b/include/llvm/Analysis/GlobalsModRef.h new file mode 100644 index 000000000000..bcd102e7ded2 --- /dev/null +++ b/include/llvm/Analysis/GlobalsModRef.h @@ -0,0 +1,160 @@ +//===- GlobalsModRef.h - Simple Mod/Ref AA for Globals ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a simple mod/ref and alias analysis over globals. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_GLOBALSMODREF_H +#define LLVM_ANALYSIS_GLOBALSMODREF_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Pass.h" +#include + +namespace llvm { + +/// An alias analysis result set for globals. +/// +/// This focuses on handling aliasing properties of globals and interprocedural +/// function call mod/ref information. +class GlobalsAAResult : public AAResultBase { + friend AAResultBase; + + class FunctionInfo; + + const DataLayout &DL; + + /// The globals that do not have their addresses taken. + SmallPtrSet NonAddressTakenGlobals; + + /// IndirectGlobals - The memory pointed to by this global is known to be + /// 'owned' by the global. + SmallPtrSet IndirectGlobals; + + /// AllocsForIndirectGlobals - If an instruction allocates memory for an + /// indirect global, this map indicates which one. + DenseMap AllocsForIndirectGlobals; + + /// For each function, keep track of what globals are modified or read. + DenseMap FunctionInfos; + + /// A map of functions to SCC. 
The SCCs are described by a simple integer + /// ID that is only useful for comparing for equality (are two functions + /// in the same SCC or not?) + DenseMap FunctionToSCCMap; + + /// Handle to clear this analysis on deletion of values. + struct DeletionCallbackHandle final : CallbackVH { + GlobalsAAResult *GAR; + std::list::iterator I; + + DeletionCallbackHandle(GlobalsAAResult &GAR, Value *V) + : CallbackVH(V), GAR(&GAR) {} + + void deleted() override; + }; + + /// List of callbacks for globals being tracked by this analysis. Note that + /// these objects are quite large, but we only anticipate having one per + /// global tracked by this analysis. There are numerous optimizations we + /// could perform to the memory utilization here if this becomes a problem. + std::list Handles; + + explicit GlobalsAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI); + +public: + GlobalsAAResult(GlobalsAAResult &&Arg); + + static GlobalsAAResult analyzeModule(Module &M, const TargetLibraryInfo &TLI, + CallGraph &CG); + + //------------------------------------------------ + // Implement the AliasAnalysis API + // + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + + using AAResultBase::getModRefInfo; + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + FunctionModRefBehavior getModRefBehavior(const Function *F); + + /// getModRefBehavior - Return the behavior of the specified function if + /// called from the specified call site. The call site may be null in which + /// case the most generic behavior of this function should be returned. + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); + +private: + FunctionInfo *getFunctionInfo(const Function *F); + + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, + SmallPtrSetImpl *Readers = nullptr, + SmallPtrSetImpl *Writers = nullptr, + GlobalValue *OkayStoreDest = nullptr); + bool AnalyzeIndirectGlobalMemory(GlobalVariable *GV); + void CollectSCCMembership(CallGraph &CG); + + bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V); + ModRefInfo getModRefInfoForArgument(ImmutableCallSite CS, + const GlobalValue *GV); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class GlobalsAA { +public: + typedef GlobalsAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + GlobalsAAResult run(Module &M, AnalysisManager *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "GlobalsAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the GlobalsAAResult object. 
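+///
+/// A rough sketch of typical use (illustrative only; assumes a legacy pass
+/// that added this wrapper to its analysis usage, and a Function Callee):
+///
+///   GlobalsAAResult &GAR = getAnalysis<GlobalsAAWrapperPass>().getResult();
+///   FunctionModRefBehavior B = GAR.getModRefBehavior(&Callee);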
+class GlobalsAAWrapperPass : public ModulePass { + std::unique_ptr Result; + +public: + static char ID; + + GlobalsAAWrapperPass(); + + GlobalsAAResult &getResult() { return *Result; } + const GlobalsAAResult &getResult() const { return *Result; } + + bool runOnModule(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createGlobalsAAWrapperPass - This pass provides alias and mod/ref info for +// global values that do not have their addresses taken. +// +ModulePass *createGlobalsAAWrapperPass(); +} + +#endif diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index 00dbcbdd7806..37d01490dac6 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -34,7 +34,7 @@ class DataLayout; /// The Expr member keeps track of the expression, User is the actual user /// instruction of the operand, and 'OperandValToReplace' is the operand of /// the User that is the use. -class IVStrideUse : public CallbackVH, public ilist_node { +class IVStrideUse final : public CallbackVH, public ilist_node { friend class IVUsers; public: IVStrideUse(IVUsers *P, Instruction* U, Value *O) diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index 79ed74d82411..35f991cb3f67 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -23,7 +23,7 @@ class AssumptionCacheTracker; class CallSite; class DataLayout; class Function; -class TargetTransformInfoWrapperPass; +class TargetTransformInfo; namespace InlineConstants { // Various magic constants used to adjust heuristics. @@ -98,46 +98,31 @@ public: int getCostDelta() const { return Threshold - getCost(); } }; -/// \brief Cost analyzer used by inliner. -class InlineCostAnalysis : public CallGraphSCCPass { - TargetTransformInfoWrapperPass *TTIWP; - AssumptionCacheTracker *ACT; +/// \brief Get an InlineCost object representing the cost of inlining this +/// callsite. +/// +/// Note that threshold is passed into this function. Only costs below the +/// threshold are computed with any accuracy. The threshold can be used to +/// bound the computation necessary to determine whether the cost is +/// sufficiently low to warrant inlining. +/// +/// Also note that calling this function *dynamically* computes the cost of +/// inlining the callsite. It is an expensive, heavyweight call. +InlineCost getInlineCost(CallSite CS, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT); -public: - static char ID; - - InlineCostAnalysis(); - ~InlineCostAnalysis() override; - - // Pass interface implementation. - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnSCC(CallGraphSCC &SCC) override; - - /// \brief Get an InlineCost object representing the cost of inlining this - /// callsite. - /// - /// Note that threshold is passed into this function. Only costs below the - /// threshold are computed with any accuracy. The threshold can be used to - /// bound the computation necessary to determine whether the cost is - /// sufficiently low to warrant inlining. - /// - /// Also note that calling this function *dynamically* computes the cost of - /// inlining the callsite. It is an expensive, heavyweight call. - InlineCost getInlineCost(CallSite CS, int Threshold); - - /// \brief Get an InlineCost with the callee explicitly specified. 
- /// This allows you to calculate the cost of inlining a function via a - /// pointer. This behaves exactly as the version with no explicit callee - /// parameter in all other respects. - // - // Note: This is used by out-of-tree passes, please do not remove without - // adding a replacement API. - InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold); - - /// \brief Minimal filter to detect invalid constructs for inlining. - bool isInlineViable(Function &Callee); -}; +/// \brief Get an InlineCost with the callee explicitly specified. +/// This allows you to calculate the cost of inlining a function via a +/// pointer. This behaves exactly as the version with no explicit callee +/// parameter in all other respects. +// +InlineCost getInlineCost(CallSite CS, Function *Callee, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT); +/// \brief Minimal filter to detect invalid constructs for inlining. +bool isInlineViable(Function &Callee); } #endif diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index d44c5ff4078d..ed313dae9ab1 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -207,7 +207,7 @@ namespace llvm { const TargetLibraryInfo *TLI = nullptr, const DominatorTree *DT = nullptr, AssumptionCache *AC = nullptr, - Instruction *CxtI = nullptr); + const Instruction *CxtI = nullptr); /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can /// fold the result. If not, this returns null. diff --git a/include/llvm/Analysis/IteratedDominanceFrontier.h b/include/llvm/Analysis/IteratedDominanceFrontier.h index 5a339f10f50f..a1ded2554d44 100644 --- a/include/llvm/Analysis/IteratedDominanceFrontier.h +++ b/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -34,7 +34,7 @@ namespace llvm { class BasicBlock; template class DomTreeNodeBase; typedef DomTreeNodeBase DomTreeNode; -class DominatorTree; +template class DominatorTreeBase; /// \brief Determine the iterated dominance frontier, given a set of defining /// blocks, and optionally, a set of live-in blocks. @@ -47,7 +47,7 @@ class DominatorTree; class IDFCalculator { public: - IDFCalculator(DominatorTree &DT) : DT(DT), useLiveIn(false) {} + IDFCalculator(DominatorTreeBase &DT) : DT(DT), useLiveIn(false) {} /// \brief Give the IDF calculator the set of blocks in which the value is /// defined. This is equivalent to the set of starting blocks it should be @@ -85,7 +85,7 @@ public: void calculate(SmallVectorImpl &IDFBlocks); private: - DominatorTree &DT; + DominatorTreeBase &DT; bool useLiveIn; DenseMap DomLevels; const SmallPtrSetImpl *LiveInBlocks; diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index b0b9068de34b..ef3d5e8fe3df 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -54,7 +54,7 @@ namespace llvm { class PreservedAnalyses; class raw_ostream; -/// \brief A lazily constructed view of the call graph of a module. +/// A lazily constructed view of the call graph of a module. /// /// With the edges of this graph, the motivating constraint that we are /// attempting to maintain is that function-local optimization, CGSCC-local @@ -107,7 +107,7 @@ public: typedef SmallVector, 4> NodeVectorT; typedef SmallVectorImpl> NodeVectorImplT; - /// \brief A lazy iterator used for both the entry nodes and child nodes. + /// A lazy iterator used for both the entry nodes and child nodes. 
/// /// When this iterator is dereferenced, if not yet available, a function will /// be scanned for "calls" or uses of functions and its child information @@ -152,7 +152,7 @@ public: } }; - /// \brief A node in the call graph. + /// A node in the call graph. /// /// This represents a single node. Its primary roles are to cache the list of /// callees, de-duplicate and provide fast testing of whether a function is /// a callee, and facilitate iteration of child nodes in the graph. class Node { friend class LazyCallGraph; friend class LazyCallGraph::SCC; LazyCallGraph *G; Function &F; // We provide for the DFS numbering and Tarjan walk lowlink numbers to be // stored directly within the node. int DFSNumber; int LowLink; mutable NodeVectorT Callees; DenseMap<Function *, size_t> CalleeIndexMap; - /// \brief Basic constructor implements the scanning of F into Callees and + /// Basic constructor implements the scanning of F into Callees and /// CalleeIndexMap. Node(LazyCallGraph &G, Function &F); - /// \brief Internal helper to insert a callee. + /// Internal helper to insert a callee. void insertEdgeInternal(Function &Callee); - /// \brief Internal helper to insert a callee. + /// Internal helper to insert a callee. void insertEdgeInternal(Node &CalleeN); - /// \brief Internal helper to remove a callee from this node. + /// Internal helper to remove a callee from this node. void removeEdgeInternal(Function &Callee); public: typedef LazyCallGraph::iterator iterator; - Function &getFunction() const { - return F; - }; + Function &getFunction() const { return F; } iterator begin() const { return iterator(*G, Callees.begin(), Callees.end()); @@ -202,7 +200,7 @@ public: bool operator!=(const Node &N) const { return !operator==(N); } }; - /// \brief An SCC of the call graph. + /// An SCC of the call graph. /// /// This represents a Strongly Connected Component of the call graph as /// a collection of call graph nodes. While the order of nodes in the SCC is @@ -226,7 +224,8 @@ public: public: typedef SmallVectorImpl<Node *>::const_iterator iterator; - typedef pointee_iterator<SmallPtrSetImpl<SCC *>::const_iterator> parent_iterator; + typedef pointee_iterator<SmallPtrSetImpl<SCC *>::const_iterator> + parent_iterator; iterator begin() const { return Nodes.begin(); } iterator end() const { return Nodes.end(); } @@ -235,24 +234,24 @@ public: parent_iterator parent_end() const { return ParentSCCs.end(); } iterator_range<parent_iterator> parents() const { - return iterator_range<parent_iterator>(parent_begin(), parent_end()); + return make_range(parent_begin(), parent_end()); } - /// \brief Test if this SCC is a parent of \a C. + /// Test if this SCC is a parent of \a C. bool isParentOf(const SCC &C) const { return C.isChildOf(*this); } - /// \brief Test if this SCC is an ancestor of \a C. + /// Test if this SCC is an ancestor of \a C. bool isAncestorOf(const SCC &C) const { return C.isDescendantOf(*this); } - /// \brief Test if this SCC is a child of \a C. + /// Test if this SCC is a child of \a C. bool isChildOf(const SCC &C) const { return ParentSCCs.count(const_cast<SCC *>(&C)); } - /// \brief Test if this SCC is a descendant of \a C. + /// Test if this SCC is a descendant of \a C. bool isDescendantOf(const SCC &C) const; - /// \brief Short name useful for debugging or logging. + /// Short name useful for debugging or logging. /// /// We use the name of the first function in the SCC to name the SCC for /// the purposes of debugging and logging. @@ -267,22 +266,21 @@ public: /// Note that these methods sometimes have complex runtimes, so be careful /// how you call them. - /// \brief Insert an edge from one node in this SCC to another in this SCC. + /// Insert an edge from one node in this SCC to another in this SCC. /// /// By the definition of an SCC, this does not change the nature or make-up /// of any SCCs.
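///
/// A short sketch of the intended call pattern (illustrative only; CG is a
/// LazyCallGraph and F1/F2 are Functions already in the same SCC):
///
///   LazyCallGraph::Node &N1 = CG.get(F1), &N2 = CG.get(F2);
///   LazyCallGraph::SCC &C = *CG.lookupSCC(N1);
///   C.insertIntraSCCEdge(N1, N2);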
void insertIntraSCCEdge(Node &CallerN, Node &CalleeN); - /// \brief Insert an edge whose tail is in this SCC and head is in some - /// child SCC. + /// Insert an edge whose tail is in this SCC and head is in some child SCC. /// /// There must be an existing path from the caller to the callee. This /// operation is inexpensive and does not change the set of SCCs in the /// graph. void insertOutgoingEdge(Node &CallerN, Node &CalleeN); - /// \brief Insert an edge whose tail is in a descendant SCC and head is in - /// this SCC. + /// Insert an edge whose tail is in a descendant SCC and head is in this + /// SCC. /// /// There must be an existing path from the callee to the caller in this /// case. NB! This has the potential to be a very expensive function. It @@ -297,7 +295,7 @@ public: /// implementation for details, but that use case might impact users. SmallVector<SCC *, 1> insertIncomingEdge(Node &CallerN, Node &CalleeN); - /// \brief Remove an edge whose source is in this SCC and target is *not*. + /// Remove an edge whose source is in this SCC and target is *not*. /// /// This removes an inter-SCC edge. All inter-SCC edges originating from /// this SCC have been fully explored by any in-flight DFS SCC formation, @@ -309,7 +307,7 @@ public: /// them. void removeInterSCCEdge(Node &CallerN, Node &CalleeN); - /// \brief Remove an edge which is entirely within this SCC. + /// Remove an edge which is entirely within this SCC. /// /// Both the \a Caller and the \a Callee must be within this SCC. Removing /// such an edge may break cycles that form this SCC and thus this @@ -346,7 +344,7 @@ public: ///@} }; - /// \brief A post-order depth-first SCC iterator over the call graph. + /// A post-order depth-first SCC iterator over the call graph. /// /// This iterator triggers the Tarjan DFS-based formation of the SCC DAG for /// the call graph, walking it lazily in depth-first post-order. That is, it @@ -358,7 +356,7 @@ public: friend class LazyCallGraph; friend class LazyCallGraph::Node; - /// \brief Nonce type to select the constructor for the end iterator. + /// Nonce type to select the constructor for the end iterator. struct IsAtEndT {}; LazyCallGraph *G; @@ -387,7 +385,7 @@ public: } }; - /// \brief Construct a graph for the given module. + /// Construct a graph for the given module. /// /// This sets up the graph and computes all of the entry points of the graph. /// No function definitions are scanned until their nodes in the graph are @@ -410,22 +408,20 @@ public: } iterator_range<postorder_scc_iterator> postorder_sccs() { - return iterator_range<postorder_scc_iterator>(postorder_scc_begin(), - postorder_scc_end()); + return make_range(postorder_scc_begin(), postorder_scc_end()); } - /// \brief Lookup a function in the graph which has already been scanned and - /// added. + /// Lookup a function in the graph which has already been scanned and added. Node *lookup(const Function &F) const { return NodeMap.lookup(&F); } - /// \brief Lookup a function's SCC in the graph. + /// Lookup a function's SCC in the graph. /// /// \returns null if the function hasn't been assigned an SCC via the SCC /// iterator walk. SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); } - /// \brief Get a graph node for a given function, scanning it to populate the - /// graph data as necessary. + /// Get a graph node for a given function, scanning it to populate the graph + /// data as necessary.
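///
/// For example (a sketch; M is a hypothetical Module):
///
///   LazyCallGraph CG(M);                 // entry points only, no scanning
///   LazyCallGraph::Node &N = CG.get(F);  // F is scanned here, on demand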
Node &get(Function &F) { Node *&N = NodeMap[&F]; if (N) @@ -444,18 +440,18 @@ public: /// Once you begin manipulating a call graph's SCCs, you must perform all /// mutation of the graph via the SCC methods. - /// \brief Update the call graph after inserting a new edge. + /// Update the call graph after inserting a new edge. void insertEdge(Node &Caller, Function &Callee); - /// \brief Update the call graph after inserting a new edge. + /// Update the call graph after inserting a new edge. void insertEdge(Function &Caller, Function &Callee) { return insertEdge(get(Caller), Callee); } - /// \brief Update the call graph after deleting an edge. + /// Update the call graph after deleting an edge. void removeEdge(Node &Caller, Function &Callee); - /// \brief Update the call graph after deleting an edge. + /// Update the call graph after deleting an edge. void removeEdge(Function &Caller, Function &Callee) { return removeEdge(get(Caller), Callee); } @@ -463,57 +459,56 @@ public: ///@} private: - /// \brief Allocator that holds all the call graph nodes. + /// Allocator that holds all the call graph nodes. SpecificBumpPtrAllocator BPA; - /// \brief Maps function->node for fast lookup. + /// Maps function->node for fast lookup. DenseMap NodeMap; - /// \brief The entry nodes to the graph. + /// The entry nodes to the graph. /// /// These nodes are reachable through "external" means. Put another way, they /// escape at the module scope. NodeVectorT EntryNodes; - /// \brief Map of the entry nodes in the graph to their indices in - /// \c EntryNodes. + /// Map of the entry nodes in the graph to their indices in \c EntryNodes. DenseMap EntryIndexMap; - /// \brief Allocator that holds all the call graph SCCs. + /// Allocator that holds all the call graph SCCs. SpecificBumpPtrAllocator SCCBPA; - /// \brief Maps Function -> SCC for fast lookup. + /// Maps Function -> SCC for fast lookup. DenseMap SCCMap; - /// \brief The leaf SCCs of the graph. + /// The leaf SCCs of the graph. /// /// These are all of the SCCs which have no children. SmallVector LeafSCCs; - /// \brief Stack of nodes in the DFS walk. + /// Stack of nodes in the DFS walk. SmallVector, 4> DFSStack; - /// \brief Set of entry nodes not-yet-processed into SCCs. + /// Set of entry nodes not-yet-processed into SCCs. SmallVector SCCEntryNodes; - /// \brief Stack of nodes the DFS has walked but not yet put into a SCC. + /// Stack of nodes the DFS has walked but not yet put into a SCC. SmallVector PendingSCCStack; - /// \brief Counter for the next DFS number to assign. + /// Counter for the next DFS number to assign. int NextDFSNumber; - /// \brief Helper to insert a new function, with an already looked-up entry in + /// Helper to insert a new function, with an already looked-up entry in /// the NodeMap. Node &insertInto(Function &F, Node *&MappedN); - /// \brief Helper to update pointers back to the graph object during moves. + /// Helper to update pointers back to the graph object during moves. void updateGraphPtrs(); - /// \brief Helper to form a new SCC out of the top of a DFSStack-like + /// Helper to form a new SCC out of the top of a DFSStack-like /// structure. SCC *formSCC(Node *RootN, SmallVectorImpl &NodeStack); - /// \brief Retrieve the next node in the post-order SCC walk of the call graph. + /// Retrieve the next node in the post-order SCC walk of the call graph. 
SCC *getNextSCCInPostOrder(); }; @@ -535,17 +530,17 @@ template <> struct GraphTraits { static ChildIteratorType child_end(NodeType *N) { return N->end(); } }; -/// \brief An analysis pass which computes the call graph for a module. +/// An analysis pass which computes the call graph for a module. class LazyCallGraphAnalysis { public: - /// \brief Inform generic clients of the result type. + /// Inform generic clients of the result type. typedef LazyCallGraph Result; static void *ID() { return (void *)&PassID; } static StringRef name() { return "Lazy CallGraph Analysis"; } - /// \brief Compute the \c LazyCallGraph for the module \c M. + /// Compute the \c LazyCallGraph for the module \c M. /// /// This just builds the set of entry points to the call graph. The rest is /// built lazily as it is walked. @@ -555,7 +550,7 @@ private: static char PassID; }; -/// \brief A pass which prints the call graph to a \c raw_ostream. +/// A pass which prints the call graph to a \c raw_ostream. /// /// This is primarily useful for testing the analysis. class LazyCallGraphPrinterPass { diff --git a/include/llvm/Analysis/LazyValueInfo.h b/include/llvm/Analysis/LazyValueInfo.h index 1051cff5efb7..42002062dca2 100644 --- a/include/llvm/Analysis/LazyValueInfo.h +++ b/include/llvm/Analysis/LazyValueInfo.h @@ -25,7 +25,7 @@ namespace llvm { class Instruction; class TargetLibraryInfo; class Value; - + /// This pass computes, caches, and vends lazy value constraint information. class LazyValueInfo : public FunctionPass { AssumptionCache *AC; @@ -45,23 +45,22 @@ public: enum Tristate { Unknown = -1, False = 0, True = 1 }; - - + // Public query interface. - + /// Determine whether the specified value comparison with a constant is known /// to be true or false on the specified CFG edge. /// Pred is a CmpInst predicate. Tristate getPredicateOnEdge(unsigned Pred, Value *V, Constant *C, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI = nullptr); - + /// Determine whether the specified value comparison with a constant is known /// to be true or false at the specified instruction /// (from an assume intrinsic). Pred is a CmpInst predicate. Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C, Instruction *CxtI); - + /// Determine whether the specified value is known to be a /// constant at the end of the specified block. Return null if not. Constant *getConstant(Value *V, BasicBlock *BB, Instruction *CxtI = nullptr); @@ -70,14 +69,14 @@ public: /// constant on the specified edge. Return null if not. Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI = nullptr); - + /// Inform the analysis cache that we have threaded an edge from /// PredBB to OldSucc to be from PredBB to NewSucc instead. void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc); - + /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); - + // Implementation boilerplate. void getAnalysisUsage(AnalysisUsage &AU) const override; diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h deleted file mode 100644 index 6589ac13c746..000000000000 --- a/include/llvm/Analysis/LibCallAliasAnalysis.h +++ /dev/null @@ -1,71 +0,0 @@ -//===- LibCallAliasAnalysis.h - Implement AliasAnalysis for libcalls ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the LibCallAliasAnalysis class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H -#define LLVM_ANALYSIS_LIBCALLALIASANALYSIS_H - -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" - -namespace llvm { - class LibCallInfo; - struct LibCallFunctionInfo; - - /// LibCallAliasAnalysis - Alias analysis driven from LibCallInfo. - struct LibCallAliasAnalysis : public FunctionPass, public AliasAnalysis { - static char ID; // Class identification - - LibCallInfo *LCI; - - explicit LibCallAliasAnalysis(LibCallInfo *LC = nullptr) - : FunctionPass(ID), LCI(LC) { - initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - explicit LibCallAliasAnalysis(char &ID, LibCallInfo *LC) - : FunctionPass(ID), LCI(LC) { - initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - ~LibCallAliasAnalysis() override; - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - // TODO: Could compare two direct calls against each other if we cared to. - return AliasAnalysis::getModRefInfo(CS1, CS2); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override; - - bool runOnFunction(Function &F) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - private: - ModRefResult AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, - ImmutableCallSite CS, - const MemoryLocation &Loc); - }; -} // End of llvm namespace - -#endif diff --git a/include/llvm/Analysis/LibCallSemantics.h b/include/llvm/Analysis/LibCallSemantics.h deleted file mode 100644 index b4bef310e590..000000000000 --- a/include/llvm/Analysis/LibCallSemantics.h +++ /dev/null @@ -1,225 +0,0 @@ -//===- LibCallSemantics.h - Describe library semantics --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines interfaces that can be used to describe language specific -// runtime library interfaces (e.g. libc, libm, etc) to LLVM optimizers. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_LIBCALLSEMANTICS_H -#define LLVM_ANALYSIS_LIBCALLSEMANTICS_H - -#include "llvm/Analysis/AliasAnalysis.h" - -namespace llvm { -class InvokeInst; - - /// LibCallLocationInfo - This struct describes a set of memory locations that - /// are accessed by libcalls. Identification of a location is doing with a - /// simple callback function. - /// - /// For example, the LibCallInfo may be set up to model the behavior of - /// standard libm functions. The location that they may be interested in is - /// an abstract location that represents errno for the current target. 
In - /// this case, a location for errno is anything such that the predicate - /// returns true. On Mac OS X, this predicate would return true if the - /// pointer is the result of a call to "__error()". - /// - /// Locations can also be defined in a constant-sensitive way. For example, - /// it is possible to define a location that returns true iff it is passed - /// into the call as a specific argument. This is useful for modeling things - /// like "printf", which can store to memory, but only through pointers passed - /// with a '%n' constraint. - /// - struct LibCallLocationInfo { - // TODO: Flags: isContextSensitive etc. - - /// isLocation - Return a LocResult if the specified pointer refers to this - /// location for the specified call site. This returns "Yes" if we can tell - /// that the pointer *does definitely* refer to the location, "No" if we can - /// tell that the location *definitely does not* refer to the location, and - /// returns "Unknown" if we cannot tell for certain. - enum LocResult { - Yes, No, Unknown - }; - LocResult (*isLocation)(ImmutableCallSite CS, const MemoryLocation &Loc); - }; - - /// LibCallFunctionInfo - Each record in the array of FunctionInfo structs - /// records the behavior of one libcall that is known by the optimizer. This - /// captures things like the side effects of the call. Side effects are - /// modeled both universally (in the readnone/readonly) sense, but also - /// potentially against a set of abstract locations defined by the optimizer. - /// This allows an optimizer to define that some libcall (e.g. sqrt) is - /// side-effect free except that it might modify errno (thus, the call is - /// *not* universally readonly). Or it might say that the side effects - /// are unknown other than to say that errno is not modified. - /// - struct LibCallFunctionInfo { - /// Name - This is the name of the libcall this describes. - const char *Name; - - /// TODO: Constant folding function: Constant* vector -> Constant*. - - /// UniversalBehavior - This captures the absolute mod/ref behavior without - /// any specific context knowledge. For example, if the function is known - /// to be readonly, this would be set to 'ref'. If known to be readnone, - /// this is set to NoModRef. - AliasAnalysis::ModRefResult UniversalBehavior; - - /// LocationMRInfo - This pair captures info about whether a specific - /// location is modified or referenced by a libcall. - struct LocationMRInfo { - /// LocationID - ID # of the accessed location or ~0U for array end. - unsigned LocationID; - /// MRInfo - Mod/Ref info for this location. - AliasAnalysis::ModRefResult MRInfo; - }; - - /// DetailsType - Indicate the sense of the LocationDetails array. This - /// controls how the LocationDetails array is interpreted. - enum { - /// DoesOnly - If DetailsType is set to DoesOnly, then we know that the - /// *only* mod/ref behavior of this function is captured by the - /// LocationDetails array. If we are trying to say that 'sqrt' can only - /// modify errno, we'd have the {errnoloc,mod} in the LocationDetails - /// array and have DetailsType set to DoesOnly. - DoesOnly, - - /// DoesNot - If DetailsType is set to DoesNot, then the sense of the - /// LocationDetails array is completely inverted. This means that we *do - /// not* know everything about the side effects of this libcall, but we do - /// know things that the libcall cannot do. This is useful for complex - /// functions like 'ctime' which have crazy mod/ref behavior, but are - /// known to never read or write errno. 
In this case, we'd have - /// {errnoloc,modref} in the LocationDetails array and DetailsType would - /// be set to DoesNot, indicating that ctime does not read or write the - /// errno location. - DoesNot - } DetailsType; - - /// LocationDetails - This is a pointer to an array of LocationMRInfo - /// structs which indicates the behavior of the libcall w.r.t. specific - /// locations. For example, if this libcall is known to only modify - /// 'errno', it would have a LocationDetails array with the errno ID and - /// 'mod' in it. See the DetailsType field for how this is interpreted. - /// - /// In the "DoesOnly" case, this information is 'may' information for: there - /// is no guarantee that the specified side effect actually does happen, - /// just that it could. In the "DoesNot" case, this is 'must not' info. - /// - /// If this pointer is null, no details are known. - /// - const LocationMRInfo *LocationDetails; - }; - - - /// LibCallInfo - Abstract interface to query about library call information. - /// Instances of this class return known information about some set of - /// libcalls. - /// - class LibCallInfo { - // Implementation details of this object, private. - mutable void *Impl; - mutable const LibCallLocationInfo *Locations; - mutable unsigned NumLocations; - public: - LibCallInfo() : Impl(nullptr), Locations(nullptr), NumLocations(0) {} - virtual ~LibCallInfo(); - - //===------------------------------------------------------------------===// - // Accessor Methods: Efficient access to contained data. - //===------------------------------------------------------------------===// - - /// getLocationInfo - Return information about the specified LocationID. - const LibCallLocationInfo &getLocationInfo(unsigned LocID) const; - - - /// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to - /// the specified function if we have it. If not, return null. - const LibCallFunctionInfo *getFunctionInfo(const Function *F) const; - - - //===------------------------------------------------------------------===// - // Implementation Methods: Subclasses should implement these. - //===------------------------------------------------------------------===// - - /// getLocationInfo - Return descriptors for the locations referenced by - /// this set of libcalls. - virtual unsigned getLocationInfo(const LibCallLocationInfo *&Array) const { - return 0; - } - - /// getFunctionInfoArray - Return an array of descriptors that describe the - /// set of libcalls represented by this LibCallInfo object. This array is - /// terminated by an entry with a NULL name. - virtual const LibCallFunctionInfo *getFunctionInfoArray() const = 0; - }; - - enum class EHPersonality { - Unknown, - GNU_Ada, - GNU_C, - GNU_CXX, - GNU_ObjC, - MSVC_X86SEH, - MSVC_Win64SEH, - MSVC_CXX, - }; - - /// \brief See if the given exception handling personality function is one - /// that we understand. If so, return a description of it; otherwise return - /// Unknown. - EHPersonality classifyEHPersonality(const Value *Pers); - - /// \brief Returns true if this personality function catches asynchronous - /// exceptions. - inline bool isAsynchronousEHPersonality(EHPersonality Pers) { - // The two SEH personality functions can catch asynch exceptions. We assume - // unknown personalities don't catch asynch exceptions. 
- switch (Pers) { - case EHPersonality::MSVC_X86SEH: - case EHPersonality::MSVC_Win64SEH: - return true; - default: return false; - } - llvm_unreachable("invalid enum"); - } - - /// \brief Returns true if this is an MSVC personality function. - inline bool isMSVCEHPersonality(EHPersonality Pers) { - // The two SEH personality functions can catch asynch exceptions. We assume - // unknown personalities don't catch asynch exceptions. - switch (Pers) { - case EHPersonality::MSVC_CXX: - case EHPersonality::MSVC_X86SEH: - case EHPersonality::MSVC_Win64SEH: - return true; - default: return false; - } - llvm_unreachable("invalid enum"); - } - - /// \brief Return true if this personality may be safely removed if there - /// are no invoke instructions remaining in the current function. - inline bool isNoOpWithoutInvoke(EHPersonality Pers) { - switch (Pers) { - case EHPersonality::Unknown: - return false; - // All known personalities currently have this behavior - default: return true; - } - llvm_unreachable("invalid enum"); - } - - bool canSimplifyInvokeNoUnwind(const Function *F); - -} // end namespace llvm - -#endif diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h index 42667d2af14a..939663b0def1 100644 --- a/include/llvm/Analysis/Loads.h +++ b/include/llvm/Analysis/Loads.h @@ -14,11 +14,12 @@ #ifndef LLVM_ANALYSIS_LOADS_H #define LLVM_ANALYSIS_LOADS_H +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/Support/CommandLine.h" namespace llvm { -class AliasAnalysis; class DataLayout; class MDNode; @@ -29,15 +30,19 @@ class MDNode; bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, unsigned Align); +/// DefMaxInstsToScan - the default number of maximum instructions +/// to scan in the block, used by FindAvailableLoadedValue(). +extern cl::opt DefMaxInstsToScan; + /// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at /// the instruction before ScanFrom) checking to see if we have the value at /// the memory address *Ptr locally available within a small number of /// instructions. If the value is available, return it. /// -/// If not, return the iterator for the last validated instruction that the +/// If not, return the iterator for the last validated instruction that the /// value would be live through. If we scanned the entire block and didn't /// find something that invalidates *Ptr or provides it, ScanFrom would be -/// left at begin() and this returns null. ScanFrom could also be left +/// left at begin() and this returns null. ScanFrom could also be left /// /// MaxInstsToScan specifies the maximum instructions to scan in the block. /// If it is set to 0, it will scan the whole block. You can also optionally @@ -48,7 +53,7 @@ bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, /// is found, it is left unmodified. 
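For illustration, a minimal sketch of the cl::opt idiom that DefMaxInstsToScan relies on (the diff rendering drops template arguments; the declaration is cl::opt<unsigned>). The option name, default, and helper below are hypothetical and not part of the patch:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical option mirroring DefMaxInstsToScan.
static cl::opt<unsigned> MaxInstsToScanOpt(
    "example-max-insts-to-scan", cl::init(6), cl::Hidden,
    cl::desc("Max instructions to scan per block (0 = whole block)"));

unsigned effectiveScanLimit(unsigned BlockSize) {
  // By the convention documented above, 0 means "scan the whole block".
  return MaxInstsToScanOpt == 0 ? BlockSize : MaxInstsToScanOpt.getValue();
}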
Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan = 6, + unsigned MaxInstsToScan = DefMaxInstsToScan, AliasAnalysis *AA = nullptr, AAMDNodes *AATags = nullptr); diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h index 476e4b6686bb..871d35e99b74 100644 --- a/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/include/llvm/Analysis/LoopAccessAnalysis.h @@ -29,10 +29,11 @@ namespace llvm { class Value; class DataLayout; -class AliasAnalysis; class ScalarEvolution; class Loop; class SCEV; +class SCEVUnionPredicate; +class LoopAccessInfo; /// Optimization analysis message produced during vectorization. Messages inform /// the user why vectorization did not occur. @@ -136,6 +137,14 @@ public: // We couldn't determine the direction or the distance. Unknown, // Lexically forward. + // + // FIXME: If we only have loop-independent forward dependences (e.g. a + // read and write of A[i]), LAA will locally deem the dependence "safe" + // without querying the MemoryDepChecker. Therefore we can miss + // enumerating loop-independent forward dependences in + // getDependences. Note that as soon as there are different + // indices used to access the same array, the MemoryDepChecker *is* + // queried and the dependence list is complete. Forward, // Forward, but if vectorized, is likely to prevent store-to-load // forwarding. @@ -162,13 +171,20 @@ public: Dependence(unsigned Source, unsigned Destination, DepType Type) : Source(Source), Destination(Destination), Type(Type) {} + /// \brief Return the source instruction of the dependence. + Instruction *getSource(const LoopAccessInfo &LAI) const; + /// \brief Return the destination instruction of the dependence. + Instruction *getDestination(const LoopAccessInfo &LAI) const; + /// \brief Dependence types that don't prevent vectorization. static bool isSafeForVectorization(DepType Type); - /// \brief Dependence types that can be queried from the analysis. - static bool isInterestingDependence(DepType Type); + /// \brief Lexically forward dependence. + bool isForward() const; + /// \brief Lexically backward dependence. + bool isBackward() const; - /// \brief Lexically backward dependence types. + /// \brief May be a lexically backward dependence type (includes Unknown). bool isPossiblyBackward() const; /// \brief Print the dependence. \p Instr is used to map the instruction @@ -177,10 +193,10 @@ public: const SmallVectorImpl &Instrs) const; }; - MemoryDepChecker(ScalarEvolution *Se, const Loop *L) - : SE(Se), InnermostLoop(L), AccessIdx(0), + MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) + : PSE(PSE), InnermostLoop(L), AccessIdx(0), ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true), - RecordInterestingDependences(true) {} + RecordDependences(true) {} /// \brief Register the location (instructions are given increasing numbers) /// of a write access. @@ -218,14 +234,14 @@ public: /// vectorize the loop with a dynamic array access check. bool shouldRetryWithRuntimeCheck() { return ShouldRetryWithRuntimeCheck; } - /// \brief Returns the interesting dependences. If null is returned we - /// exceeded the MaxInterestingDependence threshold and this information is - /// not available. - const SmallVectorImpl *getInterestingDependences() const { - return RecordInterestingDependences ? &InterestingDependences : nullptr; + /// \brief Returns the memory dependences. 
If null is returned we exceeded + /// the MaxDependences threshold and this information is not + /// available. + const SmallVectorImpl *getDependences() const { + return RecordDependences ? &Dependences : nullptr; } - void clearInterestingDependences() { InterestingDependences.clear(); } + void clearDependences() { Dependences.clear(); } /// \brief The vector of memory access instructions. The indices are used as /// instruction identifiers in the Dependence class. @@ -233,12 +249,29 @@ public: return InstMap; } + /// \brief Generate a mapping between the memory instructions and their + /// indices according to program order. + DenseMap generateInstructionOrderMap() const { + DenseMap OrderMap; + + for (unsigned I = 0; I < InstMap.size(); ++I) + OrderMap[InstMap[I]] = I; + + return OrderMap; + } + /// \brief Find the set of instructions that read or write via \p Ptr. SmallVector getInstructionsForAccess(Value *Ptr, bool isWrite) const; private: - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and + /// applies dynamic knowledge to simplify SCEV expressions and convert them + /// to a more usable form. We need this in case assumptions about SCEV + /// expressions need to be made in order to avoid unknown dependences. For + /// example we might assume a unit stride for a pointer in order to prove + /// that a memory access is strided and doesn't wrap. + PredicatedScalarEvolution &PSE; const Loop *InnermostLoop; /// \brief Maps access locations (ptr, read/write) to program order. @@ -261,15 +294,14 @@ private: /// vectorization. bool SafeForVectorization; - //// \brief True if InterestingDependences reflects the dependences in the - //// loop. If false we exceeded MaxInterestingDependence and - //// InterestingDependences is invalid. - bool RecordInterestingDependences; + //// \brief True if Dependences reflects the dependences in the + //// loop. If false we exceeded MaxDependences and + //// Dependences is invalid. + bool RecordDependences; - /// \brief Interesting memory dependences collected during the analysis as - /// defined by isInterestingDependence. Only valid if - /// RecordInterestingDependences is true. - SmallVector InterestingDependences; + /// \brief Memory dependences collected during the analysis. Only valid if + /// RecordDependences is true. + SmallVector Dependences; /// \brief Check whether there is a plausible dependence between the two /// accesses. @@ -327,11 +359,17 @@ public: void reset() { Need = false; Pointers.clear(); + Checks.clear(); } /// Insert a pointer and calculate the start and end SCEVs. + /// \p We need Preds in order to compute the SCEV expression of the pointer + /// according to the assumptions that we've made during the analysis. + /// The method might also version the pointer stride according to \p Strides, + /// and change \p Preds. void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, - unsigned ASId, const ValueToValueMap &Strides); + unsigned ASId, const ValueToValueMap &Strides, + PredicatedScalarEvolution &PSE); /// \brief No run-time memory checking is necessary. bool empty() const { return Pointers.empty(); } @@ -368,33 +406,38 @@ public: SmallVector Members; }; - /// \brief Groups pointers such that a single memcheck is required - /// between two different groups. This will clear the CheckingGroups vector - /// and re-compute it. We will only group dependecies if \p UseDependencies - /// is true, otherwise we will create a separate group for each pointer. 
- void groupChecks(MemoryDepChecker::DepCandidates &DepCands, - bool UseDependencies); + /// \brief A memcheck which is made up of a pair of grouped pointers. + /// + /// These *have* to be const for now, since checks are generated from + /// CheckingPtrGroups in LAI::addRuntimeChecks which is a const member + /// function. FIXME: once check-generation is moved inside this class (after + /// the PtrPartition hack is removed), we could drop const. + typedef std::pair<const CheckingPtrGroup *, const CheckingPtrGroup *> + PointerCheck; + + /// \brief Generate the checks and store them. This also performs the grouping + /// of pointers to reduce the number of memchecks necessary. + void generateChecks(MemoryDepChecker::DepCandidates &DepCands, + bool UseDependencies); + + /// \brief Returns the checks that generateChecks created. + const SmallVector<PointerCheck, 4> &getChecks() const { return Checks; } /// \brief Decide if we need to add a check between two groups of pointers, /// according to needsChecking. - bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N, - const SmallVectorImpl<int> *PtrPartition) const; - - /// \brief Return true if any pointer requires run-time checking according - /// to needsChecking. - bool needsAnyChecking(const SmallVectorImpl<int> *PtrPartition) const; + bool needsChecking(const CheckingPtrGroup &M, + const CheckingPtrGroup &N) const; /// \brief Returns the number of run-time checks required according to /// needsChecking. - unsigned getNumberOfChecks(const SmallVectorImpl<int> *PtrPartition) const; + unsigned getNumberOfChecks() const { return Checks.size(); } /// \brief Print the list run-time memory checks necessary. - /// - /// If \p PtrPartition is set, it contains the partition number for - /// pointers (-1 if the pointer belongs to multiple partitions). In this - /// case omit checks between pointers belonging to the same partition. - void print(raw_ostream &OS, unsigned Depth = 0, - const SmallVectorImpl<int> *PtrPartition = nullptr) const; + void print(raw_ostream &OS, unsigned Depth = 0) const; + + /// Print \p Checks. + void printChecks(raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks, + unsigned Depth = 0) const; /// This flag indicates if we need to add the runtime check. bool Need; @@ -405,18 +448,41 @@ public: /// Holds a partitioning of pointers into "check groups". SmallVector<CheckingPtrGroup, 2> CheckingGroups; -private: + /// \brief Check if pointers are in the same partition + /// + /// \p PtrToPartition contains the partition number for pointers (-1 if the + /// pointer belongs to multiple partitions). + static bool + arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition, + unsigned PtrIdx1, unsigned PtrIdx2); + /// \brief Decide whether we need to issue a run-time check for pointer at /// index \p I and \p J to prove their independence. - /// - /// If \p PtrPartition is set, it contains the partition number for - /// pointers (-1 if the pointer belongs to multiple partitions). In this - /// case omit checks between pointers belonging to the same partition. - bool needsChecking(unsigned I, unsigned J, - const SmallVectorImpl<int> *PtrPartition) const; + bool needsChecking(unsigned I, unsigned J) const; + + /// \brief Return PointerInfo for pointer at index \p PtrIdx. + const PointerInfo &getPointerInfo(unsigned PtrIdx) const { + return Pointers[PtrIdx]; + } + +private: + /// \brief Groups pointers such that a single memcheck is required + /// between two different groups. This will clear the CheckingGroups vector + /// and re-compute it. We will only group dependencies if \p UseDependencies + /// is true, otherwise we will create a separate group for each pointer. + void groupChecks(MemoryDepChecker::DepCandidates &DepCands, + bool UseDependencies); + + /// Generate the checks and return them. + SmallVector<PointerCheck, 4> + generateChecks() const; /// Holds a pointer to the ScalarEvolution analysis. ScalarEvolution *SE; + + /// \brief Set of run-time checks required to establish independence of + /// otherwise may-aliasing pointers in the loop. + SmallVector<PointerCheck, 4> Checks; }; /// \brief Drive the analysis of memory accesses in the loop @@ -433,6 +499,13 @@ private: /// generates run-time checks to prove independence. This is done by /// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the /// RuntimePointerCheck class. +/// +/// If pointers can wrap or can't be expressed as affine AddRec expressions by +/// ScalarEvolution, we will generate run-time checks by emitting a +/// SCEVUnionPredicate. +/// +/// Checks for both memory dependences and the SCEV predicates contained in the +/// PSE must be emitted in order for the results of this analysis to be valid. class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, @@ -450,9 +523,8 @@ public: /// \brief Number of memchecks required to prove independence of otherwise /// may-alias pointers. - unsigned getNumRuntimePointerChecks( - const SmallVectorImpl<int> *PtrPartition = nullptr) const { - return PtrRtChecking.getNumberOfChecks(PtrPartition); + unsigned getNumRuntimePointerChecks() const { + return PtrRtChecking.getNumberOfChecks(); } /// Return true if the block BB needs to be predicated in order for the loop @@ -472,13 +544,18 @@ public: /// Returns a pair of instructions where the first element is the first /// instruction generated in possibly a sequence of instructions and the /// second value is the final comparator value or NULL if no check is needed. - /// - /// If \p PtrPartition is set, it contains the partition number for pointers - /// (-1 if the pointer belongs to multiple partitions). In this case omit - /// checks between pointers belonging to the same partition. std::pair<Instruction *, Instruction *> - addRuntimeCheck(Instruction *Loc, - const SmallVectorImpl<int> *PtrPartition = nullptr) const; + addRuntimeChecks(Instruction *Loc) const; + + /// \brief Generate the instructions for the checks in \p PointerChecks. + /// + /// Returns a pair of instructions where the first element is the first + /// instruction generated in possibly a sequence of instructions and the + /// second value is the final comparator value or NULL if no check is needed. + std::pair<Instruction *, Instruction *> + addRuntimeChecks(Instruction *Loc, + const SmallVectorImpl<RuntimePointerChecking::PointerCheck> + &PointerChecks) const; /// \brief The diagnostics report generated for the analysis. E.g. why we /// couldn't analyze the loop. @@ -510,6 +587,13 @@ public: return StoreToLoopInvariantAddress; } + /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts + /// them to a more usable form. All SCEV expressions during the analysis + /// should be re-written (and therefore simplified) according to PSE. + /// A user of LoopAccessAnalysis will need to emit the runtime checks + /// associated with this predicate. + PredicatedScalarEvolution PSE; + private: /// \brief Analyze the loop. Substitute symbolic strides using Strides.
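For illustration, a minimal sketch of a client of the LoopAccessInfo interface above; the helper function is hypothetical, and the pair semantics follow the addRuntimeChecks comment:

#include "llvm/Analysis/LoopAccessAnalysis.h"
using namespace llvm;

// Hypothetical helper: emit the run-time checks at Loc when the analysis
// could not prove independence statically.
static void emitChecksIfNeeded(const LoopAccessInfo &LAI, Instruction *Loc) {
  if (LAI.getNumRuntimePointerChecks() == 0)
    return; // no memchecks required; the dependences are safe as-is
  // first = first instruction emitted, second = final i1 comparator
  // (nullptr when no check ends up being needed).
  std::pair<Instruction *, Instruction *> Check = LAI.addRuntimeChecks(Loc);
  (void)Check; // a loop versioner would branch on Check.second
}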
void analyzeLoop(const ValueToValueMap &Strides); @@ -529,7 +613,6 @@ private: MemoryDepChecker DepChecker; Loop *TheLoop; - ScalarEvolution *SE; const DataLayout &DL; const TargetLibraryInfo *TLI; AliasAnalysis *AA; @@ -556,18 +639,24 @@ private: Value *stripIntegerCast(Value *V); ///\brief Return the SCEV corresponding to a pointer with the symbolic stride -///replaced with constant one. +/// replaced with constant one, assuming \p Preds is true. +/// +/// If necessary this method will version the stride of the pointer according +/// to \p PtrToStride and therefore add a new predicate to \p Preds. /// /// If \p OrigPtr is not null, use it to look up the stride value instead of \p /// Ptr. \p PtrToStride provides the mapping between the pointer value and its /// stride as collected by LoopVectorizationLegality::collectStridedAccess. -const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr = nullptr); /// \brief Check the stride of the pointer and ensure that it does not wrap in -/// the address space. -int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, +/// the address space, assuming \p Preds is true. +/// +/// If necessary this method will version the stride of the pointer according +/// to \p PtrToStride and therefore add a new predicate to \p Preds. +int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap); /// \brief This analysis provides dependence information for the memory accesses @@ -616,6 +705,17 @@ private: DominatorTree *DT; LoopInfo *LI; }; + +inline Instruction *MemoryDepChecker::Dependence::getSource( + const LoopAccessInfo &LAI) const { + return LAI.getDepChecker().getMemoryInstructions()[Source]; +} + +inline Instruction *MemoryDepChecker::Dependence::getDestination( + const LoopAccessInfo &LAI) const { + return LAI.getDepChecker().getMemoryInstructions()[Destination]; +} + } // End llvm namespace #endif diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 3ec83f2c21fd..c219bd85a48a 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -37,6 +37,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include @@ -72,6 +73,10 @@ class LoopBase { SmallPtrSet DenseBlockSet; + /// Indicator that this loops has been "unlooped", so there's no loop here + /// anymore. + bool IsUnloop = false; + LoopBase(const LoopBase &) = delete; const LoopBase& operator=(const LoopBase &) = delete; @@ -140,12 +145,22 @@ public: typedef typename std::vector::const_iterator block_iterator; block_iterator block_begin() const { return Blocks.begin(); } block_iterator block_end() const { return Blocks.end(); } + inline iterator_range blocks() const { + return make_range(block_begin(), block_end()); + } /// getNumBlocks - Get the number of blocks in this loop in constant time. unsigned getNumBlocks() const { return Blocks.size(); } + /// Mark this loop as having been unlooped - the last backedge was removed and + /// we no longer have a loop. + void markUnlooped() { IsUnloop = true; } + + /// Return true if this no longer represents a loop. + bool isUnloop() const { return IsUnloop; } + /// isLoopExiting - True if terminator in the block can branch to another /// block that is outside of the current loop. 
/// @@ -398,6 +413,9 @@ public: /// isLCSSAForm - Return true if the Loop is in LCSSA form bool isLCSSAForm(DominatorTree &DT) const; + /// \brief Return true if this Loop and all inner subloops are in LCSSA form. + bool isRecursivelyLCSSAForm(DominatorTree &DT) const; + /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. @@ -622,7 +640,7 @@ public: } /// Create the loop forest using a stable algorithm. - void Analyze(DominatorTreeBase &DomTree); + void analyze(const DominatorTreeBase &DomTree); // Debugging void print(raw_ostream &OS) const; @@ -642,6 +660,7 @@ class LoopInfo : public LoopInfoBase { LoopInfo(const LoopInfo &) = delete; public: LoopInfo() {} + explicit LoopInfo(const DominatorTreeBase &DomTree); LoopInfo(LoopInfo &&Arg) : BaseT(std::move(static_cast(Arg))) {} LoopInfo &operator=(LoopInfo &&RHS) { @@ -653,8 +672,9 @@ public: /// updateUnloop - Update LoopInfo after removing the last backedge from a /// loop--now the "unloop". This updates the loop forest and parent loops for - /// each block so that Unloop is no longer referenced, but the caller must - /// actually delete the Unloop object. + /// each block so that Unloop is no longer referenced, but does not actually + /// delete the Unloop object. Generally, the loop pass manager should manage + /// deleting the Unloop. void updateUnloop(Loop *Unloop); /// replacementPreservesLCSSAForm - Returns true if replacing From with To @@ -677,6 +697,78 @@ public: // it as a replacement will not break LCSSA form. return ToLoop->contains(getLoopFor(From->getParent())); } + + /// \brief Checks if moving a specific instruction can break LCSSA in any + /// loop. + /// + /// Return true if moving \p Inst to before \p NewLoc will break LCSSA, + /// assuming that the function containing \p Inst and \p NewLoc is currently + /// in LCSSA form. + bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) { + assert(Inst->getFunction() == NewLoc->getFunction() && + "Can't reason about IPO!"); + + auto *OldBB = Inst->getParent(); + auto *NewBB = NewLoc->getParent(); + + // Movement within the same loop does not break LCSSA (the equality check is + // to avoid doing a hashtable lookup in case of intra-block movement). + if (OldBB == NewBB) + return true; + + auto *OldLoop = getLoopFor(OldBB); + auto *NewLoop = getLoopFor(NewBB); + + if (OldLoop == NewLoop) + return true; + + // Check if Outer contains Inner; with the null loop counting as the + // "outermost" loop. + auto Contains = [](const Loop *Outer, const Loop *Inner) { + return !Outer || Outer->contains(Inner); + }; + + // To check that the movement of Inst to before NewLoc does not break LCSSA, + // we need to check two sets of uses for possible LCSSA violations at + // NewLoc: the users of NewInst, and the operands of NewInst. + + // If we know we're hoisting Inst out of an inner loop to an outer loop, + // then the uses *of* Inst don't need to be checked. + + if (!Contains(NewLoop, OldLoop)) { + for (Use &U : Inst->uses()) { + auto *UI = cast(U.getUser()); + auto *UBB = isa(UI) ? cast(UI)->getIncomingBlock(U) + : UI->getParent(); + if (UBB != NewBB && getLoopFor(UBB) != NewLoop) + return false; + } + } + + // If we know we're sinking Inst from an outer loop into an inner loop, then + // the *operands* of Inst don't need to be checked. + + if (!Contains(OldLoop, NewLoop)) { + // See below on why we can't handle phi nodes here. 
+ if (isa(Inst)) + return false; + + for (Use &U : Inst->operands()) { + auto *DefI = dyn_cast(U.get()); + if (!DefI) + return false; + + // This would need adjustment if we allow Inst to be a phi node -- the + // new use block won't simply be NewBB. + + auto *DefBlock = DefI->getParent(); + if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop) + return false; + } + } + + return true; + } }; // Allow clients to walk the list of nested loops... @@ -759,6 +851,19 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; }; +/// \brief Pass for printing a loop's contents as LLVM's text IR assembly. +class PrintLoopPass { + raw_ostream &OS; + std::string Banner; + +public: + PrintLoopPass(); + PrintLoopPass(raw_ostream &OS, const std::string &Banner = ""); + + PreservedAnalyses run(Loop &L); + static StringRef name() { return "PrintLoopPass"; } +}; + } // End llvm namespace #endif diff --git a/include/llvm/Analysis/LoopInfoImpl.h b/include/llvm/Analysis/LoopInfoImpl.h index f5cc856f6247..824fc7e8f155 100644 --- a/include/llvm/Analysis/LoopInfoImpl.h +++ b/include/llvm/Analysis/LoopInfoImpl.h @@ -269,7 +269,7 @@ void LoopBase::verifyLoop() const { // A non-header loop shouldn't be reachable from outside the loop, // though it is permitted if the predecessor is not itself actually // reachable. - BlockT *EntryBB = BB->getParent()->begin(); + BlockT *EntryBB = &BB->getParent()->front(); for (BlockT *CB : depth_first(EntryBB)) for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i) assert(CB != OutsideLoopPreds[i] && @@ -345,7 +345,7 @@ void LoopBase::print(raw_ostream &OS, unsigned Depth) const { template static void discoverAndMapSubloop(LoopT *L, ArrayRef Backedges, LoopInfoBase *LI, - DominatorTreeBase &DomTree) { + const DominatorTreeBase &DomTree) { typedef GraphTraits > InvBlockTraits; unsigned NumBlocks = 0; @@ -468,10 +468,10 @@ void PopulateLoopsDFS::insertIntoLoop(BlockT *Block) { /// insertions per block. template void LoopInfoBase:: -Analyze(DominatorTreeBase &DomTree) { +analyze(const DominatorTreeBase &DomTree) { // Postorder traversal of the dominator tree. - DomTreeNodeBase* DomRoot = DomTree.getRootNode(); + const DomTreeNodeBase *DomRoot = DomTree.getRootNode(); for (auto DomNode : post_order(DomRoot)) { BlockT *Header = DomNode->getBlock(); diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h index 8650000fcfb6..2cf734e53bb4 100644 --- a/include/llvm/Analysis/LoopPass.h +++ b/include/llvm/Analysis/LoopPass.h @@ -127,20 +127,9 @@ public: } public: - // Delete loop from the loop queue and loop nest (LoopInfo). - void deleteLoopFromQueue(Loop *L); - - // Insert loop into the loop queue and add it as a child of the - // given parent. - void insertLoop(Loop *L, Loop *ParentLoop); - - // Insert a loop into the loop queue. - void insertLoopIntoQueue(Loop *L); - - // Reoptimize this loop. LPPassManager will re-insert this loop into the - // queue. This allows LoopPass to change loop nest for the loop. This - // utility may send LPPassManager into infinite loops so use caution. - void redoLoop(Loop *L); + // Add a new loop into the loop queue as a child of the given parent, or at + // the top level if \c ParentLoop is null. 
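For illustration, a minimal sketch of the guard that movementPreservesLCSSAForm (defined above) is meant to support; the hoisting helper is hypothetical:

#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;

// Hypothetical transform step: only move Inst if LCSSA survives the move.
static bool hoistIfLCSSASafe(LoopInfo &LI, Instruction *Inst,
                             Instruction *InsertPt) {
  if (!LI.movementPreservesLCSSAForm(Inst, InsertPt))
    return false;             // moving would introduce an LCSSA violation
  Inst->moveBefore(InsertPt); // safe: same rules the helper just checked
  return true;
}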
+ Loop &addLoop(Loop *ParentLoop); //===--------------------------------------------------------------------===// /// SimpleAnalysis - Provides simple interface to update analysis info @@ -163,8 +152,6 @@ public: private: std::deque LQ; - bool skipThisLoop; - bool redoThisLoop; LoopInfo *LI; Loop *CurrentLoop; }; diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index 805a43dfb070..87fb3efaf50e 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -59,11 +59,6 @@ bool isCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false); -/// \brief Tests if a value is a call or invoke to a library function that -/// reallocates memory (such as realloc). -bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast = false); - /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory and never returns null (such as operator new). bool isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 511898071c22..daa1ba91c071 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -28,7 +28,6 @@ namespace llvm { class FunctionPass; class Instruction; class CallSite; - class AliasAnalysis; class AssumptionCache; class MemoryDependenceAnalysis; class PredIteratorCache; @@ -97,6 +96,7 @@ namespace llvm { typedef PointerIntPair PairTy; PairTy Value; explicit MemDepResult(PairTy V) : Value(V) {} + public: MemDepResult() : Value(nullptr, Invalid) {} @@ -164,6 +164,7 @@ namespace llvm { bool operator!=(const MemDepResult &M) const { return Value != M.Value; } bool operator<(const MemDepResult &M) const { return Value < M.Value; } bool operator>(const MemDepResult &M) const { return Value > M.Value; } + private: friend class MemoryDependenceAnalysis; /// Dirty - Entries with this marker occur in a LocalDeps map or @@ -190,6 +191,7 @@ namespace llvm { class NonLocalDepEntry { BasicBlock *BB; MemDepResult Result; + public: NonLocalDepEntry(BasicBlock *bb, MemDepResult result) : BB(bb), Result(result) {} @@ -215,6 +217,7 @@ namespace llvm { class NonLocalDepResult { NonLocalDepEntry Entry; Value *Address; + public: NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address) : Entry(bb, result), Address(address) {} @@ -261,6 +264,7 @@ namespace llvm { public: typedef std::vector NonLocalDepInfo; + private: /// ValueIsLoadPair - This is a pair where the bool is true if /// the dependence is a read only dependence, false if read/write. @@ -302,7 +306,6 @@ namespace llvm { SmallPtrSet > ReverseNonLocalPtrDepTy; ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps; - /// PerInstNLInfo - This is the instruction we keep for each cached access /// that we have for an instruction. The pointer is an owning pointer and /// the bool indicates whether we have any dirty bits in the set. @@ -326,6 +329,7 @@ namespace llvm { AliasAnalysis *AA; DominatorTree *DT; AssumptionCache *AC; + const TargetLibraryInfo *TLI; PredIteratorCache PredCache; public: @@ -363,14 +367,13 @@ namespace llvm { /// that. 
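For illustration, a minimal sketch of a local query against MemoryDependenceAnalysis; getDependency is the class's standard single-instruction entry point (not shown in this hunk), and the surrounding helper and load are hypothetical:

#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical client: classify what a load depends on within its block.
static void classifyLocalDep(MemoryDependenceAnalysis &MDA, LoadInst *LI) {
  MemDepResult Dep = MDA.getDependency(LI);
  if (Dep.isDef()) {
    // Dep.getInst() produces the loaded memory; a forwarding candidate.
  } else if (Dep.isClobber()) {
    // Dep.getInst() may overwrite the location; no forwarding.
  } else if (Dep.isNonLocal()) {
    // Fall back to getNonLocalPointerDependency for cross-block answers.
  }
}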
const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS); - /// getNonLocalPointerDependency - Perform a full dependency query for an /// access to the QueryInst's specified memory location, returning the set /// of instructions that either define or clobber the value. /// /// Warning: For a volatile query instruction, the dependencies will be /// accurate, and thus usable for reordering, but it is never legal to - /// remove the query instruction. + /// remove the query instruction. /// /// This method assumes the pointer has a "NonLocal" dependency within /// QueryInst's parent basic block. @@ -394,12 +397,12 @@ namespace llvm { /// critical edges. void invalidateCachedPredecessors(); - /// getPointerDependencyFrom - Return the instruction on which a memory - /// location depends. If isLoad is true, this routine ignores may-aliases - /// with read-only operations. If isLoad is false, this routine ignores - /// may-aliases with reads from read-only locations. If possible, pass - /// the query instruction as well; this function may take advantage of - /// the metadata annotated to the query instruction to refine the result. + /// \brief Return the instruction on which a memory location depends. + /// If isLoad is true, this routine ignores may-aliases with read-only + /// operations. If isLoad is false, this routine ignores may-aliases + /// with reads from read-only locations. If possible, pass the query + /// instruction as well; this function may take advantage of the metadata + /// annotated to the query instruction to refine the result. /// /// Note that this is an uncached query, and thus may be inefficient. /// @@ -409,6 +412,21 @@ namespace llvm { BasicBlock *BB, Instruction *QueryInst = nullptr); + MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, + bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB, + Instruction *QueryInst); + + /// This analysis looks for other loads and stores with invariant.group + /// metadata and the same pointer operand. Returns Unknown if it does not + /// find anything, and Def if it can be assumed that 2 instructions load or + /// store the same value. + /// FIXME: This analysis works only on single block because of restrictions + /// at the call site. + MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, + BasicBlock *BB); + /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that /// looks at a memory location for a load (specified by MemLocBase, Offs, /// and Size) and compares it against a load. If the specified load could @@ -442,7 +460,6 @@ namespace llvm { /// verifyRemoved - Verify that the specified instruction does not occur /// in our internal data structures. void verifyRemoved(Instruction *Inst) const; - }; } // End llvm namespace diff --git a/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/include/llvm/Analysis/ObjCARCAliasAnalysis.h new file mode 100644 index 000000000000..ac01154bac6c --- /dev/null +++ b/include/llvm/Analysis/ObjCARCAliasAnalysis.h @@ -0,0 +1,102 @@ +//===- ObjCARCAliasAnalysis.h - ObjC ARC Alias Analysis ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares a simple ARC-aware AliasAnalysis using special knowledge +/// of Objective C to enhance other optimization passes which rely on the Alias +/// Analysis infrastructure. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_OBJCARCALIASANALYSIS_H +#define LLVM_ANALYSIS_OBJCARCALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Pass.h" + +namespace llvm { +namespace objcarc { + +/// \brief This is a simple alias analysis implementation that uses knowledge +/// of ARC constructs to answer queries. +/// +/// TODO: This class could be generalized to know about other ObjC-specific +/// tricks. Such as knowing that ivars in the non-fragile ABI are non-aliasing +/// even though their offsets are dynamic. +class ObjCARCAAResult : public AAResultBase { + friend AAResultBase; + + const DataLayout &DL; + +public: + explicit ObjCARCAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI) + : AAResultBase(TLI), DL(DL) {} + ObjCARCAAResult(ObjCARCAAResult &&Arg) + : AAResultBase(std::move(Arg)), DL(Arg.DL) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + + using AAResultBase::getModRefBehavior; + FunctionModRefBehavior getModRefBehavior(const Function *F); + + using AAResultBase::getModRefInfo; + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class ObjCARCAA { +public: + typedef ObjCARCAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + ObjCARCAAResult run(Function &F, AnalysisManager *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "ObjCARCAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the ObjCARCAAResult object. 
+class ObjCARCAAWrapperPass : public ImmutablePass { + std::unique_ptr Result; + +public: + static char ID; + + ObjCARCAAWrapperPass(); + + ObjCARCAAResult &getResult() { return *Result; } + const ObjCARCAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +} // namespace objcarc +} // namespace llvm + +#endif diff --git a/include/llvm/Analysis/ObjCARCAnalysisUtils.h b/include/llvm/Analysis/ObjCARCAnalysisUtils.h new file mode 100644 index 000000000000..29d99c9d316d --- /dev/null +++ b/include/llvm/Analysis/ObjCARCAnalysisUtils.h @@ -0,0 +1,287 @@ +//===- ObjCARCAnalysisUtils.h - ObjC ARC Analysis Utilities -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines common analysis utilities used by the ObjC ARC Optimizer. +/// ARC stands for Automatic Reference Counting and is a system for managing +/// reference counts for objects in Objective C. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H +#define LLVM_LIB_ANALYSIS_OBJCARCANALYSISUTILS_H + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Optional.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +namespace llvm { +class raw_ostream; +} + +namespace llvm { +namespace objcarc { + +/// \brief A handy option to enable/disable all ARC Optimizations. +extern bool EnableARCOpts; + +/// \brief Test if the given module looks interesting to run ARC optimization +/// on. +inline bool ModuleHasARC(const Module &M) { + return + M.getNamedValue("objc_retain") || + M.getNamedValue("objc_release") || + M.getNamedValue("objc_autorelease") || + M.getNamedValue("objc_retainAutoreleasedReturnValue") || + M.getNamedValue("objc_retainBlock") || + M.getNamedValue("objc_autoreleaseReturnValue") || + M.getNamedValue("objc_autoreleasePoolPush") || + M.getNamedValue("objc_loadWeakRetained") || + M.getNamedValue("objc_loadWeak") || + M.getNamedValue("objc_destroyWeak") || + M.getNamedValue("objc_storeWeak") || + M.getNamedValue("objc_initWeak") || + M.getNamedValue("objc_moveWeak") || + M.getNamedValue("objc_copyWeak") || + M.getNamedValue("objc_retainedObject") || + M.getNamedValue("objc_unretainedObject") || + M.getNamedValue("objc_unretainedPointer") || + M.getNamedValue("clang.arc.use"); +} + +/// \brief This is a wrapper around getUnderlyingObject which also knows how to +/// look through objc_retain and objc_autorelease calls, which we know to return +/// their argument verbatim. 
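For illustration, a minimal sketch of the early-exit gate that EnableARCOpts and ModuleHasARC (declared above) make possible; the pass entry point is hypothetical:

#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/IR/Module.h"
using namespace llvm;
using namespace llvm::objcarc;

// Hypothetical module pass entry point.
static bool runARCOptSketch(Module &M) {
  // Skip modules that reference no ObjC runtime entry points at all.
  if (!EnableARCOpts || !ModuleHasARC(M))
    return false;
  // ... ARC-specific rewriting would go here ...
  return true;
}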
+inline const Value *GetUnderlyingObjCPtr(const Value *V, + const DataLayout &DL) { + for (;;) { + V = GetUnderlyingObject(V, DL); + if (!IsForwarding(GetBasicARCInstKind(V))) + break; + V = cast<CallInst>(V)->getArgOperand(0); + } + + return V; +} + +/// The RCIdentity root of a value \p V is a dominating value U for which +/// retaining or releasing U is equivalent to retaining or releasing V. In other +/// words, ARC operations on \p V are equivalent to ARC operations on \p U. +/// +/// We use this in the ARC optimizer to make it easier to match up ARC +/// operations by always mapping ARC operations to RCIdentityRoots instead of +/// pointers themselves. +/// +/// The two ways that we see RCIdentical values in ObjC are via: +/// +/// 1. PointerCasts +/// 2. Forwarding Calls that return their argument verbatim. +/// +/// Thus this function strips off pointer casts and forwarding calls. *NOTE* +/// This implies that two RCIdentical values must alias. +inline const Value *GetRCIdentityRoot(const Value *V) { + for (;;) { + V = V->stripPointerCasts(); + if (!IsForwarding(GetBasicARCInstKind(V))) + break; + V = cast<CallInst>(V)->getArgOperand(0); + } + return V; +} + +/// Helper which calls const Value *GetRCIdentityRoot(const Value *V) and just +/// casts away the const of the result. For documentation about what an +/// RCIdentityRoot is (and by extension what GetRCIdentityRoot does), look at +/// that function. +inline Value *GetRCIdentityRoot(Value *V) { + return const_cast<Value *>(GetRCIdentityRoot((const Value *)V)); +} + +/// \brief Assuming the given instruction is one of the special calls such as +/// objc_retain or objc_release, return the RCIdentity root of the argument of +/// the call. +inline Value *GetArgRCIdentityRoot(Value *Inst) { + return GetRCIdentityRoot(cast<CallInst>(Inst)->getArgOperand(0)); +} + +inline bool IsNullOrUndef(const Value *V) { + return isa<ConstantPointerNull>(V) || isa<UndefValue>(V); +} + +inline bool IsNoopInstruction(const Instruction *I) { + return isa<BitCastInst>(I) || + (isa<GetElementPtrInst>(I) && + cast<GetElementPtrInst>(I)->hasAllZeroIndices()); +} + +/// \brief Test whether the given value is possibly a retainable object pointer. +inline bool IsPotentialRetainableObjPtr(const Value *Op) { + // Pointers to static or stack storage are not valid retainable object + // pointers. + if (isa<Constant>(Op) || isa<AllocaInst>(Op)) + return false; + // Special arguments cannot be a valid retainable object pointer. + if (const Argument *Arg = dyn_cast<Argument>(Op)) + if (Arg->hasByValAttr() || + Arg->hasInAllocaAttr() || + Arg->hasNestAttr() || + Arg->hasStructRetAttr()) + return false; + // Only consider values with pointer types. + // + // It seems intuitive to exclude function pointer types as well, since + // functions are never retainable object pointers; however, clang occasionally + // bitcasts retainable object pointers to function-pointer type temporarily. + PointerType *Ty = dyn_cast<PointerType>(Op->getType()); + if (!Ty) + return false; + // Conservatively assume anything else is a potential retainable object + // pointer. + return true; +} + +inline bool IsPotentialRetainableObjPtr(const Value *Op, + AliasAnalysis &AA) { + // First make the rudimentary check. + if (!IsPotentialRetainableObjPtr(Op)) + return false; + + // Objects in constant memory are not reference-counted. + if (AA.pointsToConstantMemory(Op)) + return false; + + // Pointers in constant memory are not pointing to reference-counted objects. + if (const LoadInst *LI = dyn_cast<LoadInst>(Op)) + if (AA.pointsToConstantMemory(LI->getPointerOperand())) + return false; + + // Otherwise assume the worst.
+ return true; +} + +/// \brief Helper for GetARCInstKind. Determines what kind of construct CS +/// is. +inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) { + for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) + if (IsPotentialRetainableObjPtr(*I)) + return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser; + + return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call; +} + +/// \brief Return true if this value refers to a distinct and identifiable +/// object. +/// +/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses +/// special knowledge of ObjC conventions. +inline bool IsObjCIdentifiedObject(const Value *V) { + // Assume that call results and arguments have their own "provenance". + // Constants (including GlobalVariables) and Allocas are never + // reference-counted. + if (isa(V) || isa(V) || + isa(V) || isa(V) || + isa(V)) + return true; + + if (const LoadInst *LI = dyn_cast(V)) { + const Value *Pointer = + GetRCIdentityRoot(LI->getPointerOperand()); + if (const GlobalVariable *GV = dyn_cast(Pointer)) { + // A constant pointer can't be pointing to an object on the heap. It may + // be reference-counted, but it won't be deleted. + if (GV->isConstant()) + return true; + StringRef Name = GV->getName(); + // These special variables are known to hold values which are not + // reference-counted pointers. + if (Name.startswith("\01l_objc_msgSend_fixup_")) + return true; + + StringRef Section = GV->getSection(); + if (Section.find("__message_refs") != StringRef::npos || + Section.find("__objc_classrefs") != StringRef::npos || + Section.find("__objc_superrefs") != StringRef::npos || + Section.find("__objc_methname") != StringRef::npos || + Section.find("__cstring") != StringRef::npos) + return true; + } + } + + return false; +} + +enum class ARCMDKindID { + ImpreciseRelease, + CopyOnEscape, + NoObjCARCExceptions, +}; + +/// A cache of MDKinds used by various ARC optimizations. +class ARCMDKindCache { + Module *M; + + /// The Metadata Kind for clang.imprecise_release metadata. + llvm::Optional ImpreciseReleaseMDKind; + + /// The Metadata Kind for clang.arc.copy_on_escape metadata. + llvm::Optional CopyOnEscapeMDKind; + + /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata. 
+ llvm::Optional NoObjCARCExceptionsMDKind; + +public: + void init(Module *Mod) { + M = Mod; + ImpreciseReleaseMDKind = NoneType::None; + CopyOnEscapeMDKind = NoneType::None; + NoObjCARCExceptionsMDKind = NoneType::None; + } + + unsigned get(ARCMDKindID ID) { + switch (ID) { + case ARCMDKindID::ImpreciseRelease: + if (!ImpreciseReleaseMDKind) + ImpreciseReleaseMDKind = + M->getContext().getMDKindID("clang.imprecise_release"); + return *ImpreciseReleaseMDKind; + case ARCMDKindID::CopyOnEscape: + if (!CopyOnEscapeMDKind) + CopyOnEscapeMDKind = + M->getContext().getMDKindID("clang.arc.copy_on_escape"); + return *CopyOnEscapeMDKind; + case ARCMDKindID::NoObjCARCExceptions: + if (!NoObjCARCExceptionsMDKind) + NoObjCARCExceptionsMDKind = + M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions"); + return *NoObjCARCExceptionsMDKind; + } + llvm_unreachable("Covered switch isn't covered?!"); + } +}; + +} // end namespace objcarc +} // end namespace llvm + +#endif diff --git a/lib/Transforms/ObjCARC/ARCInstKind.h b/include/llvm/Analysis/ObjCARCInstKind.h similarity index 94% rename from lib/Transforms/ObjCARC/ARCInstKind.h rename to include/llvm/Analysis/ObjCARCInstKind.h index 636c65c9b627..13efb4b160be 100644 --- a/lib/Transforms/ObjCARC/ARCInstKind.h +++ b/include/llvm/Analysis/ObjCARCInstKind.h @@ -1,4 +1,4 @@ -//===--- ARCInstKind.h - ARC instruction equivalence classes -*- C++ -*----===// +//===- ObjCARCInstKind.h - ARC instruction equivalence classes --*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H -#define LLVM_LIB_TRANSFORMS_OBJCARC_ARCINSTKIND_H +#ifndef LLVM_ANALYSIS_OBJCARCINSTKIND_H +#define LLVM_ANALYSIS_OBJCARCINSTKIND_H #include "llvm/IR/Instructions.h" #include "llvm/IR/Function.h" @@ -98,7 +98,7 @@ ARCInstKind GetFunctionClass(const Function *F); /// This is similar to GetARCInstKind except that it only detects objc /// runtime calls. This allows it to be faster. /// -static inline ARCInstKind GetBasicARCInstKind(const Value *V) { +inline ARCInstKind GetBasicARCInstKind(const Value *V) { if (const CallInst *CI = dyn_cast(V)) { if (const Function *F = CI->getCalledFunction()) return GetFunctionClass(F); diff --git a/include/llvm/Analysis/OrderedBasicBlock.h b/include/llvm/Analysis/OrderedBasicBlock.h new file mode 100644 index 000000000000..5aa813eb4832 --- /dev/null +++ b/include/llvm/Analysis/OrderedBasicBlock.h @@ -0,0 +1,66 @@ +//===- llvm/Analysis/OrderedBasicBlock.h --------------------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the OrderedBasicBlock class. OrderedBasicBlock maintains +// an interface where clients can query if one instruction comes before another +// in a BasicBlock. Since BasicBlock currently lacks a reliable way to query +// relative position between instructions one can use OrderedBasicBlock to do +// such queries. OrderedBasicBlock is lazily built on a source BasicBlock and +// maintains an internal Instruction -> Position map. A OrderedBasicBlock +// instance should be discarded whenever the source BasicBlock changes. 
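For illustration, a minimal sketch of the query pattern OrderedBasicBlock is designed for; the helper and its names are hypothetical:

#include "llvm/Analysis/OrderedBasicBlock.h"
using namespace llvm;

// Hypothetical helper: order two instructions of the same block. The
// OrderedBasicBlock must be discarded and rebuilt if BB is later mutated.
static bool precedesInBlock(const BasicBlock *BB, const Instruction *A,
                            const Instruction *B) {
  OrderedBasicBlock OBB(BB); // numbering is filled in lazily per query
  return OBB.dominates(A, B);
}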
+// +// It's currently used by the CaptureTracker in order to find relative +// positions of a pair of instructions inside a BasicBlock. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_ORDEREDBASICBLOCK_H +#define LLVM_ANALYSIS_ORDEREDBASICBLOCK_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/BasicBlock.h" + +namespace llvm { + +class Instruction; +class BasicBlock; + +class OrderedBasicBlock { +private: + /// \brief Map an instruction to its position in a BasicBlock. + SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts; + + /// \brief Keep track of the last instruction inserted into \p NumberedInsts. + /// It speeds up queries for uncached instructions by providing a start point + /// for new queries in OrderedBasicBlock::comesBefore. + BasicBlock::const_iterator LastInstFound; + + /// \brief The position/number to tag the next instruction to be found. + unsigned NextInstPos; + + /// \brief The source BasicBlock to map. + const BasicBlock *BB; + + /// \brief Given no cached results, find if \p A comes before \p B in \p BB. + /// Cache and number instructions while walking \p BB. + bool comesBefore(const Instruction *A, const Instruction *B); + +public: + OrderedBasicBlock(const BasicBlock *BasicB); + + /// \brief Find out whether \p A dominates \p B, meaning whether \p A + /// comes before \p B in \p BB. This is a simplification that considers + /// cached instruction positions and ignores other basic blocks, being + /// only relevant to compare relative instruction positions inside \p BB. + bool dominates(const Instruction *A, const Instruction *B); +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h index cbdbb88f7407..f0f34f3a51f5 100644 --- a/include/llvm/Analysis/PHITransAddr.h +++ b/include/llvm/Analysis/PHITransAddr.h @@ -48,6 +48,7 @@ class PHITransAddr { /// InstInputs - The inputs for our symbolic address. SmallVector<Instruction*, 4> InstInputs; + public: PHITransAddr(Value *addr, const DataLayout &DL, AssumptionCache *AC) : Addr(addr), DL(DL), TLI(nullptr), AC(AC) { @@ -55,9 +56,9 @@ public: if (Instruction *I = dyn_cast<Instruction>(Addr)) InstInputs.push_back(I); } - + Value *getAddr() const { return Addr; } - + /// NeedsPHITranslationFromBlock - Return true if moving from the specified /// BasicBlock to its predecessors requires PHI translation. bool NeedsPHITranslationFromBlock(BasicBlock *BB) const { @@ -68,12 +69,12 @@ public: return true; return false; } - + /// IsPotentiallyPHITranslatable - If this needs PHI translation, return true /// if we have some hope of doing it. This should be used as a filter to /// avoid calling PHITranslateValue in hopeless situations. bool IsPotentiallyPHITranslatable() const; - + /// PHITranslateValue - PHI translate the current address up the CFG from /// CurBB to Pred, updating our state to reflect any needed changes. If /// 'MustDominate' is true, the translated value must dominate @@ -90,18 +91,19 @@ public: /// Value *PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree &DT, - SmallVectorImpl<Instruction *> &NewInsts); - + SmallVectorImpl<Instruction *> &NewInsts); + void dump() const; - + /// Verify - Check internal consistency of this data structure. If the /// structure is valid, it returns true. If invalid, it prints errors and /// returns false.
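For illustration, a minimal sketch of the guard-then-translate sequence suggested by this interface; the helper is hypothetical, and it assumes PHITranslateValue's convention of returning true on failure:

#include "llvm/Analysis/PHITransAddr.h"
using namespace llvm;

// Hypothetical caller: translate Addr from CurBB into PredBB if needed.
static bool translateIfNeeded(PHITransAddr &Addr, BasicBlock *CurBB,
                              BasicBlock *PredBB, const DominatorTree *DT) {
  if (!Addr.NeedsPHITranslationFromBlock(CurBB))
    return true;  // address is already usable in PredBB
  if (!Addr.IsPotentiallyPHITranslatable())
    return false; // don't bother; translation is hopeless
  // Assumption: true return value signals failure, so invert it.
  return !Addr.PHITranslateValue(CurBB, PredBB, DT, /*MustDominate=*/false);
}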
bool Verify() const; + private: Value *PHITranslateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree *DT); - + /// InsertPHITranslatedSubExpr - Insert a computation of the PHI translated /// version of 'V' for the edge PredBB->CurBB into the end of the PredBB /// block. All newly created instructions are added to the NewInsts list. @@ -109,8 +111,8 @@ private: /// Value *InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, BasicBlock *PredBB, const DominatorTree &DT, - SmallVectorImpl &NewInsts); - + SmallVectorImpl &NewInsts); + /// AddAsInput - If the specified value is an instruction, add it as an input. Value *AddAsInput(Value *V) { // If V is an instruction, it is now an input. @@ -118,7 +120,6 @@ private: InstInputs.push_back(VI); return V; } - }; } // end namespace llvm diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index d112ab1823b4..da17457d3446 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -22,27 +22,6 @@ namespace llvm { class ModulePass; class Pass; class PassInfo; - class LibCallInfo; - - //===--------------------------------------------------------------------===// - // - // createGlobalsModRefPass - This pass provides alias and mod/ref info for - // global values that do not have their addresses taken. - // - Pass *createGlobalsModRefPass(); - - //===--------------------------------------------------------------------===// - // - // createAliasDebugger - This pass helps debug clients of AA - // - Pass *createAliasDebugger(); - - //===--------------------------------------------------------------------===// - // - // createAliasAnalysisCounterPass - This pass counts alias queries and how the - // alias analysis implementation responds. - // - ModulePass *createAliasAnalysisCounterPass(); //===--------------------------------------------------------------------===// // @@ -53,59 +32,10 @@ namespace llvm { //===--------------------------------------------------------------------===// // - // createNoAAPass - This pass implements a "I don't know" alias analysis. - // - ImmutablePass *createNoAAPass(); - - //===--------------------------------------------------------------------===// - // - // createBasicAliasAnalysisPass - This pass implements the stateless alias - // analysis. - // - ImmutablePass *createBasicAliasAnalysisPass(); - - //===--------------------------------------------------------------------===// - // - // createCFLAliasAnalysisPass - This pass implements a set-based approach to + // createObjCARCAAWrapperPass - This pass implements ObjC-ARC-based // alias analysis. // - ImmutablePass *createCFLAliasAnalysisPass(); - - //===--------------------------------------------------------------------===// - // - /// createLibCallAliasAnalysisPass - Create an alias analysis pass that knows - /// about the semantics of a set of libcalls specified by LCI. The newly - /// constructed pass takes ownership of the pointer that is provided. - /// - FunctionPass *createLibCallAliasAnalysisPass(LibCallInfo *LCI); - - //===--------------------------------------------------------------------===// - // - // createScalarEvolutionAliasAnalysisPass - This pass implements a simple - // alias analysis using ScalarEvolution queries. - // - FunctionPass *createScalarEvolutionAliasAnalysisPass(); - - //===--------------------------------------------------------------------===// - // - // createTypeBasedAliasAnalysisPass - This pass implements metadata-based - // type-based alias analysis. 
- // - ImmutablePass *createTypeBasedAliasAnalysisPass(); - - //===--------------------------------------------------------------------===// - // - // createScopedNoAliasAAPass - This pass implements metadata-based - // scoped noalias analysis. - // - ImmutablePass *createScopedNoAliasAAPass(); - - //===--------------------------------------------------------------------===// - // - // createObjCARCAliasAnalysisPass - This pass implements ObjC-ARC-based - // alias analysis. - // - ImmutablePass *createObjCARCAliasAnalysisPass(); + ImmutablePass *createObjCARCAAWrapperPass(); FunctionPass *createPAEvalPass(); diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index 8560f1f67160..4988386fdc82 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -47,7 +47,7 @@ namespace llvm { -// RegionTraits - Class to be specialized for different users of RegionInfo +// Class to be specialized for different users of RegionInfo // (i.e. BasicBlocks or MachineBasicBlocks). This is only to avoid needing to // pass around an unreasonable number of template parameters. template @@ -282,17 +282,16 @@ class RegionBase : public RegionNodeBase { // Save the BasicBlock RegionNodes that are element of this Region. mutable BBNodeMapT BBNodeMap; - /// verifyBBInRegion - Check if a BB is in this Region. This check also works + /// Check if a BB is in this Region. This check also works /// if the region is incorrectly built. (EXPENSIVE!) void verifyBBInRegion(BlockT *BB) const; - /// verifyWalk - Walk over all the BBs of the region starting from BB and + /// Walk over all the BBs of the region starting from BB and /// verify that all reachable basic blocks are elements of the region. /// (EXPENSIVE!) void verifyWalk(BlockT *BB, std::set *visitedBB) const; - /// verifyRegionNest - Verify if the region and its children are valid - /// regions (EXPENSIVE!) + /// Verify if the region and its children are valid regions (EXPENSIVE!) void verifyRegionNest() const; public: @@ -688,45 +687,50 @@ private: /// Map every BB to the smallest region, that contains BB. BBtoRegionMap BBtoRegion; - // isCommonDomFrontier - Returns true if BB is in the dominance frontier of + // Check whether the entries of BBtoRegion for the BBs of region + // SR are correct. Triggers an assertion if not. Calls itself recursively for + // subregions. + void verifyBBMap(const RegionT *SR) const; + + // Returns true if BB is in the dominance frontier of // entry, because it was inherited from exit. In the other case there is an // edge going from entry to BB without passing exit. bool isCommonDomFrontier(BlockT *BB, BlockT *entry, BlockT *exit) const; - // isRegion - Check if entry and exit surround a valid region, based on + // Check if entry and exit surround a valid region, based on // dominance tree and dominance frontier. bool isRegion(BlockT *entry, BlockT *exit) const; - // insertShortCut - Saves a shortcut pointing from entry to exit. + // Saves a shortcut pointing from entry to exit. // This function may extend this shortcut if possible. void insertShortCut(BlockT *entry, BlockT *exit, BBtoBBMap *ShortCut) const; - // getNextPostDom - Returns the next BB that postdominates N, while skipping + // Returns the next BB that postdominates N, while skipping // all post dominators that cannot finish a canonical region. DomTreeNodeT *getNextPostDom(DomTreeNodeT *N, BBtoBBMap *ShortCut) const; - // isTrivialRegion - A region is trivial, if it contains only one BB. 
+ // A region is trivial, if it contains only one BB. bool isTrivialRegion(BlockT *entry, BlockT *exit) const; - // createRegion - Creates a single entry single exit region. + // Creates a single entry single exit region. RegionT *createRegion(BlockT *entry, BlockT *exit); - // findRegionsWithEntry - Detect all regions starting with bb 'entry'. + // Detect all regions starting with bb 'entry'. void findRegionsWithEntry(BlockT *entry, BBtoBBMap *ShortCut); - // scanForRegions - Detects regions in F. + // Detects regions in F. void scanForRegions(FuncT &F, BBtoBBMap *ShortCut); - // getTopMostParent - Get the top most parent with the same entry block. + // Get the top most parent with the same entry block. RegionT *getTopMostParent(RegionT *region); - // buildRegionsTree - build the region hierarchy after all region detected. + // Build the region hierarchy after all region detected. void buildRegionsTree(DomTreeNodeT *N, RegionT *region); - // updateStatistics - Update statistic about created regions. + // Update statistic about created regions. virtual void updateStatistics(RegionT *R) = 0; - // calculate - detect all regions in function and build the region tree. + // Detect all regions in function and build the region tree. void calculate(FuncT &F); public: @@ -796,12 +800,6 @@ public: RegionT *getTopLevelRegion() const { return TopLevelRegion; } - /// @brief Update RegionInfo after a basic block was split. - /// - /// @param NewBB The basic block that was created before OldBB. - /// @param OldBB The old basic block. - void splitBlock(BlockT *NewBB, BlockT *OldBB); - /// @brief Clear the Node Cache for all Regions. /// /// @see Region::clearNodeCache() @@ -847,6 +845,19 @@ public: void recalculate(Function &F, DominatorTree *DT, PostDominatorTree *PDT, DominanceFrontier *DF); + +#ifndef NDEBUG + /// @brief Opens a viewer to show the GraphViz visualization of the regions. + /// + /// Useful during debugging as an alternative to dump(). + void view(); + + /// @brief Opens a viewer to show the GraphViz visualization of this region + /// without instructions in the BasicBlocks. + /// + /// Useful during debugging as an alternative to dump(). 
+  void viewOnly();
+#endif
 };
 
 class RegionInfoPass : public FunctionPass {
diff --git a/include/llvm/Analysis/RegionInfoImpl.h b/include/llvm/Analysis/RegionInfoImpl.h
index b31eefc15f78..134cd8f96fbe 100644
--- a/include/llvm/Analysis/RegionInfoImpl.h
+++ b/include/llvm/Analysis/RegionInfoImpl.h
@@ -236,7 +236,7 @@ std::string RegionBase<Tr>::getNameStr() const {
 template <class Tr>
 void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
   if (!contains(BB))
-    llvm_unreachable("Broken region found!");
+    llvm_unreachable("Broken region found: enumerated BB not in region!");
 
   BlockT *entry = getEntry(), *exit = getExit();
 
@@ -244,7 +244,8 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
                   SE = BlockTraits::child_end(BB);
        SI != SE; ++SI) {
     if (!contains(*SI) && exit != *SI)
-      llvm_unreachable("Broken region found!");
+      llvm_unreachable("Broken region found: edges leaving the region must go "
+                       "to the exit node!");
   }
 
   if (entry != BB) {
@@ -252,7 +253,8 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
                     SE = InvBlockTraits::child_end(BB);
          SI != SE; ++SI) {
       if (!contains(*SI))
-        llvm_unreachable("Broken region found!");
+        llvm_unreachable("Broken region found: edges entering the region must "
+                         "go to the entry node!");
     }
   }
 }
@@ -442,16 +444,14 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const {
   if (NumSuccessors == 0)
     return nullptr;
 
-  for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
-                  PE = InvBlockTraits::child_end(getExit());
-       PI != PE; ++PI) {
-    if (!DT->dominates(getEntry(), *PI))
-      return nullptr;
-  }
-
   RegionT *R = RI->getRegionFor(exit);
 
   if (R->getEntry() != exit) {
+    for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
+                    PE = InvBlockTraits::child_end(getExit());
+         PI != PE; ++PI)
+      if (!contains(*PI))
+        return nullptr;
     if (Tr::getNumSuccessors(exit) == 1)
       return new RegionT(getEntry(), *BlockTraits::child_begin(exit), RI, DT);
     return nullptr;
@@ -460,13 +460,11 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const {
   while (R->getParent() && R->getParent()->getEntry() == exit)
     R = R->getParent();
 
-  if (!DT->dominates(getEntry(), R->getExit())) {
-    for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
-                    PE = InvBlockTraits::child_end(getExit());
-         PI != PE; ++PI) {
-      if (!DT->dominates(R->getExit(), *PI))
-        return nullptr;
-    }
+  for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
+                  PE = InvBlockTraits::child_end(getExit());
+       PI != PE; ++PI) {
+    if (!(contains(*PI) || R->contains(*PI)))
+      return nullptr;
   }
 
   return new RegionT(getEntry(), R->getExit(), RI, DT);
@@ -541,6 +539,21 @@ RegionInfoBase<Tr>::~RegionInfoBase() {
   releaseMemory();
 }
 
+template <class Tr>
+void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const {
+  assert(R && "R must be non-null");
+  for (auto I = R->element_begin(), E = R->element_end(); I != E; ++I) {
+    if (I->isSubRegion()) {
+      const RegionT *SR = I->template getNodeAs<RegionT>();
+      verifyBBMap(SR);
+    } else {
+      BlockT *BB = I->template getNodeAs<BlockT>();
+      if (getRegionFor(BB) != R)
+        llvm_unreachable("BB map does not match region nesting");
+    }
+  }
+}
+
 template <class Tr>
 bool RegionInfoBase<Tr>::isCommonDomFrontier(BlockT *BB, BlockT *entry,
                                              BlockT *exit) const {
@@ -786,7 +799,14 @@ void RegionInfoBase<Tr>::releaseMemory() {
 
 template <class Tr>
 void RegionInfoBase<Tr>::verifyAnalysis() const {
+  // Only verify regions if explicitly activated using XDEBUG or
+  // -verify-region-info.
+  if (!RegionInfoBase<Tr>::VerifyRegionInfo)
+    return;
+
   TopLevelRegion->verifyRegionNest();
+
+  verifyBBMap(TopLevelRegion);
 }
 
 // Region pass manager support.
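// Illustrative sketch (not part of this patch): the recursive walk that the
// new verifyBBMap above performs, written against the public RegionInfo API.
// The helper name checkBBMap is ours; everything else is existing LLVM API.
#include "llvm/Analysis/RegionInfo.h"
using namespace llvm;

static bool checkBBMap(const RegionInfo &RI, const Region *R) {
  for (auto I = R->element_begin(), E = R->element_end(); I != E; ++I) {
    if (I->isSubRegion()) {
      // Recurse: blocks of a nested region must map to that subregion.
      if (!checkBBMap(RI, I->getNodeAs<Region>()))
        return false;
    } else {
      // A plain block element must map back to exactly this region.
      if (RI.getRegionFor(I->getNodeAs<BasicBlock>()) != R)
        return false;
    }
  }
  return true;
}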
@@ -886,20 +906,6 @@ RegionInfoBase::getCommonRegion(SmallVectorImpl &BBs) const { return ret; } -template -void RegionInfoBase::splitBlock(BlockT *NewBB, BlockT *OldBB) { - RegionT *R = getRegionFor(OldBB); - - setRegionFor(NewBB, R); - - while (R->getEntry() == OldBB && !R->isTopLevelRegion()) { - R->replaceEntry(NewBB); - R = R->getParent(); - } - - setRegionFor(OldBB, R); -} - template void RegionInfoBase::calculate(FuncT &F) { typedef typename std::add_pointer::type FuncPtrT; diff --git a/include/llvm/Analysis/RegionPrinter.h b/include/llvm/Analysis/RegionPrinter.h index 758748aad9e6..8f0035cfd8e6 100644 --- a/include/llvm/Analysis/RegionPrinter.h +++ b/include/llvm/Analysis/RegionPrinter.h @@ -17,10 +17,55 @@ namespace llvm { class FunctionPass; + class Function; + class RegionInfo; + FunctionPass *createRegionViewerPass(); FunctionPass *createRegionOnlyViewerPass(); FunctionPass *createRegionPrinterPass(); FunctionPass *createRegionOnlyPrinterPass(); + +#ifndef NDEBUG + /// @brief Open a viewer to display the GraphViz vizualization of the analysis + /// result. + /// + /// Practical to call in the debugger. + /// Includes the instructions in each BasicBlock. + /// + /// @param RI The analysis to display. + void viewRegion(llvm::RegionInfo *RI); + + /// @brief Analyze the regions of a function and open its GraphViz + /// visualization in a viewer. + /// + /// Useful to call in the debugger. + /// Includes the instructions in each BasicBlock. + /// The result of a new analysis may differ from the RegionInfo the pass + /// manager currently holds. + /// + /// @param F Function to analyze. + void viewRegion(const llvm::Function *F); + + /// @brief Open a viewer to display the GraphViz vizualization of the analysis + /// result. + /// + /// Useful to call in the debugger. + /// Shows only the BasicBlock names without their instructions. + /// + /// @param RI The analysis to display. + void viewRegionOnly(llvm::RegionInfo *RI); + + /// @brief Analyze the regions of a function and open its GraphViz + /// visualization in a viewer. + /// + /// Useful to call in the debugger. + /// Shows only the BasicBlock names without their instructions. + /// The result of a new analysis may differ from the RegionInfo the pass + /// manager currently holds. + /// + /// @param F Function to analyze. + void viewRegionOnly(const llvm::Function *F); +#endif } // End llvm namespace #endif diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index d47cab829ced..c08335de3e7d 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -23,10 +23,12 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" @@ -44,30 +46,33 @@ namespace llvm { class DataLayout; class TargetLibraryInfo; class LLVMContext; - class Loop; - class LoopInfo; class Operator; - class SCEVUnknown; class SCEV; - template<> struct FoldingSetTrait; + class SCEVAddRecExpr; + class SCEVConstant; + class SCEVExpander; + class SCEVPredicate; + class SCEVUnknown; - /// SCEV - This class represents an analyzed expression in the program. These - /// are opaque objects that the client is not allowed to do much with - /// directly. 
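// Illustrative sketch (not part of this patch): the viewRegion/viewRegionOnly
// helpers declared in RegionPrinter.h above are meant for interactive
// debugging in a !NDEBUG build. From gdb one might run:
//
//   (gdb) call llvm::viewRegion(F)       // F: const llvm::Function *
//   (gdb) call llvm::viewRegionOnly(F)   // block names only, no instructions
//
// or, equivalently, from temporary debugging code:
#ifndef NDEBUG
#include "llvm/Analysis/RegionPrinter.h"
static void debugRegions(const llvm::Function *F) {
  llvm::viewRegion(F);     // runs a fresh RegionInfo analysis, opens a viewer
  llvm::viewRegionOnly(F); // same, without the instructions in each block
}
#endif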
+ template <> struct FoldingSetTrait; + template <> struct FoldingSetTrait; + + /// This class represents an analyzed expression in the program. These are + /// opaque objects that the client is not allowed to do much with directly. /// class SCEV : public FoldingSetNode { friend struct FoldingSetTrait; - /// FastID - A reference to an Interned FoldingSetNodeID for this node. - /// The ScalarEvolution's BumpPtrAllocator holds the data. + /// A reference to an Interned FoldingSetNodeID for this node. The + /// ScalarEvolution's BumpPtrAllocator holds the data. FoldingSetNodeIDRef FastID; // The SCEV baseclass this node corresponds to const unsigned short SCEVType; protected: - /// SubclassData - This field is initialized to zero and may be used in - /// subclasses to store miscellaneous information. + /// This field is initialized to zero and may be used in subclasses to store + /// miscellaneous information. unsigned short SubclassData; private: @@ -104,37 +109,32 @@ namespace llvm { unsigned getSCEVType() const { return SCEVType; } - /// getType - Return the LLVM type of this SCEV expression. + /// Return the LLVM type of this SCEV expression. /// Type *getType() const; - /// isZero - Return true if the expression is a constant zero. + /// Return true if the expression is a constant zero. /// bool isZero() const; - /// isOne - Return true if the expression is a constant one. + /// Return true if the expression is a constant one. /// bool isOne() const; - /// isAllOnesValue - Return true if the expression is a constant - /// all-ones value. + /// Return true if the expression is a constant all-ones value. /// bool isAllOnesValue() const; - /// isNonConstantNegative - Return true if the specified scev is negated, - /// but not a constant. + /// Return true if the specified scev is negated, but not a constant. bool isNonConstantNegative() const; - /// print - Print out the internal representation of this scalar to the - /// specified stream. This should really only be used for debugging - /// purposes. + /// Print out the internal representation of this scalar to the specified + /// stream. This should really only be used for debugging purposes. void print(raw_ostream &OS) const; -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) - /// dump - This method is used for debugging. + /// This method is used for debugging. /// void dump() const; -#endif }; // Specialize FoldingSetTrait for SCEV to avoid needing to compute @@ -157,11 +157,10 @@ namespace llvm { return OS; } - /// SCEVCouldNotCompute - An object of this class is returned by queries that - /// could not be answered. For example, if you ask for the number of - /// iterations of a linked-list traversal loop, you will get one of these. - /// None of the standard SCEV operations are valid on this class, it is just a - /// marker. + /// An object of this class is returned by queries that could not be answered. + /// For example, if you ask for the number of iterations of a linked-list + /// traversal loop, you will get one of these. None of the standard SCEV + /// operations are valid on this class, it is just a marker. struct SCEVCouldNotCompute : public SCEV { SCEVCouldNotCompute(); @@ -169,22 +168,162 @@ namespace llvm { static bool classof(const SCEV *S); }; - /// ScalarEvolution - This class is the main scalar evolution driver. Because - /// client code (intentionally) can't do much with the SCEV objects directly, - /// they must ask this class for services. 
- /// - class ScalarEvolution : public FunctionPass { + /// SCEVPredicate - This class represents an assumption made using SCEV + /// expressions which can be checked at run-time. + class SCEVPredicate : public FoldingSetNode { + friend struct FoldingSetTrait; + + /// A reference to an Interned FoldingSetNodeID for this node. The + /// ScalarEvolution's BumpPtrAllocator holds the data. + FoldingSetNodeIDRef FastID; + public: - /// LoopDisposition - An enum describing the relationship between a - /// SCEV and a loop. + enum SCEVPredicateKind { P_Union, P_Equal }; + + protected: + SCEVPredicateKind Kind; + ~SCEVPredicate() = default; + SCEVPredicate(const SCEVPredicate&) = default; + SCEVPredicate &operator=(const SCEVPredicate&) = default; + + public: + SCEVPredicate(const FoldingSetNodeIDRef ID, SCEVPredicateKind Kind); + + SCEVPredicateKind getKind() const { return Kind; } + + /// \brief Returns the estimated complexity of this predicate. + /// This is roughly measured in the number of run-time checks required. + virtual unsigned getComplexity() const { return 1; } + + /// \brief Returns true if the predicate is always true. This means that no + /// assumptions were made and nothing needs to be checked at run-time. + virtual bool isAlwaysTrue() const = 0; + + /// \brief Returns true if this predicate implies \p N. + virtual bool implies(const SCEVPredicate *N) const = 0; + + /// \brief Prints a textual representation of this predicate with an + /// indentation of \p Depth. + virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0; + + /// \brief Returns the SCEV to which this predicate applies, or nullptr + /// if this is a SCEVUnionPredicate. + virtual const SCEV *getExpr() const = 0; + }; + + inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) { + P.print(OS); + return OS; + } + + // Specialize FoldingSetTrait for SCEVPredicate to avoid needing to compute + // temporary FoldingSetNodeID values. + template <> + struct FoldingSetTrait + : DefaultFoldingSetTrait { + + static void Profile(const SCEVPredicate &X, FoldingSetNodeID &ID) { + ID = X.FastID; + } + + static bool Equals(const SCEVPredicate &X, const FoldingSetNodeID &ID, + unsigned IDHash, FoldingSetNodeID &TempID) { + return ID == X.FastID; + } + static unsigned ComputeHash(const SCEVPredicate &X, + FoldingSetNodeID &TempID) { + return X.FastID.ComputeHash(); + } + }; + + /// SCEVEqualPredicate - This class represents an assumption that two SCEV + /// expressions are equal, and this can be checked at run-time. We assume + /// that the left hand side is a SCEVUnknown and the right hand side a + /// constant. + class SCEVEqualPredicate final : public SCEVPredicate { + /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a + /// constant. + const SCEVUnknown *LHS; + const SCEVConstant *RHS; + + public: + SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS, + const SCEVConstant *RHS); + + /// Implementation of the SCEVPredicate interface + bool implies(const SCEVPredicate *N) const override; + void print(raw_ostream &OS, unsigned Depth = 0) const override; + bool isAlwaysTrue() const override; + const SCEV *getExpr() const override; + + /// \brief Returns the left hand side of the equality. + const SCEVUnknown *getLHS() const { return LHS; } + + /// \brief Returns the right hand side of the equality. 
+    const SCEVConstant *getRHS() const { return RHS; }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVPredicate *P) {
+      return P->getKind() == P_Equal;
+    }
+  };
+
+  /// SCEVUnionPredicate - This class represents a composition of other
+  /// SCEV predicates, and is the class that most clients will interact with.
+  /// This is equivalent to a logical "AND" of all the predicates in the union.
+  class SCEVUnionPredicate final : public SCEVPredicate {
+  private:
+    typedef DenseMap<const SCEV *, SmallVector<const SCEVPredicate *, 4>>
+        PredicateMap;
+
+    /// Vector with references to all predicates in this union.
+    SmallVector<const SCEVPredicate *, 16> Preds;
+    /// Maps SCEVs to predicates for quick look-ups.
+    PredicateMap SCEVToPreds;
+
+  public:
+    SCEVUnionPredicate();
+
+    const SmallVectorImpl<const SCEVPredicate *> &getPredicates() const {
+      return Preds;
+    }
+
+    /// \brief Adds a predicate to this union.
+    void add(const SCEVPredicate *N);
+
+    /// \brief Returns a reference to a vector containing all predicates
+    /// which apply to \p Expr.
+    ArrayRef<const SCEVPredicate *> getPredicatesForExpr(const SCEV *Expr);
+
+    /// Implementation of the SCEVPredicate interface
+    bool isAlwaysTrue() const override;
+    bool implies(const SCEVPredicate *N) const override;
+    void print(raw_ostream &OS, unsigned Depth) const override;
+    const SCEV *getExpr() const override;
+
+    /// \brief We estimate the complexity of a union predicate as the number
+    /// of predicates in the union.
+    unsigned getComplexity() const override { return Preds.size(); }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVPredicate *P) {
+      return P->getKind() == P_Union;
+    }
+  };
+
+  /// The main scalar evolution driver. Because client code (intentionally)
+  /// can't do much with the SCEV objects directly, they must ask this class
+  /// for services.
+  class ScalarEvolution {
+  public:
+    /// An enum describing the relationship between a SCEV and a loop.
     enum LoopDisposition {
       LoopVariant,    ///< The SCEV is loop-variant (unknown).
       LoopInvariant,  ///< The SCEV is loop-invariant.
       LoopComputable  ///< The SCEV varies predictably with the loop.
     };
 
-    /// BlockDisposition - An enum describing the relationship between a
-    /// SCEV and a basic block.
+    /// An enum describing the relationship between a SCEV and a basic block.
     enum BlockDisposition {
       DoesNotDominateBlock,  ///< The SCEV does not dominate the block.
       DominatesBlock,        ///< The SCEV dominates the block.
@@ -207,9 +346,9 @@
     }
 
   private:
-    /// SCEVCallbackVH - A CallbackVH to arrange for ScalarEvolution to be
-    /// notified whenever a Value is deleted.
-    class SCEVCallbackVH : public CallbackVH {
+    /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
+    /// Value is deleted.
+    class SCEVCallbackVH final : public CallbackVH {
       ScalarEvolution *SE;
       void deleted() override;
       void allUsesReplacedWith(Value *New) override;
@@ -221,35 +360,34 @@
     friend class SCEVExpander;
     friend class SCEVUnknown;
 
-    /// F - The function we are analyzing.
-    ///
-    Function *F;
+    /// The function we are analyzing.
+    ///
+    Function &F;
+
+    /// The target library information for the target we are targeting.
+    ///
+    TargetLibraryInfo &TLI;
 
     /// The tracker for @llvm.assume intrinsics in this function.
-    AssumptionCache *AC;
+    AssumptionCache &AC;
 
-    /// LI - The loop information for the function we are currently analyzing.
+    /// The dominator tree.
     ///
-    LoopInfo *LI;
+    DominatorTree &DT;
 
-    /// TLI - The target library information for the target we are targeting.
+ /// The loop information for the function we are currently analyzing. /// - TargetLibraryInfo *TLI; + LoopInfo &LI; - /// DT - The dominator tree. - /// - DominatorTree *DT; + /// This SCEV is used to represent unknown trip counts and things. + std::unique_ptr CouldNotCompute; - /// CouldNotCompute - This SCEV is used to represent unknown trip - /// counts and things. - SCEVCouldNotCompute CouldNotCompute; - - /// ValueExprMapType - The typedef for ValueExprMap. + /// The typedef for ValueExprMap. /// typedef DenseMap > ValueExprMapType; - /// ValueExprMap - This is a cache of the values we have analyzed so far. + /// This is a cache of the values we have analyzed so far. /// ValueExprMapType ValueExprMap; @@ -260,10 +398,14 @@ namespace llvm { /// conditions dominating the backedge of a loop. bool WalkingBEDominatingConds; - /// ExitLimit - Information about the number of loop iterations for which a - /// loop exit's branch condition evaluates to the not-taken path. This is a - /// temporary pair of exact and max expressions that are eventually - /// summarized in ExitNotTakenInfo and BackedgeTakenInfo. + /// Set to true by isKnownPredicateViaSplitting when we're trying to prove a + /// predicate by splitting it into a set of independent predicates. + bool ProvingSplitPredicate; + + /// Information about the number of loop iterations for which a loop exit's + /// branch condition evaluates to the not-taken path. This is a temporary + /// pair of exact and max expressions that are eventually summarized in + /// ExitNotTakenInfo and BackedgeTakenInfo. struct ExitLimit { const SCEV *Exact; const SCEV *Max; @@ -272,16 +414,16 @@ namespace llvm { ExitLimit(const SCEV *E, const SCEV *M) : Exact(E), Max(M) {} - /// hasAnyInfo - Test whether this ExitLimit contains any computed - /// information, or whether it's all SCEVCouldNotCompute values. + /// Test whether this ExitLimit contains any computed information, or + /// whether it's all SCEVCouldNotCompute values. bool hasAnyInfo() const { return !isa(Exact) || !isa(Max); } }; - /// ExitNotTakenInfo - Information about the number of times a particular - /// loop exit may be reached before exiting the loop. + /// Information about the number of times a particular loop exit may be + /// reached before exiting the loop. struct ExitNotTakenInfo { AssertingVH ExitingBlock; const SCEV *ExactNotTaken; @@ -289,14 +431,14 @@ namespace llvm { ExitNotTakenInfo() : ExitingBlock(nullptr), ExactNotTaken(nullptr) {} - /// isCompleteList - Return true if all loop exits are computable. + /// Return true if all loop exits are computable. bool isCompleteList() const { return NextExit.getInt() == 0; } void setIncomplete() { NextExit.setInt(1); } - /// getNextExit - Return a pointer to the next exit's not-taken info. + /// Return a pointer to the next exit's not-taken info. ExitNotTakenInfo *getNextExit() const { return NextExit.getPointer(); } @@ -304,16 +446,16 @@ namespace llvm { void setNextExit(ExitNotTakenInfo *ENT) { NextExit.setPointer(ENT); } }; - /// BackedgeTakenInfo - Information about the backedge-taken count - /// of a loop. This currently includes an exact count and a maximum count. + /// Information about the backedge-taken count of a loop. This currently + /// includes an exact count and a maximum count. /// class BackedgeTakenInfo { - /// ExitNotTaken - A list of computable exits and their not-taken counts. - /// Loops almost never have more than one computable exit. + /// A list of computable exits and their not-taken counts. 
Loops almost + /// never have more than one computable exit. ExitNotTakenInfo ExitNotTaken; - /// Max - An expression indicating the least maximum backedge-taken - /// count of the loop that is known, or a SCEVCouldNotCompute. + /// An expression indicating the least maximum backedge-taken count of the + /// loop that is known, or a SCEVCouldNotCompute. const SCEV *Max; public: @@ -324,80 +466,78 @@ namespace llvm { SmallVectorImpl< std::pair > &ExitCounts, bool Complete, const SCEV *MaxCount); - /// hasAnyInfo - Test whether this BackedgeTakenInfo contains any - /// computed information, or whether it's all SCEVCouldNotCompute - /// values. + /// Test whether this BackedgeTakenInfo contains any computed information, + /// or whether it's all SCEVCouldNotCompute values. bool hasAnyInfo() const { return ExitNotTaken.ExitingBlock || !isa(Max); } - /// getExact - Return an expression indicating the exact backedge-taken - /// count of the loop if it is known, or SCEVCouldNotCompute - /// otherwise. This is the number of times the loop header can be - /// guaranteed to execute, minus one. + /// Return an expression indicating the exact backedge-taken count of the + /// loop if it is known, or SCEVCouldNotCompute otherwise. This is the + /// number of times the loop header can be guaranteed to execute, minus + /// one. const SCEV *getExact(ScalarEvolution *SE) const; - /// getExact - Return the number of times this loop exit may fall through - /// to the back edge, or SCEVCouldNotCompute. The loop is guaranteed not - /// to exit via this block before this number of iterations, but may exit - /// via another block. + /// Return the number of times this loop exit may fall through to the back + /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via + /// this block before this number of iterations, but may exit via another + /// block. const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const; - /// getMax - Get the max backedge taken count for the loop. + /// Get the max backedge taken count for the loop. const SCEV *getMax(ScalarEvolution *SE) const; /// Return true if any backedge taken count expressions refer to the given /// subexpression. bool hasOperand(const SCEV *S, ScalarEvolution *SE) const; - /// clear - Invalidate this result and free associated memory. + /// Invalidate this result and free associated memory. void clear(); }; - /// BackedgeTakenCounts - Cache the backedge-taken count of the loops for - /// this function as they are computed. + /// Cache the backedge-taken count of the loops for this function as they + /// are computed. DenseMap BackedgeTakenCounts; - /// ConstantEvolutionLoopExitValue - This map contains entries for all of - /// the PHI instructions that we attempt to compute constant evolutions for. - /// This allows us to avoid potentially expensive recomputation of these - /// properties. An instruction maps to null if we are unable to compute its - /// exit value. + /// This map contains entries for all of the PHI instructions that we + /// attempt to compute constant evolutions for. This allows us to avoid + /// potentially expensive recomputation of these properties. An instruction + /// maps to null if we are unable to compute its exit value. DenseMap ConstantEvolutionLoopExitValue; - /// ValuesAtScopes - This map contains entries for all the expressions - /// that we attempt to compute getSCEVAtScope information for, which can - /// be expensive in extreme cases. 
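// Illustrative sketch (not part of this patch): clients read the
// BackedgeTakenInfo described above through ScalarEvolution's public
// accessors rather than through this internal struct. SE and L are assumed
// to come from a pass that has already run the analysis.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void reportTripInfo(ScalarEvolution &SE, const Loop *L,
                           raw_ostream &OS) {
  const SCEV *Exact = SE.getBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(Exact))
    OS << "exact backedge-taken count: " << *Exact << "\n";
  // An upper bound may be known even when the exact count is not.
  const SCEV *Max = SE.getMaxBackedgeTakenCount(L);
  if (!isa<SCEVCouldNotCompute>(Max))
    OS << "max backedge-taken count: " << *Max << "\n";
}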
+ /// This map contains entries for all the expressions that we attempt to + /// compute getSCEVAtScope information for, which can be expensive in + /// extreme cases. DenseMap, 2> > ValuesAtScopes; - /// LoopDispositions - Memoized computeLoopDisposition results. + /// Memoized computeLoopDisposition results. DenseMap, 2>> LoopDispositions; - /// computeLoopDisposition - Compute a LoopDisposition value. + /// Compute a LoopDisposition value. LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L); - /// BlockDispositions - Memoized computeBlockDisposition results. + /// Memoized computeBlockDisposition results. DenseMap< const SCEV *, SmallVector, 2>> BlockDispositions; - /// computeBlockDisposition - Compute a BlockDisposition value. + /// Compute a BlockDisposition value. BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB); - /// UnsignedRanges - Memoized results from getRange + /// Memoized results from getRange DenseMap UnsignedRanges; - /// SignedRanges - Memoized results from getRange + /// Memoized results from getRange DenseMap SignedRanges; - /// RangeSignHint - Used to parameterize getRange + /// Used to parameterize getRange enum RangeSignHint { HINT_RANGE_UNSIGNED, HINT_RANGE_SIGNED }; - /// setRange - Set the memoized range for the given SCEV. + /// Set the memoized range for the given SCEV. const ConstantRange &setRange(const SCEV *S, RangeSignHint Hint, const ConstantRange &CR) { DenseMap &Cache = @@ -410,198 +550,275 @@ namespace llvm { return Pair.first->second; } - /// getRange - Determine the range for a particular SCEV. + /// Determine the range for a particular SCEV. ConstantRange getRange(const SCEV *S, RangeSignHint Hint); - /// createSCEV - We know that there is no SCEV for the specified value. - /// Analyze the expression. + /// We know that there is no SCEV for the specified value. Analyze the + /// expression. const SCEV *createSCEV(Value *V); - /// createNodeForPHI - Provide the special handling we need to analyze PHI - /// SCEVs. + /// Provide the special handling we need to analyze PHI SCEVs. const SCEV *createNodeForPHI(PHINode *PN); - /// createNodeForGEP - Provide the special handling we need to analyze GEP - /// SCEVs. + /// Helper function called from createNodeForPHI. + const SCEV *createAddRecFromPHI(PHINode *PN); + + /// Helper function called from createNodeForPHI. + const SCEV *createNodeFromSelectLikePHI(PHINode *PN); + + /// Provide special handling for a select-like instruction (currently this + /// is either a select instruction or a phi node). \p I is the instruction + /// being processed, and it is assumed equivalent to "Cond ? TrueVal : + /// FalseVal". + const SCEV *createNodeForSelectOrPHI(Instruction *I, Value *Cond, + Value *TrueVal, Value *FalseVal); + + /// Provide the special handling we need to analyze GEP SCEVs. const SCEV *createNodeForGEP(GEPOperator *GEP); - /// computeSCEVAtScope - Implementation code for getSCEVAtScope; called - /// at most once for each SCEV+Loop pair. + /// Implementation code for getSCEVAtScope; called at most once for each + /// SCEV+Loop pair. /// const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L); - /// ForgetSymbolicValue - This looks up computed SCEV values for all - /// instructions that depend on the given instruction and removes them from - /// the ValueExprMap map if they reference SymName. This is used during PHI - /// resolution. 
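// Illustrative sketch (not part of this patch): the UnsignedRanges and
// SignedRanges caches above sit behind getUnsignedRange/getSignedRange, so
// repeated queries for the same SCEV are cheap lookups. The helper name
// fitsInUnsignedByte is ours.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

static bool fitsInUnsignedByte(ScalarEvolution &SE, Value *V) {
  if (!SE.isSCEVable(V->getType()))
    return false;
  // The first query computes and memoizes the range; later ones hit the cache.
  ConstantRange CR = SE.getUnsignedRange(SE.getSCEV(V));
  return CR.getUnsignedMax().ule(255);
}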
+ /// This looks up computed SCEV values for all instructions that depend on + /// the given instruction and removes them from the ValueExprMap map if they + /// reference SymName. This is used during PHI resolution. void ForgetSymbolicName(Instruction *I, const SCEV *SymName); - /// getBackedgeTakenInfo - Return the BackedgeTakenInfo for the given - /// loop, lazily computing new values if the loop hasn't been analyzed - /// yet. + /// Return the BackedgeTakenInfo for the given loop, lazily computing new + /// values if the loop hasn't been analyzed yet. const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L); - /// ComputeBackedgeTakenCount - Compute the number of times the specified - /// loop will iterate. - BackedgeTakenInfo ComputeBackedgeTakenCount(const Loop *L); + /// Compute the number of times the specified loop will iterate. + BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L); - /// ComputeExitLimit - Compute the number of times the backedge of the - /// specified loop will execute if it exits via the specified block. - ExitLimit ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock); + /// Compute the number of times the backedge of the specified loop will + /// execute if it exits via the specified block. + ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock); - /// ComputeExitLimitFromCond - Compute the number of times the backedge of - /// the specified loop will execute if its exit condition were a conditional - /// branch of ExitCond, TBB, and FBB. - ExitLimit ComputeExitLimitFromCond(const Loop *L, + /// Compute the number of times the backedge of the specified loop will + /// execute if its exit condition were a conditional branch of ExitCond, + /// TBB, and FBB. + ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool IsSubExpr); - /// ComputeExitLimitFromICmp - Compute the number of times the backedge of - /// the specified loop will execute if its exit condition were a conditional - /// branch of the ICmpInst ExitCond, TBB, and FBB. - ExitLimit ComputeExitLimitFromICmp(const Loop *L, + /// Compute the number of times the backedge of the specified loop will + /// execute if its exit condition were a conditional branch of the ICmpInst + /// ExitCond, TBB, and FBB. + ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, bool IsSubExpr); - /// ComputeExitLimitFromSingleExitSwitch - Compute the number of times the - /// backedge of the specified loop will execute if its exit condition were a - /// switch with a single exiting case to ExitingBB. + /// Compute the number of times the backedge of the specified loop will + /// execute if its exit condition were a switch with a single exiting case + /// to ExitingBB. ExitLimit - ComputeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, + computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBB, bool IsSubExpr); - /// ComputeLoadConstantCompareExitLimit - Given an exit condition - /// of 'icmp op load X, cst', try to see if we can compute the - /// backedge-taken count. - ExitLimit ComputeLoadConstantCompareExitLimit(LoadInst *LI, + /// Given an exit condition of 'icmp op load X, cst', try to see if we can + /// compute the backedge-taken count. 
+ ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI, Constant *RHS, const Loop *L, ICmpInst::Predicate p); - /// ComputeExitCountExhaustively - If the loop is known to execute a - /// constant number of times (the condition evolves only from constants), - /// try to evaluate a few iterations of the loop until we get the exit - /// condition gets a value of ExitWhen (true or false). If we cannot - /// evaluate the exit count of the loop, return CouldNotCompute. - const SCEV *ComputeExitCountExhaustively(const Loop *L, + /// Compute the exit limit of a loop that is controlled by a + /// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip + /// count in these cases (since SCEV has no way of expressing them), but we + /// can still sometimes compute an upper bound. + /// + /// Return an ExitLimit for a loop whose backedge is guarded by `LHS Pred + /// RHS`. + ExitLimit computeShiftCompareExitLimit(Value *LHS, Value *RHS, + const Loop *L, + ICmpInst::Predicate Pred); + + /// If the loop is known to execute a constant number of times (the + /// condition evolves only from constants), try to evaluate a few iterations + /// of the loop until we get the exit condition gets a value of ExitWhen + /// (true or false). If we cannot evaluate the exit count of the loop, + /// return CouldNotCompute. + const SCEV *computeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen); - /// HowFarToZero - Return the number of times an exit condition comparing - /// the specified value to zero will execute. If not computable, return - /// CouldNotCompute. + /// Return the number of times an exit condition comparing the specified + /// value to zero will execute. If not computable, return CouldNotCompute. ExitLimit HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr); - /// HowFarToNonZero - Return the number of times an exit condition checking - /// the specified value for nonzero will execute. If not computable, return + /// Return the number of times an exit condition checking the specified + /// value for nonzero will execute. If not computable, return /// CouldNotCompute. ExitLimit HowFarToNonZero(const SCEV *V, const Loop *L); - /// HowManyLessThans - Return the number of times an exit condition - /// containing the specified less-than comparison will execute. If not - /// computable, return CouldNotCompute. isSigned specifies whether the - /// less-than is signed. + /// Return the number of times an exit condition containing the specified + /// less-than comparison will execute. If not computable, return + /// CouldNotCompute. isSigned specifies whether the less-than is signed. ExitLimit HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool IsSubExpr); ExitLimit HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned, bool IsSubExpr); - /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB - /// (which may not be an immediate predecessor) which has exactly one - /// successor from which BB is reachable, or null if no such block is - /// found. + /// Return a predecessor of BB (which may not be an immediate predecessor) + /// which has exactly one successor from which BB is reachable, or null if + /// no such block is found. std::pair getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB); - /// isImpliedCond - Test whether the condition described by Pred, LHS, and - /// RHS is true whenever the given FoundCondValue value evaluates to true. 
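// Illustrative sketch (not part of this patch): the computeExitLimit*
// machinery above is private; transforms normally consume its results via
// the public trip-count helpers. A full-unrolling style heuristic, for
// example, might look like this (the threshold of 8 is ours, illustrative):
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

static bool isSmallConstantTripLoop(ScalarEvolution &SE, Loop *L) {
  // getSmallConstantTripCount returns 0 when the count is unknown or
  // not constant.
  unsigned TripCount = SE.getSmallConstantTripCount(L);
  return TripCount != 0 && TripCount <= 8;
}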
+ /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the given FoundCondValue value evaluates to true. bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, Value *FoundCondValue, bool Inverse); - /// isImpliedCondOperands - Test whether the condition described by Pred, - /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, - /// and FoundRHS is true. + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is + /// true. + bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, ICmpInst::Predicate FoundPred, + const SCEV *FoundLHS, const SCEV *FoundRHS); + + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); - /// isImpliedCondOperandsHelper - Test whether the condition described by - /// Pred, LHS, and RHS is true whenever the condition described by Pred, - /// FoundLHS, and FoundRHS is true. + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); - /// isImpliedCondOperandsViaRanges - Test whether the condition described by - /// Pred, LHS, and RHS is true whenever the condition described by Pred, - /// FoundLHS, and FoundRHS is true. Utility function used by - /// isImpliedCondOperands. + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. Utility function used by isImpliedCondOperands. bool isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const SCEV *FoundLHS, const SCEV *FoundRHS); - /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is - /// in the header of its containing loop, we know the loop executes a - /// constant number of times, and the PHI node is just a recurrence - /// involving constants, fold it. + /// Test whether the condition described by Pred, LHS, and RHS is true + /// whenever the condition described by Pred, FoundLHS, and FoundRHS is + /// true. + /// + /// This routine tries to rule out certain kinds of integer overflow, and + /// then tries to reason about arithmetic properties of the predicates. + bool isImpliedCondOperandsViaNoOverflow(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS); + + /// If we know that the specified Phi is in the header of its containing + /// loop, we know the loop executes a constant number of times, and the PHI + /// node is just a recurrence involving constants, fold it. Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs, const Loop *L); - /// isKnownPredicateWithRanges - Test if the given expression is known to - /// satisfy the condition described by Pred and the known constant ranges - /// of LHS and RHS. + /// Test if the given expression is known to satisfy the condition described + /// by Pred and the known constant ranges of LHS and RHS. 
/// bool isKnownPredicateWithRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); - /// forgetMemoizedResults - Drop memoized information computed for S. + /// Try to prove the condition described by "LHS Pred RHS" by ruling out + /// integer overflow. + /// + /// For instance, this will return true for "A s< (A + C)" if C is + /// positive. + bool isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS); + + /// Try to split Pred LHS RHS into logical conjunctions (and's) and try to + /// prove them individually. + bool isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS); + + /// Try to match the Expr as "(L + R)". + bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R, + SCEV::NoWrapFlags &Flags); + + /// Return true if More == (Less + C), where C is a constant. This is + /// intended to be used as a cheaper substitute for full SCEV subtraction. + bool computeConstantDifference(const SCEV *Less, const SCEV *More, + APInt &C); + + /// Drop memoized information computed for S. void forgetMemoizedResults(const SCEV *S); + /// Return an existing SCEV for V if there is one, otherwise return nullptr. + const SCEV *getExistingSCEV(Value *V); + /// Return false iff given SCEV contains a SCEVUnknown with NULL value- /// pointer. bool checkValidity(const SCEV *S) const; - // Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be equal - // to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is equivalent to - // proving no signed (resp. unsigned) wrap in {`Start`,+,`Step`} if - // `ExtendOpTy` is `SCEVSignExtendExpr` (resp. `SCEVZeroExtendExpr`). - // + /// Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be + /// equal to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is + /// equivalent to proving no signed (resp. unsigned) wrap in + /// {`Start`,+,`Step`} if `ExtendOpTy` is `SCEVSignExtendExpr` + /// (resp. `SCEVZeroExtendExpr`). + /// template bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, const Loop *L); + bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, bool &Increasing); + + /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X" + /// is monotonically increasing or decreasing. In the former case set + /// `Increasing` to true and in the latter case set `Increasing` to false. + /// + /// A predicate is said to be monotonically increasing if may go from being + /// false to being true as the loop iterates, but never the other way + /// around. A predicate is said to be monotonically decreasing if may go + /// from being true to being false as the loop iterates, but never the other + /// way around. + bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, bool &Increasing); + + // Return SCEV no-wrap flags that can be proven based on reasoning + // about how poison produced from no-wrap flags on this value + // (e.g. a nuw add) would trigger undefined behavior on overflow. 
+ SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V); + public: - static char ID; // Pass identification, replacement for typeid - ScalarEvolution(); + ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC, + DominatorTree &DT, LoopInfo &LI); + ~ScalarEvolution(); + ScalarEvolution(ScalarEvolution &&Arg); - LLVMContext &getContext() const { return F->getContext(); } + LLVMContext &getContext() const { return F.getContext(); } - /// isSCEVable - Test if values of the given type are analyzable within - /// the SCEV framework. This primarily includes integer types, and it - /// can optionally include pointer types if the ScalarEvolution class - /// has access to target-specific information. + /// Test if values of the given type are analyzable within the SCEV + /// framework. This primarily includes integer types, and it can optionally + /// include pointer types if the ScalarEvolution class has access to + /// target-specific information. bool isSCEVable(Type *Ty) const; - /// getTypeSizeInBits - Return the size in bits of the specified type, - /// for which isSCEVable must return true. + /// Return the size in bits of the specified type, for which isSCEVable must + /// return true. uint64_t getTypeSizeInBits(Type *Ty) const; - /// getEffectiveSCEVType - Return a type with the same bitwidth as - /// the given type and which represents how SCEV will treat the given - /// type, for which isSCEVable must return true. For pointer types, - /// this is the pointer-sized integer type. + /// Return a type with the same bitwidth as the given type and which + /// represents how SCEV will treat the given type, for which isSCEVable must + /// return true. For pointer types, this is the pointer-sized integer type. Type *getEffectiveSCEVType(Type *Ty) const; - /// getSCEV - Return a SCEV expression for the full generality of the - /// specified expression. + /// Return a SCEV expression for the full generality of the specified + /// expression. 
const SCEV *getSCEV(Value *V); const SCEV *getConstant(ConstantInt *V); @@ -615,35 +832,24 @@ namespace llvm { SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { - SmallVector Ops; - Ops.push_back(LHS); - Ops.push_back(RHS); + SmallVector Ops = {LHS, RHS}; return getAddExpr(Ops, Flags); } const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { - SmallVector Ops; - Ops.push_back(Op0); - Ops.push_back(Op1); - Ops.push_back(Op2); + SmallVector Ops = {Op0, Op1, Op2}; return getAddExpr(Ops, Flags); } const SCEV *getMulExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS, - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) - { - SmallVector Ops; - Ops.push_back(LHS); - Ops.push_back(RHS); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { + SmallVector Ops = {LHS, RHS}; return getMulExpr(Ops, Flags); } const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { - SmallVector Ops; - Ops.push_back(Op0); - Ops.push_back(Op1); - Ops.push_back(Op2); + SmallVector Ops = {Op0, Op1, Op2}; return getMulExpr(Ops, Flags); } const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); @@ -675,81 +881,80 @@ namespace llvm { const SCEV *getUnknown(Value *V); const SCEV *getCouldNotCompute(); - /// getSizeOfExpr - Return an expression for sizeof AllocTy that is type - /// IntTy + /// \brief Return a SCEV for the constant 0 of a specific type. + const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); } + + /// \brief Return a SCEV for the constant 1 of a specific type. + const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); } + + /// Return an expression for sizeof AllocTy that is type IntTy /// const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy); - /// getOffsetOfExpr - Return an expression for offsetof on the given field - /// with type IntTy + /// Return an expression for offsetof on the given field with type IntTy /// const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo); - /// getNegativeSCEV - Return the SCEV object corresponding to -V. + /// Return the SCEV object corresponding to -V. /// - const SCEV *getNegativeSCEV(const SCEV *V); + const SCEV *getNegativeSCEV(const SCEV *V, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); - /// getNotSCEV - Return the SCEV object corresponding to ~V. + /// Return the SCEV object corresponding to ~V. /// const SCEV *getNotSCEV(const SCEV *V); - /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1. + /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1. const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); - /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion - /// of the input value to the specified type. If the type must be - /// extended, it is zero extended. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty); - /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion - /// of the input value to the specified type. If the type must be - /// extended, it is sign extended. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. 
If the type must be extended, it is sign extended. const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty); - /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of - /// the input value to the specified type. If the type must be extended, - /// it is zero extended. The conversion must not be narrowing. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is zero extended. The + /// conversion must not be narrowing. const SCEV *getNoopOrZeroExtend(const SCEV *V, Type *Ty); - /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of - /// the input value to the specified type. If the type must be extended, - /// it is sign extended. The conversion must not be narrowing. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is sign extended. The + /// conversion must not be narrowing. const SCEV *getNoopOrSignExtend(const SCEV *V, Type *Ty); - /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of - /// the input value to the specified type. If the type must be extended, - /// it is extended with unspecified bits. The conversion must not be - /// narrowing. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. If the type must be extended, it is extended with + /// unspecified bits. The conversion must not be narrowing. const SCEV *getNoopOrAnyExtend(const SCEV *V, Type *Ty); - /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the - /// input value to the specified type. The conversion must not be - /// widening. + /// Return a SCEV corresponding to a conversion of the input value to the + /// specified type. The conversion must not be widening. const SCEV *getTruncateOrNoop(const SCEV *V, Type *Ty); - /// getUMaxFromMismatchedTypes - Promote the operands to the wider of - /// the types using zero-extension, and then perform a umax operation - /// with them. + /// Promote the operands to the wider of the types using zero-extension, and + /// then perform a umax operation with them. const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); - /// getUMinFromMismatchedTypes - Promote the operands to the wider of - /// the types using zero-extension, and then perform a umin operation - /// with them. + /// Promote the operands to the wider of the types using zero-extension, and + /// then perform a umin operation with them. const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); - /// getPointerBase - Transitively follow the chain of pointer-type operands - /// until reaching a SCEV that does not have a single pointer operand. This - /// returns a SCEVUnknown pointer for well-formed pointer-type expressions, - /// but corner cases do exist. + /// Transitively follow the chain of pointer-type operands until reaching a + /// SCEV that does not have a single pointer operand. This returns a + /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner + /// cases do exist. const SCEV *getPointerBase(const SCEV *V); - /// getSCEVAtScope - Return a SCEV expression for the specified value - /// at the specified scope in the program. The L value specifies a loop - /// nest to evaluate the expression at, where null is the top-level or a - /// specified loop is immediately inside of the loop. 
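// Illustrative sketch (not part of this patch): typical use of the
// conversion helpers documented above when mixing SCEVs of different bit
// widths. IntPtrTy is assumed to be the module's pointer-sized integer type.
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

static const SCEV *widenIndex(ScalarEvolution &SE, const SCEV *Idx,
                              Type *IntPtrTy) {
  // Zero-extends (or no-ops) the index; these helpers assert rather than
  // silently narrow.
  return SE.getTruncateOrZeroExtend(Idx, IntPtrTy);
}

static const SCEV *unsignedMax(ScalarEvolution &SE, const SCEV *A,
                               const SCEV *B) {
  // Widens the narrower operand with a zext, then forms the umax.
  return SE.getUMaxFromMismatchedTypes(A, B);
}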
+ /// Return a SCEV expression for the specified value at the specified scope + /// in the program. The L value specifies a loop nest to evaluate the + /// expression at, where null is the top-level or a specified loop is + /// immediately inside of the loop. /// /// This method can be used to compute the exit value for a variable defined /// in a loop by querying what the value will hold in the parent loop. @@ -758,19 +963,17 @@ namespace llvm { /// original value V is returned. const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L); - /// getSCEVAtScope - This is a convenience function which does - /// getSCEVAtScope(getSCEV(V), L). + /// This is a convenience function which does getSCEVAtScope(getSCEV(V), L). const SCEV *getSCEVAtScope(Value *V, const Loop *L); - /// isLoopEntryGuardedByCond - Test whether entry to the loop is protected - /// by a conditional between LHS and RHS. This is used to help avoid max - /// expressions in loop trip counts, and to eliminate casts. + /// Test whether entry to the loop is protected by a conditional between LHS + /// and RHS. This is used to help avoid max expressions in loop trip + /// counts, and to eliminate casts. bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); - /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is - /// protected by a conditional between LHS and RHS. This is used to - /// to eliminate casts. + /// Test whether the backedge of the loop is protected by a conditional + /// between LHS and RHS. This is used to to eliminate casts. bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); @@ -781,13 +984,13 @@ namespace llvm { /// the single exiting block passed to it. See that routine for details. unsigned getSmallConstantTripCount(Loop *L); - /// getSmallConstantTripCount - Returns the maximum trip count of this loop - /// as a normal unsigned value. Returns 0 if the trip count is unknown or - /// not constant. This "trip count" assumes that control exits via - /// ExitingBlock. More precisely, it is the number of times that control may - /// reach ExitingBlock before taking the branch. For loops with multiple - /// exits, it may not be the number times that the loop header executes if - /// the loop exits prematurely via another branch. + /// Returns the maximum trip count of this loop as a normal unsigned + /// value. Returns 0 if the trip count is unknown or not constant. This + /// "trip count" assumes that control exits via ExitingBlock. More + /// precisely, it is the number of times that control may reach ExitingBlock + /// before taking the branch. For loops with multiple exits, it may not be + /// the number times that the loop header executes if the loop exits + /// prematurely via another branch. unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock); /// \brief Returns the largest constant divisor of the trip count of the @@ -798,25 +1001,25 @@ namespace llvm { /// the single exiting block passed to it. See that routine for details. unsigned getSmallConstantTripMultiple(Loop *L); - /// getSmallConstantTripMultiple - Returns the largest constant divisor of - /// the trip count of this loop as a normal unsigned value, if - /// possible. This means that the actual trip count is always a multiple of - /// the returned value (don't forget the trip count could very well be zero - /// as well!). 
As explained in the comments for getSmallConstantTripCount, - /// this assumes that control exits the loop via ExitingBlock. + /// Returns the largest constant divisor of the trip count of this loop as a + /// normal unsigned value, if possible. This means that the actual trip + /// count is always a multiple of the returned value (don't forget the trip + /// count could very well be zero as well!). As explained in the comments + /// for getSmallConstantTripCount, this assumes that control exits the loop + /// via ExitingBlock. unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock); - // getExitCount - Get the expression for the number of loop iterations for - // which this loop is guaranteed not to exit via ExitingBlock. Otherwise - // return SCEVCouldNotCompute. + /// Get the expression for the number of loop iterations for which this loop + /// is guaranteed not to exit via ExitingBlock. Otherwise return + /// SCEVCouldNotCompute. const SCEV *getExitCount(Loop *L, BasicBlock *ExitingBlock); - /// getBackedgeTakenCount - If the specified loop has a predictable - /// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute - /// object. The backedge-taken count is the number of times the loop header - /// will be branched to from within the loop. This is one less than the - /// trip count of the loop, since it doesn't count the first iteration, - /// when the header is branched to from outside the loop. + /// If the specified loop has a predictable backedge-taken count, return it, + /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count + /// is the number of times the loop header will be branched to from within + /// the loop. This is one less than the trip count of the loop, since it + /// doesn't count the first iteration, when the header is branched to from + /// outside the loop. /// /// Note that it is not valid to call this method on a loop without a /// loop-invariant backedge-taken count (see @@ -824,24 +1027,23 @@ namespace llvm { /// const SCEV *getBackedgeTakenCount(const Loop *L); - /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except - /// return the least SCEV value that is known never to be less than the - /// actual backedge taken count. + /// Similar to getBackedgeTakenCount, except return the least SCEV value + /// that is known never to be less than the actual backedge taken count. const SCEV *getMaxBackedgeTakenCount(const Loop *L); - /// hasLoopInvariantBackedgeTakenCount - Return true if the specified loop - /// has an analyzable loop-invariant backedge-taken count. + /// Return true if the specified loop has an analyzable loop-invariant + /// backedge-taken count. bool hasLoopInvariantBackedgeTakenCount(const Loop *L); - /// forgetLoop - This method should be called by the client when it has - /// changed a loop in a way that may effect ScalarEvolution's ability to - /// compute a trip count, or if the loop is deleted. This call is - /// potentially expensive for large loop bodies. + /// This method should be called by the client when it has changed a loop in + /// a way that may affect ScalarEvolution's ability to compute a trip count, + /// or if the loop is deleted. This call is potentially expensive for large + /// loop bodies. void forgetLoop(const Loop *L); - /// forgetValue - This method should be called by the client when it has - /// changed a value in a way that may effect its value, or which may - /// disconnect it from a def-use chain linking it to a loop.
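// ---- Editor's illustrative sketch (not part of the patch) ----
// Relating getBackedgeTakenCount to the trip count described above: the trip
// count is one more than the backedge-taken count. SE and L are assumed to
// be a ScalarEvolution instance and a Loop with an analyzable count.
const SCEV *BEC = SE.getBackedgeTakenCount(L);
if (!isa<SCEVCouldNotCompute>(BEC)) {
  // getConstant builds a SCEVConstant of the matching type for the +1.
  const SCEV *TripCount =
      SE.getAddExpr(BEC, SE.getConstant(BEC->getType(), 1));
  (void)TripCount;
}
// ---- end sketch ----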
+ /// This method should be called by the client when it has changed a value + /// in a way that may affect its value, or which may disconnect it from a + /// def-use chain linking it to a loop. void forgetValue(Value *V); /// \brief Called when the client has changed the disposition of values in @@ -851,92 +1053,97 @@ namespace llvm { /// recompute is simpler. void forgetLoopDispositions(const Loop *L) { LoopDispositions.clear(); } - /// GetMinTrailingZeros - Determine the minimum number of zero bits that S - /// is guaranteed to end in (at every loop iteration). It is, at the same - /// time, the minimum number of times S is divisible by 2. For example, - /// given {4,+,8} it returns 2. If S is guaranteed to be 0, it returns the - /// bitwidth of S. + /// Determine the minimum number of zero bits that S is guaranteed to end in + /// (at every loop iteration). It is, at the same time, the minimum number + /// of times S is divisible by 2. For example, given {4,+,8} it returns 2. + /// If S is guaranteed to be 0, it returns the bitwidth of S. uint32_t GetMinTrailingZeros(const SCEV *S); - /// getUnsignedRange - Determine the unsigned range for a particular SCEV. + /// Determine the unsigned range for a particular SCEV. /// ConstantRange getUnsignedRange(const SCEV *S) { return getRange(S, HINT_RANGE_UNSIGNED); } - /// getSignedRange - Determine the signed range for a particular SCEV. + /// Determine the signed range for a particular SCEV. /// ConstantRange getSignedRange(const SCEV *S) { return getRange(S, HINT_RANGE_SIGNED); } - /// isKnownNegative - Test if the given expression is known to be negative. + /// Test if the given expression is known to be negative. /// bool isKnownNegative(const SCEV *S); - /// isKnownPositive - Test if the given expression is known to be positive. + /// Test if the given expression is known to be positive. /// bool isKnownPositive(const SCEV *S); - /// isKnownNonNegative - Test if the given expression is known to be - /// non-negative. + /// Test if the given expression is known to be non-negative. /// bool isKnownNonNegative(const SCEV *S); - /// isKnownNonPositive - Test if the given expression is known to be - /// non-positive. + /// Test if the given expression is known to be non-positive. /// bool isKnownNonPositive(const SCEV *S); - /// isKnownNonZero - Test if the given expression is known to be - /// non-zero. + /// Test if the given expression is known to be non-zero. /// bool isKnownNonZero(const SCEV *S); - /// isKnownPredicate - Test if the given expression is known to satisfy - /// the condition described by Pred, LHS, and RHS. + /// Test if the given expression is known to satisfy the condition described + /// by Pred, LHS, and RHS. /// bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS); - /// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with - /// predicate Pred. Return true iff any changes were made. If the - /// operands are provably equal or unequal, LHS and RHS are set to - /// the same value and Pred is set to either ICMP_EQ or ICMP_NE. + /// Return true if the result of the predicate LHS `Pred` RHS is loop + /// invariant with respect to L. Set InvariantPred, InvariantLHS and + /// InvariantRHS so that InvariantLHS `InvariantPred` InvariantRHS is the + /// loop invariant form of LHS `Pred` RHS.
+ bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS, + const SCEV *RHS, const Loop *L, + ICmpInst::Predicate &InvariantPred, + const SCEV *&InvariantLHS, + const SCEV *&InvariantRHS); + + /// Simplify LHS and RHS in a comparison with predicate Pred. Return true + /// iff any changes were made. If the operands are provably equal or + /// unequal, LHS and RHS are set to the same value and Pred is set to either + /// ICMP_EQ or ICMP_NE. /// bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS, const SCEV *&RHS, unsigned Depth = 0); - /// getLoopDisposition - Return the "disposition" of the given SCEV with - /// respect to the given loop. + /// Return the "disposition" of the given SCEV with respect to the given + /// loop. LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L); - /// isLoopInvariant - Return true if the value of the given SCEV is - /// unchanging in the specified loop. + /// Return true if the value of the given SCEV is unchanging in the + /// specified loop. bool isLoopInvariant(const SCEV *S, const Loop *L); - /// hasComputableLoopEvolution - Return true if the given SCEV changes value - /// in a known way in the specified loop. This property being true implies - /// that the value is variant in the loop AND that we can emit an expression - /// to compute the value of the expression at any particular loop iteration. + /// Return true if the given SCEV changes value in a known way in the + /// specified loop. This property being true implies that the value is + /// variant in the loop AND that we can emit an expression to compute the + /// value of the expression at any particular loop iteration. bool hasComputableLoopEvolution(const SCEV *S, const Loop *L); - /// getLoopDisposition - Return the "disposition" of the given SCEV with - /// respect to the given block. + /// Return the "disposition" of the given SCEV with respect to the given + /// block. BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB); - /// dominates - Return true if elements that makes up the given SCEV - /// dominate the specified basic block. + /// Return true if elements that make up the given SCEV dominate the + /// specified basic block. bool dominates(const SCEV *S, const BasicBlock *BB); - /// properlyDominates - Return true if elements that makes up the given SCEV - /// properly dominate the specified basic block. + /// Return true if elements that make up the given SCEV properly dominate + /// the specified basic block. bool properlyDominates(const SCEV *S, const BasicBlock *BB); - /// hasOperand - Test whether the given SCEV has Op as a direct or - /// indirect operand. + /// Test whether the given SCEV has Op as a direct or indirect operand. bool hasOperand(const SCEV *S, const SCEV *Op) const; /// Return the size of an element read or written by Inst. @@ -948,11 +1155,8 @@ namespace llvm { SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize) const; - bool runOnFunction(Function &F) override; - void releaseMemory() override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void print(raw_ostream &OS, const Module* = nullptr) const override; - void verifyAnalysis() const override; + void print(raw_ostream &OS) const; + void verify() const; /// Collect parametric terms occurring in step expressions.
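// ---- Editor's illustrative sketch (not part of the patch) ----
// Combining the query APIs above: prove an index stays below a bound and
// that the bound is invariant in a loop. SE, L, IdxVal and LenVal are
// assumed to be a ScalarEvolution instance, a Loop*, and two integer
// Value*s from the caller.
const SCEV *Idx = SE.getSCEV(IdxVal);
const SCEV *Len = SE.getSCEV(LenVal);
if (SE.isKnownPredicate(ICmpInst::ICMP_ULT, Idx, Len) &&
    SE.isLoopInvariant(Len, L)) {
  // A client could, e.g., drop a redundant bounds check inside L here.
}
// ---- end sketch ----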
void collectParametricTerms(const SCEV *Expr, @@ -1034,6 +1238,18 @@ namespace llvm { SmallVectorImpl<const SCEV *> &Sizes, const SCEV *ElementSize); + /// Return the DataLayout associated with the module this SCEV instance is + /// operating on. + const DataLayout &getDataLayout() const { + return F.getParent()->getDataLayout(); + } + + const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS, + const SCEVConstant *RHS); + + /// Re-writes the SCEV according to the Predicates in \p Preds. + const SCEV *rewriteUsingPredicate(const SCEV *Scev, SCEVUnionPredicate &A); + private: /// Compute the backedge taken count knowing the interval difference, the /// stride and presence of the equality in the comparison. @@ -1054,13 +1270,112 @@ namespace llvm { private: FoldingSet<SCEV> UniqueSCEVs; + FoldingSet<SCEVPredicate> UniquePreds; BumpPtrAllocator SCEVAllocator; - /// FirstUnknown - The head of a linked list of all SCEVUnknown - /// values that have been allocated. This is used by releaseMemory - /// to locate them all and call their destructors. + /// The head of a linked list of all SCEVUnknown values that have been + /// allocated. This is used by releaseMemory to locate them all and call + /// their destructors. SCEVUnknown *FirstUnknown; }; + + /// \brief Analysis pass that exposes the \c ScalarEvolution for a function. + class ScalarEvolutionAnalysis { + static char PassID; + + public: + typedef ScalarEvolution Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + /// \brief Provide a name for the analysis for debugging and logging. + static StringRef name() { return "ScalarEvolutionAnalysis"; } + + ScalarEvolution run(Function &F, AnalysisManager<Function> *AM); + }; + + /// \brief Printer pass for the \c ScalarEvolutionAnalysis results. + class ScalarEvolutionPrinterPass { + raw_ostream &OS; + + public: + explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM); + + static StringRef name() { return "ScalarEvolutionPrinterPass"; } + }; + + class ScalarEvolutionWrapperPass : public FunctionPass { + std::unique_ptr<ScalarEvolution> SE; + + public: + static char ID; + + ScalarEvolutionWrapperPass(); + + ScalarEvolution &getSE() { return *SE; } + const ScalarEvolution &getSE() const { return *SE; } + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module * = nullptr) const override; + void verifyAnalysis() const override; + }; + + /// An interface layer with SCEV used to manage how we see SCEV expressions + /// for values in the context of existing predicates. We can add new + /// predicates, but we cannot remove them. + /// + /// This layer has multiple purposes: + /// - provides a simple interface for SCEV versioning. + /// - guarantees that the order of transformations applied on a SCEV + /// expression for a single Value is consistent across two different + /// getSCEV calls. This means that, for example, once we've obtained + /// an AddRec expression for a certain value through expression + /// rewriting, we will continue to get an AddRec expression for that + /// Value. + /// - lowers the number of expression rewrites. + class PredicatedScalarEvolution { + public: + PredicatedScalarEvolution(ScalarEvolution &SE); + const SCEVUnionPredicate &getUnionPredicate() const; + /// \brief Returns the SCEV expression of V, in the context of the current + /// SCEV predicate.
+ /// The order of transformations applied on the expression of V returned + /// by ScalarEvolution is guaranteed to be preserved, even when adding new + /// predicates. + const SCEV *getSCEV(Value *V); + /// \brief Adds a new predicate. + void addPredicate(const SCEVPredicate &Pred); + /// \brief Returns the ScalarEvolution analysis used. + ScalarEvolution *getSE() const { return &SE; } + + private: + /// \brief Increments the version number of the predicate. + /// This needs to be called every time the SCEV predicate changes. + void updateGeneration(); + /// Holds a SCEV and the version number of the SCEV predicate used to + /// perform the rewrite of the expression. + typedef std::pair<unsigned, const SCEV *> RewriteEntry; + /// Maps a SCEV to the rewrite result of that SCEV at a certain version + /// number. If this number doesn't match the current Generation, we will + /// need to do a rewrite. To preserve the transformation order of previous + /// rewrites, we will rewrite the previous result instead of the original + /// SCEV. + DenseMap<const SCEV *, RewriteEntry> RewriteMap; + /// The ScalarEvolution analysis. + ScalarEvolution &SE; + /// The SCEVPredicate that forms our context. We will rewrite all + /// expressions assuming that this predicate is true. + SCEVUnionPredicate Preds; + /// Marks the version of the SCEV predicate used. When rewriting a SCEV + /// expression we mark it with the version of the predicate. We use this to + /// figure out if the predicate has changed from the last rewrite of the + /// SCEV. If so, we need to perform a new rewrite. + unsigned Generation; + }; } #endif diff --git a/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h new file mode 100644 index 000000000000..7bbbf5562047 --- /dev/null +++ b/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h @@ -0,0 +1,79 @@ +//===- ScalarEvolutionAliasAnalysis.h - SCEV-based AA -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a SCEV-based alias analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H +#define LLVM_ANALYSIS_SCALAREVOLUTIONALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A simple alias analysis implementation that uses ScalarEvolution to answer +/// queries. +class SCEVAAResult : public AAResultBase<SCEVAAResult> { + ScalarEvolution &SE; + +public: + explicit SCEVAAResult(const TargetLibraryInfo &TLI, ScalarEvolution &SE) + : AAResultBase(TLI), SE(SE) {} + SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {} + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + +private: + Value *GetBaseValue(const SCEV *S); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class SCEVAA { +public: + typedef SCEVAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + SCEVAAResult run(Function &F, AnalysisManager<Function> *AM); + + /// \brief Provide access to a name for this pass for debugging purposes.
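// ---- Editor's illustrative sketch (not part of the patch) ----
// How a client might use PredicatedScalarEvolution as documented above. SE
// is assumed to be a ScalarEvolution instance, V a Value*, and Pred some
// SCEVPredicate (e.g. obtained from ScalarEvolution::getEqualPredicate).
PredicatedScalarEvolution PSE(SE);
const SCEV *Before = PSE.getSCEV(V); // rewritten under the (empty) predicate
PSE.addPredicate(Pred);              // bumps the internal generation
const SCEV *After = PSE.getSCEV(V);  // consistent with the earlier rewrite
(void)Before; (void)After;
// ---- end sketch ----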
+ static StringRef name() { return "SCEVAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the SCEVAAResult object. +class SCEVAAWrapperPass : public FunctionPass { + std::unique_ptr<SCEVAAResult> Result; + +public: + static char ID; + + SCEVAAWrapperPass(); + + SCEVAAResult &getResult() { return *Result; } + const SCEVAAResult &getResult() const { return *Result; } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +/// Creates an instance of \c SCEVAAWrapperPass. +FunctionPass *createSCEVAAWrapperPass(); + +} + +#endif diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index 8ec2078258d1..b9939168a99d 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -117,9 +117,14 @@ namespace llvm { /// \brief Return true for expressions that may incur non-trivial cost to /// evaluate at runtime. - bool isHighCostExpansion(const SCEV *Expr, Loop *L) { + /// + /// At is an optional parameter which specifies the point in the code where + /// the user is going to expand this expression. Sometimes this knowledge + /// can lead to a more accurate cost estimation. + bool isHighCostExpansion(const SCEV *Expr, Loop *L, + const Instruction *At = nullptr) { SmallPtrSet<const SCEV *, 8> Processed; - return isHighCostExpansionHelper(Expr, L, Processed); + return isHighCostExpansionHelper(Expr, L, At, Processed); } /// \brief This method returns the canonical induction variable of the @@ -146,6 +151,22 @@ namespace llvm { /// block. Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I); + /// \brief Generates a code sequence that evaluates this predicate. + /// The inserted instructions will be at position \p Loc. + /// The result will be of type i1 and will have a value of 0 when the + /// predicate is false and 1 otherwise. + Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc); + + /// \brief A specialized variant of expandCodeForPredicate, handling the + /// case when we are expanding code for a SCEVEqualPredicate. + Value *expandEqualPredicate(const SCEVEqualPredicate *Pred, + Instruction *Loc); + + /// \brief A specialized variant of expandCodeForPredicate, handling the + /// case when we are expanding code for a SCEVUnionPredicate. + Value *expandUnionPredicate(const SCEVUnionPredicate *Pred, + Instruction *Loc); + /// \brief Set the current IV increment loop and position. void setIVIncInsertPos(const Loop *L, Instruction *Pos) { assert(!CanonicalMode && @@ -193,11 +214,22 @@ namespace llvm { void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); } + /// \brief Try to find an LLVM IR value for S available at the point At. + /// + /// L is a hint which tells in which loop to look for a suitable value. + /// On success, return a value which is equivalent to the expanded S at + /// point At. Return nullptr if a value was not found. + /// + /// Note that this function does not perform an exhaustive search. I.e., if + /// it didn't find any value, it does not mean that there is no such value. + Value *findExistingExpansion(const SCEV *S, const Instruction *At, Loop *L); + private: LLVMContext &getContext() const { return SE.getContext(); } /// \brief Recursive helper function for isHighCostExpansion.
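// ---- Editor's illustrative sketch (not part of the patch) ----
// Tying the expander pieces together: consult the new cost interface before
// materializing an expression at a chosen insertion point. SE, DL, S, L and
// InsertPt are assumed to exist in the caller; the (SE, DataLayout, name)
// constructor is the usual SCEVExpander constructor of this era.
SCEVExpander Expander(SE, DL, "scev-sketch");
if (!Expander.isHighCostExpansion(S, L, InsertPt)) {
  // findExistingExpansion could be consulted first to reuse existing IR.
  Value *V = Expander.expandCodeFor(S, S->getType(), InsertPt);
  (void)V;
}
// ---- end sketch ----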
bool isHighCostExpansionHelper(const SCEV *S, Loop *L, + const Instruction *At, SmallPtrSetImpl &Processed); /// \brief Insert the specified binary operator, doing a small amount diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index da24de281d47..16992680577c 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -43,6 +43,7 @@ namespace llvm { SCEV(ID, scConstant), V(v) {} public: ConstantInt *getValue() const { return V; } + const APInt &getAPInt() const { return getValue()->getValue(); } Type *getType() const { return V->getType(); } @@ -404,7 +405,7 @@ namespace llvm { /// value, and only represent it as its LLVM Value. This is the "bottom" /// value for the analysis. /// - class SCEVUnknown : public SCEV, private CallbackVH { + class SCEVUnknown final : public SCEV, private CallbackVH { friend class ScalarEvolution; // Implement CallbackVH. @@ -553,12 +554,88 @@ namespace llvm { T.visitAll(Root); } + /// Recursively visits a SCEV expression and re-writes it. + template + class SCEVRewriteVisitor : public SCEVVisitor { + protected: + ScalarEvolution &SE; + public: + SCEVRewriteVisitor(ScalarEvolution &SE) : SE(SE) {} + + const SCEV *visitConstant(const SCEVConstant *Constant) { + return Constant; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); + return SE.getTruncateExpr(Operand, Expr->getType()); + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); + return SE.getZeroExtendExpr(Operand, Expr->getType()); + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + const SCEV *Operand = ((SC*)this)->visit(Expr->getOperand()); + return SE.getSignExtendExpr(Operand, Expr->getType()); + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getAddExpr(Operands); + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getMulExpr(Operands); + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + return SE.getUDivExpr(((SC*)this)->visit(Expr->getLHS()), + ((SC*)this)->visit(Expr->getRHS())); + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getAddRecExpr(Operands, Expr->getLoop(), + Expr->getNoWrapFlags()); + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getSMaxExpr(Operands); + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(((SC*)this)->visit(Expr->getOperand(i))); + return SE.getUMaxExpr(Operands); + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + return Expr; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return Expr; + } + }; + typedef DenseMap 
<const Value *, Value *> ValueToValueMap; /// The SCEVParameterRewriter takes a scalar evolution expression and updates /// the SCEVUnknown components following the Map (Value -> Value). - struct SCEVParameterRewriter - : public SCEVVisitor<SCEVParameterRewriter, const SCEV *> { + class SCEVParameterRewriter : public SCEVRewriteVisitor<SCEVParameterRewriter> { public: static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToValueMap &Map, @@ -567,67 +644,8 @@ namespace llvm { return Rewriter.visit(Scev); } - SCEVParameterRewriter(ScalarEvolution &S, ValueToValueMap &M, bool C) - : SE(S), Map(M), InterpretConsts(C) {} - - const SCEV *visitConstant(const SCEVConstant *Constant) { - return Constant; - } - - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getTruncateExpr(Operand, Expr->getType()); - } - - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getZeroExtendExpr(Operand, Expr->getType()); - } - - const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getSignExtendExpr(Operand, Expr->getType()); - } - - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getAddExpr(Operands); - } - - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getMulExpr(Operands); - } - - const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS())); - } - - const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getAddRecExpr(Operands, Expr->getLoop(), - Expr->getNoWrapFlags()); - } - - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getSMaxExpr(Operands); - } - - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getUMaxExpr(Operands); - } + SCEVParameterRewriter(ScalarEvolution &SE, ValueToValueMap &M, bool C) + : SCEVRewriteVisitor(SE), Map(M), InterpretConsts(C) {} const SCEV *visitUnknown(const SCEVUnknown *Expr) { Value *V = Expr->getValue(); @@ -640,68 +658,26 @@ namespace llvm { return Expr; } - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return Expr; - } - private: - ScalarEvolution &SE; ValueToValueMap &Map; bool InterpretConsts; }; typedef DenseMap<const Loop *, const SCEV *> LoopToScevMapT; - /// The SCEVApplyRewriter takes a scalar evolution expression and applies + /// The SCEVLoopAddRecRewriter takes a scalar evolution expression and applies /// the Map (Loop -> SCEV) to all AddRecExprs.
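// ---- Editor's illustrative sketch (not part of the patch) ----
// The CRTP base above makes one-off rewriters short. A hypothetical rewriter
// that substitutes a single SCEVUnknown, overriding only the visit method it
// cares about and inheriting the structural recursion from
// SCEVRewriteVisitor:
class SingleUnknownRewriter
    : public SCEVRewriteVisitor<SingleUnknownRewriter> {
  const SCEV *From, *To;

public:
  SingleUnknownRewriter(ScalarEvolution &SE, const SCEV *From, const SCEV *To)
      : SCEVRewriteVisitor(SE), From(From), To(To) {}
  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    return Expr == From ? To : Expr; // swap in the replacement expression
  }
};
// Usage: const SCEV *NewS = SingleUnknownRewriter(SE, OldU, Repl).visit(S);
// ---- end sketch ----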
- struct SCEVApplyRewriter - : public SCEVVisitor<SCEVApplyRewriter, const SCEV *> { + class SCEVLoopAddRecRewriter + : public SCEVRewriteVisitor<SCEVLoopAddRecRewriter> { public: static const SCEV *rewrite(const SCEV *Scev, LoopToScevMapT &Map, ScalarEvolution &SE) { - SCEVApplyRewriter Rewriter(SE, Map); + SCEVLoopAddRecRewriter Rewriter(SE, Map); return Rewriter.visit(Scev); } - SCEVApplyRewriter(ScalarEvolution &S, LoopToScevMapT &M) - : SE(S), Map(M) {} - - const SCEV *visitConstant(const SCEVConstant *Constant) { - return Constant; - } - - const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getTruncateExpr(Operand, Expr->getType()); - } - - const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getZeroExtendExpr(Operand, Expr->getType()); - } - - const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { - const SCEV *Operand = visit(Expr->getOperand()); - return SE.getSignExtendExpr(Operand, Expr->getType()); - } - - const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getAddExpr(Operands); - } - - const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getMulExpr(Operands); - } - - const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { - return SE.getUDivExpr(visit(Expr->getLHS()), visit(Expr->getRHS())); - } + SCEVLoopAddRecRewriter(ScalarEvolution &SE, LoopToScevMapT &M) + : SCEVRewriteVisitor(SE), Map(M) {} const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { SmallVector<const SCEV *, 2> Operands; @@ -714,41 +690,18 @@ namespace llvm { if (0 == Map.count(L)) return Res; - const SCEVAddRecExpr *Rec = (const SCEVAddRecExpr *) Res; + const SCEVAddRecExpr *Rec = cast<SCEVAddRecExpr>(Res); return Rec->evaluateAtIteration(Map[L], SE); } - const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getSMaxExpr(Operands); - } - - const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { - SmallVector<const SCEV *, 2> Operands; - for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) - Operands.push_back(visit(Expr->getOperand(i))); - return SE.getUMaxExpr(Operands); - } - - const SCEV *visitUnknown(const SCEVUnknown *Expr) { - return Expr; - } - - const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { - return Expr; - } - private: - ScalarEvolution &SE; LoopToScevMapT &Map; }; /// Applies the Map (Loop -> SCEV) to the given Scev. static inline const SCEV *apply(const SCEV *Scev, LoopToScevMapT &Map, ScalarEvolution &SE) { - return SCEVApplyRewriter::rewrite(Scev, Map, SE); + return SCEVLoopAddRecRewriter::rewrite(Scev, Map, SE); } } diff --git a/include/llvm/Analysis/ScopedNoAliasAA.h b/include/llvm/Analysis/ScopedNoAliasAA.h new file mode 100644 index 000000000000..175561687157 --- /dev/null +++ b/include/llvm/Analysis/ScopedNoAliasAA.h @@ -0,0 +1,92 @@ +//===- ScopedNoAliasAA.h - Scoped No-Alias Alias Analysis -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a metadata-based scoped no-alias analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCOPEDNOALIASAA_H +#define LLVM_ANALYSIS_SCOPEDNOALIASAA_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A simple AA result which uses scoped-noalias metadata to answer queries. +class ScopedNoAliasAAResult : public AAResultBase { + friend AAResultBase; + +public: + explicit ScopedNoAliasAAResult(const TargetLibraryInfo &TLI) + : AAResultBase(TLI) {} + ScopedNoAliasAAResult(ScopedNoAliasAAResult &&Arg) + : AAResultBase(std::move(Arg)) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); + +private: + bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; + void collectMDInDomain(const MDNode *List, const MDNode *Domain, + SmallPtrSetImpl &Nodes) const; +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class ScopedNoAliasAA { +public: + typedef ScopedNoAliasAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + ScopedNoAliasAAResult run(Function &F, AnalysisManager *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "ScopedNoAliasAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the ScopedNoAliasAAResult object. +class ScopedNoAliasAAWrapperPass : public ImmutablePass { + std::unique_ptr Result; + +public: + static char ID; + + ScopedNoAliasAAWrapperPass(); + + ScopedNoAliasAAResult &getResult() { return *Result; } + const ScopedNoAliasAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createScopedNoAliasAAWrapperPass - This pass implements metadata-based +// scoped noalias analysis. 
+// +ImmutablePass *createScopedNoAliasAAWrapperPass(); +} + +#endif diff --git a/include/llvm/Analysis/SparsePropagation.h b/include/llvm/Analysis/SparsePropagation.h index 9ccae5ff89b7..2c7f5dd73547 100644 --- a/include/llvm/Analysis/SparsePropagation.h +++ b/include/llvm/Analysis/SparsePropagation.h @@ -21,19 +21,19 @@ #include namespace llvm { - class Value; - class Constant; - class Argument; - class Instruction; - class PHINode; - class TerminatorInst; - class BasicBlock; - class Function; - class SparseSolver; - class raw_ostream; +class Value; +class Constant; +class Argument; +class Instruction; +class PHINode; +class TerminatorInst; +class BasicBlock; +class Function; +class SparseSolver; +class raw_ostream; + +template class SmallVectorImpl; - template class SmallVectorImpl; - /// AbstractLatticeFunction - This class is implemented by the dataflow instance /// to specify what the lattice values are and how they handle merges etc. /// This gives the client the power to compute lattice values from instructions, @@ -44,8 +44,10 @@ namespace llvm { class AbstractLatticeFunction { public: typedef void *LatticeVal; + private: LatticeVal UndefVal, OverdefinedVal, UntrackedVal; + public: AbstractLatticeFunction(LatticeVal undefVal, LatticeVal overdefinedVal, LatticeVal untrackedVal) { @@ -54,18 +56,16 @@ public: UntrackedVal = untrackedVal; } virtual ~AbstractLatticeFunction(); - + LatticeVal getUndefVal() const { return UndefVal; } LatticeVal getOverdefinedVal() const { return OverdefinedVal; } LatticeVal getUntrackedVal() const { return UntrackedVal; } - + /// IsUntrackedValue - If the specified Value is something that is obviously /// uninteresting to the analysis (and would always return UntrackedVal), /// this function can return true to avoid pointless work. - virtual bool IsUntrackedValue(Value *V) { - return false; - } - + virtual bool IsUntrackedValue(Value *V) { return false; } + /// ComputeConstant - Given a constant value, compute and return a lattice /// value corresponding to the specified constant. virtual LatticeVal ComputeConstant(Constant *C) { @@ -74,10 +74,8 @@ public: /// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is /// one that the we want to handle through ComputeInstructionState. - virtual bool IsSpecialCasedPHI(PHINode *PN) { - return false; - } - + virtual bool IsSpecialCasedPHI(PHINode *PN) { return false; } + /// GetConstant - If the specified lattice value is representable as an LLVM /// constant value, return it. Otherwise return null. The returned value /// must be in the same LLVM type as Val. @@ -90,42 +88,41 @@ public: virtual LatticeVal ComputeArgument(Argument *I) { return getOverdefinedVal(); // always safe } - + /// MergeValues - Compute and return the merge of the two specified lattice /// values. Merging should only move one direction down the lattice to /// guarantee convergence (toward overdefined). virtual LatticeVal MergeValues(LatticeVal X, LatticeVal Y) { return getOverdefinedVal(); // always safe, never useful. } - + /// ComputeInstructionState - Given an instruction and a vector of its operand /// values, compute the result value of the instruction. virtual LatticeVal ComputeInstructionState(Instruction &I, SparseSolver &SS) { return getOverdefinedVal(); // always safe, never useful. } - + /// PrintValue - Render the specified lattice value to the specified stream. 
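// ---- Editor's illustrative sketch (not part of the patch) ----
// A minimal AbstractLatticeFunction subclass, assuming the three hypothetical
// lattice tokens below; every constant folds to overdefined, so the solver
// only computes reachability. This is just to show the subclassing contract.
static char UndefTok, OverdefTok, UntrackedTok;
class ReachabilityLattice : public AbstractLatticeFunction {
public:
  ReachabilityLattice()
      : AbstractLatticeFunction(&UndefTok, &OverdefTok, &UntrackedTok) {}
  LatticeVal ComputeConstant(Constant *C) override {
    return getOverdefinedVal(); // track no value information
  }
};
// The solver owns and deletes the lattice object (see ~SparseSolver below):
//   SparseSolver Solver(new ReachabilityLattice());
//   Solver.Solve(F);
// ---- end sketch ----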
virtual void PrintValue(LatticeVal V, raw_ostream &OS); }; - /// SparseSolver - This class is a general purpose solver for Sparse Conditional /// Propagation with a programmable lattice function. /// class SparseSolver { typedef AbstractLatticeFunction::LatticeVal LatticeVal; - + /// LatticeFunc - This is the object that knows the lattice and how to do /// compute transfer functions. AbstractLatticeFunction *LatticeFunc; - - DenseMap ValueState; // The state each value is in. - SmallPtrSet BBExecutable; // The bbs that are executable. - - std::vector InstWorkList; // Worklist of insts to process. - - std::vector BBWorkList; // The BasicBlock work list - + + DenseMap ValueState; // The state each value is in. + SmallPtrSet BBExecutable; // The bbs that are executable. + + std::vector InstWorkList; // Worklist of insts to process. + + std::vector BBWorkList; // The BasicBlock work list + /// KnownFeasibleEdges - Entries in this set are edges which have already had /// PHI nodes retriggered. typedef std::pair Edge; @@ -133,17 +130,16 @@ class SparseSolver { SparseSolver(const SparseSolver&) = delete; void operator=(const SparseSolver&) = delete; + public: explicit SparseSolver(AbstractLatticeFunction *Lattice) - : LatticeFunc(Lattice) {} - ~SparseSolver() { - delete LatticeFunc; - } - + : LatticeFunc(Lattice) {} + ~SparseSolver() { delete LatticeFunc; } + /// Solve - Solve for constants and executable blocks. /// void Solve(Function &F); - + void Print(Function &F, raw_ostream &OS) const; /// getLatticeState - Return the LatticeVal object that corresponds to the @@ -153,7 +149,7 @@ public: DenseMap::const_iterator I = ValueState.find(V); return I != ValueState.end() ? I->second : LatticeFunc->getUntrackedVal(); } - + /// getOrInitValueState - Return the LatticeVal object that corresponds to the /// value, initializing the value's state if it hasn't been entered into the /// map yet. This function is necessary because not all values should start @@ -161,7 +157,7 @@ public: /// constants should be marked as constants. /// LatticeVal getOrInitValueState(Value *V); - + /// isEdgeFeasible - Return true if the control flow edge from the 'From' /// basic block to the 'To' basic block is currently feasible. If /// AggressiveUndef is true, then this treats values with unknown lattice @@ -176,29 +172,28 @@ public: bool isBlockExecutable(BasicBlock *BB) const { return BBExecutable.count(BB); } - + private: /// UpdateState - When the state for some instruction is potentially updated, /// this function notices and adds I to the worklist if needed. void UpdateState(Instruction &Inst, LatticeVal V); - + /// MarkBlockExecutable - This method can be used by clients to mark all of /// the blocks that are known to be intrinsically live in the processed unit. void MarkBlockExecutable(BasicBlock *BB); - + /// markEdgeExecutable - Mark a basic block as executable, adding it to the BB /// work list if it is not already executable. void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest); - + /// getFeasibleSuccessors - Return a vector of booleans to indicate which /// successors are reachable from a given terminator instruction. 
void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl &Succs, bool AggressiveUndef); - + void visitInst(Instruction &I); void visitPHINode(PHINode &I); void visitTerminatorInst(TerminatorInst &TI); - }; } // end namespace llvm diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index 1c1fdfef980d..7798e3c88248 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -27,6 +27,86 @@ #define TLI_DEFINE_STRING_INTERNAL(string_repr) string_repr, #endif +/// void *new(unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXI@Z") + +/// void *new(unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new(unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_K@Z") + +/// void *new(unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXI@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX_K@Z") + +/// void *new[](unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXI@Z") + +/// void *new[](unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new[](unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_K@Z") + +/// void *new[](unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXI@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX_K@Z") + /// int _IO_getc(_IO_FILE * 
__fp); TLI_DEFINE_ENUM_INTERNAL(under_IO_getc) TLI_DEFINE_STRING_INTERNAL("_IO_getc") @@ -406,6 +486,15 @@ TLI_DEFINE_STRING_INTERNAL("floorf") /// long double floorl(long double x); TLI_DEFINE_ENUM_INTERNAL(floorl) TLI_DEFINE_STRING_INTERNAL("floorl") +/// int fls(int i); +TLI_DEFINE_ENUM_INTERNAL(fls) +TLI_DEFINE_STRING_INTERNAL("fls") +/// int flsl(long int i); +TLI_DEFINE_ENUM_INTERNAL(flsl) +TLI_DEFINE_STRING_INTERNAL("flsl") +/// int flsll(long long int i); +TLI_DEFINE_ENUM_INTERNAL(flsll) +TLI_DEFINE_STRING_INTERNAL("flsll") /// double fmax(double x, double y); TLI_DEFINE_ENUM_INTERNAL(fmax) TLI_DEFINE_STRING_INTERNAL("fmax") @@ -664,6 +753,7 @@ TLI_DEFINE_STRING_INTERNAL("modff") /// long double modfl(long double value, long double *iptr); TLI_DEFINE_ENUM_INTERNAL(modfl) TLI_DEFINE_STRING_INTERNAL("modfl") + /// double nearbyint(double x); TLI_DEFINE_ENUM_INTERNAL(nearbyint) TLI_DEFINE_STRING_INTERNAL("nearbyint") diff --git a/include/llvm/Analysis/TargetLibraryInfo.h b/include/llvm/Analysis/TargetLibraryInfo.h index e0a1ee378274..7becdf033dd2 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.h +++ b/include/llvm/Analysis/TargetLibraryInfo.h @@ -42,7 +42,7 @@ class PreservedAnalyses; /// /// This class constructs tables that hold the target library information and /// make it available. However, it is somewhat expensive to compute and only -/// depends on the triple. So users typicaly interact with the \c +/// depends on the triple. So users typically interact with the \c /// TargetLibraryInfo wrapper below. class TargetLibraryInfoImpl { friend class TargetLibraryInfo; @@ -201,13 +201,13 @@ public: } bool isFunctionVectorizable(StringRef F, unsigned VF) const { return Impl->isFunctionVectorizable(F, VF); - }; + } bool isFunctionVectorizable(StringRef F) const { return Impl->isFunctionVectorizable(F); - }; + } StringRef getVectorizedFunction(StringRef F, unsigned VF) const { return Impl->getVectorizedFunction(F, VF); - }; + } /// \brief Tests if the function is both available and a candidate for /// optimized code generation. diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 01f00896410e..3913cc3f107c 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -42,11 +42,13 @@ class Value; /// \brief Information about a load/store intrinsic defined by the target. struct MemIntrinsicInfo { MemIntrinsicInfo() - : ReadMem(false), WriteMem(false), Vol(false), MatchingId(0), + : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0), NumMemRefs(0), PtrVal(nullptr) {} bool ReadMem; bool WriteMem; - bool Vol; + /// True only if this memory operation is non-volatile, non-atomic, and + /// unordered. (See LoadInst/StoreInst for details on each) + bool IsSimple; // Same Id is set by the target for corresponding load/store intrinsics. unsigned short MatchingId; int NumMemRefs; @@ -97,11 +99,14 @@ public: /// /// Many APIs in this interface return a cost. This enum defines the /// fundamental values that should be used to interpret (and produce) those - /// costs. The costs are returned as an unsigned rather than a member of this + /// costs. The costs are returned as an int rather than a member of this /// enumeration because it is expected that the cost of one IR instruction /// may have a multiplicative factor to it or otherwise won't fit directly /// into the enum. Moreover, it is common to sum or average costs which works /// better as simple integral values. 
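// ---- Editor's illustrative sketch (not part of the patch) ----
// Referring to the TargetLibraryInfo hunk above: how a vectorizer client
// queries the vector-function mappings. TLI is assumed to be a populated
// TargetLibraryInfo; the function name and VF are illustrative.
if (TLI.isFunctionVectorizable("sinf", /*VF=*/4)) {
  StringRef VecName = TLI.getVectorizedFunction("sinf", /*VF=*/4);
  // VecName is non-empty here, e.g. a target-provided SIMD routine.
  (void)VecName;
}
// ---- end sketch ----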
Thus this enum only provides constants. + /// Also note that the returned costs are signed integers to make it natural + /// to add, subtract, and test with zero (a common boundary condition). It is + /// not expected that 2^32 is a realistic cost to be modeling at any point. /// /// Note that these costs should usually reflect the intersection of code-size /// cost and execution cost. A free instruction is typically one that folds @@ -128,15 +133,15 @@ public: /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. - unsigned getOperationCost(unsigned Opcode, Type *Ty, - Type *OpTy = nullptr) const; + int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const; /// \brief Estimate the cost of a GEP operation when lowered. /// /// The contract for this function is the same as \c getOperationCost except /// that it supports an interface that provides extra information specific to /// the GEP operation. - unsigned getGEPCost(const Value *Ptr, ArrayRef Operands) const; + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) const; /// \brief Estimate the cost of a function call when lowered. /// @@ -147,32 +152,31 @@ public: /// This is the most basic query for estimating call cost: it only knows the /// function type and (potentially) the number of arguments at the call site. /// The latter is only interesting for varargs function types. - unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const; + int getCallCost(FunctionType *FTy, int NumArgs = -1) const; /// \brief Estimate the cost of calling a specific function when lowered. /// /// This overload adds the ability to reason about the particular function /// being called in the event it is a library call with special lowering. - unsigned getCallCost(const Function *F, int NumArgs = -1) const; + int getCallCost(const Function *F, int NumArgs = -1) const; /// \brief Estimate the cost of calling a specific function when lowered. /// /// This overload allows specifying a set of candidate argument values. - unsigned getCallCost(const Function *F, + int getCallCost(const Function *F, ArrayRef Arguments) const; + + /// \brief Estimate the cost of an intrinsic when lowered. + /// + /// Mirrors the \c getCallCost method but uses an intrinsic identifier. + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) const; + + /// \brief Estimate the cost of an intrinsic when lowered. + /// + /// Mirrors the \c getCallCost method but uses an intrinsic identifier. + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const; - /// \brief Estimate the cost of an intrinsic when lowered. - /// - /// Mirrors the \c getCallCost method but uses an intrinsic identifier. - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys) const; - - /// \brief Estimate the cost of an intrinsic when lowered. - /// - /// Mirrors the \c getCallCost method but uses an intrinsic identifier. - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments) const; - /// \brief Estimate the cost of a given IR user when lowered. /// /// This can estimate the cost of either a ConstantExpr or Instruction when @@ -188,7 +192,7 @@ public: /// /// The returned cost is defined in terms of \c TargetCostConstants, see its /// comments for a detailed explanation of the cost values. 
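// ---- Editor's illustrative sketch (not part of the patch) ----
// The signed cost type described above makes accumulation and threshold
// tests natural. TTI and BB are assumed to be a TargetTransformInfo and a
// BasicBlock from the caller; the threshold is illustrative.
int Cost = 0;
for (Instruction &I : BB)
  Cost += TTI.getUserCost(&I); // TCC_Free (0) contributes nothing
const int Threshold = 25;
if (Cost <= Threshold) {
  // e.g. the block is cheap enough to speculate or unswitch
}
// ---- end sketch ----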
- unsigned getUserCost(const User *U) const; + int getUserCost(const User *U) const; /// \brief Return true if branch divergence exists. /// @@ -308,12 +312,17 @@ public: bool HasBaseReg, int64_t Scale, unsigned AddrSpace = 0) const; - /// \brief Return true if the target works with masked instruction - /// AVX2 allows masks for consecutive load and store for i32 and i64 elements. - /// AVX-512 architecture will also allow masks for non-consecutive memory - /// accesses. - bool isLegalMaskedStore(Type *DataType, int Consecutive) const; - bool isLegalMaskedLoad(Type *DataType, int Consecutive) const; + /// \brief Return true if the target supports masked load/store + /// AVX2 and AVX-512 targets allow masks for consecutive load and store for + /// 32 and 64 bit elements. + bool isLegalMaskedStore(Type *DataType) const; + bool isLegalMaskedLoad(Type *DataType) const; + + /// \brief Return true if the target supports masked gather/scatter + /// AVX-512 fully supports gather and scatter for vectors with 32 and 64 + /// bits scalar type. + bool isLegalMaskedScatter(Type *DataType) const; + bool isLegalMaskedGather(Type *DataType) const; /// \brief Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store @@ -350,6 +359,9 @@ public: /// \brief Don't restrict interleaved unrolling to small loops. bool enableAggressiveInterleaving(bool LoopHasReductions) const; + /// \brief Enable matching of interleaved access groups. + bool enableInterleavedAccessVectorization() const; + /// \brief Return hardware support for population count. PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; @@ -358,19 +370,19 @@ public: /// \brief Return the expected cost of supporting the floating point operation /// of the specified type. - unsigned getFPOpCost(Type *Ty) const; + int getFPOpCost(Type *Ty) const; /// \brief Return the expected cost of materializing for the given integer /// immediate of the specified type. - unsigned getIntImmCost(const APInt &Imm, Type *Ty) const; + int getIntImmCost(const APInt &Imm, Type *Ty) const; /// \brief Return the expected cost of materialization for the given integer /// immediate of the specified type for a given instruction. The cost can be /// zero if the immediate can be folded into the specified instruction. - unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) const; - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) const; + int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) const; + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) const; /// @} /// \name Vector Target Information @@ -410,43 +422,51 @@ public: unsigned getMaxInterleaveFactor(unsigned VF) const; /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc. - unsigned - getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Opd1Info = OK_AnyValue, - OperandValueKind Opd2Info = OK_AnyValue, - OperandValueProperties Opd1PropInfo = OP_None, - OperandValueProperties Opd2PropInfo = OP_None) const; + int getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue, + OperandValueKind Opd2Info = OK_AnyValue, + OperandValueProperties Opd1PropInfo = OP_None, + OperandValueProperties Opd2PropInfo = OP_None) const; /// \return The cost of a shuffle instruction of kind Kind and of type Tp. 
/// The index and subtype parameters are used by the subvector insertion and /// extraction shuffle kinds. - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, - Type *SubTp = nullptr) const; + int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0, + Type *SubTp = nullptr) const; /// \return The expected cost of cast instructions, such as bitcast, trunc, /// zext, etc. - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const; /// \return The expected cost of control-flow related instructions such as /// Phi, Ret, Br. - unsigned getCFInstrCost(unsigned Opcode) const; + int getCFInstrCost(unsigned Opcode) const; /// \returns The expected cost of compare and select instructions. - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy = nullptr) const; + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy = nullptr) const; /// \return The expected cost of vector Insert and Extract. /// Use -1 to indicate that there is no information on the index value. - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index = -1) const; + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; /// \return The cost of Load and Store instructions. - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const; + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const; /// \return The cost of masked Load and Store instructions. - unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) const; + int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) const; + + /// \return The cost of Gather or Scatter operation + /// \p Opcode - is a type of memory access Load or Store + /// \p DataTy - a vector type of the data to be loaded or stored + /// \p Ptr - pointer [or vector of pointers] - address[es] in memory + /// \p VariableMask - true when the memory access is predicated with a mask + /// that is not a compile-time constant + /// \p Alignment - alignment of single element + int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, unsigned Alignment) const; /// \return The cost of the interleaved memory operation. /// \p Opcode is the memory operation code @@ -456,11 +476,9 @@ public: /// load allows gaps) /// \p Alignment is the alignment of the memory operation /// \p AddressSpace is address space of the pointer. - unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) const; + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef Indices, unsigned Alignment, + unsigned AddressSpace) const; /// \brief Calculate the cost of performing a vector reduction. /// @@ -475,16 +493,18 @@ public: /// Split: /// (v0, v1, v2, v3) /// ((v0+v2), (v1+v3), undef, undef) - unsigned getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) const; + int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const; - /// \returns The cost of Intrinsic instructions. - unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys) const; + /// \returns The cost of Intrinsic instructions. Types analysis only. 
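// ---- Editor's illustrative sketch (not part of the patch) ----
// Using the legality and cost queries above together, as a vectorizer might
// when deciding how to lower a strided load. TTI, VecTy, Ptr and Alignment
// are assumed to come from the caller.
int GatherCost = -1;
if (TTI.isLegalMaskedGather(VecTy))
  GatherCost = TTI.getGatherScatterOpCost(Instruction::Load, VecTy, Ptr,
                                          /*VariableMask=*/true, Alignment);
// Compare against the scalarized cost before committing to a gather.
// ---- end sketch ----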
+ int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Tys) const; + + /// \returns The cost of Intrinsic instructions. Analyses the real arguments. + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) const; /// \returns The cost of Call instructions. - unsigned getCallInstrCost(Function *F, Type *RetTy, - ArrayRef Tys) const; + int getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) const; /// \returns The number of pieces into which the provided type must be /// split during legalization. Zero is returned when the answer is unknown. @@ -497,7 +517,7 @@ public: /// The 'IsComplex' parameter is a hint that the address computation is likely /// to involve multiple instructions and as such unlikely to be merged into /// the address indexing mode. - unsigned getAddressComputationCost(Type *Ty, bool IsComplex = false) const; + int getAddressComputationCost(Type *Ty, bool IsComplex = false) const; /// \returns The cost, if any, of keeping values of the given types alive /// over a callsite. @@ -521,8 +541,8 @@ public: /// \returns True if the two functions have compatible attributes for inlining /// purposes. - bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const; + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; /// @} @@ -542,18 +562,18 @@ class TargetTransformInfo::Concept { public: virtual ~Concept() = 0; virtual const DataLayout &getDataLayout() const = 0; - virtual unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; - virtual unsigned getGEPCost(const Value *Ptr, - ArrayRef Operands) = 0; - virtual unsigned getCallCost(FunctionType *FTy, int NumArgs) = 0; - virtual unsigned getCallCost(const Function *F, int NumArgs) = 0; - virtual unsigned getCallCost(const Function *F, + virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0; + virtual int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) = 0; + virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0; + virtual int getCallCost(const Function *F, int NumArgs) = 0; + virtual int getCallCost(const Function *F, + ArrayRef Arguments) = 0; + virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) = 0; + virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) = 0; - virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys) = 0; - virtual unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments) = 0; - virtual unsigned getUserCost(const User *U) = 0; + virtual int getUserCost(const User *U) = 0; virtual bool hasBranchDivergence() = 0; virtual bool isSourceOfDivergence(const Value *V) = 0; virtual bool isLoweredToCall(const Function *F) = 0; @@ -564,8 +584,10 @@ public: int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; - virtual bool isLegalMaskedStore(Type *DataType, int Consecutive) = 0; - virtual bool isLegalMaskedLoad(Type *DataType, int Consecutive) = 0; + virtual bool isLegalMaskedStore(Type *DataType) = 0; + virtual bool isLegalMaskedLoad(Type *DataType) = 0; + virtual bool isLegalMaskedScatter(Type *DataType) = 0; + virtual bool isLegalMaskedGather(Type *DataType) = 0; virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; @@ -576,14 +598,15 @@ public: virtual unsigned getJumpBufSize() = 0; virtual bool shouldBuildLookupTables() = 0; virtual bool 
enableAggressiveInterleaving(bool LoopHasReductions) = 0; + virtual bool enableInterleavedAccessVectorization() = 0; virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; virtual bool haveFastSqrt(Type *Ty) = 0; - virtual unsigned getFPOpCost(Type *Ty) = 0; - virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0; - virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) = 0; - virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) = 0; + virtual int getFPOpCost(Type *Ty) = 0; + virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0; + virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) = 0; + virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) = 0; virtual unsigned getNumberOfRegisters(bool Vector) = 0; virtual unsigned getRegisterBitWidth(bool Vector) = 0; virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; @@ -592,40 +615,44 @@ public: OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo) = 0; - virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) = 0; - virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0; - virtual unsigned getCFInstrCost(unsigned Opcode) = 0; - virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) = 0; - virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) = 0; - virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) = 0; - virtual unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) = 0; + virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) = 0; + virtual int getCFInstrCost(unsigned Opcode) = 0; + virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) = 0; + virtual int getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) = 0; + virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) = 0; + virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) = 0; + virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) = 0; + virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef Indices, unsigned Alignment, unsigned AddressSpace) = 0; - virtual unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) = 0; - virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) = 0; - virtual unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys) = 0; - virtual unsigned getCallInstrCost(Function *F, Type *RetTy, + virtual int getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) = 0; + virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys) = 0; + virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) = 0; + virtual int getCallInstrCost(Function *F, Type *RetTy, + ArrayRef Tys) = 0; virtual unsigned getNumberOfParts(Type *Tp) = 0; - virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) = 0; + virtual int getAddressComputationCost(Type *Ty, bool 
IsComplex) = 0; virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) = 0; virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) = 0; virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) = 0; - virtual bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const = 0; + virtual bool areInlineCompatible(const Function *Caller, + const Function *Callee) const = 0; }; template @@ -640,32 +667,32 @@ public: return Impl.getDataLayout(); } - unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override { + int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override { return Impl.getOperationCost(Opcode, Ty, OpTy); } - unsigned getGEPCost(const Value *Ptr, - ArrayRef Operands) override { - return Impl.getGEPCost(Ptr, Operands); + int getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) override { + return Impl.getGEPCost(PointeeType, Ptr, Operands); } - unsigned getCallCost(FunctionType *FTy, int NumArgs) override { + int getCallCost(FunctionType *FTy, int NumArgs) override { return Impl.getCallCost(FTy, NumArgs); } - unsigned getCallCost(const Function *F, int NumArgs) override { + int getCallCost(const Function *F, int NumArgs) override { return Impl.getCallCost(F, NumArgs); } - unsigned getCallCost(const Function *F, - ArrayRef Arguments) override { + int getCallCost(const Function *F, + ArrayRef Arguments) override { return Impl.getCallCost(F, Arguments); } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys) override { + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) override { return Impl.getIntrinsicCost(IID, RetTy, ParamTys); } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments) override { + int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef Arguments) override { return Impl.getIntrinsicCost(IID, RetTy, Arguments); } - unsigned getUserCost(const User *U) override { return Impl.getUserCost(U); } + int getUserCost(const User *U) override { return Impl.getUserCost(U); } bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } bool isSourceOfDivergence(const Value *V) override { return Impl.isSourceOfDivergence(V); @@ -688,11 +715,17 @@ public: return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, AddrSpace); } - bool isLegalMaskedStore(Type *DataType, int Consecutive) override { - return Impl.isLegalMaskedStore(DataType, Consecutive); + bool isLegalMaskedStore(Type *DataType) override { + return Impl.isLegalMaskedStore(DataType); } - bool isLegalMaskedLoad(Type *DataType, int Consecutive) override { - return Impl.isLegalMaskedLoad(DataType, Consecutive); + bool isLegalMaskedLoad(Type *DataType) override { + return Impl.isLegalMaskedLoad(DataType); + } + bool isLegalMaskedScatter(Type *DataType) override { + return Impl.isLegalMaskedScatter(DataType); + } + bool isLegalMaskedGather(Type *DataType) override { + return Impl.isLegalMaskedGather(DataType); } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, @@ -715,24 +748,25 @@ public: bool enableAggressiveInterleaving(bool LoopHasReductions) override { return Impl.enableAggressiveInterleaving(LoopHasReductions); } + bool enableInterleavedAccessVectorization() override { + return Impl.enableInterleavedAccessVectorization(); + } PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { return 
Impl.getPopcntSupport(IntTyWidthInBit); } bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } - unsigned getFPOpCost(Type *Ty) override { - return Impl.getFPOpCost(Ty); - } + int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); } - unsigned getIntImmCost(const APInt &Imm, Type *Ty) override { + int getIntImmCost(const APInt &Imm, Type *Ty) override { return Impl.getIntImmCost(Imm, Ty); } - unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, - Type *Ty) override { + int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm, + Type *Ty) override { return Impl.getIntImmCost(Opc, Idx, Imm, Ty); } - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) override { + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) override { return Impl.getIntImmCost(IID, Idx, Imm, Ty); } unsigned getNumberOfRegisters(bool Vector) override { @@ -752,56 +786,62 @@ public: return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); } - unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) override { + int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) override { return Impl.getShuffleCost(Kind, Tp, Index, SubTp); } - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override { + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) override { return Impl.getCastInstrCost(Opcode, Dst, Src); } - unsigned getCFInstrCost(unsigned Opcode) override { + int getCFInstrCost(unsigned Opcode) override { return Impl.getCFInstrCost(Opcode); } - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) override { + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) override { return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy); } - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) override { + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) override { + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) override { return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); } - unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace) override { + int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) override { return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); } - unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) override { + int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) override { + return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, + Alignment); + } + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef Indices, unsigned Alignment, + unsigned AddressSpace) override { return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); } - unsigned getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) override { + int getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) override { return Impl.getReductionCost(Opcode, Ty, IsPairwiseForm); } 
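A short usage sketch for the int-returning immediate-cost query; the result still compares against the TCC_* constants, and isFreeImm is a hypothetical helper:

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Hypothetical helper: can Imm be materialized for free for type Ty?
static bool isFreeImm(const TargetTransformInfo &TTI, const APInt &Imm,
                      Type *Ty) {
  return TTI.getIntImmCost(Imm, Ty) == TargetTransformInfo::TCC_Free;
}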
- unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys) override { + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Tys) override { return Impl.getIntrinsicInstrCost(ID, RetTy, Tys); } - unsigned getCallInstrCost(Function *F, Type *RetTy, - ArrayRef Tys) override { + int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) override { + return Impl.getIntrinsicInstrCost(ID, RetTy, Args); + } + int getCallInstrCost(Function *F, Type *RetTy, + ArrayRef Tys) override { return Impl.getCallInstrCost(F, RetTy, Tys); } unsigned getNumberOfParts(Type *Tp) override { return Impl.getNumberOfParts(Tp); } - unsigned getAddressComputationCost(Type *Ty, bool IsComplex) override { + int getAddressComputationCost(Type *Ty, bool IsComplex) override { return Impl.getAddressComputationCost(Ty, IsComplex); } unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) override { @@ -815,9 +855,9 @@ public: Type *ExpectedType) override { return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } - bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const override { - return Impl.hasCompatibleFunctionAttributes(Caller, Callee); + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const override { + return Impl.areInlineCompatible(Caller, Callee); } }; @@ -856,7 +896,7 @@ public: /// /// The callback will be called with a particular function for which the TTI /// is needed and must return a TTI object for that function. - TargetIRAnalysis(std::function TTICallback); + TargetIRAnalysis(std::function TTICallback); // Value semantics. We spell out the constructors for MSVC. TargetIRAnalysis(const TargetIRAnalysis &Arg) @@ -872,7 +912,7 @@ public: return *this; } - Result run(Function &F); + Result run(const Function &F); private: static char PassID; @@ -887,10 +927,10 @@ private: /// the analysis and thus use a function_ref which would be lighter weight. /// This may also be less error prone as the callback is likely to reference /// the external TargetMachine, and that reference needs to never dangle. - std::function TTICallback; + std::function TTICallback; /// \brief Helper function used as the callback in the default constructor. - static Result getDefaultTTI(Function &F); + static Result getDefaultTTI(const Function &F); }; /// \brief Wrapper pass for TargetTransformInfo. @@ -914,7 +954,7 @@ public: explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); - TargetTransformInfo &getTTI(Function &F); + TargetTransformInfo &getTTI(const Function &F); }; /// \brief Create an analysis pass wrapper around a TTI object. diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h index 035cb04870a1..43815234051e 100644 --- a/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -19,8 +19,10 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" +#include "llvm/Analysis/VectorUtils.h" namespace llvm { @@ -60,6 +62,14 @@ public: // Otherwise, the default basic cost is used. 
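The TargetIRAnalysis callback now receives a const Function &. A minimal sketch of constructing the analysis under the new signature; the conservative DataLayout-only TTI mirrors what the default constructor produces:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static TargetIRAnalysis makeConservativeTIRA() {
  return TargetIRAnalysis([](const Function &F) {
    // No TargetMachine available: fall back to the DataLayout-based TTI.
    return TargetTransformInfo(F.getParent()->getDataLayout());
  });
}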
return TTI::TCC_Basic; + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::UDiv: + case Instruction::URem: + return TTI::TCC_Expensive; + case Instruction::IntToPtr: { // An inttoptr cast is free so long as the input is a legal integer type // which doesn't contain values outside the range of a pointer. @@ -92,7 +102,8 @@ public: } } - unsigned getGEPCost(const Value *Ptr, ArrayRef Operands) { + unsigned getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) { // In the basic model, we just assume that all-constant GEPs will be folded // into their uses via addressing modes. for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) @@ -137,9 +148,6 @@ public: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: - case Intrinsic::experimental_gc_result_int: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: case Intrinsic::experimental_gc_relocate: // These intrinsics don't actually represent code after lowering. @@ -199,9 +207,13 @@ public: return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); } - bool isLegalMaskedStore(Type *DataType, int Consecutive) { return false; } + bool isLegalMaskedStore(Type *DataType) { return false; } - bool isLegalMaskedLoad(Type *DataType, int Consecutive) { return false; } + bool isLegalMaskedLoad(Type *DataType) { return false; } + + bool isLegalMaskedScatter(Type *DataType) { return false; } + + bool isLegalMaskedGather(Type *DataType) { return false; } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { @@ -226,6 +238,8 @@ public: bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } + bool enableInterleavedAccessVectorization() { return false; } + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { return TTI::PSK_Software; } @@ -287,6 +301,12 @@ public: return 1; } + unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, + unsigned Alignment) { + return 1; + } + unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, @@ -299,6 +319,10 @@ public: ArrayRef Tys) { return 1; } + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) { + return 1; + } unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) { return 1; @@ -321,8 +345,8 @@ public: return nullptr; } - bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const { + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const { return (Caller->getFnAttribute("target-cpu") == Callee->getFnAttribute("target-cpu")) && (Caller->getFnAttribute("target-features") == @@ -378,6 +402,61 @@ public: return static_cast(this)->getCallCost(F, Arguments.size()); } + using BaseT::getGEPCost; + + unsigned getGEPCost(Type *PointeeType, const Value *Ptr, + ArrayRef Operands) { + const GlobalValue *BaseGV = nullptr; + if (Ptr != nullptr) { + // TODO: will remove this when pointers have an opaque type. 
+ assert(Ptr->getType()->getScalarType()->getPointerElementType() == + PointeeType && + "explicit pointee type doesn't match operand's pointee type"); + BaseGV = dyn_cast(Ptr->stripPointerCasts()); + } + bool HasBaseReg = (BaseGV == nullptr); + int64_t BaseOffset = 0; + int64_t Scale = 0; + + // Assumes the address space is 0 when Ptr is nullptr. + unsigned AS = + (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); + auto GTI = gep_type_begin(PointerType::get(PointeeType, AS), Operands); + for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { + // We assume that the cost of Scalar GEP with constant index and the + // cost of Vector GEP with splat constant index are the same. + const ConstantInt *ConstIdx = dyn_cast(*I); + if (!ConstIdx) + if (auto Splat = getSplatValue(*I)) + ConstIdx = dyn_cast(Splat); + if (isa(*GTI)) { + int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); + if (ConstIdx) + BaseOffset += ConstIdx->getSExtValue() * ElementSize; + else { + // Needs scale register. + if (Scale != 0) + // No addressing mode takes two scale registers. + return TTI::TCC_Basic; + Scale = ElementSize; + } + } else { + StructType *STy = cast(*GTI); + // For structures the index is always splat or scalar constant + assert(ConstIdx && "Unexpected GEP index"); + uint64_t Field = ConstIdx->getZExtValue(); + BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); + } + } + + if (static_cast(this)->isLegalAddressingMode( + PointerType::get(*GTI, AS), const_cast(BaseGV), + BaseOffset, HasBaseReg, Scale, AS)) { + return TTI::TCC_Free; + } + return TTI::TCC_Basic; + } + using BaseT::getIntrinsicCost; unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, @@ -397,9 +476,9 @@ public: return TTI::TCC_Free; // Model all PHI nodes as free. if (const GEPOperator *GEP = dyn_cast(U)) { - SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); - return static_cast(this) - ->getGEPCost(GEP->getPointerOperand(), Indices); + SmallVector Indices(GEP->idx_begin(), GEP->idx_end()); + return static_cast(this)->getGEPCost( + GEP->getSourceElementType(), GEP->getPointerOperand(), Indices); } if (auto CS = ImmutableCallSite(U)) { diff --git a/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/include/llvm/Analysis/TypeBasedAliasAnalysis.h new file mode 100644 index 000000000000..7b44ac73f1fa --- /dev/null +++ b/include/llvm/Analysis/TypeBasedAliasAnalysis.h @@ -0,0 +1,93 @@ +//===- TypeBasedAliasAnalysis.h - Type-Based Alias Analysis -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for a metadata-based TBAA. See the source file for +/// details on the algorithm. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H +#define LLVM_ANALYSIS_TYPEBASEDALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A simple AA result that uses TBAA metadata to answer queries. 
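Matching the getUserCost change, callers now thread the GEP's explicit source element type through the cost query. A minimal sketch; gepCost is a hypothetical helper:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Hypothetical helper: query the cost of a GEP with its explicit pointee type.
static int gepCost(const TargetTransformInfo &TTI, const GEPOperator *GEP) {
  SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
  return TTI.getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
                        Indices);
}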
+class TypeBasedAAResult : public AAResultBase { + friend AAResultBase; + +public: + explicit TypeBasedAAResult(const TargetLibraryInfo &TLI) + : AAResultBase(TLI) {} + TypeBasedAAResult(TypeBasedAAResult &&Arg) : AAResultBase(std::move(Arg)) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); + FunctionModRefBehavior getModRefBehavior(const Function *F); + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); + +private: + bool Aliases(const MDNode *A, const MDNode *B) const; + bool PathAliases(const MDNode *A, const MDNode *B) const; +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class TypeBasedAA { +public: + typedef TypeBasedAAResult Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + TypeBasedAAResult run(Function &F, AnalysisManager *AM); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "TypeBasedAA"; } + +private: + static char PassID; +}; + +/// Legacy wrapper pass to provide the TypeBasedAAResult object. +class TypeBasedAAWrapperPass : public ImmutablePass { + std::unique_ptr Result; + +public: + static char ID; + + TypeBasedAAWrapperPass(); + + TypeBasedAAResult &getResult() { return *Result; } + const TypeBasedAAResult &getResult() const { return *Result; } + + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +//===--------------------------------------------------------------------===// +// +// createTypeBasedAAWrapperPass - This pass implements metadata-based +// type-based alias analysis. +// +ImmutablePass *createTypeBasedAAWrapperPass(); +} + +#endif diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 653821d02271..8e0291068472 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -16,20 +16,23 @@ #define LLVM_ANALYSIS_VALUETRACKING_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Instruction.h" #include "llvm/Support/DataTypes.h" namespace llvm { - class Value; - class Instruction; class APInt; - class DataLayout; - class StringRef; - class MDNode; + class AddOperator; class AssumptionCache; + class DataLayout; class DominatorTree; - class TargetLibraryInfo; + class Instruction; + class Loop; class LoopInfo; + class MDNode; + class StringRef; + class TargetLibraryInfo; + class Value; /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. @@ -46,9 +49,10 @@ namespace llvm { const DominatorTree *DT = nullptr); /// Compute known bits from the range metadata. /// \p KnownZero the set of bits that are known to be zero + /// \p KnownOne the set of bits that are known to be one void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, - APInt &KnownZero); - /// Returns true if LHS and RHS have no common bits set. 
+                                        APInt &KnownZero, APInt &KnownOne);
+  /// Return true if LHS and RHS have no common bits set.
   bool haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
                            AssumptionCache *AC = nullptr,
                            const Instruction *CxtI = nullptr,
@@ -66,7 +70,7 @@ namespace llvm {
   /// exactly one bit set when defined. For vectors return true if every
   /// element is known to be a power of two when defined. Supports values with
   /// integer or pointer type and vectors of integers. If 'OrZero' is set then
-  /// returns true if the given value is either a power of two or zero.
+  /// return true if the given value is either a power of two or zero.
   bool isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL,
                               bool OrZero = false, unsigned Depth = 0,
                               AssumptionCache *AC = nullptr,
@@ -82,6 +86,19 @@ namespace llvm {
                        const Instruction *CxtI = nullptr,
                        const DominatorTree *DT = nullptr);
+  /// Returns true if the given value is known to be non-negative.
+  bool isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth = 0,
+                          AssumptionCache *AC = nullptr,
+                          const Instruction *CxtI = nullptr,
+                          const DominatorTree *DT = nullptr);
+
+  /// isKnownNonEqual - Return true if the given values are known to be
+  /// non-equal when defined. Supports scalar integer types only.
+  bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+                       AssumptionCache *AC = nullptr,
+                       const Instruction *CxtI = nullptr,
+                       const DominatorTree *DT = nullptr);
+
   /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
   /// this predicate to simplify operations downstream. Mask is known to be
   /// zero for bits that V cannot have.
@@ -118,12 +135,12 @@ namespace llvm {
                        bool LookThroughSExt = false,
                        unsigned Depth = 0);
-  /// CannotBeNegativeZero - Return true if we can prove that the specified FP 
+  /// CannotBeNegativeZero - Return true if we can prove that the specified FP
   /// value is never equal to -0.0.
   ///
   bool CannotBeNegativeZero(const Value *V, unsigned Depth = 0);
-  /// CannotBeOrderedLessThanZero - Return true if we can prove that the 
+  /// CannotBeOrderedLessThanZero - Return true if we can prove that the
   /// specified FP value is either a NaN or never less than 0.0.
   ///
   bool CannotBeOrderedLessThanZero(const Value *V, unsigned Depth = 0);
@@ -134,7 +151,7 @@ namespace llvm {
   /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
   /// byte store (e.g. i16 0x1234), return null.
   Value *isBytewiseValue(Value *V);
-  
+
   /// FindInsertedValue - Given an aggregate and a sequence of indices, see if
   /// the scalar value indexed is already around as a register, for example if
   /// it were inserted directly into the aggregate.
@@ -156,7 +173,7 @@ namespace llvm {
     return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
                                             DL);
   }
-  
+
   /// getConstantStringInfo - This function computes the length of a
   /// null-terminated C string pointed to by V. If successful, it returns true
   /// and returns the string in Str. If unsuccessful, it returns false. This
@@ -227,7 +244,17 @@ namespace llvm {
                                       const Instruction *CtxI = nullptr,
                                       const DominatorTree *DT = nullptr,
                                       const TargetLibraryInfo *TLI = nullptr);
-  
+
+  /// Returns true if V is always a dereferenceable pointer with alignment
+  /// greater than or equal to that requested. If the context instruction is
+  /// specified, this performs context-sensitive analysis and returns true if
+  /// the pointer is dereferenceable at the specified instruction.
+  bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
+                                          const DataLayout &DL,
+                                          const Instruction *CtxI = nullptr,
+                                          const DominatorTree *DT = nullptr,
+                                          const TargetLibraryInfo *TLI = nullptr);
+
   /// isSafeToSpeculativelyExecute - Return true if the instruction does not
   /// have any effects besides calculating the result and does not have
   /// undefined behavior.
@@ -257,6 +284,16 @@ namespace llvm {
                                     const DominatorTree *DT = nullptr,
                                     const TargetLibraryInfo *TLI = nullptr);
+  /// Returns true if the result or effects of the given instruction \p I
+  /// depend on or influence global memory.
+  /// Memory dependence arises for example if the instruction reads from
+  /// memory or may produce effects or undefined behavior. Memory dependent
+  /// instructions generally cannot be reordered with respect to other memory
+  /// dependent instructions or moved into non-dominated basic blocks.
+  /// Instructions which just compute a value based on the values of their
+  /// operands are not memory dependent.
+  bool mayBeMemoryDependent(const Instruction &I);
+
   /// isKnownNonNull - Return true if this pointer couldn't possibly be null by
   /// its definition. This returns true for allocas, non-extern-weak globals
   /// and byval arguments.
@@ -288,16 +325,98 @@ namespace llvm {
                                              AssumptionCache *AC,
                                              const Instruction *CxtI,
                                              const DominatorTree *DT);
-  
+  OverflowResult computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+                                             const DataLayout &DL,
+                                             AssumptionCache *AC = nullptr,
+                                             const Instruction *CxtI = nullptr,
+                                             const DominatorTree *DT = nullptr);
+  /// This version also leverages the sign bit of Add if known.
+  OverflowResult computeOverflowForSignedAdd(AddOperator *Add,
+                                             const DataLayout &DL,
+                                             AssumptionCache *AC = nullptr,
+                                             const Instruction *CxtI = nullptr,
+                                             const DominatorTree *DT = nullptr);
+
+  /// Return true if this function can prove that the instruction I will
+  /// always transfer execution to one of its successors (including the next
+  /// instruction that follows within a basic block). E.g. this is not
+  /// guaranteed for function calls that could loop infinitely.
+  ///
+  /// In other words, this function returns false for instructions that may
+  /// transfer execution or fail to transfer execution in a way that is not
+  /// captured in the CFG nor in the sequence of instructions within a basic
+  /// block.
+  ///
+  /// Undefined behavior is assumed not to happen, so e.g. division is
+  /// guaranteed to transfer execution to the following instruction even
+  /// though division by zero might cause undefined behavior.
+  bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I);
+
+  /// Return true if this function can prove that the instruction I
+  /// is executed for every iteration of the loop L.
+  ///
+  /// Note that this currently only considers the loop header.
+  bool isGuaranteedToExecuteForEveryIteration(const Instruction *I,
+                                              const Loop *L);
+
+  /// Return true if this function can prove that I is guaranteed to yield
+  /// full-poison (all bits poison) if at least one of its operands is
+  /// full-poison (all bits poison).
+  ///
+  /// The exact rules for how poison propagates through instructions have
+  /// not been settled as of 2015-07-10, so this function is conservative
+  /// and only considers poison to be propagated in uncontroversial
+  /// cases. There is no attempt to track values that may be only partially
+  /// poison.
+ bool propagatesFullPoison(const Instruction *I); + + /// Return either nullptr or an operand of I such that I will trigger + /// undefined behavior if I is executed and that operand has a full-poison + /// value (all bits poison). + const Value *getGuaranteedNonFullPoisonOp(const Instruction *I); + + /// Return true if this function can prove that if PoisonI is executed + /// and yields a full-poison value (all bits poison), then that will + /// trigger undefined behavior. + /// + /// Note that this currently only considers the basic block that is + /// the parent of I. + bool isKnownNotFullPoison(const Instruction *PoisonI); + /// \brief Specific patterns of select instructions we can match. enum SelectPatternFlavor { SPF_UNKNOWN = 0, - SPF_SMIN, // Signed minimum - SPF_UMIN, // Unsigned minimum - SPF_SMAX, // Signed maximum - SPF_UMAX, // Unsigned maximum - SPF_ABS, // Absolute value - SPF_NABS // Negated absolute value + SPF_SMIN, /// Signed minimum + SPF_UMIN, /// Unsigned minimum + SPF_SMAX, /// Signed maximum + SPF_UMAX, /// Unsigned maximum + SPF_FMINNUM, /// Floating point minnum + SPF_FMAXNUM, /// Floating point maxnum + SPF_ABS, /// Absolute value + SPF_NABS /// Negated absolute value + }; + /// \brief Behavior when a floating point min/max is given one NaN and one + /// non-NaN as input. + enum SelectPatternNaNBehavior { + SPNB_NA = 0, /// NaN behavior not applicable. + SPNB_RETURNS_NAN, /// Given one NaN input, returns the NaN. + SPNB_RETURNS_OTHER, /// Given one NaN input, returns the non-NaN. + SPNB_RETURNS_ANY /// Given one NaN input, can return either (or + /// it has been determined that no operands can + /// be NaN). + }; + struct SelectPatternResult { + SelectPatternFlavor Flavor; + SelectPatternNaNBehavior NaNBehavior; /// Only applicable if Flavor is + /// SPF_FMINNUM or SPF_FMAXNUM. + bool Ordered; /// When implementing this min/max pattern as + /// fcmp; select, does the fcmp have to be + /// ordered? + + /// \brief Return true if \p SPF is a min or a max pattern. + static bool isMinOrMax(SelectPatternFlavor SPF) { + return !(SPF == SPF_UNKNOWN || SPF == SPF_ABS || SPF == SPF_NABS); + } }; /// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind /// and providing the out parameter results if we successfully match. @@ -314,9 +433,26 @@ namespace llvm { /// /// -> LHS = %a, RHS = i32 4, *CastOp = Instruction::SExt /// - SelectPatternFlavor matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, + SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp = nullptr); + /// Parse out a conservative ConstantRange from !range metadata. + /// + /// E.g. if RangeMD is !{i32 0, i32 10, i32 15, i32 20} then return [0, 20). + ConstantRange getConstantRangeFromMetadata(MDNode &RangeMD); + + /// Return true if RHS is known to be implied by LHS. A & B must be i1 + /// (boolean) values or a vector of such values. Note that the truth table for + /// implication is the same as <=u on i1 values (but not <=s!). 
The truth + /// table for both is: + /// | T | F (B) + /// T | T | F + /// F | T | T + /// (A) + bool isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CxtI = nullptr, + const DominatorTree *DT = nullptr); } // end namespace llvm #endif diff --git a/include/llvm/Analysis/VectorUtils.h b/include/llvm/Analysis/VectorUtils.h index d8e9ca42e623..531803adf5e4 100644 --- a/include/llvm/Analysis/VectorUtils.h +++ b/include/llvm/Analysis/VectorUtils.h @@ -14,15 +14,19 @@ #ifndef LLVM_TRANSFORMS_UTILS_VECTORUTILS_H #define LLVM_TRANSFORMS_UTILS_VECTORUTILS_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/MapVector.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" namespace llvm { +struct DemandedBits; class GetElementPtrInst; class Loop; class ScalarEvolution; +class TargetTransformInfo; class Type; class Value; @@ -62,8 +66,8 @@ Intrinsic::ID getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI); /// pointer. unsigned getGEPInductionOperand(const GetElementPtrInst *Gep); -/// \brief If the argument is a GEP, then returns the operand identified by -/// getGEPInductionOperand. However, if there is some other non-loop-invariant +/// \brief If the argument is a GEP, then returns the operand identified by +/// getGEPInductionOperand. However, if there is some other non-loop-invariant /// operand, it returns that instead. Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp); @@ -79,6 +83,50 @@ Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp); /// from the vector. Value *findScalarElement(Value *V, unsigned EltNo); +/// \brief Get splat value if the input is a splat vector or return nullptr. +/// The value may be extracted from a splat constants vector or from +/// a sequence of instructions that broadcast a single value into a vector. +const Value *getSplatValue(const Value *V); + +/// \brief Compute a map of integer instructions to their minimum legal type +/// size. +/// +/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int +/// type (e.g. i32) whenever arithmetic is performed on them. +/// +/// For targets with native i8 or i16 operations, usually InstCombine can shrink +/// the arithmetic type down again. However InstCombine refuses to create +/// illegal types, so for targets without i8 or i16 registers, the lengthening +/// and shrinking remains. +/// +/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when +/// their scalar equivalents do not, so during vectorization it is important to +/// remove these lengthens and truncates when deciding the profitability of +/// vectorization. +/// +/// This function analyzes the given range of instructions and determines the +/// minimum type size each can be converted to. It attempts to remove or +/// minimize type size changes across each def-use chain, so for example in the +/// following code: +/// +/// %1 = load i8, i8* +/// %2 = add i8 %1, 2 +/// %3 = load i16, i16* +/// %4 = zext i8 %2 to i32 +/// %5 = zext i16 %3 to i32 +/// %6 = add i32 %4, %5 +/// %7 = trunc i32 %6 to i16 +/// +/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes +/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}. +/// +/// If the optional TargetTransformInfo is provided, this function tries harder +/// to do less work by only looking at illegal types. 
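A short usage sketch for the new SelectPatternResult return type; isMinMaxIdiom is illustrative:

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Illustrative: recognize integer and FP min/max idioms behind a select.
// For the new SPF_FMINNUM/SPF_FMAXNUM flavors, NaNBehavior and Ordered
// describe the NaN semantics of the matched pattern.
static bool isMinMaxIdiom(SelectInst *SI) {
  Value *LHS, *RHS;
  SelectPatternResult SPR = matchSelectPattern(SI, LHS, RHS);
  return SelectPatternResult::isMinOrMax(SPR.Flavor);
}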
+MapVector +computeMinimumValueSizes(ArrayRef Blocks, + DemandedBits &DB, + const TargetTransformInfo *TTI=nullptr); + } // llvm namespace #endif diff --git a/include/llvm/AsmParser/Parser.h b/include/llvm/AsmParser/Parser.h index 52151409f946..96a15c1ec45c 100644 --- a/include/llvm/AsmParser/Parser.h +++ b/include/llvm/AsmParser/Parser.h @@ -18,6 +18,7 @@ namespace llvm { +class Constant; class LLVMContext; class Module; struct SlotMapping; @@ -79,6 +80,17 @@ std::unique_ptr parseAssembly(MemoryBufferRef F, SMDiagnostic &Err, bool parseAssemblyInto(MemoryBufferRef F, Module &M, SMDiagnostic &Err, SlotMapping *Slots = nullptr); +/// Parse a type and a constant value in the given string. +/// +/// The constant value can be any LLVM constant, including a constant +/// expression. +/// +/// \param Slots The optional slot mapping that will restore the parsing state +/// of the module. +/// \return null on error. +Constant *parseConstantValue(StringRef Asm, SMDiagnostic &Err, const Module &M, + const SlotMapping *Slots = nullptr); + } // End llvm namespace #endif diff --git a/include/llvm/AsmParser/SlotMapping.h b/include/llvm/AsmParser/SlotMapping.h index c5f61d25c3a8..bd7e8fcad8bc 100644 --- a/include/llvm/AsmParser/SlotMapping.h +++ b/include/llvm/AsmParser/SlotMapping.h @@ -14,6 +14,7 @@ #ifndef LLVM_ASMPARSER_SLOTMAPPING_H #define LLVM_ASMPARSER_SLOTMAPPING_H +#include "llvm/ADT/StringMap.h" #include "llvm/IR/TrackingMDRef.h" #include #include @@ -21,12 +22,19 @@ namespace llvm { class GlobalValue; +class Type; -/// This struct contains the mapping from the slot numbers to unnamed metadata -/// nodes and global values. +/// This struct contains the mappings from the slot numbers to unnamed metadata +/// nodes, global values and types. It also contains the mapping for the named +/// types. +/// It can be used to save the parsing state of an LLVM IR module so that the +/// textual references to the values in the module can be parsed outside of the +/// module's source. struct SlotMapping { std::vector GlobalValues; std::map MetadataNodes; + StringMap NamedTypes; + std::map Types; }; } // end namespace llvm diff --git a/include/llvm/Bitcode/BitcodeWriterPass.h b/include/llvm/Bitcode/BitcodeWriterPass.h index ae915c688ba0..a1272cf156e5 100644 --- a/include/llvm/Bitcode/BitcodeWriterPass.h +++ b/include/llvm/Bitcode/BitcodeWriterPass.h @@ -29,8 +29,12 @@ class PreservedAnalyses; /// /// If \c ShouldPreserveUseListOrder, encode use-list order so it can be /// reproduced when deserialized. +/// +/// If \c EmitFunctionSummary, emit the function summary index (currently +/// for use in ThinLTO optimization). ModulePass *createBitcodeWriterPass(raw_ostream &Str, - bool ShouldPreserveUseListOrder = false); + bool ShouldPreserveUseListOrder = false, + bool EmitFunctionSummary = false); /// \brief Pass for writing a module of IR out to a bitcode file. /// @@ -39,15 +43,21 @@ ModulePass *createBitcodeWriterPass(raw_ostream &Str, class BitcodeWriterPass { raw_ostream &OS; bool ShouldPreserveUseListOrder; + bool EmitFunctionSummary; public: /// \brief Construct a bitcode writer pass around a particular output stream. /// /// If \c ShouldPreserveUseListOrder, encode use-list order so it can be /// reproduced when deserialized. + /// + /// If \c EmitFunctionSummary, emit the function summary index (currently + /// for use in ThinLTO optimization). 
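The new stand-alone constant parser can be driven like this; a minimal sketch in which the parsed string is an arbitrary example:

#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/SourceMgr.h"

using namespace llvm;

static Constant *parseAnswer(const Module &M) {
  SMDiagnostic Err;
  // Parses a type and a constant in one string; returns null and fills Err
  // on failure.
  return parseConstantValue("i32 42", Err, M);
}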
explicit BitcodeWriterPass(raw_ostream &OS, - bool ShouldPreserveUseListOrder = false) - : OS(OS), ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} + bool ShouldPreserveUseListOrder = false, + bool EmitFunctionSummary = false) + : OS(OS), ShouldPreserveUseListOrder(ShouldPreserveUseListOrder), + EmitFunctionSummary(EmitFunctionSummary) {} /// \brief Run the bitcode writer pass, and output the module to the selected /// output stream. diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h index 4c040a7f3e22..c0cf6cde887f 100644 --- a/include/llvm/Bitcode/BitstreamReader.h +++ b/include/llvm/Bitcode/BitstreamReader.h @@ -325,6 +325,8 @@ public: // If we run out of data, stop at the end of the stream. if (BytesRead == 0) { + CurWord = 0; + BitsInCurWord = 0; Size = NextChar; return; } diff --git a/include/llvm/Bitcode/BitstreamWriter.h b/include/llvm/Bitcode/BitstreamWriter.h index 9f23023a1419..438f4a6fb69b 100644 --- a/include/llvm/Bitcode/BitstreamWriter.h +++ b/include/llvm/Bitcode/BitstreamWriter.h @@ -15,6 +15,8 @@ #ifndef LLVM_BITCODE_BITSTREAMWRITER_H #define LLVM_BITCODE_BITSTREAMWRITER_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitCodes.h" @@ -45,9 +47,9 @@ class BitstreamWriter { struct Block { unsigned PrevCodeSize; - unsigned StartSizeWord; + size_t StartSizeWord; std::vector> PrevAbbrevs; - Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} + Block(unsigned PCS, size_t SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {} }; /// BlockScope - This tracks the current blocks that we have entered. @@ -61,12 +63,6 @@ class BitstreamWriter { }; std::vector BlockInfoRecords; - // BackpatchWord - Backpatch a 32-bit word in the output with the specified - // value. - void BackpatchWord(unsigned ByteNo, unsigned NewWord) { - support::endian::write32le(&Out[ByteNo], NewWord); - } - void WriteByte(unsigned char Value) { Out.push_back(Value); } @@ -77,12 +73,10 @@ class BitstreamWriter { reinterpret_cast(&Value + 1)); } - unsigned GetBufferOffset() const { - return Out.size(); - } + size_t GetBufferOffset() const { return Out.size(); } - unsigned GetWordIndex() const { - unsigned Offset = GetBufferOffset(); + size_t GetWordIndex() const { + size_t Offset = GetBufferOffset(); assert((Offset & 3) == 0 && "Not 32-bit aligned"); return Offset / 4; } @@ -99,10 +93,25 @@ public: /// \brief Retrieve the current position in the stream, in bits. uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } + /// \brief Retrieve the number of bits currently used to encode an abbrev ID. + unsigned GetAbbrevIDWidth() const { return CurCodeSize; } + //===--------------------------------------------------------------------===// // Basic Primitives for emitting bits to the stream. //===--------------------------------------------------------------------===// + /// Backpatch a 32-bit word in the output at the given bit offset + /// with the specified value. 
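A sketch of requesting the new summary emission through the pass entry point; the wiring is illustrative:

#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void addSummaryEmittingWriter(legacy::PassManager &PM,
                                     raw_ostream &OS) {
  // EmitFunctionSummary=true additionally writes the ThinLTO function
  // summary block alongside the module.
  PM.add(createBitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false,
                                 /*EmitFunctionSummary=*/true));
}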
+ void BackpatchWord(uint64_t BitNo, unsigned NewWord) { + using namespace llvm::support; + unsigned ByteNo = BitNo / 8; + assert((!endian::readAtBitAlignment( + &Out[ByteNo], BitNo & 7)) && + "Expected to be patching over 0-value placeholders"); + endian::writeAtBitAlignment( + &Out[ByteNo], NewWord, BitNo & 7); + } + void Emit(uint32_t Val, unsigned NumBits) { assert(NumBits && NumBits <= 32 && "Invalid value size!"); assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); @@ -200,7 +209,7 @@ public: EmitVBR(CodeLen, bitc::CodeLenWidth); FlushToWord(); - unsigned BlockSizeWordIndex = GetWordIndex(); + size_t BlockSizeWordIndex = GetWordIndex(); unsigned OldCodeSize = CurCodeSize; // Emit a placeholder, which will be replaced when the block is popped. @@ -231,11 +240,11 @@ public: FlushToWord(); // Compute the size of the block, in words, not counting the size field. - unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1; - unsigned ByteNo = B.StartSizeWord*4; + size_t SizeInWords = GetWordIndex() - B.StartSizeWord - 1; + uint64_t BitNo = uint64_t(B.StartSizeWord) * 32; // Update the block size field in the header of this sub-block. - BackpatchWord(ByteNo, SizeInWords); + BackpatchWord(BitNo, SizeInWords); // Restore the inner block's code size and abbrev table. CurCodeSize = B.PrevCodeSize; @@ -285,10 +294,12 @@ private: /// EmitRecordWithAbbrevImpl - This is the core implementation of the record /// emission code. If BlobData is non-null, then it specifies an array of /// data that should be emitted as part of the Blob or Array operand that is - /// known to exist at the end of the record. - template - void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl &Vals, - StringRef Blob) { + /// known to exist at the end of the record. If Code is specified, then + /// it is the record code to emit before the Vals, which must not contain + /// the code. + template + void EmitRecordWithAbbrevImpl(unsigned Abbrev, ArrayRef Vals, + StringRef Blob, Optional Code) { const char *BlobData = Blob.data(); unsigned BlobLen = (unsigned) Blob.size(); unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV; @@ -297,9 +308,23 @@ private: EmitCode(Abbrev); + unsigned i = 0, e = static_cast(Abbv->getNumOperandInfos()); + if (Code) { + assert(e && "Expected non-empty abbreviation"); + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i++); + + if (Op.isLiteral()) + EmitAbbreviatedLiteral(Op, Code.getValue()); + else { + assert(Op.getEncoding() != BitCodeAbbrevOp::Array && + Op.getEncoding() != BitCodeAbbrevOp::Blob && + "Expected literal or scalar"); + EmitAbbreviatedField(Op, Code.getValue()); + } + } + unsigned RecordIdx = 0; - for (unsigned i = 0, e = static_cast(Abbv->getNumOperandInfos()); - i != e; ++i) { + for (; i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) { assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); @@ -307,7 +332,7 @@ private: ++RecordIdx; } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) { // Array case. - assert(i+2 == e && "array op not second to last?"); + assert(i + 2 == e && "array op not second to last?"); const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); // If this record has blob data, emit it, otherwise we must have record @@ -381,32 +406,29 @@ public: /// EmitRecord - Emit the specified record to the stream, using an abbrev if /// we have one to compress the output. 
- template - void EmitRecord(unsigned Code, SmallVectorImpl &Vals, - unsigned Abbrev = 0) { + template + void EmitRecord(unsigned Code, const Container &Vals, unsigned Abbrev = 0) { if (!Abbrev) { // If we don't have an abbrev to use, emit this in its fully unabbreviated // form. + auto Count = static_cast(makeArrayRef(Vals).size()); EmitCode(bitc::UNABBREV_RECORD); EmitVBR(Code, 6); - EmitVBR(static_cast(Vals.size()), 6); - for (unsigned i = 0, e = static_cast(Vals.size()); i != e; ++i) + EmitVBR(Count, 6); + for (unsigned i = 0, e = Count; i != e; ++i) EmitVBR64(Vals[i], 6); return; } - // Insert the code into Vals to treat it uniformly. - Vals.insert(Vals.begin(), Code); - - EmitRecordWithAbbrev(Abbrev, Vals); + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), Code); } /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. /// Unlike EmitRecord, the code for the record should be included in Vals as /// the first entry. - template - void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl &Vals) { - EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef()); + template + void EmitRecordWithAbbrev(unsigned Abbrev, const Container &Vals) { + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), StringRef(), None); } /// EmitRecordWithBlob - Emit the specified record to the stream, using an @@ -414,29 +436,30 @@ public: /// specified by the pointer and length specified at the end. In contrast to /// EmitRecord, this routine expects that the first entry in Vals is the code /// of the record. - template - void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl &Vals, + template + void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals, StringRef Blob) { - EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob); + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Blob, None); } - template - void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl &Vals, + template + void EmitRecordWithBlob(unsigned Abbrev, const Container &Vals, const char *BlobData, unsigned BlobLen) { - return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen)); + return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), + StringRef(BlobData, BlobLen), None); } /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records /// that end with an array. - template - void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl &Vals, - StringRef Array) { - EmitRecordWithAbbrevImpl(Abbrev, Vals, Array); + template + void EmitRecordWithArray(unsigned Abbrev, const Container &Vals, + StringRef Array) { + EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), Array, None); } - template - void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl &Vals, - const char *ArrayData, unsigned ArrayLen) { - return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData, - ArrayLen)); + template + void EmitRecordWithArray(unsigned Abbrev, const Container &Vals, + const char *ArrayData, unsigned ArrayLen) { + return EmitRecordWithAbbrevImpl(Abbrev, makeArrayRef(Vals), + StringRef(ArrayData, ArrayLen), None); } //===--------------------------------------------------------------------===// diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 7130ee755237..bcc84bedbed0 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -23,28 +23,52 @@ namespace llvm { namespace bitc { // The only top-level block type defined is for a module. 
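With the container-based EmitRecord above, callers no longer need a mutable SmallVector just so the record code can be prepended. A minimal sketch; the block ID and record code are placeholders:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/BitstreamWriter.h"

using namespace llvm;

static void writeToyBlock(SmallVectorImpl<char> &Buffer) {
  BitstreamWriter Stream(Buffer);
  Stream.EnterSubblock(bitc::FIRST_APPLICATION_BLOCKID, /*CodeLen=*/3);
  // Any contiguous container works, and Vals is no longer mutated to carry
  // the record code at the front.
  uint64_t Vals[] = {1, 2, 3};
  Stream.EmitRecord(/*Code=*/1, Vals);
  Stream.ExitBlock(); // backpatches the block-length placeholder
}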
-  enum BlockIDs {
-    // Blocks
-    MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
+enum BlockIDs {
+  // Blocks
+  MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
 
-    // Module sub-block id's.
-    PARAMATTR_BLOCK_ID,
-    PARAMATTR_GROUP_BLOCK_ID,
+  // Module sub-block id's.
+  PARAMATTR_BLOCK_ID,
+  PARAMATTR_GROUP_BLOCK_ID,
 
-    CONSTANTS_BLOCK_ID,
-    FUNCTION_BLOCK_ID,
+  CONSTANTS_BLOCK_ID,
+  FUNCTION_BLOCK_ID,
 
-    UNUSED_ID1,
+  // Block intended to contain information on the bitcode versioning.
+  // Can be used to provide better error messages when we fail to parse a
+  // bitcode file.
+  IDENTIFICATION_BLOCK_ID,
 
-    VALUE_SYMTAB_BLOCK_ID,
-    METADATA_BLOCK_ID,
-    METADATA_ATTACHMENT_ID,
+  VALUE_SYMTAB_BLOCK_ID,
+  METADATA_BLOCK_ID,
+  METADATA_ATTACHMENT_ID,
 
-    TYPE_BLOCK_ID_NEW,
+  TYPE_BLOCK_ID_NEW,
 
-    USELIST_BLOCK_ID
-  };
+  USELIST_BLOCK_ID,
+  MODULE_STRTAB_BLOCK_ID,
+  FUNCTION_SUMMARY_BLOCK_ID,
+
+  OPERAND_BUNDLE_TAGS_BLOCK_ID,
+
+  METADATA_KIND_BLOCK_ID
+};
+
+/// Identification block contains a string that describes the producer details,
+/// and an epoch that defines the auto-upgrade capability.
+enum IdentificationCodes {
+  IDENTIFICATION_CODE_STRING = 1, // IDENTIFICATION: [strchr x N]
+  IDENTIFICATION_CODE_EPOCH = 2,  // EPOCH: [epoch#]
+};
+
+/// The epoch that defines the auto-upgrade compatibility for the bitcode.
+///
+/// LLVM guarantees in a major release that a minor release can read bitcode
+/// generated by previous minor releases. We translate this by making the
+/// reader accept only bitcode with the same epoch, except for the X.0 release
+/// which also accepts N-1.
+enum { BITCODE_CURRENT_EPOCH = 0 };
 
   /// MODULE blocks have a number of optional fields and subblocks.
   enum ModuleCodes {
@@ -66,13 +90,21 @@ namespace bitc {
     MODULE_CODE_FUNCTION = 8,
 
     // ALIAS: [alias type, aliasee val#, linkage, visibility]
-    MODULE_CODE_ALIAS = 9,
+    MODULE_CODE_ALIAS_OLD = 9,
 
     // MODULE_CODE_PURGEVALS: [numvals]
     MODULE_CODE_PURGEVALS = 10,
 
     MODULE_CODE_GCNAME = 11, // GCNAME: [strchr x N]
     MODULE_CODE_COMDAT = 12, // COMDAT: [selection_kind, name]
+
+    MODULE_CODE_VSTOFFSET = 13, // VSTOFFSET: [offset]
+
+    // ALIAS: [alias value type, addrspace, aliasee val#, linkage, visibility]
+    MODULE_CODE_ALIAS = 14,
+
+    // METADATA_VALUES: [numvals]
+    MODULE_CODE_METADATA_VALUES = 15,
   };
 
   /// PARAMATTR blocks have code for defining a parameter attribute set.
@@ -121,7 +153,13 @@ namespace bitc {
     TYPE_CODE_STRUCT_NAME = 19,  // STRUCT_NAME: [strchr x N]
     TYPE_CODE_STRUCT_NAMED = 20, // STRUCT_NAMED: [ispacked, eltty x N]
 
-    TYPE_CODE_FUNCTION = 21 // FUNCTION: [vararg, retty, paramty x N]
+    TYPE_CODE_FUNCTION = 21, // FUNCTION: [vararg, retty, paramty x N]
+
+    TYPE_CODE_TOKEN = 22 // TOKEN
+  };
+
+  enum OperandBundleTagCode {
+    OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N]
   };
 
   // The type symbol table only has one code (TST_ENTRY_CODE).
@@ -129,10 +167,25 @@ namespace bitc {
     TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N]
   };
 
-  // The value symbol table only has one code (VST_ENTRY_CODE).
+  // Value symbol table codes.
   enum ValueSymtabCodes {
-    VST_CODE_ENTRY = 1,  // VST_ENTRY: [valid, namechar x N]
-    VST_CODE_BBENTRY = 2 // VST_BBENTRY: [bbid, namechar x N]
+    VST_CODE_ENTRY = 1,   // VST_ENTRY: [valueid, namechar x N]
+    VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N]
+    VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N]
+    // VST_COMBINED_FNENTRY: [offset, namechar x N]
+    VST_CODE_COMBINED_FNENTRY = 4
+  };
+
+  // The module path symbol table only has one code (MST_CODE_ENTRY).
+ enum ModulePathSymtabCodes { + MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] + }; + + // The function summary section uses different codes in the per-module + // and combined index cases. + enum FunctionSummarySymtabCodes { + FS_CODE_PERMODULE_ENTRY = 1, // FS_ENTRY: [valueid, islocal, instcount] + FS_CODE_COMBINED_ENTRY = 2, // FS_ENTRY: [modid, instcount] }; enum MetadataCodes { @@ -167,7 +220,9 @@ namespace bitc { METADATA_EXPRESSION = 29, // [distinct, n x element] METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...] METADATA_IMPORTED_ENTITY=31, // [distinct, tag, scope, entity, line, name] - METADATA_MODULE=32, // [distinct, scope, name, ...] + METADATA_MODULE = 32, // [distinct, scope, name, ...] + METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] + METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each @@ -287,6 +342,16 @@ namespace bitc { SYNCHSCOPE_CROSSTHREAD = 1 }; + /// Markers and flags for call instruction. + enum CallMarkersFlags { + CALL_TAIL = 0, + CALL_CCONV = 1, + CALL_MUSTTAIL = 14, + CALL_EXPLICIT_TYPE = 15, + CALL_NOTAIL = 16, + CALL_FMF = 17 // Call has optional fast-math-flags. + }; + // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It // can contain a constant block (CONSTANTS_BLOCK_ID). enum FunctionCodes { @@ -354,6 +419,14 @@ namespace bitc { FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align, // vol,ordering,synchscope] FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...] + FUNC_CODE_INST_CLEANUPRET = 48, // CLEANUPRET: [val] or [val,bb#] + FUNC_CODE_INST_CATCHRET = 49, // CATCHRET: [val,bb#] + FUNC_CODE_INST_CATCHPAD = 50, // CATCHPAD: [bb#,bb#,num,args...] + FUNC_CODE_INST_CLEANUPPAD = 51, // CLEANUPPAD: [num,args...] + FUNC_CODE_INST_CATCHSWITCH = 52, // CATCHSWITCH: [num,args...] or [num,args...,bb] + // 53 is unused. + // 54 is unused. + FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] }; enum UseListCodes { @@ -407,7 +480,12 @@ namespace bitc { ATTR_KIND_DEREFERENCEABLE_OR_NULL = 42, ATTR_KIND_CONVERGENT = 43, ATTR_KIND_SAFESTACK = 44, - ATTR_KIND_ARGMEMONLY = 45 + ATTR_KIND_ARGMEMONLY = 45, + ATTR_KIND_SWIFT_SELF = 46, + ATTR_KIND_SWIFT_ERROR = 47, + ATTR_KIND_NO_RECURSE = 48, + ATTR_KIND_INACCESSIBLEMEM_ONLY = 49, + ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY = 50 }; enum ComdatSelectionKindCodes { diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index 452ec3bd0187..60d865fd2355 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -15,6 +15,7 @@ #define LLVM_BITCODE_READERWRITER_H #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" @@ -36,27 +37,54 @@ namespace llvm { ErrorOr> getLazyBitcodeModule(std::unique_ptr &&Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler = nullptr, bool ShouldLazyLoadMetadata = false); /// Read the header of the specified stream and prepare for lazy /// deserialization and streaming of function bodies. 
- ErrorOr> getStreamedBitcodeModule( - StringRef Name, std::unique_ptr Streamer, - LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler = nullptr); + ErrorOr> + getStreamedBitcodeModule(StringRef Name, + std::unique_ptr Streamer, + LLVMContext &Context); /// Read the header of the specified bitcode buffer and extract just the /// triple information. If successful, this returns a string. On error, this /// returns "". - std::string - getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler = nullptr); + std::string getBitcodeTargetTriple(MemoryBufferRef Buffer, + LLVMContext &Context); + + /// Read the header of the specified bitcode buffer and extract just the + /// producer string information. If successful, this returns a string. On + /// error, this returns "". + std::string getBitcodeProducerString(MemoryBufferRef Buffer, + LLVMContext &Context); /// Read the specified bitcode file, returning the module. - ErrorOr> - parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler = nullptr); + ErrorOr> parseBitcodeFile(MemoryBufferRef Buffer, + LLVMContext &Context); + + /// Check if the given bitcode buffer contains a function summary block. + bool hasFunctionSummary(MemoryBufferRef Buffer, + DiagnosticHandlerFunction DiagnosticHandler); + + /// Parse the specified bitcode buffer, returning the function info index. + /// If IsLazy is true, parse the entire function summary into + /// the index. Otherwise skip the function summary section, and only create + /// an index object with a map from function name to function summary offset. + /// The index is used to perform lazy function summary reading later. + ErrorOr> + getFunctionInfoIndex(MemoryBufferRef Buffer, + DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false); + + /// This method supports lazy reading of function summary data from the + /// combined index during function importing. When reading the combined index + /// file, getFunctionInfoIndex is first invoked with IsLazy=true. + /// Then this method is called for each function considered for importing, + /// to parse the summary information for the given function name into + /// the index. + std::error_code readFunctionSummary( + MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler, + StringRef FunctionName, std::unique_ptr Index); /// \brief Write the specified module to the specified raw output stream. /// @@ -66,8 +94,18 @@ namespace llvm { /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a /// Value in \c M. These will be reconstructed exactly when \a M is /// deserialized. + /// + /// If \c EmitFunctionSummary, emit the function summary index (currently + /// for use in ThinLTO optimization). void WriteBitcodeToFile(const Module *M, raw_ostream &Out, - bool ShouldPreserveUseListOrder = false); + bool ShouldPreserveUseListOrder = false, + bool EmitFunctionSummary = false); + + /// Write the specified function summary index to the given raw output stream, + /// where it will be written in a new bitcode block. This is used when + /// writing the combined index file for ThinLTO. + void WriteFunctionSummaryToFile(const FunctionInfoIndex &Index, + raw_ostream &Out); /// isBitcodeWrapper - Return true if the given bytes are the magic bytes /// for an LLVM IR bitcode wrapper. 
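A hedged sketch of the lazy summary-reading flow described above; the wrapper name readOneSummary and its error handling are invented, while getFunctionInfoIndex and readFunctionSummary are the APIs declared in this header.

  #include "llvm/Bitcode/ReaderWriter.h"
  #include "llvm/Support/MemoryBuffer.h"

  std::error_code readOneSummary(llvm::MemoryBufferRef Buf,
                                 llvm::StringRef FnName,
                                 llvm::DiagnosticHandlerFunction DiagHandler) {
    // IsLazy=true builds only the function-name -> summary-offset map.
    llvm::ErrorOr<std::unique_ptr<llvm::FunctionInfoIndex>> IndexOrErr =
        llvm::getFunctionInfoIndex(Buf, DiagHandler, /*IsLazy=*/true);
    if (std::error_code EC = IndexOrErr.getError())
      return EC;
    // Pull the summary for a single import candidate into the index.
    return llvm::readFunctionSummary(Buf, DiagHandler, FnName,
                                     std::move(*IndexOrErr));
  }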
@@ -159,7 +197,7 @@ namespace llvm { BitcodeDiagnosticInfo(std::error_code EC, DiagnosticSeverity Severity, const Twine &Msg); void print(DiagnosticPrinter &DP) const override; - std::error_code getError() const { return EC; }; + std::error_code getError() const { return EC; } static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_Bitcode; diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h index 82d1e8ada17d..38e64ad3be29 100644 --- a/include/llvm/CodeGen/Analysis.h +++ b/include/llvm/CodeGen/Analysis.h @@ -15,6 +15,7 @@ #define LLVM_CODEGEN_ANALYSIS_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/IR/CallSite.h" @@ -23,6 +24,8 @@ namespace llvm { class GlobalValue; +class MachineBasicBlock; +class MachineFunction; class TargetLoweringBase; class TargetLowering; class TargetMachine; @@ -37,7 +40,7 @@ struct EVT; /// Given an LLVM IR aggregate type and a sequence of insertvalue or /// extractvalue indices that identify a member, return the linearized index of /// the start of the member, i.e the number of element in memory before the -/// seeked one. This is disconnected from the number of bytes. +/// sought one. This is disconnected from the number of bytes. /// /// \param Ty is the type indexed by \p Indices. /// \param Indices is an optional pointer in the indices list to the current @@ -115,6 +118,9 @@ bool returnTypeIsEligibleForTailCall(const Function *F, // or we are in LTO. bool canBeOmittedFromSymbolTable(const GlobalValue *GV); +DenseMap +getFuncletMembership(const MachineFunction &MF); + } // End llvm namespace #endif diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index fe7efae325c4..f5e778b2f262 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -165,6 +165,9 @@ public: /// Return information about data layout. const DataLayout &getDataLayout() const; + /// Return the pointer size from the TargetMachine + unsigned getPointerSize() const; + /// Return information about subtarget. const MCSubtargetInfo &getSubtargetInfo() const; @@ -233,7 +236,12 @@ public: /// Print assembly representations of the jump tables used by the current /// function to the current output stream. /// - void EmitJumpTableInfo(); + virtual void EmitJumpTableInfo(); + + /// Emit the control variable for an emulated TLS variable. + virtual void EmitEmulatedTLSControlVariable(const GlobalVariable *GV, + MCSymbol *EmittedSym, + bool AllZeroInitValue); /// Emit the specified global variable to the .s file. virtual void EmitGlobalVariable(const GlobalVariable *GV); @@ -254,7 +262,7 @@ public: const MCExpr *lowerConstant(const Constant *CV); /// \brief Print a general LLVM constant to the .s file. - void EmitGlobalConstant(const Constant *CV); + void EmitGlobalConstant(const DataLayout &DL, const Constant *CV); /// \brief Unnamed constant global variables solely contaning a pointer to /// another globals variable act like a global variable "proxy", or GOT @@ -317,7 +325,9 @@ public: /// Targets can override this to change how global constants that are part of /// a C++ static/global constructor list are emitted. 
- virtual void EmitXXStructor(const Constant *CV) { EmitGlobalConstant(CV); } + virtual void EmitXXStructor(const DataLayout &DL, const Constant *CV) { + EmitGlobalConstant(DL, CV); + } /// Return true if the basic block has exactly one predecessor and the control /// transfer mechanism between the predecessor and this block is a @@ -404,9 +414,6 @@ public: void EmitULEB128(uint64_t Value, const char *Desc = nullptr, unsigned PadTo = 0) const; - /// Emit a .byte 42 directive for a DW_CFA_xxx value. - void EmitCFAByte(unsigned Val) const; - /// Emit a .byte 42 directive that corresponds to an encoding. If verbose /// assembly output is enabled, we output comments describing the encoding. /// Desc is a string saying what the encoding is specifying (e.g. "LSDA"). @@ -446,7 +453,16 @@ public: void emitCFIInstruction(const MCCFIInstruction &Inst) const; /// \brief Emit Dwarf abbreviation table. - void emitDwarfAbbrevs(const std::vector& Abbrevs) const; + template void emitDwarfAbbrevs(const T &Abbrevs) const { + // For each abbreviation. + for (const auto &Abbrev : Abbrevs) + emitDwarfAbbrev(*Abbrev); + + // Mark end of abbreviations. + EmitULEB128(0, "EOM(3)"); + } + + void emitDwarfAbbrev(const DIEAbbrev &Abbrev) const; /// \brief Recursively emit Dwarf DIE tree. void emitDwarfDIE(const DIE &Die) const; @@ -532,7 +548,8 @@ private: void EmitLLVMUsedList(const ConstantArray *InitList); /// Emit llvm.ident metadata in an '.ident' directive. void EmitModuleIdents(Module &M); - void EmitXXStructorList(const Constant *List, bool isCtor); + void EmitXXStructorList(const DataLayout &DL, const Constant *List, + bool isCtor); GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy &C); }; } diff --git a/include/llvm/CodeGen/AtomicExpandUtils.h b/include/llvm/CodeGen/AtomicExpandUtils.h new file mode 100644 index 000000000000..ac18eac8a1ce --- /dev/null +++ b/include/llvm/CodeGen/AtomicExpandUtils.h @@ -0,0 +1,57 @@ +//===-- AtomicExpandUtils.h - Utilities for expanding atomic instructions -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/IRBuilder.h" + +namespace llvm { +class Value; +class AtomicRMWInst; + + +/// Parameters (see the expansion example below): +/// (the builder, %addr, %loaded, %new_val, ordering, +/// /* OUT */ %success, /* OUT */ %new_loaded) +typedef function_ref &, Value *, Value *, Value *, + AtomicOrdering, Value *&, Value *&)> CreateCmpXchgInstFun; + +/// \brief Expand an atomic RMW instruction into a loop utilizing +/// cmpxchg. You'll want to make sure your target machine likes cmpxchg +/// instructions in the first place and that there isn't another, better, +/// transformation available (for example AArch32/AArch64 have linked loads). +/// +/// This is useful in passes which can't rewrite the more exotic RMW +/// instructions directly into a platform specific intrinsics (because, say, +/// those intrinsics don't exist). If such a pass is able to expand cmpxchg +/// instructions directly however, then, with this function, it could avoid two +/// extra module passes (avoiding passes by `-atomic-expand` and itself). A +/// specific example would be PNaCl's `RewriteAtomics` pass. +/// +/// Given: atomicrmw some_op iN* %addr, iN %incr ordering +/// +/// The standard expansion we produce is: +/// [...] 
+/// %init_loaded = load atomic iN* %addr +/// br label %loop +/// loop: +/// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] +/// %new = some_op iN %loaded, %incr +/// ; This is what the CreateCmpXchgInstFun callback produces: +/// %pair = cmpxchg iN* %addr, iN %loaded, iN %new +/// %new_loaded = extractvalue { iN, i1 } %pair, 0 +/// %success = extractvalue { iN, i1 } %pair, 1 +/// ; End callback produced IR +/// br i1 %success, label %atomicrmw.end, label %loop +/// atomicrmw.end: +/// [...] +/// +/// Returns true if the containing function was modified. +bool +expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); +} diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index 9ba25169fda6..d99054eb6f36 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -166,7 +166,7 @@ public: } if (IID == Intrinsic::ctlz) { - if (getTLI()->isCheapToSpeculateCtlz()) + if (getTLI()->isCheapToSpeculateCtlz()) return TargetTransformInfo::TCC_Basic; return TargetTransformInfo::TCC_Expensive; } @@ -256,7 +256,7 @@ public: for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) if (isa<CallInst>(J) || isa<InvokeInst>(J)) { - ImmutableCallSite CS(J); + ImmutableCallSite CS(&*J); if (const Function *F = CS.getCalledFunction()) { if (!static_cast<T *>(this)->isLoweredToCall(F)) continue; @@ -302,12 +302,8 @@ public: if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. - // If the type is split to multiple registers, assume that there is some - // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2 * OpCost; - return LT.first * 1 * OpCost; + return LT.first * OpCost; } if (!TLI->isOperationExpand(ISD, LT.second)) { @@ -496,13 +492,11 @@ public: // itself. Unless the corresponding extending load or truncating store is // legal, then this will scalarize. TargetLowering::LegalizeAction LA = TargetLowering::Expand; - EVT MemVT = getTLI()->getValueType(DL, Src, true); - if (MemVT.isSimple() && MemVT != MVT::Other) { - if (Opcode == Instruction::Store) - LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); - else - LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); - } + EVT MemVT = getTLI()->getValueType(DL, Src); + if (Opcode == Instruction::Store) + LA = getTLI()->getTruncStoreAction(LT.second, MemVT); + else + LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { // This is a vector load/store for some illegal type that is scalarized. @@ -530,7 +524,8 @@ public: VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts); // Firstly, the cost of load/store operation. - unsigned Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace); + unsigned Cost = static_cast<T *>(this)->getMemoryOpCost( + Opcode, VecTy, Alignment, AddressSpace); // Then plus the cost of interleave operation. if (Opcode == Instruction::Load) { @@ -545,18 +540,20 @@ assert(Indices.size() <= Factor && "Interleaved memory op has too many members"); + for (unsigned Index : Indices) { assert(Index < Factor && "Invalid index for interleaved memory op"); // Extract elements from loaded vector for each sub vector.
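Looking back at AtomicExpandUtils.h above: a minimal sketch of a CreateCmpXchgInstFun callback that emits the "%pair = cmpxchg ..." section of the documented expansion. The choice of a plain cmpxchg and the monotonic failure ordering are assumptions, not requirements of the header.

  #include "llvm/CodeGen/AtomicExpandUtils.h"

  static void createCmpXchg(llvm::IRBuilder<> &Builder, llvm::Value *Addr,
                            llvm::Value *Loaded, llvm::Value *NewVal,
                            llvm::AtomicOrdering Ordering,
                            llvm::Value *&Success, llvm::Value *&NewLoaded) {
    llvm::Value *Pair = Builder.CreateAtomicCmpXchg(
        Addr, Loaded, NewVal, Ordering, /*FailureOrdering=*/llvm::Monotonic);
    NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
    Success = Builder.CreateExtractValue(Pair, 1, "success");
  }
  // Usage: expandAtomicRMWToCmpXchg(RMW, createCmpXchg);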
for (unsigned i = 0; i < NumSubElts; i++) - Cost += getVectorInstrCost(Instruction::ExtractElement, VT, - Index + i * Factor); + Cost += static_cast(this)->getVectorInstrCost( + Instruction::ExtractElement, VT, Index + i * Factor); } unsigned InsSubCost = 0; for (unsigned i = 0; i < NumSubElts; i++) - InsSubCost += getVectorInstrCost(Instruction::InsertElement, SubVT, i); + InsSubCost += static_cast(this)->getVectorInstrCost( + Instruction::InsertElement, SubVT, i); Cost += Indices.size() * InsSubCost; } else { @@ -571,17 +568,51 @@ public: unsigned ExtSubCost = 0; for (unsigned i = 0; i < NumSubElts; i++) - ExtSubCost += getVectorInstrCost(Instruction::ExtractElement, SubVT, i); - - Cost += Factor * ExtSubCost; + ExtSubCost += static_cast(this)->getVectorInstrCost( + Instruction::ExtractElement, SubVT, i); + Cost += ExtSubCost * Factor; for (unsigned i = 0; i < NumElts; i++) - Cost += getVectorInstrCost(Instruction::InsertElement, VT, i); + Cost += static_cast(this) + ->getVectorInstrCost(Instruction::InsertElement, VT, i); } return Cost; } + /// Get intrinsic cost based on arguments + unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef Args) { + switch (IID) { + default: { + SmallVector Types; + for (Value *Op : Args) + Types.push_back(Op->getType()); + return getIntrinsicInstrCost(IID, RetTy, Types); + } + case Intrinsic::masked_scatter: { + Value *Mask = Args[3]; + bool VarMask = !isa(Mask); + unsigned Alignment = cast(Args[2])->getZExtValue(); + return + static_cast(this)->getGatherScatterOpCost(Instruction::Store, + Args[0]->getType(), + Args[1], VarMask, + Alignment); + } + case Intrinsic::masked_gather: { + Value *Mask = Args[2]; + bool VarMask = !isa(Mask); + unsigned Alignment = cast(Args[1])->getZExtValue(); + return + static_cast(this)->getGatherScatterOpCost(Instruction::Load, + RetTy, Args[0], VarMask, + Alignment); + } + } + } + + /// Get intrinsic cost based on argument types unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Tys) { unsigned ISD = 0; @@ -800,7 +831,7 @@ class BasicTTIImpl : public BasicTTIImplBase { const TargetLoweringBase *getTLI() const { return TLI; } public: - explicit BasicTTIImpl(const TargetMachine *ST, Function &F); + explicit BasicTTIImpl(const TargetMachine *ST, const Function &F); // Provide value semantics. MSVC requires that we spell all of these out. BasicTTIImpl(const BasicTTIImpl &Arg) diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h index 91fb0a9d7e77..17c9415a81cb 100644 --- a/include/llvm/CodeGen/CalcSpillWeights.h +++ b/include/llvm/CodeGen/CalcSpillWeights.h @@ -20,6 +20,7 @@ namespace llvm { class LiveIntervals; class MachineBlockFrequencyInfo; class MachineLoopInfo; + class VirtRegMap; /// \brief Normalize the spill weight of a live interval /// @@ -51,6 +52,7 @@ namespace llvm { private: MachineFunction &MF; LiveIntervals &LIS; + VirtRegMap *VRM; const MachineLoopInfo &Loops; const MachineBlockFrequencyInfo &MBFI; DenseMap Hint; @@ -58,10 +60,10 @@ namespace llvm { public: VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis, - const MachineLoopInfo &loops, + VirtRegMap *vrm, const MachineLoopInfo &loops, const MachineBlockFrequencyInfo &mbfi, NormalizingFn norm = normalizeSpillWeight) - : MF(mf), LIS(lis), Loops(loops), MBFI(mbfi), normalize(norm) {} + : MF(mf), LIS(lis), VRM(vrm), Loops(loops), MBFI(mbfi), normalize(norm) {} /// \brief (re)compute li's spill weight and allocation hint. 
void calculateSpillWeightAndHint(LiveInterval &li); @@ -70,6 +72,7 @@ namespace llvm { /// \brief Compute spill weights and allocation hints for all virtual register /// live intervals. void calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, + VirtRegMap *VRM, const MachineLoopInfo &MLI, const MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo::NormalizingFn norm = diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 1fd4eeb46b38..415abb90da57 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -201,6 +201,7 @@ private: LLVMContext &Context; unsigned StackOffset; + unsigned MaxStackArgAlign; SmallVector UsedRegs; SmallVector PendingLocs; @@ -270,7 +271,18 @@ public: CallingConv::ID getCallingConv() const { return CallingConv; } bool isVarArg() const { return IsVarArg; } - unsigned getNextStackOffset() const { return StackOffset; } + /// getNextStackOffset - Return the next stack offset such that all stack + /// slots satisfy their alignment requirements. + unsigned getNextStackOffset() const { + return StackOffset; + } + + /// getAlignedCallFrameSize - Return the size of the call frame needed to + /// be able to store all arguments and such that the alignment requirement + /// of each of the arguments is satisfied. + unsigned getAlignedCallFrameSize() const { + return RoundUpToAlignment(StackOffset, MaxStackArgAlign); + } /// isAllocated - Return true if the specified register (or an alias) is /// allocated. @@ -357,7 +369,7 @@ public: /// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive /// registers. If this is not possible, return zero. Otherwise, return the first /// register of the block that were allocated, marking the entire block as allocated. - unsigned AllocateRegBlock(ArrayRef Regs, unsigned RegsRequired) { + unsigned AllocateRegBlock(ArrayRef Regs, unsigned RegsRequired) { if (RegsRequired > Regs.size()) return 0; @@ -400,9 +412,10 @@ public: /// and alignment. unsigned AllocateStack(unsigned Size, unsigned Align) { assert(Align && ((Align - 1) & Align) == 0); // Align is power of 2. 
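// Worked example (hypothetical sizes/alignments) of the bookkeeping below,
// starting from StackOffset = 0 and MaxStackArgAlign = 1:
//   AllocateStack(4, 4) -> returns 0;  StackOffset = 4;  MaxStackArgAlign = 4
//   AllocateStack(8, 8) -> returns 8;  StackOffset = 16; MaxStackArgAlign = 8
//   AllocateStack(4, 4) -> returns 16; StackOffset = 20; MaxStackArgAlign = 8
// getNextStackOffset()      == 20
// getAlignedCallFrameSize() == RoundUpToAlignment(20, 8) == 24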
- StackOffset = ((StackOffset + Align - 1) & ~(Align - 1)); + StackOffset = RoundUpToAlignment(StackOffset, Align); unsigned Result = StackOffset; StackOffset += Size; + MaxStackArgAlign = std::max(Align, MaxStackArgAlign); MF.getFrameInfo()->ensureMaxAlignment(Align); return Result; } diff --git a/include/llvm/CodeGen/CommandFlags.h b/include/llvm/CodeGen/CommandFlags.h index bedb7d5549eb..0d37dc00422f 100644 --- a/include/llvm/CodeGen/CommandFlags.h +++ b/include/llvm/CodeGen/CommandFlags.h @@ -182,6 +182,11 @@ OverrideStackAlignment("stack-alignment", cl::desc("Override default stack alignment"), cl::init(0)); +cl::opt +StackRealign("stackrealign", + cl::desc("Force align the stack to the minimum alignment"), + cl::init(false)); + cl::opt TrapFuncName("trap-func", cl::Hidden, cl::desc("Emit a call to trap function rather than a trap instruction"), @@ -219,6 +224,10 @@ FunctionSections("function-sections", cl::desc("Emit functions into separate sections"), cl::init(false)); +cl::opt EmulatedTLS("emulated-tls", + cl::desc("Use emulated TLS model"), + cl::init(false)); + cl::opt UniqueSectionNames("unique-section-names", cl::desc("Give unique names to every section"), cl::init(true)); @@ -238,6 +247,26 @@ JTableType("jump-table-type", "Create one table per unique function type."), clEnumValEnd)); +cl::opt EABIVersion( + "meabi", cl::desc("Set EABI type (default depends on triple):"), + cl::init(EABI::Default), + cl::values(clEnumValN(EABI::Default, "default", + "Triple default EABI version"), + clEnumValN(EABI::EABI4, "4", "EABI version 4"), + clEnumValN(EABI::EABI5, "5", "EABI version 5"), + clEnumValN(EABI::GNU, "gnu", "EABI GNU"), clEnumValEnd)); + +cl::opt +DebuggerTuningOpt("debugger-tune", + cl::desc("Tune debug info for a particular debugger"), + cl::init(DebuggerKind::Default), + cl::values( + clEnumValN(DebuggerKind::GDB, "gdb", "gdb"), + clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"), + clEnumValN(DebuggerKind::SCE, "sce", + "SCE targets (e.g. PS4)"), + clEnumValEnd)); + // Common utility function tightly tied to the options listed here. Initializes // a TargetOptions object with CodeGen flags and returns it. 
static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { @@ -260,11 +289,14 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() { Options.DataSections = DataSections; Options.FunctionSections = FunctionSections; Options.UniqueSectionNames = UniqueSectionNames; + Options.EmulatedTLS = EmulatedTLS; Options.MCOptions = InitMCTargetOptionsFromFlags(); Options.JTType = JTableType; Options.ThreadModel = TMModel; + Options.EABIVersion = EABIVersion; + Options.DebuggerTuning = DebuggerTuningOpt; return Options; } @@ -325,6 +357,10 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features, "disable-tail-calls", toStringRef(DisableTailCalls)); + if (StackRealign) + NewAttrs = NewAttrs.addAttribute(Ctx, AttributeSet::FunctionIndex, + "stackrealign"); + if (TrapFuncName.getNumOccurrences() > 0) for (auto &B : F) for (auto &I : B) diff --git a/include/llvm/CodeGen/DFAPacketizer.h b/include/llvm/CodeGen/DFAPacketizer.h index c44a7e0b6736..40ec201107e8 100644 --- a/include/llvm/CodeGen/DFAPacketizer.h +++ b/include/llvm/CodeGen/DFAPacketizer.h @@ -40,22 +40,51 @@ class InstrItineraryData; class DefaultVLIWScheduler; class SUnit; +// -------------------------------------------------------------------- +// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp + +// DFA_MAX_RESTERMS * DFA_MAX_RESOURCES must fit within sizeof DFAInput. +// This is verified in DFAPacketizer.cpp:DFAPacketizer::DFAPacketizer. +// +// e.g. terms x resource bit combinations that fit in uint32_t: +// 4 terms x 8 bits = 32 bits +// 3 terms x 10 bits = 30 bits +// 2 terms x 16 bits = 32 bits +// +// e.g. terms x resource bit combinations that fit in uint64_t: +// 8 terms x 8 bits = 64 bits +// 7 terms x 9 bits = 63 bits +// 6 terms x 10 bits = 60 bits +// 5 terms x 12 bits = 60 bits +// 4 terms x 16 bits = 64 bits <--- current +// 3 terms x 21 bits = 63 bits +// 2 terms x 32 bits = 64 bits +// +#define DFA_MAX_RESTERMS 4 // The max # of AND'ed resource terms. +#define DFA_MAX_RESOURCES 16 // The max # of resource bits in one term. + +typedef uint64_t DFAInput; +typedef int64_t DFAStateInput; +#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable. +// -------------------------------------------------------------------- + class DFAPacketizer { private: - typedef std::pair UnsignPair; + typedef std::pair UnsignPair; + const InstrItineraryData *InstrItins; int CurrentState; - const int (*DFAStateInputTable)[2]; + const DFAStateInput (*DFAStateInputTable)[2]; const unsigned *DFAStateEntryTable; // CachedTable is a map from to ToState. DenseMap CachedTable; // ReadTable - Read the DFA transition table and update CachedTable. - void ReadTable(unsigned int state); + void ReadTable(unsigned state); public: - DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], + DFAPacketizer(const InstrItineraryData *I, const DFAStateInput (*SIT)[2], const unsigned *SET); // Reset the current state to make all resources available. @@ -63,6 +92,12 @@ public: CurrentState = 0; } + // getInsnInput - Return the DFAInput for an instruction class. + DFAInput getInsnInput(unsigned InsnClass); + + // getInsnInput - Return the DFAInput for an instruction class input vector. + static DFAInput getInsnInput(const std::vector &InsnClass); + // canReserveResources - Check if the resources occupied by a MCInstrDesc // are available in the current state. 
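To make the "terms x resource bits" budget above concrete: with the current DFA_MAX_RESTERMS = 4 and DFA_MAX_RESOURCES = 16, each AND'ed resource term occupies one 16-bit lane of the 64-bit DFAInput. The helper below is an illustrative sketch only; the real encoding lives in DFAPacketizer.cpp and DFAPacketizerEmitter.cpp.

  #include <cstdint>

  typedef uint64_t DFAInput; // as in DFAPacketizer.h

  // Append one AND'ed resource term (at most 16 significant bits) to the
  // packed input; four such lanes fill the 64-bit DFAInput exactly.
  static DFAInput addDFATerm(DFAInput Packed, unsigned ResourceBits) {
    return (Packed << 16) | (ResourceBits & 0xFFFFu);
  }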
bool canReserveResources(const llvm::MCInstrDesc *MID); @@ -93,6 +128,7 @@ class VLIWPacketizerList { protected: MachineFunction &MF; const TargetInstrInfo *TII; + AliasAnalysis *AA; // The VLIW Scheduler. DefaultVLIWScheduler *VLIWScheduler; @@ -106,7 +142,9 @@ protected: std::map<MachineInstr *, SUnit *> MIToSUnit; public: - VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, bool IsPostRA); + // The AliasAnalysis parameter can be nullptr. + VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, + AliasAnalysis *AA); virtual ~VLIWPacketizerList(); @@ -126,8 +164,10 @@ public: return MII; } - // endPacket - End the current packet. - void endPacket(MachineBasicBlock *MBB, MachineInstr *MI); + // End the current packet and reset the state of the packetizer. + // Overriding this function allows the target-specific packetizer + // to perform custom finalization. + virtual void endPacket(MachineBasicBlock *MBB, MachineInstr *MI); // initPacketizerState - perform initialization before packetizing // an instruction. This function is supposed to be overridden by // the target dependent packetizer. virtual void initPacketizerState() { return; } // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - virtual bool ignorePseudoInstruction(MachineInstr *I, - MachineBasicBlock *MBB) { + virtual bool ignorePseudoInstruction(const MachineInstr *I, + const MachineBasicBlock *MBB) { return false; } // isSoloInstruction - Return true if instruction MI cannot be packetized // with any other instruction, which means that MI itself is a packet. - virtual bool isSoloInstruction(MachineInstr *MI) { + virtual bool isSoloInstruction(const MachineInstr *MI) { + return true; + } + + // Check if the packetizer should try to add the given instruction to + // the current packet. One reason for which it may not be desirable + // to include an instruction in the current packet could be that it + // would cause a stall. + // If this function returns "false", the current packet will be ended, + // and the instruction will be added to the next packet.
+ virtual bool shouldAddToPacket(const MachineInstr *MI) { return true; } diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h index f07712a676da..fa612d981dec 100644 --- a/include/llvm/CodeGen/DIE.h +++ b/include/llvm/CodeGen/DIE.h @@ -100,10 +100,8 @@ public: /// void Emit(const AsmPrinter *AP) const; -#ifndef NDEBUG void print(raw_ostream &O); void dump(); -#endif }; //===--------------------------------------------------------------------===// @@ -143,9 +141,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -164,9 +160,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -185,9 +179,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -203,9 +195,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -223,9 +213,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -252,9 +240,7 @@ public: : sizeof(int32_t); } -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -273,9 +259,7 @@ public: return 8; } -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -295,9 +279,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// @@ -444,10 +426,8 @@ public: /// unsigned SizeOf(const AsmPrinter *AP) const; -#ifndef NDEBUG void print(raw_ostream &O) const; void dump() const; -#endif }; struct IntrusiveBackListNode { @@ -566,64 +546,70 @@ class DIEValueList { ListTy List; public: - bool empty() const { return List.empty(); } - - class const_iterator; - class iterator - : public iterator_adaptor_base { - friend class const_iterator; - typedef iterator_adaptor_base iterator_adaptor; public: - iterator() = default; - explicit iterator(ListTy::iterator X) : iterator_adaptor(X) {} + value_iterator() = default; + explicit value_iterator(ListTy::iterator X) : iterator_adaptor(X) {} explicit operator bool() const { return bool(wrapped()); } DIEValue &operator*() const { return wrapped()->V; } }; - class const_iterator - : public iterator_adaptor_base { - typedef iterator_adaptor_base { + typedef iterator_adaptor_base iterator_adaptor; public: - const_iterator() = default; - 
const_iterator(DIEValueList::iterator X) : iterator_adaptor(X.wrapped()) {} - explicit const_iterator(ListTy::const_iterator X) : iterator_adaptor(X) {} + const_value_iterator() = default; + const_value_iterator(DIEValueList::value_iterator X) + : iterator_adaptor(X.wrapped()) {} + explicit const_value_iterator(ListTy::const_iterator X) + : iterator_adaptor(X) {} explicit operator bool() const { return bool(wrapped()); } const DIEValue &operator*() const { return wrapped()->V; } }; - iterator insert(BumpPtrAllocator &Alloc, DIEValue V) { + typedef iterator_range value_range; + typedef iterator_range const_value_range; + + value_iterator addValue(BumpPtrAllocator &Alloc, DIEValue V) { List.push_back(*new (Alloc) Node(V)); - return iterator(ListTy::toIterator(List.back())); + return value_iterator(ListTy::toIterator(List.back())); } - template - iterator emplace(BumpPtrAllocator &Alloc, Ts &&... Args) { - return insert(Alloc, DIEValue(std::forward(Args)...)); + template + value_iterator addValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute, + dwarf::Form Form, T &&Value) { + return addValue(Alloc, DIEValue(Attribute, Form, std::forward(Value))); } - iterator begin() { return iterator(List.begin()); } - iterator end() { return iterator(List.end()); } - const_iterator begin() const { return const_iterator(List.begin()); } - const_iterator end() const { return const_iterator(List.end()); } + value_range values() { + return llvm::make_range(value_iterator(List.begin()), + value_iterator(List.end())); + } + const_value_range values() const { + return llvm::make_range(const_value_iterator(List.begin()), + const_value_iterator(List.end())); + } }; //===--------------------------------------------------------------------===// /// DIE - A structured debug information entry. Has an abbreviation which /// describes its organization. -class DIE : IntrusiveBackListNode { +class DIE : IntrusiveBackListNode, public DIEValueList { friend class IntrusiveBackList; -protected: /// Offset - Offset in debug info section. /// unsigned Offset; @@ -643,14 +629,7 @@ protected: DIE *Parent = nullptr; - /// Attribute values. - /// - DIEValueList Values; - -protected: - DIE() : Offset(0), Size(0) {} - -private: + DIE() = delete; explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag) {} public: @@ -677,20 +656,6 @@ public: return llvm::make_range(Children.begin(), Children.end()); } - typedef DIEValueList::iterator value_iterator; - typedef iterator_range value_range; - - value_range values() { - return llvm::make_range(Values.begin(), Values.end()); - } - - typedef DIEValueList::const_iterator const_value_iterator; - typedef iterator_range const_value_range; - - const_value_range values() const { - return llvm::make_range(Values.begin(), Values.end()); - } - DIE *getParent() const { return Parent; } /// Generate the abbreviation for this DIE. @@ -711,17 +676,6 @@ public: void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } - /// addValue - Add a value and attributes to a DIE. - /// - value_iterator addValue(BumpPtrAllocator &Alloc, DIEValue Value) { - return Values.insert(Alloc, Value); - } - template - value_iterator addValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute, - dwarf::Form Form, T &&Value) { - return Values.emplace(Alloc, Attribute, Form, std::forward(Value)); - } - /// Add a child to the DIE. 
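Since DIE now inherits from DIEValueList, attributes are attached through the single addValue() entry point and read back through values(); a hedged usage sketch, assuming the allocator and DIEString are prepared by the caller as usual:

  #include "llvm/CodeGen/DIE.h"

  void attachName(llvm::BumpPtrAllocator &Alloc, llvm::DIE &Die,
                  llvm::DIEString Name) {
    // One addValue() call replaces the old insert()/emplace() pair.
    Die.addValue(Alloc, llvm::dwarf::DW_AT_name, llvm::dwarf::DW_FORM_strp,
                 Name);
    for (const llvm::DIEValue &V : Die.values()) // iterate via values()
      (void)V; // e.g. inspect V.getAttribute() / V.getForm()
  }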
DIE &addChild(DIE *Child) { assert(!Child->getParent() && "Child should be orphaned"); @@ -736,16 +690,14 @@ public: /// gives \a DIEValue::isNone) if no such attribute exists. DIEValue findAttribute(dwarf::Attribute Attribute) const; -#ifndef NDEBUG void print(raw_ostream &O, unsigned IndentCount = 0) const; void dump(); -#endif }; //===--------------------------------------------------------------------===// /// DIELoc - Represents an expression location. // -class DIELoc : public DIE { +class DIELoc : public DIEValueList { mutable unsigned Size; // Size in bytes excluding size header. public: @@ -773,15 +725,13 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; //===--------------------------------------------------------------------===// /// DIEBlock - Represents a block of values. // -class DIEBlock : public DIE { +class DIEBlock : public DIEValueList { mutable unsigned Size; // Size in bytes excluding size header. public: @@ -806,9 +756,7 @@ public: void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const; unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const; -#ifndef NDEBUG void print(raw_ostream &O) const; -#endif }; } // end llvm namespace diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index f04a7cd69664..cc4e37059bb8 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -419,11 +419,11 @@ protected: const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2); - /// \brief Emit a MachineInstr with two register operands and a result + /// \brief Emit a MachineInstr with a floating point immediate, and a result /// register in the given register class. - unsigned fastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, const ConstantFP *FPImm); + unsigned fastEmitInst_f(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + const ConstantFP *FPImm); /// \brief Emit a MachineInstr with two register operands, an immediate, and a /// result register in the given register class. @@ -432,23 +432,11 @@ protected: bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); - /// \brief Emit a MachineInstr with two register operands, two immediates - /// operands, and a result register in the given register class. - unsigned fastEmitInst_rrii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, unsigned Op1, bool Op1IsKill, - uint64_t Imm1, uint64_t Imm2); - /// \brief Emit a MachineInstr with a single immediate operand, and a result /// register in the given register class. unsigned fastEmitInst_i(unsigned MachineInstrOpcode, const TargetRegisterClass *RC, uint64_t Imm); - /// \brief Emit a MachineInstr with a two immediate operands. - unsigned fastEmitInst_ii(unsigned MachineInstrOpcode, - const TargetRegisterClass *RC, uint64_t Imm1, - uint64_t Imm2); - /// \brief Emit a MachineInstr for an extract_subreg from a specified index of /// a superregister to a specified type. unsigned fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, @@ -462,6 +450,11 @@ protected: /// immediate (fall-through) successor, and update the CFG. void fastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL); + /// Emit an unconditional branch to \p FalseMBB, obtains the branch weight + /// and adds TrueMBB and FalseMBB to the successor list. 
+ void finishCondBranch(const BasicBlock *BranchBB, MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB); + /// \brief Update the value map to include the new mapping for this /// instruction, or insert an extra copy to get the result in a previous /// determined register. @@ -566,6 +559,9 @@ private: /// across heavy instructions like calls. void flushLocalValueMap(); + /// \brief Removes dead local value instructions after SavedLastLocalvalue. + void removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue); + /// \brief Insertion point before trying to select the current instruction. MachineBasicBlock::iterator SavedInsertPt; diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index 82c762ed850f..09a9991912da 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -62,6 +62,9 @@ public: /// registers. bool CanLowerReturn; + /// True if part of the CSRs will be handled via explicit copies. + bool SplitCSR; + /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg /// allocated to hold a pointer to the hidden sret parameter. unsigned DemoteRegister; @@ -72,7 +75,10 @@ public: /// ValueMap - Since we emit code for the function a basic block at a time, /// we must remember which virtual registers hold the values for /// cross-basic-block values. - DenseMap ValueMap; + DenseMap ValueMap; + + /// Track virtual registers created for exception pointers. + DenseMap CatchPadExceptionPointers; // Keep track of frame indices allocated for statepoints as they could be used // across basic block boundaries. @@ -99,7 +105,7 @@ public: /// RegFixups - Registers which need to be replaced after isel is done. DenseMap RegFixups; - /// StatepointStackSlots - A list of temporary stack slots (frame indices) + /// StatepointStackSlots - A list of temporary stack slots (frame indices) /// used to spill values at a statepoint. We store them here to enable /// reuse of the same stack slots across different statepoints in different /// basic blocks. @@ -111,11 +117,6 @@ public: /// MBB - The current insert position inside the current block. MachineBasicBlock::iterator InsertPt; -#ifndef NDEBUG - SmallPtrSet CatchInfoLost; - SmallPtrSet CatchInfoFound; -#endif - struct LiveOutInfo { unsigned NumSignBits : 31; bool IsValid : 1; @@ -161,10 +162,13 @@ public: } unsigned CreateReg(MVT VT); - + unsigned CreateRegs(Type *Ty); - + unsigned InitializeRegForValue(const Value *V) { + // Tokens never live in vregs. + if (V->getType()->isTokenTy()) + return 0; unsigned &R = ValueMap[V]; assert(R == 0 && "Already initialized this value register!"); return R = CreateRegs(V->getType()); @@ -231,6 +235,9 @@ public: /// getArgumentFrameIndex - Get frame index for the byval argument. int getArgumentFrameIndex(const Argument *A); + unsigned getCatchPadExceptionPointerVReg(const Value *CPI, + const TargetRegisterClass *RC); + private: void addSEHHandlersForLPads(ArrayRef LPads); diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h index e883bd196ea3..163117b0781c 100644 --- a/include/llvm/CodeGen/GCMetadata.h +++ b/include/llvm/CodeGen/GCMetadata.h @@ -160,9 +160,9 @@ class GCModuleInfo : public ImmutablePass { public: /// Lookup the GCStrategy object associated with the given gc name. /// Objects are owned internally; No caller should attempt to delete the - /// returned objects. + /// returned objects. 
GCStrategy *getGCStrategy(const StringRef Name); - + /// List of per function info objects. In theory, Each of these /// may be associated with a different GC. typedef std::vector> FuncInfoVec; diff --git a/include/llvm/CodeGen/GCStrategy.h b/include/llvm/CodeGen/GCStrategy.h index a1b8e895898f..3088a86a3260 100644 --- a/include/llvm/CodeGen/GCStrategy.h +++ b/include/llvm/CodeGen/GCStrategy.h @@ -117,11 +117,11 @@ public: /** @name Statepoint Specific Properties */ ///@{ - /// If the value specified can be reliably distinguished, returns true for + /// If the type specified can be reliably distinguished, returns true for /// pointers to GC managed locations and false for pointers to non-GC /// managed locations. Note a GCStrategy can always return 'None' (i.e. an /// empty optional indicating it can't reliably distinguish. - virtual Optional isGCManagedPointer(const Value *V) const { + virtual Optional isGCManagedPointer(const Type *Ty) const { return None; } ///@} diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index fa44301a2d4a..158ff3cd36a8 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -108,6 +108,10 @@ namespace ISD { /// and returns an outchain. EH_SJLJ_LONGJMP, + /// OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) + /// The target initializes the dispatch table here. + EH_SJLJ_SETUP_DISPATCH, + /// TargetConstant* - Like Constant*, but the DAG does not do any folding, /// simplification, or lowering of the constant. They are used for constants /// which are known to fit in the immediate fields of their users, or for @@ -332,7 +336,7 @@ namespace ISD { SHL, SRA, SRL, ROTL, ROTR, /// Byte Swap and Counting operators. - BSWAP, CTTZ, CTLZ, CTPOP, + BSWAP, CTTZ, CTLZ, CTPOP, BITREVERSE, /// Bit counting operators with an undefined result for zero inputs. CTTZ_ZERO_UNDEF, CTLZ_ZERO_UNDEF, @@ -364,9 +368,14 @@ namespace ISD { /// then the result type must also be a vector type. SETCC, + /// Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but + /// op #2 is a *carry value*. This operator checks the result of + /// "LHS - RHS - Carry", and can be used to compare two wide integers: + /// (setcce lhshi rhshi (subc lhslo rhslo) cc). Only valid for integers. + SETCCE, + /// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded - /// integer shift operations, just like ADD/SUB_PARTS. The operation - /// ordering is: + /// integer shift operations. The operation ordering is: /// [Lo,Hi] = op [LoLHS,HiLHS], Amt SHL_PARTS, SRA_PARTS, SRL_PARTS, @@ -506,7 +515,15 @@ namespace ISD { FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR, + /// FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two + /// values. + /// In the case where a single input is NaN, the non-NaN input is returned. + /// + /// The return value of (FMINNUM 0.0, -0.0) could be either 0.0 or -0.0. FMINNUM, FMAXNUM, + /// FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that + /// when a single input is NaN, NaN is returned. + FMINNAN, FMAXNAN, /// FSINCOS - Compute both fsin and fcos as a single operation. FSINCOS, @@ -575,6 +592,18 @@ namespace ISD { /// take a chain as input and return a chain. EH_LABEL, + /// CATCHPAD - Represents a catchpad instruction. + CATCHPAD, + + /// CATCHRET - Represents a return from a catch block funclet. Used for + /// MSVC compatible exception handling. 
Takes a chain operand and a + /// destination basic block operand. + CATCHRET, + + /// CLEANUPRET - Represents a return from a cleanup block funclet. Used for + /// MSVC compatible exception handling. Takes only a chain operand. + CLEANUPRET, + /// STACKSAVE - STACKSAVE has one operand, an input chain. It produces a /// value, the same type as the pointer type for the system, and an output /// chain. @@ -618,9 +647,11 @@ namespace ISD { PCMARKER, /// READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic. - /// The only operand is a chain and a value and a chain are produced. The - /// value is the contents of the architecture specific cycle counter like - /// register (or other high accuracy low latency clock source) + /// It produces a chain and one i64 value. The only operand is a chain. + /// If i64 is not legal, the result will be expanded into smaller values. + /// Still, it returns an i64, so targets should set legality for i64. + /// The result is the content of the architecture-specific cycle + /// counter-like register (or other high accuracy low latency clock source). READCYCLECOUNTER, /// HANDLENODE node - Used as a handle for various purposes. @@ -719,6 +750,12 @@ namespace ISD { GC_TRANSITION_START, GC_TRANSITION_END, + /// GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of + /// the most recent dynamic alloca. For most targets that would be 0, but + /// for some others (e.g. PowerPC, PowerPC64) that would be compile-time + /// known nonzero constant. The only operand here is the chain. + GET_DYNAMIC_AREA_OFFSET, + /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h index 9e6ab7d45977..a404b9b70d3a 100644 --- a/include/llvm/CodeGen/IntrinsicLowering.h +++ b/include/llvm/CodeGen/IntrinsicLowering.h @@ -19,41 +19,40 @@ #include "llvm/IR/Intrinsics.h" namespace llvm { - class CallInst; - class Module; - class DataLayout; +class CallInst; +class Module; +class DataLayout; - class IntrinsicLowering { - const DataLayout& DL; +class IntrinsicLowering { + const DataLayout &DL; - - bool Warned; - public: - explicit IntrinsicLowering(const DataLayout &DL) : - DL(DL), Warned(false) {} + bool Warned; - /// AddPrototypes - This method, if called, causes all of the prototypes - /// that might be needed by an intrinsic lowering implementation to be - /// inserted into the module specified. - void AddPrototypes(Module &M); +public: + explicit IntrinsicLowering(const DataLayout &DL) : DL(DL), Warned(false) {} - /// LowerIntrinsicCall - This method replaces a call with the LLVM function - /// which should be used to implement the specified intrinsic function call. - /// If an intrinsic function must be implemented by the code generator - /// (such as va_start), this function should print a message and abort. - /// - /// Otherwise, if an intrinsic function call can be lowered, the code to - /// implement it (often a call to a non-intrinsic function) is inserted - /// _after_ the call instruction and the call is deleted. The caller must - /// be capable of handling this kind of change. - /// - void LowerIntrinsicCall(CallInst *CI); + /// AddPrototypes - This method, if called, causes all of the prototypes + /// that might be needed by an intrinsic lowering implementation to be + /// inserted into the module specified. 
+ void AddPrototypes(Module &M); - /// LowerToByteSwap - Replace a call instruction into a call to bswap - /// intrinsic. Return false if it has determined the call is not a - /// simple integer bswap. - static bool LowerToByteSwap(CallInst *CI); - }; + /// LowerIntrinsicCall - This method replaces a call with the LLVM function + /// which should be used to implement the specified intrinsic function call. + /// If an intrinsic function must be implemented by the code generator + /// (such as va_start), this function should print a message and abort. + /// + /// Otherwise, if an intrinsic function call can be lowered, the code to + /// implement it (often a call to a non-intrinsic function) is inserted + /// _after_ the call instruction and the call is deleted. The caller must + /// be capable of handling this kind of change. + /// + void LowerIntrinsicCall(CallInst *CI); + + /// LowerToByteSwap - Replace a call instruction into a call to bswap + /// intrinsic. Return false if it has determined the call is not a + /// simple integer bswap. + static bool LowerToByteSwap(CallInst *CI); +}; } #endif diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 9b8b91c9b80e..0157bf9117e5 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -25,6 +25,7 @@ #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" +#include "llvm/Target/TargetRegisterInfo.h" #include #include #include @@ -595,15 +596,15 @@ namespace llvm { class SubRange : public LiveRange { public: SubRange *Next; - unsigned LaneMask; + LaneBitmask LaneMask; /// Constructs a new SubRange object. - SubRange(unsigned LaneMask) + SubRange(LaneBitmask LaneMask) : Next(nullptr), LaneMask(LaneMask) { } /// Constructs a new SubRange object by copying liveness from @p Other. - SubRange(unsigned LaneMask, const LiveRange &Other, + SubRange(LaneBitmask LaneMask, const LiveRange &Other, BumpPtrAllocator &Allocator) : LiveRange(Other, Allocator), Next(nullptr), LaneMask(LaneMask) { } @@ -677,7 +678,8 @@ namespace llvm { /// Creates a new empty subregister live range. The range is added at the /// beginning of the subrange list; subrange iterators stay valid. - SubRange *createSubRange(BumpPtrAllocator &Allocator, unsigned LaneMask) { + SubRange *createSubRange(BumpPtrAllocator &Allocator, + LaneBitmask LaneMask) { SubRange *Range = new (Allocator) SubRange(LaneMask); appendSubRange(Range); return Range; @@ -685,7 +687,8 @@ namespace llvm { /// Like createSubRange() but the new range is filled with a copy of the /// liveness information in @p CopyFrom. - SubRange *createSubRangeFrom(BumpPtrAllocator &Allocator, unsigned LaneMask, + SubRange *createSubRangeFrom(BumpPtrAllocator &Allocator, + LaneBitmask LaneMask, const LiveRange &CopyFrom) { SubRange *Range = new (Allocator) SubRange(LaneMask, CopyFrom, Allocator); appendSubRange(Range); @@ -842,11 +845,6 @@ namespace llvm { LiveIntervals &LIS; IntEqClasses EqClass; - // Note that values a and b are connected. - void Connect(unsigned a, unsigned b); - - unsigned Renumber(); - public: explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : LIS(lis) {} @@ -858,12 +856,12 @@ namespace llvm { /// the equivalence class assigned the VNI. unsigned getEqClass(const VNInfo *VNI) const { return EqClass[VNI->id]; } - /// Distribute - Distribute values in LIV[0] into a separate LiveInterval - /// for each connected component. 
LIV must have a LiveInterval for each - /// connected component. The LiveIntervals in Liv[1..] must be empty. - /// Instructions using LIV[0] are rewritten. - void Distribute(LiveInterval *LIV[], MachineRegisterInfo &MRI); - + /// Distribute values in \p LI into a separate LiveIntervals + /// for each connected component. LIV must have an empty LiveInterval for + /// each additional connected component. The first connected component is + /// left in \p LI. + void Distribute(LiveInterval &LI, LiveInterval *LIV[], + MachineRegisterInfo &MRI); }; } diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index 9673f80e0856..87421e2f83b4 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -22,6 +22,7 @@ #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -36,7 +37,6 @@ namespace llvm { extern cl::opt UseSegmentSetForPhysRegs; - class AliasAnalysis; class BitVector; class BlockFrequency; class LiveRangeCalc; @@ -147,13 +147,12 @@ extern cl::opt UseSegmentSetForPhysRegs; LiveInterval::Segment addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst); - /// shrinkToUses - After removing some uses of a register, shrink its live - /// range to just the remaining uses. This method does not compute reaching - /// defs for new uses, and it doesn't remove dead defs. - /// Dead PHIDef values are marked as unused. - /// New dead machine instructions are added to the dead vector. - /// Return true if the interval may have been separated into multiple - /// connected components. + /// After removing some uses of a register, shrink its live range to just + /// the remaining uses. This method does not compute reaching defs for new + /// uses, and it doesn't remove dead defs. + /// Dead PHIDef values are marked as unused. New dead machine instructions + /// are added to the dead vector. Returns true if the interval may have been + /// separated into multiple connected components. bool shrinkToUses(LiveInterval *li, SmallVectorImpl *dead = nullptr); @@ -161,6 +160,8 @@ extern cl::opt UseSegmentSetForPhysRegs; /// shrinkToUses(LiveInterval *li, SmallVectorImpl *dead) /// that works on a subregister live range and only looks at uses matching /// the lane mask of the subregister range. + /// This may leave the subrange empty which needs to be cleaned up with + /// LiveInterval::removeEmptySubranges() afterwards. void shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg); /// extendToIndices - Extend the live range of LI to reach all points in @@ -257,11 +258,6 @@ extern cl::opt UseSegmentSetForPhysRegs; Indexes->replaceMachineInstrInMaps(MI, NewMI); } - bool findLiveInMBBs(SlotIndex Start, SlotIndex End, - SmallVectorImpl &MBBs) const { - return Indexes->findLiveInMBBs(Start, End, MBBs); - } - VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; } void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -406,6 +402,10 @@ extern cl::opt UseSegmentSetForPhysRegs; /// that start at position @p Pos. void removeVRegDefAt(LiveInterval &LI, SlotIndex Pos); + /// Split separate components in LiveInterval \p LI into separate intervals. + void splitSeparateComponents(LiveInterval &LI, + SmallVectorImpl &SplitLIs); + private: /// Compute live intervals for all virtual registers. 
void computeVirtRegs(); @@ -440,7 +440,7 @@ extern cl::opt UseSegmentSetForPhysRegs; void repairOldRegInRange(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, const SlotIndex endIdx, LiveRange &LR, - unsigned Reg, unsigned LaneMask = ~0u); + unsigned Reg, LaneBitmask LaneMask = ~0u); class HMEditor; }; diff --git a/include/llvm/CodeGen/LivePhysRegs.h b/include/llvm/CodeGen/LivePhysRegs.h index 6475e7b4af37..3bdf5ae8d013 100644 --- a/include/llvm/CodeGen/LivePhysRegs.h +++ b/include/llvm/CodeGen/LivePhysRegs.h @@ -109,7 +109,7 @@ public: /// \brief Simulates liveness when stepping forward over an /// instruction(bundle): Remove killed-uses, add defs. This is the not /// recommended way, because it depends on accurate kill flags. If possible - /// use stepBackwards() instead of this function. + /// use stepBackward() instead of this function. /// The clobbers set will be the list of registers either defined or clobbered /// by a regmask. The operand will identify whether this is a regmask or /// register operand. @@ -122,9 +122,9 @@ public: void addLiveIns(const MachineBasicBlock *MBB, bool AddPristines = false); /// \brief Adds all live-out registers of basic block @p MBB; After prologue/ - /// epilogue insertion \p AddPristines should be set to true to insert the - /// pristine registers. - void addLiveOuts(const MachineBasicBlock *MBB, bool AddPristines = false); + /// epilogue insertion \p AddPristinesAndCSRs should be set to true. + void addLiveOuts(const MachineBasicBlock *MBB, + bool AddPristinesAndCSRs = false); typedef SparseSet::const_iterator const_iterator; const_iterator begin() const { return LiveRegs.begin(); } diff --git a/include/llvm/CodeGen/LiveRangeEdit.h b/include/llvm/CodeGen/LiveRangeEdit.h index c97c636abbb4..2271e3352aa2 100644 --- a/include/llvm/CodeGen/LiveRangeEdit.h +++ b/include/llvm/CodeGen/LiveRangeEdit.h @@ -21,6 +21,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" @@ -28,7 +29,6 @@ namespace llvm { -class AliasAnalysis; class LiveIntervals; class MachineBlockFrequencyInfo; class MachineLoopInfo; diff --git a/include/llvm/CodeGen/LiveRegMatrix.h b/include/llvm/CodeGen/LiveRegMatrix.h index 86a0c7bd626f..e169058ca563 100644 --- a/include/llvm/CodeGen/LiveRegMatrix.h +++ b/include/llvm/CodeGen/LiveRegMatrix.h @@ -32,13 +32,11 @@ namespace llvm { class LiveInterval; class LiveIntervalAnalysis; -class MachineRegisterInfo; class TargetRegisterInfo; class VirtRegMap; class LiveRegMatrix : public MachineFunctionPass { const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; LiveIntervals *LIS; VirtRegMap *VRM; diff --git a/include/llvm/CodeGen/LiveStackAnalysis.h b/include/llvm/CodeGen/LiveStackAnalysis.h index f495507c66ec..3ffbe3d775b4 100644 --- a/include/llvm/CodeGen/LiveStackAnalysis.h +++ b/include/llvm/CodeGen/LiveStackAnalysis.h @@ -25,76 +25,74 @@ namespace llvm { - class LiveStacks : public MachineFunctionPass { - const TargetRegisterInfo *TRI; +class LiveStacks : public MachineFunctionPass { + const TargetRegisterInfo *TRI; - /// Special pool allocator for VNInfo's (LiveInterval val#). - /// - VNInfo::Allocator VNInfoAllocator; + /// Special pool allocator for VNInfo's (LiveInterval val#). + /// + VNInfo::Allocator VNInfoAllocator; - /// S2IMap - Stack slot indices to live interval mapping. 
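As a usage note for the LivePhysRegs fixes above (stepBackward() is the recommended direction), this is roughly how the class is driven from a late pass; a sketch only, assuming TRI and MBB come from a running MachineFunction pass:

  #include "llvm/CodeGen/LivePhysRegs.h"

  // Recompute which physical registers are live before each instruction by
  // walking the block bottom-up, seeded with the block's live-outs.
  static void walkLiveness(const llvm::TargetRegisterInfo *TRI,
                           llvm::MachineBasicBlock &MBB) {
    llvm::LivePhysRegs LiveRegs(TRI);
    LiveRegs.addLiveOuts(&MBB, /*AddPristinesAndCSRs=*/true);
    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I)
      LiveRegs.stepBackward(*I); // LiveRegs = liveness just before *I
  }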
- /// - typedef std::unordered_map SS2IntervalMap; - SS2IntervalMap S2IMap; + /// S2IMap - Stack slot indices to live interval mapping. + /// + typedef std::unordered_map SS2IntervalMap; + SS2IntervalMap S2IMap; - /// S2RCMap - Stack slot indices to register class mapping. - std::map S2RCMap; - - public: - static char ID; // Pass identification, replacement for typeid - LiveStacks() : MachineFunctionPass(ID) { - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - } + /// S2RCMap - Stack slot indices to register class mapping. + std::map S2RCMap; - typedef SS2IntervalMap::iterator iterator; - typedef SS2IntervalMap::const_iterator const_iterator; - const_iterator begin() const { return S2IMap.begin(); } - const_iterator end() const { return S2IMap.end(); } - iterator begin() { return S2IMap.begin(); } - iterator end() { return S2IMap.end(); } +public: + static char ID; // Pass identification, replacement for typeid + LiveStacks() : MachineFunctionPass(ID) { + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + } - unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); } + typedef SS2IntervalMap::iterator iterator; + typedef SS2IntervalMap::const_iterator const_iterator; + const_iterator begin() const { return S2IMap.begin(); } + const_iterator end() const { return S2IMap.end(); } + iterator begin() { return S2IMap.begin(); } + iterator end() { return S2IMap.end(); } - LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC); + unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); } - LiveInterval &getInterval(int Slot) { - assert(Slot >= 0 && "Spill slot indice must be >= 0"); - SS2IntervalMap::iterator I = S2IMap.find(Slot); - assert(I != S2IMap.end() && "Interval does not exist for stack slot"); - return I->second; - } + LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC); - const LiveInterval &getInterval(int Slot) const { - assert(Slot >= 0 && "Spill slot indice must be >= 0"); - SS2IntervalMap::const_iterator I = S2IMap.find(Slot); - assert(I != S2IMap.end() && "Interval does not exist for stack slot"); - return I->second; - } + LiveInterval &getInterval(int Slot) { + assert(Slot >= 0 && "Spill slot indice must be >= 0"); + SS2IntervalMap::iterator I = S2IMap.find(Slot); + assert(I != S2IMap.end() && "Interval does not exist for stack slot"); + return I->second; + } - bool hasInterval(int Slot) const { - return S2IMap.count(Slot); - } + const LiveInterval &getInterval(int Slot) const { + assert(Slot >= 0 && "Spill slot indice must be >= 0"); + SS2IntervalMap::const_iterator I = S2IMap.find(Slot); + assert(I != S2IMap.end() && "Interval does not exist for stack slot"); + return I->second; + } - const TargetRegisterClass *getIntervalRegClass(int Slot) const { - assert(Slot >= 0 && "Spill slot indice must be >= 0"); - std::map::const_iterator - I = S2RCMap.find(Slot); - assert(I != S2RCMap.end() && - "Register class info does not exist for stack slot"); - return I->second; - } + bool hasInterval(int Slot) const { return S2IMap.count(Slot); } - VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; } + const TargetRegisterClass *getIntervalRegClass(int Slot) const { + assert(Slot >= 0 && "Spill slot indice must be >= 0"); + std::map::const_iterator I = + S2RCMap.find(Slot); + assert(I != S2RCMap.end() && + "Register class info does not exist for stack slot"); + return I->second; + } - void getAnalysisUsage(AnalysisUsage &AU) const override; - void releaseMemory() override; + VNInfo::Allocator 
&getVNInfoAllocator() { return VNInfoAllocator; } - /// runOnMachineFunction - pass entry point - bool runOnMachineFunction(MachineFunction&) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; - /// print - Implement the dump method. - void print(raw_ostream &O, const Module* = nullptr) const override; - }; + /// runOnMachineFunction - pass entry point + bool runOnMachineFunction(MachineFunction &) override; + + /// print - Implement the dump method. + void print(raw_ostream &O, const Module * = nullptr) const override; +}; } #endif /* LLVM_CODEGEN_LIVESTACK_ANALYSIS_H */ diff --git a/include/llvm/CodeGen/MIRParser/MIRParser.h b/include/llvm/CodeGen/MIRParser/MIRParser.h index 67b756d5e886..a569d5ec1f5e 100644 --- a/include/llvm/CodeGen/MIRParser/MIRParser.h +++ b/include/llvm/CodeGen/MIRParser/MIRParser.h @@ -1,4 +1,4 @@ -//===- MIRParser.h - MIR serialization format parser ----------------------===// +//===- MIRParser.h - MIR serialization format parser ------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -37,7 +37,7 @@ class MIRParser : public MachineFunctionInitializer { public: MIRParser(std::unique_ptr Impl); MIRParser(const MIRParser &) = delete; - ~MIRParser(); + ~MIRParser() override; /// Parse the optional LLVM IR module that's embedded in the MIR file. /// @@ -78,4 +78,4 @@ createMIRParser(std::unique_ptr Contents, LLVMContext &Context); } // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MIRPARSER_MIRPARSER_H diff --git a/include/llvm/CodeGen/MIRYamlMapping.h b/include/llvm/CodeGen/MIRYamlMapping.h index 9798e5cef645..14d3744741c5 100644 --- a/include/llvm/CodeGen/MIRYamlMapping.h +++ b/include/llvm/CodeGen/MIRYamlMapping.h @@ -19,6 +19,7 @@ #define LLVM_LIB_CODEGEN_MIRYAMLMAPPING_H #include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/Support/YAMLTraits.h" #include @@ -72,54 +73,109 @@ template <> struct ScalarTraits { static bool mustQuote(StringRef Scalar) { return needsQuotes(Scalar); } }; +struct BlockStringValue { + StringValue Value; +}; + +template <> struct BlockScalarTraits { + static void output(const BlockStringValue &S, void *Ctx, raw_ostream &OS) { + return ScalarTraits::output(S.Value, Ctx, OS); + } + + static StringRef input(StringRef Scalar, void *Ctx, BlockStringValue &S) { + return ScalarTraits::input(Scalar, Ctx, S.Value); + } +}; + +/// A wrapper around unsigned which contains a source range that's being set +/// during parsing. 
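The UnsignedValue wrapper defined next exists so MIR diagnostics can point back at the exact token in the YAML input. A hypothetical client trait (Foo is not part of this patch) would use it like any other scalar field:

  struct Foo {
    llvm::yaml::UnsignedValue ID;
  };

  namespace llvm {
  namespace yaml {
  template <> struct MappingTraits<Foo> {
    static void mapping(IO &YamlIO, Foo &F) {
      // F.ID.SourceRange is filled in while parsing, so a later semantic
      // error (e.g. a dangling index) can be reported at the right spot.
      YamlIO.mapRequired("id", F.ID);
    }
  };
  } // end namespace yaml
  } // end namespace llvm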
+struct UnsignedValue { + unsigned Value; + SMRange SourceRange; + + UnsignedValue() : Value(0) {} + UnsignedValue(unsigned Value) : Value(Value) {} + + bool operator==(const UnsignedValue &Other) const { + return Value == Other.Value; + } +}; + +template <> struct ScalarTraits { + static void output(const UnsignedValue &Value, void *Ctx, raw_ostream &OS) { + return ScalarTraits::output(Value.Value, Ctx, OS); + } + + static StringRef input(StringRef Scalar, void *Ctx, UnsignedValue &Value) { + if (const auto *Node = + reinterpret_cast(Ctx)->getCurrentNode()) + Value.SourceRange = Node->getSourceRange(); + return ScalarTraits::input(Scalar, Ctx, Value.Value); + } + + static bool mustQuote(StringRef Scalar) { + return ScalarTraits::mustQuote(Scalar); + } +}; + +template <> struct ScalarEnumerationTraits { + static void enumeration(yaml::IO &IO, + MachineJumpTableInfo::JTEntryKind &EntryKind) { + IO.enumCase(EntryKind, "block-address", + MachineJumpTableInfo::EK_BlockAddress); + IO.enumCase(EntryKind, "gp-rel64-block-address", + MachineJumpTableInfo::EK_GPRel64BlockAddress); + IO.enumCase(EntryKind, "gp-rel32-block-address", + MachineJumpTableInfo::EK_GPRel32BlockAddress); + IO.enumCase(EntryKind, "label-difference32", + MachineJumpTableInfo::EK_LabelDifference32); + IO.enumCase(EntryKind, "inline", MachineJumpTableInfo::EK_Inline); + IO.enumCase(EntryKind, "custom32", MachineJumpTableInfo::EK_Custom32); + } +}; + } // end namespace yaml } // end namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::StringValue) LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::FlowStringValue) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::UnsignedValue) namespace llvm { namespace yaml { struct VirtualRegisterDefinition { - unsigned ID; + UnsignedValue ID; StringValue Class; - // TODO: Serialize the virtual register hints. + StringValue PreferredRegister; + // TODO: Serialize the target specific register hints. }; template <> struct MappingTraits { static void mapping(IO &YamlIO, VirtualRegisterDefinition &Reg) { YamlIO.mapRequired("id", Reg.ID); YamlIO.mapRequired("class", Reg.Class); + YamlIO.mapOptional("preferred-register", Reg.PreferredRegister, + StringValue()); // Don't print out when it's empty. } static const bool flow = true; }; -struct MachineBasicBlock { - unsigned ID; - StringValue Name; - unsigned Alignment = 0; - bool IsLandingPad = false; - bool AddressTaken = false; - // TODO: Serialize the successor weights. - std::vector Successors; - std::vector LiveIns; - std::vector Instructions; +struct MachineFunctionLiveIn { + StringValue Register; + StringValue VirtualRegister; }; -template <> struct MappingTraits { - static void mapping(IO &YamlIO, MachineBasicBlock &MBB) { - YamlIO.mapRequired("id", MBB.ID); - YamlIO.mapOptional("name", MBB.Name, - StringValue()); // Don't print out an empty name. - YamlIO.mapOptional("alignment", MBB.Alignment); - YamlIO.mapOptional("isLandingPad", MBB.IsLandingPad); - YamlIO.mapOptional("addressTaken", MBB.AddressTaken); - YamlIO.mapOptional("successors", MBB.Successors); - YamlIO.mapOptional("liveins", MBB.LiveIns); - YamlIO.mapOptional("instructions", MBB.Instructions); +template <> struct MappingTraits { + static void mapping(IO &YamlIO, MachineFunctionLiveIn &LiveIn) { + YamlIO.mapRequired("reg", LiveIn.Register); + YamlIO.mapOptional( + "virtual-reg", LiveIn.VirtualRegister, + StringValue()); // Don't print the virtual register when it's empty. 
} + + static const bool flow = true; }; /// Serializable representation of stack object from the MachineFrameInfo class. @@ -128,16 +184,21 @@ template <> struct MappingTraits { /// determined by the object's type and frame information flags. /// Dead stack objects aren't serialized. /// -/// TODO: Determine isPreallocated flag by mapping between objects and local -/// objects (Serialize local objects). +/// The 'isPreallocated' flag is determined by the local offset. struct MachineStackObject { enum ObjectType { DefaultType, SpillSlot, VariableSized }; - // TODO: Serialize LLVM alloca reference. - unsigned ID; + UnsignedValue ID; + StringValue Name; + // TODO: Serialize unnamed LLVM alloca reference. ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; unsigned Alignment = 0; + StringValue CalleeSavedRegister; + Optional LocalOffset; + StringValue DebugVar; + StringValue DebugExpr; + StringValue DebugLoc; }; template <> struct ScalarEnumerationTraits { @@ -151,6 +212,8 @@ template <> struct ScalarEnumerationTraits { template <> struct MappingTraits { static void mapping(yaml::IO &YamlIO, MachineStackObject &Object) { YamlIO.mapRequired("id", Object.ID); + YamlIO.mapOptional("name", Object.Name, + StringValue()); // Don't print out an empty name. YamlIO.mapOptional( "type", Object.Type, MachineStackObject::DefaultType); // Don't print the default type. @@ -158,6 +221,15 @@ template <> struct MappingTraits { if (Object.Type != MachineStackObject::VariableSized) YamlIO.mapRequired("size", Object.Size); YamlIO.mapOptional("alignment", Object.Alignment); + YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("local-offset", Object.LocalOffset); + YamlIO.mapOptional("di-variable", Object.DebugVar, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("di-expression", Object.DebugExpr, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("di-location", Object.DebugLoc, + StringValue()); // Don't print it out when it's empty. } static const bool flow = true; @@ -167,13 +239,14 @@ template <> struct MappingTraits { /// MachineFrameInfo class. struct FixedMachineStackObject { enum ObjectType { DefaultType, SpillSlot }; - unsigned ID; + UnsignedValue ID; ObjectType Type = DefaultType; int64_t Offset = 0; uint64_t Size = 0; unsigned Alignment = 0; bool IsImmutable = false; bool IsAliased = false; + StringValue CalleeSavedRegister; }; template <> @@ -198,22 +271,64 @@ template <> struct MappingTraits { YamlIO.mapOptional("isImmutable", Object.IsImmutable); YamlIO.mapOptional("isAliased", Object.IsAliased); } + YamlIO.mapOptional("callee-saved-register", Object.CalleeSavedRegister, + StringValue()); // Don't print it out when it's empty. 
} static const bool flow = true; }; +struct MachineConstantPoolValue { + UnsignedValue ID; + StringValue Value; + unsigned Alignment = 0; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, MachineConstantPoolValue &Constant) { + YamlIO.mapRequired("id", Constant.ID); + YamlIO.mapOptional("value", Constant.Value); + YamlIO.mapOptional("alignment", Constant.Alignment); + } +}; + +struct MachineJumpTable { + struct Entry { + UnsignedValue ID; + std::vector Blocks; + }; + + MachineJumpTableInfo::JTEntryKind Kind = MachineJumpTableInfo::EK_Custom32; + std::vector Entries; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, MachineJumpTable::Entry &Entry) { + YamlIO.mapRequired("id", Entry.ID); + YamlIO.mapOptional("blocks", Entry.Blocks); + } +}; + } // end namespace yaml } // end namespace llvm +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineFunctionLiveIn) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::VirtualRegisterDefinition) -LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineBasicBlock) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineStackObject) LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::FixedMachineStackObject) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineConstantPoolValue) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineJumpTable::Entry) namespace llvm { namespace yaml { +template <> struct MappingTraits { + static void mapping(IO &YamlIO, MachineJumpTable &JT) { + YamlIO.mapRequired("kind", JT.Kind); + YamlIO.mapOptional("entries", JT.Entries); + } +}; + /// Serializable representation of MachineFrameInfo. /// /// Doesn't serialize attributes like 'StackAlignment', 'IsStackRealignable' and @@ -231,14 +346,14 @@ struct MachineFrameInfo { unsigned MaxAlignment = 0; bool AdjustsStack = false; bool HasCalls = false; - // TODO: Serialize StackProtectorIdx and FunctionContextIdx + StringValue StackProtector; + // TODO: Serialize FunctionContextIdx unsigned MaxCallFrameSize = 0; - // TODO: Serialize callee saved info. - // TODO: Serialize local frame objects. bool HasOpaqueSPAdjustment = false; bool HasVAStart = false; bool HasMustTailInVarArgFunc = false; - // TODO: Serialize save and restore MBB references. + StringValue SavePoint; + StringValue RestorePoint; }; template <> struct MappingTraits { @@ -252,10 +367,16 @@ template <> struct MappingTraits { YamlIO.mapOptional("maxAlignment", MFI.MaxAlignment); YamlIO.mapOptional("adjustsStack", MFI.AdjustsStack); YamlIO.mapOptional("hasCalls", MFI.HasCalls); + YamlIO.mapOptional("stackProtector", MFI.StackProtector, + StringValue()); // Don't print it out when it's empty. YamlIO.mapOptional("maxCallFrameSize", MFI.MaxCallFrameSize); YamlIO.mapOptional("hasOpaqueSPAdjustment", MFI.HasOpaqueSPAdjustment); YamlIO.mapOptional("hasVAStart", MFI.HasVAStart); YamlIO.mapOptional("hasMustTailInVarArgFunc", MFI.HasMustTailInVarArgFunc); + YamlIO.mapOptional("savePoint", MFI.SavePoint, + StringValue()); // Don't print it out when it's empty. + YamlIO.mapOptional("restorePoint", MFI.RestorePoint, + StringValue()); // Don't print it out when it's empty. } }; @@ -269,14 +390,16 @@ struct MachineFunction { bool TracksRegLiveness = false; bool TracksSubRegLiveness = false; std::vector VirtualRegisters; + std::vector LiveIns; + Optional> CalleeSavedRegisters; // TODO: Serialize the various register masks. - // TODO: Serialize live in registers. 
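  // A minimal sketch, assuming the mappings in this struct stay as written:
  // a machine function serialized through them is expected to look roughly
  // like the following hand-written MIR document
  //
  //   name:              foo
  //   tracksRegLiveness: true
  //   liveins:
  //     - { reg: '%edi', virtual-reg: '%0' }
  //   body: |
  //     bb.0.entry:
  //       %0 = COPY %edi
  //       RETQ
  //
  // (the register syntax and opcode are illustrative, taken from X86).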
// Frame information MachineFrameInfo FrameInfo; std::vector FixedStackObjects; std::vector StackObjects; - - std::vector BasicBlocks; + std::vector Constants; /// Constant pool. + MachineJumpTable JumpTableInfo; + BlockStringValue Body; }; template <> struct MappingTraits { @@ -289,10 +412,15 @@ template <> struct MappingTraits { YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness); YamlIO.mapOptional("tracksSubRegLiveness", MF.TracksSubRegLiveness); YamlIO.mapOptional("registers", MF.VirtualRegisters); + YamlIO.mapOptional("liveins", MF.LiveIns); + YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters); YamlIO.mapOptional("frameInfo", MF.FrameInfo); YamlIO.mapOptional("fixedStack", MF.FixedStackObjects); YamlIO.mapOptional("stack", MF.StackObjects); - YamlIO.mapOptional("body", MF.BasicBlocks); + YamlIO.mapOptional("constants", MF.Constants); + if (!YamlIO.outputting() || !MF.JumpTableInfo.Entries.empty()) + YamlIO.mapOptional("jumpTable", MF.JumpTableInfo); + YamlIO.mapOptional("body", MF.Body); } }; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 5e5f45cae8fb..3d58c499823e 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -16,6 +16,8 @@ #include "llvm/ADT/GraphTraits.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/DataTypes.h" #include @@ -25,11 +27,15 @@ class Pass; class BasicBlock; class MachineFunction; class MCSymbol; +class MIPrinter; class SlotIndexes; class StringRef; class raw_ostream; class MachineBranchProbabilityInfo; +// Forward declaration to avoid circular include problem with TargetRegisterInfo +typedef unsigned LaneBitmask; + template <> struct ilist_traits : public ilist_default_traits { private: @@ -52,57 +58,76 @@ public: void addNodeToList(MachineInstr* N); void removeNodeFromList(MachineInstr* N); void transferNodesFromList(ilist_traits &SrcTraits, - ilist_iterator first, - ilist_iterator last); + ilist_iterator First, + ilist_iterator Last); void deleteNode(MachineInstr *N); private: void createNode(const MachineInstr &); }; -class MachineBasicBlock : public ilist_node { +class MachineBasicBlock + : public ilist_node_with_parent { +public: + /// Pair of physical register and lane mask. + /// This is not simply a std::pair typedef because the members should be named + /// clearly as they both have an integer type. + struct RegisterMaskPair { + public: + MCPhysReg PhysReg; + LaneBitmask LaneMask; + + RegisterMaskPair(MCPhysReg PhysReg, LaneBitmask LaneMask) + : PhysReg(PhysReg), LaneMask(LaneMask) {} + }; + +private: typedef ilist Instructions; Instructions Insts; const BasicBlock *BB; int Number; MachineFunction *xParent; - /// Predecessors/Successors - Keep track of the predecessor / successor - /// basicblocks. + /// Keep track of the predecessor / successor basic blocks. std::vector Predecessors; std::vector Successors; - /// Weights - Keep track of the weights to the successors. This vector - /// has the same order as Successors, or it is empty if we don't use it - /// (disable optimization). - std::vector Weights; - typedef std::vector::iterator weight_iterator; - typedef std::vector::const_iterator const_weight_iterator; + /// Keep track of the probabilities to the successors. This vector has the + /// same order as Successors, or it is empty if we don't use it (disable + /// optimization). 
+ std::vector Probs; + typedef std::vector::iterator probability_iterator; + typedef std::vector::const_iterator + const_probability_iterator; - /// LiveIns - Keep track of the physical registers that are livein of - /// the basicblock. - std::vector LiveIns; + /// Keep track of the physical registers that are livein of the basicblock. + typedef std::vector LiveInVector; + LiveInVector LiveIns; - /// Alignment - Alignment of the basic block. Zero if the basic block does - /// not need to be aligned. - /// The alignment is specified as log2(bytes). - unsigned Alignment; + /// Alignment of the basic block. Zero if the basic block does not need to be + /// aligned. The alignment is specified as log2(bytes). + unsigned Alignment = 0; - /// IsLandingPad - Indicate that this basic block is entered via an - /// exception handler. - bool IsLandingPad; + /// Indicate that this basic block is entered via an exception handler. + bool IsEHPad = false; - /// AddressTaken - Indicate that this basic block is potentially the - /// target of an indirect branch. - bool AddressTaken; + /// Indicate that this basic block is potentially the target of an indirect + /// branch. + bool AddressTaken = false; + + /// Indicate that this basic block is the entry block of an EH funclet. + bool IsEHFuncletEntry = false; + + /// Indicate that this basic block is the entry block of a cleanup funclet. + bool IsCleanupFuncletEntry = false; /// \brief since getSymbol is a relatively heavy-weight operation, the symbol /// is only computed once and is cached. - mutable MCSymbol *CachedMCSymbol; + mutable MCSymbol *CachedMCSymbol = nullptr; // Intrusive list support MachineBasicBlock() {} - explicit MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb); + explicit MachineBasicBlock(MachineFunction &MF, const BasicBlock *BB); ~MachineBasicBlock(); @@ -110,50 +135,44 @@ class MachineBasicBlock : public ilist_node { friend class MachineFunction; public: - /// getBasicBlock - Return the LLVM basic block that this instance - /// corresponded to originally. Note that this may be NULL if this instance - /// does not correspond directly to an LLVM basic block. - /// + /// Return the LLVM basic block that this instance corresponded to originally. + /// Note that this may be NULL if this instance does not correspond directly + /// to an LLVM basic block. const BasicBlock *getBasicBlock() const { return BB; } - /// getName - Return the name of the corresponding LLVM basic block, or - /// "(null)". + /// Return the name of the corresponding LLVM basic block, or "(null)". StringRef getName() const; - /// getFullName - Return a formatted string to identify this block and its - /// parent function. + /// Return a formatted string to identify this block and its parent function. std::string getFullName() const; - /// hasAddressTaken - Test whether this block is potentially the target - /// of an indirect branch. + /// Test whether this block is potentially the target of an indirect branch. bool hasAddressTaken() const { return AddressTaken; } - /// setHasAddressTaken - Set this block to reflect that it potentially - /// is the target of an indirect branch. + /// Set this block to reflect that it potentially is the target of an indirect + /// branch. void setHasAddressTaken() { AddressTaken = true; } - /// getParent - Return the MachineFunction containing this basic block. - /// + /// Return the MachineFunction containing this basic block. 
const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - - /// bundle_iterator - MachineBasicBlock iterator that automatically skips over - /// MIs that are inside bundles (i.e. walk top level MIs only). + /// MachineBasicBlock iterator that automatically skips over MIs that are + /// inside bundles (i.e. walk top level MIs only). template class bundle_iterator : public std::iterator { IterTy MII; public: - bundle_iterator(IterTy mii) : MII(mii) {} + bundle_iterator(IterTy MI) : MII(MI) {} - bundle_iterator(Ty &mi) : MII(mi) { - assert(!mi.isBundledWithPred() && + bundle_iterator(Ty &MI) : MII(MI) { + assert(!MI.isBundledWithPred() && "It's not legal to initialize bundle_iterator with a bundled MI"); } - bundle_iterator(Ty *mi) : MII(mi) { - assert((!mi || !mi->isBundledWithPred()) && + bundle_iterator(Ty *MI) : MII(MI) { + assert((!MI || !MI->isBundledWithPred()) && "It's not legal to initialize bundle_iterator with a bundled MI"); } // Template allows conversion from const to nonconst. @@ -165,13 +184,13 @@ public: Ty &operator*() const { return *MII; } Ty *operator->() const { return &operator*(); } - operator Ty*() const { return MII; } + operator Ty *() const { return MII.getNodePtrUnchecked(); } - bool operator==(const bundle_iterator &x) const { - return MII == x.MII; + bool operator==(const bundle_iterator &X) const { + return MII == X.MII; } - bool operator!=(const bundle_iterator &x) const { - return !operator==(x); + bool operator!=(const bundle_iterator &X) const { + return !operator==(X); } // Increment and decrement operators... @@ -247,11 +266,16 @@ public: reverse_iterator rend () { return instr_rend(); } const_reverse_iterator rend () const { return instr_rend(); } + /// Support for MachineInstr::getNextNode(). + static Instructions MachineBasicBlock::*getSublistAccess(MachineInstr *) { + return &MachineBasicBlock::Insts; + } + inline iterator_range terminators() { - return iterator_range(getFirstTerminator(), end()); + return make_range(getFirstTerminator(), end()); } inline iterator_range terminators() const { - return iterator_range(getFirstTerminator(), end()); + return make_range(getFirstTerminator(), end()); } // Machine-CFG iterators @@ -301,16 +325,16 @@ public: bool succ_empty() const { return Successors.empty(); } inline iterator_range predecessors() { - return iterator_range(pred_begin(), pred_end()); + return make_range(pred_begin(), pred_end()); } inline iterator_range predecessors() const { - return iterator_range(pred_begin(), pred_end()); + return make_range(pred_begin(), pred_end()); } inline iterator_range successors() { - return iterator_range(succ_begin(), succ_end()); + return make_range(succ_begin(), succ_end()); } inline iterator_range successors() const { - return iterator_range(succ_begin(), succ_end()); + return make_range(succ_begin(), succ_end()); } // LiveIn management methods. @@ -318,131 +342,177 @@ public: /// Adds the specified register as a live in. Note that it is an error to add /// the same register to the same set more than once unless the intention is /// to call sortUniqueLiveIns after all registers are added. - void addLiveIn(unsigned Reg) { LiveIns.push_back(Reg); } + void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask = ~0u) { + LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask)); + } + void addLiveIn(const RegisterMaskPair &RegMaskPair) { + LiveIns.push_back(RegMaskPair); + } /// Sorts and uniques the LiveIns vector. 
It can be significantly faster to do /// this than repeatedly calling isLiveIn before calling addLiveIn for every /// LiveIn insertion. - void sortUniqueLiveIns() { - std::sort(LiveIns.begin(), LiveIns.end()); - LiveIns.erase(std::unique(LiveIns.begin(), LiveIns.end()), LiveIns.end()); - } + void sortUniqueLiveIns(); /// Add PhysReg as live in to this block, and ensure that there is a copy of /// PhysReg to a virtual register of class RC. Return the virtual register /// that is a copy of the live in PhysReg. - unsigned addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC); + unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC); - /// removeLiveIn - Remove the specified register from the live in set. - /// - void removeLiveIn(unsigned Reg); + /// Remove the specified register from the live in set. + void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u); - /// isLiveIn - Return true if the specified register is in the live in set. - /// - bool isLiveIn(unsigned Reg) const; + /// Return true if the specified register is in the live in set. + bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u) const; // Iteration support for live in sets. These sets are kept in sorted // order by their register number. - typedef std::vector::const_iterator livein_iterator; + typedef LiveInVector::const_iterator livein_iterator; livein_iterator livein_begin() const { return LiveIns.begin(); } livein_iterator livein_end() const { return LiveIns.end(); } bool livein_empty() const { return LiveIns.empty(); } + iterator_range liveins() const { + return make_range(livein_begin(), livein_end()); + } - /// getAlignment - Return alignment of the basic block. - /// The alignment is specified as log2(bytes). - /// + /// Get the clobber mask for the start of this basic block. Funclets use this + /// to prevent register allocation across funclet transitions. + const uint32_t *getBeginClobberMask(const TargetRegisterInfo *TRI) const; + + /// Get the clobber mask for the end of the basic block. + /// \see getBeginClobberMask() + const uint32_t *getEndClobberMask(const TargetRegisterInfo *TRI) const; + + /// Return alignment of the basic block. The alignment is specified as + /// log2(bytes). unsigned getAlignment() const { return Alignment; } - /// setAlignment - Set alignment of the basic block. - /// The alignment is specified as log2(bytes). - /// + /// Set alignment of the basic block. The alignment is specified as + /// log2(bytes). void setAlignment(unsigned Align) { Alignment = Align; } - /// isLandingPad - Returns true if the block is a landing pad. That is - /// this basic block is entered via an exception handler. - bool isLandingPad() const { return IsLandingPad; } + /// Returns true if the block is a landing pad. That is this basic block is + /// entered via an exception handler. + bool isEHPad() const { return IsEHPad; } - /// setIsLandingPad - Indicates the block is a landing pad. That is - /// this basic block is entered via an exception handler. - void setIsLandingPad(bool V = true) { IsLandingPad = V; } + /// Indicates the block is a landing pad. That is this basic block is entered + /// via an exception handler. + void setIsEHPad(bool V = true) { IsEHPad = V; } - /// getLandingPadSuccessor - If this block has a successor that is a landing - /// pad, return it. Otherwise return NULL. + /// If this block has a successor that is a landing pad, return it. Otherwise + /// return NULL. 
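A short sketch of the intended pattern for the lane-mask-aware live-in API above; MBB, PhysReg and PartialReg are assumed to be provided by the surrounding pass:

  // Insert all live-ins first, then sort/unique once; this is cheaper than
  // calling isLiveIn() before every single addLiveIn().
  MBB.addLiveIn(PhysReg);                      // whole register, LaneMask = ~0u
  MBB.addLiveIn(PartialReg, /*LaneMask=*/0x1); // only the first lane is live
  MBB.sortUniqueLiveIns();

  for (const llvm::MachineBasicBlock::RegisterMaskPair &P : MBB.liveins())
    (void)P; // P.PhysReg plus P.LaneMask describe one live-in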
const MachineBasicBlock *getLandingPadSuccessor() const; + bool hasEHPadSuccessor() const; + + /// Returns true if this is the entry block of an EH funclet. + bool isEHFuncletEntry() const { return IsEHFuncletEntry; } + + /// Indicates if this is the entry block of an EH funclet. + void setIsEHFuncletEntry(bool V = true) { IsEHFuncletEntry = V; } + + /// Returns true if this is the entry block of a cleanup funclet. + bool isCleanupFuncletEntry() const { return IsCleanupFuncletEntry; } + + /// Indicates if this is the entry block of a cleanup funclet. + void setIsCleanupFuncletEntry(bool V = true) { IsCleanupFuncletEntry = V; } + // Code Layout methods. - /// moveBefore/moveAfter - move 'this' block before or after the specified - /// block. This only moves the block, it does not modify the CFG or adjust - /// potential fall-throughs at the end of the block. + /// Move 'this' block before or after the specified block. This only moves + /// the block, it does not modify the CFG or adjust potential fall-throughs at + /// the end of the block. void moveBefore(MachineBasicBlock *NewAfter); void moveAfter(MachineBasicBlock *NewBefore); - /// updateTerminator - Update the terminator instructions in block to account - /// for changes to the layout. If the block previously used a fallthrough, - /// it may now need a branch, and if it previously used branching it may now - /// be able to use a fallthrough. + /// Update the terminator instructions in block to account for changes to the + /// layout. If the block previously used a fallthrough, it may now need a + /// branch, and if it previously used branching it may now be able to use a + /// fallthrough. void updateTerminator(); // Machine-CFG mutators - /// addSuccessor - Add succ as a successor of this MachineBasicBlock. - /// The Predecessors list of succ is automatically updated. WEIGHT - /// parameter is stored in Weights list and it may be used by - /// MachineBranchProbabilityInfo analysis to calculate branch probability. + /// Add Succ as a successor of this MachineBasicBlock. The Predecessors list + /// of Succ is automatically updated. PROB parameter is stored in + /// Probabilities list. The default probability is set as unknown. Mixing + /// known and unknown probabilities in successor list is not allowed. When all + /// successors have unknown probabilities, 1 / N is returned as the + /// probability for each successor, where N is the number of successors. /// /// Note that duplicate Machine CFG edges are not allowed. - /// - void addSuccessor(MachineBasicBlock *succ, uint32_t weight = 0); + void addSuccessor(MachineBasicBlock *Succ, + BranchProbability Prob = BranchProbability::getUnknown()); - /// Set successor weight of a given iterator. - void setSuccWeight(succ_iterator I, uint32_t weight); + /// Add Succ as a successor of this MachineBasicBlock. The Predecessors list + /// of Succ is automatically updated. The probability is not provided because + /// BPI is not available (e.g. -O0 is used), in which case edge probabilities + /// won't be used. Using this interface can save some space. + void addSuccessorWithoutProb(MachineBasicBlock *Succ); - /// removeSuccessor - Remove successor from the successors list of this - /// MachineBasicBlock. The Predecessors list of succ is automatically updated. - /// - void removeSuccessor(MachineBasicBlock *succ); + /// Set successor probability of a given iterator. 
+ void setSuccProbability(succ_iterator I, BranchProbability Prob); - /// removeSuccessor - Remove specified successor from the successors list of - /// this MachineBasicBlock. The Predecessors list of succ is automatically - /// updated. Return the iterator to the element after the one removed. - /// - succ_iterator removeSuccessor(succ_iterator I); + /// Normalize probabilities of all successors so that the sum of them becomes + /// one. This is usually done when the current update on this MBB is done, and + /// the sum of its successors' probabilities is not guaranteed to be one. The + /// user is responsible for the correct use of this function. + /// MBB::removeSuccessor() has an option to do this automatically. + void normalizeSuccProbs() { + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + } - /// replaceSuccessor - Replace successor OLD with NEW and update weight info. - /// + /// Validate successors' probabilities and check if the sum of them is + /// approximate one. This only works in DEBUG mode. + void validateSuccProbs() const; + + /// Remove successor from the successors list of this MachineBasicBlock. The + /// Predecessors list of Succ is automatically updated. + /// If NormalizeSuccProbs is true, then normalize successors' probabilities + /// after the successor is removed. + void removeSuccessor(MachineBasicBlock *Succ, + bool NormalizeSuccProbs = false); + + /// Remove specified successor from the successors list of this + /// MachineBasicBlock. The Predecessors list of Succ is automatically updated. + /// If NormalizeSuccProbs is true, then normalize successors' probabilities + /// after the successor is removed. + /// Return the iterator to the element after the one removed. + succ_iterator removeSuccessor(succ_iterator I, + bool NormalizeSuccProbs = false); + + /// Replace successor OLD with NEW and update probability info. void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New); + /// Transfers all the successors from MBB to this machine basic block (i.e., + /// copies all the successors FromMBB and remove all the successors from + /// FromMBB). + void transferSuccessors(MachineBasicBlock *FromMBB); - /// transferSuccessors - Transfers all the successors from MBB to this - /// machine basic block (i.e., copies all the successors fromMBB and - /// remove all the successors from fromMBB). - void transferSuccessors(MachineBasicBlock *fromMBB); + /// Transfers all the successors, as in transferSuccessors, and update PHI + /// operands in the successor blocks which refer to FromMBB to refer to this. + void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB); - /// transferSuccessorsAndUpdatePHIs - Transfers all the successors, as - /// in transferSuccessors, and update PHI operands in the successor blocks - /// which refer to fromMBB to refer to this. - void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB); + /// Return true if any of the successors have probabilities attached to them. + bool hasSuccessorProbabilities() const { return !Probs.empty(); } - /// isPredecessor - Return true if the specified MBB is a predecessor of this - /// block. + /// Return true if the specified MBB is a predecessor of this block. bool isPredecessor(const MachineBasicBlock *MBB) const; - /// isSuccessor - Return true if the specified MBB is a successor of this - /// block. + /// Return true if the specified MBB is a successor of this block. 
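A sketch of how the probability-based successor API above is meant to be used; MBB, OtherMBB, ThenMBB, ElseMBB and NextMBB are placeholders for blocks owned by the caller:

  // Profile data available: attach explicit probabilities, then normalize so
  // they sum to one. Mixing known and unknown probabilities is not allowed.
  MBB->addSuccessor(ThenMBB, llvm::BranchProbability(1, 4));
  MBB->addSuccessor(ElseMBB, llvm::BranchProbability(3, 4));
  MBB->normalizeSuccProbs();

  // No profile data (e.g. at -O0): skip probabilities entirely and save space.
  OtherMBB->addSuccessorWithoutProb(NextMBB);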
bool isSuccessor(const MachineBasicBlock *MBB) const; - /// isLayoutSuccessor - Return true if the specified MBB will be emitted - /// immediately after this block, such that if this block exits by - /// falling through, control will transfer to the specified MBB. Note - /// that MBB need not be a successor at all, for example if this block - /// ends with an unconditional branch to some other block. + /// Return true if the specified MBB will be emitted immediately after this + /// block, such that if this block exits by falling through, control will + /// transfer to the specified MBB. Note that MBB need not be a successor at + /// all, for example if this block ends with an unconditional branch to some + /// other block. bool isLayoutSuccessor(const MachineBasicBlock *MBB) const; - /// canFallThrough - Return true if the block can implicitly transfer - /// control to the block after it by falling off the end of it. This should - /// return false if it can reach the block after it, but it uses an explicit - /// branch to do so (e.g., a table jump). True is a conservative answer. + /// Return true if the block can implicitly transfer control to the block + /// after it by falling off the end of it. This should return false if it can + /// reach the block after it, but it uses an explicit branch to do so (e.g., a + /// table jump). True is a conservative answer. bool canFallThrough(); /// Returns a pointer to the first instruction in this block that is not a @@ -452,40 +522,44 @@ public: /// Returns end() is there's no non-PHI instruction. iterator getFirstNonPHI(); - /// SkipPHIsAndLabels - Return the first instruction in MBB after I that is - /// not a PHI or a label. This is the correct point to insert copies at the - /// beginning of a basic block. + /// Return the first instruction in MBB after I that is not a PHI or a label. + /// This is the correct point to insert copies at the beginning of a basic + /// block. iterator SkipPHIsAndLabels(iterator I); - /// getFirstTerminator - returns an iterator to the first terminator - /// instruction of this basic block. If a terminator does not exist, - /// it returns end() + /// Returns an iterator to the first terminator instruction of this basic + /// block. If a terminator does not exist, it returns end(). iterator getFirstTerminator(); const_iterator getFirstTerminator() const { return const_cast(this)->getFirstTerminator(); } - /// getFirstInstrTerminator - Same getFirstTerminator but it ignores bundles - /// and return an instr_iterator instead. + /// Same getFirstTerminator but it ignores bundles and return an + /// instr_iterator instead. instr_iterator getFirstInstrTerminator(); - /// getFirstNonDebugInstr - returns an iterator to the first non-debug - /// instruction in the basic block, or end() + /// Returns an iterator to the first non-debug instruction in the basic block, + /// or end(). iterator getFirstNonDebugInstr(); const_iterator getFirstNonDebugInstr() const { return const_cast(this)->getFirstNonDebugInstr(); } - /// getLastNonDebugInstr - returns an iterator to the last non-debug - /// instruction in the basic block, or end() + /// Returns an iterator to the last non-debug instruction in the basic block, + /// or end(). 
iterator getLastNonDebugInstr(); const_iterator getLastNonDebugInstr() const { return const_cast(this)->getLastNonDebugInstr(); } - /// SplitCriticalEdge - Split the critical edge from this block to the - /// given successor block, and return the newly created block, or null - /// if splitting is not possible. + /// Convenience function that returns true if the block ends in a return + /// instruction. + bool isReturnBlock() const { + return !empty() && back().isReturn(); + } + + /// Split the critical edge from this block to the given successor block, and + /// return the newly created block, or null if splitting is not possible. /// /// This function updates LiveVariables, MachineDominatorTree, and /// MachineLoopInfo, as applicable. @@ -570,7 +644,7 @@ public: /// remove_instr to remove individual instructions from a bundle. MachineInstr *remove(MachineInstr *I) { assert(!I->isBundled() && "Cannot remove bundled instructions"); - return Insts.remove(I); + return Insts.remove(instr_iterator(I)); } /// Remove the possibly bundled instruction from the instruction list @@ -605,30 +679,29 @@ public: From.getInstrIterator(), To.getInstrIterator()); } - /// removeFromParent - This method unlinks 'this' from the containing - /// function, and returns it, but does not delete it. + /// This method unlinks 'this' from the containing function, and returns it, + /// but does not delete it. MachineBasicBlock *removeFromParent(); - /// eraseFromParent - This method unlinks 'this' from the containing - /// function and deletes it. + /// This method unlinks 'this' from the containing function and deletes it. void eraseFromParent(); - /// ReplaceUsesOfBlockWith - Given a machine basic block that branched to - /// 'Old', change the code and CFG so that it branches to 'New' instead. + /// Given a machine basic block that branched to 'Old', change the code and + /// CFG so that it branches to 'New' instead. void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New); - /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in - /// the CFG to be inserted. If we have proven that MBB can only branch to - /// DestA and DestB, remove any other MBB successors from the CFG. DestA and - /// DestB can be null. Besides DestA and DestB, retain other edges leading - /// to LandingPads (currently there can be only one; we don't check or require - /// that here). Note it is possible that DestA and/or DestB are LandingPads. + /// Various pieces of code can cause excess edges in the CFG to be inserted. + /// If we have proven that MBB can only branch to DestA and DestB, remove any + /// other MBB successors from the CFG. DestA and DestB can be null. Besides + /// DestA and DestB, retain other edges leading to LandingPads (currently + /// there can be only one; we don't check or require that here). Note it is + /// possible that DestA and/or DestB are LandingPads. bool CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock *DestB, - bool isCond); + bool IsCond); - /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping - /// any DBG_VALUE instructions. Return UnknownLoc if there is none. + /// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE + /// instructions. Return UnknownLoc if there is none. 
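For orientation, a sketch of the usual way these helpers combine when inserting compensation code; TII, DstReg and SrcReg are assumed to come from the surrounding pass, and BuildMI from MachineInstrBuilder.h:

  // Insert a COPY right before the first terminator so it executes on every
  // path that leaves the block, and give it a sensible debug location.
  llvm::MachineBasicBlock::iterator IP = MBB.getFirstTerminator();
  llvm::DebugLoc DL = MBB.findDebugLoc(IP);
  BuildMI(MBB, IP, DL, TII->get(llvm::TargetOpcode::COPY), DstReg)
      .addReg(SrcReg);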
   DebugLoc findDebugLoc(instr_iterator MBBI);
   DebugLoc findDebugLoc(iterator MBBI) {
     return findDebugLoc(MBBI.getInstrIterator());
@@ -636,12 +709,9 @@ public:

   /// Possible outcome of a register liveness query to computeRegisterLiveness()
   enum LivenessQueryResult {
-    LQR_Live,            ///< Register is known to be live.
-    LQR_OverlappingLive, ///< Register itself is not live, but some overlapping
-                         ///< register is.
-    LQR_Dead,            ///< Register is known to be dead.
-    LQR_Unknown          ///< Register liveness not decidable from local
-                         ///< neighborhood.
+    LQR_Live,   ///< Register is known to be (at least partially) live.
+    LQR_Dead,   ///< Register is known to be fully dead.
+    LQR_Unknown ///< Register liveness not decidable from local neighborhood.
   };

   /// Return whether (physical) register \p Reg has been <def>ined and not
@@ -666,49 +736,43 @@ public:
   // Printing method used by LoopInfo.
   void printAsOperand(raw_ostream &OS, bool PrintType = true) const;

-  /// getNumber - MachineBasicBlocks are uniquely numbered at the function
-  /// level, unless they're not in a MachineFunction yet, in which case this
-  /// will return -1.
-  ///
+  /// MachineBasicBlocks are uniquely numbered at the function level, unless
+  /// they're not in a MachineFunction yet, in which case this will return -1.
   int getNumber() const { return Number; }
   void setNumber(int N) { Number = N; }

-  /// getSymbol - Return the MCSymbol for this basic block.
-  ///
+  /// Return the MCSymbol for this basic block.
   MCSymbol *getSymbol() const;

 private:
-  /// getWeightIterator - Return weight iterator corresponding to the I
-  /// successor iterator.
-  weight_iterator getWeightIterator(succ_iterator I);
-  const_weight_iterator getWeightIterator(const_succ_iterator I) const;
+  /// Return probability iterator corresponding to the I successor iterator.
+  probability_iterator getProbabilityIterator(succ_iterator I);
+  const_probability_iterator
+  getProbabilityIterator(const_succ_iterator I) const;

   friend class MachineBranchProbabilityInfo;
+  friend class MIPrinter;

-  /// getSuccWeight - Return weight of the edge from this block to MBB. This
-  /// method should NOT be called directly, but by using getEdgeWeight method
-  /// from MachineBranchProbabilityInfo class.
-  uint32_t getSuccWeight(const_succ_iterator Succ) const;
-
+  /// Return probability of the edge from this block to MBB. This method should
+  /// NOT be called directly, but by using the getEdgeProbability method from
+  /// the MachineBranchProbabilityInfo class.
+  BranchProbability getSuccProbability(const_succ_iterator Succ) const;

   // Methods used to maintain doubly linked list of blocks...
   friend struct ilist_traits<MachineBasicBlock>;

   // Machine-CFG mutators

-  /// addPredecessor - Remove pred as a predecessor of this MachineBasicBlock.
-  /// Don't do this unless you know what you're doing, because it doesn't
-  /// update pred's successors list. Use pred->addSuccessor instead.
-  ///
-  void addPredecessor(MachineBasicBlock *pred);
+  /// Add Pred as a predecessor of this MachineBasicBlock. Don't do this
+  /// unless you know what you're doing, because it doesn't update Pred's
+  /// successors list. Use Pred->addSuccessor instead.
+  void addPredecessor(MachineBasicBlock *Pred);

-  /// removePredecessor - Remove pred as a predecessor of this
-  /// MachineBasicBlock. Don't do this unless you know what you're
-  /// doing, because it doesn't update pred's successors list. Use
-  /// pred->removeSuccessor instead.
-  ///
-  void removePredecessor(MachineBasicBlock *pred);
+  /// Remove Pred as a predecessor of this MachineBasicBlock. Don't do this
+  /// unless you know what you're doing, because it doesn't update Pred's
+  /// successors list. Use Pred->removeSuccessor instead.
+  void removePredecessor(MachineBasicBlock *Pred);
 };

 raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB);

@@ -726,7 +790,7 @@ struct MBB2NumberFunctor :

 //===--------------------------------------------------------------------===//
 // Provide specializations of GraphTraits to be able to treat a
-// MachineFunction as a graph of MachineBasicBlocks...
+// MachineFunction as a graph of MachineBasicBlocks.
 //

 template <> struct GraphTraits<MachineBasicBlock *> {
@@ -756,7 +820,7 @@ template <> struct GraphTraits<const MachineBasicBlock *> {
 };

 // Provide specializations of GraphTraits to be able to treat a
-// MachineFunction as a graph of MachineBasicBlocks... and to walk it
+// MachineFunction as a graph of MachineBasicBlocks and to walk it
 // in inverse order. Inverse order for a function is considered
 // to be when traversing the predecessor edges of a MBB
 // instead of the successor edges.
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 7ba749559c0f..81b0524cf0a4 100644
--- a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -18,6 +18,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/BranchProbability.h"
 #include <climits>
+#include <numeric>

 namespace llvm {

@@ -44,20 +45,15 @@ public:
     AU.setPreservesAll();
   }

-  // Return edge weight. If we don't have any informations about it - return
-  // DEFAULT_WEIGHT.
-  uint32_t getEdgeWeight(const MachineBasicBlock *Src,
-                         const MachineBasicBlock *Dst) const;
+  // Return edge probability.
+  BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
+                                       const MachineBasicBlock *Dst) const;

-  // Same thing, but using a const_succ_iterator from Src. This is faster when
-  // the iterator is already available.
-  uint32_t getEdgeWeight(const MachineBasicBlock *Src,
-                         MachineBasicBlock::const_succ_iterator Dst) const;
-
-  // Get sum of the block successors' weights, potentially scaling them to fit
-  // within 32-bits. If scaling is required, sets Scale based on the necessary
-  // adjustment. Any edge weights used with the sum should be divided by Scale.
-  uint32_t getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const;
+  // Same as above, but using a const_succ_iterator from Src. This is faster
+  // when the iterator is already available.
+  BranchProbability
+  getEdgeProbability(const MachineBasicBlock *Src,
+                     MachineBasicBlock::const_succ_iterator Dst) const;

   // A 'Hot' edge is an edge which probability is >= 80%.
   bool isEdgeHot(const MachineBasicBlock *Src,
@@ -67,15 +63,6 @@ public:
   // NB: This routine's complexity is linear on the number of successors.
   MachineBasicBlock *getHotSucc(MachineBasicBlock *MBB) const;

-  // Return a probability as a fraction between 0 (0% probability) and
-  // 1 (100% probability), however the value is never equal to 0, and can be 1
-  // only iff SRC block has only one successor.
-  // NB: This routine's complexity is linear on the number of successors of
-  // Src. Querying sequentially for each successor's probability is a quadratic
-  // query pattern.
-  BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
-                                       const MachineBasicBlock *Dst) const;
-
   // Print value between 0 (0% probability) and 1 (100% probability),
   // however the value is never equal to 0, and can be 1 only iff SRC block
   // has only one successor.
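A sketch of a client migrating from the removed weight-based API; MBPI, Src and Dst are assumed to be supplied by the caller:

  // getEdgeProbability() replaces getEdgeWeight(); there is no longer any
  // need to rescale by getSumForBlock() by hand.
  static bool isLikelyTaken(const llvm::MachineBranchProbabilityInfo &MBPI,
                            const llvm::MachineBasicBlock *Src,
                            const llvm::MachineBasicBlock *Dst) {
    // Matches the 80% threshold that isEdgeHot() documents above.
    return MBPI.getEdgeProbability(Src, Dst) >= llvm::BranchProbability(4, 5);
  }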
diff --git a/include/llvm/CodeGen/MachineCombinerPattern.h b/include/llvm/CodeGen/MachineCombinerPattern.h
index 176af14dc317..f3891227746f 100644
--- a/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -17,13 +17,30 @@

 namespace llvm {

-/// Enumeration of instruction pattern supported by machine combiner
-///
-///
-namespace MachineCombinerPattern {
-// Forward declaration
-enum MC_PATTERN : int;
-} // end namespace MachineCombinerPattern
+/// These are instruction patterns matched by the machine combiner pass.
+enum class MachineCombinerPattern {
+  // These are commutative variants for reassociating a computation chain. See
+  // the comments before getMachineCombinerPatterns() in TargetInstrInfo.cpp.
+  REASSOC_AX_BY,
+  REASSOC_AX_YB,
+  REASSOC_XA_BY,
+  REASSOC_XA_YB,
+
+  // These are multiply-add patterns matched by the AArch64 machine combiner.
+  MULADDW_OP1,
+  MULADDW_OP2,
+  MULSUBW_OP1,
+  MULSUBW_OP2,
+  MULADDWI_OP1,
+  MULSUBWI_OP1,
+  MULADDX_OP1,
+  MULADDX_OP2,
+  MULSUBX_OP1,
+  MULSUBX_OP2,
+  MULADDXI_OP1,
+  MULSUBXI_OP1
+};
+
 } // end namespace llvm

 #endif
diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h
index 628400322f60..d2036c4a29a5 100644
--- a/include/llvm/CodeGen/MachineConstantPool.h
+++ b/include/llvm/CodeGen/MachineConstantPool.h
@@ -46,13 +46,6 @@ public:
   ///
   Type *getType() const { return Ty; }

-
-  /// getRelocationInfo - This method classifies the entry according to
-  /// whether or not it may generate a relocation entry. This must be
-  /// conservative, so if it might codegen to a relocatable entry, it should say
-  /// so. The return values are the same as Constant::getRelocationInfo().
-  virtual unsigned getRelocationInfo() const = 0;
-
   virtual int getExistingMachineCPValue(MachineConstantPool *CP,
                                         unsigned Alignment) = 0;

@@ -67,7 +60,6 @@ inline raw_ostream &operator<<(raw_ostream &OS,
   V.print(OS);
   return OS;
 }
-

 /// This class is a data container for one entry in a MachineConstantPool.
 /// It contains a pointer to the value and an offset from the start of
@@ -90,9 +82,9 @@ public:
     Val.ConstVal = V;
   }
   MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A)
-    : Alignment(A) {
-    Val.MachineCPVal = V;
-    Alignment |= 1U << (sizeof(unsigned)*CHAR_BIT-1);
+      : Alignment(A) {
+    Val.MachineCPVal = V;
+    Alignment |= 1U << (sizeof(unsigned) * CHAR_BIT - 1);
   }

   /// isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry
@@ -102,28 +94,20 @@ public:
     return (int)Alignment < 0;
   }

-  int getAlignment() const {
-    return Alignment & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
+  int getAlignment() const {
+    return Alignment & ~(1 << (sizeof(unsigned) * CHAR_BIT - 1));
   }

   Type *getType() const;
-
-  /// getRelocationInfo - This method classifies the entry according to
-  /// whether or not it may generate a relocation entry. This must be
-  /// conservative, so if it might codegen to a relocatable entry, it should say
-  /// so. The return values are:
-  ///
-  /// 0: This constant pool entry is guaranteed to never have a relocation
-  ///    applied to it (because it holds a simple constant like '4').
-  /// 1: This entry has relocations, but the entries are guaranteed to be
-  ///    resolvable by the static linker, so the dynamic linker will never see
-  ///    them.
-  /// 2: This entry may have arbitrary relocations.
-  unsigned getRelocationInfo() const;
+
+  /// This method classifies the entry according to whether or not it may
+  /// generate a relocation entry. This must be conservative, so if it might
This must be conservative, so if it might + /// codegen to a relocatable entry, it should say so. + bool needsRelocation() const; SectionKind getSectionKind(const DataLayout *DL) const; }; - + /// The MachineConstantPool class keeps track of constants referenced by a /// function which must be spilled to memory. This is used for constants which /// are unable to be used directly as operands to instructions, which typically @@ -148,17 +132,18 @@ public: explicit MachineConstantPool(const DataLayout &DL) : PoolAlignment(1), DL(DL) {} ~MachineConstantPool(); - + /// getConstantPoolAlignment - Return the alignment required by /// the whole constant pool, of which the first element must be aligned. unsigned getConstantPoolAlignment() const { return PoolAlignment; } - + /// getConstantPoolIndex - Create a new entry in the constant pool or return /// an existing one. User must specify the minimum required alignment for /// the object. unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment); - unsigned getConstantPoolIndex(MachineConstantPoolValue *V,unsigned Alignment); - + unsigned getConstantPoolIndex(MachineConstantPoolValue *V, + unsigned Alignment); + /// isEmpty - Return true if this constant pool contains no constants. bool isEmpty() const { return Constants.empty(); } diff --git a/include/llvm/CodeGen/MachineDominators.h b/include/llvm/CodeGen/MachineDominators.h index 735dd069cf7f..a69936f6e267 100644 --- a/include/llvm/CodeGen/MachineDominators.h +++ b/include/llvm/CodeGen/MachineDominators.h @@ -246,21 +246,29 @@ public: /// iterable by generic graph iterators. /// -template struct GraphTraits; +template +struct MachineDomTreeGraphTraitsBase { + typedef Node NodeType; + typedef ChildIterator ChildIteratorType; -template <> struct GraphTraits { - typedef MachineDomTreeNode NodeType; - typedef NodeType::iterator ChildIteratorType; - - static NodeType *getEntryNode(NodeType *N) { - return N; - } - static inline ChildIteratorType child_begin(NodeType* N) { + static NodeType *getEntryNode(NodeType *N) { return N; } + static inline ChildIteratorType child_begin(NodeType *N) { return N->begin(); } - static inline ChildIteratorType child_end(NodeType* N) { - return N->end(); - } + static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } +}; + +template struct GraphTraits; + +template <> +struct GraphTraits + : public MachineDomTreeGraphTraitsBase {}; + +template <> +struct GraphTraits + : public MachineDomTreeGraphTraitsBase { }; template <> struct GraphTraits diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index cbc4e66ccc46..48e8ca75052e 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -101,6 +101,13 @@ class MachineFrameInfo { // cannot alias any other memory objects. bool isSpillSlot; + /// If true, this stack slot is used to spill a value (could be deopt + /// and/or GC related) over a statepoint. We know that the address of the + /// slot can't alias any LLVM IR value. This is very similar to a Spill + /// Slot, but is created by statepoint lowering in SelectionDAG, not the + /// register allocator. + bool isStatepointSpillSlot; + /// If this stack object is originated from an Alloca instruction /// this value saves the original IR allocation. Can be NULL.
const AllocaInst *Alloca; @@ -118,13 +125,24 @@ class MachineFrameInfo { StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, bool isSS, const AllocaInst *Val, bool A) : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), - isSpillSlot(isSS), Alloca(Val), PreAllocated(false), isAliased(A) {} + isSpillSlot(isSS), isStatepointSpillSlot(false), Alloca(Val), + PreAllocated(false), isAliased(A) {} }; /// The alignment of the stack. unsigned StackAlignment; /// Can the stack be realigned. + /// Targets that set this to false don't have the ability to overalign + /// their stack frame, and thus, overaligned allocas are all treated + /// as dynamic allocations and the target must handle them as part + /// of DYNAMIC_STACKALLOC lowering. + /// FIXME: There is room for improvement in this case, in terms of + /// grouping overaligned allocas into a "secondary stack frame" and + /// then only using a single alloca to allocate this frame and only a + /// single virtual register to access it. Currently, without such an + /// optimization, each such alloca gets its own dynamic + /// realignment. bool StackRealignable; /// The list of stack objects allocated. @@ -168,7 +186,7 @@ class MachineFrameInfo { /// SP then OffsetAdjustment is zero; if FP is used, OffsetAdjustment is set /// to the distance between the initial SP and the value in FP. For many /// targets, this value is only used when generating debug info (via - /// TargetRegisterInfo::getFrameIndexOffset); when generating code, the + /// TargetRegisterInfo::getFrameIndexReference); when generating code, the /// corresponding adjustments are performed directly. int OffsetAdjustment; @@ -198,7 +216,7 @@ class MachineFrameInfo { /// This contains the size of the largest call frame if the target uses frame /// setup/destroy pseudo instructions (as defined in the TargetFrameInfo /// class). This information is important for frame pointer elimination. - /// If is only valid during and after prolog/epilog code insertion. + /// It is only valid during and after prolog/epilog code insertion. unsigned MaxCallFrameSize; /// The prolog/epilog code inserter fills in this vector with each @@ -288,6 +306,7 @@ public: /// Return the index for the stack protector object. int getStackProtectorIndex() const { return StackProtectorIdx; } void setStackProtectorIndex(int I) { StackProtectorIdx = I; } + bool hasStackProtectorIndex() const { return StackProtectorIdx != -1; } /// Return the index for the function context object. /// This object is used for SjLj exceptions. @@ -337,14 +356,14 @@ public: } /// Get the local offset mapping for an object. - std::pair getLocalFrameObjectMap(int i) { + std::pair getLocalFrameObjectMap(int i) const { assert (i >= 0 && (unsigned)i < LocalFrameObjects.size() && "Invalid local object reference!"); return LocalFrameObjects[i]; } /// Return the number of objects allocated into the local object block. - int64_t getLocalFrameObjectCount() { return LocalFrameObjects.size(); } + int64_t getLocalFrameObjectCount() const { return LocalFrameObjects.size(); } /// Set the size of the local object blob. void setLocalFrameSize(int64_t sz) { LocalFrameSize = sz; } @@ -361,7 +380,9 @@ public: /// Get whether the local allocation blob should be allocated together or /// let PEI allocate the locals in it directly.
- bool getUseLocalStackAllocationBlock() {return UseLocalStackAllocationBlock;} + bool getUseLocalStackAllocationBlock() const { + return UseLocalStackAllocationBlock; + } /// setUseLocalStackAllocationBlock - Set whether the local allocation blob /// should be allocated together or let PEI allocate the locals in it @@ -534,6 +555,12 @@ public: return Objects[ObjectIdx+NumFixedObjects].isSpillSlot; } + bool isStatepointSpillSlotObjectIndex(int ObjectIdx) const { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + return Objects[ObjectIdx+NumFixedObjects].isStatepointSpillSlot; + } + /// Returns true if the specified index corresponds to a dead object. bool isDeadObjectIndex(int ObjectIdx) const { assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && @@ -549,6 +576,13 @@ public: return Objects[ObjectIdx + NumFixedObjects].Size == 0; } + void markAsStatepointSpillSlotObjectIndex(int ObjectIdx) { + assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() && + "Invalid Object Idx!"); + Objects[ObjectIdx+NumFixedObjects].isStatepointSpillSlot = true; + assert(isStatepointSpillSlotObjectIndex(ObjectIdx) && "inconsistent"); + } + /// Create a new statically sized stack object, returning /// a nonnegative identifier to represent it. int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index c15ee1c006cd..82c30d39afd6 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -38,10 +38,12 @@ class MachineJumpTableInfo; class MachineModuleInfo; class MCContext; class Pass; +class PseudoSourceValueManager; class TargetMachine; class TargetSubtargetInfo; class TargetRegisterClass; struct MachinePointerInfo; +struct WinEHFuncInfo; template <> struct ilist_traits @@ -102,10 +104,14 @@ class MachineFunction { // Keep track of constants which are spilled to memory MachineConstantPool *ConstantPool; - + // Keep track of jump tables for switch instructions MachineJumpTableInfo *JumpTableInfo; + // Keeps track of Windows exception handling related data. This will be null + // for functions that aren't using a funclet-based EH personality. + WinEHFuncInfo *WinEHInfo = nullptr; + // Function-level unique numbering for MachineBasicBlocks. When a // MachineBasicBlock is inserted into a MachineFunction is it automatically // numbered and this vector keeps track of the mapping from ID's to MBB's. @@ -131,7 +137,7 @@ class MachineFunction { /// this translation unit. /// unsigned FunctionNumber; - + /// Alignment - The alignment of the function. unsigned Alignment; @@ -145,6 +151,9 @@ class MachineFunction { /// True if the function includes any inline assembly. bool HasInlineAsm; + // Allocation management for pseudo source values. + std::unique_ptr PSVManager; + MachineFunction(const MachineFunction &) = delete; void operator=(const MachineFunction&) = delete; public: @@ -155,6 +164,8 @@ public: MachineModuleInfo &getMMI() const { return MMI; } MCContext &getContext() const { return Ctx; } + PseudoSourceValueManager &getPSVManager() const { return *PSVManager; } + /// Return the DataLayout attached to the Module associated to this MF. 
const DataLayout &getDataLayout() const; @@ -198,7 +209,7 @@ public: MachineFrameInfo *getFrameInfo() { return FrameInfo; } const MachineFrameInfo *getFrameInfo() const { return FrameInfo; } - /// getJumpTableInfo - Return the jump table info object for the current + /// getJumpTableInfo - Return the jump table info object for the current /// function. This object contains information about jump tables in the /// current function. If the current function has no jump tables, this will /// return null. @@ -209,13 +220,18 @@ public: /// does already exist, allocate one. MachineJumpTableInfo *getOrCreateJumpTableInfo(unsigned JTEntryKind); - /// getConstantPool - Return the constant pool object for the current /// function. /// MachineConstantPool *getConstantPool() { return ConstantPool; } const MachineConstantPool *getConstantPool() const { return ConstantPool; } + /// getWinEHFuncInfo - Return information about how the current function uses + /// Windows exception handling. Returns null for functions that don't use + /// funclets for exception handling. + const WinEHFuncInfo *getWinEHFuncInfo() const { return WinEHInfo; } + WinEHFuncInfo *getWinEHFuncInfo() { return WinEHInfo; } + /// getAlignment - Return the alignment (log2, not bytes) of the function. /// unsigned getAlignment() const { return Alignment; } @@ -284,14 +300,14 @@ public: /// getNumBlockIDs - Return the number of MBB ID's allocated. /// unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); } - + /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and /// recomputes them. This guarantees that the MBB numbers are sequential, /// dense, and match the ordering of the blocks within the function. If a /// specific MachineBasicBlock is specified, only that block and those after /// it are renumbered. void RenumberBlocks(MachineBasicBlock *MBBFrom = nullptr); - + /// print - Print out the MachineFunction in a format suitable for debugging /// to the specified stream. /// @@ -326,6 +342,12 @@ public: typedef std::reverse_iterator const_reverse_iterator; typedef std::reverse_iterator reverse_iterator; + /// Support for MachineBasicBlock::getNextNode(). + static BasicBlockListType MachineFunction::* + getSublistAccess(MachineBasicBlock *) { + return &MachineFunction::BasicBlocks; + } + /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. 
unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC); @@ -358,15 +380,21 @@ public: void splice(iterator InsertPt, iterator MBBI) { BasicBlocks.splice(InsertPt, BasicBlocks, MBBI); } + void splice(iterator InsertPt, MachineBasicBlock *MBB) { + BasicBlocks.splice(InsertPt, BasicBlocks, MBB); + } void splice(iterator InsertPt, iterator MBBI, iterator MBBE) { BasicBlocks.splice(InsertPt, BasicBlocks, MBBI, MBBE); } - void remove(iterator MBBI) { - BasicBlocks.remove(MBBI); - } - void erase(iterator MBBI) { - BasicBlocks.erase(MBBI); + void remove(iterator MBBI) { BasicBlocks.remove(MBBI); } + void remove(MachineBasicBlock *MBBI) { BasicBlocks.remove(MBBI); } + void erase(iterator MBBI) { BasicBlocks.erase(MBBI); } + void erase(MachineBasicBlock *MBBI) { BasicBlocks.erase(MBBI); } + + template + void sort(Comp comp) { + BasicBlocks.sort(comp); } //===--------------------------------------------------------------------===// @@ -425,7 +453,7 @@ public: unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(), const MDNode *Ranges = nullptr); - + /// getMachineMemOperand - Allocate a new MachineMemOperand by copying /// an existing one, adjusting by an offset and using the given size. /// MachineMemOperands are owned by the MachineFunction and need not be @@ -475,16 +503,19 @@ public: extractStoreMemRefs(MachineInstr::mmo_iterator Begin, MachineInstr::mmo_iterator End); + /// Allocate a string and populate it with the given external symbol name. + const char *createExternalSymbolName(StringRef Name); + //===--------------------------------------------------------------------===// // Label Manipulation. // - + /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a /// normal 'L' label is returned. - MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx, + MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate = false) const; - + /// getPICBaseSymbol - Return a function-local symbol to represent the PIC /// base. MCSymbol *getPICBaseSymbol() const; diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index de7e0a29ea0d..978864e96ca5 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -23,6 +23,7 @@ #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" @@ -34,7 +35,6 @@ namespace llvm { template class SmallVectorImpl; -class AliasAnalysis; class TargetInstrInfo; class TargetRegisterClass; class TargetRegisterInfo; @@ -48,7 +48,8 @@ class MachineMemOperand; /// MachineFunction is deleted, all the contained MachineInstrs are deallocated /// without having their destructor called. /// -class MachineInstr : public ilist_node { +class MachineInstr + : public ilist_node_with_parent { public: typedef MachineMemOperand **mmo_iterator; @@ -64,8 +65,10 @@ public: NoFlags = 0, FrameSetup = 1 << 0, // Instruction is used as a part of // function frame setup code. - BundledPred = 1 << 1, // Instruction has bundled predecessors. - BundledSucc = 1 << 2 // Instruction has bundled successors. + FrameDestroy = 1 << 1, // Instruction is used as a part of + // function frame destruction code. + BundledPred = 1 << 2, // Instruction has bundled predecessors. + BundledSucc = 1 << 3 // Instruction has bundled successors. 
}; private: const MCInstrDesc *MCID; // Instruction descriptor. @@ -89,6 +92,12 @@ private: // information to AsmPrinter. uint8_t NumMemRefs; // Information on memory references. + // Note that MemRefs == nullptr means 'don't know', not 'no memory access'. + // Calling code must treat missing information conservatively. If the number + // of memory operands required to be precise exceeds the maximum value of + // NumMemRefs - currently 256 - we remove the operands entirely. Note also + // that this is a non-owning reference to a shared copy-on-write buffer owned + // by the MachineFunction and created via MF.allocateMemRefsArray. mmo_iterator MemRefs; DebugLoc debugLoc; // Source line information. @@ -293,42 +302,46 @@ public: const_mop_iterator operands_end() const { return Operands + NumOperands; } iterator_range operands() { - return iterator_range(operands_begin(), operands_end()); + return make_range(operands_begin(), operands_end()); } iterator_range operands() const { - return iterator_range(operands_begin(), operands_end()); + return make_range(operands_begin(), operands_end()); } iterator_range explicit_operands() { - return iterator_range( - operands_begin(), operands_begin() + getNumExplicitOperands()); + return make_range(operands_begin(), + operands_begin() + getNumExplicitOperands()); } iterator_range explicit_operands() const { - return iterator_range( - operands_begin(), operands_begin() + getNumExplicitOperands()); + return make_range(operands_begin(), + operands_begin() + getNumExplicitOperands()); } iterator_range implicit_operands() { - return iterator_range(explicit_operands().end(), - operands_end()); + return make_range(explicit_operands().end(), operands_end()); } iterator_range implicit_operands() const { - return iterator_range(explicit_operands().end(), - operands_end()); + return make_range(explicit_operands().end(), operands_end()); } + /// Returns a range over all explicit operands that are register definitions. + /// Implicit definitions are not included! iterator_range defs() { - return iterator_range( - operands_begin(), operands_begin() + getDesc().getNumDefs()); + return make_range(operands_begin(), + operands_begin() + getDesc().getNumDefs()); } + /// \copydoc defs() iterator_range defs() const { - return iterator_range( - operands_begin(), operands_begin() + getDesc().getNumDefs()); + return make_range(operands_begin(), + operands_begin() + getDesc().getNumDefs()); } + /// Returns a range that includes all operands that are register uses. + /// This may include unrelated operands which are not register uses. iterator_range uses() { - return iterator_range( - operands_begin() + getDesc().getNumDefs(), operands_end()); + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_end()); } + /// \copydoc uses() iterator_range uses() const { - return iterator_range( - operands_begin() + getDesc().getNumDefs(), operands_end()); + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_end()); } /// Returns the number of the operand iterator \p I points to. @@ -339,13 +352,16 @@ public: /// Access to memory operands of the instruction mmo_iterator memoperands_begin() const { return MemRefs; } mmo_iterator memoperands_end() const { return MemRefs + NumMemRefs; } + /// Return true if we don't have any memory operands which describe the + /// memory access done by this instruction. If this is true, calling code + /// must be conservative.
bool memoperands_empty() const { return NumMemRefs == 0; } iterator_range memoperands() { - return iterator_range(memoperands_begin(), memoperands_end()); + return make_range(memoperands_begin(), memoperands_end()); } iterator_range memoperands() const { - return iterator_range(memoperands_begin(), memoperands_end()); + return make_range(memoperands_begin(), memoperands_end()); } /// Return true if this instruction has exactly one MachineMemOperand. @@ -489,8 +505,8 @@ public: } /// Return true if this instruction is convergent. - /// Convergent instructions can only be moved to locations that are - /// control-equivalent to their initial position. + /// Convergent instructions can not be made control-dependent on any + /// additional values. bool isConvergent(QueryType Type = AnyInBundle) const { return hasProperty(MCID::Convergent, Type); } @@ -897,6 +913,13 @@ public: return (Idx == -1) ? nullptr : &getOperand(Idx); } + const MachineOperand *findRegisterUseOperand( + unsigned Reg, bool isKill = false, + const TargetRegisterInfo *TRI = nullptr) const { + return const_cast(this)-> + findRegisterUseOperand(Reg, isKill, TRI); + } + /// Returns the operand index that is a def of the specified register or /// -1 if it is not found. If isDead is true, defs that are not dead are /// skipped. If Overlap is true, then it also looks for defs that merely @@ -1048,7 +1071,7 @@ public: /// Mark all subregister defs of register @p Reg with the undef flag. /// This function is used when we determined to have a subregister def in an /// otherwise undefined super register. - void addRegisterDefReadUndef(unsigned Reg); + void setRegisterDefReadUndef(unsigned Reg, bool IsUndef = true); /// We have determined MI defines a register. Make sure there is an operand /// defining Reg. @@ -1094,6 +1117,9 @@ public: /// bool hasUnmodeledSideEffects() const; + /// Returns true if it is illegal to fold a load across this instruction. + bool isLoadFoldBarrier() const; + /// Return true if all the defs of this instruction are dead. bool allDefsAreDead() const; @@ -1159,8 +1185,11 @@ public: assert(NumMemRefs == NewMemRefsEnd - NewMemRefs && "Too many memrefs"); } - /// Clear this MachineInstr's memory reference descriptor list. - void clearMemRefs() { + /// Clear this MachineInstr's memory reference descriptor list. This resets + /// the memrefs to their most conservative state. This should be used only + /// as a last resort since it greatly pessimizes our knowledge of the memory + /// access performed by the instruction. + void dropMemRefs() { MemRefs = nullptr; NumMemRefs = 0; } @@ -1174,6 +1203,8 @@ public: } } + /// Add all implicit def and use operands to this instruction. + void addImplicitDefUseOperands(MachineFunction &MF); private: /// If this instruction is embedded into a MachineFunction, return the @@ -1181,9 +1212,6 @@ private: /// return null. MachineRegisterInfo *getRegInfo(); - /// Add all implicit def and use operands to this instruction. - void addImplicitDefUseOperands(MachineFunction &MF); - /// Unlink all of the register operands in this instruction from their /// respective use lists. This requires that the operands already be on their /// use lists. 
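The comments added in the MachineInstr hunks above tighten the contract around MemRefs: an empty memoperand list means "unknown access", not "no access", and dropMemRefs deliberately resets to that most conservative state. A short sketch of what the conservative treatment looks like from a caller's side; this is illustrative only, and mustAssumeWorstCase is a hypothetical helper:

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// Hypothetical helper: must MI be treated as potentially touching any memory?
static bool mustAssumeWorstCase(const MachineInstr &MI) {
  if (!MI.mayLoad() && !MI.mayStore())
    return false;                // instruction does not access memory at all
  if (MI.memoperands_empty())
    return true;                 // MemRefs == nullptr: 'don't know', assume the worst
  for (const MachineMemOperand *MMO : MI.memoperands())
    if (MMO->isVolatile())
      return true;               // volatile accesses must not be reordered
  return false;                  // precise, non-volatile memory operands
}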
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 4f68f38b7bbf..aa5f4b24df61 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -49,11 +49,10 @@ public: MachineInstrBuilder() : MF(nullptr), MI(nullptr) {} /// Create a MachineInstrBuilder for manipulating an existing instruction. - /// F must be the machine function that was used to allocate I. + /// F must be the machine function that was used to allocate I. MachineInstrBuilder(MachineFunction &F, MachineInstr *I) : MF(&F), MI(I) {} /// Allow automatic conversion to the machine instruction we are working on. - /// operator MachineInstr*() const { return MI; } MachineInstr *operator->() const { return MI; } operator MachineBasicBlock::iterator() const { return MI; } @@ -62,11 +61,9 @@ public: /// explicitly. MachineInstr *getInstr() const { return MI; } - /// addReg - Add a new virtual register operand... - /// - const - MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0, - unsigned SubReg = 0) const { + /// Add a new virtual register operand. + const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0, + unsigned SubReg = 0) const { assert((flags & 0x1) == 0 && "Passing in 'true' to addReg is forbidden! Use enums instead."); MI->addOperand(*MF, MachineOperand::CreateReg(RegNo, @@ -82,8 +79,7 @@ public: return *this; } - /// addImm - Add a new immediate operand. - /// + /// Add a new immediate operand. const MachineInstrBuilder &addImm(int64_t Val) const { MI->addOperand(*MF, MachineOperand::CreateImm(Val)); return *this; @@ -204,44 +200,44 @@ public: // Add a displacement from an existing MachineOperand with an added offset. const MachineInstrBuilder &addDisp(const MachineOperand &Disp, int64_t off, unsigned char TargetFlags = 0) const { + // If caller specifies new TargetFlags then use it, otherwise the + // default behavior is to copy the target flags from the existing + // MachineOperand. This means if the caller wants to clear the + // target flags it needs to do so explicitly. + if (0 == TargetFlags) + TargetFlags = Disp.getTargetFlags(); + switch (Disp.getType()) { default: llvm_unreachable("Unhandled operand type in addDisp()"); case MachineOperand::MO_Immediate: return addImm(Disp.getImm() + off); - case MachineOperand::MO_GlobalAddress: { - // If caller specifies new TargetFlags then use it, otherwise the - // default behavior is to copy the target flags from the existing - // MachineOperand. This means if the caller wants to clear the - // target flags it needs to do so explicitly. - if (TargetFlags) - return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off, - TargetFlags); + case MachineOperand::MO_ConstantPoolIndex: + return addConstantPoolIndex(Disp.getIndex(), Disp.getOffset() + off, + TargetFlags); + case MachineOperand::MO_GlobalAddress: return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off, - Disp.getTargetFlags()); - } + TargetFlags); } } /// Copy all the implicit operands from OtherMI onto this one. - const MachineInstrBuilder ©ImplicitOps(const MachineInstr *OtherMI) { + const MachineInstrBuilder & + copyImplicitOps(const MachineInstr *OtherMI) const { MI->copyImplicitOps(*MF, OtherMI); return *this; } }; -/// BuildMI - Builder interface. Specify how to create the initial instruction -/// itself. -/// +/// Builder interface. Specify how to create the initial instruction itself. 
inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID) { return MachineInstrBuilder(MF, MF.CreateMachineInstr(MCID, DL)); } -/// BuildMI - This version of the builder sets up the first operand as a +/// This version of the builder sets up the first operand as a /// destination virtual register. -/// inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID, @@ -250,10 +246,9 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, .addReg(DestReg, RegState::Define); } -/// BuildMI - This version of the builder inserts the newly-built -/// instruction before the given position in the given MachineBasicBlock, and -/// sets up the first operand as a destination virtual register. -/// +/// This version of the builder inserts the newly-built instruction before +/// the given position in the given MachineBasicBlock, and sets up the first +/// operand as a destination virtual register. inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, DebugLoc DL, @@ -282,7 +277,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, const MCInstrDesc &MCID, unsigned DestReg) { if (I->isInsideBundle()) { - MachineBasicBlock::instr_iterator MII = I; + MachineBasicBlock::instr_iterator MII(I); return BuildMI(BB, MII, DL, MCID, DestReg); } @@ -290,10 +285,9 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return BuildMI(BB, MII, DL, MCID, DestReg); } -/// BuildMI - This version of the builder inserts the newly-built -/// instruction before the given position in the given MachineBasicBlock, and -/// does NOT take a destination register. -/// +/// This version of the builder inserts the newly-built instruction before the +/// given position in the given MachineBasicBlock, and does NOT take a +/// destination register. inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, DebugLoc DL, @@ -319,7 +313,7 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, DebugLoc DL, const MCInstrDesc &MCID) { if (I->isInsideBundle()) { - MachineBasicBlock::instr_iterator MII = I; + MachineBasicBlock::instr_iterator MII(I); return BuildMI(BB, MII, DL, MCID); } @@ -327,20 +321,17 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return BuildMI(BB, MII, DL, MCID); } -/// BuildMI - This version of the builder inserts the newly-built -/// instruction at the end of the given MachineBasicBlock, and does NOT take a -/// destination register. -/// +/// This version of the builder inserts the newly-built instruction at the end +/// of the given MachineBasicBlock, and does NOT take a destination register. inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, DebugLoc DL, const MCInstrDesc &MCID) { return BuildMI(*BB, BB->end(), DL, MCID); } -/// BuildMI - This version of the builder inserts the newly-built -/// instruction at the end of the given MachineBasicBlock, and sets up the first -/// operand as a destination virtual register. -/// +/// This version of the builder inserts the newly-built instruction at the +/// end of the given MachineBasicBlock, and sets up the first operand as a +/// destination virtual register. 
inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, DebugLoc DL, const MCInstrDesc &MCID, @@ -348,11 +339,10 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB, return BuildMI(*BB, BB->end(), DL, MCID, DestReg); } -/// BuildMI - This version of the builder builds a DBG_VALUE intrinsic +/// This version of the builder builds a DBG_VALUE intrinsic /// for either a value in a register or a register-indirect+offset /// address. The convention is that a DBG_VALUE is indirect iff the /// second operand is an immediate. -/// inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, const MCInstrDesc &MCID, bool IsIndirect, unsigned Reg, unsigned Offset, @@ -377,10 +367,9 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF, DebugLoc DL, } } -/// BuildMI - This version of the builder builds a DBG_VALUE intrinsic +/// This version of the builder builds a DBG_VALUE intrinsic /// for either a value in a register or a register-indirect+offset /// address and inserts it at position I. -/// inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::iterator I, DebugLoc DL, const MCInstrDesc &MCID, bool IsIndirect, @@ -476,7 +465,7 @@ public: if (I == Begin) { if (!empty()) MI->bundleWithSucc(); - Begin = MI; + Begin = MI->getIterator(); return *this; } if (I == End) { diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 122022486345..4fbe206fceb9 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -28,7 +28,7 @@ namespace llvm { void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI); - + /// finalizeBundle - Same functionality as the previous finalizeBundle except /// the last instruction in the bundle is not provided as an input. This is /// used in cases where bundles are pre-determined by marking instructions @@ -44,23 +44,23 @@ bool finalizeBundles(MachineFunction &MF); /// getBundleStart - Returns the first instruction in the bundle containing MI. /// inline MachineInstr *getBundleStart(MachineInstr *MI) { - MachineBasicBlock::instr_iterator I = MI; + MachineBasicBlock::instr_iterator I(MI); while (I->isBundledWithPred()) --I; - return I; + return &*I; } inline const MachineInstr *getBundleStart(const MachineInstr *MI) { - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I(MI); while (I->isBundledWithPred()) --I; - return I; + return &*I; } /// Return an iterator pointing beyond the bundle containing MI. inline MachineBasicBlock::instr_iterator getBundleEnd(MachineInstr *MI) { - MachineBasicBlock::instr_iterator I = MI; + MachineBasicBlock::instr_iterator I(MI); while (I->isBundledWithSucc()) ++I; return ++I; @@ -69,7 +69,7 @@ getBundleEnd(MachineInstr *MI) { /// Return an iterator pointing beyond the bundle containing MI. 
inline MachineBasicBlock::const_instr_iterator getBundleEnd(const MachineInstr *MI) { - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I(MI); while (I->isBundledWithSucc()) ++I; return ++I; @@ -116,10 +116,10 @@ protected: /// explicit MachineOperandIteratorBase(MachineInstr *MI, bool WholeBundle) { if (WholeBundle) { - InstrI = getBundleStart(MI); + InstrI = getBundleStart(MI)->getIterator(); InstrE = MI->getParent()->instr_end(); } else { - InstrI = InstrE = MI; + InstrI = InstrE = MI->getIterator(); ++InstrE; } OpI = InstrI->operands_begin(); @@ -164,27 +164,32 @@ public: bool Tied; }; - /// PhysRegInfo - Information about a physical register used by a set of + /// Information about how a physical register Reg is used by a set of /// operands. struct PhysRegInfo { - /// Clobbers - Reg or an overlapping register is defined, or a regmask - /// clobbers Reg. - bool Clobbers; + /// There is a regmask operand indicating Reg is clobbered. + /// \see MachineOperand::CreateRegMask(). + bool Clobbered; - /// Defines - Reg or a super-register is defined. - bool Defines; + /// Reg or one of its aliases is defined. The definition may only cover + /// parts of the register. + bool Defined; + /// Reg or a super-register is defined. The definition covers the full + /// register. + bool FullyDefined; - /// Reads - Read or a super-register is read. - bool Reads; + /// Reg or one of its aliases is read. The register may only be read + /// partially. + bool Read; + /// Reg or a super-register is read. The full register is read. + bool FullyRead; - /// ReadsOverlap - Reg or an overlapping register is read. - bool ReadsOverlap; + /// Reg is FullyDefined and all defs of Reg or an overlapping register are + /// dead. + bool DeadDef; - /// DefinesDead - All defs of a Reg or a super-register are dead. - bool DefinesDead; - - /// There is a kill of Reg or a super-register. - bool Kills; + /// There is a use operand of Reg or a super-register with kill flag set. + bool Killed; }; /// analyzeVirtReg - Analyze how the current instruction or bundle uses a diff --git a/include/llvm/CodeGen/MachineMemOperand.h b/include/llvm/CodeGen/MachineMemOperand.h index a73b92f9a252..1ca0d90465a4 100644 --- a/include/llvm/CodeGen/MachineMemOperand.h +++ b/include/llvm/CodeGen/MachineMemOperand.h @@ -27,6 +27,7 @@ namespace llvm { class FoldingSetNodeID; class MDNode; class raw_ostream; +class MachineFunction; class ModuleSlotTracker; /// MachinePointerInfo - This class contains a discriminated union of @@ -62,22 +63,23 @@ struct MachinePointerInfo { /// getConstantPool - Return a MachinePointerInfo record that refers to the /// constant pool. - static MachinePointerInfo getConstantPool(); + static MachinePointerInfo getConstantPool(MachineFunction &MF); /// getFixedStack - Return a MachinePointerInfo record that refers to /// the specified FrameIndex. - static MachinePointerInfo getFixedStack(int FI, int64_t offset = 0); + static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, + int64_t Offset = 0); /// getJumpTable - Return a MachinePointerInfo record that refers to a /// jump table entry. - static MachinePointerInfo getJumpTable(); + static MachinePointerInfo getJumpTable(MachineFunction &MF); /// getGOT - Return a MachinePointerInfo record that refers to a /// GOT entry. - static MachinePointerInfo getGOT(); + static MachinePointerInfo getGOT(MachineFunction &MF); /// getStack - stack pointer relative access.
- static MachinePointerInfo getStack(int64_t Offset); + static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset); }; diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 4cdfe2463c99..77571124a1b8 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -35,11 +35,12 @@ #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ValueHandle.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Pass.h" #include "llvm/Support/DataTypes.h" @@ -59,7 +60,6 @@ class MachineFunction; class Module; class PointerType; class StructType; -struct WinEHFuncInfo; struct SEHHandler { // Filter or finally function. Null indicates a catch-all. @@ -79,13 +79,10 @@ struct LandingPadInfo { SmallVector EndLabels; // Labels after invoke. SmallVector SEHHandlers; // SEH handlers active at this lpad. MCSymbol *LandingPadLabel; // Label at beginning of landing pad. - const Function *Personality; // Personality function. std::vector TypeIds; // List of type ids (filters negative). - int WinEHState; // WinEH specific state number. explicit LandingPadInfo(MachineBasicBlock *MBB) - : LandingPadBlock(MBB), LandingPadLabel(nullptr), Personality(nullptr), - WinEHState(-1) {} + : LandingPadBlock(MBB), LandingPadLabel(nullptr) {} }; //===----------------------------------------------------------------------===// @@ -163,6 +160,13 @@ class MachineModuleInfo : public ImmutablePass { bool CallsEHReturn; bool CallsUnwindInit; + bool HasEHFunclets; + + // TODO: Ideally, what we'd like is to have a switch that allows emitting + // synchronous (precise at call-sites only) CFA into .eh_frame. However, + // even under this switch, we'd like .debug_frame to be precise when using + // -g. At this moment, there's no way to specify that some CFI directives + // go into .eh_frame only, while others go into .debug_frame only. /// DbgInfoAvailable - True if debugging information is available /// in this module. @@ -182,8 +186,6 @@ class MachineModuleInfo : public ImmutablePass { EHPersonality PersonalityTypeCache; - DenseMap> FuncInfoMap; - public: static char ID; // Pass identification, replacement for typeid @@ -220,12 +222,6 @@ public: void setModule(const Module *M) { TheModule = M; } const Module *getModule() const { return TheModule; } - const Function *getWinEHParent(const Function *F) const; - WinEHFuncInfo &getWinEHFuncInfo(const Function *F); - bool hasWinEHFuncInfo(const Function *F) const { - return FuncInfoMap.count(getWinEHParent(F)) > 0; - } - /// getInfo - Keep track of various per-function pieces of information for /// backends that would like to do so. /// @@ -252,6 +248,9 @@ public: bool callsUnwindInit() const { return CallsUnwindInit; } void setCallsUnwindInit(bool b) { CallsUnwindInit = b; } + bool hasEHFunclets() const { return HasEHFunclets; } + void setHasEHFunclets(bool V) { HasEHFunclets = V; } + bool usesVAFloatArgument() const { return UsesVAFloatArgument; } @@ -318,16 +317,8 @@ public: /// addPersonality - Provide the personality function for the exception /// information.
- void addPersonality(MachineBasicBlock *LandingPad, - const Function *Personality); void addPersonality(const Function *Personality); - void addWinEHState(MachineBasicBlock *LandingPad, int State); - - /// getPersonalityIndex - Get index of the current personality function inside - /// Personalitites array - unsigned getPersonalityIndex() const; - /// getPersonalities - Return array of personality functions ever seen. const std::vector& getPersonalities() const { return Personalities; @@ -426,13 +417,6 @@ public: return FilterIds; } - /// getPersonality - Return a personality function if available. The presence - /// of one is required to emit exception handling info. - const Function *getPersonality() const; - - /// Classify the personality function amongst known EH styles. - EHPersonality getPersonalityType(); - /// setVariableDbgInfo - Collect information used to emit debugging /// information of a variable. void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr, diff --git a/include/llvm/CodeGen/MachineModuleInfoImpls.h b/include/llvm/CodeGen/MachineModuleInfoImpls.h index a67f9b5666b1..e7472145e71f 100644 --- a/include/llvm/CodeGen/MachineModuleInfoImpls.h +++ b/include/llvm/CodeGen/MachineModuleInfoImpls.h @@ -18,79 +18,71 @@ #include "llvm/CodeGen/MachineModuleInfo.h" namespace llvm { - class MCSymbol; +class MCSymbol; - /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation - /// for MachO targets. - class MachineModuleInfoMachO : public MachineModuleInfoImpl { - /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub", - /// the value is something like "_foo". - DenseMap FnStubs; - - /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like - /// "Lfoo$non_lazy_ptr", the value is something like "_foo". The extra bit - /// is true if this GV is external. - DenseMap GVStubs; - - /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like - /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs - /// these are for things with hidden visibility. The extra bit is true if - /// this GV is external. - DenseMap HiddenGVStubs; - - virtual void anchor(); // Out of line virtual method. - public: - MachineModuleInfoMachO(const MachineModuleInfo &) {} - - StubValueTy &getFnStubEntry(MCSymbol *Sym) { - assert(Sym && "Key cannot be null"); - return FnStubs[Sym]; - } +/// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation +/// for MachO targets. +class MachineModuleInfoMachO : public MachineModuleInfoImpl { + /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub", + /// the value is something like "_foo". + DenseMap FnStubs; - StubValueTy &getGVStubEntry(MCSymbol *Sym) { - assert(Sym && "Key cannot be null"); - return GVStubs[Sym]; - } + /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like + /// "Lfoo$non_lazy_ptr", the value is something like "_foo". The extra bit + /// is true if this GV is external. + DenseMap GVStubs; - StubValueTy &getHiddenGVStubEntry(MCSymbol *Sym) { - assert(Sym && "Key cannot be null"); - return HiddenGVStubs[Sym]; - } + /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like + /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs + /// these are for things with hidden visibility. The extra bit is true if + /// this GV is external. + DenseMap HiddenGVStubs; - /// Accessor methods to return the set of stubs in sorted order. 
- SymbolListTy GetFnStubList() { - return getSortedStubs(FnStubs); - } - SymbolListTy GetGVStubList() { - return getSortedStubs(GVStubs); - } - SymbolListTy GetHiddenGVStubList() { - return getSortedStubs(HiddenGVStubs); - } - }; + virtual void anchor(); // Out of line virtual method. +public: + MachineModuleInfoMachO(const MachineModuleInfo &) {} - /// MachineModuleInfoELF - This is a MachineModuleInfoImpl implementation - /// for ELF targets. - class MachineModuleInfoELF : public MachineModuleInfoImpl { - /// GVStubs - These stubs are used to materialize global addresses in PIC - /// mode. - DenseMap GVStubs; + StubValueTy &getFnStubEntry(MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return FnStubs[Sym]; + } - virtual void anchor(); // Out of line virtual method. - public: - MachineModuleInfoELF(const MachineModuleInfo &) {} + StubValueTy &getGVStubEntry(MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return GVStubs[Sym]; + } - StubValueTy &getGVStubEntry(MCSymbol *Sym) { - assert(Sym && "Key cannot be null"); - return GVStubs[Sym]; - } + StubValueTy &getHiddenGVStubEntry(MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return HiddenGVStubs[Sym]; + } - /// Accessor methods to return the set of stubs in sorted order. + /// Accessor methods to return the set of stubs in sorted order. + SymbolListTy GetFnStubList() { return getSortedStubs(FnStubs); } + SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); } + SymbolListTy GetHiddenGVStubList() { return getSortedStubs(HiddenGVStubs); } +}; - SymbolListTy GetGVStubList() { - return getSortedStubs(GVStubs); - } - }; +/// MachineModuleInfoELF - This is a MachineModuleInfoImpl implementation +/// for ELF targets. +class MachineModuleInfoELF : public MachineModuleInfoImpl { + /// GVStubs - These stubs are used to materialize global addresses in PIC + /// mode. + DenseMap GVStubs; + + virtual void anchor(); // Out of line virtual method. +public: + MachineModuleInfoELF(const MachineModuleInfo &) {} + + StubValueTy &getGVStubEntry(MCSymbol *Sym) { + assert(Sym && "Key cannot be null"); + return GVStubs[Sym]; + } + + /// Accessor methods to return the set of stubs in sorted order. + + SymbolListTy GetGVStubList() { return getSortedStubs(GVStubs); } +}; } // end namespace llvm diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 5e607cdae48e..04191bc1b74f 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -73,7 +73,7 @@ private: /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. - std::vector PhysRegUseDefLists; + std::unique_ptr PhysRegUseDefLists; /// getRegUseDefListHead - Return the head pointer for the register use/def /// list for the specified virtual or physical register. @@ -95,20 +95,8 @@ private: return MO->Contents.Reg.Next; } - /// UsedRegUnits - This is a bit vector that is computed and set by the - /// register allocator, and must be kept up to date by passes that run after - /// register allocation (though most don't modify this). This is used - /// so that the code generator knows which callee save registers to save and - /// for other target specific uses. - /// This vector has bits set for register units that are modified in the - /// current function. It doesn't include registers clobbered by function - /// calls with register mask operands. 
- BitVector UsedRegUnits; - /// UsedPhysRegMask - Additional used physregs including aliases. /// This bit vector represents all the registers clobbered by function calls. - /// It can model things that UsedRegUnits can't, such as function calls that - /// clobber ymm7 but preserve the low half in xmm7. BitVector UsedPhysRegMask; /// ReservedRegs - This is a bit vector of reserved registers. The target @@ -246,7 +234,7 @@ public: static reg_iterator reg_end() { return reg_iterator(nullptr); } inline iterator_range reg_operands(unsigned Reg) const { - return iterator_range(reg_begin(Reg), reg_end()); + return make_range(reg_begin(Reg), reg_end()); } /// reg_instr_iterator/reg_instr_begin/reg_instr_end - Walk all defs and uses @@ -262,8 +250,7 @@ public: inline iterator_range reg_instructions(unsigned Reg) const { - return iterator_range(reg_instr_begin(Reg), - reg_instr_end()); + return make_range(reg_instr_begin(Reg), reg_instr_end()); } /// reg_bundle_iterator/reg_bundle_begin/reg_bundle_end - Walk all defs and uses @@ -278,8 +265,7 @@ public: } inline iterator_range reg_bundles(unsigned Reg) const { - return iterator_range(reg_bundle_begin(Reg), - reg_bundle_end()); + return make_range(reg_bundle_begin(Reg), reg_bundle_end()); } /// reg_empty - Return true if there are no instructions using or defining the @@ -299,8 +285,7 @@ public: inline iterator_range reg_nodbg_operands(unsigned Reg) const { - return iterator_range(reg_nodbg_begin(Reg), - reg_nodbg_end()); + return make_range(reg_nodbg_begin(Reg), reg_nodbg_end()); } /// reg_instr_nodbg_iterator/reg_instr_nodbg_begin/reg_instr_nodbg_end - Walk @@ -317,8 +302,7 @@ public: inline iterator_range reg_nodbg_instructions(unsigned Reg) const { - return iterator_range(reg_instr_nodbg_begin(Reg), - reg_instr_nodbg_end()); + return make_range(reg_instr_nodbg_begin(Reg), reg_instr_nodbg_end()); } /// reg_bundle_nodbg_iterator/reg_bundle_nodbg_begin/reg_bundle_nodbg_end - Walk @@ -333,10 +317,9 @@ public: return reg_bundle_nodbg_iterator(nullptr); } - inline iterator_range + inline iterator_range reg_nodbg_bundles(unsigned Reg) const { - return iterator_range(reg_bundle_nodbg_begin(Reg), - reg_bundle_nodbg_end()); + return make_range(reg_bundle_nodbg_begin(Reg), reg_bundle_nodbg_end()); } /// reg_nodbg_empty - Return true if the only instructions using or defining @@ -354,7 +337,7 @@ public: static def_iterator def_end() { return def_iterator(nullptr); } inline iterator_range def_operands(unsigned Reg) const { - return iterator_range(def_begin(Reg), def_end()); + return make_range(def_begin(Reg), def_end()); } /// def_instr_iterator/def_instr_begin/def_instr_end - Walk all defs of the @@ -370,8 +353,7 @@ public: inline iterator_range def_instructions(unsigned Reg) const { - return iterator_range(def_instr_begin(Reg), - def_instr_end()); + return make_range(def_instr_begin(Reg), def_instr_end()); } /// def_bundle_iterator/def_bundle_begin/def_bundle_end - Walk all defs of the @@ -386,8 +368,7 @@ public: } inline iterator_range def_bundles(unsigned Reg) const { - return iterator_range(def_bundle_begin(Reg), - def_bundle_end()); + return make_range(def_bundle_begin(Reg), def_bundle_end()); } /// def_empty - Return true if there are no instructions defining the @@ -412,7 +393,7 @@ public: static use_iterator use_end() { return use_iterator(nullptr); } inline iterator_range use_operands(unsigned Reg) const { - return iterator_range(use_begin(Reg), use_end()); + return make_range(use_begin(Reg), use_end()); } /// 
use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the @@ -428,8 +409,7 @@ public: inline iterator_range use_instructions(unsigned Reg) const { - return iterator_range(use_instr_begin(Reg), - use_instr_end()); + return make_range(use_instr_begin(Reg), use_instr_end()); } /// use_bundle_iterator/use_bundle_begin/use_bundle_end - Walk all uses of the @@ -444,8 +424,7 @@ public: } inline iterator_range use_bundles(unsigned Reg) const { - return iterator_range(use_bundle_begin(Reg), - use_bundle_end()); + return make_range(use_bundle_begin(Reg), use_bundle_end()); } /// use_empty - Return true if there are no instructions using the specified @@ -474,8 +453,7 @@ public: inline iterator_range use_nodbg_operands(unsigned Reg) const { - return iterator_range(use_nodbg_begin(Reg), - use_nodbg_end()); + return make_range(use_nodbg_begin(Reg), use_nodbg_end()); } /// use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk @@ -492,8 +470,7 @@ public: inline iterator_range use_nodbg_instructions(unsigned Reg) const { - return iterator_range(use_instr_nodbg_begin(Reg), - use_instr_nodbg_end()); + return make_range(use_instr_nodbg_begin(Reg), use_instr_nodbg_end()); } /// use_bundle_nodbg_iterator/use_bundle_nodbg_begin/use_bundle_nodbg_end - Walk @@ -510,8 +487,7 @@ public: inline iterator_range use_nodbg_bundles(unsigned Reg) const { - return iterator_range(use_bundle_nodbg_begin(Reg), - use_bundle_nodbg_end()); + return make_range(use_bundle_nodbg_begin(Reg), use_bundle_nodbg_end()); } /// use_nodbg_empty - Return true if there are no non-Debug instructions @@ -540,7 +516,7 @@ public: /// apply sub registers to ToReg in order to obtain a final/proper physical /// register. void replaceRegWith(unsigned FromReg, unsigned ToReg); - + /// getVRegDef - Return the machine instr that defines the specified virtual /// register or null if none is found. This assumes that the code is in SSA /// form, so there should only be one definition. @@ -626,6 +602,12 @@ public: RegAllocHints[VReg].second = PrefReg; } + /// Specify the preferred register allocation hint for the specified virtual + /// register. + void setSimpleHint(unsigned VReg, unsigned PrefReg) { + setRegAllocationHint(VReg, /*Type=*/0, PrefReg); + } + /// getRegAllocationHint - Return the register allocation hint for the /// specified virtual register. std::pair @@ -650,41 +632,15 @@ public: /// Return true if the specified register is modified in this function. /// This checks that no defining machine operands exist for the register or /// any of its aliases. Definitions found on functions marked noreturn are - /// ignored. + /// ignored. The register is also considered modified when it is set in the + /// UsedPhysRegMask. bool isPhysRegModified(unsigned PhysReg) const; - //===--------------------------------------------------------------------===// - // Physical Register Use Info - //===--------------------------------------------------------------------===// - - /// isPhysRegUsed - Return true if the specified register is used in this - /// function. Also check for clobbered aliases and registers clobbered by - /// function calls with register mask operands. - /// - /// This only works after register allocation. 
- bool isPhysRegUsed(unsigned Reg) const { - if (UsedPhysRegMask.test(Reg)) - return true; - for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo()); - Units.isValid(); ++Units) - if (UsedRegUnits.test(*Units)) - return true; - return false; - } - - /// Mark the specified register unit as used in this function. - /// This should only be called during and after register allocation. - void setRegUnitUsed(unsigned RegUnit) { - UsedRegUnits.set(RegUnit); - } - - /// setPhysRegUsed - Mark the specified register used in this function. - /// This should only be called during and after register allocation. - void setPhysRegUsed(unsigned Reg) { - for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo()); - Units.isValid(); ++Units) - UsedRegUnits.set(*Units); - } + /// Return true if the specified register is modified or read in this + /// function. This checks that no machine operands exist for the register or + /// any of its aliases. The register is also considered used when it is set + /// in the UsedPhysRegMask. + bool isPhysRegUsed(unsigned PhysReg) const; /// addPhysRegsUsedFromRegMask - Mark any registers not in RegMask as used. /// This corresponds to the bit mask attached to register mask operands. @@ -692,15 +648,9 @@ public: UsedPhysRegMask.setBitsNotInMask(RegMask); } - /// setPhysRegUnused - Mark the specified register unused in this function. - /// This should only be called during and after register allocation. - void setPhysRegUnused(unsigned Reg) { - UsedPhysRegMask.reset(Reg); - for (MCRegUnitIterator Units(Reg, getTargetRegisterInfo()); - Units.isValid(); ++Units) - UsedRegUnits.reset(*Units); - } + const BitVector &getUsedPhysRegsMask() const { return UsedPhysRegMask; } + void setUsedPhysRegMask(BitVector &Mask) { UsedPhysRegMask = Mask; } //===--------------------------------------------------------------------===// // Reserved Register Info @@ -797,7 +747,7 @@ public: /// Returns a mask covering all bits that can appear in lane masks of /// subregisters of the virtual register @p Reg. - unsigned getMaxLaneMaskForVReg(unsigned Reg) const; + LaneBitmask getMaxLaneMaskForVReg(unsigned Reg) const; /// defusechain_iterator - This class provides iterator support for machine /// operands in the function that use or define a specific register. If diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index e80e14e5ccf7..358fd5a3732a 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -77,6 +77,7 @@ #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H #define LLVM_CODEGEN_MACHINESCHEDULER_H +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" @@ -87,7 +88,6 @@ namespace llvm { extern cl::opt ForceTopDown; extern cl::opt ForceBottomUp; -class AliasAnalysis; class LiveIntervals; class MachineDominatorTree; class MachineLoopInfo; @@ -156,8 +156,12 @@ struct MachineSchedPolicy { bool OnlyTopDown; bool OnlyBottomUp; + // Disable heuristic that tries to fetch nodes from long dependency chains + // first. 
+ bool DisableLatencyHeuristic; + MachineSchedPolicy(): ShouldTrackPressure(false), OnlyTopDown(false), - OnlyBottomUp(false) {} + OnlyBottomUp(false), DisableLatencyHeuristic(false) {} }; /// MachineSchedStrategy - Interface to the scheduling algorithm used by @@ -175,6 +179,8 @@ public: MachineBasicBlock::iterator End, unsigned NumRegionInstrs) {} + virtual void dumpPolicy() {} + /// Check if pressure tracking is needed before building the DAG and /// initializing this strategy. Called after initPolicy. virtual bool shouldTrackPressure() const { return true; } @@ -222,6 +228,7 @@ public: class ScheduleDAGMI : public ScheduleDAGInstrs { protected: AliasAnalysis *AA; + LiveIntervals *LIS; std::unique_ptr SchedImpl; /// Topo - A topological ordering for SUnits which permits fast IsReachable @@ -248,11 +255,11 @@ protected: #endif public: ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr S, - bool IsPostRA) - : ScheduleDAGInstrs(*C->MF, C->MLI, IsPostRA, - /*RemoveKillFlags=*/IsPostRA, C->LIS), - AA(C->AA), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), CurrentTop(), - CurrentBottom(), NextClusterPred(nullptr), NextClusterSucc(nullptr) { + bool RemoveKillFlags) + : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), + LIS(C->LIS), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), + CurrentTop(), CurrentBottom(), NextClusterPred(nullptr), + NextClusterSucc(nullptr) { #ifndef NDEBUG NumInstrsScheduled = 0; #endif @@ -261,6 +268,9 @@ public: // Provide a vtable anchor ~ScheduleDAGMI() override; + // Returns LiveIntervals instance for use in DAG mutators and such. + LiveIntervals *getLIS() const { return LIS; } + /// Return true if this DAG supports VReg liveness and RegPressure. virtual bool hasVRegLiveness() const { return false; } @@ -380,7 +390,7 @@ protected: public: ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr S) - : ScheduleDAGMI(C, std::move(S), /*IsPostRA=*/false), + : ScheduleDAGMI(C, std::move(S), /*RemoveKillFlags=*/false), RegClassInfo(C->RegClassInfo), DFSResult(nullptr), ShouldTrackPressure(false), RPTracker(RegPressure), TopRPTracker(TopPressure), BotRPTracker(BotPressure) {} @@ -858,6 +868,8 @@ public: MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override; + void dumpPolicy() override; + bool shouldTrackPressure() const override { return RegionPolicy.ShouldTrackPressure; } @@ -915,7 +927,7 @@ public: MachineBasicBlock::iterator End, unsigned NumRegionInstrs) override { /* no configurable policy */ - }; + } /// PostRA scheduling does not track pressure. 
bool shouldTrackPressure() const override { return false; } diff --git a/include/llvm/CodeGen/MachineValueType.h b/include/llvm/CodeGen/MachineValueType.h index a728df354677..04d6ee3be531 100644 --- a/include/llvm/CodeGen/MachineValueType.h +++ b/include/llvm/CodeGen/MachineValueType.h @@ -56,53 +56,66 @@ class MVT { FIRST_FP_VALUETYPE = f16, LAST_FP_VALUETYPE = ppcf128, - v2i1 = 13, // 2 x i1 - v4i1 = 14, // 4 x i1 - v8i1 = 15, // 8 x i1 - v16i1 = 16, // 16 x i1 - v32i1 = 17, // 32 x i1 - v64i1 = 18, // 64 x i1 + v2i1 = 13, // 2 x i1 + v4i1 = 14, // 4 x i1 + v8i1 = 15, // 8 x i1 + v16i1 = 16, // 16 x i1 + v32i1 = 17, // 32 x i1 + v64i1 = 18, // 64 x i1 + v512i1 = 19, // 512 x i1 + v1024i1 = 20, // 1024 x i1 + + v1i8 = 21, // 1 x i8 + v2i8 = 22, // 2 x i8 + v4i8 = 23, // 4 x i8 + v8i8 = 24, // 8 x i8 + v16i8 = 25, // 16 x i8 + v32i8 = 26, // 32 x i8 + v64i8 = 27, // 64 x i8 + v128i8 = 28, //128 x i8 + v256i8 = 29, //256 x i8 + + v1i16 = 30, // 1 x i16 + v2i16 = 31, // 2 x i16 + v4i16 = 32, // 4 x i16 + v8i16 = 33, // 8 x i16 + v16i16 = 34, // 16 x i16 + v32i16 = 35, // 32 x i16 + v64i16 = 36, // 64 x i16 + v128i16 = 37, //128 x i16 + + v1i32 = 38, // 1 x i32 + v2i32 = 39, // 2 x i32 + v4i32 = 40, // 4 x i32 + v8i32 = 41, // 8 x i32 + v16i32 = 42, // 16 x i32 + v32i32 = 43, // 32 x i32 + v64i32 = 44, // 64 x i32 + + v1i64 = 45, // 1 x i64 + v2i64 = 46, // 2 x i64 + v4i64 = 47, // 4 x i64 + v8i64 = 48, // 8 x i64 + v16i64 = 49, // 16 x i64 + v32i64 = 50, // 32 x i64 + + v1i128 = 51, // 1 x i128 - v1i8 = 19, // 1 x i8 - v2i8 = 20, // 2 x i8 - v4i8 = 21, // 4 x i8 - v8i8 = 22, // 8 x i8 - v16i8 = 23, // 16 x i8 - v32i8 = 24, // 32 x i8 - v64i8 = 25, // 64 x i8 - v1i16 = 26, // 1 x i16 - v2i16 = 27, // 2 x i16 - v4i16 = 28, // 4 x i16 - v8i16 = 29, // 8 x i16 - v16i16 = 30, // 16 x i16 - v32i16 = 31, // 32 x i16 - v1i32 = 32, // 1 x i32 - v2i32 = 33, // 2 x i32 - v4i32 = 34, // 4 x i32 - v8i32 = 35, // 8 x i32 - v16i32 = 36, // 16 x i32 - v1i64 = 37, // 1 x i64 - v2i64 = 38, // 2 x i64 - v4i64 = 39, // 4 x i64 - v8i64 = 40, // 8 x i64 - v16i64 = 41, // 16 x i64 - v1i128 = 42, // 1 x i128 - FIRST_INTEGER_VECTOR_VALUETYPE = v2i1, LAST_INTEGER_VECTOR_VALUETYPE = v1i128, - v2f16 = 43, // 2 x f16 - v4f16 = 44, // 4 x f16 - v8f16 = 45, // 8 x f16 - v1f32 = 46, // 1 x f32 - v2f32 = 47, // 2 x f32 - v4f32 = 48, // 4 x f32 - v8f32 = 49, // 8 x f32 - v16f32 = 50, // 16 x f32 - v1f64 = 51, // 1 x f64 - v2f64 = 52, // 2 x f64 - v4f64 = 53, // 4 x f64 - v8f64 = 54, // 8 x f64 + v2f16 = 52, // 2 x f16 + v4f16 = 53, // 4 x f16 + v8f16 = 54, // 8 x f16 + v1f32 = 55, // 1 x f32 + v2f32 = 56, // 2 x f32 + v4f32 = 57, // 4 x f32 + v8f32 = 58, // 8 x f32 + v16f32 = 59, // 16 x f32 + v1f64 = 60, // 1 x f64 + v2f64 = 61, // 2 x f64 + v4f64 = 62, // 4 x f64 + v8f64 = 63, // 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, LAST_FP_VECTOR_VALUETYPE = v8f64, @@ -110,23 +123,26 @@ class MVT { FIRST_VECTOR_VALUETYPE = v2i1, LAST_VECTOR_VALUETYPE = v8f64, - x86mmx = 55, // This is an X86 MMX value + x86mmx = 64, // This is an X86 MMX value - Glue = 56, // This glues nodes together during pre-RA sched + Glue = 65, // This glues nodes together during pre-RA sched - isVoid = 57, // This has no value + isVoid = 66, // This has no value - Untyped = 58, // This value takes a register, but has + Untyped = 67, // This value takes a register, but has // unspecified type. The register class // will be determined by the opcode. FIRST_VALUETYPE = 0, // This is always the beginning of the list. 
-    LAST_VALUETYPE =  59,   // This always remains at the end of the list.
+    LAST_VALUETYPE =  68,   // This always remains at the end of the list.

     // This is the current maximum for LAST_VALUETYPE.
     // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
     // This value must be a multiple of 32.
-    MAX_ALLOWED_VALUETYPE = 64,
+    MAX_ALLOWED_VALUETYPE = 96,
+
+    // Token - A value of type llvm::TokenTy
+    token = 249,

     // Metadata - This is MDNode or MDString.
     Metadata = 250,
@@ -238,14 +254,23 @@ class MVT {

     /// is512BitVector - Return true if this is a 512-bit vector type.
     bool is512BitVector() const {
-      return (SimpleTy == MVT::v8f64 || SimpleTy == MVT::v16f32 ||
-              SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
-              SimpleTy == MVT::v8i64 || SimpleTy == MVT::v16i32);
+      return (SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 ||
+              SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 ||
+              SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
+              SimpleTy == MVT::v8i64);
     }

     /// is1024BitVector - Return true if this is a 1024-bit vector type.
     bool is1024BitVector() const {
-      return (SimpleTy == MVT::v16i64);
+      return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 ||
+              SimpleTy == MVT::v64i16  || SimpleTy == MVT::v32i32 ||
+              SimpleTy == MVT::v16i64);
+    }
+
+    /// is2048BitVector - Return true if this is a 2048-bit vector type.
+    bool is2048BitVector() const {
+      return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 ||
+              SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64);
     }

     /// isOverloaded - Return true if this is an overloaded type for TableGen.
@@ -282,35 +307,44 @@ class MVT {
       switch (SimpleTy) {
       default:
         llvm_unreachable("Not a vector MVT!");
-      case v2i1 :
-      case v4i1 :
-      case v8i1 :
-      case v16i1 :
-      case v32i1 :
-      case v64i1: return i1;
-      case v1i8 :
-      case v2i8 :
-      case v4i8 :
-      case v8i8 :
+      case v2i1:
+      case v4i1:
+      case v8i1:
+      case v16i1:
+      case v32i1:
+      case v64i1:
+      case v512i1:
+      case v1024i1: return i1;
+      case v1i8:
+      case v2i8:
+      case v4i8:
+      case v8i8:
       case v16i8:
       case v32i8:
-      case v64i8: return i8;
+      case v64i8:
+      case v128i8:
+      case v256i8: return i8;
       case v1i16:
       case v2i16:
       case v4i16:
       case v8i16:
       case v16i16:
-      case v32i16: return i16;
+      case v32i16:
+      case v64i16:
+      case v128i16: return i16;
       case v1i32:
       case v2i32:
       case v4i32:
       case v8i32:
-      case v16i32: return i32;
+      case v16i32:
+      case v32i32:
+      case v64i32: return i32;
       case v1i64:
       case v2i64:
       case v4i64:
       case v8i64:
-      case v16i64: return i64;
+      case v16i64:
+      case v32i64: return i64;
       case v1i128: return i128;
       case v2f16:
       case v4f16:
@@ -331,19 +365,28 @@ class MVT {
       switch (SimpleTy) {
       default:
         llvm_unreachable("Not a vector MVT!");
+      case v1024i1: return 1024;
+      case v512i1: return 512;
+      case v256i8: return 256;
+      case v128i8:
+      case v128i16: return 128;
+      case v64i1:
+      case v64i8:
+      case v64i16:
+      case v64i32: return 64;
       case v32i1:
       case v32i8:
-      case v32i16: return 32;
-      case v64i1:
-      case v64i8: return 64;
+      case v32i16:
+      case v32i32:
+      case v32i64: return 32;
       case v16i1:
       case v16i8:
       case v16i16:
       case v16i32:
       case v16i64:
       case v16f32: return 16;
-      case v8i1 :
-      case v8i8 :
+      case v8i1:
+      case v8i8:
       case v8i16:
       case v8i32:
       case v8i64:
@@ -390,6 +433,9 @@ class MVT {
       case vAny:
       case Any:
         llvm_unreachable("Value type is overloaded.");
+      case token:
+        llvm_unreachable("Token type is a sentinel that cannot be used "
+                         "in codegen and has no size");
       case Metadata:
         llvm_unreachable("Value type is metadata.");
       case i1  : return 1;
@@ -440,13 +486,22 @@ class MVT {
       case v4i64:
       case v8f32:
       case v4f64: return 256;
+ case v512i1: case v64i8: case v32i16: case v16i32: case v8i64: case v16f32: case v8f64: return 512; - case v16i64:return 1024; + case v1024i1: + case v128i8: + case v64i16: + case v32i32: + case v16i64: return 1024; + case v256i8: + case v128i16: + case v64i32: + case v32i64: return 2048; } } @@ -528,29 +583,35 @@ class MVT { default: break; case MVT::i1: - if (NumElements == 2) return MVT::v2i1; - if (NumElements == 4) return MVT::v4i1; - if (NumElements == 8) return MVT::v8i1; - if (NumElements == 16) return MVT::v16i1; - if (NumElements == 32) return MVT::v32i1; - if (NumElements == 64) return MVT::v64i1; + if (NumElements == 2) return MVT::v2i1; + if (NumElements == 4) return MVT::v4i1; + if (NumElements == 8) return MVT::v8i1; + if (NumElements == 16) return MVT::v16i1; + if (NumElements == 32) return MVT::v32i1; + if (NumElements == 64) return MVT::v64i1; + if (NumElements == 512) return MVT::v512i1; + if (NumElements == 1024) return MVT::v1024i1; break; case MVT::i8: - if (NumElements == 1) return MVT::v1i8; - if (NumElements == 2) return MVT::v2i8; - if (NumElements == 4) return MVT::v4i8; - if (NumElements == 8) return MVT::v8i8; - if (NumElements == 16) return MVT::v16i8; - if (NumElements == 32) return MVT::v32i8; - if (NumElements == 64) return MVT::v64i8; + if (NumElements == 1) return MVT::v1i8; + if (NumElements == 2) return MVT::v2i8; + if (NumElements == 4) return MVT::v4i8; + if (NumElements == 8) return MVT::v8i8; + if (NumElements == 16) return MVT::v16i8; + if (NumElements == 32) return MVT::v32i8; + if (NumElements == 64) return MVT::v64i8; + if (NumElements == 128) return MVT::v128i8; + if (NumElements == 256) return MVT::v256i8; break; case MVT::i16: - if (NumElements == 1) return MVT::v1i16; - if (NumElements == 2) return MVT::v2i16; - if (NumElements == 4) return MVT::v4i16; - if (NumElements == 8) return MVT::v8i16; - if (NumElements == 16) return MVT::v16i16; - if (NumElements == 32) return MVT::v32i16; + if (NumElements == 1) return MVT::v1i16; + if (NumElements == 2) return MVT::v2i16; + if (NumElements == 4) return MVT::v4i16; + if (NumElements == 8) return MVT::v8i16; + if (NumElements == 16) return MVT::v16i16; + if (NumElements == 32) return MVT::v32i16; + if (NumElements == 64) return MVT::v64i16; + if (NumElements == 128) return MVT::v128i16; break; case MVT::i32: if (NumElements == 1) return MVT::v1i32; @@ -558,6 +619,8 @@ class MVT { if (NumElements == 4) return MVT::v4i32; if (NumElements == 8) return MVT::v8i32; if (NumElements == 16) return MVT::v16i32; + if (NumElements == 32) return MVT::v32i32; + if (NumElements == 64) return MVT::v64i32; break; case MVT::i64: if (NumElements == 1) return MVT::v1i64; @@ -565,6 +628,7 @@ class MVT { if (NumElements == 4) return MVT::v4i64; if (NumElements == 8) return MVT::v8i64; if (NumElements == 16) return MVT::v16i64; + if (NumElements == 32) return MVT::v32i64; break; case MVT::i128: if (NumElements == 1) return MVT::v1i128; diff --git a/include/llvm/CodeGen/ParallelCG.h b/include/llvm/CodeGen/ParallelCG.h new file mode 100644 index 000000000000..fa7002fa21fb --- /dev/null +++ b/include/llvm/CodeGen/ParallelCG.h @@ -0,0 +1,43 @@ +//===-- llvm/CodeGen/ParallelCG.h - Parallel code generation ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This header declares functions that can be used for parallel code generation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PARALLELCG_H +#define LLVM_CODEGEN_PARALLELCG_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/CodeGen.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class Module; +class TargetOptions; +class raw_pwrite_stream; + +/// Split M into OSs.size() partitions, and generate code for each. Writes +/// OSs.size() output files to the output streams in OSs. The resulting output +/// files if linked together are intended to be equivalent to the single output +/// file that would have been code generated from M. +/// +/// \returns M if OSs.size() == 1, otherwise returns std::unique_ptr(). +std::unique_ptr +splitCodeGen(std::unique_ptr M, ArrayRef OSs, + StringRef CPU, StringRef Features, const TargetOptions &Options, + Reloc::Model RM = Reloc::Default, + CodeModel::Model CM = CodeModel::Default, + CodeGenOpt::Level OL = CodeGenOpt::Default, + TargetMachine::CodeGenFileType FT = TargetMachine::CGFT_ObjectFile); + +} // namespace llvm + +#endif diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 5d8292174476..f45f0ed57d6b 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -120,9 +120,6 @@ protected: /// Default setting for -enable-tail-merge on this target. bool EnableTailMerge; - /// Default setting for -enable-shrink-wrap on this target. - bool EnableShrinkWrap; - public: TargetPassConfig(TargetMachine *tm, PassManagerBase &pm); // Dummy constructor. @@ -173,7 +170,8 @@ public: void substitutePass(AnalysisID StandardID, IdentifyingPassPtr TargetID); /// Insert InsertedPassID pass after TargetPassID pass. - void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID); + void insertPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, + bool VerifyAfter = true, bool PrintAfter = true); /// Allow the target to enable a specific standard pass by default. void enablePass(AnalysisID PassID) { substitutePass(PassID, PassID); } @@ -228,7 +226,7 @@ public: /// /// This can also be used to plug a new MachineSchedStrategy into an instance /// of the standard ScheduleDAGMI: - /// return new ScheduleDAGMI(C, make_unique(C), /* IsPostRA= */false) + /// return new ScheduleDAGMI(C, make_unique(C), /*RemoveKillFlags=*/false) /// /// Return NULL to select the default (generic) machine scheduler. virtual ScheduleDAGInstrs * @@ -585,6 +583,9 @@ namespace llvm { /// StackSlotColoring - This pass performs stack slot coloring. extern char &StackSlotColoringID; + /// \brief This pass lays out funclets contiguously. + extern char &FuncletLayoutID; + /// createStackProtectorPass - This pass adds stack protectors to functions. /// FunctionPass *createStackProtectorPass(const TargetMachine *TM); @@ -639,6 +640,9 @@ namespace llvm { /// the intrinsic for later emission to the StackMap. extern char &StackMapLivenessID; + /// LiveDebugValues pass + extern char &LiveDebugValuesID; + /// createJumpInstrTables - This pass creates jump-instruction tables. 
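
// A sketch (not from the imported sources) of driving splitCodeGen() from
// ParallelCG.h above. The stream element type is assumed to be
// raw_pwrite_stream * and the return type std::unique_ptr<Module> (the
// template arguments do not survive in this hunk); M, OS0 and OS1 are
// assumed to exist, and error handling is omitted.
raw_pwrite_stream *Streams[] = {&OS0, &OS1}; // one stream per partition
std::unique_ptr<Module> Ret =
    splitCodeGen(std::move(M), Streams, /*CPU=*/"", /*Features=*/"",
                 TargetOptions());
// With more than one output stream the module is consumed and Ret is null;
// linking the resulting object files is intended to be equivalent to
// single-stream code generation of M.
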
ModulePass *createJumpInstrTablesPass(); diff --git a/include/llvm/CodeGen/PseudoSourceValue.h b/include/llvm/CodeGen/PseudoSourceValue.h index a518b6233250..f67552030db4 100644 --- a/include/llvm/CodeGen/PseudoSourceValue.h +++ b/include/llvm/CodeGen/PseudoSourceValue.h @@ -14,97 +14,170 @@ #ifndef LLVM_CODEGEN_PSEUDOSOURCEVALUE_H #define LLVM_CODEGEN_PSEUDOSOURCEVALUE_H +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueMap.h" +#include namespace llvm { - class MachineFrameInfo; - class MachineMemOperand; - class raw_ostream; - raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO); +class MachineFrameInfo; +class MachineMemOperand; +class raw_ostream; - /// PseudoSourceValue - Special value supplied for machine level alias - /// analysis. It indicates that a memory access references the functions - /// stack frame (e.g., a spill slot), below the stack frame (e.g., argument - /// space), or constant pool. - class PseudoSourceValue { - private: - friend class MachineMemOperand; // For printCustom(). +raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MMO); - /// printCustom - Implement printing for PseudoSourceValue. This is called - /// from Value::print or Value's operator<<. - /// - virtual void printCustom(raw_ostream &O) const; - - public: - /// isFixed - Whether this is a FixedStackPseudoSourceValue. - bool isFixed; - - explicit PseudoSourceValue(bool isFixed = false); - - virtual ~PseudoSourceValue(); - - /// isConstant - Test whether the memory pointed to by this - /// PseudoSourceValue has a constant value. - /// - virtual bool isConstant(const MachineFrameInfo *) const; - - /// isAliased - Test whether the memory pointed to by this - /// PseudoSourceValue may also be pointed to by an LLVM IR Value. - virtual bool isAliased(const MachineFrameInfo *) const; - - /// mayAlias - Return true if the memory pointed to by this - /// PseudoSourceValue can ever alias an LLVM IR Value. - virtual bool mayAlias(const MachineFrameInfo *) const; - - /// A pseudo source value referencing a fixed stack frame entry, - /// e.g., a spill slot. - static const PseudoSourceValue *getFixedStack(int FI); - - /// A pseudo source value referencing the area below the stack frame of - /// a function, e.g., the argument space. - static const PseudoSourceValue *getStack(); - - /// A pseudo source value referencing the global offset table - /// (or something the like). - static const PseudoSourceValue *getGOT(); - - /// A pseudo source value referencing the constant pool. Since constant - /// pools are constant, this doesn't need to identify a specific constant - /// pool entry. - static const PseudoSourceValue *getConstantPool(); - - /// A pseudo source value referencing a jump table. Since jump tables are - /// constant, this doesn't need to identify a specific jump table. - static const PseudoSourceValue *getJumpTable(); +/// Special value supplied for machine level alias analysis. It indicates that +/// a memory access references the functions stack frame (e.g., a spill slot), +/// below the stack frame (e.g., argument space), or constant pool. +class PseudoSourceValue { +public: + enum PSVKind { + Stack, + GOT, + JumpTable, + ConstantPool, + FixedStack, + GlobalValueCallEntry, + ExternalSymbolCallEntry }; - /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue - /// for holding FixedStack values, which must include a frame - /// index. 
- class FixedStackPseudoSourceValue : public PseudoSourceValue {
-   const int FI;
- public:
-   explicit FixedStackPseudoSourceValue(int fi) :
-     PseudoSourceValue(true), FI(fi) {}
+private:
+  PSVKind Kind;

-   /// classof - Methods for support type inquiry through isa, cast, and
-   /// dyn_cast:
-   ///
-   static inline bool classof(const PseudoSourceValue *V) {
-     return V->isFixed == true;
-   }
+  friend class MachineMemOperand; // For printCustom().

-   bool isConstant(const MachineFrameInfo *MFI) const override;
+  /// Implement printing for PseudoSourceValue. This is called from
+  /// Value::print or Value's operator<<.
+  virtual void printCustom(raw_ostream &O) const;

-   bool isAliased(const MachineFrameInfo *MFI) const override;
+public:
+  explicit PseudoSourceValue(PSVKind Kind);

-   bool mayAlias(const MachineFrameInfo *) const override;
+  virtual ~PseudoSourceValue();

-   void printCustom(raw_ostream &OS) const override;
+  PSVKind kind() const { return Kind; }

-   int getFrameIndex() const { return FI; }
- };
-} // End llvm namespace
+  bool isStack() const { return Kind == Stack; }
+  bool isGOT() const { return Kind == GOT; }
+  bool isConstantPool() const { return Kind == ConstantPool; }
+  bool isJumpTable() const { return Kind == JumpTable; }
+
+  /// Test whether the memory pointed to by this PseudoSourceValue has a
+  /// constant value.
+  virtual bool isConstant(const MachineFrameInfo *) const;
+
+  /// Test whether the memory pointed to by this PseudoSourceValue may also be
+  /// pointed to by an LLVM IR Value.
+  virtual bool isAliased(const MachineFrameInfo *) const;
+
+  /// Return true if the memory pointed to by this PseudoSourceValue can ever
+  /// alias an LLVM IR Value.
+  virtual bool mayAlias(const MachineFrameInfo *) const;
+};
+
+/// A specialized PseudoSourceValue for holding FixedStack values, which must
+/// include a frame index.
+class FixedStackPseudoSourceValue : public PseudoSourceValue {
+  const int FI;
+
+public:
+  explicit FixedStackPseudoSourceValue(int FI)
+      : PseudoSourceValue(FixedStack), FI(FI) {}
+
+  static inline bool classof(const PseudoSourceValue *V) {
+    return V->kind() == FixedStack;
+  }
+
+  bool isConstant(const MachineFrameInfo *MFI) const override;
+
+  bool isAliased(const MachineFrameInfo *MFI) const override;
+
+  bool mayAlias(const MachineFrameInfo *) const override;
+
+  void printCustom(raw_ostream &OS) const override;
+
+  int getFrameIndex() const { return FI; }
+};
+
+class CallEntryPseudoSourceValue : public PseudoSourceValue {
+protected:
+  CallEntryPseudoSourceValue(PSVKind Kind);
+
+public:
+  bool isConstant(const MachineFrameInfo *) const override;
+  bool isAliased(const MachineFrameInfo *) const override;
+  bool mayAlias(const MachineFrameInfo *) const override;
+};
+
+/// A specialized pseudo source value for holding GlobalValue values.
+class GlobalValuePseudoSourceValue : public CallEntryPseudoSourceValue {
+  const GlobalValue *GV;
+
+public:
+  GlobalValuePseudoSourceValue(const GlobalValue *GV);
+
+  static inline bool classof(const PseudoSourceValue *V) {
+    return V->kind() == GlobalValueCallEntry;
+  }
+
+  const GlobalValue *getValue() const { return GV; }
+};
+
+/// A specialized pseudo source value for holding external symbol values.
+class ExternalSymbolPseudoSourceValue : public CallEntryPseudoSourceValue { + const char *ES; + +public: + ExternalSymbolPseudoSourceValue(const char *ES); + + static inline bool classof(const PseudoSourceValue *V) { + return V->kind() == ExternalSymbolCallEntry; + } + + const char *getSymbol() const { return ES; } +}; + +/// Manages creation of pseudo source values. +class PseudoSourceValueManager { + const PseudoSourceValue StackPSV, GOTPSV, JumpTablePSV, ConstantPoolPSV; + std::map> FSValues; + StringMap> + ExternalCallEntries; + ValueMap> + GlobalCallEntries; + +public: + PseudoSourceValueManager(); + + /// Return a pseudo source value referencing the area below the stack frame of + /// a function, e.g., the argument space. + const PseudoSourceValue *getStack(); + + /// Return a pseudo source value referencing the global offset table + /// (or something the like). + const PseudoSourceValue *getGOT(); + + /// Return a pseudo source value referencing the constant pool. Since constant + /// pools are constant, this doesn't need to identify a specific constant + /// pool entry. + const PseudoSourceValue *getConstantPool(); + + /// Return a pseudo source value referencing a jump table. Since jump tables + /// are constant, this doesn't need to identify a specific jump table. + const PseudoSourceValue *getJumpTable(); + + /// Return a pseudo source value referencing a fixed stack frame entry, + /// e.g., a spill slot. + const PseudoSourceValue *getFixedStack(int FI); + + const PseudoSourceValue *getGlobalValueCallEntry(const GlobalValue *GV); + + const PseudoSourceValue *getExternalSymbolCallEntry(const char *ES); +}; + +} // end namespace llvm #endif diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h index 6046e46547b2..4122811a9e5c 100644 --- a/include/llvm/CodeGen/RegAllocPBQP.h +++ b/include/llvm/CodeGen/RegAllocPBQP.h @@ -134,7 +134,7 @@ inline hash_code hash_value(const AllowedRegVector &OptRegs) { hash_combine_range(OStart, OEnd)); } -/// \brief Holds graph-level metadata relevent to PBQP RA problems. +/// \brief Holds graph-level metadata relevant to PBQP RA problems. class GraphMetadata { private: typedef ValuePool AllowedRegVecPool; diff --git a/include/llvm/CodeGen/RegAllocRegistry.h b/include/llvm/CodeGen/RegAllocRegistry.h index ca495778446f..5c7e9999cc9a 100644 --- a/include/llvm/CodeGen/RegAllocRegistry.h +++ b/include/llvm/CodeGen/RegAllocRegistry.h @@ -33,12 +33,10 @@ public: static MachinePassRegistry Registry; RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C) - : MachinePassRegistryNode(N, D, (MachinePassCtor)C) - { - Registry.Add(this); + : MachinePassRegistryNode(N, D, (MachinePassCtor)C) { + Registry.Add(this); } ~RegisterRegAlloc() { Registry.Remove(this); } - // Accessors. 
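
// A sketch (not from the imported sources) of the kind-based queries in the
// PseudoSourceValue hierarchy above. MMO is an assumed MachineMemOperand *;
// its getPseudoValue() accessor, returning the possibly-null attached
// PseudoSourceValue, is presumed available, and dbgs() assumes
// llvm/Support/Debug.h.
if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
  if (const auto *FS = dyn_cast<FixedStackPseudoSourceValue>(PSV))
    dbgs() << "stack access on FI#" << FS->getFrameIndex() << '\n';
  else if (PSV->isConstantPool()) // kind() test wrapped by isConstantPool()
    dbgs() << "constant pool access\n";
}
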
// @@ -57,7 +55,6 @@ public: static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } - }; } // end namespace llvm diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index 9d8843d1d74a..987634fb36c3 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -125,11 +125,13 @@ class PressureDiff { enum { MaxPSets = 16 }; PressureChange PressureChanges[MaxPSets]; -public: + typedef PressureChange* iterator; + iterator nonconst_begin() { return &PressureChanges[0]; } + iterator nonconst_end() { return &PressureChanges[MaxPSets]; } + +public: typedef const PressureChange* const_iterator; - iterator begin() { return &PressureChanges[0]; } - iterator end() { return &PressureChanges[MaxPSets]; } const_iterator begin() const { return &PressureChanges[0]; } const_iterator end() const { return &PressureChanges[MaxPSets]; } @@ -191,30 +193,56 @@ struct RegPressureDelta { } }; -/// \brief A set of live virtual registers and physical register units. +/// A set of live virtual registers and physical register units. /// -/// Virtual and physical register numbers require separate sparse sets, but most -/// of the RegisterPressureTracker handles them uniformly. -struct LiveRegSet { - SparseSet PhysRegs; - SparseSet VirtRegs; +/// This is a wrapper around a SparseSet which deals with mapping register unit +/// and virtual register indexes to an index usable by the sparse set. +class LiveRegSet { +private: + SparseSet Regs; + unsigned NumRegUnits; + + unsigned getSparseIndexFromReg(unsigned Reg) const { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return TargetRegisterInfo::virtReg2Index(Reg) + NumRegUnits; + assert(Reg < NumRegUnits); + return Reg; + } + unsigned getRegFromSparseIndex(unsigned SparseIndex) const { + if (SparseIndex >= NumRegUnits) + return TargetRegisterInfo::index2VirtReg(SparseIndex-NumRegUnits); + return SparseIndex; + } + +public: + void clear(); + void init(const MachineRegisterInfo &MRI); bool contains(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return VirtRegs.count(Reg); - return PhysRegs.count(Reg); + unsigned SparseIndex = getSparseIndexFromReg(Reg); + return Regs.count(SparseIndex); } bool insert(unsigned Reg) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return VirtRegs.insert(Reg).second; - return PhysRegs.insert(Reg).second; + unsigned SparseIndex = getSparseIndexFromReg(Reg); + return Regs.insert(SparseIndex).second; } bool erase(unsigned Reg) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) - return VirtRegs.erase(Reg); - return PhysRegs.erase(Reg); + unsigned SparseIndex = getSparseIndexFromReg(Reg); + return Regs.erase(SparseIndex); + } + + size_t size() const { + return Regs.size(); + } + + template + void appendTo(ContainerT &To) const { + for (unsigned I : Regs) { + unsigned Reg = getRegFromSparseIndex(I); + To.push_back(Reg); + } } }; @@ -300,16 +328,12 @@ public: // position changes while pressure does not. void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; } - /// \brief Get the SlotIndex for the first nondebug instruction including or - /// after the current position. - SlotIndex getCurrSlot() const; - /// Recede across the previous instruction. - bool recede(SmallVectorImpl *LiveUses = nullptr, + void recede(SmallVectorImpl *LiveUses = nullptr, PressureDiff *PDiff = nullptr); /// Advance across the current instruction. 
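
// A sketch (not from the imported sources) of the unified LiveRegSet
// introduced above: register units keep their own sparse index, virtual
// registers are offset by NumRegUnits, so a single SparseSet serves both
// namespaces. LiveRegs is an assumed, initialized LiveRegSet.
SmallVector<unsigned, 16> Live;
LiveRegs.appendTo(Live); // decodes sparse indexes back into register numbers
unsigned NumVirtLive = 0;
for (unsigned Reg : Live)
  if (TargetRegisterInfo::isVirtualRegister(Reg))
    ++NumVirtLive;
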
-  bool advance();
+  void advance();

   /// Finalize the region boundaries and record live ins and live outs.
   void closeRegion();
@@ -326,17 +350,15 @@ public:
   ArrayRef<unsigned> getLiveThru() const { return LiveThruPressure; }

   /// Get the resulting register pressure over the traversed region.
-  /// This result is complete if either advance() or recede() has returned true,
-  /// or if closeRegion() was explicitly invoked.
+  /// This result is complete if closeRegion() was explicitly invoked.
   RegisterPressure &getPressure() { return P; }
   const RegisterPressure &getPressure() const { return P; }

   /// Get the register set pressure at the current position, which may be less
   /// than the pressure across the traversed region.
-  std::vector<unsigned> &getRegSetPressureAtPos() { return CurrSetPressure; }
-
-  void discoverLiveOut(unsigned Reg);
-  void discoverLiveIn(unsigned Reg);
+  const std::vector<unsigned> &getRegSetPressureAtPos() const {
+    return CurrSetPressure;
+  }

   bool isTopClosed() const;
   bool isBottomClosed() const;
@@ -412,7 +434,12 @@ public:
   void dump() const;

protected:
-  const LiveRange *getLiveRange(unsigned Reg) const;
+  void discoverLiveOut(unsigned Reg);
+  void discoverLiveIn(unsigned Reg);
+
+  /// \brief Get the SlotIndex for the first nondebug instruction including or
+  /// after the current position.
+  SlotIndex getCurrSlot() const;

   void increaseRegPressure(ArrayRef<unsigned> Regs);
   void decreaseRegPressure(ArrayRef<unsigned> Regs);
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index df3fd34e0af6..122c78534253 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -74,10 +74,6 @@ public:
   /// Start tracking liveness from the begin of the specific basic block.
   void enterBasicBlock(MachineBasicBlock *mbb);

-  /// Allow resetting register state info for multiple
-  /// passes over/within the same function.
-  void initRegState();
-
   /// Move the internal MBB iterator and update register states.
   void forward();

@@ -104,10 +100,8 @@ public:
     MBBI = I;
   }

-  MachineBasicBlock::iterator getCurrentPosition() const {
-    return MBBI;
-  }
-
+  MachineBasicBlock::iterator getCurrentPosition() const { return MBBI; }
+
   /// Return if a specific register is currently used.
   bool isRegUsed(unsigned Reg, bool includeReserved = true) const;

@@ -152,7 +146,7 @@ public:
   }

   /// Tell the scavenger a register is used.
-  void setRegUsed(unsigned Reg);
+  void setRegUsed(unsigned Reg, LaneBitmask LaneMask = ~0u);
private:
   /// Returns true if a register is reserved. It is never "unused".
   bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
@@ -169,10 +163,10 @@ private:
   /// Processes the current instruction and fill the KillRegUnits and
   /// DefRegUnits bit vectors.
   void determineKillsAndDefs();
-
+
   /// Add all Reg Units that Reg contains to BV.
   void addRegUnits(BitVector &BV, unsigned Reg);
-
+
   /// Return the candidate register that is unused for the longest after
   /// StartMI. UseMI is set to the instruction where the search stopped.
   ///
@@ -182,6 +176,9 @@ private:
                            unsigned InstrLimit,
                            MachineBasicBlock::iterator &UseMI);

+  /// Allow resetting register state info for multiple
+  /// passes over/within the same function.
+  void initRegState();
 };

} // End llvm namespace
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 2be5de640e29..7db03459f9bf 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -231,13 +231,9 @@ namespace RTLIB {
     FPROUND_F80_F64,
     FPROUND_F128_F64,
     FPROUND_PPCF128_F64,
-    FPTOSINT_F32_I8,
-    FPTOSINT_F32_I16,
     FPTOSINT_F32_I32,
     FPTOSINT_F32_I64,
     FPTOSINT_F32_I128,
-    FPTOSINT_F64_I8,
-    FPTOSINT_F64_I16,
     FPTOSINT_F64_I32,
     FPTOSINT_F64_I64,
     FPTOSINT_F64_I128,
@@ -250,13 +246,9 @@ namespace RTLIB {
     FPTOSINT_PPCF128_I32,
     FPTOSINT_PPCF128_I64,
     FPTOSINT_PPCF128_I128,
-    FPTOUINT_F32_I8,
-    FPTOUINT_F32_I16,
     FPTOUINT_F32_I32,
     FPTOUINT_F32_I64,
     FPTOUINT_F32_I128,
-    FPTOUINT_F64_I8,
-    FPTOUINT_F64_I16,
     FPTOUINT_F64_I32,
     FPTOUINT_F64_I64,
     FPTOUINT_F64_I128,
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 839131416560..bda9dbd51fff 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -20,11 +20,11 @@
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/Target/TargetLowering.h"

namespace llvm {
-  class AliasAnalysis;
   class SUnit;
   class MachineConstantPool;
   class MachineFunction;
@@ -122,18 +122,7 @@ namespace llvm {
     }

     /// Return true if the specified SDep is equivalent except for latency.
-    bool overlaps(const SDep &Other) const {
-      if (Dep != Other.Dep) return false;
-      switch (Dep.getInt()) {
-      case Data:
-      case Anti:
-      case Output:
-        return Contents.Reg == Other.Contents.Reg;
-      case Order:
-        return Contents.OrdKind == Other.Contents.OrdKind;
-      }
-      llvm_unreachable("Invalid dependency kind!");
-    }
+    bool overlaps(const SDep &Other) const;

     bool operator==(const SDep &Other) const {
       return overlaps(Other) && Latency == Other.Latency;
@@ -157,19 +146,13 @@ namespace llvm {
     }

     //// getSUnit - Return the SUnit to which this edge points.
-    SUnit *getSUnit() const {
-      return Dep.getPointer();
-    }
+    SUnit *getSUnit() const;

     //// setSUnit - Assign the SUnit to which this edge points.
-    void setSUnit(SUnit *SU) {
-      Dep.setPointer(SU);
-    }
+    void setSUnit(SUnit *SU);

     /// getKind - Return an enum value representing the kind of the dependence.
-    Kind getKind() const {
-      return Dep.getInt();
-    }
+    Kind getKind() const;

     /// isCtrl - Shorthand for getKind() != SDep::Data.
     bool isCtrl() const {
@@ -374,7 +357,7 @@ namespace llvm {
     /// correspond to schedulable entities (e.g. instructions) and do not have a
     /// valid ID. Consequently, always check for boundary nodes before accessing
     /// an associative data structure keyed on node ID.
-    bool isBoundaryNode() const { return NodeNum == BoundaryID; };
+    bool isBoundaryNode() const { return NodeNum == BoundaryID; }

     /// setNode - Assign the representative SDNode for this SUnit.
     /// This may be used during pre-regalloc scheduling.
@@ -490,6 +473,30 @@ namespace llvm {
     void ComputeHeight();
   };

+  /// Return true if the specified SDep is equivalent except for latency.
+  inline bool SDep::overlaps(const SDep &Other) const {
+    if (Dep != Other.Dep)
+      return false;
+    switch (Dep.getInt()) {
+    case Data:
+    case Anti:
+    case Output:
+      return Contents.Reg == Other.Contents.Reg;
+    case Order:
+      return Contents.OrdKind == Other.Contents.OrdKind;
+    }
+    llvm_unreachable("Invalid dependency kind!");
+  }
+
+  //// getSUnit - Return the SUnit to which this edge points.
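
// A sketch (not from the imported sources) of the contract kept by the
// out-of-line SDep::overlaps() above: it compares everything except latency,
// while operator== (defined via overlaps) also requires equal latency.
// SU is an assumed SUnit; the SDep(SUnit *, Kind, unsigned) constructor and
// setLatency() are presumed available as in this header.
SDep A(&SU, SDep::Data, /*Reg=*/0);
SDep B = A;
B.setLatency(2);                 // same edge, different latency
assert(A.overlaps(B) && !(A == B));
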
+ inline SUnit *SDep::getSUnit() const { return Dep.getPointer(); } + + //// setSUnit - Assign the SUnit to which this edge points. + inline void SDep::setSUnit(SUnit *SU) { Dep.setPointer(SU); } + + /// getKind - Return an enum value representing the kind of the dependence. + inline SDep::Kind SDep::getKind() const { return Dep.getInt(); } + //===--------------------------------------------------------------------===// /// SchedulingPriorityQueue - This interface is used to plug different /// priorities computation algorithms into the list scheduler. It implements diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index b56d5ec8ce63..c574df094911 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -26,22 +26,32 @@ namespace llvm { class MachineFrameInfo; class MachineLoopInfo; class MachineDominatorTree; - class LiveIntervals; class RegPressureTracker; class PressureDiffs; /// An individual mapping from virtual register number to SUnit. struct VReg2SUnit { unsigned VirtReg; + LaneBitmask LaneMask; SUnit *SU; - VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {} + VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) + : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); } }; + /// Mapping from virtual register to SUnit including an operand index. + struct VReg2SUnitOperIdx : public VReg2SUnit { + unsigned OperandIndex; + + VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, + unsigned OperandIndex, SUnit *SU) + : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} + }; + /// Record a physical register access. /// For non-data-dependent uses, OpIdx == -1. struct PhysRegSUOper { @@ -69,7 +79,10 @@ namespace llvm { /// Track local uses of virtual registers. These uses are gathered by the DAG /// builder and may be consulted by the scheduler to avoid iterating an entire /// vreg use list. - typedef SparseMultiSet VReg2UseMap; + typedef SparseMultiSet VReg2SUnitMultiMap; + + typedef SparseMultiSet + VReg2SUnitOperIdxMultiMap; /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of /// MachineInstrs. @@ -78,15 +91,9 @@ namespace llvm { const MachineLoopInfo *MLI; const MachineFrameInfo *MFI; - /// Live Intervals provides reaching defs in preRA scheduling. - LiveIntervals *LIS; - /// TargetSchedModel provides an interface to the machine model. TargetSchedModel SchedModel; - /// isPostRA flag indicates vregs cannot be present. - bool IsPostRA; - /// True if the DAG builder should remove kill flags (in preparation for /// rescheduling). bool RemoveKillFlags; @@ -98,6 +105,9 @@ namespace llvm { /// it has taken responsibility for scheduling the terminator correctly. bool CanHandleTerminators; + /// Whether lane masks should get tracked. + bool TrackLaneMasks; + /// State specific to the current scheduling region. /// ------------------------------------------------ @@ -120,7 +130,7 @@ namespace llvm { /// After calling BuildSchedGraph, each vreg used in the scheduling region /// is mapped to a set of SUnits. These include all local vreg uses, not /// just the uses for a singly defined vreg. - VReg2UseMap VRegUses; + VReg2SUnitMultiMap VRegUses; /// State internal to DAG building. /// ------------------------------- @@ -132,8 +142,12 @@ namespace llvm { Reg2SUnitsMap Defs; Reg2SUnitsMap Uses; - /// Track the last instruction in this region defining each virtual register. 
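
// A sketch (not from the imported sources) of why VReg2SUnit above now
// carries a LaneBitmask: with sub-register liveness, writes to disjoint
// lanes of one vreg can all be "current" definitions (see CurrentVRegDefs
// just below). VReg, SULo and SUHi are assumed; the mask values are
// hypothetical.
VReg2SUnit LoDef(VReg, /*LaneMask=*/0x1, &SULo);
VReg2SUnit HiDef(VReg, /*LaneMask=*/0x2, &SUHi);
assert((LoDef.LaneMask & HiDef.LaneMask) == 0); // disjoint lanes coexist
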
- VReg2SUnitMap VRegDefs; + /// Tracks the last instruction(s) in this region defining each virtual + /// register. There may be multiple current definitions for a register with + /// disjunct lanemasks. + VReg2SUnitMultiMap CurrentVRegDefs; + /// Tracks the last instructions in this region using each virtual register. + VReg2SUnitOperIdxMultiMap CurrentVRegUses; /// PendingLoads - Remember where unknown loads are after the most recent /// unknown store, as we iterate. As with Defs and Uses, this is here @@ -154,17 +168,10 @@ namespace llvm { public: explicit ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - bool IsPostRAFlag, - bool RemoveKillFlags = false, - LiveIntervals *LIS = nullptr); + bool RemoveKillFlags = false); ~ScheduleDAGInstrs() override {} - bool isPostRA() const { return IsPostRA; } - - /// \brief Expose LiveIntervals for use in DAG mutators and such. - LiveIntervals *getLIS() const { return LIS; } - /// \brief Get the machine model for instruction scheduling. const TargetSchedModel *getSchedModel() const { return &SchedModel; } @@ -206,7 +213,8 @@ namespace llvm { /// input. void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = nullptr, - PressureDiffs *PDiffs = nullptr); + PressureDiffs *PDiffs = nullptr, + bool TrackLaneMasks = false); /// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier. We want to @@ -253,6 +261,12 @@ namespace llvm { /// Other adjustments may be made to the instruction if necessary. Return /// true if the operand has been deleted, false if not. bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + /// Returns a mask for which lanes get read/written by the given (register) + /// machine operand. + LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; + + void collectVRegUses(SUnit *SU); }; /// newSUnit - Creates a new SUnit and return a ptr to it. diff --git a/include/llvm/CodeGen/SchedulerRegistry.h b/include/llvm/CodeGen/SchedulerRegistry.h index 51ac7f28527f..a7a6227664de 100644 --- a/include/llvm/CodeGen/SchedulerRegistry.h +++ b/include/llvm/CodeGen/SchedulerRegistry.h @@ -52,12 +52,6 @@ public: static RegisterScheduler *getList() { return (RegisterScheduler *)Registry.getList(); } - static FunctionPassCtor getDefault() { - return (FunctionPassCtor)Registry.getDefault(); - } - static void setDefault(FunctionPassCtor C) { - Registry.setDefault((MachinePassCtor)C); - } static void setListener(MachinePassRegistryListener *L) { Registry.setListener(L); } diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 1ee92380e690..a21e9ae881a7 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -19,6 +19,7 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/ilist.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -31,7 +32,6 @@ namespace llvm { -class AliasAnalysis; class MachineConstantPoolValue; class MachineFunction; class MDNode; @@ -215,6 +215,8 @@ class SelectionDAG { /// Tracks dbg_value information through SDISel. SDDbgInfo *DbgInfo; + uint16_t NextPersistentId = 0; + public: /// Clients of various APIs that cause global effects on /// the DAG can optionally implement this interface. 
This allows the clients @@ -324,11 +326,10 @@ public: } iterator_range allnodes() { - return iterator_range(allnodes_begin(), allnodes_end()); + return make_range(allnodes_begin(), allnodes_end()); } iterator_range allnodes() const { - return iterator_range(allnodes_begin(), - allnodes_end()); + return make_range(allnodes_begin(), allnodes_end()); } /// Return the root tag of the SelectionDAG. @@ -532,7 +533,7 @@ public: SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue }; return getNode(ISD::CopyToReg, dl, VTs, - ArrayRef(Ops, Glue.getNode() ? 4 : 3)); + makeArrayRef(Ops, Glue.getNode() ? 4 : 3)); } // Similar to last getCopyToReg() except parameter Reg is a SDValue @@ -541,7 +542,7 @@ public: SDVTList VTs = getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, Reg, N, Glue }; return getNode(ISD::CopyToReg, dl, VTs, - ArrayRef(Ops, Glue.getNode() ? 4 : 3)); + makeArrayRef(Ops, Glue.getNode() ? 4 : 3)); } SDValue getCopyFromReg(SDValue Chain, SDLoc dl, unsigned Reg, EVT VT) { @@ -558,7 +559,7 @@ public: SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue }; return getNode(ISD::CopyFromReg, dl, VTs, - ArrayRef(Ops, Glue.getNode() ? 3 : 2)); + makeArrayRef(Ops, Glue.getNode() ? 3 : 2)); } SDValue getCondCode(ISD::CondCode Cond); @@ -670,7 +671,7 @@ public: SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef Ops); SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, - ArrayRef Ops); + ArrayRef Ops, const SDNodeFlags *Flags = nullptr); SDValue getNode(unsigned Opcode, SDLoc DL, ArrayRef ResultTys, ArrayRef Ops); SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs, @@ -687,7 +688,7 @@ public: SDValue N3, SDValue N4); SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5); - + // Specialize again based on number of operands for nodes with a VTList // rather than a single VT. SDValue getNode(unsigned Opcode, SDLoc DL, SDVTList VTs); @@ -901,6 +902,12 @@ public: /// the target's desired shift amount type. SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op); + /// Expand the specified \c ISD::VAARG node as the Legalize pass would. + SDValue expandVAArg(SDNode *Node); + + /// Expand the specified \c ISD::VACOPY node as the Legalize pass would. + SDValue expandVACopy(SDNode *Node); + /// *Mutate* the specified node in-place to have the /// specified operands. If the resultant node already exists in the DAG, /// this does not modify the specified node, instead it returns the node that @@ -1072,6 +1079,10 @@ public: // target info. switch (Opcode) { case ISD::ADD: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: case ISD::MUL: case ISD::MULHU: case ISD::MULHS: @@ -1088,6 +1099,8 @@ public: case ISD::ADDE: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: return true; default: return false; } @@ -1150,6 +1163,10 @@ public: const ConstantSDNode *Cst1, const ConstantSDNode *Cst2); + SDValue FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, + EVT VT, ArrayRef Ops, + const SDNodeFlags *Flags = nullptr); + /// Constant fold a setcc to true or false. SDValue FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, SDLoc dl); @@ -1199,6 +1216,10 @@ public: /// other positive zero. bool isEqualTo(SDValue A, SDValue B) const; + /// Return true if A and B have no common bits set. As an example, this can + /// allow an 'add' to be transformed into an 'or'. 
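
// Why disjoint bits justify rewriting add as or (the transform the comment
// above refers to): if A & B == 0, no bit position receives two set bits,
// so the addition produces no carries and A + B == A | B.
static_assert((0x5u + 0xAu) == (0x5u | 0xAu),
              "disjoint-bit addition is carry-free");
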
+  bool haveNoCommonBitsSet(SDValue A, SDValue B) const;
+
   /// Utility function used by legalize and lowering to
   /// "unroll" a vector operation by splitting out the scalars and operating
   /// on each element individually. If the ResNE is 0, fully unroll the vector
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 4821d1aae9e5..23816bde07c0 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -44,6 +44,7 @@ class GlobalValue;
 class MachineBasicBlock;
 class MachineConstantPoolValue;
 class SDNode;
+class BinaryWithFlagsSDNode;
 class Value;
 class MCSymbol;
 template <typename T> struct DenseMapInfo;
@@ -81,11 +82,6 @@ namespace ISD {
   /// all ConstantFPSDNode or undef.
   bool isBuildVectorOfConstantFPSDNodes(const SDNode *N);

-  /// Return true if the specified node is a
-  /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
-  /// element is not an undef.
-  bool isScalarToVector(const SDNode *N);
-
   /// Return true if the node has at least one operand
   /// and all operands of the specified node are ISD::UNDEF.
   bool allOperandsUndef(const SDNode *N);
@@ -139,7 +135,7 @@ public:
     return SDValue(Node, R);
   }

-  // Return true if this node is an operand of N.
+  /// Return true if this node is an operand of N.
   bool isOperandOf(const SDNode *N) const;

   /// Return the ValueType of the referenced return value.
@@ -167,6 +163,7 @@ public:
   inline bool isTargetMemoryOpcode() const;
   inline bool isTargetOpcode() const;
   inline bool isMachineOpcode() const;
+  inline bool isUndef() const;
   inline unsigned getMachineOpcode() const;
   inline const DebugLoc &getDebugLoc() const;
   inline void dump() const;
@@ -318,6 +315,61 @@ template<> struct simplify_type {
   }
 };

+/// These are IR-level optimization flags that may be propagated to SDNodes.
+/// TODO: This data structure should be shared by the IR optimizer and the
+/// backend.
+struct SDNodeFlags {
+private:
+  bool NoUnsignedWrap : 1;
+  bool NoSignedWrap : 1;
+  bool Exact : 1;
+  bool UnsafeAlgebra : 1;
+  bool NoNaNs : 1;
+  bool NoInfs : 1;
+  bool NoSignedZeros : 1;
+  bool AllowReciprocal : 1;
+
+public:
+  /// Default constructor turns off all optimization flags.
+  SDNodeFlags() {
+    NoUnsignedWrap = false;
+    NoSignedWrap = false;
+    Exact = false;
+    UnsafeAlgebra = false;
+    NoNaNs = false;
+    NoInfs = false;
+    NoSignedZeros = false;
+    AllowReciprocal = false;
+  }
+
+  // These are mutators for each flag.
+  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
+  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
+  void setExact(bool b) { Exact = b; }
+  void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
+  void setNoNaNs(bool b) { NoNaNs = b; }
+  void setNoInfs(bool b) { NoInfs = b; }
+  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
+  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
+
+  // These are accessors for each flag.
+  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
+  bool hasNoSignedWrap() const { return NoSignedWrap; }
+  bool hasExact() const { return Exact; }
+  bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
+  bool hasNoNaNs() const { return NoNaNs; }
+  bool hasNoInfs() const { return NoInfs; }
+  bool hasNoSignedZeros() const { return NoSignedZeros; }
+  bool hasAllowReciprocal() const { return AllowReciprocal; }
+
+  /// Return a raw encoding of the flags.
+  /// This function should only be used to add data to the NodeID value.
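
// Worked example for the raw encoding defined just below: each flag occupies
// one bit in declaration order, so setting Exact (bit 2) and NoNaNs (bit 4)
// yields (1 << 2) | (1 << 4) == 0x14.
SDNodeFlags F;        // all flags default to false
F.setExact(true);
F.setNoNaNs(true);
assert(F.getRawFlags() == 0x14u);
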
+ unsigned getRawFlags() const { + return (NoUnsignedWrap << 0) | (NoSignedWrap << 1) | (Exact << 2) | + (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) | + (NoSignedZeros << 6) | (AllowReciprocal << 7); + } +}; /// Represents one node in the SelectionDAG. /// @@ -374,6 +426,10 @@ private: friend struct ilist_traits; public: + /// Unique and persistent id per SDNode in the DAG. + /// Used for debug printing. + uint16_t PersistentId; + //===--------------------------------------------------------------------===// // Accessors // @@ -395,6 +451,9 @@ public: return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE; } + /// Return true if the type of the node type undefined. + bool isUndef() const { return NodeType == ISD::UNDEF; } + /// Test if this node is a memory intrinsic (with valid pointer information). /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for /// non-memory intrinsics (with chains) that are not really instances of @@ -517,10 +576,10 @@ public: static use_iterator use_end() { return use_iterator(nullptr); } inline iterator_range uses() { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } inline iterator_range uses() const { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } /// Return true if there are exactly NUSES uses of the indicated value. @@ -592,8 +651,8 @@ public: }; iterator_range op_values() const { - return iterator_range(value_op_iterator(op_begin()), - value_op_iterator(op_end())); + return make_range(value_op_iterator(op_begin()), + value_op_iterator(op_end())); } SDVTList getVTList() const { @@ -605,27 +664,11 @@ public: /// to which the glue operand points. Otherwise return NULL. SDNode *getGluedNode() const { if (getNumOperands() != 0 && - getOperand(getNumOperands()-1).getValueType() == MVT::Glue) + getOperand(getNumOperands()-1).getValueType() == MVT::Glue) return getOperand(getNumOperands()-1).getNode(); return nullptr; } - // If this is a pseudo op, like copyfromreg, look to see if there is a - // real target node glued to it. If so, return the target node. - const SDNode *getGluedMachineNode() const { - const SDNode *FoundNode = this; - - // Climb up glue edges until a machine-opcode node is found, or the - // end of the chain is reached. - while (!FoundNode->isMachineOpcode()) { - const SDNode *N = FoundNode->getGluedNode(); - if (!N) break; - FoundNode = N; - } - - return FoundNode; - } - /// If this node has a glue value with a user, return /// the user (there is at most one). Otherwise return NULL. SDNode *getGluedUser() const { @@ -635,6 +678,10 @@ public: return nullptr; } + /// This could be defined as a virtual function and implemented more simply + /// and directly, but it is not to avoid creating a vtable for this class. + const SDNodeFlags *getFlags() const; + /// Return the number of values defined/returned by this operator. unsigned getNumValues() const { return NumValues; } @@ -909,6 +956,9 @@ inline bool SDValue::isMachineOpcode() const { inline unsigned SDValue::getMachineOpcode() const { return Node->getMachineOpcode(); } +inline bool SDValue::isUndef() const { + return Node->isUndef(); +} inline bool SDValue::use_empty() const { return !Node->hasAnyUseOfValue(ResNo); } @@ -943,62 +993,6 @@ inline void SDUse::setNode(SDNode *N) { if (N) N->addUse(*this); } -/// These are IR-level optimization flags that may be propagated to SDNodes. -/// TODO: This data structure should be shared by the IR optimizer and the -/// the backend. 
-struct SDNodeFlags {
-private:
-  bool NoUnsignedWrap : 1;
-  bool NoSignedWrap : 1;
-  bool Exact : 1;
-  bool UnsafeAlgebra : 1;
-  bool NoNaNs : 1;
-  bool NoInfs : 1;
-  bool NoSignedZeros : 1;
-  bool AllowReciprocal : 1;
-
-public:
-  /// Default constructor turns off all optimization flags.
-  SDNodeFlags() {
-    NoUnsignedWrap = false;
-    NoSignedWrap = false;
-    Exact = false;
-    UnsafeAlgebra = false;
-    NoNaNs = false;
-    NoInfs = false;
-    NoSignedZeros = false;
-    AllowReciprocal = false;
-  }
-
-  // These are mutators for each flag.
-  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
-  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
-  void setExact(bool b) { Exact = b; }
-  void setUnsafeAlgebra(bool b) { UnsafeAlgebra = b; }
-  void setNoNaNs(bool b) { NoNaNs = b; }
-  void setNoInfs(bool b) { NoInfs = b; }
-  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
-  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
-
-  // These are accessors for each flag.
-  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
-  bool hasNoSignedWrap() const { return NoSignedWrap; }
-  bool hasExact() const { return Exact; }
-  bool hasUnsafeAlgebra() const { return UnsafeAlgebra; }
-  bool hasNoNaNs() const { return NoNaNs; }
-  bool hasNoInfs() const { return NoInfs; }
-  bool hasNoSignedZeros() const { return NoSignedZeros; }
-  bool hasAllowReciprocal() const { return AllowReciprocal; }
-
-  /// Return a raw encoding of the flags.
-  /// This function should only be used to add data to the NodeID value.
-  unsigned getRawFlags() const {
-    return (NoUnsignedWrap << 0) | (NoSignedWrap << 1) | (Exact << 2) |
-           (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
-           (NoSignedZeros << 6) | (AllowReciprocal << 7);
-  }
-};
-
 /// This class is used for single-operand SDNodes. This is solely
 /// to allow co-allocation of node operands with the node itself.
 class UnarySDNode : public SDNode {
@@ -1080,6 +1074,9 @@ class HandleSDNode : public SDNode {
public:
   explicit HandleSDNode(SDValue X)
     : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
+    // HandleSDNodes are never inserted into the DAG, so they won't be
+    // auto-numbered. Use ID 65535 as a sentinel.
+    PersistentId = 0xffff;
     InitOperands(&Op, X);
   }
   ~HandleSDNode();
@@ -1497,6 +1494,15 @@ public:
   }
 };

+/// Returns true if \p V is a constant integer zero.
+bool isNullConstant(SDValue V);
+/// Returns true if \p V is an FP constant with a value of positive zero.
+bool isNullFPConstant(SDValue V);
+/// Returns true if \p V is an integer constant with all bits set.
+bool isAllOnesConstant(SDValue V);
+/// Returns true if \p V is a constant integer one.
+bool isOneConstant(SDValue V);
+
 class GlobalAddressSDNode : public SDNode {
   const GlobalValue *TheGlobal;
   int64_t Offset;
@@ -1697,6 +1703,14 @@ public:
   ConstantFPSDNode *
   getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;

+  /// \brief If this is a constant FP splat and the splatted constant FP is an
+  /// exact power of 2, return the log base 2 integer value.  Otherwise,
+  /// return -1.
+  ///
+  /// The BitWidth specifies the necessary bit precision.
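
// A sketch (not from the imported sources) of the query declared just below.
// BV is an assumed BuildVectorSDNode splatting the constant 8.0f; since
// 8.0 == 2^3 exactly, the call is expected to yield 3, and -1 for any
// non-power-of-2 splat.
BitVector UndefElements;
int32_t Log2 = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements,
                                                   /*BitWidth=*/32);
assert(Log2 == 3);
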
+ int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const; + bool isConstant() const; static inline bool classof(const SDNode *N) { @@ -2003,9 +2017,9 @@ class MaskedLoadStoreSDNode : public MemSDNode { public: friend class SelectionDAG; MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, DebugLoc dl, - SDValue *Operands, unsigned numOperands, - SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) - : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + SDValue *Operands, unsigned numOperands, SDVTList VTs, + EVT MemVT, MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { InitOperands(Ops, Operands, numOperands); } @@ -2036,7 +2050,7 @@ public: ISD::LoadExtType getExtensionType() const { return ISD::LoadExtType(SubclassData & 3); - } + } const SDValue &getSrc0() const { return getOperand(3); } static bool classof(const SDNode *N) { return N->getOpcode() == ISD::MLOAD; @@ -2103,17 +2117,18 @@ public: class MaskedGatherSDNode : public MaskedGatherScatterSDNode { public: friend class SelectionDAG; - MaskedGatherSDNode(unsigned Order, DebugLoc dl, ArrayRef Operands, + MaskedGatherSDNode(unsigned Order, DebugLoc dl, ArrayRef Operands, SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, Operands, VTs, MemVT, MMO) { assert(getValue().getValueType() == getValueType(0) && - "Incompatible type of the PathThru value in MaskedGatherSDNode"); - assert(getMask().getValueType().getVectorNumElements() == - getValueType(0).getVectorNumElements() && - "Vector width mismatch between mask and data"); - assert(getMask().getValueType().getScalarType() == MVT::i1 && + "Incompatible type of the PassThru value in MaskedGatherSDNode"); + assert(getMask().getValueType().getVectorNumElements() == + getValueType(0).getVectorNumElements() && "Vector width mismatch between mask and data"); + assert(getIndex().getValueType().getVectorNumElements() == + getValueType(0).getVectorNumElements() && + "Vector width mismatch between index and data"); } static bool classof(const SDNode *N) { @@ -2131,11 +2146,12 @@ public: SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, Operands, VTs, MemVT, MMO) { - assert(getMask().getValueType().getVectorNumElements() == - getValue().getValueType().getVectorNumElements() && - "Vector width mismatch between mask and data"); - assert(getMask().getValueType().getScalarType() == MVT::i1 && + assert(getMask().getValueType().getVectorNumElements() == + getValue().getValueType().getVectorNumElements() && "Vector width mismatch between mask and data"); + assert(getIndex().getValueType().getVectorNumElements() == + getValue().getValueType().getVectorNumElements() && + "Vector width mismatch between index and data"); } static bool classof(const SDNode *N) { diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 9d6d6f5b1be0..7b621bee259f 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -155,7 +155,7 @@ namespace llvm { "Attempt to construct index with 0 pointer."); } - /// Returns true if this is a valid index. Invalid indicies do + /// Returns true if this is a valid index. Invalid indices do /// not point into an index table, and cannot be compared. 
   static bool classof(const SDNode *N) {
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 9d6d6f5b1be0..7b621bee259f 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -155,7 +155,7 @@ namespace llvm {
              "Attempt to construct index with 0 pointer.");
     }
 
-    /// Returns true if this is a valid index. Invalid indicies do
+    /// Returns true if this is a valid index. Invalid indices do
     /// not point into an index table, and cannot be compared.
     bool isValid() const {
       return lie.getPointer();
@@ -272,7 +272,7 @@ namespace llvm {
     SlotIndex getNextSlot() const {
       Slot s = getSlot();
       if (s == Slot_Dead) {
-        return SlotIndex(listEntry()->getNextNode(), Slot_Block);
+        return SlotIndex(&*++listEntry()->getIterator(), Slot_Block);
       }
       return SlotIndex(listEntry(), s + 1);
     }
@@ -280,7 +280,7 @@ namespace llvm {
     /// Returns the next index. This is the index corresponding to the this
     /// index's slot, but for the next instruction.
     SlotIndex getNextIndex() const {
-      return SlotIndex(listEntry()->getNextNode(), getSlot());
+      return SlotIndex(&*++listEntry()->getIterator(), getSlot());
     }
 
     /// Returns the previous slot in the index list. This could be either the
@@ -292,7 +292,7 @@ namespace llvm {
     SlotIndex getPrevSlot() const {
       Slot s = getSlot();
       if (s == Slot_Block) {
-        return SlotIndex(listEntry()->getPrevNode(), Slot_Dead);
+        return SlotIndex(&*--listEntry()->getIterator(), Slot_Dead);
       }
       return SlotIndex(listEntry(), s - 1);
     }
@@ -300,7 +300,7 @@ namespace llvm {
     /// Returns the previous index. This is the index corresponding to this
     /// index's slot, but for the previous instruction.
     SlotIndex getPrevIndex() const {
-      return SlotIndex(listEntry()->getPrevNode(), getSlot());
+      return SlotIndex(&*--listEntry()->getIterator(), getSlot());
    }
 
  };
@@ -333,6 +333,8 @@ namespace llvm {
   /// This pass assigns indexes to each instruction.
   class SlotIndexes : public MachineFunctionPass {
   private:
+    // IndexListEntry allocator.
+    BumpPtrAllocator ileAllocator;
     typedef ilist<IndexListEntry> IndexList;
     IndexList indexList;
@@ -353,9 +355,6 @@ namespace llvm {
     /// and MBB id.
     SmallVector<IdxMBBPair, 8> idx2MBBMap;
 
-    // IndexListEntry allocator.
-    BumpPtrAllocator ileAllocator;
-
     IndexListEntry* createEntry(MachineInstr *mi, unsigned index) {
       IndexListEntry *entry =
         static_cast<IndexListEntry *>(
@@ -377,6 +376,11 @@ namespace llvm {
       initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
     }
 
+    ~SlotIndexes() {
+      // The indexList's nodes are all allocated in the BumpPtrAllocator.
+      indexList.clearAndLeakNodesUnsafely();
+    }
+
     void getAnalysisUsage(AnalysisUsage &au) const override;
     void releaseMemory() override;
 
@@ -427,11 +431,11 @@ namespace llvm {
     /// Returns the next non-null index, if one exists.
     /// Otherwise returns getLastIndex().
     SlotIndex getNextNonNullIndex(SlotIndex Index) {
-      IndexList::iterator I = Index.listEntry();
+      IndexList::iterator I = Index.listEntry()->getIterator();
       IndexList::iterator E = indexList.end();
       while (++I != E)
         if (I->getInstr())
-          return SlotIndex(I, Index.getSlot());
+          return SlotIndex(&*I, Index.getSlot());
       // We reached the end of the function.
       return getLastIndex();
     }
@@ -502,49 +506,52 @@ namespace llvm {
       return getMBBRange(mbb).second;
     }
 
+    /// Iterator over the idx2MBBMap (sorted pairs of slot index of basic block
+    /// begin and basic block)
+    typedef SmallVectorImpl<IdxMBBPair>::const_iterator MBBIndexIterator;
+    /// Move iterator to the next IdxMBBPair where the SlotIndex is greater or
+    /// equal to \p To.
+    MBBIndexIterator advanceMBBIndex(MBBIndexIterator I, SlotIndex To) const {
+      return std::lower_bound(I, idx2MBBMap.end(), To);
+    }
+    /// Get an iterator pointing to the IdxMBBPair with the biggest SlotIndex
+    /// that is greater or equal to \p Idx.
+    MBBIndexIterator findMBBIndex(SlotIndex Idx) const {
+      return advanceMBBIndex(idx2MBBMap.begin(), Idx);
+    }
+    /// Returns an iterator for the begin of the idx2MBBMap.
+    MBBIndexIterator MBBIndexBegin() const {
+      return idx2MBBMap.begin();
+    }
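Together with MBBIndexEnd() below, these accessors replace ad-hoc std::lower_bound calls over idx2MBBMap. A sketch of the intended usage pattern, e.g. collecting the blocks that touch a slot range the way the findLiveInMBBs helper removed below used to (SI, Start, Stop and MBBs are assumed names, not part of this patch):

    for (SlotIndexes::MBBIndexIterator I = SI.findMBBIndex(Start);
         I != SI.MBBIndexEnd() && I->first < Stop; ++I)
      MBBs.push_back(I->second);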
+    /// Return an iterator for the end of the idx2MBBMap.
+    MBBIndexIterator MBBIndexEnd() const {
+      return idx2MBBMap.end();
+    }
+
     /// Returns the basic block which the given index falls in.
     MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
       if (MachineInstr *MI = getInstructionFromIndex(index))
         return MI->getParent();
-      SmallVectorImpl<IdxMBBPair>::const_iterator I =
-        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), index);
-      // Take the pair containing the index
-      SmallVectorImpl<IdxMBBPair>::const_iterator J =
-        ((I != idx2MBBMap.end() && I->first > index) ||
-         (I == idx2MBBMap.end() && idx2MBBMap.size()>0)) ? (I-1): I;
-      assert(J != idx2MBBMap.end() && J->first <= index &&
+      MBBIndexIterator I = findMBBIndex(index);
+      // Take the pair containing the index
+      MBBIndexIterator J =
+        ((I != MBBIndexEnd() && I->first > index) ||
+         (I == MBBIndexEnd() && !idx2MBBMap.empty())) ? std::prev(I) : I;
+
+      assert(J != MBBIndexEnd() && J->first <= index &&
             index < getMBBEndIdx(J->second) &&
             "index does not correspond to an MBB");
       return J->second;
     }
 
-    bool findLiveInMBBs(SlotIndex start, SlotIndex end,
-                        SmallVectorImpl<MachineBasicBlock*> &mbbs) const {
-      SmallVectorImpl<IdxMBBPair>::const_iterator itr =
-        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
-      bool resVal = false;
-
-      while (itr != idx2MBBMap.end()) {
-        if (itr->first >= end)
-          break;
-        mbbs.push_back(itr->second);
-        resVal = true;
-        ++itr;
-      }
-      return resVal;
-    }
-
     /// Returns the MBB covering the given range, or null if the range covers
     /// more than one basic block.
     MachineBasicBlock* getMBBCoveringRange(SlotIndex start, SlotIndex end) const {
 
       assert(start < end && "Backwards ranges not allowed.");
-
-      SmallVectorImpl<IdxMBBPair>::const_iterator itr =
-        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
-
-      if (itr == idx2MBBMap.end()) {
+      MBBIndexIterator itr = findMBBIndex(start);
+      if (itr == MBBIndexEnd()) {
         itr = std::prev(itr);
         return itr->second;
       }
@@ -580,11 +587,11 @@ namespace llvm {
       IndexList::iterator prevItr, nextItr;
       if (Late) {
         // Insert mi's index immediately before the following instruction.
-        nextItr = getIndexAfter(mi).listEntry();
+        nextItr = getIndexAfter(mi).listEntry()->getIterator();
         prevItr = std::prev(nextItr);
       } else {
         // Insert mi's index immediately after the preceding instruction.
- prevItr = getIndexBefore(mi).listEntry(); + prevItr = getIndexBefore(mi).listEntry()->getIterator(); nextItr = std::next(prevItr); } @@ -646,11 +653,11 @@ namespace llvm { if (nextMBB == mbb->getParent()->end()) { startEntry = &indexList.back(); endEntry = createEntry(nullptr, 0); - newItr = indexList.insertAfter(startEntry, endEntry); + newItr = indexList.insertAfter(startEntry->getIterator(), endEntry); } else { startEntry = createEntry(nullptr, 0); - endEntry = getMBBStartIdx(nextMBB).listEntry(); - newItr = indexList.insert(endEntry, startEntry); + endEntry = getMBBStartIdx(&*nextMBB).listEntry(); + newItr = indexList.insert(endEntry->getIterator(), startEntry); } SlotIndex startIdx(startEntry, SlotIndex::Slot_Block); diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h index fdc1a9143ed2..972a616ad779 100644 --- a/include/llvm/CodeGen/StackMaps.h +++ b/include/llvm/CodeGen/StackMaps.h @@ -13,6 +13,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include #include diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index 10c099d2c2f5..2f1379131cbd 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -41,12 +41,12 @@ public: ~TargetLoweringObjectFileELF() override {} - void emitPersonalityValue(MCStreamer &Streamer, const TargetMachine &TM, + void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, const MCSymbol *Sym) const override; /// Given a constant with the SectionKind, return a section that it should be /// placed in. - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, @@ -103,7 +103,7 @@ public: Mangler &Mang, const TargetMachine &TM) const override; - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; /// The mach-o version of this method defaults to returning a stub reference. @@ -123,6 +123,9 @@ public: const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const override; + + void getNameWithPrefix(SmallVectorImpl &OutName, const GlobalValue *GV, + Mangler &Mang, const TargetMachine &TM) const override; }; @@ -140,8 +143,7 @@ public: const TargetMachine &TM) const override; void getNameWithPrefix(SmallVectorImpl &OutName, const GlobalValue *GV, - bool CannotUsePrivateLabel, Mangler &Mang, - const TargetMachine &TM) const override; + Mangler &Mang, const TargetMachine &TM) const override; MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang, const TargetMachine &TM) const override; diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h index 751fac411ce6..81054aba066f 100644 --- a/include/llvm/CodeGen/TargetSchedule.h +++ b/include/llvm/CodeGen/TargetSchedule.h @@ -81,6 +81,12 @@ public: return nullptr; } + /// \brief Return true if this machine model includes an instruction-level + /// scheduling model or cycle-to-cycle itinerary data. 
+ bool hasInstrSchedModelOrItineraries() const { + return hasInstrSchedModel() || hasInstrItineraries(); + } + /// \brief Identify the processor corresponding to the current subtarget. unsigned getProcessorID() const { return SchedModel.getProcessorID(); } diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h index e1a9fd38290b..929eb88a0393 100644 --- a/include/llvm/CodeGen/ValueTypes.h +++ b/include/llvm/CodeGen/ValueTypes.h @@ -89,6 +89,19 @@ namespace llvm { return VecTy; } + /// Return the type converted to an equivalently sized integer or vector + /// with integer element type. Similar to changeVectorElementTypeToInteger, + /// but also handles scalars. + EVT changeTypeToInteger() { + if (isVector()) + return changeVectorElementTypeToInteger(); + + if (isSimple()) + return MVT::getIntegerVT(getSizeInBits()); + + return changeExtendedTypeToInteger(); + } + /// isSimple - Test if the given EVT is simple (as opposed to being /// extended). bool isSimple() const { @@ -151,6 +164,11 @@ namespace llvm { return isSimple() ? V.is1024BitVector() : isExtended1024BitVector(); } + /// is2048BitVector - Return true if this is a 2048-bit vector type. + bool is2048BitVector() const { + return isSimple() ? V.is2048BitVector() : isExtended2048BitVector(); + } + /// isOverloaded - Return true if this is an overloaded type for TableGen. bool isOverloaded() const { return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny); @@ -342,6 +360,7 @@ namespace llvm { // Methods for handling the Extended-type case in functions above. // These are all out-of-line to prevent users of this header file // from having a dependency on Type.h. + EVT changeExtendedTypeToInteger() const; EVT changeExtendedVectorElementTypeToInteger() const; static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth); static EVT getExtendedVectorVT(LLVMContext &C, EVT VT, @@ -356,6 +375,7 @@ namespace llvm { bool isExtended256BitVector() const LLVM_READONLY; bool isExtended512BitVector() const LLVM_READONLY; bool isExtended1024BitVector() const LLVM_READONLY; + bool isExtended2048BitVector() const LLVM_READONLY; EVT getExtendedVectorElementType() const; unsigned getExtendedVectorNumElements() const LLVM_READONLY; unsigned getExtendedSizeInBits() const; diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td index 2b30f14f902c..f29ec42714e8 100644 --- a/include/llvm/CodeGen/ValueTypes.td +++ b/include/llvm/CodeGen/ValueTypes.td @@ -33,55 +33,70 @@ def f80 : ValueType<80 , 10>; // 80-bit floating point value def f128 : ValueType<128, 11>; // 128-bit floating point value def ppcf128: ValueType<128, 12>; // PPC 128-bit floating point value -def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value -def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value -def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value -def v16i1 : ValueType<16, 16>; // 16 x i1 vector value -def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value -def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value -def v1i8 : ValueType<16, 19>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 20>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 21>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 22>; // 8 x i8 vector value -def v16i8 : ValueType<128, 23>; // 16 x i8 vector value -def v32i8 : ValueType<256, 24>; // 32 x i8 vector value -def v64i8 : ValueType<512, 25>; // 64 x i8 vector value -def v1i16 : ValueType<16 , 26>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 27>; // 2 x i16 vector value -def 
v4i16 : ValueType<64 , 28>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 29>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 30>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 31>; // 32 x i16 vector value
-def v1i32 : ValueType<32 , 32>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 33>; // 2 x i32 vector value
-def v4i32 : ValueType<128, 34>; // 4 x i32 vector value
-def v8i32 : ValueType<256, 35>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 36>; // 16 x i32 vector value
-def v1i64 : ValueType<64 , 37>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 38>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 39>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 40>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,41>; // 16 x i64 vector value
-def v1i128 : ValueType<128, 42>; // 1 x i128 vector value
+def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value
+def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value
+def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value
+def v16i1 : ValueType<16, 16>; // 16 x i1 vector value
+def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value
+def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value
+def v512i1 : ValueType<512, 19>; // 512 x i1 vector value
+def v1024i1: ValueType<1024,20>; //1024 x i1 vector value
 
-def v2f16 : ValueType<32 , 43>; // 2 x f16 vector value
-def v4f16 : ValueType<64 , 44>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 45>; // 8 x f16 vector value
-def v1f32 : ValueType<32 , 46>; // 1 x f32 vector value
-def v2f32 : ValueType<64 , 47>; // 2 x f32 vector value
-def v4f32 : ValueType<128, 48>; // 4 x f32 vector value
-def v8f32 : ValueType<256, 49>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 50>; // 16 x f32 vector value
-def v1f64 : ValueType<64, 51>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 52>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 53>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 54>; // 8 x f64 vector value
+def v1i8 : ValueType<16, 21>; // 1 x i8 vector value
+def v2i8 : ValueType<16 , 22>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 23>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 24>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 25>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 26>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 27>; // 64 x i8 vector value
+def v128i8 : ValueType<1024,28>; //128 x i8 vector value
+def v256i8 : ValueType<2048,29>; //256 x i8 vector value
+
+def v1i16 : ValueType<16 , 30>; // 1 x i16 vector value
+def v2i16 : ValueType<32 , 31>; // 2 x i16 vector value
+def v4i16 : ValueType<64 , 32>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 33>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 34>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 35>; // 32 x i16 vector value
+def v64i16 : ValueType<1024,36>; // 64 x i16 vector value
+def v128i16: ValueType<2048,37>; //128 x i16 vector value
+
+def v1i32 : ValueType<32 , 38>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 39>; // 2 x i32 vector value
+def v4i32 : ValueType<128, 40>; // 4 x i32 vector value
+def v8i32 : ValueType<256, 41>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 42>; // 16 x i32 vector value
+def v32i32 : ValueType<1024,43>; // 32 x i32 vector value
+def v64i32 : ValueType<2048,44>; // 64 x i32 vector value
+
+def v1i64 : ValueType<64 , 45>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 46>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 47>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 48>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,49>; // 16 x i64 vector value
+def v32i64 : ValueType<2048,50>; // 32 x i64 vector value
+
+def v1i128 : ValueType<128, 51>; // 1 x i128 vector value
+
+def v2f16 : ValueType<32 , 52>; // 2 x f16 vector value
+def v4f16 : ValueType<64 , 53>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 54>; // 8 x f16 vector value
+def v1f32 : ValueType<32 , 55>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 56>; // 2 x f32 vector value
+def v4f32 : ValueType<128, 57>; // 4 x f32 vector value
+def v8f32 : ValueType<256, 58>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 59>; // 16 x f32 vector value
+def v1f64 : ValueType<64, 60>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 61>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 62>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 63>; // 8 x f64 vector value
 
-def x86mmx : ValueType<64 , 55>; // X86 MMX value
-def FlagVT : ValueType<0 , 56>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 57>; // Produces no value
-def untyped: ValueType<8 , 58>; // Produces an untyped value
+def x86mmx : ValueType<64 , 64>; // X86 MMX value
+def FlagVT : ValueType<0 , 65>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 66>; // Produces no value
+def untyped: ValueType<8 , 67>; // Produces an untyped value
+def token : ValueType<0 , 249>; // TokenTy
 def MetadataVT: ValueType<0, 250>; // Metadata
 
 // Pseudo valuetype mapped to the current pointer size to any address space.
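In each def above, the first ValueType parameter is the type's size in bits and the second is its fixed position in the MVT enum, which is why the existing entries had to be renumbered when the new wide vector types were inserted. The new types surface in C++ as MVT members; a small illustration (illustrative values only):

    // v64i32 is one of the new 2048-bit types: 64 elements x 32 bits.
    MVT VT = MVT::getVectorVT(MVT::i32, 64);
    assert(VT == MVT::v64i32);
    assert(VT.getSizeInBits() == 2048 && VT.getVectorNumElements() == 64);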
diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h
index 75638a058a30..70d558f5cfbd 100644
--- a/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -14,145 +14,103 @@
 #ifndef LLVM_CODEGEN_WINEHFUNCINFO_H
 #define LLVM_CODEGEN_WINEHFUNCINFO_H
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/ADT/DenseMap.h"
 
 namespace llvm {
+class AllocaInst;
 class BasicBlock;
+class CatchReturnInst;
 class Constant;
 class Function;
 class GlobalVariable;
 class InvokeInst;
 class IntrinsicInst;
 class LandingPadInst;
+class MCExpr;
 class MCSymbol;
+class MachineBasicBlock;
 class Value;
 
-enum ActionType { Catch, Cleanup };
+// The following structs represent the .xdata tables for various
+// Windows-related EH personalities.
 
-class ActionHandler {
-public:
-  ActionHandler(BasicBlock *BB, ActionType Type)
-      : StartBB(BB), Type(Type), EHState(-1), HandlerBlockOrFunc(nullptr) {}
+typedef PointerUnion<const BasicBlock *, MachineBasicBlock *> MBBOrBasicBlock;
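A quick sketch of how a PointerUnion member like this is typically queried; Handler here stands in for any MBBOrBasicBlock field from the structs below (a hypothetical variable, not part of the patch):

    MBBOrBasicBlock Handler;
    if (const BasicBlock *BB = Handler.dyn_cast<const BasicBlock *>()) {
      // Still in IR form, before the handler is lowered.
      (void)BB;
    } else if (MachineBasicBlock *MBB = Handler.dyn_cast<MachineBasicBlock *>()) {
      // After instruction selection.
      (void)MBB;
    }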
-  ActionType getType() const { return Type; }
-  BasicBlock *getStartBlock() const { return StartBB; }
-
-  bool hasBeenProcessed() { return HandlerBlockOrFunc != nullptr; }
-
-  void setHandlerBlockOrFunc(Constant *F) { HandlerBlockOrFunc = F; }
-  Constant *getHandlerBlockOrFunc() { return HandlerBlockOrFunc; }
-
-  void setEHState(int State) { EHState = State; }
-  int getEHState() const { return EHState; }
-
-private:
-  BasicBlock *StartBB;
-  ActionType Type;
-  int EHState;
-
-  // Can be either a BlockAddress or a Function depending on the EH personality.
-  Constant *HandlerBlockOrFunc;
-};
-
-class CatchHandler : public ActionHandler {
-public:
-  CatchHandler(BasicBlock *BB, Constant *Selector, BasicBlock *NextBB)
-      : ActionHandler(BB, ActionType::Catch), Selector(Selector),
-        NextBB(NextBB), ExceptionObjectVar(nullptr),
-        ExceptionObjectIndex(-1) {}
-
-  // Method for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const ActionHandler *H) {
-    return H->getType() == ActionType::Catch;
-  }
-
-  Constant *getSelector() const { return Selector; }
-  BasicBlock *getNextBB() const { return NextBB; }
-
-  const Value *getExceptionVar() { return ExceptionObjectVar; }
-  TinyPtrVector<BasicBlock *> &getReturnTargets() { return ReturnTargets; }
-
-  void setExceptionVar(const Value *Val) { ExceptionObjectVar = Val; }
-  void setExceptionVarIndex(int Index) { ExceptionObjectIndex = Index; }
-  int getExceptionVarIndex() const { return ExceptionObjectIndex; }
-  void setReturnTargets(TinyPtrVector<BasicBlock *> &Targets) {
-    ReturnTargets = Targets;
-  }
-
-private:
-  Constant *Selector;
-  BasicBlock *NextBB;
-  // While catch handlers are being outlined the ExceptionObjectVar field will
-  // be populated with the instruction in the parent frame that corresponds
-  // to the exception object (or nullptr if the catch does not use an
-  // exception object) and the ExceptionObjectIndex field will be -1.
-  // When the parseEHActions function is called to populate a vector of
-  // instances of this class, the ExceptionObjectVar field will be nullptr
-  // and the ExceptionObjectIndex will be the index of the exception object in
-  // the parent function's localescape block.
-  const Value *ExceptionObjectVar;
-  int ExceptionObjectIndex;
-  TinyPtrVector<BasicBlock *> ReturnTargets;
-};
-
-class CleanupHandler : public ActionHandler {
-public:
-  CleanupHandler(BasicBlock *BB) : ActionHandler(BB, ActionType::Cleanup) {}
-
-  // Method for support type inquiry through isa, cast, and dyn_cast:
-  static inline bool classof(const ActionHandler *H) {
-    return H->getType() == ActionType::Cleanup;
-  }
-};
-
-void parseEHActions(const IntrinsicInst *II,
-                    SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions);
-
-// The following structs respresent the .xdata for functions using C++
-// exceptions on Windows.
-
-struct WinEHUnwindMapEntry {
+struct CxxUnwindMapEntry {
   int ToState;
-  Function *Cleanup;
+  MBBOrBasicBlock Cleanup;
+};
+
+/// Similar to CxxUnwindMapEntry, but supports SEH filters.
+struct SEHUnwindMapEntry {
+  /// If unwinding continues through this handler, transition to the handler at
+  /// this state. This indexes into SEHUnwindMap.
+  int ToState = -1;
+
+  bool IsFinally = false;
+
+  /// Holds the filter expression function.
+  const Function *Filter = nullptr;
+
+  /// Holds the __except or __finally basic block.
+  MBBOrBasicBlock Handler;
 };
 
 struct WinEHHandlerType {
   int Adjectives;
+  /// The CatchObj starts out life as an LLVM alloca and is eventually turned
+  /// into a frame index.
+ union { + const AllocaInst *Alloca; + int FrameIndex; + } CatchObj = {}; GlobalVariable *TypeDescriptor; - int CatchObjRecoverIdx; - Function *Handler; + MBBOrBasicBlock Handler; }; struct WinEHTryBlockMapEntry { - int TryLow; - int TryHigh; + int TryLow = -1; + int TryHigh = -1; + int CatchHigh = -1; SmallVector HandlerArray; }; +enum class ClrHandlerType { Catch, Finally, Fault, Filter }; + +struct ClrEHUnwindMapEntry { + MBBOrBasicBlock Handler; + uint32_t TypeToken; + int Parent; + ClrHandlerType HandlerType; +}; + struct WinEHFuncInfo { - DenseMap RootLPad; - DenseMap LastInvoke; - DenseMap HandlerEnclosedState; - DenseMap LastInvokeVisited; - DenseMap LandingPadStateMap; - DenseMap CatchHandlerParentFrameObjIdx; - DenseMap CatchHandlerParentFrameObjOffset; - DenseMap CatchHandlerMaxState; - DenseMap HandlerBaseState; - SmallVector UnwindMap; + DenseMap EHPadStateMap; + DenseMap FuncletBaseStateMap; + DenseMap InvokeStateMap; + DenseMap + CatchRetSuccessorColorMap; + DenseMap> LabelToStateMap; + SmallVector CxxUnwindMap; SmallVector TryBlockMap; - SmallVector, 4> IPToStateList; + SmallVector SEHUnwindMap; + SmallVector ClrEHUnwindMap; int UnwindHelpFrameIdx = INT_MAX; - int UnwindHelpFrameOffset = -1; - unsigned NumIPToStateFuncsVisited = 0; + int PSPSymFrameIdx = INT_MAX; - /// localescape index of the 32-bit EH registration node. Set by - /// WinEHStatePass and used indirectly by SEH filter functions of the parent. - int EHRegNodeEscapeIndex = INT_MAX; + int getLastStateNumber() const { return CxxUnwindMap.size() - 1; } - WinEHFuncInfo() {} + void addIPToStateRange(const InvokeInst *II, MCSymbol *InvokeBegin, + MCSymbol *InvokeEnd); + + int EHRegNodeFrameIndex = INT_MAX; + int EHRegNodeEndOffset = INT_MAX; + int SEHSetFrameOffset = INT_MAX; + + WinEHFuncInfo(); }; /// Analyze the IR in ParentFn and it's handlers to build WinEHFuncInfo, which @@ -161,5 +119,12 @@ struct WinEHFuncInfo { void calculateWinCXXEHStateNumbers(const Function *ParentFn, WinEHFuncInfo &FuncInfo); +void calculateSEHStateNumbers(const Function *ParentFn, + WinEHFuncInfo &FuncInfo); + +void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo); + +void calculateCatchReturnSuccessorColors(const Function *Fn, + WinEHFuncInfo &FuncInfo); } #endif // LLVM_CODEGEN_WINEHFUNCINFO_H diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index b9fd4504ad76..6a5ac889e6f0 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -324,9 +324,6 @@ /* Define to 1 if you have the header file. */ #cmakedefine HAVE_SYS_UIO_H ${HAVE_SYS_UIO_H} -/* Define to 1 if you have that is POSIX.1 compatible. */ -#cmakedefine HAVE_SYS_WAIT_H ${HAVE_SYS_WAIT_H} - /* Define if the setupterm() function is supported this platform. */ #cmakedefine HAVE_TERMINFO ${HAVE_TERMINFO} @@ -423,8 +420,10 @@ /* Installation directory for data files */ #cmakedefine LLVM_DATADIR "${LLVM_DATADIR}" -/* Target triple LLVM will generate code for by default */ -#cmakedefine LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}" +/* Target triple LLVM will generate code for by default + * Doesn't use `cmakedefine` because it is allowed to be empty. 
+ */ +#define LLVM_DEFAULT_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}" /* Installation directory for documentation */ #cmakedefine LLVM_DOCSDIR "${LLVM_DOCSDIR}" diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index 09706499ea30..498aa9ee6b5d 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -152,6 +152,9 @@ /* Define to 1 if you have the `shell32' library (-lshell32). */ #undef HAVE_LIBSHELL32 +/* Define to 1 if you have the `uuid' library (-luuid). */ +#undef HAVE_LIBUUID + /* Define to 1 if you have the `z' library (-lz). */ #undef HAVE_LIBZ @@ -229,9 +232,6 @@ /* Have pthread_rwlock_init */ #undef HAVE_PTHREAD_RWLOCK_INIT -/* Define to 1 if srand48/lrand48/drand48 exist in */ -#undef HAVE_RAND48 - /* Define to 1 if you have the `realpath' function. */ #undef HAVE_REALPATH @@ -250,15 +250,9 @@ /* Define to 1 if you have the `setrlimit' function. */ #undef HAVE_SETRLIMIT -/* Define to 1 if you have the `siglongjmp' function. */ -#undef HAVE_SIGLONGJMP - /* Define to 1 if you have the header file. */ #undef HAVE_SIGNAL_H -/* Define to 1 if you have the `sigsetjmp' function. */ -#undef HAVE_SIGSETJMP - /* Define to 1 if you have the header file. */ #undef HAVE_STDINT_H @@ -318,9 +312,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_SYS_UIO_H -/* Define to 1 if you have that is POSIX.1 compatible. */ -#undef HAVE_SYS_WAIT_H - /* Define if the setupterm() function is supported this platform. */ #undef HAVE_TERMINFO @@ -333,9 +324,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H -/* Define to 1 if you have the header file. */ -#undef HAVE_UTIME_H - /* Define to 1 if the system has the type `u_int64_t'. */ #undef HAVE_U_INT64_T diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h new file mode 100644 index 000000000000..7728120d68de --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/CodeView.h @@ -0,0 +1,367 @@ +//===- CodeView.h -----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H +#define LLVM_DEBUGINFO_CODEVIEW_CODEVIEW_H + +#include + +namespace llvm { +namespace codeview { + +enum class CallingConvention : uint8_t { + NearC = 0x00, // near right to left push, caller pops stack + FarC = 0x01, // far right to left push, caller pops stack + NearPascal = 0x02, // near left to right push, callee pops stack + FarPascal = 0x03, // far left to right push, callee pops stack + NearFast = 0x04, // near left to right push with regs, callee pops stack + FarFast = 0x05, // far left to right push with regs, callee pops stack + NearStdCall = 0x07, // near standard call + FarStdCall = 0x08, // far standard call + NearSysCall = 0x09, // near sys call + FarSysCall = 0x0a, // far sys call + ThisCall = 0x0b, // this call (this passed in register) + MipsCall = 0x0c, // Mips call + Generic = 0x0d, // Generic call sequence + AlphaCall = 0x0e, // Alpha call + PpcCall = 0x0f, // PPC call + SHCall = 0x10, // Hitachi SuperH call + ArmCall = 0x11, // ARM call + AM33Call = 0x12, // AM33 call + TriCall = 0x13, // TriCore Call + SH5Call = 0x14, // Hitachi SuperH-5 call + M32RCall = 0x15, // M32R Call + ClrCall = 0x16, // clr call + Inline = + 0x17, // Marker for routines always inlined and thus lacking a convention + NearVector = 0x18 // near left to right push with regs, callee pops stack +}; + +enum class ClassOptions : uint16_t { + None = 0x0000, + Packed = 0x0001, + HasConstructorOrDestructor = 0x0002, + HasOverloadedOperator = 0x0004, + Nested = 0x0008, + ContainsNestedClass = 0x0010, + HasOverloadedAssignmentOperator = 0x0020, + HasConversionOperator = 0x0040, + ForwardReference = 0x0080, + Scoped = 0x0100, + HasUniqueName = 0x0200, + Sealed = 0x0400, + Intrinsic = 0x2000 +}; + +inline ClassOptions operator|(ClassOptions a, ClassOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline ClassOptions operator&(ClassOptions a, ClassOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline ClassOptions operator~(ClassOptions a) { + return static_cast(~static_cast(a)); +} + +enum class FrameProcedureOptions : uint32_t { + None = 0x00000000, + HasAlloca = 0x00000001, + HasSetJmp = 0x00000002, + HasLongJmp = 0x00000004, + HasInlineAssembly = 0x00000008, + HasExceptionHandling = 0x00000010, + MarkedInline = 0x00000020, + HasStructuredExceptionHandling = 0x00000040, + Naked = 0x00000080, + SecurityChecks = 0x00000100, + AsynchronousExceptionHandling = 0x00000200, + NoStackOrderingForSecurityChecks = 0x00000400, + Inlined = 0x00000800, + StrictSecurityChecks = 0x00001000, + SafeBuffers = 0x00002000, + ProfileGuidedOptimization = 0x00040000, + ValidProfileCounts = 0x00080000, + OptimizedForSpeed = 0x00100000, + GuardCfg = 0x00200000, + GuardCfw = 0x00400000 +}; + +inline FrameProcedureOptions operator|(FrameProcedureOptions a, + FrameProcedureOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline FrameProcedureOptions operator&(FrameProcedureOptions a, + FrameProcedureOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline FrameProcedureOptions operator~(FrameProcedureOptions a) { + return static_cast(~static_cast(a)); +} + +enum class FunctionOptions : uint8_t { + None = 0x00, + CxxReturnUdt = 0x01, + Constructor = 0x02, + ConstructorWithVirtualBases = 0x04 +}; + +inline FunctionOptions operator|(FunctionOptions a, FunctionOptions b) { + return 
static_cast(static_cast(a) | + static_cast(b)); +} + +inline FunctionOptions operator&(FunctionOptions a, FunctionOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline FunctionOptions operator~(FunctionOptions a) { + return static_cast(~static_cast(a)); +} + +enum class HfaKind : uint8_t { + None = 0x00, + Float = 0x01, + Double = 0x02, + Other = 0x03 +}; + +enum class MemberAccess : uint8_t { + None = 0, + Private = 1, + Protected = 2, + Public = 3 +}; + +enum class MethodKind : uint8_t { + Vanilla = 0x00, + Virtual = 0x01, + Static = 0x02, + Friend = 0x03, + IntroducingVirtual = 0x04, + PureVirtual = 0x05, + PureIntroducingVirtual = 0x06 +}; + +enum class MethodOptions : uint16_t { + None = 0x0000, + Pseudo = 0x0020, + CompilerGenerated = 0x0100, + Sealed = 0x0200 +}; + +inline MethodOptions operator|(MethodOptions a, MethodOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline MethodOptions operator&(MethodOptions a, MethodOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline MethodOptions operator~(MethodOptions a) { + return static_cast(~static_cast(a)); +} + +enum class ModifierOptions : uint16_t { + None = 0x0000, + Const = 0x0001, + Volatile = 0x0002, + Unaligned = 0x0004 +}; + +inline ModifierOptions operator|(ModifierOptions a, ModifierOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline ModifierOptions operator&(ModifierOptions a, ModifierOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline ModifierOptions operator~(ModifierOptions a) { + return static_cast(~static_cast(a)); +} + +enum class ModuleSubstreamKind : uint32_t { + Symbols = 0xf1, + Lines = 0xf2, + StringTable = 0xf3, + FileChecksums = 0xf4, + FrameData = 0xf5, + InlineeLines = 0xf6, + CrossScopeImports = 0xf7, + CrossScopeExports = 0xf8 +}; + +enum class PointerKind : uint8_t { + Near16 = 0x00, // 16 bit pointer + Far16 = 0x01, // 16:16 far pointer + Huge16 = 0x02, // 16:16 huge pointer + BasedOnSegment = 0x03, // based on segment + BasedOnValue = 0x04, // based on value of base + BasedOnSegmentValue = 0x05, // based on segment value of base + BasedOnAddress = 0x06, // based on address of base + BasedOnSegmentAddress = 0x07, // based on segment address of base + BasedOnType = 0x08, // based on type + BasedOnSelf = 0x09, // based on self + Near32 = 0x0a, // 32 bit pointer + Far32 = 0x0b, // 16:32 pointer + Near64 = 0x0c // 64 bit pointer +}; + +enum class PointerMode : uint8_t { + Pointer = 0x00, // "normal" pointer + LValueReference = 0x01, // "old" reference + PointerToDataMember = 0x02, // pointer to data member + PointerToMemberFunction = 0x03, // pointer to member function + RValueReference = 0x04 // r-value reference +}; + +enum class PointerOptions : uint32_t { + None = 0x00000000, + Flat32 = 0x00000100, + Volatile = 0x00000200, + Const = 0x00000400, + Unaligned = 0x00000800, + Restrict = 0x00001000, + WinRTSmartPointer = 0x00080000 +}; + +inline PointerOptions operator|(PointerOptions a, PointerOptions b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline PointerOptions operator&(PointerOptions a, PointerOptions b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline PointerOptions operator~(PointerOptions a) { + return static_cast(~static_cast(a)); +} + +enum class PointerToMemberRepresentation : uint16_t { + Unknown = 0x00, // not specified (pre VC8) + SingleInheritanceData = 0x01, // member data, single inheritance + 
MultipleInheritanceData = 0x02, // member data, multiple inheritance + VirtualInheritanceData = 0x03, // member data, virtual inheritance + GeneralData = 0x04, // member data, most general + SingleInheritanceFunction = 0x05, // member function, single inheritance + MultipleInheritanceFunction = 0x06, // member function, multiple inheritance + VirtualInheritanceFunction = 0x07, // member function, virtual inheritance + GeneralFunction = 0x08 // member function, most general +}; + +enum class TypeRecordKind : uint16_t { + None = 0, + + VirtualTableShape = 0x000a, + Label = 0x000e, + EndPrecompiledHeader = 0x0014, + + Modifier = 0x1001, + Pointer = 0x1002, + Procedure = 0x1008, + MemberFunction = 0x1009, + + Oem = 0x100f, + Oem2 = 0x1011, + + ArgumentList = 0x1201, + FieldList = 0x1203, + BitField = 0x1205, + MethodList = 0x1206, + + BaseClass = 0x1400, + VirtualBaseClass = 0x1401, + IndirectVirtualBaseClass = 0x1402, + Index = 0x1404, + VirtualFunctionTablePointer = 0x1409, + + Enumerate = 0x1502, + Array = 0x1503, + Class = 0x1504, + Structure = 0x1505, + Union = 0x1506, + Enum = 0x1507, + Alias = 0x150a, + Member = 0x150d, + StaticMember = 0x150e, + Method = 0x150f, + NestedType = 0x1510, + OneMethod = 0x1511, + VirtualFunctionTable = 0x151d, + + FunctionId = 0x1601, + MemberFunctionId = 0x1602, + BuildInfo = 0x1603, + SubstringList = 0x1604, + StringId = 0x1605, + UdtSourceLine = 0x1606, + + SByte = 0x8000, + Int16 = 0x8001, + UInt16 = 0x8002, + Int32 = 0x8003, + UInt32 = 0x8004, + Single = 0x8005, + Double = 0x8006, + Float80 = 0x8007, + Float128 = 0x8008, + Int64 = 0x8009, + UInt64 = 0x800a, + Float48 = 0x800b, + Complex32 = 0x800c, + Complex64 = 0x800d, + Complex80 = 0x800e, + Complex128 = 0x800f, + VarString = 0x8010, + + Int128 = 0x8017, + UInt128 = 0x8018, + + Decimal = 0x8019, + Date = 0x801a, + Utf8String = 0x801b, + + Float16 = 0x801c +}; + +enum class VirtualTableSlotKind : uint8_t { + Near16 = 0x00, + Far16 = 0x01, + This = 0x02, + Outer = 0x03, + Meta = 0x04, + Near = 0x05, + Far = 0x06 +}; + +enum class WindowsRTClassKind : uint8_t { + None = 0x00, + RefClass = 0x01, + ValueClass = 0x02, + Interface = 0x03 +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/CodeViewOStream.h b/include/llvm/DebugInfo/CodeView/CodeViewOStream.h new file mode 100644 index 000000000000..14d057a249a5 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/CodeViewOStream.h @@ -0,0 +1,39 @@ +//===- CodeViewOStream.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
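A note on the flag enums in CodeView.h above (ClassOptions, FrameProcedureOptions, FunctionOptions, MethodOptions, ModifierOptions, PointerOptions): because they are scoped enums, they do not convert to integers implicitly, which is why each one carries explicit operator|, operator& and operator~ overloads. A minimal usage sketch (illustrative only):

    ClassOptions Opts = ClassOptions::Packed | ClassOptions::HasUniqueName;
    if ((Opts & ClassOptions::Packed) != ClassOptions::None) {
      // Packed is set; tests compare against None because there is no
      // implicit conversion to bool.
    }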
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEWOSTREAM_H +#define LLVM_DEBUGINFO_CODEVIEW_CODEVIEWOSTREAM_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +namespace llvm { +namespace codeview { + +template class CodeViewOStream { +private: + CodeViewOStream(const CodeViewOStream &) = delete; + CodeViewOStream &operator=(const CodeViewOStream &) = delete; + +public: + typedef typename Writer::LabelType LabelType; + +public: + explicit CodeViewOStream(Writer &W); + +private: + uint64_t size() const { return W.tell(); } + +private: + Writer &W; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h b/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h new file mode 100644 index 000000000000..1ed62487aecc --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h @@ -0,0 +1,78 @@ +//===- FieldListRecordBuilder.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_FIELDLISTRECORDBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_FIELDLISTRECORDBUILDER_H + +#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h" + +namespace llvm { +namespace codeview { + +class MethodInfo { +public: + MethodInfo() : Access(), Kind(), Options(), Type(), VTableSlotOffset(-1) {} + + MethodInfo(MemberAccess Access, MethodKind Kind, MethodOptions Options, + TypeIndex Type, int32_t VTableSlotOffset) + : Access(Access), Kind(Kind), Options(Options), Type(Type), + VTableSlotOffset(VTableSlotOffset) {} + + MemberAccess getAccess() const { return Access; } + MethodKind getKind() const { return Kind; } + MethodOptions getOptions() const { return Options; } + TypeIndex getType() const { return Type; } + int32_t getVTableSlotOffset() const { return VTableSlotOffset; } + +private: + MemberAccess Access; + MethodKind Kind; + MethodOptions Options; + TypeIndex Type; + int32_t VTableSlotOffset; +}; + +class FieldListRecordBuilder : public ListRecordBuilder { +private: + FieldListRecordBuilder(const FieldListRecordBuilder &) = delete; + void operator=(const FieldListRecordBuilder &) = delete; + +public: + FieldListRecordBuilder(); + + void writeBaseClass(MemberAccess Access, TypeIndex Type, uint64_t Offset); + void writeEnumerate(MemberAccess Access, uint64_t Value, StringRef Name); + void writeIndirectVirtualBaseClass(MemberAccess Access, TypeIndex Type, + TypeIndex VirtualBasePointerType, + int64_t VirtualBasePointerOffset, + uint64_t SlotIndex); + void writeMember(MemberAccess Access, TypeIndex Type, uint64_t Offset, + StringRef Name); + void writeOneMethod(MemberAccess Access, MethodKind Kind, + MethodOptions Options, TypeIndex Type, + int32_t VTableSlotOffset, StringRef Name); + void writeOneMethod(const MethodInfo &Method, StringRef Name); + void writeMethod(uint16_t OverloadCount, TypeIndex MethodList, + StringRef Name); + void writeNestedType(TypeIndex Type, StringRef Name); + void writeStaticMember(MemberAccess Access, TypeIndex Type, StringRef Name); + void writeVirtualBaseClass(MemberAccess Access, TypeIndex Type, + TypeIndex VirtualBasePointerType, + int64_t VirtualBasePointerOffset, + uint64_t SlotIndex); + void writeVirtualBaseClass(TypeRecordKind Kind, 
MemberAccess Access, + TypeIndex Type, TypeIndex VirtualBasePointerType, + int64_t VirtualBasePointerOffset, + uint64_t SlotIndex); + void writeVirtualFunctionTablePointer(TypeIndex Type); +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/FunctionId.h b/include/llvm/DebugInfo/CodeView/FunctionId.h new file mode 100644 index 000000000000..1af3da810b5a --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/FunctionId.h @@ -0,0 +1,56 @@ +//===- FunctionId.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_FUNCTIONID_H +#define LLVM_DEBUGINFO_CODEVIEW_FUNCTIONID_H + +#include + +namespace llvm { +namespace codeview { + +class FunctionId { +public: + FunctionId() : Index(0) {} + + explicit FunctionId(uint32_t Index) : Index(Index) {} + + uint32_t getIndex() const { return Index; } + +private: + uint32_t Index; +}; + +inline bool operator==(const FunctionId &A, const FunctionId &B) { + return A.getIndex() == B.getIndex(); +} + +inline bool operator!=(const FunctionId &A, const FunctionId &B) { + return A.getIndex() != B.getIndex(); +} + +inline bool operator<(const FunctionId &A, const FunctionId &B) { + return A.getIndex() < B.getIndex(); +} + +inline bool operator<=(const FunctionId &A, const FunctionId &B) { + return A.getIndex() <= B.getIndex(); +} + +inline bool operator>(const FunctionId &A, const FunctionId &B) { + return A.getIndex() > B.getIndex(); +} + +inline bool operator>=(const FunctionId &A, const FunctionId &B) { + return A.getIndex() >= B.getIndex(); +} +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/Line.h b/include/llvm/DebugInfo/CodeView/Line.h new file mode 100644 index 000000000000..a7cdbdaac32f --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/Line.h @@ -0,0 +1,124 @@ +//===- Line.h ---------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_LINE_H +#define LLVM_DEBUGINFO_CODEVIEW_LINE_H + +#include + +namespace llvm { +namespace codeview { + +class LineInfo { +public: + static const uint32_t AlwaysStepIntoLineNumber = 0xfeefee; + static const uint32_t NeverStepIntoLineNumber = 0xf00f00; + +private: + static const uint32_t StartLineMask = 0x00ffffff; + static const uint32_t EndLineDeltaMask = 0x7f000000; + static const int EndLineDeltaShift = 24; + static const uint32_t StatementFlag = 0x80000000u; + +public: + LineInfo(uint32_t StartLine, uint32_t EndLine, bool IsStatement); + + uint32_t getStartLine() const { return LineData & StartLineMask; } + + uint32_t getLineDelta() const { + return (LineData & EndLineDeltaMask) >> EndLineDeltaShift; + } + + uint32_t getEndLine() const { return getStartLine() + getLineDelta(); } + + bool isStatement() const { return (LineData & StatementFlag) != 0; } + + uint32_t getRawData() const { return LineData; } + + bool isAlwaysStepInto() const { + return getStartLine() == AlwaysStepIntoLineNumber; + } + + bool isNeverStepInto() const { + return getStartLine() == NeverStepIntoLineNumber; + } + +private: + uint32_t LineData; +}; + +class ColumnInfo { +private: + static const uint32_t StartColumnMask = 0x0000ffffu; + static const uint32_t EndColumnMask = 0xffff0000u; + static const int EndColumnShift = 16; + +public: + ColumnInfo(uint16_t StartColumn, uint16_t EndColumn) { + ColumnData = + (static_cast(StartColumn) & StartColumnMask) | + ((static_cast(EndColumn) << EndColumnShift) & EndColumnMask); + } + + uint16_t getStartColumn() const { + return static_cast(ColumnData & StartColumnMask); + } + + uint16_t getEndColumn() const { + return static_cast((ColumnData & EndColumnMask) >> + EndColumnShift); + } + + uint32_t getRawData() const { return ColumnData; } + +private: + uint32_t ColumnData; +}; + +class Line { +private: + int32_t CodeOffset; + LineInfo LineInf; + ColumnInfo ColumnInf; + +public: + Line(int32_t CodeOffset, uint32_t StartLine, uint32_t EndLine, + uint16_t StartColumn, uint16_t EndColumn, bool IsStatement) + : CodeOffset(CodeOffset), LineInf(StartLine, EndLine, IsStatement), + ColumnInf(StartColumn, EndColumn) {} + + Line(int32_t CodeOffset, LineInfo LineInf, ColumnInfo ColumnInf) + : CodeOffset(CodeOffset), LineInf(LineInf), ColumnInf(ColumnInf) {} + + LineInfo getLineInfo() const { return LineInf; } + + ColumnInfo getColumnInfo() const { return ColumnInf; } + + int32_t getCodeOffset() const { return CodeOffset; } + + uint32_t getStartLine() const { return LineInf.getStartLine(); } + + uint32_t getLineDelta() const { return LineInf.getLineDelta(); } + + uint32_t getEndLine() const { return LineInf.getEndLine(); } + + uint16_t getStartColumn() const { return ColumnInf.getStartColumn(); } + + uint16_t getEndColumn() const { return ColumnInf.getEndColumn(); } + + bool isStatement() const { return LineInf.isStatement(); } + + bool isAlwaysStepInto() const { return LineInf.isAlwaysStepInto(); } + + bool isNeverStepInto() const { return LineInf.isNeverStepInto(); } +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h b/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h new file mode 100644 index 000000000000..df0a2e08a418 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h @@ -0,0 +1,43 @@ +//===- ListRecordBuilder.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// 
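LineInfo's constructor is defined out of line, but the masks in Line.h above fully determine the packing: 24 bits of start line, a 7-bit end-line delta, and a statement flag in the top bit. A hedged sketch of the equivalent computation (clamping of out-of-range values is omitted; this is not the patch's implementation):

    uint32_t packLineInfo(uint32_t StartLine, uint32_t EndLine, bool IsStatement) {
      uint32_t Delta = EndLine - StartLine;      // must fit in 7 bits
      return (StartLine & 0x00ffffff)            // StartLineMask
           | ((Delta << 24) & 0x7f000000)        // EndLineDelta{Mask,Shift}
           | (IsStatement ? 0x80000000u : 0);    // StatementFlag
    }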
+// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_LISTRECORDBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_LISTRECORDBUILDER_H + +#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h" + +namespace llvm { +namespace codeview { + +class ListRecordBuilder { +private: + ListRecordBuilder(const ListRecordBuilder &) = delete; + ListRecordBuilder &operator=(const ListRecordBuilder &) = delete; + +protected: + const int MethodKindShift = 2; + + explicit ListRecordBuilder(TypeRecordKind Kind); + +public: + llvm::StringRef str() { return Builder.str(); } + +protected: + void finishSubRecord(); + + TypeRecordBuilder &getBuilder() { return Builder; } + +private: + TypeRecordBuilder Builder; + SmallVector ContinuationOffsets; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h new file mode 100644 index 000000000000..5bfe2a068672 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h @@ -0,0 +1,68 @@ +//===- MemoryTypeTableBuilder.h ---------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_MEMORYTYPETABLEBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_MEMORYTYPETABLEBUILDER_H + +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" +#include +#include +#include +#include + +namespace llvm { +namespace codeview { + +class MemoryTypeTableBuilder : public TypeTableBuilder { +public: + class Record { + public: + explicit Record(llvm::StringRef RData); + + const char *data() const { return Data.get(); } + uint16_t size() const { return Size; } + + private: + uint16_t Size; + std::unique_ptr Data; + }; + +private: + class RecordHash : std::unary_function { + public: + size_t operator()(llvm::StringRef Val) const { + return static_cast(llvm::hash_value(Val)); + } + }; + +public: + MemoryTypeTableBuilder() {} + + template void ForEachRecord(TFunc Func) { + uint32_t Index = TypeIndex::FirstNonSimpleIndex; + + for (const std::unique_ptr &R : Records) { + Func(TypeIndex(Index), R.get()); + ++Index; + } + } + +private: + virtual TypeIndex writeRecord(llvm::StringRef Data) override; + +private: + std::vector> Records; + std::unordered_map HashedRecords; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h b/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h new file mode 100644 index 000000000000..faa404d41b1f --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h @@ -0,0 +1,35 @@ +//===- MethodListRecordBuilder.h --------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
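Typical consumption of MemoryTypeTableBuilder above is through ForEachRecord, whose numbering starts at TypeIndex::FirstNonSimpleIndex (0x1000) because lower indices are reserved for the built-in simple types. A sketch, assuming OS is a raw_ostream available at the call site:

    MemoryTypeTableBuilder Table;
    // ... type records are written through the TypeTableBuilder interface ...
    Table.ForEachRecord([&](TypeIndex TI, MemoryTypeTableBuilder::Record *R) {
      OS << "type index 0x";
      OS.write_hex(TI.getIndex());
      OS << " occupies " << R->size() << " bytes\n";
    });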
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_METHODLISTRECORDBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_METHODLISTRECORDBUILDER_H + +#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h" + +namespace llvm { +namespace codeview { + +class MethodInfo; + +class MethodListRecordBuilder : public ListRecordBuilder { +private: + MethodListRecordBuilder(const MethodListRecordBuilder &) = delete; + MethodListRecordBuilder &operator=(const MethodListRecordBuilder &) = delete; + +public: + MethodListRecordBuilder(); + + void writeMethod(MemberAccess Access, MethodKind Kind, MethodOptions Options, + TypeIndex Type, int32_t VTableSlotOffset); + void writeMethod(const MethodInfo &Method); +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeIndex.h b/include/llvm/DebugInfo/CodeView/TypeIndex.h new file mode 100644 index 000000000000..d3a541be4c62 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeIndex.h @@ -0,0 +1,176 @@ +//===- TypeIndex.h ----------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEX_H + +#include +#include + +namespace llvm { +namespace codeview { + +enum class SimpleTypeKind : uint32_t { + None = 0x0000, // uncharacterized type (no type) + Void = 0x0003, // void + NotTranslated = 0x0007, // type not translated by cvpack + HResult = 0x0008, // OLE/COM HRESULT + + SignedCharacter = 0x0010, // 8 bit signed + UnsignedCharacter = 0x0020, // 8 bit unsigned + NarrowCharacter = 0x0070, // really a char + WideCharacter = 0x0071, // wide char + + SByte = 0x0068, // 8 bit signed int + Byte = 0x0069, // 8 bit unsigned int + Int16Short = 0x0011, // 16 bit signed + UInt16Short = 0x0021, // 16 bit unsigned + Int16 = 0x0072, // 16 bit signed int + UInt16 = 0x0073, // 16 bit unsigned int + Int32Long = 0x0012, // 32 bit signed + UInt32Long = 0x0022, // 32 bit unsigned + Int32 = 0x0074, // 32 bit signed int + UInt32 = 0x0075, // 32 bit unsigned int + Int64Quad = 0x0013, // 64 bit signed + UInt64Quad = 0x0023, // 64 bit unsigned + Int64 = 0x0076, // 64 bit signed int + UInt64 = 0x0077, // 64 bit unsigned int + Int128 = 0x0078, // 128 bit signed int + UInt128 = 0x0079, // 128 bit unsigned int + + Float16 = 0x0046, // 16 bit real + Float32 = 0x0040, // 32 bit real + Float32PartialPrecision = 0x0045, // 32 bit PP real + Float48 = 0x0044, // 48 bit real + Float64 = 0x0041, // 64 bit real + Float80 = 0x0042, // 80 bit real + Float128 = 0x0043, // 128 bit real + + Complex32 = 0x0050, // 32 bit complex + Complex64 = 0x0051, // 64 bit complex + Complex80 = 0x0052, // 80 bit complex + Complex128 = 0x0053, // 128 bit complex + + Boolean8 = 0x0030, // 8 bit boolean + Boolean16 = 0x0031, // 16 bit boolean + Boolean32 = 0x0032, // 32 bit boolean + Boolean64 = 0x0033 // 64 bit boolean +}; + +enum class SimpleTypeMode : uint32_t { + Direct = 0x00000000, // Not a pointer + NearPointer = 0x00000100, // Near pointer + FarPointer = 0x00000200, // Far pointer + HugePointer = 0x00000300, // Huge pointer + NearPointer32 = 0x00000400, // 32 bit near pointer + FarPointer32 = 0x00000500, // 32 bit far pointer + NearPointer64 = 0x00000600, // 64 bit near pointer + NearPointer128 = 0x00000700 // 128 
bit near pointer +}; + +class TypeIndex { +public: + static const uint32_t FirstNonSimpleIndex = 0x1000; + static const uint32_t SimpleKindMask = 0x000000ff; + static const uint32_t SimpleModeMask = 0x00000700; + +public: + TypeIndex() : Index(0) {} + explicit TypeIndex(uint32_t Index) : Index(Index) {} + explicit TypeIndex(SimpleTypeKind Kind) + : Index(static_cast(Kind)) {} + TypeIndex(SimpleTypeKind Kind, SimpleTypeMode Mode) + : Index(static_cast(Kind) | static_cast(Mode)) {} + + uint32_t getIndex() const { return Index; } + bool isSimple() const { return Index < FirstNonSimpleIndex; } + + SimpleTypeKind getSimpleKind() const { + assert(isSimple()); + return static_cast(Index & SimpleKindMask); + } + + SimpleTypeMode getSimpleMode() const { + assert(isSimple()); + return static_cast(Index & SimpleModeMask); + } + + static TypeIndex Void() { return TypeIndex(SimpleTypeKind::Void); } + static TypeIndex VoidPointer32() { + return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer32); + } + static TypeIndex VoidPointer64() { + return TypeIndex(SimpleTypeKind::Void, SimpleTypeMode::NearPointer64); + } + + static TypeIndex SignedCharacter() { + return TypeIndex(SimpleTypeKind::SignedCharacter); + } + static TypeIndex UnsignedCharacter() { + return TypeIndex(SimpleTypeKind::UnsignedCharacter); + } + static TypeIndex NarrowCharacter() { + return TypeIndex(SimpleTypeKind::NarrowCharacter); + } + static TypeIndex WideCharacter() { + return TypeIndex(SimpleTypeKind::WideCharacter); + } + static TypeIndex Int16Short() { + return TypeIndex(SimpleTypeKind::Int16Short); + } + static TypeIndex UInt16Short() { + return TypeIndex(SimpleTypeKind::UInt16Short); + } + static TypeIndex Int32() { return TypeIndex(SimpleTypeKind::Int32); } + static TypeIndex UInt32() { return TypeIndex(SimpleTypeKind::UInt32); } + static TypeIndex Int32Long() { return TypeIndex(SimpleTypeKind::Int32Long); } + static TypeIndex UInt32Long() { + return TypeIndex(SimpleTypeKind::UInt32Long); + } + static TypeIndex Int64() { return TypeIndex(SimpleTypeKind::Int64); } + static TypeIndex UInt64() { return TypeIndex(SimpleTypeKind::UInt64); } + static TypeIndex Int64Quad() { return TypeIndex(SimpleTypeKind::Int64Quad); } + static TypeIndex UInt64Quad() { + return TypeIndex(SimpleTypeKind::UInt64Quad); + } + + static TypeIndex Float32() { return TypeIndex(SimpleTypeKind::Float32); } + static TypeIndex Float64() { return TypeIndex(SimpleTypeKind::Float64); } + +private: + uint32_t Index; +}; + +inline bool operator==(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() == B.getIndex(); +} + +inline bool operator!=(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() != B.getIndex(); +} + +inline bool operator<(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() < B.getIndex(); +} + +inline bool operator<=(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() <= B.getIndex(); +} + +inline bool operator>(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() > B.getIndex(); +} + +inline bool operator>=(const TypeIndex &A, const TypeIndex &B) { + return A.getIndex() >= B.getIndex(); +} +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h new file mode 100644 index 000000000000..21755f5d9b09 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -0,0 +1,270 @@ +//===- TypeRecord.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file 
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecord.h b/include/llvm/DebugInfo/CodeView/TypeRecord.h new file mode 100644 index 000000000000..21755f5d9b09 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -0,0 +1,270 @@ +//===- TypeRecord.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORD_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORD_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include <cinttypes> + +namespace llvm { +namespace codeview { + +class TypeRecord { +protected: + explicit TypeRecord(TypeRecordKind Kind) : Kind(Kind) {} + +public: + TypeRecordKind getKind() const { return Kind; } + +private: + TypeRecordKind Kind; +}; + +class ModifierRecord : public TypeRecord { +public: + ModifierRecord(TypeIndex ModifiedType, ModifierOptions Options) + : TypeRecord(TypeRecordKind::Modifier), ModifiedType(ModifiedType), + Options(Options) {} + + TypeIndex getModifiedType() const { return ModifiedType; } + ModifierOptions getOptions() const { return Options; } + +private: + TypeIndex ModifiedType; + ModifierOptions Options; +}; + +class ProcedureRecord : public TypeRecord { +public: + ProcedureRecord(TypeIndex ReturnType, CallingConvention CallConv, + FunctionOptions Options, uint16_t ParameterCount, + TypeIndex ArgumentList) + : TypeRecord(TypeRecordKind::Procedure), ReturnType(ReturnType), + CallConv(CallConv), Options(Options), ParameterCount(ParameterCount), + ArgumentList(ArgumentList) {} + + TypeIndex getReturnType() const { return ReturnType; } + CallingConvention getCallConv() const { return CallConv; } + FunctionOptions getOptions() const { return Options; } + uint16_t getParameterCount() const { return ParameterCount; } + TypeIndex getArgumentList() const { return ArgumentList; } + +private: + TypeIndex ReturnType; + CallingConvention CallConv; + FunctionOptions Options; + uint16_t ParameterCount; + TypeIndex ArgumentList; +}; + +class MemberFunctionRecord : public TypeRecord { +public: + MemberFunctionRecord(TypeIndex ReturnType, TypeIndex ClassType, + TypeIndex ThisType, CallingConvention CallConv, + FunctionOptions Options, uint16_t ParameterCount, + TypeIndex ArgumentList, int32_t ThisPointerAdjustment) + : TypeRecord(TypeRecordKind::MemberFunction), ReturnType(ReturnType), + ClassType(ClassType), ThisType(ThisType), CallConv(CallConv), + Options(Options), ParameterCount(ParameterCount), + ArgumentList(ArgumentList), + ThisPointerAdjustment(ThisPointerAdjustment) {} + + TypeIndex getReturnType() const { return ReturnType; } + TypeIndex getClassType() const { return ClassType; } + TypeIndex getThisType() const { return ThisType; } + CallingConvention getCallConv() const { return CallConv; } + FunctionOptions getOptions() const { return Options; } + uint16_t getParameterCount() const { return ParameterCount; } + TypeIndex getArgumentList() const { return ArgumentList; } + int32_t getThisPointerAdjustment() const { return ThisPointerAdjustment; } + +private: + TypeIndex ReturnType; + TypeIndex ClassType; + TypeIndex ThisType; + CallingConvention CallConv; + FunctionOptions Options; + uint16_t ParameterCount; + TypeIndex ArgumentList; + int32_t ThisPointerAdjustment; +}; + +class ArgumentListRecord : public TypeRecord { +public: + explicit ArgumentListRecord(llvm::ArrayRef<TypeIndex> ArgumentTypes) + : TypeRecord(TypeRecordKind::ArgumentList), ArgumentTypes(ArgumentTypes) { + } + + llvm::ArrayRef<TypeIndex> getArgumentTypes() const { return ArgumentTypes; } + +private: + llvm::ArrayRef<TypeIndex> ArgumentTypes; +}; +
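Type records cross-reference each other only through TypeIndex values, so a function type is assembled bottom-up: the argument list is emitted first and whatever index it receives is what the procedure record stores. A hypothetical sketch (the CallingConvention/FunctionOptions enumerators are assumed from CodeView.h, and the 0x1000 index merely illustrates the first non-simple slot):

    TypeIndex ArgTypes[] = {TypeIndex::Int32(), TypeIndex::Float64()};
    ArgumentListRecord ArgList(ArgTypes);        // (int, double)
    TypeIndex ArgListIndex(0x1000);              // index the list would get in the stream
    ProcedureRecord Proc(TypeIndex::Int32(),     // return type
                         CallingConvention::NearC, FunctionOptions::None,
                         /*ParameterCount=*/2, ArgListIndex);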
+class PointerRecordBase : public TypeRecord { +public: + PointerRecordBase(TypeIndex ReferentType, PointerKind Kind, PointerMode Mode, + PointerOptions Options, uint8_t Size) + : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType), + PtrKind(Kind), Mode(Mode), Options(Options), Size(Size) {} + + TypeIndex getReferentType() const { return ReferentType; } + PointerKind getPointerKind() const { return PtrKind; } + PointerMode getMode() const { return Mode; } + PointerOptions getOptions() const { return Options; } + uint8_t getSize() const { return Size; } + +private: + TypeIndex ReferentType; + PointerKind PtrKind; + PointerMode Mode; + PointerOptions Options; + uint8_t Size; +}; + +class PointerRecord : public PointerRecordBase { +public: + PointerRecord(TypeIndex ReferentType, PointerKind Kind, PointerMode Mode, + PointerOptions Options, uint8_t Size) + : PointerRecordBase(ReferentType, Kind, Mode, Options, Size) {} +}; + +class PointerToMemberRecord : public PointerRecordBase { +public: + PointerToMemberRecord(TypeIndex ReferentType, PointerKind Kind, + PointerMode Mode, PointerOptions Options, uint8_t Size, + TypeIndex ContainingType, + PointerToMemberRepresentation Representation) + : PointerRecordBase(ReferentType, Kind, Mode, Options, Size), + ContainingType(ContainingType), Representation(Representation) {} + + TypeIndex getContainingType() const { return ContainingType; } + PointerToMemberRepresentation getRepresentation() const { + return Representation; + } + +private: + TypeIndex ContainingType; + PointerToMemberRepresentation Representation; +}; + +class ArrayRecord : public TypeRecord { +public: + ArrayRecord(TypeIndex ElementType, TypeIndex IndexType, uint64_t Size, + llvm::StringRef Name) + : TypeRecord(TypeRecordKind::Array), ElementType(ElementType), + IndexType(IndexType), Size(Size), Name(Name) {} + + TypeIndex getElementType() const { return ElementType; } + TypeIndex getIndexType() const { return IndexType; } + uint64_t getSize() const { return Size; } + llvm::StringRef getName() const { return Name; } + +private: + TypeIndex ElementType; + TypeIndex IndexType; + uint64_t Size; + llvm::StringRef Name; +}; + +class TagRecord : public TypeRecord { +protected: + TagRecord(TypeRecordKind Kind, uint16_t MemberCount, ClassOptions Options, + TypeIndex FieldList, StringRef Name, StringRef UniqueName) + : TypeRecord(Kind), MemberCount(MemberCount), Options(Options), + FieldList(FieldList), Name(Name), UniqueName(UniqueName) {} + +public: + uint16_t getMemberCount() const { return MemberCount; } + ClassOptions getOptions() const { return Options; } + TypeIndex getFieldList() const { return FieldList; } + StringRef getName() const { return Name; } + StringRef getUniqueName() const { return UniqueName; } + +private: + uint16_t MemberCount; + ClassOptions Options; + TypeIndex FieldList; + StringRef Name; + StringRef UniqueName; +}; + +class AggregateRecord : public TagRecord { +public: + AggregateRecord(TypeRecordKind Kind, uint16_t MemberCount, + ClassOptions Options, HfaKind Hfa, + WindowsRTClassKind WinRTKind, TypeIndex FieldList, + TypeIndex DerivationList, TypeIndex VTableShape, + uint64_t Size, StringRef Name, StringRef UniqueName) + : TagRecord(Kind, MemberCount, Options, FieldList, Name, UniqueName), + Hfa(Hfa), WinRTKind(WinRTKind), DerivationList(DerivationList), + VTableShape(VTableShape), Size(Size) {} + + HfaKind getHfa() const { return Hfa; } + WindowsRTClassKind getWinRTKind() const { return WinRTKind; } + TypeIndex getDerivationList() const { return DerivationList; } + TypeIndex getVTableShape() const
{ return VTableShape; } + uint64_t getSize() const { return Size; } + +private: + HfaKind Hfa; + WindowsRTClassKind WinRTKind; + TypeIndex DerivationList; + TypeIndex VTableShape; + uint64_t Size; +}; + +class EnumRecord : public TagRecord { +public: + EnumRecord(uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList, + StringRef Name, StringRef UniqueName, TypeIndex UnderlyingType) + : TagRecord(TypeRecordKind::Enum, MemberCount, Options, FieldList, Name, + UniqueName), + UnderlyingType(UnderlyingType) {} + + TypeIndex getUnderlyingType() const { return UnderlyingType; } + +private: + TypeIndex UnderlyingType; +}; + +class BitFieldRecord : public TypeRecord { +public: + BitFieldRecord(TypeIndex Type, uint8_t BitSize, uint8_t BitOffset) + : TypeRecord(TypeRecordKind::BitField), Type(Type), BitSize(BitSize), + BitOffset(BitOffset) {} + + TypeIndex getType() const { return Type; } + uint8_t getBitOffset() const { return BitOffset; } + uint8_t getBitSize() const { return BitSize; } + +private: + TypeIndex Type; + uint8_t BitSize; + uint8_t BitOffset; +}; + +class VirtualTableShapeRecord : public TypeRecord { +public: + explicit VirtualTableShapeRecord(ArrayRef<VirtualTableSlotKind> Slots) + : TypeRecord(TypeRecordKind::VirtualTableShape), Slots(Slots) {} + + ArrayRef<VirtualTableSlotKind> getSlots() const { return Slots; } + +private: + ArrayRef<VirtualTableSlotKind> Slots; +}; +} +} + +#endif
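As a concrete reading of the records above: a C++ member such as `unsigned Flags : 3` stored two bits into its allocation unit could be modeled by pairing an underlying type index with the size and offset of the field (a sketch, not code from the tree):

    BitFieldRecord BF(TypeIndex::UInt32(), /*BitSize=*/3, /*BitOffset=*/2);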
diff --git a/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h b/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h new file mode 100644 index 000000000000..1f48cf70666d --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h @@ -0,0 +1,57 @@ +//===- TypeRecordBuilder.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORDBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDBUILDER_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace codeview { + +class TypeRecordBuilder { +private: + TypeRecordBuilder(const TypeRecordBuilder &) = delete; + TypeRecordBuilder &operator=(const TypeRecordBuilder &) = delete; + +public: + explicit TypeRecordBuilder(TypeRecordKind Kind); + + void writeUInt8(uint8_t Value); + void writeInt16(int16_t Value); + void writeUInt16(uint16_t Value); + void writeInt32(int32_t Value); + void writeUInt32(uint32_t Value); + void writeInt64(int64_t Value); + void writeUInt64(uint64_t Value); + void writeTypeIndex(TypeIndex TypeInd); + void writeTypeRecordKind(TypeRecordKind Kind); + void writeEncodedInteger(int64_t Value); + void writeEncodedSignedInteger(int64_t Value); + void writeEncodedUnsignedInteger(uint64_t Value); + void writeNullTerminatedString(const char *Value); + void writeNullTerminatedString(StringRef Value); + + llvm::StringRef str(); + + uint64_t size() const { return Stream.tell(); } + +private: + llvm::SmallVector<char, 256> Buffer; + llvm::raw_svector_ostream Stream; + llvm::support::endian::Writer<llvm::support::little> Writer; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h b/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h new file mode 100644 index 000000000000..9de110e8236f --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeSymbolEmitter.h @@ -0,0 +1,37 @@ +//===- TypeSymbolEmitter.h --------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESYMBOLEMITTER_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPESYMBOLEMITTER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +namespace llvm { +namespace codeview { + +class TypeSymbolEmitter { +private: + TypeSymbolEmitter(const TypeSymbolEmitter &) = delete; + TypeSymbolEmitter &operator=(const TypeSymbolEmitter &) = delete; + +protected: + TypeSymbolEmitter() {} + +public: + virtual ~TypeSymbolEmitter() {} + +public: + virtual void writeUserDefinedType(TypeIndex TI, StringRef Name) = 0; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h new file mode 100644 index 000000000000..2c950e8af792 --- /dev/null +++ b/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h @@ -0,0 +1,60 @@ +//===- TypeTableBuilder.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +namespace codeview { + +class FieldListRecordBuilder; +class MethodListRecordBuilder; +class TypeRecordBuilder; + +class TypeTableBuilder { +private: + TypeTableBuilder(const TypeTableBuilder &) = delete; + TypeTableBuilder &operator=(const TypeTableBuilder &) = delete; + +protected: + TypeTableBuilder(); + +public: + virtual ~TypeTableBuilder(); + +public: + TypeIndex writeModifier(const ModifierRecord &Record); + TypeIndex writeProcedure(const ProcedureRecord &Record); + TypeIndex writeMemberFunction(const MemberFunctionRecord &Record); + TypeIndex writeArgumentList(const ArgumentListRecord &Record); + TypeIndex writeRecord(TypeRecordBuilder &builder); + TypeIndex writePointer(const PointerRecord &Record); + TypeIndex writePointerToMember(const PointerToMemberRecord &Record); + TypeIndex writeArray(const ArrayRecord &Record); + TypeIndex writeAggregate(const AggregateRecord &Record); + TypeIndex writeEnum(const EnumRecord &Record); + TypeIndex writeBitField(const BitFieldRecord &Record); + TypeIndex writeVirtualTableShape(const VirtualTableShapeRecord &Record); + + TypeIndex writeFieldList(FieldListRecordBuilder &FieldList); + TypeIndex writeMethodList(MethodListRecordBuilder &MethodList); + +private: + virtual TypeIndex writeRecord(llvm::StringRef record) = 0; +}; +} +} + +#endif diff --git a/include/llvm/DebugInfo/DIContext.h b/include/llvm/DebugInfo/DIContext.h index 871e60c56b13..6659a97a042b 100644 --- a/include/llvm/DebugInfo/DIContext.h +++ b/include/llvm/DebugInfo/DIContext.h @@ -57,6 +57,10 @@ class DIInliningInfo { assert(Index < Frames.size()); return Frames[Index]; } + DILineInfo *getMutableFrame(unsigned Index) { + assert(Index < Frames.size()); + return &Frames[Index]; + } uint32_t getNumberOfFrames() const { return Frames.size(); } @@ -65,6 +69,15 @@ class DIInliningInfo { } }; +/// DIGlobal - container for description of a global variable. +struct DIGlobal { + std::string Name; + uint64_t Start; + uint64_t Size; + + DIGlobal() : Name(""), Start(0), Size(0) {} +}; + /// A DINameKind is passed to name search methods to specify a /// preference regarding the type of name resolution the caller wants. enum class DINameKind { None, ShortName, LinkageName }; @@ -99,6 +112,7 @@ enum DIDumpType { DIDT_LineDwo, DIDT_Loc, DIDT_LocDwo, + DIDT_Macro, DIDT_Ranges, DIDT_Pubnames, DIDT_Pubtypes, @@ -110,7 +124,9 @@ enum DIDumpType { DIDT_AppleNames, DIDT_AppleTypes, DIDT_AppleNamespaces, - DIDT_AppleObjC + DIDT_AppleObjC, + DIDT_CUIndex, + DIDT_TUIndex, }; class DIContext { @@ -140,17 +156,21 @@ private: /// to be used by the DIContext implementations when applying relocations /// on the fly. class LoadedObjectInfo { +protected: + LoadedObjectInfo(const LoadedObjectInfo &) = default; + LoadedObjectInfo() = default; + public: virtual ~LoadedObjectInfo() = default; - /// Obtain the Load Address of a section by Name. + /// Obtain the Load Address of a section by SectionRef. /// - /// Calculate the address of the section identified by the passed in Name. + /// Calculate the address of the given section. /// The section need not be present in the local address space. 
The addresses /// need to be consistent with the addresses used to query the DIContext and /// the output of this function should be deterministic, i.e. repeated calls with - /// the same Name should give the same address. - virtual uint64_t getSectionLoadAddress(StringRef Name) const = 0; + /// the same Sec should give the same address. + virtual uint64_t getSectionLoadAddress(const object::SectionRef &Sec) const = 0; /// If conveniently available, return the content of the given Section. /// @@ -162,7 +182,8 @@ public: /// local (unrelocated) object file and applied on the fly. Note that this method /// is used purely for optimization purposes in the common case of JITting in the /// local address space, so returning false should always be correct. - virtual bool getLoadedSectionContents(StringRef Name, StringRef &Data) const { + virtual bool getLoadedSectionContents(const object::SectionRef &Sec, + StringRef &Data) const { return false; } diff --git a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h index 743f9c696e9e..bae3154b3b5f 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFCompileUnit.h @@ -18,10 +18,13 @@ class DWARFCompileUnit : public DWARFUnit { public: DWARFCompileUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool LE, - const DWARFUnitSectionBase &UnitSection) - : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {} + StringRef SOS, StringRef AOS, StringRef LS, bool LE, + const DWARFUnitSectionBase &UnitSection, + const DWARFUnitIndex::Entry *Entry) + : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, UnitSection, + Entry) {} void dump(raw_ostream &OS); + static const DWARFSectionKind Section = DW_SECT_INFO; // VTable anchor. ~DWARFCompileUnit() override; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFContext.h b/include/llvm/DebugInfo/DWARF/DWARFContext.h index 423c0d32f1b5..c91012bc9a24 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -18,6 +18,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" +#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h" #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" #include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h" @@ -40,11 +41,14 @@ class DWARFContext : public DIContext { DWARFUnitSection<DWARFCompileUnit> CUs; std::vector<DWARFUnitSection<DWARFTypeUnit>> TUs; + std::unique_ptr<DWARFUnitIndex> CUIndex; + std::unique_ptr<DWARFUnitIndex> TUIndex; std::unique_ptr<DWARFDebugAbbrev> Abbrev; std::unique_ptr<DWARFDebugLoc> Loc; std::unique_ptr<DWARFDebugAranges> Aranges; std::unique_ptr<DWARFDebugLine> Line; std::unique_ptr<DWARFDebugFrame> DebugFrame; + std::unique_ptr<DWARFDebugMacro> Macro; DWARFUnitSection<DWARFCompileUnit> DWOCUs; std::vector<DWARFUnitSection<DWARFTypeUnit>> DWOTUs; @@ -143,6 +147,9 @@ public: return DWOCUs[index].get(); } + const DWARFUnitIndex &getCUIndex(); + const DWARFUnitIndex &getTUIndex(); + /// Get a pointer to the parsed DebugAbbrev object. const DWARFDebugAbbrev *getDebugAbbrev(); @@ -161,6 +168,9 @@ public: /// Get a pointer to the parsed frame information object. const DWARFDebugFrame *getDebugFrame(); + /// Get a pointer to the parsed DebugMacro object. + const DWARFDebugMacro *getDebugMacro(); + /// Get a pointer to a parsed line table corresponding to a compile unit.
const DWARFDebugLine::LineTable *getLineTableForUnit(DWARFUnit *cu); @@ -184,6 +194,7 @@ public: virtual const DWARFSection &getLineSection() = 0; virtual StringRef getStringSection() = 0; virtual StringRef getRangeSection() = 0; + virtual StringRef getMacinfoSection() = 0; virtual StringRef getPubNamesSection() = 0; virtual StringRef getPubTypesSection() = 0; virtual StringRef getGnuPubNamesSection() = 0; @@ -203,9 +214,11 @@ public: virtual const DWARFSection& getAppleTypesSection() = 0; virtual const DWARFSection& getAppleNamespacesSection() = 0; virtual const DWARFSection& getAppleObjCSection() = 0; + virtual StringRef getCUIndexSection() = 0; + virtual StringRef getTUIndexSection() = 0; static bool isSupportedVersion(unsigned version) { - return version == 2 || version == 3 || version == 4; + return version == 2 || version == 3 || version == 4 || version == 5; } private: /// Return the compile unit that includes an offset (relative to .debug_info). @@ -232,6 +245,7 @@ class DWARFContextInMemory : public DWARFContext { DWARFSection LineSection; StringRef StringSection; StringRef RangeSection; + StringRef MacinfoSection; StringRef PubNamesSection; StringRef PubTypesSection; StringRef GnuPubNamesSection; @@ -251,6 +265,8 @@ class DWARFContextInMemory : public DWARFContext { DWARFSection AppleTypesSection; DWARFSection AppleNamespacesSection; DWARFSection AppleObjCSection; + StringRef CUIndexSection; + StringRef TUIndexSection; SmallVector<SmallString<32>, 4> UncompressedSections; @@ -268,6 +284,7 @@ public: const DWARFSection &getLineSection() override { return LineSection; } StringRef getStringSection() override { return StringSection; } StringRef getRangeSection() override { return RangeSection; } + StringRef getMacinfoSection() override { return MacinfoSection; } StringRef getPubNamesSection() override { return PubNamesSection; } StringRef getPubTypesSection() override { return PubTypesSection; } StringRef getGnuPubNamesSection() override { return GnuPubNamesSection; } @@ -293,6 +310,8 @@ public: StringRef getAddrSection() override { return AddrSection; } + StringRef getCUIndexSection() override { return CUIndexSection; } + StringRef getTUIndexSection() override { return TUIndexSection; } } diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index 93e7c790ccf9..760950b726b3 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -196,7 +196,7 @@ public: // Fills the Result argument with the file and line information // corresponding to Address. Returns true on success. - bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir, + bool getFileLineInfoForAddress(uint64_t Address, const char *CompDir, DILineInfoSpecifier::FileLineInfoKind Kind, DILineInfo &Result) const; @@ -247,7 +247,6 @@ private: const RelocAddrMap *RelocMap; LineTableMapTy LineTableMap; }; - } #endif
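getFileLineInfoForAddress is the usual entry point once a table has been parsed. A hedged sketch of a lookup (LineTable, CU, Address, and Kind are assumed to be in scope, and getCompilationDir() is assumed to be the unit's accessor for DW_AT_comp_dir):

    DILineInfo Result;
    if (LineTable->getFileLineInfoForAddress(Address, CU->getCompilationDir(),
                                             Kind, Result))
      outs() << Result.FileName << ':' << Result.Line << '\n';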
diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h new file mode 100644 index 000000000000..e17c993d275b --- /dev/null +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h @@ -0,0 +1,59 @@ +//===-- DWARFDebugMacro.h ---------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARF_DWARFDEBUGMACRO_H +#define LLVM_DEBUGINFO_DWARF_DWARFDEBUGMACRO_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Dwarf.h" + +namespace llvm { + +class raw_ostream; + +class DWARFDebugMacro { + /// A single macro entry within a macro list. + struct Entry { + /// The type of the macro entry. + uint32_t Type; + union { + /// The source line where the macro is defined. + uint64_t Line; + /// Vendor extension constant value. + uint64_t ExtConstant; + }; + + union { + /// The string (name, value) of the macro entry. + const char *MacroStr; + /// An unsigned integer indicating the identity of the source file. + uint64_t File; + /// Vendor extension string. + const char *ExtStr; + }; + }; + + typedef SmallVector<Entry, 4> MacroList; + + /// A list of all the macro entries in the debug_macinfo section. + MacroList Macros; + +public: + DWARFDebugMacro() {} + /// Print the macro list found within the debug_macinfo section. + void dump(raw_ostream &OS) const; + /// Parse the debug_macinfo section accessible via the 'data' parameter. + void parse(DataExtractor data); +}; + +} + +#endif
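Each .debug_macinfo entry begins with a ULEB128 type code whose operands follow the DWARF encoding, which is what parse() has to walk. A hypothetical sketch of the decode loop, not the body of DWARFDebugMacro.cpp:

    uint32_t Offset = 0;
    while (data.isValidOffset(Offset)) {
      Entry E;
      E.Type = data.getULEB128(&Offset);
      switch (E.Type) {
      case dwarf::DW_MACINFO_define:
      case dwarf::DW_MACINFO_undef:
        E.Line = data.getULEB128(&Offset);  // line of the #define/#undef
        E.MacroStr = data.getCStr(&Offset); // "NAME VALUE" string
        break;
      case dwarf::DW_MACINFO_start_file:
        E.Line = data.getULEB128(&Offset);  // line of the #include
        E.File = data.getULEB128(&Offset);  // index into the file table
        break;
      case dwarf::DW_MACINFO_end_file:
        break;                              // no operands
      }
      Macros.push_back(E);
    }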
diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 7ddcc0d81d59..3c32a3e5b794 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -84,6 +84,9 @@ public: const DWARFUnit *u) const; static bool skipValue(uint16_t form, DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFUnit *u); + static bool skipValue(uint16_t form, DataExtractor debug_info_data, + uint32_t *offset_ptr, uint16_t Version, + uint8_t AddrSize); static ArrayRef<uint8_t> getFixedFormSizes(uint8_t AddrSize, uint16_t Version); diff --git a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h index f24e27819da2..894a88dce440 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h @@ -21,13 +21,17 @@ private: public: DWARFTypeUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool LE, - const DWARFUnitSectionBase &UnitSection) - : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LE, UnitSection) {} + StringRef SOS, StringRef AOS, StringRef LS, bool LE, - + const DWARFUnitSectionBase &UnitSection, + const DWARFUnitIndex::Entry *Entry) + : DWARFUnit(Context, Section, DA, RS, SS, SOS, AOS, LS, LE, UnitSection, + Entry) {} uint32_t getHeaderSize() const override { return DWARFUnit::getHeaderSize() + 12; } void dump(raw_ostream &OS); + static const DWARFSectionKind Section = DW_SECT_TYPES; + protected: bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) override; }; diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/include/llvm/DebugInfo/DWARF/DWARFUnit.h index 5604b93f2205..681b2aa19a79 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/DebugInfo/DWARF/DWARFSection.h" +#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h" #include <vector> namespace llvm { @@ -39,28 +40,25 @@ public: virtual DWARFUnit *getUnitForOffset(uint32_t Offset) const = 0; void parse(DWARFContext &C, const DWARFSection &Section); - void parseDWO(DWARFContext &C, const DWARFSection &DWOSection); + void parseDWO(DWARFContext &C, const DWARFSection &DWOSection, + DWARFUnitIndex *Index = nullptr); protected: virtual void parseImpl(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool isLittleEndian) = 0; + StringRef SOS, StringRef AOS, StringRef LS, + bool isLittleEndian) = 0; ~DWARFUnitSectionBase() = default; }; +const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context, + DWARFSectionKind Kind); + /// Concrete instance of DWARFUnitSection, specialized for one Unit type. template <typename UnitType> class DWARFUnitSection final : public SmallVector<std::unique_ptr<UnitType>, 1>, public DWARFUnitSectionBase { - - struct UnitOffsetComparator { - bool operator()(uint32_t LHS, - const std::unique_ptr<UnitType> &RHS) const { - return LHS < RHS->getNextUnitOffset(); - } - }; - bool Parsed; public: @@ -73,8 +71,11 @@ public: typedef llvm::iterator_range<typename UnitVector::iterator> iterator_range; UnitType *getUnitForOffset(uint32_t Offset) const override { - auto *CU = std::upper_bound(this->begin(), this->end(), Offset, - UnitOffsetComparator()); + auto *CU = std::upper_bound( + this->begin(), this->end(), Offset, + [](uint32_t LHS, const std::unique_ptr<UnitType> &RHS) { + return LHS < RHS->getNextUnitOffset(); + }); if (CU != this->end()) return CU->get(); return nullptr; @@ -83,14 +84,16 @@ private: void parseImpl(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool LE) override { + StringRef SOS, StringRef AOS, StringRef LS, bool LE) override { if (Parsed) return; + const auto &Index = getDWARFUnitIndex(Context, UnitType::Section); DataExtractor Data(Section.Data, LE, 0); uint32_t Offset = 0; while (Data.isValidOffset(Offset)) { auto U = llvm::make_unique<UnitType>(Context, Section, DA, RS, SS, SOS, - AOS, LE, *this); + AOS, LS, LE, *this, + Index.getFromOffset(Offset)); if (!U->extract(Data, &Offset)) break; this->push_back(std::move(U)); @@ -108,6 +111,7 @@ class DWARFUnit { const DWARFDebugAbbrev *Abbrev; StringRef RangeSection; uint32_t RangeSectionBase; + StringRef LineSection; StringRef StringSection; StringRef StringOffsetSection; StringRef AddrOffsetSection; @@ -134,6 +138,8 @@ class DWARFUnit { }; std::unique_ptr<DWOHolder> DWO; + + const DWARFUnitIndex::Entry *IndexEntry; + protected: virtual bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr); /// Size in bytes of the unit header.
@@ -142,13 +148,15 @@ protected: public: DWARFUnit(DWARFContext &Context, const DWARFSection &Section, const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS, - StringRef SOS, StringRef AOS, bool LE, - const DWARFUnitSectionBase &UnitSection); + StringRef SOS, StringRef AOS, StringRef LS, bool LE, + const DWARFUnitSectionBase &UnitSection, + const DWARFUnitIndex::Entry *IndexEntry = nullptr); virtual ~DWARFUnit(); DWARFContext& getContext() const { return Context; } + StringRef getLineSection() const { return LineSection; } StringRef getStringSection() const { return StringSection; } StringRef getStringOffsetSection() const { return StringOffsetSection; } void setAddrOffsetSection(StringRef AOS, uint32_t Base) { @@ -246,12 +254,19 @@ public: assert(!DieArray.empty()); auto it = std::lower_bound( DieArray.begin(), DieArray.end(), Offset, - [=](const DWARFDebugInfoEntryMinimal &LHS, uint32_t Offset) { + [](const DWARFDebugInfoEntryMinimal &LHS, uint32_t Offset) { return LHS.getOffset() < Offset; }); return it == DieArray.end() ? nullptr : &*it; } + uint32_t getLineTableOffset() const { + if (IndexEntry) + if (const auto *Contrib = IndexEntry->getOffset(DW_SECT_LINE)) + return Contrib->Offset; + return 0; + } + private: /// Size in bytes of the .debug_info data associated with this compile unit. size_t getDebugInfoSize() const { return Length + 4 - getHeaderSize(); }
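getLineTableOffset() above is what lets split-DWARF consumers find a unit's slice of a shared .debug_line.dwo contribution. Done by hand against the index type introduced below, the same lookup would look roughly like this (Index, UnitOffset, LineSection, and LineData are hypothetical locals):

    if (const auto *E = Index.getFromOffset(UnitOffset))
      if (const auto *C = E->getOffset(DW_SECT_LINE))
        LineData = LineSection.substr(C->Offset, C->Length);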
diff --git a/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h new file mode 100644 index 000000000000..a85c2f9f0a23 --- /dev/null +++ b/include/llvm/DebugInfo/DWARF/DWARFUnitIndex.h @@ -0,0 +1,81 @@ +//===-- DWARFUnitIndex.h --------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_DEBUGINFO_DWARFUNITINDEX_H +#define LLVM_LIB_DEBUGINFO_DWARFUNITINDEX_H + +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdint> + +namespace llvm { + +enum DWARFSectionKind { + DW_SECT_INFO = 1, + DW_SECT_TYPES, + DW_SECT_ABBREV, + DW_SECT_LINE, + DW_SECT_LOC, + DW_SECT_STR_OFFSETS, + DW_SECT_MACINFO, + DW_SECT_MACRO, +}; + +class DWARFUnitIndex { + struct Header { + uint32_t Version; + uint32_t NumColumns; + uint32_t NumUnits; + uint32_t NumBuckets = 0; + + bool parse(DataExtractor IndexData, uint32_t *OffsetPtr); + void dump(raw_ostream &OS) const; + }; + +public: + class Entry { + public: + struct SectionContribution { + uint32_t Offset; + uint32_t Length; + }; + + private: + const DWARFUnitIndex *Index; + uint64_t Signature; + std::unique_ptr<SectionContribution[]> Contributions; + friend class DWARFUnitIndex; + + public: + const SectionContribution *getOffset(DWARFSectionKind Sec) const; + const SectionContribution *getOffset() const; + }; + +private: + struct Header Header; + + DWARFSectionKind InfoColumnKind; + int InfoColumn = -1; + std::unique_ptr<DWARFSectionKind[]> ColumnKinds; + std::unique_ptr<Entry[]> Rows; + + static StringRef getColumnHeader(DWARFSectionKind DS); + bool parseImpl(DataExtractor IndexData); + +public: + bool parse(DataExtractor IndexData); + DWARFUnitIndex(DWARFSectionKind InfoColumnKind) + : InfoColumnKind(InfoColumnKind) {} + void dump(raw_ostream &OS) const; + const Entry *getFromOffset(uint32_t Offset) const; +}; +} + +#endif diff --git a/include/llvm/DebugInfo/PDB/PDBContext.h b/include/llvm/DebugInfo/PDB/PDBContext.h index 2bb97463f90d..9404a5922449 100644 --- a/include/llvm/DebugInfo/PDB/PDBContext.h +++ b/include/llvm/DebugInfo/PDB/PDBContext.h @@ -32,8 +32,7 @@ class PDBContext : public DIContext { public: PDBContext(const object::COFFObjectFile &Object, - std::unique_ptr<IPDBSession> PDBSession, - bool RelativeAddress); + std::unique_ptr<IPDBSession> PDBSession); static bool classof(const DIContext *DICtx) { return DICtx->getKind() == CK_PDB; diff --git a/include/llvm/DebugInfo/PDB/PDBTypes.h b/include/llvm/DebugInfo/PDB/PDBTypes.h index 2d19e792d3d0..a932a56bb953 100644 --- a/include/llvm/DebugInfo/PDB/PDBTypes.h +++ b/include/llvm/DebugInfo/PDB/PDBTypes.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_PDB_PDBTYPES_H #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Endian.h" #include <functional> #include <stdint.h> @@ -500,6 +501,35 @@ struct Variant { bool operator!=(const Variant &Other) const { return !(*this == Other); } }; +namespace PDB { +static const char Magic[] = {'M', 'i', 'c', 'r', 'o', 's', 'o', 'f', + 't', ' ', 'C', '/', 'C', '+', '+', ' ', + 'M', 'S', 'F', ' ', '7', '.', '0', '0', + '\r', '\n', '\x1a', 'D', 'S', '\0', '\0', '\0'}; + +// The superblock is overlaid at the beginning of the file (offset 0). +// It starts with a magic header and is followed by information which describes +// the layout of the file system. +struct SuperBlock { + char MagicBytes[sizeof(Magic)]; + // The file system is split into a variable number of fixed size elements. + // These elements are referred to as blocks. The size of a block may vary + // from system to system. + support::ulittle32_t BlockSize; + // This field's purpose is not yet known. + support::ulittle32_t Unknown0; + // This contains the number of blocks resident in the file system. In + // practice, NumBlocks * BlockSize is equivalent to the size of the PDB file.
+ support::ulittle32_t NumBlocks; + // This contains the number of bytes which make up the directory. + support::ulittle32_t NumDirectoryBytes; + // This field's purpose is not yet known. + support::ulittle32_t Unknown1; + // This contains the block # of the block map. + support::ulittle32_t BlockMapAddr; +}; +} + } // namespace llvm namespace std { @@ -513,4 +543,5 @@ template <> struct hash<llvm::PDB_VariantType> { }; } + #endif diff --git a/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/include/llvm/DebugInfo/Symbolize/DIPrinter.h new file mode 100644 index 000000000000..0703fb14da61 --- /dev/null +++ b/include/llvm/DebugInfo/Symbolize/DIPrinter.h @@ -0,0 +1,47 @@ +//===- llvm/DebugInfo/Symbolize/DIPrinter.h ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the DIPrinter class, which is responsible for printing +// structures defined in DebugInfo/DIContext.h +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_SYMBOLIZE_DIPRINTER_H +#define LLVM_DEBUGINFO_SYMBOLIZE_DIPRINTER_H + +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +struct DILineInfo; +class DIInliningInfo; +struct DIGlobal; + +namespace symbolize { + +class DIPrinter { + raw_ostream &OS; + bool PrintFunctionNames; + bool PrintPretty; + void printName(const DILineInfo &Info, bool Inlined); + +public: + DIPrinter(raw_ostream &OS, bool PrintFunctionNames = true, + bool PrintPretty = false) + : OS(OS), PrintFunctionNames(PrintFunctionNames), + PrintPretty(PrintPretty) {} + + DIPrinter &operator<<(const DILineInfo &Info); + DIPrinter &operator<<(const DIInliningInfo &Info); + DIPrinter &operator<<(const DIGlobal &Global); +}; +} +} + +#endif +
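DIPrinter is a thin formatting shim over a raw_ostream, so llvm-symbolizer-style output is just chained insertions. A hypothetical driver (Info and Global stand for results obtained elsewhere):

    DIPrinter Printer(outs(), /*PrintFunctionNames=*/true, /*PrintPretty=*/false);
    Printer << Info;   // a DILineInfo
    Printer << Global; // a DIGlobal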
diff --git a/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h new file mode 100644 index 000000000000..ff9cc808875d --- /dev/null +++ b/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h @@ -0,0 +1,53 @@ +//===-- SymbolizableModule.h ------------------------------------ C++ -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SymbolizableModule interface. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H +#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H + +#include "llvm/DebugInfo/DIContext.h" +#include <cstdint> +#include <string> + +namespace llvm { +namespace object { +class ObjectFile; +} +} + +namespace llvm { +namespace symbolize { + +using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; + +class SymbolizableModule { +public: + virtual ~SymbolizableModule() {} + virtual DILineInfo symbolizeCode(uint64_t ModuleOffset, + FunctionNameKind FNKind, + bool UseSymbolTable) const = 0; + virtual DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset, + FunctionNameKind FNKind, + bool UseSymbolTable) const = 0; + virtual DIGlobal symbolizeData(uint64_t ModuleOffset) const = 0; + + // Return true if this is a 32-bit x86 PE COFF module. + virtual bool isWin32Module() const = 0; + + // Returns the preferred base of the module, i.e. where the loader would place + // it in memory assuming there were no conflicts. + virtual uint64_t getModulePreferredBase() const = 0; +}; + +} // namespace symbolize +} // namespace llvm + +#endif // LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEMODULE_H diff --git a/include/llvm/DebugInfo/Symbolize/Symbolize.h b/include/llvm/DebugInfo/Symbolize/Symbolize.h new file mode 100644 index 000000000000..ec3ae002659c --- /dev/null +++ b/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -0,0 +1,105 @@ +//===-- Symbolize.h --------------------------------------------- C++ -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Header for LLVM symbolization library. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H +#define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H + +#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ErrorOr.h" +#include <map> +#include <memory> +#include <string> + +namespace llvm { +namespace symbolize { + +using namespace object; +using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind; + +class LLVMSymbolizer { +public: + struct Options { + FunctionNameKind PrintFunctions; + bool UseSymbolTable : 1; + bool Demangle : 1; + bool RelativeAddresses : 1; + std::string DefaultArch; + std::vector<std::string> DsymHints; + Options(FunctionNameKind PrintFunctions = FunctionNameKind::LinkageName, + bool UseSymbolTable = true, bool Demangle = true, + bool RelativeAddresses = false, std::string DefaultArch = "") + : PrintFunctions(PrintFunctions), UseSymbolTable(UseSymbolTable), + Demangle(Demangle), RelativeAddresses(RelativeAddresses), + DefaultArch(DefaultArch) {} + }; + + LLVMSymbolizer(const Options &Opts = Options()) : Opts(Opts) {} + ~LLVMSymbolizer() { + flush(); + } + + ErrorOr<DILineInfo> symbolizeCode(const std::string &ModuleName, + uint64_t ModuleOffset); + ErrorOr<DIInliningInfo> symbolizeInlinedCode(const std::string &ModuleName, + uint64_t ModuleOffset); + ErrorOr<DIGlobal> symbolizeData(const std::string &ModuleName, + uint64_t ModuleOffset); + void flush(); + static std::string DemangleName(const std::string &Name, + const SymbolizableModule *ModInfo); + +private: + // Bundles together object file with code/data and object file with + // corresponding debug info. These objects can be the same. + typedef std::pair<ObjectFile*, ObjectFile*> ObjectPair; + + ErrorOr<SymbolizableModule *> + getOrCreateModuleInfo(const std::string &ModuleName); + ObjectFile *lookUpDsymFile(const std::string &Path, + const MachOObjectFile *ExeObj, + const std::string &ArchName); + ObjectFile *lookUpDebuglinkObject(const std::string &Path, + const ObjectFile *Obj, + const std::string &ArchName); + + /// \brief Returns pair of pointers to object and debug object. + ErrorOr<ObjectPair> getOrCreateObjectPair(const std::string &Path, + const std::string &ArchName); + + /// \brief Return a pointer to object file at specified path, for a specified + /// architecture (e.g. if path refers to a Mach-O universal binary, only one + /// object file from it will be returned). + ErrorOr<ObjectFile *> getOrCreateObject(const std::string &Path, + const std::string &ArchName); + + std::map<std::string, ErrorOr<std::unique_ptr<SymbolizableModule>>> Modules; + + /// \brief Contains cached results of getOrCreateObjectPair().
+ std::map<std::pair<std::string, std::string>, ErrorOr<ObjectPair>> + ObjectPairForPathArch; + + /// \brief Contains parsed binary for each path, or parsing error. + std::map<std::string, ErrorOr<OwningBinary<Binary>>> BinaryForPath; + + /// \brief Parsed object file for path/architecture pair, where "path" refers + /// to Mach-O universal binary. + std::map<std::pair<std::string, std::string>, ErrorOr<std::unique_ptr<ObjectFile>>> + ObjectForUBPathAndArch; + + Options Opts; +}; + +} // namespace symbolize +} // namespace llvm + +#endif
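End to end, the class above is driven by handing it a binary path and an offset; failures come back through ErrorOr rather than being printed. A hedged sketch ("a.out" and the offset are placeholders):

    LLVMSymbolizer Symbolizer; // default Options
    if (auto ResOrErr = Symbolizer.symbolizeCode("a.out", 0x401000)) {
      const DILineInfo &Info = *ResOrErr;
      outs() << Info.FunctionName << " at " << Info.FileName << ':'
             << Info.Line << '\n';
    }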
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index 821c0181ce83..a7302602dcd8 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -104,7 +104,12 @@ class ExecutionEngine { ExecutionEngineState EEState; /// The target data for the platform for which execution is being performed. - const DataLayout *DL; + /// + /// Note: the DataLayout is LLVMContext specific because it has an + /// internal cache based on type pointers. This makes it unsafe to reuse the + /// ExecutionEngine across contexts; we don't enforce this rule, but undefined + /// behavior can occur if the user tries to do it. + const DataLayout DL; /// Whether lazy JIT compilation is enabled. bool CompilingLazily; @@ -126,8 +131,6 @@ protected: /// optimize for the case where there is only one module. SmallVector<std::unique_ptr<Module>, 1> Modules; - void setDataLayout(const DataLayout *Val) { DL = Val; } - /// getMemoryForGV - Allocate memory for a global variable. virtual char *getMemoryForGV(const GlobalVariable *GV); @@ -194,7 +197,7 @@ public: //===--------------------------------------------------------------------===// - const DataLayout *getDataLayout() const { return DL; } + const DataLayout &getDataLayout() const { return DL; } /// removeModule - Remove a Module from the list of modules. Returns true if /// M is found. @@ -478,7 +481,8 @@ public: } protected: - ExecutionEngine() {} + ExecutionEngine(DataLayout DL) : DL(std::move(DL)) {} + explicit ExecutionEngine(DataLayout DL, std::unique_ptr<Module> M); explicit ExecutionEngine(std::unique_ptr<Module> M); void emitGlobals(); @@ -488,6 +492,9 @@ protected: GenericValue getConstantValue(const Constant *C); void LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr, Type *Ty); + +private: + void Init(std::unique_ptr<Module> M); }; namespace EngineKind { diff --git a/include/llvm/ExecutionEngine/Interpreter.h b/include/llvm/ExecutionEngine/Interpreter.h index f49d0c487fe9..a14707840ad8 100644 --- a/include/llvm/ExecutionEngine/Interpreter.h +++ b/include/llvm/ExecutionEngine/Interpreter.h @@ -16,22 +16,12 @@ #define LLVM_EXECUTIONENGINE_INTERPRETER_H #include "llvm/ExecutionEngine/ExecutionEngine.h" -#include <cstdlib> extern "C" void LLVMLinkInInterpreter(); namespace { struct ForceInterpreterLinking { - ForceInterpreterLinking() { - // We must reference the interpreter in such a way that compilers will not - // delete it all as dead code, even with whole program optimization, - // yet is effectively a NO-OP. As the compiler isn't smart enough - // to know that getenv() never returns -1, this will do the job. - if (std::getenv("bar") != (char*) -1) - return; - - LLVMLinkInInterpreter(); - } + ForceInterpreterLinking() { LLVMLinkInInterpreter(); } } ForceInterpreterLinking; } diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 9694b80d1928..7dab5d1bc67f 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -22,6 +22,7 @@ #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Transforms/Utils/Cloning.h" #include <list> +#include <memory> #include <set> #include "llvm/Support/Debug.h" @@ -36,56 +37,89 @@ namespace orc { /// added to the layer below. When a stub is called it triggers the extraction /// of the function body from the original module. The extracted body is then /// compiled and executed. -template <typename BaseLayerT, typename CompileCallbackMgrT, - typename PartitioningFtor = - std::function<std::set<Function*>(Function&)>> +template <typename BaseLayerT, typename CompileCallbackMgrT, + typename IndirectStubsMgrT> class CompileOnDemandLayer { private: - // Utility class for MapValue. Only materializes declarations for global - // variables. - class GlobalDeclMaterializer : public ValueMaterializer { + template <typename MaterializerFtor> + class LambdaMaterializer final : public ValueMaterializer { public: - typedef std::set<const Function*> StubSet; + LambdaMaterializer(MaterializerFtor M) : M(std::move(M)) {} + Value *materializeDeclFor(Value *V) final { return M(V); } - GlobalDeclMaterializer(Module &Dst, const StubSet *StubsToClone = nullptr) - : Dst(Dst), StubsToClone(StubsToClone) {} - - Value* materializeValueFor(Value *V) final { - if (auto *GV = dyn_cast<GlobalVariable>(V)) - return cloneGlobalVariableDecl(Dst, *GV); - else if (auto *F = dyn_cast<Function>(V)) { - auto *ClonedF = cloneFunctionDecl(Dst, *F); - if (StubsToClone && StubsToClone->count(F)) { - GlobalVariable *FnBodyPtr = - createImplPointer(*ClonedF->getType(), *ClonedF->getParent(), - ClonedF->getName() + "$orc_addr", nullptr); - makeStub(*ClonedF, *FnBodyPtr); - ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage); - ClonedF->addFnAttr(Attribute::AlwaysInline); - } - return ClonedF; - } - // Else. - return nullptr; - } private: - Module &Dst; - const StubSet *StubsToClone; + MaterializerFtor M; }; + template <typename MaterializerFtor> + LambdaMaterializer<MaterializerFtor> + createLambdaMaterializer(MaterializerFtor M) { + return LambdaMaterializer<MaterializerFtor>(std::move(M)); + } + typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT; - struct LogicalModuleResources { - std::shared_ptr<Module> SourceModule; - std::set<const Function*> StubsToClone; + class ModuleOwner { + public: + ModuleOwner() = default; + ModuleOwner(const ModuleOwner&) = delete; + ModuleOwner& operator=(const ModuleOwner&) = delete; + virtual ~ModuleOwner() { } + virtual Module& getModule() const = 0; }; + template <typename ModulePtrT> + class ModuleOwnerImpl : public ModuleOwner { + public: + ModuleOwnerImpl(ModulePtrT ModulePtr) : ModulePtr(std::move(ModulePtr)) {} + Module& getModule() const override { return *ModulePtr; } + private: + ModulePtrT ModulePtr; + }; + + template <typename ModulePtrT> + std::unique_ptr<ModuleOwner> wrapOwnership(ModulePtrT ModulePtr) { + return llvm::make_unique<ModuleOwnerImpl<ModulePtrT>>(std::move(ModulePtr)); + } + + struct LogicalModuleResources { + std::unique_ptr<ModuleOwner> SourceModuleOwner; + std::set<const Function*> StubsToClone; + std::unique_ptr<IndirectStubsMgrT> StubsMgr; + + LogicalModuleResources() = default; + + // Explicit move constructor to make MSVC happy. + LogicalModuleResources(LogicalModuleResources &&Other) + : SourceModuleOwner(std::move(Other.SourceModuleOwner)), + StubsToClone(std::move(Other.StubsToClone)), + StubsMgr(std::move(Other.StubsMgr)) {} + + // Explicit move assignment to make MSVC happy.
+ LogicalModuleResources& operator=(LogicalModuleResources &&Other) { + SourceModuleOwner = std::move(Other.SourceModuleOwner); + StubsToClone = std::move(Other.StubsToClone); + StubsMgr = std::move(Other.StubsMgr); + return *this; + } + + JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { + if (Name.endswith("$stub_ptr") && !ExportedSymbolsOnly) { + assert(!ExportedSymbolsOnly && "Stubs are never exported"); + return StubsMgr->findPointer(Name.drop_back(9)); + } + return StubsMgr->findStub(Name, ExportedSymbolsOnly); + } + + }; + + + struct LogicalDylibResources { typedef std::function<RuntimeDyld::SymbolInfo(const std::string &)> SymbolResolverFtor; SymbolResolverFtor ExternalSymbolResolver; - PartitioningFtor Partitioner; }; typedef LogicalDylib<BaseLayerT, LogicalModuleResources, LogicalDylibResources> LogicalDylibList; public: + /// @brief Handle to a set of loaded modules. typedef typename LogicalDylibList::iterator ModuleSetHandleT; + /// @brief Module partitioning functor. + typedef std::function<std::set<Function*>(Function&)> PartitioningFtor; + + /// @brief Builder for IndirectStubsManagers. + typedef std::function<std::unique_ptr<IndirectStubsMgrT>()> + IndirectStubsManagerBuilderT; + /// @brief Construct a compile-on-demand layer instance. - CompileOnDemandLayer(BaseLayerT &BaseLayer, CompileCallbackMgrT &CallbackMgr, - bool CloneStubsIntoPartitions) - : BaseLayer(BaseLayer), CompileCallbackMgr(CallbackMgr), + CompileOnDemandLayer(BaseLayerT &BaseLayer, PartitioningFtor Partition, + CompileCallbackMgrT &CallbackMgr, + IndirectStubsManagerBuilderT CreateIndirectStubsManager, + bool CloneStubsIntoPartitions = true) + : BaseLayer(BaseLayer), Partition(Partition), + CompileCallbackMgr(CallbackMgr), + CreateIndirectStubsManager(std::move(CreateIndirectStubsManager)), + CloneStubsIntoPartitions(CloneStubsIntoPartitions) {} /// @brief Add a module to the compile-on-demand layer. @@ -122,17 +168,9 @@ public: return Resolver->findSymbol(Name); }; - LDResources.Partitioner = - [](Function &F) { - std::set<Function*> Partition; - Partition.insert(&F); - return Partition; - }; - // Process each of the modules in this module set. for (auto &M : Ms) - addLogicalModule(LogicalDylibs.back(), - std::shared_ptr<Module>(std::move(M))); + addLogicalModule(LogicalDylibs.back(), std::move(M)); return std::prev(LogicalDylibs.end()); } @@ -150,6 +188,10 @@ public: /// @param ExportedSymbolsOnly If true, search only for exported symbols. /// @return A handle for the given named symbol, if it exists. JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) { + for (auto LDI = LogicalDylibs.begin(), LDE = LogicalDylibs.end(); + LDI != LDE; ++LDI) + if (auto Symbol = findSymbolIn(LDI, Name, ExportedSymbolsOnly)) + return Symbol; return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); } @@ -162,85 +204,138 @@ private: - void addLogicalModule(CODLogicalDylib &LD, std::shared_ptr<Module> SrcM) { + template <typename ModulePtrT> + void addLogicalModule(CODLogicalDylib &LD, ModulePtrT SrcMPtr) { // Bump the linkage and rename any anonymous/private members in SrcM to // ensure that everything will resolve properly after we partition SrcM. - makeAllSymbolsExternallyAccessible(*SrcM); + makeAllSymbolsExternallyAccessible(*SrcMPtr); // Create a logical module handle for SrcM within the logical dylib. auto LMH = LD.createLogicalModule(); auto &LMResources = LD.getLogicalModuleResources(LMH); - LMResources.SourceModule = SrcM; - // Create the GVs-and-stubs module.
- auto GVsAndStubsM = llvm::make_unique<Module>( - (SrcM->getName() + ".globals_and_stubs").str(), - SrcM->getContext()); - GVsAndStubsM->setDataLayout(SrcM->getDataLayout()); + LMResources.SourceModuleOwner = wrapOwnership(std::move(SrcMPtr)); + + Module &SrcM = LMResources.SourceModuleOwner->getModule(); + + // Create the GlobalValues module. + const DataLayout &DL = SrcM.getDataLayout(); + auto GVsM = llvm::make_unique<Module>((SrcM.getName() + ".globals").str(), + SrcM.getContext()); + GVsM->setDataLayout(DL); + + // Create function stubs. ValueToValueMapTy VMap; + { + typename IndirectStubsMgrT::StubInitsMap StubInits; + for (auto &F : SrcM) { + // Skip declarations. + if (F.isDeclaration()) + continue; - // Process module and create stubs. - // We create the stubs before copying the global variables as we know the - // stubs won't refer to any globals (they only refer to their implementation - // pointer) so there's no ordering/value-mapping issues. - for (auto &F : *SrcM) { + // Record all functions defined by this module. + if (CloneStubsIntoPartitions) + LMResources.StubsToClone.insert(&F); - // Skip declarations. - if (F.isDeclaration()) - continue; - - // Record all functions defined by this module. - if (CloneStubsIntoPartitions) - LMResources.StubsToClone.insert(&F); - - // For each definition: create a callback, a stub, and a function body - // pointer. Initialize the function body pointer to point at the callback, - // and set the callback to compile the function body. - auto CCInfo = CompileCallbackMgr.getCompileCallback(SrcM->getContext()); - Function *StubF = cloneFunctionDecl(*GVsAndStubsM, F, &VMap); - GlobalVariable *FnBodyPtr = - createImplPointer(*StubF->getType(), *StubF->getParent(), - StubF->getName() + "$orc_addr", - createIRTypedAddress(*StubF->getFunctionType(), - CCInfo.getAddress())); - makeStub(*StubF, *FnBodyPtr); - CCInfo.setCompileAction( - [this, &LD, LMH, &F]() { + // Create a callback, associate it with the stub for the function, + // and set the compile action to compile the partition containing the + // function. + auto CCInfo = CompileCallbackMgr.getCompileCallback(); + StubInits[mangle(F.getName(), DL)] = + std::make_pair(CCInfo.getAddress(), + JITSymbolBase::flagsFromGlobalValue(F)); + CCInfo.setCompileAction([this, &LD, LMH, &F]() { return this->extractAndCompile(LD, LMH, F); }); + } + + LMResources.StubsMgr = CreateIndirectStubsManager(); + auto EC = LMResources.StubsMgr->createStubs(StubInits); + (void)EC; + // FIXME: This should be propagated back to the user. Stub creation may + // fail for remote JITs. + assert(!EC && "Error generating stubs"); } - // Now clone the global variable declarations. - GlobalDeclMaterializer GDMat(*GVsAndStubsM); - for (auto &GV : SrcM->globals()) - if (!GV.isDeclaration()) - cloneGlobalVariableDecl(*GVsAndStubsM, GV, &VMap); + // Clone global variable decls. + for (auto &GV : SrcM.globals()) + if (!GV.isDeclaration() && !VMap.count(&GV)) + cloneGlobalVariableDecl(*GVsM, GV, &VMap); - // Then clone the initializers. - for (auto &GV : SrcM->globals()) - if (!GV.isDeclaration()) - moveGlobalVariableInitializer(GV, VMap, &GDMat); + // And the aliases. + for (auto &A : SrcM.aliases()) + if (!VMap.count(&A)) + cloneGlobalAliasDecl(*GVsM, A, VMap); - // Build a resolver for the stubs module and add it to the base layer. - auto GVsAndStubsResolver = createLambdaResolver( - [&LD](const std::string &Name) { + // Now we need to clone the GV and alias initializers.
+ + // Initializers may refer to functions declared (but not defined) in this + // module. Build a materializer to clone decls on demand. + auto Materializer = createLambdaMaterializer( + [this, &GVsM, &LMResources](Value *V) -> Value* { + if (auto *F = dyn_cast<Function>(V)) { + // Decls in the original module just get cloned. + if (F->isDeclaration()) + return cloneFunctionDecl(*GVsM, *F); + + // Definitions in the original module (which we have emitted stubs + // for at this point) get turned into a constant alias to the stub + // instead. + const DataLayout &DL = GVsM->getDataLayout(); + std::string FName = mangle(F->getName(), DL); + auto StubSym = LMResources.StubsMgr->findStub(FName, false); + unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(F->getType()); + ConstantInt *StubAddr = + ConstantInt::get(GVsM->getContext(), + APInt(PtrBitWidth, StubSym.getAddress())); + Constant *Init = ConstantExpr::getCast(Instruction::IntToPtr, + StubAddr, F->getType()); + return GlobalAlias::create(F->getFunctionType(), + F->getType()->getAddressSpace(), + F->getLinkage(), F->getName(), + Init, GVsM.get()); + } + // else.... + return nullptr; + }); + + // Clone the global variable initializers. + for (auto &GV : SrcM.globals()) + if (!GV.isDeclaration()) + moveGlobalVariableInitializer(GV, VMap, &Materializer); + + // Clone the global alias initializers. + for (auto &A : SrcM.aliases()) { + auto *NewA = cast<GlobalAlias>(VMap[&A]); + assert(NewA && "Alias not cloned?"); + Value *Init = MapValue(A.getAliasee(), VMap, RF_None, nullptr, + &Materializer); + NewA->setAliasee(cast<Constant>(Init)); + } + + // Build a resolver for the globals module and add it to the base layer. + auto GVsResolver = createLambdaResolver( + [&LD, LMH](const std::string &Name) { + auto &LMResources = LD.getLogicalModuleResources(LMH); + if (auto Sym = LMResources.StubsMgr->findStub(Name, false)) + return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags()); return LD.getDylibResources().ExternalSymbolResolver(Name); }, [](const std::string &Name) { return RuntimeDyld::SymbolInfo(nullptr); }); - std::vector<std::unique_ptr<Module>> GVsAndStubsMSet; - GVsAndStubsMSet.push_back(std::move(GVsAndStubsM)); - auto GVsAndStubsH = - BaseLayer.addModuleSet(std::move(GVsAndStubsMSet), + std::vector<std::unique_ptr<Module>> GVsMSet; + GVsMSet.push_back(std::move(GVsM)); + auto GVsH = + BaseLayer.addModuleSet(std::move(GVsMSet), llvm::make_unique<SectionMemoryManager>(), - std::move(GVsAndStubsResolver)); - LD.addToLogicalModule(LMH, GVsAndStubsH); + std::move(GVsResolver)); + LD.addToLogicalModule(LMH, GVsH); } - static std::string Mangle(StringRef Name, const DataLayout &DL) { + static std::string mangle(StringRef Name, const DataLayout &DL) { std::string MangledName; { raw_string_ostream MangledNameStream(MangledName); @@ -252,42 +347,35 @@ private: TargetAddress extractAndCompile(CODLogicalDylib &LD, LogicalModuleHandle LMH, Function &F) { - Module &SrcM = *LD.getLogicalModuleResources(LMH).SourceModule; + auto &LMResources = LD.getLogicalModuleResources(LMH); + Module &SrcM = LMResources.SourceModuleOwner->getModule(); // If F is a declaration we must already have compiled it. if (F.isDeclaration()) return 0; // Grab the name of the function being called here.
- std::string CalledFnName = Mangle(F.getName(), SrcM.getDataLayout()); + std::string CalledFnName = mangle(F.getName(), SrcM.getDataLayout()); - auto Partition = LD.getDylibResources().Partitioner(F); - auto PartitionH = emitPartition(LD, LMH, Partition); + auto Part = Partition(F); + auto PartH = emitPartition(LD, LMH, Part); TargetAddress CalledAddr = 0; - for (auto *SubF : Partition) { - std::string FName = SubF->getName(); - auto FnBodySym = - BaseLayer.findSymbolIn(PartitionH, Mangle(FName, SrcM.getDataLayout()), - false); - auto FnPtrSym = - BaseLayer.findSymbolIn(*LD.moduleHandlesBegin(LMH), - Mangle(FName + "$orc_addr", - SrcM.getDataLayout()), - false); + for (auto *SubF : Part) { + std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout()); + auto FnBodySym = BaseLayer.findSymbolIn(PartH, FnName, false); assert(FnBodySym && "Couldn't find function body."); - assert(FnPtrSym && "Couldn't find function body pointer."); TargetAddress FnBodyAddr = FnBodySym.getAddress(); - void *FnPtrAddr = reinterpret_cast( - static_cast(FnPtrSym.getAddress())); // If this is the function we're calling record the address so we can // return it from this function. if (SubF == &F) CalledAddr = FnBodyAddr; - memcpy(FnPtrAddr, &FnBodyAddr, sizeof(uintptr_t)); + // Update the function body pointer for the stub. + if (auto EC = LMResources.StubsMgr->updatePointer(FnName, FnBodyAddr)) + return 0; } return CalledAddr; @@ -296,13 +384,13 @@ private: template BaseLayerModuleSetHandleT emitPartition(CODLogicalDylib &LD, LogicalModuleHandle LMH, - const PartitionT &Partition) { + const PartitionT &Part) { auto &LMResources = LD.getLogicalModuleResources(LMH); - Module &SrcM = *LMResources.SourceModule; + Module &SrcM = LMResources.SourceModuleOwner->getModule(); // Create the module. std::string NewName = SrcM.getName(); - for (auto *F : Partition) { + for (auto *F : Part) { NewName += "."; NewName += F->getName(); } @@ -310,15 +398,51 @@ private: auto M = llvm::make_unique(NewName, SrcM.getContext()); M->setDataLayout(SrcM.getDataLayout()); ValueToValueMapTy VMap; - GlobalDeclMaterializer GDM(*M, &LMResources.StubsToClone); + + auto Materializer = createLambdaMaterializer([this, &LMResources, &M, + &VMap](Value *V) -> Value * { + if (auto *GV = dyn_cast(V)) + return cloneGlobalVariableDecl(*M, *GV); + + if (auto *F = dyn_cast(V)) { + // Check whether we want to clone an available_externally definition. + if (!LMResources.StubsToClone.count(F)) + return cloneFunctionDecl(*M, *F); + + // Ok - we want an inlinable stub. For that to work we need a decl + // for the stub pointer. + auto *StubPtr = createImplPointer(*F->getType(), *M, + F->getName() + "$stub_ptr", nullptr); + auto *ClonedF = cloneFunctionDecl(*M, *F); + makeStub(*ClonedF, *StubPtr); + ClonedF->setLinkage(GlobalValue::AvailableExternallyLinkage); + ClonedF->addFnAttr(Attribute::AlwaysInline); + return ClonedF; + } + + if (auto *A = dyn_cast(V)) { + auto *Ty = A->getValueType(); + if (Ty->isFunctionTy()) + return Function::Create(cast(Ty), + GlobalValue::ExternalLinkage, A->getName(), + M.get()); + + return new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, + nullptr, A->getName(), nullptr, + GlobalValue::NotThreadLocal, + A->getType()->getAddressSpace()); + } + + return nullptr; + }); // Create decls in the new module. - for (auto *F : Partition) + for (auto *F : Part) cloneFunctionDecl(*M, *F, &VMap); // Move the function bodies. 
- for (auto *F : Partition) - moveFunctionBody(*F, VMap, &GDM); + for (auto *F : Part) + moveFunctionBody(*F, VMap, &Materializer); // Create memory manager and symbol resolver. auto MemMgr = llvm::make_unique<SectionMemoryManager>(); @@ -342,7 +466,10 @@ private: } BaseLayerT &BaseLayer; + PartitioningFtor Partition; CompileCallbackMgrT &CompileCallbackMgr; + IndirectStubsManagerBuilderT CreateIndirectStubsManager; + LogicalDylibList LogicalDylibs; bool CloneStubsIntoPartitions; }; diff --git a/include/llvm/ExecutionEngine/Orc/CompileUtils.h b/include/llvm/ExecutionEngine/Orc/CompileUtils.h index 49a1fbadb295..1e7d211196f5 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileUtils.h +++ b/include/llvm/ExecutionEngine/Orc/CompileUtils.h @@ -40,7 +40,6 @@ public: if (TM.addPassesToEmitMC(PM, Ctx, ObjStream)) llvm_unreachable("Target does not support MC emission."); PM.run(M); - ObjStream.flush(); std::unique_ptr<MemoryBuffer> ObjBuffer( new ObjectMemoryBuffer(std::move(ObjBufferSV))); ErrorOr<std::unique_ptr<object::ObjectFile>> Obj = diff --git a/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h new file mode 100644 index 000000000000..9fa222c340f8 --- /dev/null +++ b/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h @@ -0,0 +1,108 @@ +//===--- GlobalMappingLayer.h - Inject mappings for JIT symbols -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Convenience layer for injecting symbols that will appear in calls to +// findSymbol. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H +#define LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H + +#include "JITSymbol.h" +#include <map> + +namespace llvm { +namespace orc { + +/// @brief Global mapping layer. +/// +/// This layer overrides the findSymbol method to first search a local symbol +/// table that the client can define. It can be used to inject new symbol +/// mappings into the JIT. Beware, however: symbols within a single IR module or +/// object file will still resolve locally (via RuntimeDyld's symbol table) - +/// such internal references cannot be overridden via this layer. +template <typename BaseLayerT> +class GlobalMappingLayer { +public: + /// @brief Handle to a set of added modules. + typedef typename BaseLayerT::ModuleSetHandleT ModuleSetHandleT; + + /// @brief Construct a GlobalMappingLayer with the given BaseLayer. + GlobalMappingLayer(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {} + + /// @brief Add the given module set to the JIT. + /// @return A handle for the added modules. + template <typename ModuleSetT, typename MemoryManagerPtrT, + typename SymbolResolverPtrT> + ModuleSetHandleT addModuleSet(ModuleSetT Ms, + MemoryManagerPtrT MemMgr, + SymbolResolverPtrT Resolver) { + return BaseLayer.addModuleSet(std::move(Ms), std::move(MemMgr), + std::move(Resolver)); + } + + /// @brief Remove the module set associated with the handle H. + void removeModuleSet(ModuleSetHandleT H) { BaseLayer.removeModuleSet(H); } + + /// @brief Manually set the address to return for the given symbol. + void setGlobalMapping(const std::string &Name, TargetAddress Addr) { + SymbolTable[Name] = Addr; + } + + /// @brief Remove the given symbol from the global mapping. + void eraseGlobalMapping(const std::string &Name) { + SymbolTable.erase(Name); + } + + /// @brief Search for the given named symbol.
+ /// + /// This method will first search the local symbol table, returning + /// any symbol found there. If the symbol is not found in the local + /// table then this call will be passed through to the base layer. + /// + /// @param Name The name of the symbol to search for. + /// @param ExportedSymbolsOnly If true, search only for exported symbols. + /// @return A handle for the given named symbol, if it exists. + JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { + auto I = SymbolTable.find(Name); + if (I != SymbolTable.end()) + return JITSymbol(I->second, JITSymbolFlags::Exported); + return BaseLayer.findSymbol(Name, ExportedSymbolsOnly); + } + + /// @brief Get the address of the given symbol in the context of the set of + /// modules represented by the handle H. This call is forwarded to the + /// base layer's implementation. + /// @param H The handle for the module set to search in. + /// @param Name The name of the symbol to search for. + /// @param ExportedSymbolsOnly If true, search only for exported symbols. + /// @return A handle for the given named symbol, if it is found in the + /// given module set. + JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name, + bool ExportedSymbolsOnly) { + return BaseLayer.findSymbolIn(H, Name, ExportedSymbolsOnly); + } + + /// @brief Immediately emit and finalize the module set represented by the + /// given handle. + /// @param H Handle for module set to emit/finalize. + void emitAndFinalize(ModuleSetHandleT H) { + BaseLayer.emitAndFinalize(H); + } + +private: + BaseLayerT &BaseLayer; + std::map SymbolTable; +}; + +} // End namespace orc. +} // End namespace llvm. + +#endif // LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H diff --git a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h index 637902200786..e4bed95fdabf 100644 --- a/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h +++ b/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h @@ -85,8 +85,6 @@ public: ModuleSetHandleT H = BaseLayer.addObjectSet(Objects, std::move(MemMgr), std::move(Resolver)); - BaseLayer.takeOwnershipOfBuffers(H, std::move(Buffers)); - return H; } diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index 4b7fc5e84b9c..d6ee3a846b04 100644 --- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -27,9 +27,8 @@ namespace llvm { namespace orc { -/// @brief Base class for JITLayer independent aspects of -/// JITCompileCallbackManager. -class JITCompileCallbackManagerBase { +/// @brief Target-independent base class for compile callback management. +class JITCompileCallbackManager { public: typedef std::function CompileFtor; @@ -51,18 +50,13 @@ public: CompileFtor &Compile; }; - /// @brief Construct a JITCompileCallbackManagerBase. + /// @brief Construct a JITCompileCallbackManager. /// @param ErrorHandlerAddress The address of an error handler in the target /// process to be used if a compile callback fails. - /// @param NumTrampolinesPerBlock Number of trampolines to emit if there is no - /// available trampoline when getCompileCallback is - /// called. 
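// A minimal usage sketch for the GlobalMappingLayer added above (an
// illustration under assumed usage, not taken from the patch; the layer and
// method names come from this patch, while the TargetMachine `TM` and host
// function `myGetChar` are hypothetical stand-ins):
//
//   ObjectLinkingLayer<> ObjLayer;
//   IRCompileLayer<ObjectLinkingLayer<>> CompileLayer(ObjLayer,
//                                                     SimpleCompiler(*TM));
//   GlobalMappingLayer<IRCompileLayer<ObjectLinkingLayer<>>>
//       MapLayer(CompileLayer);
//
//   // Route "getchar" in JIT'd code to a host function; findSymbol consults
//   // this local table before delegating to the base layer.
//   MapLayer.setGlobalMapping("getchar",
//                             static_cast<TargetAddress>(
//                                 reinterpret_cast<uintptr_t>(&myGetChar)));
//   auto Sym = MapLayer.findSymbol("getchar", /*ExportedSymbolsOnly=*/true);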
- JITCompileCallbackManagerBase(TargetAddress ErrorHandlerAddress, - unsigned NumTrampolinesPerBlock) - : ErrorHandlerAddress(ErrorHandlerAddress), - NumTrampolinesPerBlock(NumTrampolinesPerBlock) {} + JITCompileCallbackManager(TargetAddress ErrorHandlerAddress) + : ErrorHandlerAddress(ErrorHandlerAddress) {} - virtual ~JITCompileCallbackManagerBase() {} + virtual ~JITCompileCallbackManager() {} /// @brief Execute the callback for the given trampoline id. Called by the JIT /// to compile functions on demand. @@ -90,7 +84,11 @@ public: } /// @brief Reserve a compile callback. - virtual CompileCallbackInfo getCompileCallback(LLVMContext &Context) = 0; + CompileCallbackInfo getCompileCallback() { + TargetAddress TrampolineAddr = getAvailableTrampolineAddr(); + auto &Compile = this->ActiveTrampolines[TrampolineAddr]; + return CompileCallbackInfo(TrampolineAddr, Compile); + } /// @brief Get a CompileCallbackInfo for an existing callback. CompileCallbackInfo getCompileCallbackInfo(TargetAddress TrampolineAddr) { @@ -113,77 +111,16 @@ public: protected: TargetAddress ErrorHandlerAddress; - unsigned NumTrampolinesPerBlock; typedef std::map TrampolineMapT; TrampolineMapT ActiveTrampolines; std::vector AvailableTrampolines; -}; - -/// @brief Manage compile callbacks. -template -class JITCompileCallbackManager : public JITCompileCallbackManagerBase { -public: - - /// @brief Construct a JITCompileCallbackManager. - /// @param JIT JIT layer to emit callback trampolines, etc. into. - /// @param Context LLVMContext to use for trampoline & resolve block modules. - /// @param ErrorHandlerAddress The address of an error handler in the target - /// process to be used if a compile callback fails. - /// @param NumTrampolinesPerBlock Number of trampolines to allocate whenever - /// there is no existing callback trampoline. - /// (Trampolines are allocated in blocks for - /// efficiency.) - JITCompileCallbackManager(JITLayerT &JIT, RuntimeDyld::MemoryManager &MemMgr, - LLVMContext &Context, - TargetAddress ErrorHandlerAddress, - unsigned NumTrampolinesPerBlock) - : JITCompileCallbackManagerBase(ErrorHandlerAddress, - NumTrampolinesPerBlock), - JIT(JIT), MemMgr(MemMgr) { - emitResolverBlock(Context); - } - - /// @brief Get/create a compile callback with the given signature. 
- CompileCallbackInfo getCompileCallback(LLVMContext &Context) final { - TargetAddress TrampolineAddr = getAvailableTrampolineAddr(Context); - auto &Compile = this->ActiveTrampolines[TrampolineAddr]; - return CompileCallbackInfo(TrampolineAddr, Compile); - } private: - std::vector<std::unique_ptr<Module>> - SingletonSet(std::unique_ptr<Module> M) { - std::vector<std::unique_ptr<Module>> Ms; - Ms.push_back(std::move(M)); - return Ms; - } - - void emitResolverBlock(LLVMContext &Context) { - std::unique_ptr<Module> M(new Module("resolver_block_module", - Context)); - TargetT::insertResolverBlock(*M, *this); - auto NonResolver = - createLambdaResolver( - [](const std::string &Name) -> RuntimeDyld::SymbolInfo { - llvm_unreachable("External symbols in resolver block?"); - }, - [](const std::string &Name) -> RuntimeDyld::SymbolInfo { - llvm_unreachable("Dylib symbols in resolver block?"); - }); - auto H = JIT.addModuleSet(SingletonSet(std::move(M)), &MemMgr, - std::move(NonResolver)); - JIT.emitAndFinalize(H); - auto ResolverBlockSymbol = - JIT.findSymbolIn(H, TargetT::ResolverBlockName, false); - assert(ResolverBlockSymbol && "Failed to insert resolver block"); - ResolverBlockAddr = ResolverBlockSymbol.getAddress(); - } - - TargetAddress getAvailableTrampolineAddr(LLVMContext &Context) { + TargetAddress getAvailableTrampolineAddr() { if (this->AvailableTrampolines.empty()) - grow(Context); + grow(); assert(!this->AvailableTrampolines.empty() && "Failed to grow available trampolines."); TargetAddress TrampolineAddr = this->AvailableTrampolines.back(); @@ -191,35 +128,212 @@ private: return TrampolineAddr; } - void grow(LLVMContext &Context) { - assert(this->AvailableTrampolines.empty() && "Growing prematurely?"); - std::unique_ptr<Module> M(new Module("trampoline_block", Context)); - auto GetLabelName = - TargetT::insertCompileCallbackTrampolines(*M, ResolverBlockAddr, - this->NumTrampolinesPerBlock, - this->ActiveTrampolines.size()); - auto NonResolver = - createLambdaResolver( - [](const std::string &Name) -> RuntimeDyld::SymbolInfo { - llvm_unreachable("External symbols in trampoline block?"); - }, - [](const std::string &Name) -> RuntimeDyld::SymbolInfo { - llvm_unreachable("Dylib symbols in trampoline block?"); - }); - auto H = JIT.addModuleSet(SingletonSet(std::move(M)), &MemMgr, - std::move(NonResolver)); - JIT.emitAndFinalize(H); - for (unsigned I = 0; I < this->NumTrampolinesPerBlock; ++I) { - std::string Name = GetLabelName(I); - auto TrampolineSymbol = JIT.findSymbolIn(H, Name, false); - assert(TrampolineSymbol && "Failed to emit trampoline."); - this->AvailableTrampolines.push_back(TrampolineSymbol.getAddress()); - } + // Create new trampolines - to be implemented in subclasses. + virtual void grow() = 0; + + virtual void anchor(); +}; + +/// @brief Manage compile callbacks for in-process JITs. +template <typename TargetT> +class LocalJITCompileCallbackManager : public JITCompileCallbackManager { +public: + + /// @brief Construct a LocalJITCompileCallbackManager. + /// @param ErrorHandlerAddress The address of an error handler in the target + /// process to be used if a compile callback fails. + LocalJITCompileCallbackManager(TargetAddress ErrorHandlerAddress) + : JITCompileCallbackManager(ErrorHandlerAddress) { + + /// Set up the resolver block.
+ std::error_code EC; + ResolverBlock = + sys::OwningMemoryBlock( + sys::Memory::allocateMappedMemory(TargetT::ResolverCodeSize, nullptr, + sys::Memory::MF_READ | + sys::Memory::MF_WRITE, EC)); + assert(!EC && "Failed to allocate resolver block"); + + TargetT::writeResolverCode(static_cast<uint8_t*>(ResolverBlock.base()), + &reenter, this); + + EC = sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(), + sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + assert(!EC && "Failed to mprotect resolver block"); } - JITLayerT &JIT; - RuntimeDyld::MemoryManager &MemMgr; - TargetAddress ResolverBlockAddr; +private: + + static TargetAddress reenter(void *CCMgr, void *TrampolineId) { + JITCompileCallbackManager *Mgr = + static_cast<JITCompileCallbackManager*>(CCMgr); + return Mgr->executeCompileCallback( + static_cast<TargetAddress>( + reinterpret_cast<uintptr_t>(TrampolineId))); + } + + void grow() override { + assert(this->AvailableTrampolines.empty() && "Growing prematurely?"); + + std::error_code EC; + auto TrampolineBlock = + sys::OwningMemoryBlock( + sys::Memory::allocateMappedMemory(TargetT::PageSize, nullptr, + sys::Memory::MF_READ | + sys::Memory::MF_WRITE, EC)); + assert(!EC && "Failed to allocate trampoline block"); + + + unsigned NumTrampolines = + (TargetT::PageSize - TargetT::PointerSize) / TargetT::TrampolineSize; + + uint8_t *TrampolineMem = static_cast<uint8_t*>(TrampolineBlock.base()); + TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(), + NumTrampolines); + + for (unsigned I = 0; I < NumTrampolines; ++I) + this->AvailableTrampolines.push_back( + static_cast<TargetAddress>(reinterpret_cast<uintptr_t>( + TrampolineMem + (I * TargetT::TrampolineSize)))); + + EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(), + sys::Memory::MF_READ | + sys::Memory::MF_EXEC); + assert(!EC && "Failed to mprotect trampoline block"); + + TrampolineBlocks.push_back(std::move(TrampolineBlock)); + } + + sys::OwningMemoryBlock ResolverBlock; + std::vector<sys::OwningMemoryBlock> TrampolineBlocks; +}; + +/// @brief Base class for managing collections of named indirect stubs. +class IndirectStubsManager { +public: + + /// @brief Map type for initializing the manager. See createStubs. + typedef StringMap<std::pair<TargetAddress, JITSymbolFlags>> StubInitsMap; + + virtual ~IndirectStubsManager() {} + + /// @brief Create a single stub with the given name, target address and flags. + virtual std::error_code createStub(StringRef StubName, TargetAddress StubAddr, + JITSymbolFlags StubFlags) = 0; + + /// @brief Create StubInits.size() stubs with the given names, target + /// addresses, and flags. + virtual std::error_code createStubs(const StubInitsMap &StubInits) = 0; + + /// @brief Find the stub with the given name. If ExportedStubsOnly is true, + /// this will only return a result if the stub's flags indicate that it + /// is exported. + virtual JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) = 0; + + /// @brief Find the implementation-pointer for the stub. + virtual JITSymbol findPointer(StringRef Name) = 0; + + /// @brief Change the value of the implementation pointer for the stub. + virtual std::error_code updatePointer(StringRef Name, TargetAddress NewAddr) = 0; +private: + virtual void anchor(); +}; + +/// @brief IndirectStubsManager implementation for a concrete target, e.g. +/// OrcX86_64. (See OrcTargetSupport.h).
+template +class LocalIndirectStubsManager : public IndirectStubsManager { +public: + + std::error_code createStub(StringRef StubName, TargetAddress StubAddr, + JITSymbolFlags StubFlags) override { + if (auto EC = reserveStubs(1)) + return EC; + + createStubInternal(StubName, StubAddr, StubFlags); + + return std::error_code(); + } + + std::error_code createStubs(const StubInitsMap &StubInits) override { + if (auto EC = reserveStubs(StubInits.size())) + return EC; + + for (auto &Entry : StubInits) + createStubInternal(Entry.first(), Entry.second.first, + Entry.second.second); + + return std::error_code(); + } + + JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + auto Key = I->second.first; + void *StubAddr = IndirectStubsInfos[Key.first].getStub(Key.second); + assert(StubAddr && "Missing stub address"); + auto StubTargetAddr = + static_cast(reinterpret_cast(StubAddr)); + auto StubSymbol = JITSymbol(StubTargetAddr, I->second.second); + if (ExportedStubsOnly && !StubSymbol.isExported()) + return nullptr; + return StubSymbol; + } + + JITSymbol findPointer(StringRef Name) override { + auto I = StubIndexes.find(Name); + if (I == StubIndexes.end()) + return nullptr; + auto Key = I->second.first; + void *PtrAddr = IndirectStubsInfos[Key.first].getPtr(Key.second); + assert(PtrAddr && "Missing pointer address"); + auto PtrTargetAddr = + static_cast(reinterpret_cast(PtrAddr)); + return JITSymbol(PtrTargetAddr, I->second.second); + } + + std::error_code updatePointer(StringRef Name, TargetAddress NewAddr) override { + auto I = StubIndexes.find(Name); + assert(I != StubIndexes.end() && "No stub pointer for symbol"); + auto Key = I->second.first; + *IndirectStubsInfos[Key.first].getPtr(Key.second) = + reinterpret_cast(static_cast(NewAddr)); + return std::error_code(); + } + +private: + + std::error_code reserveStubs(unsigned NumStubs) { + if (NumStubs <= FreeStubs.size()) + return std::error_code(); + + unsigned NewStubsRequired = NumStubs - FreeStubs.size(); + unsigned NewBlockId = IndirectStubsInfos.size(); + typename TargetT::IndirectStubsInfo ISI; + if (auto EC = TargetT::emitIndirectStubsBlock(ISI, NewStubsRequired, + nullptr)) + return EC; + for (unsigned I = 0; I < ISI.getNumStubs(); ++I) + FreeStubs.push_back(std::make_pair(NewBlockId, I)); + IndirectStubsInfos.push_back(std::move(ISI)); + return std::error_code(); + } + + void createStubInternal(StringRef StubName, TargetAddress InitAddr, + JITSymbolFlags StubFlags) { + auto Key = FreeStubs.back(); + FreeStubs.pop_back(); + *IndirectStubsInfos[Key.first].getPtr(Key.second) = + reinterpret_cast(static_cast(InitAddr)); + StubIndexes[StubName] = std::make_pair(Key, StubFlags); + } + + std::vector IndirectStubsInfos; + typedef std::pair StubKey; + std::vector FreeStubs; + StringMap> StubIndexes; }; /// @brief Build a function pointer of FunctionType with the given constant @@ -236,7 +350,7 @@ GlobalVariable* createImplPointer(PointerType &PT, Module &M, /// @brief Turn a function declaration into a stub function that makes an /// indirect call using the given function pointer. -void makeStub(Function &F, GlobalVariable &ImplPointer); +void makeStub(Function &F, Value &ImplPointer); /// @brief Raise linkage types and rename as necessary to ensure that all /// symbols are accessible for other modules. 
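// An illustrative sketch (assumed usage, not taken from the patch) of how
// the callback and stub managers above compose on x86-64; `compileFoo` is a
// hypothetical function returning the compiled body's address:
//
//   LocalJITCompileCallbackManager<OrcX86_64> CCMgr(/*ErrorHandlerAddress=*/0);
//   LocalIndirectStubsManager<OrcX86_64> StubsMgr;
//
//   // Reserve a callback and aim a named stub at it.
//   auto CCInfo = CCMgr.getCompileCallback();
//   StubsMgr.createStub("foo", CCInfo.getAddress(), JITSymbolFlags::Exported);
//
//   // The first call through the stub reaches the callback, which compiles
//   // the body and re-aims the stub before returning the new address.
//   CCInfo.setCompileAction([&]() {
//     TargetAddress FooBodyAddr = compileFoo(); // hypothetical
//     StubsMgr.updatePointer("foo", FooBodyAddr);
//     return FooBodyAddr;
//   });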
@@ -289,6 +403,10 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV, ValueMaterializer *Materializer = nullptr, GlobalVariable *NewGV = nullptr); +/// @brief Clone a global alias declaration into the destination module. +GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, + ValueToValueMapTy &VMap); + } // End namespace orc. } // End namespace llvm. diff --git a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h index 93ba02b38706..a5286ff9adde 100644 --- a/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h @@ -67,10 +67,10 @@ private: } else return nullptr; case Emitting: - // Calling "emit" can trigger external symbol lookup (e.g. to check for - // pre-existing definitions of common-symbol), but it will never find in - // this module that it would not have found already, so return null from - // here. + // Calling "emit" can trigger a recursive call to 'find' (e.g. to check + // for pre-existing definitions of common-symbol), but any symbol in + // this module would already have been found internally (in the + // RuntimeDyld that did the lookup), so just return a nullptr here. return nullptr; case Emitted: return B.findSymbolIn(Handle, Name, ExportedSymbolsOnly); diff --git a/include/llvm/ExecutionEngine/Orc/LogicalDylib.h b/include/llvm/ExecutionEngine/Orc/LogicalDylib.h index 28700ef347d6..883fa9eac560 100644 --- a/include/llvm/ExecutionEngine/Orc/LogicalDylib.h +++ b/include/llvm/ExecutionEngine/Orc/LogicalDylib.h @@ -14,6 +14,10 @@ #ifndef LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H #define LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H +#include "llvm/ExecutionEngine/Orc/JITSymbol.h" +#include +#include + namespace llvm { namespace orc { @@ -28,6 +32,12 @@ private: typedef std::vector<BaseLayerModuleSetHandleT> BaseLayerHandleList; struct LogicalModule { + // Make this move-only to ensure it doesn't get duplicated across moves of + // LogicalDylib or anything like that. + LogicalModule(LogicalModule &&RHS) + : Resources(std::move(RHS.Resources)), + BaseLayerHandles(std::move(RHS.BaseLayerHandles)) {} + LogicalModule() = default; LogicalModuleResources Resources; BaseLayerHandleList BaseLayerHandles; }; @@ -46,6 +56,13 @@ public: BaseLayer.removeModuleSet(BLH); } + // If possible, remove this and ~LogicalDylib once the work in the dtor is + // moved to members (e.g. self-unregistering base layer handles).
+ LogicalDylib(LogicalDylib &&RHS) + : BaseLayer(std::move(RHS.BaseLayer)), + LogicalModules(std::move(RHS.LogicalModules)), + DylibResources(std::move(RHS.DylibResources)) {} + LogicalModuleHandle createLogicalModule() { LogicalModules.push_back(LogicalModule()); return std::prev(LogicalModules.end()); @@ -69,22 +86,27 @@ public: } JITSymbol findSymbolInLogicalModule(LogicalModuleHandle LMH, - const std::string &Name) { + const std::string &Name, + bool ExportedSymbolsOnly) { + + if (auto StubSym = LMH->Resources.findSymbol(Name, ExportedSymbolsOnly)) + return StubSym; + for (auto BLH : LMH->BaseLayerHandles) - if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, false)) + if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly)) return Symbol; return nullptr; } JITSymbol findSymbolInternally(LogicalModuleHandle LMH, const std::string &Name) { - if (auto Symbol = findSymbolInLogicalModule(LMH, Name)) + if (auto Symbol = findSymbolInLogicalModule(LMH, Name, false)) return Symbol; for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end(); LMI != LME; ++LMI) { if (LMI != LMH) - if (auto Symbol = findSymbolInLogicalModule(LMI, Name)) + if (auto Symbol = findSymbolInLogicalModule(LMI, Name, false)) return Symbol; } @@ -92,11 +114,10 @@ public: } JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) { - for (auto &LM : LogicalModules) - for (auto BLH : LM.BaseLayerHandles) - if (auto Symbol = - BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly)) - return Symbol; + for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end(); + LMI != LME; ++LMI) + if (auto Sym = findSymbolInLogicalModule(LMI, Name, ExportedSymbolsOnly)) + return Sym; return nullptr; } @@ -106,7 +127,6 @@ protected: BaseLayerT BaseLayer; LogicalModuleList LogicalModules; LogicalDylibResources DylibResources; - }; } // End namespace orc. diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h index f3094dafae3c..2acfecfb94dc 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h @@ -39,9 +39,12 @@ protected: void operator=(const LinkedObjectSet&) = delete; public: LinkedObjectSet(RuntimeDyld::MemoryManager &MemMgr, - RuntimeDyld::SymbolResolver &Resolver) + RuntimeDyld::SymbolResolver &Resolver, + bool ProcessAllSections) : RTDyld(llvm::make_unique(MemMgr, Resolver)), - State(Raw) {} + State(Raw) { + RTDyld->setProcessAllSections(ProcessAllSections); + } virtual ~LinkedObjectSet() {} @@ -64,18 +67,9 @@ protected: RTDyld->mapSectionAddress(LocalAddress, TargetAddr); } - void takeOwnershipOfBuffer(std::unique_ptr B) { - OwnedBuffers.push_back(std::move(B)); - } - protected: std::unique_ptr RTDyld; enum { Raw, Finalizing, Finalized } State; - - // FIXME: This ownership hack only exists because RuntimeDyldELF still - // wants to be able to inspect the original object when resolving - // relocations. As soon as that can be fixed this should be removed. - std::vector> OwnedBuffers; }; typedef std::list> LinkedObjectSetListT; @@ -83,16 +77,6 @@ protected: public: /// @brief Handle to a set of loaded objects. typedef LinkedObjectSetListT::iterator ObjSetHandleT; - - // Ownership hack. - // FIXME: Remove this as soon as RuntimeDyldELF can apply relocations without - // referencing the original object. 
- template - void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) { - for (auto &MB : MBs) - (*H)->takeOwnershipOfBuffer(std::move(MB)); - } - }; /// @brief Default (no-op) action to perform when loading objects. @@ -117,16 +101,16 @@ private: class ConcreteLinkedObjectSet : public LinkedObjectSet { public: ConcreteLinkedObjectSet(MemoryManagerPtrT MemMgr, - SymbolResolverPtrT Resolver) - : LinkedObjectSet(*MemMgr, *Resolver), MemMgr(std::move(MemMgr)), - Resolver(std::move(Resolver)) { } + SymbolResolverPtrT Resolver, + bool ProcessAllSections) + : LinkedObjectSet(*MemMgr, *Resolver, ProcessAllSections), + MemMgr(std::move(MemMgr)), Resolver(std::move(Resolver)) { } void Finalize() override { State = Finalizing; RTDyld->resolveRelocations(); RTDyld->registerEHFrames(); MemMgr->finalizeMemory(); - OwnedBuffers.clear(); State = Finalized; } @@ -137,9 +121,11 @@ private: template std::unique_ptr - createLinkedObjectSet(MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver) { + createLinkedObjectSet(MemoryManagerPtrT MemMgr, SymbolResolverPtrT Resolver, + bool ProcessAllSections) { typedef ConcreteLinkedObjectSet LOS; - return llvm::make_unique(std::move(MemMgr), std::move(Resolver)); + return llvm::make_unique(std::move(MemMgr), std::move(Resolver), + ProcessAllSections); } public: @@ -158,7 +144,18 @@ public: NotifyLoadedFtor NotifyLoaded = NotifyLoadedFtor(), NotifyFinalizedFtor NotifyFinalized = NotifyFinalizedFtor()) : NotifyLoaded(std::move(NotifyLoaded)), - NotifyFinalized(std::move(NotifyFinalized)) {} + NotifyFinalized(std::move(NotifyFinalized)), + ProcessAllSections(false) {} + + /// @brief Set the 'ProcessAllSections' flag. + /// + /// If set to true, all sections in each object file will be allocated using + /// the memory manager, rather than just the sections required for execution. + /// + /// This is kludgy, and may be removed in the future. + void setProcessAllSections(bool ProcessAllSections) { + this->ProcessAllSections = ProcessAllSections; + } /// @brief Add a set of objects (or archives) that will be treated as a unit /// for the purposes of symbol lookup and memory management. @@ -180,7 +177,8 @@ public: ObjSetHandleT Handle = LinkedObjSetList.insert( LinkedObjSetList.end(), - createLinkedObjectSet(std::move(MemMgr), std::move(Resolver))); + createLinkedObjectSet(std::move(MemMgr), std::move(Resolver), + ProcessAllSections)); LinkedObjectSet &LOS = **Handle; LoadedObjInfoList LoadedObjInfos; @@ -276,6 +274,7 @@ private: LinkedObjectSetListT LinkedObjSetList; NotifyLoadedFtor NotifyLoaded; NotifyFinalizedFtor NotifyFinalized; + bool ProcessAllSections; }; } // End namespace orc. diff --git a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index 7af662085474..f96e83ed5a1a 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -87,14 +87,6 @@ public: BaseLayer.mapSectionAddress(H, LocalAddress, TargetAddr); } - // Ownership hack. - // FIXME: Remove this as soon as RuntimeDyldELF can apply relocations without - // referencing the original object. - template - void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) { - BaseLayer.takeOwnershipOfBuffers(H, std::move(MBs)); - } - /// @brief Access the transform functor directly. 
 TransformFtor &getTransform() { return Transform; } diff --git a/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h b/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h index 309f5a96090e..246d3e0a9fc6 100644 --- a/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h +++ b/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h @@ -9,42 +9,92 @@ // // Target specific code for Orc, e.g. callback assembly. // +// Target classes should be part of the JIT *target* process, not the host +// process (except where you're doing hosted JITing and the two are one and the +// same). +// //===----------------------------------------------------------------------===// #ifndef LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H #define LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H #include "IndirectionUtils.h" +#include "llvm/Support/Memory.h" namespace llvm { namespace orc { class OrcX86_64 { public: - static const char *ResolverBlockName; + static const unsigned PageSize = 4096; + static const unsigned PointerSize = 8; + static const unsigned TrampolineSize = 8; + static const unsigned ResolverCodeSize = 0x78; - /// @brief Insert module-level inline callback asm into module M for the - /// symbols managed by JITResolveCallbackHandler J. - static void insertResolverBlock(Module &M, - JITCompileCallbackManagerBase &JCBM); + typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, + void *TrampolineId); - /// @brief Get a label name from the given index. - typedef std::function<std::string(unsigned)> LabelNameFtor; + /// @brief Write the resolver code into the given memory. The user is + /// responsible for allocating the memory and setting permissions. + static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, + void *CallbackMgr); - /// @brief Insert the requested number of trampolines into the given module. - /// @param M Module to insert the call block into. - /// @param NumCalls Number of calls to create in the call block. - /// @param StartIndex Optional argument specifying the index suffix to start - /// with. - /// @return A functor that provides the symbol name for each entry in the call - /// block. + /// @brief Write the requested number of trampolines into the given memory, + /// which must be big enough to hold 1 pointer, plus NumTrampolines + /// trampolines. + static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines); + + /// @brief Provide information about stub blocks generated by the + /// emitIndirectStubsBlock function. + class IndirectStubsInfo { + friend class OrcX86_64; + public: + const static unsigned StubSize = 8; + const static unsigned PtrSize = 8; + + IndirectStubsInfo() : NumStubs(0) {} + IndirectStubsInfo(IndirectStubsInfo &&Other) + : NumStubs(Other.NumStubs), StubsMem(std::move(Other.StubsMem)) { + Other.NumStubs = 0; + } + IndirectStubsInfo& operator=(IndirectStubsInfo &&Other) { + NumStubs = Other.NumStubs; + Other.NumStubs = 0; + StubsMem = std::move(Other.StubsMem); + return *this; + } + + /// @brief Number of stubs in this block. + unsigned getNumStubs() const { return NumStubs; } + + /// @brief Get a pointer to the stub at the given index, which must be in + /// the range 0 .. getNumStubs() - 1. + void* getStub(unsigned Idx) const { + return static_cast<uint64_t*>(StubsMem.base()) + Idx; + } + + /// @brief Get a pointer to the implementation-pointer at the given index, + /// which must be in the range 0 .. getNumStubs() - 1.
+ void** getPtr(unsigned Idx) const { + char *PtrsBase = + static_cast(StubsMem.base()) + NumStubs * StubSize; + return reinterpret_cast(PtrsBase) + Idx; + } + private: + unsigned NumStubs; + sys::OwningMemoryBlock StubsMem; + }; + + /// @brief Emit at least MinStubs worth of indirect call stubs, rounded out to + /// the nearest page size. /// - static LabelNameFtor insertCompileCallbackTrampolines( - Module &M, - TargetAddress TrampolineAddr, - unsigned NumCalls, - unsigned StartIndex = 0); - + /// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k + /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 + /// will return a block of 1024 (2-pages worth). + static std::error_code emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal); }; } // End namespace orc. diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h index a808d9231167..385b8d0a30b1 100644 --- a/include/llvm/ExecutionEngine/RuntimeDyld.h +++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -17,8 +17,10 @@ #include "JITSymbolFlags.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/Memory.h" #include "llvm/DebugInfo/DIContext.h" +#include #include namespace llvm { @@ -59,26 +61,33 @@ public: class LoadedObjectInfo : public llvm::LoadedObjectInfo { friend class RuntimeDyldImpl; public: - LoadedObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : RTDyld(RTDyld), BeginIdx(BeginIdx), EndIdx(EndIdx) { } + typedef std::map ObjSectionToIDMap; + + LoadedObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) + : RTDyld(RTDyld), ObjSecToIDMap(ObjSecToIDMap) { } virtual object::OwningBinary getObjectForDebug(const object::ObjectFile &Obj) const = 0; - uint64_t getSectionLoadAddress(StringRef Name) const; + uint64_t + getSectionLoadAddress(const object::SectionRef &Sec) const override; protected: virtual void anchor(); RuntimeDyldImpl &RTDyld; - unsigned BeginIdx, EndIdx; + ObjSectionToIDMap ObjSecToIDMap; }; template struct LoadedObjectInfoHelper : LoadedObjectInfo { - LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : LoadedObjectInfo(RTDyld, BeginIdx, EndIdx) {} + protected: + LoadedObjectInfoHelper(const LoadedObjectInfoHelper &) = default; + LoadedObjectInfoHelper() = default; + + public: + LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld, + LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap) + : LoadedObjectInfo(RTDyld, std::move(ObjSecToIDMap)) {} std::unique_ptr clone() const override { return llvm::make_unique(static_cast(*this)); } @@ -87,7 +96,7 @@ public: /// \brief Memory Management. class MemoryManager { public: - virtual ~MemoryManager() {}; + virtual ~MemoryManager() {} /// Allocate a memory block of (at least) the given size suitable for /// executable code. The SectionID is a unique identifier assigned by the @@ -149,7 +158,7 @@ public: /// \brief Symbol resolution. class SymbolResolver { public: - virtual ~SymbolResolver() {}; + virtual ~SymbolResolver() {} /// This method returns the address of the specified function or variable. /// It is used to resolve symbols during module linking. 
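// An illustrative resolver sketch (assumed usage, not taken from the patch):
// the ORC layers above consume RuntimeDyld::SymbolResolver instances,
// typically built with createLambdaResolver as the CompileOnDemandLayer does
// for its globals module; `StubsMgr` is assumed to be an IndirectStubsManager
// in scope:
//
//   auto Resolver = createLambdaResolver(
//       [&](const std::string &Name) {
//         // External lookup: prefer a local stub if one exists.
//         if (auto Sym = StubsMgr->findStub(Name, false))
//           return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
//         return RuntimeDyld::SymbolInfo(nullptr);
//       },
//       [](const std::string &Name) {
//         return RuntimeDyld::SymbolInfo(nullptr); // no dylib-level symbols
//       });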
@@ -244,4 +253,4 @@ private: } // end namespace llvm -#endif +#endif // LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H diff --git a/include/llvm/ExecutionEngine/SectionMemoryManager.h b/include/llvm/ExecutionEngine/SectionMemoryManager.h index 0b0dcb021f14..7bb96eb8b71b 100644 --- a/include/llvm/ExecutionEngine/SectionMemoryManager.h +++ b/include/llvm/ExecutionEngine/SectionMemoryManager.h @@ -83,10 +83,28 @@ public: virtual void invalidateInstructionCache(); private: + struct FreeMemBlock { + // The actual block of free memory + sys::MemoryBlock Free; + // If there is a pending allocation from the same reservation right before + // this block, store its index in PendingMem, to be able to update the + // pending region if part of this block is allocated, rather than having to + // create a new one. + unsigned PendingPrefixIndex; + }; + struct MemoryGroup { - SmallVector<sys::MemoryBlock, 16> AllocatedMem; - SmallVector<sys::MemoryBlock, 16> FreeMem; - sys::MemoryBlock Near; + // PendingMem contains all blocks of memory (subblocks of AllocatedMem) + // which have not yet had their permissions applied, but have been given + // out to the user. FreeMem contains all blocks of memory which have + // neither had their permissions applied nor been given out to the user. + SmallVector<sys::MemoryBlock, 16> PendingMem; + SmallVector<FreeMemBlock, 16> FreeMem; + + // All memory blocks that have been requested from the system + SmallVector<sys::MemoryBlock, 16> AllocatedMem; + + sys::MemoryBlock Near; }; uint8_t *allocateSection(MemoryGroup &MemGroup, uintptr_t Size, @@ -103,4 +121,3 @@ private: } #endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H - diff --git a/include/llvm/IR/Argument.h b/include/llvm/IR/Argument.h index fc04fe71cbf0..0092f49e49ad 100644 --- a/include/llvm/IR/Argument.h +++ b/include/llvm/IR/Argument.h @@ -21,8 +21,7 @@ namespace llvm { -template<typename ValueSubClass, typename ItemParentClass> - class SymbolTableListTraits; +template <typename NodeTy> class SymbolTableListTraits; /// \brief LLVM Argument representation /// @@ -36,7 +35,7 @@ class Argument : public Value, public ilist_node<Argument> { virtual void anchor(); Function *Parent; - friend class SymbolTableListTraits<Argument, Function>; + friend class SymbolTableListTraits<Argument>; void setParent(Function *parent); public: @@ -64,8 +63,8 @@ public: /// containing function, return the number of bytes known to be /// dereferenceable. Otherwise, zero is returned. uint64_t getDereferenceableBytes() const; - - /// \brief If this argument has the dereferenceable_or_null attribute on + + /// \brief If this argument has the dereferenceable_or_null attribute on /// it in its containing function, return the number of bytes known to be /// dereferenceable. Otherwise, zero is returned. uint64_t getDereferenceableOrNullBytes() const; diff --git a/include/llvm/IR/AssemblyAnnotationWriter.h b/include/llvm/IR/AssemblyAnnotationWriter.h index 19e32a2dcdcc..6e1f5c43e12e 100644 --- a/include/llvm/IR/AssemblyAnnotationWriter.h +++ b/include/llvm/IR/AssemblyAnnotationWriter.h @@ -27,7 +27,6 @@ class formatted_raw_ostream; class AssemblyAnnotationWriter { public: - virtual ~AssemblyAnnotationWriter(); /// emitFunctionAnnot - This may be implemented to emit a string right before @@ -50,7 +49,7 @@ public: /// emitInstructionAnnot - This may be implemented to emit a string right /// before an instruction is emitted.
- virtual void emitInstructionAnnot(const Instruction *, + virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &) {} /// printInfoComment - This may be implemented to emit a comment to the diff --git a/include/llvm/IR/Attributes.h b/include/llvm/IR/Attributes.h index 4d6d7da1fa5b..0e3373165407 100644 --- a/include/llvm/IR/Attributes.h +++ b/include/llvm/IR/Attributes.h @@ -33,6 +33,7 @@ class AttributeSetImpl; class AttributeSetNode; class Constant; template struct DenseMapInfo; +class Function; class LLVMContext; class Type; @@ -64,65 +65,15 @@ public: enum AttrKind { // IR-Level Attributes None, ///< No attributes have been set - Alignment, ///< Alignment of parameter (5 bits) - ///< stored as log2 of alignment with +1 bias - ///< 0 means unaligned (different from align(1)) - AlwaysInline, ///< inline=always - Builtin, ///< Callee is recognized as a builtin, despite - ///< nobuiltin attribute on its declaration. - ByVal, ///< Pass structure by value - InAlloca, ///< Pass structure in an alloca - Cold, ///< Marks function as being in a cold path. - Convergent, ///< Can only be moved to control-equivalent blocks - InlineHint, ///< Source said inlining was desirable - InReg, ///< Force argument to be passed in register - JumpTable, ///< Build jump-instruction tables and replace refs. - MinSize, ///< Function must be optimized for size first - Naked, ///< Naked function - Nest, ///< Nested function static chain - NoAlias, ///< Considered to not alias after call - NoBuiltin, ///< Callee isn't recognized as a builtin - NoCapture, ///< Function creates no aliases of pointer - NoDuplicate, ///< Call cannot be duplicated - NoImplicitFloat, ///< Disable implicit floating point insts - NoInline, ///< inline=never - NonLazyBind, ///< Function is called early and/or - ///< often, so lazy binding isn't worthwhile - NonNull, ///< Pointer is known to be not null - Dereferenceable, ///< Pointer is known to be dereferenceable - DereferenceableOrNull, ///< Pointer is either null or dereferenceable - NoRedZone, ///< Disable redzone - NoReturn, ///< Mark the function as not returning - NoUnwind, ///< Function doesn't unwind stack - OptimizeForSize, ///< opt_size - OptimizeNone, ///< Function must not be optimized. - ReadNone, ///< Function does not access memory - ReadOnly, ///< Function only reads from memory - ArgMemOnly, ///< Funciton can access memory only using pointers - ///< based on its arguments. - Returned, ///< Return value is always equal to this argument - ReturnsTwice, ///< Function can return twice - SExt, ///< Sign extended before/after call - StackAlignment, ///< Alignment of stack for function (3 bits) - ///< stored as log2 of alignment with +1 bias 0 - ///< means unaligned (different from - ///< alignstack=(1)) - StackProtect, ///< Stack protection. - StackProtectReq, ///< Stack protection required. - StackProtectStrong, ///< Strong Stack protection. - SafeStack, ///< Safe Stack protection. - StructRet, ///< Hidden pointer to structure to return - SanitizeAddress, ///< AddressSanitizer is on. - SanitizeThread, ///< ThreadSanitizer is on. - SanitizeMemory, ///< MemorySanitizer is on. 
- UWTable, ///< Function must be in a unwind table - ZExt, ///< Zero extended before/after call - + #define GET_ATTR_ENUM + #include "llvm/IR/Attributes.inc" EndAttrKinds ///< Sentinel value useful for loops }; + private: AttributeImpl *pImpl; Attribute(AttributeImpl *A) : pImpl(A) {} + public: Attribute() : pImpl(nullptr) {} @@ -189,11 +140,11 @@ public: unsigned getStackAlignment() const; /// \brief Returns the number of dereferenceable bytes from the - /// dereferenceable attribute (or zero if unknown). + /// dereferenceable attribute. uint64_t getDereferenceableBytes() const; /// \brief Returns the number of dereferenceable_or_null bytes from the - /// dereferenceable_or_null attribute (or zero if unknown). + /// dereferenceable_or_null attribute. uint64_t getDereferenceableOrNullBytes() const; /// \brief The Attribute is converted to a string of equivalent mnemonic. This @@ -226,6 +177,7 @@ public: ReturnIndex = 0U, FunctionIndex = ~0U }; + private: friend class AttrBuilder; friend class AttributeSetImpl; @@ -249,8 +201,8 @@ private: ArrayRef<std::pair<unsigned, AttributeSetNode*> > Attrs); - explicit AttributeSet(AttributeSetImpl *LI) : pImpl(LI) {} + public: AttributeSet() : pImpl(nullptr) {} @@ -276,6 +228,11 @@ public: AttributeSet addAttribute(LLVMContext &C, unsigned Index, StringRef Kind, StringRef Value) const; + /// Add an attribute to the attribute set at the given indices. Because + /// attribute sets are immutable, this returns a new set. + AttributeSet addAttribute(LLVMContext &C, ArrayRef<unsigned> Indices, + Attribute A) const; + /// \brief Add attributes to the attribute set at the given index. Because /// attribute sets are immutable, this returns a new set. AttributeSet addAttributes(LLVMContext &C, unsigned Index, @@ -284,13 +241,13 @@ public: /// \brief Remove the specified attribute at the specified index from this /// attribute list. Because attribute lists are immutable, this returns the /// new list. - AttributeSet removeAttribute(LLVMContext &C, unsigned Index, + AttributeSet removeAttribute(LLVMContext &C, unsigned Index, Attribute::AttrKind Attr) const; /// \brief Remove the specified attributes at the specified index from this /// attribute list. Because attribute lists are immutable, this returns the /// new list. - AttributeSet removeAttributes(LLVMContext &C, unsigned Index, + AttributeSet removeAttributes(LLVMContext &C, unsigned Index, AttributeSet Attrs) const; /// \brief Remove the specified attributes at the specified index from this @@ -439,6 +396,7 @@ class AttrBuilder { uint64_t StackAlignment; uint64_t DerefBytes; uint64_t DerefOrNullBytes; + public: AttrBuilder() : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0), @@ -511,8 +469,8 @@ public: /// \brief Retrieve the stack alignment attribute, if it exists. uint64_t getStackAlignment() const { return StackAlignment; } - /// \brief Retrieve the number of dereferenceable bytes, if the dereferenceable - /// attribute exists (zero is returned otherwise). + /// \brief Retrieve the number of dereferenceable bytes, if the + /// dereferenceable attribute exists (zero is returned otherwise). uint64_t getDereferenceableBytes() const { return DerefBytes; } /// \brief Retrieve the number of dereferenceable_or_null bytes, if the @@ -573,7 +531,14 @@ public: namespace AttributeFuncs { /// \brief Which attributes cannot be applied to a type. -AttrBuilder typeIncompatible(const Type *Ty); +AttrBuilder typeIncompatible(Type *Ty); + +/// \returns True if the two functions have compatible target-independent +/// attributes for inlining purposes.
+bool areInlineCompatible(const Function &Caller, const Function &Callee); + +/// \brief Merge caller's and callee's attributes. +void mergeAttributesForInlining(Function &Caller, const Function &Callee); } // end AttributeFuncs namespace diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td new file mode 100644 index 000000000000..797cd55427b3 --- /dev/null +++ b/include/llvm/IR/Attributes.td @@ -0,0 +1,192 @@ +/// Attribute base class. +class Attr<string S> { + // String representation of this attribute in the IR. + string AttrString = S; +} + +/// Enum attribute. +class EnumAttr<string S> : Attr<S>; + +/// StringBool attribute. +class StrBoolAttr<string S> : Attr<S>; + +/// Target-independent enum attributes. + +/// Alignment of parameter (5 bits) stored as log2 of alignment with +1 bias. +/// 0 means unaligned (different from align(1)). +def Alignment : EnumAttr<"align">; + +/// inline=always. +def AlwaysInline : EnumAttr<"alwaysinline">; + +/// Function can access memory only using pointers based on its arguments. +def ArgMemOnly : EnumAttr<"argmemonly">; + +/// Callee is recognized as a builtin, despite nobuiltin attribute on its +/// declaration. +def Builtin : EnumAttr<"builtin">; + +/// Pass structure by value. +def ByVal : EnumAttr<"byval">; + +/// Marks function as being in a cold path. +def Cold : EnumAttr<"cold">; + +/// Can only be moved to control-equivalent blocks. +def Convergent : EnumAttr<"convergent">; + +/// Pointer is known to be dereferenceable. +def Dereferenceable : EnumAttr<"dereferenceable">; + +/// Pointer is either null or dereferenceable. +def DereferenceableOrNull : EnumAttr<"dereferenceable_or_null">; + +/// Function may only access memory that is inaccessible from IR. +def InaccessibleMemOnly : EnumAttr<"inaccessiblememonly">; + +/// Function may only access memory that is either inaccessible from the IR, +/// or pointed to by its pointer arguments. +def InaccessibleMemOrArgMemOnly : EnumAttr<"inaccessiblemem_or_argmemonly">; + +/// Pass structure in an alloca. +def InAlloca : EnumAttr<"inalloca">; + +/// Source said inlining was desirable. +def InlineHint : EnumAttr<"inlinehint">; + +/// Force argument to be passed in register. +def InReg : EnumAttr<"inreg">; + +/// Build jump-instruction tables and replace refs. +def JumpTable : EnumAttr<"jumptable">; + +/// Function must be optimized for size first. +def MinSize : EnumAttr<"minsize">; + +/// Naked function. +def Naked : EnumAttr<"naked">; + +/// Nested function static chain. +def Nest : EnumAttr<"nest">; + +/// Considered to not alias after call. +def NoAlias : EnumAttr<"noalias">; + +/// Callee isn't recognized as a builtin. +def NoBuiltin : EnumAttr<"nobuiltin">; + +/// Function creates no aliases of pointer. +def NoCapture : EnumAttr<"nocapture">; + +/// Call cannot be duplicated. +def NoDuplicate : EnumAttr<"noduplicate">; + +/// Disable implicit floating point insts. +def NoImplicitFloat : EnumAttr<"noimplicitfloat">; + +/// inline=never. +def NoInline : EnumAttr<"noinline">; + +/// Function is called early and/or often, so lazy binding isn't worthwhile. +def NonLazyBind : EnumAttr<"nonlazybind">; + +/// Pointer is known to be not null. +def NonNull : EnumAttr<"nonnull">; + +/// The function does not recurse. +def NoRecurse : EnumAttr<"norecurse">; + +/// Disable redzone. +def NoRedZone : EnumAttr<"noredzone">; + +/// Mark the function as not returning. +def NoReturn : EnumAttr<"noreturn">; + +/// Function doesn't unwind stack. +def NoUnwind : EnumAttr<"nounwind">; + +/// opt_size.
+def OptimizeForSize : EnumAttr<"optsize">; + +/// Function must not be optimized. +def OptimizeNone : EnumAttr<"optnone">; + +/// Function does not access memory. +def ReadNone : EnumAttr<"readnone">; + +/// Function only reads from memory. +def ReadOnly : EnumAttr<"readonly">; + +/// Return value is always equal to this argument. +def Returned : EnumAttr<"returned">; + +/// Function can return twice. +def ReturnsTwice : EnumAttr<"returns_twice">; + +/// Safe Stack protection. +def SafeStack : EnumAttr<"safestack">; + +/// Sign extended before/after call. +def SExt : EnumAttr<"signext">; + +/// Alignment of stack for function (3 bits) stored as log2 of alignment with +/// +1 bias; 0 means unaligned (different from alignstack=(1)). +def StackAlignment : EnumAttr<"alignstack">; + +/// Stack protection. +def StackProtect : EnumAttr<"ssp">; + +/// Stack protection required. +def StackProtectReq : EnumAttr<"sspreq">; + +/// Strong Stack protection. +def StackProtectStrong : EnumAttr<"sspstrong">; + +/// Hidden pointer to structure to return. +def StructRet : EnumAttr<"sret">; + +/// AddressSanitizer is on. +def SanitizeAddress : EnumAttr<"sanitize_address">; + +/// ThreadSanitizer is on. +def SanitizeThread : EnumAttr<"sanitize_thread">; + +/// MemorySanitizer is on. +def SanitizeMemory : EnumAttr<"sanitize_memory">; + +/// Function must be in an unwind table. +def UWTable : EnumAttr<"uwtable">; + +/// Zero extended before/after call. +def ZExt : EnumAttr<"zeroext">; + +/// Target-independent string attributes. +def LessPreciseFPMAD : StrBoolAttr<"less-precise-fpmad">; +def NoInfsFPMath : StrBoolAttr<"no-infs-fp-math">; +def NoNansFPMath : StrBoolAttr<"no-nans-fp-math">; +def UnsafeFPMath : StrBoolAttr<"unsafe-fp-math">; + +class CompatRule<string F> { + // The name of the function called to check the attribute of the caller and + // callee and decide whether inlining should be allowed. The function's + // signature must match "bool(const Function&, const Function &)", where the + // first parameter is the reference to the caller and the second parameter is + // the reference to the callee. It must return false if the attributes of the + // caller and callee are incompatible, and true otherwise. + string CompatFunc = F; +} + +def : CompatRule<"isEqual<SanitizeAddressAttr>">; +def : CompatRule<"isEqual<SanitizeThreadAttr>">; +def : CompatRule<"isEqual<SanitizeMemoryAttr>">; + +class MergeRule<string F> { + // The name of the function called to merge the attributes of the caller and + // callee. The function's signature must match + // "void(Function&, const Function &)", where the first parameter is the + // reference to the caller and the second parameter is the reference to the + // callee. + string MergeFunc = F; +} + +def : MergeRule<"adjustCallerSSPLevel">; diff --git a/include/llvm/IR/BasicBlock.h b/include/llvm/IR/BasicBlock.h index 66581bfedbe6..c6b54d308ce6 100644 --- a/include/llvm/IR/BasicBlock.h +++ b/include/llvm/IR/BasicBlock.h @@ -30,22 +30,9 @@ class LLVMContext; class BlockAddress; class Function; -// Traits for intrusive list of basic blocks...
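// An illustrative checker with the signature the CompatRule defs above
// expect (an assumed example in the spirit of the isEqual<...> instances;
// the TableGen-generated code supplies the real implementations):
//
//   static bool isEqualSanitizeAddressAttr(const Function &Caller,
//                                          const Function &Callee) {
//     return Caller.hasFnAttribute(Attribute::SanitizeAddress) ==
//            Callee.hasFnAttribute(Attribute::SanitizeAddress);
//   }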
-template<> struct ilist_traits - : public SymbolTableListTraits { - - BasicBlock *createSentinel() const; - static void destroySentinel(BasicBlock*) {} - - BasicBlock *provideInitialHead() const { return createSentinel(); } - BasicBlock *ensureHead(BasicBlock*) const { return createSentinel(); } - static void noteHead(BasicBlock*, BasicBlock*) {} - - static ValueSymbolTable *getSymTab(Function *ItemParent); -private: - mutable ilist_half_node Sentinel; -}; - +template <> +struct SymbolTableListSentinelTraits + : public ilist_half_embedded_sentinel_traits {}; /// \brief LLVM Basic Block Representation /// @@ -63,16 +50,17 @@ private: /// modifying a program. However, the verifier will ensure that basic blocks /// are "well formed". class BasicBlock : public Value, // Basic blocks are data objects also - public ilist_node { + public ilist_node_with_parent { friend class BlockAddress; public: - typedef iplist InstListType; + typedef SymbolTableList InstListType; + private: InstListType InstList; Function *Parent; void setParent(Function *parent); - friend class SymbolTableListTraits; + friend class SymbolTableListTraits; BasicBlock(const BasicBlock &) = delete; void operator=(const BasicBlock &) = delete; @@ -171,7 +159,7 @@ public: /// \brief Unlink 'this' from the containing function and delete it. /// // \returns an iterator pointing to the element after the erased one. - iplist::iterator eraseFromParent(); + SymbolTableList::iterator eraseFromParent(); /// \brief Unlink this basic block from its current function and insert it /// into the function that \p MovePos lives in, right before \p MovePos. @@ -253,7 +241,7 @@ public: InstListType &getInstList() { return InstList; } /// \brief Returns a pointer to a member of the instruction list. - static iplist BasicBlock::*getSublistAccess(Instruction*) { + static InstListType BasicBlock::*getSublistAccess(Instruction*) { return &BasicBlock::InstList; } @@ -283,6 +271,8 @@ public: /// should be called while the predecessor still refers to this block. void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs = false); + bool canSplitPredecessors() const; + /// \brief Split the basic block into two basic blocks at the specified /// instruction. /// @@ -300,6 +290,9 @@ public: /// Also note that this doesn't preserve any passes. To split blocks while /// keeping loop information consistent, use the SplitBlock utility function. BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = ""); + BasicBlock *splitBasicBlock(Instruction *I, const Twine &BBName = "") { + return splitBasicBlock(I->getIterator(), BBName); + } /// \brief Returns true if there are any uses of this basic block other than /// direct branches, switches, etc. to it. @@ -309,6 +302,9 @@ public: /// basic block \p New instead of to it. void replaceSuccessorsPhiUsesWith(BasicBlock *New); + /// \brief Return true if this basic block is an exception handling block. + bool isEHPad() const { return getFirstNonPHI()->isEHPad(); } + /// \brief Return true if this basic block is a landing pad. /// /// Being a ``landing pad'' means that the basic block is the destination of @@ -337,12 +333,6 @@ private: } }; -// createSentinel is used to get hold of the node that marks the end of the -// list... (same trick used here as in ilist_traits) -inline BasicBlock *ilist_traits::createSentinel() const { - return static_cast(&Sentinel); -} - // Create wrappers for C Binding types (see CBindingWrapping.h). 
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef) diff --git a/include/llvm/IR/CFG.h b/include/llvm/IR/CFG.h index f78220a52033..e9bf09333a23 100644 --- a/include/llvm/IR/CFG.h +++ b/include/llvm/IR/CFG.h @@ -107,149 +107,13 @@ inline pred_const_range predecessors(const BasicBlock *BB) { } //===----------------------------------------------------------------------===// -// BasicBlock succ_iterator definition +// BasicBlock succ_iterator helpers //===----------------------------------------------------------------------===// -template // Successor Iterator -class SuccIterator : public std::iterator { - typedef std::iterator - super; - -public: - typedef typename super::pointer pointer; - typedef typename super::reference reference; - -private: - Term_ Term; - unsigned idx; - typedef SuccIterator Self; - - inline bool index_is_valid(int idx) { - return idx >= 0 && (unsigned) idx < Term->getNumSuccessors(); - } - - /// \brief Proxy object to allow write access in operator[] - class SuccessorProxy { - Self it; - - public: - explicit SuccessorProxy(const Self &it) : it(it) {} - - SuccessorProxy(const SuccessorProxy&) = default; - - SuccessorProxy &operator=(SuccessorProxy r) { - *this = reference(r); - return *this; - } - - SuccessorProxy &operator=(reference r) { - it.Term->setSuccessor(it.idx, r); - return *this; - } - - operator reference() const { return *it; } - }; - -public: - explicit inline SuccIterator(Term_ T) : Term(T), idx(0) {// begin iterator - } - inline SuccIterator(Term_ T, bool) // end iterator - : Term(T) { - if (Term) - idx = Term->getNumSuccessors(); - else - // Term == NULL happens, if a basic block is not fully constructed and - // consequently getTerminator() returns NULL. In this case we construct a - // SuccIterator which describes a basic block that has zero successors. - // Defining SuccIterator for incomplete and malformed CFGs is especially - // useful for debugging. - idx = 0; - } - - /// getSuccessorIndex - This is used to interface between code that wants to - /// operate on terminator instructions directly. 
- unsigned getSuccessorIndex() const { return idx; } - - inline bool operator==(const Self& x) const { return idx == x.idx; } - inline bool operator!=(const Self& x) const { return !operator==(x); } - - inline reference operator*() const { return Term->getSuccessor(idx); } - inline pointer operator->() const { return operator*(); } - - inline Self& operator++() { ++idx; return *this; } // Preincrement - - inline Self operator++(int) { // Postincrement - Self tmp = *this; ++*this; return tmp; - } - - inline Self& operator--() { --idx; return *this; } // Predecrement - inline Self operator--(int) { // Postdecrement - Self tmp = *this; --*this; return tmp; - } - - inline bool operator<(const Self& x) const { - assert(Term == x.Term && "Cannot compare iterators of different blocks!"); - return idx < x.idx; - } - - inline bool operator<=(const Self& x) const { - assert(Term == x.Term && "Cannot compare iterators of different blocks!"); - return idx <= x.idx; - } - inline bool operator>=(const Self& x) const { - assert(Term == x.Term && "Cannot compare iterators of different blocks!"); - return idx >= x.idx; - } - - inline bool operator>(const Self& x) const { - assert(Term == x.Term && "Cannot compare iterators of different blocks!"); - return idx > x.idx; - } - - inline Self& operator+=(int Right) { - unsigned new_idx = idx + Right; - assert(index_is_valid(new_idx) && "Iterator index out of bound"); - idx = new_idx; - return *this; - } - - inline Self operator+(int Right) const { - Self tmp = *this; - tmp += Right; - return tmp; - } - - inline Self& operator-=(int Right) { - return operator+=(-Right); - } - - inline Self operator-(int Right) const { - return operator+(-Right); - } - - inline int operator-(const Self& x) const { - assert(Term == x.Term && "Cannot work on iterators of different blocks!"); - int distance = idx - x.idx; - return distance; - } - - inline SuccessorProxy operator[](int offset) { - Self tmp = *this; - tmp += offset; - return SuccessorProxy(tmp); - } - - /// Get the source BB of this iterator. - inline BB_ *getSource() { - assert(Term && "Source not available, if basic block was malformed"); - return Term->getParent(); - } -}; - -typedef SuccIterator succ_iterator; -typedef SuccIterator succ_const_iterator; +typedef TerminatorInst::SuccIterator + succ_iterator; +typedef TerminatorInst::SuccIterator + succ_const_iterator; typedef llvm::iterator_range succ_range; typedef llvm::iterator_range succ_const_range; @@ -275,8 +139,8 @@ inline succ_const_range successors(const BasicBlock *BB) { return succ_const_range(succ_begin(BB), succ_end(BB)); } - -template struct isPodLike > { +template +struct isPodLike> { static const bool value = isPodLike::value; }; diff --git a/include/llvm/IR/CMakeLists.txt b/include/llvm/IR/CMakeLists.txt index dd8e04f1510c..eade87e05bc9 100644 --- a/include/llvm/IR/CMakeLists.txt +++ b/include/llvm/IR/CMakeLists.txt @@ -1,5 +1,6 @@ +set(LLVM_TARGET_DEFINITIONS Attributes.td) +tablegen(LLVM Attributes.inc -gen-attrs) + set(LLVM_TARGET_DEFINITIONS Intrinsics.td) - tablegen(LLVM Intrinsics.gen -gen-intrinsic) - add_public_tablegen_target(intrinsics_gen) diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index 2841781e8a9e..f4b8a8a5a1c9 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -41,6 +41,7 @@ template getUser()); + return arg_begin() <= U && U < arg_end(); + } + + /// \brief Determine whether the passed iterator points to a bundle operand. 
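The replacement typedefs keep the public surface unchanged; successor iteration through the CFG.h helpers still reads as before. A trivial example, with an assumed helper name:

#include "llvm/IR/CFG.h"

// Count BB's successors via the succ_iterator range helper.
static unsigned countSuccessors(llvm::BasicBlock *BB) {
  unsigned N = 0;
  for (llvm::BasicBlock *Succ : llvm::successors(BB)) {
    (void)Succ;
    ++N;
  }
  return N;
}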
+ bool isBundleOperand(Value::const_user_iterator UI) const { + return isBundleOperand(&UI.getUse()); + } + + /// \brief Determine whether the passed use points to a bundle operand. + bool isBundleOperand(const Use *U) const { + assert(getInstruction() == U->getUser()); + if (!hasOperandBundles()) + return false; + unsigned OperandNo = U - (*this)->op_begin(); + return getBundleOperandsStartIndex() <= OperandNo && + OperandNo < getBundleOperandsEndIndex(); + } + + /// \brief Determine whether the passed iterator points to a data operand. + bool isDataOperand(Value::const_user_iterator UI) const { + return isDataOperand(&UI.getUse()); + } + + /// \brief Determine whether the passed use points to a data operand. + bool isDataOperand(const Use *U) const { + return data_operands_begin() <= U && U < data_operands_end(); + } + ValTy *getArgument(unsigned ArgNo) const { assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!"); return *(arg_begin() + ArgNo); @@ -137,8 +176,7 @@ public: /// it. unsigned getArgumentNo(const Use *U) const { assert(getInstruction() && "Not a call or invoke instruction!"); - assert(arg_begin() <= U && U < arg_end() - && "Argument # out of range!"); + assert(isArgOperand(U) && "Argument # out of range!"); return U - arg_begin(); } @@ -146,21 +184,55 @@ public: /// arguments at this call site. typedef IterTy arg_iterator; - /// arg_begin/arg_end - Return iterators corresponding to the actual argument - /// list for a call site. - IterTy arg_begin() const { - assert(getInstruction() && "Not a call or invoke instruction!"); - // Skip non-arguments - return (*this)->op_begin(); - } - - IterTy arg_end() const { return (*this)->op_end() - getArgumentEndOffset(); } iterator_range args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } bool arg_empty() const { return arg_end() == arg_begin(); } unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); } + /// Given a value use iterator, returns the data operand that corresponds to + /// it. + /// Iterator must actually correspond to a data operand. + unsigned getDataOperandNo(Value::const_user_iterator UI) const { + return getDataOperandNo(&UI.getUse()); + } + + /// Given a use for a data operand, get the data operand number that + /// corresponds to it. + unsigned getDataOperandNo(const Use *U) const { + assert(getInstruction() && "Not a call or invoke instruction!"); + assert(isDataOperand(U) && "Data operand # out of range!"); + return U - data_operands_begin(); + } + + /// Type of iterator to use when looping over data operands at this call site + /// (see below). + typedef IterTy data_operand_iterator; + + /// data_operands_begin/data_operands_end - Return iterators iterating over + /// the call / invoke argument list and bundle operands. For invokes, this is + /// the set of instruction operands except the invoke target and the two + /// successor blocks; and for calls this is the set of instruction operands + /// except the call target. + + IterTy data_operands_begin() const { + assert(getInstruction() && "Not a call or invoke instruction!"); + return (*this)->op_begin(); + } + IterTy data_operands_end() const { + assert(getInstruction() && "Not a call or invoke instruction!"); + return (*this)->op_end() - (isCall() ? 
1 : 3); + } + iterator_range data_ops() const { + return make_range(data_operands_begin(), data_operands_end()); + } + bool data_operands_empty() const { + return data_operands_end() == data_operands_begin(); + } + unsigned data_operands_size() const { + return std::distance(data_operands_begin(), data_operands_end()); + } + /// getType - Return the type of the instruction that generated this call site /// Type *getType() const { return (*this)->getType(); } @@ -197,11 +269,11 @@ public: CALLSITE_DELEGATE_GETTER(getNumArgOperands()); } - ValTy *getArgOperand(unsigned i) const { + ValTy *getArgOperand(unsigned i) const { CALLSITE_DELEGATE_GETTER(getArgOperand(i)); } - bool isInlineAsm() const { + bool isInlineAsm() const { if (isCall()) return cast(getInstruction())->isInlineAsm(); return false; @@ -243,6 +315,17 @@ public: CALLSITE_DELEGATE_GETTER(paramHasAttr(i, A)); } + /// \brief Return true if the data operand at index \p i directly or + /// indirectly has the attribute \p A. + /// + /// Normal call or invoke arguments have per operand attributes, as specified + /// in the attribute set attached to this instruction, while operand bundle + /// operands may have some attributes implied by the type of its containing + /// operand bundle. + bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind A) const { + CALLSITE_DELEGATE_GETTER(dataOperandHasImpliedAttr(i, A)); + } + /// @brief Extract the alignment for a call or parameter (0=unknown). uint16_t getParamAlignment(uint16_t i) const { CALLSITE_DELEGATE_GETTER(getParamAlignment(i)); @@ -253,13 +336,20 @@ public: uint64_t getDereferenceableBytes(uint16_t i) const { CALLSITE_DELEGATE_GETTER(getDereferenceableBytes(i)); } - + /// @brief Extract the number of dereferenceable_or_null bytes for a call or /// parameter (0=unknown). uint64_t getDereferenceableOrNullBytes(uint16_t i) const { CALLSITE_DELEGATE_GETTER(getDereferenceableOrNullBytes(i)); } - + + /// @brief Determine if the parameter or return value is marked with NoAlias + /// attribute. + /// @param n The parameter to check. 1 is the first parameter, 0 is the return + bool doesNotAlias(unsigned n) const { + CALLSITE_DELEGATE_GETTER(doesNotAlias(n)); + } + /// \brief Return true if the call should not be treated as a call to a /// builtin. bool isNoBuiltin() const { @@ -315,12 +405,62 @@ public: CALLSITE_DELEGATE_SETTER(setDoesNotThrow()); } + unsigned getNumOperandBundles() const { + CALLSITE_DELEGATE_GETTER(getNumOperandBundles()); + } + + bool hasOperandBundles() const { + CALLSITE_DELEGATE_GETTER(hasOperandBundles()); + } + + unsigned getBundleOperandsStartIndex() const { + CALLSITE_DELEGATE_GETTER(getBundleOperandsStartIndex()); + } + + unsigned getBundleOperandsEndIndex() const { + CALLSITE_DELEGATE_GETTER(getBundleOperandsEndIndex()); + } + + unsigned getNumTotalBundleOperands() const { + CALLSITE_DELEGATE_GETTER(getNumTotalBundleOperands()); + } + + OperandBundleUse getOperandBundleAt(unsigned Index) const { + CALLSITE_DELEGATE_GETTER(getOperandBundleAt(Index)); + } + + Optional getOperandBundle(StringRef Name) const { + CALLSITE_DELEGATE_GETTER(getOperandBundle(Name)); + } + + Optional getOperandBundle(uint32_t ID) const { + CALLSITE_DELEGATE_GETTER(getOperandBundle(ID)); + } + + IterTy arg_begin() const { + CALLSITE_DELEGATE_GETTER(arg_begin()); + } + + IterTy arg_end() const { + CALLSITE_DELEGATE_GETTER(arg_end()); + } + #undef CALLSITE_DELEGATE_GETTER #undef CALLSITE_DELEGATE_SETTER - /// @brief Determine whether this argument is not captured. 
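Taken together, the new predicates and the delegated bundle accessors let a pass treat calls and invokes uniformly. A minimal sketch (the function name is invented, and U is assumed to be a use of CS's instruction):

#include "llvm/IR/CallSite.h"
#include "llvm/Support/raw_ostream.h"

// Classify one use of the call site, then list its operand bundle tags.
static void inspect(llvm::CallSite CS, const llvm::Use *U) {
  if (CS.isArgOperand(U))
    llvm::errs() << "use is argument #" << CS.getArgumentNo(U) << "\n";
  else if (CS.isBundleOperand(U))
    llvm::errs() << "use is an operand bundle input\n";
  else
    // Arguments and bundle inputs together are the data operands; what
    // remains is the callee and, for invokes, the two successor blocks.
    llvm::errs() << "use is the callee or a successor\n";

  for (unsigned I = 0, E = CS.getNumOperandBundles(); I != E; ++I) {
    llvm::OperandBundleUse Bundle = CS.getOperandBundleAt(I);
    llvm::errs() << Bundle.getTagName() << ": " << Bundle.Inputs.size()
                 << " input(s)\n";
  }
}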
- bool doesNotCapture(unsigned ArgNo) const { - return paramHasAttr(ArgNo + 1, Attribute::NoCapture); + void getOperandBundlesAsDefs(SmallVectorImpl &Defs) const { + const Instruction *II = getInstruction(); + // Since this is actually a getter that "looks like" a setter, don't use the + // above macros to avoid confusion. + if (isCall()) + cast(II)->getOperandBundlesAsDefs(Defs); + else + cast(II)->getOperandBundlesAsDefs(Defs); + } + + /// @brief Determine whether this data operand is not captured. + bool doesNotCapture(unsigned OpNo) const { + return dataOperandHasImpliedAttr(OpNo + 1, Attribute::NoCapture); } /// @brief Determine whether this argument is passed by value. @@ -345,13 +485,13 @@ public: return paramHasAttr(arg_size(), Attribute::InAlloca); } - bool doesNotAccessMemory(unsigned ArgNo) const { - return paramHasAttr(ArgNo + 1, Attribute::ReadNone); + bool doesNotAccessMemory(unsigned OpNo) const { + return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); } - bool onlyReadsMemory(unsigned ArgNo) const { - return paramHasAttr(ArgNo + 1, Attribute::ReadOnly) || - paramHasAttr(ArgNo + 1, Attribute::ReadNone); + bool onlyReadsMemory(unsigned OpNo) const { + return dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadOnly) || + dataOperandHasImpliedAttr(OpNo + 1, Attribute::ReadNone); } /// @brief Return true if the return value is known to be not null. @@ -378,13 +518,6 @@ public: } private: - unsigned getArgumentEndOffset() const { - if (isCall()) - return 1; // Skip Callee - else - return 3; // Skip BB, BB, Callee - } - IterTy getCallee() const { if (isCall()) // Skip Callee return cast(getInstruction())->op_end() - 1; @@ -393,7 +526,7 @@ private: } }; -class CallSite : public CallSiteBase { public: diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index 9872e6ec794d..bc050928266e 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -69,6 +69,12 @@ namespace CallingConv { // (almost) all registers. PreserveAll = 15, + // Swift - Calling convention for Swift. + Swift = 16, + + // CXX_FAST_TLS - Calling convention for access functions. + CXX_FAST_TLS = 17, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. FirstTargetCC = 64, @@ -144,7 +150,26 @@ namespace CallingConv { /// \brief MSVC calling convention that passes vectors and vector aggregates /// in SSE registers. - X86_VectorCall = 80 + X86_VectorCall = 80, + + /// \brief Calling convention used by HipHop Virtual Machine (HHVM) to + /// perform calls to and from translation cache, and for calling PHP + /// functions. + /// HHVM calling convention supports tail/sibling call elimination. + HHVM = 81, + + /// \brief HHVM calling convention for invoking C/C++ helpers. + HHVM_C = 82, + + /// X86_INTR - x86 hardware interrupt context. Callee may take one or two + /// parameters, where the 1st represents a pointer to hardware context frame + /// and the 2nd represents hardware error code, the presence of the later + /// depends on the interrupt vector taken. Valid for both 32- and 64-bit + /// subtargets. + X86_INTR = 83, + + /// The highest possible calling convention ID. Must be some 2^k - 1. 
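The conventions being added here are ordinary enumerators, so adopting one is a single call on a function (or a call site); HHVM is just the example:

#include "llvm/IR/Function.h"

// Mark F as using the HHVM convention introduced above (ID 81).
static void markAsHHVM(llvm::Function &F) {
  F.setCallingConv(llvm::CallingConv::HHVM);
}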
+ MaxID = 1023 }; } // End CallingConv namespace diff --git a/include/llvm/IR/Comdat.h b/include/llvm/IR/Comdat.h index 4d4c15fb68cd..fb79e13af3a5 100644 --- a/include/llvm/IR/Comdat.h +++ b/include/llvm/IR/Comdat.h @@ -42,7 +42,7 @@ public: SelectionKind getSelectionKind() const { return SK; } void setSelectionKind(SelectionKind Val) { SK = Val; } StringRef getName() const; - void print(raw_ostream &OS) const; + void print(raw_ostream &OS, bool IsForDebug = false) const; void dump() const; private: diff --git a/include/llvm/IR/Constant.h b/include/llvm/IR/Constant.h index 019b4343a133..bb88905aa57a 100644 --- a/include/llvm/IR/Constant.h +++ b/include/llvm/IR/Constant.h @@ -24,18 +24,18 @@ namespace llvm { /// This is an important base class in LLVM. It provides the common facilities /// of all constant values in an LLVM program. A constant is a value that is /// immutable at runtime. Functions are constants because their address is -/// immutable. Same with global variables. -/// +/// immutable. Same with global variables. +/// /// All constants share the capabilities provided in this class. All constants /// can have a null value. They can have an operand list. Constants can be /// simple (integer and floating point values), complex (arrays and structures), -/// or expression based (computations yielding a constant value composed of +/// or expression based (computations yielding a constant value composed of /// only certain operators and other constant values). -/// -/// Note that Constants are immutable (once created they never change) -/// and are fully shared by structural equivalence. This means that two -/// structurally equivalent constants will always have the same address. -/// Constants are created on demand as needed and never deleted: thus clients +/// +/// Note that Constants are immutable (once created they never change) +/// and are fully shared by structural equivalence. This means that two +/// structurally equivalent constants will always have the same address. +/// Constants are created on demand as needed and never deleted: thus clients /// don't have to worry about the lifetime of the objects. /// @brief LLVM Constant Representation class Constant : public User { @@ -59,7 +59,7 @@ public: /// getAllOnesValue. bool isAllOnesValue() const; - /// isNegativeZeroValue - Return true if the value is what would be returned + /// isNegativeZeroValue - Return true if the value is what would be returned /// by getZeroValueForNegation. bool isNegativeZeroValue() const; @@ -85,29 +85,14 @@ public: /// isConstantUsed - Return true if the constant has users other than constant /// exprs and other dangling things. bool isConstantUsed() const; - - enum PossibleRelocationsTy { - NoRelocation = 0, - LocalRelocation = 1, - GlobalRelocations = 2 - }; - - /// getRelocationInfo - This method classifies the entry according to - /// whether or not it may generate a relocation entry. This must be - /// conservative, so if it might codegen to a relocatable entry, it should say - /// so. The return values are: - /// - /// NoRelocation: This constant pool entry is guaranteed to never have a - /// relocation applied to it (because it holds a simple constant like - /// '4'). - /// LocalRelocation: This entry has relocations, but the entries are - /// guaranteed to be resolvable by the static linker, so the dynamic - /// linker will never see them. - /// GlobalRelocations: This entry may have arbitrary relocations. 
+ + /// This method classifies the entry according to whether or not it may + /// generate a relocation entry. This must be conservative, so if it might + /// codegen to a relocatable entry, it should say so. /// - /// FIXME: This really should not be in VMCore. - PossibleRelocationsTy getRelocationInfo() const; - + /// FIXME: This really should not be in IR. + bool needsRelocation() const; + /// getAggregateElement - For aggregates (struct/array/vector) return the /// constant that corresponds to the specified element if possible, or null if /// not. This can return null if the element index is a ConstantExpr, or if @@ -159,8 +144,8 @@ public: /// getIntegerValue - Return the value for an integer or pointer constant, /// or a vector thereof, with the given scalar value. - static Constant *getIntegerValue(Type* Ty, const APInt &V); - + static Constant *getIntegerValue(Type *Ty, const APInt &V); + /// removeDeadConstantUsers - If there are any dead constant users dangling /// off of this constant, remove them. This method is useful for clients /// that want to check to see if a global is unused, but don't want to deal diff --git a/include/llvm/IR/ConstantRange.h b/include/llvm/IR/ConstantRange.h index 9ded3ca36a70..fb596a3bf16e 100644 --- a/include/llvm/IR/ConstantRange.h +++ b/include/llvm/IR/ConstantRange.h @@ -82,6 +82,17 @@ public: static ConstantRange makeSatisfyingICmpRegion(CmpInst::Predicate Pred, const ConstantRange &Other); + /// Return the largest range containing all X such that "X BinOpC C" does not + /// wrap (overflow). + /// + /// Example: + /// typedef OverflowingBinaryOperator OBO; + /// makeNoWrapRegion(Add, i8 1, OBO::NoSignedWrap) == [-128, 127) + /// makeNoWrapRegion(Add, i8 1, OBO::NoUnsignedWrap) == [0, -1) + /// makeNoWrapRegion(Add, i8 0, OBO::NoUnsignedWrap) == Full Set + static ConstantRange makeNoWrapRegion(Instruction::BinaryOps BinOp, + const APInt &C, unsigned NoWrapKind); + /// Return the lower value for this range. /// const APInt &getLower() const { return Lower; } @@ -207,7 +218,7 @@ public: /// Make this range have the bit width given by \p BitWidth. The /// value is zero extended, truncated, or left alone to make it that width. ConstantRange zextOrTrunc(uint32_t BitWidth) const; - + /// Make this range have the bit width given by \p BitWidth. The /// value is sign extended, truncated, or left alone to make it that width. ConstantRange sextOrTrunc(uint32_t BitWidth) const; @@ -258,7 +269,7 @@ public: /// Return a new range that is the logical not of the current set. /// ConstantRange inverse() const; - + /// Print out the bounds to a stream. /// void print(raw_ostream &OS) const; diff --git a/include/llvm/IR/Constants.h b/include/llvm/IR/Constants.h index 0c7a84fc8bfe..a5a20c9c5701 100644 --- a/include/llvm/IR/Constants.h +++ b/include/llvm/IR/Constants.h @@ -590,7 +590,7 @@ public: /// formed with a vector or array of the specified element type. /// ConstantDataArray only works with normal float and int types that are /// stored densely in memory, not with things like i42 or x86_f80. - static bool isElementTypeCompatible(const Type *Ty); + static bool isElementTypeCompatible(Type *Ty); /// getElementAsInteger - If this is a sequential container of integers (of /// any size), return the specified element in the low bits of a uint64_t. 
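The doc-comment examples for makeNoWrapRegion translate directly into code. A sketch reproducing the first one (a no-signed-wrap add of 1 on i8), assuming the wrap-flag constants come from llvm/IR/Operator.h:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Operator.h"

// All 8-bit X for which "X + 1" cannot signed-wrap, i.e. [-128, 127).
static llvm::ConstantRange nswAddOneRegion() {
  llvm::APInt One(/*numBits=*/8, /*val=*/1);
  return llvm::ConstantRange::makeNoWrapRegion(
      llvm::Instruction::Add, One,
      llvm::OverflowingBinaryOperator::NoSignedWrap);
}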
@@ -795,7 +795,32 @@ public: } }; +//===----------------------------------------------------------------------===// +/// ConstantTokenNone - a constant token which is empty +/// +class ConstantTokenNone : public Constant { + void *operator new(size_t, unsigned) = delete; + ConstantTokenNone(const ConstantTokenNone &) = delete; + friend class Constant; + void destroyConstantImpl(); + Value *handleOperandChangeImpl(Value *From, Value *To, Use *U); + +protected: + explicit ConstantTokenNone(LLVMContext &Context) + : Constant(Type::getTokenTy(Context), ConstantTokenNoneVal, nullptr, 0) {} + // allocate space for exactly zero operands + void *operator new(size_t s) { return User::operator new(s, 0); } + +public: + /// Return the ConstantTokenNone. + static ConstantTokenNone *get(LLVMContext &Context); + + /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. + static bool classof(const Value *V) { + return V->getValueID() == ConstantTokenNoneVal; + } +}; /// BlockAddress - The address of a basic block. /// @@ -1175,7 +1200,8 @@ public: /// gets constant-folded, the type changes, or the expression is otherwise /// canonicalized. This parameter should almost always be \c false. Constant *getWithOperands(ArrayRef Ops, Type *Ty, - bool OnlyIfReduced = false) const; + bool OnlyIfReduced = false, + Type *SrcTy = nullptr) const; /// getAsInstruction - Returns an Instruction which implements the same /// operation as this ConstantExpr. The instruction is not linked to any basic diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h index aa43c02d5cd8..aeec39541154 100644 --- a/include/llvm/IR/DIBuilder.h +++ b/include/llvm/IR/DIBuilder.h @@ -158,7 +158,9 @@ namespace llvm { /// Create debugging information entry for a c++ /// style reference or rvalue reference type. - DIDerivedType *createReferenceType(unsigned Tag, DIType *RTy); + DIDerivedType *createReferenceType(unsigned Tag, DIType *RTy, + uint64_t SizeInBits = 0, + uint64_t AlignInBits = 0); /// Create debugging information entry for a typedef. /// \param Ty Original type. @@ -375,15 +377,20 @@ namespace llvm { DIType *UnderlyingType, StringRef UniqueIdentifier = ""); /// Create subroutine type. - /// \param File File in which this subroutine is defined. /// \param ParameterTypes An array of subroutine parameter types. This /// includes return type at 0th index. /// \param Flags E.g.: LValueReference. /// These flags are used to emit dwarf attributes. - DISubroutineType *createSubroutineType(DIFile *File, - DITypeRefArray ParameterTypes, + DISubroutineType *createSubroutineType(DITypeRefArray ParameterTypes, unsigned Flags = 0); + /// Create an external type reference. + /// \param Tag Dwarf TAG. + /// \param File File in which the type is defined. + /// \param UniqueIdentifier A unique identifier for the type. + DICompositeType *createExternalTypeRef(unsigned Tag, DIFile *File, + StringRef UniqueIdentifier); + /// Create a new DIType* with "artificial" flag set. DIType *createArtificialType(DIType *Ty); @@ -450,26 +457,36 @@ namespace llvm { unsigned LineNo, DIType *Ty, bool isLocalToUnit, llvm::Constant *Val, MDNode *Decl = nullptr); - /// Create a new descriptor for the specified - /// local variable. - /// \param Tag Dwarf TAG. Usually DW_TAG_auto_variable or - /// DW_TAG_arg_variable. - /// \param Scope Variable scope. - /// \param Name Variable name. - /// \param File File where this variable is defined. - /// \param LineNo Line number. - /// \param Ty Variable Type - /// \param AlwaysPreserve Boolean. 
Set to true if debug info for this - /// variable should be preserved in optimized build. - /// \param Flags Flags, e.g. artificial variable. - /// \param ArgNo If this variable is an argument then this argument's - /// number. 1 indicates 1st argument. - DILocalVariable *createLocalVariable(unsigned Tag, DIScope *Scope, - StringRef Name, DIFile *File, - unsigned LineNo, DIType *Ty, + /// Create a new descriptor for an auto variable. This is a local variable + /// that is not a subprogram parameter. + /// + /// \c Scope must be a \a DILocalScope, and thus its scope chain eventually + /// leads to a \a DISubprogram. + /// + /// If \c AlwaysPreserve, this variable will be referenced from its + /// containing subprogram, and will survive some optimizations. + DILocalVariable *createAutoVariable(DIScope *Scope, StringRef Name, + DIFile *File, unsigned LineNo, + DIType *Ty, bool AlwaysPreserve = false, - unsigned Flags = 0, - unsigned ArgNo = 0); + unsigned Flags = 0); + + /// Create a new descriptor for a parameter variable. + /// + /// \c Scope must be a \a DILocalScope, and thus its scope chain eventually + /// leads to a \a DISubprogram. + /// + /// \c ArgNo is the index (starting from \c 1) of this variable in the + /// subprogram parameters. \c ArgNo should not conflict with other + /// parameters of the same subprogram. + /// + /// If \c AlwaysPreserve, this variable will be referenced from its + /// containing subprogram, and will survive some optimizations. + DILocalVariable *createParameterVariable(DIScope *Scope, StringRef Name, + unsigned ArgNo, DIFile *File, + unsigned LineNo, DIType *Ty, + bool AlwaysPreserve = false, + unsigned Flags = 0); /// Create a new descriptor for the specified /// variable which has a complex address expression for its address. @@ -499,15 +516,15 @@ namespace llvm { /// \param Flags e.g. is this function prototyped or not. /// These flags are used to emit dwarf attributes. /// \param isOptimized True if optimization is ON. - /// \param Fn llvm::Function pointer. - /// \param TParam Function template parameters. - DISubprogram * - createFunction(DIScope *Scope, StringRef Name, StringRef LinkageName, - DIFile *File, unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, - unsigned Flags = 0, bool isOptimized = false, - Function *Fn = nullptr, MDNode *TParam = nullptr, - MDNode *Decl = nullptr); + /// \param TParams Function template parameters. + DISubprogram *createFunction(DIScope *Scope, StringRef Name, + StringRef LinkageName, DIFile *File, + unsigned LineNo, DISubroutineType *Ty, + bool isLocalToUnit, bool isDefinition, + unsigned ScopeLine, unsigned Flags = 0, + bool isOptimized = false, + DITemplateParameterArray TParams = nullptr, + DISubprogram *Decl = nullptr); /// Identical to createFunction, /// except that the resulting DbgNode is meant to be RAUWed. @@ -515,18 +532,19 @@ namespace llvm { DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags = 0, - bool isOptimized = false, Function *Fn = nullptr, - MDNode *TParam = nullptr, MDNode *Decl = nullptr); + bool isOptimized = false, DITemplateParameterArray TParams = nullptr, + DISubprogram *Decl = nullptr); /// FIXME: this is added for dragonegg. Once we update dragonegg /// to call resolve function, this will be removed. 
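A sketch of the split entry points from a frontend's perspective; DIB, Scope, File, and Ty stand for values the caller already has, and the variable name and line number are invented:

#include "llvm/IR/DIBuilder.h"

static llvm::DILocalVariable *
describeFirstParam(llvm::DIBuilder &DIB, llvm::DIScope *Scope,
                   llvm::DIFile *File, llvm::DIType *Ty) {
  // Parameters now get an explicit 1-based index instead of a fake
  // DW_TAG_arg_variable tag; auto variables use createAutoVariable().
  return DIB.createParameterVariable(Scope, "x", /*ArgNo=*/1, File,
                                     /*LineNo=*/10, Ty,
                                     /*AlwaysPreserve=*/true);
}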
- DISubprogram * - createFunction(DIScopeRef Scope, StringRef Name, StringRef LinkageName, - DIFile *File, unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, - unsigned Flags = 0, bool isOptimized = false, - Function *Fn = nullptr, MDNode *TParam = nullptr, - MDNode *Decl = nullptr); + DISubprogram *createFunction(DIScopeRef Scope, StringRef Name, + StringRef LinkageName, DIFile *File, + unsigned LineNo, DISubroutineType *Ty, + bool isLocalToUnit, bool isDefinition, + unsigned ScopeLine, unsigned Flags = 0, + bool isOptimized = false, + DITemplateParameterArray TParams = nullptr, + DISubprogram *Decl = nullptr); /// Create a new descriptor for the specified C++ method. /// See comments in \a DISubprogram* for descriptions of these fields. @@ -545,15 +563,14 @@ namespace llvm { /// \param Flags e.g. is this function prototyped or not. /// This flags are used to emit dwarf attributes. /// \param isOptimized True if optimization is ON. - /// \param Fn llvm::Function pointer. - /// \param TParam Function template parameters. + /// \param TParams Function template parameters. DISubprogram * createMethod(DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned Virtuality = 0, unsigned VTableIndex = 0, DIType *VTableHolder = nullptr, unsigned Flags = 0, bool isOptimized = false, - Function *Fn = nullptr, MDNode *TParam = nullptr); + DITemplateParameterArray TParams = nullptr); /// This creates new descriptor for a namespace with the specified /// parent scope. @@ -685,7 +702,7 @@ namespace llvm { /// has a self-reference -- \a DIBuilder needs to track the array to /// resolve cycles. void replaceArrays(DICompositeType *&T, DINodeArray Elements, - DINodeArray TParems = DINodeArray()); + DINodeArray TParams = DINodeArray()); /// Replace a temporary node. /// diff --git a/include/llvm/IR/DataLayout.h b/include/llvm/IR/DataLayout.h index 892d6c9936c0..19a3a6661feb 100644 --- a/include/llvm/IR/DataLayout.h +++ b/include/llvm/IR/DataLayout.h @@ -475,7 +475,8 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) { class StructLayout { uint64_t StructSize; unsigned StructAlignment; - unsigned NumElements; + bool IsPadded : 1; + unsigned NumElements : 31; uint64_t MemberOffsets[1]; // variable sized array! public: uint64_t getSizeInBytes() const { return StructSize; } @@ -484,6 +485,10 @@ public: unsigned getAlignment() const { return StructAlignment; } + /// Returns whether the struct has padding or not between its fields. + /// NB: Padding in nested element is not taken into account. + bool hasPadding() const { return IsPadded; } + /// \brief Given a valid byte offset into the structure, returns the structure /// index that contains it. unsigned getElementContainingOffset(uint64_t Offset) const; diff --git a/include/llvm/IR/DebugInfo.h b/include/llvm/IR/DebugInfo.h index 5429648ade2c..4caceacbb58e 100644 --- a/include/llvm/IR/DebugInfo.h +++ b/include/llvm/IR/DebugInfo.h @@ -44,9 +44,6 @@ DISubprogram *getDISubprogram(const MDNode *Scope); /// \returns a valid subprogram, if found. Otherwise, return \c nullptr. DISubprogram *getDISubprogram(const Function *F); -/// \brief Find underlying composite type. -DICompositeTypeBase *getDICompositeType(DIType *T); - /// \brief Generate map by visiting all retained types. 
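The new padding bit is queried through StructLayout; a minimal sketch:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"

// True if DL inserts padding between STy's own fields (padding inside
// nested aggregates is not counted, per the comment above).
static bool structHasPadding(const llvm::DataLayout &DL,
                             llvm::StructType *STy) {
  return DL.getStructLayout(STy)->hasPadding();
}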
DITypeIdentifierMap generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes); @@ -108,23 +105,23 @@ public: typedef SmallVectorImpl::const_iterator scope_iterator; iterator_range compile_units() const { - return iterator_range(CUs.begin(), CUs.end()); + return make_range(CUs.begin(), CUs.end()); } iterator_range subprograms() const { - return iterator_range(SPs.begin(), SPs.end()); + return make_range(SPs.begin(), SPs.end()); } iterator_range global_variables() const { - return iterator_range(GVs.begin(), GVs.end()); + return make_range(GVs.begin(), GVs.end()); } iterator_range types() const { - return iterator_range(TYs.begin(), TYs.end()); + return make_range(TYs.begin(), TYs.end()); } iterator_range scopes() const { - return iterator_range(Scopes.begin(), Scopes.end()); + return make_range(Scopes.begin(), Scopes.end()); } unsigned compile_unit_count() const { return CUs.size(); } @@ -146,8 +143,6 @@ private: bool TypeMapInitialized; }; -DenseMap makeSubprogramMap(const Module &M); - } // end namespace llvm #endif diff --git a/include/llvm/IR/DebugInfoFlags.def b/include/llvm/IR/DebugInfoFlags.def index d5de8683fd3b..9756c12264b4 100644 --- a/include/llvm/IR/DebugInfoFlags.def +++ b/include/llvm/IR/DebugInfoFlags.def @@ -32,5 +32,6 @@ HANDLE_DI_FLAG((1 << 11), Vector) HANDLE_DI_FLAG((1 << 12), StaticMember) HANDLE_DI_FLAG((1 << 13), LValueReference) HANDLE_DI_FLAG((1 << 14), RValueReference) +HANDLE_DI_FLAG((1 << 15), ExternalTypeRef) #undef HANDLE_DI_FLAG diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 9c5a95721d79..456313a70e83 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -20,15 +20,7 @@ // Helper macros for defining get() overrides. #define DEFINE_MDNODE_GET_UNPACK_IMPL(...) __VA_ARGS__ #define DEFINE_MDNODE_GET_UNPACK(ARGS) DEFINE_MDNODE_GET_UNPACK_IMPL ARGS -#define DEFINE_MDNODE_GET(CLASS, FORMAL, ARGS) \ - static CLASS *get(LLVMContext &Context, DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ - return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued); \ - } \ - static CLASS *getIfExists(LLVMContext &Context, \ - DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ - return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued, \ - /* ShouldCreate */ false); \ - } \ +#define DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(CLASS, FORMAL, ARGS) \ static CLASS *getDistinct(LLVMContext &Context, \ DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Distinct); \ @@ -38,6 +30,16 @@ return Temp##CLASS( \ getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Temporary)); \ } +#define DEFINE_MDNODE_GET(CLASS, FORMAL, ARGS) \ + static CLASS *get(LLVMContext &Context, DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ + return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued); \ + } \ + static CLASS *getIfExists(LLVMContext &Context, \ + DEFINE_MDNODE_GET_UNPACK(FORMAL)) { \ + return getImpl(Context, DEFINE_MDNODE_GET_UNPACK(ARGS), Uniqued, \ + /* ShouldCreate */ false); \ + } \ + DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(CLASS, FORMAL, ARGS) namespace llvm { @@ -67,8 +69,8 @@ public: operator Metadata *() const { return const_cast(MD); } - bool operator==(const TypedDINodeRef &X) const { return MD == X.MD; }; - bool operator!=(const TypedDINodeRef &X) const { return MD != X.MD; }; + bool operator==(const TypedDINodeRef &X) const { return MD == X.MD; } + bool operator!=(const TypedDINodeRef &X) const { return MD != X.MD; } /// \brief Create a reference. 
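The make_range cleanups are behavior-neutral, so DebugInfoFinder clients iterate as before. For instance (helper name assumed):

#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Module.h"

static unsigned countCompileUnits(const llvm::Module &M) {
  llvm::DebugInfoFinder Finder;
  Finder.processModule(M);
  unsigned N = 0;
  for (llvm::DICompileUnit *CU : Finder.compile_units()) {
    (void)CU;
    ++N;
  }
  return N;
}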
/// @@ -97,6 +99,7 @@ class DITypeRefArray { const MDTuple *N = nullptr; public: + DITypeRefArray() = default; DITypeRefArray(const MDTuple *N) : N(N) {} explicit operator bool() const { return get(); } @@ -574,6 +577,7 @@ public: bool isStaticMember() const { return getFlags() & FlagStaticMember; } bool isLValueReference() const { return getFlags() & FlagLValueReference; } bool isRValueReference() const { return getFlags() & FlagRValueReference; } + bool isExternalTypeRef() const { return getFlags() & FlagExternalTypeRef; } DITypeRef getRef() const { return DITypeRef::get(this); } @@ -646,45 +650,21 @@ public: } }; -/// \brief Base class for DIDerivedType and DICompositeType. -/// -/// TODO: Delete; they're not really related. -class DIDerivedTypeBase : public DIType { -protected: - DIDerivedTypeBase(LLVMContext &C, unsigned ID, StorageType Storage, - unsigned Tag, unsigned Line, uint64_t SizeInBits, - uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, - ArrayRef Ops) - : DIType(C, ID, Storage, Tag, Line, SizeInBits, AlignInBits, OffsetInBits, - Flags, Ops) {} - ~DIDerivedTypeBase() = default; - -public: - DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); } - Metadata *getRawBaseType() const { return getOperand(3); } - - static bool classof(const Metadata *MD) { - return MD->getMetadataID() == DIDerivedTypeKind || - MD->getMetadataID() == DICompositeTypeKind || - MD->getMetadataID() == DISubroutineTypeKind; - } -}; - /// \brief Derived types. /// /// This includes qualified types, pointers, references, friends, typedefs, and /// class members. /// /// TODO: Split out members (inheritance, fields, methods, etc.). -class DIDerivedType : public DIDerivedTypeBase { +class DIDerivedType : public DIType { friend class LLVMContextImpl; friend class MDNode; DIDerivedType(LLVMContext &C, StorageType Storage, unsigned Tag, unsigned Line, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, ArrayRef Ops) - : DIDerivedTypeBase(C, DIDerivedTypeKind, Storage, Tag, Line, SizeInBits, - AlignInBits, OffsetInBits, Flags, Ops) {} + : DIType(C, DIDerivedTypeKind, Storage, Tag, Line, SizeInBits, + AlignInBits, OffsetInBits, Flags, Ops) {} ~DIDerivedType() = default; static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag, @@ -732,6 +712,10 @@ public: TempDIDerivedType clone() const { return cloneImpl(); } + //// Get the base type this is derived from. + DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); } + Metadata *getRawBaseType() const { return getOperand(3); } + /// \brief Get extra data associated with this derived type. /// /// Class type for pointer-to-members, objective-c property node for ivars, @@ -764,88 +748,23 @@ public: } }; -/// \brief Base class for DICompositeType and DISubroutineType. -/// -/// TODO: Delete; they're not really related. -class DICompositeTypeBase : public DIDerivedTypeBase { - unsigned RuntimeLang; - -protected: - DICompositeTypeBase(LLVMContext &C, unsigned ID, StorageType Storage, - unsigned Tag, unsigned Line, unsigned RuntimeLang, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - ArrayRef Ops) - : DIDerivedTypeBase(C, ID, Storage, Tag, Line, SizeInBits, AlignInBits, - OffsetInBits, Flags, Ops), - RuntimeLang(RuntimeLang) {} - ~DICompositeTypeBase() = default; - -public: - /// \brief Get the elements of the composite type. - /// - /// \note Calling this is only valid for \a DICompositeType. 
This assertion - /// can be removed once \a DISubroutineType has been separated from - /// "composite types". - DINodeArray getElements() const { - assert(!isa(this) && "no elements for DISubroutineType"); - return cast_or_null(getRawElements()); - } - DITypeRef getVTableHolder() const { return DITypeRef(getRawVTableHolder()); } - DITemplateParameterArray getTemplateParams() const { - return cast_or_null(getRawTemplateParams()); - } - StringRef getIdentifier() const { return getStringOperand(7); } - unsigned getRuntimeLang() const { return RuntimeLang; } - - Metadata *getRawElements() const { return getOperand(4); } - Metadata *getRawVTableHolder() const { return getOperand(5); } - Metadata *getRawTemplateParams() const { return getOperand(6); } - MDString *getRawIdentifier() const { return getOperandAs(7); } - - /// \brief Replace operands. - /// - /// If this \a isUniqued() and not \a isResolved(), on a uniquing collision - /// this will be RAUW'ed and deleted. Use a \a TrackingMDRef to keep track - /// of its movement if necessary. - /// @{ - void replaceElements(DINodeArray Elements) { -#ifndef NDEBUG - for (DINode *Op : getElements()) - assert(std::find(Elements->op_begin(), Elements->op_end(), Op) && - "Lost a member during member list replacement"); -#endif - replaceOperandWith(4, Elements.get()); - } - void replaceVTableHolder(DITypeRef VTableHolder) { - replaceOperandWith(5, VTableHolder); - } - void replaceTemplateParams(DITemplateParameterArray TemplateParams) { - replaceOperandWith(6, TemplateParams.get()); - } - /// @} - - static bool classof(const Metadata *MD) { - return MD->getMetadataID() == DICompositeTypeKind || - MD->getMetadataID() == DISubroutineTypeKind; - } -}; - /// \brief Composite types. /// /// TODO: Detach from DerivedTypeBase (split out MDEnumType?). /// TODO: Create a custom, unrelated node for DW_TAG_array_type. -class DICompositeType : public DICompositeTypeBase { +class DICompositeType : public DIType { friend class LLVMContextImpl; friend class MDNode; + unsigned RuntimeLang; + DICompositeType(LLVMContext &C, StorageType Storage, unsigned Tag, unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, ArrayRef Ops) - : DICompositeTypeBase(C, DICompositeTypeKind, Storage, Tag, Line, - RuntimeLang, SizeInBits, AlignInBits, OffsetInBits, - Flags, Ops) {} + : DIType(C, DICompositeTypeKind, Storage, Tag, Line, SizeInBits, + AlignInBits, OffsetInBits, Flags, Ops), + RuntimeLang(RuntimeLang) {} ~DICompositeType() = default; static DICompositeType * @@ -903,6 +822,45 @@ public: TempDICompositeType clone() const { return cloneImpl(); } + DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); } + DINodeArray getElements() const { + return cast_or_null(getRawElements()); + } + DITypeRef getVTableHolder() const { return DITypeRef(getRawVTableHolder()); } + DITemplateParameterArray getTemplateParams() const { + return cast_or_null(getRawTemplateParams()); + } + StringRef getIdentifier() const { return getStringOperand(7); } + unsigned getRuntimeLang() const { return RuntimeLang; } + + Metadata *getRawBaseType() const { return getOperand(3); } + Metadata *getRawElements() const { return getOperand(4); } + Metadata *getRawVTableHolder() const { return getOperand(5); } + Metadata *getRawTemplateParams() const { return getOperand(6); } + MDString *getRawIdentifier() const { return getOperandAs(7); } + + /// \brief Replace operands. 
+ /// + /// If this \a isUniqued() and not \a isResolved(), on a uniquing collision + /// this will be RAUW'ed and deleted. Use a \a TrackingMDRef to keep track + /// of its movement if necessary. + /// @{ + void replaceElements(DINodeArray Elements) { +#ifndef NDEBUG + for (DINode *Op : getElements()) + assert(std::find(Elements->op_begin(), Elements->op_end(), Op) && + "Lost a member during member list replacement"); +#endif + replaceOperandWith(4, Elements.get()); + } + void replaceVTableHolder(DITypeRef VTableHolder) { + replaceOperandWith(5, VTableHolder); + } + void replaceTemplateParams(DITemplateParameterArray TemplateParams) { + replaceOperandWith(6, TemplateParams.get()); + } + /// @} + static bool classof(const Metadata *MD) { return MD->getMetadataID() == DICompositeTypeKind; } @@ -918,17 +876,15 @@ template TypedDINodeRef TypedDINodeRef::get(const T *N) { /// \brief Type array for a subprogram. /// -/// TODO: Detach from CompositeType, and fold the array of types in directly -/// as operands. -class DISubroutineType : public DICompositeTypeBase { +/// TODO: Fold the array of types in directly as operands. +class DISubroutineType : public DIType { friend class LLVMContextImpl; friend class MDNode; DISubroutineType(LLVMContext &C, StorageType Storage, unsigned Flags, ArrayRef Ops) - : DICompositeTypeBase(C, DISubroutineTypeKind, Storage, - dwarf::DW_TAG_subroutine_type, 0, 0, 0, 0, 0, Flags, - Ops) {} + : DIType(C, DISubroutineTypeKind, Storage, dwarf::DW_TAG_subroutine_type, + 0, 0, 0, 0, Flags, Ops) {} ~DISubroutineType() = default; static DISubroutineType *getImpl(LLVMContext &Context, unsigned Flags, @@ -957,7 +913,7 @@ public: DITypeRefArray getTypeArray() const { return cast_or_null(getRawTypeArray()); } - Metadata *getRawTypeArray() const { return getRawElements(); } + Metadata *getRawTypeArray() const { return getOperand(3); } static bool classof(const Metadata *MD) { return MD->getMetadataID() == DISubroutineTypeKind; @@ -981,7 +937,9 @@ class DICompileUnit : public DIScope { : DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops), SourceLanguage(SourceLanguage), IsOptimized(IsOptimized), RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), - DWOId(DWOId) {} + DWOId(DWOId) { + assert(Storage != Uniqued); + } ~DICompileUnit() = default; static DICompileUnit * @@ -991,15 +949,16 @@ class DICompileUnit : public DIScope { unsigned EmissionKind, DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes, DISubprogramArray Subprograms, DIGlobalVariableArray GlobalVariables, - DIImportedEntityArray ImportedEntities, uint64_t DWOId, - StorageType Storage, bool ShouldCreate = true) { + DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros, + uint64_t DWOId, StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, SourceLanguage, File, getCanonicalMDString(Context, Producer), IsOptimized, getCanonicalMDString(Context, Flags), RuntimeVersion, getCanonicalMDString(Context, SplitDebugFilename), EmissionKind, EnumTypes.get(), RetainedTypes.get(), Subprograms.get(), GlobalVariables.get(), - ImportedEntities.get(), DWOId, Storage, ShouldCreate); + ImportedEntities.get(), Macros.get(), DWOId, Storage, + ShouldCreate); } static DICompileUnit * getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File, @@ -1007,40 +966,44 @@ class DICompileUnit : public DIScope { unsigned RuntimeVersion, MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *Subprograms, Metadata 
*GlobalVariables, - Metadata *ImportedEntities, uint64_t DWOId, StorageType Storage, - bool ShouldCreate = true); + Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId, + StorageType Storage, bool ShouldCreate = true); TempDICompileUnit cloneImpl() const { return getTemporary( getContext(), getSourceLanguage(), getFile(), getProducer(), isOptimized(), getFlags(), getRuntimeVersion(), getSplitDebugFilename(), getEmissionKind(), getEnumTypes(), getRetainedTypes(), getSubprograms(), - getGlobalVariables(), getImportedEntities(), DWOId); + getGlobalVariables(), getImportedEntities(), getMacros(), DWOId); } + static void get() = delete; + static void getIfExists() = delete; + public: - DEFINE_MDNODE_GET(DICompileUnit, - (unsigned SourceLanguage, DIFile *File, StringRef Producer, - bool IsOptimized, StringRef Flags, unsigned RuntimeVersion, - StringRef SplitDebugFilename, unsigned EmissionKind, - DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes, - DISubprogramArray Subprograms, - DIGlobalVariableArray GlobalVariables, - DIImportedEntityArray ImportedEntities, uint64_t DWOId), - (SourceLanguage, File, Producer, IsOptimized, Flags, - RuntimeVersion, SplitDebugFilename, EmissionKind, - EnumTypes, RetainedTypes, Subprograms, GlobalVariables, - ImportedEntities, DWOId)) - DEFINE_MDNODE_GET( + DEFINE_MDNODE_GET_DISTINCT_TEMPORARY( + DICompileUnit, + (unsigned SourceLanguage, DIFile *File, StringRef Producer, + bool IsOptimized, StringRef Flags, unsigned RuntimeVersion, + StringRef SplitDebugFilename, unsigned EmissionKind, + DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes, + DISubprogramArray Subprograms, DIGlobalVariableArray GlobalVariables, + DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros, + uint64_t DWOId), + (SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion, + SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes, Subprograms, + GlobalVariables, ImportedEntities, Macros, DWOId)) + DEFINE_MDNODE_GET_DISTINCT_TEMPORARY( DICompileUnit, (unsigned SourceLanguage, Metadata *File, MDString *Producer, bool IsOptimized, MDString *Flags, unsigned RuntimeVersion, MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *Subprograms, - Metadata *GlobalVariables, Metadata *ImportedEntities, uint64_t DWOId), + Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros, + uint64_t DWOId), (SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes, Subprograms, - GlobalVariables, ImportedEntities, DWOId)) + GlobalVariables, ImportedEntities, Macros, DWOId)) TempDICompileUnit clone() const { return cloneImpl(); } @@ -1066,7 +1029,11 @@ public: DIImportedEntityArray getImportedEntities() const { return cast_or_null(getRawImportedEntities()); } - unsigned getDWOId() const { return DWOId; } + DIMacroNodeArray getMacros() const { + return cast_or_null(getRawMacros()); + } + uint64_t getDWOId() const { return DWOId; } + void setDWOId(uint64_t DwoId) { DWOId = DwoId; } MDString *getRawProducer() const { return getOperandAs(1); } MDString *getRawFlags() const { return getOperandAs(2); } @@ -1078,6 +1045,7 @@ public: Metadata *getRawSubprograms() const { return getOperand(6); } Metadata *getRawGlobalVariables() const { return getOperand(7); } Metadata *getRawImportedEntities() const { return getOperand(8); } + Metadata *getRawMacros() const { return getOperand(9); } /// \brief Replace arrays. 
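Since get()/getIfExists() are deleted, a compile unit can now only be created distinct; code going through DIBuilder already gets that for free. A sketch, assuming the 3.8-era createCompileUnit signature and placeholder file names:

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Dwarf.h"

// Produces a distinct DICompileUnit attached to M's debug info.
static llvm::DICompileUnit *emitCU(llvm::Module &M) {
  llvm::DIBuilder DIB(M);
  return DIB.createCompileUnit(llvm::dwarf::DW_LANG_C99, "a.c", "/tmp",
                               "example-producer", /*isOptimized=*/false,
                               /*Flags=*/"", /*RuntimeVersion=*/0);
}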
/// @@ -1100,6 +1068,7 @@ public: void replaceImportedEntities(DIImportedEntityArray N) { replaceOperandWith(8, N.get()); } + void replaceMacros(DIMacroNodeArray N) { replaceOperandWith(9, N.get()); } /// @} static bool classof(const Metadata *MD) { @@ -1157,8 +1126,10 @@ class DILocation : public MDNode { } TempDILocation cloneImpl() const { - return getTemporary(getContext(), getLine(), getColumn(), getScope(), - getInlinedAt()); + // Get the raw scope/inlinedAt since it is possible to invoke this on + // a DILocation containing temporary metadata. + return getTemporary(getContext(), getLine(), getColumn(), getRawScope(), + getRawInlinedAt()); } // Disallow replacing operands. @@ -1276,14 +1247,13 @@ class DISubprogram : public DILocalScope { DISubroutineType *Type, bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, DITypeRef ContainingType, unsigned Virtuality, unsigned VirtualIndex, unsigned Flags, bool IsOptimized, - Constant *Function, DITemplateParameterArray TemplateParams, - DISubprogram *Declaration, DILocalVariableArray Variables, - StorageType Storage, bool ShouldCreate = true) { + DITemplateParameterArray TemplateParams, DISubprogram *Declaration, + DILocalVariableArray Variables, StorageType Storage, + bool ShouldCreate = true) { return getImpl(Context, Scope, getCanonicalMDString(Context, Name), getCanonicalMDString(Context, LinkageName), File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex, Flags, IsOptimized, - Function ? ConstantAsMetadata::get(Function) : nullptr, TemplateParams.get(), Declaration, Variables.get(), Storage, ShouldCreate); } @@ -1292,17 +1262,16 @@ class DISubprogram : public DILocalScope { MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, - unsigned Flags, bool IsOptimized, Metadata *Function, - Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables, - StorageType Storage, bool ShouldCreate = true); + unsigned Flags, bool IsOptimized, Metadata *TemplateParams, + Metadata *Declaration, Metadata *Variables, StorageType Storage, + bool ShouldCreate = true); TempDISubprogram cloneImpl() const { - return getTemporary(getContext(), getScope(), getName(), getLinkageName(), - getFile(), getLine(), getType(), isLocalToUnit(), - isDefinition(), getScopeLine(), getContainingType(), - getVirtuality(), getVirtualIndex(), getFlags(), - isOptimized(), getFunctionConstant(), - getTemplateParams(), getDeclaration(), getVariables()); + return getTemporary( + getContext(), getScope(), getName(), getLinkageName(), getFile(), + getLine(), getType(), isLocalToUnit(), isDefinition(), getScopeLine(), + getContainingType(), getVirtuality(), getVirtualIndex(), getFlags(), + isOptimized(), getTemplateParams(), getDeclaration(), getVariables()); } public: @@ -1312,13 +1281,12 @@ public: bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, DITypeRef ContainingType, unsigned Virtuality, unsigned VirtualIndex, unsigned Flags, bool IsOptimized, - Constant *Function = nullptr, DITemplateParameterArray TemplateParams = nullptr, DISubprogram *Declaration = nullptr, DILocalVariableArray Variables = nullptr), (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, - VirtualIndex, Flags, IsOptimized, Function, TemplateParams, + VirtualIndex, Flags, IsOptimized, TemplateParams, Declaration, Variables)) 
DEFINE_MDNODE_GET( DISubprogram, @@ -1326,11 +1294,11 @@ public: unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, unsigned Flags, bool IsOptimized, - Metadata *Function = nullptr, Metadata *TemplateParams = nullptr, - Metadata *Declaration = nullptr, Metadata *Variables = nullptr), + Metadata *TemplateParams = nullptr, Metadata *Declaration = nullptr, + Metadata *Variables = nullptr), (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex, Flags, IsOptimized, - Function, TemplateParams, Declaration, Variables)) + TemplateParams, Declaration, Variables)) TempDISubprogram clone() const { return cloneImpl(); } @@ -1389,11 +1357,6 @@ public: return DITypeRef(getRawContainingType()); } - Constant *getFunctionConstant() const { - if (auto *C = cast_or_null(getRawFunction())) - return C->getValue(); - return nullptr; - } DITemplateParameterArray getTemplateParams() const { return cast_or_null(getRawTemplateParams()); } @@ -1407,30 +1370,11 @@ public: Metadata *getRawScope() const { return getOperand(1); } Metadata *getRawType() const { return getOperand(5); } Metadata *getRawContainingType() const { return getOperand(6); } - Metadata *getRawFunction() const { return getOperand(7); } - Metadata *getRawTemplateParams() const { return getOperand(8); } - Metadata *getRawDeclaration() const { return getOperand(9); } - Metadata *getRawVariables() const { return getOperand(10); } + Metadata *getRawTemplateParams() const { return getOperand(7); } + Metadata *getRawDeclaration() const { return getOperand(8); } + Metadata *getRawVariables() const { return getOperand(9); } - /// \brief Get a pointer to the function this subprogram describes. - /// - /// This dyn_casts \a getFunctionConstant() to \a Function. - /// - /// FIXME: Should this be looking through bitcasts? - Function *getFunction() const; - - /// \brief Replace the function. - /// - /// If \a isUniqued() and not \a isResolved(), this could node will be - /// RAUW'ed and deleted out from under the caller. Use a \a TrackingMDRef if - /// that's a problem. - /// @{ - void replaceFunction(Function *F); - void replaceFunction(ConstantAsMetadata *MD) { replaceOperandWith(7, MD); } - void replaceFunction(std::nullptr_t) { replaceOperandWith(7, nullptr); } - /// @} - - /// \brief Check if this subprogram decribes the given function. + /// \brief Check if this subprogram describes the given function. /// /// FIXME: Should this be looking through bitcasts? bool describes(const Function *F) const; @@ -1452,13 +1396,6 @@ public: Metadata *getRawScope() const { return getOperand(1); } - /// \brief Forwarding accessors to LexicalBlock. - /// - /// TODO: Remove these and update code to use \a DILexicalBlock directly. 
- /// @{ - inline unsigned getLine() const; - inline unsigned getColumn() const; - /// @} static bool classof(const Metadata *MD) { return MD->getMetadataID() == DILexicalBlockKind || MD->getMetadataID() == DILexicalBlockFileKind; @@ -1470,12 +1407,14 @@ class DILexicalBlock : public DILexicalBlockBase { friend class MDNode; unsigned Line; - unsigned Column; + uint16_t Column; DILexicalBlock(LLVMContext &C, StorageType Storage, unsigned Line, unsigned Column, ArrayRef Ops) : DILexicalBlockBase(C, DILexicalBlockKind, Storage, Ops), Line(Line), - Column(Column) {} + Column(Column) { + assert(Column < (1u << 16) && "Expected 16-bit column"); + } ~DILexicalBlock() = default; static DILexicalBlock *getImpl(LLVMContext &Context, DILocalScope *Scope, @@ -1514,18 +1453,6 @@ public: } }; -unsigned DILexicalBlockBase::getLine() const { - if (auto *N = dyn_cast(this)) - return N->getLine(); - return 0; -} - -unsigned DILexicalBlockBase::getColumn() const { - if (auto *N = dyn_cast(this)) - return N->getColumn(); - return 0; -} - class DILexicalBlockFile : public DILexicalBlockBase { friend class LLVMContextImpl; friend class MDNode; @@ -1797,15 +1724,13 @@ public: }; /// \brief Base class for variables. -/// -/// TODO: Hardcode to DW_TAG_variable. class DIVariable : public DINode { unsigned Line; protected: - DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag, - unsigned Line, ArrayRef Ops) - : DINode(C, ID, Storage, Tag, Ops), Line(Line) {} + DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Line, + ArrayRef Ops) + : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line) {} ~DIVariable() = default; public: @@ -1850,8 +1775,7 @@ class DIGlobalVariable : public DIVariable { DIGlobalVariable(LLVMContext &C, StorageType Storage, unsigned Line, bool IsLocalToUnit, bool IsDefinition, ArrayRef Ops) - : DIVariable(C, DIGlobalVariableKind, Storage, dwarf::DW_TAG_variable, - Line, Ops), + : DIVariable(C, DIGlobalVariableKind, Storage, Line, Ops), IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {} ~DIGlobalVariable() = default; @@ -1923,8 +1847,6 @@ public: /// \brief Local variable. /// -/// TODO: Split between arguments and otherwise. -/// TODO: Use \c DW_TAG_variable instead of fake tags. /// TODO: Split up flags. 
class DILocalVariable : public DIVariable { friend class LLVMContextImpl; @@ -1933,42 +1855,42 @@ class DILocalVariable : public DIVariable { unsigned Arg; unsigned Flags; - DILocalVariable(LLVMContext &C, StorageType Storage, unsigned Tag, - unsigned Line, unsigned Arg, unsigned Flags, - ArrayRef<Metadata *> Ops) - : DIVariable(C, DILocalVariableKind, Storage, Tag, Line, Ops), Arg(Arg), + DILocalVariable(LLVMContext &C, StorageType Storage, unsigned Line, + unsigned Arg, unsigned Flags, ArrayRef<Metadata *> Ops) + : DIVariable(C, DILocalVariableKind, Storage, Line, Ops), Arg(Arg), Flags(Flags) {} ~DILocalVariable() = default; - static DILocalVariable *getImpl(LLVMContext &Context, unsigned Tag, - DIScope *Scope, StringRef Name, DIFile *File, - unsigned Line, DITypeRef Type, unsigned Arg, - unsigned Flags, StorageType Storage, + static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope, + StringRef Name, DIFile *File, unsigned Line, + DITypeRef Type, unsigned Arg, unsigned Flags, + StorageType Storage, bool ShouldCreate = true) { - return getImpl(Context, Tag, Scope, getCanonicalMDString(Context, Name), - File, Line, Type, Arg, Flags, Storage, ShouldCreate); + return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File, + Line, Type, Arg, Flags, Storage, ShouldCreate); } - static DILocalVariable * - getImpl(LLVMContext &Context, unsigned Tag, Metadata *Scope, MDString *Name, - Metadata *File, unsigned Line, Metadata *Type, unsigned Arg, - unsigned Flags, StorageType Storage, bool ShouldCreate = true); + static DILocalVariable *getImpl(LLVMContext &Context, Metadata *Scope, + MDString *Name, Metadata *File, unsigned Line, + Metadata *Type, unsigned Arg, unsigned Flags, + StorageType Storage, + bool ShouldCreate = true); TempDILocalVariable cloneImpl() const { - return getTemporary(getContext(), getTag(), getScope(), getName(), - getFile(), getLine(), getType(), getArg(), getFlags()); + return getTemporary(getContext(), getScope(), getName(), getFile(), + getLine(), getType(), getArg(), getFlags()); } public: DEFINE_MDNODE_GET(DILocalVariable, - (unsigned Tag, DILocalScope *Scope, StringRef Name, - DIFile *File, unsigned Line, DITypeRef Type, unsigned Arg, + (DILocalScope * Scope, StringRef Name, DIFile *File, + unsigned Line, DITypeRef Type, unsigned Arg, unsigned Flags), - (Tag, Scope, Name, File, Line, Type, Arg, Flags)) + (Scope, Name, File, Line, Type, Arg, Flags)) DEFINE_MDNODE_GET(DILocalVariable, - (unsigned Tag, Metadata *Scope, MDString *Name, - Metadata *File, unsigned Line, Metadata *Type, - unsigned Arg, unsigned Flags), - (Tag, Scope, Name, File, Line, Type, Arg, Flags)) + (Metadata * Scope, MDString *Name, Metadata *File, + unsigned Line, Metadata *Type, unsigned Arg, + unsigned Flags), + (Scope, Name, File, Line, Type, Arg, Flags)) TempDILocalVariable clone() const { return cloneImpl(); } @@ -1979,6 +1901,7 @@ public: return cast<DILocalScope>(DIVariable::getScope()); } + bool isParameter() const { return Arg; } unsigned getArg() const { return Arg; } unsigned getFlags() const { return Flags; } @@ -1988,7 +1911,7 @@ public: /// \brief Check that a location is valid for this variable. /// /// Check that \c DL exists, is in the same subprogram, and has the same - /// inlined-at location as \c this. (Otherwise, it's not a valid attachemnt + /// inlined-at location as \c this. (Otherwise, it's not a valid attachment /// to a \a DbgInfoIntrinsic.)
bool isValidLocationForIntrinsic(const DILocation *DL) const { return DL && getScope()->getSubprogram() == DL->getScope()->getSubprogram(); } @@ -2284,6 +2207,165 @@ public: } }; +/// \brief Macro Info DWARF-like metadata node. +/// +/// A metadata node with a DWARF macro info (i.e., a constant named +/// \c DW_MACINFO_*, defined in llvm/Support/Dwarf.h). Called \a DIMacroNode +/// because it's potentially used for non-DWARF output. +class DIMacroNode : public MDNode { + friend class LLVMContextImpl; + friend class MDNode; + +protected: + DIMacroNode(LLVMContext &C, unsigned ID, StorageType Storage, unsigned MIType, + ArrayRef<Metadata *> Ops1, ArrayRef<Metadata *> Ops2 = None) + : MDNode(C, ID, Storage, Ops1, Ops2) { + assert(MIType < 1u << 16); + SubclassData16 = MIType; + } + ~DIMacroNode() = default; + + template <class Ty> Ty *getOperandAs(unsigned I) const { + return cast_or_null<Ty>(getOperand(I)); + } + + StringRef getStringOperand(unsigned I) const { + if (auto *S = getOperandAs<MDString>(I)) + return S->getString(); + return StringRef(); + } + + static MDString *getCanonicalMDString(LLVMContext &Context, StringRef S) { + if (S.empty()) + return nullptr; + return MDString::get(Context, S); + } + +public: + unsigned getMacinfoType() const { return SubclassData16; } + + static bool classof(const Metadata *MD) { + switch (MD->getMetadataID()) { + default: + return false; + case DIMacroKind: + case DIMacroFileKind: + return true; + } + } +}; + +class DIMacro : public DIMacroNode { + friend class LLVMContextImpl; + friend class MDNode; + + unsigned Line; + + DIMacro(LLVMContext &C, StorageType Storage, unsigned MIType, unsigned Line, + ArrayRef<Metadata *> Ops) + : DIMacroNode(C, DIMacroKind, Storage, MIType, Ops), Line(Line) {} + ~DIMacro() = default; + + static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line, + StringRef Name, StringRef Value, StorageType Storage, + bool ShouldCreate = true) { + return getImpl(Context, MIType, Line, getCanonicalMDString(Context, Name), + getCanonicalMDString(Context, Value), Storage, ShouldCreate); + } + static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line, + MDString *Name, MDString *Value, StorageType Storage, + bool ShouldCreate = true); + + TempDIMacro cloneImpl() const { + return getTemporary(getContext(), getMacinfoType(), getLine(), getName(), + getValue()); + } + +public: + DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, StringRef Name, + StringRef Value = ""), + (MIType, Line, Name, Value)) + DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, MDString *Name, + MDString *Value), + (MIType, Line, Name, Value)) + + TempDIMacro clone() const { return cloneImpl(); } + + unsigned getLine() const { return Line; } + + StringRef getName() const { return getStringOperand(0); } + StringRef getValue() const { return getStringOperand(1); } + + MDString *getRawName() const { return getOperandAs<MDString>(0); } + MDString *getRawValue() const { return getOperandAs<MDString>(1); } + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIMacroKind; + } +}; + +class DIMacroFile : public DIMacroNode { + friend class LLVMContextImpl; + friend class MDNode; + + unsigned Line; + + DIMacroFile(LLVMContext &C, StorageType Storage, unsigned MIType, + unsigned Line, ArrayRef<Metadata *> Ops) + : DIMacroNode(C, DIMacroFileKind, Storage, MIType, Ops), Line(Line) {} + ~DIMacroFile() = default; + + static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, DIFile *File, + DIMacroNodeArray Elements, StorageType Storage, + bool ShouldCreate = true) {
return getImpl(Context, MIType, Line, static_cast<Metadata *>(File), + Elements.get(), Storage, ShouldCreate); } + + static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, Metadata *File, Metadata *Elements, + StorageType Storage, bool ShouldCreate = true); + + TempDIMacroFile cloneImpl() const { + return getTemporary(getContext(), getMacinfoType(), getLine(), getFile(), + getElements()); + } + +public: + DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, DIFile *File, + DIMacroNodeArray Elements), + (MIType, Line, File, Elements)) + DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, + Metadata *File, Metadata *Elements), + (MIType, Line, File, Elements)) + + TempDIMacroFile clone() const { return cloneImpl(); } + + void replaceElements(DIMacroNodeArray Elements) { +#ifndef NDEBUG + for (DIMacroNode *Op : getElements()) + assert(std::find(Elements->op_begin(), Elements->op_end(), Op) != + Elements->op_end() && + "Lost a macro node during macro node list replacement"); +#endif + replaceOperandWith(1, Elements.get()); + } + + unsigned getLine() const { return Line; } + DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); } + + DIMacroNodeArray getElements() const { + return cast_or_null<MDTuple>(getRawElements()); + } + + Metadata *getRawFile() const { return getOperand(0); } + Metadata *getRawElements() const { return getOperand(1); } + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIMacroFileKind; + } +}; + } // end namespace llvm #undef DEFINE_MDNODE_GET_UNPACK_IMPL diff --git a/include/llvm/IR/DerivedTypes.h b/include/llvm/IR/DerivedTypes.h index 4a94499b4cf5..071e69b1e808 100644 --- a/include/llvm/IR/DerivedTypes.h +++ b/include/llvm/IR/DerivedTypes.h @@ -36,11 +36,12 @@ class StringRef; /// @brief Integer representation type class IntegerType : public Type { friend class LLVMContextImpl; - + protected: explicit IntegerType(LLVMContext &C, unsigned NumBits) : Type(C, IntegerTyID){ setSubclassData(NumBits); } + public: /// This enum is just used to hold constants we need for IntegerType. enum { @@ -90,6 +91,9 @@ public: } }; +unsigned Type::getIntegerBitWidth() const { + return cast<IntegerType>(this)->getBitWidth(); +} /// FunctionType - Class to represent function types /// @@ -108,7 +112,7 @@ public: /// FunctionType::get - Create a FunctionType taking no parameters. /// static FunctionType *get(Type *Result, bool isVarArg); - + /// isValidReturnType - Return true if the specified type is valid as a return /// type. static bool isValidReturnType(Type *RetTy); @@ -143,18 +147,30 @@ public: static_assert(AlignOf<FunctionType>::Alignment >= AlignOf<Type *>::Alignment, "Alignment sufficient for objects appended to FunctionType"); +bool Type::isFunctionVarArg() const { + return cast<FunctionType>(this)->isVarArg(); +} + +Type *Type::getFunctionParamType(unsigned i) const { + return cast<FunctionType>(this)->getParamType(i); +} + +unsigned Type::getFunctionNumParams() const { + return cast<FunctionType>(this)->getNumParams(); +} + /// CompositeType - Common super class of ArrayType, StructType, PointerType /// and VectorType. class CompositeType : public Type { protected: - explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) { } -public: + explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) {} +public: /// getTypeAtIndex - Given an index value into the type, return the type of /// the element.
/// - Type *getTypeAtIndex(const Value *V); - Type *getTypeAtIndex(unsigned Idx); + Type *getTypeAtIndex(const Value *V) const; + Type *getTypeAtIndex(unsigned Idx) const; bool indexValid(const Value *V) const; bool indexValid(unsigned Idx) const; @@ -167,14 +183,13 @@ public: } }; - /// StructType - Class to represent struct types. There are two different kinds /// of struct types: Literal structs and Identified structs. /// /// Literal struct types (e.g. { i32, i32 }) are uniqued structurally, and must /// always have a body when created. You can get one of these by using one of /// the StructType::get() forms. -/// +/// /// Identified structs (e.g. %foo or %42) may optionally have a name and are not /// uniqued. The names for identified structs are managed at the LLVMContext /// level, so there can only be a single identified struct with a given name in @@ -205,23 +220,20 @@ class StructType : public CompositeType { /// pointer to the symbol table entry (maintained by LLVMContext) for the /// struct. This is null if the type is an literal struct or if it is /// a identified type that has an empty name. - /// + /// void *SymbolTableEntry; -public: +public: /// StructType::create - This creates an identified struct. static StructType *create(LLVMContext &Context, StringRef Name); static StructType *create(LLVMContext &Context); - - static StructType *create(ArrayRef<Type*> Elements, - StringRef Name, + + static StructType *create(ArrayRef<Type *> Elements, StringRef Name, bool isPacked = false); - static StructType *create(ArrayRef<Type*> Elements); - static StructType *create(LLVMContext &Context, - ArrayRef<Type*> Elements, - StringRef Name, - bool isPacked = false); - static StructType *create(LLVMContext &Context, ArrayRef<Type*> Elements); + static StructType *create(ArrayRef<Type *> Elements); + static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements, + StringRef Name, bool isPacked = false); + static StructType *create(LLVMContext &Context, ArrayRef<Type *> Elements); static StructType *create(StringRef Name, Type *elt1, ...) LLVM_END_WITH_NULL; /// StructType::get - This static method is the primary way to create a @@ -232,7 +244,7 @@ public: /// StructType::get - Create an empty structure type. /// static StructType *get(LLVMContext &Context, bool isPacked = false); - + /// StructType::get - This static method is a convenience method for creating /// structure types by specifying the elements as arguments. Note that this /// method always returns a non-packed struct, and requires at least one /// element type. static StructType *get(Type *elt1, ...) LLVM_END_WITH_NULL; bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; } - + /// isLiteral - Return true if this type is uniqued by structural /// equivalence, false if it is a struct definition. bool isLiteral() const { return (getSubclassData() & SCDB_IsLiteral) != 0; } - + /// isOpaque - Return true if this is a type with an identity that has no body /// specified yet. These prints as 'opaque' in .ll files. bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; } /// isSized - Return true if this is a sized type. - bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const; - + bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const; + /// hasName - Return true if this is a named struct that has a non-empty name. bool hasName() const { return SymbolTableEntry != nullptr; } - + /// getName - Return the name for this struct type if it has an identity. /// This may return an empty string for an unnamed struct type.
Do not call /// this on an literal type. StringRef getName() const; - + /// setName - Change the name of this type to the specified name, or to a name /// with a suffix if there is a collision. Do not call this on an literal /// type. @@ -268,11 +280,10 @@ public: /// setBody - Specify a body for an opaque identified type. void setBody(ArrayRef<Type*> Elements, bool isPacked = false); void setBody(Type *elt1, ...) LLVM_END_WITH_NULL; - + /// isValidElementType - Return true if the specified type is valid as a /// element type. static bool isValidElementType(Type *ElemTy); - // Iterator access to the elements. typedef Type::subtype_iterator element_iterator; @@ -284,8 +295,8 @@ public: /// isLayoutIdentical - Return true if this is layout identical to the /// specified struct. - bool isLayoutIdentical(StructType *Other) const; - + bool isLayoutIdentical(StructType *Other) const; + /// Random access to the elements unsigned getNumElements() const { return NumContainedTys; } Type *getElementType(unsigned N) const { @@ -299,6 +310,18 @@ public: } }; +StringRef Type::getStructName() const { + return cast<StructType>(this)->getName(); +} + +unsigned Type::getStructNumElements() const { + return cast<StructType>(this)->getNumElements(); +} + +Type *Type::getStructElementType(unsigned N) const { + return cast<StructType>(this)->getElementType(N); +} + /// SequentialType - This is the superclass of the array, pointer and vector /// type classes. All of these represent "arrays" in memory. The array type /// represents a specifically sized array, pointer types are unsized/unknown @@ -330,6 +353,9 @@ public: } }; +Type *Type::getSequentialElementType() const { + return cast<SequentialType>(this)->getElementType(); +} /// ArrayType - Class to represent array types. /// @@ -339,6 +365,7 @@ class ArrayType : public SequentialType { ArrayType(const ArrayType &) = delete; const ArrayType &operator=(const ArrayType &) = delete; ArrayType(Type *ElType, uint64_t NumEl); + public: /// ArrayType::get - This static method is the primary way to construct an /// ArrayType @@ -357,6 +384,10 @@ public: } }; +uint64_t Type::getArrayNumElements() const { + return cast<ArrayType>(this)->getNumElements(); +} + /// VectorType - Class to represent vector types. /// class VectorType : public SequentialType { @@ -365,6 +396,7 @@ class VectorType : public SequentialType { VectorType(const VectorType &) = delete; const VectorType &operator=(const VectorType &) = delete; VectorType(Type *ElType, unsigned NumEl); + public: /// VectorType::get - This static method is the primary way to construct an /// VectorType. @@ -443,6 +475,9 @@ public: } }; +unsigned Type::getVectorNumElements() const { + return cast<VectorType>(this)->getNumElements(); +} /// PointerType - Class to represent pointers. /// @@ -450,6 +485,7 @@ class PointerType : public SequentialType { PointerType(const PointerType &) = delete; const PointerType &operator=(const PointerType &) = delete; explicit PointerType(Type *ElType, unsigned AddrSpace); + public: /// PointerType::get - This constructs a pointer to an object of the specified /// type in a numbered address space.
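The inline accessors added throughout this file (getStructNumElements, getVectorNumElements, getArrayNumElements, and friends) forward Type queries to the derived classes so that callers no longer spell the cast themselves. A hedged sketch of a caller, illustrative only; the is*Ty() checks guard the cast<> that each accessor performs internally:

    // Hypothetical helper, not part of this patch.
    static unsigned elementCount(Type *T) {
      if (T->isStructTy())
        return T->getStructNumElements();          // cast<StructType> inside
      if (T->isVectorTy())
        return T->getVectorNumElements();          // cast<VectorType> inside
      if (T->isArrayTy())
        return unsigned(T->getArrayNumElements()); // cast<ArrayType> inside
      return 0;
    }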
@@ -477,6 +513,10 @@ public: } }; +unsigned Type::getPointerAddressSpace() const { + return cast<PointerType>(getScalarType())->getAddressSpace(); +} + } // End llvm namespace #endif diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index f38313f82ea7..f69955e5ed48 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -15,7 +15,6 @@ #ifndef LLVM_IR_DIAGNOSTICINFO_H #define LLVM_IR_DIAGNOSTICINFO_H -#include "llvm-c/Core.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Module.h" @@ -56,8 +55,11 @@ enum DiagnosticKind { DK_OptimizationRemark, DK_OptimizationRemarkMissed, DK_OptimizationRemarkAnalysis, + DK_OptimizationRemarkAnalysisFPCommute, + DK_OptimizationRemarkAnalysisAliasing, DK_OptimizationFailure, DK_MIRParser, + DK_PGOProfile, DK_FirstPluginKind }; @@ -99,6 +101,8 @@ public: /// The printed message must not end with '.' nor start with a severity /// keyword. virtual void print(DiagnosticPrinter &DP) const = 0; + + static const char *AlwaysPrint; }; typedef std::function<void(const DiagnosticInfo &)> DiagnosticHandlerFunction; @@ -210,19 +214,18 @@ public: /// Diagnostic information for the sample profiler. class DiagnosticInfoSampleProfile : public DiagnosticInfo { public: - DiagnosticInfoSampleProfile(const char *FileName, unsigned LineNum, + DiagnosticInfoSampleProfile(StringRef FileName, unsigned LineNum, const Twine &Msg, DiagnosticSeverity Severity = DS_Error) : DiagnosticInfo(DK_SampleProfile, Severity), FileName(FileName), LineNum(LineNum), Msg(Msg) {} - DiagnosticInfoSampleProfile(const char *FileName, const Twine &Msg, + DiagnosticInfoSampleProfile(StringRef FileName, const Twine &Msg, DiagnosticSeverity Severity = DS_Error) : DiagnosticInfo(DK_SampleProfile, Severity), FileName(FileName), LineNum(0), Msg(Msg) {} DiagnosticInfoSampleProfile(const Twine &Msg, DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(DK_SampleProfile, Severity), FileName(nullptr), - LineNum(0), Msg(Msg) {} + : DiagnosticInfo(DK_SampleProfile, Severity), LineNum(0), Msg(Msg) {} /// \see DiagnosticInfo::print. void print(DiagnosticPrinter &DP) const override; @@ -231,13 +234,13 @@ public: return DI->getKind() == DK_SampleProfile; } - const char *getFileName() const { return FileName; } + StringRef getFileName() const { return FileName; } unsigned getLineNum() const { return LineNum; } const Twine &getMsg() const { return Msg; } private: /// Name of the input file associated with this diagnostic. - const char *FileName; + StringRef FileName; /// Line number where the diagnostic occurred. If 0, no line number will /// be emitted in the message. @@ -247,6 +250,31 @@ private: const Twine &Msg; }; +/// Diagnostic information for the PGO profiler. +class DiagnosticInfoPGOProfile : public DiagnosticInfo { +public: + DiagnosticInfoPGOProfile(const char *FileName, const Twine &Msg, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_PGOProfile, Severity), FileName(FileName), Msg(Msg) {} + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_PGOProfile; + } + + const char *getFileName() const { return FileName; } + const Twine &getMsg() const { return Msg; } + +private: + /// Name of the input file associated with this diagnostic. + const char *FileName; + + /// Message to report. + const Twine &Msg; +}; + /// Common features for diagnostics dealing with optimization remarks.
class DiagnosticInfoOptimizationBase : public DiagnosticInfo { public: @@ -267,10 +295,6 @@ public: /// \see DiagnosticInfo::print. void print(DiagnosticPrinter &DP) const override; - static bool classof(const DiagnosticInfo *DI) { - return DI->getKind() == DK_OptimizationRemark; - } - /// Return true if this optimization remark is enabled by one of /// of the LLVM command line flags (-pass-remarks, -pass-remarks-missed, /// or -pass-remarks-analysis). Note that this only handles the LLVM @@ -386,6 +410,69 @@ public: /// \see DiagnosticInfoOptimizationBase::isEnabled. bool isEnabled() const override; + +protected: + DiagnosticInfoOptimizationRemarkAnalysis(enum DiagnosticKind Kind, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) + : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, Fn, DLoc, + Msg) {} +}; + +/// Diagnostic information for optimization analysis remarks related to +/// floating-point non-commutativity. +class DiagnosticInfoOptimizationRemarkAnalysisFPCommute + : public DiagnosticInfoOptimizationRemarkAnalysis { +public: + /// \p PassName is the name of the pass emitting this diagnostic. If + /// this name matches the regular expression given in -Rpass-analysis=, then + /// the diagnostic will be emitted. \p Fn is the function where the diagnostic + /// is being emitted. \p DLoc is the location information to use in the + /// diagnostic. If line table information is available, the diagnostic will + /// include the source code location. \p Msg is the message to show. The + /// front-end will append its own message related to options that address + /// floating-point non-commutativity. Note that this class does not copy this + /// message, so this reference must be valid for the whole life time of the + /// diagnostic. + DiagnosticInfoOptimizationRemarkAnalysisFPCommute(const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) + : DiagnosticInfoOptimizationRemarkAnalysis( + DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, DLoc, Msg) {} + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_OptimizationRemarkAnalysisFPCommute; + } +}; + +/// Diagnostic information for optimization analysis remarks related to +/// pointer aliasing. +class DiagnosticInfoOptimizationRemarkAnalysisAliasing + : public DiagnosticInfoOptimizationRemarkAnalysis { +public: + /// \p PassName is the name of the pass emitting this diagnostic. If + /// this name matches the regular expression given in -Rpass-analysis=, then + /// the diagnostic will be emitted. \p Fn is the function where the diagnostic + /// is being emitted. \p DLoc is the location information to use in the + /// diagnostic. If line table information is available, the diagnostic will + /// include the source code location. \p Msg is the message to show. The + /// front-end will append its own message related to options that address + /// pointer aliasing legality. Note that this class does not copy this + /// message, so this reference must be valid for the whole life time of the + /// diagnostic. + DiagnosticInfoOptimizationRemarkAnalysisAliasing(const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) + : DiagnosticInfoOptimizationRemarkAnalysis( + DK_OptimizationRemarkAnalysisAliasing, PassName, Fn, DLoc, Msg) {} + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_OptimizationRemarkAnalysisAliasing; + } }; /// Diagnostic information for machine IR parser. 
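Since the two new remark subclasses above differ only in their DiagnosticKind and classof, emitting one works like any other DiagnosticInfo. A hedged sketch, not part of this patch; Fn, DLoc and the pass name are assumed, and per the documented caveat the Twine message is not copied, so it must outlive the call:

    Fn.getContext().diagnose(DiagnosticInfoOptimizationRemarkAnalysisFPCommute(
        "loop-vectorize", Fn, DLoc,
        "loop not vectorized: cannot prove it is safe to reorder "
        "floating-point operations"));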
@@ -438,6 +525,30 @@ void emitOptimizationRemarkAnalysis(LLVMContext &Ctx, const char *PassName, const Function &Fn, const DebugLoc &DLoc, const Twine &Msg); +/// Emit an optimization analysis remark related to messages about +/// floating-point non-commutativity. \p PassName is the name of the pass +/// emitting the message. If -Rpass-analysis= is given and \p PassName matches +/// the regular expression in -Rpass, then the remark will be emitted. \p Fn is +/// the function triggering the remark, \p DLoc is the debug location where the +/// diagnostic is generated. \p Msg is the message string to use. +void emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg); + +/// Emit an optimization analysis remark related to messages about +/// pointer aliasing. \p PassName is the name of the pass emitting the message. +/// If -Rpass-analysis= is given and \p PassName matches the regular expression +/// in -Rpass, then the remark will be emitted. \p Fn is the function triggering +/// the remark, \p DLoc is the debug location where the diagnostic is generated. +/// \p Msg is the message string to use. +void emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg); + /// Diagnostic information for optimization failures. class DiagnosticInfoOptimizationFailure : public DiagnosticInfoOptimizationBase { diff --git a/include/llvm/IR/DiagnosticPrinter.h b/include/llvm/IR/DiagnosticPrinter.h index 735e3ad7a8b0..1bcd73738b66 100644 --- a/include/llvm/IR/DiagnosticPrinter.h +++ b/include/llvm/IR/DiagnosticPrinter.h @@ -63,7 +63,7 @@ protected: raw_ostream &Stream; public: - DiagnosticPrinterRawOStream(raw_ostream &Stream) : Stream(Stream) {}; + DiagnosticPrinterRawOStream(raw_ostream &Stream) : Stream(Stream) {} // Simple types. DiagnosticPrinter &operator<<(char C) override; diff --git a/include/llvm/IR/Dominators.h b/include/llvm/IR/Dominators.h index 27d989b0344c..37447c353b19 100644 --- a/include/llvm/IR/Dominators.h +++ b/include/llvm/IR/Dominators.h @@ -64,11 +64,30 @@ public: /// \brief Concrete subclass of DominatorTreeBase that is used to compute a /// normal dominator tree. +/// +/// Definition: A block is said to be forward statically reachable if there is +/// a path from the entry of the function to the block. A statically reachable +/// block may become statically unreachable during optimization. +/// +/// A forward unreachable block may appear in the dominator tree, or it may +/// not. If it does, dominance queries will return results as if all reachable +/// blocks dominate it. When asking for a Node corresponding to a potentially +/// unreachable block, calling code must handle the case where the block was +/// unreachable and the result of getNode() is nullptr. +/// +/// Generally, a block known to be unreachable when the dominator tree is +/// constructed will not be in the tree. One which becomes unreachable after +/// the dominator tree is initially constructed may still exist in the tree, +/// even if the tree is properly updated. Calling code should not rely on the +/// preceding statements; this is stated only to assist human understanding. 
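A sketch of the contract just described, using the explicit DominatorTree(Function &) constructor added in the class below; illustrative only, with F and BB assumed to be in scope:

    DominatorTree DT(F);             // the new explicit constructor below
    DomTreeNode *N = DT.getNode(&BB);
    if (!N) {
      // BB is (or became) forward unreachable: there is no tree node, and
      // dominance queries behave as if all reachable blocks dominate it.
    }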
class DominatorTree : public DominatorTreeBase<BasicBlock> { public: typedef DominatorTreeBase<BasicBlock> Base; DominatorTree() : DominatorTreeBase<BasicBlock>(false) {} + explicit DominatorTree(Function &F) : DominatorTreeBase<BasicBlock>(false) { + recalculate(F); + } DominatorTree(DominatorTree &&Arg) : Base(std::move(static_cast<Base &>(Arg))) {} @@ -122,31 +141,35 @@ public: // DominatorTree GraphTraits specializations so the DominatorTree can be // iterable by generic graph iterators. -template <> struct GraphTraits<DomTreeNode*> { - typedef DomTreeNode NodeType; - typedef NodeType::iterator ChildIteratorType; +template <class Node, class ChildIterator> struct DomTreeGraphTraitsBase { + typedef Node NodeType; + typedef ChildIterator ChildIteratorType; + typedef df_iterator<Node *, SmallPtrSet<NodeType *, 8>> nodes_iterator; - static NodeType *getEntryNode(NodeType *N) { - return N; - } + static NodeType *getEntryNode(NodeType *N) { return N; } static inline ChildIteratorType child_begin(NodeType *N) { return N->begin(); } - static inline ChildIteratorType child_end(NodeType *N) { - return N->end(); - } + static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } - typedef df_iterator<DomTreeNode*> nodes_iterator; - - static nodes_iterator nodes_begin(DomTreeNode *N) { + static nodes_iterator nodes_begin(NodeType *N) { return df_begin(getEntryNode(N)); } - static nodes_iterator nodes_end(DomTreeNode *N) { + static nodes_iterator nodes_end(NodeType *N) { return df_end(getEntryNode(N)); } }; +template <> +struct GraphTraits<DomTreeNode *> + : public DomTreeGraphTraitsBase<DomTreeNode, DomTreeNode::iterator> {}; + +template <> +struct GraphTraits<const DomTreeNode *> + : public DomTreeGraphTraitsBase<const DomTreeNode, + DomTreeNode::const_iterator> {}; + template <> struct GraphTraits<DominatorTree*> : public GraphTraits<DomTreeNode*> { static NodeType *getEntryNode(DominatorTree *DT) { diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index ec9f4cad094a..2a983930bf4d 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -32,28 +32,16 @@ namespace llvm { class FunctionType; class LLVMContext; +class DISubprogram; -template<> struct ilist_traits<Argument> - : public SymbolTableListTraits<Argument, Function> { - - Argument *createSentinel() const { - return static_cast<Argument*>(&Sentinel); - } - static void destroySentinel(Argument*) {} - - Argument *provideInitialHead() const { return createSentinel(); } - Argument *ensureHead(Argument*) const { return createSentinel(); } - static void noteHead(Argument*, Argument*) {} - - static ValueSymbolTable *getSymTab(Function *ItemParent); -private: - mutable ilist_half_node<Argument> Sentinel; -}; +template <> +struct SymbolTableListSentinelTraits<Argument> + : public ilist_half_embedded_sentinel_traits<Argument> {}; class Function : public GlobalObject, public ilist_node<Function> { public: - typedef iplist<Argument> ArgumentListType; - typedef iplist<BasicBlock> BasicBlockListType; + typedef SymbolTableList<Argument> ArgumentListType; + typedef SymbolTableList<BasicBlock> BasicBlockListType; // BasicBlock iterators... typedef BasicBlockListType::iterator iterator; @@ -73,10 +61,12 @@ private: /* * Value::SubclassData * - * bit 0 : HasLazyArguments - * bit 1 : HasPrefixData - * bit 2 : HasPrologueData - * bit 3-6: CallingConvention + * bit 0 : HasLazyArguments + * bit 1 : HasPrefixData + * bit 2 : HasPrologueData + * bit 3 : HasPersonalityFn + * bits 4-13 : CallingConvention + * bits 14-15 : [reserved] */ /// Bits from GlobalObject::GlobalObjectSubclassData. @@ -90,7 +80,7 @@ private: (Value ?
Mask : 0u)); } - friend class SymbolTableListTraits<Function, Module>; + friend class SymbolTableListTraits<Function>; void setParent(Module *parent); @@ -120,7 +110,7 @@ private: public: static Function *Create(FunctionType *Ty, LinkageTypes Linkage, const Twine &N = "", Module *M = nullptr) { - return new(1) Function(Ty, Linkage, N, M); + return new Function(Ty, Linkage, N, M); } ~Function() override; @@ -128,14 +118,6 @@ public: /// \brief Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); - /// \brief Get the personality function associated with this function. - bool hasPersonalityFn() const { return getNumOperands() != 0; } - Constant *getPersonalityFn() const { - assert(hasPersonalityFn()); - return cast<Constant>(Op<0>()); - } - void setPersonalityFn(Constant *C); - Type *getReturnType() const; // Return the type of the ret val FunctionType *getFunctionType() const; // Return the FunctionType for me @@ -170,11 +152,13 @@ public: /// calling convention of this function. The enum values for the known /// calling conventions are defined in CallingConv.h. CallingConv::ID getCallingConv() const { - return static_cast<CallingConv::ID>(getSubclassDataFromValue() >> 3); + return static_cast<CallingConv::ID>((getSubclassDataFromValue() >> 4) & + CallingConv::MaxID); } void setCallingConv(CallingConv::ID CC) { - setValueSubclassData((getSubclassDataFromValue() & 7) | - (static_cast<unsigned>(CC) << 3)); + auto ID = static_cast<unsigned>(CC); + assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention"); + setValueSubclassData((getSubclassDataFromValue() & 0xc00f) | (ID << 4)); } /// @brief Return the attribute list for this Function. @@ -267,13 +251,13 @@ public: uint64_t getDereferenceableBytes(unsigned i) const { return AttributeSets.getDereferenceableBytes(i); } - + /// @brief Extract the number of dereferenceable_or_null bytes for a call or /// parameter (0=unknown). uint64_t getDereferenceableOrNullBytes(unsigned i) const { return AttributeSets.getDereferenceableOrNullBytes(i); } - + /// @brief Determine if the function does not access memory. bool doesNotAccessMemory() const { return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, @@ -299,10 +283,28 @@ public: return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, Attribute::ArgMemOnly); } - void setOnlyAccessesArgMemory() { - addFnAttr(Attribute::ArgMemOnly); + void setOnlyAccessesArgMemory() { addFnAttr(Attribute::ArgMemOnly); } + + /// @brief Determine if the function may only access memory that is + /// inaccessible from the IR. + bool onlyAccessesInaccessibleMemory() const { + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InaccessibleMemOnly); } - + void setOnlyAccessesInaccessibleMemory() { + addFnAttr(Attribute::InaccessibleMemOnly); + } + + /// @brief Determine if the function may only access memory that is + // either inaccessible from the IR or pointed to by its arguments. + bool onlyAccessesInaccessibleMemOrArgMem() const { + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InaccessibleMemOrArgMemOnly); + } + void setOnlyAccessesInaccessibleMemOrArgMem() { + addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + /// @brief Determine if the function cannot return. bool doesNotReturn() const { return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, @@ -339,6 +341,15 @@ public: addFnAttr(Attribute::Convergent); } + /// Determine if the function is known not to recurse, directly or + /// indirectly.
+ bool doesNotRecurse() const { + return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoRecurse); + } + void setDoesNotRecurse() { + addFnAttr(Attribute::NoRecurse); + } /// @brief True if the ABI mandates (or the user requested) that this /// function be in a unwind table. @@ -362,7 +373,8 @@ public: AttributeSets.hasAttribute(2, Attribute::StructRet); } - /// @brief Determine if the parameter does not alias other parameters. + /// @brief Determine if the parameter or return value is marked with NoAlias + /// attribute. /// @param n The parameter to check. 1 is the first parameter, 0 is the return bool doesNotAlias(unsigned n) const { return AttributeSets.hasAttribute(n, Attribute::NoAlias); @@ -395,6 +407,14 @@ public: addAttribute(n, Attribute::ReadOnly); } + /// Optimize this function for minimum size (-Oz). + bool optForMinSize() const { return hasFnAttribute(Attribute::MinSize); }; + + /// Optimize this function for size (-Os) or minimum size (-Oz). + bool optForSize() const { + return hasFnAttribute(Attribute::OptimizeForSize) || optForMinSize(); + } + /// copyAttributesFrom - copy all additional attributes (those not needed to /// create a Function) from the Function Src to this one. void copyAttributesFrom(const GlobalValue *Src) override; @@ -417,7 +437,6 @@ public: /// void eraseFromParent() override; - /// Get the underlying elements of the Function... the basic block list is /// empty for external functions. /// @@ -429,13 +448,13 @@ public: CheckLazyArguments(); return ArgumentList; } - static iplist<Argument> Function::*getSublistAccess(Argument*) { + static ArgumentListType Function::*getSublistAccess(Argument*) { return &Function::ArgumentList; } const BasicBlockListType &getBasicBlockList() const { return BasicBlocks; } BasicBlockListType &getBasicBlockList() { return BasicBlocks; } - static iplist<BasicBlock> Function::*getSublistAccess(BasicBlock*) { + static BasicBlockListType Function::*getSublistAccess(BasicBlock*) { return &Function::BasicBlocks; } @@ -450,7 +469,6 @@ public: inline ValueSymbolTable &getValueSymbolTable() { return *SymTab; } inline const ValueSymbolTable &getValueSymbolTable() const { return *SymTab; } - //===--------------------------------------------------------------------===// // BasicBlock iterator forwarding functions // @@ -487,11 +505,11 @@ public: } iterator_range<arg_iterator> args() { - return iterator_range<arg_iterator>(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } iterator_range<const_arg_iterator> args() const { - return iterator_range<const_arg_iterator>(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } /// @} @@ -499,24 +517,33 @@ public: size_t arg_size() const; bool arg_empty() const; + /// \brief Check whether this function has a personality function. + bool hasPersonalityFn() const { + return getSubclassDataFromValue() & (1<<3); + } + + /// \brief Get the personality function associated with this function. + Constant *getPersonalityFn() const; + void setPersonalityFn(Constant *Fn); + + /// \brief Check whether this function has prefix data. bool hasPrefixData() const { return getSubclassDataFromValue() & (1<<1); } + /// \brief Get the prefix data associated with this function. Constant *getPrefixData() const; void setPrefixData(Constant *PrefixData); + /// \brief Check whether this function has prologue data. bool hasPrologueData() const { return getSubclassDataFromValue() & (1<<2); } + /// \brief Get the prologue data associated with this function.
Constant *getPrologueData() const; void setPrologueData(Constant *PrologueData); - /// Print the function to an output stream with an optional - /// AssemblyAnnotationWriter. - void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr) const; - /// viewCFG - This function is meant for use from the debugger. You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the /// program, displaying the CFG of the current function with the code for each @@ -596,12 +623,27 @@ public: /// Drop all metadata from \c this not included in \c KnownIDs. void dropUnknownMetadata(ArrayRef<unsigned> KnownIDs); + /// \brief Set the attached subprogram. + /// + /// Calls \a setMetadata() with \a LLVMContext::MD_dbg. + void setSubprogram(DISubprogram *SP); + + /// \brief Get the attached subprogram. + /// + /// Calls \a getMetadata() with \a LLVMContext::MD_dbg and casts the result + /// to \a DISubprogram. + DISubprogram *getSubprogram() const; + private: + void allocHungoffUselist(); + template <int Idx> void setHungoffOperand(Constant *C); + // Shadow Value::setValueSubclassData with a private forwarding method so that // subclasses cannot accidentally use it. void setValueSubclassData(unsigned short D) { Value::setValueSubclassData(D); } + void setValueSubclassDataBit(unsigned Bit, bool On); bool hasMetadataHashEntry() const { return getGlobalObjectSubClassData() & HasMetadataHashEntryBit; @@ -613,18 +655,8 @@ private: void clearMetadata(); }; -inline ValueSymbolTable * -ilist_traits<BasicBlock>::getSymTab(Function *F) { - return F ? &F->getValueSymbolTable() : nullptr; -} - -inline ValueSymbolTable * -ilist_traits<Argument>::getSymTab(Function *F) { - return F ? &F->getValueSymbolTable() : nullptr; -} - template <> -struct OperandTraits<Function> : public OptionalOperandTraits<Function> {}; +struct OperandTraits<Function> : public HungoffOperandTraits<3> {}; DEFINE_TRANSPARENT_OPERAND_ACCESSORS(Function, Value) diff --git a/include/llvm/IR/FunctionInfo.h b/include/llvm/IR/FunctionInfo.h new file mode 100644 index 000000000000..eba088a61bc0 --- /dev/null +++ b/include/llvm/IR/FunctionInfo.h @@ -0,0 +1,241 @@ +//===-- llvm/FunctionInfo.h - Function Info Index ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// @file +/// FunctionInfo.h This file contains the declarations of the classes that hold +/// the function info index and summary. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_FUNCTIONINFO_H +#define LLVM_IR_FUNCTIONINFO_H + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// \brief Function summary information to aid decisions and implementation of +/// importing. +/// +/// This is a separate class from FunctionInfo to enable lazy reading of this +/// function summary information from the combined index file during importing. +class FunctionSummary { +private: + /// \brief Path of module containing function IR, used to locate module when + /// importing this function.
+ /// + /// This is only used during parsing of the combined function index, or when + /// parsing the per-module index for creation of the combined function index, + /// not during writing of the per-module index which doesn't contain a + /// module path string table. + StringRef ModulePath; + + /// \brief Used to flag functions that have local linkage types and need to + /// have module identifier appended before placing into the combined + /// index, to disambiguate from other functions with the same name. + /// + /// This is only used in the per-module function index, as it is consumed + /// while creating the combined index. + bool IsLocalFunction; + + // The rest of the information is used to help decide whether importing + // is likely to be profitable. + // Other information will be added as the importing is tuned, such + // as hotness (when profile available), and other function characteristics. + + /// Number of instructions (ignoring debug instructions, e.g.) computed + /// during the initial compile step when the function index is first built. + unsigned InstCount; + +public: + /// Construct a summary object from summary data expected for all + /// summary records. + FunctionSummary(unsigned NumInsts) : InstCount(NumInsts) {} + + /// Set the path to the module containing this function, for use in + /// the combined index. + void setModulePath(StringRef ModPath) { ModulePath = ModPath; } + + /// Get the path to the module containing this function. + StringRef modulePath() const { return ModulePath; } + + /// Record whether this is a local function in the per-module index. + void setLocalFunction(bool IsLocal) { IsLocalFunction = IsLocal; } + + /// Check whether this was a local function, for use in creating + /// the combined index. + bool isLocalFunction() const { return IsLocalFunction; } + + /// Get the instruction count recorded for this function. + unsigned instCount() const { return InstCount; } +}; + +/// \brief Class to hold pointer to function summary and information required +/// for parsing it. +/// +/// For the per-module index, this holds the bitcode offset +/// of the corresponding function block. For the combined index, +/// after parsing of the \a ValueSymbolTable, this initially +/// holds the offset of the corresponding function summary bitcode +/// record. After parsing the associated summary information from the summary +/// block the \a FunctionSummary is populated and stored here. +class FunctionInfo { +private: + /// Function summary information used to help make ThinLTO importing + /// decisions. + std::unique_ptr<FunctionSummary> Summary; + + /// \brief The bitcode offset corresponding to either the associated + /// function's function body record, or its function summary record, + /// depending on whether this is a per-module or combined index. + /// + /// This bitcode offset is written to or read from the associated + /// \a ValueSymbolTable entry for the function. + /// For the per-module index this holds the bitcode offset of the + /// function's body record within bitcode module block in its module, + /// which is used during lazy function parsing or ThinLTO importing. + /// For the combined index this holds the offset of the corresponding + /// function summary record, to enable associating the combined index + /// VST records with the summary records. + uint64_t BitcodeIndex; + +public: + /// Constructor used during parsing of VST entries.
+ FunctionInfo(uint64_t FuncOffset) + : Summary(nullptr), BitcodeIndex(FuncOffset) {} + + /// Constructor used for per-module index bitcode writing. + FunctionInfo(uint64_t FuncOffset, + std::unique_ptr<FunctionSummary> FuncSummary) + : Summary(std::move(FuncSummary)), BitcodeIndex(FuncOffset) {} + + /// Record the function summary information parsed out of the function + /// summary block during parsing or combined index creation. + void setFunctionSummary(std::unique_ptr<FunctionSummary> FuncSummary) { + Summary = std::move(FuncSummary); + } + + /// Get the function summary recorded for this function. + FunctionSummary *functionSummary() const { return Summary.get(); } + + /// Get the bitcode index recorded for this function, depending on + /// the index type. + uint64_t bitcodeIndex() const { return BitcodeIndex; } + + /// Record the bitcode index for this function, depending on + /// the index type. + void setBitcodeIndex(uint64_t FuncOffset) { BitcodeIndex = FuncOffset; } +}; + +/// List of function info structures for a particular function name held +/// in the FunctionMap. Requires a vector in the case of multiple +/// COMDAT functions of the same name. +typedef std::vector<std::unique_ptr<FunctionInfo>> FunctionInfoList; + +/// Map from function name to corresponding function info structures. +typedef StringMap<FunctionInfoList> FunctionInfoMapTy; + +/// Type used for iterating through the function info map. +typedef FunctionInfoMapTy::const_iterator const_funcinfo_iterator; +typedef FunctionInfoMapTy::iterator funcinfo_iterator; + +/// String table to hold/own module path strings, which additionally holds the +/// module ID assigned to each module during the plugin step. The StringMap +/// makes a copy of and owns inserted strings. +typedef StringMap<uint64_t> ModulePathStringTableTy; + +/// Class to hold module path string table and function map, +/// and encapsulate methods for operating on them. +class FunctionInfoIndex { +private: + /// Map from function name to list of function information instances + /// for functions of that name (may be duplicates in the COMDAT case, e.g.). + FunctionInfoMapTy FunctionMap; + + /// Holds strings for combined index, mapping to the corresponding module ID. + ModulePathStringTableTy ModulePathStringTable; + +public: + FunctionInfoIndex() = default; + + // Disable the copy constructor and assignment operators, so + // no unexpected copying/moving occurs. + FunctionInfoIndex(const FunctionInfoIndex &) = delete; + void operator=(const FunctionInfoIndex &) = delete; + + funcinfo_iterator begin() { return FunctionMap.begin(); } + const_funcinfo_iterator begin() const { return FunctionMap.begin(); } + funcinfo_iterator end() { return FunctionMap.end(); } + const_funcinfo_iterator end() const { return FunctionMap.end(); } + + /// Get the list of function info objects for a given function. + const FunctionInfoList &getFunctionInfoList(StringRef FuncName) { + return FunctionMap[FuncName]; + } + + /// Get the list of function info objects for a given function. + const const_funcinfo_iterator findFunctionInfoList(StringRef FuncName) const { + return FunctionMap.find(FuncName); + } + + /// Add a function info for a function of the given name. + void addFunctionInfo(StringRef FuncName, std::unique_ptr<FunctionInfo> Info) { + FunctionMap[FuncName].push_back(std::move(Info)); + } + + /// Iterator to allow writer to walk through table during emission.
+ iterator_range<StringMap<uint64_t>::const_iterator> + modPathStringEntries() const { + return llvm::make_range(ModulePathStringTable.begin(), + ModulePathStringTable.end()); + } + + /// Get the module ID recorded for the given module path. + uint64_t getModuleId(const StringRef ModPath) const { + return ModulePathStringTable.lookup(ModPath); + } + + /// Add the given per-module index into this function index/summary, + /// assigning it the given module ID. Each module merged in should have + /// a unique ID, necessary for consistent renaming of promoted + /// static (local) variables. + void mergeFrom(std::unique_ptr<FunctionInfoIndex> Other, + uint64_t NextModuleId); + + /// Convenience method for creating a promoted global name + /// for the given value name of a local, and its original module's ID. + static std::string getGlobalNameForLocal(StringRef Name, uint64_t ModId) { + SmallString<256> NewName(Name); + NewName += ".llvm."; + raw_svector_ostream(NewName) << ModId; + return NewName.str(); + } + + /// Add a new module path, mapped to the given module Id, and return StringRef + /// owned by string table map. + StringRef addModulePath(StringRef ModPath, uint64_t ModId) { + return ModulePathStringTable.insert(std::make_pair(ModPath, ModId)) + .first->first(); + } + + /// Check if the given Module has any functions available for exporting + /// in the index. We consider any module present in the ModulePathStringTable + /// to have exported functions. + bool hasExportedFunctions(const Module &M) const { + return ModulePathStringTable.count(M.getModuleIdentifier()); + } +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/IR/GVMaterializer.h b/include/llvm/IR/GVMaterializer.h index 1d6c9157f0b8..6cb593c7a3da 100644 --- a/include/llvm/IR/GVMaterializer.h +++ b/include/llvm/IR/GVMaterializer.h @@ -18,12 +18,14 @@ #ifndef LLVM_IR_GVMATERIALIZER_H #define LLVM_IR_GVMATERIALIZER_H +#include "llvm/ADT/DenseMap.h" #include <system_error> #include <vector> namespace llvm { class Function; class GlobalValue; +class Metadata; class Module; class StructType; @@ -34,28 +36,25 @@ protected: public: virtual ~GVMaterializer(); - /// True if GV has been materialized and can be dematerialized back to - /// whatever backing store this GVMaterializer uses. - virtual bool isDematerializable(const GlobalValue *GV) const = 0; - /// Make sure the given GlobalValue is fully read. /// virtual std::error_code materialize(GlobalValue *GV) = 0; - /// If the given GlobalValue is read in, and if the GVMaterializer supports - /// it, release the memory for the GV, and set it up to be materialized - /// lazily. If the Materializer doesn't support this capability, this method - /// is a noop. - /// - virtual void dematerialize(GlobalValue *) {} - /// Make sure the entire Module has been completely read. /// - virtual std::error_code materializeModule(Module *M) = 0; + virtual std::error_code materializeModule() = 0; virtual std::error_code materializeMetadata() = 0; virtual void setStripDebugInfo() = 0; + /// Client should define this interface if the mapping between metadata + /// values and value ids needs to be preserved, e.g. across materializer + /// instantiations. If OnlyTempMD is true, only those that have remained + /// temporary metadata are recorded in the map.
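For illustration only: the promotion helper in FunctionInfoIndex above produces names of the form name.llvm.moduleid, which is what keeps renamed static (local) symbols unique across merged modules. The values here are made up; the saveMetadataList declaration follows:

    // "foo" was static in a module that the plugin step assigned ID 42:
    std::string Promoted =
        FunctionInfoIndex::getGlobalNameForLocal("foo", 42);
    // Promoted is now "foo.llvm.42".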
+ virtual void + saveMetadataList(DenseMap<const Metadata *, unsigned> &MetadataToIDs, + bool OnlyTempMD) {} + virtual std::vector<StructType *> getIdentifiedStructTypes() const = 0; }; diff --git a/include/llvm/IR/GetElementPtrTypeIterator.h b/include/llvm/IR/GetElementPtrTypeIterator.h index 6bba0ae29e98..7cb13fa33aa6 100644 --- a/include/llvm/IR/GetElementPtrTypeIterator.h +++ b/include/llvm/IR/GetElementPtrTypeIterator.h @@ -78,7 +78,7 @@ namespace llvm { // current type directly. Type *operator->() const { return operator*(); } - Value *getOperand() const { return *OpIt; } + Value *getOperand() const { return const_cast<Value *>(&**OpIt); } generic_gep_type_iterator& operator++() { // Preincrement if (CurTy.getInt()) { diff --git a/include/llvm/IR/GlobalAlias.h b/include/llvm/IR/GlobalAlias.h index ce73b7af8ca1..b0772143309f 100644 --- a/include/llvm/IR/GlobalAlias.h +++ b/include/llvm/IR/GlobalAlias.h @@ -23,18 +23,17 @@ namespace llvm { class Module; -template <typename ValueSubClass, typename ItemParentClass> - class SymbolTableListTraits; +template <typename ValueSubClass> class SymbolTableListTraits; class GlobalAlias : public GlobalValue, public ilist_node<GlobalAlias> { - friend class SymbolTableListTraits<GlobalAlias, Module>; + friend class SymbolTableListTraits<GlobalAlias>; void operator=(const GlobalAlias &) = delete; GlobalAlias(const GlobalAlias &) = delete; void setParent(Module *parent); - GlobalAlias(PointerType *Ty, LinkageTypes Linkage, const Twine &Name, - Constant *Aliasee, Module *Parent); + GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, + const Twine &Name, Constant *Aliasee, Module *Parent); public: // allocate space for exactly one operand @@ -44,17 +43,19 @@ public: /// If a parent module is specified, the alias is automatically inserted into /// the end of the specified module's alias list. - static GlobalAlias *create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, Constant *Aliasee, - Module *Parent); + static GlobalAlias *create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + Constant *Aliasee, Module *Parent); // Without the Aliasee. - static GlobalAlias *create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, Module *Parent); + static GlobalAlias *create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + Module *Parent); // The module is taken from the Aliasee. - static GlobalAlias *create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, GlobalValue *Aliasee); + static GlobalAlias *create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + GlobalValue *Aliasee); // Type, Parent and AddressSpace taken from the Aliasee.
static GlobalAlias *create(LinkageTypes Linkage, const Twine &Name, diff --git a/include/llvm/IR/GlobalObject.h b/include/llvm/IR/GlobalObject.h index f0552410b61d..ee111a046d73 100644 --- a/include/llvm/IR/GlobalObject.h +++ b/include/llvm/IR/GlobalObject.h @@ -27,9 +27,11 @@ class GlobalObject : public GlobalValue { GlobalObject(const GlobalObject &) = delete; protected: - GlobalObject(PointerType *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, - LinkageTypes Linkage, const Twine &Name) - : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name), ObjComdat(nullptr) { + GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, + LinkageTypes Linkage, const Twine &Name, + unsigned AddressSpace = 0) + : GlobalValue(Ty, VTy, Ops, NumOps, Linkage, Name, AddressSpace), + ObjComdat(nullptr) { setGlobalValueSubClassData(0); } diff --git a/include/llvm/IR/GlobalValue.h b/include/llvm/IR/GlobalValue.h index 2961369a7327..4fa4e7daeab0 100644 --- a/include/llvm/IR/GlobalValue.h +++ b/include/llvm/IR/GlobalValue.h @@ -65,15 +65,16 @@ public: }; protected: - GlobalValue(PointerType *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, - LinkageTypes Linkage, const Twine &Name) - : Constant(Ty, VTy, Ops, NumOps), Linkage(Linkage), - Visibility(DefaultVisibility), UnnamedAddr(0), - DllStorageClass(DefaultStorageClass), + GlobalValue(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps, + LinkageTypes Linkage, const Twine &Name, unsigned AddressSpace) + : Constant(PointerType::get(Ty, AddressSpace), VTy, Ops, NumOps), + ValueType(Ty), Linkage(Linkage), Visibility(DefaultVisibility), + UnnamedAddr(0), DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal), IntID((Intrinsic::ID)0U), Parent(nullptr) { setName(Name); } + Type *ValueType; // Note: VC++ treats enums as signed, so an extra bit is required to prevent // Linkage and Visibility from turning into negative values. LinkageTypes Linkage : 5; // The linkage of this global @@ -184,7 +185,7 @@ public: /// Global values are always pointers. PointerType *getType() const { return cast<PointerType>(User::getType()); } - Type *getValueType() const { return getType()->getElementType(); } + Type *getValueType() const { return ValueType; } static LinkageTypes getLinkOnceLinkage(bool ODR) { return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage; @@ -236,7 +237,8 @@ public: /// Whether the definition of this global may be discarded if it is not used /// in its compilation unit. static bool isDiscardableIfUnused(LinkageTypes Linkage) { - return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage); + return isLinkOnceLinkage(Linkage) || isLocalLinkage(Linkage) || + isAvailableExternallyLinkage(Linkage); } /// Whether the definition of this global may be replaced by something @@ -320,21 +322,11 @@ public: /// function has been read in yet or not. bool isMaterializable() const; - /// Returns true if this function was loaded from a GVMaterializer that's - /// still attached to its Module and that knows how to dematerialize the - /// function. - bool isDematerializable() const; - /// Make sure this GlobalValue is fully read. If the module is corrupt, this /// returns true and fills in the optional string with information about the /// problem. If successful, this returns false. std::error_code materialize(); - /// If this GlobalValue is read in, and if the GVMaterializer supports it, - /// release the memory for the function, and set it up to be materialized - /// lazily. If !isDematerializable(), this method is a noop.
- void dematerialize(); - /// @} /// Return true if the primary definition of this global value is outside of diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h index a0159830ba3b..342bdc01bfbd 100644 --- a/include/llvm/IR/GlobalVariable.h +++ b/include/llvm/IR/GlobalVariable.h @@ -29,11 +29,10 @@ namespace llvm { class Module; class Constant; -template <typename ValueSubClass, typename ItemParentClass> - class SymbolTableListTraits; +template <typename ValueSubClass> class SymbolTableListTraits; class GlobalVariable : public GlobalObject, public ilist_node<GlobalVariable> { - friend class SymbolTableListTraits<GlobalVariable, Module>; + friend class SymbolTableListTraits<GlobalVariable>; void *operator new(size_t, unsigned) = delete; void operator=(const GlobalVariable &) = delete; GlobalVariable(const GlobalVariable &) = delete; @@ -106,18 +105,13 @@ public: /// hasUniqueInitializer - Whether the global variable has an initializer, and /// any changes made to the initializer will turn up in the final executable. inline bool hasUniqueInitializer() const { - return hasInitializer() && - // It's not safe to modify initializers of global variables with weak - // linkage, because the linker might choose to discard the initializer and - // use the initializer from another instance of the global variable - // instead. It is wrong to modify the initializer of a global variable - // with *_odr linkage because then different instances of the global may - // have different initializers, breaking the One Definition Rule. - !isWeakForLinker() && - // It is not safe to modify initializers of global variables with the - // external_initializer marker since the value may be changed at runtime - // before C++ initializers are evaluated. - !isExternallyInitialized(); + return + // We need to be sure this is the definition that will actually be used + isStrongDefinitionForLinker() && + // It is not safe to modify initializers of global variables with the + // external_initializer marker since the value may be changed at runtime + // before C++ initializers are evaluated. + !isExternallyInitialized(); } /// getInitializer - Return the initializer for this global variable. It is diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h index 6c67c79b6c0e..7fe04f2a091a 100644 --- a/include/llvm/IR/IRBuilder.h +++ b/include/llvm/IR/IRBuilder.h @@ -24,6 +24,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" @@ -51,6 +52,7 @@ protected: /// \brief Common base class shared among various IRBuilders. class IRBuilderBase { DebugLoc CurDbgLocation; + protected: BasicBlock *BB; BasicBlock::iterator InsertPt; @@ -58,8 +60,8 @@ protected: MDNode *DefaultFPMathTag; FastMathFlags FMF; -public: +public: IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr) : Context(context), DefaultFPMathTag(FPMathTag), FMF() { ClearInsertionPoint(); } @@ -73,7 +75,7 @@ public: /// inserted into a block. void ClearInsertionPoint() { BB = nullptr; - InsertPt = nullptr; + InsertPt.reset(nullptr); } BasicBlock *GetInsertBlock() const { return BB; } @@ -91,8 +93,8 @@ public: /// the specified instruction.
void SetInsertPoint(Instruction *I) { BB = I->getParent(); - InsertPt = I; - assert(I != BB->end() && "Can't read debug loc from end()"); + InsertPt = I->getIterator(); + assert(InsertPt != BB->end() && "Can't read debug loc from end()"); SetCurrentDebugLocation(I->getDebugLoc()); } @@ -313,10 +315,8 @@ public: } /// \brief Fetch the type representing a 128-bit integer. - IntegerType *getInt128Ty() { - return Type::getInt128Ty(Context); - } - + IntegerType *getInt128Ty() { return Type::getInt128Ty(Context); } + /// \brief Fetch the type representing an N-bit integer. IntegerType *getIntNTy(unsigned N) { return Type::getIntNTy(Context, N); @@ -426,7 +426,7 @@ public: /// \brief Create a call to Masked Load intrinsic CallInst *CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, - Value *PassThru = 0, const Twine &Name = ""); + Value *PassThru = nullptr, const Twine &Name = ""); /// \brief Create a call to Masked Store intrinsic CallInst *CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, @@ -445,6 +445,16 @@ public: ArrayRef GCArgs, const Twine &Name = ""); + /// \brief Create a call to the experimental.gc.statepoint intrinsic to + /// start a new statepoint sequence. + CallInst *CreateGCStatepointCall(uint64_t ID, uint32_t NumPatchBytes, + Value *ActualCallee, uint32_t Flags, + ArrayRef CallArgs, + ArrayRef TransitionArgs, + ArrayRef DeoptArgs, + ArrayRef GCArgs, + const Twine &Name = ""); + // \brief Conveninence function for the common case when CallArgs are filled // in using makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be // .get()'ed to get the Value pointer. @@ -463,6 +473,15 @@ public: ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name = ""); + /// brief Create an invoke to the experimental.gc.statepoint intrinsic to + /// start a new statepoint sequence. + InvokeInst *CreateGCStatepointInvoke( + uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, + BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, + ArrayRef InvokeArgs, ArrayRef TransitionArgs, + ArrayRef DeoptArgs, ArrayRef GCArgs, + const Twine &Name = ""); + // Conveninence function for the common case when CallArgs are filled in using // makeArrayRef(CS.arg_begin(), CS.arg_end()); Use needs to be .get()'ed to // get the Value *. @@ -516,11 +535,11 @@ template > class IRBuilder : public IRBuilderBase, public Inserter { T Folder; + public: - IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter(), + IRBuilder(LLVMContext &C, const T &F, Inserter I = Inserter(), MDNode *FPMathTag = nullptr) - : IRBuilderBase(C, FPMathTag), Inserter(I), Folder(F) { - } + : IRBuilderBase(C, FPMathTag), Inserter(std::move(I)), Folder(F) {} explicit IRBuilder(LLVMContext &C, MDNode *FPMathTag = nullptr) : IRBuilderBase(C, FPMathTag), Folder() { @@ -578,12 +597,15 @@ public: //===--------------------------------------------------------------------===// private: - /// \brief Helper to add branch weight metadata onto an instruction. + /// \brief Helper to add branch weight and unpredictable metadata onto an + /// instruction. /// \returns The annotated instruction. template - InstTy *addBranchWeights(InstTy *I, MDNode *Weights) { + InstTy *addBranchMetadata(InstTy *I, MDNode *Weights, MDNode *Unpredictable) { if (Weights) I->setMetadata(LLVMContext::MD_prof, Weights); + if (Unpredictable) + I->setMetadata(LLVMContext::MD_unpredictable, Unpredictable); return I; } @@ -620,18 +642,20 @@ public: /// \brief Create a conditional 'br Cond, TrueDest, FalseDest' /// instruction. 
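The new CreateGCStatepointCall/CreateGCStatepointInvoke overloads above add an explicit statepoint ID, a patchable-byte count, flags, and GC-transition arguments. A sketch against the simpler pre-existing overload, assuming the callee and two GC-managed pointers are already in scope; the ID is an arbitrary value echoed into the stackmap section:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Wrap a call in gc.statepoint with no patchable bytes, no deopt state and
// two GC-managed pointers; gc.relocate plumbing is added by later passes.
CallInst *emitStatepoint(IRBuilder<> &B, Value *Callee, Value *P, Value *Q) {
  Value *GCPtrs[] = {P, Q};
  return B.CreateGCStatepointCall(/*ID=*/0xABCDEF, /*NumPatchBytes=*/0,
                                  Callee, /*CallArgs=*/ArrayRef<Value *>(),
                                  /*DeoptArgs=*/ArrayRef<Value *>(), GCPtrs);
}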
BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, - MDNode *BranchWeights = nullptr) { - return Insert(addBranchWeights(BranchInst::Create(True, False, Cond), - BranchWeights)); + MDNode *BranchWeights = nullptr, + MDNode *Unpredictable = nullptr) { + return Insert(addBranchMetadata(BranchInst::Create(True, False, Cond), + BranchWeights, Unpredictable)); } /// \brief Create a switch instruction with the specified value, default dest, /// and with a hint for the number of cases that will be added (for efficient /// allocation). SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10, - MDNode *BranchWeights = nullptr) { - return Insert(addBranchWeights(SwitchInst::Create(V, Dest, NumCases), - BranchWeights)); + MDNode *BranchWeights = nullptr, + MDNode *Unpredictable = nullptr) { + return Insert(addBranchMetadata(SwitchInst::Create(V, Dest, NumCases), + BranchWeights, Unpredictable)); } /// \brief Create an indirect branch instruction with the specified address @@ -667,11 +691,45 @@ public: return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args), Name); } + InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest, + BasicBlock *UnwindDest, ArrayRef Args, + ArrayRef OpBundles, + const Twine &Name = "") { + return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args, + OpBundles), Name); + } ResumeInst *CreateResume(Value *Exn) { return Insert(ResumeInst::Create(Exn)); } + CleanupReturnInst *CreateCleanupRet(CleanupPadInst *CleanupPad, + BasicBlock *UnwindBB = nullptr) { + return Insert(CleanupReturnInst::Create(CleanupPad, UnwindBB)); + } + + CatchSwitchInst *CreateCatchSwitch(Value *ParentPad, BasicBlock *UnwindBB, + unsigned NumHandlers, + const Twine &Name = "") { + return Insert(CatchSwitchInst::Create(ParentPad, UnwindBB, NumHandlers), + Name); + } + + CatchPadInst *CreateCatchPad(Value *ParentPad, ArrayRef Args, + const Twine &Name = "") { + return Insert(CatchPadInst::Create(ParentPad, Args), Name); + } + + CleanupPadInst *CreateCleanupPad(Value *ParentPad, + ArrayRef Args = None, + const Twine &Name = "") { + return Insert(CleanupPadInst::Create(ParentPad, Args), Name); + } + + CatchReturnInst *CreateCatchRet(CatchPadInst *CatchPad, BasicBlock *BB) { + return Insert(CatchReturnInst::Create(CatchPad, BB)); + } + UnreachableInst *CreateUnreachable() { return Insert(new UnreachableInst(Context)); } @@ -700,6 +758,7 @@ private: I->setFastMathFlags(FMF); return I; } + public: Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "", bool HasNUW = false, bool HasNSW = false) { @@ -1326,18 +1385,22 @@ public: const Twine &Name = "") { if (V->getType() == DestTy) return V; - if (V->getType()->isPointerTy() && DestTy->isIntegerTy()) + if (V->getType()->getScalarType()->isPointerTy() && + DestTy->getScalarType()->isIntegerTy()) return CreatePtrToInt(V, DestTy, Name); - if (V->getType()->isIntegerTy() && DestTy->isPointerTy()) + if (V->getType()->getScalarType()->isIntegerTy() && + DestTy->getScalarType()->isPointerTy()) return CreateIntToPtr(V, DestTy, Name); return CreateBitCast(V, DestTy, Name); } + private: // \brief Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a // compile time error, instead of converting the string to bool for the // isSigned parameter. 
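The CreateCondBr and CreateSwitch overloads above now thread an optional second metadata node through addBranchMetadata, attaching !unpredictable next to the !prof weights. A short sketch, assuming the condition and both blocks already exist:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

// Emit a conditional branch carrying both profile weights and the new
// !unpredictable hint (an empty metadata node).
void emitHintedBranch(IRBuilder<> &B, Value *Cond, BasicBlock *TrueBB,
                      BasicBlock *FalseBB) {
  MDBuilder MDB(B.getContext());
  MDNode *Weights = MDB.createBranchWeights(/*TrueWeight=*/95,
                                            /*FalseWeight=*/5);
  MDNode *Unpredictable = MDNode::get(B.getContext(), None);
  B.CreateCondBr(Cond, TrueBB, FalseBB, Weights, Unpredictable);
}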
Value *CreateIntCast(Value *, Type *, const char *) = delete; + public: Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") { if (V->getType() == DestTy) @@ -1465,18 +1528,30 @@ public: } CallInst *CreateCall(Value *Callee, ArrayRef Args = None, + ArrayRef OpBundles = None, const Twine &Name = "") { - return Insert(CallInst::Create(Callee, Args), Name); + return Insert(CallInst::Create(Callee, Args, OpBundles), Name); + } + + CallInst *CreateCall(Value *Callee, ArrayRef Args, + const Twine &Name, MDNode *FPMathTag = nullptr) { + PointerType *PTy = cast(Callee->getType()); + FunctionType *FTy = cast(PTy->getElementType()); + return CreateCall(FTy, Callee, Args, Name, FPMathTag); } CallInst *CreateCall(llvm::FunctionType *FTy, Value *Callee, - ArrayRef Args, const Twine &Name = "") { - return Insert(CallInst::Create(FTy, Callee, Args), Name); + ArrayRef Args, const Twine &Name = "", + MDNode *FPMathTag = nullptr) { + CallInst *CI = CallInst::Create(FTy, Callee, Args); + if (isa(CI)) + CI = cast(AddFPMathAttributes(CI, FPMathTag, FMF)); + return Insert(CI, Name); } CallInst *CreateCall(Function *Callee, ArrayRef Args, - const Twine &Name = "") { - return CreateCall(Callee->getFunctionType(), Callee, Args, Name); + const Twine &Name = "", MDNode *FPMathTag = nullptr) { + return CreateCall(Callee->getFunctionType(), Callee, Args, Name, FPMathTag); } Value *CreateSelect(Value *C, Value *True, Value *False, @@ -1594,6 +1669,32 @@ public: Name); } + /// \brief Create an invariant.group.barrier intrinsic call, that stops + /// optimizer to propagate equality using invariant.group metadata. + /// If Ptr type is different from i8*, it's casted to i8* before call + /// and casted back to Ptr type after call. + Value *CreateInvariantGroupBarrier(Value *Ptr) { + Module *M = BB->getParent()->getParent(); + Function *FnInvariantGroupBarrier = Intrinsic::getDeclaration(M, + Intrinsic::invariant_group_barrier); + + Type *ArgumentAndReturnType = FnInvariantGroupBarrier->getReturnType(); + assert(ArgumentAndReturnType == + FnInvariantGroupBarrier->getFunctionType()->getParamType(0) && + "InvariantGroupBarrier should take and return the same type"); + Type *PtrType = Ptr->getType(); + + bool PtrTypeConversionNeeded = PtrType != ArgumentAndReturnType; + if (PtrTypeConversionNeeded) + Ptr = CreateBitCast(Ptr, ArgumentAndReturnType); + + CallInst *Fn = CreateCall(FnInvariantGroupBarrier, {Ptr}); + + if (PtrTypeConversionNeeded) + return CreateBitCast(Fn, PtrType); + return Fn; + } + /// \brief Return a vector value that contains \arg V broadcasted to \p /// NumElts elements. Value *CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name = "") { @@ -1676,6 +1777,6 @@ public: // Create wrappers for C Binding types (see CBindingWrapping.h). DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef) -} +} // end namespace llvm -#endif +#endif // LLVM_IR_IRBUILDER_H diff --git a/include/llvm/IR/IRPrintingPasses.h b/include/llvm/IR/IRPrintingPasses.h index 5f1d56f7e831..88b18e826daf 100644 --- a/include/llvm/IR/IRPrintingPasses.h +++ b/include/llvm/IR/IRPrintingPasses.h @@ -47,6 +47,12 @@ FunctionPass *createPrintFunctionPass(raw_ostream &OS, BasicBlockPass *createPrintBasicBlockPass(raw_ostream &OS, const std::string &Banner = ""); +/// Print out a name of an LLVM value without any prefixes. +/// +/// The name is surrounded with ""'s and escaped if it has any special or +/// non-printable characters in it. 
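CreateInvariantGroupBarrier, added above, hides the i8* plumbing: the intrinsic takes and returns i8*, and the builder inserts bitcasts in both directions when the pointer has another type. A sketch, with Ptr assumed to be some pointer-typed Value in scope:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Strip invariant.group knowledge from Ptr before a load whose result may
// legitimately differ from what the metadata promised.
Value *barrierThenLoad(IRBuilder<> &B, Value *Ptr) {
  Value *Escaped = B.CreateInvariantGroupBarrier(Ptr);
  // Escaped has been cast back to Ptr's original pointer type.
  return B.CreateLoad(Escaped);
}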
+void printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name); + /// \brief Pass for printing a Module as LLVM's text IR assembly. /// /// Note: This pass is for use with the new pass manager. Use the create...Pass diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h index 08b51021116c..d2e9e48539ce 100644 --- a/include/llvm/IR/InlineAsm.h +++ b/include/llvm/IR/InlineAsm.h @@ -44,11 +44,12 @@ private: void operator=(const InlineAsm&) = delete; std::string AsmString, Constraints; + FunctionType *FTy; bool HasSideEffects; bool IsAlignStack; AsmDialect Dialect; - InlineAsm(PointerType *Ty, const std::string &AsmString, + InlineAsm(FunctionType *Ty, const std::string &AsmString, const std::string &Constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect); ~InlineAsm() override; @@ -56,15 +57,15 @@ private: /// When the ConstantUniqueMap merges two types and makes two InlineAsms /// identical, it destroys one of them with this method. void destroyConstant(); -public: +public: /// InlineAsm::get - Return the specified uniqued inline asm string. /// static InlineAsm *get(FunctionType *Ty, StringRef AsmString, StringRef Constraints, bool hasSideEffects, bool isAlignStack = false, AsmDialect asmDialect = AD_ATT); - + bool hasSideEffects() const { return HasSideEffects; } bool isAlignStack() const { return IsAlignStack; } AsmDialect getDialect() const { return Dialect; } @@ -74,11 +75,11 @@ public: PointerType *getType() const { return reinterpret_cast(Value::getType()); } - + /// getFunctionType - InlineAsm's are always pointers to functions. /// FunctionType *getFunctionType() const; - + const std::string &getAsmString() const { return AsmString; } const std::string &getConstraintString() const { return Constraints; } @@ -88,15 +89,15 @@ public: /// static bool Verify(FunctionType *Ty, StringRef Constraints); - // Constraint String Parsing + // Constraint String Parsing enum ConstraintPrefix { isInput, // 'x' isOutput, // '=x' isClobber // '~x' }; - + typedef std::vector ConstraintCodeVector; - + struct SubConstraintInfo { /// MatchingInput - If this is not -1, this is an output constraint where an /// input constraint is required to match it (e.g. "0"). The value is the @@ -113,80 +114,79 @@ public: typedef std::vector SubConstraintInfoVector; struct ConstraintInfo; typedef std::vector ConstraintInfoVector; - + struct ConstraintInfo { /// Type - The basic type of the constraint: input/output/clobber /// ConstraintPrefix Type; - + /// isEarlyClobber - "&": output operand writes result before inputs are all /// read. This is only ever set for an output operand. - bool isEarlyClobber; - + bool isEarlyClobber; + /// MatchingInput - If this is not -1, this is an output constraint where an /// input constraint is required to match it (e.g. "0"). The value is the /// constraint number that matches this one (for example, if this is /// constraint #0 and constraint #4 has the value "0", this will be 4). signed char MatchingInput; - + /// hasMatchingInput - Return true if this is an output constraint that has /// a matching input constraint. bool hasMatchingInput() const { return MatchingInput != -1; } - + /// isCommutative - This is set to true for a constraint that is commutative /// with the next operand. bool isCommutative; - + /// isIndirect - True if this operand is an indirect operand. This means /// that the address of the source or destination is present in the call /// instruction, instead of it being returned or passed in explicitly. 
This /// is represented with a '*' in the asm string. bool isIndirect; - + /// Code - The constraint code, either the register name (in braces) or the /// constraint letter/number. ConstraintCodeVector Codes; - + /// isMultipleAlternative - '|': has multiple-alternative constraints. bool isMultipleAlternative; - + /// multipleAlternatives - If there are multiple alternative constraints, /// this array will contain them. Otherwise it will be empty. SubConstraintInfoVector multipleAlternatives; - + /// The currently selected alternative constraint index. unsigned currentAlternativeIndex; - - ///Default constructor. + + /// Default constructor. ConstraintInfo(); - + /// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the /// fields in this structure. If the constraint string is not understood, /// return true, otherwise return false. bool Parse(StringRef Str, ConstraintInfoVector &ConstraintsSoFar); - + /// selectAlternative - Point this constraint to the alternative constraint /// indicated by the index. void selectAlternative(unsigned index); }; - + /// ParseConstraints - Split up the constraint string into the specific /// constraints and their prefixes. If this returns an empty vector, and if /// the constraint string itself isn't empty, there was an error parsing. static ConstraintInfoVector ParseConstraints(StringRef ConstraintString); - - /// ParseConstraints - Parse the constraints of this inlineasm object, + + /// ParseConstraints - Parse the constraints of this inlineasm object, /// returning them the same way that ParseConstraints(str) does. ConstraintInfoVector ParseConstraints() const { return ParseConstraints(Constraints); } - + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Value *V) { return V->getValueID() == Value::InlineAsmVal; } - // These are helper methods for dealing with flags in the INLINEASM SDNode // in the backend. // @@ -203,7 +203,7 @@ public: // code. // Else: // Bit 30-16 - The register class ID to use for the operand. - + enum : uint32_t { // Fixed operands on an INLINEASM SDNode. Op_InputChain = 0, @@ -264,15 +264,15 @@ public: Flag_MatchingOperand = 0x80000000 }; - + static unsigned getFlagWord(unsigned Kind, unsigned NumOps) { assert(((NumOps << 3) & ~0xffff) == 0 && "Too many inline asm operands!"); assert(Kind >= Kind_RegUse && Kind <= Kind_Mem && "Invalid Kind"); return Kind | (NumOps << 3); } - + /// getFlagWordForMatchingOp - Augment an existing flag word returned by - /// getFlagWord with information indicating that this input operand is tied + /// getFlagWord with information indicating that this input operand is tied /// to a previous output operand. 
static unsigned getFlagWordForMatchingOp(unsigned InputFlag, unsigned MatchedOperandNo) { @@ -355,7 +355,6 @@ public: RC = High - 1; return true; } - }; } // End llvm namespace diff --git a/include/llvm/IR/InstIterator.h b/include/llvm/IR/InstIterator.h index f3ce6490fb66..1baca21c73af 100644 --- a/include/llvm/IR/InstIterator.h +++ b/include/llvm/IR/InstIterator.h @@ -115,19 +115,18 @@ private: } }; - -typedef InstIterator, - Function::iterator, BasicBlock::iterator, - Instruction> inst_iterator; -typedef InstIterator, - Function::const_iterator, - BasicBlock::const_iterator, +typedef InstIterator, Function::iterator, + BasicBlock::iterator, Instruction> inst_iterator; +typedef InstIterator, + Function::const_iterator, BasicBlock::const_iterator, const Instruction> const_inst_iterator; +typedef iterator_range inst_range; +typedef iterator_range const_inst_range; inline inst_iterator inst_begin(Function *F) { return inst_iterator(*F); } inline inst_iterator inst_end(Function *F) { return inst_iterator(*F, true); } -inline iterator_range inst_range(Function *F) { - return iterator_range(inst_begin(F), inst_end(F)); +inline inst_range instructions(Function *F) { + return inst_range(inst_begin(F), inst_end(F)); } inline const_inst_iterator inst_begin(const Function *F) { return const_inst_iterator(*F); @@ -135,13 +134,13 @@ inline const_inst_iterator inst_begin(const Function *F) { inline const_inst_iterator inst_end(const Function *F) { return const_inst_iterator(*F, true); } -inline iterator_range inst_range(const Function *F) { - return iterator_range(inst_begin(F), inst_end(F)); +inline const_inst_range instructions(const Function *F) { + return const_inst_range(inst_begin(F), inst_end(F)); } inline inst_iterator inst_begin(Function &F) { return inst_iterator(F); } inline inst_iterator inst_end(Function &F) { return inst_iterator(F, true); } -inline iterator_range inst_range(Function &F) { - return iterator_range(inst_begin(F), inst_end(F)); +inline inst_range instructions(Function &F) { + return inst_range(inst_begin(F), inst_end(F)); } inline const_inst_iterator inst_begin(const Function &F) { return const_inst_iterator(F); @@ -149,8 +148,8 @@ inline const_inst_iterator inst_begin(const Function &F) { inline const_inst_iterator inst_end(const Function &F) { return const_inst_iterator(F, true); } -inline iterator_range inst_range(const Function &F) { - return iterator_range(inst_begin(F), inst_end(F)); +inline const_inst_range instructions(const Function &F) { + return const_inst_range(inst_begin(F), inst_end(F)); } } // End llvm namespace diff --git a/include/llvm/IR/InstVisitor.h b/include/llvm/IR/InstVisitor.h index 581e860b8382..088d3e0fbfa5 100644 --- a/include/llvm/IR/InstVisitor.h +++ b/include/llvm/IR/InstVisitor.h @@ -169,6 +169,9 @@ public: RetTy visitIndirectBrInst(IndirectBrInst &I) { DELEGATE(TerminatorInst);} RetTy visitResumeInst(ResumeInst &I) { DELEGATE(TerminatorInst);} RetTy visitUnreachableInst(UnreachableInst &I) { DELEGATE(TerminatorInst);} + RetTy visitCleanupReturnInst(CleanupReturnInst &I) { DELEGATE(TerminatorInst);} + RetTy visitCatchReturnInst(CatchReturnInst &I) { DELEGATE(TerminatorInst); } + RetTy visitCatchSwitchInst(CatchSwitchInst &I) { DELEGATE(TerminatorInst);} RetTy visitICmpInst(ICmpInst &I) { DELEGATE(CmpInst);} RetTy visitFCmpInst(FCmpInst &I) { DELEGATE(CmpInst);} RetTy visitAllocaInst(AllocaInst &I) { DELEGATE(UnaryInstruction);} @@ -200,6 +203,9 @@ public: RetTy visitExtractValueInst(ExtractValueInst &I){ DELEGATE(UnaryInstruction);} RetTy 
visitInsertValueInst(InsertValueInst &I) { DELEGATE(Instruction); } RetTy visitLandingPadInst(LandingPadInst &I) { DELEGATE(Instruction); } + RetTy visitFuncletPadInst(FuncletPadInst &I) { DELEGATE(Instruction); } + RetTy visitCleanupPadInst(CleanupPadInst &I) { DELEGATE(FuncletPadInst); } + RetTy visitCatchPadInst(CatchPadInst &I) { DELEGATE(FuncletPadInst); } // Handle the special instrinsic instruction classes. RetTy visitDbgDeclareInst(DbgDeclareInst &I) { DELEGATE(DbgInfoIntrinsic);} diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index b791ded0e194..5091bb407833 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -16,9 +16,12 @@ #ifndef LLVM_IR_INSTRTYPES_H #define LLVM_IR_INSTRTYPES_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/Twine.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/OperandTraits.h" namespace llvm { @@ -51,8 +54,8 @@ protected: virtual BasicBlock *getSuccessorV(unsigned idx) const = 0; virtual unsigned getNumSuccessorsV() const = 0; virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0; -public: +public: /// Return the number of successors that this terminator has. unsigned getNumSuccessors() const { return getNumSuccessorsV(); @@ -75,8 +78,198 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } -}; + // \brief Returns true if this terminator relates to exception handling. + bool isExceptional() const { + switch (getOpcode()) { + case Instruction::CatchSwitch: + case Instruction::CatchRet: + case Instruction::CleanupRet: + case Instruction::Invoke: + case Instruction::Resume: + return true; + default: + return false; + } + } + + //===--------------------------------------------------------------------===// + // succ_iterator definition + //===--------------------------------------------------------------------===// + + template // Successor Iterator + class SuccIterator : public std::iterator { + typedef std::iterator + super; + + public: + typedef typename super::pointer pointer; + typedef typename super::reference reference; + + private: + Term TermInst; + unsigned idx; + typedef SuccIterator Self; + + inline bool index_is_valid(unsigned idx) { + return idx < TermInst->getNumSuccessors(); + } + + /// \brief Proxy object to allow write access in operator[] + class SuccessorProxy { + Self it; + + public: + explicit SuccessorProxy(const Self &it) : it(it) {} + + SuccessorProxy(const SuccessorProxy &) = default; + + SuccessorProxy &operator=(SuccessorProxy r) { + *this = reference(r); + return *this; + } + + SuccessorProxy &operator=(reference r) { + it.TermInst->setSuccessor(it.idx, r); + return *this; + } + + operator reference() const { return *it; } + }; + + public: + // begin iterator + explicit inline SuccIterator(Term T) : TermInst(T), idx(0) {} + // end iterator + inline SuccIterator(Term T, bool) : TermInst(T) { + if (TermInst) + idx = TermInst->getNumSuccessors(); + else + // Term == NULL happens, if a basic block is not fully constructed and + // consequently getTerminator() returns NULL. In this case we construct + // a SuccIterator which describes a basic block that has zero + // successors. + // Defining SuccIterator for incomplete and malformed CFGs is especially + // useful for debugging. + idx = 0; + } + + /// This is used to interface between code that wants to + /// operate on terminator instructions directly. 
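The InstIterator hunk above renames the inst_range(F) helper to instructions(F) and gives the range types names of their own. Usage is unchanged apart from the spelling; a sketch:

#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Walk every instruction in F in block order, ignoring the basic-block
// boundaries that the iterator flattens away.
void dumpOpcodes(Function &F) {
  for (Instruction &I : instructions(F))
    errs() << I.getOpcodeName() << "\n";
}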
+ unsigned getSuccessorIndex() const { return idx; } + + inline bool operator==(const Self &x) const { return idx == x.idx; } + inline bool operator!=(const Self &x) const { return !operator==(x); } + + inline reference operator*() const { return TermInst->getSuccessor(idx); } + inline pointer operator->() const { return operator*(); } + + inline Self &operator++() { + ++idx; + return *this; + } // Preincrement + + inline Self operator++(int) { // Postincrement + Self tmp = *this; + ++*this; + return tmp; + } + + inline Self &operator--() { + --idx; + return *this; + } // Predecrement + inline Self operator--(int) { // Postdecrement + Self tmp = *this; + --*this; + return tmp; + } + + inline bool operator<(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot compare iterators of different blocks!"); + return idx < x.idx; + } + + inline bool operator<=(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot compare iterators of different blocks!"); + return idx <= x.idx; + } + inline bool operator>=(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot compare iterators of different blocks!"); + return idx >= x.idx; + } + + inline bool operator>(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot compare iterators of different blocks!"); + return idx > x.idx; + } + + inline Self &operator+=(int Right) { + unsigned new_idx = idx + Right; + assert(index_is_valid(new_idx) && "Iterator index out of bound"); + idx = new_idx; + return *this; + } + + inline Self operator+(int Right) const { + Self tmp = *this; + tmp += Right; + return tmp; + } + + inline Self &operator-=(int Right) { return operator+=(-Right); } + + inline Self operator-(int Right) const { return operator+(-Right); } + + inline int operator-(const Self &x) const { + assert(TermInst == x.TermInst && + "Cannot work on iterators of different blocks!"); + int distance = idx - x.idx; + return distance; + } + + inline SuccessorProxy operator[](int offset) { + Self tmp = *this; + tmp += offset; + return SuccessorProxy(tmp); + } + + /// Get the source BB of this iterator. 
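This iterator machinery feeds the successors() range that the class exposes a little further down, next to the new isExceptional() query. A sketch of the intended client-side use, assuming a well-formed block:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Enumerate a terminator's successors through the range adaptor and flag
// the terminators that participate in exception handling.
void visitSuccessors(BasicBlock &BB) {
  TerminatorInst *TI = BB.getTerminator();
  if (TI->isExceptional())
    errs() << BB.getName() << " ends in an EH terminator\n";
  for (BasicBlock *Succ : TI->successors())
    errs() << "  -> " << Succ->getName() << "\n";
}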
+ inline BB *getSource() { + assert(TermInst && "Source not available, if basic block was malformed"); + return TermInst->getParent(); + } + }; + + typedef SuccIterator succ_iterator; + typedef SuccIterator + succ_const_iterator; + typedef llvm::iterator_range succ_range; + typedef llvm::iterator_range succ_const_range; + +private: + inline succ_iterator succ_begin() { return succ_iterator(this); } + inline succ_const_iterator succ_begin() const { + return succ_const_iterator(this); + } + inline succ_iterator succ_end() { return succ_iterator(this, true); } + inline succ_const_iterator succ_end() const { + return succ_const_iterator(this, true); + } + +public: + inline succ_range successors() { + return succ_range(succ_begin(), succ_end()); + } + inline succ_const_range successors() const { + return succ_const_range(succ_begin(), succ_end()); + } +}; //===----------------------------------------------------------------------===// // UnaryInstruction Class @@ -95,6 +288,7 @@ protected: : Instruction(Ty, iType, &Op<0>(), 1, IAE) { Op<0>() = V; } + public: // allocate space for exactly one operand void *operator new(size_t s) { @@ -133,6 +327,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value) class BinaryOperator : public Instruction { void *operator new(size_t, unsigned) = delete; + protected: void init(BinaryOps iType); BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty, @@ -209,7 +404,7 @@ public: BO->setHasNoSignedWrap(true); return BO; } - + static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name = "") { BinaryOperator *BO = Create(Opc, V1, V2, Name); @@ -228,7 +423,7 @@ public: BO->setHasNoUnsignedWrap(true); return BO; } - + static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name = "") { BinaryOperator *BO = Create(Opc, V1, V2, Name); @@ -247,29 +442,29 @@ public: BO->setIsExact(true); return BO; } - -#define DEFINE_HELPERS(OPC, NUWNSWEXACT) \ - static BinaryOperator *Create ## NUWNSWEXACT ## OPC \ - (Value *V1, Value *V2, const Twine &Name = "") { \ - return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name); \ - } \ - static BinaryOperator *Create ## NUWNSWEXACT ## OPC \ - (Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) { \ - return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB); \ - } \ - static BinaryOperator *Create ## NUWNSWEXACT ## OPC \ - (Value *V1, Value *V2, const Twine &Name, Instruction *I) { \ - return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I); \ + +#define DEFINE_HELPERS(OPC, NUWNSWEXACT) \ + static BinaryOperator *Create##NUWNSWEXACT##OPC(Value *V1, Value *V2, \ + const Twine &Name = "") { \ + return Create##NUWNSWEXACT(Instruction::OPC, V1, V2, Name); \ + } \ + static BinaryOperator *Create##NUWNSWEXACT##OPC( \ + Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) { \ + return Create##NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB); \ + } \ + static BinaryOperator *Create##NUWNSWEXACT##OPC( \ + Value *V1, Value *V2, const Twine &Name, Instruction *I) { \ + return Create##NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I); \ } - - DEFINE_HELPERS(Add, NSW) // CreateNSWAdd - DEFINE_HELPERS(Add, NUW) // CreateNUWAdd - DEFINE_HELPERS(Sub, NSW) // CreateNSWSub - DEFINE_HELPERS(Sub, NUW) // CreateNUWSub - DEFINE_HELPERS(Mul, NSW) // CreateNSWMul - DEFINE_HELPERS(Mul, NUW) // CreateNUWMul - DEFINE_HELPERS(Shl, NSW) // CreateNSWShl - DEFINE_HELPERS(Shl, NUW) // CreateNUWShl + + DEFINE_HELPERS(Add, NSW) // CreateNSWAdd + DEFINE_HELPERS(Add, NUW) 
// CreateNUWAdd + DEFINE_HELPERS(Sub, NSW) // CreateNSWSub + DEFINE_HELPERS(Sub, NUW) // CreateNUWSub + DEFINE_HELPERS(Mul, NSW) // CreateNSWMul + DEFINE_HELPERS(Mul, NUW) // CreateNUWMul + DEFINE_HELPERS(Shl, NSW) // CreateNSWShl + DEFINE_HELPERS(Shl, NUW) // CreateNUWShl DEFINE_HELPERS(SDiv, Exact) // CreateExactSDiv DEFINE_HELPERS(UDiv, Exact) // CreateExactUDiv @@ -277,7 +472,7 @@ public: DEFINE_HELPERS(LShr, Exact) // CreateExactLShr #undef DEFINE_HELPERS - + /// Helper functions to construct and inspect unary operations (NEG and NOT) /// via binary operators SUB and XOR: /// @@ -355,7 +550,7 @@ public: /// Convenience method to copy supported wrapping, exact, and fast-math flags /// from V to this instruction. void copyIRFlags(const Value *V); - + /// Logical 'and' of any supported wrapping, exact, and fast-math flags of /// V and this instruction. void andIRFlags(const Value *V); @@ -388,6 +583,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value) /// @brief Base class of casting instructions. class CastInst : public UnaryInstruction { void anchor() override; + protected: /// @brief Constructor with insert-before-instruction semantics for subclasses CastInst(Type *Ty, unsigned iType, Value *S, @@ -401,6 +597,7 @@ protected: : UnaryInstruction(Ty, iType, S, InsertAtEnd) { setName(NameStr); } + public: /// Provides a way to construct any of the CastInst subclasses using an /// opcode instead of the subclass's constructor. The opcode must be in the @@ -490,7 +687,7 @@ public: Value *S, ///< The pointer value to be casted (operand 0) Type *Ty, ///< The type to which cast should be made const Twine &Name = "", ///< Name for the instruction - Instruction *InsertBefore = 0 ///< Place to insert the instruction + Instruction *InsertBefore = nullptr ///< Place to insert the instruction ); /// @brief Create a BitCast, a PtrToInt, or an IntToPTr cast instruction. @@ -503,7 +700,7 @@ public: Value *S, ///< The pointer value to be casted (operand 0) Type *Ty, ///< The type to which cast should be made const Twine &Name = "", ///< Name for the instruction - Instruction *InsertBefore = 0 ///< Place to insert the instruction + Instruction *InsertBefore = nullptr ///< Place to insert the instruction ); /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts. @@ -677,18 +874,6 @@ public: /// This class is the base class for the comparison instructions. /// @brief Abstract base class of comparison instructions. class CmpInst : public Instruction { - void *operator new(size_t, unsigned) = delete; - CmpInst() = delete; -protected: - CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred, - Value *LHS, Value *RHS, const Twine &Name = "", - Instruction *InsertBefore = nullptr); - - CmpInst(Type *ty, Instruction::OtherOps op, unsigned short pred, - Value *LHS, Value *RHS, const Twine &Name, - BasicBlock *InsertAtEnd); - - void anchor() override; // Out of line virtual method. public: /// This enumeration lists the possible predicates for CmpInst subclasses. 
/// Values in the range 0-31 are reserved for FCmpInst, while values in the @@ -730,6 +915,22 @@ public: BAD_ICMP_PREDICATE = ICMP_SLE + 1 }; +private: + void *operator new(size_t, unsigned) = delete; + CmpInst() = delete; + +protected: + CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred, + Value *LHS, Value *RHS, const Twine &Name = "", + Instruction *InsertBefore = nullptr); + + CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred, + Value *LHS, Value *RHS, const Twine &Name, + BasicBlock *InsertAtEnd); + + void anchor() override; // Out of line virtual method. + +public: // allocate space for exactly two operands void *operator new(size_t s) { return User::operator new(s, 2); @@ -740,7 +941,7 @@ public: /// The specified Instruction is allowed to be a dereferenced end iterator. /// @brief Create a CmpInst static CmpInst *Create(OtherOps Op, - unsigned short predicate, Value *S1, + Predicate predicate, Value *S1, Value *S2, const Twine &Name = "", Instruction *InsertBefore = nullptr); @@ -748,7 +949,7 @@ public: /// two operands. Also automatically insert this instruction to the end of /// the BasicBlock specified. /// @brief Create a CmpInst - static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1, + static CmpInst *Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name, BasicBlock *InsertAtEnd); /// @brief Get the opcode casted to the right type @@ -775,7 +976,6 @@ public: bool isFPPredicate() const { return isFPPredicate(getPredicate()); } bool isIntPredicate() const { return isIntPredicate(getPredicate()); } - /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE, /// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc. /// @returns the inverse predicate for the instruction's current predicate. @@ -833,6 +1033,19 @@ public: return isUnsigned(getPredicate()); } + /// For example, ULT->SLT, ULE->SLE, UGT->SGT, UGE->SGE, SLT->Failed assert + /// @returns the signed version of the unsigned predicate pred. + /// @brief return the signed version of a predicate + static Predicate getSignedPredicate(Predicate pred); + + /// For example, ULT->SLT, ULE->SLE, UGT->SGT, UGE->SGE, SLT->Failed assert + /// @returns the signed version of the predicate for this instruction (which + /// has to be an unsigned predicate). + /// @brief return the signed version of a predicate + Predicate getSignedPredicate() { + return getSignedPredicate(getPredicate()); + } + /// This is just a convenience. /// @brief Determine if this is true when both operands are the same. bool isTrueWhenEqual() const { @@ -847,23 +1060,23 @@ public: /// @returns true if the predicate is unsigned, false otherwise. /// @brief Determine if the predicate is an unsigned operation. - static bool isUnsigned(unsigned short predicate); + static bool isUnsigned(Predicate predicate); /// @returns true if the predicate is signed, false otherwise. /// @brief Determine if the predicate is an signed operation. - static bool isSigned(unsigned short predicate); + static bool isSigned(Predicate predicate); /// @brief Determine if the predicate is an ordered operation. - static bool isOrdered(unsigned short predicate); + static bool isOrdered(Predicate predicate); /// @brief Determine if the predicate is an unordered operation. - static bool isUnordered(unsigned short predicate); + static bool isUnordered(Predicate predicate); /// Determine if the predicate is true when comparing a value with itself. 
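With the predicate helpers above retyped from unsigned short to Predicate, and getSignedPredicate newly added, misuse is caught by the type system rather than by convention. A sketch of the new helper; per its comment it asserts if handed an already-signed predicate:

#include <cassert>
#include "llvm/IR/InstrTypes.h"

using namespace llvm;

// Map an unsigned comparison to its signed counterpart.
void demoSignedPredicate() {
  CmpInst::Predicate P = CmpInst::ICMP_ULT;
  assert(CmpInst::isUnsigned(P));
  assert(CmpInst::getSignedPredicate(P) == CmpInst::ICMP_SLT);
}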
- static bool isTrueWhenEqual(unsigned short predicate); + static bool isTrueWhenEqual(Predicate predicate); /// Determine if the predicate is false when comparing a value with itself. - static bool isFalseWhenEqual(unsigned short predicate); + static bool isFalseWhenEqual(Predicate predicate); /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { @@ -882,6 +1095,7 @@ public: } return Type::getInt1Ty(opnd_type->getContext()); } + private: // Shadow Value::setValueSubclassData with a private forwarding method so that // subclasses cannot accidentally use it. @@ -890,7 +1104,6 @@ private: } }; - // FIXME: these are redundant if CmpInst < BinaryOperator template <> struct OperandTraits : public FixedNumOperandTraits { @@ -898,6 +1111,523 @@ struct OperandTraits : public FixedNumOperandTraits { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value) -} // End llvm namespace +//===----------------------------------------------------------------------===// +// FuncletPadInst Class +//===----------------------------------------------------------------------===// +class FuncletPadInst : public Instruction { +private: + void init(Value *ParentPad, ArrayRef Args, const Twine &NameStr); -#endif + FuncletPadInst(const FuncletPadInst &CPI); + + explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, + ArrayRef Args, unsigned Values, + const Twine &NameStr, Instruction *InsertBefore); + explicit FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, + ArrayRef Args, unsigned Values, + const Twine &NameStr, BasicBlock *InsertAtEnd); + +protected: + // Note: Instruction needs to be a friend here to call cloneImpl. + friend class Instruction; + friend class CatchPadInst; + friend class CleanupPadInst; + FuncletPadInst *cloneImpl() const; + +public: + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + /// getNumArgOperands - Return the number of funcletpad arguments. + /// + unsigned getNumArgOperands() const { return getNumOperands() - 1; } + + /// Convenience accessors + + /// \brief Return the outer EH-pad this funclet is nested within. + /// + /// Note: This returns the associated CatchSwitchInst if this FuncletPadInst + /// is a CatchPadInst. + Value *getParentPad() const { return Op<-1>(); } + void setParentPad(Value *ParentPad) { + assert(ParentPad); + Op<-1>() = ParentPad; + } + + /// getArgOperand/setArgOperand - Return/set the i-th funcletpad argument. + /// + Value *getArgOperand(unsigned i) const { return getOperand(i); } + void setArgOperand(unsigned i, Value *v) { setOperand(i, v); } + + /// arg_operands - iteration adapter for range-for loops. + op_range arg_operands() { return op_range(op_begin(), op_end() - 1); } + + /// arg_operands - iteration adapter for range-for loops. + const_op_range arg_operands() const { + return const_op_range(op_begin(), op_end() - 1); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const Instruction *I) { return I->isFuncletPad(); } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +template <> +struct OperandTraits + : public VariadicOperandTraits {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(FuncletPadInst, Value) + +/// \brief A lightweight accessor for an operand bundle meant to be passed +/// around by value. 
+struct OperandBundleUse { + ArrayRef Inputs; + + OperandBundleUse() {} + explicit OperandBundleUse(StringMapEntry *Tag, ArrayRef Inputs) + : Inputs(Inputs), Tag(Tag) {} + + /// \brief Return true if the operand at index \p Idx in this operand bundle + /// has the attribute A. + bool operandHasAttr(unsigned Idx, Attribute::AttrKind A) const { + if (isDeoptOperandBundle()) + if (A == Attribute::ReadOnly || A == Attribute::NoCapture) + return Inputs[Idx]->getType()->isPointerTy(); + + // Conservative answer: no operands have any attributes. + return false; + }; + + /// \brief Return the tag of this operand bundle as a string. + StringRef getTagName() const { + return Tag->getKey(); + } + + /// \brief Return the tag of this operand bundle as an integer. + /// + /// Operand bundle tags are interned by LLVMContextImpl::getOrInsertBundleTag, + /// and this function returns the unique integer getOrInsertBundleTag + /// associated the tag of this operand bundle to. + uint32_t getTagID() const { + return Tag->getValue(); + } + + /// \brief Return true if this is a "deopt" operand bundle. + bool isDeoptOperandBundle() const { + return getTagID() == LLVMContext::OB_deopt; + } + + /// \brief Return true if this is a "funclet" operand bundle. + bool isFuncletOperandBundle() const { + return getTagID() == LLVMContext::OB_funclet; + } + +private: + /// \brief Pointer to an entry in LLVMContextImpl::getOrInsertBundleTag. + StringMapEntry *Tag; +}; + +/// \brief A container for an operand bundle being viewed as a set of values +/// rather than a set of uses. +/// +/// Unlike OperandBundleUse, OperandBundleDefT owns the memory it carries, and +/// so it is possible to create and pass around "self-contained" instances of +/// OperandBundleDef and ConstOperandBundleDef. +template class OperandBundleDefT { + std::string Tag; + std::vector Inputs; + +public: + explicit OperandBundleDefT(std::string Tag, std::vector Inputs) + : Tag(std::move(Tag)), Inputs(std::move(Inputs)) {} + explicit OperandBundleDefT(std::string Tag, ArrayRef Inputs) + : Tag(std::move(Tag)), Inputs(Inputs) {} + + explicit OperandBundleDefT(const OperandBundleUse &OBU) { + Tag = OBU.getTagName(); + Inputs.insert(Inputs.end(), OBU.Inputs.begin(), OBU.Inputs.end()); + } + + ArrayRef inputs() const { return Inputs; } + + typedef typename std::vector::const_iterator input_iterator; + size_t input_size() const { return Inputs.size(); } + input_iterator input_begin() const { return Inputs.begin(); } + input_iterator input_end() const { return Inputs.end(); } + + StringRef getTag() const { return Tag; } +}; + +typedef OperandBundleDefT OperandBundleDef; +typedef OperandBundleDefT ConstOperandBundleDef; + +/// \brief A mixin to add operand bundle functionality to llvm instruction +/// classes. +/// +/// OperandBundleUser uses the descriptor area co-allocated with the host User +/// to store some meta information about which operands are "normal" operands, +/// and which ones belong to some operand bundle. +/// +/// The layout of an operand bundle user is +/// +/// +-----------uint32_t End-------------------------------------+ +/// | | +/// | +--------uint32_t Begin--------------------+ | +/// | | | | +/// ^ ^ v v +/// |------|------|----|----|----|----|----|---------|----|---------|----|----- +/// | BOI0 | BOI1 | .. | DU | U0 | U1 | .. | BOI0_U0 | .. | BOI1_U0 | .. 
| Un +/// |------|------|----|----|----|----|----|---------|----|---------|----|----- +/// v v ^ ^ +/// | | | | +/// | +--------uint32_t Begin------------+ | +/// | | +/// +-----------uint32_t End-----------------------------+ +/// +/// +/// BOI0, BOI1 ... are descriptions of operand bundles in this User's use list. +/// These descriptions are installed and managed by this class, and they're all +/// instances of OperandBundleUser::BundleOpInfo. +/// +/// DU is an additional descriptor installed by User's 'operator new' to keep +/// track of the 'BOI0 ... BOIN' co-allocation. OperandBundleUser does not +/// access or modify DU in any way, it's an implementation detail private to +/// User. +/// +/// The regular Use& vector for the User starts at U0. The operand bundle uses +/// are part of the Use& vector, just like normal uses. In the diagram above, +/// the operand bundle uses start at BOI0_U0. Each instance of BundleOpInfo has +/// information about a contiguous set of uses constituting an operand bundle, +/// and the total set of operand bundle uses themselves form a contiguous set of +/// uses (i.e. there are no gaps between uses corresponding to individual +/// operand bundles). +/// +/// This class does not know the location of the set of operand bundle uses +/// within the use list -- that is decided by the User using this class via the +/// BeginIdx argument in populateBundleOperandInfos. +/// +/// Currently operand bundle users with hung-off operands are not supported. +template class OperandBundleUser { +public: + /// \brief Return the number of operand bundles associated with this User. + unsigned getNumOperandBundles() const { + return std::distance(bundle_op_info_begin(), bundle_op_info_end()); + } + + /// \brief Return true if this User has any operand bundles. + bool hasOperandBundles() const { return getNumOperandBundles() != 0; } + + /// \brief Return the index of the first bundle operand in the Use array. + unsigned getBundleOperandsStartIndex() const { + assert(hasOperandBundles() && "Don't call otherwise!"); + return bundle_op_info_begin()->Begin; + } + + /// \brief Return the index of the last bundle operand in the Use array. + unsigned getBundleOperandsEndIndex() const { + assert(hasOperandBundles() && "Don't call otherwise!"); + return bundle_op_info_end()[-1].End; + } + + /// \brief Return the total number operands (not operand bundles) used by + /// every operand bundle in this OperandBundleUser. + unsigned getNumTotalBundleOperands() const { + if (!hasOperandBundles()) + return 0; + + unsigned Begin = getBundleOperandsStartIndex(); + unsigned End = getBundleOperandsEndIndex(); + + assert(Begin <= End && "Should be!"); + return End - Begin; + } + + /// \brief Return the operand bundle at a specific index. + OperandBundleUse getOperandBundleAt(unsigned Index) const { + assert(Index < getNumOperandBundles() && "Index out of bounds!"); + return operandBundleFromBundleOpInfo(*(bundle_op_info_begin() + Index)); + } + + /// \brief Return the number of operand bundles with the tag Name attached to + /// this instruction. + unsigned countOperandBundlesOfType(StringRef Name) const { + unsigned Count = 0; + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) + if (getOperandBundleAt(i).getTagName() == Name) + Count++; + + return Count; + } + + /// \brief Return the number of operand bundles with the tag ID attached to + /// this instruction. 
+ unsigned countOperandBundlesOfType(uint32_t ID) const { + unsigned Count = 0; + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) + if (getOperandBundleAt(i).getTagID() == ID) + Count++; + + return Count; + } + + /// \brief Return an operand bundle by name, if present. + /// + /// It is an error to call this for operand bundle types that may have + /// multiple instances of them on the same instruction. + Optional getOperandBundle(StringRef Name) const { + assert(countOperandBundlesOfType(Name) < 2 && "Precondition violated!"); + + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) { + OperandBundleUse U = getOperandBundleAt(i); + if (U.getTagName() == Name) + return U; + } + + return None; + } + + /// \brief Return an operand bundle by tag ID, if present. + /// + /// It is an error to call this for operand bundle types that may have + /// multiple instances of them on the same instruction. + Optional getOperandBundle(uint32_t ID) const { + assert(countOperandBundlesOfType(ID) < 2 && "Precondition violated!"); + + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) { + OperandBundleUse U = getOperandBundleAt(i); + if (U.getTagID() == ID) + return U; + } + + return None; + } + + /// \brief Return the list of operand bundles attached to this instruction as + /// a vector of OperandBundleDefs. + /// + /// This function copies the OperandBundeUse instances associated with this + /// OperandBundleUser to a vector of OperandBundleDefs. Note: + /// OperandBundeUses and OperandBundleDefs are non-trivially *different* + /// representations of operand bundles (see documentation above). + void getOperandBundlesAsDefs(SmallVectorImpl &Defs) const { + for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) + Defs.emplace_back(getOperandBundleAt(i)); + } + + /// \brief Return the operand bundle for the operand at index OpIdx. + /// + /// It is an error to call this with an OpIdx that does not correspond to an + /// bundle operand. + OperandBundleUse getOperandBundleForOperand(unsigned OpIdx) const { + return operandBundleFromBundleOpInfo(getBundleOpInfoForOperand(OpIdx)); + } + + /// \brief Return true if this operand bundle user has operand bundles that + /// may read from the heap. + bool hasReadingOperandBundles() const { + // Implementation note: this is a conservative implementation of operand + // bundle semantics, where *any* operand bundle forces a callsite to be at + // least readonly. + return hasOperandBundles(); + } + + /// \brief Return true if this operand bundle user has operand bundles that + /// may write to the heap. + bool hasClobberingOperandBundles() const { + for (auto &BOI : bundle_op_infos()) { + if (BOI.Tag->second == LLVMContext::OB_deopt || + BOI.Tag->second == LLVMContext::OB_funclet) + continue; + + // This instruction has an operand bundle that is not known to us. + // Assume the worst. + return true; + } + + return false; + } + + /// \brief Return true if the bundle operand at index \p OpIdx has the + /// attribute \p A. + bool bundleOperandHasAttr(unsigned OpIdx, Attribute::AttrKind A) const { + auto &BOI = getBundleOpInfoForOperand(OpIdx); + auto OBU = operandBundleFromBundleOpInfo(BOI); + return OBU.operandHasAttr(OpIdx - BOI.Begin, A); + } + + /// \brief Return true if \p Other has the same sequence of operand bundle + /// tags with the same number of operands on each one of them as this + /// OperandBundleUser. 
+ bool hasIdenticalOperandBundleSchema( + const OperandBundleUser &Other) const { + if (getNumOperandBundles() != Other.getNumOperandBundles()) + return false; + + return std::equal(bundle_op_info_begin(), bundle_op_info_end(), + Other.bundle_op_info_begin()); + }; + +protected: + /// \brief Is the function attribute S disallowed by some operand bundle on + /// this operand bundle user? + bool isFnAttrDisallowedByOpBundle(StringRef S) const { + // Operand bundles only possibly disallow readnone, readonly and argmenonly + // attributes. All String attributes are fine. + return false; + } + + /// \brief Is the function attribute A disallowed by some operand bundle on + /// this operand bundle user? + bool isFnAttrDisallowedByOpBundle(Attribute::AttrKind A) const { + switch (A) { + default: + return false; + + case Attribute::ArgMemOnly: + return hasReadingOperandBundles(); + + case Attribute::ReadNone: + return hasReadingOperandBundles(); + + case Attribute::ReadOnly: + return hasClobberingOperandBundles(); + } + + llvm_unreachable("switch has a default case!"); + } + + /// \brief Used to keep track of an operand bundle. See the main comment on + /// OperandBundleUser above. + struct BundleOpInfo { + /// \brief The operand bundle tag, interned by + /// LLVMContextImpl::getOrInsertBundleTag. + StringMapEntry *Tag; + + /// \brief The index in the Use& vector where operands for this operand + /// bundle starts. + uint32_t Begin; + + /// \brief The index in the Use& vector where operands for this operand + /// bundle ends. + uint32_t End; + + bool operator==(const BundleOpInfo &Other) const { + return Tag == Other.Tag && Begin == Other.Begin && End == Other.End; + } + }; + + /// \brief Simple helper function to map a BundleOpInfo to an + /// OperandBundleUse. + OperandBundleUse + operandBundleFromBundleOpInfo(const BundleOpInfo &BOI) const { + auto op_begin = static_cast(this)->op_begin(); + ArrayRef Inputs(op_begin + BOI.Begin, op_begin + BOI.End); + return OperandBundleUse(BOI.Tag, Inputs); + } + + typedef BundleOpInfo *bundle_op_iterator; + typedef const BundleOpInfo *const_bundle_op_iterator; + + /// \brief Return the start of the list of BundleOpInfo instances associated + /// with this OperandBundleUser. + bundle_op_iterator bundle_op_info_begin() { + if (!static_cast(this)->hasDescriptor()) + return nullptr; + + uint8_t *BytesBegin = static_cast(this)->getDescriptor().begin(); + return reinterpret_cast(BytesBegin); + } + + /// \brief Return the start of the list of BundleOpInfo instances associated + /// with this OperandBundleUser. + const_bundle_op_iterator bundle_op_info_begin() const { + auto *NonConstThis = + const_cast *>(this); + return NonConstThis->bundle_op_info_begin(); + } + + /// \brief Return the end of the list of BundleOpInfo instances associated + /// with this OperandBundleUser. + bundle_op_iterator bundle_op_info_end() { + if (!static_cast(this)->hasDescriptor()) + return nullptr; + + uint8_t *BytesEnd = static_cast(this)->getDescriptor().end(); + return reinterpret_cast(BytesEnd); + } + + /// \brief Return the end of the list of BundleOpInfo instances associated + /// with this OperandBundleUser. + const_bundle_op_iterator bundle_op_info_end() const { + auto *NonConstThis = + const_cast *>(this); + return NonConstThis->bundle_op_info_end(); + } + + /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end). 
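From the caller's side, the OperandBundleUser API above pairs with the OperandBundleDef-taking IRBuilder overloads earlier in this patch. A sketch that attaches a "deopt" bundle to a call and reads it back, with the callee and state value assumed to be in scope:

#include <cassert>
#include <vector>
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Build a call carrying one deoptimization operand bundle, then query it
// through the OperandBundleUser interface that CallInst mixes in.
CallInst *emitCallWithDeoptState(IRBuilder<> &B, Value *Callee, Value *State) {
  OperandBundleDef Deopt("deopt", std::vector<Value *>{State});
  CallInst *CI = B.CreateCall(Callee, /*Args=*/None, {Deopt});
  assert(CI->getNumOperandBundles() == 1);
  assert(CI->getOperandBundle(LLVMContext::OB_deopt).hasValue());
  return CI;
}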
+ iterator_range bundle_op_infos() { + return make_range(bundle_op_info_begin(), bundle_op_info_end()); + } + + /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end). + iterator_range bundle_op_infos() const { + return make_range(bundle_op_info_begin(), bundle_op_info_end()); + } + + /// \brief Populate the BundleOpInfo instances and the Use& vector from \p + /// Bundles. Return the op_iterator pointing to the Use& one past the last + /// last bundle operand use. + /// + /// Each \p OperandBundleDef instance is tracked by a OperandBundleInfo + /// instance allocated in this User's descriptor. + OpIteratorTy populateBundleOperandInfos(ArrayRef Bundles, + const unsigned BeginIndex) { + auto It = static_cast(this)->op_begin() + BeginIndex; + for (auto &B : Bundles) + It = std::copy(B.input_begin(), B.input_end(), It); + + auto *ContextImpl = static_cast(this)->getContext().pImpl; + auto BI = Bundles.begin(); + unsigned CurrentIndex = BeginIndex; + + for (auto &BOI : bundle_op_infos()) { + assert(BI != Bundles.end() && "Incorrect allocation?"); + + BOI.Tag = ContextImpl->getOrInsertBundleTag(BI->getTag()); + BOI.Begin = CurrentIndex; + BOI.End = CurrentIndex + BI->input_size(); + CurrentIndex = BOI.End; + BI++; + } + + assert(BI == Bundles.end() && "Incorrect allocation?"); + + return It; + } + + /// \brief Return the BundleOpInfo for the operand at index OpIdx. + /// + /// It is an error to call this with an OpIdx that does not correspond to an + /// bundle operand. + const BundleOpInfo &getBundleOpInfoForOperand(unsigned OpIdx) const { + for (auto &BOI : bundle_op_infos()) + if (BOI.Begin <= OpIdx && OpIdx < BOI.End) + return BOI; + + llvm_unreachable("Did not find operand bundle for operand!"); + } + + /// \brief Return the total number of values used in \p Bundles. + static unsigned CountBundleInputs(ArrayRef Bundles) { + unsigned Total = 0; + for (auto &B : Bundles) + Total += B.input_size(); + return Total; + } +}; + +} // end llvm namespace + +#endif // LLVM_IR_INSTRTYPES_H diff --git a/include/llvm/IR/Instruction.def b/include/llvm/IR/Instruction.def index d46314cc761d..18711abb8060 100644 --- a/include/llvm/IR/Instruction.def +++ b/include/llvm/IR/Instruction.def @@ -1,21 +1,21 @@ //===-- llvm/Instruction.def - File that describes Instructions -*- C++ -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file contains descriptions of the various LLVM instructions. This is -// used as a central place for enumerating the different instructions and +// used as a central place for enumerating the different instructions and // should eventually be the place to put comments about the instructions. // //===----------------------------------------------------------------------===// // NOTE: NO INCLUDE GUARD DESIRED! -// Provide definitions of macros so that users of this file do not have to +// Provide definitions of macros so that users of this file do not have to // define everything to use it... 
// #ifndef FIRST_TERM_INST @@ -74,6 +74,20 @@ #define LAST_CAST_INST(num) #endif +#ifndef FIRST_FUNCLETPAD_INST +#define FIRST_FUNCLETPAD_INST(num) +#endif +#ifndef HANDLE_FUNCLETPAD_INST +#ifndef HANDLE_INST +#define HANDLE_FUNCLETPAD_INST(num, opcode, Class) +#else +#define HANDLE_FUNCLETPAD_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class) +#endif +#endif +#ifndef LAST_FUNCLETPAD_INST +#define LAST_FUNCLETPAD_INST(num) +#endif + #ifndef FIRST_OTHER_INST #define FIRST_OTHER_INST(num) #endif @@ -88,92 +102,99 @@ #define LAST_OTHER_INST(num) #endif - // Terminator Instructions - These instructions are used to terminate a basic // block of the program. Every basic block must end with one of these // instructions for it to be a well formed basic block. // FIRST_TERM_INST ( 1) -HANDLE_TERM_INST ( 1, Ret , ReturnInst) -HANDLE_TERM_INST ( 2, Br , BranchInst) -HANDLE_TERM_INST ( 3, Switch , SwitchInst) -HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst) -HANDLE_TERM_INST ( 5, Invoke , InvokeInst) -HANDLE_TERM_INST ( 6, Resume , ResumeInst) -HANDLE_TERM_INST ( 7, Unreachable, UnreachableInst) - LAST_TERM_INST ( 7) +HANDLE_TERM_INST ( 1, Ret , ReturnInst) +HANDLE_TERM_INST ( 2, Br , BranchInst) +HANDLE_TERM_INST ( 3, Switch , SwitchInst) +HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst) +HANDLE_TERM_INST ( 5, Invoke , InvokeInst) +HANDLE_TERM_INST ( 6, Resume , ResumeInst) +HANDLE_TERM_INST ( 7, Unreachable , UnreachableInst) +HANDLE_TERM_INST ( 8, CleanupRet , CleanupReturnInst) +HANDLE_TERM_INST ( 9, CatchRet , CatchReturnInst) +HANDLE_TERM_INST (10, CatchSwitch , CatchSwitchInst) + LAST_TERM_INST (10) // Standard binary operators... - FIRST_BINARY_INST( 8) -HANDLE_BINARY_INST( 8, Add , BinaryOperator) -HANDLE_BINARY_INST( 9, FAdd , BinaryOperator) -HANDLE_BINARY_INST(10, Sub , BinaryOperator) -HANDLE_BINARY_INST(11, FSub , BinaryOperator) -HANDLE_BINARY_INST(12, Mul , BinaryOperator) -HANDLE_BINARY_INST(13, FMul , BinaryOperator) -HANDLE_BINARY_INST(14, UDiv , BinaryOperator) -HANDLE_BINARY_INST(15, SDiv , BinaryOperator) -HANDLE_BINARY_INST(16, FDiv , BinaryOperator) -HANDLE_BINARY_INST(17, URem , BinaryOperator) -HANDLE_BINARY_INST(18, SRem , BinaryOperator) -HANDLE_BINARY_INST(19, FRem , BinaryOperator) + FIRST_BINARY_INST(11) +HANDLE_BINARY_INST(11, Add , BinaryOperator) +HANDLE_BINARY_INST(12, FAdd , BinaryOperator) +HANDLE_BINARY_INST(13, Sub , BinaryOperator) +HANDLE_BINARY_INST(14, FSub , BinaryOperator) +HANDLE_BINARY_INST(15, Mul , BinaryOperator) +HANDLE_BINARY_INST(16, FMul , BinaryOperator) +HANDLE_BINARY_INST(17, UDiv , BinaryOperator) +HANDLE_BINARY_INST(18, SDiv , BinaryOperator) +HANDLE_BINARY_INST(19, FDiv , BinaryOperator) +HANDLE_BINARY_INST(20, URem , BinaryOperator) +HANDLE_BINARY_INST(21, SRem , BinaryOperator) +HANDLE_BINARY_INST(22, FRem , BinaryOperator) // Logical operators (integer operands) -HANDLE_BINARY_INST(20, Shl , BinaryOperator) // Shift left (logical) -HANDLE_BINARY_INST(21, LShr , BinaryOperator) // Shift right (logical) -HANDLE_BINARY_INST(22, AShr , BinaryOperator) // Shift right (arithmetic) -HANDLE_BINARY_INST(23, And , BinaryOperator) -HANDLE_BINARY_INST(24, Or , BinaryOperator) -HANDLE_BINARY_INST(25, Xor , BinaryOperator) - LAST_BINARY_INST(25) +HANDLE_BINARY_INST(23, Shl , BinaryOperator) // Shift left (logical) +HANDLE_BINARY_INST(24, LShr , BinaryOperator) // Shift right (logical) +HANDLE_BINARY_INST(25, AShr , BinaryOperator) // Shift right (arithmetic) +HANDLE_BINARY_INST(26, And , BinaryOperator) +HANDLE_BINARY_INST(27, Or , 
BinaryOperator) +HANDLE_BINARY_INST(28, Xor , BinaryOperator) + LAST_BINARY_INST(28) // Memory operators... - FIRST_MEMORY_INST(26) -HANDLE_MEMORY_INST(26, Alloca, AllocaInst) // Stack management -HANDLE_MEMORY_INST(27, Load , LoadInst ) // Memory manipulation instrs -HANDLE_MEMORY_INST(28, Store , StoreInst ) -HANDLE_MEMORY_INST(29, GetElementPtr, GetElementPtrInst) -HANDLE_MEMORY_INST(30, Fence , FenceInst ) -HANDLE_MEMORY_INST(31, AtomicCmpXchg , AtomicCmpXchgInst ) -HANDLE_MEMORY_INST(32, AtomicRMW , AtomicRMWInst ) - LAST_MEMORY_INST(32) + FIRST_MEMORY_INST(29) +HANDLE_MEMORY_INST(29, Alloca, AllocaInst) // Stack management +HANDLE_MEMORY_INST(30, Load , LoadInst ) // Memory manipulation instrs +HANDLE_MEMORY_INST(31, Store , StoreInst ) +HANDLE_MEMORY_INST(32, GetElementPtr, GetElementPtrInst) +HANDLE_MEMORY_INST(33, Fence , FenceInst ) +HANDLE_MEMORY_INST(34, AtomicCmpXchg , AtomicCmpXchgInst ) +HANDLE_MEMORY_INST(35, AtomicRMW , AtomicRMWInst ) + LAST_MEMORY_INST(35) // Cast operators ... -// NOTE: The order matters here because CastInst::isEliminableCastPair +// NOTE: The order matters here because CastInst::isEliminableCastPair // NOTE: (see Instructions.cpp) encodes a table based on this ordering. - FIRST_CAST_INST(33) -HANDLE_CAST_INST(33, Trunc , TruncInst ) // Truncate integers -HANDLE_CAST_INST(34, ZExt , ZExtInst ) // Zero extend integers -HANDLE_CAST_INST(35, SExt , SExtInst ) // Sign extend integers -HANDLE_CAST_INST(36, FPToUI , FPToUIInst ) // floating point -> UInt -HANDLE_CAST_INST(37, FPToSI , FPToSIInst ) // floating point -> SInt -HANDLE_CAST_INST(38, UIToFP , UIToFPInst ) // UInt -> floating point -HANDLE_CAST_INST(39, SIToFP , SIToFPInst ) // SInt -> floating point -HANDLE_CAST_INST(40, FPTrunc , FPTruncInst ) // Truncate floating point -HANDLE_CAST_INST(41, FPExt , FPExtInst ) // Extend floating point -HANDLE_CAST_INST(42, PtrToInt, PtrToIntInst) // Pointer -> Integer -HANDLE_CAST_INST(43, IntToPtr, IntToPtrInst) // Integer -> Pointer -HANDLE_CAST_INST(44, BitCast , BitCastInst ) // Type cast -HANDLE_CAST_INST(45, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast - LAST_CAST_INST(45) + FIRST_CAST_INST(36) +HANDLE_CAST_INST(36, Trunc , TruncInst ) // Truncate integers +HANDLE_CAST_INST(37, ZExt , ZExtInst ) // Zero extend integers +HANDLE_CAST_INST(38, SExt , SExtInst ) // Sign extend integers +HANDLE_CAST_INST(39, FPToUI , FPToUIInst ) // floating point -> UInt +HANDLE_CAST_INST(40, FPToSI , FPToSIInst ) // floating point -> SInt +HANDLE_CAST_INST(41, UIToFP , UIToFPInst ) // UInt -> floating point +HANDLE_CAST_INST(42, SIToFP , SIToFPInst ) // SInt -> floating point +HANDLE_CAST_INST(43, FPTrunc , FPTruncInst ) // Truncate floating point +HANDLE_CAST_INST(44, FPExt , FPExtInst ) // Extend floating point +HANDLE_CAST_INST(45, PtrToInt, PtrToIntInst) // Pointer -> Integer +HANDLE_CAST_INST(46, IntToPtr, IntToPtrInst) // Integer -> Pointer +HANDLE_CAST_INST(47, BitCast , BitCastInst ) // Type cast +HANDLE_CAST_INST(48, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast + LAST_CAST_INST(48) + + FIRST_FUNCLETPAD_INST(49) +HANDLE_FUNCLETPAD_INST(49, CleanupPad, CleanupPadInst) +HANDLE_FUNCLETPAD_INST(50, CatchPad , CatchPadInst) + LAST_FUNCLETPAD_INST(50) // Other operators... - FIRST_OTHER_INST(46) -HANDLE_OTHER_INST(46, ICmp , ICmpInst ) // Integer comparison instruction -HANDLE_OTHER_INST(47, FCmp , FCmpInst ) // Floating point comparison instr. 
-HANDLE_OTHER_INST(48, PHI , PHINode ) // PHI node instruction -HANDLE_OTHER_INST(49, Call , CallInst ) // Call a function -HANDLE_OTHER_INST(50, Select , SelectInst ) // select instruction -HANDLE_OTHER_INST(51, UserOp1, Instruction) // May be used internally in a pass -HANDLE_OTHER_INST(52, UserOp2, Instruction) // Internal to passes only -HANDLE_OTHER_INST(53, VAArg , VAArgInst ) // vaarg instruction -HANDLE_OTHER_INST(54, ExtractElement, ExtractElementInst)// extract from vector -HANDLE_OTHER_INST(55, InsertElement, InsertElementInst) // insert into vector -HANDLE_OTHER_INST(56, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. -HANDLE_OTHER_INST(57, ExtractValue, ExtractValueInst)// extract from aggregate -HANDLE_OTHER_INST(58, InsertValue, InsertValueInst) // insert into aggregate -HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction. - LAST_OTHER_INST(59) + FIRST_OTHER_INST(51) +HANDLE_OTHER_INST(51, ICmp , ICmpInst ) // Integer comparison instruction +HANDLE_OTHER_INST(52, FCmp , FCmpInst ) // Floating point comparison instr. +HANDLE_OTHER_INST(53, PHI , PHINode ) // PHI node instruction +HANDLE_OTHER_INST(54, Call , CallInst ) // Call a function +HANDLE_OTHER_INST(55, Select , SelectInst ) // select instruction +HANDLE_OTHER_INST(56, UserOp1, Instruction) // May be used internally in a pass +HANDLE_OTHER_INST(57, UserOp2, Instruction) // Internal to passes only +HANDLE_OTHER_INST(58, VAArg , VAArgInst ) // vaarg instruction +HANDLE_OTHER_INST(59, ExtractElement, ExtractElementInst)// extract from vector +HANDLE_OTHER_INST(60, InsertElement, InsertElementInst) // insert into vector +HANDLE_OTHER_INST(61, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. +HANDLE_OTHER_INST(62, ExtractValue, ExtractValueInst)// extract from aggregate +HANDLE_OTHER_INST(63, InsertValue, InsertValueInst) // insert into aggregate +HANDLE_OTHER_INST(64, LandingPad, LandingPadInst) // Landing pad instruction. + LAST_OTHER_INST(64) #undef FIRST_TERM_INST #undef HANDLE_TERM_INST @@ -191,6 +212,10 @@ HANDLE_OTHER_INST(59, LandingPad, LandingPadInst) // Landing pad instruction. #undef HANDLE_CAST_INST #undef LAST_CAST_INST +#undef FIRST_FUNCLETPAD_INST +#undef HANDLE_FUNCLETPAD_INST +#undef LAST_FUNCLETPAD_INST + #undef FIRST_OTHER_INST #undef HANDLE_OTHER_INST #undef LAST_OTHER_INST diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index 31f363f70a5b..03c45497fa95 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -30,25 +30,11 @@ class BasicBlock; struct AAMDNodes; template <> -struct ilist_traits - : public SymbolTableListTraits { +struct SymbolTableListSentinelTraits + : public ilist_half_embedded_sentinel_traits {}; - /// \brief Return a node that marks the end of a list. - /// - /// The sentinel is relative to this instance, so we use a non-static - /// method. 
- Instruction *createSentinel() const; - static void destroySentinel(Instruction *) {} - - Instruction *provideInitialHead() const { return createSentinel(); } - Instruction *ensureHead(Instruction *) const { return createSentinel(); } - static void noteHead(Instruction *, Instruction *) {} - -private: - mutable ilist_half_node Sentinel; -}; - -class Instruction : public User, public ilist_node { +class Instruction : public User, + public ilist_node_with_parent { void operator=(const Instruction &) = delete; Instruction(const Instruction &) = delete; @@ -80,6 +66,13 @@ public: const Module *getModule() const; Module *getModule(); + /// \brief Return the function this instruction belongs to. + /// + /// Note: it is undefined behavior to call this on an instruction not + /// currently inserted into a function. + const Function *getFunction() const; + Function *getFunction(); + /// removeFromParent - This method unlinks 'this' from the containing basic /// block, but does not delete it. /// @@ -89,7 +82,7 @@ public: /// block and deletes it. /// /// \returns an iterator pointing to the element after the erased one - iplist::iterator eraseFromParent(); + SymbolTableList::iterator eraseFromParent(); /// Insert an unlinked instruction into a basic block immediately before /// the specified instruction. @@ -116,6 +109,7 @@ public: bool isBinaryOp() const { return isBinaryOp(getOpcode()); } bool isShift() { return isShift(getOpcode()); } bool isCast() const { return isCast(getOpcode()); } + bool isFuncletPad() const { return isFuncletPad(getOpcode()); } static const char* getOpcodeName(unsigned OpCode); @@ -148,6 +142,11 @@ public: return OpCode >= CastOpsBegin && OpCode < CastOpsEnd; } + /// @brief Determine if the OpCode is one of the FuncletPadInst instructions. + static inline bool isFuncletPad(unsigned OpCode) { + return OpCode >= FuncletPadOpsBegin && OpCode < FuncletPadOpsEnd; + } + //===--------------------------------------------------------------------===// // Metadata manipulation. //===--------------------------------------------------------------------===// @@ -204,20 +203,22 @@ public: void setMetadata(unsigned KindID, MDNode *Node); void setMetadata(StringRef Kind, MDNode *Node); - /// \brief Drop unknown metadata. + /// Drop all unknown metadata except for debug locations. + /// @{ /// Passes are required to drop metadata they don't understand. This is a /// convenience method for passes to do so. - void dropUnknownMetadata(ArrayRef KnownIDs); - void dropUnknownMetadata() { - return dropUnknownMetadata(None); + void dropUnknownNonDebugMetadata(ArrayRef KnownIDs); + void dropUnknownNonDebugMetadata() { + return dropUnknownNonDebugMetadata(None); } - void dropUnknownMetadata(unsigned ID1) { - return dropUnknownMetadata(makeArrayRef(ID1)); + void dropUnknownNonDebugMetadata(unsigned ID1) { + return dropUnknownNonDebugMetadata(makeArrayRef(ID1)); } - void dropUnknownMetadata(unsigned ID1, unsigned ID2) { + void dropUnknownNonDebugMetadata(unsigned ID1, unsigned ID2) { unsigned IDs[] = {ID1, ID2}; - return dropUnknownMetadata(IDs); + return dropUnknownNonDebugMetadata(IDs); } + /// @} /// setAAMetadata - Sets the metadata on this instruction from the /// AAMDNodes structure. @@ -388,6 +389,19 @@ public: return mayWriteToMemory() || mayThrow() || !mayReturn(); } + /// \brief Return true if the instruction is a variety of EH-block. 
+ bool isEHPad() const { + switch (getOpcode()) { + case Instruction::CatchSwitch: + case Instruction::CatchPad: + case Instruction::CleanupPad: + case Instruction::LandingPad: + return true; + default: + return false; + } + } + /// clone() - Create a copy of 'this' instruction that is identical in all /// ways except the following: /// * The instruction has no parent @@ -468,6 +482,13 @@ public: #include "llvm/IR/Instruction.def" }; + enum FuncletPadOps { +#define FIRST_FUNCLETPAD_INST(N) FuncletPadOpsBegin = N, +#define HANDLE_FUNCLETPAD_INST(N, OPC, CLASS) OPC = N, +#define LAST_FUNCLETPAD_INST(N) FuncletPadOpsEnd = N+1 +#include "llvm/IR/Instruction.def" + }; + enum OtherOps { #define FIRST_OTHER_INST(N) OtherOpsBegin = N, #define HANDLE_OTHER_INST(N, OPC, CLASS) OPC = N, @@ -489,7 +510,7 @@ private: (V ? HasMetadataBit : 0)); } - friend class SymbolTableListTraits; + friend class SymbolTableListTraits; void setParent(BasicBlock *P); protected: // Instruction subclasses can stick up to 15 bits of stuff into the @@ -515,17 +536,6 @@ private: Instruction *cloneImpl() const; }; -inline Instruction *ilist_traits::createSentinel() const { - // Since i(p)lists always publicly derive from their corresponding traits, - // placing a data member in this class will augment the i(p)list. But since - // the NodeTy is expected to be publicly derive from ilist_node, - // there is a legal viable downcast from it to NodeTy. We use this trick to - // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the - // sentinel. Dereferencing the sentinel is forbidden (save the - // ilist_node), so no one will ever notice the superposition. - return static_cast(&Sentinel); -} - // Instruction* is only 4-byte aligned. template<> class PointerLikeTypeTraits { diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 07d5f111b9e1..d781c7af36d7 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -18,6 +18,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" @@ -158,6 +159,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -166,7 +168,6 @@ private: } }; - //===----------------------------------------------------------------------===// // LoadInst Class //===----------------------------------------------------------------------===// @@ -176,6 +177,7 @@ private: /// class LoadInst : public UnaryInstruction { void AssertOK(); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -290,7 +292,6 @@ public: return getPointerOperand()->getType()->getPointerAddressSpace(); } - // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::Load; @@ -298,6 +299,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. 
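The FUNCLETPAD_INST macros and the FuncletPadOps enum above follow Instruction.def's X-macro convention: a client defines only the HANDLE_* hooks it cares about, includes the file, and the preprocessor stamps out one entry per opcode; every FIRST_*/HANDLE_*/LAST_* macro defaults to a no-op and is #undef'd at the end of the file, so each include starts clean. A minimal sketch of the pattern under those rules; the OpCodes enum and the opcodeName helper are illustrative, not part of this patch, and assume the LLVM headers are on the include path:

    #include <cstdio>

    // Expansion 1: one enumerator per opcode. Instruction.def forwards each
    // HANDLE_*_INST group to HANDLE_INST when only the latter is defined, so
    // this covers terminators, binops, funclet pads, and the rest.
    enum OpCodes {
    #define HANDLE_INST(num, opcode, Class) opcode##Op = num,
    #include "llvm/IR/Instruction.def"
    };

    // Expansion 2: map an opcode number back to its name. The .def file
    // #undefs HANDLE_INST at the end, so it must be redefined here.
    static const char *opcodeName(unsigned Op) {
      switch (Op) {
    #define HANDLE_INST(num, opcode, Class)                                    \
      case num:                                                                \
        return #opcode;
    #include "llvm/IR/Instruction.def"
      default:
        return "<unknown opcode>";
      }
    }

    int main() {
      std::printf("%s\n", opcodeName(CleanupPadOp)); // prints "CleanupPad"
    }

Keeping each group's numbering contiguous is what lets the range tests above, such as isFuncletPad against FuncletPadOpsBegin/FuncletPadOpsEnd, remain simple comparisons.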
@@ -306,7 +308,6 @@ private: } }; - //===----------------------------------------------------------------------===// // StoreInst Class //===----------------------------------------------------------------------===// @@ -316,6 +317,7 @@ private: class StoreInst : public Instruction { void *operator new(size_t, unsigned) = delete; void AssertOK(); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -344,7 +346,6 @@ public: SynchronizationScope SynchScope, BasicBlock *InsertAtEnd); - /// isVolatile - Return true if this is a store to a volatile memory /// location. /// @@ -422,6 +423,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -445,6 +447,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) class FenceInst : public Instruction { void *operator new(size_t, unsigned) = delete; void Init(AtomicOrdering Ordering, SynchronizationScope SynchScope); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -496,6 +499,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -517,6 +521,7 @@ class AtomicCmpXchgInst : public Instruction { void Init(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -648,6 +653,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: // Shadow Instruction::setInstructionSubclassData with a private forwarding // method so that subclasses cannot accidentally use it. @@ -673,6 +679,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value) /// class AtomicRMWInst : public Instruction { void *operator new(size_t, unsigned) = delete; + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -795,6 +802,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: void Init(BinOp Operation, Value *Ptr, Value *Val, AtomicOrdering Ordering, SynchronizationScope SynchScope); @@ -831,6 +839,8 @@ class GetElementPtrInst : public Instruction { Type *SourceElementType; Type *ResultElementType; + void anchor() override; + GetElementPtrInst(const GetElementPtrInst &GEPI); void init(Value *Ptr, ArrayRef IdxList, const Twine &NameStr); @@ -1078,10 +1088,8 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr, init(Ptr, IdxList, NameStr); } - DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value) - //===----------------------------------------------------------------------===// // ICmpInst Class //===----------------------------------------------------------------------===// @@ -1091,6 +1099,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value) /// must be identical types. /// \brief Represent an integer comparison operator. 
class ICmpInst: public CmpInst { + void anchor() override; + void AssertOK() { assert(getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE && getPredicate() <= CmpInst::LAST_ICMP_PREDICATE && @@ -1226,7 +1236,6 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - }; //===----------------------------------------------------------------------===// @@ -1350,62 +1359,102 @@ public: /// field to indicate whether or not this is a tail call. The rest of the bits /// hold the calling convention of the call. /// -class CallInst : public Instruction { +class CallInst : public Instruction, + public OperandBundleUser { AttributeSet AttributeList; ///< parameter attributes for call FunctionType *FTy; CallInst(const CallInst &CI); - void init(Value *Func, ArrayRef Args, const Twine &NameStr) { + void init(Value *Func, ArrayRef Args, + ArrayRef Bundles, const Twine &NameStr) { init(cast( cast(Func->getType())->getElementType()), - Func, Args, NameStr); + Func, Args, Bundles, NameStr); } void init(FunctionType *FTy, Value *Func, ArrayRef Args, - const Twine &NameStr); + ArrayRef Bundles, const Twine &NameStr); void init(Value *Func, const Twine &NameStr); /// Construct a CallInst given a range of arguments. /// \brief Construct a CallInst from a range of arguments inline CallInst(FunctionType *Ty, Value *Func, ArrayRef Args, - const Twine &NameStr, Instruction *InsertBefore); - inline CallInst(Value *Func, ArrayRef Args, const Twine &NameStr, + ArrayRef Bundles, const Twine &NameStr, + Instruction *InsertBefore); + inline CallInst(Value *Func, ArrayRef Args, + ArrayRef Bundles, const Twine &NameStr, Instruction *InsertBefore) : CallInst(cast( cast(Func->getType())->getElementType()), - Func, Args, NameStr, InsertBefore) {} + Func, Args, Bundles, NameStr, InsertBefore) {} + + inline CallInst(Value *Func, ArrayRef Args, const Twine &NameStr, + Instruction *InsertBefore) + : CallInst(Func, Args, None, NameStr, InsertBefore) {} /// Construct a CallInst given a range of arguments. /// \brief Construct a CallInst from a range of arguments inline CallInst(Value *Func, ArrayRef Args, - const Twine &NameStr, BasicBlock *InsertAtEnd); + ArrayRef Bundles, const Twine &NameStr, + BasicBlock *InsertAtEnd); explicit CallInst(Value *F, const Twine &NameStr, Instruction *InsertBefore); CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd); + + friend class OperandBundleUser; + bool hasDescriptor() const { return HasDescriptor; } + protected: // Note: Instruction needs to be a friend here to call cloneImpl. 
   friend class Instruction;
   CallInst *cloneImpl() const;
 
 public:
-  static CallInst *Create(Value *Func,
-                          ArrayRef<Value *> Args,
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles = None,
                           const Twine &NameStr = "",
                           Instruction *InsertBefore = nullptr) {
     return Create(cast<FunctionType>(
                       cast<PointerType>(Func->getType())->getElementType()),
-                  Func, Args, NameStr, InsertBefore);
+                  Func, Args, Bundles, NameStr, InsertBefore);
+  }
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          const Twine &NameStr,
+                          Instruction *InsertBefore = nullptr) {
+    return Create(cast<FunctionType>(
+                      cast<PointerType>(Func->getType())->getElementType()),
+                  Func, Args, None, NameStr, InsertBefore);
   }
   static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
-                          const Twine &NameStr = "",
+                          const Twine &NameStr,
                           Instruction *InsertBefore = nullptr) {
     return new (unsigned(Args.size() + 1))
-        CallInst(Ty, Func, Args, NameStr, InsertBefore);
+        CallInst(Ty, Func, Args, None, NameStr, InsertBefore);
   }
-  static CallInst *Create(Value *Func,
-                          ArrayRef<Value *> Args,
+  static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles = None,
+                          const Twine &NameStr = "",
+                          Instruction *InsertBefore = nullptr) {
+    const unsigned TotalOps =
+        unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+    const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (TotalOps, DescriptorBytes)
+        CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore);
+  }
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          ArrayRef<OperandBundleDef> Bundles,
                           const Twine &NameStr, BasicBlock *InsertAtEnd) {
-    return new(unsigned(Args.size() + 1))
-      CallInst(Func, Args, NameStr, InsertAtEnd);
+    const unsigned TotalOps =
+        unsigned(Args.size()) + CountBundleInputs(Bundles) + 1;
+    const unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (TotalOps, DescriptorBytes)
+        CallInst(Func, Args, Bundles, NameStr, InsertAtEnd);
+  }
+  static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
+                          const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    return new (unsigned(Args.size() + 1))
+        CallInst(Func, Args, None, NameStr, InsertAtEnd);
   }
   static CallInst *Create(Value *F, const Twine &NameStr = "",
                           Instruction *InsertBefore = nullptr) {
@@ -1415,6 +1464,16 @@ public:
                           BasicBlock *InsertAtEnd) {
     return new(1) CallInst(F, NameStr, InsertAtEnd);
   }
+
+  /// \brief Create a clone of \p CI with a different set of operand bundles
+  /// and insert it before \p InsertPt.
+  ///
+  /// The returned call instruction is identical to \p CI in every way except
+  /// that the operand bundles for the new instruction are set to the operand
+  /// bundles in \p Bundles.
+  static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
+                          Instruction *InsertPt = nullptr);
+
   /// CreateMalloc - Generate the IR for a call to malloc:
   /// 1. Compute the malloc call's argument as the specified type's size,
   ///    possibly multiplied by the array size if the array size is not
@@ -1445,16 +1504,21 @@ public:
   }
 
   // Note that 'musttail' implies 'tail'.
- enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2 }; + enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2, + TCK_NoTail = 3 }; TailCallKind getTailCallKind() const { return TailCallKind(getSubclassDataFromInstruction() & 3); } bool isTailCall() const { - return (getSubclassDataFromInstruction() & 3) != TCK_None; + unsigned Kind = getSubclassDataFromInstruction() & 3; + return Kind == TCK_Tail || Kind == TCK_MustTail; } bool isMustTailCall() const { return (getSubclassDataFromInstruction() & 3) == TCK_MustTail; } + bool isNoTailCall() const { + return (getSubclassDataFromInstruction() & 3) == TCK_NoTail; + } void setTailCall(bool isTC = true) { setInstructionSubclassData((getSubclassDataFromInstruction() & ~3) | unsigned(isTC ? TCK_Tail : TCK_None)); @@ -1469,28 +1533,58 @@ public: /// getNumArgOperands - Return the number of call arguments. /// - unsigned getNumArgOperands() const { return getNumOperands() - 1; } + unsigned getNumArgOperands() const { + return getNumOperands() - getNumTotalBundleOperands() - 1; + } /// getArgOperand/setArgOperand - Return/set the i-th call argument. /// - Value *getArgOperand(unsigned i) const { return getOperand(i); } - void setArgOperand(unsigned i, Value *v) { setOperand(i, v); } - - /// arg_operands - iteration adapter for range-for loops. - iterator_range arg_operands() { - // The last operand in the op list is the callee - it's not one of the args - // so we don't want to iterate over it. - return iterator_range(op_begin(), op_end() - 1); + Value *getArgOperand(unsigned i) const { + assert(i < getNumArgOperands() && "Out of bounds!"); + return getOperand(i); + } + void setArgOperand(unsigned i, Value *v) { + assert(i < getNumArgOperands() && "Out of bounds!"); + setOperand(i, v); } - /// arg_operands - iteration adapter for range-for loops. + /// \brief Return the iterator pointing to the beginning of the argument list. + op_iterator arg_begin() { return op_begin(); } + + /// \brief Return the iterator pointing to the end of the argument list. + op_iterator arg_end() { + // [ call args ], [ operand bundles ], callee + return op_end() - getNumTotalBundleOperands() - 1; + }; + + /// \brief Iteration adapter for range-for loops. + iterator_range arg_operands() { + return make_range(arg_begin(), arg_end()); + } + + /// \brief Return the iterator pointing to the beginning of the argument list. + const_op_iterator arg_begin() const { return op_begin(); } + + /// \brief Return the iterator pointing to the end of the argument list. + const_op_iterator arg_end() const { + // [ call args ], [ operand bundles ], callee + return op_end() - getNumTotalBundleOperands() - 1; + }; + + /// \brief Iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range(op_begin(), op_end() - 1); + return make_range(arg_begin(), arg_end()); } /// \brief Wrappers for getting the \c Use of a call argument. - const Use &getArgOperandUse(unsigned i) const { return getOperandUse(i); } - Use &getArgOperandUse(unsigned i) { return getOperandUse(i); } + const Use &getArgOperandUse(unsigned i) const { + assert(i < getNumArgOperands() && "Out of bounds!"); + return getOperandUse(i); + } + Use &getArgOperandUse(unsigned i) { + assert(i < getNumArgOperands() && "Out of bounds!"); + return getOperandUse(i); + } /// getCallingConv/setCallingConv - Get or set the calling convention of this /// function call. 
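Taken together, the Create overloads and argument accessors above mean a transform can attach bundles at creation time without doing any operand bookkeeping itself: Create sizes the allocation (TotalOps plus DescriptorBytes) and populateBundleOperandInfos lays the bundle inputs out after the arguments. A sketch of what a caller might look like; the emitCallWithDeoptState helper, its parameters, and the exact OperandBundleDef constructor shape (a tag string plus its input values) are assumptions for illustration, not APIs confirmed by this patch:

    #include <cassert>
    #include <vector>
    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Instructions.h"

    using namespace llvm;

    static CallInst *emitCallWithDeoptState(Function *Callee,
                                            ArrayRef<Value *> Args,
                                            ArrayRef<Value *> DeoptState,
                                            Instruction *InsertPt) {
      // One bundle: tag "deopt", inputs = the abstract-state values.
      OperandBundleDef DeoptBundle(
          "deopt", std::vector<Value *>(DeoptState.begin(), DeoptState.end()));

      CallInst *CI = CallInst::Create(Callee, Args, {DeoptBundle}, "", InsertPt);

      // Bundle inputs sit between the plain arguments and the callee operand,
      // so the argument accessors still see exactly Args.
      assert(CI->getNumArgOperands() == Args.size() && "bundles are not args");
      return CI;
    }

Note that arg_end() subtracts getNumTotalBundleOperands() before the callee slot, which is why the assertion holds no matter how many bundle inputs were appended.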
@@ -1498,8 +1592,10 @@ public:
     return static_cast<CallingConv::ID>(getSubclassDataFromInstruction() >> 2);
   }
   void setCallingConv(CallingConv::ID CC) {
+    auto ID = static_cast<unsigned>(CC);
+    assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
     setInstructionSubclassData((getSubclassDataFromInstruction() & 3) |
-                               (static_cast<unsigned>(CC) << 2));
+                               (ID << 2));
   }
 
   /// getAttributes - Return the parameter attributes for this call.
@@ -1541,6 +1637,21 @@ public:
   /// \brief Determine whether the call or the callee has the given attributes.
   bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
 
+  /// \brief Return true if the data operand at index \p i has the attribute \p
+  /// A.
+  ///
+  /// Data operands include call arguments and values used in operand bundles,
+  /// but do not include the callee operand.  This routine dispatches to the
+  /// underlying AttributeList or the OperandBundleUser as appropriate.
+  ///
+  /// The index \p i is interpreted as
+  ///
+  ///  \p i == Attribute::ReturnIndex -> the return value
+  ///  \p i in [1, arg_size + 1) -> argument number (\p i - 1)
+  ///  \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
+  ///  (\p i - 1) in the operand list.
+  bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind A) const;
+
   /// \brief Extract the alignment for a call or parameter (0=unknown).
   unsigned getParamAlignment(unsigned i) const {
     return AttributeList.getParamAlignment(i);
@@ -1557,7 +1668,14 @@ public:
   uint64_t getDereferenceableOrNullBytes(unsigned i) const {
     return AttributeList.getDereferenceableOrNullBytes(i);
   }
-  
+
+  /// @brief Determine if the parameter or return value is marked with NoAlias
+  /// attribute.
+  /// @param n The parameter to check. 1 is the first parameter, 0 is the return
+  /// value.
+  bool doesNotAlias(unsigned n) const {
+    return AttributeList.hasAttribute(n, Attribute::NoAlias);
+  }
+
   /// \brief Return true if the call should not be treated as a call to a
   /// builtin.
   bool isNoBuiltin() const {
@@ -1622,9 +1740,18 @@ public:
     addAttribute(AttributeSet::FunctionIndex, Attribute::NoDuplicate);
   }
 
+  /// \brief Determine if the call is convergent.
+  bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
+  void setConvergent() {
+    addAttribute(AttributeSet::FunctionIndex, Attribute::Convergent);
+  }
+
   /// \brief Determine if the call returns a structure through first
   /// pointer argument.
   bool hasStructRetAttr() const {
+    if (getNumArgOperands() == 0)
+      return false;
+
+    // Be friendly and also check the callee.
     return paramHasAttr(1, Attribute::StructRet);
   }
 
@@ -1671,12 +1798,17 @@ public:
   static inline bool classof(const Value *V) {
     return isa<CallInst>(V) && classof(cast<CallInst>(V));
   }
-private:
-  template <typename AttrKind>
-  bool hasFnAttrImpl(AttrKind A) const {
+private:
+  template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const {
     if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
       return true;
+
+    // Operand bundles override attributes on the called function, but don't
+    // override attributes directly present on the call instruction.
+ if (isFnAttrDisallowedByOpBundle(A)) + return false; + if (const Function *F = getCalledFunction()) return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A); return false; @@ -1694,24 +1826,28 @@ struct OperandTraits : public VariadicOperandTraits { }; CallInst::CallInst(Value *Func, ArrayRef Args, - const Twine &NameStr, BasicBlock *InsertAtEnd) - : Instruction(cast(cast(Func->getType()) - ->getElementType())->getReturnType(), - Instruction::Call, - OperandTraits::op_end(this) - (Args.size() + 1), - unsigned(Args.size() + 1), InsertAtEnd) { - init(Func, Args, NameStr); + ArrayRef Bundles, const Twine &NameStr, + BasicBlock *InsertAtEnd) + : Instruction( + cast(cast(Func->getType()) + ->getElementType())->getReturnType(), + Instruction::Call, OperandTraits::op_end(this) - + (Args.size() + CountBundleInputs(Bundles) + 1), + unsigned(Args.size() + CountBundleInputs(Bundles) + 1), InsertAtEnd) { + init(Func, Args, Bundles, NameStr); } CallInst::CallInst(FunctionType *Ty, Value *Func, ArrayRef Args, - const Twine &NameStr, Instruction *InsertBefore) + ArrayRef Bundles, const Twine &NameStr, + Instruction *InsertBefore) : Instruction(Ty->getReturnType(), Instruction::Call, - OperandTraits::op_end(this) - (Args.size() + 1), - unsigned(Args.size() + 1), InsertBefore) { - init(Ty, Func, Args, NameStr); + OperandTraits::op_end(this) - + (Args.size() + CountBundleInputs(Bundles) + 1), + unsigned(Args.size() + CountBundleInputs(Bundles) + 1), + InsertBefore) { + init(Ty, Func, Args, Bundles, NameStr); } - // Note: if you get compile errors about private methods then // please update your code to use the high-level operand // interfaces. See line 943 above. @@ -1745,6 +1881,7 @@ class SelectInst : public Instruction { init(C, S1, S2); setName(NameStr); } + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -1845,6 +1982,7 @@ class ExtractElementInst : public Instruction { Instruction *InsertBefore = nullptr); ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -1875,7 +2013,6 @@ public: return cast(getVectorOperand()->getType()); } - /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -1906,8 +2043,9 @@ class InsertElementInst : public Instruction { InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr = "", Instruction *InsertBefore = nullptr); - InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, - const Twine &NameStr, BasicBlock *InsertAtEnd); + InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr, + BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2020,7 +2158,6 @@ public: return Mask; } - // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::ShuffleVector; @@ -2063,9 +2200,8 @@ class ExtractValueInst : public UnaryInstruction { const Twine &NameStr, BasicBlock *InsertAtEnd); // allocate space for exactly one operand - void *operator new(size_t s) { - return User::operator new(s, 1); - } + void *operator new(size_t s) { return User::operator new(s, 1); } + protected: // Note: Instruction needs to be a friend here to call cloneImpl. 
friend class Instruction; @@ -2096,7 +2232,7 @@ public: inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { - return iterator_range(idx_begin(), idx_end()); + return make_range(idx_begin(), idx_end()); } Value *getAggregateOperand() { @@ -2147,7 +2283,6 @@ ExtractValueInst::ExtractValueInst(Value *Agg, init(Idxs, NameStr); } - //===----------------------------------------------------------------------===// // InsertValueInst Class //===----------------------------------------------------------------------===// @@ -2177,11 +2312,12 @@ class InsertValueInst : public Instruction { /// Constructors - These two constructors are convenience methods because one /// and two index insertvalue instructions are so common. - InsertValueInst(Value *Agg, Value *Val, - unsigned Idx, const Twine &NameStr = "", - Instruction *InsertBefore = nullptr); InsertValueInst(Value *Agg, Value *Val, unsigned Idx, - const Twine &NameStr, BasicBlock *InsertAtEnd); + const Twine &NameStr = "", + Instruction *InsertBefore = nullptr); + InsertValueInst(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr, + BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2213,7 +2349,7 @@ public: inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { - return iterator_range(idx_begin(), idx_end()); + return make_range(idx_begin(), idx_end()); } Value *getAggregateOperand() { @@ -2294,6 +2430,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value) // scientist's overactive imagination. // class PHINode : public Instruction { + void anchor() override; + void *operator new(size_t, unsigned) = delete; /// ReservedSpace - The number of operands actually allocated. NumOperands is /// the number actually in use. @@ -2319,6 +2457,7 @@ class PHINode : public Instruction { setName(NameStr); allocHungoffUses(ReservedSpace); } + protected: // allocHungoffUses - this is more complicated than the generic // User::allocHungoffUses, because we have to allocate Uses for the incoming @@ -2387,6 +2526,9 @@ public: return getOperand(i); } void setIncomingValue(unsigned i, Value *V) { + assert(V && "PHI node got a null value!"); + assert(getType() == V->getType() && + "All operands to PHI node must be the same type as the PHI node!"); setOperand(i, V); } static unsigned getOperandNumForIncomingValue(unsigned i) { @@ -2418,16 +2560,13 @@ public: } void setIncomingBlock(unsigned i, BasicBlock *BB) { + assert(BB && "PHI node got a null basic block!"); block_begin()[i] = BB; } /// addIncoming - Add an incoming value to the end of the PHI list /// void addIncoming(Value *V, BasicBlock *BB) { - assert(V && "PHI node got a null value!"); - assert(BB && "PHI node got a null basic block!"); - assert(getType() == V->getType() && - "All operands to PHI node must be the same type as the PHI node!"); if (getNumOperands() == ReservedSpace) growOperands(); // Get more space! // Initialize some new operands. @@ -2479,7 +2618,8 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - private: + +private: void growOperands(); }; @@ -2506,8 +2646,10 @@ class LandingPadInst : public Instruction { /// the number actually in use. 
unsigned ReservedSpace; LandingPadInst(const LandingPadInst &LP); + public: enum ClauseType { Catch, Filter }; + private: void *operator new(size_t, unsigned) = delete; // Allocate space for exactly zero operands. @@ -2618,6 +2760,7 @@ private: Instruction *InsertBefore = nullptr); ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd); explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2654,7 +2797,8 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - private: + +private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; void setSuccessorV(unsigned idx, BasicBlock *B) override; @@ -2693,6 +2837,7 @@ class BranchInst : public TerminatorInst { BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd); BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -2740,7 +2885,7 @@ public: void setSuccessor(unsigned idx, BasicBlock *NewSucc) { assert(idx < getNumSuccessors() && "Successor # out of range for Branch!"); - *(&Op<-1>() - idx) = (Value*)NewSucc; + *(&Op<-1>() - idx) = NewSucc; } /// \brief Swap the successors of this branch instruction. @@ -2757,6 +2902,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; @@ -2803,25 +2949,23 @@ class SwitchInst : public TerminatorInst { /// constructor also autoinserts at the end of the specified BasicBlock. SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; SwitchInst *cloneImpl() const; public: - // -2 static const unsigned DefaultPseudoIndex = static_cast(~0L-1); template class CaseIteratorT { protected: - SwitchInstTy *SI; unsigned Index; public: - typedef CaseIteratorT Self; /// Initializes case iterator for given SwitchInst and for given @@ -2912,8 +3056,7 @@ public: typedef CaseIteratorT ParentTy; public: - - CaseIt(const ParentTy& Src) : ParentTy(Src) {} + CaseIt(const ParentTy &Src) : ParentTy(Src) {} CaseIt(SwitchInst *SI, unsigned CaseNum) : ParentTy(SI, CaseNum) {} /// Sets the new value for current case. @@ -2983,12 +3126,12 @@ public: /// cases - iteration adapter for range-for loops. iterator_range cases() { - return iterator_range(case_begin(), case_end()); + return make_range(case_begin(), case_end()); } /// cases - iteration adapter for range-for loops. iterator_range cases() const { - return iterator_range(case_begin(), case_end()); + return make_range(case_begin(), case_end()); } /// Returns an iterator that points to the default case. 
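Since the cases() adapters above now simply wrap make_range, the usual way to consume them is a range-for; each CaseIt pairs a case value with its successor block. A small sketch of such a walk (dumpSwitchTargets is an illustrative helper name, and SI is assumed to point at a valid SwitchInst):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static void dumpSwitchTargets(SwitchInst *SI) {
      for (SwitchInst::CaseIt Case : SI->cases()) {
        // Each explicit case maps a constant integer to a destination block.
        ConstantInt *CaseVal = Case.getCaseValue();
        BasicBlock *Dest = Case.getCaseSuccessor();
        errs() << "case " << CaseVal->getValue() << " -> " << Dest->getName()
               << "\n";
      }
      // The default destination is not part of the cases() range.
      errs() << "default -> " << SI->getDefaultDest()->getName() << "\n";
    }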
@@ -3056,7 +3199,7 @@ public: } void setSuccessor(unsigned idx, BasicBlock *NewSucc) { assert(idx < getNumSuccessors() && "Successor # out of range for switch!"); - setOperand(idx*2+1, (Value*)NewSucc); + setOperand(idx * 2 + 1, NewSucc); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -3066,6 +3209,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; @@ -3078,7 +3222,6 @@ struct OperandTraits : public HungoffOperandTraits<2> { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value) - //===----------------------------------------------------------------------===// // IndirectBrInst Class //===----------------------------------------------------------------------===// @@ -3111,6 +3254,7 @@ class IndirectBrInst : public TerminatorInst { /// here to make memory allocation more efficient. This constructor also /// autoinserts at the end of the specified BasicBlock. IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -3134,7 +3278,6 @@ public: const Value *getAddress() const { return getOperand(0); } void setAddress(Value *V) { setOperand(0, V); } - /// getNumDestinations - return the number of possible destinations in this /// indirectbr instruction. unsigned getNumDestinations() const { return getNumOperands()-1; } @@ -3156,7 +3299,7 @@ public: return cast(getOperand(i+1)); } void setSuccessor(unsigned i, BasicBlock *NewSucc) { - setOperand(i+1, (Value*)NewSucc); + setOperand(i + 1, NewSucc); } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -3166,6 +3309,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; @@ -3178,7 +3322,6 @@ struct OperandTraits : public HungoffOperandTraits<1> { DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value) - //===----------------------------------------------------------------------===// // InvokeInst Class //===----------------------------------------------------------------------===// @@ -3186,71 +3329,122 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value) /// InvokeInst - Invoke instruction. The SubclassData field is used to hold the /// calling convention of the call. /// -class InvokeInst : public TerminatorInst { +class InvokeInst : public TerminatorInst, + public OperandBundleUser { AttributeSet AttributeList; FunctionType *FTy; InvokeInst(const InvokeInst &BI); void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, const Twine &NameStr) { + ArrayRef Args, ArrayRef Bundles, + const Twine &NameStr) { init(cast( cast(Func->getType())->getElementType()), - Func, IfNormal, IfException, Args, NameStr); + Func, IfNormal, IfException, Args, Bundles, NameStr); } void init(FunctionType *FTy, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - const Twine &NameStr); + ArrayRef Bundles, const Twine &NameStr); /// Construct an InvokeInst given a range of arguments. 
/// /// \brief Construct an InvokeInst from a range of arguments inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, unsigned Values, - const Twine &NameStr, Instruction *InsertBefore) + ArrayRef Args, ArrayRef Bundles, + unsigned Values, const Twine &NameStr, + Instruction *InsertBefore) : InvokeInst(cast( cast(Func->getType())->getElementType()), - Func, IfNormal, IfException, Args, Values, NameStr, + Func, IfNormal, IfException, Args, Bundles, Values, NameStr, InsertBefore) {} inline InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - unsigned Values, const Twine &NameStr, - Instruction *InsertBefore); + ArrayRef Bundles, unsigned Values, + const Twine &NameStr, Instruction *InsertBefore); /// Construct an InvokeInst given a range of arguments. /// /// \brief Construct an InvokeInst from a range of arguments inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, unsigned Values, - const Twine &NameStr, BasicBlock *InsertAtEnd); + ArrayRef Args, ArrayRef Bundles, + unsigned Values, const Twine &NameStr, + BasicBlock *InsertAtEnd); + + friend class OperandBundleUser; + bool hasDescriptor() const { return HasDescriptor; } + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; InvokeInst *cloneImpl() const; public: - static InvokeInst *Create(Value *Func, - BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, const Twine &NameStr = "", + static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, + BasicBlock *IfException, ArrayRef Args, + const Twine &NameStr, Instruction *InsertBefore = nullptr) { return Create(cast( cast(Func->getType())->getElementType()), - Func, IfNormal, IfException, Args, NameStr, InsertBefore); + Func, IfNormal, IfException, Args, None, NameStr, + InsertBefore); + } + static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, + BasicBlock *IfException, ArrayRef Args, + ArrayRef Bundles = None, + const Twine &NameStr = "", + Instruction *InsertBefore = nullptr) { + return Create(cast( + cast(Func->getType())->getElementType()), + Func, IfNormal, IfException, Args, Bundles, NameStr, + InsertBefore); } static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - const Twine &NameStr = "", + const Twine &NameStr, Instruction *InsertBefore = nullptr) { unsigned Values = unsigned(Args.size()) + 3; - return new (Values) InvokeInst(Ty, Func, IfNormal, IfException, Args, + return new (Values) InvokeInst(Ty, Func, IfNormal, IfException, Args, None, Values, NameStr, InsertBefore); } + static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, + BasicBlock *IfException, ArrayRef Args, + ArrayRef Bundles = None, + const Twine &NameStr = "", + Instruction *InsertBefore = nullptr) { + unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3; + unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo); + + return new (Values, DescriptorBytes) + InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, Values, + NameStr, InsertBefore); + } static InvokeInst *Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, const Twine &NameStr, BasicBlock *InsertAtEnd) { unsigned Values = unsigned(Args.size()) + 3; - return new(Values) InvokeInst(Func, IfNormal, IfException, Args, - Values, NameStr, InsertAtEnd); + return new (Values) InvokeInst(Func, IfNormal, 
                                   IfException, Args, None,
+                                   Values, NameStr, InsertAtEnd);
   }
+  static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
+                            BasicBlock *IfException, ArrayRef<Value *> Args,
+                            ArrayRef<OperandBundleDef> Bundles,
+                            const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    unsigned Values = unsigned(Args.size()) + CountBundleInputs(Bundles) + 3;
+    unsigned DescriptorBytes = Bundles.size() * sizeof(BundleOpInfo);
+
+    return new (Values, DescriptorBytes)
+        InvokeInst(Func, IfNormal, IfException, Args, Bundles, Values, NameStr,
+                   InsertAtEnd);
+  }
+
+  /// \brief Create a clone of \p II with a different set of operand bundles
+  /// and insert it before \p InsertPt.
+  ///
+  /// The returned invoke instruction is identical to \p II in every way except
+  /// that the operand bundles for the new instruction are set to the operand
+  /// bundles in \p Bundles.
+  static InvokeInst *Create(InvokeInst *II, ArrayRef<OperandBundleDef> Bundles,
+                            Instruction *InsertPt = nullptr);
 
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -3264,26 +3458,58 @@ public:
 
   /// getNumArgOperands - Return the number of invoke arguments.
   ///
-  unsigned getNumArgOperands() const { return getNumOperands() - 3; }
+  unsigned getNumArgOperands() const {
+    return getNumOperands() - getNumTotalBundleOperands() - 3;
+  }
 
   /// getArgOperand/setArgOperand - Return/set the i-th invoke argument.
   ///
-  Value *getArgOperand(unsigned i) const { return getOperand(i); }
-  void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
-
-  /// arg_operands - iteration adapter for range-for loops.
-  iterator_range<op_iterator> arg_operands() {
-    return iterator_range<op_iterator>(op_begin(), op_end() - 3);
+  Value *getArgOperand(unsigned i) const {
+    assert(i < getNumArgOperands() && "Out of bounds!");
+    return getOperand(i);
+  }
+  void setArgOperand(unsigned i, Value *v) {
+    assert(i < getNumArgOperands() && "Out of bounds!");
+    setOperand(i, v);
   }
 
-  /// arg_operands - iteration adapter for range-for loops.
+  /// \brief Return the iterator pointing to the beginning of the argument list.
+  op_iterator arg_begin() { return op_begin(); }
+
+  /// \brief Return the iterator pointing to the end of the argument list.
+  op_iterator arg_end() {
+    // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
+    return op_end() - getNumTotalBundleOperands() - 3;
+  };
+
+  /// \brief Iteration adapter for range-for loops.
+  iterator_range<op_iterator> arg_operands() {
+    return make_range(arg_begin(), arg_end());
+  }
+
+  /// \brief Return the iterator pointing to the beginning of the argument list.
+  const_op_iterator arg_begin() const { return op_begin(); }
+
+  /// \brief Return the iterator pointing to the end of the argument list.
+  const_op_iterator arg_end() const {
+    // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
+    return op_end() - getNumTotalBundleOperands() - 3;
+  };
+
+  /// \brief Iteration adapter for range-for loops.
   iterator_range<const_op_iterator> arg_operands() const {
-    return iterator_range<const_op_iterator>(op_begin(), op_end() - 3);
+    return make_range(arg_begin(), arg_end());
   }
 
   /// \brief Wrappers for getting the \c Use of an invoke argument.
-  const Use &getArgOperandUse(unsigned i) const { return getOperandUse(i); }
-  Use &getArgOperandUse(unsigned i) { return getOperandUse(i); }
+  const Use &getArgOperandUse(unsigned i) const {
+    assert(i < getNumArgOperands() && "Out of bounds!");
+    return getOperandUse(i);
+  }
+  Use &getArgOperandUse(unsigned i) {
+    assert(i < getNumArgOperands() && "Out of bounds!");
+    return getOperandUse(i);
+  }
 
   /// getCallingConv/setCallingConv - Get or set the calling convention of this
   /// function call.
@@ -3291,7 +3517,9 @@ public:
     return static_cast<CallingConv::ID>(getSubclassDataFromInstruction());
   }
   void setCallingConv(CallingConv::ID CC) {
-    setInstructionSubclassData(static_cast<unsigned>(CC));
+    auto ID = static_cast<unsigned>(CC);
+    assert(!(ID & ~CallingConv::MaxID) && "Unsupported calling convention");
+    setInstructionSubclassData(ID);
   }
 
   /// getAttributes - Return the parameter attributes for this invoke.
@@ -3325,6 +3553,22 @@ public:
   /// \brief Determine whether the call or the callee has the given attributes.
   bool paramHasAttr(unsigned i, Attribute::AttrKind A) const;
 
+  /// \brief Return true if the data operand at index \p i has the attribute \p
+  /// A.
+  ///
+  /// Data operands include invoke arguments and values used in operand bundles,
+  /// but do not include the invokee operand or the two successor blocks.
+  /// This routine dispatches to the underlying AttributeList or the
+  /// OperandBundleUser as appropriate.
+  ///
+  /// The index \p i is interpreted as
+  ///
+  ///  \p i == Attribute::ReturnIndex -> the return value
+  ///  \p i in [1, arg_size + 1) -> argument number (\p i - 1)
+  ///  \p i in [arg_size + 1, data_operand_size + 1) -> bundle operand at index
+  ///  (\p i - 1) in the operand list.
+  bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind A) const;
+
   /// \brief Extract the alignment for a call or parameter (0=unknown).
   unsigned getParamAlignment(unsigned i) const {
     return AttributeList.getParamAlignment(i);
@@ -3335,13 +3579,20 @@ public:
   uint64_t getDereferenceableBytes(unsigned i) const {
     return AttributeList.getDereferenceableBytes(i);
   }
-  
+
   /// \brief Extract the number of dereferenceable_or_null bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableOrNullBytes(unsigned i) const {
     return AttributeList.getDereferenceableOrNullBytes(i);
   }
 
+  /// @brief Determine if the parameter or return value is marked with NoAlias
+  /// attribute.
+  /// @param n The parameter to check. 1 is the first parameter, 0 is the return
+  /// value.
+  bool doesNotAlias(unsigned n) const {
+    return AttributeList.hasAttribute(n, Attribute::NoAlias);
+  }
+
   /// \brief Return true if the call should not be treated as a call to a
   /// builtin.
   bool isNoBuiltin() const {
@@ -3403,6 +3654,9 @@ public:
   /// \brief Determine if the call returns a structure through first
   /// pointer argument.
   bool hasStructRetAttr() const {
+    if (getNumArgOperands() == 0)
+      return false;
+
+    // Be friendly and also check the callee.
return paramHasAttr(1, Attribute::StructRet); } @@ -3495,23 +3749,23 @@ struct OperandTraits : public VariadicOperandTraits { InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, - unsigned Values, const Twine &NameStr, - Instruction *InsertBefore) + ArrayRef Bundles, unsigned Values, + const Twine &NameStr, Instruction *InsertBefore) : TerminatorInst(Ty->getReturnType(), Instruction::Invoke, OperandTraits::op_end(this) - Values, Values, InsertBefore) { - init(Ty, Func, IfNormal, IfException, Args, NameStr); + init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr); } -InvokeInst::InvokeInst(Value *Func, - BasicBlock *IfNormal, BasicBlock *IfException, - ArrayRef Args, unsigned Values, +InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal, + BasicBlock *IfException, ArrayRef Args, + ArrayRef Bundles, unsigned Values, const Twine &NameStr, BasicBlock *InsertAtEnd) - : TerminatorInst(cast(cast(Func->getType()) - ->getElementType())->getReturnType(), - Instruction::Invoke, - OperandTraits::op_end(this) - Values, - Values, InsertAtEnd) { - init(Func, IfNormal, IfException, Args, NameStr); + : TerminatorInst( + cast(cast(Func->getType()) + ->getElementType())->getReturnType(), + Instruction::Invoke, OperandTraits::op_end(this) - Values, + Values, InsertAtEnd) { + init(Func, IfNormal, IfException, Args, Bundles, NameStr); } DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value) @@ -3528,6 +3782,7 @@ class ResumeInst : public TerminatorInst { explicit ResumeInst(Value *Exn, Instruction *InsertBefore=nullptr); ResumeInst(Value *Exn, BasicBlock *InsertAtEnd); + protected: // Note: Instruction needs to be a friend here to call cloneImpl. friend class Instruction; @@ -3556,6 +3811,7 @@ public: static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } + private: BasicBlock *getSuccessorV(unsigned idx) const override; unsigned getNumSuccessorsV() const override; @@ -3569,6 +3825,430 @@ struct OperandTraits : DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value) +//===----------------------------------------------------------------------===// +// CatchSwitchInst Class +//===----------------------------------------------------------------------===// +class CatchSwitchInst : public TerminatorInst { + void *operator new(size_t, unsigned) = delete; + /// ReservedSpace - The number of operands actually allocated. NumOperands is + /// the number actually in use. + unsigned ReservedSpace; + // Operand[0] = Outer scope + // Operand[1] = Unwind block destination + // Operand[n] = BasicBlock to go to on match + CatchSwitchInst(const CatchSwitchInst &CSI); + void init(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumReserved); + void growOperands(unsigned Size); + // allocate space for exactly zero operands + void *operator new(size_t s) { return User::operator new(s); } + /// CatchSwitchInst ctor - Create a new switch instruction, specifying a + /// default destination. The number of additional handlers can be specified + /// here to make memory allocation more efficient. + /// This constructor can also autoinsert before another instruction. + CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, + unsigned NumHandlers, const Twine &NameStr, + Instruction *InsertBefore); + + /// CatchSwitchInst ctor - Create a new switch instruction, specifying a + /// default destination. The number of additional handlers can be specified + /// here to make memory allocation more efficient. 
+ /// This constructor also autoinserts at the end of the specified BasicBlock. + CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest, + unsigned NumHandlers, const Twine &NameStr, + BasicBlock *InsertAtEnd); + +protected: + // Note: Instruction needs to be a friend here to call cloneImpl. + friend class Instruction; + CatchSwitchInst *cloneImpl() const; + +public: + static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest, + unsigned NumHandlers, + const Twine &NameStr = "", + Instruction *InsertBefore = nullptr) { + return new CatchSwitchInst(ParentPad, UnwindDest, NumHandlers, NameStr, + InsertBefore); + } + static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest, + unsigned NumHandlers, const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new CatchSwitchInst(ParentPad, UnwindDest, NumHandlers, NameStr, + InsertAtEnd); + } + + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + // Accessor Methods for CatchSwitch stmt + Value *getParentPad() const { return getOperand(0); } + void setParentPad(Value *ParentPad) { setOperand(0, ParentPad); } + + // Accessor Methods for CatchSwitch stmt + bool hasUnwindDest() const { return getSubclassDataFromInstruction() & 1; } + bool unwindsToCaller() const { return !hasUnwindDest(); } + BasicBlock *getUnwindDest() const { + if (hasUnwindDest()) + return cast(getOperand(1)); + return nullptr; + } + void setUnwindDest(BasicBlock *UnwindDest) { + assert(UnwindDest); + assert(hasUnwindDest()); + setOperand(1, UnwindDest); + } + + /// getNumHandlers - return the number of 'handlers' in this catchswitch + /// instruction, except the default handler + unsigned getNumHandlers() const { + if (hasUnwindDest()) + return getNumOperands() - 2; + return getNumOperands() - 1; + } + +private: + static BasicBlock *handler_helper(Value *V) { return cast(V); } + static const BasicBlock *handler_helper(const Value *V) { + return cast(V); + } + +public: + typedef std::pointer_to_unary_function DerefFnTy; + typedef mapped_iterator handler_iterator; + typedef iterator_range handler_range; + + + typedef std::pointer_to_unary_function + ConstDerefFnTy; + typedef mapped_iterator const_handler_iterator; + typedef iterator_range const_handler_range; + + /// Returns an iterator that points to the first handler in CatchSwitchInst. + handler_iterator handler_begin() { + op_iterator It = op_begin() + 1; + if (hasUnwindDest()) + ++It; + return handler_iterator(It, DerefFnTy(handler_helper)); + } + /// Returns an iterator that points to the first handler in the + /// CatchSwitchInst. + const_handler_iterator handler_begin() const { + const_op_iterator It = op_begin() + 1; + if (hasUnwindDest()) + ++It; + return const_handler_iterator(It, ConstDerefFnTy(handler_helper)); + } + + /// Returns a read-only iterator that points one past the last + /// handler in the CatchSwitchInst. + handler_iterator handler_end() { + return handler_iterator(op_end(), DerefFnTy(handler_helper)); + } + /// Returns an iterator that points one past the last handler in the + /// CatchSwitchInst. + const_handler_iterator handler_end() const { + return const_handler_iterator(op_end(), ConstDerefFnTy(handler_helper)); + } + + /// handlers - iteration adapter for range-for loops. + handler_range handlers() { + return make_range(handler_begin(), handler_end()); + } + + /// handlers - iteration adapter for range-for loops. 
+
+  /// handlers - iteration adapter for range-for loops.
+  const_handler_range handlers() const {
+    return make_range(handler_begin(), handler_end());
+  }
+
+  /// addHandler - Add an entry to the switch instruction...
+  /// Note:
+  /// This action invalidates handler_end(). Old handler_end() iterator will
+  /// point to the added handler.
+  void addHandler(BasicBlock *Dest);
+
+  unsigned getNumSuccessors() const { return getNumOperands() - 1; }
+  BasicBlock *getSuccessor(unsigned Idx) const {
+    assert(Idx < getNumSuccessors() &&
+           "Successor # out of range for catchswitch!");
+    return cast<BasicBlock>(getOperand(Idx + 1));
+  }
+  void setSuccessor(unsigned Idx, BasicBlock *NewSucc) {
+    assert(Idx < getNumSuccessors() &&
+           "Successor # out of range for catchswitch!");
+    setOperand(Idx + 1, NewSucc);
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::CatchSwitch;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+private:
+  BasicBlock *getSuccessorV(unsigned Idx) const override;
+  unsigned getNumSuccessorsV() const override;
+  void setSuccessorV(unsigned Idx, BasicBlock *B) override;
+};
+
+template <>
+struct OperandTraits<CatchSwitchInst> : public HungoffOperandTraits<2> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchSwitchInst, Value)
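
[Editorial note: a minimal usage sketch for the class above; the blocks and pad value are hypothetical. It builds a catchswitch that unwinds to the caller and adds one handler after construction.]

// Sketch only, assuming ParentPad, HandlerBB and CatchSwitchBB exist.
CatchSwitchInst *CS = CatchSwitchInst::Create(ParentPad, /*UnwindDest=*/nullptr,
                                              /*NumHandlers=*/1, "cs",
                                              CatchSwitchBB);
CS->addHandler(HandlerBB);          // note: invalidates the old handler_end()
for (BasicBlock *Handler : CS->handlers())
  (void)Handler;                    // visit each handler block
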
+
+//===----------------------------------------------------------------------===//
+//                               CleanupPadInst Class
+//===----------------------------------------------------------------------===//
+class CleanupPadInst : public FuncletPadInst {
+private:
+  explicit CleanupPadInst(Value *ParentPad, ArrayRef<Value *> Args,
+                          unsigned Values, const Twine &NameStr,
+                          Instruction *InsertBefore)
+      : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, Values,
+                       NameStr, InsertBefore) {}
+  explicit CleanupPadInst(Value *ParentPad, ArrayRef<Value *> Args,
+                          unsigned Values, const Twine &NameStr,
+                          BasicBlock *InsertAtEnd)
+      : FuncletPadInst(Instruction::CleanupPad, ParentPad, Args, Values,
+                       NameStr, InsertAtEnd) {}
+
+public:
+  static CleanupPadInst *Create(Value *ParentPad, ArrayRef<Value *> Args = None,
+                                const Twine &NameStr = "",
+                                Instruction *InsertBefore = nullptr) {
+    unsigned Values = 1 + Args.size();
+    return new (Values)
+        CleanupPadInst(ParentPad, Args, Values, NameStr, InsertBefore);
+  }
+  static CleanupPadInst *Create(Value *ParentPad, ArrayRef<Value *> Args,
+                                const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    unsigned Values = 1 + Args.size();
+    return new (Values)
+        CleanupPadInst(ParentPad, Args, Values, NameStr, InsertAtEnd);
+  }
+
+  /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::CleanupPad;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+//                               CatchPadInst Class
+//===----------------------------------------------------------------------===//
+class CatchPadInst : public FuncletPadInst {
+private:
+  explicit CatchPadInst(Value *CatchSwitch, ArrayRef<Value *> Args,
+                        unsigned Values, const Twine &NameStr,
+                        Instruction *InsertBefore)
+      : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, Values,
+                       NameStr, InsertBefore) {}
+  explicit CatchPadInst(Value *CatchSwitch, ArrayRef<Value *> Args,
+                        unsigned Values, const Twine &NameStr,
+                        BasicBlock *InsertAtEnd)
+      : FuncletPadInst(Instruction::CatchPad, CatchSwitch, Args, Values,
+                       NameStr, InsertAtEnd) {}
+
+public:
+  static CatchPadInst *Create(Value *CatchSwitch, ArrayRef<Value *> Args,
+                              const Twine &NameStr = "",
+                              Instruction *InsertBefore = nullptr) {
+    unsigned Values = 1 + Args.size();
+    return new (Values)
+        CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertBefore);
+  }
+  static CatchPadInst *Create(Value *CatchSwitch, ArrayRef<Value *> Args,
+                              const Twine &NameStr, BasicBlock *InsertAtEnd) {
+    unsigned Values = 1 + Args.size();
+    return new (Values)
+        CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertAtEnd);
+  }
+
+  /// Convenience accessors
+  CatchSwitchInst *getCatchSwitch() const {
+    return cast<CatchSwitchInst>(Op<-1>());
+  }
+  void setCatchSwitch(Value *CatchSwitch) {
+    assert(CatchSwitch);
+    Op<-1>() = CatchSwitch;
+  }
+
+  /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::CatchPad;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
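
[Editorial note: a sketch of creating the two pad kinds; all values are hypothetical. On MSVC targets a catchpad normally carries typeinfo/flags/slot arguments; a single argument is used here only for brevity.]

// Sketch only, assuming CS (a CatchSwitchInst *), TypeInfoGV (a Value *),
// ParentPad, CatchPadBB and CleanupBB exist.
CatchPadInst *CP = CatchPadInst::Create(CS, {TypeInfoGV}, "catch", CatchPadBB);
CleanupPadInst *CLP = CleanupPadInst::Create(ParentPad, None, "cleanup",
                                             CleanupBB);
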
+
+//===----------------------------------------------------------------------===//
+//                               CatchReturnInst Class
+//===----------------------------------------------------------------------===//
+
+class CatchReturnInst : public TerminatorInst {
+  CatchReturnInst(const CatchReturnInst &RI);
+
+  void init(Value *CatchPad, BasicBlock *BB);
+  CatchReturnInst(Value *CatchPad, BasicBlock *BB, Instruction *InsertBefore);
+  CatchReturnInst(Value *CatchPad, BasicBlock *BB, BasicBlock *InsertAtEnd);
+
+protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  CatchReturnInst *cloneImpl() const;
+
+public:
+  static CatchReturnInst *Create(Value *CatchPad, BasicBlock *BB,
+                                 Instruction *InsertBefore = nullptr) {
+    assert(CatchPad);
+    assert(BB);
+    return new (2) CatchReturnInst(CatchPad, BB, InsertBefore);
+  }
+  static CatchReturnInst *Create(Value *CatchPad, BasicBlock *BB,
+                                 BasicBlock *InsertAtEnd) {
+    assert(CatchPad);
+    assert(BB);
+    return new (2) CatchReturnInst(CatchPad, BB, InsertAtEnd);
+  }
+
+  /// Provide fast operand accessors
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// Convenience accessors.
+  CatchPadInst *getCatchPad() const { return cast<CatchPadInst>(Op<0>()); }
+  void setCatchPad(CatchPadInst *CatchPad) {
+    assert(CatchPad);
+    Op<0>() = CatchPad;
+  }
+
+  BasicBlock *getSuccessor() const { return cast<BasicBlock>(Op<1>()); }
+  void setSuccessor(BasicBlock *NewSucc) {
+    assert(NewSucc);
+    Op<1>() = NewSucc;
+  }
+  unsigned getNumSuccessors() const { return 1; }
+
+  Value *getParentPad() const {
+    return getCatchPad()->getCatchSwitch()->getParentPad();
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return (I->getOpcode() == Instruction::CatchRet);
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+private:
+  BasicBlock *getSuccessorV(unsigned Idx) const override;
+  unsigned getNumSuccessorsV() const override;
+  void setSuccessorV(unsigned Idx, BasicBlock *B) override;
+};
+
+template <>
+struct OperandTraits<CatchReturnInst>
+    : public FixedNumOperandTraits<CatchReturnInst, 2> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchReturnInst, Value)
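
[Editorial note: the matching return, sketched with hypothetical values; CleanupReturnInst below follows the same pattern with an optional unwind destination.]

// Sketch only, assuming CP (a CatchPadInst *), ContBB and CatchBodyBB exist.
CatchReturnInst::Create(CP, /*BB=*/ContBB, /*InsertAtEnd=*/CatchBodyBB);
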
+
+//===----------------------------------------------------------------------===//
+//                               CleanupReturnInst Class
+//===----------------------------------------------------------------------===//
+
+class CleanupReturnInst : public TerminatorInst {
+private:
+  CleanupReturnInst(const CleanupReturnInst &RI);
+
+  void init(Value *CleanupPad, BasicBlock *UnwindBB);
+  CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
+                    Instruction *InsertBefore = nullptr);
+  CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
+                    BasicBlock *InsertAtEnd);
+
+protected:
+  // Note: Instruction needs to be a friend here to call cloneImpl.
+  friend class Instruction;
+  CleanupReturnInst *cloneImpl() const;
+
+public:
+  static CleanupReturnInst *Create(Value *CleanupPad,
+                                   BasicBlock *UnwindBB = nullptr,
+                                   Instruction *InsertBefore = nullptr) {
+    assert(CleanupPad);
+    unsigned Values = 1;
+    if (UnwindBB)
+      ++Values;
+    return new (Values)
+        CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertBefore);
+  }
+  static CleanupReturnInst *Create(Value *CleanupPad, BasicBlock *UnwindBB,
+                                   BasicBlock *InsertAtEnd) {
+    assert(CleanupPad);
+    unsigned Values = 1;
+    if (UnwindBB)
+      ++Values;
+    return new (Values)
+        CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertAtEnd);
+  }
+
+  /// Provide fast operand accessors
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  bool hasUnwindDest() const { return getSubclassDataFromInstruction() & 1; }
+  bool unwindsToCaller() const { return !hasUnwindDest(); }
+
+  /// Convenience accessor.
+  CleanupPadInst *getCleanupPad() const {
+    return cast<CleanupPadInst>(Op<0>());
+  }
+  void setCleanupPad(CleanupPadInst *CleanupPad) {
+    assert(CleanupPad);
+    Op<0>() = CleanupPad;
+  }
+
+  unsigned getNumSuccessors() const { return hasUnwindDest() ? 1 : 0; }
+
+  BasicBlock *getUnwindDest() const {
+    return hasUnwindDest() ? cast<BasicBlock>(Op<1>()) : nullptr;
+  }
+  void setUnwindDest(BasicBlock *NewDest) {
+    assert(NewDest);
+    assert(hasUnwindDest());
+    Op<1>() = NewDest;
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Instruction *I) {
+    return (I->getOpcode() == Instruction::CleanupRet);
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+private:
+  BasicBlock *getSuccessorV(unsigned Idx) const override;
+  unsigned getNumSuccessorsV() const override;
+  void setSuccessorV(unsigned Idx, BasicBlock *B) override;
+
+  // Shadow Instruction::setInstructionSubclassData with a private forwarding
+  // method so that subclasses cannot accidentally use it.
+  void setInstructionSubclassData(unsigned short D) {
+    Instruction::setInstructionSubclassData(D);
+  }
+};
+
+template <>
+struct OperandTraits<CleanupReturnInst>
+    : public VariadicOperandTraits<CleanupReturnInst, 1> {};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CleanupReturnInst, Value)
+
 //===----------------------------------------------------------------------===//
 //                           UnreachableInst Class
 //===----------------------------------------------------------------------===//
@@ -3580,6 +4260,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
 ///
 class UnreachableInst : public TerminatorInst {
   void *operator new(size_t, unsigned) = delete;
+
 protected:
   // Note: Instruction needs to be a friend here to call cloneImpl.
   friend class Instruction;
@@ -3602,6 +4283,7 @@ public:
   static inline bool classof(const Value *V) {
     return isa<Instruction>(V) && classof(cast<Instruction>(V));
   }
+
 private:
   BasicBlock *getSuccessorV(unsigned idx) const override;
   unsigned getNumSuccessorsV() const override;
diff --git a/include/llvm/IR/IntrinsicInst.h b/include/llvm/IR/IntrinsicInst.h
index 2c8b6eb6f39a..169bcc021984 100644
--- a/include/llvm/IR/IntrinsicInst.h
+++ b/include/llvm/IR/IntrinsicInst.h
@@ -372,6 +372,39 @@ namespace llvm {
       return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
     }
   };
-}
+
+  /// This represents the llvm.instrprof_value_profile intrinsic.
+  class InstrProfValueProfileInst : public IntrinsicInst {
+  public:
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::instrprof_value_profile;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+
+    GlobalVariable *getName() const {
+      return cast<GlobalVariable>(
+          const_cast<Value *>(getArgOperand(0))->stripPointerCasts());
+    }
+
+    ConstantInt *getHash() const {
+      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+    }
+
+    Value *getTargetValue() const {
+      return cast<Value>(const_cast<Value *>(getArgOperand(2)));
+    }
+
+    ConstantInt *getValueKind() const {
+      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
+    }
+
+    // Returns the value site index.
+    ConstantInt *getIndex() const {
+      return cast<ConstantInt>(const_cast<Value *>(getArgOperand(4)));
+    }
+  };
+} // namespace llvm
 
 #endif
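
[Editorial note: a sketch of how lowering code can consume the new intrinsic through these accessors; the surrounding loop and names are hypothetical.]

// Sketch only, assuming Inst is an Instruction * being visited.
if (auto *VP = dyn_cast<InstrProfValueProfileInst>(Inst)) {
  GlobalVariable *NameVar = VP->getName();           // function name variable
  uint64_t FuncHash = VP->getHash()->getZExtValue();
  uint64_t SiteIdx  = VP->getIndex()->getZExtValue();
  Value *Target     = VP->getTargetValue();          // value being profiled
  (void)NameVar; (void)FuncHash; (void)SiteIdx; (void)Target;
}
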
diff --git a/include/llvm/IR/Intrinsics.h b/include/llvm/IR/Intrinsics.h
index 43b8325107fa..314e2aaecf4b 100644
--- a/include/llvm/IR/Intrinsics.h
+++ b/include/llvm/IR/Intrinsics.h
@@ -79,7 +79,7 @@ namespace Intrinsic {
   /// intrinsic. This is returned by getIntrinsicInfoTableEntries.
   struct IITDescriptor {
     enum IITDescriptorKind {
-      Void, VarArg, MMX, Metadata, Half, Float, Double,
+      Void, VarArg, MMX, Token, Metadata, Half, Float, Double,
       Integer, Vector, Pointer, Struct,
       Argument, ExtendArgument, TruncArgument, HalfVecArgument,
       SameVecWidthArgument, PtrToArgument, VecOfPtrsToElt
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index bbae720b4e12..5a95ddced538 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -73,8 +73,8 @@ def IntrNoReturn : IntrinsicProperty;
 // Parallels the noduplicate attribute on LLVM IR functions.
 def IntrNoDuplicate : IntrinsicProperty;
 
-// IntrConvergent - Calls to this intrinsic are convergent and may only be
-// moved to control equivalent blocks.
+// IntrConvergent - Calls to this intrinsic are convergent and may not be made
+// control-dependent on any additional values.
 // Parallels the convergent attribute on LLVM IR functions.
 def IntrConvergent : IntrinsicProperty;
 
@@ -150,16 +150,20 @@ def llvm_anyptr_ty : LLVMAnyPointerType<llvm_i8_ty>; // (space)i8*
 def llvm_empty_ty : LLVMType<OtherVT>; // { }
 def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>; // { }*
 def llvm_metadata_ty : LLVMType<MetadataVT>; // !{...}
+def llvm_token_ty : LLVMType<token>; // token
 
 def llvm_x86mmx_ty : LLVMType<x86mmx>;
 def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>*
 
-def llvm_v2i1_ty : LLVMType<v2i1>; // 2 x i1
-def llvm_v4i1_ty : LLVMType<v4i1>; // 4 x i1
-def llvm_v8i1_ty : LLVMType<v8i1>; // 8 x i1
-def llvm_v16i1_ty : LLVMType<v16i1>; // 16 x i1
-def llvm_v32i1_ty : LLVMType<v32i1>; // 32 x i1
-def llvm_v64i1_ty : LLVMType<v64i1>; // 64 x i1
+def llvm_v2i1_ty    : LLVMType<v2i1>;    //  2 x i1
+def llvm_v4i1_ty    : LLVMType<v4i1>;    //  4 x i1
+def llvm_v8i1_ty    : LLVMType<v8i1>;    //  8 x i1
+def llvm_v16i1_ty   : LLVMType<v16i1>;   // 16 x i1
+def llvm_v32i1_ty   : LLVMType<v32i1>;   // 32 x i1
+def llvm_v64i1_ty   : LLVMType<v64i1>;   // 64 x i1
+def llvm_v512i1_ty  : LLVMType<v512i1>;  // 512 x i1
+def llvm_v1024i1_ty : LLVMType<v1024i1>; //1024 x i1
+
 def llvm_v1i8_ty : LLVMType<v1i8>; // 1 x i8
 def llvm_v2i8_ty : LLVMType<v2i8>; // 2 x i8
 def llvm_v4i8_ty : LLVMType<v4i8>; // 4 x i8
@@ -167,6 +171,8 @@ def llvm_v8i8_ty : LLVMType<v8i8>; // 8 x i8
 def llvm_v16i8_ty : LLVMType<v16i8>; // 16 x i8
 def llvm_v32i8_ty : LLVMType<v32i8>; // 32 x i8
 def llvm_v64i8_ty : LLVMType<v64i8>; // 64 x i8
+def llvm_v128i8_ty : LLVMType<v128i8>; //128 x i8
+def llvm_v256i8_ty : LLVMType<v256i8>; //256 x i8
 
 def llvm_v1i16_ty : LLVMType<v1i16>; // 1 x i16
 def llvm_v2i16_ty : LLVMType<v2i16>; // 2 x i16
@@ -174,17 +180,23 @@ def llvm_v4i16_ty : LLVMType<v4i16>; // 4 x i16
 def llvm_v8i16_ty : LLVMType<v8i16>; // 8 x i16
 def llvm_v16i16_ty : LLVMType<v16i16>; // 16 x i16
 def llvm_v32i16_ty : LLVMType<v32i16>; // 32 x i16
+def llvm_v64i16_ty : LLVMType<v64i16>; // 64 x i16
+def llvm_v128i16_ty : LLVMType<v128i16>; //128 x i16
 
 def llvm_v1i32_ty : LLVMType<v1i32>; // 1 x i32
 def llvm_v2i32_ty : LLVMType<v2i32>; // 2 x i32
 def llvm_v4i32_ty : LLVMType<v4i32>; // 4 x i32
 def llvm_v8i32_ty : LLVMType<v8i32>; // 8 x i32
 def llvm_v16i32_ty : LLVMType<v16i32>; // 16 x i32
+def llvm_v32i32_ty : LLVMType<v32i32>; // 32 x i32
+def llvm_v64i32_ty : LLVMType<v64i32>; // 64 x i32
+
 def llvm_v1i64_ty : LLVMType<v1i64>; // 1 x i64
 def llvm_v2i64_ty : LLVMType<v2i64>; // 2 x i64
 def llvm_v4i64_ty : LLVMType<v4i64>; // 4 x i64
 def llvm_v8i64_ty : LLVMType<v8i64>; // 8 x i64
 def llvm_v16i64_ty : LLVMType<v16i64>; // 16 x i64
+def llvm_v32i64_ty : LLVMType<v32i64>; // 32 x i64
 
 def llvm_v1i128_ty : LLVMType<v1i128>; // 1 x i128
 
@@ -292,6 +304,8 @@ def int_stacksave : Intrinsic<[llvm_ptr_ty]>,
 def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
                        GCCBuiltin<"__builtin_stack_restore">;
 
+def int_get_dynamic_area_offset : Intrinsic<[llvm_anyint_ty]>;
+
 // IntrReadWriteArgMem is more pessimistic than strictly necessary for prefetch,
 // however it does conveniently prevent the prefetch from being reordered
 // with respect to nearby accesses to the same memory.
@@ -319,6 +333,14 @@ def int_instrprof_increment : Intrinsic<[],
                                         [llvm_ptr_ty, llvm_i64_ty,
                                          llvm_i32_ty, llvm_i32_ty],
                                         []>;
 
+// A call to profile runtime for value profiling of target expressions
+// through instrumentation based profiling.
+def int_instrprof_value_profile : Intrinsic<[],
+                                            [llvm_ptr_ty, llvm_i64_ty,
+                                             llvm_i64_ty, llvm_i32_ty,
+                                             llvm_i32_ty],
+                                            []>;
+
 //===------------------- Standard C Library Intrinsics --------------------===//
 //
 
@@ -399,6 +421,7 @@ let Properties = [IntrNoMem] in {
   def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
   def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
   def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, llvm_i1_ty]>;
+  def int_bitreverse : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
 }
 
 //===------------------------ Debugger Intrinsics -------------------------===//
 
@@ -428,17 +451,13 @@ def int_eh_typeid_for : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty], [IntrNoMem]>;
 def int_eh_return_i32 : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>;
 def int_eh_return_i64 : Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty]>;
 
-// eh.begincatch takes a pointer returned by a landingpad instruction and
-// copies the exception object into the memory pointed to by the second
-// parameter. If the second parameter is null, no copy occurs.
-def int_eh_begincatch : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
-                                  [NoCapture<0>, NoCapture<1>]>;
-def int_eh_endcatch : Intrinsic<[], []>;
+// eh.exceptionpointer returns the pointer to the exception caught by
+// the given `catchpad`.
+def int_eh_exceptionpointer : Intrinsic<[llvm_anyptr_ty], [llvm_token_ty],
+                                        [IntrNoMem]>;
 
-// Represents the list of actions to take when an exception is thrown.
-def int_eh_actions : Intrinsic<[llvm_ptr_ty], [llvm_vararg_ty], []>;
-
-def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
+// Gets the exception code from a catchpad token. Only used on some platforms.
+def int_eh_exceptioncode : Intrinsic<[llvm_i32_ty], [llvm_token_ty], [IntrNoMem]>;
 
 // __builtin_unwind_init is an undocumented GCC intrinsic that causes all
 // callee-saved registers to be saved and restored (regardless of whether they
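
[Editorial note: a sketch of emitting the replacement intrinsic from a frontend; the module, builder, and types are hypothetical. Because the result is anyptr, the declaration is looked up with the desired pointer type.]

// Sketch only, assuming M, Builder, CatchPad (a CatchPadInst *) and
// Int8PtrTy exist.
Function *ExnPtrFn = Intrinsic::getDeclaration(
    M, Intrinsic::eh_exceptionpointer, {Int8PtrTy});
Value *ExnPtr = Builder.CreateCall(ExnPtrFn, {CatchPad});
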
@@ -455,6 +474,7 @@ let Properties = [IntrNoMem] in {
 def int_eh_sjlj_functioncontext : Intrinsic<[], [llvm_ptr_ty]>;
 def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
 def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty], [IntrNoReturn]>;
+def int_eh_sjlj_setup_dispatch : Intrinsic<[], []>;
 
 //===---------------- Generic Variable Attribute Intrinsics----------------===//
 //
@@ -523,6 +543,10 @@ def int_invariant_end   : Intrinsic<[],
                                      llvm_ptr_ty],
                                     [IntrReadWriteArgMem, NoCapture<2>]>;
 
+def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty],
+                                            [llvm_ptr_ty],
+                                            [IntrNoMem]>;
+
 //===------------------------ Stackmap Intrinsics -------------------------===//
 //
 def int_experimental_stackmap : Intrinsic<[],
@@ -543,21 +567,17 @@ def int_experimental_patchpoint_i64 : Intrinsic<[llvm_i64_ty],
 
 //===------------------------ Garbage Collection Intrinsics ---------------===//
 // These are documented in docs/Statepoint.rst
 
-def int_experimental_gc_statepoint : Intrinsic<[llvm_i32_ty],
+def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty],
                                [llvm_i64_ty, llvm_i32_ty,
                                 llvm_anyptr_ty, llvm_i32_ty,
                                 llvm_i32_ty, llvm_vararg_ty],
                                [Throws]>;
 
-def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_i32_ty]>;
+def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty],
+                                           [IntrReadMem]>;
 def int_experimental_gc_relocate : Intrinsic<[llvm_anyptr_ty],
-                              [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
-
-// Deprecated: will be removed in a couple of weeks
-def int_experimental_gc_result_int : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty]>;
-def int_experimental_gc_result_float : Intrinsic<[llvm_anyfloat_ty],
-                                                 [llvm_i32_ty]>;
-def int_experimental_gc_result_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty]>;
+                              [llvm_token_ty, llvm_i32_ty, llvm_i32_ty],
+                              [IntrReadMem]>;
 
 //===-------------------------- Other Intrinsics --------------------------===//
 //
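
[Editorial note: a sketch of the token-based statepoint sequence this hunk switches to; the argument list is abbreviated and the declarations are hypothetical, see docs/Statepoints.rst for the real operand layout.]

// Sketch only, assuming Builder plus StatepointFn (an overload of
// llvm.experimental.gc.statepoint), GCResultFn (llvm.experimental.gc.result)
// and a prepared std::vector<Value *> StatepointArgs.
CallInst *Token  = Builder.CreateCall(StatepointFn, StatepointArgs, "statepoint");
CallInst *Result = Builder.CreateCall(GCResultFn, {Token}, "gc.result");
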
"__builtin_amdgpu_read_workdim">; } // End TargetPrefix = "AMDGPU" + +let TargetPrefix = "amdgcn" in { + +// SI only +def int_amdgcn_buffer_wbinvl1_sc : + GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">, + Intrinsic<[], [], []>; + +// On CI+ +def int_amdgcn_buffer_wbinvl1_vol : + GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_vol">, + Intrinsic<[], [], []>; + +def int_amdgcn_buffer_wbinvl1 : + GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1">, + Intrinsic<[], [], []>; + +def int_amdgcn_s_dcache_inv : + GCCBuiltin<"__builtin_amdgcn_s_dcache_inv">, + Intrinsic<[], [], []>; + +// CI+ +def int_amdgcn_s_dcache_inv_vol : + GCCBuiltin<"__builtin_amdgcn_s_dcache_inv_vol">, + Intrinsic<[], [], []>; + +// VI +def int_amdgcn_s_dcache_wb : + GCCBuiltin<"__builtin_amdgcn_s_dcache_wb">, + Intrinsic<[], [], []>; + +// VI +def int_amdgcn_s_dcache_wb_vol : + GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">, + Intrinsic<[], [], []>; + +def int_amdgcn_dispatch_ptr : + GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">, + Intrinsic<[LLVMQualPointerType], [], [IntrNoMem]>; + +// __builtin_amdgcn_interp_p1 , , , +def int_amdgcn_interp_p1 : + GCCBuiltin<"__builtin_amdgcn_interp_p1">, + Intrinsic<[llvm_float_ty], + [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // This intrinsic reads from lds, but the memory + // values are constant, so it behaves like IntrNoMem. + +// __builtin_amdgcn_interp_p2 , , , , +def int_amdgcn_interp_p2 : + GCCBuiltin<"__builtin_amdgcn_interp_p2">, + Intrinsic<[llvm_float_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; // See int_amdgcn_v_interp_p1 for why this is + // IntrNoMem. + +def int_amdgcn_mbcnt_lo : + GCCBuiltin<"__builtin_amdgcn_mbcnt_lo">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_amdgcn_mbcnt_hi : + GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +} diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index 1dff80878592..c1d911cefee2 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -405,36 +405,36 @@ def int_arm_neon_vrintp : Neon_1Arg_Intrinsic; // De-interleaving vector loads from N-element structures. // Source operands are the address and alignment. def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty], - [llvm_ptr_ty, llvm_i32_ty], + [llvm_anyptr_ty, llvm_i32_ty], [IntrReadArgMem]>; def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], - [llvm_ptr_ty, llvm_i32_ty], + [llvm_anyptr_ty, llvm_i32_ty], [IntrReadArgMem]>; def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], - [llvm_ptr_ty, llvm_i32_ty], + [llvm_anyptr_ty, llvm_i32_ty], [IntrReadArgMem]>; def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [llvm_ptr_ty, llvm_i32_ty], + [llvm_anyptr_ty, llvm_i32_ty], [IntrReadArgMem]>; // Vector load N-element structure to one lane. // Source operands are: the address, the N input vectors (since only one // lane is assigned), the lane number, and the alignment. 
 def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
-                                      [llvm_ptr_ty, LLVMMatchType<0>,
+                                      [llvm_anyptr_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>, llvm_i32_ty,
                                        llvm_i32_ty], [IntrReadArgMem]>;
 def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>],
-                                      [llvm_ptr_ty, LLVMMatchType<0>,
+                                      [llvm_anyptr_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>, LLVMMatchType<0>,
                                        llvm_i32_ty, llvm_i32_ty],
                                       [IntrReadArgMem]>;
 def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>, LLVMMatchType<0>],
-                                      [llvm_ptr_ty, LLVMMatchType<0>,
+                                      [llvm_anyptr_ty, LLVMMatchType<0>,
                                        LLVMMatchType<0>, LLVMMatchType<0>,
                                        LLVMMatchType<0>, llvm_i32_ty,
                                        llvm_i32_ty], [IntrReadArgMem]>;
@@ -442,38 +442,38 @@ def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
 // Interleaving vector stores from N-element structures.
 // Source operands are: the address, the N vectors, and the alignment.
 def int_arm_neon_vst1 : Intrinsic<[],
-                                  [llvm_ptr_ty, llvm_anyvector_ty,
+                                  [llvm_anyptr_ty, llvm_anyvector_ty,
                                    llvm_i32_ty], [IntrReadWriteArgMem]>;
 def int_arm_neon_vst2 : Intrinsic<[],
-                                  [llvm_ptr_ty, llvm_anyvector_ty,
-                                   LLVMMatchType<0>, llvm_i32_ty],
+                                  [llvm_anyptr_ty, llvm_anyvector_ty,
+                                   LLVMMatchType<1>, llvm_i32_ty],
                                   [IntrReadWriteArgMem]>;
 def int_arm_neon_vst3 : Intrinsic<[],
-                                  [llvm_ptr_ty, llvm_anyvector_ty,
-                                   LLVMMatchType<0>, LLVMMatchType<0>,
+                                  [llvm_anyptr_ty, llvm_anyvector_ty,
+                                   LLVMMatchType<1>, LLVMMatchType<1>,
                                    llvm_i32_ty], [IntrReadWriteArgMem]>;
 def int_arm_neon_vst4 : Intrinsic<[],
-                                  [llvm_ptr_ty, llvm_anyvector_ty,
-                                   LLVMMatchType<0>, LLVMMatchType<0>,
-                                   LLVMMatchType<0>, llvm_i32_ty],
+                                  [llvm_anyptr_ty, llvm_anyvector_ty,
+                                   LLVMMatchType<1>, LLVMMatchType<1>,
+                                   LLVMMatchType<1>, llvm_i32_ty],
                                   [IntrReadWriteArgMem]>;
 
 // Vector store N-element structure from one lane.
 // Source operands are: the address, the N vectors, the lane number, and
 // the alignment.
 def int_arm_neon_vst2lane : Intrinsic<[],
-                                      [llvm_ptr_ty, llvm_anyvector_ty,
-                                       LLVMMatchType<0>, llvm_i32_ty,
+                                      [llvm_anyptr_ty, llvm_anyvector_ty,
+                                       LLVMMatchType<1>, llvm_i32_ty,
                                        llvm_i32_ty], [IntrReadWriteArgMem]>;
 def int_arm_neon_vst3lane : Intrinsic<[],
-                                      [llvm_ptr_ty, llvm_anyvector_ty,
-                                       LLVMMatchType<0>, LLVMMatchType<0>,
+                                      [llvm_anyptr_ty, llvm_anyvector_ty,
+                                       LLVMMatchType<1>, LLVMMatchType<1>,
                                        llvm_i32_ty, llvm_i32_ty],
                                       [IntrReadWriteArgMem]>;
 def int_arm_neon_vst4lane : Intrinsic<[],
-                                      [llvm_ptr_ty, llvm_anyvector_ty,
-                                       LLVMMatchType<0>, LLVMMatchType<0>,
-                                       LLVMMatchType<0>, llvm_i32_ty,
+                                      [llvm_anyptr_ty, llvm_anyvector_ty,
+                                       LLVMMatchType<1>, LLVMMatchType<1>,
+                                       LLVMMatchType<1>, llvm_i32_ty,
                                        llvm_i32_ty], [IntrReadWriteArgMem]>;
 
 // Vector bitwise select.
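
[Editorial note: since the NEON loads and stores above are now overloaded on the pointer operand as well as the vector type, a declaration lookup passes both types; a sketch with hypothetical module and types.]

// Sketch only, assuming M, a vector Type *VecTy and a pointer Type *PtrTy.
Function *Vld2 = Intrinsic::getDeclaration(M, Intrinsic::arm_neon_vld2,
                                           {VecTy, PtrTy});
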
diff --git a/include/llvm/IR/IntrinsicsHexagon.td b/include/llvm/IR/IntrinsicsHexagon.td index 78ee651d20f9..ca6fcbd44337 100644 --- a/include/llvm/IR/IntrinsicsHexagon.td +++ b/include/llvm/IR/IntrinsicsHexagon.td @@ -32,14 +32,16 @@ class Hexagon_qi_mem_Intrinsic : Hexagon_Intrinsic; + // // DEF_FUNCTION_TYPE_1(void_ftype_SI,BT_VOID,BT_INT) -> // Hexagon_void_si_Intrinsic // class Hexagon_void_si_Intrinsic : Hexagon_Intrinsic; + [], [llvm_ptr_ty], + []>; + // // DEF_FUNCTION_TYPE_1(HI_ftype_SI,BT_I16,BT_INT) -> // Hexagon_hi_si_Intrinsic @@ -458,6 +460,11 @@ class Hexagon_mem_memdisisi_Intrinsic llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; +class Hexagon_v256_v256v256_Intrinsic + : Hexagon_Intrinsic; + // // Hexagon_sf_df_Intrinsic // @@ -756,6 +763,12 @@ def int_hexagon_circ_stb : Hexagon_mem_memsisisi_Intrinsic<"circ_stb">; +def int_hexagon_mm256i_vaddw : +Hexagon_v256_v256v256_Intrinsic<"_mm256i_vaddw">; + + +// This one above will not be auto-generated, +// so make sure, you don't overwrite this one. // // BUILTIN_INFO(HEXAGON.C2_cmpeq,QI_ftype_SISI,2) // @@ -4946,6 +4959,11 @@ Hexagon_di_di_Intrinsic<"HEXAGON_S2_interleave">; // def int_hexagon_S2_deinterleave : Hexagon_di_di_Intrinsic<"HEXAGON_S2_deinterleave">; +// +// BUILTIN_INFO(HEXAGON.dcfetch_A,v_ftype_DI*,1) +// +def int_hexagon_prefetch : +Hexagon_void_si_Intrinsic<"HEXAGON_prefetch">; def llvm_ptr32_ty : LLVMPointerType; def llvm_ptr64_ty : LLVMPointerType; @@ -4964,3 +4982,4392 @@ Hexagon_Intrinsic<"HEXAGON_S2_storew_locked", [llvm_i32_ty], def int_hexagon_S4_stored_locked : Hexagon_Intrinsic<"HEXAGON_S4_stored_locked", [llvm_i32_ty], [llvm_ptr64_ty, llvm_i64_ty], [IntrReadWriteArgMem, NoCapture<0>]>; + +// V60 + +class Hexagon_v2048v2048_Intrinsic_T + : Hexagon_Intrinsic; + +// tag : V6_hi_W +// tag : V6_lo_W +class Hexagon_v512v1024_Intrinsic_T + : Hexagon_Intrinsic; + +// tag : V6_hi_W_128B +// tag : V6_lo_W_128B +class Hexagon_v1024v2048_Intrinsic_T + : Hexagon_Intrinsic; + +class Hexagon_v1024v1024_Intrinsic_T + : Hexagon_Intrinsic; + +// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1) +// tag : V6_hi +def int_hexagon_V6_hi : +Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_hi">; + +// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1) +// tag : V6_lo +def int_hexagon_V6_lo : +Hexagon_v512v1024_Intrinsic_T<"HEXAGON_V6_lo">; + +// BUILTIN_INFO(HEXAGON.V6_hi_W,VI_ftype_VI,1) +// tag : V6_hi_128B +def int_hexagon_V6_hi_128B : +Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_hi_128B">; + +// BUILTIN_INFO(HEXAGON.V6_lo_W,VI_ftype_VI,1) +// tag : V6_lo_128B +def int_hexagon_V6_lo_128B : +Hexagon_v1024v2048_Intrinsic_T<"HEXAGON_V6_lo_128B">; + +// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1) +// tag : V6_vassignp +def int_hexagon_V6_vassignp : +Hexagon_v1024v1024_Intrinsic_T<"HEXAGON_V6_vassignp">; + +// BUILTIN_INFO(HEXAGON.V6_vassignp,VI_ftype_VI,1) +// tag : V6_vassignp_128B +def int_hexagon_V6_vassignp_128B : +Hexagon_v2048v2048_Intrinsic_T<"HEXAGON_V6_vassignp_128B">; + + + +// +// Hexagon_iii_Intrinsic +// tag : S6_rol_i_r +class Hexagon_iii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_LLiLLii_Intrinsic +// tag : S6_rol_i_p +class Hexagon_LLiLLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_iiii_Intrinsic +// tag : S6_rol_i_r_acc +class Hexagon_iiii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_LLiLLiLLii_Intrinsic +// tag : S6_rol_i_p_acc +class Hexagon_LLiLLiLLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512i_Intrinsic +// tag : V6_valignb +class Hexagon_v512v512v512i_Intrinsic + : 
Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024i_Intrinsic +// tag : V6_valignb_128B +class Hexagon_v1024v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512i_Intrinsic +// tag : V6_vror +class Hexagon_v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024i_Intrinsic +// tag : V6_vror_128B +class Hexagon_v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v512_Intrinsic +// tag : V6_vunpackub +class Hexagon_v1024v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v1024_Intrinsic +// tag : V6_vunpackub_128B +class Hexagon_v2048v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v512_Intrinsic +// tag : V6_vunpackob +class Hexagon_v1024v1024v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v1024_Intrinsic +// tag : V6_vunpackob_128B +class Hexagon_v2048v2048v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512_Intrinsic +// tag : V6_vpackeb +class Hexagon_v512v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024_Intrinsic +// tag : V6_vpackeb_128B +class Hexagon_v1024v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048i_Intrinsic +// tag : V6_vdmpybus_dv_128B +class Hexagon_v2048v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v2048i_Intrinsic +// tag : V6_vdmpybus_dv_acc_128B +class Hexagon_v2048v2048v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512v512_Intrinsic +// tag : V6_vdmpyhvsat_acc +class Hexagon_v512v512v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024v1024_Intrinsic +// tag : V6_vdmpyhvsat_acc_128B +class Hexagon_v1024v1024v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v1024i_Intrinsic +// tag : V6_vdmpyhisat +class Hexagon_v512v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v2048i_Intrinsic +// tag : V6_vdmpyhisat_128B +class Hexagon_v1024v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v1024i_Intrinsic +// tag : V6_vdmpyhisat_acc +class Hexagon_v512v512v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v2048i_Intrinsic +// tag : V6_vdmpyhisat_acc_128B +class Hexagon_v1024v1024v2048i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024ii_Intrinsic +// tag : V6_vrmpyubi +class Hexagon_v1024v1024ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048ii_Intrinsic +// tag : V6_vrmpyubi_128B +class Hexagon_v2048v2048ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024ii_Intrinsic +// tag : V6_vrmpyubi_acc +class Hexagon_v1024v1024v1024ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v2048ii_Intrinsic +// tag : V6_vrmpyubi_acc_128B +class Hexagon_v2048v2048v2048ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v2048_Intrinsic +// tag : V6_vaddb_dv_128B +class Hexagon_v2048v2048v2048_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v512v512_Intrinsic +// tag : V6_vaddubh +class Hexagon_v1024v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v1024v1024_Intrinsic +// tag : V6_vaddubh_128B +class Hexagon_v2048v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512_Intrinsic +// tag : V6_vd0 +class Hexagon_v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024_Intrinsic +// tag : V6_vd0_128B +class Hexagon_v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v64iv512v512_Intrinsic +// tag : V6_vaddbq +class Hexagon_v512v64iv512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// 
Hexagon_v1024v128iv1024v1024_Intrinsic +// tag : V6_vaddbq_128B +class Hexagon_v1024v128iv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512_Intrinsic +// tag : V6_vabsh +class Hexagon_v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024_Intrinsic +// tag : V6_vabsh_128B +class Hexagon_v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v512v512_Intrinsic +// tag : V6_vmpybv_acc +class Hexagon_v1024v1024v512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v1024v1024_Intrinsic +// tag : V6_vmpybv_acc_128B +class Hexagon_v2048v2048v1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v512i_Intrinsic +// tag : V6_vmpyub +class Hexagon_v1024v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v1024i_Intrinsic +// tag : V6_vmpyub_128B +class Hexagon_v2048v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v512i_Intrinsic +// tag : V6_vmpyub_acc +class Hexagon_v1024v1024v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v1024i_Intrinsic +// tag : V6_vmpyub_acc_128B +class Hexagon_v2048v2048v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v64ii_Intrinsic +// tag : V6_vandqrt +class Hexagon_v512v64ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v128ii_Intrinsic +// tag : V6_vandqrt_128B +class Hexagon_v1024v128ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v64ii_Intrinsic +// tag : V6_vandqrt_acc +class Hexagon_v512v512v64ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v128ii_Intrinsic +// tag : V6_vandqrt_acc_128B +class Hexagon_v1024v1024v128ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv512i_Intrinsic +// tag : V6_vandvrt +class Hexagon_v64iv512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv1024i_Intrinsic +// tag : V6_vandvrt_128B +class Hexagon_v128iv1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv64iv512i_Intrinsic +// tag : V6_vandvrt_acc +class Hexagon_v64iv64iv512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv128iv1024i_Intrinsic +// tag : V6_vandvrt_acc_128B +class Hexagon_v128iv128iv1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv512v512_Intrinsic +// tag : V6_vgtw +class Hexagon_v64iv512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv1024v1024_Intrinsic +// tag : V6_vgtw_128B +class Hexagon_v128iv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv64iv512v512_Intrinsic +// tag : V6_vgtw_and +class Hexagon_v64iv64iv512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv128iv1024v1024_Intrinsic +// tag : V6_vgtw_and_128B +class Hexagon_v128iv128iv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv64iv64i_Intrinsic +// tag : V6_pred_or +class Hexagon_v64iv64iv64i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv128iv128i_Intrinsic +// tag : V6_pred_or_128B +class Hexagon_v128iv128iv128i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64iv64i_Intrinsic +// tag : V6_pred_not +class Hexagon_v64iv64i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128iv128i_Intrinsic +// tag : V6_pred_not_128B +class Hexagon_v128iv128i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v64ii_Intrinsic +// tag : V6_pred_scalar2 +class Hexagon_v64ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v128ii_Intrinsic +// tag : V6_pred_scalar2_128B +class Hexagon_v128ii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v64iv512v512_Intrinsic +// tag : V6_vswap +class 
Hexagon_v1024v64iv512v512_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v128iv1024v1024_Intrinsic +// tag : V6_vswap_128B +class Hexagon_v2048v128iv1024v1024_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v512v512i_Intrinsic +// tag : V6_vshuffvdd +class Hexagon_v1024v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v1024v1024i_Intrinsic +// tag : V6_vshuffvdd_128B +class Hexagon_v2048v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + + +// +// Hexagon_iv512i_Intrinsic +// tag : V6_extractw +class Hexagon_iv512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_iv1024i_Intrinsic +// tag : V6_extractw_128B +class Hexagon_iv1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512i_Intrinsic +// tag : V6_lvsplatw +class Hexagon_v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024i_Intrinsic +// tag : V6_lvsplatw_128B +class Hexagon_v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512LLii_Intrinsic +// tag : V6_vlutb +class Hexagon_v512v512LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024LLii_Intrinsic +// tag : V6_vlutb_128B +class Hexagon_v1024v1024LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512LLii_Intrinsic +// tag : V6_vlutb_acc +class Hexagon_v512v512v512LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024LLii_Intrinsic +// tag : V6_vlutb_acc_128B +class Hexagon_v1024v1024v1024LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048LLii_Intrinsic +// tag : V6_vlutb_dv_128B +class Hexagon_v2048v2048LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v2048LLii_Intrinsic +// tag : V6_vlutb_dv_acc_128B +class Hexagon_v2048v2048v2048LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v512v512v512v512i_Intrinsic +// tag : V6_vlutvvb_oracc +class Hexagon_v512v512v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v1024v1024i_Intrinsic +// tag : V6_vlutvvb_oracc_128B +class Hexagon_v1024v1024v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v1024v1024v512v512i_Intrinsic +// tag : V6_vlutvwh_oracc +class Hexagon_v1024v1024v512v512i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_v2048v2048v1024v1024i_Intrinsic +// tag : V6_vlutvwh_oracc_128B +class Hexagon_v2048v2048v1024v1024i_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_LLiLLiLLi_Intrinsic +// tag : M6_vabsdiffb +class Hexagon_LLiLLiLLi_Intrinsic + : Hexagon_Intrinsic; + +// +// Hexagon_LLii_Intrinsic +// tag : S6_vsplatrbp +class Hexagon_LLii_Intrinsic + : Hexagon_Intrinsic; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r,SI_ftype_SISI,2) +// tag : S6_rol_i_r +def int_hexagon_S6_rol_i_r : +Hexagon_iii_Intrinsic<"HEXAGON_S6_rol_i_r">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p,DI_ftype_DISI,2) +// tag : S6_rol_i_p +def int_hexagon_S6_rol_i_p : +Hexagon_LLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_acc,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_acc +def int_hexagon_S6_rol_i_r_acc : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_acc">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_acc,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_acc +def int_hexagon_S6_rol_i_p_acc : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_acc">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_nac,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_nac +def int_hexagon_S6_rol_i_r_nac : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_nac">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_nac,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_nac +def int_hexagon_S6_rol_i_p_nac : 
+Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_nac">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_xacc,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_xacc +def int_hexagon_S6_rol_i_r_xacc : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_xacc">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_xacc,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_xacc +def int_hexagon_S6_rol_i_p_xacc : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_xacc">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_and,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_and +def int_hexagon_S6_rol_i_r_and : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_and">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_r_or,SI_ftype_SISISI,3) +// tag : S6_rol_i_r_or +def int_hexagon_S6_rol_i_r_or : +Hexagon_iiii_Intrinsic<"HEXAGON_S6_rol_i_r_or">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_and,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_and +def int_hexagon_S6_rol_i_p_and : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_and">; + +// +// BUILTIN_INFO(HEXAGON.S6_rol_i_p_or,DI_ftype_DIDISI,3) +// tag : S6_rol_i_p_or +def int_hexagon_S6_rol_i_p_or : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S6_rol_i_p_or">; + +// +// BUILTIN_INFO(HEXAGON.S2_cabacencbin,DI_ftype_DIDIQI,3) +// tag : S2_cabacencbin +def int_hexagon_S2_cabacencbin : +Hexagon_LLiLLiLLii_Intrinsic<"HEXAGON_S2_cabacencbin">; + +// +// BUILTIN_INFO(HEXAGON.V6_valignb,VI_ftype_VIVISI,3) +// tag : V6_valignb +def int_hexagon_V6_valignb : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignb">; + +// +// BUILTIN_INFO(HEXAGON.V6_valignb_128B,VI_ftype_VIVISI,3) +// tag : V6_valignb_128B +def int_hexagon_V6_valignb_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlalignb,VI_ftype_VIVISI,3) +// tag : V6_vlalignb +def int_hexagon_V6_vlalignb : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlalignb_128B,VI_ftype_VIVISI,3) +// tag : V6_vlalignb_128B +def int_hexagon_V6_vlalignb_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_valignbi,VI_ftype_VIVISI,3) +// tag : V6_valignbi +def int_hexagon_V6_valignbi : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_valignbi">; + +// +// BUILTIN_INFO(HEXAGON.V6_valignbi_128B,VI_ftype_VIVISI,3) +// tag : V6_valignbi_128B +def int_hexagon_V6_valignbi_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_valignbi_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlalignbi,VI_ftype_VIVISI,3) +// tag : V6_vlalignbi +def int_hexagon_V6_vlalignbi : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlalignbi">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlalignbi_128B,VI_ftype_VIVISI,3) +// tag : V6_vlalignbi_128B +def int_hexagon_V6_vlalignbi_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlalignbi_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vror,VI_ftype_VISI,2) +// tag : V6_vror +def int_hexagon_V6_vror : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vror">; + +// +// BUILTIN_INFO(HEXAGON.V6_vror_128B,VI_ftype_VISI,2) +// tag : V6_vror_128B +def int_hexagon_V6_vror_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vror_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackub,VD_ftype_VI,1) +// tag : V6_vunpackub +def int_hexagon_V6_vunpackub : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackub">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackub_128B,VD_ftype_VI,1) +// tag : V6_vunpackub_128B +def int_hexagon_V6_vunpackub_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackb,VD_ftype_VI,1) +// tag : V6_vunpackb +def 
int_hexagon_V6_vunpackb : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackb_128B,VD_ftype_VI,1) +// tag : V6_vunpackb_128B +def int_hexagon_V6_vunpackb_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackuh,VD_ftype_VI,1) +// tag : V6_vunpackuh +def int_hexagon_V6_vunpackuh : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackuh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackuh_128B,VD_ftype_VI,1) +// tag : V6_vunpackuh_128B +def int_hexagon_V6_vunpackuh_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackuh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackh,VD_ftype_VI,1) +// tag : V6_vunpackh +def int_hexagon_V6_vunpackh : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vunpackh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackh_128B,VD_ftype_VI,1) +// tag : V6_vunpackh_128B +def int_hexagon_V6_vunpackh_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vunpackh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackob,VD_ftype_VDVI,2) +// tag : V6_vunpackob +def int_hexagon_V6_vunpackob : +Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackob">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackob_128B,VD_ftype_VDVI,2) +// tag : V6_vunpackob_128B +def int_hexagon_V6_vunpackob_128B : +Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackob_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackoh,VD_ftype_VDVI,2) +// tag : V6_vunpackoh +def int_hexagon_V6_vunpackoh : +Hexagon_v1024v1024v512_Intrinsic<"HEXAGON_V6_vunpackoh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vunpackoh_128B,VD_ftype_VDVI,2) +// tag : V6_vunpackoh_128B +def int_hexagon_V6_vunpackoh_128B : +Hexagon_v2048v2048v1024_Intrinsic<"HEXAGON_V6_vunpackoh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackeb,VI_ftype_VIVI,2) +// tag : V6_vpackeb +def int_hexagon_V6_vpackeb : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackeb_128B,VI_ftype_VIVI,2) +// tag : V6_vpackeb_128B +def int_hexagon_V6_vpackeb_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackeh,VI_ftype_VIVI,2) +// tag : V6_vpackeh +def int_hexagon_V6_vpackeh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackeh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackeh_128B,VI_ftype_VIVI,2) +// tag : V6_vpackeh_128B +def int_hexagon_V6_vpackeh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackeh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackob,VI_ftype_VIVI,2) +// tag : V6_vpackob +def int_hexagon_V6_vpackob : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackob">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackob_128B,VI_ftype_VIVI,2) +// tag : V6_vpackob_128B +def int_hexagon_V6_vpackob_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackob_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackoh,VI_ftype_VIVI,2) +// tag : V6_vpackoh +def int_hexagon_V6_vpackoh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackoh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackoh_128B,VI_ftype_VIVI,2) +// tag : V6_vpackoh_128B +def int_hexagon_V6_vpackoh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackoh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat,VI_ftype_VIVI,2) +// tag : V6_vpackhub_sat +def int_hexagon_V6_vpackhub_sat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhub_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackhub_sat_128B,VI_ftype_VIVI,2) +// tag : V6_vpackhub_sat_128B +def int_hexagon_V6_vpackhub_sat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhub_sat_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vpackhb_sat,VI_ftype_VIVI,2) +// tag : V6_vpackhb_sat +def int_hexagon_V6_vpackhb_sat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackhb_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackhb_sat_128B,VI_ftype_VIVI,2) +// tag : V6_vpackhb_sat_128B +def int_hexagon_V6_vpackhb_sat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackhb_sat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat,VI_ftype_VIVI,2) +// tag : V6_vpackwuh_sat +def int_hexagon_V6_vpackwuh_sat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwuh_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackwuh_sat_128B,VI_ftype_VIVI,2) +// tag : V6_vpackwuh_sat_128B +def int_hexagon_V6_vpackwuh_sat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwuh_sat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat,VI_ftype_VIVI,2) +// tag : V6_vpackwh_sat +def int_hexagon_V6_vpackwh_sat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vpackwh_sat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpackwh_sat_128B,VI_ftype_VIVI,2) +// tag : V6_vpackwh_sat_128B +def int_hexagon_V6_vpackwh_sat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vpackwh_sat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vzb,VD_ftype_VI,1) +// tag : V6_vzb +def int_hexagon_V6_vzb : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vzb_128B,VD_ftype_VI,1) +// tag : V6_vzb_128B +def int_hexagon_V6_vzb_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsb,VD_ftype_VI,1) +// tag : V6_vsb +def int_hexagon_V6_vsb : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsb_128B,VD_ftype_VI,1) +// tag : V6_vsb_128B +def int_hexagon_V6_vsb_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vzh,VD_ftype_VI,1) +// tag : V6_vzh +def int_hexagon_V6_vzh : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vzh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vzh_128B,VD_ftype_VI,1) +// tag : V6_vzh_128B +def int_hexagon_V6_vzh_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vzh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsh,VD_ftype_VI,1) +// tag : V6_vsh +def int_hexagon_V6_vsh : +Hexagon_v1024v512_Intrinsic<"HEXAGON_V6_vsh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsh_128B,VD_ftype_VI,1) +// tag : V6_vsh_128B +def int_hexagon_V6_vsh_128B : +Hexagon_v2048v1024_Intrinsic<"HEXAGON_V6_vsh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus,VI_ftype_VISI,2) +// tag : V6_vdmpybus +def int_hexagon_V6_vdmpybus : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_128B,VI_ftype_VISI,2) +// tag : V6_vdmpybus_128B +def int_hexagon_V6_vdmpybus_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc,VI_ftype_VIVISI,3) +// tag : V6_vdmpybus_acc +def int_hexagon_V6_vdmpybus_acc : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpybus_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vdmpybus_acc_128B +def int_hexagon_V6_vdmpybus_acc_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv,VD_ftype_VDSI,2) +// tag : V6_vdmpybus_dv +def int_hexagon_V6_vdmpybus_dv : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_128B,VD_ftype_VDSI,2) +// tag : V6_vdmpybus_dv_128B +def int_hexagon_V6_vdmpybus_dv_128B : +Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc,VD_ftype_VDVDSI,3) +// tag : V6_vdmpybus_dv_acc +def int_hexagon_V6_vdmpybus_dv_acc : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpybus_dv_acc_128B,VD_ftype_VDVDSI,3) +// tag : V6_vdmpybus_dv_acc_128B +def int_hexagon_V6_vdmpybus_dv_acc_128B : +Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpybus_dv_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb,VI_ftype_VISI,2) +// tag : V6_vdmpyhb +def int_hexagon_V6_vdmpyhb : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_128B,VI_ftype_VISI,2) +// tag : V6_vdmpyhb_128B +def int_hexagon_V6_vdmpyhb_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhb_acc +def int_hexagon_V6_vdmpyhb_acc : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhb_acc_128B +def int_hexagon_V6_vdmpyhb_acc_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv,VD_ftype_VDSI,2) +// tag : V6_vdmpyhb_dv +def int_hexagon_V6_vdmpyhb_dv : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_128B,VD_ftype_VDSI,2) +// tag : V6_vdmpyhb_dv_128B +def int_hexagon_V6_vdmpyhb_dv_128B : +Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc,VD_ftype_VDVDSI,3) +// tag : V6_vdmpyhb_dv_acc +def int_hexagon_V6_vdmpyhb_dv_acc : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhb_dv_acc_128B,VD_ftype_VDVDSI,3) +// tag : V6_vdmpyhb_dv_acc_128B +def int_hexagon_V6_vdmpyhb_dv_acc_128B : +Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdmpyhb_dv_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat,VI_ftype_VIVI,2) +// tag : V6_vdmpyhvsat +def int_hexagon_V6_vdmpyhvsat : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_128B,VI_ftype_VIVI,2) +// tag : V6_vdmpyhvsat_128B +def int_hexagon_V6_vdmpyhvsat_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc,VI_ftype_VIVIVI,3) +// tag : V6_vdmpyhvsat_acc +def int_hexagon_V6_vdmpyhvsat_acc : +Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhvsat_acc_128B,VI_ftype_VIVIVI,3) +// tag : V6_vdmpyhvsat_acc_128B +def int_hexagon_V6_vdmpyhvsat_acc_128B : +Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdmpyhvsat_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat,VI_ftype_VISI,2) +// tag : V6_vdmpyhsat +def int_hexagon_V6_vdmpyhsat : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_128B,VI_ftype_VISI,2) +// tag : V6_vdmpyhsat_128B +def int_hexagon_V6_vdmpyhsat_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhsat_acc +def int_hexagon_V6_vdmpyhsat_acc : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsat_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhsat_acc_128B +def int_hexagon_V6_vdmpyhsat_acc_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsat_acc_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vdmpyhisat,VI_ftype_VDSI,2) +// tag : V6_vdmpyhisat +def int_hexagon_V6_vdmpyhisat : +Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_128B,VI_ftype_VDSI,2) +// tag : V6_vdmpyhisat_128B +def int_hexagon_V6_vdmpyhisat_128B : +Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc,VI_ftype_VIVDSI,3) +// tag : V6_vdmpyhisat_acc +def int_hexagon_V6_vdmpyhisat_acc : +Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhisat_acc_128B,VI_ftype_VIVDSI,3) +// tag : V6_vdmpyhisat_acc_128B +def int_hexagon_V6_vdmpyhisat_acc_128B : +Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhisat_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat,VI_ftype_VISI,2) +// tag : V6_vdmpyhsusat +def int_hexagon_V6_vdmpyhsusat : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_128B,VI_ftype_VISI,2) +// tag : V6_vdmpyhsusat_128B +def int_hexagon_V6_vdmpyhsusat_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhsusat_acc +def int_hexagon_V6_vdmpyhsusat_acc : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsusat_acc_128B,VI_ftype_VIVISI,3) +// tag : V6_vdmpyhsusat_acc_128B +def int_hexagon_V6_vdmpyhsusat_acc_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsusat_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat,VI_ftype_VDSI,2) +// tag : V6_vdmpyhsuisat +def int_hexagon_V6_vdmpyhsuisat : +Hexagon_v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_128B,VI_ftype_VDSI,2) +// tag : V6_vdmpyhsuisat_128B +def int_hexagon_V6_vdmpyhsuisat_128B : +Hexagon_v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc,VI_ftype_VIVDSI,3) +// tag : V6_vdmpyhsuisat_acc +def int_hexagon_V6_vdmpyhsuisat_acc : +Hexagon_v512v512v1024i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdmpyhsuisat_acc_128B,VI_ftype_VIVDSI,3) +// tag : V6_vdmpyhsuisat_acc_128B +def int_hexagon_V6_vdmpyhsuisat_acc_128B : +Hexagon_v1024v1024v2048i_Intrinsic<"HEXAGON_V6_vdmpyhsuisat_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpyb,VD_ftype_VDSI,2) +// tag : V6_vtmpyb +def int_hexagon_V6_vtmpyb : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpyb_128B,VD_ftype_VDSI,2) +// tag : V6_vtmpyb_128B +def int_hexagon_V6_vtmpyb_128B : +Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc,VD_ftype_VDVDSI,3) +// tag : V6_vtmpyb_acc +def int_hexagon_V6_vtmpyb_acc : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyb_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpyb_acc_128B,VD_ftype_VDVDSI,3) +// tag : V6_vtmpyb_acc_128B +def int_hexagon_V6_vtmpyb_acc_128B : +Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyb_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpybus,VD_ftype_VDSI,2) +// tag : V6_vtmpybus +def int_hexagon_V6_vtmpybus : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpybus_128B,VD_ftype_VDSI,2) +// tag : V6_vtmpybus_128B +def int_hexagon_V6_vtmpybus_128B : +Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc,VD_ftype_VDVDSI,3) +// tag : 
+def int_hexagon_V6_vtmpybus_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpybus_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpybus_acc_128B
+def int_hexagon_V6_vtmpybus_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb,VD_ftype_VDSI,2)
+// tag : V6_vtmpyhb
+def int_hexagon_V6_vtmpyhb :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_128B,VD_ftype_VDSI,2)
+// tag : V6_vtmpyhb_128B
+def int_hexagon_V6_vtmpyhb_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpyhb_acc
+def int_hexagon_V6_vtmpyhb_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vtmpyhb_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vtmpyhb_acc_128B
+def int_hexagon_V6_vtmpyhb_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vtmpyhb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub,VI_ftype_VISI,2)
+// tag : V6_vrmpyub
+def int_hexagon_V6_vrmpyub :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub_128B,VI_ftype_VISI,2)
+// tag : V6_vrmpyub_128B
+def int_hexagon_V6_vrmpyub_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc,VI_ftype_VIVISI,3)
+// tag : V6_vrmpyub_acc
+def int_hexagon_V6_vrmpyub_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpyub_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyub_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vrmpyub_acc_128B
+def int_hexagon_V6_vrmpyub_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpyub_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv,VI_ftype_VIVI,2)
+// tag : V6_vrmpyubv
+def int_hexagon_V6_vrmpyubv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_128B,VI_ftype_VIVI,2)
+// tag : V6_vrmpyubv_128B
+def int_hexagon_V6_vrmpyubv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpyubv_acc
+def int_hexagon_V6_vrmpyubv_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpyubv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubv_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpyubv_acc_128B
+def int_hexagon_V6_vrmpyubv_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpyubv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv,VI_ftype_VIVI,2)
+// tag : V6_vrmpybv
+def int_hexagon_V6_vrmpybv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv_128B,VI_ftype_VIVI,2)
+// tag : V6_vrmpybv_128B
+def int_hexagon_V6_vrmpybv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybv_acc
+def int_hexagon_V6_vrmpybv_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybv_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybv_acc_128B
+def int_hexagon_V6_vrmpybv_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi,VD_ftype_VDSISI,3)
+// tag : V6_vrmpyubi
+def int_hexagon_V6_vrmpyubi :
+Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_128B,VD_ftype_VDSISI,3)
+// tag : V6_vrmpyubi_128B
+def int_hexagon_V6_vrmpyubi_128B :
+Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpyubi_acc
+def int_hexagon_V6_vrmpyubi_acc :
+Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpyubi_acc_128B,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpyubi_acc_128B
+def int_hexagon_V6_vrmpyubi_acc_128B :
+Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpyubi_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus,VI_ftype_VISI,2)
+// tag : V6_vrmpybus
+def int_hexagon_V6_vrmpybus :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus_128B,VI_ftype_VISI,2)
+// tag : V6_vrmpybus_128B
+def int_hexagon_V6_vrmpybus_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc,VI_ftype_VIVISI,3)
+// tag : V6_vrmpybus_acc
+def int_hexagon_V6_vrmpybus_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vrmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybus_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vrmpybus_acc_128B
+def int_hexagon_V6_vrmpybus_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vrmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi,VD_ftype_VDSISI,3)
+// tag : V6_vrmpybusi
+def int_hexagon_V6_vrmpybusi :
+Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_128B,VD_ftype_VDSISI,3)
+// tag : V6_vrmpybusi_128B
+def int_hexagon_V6_vrmpybusi_128B :
+Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpybusi_acc
+def int_hexagon_V6_vrmpybusi_acc :
+Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusi_acc_128B,VD_ftype_VDVDSISI,4)
+// tag : V6_vrmpybusi_acc_128B
+def int_hexagon_V6_vrmpybusi_acc_128B :
+Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrmpybusi_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv,VI_ftype_VIVI,2)
+// tag : V6_vrmpybusv
+def int_hexagon_V6_vrmpybusv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_128B,VI_ftype_VIVI,2)
+// tag : V6_vrmpybusv_128B
+def int_hexagon_V6_vrmpybusv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybusv_acc
+def int_hexagon_V6_vrmpybusv_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vrmpybusv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrmpybusv_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vrmpybusv_acc_128B
+def int_hexagon_V6_vrmpybusv_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrmpybusv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh,VD_ftype_VDSI,2)
+// tag : V6_vdsaduh
+def int_hexagon_V6_vdsaduh :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh_128B,VD_ftype_VDSI,2)
+// tag : V6_vdsaduh_128B
+def int_hexagon_V6_vdsaduh_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vdsaduh_acc
+def int_hexagon_V6_vdsaduh_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vdsaduh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vdsaduh_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vdsaduh_acc_128B
+def int_hexagon_V6_vdsaduh_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vdsaduh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi,VD_ftype_VDSISI,3)
+// tag : V6_vrsadubi
+def int_hexagon_V6_vrsadubi :
+Hexagon_v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi_128B,VD_ftype_VDSISI,3)
+// tag : V6_vrsadubi_128B
+def int_hexagon_V6_vrsadubi_128B :
+Hexagon_v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc,VD_ftype_VDVDSISI,4)
+// tag : V6_vrsadubi_acc
+def int_hexagon_V6_vrsadubi_acc :
+Hexagon_v1024v1024v1024ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vrsadubi_acc_128B,VD_ftype_VDVDSISI,4)
+// tag : V6_vrsadubi_acc_128B
+def int_hexagon_V6_vrsadubi_acc_128B :
+Hexagon_v2048v2048v2048ii_Intrinsic<"HEXAGON_V6_vrsadubi_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw,VI_ftype_VISI,2)
+// tag : V6_vasrw
+def int_hexagon_V6_vasrw :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw_128B,VI_ftype_VISI,2)
+// tag : V6_vasrw_128B
+def int_hexagon_V6_vasrw_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_128B">;
+
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw,VI_ftype_VISI,2)
+// tag : V6_vaslw
+def int_hexagon_V6_vaslw :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw_128B,VI_ftype_VISI,2)
+// tag : V6_vaslw_128B
+def int_hexagon_V6_vaslw_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrw,VI_ftype_VISI,2)
+// tag : V6_vlsrw
+def int_hexagon_V6_vlsrw :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrw_128B,VI_ftype_VISI,2)
+// tag : V6_vlsrw_128B
+def int_hexagon_V6_vlsrw_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwv,VI_ftype_VIVI,2)
+// tag : V6_vasrwv
+def int_hexagon_V6_vasrwv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrwv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwv_128B,VI_ftype_VIVI,2)
+// tag : V6_vasrwv_128B
+def int_hexagon_V6_vasrwv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrwv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslwv,VI_ftype_VIVI,2)
+// tag : V6_vaslwv
+def int_hexagon_V6_vaslwv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslwv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslwv_128B,VI_ftype_VIVI,2)
+// tag : V6_vaslwv_128B
+def int_hexagon_V6_vaslwv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslwv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrwv,VI_ftype_VIVI,2)
+// tag : V6_vlsrwv
+def int_hexagon_V6_vlsrwv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrwv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrwv_128B,VI_ftype_VIVI,2)
+// tag : V6_vlsrwv_128B
+def int_hexagon_V6_vlsrwv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrwv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrh,VI_ftype_VISI,2)
+// tag : V6_vasrh
+def int_hexagon_V6_vasrh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vasrh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrh_128B,VI_ftype_VISI,2)
+// tag : V6_vasrh_128B
+def int_hexagon_V6_vasrh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vasrh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslh,VI_ftype_VISI,2)
+// tag : V6_vaslh
+def int_hexagon_V6_vaslh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vaslh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslh_128B,VI_ftype_VISI,2)
+// tag : V6_vaslh_128B
+def int_hexagon_V6_vaslh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vaslh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrh,VI_ftype_VISI,2)
+// tag : V6_vlsrh
+def int_hexagon_V6_vlsrh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vlsrh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrh_128B,VI_ftype_VISI,2)
+// tag : V6_vlsrh_128B
+def int_hexagon_V6_vlsrh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vlsrh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhv,VI_ftype_VIVI,2)
+// tag : V6_vasrhv
+def int_hexagon_V6_vasrhv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vasrhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhv_128B,VI_ftype_VIVI,2)
+// tag : V6_vasrhv_128B
+def int_hexagon_V6_vasrhv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vasrhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslhv,VI_ftype_VIVI,2)
+// tag : V6_vaslhv
+def int_hexagon_V6_vaslhv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaslhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslhv_128B,VI_ftype_VIVI,2)
+// tag : V6_vaslhv_128B
+def int_hexagon_V6_vaslhv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaslhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrhv,VI_ftype_VIVI,2)
+// tag : V6_vlsrhv
+def int_hexagon_V6_vlsrhv :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vlsrhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vlsrhv_128B,VI_ftype_VIVI,2)
+// tag : V6_vlsrhv_128B
+def int_hexagon_V6_vlsrhv_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vlsrhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwh,VI_ftype_VIVISI,3)
+// tag : V6_vasrwh
+def int_hexagon_V6_vasrwh :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwh_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwh_128B
+def int_hexagon_V6_vasrwh_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhsat
+def int_hexagon_V6_vasrwhsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhsat_128B
+def int_hexagon_V6_vasrwhsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhrndsat
+def int_hexagon_V6_vasrwhrndsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwhrndsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwhrndsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwhrndsat_128B
+def int_hexagon_V6_vasrwhrndsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwhrndsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrwuhsat
+def int_hexagon_V6_vasrwuhsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrwuhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrwuhsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrwuhsat_128B
+def int_hexagon_V6_vasrwuhsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrwuhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwh,VI_ftype_VIVI,2)
+// tag : V6_vroundwh
+def int_hexagon_V6_vroundwh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwh_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundwh_128B
+def int_hexagon_V6_vroundwh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwuh,VI_ftype_VIVI,2)
+// tag : V6_vroundwuh
+def int_hexagon_V6_vroundwuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundwuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundwuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundwuh_128B
+def int_hexagon_V6_vroundwuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundwuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubsat
+def int_hexagon_V6_vasrhubsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubsat_128B
+def int_hexagon_V6_vasrhubsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubrndsat
+def int_hexagon_V6_vasrhubrndsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhubrndsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhubrndsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrhubrndsat_128B
+def int_hexagon_V6_vasrhubrndsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhubrndsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat,VI_ftype_VIVISI,3)
+// tag : V6_vasrhbrndsat
+def int_hexagon_V6_vasrhbrndsat :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrhbrndsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrhbrndsat_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrhbrndsat_128B
+def int_hexagon_V6_vasrhbrndsat_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrhbrndsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhb,VI_ftype_VIVI,2)
+// tag : V6_vroundhb
+def int_hexagon_V6_vroundhb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhb_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundhb_128B
+def int_hexagon_V6_vroundhb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhub,VI_ftype_VIVI,2)
+// tag : V6_vroundhub
+def int_hexagon_V6_vroundhub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vroundhub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vroundhub_128B,VI_ftype_VIVI,2)
+// tag : V6_vroundhub_128B
+def int_hexagon_V6_vroundhub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vroundhub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw_acc,VI_ftype_VIVISI,3)
+// tag : V6_vaslw_acc
+def int_hexagon_V6_vaslw_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vaslw_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaslw_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vaslw_acc_128B
+def int_hexagon_V6_vaslw_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vaslw_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw_acc,VI_ftype_VIVISI,3)
+// tag : V6_vasrw_acc
+def int_hexagon_V6_vasrw_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vasrw_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vasrw_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vasrw_acc_128B
+def int_hexagon_V6_vasrw_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vasrw_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb,VI_ftype_VIVI,2)
+// tag : V6_vaddb
+def int_hexagon_V6_vaddb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddb_128B
+def int_hexagon_V6_vaddb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb,VI_ftype_VIVI,2)
+// tag : V6_vsubb
+def int_hexagon_V6_vsubb :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubb_128B
+def int_hexagon_V6_vsubb_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddb_dv
+def int_hexagon_V6_vaddb_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddb_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddb_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddb_dv_128B
+def int_hexagon_V6_vaddb_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddb_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubb_dv
+def int_hexagon_V6_vsubb_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubb_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubb_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubb_dv_128B
+def int_hexagon_V6_vsubb_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubb_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh,VI_ftype_VIVI,2)
+// tag : V6_vaddh
+def int_hexagon_V6_vaddh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddh_128B
+def int_hexagon_V6_vaddh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh,VI_ftype_VIVI,2)
+// tag : V6_vsubh
+def int_hexagon_V6_vsubh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubh_128B
+def int_hexagon_V6_vsubh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddh_dv
+def int_hexagon_V6_vaddh_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddh_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddh_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddh_dv_128B
+def int_hexagon_V6_vaddh_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddh_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubh_dv
+def int_hexagon_V6_vsubh_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubh_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubh_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubh_dv_128B
+def int_hexagon_V6_vsubh_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubh_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw,VI_ftype_VIVI,2)
+// tag : V6_vaddw
+def int_hexagon_V6_vaddw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddw_128B
+def int_hexagon_V6_vaddw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw,VI_ftype_VIVI,2)
+// tag : V6_vsubw
+def int_hexagon_V6_vsubw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubw_128B
+def int_hexagon_V6_vsubw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddw_dv
+def int_hexagon_V6_vaddw_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddw_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddw_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddw_dv_128B
+def int_hexagon_V6_vaddw_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddw_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubw_dv
+def int_hexagon_V6_vsubw_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubw_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubw_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubw_dv_128B
+def int_hexagon_V6_vsubw_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubw_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat,VI_ftype_VIVI,2)
+// tag : V6_vaddubsat
+def int_hexagon_V6_vaddubsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddubsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddubsat_128B
+def int_hexagon_V6_vaddubsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddubsat_dv
+def int_hexagon_V6_vaddubsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddubsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddubsat_dv_128B
+def int_hexagon_V6_vaddubsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddubsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat,VI_ftype_VIVI,2)
+// tag : V6_vsububsat
+def int_hexagon_V6_vsububsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsububsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsububsat_128B
+def int_hexagon_V6_vsububsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsububsat_dv
+def int_hexagon_V6_vsububsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsububsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsububsat_dv_128B
+def int_hexagon_V6_vsububsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsububsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat,VI_ftype_VIVI,2)
+// tag : V6_vadduhsat
+def int_hexagon_V6_vadduhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vadduhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vadduhsat_128B
+def int_hexagon_V6_vadduhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vadduhsat_dv
+def int_hexagon_V6_vadduhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vadduhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vadduhsat_dv_128B
+def int_hexagon_V6_vadduhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vadduhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat,VI_ftype_VIVI,2)
+// tag : V6_vsubuhsat
+def int_hexagon_V6_vsubuhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubuhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubuhsat_128B
+def int_hexagon_V6_vsubuhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubuhsat_dv
+def int_hexagon_V6_vsubuhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubuhsat_dv_128B
+def int_hexagon_V6_vsubuhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubuhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat,VI_ftype_VIVI,2)
+// tag : V6_vaddhsat
+def int_hexagon_V6_vaddhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddhsat_128B
+def int_hexagon_V6_vaddhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddhsat_dv
+def int_hexagon_V6_vaddhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddhsat_dv_128B
+def int_hexagon_V6_vaddhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat,VI_ftype_VIVI,2)
+// tag : V6_vsubhsat
+def int_hexagon_V6_vsubhsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubhsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubhsat_128B
+def int_hexagon_V6_vsubhsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubhsat_dv
+def int_hexagon_V6_vsubhsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubhsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubhsat_dv_128B
+def int_hexagon_V6_vsubhsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubhsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat,VI_ftype_VIVI,2)
+// tag : V6_vaddwsat
+def int_hexagon_V6_vaddwsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vaddwsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vaddwsat_128B
+def int_hexagon_V6_vaddwsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vaddwsat_dv
+def int_hexagon_V6_vaddwsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vaddwsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vaddwsat_dv_128B
+def int_hexagon_V6_vaddwsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vaddwsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat,VI_ftype_VIVI,2)
+// tag : V6_vsubwsat
+def int_hexagon_V6_vsubwsat :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsubwsat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat_128B,VI_ftype_VIVI,2)
+// tag : V6_vsubwsat_128B
+def int_hexagon_V6_vsubwsat_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv,VD_ftype_VDVD,2)
+// tag : V6_vsubwsat_dv
+def int_hexagon_V6_vsubwsat_dv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsubwsat_dv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwsat_dv_128B,VD_ftype_VDVD,2)
+// tag : V6_vsubwsat_dv_128B
+def int_hexagon_V6_vsubwsat_dv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vsubwsat_dv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgub,VI_ftype_VIVI,2)
+// tag : V6_vavgub
+def int_hexagon_V6_vavgub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgub_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgub_128B
+def int_hexagon_V6_vavgub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgubrnd,VI_ftype_VIVI,2)
+// tag : V6_vavgubrnd
+def int_hexagon_V6_vavgubrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgubrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgubrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgubrnd_128B
+def int_hexagon_V6_vavgubrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgubrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguh,VI_ftype_VIVI,2)
+// tag : V6_vavguh
+def int_hexagon_V6_vavguh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguh_128B,VI_ftype_VIVI,2)
+// tag : V6_vavguh_128B
+def int_hexagon_V6_vavguh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguhrnd,VI_ftype_VIVI,2)
+// tag : V6_vavguhrnd
+def int_hexagon_V6_vavguhrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavguhrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavguhrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavguhrnd_128B
+def int_hexagon_V6_vavguhrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavguhrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgh,VI_ftype_VIVI,2)
+// tag : V6_vavgh
+def int_hexagon_V6_vavgh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgh_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgh_128B
+def int_hexagon_V6_vavgh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavghrnd,VI_ftype_VIVI,2)
+// tag : V6_vavghrnd
+def int_hexagon_V6_vavghrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavghrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavghrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavghrnd_128B
+def int_hexagon_V6_vavghrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavghrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgh,VI_ftype_VIVI,2)
+// tag : V6_vnavgh
+def int_hexagon_V6_vnavgh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgh_128B,VI_ftype_VIVI,2)
+// tag : V6_vnavgh_128B
+def int_hexagon_V6_vnavgh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgw,VI_ftype_VIVI,2)
+// tag : V6_vavgw
+def int_hexagon_V6_vavgw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgw_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgw_128B
+def int_hexagon_V6_vavgw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgwrnd,VI_ftype_VIVI,2)
+// tag : V6_vavgwrnd
+def int_hexagon_V6_vavgwrnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vavgwrnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vavgwrnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vavgwrnd_128B
+def int_hexagon_V6_vavgwrnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vavgwrnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgw,VI_ftype_VIVI,2)
+// tag : V6_vnavgw
+def int_hexagon_V6_vnavgw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgw_128B,VI_ftype_VIVI,2)
+// tag : V6_vnavgw_128B
+def int_hexagon_V6_vnavgw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffub,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffub
+def int_hexagon_V6_vabsdiffub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffub_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffub_128B
+def int_hexagon_V6_vabsdiffub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffuh
+def int_hexagon_V6_vabsdiffuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffuh_128B
+def int_hexagon_V6_vabsdiffuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffh,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffh
+def int_hexagon_V6_vabsdiffh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffh_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffh_128B
+def int_hexagon_V6_vabsdiffh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffw,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffw
+def int_hexagon_V6_vabsdiffw :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vabsdiffw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsdiffw_128B,VI_ftype_VIVI,2)
+// tag : V6_vabsdiffw_128B
+def int_hexagon_V6_vabsdiffw_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vabsdiffw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgub,VI_ftype_VIVI,2)
+// tag : V6_vnavgub
+def int_hexagon_V6_vnavgub :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vnavgub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnavgub_128B,VI_ftype_VIVI,2)
+// tag : V6_vnavgub_128B
+def int_hexagon_V6_vnavgub_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vnavgub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubh,VD_ftype_VIVI,2)
+// tag : V6_vaddubh
+def int_hexagon_V6_vaddubh :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddubh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddubh_128B,VD_ftype_VIVI,2)
+// tag : V6_vaddubh_128B
+def int_hexagon_V6_vaddubh_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddubh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububh,VD_ftype_VIVI,2)
+// tag : V6_vsububh
+def int_hexagon_V6_vsububh :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsububh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsububh_128B,VD_ftype_VIVI,2)
+// tag : V6_vsububh_128B
+def int_hexagon_V6_vsububh_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsububh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhw,VD_ftype_VIVI,2)
+// tag : V6_vaddhw
+def int_hexagon_V6_vaddhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vaddhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vaddhw_128B
+def int_hexagon_V6_vaddhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vaddhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhw,VD_ftype_VIVI,2)
+// tag : V6_vsubhw
+def int_hexagon_V6_vsubhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vsubhw_128B
+def int_hexagon_V6_vsubhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhw,VD_ftype_VIVI,2)
+// tag : V6_vadduhw
+def int_hexagon_V6_vadduhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vadduhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vadduhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vadduhw_128B
+def int_hexagon_V6_vadduhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vadduhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhw,VD_ftype_VIVI,2)
+// tag : V6_vsubuhw
+def int_hexagon_V6_vsubuhw :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vsubuhw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubuhw_128B,VD_ftype_VIVI,2)
+// tag : V6_vsubuhw_128B
+def int_hexagon_V6_vsubuhw_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vsubuhw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vd0,VI_ftype_,0)
+// tag : V6_vd0
+def int_hexagon_V6_vd0 :
+Hexagon_v512_Intrinsic<"HEXAGON_V6_vd0">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vd0_128B,VI_ftype_,0)
+// tag : V6_vd0_128B
+def int_hexagon_V6_vd0_128B :
+Hexagon_v1024_Intrinsic<"HEXAGON_V6_vd0_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbq
+def int_hexagon_V6_vaddbq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbq_128B
+def int_hexagon_V6_vaddbq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbq_128B">;
+
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbq
+def int_hexagon_V6_vsubbq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbq_128B
+def int_hexagon_V6_vsubbq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbnq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbnq
+def int_hexagon_V6_vaddbnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddbnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddbnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddbnq_128B
+def int_hexagon_V6_vaddbnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddbnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbnq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbnq
+def int_hexagon_V6_vsubbnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubbnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubbnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubbnq_128B
+def int_hexagon_V6_vsubbnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubbnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhq
+def int_hexagon_V6_vaddhq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhq_128B
+def int_hexagon_V6_vaddhq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhq
+def int_hexagon_V6_vsubhq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhq_128B
+def int_hexagon_V6_vsubhq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhnq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhnq
+def int_hexagon_V6_vaddhnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddhnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddhnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddhnq_128B
+def int_hexagon_V6_vaddhnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddhnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhnq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhnq
+def int_hexagon_V6_vsubhnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubhnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubhnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubhnq_128B
+def int_hexagon_V6_vsubhnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubhnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwq
+def int_hexagon_V6_vaddwq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwq_128B
+def int_hexagon_V6_vaddwq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwq
+def int_hexagon_V6_vsubwq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwq_128B
+def int_hexagon_V6_vsubwq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwnq,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwnq
+def int_hexagon_V6_vaddwnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vaddwnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vaddwnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vaddwnq_128B
+def int_hexagon_V6_vaddwnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vaddwnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwnq,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwnq
+def int_hexagon_V6_vsubwnq :
+Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vsubwnq">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vsubwnq_128B,VI_ftype_QVVIVI,3)
+// tag : V6_vsubwnq_128B
+def int_hexagon_V6_vsubwnq_128B :
+Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vsubwnq_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh,VI_ftype_VI,1)
+// tag : V6_vabsh
+def int_hexagon_V6_vabsh :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh_128B,VI_ftype_VI,1)
+// tag : V6_vabsh_128B
+def int_hexagon_V6_vabsh_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh_sat,VI_ftype_VI,1)
+// tag : V6_vabsh_sat
+def int_hexagon_V6_vabsh_sat :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsh_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsh_sat_128B,VI_ftype_VI,1)
+// tag : V6_vabsh_sat_128B
+def int_hexagon_V6_vabsh_sat_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsh_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw,VI_ftype_VI,1)
+// tag : V6_vabsw
+def int_hexagon_V6_vabsw :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw_128B,VI_ftype_VI,1)
+// tag : V6_vabsw_128B
+def int_hexagon_V6_vabsw_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw_sat,VI_ftype_VI,1)
+// tag : V6_vabsw_sat
+def int_hexagon_V6_vabsw_sat :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vabsw_sat">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vabsw_sat_128B,VI_ftype_VI,1)
+// tag : V6_vabsw_sat_128B
+def int_hexagon_V6_vabsw_sat_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vabsw_sat_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv,VD_ftype_VIVI,2)
+// tag : V6_vmpybv
+def int_hexagon_V6_vmpybv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpybv_128B
+def int_hexagon_V6_vmpybv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybv_acc
+def int_hexagon_V6_vmpybv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybv_acc_128B
+def int_hexagon_V6_vmpybv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv,VD_ftype_VIVI,2)
+// tag : V6_vmpyubv
+def int_hexagon_V6_vmpyubv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyubv_128B
+def int_hexagon_V6_vmpyubv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyubv_acc
+def int_hexagon_V6_vmpyubv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyubv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyubv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyubv_acc_128B
+def int_hexagon_V6_vmpyubv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyubv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv,VD_ftype_VIVI,2)
+// tag : V6_vmpybusv
+def int_hexagon_V6_vmpybusv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpybusv_128B
+def int_hexagon_V6_vmpybusv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybusv_acc
+def int_hexagon_V6_vmpybusv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpybusv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybusv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpybusv_acc_128B
+def int_hexagon_V6_vmpybusv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpybusv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabusv,VD_ftype_VDVD,2)
+// tag : V6_vmpabusv
+def int_hexagon_V6_vmpabusv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabusv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabusv_128B,VD_ftype_VDVD,2)
+// tag : V6_vmpabusv_128B
+def int_hexagon_V6_vmpabusv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabusv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabuuv,VD_ftype_VDVD,2)
+// tag : V6_vmpabuuv
+def int_hexagon_V6_vmpabuuv :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpabuuv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabuuv_128B,VD_ftype_VDVD,2)
+// tag : V6_vmpabuuv_128B
+def int_hexagon_V6_vmpabuuv_128B :
+Hexagon_v2048v2048v2048_Intrinsic<"HEXAGON_V6_vmpabuuv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv,VD_ftype_VIVI,2)
+// tag : V6_vmpyhv
+def int_hexagon_V6_vmpyhv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyhv_128B
+def int_hexagon_V6_vmpyhv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhv_acc
+def int_hexagon_V6_vmpyhv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhv_acc_128B
+def int_hexagon_V6_vmpyhv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv,VD_ftype_VIVI,2)
+// tag : V6_vmpyuhv
+def int_hexagon_V6_vmpyuhv :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyuhv_128B
+def int_hexagon_V6_vmpyuhv_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyuhv_acc
+def int_hexagon_V6_vmpyuhv_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyuhv_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuhv_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyuhv_acc_128B
+def int_hexagon_V6_vmpyuhv_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyuhv_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs,VI_ftype_VIVI,2)
+// tag : V6_vmpyhvsrs
+def int_hexagon_V6_vmpyhvsrs :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyhvsrs">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhvsrs_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyhvsrs_128B
+def int_hexagon_V6_vmpyhvsrs_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhvsrs_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus,VD_ftype_VIVI,2)
+// tag : V6_vmpyhus
+def int_hexagon_V6_vmpyhus :
+Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus_128B,VD_ftype_VIVI,2)
+// tag : V6_vmpyhus_128B
+def int_hexagon_V6_vmpyhus_128B :
+Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhus_acc
+def int_hexagon_V6_vmpyhus_acc :
+Hexagon_v1024v1024v512v512_Intrinsic<"HEXAGON_V6_vmpyhus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhus_acc_128B,VD_ftype_VDVIVI,3)
+// tag : V6_vmpyhus_acc_128B
+def int_hexagon_V6_vmpyhus_acc_128B :
+Hexagon_v2048v2048v1024v1024_Intrinsic<"HEXAGON_V6_vmpyhus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih,VI_ftype_VIVI,2)
+// tag : V6_vmpyih
+def int_hexagon_V6_vmpyih :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyih_128B
+def int_hexagon_V6_vmpyih_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyih_acc
+def int_hexagon_V6_vmpyih_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyih_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyih_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyih_acc_128B
+def int_hexagon_V6_vmpyih_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyih_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyewuh,VI_ftype_VIVI,2)
+// tag : V6_vmpyewuh
+def int_hexagon_V6_vmpyewuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyewuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyewuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyewuh_128B
+def int_hexagon_V6_vmpyewuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyewuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh
+def int_hexagon_V6_vmpyowh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh_128B
+def int_hexagon_V6_vmpyowh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh_rnd
+def int_hexagon_V6_vmpyowh_rnd :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyowh_rnd_128B
+def int_hexagon_V6_vmpyowh_rnd_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_sacc
+def int_hexagon_V6_vmpyowh_sacc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_sacc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_sacc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_sacc_128B
+def int_hexagon_V6_vmpyowh_sacc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_sacc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_rnd_sacc
+def int_hexagon_V6_vmpyowh_rnd_sacc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyowh_rnd_sacc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyowh_rnd_sacc_128B
+def int_hexagon_V6_vmpyowh_rnd_sacc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyowh_rnd_sacc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyieoh,VI_ftype_VIVI,2)
+// tag : V6_vmpyieoh
+def int_hexagon_V6_vmpyieoh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyieoh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyieoh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyieoh_128B
+def int_hexagon_V6_vmpyieoh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyieoh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh,VI_ftype_VIVI,2)
+// tag : V6_vmpyiewuh
+def int_hexagon_V6_vmpyiewuh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyiewuh_128B
+def int_hexagon_V6_vmpyiewuh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiowh,VI_ftype_VIVI,2)
+// tag : V6_vmpyiowh
+def int_hexagon_V6_vmpyiowh :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiowh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiowh_128B,VI_ftype_VIVI,2)
+// tag : V6_vmpyiowh_128B
+def int_hexagon_V6_vmpyiowh_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiowh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewh_acc
+def int_hexagon_V6_vmpyiewh_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewh_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewh_acc_128B
+def int_hexagon_V6_vmpyiewh_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewuh_acc
+def int_hexagon_V6_vmpyiewuh_acc :
+Hexagon_v512v512v512v512_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiewuh_acc_128B,VI_ftype_VIVIVI,3)
+// tag : V6_vmpyiewuh_acc_128B
+def int_hexagon_V6_vmpyiewuh_acc_128B :
+Hexagon_v1024v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmpyiewuh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub,VD_ftype_VISI,2)
+// tag : V6_vmpyub
+def int_hexagon_V6_vmpyub :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub_128B,VD_ftype_VISI,2)
+// tag : V6_vmpyub_128B
+def int_hexagon_V6_vmpyub_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpyub_acc
+def int_hexagon_V6_vmpyub_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyub_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyub_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpyub_acc_128B
+def int_hexagon_V6_vmpyub_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyub_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus,VD_ftype_VISI,2)
+// tag : V6_vmpybus
+def int_hexagon_V6_vmpybus :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus_128B,VD_ftype_VISI,2)
+// tag : V6_vmpybus_128B
+def int_hexagon_V6_vmpybus_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpybus_acc
+def int_hexagon_V6_vmpybus_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpybus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpybus_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpybus_acc_128B
+def int_hexagon_V6_vmpybus_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpybus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus,VD_ftype_VDSI,2)
+// tag : V6_vmpabus
+def int_hexagon_V6_vmpabus :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus_128B,VD_ftype_VDSI,2)
+// tag : V6_vmpabus_128B
+def int_hexagon_V6_vmpabus_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vmpabus_acc
+def int_hexagon_V6_vmpabus_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpabus_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpabus_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vmpabus_acc_128B
+def int_hexagon_V6_vmpabus_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpabus_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb,VD_ftype_VDSI,2)
+// tag : V6_vmpahb
+def int_hexagon_V6_vmpahb :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb_128B,VD_ftype_VDSI,2)
+// tag : V6_vmpahb_128B
+def int_hexagon_V6_vmpahb_128B :
+Hexagon_v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc,VD_ftype_VDVDSI,3)
+// tag : V6_vmpahb_acc
+def int_hexagon_V6_vmpahb_acc :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpahb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpahb_acc_128B,VD_ftype_VDVDSI,3)
+// tag : V6_vmpahb_acc_128B
+def int_hexagon_V6_vmpahb_acc_128B :
+Hexagon_v2048v2048v2048i_Intrinsic<"HEXAGON_V6_vmpahb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyh,VD_ftype_VISI,2)
+// tag : V6_vmpyh
+def int_hexagon_V6_vmpyh :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyh_128B,VD_ftype_VISI,2)
+// tag : V6_vmpyh_128B
+def int_hexagon_V6_vmpyh_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpyhsat_acc
+def int_hexagon_V6_vmpyhsat_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsat_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpyhsat_acc_128B
+def int_hexagon_V6_vmpyhsat_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyhsat_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhss,VI_ftype_VISI,2)
+// tag : V6_vmpyhss
+def int_hexagon_V6_vmpyhss :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhss">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhss_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyhss_128B
+def int_hexagon_V6_vmpyhss_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhss_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs,VI_ftype_VISI,2)
+// tag : V6_vmpyhsrs
+def int_hexagon_V6_vmpyhsrs :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyhsrs">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyhsrs_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyhsrs_128B
+def int_hexagon_V6_vmpyhsrs_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyhsrs_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh,VD_ftype_VISI,2)
+// tag : V6_vmpyuh
+def int_hexagon_V6_vmpyuh :
+Hexagon_v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh_128B,VD_ftype_VISI,2)
+// tag : V6_vmpyuh_128B
+def int_hexagon_V6_vmpyuh_128B :
+Hexagon_v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc,VD_ftype_VDVISI,3)
+// tag : V6_vmpyuh_acc
+def int_hexagon_V6_vmpyuh_acc :
+Hexagon_v1024v1024v512i_Intrinsic<"HEXAGON_V6_vmpyuh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyuh_acc_128B,VD_ftype_VDVISI,3)
+// tag : V6_vmpyuh_acc_128B
+def int_hexagon_V6_vmpyuh_acc_128B :
+Hexagon_v2048v2048v1024i_Intrinsic<"HEXAGON_V6_vmpyuh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb,VI_ftype_VISI,2)
+// tag : V6_vmpyihb
+def int_hexagon_V6_vmpyihb :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyihb_128B
+def int_hexagon_V6_vmpyihb_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc,VI_ftype_VIVISI,3)
+// tag : V6_vmpyihb_acc
+def int_hexagon_V6_vmpyihb_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyihb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyihb_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vmpyihb_acc_128B
+def int_hexagon_V6_vmpyihb_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyihb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb,VI_ftype_VISI,2)
+// tag : V6_vmpyiwb
+def int_hexagon_V6_vmpyiwb :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyiwb_128B
+def int_hexagon_V6_vmpyiwb_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwb_acc
+def int_hexagon_V6_vmpyiwb_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwb_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwb_acc_128B
+def int_hexagon_V6_vmpyiwb_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwb_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh,VI_ftype_VISI,2)
+// tag : V6_vmpyiwh
+def int_hexagon_V6_vmpyiwh :
+Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_128B,VI_ftype_VISI,2)
+// tag : V6_vmpyiwh_128B
+def int_hexagon_V6_vmpyiwh_128B :
+Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwh_acc
+def int_hexagon_V6_vmpyiwh_acc :
+Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vmpyiwh_acc_128B,VI_ftype_VIVISI,3)
+// tag : V6_vmpyiwh_acc_128B
+def int_hexagon_V6_vmpyiwh_acc_128B :
+Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vmpyiwh_acc_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vand,VI_ftype_VIVI,2)
+// tag : V6_vand
+def int_hexagon_V6_vand :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vand">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vand_128B,VI_ftype_VIVI,2)
+// tag : V6_vand_128B
+def int_hexagon_V6_vand_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vand_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vor,VI_ftype_VIVI,2)
+// tag : V6_vor
+def int_hexagon_V6_vor :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vor_128B,VI_ftype_VIVI,2)
+// tag : V6_vor_128B
+def int_hexagon_V6_vor_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vxor,VI_ftype_VIVI,2)
+// tag : V6_vxor
+def int_hexagon_V6_vxor :
+Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vxor">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vxor_128B,VI_ftype_VIVI,2)
+// tag : V6_vxor_128B
+def int_hexagon_V6_vxor_128B :
+Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vxor_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnot,VI_ftype_VI,1)
+// tag : V6_vnot
+def int_hexagon_V6_vnot :
+Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnot">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vnot_128B,VI_ftype_VI,1)
+// tag : V6_vnot_128B
+def int_hexagon_V6_vnot_128B :
+Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnot_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt,VI_ftype_QVSI,2)
+// tag : V6_vandqrt
+def int_hexagon_V6_vandqrt :
+Hexagon_v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt_128B,VI_ftype_QVSI,2)
+// tag : V6_vandqrt_128B
+def int_hexagon_V6_vandqrt_128B :
+Hexagon_v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_128B">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc,VI_ftype_VIQVSI,3)
+// tag : V6_vandqrt_acc
+def int_hexagon_V6_vandqrt_acc :
+Hexagon_v512v512v64ii_Intrinsic<"HEXAGON_V6_vandqrt_acc">;
+
+//
+// BUILTIN_INFO(HEXAGON.V6_vandqrt_acc_128B,VI_ftype_VIQVSI,3)
+// tag : V6_vandqrt_acc_128B
+def int_hexagon_V6_vandqrt_acc_128B :
int_hexagon_V6_vandqrt_acc_128B : +Hexagon_v1024v1024v128ii_Intrinsic<"HEXAGON_V6_vandqrt_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vandvrt,QV_ftype_VISI,2) +// tag : V6_vandvrt +def int_hexagon_V6_vandvrt : +Hexagon_v64iv512i_Intrinsic<"HEXAGON_V6_vandvrt">; + +// +// BUILTIN_INFO(HEXAGON.V6_vandvrt_128B,QV_ftype_VISI,2) +// tag : V6_vandvrt_128B +def int_hexagon_V6_vandvrt_128B : +Hexagon_v128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc,QV_ftype_QVVISI,3) +// tag : V6_vandvrt_acc +def int_hexagon_V6_vandvrt_acc : +Hexagon_v64iv64iv512i_Intrinsic<"HEXAGON_V6_vandvrt_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vandvrt_acc_128B,QV_ftype_QVVISI,3) +// tag : V6_vandvrt_acc_128B +def int_hexagon_V6_vandvrt_acc_128B : +Hexagon_v128iv128iv1024i_Intrinsic<"HEXAGON_V6_vandvrt_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw,QV_ftype_VIVI,2) +// tag : V6_vgtw +def int_hexagon_V6_vgtw : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_128B,QV_ftype_VIVI,2) +// tag : V6_vgtw_128B +def int_hexagon_V6_vgtw_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_and +def int_hexagon_V6_vgtw_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_and_128B +def int_hexagon_V6_vgtw_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_or +def int_hexagon_V6_vgtw_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_or_128B +def int_hexagon_V6_vgtw_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_xor +def int_hexagon_V6_vgtw_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtw_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtw_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtw_xor_128B +def int_hexagon_V6_vgtw_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtw_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw,QV_ftype_VIVI,2) +// tag : V6_veqw +def int_hexagon_V6_veqw : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqw">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_128B,QV_ftype_VIVI,2) +// tag : V6_veqw_128B +def int_hexagon_V6_veqw_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_and,QV_ftype_QVVIVI,3) +// tag : V6_veqw_and +def int_hexagon_V6_veqw_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqw_and_128B +def int_hexagon_V6_veqw_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_or,QV_ftype_QVVIVI,3) +// tag : V6_veqw_or +def int_hexagon_V6_veqw_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqw_or_128B +def int_hexagon_V6_veqw_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqw_xor,QV_ftype_QVVIVI,3) +// tag : V6_veqw_xor +def int_hexagon_V6_veqw_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqw_xor">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_veqw_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqw_xor_128B +def int_hexagon_V6_veqw_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqw_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth,QV_ftype_VIVI,2) +// tag : V6_vgth +def int_hexagon_V6_vgth : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgth">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_128B,QV_ftype_VIVI,2) +// tag : V6_vgth_128B +def int_hexagon_V6_vgth_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_and,QV_ftype_QVVIVI,3) +// tag : V6_vgth_and +def int_hexagon_V6_vgth_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgth_and_128B +def int_hexagon_V6_vgth_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_or,QV_ftype_QVVIVI,3) +// tag : V6_vgth_or +def int_hexagon_V6_vgth_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgth_or_128B +def int_hexagon_V6_vgth_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgth_xor +def int_hexagon_V6_vgth_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgth_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgth_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgth_xor_128B +def int_hexagon_V6_vgth_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgth_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh,QV_ftype_VIVI,2) +// tag : V6_veqh +def int_hexagon_V6_veqh : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqh">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_128B,QV_ftype_VIVI,2) +// tag : V6_veqh_128B +def int_hexagon_V6_veqh_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_and,QV_ftype_QVVIVI,3) +// tag : V6_veqh_and +def int_hexagon_V6_veqh_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqh_and_128B +def int_hexagon_V6_veqh_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_or,QV_ftype_QVVIVI,3) +// tag : V6_veqh_or +def int_hexagon_V6_veqh_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqh_or_128B +def int_hexagon_V6_veqh_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_xor,QV_ftype_QVVIVI,3) +// tag : V6_veqh_xor +def int_hexagon_V6_veqh_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqh_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqh_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqh_xor_128B +def int_hexagon_V6_veqh_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqh_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb,QV_ftype_VIVI,2) +// tag : V6_vgtb +def int_hexagon_V6_vgtb : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_128B,QV_ftype_VIVI,2) +// tag : V6_vgtb_128B +def int_hexagon_V6_vgtb_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_and +def int_hexagon_V6_vgtb_and : 
+Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_and_128B +def int_hexagon_V6_vgtb_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_or +def int_hexagon_V6_vgtb_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_or_128B +def int_hexagon_V6_vgtb_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_xor +def int_hexagon_V6_vgtb_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtb_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtb_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtb_xor_128B +def int_hexagon_V6_vgtb_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtb_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb,QV_ftype_VIVI,2) +// tag : V6_veqb +def int_hexagon_V6_veqb : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_veqb">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_128B,QV_ftype_VIVI,2) +// tag : V6_veqb_128B +def int_hexagon_V6_veqb_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_and,QV_ftype_QVVIVI,3) +// tag : V6_veqb_and +def int_hexagon_V6_veqb_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqb_and_128B +def int_hexagon_V6_veqb_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_or,QV_ftype_QVVIVI,3) +// tag : V6_veqb_or +def int_hexagon_V6_veqb_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqb_or_128B +def int_hexagon_V6_veqb_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_xor,QV_ftype_QVVIVI,3) +// tag : V6_veqb_xor +def int_hexagon_V6_veqb_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_veqb_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_veqb_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_veqb_xor_128B +def int_hexagon_V6_veqb_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_veqb_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw,QV_ftype_VIVI,2) +// tag : V6_vgtuw +def int_hexagon_V6_vgtuw : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_128B,QV_ftype_VIVI,2) +// tag : V6_vgtuw_128B +def int_hexagon_V6_vgtuw_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_and +def int_hexagon_V6_vgtuw_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_and_128B +def int_hexagon_V6_vgtuw_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_or +def int_hexagon_V6_vgtuw_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_or_128B +def int_hexagon_V6_vgtuw_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_or_128B">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vgtuw_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_xor +def int_hexagon_V6_vgtuw_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuw_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuw_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuw_xor_128B +def int_hexagon_V6_vgtuw_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuw_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh,QV_ftype_VIVI,2) +// tag : V6_vgtuh +def int_hexagon_V6_vgtuh : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_128B,QV_ftype_VIVI,2) +// tag : V6_vgtuh_128B +def int_hexagon_V6_vgtuh_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_and +def int_hexagon_V6_vgtuh_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_and_128B +def int_hexagon_V6_vgtuh_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_or +def int_hexagon_V6_vgtuh_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_or_128B +def int_hexagon_V6_vgtuh_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_xor +def int_hexagon_V6_vgtuh_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtuh_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtuh_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtuh_xor_128B +def int_hexagon_V6_vgtuh_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtuh_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub,QV_ftype_VIVI,2) +// tag : V6_vgtub +def int_hexagon_V6_vgtub : +Hexagon_v64iv512v512_Intrinsic<"HEXAGON_V6_vgtub">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_128B,QV_ftype_VIVI,2) +// tag : V6_vgtub_128B +def int_hexagon_V6_vgtub_128B : +Hexagon_v128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_and,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_and +def int_hexagon_V6_vgtub_and : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_and_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_and_128B +def int_hexagon_V6_vgtub_and_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_or,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_or +def int_hexagon_V6_vgtub_or : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_or_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_or_128B +def int_hexagon_V6_vgtub_or_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_xor,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_xor +def int_hexagon_V6_vgtub_xor : +Hexagon_v64iv64iv512v512_Intrinsic<"HEXAGON_V6_vgtub_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_vgtub_xor_128B,QV_ftype_QVVIVI,3) +// tag : V6_vgtub_xor_128B +def int_hexagon_V6_vgtub_xor_128B : +Hexagon_v128iv128iv1024v1024_Intrinsic<"HEXAGON_V6_vgtub_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_or,QV_ftype_QVQV,2) +// tag : V6_pred_or +def int_hexagon_V6_pred_or : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_or_128B,QV_ftype_QVQV,2) +// 
tag : V6_pred_or_128B +def int_hexagon_V6_pred_or_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_and,QV_ftype_QVQV,2) +// tag : V6_pred_and +def int_hexagon_V6_pred_and : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_and_128B,QV_ftype_QVQV,2) +// tag : V6_pred_and_128B +def int_hexagon_V6_pred_and_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_not,QV_ftype_QV,1) +// tag : V6_pred_not +def int_hexagon_V6_pred_not : +Hexagon_v64iv64i_Intrinsic<"HEXAGON_V6_pred_not">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_not_128B,QV_ftype_QV,1) +// tag : V6_pred_not_128B +def int_hexagon_V6_pred_not_128B : +Hexagon_v128iv128i_Intrinsic<"HEXAGON_V6_pred_not_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_xor,QV_ftype_QVQV,2) +// tag : V6_pred_xor +def int_hexagon_V6_pred_xor : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_xor">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_xor_128B,QV_ftype_QVQV,2) +// tag : V6_pred_xor_128B +def int_hexagon_V6_pred_xor_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_xor_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_and_n,QV_ftype_QVQV,2) +// tag : V6_pred_and_n +def int_hexagon_V6_pred_and_n : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_and_n">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_and_n_128B,QV_ftype_QVQV,2) +// tag : V6_pred_and_n_128B +def int_hexagon_V6_pred_and_n_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_and_n_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_or_n,QV_ftype_QVQV,2) +// tag : V6_pred_or_n +def int_hexagon_V6_pred_or_n : +Hexagon_v64iv64iv64i_Intrinsic<"HEXAGON_V6_pred_or_n">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_or_n_128B,QV_ftype_QVQV,2) +// tag : V6_pred_or_n_128B +def int_hexagon_V6_pred_or_n_128B : +Hexagon_v128iv128iv128i_Intrinsic<"HEXAGON_V6_pred_or_n_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_scalar2,QV_ftype_SI,1) +// tag : V6_pred_scalar2 +def int_hexagon_V6_pred_scalar2 : +Hexagon_v64ii_Intrinsic<"HEXAGON_V6_pred_scalar2">; + +// +// BUILTIN_INFO(HEXAGON.V6_pred_scalar2_128B,QV_ftype_SI,1) +// tag : V6_pred_scalar2_128B +def int_hexagon_V6_pred_scalar2_128B : +Hexagon_v128ii_Intrinsic<"HEXAGON_V6_pred_scalar2_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmux,VI_ftype_QVVIVI,3) +// tag : V6_vmux +def int_hexagon_V6_vmux : +Hexagon_v512v64iv512v512_Intrinsic<"HEXAGON_V6_vmux">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmux_128B,VI_ftype_QVVIVI,3) +// tag : V6_vmux_128B +def int_hexagon_V6_vmux_128B : +Hexagon_v1024v128iv1024v1024_Intrinsic<"HEXAGON_V6_vmux_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vswap,VD_ftype_QVVIVI,3) +// tag : V6_vswap +def int_hexagon_V6_vswap : +Hexagon_v1024v64iv512v512_Intrinsic<"HEXAGON_V6_vswap">; + +// +// BUILTIN_INFO(HEXAGON.V6_vswap_128B,VD_ftype_QVVIVI,3) +// tag : V6_vswap_128B +def int_hexagon_V6_vswap_128B : +Hexagon_v2048v128iv1024v1024_Intrinsic<"HEXAGON_V6_vswap_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxub,VI_ftype_VIVI,2) +// tag : V6_vmaxub +def int_hexagon_V6_vmaxub : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxub">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxub_128B,VI_ftype_VIVI,2) +// tag : V6_vmaxub_128B +def int_hexagon_V6_vmaxub_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminub,VI_ftype_VIVI,2) +// tag : V6_vminub +def int_hexagon_V6_vminub : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminub">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vminub_128B,VI_ftype_VIVI,2) +// tag : V6_vminub_128B +def int_hexagon_V6_vminub_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxuh,VI_ftype_VIVI,2) +// tag : V6_vmaxuh +def int_hexagon_V6_vmaxuh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxuh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxuh_128B,VI_ftype_VIVI,2) +// tag : V6_vmaxuh_128B +def int_hexagon_V6_vmaxuh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxuh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminuh,VI_ftype_VIVI,2) +// tag : V6_vminuh +def int_hexagon_V6_vminuh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminuh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminuh_128B,VI_ftype_VIVI,2) +// tag : V6_vminuh_128B +def int_hexagon_V6_vminuh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminuh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxh,VI_ftype_VIVI,2) +// tag : V6_vmaxh +def int_hexagon_V6_vmaxh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxh_128B,VI_ftype_VIVI,2) +// tag : V6_vmaxh_128B +def int_hexagon_V6_vmaxh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminh,VI_ftype_VIVI,2) +// tag : V6_vminh +def int_hexagon_V6_vminh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminh_128B,VI_ftype_VIVI,2) +// tag : V6_vminh_128B +def int_hexagon_V6_vminh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxw,VI_ftype_VIVI,2) +// tag : V6_vmaxw +def int_hexagon_V6_vmaxw : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vmaxw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vmaxw_128B,VI_ftype_VIVI,2) +// tag : V6_vmaxw_128B +def int_hexagon_V6_vmaxw_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vmaxw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminw,VI_ftype_VIVI,2) +// tag : V6_vminw +def int_hexagon_V6_vminw : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vminw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vminw_128B,VI_ftype_VIVI,2) +// tag : V6_vminw_128B +def int_hexagon_V6_vminw_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vminw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsathub,VI_ftype_VIVI,2) +// tag : V6_vsathub +def int_hexagon_V6_vsathub : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsathub">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsathub_128B,VI_ftype_VIVI,2) +// tag : V6_vsathub_128B +def int_hexagon_V6_vsathub_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsathub_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsatwh,VI_ftype_VIVI,2) +// tag : V6_vsatwh +def int_hexagon_V6_vsatwh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vsatwh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vsatwh_128B,VI_ftype_VIVI,2) +// tag : V6_vsatwh_128B +def int_hexagon_V6_vsatwh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vsatwh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffeb,VI_ftype_VIVI,2) +// tag : V6_vshuffeb +def int_hexagon_V6_vshuffeb : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffeb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffeb_128B,VI_ftype_VIVI,2) +// tag : V6_vshuffeb_128B +def int_hexagon_V6_vshuffeb_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffeb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffob,VI_ftype_VIVI,2) +// tag : V6_vshuffob +def int_hexagon_V6_vshuffob : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshuffob">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffob_128B,VI_ftype_VIVI,2) +// tag : V6_vshuffob_128B +def 
int_hexagon_V6_vshuffob_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshuffob_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufeh,VI_ftype_VIVI,2) +// tag : V6_vshufeh +def int_hexagon_V6_vshufeh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufeh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufeh_128B,VI_ftype_VIVI,2) +// tag : V6_vshufeh_128B +def int_hexagon_V6_vshufeh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufeh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoh,VI_ftype_VIVI,2) +// tag : V6_vshufoh +def int_hexagon_V6_vshufoh : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vshufoh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoh_128B,VI_ftype_VIVI,2) +// tag : V6_vshufoh_128B +def int_hexagon_V6_vshufoh_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vshufoh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffvdd,VD_ftype_VIVISI,3) +// tag : V6_vshuffvdd +def int_hexagon_V6_vshuffvdd : +Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vshuffvdd">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffvdd_128B,VD_ftype_VIVISI,3) +// tag : V6_vshuffvdd_128B +def int_hexagon_V6_vshuffvdd_128B : +Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vshuffvdd_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealvdd,VD_ftype_VIVISI,3) +// tag : V6_vdealvdd +def int_hexagon_V6_vdealvdd : +Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vdealvdd">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealvdd_128B,VD_ftype_VIVISI,3) +// tag : V6_vdealvdd_128B +def int_hexagon_V6_vdealvdd_128B : +Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vdealvdd_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoeh,VD_ftype_VIVI,2) +// tag : V6_vshufoeh +def int_hexagon_V6_vshufoeh : +Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoeh_128B,VD_ftype_VIVI,2) +// tag : V6_vshufoeh_128B +def int_hexagon_V6_vshufoeh_128B : +Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoeb,VD_ftype_VIVI,2) +// tag : V6_vshufoeb +def int_hexagon_V6_vshufoeb : +Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vshufoeb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshufoeb_128B,VD_ftype_VIVI,2) +// tag : V6_vshufoeb_128B +def int_hexagon_V6_vshufoeb_128B : +Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vshufoeb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealh,VI_ftype_VI,1) +// tag : V6_vdealh +def int_hexagon_V6_vdealh : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealh_128B,VI_ftype_VI,1) +// tag : V6_vdealh_128B +def int_hexagon_V6_vdealh_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealb,VI_ftype_VI,1) +// tag : V6_vdealb +def int_hexagon_V6_vdealb : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vdealb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealb_128B,VI_ftype_VI,1) +// tag : V6_vdealb_128B +def int_hexagon_V6_vdealb_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vdealb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealb4w,VI_ftype_VIVI,2) +// tag : V6_vdealb4w +def int_hexagon_V6_vdealb4w : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdealb4w">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdealb4w_128B,VI_ftype_VIVI,2) +// tag : V6_vdealb4w_128B +def int_hexagon_V6_vdealb4w_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdealb4w_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffh,VI_ftype_VI,1) +// tag : V6_vshuffh +def int_hexagon_V6_vshuffh : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffh">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffh_128B,VI_ftype_VI,1) +// tag : V6_vshuffh_128B +def 
int_hexagon_V6_vshuffh_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffb,VI_ftype_VI,1) +// tag : V6_vshuffb +def int_hexagon_V6_vshuffb : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vshuffb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vshuffb_128B,VI_ftype_VI,1) +// tag : V6_vshuffb_128B +def int_hexagon_V6_vshuffb_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vshuffb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_extractw,SI_ftype_VISI,2) +// tag : V6_extractw +def int_hexagon_V6_extractw : +Hexagon_iv512i_Intrinsic<"HEXAGON_V6_extractw">; + +// +// BUILTIN_INFO(HEXAGON.V6_extractw_128B,SI_ftype_VISI,2) +// tag : V6_extractw_128B +def int_hexagon_V6_extractw_128B : +Hexagon_iv1024i_Intrinsic<"HEXAGON_V6_extractw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vinsertwr,VI_ftype_VISI,2) +// tag : V6_vinsertwr +def int_hexagon_V6_vinsertwr : +Hexagon_v512v512i_Intrinsic<"HEXAGON_V6_vinsertwr">; + +// +// BUILTIN_INFO(HEXAGON.V6_vinsertwr_128B,VI_ftype_VISI,2) +// tag : V6_vinsertwr_128B +def int_hexagon_V6_vinsertwr_128B : +Hexagon_v1024v1024i_Intrinsic<"HEXAGON_V6_vinsertwr_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_lvsplatw,VI_ftype_SI,1) +// tag : V6_lvsplatw +def int_hexagon_V6_lvsplatw : +Hexagon_v512i_Intrinsic<"HEXAGON_V6_lvsplatw">; + +// +// BUILTIN_INFO(HEXAGON.V6_lvsplatw_128B,VI_ftype_SI,1) +// tag : V6_lvsplatw_128B +def int_hexagon_V6_lvsplatw_128B : +Hexagon_v1024i_Intrinsic<"HEXAGON_V6_lvsplatw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vassign,VI_ftype_VI,1) +// tag : V6_vassign +def int_hexagon_V6_vassign : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vassign">; + +// +// BUILTIN_INFO(HEXAGON.V6_vassign_128B,VI_ftype_VI,1) +// tag : V6_vassign_128B +def int_hexagon_V6_vassign_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vassign_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcombine,VD_ftype_VIVI,2) +// tag : V6_vcombine +def int_hexagon_V6_vcombine : +Hexagon_v1024v512v512_Intrinsic<"HEXAGON_V6_vcombine">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcombine_128B,VD_ftype_VIVI,2) +// tag : V6_vcombine_128B +def int_hexagon_V6_vcombine_128B : +Hexagon_v2048v1024v1024_Intrinsic<"HEXAGON_V6_vcombine_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb,VI_ftype_VIDISI,3) +// tag : V6_vlutb +def int_hexagon_V6_vlutb : +Hexagon_v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_128B,VI_ftype_VIDISI,3) +// tag : V6_vlutb_128B +def int_hexagon_V6_vlutb_128B : +Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_acc,VI_ftype_VIVIDISI,4) +// tag : V6_vlutb_acc +def int_hexagon_V6_vlutb_acc : +Hexagon_v512v512v512LLii_Intrinsic<"HEXAGON_V6_vlutb_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_acc_128B,VI_ftype_VIVIDISI,4) +// tag : V6_vlutb_acc_128B +def int_hexagon_V6_vlutb_acc_128B : +Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_dv,VD_ftype_VDDISI,3) +// tag : V6_vlutb_dv +def int_hexagon_V6_vlutb_dv : +Hexagon_v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_128B,VD_ftype_VDDISI,3) +// tag : V6_vlutb_dv_128B +def int_hexagon_V6_vlutb_dv_128B : +Hexagon_v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc,VD_ftype_VDVDDISI,4) +// tag : V6_vlutb_dv_acc +def int_hexagon_V6_vlutb_dv_acc : +Hexagon_v1024v1024v1024LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutb_dv_acc_128B,VD_ftype_VDVDDISI,4) +// tag : 
V6_vlutb_dv_acc_128B +def int_hexagon_V6_vlutb_dv_acc_128B : +Hexagon_v2048v2048v2048LLii_Intrinsic<"HEXAGON_V6_vlutb_dv_acc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdelta,VI_ftype_VIVI,2) +// tag : V6_vdelta +def int_hexagon_V6_vdelta : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vdelta">; + +// +// BUILTIN_INFO(HEXAGON.V6_vdelta_128B,VI_ftype_VIVI,2) +// tag : V6_vdelta_128B +def int_hexagon_V6_vdelta_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vdelta_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrdelta,VI_ftype_VIVI,2) +// tag : V6_vrdelta +def int_hexagon_V6_vrdelta : +Hexagon_v512v512v512_Intrinsic<"HEXAGON_V6_vrdelta">; + +// +// BUILTIN_INFO(HEXAGON.V6_vrdelta_128B,VI_ftype_VIVI,2) +// tag : V6_vrdelta_128B +def int_hexagon_V6_vrdelta_128B : +Hexagon_v1024v1024v1024_Intrinsic<"HEXAGON_V6_vrdelta_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcl0w,VI_ftype_VI,1) +// tag : V6_vcl0w +def int_hexagon_V6_vcl0w : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0w">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcl0w_128B,VI_ftype_VI,1) +// tag : V6_vcl0w_128B +def int_hexagon_V6_vcl0w_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0w_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcl0h,VI_ftype_VI,1) +// tag : V6_vcl0h +def int_hexagon_V6_vcl0h : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vcl0h">; + +// +// BUILTIN_INFO(HEXAGON.V6_vcl0h_128B,VI_ftype_VI,1) +// tag : V6_vcl0h_128B +def int_hexagon_V6_vcl0h_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vcl0h_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnormamtw,VI_ftype_VI,1) +// tag : V6_vnormamtw +def int_hexagon_V6_vnormamtw : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamtw">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnormamtw_128B,VI_ftype_VI,1) +// tag : V6_vnormamtw_128B +def int_hexagon_V6_vnormamtw_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamtw_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnormamth,VI_ftype_VI,1) +// tag : V6_vnormamth +def int_hexagon_V6_vnormamth : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vnormamth">; + +// +// BUILTIN_INFO(HEXAGON.V6_vnormamth_128B,VI_ftype_VI,1) +// tag : V6_vnormamth_128B +def int_hexagon_V6_vnormamth_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vnormamth_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpopcounth,VI_ftype_VI,1) +// tag : V6_vpopcounth +def int_hexagon_V6_vpopcounth : +Hexagon_v512v512_Intrinsic<"HEXAGON_V6_vpopcounth">; + +// +// BUILTIN_INFO(HEXAGON.V6_vpopcounth_128B,VI_ftype_VI,1) +// tag : V6_vpopcounth_128B +def int_hexagon_V6_vpopcounth_128B : +Hexagon_v1024v1024_Intrinsic<"HEXAGON_V6_vpopcounth_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvvb,VI_ftype_VIVISI,3) +// tag : V6_vlutvvb +def int_hexagon_V6_vlutvvb : +Hexagon_v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvvb_128B,VI_ftype_VIVISI,3) +// tag : V6_vlutvvb_128B +def int_hexagon_V6_vlutvvb_128B : +Hexagon_v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc,VI_ftype_VIVIVISI,4) +// tag : V6_vlutvvb_oracc +def int_hexagon_V6_vlutvvb_oracc : +Hexagon_v512v512v512v512i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvvb_oracc_128B,VI_ftype_VIVIVISI,4) +// tag : V6_vlutvvb_oracc_128B +def int_hexagon_V6_vlutvvb_oracc_128B : +Hexagon_v1024v1024v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvvb_oracc_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvwh,VD_ftype_VIVISI,3) +// tag : V6_vlutvwh +def int_hexagon_V6_vlutvwh : +Hexagon_v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh">; + +// +// 
BUILTIN_INFO(HEXAGON.V6_vlutvwh_128B,VD_ftype_VIVISI,3) +// tag : V6_vlutvwh_128B +def int_hexagon_V6_vlutvwh_128B : +Hexagon_v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_128B">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc,VD_ftype_VDVIVISI,4) +// tag : V6_vlutvwh_oracc +def int_hexagon_V6_vlutvwh_oracc : +Hexagon_v1024v1024v512v512i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc">; + +// +// BUILTIN_INFO(HEXAGON.V6_vlutvwh_oracc_128B,VD_ftype_VDVIVISI,4) +// tag : V6_vlutvwh_oracc_128B +def int_hexagon_V6_vlutvwh_oracc_128B : +Hexagon_v2048v2048v1024v1024i_Intrinsic<"HEXAGON_V6_vlutvwh_oracc_128B">; + +// +// BUILTIN_INFO(HEXAGON.M6_vabsdiffb,DI_ftype_DIDI,2) +// tag : M6_vabsdiffb +def int_hexagon_M6_vabsdiffb : +Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffb">; + +// +// BUILTIN_INFO(HEXAGON.M6_vabsdiffub,DI_ftype_DIDI,2) +// tag : M6_vabsdiffub +def int_hexagon_M6_vabsdiffub : +Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_M6_vabsdiffub">; + +// +// BUILTIN_INFO(HEXAGON.S6_vsplatrbp,DI_ftype_SI,1) +// tag : S6_vsplatrbp +def int_hexagon_S6_vsplatrbp : +Hexagon_LLii_Intrinsic<"HEXAGON_S6_vsplatrbp">; + +// +// BUILTIN_INFO(HEXAGON.S6_vtrunehb_ppp,DI_ftype_DIDI,2) +// tag : S6_vtrunehb_ppp +def int_hexagon_S6_vtrunehb_ppp : +Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunehb_ppp">; + +// +// BUILTIN_INFO(HEXAGON.S6_vtrunohb_ppp,DI_ftype_DIDI,2) +// tag : S6_vtrunohb_ppp +def int_hexagon_S6_vtrunohb_ppp : +Hexagon_LLiLLiLLi_Intrinsic<"HEXAGON_S6_vtrunohb_ppp">; diff --git a/include/llvm/IR/IntrinsicsPowerPC.td b/include/llvm/IR/IntrinsicsPowerPC.td index eb8f1e6cd079..06dfc329fe32 100644 --- a/include/llvm/IR/IntrinsicsPowerPC.td +++ b/include/llvm/IR/IntrinsicsPowerPC.td @@ -710,21 +710,39 @@ def int_ppc_vsx_xvrsqrtedp : GCCBuiltin<"__builtin_vsx_xvrsqrtedp">, def int_ppc_vsx_xvcmpeqdp : PowerPC_VSX_Intrinsic<"xvcmpeqdp", [llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpeqdp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqdp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpeqsp : PowerPC_VSX_Intrinsic<"xvcmpeqsp", [llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpeqsp_p : GCCBuiltin<"__builtin_vsx_xvcmpeqsp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpgedp : PowerPC_VSX_Intrinsic<"xvcmpgedp", [llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpgedp_p : GCCBuiltin<"__builtin_vsx_xvcmpgedp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpgesp : PowerPC_VSX_Intrinsic<"xvcmpgesp", [llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpgesp_p : GCCBuiltin<"__builtin_vsx_xvcmpgesp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpgtdp : PowerPC_VSX_Intrinsic<"xvcmpgtdp", [llvm_v2i64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpgtdp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtdp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v2f64_ty,llvm_v2f64_ty], + [IntrNoMem]>; def int_ppc_vsx_xvcmpgtsp : PowerPC_VSX_Intrinsic<"xvcmpgtsp", [llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; +def int_ppc_vsx_xvcmpgtsp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtsp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; def int_ppc_vsx_xxleqv : PowerPC_VSX_Intrinsic<"xxleqv", [llvm_v4i32_ty], 
[llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; diff --git a/include/llvm/IR/IntrinsicsWebAssembly.td b/include/llvm/IR/IntrinsicsWebAssembly.td index 3ccde4742384..3953aef43dad 100644 --- a/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/include/llvm/IR/IntrinsicsWebAssembly.td @@ -13,4 +13,10 @@ //===----------------------------------------------------------------------===// let TargetPrefix = "wasm" in { // All intrinsics start with "llvm.wasm.". + +// Note that memory_size is not IntrNoMem because it must be sequenced with +// respect to grow_memory calls. +def int_wasm_memory_size : Intrinsic<[llvm_anyint_ty], [], [IntrReadMem]>; +def int_wasm_grow_memory : Intrinsic<[], [llvm_anyint_ty], []>; + } diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index a3bc4af84308..18390f853510 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -22,10 +22,8 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { def int_x86_seh_lsda : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], [IntrNoMem]>; - // Restores the frame, base, and stack pointers as necessary after recovering - // from an exception. Any block resuming control flow in the parent function - // should call this before accessing any stack memory. - def int_x86_seh_restoreframe : Intrinsic<[], [], []>; + // Marks the EH registration node created in LLVM IR prior to code generation. + def int_x86_seh_ehregnode : Intrinsic<[], [llvm_ptr_ty], []>; // Given a pointer to the end of an EH registration object, returns the true // parent frame address that can be used with llvm.localrecover. @@ -1406,6 +1404,78 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vpermil_pd_128 : + GCCBuiltin<"__builtin_ia32_vpermilpd_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_pd_256 : + GCCBuiltin<"__builtin_ia32_vpermilpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_pd_512 : + GCCBuiltin<"__builtin_ia32_vpermilpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_ps_128 : + GCCBuiltin<"__builtin_ia32_vpermilps_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_ps_256 : + GCCBuiltin<"__builtin_ia32_vpermilps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermil_ps_512 : + GCCBuiltin<"__builtin_ia32_vpermilps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_pd_256 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_pd_512 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_pd_128 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd_mask">, + 
Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_ps_256 : + GCCBuiltin<"__builtin_ia32_vpermilvarps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_ps_512 : + GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_vpermilvar_ps_128 : + GCCBuiltin<"__builtin_ia32_vpermilvarps_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128_mask">, Intrinsic<[llvm_v16i8_ty], @@ -1423,8 +1493,145 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; -} + def int_x86_avx512_mask_shuf_f32x4_256 : + GCCBuiltin<"__builtin_ia32_shuf_f32x4_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_f32x4 : + GCCBuiltin<"__builtin_ia32_shuf_f32x4_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_f64x2_256 : + GCCBuiltin<"__builtin_ia32_shuf_f64x2_256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_f64x2 : + GCCBuiltin<"__builtin_ia32_shuf_f64x2_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_i32x4_256 : + GCCBuiltin<"__builtin_ia32_shuf_i32x4_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_i32x4 : + GCCBuiltin<"__builtin_ia32_shuf_i32x4_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_i64x2_256 : + GCCBuiltin<"__builtin_ia32_shuf_i64x2_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_i64x2 : + GCCBuiltin<"__builtin_ia32_shuf_i64x2_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_pd_128 : + GCCBuiltin<"__builtin_ia32_shufpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_pd_256 : + GCCBuiltin<"__builtin_ia32_shufpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_pd_512 : + GCCBuiltin<"__builtin_ia32_shufpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_ps_128 : + GCCBuiltin<"__builtin_ia32_shufps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, 
llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_ps_256 : + GCCBuiltin<"__builtin_ia32_shufps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_shuf_ps_512 : + GCCBuiltin<"__builtin_ia32_shufps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movshdup_128 : + GCCBuiltin<"__builtin_ia32_movshdup128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movshdup_256 : + GCCBuiltin<"__builtin_ia32_movshdup256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movshdup_512 : + GCCBuiltin<"__builtin_ia32_movshdup512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movsldup_128 : + GCCBuiltin<"__builtin_ia32_movsldup128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movsldup_256 : + GCCBuiltin<"__builtin_ia32_movsldup256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movsldup_512 : + GCCBuiltin<"__builtin_ia32_movsldup512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movddup_128 : + GCCBuiltin<"__builtin_ia32_movddup128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movddup_256 : + GCCBuiltin<"__builtin_ia32_movddup256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_movddup_512 : + GCCBuiltin<"__builtin_ia32_movddup512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; +} // Vector blend let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". @@ -1526,6 +1733,38 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
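Reviewer note: the masked variants added throughout this hunk share one TableGen shape — the real operands come first, then a pass-through vector, then an integer mask whose width equals the lane count (llvm_i8_ty for 8 lanes, llvm_i16_ty for 16, llvm_i32_ty for 32). At the C level the mask surfaces as a __mmask type. A minimal sketch, assuming the usual _mm512_mask_movehdup_ps wrapper from <immintrin.h> for the __builtin_ia32_movshdup512_mask definition above (compile with -mavx512f):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      __m512 src = _mm512_set1_ps(0.0f);   /* pass-through lanes */
      __m512 a   = _mm512_set1_ps(3.0f);
      __mmask16 k = 0x00FF;                /* update the low 8 lanes only */
      /* Lanes with k=1 get movshdup(a); lanes with k=0 keep src. */
      __m512 r = _mm512_mask_movehdup_ps(src, k, a);
      float out[16];
      _mm512_storeu_ps(out, r);
      printf("%g %g\n", out[0], out[15]);  /* 3 0 */
      return 0;
    }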
def int_x86_avx512_mask_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">, Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_pd_128 : + GCCBuiltin<"__builtin_ia32_fpclasspd128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_pd_256 : + GCCBuiltin<"__builtin_ia32_fpclasspd256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4f64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_pd_512 : + GCCBuiltin<"__builtin_ia32_fpclasspd512_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_ps_128 : + GCCBuiltin<"__builtin_ia32_fpclassps128_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_ps_256 : + GCCBuiltin<"__builtin_ia32_fpclassps256_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v8f32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_ps_512 : + GCCBuiltin<"__builtin_ia32_fpclassps512_mask">, + Intrinsic<[llvm_i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_sd : + GCCBuiltin<"__builtin_ia32_fpclasssd">, + Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_fpclass_ss : + GCCBuiltin<"__builtin_ia32_fpclassss">, + Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; } // Vector extract sign mask @@ -1573,16 +1812,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Conditional load ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">, - Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty], + Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadArgMem]>; def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty], + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadArgMem]>; def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty], + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadArgMem]>; def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty], + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadArgMem]>; def int_x86_avx512_mask_loadu_ps_512 : GCCBuiltin<"__builtin_ia32_loadups512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], @@ -1596,24 +1835,31 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; } // Conditional store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
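Reviewer note: the new fpclass intrinsics classify each lane against an immediate category bitmask and return a lane mask. A hedged sketch, assuming the usual _mm512_fpclass_ps_mask wrapper for __builtin_ia32_fpclassps512_mask and the ISA's category encoding, where bit 0 selects QNaN and bit 7 SNaN (compile with -mavx512dq):

    #include <immintrin.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
      __m512 v = _mm512_set_ps(0, 0, 0, 0, 0, 0, 0, 0,
                               0, 0, 0, 0, 0, 0, INFINITY, NAN);
      /* 0x81 tests the QNaN (bit 0) and SNaN (bit 7) categories. */
      __mmask16 nan_lanes = _mm512_fpclass_ps_mask(v, 0x81);
      printf("NaN lanes: 0x%04x\n", (unsigned)nan_lanes); /* 0x0001 */
      return 0;
    }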
def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">, Intrinsic<[], [llvm_ptr_ty, - llvm_v2f64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>; + llvm_v2i64_ty, llvm_v2f64_ty], [IntrReadWriteArgMem]>; def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">, Intrinsic<[], [llvm_ptr_ty, - llvm_v4f32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; + llvm_v4i32_ty, llvm_v4f32_ty], [IntrReadWriteArgMem]>; def int_x86_avx_maskstore_pd_256 : GCCBuiltin<"__builtin_ia32_maskstorepd256">, Intrinsic<[], [llvm_ptr_ty, - llvm_v4f64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>; + llvm_v4i64_ty, llvm_v4f64_ty], [IntrReadWriteArgMem]>; def int_x86_avx_maskstore_ps_256 : GCCBuiltin<"__builtin_ia32_maskstoreps256">, Intrinsic<[], [llvm_ptr_ty, - llvm_v8f32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>; + llvm_v8i32_ty, llvm_v8f32_ty], [IntrReadWriteArgMem]>; def int_x86_avx512_mask_storeu_ps_512 : GCCBuiltin<"__builtin_ia32_storeups512_mask">, Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], @@ -1946,6 +2192,25 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_w_128 : GCCBuiltin<"__builtin_ia32_psrlw128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_w_256 : GCCBuiltin<"__builtin_ia32_psrlw256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_w_512 : GCCBuiltin<"__builtin_ia32_psrlw512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_wi_128 : GCCBuiltin<"__builtin_ia32_psrlwi128_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_wi_256 : GCCBuiltin<"__builtin_ia32_psrlwi256_mask">, + Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, + llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psrl_wi_512 : GCCBuiltin<"__builtin_ia32_psrlwi512_mask">, + Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, + llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; @@ -2167,39 +2432,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Vector load with broadcast let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
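Reviewer note: the maskload/maskstore hunks above retype the mask operand from a float vector to a same-width integer vector, which matches how the AVX C intrinsics have always typed it (the mask is __m128i/__m256i, with the sign bit of each element selecting the lane). A minimal sketch using the standard <immintrin.h> wrappers (compile with -mavx):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      double src[2] = {1.5, 2.5};
      double dst[2] = {0.0, 0.0};
      /* Sign bit set in element 0 only: touch just the first lane. */
      __m128i mask = _mm_set_epi64x(0, -1);
      __m128d v = _mm_maskload_pd(src, mask);   /* {1.5, 0.0} */
      _mm_maskstore_pd(dst, mask, v);           /* dst[1] left unchanged */
      printf("%g %g\n", dst[0], dst[1]);        /* 1.5 0 */
      return 0;
    }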
- def int_x86_avx2_vbroadcast_ss_ps : - GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx2_vbroadcast_sd_pd_256 : - GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">, - Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx2_vbroadcast_ss_ps_256 : - GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">, - Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastb_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastb128">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastb_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastb256">, - Intrinsic<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastw_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastw128">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastw_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastw256">, - Intrinsic<[llvm_v16i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastd_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastd128">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastd_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastd256">, - Intrinsic<[llvm_v8i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastq_128 : - GCCBuiltin<"__builtin_ia32_pbroadcastq128">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; - def int_x86_avx2_pbroadcastq_256 : - GCCBuiltin<"__builtin_ia32_pbroadcastq256">, - Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_mask_pbroadcast_d_gpr_512 : GCCBuiltin<"__builtin_ia32_pbroadcastd512_gpr_mask">, Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty, llvm_v16i32_ty, @@ -2220,7 +2452,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_permps : GCCBuiltin<"__builtin_ia32_permvarsf256">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty], + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>; def int_x86_avx2_vperm2i128 : GCCBuiltin<"__builtin_ia32_permti256">, Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, @@ -2231,20 +2463,124 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
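Reviewer note: the int_x86_avx2_permps fix just above retypes the shuffle-index operand as v8i32, matching both VPERMPS itself and the C intrinsic signature, where the indices are an integer vector; the deleted broadcast intrinsics are presumably now expressible as plain shufflevector IR. A small sketch, assuming _mm256_permutevar8x32_ps from <immintrin.h> (compile with -mavx2):

    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
      __m256  a   = _mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0);
      __m256i idx = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7); /* reverse */
      __m256  r   = _mm256_permutevar8x32_ps(a, idx);  /* r[i] = a[idx[i]] */
      float out[8];
      _mm256_storeu_ps(out, r);
      printf("%g %g\n", out[0], out[7]); /* 7 0 */
      return 0;
    }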
def int_x86_avx512_mask_vextractf32x4_512 : GCCBuiltin<"__builtin_ia32_extractf32x4_mask">, - Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i8_ty, - llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4f32_ty], [llvm_v16f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vextracti32x4_512 : GCCBuiltin<"__builtin_ia32_extracti32x4_mask">, - Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i8_ty, - llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v16i32_ty, llvm_i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextractf32x4_256 : + GCCBuiltin<"__builtin_ia32_extractf32x4_256_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti32x4_256 : + GCCBuiltin<"__builtin_ia32_extracti32x4_256_mask">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextractf64x2_256 : + GCCBuiltin<"__builtin_ia32_extractf64x2_256_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti64x2_256 : + GCCBuiltin<"__builtin_ia32_extracti64x2_256_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i64_ty, llvm_i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextractf64x2_512 : + GCCBuiltin<"__builtin_ia32_extractf64x2_512_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v8f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti64x2_512 : + GCCBuiltin<"__builtin_ia32_extracti64x2_512_mask">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i64_ty, llvm_i32_ty, + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextractf32x8_512 : + GCCBuiltin<"__builtin_ia32_extractf32x8_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v16f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vextracti32x8_512 : + GCCBuiltin<"__builtin_ia32_extracti32x8_mask">, + Intrinsic<[llvm_v8i32_ty],[llvm_v16i32_ty, llvm_i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vextractf64x4_512 : GCCBuiltin<"__builtin_ia32_extractf64x4_mask">, - Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i8_ty, - llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4f64_ty], [llvm_v8f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vextracti64x4_512 : GCCBuiltin<"__builtin_ia32_extracti64x4_mask">, - Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i8_ty, - llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty, + llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_insertf32x4_256 : + GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf32x4_512 : + GCCBuiltin<"__builtin_ia32_insertf32x4_512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf32x8_512 : + GCCBuiltin<"__builtin_ia32_insertf32x8_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf64x2_256 : + GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">, + 
Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf64x2_512 : + GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf64x4_512 : + GCCBuiltin<"__builtin_ia32_insertf64x4_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti32x4_256 : + GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti32x4_512 : + GCCBuiltin<"__builtin_ia32_inserti32x4_512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti32x8_512 : + GCCBuiltin<"__builtin_ia32_inserti32x8_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti64x2_256 : + GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti64x2_512 : + GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti64x4_512 : + GCCBuiltin<"__builtin_ia32_inserti64x4_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; } // Conditional load ops @@ -2354,6 +2690,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_psll_dq_512 : GCCBuiltin<"__builtin_ia32_pslldq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_psrl_dq_512 : GCCBuiltin<"__builtin_ia32_psrldq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_i32_ty], + [IntrNoMem]>; } // Gather ops @@ -3544,6 +3886,43 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[], [llvm_ptr_ty], []>; } +//===----------------------------------------------------------------------===// +// XSAVE +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
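Before the XSAVE definitions that follow, a note on the vextract/vinsert hunks above: the 128-bit-lane selector operand is widened from llvm_i8_ty to llvm_i32_ty across the board; at the source level it remains a small immediate. A sketch assuming AVX-512F hardware and the immintrin.h wrappers _mm512_insertf32x4 / _mm512_extractf32x4_ps, which should map to the insertf32x4/extractf32x4 builtins named above (build with -mavx512f):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m512 v = _mm512_set1_ps(1.0f);
  __m128 lane = _mm_setr_ps(5.f, 6.f, 7.f, 8.f);
  // The 128-bit-lane index is an immediate; these hunks widen its IR
  // operand from i8 to i32, but in source it stays a constant.
  __m512 w = _mm512_insertf32x4(v, lane, 2);
  __m128 back = _mm512_extractf32x4_ps(w, 2);
  float out[4];
  _mm_storeu_ps(out, back);
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 5 6 7 8
  return 0;
}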
+ def int_x86_xsave : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsave64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xrstor : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xrstor64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsaveopt : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsaveopt64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xrstors : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xrstors64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsavec : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsavec64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsaves : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_xsaves64 : + Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>; +} + +//===----------------------------------------------------------------------===// +// Support protection key +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_rdpkru : GCCBuiltin <"__builtin_ia32_rdpkru">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + def int_x86_wrpkru : GCCBuiltin<"__builtin_ia32_wrpkru">, + Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; +} //===----------------------------------------------------------------------===// // Half float conversion @@ -3561,9 +3940,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_vcvtph2ps_512 : GCCBuiltin<"__builtin_ia32_vcvtph2ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16i16_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtph2ps_256 : GCCBuiltin<"__builtin_ia32_vcvtph2ps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8i16_ty, llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtph2ps_128 : GCCBuiltin<"__builtin_ia32_vcvtph2ps_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v8i16_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_vcvtps2ph_512 : GCCBuiltin<"__builtin_ia32_vcvtps2ph512_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtps2ph_256 : GCCBuiltin<"__builtin_ia32_vcvtps2ph256_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_vcvtps2ph_128 : GCCBuiltin<"__builtin_ia32_vcvtps2ph_mask">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4f32_ty, llvm_i32_ty, + llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// @@ -3657,6 +4048,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
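The XSAVE-family intrinsics above all take a save-area pointer plus the requested-feature bitmap split into two i32 halves (EDX:EAX, mirroring the instruction encoding). A hedged usage sketch via immintrin.h's _xsave/_xrstor, assuming -mxsave, an OS that has enabled the requested state components in XCR0, and that 4096 bytes is enough for the save area (the real size comes from CPUID leaf 0Dh):

#include <immintrin.h>
#include <cstdint>
#include <cstdlib>
#include <cstring>

int main() {
  // 64-byte alignment is architecturally required; the 4096-byte size is
  // an assumption for this sketch.
  void *area = std::aligned_alloc(64, 4096);
  std::memset(area, 0, 4096);
  std::uint64_t mask = 0x7;  // x87 | SSE | AVX state components
  _xsave(area, mask);        // the 64-bit mask is split into the two i32
                             // operands (EDX:EAX) of int_x86_xsave
  _xrstor(area, mask);
  std::free(area);
  return 0;
}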
def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; @@ -3671,10 +4068,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_cvttss2usi">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_cvttss2usi64">, - Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2usi : GCCBuiltin<"__builtin_ia32_vcvttss2usi32">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttss2usi64 : GCCBuiltin<"__builtin_ia32_vcvttss2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtusi2ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; @@ -3686,10 +4087,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">, Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_cvttsd2usi">, - Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_cvttsd2usi64">, - Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2usi : GCCBuiltin<"__builtin_ia32_vcvttsd2usi32">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvttsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvttsd2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtusi2sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd32">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; @@ -3698,17 +4103,74 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
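The scalar truncating conversions in the hunk above (cvttss2si/cvttsd2si and their unsigned forms) gain a trailing llvm_i32_ty operand carrying an SAE constant, and move to the vcvtt* builtin names. A quick C++ check, assuming AVX-512F hardware and that immintrin.h's _mm_cvtt_roundss_si32 maps onto int_x86_avx512_cvttss2si (build with -mavx512f):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128 x = _mm_set_ss(3.75f);
  // Truncating convert; _MM_FROUND_NO_EXC is the suppress-all-exceptions
  // constant carried by the new trailing llvm_i32_ty operand.
  int i = _mm_cvtt_roundss_si32(x, _MM_FROUND_NO_EXC);
  std::printf("%d\n", i);  // prints 3
  return 0;
}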
llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, - llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2sd32 : GCCBuiltin<"__builtin_ia32_cvtsi2sd32">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, - llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtb2mask_128 : GCCBuiltin<"__builtin_ia32_cvtb2mask128">, + Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtb2mask_256 : GCCBuiltin<"__builtin_ia32_cvtb2mask256">, + Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtb2mask_512 : GCCBuiltin<"__builtin_ia32_cvtb2mask512">, + Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtw2mask_128 : GCCBuiltin<"__builtin_ia32_cvtw2mask128">, + Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtw2mask_256 : GCCBuiltin<"__builtin_ia32_cvtw2mask256">, + Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtw2mask_512 : GCCBuiltin<"__builtin_ia32_cvtw2mask512">, + Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtd2mask_128 : GCCBuiltin<"__builtin_ia32_cvtd2mask128">, + Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtd2mask_256 : GCCBuiltin<"__builtin_ia32_cvtd2mask256">, + Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtd2mask_512 : GCCBuiltin<"__builtin_ia32_cvtd2mask512">, + Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtq2mask_128 : GCCBuiltin<"__builtin_ia32_cvtq2mask128">, + Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx512_cvtq2mask_256 : GCCBuiltin<"__builtin_ia32_cvtq2mask256">, + Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty], [IntrNoMem]>; + def int_x86_avx512_cvtq2mask_512 : GCCBuiltin<"__builtin_ia32_cvtq2mask512">, + Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2b_128 : GCCBuiltin<"__builtin_ia32_cvtmask2b128">, + Intrinsic<[llvm_v16i8_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2b_256 : GCCBuiltin<"__builtin_ia32_cvtmask2b256">, + Intrinsic<[llvm_v32i8_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2b_512 : GCCBuiltin<"__builtin_ia32_cvtmask2b512">, + Intrinsic<[llvm_v64i8_ty], [llvm_i64_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2w_128 : GCCBuiltin<"__builtin_ia32_cvtmask2w128">, + Intrinsic<[llvm_v8i16_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2w_256 : GCCBuiltin<"__builtin_ia32_cvtmask2w256">, + Intrinsic<[llvm_v16i16_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2w_512 : GCCBuiltin<"__builtin_ia32_cvtmask2w512">, + Intrinsic<[llvm_v32i16_ty], [llvm_i32_ty], [IntrNoMem]>; + + def 
int_x86_avx512_cvtmask2d_128 : GCCBuiltin<"__builtin_ia32_cvtmask2d128">, + Intrinsic<[llvm_v4i32_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2d_256 : GCCBuiltin<"__builtin_ia32_cvtmask2d256">, + Intrinsic<[llvm_v8i32_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2d_512 : GCCBuiltin<"__builtin_ia32_cvtmask2d512">, + Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_cvtmask2q_128 : GCCBuiltin<"__builtin_ia32_cvtmask2q128">, + Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2q_256 : GCCBuiltin<"__builtin_ia32_cvtmask2q256">, + Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_cvtmask2q_512 : GCCBuiltin<"__builtin_ia32_cvtmask2q512">, + Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>; + } // Pack ops. @@ -3751,53 +4213,761 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; } +// Unpack ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_mask_unpckh_pd_128 : + GCCBuiltin<"__builtin_ia32_unpckhpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_pd_256 : + GCCBuiltin<"__builtin_ia32_unpckhpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_pd_512 : + GCCBuiltin<"__builtin_ia32_unpckhpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_ps_128 : + GCCBuiltin<"__builtin_ia32_unpckhps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_ps_256 : + GCCBuiltin<"__builtin_ia32_unpckhps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_ps_512 : + GCCBuiltin<"__builtin_ia32_unpckhps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_pd_128 : + GCCBuiltin<"__builtin_ia32_unpcklpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_pd_256 : + GCCBuiltin<"__builtin_ia32_unpcklpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_pd_512 : + GCCBuiltin<"__builtin_ia32_unpcklpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_ps_128 : + GCCBuiltin<"__builtin_ia32_unpcklps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_ps_256 : + GCCBuiltin<"__builtin_ia32_unpcklps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_ps_512 : + GCCBuiltin<"__builtin_ia32_unpcklps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhb_w_128 : + 
GCCBuiltin<"__builtin_ia32_punpckhbw128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhb_w_256 : + GCCBuiltin<"__builtin_ia32_punpckhbw256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhb_w_512 : + GCCBuiltin<"__builtin_ia32_punpckhbw512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhd_q_128 : + GCCBuiltin<"__builtin_ia32_punpckhdq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhd_q_256 : + GCCBuiltin<"__builtin_ia32_punpckhdq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhd_q_512 : + GCCBuiltin<"__builtin_ia32_punpckhdq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhqd_q_128 : + GCCBuiltin<"__builtin_ia32_punpckhqdq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhqd_q_256 : + GCCBuiltin<"__builtin_ia32_punpckhqdq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhqd_q_512 : + GCCBuiltin<"__builtin_ia32_punpckhqdq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhw_d_128 : + GCCBuiltin<"__builtin_ia32_punpckhwd128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhw_d_256 : + GCCBuiltin<"__builtin_ia32_punpckhwd256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhw_d_512 : + GCCBuiltin<"__builtin_ia32_punpckhwd512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklb_w_128 : + GCCBuiltin<"__builtin_ia32_punpcklbw128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklb_w_256 : + GCCBuiltin<"__builtin_ia32_punpcklbw256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklb_w_512 : + GCCBuiltin<"__builtin_ia32_punpcklbw512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckld_q_128 : + GCCBuiltin<"__builtin_ia32_punpckldq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckld_q_256 : + GCCBuiltin<"__builtin_ia32_punpckldq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckld_q_512 : + GCCBuiltin<"__builtin_ia32_punpckldq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, 
llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklqd_q_128 : + GCCBuiltin<"__builtin_ia32_punpcklqdq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklqd_q_256 : + GCCBuiltin<"__builtin_ia32_punpcklqdq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklqd_q_512 : + GCCBuiltin<"__builtin_ia32_punpcklqdq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklw_d_128 : + GCCBuiltin<"__builtin_ia32_punpcklwd128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklw_d_256 : + GCCBuiltin<"__builtin_ia32_punpcklwd256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklw_d_512 : + GCCBuiltin<"__builtin_ia32_punpcklwd512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; +} + // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_mask_cvttps2dq_512: GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvttps2udq_512: GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvttpd2dq_512: GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvttpd2udq_512: GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_ps_512: GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_pd_512: GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, + def int_x86_avx512_mask_cvtdq2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtdq2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtdq2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2ps_128 : + GCCBuiltin<"__builtin_ia32_cvtdq2ps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtdq2ps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtdq2ps_512 : + GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], + 
[IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2dq_128 : + GCCBuiltin<"__builtin_ia32_cvtpd2dq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2dq_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2dq256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2dq_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2ps256_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2ps_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtsd2ss_round : + GCCBuiltin<"__builtin_ia32_cvtsd2ss_round">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtss2sd_round : + GCCBuiltin<"__builtin_ia32_cvtss2sd_round">, + Intrinsic<[llvm_v2f64_ty], + [ llvm_v4f32_ty, llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2ps : + GCCBuiltin<"__builtin_ia32_cvtpd2ps_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v2f64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2qq_128 : + GCCBuiltin<"__builtin_ia32_cvtpd2qq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2qq_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2qq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2qq_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2qq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2udq_128 : + GCCBuiltin<"__builtin_ia32_cvtpd2udq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2udq_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2udq256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2udq_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2uqq_128 : + GCCBuiltin<"__builtin_ia32_cvtpd2uqq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2uqq_256 : + GCCBuiltin<"__builtin_ia32_cvtpd2uqq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtpd2uqq_512 : + GCCBuiltin<"__builtin_ia32_cvtpd2uqq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2dq_128 : + GCCBuiltin<"__builtin_ia32_cvtps2dq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2dq_256 : + GCCBuiltin<"__builtin_ia32_cvtps2dq256_mask">, + 
Intrinsic<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2dq_512 : + GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtps2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v4f32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtps2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtps2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2qq_128 : + GCCBuiltin<"__builtin_ia32_cvtps2qq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2qq_256 : + GCCBuiltin<"__builtin_ia32_cvtps2qq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2qq_512 : + GCCBuiltin<"__builtin_ia32_cvtps2qq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2udq_128 : + GCCBuiltin<"__builtin_ia32_cvtps2udq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2udq_256 : + GCCBuiltin<"__builtin_ia32_cvtps2udq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2udq_512 : + GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2uqq_128 : + GCCBuiltin<"__builtin_ia32_cvtps2uqq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2uqq_256 : + GCCBuiltin<"__builtin_ia32_cvtps2uqq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtps2uqq_512 : + GCCBuiltin<"__builtin_ia32_cvtps2uqq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtqq2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtqq2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtqq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2ps_128 : + GCCBuiltin<"__builtin_ia32_cvtqq2ps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtqq2ps256_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtqq2ps_512 : + 
GCCBuiltin<"__builtin_ia32_cvtqq2ps512_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2dq_128 : + GCCBuiltin<"__builtin_ia32_cvttpd2dq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2dq_256 : + GCCBuiltin<"__builtin_ia32_cvttpd2dq256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2dq_512 : + GCCBuiltin<"__builtin_ia32_cvttpd2dq512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2qq_128 : + GCCBuiltin<"__builtin_ia32_cvttpd2qq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2qq_256 : + GCCBuiltin<"__builtin_ia32_cvttpd2qq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2qq_512 : + GCCBuiltin<"__builtin_ia32_cvttpd2qq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2udq_128 : + GCCBuiltin<"__builtin_ia32_cvttpd2udq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2udq_256 : + GCCBuiltin<"__builtin_ia32_cvttpd2udq256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2udq_512 : + GCCBuiltin<"__builtin_ia32_cvttpd2udq512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f64_ty, llvm_v8i32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2uqq_128 : + GCCBuiltin<"__builtin_ia32_cvttpd2uqq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2f64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2uqq_256 : + GCCBuiltin<"__builtin_ia32_cvttpd2uqq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttpd2uqq_512 : + GCCBuiltin<"__builtin_ia32_cvttpd2uqq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2dq_128 : + GCCBuiltin<"__builtin_ia32_cvttps2dq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2dq_256 : + GCCBuiltin<"__builtin_ia32_cvttps2dq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2dq_512 : + GCCBuiltin<"__builtin_ia32_cvttps2dq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2qq_128 : + GCCBuiltin<"__builtin_ia32_cvttps2qq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2qq_256 : + GCCBuiltin<"__builtin_ia32_cvttps2qq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2qq_512 : + GCCBuiltin<"__builtin_ia32_cvttps2qq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, 
llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2udq_128 : + GCCBuiltin<"__builtin_ia32_cvttps2udq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2udq_256 : + GCCBuiltin<"__builtin_ia32_cvttps2udq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8f32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2udq_512 : + GCCBuiltin<"__builtin_ia32_cvttps2udq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16f32_ty, llvm_v16i32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2uqq_128 : + GCCBuiltin<"__builtin_ia32_cvttps2uqq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v4f32_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2uqq_256 : + GCCBuiltin<"__builtin_ia32_cvttps2uqq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4f32_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvttps2uqq_512 : + GCCBuiltin<"__builtin_ia32_cvttps2uqq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtudq2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtudq2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2ps_128 : + GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtudq2ps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtudq2ps_512 : + GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2pd_128 : + GCCBuiltin<"__builtin_ia32_cvtuqq2pd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2pd_256 : + GCCBuiltin<"__builtin_ia32_cvtuqq2pd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2pd_512 : + GCCBuiltin<"__builtin_ia32_cvtuqq2pd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2ps_128 : + GCCBuiltin<"__builtin_ia32_cvtuqq2ps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v2i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2ps_256 : + GCCBuiltin<"__builtin_ia32_cvtuqq2ps256_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4i64_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_cvtuqq2ps_512 : + GCCBuiltin<"__builtin_ia32_cvtuqq2ps512_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8i64_ty, llvm_v8f32_ty, llvm_i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_rndscale_pd_128 : GCCBuiltin<"__builtin_ia32_rndscalepd_128_mask">, + 
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_pd_256 : GCCBuiltin<"__builtin_ia32_rndscalepd_256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_pd_512 : GCCBuiltin<"__builtin_ia32_rndscalepd_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtps2dq_512: GCCBuiltin<"__builtin_ia32_cvtps2dq512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, + def int_x86_avx512_mask_rndscale_ps_128 : GCCBuiltin<"__builtin_ia32_rndscaleps_128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_ps_256 : GCCBuiltin<"__builtin_ia32_rndscaleps_256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_rndscale_ps_512 : GCCBuiltin<"__builtin_ia32_rndscaleps_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtpd2dq_512: GCCBuiltin<"__builtin_ia32_cvtpd2dq512_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtps2udq_512: GCCBuiltin<"__builtin_ia32_cvtps2udq512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16f32_ty, llvm_v16i32_ty, + def int_x86_avx512_mask_reduce_pd_128 : GCCBuiltin<"__builtin_ia32_reducepd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_pd_256 : GCCBuiltin<"__builtin_ia32_reducepd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_pd_512 : GCCBuiltin<"__builtin_ia32_reducepd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ps_128 : GCCBuiltin<"__builtin_ia32_reduceps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ps_256 : GCCBuiltin<"__builtin_ia32_reduceps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ps_512 : GCCBuiltin<"__builtin_ia32_reduceps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtpd2udq_512: GCCBuiltin<"__builtin_ia32_cvtpd2udq512_mask">, - Intrinsic<[llvm_v8i32_ty], [llvm_v8f64_ty, llvm_v8i32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtdq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtdq2ps512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtdq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtudq2ps_512 : GCCBuiltin<"__builtin_ia32_cvtudq2ps512_mask">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16i32_ty, llvm_v16f32_ty, - llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def 
int_x86_avx512_mask_cvtudq2pd_512 : GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8i32_ty, llvm_v8f64_ty, - llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_cvtpd2ps_512 : GCCBuiltin<"__builtin_ia32_cvtpd2ps512_mask">, - Intrinsic<[llvm_v8f32_ty], [llvm_v8f64_ty, llvm_v8f32_ty, - llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_pd_128 : GCCBuiltin<"__builtin_ia32_rangepd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_pd_256 : GCCBuiltin<"__builtin_ia32_rangepd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_pd_512 : GCCBuiltin<"__builtin_ia32_rangepd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, + llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_ps_128 : GCCBuiltin<"__builtin_ia32_rangeps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_ps_256 : GCCBuiltin<"__builtin_ia32_rangeps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; +def int_x86_avx512_mask_range_ps_512 : GCCBuiltin<"__builtin_ia32_rangeps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, + llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; } // Vector load with broadcast @@ -3805,28 +4975,183 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_vbroadcast_ss_512 : GCCBuiltin<"__builtin_ia32_vbroadcastss512">, Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty], [IntrReadArgMem]>; - def int_x86_avx512_vbroadcast_ss_ps_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastss_ps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_ss_ps_512 : + GCCBuiltin<"__builtin_ia32_broadcastss512">, + Intrinsic<[llvm_v16f32_ty], [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_ss_ps_256 : + GCCBuiltin<"__builtin_ia32_broadcastss256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_ss_ps_128 : + GCCBuiltin<"__builtin_ia32_broadcastss128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_vbroadcast_sd_512 : GCCBuiltin<"__builtin_ia32_vbroadcastsd512">, Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty], [IntrReadArgMem]>; - def int_x86_avx512_vbroadcast_sd_pd_512 : - GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_sd_pd_512 : + GCCBuiltin<"__builtin_ia32_broadcastsd512">, + Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_broadcast_sd_pd_256 : + GCCBuiltin<"__builtin_ia32_broadcastsd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastb_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastb_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastb256_mask">, + 
Intrinsic<[llvm_v32i8_ty], + [llvm_v16i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastb_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastb512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v16i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastw_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastw_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastw_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_pbroadcastd_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastd512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + GCCBuiltin<"__builtin_ia32_pbroadcastd512">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastq512">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x2_256 : + GCCBuiltin<"__builtin_ia32_broadcastf32x2_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x2_512 : + GCCBuiltin<"__builtin_ia32_broadcastf32x2_512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x2_128 : + GCCBuiltin<"__builtin_ia32_broadcasti32x2_128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x2_256 : + GCCBuiltin<"__builtin_ia32_broadcasti32x2_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x2_512 : + GCCBuiltin<"__builtin_ia32_broadcasti32x2_512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x4_256 : + GCCBuiltin<"__builtin_ia32_broadcastf32x4_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v4f32_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x4_512 : + GCCBuiltin<"__builtin_ia32_broadcastf32x4_512">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v4f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf32x8_512 : + GCCBuiltin<"__builtin_ia32_broadcastf32x8_512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v8f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; + 
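All of the masked broadcasts above share one operand shape: source lane(s), pass-through vector, write mask. A sketch of that contract through immintrin.h's _mm512_mask_broadcastss_ps, which should correspond to int_x86_avx512_mask_broadcast_ss_ps_512 (assumes AVX-512F hardware; build with -mavx512f):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128 a = _mm_set_ss(42.0f);           // lane to broadcast
  __m512 passthru = _mm512_setzero_ps();  // kept where a mask bit is 0
  __mmask16 k = 0x00FF;                   // write only the low 8 elements
  __m512 r = _mm512_mask_broadcastss_ps(passthru, k, a);
  float out[16];
  _mm512_storeu_ps(out, r);
  std::printf("%g %g\n", out[0], out[15]);  // prints 42 0
  return 0;
}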
+ def int_x86_avx512_mask_broadcastf64x2_256 : + GCCBuiltin<"__builtin_ia32_broadcastf64x2_256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v2f64_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf64x2_512 : + GCCBuiltin<"__builtin_ia32_broadcastf64x2_512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v2f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcastf64x4_512 : + GCCBuiltin<"__builtin_ia32_broadcastf64x4_512">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v4f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x4_256 : + GCCBuiltin<"__builtin_ia32_broadcasti32x4_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x4_512 : + GCCBuiltin<"__builtin_ia32_broadcasti32x4_512">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti32x8_512 : + GCCBuiltin<"__builtin_ia32_broadcasti32x8_512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v8i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti64x2_256 : + GCCBuiltin<"__builtin_ia32_broadcasti64x2_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti64x2_512 : + GCCBuiltin<"__builtin_ia32_broadcasti64x2_512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_broadcasti64x4_512 : + GCCBuiltin<"__builtin_ia32_broadcasti64x4_512">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v4i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_i32_512 : Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_pbroadcastq_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_pbroadcastq_i64_512 : Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmw_512 : + GCCBuiltin<"__builtin_ia32_broadcastmw512">, + Intrinsic<[llvm_v16i32_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmw_256 : + GCCBuiltin<"__builtin_ia32_broadcastmw256">, + Intrinsic<[llvm_v8i32_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmw_128 : + GCCBuiltin<"__builtin_ia32_broadcastmw128">, + Intrinsic<[llvm_v4i32_ty], [llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmb_512 : + GCCBuiltin<"__builtin_ia32_broadcastmb512">, + Intrinsic<[llvm_v8i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmb_256 : + GCCBuiltin<"__builtin_ia32_broadcastmb256">, + Intrinsic<[llvm_v4i64_ty], [llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_broadcastmb_128 : + GCCBuiltin<"__builtin_ia32_broadcastmb128">, + Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>; } // Vector sign and zero extend @@ -4071,15 +5396,36 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
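On the broadcastmw/broadcastmb definitions just above: they zero-extend a mask register and splat it into every vector element. A sketch assuming AVX-512CD hardware and immintrin.h's _mm512_broadcastmw_epi32 (build with -mavx512cd):

#include <immintrin.h>
#include <cstdio>

int main() {
  __mmask16 k = 0xBEEF;
  // VPBROADCASTMW2D: zero-extend the 16-bit mask and splat it into all
  // sixteen 32-bit elements (int_x86_avx512_broadcastmw_512 above).
  __m512i v = _mm512_broadcastmw_epi32(k);
  unsigned out[16];
  _mm512_storeu_si512(out, v);
  std::printf("0x%x 0x%x\n", out[0], out[15]);  // prints 0xbeef 0xbeef
  return 0;
}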
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_mask">, + def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_mask">, + def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - + def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_scalef_sd : GCCBuiltin<"__builtin_ia32_scalefsd_round">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_scalef_ss : GCCBuiltin<"__builtin_ia32_scalefss_round">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_scalef_pd_128 : GCCBuiltin<"__builtin_ia32_scalefpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; @@ -4099,12 +5445,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtrndss">, - Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], - [IntrNoMem]>; - def int_x86_avx512_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtrndsd">, - Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], - [IntrNoMem]>; + def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss_round">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd_round">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_sqrt_pd_128 : GCCBuiltin<"__builtin_ia32_sqrtpd128_mask">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, @@ -4143,29 +5489,108 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
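Several scalar builtins in the hunk above are renamed to _round variants (rndscale, range, reduce, scalef, sqrt), each taking an explicit llvm_i32_ty rounding-control operand in place of MXCSR. A sketch with immintrin.h's _mm_sqrt_round_sd, the unmasked wrapper that presumably feeds int_x86_avx512_mask_sqrt_sd an all-ones mask (assumes AVX-512F; build with -mavx512f):

#include <immintrin.h>
#include <cstdio>

int main() {
  __m128d upper = _mm_set_sd(0.0);  // upper element passes through from here
  __m128d x = _mm_set_sd(2.0);
  // Embedded rounding control instead of MXCSR; the constant travels in
  // the trailing llvm_i32_ty operand of the _round intrinsics.
  __m128d r = _mm_sqrt_round_sd(upper, x,
                                _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
  std::printf("%.17g\n", _mm_cvtsd_f64(r));  // ~1.4142135623730951
  return 0;
}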
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, + def int_x86_avx512_mask_getexp_ss : GCCBuiltin<"__builtin_ia32_getexpss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_getexp_sd : GCCBuiltin<"__builtin_ia32_getexpsd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_pd_128 : + GCCBuiltin<"__builtin_ia32_getmantpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty,llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_pd_256 : + GCCBuiltin<"__builtin_ia32_getmantpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty,llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_pd_512 : + GCCBuiltin<"__builtin_ia32_getmantpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty,llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty,llvm_i32_ty ], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_ps_128 : + GCCBuiltin<"__builtin_ia32_getmantps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_ps_256 : + GCCBuiltin<"__builtin_ia32_getmantps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_ps_512 : + GCCBuiltin<"__builtin_ia32_getmantps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty,llvm_i32_ty, llvm_v16f32_ty,llvm_i16_ty,llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_ss : + GCCBuiltin<"__builtin_ia32_getmantss_round">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_getmant_sd : + GCCBuiltin<"__builtin_ia32_getmantsd_round">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">, + def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_pd_128 : GCCBuiltin<"__builtin_ia32_rsqrt14pd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_pd_256 : GCCBuiltin<"__builtin_ia32_rsqrt14pd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_pd_512 : GCCBuiltin<"__builtin_ia32_rsqrt14pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_ps_128 : GCCBuiltin<"__builtin_ia32_rsqrt14ps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rsqrt14_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrt14ps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; def 
int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">, + def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">, + def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_pd_128 : GCCBuiltin<"__builtin_ia32_rcp14pd128_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_pd_256 : GCCBuiltin<"__builtin_ia32_rcp14pd256_mask">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty, + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_pd_512 : GCCBuiltin<"__builtin_ia32_rcp14pd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_ps_128 : GCCBuiltin<"__builtin_ia32_rcp14ps128_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_rcp14_ps_256 : GCCBuiltin<"__builtin_ia32_rcp14ps256_mask">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_rcp14_ps_512 : GCCBuiltin<"__builtin_ia32_rcp14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; @@ -4183,11 +5608,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_mask">, + def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_mask">, + def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -4199,14 +5624,17 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_mask">, + def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_mask">, + def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; +def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty], + [IntrNoMem]>; } // FP logical ops let TargetPrefix = "x86" in { @@ -4511,6 +5939,54 @@ let TargetPrefix = "x86" in { def int_x86_avx512_mask_pavg_w_256 : GCCBuiltin<"__builtin_ia32_pavgw256_mask">, Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_mask_pmaddw_d_128 : + GCCBuiltin<"__builtin_ia32_pmaddwd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddw_d_256 : + GCCBuiltin<"__builtin_ia32_pmaddwd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddw_d_512 : + GCCBuiltin<"__builtin_ia32_pmaddwd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddubs_w_128 : + GCCBuiltin<"__builtin_ia32_pmaddubsw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddubs_w_256 : + GCCBuiltin<"__builtin_ia32_pmaddubsw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmaddubs_w_512 : + GCCBuiltin<"__builtin_ia32_pmaddubsw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_dbpsadbw_128 : + GCCBuiltin<"__builtin_ia32_dbpsadbw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v8i16_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_dbpsadbw_256 : + GCCBuiltin<"__builtin_ia32_dbpsadbw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v16i16_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_dbpsadbw_512 : + GCCBuiltin<"__builtin_ia32_dbpsadbw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v32i16_ty, + llvm_i32_ty], [IntrNoMem]>; } // Gather and Scatter ops @@ -4807,27 +6283,71 @@ let TargetPrefix = "x86" in { llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; } -// AVX-512 conflict detection +// AVX-512 conflict detection instruction +// Instructions that count the number of leading zero bits let TargetPrefix = "x86" in { + def int_x86_avx512_mask_conflict_d_128 : + GCCBuiltin<"__builtin_ia32_vpconflictsi_128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_conflict_d_256 : + GCCBuiltin<"__builtin_ia32_vpconflictsi_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, 
llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_conflict_d_512 : GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i32_ty, llvm_i16_ty], + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_conflict_q_128 : + GCCBuiltin<"__builtin_ia32_vpconflictdi_128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_conflict_q_256 : + GCCBuiltin<"__builtin_ia32_vpconflictdi_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_conflict_q_512 : GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i64_ty, llvm_i8_ty], + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_lzcnt_d_128 : + GCCBuiltin<"__builtin_ia32_vplzcntd_128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_lzcnt_d_256 : + GCCBuiltin<"__builtin_ia32_vplzcntd_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_lzcnt_d_512 : GCCBuiltin<"__builtin_ia32_vplzcntd_512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i32_ty, llvm_i16_ty], + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_lzcnt_q_128 : + GCCBuiltin<"__builtin_ia32_vplzcntq_128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_lzcnt_q_256 : + GCCBuiltin<"__builtin_ia32_vplzcntq_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_lzcnt_q_512 : GCCBuiltin<"__builtin_ia32_vplzcntq_512_mask">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i64_ty, llvm_i8_ty], + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } @@ -4911,20 +6431,70 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
} let TargetPrefix = "x86" in { - def int_x86_avx512_mask_valign_q_512 : GCCBuiltin<"__builtin_ia32_alignq512_mask">, + def int_x86_avx512_mask_valign_q_512 : + GCCBuiltin<"__builtin_ia32_alignq512_mask">, Intrinsic<[llvm_v8i64_ty], - [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], - [IntrNoMem]>; + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, + llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_mask_valign_d_512 : GCCBuiltin<"__builtin_ia32_alignd512_mask">, + def int_x86_avx512_mask_valign_d_512 : + GCCBuiltin<"__builtin_ia32_alignd512_mask">, Intrinsic<[llvm_v16i32_ty], - [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], - [IntrNoMem]>; + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_valign_q_256 : + GCCBuiltin<"__builtin_ia32_alignq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_valign_d_256 : + GCCBuiltin<"__builtin_ia32_alignd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_valign_q_128 : + GCCBuiltin<"__builtin_ia32_alignq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_valign_d_128 : + GCCBuiltin<"__builtin_ia32_alignd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_palignr_128 : + GCCBuiltin<"__builtin_ia32_palignr128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_palignr_256 : + GCCBuiltin<"__builtin_ia32_palignr256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty, llvm_v32i8_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_palignr_512 : + GCCBuiltin<"__builtin_ia32_palignr512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty, llvm_v64i8_ty, + llvm_i64_ty], [IntrNoMem]>; } // Compares let TargetPrefix = "x86" in { // 512-bit + def int_x86_avx512_vcomi_sd : GCCBuiltin<"__builtin_ia32_vcomisd">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcomi_ss : GCCBuiltin<"__builtin_ia32_vcomiss">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_pcmpeq_b_512 : GCCBuiltin<"__builtin_ia32_pcmpeqb512_mask">, Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; @@ -5288,6 +6858,626 @@ let TargetPrefix = "x86" in { llvm_i8_ty], [IntrReadArgMem]>; } + +// truncate +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_pmov_qb_128 : + GCCBuiltin<"__builtin_ia32_pmovqb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovqb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qb_128 : + GCCBuiltin<"__builtin_ia32_pmovsqb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qb_mem_128 : + 
GCCBuiltin<"__builtin_ia32_pmovsqb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qb_128 : + GCCBuiltin<"__builtin_ia32_pmovusqb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v2i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusqb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qb_256 : + GCCBuiltin<"__builtin_ia32_pmovqb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovqb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qb_256 : + GCCBuiltin<"__builtin_ia32_pmovsqb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovsqb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qb_256 : + GCCBuiltin<"__builtin_ia32_pmovusqb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusqb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qb_512 : + GCCBuiltin<"__builtin_ia32_pmovqb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovqb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qb_512 : + GCCBuiltin<"__builtin_ia32_pmovsqb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsqb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qb_512 : + GCCBuiltin<"__builtin_ia32_pmovusqb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i64_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusqb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qw_128 : + GCCBuiltin<"__builtin_ia32_pmovqw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovqw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qw_128 : + GCCBuiltin<"__builtin_ia32_pmovsqw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovsqw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qw_128 : + GCCBuiltin<"__builtin_ia32_pmovusqw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v2i64_ty, llvm_v8i16_ty, 
llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusqw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qw_256 : + GCCBuiltin<"__builtin_ia32_pmovqw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovqw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qw_256 : + GCCBuiltin<"__builtin_ia32_pmovsqw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovsqw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qw_256 : + GCCBuiltin<"__builtin_ia32_pmovusqw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusqw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qw_512 : + GCCBuiltin<"__builtin_ia32_pmovqw512_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovqw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qw_512 : + GCCBuiltin<"__builtin_ia32_pmovsqw512_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsqw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qw_512 : + GCCBuiltin<"__builtin_ia32_pmovusqw512_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i64_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusqw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qd_128 : + GCCBuiltin<"__builtin_ia32_pmovqd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qd_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovqd128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qd_128 : + GCCBuiltin<"__builtin_ia32_pmovsqd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qd_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovsqd128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qd_128 : + GCCBuiltin<"__builtin_ia32_pmovusqd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v2i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qd_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusqd128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qd_256 : + 
GCCBuiltin<"__builtin_ia32_pmovqd256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qd_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovqd256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qd_256 : + GCCBuiltin<"__builtin_ia32_pmovsqd256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qd_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovsqd256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qd_256 : + GCCBuiltin<"__builtin_ia32_pmovusqd256_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i64_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qd_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusqd256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_qd_512 : + GCCBuiltin<"__builtin_ia32_pmovqd512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_qd_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovqd512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_qd_512 : + GCCBuiltin<"__builtin_ia32_pmovsqd512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_qd_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsqd512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_qd_512 : + GCCBuiltin<"__builtin_ia32_pmovusqd512_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i64_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_qd_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusqd512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_db_128 : + GCCBuiltin<"__builtin_ia32_pmovdb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_db_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovdb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_db_128 : + GCCBuiltin<"__builtin_ia32_pmovsdb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_db_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovsdb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_db_128 : + GCCBuiltin<"__builtin_ia32_pmovusdb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v4i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_db_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusdb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_db_256 : + GCCBuiltin<"__builtin_ia32_pmovdb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_db_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovdb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + 
[IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_db_256 : + GCCBuiltin<"__builtin_ia32_pmovsdb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_db_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovsdb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_db_256 : + GCCBuiltin<"__builtin_ia32_pmovusdb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i32_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_db_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusdb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_db_512 : + GCCBuiltin<"__builtin_ia32_pmovdb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_db_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovdb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_db_512 : + GCCBuiltin<"__builtin_ia32_pmovsdb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_db_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsdb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_db_512 : + GCCBuiltin<"__builtin_ia32_pmovusdb512_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i32_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_db_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusdb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_dw_128 : + GCCBuiltin<"__builtin_ia32_pmovdw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_dw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovdw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_dw_128 : + GCCBuiltin<"__builtin_ia32_pmovsdw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_dw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovsdw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_dw_128 : + GCCBuiltin<"__builtin_ia32_pmovusdw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v4i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_dw_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovusdw128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_dw_256 : + GCCBuiltin<"__builtin_ia32_pmovdw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_dw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovdw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_dw_256 : + GCCBuiltin<"__builtin_ia32_pmovsdw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_dw_mem_256 : + 
GCCBuiltin<"__builtin_ia32_pmovsdw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_dw_256 : + GCCBuiltin<"__builtin_ia32_pmovusdw256_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i32_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_dw_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovusdw256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_dw_512 : + GCCBuiltin<"__builtin_ia32_pmovdw512_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_dw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovdw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_dw_512 : + GCCBuiltin<"__builtin_ia32_pmovsdw512_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_dw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovsdw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_dw_512 : + GCCBuiltin<"__builtin_ia32_pmovusdw512_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i32_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_dw_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovusdw512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_wb_128 : + GCCBuiltin<"__builtin_ia32_pmovwb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_wb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovwb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_wb_128 : + GCCBuiltin<"__builtin_ia32_pmovswb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_wb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovswb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_wb_128 : + GCCBuiltin<"__builtin_ia32_pmovuswb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v8i16_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_wb_mem_128 : + GCCBuiltin<"__builtin_ia32_pmovuswb128mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_wb_256 : + GCCBuiltin<"__builtin_ia32_pmovwb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_wb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovwb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_wb_256 : + GCCBuiltin<"__builtin_ia32_pmovswb256_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_wb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovswb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_wb_256 : + GCCBuiltin<"__builtin_ia32_pmovuswb256_mask">, + Intrinsic<[llvm_v16i8_ty], + 
[llvm_v16i16_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_wb_mem_256 : + GCCBuiltin<"__builtin_ia32_pmovuswb256mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmov_wb_512 : + GCCBuiltin<"__builtin_ia32_pmovwb512_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmov_wb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovwb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovs_wb_512 : + GCCBuiltin<"__builtin_ia32_pmovswb512_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovs_wb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovswb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; + def int_x86_avx512_mask_pmovus_wb_512 : + GCCBuiltin<"__builtin_ia32_pmovuswb512_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i16_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_pmovus_wb_mem_512 : + GCCBuiltin<"__builtin_ia32_pmovuswb512mem_mask">, + Intrinsic<[], + [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; +} + +// Bitwise ternary logic +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_pternlog_d_128 : + GCCBuiltin<"__builtin_ia32_pternlogd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_d_128 : + GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_d_256 : + GCCBuiltin<"__builtin_ia32_pternlogd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_d_256 : + GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_d_512 : + GCCBuiltin<"__builtin_ia32_pternlogd512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_d_512 : + GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, + llvm_i16_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_q_128 : + GCCBuiltin<"__builtin_ia32_pternlogq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_q_128 : + GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_q_256 : + GCCBuiltin<"__builtin_ia32_pternlogq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_q_256 : + GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, + llvm_i8_ty], 
[IntrNoMem]>; + + def int_x86_avx512_mask_pternlog_q_512 : + GCCBuiltin<"__builtin_ia32_pternlogq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_maskz_pternlog_q_512 : + GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, + llvm_i8_ty], [IntrNoMem]>; +} + // Misc. let TargetPrefix = "x86" in { def int_x86_avx512_mask_cmp_ps_512 : @@ -5314,6 +7504,14 @@ let TargetPrefix = "x86" in { GCCBuiltin<"__builtin_ia32_cmppd128_mask">, Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_ss : + GCCBuiltin<"__builtin_ia32_cmpss_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_mask_cmp_sd : + GCCBuiltin<"__builtin_ia32_cmpsd_mask">, + Intrinsic<[llvm_i8_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa512">, diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h index e6c22090ab6d..c546fc3d1ee0 100644 --- a/include/llvm/IR/LLVMContext.h +++ b/include/llvm/IR/LLVMContext.h @@ -15,7 +15,6 @@ #ifndef LLVM_IR_LLVMCONTEXT_H #define LLVM_IR_LLVMCONTEXT_H -#include "llvm-c/Core.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Options.h" @@ -60,7 +59,20 @@ public: MD_mem_parallel_loop_access = 10, // "llvm.mem.parallel_loop_access" MD_nonnull = 11, // "nonnull" MD_dereferenceable = 12, // "dereferenceable" - MD_dereferenceable_or_null = 13 // "dereferenceable_or_null" + MD_dereferenceable_or_null = 13, // "dereferenceable_or_null" + MD_make_implicit = 14, // "make.implicit" + MD_unpredictable = 15, // "unpredictable" + MD_invariant_group = 16, // "invariant.group" + MD_align = 17 // "align" + }; + + /// Known operand bundle tag IDs, which always have the same value. All + /// operand bundle tags that LLVM has special knowledge of are listed here. + /// Additionally, this scheme allows LLVM to efficiently check for specific + /// operand bundle tags without comparing strings. + enum { + OB_deopt = 0, // "deopt" + OB_funclet = 1, // "funclet" }; /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. @@ -71,6 +83,15 @@ public: /// custom metadata IDs registered in this LLVMContext. void getMDKindNames(SmallVectorImpl &Result) const; + /// getOperandBundleTags - Populate client supplied SmallVector with the + /// bundle tags registered in this LLVMContext. The bundle tags are ordered + /// by increasing bundle IDs. + /// \see LLVMContext::getOperandBundleTagID + void getOperandBundleTags(SmallVectorImpl &Result) const; + + /// getOperandBundleTagID - Maps a bundle tag to an integer ID. Every bundle + /// tag registered with an LLVMContext has an unique ID. 
+ uint32_t getOperandBundleTagID(StringRef Tag) const; typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context, unsigned LocCookie); diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h index 7f7889ad5fb3..b8e33478d6a9 100644 --- a/include/llvm/IR/LegacyPassManagers.h +++ b/include/llvm/IR/LegacyPassManagers.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Pass.h" @@ -118,6 +119,7 @@ class PassManagerPrettyStackEntry : public PrettyStackTraceEntry { Pass *P; Value *V; Module *M; + public: explicit PassManagerPrettyStackEntry(Pass *p) : P(p), V(nullptr), M(nullptr) {} // When P is releaseMemory'd. @@ -130,7 +132,6 @@ public: void print(raw_ostream &OS) const override; }; - //===----------------------------------------------------------------------===// // PMStack // @@ -158,7 +159,6 @@ private: std::vector S; }; - //===----------------------------------------------------------------------===// // PMTopLevelManager // @@ -204,10 +204,7 @@ public: virtual ~PMTopLevelManager(); /// Add immutable pass and initialize it. - inline void addImmutablePass(ImmutablePass *P) { - P->initializePass(); - ImmutablePasses.push_back(P); - } + void addImmutablePass(ImmutablePass *P); inline SmallVectorImpl& getImmutablePasses() { return ImmutablePasses; @@ -231,12 +228,10 @@ public: PMStack activeStack; protected: - /// Collection of pass managers SmallVector PassManagers; private: - /// Collection of pass managers that are not directly maintained /// by this pass manager SmallVector IndirectPassManagers; @@ -253,7 +248,46 @@ private: /// Immutable passes are managed by top level manager. SmallVector ImmutablePasses; - DenseMap AnUsageMap; + /// Map from ID to immutable passes. + SmallDenseMap ImmutablePassMap; + + + /// A wrapper around AnalysisUsage for the purpose of uniquing. The wrapper + /// is used to avoid needing to make AnalysisUsage itself a folding set node. + struct AUFoldingSetNode : public FoldingSetNode { + AnalysisUsage AU; + AUFoldingSetNode(const AnalysisUsage &AU) : AU(AU) {} + void Profile(FoldingSetNodeID &ID) const { + Profile(ID, AU); + } + static void Profile(FoldingSetNodeID &ID, const AnalysisUsage &AU) { + // TODO: We could consider sorting the dependency arrays within the + // AnalysisUsage (since they are conceptually unordered). + ID.AddBoolean(AU.getPreservesAll()); + auto ProfileVec = [&](const SmallVectorImpl& Vec) { + ID.AddInteger(Vec.size()); + for (AnalysisID AID : Vec) + ID.AddPointer(AID); + }; + ProfileVec(AU.getRequiredSet()); + ProfileVec(AU.getRequiredTransitiveSet()); + ProfileVec(AU.getPreservedSet()); + ProfileVec(AU.getUsedSet()); + } + }; + + // Contains all of the unique combinations of AnalysisUsage. This is helpful + // when we have multiple instances of the same pass since they'll usually + // have the same analysis usage and can share storage. + FoldingSet UniqueAnalysisUsages; + + // Allocator used for allocating AUFoldingSetNodes. This handles deletion of + // all allocated nodes in one fell swoop. + SpecificBumpPtrAllocator AUFoldingSetNodeAllocator; + + // Maps from a pass to its associated entry in UniqueAnalysisUsages. Does + // not own the storage associated with either key or value. + DenseMap AnUsageMap; /// Collection of PassInfo objects found via analysis IDs and in this top
This is used to memoize queries to the pass registry. @@ -262,8 +296,6 @@ private: mutable DenseMap AnalysisPassInfos; }; - - //===----------------------------------------------------------------------===// // PMDataManager @@ -271,7 +303,6 @@ private: /// used by pass managers. class PMDataManager { public: - explicit PMDataManager() : TPM(nullptr), Depth(0) { initializeAnalysisInfo(); } @@ -319,13 +350,12 @@ public: // passes that are managed by this manager. bool preserveHigherLevelAnalysis(Pass *P); - - /// Populate RequiredPasses with analysis pass that are required by - /// pass P and are available. Populate ReqPassNotAvailable with analysis - /// pass that are required by pass P but are not available. - void collectRequiredAnalysis(SmallVectorImpl &RequiredPasses, - SmallVectorImpl &ReqPassNotAvailable, - Pass *P); + /// Populate UsedPasses with analysis pass that are used or required by pass + /// P and are available. Populate ReqPassNotAvailable with analysis pass that + /// are required by pass P but are not available. + void collectRequiredAndUsedAnalyses( + SmallVectorImpl &UsedPasses, + SmallVectorImpl &ReqPassNotAvailable, Pass *P); /// All Required analyses should be available to the pass as it runs! Here /// we fill in the AnalysisImpls member of the pass so that it can @@ -351,6 +381,7 @@ public: enum PassDebuggingString S2, StringRef Msg); void dumpRequiredSet(const Pass *P) const; void dumpPreservedSet(const Pass *P) const; + void dumpUsedSet(const Pass *P) const; unsigned getNumContainedPasses() const { return (unsigned)PassVector.size(); @@ -374,7 +405,6 @@ public: } protected: - // Top level manager. PMTopLevelManager *TPM; @@ -439,9 +469,9 @@ public: /// doFinalization - Overrides ModulePass doFinalization for global /// finalization tasks - /// + /// using ModulePass::doFinalization; - + /// doFinalization - Run all of the finalizers for the function passes. /// bool doFinalization(Module &M) override; @@ -473,7 +503,6 @@ public: }; Timer *getPassTimer(Pass *); - } #endif diff --git a/include/llvm/IR/MDBuilder.h b/include/llvm/IR/MDBuilder.h index ceb1c736e5c7..35341e3271ff 100644 --- a/include/llvm/IR/MDBuilder.h +++ b/include/llvm/IR/MDBuilder.h @@ -60,6 +60,9 @@ public: /// \brief Return metadata containing a number of branch weights. MDNode *createBranchWeights(ArrayRef Weights); + /// Return metadata specifying that a branch or switch is unpredictable. + MDNode *createUnpredictable(); + /// Return metadata containing the entry count for a function. 
MDNode *createFunctionEntryCount(uint64_t Count); diff --git a/include/llvm/IR/Mangler.h b/include/llvm/IR/Mangler.h index b72b259097c3..ea2f0c3f09f3 100644 --- a/include/llvm/IR/Mangler.h +++ b/include/llvm/IR/Mangler.h @@ -15,12 +15,12 @@ #define LLVM_IR_MANGLER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/raw_ostream.h" namespace llvm { class DataLayout; -class GlobalValue; template class SmallVectorImpl; class Twine; diff --git a/include/llvm/IR/Metadata.def b/include/llvm/IR/Metadata.def index 857e4637d1e4..b1d22178e262 100644 --- a/include/llvm/IR/Metadata.def +++ b/include/llvm/IR/Metadata.def @@ -13,7 +13,8 @@ #if !(defined HANDLE_METADATA || defined HANDLE_METADATA_LEAF || \ defined HANDLE_METADATA_BRANCH || defined HANDLE_MDNODE_LEAF || \ - defined HANDLE_MDNODE_BRANCH || \ + defined HANDLE_MDNODE_LEAF_UNIQUABLE || defined HANDLE_MDNODE_BRANCH || \ + defined HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE || \ defined HANDLE_SPECIALIZED_MDNODE_LEAF || \ defined HANDLE_SPECIALIZED_MDNODE_BRANCH) #error "Missing macro definition of HANDLE_METADATA*" @@ -34,6 +35,24 @@ #define HANDLE_METADATA_BRANCH(CLASS) HANDLE_METADATA(CLASS) #endif +// Handler for specialized and uniquable leaf nodes under MDNode. Defers to +// HANDLE_MDNODE_LEAF_UNIQUABLE if it's defined, otherwise to +// HANDLE_SPECIALIZED_MDNODE_LEAF. +#ifndef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE +#ifdef HANDLE_MDNODE_LEAF_UNIQUABLE +#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(CLASS) \ + HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) +#else +#define HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(CLASS) \ + HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) +#endif +#endif + +// Handler for leaf nodes under MDNode. +#ifndef HANDLE_MDNODE_LEAF_UNIQUABLE +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) HANDLE_MDNODE_LEAF(CLASS) +#endif + // Handler for leaf nodes under MDNode. 
#ifndef HANDLE_MDNODE_LEAF #define HANDLE_MDNODE_LEAF(CLASS) HANDLE_METADATA_LEAF(CLASS) @@ -59,43 +78,46 @@ HANDLE_METADATA_BRANCH(ValueAsMetadata) HANDLE_METADATA_LEAF(ConstantAsMetadata) HANDLE_METADATA_LEAF(LocalAsMetadata) HANDLE_MDNODE_BRANCH(MDNode) -HANDLE_MDNODE_LEAF(MDTuple) -HANDLE_SPECIALIZED_MDNODE_LEAF(DILocation) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIExpression) +HANDLE_MDNODE_LEAF_UNIQUABLE(MDTuple) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocation) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIExpression) HANDLE_SPECIALIZED_MDNODE_BRANCH(DINode) -HANDLE_SPECIALIZED_MDNODE_LEAF(GenericDINode) -HANDLE_SPECIALIZED_MDNODE_LEAF(DISubrange) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIEnumerator) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(GenericDINode) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubrange) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIEnumerator) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIScope) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIType) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIBasicType) -HANDLE_SPECIALIZED_MDNODE_BRANCH(DIDerivedTypeBase) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIDerivedType) -HANDLE_SPECIALIZED_MDNODE_BRANCH(DICompositeTypeBase) -HANDLE_SPECIALIZED_MDNODE_LEAF(DICompositeType) -HANDLE_SPECIALIZED_MDNODE_LEAF(DISubroutineType) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIFile) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIBasicType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIDerivedType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DICompositeType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubroutineType) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIFile) HANDLE_SPECIALIZED_MDNODE_LEAF(DICompileUnit) HANDLE_SPECIALIZED_MDNODE_BRANCH(DILocalScope) -HANDLE_SPECIALIZED_MDNODE_LEAF(DISubprogram) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubprogram) HANDLE_SPECIALIZED_MDNODE_BRANCH(DILexicalBlockBase) -HANDLE_SPECIALIZED_MDNODE_LEAF(DILexicalBlock) -HANDLE_SPECIALIZED_MDNODE_LEAF(DILexicalBlockFile) -HANDLE_SPECIALIZED_MDNODE_LEAF(DINamespace) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIModule) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILexicalBlock) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILexicalBlockFile) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DINamespace) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIModule) HANDLE_SPECIALIZED_MDNODE_BRANCH(DITemplateParameter) -HANDLE_SPECIALIZED_MDNODE_LEAF(DITemplateTypeParameter) -HANDLE_SPECIALIZED_MDNODE_LEAF(DITemplateValueParameter) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DITemplateTypeParameter) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DITemplateValueParameter) HANDLE_SPECIALIZED_MDNODE_BRANCH(DIVariable) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIGlobalVariable) -HANDLE_SPECIALIZED_MDNODE_LEAF(DILocalVariable) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIObjCProperty) -HANDLE_SPECIALIZED_MDNODE_LEAF(DIImportedEntity) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGlobalVariable) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocalVariable) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIObjCProperty) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity) +HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) #undef HANDLE_METADATA #undef HANDLE_METADATA_LEAF #undef HANDLE_METADATA_BRANCH #undef HANDLE_MDNODE_LEAF +#undef HANDLE_MDNODE_LEAF_UNIQUABLE #undef HANDLE_MDNODE_BRANCH #undef HANDLE_SPECIALIZED_MDNODE_LEAF +#undef HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE #undef HANDLE_SPECIALIZED_MDNODE_BRANCH diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 
c639625bf16c..2ea591383f82 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -18,10 +18,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Constant.h" -#include "llvm/IR/MetadataTracking.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Value.h" #include "llvm/Support/ErrorHandling.h" #include @@ -32,9 +33,6 @@ class LLVMContext; class Module; class ModuleSlotTracker; -template - class SymbolTableListTraits; - enum LLVMConstants : uint32_t { DEBUG_METADATA_VERSION = 3 // Current debug info version number. }; @@ -86,7 +84,9 @@ public: DIImportedEntityKind, ConstantAsMetadataKind, LocalAsMetadataKind, - MDStringKind + MDStringKind, + DIMacroKind, + DIMacroFileKind }; protected: @@ -126,9 +126,10 @@ public: /// If \c M is provided, metadata nodes will be numbered canonically; /// otherwise, pointer addresses are substituted. /// @{ - void print(raw_ostream &OS, const Module *M = nullptr) const; - void print(raw_ostream &OS, ModuleSlotTracker &MST, - const Module *M = nullptr) const; + void print(raw_ostream &OS, const Module *M = nullptr, + bool IsForDebug = false) const; + void print(raw_ostream &OS, ModuleSlotTracker &MST, const Module *M = nullptr, + bool IsForDebug = false) const; /// @} /// \brief Print as operand. @@ -196,6 +197,77 @@ private: void untrack(); }; +/// \brief API for tracking metadata references through RAUW and deletion. +/// +/// Shared API for updating \a Metadata pointers in subclasses that support +/// RAUW. +/// +/// This API is not meant to be used directly. See \a TrackingMDRef for a +/// user-friendly tracking reference. +class MetadataTracking { +public: + /// \brief Track the reference to metadata. + /// + /// Register \c MD with \c *MD, if the subclass supports tracking. If \c *MD + /// gets RAUW'ed, \c MD will be updated to the new address. If \c *MD gets + /// deleted, \c MD will be set to \c nullptr. + /// + /// If tracking isn't supported, \c *MD will not change. + /// + /// \return true iff tracking is supported by \c MD. + static bool track(Metadata *&MD) { + return track(&MD, *MD, static_cast(nullptr)); + } + + /// \brief Track the reference to metadata for \a Metadata. + /// + /// As \a track(Metadata*&), but with support for calling back to \c Owner to + /// tell it that its operand changed. This could trigger \c Owner being + /// re-uniqued. + static bool track(void *Ref, Metadata &MD, Metadata &Owner) { + return track(Ref, MD, &Owner); + } + + /// \brief Track the reference to metadata for \a MetadataAsValue. + /// + /// As \a track(Metadata*&), but with support for calling back to \c Owner to + /// tell it that its operand changed. This could trigger \c Owner being + /// re-uniqued. + static bool track(void *Ref, Metadata &MD, MetadataAsValue &Owner) { + return track(Ref, MD, &Owner); + } + + /// \brief Stop tracking a reference to metadata. + /// + /// Stops \c *MD from tracking \c MD. + static void untrack(Metadata *&MD) { untrack(&MD, *MD); } + static void untrack(void *Ref, Metadata &MD); + + /// \brief Move tracking from one reference to another. + /// + /// Semantically equivalent to \c untrack(MD) followed by \c track(New), + /// except that ownership callbacks are maintained. + /// + /// Note: it is an error if \c *MD does not equal \c New. + /// + /// \return true iff tracking is supported by \c MD. 
+ static bool retrack(Metadata *&MD, Metadata *&New) { + return retrack(&MD, *MD, &New); + } + static bool retrack(void *Ref, Metadata &MD, void *New); + + /// \brief Check whether metadata is replaceable. + static bool isReplaceable(const Metadata &MD); + + typedef PointerUnion OwnerTy; + +private: + /// \brief Track a reference to metadata for an owner. + /// + /// Generalized version of tracking. + static bool track(void *Ref, Metadata &MD, OwnerTy Owner); +}; + /// \brief Shared implementation of use-lists for replaceable metadata. /// /// Most metadata cannot be RAUW'ed. This is a shared implementation of @@ -572,10 +644,12 @@ struct AAMDNodes { template<> struct DenseMapInfo { static inline AAMDNodes getEmptyKey() { - return AAMDNodes(DenseMapInfo::getEmptyKey(), 0, 0); + return AAMDNodes(DenseMapInfo::getEmptyKey(), + nullptr, nullptr); } static inline AAMDNodes getTombstoneKey() { - return AAMDNodes(DenseMapInfo::getTombstoneKey(), 0, 0); + return AAMDNodes(DenseMapInfo::getTombstoneKey(), + nullptr, nullptr); } static unsigned getHashValue(const AAMDNodes &Val) { return DenseMapInfo::getHashValue(Val.TBAA) ^ @@ -830,10 +904,11 @@ public: /// \brief Resolve cycles. /// /// Once all forward declarations have been resolved, force cycles to be - /// resolved. + /// resolved. If \p MDMaterialized is true, then any temporary metadata + /// is ignored, otherwise it asserts when encountering temporary metadata. /// /// \pre No operands (or operands' operands, etc.) have \a isTemporary(). - void resolveCycles(); + void resolveCycles(bool MDMaterialized = true); /// \brief Replace a temporary node with a permanent one. /// @@ -881,6 +956,7 @@ protected: void storeDistinctInContext(); template static T *storeImpl(T *N, StorageType Storage, StoreT &Store); + template static T *storeImpl(T *N, StorageType Storage); private: void handleChangedOperand(void *Ref, Metadata *New); @@ -913,13 +989,13 @@ private: N->recalculateHash(); } template - static void dispatchRecalculateHash(NodeTy *N, std::false_type) {} + static void dispatchRecalculateHash(NodeTy *, std::false_type) {} template static void dispatchResetHash(NodeTy *N, std::true_type) { N->setHash(0); } template - static void dispatchResetHash(NodeTy *N, std::false_type) {} + static void dispatchResetHash(NodeTy *, std::false_type) {} public: typedef const MDOperand *op_iterator; @@ -963,6 +1039,8 @@ public: static MDNode *getMostGenericFPMath(MDNode *A, MDNode *B); static MDNode *getMostGenericRange(MDNode *A, MDNode *B); static MDNode *getMostGenericAliasScope(MDNode *A, MDNode *B); + static MDNode *getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B); + }; /// \brief Tuple of metadata. @@ -1125,7 +1203,6 @@ public: /// /// TODO: Inherit from Metadata. 
class NamedMDNode : public ilist_node { - friend class SymbolTableListTraits; friend struct ilist_traits; friend class LLVMContextImpl; friend class Module; @@ -1193,7 +1270,7 @@ public: void addOperand(MDNode *M); void setOperand(unsigned I, MDNode *New); StringRef getName() const; - void print(raw_ostream &ROS) const; + void print(raw_ostream &ROS, bool IsForDebug = false) const; void dump() const; // --------------------------------------------------------------------------- @@ -1208,13 +1285,13 @@ public: const_op_iterator op_end() const { return const_op_iterator(this, getNumOperands()); } inline iterator_range operands() { - return iterator_range(op_begin(), op_end()); + return make_range(op_begin(), op_end()); } inline iterator_range operands() const { - return iterator_range(op_begin(), op_end()); + return make_range(op_begin(), op_end()); } }; } // end llvm namespace -#endif +#endif // LLVM_IR_METADATA_H diff --git a/include/llvm/IR/MetadataTracking.h b/include/llvm/IR/MetadataTracking.h deleted file mode 100644 index 541d9b3b1245..000000000000 --- a/include/llvm/IR/MetadataTracking.h +++ /dev/null @@ -1,99 +0,0 @@ -//===- llvm/IR/MetadataTracking.h - Metadata tracking ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Low-level functions to enable tracking of metadata that could RAUW. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_IR_METADATATRACKING_H -#define LLVM_IR_METADATATRACKING_H - -#include "llvm/ADT/PointerUnion.h" -#include "llvm/Support/Casting.h" -#include - -namespace llvm { - -class Metadata; -class MetadataAsValue; - -/// \brief API for tracking metadata references through RAUW and deletion. -/// -/// Shared API for updating \a Metadata pointers in subclasses that support -/// RAUW. -/// -/// This API is not meant to be used directly. See \a TrackingMDRef for a -/// user-friendly tracking reference. -class MetadataTracking { -public: - /// \brief Track the reference to metadata. - /// - /// Register \c MD with \c *MD, if the subclass supports tracking. If \c *MD - /// gets RAUW'ed, \c MD will be updated to the new address. If \c *MD gets - /// deleted, \c MD will be set to \c nullptr. - /// - /// If tracking isn't supported, \c *MD will not change. - /// - /// \return true iff tracking is supported by \c MD. - static bool track(Metadata *&MD) { - return track(&MD, *MD, static_cast(nullptr)); - } - - /// \brief Track the reference to metadata for \a Metadata. - /// - /// As \a track(Metadata*&), but with support for calling back to \c Owner to - /// tell it that its operand changed. This could trigger \c Owner being - /// re-uniqued. - static bool track(void *Ref, Metadata &MD, Metadata &Owner) { - return track(Ref, MD, &Owner); - } - - /// \brief Track the reference to metadata for \a MetadataAsValue. - /// - /// As \a track(Metadata*&), but with support for calling back to \c Owner to - /// tell it that its operand changed. This could trigger \c Owner being - /// re-uniqued. - static bool track(void *Ref, Metadata &MD, MetadataAsValue &Owner) { - return track(Ref, MD, &Owner); - } - - /// \brief Stop tracking a reference to metadata. - /// - /// Stops \c *MD from tracking \c MD. 
- static void untrack(Metadata *&MD) { untrack(&MD, *MD); } - static void untrack(void *Ref, Metadata &MD); - - /// \brief Move tracking from one reference to another. - /// - /// Semantically equivalent to \c untrack(MD) followed by \c track(New), - /// except that ownership callbacks are maintained. - /// - /// Note: it is an error if \c *MD does not equal \c New. - /// - /// \return true iff tracking is supported by \c MD. - static bool retrack(Metadata *&MD, Metadata *&New) { - return retrack(&MD, *MD, &New); - } - static bool retrack(void *Ref, Metadata &MD, void *New); - - /// \brief Check whether metadata is replaceable. - static bool isReplaceable(const Metadata &MD); - - typedef PointerUnion OwnerTy; - -private: - /// \brief Track a reference to metadata for an owner. - /// - /// Generalized version of tracking. - static bool track(void *Ref, Metadata &MD, OwnerTy Owner); -}; - -} // end namespace llvm - -#endif diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index 1668b95c8bd1..942f68543cb6 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_MODULE_H #define LLVM_IR_MODULE_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/DataLayout.h" @@ -34,54 +35,6 @@ class LLVMContext; class RandomNumberGenerator; class StructType; -template<> struct ilist_traits - : public SymbolTableListTraits { - - // createSentinel is used to get hold of the node that marks the end of the - // list... (same trick used here as in ilist_traits) - Function *createSentinel() const { - return static_cast(&Sentinel); - } - static void destroySentinel(Function*) {} - - Function *provideInitialHead() const { return createSentinel(); } - Function *ensureHead(Function*) const { return createSentinel(); } - static void noteHead(Function*, Function*) {} - -private: - mutable ilist_node Sentinel; -}; - -template<> struct ilist_traits - : public SymbolTableListTraits { - // createSentinel is used to create a node that marks the end of the list. - GlobalVariable *createSentinel() const { - return static_cast(&Sentinel); - } - static void destroySentinel(GlobalVariable*) {} - - GlobalVariable *provideInitialHead() const { return createSentinel(); } - GlobalVariable *ensureHead(GlobalVariable*) const { return createSentinel(); } - static void noteHead(GlobalVariable*, GlobalVariable*) {} -private: - mutable ilist_node Sentinel; -}; - -template<> struct ilist_traits - : public SymbolTableListTraits { - // createSentinel is used to create a node that marks the end of the list. - GlobalAlias *createSentinel() const { - return static_cast(&Sentinel); - } - static void destroySentinel(GlobalAlias*) {} - - GlobalAlias *provideInitialHead() const { return createSentinel(); } - GlobalAlias *ensureHead(GlobalAlias*) const { return createSentinel(); } - static void noteHead(GlobalAlias*, GlobalAlias*) {} -private: - mutable ilist_node Sentinel; -}; - template<> struct ilist_traits : public ilist_default_traits { // createSentinel is used to get hold of a node that marks the end of @@ -96,6 +49,7 @@ template<> struct ilist_traits static void noteHead(NamedMDNode*, NamedMDNode*) {} void addNodeToList(NamedMDNode *) {} void removeNodeFromList(NamedMDNode *) {} + private: mutable ilist_node Sentinel; }; @@ -116,11 +70,11 @@ class Module { /// @{ public: /// The type for the list of global variables. 
- typedef iplist GlobalListType; + typedef SymbolTableList GlobalListType; /// The type for the list of functions. - typedef iplist FunctionListType; + typedef SymbolTableList FunctionListType; /// The type for the list of aliases. - typedef iplist AliasListType; + typedef SymbolTableList AliasListType; /// The type for the list of named metadata. typedef ilist NamedMDListType; /// The type of the comdat "symbol" table. @@ -328,6 +282,11 @@ public: /// registered in this LLVMContext. void getMDKindNames(SmallVectorImpl &Result) const; + /// Populate client supplied SmallVector with the bundle tags registered in + /// this LLVMContext. The bundle tags are ordered by increasing bundle IDs. + /// \see LLVMContext::getOperandBundleTagID + void getOperandBundleTags(SmallVectorImpl &Result) const; + /// Return the type with the specified name, or null if there is none by that /// name. StructType *getTypeByName(StringRef Name) const; @@ -472,7 +431,7 @@ public: /// Sets the GVMaterializer to GVM. This module must not yet have a /// Materializer. To reset the materializer for a module that already has one, - /// call MaterializeAllPermanently first. Destroying this module will destroy + /// call materializeAll first. Destroying this module will destroy /// its materializer without materializing any more GlobalValues. Without /// destroying the Module, there is no way to detach or destroy a materializer /// without materializing all the GVs it controls, to avoid leaving orphan @@ -480,27 +439,16 @@ public: void setMaterializer(GVMaterializer *GVM); /// Retrieves the GVMaterializer, if any, for this Module. GVMaterializer *getMaterializer() const { return Materializer.get(); } - - /// Returns true if this GV was loaded from this Module's GVMaterializer and - /// the GVMaterializer knows how to dematerialize the GV. - bool isDematerializable(const GlobalValue *GV) const; + bool isMaterialized() const { return !getMaterializer(); } /// Make sure the GlobalValue is fully read. If the module is corrupt, this /// returns true and fills in the optional string with information about the /// problem. If successful, this returns false. std::error_code materialize(GlobalValue *GV); - /// If the GlobalValue is read in, and if the GVMaterializer supports it, - /// release the memory for the function, and set it up to be materialized - /// lazily. If !isDematerializable(), this method is a no-op. - void dematerialize(GlobalValue *GV); - - /// Make sure all GlobalValues in this Module are fully read. - std::error_code materializeAll(); /// Make sure all GlobalValues in this Module are fully read and clear the - /// Materializer. If the module is corrupt, this DOES NOT clear the old /// Materializer. 
- std::error_code materializeAllPermanently(); + std::error_code materializeAll(); std::error_code materializeMetadata(); @@ -556,10 +504,10 @@ public: bool global_empty() const { return GlobalList.empty(); } iterator_range globals() { - return iterator_range(global_begin(), global_end()); + return make_range(global_begin(), global_end()); } iterator_range globals() const { - return iterator_range(global_begin(), global_end()); + return make_range(global_begin(), global_end()); } /// @} @@ -578,10 +526,10 @@ public: bool empty() const { return FunctionList.empty(); } iterator_range functions() { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } iterator_range functions() const { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } /// @} @@ -596,10 +544,10 @@ public: bool alias_empty() const { return AliasList.empty(); } iterator_range aliases() { - return iterator_range(alias_begin(), alias_end()); + return make_range(alias_begin(), alias_end()); } iterator_range aliases() const { - return iterator_range(alias_begin(), alias_end()); + return make_range(alias_begin(), alias_end()); } /// @} @@ -620,12 +568,10 @@ public: bool named_metadata_empty() const { return NamedMDList.empty(); } iterator_range named_metadata() { - return iterator_range(named_metadata_begin(), - named_metadata_end()); + return make_range(named_metadata_begin(), named_metadata_end()); } iterator_range named_metadata() const { - return iterator_range(named_metadata_begin(), - named_metadata_end()); + return make_range(named_metadata_begin(), named_metadata_end()); } /// Destroy ConstantArrays in LLVMContext if they are not used. @@ -646,11 +592,12 @@ public: /// uselistorder directives so that use-lists can be recreated when reading /// the assembly. void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW, - bool ShouldPreserveUseListOrder = false) const; + bool ShouldPreserveUseListOrder = false, + bool IsForDebug = false) const; /// Dump the module to stderr (for debugging). void dump() const; - + /// This function causes all the subinstructions to "let go" of all references /// that they are maintaining. This allows one to 'delete' a whole class at /// a time, even though there may be circular references... first all @@ -666,6 +613,10 @@ public: /// \brief Returns the Dwarf Version by checking module flags. unsigned getDwarfVersion() const; + /// \brief Returns the CodeView Version by checking module flags. + /// Returns zero if not present in module. + unsigned getCodeViewFlag() const; + /// @} /// @name Utility functions for querying and setting PIC level /// @{ @@ -676,6 +627,16 @@ public: /// \brief Set the PIC level (small or large model) void setPICLevel(PICLevel::Level PL); /// @} + + /// @name Utility functions for querying and setting PGO counts + /// @{ + + /// \brief Set maximum function count in PGO mode + void setMaximumFunctionCount(uint64_t); + + /// \brief Returns maximum function count in PGO mode + Optional getMaximumFunctionCount(); + /// @} }; /// A raw_ostream inserter for modules.
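The materialization hunks above fold materializeAllPermanently() into materializeAll() and drop the dematerialize()/isDematerializable() pair, so a module is either being lazily materialized or fully read. A minimal sketch of driving the new interface; the helper name and error reporting here are illustrative, not part of this patch:

    // Force a lazily loaded module to be fully read before walking it.
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>

    static bool fullyRead(llvm::Module &M) {
      // materializeAll() now also clears the materializer on success,
      // so isMaterialized() becomes true afterwards.
      if (std::error_code EC = M.materializeAll()) {
        llvm::errs() << "materialization failed: " << EC.message() << "\n";
        return false;
      }
      return M.isMaterialized();
    }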
@@ -693,7 +654,7 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Module, LLVMModuleRef) inline Module *unwrap(LLVMModuleProviderRef MP) { return reinterpret_cast(MP); } - + } // End llvm namespace #endif diff --git a/include/llvm/IR/ModuleSlotTracker.h b/include/llvm/IR/ModuleSlotTracker.h index c37dcecf8e40..49730a66bdf6 100644 --- a/include/llvm/IR/ModuleSlotTracker.h +++ b/include/llvm/IR/ModuleSlotTracker.h @@ -17,6 +17,7 @@ namespace llvm { class Module; class Function; class SlotTracker; +class Value; /// Manage lifetime of a slot tracker for printing IR. /// @@ -61,6 +62,13 @@ public: /// Purge the currently incorporated function and incorporate \c F. If \c F /// is currently incorporated, this is a no-op. void incorporateFunction(const Function &F); + + /// Return the slot number of the specified local value. + /// + /// A function that defines this value should be incorporated prior to calling + /// this method. + /// Return -1 if the value is not in the function's SlotTracker. + int getLocalSlot(const Value *V); }; } // end namespace llvm diff --git a/include/llvm/IR/PassManager.h b/include/llvm/IR/PassManager.h index 4166babd63e5..2ceb53d21b7a 100644 --- a/include/llvm/IR/PassManager.h +++ b/include/llvm/IR/PassManager.h @@ -203,7 +203,8 @@ public: for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) { if (DebugLogging) - dbgs() << "Running pass: " << Passes[Idx]->name() << "\n"; + dbgs() << "Running pass: " << Passes[Idx]->name() << " on " + << IR.getName() << "\n"; PreservedAnalyses PassPA = Passes[Idx]->run(IR, AM); diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 41154e6441a9..f4d7d8c44416 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -1272,6 +1272,46 @@ inline typename m_Intrinsic_Ty::Ty m_FMax(const Opnd0 &Op0, return m_Intrinsic(Op0, Op1); } +template struct Signum_match { + Opnd_t Val; + Signum_match(const Opnd_t &V) : Val(V) {} + + template bool match(OpTy *V) { + unsigned TypeSize = V->getType()->getScalarSizeInBits(); + if (TypeSize == 0) + return false; + + unsigned ShiftWidth = TypeSize - 1; + Value *OpL = nullptr, *OpR = nullptr; + + // This is the representation of signum we match: + // + // signum(x) == (x >> 63) | (-x >>u 63) + // + // An i1 value is its own signum, so it's correct to match + // + // signum(x) == (x >> 0) | (-x >>u 0) + // + // for i1 values. + + auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth)); + auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth)); + auto Signum = m_Or(LHS, RHS); + + return Signum.match(V) && OpL == OpR && Val.match(OpL); + } +}; + +/// \brief Matches a signum pattern. +/// +/// signum(x) = +/// x > 0 -> 1 +/// x == 0 -> 0 +/// x < 0 -> -1 +template inline Signum_match m_Signum(const Val_t &V) { + return Signum_match(V); +} + } // end namespace PatternMatch } // end namespace llvm diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h index 4ab1f8497adb..7310c5697a7e 100644 --- a/include/llvm/IR/Statepoint.h +++ b/include/llvm/IR/Statepoint.h @@ -173,7 +173,7 @@ public: /// range adapter for call arguments iterator_range call_args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } /// \brief Return true if the call or the callee has the given attribute. 
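Signum_match above recognizes the shift-or expansion signum(x) == (x >> (w-1)) | (-x >>u (w-1)), where w is the scalar bit width. A short sketch of how a transform could consume the new m_Signum matcher; the wrapper function is hypothetical:

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns X when V computes signum(X) in the shift-or form
    // matched above, and nullptr otherwise.
    static Value *matchSignum(Value *V) {
      Value *X;
      if (match(V, m_Signum(m_Value(X))))
        return X;
      return nullptr;
    }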
@@ -201,8 +201,7 @@ public: /// range adapter for GC transition arguments iterator_range gc_transition_args() const { - return iterator_range(gc_transition_args_begin(), - gc_transition_args_end()); + return make_range(gc_transition_args_begin(), gc_transition_args_end()); } /// Number of additional arguments excluding those intended @@ -225,7 +224,7 @@ public: /// range adapter for vm state arguments iterator_range vm_state_args() const { - return iterator_range(vm_state_begin(), vm_state_end()); + return make_range(vm_state_begin(), vm_state_end()); } typename CallSiteTy::arg_iterator gc_args_begin() const { @@ -235,9 +234,13 @@ public: return getCallSite().arg_end(); } + unsigned gcArgsStartIdx() const { + return gc_args_begin() - getInstruction()->op_begin(); + } + /// range adapter for gc arguments iterator_range gc_args() const { - return iterator_range(gc_args_begin(), gc_args_end()); + return make_range(gc_args_begin(), gc_args_end()); } /// Get list of all gc relocates linked to this statepoint @@ -320,7 +323,7 @@ public: bool isTiedToInvoke() const { const Value *Token = RelocateCS.getArgument(0); - return isa(Token) || isa(Token); + return isa(Token) || isa(Token); } /// Get enclosed relocate intrinsic @@ -332,7 +335,7 @@ public: // This takes care both of relocates for call statepoints and relocates // on normal path of invoke statepoint. - if (!isa(Token)) { + if (!isa(Token)) { return cast(Token); } @@ -396,16 +399,10 @@ StatepointBase::getRelocates() LandingPadInst *LandingPad = cast(getInstruction())->getLandingPadInst(); - // Search for extract value from landingpad instruction to which - // gc relocates will be attached + // Search for gc relocates that are attached to this landingpad. for (const User *LandingPadUser : LandingPad->users()) { - if (!isa(LandingPadUser)) - continue; - - // gc relocates should be attached to this extract value - for (const User *U : LandingPadUser->users()) - if (isGCRelocate(U)) - Result.push_back(GCRelocateOperands(U)); + if (isGCRelocate(LandingPadUser)) + Result.push_back(GCRelocateOperands(LandingPadUser)); } return Result; } diff --git a/include/llvm/IR/SymbolTableListTraits.h b/include/llvm/IR/SymbolTableListTraits.h index 0a5149c3d938..5fc48d10d63f 100644 --- a/include/llvm/IR/SymbolTableListTraits.h +++ b/include/llvm/IR/SymbolTableListTraits.h @@ -29,31 +29,66 @@ namespace llvm { class ValueSymbolTable; - -template class ilist_iterator; -template class iplist; -template struct ilist_traits; + +template class ilist_iterator; +template class iplist; +template struct ilist_traits; + +template +struct SymbolTableListSentinelTraits + : public ilist_embedded_sentinel_traits {}; + +/// Template metafunction to get the parent type for a symbol table list. +/// +/// Implementations create a typedef called \c type so that we only need a +/// single template parameter for the list and traits.
+template struct SymbolTableListParentType {}; +class Argument; +class BasicBlock; +class Function; +class Instruction; +class GlobalVariable; +class GlobalAlias; +class Module; +#define DEFINE_SYMBOL_TABLE_PARENT_TYPE(NODE, PARENT) \ + template <> struct SymbolTableListParentType { typedef PARENT type; }; +DEFINE_SYMBOL_TABLE_PARENT_TYPE(Instruction, BasicBlock) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(BasicBlock, Function) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(Argument, Function) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(Function, Module) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(GlobalVariable, Module) +DEFINE_SYMBOL_TABLE_PARENT_TYPE(GlobalAlias, Module) +#undef DEFINE_SYMBOL_TABLE_PARENT_TYPE + +template class SymbolTableList; // ValueSubClass - The type of objects that I hold, e.g. Instruction. // ItemParentClass - The type of object that owns the list, e.g. BasicBlock. // -template -class SymbolTableListTraits : public ilist_default_traits { - typedef ilist_traits TraitsClass; +template +class SymbolTableListTraits + : public ilist_nextprev_traits, + public SymbolTableListSentinelTraits, + public ilist_node_traits { + typedef SymbolTableList ListTy; + typedef + typename SymbolTableListParentType::type ItemParentClass; + public: SymbolTableListTraits() {} +private: /// getListOwner - Return the object that owns this list. If this is a list /// of instructions, it returns the BasicBlock that owns them. ItemParentClass *getListOwner() { size_t Offset(size_t(&((ItemParentClass*)nullptr->*ItemParentClass:: getSublistAccess(static_cast(nullptr))))); - iplist* Anchor(static_cast*>(this)); + ListTy *Anchor(static_cast(this)); return reinterpret_cast(reinterpret_cast(Anchor)- Offset); } - static iplist &getList(ItemParentClass *Par) { + static ListTy &getList(ItemParentClass *Par) { return Par->*(Par->getSublistAccess((ValueSubClass*)nullptr)); } @@ -61,9 +96,10 @@ public: return Par ? toPtr(Par->getValueSymbolTable()) : nullptr; } +public: void addNodeToList(ValueSubClass *V); void removeNodeFromList(ValueSubClass *V); - void transferNodesFromList(ilist_traits &L2, + void transferNodesFromList(SymbolTableListTraits &L2, ilist_iterator first, ilist_iterator last); //private: @@ -73,6 +109,14 @@ public: static ValueSymbolTable *toPtr(ValueSymbolTable &R) { return &R; } }; +/// List that automatically updates parent links and symbol tables. +/// +/// When nodes are inserted into and removed from this list, the associated +/// symbol table will be automatically updated. Similarly, parent links get +/// updated automatically. +template +class SymbolTableList : public iplist> {}; + } // End llvm namespace #endif diff --git a/include/llvm/IR/TrackingMDRef.h b/include/llvm/IR/TrackingMDRef.h index e24112154e16..97efaff7a377 100644 --- a/include/llvm/IR/TrackingMDRef.h +++ b/include/llvm/IR/TrackingMDRef.h @@ -14,15 +14,11 @@ #ifndef LLVM_IR_TRACKINGMDREF_H #define LLVM_IR_TRACKINGMDREF_H -#include "llvm/IR/MetadataTracking.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/Casting.h" namespace llvm { -class Metadata; -class MDNode; -class ValueAsMetadata; - /// \brief Tracking metadata reference. /// /// This class behaves like \a TrackingVH, but for metadata. 
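The SymbolTableListParentType metafunction above is what lets the new SymbolTableList and SymbolTableListTraits take a single template parameter: the owning class is computed from the node type, which is why the per-owner ilist_traits specializations could be deleted from Module.h earlier in this patch. A standalone toy reduction of the same pattern, using stand-in types rather than LLVM's:

    #include <type_traits>

    struct Module;   // stand-in owner
    struct Function; // stand-in node

    // Primary template is empty; each node type opts in by specializing.
    template <typename NodeTy> struct ParentType {};
    template <> struct ParentType<Function> { typedef Module type; };

    // A list that needs only the node type; the owner type is derived.
    template <typename NodeTy> struct SymbolList {
      typedef typename ParentType<NodeTy>::type OwnerTy;
      OwnerTy *Owner = nullptr;
    };

    static_assert(std::is_same<SymbolList<Function>::OwnerTy, Module>::value,
                  "owner type is derived from the node type");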
diff --git a/include/llvm/IR/Type.h b/include/llvm/IR/Type.h index 6ab0bd0631a0..b2920dd3de63 100644 --- a/include/llvm/IR/Type.h +++ b/include/llvm/IR/Type.h @@ -15,7 +15,6 @@ #ifndef LLVM_IR_TYPE_H #define LLVM_IR_TYPE_H -#include "llvm-c/Core.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/CBindingWrapping.h" @@ -38,10 +37,10 @@ template struct GraphTraits; /// they are never changed. Also note that only one instance of a particular /// type is ever created. Thus seeing if two types are equal is a matter of /// doing a trivial pointer comparison. To enforce that no two equal instances -/// are created, Type instances can only be created via static factory methods +/// are created, Type instances can only be created via static factory methods /// in class Type and in derived classes. Once allocated, Types are never /// free'd. -/// +/// class Type { public: //===--------------------------------------------------------------------===// @@ -63,45 +62,36 @@ public: LabelTyID, ///< 7: Labels MetadataTyID, ///< 8: Metadata X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific) + TokenTyID, ///< 10: Tokens // Derived types... see DerivedTypes.h file. // Make sure FirstDerivedTyID stays up to date! - IntegerTyID, ///< 10: Arbitrary bit width integers - FunctionTyID, ///< 11: Functions - StructTyID, ///< 12: Structures - ArrayTyID, ///< 13: Arrays - PointerTyID, ///< 14: Pointers - VectorTyID ///< 15: SIMD 'packed' format, or other vector type + IntegerTyID, ///< 11: Arbitrary bit width integers + FunctionTyID, ///< 12: Functions + StructTyID, ///< 13: Structures + ArrayTyID, ///< 14: Arrays + PointerTyID, ///< 15: Pointers + VectorTyID ///< 16: SIMD 'packed' format, or other vector type }; private: /// Context - This refers to the LLVMContext in which this type was uniqued. LLVMContext &Context; - // Due to Ubuntu GCC bug 910363: - // https://bugs.launchpad.net/ubuntu/+source/gcc-4.5/+bug/910363 - // Bitpack ID and SubclassData manually. - // Note: TypeID : low 8 bit; SubclassData : high 24 bit. - uint32_t IDAndSubclassData; + TypeID ID : 8; // The current base type of this type. + unsigned SubclassData : 24; // Space for subclasses to store data. protected: friend class LLVMContextImpl; explicit Type(LLVMContext &C, TypeID tid) - : Context(C), IDAndSubclassData(0), - NumContainedTys(0), ContainedTys(nullptr) { - setTypeID(tid); - } + : Context(C), ID(tid), SubclassData(0), + NumContainedTys(0), ContainedTys(nullptr) {} ~Type() = default; - void setTypeID(TypeID ID) { - IDAndSubclassData = (ID & 0xFF) | (IDAndSubclassData & 0xFFFFFF00); - assert(getTypeID() == ID && "TypeID data too large for field"); - } - - unsigned getSubclassData() const { return IDAndSubclassData >> 8; } - + unsigned getSubclassData() const { return SubclassData; } + void setSubclassData(unsigned val) { - IDAndSubclassData = (IDAndSubclassData & 0xFF) | (val << 8); + SubclassData = val; // Ensure we don't have any accidental truncation. assert(getSubclassData() == val && "Subclass data too large for field"); } @@ -118,7 +108,7 @@ protected: Type * const *ContainedTys; public: - void print(raw_ostream &O) const; + void print(raw_ostream &O, bool IsForDebug = false) const; void dump() const; /// getContext - Return the LLVMContext in which this type was uniqued. @@ -131,7 +121,7 @@ public: /// getTypeID - Return the type id for the type. This will return one /// of the TypeID enum elements defined above. 
/// - TypeID getTypeID() const { return (TypeID)(IDAndSubclassData & 0xFF); } + TypeID getTypeID() const { return ID; } /// isVoidTy - Return true if this is 'void'. bool isVoidTy() const { return getTypeID() == VoidTyID; } @@ -141,7 +131,7 @@ public: /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type. bool isFloatTy() const { return getTypeID() == FloatTyID; } - + /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type. bool isDoubleTy() const { return getTypeID() == DoubleTyID; } @@ -181,16 +171,19 @@ public: /// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP. /// bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); } - + /// isLabelTy - Return true if this is 'label'. bool isLabelTy() const { return getTypeID() == LabelTyID; } /// isMetadataTy - Return true if this is 'metadata'. bool isMetadataTy() const { return getTypeID() == MetadataTyID; } + /// isTokenTy - Return true if this is 'token'. + bool isTokenTy() const { return getTypeID() == TokenTyID; } + /// isIntegerTy - True if this is an instance of IntegerType. /// - bool isIntegerTy() const { return getTypeID() == IntegerTyID; } + bool isIntegerTy() const { return getTypeID() == IntegerTyID; } /// isIntegerTy - Return true if this is an IntegerType of the given width. bool isIntegerTy(unsigned Bitwidth) const; @@ -199,7 +192,7 @@ public: /// integer types. /// bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); } - + /// isFunctionTy - True if this is an instance of FunctionType. /// bool isFunctionTy() const { return getTypeID() == FunctionTyID; } @@ -220,14 +213,14 @@ public: /// pointer types. /// bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); } - + /// isVectorTy - True if this is an instance of VectorType. /// bool isVectorTy() const { return getTypeID() == VectorTyID; } - /// canLosslesslyBitCastTo - Return true if this type could be converted - /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts - /// are valid for types of the same size only where no re-interpretation of + /// canLosslesslyBitCastTo - Return true if this type could be converted + /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts + /// are valid for types of the same size only where no re-interpretation of /// the bits is done. /// @brief Determine if this type could be losslessly bitcast to Ty bool canLosslesslyBitCastTo(Type *Ty) const; @@ -265,7 +258,7 @@ public: /// get the actual size for a particular target, it is reasonable to use the /// DataLayout subsystem to do this. /// - bool isSized(SmallPtrSetImpl *Visited = nullptr) const { + bool isSized(SmallPtrSetImpl *Visited = nullptr) const { // If it's a primitive, it is always sized. if (getTypeID() == IntegerTyID || isFloatingPointTy() || getTypeID() == PointerTyID || @@ -304,8 +297,7 @@ public: /// getScalarType - If this is a vector type, return the element type, /// otherwise return 'this'. - const Type *getScalarType() const LLVM_READONLY; - Type *getScalarType() LLVM_READONLY; + Type *getScalarType() const LLVM_READONLY; //===--------------------------------------------------------------------===// // Type Iteration support. @@ -344,30 +336,30 @@ public: // example) is shorthand for cast(Ty)->getNumElements(). This is // only intended to cover the core methods that are frequently used, helper // methods should not be added here. 
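The Type.h hunk above replaces the manually packed IDAndSubclassData word (a workaround for Ubuntu GCC bug 910363) with ordinary bit-fields, keeping the same low-8/high-24 split. A toy illustration of the layout being relied on; the struct and values here are ours, not LLVM's:

    #include <cassert>

    enum TypeID : unsigned { VoidTyID = 0, TokenTyID = 10, IntegerTyID = 11 };

    struct Packed {
      TypeID ID : 8;              // low 8 bits: the type id
      unsigned SubclassData : 24; // remaining 24 bits: subclass scratch space
    };

    int main() {
      Packed P{IntegerTyID, 0x123456};
      assert(P.ID == IntegerTyID && P.SubclassData == 0x123456);
      // On common ABIs this packs into one 32-bit word, matching the
      // old uint32_t IDAndSubclassData field.
    }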
- - unsigned getIntegerBitWidth() const; - Type *getFunctionParamType(unsigned i) const; - unsigned getFunctionNumParams() const; - bool isFunctionVarArg() const; - - StringRef getStructName() const; - unsigned getStructNumElements() const; - Type *getStructElementType(unsigned N) const; - - Type *getSequentialElementType() const; - - uint64_t getArrayNumElements() const; + inline unsigned getIntegerBitWidth() const; + + inline Type *getFunctionParamType(unsigned i) const; + inline unsigned getFunctionNumParams() const; + inline bool isFunctionVarArg() const; + + inline StringRef getStructName() const; + inline unsigned getStructNumElements() const; + inline Type *getStructElementType(unsigned N) const; + + inline Type *getSequentialElementType() const; + + inline uint64_t getArrayNumElements() const; Type *getArrayElementType() const { return getSequentialElementType(); } - unsigned getVectorNumElements() const; + inline unsigned getVectorNumElements() const; Type *getVectorElementType() const { return getSequentialElementType(); } Type *getPointerElementType() const { return getSequentialElementType(); } /// \brief Get the address space of this pointer or pointer vector type. - unsigned getPointerAddressSpace() const; - + inline unsigned getPointerAddressSpace() const; + //===--------------------------------------------------------------------===// // Static members exported by the Type class itself. Useful for getting // instances of Type. @@ -389,6 +381,7 @@ public: static Type *getFP128Ty(LLVMContext &C); static Type *getPPC_FP128Ty(LLVMContext &C); static Type *getX86_MMXTy(LLVMContext &C); + static Type *getTokenTy(LLVMContext &C); static IntegerType *getIntNTy(LLVMContext &C, unsigned N); static IntegerType *getInt1Ty(LLVMContext &C); static IntegerType *getInt8Ty(LLVMContext &C); @@ -396,7 +389,7 @@ public: static IntegerType *getInt32Ty(LLVMContext &C); static IntegerType *getInt64Ty(LLVMContext &C); static IntegerType *getInt128Ty(LLVMContext &C); - + //===--------------------------------------------------------------------===// // Convenience methods for getting pointer types with one of the above builtin // types as pointee. @@ -417,13 +410,13 @@ public: /// getPointerTo - Return a pointer to the current type. This is equivalent /// to PointerType::get(Foo, AddrSpace). - PointerType *getPointerTo(unsigned AddrSpace = 0); + PointerType *getPointerTo(unsigned AddrSpace = 0) const; private: /// isSizedDerivedType - Derived types like structures and arrays are sized /// iff all of the members of the type are sized as well. Since asking for /// their size is relatively uncommon, move this operation out of line. - bool isSizedDerivedType(SmallPtrSetImpl *Visited = nullptr) const; + bool isSizedDerivedType(SmallPtrSetImpl *Visited = nullptr) const; }; // Printing of types. @@ -439,13 +432,11 @@ template <> struct isa_impl { } }; - //===----------------------------------------------------------------------===// // Provide specializations of GraphTraits to be able to treat a type as a // graph of sub types. 
- -template <> struct GraphTraits { +template <> struct GraphTraits { typedef Type NodeType; typedef Type::subtype_iterator ChildIteratorType; @@ -483,7 +474,7 @@ inline Type **unwrap(LLVMTypeRef* Tys) { inline LLVMTypeRef *wrap(Type **Tys) { return reinterpret_cast(const_cast(Tys)); } - + } // End llvm namespace #endif diff --git a/include/llvm/IR/TypeFinder.h b/include/llvm/IR/TypeFinder.h index 73a63ad0349e..5f3854377c16 100644 --- a/include/llvm/IR/TypeFinder.h +++ b/include/llvm/IR/TypeFinder.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the declaration of the TypeFinder class. +// This file contains the declaration of the TypeFinder class. // //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/Use.h b/include/llvm/IR/Use.h index 160d71b03e7f..a738677f8e5b 100644 --- a/include/llvm/IR/Use.h +++ b/include/llvm/IR/Use.h @@ -25,7 +25,6 @@ #ifndef LLVM_IR_USE_H #define LLVM_IR_USE_H -#include "llvm-c/Core.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/Compiler.h" diff --git a/include/llvm/IR/UseListOrder.h b/include/llvm/IR/UseListOrder.h index b7c2418d348d..1cabf03d1b00 100644 --- a/include/llvm/IR/UseListOrder.h +++ b/include/llvm/IR/UseListOrder.h @@ -34,7 +34,7 @@ struct UseListOrder { UseListOrder(const Value *V, const Function *F, size_t ShuffleSize) : V(V), F(F), Shuffle(ShuffleSize) {} - UseListOrder() : V(0), F(0) {} + UseListOrder() : V(nullptr), F(nullptr) {} UseListOrder(UseListOrder &&X) : V(X.V), F(X.F), Shuffle(std::move(X.Shuffle)) {} UseListOrder &operator=(UseListOrder &&X) { @@ -53,4 +53,4 @@ typedef std::vector UseListOrderStack; } // end namespace llvm -#endif +#endif // LLVM_IR_USELISTORDER_H diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h index 93614fab5759..885ae197d228 100644 --- a/include/llvm/IR/User.h +++ b/include/llvm/IR/User.h @@ -19,6 +19,7 @@ #ifndef LLVM_IR_USER_H #define LLVM_IR_USER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Value.h" @@ -39,6 +40,9 @@ class User : public Value { friend struct HungoffOperandTraits; virtual void anchor(); + LLVM_ATTRIBUTE_ALWAYS_INLINE inline static void * + allocateFixedOperandUser(size_t, unsigned, unsigned); + protected: /// Allocate a User with an operand pointer co-allocated. /// @@ -51,7 +55,17 @@ protected: /// This is used for subclasses which have a fixed number of operands. void *operator new(size_t Size, unsigned Us); - User(Type *ty, unsigned vty, Use *OpList, unsigned NumOps) + /// Allocate a User with the operands co-allocated. If DescBytes is non-zero + /// then allocate an additional DescBytes bytes before the operands. These + /// bytes can be accessed by calling getDescriptor. + /// + /// DescBytes needs to be divisible by sizeof(void *). The allocated + /// descriptor, if any, is aligned to sizeof(void *) bytes. + /// + /// This is used for subclasses which have a fixed number of operands. + void *operator new(size_t Size, unsigned Us, unsigned DescBytes); + + User(Type *ty, unsigned vty, Use *, unsigned NumOps) : Value(ty, vty) { assert(NumOps < (1u << NumUserOperandsBits) && "Too many operands"); NumUserOperands = NumOps; @@ -137,6 +151,12 @@ public: unsigned getNumOperands() const { return NumUserOperands; } + /// Returns the descriptor co-allocated with this User instance. 
+ ArrayRef getDescriptor() const; + + /// Returns the descriptor co-allocated with this User instance. + MutableArrayRef getDescriptor(); + /// Set the number of operands on a GlobalVariable. /// /// GlobalVariable always allocates space for a single operands, but @@ -150,19 +170,6 @@ public: NumUserOperands = NumOps; } - /// Set the number of operands on a Function. - /// - /// Function always allocates space for a single operands, but - /// doesn't always use it. - /// - /// FIXME: As that the number of operands is used to find the start of - /// the allocated memory in operator delete, we need to always think we have - /// 1 operand before delete. - void setFunctionNumOperands(unsigned NumOps) { - assert(NumOps <= 1 && "Function can only have 0 or 1 operands"); - NumUserOperands = NumOps; - } - /// \brief Subclasses with hung off uses need to manage the operand count /// themselves. In these instances, the operand count isn't used to find the /// OperandList, so there's no issue in having the operand count change. @@ -213,7 +220,7 @@ public: return value_op_iterator(op_end()); } iterator_range operand_values() { - return iterator_range(value_op_begin(), value_op_end()); + return make_range(value_op_begin(), value_op_end()); } /// \brief Drop all references to operands. diff --git a/include/llvm/IR/Value.def b/include/llvm/IR/Value.def index c2a0639603ed..4c5d452fc3c3 100644 --- a/include/llvm/IR/Value.def +++ b/include/llvm/IR/Value.def @@ -70,6 +70,7 @@ HANDLE_CONSTANT(ConstantArray) HANDLE_CONSTANT(ConstantStruct) HANDLE_CONSTANT(ConstantVector) HANDLE_CONSTANT(ConstantPointerNull) +HANDLE_CONSTANT(ConstantTokenNone) HANDLE_METADATA_VALUE(MetadataAsValue) HANDLE_INLINE_ASM_VALUE(InlineAsm) @@ -79,7 +80,7 @@ HANDLE_INSTRUCTION(Instruction) // don't add new values here! HANDLE_CONSTANT_MARKER(ConstantFirstVal, Function) -HANDLE_CONSTANT_MARKER(ConstantLastVal, ConstantPointerNull) +HANDLE_CONSTANT_MARKER(ConstantLastVal, ConstantTokenNone) #undef HANDLE_GLOBAL_VALUE #undef HANDLE_CONSTANT diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index 17a80c82d1bc..bb7ff278fdef 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -14,7 +14,6 @@ #ifndef LLVM_IR_VALUE_H #define LLVM_IR_VALUE_H -#include "llvm-c/Core.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Use.h" #include "llvm/Support/CBindingWrapping.h" @@ -104,12 +103,13 @@ protected: /// /// Note, this should *NOT* be used directly by any class other than User. /// User uses this value to find the Use list. - enum : unsigned { NumUserOperandsBits = 29 }; + enum : unsigned { NumUserOperandsBits = 28 }; unsigned NumUserOperands : NumUserOperandsBits; bool IsUsedByMD : 1; bool HasName : 1; bool HasHungOffUses : 1; + bool HasDescriptor : 1; private: template // UseT == 'Use' or 'const Use' @@ -201,8 +201,9 @@ public: /// \brief Implement operator<< on Value. /// @{ - void print(raw_ostream &O) const; - void print(raw_ostream &O, ModuleSlotTracker &MST) const; + void print(raw_ostream &O, bool IsForDebug = false) const; + void print(raw_ostream &O, ModuleSlotTracker &MST, + bool IsForDebug = false) const; /// @} /// \brief Print the name of this Value out to the specified raw_ostream. @@ -272,36 +273,91 @@ public: //---------------------------------------------------------------------- // Methods for handling the chain of uses of this Value. 
// - bool use_empty() const { return UseList == nullptr; } + // Materializing a function can introduce new uses, so these methods come in + // two variants: + // The methods that start with materialized_ check the uses that are + // currently known given which functions are materialized. Be very careful + // when using them since you might not get all uses. + // The methods that don't start with materialized_ assert that modules is + // fully materialized. +#ifdef NDEBUG + void assertModuleIsMaterialized() const {} +#else + void assertModuleIsMaterialized() const; +#endif - typedef use_iterator_impl use_iterator; + bool use_empty() const { + assertModuleIsMaterialized(); + return UseList == nullptr; + } + + typedef use_iterator_impl use_iterator; typedef use_iterator_impl const_use_iterator; - use_iterator use_begin() { return use_iterator(UseList); } - const_use_iterator use_begin() const { return const_use_iterator(UseList); } - use_iterator use_end() { return use_iterator(); } - const_use_iterator use_end() const { return const_use_iterator(); } + use_iterator materialized_use_begin() { return use_iterator(UseList); } + const_use_iterator materialized_use_begin() const { + return const_use_iterator(UseList); + } + use_iterator use_begin() { + assertModuleIsMaterialized(); + return materialized_use_begin(); + } + const_use_iterator use_begin() const { + assertModuleIsMaterialized(); + return materialized_use_begin(); + } + use_iterator use_end() { return use_iterator(); } + const_use_iterator use_end() const { return const_use_iterator(); } + iterator_range materialized_uses() { + return make_range(materialized_use_begin(), use_end()); + } + iterator_range materialized_uses() const { + return make_range(materialized_use_begin(), use_end()); + } iterator_range uses() { - return iterator_range(use_begin(), use_end()); + assertModuleIsMaterialized(); + return materialized_uses(); } iterator_range uses() const { - return iterator_range(use_begin(), use_end()); + assertModuleIsMaterialized(); + return materialized_uses(); } - bool user_empty() const { return UseList == nullptr; } + bool user_empty() const { + assertModuleIsMaterialized(); + return UseList == nullptr; + } - typedef user_iterator_impl user_iterator; + typedef user_iterator_impl user_iterator; typedef user_iterator_impl const_user_iterator; - user_iterator user_begin() { return user_iterator(UseList); } - const_user_iterator user_begin() const { return const_user_iterator(UseList); } - user_iterator user_end() { return user_iterator(); } - const_user_iterator user_end() const { return const_user_iterator(); } - User *user_back() { return *user_begin(); } - const User *user_back() const { return *user_begin(); } + user_iterator materialized_user_begin() { return user_iterator(UseList); } + const_user_iterator materialized_user_begin() const { + return const_user_iterator(UseList); + } + user_iterator user_begin() { + assertModuleIsMaterialized(); + return materialized_user_begin(); + } + const_user_iterator user_begin() const { + assertModuleIsMaterialized(); + return materialized_user_begin(); + } + user_iterator user_end() { return user_iterator(); } + const_user_iterator user_end() const { return const_user_iterator(); } + User *user_back() { + assertModuleIsMaterialized(); + return *materialized_user_begin(); + } + const User *user_back() const { + assertModuleIsMaterialized(); + return *materialized_user_begin(); + } iterator_range users() { - return iterator_range(user_begin(), user_end()); + assertModuleIsMaterialized(); + 
return make_range(materialized_user_begin(), user_end()); } iterator_range users() const { - return iterator_range(user_begin(), user_end()); + assertModuleIsMaterialized(); + return make_range(materialized_user_begin(), user_end()); } /// \brief Return true if there is exactly one user of this value. @@ -493,7 +549,28 @@ private: template static Use *mergeUseLists(Use *L, Use *R, Compare Cmp) { Use *Merged; - mergeUseListsImpl(L, R, &Merged, Cmp); + Use **Next = &Merged; + + for (;;) { + if (!L) { + *Next = R; + break; + } + if (!R) { + *Next = L; + break; + } + if (Cmp(*R, *L)) { + *Next = R; + Next = &R->Next; + R = R->Next; + } else { + *Next = L; + Next = &L->Next; + L = L->Next; + } + } + return Merged; } @@ -586,25 +663,6 @@ template void Value::sortUseList(Compare Cmp) { } } -template -void Value::mergeUseListsImpl(Use *L, Use *R, Use **Next, Compare Cmp) { - if (!L) { - *Next = R; - return; - } - if (!R) { - *Next = L; - return; - } - if (Cmp(*R, *L)) { - *Next = R; - mergeUseListsImpl(L, R->Next, &R->Next, Cmp); - return; - } - *Next = L; - mergeUseListsImpl(L->Next, R, &L->Next, Cmp); -} - // isa - Provide some specializations of isa so that we don't have to include // the subtype header files to test to see if the value is a subclass... // diff --git a/include/llvm/IR/ValueHandle.h b/include/llvm/IR/ValueHandle.h index 53fa80a626aa..3c2805913ef5 100644 --- a/include/llvm/IR/ValueHandle.h +++ b/include/llvm/IR/ValueHandle.h @@ -52,13 +52,21 @@ protected: Weak }; + ValueHandleBase(const ValueHandleBase &RHS) + : ValueHandleBase(RHS.PrevPair.getInt(), RHS) {} + + ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS) + : PrevPair(nullptr, Kind), Next(nullptr), V(RHS.V) { + if (isValid(V)) + AddToExistingUseList(RHS.getPrevPtr()); + } + private: PointerIntPair PrevPair; ValueHandleBase *Next; Value* V; - ValueHandleBase(const ValueHandleBase&) = delete; public: explicit ValueHandleBase(HandleBaseKind Kind) : PrevPair(nullptr, Kind), Next(nullptr), V(nullptr) {} @@ -67,11 +75,7 @@ public: if (isValid(V)) AddToUseList(); } - ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS) - : PrevPair(nullptr, Kind), Next(nullptr), V(RHS.V) { - if (isValid(V)) - AddToExistingUseList(RHS.getPrevPtr()); - } + ~ValueHandleBase() { if (isValid(V)) RemoveFromUseList(); @@ -145,6 +149,8 @@ public: WeakVH(const WeakVH &RHS) : ValueHandleBase(Weak, RHS) {} + WeakVH &operator=(const WeakVH &RHS) = default; + Value *operator=(Value *RHS) { return ValueHandleBase::operator=(RHS); } @@ -314,7 +320,6 @@ class TrackingVH : public ValueHandleBase { public: TrackingVH() : ValueHandleBase(Tracking) {} TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, GetAsValue(P)) {} - TrackingVH(const TrackingVH &RHS) : ValueHandleBase(Tracking, RHS) {} operator ValueTy*() const { return getValPtr(); @@ -324,10 +329,6 @@ public: setValPtr(RHS); return getValPtr(); } - ValueTy *operator=(const TrackingVH &RHS) { - setValPtr(RHS.getValPtr()); - return getValPtr(); - } ValueTy *operator->() const { return getValPtr(); } ValueTy &operator*() const { return *getValPtr(); } @@ -339,15 +340,13 @@ public: /// when the underlying Value has RAUW called on it or is destroyed. This /// class can be used as the key of a map, as long as the user takes it out of /// the map before calling setValPtr() (since the map has to rearrange itself -/// when the pointer changes). Unlike ValueHandleBase, this class has a vtable -/// and a virtual destructor. +/// when the pointer changes). 
Unlike ValueHandleBase, this class has a vtable. class CallbackVH : public ValueHandleBase { virtual void anchor(); protected: - CallbackVH(const CallbackVH &RHS) - : ValueHandleBase(Callback, RHS) {} - - virtual ~CallbackVH() {} + ~CallbackVH() = default; + CallbackVH(const CallbackVH &) = default; + CallbackVH &operator=(const CallbackVH &) = default; void setValPtr(Value *P) { ValueHandleBase::operator=(P); diff --git a/include/llvm/IR/ValueMap.h b/include/llvm/IR/ValueMap.h index 4d00b637609c..ad518ac053b2 100644 --- a/include/llvm/IR/ValueMap.h +++ b/include/llvm/IR/ValueMap.h @@ -214,8 +214,8 @@ private: // This CallbackVH updates its ValueMap when the contained Value changes, // according to the user's preferences expressed through the Config object. -template -class ValueMapCallbackVH : public CallbackVH { +template +class ValueMapCallbackVH final : public CallbackVH { friend class ValueMap; friend struct DenseMapInfo; typedef ValueMap ValueMapT; diff --git a/include/llvm/IR/ValueSymbolTable.h b/include/llvm/IR/ValueSymbolTable.h index bf1fade1ccef..65bd7fc2fec1 100644 --- a/include/llvm/IR/ValueSymbolTable.h +++ b/include/llvm/IR/ValueSymbolTable.h @@ -14,13 +14,13 @@ #ifndef LLVM_IR_VALUESYMBOLTABLE_H #define LLVM_IR_VALUESYMBOLTABLE_H +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/Value.h" #include "llvm/Support/DataTypes.h" namespace llvm { - template - class SymbolTableListTraits; + template class SymbolTableListTraits; class BasicBlock; class Function; class NamedMDNode; @@ -33,12 +33,12 @@ namespace llvm { /// class ValueSymbolTable { friend class Value; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; - friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; + friend class SymbolTableListTraits; /// @name Types /// @{ public: @@ -55,7 +55,6 @@ public: /// @name Constructors /// @{ public: - ValueSymbolTable() : vmap(0), LastUnique(0) {} ~ValueSymbolTable(); @@ -63,9 +62,8 @@ public: /// @name Accessors /// @{ public: - /// This method finds the value with the given \p Name in the - /// the symbol table. + /// the symbol table. /// @returns the value associated with the \p Name /// @brief Lookup a named Value. Value *lookup(StringRef Name) const { return vmap.lookup(Name); } @@ -97,30 +95,32 @@ public: /// @brief Get a const_iterator to the end of the symbol table. inline const_iterator end() const { return vmap.end(); } - -/// @} -/// @name Mutators -/// @{ + + /// @} + /// @name Mutators + /// @{ private: + ValueName *makeUniqueName(Value *V, SmallString<256> &UniqueName); + /// This method adds the provided value \p N to the symbol table. The Value - /// must have a name which is used to place the value in the symbol table. + /// must have a name which is used to place the value in the symbol table. /// If the inserted name conflicts, this renames the value. /// @brief Add a named value to the symbol table void reinsertValue(Value *V); - + /// createValueName - This method attempts to create a value name and insert /// it into the symbol table with the specified name. If it conflicts, it /// auto-renames the name and returns that instead. 
ValueName *createValueName(StringRef Name, Value *V); - + /// This method removes a value from the symbol table. It leaves the /// ValueName attached to the value, but it is no longer inserted in the /// symtab. void removeValueName(ValueName *V); - -/// @} -/// @name Internal Data -/// @{ + + /// @} + /// @name Internal Data + /// @{ private: ValueMap vmap; ///< The map that holds the symbol table. mutable uint32_t LastUnique; ///< Counter for tracking unique names diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h index 2d9ace0b62a0..523cd3d6df72 100644 --- a/include/llvm/IRReader/IRReader.h +++ b/include/llvm/IRReader/IRReader.h @@ -27,10 +27,11 @@ class LLVMContext; /// If the given file holds a bitcode image, return a Module /// for it which does lazy deserialization of function bodies. Otherwise, /// attempt to parse it as LLVM Assembly and return a fully populated -/// Module. -std::unique_ptr getLazyIRFileModule(StringRef Filename, - SMDiagnostic &Err, - LLVMContext &Context); +/// Module. The ShouldLazyLoadMetadata flag is passed down to the bitcode +/// reader to optionally enable lazy metadata loading. +std::unique_ptr +getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, LLVMContext &Context, + bool ShouldLazyLoadMetadata = false); /// If the given MemoryBuffer holds a bitcode image, return a Module /// for it. Otherwise, attempt to parse it as LLVM Assembly and return diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index e3b9a95f0a3d..cb2b1394e92b 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -53,9 +53,6 @@ void initializeInstrumentation(PassRegistry&); /// initializeAnalysis - Initialize all passes linked into the Analysis library. void initializeAnalysis(PassRegistry&); -/// initializeIPA - Initialize all passes linked into the IPA library. -void initializeIPA(PassRegistry&); - /// initializeCodeGen - Initialize all passes linked into the CodeGen library. 
void initializeCodeGen(PassRegistry&); @@ -64,11 +61,8 @@ void initializeTarget(PassRegistry&); void initializeAAEvalPass(PassRegistry&); void initializeAddDiscriminatorsPass(PassRegistry&); -void initializeADCEPass(PassRegistry&); +void initializeADCELegacyPassPass(PassRegistry&); void initializeBDCEPass(PassRegistry&); -void initializeAliasAnalysisAnalysisGroup(PassRegistry&); -void initializeAliasAnalysisCounterPass(PassRegistry&); -void initializeAliasDebuggerPass(PassRegistry&); void initializeAliasSetPrinterPass(PassRegistry&); void initializeAlwaysInlinerPass(PassRegistry&); void initializeArgPromotionPass(PassRegistry&); @@ -76,13 +70,13 @@ void initializeAtomicExpandPass(PassRegistry&); void initializeSampleProfileLoaderPass(PassRegistry&); void initializeAlignmentFromAssumptionsPass(PassRegistry&); void initializeBarrierNoopPass(PassRegistry&); -void initializeBasicAliasAnalysisPass(PassRegistry&); +void initializeBasicAAWrapperPassPass(PassRegistry&); void initializeCallGraphWrapperPassPass(PassRegistry &); void initializeBlockExtractorPassPass(PassRegistry&); -void initializeBlockFrequencyInfoPass(PassRegistry&); +void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&); void initializeBoundsCheckingPass(PassRegistry&); void initializeBranchFolderPassPass(PassRegistry&); -void initializeBranchProbabilityInfoPass(PassRegistry&); +void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&); void initializeBreakCriticalEdgesPass(PassRegistry&); void initializeCallGraphPrinterPass(PassRegistry&); void initializeCallGraphViewerPass(PassRegistry&); @@ -90,7 +84,8 @@ void initializeCFGOnlyPrinterPass(PassRegistry&); void initializeCFGOnlyViewerPass(PassRegistry&); void initializeCFGPrinterPass(PassRegistry&); void initializeCFGSimplifyPassPass(PassRegistry&); -void initializeCFLAliasAnalysisPass(PassRegistry&); +void initializeCFLAAWrapperPassPass(PassRegistry&); +void initializeExternalAAWrapperPassPass(PassRegistry&); void initializeForwardControlFlowIntegrityPass(PassRegistry&); void initializeFlattenCFGPassPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); @@ -102,6 +97,7 @@ void initializeConstantPropagationPass(PassRegistry&); void initializeMachineCopyPropagationPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); void initializeCorrelatedValuePropagationPass(PassRegistry&); +void initializeCrossDSOCFIPass(PassRegistry&); void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); void initializeDCEPass(PassRegistry&); @@ -120,7 +116,10 @@ void initializeDominatorTreeWrapperPassPass(PassRegistry&); void initializeEarlyIfConverterPass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); +void initializeAAResultsWrapperPassPass(PassRegistry &); void initializeGCOVProfilerPass(PassRegistry&); +void initializePGOInstrumentationGenPass(PassRegistry&); +void initializePGOInstrumentationUsePass(PassRegistry&); void initializeInstrProfilingPass(PassRegistry&); void initializeAddressSanitizerPass(PassRegistry&); void initializeAddressSanitizerModulePass(PassRegistry&); @@ -132,19 +131,21 @@ void initializeScalarizerPass(PassRegistry&); void initializeEarlyCSELegacyPassPass(PassRegistry &); void initializeEliminateAvailableExternallyPass(PassRegistry&); void initializeExpandISelPseudosPass(PassRegistry&); +void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&); void initializeFunctionAttrsPass(PassRegistry&); void 
initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGVNPass(PassRegistry&); void initializeGlobalDCEPass(PassRegistry&); void initializeGlobalOptPass(PassRegistry&); -void initializeGlobalsModRefPass(PassRegistry&); +void initializeGlobalsAAWrapperPassPass(PassRegistry&); void initializeIPCPPass(PassRegistry&); void initializeIPSCCPPass(PassRegistry&); void initializeIVUsersPass(PassRegistry&); void initializeIfConverterPass(PassRegistry&); void initializeInductiveRangeCheckEliminationPass(PassRegistry&); void initializeIndVarSimplifyPass(PassRegistry&); +void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&); void initializeInlineCostAnalysisPass(PassRegistry&); void initializeInstructionCombiningPassPass(PassRegistry&); void initializeInstCountPass(PassRegistry&); @@ -155,7 +156,6 @@ void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAPass(PassRegistry&); void initializeLICMPass(PassRegistry&); void initializeLazyValueInfoPass(PassRegistry&); -void initializeLibCallAliasAnalysisPass(PassRegistry&); void initializeLintPass(PassRegistry&); void initializeLiveDebugVariablesPass(PassRegistry&); void initializeLiveIntervalsPass(PassRegistry&); @@ -210,7 +210,7 @@ void initializeMergeFunctionsPass(PassRegistry&); void initializeModuleDebugInfoPrinterPass(PassRegistry&); void initializeNaryReassociatePass(PassRegistry&); void initializeNoAAPass(PassRegistry&); -void initializeObjCARCAliasAnalysisPass(PassRegistry&); +void initializeObjCARCAAWrapperPassPass(PassRegistry&); void initializeObjCARCAPElimPass(PassRegistry&); void initializeObjCARCExpandPass(PassRegistry&); void initializeObjCARCContractPass(PassRegistry&); @@ -245,14 +245,14 @@ void initializeRegionViewerPass(PassRegistry&); void initializeRewriteStatepointsForGCPass(PassRegistry&); void initializeSafeStackPass(PassRegistry&); void initializeSCCPPass(PassRegistry&); -void initializeSROAPass(PassRegistry&); +void initializeSROALegacyPassPass(PassRegistry&); void initializeSROA_DTPass(PassRegistry&); void initializeSROA_SSAUpPass(PassRegistry&); -void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&); -void initializeScalarEvolutionPass(PassRegistry&); +void initializeSCEVAAWrapperPassPass(PassRegistry&); +void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry &); void initializeSimpleInlinerPass(PassRegistry&); -void initializeShadowStackGCLoweringPass(PassRegistry&); +void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeRegisterCoalescerPass(PassRegistry&); void initializeSingleLoopExtractorPass(PassRegistry&); void initializeSinkingPass(PassRegistry&); @@ -265,7 +265,7 @@ void initializeStackColoringPass(PassRegistry&); void initializeStackSlotColoringPass(PassRegistry&); void initializeStraightLineStrengthReducePass(PassRegistry &); void initializeStripDeadDebugInfoPass(PassRegistry&); -void initializeStripDeadPrototypesPassPass(PassRegistry&); +void initializeStripDeadPrototypesLegacyPassPass(PassRegistry&); void initializeStripDebugDeclarePass(PassRegistry&); void initializeStripNonDebugSymbolsPass(PassRegistry&); void initializeStripSymbolsPass(PassRegistry&); @@ -276,8 +276,8 @@ void initializeTargetTransformInfoWrapperPassPass(PassRegistry &); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry &); void initializeAssumptionCacheTrackerPass(PassRegistry &); void initializeTwoAddressInstructionPassPass(PassRegistry&); -void 
initializeTypeBasedAliasAnalysisPass(PassRegistry&); -void initializeScopedNoAliasAAPass(PassRegistry&); +void initializeTypeBasedAAWrapperPassPass(PassRegistry&); +void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); void initializeUnifyFunctionExitNodesPass(PassRegistry&); void initializeUnreachableBlockElimPass(PassRegistry&); void initializeUnreachableMachineBlockElimPass(PassRegistry&); @@ -294,6 +294,7 @@ void initializeBBVectorizePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); void initializeMIRPrintingPassPass(PassRegistry&); void initializeStackMapLivenessPass(PassRegistry&); +void initializeLiveDebugValuesPass(PassRegistry&); void initializeMachineCombinerPass(PassRegistry &); void initializeLoadCombinePass(PassRegistry&); void initializeRewriteSymbolsPass(PassRegistry&); @@ -304,6 +305,10 @@ void initializeDwarfEHPreparePass(PassRegistry&); void initializeFloat2IntPass(PassRegistry&); void initializeLoopDistributePass(PassRegistry&); void initializeSjLjEHPreparePass(PassRegistry&); +void initializeDemandedBitsPass(PassRegistry&); +void initializeFuncletLayoutPass(PassRegistry &); +void initializeLoopLoadEliminationPass(PassRegistry&); +void initializeFunctionImportPassPass(PassRegistry &); } #endif diff --git a/include/llvm/LTO/LTOCodeGenerator.h b/include/llvm/LTO/LTOCodeGenerator.h index 0c46fc048a43..3820b211a381 100644 --- a/include/llvm/LTO/LTOCodeGenerator.h +++ b/include/llvm/LTO/LTOCodeGenerator.h @@ -39,7 +39,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Linker/Linker.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include #include @@ -48,6 +48,7 @@ namespace llvm { class LLVMContext; class DiagnosticInfo; class GlobalValue; + class Linker; class Mangler; class MemoryBuffer; class TargetLibraryInfo; @@ -61,121 +62,135 @@ namespace llvm { struct LTOCodeGenerator { static const char *getVersionString(); - LTOCodeGenerator(); - LTOCodeGenerator(std::unique_ptr Context); + LTOCodeGenerator(LLVMContext &Context); ~LTOCodeGenerator(); - // Merge given module, return true on success. + /// Merge given module. Return true on success. bool addModule(struct LTOModule *); - // Set the destination module. - void setModule(struct LTOModule *); + /// Set the destination module. + void setModule(std::unique_ptr M); - void setTargetOptions(TargetOptions options); + void setTargetOptions(TargetOptions Options); void setDebugInfo(lto_debug_model); - void setCodePICModel(lto_codegen_model); + void setCodePICModel(Reloc::Model Model) { RelocModel = Model; } + + /// Set the file type to be emitted (assembly or object code). + /// The default is TargetMachine::CGFT_ObjectFile. + void setFileType(TargetMachine::CodeGenFileType FT) { FileType = FT; } - void setCpu(const char *mCpu) { MCpu = mCpu; } - void setAttr(const char *mAttr) { MAttr = mAttr; } - void setOptLevel(unsigned optLevel) { OptLevel = optLevel; } + void setCpu(const char *MCpu) { this->MCpu = MCpu; } + void setAttr(const char *MAttr) { this->MAttr = MAttr; } + void setOptLevel(unsigned OptLevel); void setShouldInternalize(bool Value) { ShouldInternalize = Value; } void setShouldEmbedUselists(bool Value) { ShouldEmbedUselists = Value; } - void addMustPreserveSymbol(StringRef sym) { MustPreserveSymbols[sym] = 1; } + void addMustPreserveSymbol(StringRef Sym) { MustPreserveSymbols[Sym] = 1; } - // To pass options to the driver and optimization passes. 
-  // To pass options to the driver and optimization passes. These options are
-  // not necessarily for debugging purpose (The function name is misleading).
-  // This function should be called before LTOCodeGenerator::compilexxx(),
-  // and LTOCodeGenerator::writeMergedModules().
-  void setCodeGenDebugOptions(const char *opts);
+  /// Pass options to the driver and optimization passes.
+  ///
+  /// These options are not necessarily for debugging purpose (the function
+  /// name is misleading). This function should be called before
+  /// LTOCodeGenerator::compilexxx(), and
+  /// LTOCodeGenerator::writeMergedModules().
+  void setCodeGenDebugOptions(const char *Opts);

-  // Parse the options set in setCodeGenDebugOptions. Like
-  // setCodeGenDebugOptions, this must be called before
-  // LTOCodeGenerator::compilexxx() and LTOCodeGenerator::writeMergedModules()
+  /// Parse the options set in setCodeGenDebugOptions.
+  ///
+  /// Like \a setCodeGenDebugOptions(), this must be called before
+  /// LTOCodeGenerator::compilexxx() and
+  /// LTOCodeGenerator::writeMergedModules().
   void parseCodeGenDebugOptions();

-  // Write the merged module to the file specified by the given path.
-  // Return true on success.
-  bool writeMergedModules(const char *path, std::string &errMsg);
+  /// Write the merged module to the file specified by the given path. Return
+  /// true on success.
+  bool writeMergedModules(const char *Path);

-  // Compile the merged module into a *single* object file; the path to object
-  // file is returned to the caller via argument "name". Return true on
-  // success.
-  //
-  // NOTE that it is up to the linker to remove the intermediate object file.
-  // Do not try to remove the object file in LTOCodeGenerator's destructor
-  // as we don't who (LTOCodeGenerator or the obj file) will last longer.
-  bool compile_to_file(const char **name,
-                       bool disableInline,
-                       bool disableGVNLoadPRE,
-                       bool disableVectorization,
-                       std::string &errMsg);
+  /// Compile the merged module into a *single* output file; the path to output
+  /// file is returned to the caller via argument "name". Return true on
+  /// success.
+  ///
+  /// \note It is up to the linker to remove the intermediate output file. Do
+  /// not try to remove the object file in LTOCodeGenerator's destructor as we
+  /// don't know who (LTOCodeGenerator or the output file) will last longer.
+  bool compile_to_file(const char **Name, bool DisableVerify,
+                       bool DisableInline, bool DisableGVNLoadPRE,
+                       bool DisableVectorization);

-  // As with compile_to_file(), this function compiles the merged module into
-  // single object file. Instead of returning the object-file-path to the caller
-  // (linker), it brings the object to a buffer, and return the buffer to the
-  // caller. This function should delete intermediate object file once its content
-  // is brought to memory. Return NULL if the compilation was not successful.
+  /// As with compile_to_file(), this function compiles the merged module into
+  /// a single output file. Instead of returning the output file path to the
+  /// caller (linker), it brings the output to a buffer, and returns the buffer
+  /// to the caller. This function should delete the intermediate file once
+  /// its content is brought to memory. Return NULL if the compilation was not
+  /// successful.
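+  ///
+  /// Illustrative call shape (hypothetical client code, where \p CG is an
+  /// already-configured LTOCodeGenerator; not part of this interface):
+  /// \code
+  ///   std::unique_ptr<MemoryBuffer> Buf =
+  ///       CG.compile(/*DisableVerify=*/false, /*DisableInline=*/false,
+  ///                  /*DisableGVNLoadPRE=*/false,
+  ///                  /*DisableVectorization=*/false);
+  /// \endcode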
+  std::unique_ptr<MemoryBuffer> compile(bool DisableVerify, bool DisableInline,
+                                        bool DisableGVNLoadPRE,
+                                        bool DisableVectorization);

-  // Optimizes the merged module. Returns true on success.
-  bool optimize(bool disableInline,
-                bool disableGVNLoadPRE,
-                bool disableVectorization,
-                std::string &errMsg);
+  /// Optimizes the merged module. Returns true on success.
+  bool optimize(bool DisableVerify, bool DisableInline, bool DisableGVNLoadPRE,
+                bool DisableVectorization);

-  // Compiles the merged optimized module into a single object file. It brings
-  // the object to a buffer, and returns the buffer to the caller. Return NULL
-  // if the compilation was not successful.
-  std::unique_ptr<MemoryBuffer> compileOptimized(std::string &errMsg);
+  /// Compiles the merged optimized module into a single output file. It brings
+  /// the output to a buffer, and returns the buffer to the caller. Return NULL
+  /// if the compilation was not successful.
+  std::unique_ptr<MemoryBuffer> compileOptimized();
+
+  /// Compile the merged optimized module into out.size() output files each
+  /// representing a linkable partition of the module. If out contains more
+  /// than one element, code generation is done in parallel with out.size()
+  /// threads. Output files will be written to members of out. Returns true on
+  /// success.
+  bool compileOptimized(ArrayRef<raw_pwrite_stream *> Out);

   void setDiagnosticHandler(lto_diagnostic_handler_t, void *);

   LLVMContext &getContext() { return Context; }

+  void resetMergedModule() { MergedModule.reset(); }
+
 private:
   void initializeLTOPasses();

-  bool compileOptimized(raw_pwrite_stream &out, std::string &errMsg);
-  bool compileOptimizedToFile(const char **name, std::string &errMsg);
+  bool compileOptimizedToFile(const char **Name);
   void applyScopeRestrictions();
   void applyRestriction(GlobalValue &GV, ArrayRef<StringRef> Libcalls,
                         std::vector<const char *> &MustPreserveList,
                         SmallPtrSetImpl<GlobalValue *> &AsmUsed,
                         Mangler &Mangler);
-  bool determineTarget(std::string &errMsg);
+  bool determineTarget();

   static void DiagnosticHandler(const DiagnosticInfo &DI, void *Context);

   void DiagnosticHandler2(const DiagnosticInfo &DI);

+  void emitError(const std::string &ErrMsg);
+
   typedef StringMap<uint8_t> StringSet;

-  void destroyMergedModule();
-  std::unique_ptr<LLVMContext> OwnedContext;
   LLVMContext &Context;
-  Linker IRLinker;
-  TargetMachine *TargetMach = nullptr;
+  std::unique_ptr<Module> MergedModule;
+  std::unique_ptr<Linker> TheLinker;
+  std::unique_ptr<TargetMachine> TargetMach;
   bool EmitDwarfDebugInfo = false;
   bool ScopeRestrictionsDone = false;
-  lto_codegen_model CodeModel = LTO_CODEGEN_PIC_MODEL_DEFAULT;
+  Reloc::Model RelocModel = Reloc::Default;
   StringSet MustPreserveSymbols;
   StringSet AsmUndefinedRefs;
-  std::vector<char *> CodegenOptions;
+  std::vector<std::string> CodegenOptions;
+  std::string FeatureStr;
   std::string MCpu;
   std::string MAttr;
   std::string NativeObjectPath;
   TargetOptions Options;
+  CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
   unsigned OptLevel = 2;
   lto_diagnostic_handler_t DiagHandler = nullptr;
   void *DiagContext = nullptr;
-  LTOModule *OwnedModule = nullptr;
   bool ShouldInternalize = true;
   bool ShouldEmbedUselists = false;
+  TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile;
 };
 }
 #endif
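//===----------------------------------------------------------------------===//
// Usage sketch for the revised LTO interfaces above (hypothetical client
// code, not part of this patch): the LLVMContext is now borrowed rather than
// owned, module loading reports failure through ErrorOr, and compile_to_file
// takes the new DisableVerify flag. The input path, TargetOptions value, and
// error handling are illustrative assumptions.
//===----------------------------------------------------------------------===//
// #include "llvm/LTO/LTOCodeGenerator.h"
// #include "llvm/LTO/LTOModule.h"
//
//   static bool runLTOSketch(llvm::LLVMContext &Ctx, llvm::TargetOptions Opts) {
//     using namespace llvm;
//     ErrorOr<std::unique_ptr<LTOModule>> ModOrErr =
//         LTOModule::createFromFile(Ctx, "input.o", Opts);
//     if (!ModOrErr)
//       return false;                // Error code carried by the ErrorOr.
//     LTOCodeGenerator CG(Ctx);      // Borrows Ctx; diagnostics flow to it.
//     if (!CG.addModule(ModOrErr->get()))
//       return false;
//     CG.setOptLevel(2);             // Mapped to a CodeGenOpt level internally.
//     const char *ObjPath = nullptr; // The linker removes this intermediate.
//     return CG.compile_to_file(&ObjPath, /*DisableVerify=*/false,
//                               /*DisableInline=*/false,
//                               /*DisableGVNLoadPRE=*/false,
//                               /*DisableVectorization=*/false);
//   }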
diff --git a/include/llvm/LTO/LTOModule.h b/include/llvm/LTO/LTOModule.h
index c4e2be627399..97b5865bd47f 100644
--- a/include/llvm/LTO/LTOModule.h
+++ b/include/llvm/LTO/LTOModule.h
@@ -74,6 +74,11 @@ public:
   static bool isBitcodeForTarget(MemoryBuffer *memBuffer,
                                  StringRef triplePrefix);

+  /// Returns a string representing the producer identification stored in the
+  /// bitcode, or "" if the bitcode does not contain any.
+  ///
+  static std::string getProducerString(MemoryBuffer *Buffer);
+
   /// Create a MemoryBuffer from a memory range with an optional name.
   static std::unique_ptr<MemoryBuffer> makeBuffer(const void *mem,
                                                   size_t length,
                                                   StringRef name = "");
@@ -86,25 +91,24 @@ public:
   /// InitializeAllTargetMCs();
   /// InitializeAllAsmPrinters();
   /// InitializeAllAsmParsers();
-  static LTOModule *createFromFile(const char *path, TargetOptions options,
-                                   std::string &errMsg);
-  static LTOModule *createFromOpenFile(int fd, const char *path, size_t size,
-                                       TargetOptions options,
-                                       std::string &errMsg);
-  static LTOModule *createFromOpenFileSlice(int fd, const char *path,
-                                            size_t map_size, off_t offset,
-                                            TargetOptions options,
-                                            std::string &errMsg);
-  static LTOModule *createFromBuffer(const void *mem, size_t length,
-                                     TargetOptions options, std::string &errMsg,
-                                     StringRef path = "");
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createFromFile(LLVMContext &Context, const char *path, TargetOptions options);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createFromOpenFile(LLVMContext &Context, int fd, const char *path,
+                     size_t size, TargetOptions options);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createFromOpenFileSlice(LLVMContext &Context, int fd, const char *path,
+                          size_t map_size, off_t offset, TargetOptions options);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createFromBuffer(LLVMContext &Context, const void *mem, size_t length,
+                   TargetOptions options, StringRef path = "");

-  static LTOModule *createInLocalContext(const void *mem, size_t length,
-                                         TargetOptions options,
-                                         std::string &errMsg, StringRef path);
-  static LTOModule *createInContext(const void *mem, size_t length,
-                                    TargetOptions options, std::string &errMsg,
-                                    StringRef path, LLVMContext *Context);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createInLocalContext(const void *mem, size_t length, TargetOptions options,
+                       StringRef path);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  createInContext(const void *mem, size_t length, TargetOptions options,
+                  StringRef path, LLVMContext *Context);

   const Module &getModule() const {
     return const_cast<LTOModule *>(this)->getModule();
@@ -113,6 +117,8 @@ public:
     return IRFile->getModule();
   }

+  std::unique_ptr<Module> takeModule() { return IRFile->takeModule(); }
+
   /// Return the Module's target triple.
   const std::string &getTargetTriple() {
     return getModule().getTargetTriple();
@@ -163,7 +169,7 @@ private:
   /// Parse the symbols from the module and module-level ASM and add them to
   /// either the defined or undefined lists.
-  bool parseSymbols(std::string &errMsg);
+  void parseSymbols();

   /// Add a symbol which isn't defined just yet to a list to be resolved later.
   void addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
@@ -200,8 +206,9 @@ private:
   bool objcClassNameFromExpression(const Constant *c, std::string &name);

   /// Create an LTOModule (private version).
-  static LTOModule *makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
-                                  std::string &errMsg, LLVMContext *Context);
+  static ErrorOr<std::unique_ptr<LTOModule>>
+  makeLTOModule(MemoryBufferRef Buffer, TargetOptions options,
+                LLVMContext *Context);
 };
 }
 #endif
diff --git a/include/llvm/LibDriver/LibDriver.h b/include/llvm/LibDriver/LibDriver.h
index aaaa7b7d21c3..09495650c1b9 100644
--- a/include/llvm/LibDriver/LibDriver.h
+++ b/include/llvm/LibDriver/LibDriver.h
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // Defines an interface to a lib.exe-compatible driver that also understands
-// bitcode files. Used by llvm-lib and lld-link2 /lib.
+// bitcode files. Used by llvm-lib and lld-link /lib.
// //===----------------------------------------------------------------------===// diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index cea5530db3b8..29fcd93a2a1c 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -17,8 +17,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "llvm/Analysis/CallPrinter.h" #include "llvm/Analysis/DomPrinter.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IntervalPartition.h" #include "llvm/Analysis/Lint.h" #include "llvm/Analysis/Passes.h" @@ -26,6 +29,9 @@ #include "llvm/Analysis/RegionPass.h" #include "llvm/Analysis/RegionPrinter.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" @@ -52,21 +58,18 @@ namespace { (void) llvm::createAAEvalPass(); (void) llvm::createAggressiveDCEPass(); (void) llvm::createBitTrackingDCEPass(); - (void) llvm::createAliasAnalysisCounterPass(); - (void) llvm::createAliasDebugger(); (void) llvm::createArgumentPromotionPass(); (void) llvm::createAlignmentFromAssumptionsPass(); - (void) llvm::createBasicAliasAnalysisPass(); - (void) llvm::createLibCallAliasAnalysisPass(nullptr); - (void) llvm::createScalarEvolutionAliasAnalysisPass(); - (void) llvm::createTypeBasedAliasAnalysisPass(); - (void) llvm::createScopedNoAliasAAPass(); + (void) llvm::createBasicAAWrapperPass(); + (void) llvm::createSCEVAAWrapperPass(); + (void) llvm::createTypeBasedAAWrapperPass(); + (void) llvm::createScopedNoAliasAAWrapperPass(); (void) llvm::createBoundsCheckingPass(); (void) llvm::createBreakCriticalEdgesPass(); (void) llvm::createCallGraphPrinterPass(); (void) llvm::createCallGraphViewerPass(); (void) llvm::createCFGSimplificationPass(); - (void) llvm::createCFLAliasAnalysisPass(); + (void) llvm::createCFLAAWrapperPass(); (void) llvm::createStructurizeCFGPass(); (void) llvm::createConstantMergePass(); (void) llvm::createConstantPropagationPass(); @@ -82,12 +85,15 @@ namespace { (void) llvm::createDomOnlyViewerPass(); (void) llvm::createDomViewerPass(); (void) llvm::createGCOVProfilerPass(); + (void) llvm::createPGOInstrumentationGenPass(); + (void) llvm::createPGOInstrumentationUsePass(); (void) llvm::createInstrProfilingPass(); + (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerPass(); (void) llvm::createGlobalDCEPass(); (void) llvm::createGlobalOptimizerPass(); - (void) llvm::createGlobalsModRefPass(); + (void) llvm::createGlobalsAAWrapperPass(); (void) llvm::createIPConstantPropagationPass(); (void) llvm::createIPSCCPPass(); (void) llvm::createInductiveRangeCheckEliminationPass(); @@ -110,8 +116,7 @@ namespace { (void) llvm::createLowerInvokePass(); (void) llvm::createLowerSwitchPass(); (void) llvm::createNaryReassociatePass(); - (void) llvm::createNoAAPass(); - (void) llvm::createObjCARCAliasAnalysisPass(); + (void) llvm::createObjCARCAAWrapperPass(); (void) llvm::createObjCARCAPElimPass(); (void) llvm::createObjCARCExpandPass(); (void) llvm::createObjCARCContractPass(); @@ -179,7 +184,7 @@ namespace { (void) llvm::createEliminateAvailableExternallyPass(); (void)new llvm::IntervalPartition(); - (void)new llvm::ScalarEvolution(); + 
      (void)new llvm::ScalarEvolutionWrapperPass();
      ((llvm::Function*)nullptr)->viewCFGOnly();
      llvm::RGPassManager RGM;
      ((llvm::RegionPass*)nullptr)->runOnRegion((llvm::Region*)nullptr, RGM);
diff --git a/include/llvm/Linker/IRMover.h b/include/llvm/Linker/IRMover.h
new file mode 100644
index 000000000000..a964cc4b72c5
--- /dev/null
+++ b/include/llvm/Linker/IRMover.h
@@ -0,0 +1,76 @@
+//===- IRMover.h ------------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LINKER_IRMOVER_H
+#define LLVM_LINKER_IRMOVER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseSet.h"
+#include <functional>
+
+namespace llvm {
+class GlobalValue;
+class MDNode;
+class Module;
+class StructType;
+class Type;
+
+class IRMover {
+  struct StructTypeKeyInfo {
+    struct KeyTy {
+      ArrayRef<Type *> ETypes;
+      bool IsPacked;
+      KeyTy(ArrayRef<Type *> E, bool P);
+      KeyTy(const StructType *ST);
+      bool operator==(const KeyTy &that) const;
+      bool operator!=(const KeyTy &that) const;
+    };
+    static StructType *getEmptyKey();
+    static StructType *getTombstoneKey();
+    static unsigned getHashValue(const KeyTy &Key);
+    static unsigned getHashValue(const StructType *ST);
+    static bool isEqual(const KeyTy &LHS, const StructType *RHS);
+    static bool isEqual(const StructType *LHS, const StructType *RHS);
+  };
+
+public:
+  class IdentifiedStructTypeSet {
+    // The set of opaque types in the composite module.
+    DenseSet<StructType *> OpaqueStructTypes;
+
+    // The set of identified but non opaque structures in the composite module.
+    DenseSet<StructType *, StructTypeKeyInfo> NonOpaqueStructTypes;
+
+  public:
+    void addNonOpaque(StructType *Ty);
+    void switchToNonOpaque(StructType *Ty);
+    void addOpaque(StructType *Ty);
+    StructType *findNonOpaque(ArrayRef<Type *> ETypes, bool IsPacked);
+    bool hasType(StructType *Ty);
+  };
+
+  IRMover(Module &M);
+
+  typedef std::function<void(GlobalValue &)> ValueAdder;
+  /// Move in the provided values. The source is destroyed.
+  /// Returns true on error.
+  bool move(Module &Src, ArrayRef<GlobalValue *> ValuesToLink,
+            std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
+            DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr,
+            bool IsMetadataLinkingPostpass = false);
+  Module &getModule() { return Composite; }
+
+private:
+  Module &Composite;
+  IdentifiedStructTypeSet IdentifiedStructTypes;
+};
+
+} // End llvm namespace
+
+#endif
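//===----------------------------------------------------------------------===//
// Sketch of driving IRMover directly (hypothetical client code, not part of
// this patch): values in ValuesToLink are moved eagerly, and the AddLazyFor
// callback decides, per referenced global, whether additional values get
// pulled in. The permissive policy below links everything it is asked about.
//===----------------------------------------------------------------------===//
//
//   static bool moveOneFunction(llvm::IRMover &Mover, llvm::Module &Src,
//                               llvm::GlobalValue *F) {
//     // Src is consumed by the move, per the contract documented above.
//     return Mover.move(Src, {F},
//                       [](llvm::GlobalValue &GV,
//                          llvm::IRMover::ValueAdder Add) {
//                         Add(GV); // Link referenced globals in as well.
//                       }); // Returns true on error, as documented above.
//   }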
diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h
index c43b90e9cd26..dde3f73883ca 100644
--- a/include/llvm/Linker/Linker.h
+++ b/include/llvm/Linker/Linker.h
@@ -10,10 +10,8 @@
 #ifndef LLVM_LINKER_LINKER_H
 #define LLVM_LINKER_LINKER_H

-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/FunctionInfo.h"
+#include "llvm/Linker/IRMover.h"

 namespace llvm {
 class Module;
 class StructType;
@@ -25,71 +23,55 @@ class Type;
 /// module since it is assumed that the user of this class will want to do
 /// something with it after the linking.
 class Linker {
+  IRMover Mover;
+
 public:
-  struct StructTypeKeyInfo {
-    struct KeyTy {
-      ArrayRef<Type *> ETypes;
-      bool IsPacked;
-      KeyTy(ArrayRef<Type *> E, bool P);
-      KeyTy(const StructType *ST);
-      bool operator==(const KeyTy &that) const;
-      bool operator!=(const KeyTy &that) const;
-    };
-    static StructType *getEmptyKey();
-    static StructType *getTombstoneKey();
-    static unsigned getHashValue(const KeyTy &Key);
-    static unsigned getHashValue(const StructType *ST);
-    static bool isEqual(const KeyTy &LHS, const StructType *RHS);
-    static bool isEqual(const StructType *LHS, const StructType *RHS);
+  enum Flags {
+    None = 0,
+    OverrideFromSrc = (1 << 0),
+    LinkOnlyNeeded = (1 << 1),
+    InternalizeLinkedSymbols = (1 << 2)
   };

-  typedef DenseSet<StructType *, StructTypeKeyInfo> NonOpaqueStructTypeSet;
-  typedef DenseSet<StructType *> OpaqueStructTypeSet;
+  Linker(Module &M);

-  struct IdentifiedStructTypeSet {
-    // The set of opaque types is the composite module.
-    OpaqueStructTypeSet OpaqueStructTypes;
-
-    // The set of identified but non opaque structures in the composite module.
-    NonOpaqueStructTypeSet NonOpaqueStructTypes;
-
-    void addNonOpaque(StructType *Ty);
-    void switchToNonOpaque(StructType *Ty);
-    void addOpaque(StructType *Ty);
-    StructType *findNonOpaque(ArrayRef<Type *> ETypes, bool IsPacked);
-    bool hasType(StructType *Ty);
-  };
-
-  Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
-  Linker(Module *M);
-  ~Linker();
-
-  Module *getModule() const { return Composite; }
-  void deleteModule();
-
-  /// \brief Link \p Src into the composite. The source is destroyed.
+  /// \brief Link \p Src into the composite.
+  ///
   /// Passing OverrideSymbols as true will have symbols from Src
   /// shadow those in the Dest.
+  /// For ThinLTO function importing/exporting the \p FunctionInfoIndex
+  /// is passed. If \p FunctionsToImport is provided, only the functions that
+  /// are part of the set will be imported from the source module.
+  /// The \p ValIDToTempMDMap is populated by the linker when function
+  /// importing is performed.
+  ///
   /// Returns true on error.
-  bool linkInModule(Module *Src, bool OverrideSymbols = false);
+  bool linkInModule(std::unique_ptr<Module> Src, unsigned Flags = Flags::None,
+                    const FunctionInfoIndex *Index = nullptr,
+                    DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
+                    DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr);

-  /// \brief Set the composite to the passed-in module.
-  void setModule(Module *Dst);
+  /// This exists to implement the deprecated LLVMLinkModules C API. Don't use
+  /// for anything else.
+  bool linkInModuleForCAPI(Module &Src);

-  static bool LinkModules(Module *Dest, Module *Src,
-                          DiagnosticHandlerFunction DiagnosticHandler);
+  static bool linkModules(Module &Dest, std::unique_ptr<Module> Src,
+                          unsigned Flags = Flags::None);

-  static bool LinkModules(Module *Dest, Module *Src);
-
-private:
-  void init(Module *M, DiagnosticHandlerFunction DiagnosticHandler);
-  Module *Composite;
-
-  IdentifiedStructTypeSet IdentifiedStructTypes;
-
-  DiagnosticHandlerFunction DiagnosticHandler;
+  /// \brief Link metadata from \p Src into the composite. The source is
+  /// destroyed.
+  ///
+  /// The \p ValIDToTempMDMap should have been populated earlier during
+  /// function importing from \p Src.
+  bool linkInMetadata(Module &Src,
+                      DenseMap<unsigned, MDNode *> *ValIDToTempMDMap);
 };

+/// Create a new module with exported local functions renamed and promoted
+/// for ThinLTO.
+std::unique_ptr<Module> renameModuleForThinLTO(std::unique_ptr<Module> M,
+                                               const FunctionInfoIndex *Index);
+
 } // End llvm namespace

 #endif
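//===----------------------------------------------------------------------===//
// Sketch of the new Linker ownership model (hypothetical client code, not
// part of this patch): the source module is now passed by unique_ptr and is
// destroyed by the link, and behavior is tuned with the Flags bitmask above.
//===----------------------------------------------------------------------===//
//
//   static bool mergeModulesSketch(llvm::Module &Dest,
//                                  std::unique_ptr<llvm::Module> Src) {
//     llvm::Linker L(Dest);
//     // Returns true on error; OverrideFromSrc lets Src's symbols shadow
//     // those already present in Dest.
//     return L.linkInModule(std::move(Src),
//                           llvm::Linker::Flags::OverrideFromSrc);
//   }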
diff --git a/include/llvm/MC/ConstantPools.h b/include/llvm/MC/ConstantPools.h
index 9aa4663ba0fc..552e1443e7d0 100644
--- a/include/llvm/MC/ConstantPools.h
+++ b/include/llvm/MC/ConstantPools.h
@@ -17,6 +17,7 @@

 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/SMLoc.h"

 namespace llvm {
 class MCContext;
@@ -26,11 +27,12 @@ class MCStreamer;
 class MCSymbol;

 struct ConstantPoolEntry {
-  ConstantPoolEntry(MCSymbol *L, const MCExpr *Val, unsigned Sz)
-      : Label(L), Value(Val), Size(Sz) {}
+  ConstantPoolEntry(MCSymbol *L, const MCExpr *Val, unsigned Sz, SMLoc Loc_)
+      : Label(L), Value(Val), Size(Sz), Loc(Loc_) {}
   MCSymbol *Label;
   const MCExpr *Value;
   unsigned Size;
+  SMLoc Loc;
 };

 // A class to keep track of assembler-generated constant pools that are used to
@@ -49,7 +51,7 @@ public:
   //
   // \returns a MCExpr that references the newly inserted value
   const MCExpr *addEntry(const MCExpr *Value, MCContext &Context,
-                         unsigned Size);
+                         unsigned Size, SMLoc Loc);

   // Emit the contents of the constant pool using the provided streamer.
   void emitEntries(MCStreamer &Streamer);
@@ -80,7 +82,7 @@ public:
   void emitAll(MCStreamer &Streamer);
   void emitForCurrentSection(MCStreamer &Streamer);
   const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr,
-                         unsigned Size);
+                         unsigned Size, SMLoc Loc);

 private:
   ConstantPool *getConstantPool(MCSection *Section);
diff --git a/include/llvm/MC/MCAsmBackend.h b/include/llvm/MC/MCAsmBackend.h
index 2bfad2d355b8..51312ff80447 100644
--- a/include/llvm/MC/MCAsmBackend.h
+++ b/include/llvm/MC/MCAsmBackend.h
@@ -67,6 +67,11 @@ public:
   /// Get the number of target specific fixup kinds.
   virtual unsigned getNumFixupKinds() const = 0;

+  /// Map a relocation name used in .reloc to a fixup kind.
+  /// Returns true and sets MappedKind if Name is successfully mapped.
+  /// Otherwise returns false and leaves MappedKind unchanged.
+  virtual bool getFixupKind(StringRef Name, MCFixupKind &MappedKind) const;
+
   /// Get information on a fixup kind.
   virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;

diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 9bb0fa63c523..384584ef4ef0 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -414,6 +414,15 @@ public:
   /// syntactically correct.
   virtual bool isValidUnquotedName(StringRef Name) const;

+  /// Return true if the .section directive should be omitted when
+  /// emitting \p SectionName.
For example: + /// + /// shouldOmitSectionDirective(".text") + /// + /// returns false => .section .text,#alloc,#execinstr + /// returns true => .text + virtual bool shouldOmitSectionDirective(StringRef SectionName) const; + bool usesSunStyleELFSectionSwitchSyntax() const { return SunStyleELFSectionSwitchSyntax; } diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index 0642af837e7e..c0bd12875839 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -10,23 +10,18 @@ #ifndef LLVM_MC_MCASSEMBLER_H #define LLVM_MC_MCASSEMBLER_H -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator.h" #include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCLinkerOptimizationHint.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/DataTypes.h" -#include -#include // FIXME: Shouldn't be needed. +#include "llvm/MC/MCSymbol.h" namespace llvm { class raw_ostream; @@ -42,476 +37,6 @@ class MCSubtargetInfo; class MCValue; class MCAsmBackend; -class MCFragment : public ilist_node { - friend class MCAsmLayout; - - MCFragment(const MCFragment &) = delete; - void operator=(const MCFragment &) = delete; - -public: - enum FragmentType : uint8_t { - FT_Align, - FT_Data, - FT_CompactEncodedInst, - FT_Fill, - FT_Relaxable, - FT_Org, - FT_Dwarf, - FT_DwarfFrame, - FT_LEB, - FT_SafeSEH - }; - -private: - FragmentType Kind; - -protected: - bool HasInstructions; - -private: - /// \brief Should this fragment be aligned to the end of a bundle? - bool AlignToBundleEnd; - - uint8_t BundlePadding; - - /// LayoutOrder - The layout order of this fragment. - unsigned LayoutOrder; - - /// The data for the section this fragment is in. - MCSection *Parent; - - /// Atom - The atom this fragment is in, as represented by it's defining - /// symbol. - const MCSymbol *Atom; - - /// \name Assembler Backend Data - /// @{ - // - // FIXME: This could all be kept private to the assembler implementation. - - /// Offset - The offset of this fragment in its section. This is ~0 until - /// initialized. - uint64_t Offset; - - /// @} - -protected: - MCFragment(FragmentType Kind, bool HasInstructions, - uint8_t BundlePadding, MCSection *Parent = nullptr); - - ~MCFragment(); -private: - - // This is a friend so that the sentinal can be created. - friend struct ilist_sentinel_traits; - MCFragment(); - -public: - /// Destroys the current fragment. - /// - /// This must be used instead of delete as MCFragment is non-virtual. - /// This method will dispatch to the appropriate subclass. - void destroy(); - - FragmentType getKind() const { return Kind; } - - MCSection *getParent() const { return Parent; } - void setParent(MCSection *Value) { Parent = Value; } - - const MCSymbol *getAtom() const { return Atom; } - void setAtom(const MCSymbol *Value) { Atom = Value; } - - unsigned getLayoutOrder() const { return LayoutOrder; } - void setLayoutOrder(unsigned Value) { LayoutOrder = Value; } - - /// \brief Does this fragment have instructions emitted into it? By default - /// this is false, but specific fragment types may set it to true. 
- bool hasInstructions() const { return HasInstructions; } - - /// \brief Should this fragment be placed at the end of an aligned bundle? - bool alignToBundleEnd() const { return AlignToBundleEnd; } - void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; } - - /// \brief Get the padding size that must be inserted before this fragment. - /// Used for bundling. By default, no padding is inserted. - /// Note that padding size is restricted to 8 bits. This is an optimization - /// to reduce the amount of space used for each fragment. In practice, larger - /// padding should never be required. - uint8_t getBundlePadding() const { return BundlePadding; } - - /// \brief Set the padding size for this fragment. By default it's a no-op, - /// and only some fragments have a meaningful implementation. - void setBundlePadding(uint8_t N) { BundlePadding = N; } - - void dump(); -}; - -/// Interface implemented by fragments that contain encoded instructions and/or -/// data. -/// -class MCEncodedFragment : public MCFragment { -protected: - MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions, - MCSection *Sec) - : MCFragment(FType, HasInstructions, 0, Sec) {} - -public: - static bool classof(const MCFragment *F) { - MCFragment::FragmentType Kind = F->getKind(); - switch (Kind) { - default: - return false; - case MCFragment::FT_Relaxable: - case MCFragment::FT_CompactEncodedInst: - case MCFragment::FT_Data: - return true; - } - } -}; - -/// Interface implemented by fragments that contain encoded instructions and/or -/// data. -/// -template -class MCEncodedFragmentWithContents : public MCEncodedFragment { - SmallVector Contents; - -protected: - MCEncodedFragmentWithContents(MCFragment::FragmentType FType, - bool HasInstructions, - MCSection *Sec) - : MCEncodedFragment(FType, HasInstructions, Sec) {} - -public: - SmallVectorImpl &getContents() { return Contents; } - const SmallVectorImpl &getContents() const { return Contents; } -}; - -/// Interface implemented by fragments that contain encoded instructions and/or -/// data and also have fixups registered. -/// -template -class MCEncodedFragmentWithFixups : - public MCEncodedFragmentWithContents { - - /// Fixups - The list of fixups in this fragment. - SmallVector Fixups; - -protected: - MCEncodedFragmentWithFixups(MCFragment::FragmentType FType, - bool HasInstructions, - MCSection *Sec) - : MCEncodedFragmentWithContents(FType, HasInstructions, - Sec) {} - -public: - typedef SmallVectorImpl::const_iterator const_fixup_iterator; - typedef SmallVectorImpl::iterator fixup_iterator; - - SmallVectorImpl &getFixups() { return Fixups; } - const SmallVectorImpl &getFixups() const { return Fixups; } - - fixup_iterator fixup_begin() { return Fixups.begin(); } - const_fixup_iterator fixup_begin() const { return Fixups.begin(); } - - fixup_iterator fixup_end() { return Fixups.end(); } - const_fixup_iterator fixup_end() const { return Fixups.end(); } - - static bool classof(const MCFragment *F) { - MCFragment::FragmentType Kind = F->getKind(); - return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data; - } -}; - -/// Fragment for data and encoded instructions. 
-/// -class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> { -public: - MCDataFragment(MCSection *Sec = nullptr) - : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false, Sec) {} - - void setHasInstructions(bool V) { HasInstructions = V; } - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Data; - } -}; - -/// This is a compact (memory-size-wise) fragment for holding an encoded -/// instruction (non-relaxable) that has no fixups registered. When applicable, -/// it can be used instead of MCDataFragment and lead to lower memory -/// consumption. -/// -class MCCompactEncodedInstFragment : public MCEncodedFragmentWithContents<4> { -public: - MCCompactEncodedInstFragment(MCSection *Sec = nullptr) - : MCEncodedFragmentWithContents(FT_CompactEncodedInst, true, Sec) { - } - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_CompactEncodedInst; - } -}; - -/// A relaxable fragment holds on to its MCInst, since it may need to be -/// relaxed during the assembler layout and relaxation stage. -/// -class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> { - - /// Inst - The instruction this is a fragment for. - MCInst Inst; - - /// STI - The MCSubtargetInfo in effect when the instruction was encoded. - /// Keep a copy instead of a reference to make sure that updates to STI - /// in the assembler are not seen here. - const MCSubtargetInfo STI; - -public: - MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI, - MCSection *Sec = nullptr) - : MCEncodedFragmentWithFixups(FT_Relaxable, true, Sec), - Inst(Inst), STI(STI) {} - - const MCInst &getInst() const { return Inst; } - void setInst(const MCInst &Value) { Inst = Value; } - - const MCSubtargetInfo &getSubtargetInfo() { return STI; } - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Relaxable; - } -}; - -class MCAlignFragment : public MCFragment { - - /// Alignment - The alignment to ensure, in bytes. - unsigned Alignment; - - /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead - /// of using the provided value. The exact interpretation of this flag is - /// target dependent. - bool EmitNops : 1; - - /// Value - Value to use for filling padding bytes. - int64_t Value; - - /// ValueSize - The size of the integer (in bytes) of \p Value. - unsigned ValueSize; - - /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment - /// cannot be satisfied in this width then this fragment is ignored. - unsigned MaxBytesToEmit; - -public: - MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize, - unsigned MaxBytesToEmit, MCSection *Sec = nullptr) - : MCFragment(FT_Align, false, 0, Sec), Alignment(Alignment), - EmitNops(false), Value(Value), - ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {} - - /// \name Accessors - /// @{ - - unsigned getAlignment() const { return Alignment; } - - int64_t getValue() const { return Value; } - - unsigned getValueSize() const { return ValueSize; } - - unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } - - bool hasEmitNops() const { return EmitNops; } - void setEmitNops(bool Value) { EmitNops = Value; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Align; - } -}; - -class MCFillFragment : public MCFragment { - - /// Value - Value to use for filling bytes. 
- int64_t Value; - - /// ValueSize - The size (in bytes) of \p Value to use when filling, or 0 if - /// this is a virtual fill fragment. - unsigned ValueSize; - - /// Size - The number of bytes to insert. - uint64_t Size; - -public: - MCFillFragment(int64_t Value, unsigned ValueSize, uint64_t Size, - MCSection *Sec = nullptr) - : MCFragment(FT_Fill, false, 0, Sec), Value(Value), ValueSize(ValueSize), - Size(Size) { - assert((!ValueSize || (Size % ValueSize) == 0) && - "Fill size must be a multiple of the value size!"); - } - - /// \name Accessors - /// @{ - - int64_t getValue() const { return Value; } - - unsigned getValueSize() const { return ValueSize; } - - uint64_t getSize() const { return Size; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Fill; - } -}; - -class MCOrgFragment : public MCFragment { - - /// Offset - The offset this fragment should start at. - const MCExpr *Offset; - - /// Value - Value to use for filling bytes. - int8_t Value; - -public: - MCOrgFragment(const MCExpr &Offset, int8_t Value, MCSection *Sec = nullptr) - : MCFragment(FT_Org, false, 0, Sec), Offset(&Offset), Value(Value) {} - - /// \name Accessors - /// @{ - - const MCExpr &getOffset() const { return *Offset; } - - uint8_t getValue() const { return Value; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Org; - } -}; - -class MCLEBFragment : public MCFragment { - - /// Value - The value this fragment should contain. - const MCExpr *Value; - - /// IsSigned - True if this is a sleb128, false if uleb128. - bool IsSigned; - - SmallString<8> Contents; - -public: - MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr) - : MCFragment(FT_LEB, false, 0, Sec), Value(&Value_), IsSigned(IsSigned_) { - Contents.push_back(0); - } - - /// \name Accessors - /// @{ - - const MCExpr &getValue() const { return *Value; } - - bool isSigned() const { return IsSigned; } - - SmallString<8> &getContents() { return Contents; } - const SmallString<8> &getContents() const { return Contents; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_LEB; - } -}; - -class MCDwarfLineAddrFragment : public MCFragment { - - /// LineDelta - the value of the difference between the two line numbers - /// between two .loc dwarf directives. - int64_t LineDelta; - - /// AddrDelta - The expression for the difference of the two symbols that - /// make up the address delta between two .loc dwarf directives. - const MCExpr *AddrDelta; - - SmallString<8> Contents; - -public: - MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta, - MCSection *Sec = nullptr) - : MCFragment(FT_Dwarf, false, 0, Sec), LineDelta(LineDelta), - AddrDelta(&AddrDelta) { - Contents.push_back(0); - } - - /// \name Accessors - /// @{ - - int64_t getLineDelta() const { return LineDelta; } - - const MCExpr &getAddrDelta() const { return *AddrDelta; } - - SmallString<8> &getContents() { return Contents; } - const SmallString<8> &getContents() const { return Contents; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_Dwarf; - } -}; - -class MCDwarfCallFrameFragment : public MCFragment { - - /// AddrDelta - The expression for the difference of the two symbols that - /// make up the address delta between two .cfi_* dwarf directives. 
- const MCExpr *AddrDelta; - - SmallString<8> Contents; - -public: - MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr) - : MCFragment(FT_DwarfFrame, false, 0, Sec), AddrDelta(&AddrDelta) { - Contents.push_back(0); - } - - /// \name Accessors - /// @{ - - const MCExpr &getAddrDelta() const { return *AddrDelta; } - - SmallString<8> &getContents() { return Contents; } - const SmallString<8> &getContents() const { return Contents; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_DwarfFrame; - } -}; - -class MCSafeSEHFragment : public MCFragment { - const MCSymbol *Sym; - -public: - MCSafeSEHFragment(const MCSymbol *Sym, MCSection *Sec = nullptr) - : MCFragment(FT_SafeSEH, false, 0, Sec), Sym(Sym) {} - - /// \name Accessors - /// @{ - - const MCSymbol *getSymbol() { return Sym; } - const MCSymbol *getSymbol() const { return Sym; } - - /// @} - - static bool classof(const MCFragment *F) { - return F->getKind() == MCFragment::FT_SafeSEH; - } -}; - // FIXME: This really doesn't belong here. See comments below. struct IndirectSymbolData { MCSymbol *Symbol; @@ -575,8 +100,6 @@ private: MCObjectWriter &Writer; - raw_ostream &OS; - SectionListType Sections; SymbolDataListType Symbols; @@ -591,6 +114,8 @@ private: /// List of declared file names std::vector FileNames; + MCDwarfLineTableParams LTParams; + /// The set of function symbols for which a .thumb_func directive has /// been seen. // @@ -607,6 +132,7 @@ private: unsigned RelaxAll : 1; unsigned SubsectionsViaSymbols : 1; + unsigned IncrementalLinkerCompatible : 1; /// ELF specific e_header flags // It would be good if there were an MCELFAssembler class to hold this. @@ -712,16 +238,13 @@ public: public: /// Construct a new assembler instance. - /// - /// \param OS The stream to output to. // // FIXME: How are we going to parameterize this? Two obvious options are stay // concrete and require clients to pass in a target like object. The other // option is to make this abstract, and have targets provide concrete // implementations as we do with AsmParser. MCAssembler(MCContext &Context_, MCAsmBackend &Backend_, - MCCodeEmitter &Emitter_, MCObjectWriter &Writer_, - raw_ostream &OS); + MCCodeEmitter &Emitter_, MCObjectWriter &Writer_); ~MCAssembler(); /// Reuse an assembler instance @@ -736,15 +259,28 @@ public: MCObjectWriter &getWriter() const { return Writer; } + MCDwarfLineTableParams getDWARFLinetableParams() const { return LTParams; } + void setDWARFLinetableParams(MCDwarfLineTableParams P) { LTParams = P; } + /// Finish - Do final processing and write the object to the output stream. /// \p Writer is used for custom object writer (as the MCJIT does), /// if not specified it is automatically created from backend. void Finish(); + // Layout all section and prepare them for emission. + void layout(MCAsmLayout &Layout); + // FIXME: This does not belong here. 
bool getSubsectionsViaSymbols() const { return SubsectionsViaSymbols; } void setSubsectionsViaSymbols(bool Value) { SubsectionsViaSymbols = Value; } + bool isIncrementalLinkerCompatible() const { + return IncrementalLinkerCompatible; + } + void setIncrementalLinkerCompatible(bool Value) { + IncrementalLinkerCompatible = Value; + } + bool getRelaxAll() const { return RelaxAll; } void setRelaxAll(bool Value) { RelaxAll = Value; } @@ -856,13 +392,7 @@ public: /// \name Backend Data Access /// @{ - bool registerSection(MCSection &Section) { - if (Section.isRegistered()) - return false; - Sections.push_back(&Section); - Section.setIsRegistered(true); - return true; - } + bool registerSection(MCSection &Section); void registerSymbol(const MCSymbol &Symbol, bool *Created = nullptr); diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 41169e9a12a0..e5a9afd9968c 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -71,6 +72,10 @@ namespace llvm { /// objects. BumpPtrAllocator Allocator; + SpecificBumpPtrAllocator COFFAllocator; + SpecificBumpPtrAllocator ELFAllocator; + SpecificBumpPtrAllocator MachOAllocator; + /// Bindings of names to symbols. SymbolTable Symbols; @@ -108,7 +113,7 @@ namespace llvm { /// directive is used or it is an error. char *SecureLogFile; /// The stream that gets written to for the .secure_log_unique directive. - raw_ostream *SecureLog; + std::unique_ptr SecureLog; /// Boolean toggled when .secure_log_unique / .secure_log_reset is seen to /// catch errors if .secure_log_unique appears twice without /// .secure_log_reset appearing between them. @@ -203,9 +208,13 @@ namespace llvm { std::map COFFUniquingMap; StringMap ELFRelSecNames; + SpecificBumpPtrAllocator MCSubtargetAllocator; + /// Do automatic reset in destructor bool AutoReset; + bool HadError; + MCSymbol *createSymbolImpl(const StringMapEntry *Name, bool CanBeUnnamed); MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix, @@ -376,6 +385,9 @@ namespace llvm { MCSectionCOFF *getAssociativeCOFFSection(MCSectionCOFF *Sec, const MCSymbol *KeySym); + // Create and save a copy of STI and return a reference to the copy. + MCSubtargetInfo &getSubtargetCopy(const MCSubtargetInfo &STI); + /// @} /// \name Dwarf Management @@ -494,9 +506,11 @@ namespace llvm { /// @} char *getSecureLogFile() { return SecureLogFile; } - raw_ostream *getSecureLog() { return SecureLog; } + raw_fd_ostream *getSecureLog() { return SecureLog.get(); } bool getSecureLogUsed() { return SecureLogUsed; } - void setSecureLog(raw_ostream *Value) { SecureLog = Value; } + void setSecureLog(std::unique_ptr Value) { + SecureLog = std::move(Value); + } void setSecureLogUsed(bool Value) { SecureLogUsed = Value; } void *allocate(unsigned Size, unsigned Align = 8) { @@ -504,11 +518,13 @@ namespace llvm { } void deallocate(void *Ptr) {} + bool hadError() { return HadError; } + void reportError(SMLoc L, const Twine &Msg); // Unrecoverable error has occurred. Display the best diagnostic we can // and bail via exit(1). For now, most MC backend errors are unrecoverable. // FIXME: We should really do something about that. 
LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L, - const Twine &Msg) const; + const Twine &Msg); }; } // end namespace llvm @@ -538,7 +554,7 @@ namespace llvm { /// allocator supports it). /// \return The allocated memory. Could be NULL. inline void *operator new(size_t Bytes, llvm::MCContext &C, - size_t Alignment = 8) throw() { + size_t Alignment = 8) LLVM_NOEXCEPT { return C.allocate(Bytes, Alignment); } /// \brief Placement delete companion to the new above. @@ -547,8 +563,8 @@ inline void *operator new(size_t Bytes, llvm::MCContext &C, /// invoking it directly; see the new operator for more details. This operator /// is called implicitly by the compiler if a placement new expression using /// the MCContext throws in the object constructor. -inline void operator delete(void *Ptr, llvm::MCContext &C, size_t) - throw () { +inline void operator delete(void *Ptr, llvm::MCContext &C, + size_t) LLVM_NOEXCEPT { C.deallocate(Ptr); } @@ -571,8 +587,8 @@ inline void operator delete(void *Ptr, llvm::MCContext &C, size_t) /// \param Alignment The alignment of the allocated memory (if the underlying /// allocator supports it). /// \return The allocated memory. Could be NULL. -inline void *operator new[](size_t Bytes, llvm::MCContext& C, - size_t Alignment = 8) throw() { +inline void *operator new[](size_t Bytes, llvm::MCContext &C, + size_t Alignment = 8) LLVM_NOEXCEPT { return C.allocate(Bytes, Alignment); } @@ -582,7 +598,7 @@ inline void *operator new[](size_t Bytes, llvm::MCContext& C, /// invoking it directly; see the new[] operator for more details. This operator /// is called implicitly by the compiler if a placement new[] expression using /// the MCContext throws in the object constructor. -inline void operator delete[](void *Ptr, llvm::MCContext &C) throw () { +inline void operator delete[](void *Ptr, llvm::MCContext &C) LLVM_NOEXCEPT { C.deallocate(Ptr); } diff --git a/include/llvm/MC/MCDirectives.h b/include/llvm/MC/MCDirectives.h index f9d66e0b15d7..326b2a1ac061 100644 --- a/include/llvm/MC/MCDirectives.h +++ b/include/llvm/MC/MCDirectives.h @@ -62,7 +62,9 @@ enum MCDataRegionType { enum MCVersionMinType { MCVM_IOSVersionMin, ///< .ios_version_min - MCVM_OSXVersionMin ///< .macosx_version_min + MCVM_OSXVersionMin, ///< .macosx_version_min + MCVM_TvOSVersionMin, ///< .tvos_version_min + MCVM_WatchOSVersionMin, ///< .watchos_version_min }; } // end namespace llvm diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index 1e72dfee4ad1..8a50863a0c39 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -19,6 +19,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCSection.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/raw_ostream.h" @@ -31,7 +32,6 @@ namespace llvm { class MCAsmBackend; class MCContext; class MCObjectStreamer; -class MCSection; class MCStreamer; class MCSymbol; class SourceMgr; @@ -182,6 +182,19 @@ public: } }; +struct MCDwarfLineTableParams { + /// First special line opcode - leave room for the standard opcodes. + /// Note: If you want to change this, you'll have to update the + /// "StandardOpcodeLengths" table that is emitted in + /// \c Emit(). + uint8_t DWARF2LineOpcodeBase = 13; + /// Minimum line offset in a special line info. opcode. The value + /// -5 was chosen to give a reasonable range of values. + int8_t DWARF2LineBase = -5; + /// Range of line offsets in a special line info. opcode. 
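+  /// With these three parameters a DWARF "special opcode" encodes a
+  /// (LineDelta, AddrDelta) pair as
+  ///   Opcode = (LineDelta - DWARF2LineBase)
+  ///          + (DWARF2LineRange * AddrDelta) + DWARF2LineOpcodeBase
+  /// (worked example with the default values here, for illustration only:
+  /// LineDelta = 2 and AddrDelta = 3 encode as (2 + 5) + 14 * 3 + 13 = 62).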
+  uint8_t DWARF2LineRange = 14;
+};
+
 struct MCDwarfLineTableHeader {
   MCSymbol *Label;
   SmallVector<std::string, 3> MCDwarfDirs;
@@ -192,9 +205,11 @@ struct MCDwarfLineTableHeader {
   MCDwarfLineTableHeader() : Label(nullptr) {}
   unsigned getFile(StringRef &Directory, StringRef &FileName,
                    unsigned FileNumber = 0);
-  std::pair<MCSymbol *, MCSymbol *> Emit(MCStreamer *MCOS) const;
+  std::pair<MCSymbol *, MCSymbol *> Emit(MCStreamer *MCOS,
+                                         MCDwarfLineTableParams Params) const;
   std::pair<MCSymbol *, MCSymbol *>
-  Emit(MCStreamer *MCOS, ArrayRef<char> SpecialOpcodeLengths) const;
+  Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
+       ArrayRef<char> SpecialOpcodeLengths) const;
 };

 class MCDwarfDwoLineTable {
@@ -206,7 +221,7 @@ public:
   unsigned getFile(StringRef Directory, StringRef FileName) {
     return Header.getFile(Directory, FileName);
   }
-  void Emit(MCStreamer &MCOS) const;
+  void Emit(MCStreamer &MCOS, MCDwarfLineTableParams Params) const;
 };

 class MCDwarfLineTable {
@@ -215,10 +230,10 @@ class MCDwarfLineTable {

 public:
   // This emits the Dwarf file and the line tables for all Compile Units.
-  static void Emit(MCObjectStreamer *MCOS);
+  static void Emit(MCObjectStreamer *MCOS, MCDwarfLineTableParams Params);

   // This emits the Dwarf file and the line tables for a given Compile Unit.
-  void EmitCU(MCObjectStreamer *MCOS) const;
+  void EmitCU(MCObjectStreamer *MCOS, MCDwarfLineTableParams Params) const;

   unsigned getFile(StringRef &Directory, StringRef &FileName,
                    unsigned FileNumber = 0);
@@ -262,11 +277,12 @@ public:
 class MCDwarfLineAddr {
 public:
   /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
-  static void Encode(MCContext &Context, int64_t LineDelta, uint64_t AddrDelta,
-                     raw_ostream &OS);
+  static void Encode(MCContext &Context, MCDwarfLineTableParams Params,
+                     int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS);

   /// Utility function to emit the encoding to a streamer.
- static void Emit(MCStreamer *MCOS, int64_t LineDelta, uint64_t AddrDelta); + static void Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, + int64_t LineDelta, uint64_t AddrDelta); }; class MCGenDwarfInfo { @@ -324,7 +340,8 @@ public: OpRestore, OpUndefined, OpRegister, - OpWindowSave + OpWindowSave, + OpGnuArgsSize }; private: @@ -438,6 +455,11 @@ public: return MCCFIInstruction(OpEscape, L, 0, 0, Vals); } + /// \brief A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE + static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size) { + return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, ""); + } + OpType getOperation() const { return Operation; } MCSymbol *getLabel() const { return Label; } @@ -457,7 +479,7 @@ public: int getOffset() const { assert(Operation == OpDefCfa || Operation == OpOffset || Operation == OpRelOffset || Operation == OpDefCfaOffset || - Operation == OpAdjustCfaOffset); + Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize); return Offset; } diff --git a/include/llvm/MC/MCELFObjectWriter.h b/include/llvm/MC/MCELFObjectWriter.h index 01f694d3b756..193dac018b2b 100644 --- a/include/llvm/MC/MCELFObjectWriter.h +++ b/include/llvm/MC/MCELFObjectWriter.h @@ -57,8 +57,6 @@ public: case Triple::PS4: case Triple::FreeBSD: return ELF::ELFOSABI_FREEBSD; - case Triple::Linux: - return ELF::ELFOSABI_LINUX; default: return ELF::ELFOSABI_NONE; } diff --git a/include/llvm/MC/MCELFStreamer.h b/include/llvm/MC/MCELFStreamer.h index a5b257f5958b..6eb2c2c343ff 100644 --- a/include/llvm/MC/MCELFStreamer.h +++ b/include/llvm/MC/MCELFStreamer.h @@ -36,7 +36,6 @@ public: /// state management void reset() override { SeenIdent = false; - LocalCommons.clear(); BundleGroups.clear(); MCObjectStreamer::reset(); } @@ -69,7 +68,7 @@ public: void EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment = 0) override; void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc = SMLoc()) override; + SMLoc Loc = SMLoc()) override; void EmitFileDirective(StringRef Filename) override; @@ -77,8 +76,6 @@ public: void EmitValueToAlignment(unsigned, int64_t, unsigned, unsigned) override; - void Flush() override; - void FinishImpl() override; void EmitBundleAlignMode(unsigned AlignPow2) override; @@ -97,14 +94,6 @@ private: bool SeenIdent; - struct LocalCommon { - const MCSymbol *Symbol; - uint64_t Size; - unsigned ByteAlignment; - }; - - std::vector LocalCommons; - /// BundleGroups - The stack of fragments holding the bundle-locked /// instructions. llvm::SmallVector BundleGroups; diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index b3a607351a82..1d6bdef0af27 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -20,6 +20,7 @@ class MCAsmLayout; class MCAssembler; class MCContext; class MCFixup; +class MCFragment; class MCSection; class MCStreamer; class MCSymbol; @@ -115,7 +116,7 @@ public: /// currently defined as the absolute section for constants, or /// otherwise the section associated with the first defined symbol in the /// expression. 
-  MCSection *findAssociatedSection() const;
+  MCFragment *findAssociatedFragment() const;

   /// @}
 };
@@ -187,6 +188,7 @@ public:
     VK_WEAKREF, // The link between the symbols in .weakref foo, bar

     VK_ARM_NONE,
+    VK_ARM_GOT_PREL,
     VK_ARM_TARGET1,
     VK_ARM_TARGET2,
     VK_ARM_PREL31,
@@ -556,7 +558,7 @@ public:
                                  const MCAsmLayout *Layout,
                                  const MCFixup *Fixup) const = 0;
   virtual void visitUsedExpr(MCStreamer& Streamer) const = 0;
-  virtual MCSection *findAssociatedSection() const = 0;
+  virtual MCFragment *findAssociatedFragment() const = 0;

   virtual void fixELFSymbolsInTLSFixups(MCAssembler &) const = 0;

diff --git a/include/llvm/MC/MCFixedLenDisassembler.h b/include/llvm/MC/MCFixedLenDisassembler.h
index ad99943df2c3..ad34d9494bb9 100644
--- a/include/llvm/MC/MCFixedLenDisassembler.h
+++ b/include/llvm/MC/MCFixedLenDisassembler.h
@@ -22,6 +22,8 @@ enum DecoderOps {
                   // uleb128 Val, uint16_t NumToSkip)
   OPC_CheckPredicate, // OPC_CheckPredicate(uleb128 PIdx, uint16_t NumToSkip)
   OPC_Decode,         // OPC_Decode(uleb128 Opcode, uleb128 DIdx)
+  OPC_TryDecode,      // OPC_TryDecode(uleb128 Opcode, uleb128 DIdx,
+                      //               uint16_t NumToSkip)
   OPC_SoftFail, // OPC_SoftFail(uleb128 PMask, uleb128 NMask)
   OPC_Fail      // OPC_Fail()
 };
diff --git a/include/llvm/MC/MCFragment.h b/include/llvm/MC/MCFragment.h
new file mode 100644
index 000000000000..7d6db525ce61
--- /dev/null
+++ b/include/llvm/MC/MCFragment.h
@@ -0,0 +1,506 @@
+//===- MCFragment.h - Fragment type hierarchy -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCFRAGMENT_H
+#define LLVM_MC_MCFRAGMENT_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+
+namespace llvm {
+class MCSection;
+class MCSymbol;
+class MCSubtargetInfo;
+
+class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
+  friend class MCAsmLayout;
+
+  MCFragment(const MCFragment &) = delete;
+  void operator=(const MCFragment &) = delete;
+
+public:
+  enum FragmentType : uint8_t {
+    FT_Align,
+    FT_Data,
+    FT_CompactEncodedInst,
+    FT_Fill,
+    FT_Relaxable,
+    FT_Org,
+    FT_Dwarf,
+    FT_DwarfFrame,
+    FT_LEB,
+    FT_SafeSEH,
+    FT_Dummy
+  };
+
+private:
+  FragmentType Kind;
+
+protected:
+  bool HasInstructions;
+
+private:
+  /// \brief Should this fragment be aligned to the end of a bundle?
+  bool AlignToBundleEnd;
+
+  uint8_t BundlePadding;
+
+  /// LayoutOrder - The layout order of this fragment.
+  unsigned LayoutOrder;
+
+  /// The data for the section this fragment is in.
+  MCSection *Parent;
+
+  /// Atom - The atom this fragment is in, as represented by its defining
+  /// symbol.
+  const MCSymbol *Atom;
+
+  /// \name Assembler Backend Data
+  /// @{
+  //
+  // FIXME: This could all be kept private to the assembler implementation.
+
+  /// Offset - The offset of this fragment in its section. This is ~0 until
+  /// initialized.
+  uint64_t Offset;
+
+  /// @}
+
+protected:
+  MCFragment(FragmentType Kind, bool HasInstructions,
+             uint8_t BundlePadding, MCSection *Parent = nullptr);
+
+  ~MCFragment();
+private:
+
+  // This is a friend so that the sentinel can be created.
+  friend struct ilist_sentinel_traits<MCFragment>;
+  MCFragment();
+
+public:
+  /// Destroys the current fragment.
+  ///
+  /// This must be used instead of delete as MCFragment is non-virtual.
+  /// This method will dispatch to the appropriate subclass.
+  void destroy();
+
+  FragmentType getKind() const { return Kind; }
+
+  MCSection *getParent() const { return Parent; }
+  void setParent(MCSection *Value) { Parent = Value; }
+
+  const MCSymbol *getAtom() const { return Atom; }
+  void setAtom(const MCSymbol *Value) { Atom = Value; }
+
+  unsigned getLayoutOrder() const { return LayoutOrder; }
+  void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
+
+  /// \brief Does this fragment have instructions emitted into it? By default
+  /// this is false, but specific fragment types may set it to true.
+  bool hasInstructions() const { return HasInstructions; }
+
+  /// \brief Should this fragment be placed at the end of an aligned bundle?
+  bool alignToBundleEnd() const { return AlignToBundleEnd; }
+  void setAlignToBundleEnd(bool V) { AlignToBundleEnd = V; }
+
+  /// \brief Get the padding size that must be inserted before this fragment.
+  /// Used for bundling. By default, no padding is inserted.
+  /// Note that padding size is restricted to 8 bits. This is an optimization
+  /// to reduce the amount of space used for each fragment. In practice, larger
+  /// padding should never be required.
+  uint8_t getBundlePadding() const { return BundlePadding; }
+
+  /// \brief Set the padding size for this fragment. By default it's a no-op,
+  /// and only some fragments have a meaningful implementation.
+  void setBundlePadding(uint8_t N) { BundlePadding = N; }
+
+  /// \brief Return true if the given fragment has FT_Dummy type.
+  bool isDummy() const { return Kind == FT_Dummy; }
+
+  void dump();
+};
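+// Illustrative use of the hierarchy above (hypothetical client code, not part
+// of this patch): fragments are dispatched with the usual LLVM cast machinery
+// via classof(), and must be destroyed with destroy(), never delete, since
+// MCFragment is non-virtual.
+//
+//   uint64_t fillSizeOrZero(const MCFragment &F) {
+//     if (const auto *FF = dyn_cast<MCFillFragment>(&F)) // defined below
+//       return FF->getSize();
+//     return 0; // Other kinds are only sized during layout.
+//   }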
+class MCDummyFragment : public MCFragment {
+public:
+  explicit MCDummyFragment(MCSection *Sec)
+      : MCFragment(FT_Dummy, false, 0, Sec) {}
+  static bool classof(const MCFragment *F) { return F->getKind() == FT_Dummy; }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data.
+///
+class MCEncodedFragment : public MCFragment {
+protected:
+  MCEncodedFragment(MCFragment::FragmentType FType, bool HasInstructions,
+                    MCSection *Sec)
+      : MCFragment(FType, HasInstructions, 0, Sec) {}
+
+public:
+  static bool classof(const MCFragment *F) {
+    MCFragment::FragmentType Kind = F->getKind();
+    switch (Kind) {
+    default:
+      return false;
+    case MCFragment::FT_Relaxable:
+    case MCFragment::FT_CompactEncodedInst:
+    case MCFragment::FT_Data:
+      return true;
+    }
+  }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data.
+///
+template <unsigned ContentsSize>
+class MCEncodedFragmentWithContents : public MCEncodedFragment {
+  SmallVector<char, ContentsSize> Contents;
+
+protected:
+  MCEncodedFragmentWithContents(MCFragment::FragmentType FType,
+                                bool HasInstructions,
+                                MCSection *Sec)
+      : MCEncodedFragment(FType, HasInstructions, Sec) {}
+
+public:
+  SmallVectorImpl<char> &getContents() { return Contents; }
+  const SmallVectorImpl<char> &getContents() const { return Contents; }
+};
+
+/// Interface implemented by fragments that contain encoded instructions and/or
+/// data and also have fixups registered.
+///
+template <unsigned ContentsSize, unsigned FixupsSize>
+class MCEncodedFragmentWithFixups :
+  public MCEncodedFragmentWithContents<ContentsSize> {
+
+  /// Fixups - The list of fixups in this fragment.
+  SmallVector<MCFixup, FixupsSize> Fixups;
+
+protected:
+  MCEncodedFragmentWithFixups(MCFragment::FragmentType FType,
+                              bool HasInstructions,
+                              MCSection *Sec)
+      : MCEncodedFragmentWithContents<ContentsSize>(FType, HasInstructions,
+                                                    Sec) {}
+
+public:
+  typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
+  typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
+
+  SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
+  const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
+
+  fixup_iterator fixup_begin() { return Fixups.begin(); }
+  const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
+
+  fixup_iterator fixup_end() { return Fixups.end(); }
+  const_fixup_iterator fixup_end() const { return Fixups.end(); }
+
+  static bool classof(const MCFragment *F) {
+    MCFragment::FragmentType Kind = F->getKind();
+    return Kind == MCFragment::FT_Relaxable || Kind == MCFragment::FT_Data;
+  }
+};
+
+/// Fragment for data and encoded instructions.
+///
+class MCDataFragment : public MCEncodedFragmentWithFixups<32, 4> {
+public:
+  MCDataFragment(MCSection *Sec = nullptr)
+      : MCEncodedFragmentWithFixups<32, 4>(FT_Data, false, Sec) {}
+
+  void setHasInstructions(bool V) { HasInstructions = V; }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_Data;
+  }
+};
+
+/// This is a compact (memory-size-wise) fragment for holding an encoded
+/// instruction (non-relaxable) that has no fixups registered. When applicable,
+/// it can be used instead of MCDataFragment and lead to lower memory
+/// consumption.
+///
+class MCCompactEncodedInstFragment : public MCEncodedFragmentWithContents<4> {
+public:
+  MCCompactEncodedInstFragment(MCSection *Sec = nullptr)
+      : MCEncodedFragmentWithContents(FT_CompactEncodedInst, true, Sec) {
+  }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_CompactEncodedInst;
+  }
+};
+
+/// A relaxable fragment holds on to its MCInst, since it may need to be
+/// relaxed during the assembler layout and relaxation stage.
+///
+class MCRelaxableFragment : public MCEncodedFragmentWithFixups<8, 1> {
+
+  /// Inst - The instruction this is a fragment for.
+  MCInst Inst;
+
+  /// STI - The MCSubtargetInfo in effect when the instruction was encoded.
+  const MCSubtargetInfo &STI;
+
+public:
+  MCRelaxableFragment(const MCInst &Inst, const MCSubtargetInfo &STI,
+                      MCSection *Sec = nullptr)
+      : MCEncodedFragmentWithFixups(FT_Relaxable, true, Sec),
+        Inst(Inst), STI(STI) {}
+
+  const MCInst &getInst() const { return Inst; }
+  void setInst(const MCInst &Value) { Inst = Value; }
+
+  const MCSubtargetInfo &getSubtargetInfo() { return STI; }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_Relaxable;
+  }
+};
+
+class MCAlignFragment : public MCFragment {
+
+  /// Alignment - The alignment to ensure, in bytes.
+  unsigned Alignment;
+
+  /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead
+  /// of using the provided value. The exact interpretation of this flag is
+  /// target dependent.
+  bool EmitNops : 1;
+
+  /// Value - Value to use for filling padding bytes.
+  int64_t Value;
+
+  /// ValueSize - The size of the integer (in bytes) of \p Value.
+  unsigned ValueSize;
+
+  /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment
+  /// cannot be satisfied in this width then this fragment is ignored.
+ unsigned MaxBytesToEmit; + +public: + MCAlignFragment(unsigned Alignment, int64_t Value, unsigned ValueSize, + unsigned MaxBytesToEmit, MCSection *Sec = nullptr) + : MCFragment(FT_Align, false, 0, Sec), Alignment(Alignment), + EmitNops(false), Value(Value), + ValueSize(ValueSize), MaxBytesToEmit(MaxBytesToEmit) {} + + /// \name Accessors + /// @{ + + unsigned getAlignment() const { return Alignment; } + + int64_t getValue() const { return Value; } + + unsigned getValueSize() const { return ValueSize; } + + unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } + + bool hasEmitNops() const { return EmitNops; } + void setEmitNops(bool Value) { EmitNops = Value; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Align; + } +}; + +class MCFillFragment : public MCFragment { + + /// Value - Value to use for filling bytes. + int64_t Value; + + /// ValueSize - The size (in bytes) of \p Value to use when filling, or 0 if + /// this is a virtual fill fragment. + unsigned ValueSize; + + /// Size - The number of bytes to insert. + uint64_t Size; + +public: + MCFillFragment(int64_t Value, unsigned ValueSize, uint64_t Size, + MCSection *Sec = nullptr) + : MCFragment(FT_Fill, false, 0, Sec), Value(Value), ValueSize(ValueSize), + Size(Size) { + assert((!ValueSize || (Size % ValueSize) == 0) && + "Fill size must be a multiple of the value size!"); + } + + /// \name Accessors + /// @{ + + int64_t getValue() const { return Value; } + + unsigned getValueSize() const { return ValueSize; } + + uint64_t getSize() const { return Size; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Fill; + } +}; + +class MCOrgFragment : public MCFragment { + + /// Offset - The offset this fragment should start at. + const MCExpr *Offset; + + /// Value - Value to use for filling bytes. + int8_t Value; + +public: + MCOrgFragment(const MCExpr &Offset, int8_t Value, MCSection *Sec = nullptr) + : MCFragment(FT_Org, false, 0, Sec), Offset(&Offset), Value(Value) {} + + /// \name Accessors + /// @{ + + const MCExpr &getOffset() const { return *Offset; } + + uint8_t getValue() const { return Value; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Org; + } +}; + +class MCLEBFragment : public MCFragment { + + /// Value - The value this fragment should contain. + const MCExpr *Value; + + /// IsSigned - True if this is a sleb128, false if uleb128. + bool IsSigned; + + SmallString<8> Contents; + +public: + MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSection *Sec = nullptr) + : MCFragment(FT_LEB, false, 0, Sec), Value(&Value_), IsSigned(IsSigned_) { + Contents.push_back(0); + } + + /// \name Accessors + /// @{ + + const MCExpr &getValue() const { return *Value; } + + bool isSigned() const { return IsSigned; } + + SmallString<8> &getContents() { return Contents; } + const SmallString<8> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_LEB; + } +}; + +class MCDwarfLineAddrFragment : public MCFragment { + + /// LineDelta - the value of the difference between the two line numbers + /// between two .loc dwarf directives. + int64_t LineDelta; + + /// AddrDelta - The expression for the difference of the two symbols that + /// make up the address delta between two .loc dwarf directives. 
+ const MCExpr *AddrDelta; + + SmallString<8> Contents; + +public: + MCDwarfLineAddrFragment(int64_t LineDelta, const MCExpr &AddrDelta, + MCSection *Sec = nullptr) + : MCFragment(FT_Dwarf, false, 0, Sec), LineDelta(LineDelta), + AddrDelta(&AddrDelta) { + Contents.push_back(0); + } + + /// \name Accessors + /// @{ + + int64_t getLineDelta() const { return LineDelta; } + + const MCExpr &getAddrDelta() const { return *AddrDelta; } + + SmallString<8> &getContents() { return Contents; } + const SmallString<8> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Dwarf; + } +}; + +class MCDwarfCallFrameFragment : public MCFragment { + + /// AddrDelta - The expression for the difference of the two symbols that + /// make up the address delta between two .cfi_* dwarf directives. + const MCExpr *AddrDelta; + + SmallString<8> Contents; + +public: + MCDwarfCallFrameFragment(const MCExpr &AddrDelta, MCSection *Sec = nullptr) + : MCFragment(FT_DwarfFrame, false, 0, Sec), AddrDelta(&AddrDelta) { + Contents.push_back(0); + } + + /// \name Accessors + /// @{ + + const MCExpr &getAddrDelta() const { return *AddrDelta; } + + SmallString<8> &getContents() { return Contents; } + const SmallString<8> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_DwarfFrame; + } +}; + +class MCSafeSEHFragment : public MCFragment { + const MCSymbol *Sym; + +public: + MCSafeSEHFragment(const MCSymbol *Sym, MCSection *Sec = nullptr) + : MCFragment(FT_SafeSEH, false, 0, Sec), Sym(Sym) {} + + /// \name Accessors + /// @{ + + const MCSymbol *getSymbol() { return Sym; } + const MCSymbol *getSymbol() const { return Sym; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_SafeSEH; + } +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h index 6a582e82d00e..88aab73d4058 100644 --- a/include/llvm/MC/MCInstrDesc.h +++ b/include/llvm/MC/MCInstrDesc.h @@ -15,12 +15,12 @@ #ifndef LLVM_MC_MCINSTRDESC_H #define LLVM_MC_MCINSTRDESC_H +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/DataTypes.h" #include namespace llvm { class MCInst; - class MCRegisterInfo; class MCSubtargetInfo; class FeatureBitset; @@ -137,16 +137,16 @@ enum Flag { /// directly to describe itself. class MCInstrDesc { public: - unsigned short Opcode; // The opcode number - unsigned short NumOperands; // Num of args (may be more if variable_ops) - unsigned char NumDefs; // Num of args that are definitions - unsigned char Size; // Number of bytes in encoding. - unsigned short SchedClass; // enum identifying instr sched class - uint64_t Flags; // Flags identifying machine instr class - uint64_t TSFlags; // Target Specific Flag values - const uint16_t *ImplicitUses; // Registers implicitly read by this instr - const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr - const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands + unsigned short Opcode; // The opcode number + unsigned short NumOperands; // Num of args (may be more if variable_ops) + unsigned char NumDefs; // Num of args that are definitions + unsigned char Size; // Number of bytes in encoding. 
+  unsigned short SchedClass;     // enum identifying instr sched class
+  uint64_t Flags;                // Flags identifying machine instr class
+  uint64_t TSFlags;              // Target Specific Flag values
+  const MCPhysReg *ImplicitUses; // Registers implicitly read by this instr
+  const MCPhysReg *ImplicitDefs; // Registers implicitly defined by this instr
+  const MCOperandInfo *OpInfo;   // 'NumOperands' entries about operands

   // Subtarget feature that this is deprecated on, if any
   // -1 implies this is not deprecated by any single feature. It may still be
   // deprecated due to a "complex" reason, below.
@@ -336,8 +336,8 @@ public:

   /// \brief Return true if this instruction is convergent.
   ///
-  /// Convergent instructions may only be moved to locations that are
-  /// control-equivalent to their original positions.
+  /// Convergent instructions may not be made control-dependent on any
+  /// additional values.
   bool isConvergent() const { return Flags & (1 << MCID::Convergent); }

   //===--------------------------------------------------------------------===//
@@ -472,7 +472,7 @@ public:
   /// marked as implicitly reading the 'CL' register, which it always does.
   ///
   /// This method returns null if the instruction has no implicit uses.
-  const uint16_t *getImplicitUses() const { return ImplicitUses; }
+  const MCPhysReg *getImplicitUses() const { return ImplicitUses; }

   /// \brief Return the number of implicit uses this instruction has.
   unsigned getNumImplicitUses() const {
@@ -494,7 +494,7 @@ public:
   /// EAX/EDX/EFLAGS registers.
   ///
   /// This method returns null if the instruction has no implicit defs.
-  const uint16_t *getImplicitDefs() const { return ImplicitDefs; }
+  const MCPhysReg *getImplicitDefs() const { return ImplicitDefs; }

   /// \brief Return the number of implicit defs this instruction has.
   unsigned getNumImplicitDefs() const {
@@ -509,7 +509,7 @@ public:
   /// \brief Return true if this instruction implicitly
   /// uses the specified physical register.
   bool hasImplicitUseOfPhysReg(unsigned Reg) const {
-    if (const uint16_t *ImpUses = ImplicitUses)
+    if (const MCPhysReg *ImpUses = ImplicitUses)
       for (; *ImpUses; ++ImpUses)
         if (*ImpUses == Reg)
           return true;
diff --git a/include/llvm/MC/MCInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h
index 161705de7c4e..b2871a9805e1 100644
--- a/include/llvm/MC/MCInstrItineraries.h
+++ b/include/llvm/MC/MCInstrItineraries.h
@@ -38,7 +38,7 @@ namespace llvm {
 ///
 ///   { 2, x|y, 1 }
 ///      indicates that the stage occupies either FU x or FU y for 2
-///      consecuative cycles and that the next stage starts one cycle
+///      consecutive cycles and that the next stage starts one cycle
 ///      after this stage starts. That is, the stage requirements
 ///      overlap in time.
 ///
diff --git a/include/llvm/MC/MCLinkerOptimizationHint.h b/include/llvm/MC/MCLinkerOptimizationHint.h
index 4b6f7ecc9fba..a519c4b71b03 100644
--- a/include/llvm/MC/MCLinkerOptimizationHint.h
+++ b/include/llvm/MC/MCLinkerOptimizationHint.h
@@ -160,7 +160,7 @@ class MCLOHContainer {
 public:
   typedef SmallVectorImpl<MCLOHDirective> LOHDirectives;

-  MCLOHContainer() : EmitSize(0) {};
+  MCLOHContainer() : EmitSize(0) {}

   /// Const accessor to the directives.
const LOHDirectives &getDirectives() const { diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h index 175d73e72c10..cd3db957afc1 100644 --- a/include/llvm/MC/MCMachObjectWriter.h +++ b/include/llvm/MC/MCMachObjectWriter.h @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallString.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/DataTypes.h" @@ -104,7 +105,7 @@ class MachObjectWriter : public MCObjectWriter { /// \name Symbol Table Data /// @{ - StringTableBuilder StringTable; + StringTableBuilder StringTable{StringTableBuilder::MachO}; std::vector LocalSymbolData; std::vector ExternalSymbolData; std::vector UndefinedSymbolData; @@ -159,19 +160,21 @@ public: /// @} - void writeHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, - bool SubsectionsViaSymbols); + void writeHeader(MachO::HeaderFileType Type, unsigned NumLoadCommands, + unsigned LoadCommandsSize, bool SubsectionsViaSymbols); /// Write a segment load command. /// /// \param NumSections The number of sections in this segment. /// \param SectionDataSize The total size of the sections. - void writeSegmentLoadCommand(unsigned NumSections, uint64_t VMSize, + void writeSegmentLoadCommand(StringRef Name, unsigned NumSections, + uint64_t VMAddr, uint64_t VMSize, uint64_t SectionDataStartOffset, - uint64_t SectionDataSize); + uint64_t SectionDataSize, uint32_t MaxProt, + uint32_t InitProt); - void writeSection(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCSection &Sec, uint64_t FileOffset, + void writeSection(const MCAsmLayout &Layout, const MCSection &Sec, + uint64_t VMAddr, uint64_t FileOffset, unsigned Flags, uint64_t RelocationsStart, unsigned NumRelocations); void writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, @@ -245,6 +248,11 @@ public: void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) override; + bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbol &A, + const MCSymbol &B, + bool InSet) const override; + bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index 99e3f92bfe26..cf2c3f12bb6b 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -35,16 +35,18 @@ protected: /// without an associated EH frame section. bool SupportsCompactUnwindWithoutEHFrame; - /// Some encoding values for EH. + /// OmitDwarfIfHaveCompactUnwind - True if the target object file + /// supports having some functions with compact unwind and other with + /// dwarf unwind. + bool OmitDwarfIfHaveCompactUnwind; + + /// PersonalityEncoding, LSDAEncoding, TTypeEncoding - Some encoding values + /// for EH. unsigned PersonalityEncoding; unsigned LSDAEncoding; unsigned FDECFIEncoding; unsigned TTypeEncoding; - /// Section flags for eh_frame - unsigned EHSectionType; - unsigned EHSectionFlags; - /// Compact unwind encoding indicating that we should emit only an EH frame. unsigned CompactUnwindDwarfEHFrameOnly; @@ -114,6 +116,10 @@ protected: MCSection *DwarfStrOffDWOSection; MCSection *DwarfAddrSection; + // These are for Fission DWP files. + MCSection *DwarfCUIndexSection; + MCSection *DwarfTUIndexSection; + /// Section for newer gnu pubnames. 
MCSection *DwarfGnuPubNamesSection; /// Section for newer gnu pubtypes. @@ -147,10 +153,7 @@ protected: MCSection *EHFrameSection; // ELF specific sections. - MCSection *DataRelSection; - const MCSection *DataRelLocalSection; MCSection *DataRelROSection; - MCSection *DataRelROLocalSection; MCSection *MergeableConst4Section; MCSection *MergeableConst8Section; MCSection *MergeableConst16Section; @@ -200,6 +203,10 @@ public: bool getSupportsCompactUnwindWithoutEHFrame() const { return SupportsCompactUnwindWithoutEHFrame; } + bool getOmitDwarfIfHaveCompactUnwind() const { + return OmitDwarfIfHaveCompactUnwind; + } + bool getCommDirectiveSupportsAlignment() const { return CommDirectiveSupportsAlignment; } @@ -216,6 +223,7 @@ public: MCSection *getTextSection() const { return TextSection; } MCSection *getDataSection() const { return DataSection; } MCSection *getBSSSection() const { return BSSSection; } + MCSection *getReadOnlySection() const { return ReadOnlySection; } MCSection *getLSDASection() const { return LSDASection; } MCSection *getCompactUnwindSection() const { return CompactUnwindSection; } MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } @@ -258,6 +266,8 @@ public: MCSection *getDwarfLocDWOSection() const { return DwarfLocDWOSection; } MCSection *getDwarfStrOffDWOSection() const { return DwarfStrOffDWOSection; } MCSection *getDwarfAddrSection() const { return DwarfAddrSection; } + MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; } + MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; } MCSection *getCOFFDebugSymbolsSection() const { return COFFDebugSymbolsSection; @@ -271,12 +281,7 @@ public: MCSection *getFaultMapSection() const { return FaultMapSection; } // ELF specific sections. - MCSection *getDataRelSection() const { return DataRelSection; } - const MCSection *getDataRelLocalSection() const { - return DataRelLocalSection; - } MCSection *getDataRelROSection() const { return DataRelROSection; } - MCSection *getDataRelROLocalSection() const { return DataRelROLocalSection; } const MCSection *getMergeableConst4Section() const { return MergeableConst4Section; } @@ -325,8 +330,6 @@ public: MCSection *getSXDataSection() const { return SXDataSection; } MCSection *getEHFrameSection() { - if (!EHFrameSection) - InitEHFrameSection(); return EHFrameSection; } @@ -346,9 +349,6 @@ private: void initELFMCObjectFileInfo(Triple T); void initCOFFMCObjectFileInfo(Triple T); - /// Initialize EHFrameSection on demand. 
- void InitEHFrameSection(); - public: const Triple &getTargetTriple() const { return TT; } }; diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index ce1fc80f2cf2..9fe2fda21353 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -92,7 +92,7 @@ public: void EmitLabel(MCSymbol *Symbol) override; void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc = SMLoc()) override; + SMLoc Loc = SMLoc()) override; void EmitULEB128Value(const MCExpr *Value) override; void EmitSLEB128Value(const MCExpr *Value) override; void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override; @@ -112,7 +112,7 @@ public: unsigned MaxBytesToEmit = 0) override; void EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit = 0) override; - bool EmitValueToOffset(const MCExpr *Offset, unsigned char Value) override; + void emitValueToOffset(const MCExpr *Offset, unsigned char Value) override; void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, unsigned Isa, unsigned Discriminator, @@ -124,8 +124,9 @@ public: const MCSymbol *Label); void EmitGPRel32Value(const MCExpr *Value) override; void EmitGPRel64Value(const MCExpr *Value) override; + bool EmitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc Loc) override; void EmitFill(uint64_t NumBytes, uint8_t FillValue) override; - void EmitZeros(uint64_t NumBytes) override; void FinishImpl() override; /// Emit the absolute difference between two symbols if possible. diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h index 2211673efc31..63c833ac20d6 100644 --- a/include/llvm/MC/MCObjectWriter.h +++ b/include/llvm/MC/MCObjectWriter.h @@ -40,14 +40,18 @@ class MCObjectWriter { MCObjectWriter(const MCObjectWriter &) = delete; void operator=(const MCObjectWriter &) = delete; -protected: - raw_pwrite_stream &OS; + raw_pwrite_stream *OS; +protected: unsigned IsLittleEndian : 1; protected: // Can only create subclasses. 
   MCObjectWriter(raw_pwrite_stream &OS, bool IsLittleEndian)
-      : OS(OS), IsLittleEndian(IsLittleEndian) {}
+      : OS(&OS), IsLittleEndian(IsLittleEndian) {}
+
+  unsigned getInitialOffset() {
+    return OS->tell();
+  }

 public:
   virtual ~MCObjectWriter();
@@ -57,7 +61,8 @@ public:

   bool isLittleEndian() const { return IsLittleEndian; }

-  raw_ostream &getStream() { return OS; }
+  raw_pwrite_stream &getStream() { return *OS; }
+  void setStream(raw_pwrite_stream &NewOS) { OS = &NewOS; }

   /// \name High-Level API
   /// @{
@@ -91,6 +96,11 @@ public:
                                             const MCSymbolRefExpr *B,
                                             bool InSet) const;

+  virtual bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                                      const MCSymbol &A,
+                                                      const MCSymbol &B,
+                                                      bool InSet) const;
+
   virtual bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
                                                       const MCSymbol &SymA,
                                                       const MCFragment &FB,
@@ -113,30 +123,30 @@ public:
   /// \name Binary Output
   /// @{

-  void write8(uint8_t Value) { OS << char(Value); }
+  void write8(uint8_t Value) { *OS << char(Value); }

   void writeLE16(uint16_t Value) {
-    support::endian::Writer<support::little>(OS).write(Value);
+    support::endian::Writer<support::little>(*OS).write(Value);
   }

   void writeLE32(uint32_t Value) {
-    support::endian::Writer<support::little>(OS).write(Value);
+    support::endian::Writer<support::little>(*OS).write(Value);
   }

   void writeLE64(uint64_t Value) {
-    support::endian::Writer<support::little>(OS).write(Value);
+    support::endian::Writer<support::little>(*OS).write(Value);
   }

   void writeBE16(uint16_t Value) {
-    support::endian::Writer<support::big>(OS).write(Value);
+    support::endian::Writer<support::big>(*OS).write(Value);
   }

   void writeBE32(uint32_t Value) {
-    support::endian::Writer<support::big>(OS).write(Value);
+    support::endian::Writer<support::big>(*OS).write(Value);
   }

   void writeBE64(uint64_t Value) {
-    support::endian::Writer<support::big>(OS).write(Value);
+    support::endian::Writer<support::big>(*OS).write(Value);
   }

   void write16(uint16_t Value) {
@@ -164,9 +174,9 @@ public:
     const char Zeros[16] = {0};

     for (unsigned i = 0, e = N / 16; i != e; ++i)
-      OS << StringRef(Zeros, 16);
+      *OS << StringRef(Zeros, 16);

-    OS << StringRef(Zeros, N % 16);
+    *OS << StringRef(Zeros, N % 16);
   }

   void writeBytes(const SmallVectorImpl<char> &ByteVec,
@@ -180,7 +190,7 @@ public:
     assert(
         (ZeroFillSize == 0 || Str.size() <= ZeroFillSize) &&
         "data size greater than fill size, unexpected large write will occur");
-    OS << Str;
+    *OS << Str;
     if (ZeroFillSize)
       WriteZeros(ZeroFillSize - Str.size());
   }
diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h
index 62d39b26c860..1bb6d212784e 100644
--- a/include/llvm/MC/MCParser/AsmLexer.h
+++ b/include/llvm/MC/MCParser/AsmLexer.h
@@ -47,7 +47,8 @@ public:
   StringRef LexUntilEndOfStatement() override;
   StringRef LexUntilEndOfLine();

-  const AsmToken peekTok(bool ShouldSkipSpace = true) override;
+  size_t peekTokens(MutableArrayRef<AsmToken> Buf,
+                    bool ShouldSkipSpace = true) override;

   bool isAtStartOfComment(const char *Ptr);
   bool isAtStatementSeparator(const char *Ptr);
diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h
index 71f15b37c331..55279f49529a 100644
--- a/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -118,7 +118,7 @@ public:
 /// lexers.
 class MCAsmLexer {
   /// The current token, stored in the base class for faster access.
-  AsmToken CurTok;
+  SmallVector<AsmToken, 1> CurTok;

   /// The location and description of the current error
   SMLoc ErrLoc;
@@ -135,7 +135,7 @@ protected: // Can only create subclasses.
   virtual AsmToken LexToken() = 0;

-  void SetError(const SMLoc &errLoc, const std::string &err) {
+  void SetError(SMLoc errLoc, const std::string &err) {
     ErrLoc = errLoc;
     Err = err;
   }
@@ -148,7 +148,15 @@ public:
   /// The lexer will continuously return the end-of-file token once the end of
   /// the main input file has been reached.
   const AsmToken &Lex() {
-    return CurTok = LexToken();
+    assert(!CurTok.empty());
+    CurTok.erase(CurTok.begin());
+    if (CurTok.empty())
+      CurTok.emplace_back(LexToken());
+    return CurTok.front();
+  }
+
+  void UnLex(AsmToken const &Token) {
+    CurTok.insert(CurTok.begin(), Token);
   }

   virtual StringRef LexUntilEndOfStatement() = 0;
@@ -158,14 +166,28 @@ public:

   /// Get the current (last) lexed token.
   const AsmToken &getTok() const {
-    return CurTok;
+    return CurTok[0];
   }

   /// Look ahead at the next token to be lexed.
-  virtual const AsmToken peekTok(bool ShouldSkipSpace = true) = 0;
+  const AsmToken peekTok(bool ShouldSkipSpace = true) {
+    AsmToken Tok;
+
+    MutableArrayRef<AsmToken> Buf(Tok);
+    size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
+
+    assert(ReadCount == 1);
+    (void)ReadCount;
+
+    return Tok;
+  }
+
+  /// Look ahead an arbitrary number of tokens.
+  virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
+                            bool ShouldSkipSpace = true) = 0;

   /// Get the current error location
-  const SMLoc &getErrLoc() {
+  SMLoc getErrLoc() {
     return ErrLoc;
   }

@@ -175,13 +197,13 @@ public:
   }

   /// Get the kind of current token.
-  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
+  AsmToken::TokenKind getKind() const { return getTok().getKind(); }

   /// Check if the current token has kind \p K.
-  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
+  bool is(AsmToken::TokenKind K) const { return getTok().is(K); }

   /// Check if the current token has kind \p K.
-  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+  bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }

   /// Set whether spaces should be ignored by the lexer
   void setSkipSpace(bool val) { SkipSpace = val; }
diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 077fd21e073c..30b25dcfdaec 100644
--- a/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -71,6 +71,9 @@ public:
   bool Error(SMLoc L, const Twine &Msg) {
     return getParser().Error(L, Msg);
   }
+  void Note(SMLoc L, const Twine &Msg) {
+    getParser().Note(L, Msg);
+  }

   bool TokError(const Twine &Msg) {
     return getParser().TokError(Msg);
   }
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index a25108a0effb..a90d280c240c 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -30,8 +30,16 @@ class MCParsedAsmOperand {
   /// MS-style inline assembly.
   std::string Constraint;

+protected:
+  // This only seems to need to be movable (by ARMOperand) but ARMOperand has
+  // lots of members and MSVC doesn't support defaulted move ops, so to avoid
+  // that verbosity, just rely on defaulted copy ops. It's only the Constraint
+  // string member that would benefit from movement anyway.
+ MCParsedAsmOperand(const MCParsedAsmOperand &RHS) = default; + MCParsedAsmOperand &operator=(const MCParsedAsmOperand &) = default; + MCParsedAsmOperand() = default; + public: - MCParsedAsmOperand() {} virtual ~MCParsedAsmOperand() {} void setConstraint(StringRef C) { Constraint = C.str(); } diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h index 8e25ee18e08d..a4d5e0867232 100644 --- a/include/llvm/MC/MCRegisterInfo.h +++ b/include/llvm/MC/MCRegisterInfo.h @@ -632,7 +632,7 @@ private: unsigned Reg; const MCRegisterInfo *MCRI; bool IncludeSelf; - + MCRegUnitIterator RI; MCRegUnitRootIterator RRI; MCSuperRegIterator SI; @@ -652,10 +652,8 @@ public: } } - bool isValid() const { - return RI.isValid(); - } - + bool isValid() const { return RI.isValid(); } + unsigned operator*() const { assert (SI.isValid() && "Cannot dereference an invalid iterator."); return *SI; diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h index c09791631056..d7f9b69a9a2c 100644 --- a/include/llvm/MC/MCSchedule.h +++ b/include/llvm/MC/MCSchedule.h @@ -183,7 +183,7 @@ struct MCSchedModel { // takes to recover from a branch misprediction. unsigned MispredictPenalty; static const unsigned DefaultMispredictPenalty = 10; - + bool PostRAScheduler; // default value is false bool CompleteModel; @@ -206,6 +206,9 @@ struct MCSchedModel { /// scheduling class (itinerary class or SchedRW list). bool isComplete() const { return CompleteModel; } + /// Return true if machine supports out of order execution. + bool isOutOfOrder() const { return MicroOpBufferSize > 1; } + unsigned getNumProcResourceKinds() const { return NumProcResourceKinds; } diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 2d0d4dfc5913..09a98929113a 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -18,12 +18,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" +#include "llvm/MC/MCFragment.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Compiler.h" namespace llvm { -class MCAssembler; class MCAsmInfo; +class MCAssembler; class MCContext; class MCExpr; class MCFragment; @@ -92,6 +93,8 @@ private: unsigned IsRegistered : 1; + MCDummyFragment DummyFragment; + FragmentListType Fragments; /// Mapping from subsection number to insertion point for subsection numbers @@ -102,10 +105,9 @@ protected: MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin); SectionVariant Variant; SectionKind Kind; + ~MCSection(); public: - virtual ~MCSection(); - SectionKind getKind() const { return Kind; } SectionVariant getVariant() const { return Variant; } @@ -152,6 +154,14 @@ public: return const_cast(this)->getFragmentList(); } + /// Support for MCFragment::getNextNode(). 
+ static FragmentListType MCSection::*getSublistAccess(MCFragment *) { + return &MCSection::Fragments; + } + + const MCDummyFragment &getDummyFragment() const { return DummyFragment; } + MCDummyFragment &getDummyFragment() { return DummyFragment; } + MCSection::iterator begin(); MCSection::const_iterator begin() const { return const_cast(this)->begin(); diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h index 237f6d31fb1b..d94682c8c381 100644 --- a/include/llvm/MC/MCSectionCOFF.h +++ b/include/llvm/MC/MCSectionCOFF.h @@ -16,66 +16,63 @@ #include "llvm/ADT/StringRef.h" #include "llvm/MC/MCSection.h" -#include "llvm/Support/COFF.h" namespace llvm { class MCSymbol; -/// MCSectionCOFF - This represents a section on Windows - class MCSectionCOFF : public MCSection { - // The memory for this string is stored in the same MCContext as *this. - StringRef SectionName; +/// This represents a section on Windows +class MCSectionCOFF final : public MCSection { + // The memory for this string is stored in the same MCContext as *this. + StringRef SectionName; - // FIXME: The following fields should not be mutable, but are for now so - // the asm parser can honor the .linkonce directive. + // FIXME: The following fields should not be mutable, but are for now so the + // asm parser can honor the .linkonce directive. - /// Characteristics - This is the Characteristics field of a section, - /// drawn from the enums below. - mutable unsigned Characteristics; + /// This is the Characteristics field of a section, drawn from the enums + /// below. + mutable unsigned Characteristics; - /// The COMDAT symbol of this section. Only valid if this is a COMDAT - /// section. Two COMDAT sections are merged if they have the same - /// COMDAT symbol. - MCSymbol *COMDATSymbol; + /// The COMDAT symbol of this section. Only valid if this is a COMDAT section. + /// Two COMDAT sections are merged if they have the same COMDAT symbol. 
+ MCSymbol *COMDATSymbol; - /// Selection - This is the Selection field for the section symbol, if - /// it is a COMDAT section (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0 - mutable int Selection; + /// This is the Selection field for the section symbol, if it is a COMDAT + /// section (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0 + mutable int Selection; - private: - friend class MCContext; - MCSectionCOFF(StringRef Section, unsigned Characteristics, - MCSymbol *COMDATSymbol, int Selection, SectionKind K, - MCSymbol *Begin) - : MCSection(SV_COFF, K, Begin), SectionName(Section), - Characteristics(Characteristics), COMDATSymbol(COMDATSymbol), - Selection(Selection) { - assert ((Characteristics & 0x00F00000) == 0 && - "alignment must not be set upon section creation"); - } - ~MCSectionCOFF() override; +private: + friend class MCContext; + MCSectionCOFF(StringRef Section, unsigned Characteristics, + MCSymbol *COMDATSymbol, int Selection, SectionKind K, + MCSymbol *Begin) + : MCSection(SV_COFF, K, Begin), SectionName(Section), + Characteristics(Characteristics), COMDATSymbol(COMDATSymbol), + Selection(Selection) { + assert((Characteristics & 0x00F00000) == 0 && + "alignment must not be set upon section creation"); + } - public: - /// ShouldOmitSectionDirective - Decides whether a '.section' directive - /// should be printed before the section name - bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; +public: + ~MCSectionCOFF(); - StringRef getSectionName() const { return SectionName; } - unsigned getCharacteristics() const { return Characteristics; } - MCSymbol *getCOMDATSymbol() const { return COMDATSymbol; } - int getSelection() const { return Selection; } + /// Decides whether a '.section' directive should be printed before the + /// section name + bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; - void setSelection(int Selection) const; + StringRef getSectionName() const { return SectionName; } + unsigned getCharacteristics() const { return Characteristics; } + MCSymbol *getCOMDATSymbol() const { return COMDATSymbol; } + int getSelection() const { return Selection; } - void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS, - const MCExpr *Subsection) const override; - bool UseCodeAlign() const override; - bool isVirtualSection() const override; + void setSelection(int Selection) const; - static bool classof(const MCSection *S) { - return S->getVariant() == SV_COFF; - } - }; + void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS, + const MCExpr *Subsection) const override; + bool UseCodeAlign() const override; + bool isVirtualSection() const override; + + static bool classof(const MCSection *S) { return S->getVariant() == SV_COFF; } +}; } // end namespace llvm diff --git a/include/llvm/MC/MCSectionELF.h b/include/llvm/MC/MCSectionELF.h index f6730371fe15..b3bb3ad4e02c 100644 --- a/include/llvm/MC/MCSectionELF.h +++ b/include/llvm/MC/MCSectionELF.h @@ -25,25 +25,24 @@ namespace llvm { class MCSymbol; -/// MCSectionELF - This represents a section on linux, lots of unix variants -/// and some bare metal systems. -class MCSectionELF : public MCSection { - /// SectionName - This is the name of the section. The referenced memory is - /// owned by TargetLoweringObjectFileELF's ELFUniqueMap. +/// This represents a section on linux, lots of unix variants and some bare +/// metal systems. +class MCSectionELF final : public MCSection { + /// This is the name of the section. 
The referenced memory is owned by + /// TargetLoweringObjectFileELF's ELFUniqueMap. StringRef SectionName; - /// Type - This is the sh_type field of a section, drawn from the enums below. + /// This is the sh_type field of a section, drawn from the enums below. unsigned Type; - /// Flags - This is the sh_flags field of a section, drawn from the enums. - /// below. + /// This is the sh_flags field of a section, drawn from the enums below. unsigned Flags; unsigned UniqueID; - /// EntrySize - The size of each entry in this section. This size only - /// makes sense for sections that contain fixed-sized entries. If a - /// section does not contain fixed-sized entries 'EntrySize' will be 0. + /// The size of each entry in this section. This size only makes sense for + /// sections that contain fixed-sized entries. If a section does not contain + /// fixed-sized entries 'EntrySize' will be 0. unsigned EntrySize; const MCSymbolELF *Group; @@ -62,14 +61,14 @@ private: if (Group) Group->setIsSignature(); } - ~MCSectionELF() override; void setSectionName(StringRef Name) { SectionName = Name; } public: + ~MCSectionELF(); - /// ShouldOmitSectionDirective - Decides whether a '.section' directive - /// should be printed before the section name + /// Decides whether a '.section' directive should be printed before the + /// section name bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; StringRef getSectionName() const { return SectionName; } diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h index 97227517c82d..658dfcda7268 100644 --- a/include/llvm/MC/MCSectionMachO.h +++ b/include/llvm/MC/MCSectionMachO.h @@ -20,19 +20,18 @@ namespace llvm { -/// MCSectionMachO - This represents a section on a Mach-O system (used by -/// Mac OS X). On a Mac system, these are also described in -/// /usr/include/mach-o/loader.h. -class MCSectionMachO : public MCSection { +/// This represents a section on a Mach-O system (used by Mac OS X). On a Mac +/// system, these are also described in /usr/include/mach-o/loader.h. +class MCSectionMachO final : public MCSection { char SegmentName[16]; // Not necessarily null terminated! char SectionName[16]; // Not necessarily null terminated! - /// TypeAndAttributes - This is the SECTION_TYPE and SECTION_ATTRIBUTES - /// field of a section, drawn from the enums below. + /// This is the SECTION_TYPE and SECTION_ATTRIBUTES field of a section, drawn + /// from the enums below. unsigned TypeAndAttributes; - /// Reserved2 - The 'reserved2' field of a section, used to represent the - /// size of stubs, for example. + /// The 'reserved2' field of a section, used to represent the size of stubs, + /// for example. unsigned Reserved2; MCSectionMachO(StringRef Segment, StringRef Section, unsigned TAA, @@ -64,12 +63,12 @@ public: return (TypeAndAttributes & Value) != 0; } - /// ParseSectionSpecifier - Parse the section specifier indicated by "Spec". - /// This is a string that can appear after a .section directive in a mach-o - /// flavored .s file. If successful, this fills in the specified Out - /// parameters and returns an empty string. When an invalid section - /// specifier is present, this returns a string indicating the problem. - /// If no TAA was parsed, TAA is not altered, and TAAWasSet becomes false. + /// Parse the section specifier indicated by "Spec". This is a string that can + /// appear after a .section directive in a mach-o flavored .s file. 
If
+  /// successful, this fills in the specified Out parameters and returns an
+  /// empty string. When an invalid section specifier is present, this returns
+  /// a string indicating the problem. If no TAA was parsed, TAA is not altered,
+  /// and TAAWasSet becomes false.
   static std::string ParseSectionSpecifier(StringRef Spec,       // In.
                                            StringRef &Segment,   // Out.
                                            StringRef &Section,   // Out.
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 6b9b8a153845..494f02dfad3e 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -19,6 +19,7 @@
 #include "llvm/MC/MCDirectives.h"
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCLinkerOptimizationHint.h"
+#include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/MCWinEH.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/SMLoc.h"
@@ -33,7 +34,6 @@ class MCInst;
 class MCInstPrinter;
 class MCSection;
 class MCStreamer;
-class MCSymbol;
 class MCSymbolELF;
 class MCSymbolRefExpr;
 class MCSubtargetInfo;
@@ -134,7 +134,7 @@ public:
   /// Callback used to implement the ldr= pseudo.
   /// Add a new entry to the constant pool for the current section and return an
   /// MCExpr that can be used to refer to the constant pool location.
-  const MCExpr *addConstantPoolEntry(const MCExpr *);
+  const MCExpr *addConstantPoolEntry(const MCExpr *, SMLoc Loc);

   /// Callback used to implement the .ltorg directive.
   /// Emit contents of constant pool for the current section.
@@ -358,7 +358,7 @@ public:
   ///
   /// Each emitted symbol will be tracked in the ordering table,
   /// so we can sort on them later.
-  void AssignSection(MCSymbol *Symbol, MCSection *Section);
+  void AssignFragment(MCSymbol *Symbol, MCFragment *Fragment);

   /// \brief Emit a label for \p Symbol into the current section.
   ///
@@ -522,10 +522,9 @@ public:
   /// match a native machine width.
   /// \param Loc - The location of the expression for error reporting.
   virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                             const SMLoc &Loc = SMLoc());
+                             SMLoc Loc = SMLoc());

-  void EmitValue(const MCExpr *Value, unsigned Size,
-                 const SMLoc &Loc = SMLoc());
+  void EmitValue(const MCExpr *Value, unsigned Size, SMLoc Loc = SMLoc());

   /// \brief Special case of EmitValue that avoids the client having
   /// to pass in a MCExpr for constant integers.
@@ -568,7 +567,7 @@ public:

   /// \brief Emit NumBytes worth of zeros.
   /// This function properly handles data in virtual sections.
-  virtual void EmitZeros(uint64_t NumBytes);
+  void EmitZeros(uint64_t NumBytes);

   /// \brief Emit some number of copies of \p Value until the byte alignment \p
   /// ByteAlignment is reached.
@@ -612,9 +611,7 @@ public:
   /// \param Offset - The offset to reach. This may be an expression, but the
   /// expression must be associated with the current section.
   /// \param Value - The value to use when filling bytes.
-  /// \return false on success, true if the offset was invalid.
-  virtual bool EmitValueToOffset(const MCExpr *Offset,
-                                 unsigned char Value = 0);
+  virtual void emitValueToOffset(const MCExpr *Offset, unsigned char Value = 0);

   /// @}

@@ -662,6 +659,7 @@ public:
   virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
   virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
   virtual void EmitCFIEscape(StringRef Values);
+  virtual void EmitCFIGnuArgsSize(int64_t Size);
   virtual void EmitCFISignalFrame();
   virtual void EmitCFIUndefined(int64_t Register);
   virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
@@ -682,6 +680,16 @@ public:
   virtual void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except);
   virtual void EmitWinEHHandlerData();

+  virtual void EmitSyntaxDirective();
+
+  /// \brief Emit a .reloc directive.
+  /// Returns true if the relocation could not be emitted because Name is not
+  /// known.
+  virtual bool EmitRelocDirective(const MCExpr &Offset, StringRef Name,
+                                  const MCExpr *Expr, SMLoc Loc) {
+    return true;
+  }
+
   /// \brief Emit the given \p Instruction into the current section.
   virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);

@@ -704,9 +712,6 @@ public:
   /// the hasRawTextSupport() predicate. By default this aborts.
   void EmitRawText(const Twine &String);

-  /// \brief Causes any cached state to be written out.
-  virtual void Flush() {}
-
   /// \brief Streamer specific finalization.
   virtual void FinishImpl();
   /// \brief Finish emission of machine code.
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
index d5ad4eebf9ef..446feefc4500 100644
--- a/include/llvm/MC/MCSubtargetInfo.h
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -86,8 +86,9 @@ protected:
   void InitMCProcessorInfo(StringRef CPU, StringRef FS);

 public:
-  /// Set the features to the default for the given CPU.
-  void setDefaultFeatures(StringRef CPU);
+  /// Set the features to the default for the given CPU with an appended feature
+  /// string.
+  void setDefaultFeatures(StringRef CPU, StringRef FS);

   /// ToggleFeature - Toggle a feature and returns the re-computed feature
   /// bits. This version does not change the implied bits.
@@ -159,11 +160,8 @@ public:

   /// Check whether the CPU string is valid.
   bool isCPUStringValid(StringRef CPU) const {
-    auto Found = std::find_if(ProcDesc.begin(), ProcDesc.end(),
-                              [=](const SubtargetFeatureKV &KV) {
-                                return CPU == KV.Key;
-                              });
-    return Found != ProcDesc.end();
+    auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
+    return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
   }
 };
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
index b2910dfccd63..c51ecfcb0c5c 100644
--- a/include/llvm/MC/MCSymbol.h
+++ b/include/llvm/MC/MCSymbol.h
@@ -56,19 +56,17 @@ protected:
     SymContentsCommon,
   };

-  // Special sentinel value for the absolute pseudo section.
-  //
-  // FIXME: Use a PointerInt wrapper for this?
-  static MCSection *AbsolutePseudoSection;
+  // Special sentinel value for the absolute pseudo fragment.
+  static MCFragment *AbsolutePseudoFragment;

   /// If a symbol has a Fragment, the section is implied, so we only need
   /// one pointer.
+  /// The special AbsolutePseudoFragment value is for absolute symbols.
+  /// If this is a variable symbol, this caches the variable value's fragment.
   /// FIXME: We might be able to simplify this by having the asm streamer create
   /// dummy fragments.
   /// If this is a section, then it gives the section the symbol is defined in.
This is null - /// for undefined symbols, and the special AbsolutePseudoSection value for - /// absolute symbols. If this is a variable symbol, this caches the variable - /// value's section. + /// for undefined symbols. /// /// If this is a fragment, then it gives the fragment this symbol's value is /// relative to, if any. @@ -76,8 +74,7 @@ protected: /// For the 'HasName' integer, this is true if this symbol is named. /// A named symbol will have a pointer to the name allocated in the bytes /// immediately prior to the MCSymbol. - mutable PointerIntPair, 1> - SectionOrFragmentAndHasName; + mutable PointerIntPair FragmentAndHasName; /// IsTemporary - True if this is an assembler temporary label, which /// typically does not survive in the .o file's symbol table. Usually @@ -155,7 +152,7 @@ protected: // MCContext creates and uniques these. Kind(Kind), IsUsedInReloc(false), SymbolContents(SymContentsUnset), CommonAlignLog2(0), Flags(0) { Offset = 0; - SectionOrFragmentAndHasName.setInt(!!Name); + FragmentAndHasName.setInt(!!Name); if (Name) getNameEntryPtr() = Name; } @@ -179,20 +176,17 @@ private: MCSymbol(const MCSymbol &) = delete; void operator=(const MCSymbol &) = delete; - MCSection *getSectionPtr() const { - if (MCFragment *F = getFragment()) + MCSection *getSectionPtr(bool SetUsed = true) const { + if (MCFragment *F = getFragment(SetUsed)) { + assert(F != AbsolutePseudoFragment); return F->getParent(); - const auto &SectionOrFragment = SectionOrFragmentAndHasName.getPointer(); - assert(!SectionOrFragment.is() && "Section or null expected"); - MCSection *Section = SectionOrFragment.dyn_cast(); - if (Section || !isVariable()) - return Section; - return Section = getVariableValue()->findAssociatedSection(); + } + return nullptr; } /// \brief Get a reference to the name field. Requires that we have a name const StringMapEntry *&getNameEntryPtr() { - assert(SectionOrFragmentAndHasName.getInt() && "Name is required"); + assert(FragmentAndHasName.getInt() && "Name is required"); NameEntryStorageTy *Name = reinterpret_cast(this); return (*(Name - 1)).NameEntry; } @@ -203,7 +197,7 @@ private: public: /// getName - Get the symbol name. StringRef getName() const { - if (!SectionOrFragmentAndHasName.getInt()) + if (!FragmentAndHasName.getInt()) return StringRef(); return getNameEntryPtr()->first(); @@ -223,7 +217,7 @@ public: /// isUsed - Check if this is used. bool isUsed() const { return IsUsed; } - void setUsed(bool Value) const { IsUsed = Value; } + void setUsed(bool Value) const { IsUsed |= Value; } /// \brief Check if this symbol is redefinable. bool isRedefinable() const { return IsRedefinable; } @@ -248,37 +242,38 @@ public: /// isDefined - Check if this symbol is defined (i.e., it has an address). /// /// Defined symbols are either absolute or in some section. - bool isDefined() const { return getSectionPtr() != nullptr; } + bool isDefined(bool SetUsed = true) const { + return getFragment(SetUsed) != nullptr; + } /// isInSection - Check if this symbol is defined in some section (i.e., it /// is defined but not absolute). - bool isInSection() const { return isDefined() && !isAbsolute(); } - - /// isUndefined - Check if this symbol undefined (i.e., implicitly defined). - bool isUndefined() const { return !isDefined(); } - - /// isAbsolute - Check if this is an absolute symbol. - bool isAbsolute() const { return getSectionPtr() == AbsolutePseudoSection; } - - /// Get the section associated with a defined, non-absolute symbol. 
-  MCSection &getSection() const {
-    assert(isInSection() && "Invalid accessor!");
-    return *getSectionPtr();
+  bool isInSection(bool SetUsed = true) const {
+    return isDefined(SetUsed) && !isAbsolute(SetUsed);
   }

-  /// Mark the symbol as defined in the section \p S.
-  void setSection(MCSection &S) {
-    assert(!isVariable() && "Cannot set section of variable");
-    assert(!SectionOrFragmentAndHasName.getPointer().is<MCFragment *>() &&
-           "Section or null expected");
-    SectionOrFragmentAndHasName.setPointer(&S);
+  /// isUndefined - Check if this symbol is undefined (i.e., implicitly defined).
+  bool isUndefined(bool SetUsed = true) const { return !isDefined(SetUsed); }
+
+  /// isAbsolute - Check if this is an absolute symbol.
+  bool isAbsolute(bool SetUsed = true) const {
+    return getFragment(SetUsed) == AbsolutePseudoFragment;
+  }
+
+  /// Get the section associated with a defined, non-absolute symbol.
+  MCSection &getSection(bool SetUsed = true) const {
+    assert(isInSection(SetUsed) && "Invalid accessor!");
+    return *getSectionPtr(SetUsed);
+  }
+
+  /// Mark the symbol as defined in the fragment \p F.
+  void setFragment(MCFragment *F) const {
+    assert(!isVariable() && "Cannot set fragment of variable");
+    FragmentAndHasName.setPointer(F);
   }

   /// Mark the symbol as undefined.
-  void setUndefined() {
-    SectionOrFragmentAndHasName.setPointer(
-        PointerUnion<MCSection *, MCFragment *>());
-  }
+  void setUndefined() { FragmentAndHasName.setPointer(nullptr); }

   bool isELF() const { return Kind == SymbolKindELF; }

@@ -295,10 +290,10 @@ public:
     return SymbolContents == SymContentsVariable;
   }

-  /// getVariableValue() - Get the value for variable symbols.
-  const MCExpr *getVariableValue() const {
+  /// getVariableValue - Get the value for variable symbols.
+  const MCExpr *getVariableValue(bool SetUsed = true) const {
     assert(isVariable() && "Invalid accessor!");
-    IsUsed = true;
+    IsUsed |= SetUsed;
     return Value;
   }

@@ -379,11 +374,13 @@ public:
     return SymbolContents == SymContentsCommon;
   }

-  MCFragment *getFragment() const {
-    return SectionOrFragmentAndHasName.getPointer().dyn_cast<MCFragment *>();
-  }
-  void setFragment(MCFragment *Value) const {
-    SectionOrFragmentAndHasName.setPointer(Value);
+  MCFragment *getFragment(bool SetUsed = true) const {
+    MCFragment *Fragment = FragmentAndHasName.getPointer();
+    if (Fragment || !isVariable())
+      return Fragment;
+    Fragment = getVariableValue(SetUsed)->findAssociatedFragment();
+    FragmentAndHasName.setPointer(Fragment);
+    return Fragment;
   }

   bool isExternal() const { return IsExternal; }
diff --git a/include/llvm/MC/MCTargetAsmParser.h b/include/llvm/MC/MCTargetAsmParser.h
index 36db3914f017..03b2dc9a282c 100644
--- a/include/llvm/MC/MCTargetAsmParser.h
+++ b/include/llvm/MC/MCTargetAsmParser.h
@@ -20,6 +20,7 @@ class AsmToken;
 class MCInst;
 class MCParsedAsmOperand;
 class MCStreamer;
+class MCSubtargetInfo;
 class SMLoc;
 class StringRef;
 template <typename T> class SmallVectorImpl;
@@ -29,6 +30,7 @@ typedef SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> OperandVector;
 enum AsmRewriteKind {
   AOK_Delete = 0,     // Rewrite should be ignored.
   AOK_Align,          // Rewrite align as .align.
+  AOK_EVEN,           // Rewrite even as .even.
   AOK_DotOperator,    // Rewrite a dot operator expression as an immediate.
                       // E.g., [eax].foo.bar -> [eax].8
   AOK_Emit,           // Rewrite _emit as .byte.
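The AsmRewriteKind enumerators double as indices into the AsmRewritePrecedence
table updated by the next hunk, so the two must stay in lockstep: adding
AOK_EVEN here without the matching precedence entry would silently misrank
rewrites. A minimal sketch of the lookup this layout enables (the helper below
is illustrative only, not part of this patch):

   // Order two rewrites recorded at the same source location: the table maps
   // each AsmRewriteKind to a small precedence value, and higher values win.
   static bool hasHigherPrecedence(AsmRewriteKind A, AsmRewriteKind B) {
     return AsmRewritePrecedence[A] > AsmRewritePrecedence[B];
   }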
@@ -44,6 +46,7 @@ enum AsmRewriteKind { const char AsmRewritePrecedence [] = { 0, // AOK_Delete 2, // AOK_Align + 2, // AOK_EVEN 2, // AOK_DotOperator 2, // AOK_Emit 4, // AOK_Imm @@ -92,7 +95,10 @@ private: MCTargetAsmParser(const MCTargetAsmParser &) = delete; void operator=(const MCTargetAsmParser &) = delete; protected: // Can only create subclasses. - MCTargetAsmParser(); + MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI); + + /// Create a copy of STI and return a non-const reference to it. + MCSubtargetInfo ©STI(); /// AvailableFeatures - The current set of available features. uint64_t AvailableFeatures; @@ -107,9 +113,14 @@ protected: // Can only create subclasses. /// Set of options which affects instrumentation of inline assembly. MCTargetOptions MCOptions; + /// Current STI. + const MCSubtargetInfo *STI; + public: ~MCTargetAsmParser() override; + const MCSubtargetInfo &getSTI() const; + uint64_t getAvailableFeatures() const { return AvailableFeatures; } void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; } @@ -143,6 +154,10 @@ public: /// \return True on failure. virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) = 0; + virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + AsmToken Token, OperandVector &Operands) { + return ParseInstruction(Info, Name, Token.getLoc(), Operands); + } /// ParseDirective - Parse a target specific assembler directive /// @@ -156,10 +171,6 @@ public: /// \param DirectiveID - the identifier token of the directive. virtual bool ParseDirective(AsmToken DirectiveID) = 0; - /// mnemonicIsValid - This returns true if this is a valid mnemonic and false - /// otherwise. - virtual bool mnemonicIsValid(StringRef Mnemonic, unsigned VariantID) = 0; - /// MatchAndEmitInstruction - Recognize a series of operands of a parsed /// instruction as an actual MCInst and emit it to the specified MCStreamer. /// This returns false on success and returns true on failure to match. 
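With this constructor change a target parser no longer owns a mutable
MCSubtargetInfo outright; it receives a const reference at construction and
must call copySTI() to obtain a private, writable copy before toggling
features. A hedged sketch of the resulting pattern (the subclass name and
feature string are invented for illustration, the required pure-virtual
overrides are elided, and ComputeAvailableFeatures stands in for the
TableGen-generated matcher helper):

   #include "llvm/MC/MCSubtargetInfo.h"
   #include "llvm/MC/MCTargetAsmParser.h"

   class ExampleAsmParser : public llvm::MCTargetAsmParser {
   public:
     ExampleAsmParser(const llvm::MCSubtargetInfo &STI,
                      const llvm::MCTargetOptions &Options)
         : MCTargetAsmParser(Options, STI) {}

     void switchMode() {
       // copySTI() clones the shared MCSubtargetInfo once and returns a
       // non-const reference, so mode switches stay local to this parser.
       llvm::MCSubtargetInfo &STI = copySTI();
       STI.ToggleFeature("example-mode"); // hypothetical feature name
       setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
     }
   };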
@@ -192,13 +203,18 @@ public: virtual void convertToMapAndConstraints(unsigned Kind, const OperandVector &Operands) = 0; + // Return whether this parser uses assignment statements with equals tokens + virtual bool equalIsAsmAssignment() { return true; }; + // Return whether this start of statement identifier is a label + virtual bool isLabel(AsmToken &Token) { return true; }; + virtual const MCExpr *applyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind, MCContext &Ctx) { return nullptr; } - virtual void onLabelParsed(MCSymbol *Symbol) { }; + virtual void onLabelParsed(MCSymbol *Symbol) { } }; } // End llvm namespace diff --git a/include/llvm/MC/MCTargetOptions.h b/include/llvm/MC/MCTargetOptions.h index 7f4f23eda27f..4b66a750cb7d 100644 --- a/include/llvm/MC/MCTargetOptions.h +++ b/include/llvm/MC/MCTargetOptions.h @@ -29,8 +29,10 @@ public: bool MCRelaxAll : 1; bool MCNoExecStack : 1; bool MCFatalWarnings : 1; + bool MCNoWarn : 1; bool MCSaveTempLabels : 1; bool MCUseDwarfDirectory : 1; + bool MCIncrementalLinkerCompatible : 1; bool ShowMCEncoding : 1; bool ShowMCInst : 1; bool AsmVerbose : 1; @@ -49,8 +51,10 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) { ARE_EQUAL(MCRelaxAll) && ARE_EQUAL(MCNoExecStack) && ARE_EQUAL(MCFatalWarnings) && + ARE_EQUAL(MCNoWarn) && ARE_EQUAL(MCSaveTempLabels) && ARE_EQUAL(MCUseDwarfDirectory) && + ARE_EQUAL(MCIncrementalLinkerCompatible) && ARE_EQUAL(ShowMCEncoding) && ARE_EQUAL(ShowMCInst) && ARE_EQUAL(AsmVerbose) && diff --git a/include/llvm/MC/MCTargetOptionsCommandFlags.h b/include/llvm/MC/MCTargetOptionsCommandFlags.h index af23a92e6e99..5180208d33b6 100644 --- a/include/llvm/MC/MCTargetOptionsCommandFlags.h +++ b/include/llvm/MC/MCTargetOptionsCommandFlags.h @@ -33,6 +33,12 @@ cl::opt RelaxAll("mc-relax-all", cl::desc("When used with filetype=obj, " "relax all fixups in the emitted object file")); +cl::opt IncrementalLinkerCompatible( + "incremental-linker-compatible", + cl::desc( + "When used with filetype=obj, " + "emit an object file which can be used with an incremental linker")); + cl::opt DwarfVersion("dwarf-version", cl::desc("Dwarf version"), cl::init(0)); @@ -40,6 +46,12 @@ cl::opt ShowMCInst("asm-show-inst", cl::desc("Emit internal instruction representation to " "assembly file")); +cl::opt FatalWarnings("fatal-warnings", + cl::desc("Treat warnings as errors")); + +cl::opt NoWarn("no-warn", cl::desc("Suppress all warnings")); +cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"), cl::aliasopt(NoWarn)); + cl::opt ABIName("target-abi", cl::Hidden, cl::desc("The name of the ABI to be targeted from the backend."), @@ -50,9 +62,12 @@ static inline MCTargetOptions InitMCTargetOptionsFromFlags() { Options.SanitizeAddress = (AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress); Options.MCRelaxAll = RelaxAll; + Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible; Options.DwarfVersion = DwarfVersion; Options.ShowMCInst = ShowMCInst; Options.ABIName = ABIName; + Options.MCFatalWarnings = FatalWarnings; + Options.MCNoWarn = NoWarn; return Options; } diff --git a/include/llvm/MC/MCValue.h b/include/llvm/MC/MCValue.h index 6bdf43685f21..ead08fd90ca0 100644 --- a/include/llvm/MC/MCValue.h +++ b/include/llvm/MC/MCValue.h @@ -35,10 +35,6 @@ class raw_ostream; /// relocation modifiers apply to the closest symbol or the whole /// expression. /// -/// In the general form, SymbolB can only be defined if SymbolA is, and both -/// must be in the same (non-external) section. 
The latter constraint is not -/// enforced, since a symbol's section may not be known at construction. -/// /// Note that this class must remain a simple POD value class, because we need /// it to live in unions etc. class MCValue { @@ -67,7 +63,6 @@ public: const MCSymbolRefExpr *SymB = nullptr, int64_t Val = 0, uint32_t RefKind = 0) { MCValue R; - assert((!SymB || SymA) && "Invalid relocatable MCValue!"); R.Cst = Val; R.SymA = SymA; R.SymB = SymB; diff --git a/include/llvm/MC/MCWinCOFFStreamer.h b/include/llvm/MC/MCWinCOFFStreamer.h index 6fbc754f1125..fe1ada9b9e5b 100644 --- a/include/llvm/MC/MCWinCOFFStreamer.h +++ b/include/llvm/MC/MCWinCOFFStreamer.h @@ -73,7 +73,7 @@ protected: void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &STI) override; private: - LLVM_ATTRIBUTE_NORETURN void FatalError(const Twine &Msg) const; + void Error(const Twine &Msg) const; }; } diff --git a/include/llvm/MC/MachineLocation.h b/include/llvm/MC/MachineLocation.h index 2a18615eff62..4b5cf4357793 100644 --- a/include/llvm/MC/MachineLocation.h +++ b/include/llvm/MC/MachineLocation.h @@ -68,10 +68,6 @@ public: Register = R; Offset = O; } - -#ifndef NDEBUG - void dump(); -#endif }; inline bool operator!=(const MachineLocation &LHS, const MachineLocation &RHS) { diff --git a/include/llvm/MC/SectionKind.h b/include/llvm/MC/SectionKind.h index 9e8b68f4340c..b09b93cfc377 100644 --- a/include/llvm/MC/SectionKind.h +++ b/include/llvm/MC/SectionKind.h @@ -1,4 +1,4 @@ -//===-- llvm/Target/TargetLoweringObjectFile.h - Object Info ----*- C++ -*-===// +//===-- llvm/MC/SectionKind.h - Classification of sections ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,11 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements classes used to handle lowerings specific to common -// object file formats. -// -//===----------------------------------------------------------------------===// #ifndef LLVM_MC_SECTIONKIND_H #define LLVM_MC_SECTIONKIND_H @@ -99,21 +94,8 @@ class SectionKind { /// marked 'constant'. Common, - /// DataRel - This is the most general form of data that is written - /// to by the program, it can have random relocations to arbitrary - /// globals. - DataRel, - - /// DataRelLocal - This is writeable data that has a non-zero - /// initializer and has relocations in it, but all of the - /// relocations are known to be within the final linked image - /// the global is linked into. - DataRelLocal, - - /// DataNoRel - This is writeable data that has a non-zero - /// initializer, but whose initializer is known to have no - /// relocations. - DataNoRel, + /// This is writeable data that has a non-zero initializer. + Data, /// ReadOnlyWithRel - These are global variables that are never /// written to by the program, but that have relocations, so they @@ -121,15 +103,7 @@ class SectionKind { /// can write to them. If it chooses to, the dynamic linker can /// mark the pages these globals end up on as read-only after it is /// done with its relocation phase. - ReadOnlyWithRel, - - /// ReadOnlyWithRelLocal - This is data that is readonly by the - /// program, but must be writeable so that the dynamic linker - /// can perform relocations in it. This is used when we know - /// that all the relocations are to globals in this final - /// linked image. 
- ReadOnlyWithRelLocal - + ReadOnlyWithRel } K : 8; public: @@ -169,7 +143,7 @@ public: bool isThreadData() const { return K == ThreadData; } bool isGlobalWriteableData() const { - return isBSS() || isCommon() || isDataRel() || isReadOnlyWithRel(); + return isBSS() || isCommon() || isData() || isReadOnlyWithRel(); } bool isBSS() const { return K == BSS || K == BSSLocal || K == BSSExtern; } @@ -178,22 +152,10 @@ public: bool isCommon() const { return K == Common; } - bool isDataRel() const { - return K == DataRel || K == DataRelLocal || K == DataNoRel; - } - - bool isDataRelLocal() const { - return K == DataRelLocal || K == DataNoRel; - } - - bool isDataNoRel() const { return K == DataNoRel; } + bool isData() const { return K == Data; } bool isReadOnlyWithRel() const { - return K == ReadOnlyWithRel || K == ReadOnlyWithRelLocal; - } - - bool isReadOnlyWithRelLocal() const { - return K == ReadOnlyWithRelLocal; + return K == ReadOnlyWithRel; } private: static SectionKind get(Kind K) { @@ -224,13 +186,8 @@ public: static SectionKind getBSSLocal() { return get(BSSLocal); } static SectionKind getBSSExtern() { return get(BSSExtern); } static SectionKind getCommon() { return get(Common); } - static SectionKind getDataRel() { return get(DataRel); } - static SectionKind getDataRelLocal() { return get(DataRelLocal); } - static SectionKind getDataNoRel() { return get(DataNoRel); } + static SectionKind getData() { return get(Data); } static SectionKind getReadOnlyWithRel() { return get(ReadOnlyWithRel); } - static SectionKind getReadOnlyWithRelLocal(){ - return get(ReadOnlyWithRelLocal); - } }; } // end namespace llvm diff --git a/include/llvm/MC/StringTableBuilder.h b/include/llvm/MC/StringTableBuilder.h index 897d449254ea..adde86b45583 100644 --- a/include/llvm/MC/StringTableBuilder.h +++ b/include/llvm/MC/StringTableBuilder.h @@ -11,53 +11,51 @@ #define LLVM_MC_STRINGTABLEBUILDER_H #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/DenseMap.h" #include <cassert> namespace llvm { /// \brief Utility for building string tables with deduplicated suffixes. class StringTableBuilder { +public: + enum Kind { ELF, WinCOFF, MachO, RAW }; + +private: SmallString<256> StringTable; - StringMap<size_t> StringIndexMap; + DenseMap<StringRef, size_t> StringIndexMap; + size_t Size = 0; + Kind K; public: - /// \brief Add a string to the builder. Returns a StringRef to the internal - /// copy of s. Can only be used before the table is finalized. - StringRef add(StringRef s) { - assert(!isFinalized()); - return StringIndexMap.insert(std::make_pair(s, 0)).first->first(); - } + StringTableBuilder(Kind K); - enum Kind { - ELF, - WinCOFF, - MachO - }; + /// \brief Add a string to the builder. Returns the position of S in the + /// table. The position will be changed if finalize is used. + /// Can only be used before the table is finalized. + size_t add(StringRef S); /// \brief Analyze the strings and build the final table. No more strings can /// be added after this point. - void finalize(Kind kind); + void finalize(); /// \brief Retrieve the string table data. Can only be used after the table /// is finalized. - StringRef data() { + StringRef data() const { assert(isFinalized()); return StringTable; } /// \brief Get the offset of a string in the string table. Can only be used /// after the table is finalized.
- size_t getOffset(StringRef s) { - assert(isFinalized()); - assert(StringIndexMap.count(s) && "String is not in table!"); - return StringIndexMap[s]; - } + size_t getOffset(StringRef S) const; + const DenseMap<StringRef, size_t> &getMap() const { return StringIndexMap; } + size_t getSize() const { return Size; } void clear(); private: - bool isFinalized() { + bool isFinalized() const { return !StringTable.empty(); } }; diff --git a/include/llvm/MC/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h index 2fb9b4ae2503..0d97b226d728 100644 --- a/include/llvm/MC/SubtargetFeature.h +++ b/include/llvm/MC/SubtargetFeature.h @@ -30,7 +30,7 @@ namespace llvm { // A container class for subtarget features. // This is convenient because std::bitset does not have a constructor // with an initializer list of set bits. -const unsigned MAX_SUBTARGET_FEATURES = 64; +const unsigned MAX_SUBTARGET_FEATURES = 128; class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> { public: // Cannot inherit constructors because it's not supported by VC++.. diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index 597f0d48c118..8dd042a2533f 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -37,7 +37,7 @@ struct ArchiveMemberHeader { llvm::StringRef getName() const; /// Members are not larger than 4GB. - uint32_t getSize() const; + ErrorOr<uint32_t> getSize() const; sys::fs::perms getAccessMode() const; sys::TimeValue getLastModified() const; @@ -52,6 +52,7 @@ class Archive : public Binary { virtual void anchor(); public: class Child { + friend Archive; const Archive *Parent; /// \brief Includes header but not padding byte. StringRef Data; @@ -62,19 +63,19 @@ public: return reinterpret_cast<const ArchiveMemberHeader *>(Data.data()); } + bool isThinMember() const; + public: - Child(const Archive *Parent, const char *Start); + Child(const Archive *Parent, const char *Start, std::error_code *EC); + Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile); bool operator ==(const Child &other) const { assert(Parent == other.Parent); return Data.begin() == other.Data.begin(); } - bool operator <(const Child &other) const { - return Data.begin() < other.Data.begin(); - } - - Child getNext() const; + const Archive *getParent() const { return Parent; } + ErrorOr<Child> getNext() const; ErrorOr<StringRef> getName() const; StringRef getRawName() const { return getHeader()->getName(); } @@ -90,9 +91,9 @@ public: return getHeader()->getAccessMode(); } /// \return the size of the archive member without the header or padding. - uint64_t getSize() const; + ErrorOr<uint64_t> getSize() const; /// \return the size in the archive header for this member. - uint64_t getRawSize() const; + ErrorOr<uint64_t> getRawSize() const; ErrorOr<StringRef> getBuffer() const; uint64_t getChildOffset() const; @@ -104,28 +105,32 @@ public: }; class child_iterator { - Child child; + ErrorOr<Child> child; public: - child_iterator() : child(Child(nullptr, nullptr)) {} + child_iterator() : child(Child(nullptr, nullptr, nullptr)) {} child_iterator(const Child &c) : child(c) {} - const Child *operator->() const { return &child; } - const Child &operator*() const { return child; } + child_iterator(std::error_code EC) : child(EC) {} + const ErrorOr<Child> *operator->() const { return &child; } + const ErrorOr<Child> &operator*() const { return child; } bool operator==(const child_iterator &other) const { - return child == other.child; + // We ignore error states so that comparisons with end() work, which + // allows range loops.
+ if (child.getError() || other.child.getError()) + return false; + return *child == *other.child; } bool operator!=(const child_iterator &other) const { return !(*this == other); } - bool operator<(const child_iterator &other) const { - return child < other.child; - } - + // Code in loops with child_iterators must check for errors on each loop + // iteration. And if there is an error break out of the loop. child_iterator &operator++() { // Preincrement - child = child.getNext(); + assert(child && "Can't increment iterator with error"); + child = child->getNext(); return *this; } }; @@ -145,7 +150,7 @@ public: , SymbolIndex(symi) , StringIndex(stri) {} StringRef getName() const; - ErrorOr getMember() const; + ErrorOr getMember() const; Symbol getNext() const; }; @@ -186,14 +191,13 @@ public: child_iterator child_begin(bool SkipInternal = true) const; child_iterator child_end() const; iterator_range children(bool SkipInternal = true) const { - return iterator_range(child_begin(SkipInternal), - child_end()); + return make_range(child_begin(SkipInternal), child_end()); } symbol_iterator symbol_begin() const; symbol_iterator symbol_end() const; iterator_range symbols() const { - return iterator_range(symbol_begin(), symbol_end()); + return make_range(symbol_begin(), symbol_end()); } // Cast methods. @@ -205,18 +209,17 @@ public: child_iterator findSym(StringRef name) const; bool hasSymbolTable() const; - child_iterator getSymbolTableChild() const { return SymbolTable; } - StringRef getSymbolTable() const { - // We know that the symbol table is not an external file, - // so we just assert there is no error. - return *SymbolTable->getBuffer(); - } + StringRef getSymbolTable() const { return SymbolTable; } uint32_t getNumberOfSymbols() const; private: - child_iterator SymbolTable; - child_iterator StringTable; - child_iterator FirstRegular; + StringRef SymbolTable; + StringRef StringTable; + + StringRef FirstRegularData; + uint16_t FirstRegularStartOfFile = -1; + void setFirstRegular(const Child &C); + unsigned Format : 2; unsigned IsThin : 1; mutable std::vector> ThinBuffers; diff --git a/include/llvm/Object/ArchiveWriter.h b/include/llvm/Object/ArchiveWriter.h index 3648d0c77fb5..b5d2ba358080 100644 --- a/include/llvm/Object/ArchiveWriter.h +++ b/include/llvm/Object/ArchiveWriter.h @@ -24,17 +24,15 @@ class NewArchiveIterator { bool IsNewMember; StringRef Name; - object::Archive::child_iterator OldI; - - StringRef NewFilename; + object::Archive::Child OldMember; public: - NewArchiveIterator(object::Archive::child_iterator I, StringRef Name); - NewArchiveIterator(StringRef I, StringRef Name); + NewArchiveIterator(const object::Archive::Child &OldMember, StringRef Name); + NewArchiveIterator(StringRef FileName); bool isNewMember() const; StringRef getName() const; - object::Archive::child_iterator getOld() const; + const object::Archive::Child &getOld() const; StringRef getNew() const; llvm::ErrorOr getFD(sys::fs::file_status &NewStatus) const; @@ -43,7 +41,8 @@ public: std::pair writeArchive(StringRef ArcName, std::vector &NewMembers, - bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic); + bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic, + bool Thin); } #endif diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h index a3d6d0d4d428..a0d1127781f6 100644 --- a/include/llvm/Object/Binary.h +++ b/include/llvm/Object/Binary.h @@ -41,7 +41,9 @@ protected: enum { ID_Archive, ID_MachOUniversalBinary, - ID_IR, // LLVM IR + ID_COFFImportFile, + ID_IR, // 
LLVM IR + ID_FunctionIndex, // Function summary index // Object and children. ID_StartObjects, @@ -113,10 +115,16 @@ public: return TypeID == ID_COFF; } + bool isCOFFImportFile() const { + return TypeID == ID_COFFImportFile; + } + bool isIR() const { return TypeID == ID_IR; } + bool isFunctionIndex() const { return TypeID == ID_FunctionIndex; } + bool isLittleEndian() const { return !(TypeID == ID_ELF32B || TypeID == ID_ELF64B || TypeID == ID_MachO32B || TypeID == ID_MachO64B); diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h index 025a9dbc6bc0..1b0e2e36bd5e 100644 --- a/include/llvm/Object/COFF.h +++ b/include/llvm/Object/COFF.h @@ -653,8 +653,7 @@ protected: uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; uint32_t getSymbolFlags(DataRefImpl Symb) const override; SymbolRef::Type getSymbolType(DataRefImpl Symb) const override; - std::error_code getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const override; + ErrorOr getSymbolSection(DataRefImpl Symb) const override; void moveSectionNext(DataRefImpl &Sec) const override; std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; @@ -774,6 +773,7 @@ public: std::error_code getSectionContents(const coff_section *Sec, ArrayRef &Res) const; + uint64_t getImageBase() const; std::error_code getVaPtr(uint64_t VA, uintptr_t &Res) const; std::error_code getRvaPtr(uint32_t Rva, uintptr_t &Res) const; std::error_code getHintName(uint32_t Rva, uint16_t &Hint, diff --git a/include/llvm/Object/COFFImportFile.h b/include/llvm/Object/COFFImportFile.h new file mode 100644 index 000000000000..b04a44ea60d2 --- /dev/null +++ b/include/llvm/Object/COFFImportFile.h @@ -0,0 +1,74 @@ +//===- COFFImportFile.h - COFF short import file implementation -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// COFF short import file is a special kind of file which contains +// only symbol names for DLL-exported symbols. This class implements +// SymbolicFile interface for the file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_COFF_IMPORT_FILE_H +#define LLVM_OBJECT_COFF_IMPORT_FILE_H + +#include "llvm/Object/COFF.h" +#include "llvm/Object/IRObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Object/SymbolicFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace object { + +class COFFImportFile : public SymbolicFile { +public: + COFFImportFile(MemoryBufferRef Source) + : SymbolicFile(ID_COFFImportFile, Source) {} + + static inline bool classof(Binary const *V) { return V->isCOFFImportFile(); } + + void moveSymbolNext(DataRefImpl &Symb) const override { ++Symb.p; } + + std::error_code printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const override { + if (Symb.p == 0) + OS << "__imp_"; + OS << StringRef(Data.getBufferStart() + sizeof(coff_import_header)); + return std::error_code(); + } + + uint32_t getSymbolFlags(DataRefImpl Symb) const override { + return SymbolRef::SF_Global; + } + + basic_symbol_iterator symbol_begin_impl() const override { + return BasicSymbolRef(DataRefImpl(), this); + } + + basic_symbol_iterator symbol_end_impl() const override { + DataRefImpl Symb; + Symb.p = isCode() ? 
2 : 1; + return BasicSymbolRef(Symb, this); + } + + const coff_import_header *getCOFFImportHeader() const { + return reinterpret_cast( + Data.getBufferStart()); + } + +private: + bool isCode() const { + return getCOFFImportHeader()->getType() == COFF::IMPORT_CODE; + } +}; + +} // namespace object +} // namespace llvm + +#endif diff --git a/include/llvm/Object/ELF.h b/include/llvm/Object/ELF.h index cc271851e6b0..b0eaa3f5ed4d 100644 --- a/include/llvm/Object/ELF.h +++ b/include/llvm/Object/ELF.h @@ -14,25 +14,9 @@ #ifndef LLVM_OBJECT_ELF_H #define LLVM_OBJECT_ELF_H -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IntervalMap.h" -#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/ADT/Triple.h" #include "llvm/Object/ELFTypes.h" -#include "llvm/Object/Error.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/ELF.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include namespace llvm { namespace object { @@ -56,78 +40,6 @@ public: typedef typename std::conditional::type uintX_t; - /// \brief Iterate over constant sized entities. - template - class ELFEntityIterator { - public: - typedef ptrdiff_t difference_type; - typedef EntT value_type; - typedef std::forward_iterator_tag iterator_category; - typedef value_type &reference; - typedef value_type *pointer; - - /// \brief Default construct iterator. - ELFEntityIterator() : EntitySize(0), Current(nullptr) {} - ELFEntityIterator(uintX_t EntSize, const char *Start) - : EntitySize(EntSize), Current(Start) {} - - reference operator *() { - assert(Current && "Attempted to dereference an invalid iterator!"); - return *reinterpret_cast(Current); - } - - pointer operator ->() { - assert(Current && "Attempted to dereference an invalid iterator!"); - return reinterpret_cast(Current); - } - - bool operator ==(const ELFEntityIterator &Other) { - return Current == Other.Current; - } - - bool operator !=(const ELFEntityIterator &Other) { - return !(*this == Other); - } - - ELFEntityIterator &operator ++() { - assert(Current && "Attempted to increment an invalid iterator!"); - Current += EntitySize; - return *this; - } - - ELFEntityIterator &operator+(difference_type n) { - assert(Current && "Attempted to increment an invalid iterator!"); - Current += (n * EntitySize); - return *this; - } - - ELFEntityIterator &operator-(difference_type n) { - assert(Current && "Attempted to subtract an invalid iterator!"); - Current -= (n * EntitySize); - return *this; - } - - ELFEntityIterator operator ++(int) { - ELFEntityIterator Tmp = *this; - ++*this; - return Tmp; - } - - difference_type operator -(const ELFEntityIterator &Other) const { - assert(EntitySize == Other.EntitySize && - "Subtracting iterators of different EntitySize!"); - return (Current - Other.Current) / EntitySize; - } - - const char *get() const { return Current; } - - uintX_t getEntSize() const { return EntitySize; } - - private: - uintX_t EntitySize; - const char *Current; - }; - typedef Elf_Ehdr_Impl Elf_Ehdr; typedef Elf_Shdr_Impl Elf_Shdr; typedef Elf_Sym_Impl Elf_Sym; @@ -141,98 +53,22 @@ public: typedef Elf_Vernaux_Impl Elf_Vernaux; typedef Elf_Versym_Impl Elf_Versym; typedef Elf_Hash_Impl Elf_Hash; - typedef ELFEntityIterator Elf_Dyn_Iter; - typedef iterator_range Elf_Dyn_Range; - typedef ELFEntityIterator Elf_Rela_Iter; 
- typedef ELFEntityIterator Elf_Rel_Iter; + typedef Elf_GnuHash_Impl Elf_GnuHash; + typedef iterator_range Elf_Dyn_Range; typedef iterator_range Elf_Shdr_Range; - - /// \brief Archive files are 2 byte aligned, so we need this for - /// PointerIntPair to work. - template - class ArchivePointerTypeTraits { - public: - static inline const void *getAsVoidPointer(T *P) { return P; } - static inline T *getFromVoidPointer(const void *P) { - return static_cast(P); - } - enum { NumLowBitsAvailable = 1 }; - }; - typedef iterator_range Elf_Sym_Range; -private: - typedef SmallVector Sections_t; - typedef DenseMap IndexMap_t; - - StringRef Buf; - const uint8_t *base() const { return reinterpret_cast(Buf.data()); } +private: + + StringRef Buf; + const Elf_Ehdr *Header; const Elf_Shdr *SectionHeaderTable = nullptr; StringRef DotShstrtab; // Section header string table. - StringRef DotStrtab; // Symbol header string table. - const Elf_Shdr *dot_symtab_sec = nullptr; // Symbol table section. - const Elf_Shdr *DotDynSymSec = nullptr; // Dynamic symbol table section. - const Elf_Hash *HashTable = nullptr; - - const Elf_Shdr *SymbolTableSectionHeaderIndex = nullptr; - DenseMap ExtendedSymbolTable; - - const Elf_Shdr *dot_gnu_version_sec = nullptr; // .gnu.version - const Elf_Shdr *dot_gnu_version_r_sec = nullptr; // .gnu.version_r - const Elf_Shdr *dot_gnu_version_d_sec = nullptr; // .gnu.version_d - - /// \brief Represents a region described by entries in the .dynamic table. - struct DynRegionInfo { - DynRegionInfo() : Addr(nullptr), Size(0), EntSize(0) {} - /// \brief Address in current address space. - const void *Addr; - /// \brief Size in bytes of the region. - uintX_t Size; - /// \brief Size of each entity in the region. - uintX_t EntSize; - }; - - DynRegionInfo DynamicRegion; - DynRegionInfo DynHashRegion; - DynRegionInfo DynStrRegion; - DynRegionInfo DynRelaRegion; - - // Pointer to SONAME entry in dynamic string table - // This is set the first time getLoadName is called. - mutable const char *dt_soname = nullptr; - - // Records for each version index the corresponding Verdef or Vernaux entry. - // This is filled the first time LoadVersionMap() is called. - class VersionMapEntry : public PointerIntPair { - public: - // If the integer is 0, this is an Elf_Verdef*. - // If the integer is 1, this is an Elf_Vernaux*. - VersionMapEntry() : PointerIntPair(nullptr, 0) { } - VersionMapEntry(const Elf_Verdef *verdef) - : PointerIntPair(verdef, 0) { } - VersionMapEntry(const Elf_Vernaux *vernaux) - : PointerIntPair(vernaux, 1) { } - bool isNull() const { return getPointer() == nullptr; } - bool isVerdef() const { return !isNull() && getInt() == 0; } - bool isVernaux() const { return !isNull() && getInt() == 1; } - const Elf_Verdef *getVerdef() const { - return isVerdef() ? (const Elf_Verdef*)getPointer() : nullptr; - } - const Elf_Vernaux *getVernaux() const { - return isVernaux() ? 
(const Elf_Vernaux*)getPointer() : nullptr; - } - }; - mutable SmallVector VersionMap; - void LoadVersionDefs(const Elf_Shdr *sec) const; - void LoadVersionNeeds(const Elf_Shdr *ec) const; - void LoadVersionMap() const; - - void scanDynamicTable(); public: template @@ -240,25 +76,20 @@ public: template const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const; - const Elf_Shdr *getDotSymtabSec() const { return dot_symtab_sec; } - const Elf_Shdr *getDotDynSymSec() const { return DotDynSymSec; } - const Elf_Hash *getHashTable() const { return HashTable; } - ErrorOr getStringTable(const Elf_Shdr *Section) const; - const char *getDynamicString(uintX_t Offset) const; - ErrorOr getSymbolVersion(const Elf_Shdr *section, - const Elf_Sym *Symb, - bool &IsDefault) const; + ErrorOr getStringTableForSymtab(const Elf_Shdr &Section) const; + + ErrorOr> getSHNDXTable(const Elf_Shdr &Section) const; + void VerifyStrTab(const Elf_Shdr *sh) const; StringRef getRelocationTypeName(uint32_t Type) const; void getRelocationTypeName(uint32_t Type, SmallVectorImpl &Result) const; - /// \brief Get the symbol table section and symbol for a given relocation. - template - std::pair - getRelocationSymbol(const Elf_Shdr *RelSec, const RelT *Rel) const; + /// \brief Get the symbol for a given relocation. + const Elf_Sym *getRelocationSymbol(const Elf_Rel *Rel, + const Elf_Shdr *SymTab) const; ELFFile(StringRef Object, std::error_code &EC); @@ -273,111 +104,116 @@ public: Header->getDataEncoding() == ELF::ELFDATA2LSB; } + ErrorOr dynamic_table_begin(const Elf_Phdr *Phdr) const; + ErrorOr dynamic_table_end(const Elf_Phdr *Phdr) const; + ErrorOr dynamic_table(const Elf_Phdr *Phdr) const { + ErrorOr Begin = dynamic_table_begin(Phdr); + if (std::error_code EC = Begin.getError()) + return EC; + ErrorOr End = dynamic_table_end(Phdr); + if (std::error_code EC = End.getError()) + return EC; + return make_range(*Begin, *End); + } + const Elf_Shdr *section_begin() const; const Elf_Shdr *section_end() const; Elf_Shdr_Range sections() const { return make_range(section_begin(), section_end()); } - const Elf_Sym *symbol_begin() const; - const Elf_Sym *symbol_end() const; - Elf_Sym_Range symbols() const { - return make_range(symbol_begin(), symbol_end()); - } - - Elf_Dyn_Iter dynamic_table_begin() const; - /// \param NULLEnd use one past the first DT_NULL entry as the end instead of - /// the section size. 
- Elf_Dyn_Iter dynamic_table_end(bool NULLEnd = false) const; - Elf_Dyn_Range dynamic_table(bool NULLEnd = false) const { - return make_range(dynamic_table_begin(), dynamic_table_end(NULLEnd)); - } - - const Elf_Sym *dynamic_symbol_begin() const { - if (!DotDynSymSec) + const Elf_Sym *symbol_begin(const Elf_Shdr *Sec) const { + if (!Sec) return nullptr; - if (DotDynSymSec->sh_entsize != sizeof(Elf_Sym)) + if (Sec->sh_entsize != sizeof(Elf_Sym)) report_fatal_error("Invalid symbol size"); - return reinterpret_cast(base() + DotDynSymSec->sh_offset); + return reinterpret_cast(base() + Sec->sh_offset); } - - const Elf_Sym *dynamic_symbol_end() const { - if (!DotDynSymSec) + const Elf_Sym *symbol_end(const Elf_Shdr *Sec) const { + if (!Sec) return nullptr; - return reinterpret_cast(base() + DotDynSymSec->sh_offset + - DotDynSymSec->sh_size); + uint64_t Size = Sec->sh_size; + if (Size % sizeof(Elf_Sym)) + report_fatal_error("Invalid symbol table size"); + return symbol_begin(Sec) + Size / sizeof(Elf_Sym); + } + Elf_Sym_Range symbols(const Elf_Shdr *Sec) const { + return make_range(symbol_begin(Sec), symbol_end(Sec)); } - Elf_Sym_Range dynamic_symbols() const { - return make_range(dynamic_symbol_begin(), dynamic_symbol_end()); + typedef iterator_range Elf_Rela_Range; + + const Elf_Rela *rela_begin(const Elf_Shdr *sec) const { + if (sec->sh_entsize != sizeof(Elf_Rela)) + report_fatal_error("Invalid relocation entry size"); + return reinterpret_cast(base() + sec->sh_offset); } - Elf_Rela_Iter dyn_rela_begin() const { - if (DynRelaRegion.Addr) - return Elf_Rela_Iter(DynRelaRegion.EntSize, - (const char *)DynRelaRegion.Addr); - return Elf_Rela_Iter(0, nullptr); + const Elf_Rela *rela_end(const Elf_Shdr *sec) const { + uint64_t Size = sec->sh_size; + if (Size % sizeof(Elf_Rela)) + report_fatal_error("Invalid relocation table size"); + return rela_begin(sec) + Size / sizeof(Elf_Rela); } - Elf_Rela_Iter dyn_rela_end() const { - if (DynRelaRegion.Addr) - return Elf_Rela_Iter( - DynRelaRegion.EntSize, - (const char *)DynRelaRegion.Addr + DynRelaRegion.Size); - return Elf_Rela_Iter(0, nullptr); + Elf_Rela_Range relas(const Elf_Shdr *Sec) const { + return make_range(rela_begin(Sec), rela_end(Sec)); } - Elf_Rela_Iter rela_begin(const Elf_Shdr *sec) const { - return Elf_Rela_Iter(sec->sh_entsize, - (const char *)(base() + sec->sh_offset)); + const Elf_Rel *rel_begin(const Elf_Shdr *sec) const { + if (sec->sh_entsize != sizeof(Elf_Rel)) + report_fatal_error("Invalid relocation entry size"); + return reinterpret_cast(base() + sec->sh_offset); } - Elf_Rela_Iter rela_end(const Elf_Shdr *sec) const { - return Elf_Rela_Iter( - sec->sh_entsize, - (const char *)(base() + sec->sh_offset + sec->sh_size)); + const Elf_Rel *rel_end(const Elf_Shdr *sec) const { + uint64_t Size = sec->sh_size; + if (Size % sizeof(Elf_Rel)) + report_fatal_error("Invalid relocation table size"); + return rel_begin(sec) + Size / sizeof(Elf_Rel); } - Elf_Rel_Iter rel_begin(const Elf_Shdr *sec) const { - return Elf_Rel_Iter(sec->sh_entsize, - (const char *)(base() + sec->sh_offset)); - } - - Elf_Rel_Iter rel_end(const Elf_Shdr *sec) const { - return Elf_Rel_Iter(sec->sh_entsize, - (const char *)(base() + sec->sh_offset + sec->sh_size)); + typedef iterator_range Elf_Rel_Range; + Elf_Rel_Range rels(const Elf_Shdr *Sec) const { + return make_range(rel_begin(Sec), rel_end(Sec)); } /// \brief Iterate over program header table. 
- typedef ELFEntityIterator Elf_Phdr_Iter; - - Elf_Phdr_Iter program_header_begin() const { - return Elf_Phdr_Iter(Header->e_phentsize, - (const char*)base() + Header->e_phoff); + const Elf_Phdr *program_header_begin() const { + if (Header->e_phnum && Header->e_phentsize != sizeof(Elf_Phdr)) + report_fatal_error("Invalid program header size"); + return reinterpret_cast(base() + Header->e_phoff); } - Elf_Phdr_Iter program_header_end() const { - return Elf_Phdr_Iter(Header->e_phentsize, - (const char*)base() + - Header->e_phoff + - (Header->e_phnum * Header->e_phentsize)); + const Elf_Phdr *program_header_end() const { + return program_header_begin() + Header->e_phnum; + } + + typedef iterator_range Elf_Phdr_Range; + + const Elf_Phdr_Range program_headers() const { + return make_range(program_header_begin(), program_header_end()); } uint64_t getNumSections() const; uintX_t getStringTableIndex() const; - ELF::Elf64_Word getExtendedSymbolTableIndex(const Elf_Sym *symb) const; + uint32_t getExtendedSymbolTableIndex(const Elf_Sym *Sym, + const Elf_Shdr *SymTab, + ArrayRef ShndxTable) const; const Elf_Ehdr *getHeader() const { return Header; } - ErrorOr getSection(const Elf_Sym *symb) const; + ErrorOr getSection(const Elf_Sym *Sym, + const Elf_Shdr *SymTab, + ArrayRef ShndxTable) const; ErrorOr getSection(uint32_t Index) const; - const Elf_Sym *getSymbol(uint32_t index) const; - ErrorOr getStaticSymbolName(const Elf_Sym *Symb) const; - ErrorOr getDynamicSymbolName(const Elf_Sym *Symb) const; - ErrorOr getSymbolName(const Elf_Sym *Symb, bool IsDynamic) const; + const Elf_Sym *getSymbol(const Elf_Shdr *Sec, uint32_t Index) const { + return &*(symbol_begin(Sec) + Index); + } ErrorOr getSectionName(const Elf_Shdr *Section) const; + template + ErrorOr> getSectionContentsAsArray(const Elf_Shdr *Sec) const; ErrorOr > getSectionContents(const Elf_Shdr *Sec) const; - StringRef getLoadName() const; }; typedef ELFFile> ELF32LEFile; @@ -385,118 +221,50 @@ typedef ELFFile> ELF64LEFile; typedef ELFFile> ELF32BEFile; typedef ELFFile> ELF64BEFile; -// Iterate through the version definitions, and place each Elf_Verdef -// in the VersionMap according to its index. template -void ELFFile::LoadVersionDefs(const Elf_Shdr *sec) const { - unsigned vd_size = sec->sh_size; // Size of section in bytes - unsigned vd_count = sec->sh_info; // Number of Verdef entries - const char *sec_start = (const char*)base() + sec->sh_offset; - const char *sec_end = sec_start + vd_size; - // The first Verdef entry is at the start of the section. - const char *p = sec_start; - for (unsigned i = 0; i < vd_count; i++) { - if (p + sizeof(Elf_Verdef) > sec_end) - report_fatal_error("Section ended unexpectedly while scanning " - "version definitions."); - const Elf_Verdef *vd = reinterpret_cast(p); - if (vd->vd_version != ELF::VER_DEF_CURRENT) - report_fatal_error("Unexpected verdef version"); - size_t index = vd->vd_ndx & ELF::VERSYM_VERSION; - if (index >= VersionMap.size()) - VersionMap.resize(index + 1); - VersionMap[index] = VersionMapEntry(vd); - p += vd->vd_next; - } -} +uint32_t ELFFile::getExtendedSymbolTableIndex( + const Elf_Sym *Sym, const Elf_Shdr *SymTab, + ArrayRef ShndxTable) const { + assert(Sym->st_shndx == ELF::SHN_XINDEX); + unsigned Index = Sym - symbol_begin(SymTab); -// Iterate through the versions needed section, and place each Elf_Vernaux -// in the VersionMap according to its index. 
-template -void ELFFile::LoadVersionNeeds(const Elf_Shdr *sec) const { - unsigned vn_size = sec->sh_size; // Size of section in bytes - unsigned vn_count = sec->sh_info; // Number of Verneed entries - const char *sec_start = (const char *)base() + sec->sh_offset; - const char *sec_end = sec_start + vn_size; - // The first Verneed entry is at the start of the section. - const char *p = sec_start; - for (unsigned i = 0; i < vn_count; i++) { - if (p + sizeof(Elf_Verneed) > sec_end) - report_fatal_error("Section ended unexpectedly while scanning " - "version needed records."); - const Elf_Verneed *vn = reinterpret_cast(p); - if (vn->vn_version != ELF::VER_NEED_CURRENT) - report_fatal_error("Unexpected verneed version"); - // Iterate through the Vernaux entries - const char *paux = p + vn->vn_aux; - for (unsigned j = 0; j < vn->vn_cnt; j++) { - if (paux + sizeof(Elf_Vernaux) > sec_end) - report_fatal_error("Section ended unexpected while scanning auxiliary " - "version needed records."); - const Elf_Vernaux *vna = reinterpret_cast(paux); - size_t index = vna->vna_other & ELF::VERSYM_VERSION; - if (index >= VersionMap.size()) - VersionMap.resize(index + 1); - VersionMap[index] = VersionMapEntry(vna); - paux += vna->vna_next; - } - p += vn->vn_next; - } -} - -template -void ELFFile::LoadVersionMap() const { - // If there is no dynamic symtab or version table, there is nothing to do. - if (!DotDynSymSec || !dot_gnu_version_sec) - return; - - // Has the VersionMap already been loaded? - if (VersionMap.size() > 0) - return; - - // The first two version indexes are reserved. - // Index 0 is LOCAL, index 1 is GLOBAL. - VersionMap.push_back(VersionMapEntry()); - VersionMap.push_back(VersionMapEntry()); - - if (dot_gnu_version_d_sec) - LoadVersionDefs(dot_gnu_version_d_sec); - - if (dot_gnu_version_r_sec) - LoadVersionNeeds(dot_gnu_version_r_sec); -} - -template -ELF::Elf64_Word -ELFFile::getExtendedSymbolTableIndex(const Elf_Sym *symb) const { - assert(symb->st_shndx == ELF::SHN_XINDEX); - return ExtendedSymbolTable.lookup(symb); + // The size of the table was checked in getSHNDXTable. 
+ return ShndxTable[Index]; } template ErrorOr::Elf_Shdr *> -ELFFile::getSection(const Elf_Sym *symb) const { - uint32_t Index = symb->st_shndx; +ELFFile::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab, + ArrayRef ShndxTable) const { + uint32_t Index = Sym->st_shndx; if (Index == ELF::SHN_XINDEX) - return getSection(ExtendedSymbolTable.lookup(symb)); + return getSection(getExtendedSymbolTableIndex(Sym, SymTab, ShndxTable)); + if (Index == ELF::SHN_UNDEF || Index >= ELF::SHN_LORESERVE) return nullptr; - return getSection(symb->st_shndx); + return getSection(Sym->st_shndx); } template -const typename ELFFile::Elf_Sym * -ELFFile::getSymbol(uint32_t Index) const { - return &*(symbol_begin() + Index); -} +template +ErrorOr> +ELFFile::getSectionContentsAsArray(const Elf_Shdr *Sec) const { + uintX_t Offset = Sec->sh_offset; + uintX_t Size = Sec->sh_size; -template -ErrorOr > -ELFFile::getSectionContents(const Elf_Shdr *Sec) const { - if (Sec->sh_offset + Sec->sh_size > Buf.size()) + if (Size % sizeof(T)) return object_error::parse_failed; - const uint8_t *Start = base() + Sec->sh_offset; - return makeArrayRef(Start, Sec->sh_size); + if (Offset + Size > Buf.size()) + return object_error::parse_failed; + + const T *Start = reinterpret_cast(base() + Offset); + return makeArrayRef(Start, Size / sizeof(T)); +} + +template +ErrorOr> +ELFFile::getSectionContents(const Elf_Shdr *Sec) const { + return getSectionContentsAsArray(Sec); } template @@ -536,18 +304,13 @@ void ELFFile::getRelocationTypeName(uint32_t Type, } template -template -std::pair::Elf_Shdr *, - const typename ELFFile::Elf_Sym *> -ELFFile::getRelocationSymbol(const Elf_Shdr *Sec, const RelT *Rel) const { - if (!Sec->sh_link) - return std::make_pair(nullptr, nullptr); - ErrorOr SymTableOrErr = getSection(Sec->sh_link); - if (std::error_code EC = SymTableOrErr.getError()) - report_fatal_error(EC.message()); - const Elf_Shdr *SymTable = *SymTableOrErr; - return std::make_pair( - SymTable, getEntry(SymTable, Rel->getSymbol(isMips64EL()))); +const typename ELFFile::Elf_Sym * +ELFFile::getRelocationSymbol(const Elf_Rel *Rel, + const Elf_Shdr *SymTab) const { + uint32_t Index = Rel->getSymbol(isMips64EL()); + if (Index == 0) + return nullptr; + return getEntry(SymTab, Index); } template @@ -584,10 +347,8 @@ ELFFile::ELFFile(StringRef Object, std::error_code &EC) Header = reinterpret_cast(base()); - if (Header->e_shoff == 0) { - scanDynamicTable(); + if (Header->e_shoff == 0) return; - } const uint64_t SectionTableOffset = Header->e_shoff; @@ -608,185 +369,25 @@ ELFFile::ELFFile(StringRef Object, std::error_code &EC) return; } - // Scan sections for special sections. - - for (const Elf_Shdr &Sec : sections()) { - switch (Sec.sh_type) { - case ELF::SHT_HASH: - if (HashTable) { - EC = object_error::parse_failed; - return; - } - HashTable = reinterpret_cast(base() + Sec.sh_offset); - break; - case ELF::SHT_SYMTAB_SHNDX: - if (SymbolTableSectionHeaderIndex) { - // More than one .symtab_shndx! - EC = object_error::parse_failed; - return; - } - SymbolTableSectionHeaderIndex = &Sec; - break; - case ELF::SHT_SYMTAB: { - if (dot_symtab_sec) { - // More than one .symtab! 
- EC = object_error::parse_failed; - return; - } - dot_symtab_sec = &Sec; - ErrorOr SectionOrErr = getSection(Sec.sh_link); - if ((EC = SectionOrErr.getError())) - return; - ErrorOr SymtabOrErr = getStringTable(*SectionOrErr); - if ((EC = SymtabOrErr.getError())) - return; - DotStrtab = *SymtabOrErr; - } break; - case ELF::SHT_DYNSYM: { - if (DotDynSymSec) { - // More than one .dynsym! - EC = object_error::parse_failed; - return; - } - DotDynSymSec = &Sec; - ErrorOr SectionOrErr = getSection(Sec.sh_link); - if ((EC = SectionOrErr.getError())) - return; - ErrorOr SymtabOrErr = getStringTable(*SectionOrErr); - if ((EC = SymtabOrErr.getError())) - return; - DynStrRegion.Addr = SymtabOrErr->data(); - DynStrRegion.Size = SymtabOrErr->size(); - DynStrRegion.EntSize = 1; - break; - } - case ELF::SHT_DYNAMIC: - if (DynamicRegion.Addr) { - // More than one .dynamic! - EC = object_error::parse_failed; - return; - } - DynamicRegion.Addr = base() + Sec.sh_offset; - DynamicRegion.Size = Sec.sh_size; - DynamicRegion.EntSize = Sec.sh_entsize; - break; - case ELF::SHT_GNU_versym: - if (dot_gnu_version_sec != nullptr) { - // More than one .gnu.version section! - EC = object_error::parse_failed; - return; - } - dot_gnu_version_sec = &Sec; - break; - case ELF::SHT_GNU_verdef: - if (dot_gnu_version_d_sec != nullptr) { - // More than one .gnu.version_d section! - EC = object_error::parse_failed; - return; - } - dot_gnu_version_d_sec = &Sec; - break; - case ELF::SHT_GNU_verneed: - if (dot_gnu_version_r_sec != nullptr) { - // More than one .gnu.version_r section! - EC = object_error::parse_failed; - return; - } - dot_gnu_version_r_sec = &Sec; - break; - } - } - // Get string table sections. - ErrorOr StrTabSecOrErr = getSection(getStringTableIndex()); - if ((EC = StrTabSecOrErr.getError())) - return; + uintX_t StringTableIndex = getStringTableIndex(); + if (StringTableIndex) { + ErrorOr StrTabSecOrErr = getSection(StringTableIndex); + if ((EC = StrTabSecOrErr.getError())) + return; - ErrorOr SymtabOrErr = getStringTable(*StrTabSecOrErr); - if ((EC = SymtabOrErr.getError())) - return; - DotShstrtab = *SymtabOrErr; - - // Build symbol name side-mapping if there is one. - if (SymbolTableSectionHeaderIndex) { - const Elf_Word *ShndxTable = reinterpret_cast(base() + - SymbolTableSectionHeaderIndex->sh_offset); - for (const Elf_Sym &S : symbols()) { - if (*ShndxTable != ELF::SHN_UNDEF) - ExtendedSymbolTable[&S] = *ShndxTable; - ++ShndxTable; - } + ErrorOr StringTableOrErr = getStringTable(*StrTabSecOrErr); + if ((EC = StringTableOrErr.getError())) + return; + DotShstrtab = *StringTableOrErr; } - scanDynamicTable(); - EC = std::error_code(); } template -void ELFFile::scanDynamicTable() { - // Build load-address to file-offset map. - typedef IntervalMap< - uintX_t, uintptr_t, - IntervalMapImpl::NodeSizer::LeafSize, - IntervalMapHalfOpenInfo> LoadMapT; - typename LoadMapT::Allocator Alloc; - // Allocate the IntervalMap on the heap to work around MSVC bug where the - // stack doesn't get realigned despite LoadMap having alignment 8 (PR24113). 
- std::unique_ptr LoadMap(new LoadMapT(Alloc)); - - for (Elf_Phdr_Iter PhdrI = program_header_begin(), - PhdrE = program_header_end(); - PhdrI != PhdrE; ++PhdrI) { - if (PhdrI->p_type == ELF::PT_DYNAMIC) { - DynamicRegion.Addr = base() + PhdrI->p_offset; - DynamicRegion.Size = PhdrI->p_filesz; - DynamicRegion.EntSize = sizeof(Elf_Dyn); - continue; - } - if (PhdrI->p_type != ELF::PT_LOAD) - continue; - if (PhdrI->p_filesz == 0) - continue; - LoadMap->insert(PhdrI->p_vaddr, PhdrI->p_vaddr + PhdrI->p_filesz, - PhdrI->p_offset); - } - - auto toMappedAddr = [&](uint64_t VAddr) -> const uint8_t * { - auto I = LoadMap->find(VAddr); - if (I == LoadMap->end()) - return nullptr; - return this->base() + I.value() + (VAddr - I.start()); - }; - - for (Elf_Dyn_Iter DynI = dynamic_table_begin(), DynE = dynamic_table_end(); - DynI != DynE; ++DynI) { - switch (DynI->d_tag) { - case ELF::DT_HASH: - if (HashTable) - continue; - HashTable = - reinterpret_cast(toMappedAddr(DynI->getPtr())); - break; - case ELF::DT_STRTAB: - if (!DynStrRegion.Addr) - DynStrRegion.Addr = toMappedAddr(DynI->getPtr()); - break; - case ELF::DT_STRSZ: - if (!DynStrRegion.Size) - DynStrRegion.Size = DynI->getVal(); - break; - case ELF::DT_RELA: - if (!DynRelaRegion.Addr) - DynRelaRegion.Addr = toMappedAddr(DynI->getPtr()); - break; - case ELF::DT_RELASZ: - DynRelaRegion.Size = DynI->getVal(); - break; - case ELF::DT_RELAENT: - DynRelaRegion.EntSize = DynI->getVal(); - } - } +static bool compareAddr(uint64_t VAddr, const Elf_Phdr_Impl *Phdr) { + return VAddr < Phdr->p_vaddr; } template @@ -803,64 +404,31 @@ const typename ELFFile::Elf_Shdr *ELFFile::section_end() const { } template -const typename ELFFile::Elf_Sym *ELFFile::symbol_begin() const { - if (!dot_symtab_sec) +ErrorOr::Elf_Dyn *> +ELFFile::dynamic_table_begin(const Elf_Phdr *Phdr) const { + if (!Phdr) return nullptr; - if (dot_symtab_sec->sh_entsize != sizeof(Elf_Sym)) - report_fatal_error("Invalid symbol size"); - return reinterpret_cast(base() + dot_symtab_sec->sh_offset); + assert(Phdr->p_type == ELF::PT_DYNAMIC && "Got the wrong program header"); + uintX_t Offset = Phdr->p_offset; + if (Offset > Buf.size()) + return object_error::parse_failed; + return reinterpret_cast(base() + Offset); } template -const typename ELFFile::Elf_Sym *ELFFile::symbol_end() const { - if (!dot_symtab_sec) +ErrorOr::Elf_Dyn *> +ELFFile::dynamic_table_end(const Elf_Phdr *Phdr) const { + if (!Phdr) return nullptr; - return reinterpret_cast(base() + dot_symtab_sec->sh_offset + - dot_symtab_sec->sh_size); -} - -template -typename ELFFile::Elf_Dyn_Iter -ELFFile::dynamic_table_begin() const { - if (DynamicRegion.Addr) - return Elf_Dyn_Iter(DynamicRegion.EntSize, - (const char *)DynamicRegion.Addr); - return Elf_Dyn_Iter(0, nullptr); -} - -template -typename ELFFile::Elf_Dyn_Iter -ELFFile::dynamic_table_end(bool NULLEnd) const { - if (!DynamicRegion.Addr) - return Elf_Dyn_Iter(0, nullptr); - Elf_Dyn_Iter Ret(DynamicRegion.EntSize, - (const char *)DynamicRegion.Addr + DynamicRegion.Size); - - if (NULLEnd) { - Elf_Dyn_Iter Start = dynamic_table_begin(); - while (Start != Ret && Start->getTag() != ELF::DT_NULL) - ++Start; - - // Include the DT_NULL. 
- if (Start != Ret) - ++Start; - Ret = Start; - } - return Ret; -} - -template <class ELFT> -StringRef ELFFile<ELFT>::getLoadName() const { - if (!dt_soname) { - dt_soname = ""; - // Find the DT_SONAME entry - for (const auto &Entry : dynamic_table()) - if (Entry.getTag() == ELF::DT_SONAME) { - dt_soname = getDynamicString(Entry.getVal()); - break; - } - } - return dt_soname; + assert(Phdr->p_type == ELF::PT_DYNAMIC && "Got the wrong program header"); + uintX_t Size = Phdr->p_filesz; + if (Size % sizeof(Elf_Dyn)) + return object_error::elf_invalid_dynamic_table_size; + // FIXME: Check for overflow? + uintX_t End = Phdr->p_offset + Size; + if (End > Buf.size()) + return object_error::parse_failed; + return reinterpret_cast<const Elf_Dyn *>(base() + End); } template <class ELFT> @@ -908,127 +476,52 @@ ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section) const { } template <class ELFT> -const char *ELFFile<ELFT>::getDynamicString(uintX_t Offset) const { - if (Offset >= DynStrRegion.Size) - return nullptr; - return (const char *)DynStrRegion.Addr + Offset; +ErrorOr<ArrayRef<typename ELFFile<ELFT>::Elf_Word>> +ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section) const { + assert(Section.sh_type == ELF::SHT_SYMTAB_SHNDX); + const Elf_Word *ShndxTableBegin = + reinterpret_cast<const Elf_Word *>(base() + Section.sh_offset); + uintX_t Size = Section.sh_size; + if (Size % sizeof(uint32_t)) + return object_error::parse_failed; + uintX_t NumSymbols = Size / sizeof(uint32_t); + const Elf_Word *ShndxTableEnd = ShndxTableBegin + NumSymbols; + if (reinterpret_cast<const char *>(ShndxTableEnd) > Buf.end()) + return object_error::parse_failed; + ErrorOr<const Elf_Shdr *> SymTableOrErr = getSection(Section.sh_link); + if (std::error_code EC = SymTableOrErr.getError()) + return EC; + const Elf_Shdr &SymTable = **SymTableOrErr; + if (SymTable.sh_type != ELF::SHT_SYMTAB && + SymTable.sh_type != ELF::SHT_DYNSYM) + return object_error::parse_failed; + if (NumSymbols != (SymTable.sh_size / sizeof(Elf_Sym))) + return object_error::parse_failed; + return makeArrayRef(ShndxTableBegin, ShndxTableEnd); } template <class ELFT> ErrorOr<StringRef> -ELFFile<ELFT>::getStaticSymbolName(const Elf_Sym *Symb) const { - return Symb->getName(DotStrtab); -} - -template <class ELFT> -ErrorOr<StringRef> -ELFFile<ELFT>::getDynamicSymbolName(const Elf_Sym *Symb) const { - return StringRef(getDynamicString(Symb->st_name)); -} - -template <class ELFT> -ErrorOr<StringRef> ELFFile<ELFT>::getSymbolName(const Elf_Sym *Symb, - bool IsDynamic) const { - if (IsDynamic) - return getDynamicSymbolName(Symb); - return getStaticSymbolName(Symb); +ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec) const { + if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM) + return object_error::parse_failed; + ErrorOr<const Elf_Shdr *> SectionOrErr = getSection(Sec.sh_link); + if (std::error_code EC = SectionOrErr.getError()) + return EC; + return getStringTable(*SectionOrErr); } template <class ELFT> ErrorOr<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section) const { uint32_t Offset = Section->sh_name; + if (Offset == 0) + return StringRef(); if (Offset >= DotShstrtab.size()) return object_error::parse_failed; return StringRef(DotShstrtab.data() + Offset); } -template <class ELFT> -ErrorOr<StringRef> ELFFile<ELFT>::getSymbolVersion(const Elf_Shdr *section, - const Elf_Sym *symb, - bool &IsDefault) const { - StringRef StrTab; - if (section) { - ErrorOr<StringRef> StrTabOrErr = getStringTable(section); - if (std::error_code EC = StrTabOrErr.getError()) - return EC; - StrTab = *StrTabOrErr; - } - // Handle non-dynamic symbols. - if (section != DotDynSymSec && section != nullptr) { - // Non-dynamic symbols can have versions in their names - // A name of the form 'foo@V1' indicates version 'V1', non-default.
- // A name of the form 'foo@@V2' indicates version 'V2', default version. - ErrorOr SymName = symb->getName(StrTab); - if (!SymName) - return SymName; - StringRef Name = *SymName; - size_t atpos = Name.find('@'); - if (atpos == StringRef::npos) { - IsDefault = false; - return StringRef(""); - } - ++atpos; - if (atpos < Name.size() && Name[atpos] == '@') { - IsDefault = true; - ++atpos; - } else { - IsDefault = false; - } - return Name.substr(atpos); - } - - // This is a dynamic symbol. Look in the GNU symbol version table. - if (!dot_gnu_version_sec) { - // No version table. - IsDefault = false; - return StringRef(""); - } - - // Determine the position in the symbol table of this entry. - size_t entry_index = - (reinterpret_cast(symb) - DotDynSymSec->sh_offset - - reinterpret_cast(base())) / - sizeof(Elf_Sym); - - // Get the corresponding version index entry - const Elf_Versym *vs = getEntry(dot_gnu_version_sec, entry_index); - size_t version_index = vs->vs_index & ELF::VERSYM_VERSION; - - // Special markers for unversioned symbols. - if (version_index == ELF::VER_NDX_LOCAL || - version_index == ELF::VER_NDX_GLOBAL) { - IsDefault = false; - return StringRef(""); - } - - // Lookup this symbol in the version table - LoadVersionMap(); - if (version_index >= VersionMap.size() || VersionMap[version_index].isNull()) - return object_error::parse_failed; - const VersionMapEntry &entry = VersionMap[version_index]; - - // Get the version name string - size_t name_offset; - if (entry.isVerdef()) { - // The first Verdaux entry holds the name. - name_offset = entry.getVerdef()->getAux()->vda_name; - } else { - name_offset = entry.getVernaux()->vna_name; - } - - // Set IsDefault - if (entry.isVerdef()) { - IsDefault = !(vs->vs_index & ELF::VERSYM_HIDDEN); - } else { - IsDefault = false; - } - - if (name_offset >= DynStrRegion.Size) - return object_error::parse_failed; - return StringRef(getDynamicString(name_offset)); -} - /// This function returns the hash value for a symbol in the .dynsym section /// Name of the API remains consistent as specified in the libelf /// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h index 6e8ace427a20..5823848aaacb 100644 --- a/include/llvm/Object/ELFObjectFile.h +++ b/include/llvm/Object/ELFObjectFile.h @@ -189,11 +189,13 @@ public: typedef typename ELFFile::Elf_Rela Elf_Rela; typedef typename ELFFile::Elf_Dyn Elf_Dyn; - typedef typename ELFFile::Elf_Dyn_Iter Elf_Dyn_Iter; - protected: ELFFile EF; + const Elf_Shdr *DotDynSymSec = nullptr; // Dynamic symbol table section. + const Elf_Shdr *DotSymtabSec = nullptr; // Symbol table section. 
+ ArrayRef ShndxTable; + void moveSymbolNext(DataRefImpl &Symb) const override; ErrorOr getSymbolName(DataRefImpl Symb) const override; ErrorOr getSymbolAddress(DataRefImpl Symb) const override; @@ -204,9 +206,9 @@ protected: uint8_t getSymbolOther(DataRefImpl Symb) const override; uint8_t getSymbolELFType(DataRefImpl Symb) const override; SymbolRef::Type getSymbolType(DataRefImpl Symb) const override; - section_iterator getSymbolSection(const Elf_Sym *Symb) const; - std::error_code getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const override; + ErrorOr getSymbolSection(const Elf_Sym *Symb, + const Elf_Shdr *SymTab) const; + ErrorOr getSymbolSection(DataRefImpl Symb) const override; void moveSectionNext(DataRefImpl &Sec) const override; std::error_code getSectionName(DataRefImpl Sec, @@ -240,10 +242,6 @@ protected: return *EF.getSection(Rel.d.a); } - const Elf_Sym *toELFSymIter(DataRefImpl Sym) const { - return EF.template getEntry(Sym.d.a, Sym.d.b); - } - DataRefImpl toDRI(const Elf_Shdr *SymTable, unsigned SymbolNum) const { DataRefImpl DRI; if (!SymTable) { @@ -273,9 +271,9 @@ protected: return DRI; } - DataRefImpl toDRI(Elf_Dyn_Iter Dyn) const { + DataRefImpl toDRI(const Elf_Dyn *Dyn) const { DataRefImpl DRI; - DRI.p = reinterpret_cast(Dyn.get()); + DRI.p = reinterpret_cast(Dyn); return DRI; } @@ -304,7 +302,13 @@ public: const Elf_Rel *getRel(DataRefImpl Rel) const; const Elf_Rela *getRela(DataRefImpl Rela) const; - const Elf_Sym *getSymbol(DataRefImpl Symb) const; + const Elf_Sym *getSymbol(DataRefImpl Sym) const { + return EF.template getEntry(Sym.d.a, Sym.d.b); + } + + const Elf_Shdr *getSection(DataRefImpl Sec) const { + return reinterpret_cast(Sec.p); + } basic_symbol_iterator symbol_begin_impl() const override; basic_symbol_iterator symbol_end_impl() const override; @@ -320,7 +324,6 @@ public: uint8_t getBytesInAddress() const override; StringRef getFileFormatName() const override; unsigned getArch() const override; - StringRef getLoadName() const; std::error_code getPlatformFlags(unsigned &Result) const override { Result = EF.getHeader()->e_flags; @@ -352,7 +355,7 @@ void ELFObjectFile::moveSymbolNext(DataRefImpl &Sym) const { template ErrorOr ELFObjectFile::getSymbolName(DataRefImpl Sym) const { - const Elf_Sym *ESym = toELFSymIter(Sym); + const Elf_Sym *ESym = getSymbol(Sym); const Elf_Shdr *SymTableSec = *EF.getSection(Sym.d.a); const Elf_Shdr *StringTableSec = *EF.getSection(SymTableSec->sh_link); StringRef SymTable = *EF.getStringTable(StringTableSec); @@ -361,12 +364,12 @@ ErrorOr ELFObjectFile::getSymbolName(DataRefImpl Sym) const { template uint64_t ELFObjectFile::getSectionFlags(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_flags; + return getSection(Sec)->sh_flags; } template uint32_t ELFObjectFile::getSectionType(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_type; + return getSection(Sec)->sh_type; } template @@ -398,9 +401,11 @@ ELFObjectFile::getSymbolAddress(DataRefImpl Symb) const { } const Elf_Ehdr *Header = EF.getHeader(); + const Elf_Shdr *SymTab = *EF.getSection(Symb.d.a); if (Header->e_type == ELF::ET_REL) { - ErrorOr SectionOrErr = EF.getSection(ESym); + ErrorOr SectionOrErr = + EF.getSection(ESym, SymTab, ShndxTable); if (std::error_code EC = SectionOrErr.getError()) return EC; const Elf_Shdr *Section = *SectionOrErr; @@ -413,7 +418,7 @@ ELFObjectFile::getSymbolAddress(DataRefImpl Symb) const { template uint32_t ELFObjectFile::getSymbolAlignment(DataRefImpl Symb) const { - const Elf_Sym *Sym = toELFSymIter(Symb); + const 
Elf_Sym *Sym = getSymbol(Symb); if (Sym->st_shndx == ELF::SHN_COMMON) return Sym->st_value; return 0; @@ -421,22 +426,22 @@ uint32_t ELFObjectFile::getSymbolAlignment(DataRefImpl Symb) const { template uint64_t ELFObjectFile::getSymbolSize(DataRefImpl Sym) const { - return toELFSymIter(Sym)->st_size; + return getSymbol(Sym)->st_size; } template uint64_t ELFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const { - return toELFSymIter(Symb)->st_size; + return getSymbol(Symb)->st_size; } template uint8_t ELFObjectFile::getSymbolOther(DataRefImpl Symb) const { - return toELFSymIter(Symb)->st_other; + return getSymbol(Symb)->st_other; } template uint8_t ELFObjectFile::getSymbolELFType(DataRefImpl Symb) const { - return toELFSymIter(Symb)->getType(); + return getSymbol(Symb)->getType(); } template @@ -463,7 +468,7 @@ SymbolRef::Type ELFObjectFile::getSymbolType(DataRefImpl Symb) const { template uint32_t ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { - const Elf_Sym *ESym = toELFSymIter(Sym); + const Elf_Sym *ESym = getSymbol(Sym); uint32_t Result = SymbolRef::SF_None; @@ -477,7 +482,8 @@ uint32_t ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { Result |= SymbolRef::SF_Absolute; if (ESym->getType() == ELF::STT_FILE || ESym->getType() == ELF::STT_SECTION || - ESym == EF.symbol_begin() || ESym == EF.dynamic_symbol_begin()) + ESym == EF.symbol_begin(DotSymtabSec) || + ESym == EF.symbol_begin(DotDynSymSec)) Result |= SymbolRef::SF_FormatSpecific; if (EF.getHeader()->e_machine == ELF::EM_ARM) { @@ -505,11 +511,12 @@ uint32_t ELFObjectFile::getSymbolFlags(DataRefImpl Sym) const { } template -section_iterator -ELFObjectFile::getSymbolSection(const Elf_Sym *ESym) const { - ErrorOr ESecOrErr = EF.getSection(ESym); +ErrorOr +ELFObjectFile::getSymbolSection(const Elf_Sym *ESym, + const Elf_Shdr *SymTab) const { + ErrorOr ESecOrErr = EF.getSection(ESym, SymTab, ShndxTable); if (std::error_code EC = ESecOrErr.getError()) - report_fatal_error(EC.message()); + return EC; const Elf_Shdr *ESec = *ESecOrErr; if (!ESec) @@ -521,23 +528,23 @@ ELFObjectFile::getSymbolSection(const Elf_Sym *ESym) const { } template -std::error_code -ELFObjectFile::getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const { - Res = getSymbolSection(getSymbol(Symb)); - return std::error_code(); +ErrorOr +ELFObjectFile::getSymbolSection(DataRefImpl Symb) const { + const Elf_Sym *Sym = getSymbol(Symb); + const Elf_Shdr *SymTab = *EF.getSection(Symb.d.a); + return getSymbolSection(Sym, SymTab); } template void ELFObjectFile::moveSectionNext(DataRefImpl &Sec) const { - const Elf_Shdr *ESec = toELFShdrIter(Sec); + const Elf_Shdr *ESec = getSection(Sec); Sec = toDRI(++ESec); } template std::error_code ELFObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const { - ErrorOr Name = EF.getSectionName(&*toELFShdrIter(Sec)); + ErrorOr Name = EF.getSectionName(&*getSection(Sec)); if (!Name) return Name.getError(); Result = *Name; @@ -546,50 +553,50 @@ std::error_code ELFObjectFile::getSectionName(DataRefImpl Sec, template uint64_t ELFObjectFile::getSectionAddress(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_addr; + return getSection(Sec)->sh_addr; } template uint64_t ELFObjectFile::getSectionSize(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_size; + return getSection(Sec)->sh_size; } template std::error_code ELFObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Result) const { - const Elf_Shdr *EShdr = toELFShdrIter(Sec); + const Elf_Shdr *EShdr = getSection(Sec); Result = 
StringRef((const char *)base() + EShdr->sh_offset, EShdr->sh_size); return std::error_code(); } template uint64_t ELFObjectFile::getSectionAlignment(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_addralign; + return getSection(Sec)->sh_addralign; } template bool ELFObjectFile::isSectionText(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_flags & ELF::SHF_EXECINSTR; + return getSection(Sec)->sh_flags & ELF::SHF_EXECINSTR; } template bool ELFObjectFile::isSectionData(DataRefImpl Sec) const { - const Elf_Shdr *EShdr = toELFShdrIter(Sec); + const Elf_Shdr *EShdr = getSection(Sec); return EShdr->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) && EShdr->sh_type == ELF::SHT_PROGBITS; } template bool ELFObjectFile::isSectionBSS(DataRefImpl Sec) const { - const Elf_Shdr *EShdr = toELFShdrIter(Sec); + const Elf_Shdr *EShdr = getSection(Sec); return EShdr->sh_flags & (ELF::SHF_ALLOC | ELF::SHF_WRITE) && EShdr->sh_type == ELF::SHT_NOBITS; } template bool ELFObjectFile::isSectionVirtual(DataRefImpl Sec) const { - return toELFShdrIter(Sec)->sh_type == ELF::SHT_NOBITS; + return getSection(Sec)->sh_type == ELF::SHT_NOBITS; } template @@ -636,7 +643,7 @@ ELFObjectFile::getRelocatedSection(DataRefImpl Sec) const { if (EF.getHeader()->e_type != ELF::ET_REL) return section_end(); - const Elf_Shdr *EShdr = toELFShdrIter(Sec); + const Elf_Shdr *EShdr = getSection(Sec); uintX_t Type = EShdr->sh_type; if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA) return section_end(); @@ -668,9 +675,9 @@ ELFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { bool IsDyn = Rel.d.b & 1; DataRefImpl SymbolData; if (IsDyn) - SymbolData = toDRI(EF.getDotDynSymSec(), symbolIdx); + SymbolData = toDRI(DotDynSymSec, symbolIdx); else - SymbolData = toDRI(EF.getDotSymtabSec(), symbolIdx); + SymbolData = toDRI(DotSymtabSec, symbolIdx); return symbol_iterator(SymbolRef(SymbolData, this)); } @@ -714,12 +721,6 @@ ELFObjectFile::getRelocationAddend(DataRefImpl Rel) const { return (int64_t)getRela(Rel)->r_addend; } -template -const typename ELFFile::Elf_Sym * -ELFObjectFile::getSymbol(DataRefImpl Symb) const { - return &*toELFSymIter(Symb); -} - template const typename ELFObjectFile::Elf_Rel * ELFObjectFile::getRel(DataRefImpl Rel) const { @@ -737,21 +738,51 @@ ELFObjectFile::getRela(DataRefImpl Rela) const { template ELFObjectFile::ELFObjectFile(MemoryBufferRef Object, std::error_code &EC) : ELFObjectFileBase( - getELFType(static_cast(ELFT::TargetEndianness) == - support::little, - ELFT::Is64Bits), + getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits), Object), - EF(Data.getBuffer(), EC) {} + EF(Data.getBuffer(), EC) { + if (EC) + return; + for (const Elf_Shdr &Sec : EF.sections()) { + switch (Sec.sh_type) { + case ELF::SHT_DYNSYM: { + if (DotDynSymSec) { + // More than one .dynsym! + EC = object_error::parse_failed; + return; + } + DotDynSymSec = &Sec; + break; + } + case ELF::SHT_SYMTAB: { + if (DotSymtabSec) { + // More than one .symtab!
+ EC = object_error::parse_failed; + return; + } + DotSymtabSec = &Sec; + break; + } + case ELF::SHT_SYMTAB_SHNDX: { + ErrorOr> TableOrErr = EF.getSHNDXTable(Sec); + if ((EC = TableOrErr.getError())) + return; + ShndxTable = *TableOrErr; + break; + } + } + } +} template basic_symbol_iterator ELFObjectFile::symbol_begin_impl() const { - DataRefImpl Sym = toDRI(EF.getDotSymtabSec(), 0); + DataRefImpl Sym = toDRI(DotSymtabSec, 0); return basic_symbol_iterator(SymbolRef(Sym, this)); } template basic_symbol_iterator ELFObjectFile::symbol_end_impl() const { - const Elf_Shdr *SymTab = EF.getDotSymtabSec(); + const Elf_Shdr *SymTab = DotSymtabSec; if (!SymTab) return symbol_begin_impl(); DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym)); @@ -760,13 +791,13 @@ basic_symbol_iterator ELFObjectFile::symbol_end_impl() const { template elf_symbol_iterator ELFObjectFile::dynamic_symbol_begin() const { - DataRefImpl Sym = toDRI(EF.getDotDynSymSec(), 0); + DataRefImpl Sym = toDRI(DotDynSymSec, 0); return symbol_iterator(SymbolRef(Sym, this)); } template elf_symbol_iterator ELFObjectFile::dynamic_symbol_end() const { - const Elf_Shdr *SymTab = EF.getDotDynSymSec(); + const Elf_Shdr *SymTab = DotDynSymSec; DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym)); return basic_symbol_iterator(SymbolRef(Sym, this)); } @@ -781,19 +812,6 @@ section_iterator ELFObjectFile::section_end() const { return section_iterator(SectionRef(toDRI(EF.section_end()), this)); } -template -StringRef ELFObjectFile::getLoadName() const { - Elf_Dyn_Iter DI = EF.dynamic_table_begin(); - Elf_Dyn_Iter DE = EF.dynamic_table_end(); - - while (DI != DE && DI->getTag() != ELF::DT_SONAME) - ++DI; - - if (DI != DE) - return EF.getDynamicString(DI->getVal()); - return ""; -} - template uint8_t ELFObjectFile::getBytesInAddress() const { return ELFT::Is64Bits ? 8 : 4; @@ -807,10 +825,14 @@ StringRef ELFObjectFile::getFileFormatName() const { switch (EF.getHeader()->e_machine) { case ELF::EM_386: return "ELF32-i386"; + case ELF::EM_IAMCU: + return "ELF32-iamcu"; case ELF::EM_X86_64: return "ELF32-x86-64"; case ELF::EM_ARM: return (IsLittleEndian ? "ELF32-arm-little" : "ELF32-arm-big"); + case ELF::EM_AVR: + return "ELF32-avr"; case ELF::EM_HEXAGON: return "ELF32-hexagon"; case ELF::EM_MIPS: @@ -853,6 +875,7 @@ unsigned ELFObjectFile::getArch() const { bool IsLittleEndian = ELFT::TargetEndianness == support::little; switch (EF.getHeader()->e_machine) { case ELF::EM_386: + case ELF::EM_IAMCU: return Triple::x86; case ELF::EM_X86_64: return Triple::x86_64; @@ -860,6 +883,8 @@ unsigned ELFObjectFile::getArch() const { return Triple::aarch64; case ELF::EM_ARM: return Triple::arm; + case ELF::EM_AVR: + return Triple::avr; case ELF::EM_HEXAGON: return Triple::hexagon; case ELF::EM_MIPS: diff --git a/include/llvm/Object/ELFTypes.h b/include/llvm/Object/ELFTypes.h index 27e987ba2852..07b312a7d77c 100644 --- a/include/llvm/Object/ELFTypes.h +++ b/include/llvm/Object/ELFTypes.h @@ -12,7 +12,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/Object/Error.h" -#include "llvm/Support/DataTypes.h" #include "llvm/Support/ELF.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorOr.h" @@ -307,14 +306,18 @@ struct Elf_Dyn_Base> { } d_un; }; -/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters and setters. +/// Elf_Dyn_Impl: This inherits from Elf_Dyn_Base, adding getters. 
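/// (Illustrative sketch, not part of the upstream header: the getters below
/// now return values at the file's native width, so a hypothetical .dynamic
/// walk, like the removed getLoadName() above, would read DT_SONAME so:)
/// \code
///   for (const Elf_Dyn *Dyn = Begin; Dyn->getTag() != ELF::DT_NULL; ++Dyn)
///     if (Dyn->getTag() == ELF::DT_SONAME)
///       SonameOffset = Dyn->getVal(); // uintX_t: 32- or 64-bit per ELFT
/// \endcode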
template struct Elf_Dyn_Impl : Elf_Dyn_Base { using Elf_Dyn_Base::d_tag; using Elf_Dyn_Base::d_un; - int64_t getTag() const { return d_tag; } - uint64_t getVal() const { return d_un.d_val; } - uint64_t getPtr() const { return d_un.d_ptr; } + typedef typename std::conditional::type intX_t; + typedef typename std::conditional::type uintX_t; + intX_t getTag() const { return d_tag; } + uintX_t getVal() const { return d_un.d_val; } + uintX_t getPtr() const { return d_un.d_ptr; } }; // Elf_Rel: Elf Relocation @@ -481,6 +484,30 @@ struct Elf_Hash_Impl { } }; +// .gnu.hash section +template +struct Elf_GnuHash_Impl { + LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) + Elf_Word nbuckets; + Elf_Word symndx; + Elf_Word maskwords; + Elf_Word shift2; + + ArrayRef filter() const { + return ArrayRef(reinterpret_cast(&shift2 + 1), + maskwords); + } + + ArrayRef buckets() const { + return ArrayRef( + reinterpret_cast(filter().end()), nbuckets); + } + + ArrayRef values(unsigned DynamicSymCount) const { + return ArrayRef(buckets().end(), DynamicSymCount - symndx); + } +}; + // MIPS .reginfo section template struct Elf_Mips_RegInfo; diff --git a/include/llvm/Object/Error.h b/include/llvm/Object/Error.h index aa320bb51a46..0f79a6ed0dd8 100644 --- a/include/llvm/Object/Error.h +++ b/include/llvm/Object/Error.h @@ -30,6 +30,7 @@ enum class object_error { string_table_non_null_end, invalid_section_index, bitcode_section_not_found, + elf_invalid_dynamic_table_size, macho_small_load_command, macho_load_segment_too_many_sections, macho_load_segment_too_small, diff --git a/include/llvm/Object/FunctionIndexObjectFile.h b/include/llvm/Object/FunctionIndexObjectFile.h new file mode 100644 index 000000000000..74b461dc7cc7 --- /dev/null +++ b/include/llvm/Object/FunctionIndexObjectFile.h @@ -0,0 +1,110 @@ +//===- FunctionIndexObjectFile.h - Function index file implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the FunctionIndexObjectFile template class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_FUNCTIONINDEXOBJECTFILE_H +#define LLVM_OBJECT_FUNCTIONINDEXOBJECTFILE_H + +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/Object/SymbolicFile.h" + +namespace llvm { +class FunctionInfoIndex; +class Module; + +namespace object { +class ObjectFile; + +/// This class is used to read just the function summary index related +/// sections out of the given object (which may contain a single module's +/// bitcode or be a combined index bitcode file). It builds a FunctionInfoIndex +/// object. +class FunctionIndexObjectFile : public SymbolicFile { + std::unique_ptr Index; + +public: + FunctionIndexObjectFile(MemoryBufferRef Object, + std::unique_ptr I); + ~FunctionIndexObjectFile() override; + + // TODO: Walk through FunctionMap entries for function symbols. + // However, currently these interfaces are not used by any consumers. 
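  // Illustrative sketch (hypothetical caller, not part of the upstream
  // header): this class is normally reached through create() declared below,
  // not through the stubbed symbol interfaces that follow.
  //
  //   ErrorOr<std::unique_ptr<FunctionIndexObjectFile>> ObjOrErr =
  //       FunctionIndexObjectFile::create(Buffer, DiagnosticHandler);
  //   if (ObjOrErr)
  //     FunctionInfoIndex &Index = (*ObjOrErr)->getIndex();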
+ void moveSymbolNext(DataRefImpl &Symb) const override { + llvm_unreachable("not implemented"); + } + std::error_code printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const override { + llvm_unreachable("not implemented"); + return std::error_code(); + } + uint32_t getSymbolFlags(DataRefImpl Symb) const override { + llvm_unreachable("not implemented"); + return 0; + } + basic_symbol_iterator symbol_begin_impl() const override { + llvm_unreachable("not implemented"); + return basic_symbol_iterator(BasicSymbolRef()); + } + basic_symbol_iterator symbol_end_impl() const override { + llvm_unreachable("not implemented"); + return basic_symbol_iterator(BasicSymbolRef()); + } + + const FunctionInfoIndex &getIndex() const { + return const_cast(this)->getIndex(); + } + FunctionInfoIndex &getIndex() { return *Index; } + std::unique_ptr takeIndex(); + + static inline bool classof(const Binary *v) { return v->isFunctionIndex(); } + + /// \brief Finds and returns bitcode embedded in the given object file, or an + /// error code if not found. + static ErrorOr findBitcodeInObject(const ObjectFile &Obj); + + /// \brief Finds and returns bitcode in the given memory buffer (which may + /// be either a bitcode file or a native object file with embedded bitcode), + /// or an error code if not found. + static ErrorOr + findBitcodeInMemBuffer(MemoryBufferRef Object); + + /// \brief Looks for function summary in the given memory buffer, + /// returns true if found, else false. + static bool + hasFunctionSummaryInMemBuffer(MemoryBufferRef Object, + DiagnosticHandlerFunction DiagnosticHandler); + + /// \brief Parse function index in the given memory buffer. + /// Return new FunctionIndexObjectFile instance containing parsed function + /// summary/index. + static ErrorOr> + create(MemoryBufferRef Object, DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false); + + /// \brief Parse the function summary information for function with the + /// given name out of the given buffer. Parsed information is + /// stored on the index object saved in this object. + std::error_code + findFunctionSummaryInMemBuffer(MemoryBufferRef Object, + DiagnosticHandlerFunction DiagnosticHandler, + StringRef FunctionName); +}; +} + +/// Parse the function index out of an IR file and return the function +/// index object if found, or nullptr if not. +ErrorOr> +getFunctionIndexForFile(StringRef Path, + DiagnosticHandlerFunction DiagnosticHandler); +} + +#endif diff --git a/include/llvm/Object/MachO.h b/include/llvm/Object/MachO.h index 489ecef5c996..e02ce3b21416 100644 --- a/include/llvm/Object/MachO.h +++ b/include/llvm/Object/MachO.h @@ -100,7 +100,7 @@ private: }; typedef content_iterator export_iterator; -/// MachORebaseEntry encapsulates the current state in the decompression of +/// MachORebaseEntry encapsulates the current state in the decompression of /// rebasing opcodes. 
This allows you to iterate through the compressed table of /// rebasing using: /// for (const llvm::object::MachORebaseEntry &Entry : Obj->rebaseTable()) { @@ -116,7 +116,7 @@ public: bool operator==(const MachORebaseEntry &) const; void moveNext(); - + private: friend class MachOObjectFile; void moveToFirst(); @@ -210,8 +210,7 @@ public: uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; SymbolRef::Type getSymbolType(DataRefImpl Symb) const override; uint32_t getSymbolFlags(DataRefImpl Symb) const override; - std::error_code getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const override; + ErrorOr getSymbolSection(DataRefImpl Symb) const override; unsigned getSymbolSectionID(SymbolRef Symb) const; unsigned getSectionID(SectionRef Sec) const; @@ -423,6 +422,24 @@ public: return v->isMachO(); } + static uint32_t + getVersionMinMajor(MachO::version_min_command &C, bool SDK) { + uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; + return (VersionOrSDK >> 16) & 0xffff; + } + + static uint32_t + getVersionMinMinor(MachO::version_min_command &C, bool SDK) { + uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; + return (VersionOrSDK >> 8) & 0xff; + } + + static uint32_t + getVersionMinUpdate(MachO::version_min_command &C, bool SDK) { + uint32_t VersionOrSDK = (SDK) ? C.sdk : C.version; + return VersionOrSDK & 0xff; + } + private: uint64_t getSymbolValueImpl(DataRefImpl Symb) const override; @@ -504,4 +521,3 @@ inline const ObjectFile *DiceRef::getObjectFile() const { } #endif - diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 8dd525626218..ce0c891ee0c2 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -100,8 +100,7 @@ public: relocation_iterator relocation_begin() const; relocation_iterator relocation_end() const; iterator_range relocations() const { - return iterator_range(relocation_begin(), - relocation_end()); + return make_range(relocation_begin(), relocation_end()); } section_iterator getRelocatedSection() const; @@ -147,7 +146,7 @@ public: /// @brief Get section this symbol is defined in reference to. Result is /// end_sections() if it is undefined or is an absolute symbol. - std::error_code getSection(section_iterator &Result) const; + ErrorOr getSection() const; const ObjectFile *getObject() const; }; @@ -202,8 +201,8 @@ protected: virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const; virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0; virtual SymbolRef::Type getSymbolType(DataRefImpl Symb) const = 0; - virtual std::error_code getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const = 0; + virtual ErrorOr + getSymbolSection(DataRefImpl Symb) const = 0; // Same as above for SectionRef. 
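  // Illustrative migration sketch (not part of the upstream header): a caller
  // of the previous out-parameter overload adapts to the ErrorOr-returning
  // getSymbolSection above like so:
  //
  //   ErrorOr<section_iterator> SecOrErr = Sym.getSection();
  //   if (std::error_code EC = SecOrErr.getError())
  //     return EC;                       // was out-parameter + error_code
  //   section_iterator Sec = *SecOrErr;  // section_end() if absolute/undefined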
friend class SectionRef; @@ -323,8 +322,8 @@ inline uint64_t SymbolRef::getCommonSize() const { return getObject()->getCommonSymbolSize(getRawDataRefImpl()); } -inline std::error_code SymbolRef::getSection(section_iterator &Result) const { - return getObject()->getSymbolSection(getRawDataRefImpl(), Result); +inline ErrorOr SymbolRef::getSection() const { + return getObject()->getSymbolSection(getRawDataRefImpl()); } inline SymbolRef::Type SymbolRef::getType() const { diff --git a/include/llvm/Object/SymbolicFile.h b/include/llvm/Object/SymbolicFile.h index 537997ac6318..0c5b38111a9c 100644 --- a/include/llvm/Object/SymbolicFile.h +++ b/include/llvm/Object/SymbolicFile.h @@ -15,6 +15,7 @@ #define LLVM_OBJECT_SYMBOLICFILE_H #include "llvm/Object/Binary.h" +#include "llvm/Support/Format.h" namespace llvm { namespace object { @@ -29,6 +30,12 @@ union DataRefImpl { DataRefImpl() { std::memset(this, 0, sizeof(DataRefImpl)); } }; +template +OStream& operator<<(OStream &OS, const DataRefImpl &D) { + OS << "(" << format("0x%x8", D.p) << " (" << format("0x%x8", D.d.a) << ", " << format("0x%x8", D.d.b) << "))"; + return OS; +} + inline bool operator==(const DataRefImpl &a, const DataRefImpl &b) { // Check bitwise identical. This is the only legal way to compare a union w/o // knowing which member is in use. @@ -94,6 +101,7 @@ public: // (e.g. section symbols) SF_Thumb = 1U << 8, // Thumb symbol in a 32-bit ARM binary SF_Hidden = 1U << 9, // Symbol has hidden visibility + SF_Const = 1U << 10, // Symbol value is constant }; BasicSymbolRef() : OwningObject(nullptr) { } diff --git a/include/llvm/Option/Arg.h b/include/llvm/Option/Arg.h index e1b72b6267cf..99d329693de2 100644 --- a/include/llvm/Option/Arg.h +++ b/include/llvm/Option/Arg.h @@ -113,6 +113,7 @@ public: /// when rendered as a input (e.g., Xlinker). void renderAsInput(const ArgList &Args, ArgStringList &Output) const; + void print(raw_ostream &O) const; void dump() const; /// \brief Return a formatted version of the argument and diff --git a/include/llvm/Option/ArgList.h b/include/llvm/Option/ArgList.h index ef4005761b75..89771b5c3cf1 100644 --- a/include/llvm/Option/ArgList.h +++ b/include/llvm/Option/ArgList.h @@ -259,6 +259,9 @@ public: void AddLastArg(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1) const; + /// AddAllArgs - Render all arguments matching any of the given ids. + void AddAllArgs(ArgStringList &Output, ArrayRef Ids) const; + /// AddAllArgs - Render all arguments matching the given ids. void AddAllArgs(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1 = 0U, OptSpecifier Id2 = 0U) const; @@ -303,6 +306,9 @@ public: const char *GetOrMakeJoinedArgString(unsigned Index, StringRef LHS, StringRef RHS) const; + void print(raw_ostream &O) const; + void dump() const; + /// @} }; diff --git a/include/llvm/Option/OptTable.h b/include/llvm/Option/OptTable.h index 96f51cf3317d..390e52774fea 100644 --- a/include/llvm/Option/OptTable.h +++ b/include/llvm/Option/OptTable.h @@ -50,8 +50,7 @@ public: private: /// \brief The static option information table. - const Info *OptionInfos; - unsigned NumOptionInfos; + ArrayRef OptionInfos; bool IgnoreCase; unsigned TheInputOptionID; @@ -74,14 +73,13 @@ private: } protected: - OptTable(const Info *OptionInfos, unsigned NumOptionInfos, - bool IgnoreCase = false); + OptTable(ArrayRef OptionInfos, bool IgnoreCase = false); public: ~OptTable(); /// \brief Return the total number of option classes. 
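/// (Illustrative note with a hypothetical driver table: the count now comes
/// from the ArrayRef itself, so a subclass binds a static table without an
/// explicit length.)
/// \code
///   static const OptTable::Info InfoTable[] = { /* ... */ };
///   struct MyOptTable : OptTable {
///     MyOptTable() : OptTable(InfoTable) {} // size deduced via ArrayRef
///   };
/// \endcode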
- unsigned getNumOptions() const { return NumOptionInfos; } + unsigned getNumOptions() const { return OptionInfos.size(); } /// \brief Get the given Opt's Option instance, lazily creating it /// if necessary. diff --git a/include/llvm/Option/Option.h b/include/llvm/Option/Option.h index 09be26c7cf20..494987a135ef 100644 --- a/include/llvm/Option/Option.h +++ b/include/llvm/Option/Option.h @@ -195,6 +195,7 @@ public: /// start. Arg *accept(const ArgList &Args, unsigned &Index, unsigned ArgSize) const; + void print(raw_ostream &O) const; void dump() const; }; diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h index 0b318fc8fb07..492a4ef464f8 100644 --- a/include/llvm/PassAnalysisSupport.h +++ b/include/llvm/PassAnalysisSupport.h @@ -36,11 +36,17 @@ namespace llvm { /// class AnalysisUsage { public: - typedef SmallVector VectorType; + typedef SmallVectorImpl VectorType; private: /// Sets of analyses required and preserved by a pass - VectorType Required, RequiredTransitive, Preserved; + // TODO: It's not clear that SmallVector is an appropriate data structure for + // this use case. The sizes were picked to minimize wasted space, but are + // otherwise fairly meaningless. + SmallVector Required; + SmallVector RequiredTransitive; + SmallVector Preserved; + SmallVector Used; bool PreservesAll; public: @@ -72,14 +78,32 @@ public: Preserved.push_back(&ID); return *this; } - ///@} - /// Add the specified Pass class to the set of analyses preserved by this pass. template AnalysisUsage &addPreserved() { Preserved.push_back(&PassClass::ID); return *this; } + ///@} + + ///@{ + /// Add the specified ID to the set of analyses used by this pass if it is + /// available. + AnalysisUsage &addUsedIfAvailableID(const void *ID) { + Used.push_back(ID); + return *this; + } + AnalysisUsage &addUsedIfAvailableID(char &ID) { + Used.push_back(&ID); + return *this; + } + /// Add the specified Pass class to the set of analyses used by this pass. + template + AnalysisUsage &addUsedIfAvailable() { + Used.push_back(&PassClass::ID); + return *this; + } + ///@} /// Add the Pass with the specified argument string to the set of analyses /// preserved by this pass. If no such Pass exists, do nothing. This can be @@ -108,6 +132,7 @@ public: return RequiredTransitive; } const VectorType &getPreservedSet() const { return Preserved; } + const VectorType &getUsedSet() const { return Used; } }; //===----------------------------------------------------------------------===// diff --git a/include/llvm/PassInfo.h b/include/llvm/PassInfo.h index d10761831b3a..cee4ade323e4 100644 --- a/include/llvm/PassInfo.h +++ b/include/llvm/PassInfo.h @@ -33,13 +33,13 @@ public: typedef Pass *(*TargetMachineCtor_t)(TargetMachine *); private: - const char *const PassName; // Nice name for Pass - const char *const PassArgument; // Command Line argument to run this pass - const void *PassID; - const bool IsCFGOnlyPass; // Pass only looks at the CFG. - const bool IsAnalysis; // True if an analysis pass. - const bool IsAnalysisGroup; // True if an analysis group.
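// Illustrative sketch for the AnalysisUsage::addUsedIfAvailable<> hook added
// above (hypothetical pass, not part of the upstream headers):
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<DominatorTreeWrapperPass>();            // hard dependency
//     AU.addUsedIfAvailable<TargetLibraryInfoWrapperPass>(); // optional
//   }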
+ std::vector ItfImpl; // Interfaces implemented by this pass NormalCtor_t NormalCtor; TargetMachineCtor_t TargetMachineCtor; @@ -50,18 +50,16 @@ public: PassInfo(const char *name, const char *arg, const void *pi, NormalCtor_t normal, bool isCFGOnly, bool is_analysis, TargetMachineCtor_t machine = nullptr) - : PassName(name), PassArgument(arg), PassID(pi), - IsCFGOnlyPass(isCFGOnly), - IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal), - TargetMachineCtor(machine) {} + : PassName(name), PassArgument(arg), PassID(pi), IsCFGOnlyPass(isCFGOnly), + IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal), + TargetMachineCtor(machine) {} /// PassInfo ctor - Do not call this directly, this should only be invoked /// through RegisterPass. This version is for use by analysis groups; it /// does not auto-register the pass. PassInfo(const char *name, const void *pi) - : PassName(name), PassArgument(""), PassID(pi), - IsCFGOnlyPass(false), - IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr), - TargetMachineCtor(nullptr) {} + : PassName(name), PassArgument(""), PassID(pi), IsCFGOnlyPass(false), + IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr), + TargetMachineCtor(nullptr) {} /// getPassName - Return the friendly name for the pass, never returns null /// @@ -78,10 +76,8 @@ public: const void *getTypeInfo() const { return PassID; } /// Return true if this PassID implements the specified ID pointer. - bool isPassID(const void *IDPtr) const { - return PassID == IDPtr; - } - + bool isPassID(const void *IDPtr) const { return PassID == IDPtr; } + /// isAnalysisGroup - Return true if this is an analysis group, not a normal /// pass. /// @@ -91,7 +87,7 @@ public: /// isCFGOnlyPass - return true if this pass only looks at the CFG for the /// function. bool isCFGOnlyPass() const { return IsCFGOnlyPass; } - + /// getNormalCtor - Return a pointer to a function, that when called, creates /// an instance of the pass and returns it. This pointer may be null if there /// is no default constructor for the pass. diff --git a/include/llvm/PassRegistry.h b/include/llvm/PassRegistry.h index 8c28ef5e7e61..e7fe1f53a4d4 100644 --- a/include/llvm/PassRegistry.h +++ b/include/llvm/PassRegistry.h @@ -17,7 +17,6 @@ #ifndef LLVM_PASSREGISTRY_H #define LLVM_PASSREGISTRY_H -#include "llvm-c/Core.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h index 6cb6516412e8..7c3d49f02e8f 100644 --- a/include/llvm/PassSupport.h +++ b/include/llvm/PassSupport.h @@ -26,7 +26,7 @@ #include "llvm/PassInfo.h" #include "llvm/PassRegistry.h" #include "llvm/Support/Atomic.h" -#include "llvm/Support/Valgrind.h" +#include "llvm/Support/Compiler.h" #include namespace llvm { diff --git a/include/llvm/ProfileData/CoverageMapping.h b/include/llvm/ProfileData/CoverageMapping.h index 3488e793d84f..3790e1358449 100644 --- a/include/llvm/ProfileData/CoverageMapping.h +++ b/include/llvm/ProfileData/CoverageMapping.h @@ -104,7 +104,7 @@ struct CounterExpression { }; /// \brief A Counter expression builder is used to construct the -/// counter expressions. It avoids unecessary duplication +/// counter expressions. It avoids unnecessary duplication /// and simplifies algebraic expressions. 
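/// (Illustrative sketch, not part of the upstream header: building the same
/// expression twice is expected to reuse one entry, not append a duplicate.)
/// \code
///   CounterExpressionBuilder Builder;
///   Counter A = Counter::getCounter(0), B = Counter::getCounter(1);
///   Counter Sum1 = Builder.add(A, B);
///   Counter Sum2 = Builder.add(A, B); // same expression as Sum1
/// \endcode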
class CounterExpressionBuilder { /// \brief A list of all the counter expressions @@ -236,7 +236,7 @@ class CounterMappingContext { public: CounterMappingContext(ArrayRef Expressions, - ArrayRef CounterValues = ArrayRef()) + ArrayRef CounterValues = None) : Expressions(Expressions), CounterValues(CounterValues) {} void setCounts(ArrayRef Counts) { CounterValues = Counts; } @@ -443,7 +443,7 @@ public: /// \brief Get the list of function instantiations in the file. /// - /// Fucntions that are instantiated more than once, such as C++ template + /// Functions that are instantiated more than once, such as C++ template /// specializations, have distinct coverage records for each instantiation. std::vector getInstantiations(StringRef Filename); diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index 77055ba87268..4688759a3bd1 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -16,34 +16,310 @@ #ifndef LLVM_PROFILEDATA_INSTRPROF_H_ #define LLVM_PROFILEDATA_INSTRPROF_H_ +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/ProfileData/InstrProfData.inc" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MD5.h" #include +#include #include #include +#define INSTR_PROF_INDEX_VERSION 3 namespace llvm { + +class Function; +class GlobalVariable; +class Module; + +/// Return the name of the data section containing profile counter variables. +inline StringRef getInstrProfCountersSectionName(bool AddSegment) { + return AddSegment ? "__DATA," INSTR_PROF_CNTS_SECT_NAME_STR + : INSTR_PROF_CNTS_SECT_NAME_STR; +} + +/// Return the name of the data section containing names of instrumented +/// functions. +inline StringRef getInstrProfNameSectionName(bool AddSegment) { + return AddSegment ? "__DATA," INSTR_PROF_NAME_SECT_NAME_STR + : INSTR_PROF_NAME_SECT_NAME_STR; +} + +/// Return the name of the data section containing per-function control +/// data. +inline StringRef getInstrProfDataSectionName(bool AddSegment) { + return AddSegment ? "__DATA," INSTR_PROF_DATA_SECT_NAME_STR + : INSTR_PROF_DATA_SECT_NAME_STR; +} + +/// Return the name of the profile runtime entry point that does value +/// profiling for a given site. +inline StringRef getInstrProfValueProfFuncName() { + return INSTR_PROF_VALUE_PROF_FUNC_STR; +} + +/// Return the name of the section containing function coverage mapping +/// data. +inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { + return AddSegment ? "__DATA,__llvm_covmap" : "__llvm_covmap"; +} + +/// Return the name prefix of variables containing instrumented function names. +inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } + +/// Return the name prefix of variables containing per-function control data. +inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } + +/// Return the name prefix of profile counter variables. +inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } + +/// Return the name prefix of the COMDAT group for instrumentation variables +/// associated with a COMDAT function. +inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } + +/// Return the name of a coverage mapping variable (internal linkage) +/// for each instrumented source module. Such variables are allocated +/// in the __llvm_covmap section.
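/// (Illustrative note with a hypothetical function "foo": the prefixes above
/// yield "__profn_foo", "__profd_foo", "__profc_foo", and the COMDAT group
/// "__profv_foo".)
/// \code
///   std::string CntVar =
///       (Twine(getInstrProfCountersVarPrefix()) + "foo").str(); // __profc_foo
/// \endcode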
+inline StringRef getCoverageMappingVarName() { + return "__llvm_coverage_mapping"; +} + +/// Return the name of the function that registers all the per-function control +/// data at program startup time by calling __llvm_register_function. This +/// function has internal linkage and is called by the __llvm_profile_init +/// runtime method. This function is not generated for these platforms: +/// Darwin, Linux, and FreeBSD. +inline StringRef getInstrProfRegFuncsName() { + return "__llvm_profile_register_functions"; +} + +/// Return the name of the runtime interface that registers per-function control +/// data for one instrumented function. +inline StringRef getInstrProfRegFuncName() { + return "__llvm_profile_register_function"; +} + +/// Return the name of the runtime initialization method that is generated by +/// the compiler. The function calls __llvm_profile_register_functions and +/// __llvm_profile_override_default_filename functions if needed. This function +/// has internal linkage and is invoked at startup time via init_array. +inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; } + +/// Return the name of the hook variable defined in the profile runtime library. +/// A reference to the variable causes the linker to link in the runtime +/// initialization module (which defines the hook variable). +inline StringRef getInstrProfRuntimeHookVarName() { + return "__llvm_profile_runtime"; +} + +/// Return the name of the compiler-generated function that references the +/// runtime hook variable. The function is a weak global. +inline StringRef getInstrProfRuntimeHookVarUseFuncName() { + return "__llvm_profile_runtime_user"; +} + +/// Return the name of the profile runtime interface that overrides the default +/// profile data file name. +inline StringRef getInstrProfFileOverriderFuncName() { + return "__llvm_profile_override_default_filename"; +} + +/// Return the modified name for function \c F suitable to be +/// used as the key for profile lookup. +std::string getPGOFuncName(const Function &F, + uint64_t Version = INSTR_PROF_INDEX_VERSION); + +/// Return the modified name for a function suitable to be +/// used as the key for profile lookup. The function's original +/// name is \c RawFuncName and has linkage of type \c Linkage. +/// The function is defined in module \c FileName. +std::string getPGOFuncName(StringRef RawFuncName, + GlobalValue::LinkageTypes Linkage, + StringRef FileName, + uint64_t Version = INSTR_PROF_INDEX_VERSION); + +/// Create and return the global variable for function name used in PGO +/// instrumentation. \c FuncName is the name of the function returned +/// by a call to \c getPGOFuncName. +GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName); + +/// Create and return the global variable for function name used in PGO +/// instrumentation. \c FuncName is the name of the function +/// returned by a call to \c getPGOFuncName, \c M is the owning module, +/// and \c Linkage is the linkage of the instrumented function. +GlobalVariable *createPGOFuncNameVar(Module &M, + GlobalValue::LinkageTypes Linkage, + StringRef FuncName); + +/// Given a PGO function name, remove the filename prefix and return +/// the original (static) function name.
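/// (Illustrative sketch with hypothetical values: a function with local
/// linkage gets its defining file prepended, which this helper strips again.)
/// \code
///   std::string N = getPGOFuncName("bar", GlobalValue::InternalLinkage,
///                                  "foo.c");               // "foo.c:bar"
///   StringRef Orig = getFuncNameWithoutPrefix(N, "foo.c"); // "bar"
/// \endcode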
+StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName); + const std::error_category &instrprof_category(); enum class instrprof_error { - success = 0, - eof, - bad_magic, - bad_header, - unsupported_version, - unsupported_hash_type, - too_large, - truncated, - malformed, - unknown_function, - hash_mismatch, - count_mismatch, - counter_overflow + success = 0, + eof, + unrecognized_format, + bad_magic, + bad_header, + unsupported_version, + unsupported_hash_type, + too_large, + truncated, + malformed, + unknown_function, + hash_mismatch, + count_mismatch, + counter_overflow, + value_site_count_mismatch }; inline std::error_code make_error_code(instrprof_error E) { return std::error_code(static_cast(E), instrprof_category()); } +inline instrprof_error MergeResult(instrprof_error &Accumulator, + instrprof_error Result) { + // Prefer the first error encountered, as later errors may be secondary + // effects of the initial problem. + if (Accumulator == instrprof_error::success && + Result != instrprof_error::success) + Accumulator = Result; + return Accumulator; +} + +enum InstrProfValueKind : uint32_t { +#define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +namespace object { +class SectionRef; +} + +namespace IndexedInstrProf { +uint64_t ComputeHash(StringRef K); +} + +/// A symbol table used for function PGO name look-up with keys +/// (such as pointers or MD5 hash values) that map to the function. A +/// function's PGO name or its name's MD5 hash is used to retrieve the +/// profile data of the function. See the \c getPGOFuncName() method for +/// details on how the PGO name is formed. +class InstrProfSymtab { +public: + typedef std::vector> AddrHashMap; + +private: + StringRef Data; + uint64_t Address; + // A map from MD5 hash keys to function name strings. + std::vector> HashNameMap; + // A map from function runtime address to function name MD5 hash. + // This map is only populated and used by the raw instr profile reader. + AddrHashMap AddrToMD5Map; + +public: + InstrProfSymtab() : Data(), Address(0), HashNameMap(), AddrToMD5Map() {} + + /// Create InstrProfSymtab from an object file section which + /// contains function PGO names that are uncompressed. + /// This interface is used by CoverageMappingReader. + std::error_code create(object::SectionRef &Section); + /// This interface is used by the reader of the CoverageMapping test + /// format. + inline std::error_code create(StringRef D, uint64_t BaseAddr); + /// Create InstrProfSymtab from a set of names iterable from + /// \p IterRange. This interface is used by IndexedProfReader. + template void create(const NameIterRange &IterRange); + // If the symtab is created by a series of calls to \c addFuncName, \c + // finalizeSymtab needs to be called before looking up function names. + // This is required because the underlying map is a vector (for space + // efficiency) which needs to be sorted. + inline void finalizeSymtab(); + /// Update the symtab by adding \p FuncName to the table. This interface + /// is used by the raw and text profile readers. + void addFuncName(StringRef FuncName) { + HashNameMap.push_back(std::make_pair( + IndexedInstrProf::ComputeHash(FuncName), FuncName.str())); + } + /// Map a function address to its name's MD5 hash. This interface + /// is only used by the raw profiler reader.
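  /// (Illustrative sketch of the intended call sequence; per the comments
  /// above, finalizeSymtab() must precede any lookup.)
  /// \code
  ///   InstrProfSymtab Symtab;
  ///   Symtab.addFuncName("main");
  ///   Symtab.finalizeSymtab();
  ///   StringRef N = Symtab.getFuncName(IndexedInstrProf::ComputeHash("main"));
  /// \endcode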
+ void mapAddress(uint64_t Addr, uint64_t MD5Val) { + AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); + } + AddrHashMap &getAddrHashMap() { return AddrToMD5Map; } + /// Return the function's PGO name from the function name's symbol + /// address in the object file. If an error occurs, return + /// an empty string. + StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); + /// Return the function's PGO name from the name's MD5 hash value. + /// If not found, return an empty string. + inline StringRef getFuncName(uint64_t FuncMD5Hash); +}; + +std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { + Data = D; + Address = BaseAddr; + return std::error_code(); +} + +template +void InstrProfSymtab::create(const NameIterRange &IterRange) { + for (auto Name : IterRange) + HashNameMap.push_back( + std::make_pair(IndexedInstrProf::ComputeHash(Name), Name.str())); + finalizeSymtab(); +} + +void InstrProfSymtab::finalizeSymtab() { + std::sort(HashNameMap.begin(), HashNameMap.end(), less_first()); + HashNameMap.erase(std::unique(HashNameMap.begin(), HashNameMap.end()), + HashNameMap.end()); + std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first()); + AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), + AddrToMD5Map.end()); +} + +StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { + auto Result = + std::lower_bound(HashNameMap.begin(), HashNameMap.end(), FuncMD5Hash, + [](const std::pair &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != HashNameMap.end()) + return Result->second; + return StringRef(); +} + +struct InstrProfValueSiteRecord { + /// Value profiling data pairs at a given value site. + std::list ValueData; + + InstrProfValueSiteRecord() { ValueData.clear(); } + template + InstrProfValueSiteRecord(InputIterator F, InputIterator L) + : ValueData(F, L) {} + + /// Sort ValueData ascending by Value + void sortByTargetValues() { + ValueData.sort( + [](const InstrProfValueData &left, const InstrProfValueData &right) { + return left.Value < right.Value; + }); + } + + /// Merge data from another InstrProfValueSiteRecord. + /// Optionally scale merged counts by \p Weight. + instrprof_error mergeValueData(InstrProfValueSiteRecord &Input, + uint64_t Weight = 1); +}; + /// Profiling information for a single function. struct InstrProfRecord { InstrProfRecord() {} @@ -52,8 +328,258 @@ struct InstrProfRecord { StringRef Name; uint64_t Hash; std::vector Counts; + + typedef std::vector> ValueMapType; + + /// Return the number of value profile kinds with a non-zero number + /// of profile sites. + inline uint32_t getNumValueKinds() const; + /// Return the number of instrumented sites for ValueKind. + inline uint32_t getNumValueSites(uint32_t ValueKind) const; + /// Return the total number of ValueData for ValueKind. + inline uint32_t getNumValueData(uint32_t ValueKind) const; + /// Return the number of value data collected for ValueKind at profiling + /// site \p Site. + inline uint32_t getNumValueDataForSite(uint32_t ValueKind, + uint32_t Site) const; + /// Return the array of profiled values at \p Site. + inline std::unique_ptr + getValueForSite(uint32_t ValueKind, uint32_t Site, + uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const; + inline void + getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, uint32_t Site, + uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const; + /// Reserve space for NumValueSites sites.
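  /// (Illustrative sketch of the typical write path, hypothetical data.)
  /// \code
  ///   InstrProfRecord R;                  // Name/Hash/Counts set elsewhere
  ///   R.reserveSites(IPVK_IndirectCallTarget, 1);
  ///   InstrProfValueData VD[] = {{/*Value=*/0xdeadbeef, /*Count=*/7}};
  ///   R.addValueData(IPVK_IndirectCallTarget, /*Site=*/0, VD, 1, nullptr);
  /// \endcode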
+ inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites); + /// Add ValueData for ValueKind at value Site. + void addValueData(uint32_t ValueKind, uint32_t Site, + InstrProfValueData *VData, uint32_t N, + ValueMapType *ValueMap); + + /// Merge the counts in \p Other into this one. + /// Optionally scale merged counts by \p Weight. + instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1); + + /// Clear value data entries + void clearValueData() { + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + getValueSitesForKind(Kind).clear(); + } + +private: + std::vector IndirectCallSites; + const std::vector & + getValueSitesForKind(uint32_t ValueKind) const { + switch (ValueKind) { + case IPVK_IndirectCallTarget: + return IndirectCallSites; + default: + llvm_unreachable("Unknown value kind!"); + } + return IndirectCallSites; + } + + std::vector & + getValueSitesForKind(uint32_t ValueKind) { + return const_cast &>( + const_cast(this) + ->getValueSitesForKind(ValueKind)); + } + + // Map indirect call target name hash to name string. + uint64_t remapValue(uint64_t Value, uint32_t ValueKind, + ValueMapType *HashKeys); + + // Merge Value Profile data from Src record to this record for ValueKind. + // Scale merged value counts by \p Weight. + instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, + uint64_t Weight); }; +uint32_t InstrProfRecord::getNumValueKinds() const { + uint32_t NumValueKinds = 0; + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + NumValueKinds += !(getValueSitesForKind(Kind).empty()); + return NumValueKinds; +} + +uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { + uint32_t N = 0; + const std::vector &SiteRecords = + getValueSitesForKind(ValueKind); + for (auto &SR : SiteRecords) { + N += SR.ValueData.size(); + } + return N; +} + +uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const { + return getValueSitesForKind(ValueKind).size(); +} + +uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, + uint32_t Site) const { + return getValueSitesForKind(ValueKind)[Site].ValueData.size(); +} + +std::unique_ptr InstrProfRecord::getValueForSite( + uint32_t ValueKind, uint32_t Site, + uint64_t (*ValueMapper)(uint32_t, uint64_t)) const { + uint32_t N = getNumValueDataForSite(ValueKind, Site); + if (N == 0) + return std::unique_ptr(nullptr); + + auto VD = llvm::make_unique(N); + getValueForSite(VD.get(), ValueKind, Site, ValueMapper); + + return VD; +} + +void InstrProfRecord::getValueForSite(InstrProfValueData Dest[], + uint32_t ValueKind, uint32_t Site, + uint64_t (*ValueMapper)(uint32_t, + uint64_t)) const { + uint32_t I = 0; + for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) { + Dest[I].Value = ValueMapper ? ValueMapper(ValueKind, V.Value) : V.Value; + Dest[I].Count = V.Count; + I++; + } +} + +void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { + std::vector &ValueSites = + getValueSitesForKind(ValueKind); + ValueSites.reserve(NumValueSites); +} + +inline support::endianness getHostEndianness() { + return sys::IsLittleEndianHost ? support::little : support::big; +} + +// Include definitions for value profile data +#define INSTR_PROF_VALUE_PROF_DATA +#include "llvm/ProfileData/InstrProfData.inc" + + /* + * Initialize the record for runtime value profile data. + * Return 0 if the initialization is successful, otherwise + * return 1. 
+ */ +int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord, + const uint16_t *NumValueSites, + ValueProfNode **Nodes); + +/* Release memory allocated for the runtime record. */ +void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord); + +/* Return the size of ValueProfData structure that can be used to store + the value profile data collected at runtime. */ +uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record); + +/* Return a ValueProfData instance that stores the data collected at runtime. */ +ValueProfData * +serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, + ValueProfData *Dst); + +namespace IndexedInstrProf { + +enum class HashT : uint32_t { + MD5, + + Last = MD5 +}; + +static inline uint64_t MD5Hash(StringRef Str) { + MD5 Hash; + Hash.update(Str); + llvm::MD5::MD5Result Result; + Hash.final(Result); + // Return the least significant 8 bytes. Our MD5 implementation returns the + // result in little endian, so we may need to swap bytes. + using namespace llvm::support; + return endian::read(Result); +} + +inline uint64_t ComputeHash(HashT Type, StringRef K) { + switch (Type) { + case HashT::MD5: + return IndexedInstrProf::MD5Hash(K); + } + llvm_unreachable("Unhandled hash type"); +} + +const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" +const uint64_t Version = INSTR_PROF_INDEX_VERSION; +const HashT HashType = HashT::MD5; + +inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } + +// This structure defines the file header of the LLVM profile +// data file in indexed-format. +struct Header { + uint64_t Magic; + uint64_t Version; + uint64_t MaxFunctionCount; + uint64_t HashType; + uint64_t HashOffset; +}; + +} // end namespace IndexedInstrProf + +namespace RawInstrProf { + +const uint64_t Version = INSTR_PROF_RAW_VERSION; + +template inline uint64_t getMagic(); +template <> inline uint64_t getMagic() { + return INSTR_PROF_RAW_MAGIC_64; +} + +template <> inline uint64_t getMagic() { + return INSTR_PROF_RAW_MAGIC_32; +} + +// Per-function profile data header/control structure. +// The definition should match the structure defined in +// compiler-rt/lib/profile/InstrProfiling.h. +// It should also match the synthesized type in +// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. +template struct LLVM_ALIGNAS(8) ProfileData { + #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; + #include "llvm/ProfileData/InstrProfData.inc" +}; + +// File header structure of the LLVM profile data in raw format. +// The definition should match the header referenced in +// compiler-rt/lib/profile/InstrProfilingFile.c and +// InstrProfilingBuffer.c. +struct Header { +#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name; +#include "llvm/ProfileData/InstrProfData.inc" +}; + +} // end namespace RawInstrProf + +namespace coverage { + +// Profile coverage map has the following layout: +// [CoverageMapFileHeader] +// [ArrayStart] +// [CovMapFunctionRecord] +// [CovMapFunctionRecord] +// ... 
+// [ArrayEnd] +// [Encoded Region Mapping Data] +LLVM_PACKED_START +template struct CovMapFunctionRecord { + #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name; + #include "llvm/ProfileData/InstrProfData.inc" +}; +LLVM_PACKED_END + +} + } // end namespace llvm namespace std { diff --git a/include/llvm/ProfileData/InstrProfData.inc b/include/llvm/ProfileData/InstrProfData.inc new file mode 100644 index 000000000000..48dae506cabb --- /dev/null +++ b/include/llvm/ProfileData/InstrProfData.inc @@ -0,0 +1,735 @@ +/*===-- InstrProfData.inc - instr profiling runtime structures -----------===*\ +|* +|* The LLVM Compiler Infrastructure +|* +|* This file is distributed under the University of Illinois Open Source +|* License. See LICENSE.TXT for details. +|* +\*===----------------------------------------------------------------------===*/ +/* + * This is the master file that defines all the data structures, signatures, + * and constant literals that are shared across the profiling runtime library, + * compiler (instrumentation), and host tools (reader/writer). The entities + * defined in this file affect the profile runtime ABI, the raw profile format, + * or both. + * + * The file has two identical copies. The master copy lives in LLVM and + * the other one sits in the compiler-rt/lib/profile directory. To make changes + * in this file, first modify the master copy and copy it over to compiler-rt. + * Testing of any change in this file can start only after the two copies are + * synced up. + * + * The first part of the file includes macros that define types, names, and + * initializers for the member fields of the core data structures. The field + * declarations for one structure are enabled by defining the field activation + * macro associated with that structure. Only one field activation record + * can be defined at a time, and the remaining definitions will be filtered out + * by the preprocessor. + * + * Examples of how the template is used to instantiate structure definitions: + * 1. To declare a structure: + * + * struct ProfData { + * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + * Type Name; + * #include "llvm/ProfileData/InstrProfData.inc" + * }; + * + * 2. To construct LLVM type arrays for the struct type: + * + * Type *DataTypes[] = { + * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + * LLVMType, + * #include "llvm/ProfileData/InstrProfData.inc" + * }; + * + * 3. To construct a constant array for the initializers: + * #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \ + * Initializer, + * Constant *ConstantVals[] = { + * #include "llvm/ProfileData/InstrProfData.inc" + * }; + * + * + * The second part of the file includes definitions of all other entities that + * are related to the runtime ABI and format. When no field activation macro is + * defined, this file can be included to introduce the definitions. + * +\*===----------------------------------------------------------------------===*/ + +/* INSTR_PROF_DATA start. */ +/* Definition of member fields of the per-function control structure.
*/ +#ifndef INSTR_PROF_DATA +#define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif + +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), \ + NamePtr->getType()->getPointerElementType()->getArrayNumElements())) +INSTR_PROF_DATA(const uint32_t, llvm::Type::getInt32Ty(Ctx), NumCounters, \ + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), NumCounters)) +INSTR_PROF_DATA(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ + ConstantInt::get(llvm::Type::getInt64Ty(Ctx), \ + Inc->getHash()->getZExtValue())) +INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), NamePtr, \ + ConstantExpr::getBitCast(NamePtr, llvm::Type::getInt8PtrTy(Ctx))) +INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt64PtrTy(Ctx), CounterPtr, \ + ConstantExpr::getBitCast(CounterPtr, \ + llvm::Type::getInt64PtrTy(Ctx))) +INSTR_PROF_DATA(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), FunctionPointer, \ + FunctionAddr) +INSTR_PROF_DATA(IntPtrT, llvm::Type::getInt8PtrTy(Ctx), Values, \ + ConstantPointerNull::get(Int8PtrTy)) +INSTR_PROF_DATA(const uint16_t, Int16ArrayTy, NumValueSites[IPVK_Last+1], \ + ConstantArray::get(Int16ArrayTy, Int16ArrayVals)) +#undef INSTR_PROF_DATA +/* INSTR_PROF_DATA end. */ + +/* INSTR_PROF_RAW_HEADER start */ +/* Definition of member fields of the raw profile header data structure. */ +#ifndef INSTR_PROF_RAW_HEADER +#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +INSTR_PROF_RAW_HEADER(uint64_t, Magic, __llvm_profile_get_magic()) +INSTR_PROF_RAW_HEADER(uint64_t, Version, __llvm_profile_get_version()) +INSTR_PROF_RAW_HEADER(uint64_t, DataSize, DataSize) +INSTR_PROF_RAW_HEADER(uint64_t, CountersSize, CountersSize) +INSTR_PROF_RAW_HEADER(uint64_t, NamesSize, NamesSize) +INSTR_PROF_RAW_HEADER(uint64_t, CountersDelta, (uintptr_t)CountersBegin) +INSTR_PROF_RAW_HEADER(uint64_t, NamesDelta, (uintptr_t)NamesBegin) +INSTR_PROF_RAW_HEADER(uint64_t, ValueKindLast, IPVK_Last) +INSTR_PROF_RAW_HEADER(uint64_t, ValueDataSize, ValueDataSize) +INSTR_PROF_RAW_HEADER(uint64_t, ValueDataDelta, (uintptr_t)ValueDataBegin) +#undef INSTR_PROF_RAW_HEADER +/* INSTR_PROF_RAW_HEADER end */ + +/* VALUE_PROF_FUNC_PARAM start */ +/* Definition of parameter types of the runtime API used to do value profiling + * for a given value site. + */ +#ifndef VALUE_PROF_FUNC_PARAM +#define VALUE_PROF_FUNC_PARAM(ArgType, ArgName, ArgLLVMType) +#define INSTR_PROF_COMMA +#else +#define INSTR_PROF_DATA_DEFINED +#define INSTR_PROF_COMMA , +#endif +VALUE_PROF_FUNC_PARAM(uint64_t, TargetValue, Type::getInt64Ty(Ctx)) \ + INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(void *, Data, Type::getInt8PtrTy(Ctx)) INSTR_PROF_COMMA +VALUE_PROF_FUNC_PARAM(uint32_t, CounterIndex, Type::getInt32Ty(Ctx)) +#undef VALUE_PROF_FUNC_PARAM +#undef INSTR_PROF_COMMA +/* VALUE_PROF_FUNC_PARAM end */ + +/* VALUE_PROF_KIND start */ +#ifndef VALUE_PROF_KIND +#define VALUE_PROF_KIND(Enumerator, Value) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +VALUE_PROF_KIND(IPVK_IndirectCallTarget, 0) +/* These two kinds must be the last to be + * declared. This is to make sure the string + * array created with the template can be + * indexed with the kind value. 
+ */ +VALUE_PROF_KIND(IPVK_First, IPVK_IndirectCallTarget) +VALUE_PROF_KIND(IPVK_Last, IPVK_IndirectCallTarget) + +#undef VALUE_PROF_KIND +/* VALUE_PROF_KIND end */ + +/* COVMAP_FUNC_RECORD start */ +/* Definition of member fields of the function record structure in coverage + * map. + */ +#ifndef COVMAP_FUNC_RECORD +#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Initializer) +#else +#define INSTR_PROF_DATA_DEFINED +#endif +COVMAP_FUNC_RECORD(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), \ + NamePtr, llvm::ConstantExpr::getBitCast(NamePtr, \ + llvm::Type::getInt8PtrTy(Ctx))) +COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \ + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\ + NameValue.size())) +COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), DataSize, \ + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\ + CoverageMapping.size())) +COVMAP_FUNC_RECORD(const uint64_t, llvm::Type::getInt64Ty(Ctx), FuncHash, \ + llvm::ConstantInt::get(llvm::Type::getInt64Ty(Ctx), FuncHash)) +#undef COVMAP_FUNC_RECORD +/* COVMAP_FUNC_RECORD end. */ + + +#ifdef INSTR_PROF_VALUE_PROF_DATA +#define INSTR_PROF_DATA_DEFINED + +/*! + * This is the header of the data structure that defines the on-disk + * layout of the value profile data of a particular kind for one function. + */ +typedef struct ValueProfRecord { + /* The kind of the value profile record. */ + uint32_t Kind; + /* + * The number of value profile sites. It is guaranteed to be non-zero; + * otherwise the record for this kind won't be emitted. + */ + uint32_t NumValueSites; + /* + * The first element of the array that stores the number of profiled + * values for each value site. The size of the array is NumValueSites. + * Since NumValueSites is greater than zero, there is at least one + * element in the array. + */ + uint8_t SiteCountArray[1]; + + /* + * The fake declaration is for documentation purposes only. + * Align the start of the next field on an 8 byte boundary. + uint8_t Padding[X]; + */ + + /* The array of value profile data. The size of the array is the sum + * of all elements in SiteCountArray[]. + InstrProfValueData ValueData[]; + */ + +#ifdef __cplusplus + /*! + * \brief Return the number of value sites. + */ + uint32_t getNumValueSites() const { return NumValueSites; } + /*! + * \brief Read data from this record and save it to Record. + */ + void deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap); + /* + * In-place byte swap: + * Do byte swap for this instance. \c Old is the original order before + * the swap, and \c New is the new byte order. + */ + void swapBytes(support::endianness Old, support::endianness New); +#endif +} ValueProfRecord; + +/*! + * Per-function header/control data structure for value profiling + * data in indexed format. + */ +typedef struct ValueProfData { + /* + * Total size in bytes including this field. It must be a multiple + * of sizeof(uint64_t). + */ + uint32_t TotalSize; + /* + * The number of value profile kinds that have value profile data. + * In this implementation, a value profile kind is considered to + * have profile data if the number of value profile sites for the + * kind is not zero. More aggressively, the implementation can + * choose to check the actual data value: if none of the value sites + * has any profiled values, the kind can be skipped. + */ + uint32_t NumValueKinds; + + /* + * Following is a sequence of variable-length records. The prefix/header + * of each record is defined by ValueProfRecord type.
The number of + * records is NumValueKinds. + * ValueProfRecord Record_1; + * ValueProfRecord Record_N; + */ + +#if __cplusplus + /*! + * Return the total size in bytes of the on-disk value profile data + * given the data stored in Record. + */ + static uint32_t getSize(const InstrProfRecord &Record); + /*! + * Return a pointer to \c ValueProfData instance ready to be streamed. + */ + static std::unique_ptr + serializeFrom(const InstrProfRecord &Record); + /*! + * Check the integrity of the record. Return the error code when + * an error is detected, otherwise return instrprof_error::success. + */ + instrprof_error checkIntegrity(); + /*! + * Return a pointer to \c ValueProfileData instance ready to be read. + * All data in the instance are properly byte swapped. The input + * data is assumed to be in little endian order. + */ + static ErrorOr> + getValueProfData(const unsigned char *SrcBuffer, + const unsigned char *const SrcBufferEnd, + support::endianness SrcDataEndianness); + /*! + * Swap byte order from \c Endianness order to host byte order. + */ + void swapBytesToHost(support::endianness Endianness); + /*! + * Swap byte order from host byte order to \c Endianness order. + */ + void swapBytesFromHost(support::endianness Endianness); + /*! + * Return the total size of \c ValueProfileData. + */ + uint32_t getSize() const { return TotalSize; } + /*! + * Read data from this data and save it to \c Record. + */ + void deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap); + void operator delete(void *ptr) { ::operator delete(ptr); } +#endif +} ValueProfData; + +/* + * The closure is designed to abstact away two types of value profile data: + * - InstrProfRecord which is the primary data structure used to + * represent profile data in host tools (reader, writer, and profile-use) + * - value profile runtime data structure suitable to be used by C + * runtime library. + * + * Both sources of data need to serialize to disk/memory-buffer in common + * format: ValueProfData. The abstraction allows compiler-rt's raw profiler + * writer to share the same format and code with indexed profile writer. + * + * For documentation of the member methods below, refer to corresponding methods + * in class InstrProfRecord. + */ +typedef struct ValueProfRecordClosure { + const void *Record; + uint32_t (*GetNumValueKinds)(const void *Record); + uint32_t (*GetNumValueSites)(const void *Record, uint32_t VKind); + uint32_t (*GetNumValueData)(const void *Record, uint32_t VKind); + uint32_t (*GetNumValueDataForSite)(const void *R, uint32_t VK, uint32_t S); + + /* + * After extracting the value profile data from the value profile record, + * this method is used to map the in-memory value to on-disk value. If + * the method is null, value will be written out untranslated. + */ + uint64_t (*RemapValueData)(uint32_t, uint64_t Value); + void (*GetValueForSite)(const void *R, InstrProfValueData *Dst, uint32_t K, + uint32_t S, uint64_t (*Mapper)(uint32_t, uint64_t)); + ValueProfData *(*AllocValueProfData)(size_t TotalSizeInBytes); +} ValueProfRecordClosure; + +/* + * A wrapper struct that represents value profile runtime data. + * Like InstrProfRecord class which is used by profiling host tools, + * ValueProfRuntimeRecord also implements the abstract intefaces defined in + * ValueProfRecordClosure so that the runtime data can be serialized using + * shared C implementation. 
+/*
+ * A wrapper struct that represents value profile runtime data.
+ * Like the InstrProfRecord class which is used by profiling host tools,
+ * ValueProfRuntimeRecord also implements the abstract interfaces defined in
+ * ValueProfRecordClosure so that the runtime data can be serialized using
+ * the shared C implementation. In this structure, the NumValueSites and Nodes
+ * members are the primary fields while other fields hold the derived
+ * information for fast implementation of the closure interfaces.
+ */
+typedef struct ValueProfRuntimeRecord {
+  /* Number of sites for each value profile kind. */
+  const uint16_t *NumValueSites;
+  /* An array of linked-list headers. The size of the array is the
+   * total number of value profile sites: sum(NumValueSites[*]). Each
+   * linked list stores the values profiled for a value profile site. */
+  ValueProfNode **Nodes;
+
+  /* Total number of value profile kinds which have at least one
+   * value profile site. */
+  uint32_t NumValueKinds;
+  /* An array recording the number of values tracked at each site.
+   * The size of the array is TotalNumValueSites. */
+  uint8_t *SiteCountArray[IPVK_Last + 1];
+  ValueProfNode **NodesKind[IPVK_Last + 1];
+} ValueProfRuntimeRecord;
+
+/* Forward declarations of C interfaces. */
+int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
+                                     const uint16_t *NumValueSites,
+                                     ValueProfNode **Nodes);
+void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord);
+uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record);
+ValueProfData *
+serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
+                             ValueProfData *Dst);
+uint32_t getNumValueKindsRT(const void *R);
+
+#undef INSTR_PROF_VALUE_PROF_DATA
+#endif /* INSTR_PROF_VALUE_PROF_DATA */
+
+
+#ifdef INSTR_PROF_COMMON_API_IMPL
+#define INSTR_PROF_DATA_DEFINED
+#ifdef __cplusplus
+#define INSTR_PROF_INLINE inline
+#else
+#define INSTR_PROF_INLINE
+#endif
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+/*!
+ * \brief Return the \c ValueProfRecord header size including the
+ * padding bytes.
+ */
+INSTR_PROF_INLINE
+uint32_t getValueProfRecordHeaderSize(uint32_t NumValueSites) {
+  uint32_t Size = offsetof(ValueProfRecord, SiteCountArray) +
+                  sizeof(uint8_t) * NumValueSites;
+  /* Round the size to a multiple of 8 bytes. */
+  Size = (Size + 7) & ~7;
+  return Size;
+}
+
+/*!
+ * \brief Return the total size of the value profile record including the
+ * header and the value data.
+ */
+INSTR_PROF_INLINE
+uint32_t getValueProfRecordSize(uint32_t NumValueSites,
+                                uint32_t NumValueData) {
+  return getValueProfRecordHeaderSize(NumValueSites) +
+         sizeof(InstrProfValueData) * NumValueData;
+}
+
+/*!
+ * \brief Return the pointer to the start of the value data array.
+ */
+INSTR_PROF_INLINE
+InstrProfValueData *getValueProfRecordValueData(ValueProfRecord *This) {
+  return (InstrProfValueData *)((char *)This + getValueProfRecordHeaderSize(
+                                                   This->NumValueSites));
+}
+
+/*!
+ * \brief Return the total number of value data for \c This record.
+ */
+INSTR_PROF_INLINE
+uint32_t getValueProfRecordNumValueData(ValueProfRecord *This) {
+  uint32_t NumValueData = 0;
+  uint32_t I;
+  for (I = 0; I < This->NumValueSites; I++)
+    NumValueData += This->SiteCountArray[I];
+  return NumValueData;
+}
+
+/*!
+ * \brief Use this method to advance \c This to the next \c ValueProfRecord.
+ */
+INSTR_PROF_INLINE
+ValueProfRecord *getValueProfRecordNext(ValueProfRecord *This) {
+  uint32_t NumValueData = getValueProfRecordNumValueData(This);
+  return (ValueProfRecord *)((char *)This +
+                             getValueProfRecordSize(This->NumValueSites,
+                                                    NumValueData));
+}
+
+/*!
+ * \brief Return the first \c ValueProfRecord instance.
+ */ +INSTR_PROF_INLINE +ValueProfRecord *getFirstValueProfRecord(ValueProfData *This) { + return (ValueProfRecord *)((char *)This + sizeof(ValueProfData)); +} + +/* Closure based interfaces. */ + +/*! + * Return the total size in bytes of the on-disk value profile data + * given the data stored in Record. + */ +uint32_t getValueProfDataSize(ValueProfRecordClosure *Closure) { + uint32_t Kind; + uint32_t TotalSize = sizeof(ValueProfData); + const void *Record = Closure->Record; + uint32_t NumValueKinds = Closure->GetNumValueKinds(Record); + if (NumValueKinds == 0) + return TotalSize; + + for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) { + uint32_t NumValueSites = Closure->GetNumValueSites(Record, Kind); + if (!NumValueSites) + continue; + TotalSize += getValueProfRecordSize(NumValueSites, + Closure->GetNumValueData(Record, Kind)); + } + return TotalSize; +} + +/*! + * Extract value profile data of a function for the profile kind \c ValueKind + * from the \c Closure and serialize the data into \c This record instance. + */ +void serializeValueProfRecordFrom(ValueProfRecord *This, + ValueProfRecordClosure *Closure, + uint32_t ValueKind, uint32_t NumValueSites) { + uint32_t S; + const void *Record = Closure->Record; + This->Kind = ValueKind; + This->NumValueSites = NumValueSites; + InstrProfValueData *DstVD = getValueProfRecordValueData(This); + + for (S = 0; S < NumValueSites; S++) { + uint32_t ND = Closure->GetNumValueDataForSite(Record, ValueKind, S); + This->SiteCountArray[S] = ND; + Closure->GetValueForSite(Record, DstVD, ValueKind, S, + Closure->RemapValueData); + DstVD += ND; + } +} + +/*! + * Extract value profile data of a function from the \c Closure + * and serialize the data into \c DstData if it is not NULL or heap + * memory allocated by the \c Closure's allocator method. + */ +ValueProfData *serializeValueProfDataFrom(ValueProfRecordClosure *Closure, + ValueProfData *DstData) { + uint32_t Kind; + uint32_t TotalSize = getValueProfDataSize(Closure); + + ValueProfData *VPD = + DstData ? DstData : Closure->AllocValueProfData(TotalSize); + + VPD->TotalSize = TotalSize; + VPD->NumValueKinds = Closure->GetNumValueKinds(Closure->Record); + ValueProfRecord *VR = getFirstValueProfRecord(VPD); + for (Kind = IPVK_First; Kind <= IPVK_Last; Kind++) { + uint32_t NumValueSites = Closure->GetNumValueSites(Closure->Record, Kind); + if (!NumValueSites) + continue; + serializeValueProfRecordFrom(VR, Closure, Kind, NumValueSites); + VR = getValueProfRecordNext(VR); + } + return VPD; +} + +/* + * The value profiler runtime library stores the value profile data + * for a given function in \c NumValueSites and \c Nodes structures. + * \c ValueProfRuntimeRecord class is used to encapsulate the runtime + * profile data and provides fast interfaces to retrieve the profile + * information. This interface is used to initialize the runtime record + * and pre-compute the information needed for efficient implementation + * of callbacks required by ValueProfRecordClosure class. 
+ */
+int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
+                                     const uint16_t *NumValueSites,
+                                     ValueProfNode **Nodes) {
+  unsigned I, J, S = 0, NumValueKinds = 0;
+  RuntimeRecord->NumValueSites = NumValueSites;
+  RuntimeRecord->Nodes = Nodes;
+  for (I = 0; I <= IPVK_Last; I++) {
+    uint16_t N = NumValueSites[I];
+    if (!N) {
+      RuntimeRecord->SiteCountArray[I] = 0;
+      continue;
+    }
+    NumValueKinds++;
+    RuntimeRecord->SiteCountArray[I] = (uint8_t *)calloc(N, 1);
+    if (!RuntimeRecord->SiteCountArray[I])
+      return 1;
+    RuntimeRecord->NodesKind[I] = Nodes ? &Nodes[S] : NULL;
+    for (J = 0; J < N; J++) {
+      /* Compute value count for each site. */
+      uint32_t C = 0;
+      ValueProfNode *Site = Nodes ? RuntimeRecord->NodesKind[I][J] : NULL;
+      while (Site) {
+        C++;
+        Site = Site->Next;
+      }
+      if (C > UCHAR_MAX)
+        C = UCHAR_MAX;
+      RuntimeRecord->SiteCountArray[I][J] = C;
+    }
+    S += N;
+  }
+  RuntimeRecord->NumValueKinds = NumValueKinds;
+  return 0;
+}
+
+void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord) {
+  unsigned I;
+  for (I = 0; I <= IPVK_Last; I++) {
+    if (RuntimeRecord->SiteCountArray[I])
+      free(RuntimeRecord->SiteCountArray[I]);
+  }
+}
+
+/* ValueProfRecordClosure interface implementation for
+ * ValueProfRuntimeRecord. */
+uint32_t getNumValueKindsRT(const void *R) {
+  return ((const ValueProfRuntimeRecord *)R)->NumValueKinds;
+}
+
+uint32_t getNumValueSitesRT(const void *R, uint32_t VK) {
+  return ((const ValueProfRuntimeRecord *)R)->NumValueSites[VK];
+}
+
+uint32_t getNumValueDataForSiteRT(const void *R, uint32_t VK, uint32_t S) {
+  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+  return Record->SiteCountArray[VK][S];
+}
+
+uint32_t getNumValueDataRT(const void *R, uint32_t VK) {
+  unsigned I, S = 0;
+  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+  if (Record->SiteCountArray[VK] == 0)
+    return 0;
+  for (I = 0; I < Record->NumValueSites[VK]; I++)
+    S += Record->SiteCountArray[VK][I];
+  return S;
+}
+
+void getValueForSiteRT(const void *R, InstrProfValueData *Dst, uint32_t VK,
+                       uint32_t S, uint64_t (*Mapper)(uint32_t, uint64_t)) {
+  unsigned I, N = 0;
+  const ValueProfRuntimeRecord *Record = (const ValueProfRuntimeRecord *)R;
+  N = getNumValueDataForSiteRT(R, VK, S);
+  if (N == 0)
+    return;
+  ValueProfNode *VNode = Record->NodesKind[VK][S];
+  for (I = 0; I < N; I++) {
+    Dst[I] = VNode->VData;
+    VNode = VNode->Next;
+  }
+}
+
+ValueProfData *allocValueProfDataRT(size_t TotalSizeInBytes) {
+  return (ValueProfData *)calloc(TotalSizeInBytes, 1);
+}
+
+static ValueProfRecordClosure RTRecordClosure = {0,
+                                                 getNumValueKindsRT,
+                                                 getNumValueSitesRT,
+                                                 getNumValueDataRT,
+                                                 getNumValueDataForSiteRT,
+                                                 0,
+                                                 getValueForSiteRT,
+                                                 allocValueProfDataRT};
+
+/*
+ * Return the size of the ValueProfData structure needed to store the data
+ * recorded in the runtime record.
+ */
+uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record) {
+  RTRecordClosure.Record = Record;
+  return getValueProfDataSize(&RTRecordClosure);
+}
+
+/*
+ * Return a ValueProfData instance that stores the data collected
+ * from the runtime. If \c DstData is provided by the caller, the value
+ * profile data will be stored in *DstData and DstData is returned,
+ * otherwise the method will allocate space for the value data and
+ * return a pointer to the newly allocated space.
+ */
+ValueProfData *
+serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
+                             ValueProfData *DstData) {
+  RTRecordClosure.Record = Record;
+  return serializeValueProfDataFrom(&RTRecordClosure, DstData);
+}
+
+
+#undef INSTR_PROF_COMMON_API_IMPL
+#endif /* INSTR_PROF_COMMON_API_IMPL */
+
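+/* Editorial note: a sketch of the intended call sequence for the runtime
+ * interfaces above (error handling elided; only names declared in this
+ * file are used, and the surrounding driver code is assumed):
+ *
+ *   ValueProfRuntimeRecord R;
+ *   if (initializeValueProfRuntimeRecord(&R, NumValueSites, Nodes))
+ *     return;                                    // allocation failed
+ *   uint32_t Size = getValueProfDataSizeRT(&R);  // bytes needed on disk
+ *   ValueProfData *VPD = serializeValueProfDataFromRT(&R, NULL);
+ *   // ... write VPD->TotalSize bytes to the raw profile buffer ...
+ *   free(VPD);                                   // NULL Dst => calloc'ed
+ *   finalizeValueProfRuntimeRecord(&R);
+ */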
+/*============================================================================*/
+
+
+#ifndef INSTR_PROF_DATA_DEFINED
+
+#ifndef INSTR_PROF_DATA_INC_
+#define INSTR_PROF_DATA_INC_
+
+/* Helper macros. */
+#define INSTR_PROF_SIMPLE_QUOTE(x) #x
+#define INSTR_PROF_QUOTE(x) INSTR_PROF_SIMPLE_QUOTE(x)
+#define INSTR_PROF_SIMPLE_CONCAT(x,y) x ## y
+#define INSTR_PROF_CONCAT(x,y) INSTR_PROF_SIMPLE_CONCAT(x,y)
+
+/* Magic number to detect file format and endianness.
+ * Use 255 at one end, since no UTF-8 file can use that character. Avoid 0,
+ * so that utilities, like strings, don't grab it as a string. 129 is also
+ * invalid UTF-8, and high enough to be interesting.
+ * Use "lprofr" in the centre to stand for "LLVM Profile Raw", or "lprofR"
+ * for 32-bit platforms.
+ */
+#define INSTR_PROF_RAW_MAGIC_64 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
+        (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
+        (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129
+#define INSTR_PROF_RAW_MAGIC_32 (uint64_t)255 << 56 | (uint64_t)'l' << 48 | \
+        (uint64_t)'p' << 40 | (uint64_t)'r' << 32 | (uint64_t)'o' << 24 | \
+        (uint64_t)'f' << 16 | (uint64_t)'R' << 8 | (uint64_t)129
+
+/* Raw profile format version. */
+#define INSTR_PROF_RAW_VERSION 2
+
+/* Runtime section names and name strings. */
+#define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data
+#define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names
+#define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts
+
+#define INSTR_PROF_DATA_SECT_NAME_STR \
+        INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME)
+#define INSTR_PROF_NAME_SECT_NAME_STR \
+        INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME)
+#define INSTR_PROF_CNTS_SECT_NAME_STR \
+        INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME)
+
+/* Macros to define start/stop section symbol for a given
+ * section on Linux. For instance
+ * INSTR_PROF_SECT_START(INSTR_PROF_DATA_SECT_NAME) will
+ * expand to __start___llvm_prf_data
+ */
+#define INSTR_PROF_SECT_START(Sect) \
+        INSTR_PROF_CONCAT(__start_,Sect)
+#define INSTR_PROF_SECT_STOP(Sect) \
+        INSTR_PROF_CONCAT(__stop_,Sect)
+
+/* Value Profiling API linkage name. */
+#define INSTR_PROF_VALUE_PROF_FUNC __llvm_profile_instrument_target
+#define INSTR_PROF_VALUE_PROF_FUNC_STR \
+        INSTR_PROF_QUOTE(INSTR_PROF_VALUE_PROF_FUNC)
+
+/* InstrProfile per-function control data alignment. */
+#define INSTR_PROF_DATA_ALIGNMENT 8
+
+/* The data structure that represents a value tracked by the
+ * value profiler.
+ */
+typedef struct InstrProfValueData {
+  /* Profiled value. */
+  uint64_t Value;
+  /* Number of times the value appears in the training run. */
+  uint64_t Count;
+} InstrProfValueData;
+
+/* This is an internal data structure used by the value profiler. It
+ * is defined here so that the serialization code in LLVM can be shared
+ * with unit tests.
+ */
+typedef struct ValueProfNode {
+  InstrProfValueData VData;
+  struct ValueProfNode *Next;
+} ValueProfNode;
+
+#endif /* INSTR_PROF_DATA_INC_ */
+
+#else
+#undef INSTR_PROF_DATA_DEFINED
+#endif
+
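Editorial aside: a minimal, self-contained sketch of how the section-symbol
macros above compose. The extern symbols are synthesized by the Linux linker
for each profile section; the function name countCounters is illustrative
only, not part of LLVM:

    #include <stdint.h>

    #define INSTR_PROF_SIMPLE_CONCAT(x,y) x ## y
    #define INSTR_PROF_CONCAT(x,y) INSTR_PROF_SIMPLE_CONCAT(x,y)
    #define INSTR_PROF_SECT_START(Sect) INSTR_PROF_CONCAT(__start_,Sect)
    #define INSTR_PROF_SECT_STOP(Sect) INSTR_PROF_CONCAT(__stop_,Sect)

    /* Expands to __start___llvm_prf_cnts / __stop___llvm_prf_cnts. */
    extern uint64_t INSTR_PROF_SECT_START(__llvm_prf_cnts);
    extern uint64_t INSTR_PROF_SECT_STOP(__llvm_prf_cnts);

    /* Number of 8-byte counters placed in the counter section. */
    static uint64_t countCounters(void) {
      return (uint64_t)(&INSTR_PROF_SECT_STOP(__llvm_prf_cnts) -
                        &INSTR_PROF_SECT_START(__llvm_prf_cnts));
    }
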
diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h
index f937e7d08d54..fed3e693e7a0 100644
--- a/include/llvm/ProfileData/InstrProfReader.h
+++ b/include/llvm/ProfileData/InstrProfReader.h
@@ -23,6 +23,7 @@
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/OnDiskHashTable.h"
+#include "llvm/Support/raw_ostream.h"
 #include <iterator>
 
 namespace llvm {
@@ -53,7 +54,7 @@ class InstrProfReader {
   std::error_code LastError;
 
 public:
-  InstrProfReader() : LastError(instrprof_error::success) {}
+  InstrProfReader() : LastError(instrprof_error::success), Symtab() {}
   virtual ~InstrProfReader() {}
 
   /// Read the header. Required before reading first record.
@@ -64,7 +65,20 @@ public:
   InstrProfIterator begin() { return InstrProfIterator(this); }
   InstrProfIterator end() { return InstrProfIterator(); }
 
+  /// Return the PGO symtab. There are three different readers:
+  /// Raw, Text, and Indexed profile readers. The first two types
+  /// of readers are used only by the llvm-profdata tool, while the indexed
+  /// profile reader is also used by the llvm-cov tool and the compiler (
+  /// backend or frontend). Since creating the PGO symtab can incur
+  /// significant runtime and memory overhead (as it touches data
+  /// for the whole program), the InstrProfSymtab for the indexed profile
+  /// reader should be created on demand and it is recommended to be
+  /// used only for dumping purposes with llvm-profdata, not with the
+  /// compiler.
+  virtual InstrProfSymtab &getSymtab() = 0;
+
 protected:
+  std::unique_ptr<InstrProfSymtab> Symtab;
   /// Set the current std::error_code and return same.
   std::error_code error(std::error_code EC) {
     LastError = EC;
@@ -107,14 +121,24 @@ private:
   TextInstrProfReader(const TextInstrProfReader &) = delete;
   TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
 
+  std::error_code readValueProfileData(InstrProfRecord &Record);
+
 public:
   TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
       : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
 
+  /// Return true if the given buffer is in text instrprof format.
+  static bool hasFormat(const MemoryBuffer &Buffer);
+
   /// Read the header.
-  std::error_code readHeader() override { return success(); }
+  std::error_code readHeader() override;
   /// Read a single record.
   std::error_code readNextRecord(InstrProfRecord &Record) override;
+
+  InstrProfSymtab &getSymtab() override {
+    assert(Symtab.get());
+    return *Symtab.get();
+  }
 };
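+
+// Editorial note: a sketch (hypothetical driver code, not part of this
+// header) of how the readers in this file are typically consumed, e.g. by
+// llvm-profdata; exitWithError and dump are assumed helpers:
+//
+//   auto ReaderOrErr = IndexedInstrProfReader::create(std::move(Buffer));
+//   if (std::error_code EC = ReaderOrErr.getError())
+//     exitWithError(EC);
+//   auto Reader = std::move(ReaderOrErr.get());
+//   for (const InstrProfRecord &Record : *Reader)
+//     dump(Record, Reader->getSymtab());  // symtab built lazily, see above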
 
 /// Reader for the raw instrprof binary format from runtime.
@@ -129,31 +153,19 @@ class RawInstrProfReader : public InstrProfReader {
 private:
   /// The profile data file contents.
   std::unique_ptr<MemoryBuffer> DataBuffer;
-  struct ProfileData {
-    const uint32_t NameSize;
-    const uint32_t NumCounters;
-    const uint64_t FuncHash;
-    const IntPtrT NamePtr;
-    const IntPtrT CounterPtr;
-  };
-  struct RawHeader {
-    const uint64_t Magic;
-    const uint64_t Version;
-    const uint64_t DataSize;
-    const uint64_t CountersSize;
-    const uint64_t NamesSize;
-    const uint64_t CountersDelta;
-    const uint64_t NamesDelta;
-  };
-
   bool ShouldSwapBytes;
   uint64_t CountersDelta;
   uint64_t NamesDelta;
-  const ProfileData *Data;
-  const ProfileData *DataEnd;
+  const RawInstrProf::ProfileData<IntPtrT> *Data;
+  const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
   const uint64_t *CountersStart;
   const char *NamesStart;
+  const uint8_t *ValueDataStart;
   const char *ProfileEnd;
+  uint32_t ValueKindLast;
+  uint32_t CurValueDataSize;
+
+  InstrProfRecord::ValueMapType FunctionPtrToNameMap;
 
   RawInstrProfReader(const RawInstrProfReader &) = delete;
   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
@@ -165,13 +177,41 @@ public:
   std::error_code readHeader() override;
   std::error_code readNextRecord(InstrProfRecord &Record) override;
 
+  InstrProfSymtab &getSymtab() override {
+    assert(Symtab.get());
+    return *Symtab.get();
+  }
+
 private:
+  void createSymtab(InstrProfSymtab &Symtab);
   std::error_code readNextHeader(const char *CurrentPos);
-  std::error_code readHeader(const RawHeader &Header);
-  template <class IntT>
-  IntT swap(IntT Int) const {
+  std::error_code readHeader(const RawInstrProf::Header &Header);
+  template <class IntT> IntT swap(IntT Int) const {
     return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
   }
+  support::endianness getDataEndianness() const {
+    support::endianness HostEndian = getHostEndianness();
+    if (!ShouldSwapBytes)
+      return HostEndian;
+    if (HostEndian == support::little)
+      return support::big;
+    else
+      return support::little;
+  }
+
+  inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
+    return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
+  }
+  std::error_code readName(InstrProfRecord &Record);
+  std::error_code readFuncHash(InstrProfRecord &Record);
+  std::error_code readRawCounts(InstrProfRecord &Record);
+  std::error_code readValueProfilingData(InstrProfRecord &Record);
+  bool atEnd() const { return Data == DataEnd; }
+  void advanceData() {
+    Data++;
+    ValueDataStart += CurValueDataSize;
+  }
+
   const uint64_t *getCounter(IntPtrT CounterPtr) const {
     ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
     return CountersStart + Offset;
@@ -195,10 +235,15 @@ class InstrProfLookupTrait {
   std::vector<InstrProfRecord> DataBuffer;
   IndexedInstrProf::HashT HashType;
   unsigned FormatVersion;
+  // Endianness of the input value profile data.
+  // It should be LE by default, but can be changed
+  // for testing purposes.
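+  // (Editorial note, a hedged illustration rather than code from this
+  // header: a reader unit test can force the byte-swap path before
+  // deserializing records with
+  //     Reader.setValueProfDataEndianness(support::big);
+  // through IndexedInstrProfReader::setValueProfDataEndianness below.)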
+ support::endianness ValueProfDataEndianness; public: InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion) - : HashType(HashType), FormatVersion(FormatVersion) {} + : HashType(HashType), FormatVersion(FormatVersion), + ValueProfDataEndianness(support::little) {} typedef ArrayRef data_type; @@ -209,6 +254,7 @@ public: static bool EqualKey(StringRef A, StringRef B) { return A == B; } static StringRef GetInternalKey(StringRef K) { return K; } + static StringRef GetExternalKey(StringRef K) { return K; } hash_value_type ComputeHash(StringRef K); @@ -224,11 +270,64 @@ public: return StringRef((const char *)D, N); } + bool readValueProfilingData(const unsigned char *&D, + const unsigned char *const End); data_type ReadData(StringRef K, const unsigned char *D, offset_type N); + + // Used for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness) { + ValueProfDataEndianness = Endianness; + } +}; + +struct InstrProfReaderIndexBase { + // Read all the profile records with the same key pointed to the current + // iterator. + virtual std::error_code getRecords(ArrayRef &Data) = 0; + // Read all the profile records with the key equal to FuncName + virtual std::error_code getRecords(StringRef FuncName, + ArrayRef &Data) = 0; + virtual void advanceToNextKey() = 0; + virtual bool atEnd() const = 0; + virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; + virtual ~InstrProfReaderIndexBase() {} + virtual uint64_t getVersion() const = 0; + virtual void populateSymtab(InstrProfSymtab &) = 0; }; typedef OnDiskIterableChainedHashTable - InstrProfReaderIndex; + OnDiskHashTableImplV3; + +template +class InstrProfReaderIndex : public InstrProfReaderIndexBase { + +private: + std::unique_ptr HashTable; + typename HashTableImpl::data_iterator RecordIterator; + uint64_t FormatVersion; + +public: + InstrProfReaderIndex(const unsigned char *Buckets, + const unsigned char *const Payload, + const unsigned char *const Base, + IndexedInstrProf::HashT HashType, uint64_t Version); + + std::error_code getRecords(ArrayRef &Data) override; + std::error_code getRecords(StringRef FuncName, + ArrayRef &Data) override; + void advanceToNextKey() override { RecordIterator++; } + bool atEnd() const override { + return RecordIterator == HashTable->data_end(); + } + void setValueProfDataEndianness(support::endianness Endianness) override { + HashTable->getInfoObj().setValueProfDataEndianness(Endianness); + } + ~InstrProfReaderIndex() override {} + uint64_t getVersion() const override { return FormatVersion; } + void populateSymtab(InstrProfSymtab &Symtab) override { + Symtab.create(HashTable->keys()); + } +}; /// Reader for the indexed binary instrprof format. class IndexedInstrProfReader : public InstrProfReader { @@ -236,17 +335,15 @@ private: /// The profile data file contents. std::unique_ptr DataBuffer; /// The index into the profile data. - std::unique_ptr Index; - /// Iterator over the profile data. - InstrProfReaderIndex::data_iterator RecordIterator; - /// The file format version of the profile data. - uint64_t FormatVersion; + std::unique_ptr Index; /// The maximal execution count among all functions. 
uint64_t MaxFunctionCount; IndexedInstrProfReader(const IndexedInstrProfReader &) = delete; IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; + public: + uint64_t getVersion() const { return Index->getVersion(); } IndexedInstrProfReader(std::unique_ptr DataBuffer) : DataBuffer(std::move(DataBuffer)), Index(nullptr) {} @@ -258,9 +355,15 @@ public: /// Read a single record. std::error_code readNextRecord(InstrProfRecord &Record) override; + /// Return the pointer to InstrProfRecord associated with FuncName + /// and FuncHash + ErrorOr getInstrProfRecord(StringRef FuncName, + uint64_t FuncHash); + /// Fill Counts with the profile data for the given function name. std::error_code getFunctionCounts(StringRef FuncName, uint64_t FuncHash, std::vector &Counts); + /// Return the maximum of all known function counts. uint64_t getMaximumFunctionCount() { return MaxFunctionCount; } @@ -270,6 +373,16 @@ public: static ErrorOr> create(std::unique_ptr Buffer); + + // Used for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness) { + Index->setValueProfDataEndianness(Endianness); + } + + // See description in the base class. This interface is designed + // to be used by llvm-profdata (for dumping). Avoid using this when + // the client is the compiler. + InstrProfSymtab &getSymtab() override; }; } // end namespace llvm diff --git a/include/llvm/ProfileData/InstrProfWriter.h b/include/llvm/ProfileData/InstrProfWriter.h index ce0bb5242498..e7f53de051c3 100644 --- a/include/llvm/ProfileData/InstrProfWriter.h +++ b/include/llvm/ProfileData/InstrProfWriter.h @@ -15,38 +15,43 @@ #ifndef LLVM_PROFILEDATA_INSTRPROFWRITER_H #define LLVM_PROFILEDATA_INSTRPROFWRITER_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include namespace llvm { /// Writer for instrumentation based profile data. class InstrProfWriter { public: - typedef SmallDenseMap, 1> CounterData; + typedef SmallDenseMap ProfilingData; + private: - StringMap FunctionData; + StringMap FunctionData; uint64_t MaxFunctionCount; + public: InstrProfWriter() : MaxFunctionCount(0) {} /// Add function counts for the given function. If there are already counts /// for this function and the hash and number of counts match, each counter is - /// summed. - std::error_code addFunctionCounts(StringRef FunctionName, - uint64_t FunctionHash, - ArrayRef Counters); + /// summed. Optionally scale counts by \p Weight. + std::error_code addRecord(InstrProfRecord &&I, uint64_t Weight = 1); /// Write the profile to \c OS void write(raw_fd_ostream &OS); + /// Write the profile in text format to \c OS + void writeText(raw_fd_ostream &OS); + /// Write \c Record in text format to \c OS + static void writeRecordInText(const InstrProfRecord &Record, + InstrProfSymtab &Symtab, raw_fd_ostream &OS); /// Write the profile, returning the raw data. For testing. std::unique_ptr writeBuffer(); + // Internal interface for testing purpose only. + void setValueProfDataEndianness(support::endianness Endianness); + private: std::pair writeImpl(raw_ostream &OS); }; diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h index 1b82e55aa77a..8df3fe803209 100644 --- a/include/llvm/ProfileData/SampleProf.h +++ b/include/llvm/ProfileData/SampleProf.h @@ -11,14 +11,17 @@ // sample profile data. 
// //===----------------------------------------------------------------------===// + #ifndef LLVM_PROFILEDATA_SAMPLEPROF_H_ #define LLVM_PROFILEDATA_SAMPLEPROF_H_ -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/raw_ostream.h" + +#include #include namespace llvm { @@ -32,13 +35,27 @@ enum class sampleprof_error { too_large, truncated, malformed, - unrecognized_format + unrecognized_format, + unsupported_writing_format, + truncated_name_table, + not_implemented, + counter_overflow }; inline std::error_code make_error_code(sampleprof_error E) { return std::error_code(static_cast(E), sampleprof_category()); } +inline sampleprof_error MergeResult(sampleprof_error &Accumulator, + sampleprof_error Result) { + // Prefer first error encountered as later errors may be secondary effects of + // the initial problem. + if (Accumulator == sampleprof_error::success && + Result != sampleprof_error::success) + Accumulator = Result; + return Accumulator; +} + } // end namespace llvm namespace std { @@ -57,7 +74,7 @@ static inline uint64_t SPMagic() { uint64_t('2') << (64 - 56) | uint64_t(0xff); } -static inline uint64_t SPVersion() { return 100; } +static inline uint64_t SPVersion() { return 102; } /// Represents the relative location of an instruction. /// @@ -69,36 +86,36 @@ static inline uint64_t SPVersion() { return 100; } /// that are on the same line but belong to different basic blocks /// (e.g., the two post-increment instructions in "if (p) x++; else y++;"). struct LineLocation { - LineLocation(int L, unsigned D) : LineOffset(L), Discriminator(D) {} - int LineOffset; - unsigned Discriminator; + LineLocation(uint32_t L, uint32_t D) : LineOffset(L), Discriminator(D) {} + void print(raw_ostream &OS) const; + void dump() const; + bool operator<(const LineLocation &O) const { + return LineOffset < O.LineOffset || + (LineOffset == O.LineOffset && Discriminator < O.Discriminator); + } + + uint32_t LineOffset; + uint32_t Discriminator; }; -} // End namespace sampleprof +raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc); -template <> struct DenseMapInfo { - typedef DenseMapInfo OffsetInfo; - typedef DenseMapInfo DiscriminatorInfo; - static inline sampleprof::LineLocation getEmptyKey() { - return sampleprof::LineLocation(OffsetInfo::getEmptyKey(), - DiscriminatorInfo::getEmptyKey()); - } - static inline sampleprof::LineLocation getTombstoneKey() { - return sampleprof::LineLocation(OffsetInfo::getTombstoneKey(), - DiscriminatorInfo::getTombstoneKey()); - } - static inline unsigned getHashValue(sampleprof::LineLocation Val) { - return DenseMapInfo>::getHashValue( - std::pair(Val.LineOffset, Val.Discriminator)); - } - static inline bool isEqual(sampleprof::LineLocation LHS, - sampleprof::LineLocation RHS) { - return LHS.LineOffset == RHS.LineOffset && - LHS.Discriminator == RHS.Discriminator; - } +/// Represents the relative location of a callsite. +/// +/// Callsite locations are specified by the line offset from the +/// beginning of the function (marked by the line where the function +/// head is), the discriminator value within that line, and the callee +/// function name. 
+struct CallsiteLocation : public LineLocation { + CallsiteLocation(uint32_t L, uint32_t D, StringRef N) + : LineLocation(L, D), CalleeName(N) {} + void print(raw_ostream &OS) const; + void dump() const; + + StringRef CalleeName; }; -namespace sampleprof { +raw_ostream &operator<<(raw_ostream &OS, const CallsiteLocation &Loc); /// Representation of a single sample record. /// @@ -112,52 +129,79 @@ namespace sampleprof { /// will be a list of one or more functions. class SampleRecord { public: - typedef StringMap CallTargetMap; + typedef StringMap CallTargetMap; SampleRecord() : NumSamples(0), CallTargets() {} /// Increment the number of samples for this record by \p S. + /// Optionally scale sample count \p S by \p Weight. /// /// Sample counts accumulate using saturating arithmetic, to avoid wrapping /// around unsigned integers. - void addSamples(unsigned S) { - if (NumSamples <= std::numeric_limits::max() - S) - NumSamples += S; - else - NumSamples = std::numeric_limits::max(); + sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) { + bool Overflowed; + if (Weight > 1) { + S = SaturatingMultiply(S, Weight, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + } + NumSamples = SaturatingAdd(NumSamples, S, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + + return sampleprof_error::success; } /// Add called function \p F with samples \p S. + /// Optionally scale sample count \p S by \p Weight. /// /// Sample counts accumulate using saturating arithmetic, to avoid wrapping /// around unsigned integers. - void addCalledTarget(StringRef F, unsigned S) { - unsigned &TargetSamples = CallTargets[F]; - if (TargetSamples <= std::numeric_limits::max() - S) - TargetSamples += S; - else - TargetSamples = std::numeric_limits::max(); + sampleprof_error addCalledTarget(StringRef F, uint64_t S, + uint64_t Weight = 1) { + uint64_t &TargetSamples = CallTargets[F]; + bool Overflowed; + if (Weight > 1) { + S = SaturatingMultiply(S, Weight, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + } + TargetSamples = SaturatingAdd(TargetSamples, S, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + + return sampleprof_error::success; } /// Return true if this sample record contains function calls. bool hasCalls() const { return CallTargets.size() > 0; } - unsigned getSamples() const { return NumSamples; } + uint64_t getSamples() const { return NumSamples; } const CallTargetMap &getCallTargets() const { return CallTargets; } /// Merge the samples in \p Other into this record. - void merge(const SampleRecord &Other) { - addSamples(Other.getSamples()); - for (const auto &I : Other.getCallTargets()) - addCalledTarget(I.first(), I.second); + /// Optionally scale sample counts by \p Weight. + sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) { + sampleprof_error Result = addSamples(Other.getSamples(), Weight); + for (const auto &I : Other.getCallTargets()) { + MergeResult(Result, addCalledTarget(I.first(), I.second, Weight)); + } + return Result; } + void print(raw_ostream &OS, unsigned Indent) const; + void dump() const; + private: - unsigned NumSamples; + uint64_t NumSamples; CallTargetMap CallTargets; }; -typedef DenseMap BodySampleMap; +raw_ostream &operator<<(raw_ostream &OS, const SampleRecord &Sample); + +typedef std::map BodySampleMap; +class FunctionSamples; +typedef std::map CallsiteSampleMap; /// Representation of the samples collected for a function. 
/// @@ -167,59 +211,109 @@ typedef DenseMap BodySampleMap; class FunctionSamples { public: FunctionSamples() : TotalSamples(0), TotalHeadSamples(0) {} - void print(raw_ostream &OS = dbgs()); - void addTotalSamples(unsigned Num) { TotalSamples += Num; } - void addHeadSamples(unsigned Num) { TotalHeadSamples += Num; } - void addBodySamples(int LineOffset, unsigned Discriminator, unsigned Num) { - assert(LineOffset >= 0); - // When dealing with instruction weights, we use the value - // zero to indicate the absence of a sample. If we read an - // actual zero from the profile file, use the value 1 to - // avoid the confusion later on. - if (Num == 0) - Num = 1; - BodySamples[LineLocation(LineOffset, Discriminator)].addSamples(Num); - } - void addCalledTargetSamples(int LineOffset, unsigned Discriminator, - std::string FName, unsigned Num) { - assert(LineOffset >= 0); - BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget(FName, - Num); - } + void print(raw_ostream &OS = dbgs(), unsigned Indent = 0) const; + void dump() const; + sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) { + bool Overflowed; + if (Weight > 1) { + Num = SaturatingMultiply(Num, Weight, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + } + TotalSamples = SaturatingAdd(TotalSamples, Num, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; - /// Return the sample record at the given location. - /// Each location is specified by \p LineOffset and \p Discriminator. - SampleRecord &sampleRecordAt(const LineLocation &Loc) { - return BodySamples[Loc]; + return sampleprof_error::success; + } + sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) { + bool Overflowed; + if (Weight > 1) { + Num = SaturatingMultiply(Num, Weight, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + } + TotalHeadSamples = SaturatingAdd(TotalHeadSamples, Num, &Overflowed); + if (Overflowed) + return sampleprof_error::counter_overflow; + + return sampleprof_error::success; + } + sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator, + uint64_t Num, uint64_t Weight = 1) { + return BodySamples[LineLocation(LineOffset, Discriminator)].addSamples( + Num, Weight); + } + sampleprof_error addCalledTargetSamples(uint32_t LineOffset, + uint32_t Discriminator, + std::string FName, uint64_t Num, + uint64_t Weight = 1) { + return BodySamples[LineLocation(LineOffset, Discriminator)].addCalledTarget( + FName, Num, Weight); } /// Return the number of samples collected at the given location. /// Each location is specified by \p LineOffset and \p Discriminator. - unsigned samplesAt(int LineOffset, unsigned Discriminator) { - return sampleRecordAt(LineLocation(LineOffset, Discriminator)).getSamples(); + /// If the location is not found in profile, return error. + ErrorOr findSamplesAt(uint32_t LineOffset, + uint32_t Discriminator) const { + const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator)); + if (ret == BodySamples.end()) + return std::error_code(); + else + return ret->second.getSamples(); } - bool empty() const { return BodySamples.empty(); } + /// Return the function samples at the given callsite location. + FunctionSamples &functionSamplesAt(const CallsiteLocation &Loc) { + return CallsiteSamples[Loc]; + } + + /// Return a pointer to function samples at the given callsite location. 
+ const FunctionSamples * + findFunctionSamplesAt(const CallsiteLocation &Loc) const { + auto iter = CallsiteSamples.find(Loc); + if (iter == CallsiteSamples.end()) { + return nullptr; + } else { + return &iter->second; + } + } + + bool empty() const { return TotalSamples == 0; } /// Return the total number of samples collected inside the function. - unsigned getTotalSamples() const { return TotalSamples; } + uint64_t getTotalSamples() const { return TotalSamples; } /// Return the total number of samples collected at the head of the /// function. - unsigned getHeadSamples() const { return TotalHeadSamples; } + uint64_t getHeadSamples() const { return TotalHeadSamples; } /// Return all the samples collected in the body of the function. const BodySampleMap &getBodySamples() const { return BodySamples; } + /// Return all the callsite samples collected in the body of the function. + const CallsiteSampleMap &getCallsiteSamples() const { + return CallsiteSamples; + } + /// Merge the samples in \p Other into this one. - void merge(const FunctionSamples &Other) { - addTotalSamples(Other.getTotalSamples()); - addHeadSamples(Other.getHeadSamples()); + /// Optionally scale samples by \p Weight. + sampleprof_error merge(const FunctionSamples &Other, uint64_t Weight = 1) { + sampleprof_error Result = sampleprof_error::success; + MergeResult(Result, addTotalSamples(Other.getTotalSamples(), Weight)); + MergeResult(Result, addHeadSamples(Other.getHeadSamples(), Weight)); for (const auto &I : Other.getBodySamples()) { const LineLocation &Loc = I.first; const SampleRecord &Rec = I.second; - sampleRecordAt(Loc).merge(Rec); + MergeResult(Result, BodySamples[Loc].merge(Rec, Weight)); } + for (const auto &I : Other.getCallsiteSamples()) { + const CallsiteLocation &Loc = I.first; + const FunctionSamples &Rec = I.second; + MergeResult(Result, functionSamplesAt(Loc).merge(Rec, Weight)); + } + return Result; } private: @@ -227,12 +321,12 @@ private: /// /// Samples are cumulative, they include all the samples collected /// inside this function and all its inlined callees. - unsigned TotalSamples; + uint64_t TotalSamples; /// Total number of samples collected at the head of the function. /// This is an approximation of the number of calls made to this function /// at runtime. - unsigned TotalHeadSamples; + uint64_t TotalHeadSamples; /// Map instruction locations to collected samples. /// @@ -240,10 +334,53 @@ private: /// collected at the corresponding line offset. All line locations /// are an offset from the start of the function. BodySampleMap BodySamples; + + /// Map call sites to collected samples for the called function. + /// + /// Each entry in this map corresponds to all the samples + /// collected for the inlined function call at the given + /// location. For example, given: + /// + /// void foo() { + /// 1 bar(); + /// ... + /// 8 baz(); + /// } + /// + /// If the bar() and baz() calls were inlined inside foo(), this + /// map will contain two entries. One for all the samples collected + /// in the call to bar() at line offset 1, the other for all the samples + /// collected in the call to baz() at line offset 8. + CallsiteSampleMap CallsiteSamples; }; -} // End namespace sampleprof +raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS); -} // End namespace llvm +/// Sort a LocationT->SampleT map by LocationT. +/// +/// It produces a sorted list of records by ascending +/// order of LocationT. 
+template <class LocationT, class SampleT> class SampleSorter {
+public:
+  typedef std::pair<const LocationT, SampleT> SamplesWithLoc;
+  typedef SmallVector<const SamplesWithLoc *, 20> SamplesWithLocList;
+
+  SampleSorter(const std::map<LocationT, SampleT> &Samples) {
+    for (const auto &I : Samples)
+      V.push_back(&I);
+    std::stable_sort(V.begin(), V.end(),
+                     [](const SamplesWithLoc *A, const SamplesWithLoc *B) {
+                       return A->first < B->first;
+                     });
+  }
+  const SamplesWithLocList &get() const { return V; }
+
+private:
+  SamplesWithLocList V;
+};
+
+} // end namespace sampleprof
+
+} // end namespace llvm
 
 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H_
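Editorial aside: a short usage sketch for the SampleSorter defined above
(hypothetical caller; FS stands for some FunctionSamples instance, and the
printing function is illustrative, not LLVM API):

    // Print body samples in ascending (LineOffset, Discriminator) order,
    // using the operator<< overloads declared in SampleProf.h.
    void printBodySamples(llvm::raw_ostream &OS,
                          const llvm::sampleprof::FunctionSamples &FS) {
      using namespace llvm::sampleprof;
      SampleSorter<LineLocation, SampleRecord> Sorted(FS.getBodySamples());
      for (const auto *Entry : Sorted.get())
        OS << Entry->first << ": " << Entry->second << "\n";
    }
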
diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h
index c082a1abe951..6db0fbb0e7ab 100644
--- a/include/llvm/ProfileData/SampleProfReader.h
+++ b/include/llvm/ProfileData/SampleProfReader.h
@@ -9,11 +9,181 @@
 //
 // This file contains definitions needed for reading sample profiles.
 //
+// NOTE: If you are making changes to this file format, please remember
+// to document them in the Clang documentation at
+// tools/clang/docs/UsersManual.rst.
+//
+// Text format
+// -----------
+//
+// Sample profiles are written as ASCII text. The file is divided into
+// sections, which correspond to each of the functions executed at runtime.
+// Each section has the following format:
+//
+//    function1:total_samples:total_head_samples
+//     offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ]
+//     offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ]
+//     ...
+//     offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ]
+//     offsetA[.discriminator]: fnA:num_of_total_samples
+//      offsetA1[.discriminator]: number_of_samples [fn7:num fn8:num ... ]
+//      ...
+//
+// This is a nested tree in which the indentation represents the nesting level
+// of the inline stack. There are no blank lines in the file, and the spacing
+// within a single line is fixed. Additional spaces will result in an error
+// while reading the file.
+//
+// Any line starting with the '#' character is completely ignored.
+//
+// Inlined calls are represented with indentation. The inline stack is a
+// stack of source locations in which the top of the stack represents the
+// leaf function, and the bottom of the stack represents the actual
+// symbol to which the instruction belongs.
+//
+// Function names must be mangled in order for the profile loader to
+// match them in the current translation unit. The two numbers in the
+// function header specify how many total samples were accumulated in the
+// function (first number), and the total number of samples accumulated
+// in the prologue of the function (second number). This head sample
+// count provides an indicator of how frequently the function is invoked.
+//
+// There are two types of lines in the function body.
+//
+// * Sampled line represents the profile information of a source location.
+// * Callsite line represents the profile information of a callsite.
+//
+// Each sampled line may contain several items. Some are optional (marked
+// below):
+//
+// a. Source line offset. This number represents the line number
+//    in the function where the sample was collected. The line number is
+//    always relative to the line where the symbol of the function is
+//    defined. So, if the function has its header at line 280, the offset
+//    13 is at line 293 in the file.
+//
+//    Note that this offset should never be a negative number. This could
+//    happen in cases like macros. The debug machinery will register the
+//    line number at the point of macro expansion. So, if the macro was
+//    expanded in a line before the start of the function, the profile
+//    converter should emit a 0 as the offset (this means that the optimizers
+//    will not be able to associate a meaningful weight to the instructions
+//    in the macro).
+//
+// b. [OPTIONAL] Discriminator. This is used if the sampled program
+//    was compiled with DWARF discriminator support
+//    (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators).
+//    DWARF discriminators are unsigned integer values that allow the
+//    compiler to distinguish between multiple execution paths on the
+//    same source line location.
+//
+//    For example, consider the line of code ``if (cond) foo(); else bar();``.
+//    If the predicate ``cond`` is true 80% of the time, then the edge
+//    into function ``foo`` should be considered to be taken most of the
+//    time. But both calls to ``foo`` and ``bar`` are at the same source
+//    line, so a sample count at that line is not sufficient. The
+//    compiler needs to know which part of that line is taken more
+//    frequently.
+//
+//    This is what discriminators provide. In this case, the calls to
+//    ``foo`` and ``bar`` will be at the same line, but will have
+//    different discriminator values. This allows the compiler to correctly
+//    set edge weights into ``foo`` and ``bar``.
+//
+// c. Number of samples. This is an integer quantity representing the
+//    number of samples collected by the profiler at this source
+//    location.
+//
+// d. [OPTIONAL] Potential call targets and samples. If present, this
+//    line contains a call instruction. This models both direct and
+//    indirect calls. Each called target is listed together with the
+//    number of samples. For example,
+//
+//    130: 7  foo:3  bar:2  baz:7
+//
+//    The above means that at relative line offset 130 there is a call
+//    instruction that calls one of ``foo()``, ``bar()`` and ``baz()``,
+//    with ``baz()`` being the relatively more frequently called target.
+//
+// Each callsite line may contain several items. Some are optional.
+//
+// a. Source line offset. This number represents the line number of the
+//    callsite that is inlined in the profiled binary.
+//
+// b. [OPTIONAL] Discriminator. Same as the discriminator for sampled line.
+//
+// c. Number of samples. This is an integer quantity representing the
+//    total number of samples collected for the inlined instance at this
+//    callsite.
+//
+//
+// Binary format
+// -------------
+//
+// This is a more compact encoding. Numbers are encoded as ULEB128 values
+// and all strings are encoded in a name table. The file is organized in
+// the following sections:
+//
+// MAGIC (uint64_t)
+//    File identifier computed by function SPMagic() (0x5350524f463432ff)
+//
+// VERSION (uint32_t)
+//    File format version number computed by SPVersion()
+//
+// NAME TABLE
+//    SIZE (uint32_t)
+//        Number of entries in the name table.
+//    NAMES
+//        A NUL-separated list of SIZE strings.
+//
+// FUNCTION BODY (one for each uninlined function body present in the profile)
+//    HEAD_SAMPLES (uint64_t) [only for top-level functions]
+//        Total number of samples collected at the head (prologue) of the
+//        function.
+//        NOTE: This field should only be present for top-level functions
+//              (i.e., not inlined into any caller). Inlined function calls
+//              have no prologue, so they don't need this.
+//    NAME_IDX (uint32_t)
+//        Index into the name table indicating the function name.
+//    SAMPLES (uint64_t)
+//        Total number of samples collected in this function.
+//    NRECS (uint32_t)
+//        Total number of sampling records in this function's profile.
+// BODY RECORDS +// A list of NRECS entries. Each entry contains: +// OFFSET (uint32_t) +// Line offset from the start of the function. +// DISCRIMINATOR (uint32_t) +// Discriminator value (see description of discriminators +// in the text format documentation above). +// SAMPLES (uint64_t) +// Number of samples collected at this location. +// NUM_CALLS (uint32_t) +// Number of non-inlined function calls made at this location. In the +// case of direct calls, this number will always be 1. For indirect +// calls (virtual functions and function pointers) this will +// represent all the actual functions called at runtime. +// CALL_TARGETS +// A list of NUM_CALLS entries for each called function: +// NAME_IDX (uint32_t) +// Index into the name table with the callee name. +// SAMPLES (uint64_t) +// Number of samples collected at the call site. +// NUM_INLINED_FUNCTIONS (uint32_t) +// Number of callees inlined into this function. +// INLINED FUNCTION RECORDS +// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined +// callees. +// OFFSET (uint32_t) +// Line offset from the start of the function. +// DISCRIMINATOR (uint32_t) +// Discriminator value (see description of discriminators +// in the text format documentation above). +// FUNCTION BODY +// A FUNCTION BODY entry describing the inlined function. //===----------------------------------------------------------------------===// #ifndef LLVM_PROFILEDATA_SAMPLEPROFREADER_H #define LLVM_PROFILEDATA_SAMPLEPROFREADER_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" @@ -24,6 +194,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ErrorOr.h" +#include "llvm/Support/GCOV.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -57,7 +228,7 @@ namespace sampleprof { /// /// The reader supports two file formats: text and binary. The text format /// is useful for debugging and testing, while the binary format is more -/// compact. They can both be used interchangeably. +/// compact and I/O efficient. They can both be used interchangeably. class SampleProfileReader { public: SampleProfileReader(std::unique_ptr B, LLVMContext &C) @@ -86,7 +257,7 @@ public: StringMap &getProfiles() { return Profiles; } /// \brief Report a parse error message. - void reportParseError(int64_t LineNumber, Twine Msg) const { + void reportError(int64_t LineNumber, Twine Msg) const { Ctx.diagnose(DiagnosticInfoSampleProfile(Buffer->getBufferIdentifier(), LineNumber, Msg)); } @@ -95,6 +266,10 @@ public: static ErrorOr> create(StringRef Filename, LLVMContext &C); + /// \brief Create a sample profile reader from the supplied memory buffer. + static ErrorOr> + create(std::unique_ptr &B, LLVMContext &C); + protected: /// \brief Map every function to its associated profile. /// @@ -120,6 +295,9 @@ public: /// \brief Read sample profiles from the associated file. std::error_code read() override; + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); }; class SampleProfileReaderBinary : public SampleProfileReader { @@ -153,14 +331,75 @@ protected: /// \returns the read value. ErrorOr readString(); + /// Read a string indirectly via the name table. + ErrorOr readStringFromTable(); + /// \brief Return true if we've reached the end of file. bool at_eof() const { return Data >= End; } + /// Read the contents of the given profile instance. 
+ std::error_code readProfile(FunctionSamples &FProfile); + /// \brief Points to the current location in the buffer. const uint8_t *Data; /// \brief Points to the end of the buffer. const uint8_t *End; + + /// Function name table. + std::vector NameTable; +}; + +typedef SmallVector InlineCallStack; + +// Supported histogram types in GCC. Currently, we only need support for +// call target histograms. +enum HistType { + HIST_TYPE_INTERVAL, + HIST_TYPE_POW2, + HIST_TYPE_SINGLE_VALUE, + HIST_TYPE_CONST_DELTA, + HIST_TYPE_INDIR_CALL, + HIST_TYPE_AVERAGE, + HIST_TYPE_IOR, + HIST_TYPE_INDIR_CALL_TOPN +}; + +class SampleProfileReaderGCC : public SampleProfileReader { +public: + SampleProfileReaderGCC(std::unique_ptr B, LLVMContext &C) + : SampleProfileReader(std::move(B), C), GcovBuffer(Buffer.get()) {} + + /// \brief Read and validate the file header. + std::error_code readHeader() override; + + /// \brief Read sample profiles from the associated file. + std::error_code read() override; + + /// \brief Return true if \p Buffer is in the format supported by this class. + static bool hasFormat(const MemoryBuffer &Buffer); + +protected: + std::error_code readNameTable(); + std::error_code readOneFunctionProfile(const InlineCallStack &InlineStack, + bool Update, uint32_t Offset); + std::error_code readFunctionProfiles(); + std::error_code skipNextWord(); + template ErrorOr readNumber(); + ErrorOr readString(); + + /// \brief Read the section tag and check that it's the same as \p Expected. + std::error_code readSectionTag(uint32_t Expected); + + /// GCOV buffer containing the profile. + GCOVBuffer GcovBuffer; + + /// Function names in this profile. + std::vector Names; + + /// GCOV tags used to separate sections in the profile file. + static const uint32_t GCOVTagAFDOFileNames = 0xaa000000; + static const uint32_t GCOVTagAFDOFunction = 0xac000000; }; } // End namespace sampleprof diff --git a/include/llvm/ProfileData/SampleProfWriter.h b/include/llvm/ProfileData/SampleProfWriter.h index 302a82d32861..029dd2ebacb0 100644 --- a/include/llvm/ProfileData/SampleProfWriter.h +++ b/include/llvm/ProfileData/SampleProfWriter.h @@ -13,9 +13,8 @@ #ifndef LLVM_PROFILEDATA_SAMPLEPROFWRITER_H #define LLVM_PROFILEDATA_SAMPLEPROFWRITER_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" #include "llvm/ProfileData/SampleProf.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" @@ -30,77 +29,102 @@ enum SampleProfileFormat { SPF_None = 0, SPF_Text, SPF_Binary, SPF_GCC }; /// \brief Sample-based profile writer. Base class. class SampleProfileWriter { public: - SampleProfileWriter(StringRef Filename, std::error_code &EC, - sys::fs::OpenFlags Flags) - : OS(Filename, EC, Flags) {} virtual ~SampleProfileWriter() {} - /// \brief Write sample profiles in \p S for function \p FName. + /// Write sample profiles in \p S for function \p FName. /// - /// \returns true if the file was updated successfully. False, otherwise. - virtual bool write(StringRef FName, const FunctionSamples &S) = 0; + /// \returns status code of the file update operation. + virtual std::error_code write(StringRef FName, const FunctionSamples &S) = 0; - /// \brief Write sample profiles in \p S for function \p F. - bool write(const Function &F, const FunctionSamples &S) { - return write(F.getName(), S); - } - - /// \brief Write all the sample profiles for all the functions in \p M. + /// Write all the sample profiles in the given map of samples. 
/// - /// \returns true if the file was updated successfully. False, otherwise. - bool write(const Module &M, StringMap &P) { - for (const auto &F : M) { - StringRef Name = F.getName(); - if (!write(Name, P[Name])) - return false; - } - return true; - } + /// \returns status code of the file update operation. + std::error_code write(const StringMap &ProfileMap) { + if (std::error_code EC = writeHeader(ProfileMap)) + return EC; - /// \brief Write all the sample profiles in the given map of samples. - /// - /// \returns true if the file was updated successfully. False, otherwise. - bool write(StringMap &ProfileMap) { - for (auto &I : ProfileMap) { + for (const auto &I : ProfileMap) { StringRef FName = I.first(); - FunctionSamples &Profile = I.second; - if (!write(FName, Profile)) - return false; + const FunctionSamples &Profile = I.second; + if (std::error_code EC = write(FName, Profile)) + return EC; } - return true; + return sampleprof_error::success; } - /// \brief Profile writer factory. Create a new writer based on the value of - /// \p Format. + raw_ostream &getOutputStream() { return *OutputStream; } + + /// Profile writer factory. + /// + /// Create a new file writer based on the value of \p Format. static ErrorOr> create(StringRef Filename, SampleProfileFormat Format); + /// Create a new stream writer based on the value of \p Format. + /// For testing. + static ErrorOr> + create(std::unique_ptr &OS, SampleProfileFormat Format); + protected: + SampleProfileWriter(std::unique_ptr &OS) + : OutputStream(std::move(OS)) {} + + /// \brief Write a file header for the profile file. + virtual std::error_code + writeHeader(const StringMap &ProfileMap) = 0; + /// \brief Output stream where to emit the profile to. - raw_fd_ostream OS; + std::unique_ptr OutputStream; }; /// \brief Sample-based profile writer (text format). class SampleProfileWriterText : public SampleProfileWriter { public: - SampleProfileWriterText(StringRef F, std::error_code &EC) - : SampleProfileWriter(F, EC, sys::fs::F_Text) {} + std::error_code write(StringRef FName, const FunctionSamples &S) override; - bool write(StringRef FName, const FunctionSamples &S) override; - bool write(const Module &M, StringMap &P) { - return SampleProfileWriter::write(M, P); +protected: + SampleProfileWriterText(std::unique_ptr &OS) + : SampleProfileWriter(OS), Indent(0) {} + + std::error_code + writeHeader(const StringMap &ProfileMap) override { + return sampleprof_error::success; } + +private: + /// Indent level to use when writing. + /// + /// This is used when printing inlined callees. + unsigned Indent; + + friend ErrorOr> + SampleProfileWriter::create(std::unique_ptr &OS, + SampleProfileFormat Format); }; /// \brief Sample-based profile writer (binary format). 
class SampleProfileWriterBinary : public SampleProfileWriter { public: - SampleProfileWriterBinary(StringRef F, std::error_code &EC); + std::error_code write(StringRef F, const FunctionSamples &S) override; - bool write(StringRef F, const FunctionSamples &S) override; - bool write(const Module &M, StringMap &P) { - return SampleProfileWriter::write(M, P); - } +protected: + SampleProfileWriterBinary(std::unique_ptr &OS) + : SampleProfileWriter(OS), NameTable() {} + + std::error_code + writeHeader(const StringMap &ProfileMap) override; + std::error_code writeNameIdx(StringRef FName); + std::error_code writeBody(StringRef FName, const FunctionSamples &S); + +private: + void addName(StringRef FName); + void addNames(const FunctionSamples &S); + + MapVector NameTable; + + friend ErrorOr> + SampleProfileWriter::create(std::unique_ptr &OS, + SampleProfileFormat Format); }; } // End namespace sampleprof diff --git a/include/llvm/Support/ARMTargetParser.def b/include/llvm/Support/ARMTargetParser.def new file mode 100644 index 000000000000..2f99b0717adf --- /dev/null +++ b/include/llvm/Support/ARMTargetParser.def @@ -0,0 +1,223 @@ +//===- ARMTargetParser.def - ARM target parsing defines ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides defines to build up the ARM target parser's logic. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED! + +#ifndef ARM_FPU +#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) +#endif +ARM_FPU("invalid", FK_INVALID, FV_NONE, NS_None, FR_None) +ARM_FPU("none", FK_NONE, FV_NONE, NS_None, FR_None) +ARM_FPU("vfp", FK_VFP, FV_VFPV2, NS_None, FR_None) +ARM_FPU("vfpv2", FK_VFPV2, FV_VFPV2, NS_None, FR_None) +ARM_FPU("vfpv3", FK_VFPV3, FV_VFPV3, NS_None, FR_None) +ARM_FPU("vfpv3-fp16", FK_VFPV3_FP16, FV_VFPV3_FP16, NS_None, FR_None) +ARM_FPU("vfpv3-d16", FK_VFPV3_D16, FV_VFPV3, NS_None, FR_D16) +ARM_FPU("vfpv3-d16-fp16", FK_VFPV3_D16_FP16, FV_VFPV3_FP16, NS_None, FR_D16) +ARM_FPU("vfpv3xd", FK_VFPV3XD, FV_VFPV3, NS_None, FR_SP_D16) +ARM_FPU("vfpv3xd-fp16", FK_VFPV3XD_FP16, FV_VFPV3_FP16, NS_None, FR_SP_D16) +ARM_FPU("vfpv4", FK_VFPV4, FV_VFPV4, NS_None, FR_None) +ARM_FPU("vfpv4-d16", FK_VFPV4_D16, FV_VFPV4, NS_None, FR_D16) +ARM_FPU("fpv4-sp-d16", FK_FPV4_SP_D16, FV_VFPV4, NS_None, FR_SP_D16) +ARM_FPU("fpv5-d16", FK_FPV5_D16, FV_VFPV5, NS_None, FR_D16) +ARM_FPU("fpv5-sp-d16", FK_FPV5_SP_D16, FV_VFPV5, NS_None, FR_SP_D16) +ARM_FPU("fp-armv8", FK_FP_ARMV8, FV_VFPV5, NS_None, FR_None) +ARM_FPU("neon", FK_NEON, FV_VFPV3, NS_Neon, FR_None) +ARM_FPU("neon-fp16", FK_NEON_FP16, FV_VFPV3_FP16, NS_Neon, FR_None) +ARM_FPU("neon-vfpv4", FK_NEON_VFPV4, FV_VFPV4, NS_Neon, FR_None) +ARM_FPU("neon-fp-armv8", FK_NEON_FP_ARMV8, FV_VFPV5, NS_Neon, FR_None) +ARM_FPU("crypto-neon-fp-armv8", FK_CRYPTO_NEON_FP_ARMV8, FV_VFPV5, NS_Crypto, + FR_None) +ARM_FPU("softvfp", FK_SOFTVFP, FV_NONE, NS_None, FR_None) +#undef ARM_FPU + +#ifndef ARM_ARCH +#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) +#endif +ARM_ARCH("invalid", AK_INVALID, nullptr, nullptr, + ARMBuildAttrs::CPUArch::Pre_v4, FK_NONE, AEK_NONE) +ARM_ARCH("armv2", AK_ARMV2, "2", "v2", ARMBuildAttrs::CPUArch::Pre_v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv2a", AK_ARMV2A, "2A", "v2a", 
ARMBuildAttrs::CPUArch::Pre_v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv3", AK_ARMV3, "3", "v3", ARMBuildAttrs::CPUArch::Pre_v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv3m", AK_ARMV3M, "3M", "v3m", ARMBuildAttrs::CPUArch::Pre_v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv4", AK_ARMV4, "4", "v4", ARMBuildAttrs::CPUArch::v4, + FK_NONE, AEK_NONE) +ARM_ARCH("armv4t", AK_ARMV4T, "4T", "v4t", ARMBuildAttrs::CPUArch::v4T, + FK_NONE, AEK_NONE) +ARM_ARCH("armv5t", AK_ARMV5T, "5T", "v5", ARMBuildAttrs::CPUArch::v5T, + FK_NONE, AEK_NONE) +ARM_ARCH("armv5te", AK_ARMV5TE, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE, + FK_NONE, AEK_DSP) +ARM_ARCH("armv5tej", AK_ARMV5TEJ, "5TEJ", "v5e", ARMBuildAttrs::CPUArch::v5TEJ, + FK_NONE, AEK_DSP) +ARM_ARCH("armv6", AK_ARMV6, "6", "v6", ARMBuildAttrs::CPUArch::v6, + FK_VFPV2, AEK_DSP) +ARM_ARCH("armv6k", AK_ARMV6K, "6K", "v6k", ARMBuildAttrs::CPUArch::v6K, + FK_VFPV2, AEK_DSP) +ARM_ARCH("armv6t2", AK_ARMV6T2, "6T2", "v6t2", ARMBuildAttrs::CPUArch::v6T2, + FK_NONE, AEK_DSP) +ARM_ARCH("armv6kz", AK_ARMV6KZ, "6KZ", "v6kz", ARMBuildAttrs::CPUArch::v6KZ, + FK_VFPV2, (AEK_SEC | AEK_DSP)) +ARM_ARCH("armv6-m", AK_ARMV6M, "6-M", "v6m", ARMBuildAttrs::CPUArch::v6_M, + FK_NONE, AEK_NONE) +ARM_ARCH("armv7-a", AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7, + FK_NEON, AEK_DSP) +ARM_ARCH("armv7-r", AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7, + FK_NONE, (AEK_HWDIV | AEK_DSP)) +ARM_ARCH("armv7-m", AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7, + FK_NONE, AEK_HWDIV) +ARM_ARCH("armv7e-m", AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M, + FK_NONE, (AEK_HWDIV | AEK_DSP)) +ARM_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8, + FK_CRYPTO_NEON_FP_ARMV8, (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | + AEK_HWDIV | AEK_DSP | AEK_CRC)) +ARM_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8, + FK_CRYPTO_NEON_FP_ARMV8, (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | + AEK_HWDIV | AEK_DSP | AEK_CRC)) +ARM_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a", ARMBuildAttrs::CPUArch::v8, + FK_CRYPTO_NEON_FP_ARMV8, (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | + AEK_HWDIV | AEK_DSP | AEK_CRC)) +// Non-standard Arch names. 
+ARM_ARCH("iwmmxt", AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE, + FK_NONE, AEK_NONE) +ARM_ARCH("iwmmxt2", AK_IWMMXT2, "iwmmxt2", "", ARMBuildAttrs::CPUArch::v5TE, + FK_NONE, AEK_NONE) +ARM_ARCH("xscale", AK_XSCALE, "xscale", "", ARMBuildAttrs::CPUArch::v5TE, + FK_NONE, AEK_NONE) +ARM_ARCH("armv7s", AK_ARMV7S, "7-S", "v7s", ARMBuildAttrs::CPUArch::v7, + FK_NEON_VFPV4, AEK_DSP) +ARM_ARCH("armv7k", AK_ARMV7K, "7-K", "v7k", ARMBuildAttrs::CPUArch::v7, + FK_NONE, AEK_DSP) +#undef ARM_ARCH + +#ifndef ARM_ARCH_EXT_NAME +#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) +#endif +// FIXME: This would be nicer were it tablegen +ARM_ARCH_EXT_NAME("invalid", AEK_INVALID, nullptr, nullptr) +ARM_ARCH_EXT_NAME("none", AEK_NONE, nullptr, nullptr) +ARM_ARCH_EXT_NAME("crc", AEK_CRC, "+crc", "-crc") +ARM_ARCH_EXT_NAME("crypto", AEK_CRYPTO, "+crypto","-crypto") +ARM_ARCH_EXT_NAME("fp", AEK_FP, nullptr, nullptr) +ARM_ARCH_EXT_NAME("idiv", (AEK_HWDIVARM | AEK_HWDIV), nullptr, nullptr) +ARM_ARCH_EXT_NAME("mp", AEK_MP, nullptr, nullptr) +ARM_ARCH_EXT_NAME("simd", AEK_SIMD, nullptr, nullptr) +ARM_ARCH_EXT_NAME("sec", AEK_SEC, nullptr, nullptr) +ARM_ARCH_EXT_NAME("virt", AEK_VIRT, nullptr, nullptr) +ARM_ARCH_EXT_NAME("fp16", AEK_FP16, "+fullfp16", "-fullfp16") +ARM_ARCH_EXT_NAME("os", AEK_OS, nullptr, nullptr) +ARM_ARCH_EXT_NAME("iwmmxt", AEK_IWMMXT, nullptr, nullptr) +ARM_ARCH_EXT_NAME("iwmmxt2", AEK_IWMMXT2, nullptr, nullptr) +ARM_ARCH_EXT_NAME("maverick", AEK_MAVERICK, nullptr, nullptr) +ARM_ARCH_EXT_NAME("xscale", AEK_XSCALE, nullptr, nullptr) +#undef ARM_ARCH_EXT_NAME + +#ifndef ARM_HW_DIV_NAME +#define ARM_HW_DIV_NAME(NAME, ID) +#endif +ARM_HW_DIV_NAME("invalid", AEK_INVALID) +ARM_HW_DIV_NAME("none", AEK_NONE) +ARM_HW_DIV_NAME("thumb", AEK_HWDIV) +ARM_HW_DIV_NAME("arm", AEK_HWDIVARM) +ARM_HW_DIV_NAME("arm,thumb", (AEK_HWDIVARM | AEK_HWDIV)) +#undef ARM_HW_DIV_NAME + +#ifndef ARM_CPU_NAME +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) +#endif +ARM_CPU_NAME("arm2", AK_ARMV2, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm3", AK_ARMV2A, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm6", AK_ARMV3, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm7m", AK_ARMV3M, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm8", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm810", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("strongarm", AK_ARMV4, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("strongarm110", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("strongarm1100", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("strongarm1110", AK_ARMV4, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm7tdmi", AK_ARMV4T, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm7tdmi-s", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm710t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm720t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm9", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm9tdmi", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm920", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm920t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm922t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm9312", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm940t", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("ep9312", AK_ARMV4T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm10tdmi", AK_ARMV5T, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm1020t", AK_ARMV5T, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm9e", AK_ARMV5TE, FK_NONE, false, AEK_NONE) 
+ARM_CPU_NAME("arm946e-s", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm966e-s", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm968e-s", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm10e", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1020e", AK_ARMV5TE, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1022e", AK_ARMV5TE, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm926ej-s", AK_ARMV5TEJ, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm1136j-s", AK_ARMV6, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1136jf-s", AK_ARMV6, FK_VFPV2, true, AEK_NONE) +ARM_CPU_NAME("arm1136jz-s", AK_ARMV6, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1176j-s", AK_ARMV6K, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm1176jz-s", AK_ARMV6KZ, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("mpcore", AK_ARMV6K, FK_VFPV2, false, AEK_NONE) +ARM_CPU_NAME("mpcorenovfp", AK_ARMV6K, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("arm1176jzf-s", AK_ARMV6KZ, FK_VFPV2, true, AEK_NONE) +ARM_CPU_NAME("arm1156t2-s", AK_ARMV6T2, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("arm1156t2f-s", AK_ARMV6T2, FK_VFPV2, false, AEK_NONE) +ARM_CPU_NAME("cortex-m0", AK_ARMV6M, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("cortex-m0plus", AK_ARMV6M, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("cortex-m1", AK_ARMV6M, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("sc000", AK_ARMV6M, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("cortex-a5", AK_ARMV7A, FK_NEON_VFPV4, false, (AEK_SEC | AEK_MP)) +ARM_CPU_NAME("cortex-a7", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("cortex-a8", AK_ARMV7A, FK_NEON, true, AEK_SEC) +ARM_CPU_NAME("cortex-a9", AK_ARMV7A, FK_NEON_FP16, false, (AEK_SEC | AEK_MP)) +ARM_CPU_NAME("cortex-a12", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("cortex-a15", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("cortex-a17", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_SEC | AEK_MP | AEK_VIRT | AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("krait", AK_ARMV7A, FK_NEON_VFPV4, false, + (AEK_HWDIVARM | AEK_HWDIV)) +ARM_CPU_NAME("cortex-r4", AK_ARMV7R, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("cortex-r4f", AK_ARMV7R, FK_VFPV3_D16, false, AEK_NONE) +ARM_CPU_NAME("cortex-r5", AK_ARMV7R, FK_VFPV3_D16, false, + (AEK_MP | AEK_HWDIVARM)) +ARM_CPU_NAME("cortex-r7", AK_ARMV7R, FK_VFPV3_D16_FP16, false, + (AEK_MP | AEK_HWDIVARM)) +ARM_CPU_NAME("sc300", AK_ARMV7M, FK_NONE, false, AEK_NONE) +ARM_CPU_NAME("cortex-m3", AK_ARMV7M, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("cortex-m4", AK_ARMV7EM, FK_FPV4_SP_D16, true, AEK_NONE) +ARM_CPU_NAME("cortex-m7", AK_ARMV7EM, FK_FPV5_D16, false, AEK_NONE) +ARM_CPU_NAME("cortex-a35", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) +ARM_CPU_NAME("cortex-a53", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, true, AEK_CRC) +ARM_CPU_NAME("cortex-a57", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) +ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) +ARM_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, AEK_CRC) +// Non-standard Arch names. 
+ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, AEK_NONE) +ARM_CPU_NAME("swift", AK_ARMV7S, FK_NEON_VFPV4, true, + (AEK_HWDIVARM | AEK_HWDIV)) +// Invalid CPU +ARM_CPU_NAME("invalid", AK_INVALID, FK_INVALID, true, AEK_INVALID) +#undef ARM_CPU_NAME diff --git a/include/llvm/Support/AlignOf.h b/include/llvm/Support/AlignOf.h index 07da02d063c7..5268c8d16986 100644 --- a/include/llvm/Support/AlignOf.h +++ b/include/llvm/Support/AlignOf.h @@ -17,9 +17,15 @@ #include "llvm/Support/Compiler.h" #include +#include namespace llvm { -template + +namespace detail { + +// For everything other than an abstract class we can calulate alignment by +// building a class with a single character and a member of the given type. +template ::value> struct AlignmentCalcImpl { char x; #if defined(_MSC_VER) @@ -35,6 +41,25 @@ private: AlignmentCalcImpl() {} // Never instantiate. }; +// Abstract base class helper, this will have the minimal alignment and size +// for any abstract class. We don't even define its destructor because this +// type should never be used in a way that requires it. +struct AlignmentCalcImplBase { + virtual ~AlignmentCalcImplBase() = 0; +}; + +// When we have an abstract class type, specialize the alignment computation +// engine to create another abstract class that derives from both an empty +// abstract base class and the provided type. This has the same effect as the +// above except that it handles the fact that we can't actually create a member +// of type T. +template +struct AlignmentCalcImpl : AlignmentCalcImplBase, T { + virtual ~AlignmentCalcImpl() = 0; +}; + +} // End detail namespace. + /// AlignOf - A templated class that contains an enum value representing /// the alignment of the template argument. For example, /// AlignOf::Alignment represents the alignment of type "int". The @@ -50,11 +75,13 @@ struct AlignOf { // llvm::AlignOf::' [-Wenum-compare] // by using constexpr instead of enum. // (except on MSVC, since it doesn't support constexpr yet). - static constexpr unsigned Alignment = - static_cast(sizeof(AlignmentCalcImpl) - sizeof(T)); + static constexpr unsigned Alignment = static_cast( + sizeof(detail::AlignmentCalcImpl) - sizeof(T)); #else - enum { Alignment = - static_cast(sizeof(AlignmentCalcImpl) - sizeof(T)) }; + enum { + Alignment = static_cast(sizeof(detail::AlignmentCalcImpl) - + sizeof(T)) + }; #endif enum { Alignment_GreaterEqual_2Bytes = Alignment >= 2 ? 1 : 0 }; enum { Alignment_GreaterEqual_4Bytes = Alignment >= 4 ? 1 : 0 }; diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index f9b5cf22f97d..c608736fa956 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -222,6 +222,8 @@ public: // Without this, MemorySanitizer messages for values originated from here // will point to the allocation of the entire slab. __msan_allocated_memory(AlignedPtr, Size); + // Similarly, tell ASan about this space. + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } @@ -229,12 +231,16 @@ public: size_t PaddedSize = Size + Alignment - 1; if (PaddedSize > SizeThreshold) { void *NewSlab = Allocator.Allocate(PaddedSize, 0); + // We own the new slab and don't want anyone reading anyting other than + // pieces returned from this method. So poison the whole slab. 
+ __asan_poison_memory_region(NewSlab, PaddedSize); CustomSizedSlabs.push_back(std::make_pair(NewSlab, PaddedSize)); uintptr_t AlignedAddr = alignAddr(NewSlab, Alignment); assert(AlignedAddr + Size <= (uintptr_t)NewSlab + PaddedSize); char *AlignedPtr = (char*)AlignedAddr; __msan_allocated_memory(AlignedPtr, Size); + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } @@ -246,13 +252,16 @@ public: char *AlignedPtr = (char*)AlignedAddr; CurPtr = AlignedPtr + Size; __msan_allocated_memory(AlignedPtr, Size); + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } // Pull in base class overloads. using AllocatorBase::Allocate; - void Deallocate(const void * /*Ptr*/, size_t /*Size*/) {} + void Deallocate(const void *Ptr, size_t Size) { + __asan_poison_memory_region(Ptr, Size); + } // Pull in base class overloads. using AllocatorBase::Deallocate; @@ -310,6 +319,10 @@ private: size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); void *NewSlab = Allocator.Allocate(AllocatedSlabSize, 0); + // We own the new slab and don't want anyone reading anything other than + // pieces returned from this method. So poison the whole slab. + __asan_poison_memory_region(NewSlab, AllocatedSlabSize); + Slabs.push_back(NewSlab); CurPtr = (char *)(NewSlab); End = ((char *)NewSlab) + AllocatedSlabSize; diff --git a/include/llvm/Support/BlockFrequency.h b/include/llvm/Support/BlockFrequency.h index 4304a253b287..1b45cc52973f 100644 --- a/include/llvm/Support/BlockFrequency.h +++ b/include/llvm/Support/BlockFrequency.h @@ -14,12 +14,12 @@ #ifndef LLVM_SUPPORT_BLOCKFREQUENCY_H #define LLVM_SUPPORT_BLOCKFREQUENCY_H +#include "llvm/Support/BranchProbability.h" #include "llvm/Support/DataTypes.h" namespace llvm { class raw_ostream; -class BranchProbability; // This class represents Block Frequency as a 64-bit value. class BlockFrequency { @@ -37,34 +37,38 @@ public: /// \brief Multiplies with a branch probability. The computation will never /// overflow. - BlockFrequency &operator*=(const BranchProbability &Prob); - const BlockFrequency operator*(const BranchProbability &Prob) const; + BlockFrequency &operator*=(BranchProbability Prob); + BlockFrequency operator*(BranchProbability Prob) const; /// \brief Divide by a non-zero branch probability using saturating /// arithmetic. - BlockFrequency &operator/=(const BranchProbability &Prob); - BlockFrequency operator/(const BranchProbability &Prob) const; + BlockFrequency &operator/=(BranchProbability Prob); + BlockFrequency operator/(BranchProbability Prob) const; /// \brief Adds another block frequency using saturating arithmetic. - BlockFrequency &operator+=(const BlockFrequency &Freq); - const BlockFrequency operator+(const BlockFrequency &Freq) const; + BlockFrequency &operator+=(BlockFrequency Freq); + BlockFrequency operator+(BlockFrequency Freq) const; + + /// \brief Subtracts another block frequency using saturating arithmetic. + BlockFrequency &operator-=(BlockFrequency Freq); + BlockFrequency operator-(BlockFrequency Freq) const; /// \brief Shift block frequency to the right by count digits saturating to 1. 
BlockFrequency &operator>>=(const unsigned count); - bool operator<(const BlockFrequency &RHS) const { + bool operator<(BlockFrequency RHS) const { return Frequency < RHS.Frequency; } - bool operator<=(const BlockFrequency &RHS) const { + bool operator<=(BlockFrequency RHS) const { return Frequency <= RHS.Frequency; } - bool operator>(const BlockFrequency &RHS) const { + bool operator>(BlockFrequency RHS) const { return Frequency > RHS.Frequency; } - bool operator>=(const BlockFrequency &RHS) const { + bool operator>=(BlockFrequency RHS) const { return Frequency >= RHS.Frequency; } }; diff --git a/include/llvm/Support/BranchProbability.h b/include/llvm/Support/BranchProbability.h index a6429dd22a3b..26bc888d1cab 100644 --- a/include/llvm/Support/BranchProbability.h +++ b/include/llvm/Support/BranchProbability.h @@ -15,36 +15,59 @@ #define LLVM_SUPPORT_BRANCHPROBABILITY_H #include "llvm/Support/DataTypes.h" +#include <algorithm> #include <cassert> +#include <climits> +#include <numeric> namespace llvm { class raw_ostream; -// This class represents Branch Probability as a non-negative fraction. +// This class represents Branch Probability as a non-negative fraction that is +// no greater than 1. It uses a fixed-point-like implementation, in which the +// denominator is always a constant value (here we use 1<<31 for maximum +// precision). class BranchProbability { // Numerator uint32_t N; - // Denominator - uint32_t D; + // Denominator, which is a constant value. + static const uint32_t D = 1u << 31; + static const uint32_t UnknownN = UINT32_MAX; + + // Construct a BranchProbability with only numerator assuming the denominator + // is 1<<31. For internal use only. + explicit BranchProbability(uint32_t n) : N(n) {} public: - BranchProbability(uint32_t n, uint32_t d) : N(n), D(d) { - assert(d > 0 && "Denominator cannot be 0!"); - assert(n <= d && "Probability cannot be bigger than 1!"); - } + BranchProbability() : N(UnknownN) {} + BranchProbability(uint32_t Numerator, uint32_t Denominator); - static BranchProbability getZero() { return BranchProbability(0, 1); } - static BranchProbability getOne() { return BranchProbability(1, 1); } + bool isZero() const { return N == 0; } + bool isUnknown() const { return N == UnknownN; } + + static BranchProbability getZero() { return BranchProbability(0); } + static BranchProbability getOne() { return BranchProbability(D); } + static BranchProbability getUnknown() { return BranchProbability(UnknownN); } + // Create a BranchProbability object with the given numerator and 1<<31 + // as denominator. + static BranchProbability getRaw(uint32_t N) { return BranchProbability(N); } + // Create a BranchProbability object from 64-bit integers. + static BranchProbability getBranchProbability(uint64_t Numerator, + uint64_t Denominator); + + // Normalize given probabilities so that the sum of them becomes + // approximately one. + template <class ProbabilityIter> + static void normalizeProbabilities(ProbabilityIter Begin, + ProbabilityIter End); uint32_t getNumerator() const { return N; } - uint32_t getDenominator() const { return D; } + static uint32_t getDenominator() { return D; } // Return (1 - Probability). - BranchProbability getCompl() const { - return BranchProbability(D - N, D); - } + BranchProbability getCompl() const { return BranchProbability(D - N); } raw_ostream &print(raw_ostream &OS) const; @@ -66,24 +89,131 @@ public: /// \return \c Num divided by \c this.
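To make the fixed-point scheme concrete, here is a standalone sketch (plain arithmetic, not the LLVM class) of how an arbitrary N/D fraction is scaled into the constant 1<<31 denominator with round-to-nearest, the same rounding style used by normalizeProbabilities further down in this patch:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t FixedD = 1u << 31; // the constant denominator
  uint32_t n = 1, d = 3;            // represent the fraction 1/3
  assert(d > 0 && n <= d);
  // Scale into the fixed denominator, rounding to the nearest value.
  uint32_t N = (uint64_t(n) * FixedD + d / 2) / d;
  std::printf("1/3 ~ %u / 2^31 = %f\n", N, double(N) / FixedD);
  return 0;
}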
uint64_t scaleByInverse(uint64_t Num) const; - bool operator==(BranchProbability RHS) const { - return (uint64_t)N * RHS.D == (uint64_t)D * RHS.N; + BranchProbability &operator+=(BranchProbability RHS) { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + // Saturate the result in case of overflow. + N = (uint64_t(N) + RHS.N > D) ? D : N + RHS.N; + return *this; } - bool operator!=(BranchProbability RHS) const { - return !(*this == RHS); + + BranchProbability &operator-=(BranchProbability RHS) { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + // Saturate the result in case of underflow. + N = N < RHS.N ? 0 : N - RHS.N; + return *this; } + + BranchProbability &operator*=(BranchProbability RHS) { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + N = (static_cast(N) * RHS.N + D / 2) / D; + return *this; + } + + BranchProbability &operator/=(uint32_t RHS) { + assert(N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + assert(RHS > 0 && "The divider cannot be zero."); + N /= RHS; + return *this; + } + + BranchProbability operator+(BranchProbability RHS) const { + BranchProbability Prob(*this); + return Prob += RHS; + } + + BranchProbability operator-(BranchProbability RHS) const { + BranchProbability Prob(*this); + return Prob -= RHS; + } + + BranchProbability operator*(BranchProbability RHS) const { + BranchProbability Prob(*this); + return Prob *= RHS; + } + + BranchProbability operator/(uint32_t RHS) const { + BranchProbability Prob(*this); + return Prob /= RHS; + } + + bool operator==(BranchProbability RHS) const { return N == RHS.N; } + bool operator!=(BranchProbability RHS) const { return !(*this == RHS); } + bool operator<(BranchProbability RHS) const { - return (uint64_t)N * RHS.D < (uint64_t)D * RHS.N; + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in comparisons."); + return N < RHS.N; + } + + bool operator>(BranchProbability RHS) const { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in comparisons."); + return RHS < *this; + } + + bool operator<=(BranchProbability RHS) const { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in comparisons."); + return !(RHS < *this); + } + + bool operator>=(BranchProbability RHS) const { + assert(N != UnknownN && RHS.N != UnknownN && + "Unknown probability cannot participate in comparisons."); + return !(*this < RHS); } - bool operator>(BranchProbability RHS) const { return RHS < *this; } - bool operator<=(BranchProbability RHS) const { return !(RHS < *this); } - bool operator>=(BranchProbability RHS) const { return !(*this < RHS); } }; -inline raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) { +inline raw_ostream &operator<<(raw_ostream &OS, BranchProbability Prob) { return Prob.print(OS); } +template +void BranchProbability::normalizeProbabilities(ProbabilityIter Begin, + ProbabilityIter End) { + if (Begin == End) + return; + + unsigned UnknownProbCount = 0; + uint64_t Sum = std::accumulate(Begin, End, uint64_t(0), + [&](uint64_t S, const BranchProbability &BP) { + if (!BP.isUnknown()) + return S + BP.N; + UnknownProbCount++; + return S; + }); + + if (UnknownProbCount > 0) { + BranchProbability ProbForUnknown = BranchProbability::getZero(); + // If the sum of all known probabilities is less than 
one, evenly distribute + // the complement of sum to unknown probabilities. Otherwise, set unknown + // probabilities to zeros and continue to normalize known probabilities. + if (Sum < BranchProbability::getDenominator()) + ProbForUnknown = BranchProbability::getRaw( + (BranchProbability::getDenominator() - Sum) / UnknownProbCount); + + std::replace_if(Begin, End, + [](const BranchProbability &BP) { return BP.isUnknown(); }, + ProbForUnknown); + + if (Sum <= BranchProbability::getDenominator()) + return; + } + + if (Sum == 0) { + BranchProbability BP(1, std::distance(Begin, End)); + std::fill(Begin, End, BP); + return; + } + + for (auto I = Begin; I != End; ++I) + I->N = (I->N * uint64_t(D) + Sum / 2) / Sum; +} + } #endif diff --git a/include/llvm/Support/CBindingWrapping.h b/include/llvm/Support/CBindingWrapping.h index 786ba183b3b0..d4633aa7d3c6 100644 --- a/include/llvm/Support/CBindingWrapping.h +++ b/include/llvm/Support/CBindingWrapping.h @@ -15,6 +15,7 @@ #define LLVM_SUPPORT_CBINDINGWRAPPING_H #include "llvm/Support/Casting.h" +#include "llvm-c/Types.h" #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref) \ inline ty *unwrap(ref P) { \ diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h index 3c5ee06969d0..0162175efe3e 100644 --- a/include/llvm/Support/COFF.h +++ b/include/llvm/Support/COFF.h @@ -88,6 +88,7 @@ namespace COFF { IMAGE_FILE_MACHINE_AMD64 = 0x8664, IMAGE_FILE_MACHINE_ARM = 0x1C0, IMAGE_FILE_MACHINE_ARMNT = 0x1C4, + IMAGE_FILE_MACHINE_ARM64 = 0xAA64, IMAGE_FILE_MACHINE_EBC = 0xEBC, IMAGE_FILE_MACHINE_I386 = 0x14C, IMAGE_FILE_MACHINE_IA64 = 0x200, @@ -247,6 +248,7 @@ namespace COFF { enum SectionCharacteristics : uint32_t { SC_Invalid = 0xffffffff, + IMAGE_SCN_TYPE_NOLOAD = 0x00000002, IMAGE_SCN_TYPE_NO_PAD = 0x00000008, IMAGE_SCN_CNT_CODE = 0x00000020, IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index 379d06a65741..943d2df37708 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -33,7 +33,6 @@ namespace llvm { -class BumpPtrStringSaver; class StringSaver; /// cl Namespace - This namespace contains all of the command line option @@ -206,9 +205,9 @@ class Option { unsigned AdditionalVals; // Greater than 0 for multi-valued option. public: - const char *ArgStr; // The argument string itself (ex: "help", "o") - const char *HelpStr; // The descriptive text message for -help - const char *ValueStr; // String describing what the value of this option is + StringRef ArgStr; // The argument string itself (ex: "help", "o") + StringRef HelpStr; // The descriptive text message for -help + StringRef ValueStr; // String describing what the value of this option is OptionCategory *Category; // The Category this option belongs to bool FullyInitialized; // Has addArgument been called?
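A usage sketch for the normalization helper defined above (assumes the patched BranchProbability.h and linking against LLVMSupport; the three-entry mix is invented):

#include "llvm/Support/BranchProbability.h"
#include <vector>
using llvm::BranchProbability;

void normalizeExample() {
  std::vector<BranchProbability> Probs = {
      BranchProbability(1, 4),          // 1/4, scaled to the 1<<31 denominator
      BranchProbability(1, 4),          // 1/4
      BranchProbability::getUnknown()}; // receives the remaining ~1/2
  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
  // The three entries now sum to approximately one.
}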
@@ -229,14 +228,14 @@ public: inline unsigned getNumAdditionalVals() const { return AdditionalVals; } // hasArgStr - Return true if the argstr != "" - bool hasArgStr() const { return ArgStr[0] != 0; } + bool hasArgStr() const { return !ArgStr.empty(); } //-------------------------------------------------------------------------=== // Accessor functions set by OptionModifiers // - void setArgStr(const char *S); - void setDescription(const char *S) { HelpStr = S; } - void setValueStr(const char *S) { ValueStr = S; } + void setArgStr(StringRef S); + void setDescription(StringRef S) { HelpStr = S; } + void setValueStr(StringRef S) { ValueStr = S; } void setNumOccurrencesFlag(enum NumOccurrencesFlag Val) { Occurrences = Val; } void setValueExpectedFlag(enum ValueExpected Val) { Value = Val; } void setHiddenFlag(enum OptionHidden Val) { HiddenFlag = Val; } @@ -276,7 +275,7 @@ public: virtual void printOptionValue(size_t GlobalWidth, bool Force) const = 0; - virtual void getExtraOptionNames(SmallVectorImpl &) {} + virtual void getExtraOptionNames(SmallVectorImpl &) {} // addOccurrence - Wrapper around handleOccurrence that enforces Flags. // @@ -606,7 +605,7 @@ public: void initialize() {} - void getExtraOptionNames(SmallVectorImpl &OptionNames) { + void getExtraOptionNames(SmallVectorImpl &OptionNames) { // If there has been no argstr specified, that means that we need to add an // argument for every possible option. This ensures that our options are // vectored to us. @@ -715,14 +714,14 @@ public: // class basic_parser_impl { // non-template implementation of basic_parser public: - basic_parser_impl(Option &O) {} + basic_parser_impl(Option &) {} enum ValueExpected getValueExpectedFlagDefault() const { return ValueRequired; } - void getExtraOptionNames(SmallVectorImpl &) {} + void getExtraOptionNames(SmallVectorImpl &) {} void initialize() {} @@ -1206,8 +1205,7 @@ class opt : public Option, enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } - void - getExtraOptionNames(SmallVectorImpl &OptionNames) override { + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1368,8 +1366,7 @@ class list : public Option, public list_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } - void - getExtraOptionNames(SmallVectorImpl &OptionNames) override { + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } @@ -1508,8 +1505,7 @@ class bits : public Option, public bits_storage { enum ValueExpected getValueExpectedFlagDefault() const override { return Parser.getValueExpectedFlagDefault(); } - void - getExtraOptionNames(SmallVectorImpl &OptionNames) override { + void getExtraOptionNames(SmallVectorImpl &OptionNames) override { return Parser.getExtraOptionNames(OptionNames); } diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index 141639839cc2..b3416bbfffb6 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -69,7 +69,7 @@ #if !defined(_MSC_VER) || defined(__clang__) || LLVM_MSC_PREREQ(1900) #define LLVM_NOEXCEPT noexcept #else -#define LLVM_NOEXCEPT +#define LLVM_NOEXCEPT throw() #endif /// \brief Does the compiler support ref-qualifiers for *this? @@ -189,7 +189,7 @@ /// 3.4 supported this but is buggy in various cases and produces unimplemented /// errors, just use it in GCC 4.0 and later. 
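For the LLVM_PACKED family defined just below, a self-contained sketch of the effect using the raw pragmas the macros wrap (the offsets assume a typical ABI where uint32_t is otherwise 4-byte aligned):

#include <cstdint>
#include <cstdio>

#pragma pack(push, 1) // what LLVM_PACKED_START expands to
struct PackedHeader {
  uint8_t Kind;
  uint32_t Size; // at offset 1 when packed, offset 4 otherwise
};
#pragma pack(pop) // what LLVM_PACKED_END expands to

int main() {
  static_assert(sizeof(PackedHeader) == 5, "packing removed the padding");
  std::printf("sizeof(PackedHeader) = %zu\n", sizeof(PackedHeader));
  return 0;
}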
#if __has_attribute(always_inline) || LLVM_GNUC_PREREQ(4, 0, 0) -#define LLVM_ATTRIBUTE_ALWAYS_INLINE inline __attribute__((always_inline)) +#define LLVM_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) #elif defined(_MSC_VER) #define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline #else @@ -293,6 +293,34 @@ # define LLVM_ALIGNAS(x) alignas(x) #endif +/// \macro LLVM_PACKED +/// \brief Used to specify a packed structure. +/// LLVM_PACKED( +/// struct A { +/// int i; +/// int j; +/// int k; +/// long long l; +/// }); +/// +/// LLVM_PACKED_START +/// struct B { +/// int i; +/// int j; +/// int k; +/// long long l; +/// }; +/// LLVM_PACKED_END +#ifdef _MSC_VER +# define LLVM_PACKED(d) __pragma(pack(push, 1)) d __pragma(pack(pop)) +# define LLVM_PACKED_START __pragma(pack(push, 1)) +# define LLVM_PACKED_END __pragma(pack(pop)) +#else +# define LLVM_PACKED(d) d __attribute__((packed)) +# define LLVM_PACKED_START _Pragma("pack(push, 1)") +# define LLVM_PACKED_END _Pragma("pack(pop)") +#endif + /// \macro LLVM_PTR_SIZE /// \brief A constant integer equivalent to the value of sizeof(void*). /// Generally used in combination with LLVM_ALIGNAS or when doing computation in @@ -333,8 +361,50 @@ /// \brief Whether LLVM itself is built with AddressSanitizer instrumentation. #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) # define LLVM_ADDRESS_SANITIZER_BUILD 1 +# include #else # define LLVM_ADDRESS_SANITIZER_BUILD 0 +# define __asan_poison_memory_region(p, size) +# define __asan_unpoison_memory_region(p, size) +#endif + +/// \macro LLVM_THREAD_SANITIZER_BUILD +/// \brief Whether LLVM itself is built with ThreadSanitizer instrumentation. +#if __has_feature(thread_sanitizer) || defined(__SANITIZE_THREAD__) +# define LLVM_THREAD_SANITIZER_BUILD 1 +#else +# define LLVM_THREAD_SANITIZER_BUILD 0 +#endif + +#if LLVM_THREAD_SANITIZER_BUILD +// Thread Sanitizer is a tool that finds races in code. +// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations . +// tsan detects these exact functions by name. +extern "C" { +void AnnotateHappensAfter(const char *file, int line, const volatile void *cv); +void AnnotateHappensBefore(const char *file, int line, const volatile void *cv); +void AnnotateIgnoreWritesBegin(const char *file, int line); +void AnnotateIgnoreWritesEnd(const char *file, int line); +} + +// This marker is used to define a happens-before arc. The race detector will +// infer an arc from the begin to the end when they share the same pointer +// argument. +# define TsanHappensBefore(cv) AnnotateHappensBefore(__FILE__, __LINE__, cv) + +// This marker defines the destination of a happens-before arc. +# define TsanHappensAfter(cv) AnnotateHappensAfter(__FILE__, __LINE__, cv) + +// Ignore any races on writes between here and the next TsanIgnoreWritesEnd. +# define TsanIgnoreWritesBegin() AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + +// Resume checking for racy writes. +# define TsanIgnoreWritesEnd() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) +#else +# define TsanHappensBefore(cv) +# define TsanHappensAfter(cv) +# define TsanIgnoreWritesBegin() +# define TsanIgnoreWritesEnd() #endif /// \brief Mark debug helper function definitions like dump() that should not be diff --git a/include/llvm/Support/CrashRecoveryContext.h b/include/llvm/Support/CrashRecoveryContext.h index c08c3c1f0d21..1a1c74368761 100644 --- a/include/llvm/Support/CrashRecoveryContext.h +++ b/include/llvm/Support/CrashRecoveryContext.h @@ -39,8 +39,6 @@ class CrashRecoveryContextCleanup; /// /// ... 
no crash was detected ... /// } -/// -/// Crash recovery contexts may not be nested. class CrashRecoveryContext { void *Impl; CrashRecoveryContextCleanup *head; @@ -109,10 +107,11 @@ class CrashRecoveryContextCleanup { protected: CrashRecoveryContext *context; CrashRecoveryContextCleanup(CrashRecoveryContext *context) - : context(context), cleanupFired(false) {} + : context(context), cleanupFired(false) {} + public: bool cleanupFired; - + virtual ~CrashRecoveryContextCleanup(); virtual void recoverResources() = 0; @@ -129,15 +128,16 @@ template class CrashRecoveryContextCleanupBase : public CrashRecoveryContextCleanup { protected: T *resource; - CrashRecoveryContextCleanupBase(CrashRecoveryContext *context, T* resource) - : CrashRecoveryContextCleanup(context), resource(resource) {} + CrashRecoveryContextCleanupBase(CrashRecoveryContext *context, T *resource) + : CrashRecoveryContextCleanup(context), resource(resource) {} + public: static DERIVED *create(T *x) { if (x) { if (CrashRecoveryContext *context = CrashRecoveryContext::GetCurrent()) return new DERIVED(context, x); } - return 0; + return nullptr; } }; @@ -146,9 +146,9 @@ class CrashRecoveryContextDestructorCleanup : public CrashRecoveryContextCleanupBase, T> { public: CrashRecoveryContextDestructorCleanup(CrashRecoveryContext *context, - T *resource) - : CrashRecoveryContextCleanupBase< - CrashRecoveryContextDestructorCleanup, T>(context, resource) {} + T *resource) + : CrashRecoveryContextCleanupBase< + CrashRecoveryContextDestructorCleanup, T>(context, resource) {} virtual void recoverResources() { this->resource->~T(); @@ -171,7 +171,7 @@ class CrashRecoveryContextReleaseRefCleanup : public CrashRecoveryContextCleanupBase, T> { public: - CrashRecoveryContextReleaseRefCleanup(CrashRecoveryContext *context, + CrashRecoveryContextReleaseRefCleanup(CrashRecoveryContext *context, T *resource) : CrashRecoveryContextCleanupBase, T>(context, resource) {} @@ -182,6 +182,7 @@ public: template > class CrashRecoveryContextCleanupRegistrar { CrashRecoveryContextCleanup *cleanup; + public: CrashRecoveryContextCleanupRegistrar(T *x) : cleanup(Cleanup::create(x)) { @@ -189,16 +190,14 @@ public: cleanup->getContext()->registerCleanup(cleanup); } - ~CrashRecoveryContextCleanupRegistrar() { - unregister(); - } - + ~CrashRecoveryContextCleanupRegistrar() { unregister(); } + void unregister() { if (cleanup && !cleanup->cleanupFired) cleanup->getContext()->unregisterCleanup(cleanup); - cleanup = 0; + cleanup = nullptr; } }; -} +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_CRASHRECOVERYCONTEXT_H diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h index 95e37c01d7d5..4381b5bf1633 100644 --- a/include/llvm/Support/DOTGraphTraits.h +++ b/include/llvm/Support/DOTGraphTraits.h @@ -72,11 +72,12 @@ public: return ""; } - /// hasNodeAddressLabel - If this method returns true, the address of the node - /// is added to the label of the node. - template - static bool hasNodeAddressLabel(const void *, const GraphType &) { - return false; + // getNodeIdentifierLabel - Returns a string representing the + // address or other unique identifier of the node. (Only used if + // non-empty.) 
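A usage sketch for the recovery context documented above, assuming LLVMSupport is linked and crash recovery has been enabled process-wide; doWork is a hypothetical function that may crash:

#include "llvm/Support/CrashRecoveryContext.h"

static void doWork() { /* work that might crash */ }

bool runGuarded() {
  llvm::CrashRecoveryContext::Enable(); // opt in once per process
  llvm::CrashRecoveryContext CRC;
  if (!CRC.RunSafely(doWork))
    return false; // ... a crash was detected ...
  return true;    // ... no crash was detected ...
}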
+ template <typename GraphType> + static std::string getNodeIdentifierLabel(const void *, const GraphType &) { + return ""; } template <typename GraphType> diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h index fff4f986a6c0..6e213477d710 100644 --- a/include/llvm/Support/Debug.h +++ b/include/llvm/Support/Debug.h @@ -13,7 +13,7 @@ // // In particular, just wrap your code with the DEBUG() macro, and it will be // enabled automatically if you specify '-debug' on the command-line. -// Alternatively, you can also define the DEBUG_TYPE macro to "foo" to specify +// DEBUG() requires the DEBUG_TYPE macro to be defined. Set it to "foo" to specify // that your debug code belongs to class "foo". Be careful that you only do // this after including Debug.h and not around any #include of headers. Headers // should define and undef the macro around the code that needs to use the diff --git a/include/llvm/Support/Dwarf.def b/include/llvm/Support/Dwarf.def index 4b923b897e6f..b15070b3e9b0 100644 --- a/include/llvm/Support/Dwarf.def +++ b/include/llvm/Support/Dwarf.def @@ -99,10 +99,6 @@ HANDLE_DW_TAG(0x0041, type_unit) HANDLE_DW_TAG(0x0042, rvalue_reference_type) HANDLE_DW_TAG(0x0043, template_alias) -// Mock tags we use as discriminators. -HANDLE_DW_TAG(0x0100, auto_variable) // Tag for local (auto) variables. -HANDLE_DW_TAG(0x0101, arg_variable) // Tag for argument variables. - // New in DWARF v5. HANDLE_DW_TAG(0x0044, coarray_type) HANDLE_DW_TAG(0x0045, generic_subrange) @@ -117,6 +113,11 @@ HANDLE_DW_TAG(0x4106, GNU_template_template_param) HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack) HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack) HANDLE_DW_TAG(0x4200, APPLE_property) +HANDLE_DW_TAG(0xb000, BORLAND_property) +HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string) +HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array) +HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set) +HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant) HANDLE_DW_OP(0x03, addr) HANDLE_DW_OP(0x06, deref) @@ -319,6 +320,7 @@ HANDLE_DW_LANG(0x0021, C_plus_plus_14) HANDLE_DW_LANG(0x0022, Fortran03) HANDLE_DW_LANG(0x0023, Fortran08) HANDLE_DW_LANG(0x8001, Mips_Assembler) +HANDLE_DW_LANG(0xb000, BORLAND_Delphi) // DWARF attribute type encodings. HANDLE_DW_ATE(0x01, address) diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h index 17e9c1540a41..cea61bd75833 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/Support/Dwarf.h @@ -40,6 +40,7 @@ enum LLVMConstants : uint32_t { // LLVM mock tags (see also llvm/Support/Dwarf.def). DW_TAG_invalid = ~0U, // Tag for invalid results. DW_VIRTUALITY_invalid = ~0U, // Virtuality for invalid results. + DW_MACINFO_invalid = ~0U, // Macinfo type for invalid results. // Other constants. DWARF_VERSION = 4, // Default dwarf version we output. @@ -195,6 +196,7 @@ enum Attribute : uint16_t { DW_AT_dwo_name = 0x76, DW_AT_reference = 0x77, DW_AT_rvalue_reference = 0x78, + DW_AT_macros = 0x79, DW_AT_lo_user = 0x2000, DW_AT_hi_user = 0x3fff, @@ -230,6 +232,7 @@ enum Attribute : uint16_t { DW_AT_GNU_template_name = 0x2110, DW_AT_GNU_odr_signature = 0x210f, + DW_AT_GNU_macros = 0x2119, // Extensions for Fission proposal. DW_AT_GNU_dwo_name = 0x2130, @@ -238,6 +241,26 @@ enum Attribute : uint16_t { DW_AT_GNU_addr_base = 0x2133, DW_AT_GNU_pubnames = 0x2134, DW_AT_GNU_pubtypes = 0x2135, + DW_AT_GNU_discriminator = 0x2136, + + // Borland extensions.
+ DW_AT_BORLAND_property_read = 0x3b11, + DW_AT_BORLAND_property_write = 0x3b12, + DW_AT_BORLAND_property_implements = 0x3b13, + DW_AT_BORLAND_property_index = 0x3b14, + DW_AT_BORLAND_property_default = 0x3b15, + DW_AT_BORLAND_Delphi_unit = 0x3b20, + DW_AT_BORLAND_Delphi_class = 0x3b21, + DW_AT_BORLAND_Delphi_record = 0x3b22, + DW_AT_BORLAND_Delphi_metaclass = 0x3b23, + DW_AT_BORLAND_Delphi_constructor = 0x3b24, + DW_AT_BORLAND_Delphi_destructor = 0x3b25, + DW_AT_BORLAND_Delphi_anonymous_method = 0x3b26, + DW_AT_BORLAND_Delphi_interface = 0x3b27, + DW_AT_BORLAND_Delphi_ABI = 0x3b28, + DW_AT_BORLAND_Delphi_return = 0x3b29, + DW_AT_BORLAND_Delphi_frameptr = 0x3b30, + DW_AT_BORLAND_closure = 0x3b31, // LLVM project extensions. DW_AT_LLVM_include_path = 0x3e00, @@ -370,6 +393,14 @@ enum CallingConvention { DW_CC_program = 0x02, DW_CC_nocall = 0x03, DW_CC_lo_user = 0x40, + DW_CC_GNU_borland_fastcall_i386 = 0x41, + DW_CC_BORLAND_safecall = 0xb0, + DW_CC_BORLAND_stdcall = 0xb1, + DW_CC_BORLAND_pascal = 0xb2, + DW_CC_BORLAND_msfastcall = 0xb3, + DW_CC_BORLAND_msreturn = 0xb4, + DW_CC_BORLAND_thiscall = 0xb5, + DW_CC_BORLAND_fastcall = 0xb6, DW_CC_hi_user = 0xff }; @@ -429,6 +460,24 @@ enum MacinfoRecordType { DW_MACINFO_vendor_ext = 0xff }; +enum MacroEntryType { + // Macro Information Entry Type Encodings + DW_MACRO_define = 0x01, + DW_MACRO_undef = 0x02, + DW_MACRO_start_file = 0x03, + DW_MACRO_end_file = 0x04, + DW_MACRO_define_indirect = 0x05, + DW_MACRO_undef_indirect = 0x06, + DW_MACRO_transparent_include = 0x07, + DW_MACRO_define_indirect_sup = 0x08, + DW_MACRO_undef_indirect_sup = 0x09, + DW_MACRO_transparent_include_sup = 0x0a, + DW_MACRO_define_indirectx = 0x0b, + DW_MACRO_undef_indirectx = 0x0c, + DW_MACRO_lo_user = 0xe0, + DW_MACRO_hi_user = 0xff +}; + enum CallFrameInfo { // Call frame instruction encodings DW_CFA_extended = 0x00, @@ -596,6 +645,7 @@ const char *GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage); /// /// \li \a getTag() returns \a DW_TAG_invalid on invalid input. /// \li \a getVirtuality() returns \a DW_VIRTUALITY_invalid on invalid input. +/// \li \a getMacinfo() returns \a DW_MACINFO_invalid on invalid input. /// /// @{ unsigned getTag(StringRef TagString); @@ -603,6 +653,7 @@ unsigned getOperationEncoding(StringRef OperationEncodingString); unsigned getVirtuality(StringRef VirtualityString); unsigned getLanguage(StringRef LanguageString); unsigned getAttributeEncoding(StringRef EncodingString); +unsigned getMacinfo(StringRef MacinfoString); /// @} /// \brief Returns the symbolic string representing Val when used as a value @@ -610,7 +661,7 @@ unsigned getAttributeEncoding(StringRef EncodingString); const char *AttributeValueString(uint16_t Attr, unsigned Val); /// \brief Describes an entry of the various gnu_pub* debug sections.
-/// +/// /// The gnu_pub* kind looks like: /// /// 0-3 reserved @@ -642,7 +693,6 @@ private: }; }; - } // End of namespace dwarf } // End of namespace llvm diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index 94a4bfb22025..97708a7cdd63 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -429,6 +429,33 @@ enum { #include "ELFRelocs/ARM.def" }; +// AVR specific e_flags +enum : unsigned { + EF_AVR_ARCH_AVR1 = 1, + EF_AVR_ARCH_AVR2 = 2, + EF_AVR_ARCH_AVR25 = 25, + EF_AVR_ARCH_AVR3 = 3, + EF_AVR_ARCH_AVR31 = 31, + EF_AVR_ARCH_AVR35 = 35, + EF_AVR_ARCH_AVR4 = 4, + EF_AVR_ARCH_AVR5 = 5, + EF_AVR_ARCH_AVR51 = 51, + EF_AVR_ARCH_AVR6 = 6, + EF_AVR_ARCH_AVRTINY = 100, + EF_AVR_ARCH_XMEGA1 = 101, + EF_AVR_ARCH_XMEGA2 = 102, + EF_AVR_ARCH_XMEGA3 = 103, + EF_AVR_ARCH_XMEGA4 = 104, + EF_AVR_ARCH_XMEGA5 = 105, + EF_AVR_ARCH_XMEGA6 = 106, + EF_AVR_ARCH_XMEGA7 = 107 +}; + +// ELF Relocation types for AVR +enum { +#include "ELFRelocs/AVR.def" +}; + // Mips Specific e_flags enum : unsigned { EF_MIPS_NOREORDER = 0x00000001, // Don't reorder instructions @@ -522,26 +549,28 @@ enum { ODK_PAGESIZE = 11 // Page size information }; -// Hexagon Specific e_flags -// Release 5 ABI +// Hexagon-specific e_flags enum { - // Object processor version flags, bits[3:0] + // Object processor version flags, bits[11:0] EF_HEXAGON_MACH_V2 = 0x00000001, // Hexagon V2 EF_HEXAGON_MACH_V3 = 0x00000002, // Hexagon V3 EF_HEXAGON_MACH_V4 = 0x00000003, // Hexagon V4 EF_HEXAGON_MACH_V5 = 0x00000004, // Hexagon V5 + EF_HEXAGON_MACH_V55 = 0x00000005, // Hexagon V55 + EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60 // Highest ISA version flags - EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[3:0] + EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0] // of e_flags EF_HEXAGON_ISA_V2 = 0x00000010, // Hexagon V2 ISA EF_HEXAGON_ISA_V3 = 0x00000020, // Hexagon V3 ISA EF_HEXAGON_ISA_V4 = 0x00000030, // Hexagon V4 ISA - EF_HEXAGON_ISA_V5 = 0x00000040 // Hexagon V5 ISA + EF_HEXAGON_ISA_V5 = 0x00000040, // Hexagon V5 ISA + EF_HEXAGON_ISA_V55 = 0x00000050, // Hexagon V55 ISA + EF_HEXAGON_ISA_V60 = 0x00000060, // Hexagon V60 ISA }; -// Hexagon specific Section indexes for common small data -// Release 5 ABI +// Hexagon-specific section indexes for common small data enum { SHN_HEXAGON_SCOMMON = 0xff00, // Other access sizes SHN_HEXAGON_SCOMMON_1 = 0xff01, // Byte-sized access @@ -747,7 +776,12 @@ enum : unsigned { SHF_MIPS_ADDR = 0x40000000, // Section data is string data by default. - SHF_MIPS_STRING = 0x80000000 + SHF_MIPS_STRING = 0x80000000, + + SHF_AMDGPU_HSA_GLOBAL = 0x00100000, + SHF_AMDGPU_HSA_READONLY = 0x00200000, + SHF_AMDGPU_HSA_CODE = 0x00400000, + SHF_AMDGPU_HSA_AGENT = 0x00800000 }; // Section Group Flags @@ -828,7 +862,12 @@ enum { STT_LOOS = 10, // Lowest operating system-specific symbol type STT_HIOS = 12, // Highest operating system-specific symbol type STT_LOPROC = 13, // Lowest processor-specific symbol type - STT_HIPROC = 15 // Highest processor-specific symbol type + STT_HIPROC = 15, // Highest processor-specific symbol type + + // AMDGPU symbol types + STT_AMDGPU_HSA_KERNEL = 10, + STT_AMDGPU_HSA_INDIRECT_FUNCTION = 11, + STT_AMDGPU_HSA_METADATA = 12 }; enum { @@ -979,7 +1018,13 @@ enum { PT_MIPS_REGINFO = 0x70000000, // Register usage information. PT_MIPS_RTPROC = 0x70000001, // Runtime procedure table. PT_MIPS_OPTIONS = 0x70000002, // Options segment. - PT_MIPS_ABIFLAGS = 0x70000003 // Abiflags segment. 
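Below, a sketch of how a consumer might classify an AVR object file from the e_flags values defined above; note that the 0x7f architecture mask is how GNU binutils defines EF_AVR_ARCH_MASK and is an assumption here, not part of this patch:

#include <cstdint>

enum : unsigned { EF_AVR_ARCH_AVR5 = 5 }; // mirrors the table above

// Hypothetical helper: true if the e_flags word says "avr5 family".
bool isAVR5(uint32_t EFlags) {
  return (EFlags & 0x7f) == EF_AVR_ARCH_AVR5; // 0x7f: assumed arch mask
}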
+ PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment. + + // AMDGPU program header types. + PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000, + PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001, + PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002, + PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003 }; // Segment flag bits. @@ -1139,8 +1184,10 @@ enum { DT_MIPS_GP_VALUE = 0x70000030, // GP value for auxiliary GOTs. DT_MIPS_AUX_DYNAMIC = 0x70000031, // Address of auxiliary .dynamic. DT_MIPS_PLTGOT = 0x70000032, // Address of the base of the PLTGOT. - DT_MIPS_RWPLT = 0x70000034 // Points to the base + DT_MIPS_RWPLT = 0x70000034, // Points to the base // of a writable PLT. + DT_MIPS_RLD_MAP_REL = 0x70000035 // Relative offset of run time loader + // map, used for debugging. }; // DT_FLAGS values. diff --git a/include/llvm/Support/ELFRelocs/AVR.def b/include/llvm/Support/ELFRelocs/AVR.def new file mode 100644 index 000000000000..5692d6cb9aa0 --- /dev/null +++ b/include/llvm/Support/ELFRelocs/AVR.def @@ -0,0 +1,40 @@ + +#ifndef ELF_RELOC +#error "ELF_RELOC must be defined" +#endif + +ELF_RELOC(R_AVR_NONE, 0) +ELF_RELOC(R_AVR_32, 1) +ELF_RELOC(R_AVR_7_PCREL, 2) +ELF_RELOC(R_AVR_13_PCREL, 3) +ELF_RELOC(R_AVR_16, 4) +ELF_RELOC(R_AVR_16_PM, 5) +ELF_RELOC(R_AVR_LO8_LDI, 6) +ELF_RELOC(R_AVR_HI8_LDI, 7) +ELF_RELOC(R_AVR_HH8_LDI, 8) +ELF_RELOC(R_AVR_LO8_LDI_NEG, 9) +ELF_RELOC(R_AVR_HI8_LDI_NEG, 10) +ELF_RELOC(R_AVR_HH8_LDI_NEG, 11) +ELF_RELOC(R_AVR_LO8_LDI_PM, 12) +ELF_RELOC(R_AVR_HI8_LDI_PM, 13) +ELF_RELOC(R_AVR_HH8_LDI_PM, 14) +ELF_RELOC(R_AVR_LO8_LDI_PM_NEG, 15) +ELF_RELOC(R_AVR_HI8_LDI_PM_NEG, 16) +ELF_RELOC(R_AVR_HH8_LDI_PM_NEG, 17) +ELF_RELOC(R_AVR_CALL, 18) +ELF_RELOC(R_AVR_LDI, 19) +ELF_RELOC(R_AVR_6, 20) +ELF_RELOC(R_AVR_6_ADIW, 21) +ELF_RELOC(R_AVR_MS8_LDI, 22) +ELF_RELOC(R_AVR_MS8_LDI_NEG, 23) +ELF_RELOC(R_AVR_LO8_LDI_GS, 24) +ELF_RELOC(R_AVR_HI8_LDI_GS, 25) +ELF_RELOC(R_AVR_8, 26) +ELF_RELOC(R_AVR_8_LO8, 27) +ELF_RELOC(R_AVR_8_HI8, 28) +ELF_RELOC(R_AVR_8_HLO8, 29) +ELF_RELOC(R_AVR_SYM_DIFF, 30) +ELF_RELOC(R_AVR_16_LDST, 31) +ELF_RELOC(R_AVR_LDS_STS_16, 33) +ELF_RELOC(R_AVR_PORT6, 34) +ELF_RELOC(R_AVR_PORT5, 35) diff --git a/include/llvm/Support/ELFRelocs/PowerPC.def b/include/llvm/Support/ELFRelocs/PowerPC.def index b6c39419b0f7..e4f8ee0ebe2b 100644 --- a/include/llvm/Support/ELFRelocs/PowerPC.def +++ b/include/llvm/Support/ELFRelocs/PowerPC.def @@ -3,6 +3,68 @@ #error "ELF_RELOC must be defined" #endif +// glibc's PowerPC asm/sigcontext.h, when compiling for PPC64, has the +// unfortunate behavior of including asm/elf.h, which defines R_PPC_NONE, etc. +// to their corresponding integer values. As a result, we need to undef them +// here before continuing. 
+ +#undef R_PPC_NONE +#undef R_PPC_ADDR32 +#undef R_PPC_ADDR24 +#undef R_PPC_ADDR16 +#undef R_PPC_ADDR16_LO +#undef R_PPC_ADDR16_HI +#undef R_PPC_ADDR16_HA +#undef R_PPC_ADDR14 +#undef R_PPC_ADDR14_BRTAKEN +#undef R_PPC_ADDR14_BRNTAKEN +#undef R_PPC_REL24 +#undef R_PPC_REL14 +#undef R_PPC_REL14_BRTAKEN +#undef R_PPC_REL14_BRNTAKEN +#undef R_PPC_GOT16 +#undef R_PPC_GOT16_LO +#undef R_PPC_GOT16_HI +#undef R_PPC_GOT16_HA +#undef R_PPC_PLTREL24 +#undef R_PPC_JMP_SLOT +#undef R_PPC_LOCAL24PC +#undef R_PPC_REL32 +#undef R_PPC_TLS +#undef R_PPC_DTPMOD32 +#undef R_PPC_TPREL16 +#undef R_PPC_TPREL16_LO +#undef R_PPC_TPREL16_HI +#undef R_PPC_TPREL16_HA +#undef R_PPC_TPREL32 +#undef R_PPC_DTPREL16 +#undef R_PPC_DTPREL16_LO +#undef R_PPC_DTPREL16_HI +#undef R_PPC_DTPREL16_HA +#undef R_PPC_DTPREL32 +#undef R_PPC_GOT_TLSGD16 +#undef R_PPC_GOT_TLSGD16_LO +#undef R_PPC_GOT_TLSGD16_HI +#undef R_PPC_GOT_TLSGD16_HA +#undef R_PPC_GOT_TLSLD16 +#undef R_PPC_GOT_TLSLD16_LO +#undef R_PPC_GOT_TLSLD16_HI +#undef R_PPC_GOT_TLSLD16_HA +#undef R_PPC_GOT_TPREL16 +#undef R_PPC_GOT_TPREL16_LO +#undef R_PPC_GOT_TPREL16_HI +#undef R_PPC_GOT_TPREL16_HA +#undef R_PPC_GOT_DTPREL16 +#undef R_PPC_GOT_DTPREL16_LO +#undef R_PPC_GOT_DTPREL16_HI +#undef R_PPC_GOT_DTPREL16_HA +#undef R_PPC_TLSGD +#undef R_PPC_TLSLD +#undef R_PPC_REL16 +#undef R_PPC_REL16_LO +#undef R_PPC_REL16_HI +#undef R_PPC_REL16_HA + ELF_RELOC(R_PPC_NONE, 0) /* No relocation. */ ELF_RELOC(R_PPC_ADDR32, 1) ELF_RELOC(R_PPC_ADDR24, 2) diff --git a/include/llvm/Support/ELFRelocs/PowerPC64.def b/include/llvm/Support/ELFRelocs/PowerPC64.def index 7b2a3cb2235b..3a47c5a07574 100644 --- a/include/llvm/Support/ELFRelocs/PowerPC64.def +++ b/include/llvm/Support/ELFRelocs/PowerPC64.def @@ -3,6 +3,97 @@ #error "ELF_RELOC must be defined" #endif +// glibc's PowerPC asm/sigcontext.h, when compiling for PPC64, has the +// unfortunate behavior of including asm/elf.h, which defines R_PPC_NONE, etc. +// to their corresponding integer values. As a result, we need to undef them +// here before continuing. 
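A self-contained sketch of the failure mode this comment describes: if a system header leaves one of these names defined as an object-like macro, the macro argument is expanded before substitution and the table entry becomes garbage, hence the #undef block.

#define R_PPC64_ADDR32 1 // stand-in for the asm/elf.h definition

#define ELF_RELOC(name, value) int name = value; // hypothetical expansion
// Without the next line, ELF_RELOC(R_PPC64_ADDR32, 1) would expand to the
// ill-formed "int 1 = 1;".
#undef R_PPC64_ADDR32
ELF_RELOC(R_PPC64_ADDR32, 1) // now declares "int R_PPC64_ADDR32 = 1;"
#undef ELF_RELOC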
+ +#undef R_PPC64_NONE +#undef R_PPC64_ADDR32 +#undef R_PPC64_ADDR24 +#undef R_PPC64_ADDR16 +#undef R_PPC64_ADDR16_LO +#undef R_PPC64_ADDR16_HI +#undef R_PPC64_ADDR16_HA +#undef R_PPC64_ADDR14 +#undef R_PPC64_ADDR14_BRTAKEN +#undef R_PPC64_ADDR14_BRNTAKEN +#undef R_PPC64_REL24 +#undef R_PPC64_REL14 +#undef R_PPC64_REL14_BRTAKEN +#undef R_PPC64_REL14_BRNTAKEN +#undef R_PPC64_GOT16 +#undef R_PPC64_GOT16_LO +#undef R_PPC64_GOT16_HI +#undef R_PPC64_GOT16_HA +#undef R_PPC64_GLOB_DAT +#undef R_PPC64_JMP_SLOT +#undef R_PPC64_RELATIVE +#undef R_PPC64_REL32 +#undef R_PPC64_ADDR64 +#undef R_PPC64_ADDR16_HIGHER +#undef R_PPC64_ADDR16_HIGHERA +#undef R_PPC64_ADDR16_HIGHEST +#undef R_PPC64_ADDR16_HIGHESTA +#undef R_PPC64_REL64 +#undef R_PPC64_TOC16 +#undef R_PPC64_TOC16_LO +#undef R_PPC64_TOC16_HI +#undef R_PPC64_TOC16_HA +#undef R_PPC64_TOC +#undef R_PPC64_ADDR16_DS +#undef R_PPC64_ADDR16_LO_DS +#undef R_PPC64_GOT16_DS +#undef R_PPC64_GOT16_LO_DS +#undef R_PPC64_TOC16_DS +#undef R_PPC64_TOC16_LO_DS +#undef R_PPC64_TLS +#undef R_PPC64_DTPMOD64 +#undef R_PPC64_TPREL16 +#undef R_PPC64_TPREL16_LO +#undef R_PPC64_TPREL16_HI +#undef R_PPC64_TPREL16_HA +#undef R_PPC64_TPREL64 +#undef R_PPC64_DTPREL16 +#undef R_PPC64_DTPREL16_LO +#undef R_PPC64_DTPREL16_HI +#undef R_PPC64_DTPREL16_HA +#undef R_PPC64_DTPREL64 +#undef R_PPC64_GOT_TLSGD16 +#undef R_PPC64_GOT_TLSGD16_LO +#undef R_PPC64_GOT_TLSGD16_HI +#undef R_PPC64_GOT_TLSGD16_HA +#undef R_PPC64_GOT_TLSLD16 +#undef R_PPC64_GOT_TLSLD16_LO +#undef R_PPC64_GOT_TLSLD16_HI +#undef R_PPC64_GOT_TLSLD16_HA +#undef R_PPC64_GOT_TPREL16_DS +#undef R_PPC64_GOT_TPREL16_LO_DS +#undef R_PPC64_GOT_TPREL16_HI +#undef R_PPC64_GOT_TPREL16_HA +#undef R_PPC64_GOT_DTPREL16_DS +#undef R_PPC64_GOT_DTPREL16_LO_DS +#undef R_PPC64_GOT_DTPREL16_HI +#undef R_PPC64_GOT_DTPREL16_HA +#undef R_PPC64_TPREL16_DS +#undef R_PPC64_TPREL16_LO_DS +#undef R_PPC64_TPREL16_HIGHER +#undef R_PPC64_TPREL16_HIGHERA +#undef R_PPC64_TPREL16_HIGHEST +#undef R_PPC64_TPREL16_HIGHESTA +#undef R_PPC64_DTPREL16_DS +#undef R_PPC64_DTPREL16_LO_DS +#undef R_PPC64_DTPREL16_HIGHER +#undef R_PPC64_DTPREL16_HIGHERA +#undef R_PPC64_DTPREL16_HIGHEST +#undef R_PPC64_DTPREL16_HIGHESTA +#undef R_PPC64_TLSGD +#undef R_PPC64_TLSLD +#undef R_PPC64_REL16 +#undef R_PPC64_REL16_LO +#undef R_PPC64_REL16_HI +#undef R_PPC64_REL16_HA + ELF_RELOC(R_PPC64_NONE, 0) ELF_RELOC(R_PPC64_ADDR32, 1) ELF_RELOC(R_PPC64_ADDR24, 2) @@ -21,7 +112,9 @@ ELF_RELOC(R_PPC64_GOT16, 14) ELF_RELOC(R_PPC64_GOT16_LO, 15) ELF_RELOC(R_PPC64_GOT16_HI, 16) ELF_RELOC(R_PPC64_GOT16_HA, 17) +ELF_RELOC(R_PPC64_GLOB_DAT, 20) ELF_RELOC(R_PPC64_JMP_SLOT, 21) +ELF_RELOC(R_PPC64_RELATIVE, 22) ELF_RELOC(R_PPC64_REL32, 26) ELF_RELOC(R_PPC64_ADDR64, 38) ELF_RELOC(R_PPC64_ADDR16_HIGHER, 39) diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h index fd59009e0d3a..bc93c9a66eef 100644 --- a/include/llvm/Support/Endian.h +++ b/include/llvm/Support/Endian.h @@ -77,6 +77,95 @@ inline void write(void *memory, value_type value) { &value, sizeof(value_type)); } + +template +using make_unsigned_t = typename std::make_unsigned::type; + +/// Read a value of a particular endianness from memory, for a location +/// that starts at the given bit offset within the first byte. +template +inline value_type readAtBitAlignment(const void *memory, uint64_t startBit) { + assert(startBit < 8); + if (startBit == 0) + return read(memory); + else { + // Read two values and compose the result from them. 
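// Aside (illustrative values, not part of the patch): with value_type =
// uint8_t and startBit = 3, reading the two bytes as val[0] = 0xAB and
// val[1] = 0xCD, the composition below computes
//   lowerVal = 0xAB >> 3         = 0x15 (top 5 bits of the first byte)
//   upperVal = (0xCD & 0x7) << 5 = 0xA0 (low 3 bits of the second byte)
//   result   = 0x15 | 0xA0       = 0xB5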
+ value_type val[2]; + memcpy(&val[0], + LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + sizeof(value_type) * 2); + val[0] = byte_swap(val[0]); + val[1] = byte_swap(val[1]); + + // Shift bits from the lower value into place. + make_unsigned_t lowerVal = val[0] >> startBit; + // Mask off upper bits after right shift in case of signed type. + make_unsigned_t numBitsFirstVal = + (sizeof(value_type) * 8) - startBit; + lowerVal &= ((make_unsigned_t)1 << numBitsFirstVal) - 1; + + // Get the bits from the upper value. + make_unsigned_t upperVal = + val[1] & (((make_unsigned_t)1 << startBit) - 1); + // Shift them in to place. + upperVal <<= numBitsFirstVal; + + return lowerVal | upperVal; + } +} + +/// Write a value to memory with a particular endianness, for a location +/// that starts at the given bit offset within the first byte. +template +inline void writeAtBitAlignment(void *memory, value_type value, + uint64_t startBit) { + assert(startBit < 8); + if (startBit == 0) + write(memory, value); + else { + // Read two values and shift the result into them. + value_type val[2]; + memcpy(&val[0], + LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + sizeof(value_type) * 2); + val[0] = byte_swap(val[0]); + val[1] = byte_swap(val[1]); + + // Mask off any existing bits in the upper part of the lower value that + // we want to replace. + val[0] &= ((make_unsigned_t)1 << startBit) - 1; + make_unsigned_t numBitsFirstVal = + (sizeof(value_type) * 8) - startBit; + make_unsigned_t lowerVal = value; + if (startBit > 0) { + // Mask off the upper bits in the new value that are not going to go into + // the lower value. This avoids a left shift of a negative value, which + // is undefined behavior. + lowerVal &= (((make_unsigned_t)1 << numBitsFirstVal) - 1); + // Now shift the new bits into place + lowerVal <<= startBit; + } + val[0] |= lowerVal; + + // Mask off any existing bits in the lower part of the upper value that + // we want to replace. + val[1] &= ~(((make_unsigned_t)1 << startBit) - 1); + // Next shift the bits that go into the upper value into position. + make_unsigned_t upperVal = value >> numBitsFirstVal; + // Mask off upper bits after right shift in case of signed type. + upperVal &= ((make_unsigned_t)1 << startBit) - 1; + val[1] |= upperVal; + + // Finally, rewrite values. 
+ val[0] = byte_swap(val[0]); + val[1] = byte_swap(val[1]); + memcpy(LLVM_ASSUME_ALIGNED( + memory, (detail::PickAlignment::value)), + &val[0], sizeof(value_type) * 2); + } +} } // end namespace endian namespace detail { @@ -208,19 +297,47 @@ typedef detail::packed_endian_specific_integral unaligned_int64_t; namespace endian { -inline uint16_t read16le(const void *p) { return *(const ulittle16_t *)p; } -inline uint32_t read32le(const void *p) { return *(const ulittle32_t *)p; } -inline uint64_t read64le(const void *p) { return *(const ulittle64_t *)p; } -inline uint16_t read16be(const void *p) { return *(const ubig16_t *)p; } -inline uint32_t read32be(const void *p) { return *(const ubig32_t *)p; } -inline uint64_t read64be(const void *p) { return *(const ubig64_t *)p; } +template inline T read(const void *P) { + return *(const detail::packed_endian_specific_integral *)P; +} -inline void write16le(void *p, uint16_t v) { *(ulittle16_t *)p = v; } -inline void write32le(void *p, uint32_t v) { *(ulittle32_t *)p = v; } -inline void write64le(void *p, uint64_t v) { *(ulittle64_t *)p = v; } -inline void write16be(void *p, uint16_t v) { *(ubig16_t *)p = v; } -inline void write32be(void *p, uint32_t v) { *(ubig32_t *)p = v; } -inline void write64be(void *p, uint64_t v) { *(ubig64_t *)p = v; } +template inline uint16_t read16(const void *P) { + return read(P); +} +template inline uint32_t read32(const void *P) { + return read(P); +} +template inline uint64_t read64(const void *P) { + return read(P); +} + +inline uint16_t read16le(const void *P) { return read16(P); } +inline uint32_t read32le(const void *P) { return read32(P); } +inline uint64_t read64le(const void *P) { return read64(P); } +inline uint16_t read16be(const void *P) { return read16(P); } +inline uint32_t read32be(const void *P) { return read32(P); } +inline uint64_t read64be(const void *P) { return read64(P); } + +template inline void write(void *P, T V) { + *(detail::packed_endian_specific_integral *)P = V; +} + +template inline void write16(void *P, uint16_t V) { + write(P, V); +} +template inline void write32(void *P, uint32_t V) { + write(P, V); +} +template inline void write64(void *P, uint64_t V) { + write(P, V); +} + +inline void write16le(void *P, uint16_t V) { write16(P, V); } +inline void write32le(void *P, uint32_t V) { write32(P, V); } +inline void write64le(void *P, uint64_t V) { write64(P, V); } +inline void write16be(void *P, uint16_t V) { write16(P, V); } +inline void write32be(void *P, uint32_t V) { write32(P, V); } +inline void write64be(void *P, uint64_t V) { write64(P, V); } } // end namespace endian } // end namespace support } // end namespace llvm diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h index 9afd52d1abc7..32f05e0e9610 100644 --- a/include/llvm/Support/ErrorHandling.h +++ b/include/llvm/Support/ErrorHandling.h @@ -61,22 +61,22 @@ namespace llvm { ~ScopedFatalErrorHandler() { remove_fatal_error_handler(); } }; - /// Reports a serious error, calling any installed error handler. These - /// functions are intended to be used for error conditions which are outside - /// the control of the compiler (I/O errors, invalid user input, etc.) - /// - /// If no error handler is installed the default is to print the message to - /// standard error, followed by a newline. - /// After the error handler is called this function will call exit(1), it - /// does not return. 
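A usage sketch for the templated endian helpers above (assumes the patched llvm/Support/Endian.h; the constant is arbitrary):

#include "llvm/Support/Endian.h"
#include <cstdio>

int main() {
  unsigned char Buf[4];
  llvm::support::endian::write32le(Buf, 0x11223344u);
  // Memory now holds 44 33 22 11 regardless of the host's endianness, so
  // re-reading it big-endian byte-swaps the value.
  std::printf("0x%08x\n",
              (unsigned)llvm::support::endian::read32be(Buf)); // 0x44332211
  return 0;
}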
- LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, - bool gen_crash_diag = true); - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason, - bool gen_crash_diag = true); - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason, - bool gen_crash_diag = true); - LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason, - bool gen_crash_diag = true); +/// Reports a serious error, calling any installed error handler. These +/// functions are intended to be used for error conditions which are outside +/// the control of the compiler (I/O errors, invalid user input, etc.) +/// +/// If no error handler is installed the default is to print the message to +/// standard error, followed by a newline. +/// After the error handler is called this function will call exit(1), it +/// does not return. +LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, + bool gen_crash_diag = true); +LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason, + bool gen_crash_diag = true); +LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason, + bool gen_crash_diag = true); +LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason, + bool gen_crash_diag = true); /// This function calls abort(), and prints the optional message to stderr. /// Use the llvm_unreachable macro (that adds location info), instead of diff --git a/include/llvm/Support/ErrorOr.h b/include/llvm/Support/ErrorOr.h index 589404f9b4ee..ca6ede73e8df 100644 --- a/include/llvm/Support/ErrorOr.h +++ b/include/llvm/Support/ErrorOr.h @@ -1,4 +1,4 @@ -//===- llvm/Support/ErrorOr.h - Error Smart Pointer -----------------------===// +//===- llvm/Support/ErrorOr.h - Error Smart Pointer -------------*- C++ -*-===// // // The LLVM Linker // @@ -91,6 +91,7 @@ private: typedef typename std::remove_reference::type &reference; typedef const typename std::remove_reference::type &const_reference; typedef typename std::remove_reference::type *pointer; + typedef const typename std::remove_reference::type *const_pointer; public: template @@ -183,10 +184,14 @@ public: return toPointer(getStorage()); } + const_pointer operator->() const { return toPointer(getStorage()); } + reference operator *() { return *getStorage(); } + const_reference operator*() const { return *getStorage(); } + private: template void copyConstruct(const ErrorOr &Other) { @@ -246,10 +251,14 @@ private: return Val; } + const_pointer toPointer(const_pointer Val) const { return Val; } + pointer toPointer(wrap *Val) { return &Val->get(); } + const_pointer toPointer(const wrap *Val) const { return &Val->get(); } + storage_type *getStorage() { assert(!HasError && "Cannot get value when an error exists!"); return reinterpret_cast(TStorage.buffer); diff --git a/include/llvm/Support/FileOutputBuffer.h b/include/llvm/Support/FileOutputBuffer.h index fd8879c84622..3bcf64a8a08b 100644 --- a/include/llvm/Support/FileOutputBuffer.h +++ b/include/llvm/Support/FileOutputBuffer.h @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" namespace llvm { @@ -37,9 +38,8 @@ public: /// Factory method to create an OutputBuffer object which manages a read/write /// buffer of the specified size. When committed, the buffer will be written /// to the file at the specified path. 
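A small sketch of the const-qualified accessors the ErrorOr hunk above adds; makeGreeting is an illustrative helper, not an LLVM API.

#include "llvm/Support/ErrorOr.h"
#include <cassert>
#include <string>
#include <system_error>

static llvm::ErrorOr<std::string> makeGreeting(bool Ok) {
  if (!Ok)
    return std::make_error_code(std::errc::invalid_argument);
  return std::string("hello");
}

int main() {
  const llvm::ErrorOr<std::string> G = makeGreeting(true); // note: const object
  assert(G && "no error");
  assert(G->size() == 5); // const operator-> added above
  assert(*G == "hello");  // const operator* added above
  return 0;
}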
- static std::error_code create(StringRef FilePath, size_t Size, - std::unique_ptr &Result, - unsigned Flags = 0); + static ErrorOr> + create(StringRef FilePath, size_t Size, unsigned Flags = 0); /// Returns a pointer to the start of the buffer. uint8_t *getBufferStart() { diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index a736c324f8aa..4733ddb77575 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -95,21 +95,21 @@ enum perms { }; // Helper functions so that you can use & and | to manipulate perms bits: -inline perms operator|(perms l , perms r) { - return static_cast( - static_cast(l) | static_cast(r)); +inline perms operator|(perms l, perms r) { + return static_cast(static_cast(l) | + static_cast(r)); } -inline perms operator&(perms l , perms r) { - return static_cast( - static_cast(l) & static_cast(r)); +inline perms operator&(perms l, perms r) { + return static_cast(static_cast(l) & + static_cast(r)); } inline perms &operator|=(perms &l, perms r) { - l = l | r; - return l; + l = l | r; + return l; } inline perms &operator&=(perms &l, perms r) { - l = l & r; - return l; + l = l & r; + return l; } inline perms operator~(perms x) { return static_cast(~static_cast(x)); @@ -156,6 +156,7 @@ class file_status friend bool equivalent(file_status A, file_status B); file_type Type; perms Perms; + public: #if defined(LLVM_ON_UNIX) file_status() : fs_st_dev(0), fs_st_ino(0), fs_st_mtime(0), @@ -265,6 +266,20 @@ private: /// @name Physical Operators /// @{ +/// @brief Make \a path an absolute path. +/// +/// Makes \a path absolute using the \a current_directory if it is not already. +/// An empty \a path will result in the \a current_directory. +/// +/// /absolute/path => /absolute/path +/// relative/../path => /relative/../path +/// +/// @param path A path that is modified to be an absolute path. +/// @returns errc::success if \a path has been made absolute, otherwise a +/// platform-specific error_code. +std::error_code make_absolute(const Twine ¤t_directory, + SmallVectorImpl &path); + /// @brief Make \a path an absolute path. /// /// Makes \a path absolute using the current directory if it is not already. An @@ -285,7 +300,8 @@ std::error_code make_absolute(SmallVectorImpl &path); /// specific error_code. If IgnoreExisting is false, also returns /// error if the directory already existed. std::error_code create_directories(const Twine &path, - bool IgnoreExisting = true); + bool IgnoreExisting = true, + perms Perms = owner_all | group_all); /// @brief Create the directory in path. /// @@ -293,7 +309,8 @@ std::error_code create_directories(const Twine &path, /// @returns errc::success if is_directory(path), otherwise a platform /// specific error_code. If IgnoreExisting is false, also returns /// error if the directory already existed. -std::error_code create_directory(const Twine &path, bool IgnoreExisting = true); +std::error_code create_directory(const Twine &path, bool IgnoreExisting = true, + perms Perms = owner_all | group_all); /// @brief Create a link from \a from to \a to. /// @@ -375,9 +392,7 @@ inline bool exists(const Twine &Path) { /// /// @param Path Input path. /// @returns True if we can execute it, false otherwise. -inline bool can_execute(const Twine &Path) { - return !access(Path, AccessMode::Execute); -} +bool can_execute(const Twine &Path); /// @brief Can we write this file? 
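To illustrate the FileOutputBuffer::create change above, a sketch of the new ErrorOr-returning factory, assuming commit() still returns std::error_code in this revision; "out.bin" is an illustrative path.

#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileOutputBuffer.h"
#include <cstring>
#include <memory>

static std::error_code writeMagic() {
  llvm::ErrorOr<std::unique_ptr<llvm::FileOutputBuffer>> BufOrErr =
      llvm::FileOutputBuffer::create("out.bin", 4);
  if (std::error_code EC = BufOrErr.getError())
    return EC;                                   // creation failed
  std::unique_ptr<llvm::FileOutputBuffer> &Buf = *BufOrErr;
  std::memcpy(Buf->getBufferStart(), "\x7f" "ELF", 4); // fill the mapped region
  return Buf->commit();                          // flush buffer to disk
}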
/// @@ -531,15 +546,15 @@ std::error_code status_known(const Twine &path, bool &result); /// /// Generates a unique path suitable for a temporary file and then opens it as a /// file. The name is based on \a model with '%' replaced by a random char in -/// [0-9a-f]. If \a model is not an absolute path, a suitable temporary -/// directory will be prepended. +/// [0-9a-f]. If \a model is not an absolute path, the temporary file will be +/// created in the current directory. /// /// Example: clang-%%-%%-%%-%%-%%.s => clang-a0-b1-c2-d3-e4.s /// /// This is an atomic operation. Either the file is created and opened, or the /// file system is left untouched. /// -/// The intendend use is for files that are to be kept, possibly after +/// The intended use is for files that are to be kept, possibly after /// renaming them. For example, when running 'clang -c foo.o', the file can /// be first created as foo-abc123.o and then renamed. /// diff --git a/include/llvm/Support/Format.h b/include/llvm/Support/Format.h index 4319a3ba2745..f0b437a0cbed 100644 --- a/include/llvm/Support/Format.h +++ b/include/llvm/Support/Format.h @@ -118,6 +118,7 @@ class FormattedString { unsigned Width; bool RightJustify; friend class raw_ostream; + public: FormattedString(StringRef S, unsigned W, bool R) : Str(S), Width(W), RightJustify(R) { } @@ -146,6 +147,7 @@ class FormattedNumber { bool Upper; bool HexPrefix; friend class raw_ostream; + public: FormattedNumber(uint64_t HV, int64_t DV, unsigned W, bool H, bool U, bool Prefix) @@ -178,7 +180,7 @@ inline FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, return FormattedNumber(N, 0, Width, true, Upper, false); } -/// format_decimal - Output \p N as a right justified, fixed-width decimal. If +/// format_decimal - Output \p N as a right justified, fixed-width decimal. If /// number will not fit in width, full number is still printed. Examples: /// OS << format_decimal(0, 5) => " 0" /// OS << format_decimal(255, 5) => " 255" @@ -188,7 +190,6 @@ inline FormattedNumber format_decimal(int64_t N, unsigned Width) { return FormattedNumber(0, N, Width, false, false, false); } - } // end namespace llvm #endif diff --git a/include/llvm/Support/GCOV.h b/include/llvm/Support/GCOV.h index c2e34bd3eaeb..544434f036a4 100644 --- a/include/llvm/Support/GCOV.h +++ b/include/llvm/Support/GCOV.h @@ -30,12 +30,11 @@ class GCOVBlock; class FileInfo; namespace GCOV { -enum GCOVVersion { V402, V404 }; -} // end GCOV namespace +enum GCOVVersion { V402, V404, V704 }; -/// GCOVOptions - A struct for passing gcov options between functions. -struct GCOVOptions { - GCOVOptions(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N) +/// \brief A struct for passing gcov options between functions. +struct Options { + Options(bool A, bool B, bool C, bool F, bool P, bool U, bool L, bool N) : AllBlocks(A), BranchInfo(B), BranchCount(C), FuncCoverage(F), PreservePaths(P), UncondBranch(U), LongFileNames(L), NoOutput(N) {} @@ -48,6 +47,7 @@ struct GCOVOptions { bool LongFileNames; bool NoOutput; }; +} // end GCOV namespace /// GCOVBuffer - A wrapper around MemoryBuffer to provide GCOV specific /// read operations. 
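A sketch (not part of the patch) of constructing the renamed GCOV::Options; the positional flags follow the constructor quoted above.

#include "llvm/Support/GCOV.h"

llvm::GCOV::Options makeDefaultGcovOptions() {
  // Positional arguments per the constructor above: AllBlocks, BranchInfo,
  // BranchCount, FuncCoverage, PreservePaths, UncondBranch, LongFileNames,
  // NoOutput.
  return llvm::GCOV::Options(/*AllBlocks=*/false, /*BranchInfo=*/false,
                             /*BranchCount=*/false, /*FuncCoverage=*/false,
                             /*PreservePaths=*/false, /*UncondBranch=*/false,
                             /*LongFileNames=*/false, /*NoOutput=*/false);
}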
@@ -90,6 +90,11 @@ public: Version = GCOV::V404; return true; } + if (VersionStr == "*704") { + Cursor += 4; + Version = GCOV::V704; + return true; + } errs() << "Unexpected version: " << VersionStr << ".\n"; return false; } @@ -390,7 +395,7 @@ class FileInfo { }; public: - FileInfo(const GCOVOptions &Options) + FileInfo(const GCOV::Options &Options) : Options(Options), LineInfo(), RunCount(0), ProgramCount(0) {} void addBlockLine(StringRef Filename, uint32_t Line, const GCOVBlock *Block) { @@ -424,7 +429,7 @@ private: void printFuncCoverage(raw_ostream &OS) const; void printFileCoverage(raw_ostream &OS) const; - const GCOVOptions &Options; + const GCOV::Options &Options; StringMap LineInfo; uint32_t RunCount; uint32_t ProgramCount; diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h index 63678bb98bb1..8751f272cd29 100644 --- a/include/llvm/Support/GenericDomTree.h +++ b/include/llvm/Support/GenericDomTree.h @@ -371,8 +371,9 @@ public: void releaseMemory() { reset(); } /// getNode - return the (Post)DominatorTree node for the specified basic - /// block. This is the same as using operator[] on this class. - /// + /// block. This is the same as using operator[] on this class. The result + /// may (but is not required to) be null for a forward (backwards) + /// statically unreachable block. DomTreeNodeBase *getNode(NodeT *BB) const { auto I = DomTreeNodes.find(BB); if (I != DomTreeNodes.end()) @@ -380,6 +381,7 @@ public: return nullptr; } + /// See getNode. DomTreeNodeBase *operator[](NodeT *BB) const { return getNode(BB); } /// getRootNode - This returns the entry node for the CFG of the function. If @@ -732,13 +734,13 @@ public: for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F), E = TraitsTy::nodes_end(&F); I != E; ++I) { - if (TraitsTy::child_begin(I) == TraitsTy::child_end(I)) - addRoot(I); + if (TraitsTy::child_begin(&*I) == TraitsTy::child_end(&*I)) + addRoot(&*I); // Prepopulate maps so that we don't get iterator invalidation issues // later. - this->IDoms[I] = nullptr; - this->DomTreeNodes[I] = nullptr; + this->IDoms[&*I] = nullptr; + this->DomTreeNodes[&*I] = nullptr; } Calculate>(*this, F); diff --git a/include/llvm/Support/GenericDomTreeConstruction.h b/include/llvm/Support/GenericDomTreeConstruction.h index 7c065f939256..3e867dc6cbf1 100644 --- a/include/llvm/Support/GenericDomTreeConstruction.h +++ b/include/llvm/Support/GenericDomTreeConstruction.h @@ -21,7 +21,6 @@ /// //===----------------------------------------------------------------------===// - #ifndef LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H #define LLVM_SUPPORT_GENERICDOMTREECONSTRUCTION_H @@ -88,7 +87,7 @@ unsigned DFSPass(DominatorTreeBase& DT, // Increment the successor number for the next time we get to it. ++Worklist.back().second; - + // Visit the successor next, if it isn't already visited. 
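Per the clarified getNode() contract above, a statically unreachable block may have no tree node, so callers should check for null. A minimal sketch against the IR-level DominatorTree:

#include "llvm/IR/Dominators.h"
using namespace llvm;

BasicBlock *idomOrNull(DominatorTree &DT, BasicBlock *BB) {
  if (DomTreeNode *N = DT.getNode(BB))    // may be null for unreachable blocks
    if (DomTreeNode *IDom = N->getIDom()) // the entry block has no idom
      return IDom->getBlock();
  return nullptr; // no dominator information available
}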
typename GraphT::NodeType* Succ = *NextSucc; @@ -103,9 +102,9 @@ unsigned DFSPass(DominatorTreeBase& DT, return N; } -template -typename GraphT::NodeType* -Eval(DominatorTreeBase& DT, +template +typename GraphT::NodeType * +Eval(DominatorTreeBase &DT, typename GraphT::NodeType *VIn, unsigned LastLinked) { typename DominatorTreeBase::InfoRec &VInInfo = DT.Info[VIn]; @@ -117,7 +116,7 @@ Eval(DominatorTreeBase& DT, if (VInInfo.Parent >= LastLinked) Work.push_back(VIn); - + while (!Work.empty()) { typename GraphT::NodeType* V = Work.back(); typename DominatorTreeBase::InfoRec &VInfo = @@ -128,8 +127,8 @@ Eval(DominatorTreeBase& DT, if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) { Work.push_back(VAncestor); continue; - } - Work.pop_back(); + } + Work.pop_back(); // Update VInfo based on Ancestor info if (VInfo.Parent < LastLinked) @@ -169,7 +168,7 @@ void Calculate(DominatorTreeBase::NodeType>& DT, i != e; ++i) N = DFSPass(DT, DT.Roots[i], N); - // it might be that some blocks did not get a DFS number (e.g., blocks of + // it might be that some blocks did not get a DFS number (e.g., blocks of // infinite loops). In these cases an artificial exit node is required. MultipleRoots |= (DT.isPostDominator() && N != GraphTraits::size(&F)); @@ -287,7 +286,6 @@ void Calculate(DominatorTreeBase::NodeType>& DT, DT.updateDFSNumbers(); } - } #endif diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h index b1af3d7c2632..86985c569464 100644 --- a/include/llvm/Support/GraphWriter.h +++ b/include/llvm/Support/GraphWriter.h @@ -175,8 +175,9 @@ public: O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); // If we should include the address of the node in the label, do so now. - if (DTraits.hasNodeAddressLabel(Node, G)) - O << "|" << static_cast(Node); + std::string Id = DTraits.getNodeIdentifierLabel(Node, G); + if (!Id.empty()) + O << "|" << DOT::EscapeString(Id); std::string NodeDesc = DTraits.getNodeDescription(Node, G); if (!NodeDesc.empty()) @@ -199,8 +200,9 @@ public: O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); // If we should include the address of the node in the label, do so now. - if (DTraits.hasNodeAddressLabel(Node, G)) - O << "|" << static_cast(Node); + std::string Id = DTraits.getNodeIdentifierLabel(Node, G); + if (!Id.empty()) + O << "|" << DOT::EscapeString(Id); std::string NodeDesc = DTraits.getNodeDescription(Node, G); if (!NodeDesc.empty()) diff --git a/include/llvm/Support/JamCRC.h b/include/llvm/Support/JamCRC.h new file mode 100644 index 000000000000..20c28a5f8e45 --- /dev/null +++ b/include/llvm/Support/JamCRC.h @@ -0,0 +1,48 @@ +//===-- llvm/Support/JamCRC.h - Cyclic Redundancy Check ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of JamCRC. +// +// We will use the "Rocksoft^tm Model CRC Algorithm" to describe the properties +// of this CRC: +// Width : 32 +// Poly : 04C11DB7 +// Init : FFFFFFFF +// RefIn : True +// RefOut : True +// XorOut : 00000000 +// Check : 340BC6D9 (result of CRC for "123456789") +// +// N.B. We permit flexibility of the "Init" value. Some consumers of this need +// it to be zero. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_JAMCRC_H +#define LLVM_SUPPORT_JAMCRC_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class JamCRC { +public: + JamCRC(uint32_t Init = 0xFFFFFFFFU) : CRC(Init) {} + + // \brief Update the CRC calculation with Data. + void update(ArrayRef Data); + + uint32_t getCRC() const { return CRC; } + +private: + uint32_t CRC; +}; +} // End of namespace llvm + +#endif diff --git a/include/llvm/Support/MachO.h b/include/llvm/Support/MachO.h index 775127505923..54b8745de1c1 100644 --- a/include/llvm/Support/MachO.h +++ b/include/llvm/Support/MachO.h @@ -132,7 +132,9 @@ namespace llvm { LC_DYLIB_CODE_SIGN_DRS = 0x0000002Bu, LC_ENCRYPTION_INFO_64 = 0x0000002Cu, LC_LINKER_OPTION = 0x0000002Du, - LC_LINKER_OPTIMIZATION_HINT = 0x0000002Eu + LC_LINKER_OPTIMIZATION_HINT = 0x0000002Eu, + LC_VERSION_MIN_TVOS = 0x0000002Fu, + LC_VERSION_MIN_WATCHOS = 0x00000030u, }; enum : uint32_t { @@ -142,7 +144,6 @@ namespace llvm { SG_NORELOC = 0x4u, SG_PROTECTED_VERSION_1 = 0x8u, - // Constant masks for the "flags" field in llvm::MachO::section and // llvm::MachO::section_64 SECTION_TYPE = 0x000000ffu, // SECTION_TYPE @@ -334,7 +335,6 @@ namespace llvm { EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE = 0x02u }; - enum { // Constant masks for the "n_type" field in llvm::MachO::nlist and // llvm::MachO::nlist_64 @@ -385,7 +385,7 @@ namespace llvm { SELF_LIBRARY_ORDINAL = 0x0, MAX_LIBRARY_ORDINAL = 0xfd, DYNAMIC_LOOKUP_ORDINAL = 0xfe, - EXECUTABLE_ORDINAL = 0xff + EXECUTABLE_ORDINAL = 0xff }; enum StabType { @@ -506,7 +506,6 @@ namespace llvm { // Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. ARM64_RELOC_ADDEND = 10, - // Constant values for the r_type field in an x86_64 architecture // llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info // structure @@ -530,7 +529,6 @@ namespace llvm { VM_PROT_EXECUTE = 0x4 }; - // Structs from struct mach_header { @@ -784,7 +782,6 @@ namespace llvm { flags:8; }; - struct twolevel_hints_command { uint32_t cmd; uint32_t cmdsize; @@ -924,7 +921,6 @@ namespace llvm { uint64_t stacksize; }; - // Structs from struct fat_header { uint32_t magic; @@ -995,7 +991,6 @@ namespace llvm { uint64_t n_value; }; - // Byte order swapping functions for MachO structs inline void swapStruct(mach_header &mh) { diff --git a/include/llvm/Support/ManagedStatic.h b/include/llvm/Support/ManagedStatic.h index addd34e704bc..2e131e47177d 100644 --- a/include/llvm/Support/ManagedStatic.h +++ b/include/llvm/Support/ManagedStatic.h @@ -15,8 +15,8 @@ #define LLVM_SUPPORT_MANAGEDSTATIC_H #include "llvm/Support/Atomic.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Threading.h" -#include "llvm/Support/Valgrind.h" namespace llvm { diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index 2cf7e0e5d0b3..8111aeebe6ee 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -63,7 +63,7 @@ template struct TrailingZerosCounter { } }; -#if __GNUC__ >= 4 || _MSC_VER +#if __GNUC__ >= 4 || defined(_MSC_VER) template struct TrailingZerosCounter { static std::size_t count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) @@ -71,7 +71,7 @@ template struct TrailingZerosCounter { #if __has_builtin(__builtin_ctz) || LLVM_GNUC_PREREQ(4, 0, 0) return __builtin_ctz(Val); -#elif _MSC_VER +#elif defined(_MSC_VER) unsigned long Index; _BitScanForward(&Index, Val); return Index; @@ 
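Returning to the new JamCRC header above, a usage sketch; the expected value 0x340BC6D9 is the "Check" constant quoted in the file's header comment.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/JamCRC.h"
#include <cassert>

int main() {
  llvm::JamCRC CRC;                        // Init defaults to 0xFFFFFFFF
  const char Data[] = "123456789";
  CRC.update(llvm::makeArrayRef(Data, 9)); // exclude the terminating NUL
  assert(CRC.getCRC() == 0x340BC6D9u);     // the documented check value
  return 0;
}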
-87,7 +87,7 @@ template struct TrailingZerosCounter { #if __has_builtin(__builtin_ctzll) || LLVM_GNUC_PREREQ(4, 0, 0) return __builtin_ctzll(Val); -#elif _MSC_VER +#elif defined(_MSC_VER) unsigned long Index; _BitScanForward64(&Index, Val); return Index; @@ -132,7 +132,7 @@ template struct LeadingZerosCounter { } }; -#if __GNUC__ >= 4 || _MSC_VER +#if __GNUC__ >= 4 || defined(_MSC_VER) template struct LeadingZerosCounter { static std::size_t count(T Val, ZeroBehavior ZB) { if (ZB != ZB_Undefined && Val == 0) @@ -140,7 +140,7 @@ template struct LeadingZerosCounter { #if __has_builtin(__builtin_clz) || LLVM_GNUC_PREREQ(4, 0, 0) return __builtin_clz(Val); -#elif _MSC_VER +#elif defined(_MSC_VER) unsigned long Index; _BitScanReverse(&Index, Val); return Index ^ 31; @@ -156,7 +156,7 @@ template struct LeadingZerosCounter { #if __has_builtin(__builtin_clzll) || LLVM_GNUC_PREREQ(4, 0, 0) return __builtin_clzll(Val); -#elif _MSC_VER +#elif defined(_MSC_VER) unsigned long Index; _BitScanReverse64(&Index, Val); return Index ^ 63; @@ -313,7 +313,7 @@ inline bool isShiftedUInt(uint64_t x) { /// isUIntN - Checks if an unsigned integer fits into the given (dynamic) /// bit width. inline bool isUIntN(unsigned N, uint64_t x) { - return x == (x & (~0ULL >> (64 - N))); + return N >= 64 || x < (UINT64_C(1)<<(N)); } /// isIntN - Checks if an signed integer fits into the given (dynamic) @@ -552,7 +552,7 @@ inline uint32_t FloatToBits(float Float) { inline uint64_t MinAlign(uint64_t A, uint64_t B) { // The largest power of 2 that divides both A and B. // - // Replace "-Value" by "1+~Value" in the following commented code to avoid + // Replace "-Value" by "1+~Value" in the following commented code to avoid // MSVC warning C4146 // return (A | B) & -(A | B); return (A | B) & (1 + ~(A | B)); @@ -599,15 +599,27 @@ inline uint64_t PowerOf2Floor(uint64_t A) { /// Returns the next integer (mod 2**64) that is greater than or equal to /// \p Value and is a multiple of \p Align. \p Align must be non-zero. /// +/// If non-zero \p Skew is specified, the return value will be a minimal +/// integer that is greater than or equal to \p Value and equal to +/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than +/// \p Align, its value is adjusted to '\p Skew mod \p Align'. +/// /// Examples: /// \code /// RoundUpToAlignment(5, 8) = 8 /// RoundUpToAlignment(17, 8) = 24 /// RoundUpToAlignment(~0LL, 8) = 0 /// RoundUpToAlignment(321, 255) = 510 +/// +/// RoundUpToAlignment(5, 8, 7) = 7 +/// RoundUpToAlignment(17, 8, 1) = 17 +/// RoundUpToAlignment(~0LL, 8, 3) = 3 +/// RoundUpToAlignment(321, 255, 42) = 552 /// \endcode -inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) { - return (Value + Align - 1) / Align * Align; +inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align, + uint64_t Skew = 0) { + Skew %= Align; + return (Value + Align - 1 - Skew) / Align * Align + Skew; } /// Returns the offset to the next integer (mod 2**64) that is greater than @@ -641,6 +653,70 @@ inline int64_t SignExtend64(uint64_t X, unsigned B) { return int64_t(X << (64 - B)) >> (64 - B); } +/// \brief Add two unsigned integers, X and Y, of type T. +/// Clamp the result to the maximum representable value of T on overflow. +/// ResultOverflowed indicates if the result is larger than the maximum +/// representable value of type T. +template +typename std::enable_if::value, T>::type +SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { + bool Dummy; + bool &Overflowed = ResultOverflowed ? 
*ResultOverflowed : Dummy; + // Hacker's Delight, p. 29 + T Z = X + Y; + Overflowed = (Z < X || Z < Y); + if (Overflowed) + return std::numeric_limits::max(); + else + return Z; +} + +/// \brief Multiply two unsigned integers, X and Y, of type T. +/// Clamp the result to the maximum representable value of T on overflow. +/// ResultOverflowed indicates if the result is larger than the maximum +/// representable value of type T. +template +typename std::enable_if::value, T>::type +SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { + bool Dummy; + bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; + + // Hacker's Delight, p. 30 has a different algorithm, but we don't use that + // because it fails for uint16_t (where multiplication can have undefined + // behavior due to promotion to int), and requires a division in addition + // to the multiplication. + + Overflowed = false; + + // Log2(Z) would be either Log2Z or Log2Z + 1. + // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z + // will necessarily be less than Log2Max as desired. + int Log2Z = Log2_64(X) + Log2_64(Y); + const T Max = std::numeric_limits::max(); + int Log2Max = Log2_64(Max); + if (Log2Z < Log2Max) { + return X * Y; + } + if (Log2Z > Log2Max) { + Overflowed = true; + return Max; + } + + // We're going to use the top bit, and maybe overflow one + // bit past it. Multiply all but the bottom bit then add + // that on at the end. + T Z = (X >> 1) * Y; + if (Z & ~(Max >> 1)) { + Overflowed = true; + return Max; + } + Z <<= 1; + if (X & 1) + return SaturatingAdd(Z, Y, ResultOverflowed); + + return Z; +} + extern const float huge_valf; } // End llvm namespace diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h index b4305cb697d0..8103aea2fa25 100644 --- a/include/llvm/Support/Memory.h +++ b/include/llvm/Support/Memory.h @@ -1,4 +1,4 @@ -//===- llvm/Support/Memory.h - Memory Support --------------------*- C++ -*-===// +//===- llvm/Support/Memory.h - Memory Support -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -32,6 +32,7 @@ namespace sys { MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { } void *base() const { return Address; } size_t size() const { return Size; } + private: void *Address; ///< Address of first byte of memory area size_t Size; ///< Size, in bytes of the memory area @@ -70,7 +71,7 @@ namespace sys { /// If the address following \p NearBlock is not so aligned, it will be /// rounded up to the next allocation granularity boundary. /// - /// \r a non-null MemoryBlock if the function was successful, + /// \r a non-null MemoryBlock if the function was successful, /// otherwise a null MemoryBlock is with \p EC describing the error. /// /// @brief Allocate mapped memory. @@ -86,7 +87,7 @@ namespace sys { /// /// \r error_success if the function was successful, or an error_code /// describing the failure if an error occurred. - /// + /// /// @brief Release mapped memory. static std::error_code releaseMappedMemory(MemoryBlock &Block); @@ -131,7 +132,6 @@ namespace sys { /// @brief Release Read/Write/Execute memory. static bool ReleaseRWX(MemoryBlock &block, std::string *ErrMsg = nullptr); - /// InvalidateInstructionCache - Before the JIT can run a block of code /// that has been emitted it must invalidate the instruction cache on some /// platforms. @@ -155,6 +155,31 @@ namespace sys { /// as writable. static bool setRangeWritable(const void *Addr, size_t Size); }; + + /// Owning version of MemoryBlock. 
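A sketch of the saturating helpers above at the uint8_t boundary (255 is the maximum representable value); note that 15 * 17 == 255 fits exactly while 16 * 16 == 256 saturates.

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  bool Overflowed = false;
  assert(llvm::SaturatingAdd<uint8_t>(200, 100, &Overflowed) == 255);
  assert(Overflowed);                  // 300 does not fit in uint8_t
  assert(llvm::SaturatingMultiply<uint8_t>(16, 16, &Overflowed) == 255);
  assert(Overflowed);                  // 256 saturates to the maximum
  assert(llvm::SaturatingMultiply<uint8_t>(15, 17, &Overflowed) == 255);
  assert(!Overflowed);                 // 255 fits exactly: no saturation
  return 0;
}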
+ class OwningMemoryBlock { + public: + OwningMemoryBlock() = default; + explicit OwningMemoryBlock(MemoryBlock M) : M(M) {} + OwningMemoryBlock(OwningMemoryBlock &&Other) { + M = Other.M; + Other.M = MemoryBlock(); + } + OwningMemoryBlock& operator=(OwningMemoryBlock &&Other) { + M = Other.M; + Other.M = MemoryBlock(); + return *this; + } + ~OwningMemoryBlock() { + Memory::releaseMappedMemory(M); + } + void *base() const { return M.base(); } + size_t size() const { return M.size(); } + MemoryBlock getMemoryBlock() const { return M; } + private: + MemoryBlock M; + }; + } } diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h index 81616d8ba3ac..73d643537a6f 100644 --- a/include/llvm/Support/MemoryBuffer.h +++ b/include/llvm/Support/MemoryBuffer.h @@ -14,7 +14,6 @@ #ifndef LLVM_SUPPORT_MEMORYBUFFER_H #define LLVM_SUPPORT_MEMORYBUFFER_H -#include "llvm-c/Support.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CBindingWrapping.h" #include "llvm/Support/DataTypes.h" @@ -122,7 +121,8 @@ public: /// Open the specified file as a MemoryBuffer, or open stdin if the Filename /// is "-". static ErrorOr> - getFileOrSTDIN(const Twine &Filename, int64_t FileSize = -1); + getFileOrSTDIN(const Twine &Filename, int64_t FileSize = -1, + bool RequiresNullTerminator = true); /// Map a subrange of the specified file as a MemoryBuffer. static ErrorOr> @@ -151,6 +151,8 @@ class MemoryBufferRef { public: MemoryBufferRef() {} + MemoryBufferRef(MemoryBuffer& Buffer) + : Buffer(Buffer.getBuffer()), Identifier(Buffer.getBufferIdentifier()) {} MemoryBufferRef(StringRef Buffer, StringRef Identifier) : Buffer(Buffer), Identifier(Identifier) {} diff --git a/include/llvm/Support/OnDiskHashTable.h b/include/llvm/Support/OnDiskHashTable.h index 08e277ad5ce1..ac978d4c242c 100644 --- a/include/llvm/Support/OnDiskHashTable.h +++ b/include/llvm/Support/OnDiskHashTable.h @@ -53,6 +53,8 @@ namespace llvm { /// /// Write Data to Out. DataLen is the length from EmitKeyDataLength. /// static void EmitData(raw_ostream &Out, key_type_ref Key, /// data_type_ref Data, offset_type DataLen); +/// /// Determine if two keys are equal. Optional, only needed by contains. +/// static bool EqualKey(key_type_ref Key1, key_type_ref Key2); /// }; /// \endcode template class OnDiskChainedHashTableGenerator { @@ -122,13 +124,21 @@ public: /// Uses the provided Info instead of a stack allocated one. void insert(typename Info::key_type_ref Key, typename Info::data_type_ref Data, Info &InfoObj) { - ++NumEntries; if (4 * NumEntries >= 3 * NumBuckets) resize(NumBuckets * 2); insert(Buckets, NumBuckets, new (BA.Allocate()) Item(Key, Data, InfoObj)); } + /// \brief Determine whether an entry has been inserted. + bool contains(typename Info::key_type_ref Key, Info &InfoObj) { + unsigned Hash = InfoObj.ComputeHash(Key); + for (Item *I = Buckets[Hash & (NumBuckets - 1)].Head; I; I = I->Next) + if (I->Hash == Hash && InfoObj.EqualKey(I->Key, Key)) + return true; + return false; + } + /// \brief Emit the table to Out, which must not be at offset 0. offset_type Emit(raw_ostream &Out) { Info InfoObj; @@ -161,8 +171,22 @@ public: LE.write(I->Hash); const std::pair &Len = InfoObj.EmitKeyDataLength(Out, I->Key, I->Data); +#ifdef NDEBUG InfoObj.EmitKey(Out, I->Key, Len.first); InfoObj.EmitData(Out, I->Key, I->Data, Len.second); +#else + // In asserts mode, check that the users length matches the data they + // wrote. 
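Relating to the OwningMemoryBlock class added above: a sketch of the RAII pattern it enables, under the assumption that allocateMappedMemory keeps its current signature. The mapping is released automatically when the owner is destroyed.

#include "llvm/Support/Memory.h"
#include <system_error>
using namespace llvm;

sys::OwningMemoryBlock allocatePage(std::error_code &EC) {
  sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(
      4096, /*NearBlock=*/nullptr,
      sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
  return sys::OwningMemoryBlock(MB); // ownership moves out with the return
}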
+ uint64_t KeyStart = Out.tell(); + InfoObj.EmitKey(Out, I->Key, Len.first); + uint64_t DataStart = Out.tell(); + InfoObj.EmitData(Out, I->Key, I->Data, Len.second); + uint64_t End = Out.tell(); + assert(offset_type(DataStart - KeyStart) == Len.first && + "key length does not match bytes written"); + assert(offset_type(End - DataStart) == Len.second && + "data length does not match bytes written"); +#endif } } @@ -239,11 +263,12 @@ template class OnDiskChainedHashTable { Info InfoObj; public: + typedef Info InfoType; typedef typename Info::internal_key_type internal_key_type; typedef typename Info::external_key_type external_key_type; - typedef typename Info::data_type data_type; - typedef typename Info::hash_value_type hash_value_type; - typedef typename Info::offset_type offset_type; + typedef typename Info::data_type data_type; + typedef typename Info::hash_value_type hash_value_type; + typedef typename Info::offset_type offset_type; OnDiskChainedHashTable(offset_type NumBuckets, offset_type NumEntries, const unsigned char *Buckets, @@ -255,6 +280,21 @@ public: "'buckets' must have a 4-byte alignment"); } + /// Read the number of buckets and the number of entries from a hash table + /// produced by OnDiskHashTableGenerator::Emit, and advance the Buckets + /// pointer past them. + static std::pair + readNumBucketsAndEntries(const unsigned char *&Buckets) { + assert((reinterpret_cast(Buckets) & 0x3) == 0 && + "buckets should be 4-byte aligned."); + using namespace llvm::support; + offset_type NumBuckets = + endian::readNext(Buckets); + offset_type NumEntries = + endian::readNext(Buckets); + return std::make_pair(NumBuckets, NumEntries); + } + offset_type getNumBuckets() const { return NumBuckets; } offset_type getNumEntries() const { return NumEntries; } const unsigned char *getBase() const { return Base; } @@ -275,6 +315,10 @@ public: : Key(K), Data(D), Len(L), InfoObj(InfoObj) {} data_type operator*() const { return InfoObj->ReadData(Key, Data, Len); } + + const unsigned char *getDataPtr() const { return Data; } + offset_type getDataLen() const { return Len; } + bool operator==(const iterator &X) const { return X.Data == Data; } bool operator!=(const iterator &X) const { return X.Data != Data; } }; @@ -356,17 +400,11 @@ public: static OnDiskChainedHashTable *Create(const unsigned char *Buckets, const unsigned char *const Base, const Info &InfoObj = Info()) { - using namespace llvm::support; assert(Buckets > Base); - assert((reinterpret_cast(Buckets) & 0x3) == 0 && - "buckets should be 4-byte aligned."); - - offset_type NumBuckets = - endian::readNext(Buckets); - offset_type NumEntries = - endian::readNext(Buckets); - return new OnDiskChainedHashTable(NumBuckets, NumEntries, Buckets, - Base, InfoObj); + auto NumBucketsAndEntries = readNumBucketsAndEntries(Buckets); + return new OnDiskChainedHashTable(NumBucketsAndEntries.first, + NumBucketsAndEntries.second, + Buckets, Base, InfoObj); } }; @@ -385,40 +423,30 @@ public: typedef typename base_type::hash_value_type hash_value_type; typedef typename base_type::offset_type offset_type; - OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries, - const unsigned char *Buckets, - const unsigned char *Payload, - const unsigned char *Base, - const Info &InfoObj = Info()) - : base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj), - Payload(Payload) {} - +private: /// \brief Iterates over all of the keys in the table. 
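A hypothetical Info trait satisfying the generator's documented contract, including the optional EqualKey required by the new contains(); all names and the on-disk layout choices here are illustrative, not part of the patch.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/OnDiskHashTable.h"
#include "llvm/Support/raw_ostream.h"
#include <utility>

struct StringIntInfo {
  typedef llvm::StringRef key_type;
  typedef key_type key_type_ref;
  typedef uint32_t data_type;
  typedef data_type data_type_ref;
  typedef uint32_t hash_value_type;
  typedef uint32_t offset_type;

  static hash_value_type ComputeHash(key_type_ref K) {
    return static_cast<hash_value_type>(llvm::hash_value(K));
  }
  // Needed only by contains(): plain key comparison.
  static bool EqualKey(key_type_ref A, key_type_ref B) { return A == B; }

  static std::pair<offset_type, offset_type>
  EmitKeyDataLength(llvm::raw_ostream &Out, key_type_ref K, data_type_ref) {
    using namespace llvm::support;
    endian::Writer<little> LE(Out);
    offset_type KeyLen = K.size(), DataLen = 4;
    LE.write<uint16_t>(KeyLen);   // lengths precede the key/data payloads
    LE.write<uint16_t>(DataLen);
    return std::make_pair(KeyLen, DataLen);
  }
  static void EmitKey(llvm::raw_ostream &Out, key_type_ref K, offset_type) {
    Out << K;
  }
  static void EmitData(llvm::raw_ostream &Out, key_type_ref, data_type_ref D,
                       offset_type) {
    using namespace llvm::support;
    endian::Writer<little>(Out).write<uint32_t>(D);
  }
};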
- class key_iterator { + class iterator_base { const unsigned char *Ptr; offset_type NumItemsInBucketLeft; offset_type NumEntriesLeft; - Info *InfoObj; public: typedef external_key_type value_type; - key_iterator(const unsigned char *const Ptr, offset_type NumEntries, - Info *InfoObj) - : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries), - InfoObj(InfoObj) {} - key_iterator() - : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0), - InfoObj(0) {} + iterator_base(const unsigned char *const Ptr, offset_type NumEntries) + : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries) {} + iterator_base() + : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0) {} - friend bool operator==(const key_iterator &X, const key_iterator &Y) { + friend bool operator==(const iterator_base &X, const iterator_base &Y) { return X.NumEntriesLeft == Y.NumEntriesLeft; } - friend bool operator!=(const key_iterator &X, const key_iterator &Y) { + friend bool operator!=(const iterator_base &X, const iterator_base &Y) { return X.NumEntriesLeft != Y.NumEntriesLeft; } - key_iterator &operator++() { // Preincrement + /// Move to the next item. + void advance() { using namespace llvm::support; if (!NumItemsInBucketLeft) { // 'Items' starts with a 16-bit unsigned integer representing the @@ -435,25 +463,58 @@ public: --NumItemsInBucketLeft; assert(NumEntriesLeft); --NumEntriesLeft; + } + + /// Get the start of the item as written by the trait (after the hash and + /// immediately before the key and value length). + const unsigned char *getItem() const { + return Ptr + (NumItemsInBucketLeft ? 0 : 2) + sizeof(hash_value_type); + } + }; + +public: + OnDiskIterableChainedHashTable(offset_type NumBuckets, offset_type NumEntries, + const unsigned char *Buckets, + const unsigned char *Payload, + const unsigned char *Base, + const Info &InfoObj = Info()) + : base_type(NumBuckets, NumEntries, Buckets, Base, InfoObj), + Payload(Payload) {} + + /// \brief Iterates over all of the keys in the table. + class key_iterator : public iterator_base { + Info *InfoObj; + + public: + typedef external_key_type value_type; + + key_iterator(const unsigned char *const Ptr, offset_type NumEntries, + Info *InfoObj) + : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {} + key_iterator() : iterator_base(), InfoObj() {} + + key_iterator &operator++() { + this->advance(); return *this; } key_iterator operator++(int) { // Postincrement - key_iterator tmp = *this; ++*this; return tmp; + key_iterator tmp = *this; + ++*this; + return tmp; + } + + internal_key_type getInternalKey() const { + auto *LocalPtr = this->getItem(); + + // Determine the length of the key and the data. + auto L = Info::ReadKeyDataLength(LocalPtr); + + // Read the key. + return InfoObj->ReadKey(LocalPtr, L.first); } value_type operator*() const { - const unsigned char *LocalPtr = Ptr; - if (!NumItemsInBucketLeft) - LocalPtr += 2; // number of items in bucket - LocalPtr += sizeof(hash_value_type); // Skip the hash. - - // Determine the length of the key and the data. - const std::pair &L = - Info::ReadKeyDataLength(LocalPtr); - - // Read the key. - const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first); - return InfoObj->GetExternalKey(Key); + return InfoObj->GetExternalKey(getInternalKey()); } }; @@ -467,10 +528,7 @@ public: } /// \brief Iterates over all the entries in the table, returning the data. 
- class data_iterator { - const unsigned char *Ptr; - offset_type NumItemsInBucketLeft; - offset_type NumEntriesLeft; + class data_iterator : public iterator_base { Info *InfoObj; public: @@ -478,51 +536,24 @@ public: data_iterator(const unsigned char *const Ptr, offset_type NumEntries, Info *InfoObj) - : Ptr(Ptr), NumItemsInBucketLeft(0), NumEntriesLeft(NumEntries), - InfoObj(InfoObj) {} - data_iterator() - : Ptr(nullptr), NumItemsInBucketLeft(0), NumEntriesLeft(0), - InfoObj(nullptr) {} - - bool operator==(const data_iterator &X) const { - return X.NumEntriesLeft == NumEntriesLeft; - } - bool operator!=(const data_iterator &X) const { - return X.NumEntriesLeft != NumEntriesLeft; - } + : iterator_base(Ptr, NumEntries), InfoObj(InfoObj) {} + data_iterator() : iterator_base(), InfoObj() {} data_iterator &operator++() { // Preincrement - using namespace llvm::support; - if (!NumItemsInBucketLeft) { - // 'Items' starts with a 16-bit unsigned integer representing the - // number of items in this bucket. - NumItemsInBucketLeft = - endian::readNext(Ptr); - } - Ptr += sizeof(hash_value_type); // Skip the hash. - // Determine the length of the key and the data. - const std::pair &L = - Info::ReadKeyDataLength(Ptr); - Ptr += L.first + L.second; - assert(NumItemsInBucketLeft); - --NumItemsInBucketLeft; - assert(NumEntriesLeft); - --NumEntriesLeft; + this->advance(); return *this; } data_iterator operator++(int) { // Postincrement - data_iterator tmp = *this; ++*this; return tmp; + data_iterator tmp = *this; + ++*this; + return tmp; } value_type operator*() const { - const unsigned char *LocalPtr = Ptr; - if (!NumItemsInBucketLeft) - LocalPtr += 2; // number of items in bucket - LocalPtr += sizeof(hash_value_type); // Skip the hash. + auto *LocalPtr = this->getItem(); // Determine the length of the key and the data. - const std::pair &L = - Info::ReadKeyDataLength(LocalPtr); + auto L = Info::ReadKeyDataLength(LocalPtr); // Read the key. const internal_key_type &Key = InfoObj->ReadKey(LocalPtr, L.first); @@ -555,17 +586,12 @@ public: static OnDiskIterableChainedHashTable * Create(const unsigned char *Buckets, const unsigned char *const Payload, const unsigned char *const Base, const Info &InfoObj = Info()) { - using namespace llvm::support; assert(Buckets > Base); - assert((reinterpret_cast(Buckets) & 0x3) == 0 && - "buckets should be 4-byte aligned."); - - offset_type NumBuckets = - endian::readNext(Buckets); - offset_type NumEntries = - endian::readNext(Buckets); + auto NumBucketsAndEntries = + OnDiskIterableChainedHashTable::readNumBucketsAndEntries(Buckets); return new OnDiskIterableChainedHashTable( - NumBuckets, NumEntries, Buckets, Payload, Base, InfoObj); + NumBucketsAndEntries.first, NumBucketsAndEntries.second, + Buckets, Payload, Base, InfoObj); } }; diff --git a/include/llvm/Support/Options.h b/include/llvm/Support/Options.h index 2742d3907c62..7b61b2308f57 100644 --- a/include/llvm/Support/Options.h +++ b/include/llvm/Support/Options.h @@ -71,7 +71,7 @@ private: /// \param Key unique key for option /// \param O option to map to \p Key /// - /// Allocated cl::Options are owened by the OptionRegistry and are deallocated + /// Allocated cl::Options are owned by the OptionRegistry and are deallocated /// on destruction or removal void addOption(void *Key, cl::Option *O); @@ -91,7 +91,7 @@ public: /// Options are keyed off the template parameters to generate unique static /// characters. 
The template parameters are (1) the type of the data the /// option stores (\p ValT), the class that will read the option (\p Base), - /// and the memeber that the class will store the data into (\p Mem). + /// and the member that the class will store the data into (\p Mem). template static void registerOption(const char *ArgStr, const char *Desc, const ValT &InitValue) { diff --git a/include/llvm/Support/OutputBuffer.h b/include/llvm/Support/OutputBuffer.h deleted file mode 100644 index 6b98e99e28e0..000000000000 --- a/include/llvm/Support/OutputBuffer.h +++ /dev/null @@ -1,166 +0,0 @@ -//=== OutputBuffer.h - Output Buffer ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Methods to output values to a data buffer. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_OUTPUTBUFFER_H -#define LLVM_SUPPORT_OUTPUTBUFFER_H - -#include -#include -#include - -namespace llvm { - - class OutputBuffer { - /// Output buffer. - std::vector &Output; - - /// is64Bit/isLittleEndian - This information is inferred from the target - /// machine directly, indicating what header values and flags to set. - bool is64Bit, isLittleEndian; - public: - OutputBuffer(std::vector &Out, - bool is64bit, bool le) - : Output(Out), is64Bit(is64bit), isLittleEndian(le) {} - - // align - Emit padding into the file until the current output position is - // aligned to the specified power of two boundary. - void align(unsigned Boundary) { - assert(Boundary && (Boundary & (Boundary - 1)) == 0 && - "Must align to 2^k boundary"); - size_t Size = Output.size(); - - if (Size & (Boundary - 1)) { - // Add padding to get alignment to the correct place. - size_t Pad = Boundary - (Size & (Boundary - 1)); - Output.resize(Size + Pad); - } - } - - //===------------------------------------------------------------------===// - // Out Functions - Output the specified value to the data buffer. 
- - void outbyte(unsigned char X) { - Output.push_back(X); - } - void outhalf(unsigned short X) { - if (isLittleEndian) { - Output.push_back(X & 255); - Output.push_back(X >> 8); - } else { - Output.push_back(X >> 8); - Output.push_back(X & 255); - } - } - void outword(unsigned X) { - if (isLittleEndian) { - Output.push_back((X >> 0) & 255); - Output.push_back((X >> 8) & 255); - Output.push_back((X >> 16) & 255); - Output.push_back((X >> 24) & 255); - } else { - Output.push_back((X >> 24) & 255); - Output.push_back((X >> 16) & 255); - Output.push_back((X >> 8) & 255); - Output.push_back((X >> 0) & 255); - } - } - void outxword(uint64_t X) { - if (isLittleEndian) { - Output.push_back(unsigned(X >> 0) & 255); - Output.push_back(unsigned(X >> 8) & 255); - Output.push_back(unsigned(X >> 16) & 255); - Output.push_back(unsigned(X >> 24) & 255); - Output.push_back(unsigned(X >> 32) & 255); - Output.push_back(unsigned(X >> 40) & 255); - Output.push_back(unsigned(X >> 48) & 255); - Output.push_back(unsigned(X >> 56) & 255); - } else { - Output.push_back(unsigned(X >> 56) & 255); - Output.push_back(unsigned(X >> 48) & 255); - Output.push_back(unsigned(X >> 40) & 255); - Output.push_back(unsigned(X >> 32) & 255); - Output.push_back(unsigned(X >> 24) & 255); - Output.push_back(unsigned(X >> 16) & 255); - Output.push_back(unsigned(X >> 8) & 255); - Output.push_back(unsigned(X >> 0) & 255); - } - } - void outaddr32(unsigned X) { - outword(X); - } - void outaddr64(uint64_t X) { - outxword(X); - } - void outaddr(uint64_t X) { - if (!is64Bit) - outword((unsigned)X); - else - outxword(X); - } - void outstring(const std::string &S, unsigned Length) { - unsigned len_to_copy = static_cast(S.length()) < Length - ? static_cast(S.length()) : Length; - unsigned len_to_fill = static_cast(S.length()) < Length - ? Length - static_cast(S.length()) : 0; - - for (unsigned i = 0; i < len_to_copy; ++i) - outbyte(S[i]); - - for (unsigned i = 0; i < len_to_fill; ++i) - outbyte(0); - } - - //===------------------------------------------------------------------===// - // Fix Functions - Replace an existing entry at an offset. - - void fixhalf(unsigned short X, unsigned Offset) { - unsigned char *P = &Output[Offset]; - P[0] = (X >> (isLittleEndian ? 0 : 8)) & 255; - P[1] = (X >> (isLittleEndian ? 8 : 0)) & 255; - } - void fixword(unsigned X, unsigned Offset) { - unsigned char *P = &Output[Offset]; - P[0] = (X >> (isLittleEndian ? 0 : 24)) & 255; - P[1] = (X >> (isLittleEndian ? 8 : 16)) & 255; - P[2] = (X >> (isLittleEndian ? 16 : 8)) & 255; - P[3] = (X >> (isLittleEndian ? 24 : 0)) & 255; - } - void fixxword(uint64_t X, unsigned Offset) { - unsigned char *P = &Output[Offset]; - P[0] = (X >> (isLittleEndian ? 0 : 56)) & 255; - P[1] = (X >> (isLittleEndian ? 8 : 48)) & 255; - P[2] = (X >> (isLittleEndian ? 16 : 40)) & 255; - P[3] = (X >> (isLittleEndian ? 24 : 32)) & 255; - P[4] = (X >> (isLittleEndian ? 32 : 24)) & 255; - P[5] = (X >> (isLittleEndian ? 40 : 16)) & 255; - P[6] = (X >> (isLittleEndian ? 48 : 8)) & 255; - P[7] = (X >> (isLittleEndian ? 
56 : 0)) & 255; - } - void fixaddr(uint64_t X, unsigned Offset) { - if (!is64Bit) - fixword((unsigned)X, Offset); - else - fixxword(X, Offset); - } - - unsigned char &operator[](unsigned Index) { - return Output[Index]; - } - const unsigned char &operator[](unsigned Index) const { - return Output[Index]; - } - }; - -} // end llvm namespace - -#endif // LLVM_SUPPORT_OUTPUTBUFFER_H diff --git a/include/llvm/Support/Path.h b/include/llvm/Support/Path.h index 8fae853e2cf4..955cc991d9b7 100644 --- a/include/llvm/Support/Path.h +++ b/include/llvm/Support/Path.h @@ -61,7 +61,6 @@ public: reference operator*() const { return Component; } pointer operator->() const { return &Component; } const_iterator &operator++(); // preincrement - const_iterator &operator++(int); // postincrement bool operator==(const const_iterator &RHS) const; bool operator!=(const const_iterator &RHS) const { return !(*this == RHS); } @@ -87,7 +86,6 @@ public: reference operator*() const { return Component; } pointer operator->() const { return &Component; } reverse_iterator &operator++(); // preincrement - reverse_iterator &operator++(int); // postincrement bool operator==(const reverse_iterator &RHS) const; bool operator!=(const reverse_iterator &RHS) const { return !(*this == RHS); } }; @@ -218,7 +216,7 @@ StringRef root_name(StringRef path); /// @result The root directory of \a path if it has one, otherwise /// "". StringRef root_directory(StringRef path); - + /// @brief Get root path. /// /// Equivalent to root_name + root_directory. @@ -310,7 +308,7 @@ bool is_separator(char value); /// @result StringRef of the preferred separator, null-terminated. StringRef get_separator(); -/// @brief Get the typical temporary directory for the system, e.g., +/// @brief Get the typical temporary directory for the system, e.g., /// "/var/tmp" or "C:/TEMP" /// /// @param erasedOnReboot Whether to favor a path that is erased on reboot @@ -327,6 +325,22 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl &result); /// @result True if a home directory is set, false otherwise. bool home_directory(SmallVectorImpl &result); +/// @brief Get the user's cache directory. +/// +/// Expect the resulting path to be a directory shared with other +/// applications/services used by the user. Params \p Path1 to \p Path3 can be +/// used to append additional directory names to the resulting path. Recommended +/// pattern is //. +/// +/// @param Result Holds the resulting path. +/// @param Path1 Additional path to be appended to the user's cache directory +/// path. "" can be used to append nothing. +/// @param Path2 Second additional path to be appended. +/// @param Path3 Third additional path to be appended. +/// @result True if a cache directory path is set, false otherwise. +bool user_cache_directory(SmallVectorImpl &Result, const Twine &Path1, + const Twine &Path2 = "", const Twine &Path3 = ""); + /// @brief Has root name? /// /// root_name != "" @@ -403,6 +417,19 @@ bool is_absolute(const Twine &path); /// @result True if the path is relative, false if it is not. bool is_relative(const Twine &path); +/// @brief Remove redundant leading "./" pieces and consecutive separators. +/// +/// @param path Input path. +/// @result The cleaned-up \a path. +StringRef remove_leading_dotslash(StringRef path); + +/// @brief In-place remove any './' and optionally '../' components from a path. 
+/// +/// @param path processed path +/// @param remove_dot_dot specify if '../' should be removed +/// @result True if path was changed +bool remove_dots(SmallVectorImpl &path, bool remove_dot_dot = false); + } // end namespace path } // end namespace sys } // end namespace llvm diff --git a/include/llvm/Support/PointerLikeTypeTraits.h b/include/llvm/Support/PointerLikeTypeTraits.h index 837082139214..c12d237b2796 100644 --- a/include/llvm/Support/PointerLikeTypeTraits.h +++ b/include/llvm/Support/PointerLikeTypeTraits.h @@ -15,59 +15,70 @@ #ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H #define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H +#include "llvm/Support/AlignOf.h" #include "llvm/Support/DataTypes.h" namespace llvm { - -/// PointerLikeTypeTraits - This is a traits object that is used to handle -/// pointer types and things that are just wrappers for pointers as a uniform -/// entity. -template -class PointerLikeTypeTraits { + +/// A traits type that is used to handle pointer types and things that are just +/// wrappers for pointers as a uniform entity. +template class PointerLikeTypeTraits { // getAsVoidPointer // getFromVoidPointer // getNumLowBitsAvailable }; +namespace detail { +/// A tiny meta function to compute the log2 of a compile time constant. +template +struct ConstantLog2 + : std::integral_constant::value + 1> {}; +template <> struct ConstantLog2<1> : std::integral_constant {}; +} + // Provide PointerLikeTypeTraits for non-cvr pointers. -template -class PointerLikeTypeTraits { -public: - static inline void *getAsVoidPointer(T* P) { return P; } - static inline T *getFromVoidPointer(void *P) { - return static_cast(P); - } - - /// Note, we assume here that malloc returns objects at least 4-byte aligned. - /// However, this may be wrong, or pointers may be from something other than - /// malloc. In this case, you should specialize this template to reduce this. +template struct PointerLikeTypeTraits { + static inline void *getAsVoidPointer(T *P) { return P; } + static inline T *getFromVoidPointer(void *P) { return static_cast(P); } + + enum { + NumLowBitsAvailable = detail::ConstantLog2::Alignment>::value + }; +}; + +template <> struct PointerLikeTypeTraits { + static inline void *getAsVoidPointer(void *P) { return P; } + static inline void *getFromVoidPointer(void *P) { return P; } + + /// Note, we assume here that void* is related to raw malloc'ed memory and + /// that malloc returns objects at least 4-byte aligned. However, this may be + /// wrong, or pointers may be from something other than malloc. In this case, + /// you should specify a real typed pointer or avoid this template. /// /// All clients should use assertions to do a run-time check to ensure that /// this is actually true. enum { NumLowBitsAvailable = 2 }; }; - + // Provide PointerLikeTypeTraits for const pointers. -template -class PointerLikeTypeTraits { - typedef PointerLikeTypeTraits NonConst; +template class PointerLikeTypeTraits { + typedef PointerLikeTypeTraits NonConst; public: - static inline const void *getAsVoidPointer(const T* P) { - return NonConst::getAsVoidPointer(const_cast(P)); + static inline const void *getAsVoidPointer(const T *P) { + return NonConst::getAsVoidPointer(const_cast(P)); } static inline const T *getFromVoidPointer(const void *P) { - return NonConst::getFromVoidPointer(const_cast(P)); + return NonConst::getFromVoidPointer(const_cast(P)); } enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable }; }; // Provide PointerLikeTypeTraits for uintptr_t. 
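With the PointerLikeTypeTraits change above, the number of spare low bits in a T* now follows from the pointee's alignment via detail::ConstantLog2. A sketch, using a hypothetical 8-byte-aligned struct:

#include "llvm/Support/PointerLikeTypeTraits.h"
#include <cstdint>

struct alignas(8) Node { uint64_t Payload; };

static_assert(llvm::detail::ConstantLog2<8>::value == 3, "log2(8) == 3");
static_assert(llvm::PointerLikeTypeTraits<Node *>::NumLowBitsAvailable == 3,
              "an 8-byte-aligned pointee leaves 3 tag bits in the pointer");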
-template<> -class PointerLikeTypeTraits { +template <> class PointerLikeTypeTraits { public: static inline void *getAsVoidPointer(uintptr_t P) { - return reinterpret_cast(P); + return reinterpret_cast(P); } static inline uintptr_t getFromVoidPointer(void *P) { return reinterpret_cast(P); @@ -75,7 +86,7 @@ public: // No bits are available! enum { NumLowBitsAvailable = 0 }; }; - + } // end namespace llvm #endif diff --git a/include/llvm/Support/PrettyStackTrace.h b/include/llvm/Support/PrettyStackTrace.h index 96afb60d8e51..027f9433969d 100644 --- a/include/llvm/Support/PrettyStackTrace.h +++ b/include/llvm/Support/PrettyStackTrace.h @@ -66,6 +66,18 @@ namespace llvm { void print(raw_ostream &OS) const override; }; + /// Returns the topmost element of the "pretty" stack state. + const void* SavePrettyStackState(); + + /// Restores the topmost element of the "pretty" stack state to State, which + /// should come from a previous call to SavePrettyStackState(). This is + /// useful when using a CrashRecoveryContext in code that also uses + /// PrettyStackTraceEntries, to make sure the stack that's printed if a crash + /// happens after a crash that's been recovered by CrashRecoveryContext + /// doesn't have frames on it that were added in code unwound by the + /// CrashRecoveryContext. + void RestorePrettyStackState(const void* State); + } // end namespace llvm #endif diff --git a/include/llvm/Support/Printable.h b/include/llvm/Support/Printable.h new file mode 100644 index 000000000000..5c1b8d5070d4 --- /dev/null +++ b/include/llvm/Support/Printable.h @@ -0,0 +1,52 @@ +//===--- Printable.h - Print function helpers -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Printable struct. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PRINTABLE_H +#define LLVM_SUPPORT_PRINTABLE_H + +#include + +namespace llvm { + +class raw_ostream; + +/// Simple wrapper around std::function. +/// This class is usefull to construct print helpers for raw_ostream. +/// +/// Example: +/// Printable PrintRegister(unsigned Register) { +/// return Printable([Register](raw_ostream &OS) { +/// OS << getRegisterName(Register); +/// } +/// } +/// ... OS << PrintRegister(Register); ... +/// +/// Implementation note: Ideally this would just be a typedef, but doing so +/// leads to operator << being ambiguous as function has matching constructors +/// in some STL versions. I have seen the problem on gcc 4.6 libstdc++ and +/// microsoft STL. +class Printable { +public: + std::function Print; + Printable(const std::function Print) + : Print(Print) {} +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const Printable &P) { + P.Print(OS); + return OS; +} + +} + +#endif diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h index b89a0f73ec68..43302101e3e0 100644 --- a/include/llvm/Support/Program.h +++ b/include/llvm/Support/Program.h @@ -67,8 +67,7 @@ struct ProcessInfo { /// \returns The fully qualified path to the first \p Name in \p Paths if it /// exists. \p Name if \p Name has slashes in it. Otherwise an error. 
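A usage sketch for the new Printable wrapper above, in the style of the header's own example; printHex is an illustrative name, not an LLVM API.

#include "llvm/Support/Format.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"

static llvm::Printable printHex(unsigned Value) {
  // Capture the value and defer formatting until the stream insertion.
  return llvm::Printable(
      [Value](llvm::raw_ostream &OS) { OS << llvm::format("0x%08x", Value); });
}

int main() {
  llvm::outs() << "magic = " << printHex(0xDEADBEEF) << "\n";
  return 0;
}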
ErrorOr - findProgramByName(StringRef Name, - ArrayRef Paths = ArrayRef()); + findProgramByName(StringRef Name, ArrayRef Paths = None); // These functions change the specified standard stream (stdin or stdout) to // binary mode. They return errc::success if the specified stream diff --git a/include/llvm/Support/Recycler.h b/include/llvm/Support/Recycler.h index e97f36a735fd..a38050d81903 100644 --- a/include/llvm/Support/Recycler.h +++ b/include/llvm/Support/Recycler.h @@ -28,53 +28,36 @@ namespace llvm { /// void PrintRecyclerStats(size_t Size, size_t Align, size_t FreeListSize); -/// RecyclerStruct - Implementation detail for Recycler. This is a -/// class that the recycler imposes on free'd memory to carve out -/// next/prev pointers. -struct RecyclerStruct { - RecyclerStruct *Prev, *Next; -}; - -template<> -struct ilist_traits : - public ilist_default_traits { - static RecyclerStruct *getPrev(const RecyclerStruct *t) { return t->Prev; } - static RecyclerStruct *getNext(const RecyclerStruct *t) { return t->Next; } - static void setPrev(RecyclerStruct *t, RecyclerStruct *p) { t->Prev = p; } - static void setNext(RecyclerStruct *t, RecyclerStruct *n) { t->Next = n; } - - mutable RecyclerStruct Sentinel; - RecyclerStruct *createSentinel() const { - return &Sentinel; - } - static void destroySentinel(RecyclerStruct *) {} - - RecyclerStruct *provideInitialHead() const { return createSentinel(); } - RecyclerStruct *ensureHead(RecyclerStruct*) const { return createSentinel(); } - static void noteHead(RecyclerStruct*, RecyclerStruct*) {} - - static void deleteNode(RecyclerStruct *) { - llvm_unreachable("Recycler's ilist_traits shouldn't see a deleteNode call!"); - } -}; - /// Recycler - This class manages a linked-list of deallocated nodes /// and facilitates reusing deallocated memory in place of allocating /// new memory. /// template::Alignment> class Recycler { - /// FreeList - Doubly-linked list of nodes that have deleted contents and - /// are not in active use. - /// - iplist FreeList; + struct FreeNode { + FreeNode *Next; + }; + + /// List of nodes that have deleted contents and are not in active use. + FreeNode *FreeList = nullptr; + + FreeNode *pop_val() { + auto *Val = FreeList; + FreeList = FreeList->Next; + return Val; + } + + void push(FreeNode *N) { + N->Next = FreeList; + FreeList = N; + } public: ~Recycler() { // If this fails, either the callee has lost track of some allocation, // or the callee isn't tracking allocations and should just call // clear() before deleting the Recycler. - assert(FreeList.empty() && "Non-empty recycler deleted!"); + assert(!FreeList && "Non-empty recycler deleted!"); } /// clear - Release all the tracked allocations to the allocator. The @@ -82,8 +65,8 @@ public: /// deleted; calling clear is one way to ensure this. template void clear(AllocatorType &Allocator) { - while (!FreeList.empty()) { - T *t = reinterpret_cast(FreeList.remove(FreeList.begin())); + while (FreeList) { + T *t = reinterpret_cast(pop_val()); Allocator.Deallocate(t); } } @@ -93,9 +76,7 @@ public: /// /// There is no need to traverse the free list, pulling all the objects into /// cache. 
- void clear(BumpPtrAllocator&) { - FreeList.clearAndLeakNodesUnsafely(); - } + void clear(BumpPtrAllocator &) { FreeList = nullptr; } template SubClass *Allocate(AllocatorType &Allocator) { @@ -103,9 +84,8 @@ public: "Recycler allocation alignment is less than object align!"); static_assert(sizeof(SubClass) <= Size, "Recycler allocation size is less than object size!"); - return !FreeList.empty() ? - reinterpret_cast(FreeList.remove(FreeList.begin())) : - static_cast(Allocator.Allocate(Size, Align)); + return FreeList ? reinterpret_cast(pop_val()) + : static_cast(Allocator.Allocate(Size, Align)); } template @@ -115,14 +95,20 @@ public: template void Deallocate(AllocatorType & /*Allocator*/, SubClass* Element) { - FreeList.push_front(reinterpret_cast(Element)); + push(reinterpret_cast(Element)); } - void PrintStats() { - PrintRecyclerStats(Size, Align, FreeList.size()); - } + void PrintStats(); }; +template +void Recycler::PrintStats() { + size_t S = 0; + for (auto *I = FreeList; I; I = I->Next) + ++S; + PrintRecyclerStats(Size, Align, S); +} + } #endif diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h index 95c4e96f7f29..bbea97b289a6 100644 --- a/include/llvm/Support/Registry.h +++ b/include/llvm/Support/Registry.h @@ -37,7 +37,6 @@ namespace llvm { std::unique_ptr instantiate() const { return Ctor(); } }; - /// Traits for registry entries. If using other than SimpleRegistryEntry, it /// is necessary to define an alternate traits class. template @@ -53,7 +52,6 @@ namespace llvm { static const char *descof(const entry &Entry) { return Entry.getDesc(); } }; - /// A global registry used in conjunction with static constructors to make /// pluggable components (like targets or garbage collectors) "just work" when /// linked with an executable. @@ -102,7 +100,6 @@ namespace llvm { } }; - /// Iterators for registry entries. /// class iterator { @@ -122,10 +119,9 @@ namespace llvm { static iterator end() { return iterator(nullptr); } static iterator_range entries() { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } - /// Abstract base class for registry listeners, which are informed when new /// entries are added to the registry. Simply subclass and instantiate: /// @@ -160,7 +156,7 @@ namespace llvm { } public: - listener() : Prev(ListenerTail), Next(0) { + listener() : Prev(ListenerTail), Next(nullptr) { if (Prev) Prev->Next = this; else @@ -180,7 +176,6 @@ namespace llvm { } }; - /// A static registration template. Use like such: /// /// Registry::Add @@ -210,7 +205,6 @@ namespace llvm { }; /// Registry::Parser now lives in llvm/Support/RegistryParser.h. - }; // Since these are defined in a header file, plugins must be sure to export @@ -228,6 +222,6 @@ namespace llvm { template typename Registry::listener *Registry::ListenerTail; -} +} // end namespace llvm -#endif +#endif // LLVM_SUPPORT_REGISTRY_H diff --git a/include/llvm/Support/SMLoc.h b/include/llvm/Support/SMLoc.h index d5b4c57a8fd6..c6e9a14e82ac 100644 --- a/include/llvm/Support/SMLoc.h +++ b/include/llvm/Support/SMLoc.h @@ -22,6 +22,7 @@ namespace llvm { /// Represents a location in source code. 
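The Recycler rewrite above drops the doubly linked iplist and its sentinel machinery in favor of an intrusive singly linked free list threaded through the freed objects themselves. A standalone sketch of that technique, not LLVM's class, just the same idea over plain malloc/free:

    #include <cassert>
    #include <cstdlib>

    struct FreeNode { FreeNode *Next; };

    class TinyRecycler {
      FreeNode *FreeList = nullptr; // dead blocks, linked through themselves

    public:
      ~TinyRecycler() { assert(!FreeList && "call clear() before destroying"); }

      // Reuse a recycled block when available, else allocate a fresh one.
      void *allocate(std::size_t Size) {
        assert(Size >= sizeof(FreeNode) && "block too small to recycle");
        if (FreeNode *N = FreeList) {
          FreeList = N->Next;
          return N;
        }
        return std::malloc(Size);
      }

      // The dead block's own storage becomes the list node.
      void deallocate(void *Ptr) {
        auto *N = static_cast<FreeNode *>(Ptr);
        N->Next = FreeList;
        FreeList = N;
      }

      void clear() {
        while (FreeNode *N = FreeList) {
          FreeList = N->Next;
          std::free(N);
        }
      }
    };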
class SMLoc { const char *Ptr; + public: SMLoc() : Ptr(nullptr) {} @@ -53,11 +54,10 @@ public: assert(Start.isValid() == End.isValid() && "Start and end should either both be valid or both be invalid!"); } - + bool isValid() const { return Start.isValid(); } }; - + } // end namespace llvm #endif - diff --git a/include/llvm/Support/ScaledNumber.h b/include/llvm/Support/ScaledNumber.h index 0a4262b7eec5..c6421efc8b49 100644 --- a/include/llvm/Support/ScaledNumber.h +++ b/include/llvm/Support/ScaledNumber.h @@ -282,7 +282,7 @@ int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale) { /// /// As a convenience, returns the matching scale. If the output value of one /// number is zero, returns the scale of the other. If both are zero, which -/// scale is returned is unspecifed. +/// scale is returned is unspecified. template int16_t matchScales(DigitsT &LDigits, int16_t &LScale, DigitsT &RDigits, int16_t &RScale) { @@ -334,7 +334,7 @@ std::pair getSum(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale) { static_assert(!std::numeric_limits::is_signed, "expected unsigned"); - // Check inputs up front. This is only relevent if addition overflows, but + // Check inputs up front. This is only relevant if addition overflows, but // testing here should catch more bugs. assert(LScale < INT16_MAX && "scale too large"); assert(RScale < INT16_MAX && "scale too large"); diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h index 7e165d7f3a42..2a4d84bd891a 100644 --- a/include/llvm/Support/Signals.h +++ b/include/llvm/Support/Signals.h @@ -47,6 +47,9 @@ namespace sys { /// \brief Print the stack trace using the given \c raw_ostream object. void PrintStackTrace(raw_ostream &OS); + // Run all registered signal handlers. + void RunSignalHandlers(); + /// AddSignalHandler - Add a function to be called when an abort/kill signal /// is delivered to the process. The handler can have a cookie passed to it /// to identify what instance of the handler it is. diff --git a/include/llvm/Support/StreamingMemoryObject.h b/include/llvm/Support/StreamingMemoryObject.h index 7cb6438d1342..a5980c235946 100644 --- a/include/llvm/Support/StreamingMemoryObject.h +++ b/include/llvm/Support/StreamingMemoryObject.h @@ -50,8 +50,10 @@ public: /// starts (although it can be called anytime). void setKnownObjectSize(size_t size); + /// The number of bytes read at a time from the data streamer. + static const uint32_t kChunkSize = 4096 * 4; + private: - const static uint32_t kChunkSize = 4096 * 4; mutable std::vector Bytes; std::unique_ptr Streamer; mutable size_t BytesRead; // Bytes read from stream diff --git a/include/llvm/Support/StringSaver.h b/include/llvm/Support/StringSaver.h index f3853ee91570..38fb7bb38339 100644 --- a/include/llvm/Support/StringSaver.h +++ b/include/llvm/Support/StringSaver.h @@ -18,25 +18,15 @@ namespace llvm { /// \brief Saves strings in the inheritor's stable storage and returns a stable /// raw character pointer. 
-class StringSaver { -protected: - ~StringSaver() {} - virtual const char *saveImpl(StringRef S); +class StringSaver final { + BumpPtrAllocator &Alloc; public: StringSaver(BumpPtrAllocator &Alloc) : Alloc(Alloc) {} const char *save(const char *S) { return save(StringRef(S)); } - const char *save(StringRef S) { return saveImpl(S); } + const char *save(StringRef S); const char *save(const Twine &S) { return save(StringRef(S.str())); } const char *save(std::string &S) { return save(StringRef(S)); } - -private: - BumpPtrAllocator &Alloc; -}; - -class BumpPtrStringSaver final : public StringSaver { -public: - BumpPtrStringSaver(BumpPtrAllocator &Alloc) : StringSaver(Alloc) {} }; } #endif diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h index dab724895e86..c21019d0c5b8 100644 --- a/include/llvm/Support/TargetParser.h +++ b/include/llvm/Support/TargetParser.h @@ -20,7 +20,7 @@ #include namespace llvm { - class StringRef; +class StringRef; // Target specific information into their own namespaces. These should be // generated from TableGen because the information is already there, and there @@ -29,177 +29,117 @@ namespace llvm { // even if the back-end is not compiled with LLVM, plus we need to create a new // back-end to TableGen to create these clean tables. namespace ARM { - // FPU names. - enum FPUKind { - FK_INVALID = 0, - FK_NONE, - FK_VFP, - FK_VFPV2, - FK_VFPV3, - FK_VFPV3_FP16, - FK_VFPV3_D16, - FK_VFPV3_D16_FP16, - FK_VFPV3XD, - FK_VFPV3XD_FP16, - FK_VFPV4, - FK_VFPV4_D16, - FK_FPV4_SP_D16, - FK_FPV5_D16, - FK_FPV5_SP_D16, - FK_FP_ARMV8, - FK_NEON, - FK_NEON_FP16, - FK_NEON_VFPV4, - FK_NEON_FP_ARMV8, - FK_CRYPTO_NEON_FP_ARMV8, - FK_SOFTVFP, - FK_LAST - }; - - // FPU Version - enum FPUVersion { - FV_NONE = 0, - FV_VFPV2, - FV_VFPV3, - FV_VFPV3_FP16, - FV_VFPV4, - FV_VFPV5 - }; - - // An FPU name implies one of three levels of Neon support: - enum NeonSupportLevel { - NS_None = 0, ///< No Neon - NS_Neon, ///< Neon - NS_Crypto ///< Neon with Crypto - }; - - // An FPU name restricts the FPU in one of three ways: - enum FPURestriction { - FR_None = 0, ///< No restriction - FR_D16, ///< Only 16 D registers - FR_SP_D16 ///< Only single-precision instructions, with 16 D registers - }; - - // Arch names. - enum ArchKind { - AK_INVALID = 0, - AK_ARMV2, - AK_ARMV2A, - AK_ARMV3, - AK_ARMV3M, - AK_ARMV4, - AK_ARMV4T, - AK_ARMV5T, - AK_ARMV5TE, - AK_ARMV5TEJ, - AK_ARMV6, - AK_ARMV6K, - AK_ARMV6T2, - AK_ARMV6Z, - AK_ARMV6ZK, - AK_ARMV6M, - AK_ARMV6SM, - AK_ARMV7A, - AK_ARMV7R, - AK_ARMV7M, - AK_ARMV7EM, - AK_ARMV8A, - AK_ARMV8_1A, - // Non-standard Arch names. - AK_IWMMXT, - AK_IWMMXT2, - AK_XSCALE, - AK_ARMV5, - AK_ARMV5E, - AK_ARMV6J, - AK_ARMV6HL, - AK_ARMV7, - AK_ARMV7L, - AK_ARMV7HL, - AK_ARMV7S, - AK_LAST - }; - - // Arch extension modifiers for CPUs. - enum ArchExtKind { - AEK_INVALID = 0, - AEK_CRC, - AEK_CRYPTO, - AEK_FP, - AEK_HWDIV, - AEK_MP, - AEK_SIMD, - AEK_SEC, - AEK_VIRT, - // Unsupported extensions. - AEK_OS, - AEK_IWMMXT, - AEK_IWMMXT2, - AEK_MAVERICK, - AEK_XSCALE, - AEK_LAST - }; - - // ISA kinds. - enum ISAKind { - IK_INVALID = 0, - IK_ARM, - IK_THUMB, - IK_AARCH64 - }; - - // Endianness - // FIXME: BE8 vs. BE32? - enum EndianKind { - EK_INVALID = 0, - EK_LITTLE, - EK_BIG - }; - - // v6/v7/v8 Profile - enum ProfileKind { - PK_INVALID = 0, - PK_A, - PK_R, - PK_M - }; -} // namespace ARM - -// Target Parsers, one per architecture. 
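With the rewrite above, StringSaver is a concrete final class that copies each string into a BumpPtrAllocator and hands back a stable const char *. A short usage sketch, assuming only the interface shown in this hunk:

    #include "llvm/Support/Allocator.h"
    #include "llvm/Support/StringSaver.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    int main() {
      llvm::BumpPtrAllocator Alloc;
      llvm::StringSaver Saver(Alloc);

      // Both pointers stay valid for the lifetime of Alloc, even though the
      // std::string below goes out of scope.
      const char *A = Saver.save("argv-style string");
      std::string Tmp = "temporary string";
      const char *B = Saver.save(Tmp);

      llvm::outs() << A << "\n" << B << "\n";
    }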
-class ARMTargetParser { - static StringRef getFPUSynonym(StringRef FPU); - static StringRef getArchSynonym(StringRef Arch); - -public: - static StringRef getCanonicalArchName(StringRef Arch); - - // Information by ID - static const char * getFPUName(unsigned FPUKind); - static unsigned getFPUVersion(unsigned FPUKind); - static unsigned getFPUNeonSupportLevel(unsigned FPUKind); - static unsigned getFPURestriction(unsigned FPUKind); - // FIXME: This should be moved to TargetTuple once it exists - static bool getFPUFeatures(unsigned FPUKind, - std::vector &Features); - static const char * getArchName(unsigned ArchKind); - static unsigned getArchAttr(unsigned ArchKind); - static const char * getCPUAttr(unsigned ArchKind); - static const char * getSubArch(unsigned ArchKind); - static const char * getArchExtName(unsigned ArchExtKind); - static const char * getDefaultCPU(StringRef Arch); - - // Parser - static unsigned parseFPU(StringRef FPU); - static unsigned parseArch(StringRef Arch); - static unsigned parseArchExt(StringRef ArchExt); - static unsigned parseCPUArch(StringRef CPU); - static unsigned parseArchISA(StringRef Arch); - static unsigned parseArchEndian(StringRef Arch); - static unsigned parseArchProfile(StringRef Arch); - static unsigned parseArchVersion(StringRef Arch); +// FPU names. +enum FPUKind { +#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) KIND, +#include "ARMTargetParser.def" + FK_LAST }; +// FPU Version +enum FPUVersion { + FV_NONE = 0, + FV_VFPV2, + FV_VFPV3, + FV_VFPV3_FP16, + FV_VFPV4, + FV_VFPV5 +}; + +// An FPU name implies one of three levels of Neon support: +enum NeonSupportLevel { + NS_None = 0, ///< No Neon + NS_Neon, ///< Neon + NS_Crypto ///< Neon with Crypto +}; + +// An FPU name restricts the FPU in one of three ways: +enum FPURestriction { + FR_None = 0, ///< No restriction + FR_D16, ///< Only 16 D registers + FR_SP_D16 ///< Only single-precision instructions, with 16 D registers +}; + +// Arch names. +enum ArchKind { +#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID, +#include "ARMTargetParser.def" + AK_LAST +}; + +// Arch extension modifiers for CPUs. +enum ArchExtKind : unsigned { + AEK_INVALID = 0x0, + AEK_NONE = 0x1, + AEK_CRC = 0x2, + AEK_CRYPTO = 0x4, + AEK_FP = 0x8, + AEK_HWDIV = 0x10, + AEK_HWDIVARM = 0x20, + AEK_MP = 0x40, + AEK_SIMD = 0x80, + AEK_SEC = 0x100, + AEK_VIRT = 0x200, + AEK_DSP = 0x400, + AEK_FP16 = 0x800, + // Unsupported extensions. + AEK_OS = 0x8000000, + AEK_IWMMXT = 0x10000000, + AEK_IWMMXT2 = 0x20000000, + AEK_MAVERICK = 0x40000000, + AEK_XSCALE = 0x80000000, +}; + +// ISA kinds. +enum ISAKind { IK_INVALID = 0, IK_ARM, IK_THUMB, IK_AARCH64 }; + +// Endianness +// FIXME: BE8 vs. BE32? 
+enum EndianKind { EK_INVALID = 0, EK_LITTLE, EK_BIG }; + +// v6/v7/v8 Profile +enum ProfileKind { PK_INVALID = 0, PK_A, PK_R, PK_M }; + +StringRef getCanonicalArchName(StringRef Arch); + +// Information by ID +StringRef getFPUName(unsigned FPUKind); +unsigned getFPUVersion(unsigned FPUKind); +unsigned getFPUNeonSupportLevel(unsigned FPUKind); +unsigned getFPURestriction(unsigned FPUKind); + +// FIXME: These should be moved to TargetTuple once it exists +bool getFPUFeatures(unsigned FPUKind, std::vector &Features); +bool getHWDivFeatures(unsigned HWDivKind, std::vector &Features); +bool getExtensionFeatures(unsigned Extensions, + std::vector &Features); + +StringRef getArchName(unsigned ArchKind); +unsigned getArchAttr(unsigned ArchKind); +StringRef getCPUAttr(unsigned ArchKind); +StringRef getSubArch(unsigned ArchKind); +StringRef getArchExtName(unsigned ArchExtKind); +const char *getArchExtFeature(StringRef ArchExt); +StringRef getHWDivName(unsigned HWDivKind); + +// Information by Name +unsigned getDefaultFPU(StringRef CPU, unsigned ArchKind); +unsigned getDefaultExtensions(StringRef CPU, unsigned ArchKind); +StringRef getDefaultCPU(StringRef Arch); + +// Parser +unsigned parseHWDiv(StringRef HWDiv); +unsigned parseFPU(StringRef FPU); +unsigned parseArch(StringRef Arch); +unsigned parseArchExt(StringRef ArchExt); +unsigned parseCPUArch(StringRef CPU); +unsigned parseArchISA(StringRef Arch); +unsigned parseArchEndian(StringRef Arch); +unsigned parseArchProfile(StringRef Arch); +unsigned parseArchVersion(StringRef Arch); + +} // namespace ARM } // namespace llvm #endif diff --git a/include/llvm/Support/TargetRegistry.h b/include/llvm/Support/TargetRegistry.h index 40bf6fb20c9f..aec181b1d266 100644 --- a/include/llvm/Support/TargetRegistry.h +++ b/include/llvm/Support/TargetRegistry.h @@ -115,7 +115,7 @@ public: const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU); typedef MCTargetAsmParser *(*MCAsmParserCtorTy)( - MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII, + const MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII, const MCTargetOptions &Options); typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T, const MCSubtargetInfo &STI, @@ -141,7 +141,8 @@ public: typedef MCStreamer *(*COFFStreamerCtorTy)(MCContext &Ctx, MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, - bool RelaxAll); + bool RelaxAll, + bool IncrementalLinkerCompatible); typedef MCTargetStreamer *(*NullTargetStreamerCtorTy)(MCStreamer &S); typedef MCTargetStreamer *(*AsmTargetStreamerCtorTy)( MCStreamer &S, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, @@ -382,7 +383,7 @@ public: /// /// \param Parser The target independent parser implementation to use for /// parsing and lexing. 
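The ARMTargetParser class above is dissolved into free functions in the llvm::ARM namespace, with the enum values now generated from ARMTargetParser.def. A sketch of how a front end might drive them, assuming the signatures declared in this hunk; the arch and FPU strings are illustrative:

    #include "llvm/Support/TargetParser.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>

    int main() {
      // Map an architecture name to its ArchKind ID, then query metadata.
      unsigned AK = llvm::ARM::parseArch("armv7-a");
      if (AK == llvm::ARM::AK_INVALID)
        return 1;
      llvm::outs() << "arch: " << llvm::ARM::getArchName(AK) << "\n";

      // Expand a parsed FPU name into backend feature strings.
      unsigned FK = llvm::ARM::parseFPU("neon-vfpv4");
      std::vector<const char *> Features;
      if (llvm::ARM::getFPUFeatures(FK, Features))
        for (const char *F : Features)
          llvm::outs() << F << "\n";
    }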
- MCTargetAsmParser *createMCAsmParser(MCSubtargetInfo &STI, + MCTargetAsmParser *createMCAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) const { @@ -437,6 +438,7 @@ public: MCAsmBackend &TAB, raw_pwrite_stream &OS, MCCodeEmitter *Emitter, const MCSubtargetInfo &STI, bool RelaxAll, + bool IncrementalLinkerCompatible, bool DWARFMustBeAtTheEnd) const { MCStreamer *S; switch (T.getObjectFormat()) { @@ -444,7 +446,8 @@ public: llvm_unreachable("Unknown object format"); case Triple::COFF: assert(T.isOSWindows() && "only Windows COFF is supported"); - S = COFFStreamerCtorFn(Ctx, TAB, OS, Emitter, RelaxAll); + S = COFFStreamerCtorFn(Ctx, TAB, OS, Emitter, RelaxAll, + IncrementalLinkerCompatible); break; case Triple::MachO: if (MachOStreamerCtorFn) @@ -1133,8 +1136,8 @@ template struct RegisterMCAsmParser { } private: - static MCTargetAsmParser *Allocator(MCSubtargetInfo &STI, MCAsmParser &P, - const MCInstrInfo &MII, + static MCTargetAsmParser *Allocator(const MCSubtargetInfo &STI, + MCAsmParser &P, const MCInstrInfo &MII, const MCTargetOptions &Options) { return new MCAsmParserImpl(STI, P, MII, Options); } diff --git a/include/llvm/Support/TargetSelect.h b/include/llvm/Support/TargetSelect.h index a86e953f00ea..582785cb69a5 100644 --- a/include/llvm/Support/TargetSelect.h +++ b/include/llvm/Support/TargetSelect.h @@ -25,11 +25,11 @@ extern "C" { #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target(); #include "llvm/Config/Targets.def" - + // Declare all of the target-MC-initialization functions that are available. #define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetMC(); #include "llvm/Config/Targets.def" - + // Declare all of the available assembly printer initialization functions. #define LLVM_ASM_PRINTER(TargetName) void LLVMInitialize##TargetName##AsmPrinter(); #include "llvm/Config/AsmPrinters.def" @@ -54,7 +54,7 @@ namespace llvm { #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo(); #include "llvm/Config/Targets.def" } - + /// InitializeAllTargets - The main program should call this function if it /// wants access to all available target machines that LLVM is configured to /// support, to make them available via the TargetRegistry. @@ -67,7 +67,7 @@ namespace llvm { #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target(); #include "llvm/Config/Targets.def" } - + /// InitializeAllTargetMCs - The main program should call this function if it /// wants access to all available target MC that LLVM is configured to /// support, to make them available via the TargetRegistry. @@ -77,7 +77,7 @@ namespace llvm { #define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetMC(); #include "llvm/Config/Targets.def" } - + /// InitializeAllAsmPrinters - The main program should call this function if /// it wants all asm printers that LLVM is configured to support, to make them /// available via the TargetRegistry. @@ -87,7 +87,7 @@ namespace llvm { #define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter(); #include "llvm/Config/AsmPrinters.def" } - + /// InitializeAllAsmParsers - The main program should call this function if it /// wants all asm parsers that LLVM is configured to support, to make them /// available via the TargetRegistry. 
@@ -97,7 +97,7 @@ namespace llvm {
 #define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser();
 #include "llvm/Config/AsmParsers.def"
   }
-  
+
   /// InitializeAllDisassemblers - The main program should call this function if
   /// it wants all disassemblers that LLVM is configured to support, to make
   /// them available via the TargetRegistry.
@@ -107,9 +107,9 @@ namespace llvm {
 #define LLVM_DISASSEMBLER(TargetName) LLVMInitialize##TargetName##Disassembler();
 #include "llvm/Config/Disassemblers.def"
   }
-  
+
   /// InitializeNativeTarget - The main program should call this function to
-  /// initialize the native target corresponding to the host.  This is useful
+  /// initialize the native target corresponding to the host. This is useful
   /// for JIT applications to ensure that the target gets linked in correctly.
   ///
   /// It is legal for a client to make multiple calls to this function.
@@ -123,7 +123,7 @@ namespace llvm {
 #else
     return true;
 #endif
-  }  
+  }

   /// InitializeNativeTargetAsmPrinter - The main program should call
   /// this function to initialize the native target asm printer.
@@ -135,7 +135,7 @@ namespace llvm {
 #else
     return true;
 #endif
-  }  
+  }

   /// InitializeNativeTargetAsmParser - The main program should call
   /// this function to initialize the native target asm parser.
@@ -147,7 +147,7 @@ namespace llvm {
 #else
     return true;
 #endif
-  }  
+  }

   /// InitializeNativeTargetDisassembler - The main program should call
   /// this function to initialize the native target disassembler.
@@ -159,8 +159,7 @@ namespace llvm {
 #else
     return true;
 #endif
-  }  
-  
+  }
 }

 #endif
diff --git a/include/llvm/Support/ThreadPool.h b/include/llvm/Support/ThreadPool.h
new file mode 100644
index 000000000000..745334db4450
--- /dev/null
+++ b/include/llvm/Support/ThreadPool.h
@@ -0,0 +1,136 @@
+//===-- llvm/Support/ThreadPool.h - A ThreadPool implementation -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a crude C++11 based thread pool.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_THREAD_POOL_H
+#define LLVM_SUPPORT_THREAD_POOL_H
+
+#include "llvm/Support/thread.h"
+
+#ifdef _MSC_VER
+// concrt.h depends on eh.h for __uncaught_exception declaration
+// even if we disable exceptions.
+#include <eh.h>
+
+// Disable warnings from ppltasks.h transitively included by <future>.
+#pragma warning(push)
+#pragma warning(disable:4530)
+#pragma warning(disable:4062)
+#endif
+
+#include <future>
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace llvm {
+
+/// A ThreadPool for asynchronous parallel execution on a defined number of
+/// threads.
+///
+/// The pool keeps a vector of threads alive, waiting on a condition variable
+/// for some work to become available.
+class ThreadPool {
+public:
+#ifndef _MSC_VER
+  using VoidTy = void;
+  using TaskTy = std::function<void()>;
+  using PackagedTaskTy = std::packaged_task<void()>;
+#else
+  // MSVC 2013 has a bug and can't use std::packaged_task<void()>;
+  // We force it to use bool(bool) instead.
+  using VoidTy = bool;
+  using TaskTy = std::function<bool(bool)>;
+  using PackagedTaskTy = std::packaged_task<bool(bool)>;
+#endif
+
+  /// Construct a pool with the number of cores available on the system (or
+  /// whatever the value returned by std::thread::hardware_concurrency() is).
+  ThreadPool();
+
+  /// Construct a pool of \p ThreadCount threads
+  ThreadPool(unsigned ThreadCount);
+
+  /// Blocking destructor: the pool will wait for all the threads to complete.
+  ~ThreadPool();
+
+  /// Asynchronous submission of a task to the pool. The returned future can be
+  /// used to wait for the task to finish and is *non-blocking* on destruction.
+  template <typename Function, typename... Args>
+  inline std::shared_future<VoidTy> async(Function &&F, Args &&... ArgList) {
+    auto Task =
+        std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...);
+#ifndef _MSC_VER
+    return asyncImpl(std::move(Task));
+#else
+    // This lambda has to be marked mutable because MSVC 2013's std::bind call
+    // operator isn't const qualified.
+    return asyncImpl([Task](VoidTy) mutable -> VoidTy {
+      Task();
+      return VoidTy();
+    });
+#endif
+  }
+
+  /// Asynchronous submission of a task to the pool. The returned future can be
+  /// used to wait for the task to finish and is *non-blocking* on destruction.
+  template <typename Function>
+  inline std::shared_future<VoidTy> async(Function &&F) {
+#ifndef _MSC_VER
+    return asyncImpl(std::forward<Function>(F));
+#else
+    return asyncImpl([F] (VoidTy) -> VoidTy { F(); return VoidTy(); });
+#endif
+  }
+
+  /// Blocking wait for all the threads to complete and the queue to be empty.
+  /// It is an error to try to add new tasks while blocking on this call.
+  void wait();
+
+private:
+  /// Asynchronous submission of a task to the pool. The returned future can be
+  /// used to wait for the task to finish and is *non-blocking* on destruction.
+  std::shared_future<VoidTy> asyncImpl(TaskTy F);
+
+  /// Threads in flight
+  std::vector<llvm::thread> Threads;
+
+  /// Tasks waiting for execution in the pool.
+  std::queue<PackagedTaskTy> Tasks;
+
+  /// Locking and signaling for accessing the Tasks queue.
+  std::mutex QueueLock;
+  std::condition_variable QueueCondition;
+
+  /// Locking and signaling for job completion
+  std::mutex CompletionLock;
+  std::condition_variable CompletionCondition;
+
+  /// Keep track of the number of threads actually busy
+  std::atomic<unsigned> ActiveThreads;
+
+#if LLVM_ENABLE_THREADS // avoids warning for unused variable
+  /// Signal for the destruction of the pool, asking threads to exit.
+  bool EnableFlag;
+#endif
+};
+}
+
+#endif // LLVM_SUPPORT_THREAD_POOL_H
diff --git a/include/llvm/Support/Threading.h b/include/llvm/Support/Threading.h
index 3cca1d6a9913..9007c132a99a 100644
--- a/include/llvm/Support/Threading.h
+++ b/include/llvm/Support/Threading.h
@@ -21,7 +21,7 @@ namespace llvm {
   bool llvm_is_multithreaded();

   /// llvm_execute_on_thread - Execute the given \p UserFn on a separate
-  /// thread, passing it the provided \p UserData and waits for thread 
+  /// thread, passing it the provided \p UserData and waits for thread
   /// completion.
   ///
   /// This function does not guarantee that the code will actually be executed
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index 2cd30e2aaf32..499fe7b7e70c 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -30,26 +30,25 @@ class TimeRecord {
   ssize_t MemUsed;          // Memory allocated (in bytes)
 public:
   TimeRecord() : WallTime(0), UserTime(0), SystemTime(0), MemUsed(0) {}
-  
+
   /// getCurrentTime - Get the current time and memory usage. If Start is true
   /// we get the memory usage before the time, otherwise we get time before
   /// memory usage. This matters if the time to get the memory usage is
   /// significant and shouldn't be counted as part of a duration.
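A usage sketch for the ThreadPool added above: tasks go in through async(), and wait() blocks until the queue drains. This assumes a build with LLVM_ENABLE_THREADS; the returned shared_futures are deliberately ignored since their destructors do not block:

    #include "llvm/Support/ThreadPool.h"
    #include <atomic>

    int main() {
      std::atomic<int> Sum(0);
      llvm::ThreadPool Pool(4); // four worker threads

      for (int I = 1; I <= 100; ++I)
        Pool.async([&Sum, I] { Sum += I; });

      Pool.wait(); // every task has run once this returns
      return Sum == 5050 ? 0 : 1;
    }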
static TimeRecord getCurrentTime(bool Start = true); - - double getProcessTime() const { return UserTime+SystemTime; } + + double getProcessTime() const { return UserTime + SystemTime; } double getUserTime() const { return UserTime; } double getSystemTime() const { return SystemTime; } double getWallTime() const { return WallTime; } ssize_t getMemUsed() const { return MemUsed; } - - + // operator< - Allow sorting. bool operator<(const TimeRecord &T) const { // Sort by Wall Time elapsed, as it is the only thing really accurate return WallTime < T.WallTime; } - + void operator+=(const TimeRecord &RHS) { WallTime += RHS.WallTime; UserTime += RHS.UserTime; @@ -62,12 +61,12 @@ public: SystemTime -= RHS.SystemTime; MemUsed -= RHS.MemUsed; } - - /// print - Print the current timer to standard error, and reset the "Started" - /// flag. + + /// Print the current time record to \p OS, with a breakdown showing + /// contributions to the \p Total time record. void print(const TimeRecord &Total, raw_ostream &OS) const; }; - + /// Timer - This class is used to track the amount of time spent between /// invocations of its startTimer()/stopTimer() methods. Given appropriate OS /// support it can also keep track of the RSS of the program at various points. @@ -77,11 +76,13 @@ public: /// if they are never started. /// class Timer { - TimeRecord Time; + TimeRecord Time; // The total time captured + TimeRecord StartTime; // The time startTimer() was last called std::string Name; // The name of this time variable. - bool Started; // Has this time variable ever been started? + bool Running; // Is the timer currently running? + bool Triggered; // Has the timer ever been triggered? TimerGroup *TG; // The TimerGroup this Timer is in. - + Timer **Prev, *Next; // Doubly linked list of timers in the group. public: explicit Timer(StringRef N) : TG(nullptr) { init(N); } @@ -99,25 +100,31 @@ public: explicit Timer() : TG(nullptr) {} void init(StringRef N); void init(StringRef N, TimerGroup &tg); - + const std::string &getName() const { return Name; } bool isInitialized() const { return TG != nullptr; } - - /// startTimer - Start the timer running. Time between calls to - /// startTimer/stopTimer is counted by the Timer class. Note that these calls - /// must be correctly paired. - /// + + /// Check if startTimer() has ever been called on this timer. + bool hasTriggered() const { return Triggered; } + + /// Start the timer running. Time between calls to startTimer/stopTimer is + /// counted by the Timer class. Note that these calls must be correctly + /// paired. void startTimer(); - /// stopTimer - Stop the timer. - /// + /// Stop the timer. void stopTimer(); + /// Clear the timer state. + void clear(); + + /// Return the duration for which this timer has been running. + TimeRecord getTotalTime() const { return Time; } + private: friend class TimerGroup; }; - /// The TimeRegion class is used as a helper class to call the startTimer() and /// stopTimer() methods of the Timer class. When the object is constructed, it /// starts the timer specified as its argument. When it is destroyed, it stops @@ -126,6 +133,7 @@ private: class TimeRegion { Timer *T; TimeRegion(const TimeRegion &) = delete; + public: explicit TimeRegion(Timer &t) : T(&t) { T->startTimer(); @@ -138,7 +146,6 @@ public: } }; - /// NamedRegionTimer - This class is basically a combination of TimeRegion and /// Timer. It allows you to declare a new timer, AND specify the region to /// time, all in one statement. All timers with the same name are merged. 
This
@@ -151,7 +158,6 @@ struct NamedRegionTimer : public TimeRegion {
                    bool Enabled = true);
 };

-
 /// The TimerGroup class is used to group together related timers into a single
 /// report that is printed when the TimerGroup is destroyed. It is illegal to
 /// destroy a TimerGroup object before all of the Timers in it are gone. A
@@ -160,11 +166,12 @@ struct NamedRegionTimer : public TimeRegion {
 class TimerGroup {
   std::string Name;
   Timer *FirstTimer;   // First timer in the group.
-  std::vector<std::pair<TimeRecord, std::string> > TimersToPrint;
-  
+  std::vector<std::pair<TimeRecord, std::string>> TimersToPrint;
+
   TimerGroup **Prev, *Next; // Doubly linked list of TimerGroup's.
   TimerGroup(const TimerGroup &TG) = delete;
   void operator=(const TimerGroup &TG) = delete;
+
 public:
   explicit TimerGroup(StringRef name);
   ~TimerGroup();
@@ -173,10 +180,10 @@ public:

   /// print - Print any started timers in this group and zero them.
   void print(raw_ostream &OS);
-  
+
   /// printAll - This static method prints all timers and clears them all out.
   static void printAll(raw_ostream &OS);
-  
+
 private:
   friend class Timer;
   void addTimer(Timer &T);
diff --git a/include/llvm/Support/TrailingObjects.h b/include/llvm/Support/TrailingObjects.h
new file mode 100644
index 000000000000..8529746eeccc
--- /dev/null
+++ b/include/llvm/Support/TrailingObjects.h
@@ -0,0 +1,349 @@
+//===--- TrailingObjects.h - Variable-length classes ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This header defines support for implementing classes that have
+/// some trailing object (or arrays of objects) appended to them. The
+/// main purpose is to make it obvious where this idiom is being used,
+/// and to make the usage more idiomatic and more difficult to get
+/// wrong.
+///
+/// The TrailingObject template abstracts away the reinterpret_cast,
+/// pointer arithmetic, and size calculations used for the allocation
+/// and access of appended arrays of objects, and takes care that they
+/// are all allocated at their required alignment. Additionally, it
+/// ensures that the base type is final -- deriving from a class that
+/// expects data appended immediately after it is typically not safe.
+///
+/// Users are expected to derive from this template, and provide
+/// numTrailingObjects implementations for each trailing type except
+/// the last, e.g. like this sample:
+///
+/// \code
+/// class VarLengthObj : private TrailingObjects<VarLengthObj, int, double> {
+///   friend TrailingObjects;
+///
+///   unsigned NumInts, NumDoubles;
+///   size_t numTrailingObjects(OverloadToken<int>) const { return NumInts; }
+/// };
+/// \endcode
+///
+/// You can access the appended arrays via 'getTrailingObjects', and
+/// determine the size needed for allocation via
+/// 'additionalSizeToAlloc' and 'totalSizeToAlloc'.
+///
+/// All the methods implemented by this class are intended for use
+/// by the implementation of the class, not as part of its interface
+/// (thus, private inheritance is suggested).
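The Timer changes above split the old Started flag into Running and Triggered and add the getTotalTime()/hasTriggered() accessors. A sketch of the usual pattern, driving a Timer through a TimeRegion inside a TimerGroup; the group prints its report when it is destroyed:

    #include "llvm/Support/Timer.h"
    #include "llvm/Support/raw_ostream.h"

    static void doWork() { /* workload under measurement */ }

    int main() {
      llvm::TimerGroup Group("my passes");
      llvm::Timer T;
      T.init("parsing", Group);

      {
        llvm::TimeRegion R(T); // startTimer() now, stopTimer() at scope exit
        doWork();
      }

      if (T.hasTriggered()) // true once startTimer() has ever run
        llvm::outs() << "wall: " << T.getTotalTime().getWallTime() << "\n";
    }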
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_TRAILINGOBJECTS_H +#define LLVM_SUPPORT_TRAILINGOBJECTS_H + +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/type_traits.h" +#include +#include + +namespace llvm { + +namespace trailing_objects_internal { +/// Helper template to calculate the max alignment requirement for a set of +/// objects. +template class AlignmentCalcHelper { +private: + enum { + FirstAlignment = AlignOf::Alignment, + RestAlignment = AlignmentCalcHelper::Alignment, + }; + +public: + enum { + Alignment = FirstAlignment > RestAlignment ? FirstAlignment : RestAlignment + }; +}; + +template class AlignmentCalcHelper { +public: + enum { Alignment = AlignOf::Alignment }; +}; + +/// The base class for TrailingObjects* classes. +class TrailingObjectsBase { +protected: + /// OverloadToken's purpose is to allow specifying function overloads + /// for different types, without actually taking the types as + /// parameters. (Necessary because member function templates cannot + /// be specialized, so overloads must be used instead of + /// specialization.) + template struct OverloadToken {}; +}; + +/// This helper template works-around MSVC 2013's lack of useful +/// alignas() support. The argument to LLVM_ALIGNAS(), in MSVC, is +/// required to be a literal integer. But, you *can* use template +/// specialization to select between a bunch of different LLVM_ALIGNAS +/// expressions... +template +class TrailingObjectsAligner : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(1) TrailingObjectsAligner<1> : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(2) TrailingObjectsAligner<2> : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(4) TrailingObjectsAligner<4> : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(8) TrailingObjectsAligner<8> : public TrailingObjectsBase {}; +template <> +class LLVM_ALIGNAS(16) TrailingObjectsAligner<16> : public TrailingObjectsBase { +}; +template <> +class LLVM_ALIGNAS(32) TrailingObjectsAligner<32> : public TrailingObjectsBase { +}; + +// Just a little helper for transforming a type pack into the same +// number of a different type. e.g.: +// ExtractSecondType::type +template struct ExtractSecondType { + typedef Ty2 type; +}; + +// TrailingObjectsImpl is somewhat complicated, because it is a +// recursively inheriting template, in order to handle the template +// varargs. Each level of inheritance picks off a single trailing type +// then recurses on the rest. The "Align", "BaseTy", and +// "TopTrailingObj" arguments are passed through unchanged through the +// recursion. "PrevTy" is, at each level, the type handled by the +// level right above it. + +template +struct TrailingObjectsImpl { + // The main template definition is never used -- the two + // specializations cover all possibilities. +}; + +template +struct TrailingObjectsImpl + : public TrailingObjectsImpl { + + typedef TrailingObjectsImpl + ParentType; + + // Ensure the methods we inherit are not hidden. + using ParentType::getTrailingObjectsImpl; + using ParentType::additionalSizeToAllocImpl; + + static LLVM_CONSTEXPR bool requiresRealignment() { + return llvm::AlignOf::Alignment < llvm::AlignOf::Alignment; + } + + // These two functions are helper functions for + // TrailingObjects::getTrailingObjects. 
They recurse to the left -- + // the result for each type in the list of trailing types depends on + // the result of calling the function on the type to the + // left. However, the function for the type to the left is + // implemented by a *subclass* of this class, so we invoke it via + // the TopTrailingObj, which is, via the + // curiously-recurring-template-pattern, the most-derived type in + // this recursion, and thus, contains all the overloads. + static const NextTy * + getTrailingObjectsImpl(const BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + auto *Ptr = TopTrailingObj::getTrailingObjectsImpl( + Obj, TrailingObjectsBase::OverloadToken()) + + TopTrailingObj::callNumTrailingObjects( + Obj, TrailingObjectsBase::OverloadToken()); + + if (requiresRealignment()) + return reinterpret_cast( + llvm::alignAddr(Ptr, llvm::alignOf())); + else + return reinterpret_cast(Ptr); + } + + static NextTy * + getTrailingObjectsImpl(BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + auto *Ptr = TopTrailingObj::getTrailingObjectsImpl( + Obj, TrailingObjectsBase::OverloadToken()) + + TopTrailingObj::callNumTrailingObjects( + Obj, TrailingObjectsBase::OverloadToken()); + + if (requiresRealignment()) + return reinterpret_cast( + llvm::alignAddr(Ptr, llvm::alignOf())); + else + return reinterpret_cast(Ptr); + } + + // Helper function for TrailingObjects::additionalSizeToAlloc: this + // function recurses to superclasses, each of which requires one + // fewer size_t argument, and adds its own size. + static LLVM_CONSTEXPR size_t additionalSizeToAllocImpl( + size_t SizeSoFar, size_t Count1, + typename ExtractSecondType::type... MoreCounts) { + return additionalSizeToAllocImpl( + (requiresRealignment() + ? llvm::RoundUpToAlignment(SizeSoFar, llvm::alignOf()) + : SizeSoFar) + + sizeof(NextTy) * Count1, + MoreCounts...); + } +}; + +// The base case of the TrailingObjectsImpl inheritance recursion, +// when there's no more trailing types. +template +struct TrailingObjectsImpl + : public TrailingObjectsAligner { + // This is a dummy method, only here so the "using" doesn't fail -- + // it will never be called, because this function recurses backwards + // up the inheritance chain to subclasses. + static void getTrailingObjectsImpl(); + + static LLVM_CONSTEXPR size_t additionalSizeToAllocImpl(size_t SizeSoFar) { + return SizeSoFar; + } + + template static void verifyTrailingObjectsAlignment() {} +}; + +} // end namespace trailing_objects_internal + +// Finally, the main type defined in this file, the one intended for users... + +/// See the file comment for details on the usage of the +/// TrailingObjects type. +template +class TrailingObjects : private trailing_objects_internal::TrailingObjectsImpl< + trailing_objects_internal::AlignmentCalcHelper< + TrailingTys...>::Alignment, + BaseTy, TrailingObjects, + BaseTy, TrailingTys...> { + + template + friend struct trailing_objects_internal::TrailingObjectsImpl; + + template class Foo {}; + + typedef trailing_objects_internal::TrailingObjectsImpl< + trailing_objects_internal::AlignmentCalcHelper::Alignment, + BaseTy, TrailingObjects, BaseTy, TrailingTys...> + ParentType; + using TrailingObjectsBase = trailing_objects_internal::TrailingObjectsBase; + + using ParentType::getTrailingObjectsImpl; + + // This function contains only a static_assert BaseTy is final. The + // static_assert must be in a function, and not at class-level + // because BaseTy isn't complete at class instantiation time, but + // will be by the time this function is instantiated. 
+ static void verifyTrailingObjectsAssertions() { +#ifdef LLVM_IS_FINAL + static_assert(LLVM_IS_FINAL(BaseTy), "BaseTy must be final."); +#endif + } + + // These two methods are the base of the recursion for this method. + static const BaseTy * + getTrailingObjectsImpl(const BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + return Obj; + } + + static BaseTy * + getTrailingObjectsImpl(BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + return Obj; + } + + // callNumTrailingObjects simply calls numTrailingObjects on the + // provided Obj -- except when the type being queried is BaseTy + // itself. There is always only one of the base object, so that case + // is handled here. (An additional benefit of indirecting through + // this function is that consumers only say "friend + // TrailingObjects", and thus, only this class itself can call the + // numTrailingObjects function.) + static size_t + callNumTrailingObjects(const BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + return 1; + } + + template + static size_t callNumTrailingObjects(const BaseTy *Obj, + TrailingObjectsBase::OverloadToken) { + return Obj->numTrailingObjects(TrailingObjectsBase::OverloadToken()); + } + +public: + // make this (privately inherited) class public. + using ParentType::OverloadToken; + + /// Returns a pointer to the trailing object array of the given type + /// (which must be one of those specified in the class template). The + /// array may have zero or more elements in it. + template const T *getTrailingObjects() const { + verifyTrailingObjectsAssertions(); + // Forwards to an impl function with overloads, since member + // function templates can't be specialized. + return this->getTrailingObjectsImpl( + static_cast(this), + TrailingObjectsBase::OverloadToken()); + } + + /// Returns a pointer to the trailing object array of the given type + /// (which must be one of those specified in the class template). The + /// array may have zero or more elements in it. + template T *getTrailingObjects() { + verifyTrailingObjectsAssertions(); + // Forwards to an impl function with overloads, since member + // function templates can't be specialized. + return this->getTrailingObjectsImpl( + static_cast(this), TrailingObjectsBase::OverloadToken()); + } + + /// Returns the size of the trailing data, if an object were + /// allocated with the given counts (The counts are in the same order + /// as the template arguments). This does not include the size of the + /// base object. The template arguments must be the same as those + /// used in the class; they are supplied here redundantly only so + /// that it's clear what the counts are counting in callers. + template + static LLVM_CONSTEXPR typename std::enable_if< + std::is_same, Foo>::value, size_t>::type + additionalSizeToAlloc( + typename trailing_objects_internal::ExtractSecondType< + TrailingTys, size_t>::type... Counts) { + return ParentType::additionalSizeToAllocImpl(0, Counts...); + } + + /// Returns the total size of an object if it were allocated with the + /// given trailing object counts. This is the same as + /// additionalSizeToAlloc, except it *does* include the size of the base + /// object. + template + static LLVM_CONSTEXPR typename std::enable_if< + std::is_same, Foo>::value, size_t>::type + totalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType< + TrailingTys, size_t>::type... 
Counts) { + return sizeof(BaseTy) + ParentType::additionalSizeToAllocImpl(0, Counts...); + } +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/Support/UnicodeCharRanges.h b/include/llvm/Support/UnicodeCharRanges.h index 9f738dff1107..134698c3ec6b 100644 --- a/include/llvm/Support/UnicodeCharRanges.h +++ b/include/llvm/Support/UnicodeCharRanges.h @@ -51,6 +51,11 @@ public: /// the constructor, so it makes sense to create as few UnicodeCharSet /// instances per each array of ranges, as possible. #ifdef NDEBUG + + // FIXME: This could use constexpr + static_assert. This way we + // may get rid of NDEBUG in this header. Unfortunately there are some + // problems to get this working with MSVC 2013. Change this when + // the support for MSVC 2013 is dropped. LLVM_CONSTEXPR UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {} #else UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) { diff --git a/include/llvm/Support/Valgrind.h b/include/llvm/Support/Valgrind.h index cebf75c49c19..12b0dc961daa 100644 --- a/include/llvm/Support/Valgrind.h +++ b/include/llvm/Support/Valgrind.h @@ -20,17 +20,6 @@ #include "llvm/Support/Compiler.h" #include -#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG) -// tsan (Thread Sanitizer) is a valgrind-based tool that detects these exact -// functions by name. -extern "C" { -void AnnotateHappensAfter(const char *file, int line, const volatile void *cv); -void AnnotateHappensBefore(const char *file, int line, const volatile void *cv); -void AnnotateIgnoreWritesBegin(const char *file, int line); -void AnnotateIgnoreWritesEnd(const char *file, int line); -} -#endif - namespace llvm { namespace sys { // True if Valgrind is controlling this process. @@ -39,34 +28,6 @@ namespace sys { // Discard valgrind's translation of code in the range [Addr .. Addr + Len). // Otherwise valgrind may continue to execute the old version of the code. void ValgrindDiscardTranslations(const void *Addr, size_t Len); - -#if LLVM_ENABLE_THREADS != 0 && !defined(NDEBUG) - // Thread Sanitizer is a valgrind tool that finds races in code. - // See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations . - - // This marker is used to define a happens-before arc. The race detector will - // infer an arc from the begin to the end when they share the same pointer - // argument. - #define TsanHappensBefore(cv) \ - AnnotateHappensBefore(__FILE__, __LINE__, cv) - - // This marker defines the destination of a happens-before arc. - #define TsanHappensAfter(cv) \ - AnnotateHappensAfter(__FILE__, __LINE__, cv) - - // Ignore any races on writes between here and the next TsanIgnoreWritesEnd. - #define TsanIgnoreWritesBegin() \ - AnnotateIgnoreWritesBegin(__FILE__, __LINE__) - - // Resume checking for racy writes. 
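A compilable version of the VarLengthObj sample from the header comment above, showing the allocation half of the idiom via totalSizeToAlloc; malloc and placement new stand in for whatever allocator a real client uses:

    #include "llvm/Support/TrailingObjects.h"
    #include <cstddef>
    #include <cstdlib>
    #include <new>

    class VarLengthObj final
        : private llvm::TrailingObjects<VarLengthObj, int, double> {
      friend TrailingObjects;

      unsigned NumInts, NumDoubles;
      // Required for every trailing type except the last.
      size_t numTrailingObjects(OverloadToken<int>) const { return NumInts; }

      VarLengthObj(unsigned NumInts, unsigned NumDoubles)
          : NumInts(NumInts), NumDoubles(NumDoubles) {}

    public:
      static VarLengthObj *create(unsigned NumInts, unsigned NumDoubles) {
        void *Mem =
            std::malloc(totalSizeToAlloc<int, double>(NumInts, NumDoubles));
        return new (Mem) VarLengthObj(NumInts, NumDoubles);
      }

      int *ints() { return getTrailingObjects<int>(); }
      double *doubles() { return getTrailingObjects<double>(); }
    };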
- #define TsanIgnoreWritesEnd() \ - AnnotateIgnoreWritesEnd(__FILE__, __LINE__) -#else - #define TsanHappensBefore(cv) - #define TsanHappensAfter(cv) - #define TsanIgnoreWritesBegin() - #define TsanIgnoreWritesEnd() -#endif } } diff --git a/include/llvm/Support/YAMLParser.h b/include/llvm/Support/YAMLParser.h index 0fbb7d2e6c7e..b056ab6c1ce2 100644 --- a/include/llvm/Support/YAMLParser.h +++ b/include/llvm/Support/YAMLParser.h @@ -145,11 +145,12 @@ public: unsigned int getType() const { return TypeID; } void *operator new(size_t Size, BumpPtrAllocator &Alloc, - size_t Alignment = 16) throw() { + size_t Alignment = 16) LLVM_NOEXCEPT { return Alloc.Allocate(Size, Alignment); } - void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t Size) throw() { + void operator delete(void *Ptr, BumpPtrAllocator &Alloc, + size_t Size) LLVM_NOEXCEPT { Alloc.Deallocate(Ptr, Size); } @@ -157,7 +158,7 @@ protected: std::unique_ptr &Doc; SMRange SourceRange; - void operator delete(void *) throw() {} + void operator delete(void *) LLVM_NOEXCEPT = delete; ~Node() = default; diff --git a/include/llvm/Support/YAMLTraits.h b/include/llvm/Support/YAMLTraits.h index c04294a5e87a..fb2badfd93ba 100644 --- a/include/llvm/Support/YAMLTraits.h +++ b/include/llvm/Support/YAMLTraits.h @@ -10,7 +10,6 @@ #ifndef LLVM_SUPPORT_YAMLTRAITS_H #define LLVM_SUPPORT_YAMLTRAITS_H - #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/Optional.h" @@ -29,7 +28,6 @@ namespace llvm { namespace yaml { - /// This class should be specialized by any type that needs to be converted /// to/from a YAML mapping. For example: /// @@ -52,7 +50,6 @@ struct MappingTraits { // static const bool flow = true; }; - /// This class should be specialized by any integral type that converts /// to/from a YAML scalar where there is a one-to-one mapping between /// in-memory values and a string in YAML. For example: @@ -70,7 +67,6 @@ struct ScalarEnumerationTraits { // static void enumeration(IO &io, T &value); }; - /// This class should be specialized by any integer type that is a union /// of bit values and the YAML representation is a flow sequence of /// strings. For example: @@ -88,7 +84,6 @@ struct ScalarBitSetTraits { // static void bitset(IO &io, T &value); }; - /// This class should be specialized by type that requires custom conversion /// to/from a yaml scalar. For example: /// @@ -149,7 +144,6 @@ struct BlockScalarTraits { // static StringRef input(StringRef Scalar, void *ctxt, T &Value); }; - /// This class should be specialized by any type that needs to be converted /// to/from a YAML sequence. For example: /// @@ -175,7 +169,6 @@ struct SequenceTraits { // static const bool flow = true; }; - /// This class should be specialized by any type that needs to be converted /// to/from a list of YAML documents. template @@ -185,7 +178,6 @@ struct DocumentListTraits { // static T::value_type& element(IO &io, T &seq, size_t index); }; - // Only used by compiler if both template types are the same template struct SameType; @@ -194,8 +186,6 @@ struct SameType; template struct MissingTrait; - - // Test if ScalarEnumerationTraits is defined on type T. template struct has_ScalarEnumerationTraits @@ -213,7 +203,6 @@ public: (sizeof(test >(nullptr)) == 1); }; - // Test if ScalarBitSetTraits is defined on type T. template struct has_ScalarBitSetTraits @@ -230,7 +219,6 @@ public: static bool const value = (sizeof(test >(nullptr)) == 1); }; - // Test if ScalarTraits is defined on type T. 
template struct has_ScalarTraits @@ -252,7 +240,6 @@ public: (sizeof(test>(nullptr, nullptr, nullptr)) == 1); }; - // Test if BlockScalarTraits is defined on type T. template struct has_BlockScalarTraits @@ -272,7 +259,6 @@ public: (sizeof(test>(nullptr, nullptr)) == 1); }; - // Test if MappingTraits is defined on type T. template struct has_MappingTraits @@ -305,8 +291,6 @@ public: static bool const value = (sizeof(test >(nullptr)) == 1); }; - - // Test if SequenceTraits is defined on type T. template struct has_SequenceMethodTraits @@ -323,7 +307,6 @@ public: static bool const value = (sizeof(test >(nullptr)) == 1); }; - // has_FlowTraits will cause an error with some compilers because // it subclasses int. Using this wrapper only instantiates the // real has_FlowTraits only if the template type is a class. @@ -353,14 +336,11 @@ public: static bool const value = sizeof(f(nullptr)) == 2; }; - - // Test if SequenceTraits is defined on type T template struct has_SequenceTraits : public std::integral_constant::value > { }; - // Test if DocumentListTraits is defined on type T template struct has_DocumentListTraits @@ -453,7 +433,6 @@ inline bool needsQuotes(StringRef S) { return false; } - template struct missingTraits : public std::integral_constant::value @@ -654,8 +633,6 @@ private: void *Ctxt; }; - - template typename std::enable_if::value,void>::type yamlize(IO &io, T &Val, bool) { @@ -676,7 +653,6 @@ yamlize(IO &io, T &Val, bool) { } } - template typename std::enable_if::value,void>::type yamlize(IO &io, T &Val, bool) { @@ -791,7 +767,6 @@ yamlize(IO &io, T &Seq, bool) { } } - template<> struct ScalarTraits { static void output(const bool &, void*, llvm::raw_ostream &); @@ -883,8 +858,6 @@ struct ScalarTraits { static bool mustQuote(StringRef) { return false; } }; - - // Utility for use within MappingTraits<>::mapping() method // to [de]normalize an object for use with YAML conversion. template @@ -917,14 +890,12 @@ private: TFinal &Result; }; - - // Utility for use within MappingTraits<>::mapping() method // to [de]normalize an object for use with YAML conversion. template struct MappingNormalizationHeap { MappingNormalizationHeap(IO &i_o, TFinal &Obj) - : io(i_o), BufPtr(NULL), Result(Obj) { + : io(i_o), BufPtr(nullptr), Result(Obj) { if ( io.outputting() ) { BufPtr = new (&Buffer) TNorm(io, Obj); } @@ -953,8 +924,6 @@ private: TFinal &Result; }; - - /// /// The Input class is used to parse a yaml document into in-memory structs /// and vectors. @@ -1083,7 +1052,6 @@ private: void setError(HNode *hnode, const Twine &message); void setError(Node *node, const Twine &message); - public: // These are only used by operator>>. They could be private // if those templated things could be made friends. @@ -1105,9 +1073,6 @@ private: bool ScalarMatchFound; }; - - - /// /// The Output class is used to generate a yaml document from in-memory structs /// and vectors. @@ -1181,9 +1146,6 @@ private: bool NeedsNewLine; }; - - - /// YAML I/O does conversion based on types. But often native data types /// are just a typedef of built in intergral types (e.g. int). But the C++ /// type matching system sees through the typedef and all the typedefed types @@ -1206,8 +1168,6 @@ private: _base value; \ }; - - /// /// Use these types instead of uintXX_t in any mapping to have /// its yaml output formatted as hexadecimal. 
@@ -1217,7 +1177,6 @@ LLVM_YAML_STRONG_TYPEDEF(uint16_t, Hex16) LLVM_YAML_STRONG_TYPEDEF(uint32_t, Hex32) LLVM_YAML_STRONG_TYPEDEF(uint64_t, Hex64) - template<> struct ScalarTraits { static void output(const Hex8 &, void*, llvm::raw_ostream &); @@ -1246,7 +1205,6 @@ struct ScalarTraits { static bool mustQuote(StringRef) { return false; } }; - // Define non-member operator>> so that Input can stream in a document list. template inline @@ -1303,7 +1261,6 @@ operator>>(Input &yin, T &docSeq) { return yin; } - // Define non-member operator<< so that Output can stream out document list. template inline @@ -1372,11 +1329,9 @@ operator<<(Output &yout, T &seq) { return yout; } - } // namespace yaml } // namespace llvm - /// Utility for declaring that a std::vector of a particular type /// should be considered a YAML sequence. #define LLVM_YAML_IS_SEQUENCE_VECTOR(_type) \ @@ -1436,6 +1391,4 @@ operator<<(Output &yout, T &seq) { } \ } - - #endif // LLVM_SUPPORT_YAMLTRAITS_H diff --git a/include/llvm/Support/circular_raw_ostream.h b/include/llvm/Support/circular_raw_ostream.h index 19f9c2c4b155..b46fd7f730c9 100644 --- a/include/llvm/Support/circular_raw_ostream.h +++ b/include/llvm/Support/circular_raw_ostream.h @@ -17,8 +17,7 @@ #include "llvm/Support/raw_ostream.h" -namespace llvm -{ +namespace llvm { /// circular_raw_ostream - A raw_ostream which *can* save its data /// to a circular buffer, or can pass it through directly to an /// underlying stream if specified with a buffer of zero. @@ -154,5 +153,4 @@ namespace llvm }; } // end llvm namespace - #endif diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h index 28e512c86941..d1e96f892a4b 100644 --- a/include/llvm/Support/raw_ostream.h +++ b/include/llvm/Support/raw_ostream.h @@ -218,14 +218,13 @@ public: // Formatted output, see the leftJustify() function in Support/Format.h. raw_ostream &operator<<(const FormattedString &); - + // Formatted output, see the formatHex() function in Support/Format.h. raw_ostream &operator<<(const FormattedNumber &); - + /// indent - Insert 'NumSpaces' spaces. raw_ostream &indent(unsigned NumSpaces); - /// Changes the foreground color of text that will be output from this point /// forward. /// @param Color ANSI color to use, the special SAVEDCOLOR can be used to @@ -246,7 +245,7 @@ public: /// outputting colored text, or before program exit. virtual raw_ostream &resetColor() { return *this; } - /// Reverses the forground and background colors. + /// Reverses the foreground and background colors. virtual raw_ostream &reverseColor() { return *this; } /// This function determines if this stream is connected to a "tty" or @@ -316,7 +315,7 @@ private: }; /// An abstract base class for streams implementations that also support a -/// pwrite operation. This is usefull for code that can mostly stream out data, +/// pwrite operation. This is useful for code that can mostly stream out data, /// but needs to patch in a header that needs to know the output size. class raw_pwrite_stream : public raw_ostream { virtual void pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) = 0; @@ -350,10 +349,6 @@ class raw_fd_ostream : public raw_pwrite_stream { /// bool Error; - /// Controls whether the stream should attempt to use atomic writes, when - /// possible. - bool UseAtomicWrites; - uint64_t pos; bool SupportsSeeking; @@ -403,16 +398,6 @@ public: /// to the offset specified from the beginning of the file. 
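The Hex32 strong typedef above changes only which ScalarTraits is selected, so a field renders as hexadecimal while an ordinary uint32_t stays decimal. A sketch with a hypothetical Section struct and its MappingTraits specialization, using the Output operator shown above:

    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdint>

    struct Section {
      llvm::yaml::Hex32 Address;
      uint32_t Size;
    };

    namespace llvm {
    namespace yaml {
    template <> struct MappingTraits<Section> {
      static void mapping(IO &io, Section &S) {
        io.mapRequired("address", S.Address); // emitted as 0x...
        io.mapRequired("size", S.Size);       // emitted as decimal
      }
    };
    } // end namespace yaml
    } // end namespace llvm

    int main() {
      Section S;
      S.Address = 0xDEADBEEF;
      S.Size = 64;
      llvm::yaml::Output Yout(llvm::outs());
      Yout << S;
    }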
uint64_t seek(uint64_t off); - /// Set the stream to attempt to use atomic writes for individual output - /// routines where possible. - /// - /// Note that because raw_ostream's are typically buffered, this flag is only - /// sensible when used on unbuffered streams which will flush their output - /// immediately. - void SetUseAtomicWrites(bool Value) { - UseAtomicWrites = Value; - } - raw_ostream &changeColor(enum Colors colors, bool bold=false, bool bg=false) override; raw_ostream &resetColor() override; @@ -471,6 +456,7 @@ class raw_string_ostream : public raw_ostream { /// Return the current position within the stream, not counting the bytes /// currently in the buffer. uint64_t current_pos() const override { return OS.size(); } + public: explicit raw_string_ostream(std::string &O) : OS(O) {} ~raw_string_ostream() override; @@ -485,6 +471,9 @@ public: /// A raw_ostream that writes to an SmallVector or SmallString. This is a /// simple adaptor class. This class does not encounter output errors. +/// raw_svector_ostream operates without a buffer, delegating all memory +/// management to the SmallString. Thus the SmallString is always up-to-date, +/// may be used directly and there is no need to call flush(). class raw_svector_ostream : public raw_pwrite_stream { SmallVectorImpl &OS; @@ -493,32 +482,23 @@ class raw_svector_ostream : public raw_pwrite_stream { void pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) override; - /// Return the current position within the stream, not counting the bytes - /// currently in the buffer. + /// Return the current position within the stream. uint64_t current_pos() const override; -protected: - // Like the regular constructor, but doesn't call init. - explicit raw_svector_ostream(SmallVectorImpl &O, unsigned); - void init(); - public: /// Construct a new raw_svector_ostream. /// /// \param O The vector to write to; this should generally have at least 128 /// bytes free to avoid any extraneous memory overhead. - explicit raw_svector_ostream(SmallVectorImpl &O); - ~raw_svector_ostream() override; + explicit raw_svector_ostream(SmallVectorImpl &O) : OS(O) { + SetUnbuffered(); + } + ~raw_svector_ostream() override {} + void flush() = delete; - /// This is called when the SmallVector we're appending to is changed outside - /// of the raw_svector_ostream's control. It is only safe to do this if the - /// raw_svector_ostream has previously been flushed. - void resync(); - - /// Flushes the stream contents to the target vector and return a StringRef - /// for the vector contents. - StringRef str(); + /// Return a StringRef for the vector contents. + StringRef str() { return StringRef(OS.data(), OS.size()); } }; /// A raw_ostream that discards all output. 
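With the rewrite above, raw_svector_ostream keeps the SmallString current at all times: there is no internal buffer, flush() is deleted, and str() is just a StringRef over the vector. A sketch:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/raw_ostream.h"

    int main() {
      llvm::SmallString<128> Buffer;
      llvm::raw_svector_ostream OS(Buffer);

      OS << "answer = " << 42;

      // No flush() call is needed (or even possible): Buffer is up to date.
      return OS.str() == "answer = 42" ? 0 : 1;
    }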
@@ -541,12 +521,10 @@ class buffer_ostream : public raw_svector_ostream { SmallVector<char, 0> Buffer; public: - buffer_ostream(raw_ostream &OS) : raw_svector_ostream(Buffer, 0), OS(OS) { - init(); - } - ~buffer_ostream() { OS << str(); } + buffer_ostream(raw_ostream &OS) : raw_svector_ostream(Buffer), OS(OS) {} + ~buffer_ostream() override { OS << str(); } }; } // end llvm namespace -#endif +#endif // LLVM_SUPPORT_RAW_OSTREAM_H diff --git a/include/llvm/Support/thread.h b/include/llvm/Support/thread.h new file mode 100644 index 000000000000..2d130418a57f --- /dev/null +++ b/include/llvm/Support/thread.h @@ -0,0 +1,66 @@ +//===-- llvm/Support/thread.h - Wrapper for <thread> ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header is a wrapper for <thread> that works around problems with the +// MSVC headers when exceptions are disabled. It also provides llvm::thread, +// which is either a typedef of std::thread or a replacement that calls the +// function synchronously depending on the value of LLVM_ENABLE_THREADS. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_THREAD_H +#define LLVM_SUPPORT_THREAD_H +#include "llvm/Config/llvm-config.h" + +#if LLVM_ENABLE_THREADS + +#ifdef _MSC_VER +// concrt.h depends on eh.h for __uncaught_exception declaration +// even if we disable exceptions. +#include <eh.h> + +// Suppress 'C++ exception handler used, but unwind semantics are not enabled.' +#pragma warning(push) +#pragma warning(disable:4530) +#endif + +#include <thread> + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +namespace llvm { +typedef std::thread thread; +} + +#else // !LLVM_ENABLE_THREADS + +#include <utility> + +namespace llvm { + +struct thread { + thread() {} + thread(thread &&other) {} + template <class Function, class... Args> + explicit thread(Function &&f, Args &&... args) { + f(std::forward<Args>(args)...); + } + thread(const thread &) = delete; + + void join() {} +}; + +} + +#endif // LLVM_ENABLE_THREADS + +#endif diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h index 45465aea004b..88385c3fae1e 100644 --- a/include/llvm/Support/type_traits.h +++ b/include/llvm/Support/type_traits.h @@ -93,6 +93,15 @@ struct add_const_past_pointer< } +// If the compiler supports detecting whether a class is final, define +// an LLVM_IS_FINAL macro. If it cannot be defined properly, this +// macro will be left undefined. +#if __cplusplus >= 201402L +#define LLVM_IS_FINAL(Ty) std::is_final<Ty>() +#elif __has_feature(is_final) || LLVM_GNUC_PREREQ(4, 7, 0) +#define LLVM_IS_FINAL(Ty) __is_final(Ty) +#endif + #ifdef LLVM_DEFINED_HAS_FEATURE #undef __has_feature #endif diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h index b4642c991192..eb1c5c78b9c0 100644 --- a/include/llvm/TableGen/Record.h +++ b/include/llvm/TableGen/Record.h @@ -366,7 +366,7 @@ class TypedInit : public Init { protected: explicit TypedInit(InitKind K, RecTy *T) : Init(K), Ty(T) {} - ~TypedInit() { + ~TypedInit() override { // If this is a DefInit we need to delete the RecordRecTy.
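The new Support/thread.h above lets client code stay identical whether or not LLVM_ENABLE_THREADS is set: with threading disabled, the functor simply runs synchronously in the constructor and join() is a no-op. A minimal sketch of that usage:

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Support/thread.h"

    static void hello(int Id) { llvm::errs() << "worker " << Id << "\n"; }

    int main() {
      llvm::thread T(hello, 1); // concurrent, or synchronous without threads
      T.join();                 // no-op in the single-threaded fallback
      return 0;
    }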
if (getKind() == IK_DefInit) delete Ty; @@ -547,7 +547,7 @@ public: class StringInit : public TypedInit { std::string Value; - explicit StringInit(const std::string &V) + explicit StringInit(StringRef V) : TypedInit(IK_StringInit, StringRecTy::get()), Value(V) {} StringInit(const StringInit &Other) = delete; @@ -836,8 +836,6 @@ public: class VarInit : public TypedInit { Init *VarName; - explicit VarInit(const std::string &VN, RecTy *T) - : TypedInit(IK_VarInit, T), VarName(StringInit::get(VN)) {} explicit VarInit(Init *VN, RecTy *T) : TypedInit(IK_VarInit, T), VarName(VN) {} @@ -1589,6 +1587,6 @@ Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, const std::string &Name, const std::string &Scoper); -} // End llvm namespace +} // end llvm namespace -#endif +#endif // LLVM_TABLEGEN_RECORD_H diff --git a/include/llvm/Target/CostTable.h b/include/llvm/Target/CostTable.h index 34f6041137c1..2499f5c3189c 100644 --- a/include/llvm/Target/CostTable.h +++ b/include/llvm/Target/CostTable.h @@ -15,64 +15,54 @@ #ifndef LLVM_TARGET_COSTTABLE_H_ #define LLVM_TARGET_COSTTABLE_H_ +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/MachineValueType.h" + namespace llvm { /// Cost Table Entry -template struct CostTblEntry { int ISD; - TypeTy Type; + MVT::SimpleValueType Type; unsigned Cost; }; /// Find in cost table, TypeTy must be comparable to CompareTy by == -template -int CostTableLookup(const CostTblEntry *Tbl, unsigned len, int ISD, - CompareTy Ty) { - for (unsigned int i = 0; i < len; ++i) - if (ISD == Tbl[i].ISD && Ty == Tbl[i].Type) - return i; +inline const CostTblEntry *CostTableLookup(ArrayRef Tbl, + int ISD, MVT Ty) { + auto I = std::find_if(Tbl.begin(), Tbl.end(), + [=](const CostTblEntry &Entry) { + return ISD == Entry.ISD && Ty == Entry.Type; }); + if (I != Tbl.end()) + return I; // Could not find an entry. - return -1; -} - -/// Find in cost table, TypeTy must be comparable to CompareTy by == -template -int CostTableLookup(const CostTblEntry(&Tbl)[N], int ISD, - CompareTy Ty) { - return CostTableLookup(Tbl, N, ISD, Ty); + return nullptr; } /// Type Conversion Cost Table -template struct TypeConversionCostTblEntry { int ISD; - TypeTy Dst; - TypeTy Src; + MVT::SimpleValueType Dst; + MVT::SimpleValueType Src; unsigned Cost; }; /// Find in type conversion cost table, TypeTy must be comparable to CompareTy /// by == -template -int ConvertCostTableLookup(const TypeConversionCostTblEntry *Tbl, - unsigned len, int ISD, CompareTy Dst, - CompareTy Src) { - for (unsigned int i = 0; i < len; ++i) - if (ISD == Tbl[i].ISD && Src == Tbl[i].Src && Dst == Tbl[i].Dst) - return i; +inline const TypeConversionCostTblEntry * +ConvertCostTableLookup(ArrayRef Tbl, + int ISD, MVT Dst, MVT Src) { + auto I = std::find_if(Tbl.begin(), Tbl.end(), + [=](const TypeConversionCostTblEntry &Entry) { + return ISD == Entry.ISD && Src == Entry.Src && + Dst == Entry.Dst; + }); + if (I != Tbl.end()) + return I; // Could not find an entry. 
- return -1; -} - -/// Find in type conversion cost table, TypeTy must be comparable to CompareTy -/// by == -template -int ConvertCostTableLookup(const TypeConversionCostTblEntry(&Tbl)[N], - int ISD, CompareTy Dst, CompareTy Src) { - return ConvertCostTableLookup(Tbl, N, ISD, Dst, Src); + return nullptr; } } // namespace llvm diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index e0aea181a639..79046b2b7352 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -441,6 +441,30 @@ class Instruction { string PostEncoderMethod = ""; string DecoderMethod = ""; + // Is the instruction decoder method able to completely determine if the + // given instruction is valid or not. If the TableGen definition of the + // instruction specifies bitpattern A??B where A and B are static bits, the + // hasCompleteDecoder flag says whether the decoder method fully handles the + // ?? space, i.e. if it is a final arbiter for the instruction validity. + // If not then the decoder attempts to continue decoding when the decoder + // method fails. + // + // This allows to handle situations where the encoding is not fully + // orthogonal. Example: + // * InstA with bitpattern 0b0000????, + // * InstB with bitpattern 0b000000?? but the associated decoder method + // DecodeInstB() returns Fail when ?? is 0b00 or 0b11. + // + // The decoder tries to decode a bitpattern that matches both InstA and + // InstB bitpatterns first as InstB (because it is the most specific + // encoding). In the default case (hasCompleteDecoder = 1), when + // DecodeInstB() returns Fail the bitpattern gets rejected. By setting + // hasCompleteDecoder = 0 in InstB, the decoder is informed that + // DecodeInstB() is not able to determine if all possible values of ?? are + // valid or not. If DecodeInstB() returns Fail the decoder will attempt to + // decode the bitpattern as InstA too. + bit hasCompleteDecoder = 1; + /// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc. bits<64> TSFlags = 0; @@ -595,6 +619,8 @@ class Operand : DAGOperand { string PrintMethod = "printOperand"; string EncoderMethod = ""; string DecoderMethod = ""; + bit hasCompleteDecoder = 1; + string OperandNamespace = "MCOI"; string OperandType = "OPERAND_UNKNOWN"; dag MIOperandInfo = (ops); @@ -910,9 +936,6 @@ class AsmParser { // ShouldEmitMatchRegisterName - Set to false if the target needs a hand // written register name matcher bit ShouldEmitMatchRegisterName = 1; - - /// Does the instruction mnemonic allow '.' - bit MnemonicContainsDot = 0; } def DefaultAsmParser : AsmParser; @@ -940,6 +963,15 @@ class AsmParserVariant { // register tokens as constrained registers, instead of tokens, for the // purposes of matching. 
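Returning to the CostTable.h hunk above: callers now receive a typed entry pointer instead of an index, which removes the -1 sentinel. A sketch of a lookup against the new ArrayRef-based API; the table contents and the fallback cost of 4 are invented for illustration:

    #include "llvm/CodeGen/ISDOpcodes.h"
    #include "llvm/Target/CostTable.h"
    using namespace llvm;

    static const CostTblEntry ShiftCostTbl[] = {
        {ISD::SHL, MVT::v4i32, 1},
        {ISD::SRL, MVT::v4i32, 2},
    };

    static unsigned getShiftCost(int ISDOpc, MVT VT) {
      if (const CostTblEntry *Entry = CostTableLookup(ShiftCostTbl, ISDOpc, VT))
        return Entry->Cost;
      return 4; // no table entry: assume a conservative default
    }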
string RegisterPrefix = ""; + + // TokenizingCharacters - Characters that are standalone tokens + string TokenizingCharacters = "[]*!"; + + // SeparatorCharacters - Characters that are not tokens + string SeparatorCharacters = " \t,"; + + // BreakCharacters - Characters that start new identifiers + string BreakCharacters = ""; } def DefaultAsmParserVariant : AsmParserVariant; diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h index 9d4e7a04d905..0c6c1f1468c4 100644 --- a/include/llvm/Target/TargetCallingConv.h +++ b/include/llvm/Target/TargetCallingConv.h @@ -46,6 +46,8 @@ namespace ISD { static const uint64_t SplitOffs = 11; static const uint64_t InAlloca = 1ULL<<12; ///< Passed with inalloca static const uint64_t InAllocaOffs = 12; + static const uint64_t SplitEnd = 1ULL<<13; ///< Last part of a split + static const uint64_t SplitEndOffs = 13; static const uint64_t OrigAlign = 0x1FULL<<27; static const uint64_t OrigAlignOffs = 27; static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size @@ -103,6 +105,9 @@ namespace ISD { bool isSplit() const { return Flags & Split; } void setSplit() { Flags |= One << SplitOffs; } + bool isSplitEnd() const { return Flags & SplitEnd; } + void setSplitEnd() { Flags |= One << SplitEndOffs; } + unsigned getOrigAlign() const { return (unsigned) ((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2); diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h index 3af2227410f7..cadd07d71f12 100644 --- a/include/llvm/Target/TargetFrameLowering.h +++ b/include/llvm/Target/TargetFrameLowering.h @@ -70,6 +70,18 @@ public: /// unsigned getStackAlignment() const { return StackAlignment; } + /// alignSPAdjust - This method aligns the stack adjustment to the correct + /// alignment. + /// + int alignSPAdjust(int SPAdj) const { + if (SPAdj < 0) { + SPAdj = -RoundUpToAlignment(-SPAdj, StackAlignment); + } else { + SPAdj = RoundUpToAlignment(SPAdj, StackAlignment); + } + return SPAdj; + } + /// getTransientStackAlignment - This method returns the number of bytes to /// which the stack pointer must be aligned at all times, even between /// calls. @@ -84,6 +96,11 @@ public: return StackRealignable; } + /// Return the skew that has to be applied to stack alignment under + /// certain conditions (e.g. stack was adjusted before function \p MF + /// was called). + virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const; + /// getOffsetOfLocalArea - This method returns the offset of the local area /// from the stack pointer on entrance to a function. /// @@ -129,6 +146,11 @@ public: return false; } + /// Returns true if the target will correctly handle shrink wrapping. + virtual bool enableShrinkWrapping(const MachineFunction &MF) const { + return false; + } + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. virtual void emitPrologue(MachineFunction &MF, @@ -136,6 +158,10 @@ public: virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const = 0; + /// Replace a StackProbe stub (if any) with the actual probe code inline + virtual void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologueMBB) const {} + /// Adjust the prologue to have the function use segmented stacks. This works /// by adding a check even before the "normal" function prologue. virtual void adjustForSegmentedStacks(MachineFunction &MF, @@ -207,10 +233,6 @@ public: // has any stack objects. 
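The new TargetFrameLowering::alignSPAdjust above rounds the magnitude of the adjustment up and preserves its sign. A worked example with a 16-byte StackAlignment, where TFL is an in-scope TargetFrameLowering reference assumed for illustration:

    assert(TFL.alignSPAdjust(24) == 32);   // 24 rounds up to 32
    assert(TFL.alignSPAdjust(-24) == -32); // magnitude rounds up, sign kept
    assert(TFL.alignSPAdjust(32) == 32);   // already aligned: unchanged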
However, targets may want to override this. virtual bool needsFrameIndexResolution(const MachineFunction &MF) const; - /// getFrameIndexOffset - Returns the displacement from the frame register to - /// the stack frame of the specified index. - virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const; - /// getFrameIndexReference - This method should return the base register /// and offset used to reference a frame index location. The offset is /// returned directly, and the base register is returned via FrameReg. @@ -218,10 +240,11 @@ public: unsigned &FrameReg) const; /// Same as above, except that the 'base register' will always be RSP, not - /// RBP on x86. This is used exclusively for lowering STATEPOINT nodes. + /// RBP on x86. This is generally used for emitting statepoint or EH tables + /// that use offsets from RSP. /// TODO: This should really be a parameterizable choice. virtual int getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI, - unsigned &FrameReg) const { + unsigned &FrameReg) const { // default to calling normal version, we override this on x86 only llvm_unreachable("unimplemented for non-x86"); return 0; @@ -246,6 +269,10 @@ public: RegScavenger *RS = nullptr) const { } + virtual unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const { + report_fatal_error("WinEH not implemented for this target"); + } + /// eliminateCallFramePseudoInstr - This method is called during prolog/epilog /// code insertion to eliminate call frame setup and destroy pseudo /// instructions (but only if the Target is using them). It is responsible diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 8b314f454b18..0cebcf1c6b5d 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineCombinerPattern.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { @@ -38,7 +39,6 @@ class SelectionDAG; class ScheduleDAG; class TargetRegisterClass; class TargetRegisterInfo; -class BranchProbability; class TargetSubtargetInfo; class TargetSchedModel; class DFAPacketizer; @@ -54,13 +54,18 @@ class TargetInstrInfo : public MCInstrInfo { TargetInstrInfo(const TargetInstrInfo &) = delete; void operator=(const TargetInstrInfo &) = delete; public: - TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u) - : CallFrameSetupOpcode(CFSetupOpcode), - CallFrameDestroyOpcode(CFDestroyOpcode) { - } + TargetInstrInfo(unsigned CFSetupOpcode = ~0u, unsigned CFDestroyOpcode = ~0u, + unsigned CatchRetOpcode = ~0u) + : CallFrameSetupOpcode(CFSetupOpcode), + CallFrameDestroyOpcode(CFDestroyOpcode), + CatchRetOpcode(CatchRetOpcode) {} virtual ~TargetInstrInfo(); + static bool isGenericOpcode(unsigned Opc) { + return Opc <= TargetOpcode::GENERIC_OP_END; + } + /// Given a machine instruction descriptor, returns the register /// class constraint for OpNum, or NULL. const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, @@ -94,6 +99,41 @@ protected: return false; } + /// This method commutes the operands of the given machine instruction MI. + /// The operands to be commuted are specified by their indices OpIdx1 and + /// OpIdx2. 
+ /// + /// If a target has any instructions that are commutable but require + /// converting to different instructions or making non-trivial changes + /// to commute them, this method can be overloaded to do that. + /// The default implementation simply swaps the commutable operands. + /// + /// If NewMI is false, MI is modified in place and returned; otherwise, a + /// new machine instruction is created and returned. + /// + /// Do not call this method for a non-commutable instruction. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + virtual MachineInstr *commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const; + + /// Assigns the (CommutableOpIdx1, CommutableOpIdx2) pair of commutable + /// operand indices to (ResultIdx1, ResultIdx2). + /// One or both input values of the pair: (ResultIdx1, ResultIdx2) may be + /// predefined to some indices or be undefined (designated by the special + /// value 'CommuteAnyOperandIndex'). + /// The predefined result indices cannot be re-defined. + /// The function returns true iff after the result pair redefinition + /// the fixed result pair is equal to or equivalent to the source pair of + /// indices: (CommutableOpIdx1, CommutableOpIdx2). It is assumed here that + /// the pairs (x,y) and (y,x) are equivalent. + static bool fixCommutedOpIndices(unsigned &ResultIdx1, + unsigned &ResultIdx2, + unsigned CommutableOpIdx1, + unsigned CommutableOpIdx2); + private: /// For instructions with opcodes for which the M_REMATERIALIZABLE flag is /// set and the target hook isReallyTriviallyReMaterializable returns false, @@ -111,6 +151,8 @@ public: unsigned getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; } unsigned getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; } + unsigned getCatchReturnOpcode() const { return CatchRetOpcode; } + /// Returns the actual stack pointer adjustment made by an instruction /// as part of a call sequence. By default, only call frame setup/destroy /// instructions adjust the stack, but targets may want to override this @@ -250,20 +292,51 @@ public: return nullptr; } - /// If a target has any instructions that are commutable but require - /// converting to different instructions or making non-trivial changes to - /// commute them, this method can overloaded to do that. - /// The default implementation simply swaps the commutable operands. - /// If NewMI is false, MI is modified in place and returned; otherwise, a - /// new machine instruction is created and returned. Do not call this - /// method for a non-commutable instruction, but there may be some cases - /// where this method fails and returns null. - virtual MachineInstr *commuteInstruction(MachineInstr *MI, - bool NewMI = false) const; + // This constant can be used as an input value of operand index passed to + // the method findCommutedOpIndices() to tell the method that the + // corresponding operand index is not pre-defined and that the method + // can pick any commutable operand. + static const unsigned CommuteAnyOperandIndex = ~0U; - /// If specified MI is commutable, return the two operand indices that would - /// swap value. Return false if the instruction - /// is not in a form which this routine understands. + /// This method commutes the operands of the given machine instruction MI. + /// + /// The operands to be commuted are specified by their indices OpIdx1 and + /// OpIdx2. 
OpIdx1 and OpIdx2 arguments may be set to a special value + /// 'CommuteAnyOperandIndex', which means that the method is free to choose + /// any arbitrarily chosen commutable operand. If both arguments are set to + /// 'CommuteAnyOperandIndex' then the method looks for 2 different commutable + /// operands; then commutes them if such operands could be found. + /// + /// If NewMI is false, MI is modified in place and returned; otherwise, a + /// new machine instruction is created and returned. + /// + /// Do not call this method for a non-commutable instruction or + /// for non-commuable operands. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + MachineInstr * + commuteInstruction(MachineInstr *MI, + bool NewMI = false, + unsigned OpIdx1 = CommuteAnyOperandIndex, + unsigned OpIdx2 = CommuteAnyOperandIndex) const; + + /// Returns true iff the routine could find two commutable operands in the + /// given machine instruction. + /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. + /// If any of the INPUT values is set to the special value + /// 'CommuteAnyOperandIndex' then the method arbitrarily picks a commutable + /// operand, then returns its index in the corresponding argument. + /// If both of INPUT values are set to 'CommuteAnyOperandIndex' then method + /// looks for 2 commutable operands. + /// If INPUT values refer to some operands of MI, then the method simply + /// returns true if the corresponding operands are commutable and returns + /// false otherwise. + /// + /// For example, calling this method this way: + /// unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex; + /// findCommutedOpIndices(MI, Op1, Op2); + /// can be interpreted as a query asking to find an operand that would be + /// commutable with the operand#1. virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const; @@ -511,7 +584,7 @@ public: virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return false; } @@ -526,7 +599,7 @@ public: unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return false; } @@ -538,7 +611,7 @@ public: /// will be properly predicted. virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return false; } @@ -724,13 +797,30 @@ public: /// order since the pattern evaluator stops checking as soon as it finds a /// faster sequence. /// \param Root - Instruction that could be combined with one of its operands - /// \param Pattern - Vector of possible combination patterns + /// \param Patterns - Vector of possible combination patterns virtual bool getMachineCombinerPatterns( MachineInstr &Root, - SmallVectorImpl &Pattern) const { + SmallVectorImpl &Patterns) const; + + /// Return true if the input \P Inst is part of a chain of dependent ops + /// that are suitable for reassociation, otherwise return false. + /// If the instruction's operands must be commuted to have a previous + /// instruction of the same type define the first source operand, \P Commuted + /// will be set to true. 
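Putting the commute API above together: a caller can pin one operand index and let the target pick the other via CommuteAnyOperandIndex, then perform the commute. A sketch, assuming MI and TII are in scope:

    unsigned OpIdx1 = 1; // pin operand #1
    unsigned OpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex; // target's choice
    if (TII->findCommutedOpIndices(MI, OpIdx1, OpIdx2)) {
      // In-place commute (NewMI = false); may still return nullptr on failure.
      if (MachineInstr *Commuted =
              TII->commuteInstruction(MI, /*NewMI=*/false, OpIdx1, OpIdx2))
        MI = Commuted;
    }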
+ bool isReassociationCandidate(const MachineInstr &Inst, bool &Commuted) const; + + /// Return true when \P Inst is both associative and commutative. + virtual bool isAssociativeAndCommutative(const MachineInstr &Inst) const { return false; } + /// Return true when \P Inst has reassociable operands in the same \P MBB. + virtual bool hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const; + + /// Return true when \P Inst has reassociable sibling. + bool hasReassociableSibling(const MachineInstr &Inst, bool &Commuted) const; + /// When getMachineCombinerPatterns() finds patterns, this function generates /// the instructions that could replace the original code sequence. The client /// has to decide whether the actual replacement is beneficial or not. @@ -742,12 +832,26 @@ public: /// \param InstrIdxForVirtReg - map of virtual register to instruction in /// InsInstr that defines it virtual void genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern, + MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, - DenseMap &InstrIdxForVirtReg) const { + DenseMap &InstrIdxForVirtReg) const; + + /// Attempt to reassociate \P Root and \P Prev according to \P Pattern to + /// reduce critical path length. + void reassociateOps(MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern Pattern, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + DenseMap &InstrIdxForVirtReg) const; + + /// This is an architecture-specific helper function of reassociateOps. + /// Set special operand attributes for new instructions after reassociation. + virtual void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, + MachineInstr &NewMI1, + MachineInstr &NewMI2) const { return; - } + }; /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } @@ -819,10 +923,6 @@ protected: } public: - /// Returns true for the specified load / store if folding is possible. - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - ArrayRef Ops) const; - /// unfoldMemoryOperand - Separate a single instruction which folded a load or /// a store or a load and a store into two or more instruction. If this is /// possible, returns true as well as the new instructions by reference. @@ -1266,8 +1366,73 @@ public: return 5; } + /// Return an array that contains the ids of the target indices (used for the + /// TargetIndex machine operand) and their names. + /// + /// MIR Serialization is able to serialize only the target indices that are + /// defined by this method. + virtual ArrayRef> + getSerializableTargetIndices() const { + return None; + } + + /// Decompose the machine operand's target flags into two values - the direct + /// target flag value and any of bit flags that are applied. + virtual std::pair + decomposeMachineOperandsTargetFlags(unsigned /*TF*/) const { + return std::make_pair(0u, 0u); + } + + /// Return an array that contains the direct target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. + virtual ArrayRef> + getSerializableDirectMachineOperandTargetFlags() const { + return None; + } + + /// Return an array that contains the bitmask target flag values and their + /// names. + /// + /// MIR Serialization is able to serialize only the target flags that are + /// defined by this method. 
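A target opts its operand flags into MIR serialization by overriding the hook documented above. A sketch for a hypothetical target; MyTargetInstrInfo, MyII::MO_LO16/MO_HI16, and the flag names are all invented:

    ArrayRef<std::pair<unsigned, const char *>>
    MyTargetInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
      static const std::pair<unsigned, const char *> Flags[] = {
          {MyII::MO_LO16, "my-lo16"}, {MyII::MO_HI16, "my-hi16"}};
      return makeArrayRef(Flags);
    }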
+ virtual ArrayRef> + getSerializableBitmaskMachineOperandTargetFlags() const { + return None; + } + private: unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode; + unsigned CatchRetOpcode; +}; + +/// \brief Provide DenseMapInfo for TargetInstrInfo::RegSubRegPair. +template<> +struct DenseMapInfo { + typedef DenseMapInfo RegInfo; + + static inline TargetInstrInfo::RegSubRegPair getEmptyKey() { + return TargetInstrInfo::RegSubRegPair(RegInfo::getEmptyKey(), + RegInfo::getEmptyKey()); + } + static inline TargetInstrInfo::RegSubRegPair getTombstoneKey() { + return TargetInstrInfo::RegSubRegPair(RegInfo::getTombstoneKey(), + RegInfo::getTombstoneKey()); + } + /// \brief Reuse getHashValue implementation from + /// std::pair. + static unsigned getHashValue(const TargetInstrInfo::RegSubRegPair &Val) { + std::pair PairVal = + std::make_pair(Val.Reg, Val.SubReg); + return DenseMapInfo>::getHashValue(PairVal); + } + static bool isEqual(const TargetInstrInfo::RegSubRegPair &LHS, + const TargetInstrInfo::RegSubRegPair &RHS) { + return RegInfo::isEqual(LHS.Reg, RHS.Reg) && + RegInfo::isEqual(LHS.SubReg, RHS.SubReg); + } }; } // End llvm namespace diff --git a/include/llvm/Target/TargetItinerary.td b/include/llvm/Target/TargetItinerary.td index cc74006dc9fe..a37bbf2474c5 100644 --- a/include/llvm/Target/TargetItinerary.td +++ b/include/llvm/Target/TargetItinerary.td @@ -134,3 +134,19 @@ class ProcessorItineraries fu, list bp, // info. Subtargets using NoItineraries can bypass the scheduler's // expensive HazardRecognizer because no reservation table is needed. def NoItineraries : ProcessorItineraries<[], [], []>; + +//===----------------------------------------------------------------------===// +// Combo Function Unit data - This is a map of combo function unit names to +// the list of functional units that are included in the combination. +// +class ComboFuncData funclist> { + FuncUnit TheComboFunc = ComboFunc; + list FuncList = funclist; +} + +//===----------------------------------------------------------------------===// +// Combo Function Units - This is a list of all combo function unit data. +class ComboFuncUnits cfd> { + list CFD = cfd; +} + diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 4412d9b3c68e..140c36591acc 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -83,20 +83,22 @@ class TargetLoweringBase { public: /// This enum indicates whether operations are valid for a target, and if not, /// what action should be used to make them valid. - enum LegalizeAction { + enum LegalizeAction : uint8_t { Legal, // The target natively supports this operation. Promote, // This operation should be executed in a larger type. Expand, // Try to expand this to other ops, otherwise use a libcall. + LibCall, // Don't try to expand this to other ops, always use a libcall. Custom // Use the LowerOperation hook to implement custom lowering. }; /// This enum indicates whether a types are legal for a target, and if not, /// what action should be used to make them valid. - enum LegalizeTypeAction { + enum LegalizeTypeAction : uint8_t { TypeLegal, // The target natively supports this type. TypePromoteInteger, // Replace this integer with a larger one. TypeExpandInteger, // Split this integer into two of half the size. - TypeSoftenFloat, // Convert this float to a same size integer type. + TypeSoftenFloat, // Convert this float to a same size integer type, + // if an operation is not supported in target HW. 
TypeExpandFloat, // Split this float into two of half the size. TypeScalarizeVector, // Replace this one-element vector with its element. TypeSplitVector, // Split this vector into two of half the size. @@ -124,16 +126,17 @@ public: // mask (ex: x86 blends). }; - /// Enum that specifies what a AtomicRMWInst is expanded to, if at all. Exists - /// because different targets have different levels of support for these - /// atomic RMW instructions, and also have different options w.r.t. what they - /// should expand to. - enum class AtomicRMWExpansionKind { - None, // Don't expand the instruction. - LLSC, // Expand the instruction into loadlinked/storeconditional; used - // by ARM/AArch64. Implies `hasLoadLinkedStoreConditional` - // returns true. - CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. + /// Enum that specifies what an atomic load/AtomicRMWInst is expanded + /// to, if at all. Exists because different targets have different levels of + /// support for these atomic instructions, and also have different options + /// w.r.t. what they should expand to. + enum class AtomicExpansionKind { + None, // Don't expand the instruction. + LLSC, // Expand the instruction into loadlinked/storeconditional; used + // by ARM/AArch64. + LLOnly, // Expand the (load) instruction into just a load-linked, which has + // greater atomic guarantees than a normal load. + CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. }; static ISD::NodeType getExtendForContent(BooleanContent Content) { @@ -226,7 +229,11 @@ public: /// Return true if integer divide is usually cheaper than a sequence of /// several shifts, adds, and multiplies for this target. - bool isIntDivCheap() const { return IntDivIsCheap; } + /// The definition of "cheaper" may depend on whether we're optimizing + /// for speed or for size. + virtual bool isIntDivCheap(EVT VT, AttributeSet Attr) const { + return false; + } /// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x) bool isFsqrtCheap() const { @@ -242,9 +249,6 @@ public: return BypassSlowDivWidths; } - /// Return true if pow2 sdiv is cheaper than a chain of sra/srl/add/sra. - bool isPow2SDivCheap() const { return Pow2SDivIsCheap; } - /// Return true if Flow Control is an expensive operation that should be /// avoided. bool isJumpExpensive() const { return JumpIsExpensive; } @@ -409,20 +413,20 @@ public: class ValueTypeActionImpl { /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum /// that indicates how instruction selection should deal with the type. - uint8_t ValueTypeActions[MVT::LAST_VALUETYPE]; + LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; public: ValueTypeActionImpl() { - std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), 0); + std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), + TypeLegal); } LegalizeTypeAction getTypeAction(MVT VT) const { - return (LegalizeTypeAction)ValueTypeActions[VT.SimpleTy]; + return ValueTypeActions[VT.SimpleTy]; } void setTypeAction(MVT VT, LegalizeTypeAction Action) { - unsigned I = VT.SimpleTy; - ValueTypeActions[I] = Action; + ValueTypeActions[VT.SimpleTy] = Action; } }; @@ -546,8 +550,7 @@ public: // If a target-specific SDNode requires legalization, require the target // to provide custom legalization for it. 
if (Op > array_lengthof(OpActions[0])) return Custom; - unsigned I = (unsigned) VT.getSimpleVT().SimpleTy; - return (LegalizeAction)OpActions[I][Op]; + return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; } /// Return true if the specified operation is legal on this target or can be @@ -591,7 +594,7 @@ public: unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)LoadExtActions[ValI][MemI][ExtType]; + return LoadExtActions[ValI][MemI][ExtType]; } /// Return true if the specified load with extension is legal on this target. @@ -617,7 +620,7 @@ public: unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)TruncStoreActions[ValI][MemI]; + return TruncStoreActions[ValI][MemI]; } /// Return true if the specified store with truncation is legal on this @@ -672,9 +675,9 @@ public: ((unsigned)VT.SimpleTy >> 4) < array_lengthof(CondCodeActions[0]) && "Table isn't big enough!"); // See setCondCodeAction for how this is encoded. - uint32_t Shift = 2 * (VT.SimpleTy & 0xF); - uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 4]; - LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0x3); + uint32_t Shift = 4 * (VT.SimpleTy & 0x7); + uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; + LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); assert(Action != Promote && "Can't promote condition code!"); return Action; } @@ -832,6 +835,10 @@ public: return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); } + unsigned getGatherAllAliasesMaxDepth() const { + return GatherAllAliasesMaxDepth; + } + /// \brief Get maximum # of store operations permitted for llvm.memset /// /// This function returns the maximum number of store operations permitted @@ -878,6 +885,14 @@ public: return false; } + /// Return true if the target supports a memory access of this type for the + /// given address space and alignment. If the access is allowed, the optional + /// final parameter returns if the access is also fast (as defined by the + /// target). + bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, + unsigned AddrSpace = 0, unsigned Alignment = 1, + bool *Fast = nullptr) const; + /// Returns the target specific optimal type for load and store operations as /// a result of memset, memcpy, and memmove lowering. /// @@ -930,15 +945,19 @@ public: } /// If a physical register, this returns the register that receives the - /// exception address on entry to a landing pad. - unsigned getExceptionPointerRegister() const { - return ExceptionPointerRegister; + /// exception address on entry to an EH pad. + virtual unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const { + // 0 is guaranteed to be the NoRegister value on all targets + return 0; } /// If a physical register, this returns the register that receives the /// exception typeid on entry to a landing pad. 
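The new allowsMemoryAccess above bundles the legality check with the fast-path query. For example, probing a 4-byte-aligned i64 access, with TLI, Ctx, and DL assumed in scope:

    bool Fast = false;
    if (TLI.allowsMemoryAccess(Ctx, DL, MVT::i64, /*AddrSpace=*/0,
                               /*Alignment=*/4, &Fast)) {
      // The access is legal; Fast says whether the target also
      // considers the misaligned form cheap.
    }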
- unsigned getExceptionSelectorRegister() const { - return ExceptionSelectorRegister; + virtual unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const { + // 0 is guaranteed to be the NoRegister value on all targets + return 0; } /// Returns the target's jmp_buf size in bytes (if never set, the default is @@ -987,6 +1006,10 @@ public: return false; } + /// If the target has a standard location for the unsafe stack pointer, + /// returns the address of that location. Otherwise, returns nullptr. + virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; + /// Returns true if a cast between SrcAS and DestAS is a noop. virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { return false; @@ -1009,8 +1032,8 @@ public: int InstructionOpcodeToISD(unsigned Opcode) const; /// Estimate the cost of type-legalization and the legalized type. - std::pair getTypeLegalizationCost(const DataLayout &DL, - Type *Ty) const; + std::pair getTypeLegalizationCost(const DataLayout &DL, + Type *Ty) const; /// @} @@ -1018,10 +1041,6 @@ public: /// \name Helpers for atomic expansion. /// @{ - /// True if AtomicExpandPass should use emitLoadLinked/emitStoreConditional - /// and expand AtomicCmpXchgInst. - virtual bool hasLoadLinkedStoreConditional() const { return false; } - /// Perform a load-linked operation on Addr, returning a "Value *" with the /// corresponding pointee type. This may entail some non-trivial operations to /// truncate or reconstruct types that will be illegal in the backend. See @@ -1093,6 +1112,14 @@ public: } /// @} + // Emits code that executes when the comparison result in the ll/sc + // expansion of a cmpxchg instruction is such that the store-conditional will + // not execute. This makes it possible to balance out the load-linked with + // a dedicated instruction, if desired. + // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would + // be unnecessarily held, except if clrex, inserted by this hook, is executed. + virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} + /// Returns true if the given (atomic) store should be expanded by the /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { @@ -1102,18 +1129,25 @@ public: /// Returns true if arguments should be sign-extended in lib calls. virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { return IsSigned; - } + } - /// Returns true if the given (atomic) load should be expanded by the - /// IR-level AtomicExpand pass into a load-linked instruction - /// (through emitLoadLinked()). - virtual bool shouldExpandAtomicLoadInIR(LoadInst *LI) const { return false; } + /// Returns how the given (atomic) load should be expanded by the + /// IR-level AtomicExpand pass. + virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { + return AtomicExpansionKind::None; + } + + /// Returns true if the given atomic cmpxchg should be expanded by the + /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence + /// (through emitLoadLinked() and emitStoreConditional()). + virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { + return false; + } /// Returns how the IR-level AtomicExpand pass should expand the given /// AtomicRMW, if at all. Default is to never expand. 
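Under the renamed AtomicExpansionKind (the hook rename appears just below), a target now answers with an expansion strategy rather than a bool. A sketch for a hypothetical LL/SC target; MyTargetLowering and the 64-bit cutoff are invented:

    TargetLowering::AtomicExpansionKind
    MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
      unsigned Bits = AI->getType()->getPrimitiveSizeInBits();
      return Bits <= 64 ? AtomicExpansionKind::LLSC
                        : AtomicExpansionKind::None;
    }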
- virtual AtomicRMWExpansionKind - shouldExpandAtomicRMWInIR(AtomicRMWInst *) const { - return AtomicRMWExpansionKind::None; + virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const { + return AtomicExpansionKind::None; } /// On some platforms, an AtomicRMW that never actually modifies the value @@ -1204,18 +1238,6 @@ protected: StackPointerRegisterToSaveRestore = R; } - /// If set to a physical register, this sets the register that receives the - /// exception address on entry to a landing pad. - void setExceptionPointerRegister(unsigned R) { - ExceptionPointerRegister = R; - } - - /// If set to a physical register, this sets the register that receives the - /// exception typeid on entry to a landing pad. - void setExceptionSelectorRegister(unsigned R) { - ExceptionSelectorRegister = R; - } - /// Tells the code generator not to expand operations into sequences that use /// the select operations if possible. void setSelectIsExpensive(bool isExpensive = true) { @@ -1244,11 +1266,6 @@ protected: /// control. void setJumpIsExpensive(bool isExpensive = true); - /// Tells the code generator that integer divide is expensive, and if - /// possible, should be replaced by an alternate sequence of instructions not - /// containing an integer divide. - void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; } - /// Tells the code generator that fsqrt is cheap, and should not be replaced /// with an alternative sequence of instructions. void setFsqrtIsCheap(bool isCheap = true) { FsqrtIsCheap = isCheap; } @@ -1264,10 +1281,6 @@ protected: BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; } - /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a - /// signed divide by power of two; let the target handle it. - void setPow2SDivIsCheap(bool isCheap = true) { Pow2SDivIsCheap = isCheap; } - /// Add the specified register class as an available regclass for the /// specified value type. This indicates the selector can handle values of /// that class natively. @@ -1279,7 +1292,7 @@ protected: /// Remove all register classes. 
void clearRegisterClasses() { - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE * sizeof(TargetRegisterClass*)); + std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr); AvailableRegClasses.clear(); } @@ -1302,7 +1315,7 @@ protected: void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); - OpActions[(unsigned)VT.SimpleTy][Op] = (uint8_t)Action; + OpActions[(unsigned)VT.SimpleTy][Op] = Action; } /// Indicate that the specified load with extension does not work with the @@ -1311,7 +1324,7 @@ protected: LegalizeAction Action) { assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); - LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = (uint8_t)Action; + LoadExtActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = Action; } /// Indicate that the specified truncating store does not work with the @@ -1319,7 +1332,7 @@ protected: void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); - TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action; + TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; } /// Indicate that the specified indexed load does or does not work with the @@ -1356,12 +1369,13 @@ protected: LegalizeAction Action) { assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && "Table isn't big enough!"); - /// The lower 5 bits of the SimpleTy index into Nth 2bit set from the 32-bit - /// value and the upper 27 bits index into the second dimension of the array + assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); + /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit + /// value and the upper 29 bits index into the second dimension of the array /// to select what 32-bit value to use. - uint32_t Shift = 2 * (VT.SimpleTy & 0xF); - CondCodeActions[CC][VT.SimpleTy >> 4] &= ~((uint32_t)0x3 << Shift); - CondCodeActions[CC][VT.SimpleTy >> 4] |= (uint32_t)Action << Shift; + uint32_t Shift = 4 * (VT.SimpleTy & 0x7); + CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); + CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; } /// If Opc/OrigVT is specified as being promoted, the promotion code defaults @@ -1504,23 +1518,24 @@ public: return false; } - /// Return true if it's free to truncate a value of type Ty1 to type - /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 + /// Return true if it's free to truncate a value of type FromTy to type + /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 /// by referencing its sub-register AX. - virtual bool isTruncateFree(Type * /*Ty1*/, Type * /*Ty2*/) const { + /// Targets must return false when FromTy <= ToTy. + virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { return false; } - /// Return true if a truncation from Ty1 to Ty2 is permitted when deciding + /// Return true if a truncation from FromTy to ToTy is permitted when deciding /// whether a call is in tail position. Typically this means that both results /// would be assigned to the same register or stack slot, but it could mean /// the target performs adequate checks of its own before proceeding with the - /// tail call. - virtual bool allowTruncateForTailCall(Type * /*Ty1*/, Type * /*Ty2*/) const { + /// tail call. Targets must return false when FromTy <= ToTy. 
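The tightened isTruncateFree contract above ("must return false when FromTy <= ToTy") is easy to satisfy with a strict size comparison. An x86-flavoured sketch; MyTargetLowering is hypothetical:

    bool MyTargetLowering::isTruncateFree(Type *FromTy, Type *ToTy) const {
      if (!FromTy->isIntegerTy() || !ToTy->isIntegerTy())
        return false;
      // Strictly narrowing only, e.g. i32 -> i16 via a sub-register.
      return FromTy->getPrimitiveSizeInBits() > ToTy->getPrimitiveSizeInBits();
    }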
+ virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { return false; } - virtual bool isTruncateFree(EVT /*VT1*/, EVT /*VT2*/) const { + virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; } @@ -1553,19 +1568,21 @@ public: return isExtFreeImpl(I); } - /// Return true if any actual instruction that defines a value of type Ty1 - /// implicitly zero-extends the value to Ty2 in the result register. + /// Return true if any actual instruction that defines a value of type FromTy + /// implicitly zero-extends the value to ToTy in the result register. /// - /// This does not necessarily include registers defined in unknown ways, such - /// as incoming arguments, or copies from unknown virtual registers. Also, if - /// isTruncateFree(Ty2, Ty1) is true, this does not necessarily apply to - /// truncate instructions. e.g. on x86-64, all instructions that define 32-bit - /// values implicit zero-extend the result out to 64 bits. - virtual bool isZExtFree(Type * /*Ty1*/, Type * /*Ty2*/) const { + /// The function should return true when it is likely that the truncate can + /// be freely folded with an instruction defining a value of FromTy. If + /// the defining instruction is unknown (because you're looking at a + /// function argument, PHI, etc.) then the target may require an + /// explicit truncate, which is not necessarily free, but this function + /// does not deal with those cases. + /// Targets must return false when FromTy >= ToTy. + virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { return false; } - virtual bool isZExtFree(EVT /*VT1*/, EVT /*VT2*/) const { + virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; } @@ -1699,6 +1716,12 @@ public: return false; } + // Return true if it is profitable to use a scalar input to a BUILD_VECTOR + // even if the vector itself has multiple uses. + virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { + return false; + } + //===--------------------------------------------------------------------===// // Runtime Library hooks // @@ -1755,12 +1778,6 @@ private: /// combined with "shift" to BitExtract instructions. bool HasExtractBitsInsn; - /// Tells the code generator not to expand integer divides by constants into a - /// sequence of muls, adds, and shifts. This is a hack until a real cost - /// model is in place. If we ever optimize for size, this will be set to true - /// unconditionally. - bool IntDivIsCheap; - // Don't expand fsqrt with an approximation based on the inverse sqrt. bool FsqrtIsCheap; @@ -1770,10 +1787,6 @@ private: /// div/rem when the operands are positive and less than 256. DenseMap BypassSlowDivWidths; - /// Tells the code generator that it shouldn't generate sra/srl/add/sra for a - /// signed divide by power of two; let the target handle it. - bool Pow2SDivIsCheap; - /// Tells the code generator that it shouldn't generate extra flow control /// instructions and should attempt to combine flow control instructions via /// predication. @@ -1841,14 +1854,6 @@ private: /// llvm.savestack/llvm.restorestack should save and restore. unsigned StackPointerRegisterToSaveRestore; - /// If set to a physical register, this specifies the register that receives - /// the exception address on entry to a landing pad. - unsigned ExceptionPointerRegister; - - /// If set to a physical register, this specifies the register that receives - /// the exception typeid on entry to a landing pad. 
- unsigned ExceptionSelectorRegister; - /// This indicates the default register class to use for each ValueType the /// target supports natively. const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; @@ -1880,17 +1885,17 @@ private: /// operations are Legal (aka, supported natively by the target), but /// operations that are not should be described. Note that operations on /// non-legal value types are not described here. - uint8_t OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; + LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; /// For each load extension type and each value type, keep a LegalizeAction /// that indicates how instruction selection should deal with a load of a /// specific value type and extension type. - uint8_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE] - [ISD::LAST_LOADEXT_TYPE]; + LegalizeAction LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE] + [ISD::LAST_LOADEXT_TYPE]; /// For each value type pair keep a LegalizeAction that indicates whether a /// truncating store of a specific value type and truncating type is legal. - uint8_t TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; + LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; /// For each indexed mode and each value type, keep a pair of LegalizeAction /// that indicates how instruction selection should deal with the load / @@ -1903,11 +1908,12 @@ private: /// For each condition code (ISD::CondCode) keep a LegalizeAction that /// indicates how instruction selection should deal with the condition code. /// - /// Because each CC action takes up 2 bits, we need to have the array size be + /// Because each CC action takes up 4 bits, we need to have the array size be /// large enough to fit all of the value types. This can be done by rounding - /// up the MVT::LAST_VALUETYPE value to the next multiple of 16. - uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 15) / 16]; + /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. + uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; +protected: ValueTypeActionImpl ValueTypeActions; private: @@ -1947,6 +1953,12 @@ protected: /// is[Z|FP]ExtFree of the related types is not true. virtual bool isExtFreeImpl(const Instruction *I) const { return false; } + /// Depth that GatherAllAliases should should continue looking for chain + /// dependencies when trying to find a more preferrable chain. As an + /// approximation, this should be more than the number of consecutive stores + /// expected to be merged. + unsigned GatherAllAliasesMaxDepth; + /// \brief Specify maximum number of store instructions per memset call. /// /// When lowering \@llvm.memset this field specifies the maximum number of @@ -1993,7 +2005,7 @@ protected: unsigned MaxStoresPerMemmove; /// Maximum number of store instructions that may be substituted for a call to - /// memmove, used for functions with OpSize attribute. + /// memmove, used for functions with OptSize attribute. unsigned MaxStoresPerMemmoveOptSize; /// Tells the code generator that select is more expensive than a branch if @@ -2087,9 +2099,9 @@ public: /// Returns a pair of (return value, chain). /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. 
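The updated makeLibCall takes an ArrayRef of operands instead of a pointer/count pair (its new signature follows just below). A sketch lowering a 64-bit signed division, with DAG, TLI, and Op assumed in scope:

    SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
    std::pair<SDValue, SDValue> CallInfo =
        TLI.makeLibCall(DAG, RTLIB::SDIV_I64, MVT::i64, Ops,
                        /*isSigned=*/true, SDLoc(Op));
    SDValue Result = CallInfo.first; // second is the output chain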
std::pair makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, - EVT RetVT, const SDValue *Ops, - unsigned NumOps, bool isSigned, - SDLoc dl, bool doesNotReturn = false, + EVT RetVT, ArrayRef Ops, + bool isSigned, SDLoc dl, + bool doesNotReturn = false, bool isReturnValueUsed = true) const; //===--------------------------------------------------------------------===// @@ -2251,6 +2263,29 @@ public: return false; } + /// Return true if the target supports that a subset of CSRs for the given + /// machine function is handled explicitly via copies. + virtual bool supportSplitCSR(MachineFunction *MF) const { + return false; + } + + /// Perform necessary initialization to handle a subset of CSRs explicitly + /// via copies. This function is called at the beginning of instruction + /// selection. + virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { + llvm_unreachable("Not Implemented"); + } + + /// Insert explicit copies in entry and exit blocks. We copy a subset of + /// CSRs to virtual registers in the entry block, and copy them back to + /// physical registers in the exit blocks. This function is called at the end + /// of instruction selection. + virtual void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl &Exits) const { + llvm_unreachable("Not Implemented"); + } + //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that // the SelectionDAGBuilder code knows how to lower these. @@ -2726,16 +2761,21 @@ public: SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector *Created) const; + + /// Targets may override this function to provide custom SDIV lowering for + /// power-of-2 denominators. If the target returns an empty SDValue, LLVM + /// assumes SDIV is expensive and replaces it with a series of other integer + /// operations. virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, - std::vector *Created) const { - return SDValue(); - } + std::vector *Created) const; - /// Indicate whether this target prefers to combine the given number of FDIVs - /// with the same divisor. - virtual bool combineRepeatedFPDivisors(unsigned NumUsers) const { - return false; + /// Indicate whether this target prefers to combine FDIVs with the same + /// divisor. If the transform should never be done, return zero. If the + /// transform should be done, return the minimum number of divisor uses + /// that must exist. + virtual unsigned combineRepeatedFPDivisors() const { + return 0; } /// Hooks for building estimates in place of slower divisions and square @@ -2821,6 +2861,10 @@ public: virtual bool useLoadStackGuardNode() const { return false; } + + /// Lower TLS global address SDNode for target independent emulated TLS model. 
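combineRepeatedFPDivisors above switches from a yes/no answer to a use-count threshold, with zero disabling the combine entirely. A sketch of a target that wants the reciprocal transform once a divisor has two users; MyTargetLowering is hypothetical:

    unsigned MyTargetLowering::combineRepeatedFPDivisors() const {
      return 2; // rewrite x/d, y/d as r = 1/d; x*r, y*r at >= 2 uses
    }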
+ virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const; }; /// Given an LLVM IR type and return type attributes, compute the return value diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h index 5b626c244ba0..cb52698c58b9 100644 --- a/include/llvm/Target/TargetLoweringObjectFile.h +++ b/include/llvm/Target/TargetLoweringObjectFile.h @@ -42,16 +42,15 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { void operator=(const TargetLoweringObjectFile&) = delete; protected: - const DataLayout *DL; bool SupportIndirectSymViaGOTPCRel; bool SupportGOTPCRelWithOffset; public: MCContext &getContext() const { return *Ctx; } - TargetLoweringObjectFile() : MCObjectFileInfo(), Ctx(nullptr), DL(nullptr), - SupportIndirectSymViaGOTPCRel(false), - SupportGOTPCRelWithOffset(true) {} + TargetLoweringObjectFile() + : MCObjectFileInfo(), Ctx(nullptr), SupportIndirectSymViaGOTPCRel(false), + SupportGOTPCRelWithOffset(true) {} virtual ~TargetLoweringObjectFile(); @@ -60,8 +59,7 @@ public: /// implementations a chance to set up their default sections. virtual void Initialize(MCContext &ctx, const TargetMachine &TM); - virtual void emitPersonalityValue(MCStreamer &Streamer, - const TargetMachine &TM, + virtual void emitPersonalityValue(MCStreamer &Streamer, const DataLayout &TM, const MCSymbol *Sym) const; /// Emit the module flags that the platform cares about. @@ -71,7 +69,8 @@ public: /// Given a constant with the SectionKind, return a section that it should be /// placed in. - virtual MCSection *getSectionForConstant(SectionKind Kind, + virtual MCSection *getSectionForConstant(const DataLayout &DL, + SectionKind Kind, const Constant *C) const; /// Classify the specified global variable into a set of target independent @@ -94,8 +93,7 @@ public: } virtual void getNameWithPrefix(SmallVectorImpl &OutName, - const GlobalValue *GV, - bool CannotUsePrivateLabel, Mangler &Mang, + const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM) const; virtual MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang, diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index f1e9d1718f5a..74e91b5790cb 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -76,7 +76,12 @@ protected: // Can only create subclasses. /// The Target that this machine was created for. const Target &TheTarget; - /// For ABI type size and alignment. + /// DataLayout for the target: keep ABI type size and alignment. + /// + /// The DataLayout is created based on the string representation provided + /// during construction. It is kept here only to avoid reparsing the string + /// but should not really be used during compilation, because it has an + /// internal cache that is context specific. const DataLayout DL; /// Triple string, CPU name, and target feature strings the TargetMachine @@ -97,6 +102,12 @@ protected: // Can only create subclasses. const MCSubtargetInfo *STI; unsigned RequireStructuredCFG : 1; + unsigned O0WantsFastISel : 1; + + /// This API is here to support the C API, deprecated in 3.7 release. + /// This should never be used outside of legacy existing client. + const DataLayout &getDataLayout() const { return DL; } + friend struct C_API_PRIVATE_ACCESS; public: mutable TargetOptions Options; @@ -125,15 +136,23 @@ public: return *static_cast(getSubtargetImpl(F)); } - /// Deprecated in 3.7, will be removed in 3.8. 
Use createDataLayout() instead.
-  ///
-  /// This method returns a pointer to the DataLayout for the target. It should
-  /// be unchanging for every subtarget.
-  const DataLayout *getDataLayout() const { return &DL; }
-
   /// Create a DataLayout.
   const DataLayout createDataLayout() const { return DL; }

+  /// Test if a DataLayout is compatible with the CodeGen for this target.
+  ///
+  /// The LLVM Module owns a DataLayout that is used for the target independent
+  /// optimizations and code generation. This hook provides a target specific
+  /// check on the validity of this DataLayout.
+  bool isCompatibleDataLayout(const DataLayout &Candidate) const {
+    return DL == Candidate;
+  }
+
+  /// Get the pointer size for this target.
+  ///
+  /// This is the only time the DataLayout in the TargetMachine is used.
+  unsigned getPointerSize() const { return DL.getPointerSize(); }
+
   /// \brief Reset the target options based on the function's attributes.
   // FIXME: Remove TargetOptions that affect per-function code generation
   // from TargetMachine.
@@ -172,6 +191,8 @@ public:
   void setOptLevel(CodeGenOpt::Level Level) const;

   void setFastISel(bool Enable) { Options.EnableFastISel = Enable; }
+  bool getO0WantsFastISel() { return O0WantsFastISel; }
+  void setO0WantsFastISel(bool Enable) { O0WantsFastISel = Enable; }

   bool shouldPrintMachineCode() const { return Options.PrintMachineCode; }

@@ -234,6 +255,13 @@ public:
     return true;
   }

+  /// True if subtarget inserts the final scheduling pass on its own.
+  ///
+  /// Branch relaxation, which must happen after block placement, can
+  /// on some targets (e.g. SystemZ) expose additional post-RA
+  /// scheduling opportunities.
+  virtual bool targetSchedulesPostRAScheduling() const { return false; }
+
   void getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV,
                          Mangler &Mang, bool MayAlwaysUsePrivate = false) const;
   MCSymbol *getSymbol(const GlobalValue *GV, Mangler &Mang) const;
diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h
index 50197191109d..db37bdb62582 100644
--- a/include/llvm/Target/TargetOpcodes.h
+++ b/include/llvm/Target/TargetOpcodes.h
@@ -126,8 +126,12 @@ enum {
   /// Loading instruction that may page fault, bundled with associated
   /// information on how to handle such a page fault. It is intended to support
   /// "zero cost" null checks in managed languages by allowing LLVM to fold
-  /// comparisions into existing memory operations.
+  /// comparisons into existing memory operations.
   FAULTING_LOAD_OP = 22,
+
+  /// GENERIC_OP_END - This must be the last enum value in this list.
+  /// The target-specific post-isel opcode values start here.
+  GENERIC_OP_END = FAULTING_LOAD_OP,
 };
 } // end namespace TargetOpcode
 } // end namespace llvm
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index d52cb60cf108..d98d0fa0ed5f 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -58,24 +58,53 @@ namespace llvm {
     };
   }

+  enum class EABI {
+    Unknown,
+    Default, // Default means not specified.
+    EABI4,   // Target-specific (either 4, 5 or gnu depending on triple).
+    EABI5,
+    GNU
+  };
+
+  /// Identify a debugger for "tuning" the debug info.
+  ///
+  /// The "debugger tuning" concept allows us to present a more intuitive
+  /// interface that unpacks into different sets of defaults for the various
+  /// individual feature-flag settings that suit the preferences of the
+  /// various debuggers.
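// A minimal sketch of the isCompatibleDataLayout() hook above as seen from
// the client side; checkModule is a hypothetical helper, not part of the patch.
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"

static void checkModule(const llvm::Module &M, const llvm::TargetMachine &TM) {
  // The Module owns the DataLayout used by the optimizer; refuse to
  // proceed when it disagrees with the TargetMachine's layout.
  if (!TM.isCompatibleDataLayout(M.getDataLayout()))
    llvm::report_fatal_error("module DataLayout is incompatible with target");
}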
However, it's worth remembering that debuggers are
+  /// not the only consumers of debug info, and some variations in DWARF might
+  /// better be treated as target/platform issues. Fundamentally,
+  /// o if the feature is useful (or not) to a particular debugger, regardless
+  ///   of the target, that's a tuning decision;
+  /// o if the feature is useful (or not) on a particular platform, regardless
+  ///   of the debugger, that's a target decision.
+  /// It's not impossible to see both factors in some specific case.
+  ///
+  /// The "tuning" should be used to set defaults for individual feature flags
+  /// in DwarfDebug; if a given feature has a more specific command-line option,
+  /// that option should take precedence over the tuning.
+  enum class DebuggerKind {
+    Default, // No specific tuning requested.
+    GDB,     // Tune debug info for gdb.
+    LLDB,    // Tune debug info for lldb.
+    SCE      // Tune debug info for SCE targets (e.g. PS4).
+  };
+
   class TargetOptions {
   public:
     TargetOptions()
-        : PrintMachineCode(false),
-          LessPreciseFPMADOption(false), UnsafeFPMath(false),
-          NoInfsFPMath(false), NoNaNsFPMath(false),
-          HonorSignDependentRoundingFPMathOption(false),
-          NoZerosInBSS(false),
-          GuaranteedTailCallOpt(false),
-          StackAlignmentOverride(0),
+        : PrintMachineCode(false), LessPreciseFPMADOption(false),
+          UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false),
+          HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
+          GuaranteedTailCallOpt(false), StackAlignmentOverride(0),
           EnableFastISel(false), PositionIndependentExecutable(false),
           UseInitArray(false), DisableIntegratedAS(false),
           CompressDebugSections(false), FunctionSections(false),
           DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
-          FloatABIType(FloatABI::Default),
+          EmulatedTLS(false), FloatABIType(FloatABI::Default),
           AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()),
-          JTType(JumpTable::Single),
-          ThreadModel(ThreadModel::POSIX) {}
+          JTType(JumpTable::Single), ThreadModel(ThreadModel::POSIX),
+          EABIVersion(EABI::Default), DebuggerTuning(DebuggerKind::Default) {}

     /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
     /// option is specified on the command line, and should enable debugging
@@ -172,6 +201,10 @@ namespace llvm {
     /// Emit target-specific trap instruction for 'unreachable' IR instructions.
     unsigned TrapUnreachable : 1;

+    /// EmulatedTLS - This flag enables the emulated TLS model, using the
+    /// emutls functions in the runtime library.
+    unsigned EmulatedTLS : 1;
+
     /// FloatABIType - This setting is set when the -float-abi=xxx option is specified
     /// on the command line. This setting may either be Default, Soft, or Hard.
     /// Default selects the target's default behavior. Soft selects the ABI for
@@ -200,7 +233,7 @@ namespace llvm {

     /// This class encapsulates options for reciprocal-estimate code generation.
     TargetRecip Reciprocals;
-
+
     /// JTType - This flag specifies the type of jump-instruction table to
     /// create for functions that have the jumptable attribute.
     JumpTable::JumpTableType JTType;

@@ -209,6 +242,12 @@ namespace llvm {
     /// for things like atomics.
     ThreadModel::Model ThreadModel;

+    /// EABIVersion - This flag specifies the EABI version.
+    EABI EABIVersion;
+
+    /// Which debugger to tune for.
+    DebuggerKind DebuggerTuning;
+
     /// Machine level options.
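// A hedged example of populating the new TargetOptions fields declared above
// from a hypothetical front end; the specific choices (emulated TLS, EABI5,
// LLDB tuning) are illustrative only.
#include "llvm/Target/TargetOptions.h"

llvm::TargetOptions makeOptions() {
  llvm::TargetOptions Opts;                       // defaults per the ctor above
  Opts.EmulatedTLS = 1;                           // lower TLS via the emutls runtime
  Opts.EABIVersion = llvm::EABI::EABI5;           // e.g. from an -meabi style flag
  Opts.DebuggerTuning = llvm::DebuggerKind::LLDB; // tune DWARF defaults for lldb
  return Opts;
}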
MCTargetOptions MCOptions; }; @@ -231,11 +270,14 @@ inline bool operator==(const TargetOptions &LHS, ARE_EQUAL(PositionIndependentExecutable) && ARE_EQUAL(UseInitArray) && ARE_EQUAL(TrapUnreachable) && + ARE_EQUAL(EmulatedTLS) && ARE_EQUAL(FloatABIType) && ARE_EQUAL(AllowFPOpFusion) && ARE_EQUAL(Reciprocals) && ARE_EQUAL(JTType) && ARE_EQUAL(ThreadModel) && + ARE_EQUAL(EABIVersion) && + ARE_EQUAL(DebuggerTuning) && ARE_EQUAL(MCOptions); #undef ARE_EQUAL } diff --git a/include/llvm/Target/TargetRecip.h b/include/llvm/Target/TargetRecip.h index 4cc3672d758d..210d49324848 100644 --- a/include/llvm/Target/TargetRecip.h +++ b/include/llvm/Target/TargetRecip.h @@ -31,20 +31,20 @@ public: /// Initialize all or part of the operations from command-line options or /// a front end. TargetRecip(const std::vector &Args); - + /// Set whether a particular reciprocal operation is enabled and how many /// refinement steps are needed when using it. Use "all" to set enablement /// and refinement steps for all operations. - void setDefaults(const StringRef &Key, bool Enable, unsigned RefSteps); + void setDefaults(StringRef Key, bool Enable, unsigned RefSteps); /// Return true if the reciprocal operation has been enabled by default or /// from the command-line. Return false if the operation has been disabled /// by default or from the command-line. - bool isEnabled(const StringRef &Key) const; + bool isEnabled(StringRef Key) const; /// Return the number of iterations necessary to refine the /// the result of a machine instruction for the given reciprocal operation. - unsigned getRefinementSteps(const StringRef &Key) const; + unsigned getRefinementSteps(StringRef Key) const; bool operator==(const TargetRecip &Other) const; @@ -52,14 +52,14 @@ private: enum { Uninitialized = -1 }; - + struct RecipParams { int8_t Enabled; int8_t RefinementSteps; - + RecipParams() : Enabled(Uninitialized), RefinementSteps(Uninitialized) {} }; - + std::map RecipMap; typedef std::map::iterator RecipIter; typedef std::map::const_iterator ConstRecipIter; diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 0ee936a76211..fccaad4705d5 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -21,6 +21,8 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Printable.h" #include #include @@ -32,6 +34,24 @@ class RegScavenger; template class SmallVectorImpl; class VirtRegMap; class raw_ostream; +class LiveRegMatrix; + +/// A bitmask representing the covering of a register with sub-registers. +/// +/// This is typically used to track liveness at sub-register granularity. +/// Lane masks for sub-register indices are similar to register units for +/// physical registers. The individual bits in a lane mask can't be assigned +/// any specific meaning. They can be used to check if two sub-register +/// indices overlap. 
+/// +/// Iff the target has a register such that: +/// +/// getSubReg(Reg, A) overlaps getSubReg(Reg, B) +/// +/// then: +/// +/// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0 +typedef unsigned LaneBitmask; class TargetRegisterClass { public: @@ -45,7 +65,7 @@ public: const vt_iterator VTs; const uint32_t *SubClassMask; const uint16_t *SuperRegIndices; - const unsigned LaneMask; + const LaneBitmask LaneMask; /// Classes with a higher priority value are assigned first by register /// allocators using a greedy heuristic. The value is in the range [0,63]. const uint8_t AllocationPriority; @@ -54,8 +74,7 @@ public: const sc_iterator SuperClasses; ArrayRef (*OrderFunc)(const MachineFunction&); - /// getID() - Return the register class ID number. - /// + /// Return the register class ID number. unsigned getID() const { return MC->getID(); } /// begin/end - Return all of the registers in this class. @@ -63,46 +82,42 @@ public: iterator begin() const { return MC->begin(); } iterator end() const { return MC->end(); } - /// getNumRegs - Return the number of registers in this class. - /// + /// Return the number of registers in this class. unsigned getNumRegs() const { return MC->getNumRegs(); } - /// getRegister - Return the specified register in the class. - /// + /// Return the specified register in the class. unsigned getRegister(unsigned i) const { return MC->getRegister(i); } - /// contains - Return true if the specified register is included in this - /// register class. This does not include virtual registers. + /// Return true if the specified register is included in this register class. + /// This does not include virtual registers. bool contains(unsigned Reg) const { return MC->contains(Reg); } - /// contains - Return true if both registers are in this class. + /// Return true if both registers are in this class. bool contains(unsigned Reg1, unsigned Reg2) const { return MC->contains(Reg1, Reg2); } - /// getSize - Return the size of the register in bytes, which is also the size + /// Return the size of the register in bytes, which is also the size /// of a stack slot allocated to hold a spilled copy of this register. unsigned getSize() const { return MC->getSize(); } - /// getAlignment - Return the minimum required alignment for a register of - /// this class. + /// Return the minimum required alignment for a register of this class. unsigned getAlignment() const { return MC->getAlignment(); } - /// getCopyCost - Return the cost of copying a value between two registers in - /// this class. A negative number means the register class is very expensive + /// Return the cost of copying a value between two registers in this class. + /// A negative number means the register class is very expensive /// to copy e.g. status flag register classes. int getCopyCost() const { return MC->getCopyCost(); } - /// isAllocatable - Return true if this register class may be used to create - /// virtual registers. + /// Return true if this register class may be used to create virtual + /// registers. bool isAllocatable() const { return MC->isAllocatable(); } - /// hasType - return true if this TargetRegisterClass has the ValueType vt. - /// + /// Return true if this TargetRegisterClass has the ValueType vt. 
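// A small sketch of the LaneBitmask contract described above: with precise
// lane masks, sub-register indices A and B of the same register overlap
// exactly when their masks intersect. TRI, A and B are assumed inputs.
#include "llvm/Target/TargetRegisterInfo.h"

static bool subRegIndicesOverlap(const llvm::TargetRegisterInfo &TRI,
                                 unsigned A, unsigned B) {
  return (TRI.getSubRegIndexLaneMask(A) & TRI.getSubRegIndexLaneMask(B)) != 0;
}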
bool hasType(MVT vt) const { for(int i = 0; VTs[i] != MVT::Other; ++i) if (MVT(VTs[i]) == vt) @@ -122,41 +137,39 @@ public: return I; } - /// hasSubClass - return true if the specified TargetRegisterClass + /// Return true if the specified TargetRegisterClass /// is a proper sub-class of this TargetRegisterClass. bool hasSubClass(const TargetRegisterClass *RC) const { return RC != this && hasSubClassEq(RC); } - /// hasSubClassEq - Returns true if RC is a sub-class of or equal to this - /// class. + /// Returns true if RC is a sub-class of or equal to this class. bool hasSubClassEq(const TargetRegisterClass *RC) const { unsigned ID = RC->getID(); return (SubClassMask[ID / 32] >> (ID % 32)) & 1; } - /// hasSuperClass - return true if the specified TargetRegisterClass is a + /// Return true if the specified TargetRegisterClass is a /// proper super-class of this TargetRegisterClass. bool hasSuperClass(const TargetRegisterClass *RC) const { return RC->hasSubClass(this); } - /// hasSuperClassEq - Returns true if RC is a super-class of or equal to this - /// class. + /// Returns true if RC is a super-class of or equal to this class. bool hasSuperClassEq(const TargetRegisterClass *RC) const { return RC->hasSubClassEq(this); } - /// getSubClassMask - Returns a bit vector of subclasses, including this one. + /// Returns a bit vector of subclasses, including this one. /// The vector is indexed by class IDs, see hasSubClassEq() above for how to /// use it. const uint32_t *getSubClassMask() const { return SubClassMask; } - /// getSuperRegIndices - Returns a 0-terminated list of sub-register indices - /// that project some super-register class into this register class. The list - /// has an entry for each Idx such that: + /// Returns a 0-terminated list of sub-register indices that project some + /// super-register class into this register class. The list has an entry for + /// each Idx such that: /// /// There exists SuperRC where: /// For all Reg in SuperRC: @@ -166,23 +179,23 @@ public: return SuperRegIndices; } - /// getSuperClasses - Returns a NULL terminated list of super-classes. The + /// Returns a NULL-terminated list of super-classes. The /// classes are ordered by ID which is also a topological ordering from large /// to small classes. The list does NOT include the current class. sc_iterator getSuperClasses() const { return SuperClasses; } - /// isASubClass - return true if this TargetRegisterClass is a subset + /// Return true if this TargetRegisterClass is a subset /// class of at least one other TargetRegisterClass. bool isASubClass() const { return SuperClasses[0] != nullptr; } - /// getRawAllocationOrder - Returns the preferred order for allocating - /// registers from this register class in MF. The raw order comes directly - /// from the .td file and may include reserved registers that are not - /// allocatable. Register allocators should also make sure to allocate + /// Returns the preferred order for allocating registers from this register + /// class in MF. The raw order comes directly from the .td file and may + /// include reserved registers that are not allocatable. + /// Register allocators should also make sure to allocate /// callee-saved registers only after all the volatiles are used. The /// RegisterClassInfo class provides filtered allocation orders with /// callee-saved registers moved to the end. @@ -200,13 +213,13 @@ public: /// Returns the combination of all lane masks of register in this class. 
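// A worked example (editorial) of the SubClassMask indexing used by
// hasSubClassEq() above: one bit per register-class ID, packed into 32-bit
// words, so ID 37 lands in word 37 / 32 == 1 at bit 37 % 32 == 5.
unsigned ID = 37; // arbitrary class ID, for illustration only
bool IsSub = (SubClassMask[ID / 32] >> (ID % 32)) & 1; // word 1, bit 5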
/// The lane masks of the registers are the combination of all lane masks /// of their subregisters. - unsigned getLaneMask() const { + LaneBitmask getLaneMask() const { return LaneMask; } }; -/// TargetRegisterInfoDesc - Extra information, not in MCRegisterDesc, about -/// registers. These are used by codegen, not by MC. +/// Extra information, not in MCRegisterDesc, about registers. +/// These are used by codegen, not by MC. struct TargetRegisterInfoDesc { unsigned CostPerUse; // Extra cost of instructions using register. bool inAllocatableClass; // Register belongs to an allocatable regclass. @@ -232,7 +245,7 @@ private: const TargetRegisterInfoDesc *InfoDesc; // Extra desc array for codegen const char *const *SubRegIndexNames; // Names of subreg indexes. // Pointer to array of lane masks, one per sub-reg index. - const unsigned *SubRegIndexLaneMasks; + const LaneBitmask *SubRegIndexLaneMasks; regclass_iterator RegClassBegin, RegClassEnd; // List of regclasses unsigned CoveringLanes; @@ -242,7 +255,7 @@ protected: regclass_iterator RegClassBegin, regclass_iterator RegClassEnd, const char *const *SRINames, - const unsigned *SRILaneMasks, + const LaneBitmask *SRILaneMasks, unsigned CoveringLanes); virtual ~TargetRegisterInfo(); public: @@ -270,77 +283,74 @@ public: return int(Reg) >= (1 << 30); } - /// stackSlot2Index - Compute the frame index from a register value - /// representing a stack slot. + /// Compute the frame index from a register value representing a stack slot. static int stackSlot2Index(unsigned Reg) { assert(isStackSlot(Reg) && "Not a stack slot"); return int(Reg - (1u << 30)); } - /// index2StackSlot - Convert a non-negative frame index to a stack slot - /// register value. + /// Convert a non-negative frame index to a stack slot register value. static unsigned index2StackSlot(int FI) { assert(FI >= 0 && "Cannot hold a negative frame index."); return FI + (1u << 30); } - /// isPhysicalRegister - Return true if the specified register number is in + /// Return true if the specified register number is in /// the physical register namespace. static bool isPhysicalRegister(unsigned Reg) { assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); return int(Reg) > 0; } - /// isVirtualRegister - Return true if the specified register number is in + /// Return true if the specified register number is in /// the virtual register namespace. static bool isVirtualRegister(unsigned Reg) { assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first."); return int(Reg) < 0; } - /// virtReg2Index - Convert a virtual register number to a 0-based index. + /// Convert a virtual register number to a 0-based index. /// The first virtual register in a function will get the index 0. static unsigned virtReg2Index(unsigned Reg) { assert(isVirtualRegister(Reg) && "Not a virtual register"); return Reg & ~(1u << 31); } - /// index2VirtReg - Convert a 0-based index to a virtual register number. + /// Convert a 0-based index to a virtual register number. /// This is the inverse operation of VirtReg2IndexFunctor below. static unsigned index2VirtReg(unsigned Index) { return Index | (1u << 31); } - /// getMinimalPhysRegClass - Returns the Register Class of a physical - /// register of the given type, picking the most sub register class of - /// the right type that contains this physreg. + /// Returns the Register Class of a physical register of the given type, + /// picking the most sub register class of the right type that contains this + /// physreg. 
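// A worked example of the register-number encodings implemented by the
// static helpers above; the values follow directly from the shown arithmetic
// (virtual registers set bit 31, stack slots live at or above 1u << 30).
unsigned V = llvm::TargetRegisterInfo::index2VirtReg(0);   // 0x80000000
unsigned I = llvm::TargetRegisterInfo::virtReg2Index(V);   // 0 again
unsigned S = llvm::TargetRegisterInfo::index2StackSlot(7); // (1u << 30) + 7
int      F = llvm::TargetRegisterInfo::stackSlot2Index(S); // 7 again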
const TargetRegisterClass * getMinimalPhysRegClass(unsigned Reg, MVT VT = MVT::Other) const; - /// getAllocatableClass - Return the maximal subclass of the given register - /// class that is alloctable, or NULL. + /// Return the maximal subclass of the given register class that is + /// allocatable or NULL. const TargetRegisterClass * getAllocatableClass(const TargetRegisterClass *RC) const; - /// getAllocatableSet - Returns a bitset indexed by register number - /// indicating if a register is allocatable or not. If a register class is - /// specified, returns the subset for the class. + /// Returns a bitset indexed by register number indicating if a register is + /// allocatable or not. If a register class is specified, returns the subset + /// for the class. BitVector getAllocatableSet(const MachineFunction &MF, const TargetRegisterClass *RC = nullptr) const; - /// getCostPerUse - Return the additional cost of using this register instead + /// Return the additional cost of using this register instead /// of other registers in its class. unsigned getCostPerUse(unsigned RegNo) const { return InfoDesc[RegNo].CostPerUse; } - /// isInAllocatableClass - Return true if the register is in the allocation - /// of any register class. + /// Return true if the register is in the allocation of any register class. bool isInAllocatableClass(unsigned RegNo) const { return InfoDesc[RegNo].inAllocatableClass; } - /// getSubRegIndexName - Return the human-readable symbolic target-specific + /// Return the human-readable symbolic target-specific /// name for the specified SubRegIndex. const char *getSubRegIndexName(unsigned SubIdx) const { assert(SubIdx && SubIdx < getNumSubRegIndices() && @@ -348,44 +358,15 @@ public: return SubRegIndexNames[SubIdx-1]; } - /// getSubRegIndexLaneMask - Return a bitmask representing the parts of a - /// register that are covered by SubIdx. + /// Return a bitmask representing the parts of a register that are covered by + /// SubIdx \see LaneBitmask. /// - /// Lane masks for sub-register indices are similar to register units for - /// physical registers. The individual bits in a lane mask can't be assigned - /// any specific meaning. They can be used to check if two sub-register - /// indices overlap. - /// - /// If the target has a register such that: - /// - /// getSubReg(Reg, A) overlaps getSubReg(Reg, B) - /// - /// then: - /// - /// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0 - /// - /// The converse is not necessarily true. If two lane masks have a common - /// bit, the corresponding sub-registers may not overlap, but it can be - /// assumed that they usually will. /// SubIdx == 0 is allowed, it has the lane mask ~0u. - unsigned getSubRegIndexLaneMask(unsigned SubIdx) const { + LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const { assert(SubIdx < getNumSubRegIndices() && "This is not a subregister index"); return SubRegIndexLaneMasks[SubIdx]; } - /// Returns true if the given lane mask is imprecise. - /// - /// LaneMasks as given by getSubRegIndexLaneMask() have a limited number of - /// bits, so for targets with more than 31 disjunct subregister indices there - /// may be cases where: - /// getSubReg(Reg,A) does not overlap getSubReg(Reg,B) - /// but we still have - /// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0. - /// This function returns true in those cases. 
- static bool isImpreciseLaneMask(unsigned LaneMask) { - return LaneMask & 0x80000000u; - } - /// The lane masks returned by getSubRegIndexLaneMask() above can only be /// used to determine if sub-registers overlap - they can't be used to /// determine if a set of sub-registers completely cover another @@ -409,10 +390,10 @@ public: /// /// If (MaskA & ~(MaskB & Covering)) == 0, then SubA is completely covered by /// SubB. - unsigned getCoveringLanes() const { return CoveringLanes; } + LaneBitmask getCoveringLanes() const { return CoveringLanes; } - /// regsOverlap - Returns true if the two registers are equal or alias each - /// other. The registers may be virtual register. + /// Returns true if the two registers are equal or alias each other. + /// The registers may be virtual registers. bool regsOverlap(unsigned regA, unsigned regB) const { if (regA == regB) return true; if (isVirtualRegister(regA) || isVirtualRegister(regB)) @@ -429,7 +410,7 @@ public: return false; } - /// hasRegUnit - Returns true if Reg contains RegUnit. + /// Returns true if Reg contains RegUnit. bool hasRegUnit(unsigned Reg, unsigned RegUnit) const { for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units) if (*Units == RegUnit) @@ -437,18 +418,23 @@ public: return false; } - /// getCalleeSavedRegs - Return a null-terminated list of all of the - /// callee saved registers on this target. The register should be in the - /// order of desired callee-save stack frame offset. The first register is - /// closest to the incoming stack pointer if stack grows down, and vice versa. + /// Return a null-terminated list of all of the callee-saved registers on + /// this target. The register should be in the order of desired callee-save + /// stack frame offset. The first register is closest to the incoming stack + /// pointer if stack grows down, and vice versa. /// virtual const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const = 0; - /// getCallPreservedMask - Return a mask of call-preserved registers for the - /// given calling convention on the current function. The mask should - /// include all call-preserved aliases. This is used by the register - /// allocator to determine which registers can be live across a call. + virtual const MCPhysReg* + getCalleeSavedRegsViaCopy(const MachineFunction *MF) const { + return nullptr; + } + + /// Return a mask of call-preserved registers for the given calling convention + /// on the current function. The mask should include all call-preserved + /// aliases. This is used by the register allocator to determine which + /// registers can be live across a call. /// /// The mask is an array containing (TRI::getNumRegs()+31)/32 entries. /// A set bit indicates that all bits of the corresponding register are @@ -469,13 +455,18 @@ public: return nullptr; } + /// Return a register mask that clobbers everything. + virtual const uint32_t *getNoPreservedMask() const { + llvm_unreachable("target does not provide no presered mask"); + } + /// Return all the call-preserved register masks defined for this target. virtual ArrayRef getRegMasks() const = 0; virtual ArrayRef getRegMaskNames() const = 0; - /// getReservedRegs - Returns a bitset indexed by physical register number - /// indicating if a register is a special register that has particular uses - /// and should be considered unavailable at all times, e.g. SP, RA. 
This is
+  /// Returns a bitset indexed by physical register number indicating if a
+  /// register is a special register that has particular uses and should be
+  /// considered unavailable at all times, e.g. SP, RA. This is
   /// used by register scavenger to determine what registers are free.
   virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;

@@ -484,14 +475,14 @@ public:
   /// remove pseudo-registers that should be ignored).
   virtual void adjustStackMapLiveOutMask(uint32_t *Mask) const { }

-  /// getMatchingSuperReg - Return a super-register of the specified register
+  /// Return a super-register of the specified register
   /// Reg so its sub-register of index SubIdx is Reg.
   unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
                                const TargetRegisterClass *RC) const {
     return MCRegisterInfo::getMatchingSuperReg(Reg, SubIdx, RC->MC);
   }

-  /// getMatchingSuperRegClass - Return a subclass of the specified register
+  /// Return a subclass of the specified register
   /// class A so that each register in it has a sub-register of the
   /// specified sub-register index which is in the specified register class B.
   ///
@@ -500,7 +491,16 @@ public:
   getMatchingSuperRegClass(const TargetRegisterClass *A,
                            const TargetRegisterClass *B, unsigned Idx) const;

-  /// getSubClassWithSubReg - Returns the largest legal sub-class of RC that
+  // For a copy-like instruction that defines a register of class DefRC with
+  // subreg index DefSubReg, reading from another source with class SrcRC and
+  // subregister SrcSubReg, return true if this is a preferable copy
+  // instruction, or if an earlier use should be used instead.
+  virtual bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
+                                    unsigned DefSubReg,
+                                    const TargetRegisterClass *SrcRC,
+                                    unsigned SrcSubReg) const;
+
+  /// Returns the largest legal sub-class of RC that
   /// supports the sub-register index Idx.
   /// If no such sub-class exists, return NULL.
   /// If all registers in RC already have an Idx sub-register, return RC.
@@ -518,7 +518,7 @@ public:
     return RC;
   }

-  /// composeSubRegIndices - Return the subregister index you get from composing
+  /// Return the subregister index you get from composing
   /// two subregister indices.
   ///
   /// The special null sub-register index composes as the identity.
@@ -541,10 +541,11 @@ public:
   /// Transforms a LaneMask computed for one subregister to the lanemask that
   /// would have been computed when composing the subsubregisters with IdxA
   /// first. @sa composeSubRegIndices()
-  unsigned composeSubRegIndexLaneMask(unsigned IdxA, unsigned LaneMask) const {
+  LaneBitmask composeSubRegIndexLaneMask(unsigned IdxA,
+                                         LaneBitmask Mask) const {
     if (!IdxA)
-      return LaneMask;
-    return composeSubRegIndexLaneMaskImpl(IdxA, LaneMask);
+      return Mask;
+    return composeSubRegIndexLaneMaskImpl(IdxA, Mask);
   }

   /// Debugging helper: dump register in human readable form to dbgs() stream.
@@ -558,13 +559,13 @@ protected:
   }

   /// Overridden by TableGen in targets that have sub-registers.
-  virtual unsigned
-  composeSubRegIndexLaneMaskImpl(unsigned, unsigned) const {
+  virtual LaneBitmask
+  composeSubRegIndexLaneMaskImpl(unsigned, LaneBitmask) const {
     llvm_unreachable("Target has no sub-registers");
   }

 public:
-  /// getCommonSuperRegClass - Find a common super-register class if it exists.
+  /// Find a common super-register class if it exists.
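// The call-preserved register masks documented a little earlier pack one bit
// per register into (TRI::getNumRegs() + 31) / 32 words; a hedged helper
// showing the conventional bit test (illustrative, not part of the patch):
#include <cstdint>

static bool isCallPreserved(const uint32_t *Mask, unsigned Reg) {
  return (Mask[Reg / 32] >> (Reg % 32)) & 1; // set bit => register preserved
}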
/// /// Find a register class, SuperRC and two sub-register indices, PreA and /// PreB, such that: @@ -605,44 +606,47 @@ public: return (unsigned)(regclass_end()-regclass_begin()); } - /// getRegClass - Returns the register class associated with the enumeration - /// value. See class MCOperandInfo. + /// Returns the register class associated with the enumeration value. + /// See class MCOperandInfo. const TargetRegisterClass *getRegClass(unsigned i) const { assert(i < getNumRegClasses() && "Register Class ID out of range"); return RegClassBegin[i]; } - /// getRegClassName - Returns the name of the register class. + /// Returns the name of the register class. const char *getRegClassName(const TargetRegisterClass *Class) const { return MCRegisterInfo::getRegClassName(Class->MC); } - /// getCommonSubClass - find the largest common subclass of A and B. Return - /// NULL if there is no common subclass. + /// Find the largest common subclass of A and B. + /// Return NULL if there is no common subclass. + /// The common subclass should contain + /// simple value type SVT if it is not the Any type. const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const; + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) const; - /// getPointerRegClass - Returns a TargetRegisterClass used for pointer - /// values. If a target supports multiple different pointer register classes, + /// Returns a TargetRegisterClass used for pointer values. + /// If a target supports multiple different pointer register classes, /// kind specifies which one is indicated. virtual const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const { llvm_unreachable("Target didn't implement getPointerRegClass!"); } - /// getCrossCopyRegClass - Returns a legal register class to copy a register - /// in the specified class to or from. If it is possible to copy the register - /// directly without using a cross register class copy, return the specified - /// RC. Returns NULL if it is not possible to copy between a two registers of - /// the specified class. + /// Returns a legal register class to copy a register in the specified class + /// to or from. If it is possible to copy the register directly without using + /// a cross register class copy, return the specified RC. Returns NULL if it + /// is not possible to copy between two registers of the specified class. virtual const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const { return RC; } - /// getLargestLegalSuperClass - Returns the largest super class of RC that is - /// legal to use in the current sub-target and has the same spill size. + /// Returns the largest super class of RC that is legal to use in the current + /// sub-target and has the same spill size. /// The returned register class can be used to create virtual registers which /// means that all its registers can be copied and spilled. virtual const TargetRegisterClass * @@ -653,9 +657,9 @@ public: return RC; } - /// getRegPressureLimit - Return the register pressure "high water mark" for - /// the specific register class. The scheduler is in high register pressure - /// mode (for the specific register class) if it goes over the limit. + /// Return the register pressure "high water mark" for the specific register + /// class. The scheduler is in high register pressure mode (for the specific + /// register class) if it goes over the limit. 
/// Note: this is the old register pressure model that relies on a manually
   /// specified representative register class per value type.
@@ -664,6 +668,15 @@ public:
     return 0;
   }

+  /// Return a heuristic for the machine scheduler to compare the profitability
+  /// of increasing one register pressure set versus another. The scheduler
+  /// will prefer increasing the register pressure of the set which returns
+  /// the largest value for this function.
+  virtual unsigned getRegPressureSetScore(const MachineFunction &MF,
+                                          unsigned PSetID) const {
+    return PSetID;
+  }
+
   /// Get the weight in units of pressure for this register class.
   virtual const RegClassWeight &getRegClassWeight(
     const TargetRegisterClass *RC) const = 0;
@@ -709,14 +722,15 @@ public:
                                      ArrayRef<MCPhysReg> Order,
                                      SmallVectorImpl<MCPhysReg> &Hints,
                                      const MachineFunction &MF,
-                                     const VirtRegMap *VRM = nullptr) const;
+                                     const VirtRegMap *VRM = nullptr,
+                                     const LiveRegMatrix *Matrix = nullptr)
+    const;

-  /// updateRegAllocHint - A callback to allow the target a chance to update
-  /// register allocation hints when a register is "changed" (e.g. coalesced)
-  /// to another register. e.g. On ARM, some virtual registers should target
-  /// register pairs, if one of the pair is coalesced to another register, the
-  /// allocation hint of the other half of the pair should be changed to point
-  /// to the new register.
+  /// A callback to allow the target a chance to update register allocation
+  /// hints when a register is "changed" (e.g. coalesced) to another register.
+  /// e.g. On ARM, some virtual registers should target register pairs,
+  /// if one of the pair is coalesced to another register, the allocation hint
+  /// of the other half of the pair should be changed to point to the new register.
   virtual void updateRegAllocHint(unsigned Reg, unsigned NewReg,
                                   MachineFunction &MF) const {
     // Do nothing.
   }
@@ -738,73 +752,72 @@ public:
   /// register if it is available.
   virtual unsigned getCSRFirstUseCost() const { return 0; }

-  /// requiresRegisterScavenging - returns true if the target requires (and can
-  /// make use of) the register scavenger.
+  /// Returns true if the target requires (and can make use of) the register
+  /// scavenger.
   virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
     return false;
   }

-  /// useFPForScavengingIndex - returns true if the target wants to use
-  /// frame pointer based accesses to spill to the scavenger emergency spill
-  /// slot.
+  /// Returns true if the target wants to use frame pointer based accesses to
+  /// spill to the scavenger emergency spill slot.
   virtual bool useFPForScavengingIndex(const MachineFunction &MF) const {
     return true;
   }

-  /// requiresFrameIndexScavenging - returns true if the target requires post
-  /// PEI scavenging of registers for materializing frame index constants.
+  /// Returns true if the target requires post PEI scavenging of registers for
+  /// materializing frame index constants.
   virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
     return false;
   }

-  /// requiresVirtualBaseRegisters - Returns true if the target wants the
-  /// LocalStackAllocation pass to be run and virtual base registers
-  /// used for more efficient stack access.
+  /// Returns true if the target wants the LocalStackAllocation pass to be run
+  /// and virtual base registers used for more efficient stack access.
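// A hedged sketch of a backend opting in to the scavenging hooks above;
// MyRegisterInfo is a hypothetical TargetRegisterInfo subclass and the
// return values are illustrative, not prescriptive.
bool MyRegisterInfo::requiresRegisterScavenging(
    const llvm::MachineFunction &MF) const {
  return true; // request register scavenger support
}

bool MyRegisterInfo::requiresFrameIndexScavenging(
    const llvm::MachineFunction &MF) const {
  return true; // scavenge registers for frame-index materialization post-PEI
}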
  virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
     return false;
   }

-  /// hasReservedSpillSlot - Return true if target has reserved a spill slot in
-  /// the stack frame of the given function for the specified register. e.g. On
-  /// x86, if the frame register is required, the first fixed stack object is
-  /// reserved as its spill slot. This tells PEI not to create a new stack frame
+  /// Return true if target has reserved a spill slot in the stack frame of
+  /// the given function for the specified register. e.g. On x86, if the frame
+  /// register is required, the first fixed stack object is reserved as its
+  /// spill slot. This tells PEI not to create a new stack frame
   /// object for the given register. It should be called only after
-  /// processFunctionBeforeCalleeSavedScan().
+  /// determineCalleeSaves().
   virtual bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
                                     int &FrameIdx) const {
     return false;
   }

-  /// trackLivenessAfterRegAlloc - returns true if the live-ins should be tracked
-  /// after register allocation.
+  /// Returns true if the live-ins should be tracked after register allocation.
   virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
     return false;
   }

-  /// needsStackRealignment - true if storage within the function requires the
-  /// stack pointer to be aligned more than the normal calling convention calls
-  /// for.
-  virtual bool needsStackRealignment(const MachineFunction &MF) const {
-    return false;
-  }
+  /// True if the stack can be realigned for the target.
+  virtual bool canRealignStack(const MachineFunction &MF) const;

-  /// getFrameIndexInstrOffset - Get the offset from the referenced frame
-  /// index in the instruction, if there is one.
+  /// True if storage within the function requires the stack pointer to be
+  /// aligned more than the normal calling convention calls for.
+  /// This cannot be overridden by the target, but canRealignStack can be
+  /// overridden.
+  bool needsStackRealignment(const MachineFunction &MF) const;
+
+  /// Get the offset from the referenced frame index in the instruction,
+  /// if there is one.
   virtual int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
                                            int Idx) const {
     return 0;
   }

-  /// needsFrameBaseReg - Returns true if the instruction's frame index
-  /// reference would be better served by a base register other than FP
-  /// or SP. Used by LocalStackFrameAllocation to determine which frame index
+  /// Returns true if the instruction's frame index reference would be better
+  /// served by a base register other than FP or SP.
+  /// Used by LocalStackFrameAllocation to determine which frame index
   /// references it should create new base registers for.
   virtual bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
     return false;
   }

-  /// materializeFrameBaseRegister - Insert defining instruction(s) for
-  /// BaseReg to be a pointer to FrameIdx before insertion point I.
+  /// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
+  /// before insertion point I.
   virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                             unsigned BaseReg, int FrameIdx,
                                             int64_t Offset) const {
@@ -812,24 +825,23 @@ public:
                      "target");
   }

-  /// resolveFrameIndex - Resolve a frame index operand of an instruction
+  /// Resolve a frame index operand of an instruction
   /// to reference the indicated base register plus offset instead.
  virtual void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
                                  int64_t Offset) const {
     llvm_unreachable("resolveFrameIndex does not exist on this target");
   }

-  /// isFrameOffsetLegal - Determine whether a given base register plus offset
-  /// immediate is encodable to resolve a frame index.
+  /// Determine whether a given base register plus offset immediate is
+  /// encodable to resolve a frame index.
   virtual bool isFrameOffsetLegal(const MachineInstr *MI, unsigned BaseReg,
                                   int64_t Offset) const {
     llvm_unreachable("isFrameOffsetLegal does not exist on this target");
   }

-
-  /// saveScavengerRegister - Spill the register so it can be used by the
-  /// register scavenger. Return true if the register was spilled, false
-  /// otherwise. If this function does not spill the register, the scavenger
+  /// Spill the register so it can be used by the register scavenger.
+  /// Return true if the register was spilled, false otherwise.
+  /// If this function does not spill the register, the scavenger
   /// will instead spill it to the emergency spill slot.
   ///
   virtual bool saveScavengerRegister(MachineBasicBlock &MBB,
@@ -840,13 +852,13 @@ public:
     return false;
   }

-  /// eliminateFrameIndex - This method must be overridden to eliminate abstract
-  /// frame indices from instructions which may use them. The instruction
-  /// referenced by the iterator contains an MO_FrameIndex operand which must be
-  /// eliminated by this method. This method may modify or replace the
-  /// specified instruction, as long as it keeps the iterator pointing at the
-  /// finished product. SPAdj is the SP adjustment due to call frame setup
-  /// instruction. FIOperandNum is the FI operand number.
+  /// This method must be overridden to eliminate abstract frame indices from
+  /// instructions which may use them. The instruction referenced by the
+  /// iterator contains an MO_FrameIndex operand which must be eliminated by
+  /// this method. This method may modify or replace the specified instruction,
+  /// as long as it keeps the iterator pointing at the finished product.
+  /// SPAdj is the SP adjustment due to call frame setup instruction.
+  /// FIOperandNum is the FI operand number.
   virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                    int SPAdj, unsigned FIOperandNum,
                                    RegScavenger *RS = nullptr) const = 0;
@@ -935,7 +947,6 @@ struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
   }
 };

-/// PrintReg - Helper class for printing registers on a raw_ostream.
 /// Prints virtual and physical registers with or without a TRI instance.
 ///
 /// The format is:
@@ -946,24 +957,10 @@ struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
 ///   %physreg17      - a physical register when no TRI instance given.
 ///
 /// Usage: OS << PrintReg(Reg, TRI) << '\n';
-///
-class PrintReg {
-  const TargetRegisterInfo *TRI;
-  unsigned Reg;
-  unsigned SubIdx;
-public:
-  explicit PrintReg(unsigned reg, const TargetRegisterInfo *tri = nullptr,
-                    unsigned subidx = 0)
-    : TRI(tri), Reg(reg), SubIdx(subidx) {}
-  void print(raw_ostream&) const;
-};
+Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr,
+                   unsigned SubRegIdx = 0);

-static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) {
-  PR.print(OS);
-  return OS;
-}
-
-/// PrintRegUnit - Helper class for printing register units on a raw_ostream.
+/// Create Printable object to print register units on a \ref raw_ostream.
/// /// Register units are named after their root registers: /// @@ -971,36 +968,14 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { /// FP0~ST7 - Dual roots. /// /// Usage: OS << PrintRegUnit(Unit, TRI) << '\n'; -/// -class PrintRegUnit { -protected: - const TargetRegisterInfo *TRI; - unsigned Unit; -public: - PrintRegUnit(unsigned unit, const TargetRegisterInfo *tri) - : TRI(tri), Unit(unit) {} - void print(raw_ostream&) const; -}; +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintRegUnit &PR) { - PR.print(OS); - return OS; -} +/// \brief Create Printable object to print virtual registers and physical +/// registers on a \ref raw_ostream. +Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); -/// PrintVRegOrUnit - It is often convenient to track virtual registers and -/// physical register units in the same list. -class PrintVRegOrUnit : protected PrintRegUnit { -public: - PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *tri) - : PrintRegUnit(VRegOrUnit, tri) {} - void print(raw_ostream&) const; -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, - const PrintVRegOrUnit &PR) { - PR.print(OS); - return OS; -} +/// Create Printable object to print LaneBitmasks on a \ref raw_ostream. +Printable PrintLaneMask(LaneBitmask LaneMask); } // End llvm namespace diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index 4abbe3793995..565473658404 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -80,6 +80,11 @@ class SDTCisSameNumEltsAs : SDTypeConstraint { int OtherOperandNum = OtherOp; } +// SDTCisSameSizeAs - The two specified operands have identical size. +class SDTCisSameSizeAs : SDTypeConstraint { + int OtherOperandNum = OtherOp; +} + //===----------------------------------------------------------------------===// // Selection DAG Type Profile definitions. 
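// Usage of the Printable-based helpers introduced just above; this mirrors
// the documented "OS << PrintReg(Reg, TRI)" pattern. Reg, Unit, Mask and
// TRI are assumed to be supplied by the caller.
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"

void dumpRegState(unsigned Reg, unsigned Unit, llvm::LaneBitmask Mask,
                  const llvm::TargetRegisterInfo *TRI) {
  llvm::dbgs() << llvm::PrintReg(Reg, TRI) << " unit "
               << llvm::PrintRegUnit(Unit, TRI) << " lanes "
               << llvm::PrintLaneMask(Mask) << '\n';
}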
// @@ -186,6 +191,10 @@ def SDTBrind : SDTypeProfile<0, 1, [ // brind SDTCisPtrTy<0> ]>; +def SDTCatchret : SDTypeProfile<0, 2, [ // catchret + SDTCisVT<0, OtherVT>, SDTCisVT<1, OtherVT> +]>; + def SDTNone : SDTypeProfile<0, 0, []>; // ret, trap def SDTLoad : SDTypeProfile<1, 1, [ // load @@ -201,11 +210,12 @@ def SDTIStore : SDTypeProfile<1, 3, [ // indexed store ]>; def SDTMaskedStore: SDTypeProfile<0, 3, [ // masked store - SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2> + SDTCisPtrTy<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameNumEltsAs<1, 2> ]>; def SDTMaskedLoad: SDTypeProfile<1, 3, [ // masked load - SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3> + SDTCisVec<0>, SDTCisPtrTy<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>, + SDTCisSameNumEltsAs<0, 2> ]>; def SDTMaskedGather: SDTypeProfile<2, 3, [ // masked gather @@ -387,6 +397,7 @@ def umin : SDNode<"ISD::UMIN" , SDTIntBinOp>; def umax : SDNode<"ISD::UMAX" , SDTIntBinOp>; def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>; +def bitreverse : SDNode<"ISD::BITREVERSE" , SDTIntUnaryOp>; def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>; def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>; @@ -412,6 +423,8 @@ def fmad : SDNode<"ISD::FMAD" , SDTFPTernaryOp>; def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>; def fminnum : SDNode<"ISD::FMINNUM" , SDTFPBinOp>; def fmaxnum : SDNode<"ISD::FMAXNUM" , SDTFPBinOp>; +def fminnan : SDNode<"ISD::FMINNAN" , SDTFPBinOp>; +def fmaxnan : SDNode<"ISD::FMAXNAN" , SDTFPBinOp>; def fgetsign : SDNode<"ISD::FGETSIGN" , SDTFPToIntOp>; def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>; def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>; @@ -447,6 +460,12 @@ def brcc : SDNode<"ISD::BR_CC" , SDTBrCC, [SDNPHasChain]>; def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>; def brind : SDNode<"ISD::BRIND" , SDTBrind, [SDNPHasChain]>; def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>; +def catchret : SDNode<"ISD::CATCHRET" , SDTCatchret, + [SDNPHasChain, SDNPSideEffect]>; +def cleanupret : SDNode<"ISD::CLEANUPRET" , SDTNone, [SDNPHasChain]>; +def catchpad : SDNode<"ISD::CATCHPAD" , SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + def trap : SDNode<"ISD::TRAP" , SDTNone, [SDNPHasChain, SDNPSideEffect]>; def debugtrap : SDNode<"ISD::DEBUGTRAP" , SDTNone, @@ -513,6 +532,9 @@ def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>; def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>; def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>, []>; + +// vector_extract/vector_insert are deprecated. extractelt/insertelt +// are preferred. def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", @@ -523,7 +545,7 @@ def concat_vectors : SDNode<"ISD::CONCAT_VECTORS", // This operator does not do subvector type checking. The ARM // backend, at least, needs it. def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", - SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, + SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, []>; // This operator does subvector type checking. 
@@ -815,6 +837,21 @@ def truncstoref64 : PatFrag<(ops node:$val, node:$ptr), return cast(N)->getMemoryVT() == MVT::f64; }]>; +def truncstorevi8 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; + +def truncstorevi16 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; + +def truncstorevi32 : PatFrag<(ops node:$val, node:$ptr), + (truncstore node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; + // indexed store fragments. def istore : PatFrag<(ops node:$val, node:$base, node:$offset), (ist node:$val, node:$base, node:$offset), [{ @@ -889,6 +926,24 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset), return cast(N)->getMemoryVT() == MVT::f32; }]>; +// nontemporal store fragments. +def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->isNonTemporal(); +}]>; + +def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (nontemporalstore node:$val, node:$ptr), [{ + StoreSDNode *St = cast(N); + return St->getAlignment() >= St->getMemoryVT().getStoreSize(); +}]>; + +def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (nontemporalstore node:$val, node:$ptr), [{ + StoreSDNode *St = cast(N); + return St->getAlignment() < St->getMemoryVT().getStoreSize(); +}]>; + // setcc convenience fragments. def setoeq : PatFrag<(ops node:$lhs, node:$rhs), (setcc node:$lhs, node:$rhs, SETOEQ)>; diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h index 53db5aa84292..a7143ac3fa66 100644 --- a/include/llvm/Target/TargetSelectionDAGInfo.h +++ b/include/llvm/Target/TargetSelectionDAGInfo.h @@ -21,7 +21,7 @@ namespace llvm { //===----------------------------------------------------------------------===// -/// TargetSelectionDAGInfo - Targets can subclass this to parameterize the +/// Targets can subclass this to parameterize the /// SelectionDAG lowering and instruction selection process. /// class TargetSelectionDAGInfo { @@ -32,8 +32,8 @@ public: explicit TargetSelectionDAGInfo() = default; virtual ~TargetSelectionDAGInfo(); - /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a - /// memcpy. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memcpy. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple loads/stores and can be /// more efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -56,8 +56,8 @@ public: return SDValue(); } - /// EmitTargetCodeForMemmove - Emit target-specific code that performs a - /// memmove. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memmove. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple loads/stores and can be /// more efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -72,8 +72,8 @@ public: return SDValue(); } - /// EmitTargetCodeForMemset - Emit target-specific code that performs a - /// memset. 
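// The aligned/unaligned nontemporal store fragments above branch on one C++
// predicate; restated here editorially with the same logic as the embedded
// PatFrag code.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool isAlignedNonTemporalStore(const llvm::StoreSDNode *St) {
  // "Aligned" means the store's alignment covers the full store size of its
  // memory VT, exactly as the alignednontemporalstore fragment checks.
  return St->isNonTemporal() &&
         St->getAlignment() >= St->getMemoryVT().getStoreSize();
}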
This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memset. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple stores and can be more /// efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -87,11 +87,10 @@ public: return SDValue(); } - /// EmitTargetCodeForMemcmp - Emit target-specific code that performs a - /// memcmp, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the memcmp and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a memcmp, in cases where that is + /// faster than a libcall. The first returned SDValue is the result of the + /// memcmp and the second is the chain. Both SDValues can be null if a normal + /// libcall should be used. virtual std::pair EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc dl, SDValue Chain, @@ -101,11 +100,10 @@ public: return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForMemchr - Emit target-specific code that performs a - /// memchr, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the memchr and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a memchr, in cases where that is + /// faster than a libcall. The first returned SDValue is the result of the + /// memchr and the second is the chain. Both SDValues can be null if a normal + /// libcall should be used. virtual std::pair EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Src, SDValue Char, SDValue Length, @@ -113,8 +111,8 @@ public: return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForStrcpy - Emit target-specific code that performs a - /// strcpy or stpcpy, in cases where that is faster than a libcall. + /// Emit target-specific code that performs a strcpy or stpcpy, in cases + /// where that is faster than a libcall. /// The first returned SDValue is the result of the copy (the start /// of the destination string for strcpy, a pointer to the null terminator /// for stpcpy) and the second is the chain. Both SDValues can be null @@ -128,11 +126,10 @@ public: return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForStrcmp - Emit target-specific code that performs a - /// strcmp, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the strcmp and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a strcmp, in cases where that is + /// faster than a libcall. + /// The first returned SDValue is the result of the strcmp and the second is + /// the chain. Both SDValues can be null if a normal libcall should be used. 
  virtual std::pair<SDValue, SDValue>
   EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc dl,
                           SDValue Chain,
diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index 07c0c66bfa18..d50aa4932f8f 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_TARGET_TARGETSUBTARGETINFO_H

 #include "llvm/CodeGen/PBQPRAConstraint.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/Support/CodeGen.h"

@@ -81,6 +82,11 @@ public:
   virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
     return nullptr;
   }
+  /// Targets can subclass this hook to select a different DAG scheduler.
+  virtual RegisterScheduler::FunctionPassCtor
+  getDAGScheduler(CodeGenOpt::Level) const {
+    return nullptr;
+  }

   /// getRegisterInfo - If register information is available, return it. If
   /// not, return null. This is kept separate from RegInfo until RegInfo has
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 2ea47301bb4c..0c374a070ce8 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -16,9 +16,11 @@
 #define LLVM_TRANSFORMS_IPO_H

 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"

 namespace llvm {

+class FunctionInfoIndex;
 class ModulePass;
 class Pass;
 class Function;
@@ -84,6 +86,10 @@ ModulePass *createEliminateAvailableExternallyPass();
 ModulePass *createGVExtractionPass(std::vector<GlobalValue *> &GVs,
                                    bool deleteFn = false);

+//===----------------------------------------------------------------------===//
+/// This pass performs iterative function importing from other modules.
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr);
+
 //===----------------------------------------------------------------------===//
 /// createFunctionInliningPass - Return a new pass object that uses a heuristic
 /// to inline direct function calls to small functions.
@@ -209,6 +215,15 @@ ModulePass *createBarrierNoopPass();
 /// to bitsets.
 ModulePass *createLowerBitSetsPass();

+/// \brief This pass exports CFI checks for use by external modules.
+ModulePass *createCrossDSOCFIPass();
+
+//===----------------------------------------------------------------------===//
+// SampleProfilePass - Loads sample profile data from disk and generates
+// IR metadata to reflect the profile.
+ModulePass *createSampleProfileLoaderPass();
+ModulePass *createSampleProfileLoaderPass(StringRef Name);
+
 } // End llvm namespace

 #endif
diff --git a/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
new file mode 100644
index 000000000000..0ff4afe79b0c
--- /dev/null
+++ b/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -0,0 +1,35 @@
+//===-- ForceFunctionAttrs.h - Force function attrs for debugging ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Super simple passes to force specific function attrs from the commandline
+/// into the IR for debugging purposes.
+//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H +#define LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Pass which forces specific function attributes into the IR, primarily as +/// a debugging tool. +class ForceFunctionAttrsPass { +public: + static StringRef name() { return "ForceFunctionAttrsPass"; } + PreservedAnalyses run(Module &M); +}; + +/// Create a legacy pass manager instance of a pass to force function attrs. +Pass *createForceFunctionAttrsLegacyPass(); + +} + +#endif // LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h new file mode 100644 index 000000000000..d7707790a017 --- /dev/null +++ b/include/llvm/Transforms/IPO/FunctionImport.h @@ -0,0 +1,43 @@ +//===- llvm/Transforms/IPO/FunctionImport.h - ThinLTO importing -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUNCTIONIMPORT_H +#define LLVM_FUNCTIONIMPORT_H + +#include "llvm/ADT/StringMap.h" +#include <functional> + +namespace llvm { +class LLVMContext; +class Module; +class FunctionInfoIndex; + +/// The function importer automatically imports functions from other modules +/// based on the provided summary information. +class FunctionImporter { + + /// The summaries index used to trigger importing. + const FunctionInfoIndex &Index; + + /// Factory function to load a Module for a given identifier. + std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader; + +public: + /// Create a Function Importer. + FunctionImporter( + const FunctionInfoIndex &Index, + std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader) + : Index(Index), ModuleLoader(ModuleLoader) {} + + /// Import functions in Module \p M based on the summary information. + bool importFunctions(Module &M); +}; +} + +#endif // LLVM_FUNCTIONIMPORT_H
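[Editor's note: a brief usage sketch, not part of the patch. The module loader is supplied by the client as any callable; parseModuleForImport is a hypothetical stand-in for whatever on-demand bitcode loading the client performs.]

FunctionImporter Importer(Index, [&](StringRef Identifier) {
  // Hypothetical helper: parse the named bitcode file into a Module.
  return parseModuleForImport(Identifier, Context);
});
// Pull referenced function bodies into the destination module.
bool Changed = Importer.importFunctions(DestModule);

diff --git a/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/include/llvm/Transforms/IPO/InferFunctionAttrs.h new file mode 100644 index 000000000000..80afc02c62ae --- /dev/null +++ b/include/llvm/Transforms/IPO/InferFunctionAttrs.h @@ -0,0 +1,38 @@ +//===-- InferFunctionAttrs.h - Infer implicit function attributes ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Interfaces for passes which infer implicit function attributes from the +/// name and signature of function declarations. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H +#define LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// A pass which infers function attributes from the names and signatures of +/// function declarations in a module. +class InferFunctionAttrsPass { +public: + static StringRef name() { return "InferFunctionAttrsPass"; } + PreservedAnalyses run(Module &M, AnalysisManager<Module> *AM); +}; + +/// Create a legacy pass manager instance of a pass to infer function +/// attributes.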
+Pass *createInferFunctionAttrsLegacyPass(); + +} + +#endif // LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H diff --git a/include/llvm/Transforms/IPO/InlinerPass.h b/include/llvm/Transforms/IPO/InlinerPass.h index 6a644ad4a63b..58ef0cbbfb5d 100644 --- a/include/llvm/Transforms/IPO/InlinerPass.h +++ b/include/llvm/Transforms/IPO/InlinerPass.h @@ -20,11 +20,11 @@ #include "llvm/Analysis/CallGraphSCCPass.h" namespace llvm { - class CallSite; - class DataLayout; - class InlineCost; - template - class SmallPtrSet; +class AssumptionCacheTracker; +class CallSite; +class DataLayout; +class InlineCost; +template class SmallPtrSet; /// Inliner - This class contains all of the helper code which is used to /// perform the inlining operations that do not depend on the policy. @@ -84,6 +84,9 @@ private: /// shouldInline - Return true if the inliner should attempt to /// inline at the given CallSite. bool shouldInline(CallSite CS); + +protected: + AssumptionCacheTracker *ACT; }; } // End llvm namespace diff --git a/include/llvm/Transforms/IPO/LowerBitSets.h b/include/llvm/Transforms/IPO/LowerBitSets.h index 55d7d84560a0..e5fb7b98fcb3 100644 --- a/include/llvm/Transforms/IPO/LowerBitSets.h +++ b/include/llvm/Transforms/IPO/LowerBitSets.h @@ -26,8 +26,9 @@ namespace llvm { class DataLayout; -class GlobalVariable; +class GlobalObject; class Value; +class raw_ostream; struct BitSetInfo { // The indices of the set bits in the bitset. @@ -55,8 +56,10 @@ struct BitSetInfo { bool containsGlobalOffset(uint64_t Offset) const; bool containsValue(const DataLayout &DL, - const DenseMap &GlobalLayout, + const DenseMap &GlobalLayout, Value *V, uint64_t COffset = 0) const; + + void print(raw_ostream &OS) const; }; struct BitSetBuilder { diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index 1334dd0da23c..a4e7bce8ef4a 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -15,9 +15,11 @@ #ifndef LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H #define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H +#include #include namespace llvm { +class FunctionInfoIndex; class Pass; class TargetLibraryInfoImpl; class TargetMachine; @@ -81,6 +83,11 @@ public: /// run after everything else. EP_OptimizerLast, + /// EP_VectorizerStart - This extension point allows adding optimization + /// passes before the vectorizer and other highly target specific + /// optimization passes are executed. + EP_VectorizerStart, + /// EP_EnabledOnOptLevel0 - This extension point allows adding passes that /// should not be disabled by O0 optimization level. The passes will be /// inserted after the inlining pass. @@ -109,6 +116,9 @@ public: /// added to the per-module passes. Pass *Inliner; + /// The function summary index to use for function importing. + const FunctionInfoIndex *FunctionIndex; + bool DisableTailCalls; bool DisableUnitAtATime; bool DisableUnrollLoops; diff --git a/include/llvm/Transforms/IPO/StripDeadPrototypes.h b/include/llvm/Transforms/IPO/StripDeadPrototypes.h new file mode 100644 index 000000000000..9dddd12871c4 --- /dev/null +++ b/include/llvm/Transforms/IPO/StripDeadPrototypes.h @@ -0,0 +1,34 @@ +//===-- StripDeadPrototypes.h - Remove unused function declarations -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass loops over all of the functions in the input module, looking for +// dead declarations, and removes them. Dead declarations are declarations of +// functions for which no implementation is available (i.e., declarations for +// unused library functions). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H +#define LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// Pass to remove unused function declarations. +class StripDeadPrototypesPass { +public: + static StringRef name() { return "StripDeadPrototypesPass"; } + PreservedAnalyses run(Module &M); +}; + +} + +#endif // LLVM_TRANSFORMS_IPO_STRIPDEADPROTOTYPES_H diff --git a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h index a6bad343db43..5d2b2d000009 100644 --- a/include/llvm/Transforms/InstCombine/InstCombineWorklist.h +++ b/include/llvm/Transforms/InstCombine/InstCombineWorklist.h @@ -60,13 +60,13 @@ public: /// AddInitialGroup - Add the specified batch of stuff in reverse order, /// which should only be done when the worklist is empty and when the group /// has no duplicates. - void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { + void AddInitialGroup(ArrayRef<Instruction *> List) { assert(Worklist.empty() && "Worklist must be empty to add initial group"); - Worklist.reserve(NumEntries+16); - WorklistMap.resize(NumEntries); - DEBUG(dbgs() << "IC: ADDING: " << NumEntries << " instrs to worklist\n"); - for (unsigned Idx = 0; NumEntries; --NumEntries) { - Instruction *I = List[NumEntries-1]; + Worklist.reserve(List.size()+16); + WorklistMap.resize(List.size()); + DEBUG(dbgs() << "IC: ADDING: " << List.size() << " instrs to worklist\n"); + unsigned Idx = 0; + for (Instruction *I : reverse(List)) { WorklistMap.insert(std::make_pair(I, Idx++)); Worklist.push_back(I); } diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index 250e3893cb15..38dfeb04ace3 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -15,6 +15,7 @@ #define LLVM_TRANSFORMS_INSTRUMENTATION_H #include "llvm/ADT/StringRef.h" +#include "llvm/IR/BasicBlock.h" #include #if defined(__GNUC__) && defined(__linux__) && !defined(ANDROID) @@ -33,6 +34,16 @@ inline void *getDFSanRetValTLSPtrForJIT() { namespace llvm { +class TargetMachine; + +/// Instrumentation passes often insert conditional checks into entry blocks. +/// Call this function before splitting the entry block to move instructions +/// that must remain in the entry block up before the split point. Static +/// allocas and llvm.localescape calls, for example, must remain in the entry +/// block. +BasicBlock::iterator PrepareToSplitEntryBlock(BasicBlock &BB, + BasicBlock::iterator IP); + class ModulePass; class FunctionPass; @@ -68,6 +79,11 @@ struct GCOVOptions { ModulePass *createGCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()); +// PGO Instrumentation +ModulePass *createPGOInstrumentationGenPass(); +ModulePass * +createPGOInstrumentationUsePass(StringRef Filename = StringRef("")); +
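[Editor's note: illustrative only, not part of the patch. A pass using the PrepareToSplitEntryBlock helper declared above would hoist the must-stay-in-entry instructions before splitting; F, DT, and LI are assumed to come from the surrounding pass.]

BasicBlock &Entry = F.getEntryBlock();
BasicBlock::iterator IP = Entry.getFirstInsertionPt();
// Move static allocas and llvm.localescape calls above the split point.
IP = PrepareToSplitEntryBlock(Entry, IP);
// Splitting at IP is now safe, e.g. via SplitBlock(&Entry, &*IP, &DT, &LI).

/// Options for the frontend instrumentation based profiling pass.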
struct InstrProfOptions { InstrProfOptions() : NoRedZone(false) {} @@ -84,8 +100,10 @@ ModulePass *createInstrProfilingPass( const InstrProfOptions &Options = InstrProfOptions()); // Insert AddressSanitizer (address sanity checking) instrumentation -FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false); -ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false); +FunctionPass *createAddressSanitizerFunctionPass(bool CompileKernel = false, + bool Recover = false); +ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false, + bool Recover = false); // Insert MemorySanitizer instrumentation (detection of uninitialized reads) FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0); @@ -134,7 +152,25 @@ FunctionPass *createBoundsCheckingPass(); /// \brief This pass splits the stack into a safe stack and an unsafe stack to /// protect against stack-based overflow vulnerabilities. -FunctionPass *createSafeStackPass(); +FunctionPass *createSafeStackPass(const TargetMachine *TM = nullptr); + +/// \brief Calculate what to divide by to scale counts. +/// +/// Given the maximum count, calculate a divisor that will scale all the +/// weights to strictly less than UINT32_MAX. +static inline uint64_t calculateCountScale(uint64_t MaxCount) { + return MaxCount < UINT32_MAX ? 1 : MaxCount / UINT32_MAX + 1; +} + +/// \brief Scale an individual branch count. +/// +/// Scale a 64-bit weight down to 32-bits using \c Scale. +/// +static inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) { + uint64_t Scaled = Count / Scale; + assert(Scaled <= UINT32_MAX && "overflow 32-bits"); + return Scaled; +} } // End llvm namespace diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 4676c95d7cd4..9173de1112f3 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -93,7 +93,7 @@ FunctionPass *createBitTrackingDCEPass(); // // SROA - Replace aggregates or pieces of aggregates with scalar SSA values. // -FunctionPass *createSROAPass(bool RequiresDomTree = true); +FunctionPass *createSROAPass(); //===----------------------------------------------------------------------===// // @@ -161,7 +161,8 @@ Pass *createLoopStrengthReducePass(); // It can also be configured to focus on size optimizations only. // Pass *createGlobalMergePass(const TargetMachine *TM, unsigned MaximalOffset, - bool OnlyOptimizeForSize = false); + bool OnlyOptimizeForSize = false, + bool MergeExternalByDefault = false); //===----------------------------------------------------------------------===// // @@ -405,13 +406,6 @@ FunctionPass *createLowerExpectIntrinsicPass(); // FunctionPass *createPartiallyInlineLibCallsPass(); -//===----------------------------------------------------------------------===// -// -// SampleProfilePass - Loads sample profile data from disk and generates -// IR metadata to reflect the profile. -FunctionPass *createSampleProfileLoaderPass(); -FunctionPass *createSampleProfileLoaderPass(StringRef Name); - //===----------------------------------------------------------------------===// // // ScalarizerPass - Converts vector operations into scalar operations @@ -486,6 +480,12 @@ FunctionPass *createNaryReassociatePass(); // FunctionPass *createLoopDistributePass(); +//===----------------------------------------------------------------------===// +// +// LoopLoadElimination - Perform loop-aware load elimination. 
+// +FunctionPass *createLoopLoadEliminationPass(); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Scalar/ADCE.h b/include/llvm/Transforms/Scalar/ADCE.h new file mode 100644 index 000000000000..f9bc7b77c14a --- /dev/null +++ b/include/llvm/Transforms/Scalar/ADCE.h @@ -0,0 +1,38 @@ +//===- ADCE.h - Aggressive dead code elimination --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface for the Aggressive Dead Code Elimination +// pass. This pass optimistically assumes that all instructions are dead until +// proven otherwise, allowing it to eliminate dead computations that other DCE +// passes do not catch, particularly involving loop computations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_ADCE_H +#define LLVM_TRANSFORMS_SCALAR_ADCE_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// A DCE pass that assumes instructions are dead until proven otherwise. +/// +/// This pass eliminates dead code by optimistically assuming that all +/// instructions are dead until proven otherwise. This allows it to eliminate +/// dead computations that other DCE passes do not catch, particularly involving +/// loop computations. +class ADCEPass { +public: + static StringRef name() { return "ADCEPass"; } + PreservedAnalyses run(Function &F); +}; +} + +#endif // LLVM_TRANSFORMS_SCALAR_ADCE_H diff --git a/include/llvm/Transforms/Scalar/SROA.h b/include/llvm/Transforms/Scalar/SROA.h new file mode 100644 index 000000000000..f90cc7b686ba --- /dev/null +++ b/include/llvm/Transforms/Scalar/SROA.h @@ -0,0 +1,129 @@ +//===- SROA.h - Scalar Replacement Of Aggregates ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the interface for LLVM's Scalar Replacement of +/// Aggregates pass. This pass provides both aggregate splitting and the +/// primary SSA formation used in the compiler. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SROA_H +#define LLVM_TRANSFORMS_SCALAR_SROA_H + +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// A private "module" namespace for types and utilities used by SROA. These +/// are implementation details and should not be used by clients. +namespace sroa { +class AllocaSliceRewriter; +class AllocaSlices; +class Partition; +class SROALegacyPass; +} + +/// \brief An optimization pass providing Scalar Replacement of Aggregates. +/// +/// This pass takes allocations which can be completely analyzed (that is, they +/// don't escape) and tries to turn them into scalar SSA values. There are +/// a few steps to this process. +/// +/// 1) It takes allocations of aggregates and analyzes the ways in which they +/// are used to try to split them into smaller allocations, ideally of +/// a single scalar data type. 
It will split up memcpy and memset accesses +/// as necessary and try to isolate individual scalar accesses. +/// 2) It will transform accesses into forms which are suitable for SSA value +/// promotion. This can be replacing a memset with a scalar store of an +/// integer value, or it can involve speculating operations on a PHI or +/// select to be a PHI or select of the results. +/// 3) Finally, this will try to detect a pattern of accesses which map cleanly +/// onto insert and extract operations on a vector value, and convert them to +/// this form. By doing so, it will enable promotion of vector aggregates to +/// SSA vector values. +class SROA { + LLVMContext *C; + DominatorTree *DT; + AssumptionCache *AC; + + /// \brief Worklist of alloca instructions to simplify. + /// + /// Each alloca in the function is added to this. Each new alloca formed gets + /// added to it as well to recursively simplify unless that alloca can be + /// directly promoted. Finally, each time we rewrite a use of an alloca other + /// than the one being actively rewritten, we add it back onto the list if not + /// already present to ensure it is re-visited. + SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist; + + /// \brief A collection of instructions to delete. + /// We try to batch deletions to simplify code and make things a bit more + /// efficient. + SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts; + + /// \brief Post-promotion worklist. + /// + /// Sometimes we discover an alloca which has a high probability of becoming + /// viable for SROA after a round of promotion takes place. In those cases, + /// the alloca is enqueued here for re-processing. + /// + /// Note that we have to be very careful to clear allocas out of this list in + /// the event they are deleted. + SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> PostPromotionWorklist; + + /// \brief A collection of alloca instructions we can directly promote. + std::vector<AllocaInst *> PromotableAllocas; + + /// \brief A worklist of PHIs to speculate prior to promoting allocas. + /// + /// All of these PHIs have been checked for the safety of speculation and by + /// being speculated will allow promoting allocas currently in the promotable + /// queue. + SetVector<PHINode *, SmallVector<PHINode *, 2>> SpeculatablePHIs; + + /// \brief A worklist of select instructions to speculate prior to promoting + /// allocas. + /// + /// All of these select instructions have been checked for the safety of + /// speculation and by being speculated will allow promoting allocas + /// currently in the promotable queue. + SetVector<SelectInst *, SmallVector<SelectInst *, 2>> SpeculatableSelects; + +public: + SROA() : C(nullptr), DT(nullptr), AC(nullptr) {} + + static StringRef name() { return "SROA"; } + + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM); + +private: + friend class sroa::AllocaSliceRewriter; + friend class sroa::SROALegacyPass; + + /// Helper used by both the public run method and by the legacy pass.
+ PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT, + AssumptionCache &RunAC); + + bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS); + AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS, + sroa::Partition &P); + bool splitAlloca(AllocaInst &AI, sroa::AllocaSlices &AS); + bool runOnAlloca(AllocaInst &AI); + void clobberUse(Use &U); + void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas); + bool promoteAllocas(Function &F); +}; + +} + +#endif diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 9b919b62ee41..13c856dfdc9a 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -22,7 +22,6 @@ namespace llvm { -class AliasAnalysis; class MemoryDependenceAnalysis; class DominatorTree; class LoopInfo; @@ -40,7 +39,7 @@ void DeleteDeadBlock(BasicBlock *BB); /// any single-entry PHI nodes in it, fold them away. This handles the case /// when all entries to the PHI nodes in a block are guaranteed equal, such as /// when the block has exactly one predecessor. -void FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA = nullptr, +void FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceAnalysis *MemDep = nullptr); /// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it @@ -54,7 +53,6 @@ bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI = nullptr); /// if possible. The return value indicates success or failure. bool MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, - AliasAnalysis *AA = nullptr, MemoryDependenceAnalysis *MemDep = nullptr); // ReplaceInstWithValue - Replace all uses of an instruction (specified by BI) @@ -82,27 +80,15 @@ void ReplaceInstWithInst(Instruction *From, Instruction *To); /// This provides a builder interface for overriding the default options used /// during critical edge splitting. struct CriticalEdgeSplittingOptions { - AliasAnalysis *AA; DominatorTree *DT; LoopInfo *LI; bool MergeIdenticalEdges; bool DontDeleteUselessPHIs; bool PreserveLCSSA; - CriticalEdgeSplittingOptions() - : AA(nullptr), DT(nullptr), LI(nullptr), MergeIdenticalEdges(false), - DontDeleteUselessPHIs(false), PreserveLCSSA(false) {} - - /// \brief Basic case of setting up all the analysis. - CriticalEdgeSplittingOptions(AliasAnalysis *AA, DominatorTree *DT = nullptr, + CriticalEdgeSplittingOptions(DominatorTree *DT = nullptr, LoopInfo *LI = nullptr) - : AA(AA), DT(DT), LI(LI), MergeIdenticalEdges(false), - DontDeleteUselessPHIs(false), PreserveLCSSA(false) {} - - /// \brief A common pattern is to preserve the dominator tree and loop - /// info but not care about AA. - CriticalEdgeSplittingOptions(DominatorTree *DT, LoopInfo *LI) - : AA(nullptr), DT(DT), LI(LI), MergeIdenticalEdges(false), + : DT(DT), LI(LI), MergeIdenticalEdges(false), DontDeleteUselessPHIs(false), PreserveLCSSA(false) {} CriticalEdgeSplittingOptions &setMergeIdenticalEdges() { @@ -214,15 +200,13 @@ BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, /// It will have Suffix+".split_lp". See SplitLandingPadPredecessors for more /// details on this case. /// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, -/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses.
-/// In particular, it does not preserve LoopSimplify (because it's -/// complicated to handle the case where one of the edges being split -/// is an exit of a loop with other exits). +/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCSSA but +/// no other analyses. In particular, it does not preserve LoopSimplify +/// (because it's complicated to handle the case where one of the edges being +/// split is an exit of a loop with other exits). /// BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, const char *Suffix, - AliasAnalysis *AA = nullptr, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, bool PreserveLCSSA = false); @@ -234,17 +218,15 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, /// OrigBB is cloned into both of the new basic blocks. The new blocks are given /// the suffixes 'Suffix1' and 'Suffix2', and are returned in the NewBBs vector. /// -/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree, -/// DominanceFrontier, LoopInfo, and LCCSA but no other analyses. In particular, -/// it does not preserve LoopSimplify (because it's complicated to handle the -/// case where one of the edges being split is an exit of a loop with other -/// exits). +/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCSSA but +/// no other analyses. In particular, it does not preserve LoopSimplify +/// (because it's complicated to handle the case where one of the edges being +/// split is an exit of a loop with other exits). /// void SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef<BasicBlock *> Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl<BasicBlock *> &NewBBs, - AliasAnalysis *AA = nullptr, DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, bool PreserveLCSSA = false); diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index 2caa9a2462df..92a1d52f1011 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -20,9 +20,11 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include <functional> namespace llvm { @@ -43,14 +45,21 @@ class DataLayout; class Loop; class LoopInfo; class AllocaInst; -class AliasAnalysis; class AssumptionCacheTracker; class DominatorTree; -/// CloneModule - Return an exact copy of the specified module +/// Return an exact copy of the specified module /// -Module *CloneModule(const Module *M); -Module *CloneModule(const Module *M, ValueToValueMapTy &VMap); +std::unique_ptr<Module> CloneModule(const Module *M); +std::unique_ptr<Module> CloneModule(const Module *M, ValueToValueMapTy &VMap); + +/// Return a copy of the specified module. The ShouldCloneDefinition function +/// controls whether a specific GlobalValue's definition is cloned. If the +/// function returns false, the module copy will contain an external reference +/// in place of the global definition. +std::unique_ptr<Module> +CloneModule(const Module *M, ValueToValueMapTy &VMap, + std::function<bool(const GlobalValue *)> ShouldCloneDefinition); /// ClonedCodeInfo - This struct can be used to capture information about code /// being cloned, while it is being cloned. @@ -65,6 +74,11 @@ struct ClonedCodeInfo { /// size. bool ContainsDynamicAllocas; + /// All cloned call sites that have operand bundles attached are appended to + /// this vector.
This vector may contain nulls or undefs if some of the + /// originally inserted callsites were DCE'ed after they were cloned. + std::vector OperandBundleCallSites; + ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {} }; @@ -193,14 +207,12 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, class InlineFunctionInfo { public: explicit InlineFunctionInfo(CallGraph *cg = nullptr, - AliasAnalysis *AA = nullptr, AssumptionCacheTracker *ACT = nullptr) - : CG(cg), AA(AA), ACT(ACT) {} + : CG(cg), ACT(ACT) {} /// CG - If non-null, InlineFunction will update the callgraph to reflect the /// changes it makes. CallGraph *CG; - AliasAnalysis *AA; AssumptionCacheTracker *ACT; /// StaticAllocas - InlineFunction fills this in with all static allocas that @@ -228,11 +240,11 @@ public: /// function by one level. /// bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI, - bool InsertLifetime = true); + AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, - bool InsertLifetime = true); + AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - bool InsertLifetime = true); + AAResults *CalleeAAR = nullptr, bool InsertLifetime = true); /// \brief Clones a loop \p OrigLoop. Returns the loop and the blocks in \p /// Blocks. diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index a1bb367ac7b6..81b376f0c212 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -15,6 +15,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H #define LLVM_TRANSFORMS_UTILS_LOCAL_H +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" @@ -40,7 +41,6 @@ class DataLayout; class TargetLibraryInfo; class TargetTransformInfo; class DIBuilder; -class AliasAnalysis; class DominatorTree; template class SmallVectorImpl; @@ -271,11 +271,34 @@ bool LowerDbgDeclare(Function &F); /// an alloca, if any. DbgDeclareInst *FindAllocaDbgDeclare(Value *V); -/// \brief Replaces llvm.dbg.declare instruction when an alloca is replaced with -/// a new value. If Deref is true, tan additional DW_OP_deref is prepended to -/// the expression. +/// \brief Replaces llvm.dbg.declare instruction when the address it describes +/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is +/// prepended to the expression. If Offset is non-zero, a constant displacement +/// is added to the expression (after the optional Deref). Offset can be +/// negative. +bool replaceDbgDeclare(Value *Address, Value *NewAddress, + Instruction *InsertBefore, DIBuilder &Builder, + bool Deref, int Offset); + +/// \brief Replaces llvm.dbg.declare instruction when the alloca it describes +/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is +/// prepended to the expression. If Offset is non-zero, a constant displacement +/// is added to the expression (after the optional Deref). Offset can be +/// negative. New llvm.dbg.declare is inserted immediately before AI. bool replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool Deref); + DIBuilder &Builder, bool Deref, int Offset = 0); + +/// \brief Insert an unreachable instruction before the specified +/// instruction, making it and the rest of the code in the block dead. 
+void changeToUnreachable(Instruction *I, bool UseLLVMTrap); + +/// Replace 'BB's terminator with one that does not have an unwind successor +/// block. Rewrites `invoke` to `call`, etc. Updates any PHIs in unwind +/// successor. +/// +/// \param BB Block whose terminator will be replaced. Its terminator must +/// have an unwind successor. +void removeUnwindEdge(BasicBlock *BB); /// \brief Remove all blocks that cannot be reached from the function's entry. /// @@ -291,6 +314,22 @@ void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> Kn /// the given edge. Returns the number of replacements made. unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Edge); +/// \brief Replace each use of 'From' with 'To' if that use is dominated by +/// the given BasicBlock. Returns the number of replacements made. +unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, + const BasicBlock *BB); + + +/// \brief Return true if the CallSite CS calls a gc leaf function. +/// +/// A leaf function is a function that does not safepoint the thread during its +/// execution. During a call or invoke to such a function, the caller's stack +/// does not have to be made parseable. +/// +/// Most passes can and should ignore this information, and it is only used +/// during lowering by the GC infrastructure. +bool callsGCLeafFunction(ImmutableCallSite CS); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Utils/LoopUtils.h b/include/llvm/Transforms/Utils/LoopUtils.h index 15747bc7f1ac..17aaee03e4a8 100644 --- a/include/llvm/Transforms/Utils/LoopUtils.h +++ b/include/llvm/Transforms/Utils/LoopUtils.h @@ -15,11 +15,11 @@ #define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" namespace llvm { -class AliasAnalysis; class AliasSet; class AliasSetTracker; class AssumptionCache; @@ -85,24 +85,35 @@ public: RecurrenceDescriptor() : StartValue(nullptr), LoopExitInstr(nullptr), Kind(RK_NoRecurrence), - MinMaxKind(MRK_Invalid) {} + MinMaxKind(MRK_Invalid), UnsafeAlgebraInst(nullptr), + RecurrenceType(nullptr), IsSigned(false) {} RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K, - MinMaxRecurrenceKind MK) - : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK) {} + MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT, + bool Signed, SmallPtrSetImpl<Instruction *> &CI) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK), + UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) { + CastInsts.insert(CI.begin(), CI.end()); + } /// This POD struct holds information about a potential recurrence operation.
class InstDesc { public: - InstDesc(bool IsRecur, Instruction *I) - : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid) {} + InstDesc(bool IsRecur, Instruction *I, Instruction *UAI = nullptr) + : IsRecurrence(IsRecur), PatternLastInst(I), MinMaxKind(MRK_Invalid), + UnsafeAlgebraInst(UAI) {} - InstDesc(Instruction *I, MinMaxRecurrenceKind K) - : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K) {} + InstDesc(Instruction *I, MinMaxRecurrenceKind K, Instruction *UAI = nullptr) + : IsRecurrence(true), PatternLastInst(I), MinMaxKind(K), + UnsafeAlgebraInst(UAI) {} bool isRecurrence() { return IsRecurrence; } + bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + + Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + MinMaxRecurrenceKind getMinMaxKind() { return MinMaxKind; } Instruction *getPatternInst() { return PatternLastInst; } @@ -115,6 +126,8 @@ public: Instruction *PatternLastInst; // If this is a min/max pattern the comparison predicate. MinMaxRecurrenceKind MinMaxKind; + // Recurrence has unsafe algebra. + Instruction *UnsafeAlgebraInst; }; /// Returns a struct describing if the instruction 'I' can be a recurrence @@ -125,7 +138,7 @@ public: static InstDesc isRecurrenceInstr(Instruction *I, RecurrenceKind Kind, InstDesc &Prev, bool HasFunNoNaNAttr); - /// Returns true if instuction I has multiple uses in Insts + /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl<Instruction *> &Insts); @@ -167,6 +180,51 @@ public: Instruction *getLoopExitInstr() { return LoopExitInstr; } + /// Returns true if the recurrence has unsafe algebra which requires a relaxed + /// floating-point model. + bool hasUnsafeAlgebra() { return UnsafeAlgebraInst != nullptr; } + + /// Returns first unsafe algebra instruction in the PHI node's use-chain. + Instruction *getUnsafeAlgebraInst() { return UnsafeAlgebraInst; } + + /// Returns true if the recurrence kind is an integer kind. + static bool isIntegerRecurrenceKind(RecurrenceKind Kind); + + /// Returns true if the recurrence kind is a floating point kind. + static bool isFloatingPointRecurrenceKind(RecurrenceKind Kind); + + /// Returns true if the recurrence kind is an arithmetic kind. + static bool isArithmeticRecurrenceKind(RecurrenceKind Kind); + + /// Determines if Phi may have been type-promoted. If Phi has a single user + /// that ANDs the Phi with a type mask, return the user. RT is updated to + /// account for the narrower bit width represented by the mask, and the AND + /// instruction is added to CI. + static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, + SmallPtrSetImpl<Instruction *> &Visited, + SmallPtrSetImpl<Instruction *> &CI); + + /// Returns true if all the source operands of a recurrence are either + /// SExtInsts or ZExtInsts. This function is intended to be used with + /// lookThroughAnd to determine if the recurrence has been type-promoted. The + /// source operands are added to CI, and IsSigned is updated to indicate if + /// all source operands are SExtInsts. + static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit, + Type *RT, bool &IsSigned, + SmallPtrSetImpl<Instruction *> &Visited, + SmallPtrSetImpl<Instruction *> &CI); + + /// Returns the type of the recurrence. This type can be narrower than the + /// actual type of the Phi if the recurrence has been type-promoted. + Type *getRecurrenceType() { return RecurrenceType; } + + /// Returns a reference to the instructions used for type-promoting the + /// recurrence.
+ SmallPtrSet<Instruction *, 8> &getCastInsts() { return CastInsts; } + + /// Returns true if all source operands of the recurrence are SExtInsts. + bool isSigned() { return IsSigned; } + private: // The starting value of the recurrence. // It does not have to be zero! @@ -177,19 +235,74 @@ private: RecurrenceKind Kind; // If this a min/max recurrence the kind of recurrence. MinMaxRecurrenceKind MinMaxKind; + // First occurrence of unsafe algebra in the PHI's use-chain. + Instruction *UnsafeAlgebraInst; + // The type of the recurrence. + Type *RecurrenceType; + // True if all source operands of the recurrence are SExtInsts. + bool IsSigned; + // Instructions used for type-promoting the recurrence. + SmallPtrSet<Instruction *, 8> CastInsts; }; -BasicBlock *InsertPreheaderForLoop(Loop *L, Pass *P); +/// A struct for saving information about induction variables. +class InductionDescriptor { +public: + /// This enum represents the kinds of inductions that we support. + enum InductionKind { + IK_NoInduction, ///< Not an induction variable. + IK_IntInduction, ///< Integer induction variable. Step = C. + IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem). + }; + +public: + /// Default constructor - creates an invalid induction. + InductionDescriptor() + : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {} + + /// Get the consecutive direction. Returns: + /// 0 - unknown or non-consecutive. + /// 1 - consecutive and increasing. + /// -1 - consecutive and decreasing. + int getConsecutiveDirection() const; + + /// Compute the transformed value of Index at offset StartValue using step + /// StepValue. + /// For integer induction, returns StartValue + Index * StepValue. + /// For pointer induction, returns StartValue[Index * StepValue]. + /// FIXME: The newly created binary instructions should contain nsw/nuw + /// flags, which can be found from the original scalar operations. + Value *transform(IRBuilder<> &B, Value *Index) const; + + Value *getStartValue() const { return StartValue; } + InductionKind getKind() const { return IK; } + ConstantInt *getStepValue() const { return StepValue; } + + static bool isInductionPHI(PHINode *Phi, ScalarEvolution *SE, + InductionDescriptor &D); + +private: + /// Private constructor - used by \c isInductionPHI. + InductionDescriptor(Value *Start, InductionKind K, ConstantInt *Step); + + /// Start value. + TrackingVH<Value> StartValue; + /// Induction kind. + InductionKind IK; + /// Step value. + ConstantInt *StepValue; +}; + +BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA); /// \brief Simplify each loop in a loop nest recursively. /// /// This takes a potentially un-simplified loop L (and its children) and turns -/// it into a simplified loop nest with preheaders and single backedges. It -/// will optionally update \c AliasAnalysis and \c ScalarEvolution analyses if -/// passed into it. -bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, - AliasAnalysis *AA = nullptr, ScalarEvolution *SE = nullptr, - AssumptionCache *AC = nullptr); +/// it into a simplified loop nest with preheaders and single backedges. It will +/// update \c AliasAnalysis and \c ScalarEvolution analyses if they're non-null. +bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, + AssumptionCache *AC, bool PreserveLCSSA);
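[Editor's note: illustrative call site, not part of the patch, showing the new explicit-analyses signature of simplifyLoop; L, DT, LI, SE, and AC are assumed to have been obtained from the surrounding pass.]

// The caller now passes analyses directly instead of a Pass pointer.
bool Changed = simplifyLoop(L, DT, LI, SE, AC, /*PreserveLCSSA=*/true);

/// \brief Put loop into LCSSA form. /// @@ -203,7 +316,7 @@ bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, /// /// Returns true if any modifications are made to the loop.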
bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE = nullptr); + ScalarEvolution *SE); /// \brief Put a loop nest into LCSSA form. /// @@ -215,7 +328,7 @@ bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, /// /// Returns true if any modifications are made to the loop. bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE = nullptr); + ScalarEvolution *SE); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in @@ -242,10 +355,10 @@ bool hoistRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, /// \brief Try to promote memory values to scalars by sinking stores out of /// the loop and moving loads to before the loop. We do this by looping over -/// the stores in the loop, looking for stores to Must pointers which are +/// the stores in the loop, looking for stores to Must pointers which are /// loop invariant. It takes AliasSet, Loop exit blocks vector, loop exit blocks /// insertion point vector, PredIteratorCache, LoopInfo, DominatorTree, Loop, -/// AliasSet information for all instructions of the loop and loop safety +/// AliasSet information for all instructions of the loop and loop safety /// information as arguments. It returns changed status. bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock *> &, SmallVectorImpl<Instruction *> &, @@ -254,15 +367,13 @@ bool promoteLoopAccessesToScalars(AliasSet &, SmallVectorImpl<BasicBlock *> &, LICMSafetyInfo *); /// \brief Computes safety information for a loop -/// checks loop body & header for the possiblity of may throw +/// checks loop body & header for the possibility of a may-throw /// exception, it takes LICMSafetyInfo and loop as argument. /// Updates safety information in LICMSafetyInfo argument. void computeLICMSafetyInfo(LICMSafetyInfo *, Loop *); -/// \brief Checks if the given PHINode in a loop header is an induction -/// variable. Returns true if this is an induction PHI along with the step -/// value. -bool isInductionPHI(PHINode *, ScalarEvolution *, ConstantInt *&); +/// \brief Returns the instructions that use values defined in the loop. +SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L); } #endif diff --git a/include/llvm/Transforms/Utils/LoopVersioning.h b/include/llvm/Transforms/Utils/LoopVersioning.h index 009fba48c6a3..3b70594e0b63 100644 --- a/include/llvm/Transforms/Utils/LoopVersioning.h +++ b/include/llvm/Transforms/Utils/LoopVersioning.h @@ -16,13 +16,17 @@ #ifndef LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H #define LLVM_TRANSFORMS_UTILS_LOOPVERSIONING_H +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Transforms/Utils/LoopUtils.h" namespace llvm { class Loop; class LoopAccessInfo; class LoopInfo; +class ScalarEvolution; /// \brief This class emits a version of the loop where run-time checks ensure /// that may-alias pointers can't overlap. @@ -31,13 +35,13 @@ class LoopInfo; /// already has a preheader. class LoopVersioning { public: + /// \brief Expects LoopAccessInfo, Loop, LoopInfo, DominatorTree as input. + /// It uses the runtime checks provided by the user. If \p UseLAIChecks is true, + /// we will retain the default checks made by LAI. Otherwise, construct an + /// object having no checks and we expect the user to add them.
LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, - const SmallVector<int, 8> *PtrToPartition = nullptr); - - /// \brief Returns true if we need memchecks to disambiguate may-aliasing - /// accesses. - bool needsRuntimeChecks() const; + DominatorTree *DT, ScalarEvolution *SE, + bool UseLAIChecks = true); /// \brief Performs the CFG manipulation part of versioning the loop including /// the DominatorTree and LoopInfo updates. @@ -52,15 +56,11 @@ public: /// analyze L /// if versioning is necessary version L /// transform L - void versionLoop(Pass *P); + void versionLoop() { versionLoop(findDefsUsedOutsideOfLoop(VersionedLoop)); } - /// \brief Adds the necessary PHI nodes for the versioned loops based on the - /// loop-defined values used outside of the loop. - /// - /// This needs to be called after versionLoop if there are defs in the loop - /// that are used outside the loop. FIXME: this should be invoked internally - /// by versionLoop and made private. - void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside); + /// \brief Same but if the client has already precomputed the set of values + /// used outside the loop, this API allows passing that. + void versionLoop(const SmallVectorImpl<Instruction *> &DefsUsedOutside); /// \brief Returns the versioned loop. Control flows here if pointers in the /// loop don't alias (i.e. all memchecks passed). (This loop is actually the @@ -71,7 +71,21 @@ public: /// loop may alias (i.e. one of the memchecks failed). Loop *getNonVersionedLoop() { return NonVersionedLoop; } + /// \brief Sets the runtime alias checks for versioning the loop. + void setAliasChecks( + const SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks); + + /// \brief Sets the runtime SCEV checks for versioning the loop. + void setSCEVChecks(SCEVUnionPredicate Check); + private: + /// \brief Adds the necessary PHI nodes for the versioned loops based on the + /// loop-defined values used outside of the loop. + /// + /// This needs to be called after versionLoop if there are defs in the loop + /// that are used outside the loop. + void addPHINodes(const SmallVectorImpl<Instruction *> &DefsUsedOutside); + /// \brief The original loop. This becomes the "versioned" one. I.e., /// control flows here if pointers in the loop don't alias. Loop *VersionedLoop; @@ -79,21 +93,21 @@ private: /// loop may alias (memchecks failed). Loop *NonVersionedLoop; - /// \brief For each memory pointer it contains the partitionId it is used in. - /// If nullptr, no partitioning is used. - /// - /// The I-th entry corresponds to I-th entry in LAI.getRuntimePointerCheck(). - /// If the pointer is used in multiple partitions the entry is set to -1. - const SmallVector<int, 8> *PtrToPartition; - /// \brief This maps the instructions from VersionedLoop to their counterpart /// in NonVersionedLoop. ValueToValueMapTy VMap; + /// \brief The set of alias checks that we are versioning for. + SmallVector<RuntimePointerChecking::PointerCheck, 4> AliasChecks; + + /// \brief The set of SCEV checks that we are versioning for. + SCEVUnionPredicate Preds; + /// \brief Analyses used.
const LoopAccessInfo &LAI; LoopInfo *LI; DominatorTree *DT; + ScalarEvolution *SE; }; } diff --git a/include/llvm/Transforms/Utils/ModuleUtils.h b/include/llvm/Transforms/Utils/ModuleUtils.h index 622265bae143..0f23d34de5db 100644 --- a/include/llvm/Transforms/Utils/ModuleUtils.h +++ b/include/llvm/Transforms/Utils/ModuleUtils.h @@ -15,6 +15,7 @@ #define LLVM_TRANSFORMS_UTILS_MODULEUTILS_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include <utility> // for std::pair namespace llvm { @@ -56,7 +57,8 @@ Function *checkSanitizerInterfaceFunction(Constant *FuncOrBitcast); /// respectively. std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions( Module &M, StringRef CtorName, StringRef InitName, - ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs); + ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs, + StringRef VersionCheckName = StringRef()); } // End llvm namespace #endif // LLVM_TRANSFORMS_UTILS_MODULEUTILS_H diff --git a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index ed0841c46c27..425ecd3cfb5e 100644 --- a/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -378,7 +378,7 @@ public: void FindExistingPHI(BlkT *BB, BlockListTy *BlockList) { for (typename BlkT::iterator BBI = BB->begin(), BBE = BB->end(); BBI != BBE; ++BBI) { - PhiT *SomePHI = Traits::InstrIsPHI(BBI); + PhiT *SomePHI = Traits::InstrIsPHI(&*BBI); if (!SomePHI) break; if (CheckIfPHIMatches(SomePHI)) { diff --git a/include/llvm/Transforms/Utils/SimplifyIndVar.h b/include/llvm/Transforms/Utils/SimplifyIndVar.h index dcb1d67cbf75..3c55e64537c7 100644 --- a/include/llvm/Transforms/Utils/SimplifyIndVar.h +++ b/include/llvm/Transforms/Utils/SimplifyIndVar.h @@ -25,7 +25,7 @@ class CastInst; class DominatorTree; class IVUsers; class Loop; -class LPPassManager; +class LoopInfo; class PHINode; class ScalarEvolution; @@ -57,13 +57,14 @@ public: /// simplifyUsersOfIV - Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. -bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl<WeakVH> &Dead, IVVisitor *V = nullptr); +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead, + IVVisitor *V = nullptr); /// SimplifyLoopIVs - Simplify users of induction variables within this /// loop. This does not actually change or add IVs.
-bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl &Dead); +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl &Dead); } // namespace llvm diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 41159603aae5..410a075aeb98 100644 --- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -131,8 +131,11 @@ private: Value *optimizePow(CallInst *CI, IRBuilder<> &B); Value *optimizeExp2(CallInst *CI, IRBuilder<> &B); Value *optimizeFabs(CallInst *CI, IRBuilder<> &B); + Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B); + Value *optimizeLog(CallInst *CI, IRBuilder<> &B); Value *optimizeSqrt(CallInst *CI, IRBuilder<> &B); Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B); + Value *optimizeTan(CallInst *CI, IRBuilder<> &B); // Integer Library Call Optimizations Value *optimizeFFS(CallInst *CI, IRBuilder<> &B); diff --git a/include/llvm/Transforms/Utils/SplitModule.h b/include/llvm/Transforms/Utils/SplitModule.h new file mode 100644 index 000000000000..7d896d1993d6 --- /dev/null +++ b/include/llvm/Transforms/Utils/SplitModule.h @@ -0,0 +1,43 @@ +//===- SplitModule.h - Split a module into partitions -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the function llvm::SplitModule, which splits a module +// into multiple linkable partitions. It can be used to implement parallel code +// generation for link-time optimization. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_SPLITMODULE_H +#define LLVM_TRANSFORMS_UTILS_SPLITMODULE_H + +#include +#include + +namespace llvm { + +class Module; +class StringRef; + +/// Splits the module M into N linkable partitions. The function ModuleCallback +/// is called N times passing each individual partition as the MPart argument. +/// +/// FIXME: This function does not deal with the somewhat subtle symbol +/// visibility issues around module splitting, including (but not limited to): +/// +/// - Internal symbols should not collide with symbols defined outside the +/// module. +/// - Internal symbols defined in module-level inline asm should be visible to +/// each partition. 
+void SplitModule( + std::unique_ptr<Module> M, unsigned N, + std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback); + +} // End llvm namespace + +#endif
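[Editor's note: a usage sketch, not part of the patch, splitting a module into four partitions and writing each to its own bitcode file; the output naming is illustrative, and the necessary Bitcode/ReaderWriter.h, StringExtras.h, and raw_ostream.h includes are assumed.]

unsigned PartIdx = 0;
SplitModule(std::move(M), 4, [&](std::unique_ptr<Module> MPart) {
  std::error_code EC;
  // Each partition is independently linkable, enabling parallel codegen.
  raw_fd_ostream OS("part" + utostr(PartIdx++) + ".bc", EC, sys::fs::F_None);
  WriteBitcodeToFile(MPart.get(), OS);
});

diff --git a/include/llvm/Transforms/Utils/UnrollLoop.h b/include/llvm/Transforms/Utils/UnrollLoop.h index 7f2cf8d7f59e..710817cddf6a 100644 --- a/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/include/llvm/Transforms/Utils/UnrollLoop.h @@ -21,20 +21,23 @@ namespace llvm { class AssumptionCache; +class DominatorTree; class Loop; class LoopInfo; class LPPassManager; class MDNode; class Pass; +class ScalarEvolution; bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, bool AllowExpensiveTripCount, unsigned TripMultiple, - LoopInfo *LI, Pass *PP, LPPassManager *LPM, - AssumptionCache *AC); + LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, + AssumptionCache *AC, bool PreserveLCSSA); bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, bool AllowExpensiveTripCount, LoopInfo *LI, - LPPassManager *LPM); + ScalarEvolution *SE, DominatorTree *DT, + bool PreserveLCSSA); MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name); } diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h index 047ab818711b..469022f34c56 100644 --- a/include/llvm/Transforms/Utils/ValueMapper.h +++ b/include/llvm/Transforms/Utils/ValueMapper.h @@ -38,13 +38,34 @@ namespace llvm { /// to materialize Values on demand. class ValueMaterializer { virtual void anchor(); // Out of line method. - public: - virtual ~ValueMaterializer() {} - /// materializeValueFor - The client should implement this method if they - /// want to generate a mapped Value on demand. For example, if linking - /// lazily. - virtual Value *materializeValueFor(Value *V) = 0; + protected: + ~ValueMaterializer() = default; + ValueMaterializer() = default; + ValueMaterializer(const ValueMaterializer&) = default; + ValueMaterializer &operator=(const ValueMaterializer&) = default; + + public: + /// The client should implement this method if they want to generate a + /// mapped Value on demand. For example, if linking lazily. + virtual Value *materializeDeclFor(Value *V) = 0; + + /// If the data being mapped is recursive, the above function can map + /// just the declaration and this is called to compute the initializer. + /// It is called after the mapping is recorded, so it doesn't need to worry + /// about recursion. + virtual void materializeInitFor(GlobalValue *New, GlobalValue *Old); + + /// If the client needs to handle temporary metadata it must implement + /// these methods. + virtual Metadata *mapTemporaryMetadata(Metadata *MD) { return nullptr; } + virtual void replaceTemporaryMetadata(const Metadata *OrigMD, + Metadata *NewMD) {} + + /// The client should implement this method if some metadata need + /// not be mapped, for example DISubprogram metadata for functions not + /// linked into the destination module. + virtual bool isMetadataNeeded(Metadata *MD) { return true; } }; /// RemapFlags - These are flags that the value mapping APIs allow. @@ -59,7 +80,20 @@ namespace llvm { /// RF_IgnoreMissingEntries - If this flag is set, the remapper ignores /// entries that are not in the value map. If it is unset, it aborts if an /// operand is asked to be remapped which doesn't exist in the mapping. - RF_IgnoreMissingEntries = 2 + RF_IgnoreMissingEntries = 2, + + /// Instruct the remapper to move distinct metadata instead of duplicating + /// it when there are module-level changes.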
+ RF_MoveDistinctMDs = 4, + + /// Any global values not in value map are mapped to null instead of + /// mapping to self. Illegal if RF_IgnoreMissingEntries is also set. + RF_NullMapMissingGlobalValues = 8, + + /// Set when there is still temporary metadata that must be handled, + /// such as when we are doing function importing and will materialize + /// and link metadata as a postpass. + RF_HaveUnmaterializedMetadata = 16, }; static inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) { diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index dcc5ce1059ff..0adce0c9602d 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -190,17 +190,15 @@ module LLVM_Utils { // Exclude this; it's fundamentally non-modular. exclude header "Support/PluginLoader.h" - // Exclude this; it's a weirdly-factored part of llvm-gcov and conflicts - // with the Analysis module (which also defines an llvm::GCOVOptions). - exclude header "Support/GCOV.h" - // FIXME: Mislayered? exclude header "Support/TargetRegistry.h" // These are intended for textual inclusion. + textual header "Support/ARMTargetParser.def" textual header "Support/Dwarf.def" textual header "Support/ELFRelocs/AArch64.def" textual header "Support/ELFRelocs/ARM.def" + textual header "Support/ELFRelocs/AVR.def" textual header "Support/ELFRelocs/Hexagon.def" textual header "Support/ELFRelocs/i386.def" textual header "Support/ELFRelocs/Mips.def" @@ -210,6 +208,12 @@ module LLVM_Utils { textual header "Support/ELFRelocs/SystemZ.def" textual header "Support/ELFRelocs/x86_64.def" } + + // This part of the module is usable from both C and C++ code. + module ConvertUTF { + header "Support/ConvertUTF.h" + export * + } } module LLVM_CodeGen_MachineValueType { diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 44d137dffd22..35f2e97622fa 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -25,9 +25,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" @@ -40,44 +47,72 @@ #include "llvm/Pass.h" using namespace llvm; -// Register the AliasAnalysis interface, providing a nice name to refer to. -INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA) -char AliasAnalysis::ID = 0; +/// Allow disabling BasicAA from the AA results. This is particularly useful +/// when testing to isolate a single AA implementation. 
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index dcc5ce1059ff..0adce0c9602d 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -190,17 +190,15 @@ module LLVM_Utils {
     // Exclude this; it's fundamentally non-modular.
     exclude header "Support/PluginLoader.h"
 
-    // Exclude this; it's a weirdly-factored part of llvm-gcov and conflicts
-    // with the Analysis module (which also defines an llvm::GCOVOptions).
-    exclude header "Support/GCOV.h"
-
     // FIXME: Mislayered?
     exclude header "Support/TargetRegistry.h"
 
     // These are intended for textual inclusion.
+    textual header "Support/ARMTargetParser.def"
     textual header "Support/Dwarf.def"
     textual header "Support/ELFRelocs/AArch64.def"
     textual header "Support/ELFRelocs/ARM.def"
+    textual header "Support/ELFRelocs/AVR.def"
     textual header "Support/ELFRelocs/Hexagon.def"
     textual header "Support/ELFRelocs/i386.def"
     textual header "Support/ELFRelocs/Mips.def"
@@ -210,6 +208,12 @@ module LLVM_Utils {
     textual header "Support/ELFRelocs/SystemZ.def"
     textual header "Support/ELFRelocs/x86_64.def"
   }
+
+  // This part of the module is usable from both C and C++ code.
+  module ConvertUTF {
+    header "Support/ConvertUTF.h"
+    export *
+  }
 }
 
 module LLVM_CodeGen_MachineValueType {
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 44d137dffd22..35f2e97622fa 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -25,9 +25,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CFLAliasAnalysis.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DataLayout.h"
@@ -40,44 +47,72 @@
 #include "llvm/Pass.h"
 using namespace llvm;
 
-// Register the AliasAnalysis interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
-char AliasAnalysis::ID = 0;
+/// Allow disabling BasicAA from the AA results. This is particularly useful
+/// when testing to isolate a single AA implementation.
+static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden,
+                                    cl::init(false));
+
+AAResults::AAResults(AAResults &&Arg) : AAs(std::move(Arg.AAs)) {
+  for (auto &AA : AAs)
+    AA->setAAResults(this);
+}
+
+AAResults &AAResults::operator=(AAResults &&Arg) {
+  AAs = std::move(Arg.AAs);
+  for (auto &AA : AAs)
+    AA->setAAResults(this);
+  return *this;
+}
+
+AAResults::~AAResults() {
+// FIXME: It would be nice to at least clear out the pointers back to this
+// aggregation here, but we end up with non-nesting lifetimes in the legacy
+// pass manager that prevent this from working. In the legacy pass manager
+// we'll end up with dangling references here in some cases.
+#if 0
+  for (auto &AA : AAs)
+    AA->setAAResults(nullptr);
+#endif
+}
 
 //===----------------------------------------------------------------------===//
 // Default chaining methods
 //===----------------------------------------------------------------------===//
 
-AliasResult AliasAnalysis::alias(const MemoryLocation &LocA,
-                                 const MemoryLocation &LocB) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->alias(LocA, LocB);
+AliasResult AAResults::alias(const MemoryLocation &LocA,
+                             const MemoryLocation &LocB) {
+  for (const auto &AA : AAs) {
+    auto Result = AA->alias(LocA, LocB);
+    if (Result != MayAlias)
+      return Result;
+  }
+  return MayAlias;
 }
 
-bool AliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
-                                           bool OrLocal) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->pointsToConstantMemory(Loc, OrLocal);
+bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc,
+                                       bool OrLocal) {
+  for (const auto &AA : AAs)
+    if (AA->pointsToConstantMemory(Loc, OrLocal))
+      return true;
+
+  return false;
 }
 
-AliasAnalysis::ModRefResult
-AliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  return AA->getArgModRefInfo(CS, ArgIdx);
+ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+  ModRefInfo Result = MRI_ModRef;
+
+  for (const auto &AA : AAs) {
+    Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx));
+
+    // Early-exit the moment we reach the bottom of the lattice.
+    if (Result == MRI_NoModRef)
+      return Result;
+  }
+
+  return Result;
 }
 
-void AliasAnalysis::deleteValue(Value *V) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  AA->deleteValue(V);
-}
-
-void AliasAnalysis::addEscapingUse(Use &U) {
-  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
-  AA->addEscapingUse(U);
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
+ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
   // We may have two calls
   if (auto CS = ImmutableCallSite(I)) {
     // Check if the two calls modify the same memory
@@ -88,289 +123,215 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {
     // is that if the call references what this instruction
     // defines, it must be clobbered by this location.
const MemoryLocation DefLoc = MemoryLocation::get(I); - if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) - return AliasAnalysis::ModRef; + if (getModRefInfo(Call, DefLoc) != MRI_NoModRef) + return MRI_ModRef; } - return AliasAnalysis::NoModRef; + return MRI_NoModRef; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); +ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + ModRefInfo Result = MRI_ModRef; - ModRefBehavior MRB = getModRefBehavior(CS); - if (MRB == DoesNotAccessMemory) - return NoModRef; + for (const auto &AA : AAs) { + Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc)); - ModRefResult Mask = ModRef; - if (onlyReadsMemory(MRB)) - Mask = Ref; - - if (onlyAccessesArgPointees(MRB)) { - bool doesAlias = false; - ModRefResult AllArgsMask = NoModRef; - if (doesAccessArgPointees(MRB)) { - for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - AI != AE; ++AI) { - const Value *Arg = *AI; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned ArgIdx = std::distance(CS.arg_begin(), AI); - MemoryLocation ArgLoc = - MemoryLocation::getForArgument(CS, ArgIdx, *TLI); - if (!isNoAlias(ArgLoc, Loc)) { - ModRefResult ArgMask = getArgModRefInfo(CS, ArgIdx); - doesAlias = true; - AllArgsMask = ModRefResult(AllArgsMask | ArgMask); - } - } - } - if (!doesAlias) - return NoModRef; - Mask = ModRefResult(Mask & AllArgsMask); + // Early-exit the moment we reach the bottom of the lattice. + if (Result == MRI_NoModRef) + return Result; } - // If Loc is a constant memory location, the call definitely could not - // modify the memory location. - if ((Mask & Mod) && pointsToConstantMemory(Loc)) - Mask = ModRefResult(Mask & ~Mod); - - // If this is the end of the chain, don't forward. - if (!AA) return Mask; - - // Otherwise, fall back to the next AA in the chain. But we can merge - // in any mask we've managed to compute. - return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask); + return Result; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); +ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + ModRefInfo Result = MRI_ModRef; - // If CS1 or CS2 are readnone, they don't interact. - ModRefBehavior CS1B = getModRefBehavior(CS1); - if (CS1B == DoesNotAccessMemory) return NoModRef; + for (const auto &AA : AAs) { + Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2)); - ModRefBehavior CS2B = getModRefBehavior(CS2); - if (CS2B == DoesNotAccessMemory) return NoModRef; - - // If they both only read from memory, there is no dependence. - if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B)) - return NoModRef; - - AliasAnalysis::ModRefResult Mask = ModRef; - - // If CS1 only reads memory, the only dependence on CS2 can be - // from CS1 reading memory written by CS2. - if (onlyReadsMemory(CS1B)) - Mask = ModRefResult(Mask & Ref); - - // If CS2 only access memory through arguments, accumulate the mod/ref - // information from CS1's references to the memory referenced by - // CS2's arguments. 
- if (onlyAccessesArgPointees(CS2B)) { - AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS2B)) { - for (ImmutableCallSite::arg_iterator - I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); - auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, *TLI); - - // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence of - // CS1 on that location is the inverse. - ModRefResult ArgMask = getArgModRefInfo(CS2, CS2ArgIdx); - if (ArgMask == Mod) - ArgMask = ModRef; - else if (ArgMask == Ref) - ArgMask = Mod; - - R = ModRefResult((R | (getModRefInfo(CS1, CS2ArgLoc) & ArgMask)) & Mask); - if (R == Mask) - break; - } - } - return R; + // Early-exit the moment we reach the bottom of the lattice. + if (Result == MRI_NoModRef) + return Result; } - // If CS1 only accesses memory through arguments, check if CS2 references - // any of the memory referenced by CS1's arguments. If not, return NoModRef. - if (onlyAccessesArgPointees(CS1B)) { - AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS1B)) { - for (ImmutableCallSite::arg_iterator - I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { - const Value *Arg = *I; - if (!Arg->getType()->isPointerTy()) - continue; - unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); - auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, *TLI); + return Result; +} - // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod - // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1 - // might Ref, then we care only about a Mod by CS2. - ModRefResult ArgMask = getArgModRefInfo(CS1, CS1ArgIdx); - ModRefResult ArgR = getModRefInfo(CS2, CS1ArgLoc); - if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) || - ((ArgMask & Ref) != NoModRef && (ArgR & Mod) != NoModRef)) - R = ModRefResult((R | ArgMask) & Mask); +FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) { + FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior; - if (R == Mask) - break; - } - } - return R; + for (const auto &AA : AAs) { + Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS)); + + // Early-exit the moment we reach the bottom of the lattice. + if (Result == FMRB_DoesNotAccessMemory) + return Result; } - // If this is the end of the chain, don't forward. - if (!AA) return Mask; - - // Otherwise, fall back to the next AA in the chain. But we can merge - // in any mask we've managed to compute. - return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask); + return Result; } -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); +FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) { + FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior; - ModRefBehavior Min = UnknownModRefBehavior; + for (const auto &AA : AAs) { + Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(F)); - // Call back into the alias analysis with the other form of getModRefBehavior - // to see if it can give a better response. - if (const Function *F = CS.getCalledFunction()) - Min = getModRefBehavior(F); + // Early-exit the moment we reach the bottom of the lattice. + if (Result == FMRB_DoesNotAccessMemory) + return Result; + } - // If this is the end of the chain, don't forward. 
- if (!AA) return Min; - - // Otherwise, fall back to the next AA in the chain. But we can merge - // in any result we've managed to compute. - return ModRefBehavior(AA->getModRefBehavior(CS) & Min); -} - -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(const Function *F) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - return AA->getModRefBehavior(F); + return Result; } //===----------------------------------------------------------------------===// -// AliasAnalysis non-virtual helper method implementation +// Helper method implementation //===----------------------------------------------------------------------===// -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const LoadInst *L, + const MemoryLocation &Loc) { // Be conservative in the face of volatile/atomic. if (!L->isUnordered()) - return ModRef; + return MRI_ModRef; // If the load address doesn't alias the given address, it doesn't read // or write the specified memory. if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc)) - return NoModRef; + return MRI_NoModRef; // Otherwise, a load just reads. - return Ref; + return MRI_Ref; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const StoreInst *S, + const MemoryLocation &Loc) { // Be conservative in the face of volatile/atomic. if (!S->isUnordered()) - return ModRef; + return MRI_ModRef; if (Loc.Ptr) { // If the store address cannot alias the pointer in question, then the // specified memory cannot be modified by the store. if (!alias(MemoryLocation::get(S), Loc)) - return NoModRef; + return MRI_NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this store. if (pointsToConstantMemory(Loc)) - return NoModRef; - + return MRI_NoModRef; } // Otherwise, a store just writes. - return Mod; + return MRI_Mod; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, + const MemoryLocation &Loc) { if (Loc.Ptr) { // If the va_arg address cannot alias the pointer in question, then the // specified memory cannot be accessed by the va_arg. if (!alias(MemoryLocation::get(V), Loc)) - return NoModRef; + return MRI_NoModRef; // If the pointer is a pointer to constant memory, then it could not have // been modified by this va_arg. if (pointsToConstantMemory(Loc)) - return NoModRef; + return MRI_NoModRef; } // Otherwise, a va_arg reads and writes. - return ModRef; + return MRI_ModRef; } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, - const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, + const MemoryLocation &Loc) { + if (Loc.Ptr) { + // If the pointer is a pointer to constant memory, + // then it could not have been modified by this catchpad. + if (pointsToConstantMemory(Loc)) + return MRI_NoModRef; + } + + // Otherwise, a catchpad reads and writes. + return MRI_ModRef; +} + +ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, + const MemoryLocation &Loc) { + if (Loc.Ptr) { + // If the pointer is a pointer to constant memory, + // then it could not have been modified by this catchpad. + if (pointsToConstantMemory(Loc)) + return MRI_NoModRef; + } + + // Otherwise, a catchret reads and writes. 
+  return MRI_ModRef;
+}
+
+ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX,
+                                    const MemoryLocation &Loc) {
   // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.
   if (CX->getSuccessOrdering() > Monotonic)
-    return ModRef;
+    return MRI_ModRef;
 
   // If the cmpxchg address does not alias the location, it does not access it.
   if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc))
-    return NoModRef;
+    return MRI_NoModRef;
 
-  return ModRef;
+  return MRI_ModRef;
 }
 
-AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW,
-                             const MemoryLocation &Loc) {
+ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW,
+                                    const MemoryLocation &Loc) {
   // Acquire/Release atomicrmw has properties that matter for arbitrary addresses.
   if (RMW->getOrdering() > Monotonic)
-    return ModRef;
+    return MRI_ModRef;
 
   // If the atomicrmw address does not alias the location, it does not access it.
   if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc))
-    return NoModRef;
+    return MRI_NoModRef;
 
-  return ModRef;
+  return MRI_ModRef;
 }
 
-// FIXME: this is really just shoring-up a deficiency in alias analysis.
-// BasicAA isn't willing to spend linear time determining whether an alloca
-// was captured before or after this particular call, while we are. However,
-// with a smarter AA in place, this test is just wasting compile time.
-AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(
-    const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT) {
+/// \brief Return information about whether a particular call site modifies
+/// or reads the specified memory location \p MemLoc before instruction \p I
+/// in a BasicBlock. An ordered basic block \p OBB can be used to speed up
+/// instruction-ordering queries inside the BasicBlock containing \p I.
+/// FIXME: this is really just shoring-up a deficiency in alias analysis.
+/// BasicAA isn't willing to spend linear time determining whether an alloca
+/// was captured before or after this particular call, while we are. However,
+/// with a smarter AA in place, this test is just wasting compile time.
+ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
+                                         const MemoryLocation &MemLoc,
+                                         DominatorTree *DT,
+                                         OrderedBasicBlock *OBB) {
   if (!DT)
-    return AliasAnalysis::ModRef;
+    return MRI_ModRef;
 
-  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL);
+  const Value *Object =
+      GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout());
   if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||
       isa<Constant>(Object))
-    return AliasAnalysis::ModRef;
+    return MRI_ModRef;
 
   ImmutableCallSite CS(I);
   if (!CS.getInstruction() || CS.getInstruction() == Object)
-    return AliasAnalysis::ModRef;
+    return MRI_ModRef;
 
   if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,
                                        /* StoreCaptures */ true, I, DT,
-                                       /* include Object */ true))
-    return AliasAnalysis::ModRef;
+                                       /* include Object */ true,
+                                       /* OrderedBasicBlock */ OBB))
+    return MRI_ModRef;
 
   unsigned ArgNo = 0;
-  AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef;
+  ModRefInfo R = MRI_NoModRef;
   for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
        CI != CE; ++CI, ++ArgNo) {
     // Only look at the no-capture or byval pointer arguments.  If this
@@ -389,50 +350,20 @@ AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(
     if (CS.doesNotAccessMemory(ArgNo))
       continue;
     if (CS.onlyReadsMemory(ArgNo)) {
-      R = AliasAnalysis::Ref;
+      R = MRI_Ref;
      continue;
     }
-    return AliasAnalysis::ModRef;
+    return MRI_ModRef;
   }
   return R;
 }
 
-// AliasAnalysis destructor: DO NOT move this to the header file for
-// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
-// the AliasAnalysis.o file in the current .a file, causing alias analysis
-// support to not be included in the tool correctly!
-//
-AliasAnalysis::~AliasAnalysis() {}
-
-/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
-/// AliasAnalysis interface before any other methods are called.
-///
-void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) {
-  DL = NewDL;
-  auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-  TLI = TLIP ? &TLIP->getTLI() : nullptr;
-  AA = &P->getAnalysis<AliasAnalysis>();
-}
-
-// getAnalysisUsage - All alias analysis implementations should invoke this
-// directly (using AliasAnalysis::getAnalysisUsage(AU)).
-void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<AliasAnalysis>(); // All AA's chain
-}
-
-/// getTypeStoreSize - Return the DataLayout store size for the given type,
-/// if known, or a conservative value otherwise.
-///
-uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) {
-  return DL ? DL->getTypeStoreSize(Ty) : MemoryLocation::UnknownSize;
-}
-
 /// canBasicBlockModify - Return true if it is possible for execution of the
 /// specified basic block to modify the location Loc.
 ///
-bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
-                                        const MemoryLocation &Loc) {
-  return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod);
+bool AAResults::canBasicBlockModify(const BasicBlock &BB,
+                                    const MemoryLocation &Loc) {
+  return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod);
 }
 
 /// canInstructionRangeModRef - Return true if it is possible for the
@@ -440,28 +371,178 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
 /// mode) the location Loc. The instructions to consider are all
 /// of the instructions in the range of [I1,I2] INCLUSIVE.
 /// I1 and I2 must be in the same basic block.
-bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1,
-                                              const Instruction &I2,
-                                              const MemoryLocation &Loc,
-                                              const ModRefResult Mode) {
+bool AAResults::canInstructionRangeModRef(const Instruction &I1,
+                                          const Instruction &I2,
+                                          const MemoryLocation &Loc,
+                                          const ModRefInfo Mode) {
   assert(I1.getParent() == I2.getParent() &&
          "Instructions not in same basic block!");
-  BasicBlock::const_iterator I = &I1;
-  BasicBlock::const_iterator E = &I2;
+  BasicBlock::const_iterator I = I1.getIterator();
+  BasicBlock::const_iterator E = I2.getIterator();
   ++E;  // Convert from inclusive to exclusive range.
 
   for (; I != E; ++I) // Check every instruction in range
-    if (getModRefInfo(I, Loc) & Mode)
+    if (getModRefInfo(&*I, Loc) & Mode)
       return true;
   return false;
 }
 
+// Provide a definition for the root virtual destructor.
+AAResults::Concept::~Concept() {}
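[Editor's note: a hedged sketch of querying the AAResults aggregation defined above; the helper function is hypothetical, while the AAResults entry points are the ones implemented in this file. Each query walks every registered AA and stops early at the bottom of the lattice.]

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static bool storeMayClobberLoad(AAResults &AA, StoreInst &SI, LoadInst &LI) {
      // NoAlias from any single AA settles the question for the whole set.
      if (AA.alias(MemoryLocation::get(&SI), MemoryLocation::get(&LI)) == NoAlias)
        return false;
      // ModRefInfo is a bitmask; intersect with MRI_Mod to ask "may it write?".
      return (AA.getModRefInfo(&SI, MemoryLocation::get(&LI)) & MRI_Mod) != 0;
    }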
+
+namespace {
+/// A wrapper pass for external alias analyses. This just squirrels away the
+/// callback used to run any analyses and register their results.
+struct ExternalAAWrapperPass : ImmutablePass {
+  typedef std::function<void(Pass &, Function &, AAResults &)> CallbackT;
+
+  CallbackT CB;
+
+  static char ID;
+
+  ExternalAAWrapperPass() : ImmutablePass(ID) {
+    initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+  }
+  explicit ExternalAAWrapperPass(CallbackT CB)
+      : ImmutablePass(ID), CB(std::move(CB)) {
+    initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
+};
+}
+
+char ExternalAAWrapperPass::ID = 0;
+INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis",
+                false, true)
+
+ImmutablePass *
+llvm::createExternalAAWrapperPass(ExternalAAWrapperPass::CallbackT Callback) {
+  return new ExternalAAWrapperPass(std::move(Callback));
+}
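[Editor's note: a hedged sketch of how a plugin or target might use the hook above; MyAAResult and the surrounding setup are hypothetical, while createExternalAAWrapperPass and the std::function callback signature come from this patch.]

    llvm::legacy::PassManager PM;
    static MyAAResult MyAA; // must outlive the pipeline; sketch only
    PM.add(llvm::createExternalAAWrapperPass(
        [](llvm::Pass &P, llvm::Function &F, llvm::AAResults &AAR) {
          // Consulted alongside BasicAA, TBAA, etc. by AAResultsWrapperPass.
          AAR.addAAResult(MyAA);
        }));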
+
+AAResultsWrapperPass::AAResultsWrapperPass() : FunctionPass(ID) {
+  initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char AAResultsWrapperPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AAResultsWrapperPass, "aa",
+                      "Function Alias Analysis Results", false, true)
+INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(CFLAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ExternalAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScopedNoAliasAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TypeBasedAAWrapperPass)
+INITIALIZE_PASS_END(AAResultsWrapperPass, "aa",
+                    "Function Alias Analysis Results", false, true)
+
+FunctionPass *llvm::createAAResultsWrapperPass() {
+  return new AAResultsWrapperPass();
+}
+
+/// Run the wrapper pass to rebuild an aggregation over known AA passes.
+///
+/// This is the legacy pass manager's interface to the new-style AA results
+/// aggregation object. Because this is somewhat shoe-horned into the legacy
+/// pass manager, we hard code all the specific alias analyses available into
+/// it. While the particular set enabled is configured via commandline flags,
+/// adding a new alias analysis to LLVM will require adding support for it to
+/// this list.
+bool AAResultsWrapperPass::runOnFunction(Function &F) {
+  // NB! This *must* be reset before adding new AA results to the new
+  // AAResults object because in the legacy pass manager, each instance
+  // of these will refer to the *same* immutable analyses, registering and
+  // unregistering themselves with them. We need to carefully tear down the
+  // previous object first, in this case replacing it with an empty one, before
+  // registering new results.
+  AAR.reset(new AAResults());
+
+  // BasicAA is always available for function analyses. Also, we add it first
+  // so that it can trump TBAA results when it proves MustAlias.
+  // FIXME: TBAA should have an explicit mode to support this and then we
+  // should reconsider the ordering here.
+  if (!DisableBasicAA)
+    AAR->addAAResult(getAnalysis<BasicAAWrapperPass>().getResult());
+
+  // Populate the results with the currently available AAs.
+  if (auto *WrapperPass = getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass =
+          getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = getAnalysisIfAvailable<CFLAAWrapperPass>())
+    AAR->addAAResult(WrapperPass->getResult());
+
+  // If available, run an external AA providing callback over the results as
+  // well.
+  if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>())
+    if (WrapperPass->CB)
+      WrapperPass->CB(*this, F, *AAR);
+
+  // Analyses don't mutate the IR, so return false.
+  return false;
+}
+
+void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<BasicAAWrapperPass>();
+
+  // We also need to mark all the alias analysis passes we will potentially
+  // probe in runOnFunction as used here to ensure the legacy pass manager
+  // preserves them. This hard coding of lists of alias analyses is specific to
+  // the legacy pass manager.
+  AU.addUsedIfAvailable<ScopedNoAliasAAWrapperPass>();
+  AU.addUsedIfAvailable<TypeBasedAAWrapperPass>();
+  AU.addUsedIfAvailable<objcarc::ObjCARCAAWrapperPass>();
+  AU.addUsedIfAvailable<GlobalsAAWrapperPass>();
+  AU.addUsedIfAvailable<SCEVAAWrapperPass>();
+  AU.addUsedIfAvailable<CFLAAWrapperPass>();
+}
+
+AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
+                                        BasicAAResult &BAR) {
+  AAResults AAR;
+
+  // Add in our explicitly constructed BasicAA results.
+  if (!DisableBasicAA)
+    AAR.addAAResult(BAR);
+
+  // Populate the results with the other currently available AAs.
+  if (auto *WrapperPass =
+          P.getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = P.getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass =
+          P.getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = P.getAnalysisIfAvailable<GlobalsAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = P.getAnalysisIfAvailable<SCEVAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+  if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLAAWrapperPass>())
+    AAR.addAAResult(WrapperPass->getResult());
+
+  return AAR;
+}
+
 /// isNoAliasCall - Return true if this pointer is returned by a noalias
 /// function.
 bool llvm::isNoAliasCall(const Value *V) {
-  if (isa<CallInst>(V) || isa<InvokeInst>(V))
-    return ImmutableCallSite(cast<Instruction>(V))
-        .paramHasAttr(0, Attribute::NoAlias);
+  if (auto CS = ImmutableCallSite(V))
+    return CS.paramHasAttr(0, Attribute::NoAlias);
   return false;
 }
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
deleted file mode 100644
index 9b6a5a44d80c..000000000000
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ /dev/null
@@ -1,173 +0,0 @@
-//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass which can be used to count how many alias queries
-// are being made and how the alias analysis implementation being used responds.
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -static cl::opt -PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true)); -static cl::opt -PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden); - -namespace { - class AliasAnalysisCounter : public ModulePass, public AliasAnalysis { - unsigned No, May, Partial, Must; - unsigned NoMR, JustRef, JustMod, MR; - Module *M; - public: - static char ID; // Class identification, replacement for typeinfo - AliasAnalysisCounter() : ModulePass(ID) { - initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry()); - No = May = Partial = Must = 0; - NoMR = JustRef = JustMod = MR = 0; - } - - void printLine(const char *Desc, unsigned Val, unsigned Sum) { - errs() << " " << Val << " " << Desc << " responses (" - << Val*100/Sum << "%)\n"; - } - ~AliasAnalysisCounter() override { - unsigned AASum = No+May+Partial+Must; - unsigned MRSum = NoMR+JustRef+JustMod+MR; - if (AASum + MRSum) { // Print a report if any counted queries occurred... - errs() << "\n===== Alias Analysis Counter Report =====\n" - << " Analysis counted:\n" - << " " << AASum << " Total Alias Queries Performed\n"; - if (AASum) { - printLine("no alias", No, AASum); - printLine("may alias", May, AASum); - printLine("partial alias", Partial, AASum); - printLine("must alias", Must, AASum); - errs() << " Alias Analysis Counter Summary: " << No*100/AASum << "%/" - << May*100/AASum << "%/" - << Partial*100/AASum << "%/" - << Must*100/AASum<<"%\n\n"; - } - - errs() << " " << MRSum << " Total Mod/Ref Queries Performed\n"; - if (MRSum) { - printLine("no mod/ref", NoMR, MRSum); - printLine("ref", JustRef, MRSum); - printLine("mod", JustMod, MRSum); - printLine("mod/ref", MR, MRSum); - errs() << " Mod/Ref Analysis Counter Summary: " <M = &M; - InitializeAliasAnalysis(this, &M.getDataLayout()); - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired(); - AU.setPreservesAll(); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - // FIXME: We could count these too... - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override { - return getAnalysis().pointsToConstantMemory(Loc, OrLocal); - } - - // Forwarding functions: just delegate to a real AA implementation, counting - // the number of responses... 
-    AliasResult alias(const MemoryLocation &LocA,
-                      const MemoryLocation &LocB) override;
-
-    ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const MemoryLocation &Loc) override;
-    ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                               ImmutableCallSite CS2) override {
-      return AliasAnalysis::getModRefInfo(CS1,CS2);
-    }
-  };
-}
-
-char AliasAnalysisCounter::ID = 0;
-INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
-                   "Count Alias Analysis Query Responses", false, true, false)
-
-ModulePass *llvm::createAliasAnalysisCounterPass() {
-  return new AliasAnalysisCounter();
-}
-
-AliasResult AliasAnalysisCounter::alias(const MemoryLocation &LocA,
-                                        const MemoryLocation &LocB) {
-  AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
-
-  const char *AliasString = nullptr;
-  switch (R) {
-  case NoAlias:      No++;      AliasString = "No alias"; break;
-  case MayAlias:     May++;     AliasString = "May alias"; break;
-  case PartialAlias: Partial++; AliasString = "Partial alias"; break;
-  case MustAlias:    Must++;    AliasString = "Must alias"; break;
-  }
-
-  if (PrintAll || (PrintAllFailures && R == MayAlias)) {
-    errs() << AliasString << ":\t";
-    errs() << "[" << LocA.Size << "B] ";
-    LocA.Ptr->printAsOperand(errs(), true, M);
-    errs() << ", ";
-    errs() << "[" << LocB.Size << "B] ";
-    LocB.Ptr->printAsOperand(errs(), true, M);
-    errs() << "\n";
-  }
-
-  return R;
-}
-
-AliasAnalysis::ModRefResult
-AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
-                                    const MemoryLocation &Loc) {
-  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
-
-  const char *MRString = nullptr;
-  switch (R) {
-  case NoModRef: NoMR++;    MRString = "NoModRef"; break;
-  case Ref:      JustRef++; MRString = "JustRef"; break;
-  case Mod:      JustMod++; MRString = "JustMod"; break;
-  case ModRef:   MR++;      MRString = "ModRef"; break;
-  }
-
-  if (PrintAll || (PrintAllFailures && R == ModRef)) {
-    errs() << MRString << ": Ptr: ";
-    errs() << "[" << Loc.Size << "B] ";
-    Loc.Ptr->printAsOperand(errs(), true, M);
-    errs() << "\t<->" << *CS.getInstruction() << '\n';
-  }
-  return R;
-}
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 5d1b001fe161..12917b650e5e 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -21,8 +21,10 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/Pass.h"
@@ -57,7 +59,7 @@ namespace {
     }
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<AAResultsWrapperPass>();
       AU.setPreservesAll();
     }
 
@@ -81,7 +83,7 @@ namespace {
 char AAEval::ID = 0;
 INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
                       "Exhaustive Alias Analysis Precision Evaluator", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(AAEval, "aa-eval",
                     "Exhaustive Alias Analysis Precision Evaluator", false, true)
 
@@ -139,16 +141,17 @@ static inline bool isInterestingPointer(Value *V) {
 }
 
 bool AAEval::runOnFunction(Function &F) {
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   SetVector<Value *> Pointers;
-  SetVector<CallSite> CallSites;
+  SmallSetVector<CallSite, 16> CallSites;
   SetVector<Value *> Loads;
   SetVector<Value *> Stores;
 
-  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
-       I != E; ++I)
-    if (I->getType()->isPointerTy())    // Add all pointer arguments.
-      Pointers.insert(I);
+  for (auto &I : F.args())
+    if (I.getType()->isPointerTy()) // Add all pointer arguments.
+      Pointers.insert(&I);
 
   for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
     if (I->getType()->isPointerTy()) // Add all pointer instructions.
@@ -164,10 +167,9 @@ bool AAEval::runOnFunction(Function &F) {
       if (!isa<Function>(Callee) && isInterestingPointer(Callee))
         Pointers.insert(Callee);
       // Consider formals.
-      for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
-           AI != AE; ++AI)
-        if (isInterestingPointer(*AI))
-          Pointers.insert(*AI);
+      for (Use &DataOp : CS.data_ops())
+        if (isInterestingPointer(DataOp))
+          Pointers.insert(DataOp);
       CallSites.insert(CS);
     } else {
       // Consider all operands.
@@ -188,12 +190,12 @@
        I1 != E; ++I1) {
     uint64_t I1Size = MemoryLocation::UnknownSize;
     Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
-    if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
+    if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy);
 
     for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
       uint64_t I2Size = MemoryLocation::UnknownSize;
       Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
-      if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
+      if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);
 
       switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
       case NoAlias:
@@ -281,30 +283,29 @@ bool AAEval::runOnFunction(Function &F) {
   }
 
   // Mod/ref alias analysis: compare all pairs of calls and values
-  for (SetVector<CallSite>::iterator C = CallSites.begin(),
-         Ce = CallSites.end(); C != Ce; ++C) {
+  for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
     Instruction *I = C->getInstruction();
 
     for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
          V != Ve; ++V) {
       uint64_t Size = MemoryLocation::UnknownSize;
       Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
-      if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
+      if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);
 
       switch (AA.getModRefInfo(*C, *V, Size)) {
-      case AliasAnalysis::NoModRef:
+      case MRI_NoModRef:
         PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
         ++NoModRefCount;
         break;
-      case AliasAnalysis::Mod:
+      case MRI_Mod:
         PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
         ++ModCount;
         break;
-      case AliasAnalysis::Ref:
+      case MRI_Ref:
        PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
        ++RefCount;
        break;
-      case AliasAnalysis::ModRef:
+      case MRI_ModRef:
        PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
        ++ModRefCount;
        break;
@@ -313,25 +314,24 @@ bool AAEval::runOnFunction(Function &F) {
   }
 
   // Mod/ref alias analysis: compare all pairs of calls
-  for (SetVector<CallSite>::iterator C = CallSites.begin(),
-         Ce = CallSites.end(); C != Ce; ++C) {
-    for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+  for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {
+    for (auto D = CallSites.begin(); D != Ce; ++D) {
      if (D == C) continue;
      switch (AA.getModRefInfo(*C, *D)) {
-      case AliasAnalysis::NoModRef:
+      case MRI_NoModRef:
        PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
        ++NoModRefCount;
        break;
-      case AliasAnalysis::Mod:
+      case MRI_Mod:
        PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
        ++ModCount;
        break;
-      case AliasAnalysis::Ref:
+      case MRI_Ref:
        PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
        ++RefCount;
break; - case AliasAnalysis::ModRef: + case MRI_ModRef: PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent()); ++ModRefCount; break; diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp deleted file mode 100644 index e5107b3bc827..000000000000 --- a/lib/Analysis/AliasDebugger.cpp +++ /dev/null @@ -1,136 +0,0 @@ -//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This simple pass checks alias analysis users to ensure that if they -// create a new value, they do not query AA without informing it of the value. -// It acts as a shim over any other AA pass you want. -// -// Yes keeping track of every value in the program is expensive, but this is -// a debugging pass. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include -using namespace llvm; - -namespace { - - class AliasDebugger : public ModulePass, public AliasAnalysis { - - //What we do is simple. Keep track of every value the AA could - //know about, and verify that queries are one of those. - //A query to a value that didn't exist when the AA was created - //means someone forgot to update the AA when creating new values - - std::set Vals; - - public: - static char ID; // Class identification, replacement for typeinfo - AliasDebugger() : ModulePass(ID) { - initializeAliasDebuggerPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class - - for(Module::global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { - Vals.insert(&*I); - for (User::const_op_iterator OI = I->op_begin(), - OE = I->op_end(); OI != OE; ++OI) - Vals.insert(*OI); - } - - for(Module::iterator I = M.begin(), - E = M.end(); I != E; ++I){ - Vals.insert(&*I); - if(!I->isDeclaration()) { - for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); - AI != AE; ++AI) - Vals.insert(&*AI); - for (Function::const_iterator FI = I->begin(), FE = I->end(); - FI != FE; ++FI) - for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - Vals.insert(&*BI); - for (User::const_op_iterator OI = BI->op_begin(), - OE = BI->op_end(); OI != OE; ++OI) - Vals.insert(*OI); - } - } - - } - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.setPreservesAll(); // Does not transform code - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. 
- void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - assert(Vals.find(LocA.Ptr) != Vals.end() && - "Never seen value in AA before"); - assert(Vals.find(LocB.Ptr) != Vals.end() && - "Never seen value in AA before"); - return AliasAnalysis::alias(LocA, LocB); - } - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override { - assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); - return AliasAnalysis::getModRefInfo(CS, Loc); - } - - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return AliasAnalysis::getModRefInfo(CS1,CS2); - } - - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override { - assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); - } - - void deleteValue(Value *V) override { - assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); - AliasAnalysis::deleteValue(V); - } - - }; -} - -char AliasDebugger::ID = 0; -INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa", - "AA use debugger", false, true, false) - -Pass *llvm::createAliasDebugger() { return new AliasDebugger(); } - diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 54d0f4304e1f..3094049b3cc3 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" @@ -167,8 +168,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, if (!UnknownInsts.empty()) { for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) if (AA.getModRefInfo(UnknownInsts[i], - MemoryLocation(Ptr, Size, AAInfo)) != - AliasAnalysis::NoModRef) + MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef) return true; } @@ -182,16 +182,14 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst, for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) { ImmutableCallSite C1(getUnknownInst(i)), C2(Inst); - if (!C1 || !C2 || - AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef || - AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef) + if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || + AA.getModRefInfo(C2, C1) != MRI_NoModRef) return true; } for (iterator I = begin(), E = end(); I != E; ++I) - if (AA.getModRefInfo( - Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())) != - AliasAnalysis::NoModRef) + if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(), + I.getAAInfo())) != MRI_NoModRef) return true; return false; @@ -223,7 +221,7 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue; if (!FoundSet) { // If this is the first alias set ptr can go into. - FoundSet = Cur; // Remember it. + FoundSet = &*Cur; // Remember it. } else { // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. 
} @@ -257,7 +255,7 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) { if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA)) continue; if (!FoundSet) // If this is the first alias set ptr can go into. - FoundSet = Cur; // Remember it. + FoundSet = &*Cur; // Remember it. else if (!Cur->Forward) // Otherwise, we must merge the sets. FoundSet->mergeSetIn(*Cur, *this); // Merge in contents. } @@ -309,8 +307,9 @@ bool AliasSetTracker::add(LoadInst *LI) { AliasSet::AccessLattice Access = AliasSet::RefAccess; bool NewPtr; + const DataLayout &DL = LI->getModule()->getDataLayout(); AliasSet &AS = addPointer(LI->getOperand(0), - AA.getTypeStoreSize(LI->getType()), + DL.getTypeStoreSize(LI->getType()), AAInfo, Access, NewPtr); if (LI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -324,9 +323,10 @@ bool AliasSetTracker::add(StoreInst *SI) { AliasSet::AccessLattice Access = AliasSet::ModAccess; bool NewPtr; + const DataLayout &DL = SI->getModule()->getDataLayout(); Value *Val = SI->getOperand(0); AliasSet &AS = addPointer(SI->getOperand(1), - AA.getTypeStoreSize(Val->getType()), + DL.getTypeStoreSize(Val->getType()), AAInfo, Access, NewPtr); if (SI->isVolatile()) AS.setVolatile(); return NewPtr; @@ -372,8 +372,8 @@ bool AliasSetTracker::add(Instruction *I) { } void AliasSetTracker::add(BasicBlock &BB) { - for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) - add(I); + for (auto &I : BB) + add(&I); } void AliasSetTracker::add(const AliasSetTracker &AST) { @@ -443,7 +443,8 @@ AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) { } bool AliasSetTracker::remove(LoadInst *LI) { - uint64_t Size = AA.getTypeStoreSize(LI->getType()); + const DataLayout &DL = LI->getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(LI->getType()); AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); @@ -455,7 +456,8 @@ bool AliasSetTracker::remove(LoadInst *LI) { } bool AliasSetTracker::remove(StoreInst *SI) { - uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); + const DataLayout &DL = SI->getModule()->getDataLayout(); + uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType()); AAMDNodes AAInfo; SI->getAAMetadata(AAInfo); @@ -505,9 +507,6 @@ bool AliasSetTracker::remove(Instruction *I) { // dangling pointers to deleted instructions. // void AliasSetTracker::deleteValue(Value *PtrVal) { - // Notify the alias analysis implementation that this value is gone. - AA.deleteValue(PtrVal); - // If this is a call instruction, remove the callsite from the appropriate // AliasSet (if present). 
   if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) {
@@ -650,11 +649,12 @@ namespace {
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesAll();
-      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<AAResultsWrapperPass>();
     }
 
     bool runOnFunction(Function &F) override {
-      Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>());
+      auto &AAWP = getAnalysis<AAResultsWrapperPass>();
+      Tracker = new AliasSetTracker(AAWP.getAAResults());
 
       for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
         Tracker->add(&*I);
@@ -668,6 +668,6 @@ namespace {
 char AliasSetPrinter::ID = 0;
 INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",
                       "Alias Set Printer", false, true)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",
                     "Alias Set Printer", false, true)
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 842ff0a14e2f..9c1ac000be2c 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -20,23 +20,23 @@ using namespace llvm;
 /// initializeAnalysis - Initialize all passes linked into the Analysis library.
 void llvm::initializeAnalysis(PassRegistry &Registry) {
-  initializeAliasAnalysisAnalysisGroup(Registry);
-  initializeAliasAnalysisCounterPass(Registry);
   initializeAAEvalPass(Registry);
-  initializeAliasDebuggerPass(Registry);
   initializeAliasSetPrinterPass(Registry);
-  initializeNoAAPass(Registry);
-  initializeBasicAliasAnalysisPass(Registry);
-  initializeBlockFrequencyInfoPass(Registry);
-  initializeBranchProbabilityInfoPass(Registry);
+  initializeBasicAAWrapperPassPass(Registry);
+  initializeBlockFrequencyInfoWrapperPassPass(Registry);
+  initializeBranchProbabilityInfoWrapperPassPass(Registry);
+  initializeCallGraphWrapperPassPass(Registry);
+  initializeCallGraphPrinterPass(Registry);
+  initializeCallGraphViewerPass(Registry);
   initializeCostModelAnalysisPass(Registry);
   initializeCFGViewerPass(Registry);
   initializeCFGPrinterPass(Registry);
   initializeCFGOnlyViewerPass(Registry);
   initializeCFGOnlyPrinterPass(Registry);
-  initializeCFLAliasAnalysisPass(Registry);
+  initializeCFLAAWrapperPassPass(Registry);
   initializeDependenceAnalysisPass(Registry);
   initializeDelinearizationPass(Registry);
+  initializeDemandedBitsPass(Registry);
   initializeDivergenceAnalysisPass(Registry);
   initializeDominanceFrontierPass(Registry);
   initializeDomViewerPass(Registry);
@@ -47,34 +47,40 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializePostDomPrinterPass(Registry);
   initializePostDomOnlyViewerPass(Registry);
   initializePostDomOnlyPrinterPass(Registry);
+  initializeAAResultsWrapperPassPass(Registry);
+  initializeGlobalsAAWrapperPassPass(Registry);
   initializeIVUsersPass(Registry);
   initializeInstCountPass(Registry);
   initializeIntervalPartitionPass(Registry);
   initializeLazyValueInfoPass(Registry);
-  initializeLibCallAliasAnalysisPass(Registry);
   initializeLintPass(Registry);
   initializeLoopInfoWrapperPassPass(Registry);
   initializeMemDepPrinterPass(Registry);
   initializeMemDerefPrinterPass(Registry);
   initializeMemoryDependenceAnalysisPass(Registry);
   initializeModuleDebugInfoPrinterPass(Registry);
+  initializeObjCARCAAWrapperPassPass(Registry);
   initializePostDominatorTreePass(Registry);
   initializeRegionInfoPassPass(Registry);
   initializeRegionViewerPass(Registry);
   initializeRegionPrinterPass(Registry);
   initializeRegionOnlyViewerPass(Registry);
   initializeRegionOnlyPrinterPass(Registry);
-  initializeScalarEvolutionPass(Registry);
-  initializeScalarEvolutionAliasAnalysisPass(Registry);
+  initializeSCEVAAWrapperPassPass(Registry);
+  initializeScalarEvolutionWrapperPassPass(Registry);
   initializeTargetTransformInfoWrapperPassPass(Registry);
-  initializeTypeBasedAliasAnalysisPass(Registry);
-  initializeScopedNoAliasAAPass(Registry);
+  initializeTypeBasedAAWrapperPassPass(Registry);
+  initializeScopedNoAliasAAWrapperPassPass(Registry);
 }
 
 void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
   initializeAnalysis(*unwrap(R));
 }
 
+void LLVMInitializeIPA(LLVMPassRegistryRef R) {
+  initializeAnalysis(*unwrap(R));
+}
+
 LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
                           char **OutMessages) {
   raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr;
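[Editor's note: both C API entry points above now funnel into the same initializeAnalysis(). A hedged sketch of a C-API client; the wrapper function name is hypothetical and the llvm-c calls are assumed from this era's headers.]

    #include "llvm-c/Core.h"
    #include "llvm-c/Initialization.h"

    static void initAnalysisForTool(void) {
      LLVMPassRegistryRef R = LLVMGetGlobalPassRegistry();
      LLVMInitializeAnalysis(R); // registers aa-eval and the AA wrapper passes
      LLVMInitializeIPA(R);      // kept for compatibility; same registrations
    }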
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 35863542f437..00f346ea115d 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -13,24 +13,21 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/GlobalAlias.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
@@ -42,6 +39,18 @@
 #include <algorithm>
 using namespace llvm;
 
+/// Enable analysis of recursive PHI nodes.
+static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden,
+                                          cl::init(false));
+
+/// SearchLimitReached / SearchTimes shows how often the limit to decompose
+/// GEPs is reached. It will affect the precision of basic alias analysis.
+#define DEBUG_TYPE "basicaa"
+STATISTIC(SearchLimitReached, "Number of times the limit to "
+                              "decompose GEPs is reached");
+STATISTIC(SearchTimes, "Number of times a GEP is decomposed");
+
 /// Cutoff after which to stop analysing a set of phi nodes potentially involved
 /// in a cycle. Because we are analysing 'through' phi nodes we need to be
 /// careful with value equivalence. We use reachability to make sure a value
 /// cannot be involved in a cycle.
@@ -57,8 +66,8 @@
 static const unsigned MaxLookupSearchDepth = 6;
 
 //===----------------------------------------------------------------------===//
 // Useful predicates
 //===----------------------------------------------------------------------===//
 
-/// isNonEscapingLocalObject - Return true if the pointer is to a function-local
-/// object that never escapes from the function.
+/// Returns true if the pointer is to a function-local object that never
+/// escapes from the function.
 static bool isNonEscapingLocalObject(const Value *V) {
   // If this is a local allocation, check to see if it escapes.
   if (isa<AllocaInst>(V) || isNoAliasCall(V))
@@ -82,8 +91,8 @@ static bool isNonEscapingLocalObject(const Value *V) {
   return false;
 }
 
-/// isEscapeSource - Return true if the pointer is one which would have
-/// been considered an escape by isNonEscapingLocalObject.
+/// Returns true if the pointer is one which would have been considered an
+/// escape by isNonEscapingLocalObject.
 static bool isEscapeSource(const Value *V) {
   if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
     return true;
@@ -97,8 +106,7 @@ static bool isEscapeSource(const Value *V) {
   return false;
 }
 
-/// getObjectSize - Return the size of the object specified by V, or
-/// UnknownSize if unknown.
+/// Returns the size of the object specified by V, or UnknownSize if unknown.
 static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
                               const TargetLibraryInfo &TLI,
                               bool RoundToAlign = false) {
@@ -108,8 +116,8 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,
   return MemoryLocation::UnknownSize;
 }
 
-/// isObjectSmallerThan - Return true if we can prove that the object specified
-/// by V is smaller than Size.
+/// Returns true if we can prove that the object specified by V is smaller than
+/// Size.
 static bool isObjectSmallerThan(const Value *V, uint64_t Size,
                                 const DataLayout &DL,
                                 const TargetLibraryInfo &TLI) {
@@ -144,15 +152,14 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
 
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
-  uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/true);
+  uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true);
 
   return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;
 }
 
-/// isObjectSize - Return true if we can prove that the object specified
-/// by V has size Size.
-static bool isObjectSize(const Value *V, uint64_t Size,
-                         const DataLayout &DL, const TargetLibraryInfo &TLI) {
+/// Returns true if we can prove that the object specified by V has size Size.
+static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
+                         const TargetLibraryInfo &TLI) {
   uint64_t ObjectSize = getObjectSize(V, DL, TLI);
   return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;
 }
@@ -161,42 +168,20 @@ static bool isObjectSize(const Value *V, uint64_t Size,
 // GetElementPtr Instruction Decomposition and Analysis
 //===----------------------------------------------------------------------===//
 
-namespace {
-  enum ExtensionKind {
-    EK_NotExtended,
-    EK_SignExt,
-    EK_ZeroExt
-  };
-
-  struct VariableGEPIndex {
-    const Value *V;
-    ExtensionKind Extension;
-    int64_t Scale;
-
-    bool operator==(const VariableGEPIndex &Other) const {
-      return V == Other.V && Extension == Other.Extension &&
-        Scale == Other.Scale;
-    }
-
-    bool operator!=(const VariableGEPIndex &Other) const {
-      return !operator==(Other);
-    }
-  };
-}
-
-
-/// GetLinearExpression - Analyze the specified value as a linear expression:
-/// "A*V + B", where A and B are constant integers. Return the scale and offset
-/// values as APInts and return V as a Value*, and return whether we looked
-/// through any sign or zero extends. The incoming Value is known to have
-/// IntegerType and it may already be sign or zero extended.
+/// Analyzes the specified value as a linear expression: "A*V + B", where A and
+/// B are constant integers.
+///
+/// Returns the scale and offset values as APInts and return V as a Value*, and
+/// return whether we looked through any sign or zero extends. The incoming
+/// Value is known to have IntegerType and it may already be sign or zero
+/// extended.
 ///
 /// Note that this looks through extends, so the high bits may not be
 /// represented in the result.
-static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, - ExtensionKind &Extension, - const DataLayout &DL, unsigned Depth, - AssumptionCache *AC, DominatorTree *DT) { +/*static*/ const Value *BasicAAResult::GetLinearExpression( + const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits, + unsigned &SExtBits, const DataLayout &DL, unsigned Depth, + AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW) { assert(V->getType()->isIntegerTy() && "Not an integer value"); // Limit our recursion depth. @@ -206,54 +191,125 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, return V; } - if (BinaryOperator *BOp = dyn_cast(V)) { + if (const ConstantInt *Const = dyn_cast(V)) { + // if it's a constant, just convert it to an offset and remove the variable. + // If we've been called recursively the Offset bit width will be greater + // than the constant's (the Offset's always as wide as the outermost call), + // so we'll zext here and process any extension in the isa & + // isa cases below. + Offset += Const->getValue().zextOrSelf(Offset.getBitWidth()); + assert(Scale == 0 && "Constant values don't have a scale"); + return V; + } + + if (const BinaryOperator *BOp = dyn_cast(V)) { if (ConstantInt *RHSC = dyn_cast(BOp->getOperand(1))) { + + // If we've been called recursively then Offset and Scale will be wider + // that the BOp operands. We'll always zext it here as we'll process sign + // extensions below (see the isa / isa cases). + APInt RHS = RHSC->getValue().zextOrSelf(Offset.getBitWidth()); + switch (BOp->getOpcode()) { - default: break; + default: + // We don't understand this instruction, so we can't decompose it any + // further. + Scale = 1; + Offset = 0; + return V; case Instruction::Or: // X|C == X+C if all the bits in C are unset in X. Otherwise we can't // analyze it. if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC, - BOp, DT)) - break; - // FALL THROUGH. + BOp, DT)) { + Scale = 1; + Offset = 0; + return V; + } + // FALL THROUGH. case Instruction::Add: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth + 1, AC, DT); - Offset += RHSC->getValue(); - return V; + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset += RHS; + break; + case Instruction::Sub: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset -= RHS; + break; case Instruction::Mul: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth + 1, AC, DT); - Offset *= RHSC->getValue(); - Scale *= RHSC->getValue(); - return V; + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset *= RHS; + Scale *= RHS; + break; case Instruction::Shl: - V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, - DL, Depth + 1, AC, DT); - Offset <<= RHSC->getValue().getLimitedValue(); - Scale <<= RHSC->getValue().getLimitedValue(); + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, + SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); + Offset <<= RHS.getLimitedValue(); + Scale <<= RHS.getLimitedValue(); + // the semantics of nsw and nuw for left shifts don't match those of + // multiplications, so we won't propagate them. 
+        NSW = NUW = false;
         return V;
       }
+
+      if (isa<OverflowingBinaryOperator>(BOp)) {
+        NUW &= BOp->hasNoUnsignedWrap();
+        NSW &= BOp->hasNoSignedWrap();
+      }
+      return V;
     }
   }
 
   // Since GEP indices are sign extended anyway, we don't care about the high
   // bits of a sign or zero extended value - just scales and offsets.  The
   // extensions have to be consistent though.
-  if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
-      (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+  if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
     Value *CastOp = cast<CastInst>(V)->getOperand(0);
-    unsigned OldWidth = Scale.getBitWidth();
+    unsigned NewWidth = V->getType()->getPrimitiveSizeInBits();
     unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
-    Scale = Scale.trunc(SmallWidth);
-    Offset = Offset.trunc(SmallWidth);
-    Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
+    unsigned OldZExtBits = ZExtBits, OldSExtBits = SExtBits;
+    const Value *Result =
+        GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL,
+                            Depth + 1, AC, DT, NSW, NUW);
 
-    Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL,
-                                        Depth + 1, AC, DT);
-    Scale = Scale.zext(OldWidth);
-    Offset = Offset.zext(OldWidth);
+    // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this
+    // by just incrementing the number of bits we've extended by.
+    unsigned ExtendedBy = NewWidth - SmallWidth;
+
+    if (isa<SExtInst>(V) && ZExtBits == 0) {
+      // sext(sext(%x, a), b) == sext(%x, a + b)
+
+      if (NSW) {
+        // We haven't sign-wrapped, so it's valid to decompose sext(%x + c)
+        // into sext(%x) + sext(c). We'll sext the Offset ourselves:
+        unsigned OldWidth = Offset.getBitWidth();
+        Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth);
+      } else {
+        // We may have signed-wrapped, so don't decompose sext(%x + c) into
+        // sext(%x) + sext(c).
+        Scale = 1;
+        Offset = 0;
+        Result = CastOp;
+        ZExtBits = OldZExtBits;
+        SExtBits = OldSExtBits;
+      }
+      SExtBits += ExtendedBy;
+    } else {
+      // sext(zext(%x, a), b) = zext(zext(%x, a), b) = zext(%x, a + b)
+
+      if (!NUW) {
+        // We may have unsigned-wrapped, so don't decompose zext(%x + c) into
+        // zext(%x) + zext(c).
+        Scale = 1;
+        Offset = 0;
+        Result = CastOp;
+        ZExtBits = OldZExtBits;
+        SExtBits = OldSExtBits;
+      }
+      ZExtBits += ExtendedBy;
+    }
 
     return Result;
   }
@@ -263,29 +319,27 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
   return V;
 }
 
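The sext case above only distributes sext(%x + c) into sext(%x) + sext(c) when the narrow addition carries nsw; if the add may wrap, the two expressions disagree. A small sketch of the failure case at i8 width, with ordinary two's-complement C++ integers standing in for IR values:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t X = 120;                        // the i8 add 120 + 10 wraps
      int8_t Narrow = (int8_t)(X + 10);      // -126 after wrapping
      int64_t SextOfAdd = (int64_t)Narrow;   // sext(%x + 10) == -126
      int64_t AddOfSext = (int64_t)X + 10;   // sext(%x) + 10  ==  130
      std::printf("sext(x+10)=%lld  sext(x)+10=%lld\n",
                  (long long)SextOfAdd, (long long)AddOfSext);
      // Without nsw the decomposition would wrongly equate these two values;
      // with nsw the wrap above would be poison in IR, so the rewrite is safe.
      return 0;
    }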
-/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
-/// into a base pointer with a constant offset and a number of scaled symbolic
-/// offsets.
+/// If V is a symbolic pointer expression, decompose it into a base pointer
+/// with a constant offset and a number of scaled symbolic offsets.
 ///
-/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
-/// the VarIndices vector) are Value*'s that are known to be scaled by the
-/// specified amount, but which may have other unrepresented high bits. As such,
-/// the gep cannot necessarily be reconstructed from its decomposed form.
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale
+/// in the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As
+/// such, the gep cannot necessarily be reconstructed from its decomposed form.
 ///
 /// When DataLayout is around, this function is capable of analyzing everything
 /// that GetUnderlyingObject can look through. To be able to do that
 /// GetUnderlyingObject and DecomposeGEPExpression must use the same search
-/// depth (MaxLookupSearchDepth).
-/// When DataLayout not is around, it just looks through pointer casts.
-///
-static const Value *
-DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
-                       SmallVectorImpl<VariableGEPIndex> &VarIndices,
-                       bool &MaxLookupReached, const DataLayout &DL,
-                       AssumptionCache *AC, DominatorTree *DT) {
+/// depth (MaxLookupSearchDepth). When DataLayout is not around, it just looks
+/// through pointer casts.
+/*static*/ const Value *BasicAAResult::DecomposeGEPExpression(
+    const Value *V, int64_t &BaseOffs,
+    SmallVectorImpl<VariableGEPIndex> &VarIndices, bool &MaxLookupReached,
+    const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) {
   // Limit recursion depth to limit compile time in crazy cases.
   unsigned MaxLookup = MaxLookupSearchDepth;
   MaxLookupReached = false;
+  SearchTimes++;
 
   BaseOffs = 0;
   do {
@@ -318,7 +372,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
         // updated when GetUnderlyingObject is updated). TLI should be
         // provided also.
         if (const Value *Simplified =
-              SimplifyInstruction(const_cast<Instruction*>(I), DL)) {
+                SimplifyInstruction(const_cast<Instruction *>(I), DL)) {
           V = Simplified;
           continue;
         }
@@ -333,43 +387,47 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
     unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
-    for (User::const_op_iterator I = GEPOp->op_begin()+1,
-         E = GEPOp->op_end(); I != E; ++I) {
-      Value *Index = *I;
+    for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
+         I != E; ++I) {
+      const Value *Index = *I;
       // Compute the (potentially symbolic) offset in bytes for this index.
       if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
         // For a struct, add the member offset.
         unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
-        if (FieldNo == 0) continue;
+        if (FieldNo == 0)
+          continue;
 
         BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);
         continue;
       }
 
       // For an array/pointer, add the element offset, explicitly scaled.
-      if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
-        if (CIdx->isZero()) continue;
+      if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+        if (CIdx->isZero())
+          continue;
         BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
         continue;
       }
 
       uint64_t Scale = DL.getTypeAllocSize(*GTI);
-      ExtensionKind Extension = EK_NotExtended;
+      unsigned ZExtBits = 0, SExtBits = 0;
 
       // If the integer type is smaller than the pointer size, it is implicitly
       // sign extended to pointer size.
       unsigned Width = Index->getType()->getIntegerBitWidth();
-      if (DL.getPointerSizeInBits(AS) > Width)
-        Extension = EK_SignExt;
+      unsigned PointerSize = DL.getPointerSizeInBits(AS);
+      if (PointerSize > Width)
+        SExtBits += PointerSize - Width;
 
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
       APInt IndexScale(Width, 0), IndexOffset(Width, 0);
-      Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL,
-                                  0, AC, DT);
+      bool NSW = true, NUW = true;
+      Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits,
                                  SExtBits, DL, 0, AC, DT, NSW, NUW);
 
       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
-      BaseOffs += IndexOffset.getSExtValue()*Scale;
+      BaseOffs += IndexOffset.getSExtValue() * Scale;
       Scale *= IndexScale.getSExtValue();
 
       // If we already had an occurrence of this index variable, merge this
@@ -377,23 +435,23 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       // A[x][x] -> x*16 + x*4 -> x*20
       // This also ensures that 'x' only appears in the index list once.
       for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
-        if (VarIndices[i].V == Index &&
-            VarIndices[i].Extension == Extension) {
+        if (VarIndices[i].V == Index && VarIndices[i].ZExtBits == ZExtBits &&
+            VarIndices[i].SExtBits == SExtBits) {
           Scale += VarIndices[i].Scale;
-          VarIndices.erase(VarIndices.begin()+i);
+          VarIndices.erase(VarIndices.begin() + i);
           break;
         }
       }
 
       // Make sure that we have a scale that makes sense for this target's
       // pointer size.
-      if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) {
+      if (unsigned ShiftBits = 64 - PointerSize) {
         Scale <<= ShiftBits;
         Scale = (int64_t)Scale >> ShiftBits;
       }
 
       if (Scale) {
-        VariableGEPIndex Entry = {Index, Extension,
+        VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,
                                   static_cast<int64_t>(Scale)};
         VarIndices.push_back(Entry);
       }
@@ -405,196 +463,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
 
   // If the chain of expressions is too deep, just return early.
   MaxLookupReached = true;
+  SearchLimitReached++;
   return V;
 }
 
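DecomposeGEPExpression folds struct field offsets and constant indices into BaseOffs and keeps each remaining index as a scaled symbolic term. A standalone sketch of that arithmetic for a hypothetical layout (the offsets are assumed for illustration, not queried from DataLayout):

    #include <cstdint>
    #include <cstdio>

    // struct S { int32_t a; int32_t b[10]; };  (assumed: a at 0, b at 4)
    // Decompose &base[i].b[j] the way DecomposeGEPExpression would:
    //   byte offset = i * sizeof(S) + offsetof(S, b) + j * sizeof(int32_t)
    int main() {
      const int64_t SizeOfS = 44;   // assumed: 4 + 10*4, no padding
      const int64_t OffsetOfB = 4;  // assumed field offset
      const int64_t ElemSize = 4;   // sizeof(int32_t)

      int64_t BaseOffs = OffsetOfB; // constant part: the struct field offset
      // Symbolic part, one (Scale, V) entry per variable index:
      //   {Scale = SizeOfS,  V = i}  from the leading GEP index
      //   {Scale = ElemSize, V = j}  from the array index
      int64_t i = 2, j = 5;         // example values for the variables
      int64_t Offset = BaseOffs + SizeOfS * i + ElemSize * j;
      std::printf("byte offset = %lld\n", (long long)Offset); // 4+88+20 = 112
      return 0;
    }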
-//===----------------------------------------------------------------------===//
-// BasicAliasAnalysis Pass
-//===----------------------------------------------------------------------===//
-
-#ifndef NDEBUG
-static const Function *getParent(const Value *V) {
-  if (const Instruction *inst = dyn_cast<Instruction>(V))
-    return inst->getParent()->getParent();
-
-  if (const Argument *arg = dyn_cast<Argument>(V))
-    return arg->getParent();
-
-  return nullptr;
-}
-
-static bool notDifferentParent(const Value *O1, const Value *O2) {
-
-  const Function *F1 = getParent(O1);
-  const Function *F2 = getParent(O2);
-
-  return !F1 || !F2 || F1 == F2;
-}
-#endif
-
-namespace {
-  /// BasicAliasAnalysis - This is the primary alias analysis implementation.
-  struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis {
-    static char ID; // Class identification, replacement for typeinfo
-    BasicAliasAnalysis() : ImmutablePass(ID) {
-      initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry());
-    }
-
-    bool doInitialization(Module &M) override;
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<AliasAnalysis>();
-      AU.addRequired<AssumptionCacheTracker>();
-      AU.addRequired<TargetLibraryInfoWrapperPass>();
-    }
-
-    AliasResult alias(const MemoryLocation &LocA,
-                      const MemoryLocation &LocB) override {
-      assert(AliasCache.empty() && "AliasCache must be cleared after use!");
-      assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
-             "BasicAliasAnalysis doesn't support interprocedural queries.");
-      AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags,
-                                     LocB.Ptr, LocB.Size, LocB.AATags);
-      // AliasCache rarely has more than 1 or 2 elements, always use
-      // shrink_and_clear so it quickly returns to the inline capacity of the
-      // SmallDenseMap if it ever grows larger.
-      // FIXME: This should really be shrink_to_inline_capacity_and_clear().
-      AliasCache.shrink_and_clear();
-      VisitedPhiBBs.clear();
-      return Alias;
-    }
-
-    ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const MemoryLocation &Loc) override;
-
-    ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                               ImmutableCallSite CS2) override;
-
-    /// pointsToConstantMemory - Chase pointers until we find a (constant
-    /// global) or not.
-    bool pointsToConstantMemory(const MemoryLocation &Loc,
-                                bool OrLocal) override;
-
-    /// Get the location associated with a pointer argument of a callsite.
-    ModRefResult getArgModRefInfo(ImmutableCallSite CS,
-                                  unsigned ArgIdx) override;
-
-    /// getModRefBehavior - Return the behavior when calling the given
-    /// call site.
-    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
-
-    /// getModRefBehavior - Return the behavior when calling the given function.
-    /// For use when the call site is not known.
-    ModRefBehavior getModRefBehavior(const Function *F) override;
-
-    /// getAdjustedAnalysisPointer - This method is used when a pass implements
-    /// an analysis interface through multiple inheritance.  If needed, it
-    /// should override this to adjust the this pointer as needed for the
-    /// specified pass info.
-    void *getAdjustedAnalysisPointer(const void *ID) override {
-      if (ID == &AliasAnalysis::ID)
-        return (AliasAnalysis *)this;
-      return this;
-    }
-
-  private:
-    // AliasCache - Track alias queries to guard against recursion.
-    typedef std::pair<MemoryLocation, MemoryLocation> LocPair;
-    typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy;
-    AliasCacheTy AliasCache;
-
-    /// \brief Track phi nodes we have visited. When interpret "Value" pointer
-    /// equality as value equality we need to make sure that the "Value" is not
-    /// part of a cycle. Otherwise, two uses could come from different
-    /// "iterations" of a cycle and see different values for the same "Value"
-    /// pointer.
-    /// The following example shows the problem:
-    ///   %p = phi(%alloca1, %addr2)
-    ///   %l = load %ptr
-    ///   %addr1 = gep, %alloca2, 0, %l
-    ///   %addr2 = gep %alloca2, 0, (%l + 1)
-    ///   alias(%p, %addr1) -> MayAlias !
-    ///   store %l, ...
-    SmallPtrSet<const BasicBlock *, 8> VisitedPhiBBs;
-
-    // Visited - Track instructions visited by pointsToConstantMemory.
-    SmallPtrSet<const Value *, 16> Visited;
-
-    /// \brief Check whether two Values can be considered equivalent.
-    ///
-    /// In addition to pointer equivalence of \p V1 and \p V2 this checks
-    /// whether they can not be part of a cycle in the value graph by looking at
-    /// all visited phi nodes an making sure that the phis cannot reach the
-    /// value. We have to do this because we are looking through phi nodes (That
-    /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB).
-    bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2);
-
-    /// \brief Dest and Src are the variable indices from two decomposed
-    /// GetElementPtr instructions GEP1 and GEP2 which have common base
-    /// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
-    /// difference between the two pointers.
-    void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
-                            const SmallVectorImpl<VariableGEPIndex> &Src);
-
-    // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
-    // instruction against another.
-    AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size,
-                         const AAMDNodes &V1AAInfo,
-                         const Value *V2, uint64_t V2Size,
-                         const AAMDNodes &V2AAInfo,
-                         const Value *UnderlyingV1, const Value *UnderlyingV2);
-
-    // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI
-    // instruction against another.
-    AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
-                         const AAMDNodes &PNAAInfo,
-                         const Value *V2, uint64_t V2Size,
-                         const AAMDNodes &V2AAInfo);
-
-    /// aliasSelect - Disambiguate a Select instruction against another value.
-    AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
-                            const AAMDNodes &SIAAInfo,
-                            const Value *V2, uint64_t V2Size,
-                            const AAMDNodes &V2AAInfo);
-
-    AliasResult aliasCheck(const Value *V1, uint64_t V1Size,
-                           AAMDNodes V1AATag,
-                           const Value *V2, uint64_t V2Size,
-                           AAMDNodes V2AATag);
-  };
-} // End of anonymous namespace
-
-// Register this pass...
-char BasicAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa",
-                         "Basic Alias Analysis (stateless AA impl)",
-                         false, true, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa",
-                       "Basic Alias Analysis (stateless AA impl)",
-                       false, true, false)
-
-
-ImmutablePass *llvm::createBasicAliasAnalysisPass() {
-  return new BasicAliasAnalysis();
-}
-
-/// pointsToConstantMemory - Returns whether the given pointer value
-/// points to memory that is local to the function, with global constants being
-/// considered local to all functions.
-bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
-                                                bool OrLocal) {
+/// Returns whether the given pointer value points to memory that is local to
+/// the function, with global constants being considered local to all
+/// functions.
+bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+                                           bool OrLocal) {
   assert(Visited.empty() && "Visited must be cleared after use!");
 
   unsigned MaxLookup = 8;
   SmallVector<const Value *, 16> Worklist;
   Worklist.push_back(Loc.Ptr);
   do {
-    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL);
+    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);
     if (!Visited.insert(V).second) {
       Visited.clear();
-      return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+      return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
     }
 
     // An alloca instruction defines local memory.
@@ -608,7 +495,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
       // others.  GV may even be a declaration, not a definition.
       if (!GV->isConstant()) {
         Visited.clear();
-        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+        return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
       }
       continue;
     }
@@ -626,7 +513,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
       // Don't bother inspecting phi nodes with many operands.
       if (PN->getNumIncomingValues() > MaxLookup) {
         Visited.clear();
-        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+        return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
       }
       for (Value *IncValue : PN->incoming_values())
         Worklist.push_back(IncValue);
@@ -635,7 +522,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,
 
     // Otherwise be conservative.
     Visited.clear();
-    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+    return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
 
   } while (!Worklist.empty() && --MaxLookup);
 
@@ -660,62 +547,51 @@ static bool isMemsetPattern16(const Function *MS,
   return false;
 }
 
-/// getModRefBehavior - Return the behavior when calling the given call site.
-AliasAnalysis::ModRefBehavior
-BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+/// Returns the behavior when calling the given call site.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {
   if (CS.doesNotAccessMemory())
     // Can't do better than this.
-    return DoesNotAccessMemory;
+    return FMRB_DoesNotAccessMemory;
 
-  ModRefBehavior Min = UnknownModRefBehavior;
+  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
 
   // If the callsite knows it only reads memory, don't return worse
   // than that.
   if (CS.onlyReadsMemory())
-    Min = OnlyReadsMemory;
+    Min = FMRB_OnlyReadsMemory;
 
   if (CS.onlyAccessesArgMemory())
-    Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+    Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
 
-  // The AliasAnalysis base class has some smarts, lets use them.
-  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+  // The AAResultBase base class has some smarts, let's use them.
+  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
 }
 
-/// getModRefBehavior - Return the behavior when calling the given function.
-/// For use when the call site is not known.
-AliasAnalysis::ModRefBehavior
-BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+/// Returns the behavior when calling the given function. For use when the call
+/// site is not known.
+FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
   // If the function declares it doesn't access memory, we can't do better.
   if (F->doesNotAccessMemory())
-    return DoesNotAccessMemory;
+    return FMRB_DoesNotAccessMemory;
 
-  // For intrinsics, we can check the table.
-  if (Intrinsic::ID iid = F->getIntrinsicID()) {
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/IR/Intrinsics.gen"
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
-  }
-
-  ModRefBehavior Min = UnknownModRefBehavior;
+  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
 
   // If the function declares it only reads memory, go with that.
   if (F->onlyReadsMemory())
-    Min = OnlyReadsMemory;
+    Min = FMRB_OnlyReadsMemory;
 
   if (F->onlyAccessesArgMemory())
-    Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees);
+    Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
 
-  const TargetLibraryInfo &TLI =
-      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   if (isMemsetPattern16(F, TLI))
-    Min = OnlyAccessesArgumentPointees;
+    Min = FMRB_OnlyAccessesArgumentPointees;
 
   // Otherwise be conservative.
-  return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
 }
 
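FunctionModRefBehavior values act as bit sets, so intersecting them with & can only remove capabilities, never add them. A toy sketch of that lattice; the bit layout here is made up for illustration and does not match LLVM's actual enum values:

    #include <cstdio>

    // Low bits say *what* may happen (mod/ref), high bits say *where*
    // (argument pointees vs. anywhere, where "anywhere" includes args).
    enum Behavior : unsigned {
      Ref = 1, Mod = 2,                  // what
      ArgPointees = 4, Anywhere = 4 | 8, // where
      DoesNotAccessMemory = 0,
      OnlyReadsMemory = Anywhere | Ref,
      OnlyAccessesArgumentPointees = ArgPointees | Mod | Ref,
      UnknownBehavior = Anywhere | Mod | Ref,
    };

    int main() {
      // A call known to be both readonly and argmemonly: intersect both facts.
      unsigned Min = UnknownBehavior;
      Min &= OnlyReadsMemory;              // drops the Mod bit
      Min &= OnlyAccessesArgumentPointees; // drops the "anywhere" bit
      std::printf("combined = %u, reads-args-only = %u\n", Min,
                  (unsigned)(ArgPointees | Ref)); // both print 5
      return 0;
    }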
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
+ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+                                           unsigned ArgIdx) {
   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))
     switch (II->getIntrinsicID()) {
     default:
@@ -725,7 +601,7 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {
     case Intrinsic::memset:
     case Intrinsic::memcpy:
     case Intrinsic::memmove:
       assert((ArgIdx == 0 || ArgIdx == 1) &&
              "Invalid argument index for memory intrinsic");
-      return ArgIdx ? Ref : Mod;
+      return ArgIdx ? MRI_Ref : MRI_Mod;
     }
 
   // We can bound the aliasing properties of memset_pattern16 just as we can
   // for memcpy/memset.  This is particularly important because the
   // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
   // whenever possible.
   if (CS.getCalledFunction() &&
-      isMemsetPattern16(CS.getCalledFunction(), *TLI)) {
+      isMemsetPattern16(CS.getCalledFunction(), TLI)) {
     assert((ArgIdx == 0 || ArgIdx == 1) &&
            "Invalid argument index for memset_pattern16");
-    return ArgIdx ? Ref : Mod;
+    return ArgIdx ? MRI_Ref : MRI_Mod;
   }
   // FIXME: Handle memset_pattern4 and memset_pattern8 also.
 
-  return AliasAnalysis::getArgModRefInfo(CS, ArgIdx);
+  if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
+    return MRI_Ref;
+
+  if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone))
+    return MRI_NoModRef;
+
+  return AAResultBase::getArgModRefInfo(CS, ArgIdx);
 }
 
 static bool isAssumeIntrinsic(ImmutableCallSite CS) {
   const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
-  if (II && II->getIntrinsicID() == Intrinsic::assume)
-    return true;
-
-  return false;
+  return II && II->getIntrinsicID() == Intrinsic::assume;
 }
 
-bool BasicAliasAnalysis::doInitialization(Module &M) {
-  InitializeAliasAnalysis(this, &M.getDataLayout());
-  return true;
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+  if (const Instruction *inst = dyn_cast<Instruction>(V))
+    return inst->getParent()->getParent();
+
+  if (const Argument *arg = dyn_cast<Argument>(V))
+    return arg->getParent();
+
+  return nullptr;
 }
 
-/// getModRefInfo - Check to see if the specified callsite can clobber the
-/// specified memory object.  Since we only look at local properties of this
-/// function, we really can't say much about this query.  We do, however, use
-/// simple "address taken" analysis on local objects.
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
-                                  const MemoryLocation &Loc) {
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+  const Function *F1 = getParent(O1);
+  const Function *F2 = getParent(O2);
+
+  return !F1 || !F2 || F1 == F2;
+}
+#endif
+
+AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
+                                 const MemoryLocation &LocB) {
+  assert(notDifferentParent(LocA.Ptr, LocB.Ptr) &&
+         "BasicAliasAnalysis doesn't support interprocedural queries.");
+
+  // If we have a directly cached entry for these locations, we have recursed
+  // through this once, so just return the cached results. Notably, when this
+  // happens, we don't clear the cache.
+  auto CacheIt = AliasCache.find(LocPair(LocA, LocB));
+  if (CacheIt != AliasCache.end())
+    return CacheIt->second;
+
+  AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr,
+                                 LocB.Size, LocB.AATags);
+  // AliasCache rarely has more than 1 or 2 elements, always use
+  // shrink_and_clear so it quickly returns to the inline capacity of the
+  // SmallDenseMap if it ever grows larger.
+  // FIXME: This should really be shrink_to_inline_capacity_and_clear().
+  AliasCache.shrink_and_clear();
+  VisitedPhiBBs.clear();
+  return Alias;
+}
 
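The rewritten alias() consults AliasCache first, so a query that recurses back into itself picks up the conservative MayAlias entry that aliasCheck seeds before descending, instead of looping forever. A minimal sketch of that guard pattern with hypothetical stand-in types:

    #include <cstdio>
    #include <map>
    #include <utility>

    enum Result { MayAlias, NoAlias };
    using Query = std::pair<int, int>; // stands in for (LocA, LocB)

    static std::map<Query, Result> Cache;
    static Result resolve(Query Q);

    static Result check(Query Q) {
      // Seed the cache with the conservative answer *before* recursing, so a
      // cyclic chain of queries terminates with MayAlias instead of looping.
      auto Ins = Cache.insert({Q, MayAlias});
      if (!Ins.second)
        return Ins.first->second; // already in flight or already answered
      Result R = resolve(Q);
      Ins.first->second = R;
      return R;
    }

    static Result resolve(Query Q) {
      if (Q.first == Q.second)
        return check({Q.second, Q.first}); // artificial cycle for the demo
      return NoAlias;
    }

    int main() {
      std::printf("%d %d\n", check({1, 2}), check({3, 3})); // 1 0
      return 0;
    }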
+/// Checks to see if the specified callsite can clobber the specified memory
+/// object.
+///
+/// Since we only look at local properties of this function, we really can't
+/// say much about this query. We do, however, use simple "address taken"
+/// analysis on local objects.
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
+                                        const MemoryLocation &Loc) {
   assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&
          "AliasAnalysis query involving multiple functions!");
 
-  const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL);
+  const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);
 
   // If this is a tail call and Loc.Ptr points to a stack location, we know that
   // the tail call cannot access or modify the local stack.
@@ -776,7 +694,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   if (isa<AllocaInst>(Object))
     if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
       if (CI->isTailCall())
-        return NoModRef;
+        return MRI_NoModRef;
 
   // If the pointer is to a locally allocated object that does not escape,
   // then the call can not mod/ref the pointer unless the call takes the pointer
@@ -798,41 +716,42 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       // is impossible to alias the pointer we're checking.  If not, we have to
       // assume that the call could touch the pointer, even though it doesn't
       // escape.
-      if (!isNoAlias(MemoryLocation(*CI), MemoryLocation(Object))) {
+      AliasResult AR =
+          getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object));
+      if (AR) {
         PassedAsArg = true;
         break;
       }
     }
 
     if (!PassedAsArg)
-      return NoModRef;
+      return MRI_NoModRef;
   }
 
   // While the assume intrinsic is marked as arbitrarily writing so that
   // proper control dependencies will be maintained, it never aliases any
   // particular memory location.
   if (isAssumeIntrinsic(CS))
-    return NoModRef;
+    return MRI_NoModRef;
 
-  // The AliasAnalysis base class has some smarts, lets use them.
-  return AliasAnalysis::getModRefInfo(CS, Loc);
+  // The AAResultBase base class has some smarts, let's use them.
+  return AAResultBase::getModRefInfo(CS, Loc);
 }
 
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
-                                  ImmutableCallSite CS2) {
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1,
+                                        ImmutableCallSite CS2) {
   // While the assume intrinsic is marked as arbitrarily writing so that
   // proper control dependencies will be maintained, it never aliases any
   // particular memory location.
   if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2))
-    return NoModRef;
+    return MRI_NoModRef;
 
-  // The AliasAnalysis base class has some smarts, lets use them.
-  return AliasAnalysis::getModRefInfo(CS1, CS2);
+  // The AAResultBase base class has some smarts, let's use them.
+  return AAResultBase::getModRefInfo(CS1, CS2);
 }
 
-/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP
-/// operators, both having the exact same pointer operand.
+/// Provide ad-hoc rules to disambiguate accesses through two GEP operators,
+/// both having the exact same pointer operand.
 static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
                                             uint64_t V1Size,
                                             const GEPOperator *GEP2,
@@ -860,10 +779,9 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
   ConstantInt *C2 =
       dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1));
 
-  // If the last (struct) indices aren't constants, we can't say anything.
-  // If they're identical, the other indices might be also be dynamically
-  // equal, so the GEPs can alias.
-  if (!C1 || !C2 || C1 == C2)
+  // If the last (struct) indices are constants and are equal, the other
+  // indices might also be dynamically equal, so the GEPs can alias.
+  if (C1 && C2 && C1 == C2)
     return MayAlias;
 
   // Find the last-indexed type of the GEP, i.e., the type you'd get if
@@ -886,12 +804,49 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
     IntermediateIndices.push_back(GEP1->getOperand(i + 1));
   }
 
-  StructType *LastIndexedStruct =
-      dyn_cast<StructType>(GetElementPtrInst::getIndexedType(
-          GEP1->getSourceElementType(), IntermediateIndices));
+  auto *Ty = GetElementPtrInst::getIndexedType(
+      GEP1->getSourceElementType(), IntermediateIndices);
+  StructType *LastIndexedStruct = dyn_cast<StructType>(Ty);
 
-  if (!LastIndexedStruct)
+  if (isa<SequentialType>(Ty)) {
+    // We know that:
+    // - both GEPs begin indexing from the exact same pointer;
+    // - the last indices in both GEPs are constants, indexing into a
+    //   sequential type (array or pointer);
+    // - both GEPs only index through arrays prior to that.
+    //
+    // Because array indices greater than the number of elements are valid in
+    // GEPs, unless we know the intermediate indices are identical between
+    // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't
+    // partially overlap. We also need to check that the loaded size matches
+    // the element size, otherwise we could still have overlap.
+    const uint64_t ElementSize =
+        DL.getTypeStoreSize(cast<SequentialType>(Ty)->getElementType());
+    if (V1Size != ElementSize || V2Size != ElementSize)
+      return MayAlias;
+
+    for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i)
+      if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1))
+        return MayAlias;
+
+    // Now we know that the array/pointer that GEP1 indexes into and that
+    // GEP2 indexes into must either precisely overlap or be disjoint. Because
+    // they cannot partially overlap and because fields in an array cannot
+    // overlap, if we can prove the final indices are different between GEP1
+    // and GEP2, we can conclude GEP1 and GEP2 don't alias.
+
+    // If the last indices are constants, we've already checked they don't
+    // equal each other so we can exit early.
+    if (C1 && C2)
+      return NoAlias;
+    if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1),
+                        GEP2->getOperand(GEP2->getNumOperands() - 1),
+                        DL))
+      return NoAlias;
+    return MayAlias;
+  } else if (!LastIndexedStruct || !C1 || !C2) {
+    return MayAlias;
+  }
 
   // We know that:
   // - both GEPs begin indexing from the exact same pointer;
@@ -925,39 +880,21 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,
   return MayAlias;
 }
 
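The sequential-type case above only concludes NoAlias when both accesses cover exactly one element, all intermediate indices match, and the final indices differ. A numeric sketch of why each condition matters, for a hypothetical int32_t A[8][4]:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // &A[i][1] vs &A[i][2]: same base, identical intermediate index i,
      // different final constant index, 4-byte accesses of 4-byte elements.
      const int64_t RowBytes = 4 * sizeof(int32_t);
      const int64_t Elem = sizeof(int32_t);
      for (int64_t i = 0; i < 8; ++i) {
        int64_t Off1 = i * RowBytes + 1 * Elem;
        int64_t Off2 = i * RowBytes + 2 * Elem;
        // Whole elements can only coincide or be disjoint, never overlap:
        if (!(Off1 + Elem <= Off2 || Off2 + Elem <= Off1))
          std::printf("overlap at i=%lld\n", (long long)i); // never fires
      }
      // Counterexample: if the access size were 8 bytes (two elements), the
      // accesses at offsets 4 and 8 within a row would partially overlap,
      // which is why the V1Size/V2Size == ElementSize check is required.
      return 0;
    }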
-/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
-/// against another pointer.  We know that V1 is a GEP, but we don't know
-/// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
-/// UnderlyingV2 is the same for V2.
+/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against
+/// another pointer.
 ///
-AliasResult BasicAliasAnalysis::aliasGEP(
-    const GEPOperator *GEP1, uint64_t V1Size, const AAMDNodes &V1AAInfo,
-    const Value *V2, uint64_t V2Size, const AAMDNodes &V2AAInfo,
-    const Value *UnderlyingV1, const Value *UnderlyingV2) {
+/// We know that V1 is a GEP, but we don't know anything about V2.
+/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for
+/// V2.
+AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
+                                    const AAMDNodes &V1AAInfo, const Value *V2,
+                                    uint64_t V2Size, const AAMDNodes &V2AAInfo,
+                                    const Value *UnderlyingV1,
+                                    const Value *UnderlyingV2) {
   int64_t GEP1BaseOffset;
   bool GEP1MaxLookupReached;
   SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
 
-  // We have to get two AssumptionCaches here because GEP1 and V2 may be from
-  // different functions.
-  // FIXME: This really doesn't make any sense. We get a dominator tree below
-  // that can only refer to a single function. But this function (aliasGEP) is
-  // a method on an immutable pass that can be called when there *isn't*
-  // a single function. The old pass management layer makes this "work", but
-  // this isn't really a clean solution.
-  AssumptionCacheTracker &ACT = getAnalysis<AssumptionCacheTracker>();
-  AssumptionCache *AC1 = nullptr, *AC2 = nullptr;
-  if (auto *GEP1I = dyn_cast<Instruction>(GEP1))
-    AC1 = &ACT.getAssumptionCache(
-        const_cast<Function &>(*GEP1I->getParent()->getParent()));
-  if (auto *I2 = dyn_cast<Instruction>(V2))
-    AC2 = &ACT.getAssumptionCache(
-        const_cast<Function &>(*I2->getParent()->getParent()));
-
-  DominatorTreeWrapperPass *DTWP =
-      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
   // If we have two gep instructions with must-alias or not-alias'ing base
   // pointers, figure out if the indexes to the GEP tell us anything about the
   // derived pointer.
@@ -971,9 +908,8 @@ AliasResult BasicAliasAnalysis::aliasGEP(
     // identical.
     if ((BaseAlias == MayAlias) && V1Size == V2Size) {
       // Do the base pointers alias assuming type and size.
-      AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size,
-                                                V1AAInfo, UnderlyingV2,
-                                                V2Size, V2AAInfo);
+      AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo,
+                                                UnderlyingV2, V2Size, V2AAInfo);
       if (PreciseBaseAlias == NoAlias) {
         // See if the computed offset from the common pointer tells us about the
         // relation of the resulting pointer.
@@ -982,15 +918,15 @@ AliasResult BasicAliasAnalysis::aliasGEP(
         SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
         const Value *GEP2BasePtr =
             DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                                   GEP2MaxLookupReached, *DL, AC2, DT);
+                                   GEP2MaxLookupReached, DL, &AC, DT);
         const Value *GEP1BasePtr =
             DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                                   GEP1MaxLookupReached, *DL, AC1, DT);
+                                   GEP1MaxLookupReached, DL, &AC, DT);
         // DecomposeGEPExpression and GetUnderlyingObject should return the
         // same result except when DecomposeGEPExpression has no DataLayout.
+        // FIXME: They always have a DataLayout so this should become an
+        // assert.
         if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-          assert(!DL &&
-                 "DecomposeGEPExpression and GetUnderlyingObject disagree!");
           return MayAlias;
         }
         // If the max search depth is reached the result is undefined
@@ -1007,35 +943,35 @@ AliasResult BasicAliasAnalysis::aliasGEP(
 
     // If we get a No or May, then return it immediately, no amount of analysis
     // will improve this situation.
-    if (BaseAlias != MustAlias) return BaseAlias;
+    if (BaseAlias != MustAlias)
+      return BaseAlias;
 
     // Otherwise, we have a MustAlias.  Since the base pointers alias each other
     // exactly, see if the computed offset from the common pointer tells us
     // about the relation of the resulting pointer.
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, *DL, AC1, DT);
+                               GEP1MaxLookupReached, DL, &AC, DT);
 
     int64_t GEP2BaseOffset;
     bool GEP2MaxLookupReached;
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
         DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices,
-                               GEP2MaxLookupReached, *DL, AC2, DT);
+                               GEP2MaxLookupReached, DL, &AC, DT);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
+    // FIXME: They always have a DataLayout so this should become an assert.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
-      assert(!DL &&
-             "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
 
     // If we know the two GEPs are based off of the exact same pointer (and not
     // just the same underlying object), see if that tells us anything about
     // the resulting pointers.
-    if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
-      AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL);
+    if (GEP1->getPointerOperand() == GEP2->getPointerOperand()) {
+      AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);
       // If we couldn't find anything interesting, don't abandon just yet.
       if (R != MayAlias)
         return R;
@@ -1072,13 +1008,12 @@ AliasResult BasicAliasAnalysis::aliasGEP(
 
     const Value *GEP1BasePtr =
         DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices,
-                               GEP1MaxLookupReached, *DL, AC1, DT);
+                               GEP1MaxLookupReached, DL, &AC, DT);
 
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
+    // FIXME: They always have a DataLayout so this should become an assert.
     if (GEP1BasePtr != UnderlyingV1) {
-      assert(!DL &&
-             "DecomposeGEPExpression and GetUnderlyingObject disagree!");
      return MayAlias;
    }
    // If the max search depth is reached the result is undefined
@@ -1124,12 +1059,42 @@ AliasResult BasicAliasAnalysis::aliasGEP(
     }
   }
 
-  // Try to distinguish something like &A[i][1] against &A[42][0].
-  // Grab the least significant bit set in any of the scales.
   if (!GEP1VariableIndices.empty()) {
     uint64_t Modulo = 0;
-    for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i)
-      Modulo |= (uint64_t) GEP1VariableIndices[i].Scale;
+    bool AllPositive = true;
+    for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) {
+
+      // Try to distinguish something like &A[i][1] against &A[42][0].
+      // Grab the least significant bit set in any of the scales. We
+      // don't need std::abs here (even if the scale's negative) as we'll
+      // be ^'ing Modulo with itself later.
+      Modulo |= (uint64_t)GEP1VariableIndices[i].Scale;
+
+      if (AllPositive) {
+        // If the Value could change between cycles, then any reasoning about
+        // the Value this cycle may not hold in the next cycle. We'll just
+        // give up if we can't determine conditions that hold for every cycle:
+        const Value *V = GEP1VariableIndices[i].V;
+
+        bool SignKnownZero, SignKnownOne;
+        ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL,
+                       0, &AC, nullptr, DT);
+
+        // Zero-extension widens the variable, and so forces the sign
+        // bit to zero.
+        bool IsZExt = GEP1VariableIndices[i].ZExtBits > 0 || isa<ZExtInst>(V);
+        SignKnownZero |= IsZExt;
+        SignKnownOne &= !IsZExt;
+
+        // If the variable begins with a zero then we know it's
+        // positive, regardless of whether the value is signed or
+        // unsigned.
+        int64_t Scale = GEP1VariableIndices[i].Scale;
+        AllPositive =
+            (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0);
+      }
+    }
+
     Modulo = Modulo ^ (Modulo & (Modulo - 1));
 
     // We can compute the difference between the two addresses
@@ -1140,6 +1105,16 @@ AliasResult BasicAliasAnalysis::aliasGEP(
         V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&
         V1Size <= Modulo - ModOffset)
       return NoAlias;
+
+    // If we know all the variables are positive, then GEP1 >= GEP1BasePtr.
+    // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers
+    // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr.
+    if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset)
+      return NoAlias;
+
+    if (constantOffsetHeuristic(GEP1VariableIndices, V1Size, V2Size,
+                                GEP1BaseOffset, &AC, DT))
+      return NoAlias;
   }
 
   // Statically, we can see that the base objects are the same, but the
@@ -1164,46 +1139,44 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
   return MayAlias;
 }
 
-/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select
-/// instruction against another.
-AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI,
-                                            uint64_t SISize,
-                                            const AAMDNodes &SIAAInfo,
-                                            const Value *V2, uint64_t V2Size,
-                                            const AAMDNodes &V2AAInfo) {
+/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction
+/// against another.
+AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
+                                       const AAMDNodes &SIAAInfo,
+                                       const Value *V2, uint64_t V2Size,
+                                       const AAMDNodes &V2AAInfo) {
   // If the values are Selects with the same condition, we can do a more precise
   // check: just check for aliases between the values on corresponding arms.
   if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
     if (SI->getCondition() == SI2->getCondition()) {
-      AliasResult Alias =
-        aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
-                   SI2->getTrueValue(), V2Size, V2AAInfo);
+      AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo,
+                                     SI2->getTrueValue(), V2Size, V2AAInfo);
       if (Alias == MayAlias)
        return MayAlias;
      AliasResult ThisAlias =
-        aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
-                   SI2->getFalseValue(), V2Size, V2AAInfo);
+          aliasCheck(SI->getFalseValue(), SISize, SIAAInfo,
+                     SI2->getFalseValue(), V2Size, V2AAInfo);
       return MergeAliasResults(ThisAlias, Alias);
     }
 
   // If both arms of the Select node NoAlias or MustAlias V2, then returns
   // NoAlias / MustAlias. Otherwise, returns MayAlias.
   AliasResult Alias =
-    aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
+      aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
   if (Alias == MayAlias)
     return MayAlias;
 
   AliasResult ThisAlias =
-    aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
+      aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
   return MergeAliasResults(ThisAlias, Alias);
 }
 
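In the aliasGEP hunk above, Modulo ^ (Modulo & (Modulo - 1)) keeps only the lowest set bit of the OR of the scales, so the variable part of the GEP can only move the pointer in multiples of Modulo; the constant offset modulo that stride then bounds where the two accesses can land. A worked sketch with assumed sizes and scales:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Modulo = 0;
      Modulo |= 20; // scale of one variable index (e.g. x*20 from A[x][x])
      Modulo |= 4;  // scale of another (e.g. x*4)
      Modulo = Modulo ^ (Modulo & (Modulo - 1)); // lowest set bit -> 4

      // GEP1 = Base + 4*k + BaseOffset for integers k; with BaseOffset = 1
      // and V2 = Base, GEP1 is always congruent to 1 mod 4.
      uint64_t BaseOffset = 1, ModOffset = BaseOffset & (Modulo - 1);
      uint64_t V1Size = 2, V2Size = 1;
      // Within each stride, V2's access occupies [0, V2Size) and GEP1's
      // occupies [ModOffset, ModOffset + V1Size): disjoint when V2 ends at or
      // before ModOffset and V1 fits before the next stride boundary.
      bool NoAlias = ModOffset >= V2Size && V1Size <= Modulo - ModOffset;
      std::printf("Modulo=%llu ModOffset=%llu -> %s\n",
                  (unsigned long long)Modulo, (unsigned long long)ModOffset,
                  NoAlias ? "NoAlias" : "MayAlias");
      return 0;
    }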
-// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction
-// against another.
-AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
-                                         const AAMDNodes &PNAAInfo,
-                                         const Value *V2, uint64_t V2Size,
-                                         const AAMDNodes &V2AAInfo) {
+/// Provide a bunch of ad-hoc rules to disambiguate a PHI instruction against
+/// another.
+AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
+                                    const AAMDNodes &PNAAInfo, const Value *V2,
+                                    uint64_t V2Size,
+                                    const AAMDNodes &V2AAInfo) {
   // Track phi nodes we have visited. We use this information when we determine
   // value equivalence.
   VisitedPhiBBs.insert(PN->getParent());
@@ -1232,9 +1205,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
 
     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
       AliasResult ThisAlias =
-        aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
-                   PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
-                   V2Size, V2AAInfo);
+          aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo,
+                     PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)),
+                     V2Size, V2AAInfo);
       Alias = MergeAliasResults(ThisAlias, Alias);
       if (Alias == MayAlias)
         break;
@@ -1247,8 +1220,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
     return Alias;
   }
 
-  SmallPtrSet<Value*, 4> UniqueSrc;
-  SmallVector<Value*, 4> V1Srcs;
+  SmallPtrSet<Value *, 4> UniqueSrc;
+  SmallVector<Value *, 4> V1Srcs;
+  bool isRecursive = false;
   for (Value *PV1 : PN->incoming_values()) {
     if (isa<PHINode>(PV1))
       // If any of the source itself is a PHI, return MayAlias conservatively
@@ -1256,12 +1230,33 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
       // sides are PHI nodes. In which case, this is O(m x n) time where 'm'
       // and 'n' are the number of PHI sources.
       return MayAlias;
+
+    if (EnableRecPhiAnalysis)
+      if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) {
+        // Check whether the incoming value is a GEP that advances the pointer
+        // result of this PHI node (e.g. in a loop). If this is the case, we
+        // would recurse and always get a MayAlias. Handle this case specially
+        // below.
+        if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 &&
+            isa<ConstantInt>(PV1GEP->idx_begin())) {
+          isRecursive = true;
+          continue;
+        }
+      }
+
     if (UniqueSrc.insert(PV1).second)
       V1Srcs.push_back(PV1);
   }
 
+  // If this PHI node is recursive, set the size of the accessed memory to
+  // unknown to represent all the possible values the GEP could advance the
+  // pointer to.
+  if (isRecursive)
+    PNSize = MemoryLocation::UnknownSize;
+
-  AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo,
-                                 V1Srcs[0], PNSize, PNAAInfo);
+  AliasResult Alias =
+      aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo);
+
   // Early exit if the check of the first PHI source against V2 is MayAlias.
   // Other results are not possible.
   if (Alias == MayAlias)
@@ -1272,8 +1267,8 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
   for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
     Value *V = V1Srcs[i];
 
-    AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo,
-                                       V, PNSize, PNAAInfo);
+    AliasResult ThisAlias =
+        aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo);
     Alias = MergeAliasResults(ThisAlias, Alias);
     if (Alias == MayAlias)
       break;
@@ -1282,13 +1277,11 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,
   return Alias;
 }
 
-// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases,
-// such as array references.
-//
-AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
-                                           AAMDNodes V1AAInfo, const Value *V2,
-                                           uint64_t V2Size,
-                                           AAMDNodes V2AAInfo) {
+/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as
+/// array references.
+AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
+                                      AAMDNodes V1AAInfo, const Value *V2,
+                                      uint64_t V2Size, AAMDNodes V2AAInfo) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are.
   if (V1Size == 0 || V2Size == 0)
     return NoAlias;
 
@@ -1313,11 +1306,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     return MustAlias;
 
   if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
-    return NoAlias;  // Scalars cannot alias each other
+    return NoAlias; // Scalars cannot alias each other
 
   // Figure out what objects these things are pointing to if we can.
-  const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth);
-  const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth);
+  const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
+  const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
 
   // Null values in the default address space don't point to any object, so they
   // don't alias any other pointer.
@@ -1366,12 +1359,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
 
   // If the size of one access is larger than the entire object on the other
   // side, then we know such behavior is undefined and can assume no alias.
-  if (DL)
-    if ((V1Size != MemoryLocation::UnknownSize &&
-         isObjectSmallerThan(O2, V1Size, *DL, *TLI)) ||
-        (V2Size != MemoryLocation::UnknownSize &&
-         isObjectSmallerThan(O1, V2Size, *DL, *TLI)))
-      return NoAlias;
+  if ((V1Size != MemoryLocation::UnknownSize &&
+       isObjectSmallerThan(O2, V1Size, DL, TLI)) ||
+      (V2Size != MemoryLocation::UnknownSize &&
+       isObjectSmallerThan(O1, V2Size, DL, TLI)))
+    return NoAlias;
 
   // Check the cache before climbing up use-def chains. This also terminates
   // otherwise infinitely recursive queries.
@@ -1380,7 +1372,7 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   if (V1 > V2)
     std::swap(Locs.first, Locs.second);
   std::pair<AliasCacheTy::iterator, bool> Pair =
-    AliasCache.insert(std::make_pair(Locs, MayAlias));
+      AliasCache.insert(std::make_pair(Locs, MayAlias));
   if (!Pair.second)
     return Pair.first->second;
 
@@ -1393,8 +1385,10 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     std::swap(V1AAInfo, V2AAInfo);
   }
   if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
-    AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
-    if (Result != MayAlias) return AliasCache[Locs] = Result;
+    AliasResult Result =
+        aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2);
+    if (Result != MayAlias)
+      return AliasCache[Locs] = Result;
   }
 
   if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
@@ -1403,9 +1397,9 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     std::swap(V1AAInfo, V2AAInfo);
   }
   if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
-    AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
-                                  V2, V2Size, V2AAInfo);
-    if (Result != MayAlias) return AliasCache[Locs] = Result;
+    AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+    if (Result != MayAlias)
+      return AliasCache[Locs] = Result;
   }
 
   if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
@@ -1414,29 +1408,38 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
     std::swap(V1AAInfo, V2AAInfo);
   }
   if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
-    AliasResult Result = aliasSelect(S1, V1Size, V1AAInfo,
-                                     V2, V2Size, V2AAInfo);
-    if (Result != MayAlias) return AliasCache[Locs] = Result;
+    AliasResult Result =
+        aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+    if (Result != MayAlias)
+      return AliasCache[Locs] = Result;
   }
 
   // If both pointers are pointing into the same object and one of the
   // accesses is accessing the entire object, then the accesses must
   // overlap in some way.
-  if (DL && O1 == O2)
+  if (O1 == O2)
     if ((V1Size != MemoryLocation::UnknownSize &&
-         isObjectSize(O1, V1Size, *DL, *TLI)) ||
+         isObjectSize(O1, V1Size, DL, TLI)) ||
         (V2Size != MemoryLocation::UnknownSize &&
-         isObjectSize(O2, V2Size, *DL, *TLI)))
+         isObjectSize(O2, V2Size, DL, TLI)))
       return AliasCache[Locs] = PartialAlias;
 
-  AliasResult Result =
-      AliasAnalysis::alias(MemoryLocation(V1, V1Size, V1AAInfo),
-                           MemoryLocation(V2, V2Size, V2AAInfo));
+  // Recurse back into the best AA results we have, potentially with refined
+  // memory locations. We have already ensured that BasicAA has a MayAlias
+  // cache result for these, so any recursion back into BasicAA won't loop.
+  AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);
   return AliasCache[Locs] = Result;
 }
 
-bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
-                                                       const Value *V2) {
+/// Check whether two Values can be considered equivalent.
+///
+/// In addition to pointer equivalence of \p V1 and \p V2 this checks whether
+/// they can not be part of a cycle in the value graph by looking at all
+/// visited phi nodes and making sure that the phis cannot reach the value. We
+/// have to do this because we are looking through phi nodes (that is, we say
+/// noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB)).
+bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V,
+                                                  const Value *V2) {
   if (V != V2)
     return false;
 
@@ -1450,28 +1453,21 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
   if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)
     return false;
 
-  // Use dominance or loop info if available.
-  DominatorTreeWrapperPass *DTWP =
-      getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
-  LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
-
   // Make sure that the visited phis cannot reach the Value. This ensures that
   // the Values cannot come from different iterations of a potential cycle the
   // phi nodes could be involved in.
   for (auto *P : VisitedPhiBBs)
-    if (isPotentiallyReachable(P->begin(), Inst, DT, LI))
+    if (isPotentiallyReachable(&P->front(), Inst, DT, LI))
       return false;
 
   return true;
 }
 
-/// GetIndexDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-void BasicAliasAnalysis::GetIndexDifference(
+/// Computes the symbolic difference between two de-composed GEPs.
+///
+/// Dest and Src are the variable indices from two decomposed GetElementPtr
+/// instructions GEP1 and GEP2 which have common base pointers.
+void BasicAAResult::GetIndexDifference(
     SmallVectorImpl<VariableGEPIndex> &Dest,
     const SmallVectorImpl<VariableGEPIndex> &Src) {
   if (Src.empty())
@@ -1479,14 +1475,14 @@ void BasicAliasAnalysis::GetIndexDifference(
 
   for (unsigned i = 0, e = Src.size(); i != e; ++i) {
     const Value *V = Src[i].V;
-    ExtensionKind Extension = Src[i].Extension;
+    unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;
     int64_t Scale = Src[i].Scale;
 
     // Find V in Dest.  This is N^2, but pointer indices almost never have more
     // than a few variable indexes.
     for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
       if (!isValueEqualInPotentialCycles(Dest[j].V, V) ||
-          Dest[j].Extension != Extension)
+          Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits)
         continue;
 
       // If we found it, subtract off Scale V's from the entry in Dest.  If it
@@ -1501,8 +1497,120 @@ void BasicAliasAnalysis::GetIndexDifference(
 
     // If we didn't consume this entry, add it to the end of the Dest list.
     if (Scale) {
-      VariableGEPIndex Entry = { V, Extension, -Scale };
+      VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};
       Dest.push_back(Entry);
     }
   }
 }
 
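constantOffsetHeuristic below reasons about the minimum distance between %x and %x + c under wrapping arithmetic: at bit width n it is min(c mod 2^n, 2^n - (c mod 2^n)). A sketch reproducing the i3 example from the comment (7 + 5 wraps to 4, so the minimum distance is 3):

    #include <cstdint>
    #include <cstdio>

    // Minimum wrapped distance between x and x + C at the given bit width.
    static uint64_t minWrappedDistance(uint64_t C, unsigned Bits) {
      uint64_t Mask = (Bits == 64) ? ~0ULL : ((1ULL << Bits) - 1);
      uint64_t Diff = C & Mask;             // c mod 2^Bits, going "forward"
      uint64_t Wrapped = (0 - Diff) & Mask; // 2^Bits - Diff, the other way
      return Diff < Wrapped ? Diff : Wrapped;
    }

    int main() {
      // "add i3 %i, 5": if %i == 7 then (7 + 5) mod 8 == 4, distance 3.
      std::printf("%llu\n", (unsigned long long)minWrappedDistance(5, 3));
      return 0;
    }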
+bool BasicAAResult::constantOffsetHeuristic(
+    const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size,
+    uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC,
+    DominatorTree *DT) {
+  if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize ||
+      V2Size == MemoryLocation::UnknownSize)
+    return false;
+
+  const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1];
+
+  if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits ||
+      Var0.Scale != -Var1.Scale)
+    return false;
+
+  unsigned Width = Var1.V->getType()->getIntegerBitWidth();
+
+  // We'll strip off the Extensions of Var0 and Var1 and do another round
+  // of GetLinearExpression decomposition. In the example above, if Var0
+  // is zext(%x + 1) we should get V1 == %x and V1Offset == 1.
+
+  APInt V0Scale(Width, 0), V0Offset(Width, 0), V1Scale(Width, 0),
+      V1Offset(Width, 0);
+  bool NSW = true, NUW = true;
+  unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0;
+  const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits,
+                                        V0SExtBits, DL, 0, AC, DT, NSW, NUW);
+  NSW = true, NUW = true;
+  const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits,
+                                        V1SExtBits, DL, 0, AC, DT, NSW, NUW);
+
+  if (V0Scale != V1Scale || V0ZExtBits != V1ZExtBits ||
+      V0SExtBits != V1SExtBits || !isValueEqualInPotentialCycles(V0, V1))
+    return false;
+
+  // We have a hit - Var0 and Var1 only differ by a constant offset!
+
+  // If we've been sext'ed then zext'd the maximum difference between Var0 and
+  // Var1 is possible to calculate, but we're just interested in the absolute
+  // minimum difference between the two. The minimum distance may occur due to
+  // wrapping; consider "add i3 %i, 5": if %i == 7 then 7 + 5 mod 8 == 4, and so
+  // the minimum distance between %i and %i + 5 is 3.
+  APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff;
+  MinDiff = APIntOps::umin(MinDiff, Wrapped);
+  uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale);
+
+  // We can't definitely say whether GEP1 is before or after V2 due to wrapping
+  // arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other
+  // values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and
+  // V2Size can fit in the MinDiffBytes gap.
+  return V1Size + std::abs(BaseOffset) <= MinDiffBytes &&
+         V2Size + std::abs(BaseOffset) <= MinDiffBytes;
+}
+
+//===----------------------------------------------------------------------===//
+// BasicAliasAnalysis Pass
+//===----------------------------------------------------------------------===//
+
+char BasicAA::PassID;
+
+BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> *AM) {
+  return BasicAAResult(F.getParent()->getDataLayout(),
+                       AM->getResult<TargetLibraryAnalysis>(F),
+                       AM->getResult<AssumptionAnalysis>(F),
+                       AM->getCachedResult<DominatorTreeAnalysis>(F),
+                       AM->getCachedResult<LoopAnalysis>(F));
+}
+
+BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) {
+  initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+char BasicAAWrapperPass::ID = 0;
+void BasicAAWrapperPass::anchor() {}
+
+INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa",
+                      "Basic Alias Analysis (stateless AA impl)", true, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa",
+                    "Basic Alias Analysis (stateless AA impl)", true, true)
+
+FunctionPass *llvm::createBasicAAWrapperPass() {
+  return new BasicAAWrapperPass();
+}
+
+bool BasicAAWrapperPass::runOnFunction(Function &F) {
+  auto &ACT = getAnalysis<AssumptionCacheTracker>();
+  auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>();
+  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+
+  Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(),
+                                 ACT.getAssumptionCache(F),
+                                 DTWP ? &DTWP->getDomTree() : nullptr,
+                                 LIWP ? &LIWP->getLoopInfo() : nullptr));
+
+  return false;
+}
+
+void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
+  return BasicAAResult(
+      F.getParent()->getDataLayout(),
+      P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+      P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
+}
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 3d819eb596d4..90b7a339a0fe 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -55,7 +55,7 @@ struct GraphTraits<BlockFrequencyInfo *> {
   typedef Function::const_iterator nodes_iterator;
 
   static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
-    return G->getFunction()->begin();
+    return &G->getFunction()->front();
   }
   static ChildIteratorType child_begin(const NodeType *N) {
     return succ_begin(N);
   }
@@ -105,51 +105,36 @@ struct DOTGraphTraits<BlockFrequencyInfo *> : public DefaultDOTGraphTraits {
 } // end namespace llvm
 #endif
 
-INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
-                      "Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
-                    "Block Frequency Analysis", true, true)
+BlockFrequencyInfo::BlockFrequencyInfo() {}
 
-char BlockFrequencyInfo::ID = 0;
-
-
-BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) {
-  initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry());
+BlockFrequencyInfo::BlockFrequencyInfo(const Function &F,
+                                       const BranchProbabilityInfo &BPI,
+                                       const LoopInfo &LI) {
+  calculate(F, BPI, LI);
 }
 
-BlockFrequencyInfo::~BlockFrequencyInfo() {}
-
-void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<BranchProbabilityInfo>();
-  AU.addRequired<LoopInfoWrapperPass>();
-  AU.setPreservesAll();
-}
-
-bool BlockFrequencyInfo::runOnFunction(Function &F) {
-  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
getAnalysis().getLoopInfo(); +void BlockFrequencyInfo::calculate(const Function &F, + const BranchProbabilityInfo &BPI, + const LoopInfo &LI) { if (!BFI) BFI.reset(new ImplType); - BFI->doFunction(&F, &BPI, &LI); + BFI->calculate(F, BPI, LI); #ifndef NDEBUG if (ViewBlockFreqPropagationDAG != GVDT_None) view(); #endif - return false; -} - -void BlockFrequencyInfo::releaseMemory() { BFI.reset(); } - -void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { - if (BFI) BFI->print(O); } BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const { return BFI ? BFI->getBlockFreq(BB) : 0; } +void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, + uint64_t Freq) { + assert(BFI && "Expected analysis to be available"); + BFI->setBlockFreq(BB, Freq); +} + /// Pop up a ghostview window with the current block frequency propagation /// rendered using dot. void BlockFrequencyInfo::view() const { @@ -180,3 +165,49 @@ BlockFrequencyInfo::printBlockFreq(raw_ostream &OS, uint64_t BlockFrequencyInfo::getEntryFreq() const { return BFI ? BFI->getEntryFreq() : 0; } + +void BlockFrequencyInfo::releaseMemory() { BFI.reset(); } + +void BlockFrequencyInfo::print(raw_ostream &OS) const { + if (BFI) + BFI->print(OS); +} + + +INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq", + "Block Frequency Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(BlockFrequencyInfoWrapperPass, "block-freq", + "Block Frequency Analysis", true, true) + +char BlockFrequencyInfoWrapperPass::ID = 0; + + +BlockFrequencyInfoWrapperPass::BlockFrequencyInfoWrapperPass() + : FunctionPass(ID) { + initializeBlockFrequencyInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() {} + +void BlockFrequencyInfoWrapperPass::print(raw_ostream &OS, + const Module *) const { + BFI.print(OS); +} + +void BlockFrequencyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); +} + +void BlockFrequencyInfoWrapperPass::releaseMemory() { BFI.releaseMemory(); } + +bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) { + BranchProbabilityInfo &BPI = + getAnalysis().getBPI(); + LoopInfo &LI = getAnalysis().getLoopInfo(); + BFI.calculate(F, BPI, LI); + return false; +} diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp index 6ceda06aac14..48e23af2690a 100644 --- a/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -530,6 +530,13 @@ BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { return Freqs[Node.Index].Scaled; } +void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node, + uint64_t Freq) { + assert(Node.isValid() && "Expected valid node"); + assert(Node.Index < Freqs.size() && "Expected legal index"); + Freqs[Node.Index].Integer = Freq; +} + std::string BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { return std::string(); @@ -743,7 +750,10 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) { auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)]; DEBUG(dbgs() << " - Add back edge mass for node " << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n"); - Dist.addLocal(HeaderNode, BackedgeMass.getMass()); + if (BackedgeMass.getMass() > 0) + Dist.addLocal(HeaderNode, 
BackedgeMass.getMass()); + else + DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n"); } DitheringDistributer D(Dist, LoopMass); diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 430b41241edf..cf0cc8da6ef8 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -27,13 +27,13 @@ using namespace llvm; #define DEBUG_TYPE "branch-prob" -INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", +INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", +INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob", "Branch Probability Analysis", false, true) -char BranchProbabilityInfo::ID = 0; +char BranchProbabilityInfoWrapperPass::ID = 0; // Weights are for internal use only. They are used by heuristics to help to // estimate edges' probability. Example: @@ -108,13 +108,6 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1; /// instruction. This is essentially never taken. static const uint32_t IH_NONTAKEN_WEIGHT = 1; -// Standard weight value. Used when none of the heuristics set weight for -// the edge. -static const uint32_t NORMAL_WEIGHT = 16; - -// Minimum weight of an edge. Please note, that weight is NEVER 0. -static const uint32_t MIN_WEIGHT = 1; - /// \brief Calculate edge weights for successors lead to unreachable. /// /// Predict that a successor which leads necessarily to an @@ -147,22 +140,34 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) return false; - uint32_t UnreachableWeight = - std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT); - for (SmallVectorImpl::iterator I = UnreachableEdges.begin(), - E = UnreachableEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, UnreachableWeight); + // If the terminator is an InvokeInst, check only the normal destination block + // as the unwind edge of InvokeInst is also very unlikely taken. + if (auto *II = dyn_cast(TI)) + if (PostDominatedByUnreachable.count(II->getNormalDest())) { + PostDominatedByUnreachable.insert(BB); + // Return false here so that edge weights for InvokeInst could be decided + // in calcInvokeHeuristics(). 
+ return false; + } - if (ReachableEdges.empty()) + if (ReachableEdges.empty()) { + BranchProbability Prob(1, UnreachableEdges.size()); + for (unsigned SuccIdx : UnreachableEdges) + setEdgeProbability(BB, SuccIdx, Prob); return true; - uint32_t ReachableWeight = - std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(), - NORMAL_WEIGHT); - for (SmallVectorImpl::iterator I = ReachableEdges.begin(), - E = ReachableEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, ReachableWeight); + } + + BranchProbability UnreachableProb(UR_TAKEN_WEIGHT, + (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * + UnreachableEdges.size()); + BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT, + (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * + ReachableEdges.size()); + + for (unsigned SuccIdx : UnreachableEdges) + setEdgeProbability(BB, SuccIdx, UnreachableProb); + for (unsigned SuccIdx : ReachableEdges) + setEdgeProbability(BB, SuccIdx, ReachableProb); return true; } @@ -213,10 +218,18 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { WeightSum = 0; for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - uint32_t W = Weights[i] / ScalingFactor; - WeightSum += W; - setEdgeWeight(BB, i, W); + Weights[i] /= ScalingFactor; + WeightSum += Weights[i]; } + + if (WeightSum == 0) { + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + setEdgeProbability(BB, i, {1, e}); + } else { + for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) + setEdgeProbability(BB, i, {Weights[i], static_cast(WeightSum)}); + } + assert(WeightSum <= UINT32_MAX && "Expected weights to scale down to 32 bits"); @@ -265,21 +278,24 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) { if (TI->getNumSuccessors() == 1 || ColdEdges.empty()) return false; - uint32_t ColdWeight = - std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT); - for (SmallVectorImpl::iterator I = ColdEdges.begin(), - E = ColdEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, ColdWeight); - - if (NormalEdges.empty()) + if (NormalEdges.empty()) { + BranchProbability Prob(1, ColdEdges.size()); + for (unsigned SuccIdx : ColdEdges) + setEdgeProbability(BB, SuccIdx, Prob); return true; - uint32_t NormalWeight = std::max( - CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT); - for (SmallVectorImpl::iterator I = NormalEdges.begin(), - E = NormalEdges.end(); - I != E; ++I) - setEdgeWeight(BB, *I, NormalWeight); + } + + BranchProbability ColdProb(CC_TAKEN_WEIGHT, + (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * + ColdEdges.size()); + BranchProbability NormalProb(CC_NONTAKEN_WEIGHT, + (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * + NormalEdges.size()); + + for (unsigned SuccIdx : ColdEdges) + setEdgeProbability(BB, SuccIdx, ColdProb); + for (unsigned SuccIdx : NormalEdges) + setEdgeProbability(BB, SuccIdx, NormalProb); return true; } @@ -312,15 +328,18 @@ bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT); + BranchProbability TakenProb(PH_TAKEN_WEIGHT, + PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, TakenIdx, TakenProb); + setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; } // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. 
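These hunks all repeat one conversion: raw integer edge weights become normalized BranchProbability fractions. Each group of edges contributes a fixed weight, the denominator sums the weights of the groups actually present, and a group's share is split evenly over its edges. A standalone sketch of the pattern (the 124/4 weights here are illustrative stand-ins, not this file's constants):

    #include "llvm/Support/BranchProbability.h"
    using llvm::BranchProbability;

    void splitExample() {
      // One backedge group (weight 124) and one exiting group (weight 4):
      unsigned Denom = 124 + 4;
      BranchProbability BackProb(124, Denom);    // ~96.9% to the backedge
      BranchProbability ExitGroup(4, Denom);     // ~3.1% shared by the exits
      BranchProbability PerExit = ExitGroup / 2; // two exits -> ~1.6% each
      // Degenerate case (cf. calcMetadataWeights): all-zero weights fall
      // back to the uniform distribution, 1/N per successor.
      BranchProbability Uniform(1, 3);
      (void)BackProb; (void)PerExit; (void)Uniform;
    }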
-bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { - Loop *L = LI->getLoopFor(BB); +bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB, + const LoopInfo &LI) { + Loop *L = LI.getLoopFor(BB); if (!L) return false; @@ -340,37 +359,35 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { if (BackEdges.empty() && ExitingEdges.empty()) return false; - if (uint32_t numBackEdges = BackEdges.size()) { - uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; - if (backWeight < NORMAL_WEIGHT) - backWeight = NORMAL_WEIGHT; + // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and + // normalize them so that they sum up to one. + SmallVector Probs(3, BranchProbability::getZero()); + unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + + (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + + (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT); + if (!BackEdges.empty()) + Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); + if (!InEdges.empty()) + Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); + if (!ExitingEdges.empty()) + Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom); - for (SmallVectorImpl::iterator EI = BackEdges.begin(), - EE = BackEdges.end(); EI != EE; ++EI) { - setEdgeWeight(BB, *EI, backWeight); - } + if (uint32_t numBackEdges = BackEdges.size()) { + auto Prob = Probs[0] / numBackEdges; + for (unsigned SuccIdx : BackEdges) + setEdgeProbability(BB, SuccIdx, Prob); } if (uint32_t numInEdges = InEdges.size()) { - uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges; - if (inWeight < NORMAL_WEIGHT) - inWeight = NORMAL_WEIGHT; - - for (SmallVectorImpl::iterator EI = InEdges.begin(), - EE = InEdges.end(); EI != EE; ++EI) { - setEdgeWeight(BB, *EI, inWeight); - } + auto Prob = Probs[1] / numInEdges; + for (unsigned SuccIdx : InEdges) + setEdgeProbability(BB, SuccIdx, Prob); } if (uint32_t numExitingEdges = ExitingEdges.size()) { - uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges; - if (exitWeight < MIN_WEIGHT) - exitWeight = MIN_WEIGHT; - - for (SmallVectorImpl::iterator EI = ExitingEdges.begin(), - EE = ExitingEdges.end(); EI != EE; ++EI) { - setEdgeWeight(BB, *EI, exitWeight); - } + auto Prob = Probs[2] / numExitingEdges; + for (unsigned SuccIdx : ExitingEdges) + setEdgeProbability(BB, SuccIdx, Prob); } return true; @@ -452,9 +469,10 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); - + BranchProbability TakenProb(ZH_TAKEN_WEIGHT, + ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, TakenIdx, TakenProb); + setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; } @@ -488,9 +506,10 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { if (!isProb) std::swap(TakenIdx, NonTakenIdx); - setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT); - setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT); - + BranchProbability TakenProb(FPH_TAKEN_WEIGHT, + FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, TakenIdx, TakenProb); + setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl()); return true; } @@ -499,82 +518,30 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) { if (!II) return false; - setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT); - setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT); + BranchProbability TakenProb(IH_TAKEN_WEIGHT, + 
IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT); + setEdgeProbability(BB, 0 /*Index for Normal*/, TakenProb); + setEdgeProbability(BB, 1 /*Index for Unwind*/, TakenProb.getCompl()); return true; } -void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.setPreservesAll(); -} - -bool BranchProbabilityInfo::runOnFunction(Function &F) { - DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() - << " ----\n\n"); - LastF = &F; // Store the last function we ran on for printing. - LI = &getAnalysis().getLoopInfo(); - assert(PostDominatedByUnreachable.empty()); - assert(PostDominatedByColdCall.empty()); - - // Walk the basic blocks in post-order so that we can build up state about - // the successors of a block iteratively. - for (auto BB : post_order(&F.getEntryBlock())) { - DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); - if (calcUnreachableHeuristics(BB)) - continue; - if (calcMetadataWeights(BB)) - continue; - if (calcColdCallHeuristics(BB)) - continue; - if (calcLoopBranchHeuristics(BB)) - continue; - if (calcPointerHeuristics(BB)) - continue; - if (calcZeroHeuristics(BB)) - continue; - if (calcFloatingPointHeuristics(BB)) - continue; - calcInvokeHeuristics(BB); - } - - PostDominatedByUnreachable.clear(); - PostDominatedByColdCall.clear(); - return false; -} - void BranchProbabilityInfo::releaseMemory() { - Weights.clear(); + Probs.clear(); } -void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { +void BranchProbabilityInfo::print(raw_ostream &OS) const { OS << "---- Branch Probabilities ----\n"; // We print the probabilities from the last function the analysis ran over, // or the function it is currently running over. assert(LastF && "Cannot print prior to running over a function"); - for (Function::const_iterator BI = LastF->begin(), BE = LastF->end(); - BI != BE; ++BI) { - for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI); - SI != SE; ++SI) { - printEdgeProbability(OS << " ", BI, *SI); + for (const auto &BI : *LastF) { + for (succ_const_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE; + ++SI) { + printEdgeProbability(OS << " ", &BI, *SI); } } } -uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { - uint32_t Sum = 0; - - for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex()); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum >= PrevSum); (void) PrevSum; - } - - return Sum; -} - bool BranchProbabilityInfo:: isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% @@ -583,97 +550,74 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { } BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { - uint32_t Sum = 0; - uint32_t MaxWeight = 0; + auto MaxProb = BranchProbability::getZero(); BasicBlock *MaxSucc = nullptr; for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { BasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(BB, Succ); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; - - if (Weight > MaxWeight) { - MaxWeight = Weight; + auto Prob = getEdgeProbability(BB, Succ); + if (Prob > MaxProb) { + MaxProb = Prob; MaxSucc = Succ; } } // Hot probability is at least 4/5 = 80% - if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) + if (MaxProb > BranchProbability(4, 5)) return MaxSucc; return nullptr; } -/// Get the raw edge 
weight for the edge. If can't find it, return -/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index -/// to the successors. -uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { - DenseMap::const_iterator I = - Weights.find(std::make_pair(Src, IndexInSuccessors)); +/// Get the raw edge probability for the edge. If can't find it, return a +/// default probability 1/N where N is the number of successors. Here an edge is +/// specified using PredBlock and an +/// index to the successors. +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, + unsigned IndexInSuccessors) const { + auto I = Probs.find(std::make_pair(Src, IndexInSuccessors)); - if (I != Weights.end()) + if (I != Probs.end()) return I->second; - return DEFAULT_WEIGHT; + return {1, + static_cast(std::distance(succ_begin(Src), succ_end(Src)))}; } -uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src, - succ_const_iterator Dst) const { - return getEdgeWeight(Src, Dst.getSuccessorIndex()); +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, + succ_const_iterator Dst) const { + return getEdgeProbability(Src, Dst.getSuccessorIndex()); } -/// Get the raw edge weight calculated for the block pair. This returns the sum -/// of all raw edge weights from Src to Dst. -uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { - uint32_t Weight = 0; - bool FoundWeight = false; - DenseMap::const_iterator MapI; +/// Get the raw edge probability calculated for the block pair. This returns the +/// sum of all raw edge probabilities from Src to Dst. +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, + const BasicBlock *Dst) const { + auto Prob = BranchProbability::getZero(); + bool FoundProb = false; for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I) if (*I == Dst) { - MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex())); - if (MapI != Weights.end()) { - FoundWeight = true; - Weight += MapI->second; + auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex())); + if (MapI != Probs.end()) { + FoundProb = true; + Prob += MapI->second; } } - return (!FoundWeight) ? DEFAULT_WEIGHT : Weight; + uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src)); + return FoundProb ? Prob : BranchProbability(1, succ_num); } -/// Set the edge weight for a given edge specified by PredBlock and an index -/// to the successors. -void BranchProbabilityInfo:: -setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, - uint32_t Weight) { - Weights[std::make_pair(Src, IndexInSuccessors)] = Weight; - DEBUG(dbgs() << "set edge " << Src->getName() << " -> " - << IndexInSuccessors << " successor weight to " - << Weight << "\n"); -} - -/// Get an edge's probability, relative to other out-edges from Src. -BranchProbability BranchProbabilityInfo:: -getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const { - uint32_t N = getEdgeWeight(Src, IndexInSuccessors); - uint32_t D = getSumForBlock(Src); - - return BranchProbability(N, D); -} - -/// Get the probability of going from Src to Dst. It returns the sum of all -/// probabilities for edges from Src to Dst. 
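The replacement getters above synthesize a uniform probability when nothing was recorded, and the block-pair overload sums over parallel edges (a switch can reach one successor through several indices). A hedged mock of the fallback logic, using std::map in place of the analysis's DenseMap:

    #include "llvm/IR/CFG.h"
    #include "llvm/Support/BranchProbability.h"
    #include <iterator>
    #include <map>
    using namespace llvm;

    typedef std::pair<const BasicBlock *, unsigned> EdgeKey;

    BranchProbability
    lookupOrUniform(const std::map<EdgeKey, BranchProbability> &Probs,
                    const BasicBlock *Src, unsigned SuccIdx) {
      auto It = Probs.find(std::make_pair(Src, SuccIdx));
      if (It != Probs.end())
        return It->second;
      // Fallback: 1/N over the N successor slots of Src's terminator.
      uint32_t N = std::distance(succ_begin(Src), succ_end(Src));
      return BranchProbability(1, N);
    }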
-BranchProbability BranchProbabilityInfo:: -getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { - - uint32_t N = getEdgeWeight(Src, Dst); - uint32_t D = getSumForBlock(Src); - - return BranchProbability(N, D); +/// Set the edge probability for a given edge specified by PredBlock and an +/// index to the successors. +void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src, + unsigned IndexInSuccessors, + BranchProbability Prob) { + Probs[std::make_pair(Src, IndexInSuccessors)] = Prob; + DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors + << " successor probability to " << Prob << "\n"); } raw_ostream & @@ -688,3 +632,54 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, return OS; } + +void BranchProbabilityInfo::calculate(Function &F, const LoopInfo& LI) { + DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() + << " ----\n\n"); + LastF = &F; // Store the last function we ran on for printing. + assert(PostDominatedByUnreachable.empty()); + assert(PostDominatedByColdCall.empty()); + + // Walk the basic blocks in post-order so that we can build up state about + // the successors of a block iteratively. + for (auto BB : post_order(&F.getEntryBlock())) { + DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); + if (calcUnreachableHeuristics(BB)) + continue; + if (calcMetadataWeights(BB)) + continue; + if (calcColdCallHeuristics(BB)) + continue; + if (calcLoopBranchHeuristics(BB, LI)) + continue; + if (calcPointerHeuristics(BB)) + continue; + if (calcZeroHeuristics(BB)) + continue; + if (calcFloatingPointHeuristics(BB)) + continue; + calcInvokeHeuristics(BB); + } + + PostDominatedByUnreachable.clear(); + PostDominatedByColdCall.clear(); +} + +void BranchProbabilityInfoWrapperPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.addRequired(); + AU.setPreservesAll(); +} + +bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { + const LoopInfo &LI = getAnalysis().getLoopInfo(); + BPI.calculate(F, LI); + return false; +} + +void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); } + +void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS, + const Module *) const { + BPI.print(OS); +} diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp index e15109bd2702..0dfd57d3cb6b 100644 --- a/lib/Analysis/CFG.cpp +++ b/lib/Analysis/CFG.cpp @@ -69,8 +69,9 @@ void llvm::FindFunctionBackedges(const Function &F, /// and return its position in the terminator instruction's list of /// successors. It is an error to call this with a block that is not a /// successor. -unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { - TerminatorInst *Term = BB->getTerminator(); +unsigned llvm::GetSuccessorNumber(const BasicBlock *BB, + const BasicBlock *Succ) { + const TerminatorInst *Term = BB->getTerminator(); #ifndef NDEBUG unsigned e = Term->getNumSuccessors(); #endif @@ -203,7 +204,8 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B, return true; // Linear scan, start at 'A', see whether we hit 'B' or the end first. 
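The iterator change in the CFG.cpp hunk below is the ilist/pointer split: an Instruction pointer no longer converts implicitly to a BasicBlock::const_iterator, hence the new A->getIterator() spelling. The scan, as a standalone helper (an editorial sketch, not the exported API):

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    // Returns true if B occurs at or after A inside A's block.
    static bool reachesLaterInBlock(const Instruction *A, const Instruction *B) {
      const BasicBlock *BB = A->getParent();
      for (BasicBlock::const_iterator I = A->getIterator(), E = BB->end();
           I != E; ++I)
        if (&*I == B)
          return true;
      return false;
    }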
- for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = A->getIterator(), E = BB->end(); I != E; + ++I) { if (&*I == B) return true; } diff --git a/lib/Analysis/CFLAliasAnalysis.cpp b/lib/Analysis/CFLAliasAnalysis.cpp index fe1c088886bc..4843ed6587a8 100644 --- a/lib/Analysis/CFLAliasAnalysis.cpp +++ b/lib/Analysis/CFLAliasAnalysis.cpp @@ -27,18 +27,17 @@ // time. //===----------------------------------------------------------------------===// +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "StratifiedSets.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" @@ -47,7 +46,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include #include #include @@ -55,6 +53,19 @@ using namespace llvm; #define DEBUG_TYPE "cfl-aa" +CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI) : AAResultBase(TLI) {} +CFLAAResult::CFLAAResult(CFLAAResult &&Arg) : AAResultBase(std::move(Arg)) {} + +// \brief Information we have about a function and would like to keep around +struct CFLAAResult::FunctionInfo { + StratifiedSets Sets; + // Lots of functions have < 4 returns. Adjust as necessary. + SmallVector ReturnedValues; + + FunctionInfo(StratifiedSets &&S, SmallVector &&RV) + : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} +}; + // Try to go from a Value* to a Function*. Never returns nullptr. static Optional parentFunctionOfValue(Value *); @@ -141,129 +152,13 @@ struct Edge { : From(From), To(To), Weight(W), AdditionalAttrs(A) {} }; -// \brief Information we have about a function and would like to keep around -struct FunctionInfo { - StratifiedSets Sets; - // Lots of functions have < 4 returns. Adjust as necessary. - SmallVector ReturnedValues; - - FunctionInfo(StratifiedSets &&S, SmallVector &&RV) - : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} -}; - -struct CFLAliasAnalysis; - -struct FunctionHandle : public CallbackVH { - FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA) - : CallbackVH(Fn), CFLAA(CFLAA) { - assert(Fn != nullptr); - assert(CFLAA != nullptr); - } - - ~FunctionHandle() override {} - - void deleted() override { removeSelfFromCache(); } - void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } - -private: - CFLAliasAnalysis *CFLAA; - - void removeSelfFromCache(); -}; - -struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis { -private: - /// \brief Cached mapping of Functions to their StratifiedSets. - /// If a function's sets are currently being built, it is marked - /// in the cache as an Optional without a value. This way, if we - /// have any kind of recursion, it is discernable from a function - /// that simply has empty sets. 
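The cache the comment above describes is a three-state memoization: key absent (never scanned), key present with an empty Optional (scan in progress, i.e. recursion), key present with a value (done). A minimal sketch of that pattern, with int standing in for FunctionInfo:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/Optional.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    static DenseMap<Function *, Optional<int>> Cache;

    // A present key holding an empty Optional marks "currently being built",
    // so a recursive query can detect the cycle instead of re-scanning.
    static bool isInProgress(Function *Fn) {
      auto It = Cache.find(Fn);
      return It != Cache.end() && !It->second.hasValue();
    }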
- DenseMap> Cache; - std::forward_list Handles; - -public: - static char ID; - - CFLAliasAnalysis() : ImmutablePass(ID) { - initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); - } - - ~CFLAliasAnalysis() override {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - } - - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &AliasAnalysis::ID) - return (AliasAnalysis *)this; - return this; - } - - /// \brief Inserts the given Function into the cache. - void scan(Function *Fn); - - void evict(Function *Fn) { Cache.erase(Fn); } - - /// \brief Ensures that the given function is available in the cache. - /// Returns the appropriate entry from the cache. - const Optional &ensureCached(Function *Fn) { - auto Iter = Cache.find(Fn); - if (Iter == Cache.end()) { - scan(Fn); - Iter = Cache.find(Fn); - assert(Iter != Cache.end()); - assert(Iter->second.hasValue()); - } - return Iter->second; - } - - AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB); - - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - if (LocA.Ptr == LocB.Ptr) { - if (LocA.Size == LocB.Size) { - return MustAlias; - } else { - return PartialAlias; - } - } - - // Comparisons between global variables and other constants should be - // handled by BasicAA. - // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing - // a GlobalValue and ConstantExpr, but every query needs to have at least - // one Value tied to a Function, and neither GlobalValues nor ConstantExprs - // are. - if (isa(LocA.Ptr) && isa(LocB.Ptr)) { - return AliasAnalysis::alias(LocA, LocB); - } - - AliasResult QueryResult = query(LocA, LocB); - if (QueryResult == MayAlias) - return AliasAnalysis::alias(LocA, LocB); - - return QueryResult; - } - - bool doInitialization(Module &M) override; -}; - -void FunctionHandle::removeSelfFromCache() { - assert(CFLAA != nullptr); - auto *Val = getValPtr(); - CFLAA->evict(cast(Val)); - setValPtr(nullptr); -} - // \brief Gets the edges our graph should have, based on an Instruction* class GetEdgesVisitor : public InstVisitor { - CFLAliasAnalysis &AA; + CFLAAResult &AA; SmallVectorImpl &Output; public: - GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl &Output) + GetEdgesVisitor(CFLAAResult &AA, SmallVectorImpl &Output) : AA(AA), Output(Output) {} void visitInstruction(Instruction &) { @@ -480,6 +375,8 @@ public: } template void visitCallLikeInst(InstT &Inst) { + // TODO: Add support for noalias args/all the other fun function attributes + // that we can tack on. SmallVector Targets; if (getPossibleTargets(&Inst, Targets)) { if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands())) @@ -488,8 +385,16 @@ public: Output.clear(); } + // Because the function is opaque, we need to note that anything + // could have happened to the arguments, and that the result could alias + // just about anything, too. + // The goal of the loop is in part to unify many Values into one set, so we + // don't care if the function is void there. for (Value *V : Inst.arg_operands()) Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll)); + if (Inst.getNumArgOperands() == 0 && + Inst.getType() != Type::getVoidTy(Inst.getContext())) + Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll)); } void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); } @@ -624,7 +529,7 @@ public: // ----- Various Edge iterators for the graph ----- // // \brief Iterator for edges. 
Because this graph is bidirected, we don't - // allow modificaiton of the edges using this iterator. Additionally, the + // allow modification of the edges using this iterator. Additionally, the // iterator becomes invalid if you add edges to or from the node you're // getting the edges of. struct EdgeIterator : public std::iterator> GraphT; typedef DenseMap NodeMapT; } -// -- Setting up/registering CFLAA pass -- // -char CFLAliasAnalysis::ID = 0; - -INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa", - "CFL-Based AA implementation", false, true, false) - -ImmutablePass *llvm::createCFLAliasAnalysisPass() { - return new CFLAliasAnalysis(); -} - //===----------------------------------------------------------------------===// // Function declarations that require types defined in the namespace above //===----------------------------------------------------------------------===// @@ -751,12 +646,10 @@ static Optional valueToAttrIndex(Value *Val); static EdgeType flipWeight(EdgeType); // Gets edges of the given Instruction*, writing them to the SmallVector*. -static void argsToEdges(CFLAliasAnalysis &, Instruction *, - SmallVectorImpl &); +static void argsToEdges(CFLAAResult &, Instruction *, SmallVectorImpl &); // Gets edges of the given ConstantExpr*, writing them to the SmallVector*. -static void argsToEdges(CFLAliasAnalysis &, ConstantExpr *, - SmallVectorImpl &); +static void argsToEdges(CFLAAResult &, ConstantExpr *, SmallVectorImpl &); // Gets the "Level" that one should travel in StratifiedSets // given an EdgeType. @@ -764,13 +657,13 @@ static Level directionOfEdgeType(EdgeType); // Builds the graph needed for constructing the StratifiedSets for the // given function -static void buildGraphFrom(CFLAliasAnalysis &, Function *, +static void buildGraphFrom(CFLAAResult &, Function *, SmallVectorImpl &, NodeMapT &, GraphT &); // Gets the edges of a ConstantExpr as if it was an Instruction. This // function also acts on any nested ConstantExprs, adding the edges // of those to the given SmallVector as well. -static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &, +static void constexprToEdges(CFLAAResult &, ConstantExpr &, SmallVectorImpl &); // Given an Instruction, this will add it to the graph, along with any @@ -779,16 +672,13 @@ static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &, // %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2 // addInstructionToGraph would add both the `load` and `getelementptr` // instructions to the graph appropriately. -static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &, +static void addInstructionToGraph(CFLAAResult &, Instruction &, SmallVectorImpl &, NodeMapT &, GraphT &); // Notes whether it would be pointless to add the given Value to our sets. static bool canSkipAddingToSets(Value *Val); -// Builds the graph + StratifiedSets for a function. -static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *); - static Optional parentFunctionOfValue(Value *Val) { if (auto *Inst = dyn_cast(Val)) { auto *Bb = Inst->getParent(); @@ -825,7 +715,7 @@ static bool hasUsefulEdges(Instruction *Inst) { } static bool hasUsefulEdges(ConstantExpr *CE) { - // ConstantExpr doens't have terminators, invokes, or fences, so only needs + // ConstantExpr doesn't have terminators, invokes, or fences, so only needs // to check for compares. 
return CE->getOpcode() != Instruction::ICmp && CE->getOpcode() != Instruction::FCmp; @@ -862,7 +752,7 @@ static EdgeType flipWeight(EdgeType Initial) { llvm_unreachable("Incomplete coverage of EdgeType enum"); } -static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, +static void argsToEdges(CFLAAResult &Analysis, Instruction *Inst, SmallVectorImpl &Output) { assert(hasUsefulEdges(Inst) && "Expected instructions to have 'useful' edges"); @@ -870,7 +760,7 @@ static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, v.visit(Inst); } -static void argsToEdges(CFLAliasAnalysis &Analysis, ConstantExpr *CE, +static void argsToEdges(CFLAAResult &Analysis, ConstantExpr *CE, SmallVectorImpl &Output) { assert(hasUsefulEdges(CE) && "Expected constant expr to have 'useful' edges"); GetEdgesVisitor v(Analysis, Output); @@ -889,7 +779,7 @@ static Level directionOfEdgeType(EdgeType Weight) { llvm_unreachable("Incomplete switch coverage"); } -static void constexprToEdges(CFLAliasAnalysis &Analysis, +static void constexprToEdges(CFLAAResult &Analysis, ConstantExpr &CExprToCollapse, SmallVectorImpl &Results) { SmallVector Worklist; @@ -919,7 +809,7 @@ static void constexprToEdges(CFLAliasAnalysis &Analysis, } } -static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst, +static void addInstructionToGraph(CFLAAResult &Analysis, Instruction &Inst, SmallVectorImpl &ReturnedValues, NodeMapT &Map, GraphT &Graph) { const auto findOrInsertNode = [&Map, &Graph](Value *Val) { @@ -982,7 +872,7 @@ static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst, // buy us much that we don't already have. I'd like to add interprocedural // analysis prior to this however, in case that somehow requires the graph // produced by this for efficient execution -static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn, +static void buildGraphFrom(CFLAAResult &Analysis, Function *Fn, SmallVectorImpl &ReturnedValues, NodeMapT &Map, GraphT &Graph) { for (auto &Bb : Fn->getBasicBlockList()) @@ -1012,12 +902,13 @@ static bool canSkipAddingToSets(Value *Val) { return false; } -static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) { +// Builds the graph + StratifiedSets for a function. +CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) { NodeMapT Map; GraphT Graph; SmallVector ReturnedValues; - buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph); + buildGraphFrom(*this, Fn, ReturnedValues, Map, Graph); DenseMap NodeValueMap; NodeValueMap.resize(Map.size()); @@ -1098,19 +989,35 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) { return FunctionInfo(Builder.build(), std::move(ReturnedValues)); } -void CFLAliasAnalysis::scan(Function *Fn) { +void CFLAAResult::scan(Function *Fn) { auto InsertPair = Cache.insert(std::make_pair(Fn, Optional())); (void)InsertPair; assert(InsertPair.second && "Trying to scan a function that has already been cached"); - FunctionInfo Info(buildSetsFrom(*this, Fn)); + FunctionInfo Info(buildSetsFrom(Fn)); Cache[Fn] = std::move(Info); Handles.push_front(FunctionHandle(Fn, this)); } -AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA, - const MemoryLocation &LocB) { +void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); } + +/// \brief Ensures that the given function is available in the cache. +/// Returns the appropriate entry from the cache. 
+const Optional & +CFLAAResult::ensureCached(Function *Fn) { + auto Iter = Cache.find(Fn); + if (Iter == Cache.end()) { + scan(Fn); + Iter = Cache.find(Fn); + assert(Iter != Cache.end()); + assert(Iter->second.hasValue()); + } + return Iter->second; +} + +AliasResult CFLAAResult::query(const MemoryLocation &LocA, + const MemoryLocation &LocB) { auto *ValA = const_cast(LocA.Ptr); auto *ValB = const_cast(LocB.Ptr); @@ -1176,7 +1083,37 @@ AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA, return NoAlias; } -bool CFLAliasAnalysis::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; +CFLAAResult CFLAA::run(Function &F, AnalysisManager *AM) { + return CFLAAResult(AM->getResult(F)); +} + +char CFLAA::PassID; + +char CFLAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", + false, true) + +ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); } + +CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) { + initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool CFLAAWrapperPass::doInitialization(Module &M) { + Result.reset( + new CFLAAResult(getAnalysis().getTLI())); + return false; +} + +bool CFLAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; +} + +void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); } diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 3ec79adba57f..69623619a8b0 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -1,8 +1,6 @@ add_llvm_library(LLVMAnalysis AliasAnalysis.cpp - AliasAnalysisCounter.cpp AliasAnalysisEvaluator.cpp - AliasDebugger.cpp AliasSetTracker.cpp Analysis.cpp AssumptionCache.cpp @@ -14,16 +12,23 @@ add_llvm_library(LLVMAnalysis CFGPrinter.cpp CFLAliasAnalysis.cpp CGSCCPassManager.cpp + CallGraph.cpp + CallGraphSCCPass.cpp + CallPrinter.cpp CaptureTracking.cpp CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp Delinearization.cpp + DemandedBits.cpp DependenceAnalysis.cpp DivergenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp + EHPersonalities.cpp + GlobalsModRef.cpp IVUsers.cpp + InlineCost.cpp InstCount.cpp InstructionSimplify.cpp Interval.cpp @@ -31,8 +36,6 @@ add_llvm_library(LLVMAnalysis IteratedDominanceFrontier.cpp LazyCallGraph.cpp LazyValueInfo.cpp - LibCallAliasAnalysis.cpp - LibCallSemantics.cpp Lint.cpp Loads.cpp LoopAccessAnalysis.cpp @@ -44,7 +47,10 @@ add_llvm_library(LLVMAnalysis MemoryDependenceAnalysis.cpp MemoryLocation.cpp ModuleDebugInfoPrinter.cpp - NoAliasAnalysis.cpp + ObjCARCAliasAnalysis.cpp + ObjCARCAnalysisUtils.cpp + ObjCARCInstKind.cpp + OrderedBasicBlock.cpp PHITransAddr.cpp PostDominators.cpp PtrUseVisitor.cpp @@ -69,5 +75,3 @@ add_llvm_library(LLVMAnalysis ) add_dependencies(LLVMAnalysis intrinsics_gen) - -add_subdirectory(IPA) diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/CallGraph.cpp similarity index 91% rename from lib/Analysis/IPA/CallGraph.cpp rename to lib/Analysis/CallGraph.cpp index e2799d965a7d..7cec962678e8 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/CallGraph.cpp @@ -22,7 +22,7 @@ using namespace llvm; CallGraph::CallGraph(Module &M) : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)), - CallsExternalNode(new 
CallGraphNode(nullptr)) { + CallsExternalNode(llvm::make_unique(nullptr)) { // Add every function to the call graph. for (Function &F : M) addToCallGraph(&F); @@ -32,10 +32,19 @@ CallGraph::CallGraph(Module &M) Root = ExternalCallingNode; } +CallGraph::CallGraph(CallGraph &&Arg) + : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root), + ExternalCallingNode(Arg.ExternalCallingNode), + CallsExternalNode(std::move(Arg.CallsExternalNode)) { + Arg.FunctionMap.clear(); + Arg.Root = nullptr; + Arg.ExternalCallingNode = nullptr; +} + CallGraph::~CallGraph() { // CallsExternalNode is not in the function map, delete it explicitly. - CallsExternalNode->allReferencesDropped(); - delete CallsExternalNode; + if (CallsExternalNode) + CallsExternalNode->allReferencesDropped(); // Reset all node's use counts to zero before deleting them to prevent an // assertion from firing. @@ -43,8 +52,6 @@ CallGraph::~CallGraph() { for (auto &I : FunctionMap) I.second->allReferencesDropped(); #endif - for (auto &I : FunctionMap) - delete I.second; } void CallGraph::addToCallGraph(Function *F) { @@ -70,7 +77,7 @@ void CallGraph::addToCallGraph(Function *F) { // If this function is not defined in this translation unit, it could call // anything. if (F->isDeclaration() && !F->isIntrinsic()) - Node->addCalledFunction(CallSite(), CallsExternalNode); + Node->addCalledFunction(CallSite(), CallsExternalNode.get()); // Look for calls by this function. for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) @@ -83,7 +90,7 @@ void CallGraph::addToCallGraph(Function *F) { // Indirect calls of intrinsics are not allowed so no need to check. // We can be more precise here by using TargetArg returned by // Intrinsic::isLeaf. - Node->addCalledFunction(CS, CallsExternalNode); + Node->addCalledFunction(CS, CallsExternalNode.get()); else if (!Callee->isIntrinsic()) Node->addCalledFunction(CS, getOrInsertFunction(Callee)); } @@ -105,7 +112,7 @@ void CallGraph::print(raw_ostream &OS) const { Nodes.reserve(FunctionMap.size()); for (auto I = begin(), E = end(); I != E; ++I) - Nodes.push_back(I->second); + Nodes.push_back(I->second.get()); std::sort(Nodes.begin(), Nodes.end(), [](CallGraphNode *LHS, CallGraphNode *RHS) { @@ -120,9 +127,8 @@ void CallGraph::print(raw_ostream &OS) const { CN->print(OS); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraph::dump() const { print(dbgs()); } -#endif // removeFunctionFromModule - Unlink the function from this module, returning // it. 
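The CallGraph hunks here move node ownership from raw pointers into std::unique_ptr held directly in the function map: lookups hand out non-owning raw pointers, and erasing a map entry frees the node, which is what lets the destructor and removeFunctionFromModule drop their manual deletes. A reduced sketch of that ownership scheme (hypothetical names; the patch itself uses llvm::make_unique):

    #include <map>
    #include <memory>

    struct Node { int Data; };
    static std::map<int, std::unique_ptr<Node>> FunctionMap;

    // Mirrors getOrInsertFunction: the map slot owns the node; callers
    // only ever borrow it.
    static Node *getOrInsert(int Key) {
      std::unique_ptr<Node> &Slot = FunctionMap[Key];
      if (!Slot)
        Slot.reset(new Node());
      return Slot.get();
    }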
Because this removes the function from the module, the call graph node @@ -134,7 +140,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) { assert(CGN->empty() && "Cannot remove function from call " "graph if it references other functions!"); Function *F = CGN->getFunction(); // Get the function for the call graph node - delete CGN; // Delete the call graph node for this func FunctionMap.erase(F); // Remove the call graph node from the map M.getFunctionList().remove(F); @@ -152,7 +157,7 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) { "Pointing CallGraphNode at a function that already exists"); FunctionMapTy::iterator I = FunctionMap.find(From); I->second->F = const_cast(To); - FunctionMap[To] = I->second; + FunctionMap[To] = std::move(I->second); FunctionMap.erase(I); } @@ -160,12 +165,13 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) { // it will insert a new CallGraphNode for the specified function if one does // not already exist. CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { - CallGraphNode *&CGN = FunctionMap[F]; + auto &CGN = FunctionMap[F]; if (CGN) - return CGN; + return CGN.get(); assert((!F || F->getParent() == &M) && "Function not in current module!"); - return CGN = new CallGraphNode(const_cast(F)); + CGN = llvm::make_unique(const_cast(F)); + return CGN.get(); } //===----------------------------------------------------------------------===// @@ -190,9 +196,8 @@ void CallGraphNode::print(raw_ostream &OS) const { OS << '\n'; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraphNode::dump() const { print(dbgs()); } -#endif /// removeCallEdgeFor - This method removes the edge in the node for the /// specified call site. Note that this method takes linear time, so it @@ -297,6 +302,5 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const { G->print(OS); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } -#endif diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp similarity index 100% rename from lib/Analysis/IPA/CallGraphSCCPass.cpp rename to lib/Analysis/CallGraphSCCPass.cpp diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/CallPrinter.cpp similarity index 100% rename from lib/Analysis/IPA/CallPrinter.cpp rename to lib/Analysis/CallPrinter.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index 52ef807aeb59..1add2fa77566 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -52,63 +53,6 @@ namespace { bool Captured; }; - struct NumberedInstCache { - SmallDenseMap NumberedInsts; - BasicBlock::const_iterator LastInstFound; - unsigned LastInstPos; - const BasicBlock *BB; - - NumberedInstCache(const BasicBlock *BasicB) : LastInstPos(0), BB(BasicB) { - LastInstFound = BB->end(); - } - - /// \brief Find the first instruction 'A' or 'B' in 'BB'. Number out - /// instruction while walking 'BB'. 
- const Instruction *find(const Instruction *A, const Instruction *B) { - const Instruction *Inst = nullptr; - assert(!(LastInstFound == BB->end() && LastInstPos != 0) && - "Instruction supposed to be in NumberedInsts"); - - // Start the search with the instruction found in the last lookup round. - auto II = BB->begin(); - auto IE = BB->end(); - if (LastInstFound != IE) - II = std::next(LastInstFound); - - // Number all instructions up to the point where we find 'A' or 'B'. - for (++LastInstPos; II != IE; ++II, ++LastInstPos) { - Inst = cast(II); - NumberedInsts[Inst] = LastInstPos; - if (Inst == A || Inst == B) - break; - } - - assert(II != IE && "Instruction not found?"); - LastInstFound = II; - return Inst; - } - - /// \brief Find out whether 'A' dominates 'B', meaning whether 'A' - /// comes before 'B' in 'BB'. This is a simplification that considers - /// cached instruction positions and ignores other basic blocks, being - /// only relevant to compare relative instructions positions inside 'BB'. - bool dominates(const Instruction *A, const Instruction *B) { - assert(A->getParent() == B->getParent() && - "Instructions must be in the same basic block!"); - - unsigned NA = NumberedInsts.lookup(A); - unsigned NB = NumberedInsts.lookup(B); - if (NA && NB) - return NA < NB; - if (NA) - return true; - if (NB) - return false; - - return A == find(A, B); - } - }; - /// Only find pointer captures which happen before the given instruction. Uses /// the dominator tree to determine whether one instruction is before another. /// Only support the case where the Value is defined in the same basic block @@ -116,8 +60,8 @@ namespace { struct CapturesBefore : public CaptureTracker { CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT, - bool IncludeI) - : LocalInstCache(I->getParent()), BeforeHere(I), DT(DT), + bool IncludeI, OrderedBasicBlock *IC) + : OrderedBB(IC), BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {} void tooManyUses() override { Captured = true; } @@ -131,18 +75,18 @@ namespace { // Compute the case where both instructions are inside the same basic // block. Since instructions in the same BB as BeforeHere are numbered in - // 'LocalInstCache', avoid using 'dominates' and 'isPotentiallyReachable' + // 'OrderedBB', avoid using 'dominates' and 'isPotentiallyReachable' // which are very expensive for large basic blocks. if (BB == BeforeHere->getParent()) { // 'I' dominates 'BeforeHere' => not safe to prune. // - // The value defined by an invoke dominates an instruction only if it - // dominates every instruction in UseBB. A PHI is dominated only if - // the instruction dominates every possible use in the UseBB. Since + // The value defined by an invoke dominates an instruction only + // if it dominates every instruction in UseBB. A PHI is dominated only + // if the instruction dominates every possible use in the UseBB. Since // UseBB == BB, avoid pruning. 
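The cache being deleted here survives as the new OrderedBasicBlock utility: instruction positions are assigned lazily, so the first intra-block order query pays for a partial walk of the block and later queries are plain map lookups. A reduced sketch of lazy order numbering, with plain ints standing in for instructions:

    #include <map>
    #include <vector>

    struct LazyOrder {
      const std::vector<int> &Seq;  // stands in for the basic block
      std::map<int, unsigned> Pos;  // positions assigned so far
      unsigned NextIdx;

      explicit LazyOrder(const std::vector<int> &S) : Seq(S), NextIdx(0) {}

      // Number elements only as far as needed to find V.
      unsigned positionOf(int V) {
        auto It = Pos.find(V);
        if (It != Pos.end())
          return It->second;
        while (NextIdx < Seq.size()) {
          Pos[Seq[NextIdx]] = NextIdx;
          if (Seq[NextIdx] == V)
            return NextIdx++;
          ++NextIdx;
        }
        return ~0u; // not present
      }

      bool comesBefore(int A, int B) { return positionOf(A) < positionOf(B); }
    };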
if (isa(BeforeHere) || isa(I) || I == BeforeHere) return false; - if (!LocalInstCache.dominates(BeforeHere, I)) + if (!OrderedBB->dominates(BeforeHere, I)) return false; // 'BeforeHere' comes before 'I', it's safe to prune if we also @@ -157,10 +101,7 @@ namespace { SmallVector Worklist; Worklist.append(succ_begin(BB), succ_end(BB)); - if (!isPotentiallyReachableFromMany(Worklist, BB, DT)) - return true; - - return false; + return !isPotentiallyReachableFromMany(Worklist, BB, DT); } // If the value is defined in the same basic block as use and BeforeHere, @@ -196,7 +137,7 @@ namespace { return true; } - NumberedInstCache LocalInstCache; + OrderedBasicBlock *OrderedBB; const Instruction *BeforeHere; DominatorTree *DT; @@ -238,21 +179,29 @@ bool llvm::PointerMayBeCaptured(const Value *V, /// returning the value (or part of it) from the function counts as capturing /// it or not. The boolean StoreCaptures specified whether storing the value /// (or part of it) into memory anywhere automatically counts as capturing it -/// or not. +/// or not. A ordered basic block \p OBB can be used in order to speed up +/// queries about relative order among instructions in the same basic block. bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, - DominatorTree *DT, bool IncludeI) { + DominatorTree *DT, bool IncludeI, + OrderedBasicBlock *OBB) { assert(!isa(V) && "It doesn't make sense to ask whether a global is captured."); + bool UseNewOBB = OBB == nullptr; if (!DT) return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures); + if (UseNewOBB) + OBB = new OrderedBasicBlock(I->getParent()); // TODO: See comment in PointerMayBeCaptured regarding what could be done // with StoreCaptures. - CapturesBefore CB(ReturnCaptures, I, DT, IncludeI); + CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB); PointerMayBeCaptured(V, &CB); + + if (UseNewOBB) + delete OBB; return CB.Captured; } @@ -300,8 +249,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) { // that loading a value from a pointer does not cause the pointer to be // captured, even though the loaded value might be the pointer itself // (think of self-referential objects). - CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (CallSite::arg_iterator A = B; A != E; ++A) + CallSite::data_operand_iterator B = + CS.data_operands_begin(), E = CS.data_operands_end(); + for (CallSite::data_operand_iterator A = B; A != E; ++A) if (A->get() == V && !CS.doesNotCapture(A - B)) // The parameter is not marked 'nocapture' - captured. if (Tracker->captured(U)) diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 46a2c43b1690..4090b4cd752b 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -45,14 +45,8 @@ static void completeEphemeralValues(SmallVector &WorkSet, continue; // If all uses of this value are ephemeral, then so is this value. - bool FoundNEUse = false; - for (const User *I : V->users()) - if (!EphValues.count(I)) { - FoundNEUse = true; - break; - } - - if (FoundNEUse) + if (!std::all_of(V->user_begin(), V->user_end(), + [&](const User *U) { return EphValues.count(U); })) continue; EphValues.insert(V); @@ -116,7 +110,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { // Skip ephemeral values. - if (EphValues.count(II)) + if (EphValues.count(&*II)) continue; // Special handling for calls. 
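The completeEphemeralValues change above compresses a hand-rolled flag-and-break loop into a single std::all_of over the use list: a value is ephemeral only if every one of its users already is. The same shape drops in anywhere an all-users predicate is needed; a self-contained sketch:

    #include <algorithm>
    #include <set>
    #include <vector>

    static bool allUsersEphemeral(const std::vector<int> &Users,
                                  const std::set<int> &EphValues) {
      return std::all_of(Users.begin(), Users.end(),
                         [&](int U) { return EphValues.count(U) != 0; });
    }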
@@ -155,6 +149,9 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
     if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
       ++NumVectorInsts;
 
+    if (II->getType()->isTokenTy() && II->isUsedOutsideOfBlock(BB))
+      notDuplicatable = true;
+
     if (const CallInst *CI = dyn_cast<CallInst>(II))
       if (CI->cannotDuplicate())
         notDuplicatable = true;
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 02a5aef03223..ccb56631b846 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -248,8 +248,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
 
   // Look through ptr->int and ptr->ptr casts.
   if (CE->getOpcode() == Instruction::PtrToInt ||
-      CE->getOpcode() == Instruction::BitCast ||
-      CE->getOpcode() == Instruction::AddrSpaceCast)
+      CE->getOpcode() == Instruction::BitCast)
     return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);
 
   // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5)
@@ -532,6 +531,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
     if (GV->isConstant() && GV->hasDefinitiveInitializer())
       return GV->getInitializer();
 
+  if (auto *GA = dyn_cast<GlobalAlias>(C))
+    if (GA->getAliasee() && !GA->mayBeOverridden())
+      return ConstantFoldLoadFromConstPtr(GA->getAliasee(), DL);
+
   // If the loaded value isn't a constant expr, we can't handle it.
   ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
   if (!CE)
@@ -1236,6 +1239,9 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
   case Intrinsic::sqrt:
   case Intrinsic::sin:
   case Intrinsic::cos:
+  case Intrinsic::trunc:
+  case Intrinsic::rint:
+  case Intrinsic::nearbyint:
   case Intrinsic::pow:
   case Intrinsic::powi:
   case Intrinsic::bswap:
@@ -1276,24 +1282,30 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
   // return true for a name like "cos\0blah" which strcmp would return equal to
   // "cos", but has length 8.
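The "cos\0blah" remark above is precisely why the matching below compares StringRef values rather than calling strcmp: StringRef equality also compares lengths, so an embedded NUL cannot spoof a shorter name. A quick demonstration:

    #include "llvm/ADT/StringRef.h"
    #include <cassert>
    #include <cstring>

    void nulExample() {
      llvm::StringRef Name("cos\0blah", 8);         // embedded NUL, length 8
      assert(Name != "cos");                        // lengths differ: not equal
      assert(std::strcmp(Name.data(), "cos") == 0); // strcmp stops at the NUL
    }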
switch (Name[0]) { - default: return false; + default: + return false; case 'a': - return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; + return Name == "acos" || Name == "asin" || Name == "atan" || + Name == "atan2" || Name == "acosf" || Name == "asinf" || + Name == "atanf" || Name == "atan2f"; case 'c': - return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; + return Name == "ceil" || Name == "cos" || Name == "cosh" || + Name == "ceilf" || Name == "cosf" || Name == "coshf"; case 'e': - return Name == "exp" || Name == "exp2"; + return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f"; case 'f': - return Name == "fabs" || Name == "fmod" || Name == "floor"; + return Name == "fabs" || Name == "floor" || Name == "fmod" || + Name == "fabsf" || Name == "floorf" || Name == "fmodf"; case 'l': - return Name == "log" || Name == "log10"; + return Name == "log" || Name == "log10" || Name == "logf" || + Name == "log10f"; case 'p': - return Name == "pow"; + return Name == "pow" || Name == "powf"; case 's': return Name == "sin" || Name == "sinh" || Name == "sqrt" || - Name == "sinf" || Name == "sqrtf"; + Name == "sinf" || Name == "sinhf" || Name == "sqrtf"; case 't': - return Name == "tan" || Name == "tanh"; + return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf"; } } @@ -1422,6 +1434,36 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFP::get(Ty->getContext(), V); } + if (IntrinsicID == Intrinsic::floor) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardNegative); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::ceil) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardPositive); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::trunc) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmTowardZero); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::rint) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); + } + + if (IntrinsicID == Intrinsic::nearbyint) { + APFloat V = Op->getValueAPF(); + V.roundToIntegral(APFloat::rmNearestTiesToEven); + return ConstantFP::get(Ty->getContext(), V); + } + /// We only fold functions with finite arguments. Folding NaN and inf is /// likely to be aborted with an exception anyway, and some host libms /// have known errors raising exceptions. 
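All five new cases above funnel into APFloat::roundToIntegral and differ only in rounding mode (floor uses rmTowardNegative, ceil rmTowardPositive, trunc rmTowardZero, rint and nearbyint rmNearestTiesToEven), so the fold never touches the host libm. A standalone spot-check:

    #include "llvm/ADT/APFloat.h"
    #include <cassert>
    using llvm::APFloat;

    void roundingExample() {
      APFloat V(-2.5);
      V.roundToIntegral(APFloat::rmTowardNegative);    // floor(-2.5) == -3.0
      assert(V.isExactlyValue(-3.0));

      APFloat W(2.5);
      W.roundToIntegral(APFloat::rmNearestTiesToEven); // rint(2.5) == 2.0
      assert(W.isExactlyValue(2.0));                   // ties go to even
    }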
@@ -1448,10 +1490,6 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, return ConstantFoldFP(exp, V, Ty); case Intrinsic::exp2: return ConstantFoldFP(exp2, V, Ty); - case Intrinsic::floor: - return ConstantFoldFP(floor, V, Ty); - case Intrinsic::ceil: - return ConstantFoldFP(ceil, V, Ty); case Intrinsic::sin: return ConstantFoldFP(sin, V, Ty); case Intrinsic::cos: @@ -1463,43 +1501,51 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, switch (Name[0]) { case 'a': - if (Name == "acos" && TLI->has(LibFunc::acos)) + if ((Name == "acos" && TLI->has(LibFunc::acos)) || + (Name == "acosf" && TLI->has(LibFunc::acosf))) return ConstantFoldFP(acos, V, Ty); - else if (Name == "asin" && TLI->has(LibFunc::asin)) + else if ((Name == "asin" && TLI->has(LibFunc::asin)) || + (Name == "asinf" && TLI->has(LibFunc::asinf))) return ConstantFoldFP(asin, V, Ty); - else if (Name == "atan" && TLI->has(LibFunc::atan)) + else if ((Name == "atan" && TLI->has(LibFunc::atan)) || + (Name == "atanf" && TLI->has(LibFunc::atanf))) return ConstantFoldFP(atan, V, Ty); break; case 'c': - if (Name == "ceil" && TLI->has(LibFunc::ceil)) + if ((Name == "ceil" && TLI->has(LibFunc::ceil)) || + (Name == "ceilf" && TLI->has(LibFunc::ceilf))) return ConstantFoldFP(ceil, V, Ty); - else if (Name == "cos" && TLI->has(LibFunc::cos)) + else if ((Name == "cos" && TLI->has(LibFunc::cos)) || + (Name == "cosf" && TLI->has(LibFunc::cosf))) return ConstantFoldFP(cos, V, Ty); - else if (Name == "cosh" && TLI->has(LibFunc::cosh)) + else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) || + (Name == "coshf" && TLI->has(LibFunc::coshf))) return ConstantFoldFP(cosh, V, Ty); - else if (Name == "cosf" && TLI->has(LibFunc::cosf)) - return ConstantFoldFP(cos, V, Ty); break; case 'e': - if (Name == "exp" && TLI->has(LibFunc::exp)) + if ((Name == "exp" && TLI->has(LibFunc::exp)) || + (Name == "expf" && TLI->has(LibFunc::expf))) return ConstantFoldFP(exp, V, Ty); - - if (Name == "exp2" && TLI->has(LibFunc::exp2)) { + if ((Name == "exp2" && TLI->has(LibFunc::exp2)) || + (Name == "exp2f" && TLI->has(LibFunc::exp2f))) // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a // C99 library. 
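The exp2-as-pow fallback above rests on the identity 2^x = pow(2, x): for modest integer exponents both are exact in double on typical hosts, while general arguments match only to the host libm's accuracy. A quick numeric spot-check of the exact cases:

    #include <cassert>
    #include <cmath>

    void exp2Example() {
      // pow(2, x) stands in for exp2(x) when the host lacks C99 exp2.
      assert(std::pow(2.0, 10.0) == 1024.0); // exact in double
      assert(std::pow(2.0, -3.0) == 0.125);
    }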
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); - } break; case 'f': - if (Name == "fabs" && TLI->has(LibFunc::fabs)) + if ((Name == "fabs" && TLI->has(LibFunc::fabs)) || + (Name == "fabsf" && TLI->has(LibFunc::fabsf))) return ConstantFoldFP(fabs, V, Ty); - else if (Name == "floor" && TLI->has(LibFunc::floor)) + else if ((Name == "floor" && TLI->has(LibFunc::floor)) || + (Name == "floorf" && TLI->has(LibFunc::floorf))) return ConstantFoldFP(floor, V, Ty); break; case 'l': - if (Name == "log" && V > 0 && TLI->has(LibFunc::log)) + if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) || + (Name == "logf" && V > 0 && TLI->has(LibFunc::logf))) return ConstantFoldFP(log, V, Ty); - else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) + else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) || + (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f))) return ConstantFoldFP(log10, V, Ty); else if (IntrinsicID == Intrinsic::sqrt && (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { @@ -1516,21 +1562,22 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, } break; case 's': - if (Name == "sin" && TLI->has(LibFunc::sin)) + if ((Name == "sin" && TLI->has(LibFunc::sin)) || + (Name == "sinf" && TLI->has(LibFunc::sinf))) return ConstantFoldFP(sin, V, Ty); - else if (Name == "sinh" && TLI->has(LibFunc::sinh)) + else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) || + (Name == "sinhf" && TLI->has(LibFunc::sinhf))) return ConstantFoldFP(sinh, V, Ty); - else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) + else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) || + (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf))) return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)) - return ConstantFoldFP(sqrt, V, Ty); - else if (Name == "sinf" && TLI->has(LibFunc::sinf)) - return ConstantFoldFP(sin, V, Ty); break; case 't': - if (Name == "tan" && TLI->has(LibFunc::tan)) + if ((Name == "tan" && TLI->has(LibFunc::tan)) || + (Name == "tanf" && TLI->has(LibFunc::tanf))) return ConstantFoldFP(tan, V, Ty); - else if (Name == "tanh" && TLI->has(LibFunc::tanh)) + else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) || + (Name == "tanhf" && TLI->has(LibFunc::tanhf))) return ConstantFoldFP(tanh, V, Ty); break; default: @@ -1633,11 +1680,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, if (!TLI) return nullptr; - if (Name == "pow" && TLI->has(LibFunc::pow)) + if ((Name == "pow" && TLI->has(LibFunc::pow)) || + (Name == "powf" && TLI->has(LibFunc::powf))) return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); - if (Name == "fmod" && TLI->has(LibFunc::fmod)) + if ((Name == "fmod" && TLI->has(LibFunc::fmod)) || + (Name == "fmodf" && TLI->has(LibFunc::fmodf))) return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); - if (Name == "atan2" && TLI->has(LibFunc::atan2)) + if ((Name == "atan2" && TLI->has(LibFunc::atan2)) || + (Name == "atan2f" && TLI->has(LibFunc::atan2f))) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast(Operands[1])) { if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index b529c1a70aa3..0383cbfbbe4c 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -152,10 +152,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, Mask[i] = val; SmallVector ActualMask = SI->getShuffleMask(); - if (Mask != ActualMask) - return 
false;
-
-  return true;
+  return Mask == ActualMask;
 }
 
 static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
@@ -383,10 +380,8 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
     return -1;
 
   switch (I->getOpcode()) {
-  case Instruction::GetElementPtr:{
-    Type *ValTy = I->getOperand(0)->getType()->getPointerElementType();
-    return TTI->getAddressComputationCost(ValTy);
-  }
+  case Instruction::GetElementPtr:
+    return TTI->getUserCost(I);
 
   case Instruction::Ret:
   case Instruction::PHI:
@@ -505,12 +500,12 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
   }
   case Instruction::Call:
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      SmallVector<Type *, 4> Tys;
+      SmallVector<Value *, 4> Args;
       for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J)
-        Tys.push_back(II->getArgOperand(J)->getType());
+        Args.push_back(II->getArgOperand(J));
 
       return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
-                                        Tys);
+                                        Args);
     }
     return -1;
   default:
@@ -525,7 +520,7 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
 
   for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {
     for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) {
-      Instruction *Inst = it;
+      Instruction *Inst = &*it;
       unsigned Cost = getInstructionCost(Inst);
       if (Cost != (unsigned)-1)
         OS << "Cost Model: Found an estimated cost of " << Cost;
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index 9d1578603268..baee8b3b084b 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -60,12 +60,12 @@ public:
 void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
   AU.addRequired<LoopInfoWrapperPass>();
-  AU.addRequired<ScalarEvolution>();
+  AU.addRequired<ScalarEvolutionWrapperPass>();
 }
 
 bool Delinearization::runOnFunction(Function &F) {
   this->F = &F;
-  SE = &getAnalysis<ScalarEvolution>();
+  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   return false;
 }
@@ -102,20 +102,14 @@ void Delinearization::print(raw_ostream &O, const Module *) const {
       if (!BasePointer)
         break;
       AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
-      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
-
-      // Do not try to delinearize memory accesses that are not AddRecs.
-      if (!AR)
-        break;
-
       O << "\n";
       O << "Inst:" << *Inst << "\n";
       O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
-      O << "AddRec: " << *AR << "\n";
+      O << "AccessFunction: " << *AccessFn << "\n";
 
       SmallVector<const SCEV *, 3> Subscripts, Sizes;
-      SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst));
+      SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst));
       if (Subscripts.size() == 0 || Sizes.size() == 0 ||
           Subscripts.size() != Sizes.size()) {
         O << "failed to delinearize\n";
diff --git a/lib/Analysis/DemandedBits.cpp b/lib/Analysis/DemandedBits.cpp
new file mode 100644
index 000000000000..912c5ceb754d
--- /dev/null
+++ b/lib/Analysis/DemandedBits.cpp
@@ -0,0 +1,392 @@
+//===---- DemandedBits.cpp - Determine demanded bits ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements a demanded bits analysis. A demanded bit is one that
+// contributes to a result; bits that are not demanded can be either zero or
+// one without affecting control or data flow.
+// For example in this sequence:
+//
+//   %1 = add i32 %x, %y
+//   %2 = trunc i32 %1 to i16
+//
+// Only the lowest 16 bits of %1 are demanded; the rest are removed by the
+// trunc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "demanded-bits"
+
+char DemandedBits::ID = 0;
+INITIALIZE_PASS_BEGIN(DemandedBits, "demanded-bits", "Demanded bits analysis",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(DemandedBits, "demanded-bits", "Demanded bits analysis",
+                    false, false)
+
+DemandedBits::DemandedBits() : FunctionPass(ID), F(nullptr), Analyzed(false) {
+  initializeDemandedBitsPass(*PassRegistry::getPassRegistry());
+}
+
+void DemandedBits::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<AssumptionCacheTracker>();
+  AU.addRequired<DominatorTreeWrapperPass>();
+  AU.setPreservesAll();
+}
+
+static bool isAlwaysLive(Instruction *I) {
+  return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+      I->isEHPad() || I->mayHaveSideEffects();
+}
+
+void DemandedBits::determineLiveOperandBits(
+    const Instruction *UserI, const Instruction *I, unsigned OperandNo,
+    const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne,
+    APInt &KnownZero2, APInt &KnownOne2) {
+  unsigned BitWidth = AB.getBitWidth();
+
+  // We're called once per operand, but for some instructions, we need to
+  // compute known bits of both operands in order to determine the live bits of
+  // either (when both operands are instructions themselves). We don't,
+  // however, want to do this twice, so we cache the result in APInts that live
+  // in the caller. For the two-relevant-operands case, both operand values are
+  // provided here.
+  auto ComputeKnownBits =
+      [&](unsigned BitWidth, const Value *V1, const Value *V2) {
+        const DataLayout &DL = I->getModule()->getDataLayout();
+        KnownZero = APInt(BitWidth, 0);
+        KnownOne = APInt(BitWidth, 0);
+        computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0,
+                         AC, UserI, DT);
+
+        if (V2) {
+          KnownZero2 = APInt(BitWidth, 0);
+          KnownOne2 = APInt(BitWidth, 0);
+          computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL,
+                           0, AC, UserI, DT);
+        }
+      };
+
+  switch (UserI->getOpcode()) {
+  default: break;
+  case Instruction::Call:
+  case Instruction::Invoke:
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI))
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::bswap:
+        // The alive bits of the input are the swapped alive bits of
+        // the output.
+        AB = AOut.byteSwap();
+        break;
+      case Intrinsic::ctlz:
+        if (OperandNo == 0) {
+          // We need some output bits, so we need all bits of the
+          // input to the left of, and including, the leftmost bit
+          // known to be one.
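+          // For example: with BitWidth = 8 and KnownOne = 0b00010000, the
+          // leftmost known-one bit is bit 4, KnownOne.countLeadingZeros() is
+          // 3, and AB becomes getHighBitsSet(8, 4) = 0b11110000; no lower
+          // input bit can change the leading-zero count.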
+          ComputeKnownBits(BitWidth, I, nullptr);
+          AB = APInt::getHighBitsSet(BitWidth,
+                 std::min(BitWidth, KnownOne.countLeadingZeros()+1));
+        }
+        break;
+      case Intrinsic::cttz:
+        if (OperandNo == 0) {
+          // We need some output bits, so we need all bits of the
+          // input to the right of, and including, the rightmost bit
+          // known to be one.
+          ComputeKnownBits(BitWidth, I, nullptr);
+          AB = APInt::getLowBitsSet(BitWidth,
+                 std::min(BitWidth, KnownOne.countTrailingZeros()+1));
+        }
+        break;
+      }
+    break;
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+    // Find the highest live output bit. We don't need any more input
+    // bits than that (adds, and thus subtracts, ripple only to the
+    // left).
+    AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits());
+    break;
+  case Instruction::Shl:
+    if (OperandNo == 0)
+      if (ConstantInt *CI =
+            dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+        AB = AOut.lshr(ShiftAmt);
+
+        // If the shift is nuw/nsw, then the high bits are not dead
+        // (because we've promised that they *must* be zero).
+        const ShlOperator *S = cast<ShlOperator>(UserI);
+        if (S->hasNoSignedWrap())
+          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
+        else if (S->hasNoUnsignedWrap())
+          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+      }
+    break;
+  case Instruction::LShr:
+    if (OperandNo == 0)
+      if (ConstantInt *CI =
+            dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+        AB = AOut.shl(ShiftAmt);
+
+        // If the shift is exact, then the low bits are not dead
+        // (they must be zero).
+        if (cast<LShrOperator>(UserI)->isExact())
+          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+      }
+    break;
+  case Instruction::AShr:
+    if (OperandNo == 0)
+      if (ConstantInt *CI =
+            dyn_cast<ConstantInt>(UserI->getOperand(1))) {
+        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1);
+        AB = AOut.shl(ShiftAmt);
+        // Because the high input bit is replicated into the
+        // high-order bits of the result, if we need any of those
+        // bits, then we must keep the highest input bit.
+        if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt))
+            .getBoolValue())
+          AB.setBit(BitWidth-1);
+
+        // If the shift is exact, then the low bits are not dead
+        // (they must be zero).
+        if (cast<AShrOperator>(UserI)->isExact())
+          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt);
+      }
+    break;
+  case Instruction::And:
+    AB = AOut;
+
+    // For bits that are known zero, the corresponding bits in the
+    // other operand are dead (unless they're both zero, in which
+    // case they can't both be dead, so just mark the LHS bits as
+    // dead).
+    if (OperandNo == 0) {
+      ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+      AB &= ~KnownZero2;
+    } else {
+      if (!isa<Instruction>(UserI->getOperand(0)))
+        ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+      AB &= ~(KnownZero & ~KnownZero2);
+    }
+    break;
+  case Instruction::Or:
+    AB = AOut;
+
+    // For bits that are known one, the corresponding bits in the
+    // other operand are dead (unless they're both one, in which
+    // case they can't both be dead, so just mark the LHS bits as
+    // dead).
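+    // For example: if AOut = 0b1111 and the other operand has
+    // KnownOne = 0b0011, the two low bits of this operand cannot change the
+    // result, so AB becomes 0b1100.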
+    if (OperandNo == 0) {
+      ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+      AB &= ~KnownOne2;
+    } else {
+      if (!isa<Instruction>(UserI->getOperand(0)))
+        ComputeKnownBits(BitWidth, UserI->getOperand(0), I);
+      AB &= ~(KnownOne & ~KnownOne2);
+    }
+    break;
+  case Instruction::Xor:
+  case Instruction::PHI:
+    AB = AOut;
+    break;
+  case Instruction::Trunc:
+    AB = AOut.zext(BitWidth);
+    break;
+  case Instruction::ZExt:
+    AB = AOut.trunc(BitWidth);
+    break;
+  case Instruction::SExt:
+    AB = AOut.trunc(BitWidth);
+    // Because the high input bit is replicated into the
+    // high-order bits of the result, if we need any of those
+    // bits, then we must keep the highest input bit.
+    if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(),
+                                      AOut.getBitWidth() - BitWidth))
+        .getBoolValue())
+      AB.setBit(BitWidth-1);
+    break;
+  case Instruction::Select:
+    if (OperandNo != 0)
+      AB = AOut;
+    break;
+  case Instruction::ICmp:
+    // Count the number of leading zeroes in each operand.
+    ComputeKnownBits(BitWidth, I, UserI->getOperand(1));
+    auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(),
+                                     KnownZero2.countLeadingOnes());
+    AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes);
+    break;
+  }
+}
+
+bool DemandedBits::runOnFunction(Function& Fn) {
+  F = &Fn;
+  Analyzed = false;
+  return false;
+}
+
+void DemandedBits::performAnalysis() {
+  if (Analyzed)
+    // Analysis already completed for this function.
+    return;
+  Analyzed = true;
+  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+  Visited.clear();
+  AliveBits.clear();
+
+  SmallVector<Instruction*, 128> Worklist;
+
+  // Collect the set of "root" instructions that are known live.
+  for (Instruction &I : instructions(*F)) {
+    if (!isAlwaysLive(&I))
+      continue;
+
+    DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n");
+    // For integer-valued instructions, set up an initial empty set of alive
+    // bits and add the instruction to the work list. For other instructions
+    // add their operands to the work list (for integer-valued operands, mark
+    // all bits as live).
+    if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
+      if (!AliveBits.count(&I)) {
+        AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+        Worklist.push_back(&I);
+      }
+
+      continue;
+    }
+
+    // Non-integer-typed instructions...
+    for (Use &OI : I.operands()) {
+      if (Instruction *J = dyn_cast<Instruction>(OI)) {
+        if (IntegerType *IT = dyn_cast<IntegerType>(J->getType()))
+          AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth());
+        Worklist.push_back(J);
+      }
+    }
+    // To save memory, we don't add I to the Visited set here. Instead, we
+    // check isAlwaysLive on every instruction when searching for dead
+    // instructions later (we need to check isAlwaysLive for the
+    // integer-typed instructions anyway).
+  }
+
+  // Propagate liveness backwards to operands.
+  while (!Worklist.empty()) {
+    Instruction *UserI = Worklist.pop_back_val();
+
+    DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI);
+    APInt AOut;
+    if (UserI->getType()->isIntegerTy()) {
+      AOut = AliveBits[UserI];
+      DEBUG(dbgs() << " Alive Out: " << AOut);
+    }
+    DEBUG(dbgs() << "\n");
+
+    if (!UserI->getType()->isIntegerTy())
+      Visited.insert(UserI);
+
+    APInt KnownZero, KnownOne, KnownZero2, KnownOne2;
+    // Compute the set of alive bits for each operand. These are ORed into the
+    // existing set, if any, and if that changes the set of alive bits, the
+    // operand is added to the work-list.
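+    // For example, if an operand's recorded alive bits are 0b0011 and this
+    // user demands 0b0100 of it, the merged set becomes 0b0111 and the
+    // operand is pushed back onto the work-list.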
+ for (Use &OI : UserI->operands()) { + if (Instruction *I = dyn_cast(OI)) { + if (IntegerType *IT = dyn_cast(I->getType())) { + unsigned BitWidth = IT->getBitWidth(); + APInt AB = APInt::getAllOnesValue(BitWidth); + if (UserI->getType()->isIntegerTy() && !AOut && + !isAlwaysLive(UserI)) { + AB = APInt(BitWidth, 0); + } else { + // If all bits of the output are dead, then all bits of the input + // Bits of each operand that are used to compute alive bits of the + // output are alive, all others are dead. + determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, + KnownZero, KnownOne, + KnownZero2, KnownOne2); + } + + // If we've added to the set of alive bits (or the operand has not + // been previously visited), then re-queue the operand to be visited + // again. + APInt ABPrev(BitWidth, 0); + auto ABI = AliveBits.find(I); + if (ABI != AliveBits.end()) + ABPrev = ABI->second; + + APInt ABNew = AB | ABPrev; + if (ABNew != ABPrev || ABI == AliveBits.end()) { + AliveBits[I] = std::move(ABNew); + Worklist.push_back(I); + } + } else if (!Visited.count(I)) { + Worklist.push_back(I); + } + } + } + } +} + +APInt DemandedBits::getDemandedBits(Instruction *I) { + performAnalysis(); + + const DataLayout &DL = I->getParent()->getModule()->getDataLayout(); + if (AliveBits.count(I)) + return AliveBits[I]; + return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType())); +} + +bool DemandedBits::isInstructionDead(Instruction *I) { + performAnalysis(); + + return !Visited.count(I) && AliveBits.find(I) == AliveBits.end() && + !isAlwaysLive(I); +} + +void DemandedBits::print(raw_ostream &OS, const Module *M) const { + // This is gross. But the alternative is making all the state mutable + // just because of this one debugging method. + const_cast(this)->performAnalysis(); + for (auto &KV : AliveBits) { + OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for " + << *KV.first << "\n"; + } +} + +FunctionPass *llvm::createDemandedBitsPass() { + return new DemandedBits(); +} diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 4826ac407d7f..4040ad3cacd5 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -117,8 +117,8 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da", "Dependence Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(DependenceAnalysis, "da", "Dependence Analysis", true, true) @@ -132,8 +132,8 @@ FunctionPass *llvm::createDependenceAnalysisPass() { bool DependenceAnalysis::runOnFunction(Function &F) { this->F = &F; - AA = &getAnalysis(); - SE = &getAnalysis(); + AA = &getAnalysis().getAAResults(); + SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); return false; } @@ -145,8 +145,8 @@ void DependenceAnalysis::releaseMemory() { void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); - AU.addRequiredTransitive(); - AU.addRequiredTransitive(); + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); AU.addRequiredTransitive(); } @@ -233,7 +233,8 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination, : Dependence(Source, Destination), Levels(CommonLevels), LoopIndependent(PossiblyLoopIndependent) { Consistent = 
true; - DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; + if (CommonLevels) + DV = make_unique(CommonLevels); } // The rest are simple getters that hide the implementation. @@ -371,7 +372,7 @@ void DependenceAnalysis::Constraint::setLine(const SCEV *AA, void DependenceAnalysis::Constraint::setDistance(const SCEV *D, const Loop *CurLoop) { Kind = Distance; - A = SE->getConstant(D->getType(), 1); + A = SE->getOne(D->getType()); B = SE->getNegativeSCEV(A); C = SE->getNegativeSCEV(D); AssociatedLoop = CurLoop; @@ -500,10 +501,10 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X, if (!C1B2_C2B1 || !C1A2_C2A1 || !A1B2_A2B1 || !A2B1_A1B2) return false; - APInt Xtop = C1B2_C2B1->getValue()->getValue(); - APInt Xbot = A1B2_A2B1->getValue()->getValue(); - APInt Ytop = C1A2_C2A1->getValue()->getValue(); - APInt Ybot = A2B1_A1B2->getValue()->getValue(); + APInt Xtop = C1B2_C2B1->getAPInt(); + APInt Xbot = A1B2_A2B1->getAPInt(); + APInt Ytop = C1A2_C2A1->getAPInt(); + APInt Ybot = A2B1_A1B2->getAPInt(); DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n"); DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n"); DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); @@ -527,7 +528,7 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X, } if (const SCEVConstant *CUB = collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { - APInt UpperBound = CUB->getValue()->getValue(); + APInt UpperBound = CUB->getAPInt(); DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n"); if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) { X->setEmpty(); @@ -630,8 +631,8 @@ static AliasResult underlyingObjectsAlias(AliasAnalysis *AA, const Value *B) { const Value *AObj = GetUnderlyingObject(A, DL); const Value *BObj = GetUnderlyingObject(B, DL); - return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), - BObj, AA->getTypeStoreSize(BObj->getType())); + return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), + BObj, DL.getTypeStoreSize(BObj->getType())); } @@ -1114,8 +1115,8 @@ bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff, // Can we compute distance? 
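// For example, with subscripts a*i + c1 (source) and a*i + c2 (sink), a
// common coefficient a == 1, and Delta == 2, the sdivrem below yields
// Distance == 2 and Remainder == 0, i.e. a constant dependence distance of
// two iterations.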
if (isa(Delta) && isa(Coeff)) { - APInt ConstDelta = cast(Delta)->getValue()->getValue(); - APInt ConstCoeff = cast(Coeff)->getValue()->getValue(); + APInt ConstDelta = cast(Delta)->getAPInt(); + APInt ConstCoeff = cast(Coeff)->getAPInt(); APInt Distance = ConstDelta; // these need to be initialized APInt Remainder = ConstDelta; APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); @@ -1256,11 +1257,9 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive"); // compute SplitIter for use by DependenceAnalysis::getSplitIteration() - SplitIter = - SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), - Delta), - SE->getMulExpr(SE->getConstant(Delta->getType(), 2), - ConstCoeff)); + SplitIter = SE->getUDivExpr( + SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), + SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff)); DEBUG(dbgs() << "\t Split iter = " << *SplitIter << "\n"); const SCEVConstant *ConstDelta = dyn_cast(Delta); @@ -1302,14 +1301,14 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff, return true; } Result.DV[Level].Splitable = false; - Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); + Result.DV[Level].Distance = SE->getZero(Delta->getType()); return false; } } // check that Coeff divides Delta - APInt APDelta = ConstDelta->getValue()->getValue(); - APInt APCoeff = ConstCoeff->getValue()->getValue(); + APInt APDelta = ConstDelta->getAPInt(); + APInt APCoeff = ConstCoeff->getAPInt(); APInt Distance = APDelta; // these need to be initialzed APInt Remainder = APDelta; APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); @@ -1463,10 +1462,10 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, // find gcd APInt G, X, Y; - APInt AM = ConstSrcCoeff->getValue()->getValue(); - APInt BM = ConstDstCoeff->getValue()->getValue(); + APInt AM = ConstSrcCoeff->getAPInt(); + APInt BM = ConstDstCoeff->getAPInt(); unsigned Bits = AM.getBitWidth(); - if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { // gcd doesn't divide Delta, no dependence ++ExactSIVindependence; ++ExactSIVsuccesses; @@ -1481,7 +1480,7 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, // UM is perhaps unavailable, let's check if (const SCEVConstant *CUB = collectConstantUpperBound(CurLoop, Delta->getType())) { - UM = CUB->getValue()->getValue(); + UM = CUB->getAPInt(); DEBUG(dbgs() << "\t UM = " << UM << "\n"); UMvalid = true; } @@ -1609,8 +1608,8 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff, static bool isRemainderZero(const SCEVConstant *Dividend, const SCEVConstant *Divisor) { - APInt ConstDividend = Dividend->getValue()->getValue(); - APInt ConstDivisor = Divisor->getValue()->getValue(); + APInt ConstDividend = Dividend->getAPInt(); + APInt ConstDivisor = Divisor->getAPInt(); return ConstDividend.srem(ConstDivisor) == 0; } @@ -1665,8 +1664,8 @@ bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff, Level--; Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); - NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), - DstCoeff, Delta, CurLoop); + NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, + CurLoop); DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) { if (Level < CommonLevels) { @@ -1775,8 +1774,8 @@ bool 
DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff, Level--; Result.Consistent = false; const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); - NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), - Delta, CurLoop); + NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, + CurLoop); DEBUG(dbgs() << "\t Delta = " << *Delta << "\n"); if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) { if (Level < CommonLevels) { @@ -1867,10 +1866,10 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, // find gcd APInt G, X, Y; - APInt AM = ConstSrcCoeff->getValue()->getValue(); - APInt BM = ConstDstCoeff->getValue()->getValue(); + APInt AM = ConstSrcCoeff->getAPInt(); + APInt BM = ConstDstCoeff->getAPInt(); unsigned Bits = AM.getBitWidth(); - if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { + if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) { // gcd doesn't divide Delta, no dependence ++ExactRDIVindependence; return true; @@ -1884,7 +1883,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, // SrcUM is perhaps unavailable, let's check if (const SCEVConstant *UpperBound = collectConstantUpperBound(SrcLoop, Delta->getType())) { - SrcUM = UpperBound->getValue()->getValue(); + SrcUM = UpperBound->getAPInt(); DEBUG(dbgs() << "\t SrcUM = " << SrcUM << "\n"); SrcUMvalid = true; } @@ -1894,7 +1893,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff, // UM is perhaps unavailable, let's check if (const SCEVConstant *UpperBound = collectConstantUpperBound(DstLoop, Delta->getType())) { - DstUM = UpperBound->getValue()->getValue(); + DstUM = UpperBound->getAPInt(); DEBUG(dbgs() << "\t DstUM = " << DstUM << "\n"); DstUMvalid = true; } @@ -2307,7 +2306,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); if (!Constant) return false; - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); Coefficients = AddRec->getStart(); } @@ -2328,7 +2327,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); if (!Constant) return false; - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); Coefficients = AddRec->getStart(); } @@ -2352,7 +2351,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, const SCEVConstant *ConstOp = getConstantPart(Product); if (!ConstOp) return false; - APInt ConstOpValue = ConstOp->getValue()->getValue(); + APInt ConstOpValue = ConstOp->getAPInt(); ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOpValue.abs()); } @@ -2362,7 +2361,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, } if (!Constant) return false; - APInt ConstDelta = cast(Constant)->getValue()->getValue(); + APInt ConstDelta = cast(Constant)->getAPInt(); DEBUG(dbgs() << " ConstDelta = " << ConstDelta << "\n"); if (ConstDelta == 0) return false; @@ -2410,7 +2409,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = getConstantPart(Product); else Constant = cast(Coeff); - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); } Inner = AddRec->getStart(); @@ -2428,7 +2427,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, Constant = 
getConstantPart(Product); else Constant = cast(Coeff); - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); } Inner = AddRec->getStart(); @@ -2445,7 +2444,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, // or constant, in which case we give up on this direction. continue; } - APInt ConstCoeff = Constant->getValue()->getValue(); + APInt ConstCoeff = Constant->getAPInt(); RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs()); DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n"); if (RunningGCD != 0) { @@ -2728,10 +2727,10 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A, // If the difference is 0, we won't need to know the number of iterations. if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) Bound[K].Lower[Dependence::DVEntry::ALL] = - SE->getConstant(A[K].Coeff->getType(), 0); + SE->getZero(A[K].Coeff->getType()); if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) Bound[K].Upper[Dependence::DVEntry::ALL] = - SE->getConstant(A[K].Coeff->getType(), 0); + SE->getZero(A[K].Coeff->getType()); } } @@ -2800,9 +2799,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A, Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity. Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity. if (Bound[K].Iterations) { - const SCEV *Iter_1 = - SE->getMinusSCEV(Bound[K].Iterations, - SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *Iter_1 = SE->getMinusSCEV( + Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff)); Bound[K].Lower[Dependence::DVEntry::LT] = @@ -2847,9 +2845,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity. Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity. 
if (Bound[K].Iterations) { - const SCEV *Iter_1 = - SE->getMinusSCEV(Bound[K].Iterations, - SE->getConstant(Bound[K].Iterations->getType(), 1)); + const SCEV *Iter_1 = SE->getMinusSCEV( + Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType())); const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart)); Bound[K].Lower[Dependence::DVEntry::GT] = @@ -2874,13 +2871,13 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A, // X^+ = max(X, 0) const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { - return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); + return SE->getSMaxExpr(X, SE->getZero(X->getType())); } // X^- = min(X, 0) const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { - return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); + return SE->getSMinExpr(X, SE->getZero(X->getType())); } @@ -2891,7 +2888,7 @@ DependenceAnalysis::CoefficientInfo * DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, const SCEV *&Constant) const { - const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); + const SCEV *Zero = SE->getZero(Subscript->getType()); CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1]; for (unsigned K = 1; K <= MaxLevels; ++K) { CI[K].Coeff = Zero; @@ -2975,7 +2972,7 @@ const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr, const Loop *TargetLoop) const { const SCEVAddRecExpr *AddRec = dyn_cast(Expr); if (!AddRec) - return SE->getConstant(Expr->getType(), 0); + return SE->getZero(Expr->getType()); if (AddRec->getLoop() == TargetLoop) return AddRec->getStepRecurrence(*SE); return findCoefficient(AddRec->getStart(), TargetLoop); @@ -3110,8 +3107,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src, const SCEVConstant *Bconst = dyn_cast(B); const SCEVConstant *Cconst = dyn_cast(C); if (!Bconst || !Cconst) return false; - APInt Beta = Bconst->getValue()->getValue(); - APInt Charlie = Cconst->getValue()->getValue(); + APInt Beta = Bconst->getAPInt(); + APInt Charlie = Cconst->getAPInt(); APInt CdivB = Charlie.sdiv(Beta); assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B"); const SCEV *AP_K = findCoefficient(Dst, CurLoop); @@ -3125,8 +3122,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src, const SCEVConstant *Aconst = dyn_cast(A); const SCEVConstant *Cconst = dyn_cast(C); if (!Aconst || !Cconst) return false; - APInt Alpha = Aconst->getValue()->getValue(); - APInt Charlie = Cconst->getValue()->getValue(); + APInt Alpha = Aconst->getAPInt(); + APInt Charlie = Cconst->getAPInt(); APInt CdivA = Charlie.sdiv(Alpha); assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); const SCEV *A_K = findCoefficient(Src, CurLoop); @@ -3139,8 +3136,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src, const SCEVConstant *Aconst = dyn_cast(A); const SCEVConstant *Cconst = dyn_cast(C); if (!Aconst || !Cconst) return false; - APInt Alpha = Aconst->getValue()->getValue(); - APInt Charlie = Cconst->getValue()->getValue(); + APInt Alpha = Aconst->getAPInt(); + APInt Charlie = Cconst->getAPInt(); APInt CdivA = Charlie.sdiv(Alpha); assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A"); const SCEV *A_K = findCoefficient(Src, CurLoop); @@ -3244,20 +3241,36 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, /// source and destination array references are recurrences on a nested loop, /// this function flattens the nested recurrences into separate recurrences /// for each loop level. 
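/// For example, an access A[i][j] into "double A[n][m]" appears to SCEV as
/// roughly {{A,+,(8*m)}<outer>,+,8}<inner>; delinearization recovers the
/// per-loop subscripts i and j so each loop level can be tested
/// independently.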
-bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, - const SCEV *DstSCEV, - SmallVectorImpl &Pair, - const SCEV *ElementSize) { +bool DependenceAnalysis::tryDelinearize(Instruction *Src, + Instruction *Dst, + SmallVectorImpl &Pair) +{ + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); + + Loop *SrcLoop = LI->getLoopFor(Src->getParent()); + Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + + // Below code mimics the code in Delinearization.cpp + const SCEV *SrcAccessFn = + SE->getSCEVAtScope(SrcPtr, SrcLoop); + const SCEV *DstAccessFn = + SE->getSCEVAtScope(DstPtr, DstLoop); + const SCEVUnknown *SrcBase = - dyn_cast(SE->getPointerBase(SrcSCEV)); + dyn_cast(SE->getPointerBase(SrcAccessFn)); const SCEVUnknown *DstBase = - dyn_cast(SE->getPointerBase(DstSCEV)); + dyn_cast(SE->getPointerBase(DstAccessFn)); if (!SrcBase || !DstBase || SrcBase != DstBase) return false; - SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); - DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); + const SCEV *ElementSize = SE->getElementSize(Src); + if (ElementSize != SE->getElementSize(Dst)) + return false; + + const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); + const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); const SCEVAddRecExpr *SrcAR = dyn_cast(SrcSCEV); const SCEVAddRecExpr *DstAR = dyn_cast(DstSCEV); @@ -3330,7 +3343,6 @@ static void dumpSmallBitVector(SmallBitVector &BV) { } #endif - // depends - // Returns NULL if there is no dependence. // Otherwise, return a Dependence with as many details as possible. @@ -3425,10 +3437,11 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, Pair[0].Dst = DstSCEV; } - if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { - DEBUG(dbgs() << " delinerized GEP\n"); - Pairs = Pair.size(); + if (Delinearize && CommonLevels > 1) { + if (tryDelinearize(Src, Dst, Pair)) { + DEBUG(dbgs() << " delinerized GEP\n"); + Pairs = Pair.size(); + } } for (unsigned P = 0; P < Pairs; ++P) { @@ -3746,9 +3759,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst, return nullptr; } - auto Final = make_unique(Result); - Result.DV = nullptr; - return std::move(Final); + return make_unique(std::move(Result)); } @@ -3852,10 +3863,11 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep, Pair[0].Dst = DstSCEV; } - if (Delinearize && Pairs == 1 && CommonLevels > 1 && - tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { - DEBUG(dbgs() << " delinerized GEP\n"); - Pairs = Pair.size(); + if (Delinearize && CommonLevels > 1) { + if (tryDelinearize(Src, Dst, Pair)) { + DEBUG(dbgs() << " delinerized GEP\n"); + Pairs = Pair.size(); + } } for (unsigned P = 0; P < Pairs; ++P) { diff --git a/lib/Analysis/DivergenceAnalysis.cpp b/lib/Analysis/DivergenceAnalysis.cpp index e5ee2959c15d..5ae6d74130a7 100644 --- a/lib/Analysis/DivergenceAnalysis.cpp +++ b/lib/Analysis/DivergenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===// +//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==// // // The LLVM Compiler Infrastructure // @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines divergence analysis which determines whether a branch in a -// GPU program is divergent. 
It can help branch optimizations such as jump
+// This file implements divergence analysis which determines whether a branch
+// in a GPU program is divergent. It can help branch optimizations such as jump
 // threading and loop unswitching to make better decisions.
 //
 // GPU programs typically use the SIMD execution model, where multiple threads
@@ -61,75 +61,31 @@
 // 2. memory as black box. It conservatively considers values loaded from
 // generic or local address as divergent. This can be improved by leveraging
 // pointer analysis.
+//
 //===----------------------------------------------------------------------===//
 
-#include <vector>
-#include "llvm/IR/Dominators.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/DivergenceAnalysis.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Value.h"
-#include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar.h"
+#include <vector>
 using namespace llvm;
 
-#define DEBUG_TYPE "divergence"
-
-namespace {
-class DivergenceAnalysis : public FunctionPass {
-public:
-  static char ID;
-
-  DivergenceAnalysis() : FunctionPass(ID) {
-    initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<PostDominatorTree>();
-    AU.setPreservesAll();
-  }
-
-  bool runOnFunction(Function &F) override;
-
-  // Print all divergent branches in the function.
-  void print(raw_ostream &OS, const Module *) const override;
-
-  // Returns true if V is divergent.
-  bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
-
-  // Returns true if V is uniform/non-divergent.
-  bool isUniform(const Value *V) const { return !isDivergent(V); }
-
-private:
-  // Stores all divergent values.
-  DenseSet<const Value *> DivergentValues;
-};
-} // End of anonymous namespace
-
-// Register this pass.
-char DivergenceAnalysis::ID = 0;
-INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis",
-                      false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis",
-                    false, true)
-
 namespace {
 
 class DivergencePropagator {
 public:
-  DivergencePropagator(Function &F, TargetTransformInfo &TTI,
-                       DominatorTree &DT, PostDominatorTree &PDT,
-                       DenseSet<const Value *> &DV)
+  DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
+                       PostDominatorTree &PDT, DenseSet<const Value *> &DV)
       : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}
   void populateWithSourcesOfDivergence();
   void propagate();
@@ -140,7 +96,7 @@ private:
   // A helper function that explores sync dependents of TI.
   void exploreSyncDependency(TerminatorInst *TI);
   // Computes the influence region from Start to End. This region includes all
-  // basic blocks on any path from Start to End.
+  // basic blocks on any simple path from Start to End.
   void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
                               DenseSet<BasicBlock *> &InfluenceRegion);
   // Finds all users of I that are outside the influence region, and add these
@@ -153,13 +109,13 @@ private:
   DominatorTree &DT;
   PostDominatorTree &PDT;
   std::vector<Value *> Worklist; // Stack for DFS.
-  DenseSet<const Value *> &DV; // Stores all divergent values.
+ DenseSet &DV; // Stores all divergent values. }; void DivergencePropagator::populateWithSourcesOfDivergence() { Worklist.clear(); DV.clear(); - for (auto &I : inst_range(F)) { + for (auto &I : instructions(F)) { if (TTI.isSourceOfDivergence(&I)) { Worklist.push_back(&I); DV.insert(&I); @@ -191,8 +147,8 @@ void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) { for (auto I = IPostDom->begin(); isa(I); ++I) { // A PHINode is uniform if it returns the same value no matter which path is // taken. - if (!cast(I)->hasConstantValue() && DV.insert(I).second) - Worklist.push_back(I); + if (!cast(I)->hasConstantValue() && DV.insert(&*I).second) + Worklist.push_back(&*I); } // Propagation rule 2: if a value defined in a loop is used outside, the user @@ -242,21 +198,33 @@ void DivergencePropagator::findUsersOutsideInfluenceRegion( } } +// A helper function for computeInfluenceRegion that adds successors of "ThisBB" +// to the influence region. +static void +addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End, + DenseSet &InfluenceRegion, + std::vector &InfluenceStack) { + for (BasicBlock *Succ : successors(ThisBB)) { + if (Succ != End && InfluenceRegion.insert(Succ).second) + InfluenceStack.push_back(Succ); + } +} + void DivergencePropagator::computeInfluenceRegion( BasicBlock *Start, BasicBlock *End, DenseSet &InfluenceRegion) { assert(PDT.properlyDominates(End, Start) && "End does not properly dominate Start"); + + // The influence region starts from the end of "Start" to the beginning of + // "End". Therefore, "Start" should not be in the region unless "Start" is in + // a loop that doesn't contain "End". std::vector InfluenceStack; - InfluenceStack.push_back(Start); - InfluenceRegion.insert(Start); + addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack); while (!InfluenceStack.empty()) { BasicBlock *BB = InfluenceStack.back(); InfluenceStack.pop_back(); - for (BasicBlock *Succ : successors(BB)) { - if (End != Succ && InfluenceRegion.insert(Succ).second) - InfluenceStack.push_back(Succ); - } + addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack); } } @@ -286,10 +254,25 @@ void DivergencePropagator::propagate() { } /// end namespace anonymous +// Register this pass. +char DivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", + false, true) + FunctionPass *llvm::createDivergenceAnalysisPass() { return new DivergenceAnalysis(); } +void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); +} + bool DivergenceAnalysis::runOnFunction(Function &F) { auto *TTIWP = getAnalysisIfAvailable(); if (TTIWP == nullptr) @@ -329,8 +312,8 @@ void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const { if (DivergentValues.count(&Arg)) OS << "DIVERGENT: " << Arg << "\n"; } - // Iterate instructions using inst_range to ensure a deterministic order. - for (auto &I : inst_range(F)) { + // Iterate instructions using instructions() to ensure a deterministic order. 
+ for (auto &I : instructions(F)) { if (DivergentValues.count(&I)) OS << "DIVERGENT:" << I << "\n"; } diff --git a/lib/Analysis/EHPersonalities.cpp b/lib/Analysis/EHPersonalities.cpp new file mode 100644 index 000000000000..01be8b38fadd --- /dev/null +++ b/lib/Analysis/EHPersonalities.cpp @@ -0,0 +1,106 @@ +//===- EHPersonalities.cpp - Compute EH-related information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// See if the given exception handling personality function is one that we +/// understand. If so, return a description of it; otherwise return Unknown. +EHPersonality llvm::classifyEHPersonality(const Value *Pers) { + const Function *F = + Pers ? dyn_cast(Pers->stripPointerCasts()) : nullptr; + if (!F) + return EHPersonality::Unknown; + return StringSwitch(F->getName()) + .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) + .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) + .Case("__gcc_personality_v0", EHPersonality::GNU_C) + .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) + .Case("_except_handler3", EHPersonality::MSVC_X86SEH) + .Case("_except_handler4", EHPersonality::MSVC_X86SEH) + .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) + .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) + .Case("ProcessCLRException", EHPersonality::CoreCLR) + .Default(EHPersonality::Unknown); +} + +bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { + EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn()); + // We can't simplify any invokes to nounwind functions if the personality + // function wants to catch asynch exceptions. The nounwind attribute only + // implies that the function does not throw synchronous exceptions. + return !isAsynchronousEHPersonality(Personality); +} + +DenseMap llvm::colorEHFunclets(Function &F) { + SmallVector, 16> Worklist; + BasicBlock *EntryBlock = &F.getEntryBlock(); + DenseMap BlockColors; + + // Build up the color map, which maps each block to its set of 'colors'. + // For any block B the "colors" of B are the set of funclets F (possibly + // including a root "funclet" representing the main function) such that + // F will need to directly contain B or a copy of B (where the term "directly + // contain" is used to distinguish from being "transitively contained" in + // a nested funclet). + // + // Note: Despite not being a funclet in the truest sense, a catchswitch is + // considered to belong to its own funclet for the purposes of coloring. + + DEBUG_WITH_TYPE("winehprepare-coloring", dbgs() << "\nColoring funclets for " + << F.getName() << "\n"); + + Worklist.push_back({EntryBlock, EntryBlock}); + + while (!Worklist.empty()) { + BasicBlock *Visiting; + BasicBlock *Color; + std::tie(Visiting, Color) = Worklist.pop_back_val(); + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << "Visiting " << Visiting->getName() << ", " + << Color->getName() << "\n"); + Instruction *VisitingHead = Visiting->getFirstNonPHI(); + if (VisitingHead->isEHPad()) { + // Mark this funclet head as a member of itself. 
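+      // For example, when the walk reaches a catchpad block, that block
+      // becomes the current color, so the blocks it reaches are attributed
+      // to the catchpad funclet rather than to its parent.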
+ Color = Visiting; + } + // Note that this is a member of the given color. + ColorVector &Colors = BlockColors[Visiting]; + if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end()) + Colors.push_back(Color); + else + continue; + + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << " Assigned color \'" << Color->getName() + << "\' to block \'" << Visiting->getName() + << "\'.\n"); + + BasicBlock *SuccColor = Color; + TerminatorInst *Terminator = Visiting->getTerminator(); + if (auto *CatchRet = dyn_cast(Terminator)) { + Value *ParentPad = CatchRet->getParentPad(); + if (isa(ParentPad)) + SuccColor = EntryBlock; + else + SuccColor = cast(ParentPad)->getParent(); + } + + for (BasicBlock *Succ : successors(Visiting)) + Worklist.push_back({Succ, SuccColor}); + } + return BlockColors; +} diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp new file mode 100644 index 000000000000..ab2263ae374e --- /dev/null +++ b/lib/Analysis/GlobalsModRef.cpp @@ -0,0 +1,1002 @@ +//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass provides alias and mod/ref information for global values +// that do not have their address taken, and keeps track of whether functions +// read or write memory (are "pure"). For this simple (but very common) case, +// we can provide pretty accurate and useful information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +#define DEBUG_TYPE "globalsmodref-aa" + +STATISTIC(NumNonAddrTakenGlobalVars, + "Number of global vars without address taken"); +STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); +STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); +STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); +STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); + +// An option to enable unsafe alias results from the GlobalsModRef analysis. +// When enabled, GlobalsModRef will provide no-alias results which in extremely +// rare cases may not be conservatively correct. In particular, in the face of +// transforms which cause assymetry between how effective GetUnderlyingObject +// is for two pointers, it may produce incorrect results. +// +// These unsafe results have been returned by GMR for many years without +// causing significant issues in the wild and so we provide a mechanism to +// re-enable them for users of LLVM that have a particular performance +// sensitivity and no known issues. The option also makes it easy to evaluate +// the performance impact of these results. 
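+//
+// (The option below defaults to off; it can be re-enabled by passing
+// -enable-unsafe-globalsmodref-alias-results to a tool that runs this
+// analysis, e.g. opt.)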
+static cl::opt EnableUnsafeGlobalsModRefAliasResults( + "enable-unsafe-globalsmodref-alias-results", cl::init(false), cl::Hidden); + +/// The mod/ref information collected for a particular function. +/// +/// We collect information about mod/ref behavior of a function here, both in +/// general and as pertains to specific globals. We only have this detailed +/// information when we know *something* useful about the behavior. If we +/// saturate to fully general mod/ref, we remove the info for the function. +class GlobalsAAResult::FunctionInfo { + typedef SmallDenseMap GlobalInfoMapType; + + /// Build a wrapper struct that has 8-byte alignment. All heap allocations + /// should provide this much alignment at least, but this makes it clear we + /// specifically rely on this amount of alignment. + struct LLVM_ALIGNAS(8) AlignedMap { + AlignedMap() {} + AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {} + GlobalInfoMapType Map; + }; + + /// Pointer traits for our aligned map. + struct AlignedMapPointerTraits { + static inline void *getAsVoidPointer(AlignedMap *P) { return P; } + static inline AlignedMap *getFromVoidPointer(void *P) { + return (AlignedMap *)P; + } + enum { NumLowBitsAvailable = 3 }; + static_assert(AlignOf::Alignment >= (1 << NumLowBitsAvailable), + "AlignedMap insufficiently aligned to have enough low bits."); + }; + + /// The bit that flags that this function may read any global. This is + /// chosen to mix together with ModRefInfo bits. + enum { MayReadAnyGlobal = 4 }; + + /// Checks to document the invariants of the bit packing here. + static_assert((MayReadAnyGlobal & MRI_ModRef) == 0, + "ModRef and the MayReadAnyGlobal flag bits overlap."); + static_assert(((MayReadAnyGlobal | MRI_ModRef) >> + AlignedMapPointerTraits::NumLowBitsAvailable) == 0, + "Insufficient low bits to store our flag and ModRef info."); + +public: + FunctionInfo() : Info() {} + ~FunctionInfo() { + delete Info.getPointer(); + } + // Spell out the copy ond move constructors and assignment operators to get + // deep copy semantics and correct move semantics in the face of the + // pointer-int pair. + FunctionInfo(const FunctionInfo &Arg) + : Info(nullptr, Arg.Info.getInt()) { + if (const auto *ArgPtr = Arg.Info.getPointer()) + Info.setPointer(new AlignedMap(*ArgPtr)); + } + FunctionInfo(FunctionInfo &&Arg) + : Info(Arg.Info.getPointer(), Arg.Info.getInt()) { + Arg.Info.setPointerAndInt(nullptr, 0); + } + FunctionInfo &operator=(const FunctionInfo &RHS) { + delete Info.getPointer(); + Info.setPointerAndInt(nullptr, RHS.Info.getInt()); + if (const auto *RHSPtr = RHS.Info.getPointer()) + Info.setPointer(new AlignedMap(*RHSPtr)); + return *this; + } + FunctionInfo &operator=(FunctionInfo &&RHS) { + delete Info.getPointer(); + Info.setPointerAndInt(RHS.Info.getPointer(), RHS.Info.getInt()); + RHS.Info.setPointerAndInt(nullptr, 0); + return *this; + } + + /// Returns the \c ModRefInfo info for this function. + ModRefInfo getModRefInfo() const { + return ModRefInfo(Info.getInt() & MRI_ModRef); + } + + /// Adds new \c ModRefInfo for this function to its state. + void addModRefInfo(ModRefInfo NewMRI) { + Info.setInt(Info.getInt() | NewMRI); + } + + /// Returns whether this function may read any global variable, and we don't + /// know which global. + bool mayReadAnyGlobal() const { return Info.getInt() & MayReadAnyGlobal; } + + /// Sets this function as potentially reading from any global. 
+ void setMayReadAnyGlobal() { Info.setInt(Info.getInt() | MayReadAnyGlobal); } + + /// Returns the \c ModRefInfo info for this function w.r.t. a particular + /// global, which may be more precise than the general information above. + ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const { + ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef; + if (AlignedMap *P = Info.getPointer()) { + auto I = P->Map.find(&GV); + if (I != P->Map.end()) + GlobalMRI = ModRefInfo(GlobalMRI | I->second); + } + return GlobalMRI; + } + + /// Add mod/ref info from another function into ours, saturating towards + /// MRI_ModRef. + void addFunctionInfo(const FunctionInfo &FI) { + addModRefInfo(FI.getModRefInfo()); + + if (FI.mayReadAnyGlobal()) + setMayReadAnyGlobal(); + + if (AlignedMap *P = FI.Info.getPointer()) + for (const auto &G : P->Map) + addModRefInfoForGlobal(*G.first, G.second); + } + + void addModRefInfoForGlobal(const GlobalValue &GV, ModRefInfo NewMRI) { + AlignedMap *P = Info.getPointer(); + if (!P) { + P = new AlignedMap(); + Info.setPointer(P); + } + auto &GlobalMRI = P->Map[&GV]; + GlobalMRI = ModRefInfo(GlobalMRI | NewMRI); + } + + /// Clear a global's ModRef info. Should be used when a global is being + /// deleted. + void eraseModRefInfoForGlobal(const GlobalValue &GV) { + if (AlignedMap *P = Info.getPointer()) + P->Map.erase(&GV); + } + +private: + /// All of the information is encoded into a single pointer, with a three bit + /// integer in the low three bits. The high bit provides a flag for when this + /// function may read any global. The low two bits are the ModRefInfo. And + /// the pointer, when non-null, points to a map from GlobalValue to + /// ModRefInfo specific to that GlobalValue. + PointerIntPair Info; +}; + +void GlobalsAAResult::DeletionCallbackHandle::deleted() { + Value *V = getValPtr(); + if (auto *F = dyn_cast(V)) + GAR->FunctionInfos.erase(F); + + if (GlobalValue *GV = dyn_cast(V)) { + if (GAR->NonAddressTakenGlobals.erase(GV)) { + // This global might be an indirect global. If so, remove it and + // remove any AllocRelatedValues for it. + if (GAR->IndirectGlobals.erase(GV)) { + // Remove any entries in AllocsForIndirectGlobals for this global. + for (auto I = GAR->AllocsForIndirectGlobals.begin(), + E = GAR->AllocsForIndirectGlobals.end(); + I != E; ++I) + if (I->second == GV) + GAR->AllocsForIndirectGlobals.erase(I); + } + + // Scan the function info we have collected and remove this global + // from all of them. + for (auto &FIPair : GAR->FunctionInfos) + FIPair.second.eraseModRefInfoForGlobal(*GV); + } + } + + // If this is an allocation related to an indirect global, remove it. + GAR->AllocsForIndirectGlobals.erase(V); + + // And clear out the handle. + setValPtr(nullptr); + GAR->Handles.erase(I); + // This object is now destroyed! 
+
+FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) {
+  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+  if (FunctionInfo *FI = getFunctionInfo(F)) {
+    if (FI->getModRefInfo() == MRI_NoModRef)
+      Min = FMRB_DoesNotAccessMemory;
+    else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+      Min = FMRB_OnlyReadsMemory;
+  }
+
+  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
+}
+
+FunctionModRefBehavior
+GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) {
+  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
+
+  if (const Function *F = CS.getCalledFunction())
+    if (FunctionInfo *FI = getFunctionInfo(F)) {
+      if (FI->getModRefInfo() == MRI_NoModRef)
+        Min = FMRB_DoesNotAccessMemory;
+      else if ((FI->getModRefInfo() & MRI_Mod) == 0)
+        Min = FMRB_OnlyReadsMemory;
+    }
+
+  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
+}
+
+/// Returns the function info for the function, or null if we don't have
+/// anything useful to say about it.
+GlobalsAAResult::FunctionInfo *
+GlobalsAAResult::getFunctionInfo(const Function *F) {
+  auto I = FunctionInfos.find(F);
+  if (I != FunctionInfos.end())
+    return &I->second;
+  return nullptr;
+}
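
Both overloads above intersect their answer with the base analysis by bitwise AND, which works because the behavior values form a small bit-set lattice. A hypothetical three-level sketch of that meet operation:

enum Behavior : unsigned {
  DoesNotAccess = 0, // neither mods nor refs memory
  ReadsOnly = 1,     // ref bit only
  Unknown = 3        // mod and ref bits set
};

inline Behavior meet(Behavior A, Behavior B) { return Behavior(A & B); }

// e.g. meet(ReadsOnly, Unknown) == ReadsOnly: another analysis proving
// "reads only" can only sharpen the combined answer, never weaken it.
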
+
+/// AnalyzeGlobals - Scan through the users of all of the internal
+/// GlobalValue's in the program. If none of them have their "address taken"
+/// (really, their address passed to something nontrivial), record this fact,
+/// and record the functions that they are used directly in.
+void GlobalsAAResult::AnalyzeGlobals(Module &M) {
+  SmallPtrSet<Function *, 32> TrackedFunctions;
+  for (Function &F : M)
+    if (F.hasLocalLinkage())
+      if (!AnalyzeUsesOfPointer(&F)) {
+        // Remember that we are tracking this global.
+        NonAddressTakenGlobals.insert(&F);
+        TrackedFunctions.insert(&F);
+        Handles.emplace_front(*this, &F);
+        Handles.front().I = Handles.begin();
+        ++NumNonAddrTakenFunctions;
+      }
+
+  SmallPtrSet<Function *, 16> Readers, Writers;
+  for (GlobalVariable &GV : M.globals())
+    if (GV.hasLocalLinkage()) {
+      if (!AnalyzeUsesOfPointer(&GV, &Readers,
+                                GV.isConstant() ? nullptr : &Writers)) {
+        // Remember that we are tracking this global, and the mod/ref fns
+        NonAddressTakenGlobals.insert(&GV);
+        Handles.emplace_front(*this, &GV);
+        Handles.front().I = Handles.begin();
+
+        for (Function *Reader : Readers) {
+          if (TrackedFunctions.insert(Reader).second) {
+            Handles.emplace_front(*this, Reader);
+            Handles.front().I = Handles.begin();
+          }
+          FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref);
+        }
+
+        if (!GV.isConstant()) // No need to keep track of writers to constants
+          for (Function *Writer : Writers) {
+            if (TrackedFunctions.insert(Writer).second) {
+              Handles.emplace_front(*this, Writer);
+              Handles.front().I = Handles.begin();
+            }
+            FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod);
+          }
+        ++NumNonAddrTakenGlobalVars;
+
+        // If this global holds a pointer type, see if it is an indirect
+        // global.
+        if (GV.getType()->getElementType()->isPointerTy() &&
+            AnalyzeIndirectGlobalMemory(&GV))
+          ++NumIndirectGlobalVars;
+      }
+      Readers.clear();
+      Writers.clear();
+    }
+}
+
+/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer.
+/// If this is used by anything complex (i.e., the address escapes), return
+/// true. Also, while we are at it, keep track of those functions that read and
+/// write to the value.
+///
+/// If OkayStoreDest is non-null, stores into this global are allowed.
+bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
+                                           SmallPtrSetImpl<Function *> *Readers,
+                                           SmallPtrSetImpl<Function *> *Writers,
+                                           GlobalValue *OkayStoreDest) {
+  if (!V->getType()->isPointerTy())
+    return true;
+
+  for (Use &U : V->uses()) {
+    User *I = U.getUser();
+    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+      if (Readers)
+        Readers->insert(LI->getParent()->getParent());
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+      if (V == SI->getOperand(1)) {
+        if (Writers)
+          Writers->insert(SI->getParent()->getParent());
+      } else if (SI->getOperand(1) != OkayStoreDest) {
+        return true; // Storing the pointer
+      }
+    } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) {
+      if (AnalyzeUsesOfPointer(I, Readers, Writers))
+        return true;
+    } else if (Operator::getOpcode(I) == Instruction::BitCast) {
+      if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest))
+        return true;
+    } else if (auto CS = CallSite(I)) {
+      // Make sure that this is just the function being called, not that it is
+      // passing into the function.
+      if (CS.isDataOperand(&U)) {
+        // Detect calls to free.
+        if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
+          if (Writers)
+            Writers->insert(CS->getParent()->getParent());
+        } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) {
+          Function *ParentF = CS->getParent()->getParent();
+          // A nocapture argument may be read from or written to, but does not
+          // escape unless the call can somehow recurse.
+          //
+          // nocapture "indicates that the callee does not make any copies of
+          // the pointer that outlive itself". Therefore if we directly or
+          // indirectly recurse, we must treat the pointer as escaping.
+          if (FunctionToSCCMap[ParentF] ==
+              FunctionToSCCMap[CS.getCalledFunction()])
+            return true;
+          if (Readers)
+            Readers->insert(ParentF);
+          if (Writers)
+            Writers->insert(ParentF);
+        } else {
+          return true; // Argument of an unknown call.
+        }
+        // If the Callee is not ReadNone, it may read the global,
+        // and if it is not ReadOnly, it may also write to it.
+        Function *CalleeF = CS.getCalledFunction();
+        if (!CalleeF->doesNotAccessMemory()) {
+          if (Readers)
+            Readers->insert(CalleeF);
+          if (Writers && !CalleeF->onlyReadsMemory())
+            Writers->insert(CalleeF);
+        }
+      }
+    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
+      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+        return true; // Allow comparison against null.
+    } else {
+      return true;
+    }
+  }
+
+  return false;
+}
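
A source-level illustration of why the same-SCC check above voids the nocapture reasoning; the functions are hypothetical. g may promise not to keep copies of P that outlive g itself, but if g can re-enter f, f's body runs again while g still holds P, so from f's perspective the pointer behaves as if it escaped:

void g(int *P); // imagine: declared with a nocapture-style guarantee

void f(int *P) {
  g(P); // safe to treat P as non-escaping only if g can never reach f again
}

void g(int *P) {
  static bool Reenter = true;
  if (Reenter) {
    Reenter = false;
    f(P); // f and g now share an SCC, so P must be treated as escaping
  }
}
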
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type. See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere. If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) {
+  // Keep track of values related to the allocation of the memory, e.g. the
+  // value produced by the malloc call and any casts.
+  std::vector<Value *> AllocRelatedValues;
+
+  // If the initializer is a valid pointer, bail.
+  if (Constant *C = GV->getInitializer())
+    if (!C->isNullValue())
+      return false;
+
+  // Walk the user list of the global. If we find anything other than a direct
+  // load or store, bail out.
+  for (User *U : GV->users()) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      // The pointer loaded from the global can only be used in simple ways:
+      // we allow addressing of it and loading and storing to it. We do *not*
+      // allow storing the loaded pointer somewhere else or passing it to a
+      // function.
+      if (AnalyzeUsesOfPointer(LI))
+        return false; // Loaded pointer escapes.
+      // TODO: Could try some IP mod/ref of the loaded pointer.
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      // Storing the global itself.
+      if (SI->getOperand(0) == GV)
+        return false;
+
+      // If storing the null pointer, ignore it.
+      if (isa<ConstantPointerNull>(SI->getOperand(0)))
+        continue;
+
+      // Check the value being stored.
+      Value *Ptr = GetUnderlyingObject(SI->getOperand(0),
+                                       GV->getParent()->getDataLayout());
+
+      if (!isAllocLikeFn(Ptr, &TLI))
+        return false; // Too hard to analyze.
+
+      // Analyze all uses of the allocation. If any of them are used in a
+      // non-simple way (e.g. stored to another global) bail out.
+      if (AnalyzeUsesOfPointer(Ptr, /*Readers*/ nullptr, /*Writers*/ nullptr,
+                               GV))
+        return false; // Loaded pointer escapes.
+
+      // Remember that this allocation is related to the indirect global.
+      AllocRelatedValues.push_back(Ptr);
+    } else {
+      // Something complex, bail out.
+      return false;
+    }
+  }
+
+  // Okay, this is an indirect global. Remember all of the allocations for
+  // this global in AllocsForIndirectGlobals.
+  while (!AllocRelatedValues.empty()) {
+    AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV;
+    Handles.emplace_front(*this, AllocRelatedValues.back());
+    Handles.front().I = Handles.begin();
+    AllocRelatedValues.pop_back();
+  }
+  IndirectGlobals.insert(GV);
+  Handles.emplace_front(*this, GV);
+  Handles.front().I = Handles.begin();
+  return true;
+}
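
The source-level pattern this function recognizes, in a hypothetical example: an internal global that only ever holds freshly allocated memory, so the pointee is "owned" by the global and can be disambiguated from other pointers:

static int *Cache; // internal linkage; the global's own address is never taken

void init() {
  Cache = new int[16](); // every store is a fresh, otherwise-unused allocation
}

int lookup(unsigned I) {
  return Cache[I]; // loads use the memory directly; the pointer never escapes
}
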
+
+void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {
+  // We do a bottom-up SCC traversal of the call graph. In other words, we
+  // visit all callees before callers (leaf-first).
+  unsigned SCCID = 0;
+  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+    const std::vector<CallGraphNode *> &SCC = *I;
+    assert(!SCC.empty() && "SCC with no functions?");
+
+    for (auto *CGN : SCC)
+      if (Function *F = CGN->getFunction())
+        FunctionToSCCMap[F] = SCCID;
+    ++SCCID;
+  }
+}
+
+/// AnalyzeCallGraph - At this point, we know the functions where globals are
+/// immediately stored to and read from. Propagate this information up the call
+/// graph to all callers and compute the mod/ref info for all memory for each
+/// function.
+void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
+  // We do a bottom-up SCC traversal of the call graph. In other words, we
+  // visit all callees before callers (leaf-first).
+  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+    const std::vector<CallGraphNode *> &SCC = *I;
+    assert(!SCC.empty() && "SCC with no functions?");
+
+    if (!SCC[0]->getFunction() || SCC[0]->getFunction()->mayBeOverridden()) {
+      // Calls externally or is weak - can't say anything useful. Remove any
+      // existing function records (may have been created when scanning
+      // globals).
+      for (auto *Node : SCC)
+        FunctionInfos.erase(Node->getFunction());
+      continue;
+    }
+
+    FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()];
+    bool KnowNothing = false;
+
+    // Collect the mod/ref properties due to called functions. We only compute
+    // one mod-ref set.
+    for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) {
+      Function *F = SCC[i]->getFunction();
+      if (!F) {
+        KnowNothing = true;
+        break;
+      }
+
+      if (F->isDeclaration()) {
+        // Try to get mod/ref behaviour from function attributes.
+        if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) {
+          // Can't do better than that!
+        } else if (F->onlyReadsMemory()) {
+          FI.addModRefInfo(MRI_Ref);
+          if (!F->isIntrinsic())
+            // This function might call back into the module and read a global
+            // - consider every global as possibly being read by this function.
+            FI.setMayReadAnyGlobal();
+        } else if (F->onlyAccessesArgMemory() ||
+                   F->onlyAccessesInaccessibleMemOrArgMem()) {
+          // This function may only access (read/write) memory pointed to by
+          // its arguments. If this pointer is to a global, this escaping use
+          // of the pointer is captured in AnalyzeUsesOfPointer().
+          FI.addModRefInfo(MRI_ModRef);
+        } else {
+          FI.addModRefInfo(MRI_ModRef);
+          // Can't say anything useful unless it's an intrinsic - they don't
+          // read or write global variables of the kind considered here.
+          KnowNothing = !F->isIntrinsic();
+        }
+        continue;
+      }
+
+      for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end();
+           CI != E && !KnowNothing; ++CI)
+        if (Function *Callee = CI->second->getFunction()) {
+          if (FunctionInfo *CalleeFI = getFunctionInfo(Callee)) {
+            // Propagate function effect up.
+            FI.addFunctionInfo(*CalleeFI);
+          } else {
+            // Can't say anything about it. However, if it is inside our SCC,
+            // then nothing needs to be done.
+            CallGraphNode *CalleeNode = CG[Callee];
+            if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+              KnowNothing = true;
+          }
+        } else {
+          KnowNothing = true;
+        }
+    }
+
+    // If we can't say anything useful about this SCC, remove all SCC functions
+    // from the FunctionInfos map.
+    if (KnowNothing) {
+      for (auto *Node : SCC)
+        FunctionInfos.erase(Node->getFunction());
+      continue;
+    }
+
+    // Scan the function bodies for explicit loads or stores.
+    for (auto *Node : SCC) {
+      if (FI.getModRefInfo() == MRI_ModRef)
+        break; // The mod/ref lattice saturates here.
+      for (Instruction &I : instructions(Node->getFunction())) {
+        if (FI.getModRefInfo() == MRI_ModRef)
+          break; // The mod/ref lattice saturates here.
+
+        // We handle calls specially because the graph-relevant aspects are
+        // handled above.
+        if (auto CS = CallSite(&I)) {
+          if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) {
+            // FIXME: It is completely unclear why this is necessary and not
+            // handled by the above graph code.
+            FI.addModRefInfo(MRI_ModRef);
+          } else if (Function *Callee = CS.getCalledFunction()) {
+            // The callgraph doesn't include intrinsic calls.
+            if (Callee->isIntrinsic()) {
+              FunctionModRefBehavior Behaviour =
+                  AAResultBase::getModRefBehavior(Callee);
+              FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef));
+            }
+          }
+          continue;
+        }
+
+        // For all non-call instructions we use the primary predicates for
+        // whether they read or write memory.
+        if (I.mayReadFromMemory())
+          FI.addModRefInfo(MRI_Ref);
+        if (I.mayWriteToMemory())
+          FI.addModRefInfo(MRI_Mod);
+      }
+    }
+
+    if ((FI.getModRefInfo() & MRI_Mod) == 0)
+      ++NumReadMemFunctions;
+    if (FI.getModRefInfo() == MRI_NoModRef)
+      ++NumNoMemFunctions;
+
+    // Finally, now that we know the full effect on this SCC, clone the
+    // information to each function in the SCC.
+    // FI is a reference into FunctionInfos, so copy it now so that it doesn't
+    // get invalidated if DenseMap decides to re-hash.
+    FunctionInfo CachedFI = FI;
+    for (unsigned i = 1, e = SCC.size(); i != e; ++i)
+      FunctionInfos[SCC[i]->getFunction()] = CachedFI;
+  }
+}
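
The hazard behind the CachedFI copy at the end of the function above, sketched with a hypothetical map: llvm::DenseMap stores its elements inline, so an insertion that triggers growth can rehash and move them, invalidating references obtained earlier:

#include "llvm/ADT/DenseMap.h"
using namespace llvm;

void referenceInvalidation() {
  DenseMap<int, int> M;
  int &First = M[1]; // a reference into the table
  for (int I = 2; I < 100; ++I)
    M[I] = I;        // growth may rehash and move the buckets...
  // ...so 'First' may now dangle. Copying the value out before inserting
  // (the CachedFI pattern above) avoids the problem.
  (void)First;       // must not be used after the insertions
}
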
+
+// GV is a non-escaping global. V is a pointer address that has been loaded
+// from. If we can prove that V must escape, we can conclude that a load from
+// V cannot alias GV.
+static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV,
+                                               const Value *V,
+                                               int &Depth,
+                                               const DataLayout &DL) {
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Inputs;
+  Visited.insert(V);
+  Inputs.push_back(V);
+  do {
+    const Value *Input = Inputs.pop_back_val();
+
+    if (isa<GlobalValue>(Input) || isa<Argument>(Input) ||
+        isa<CallInst>(Input) || isa<InvokeInst>(Input))
+      // Arguments to functions or returns from functions are inherently
+      // escaping, so we can immediately classify those as not aliasing any
+      // non-addr-taken globals.
+      //
+      // (Transitive) loads from a global are also safe - if this aliased
+      // another global, its address would escape, so no alias.
+      continue;
+
+    // Recurse through a limited number of selects, loads and PHIs. This is an
+    // arbitrary depth of 4, lower numbers could be used to fix compile time
+    // issues if needed, but this is generally expected to only be important
+    // for small depths.
+    if (++Depth > 4)
+      return false;
+
+    if (auto *LI = dyn_cast<LoadInst>(Input)) {
+      Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL));
+      continue;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(Input)) {
+      const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+      const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+      if (Visited.insert(LHS).second)
+        Inputs.push_back(LHS);
+      if (Visited.insert(RHS).second)
+        Inputs.push_back(RHS);
+      continue;
+    }
+    if (auto *PN = dyn_cast<PHINode>(Input)) {
+      for (const Value *Op : PN->incoming_values()) {
+        Op = GetUnderlyingObject(Op, DL);
+        if (Visited.insert(Op).second)
+          Inputs.push_back(Op);
+      }
+      continue;
+    }
+
+    return false;
+  } while (!Inputs.empty());
+
+  // All inputs were known to be no-alias.
+  return true;
+}
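
A skeleton of the traversal shape used above, under assumed generic node and expansion types: an explicit worklist with a visited set for breadth, plus a small depth budget so compile time stays bounded, where running out of budget answers conservatively:

#include <unordered_set>
#include <vector>

template <typename NodeT, typename ExpandFn>
bool provedWithinBudget(NodeT Start, ExpandFn Expand, int MaxDepth = 4) {
  std::unordered_set<NodeT> Visited{Start};
  std::vector<NodeT> Worklist{Start};
  int Depth = 0;
  while (!Worklist.empty()) {
    NodeT N = Worklist.back();
    Worklist.pop_back();
    if (++Depth > MaxDepth)
      return false; // budget exhausted: give the conservative answer
    for (NodeT Next : Expand(N)) // Expand(N) yields nodes to look through
      if (Visited.insert(Next).second)
        Worklist.push_back(Next);
  }
  return true; // every reachable input was handled within budget
}
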
+
+// There are particular cases where we can conclude no-alias between
+// a non-addr-taken global and some other underlying object. Specifically,
+// a non-addr-taken global is known to not be escaped from any function. It is
+// also incorrect for a transformation to introduce an escape of a global in
+// a way that is observable when it was not there previously. One function
+// being transformed to introduce an escape which could possibly be observed
+// (via loading from a global or the return value for example) within another
+// function is never safe. If the observation is made through non-atomic
+// operations on different threads, it is a data-race and UB. If the
+// observation is well defined, by being observed the transformation would
+// have changed program behavior by introducing the observed escape, making
+// it an invalid transform.
+//
+// This property does require that transformations which *temporarily* escape
+// a global that was not previously escaped, prior to restoring it, cannot
+// rely on the results of GMR::alias. This seems a reasonable restriction,
+// although currently there is no way to enforce it. There is also no
+// realistic optimization pass that would make this mistake. The closest
+// example is a transformation pass which does reg2mem of SSA values but
+// stores them into global variables temporarily before restoring the global
+// variable's value. This could be useful to expose "benign" races for
+// example. However, it seems reasonable to require that a pass which
+// introduces escapes of global variables in this way either not trust AA
+// results while the escape is active, or be forced to operate as a module
+// pass that cannot co-exist with an alias analysis such as GMR.
+bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV,
+                                                 const Value *V) {
+  // In order to know that the underlying object cannot alias the
+  // non-addr-taken global, we must know that it would have to be an escape.
+  // Thus if the underlying object is a function argument, a load from
+  // a global, or the return of a function, it cannot alias. We can also
+  // recurse through PHI nodes and select nodes provided all of their inputs
+  // resolve to one of these known-escaping roots.
+  SmallPtrSet<const Value *, 8> Visited;
+  SmallVector<const Value *, 8> Inputs;
+  Visited.insert(V);
+  Inputs.push_back(V);
+  int Depth = 0;
+  do {
+    const Value *Input = Inputs.pop_back_val();
+
+    if (auto *InputGV = dyn_cast<GlobalValue>(Input)) {
+      // If one input is the very global we're querying against, then we can't
+      // conclude no-alias.
+      if (InputGV == GV)
+        return false;
+
+      // Distinct GlobalVariables never alias, unless overridden or zero-sized.
+      // FIXME: The condition can be refined, but be conservative for now.
+      auto *GVar = dyn_cast<GlobalVariable>(GV);
+      auto *InputGVar = dyn_cast<GlobalVariable>(InputGV);
+      if (GVar && InputGVar &&
+          !GVar->isDeclaration() && !InputGVar->isDeclaration() &&
+          !GVar->mayBeOverridden() && !InputGVar->mayBeOverridden()) {
+        Type *GVType = GVar->getInitializer()->getType();
+        Type *InputGVType = InputGVar->getInitializer()->getType();
+        if (GVType->isSized() && InputGVType->isSized() &&
+            (DL.getTypeAllocSize(GVType) > 0) &&
+            (DL.getTypeAllocSize(InputGVType) > 0))
+          continue;
+      }
+
+      // Conservatively return false, even though we could be smarter
+      // (e.g. look through GlobalAliases).
+      return false;
+    }
+
+    if (isa<Argument>(Input) || isa<CallInst>(Input) ||
+        isa<InvokeInst>(Input)) {
+      // Arguments to functions or returns from functions are inherently
+      // escaping, so we can immediately classify those as not aliasing any
+      // non-addr-taken globals.
+      continue;
+    }
+
+    // Recurse through a limited number of selects, loads and PHIs. This is an
+    // arbitrary depth of 4, lower numbers could be used to fix compile time
+    // issues if needed, but this is generally expected to only be important
+    // for small depths.
+    if (++Depth > 4)
+      return false;
+
+    if (auto *LI = dyn_cast<LoadInst>(Input)) {
+      // A pointer loaded from a global would have been captured, and we know
+      // that the global is non-escaping, so no alias.
+      const Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL);
+      if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL))
+        // The load does not alias with GV.
+        continue;
+      // Otherwise, a load could come from anywhere, so bail.
+      return false;
+    }
+    if (auto *SI = dyn_cast<SelectInst>(Input)) {
+      const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL);
+      const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL);
+      if (Visited.insert(LHS).second)
+        Inputs.push_back(LHS);
+      if (Visited.insert(RHS).second)
+        Inputs.push_back(RHS);
+      continue;
+    }
+    if (auto *PN = dyn_cast<PHINode>(Input)) {
+      for (const Value *Op : PN->incoming_values()) {
+        Op = GetUnderlyingObject(Op, DL);
+        if (Visited.insert(Op).second)
+          Inputs.push_back(Op);
+      }
+      continue;
+    }
+
+    // FIXME: It would be good to handle other obvious no-alias cases here, but
+    // it isn't clear how to do so reasonably without building a small version
+    // of BasicAA into this code. We could recurse into AAResultBase::alias
+    // here but that seems likely to go poorly as we're inside the
+    // implementation of such a query. Until then, just conservatively return
+    // false.
+    return false;
+  } while (!Inputs.empty());
+
+  // If all the inputs to V were definitively no-alias, then V is no-alias.
+  return true;
+}
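
What this conclusion buys in practice, in a hypothetical translation unit: if the unit never takes the address of an internal global, no incoming pointer can name it, so accesses through that pointer never alias the global:

static int Counter; // internal; '&Counter' appears nowhere in this TU

int bump(int *P) {
  Counter = 1;
  *P = 2;         // cannot store to Counter: P would require an escape of it
  return Counter; // safe to fold to 1 under this analysis
}
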
+
+/// alias - If one of the pointers is to a global that we are tracking, and the
+/// other is some random pointer, we know there cannot be an alias, because the
+/// address of the global isn't taken.
+AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA,
+                                   const MemoryLocation &LocB) {
+  // Get the base object these pointers point to.
+  const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL);
+  const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL);
+
+  // If either of the underlying values is a global, they may be non-addr-taken
+  // globals, which we can answer queries about.
+  const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+  const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+  if (GV1 || GV2) {
+    // If the global's address is taken, pretend we don't know it's a pointer
+    // to the global.
+    if (GV1 && !NonAddressTakenGlobals.count(GV1))
+      GV1 = nullptr;
+    if (GV2 && !NonAddressTakenGlobals.count(GV2))
+      GV2 = nullptr;
+
+    // If the two pointers are derived from two different non-addr-taken
+    // globals we know these can't alias.
+    if (GV1 && GV2 && GV1 != GV2)
+      return NoAlias;
+
+    // If one is and the other isn't, it isn't strictly safe but we can fake
+    // this result if necessary for performance. This does not appear to be
+    // a common problem in practice.
+    if (EnableUnsafeGlobalsModRefAliasResults)
+      if ((GV1 || GV2) && GV1 != GV2)
+        return NoAlias;
+
+    // Check for a special case where a non-escaping global can be used to
+    // conclude no-alias.
+    if ((GV1 || GV2) && GV1 != GV2) {
+      const GlobalValue *GV = GV1 ? GV1 : GV2;
+      const Value *UV = GV1 ? UV2 : UV1;
+      if (isNonEscapingGlobalNoAlias(GV, UV))
+        return NoAlias;
+    }
+
+    // Otherwise if they are both derived from the same addr-taken global, we
+    // can't know the two accesses don't overlap.
+  }
+
+  // These pointers may be based on the memory owned by an indirect global. If
+  // so, we may be able to handle this. First check to see if the base pointer
+  // is a direct load from an indirect global.
+  GV1 = GV2 = nullptr;
+  if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+      if (IndirectGlobals.count(GV))
+        GV1 = GV;
+  if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+      if (IndirectGlobals.count(GV))
+        GV2 = GV;
+
+  // These pointers may also be from an allocation for the indirect global. If
+  // so, also handle them.
+  if (!GV1)
+    GV1 = AllocsForIndirectGlobals.lookup(UV1);
+  if (!GV2)
+    GV2 = AllocsForIndirectGlobals.lookup(UV2);
+
+  // Now that we know whether the two pointers are related to indirect
+  // globals, use this to disambiguate the pointers. If the pointers are based
+  // on different indirect globals they cannot alias.
+  if (GV1 && GV2 && GV1 != GV2)
+    return NoAlias;
+
+  // If one is based on an indirect global and the other isn't, it isn't
+  // strictly safe but we can fake this result if necessary for performance.
+  // This does not appear to be a common problem in practice.
+  if (EnableUnsafeGlobalsModRefAliasResults)
+    if ((GV1 || GV2) && GV1 != GV2)
+      return NoAlias;
+
+  return AAResultBase::alias(LocA, LocB);
+}
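
The indirect-global half of alias() at source level, in a hypothetical example: two internal globals that each own their allocation can be told apart, so the two stores below provably do not overlap:

static int *A, *B; // indirect globals: only ever hold fresh allocations

void setup() {
  A = new int;
  B = new int;
}

void touch() {
  *A = 1; // based on the memory owned by A
  *B = 2; // based on the memory owned by B: no overlap with *A
}
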
+
+ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
+                                                     const GlobalValue *GV) {
+  if (CS.doesNotAccessMemory())
+    return MRI_NoModRef;
+  ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
+
+  // Iterate through all the arguments to the called function. If any argument
+  // is based on GV, return the conservative result.
+  for (auto &A : CS.args()) {
+    SmallVector<Value *, 4> Objects;
+    GetUnderlyingObjects(A, Objects, DL);
+
+    // All objects must be identified.
+    if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject))
+      return ConservativeResult;
+
+    if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
+      return ConservativeResult;
+  }
+
+  // We identified all objects in the argument list, and none of them were GV.
+  return MRI_NoModRef;
+}
+
+ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS,
+                                          const MemoryLocation &Loc) {
+  unsigned Known = MRI_ModRef;
+
+  // If we are asking for mod/ref info of a direct call with a pointer to a
+  // global we are tracking, return information if we have it.
+  if (const GlobalValue *GV =
+          dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL)))
+    if (GV->hasLocalLinkage())
+      if (const Function *F = CS.getCalledFunction())
+        if (NonAddressTakenGlobals.count(GV))
+          if (const FunctionInfo *FI = getFunctionInfo(F))
+            Known = FI->getModRefInfoForGlobal(*GV) |
+                    getModRefInfoForArgument(CS, GV);
+
+  if (Known == MRI_NoModRef)
+    return MRI_NoModRef; // No need to query other mod/ref analyses
+  return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc));
+}
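
The shape of the argument check in getModRefInfoForArgument, sketched with hypothetical types: only when every underlying object of an argument is identified, and none of them is GV, may the call be said not to touch GV:

#include <algorithm>
#include <vector>

bool mayArgumentTouch(const std::vector<const void *> &Objects,
                      const void *GV,
                      bool (*identified)(const void *)) {
  if (!std::all_of(Objects.begin(), Objects.end(), identified))
    return true; // some object is unknown: stay conservative
  return std::find(Objects.begin(), Objects.end(), GV) != Objects.end();
}
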
+
+GlobalsAAResult::GlobalsAAResult(const DataLayout &DL,
+                                 const TargetLibraryInfo &TLI)
+    : AAResultBase(TLI), DL(DL) {}
+
+GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg)
+    : AAResultBase(std::move(Arg)), DL(Arg.DL),
+      NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)),
+      IndirectGlobals(std::move(Arg.IndirectGlobals)),
+      AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)),
+      FunctionInfos(std::move(Arg.FunctionInfos)),
+      Handles(std::move(Arg.Handles)) {
+  // Update the parent for each DeletionCallbackHandle.
+  for (auto &H : Handles) {
+    assert(H.GAR == &Arg);
+    H.GAR = this;
+  }
+}
+
+/*static*/ GlobalsAAResult
+GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
+                               CallGraph &CG) {
+  GlobalsAAResult Result(M.getDataLayout(), TLI);
+
+  // Discover which functions aren't recursive, to feed into AnalyzeGlobals.
+  Result.CollectSCCMembership(CG);
+
+  // Find non-addr taken globals.
+  Result.AnalyzeGlobals(M);
+
+  // Propagate on CG.
+  Result.AnalyzeCallGraph(CG, M);
+
+  return Result;
+}
+
+GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> *AM) {
+  return GlobalsAAResult::analyzeModule(M,
+                                        AM->getResult<TargetLibraryAnalysis>(M),
+                                        AM->getResult<CallGraphAnalysis>(M));
+}
+
+char GlobalsAA::PassID;
+
+char GlobalsAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa",
+                      "Globals Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(GlobalsAAWrapperPass, "globals-aa",
+                    "Globals Alias Analysis", false, true)
+
+ModulePass *llvm::createGlobalsAAWrapperPass() {
+  return new GlobalsAAWrapperPass();
+}
+
+GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) {
+  initializeGlobalsAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool GlobalsAAWrapperPass::runOnModule(Module &M) {
+  Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule(
+      M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+      getAnalysis<CallGraphWrapperPass>().getCallGraph())));
+  return false;
+}
+
+bool GlobalsAAWrapperPass::doFinalization(Module &M) {
+  Result.reset();
+  return false;
+}
+
+void GlobalsAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<CallGraphWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
deleted file mode 100644
index 6095136d60a1..000000000000
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-add_llvm_library(LLVMipa
-  CallGraph.cpp
-  CallGraphSCCPass.cpp
-  CallPrinter.cpp
-  GlobalsModRef.cpp
-  IPA.cpp
-  InlineCost.cpp
-  )
-
-add_dependencies(LLVMipa intrinsics_gen)
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
deleted file mode 100644
index 28fb49c89019..000000000000
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ /dev/null
@@ -1,609 +0,0 @@
-//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This simple pass provides alias and mod/ref information for global values
-// that do not have their address taken, and keeps track of whether functions
-// read or write memory (are "pure"). For this simple (but very common) case,
-// we can provide pretty accurate and useful information.
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include -using namespace llvm; - -#define DEBUG_TYPE "globalsmodref-aa" - -STATISTIC(NumNonAddrTakenGlobalVars, - "Number of global vars without address taken"); -STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); -STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); -STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); -STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); - -namespace { -/// FunctionRecord - One instance of this structure is stored for every -/// function in the program. Later, the entries for these functions are -/// removed if the function is found to call an external function (in which -/// case we know nothing about it. -struct FunctionRecord { - /// GlobalInfo - Maintain mod/ref info for all of the globals without - /// addresses taken that are read or written (transitively) by this - /// function. - std::map GlobalInfo; - - /// MayReadAnyGlobal - May read global variables, but it is not known which. - bool MayReadAnyGlobal; - - unsigned getInfoForGlobal(const GlobalValue *GV) const { - unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; - std::map::const_iterator I = - GlobalInfo.find(GV); - if (I != GlobalInfo.end()) - Effect |= I->second; - return Effect; - } - - /// FunctionEffect - Capture whether or not this function reads or writes to - /// ANY memory. If not, we can do a lot of aggressive analysis on it. - unsigned FunctionEffect; - - FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {} -}; - -/// GlobalsModRef - The actual analysis pass. -class GlobalsModRef : public ModulePass, public AliasAnalysis { - /// NonAddressTakenGlobals - The globals that do not have their addresses - /// taken. - std::set NonAddressTakenGlobals; - - /// IndirectGlobals - The memory pointed to by this global is known to be - /// 'owned' by the global. - std::set IndirectGlobals; - - /// AllocsForIndirectGlobals - If an instruction allocates memory for an - /// indirect global, this map indicates which one. - std::map AllocsForIndirectGlobals; - - /// FunctionInfo - For each function, keep track of what globals are - /// modified or read. - std::map FunctionInfo; - -public: - static char ID; - GlobalsModRef() : ModulePass(ID) { - initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); - } - - bool runOnModule(Module &M) override { - InitializeAliasAnalysis(this, &M.getDataLayout()); - - // Find non-addr taken globals. - AnalyzeGlobals(M); - - // Propagate on CG. 
- AnalyzeCallGraph(getAnalysis().getCallGraph(), M); - return false; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AliasAnalysis::getAnalysisUsage(AU); - AU.addRequired(); - AU.setPreservesAll(); // Does not transform code - } - - //------------------------------------------------ - // Implement the AliasAnalysis API - // - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return AliasAnalysis::getModRefInfo(CS1, CS2); - } - - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(const Function *F) override { - ModRefBehavior Min = UnknownModRefBehavior; - - if (FunctionRecord *FR = getFunctionInfo(F)) { - if (FR->FunctionEffect == 0) - Min = DoesNotAccessMemory; - else if ((FR->FunctionEffect & Mod) == 0) - Min = OnlyReadsMemory; - } - - return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); - } - - /// getModRefBehavior - Return the behavior of the specified function if - /// called from the specified call site. The call site may be null in which - /// case the most generic behavior of this function should be returned. - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { - ModRefBehavior Min = UnknownModRefBehavior; - - if (const Function *F = CS.getCalledFunction()) - if (FunctionRecord *FR = getFunctionInfo(F)) { - if (FR->FunctionEffect == 0) - Min = DoesNotAccessMemory; - else if ((FR->FunctionEffect & Mod) == 0) - Min = OnlyReadsMemory; - } - - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); - } - - void deleteValue(Value *V) override; - void addEscapingUse(Use &U) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(AnalysisID PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis *)this; - return this; - } - -private: - /// getFunctionInfo - Return the function info for the function, or null if - /// we don't have anything useful to say about it. - FunctionRecord *getFunctionInfo(const Function *F) { - std::map::iterator I = - FunctionInfo.find(F); - if (I != FunctionInfo.end()) - return &I->second; - return nullptr; - } - - void AnalyzeGlobals(Module &M); - void AnalyzeCallGraph(CallGraph &CG, Module &M); - bool AnalyzeUsesOfPointer(Value *V, std::vector &Readers, - std::vector &Writers, - GlobalValue *OkayStoreDest = nullptr); - bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); -}; -} - -char GlobalsModRef::ID = 0; -INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", - "Simple mod/ref analysis for globals", false, true, - false) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", - "Simple mod/ref analysis for globals", false, true, - false) - -Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } - -/// AnalyzeGlobals - Scan through the users of all of the internal -/// GlobalValue's in the program. 
If none of them have their "address taken" -/// (really, their address passed to something nontrivial), record this fact, -/// and record the functions that they are used directly in. -void GlobalsModRef::AnalyzeGlobals(Module &M) { - std::vector Readers, Writers; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasLocalLinkage()) { - if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { - // Remember that we are tracking this global. - NonAddressTakenGlobals.insert(I); - ++NumNonAddrTakenFunctions; - } - Readers.clear(); - Writers.clear(); - } - - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; - ++I) - if (I->hasLocalLinkage()) { - if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { - // Remember that we are tracking this global, and the mod/ref fns - NonAddressTakenGlobals.insert(I); - - for (unsigned i = 0, e = Readers.size(); i != e; ++i) - FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; - - if (!I->isConstant()) // No need to keep track of writers to constants - for (unsigned i = 0, e = Writers.size(); i != e; ++i) - FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; - ++NumNonAddrTakenGlobalVars; - - // If this global holds a pointer type, see if it is an indirect global. - if (I->getType()->getElementType()->isPointerTy() && - AnalyzeIndirectGlobalMemory(I)) - ++NumIndirectGlobalVars; - } - Readers.clear(); - Writers.clear(); - } -} - -/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. -/// If this is used by anything complex (i.e., the address escapes), return -/// true. Also, while we are at it, keep track of those functions that read and -/// write to the value. -/// -/// If OkayStoreDest is non-null, stores into this global are allowed. -bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, - std::vector &Readers, - std::vector &Writers, - GlobalValue *OkayStoreDest) { - if (!V->getType()->isPointerTy()) - return true; - - for (Use &U : V->uses()) { - User *I = U.getUser(); - if (LoadInst *LI = dyn_cast(I)) { - Readers.push_back(LI->getParent()->getParent()); - } else if (StoreInst *SI = dyn_cast(I)) { - if (V == SI->getOperand(1)) { - Writers.push_back(SI->getParent()->getParent()); - } else if (SI->getOperand(1) != OkayStoreDest) { - return true; // Storing the pointer - } - } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { - if (AnalyzeUsesOfPointer(I, Readers, Writers)) - return true; - } else if (Operator::getOpcode(I) == Instruction::BitCast) { - if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) - return true; - } else if (auto CS = CallSite(I)) { - // Make sure that this is just the function being called, not that it is - // passing into the function. - if (!CS.isCallee(&U)) { - // Detect calls to free. - if (isFreeCall(I, TLI)) - Writers.push_back(CS->getParent()->getParent()); - else - return true; // Argument of an unknown call. - } - } else if (ICmpInst *ICI = dyn_cast(I)) { - if (!isa(ICI->getOperand(1))) - return true; // Allow comparison against null. - } else { - return true; - } - } - - return false; -} - -/// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable -/// which holds a pointer type. See if the global always points to non-aliased -/// heap memory: that is, all initializers of the globals are allocations, and -/// those allocations have no use other than initialization of the global. -/// Further, all loads out of GV must directly use the memory, not store the -/// pointer somewhere. 
If this is true, we consider the memory pointed to by -/// GV to be owned by GV and can disambiguate other pointers from it. -bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { - // Keep track of values related to the allocation of the memory, f.e. the - // value produced by the malloc call and any casts. - std::vector AllocRelatedValues; - - // Walk the user list of the global. If we find anything other than a direct - // load or store, bail out. - for (User *U : GV->users()) { - if (LoadInst *LI = dyn_cast(U)) { - // The pointer loaded from the global can only be used in simple ways: - // we allow addressing of it and loading storing to it. We do *not* allow - // storing the loaded pointer somewhere else or passing to a function. - std::vector ReadersWriters; - if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters)) - return false; // Loaded pointer escapes. - // TODO: Could try some IP mod/ref of the loaded pointer. - } else if (StoreInst *SI = dyn_cast(U)) { - // Storing the global itself. - if (SI->getOperand(0) == GV) - return false; - - // If storing the null pointer, ignore it. - if (isa(SI->getOperand(0))) - continue; - - // Check the value being stored. - Value *Ptr = GetUnderlyingObject(SI->getOperand(0), - GV->getParent()->getDataLayout()); - - if (!isAllocLikeFn(Ptr, TLI)) - return false; // Too hard to analyze. - - // Analyze all uses of the allocation. If any of them are used in a - // non-simple way (e.g. stored to another global) bail out. - std::vector ReadersWriters; - if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) - return false; // Loaded pointer escapes. - - // Remember that this allocation is related to the indirect global. - AllocRelatedValues.push_back(Ptr); - } else { - // Something complex, bail out. - return false; - } - } - - // Okay, this is an indirect global. Remember all of the allocations for - // this global in AllocsForIndirectGlobals. - while (!AllocRelatedValues.empty()) { - AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; - AllocRelatedValues.pop_back(); - } - IndirectGlobals.insert(GV); - return true; -} - -/// AnalyzeCallGraph - At this point, we know the functions where globals are -/// immediately stored to and read from. Propagate this information up the call -/// graph to all callers and compute the mod/ref info for all memory for each -/// function. -void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { - // We do a bottom-up SCC traversal of the call graph. In other words, we - // visit all callees before callers (leaf-first). - for (scc_iterator I = scc_begin(&CG); !I.isAtEnd(); ++I) { - const std::vector &SCC = *I; - assert(!SCC.empty() && "SCC with no functions?"); - - if (!SCC[0]->getFunction()) { - // Calls externally - can't say anything useful. Remove any existing - // function records (may have been created when scanning globals). - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - FunctionInfo.erase(SCC[i]->getFunction()); - continue; - } - - FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()]; - - bool KnowNothing = false; - unsigned FunctionEffect = 0; - - // Collect the mod/ref properties due to called functions. We only compute - // one mod-ref set. - for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { - Function *F = SCC[i]->getFunction(); - if (!F) { - KnowNothing = true; - break; - } - - if (F->isDeclaration()) { - // Try to get mod/ref behaviour from function attributes. - if (F->doesNotAccessMemory()) { - // Can't do better than that! 
- } else if (F->onlyReadsMemory()) { - FunctionEffect |= Ref; - if (!F->isIntrinsic()) - // This function might call back into the module and read a global - - // consider every global as possibly being read by this function. - FR.MayReadAnyGlobal = true; - } else { - FunctionEffect |= ModRef; - // Can't say anything useful unless it's an intrinsic - they don't - // read or write global variables of the kind considered here. - KnowNothing = !F->isIntrinsic(); - } - continue; - } - - for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); - CI != E && !KnowNothing; ++CI) - if (Function *Callee = CI->second->getFunction()) { - if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) { - // Propagate function effect up. - FunctionEffect |= CalleeFR->FunctionEffect; - - // Incorporate callee's effects on globals into our info. - for (const auto &G : CalleeFR->GlobalInfo) - FR.GlobalInfo[G.first] |= G.second; - FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; - } else { - // Can't say anything about it. However, if it is inside our SCC, - // then nothing needs to be done. - CallGraphNode *CalleeNode = CG[Callee]; - if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) - KnowNothing = true; - } - } else { - KnowNothing = true; - } - } - - // If we can't say anything useful about this SCC, remove all SCC functions - // from the FunctionInfo map. - if (KnowNothing) { - for (unsigned i = 0, e = SCC.size(); i != e; ++i) - FunctionInfo.erase(SCC[i]->getFunction()); - continue; - } - - // Scan the function bodies for explicit loads or stores. - for (auto *Node : SCC) { - if (FunctionEffect == ModRef) - break; // The mod/ref lattice saturates here. - for (Instruction &I : inst_range(Node->getFunction())) { - if (FunctionEffect == ModRef) - break; // The mod/ref lattice saturates here. - - // We handle calls specially because the graph-relevant aspects are - // handled above. - if (auto CS = CallSite(&I)) { - if (isAllocationFn(&I, TLI) || isFreeCall(&I, TLI)) { - // FIXME: It is completely unclear why this is necessary and not - // handled by the above graph code. - FunctionEffect |= ModRef; - } else if (Function *Callee = CS.getCalledFunction()) { - // The callgraph doesn't include intrinsic calls. - if (Callee->isIntrinsic()) { - ModRefBehavior Behaviour = - AliasAnalysis::getModRefBehavior(Callee); - FunctionEffect |= (Behaviour & ModRef); - } - } - continue; - } - - // All non-call instructions we use the primary predicates for whether - // thay read or write memory. - if (I.mayReadFromMemory()) - FunctionEffect |= Ref; - if (I.mayWriteToMemory()) - FunctionEffect |= Mod; - } - } - - if ((FunctionEffect & Mod) == 0) - ++NumReadMemFunctions; - if (FunctionEffect == 0) - ++NumNoMemFunctions; - FR.FunctionEffect = FunctionEffect; - - // Finally, now that we know the full effect on this SCC, clone the - // information to each function in the SCC. - for (unsigned i = 1, e = SCC.size(); i != e; ++i) - FunctionInfo[SCC[i]->getFunction()] = FR; - } -} - -/// alias - If one of the pointers is to a global that we are tracking, and the -/// other is some random pointer, we know there cannot be an alias, because the -/// address of the global isn't taken. -AliasResult GlobalsModRef::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { - // Get the base object these pointers point to. 
- const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL); - const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL); - - // If either of the underlying values is a global, they may be non-addr-taken - // globals, which we can answer queries about. - const GlobalValue *GV1 = dyn_cast(UV1); - const GlobalValue *GV2 = dyn_cast(UV2); - if (GV1 || GV2) { - // If the global's address is taken, pretend we don't know it's a pointer to - // the global. - if (GV1 && !NonAddressTakenGlobals.count(GV1)) - GV1 = nullptr; - if (GV2 && !NonAddressTakenGlobals.count(GV2)) - GV2 = nullptr; - - // If the two pointers are derived from two different non-addr-taken - // globals, or if one is and the other isn't, we know these can't alias. - if ((GV1 || GV2) && GV1 != GV2) - return NoAlias; - - // Otherwise if they are both derived from the same addr-taken global, we - // can't know the two accesses don't overlap. - } - - // These pointers may be based on the memory owned by an indirect global. If - // so, we may be able to handle this. First check to see if the base pointer - // is a direct load from an indirect global. - GV1 = GV2 = nullptr; - if (const LoadInst *LI = dyn_cast(UV1)) - if (GlobalVariable *GV = dyn_cast(LI->getOperand(0))) - if (IndirectGlobals.count(GV)) - GV1 = GV; - if (const LoadInst *LI = dyn_cast(UV2)) - if (const GlobalVariable *GV = dyn_cast(LI->getOperand(0))) - if (IndirectGlobals.count(GV)) - GV2 = GV; - - // These pointers may also be from an allocation for the indirect global. If - // so, also handle them. - if (AllocsForIndirectGlobals.count(UV1)) - GV1 = AllocsForIndirectGlobals[UV1]; - if (AllocsForIndirectGlobals.count(UV2)) - GV2 = AllocsForIndirectGlobals[UV2]; - - // Now that we know whether the two pointers are related to indirect globals, - // use this to disambiguate the pointers. If either pointer is based on an - // indirect global and if they are not both based on the same indirect global, - // they cannot alias. - if ((GV1 || GV2) && GV1 != GV2) - return NoAlias; - - return AliasAnalysis::alias(LocA, LocB); -} - -AliasAnalysis::ModRefResult -GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { - unsigned Known = ModRef; - - // If we are asking for mod/ref info of a direct call with a pointer to a - // global we are tracking, return information if we have it. - const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); - if (const GlobalValue *GV = - dyn_cast(GetUnderlyingObject(Loc.Ptr, DL))) - if (GV->hasLocalLinkage()) - if (const Function *F = CS.getCalledFunction()) - if (NonAddressTakenGlobals.count(GV)) - if (const FunctionRecord *FR = getFunctionInfo(F)) - Known = FR->getInfoForGlobal(GV); - - if (Known == NoModRef) - return NoModRef; // No need to query other mod/ref analyses - return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc)); -} - -//===----------------------------------------------------------------------===// -// Methods to update the analysis as a result of the client transformation. -// -void GlobalsModRef::deleteValue(Value *V) { - if (GlobalValue *GV = dyn_cast(V)) { - if (NonAddressTakenGlobals.erase(GV)) { - // This global might be an indirect global. If so, remove it and remove - // any AllocRelatedValues for it. - if (IndirectGlobals.erase(GV)) { - // Remove any entries in AllocsForIndirectGlobals for this global. 
- for (std::map::iterator - I = AllocsForIndirectGlobals.begin(), - E = AllocsForIndirectGlobals.end(); - I != E;) { - if (I->second == GV) { - AllocsForIndirectGlobals.erase(I++); - } else { - ++I; - } - } - } - } - } - - // Otherwise, if this is an allocation related to an indirect global, remove - // it. - AllocsForIndirectGlobals.erase(V); - - AliasAnalysis::deleteValue(V); -} - -void GlobalsModRef::addEscapingUse(Use &U) { - // For the purposes of this analysis, it is conservatively correct to treat - // a newly escaping value equivalently to a deleted one. We could perhaps - // be more precise by processing the new use and attempting to update our - // saved analysis results to accommodate it. - deleteValue(U); - - AliasAnalysis::addEscapingUse(U); -} diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp deleted file mode 100644 index 806bfb81b6d5..000000000000 --- a/lib/Analysis/IPA/IPA.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===-- IPA.cpp -----------------------------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the common initialization routines for the IPA library. -// -//===----------------------------------------------------------------------===// - -#include "llvm/InitializePasses.h" -#include "llvm-c/Initialization.h" -#include "llvm/PassRegistry.h" - -using namespace llvm; - -/// initializeIPA - Initialize all passes linked into the IPA library. -void llvm::initializeIPA(PassRegistry &Registry) { - initializeCallGraphWrapperPassPass(Registry); - initializeCallGraphPrinterPass(Registry); - initializeCallGraphViewerPass(Registry); - initializeGlobalsModRefPass(Registry); -} - -void LLVMInitializeIPA(LLVMPassRegistryRef R) { - initializeIPA(*unwrap(R)); -} diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index 926787d3be91..e0c5d8fa5f5a 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -39,7 +39,7 @@ INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(IVUsers, "iv-users", "Induction Variable Users", false, true) @@ -255,7 +255,7 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.setPreservesAll(); } @@ -266,7 +266,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { *L->getHeader()->getParent()); LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); // Collect ephemeral values so that AddUsersIfInteresting skips them. EphValues.clear(); @@ -276,7 +276,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { // them by stride. Start by finding all of the PHI nodes in the header for // this loop. If they are induction variables, inspect their uses. 
for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) - (void)AddUsersIfInteresting(I); + (void)AddUsersIfInteresting(&*I); return false; } diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/InlineCost.cpp similarity index 95% rename from lib/Analysis/IPA/InlineCost.cpp rename to lib/Analysis/InlineCost.cpp index c0d2e375cb04..a86a703ed9d6 100644 --- a/lib/Analysis/IPA/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -115,11 +115,11 @@ class CallAnalyzer : public InstVisitor { /// inlining has the given attribute set either at the call site or the /// function declaration. Primarily used to inspect call site specific /// attributes since these can be more precise than the ones on the callee - /// itself. + /// itself. bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); /// Return true if the given value is known non null within the callee if - /// inlined through this particular callsite. + /// inlined through this particular callsite. bool isKnownNonNullInCallee(Value *V); // Custom analysis routines. @@ -156,6 +156,8 @@ class CallAnalyzer : public InstVisitor { bool visitSwitchInst(SwitchInst &SI); bool visitIndirectBrInst(IndirectBrInst &IBI); bool visitResumeInst(ResumeInst &RI); + bool visitCleanupReturnInst(CleanupReturnInst &RI); + bool visitCatchReturnInst(CatchReturnInst &RI); bool visitUnreachableInst(UnreachableInst &I); public: @@ -832,8 +834,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the - // bonus we want to apply, but don't go below zero. - Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); + // threshold to get the bonus we want to apply, but don't go below zero. + Cost -= std::max(0, CA.getThreshold() - CA.getCost()); } return Base::visitCallSite(CS); @@ -903,6 +905,18 @@ bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { return false; } +bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a cleanupret instruction. + return false; +} + +bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a catchret instruction. + return false; +} + bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { // FIXME: It might be reasonably to discount the cost of instructions leading // to unreachable as they have the lowest possible impact on both runtime and @@ -946,20 +960,21 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, continue; // Skip ephemeral values. - if (EphValues.count(I)) + if (EphValues.count(&*I)) continue; ++NumInstructions; if (isa(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; - // If the instruction is floating point, and the target says this operation is - // expensive or the function has the "use-soft-float" attribute, this may - // eventually become a library call. Treat the cost as such. + // If the instruction is floating point, and the target says this operation + // is expensive or the function has the "use-soft-float" attribute, this may + // eventually become a library call. Treat the cost as such. if (I->getType()->isFloatingPointTy()) { bool hasSoftFloatAttr = false; - // If the function has the "use-soft-float" attribute, mark it as expensive. 
+ // If the function has the "use-soft-float" attribute, mark it as + // expensive. if (F.hasFnAttribute("use-soft-float")) { Attribute Attr = F.getFnAttribute("use-soft-float"); StringRef Val = Attr.getValueAsString(); @@ -977,7 +992,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB, // all of the per-instruction logic. The visit tree returns true if we // consumed the instruction in any way, and false if the instruction's base // cost should count against inlining. - if (Base::visit(I)) + if (Base::visit(&*I)) ++NumInstructionsSimplified; else Cost += InlineConstants::InstrCost; @@ -1157,15 +1172,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { FAI != FAE; ++FAI, ++CAI) { assert(CAI != CS.arg_end()); if (Constant *C = dyn_cast(CAI)) - SimplifiedValues[FAI] = C; + SimplifiedValues[&*FAI] = C; Value *PtrArg = *CAI; if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { - ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); + ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue()); // We can SROA any pointer arguments derived from alloca instructions. if (isa(PtrArg)) { - SROAArgValues[FAI] = PtrArg; + SROAArgValues[&*FAI] = PtrArg; SROAArgCosts[PtrArg] = 0; } } @@ -1281,7 +1296,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { else if (NumVectorInstructions <= NumInstructions / 2) Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus); - return Cost < Threshold; + return Cost <= std::max(0, Threshold); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1304,36 +1319,6 @@ void CallAnalyzer::dump() { } #endif -INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", - true, true) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", - true, true) - -char InlineCostAnalysis::ID = 0; - -InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {} - -InlineCostAnalysis::~InlineCostAnalysis() {} - -void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired(); - AU.addRequired(); - CallGraphSCCPass::getAnalysisUsage(AU); -} - -bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { - TTIWP = &getAnalysis(); - ACT = &getAnalysis(); - return false; -} - -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { - return getInlineCost(CS, CS.getCalledFunction(), Threshold); -} - /// \brief Test that two functions either have or have not the given attribute /// at the same time. 
template @@ -1346,14 +1331,19 @@ static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) { static bool functionsHaveCompatibleAttributes(Function *Caller, Function *Callee, TargetTransformInfo &TTI) { - return TTI.hasCompatibleFunctionAttributes(Caller, Callee) && - attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && - attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && - attributeMatches(Caller, Callee, Attribute::SanitizeThread); + return TTI.areInlineCompatible(Caller, Callee) && + AttributeFuncs::areInlineCompatible(*Caller, *Callee); } -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, - int Threshold) { +InlineCost llvm::getInlineCost(CallSite CS, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT) { + return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT); +} + +InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold, + TargetTransformInfo &CalleeTTI, + AssumptionCacheTracker *ACT) { // Cannot inline indirect calls. if (!Callee) return llvm::InlineCost::getNever(); @@ -1368,8 +1358,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, // Never inline functions with conflicting attributes (unless callee has // always-inline attribute). - if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, - TTIWP->getTTI(*Callee))) + if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI)) return llvm::InlineCost::getNever(); // Don't inline this call if the caller has the optnone attribute. @@ -1386,7 +1375,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() << "...\n"); - CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, CS); + CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS); bool ShouldInline = CA.analyzeCall(CS); DEBUG(CA.dump()); @@ -1400,7 +1389,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); } -bool InlineCostAnalysis::isInlineViable(Function &F) { +bool llvm::isInlineViable(Function &F) { bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice); for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { // Disallow inlining of functions which contain indirect branches or @@ -1408,9 +1397,8 @@ bool InlineCostAnalysis::isInlineViable(Function &F) { if (isa(BI->getTerminator()) || BI->hasAddressTaken()) return false; - for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; - ++II) { - CallSite CS(II); + for (auto &II : *BI) { + CallSite CS(&II); if (!CS) continue; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index a7f8f5c8c99b..b89ff268d11e 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -122,7 +122,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { return DT->dominates(I, P); } - // Otherwise, if the instruction is in the entry block, and is not an invoke, + // Otherwise, if the instruction is in the entry block and is not an invoke, // then it obviously dominates all phi nodes. if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() && !isa(I)) @@ -2090,8 +2090,7 @@ static Constant *computePointerICmp(const DataLayout &DL, // Is the set of underlying objects all noalias calls? 
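A recurring mechanical change in this patch (visible in isInlineViable above and in the `&*I` edits elsewhere) is that ilist iterators no longer convert implicitly to `Instruction *`, so code takes the address of the dereferenced element instead. A small sketch of the target idiom, assuming this revision's C++ API; countCallSites is a made-up helper, not LLVM code:

    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"

    using namespace llvm;

    // Count the call sites in a function using range-based iteration and the
    // address-of-reference idiom the patch converts to.
    static unsigned countCallSites(Function &F) {
      unsigned N = 0;
      for (BasicBlock &BB : F)
        for (Instruction &I : BB) {
          CallSite CS(&I); // was: CallSite CS(II) on a BasicBlock::iterator
          if (!CS)
            continue;
          ++N;
        }
      return N;
    }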
auto IsNAC = [](SmallVectorImpl &Objects) { - return std::all_of(Objects.begin(), Objects.end(), - [](Value *V){ return isNoAliasCall(V); }); + return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall); }; // Is the set of underlying objects all things which must be disjoint from @@ -2176,6 +2175,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // X >=u 1 -> X if (match(RHS, m_One())) return LHS; + if (isImpliedCondition(RHS, LHS, Q.DL)) + return getTrue(ITy); + break; + case ICmpInst::ICMP_SGE: + /// For signed comparison, the values for an i1 are 0 and -1 + /// respectively. This maps into a truth table of: + /// LHS | RHS | LHS >=s RHS | LHS implies RHS + /// 0 | 0 | 1 (0 >= 0) | 1 + /// 0 | 1 | 1 (0 >= -1) | 1 + /// 1 | 0 | 0 (-1 >= 0) | 0 + /// 1 | 1 | 1 (-1 >= -1) | 1 + if (isImpliedCondition(LHS, RHS, Q.DL)) + return getTrue(ITy); break; case ICmpInst::ICMP_SLT: // X X @@ -2187,6 +2199,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (match(RHS, m_One())) return LHS; break; + case ICmpInst::ICMP_ULE: + if (isImpliedCondition(LHS, RHS, Q.DL)) + return getTrue(ITy); + break; } } @@ -2360,9 +2376,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) { // 'and x, CI2' produces [0, CI2]. Upper = CI2->getValue() + 1; + } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) { + // 'add nuw x, CI2' produces [CI2, UINT_MAX]. + Lower = CI2->getValue(); } - if (Lower != Upper) { - ConstantRange LHS_CR = ConstantRange(Lower, Upper); + + ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper) + : ConstantRange(Width, true); + + if (auto *I = dyn_cast(LHS)) + if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) + LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges)); + + if (!LHS_CR.isFullSet()) { if (RHS_CR.contains(LHS_CR)) return ConstantInt::getTrue(RHS->getContext()); if (RHS_CR.inverse().contains(LHS_CR)) @@ -2370,6 +2396,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // If both operands have range metadata, use the metadata + // to simplify the comparison. + if (isa(RHS) && isa(LHS)) { + auto RHS_Instr = dyn_cast(RHS); + auto LHS_Instr = dyn_cast(LHS); + + if (RHS_Instr->getMetadata(LLVMContext::MD_range) && + LHS_Instr->getMetadata(LLVMContext::MD_range)) { + auto RHS_CR = getConstantRangeFromMetadata( + *RHS_Instr->getMetadata(LLVMContext::MD_range)); + auto LHS_CR = getConstantRangeFromMetadata( + *LHS_Instr->getMetadata(LLVMContext::MD_range)); + + auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR); + if (Satisfied_CR.contains(LHS_CR)) + return ConstantInt::getTrue(RHS->getContext()); + + auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion( + CmpInst::getInversePredicate(Pred), RHS_CR); + if (InversedSatisfied_CR.contains(LHS_CR)) + return ConstantInt::getFalse(RHS->getContext()); + } + } + // Compare of cast, for example (zext X) != 0 -> X != 0 if (isa(LHS) && (isa(RHS) || isa(RHS))) { Instruction *LI = cast(LHS); @@ -2529,6 +2579,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // icmp eq|ne X, Y -> false|true if X != Y + if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && + isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { + LLVMContext &Ctx = LHS->getType()->getContext(); + return Pred == ICmpInst::ICMP_NE ? 
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); + } + // Special logic for binary operators. BinaryOperator *LBO = dyn_cast(LHS); BinaryOperator *RBO = dyn_cast(RHS); @@ -3039,7 +3097,7 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI, const DominatorTree *DT, AssumptionCache *AC, - Instruction *CxtI) { + const Instruction *CxtI) { return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI), RecursionLimit); } @@ -4024,6 +4082,17 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL, break; } + // In general, it is possible for computeKnownBits to determine all bits in a + // value even when the operands are not all constants. + if (!Result && I->getType()->isIntegerTy()) { + unsigned BitWidth = I->getType()->getScalarSizeInBits(); + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT); + if ((KnownZero | KnownOne).isAllOnesValue()) + Result = ConstantInt::get(I->getContext(), KnownOne); + } + /// If called on unreachable code, the above logic may report that the /// instruction simplified to itself. Make life easier for users by /// detecting that case here, returning a safe value instead. diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt index 3039ddea4ff5..bddf1a3ac201 100644 --- a/lib/Analysis/LLVMBuild.txt +++ b/lib/Analysis/LLVMBuild.txt @@ -15,9 +15,6 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = IPA - [component_0] type = Library name = Analysis diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp index c8d0410c1e0f..0f0f31e62ac7 100644 --- a/lib/Analysis/LazyCallGraph.cpp +++ b/lib/Analysis/LazyCallGraph.cpp @@ -198,7 +198,8 @@ void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) { assert(CalleeC.isDescendantOf(*this) && "Callee must be a descendant of the Caller."); - // The only change required is to add this SCC to the parent set of the callee. + // The only change required is to add this SCC to the parent set of the + // callee. CalleeC.ParentSCCs.insert(this); } @@ -454,8 +455,7 @@ void LazyCallGraph::SCC::internalDFS( } SmallVector -LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, - Node &CalleeN) { +LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, Node &CalleeN) { // First remove it from the node. CallerN.removeEdgeInternal(CalleeN.getFunction()); @@ -522,7 +522,7 @@ LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, // the leaf SCC list. if (!IsLeafSCC && !ResultSCCs.empty()) G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this), - G->LeafSCCs.end()); + G->LeafSCCs.end()); // Return the new list of SCCs. return ResultSCCs; diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index a6ae7f2229c5..0d1d34e0cb4f 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" @@ -64,10 +65,10 @@ class LVILatticeVal { enum LatticeValueTy { /// This Value has no known value yet. undefined, - + /// This Value has a specific constant value. constant, - + /// This Value is known to not have the specified value. 
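On the new SimplifyInstruction fallback above: when computeKnownBits proves every bit of an integer result, that is, when KnownZero and KnownOne together cover the full width, the instruction folds to the constant KnownOne. A standalone model of that test using plain 8-bit masks rather than APInt:

    #include <cassert>
    #include <cstdint>

    // Model: KnownZero/KnownOne are disjoint bitmasks over an 8-bit value. If
    // their union covers every bit, the value is fully determined and equals
    // KnownOne.
    struct KnownBits8 {
      uint8_t Zero, One;
      bool allBitsKnown() const { return uint8_t(Zero | One) == 0xFF; }
    };

    int main() {
      // Suppose analysis proved the low nibble is 0b0101 and the rest is zero.
      KnownBits8 K{/*Zero=*/0xFA, /*One=*/0x05};
      assert(K.allBitsKnown()); // (KnownZero | KnownOne).isAllOnesValue()
      assert(K.One == 0x05);    // the simplified constant is KnownOne
      return 0;
    }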
notconstant, @@ -77,13 +78,13 @@ class LVILatticeVal { /// This value is not known to be constant, and we know that it has a value. overdefined }; - + /// Val: This stores the current lattice value along with the Constant* for /// the constant if this is a 'constant' or 'notconstant' value. LatticeValueTy Tag; Constant *Val; ConstantRange Range; - + public: LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {} @@ -104,29 +105,34 @@ public: Res.markConstantRange(CR); return Res; } + static LVILatticeVal getOverdefined() { + LVILatticeVal Res; + Res.markOverdefined(); + return Res; + } bool isUndefined() const { return Tag == undefined; } bool isConstant() const { return Tag == constant; } bool isNotConstant() const { return Tag == notconstant; } bool isConstantRange() const { return Tag == constantrange; } bool isOverdefined() const { return Tag == overdefined; } - + Constant *getConstant() const { assert(isConstant() && "Cannot get the constant of a non-constant!"); return Val; } - + Constant *getNotConstant() const { assert(isNotConstant() && "Cannot get the constant of a non-notconstant!"); return Val; } - + ConstantRange getConstantRange() const { assert(isConstantRange() && "Cannot get the constant-range of a non-constant-range!"); return Range; } - + /// Return true if this is a change in status. bool markOverdefined() { if (isOverdefined()) @@ -150,7 +156,7 @@ public: Val = V; return true; } - + /// Return true if this is a change in status. bool markNotConstant(Constant *V) { assert(V && "Marking constant with NULL"); @@ -168,27 +174,27 @@ public: Val = V; return true; } - + /// Return true if this is a change in status. bool markConstantRange(const ConstantRange NewR) { if (isConstantRange()) { if (NewR.isEmptySet()) return markOverdefined(); - + bool changed = Range != NewR; Range = NewR; return changed; } - + assert(isUndefined()); if (NewR.isEmptySet()) return markOverdefined(); - + Tag = constantrange; Range = NewR; return true; } - + /// Merge the specified lattice value into this one, updating this /// one and returning true if anything changed. bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) { @@ -267,7 +273,7 @@ public: return markConstantRange(NewR); } }; - + } // end anonymous namespace. namespace llvm { @@ -295,9 +301,9 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { namespace { /// A callback value handle updates the cache when values are erased. class LazyValueInfoCache; - struct LVIValueHandle : public CallbackVH { + struct LVIValueHandle final : public CallbackVH { LazyValueInfoCache *Parent; - + LVIValueHandle(Value *V, LazyValueInfoCache *P) : CallbackVH(V), Parent(P) { } @@ -308,24 +314,27 @@ namespace { }; } -namespace { +namespace { /// This is the cache kept by LazyValueInfo which /// maintains information about queries across the clients' queries. class LazyValueInfoCache { /// This is all of the cached block information for exactly one Value*. /// The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. - typedef std::map, LVILatticeVal> ValueCacheEntryTy; + /// Over-defined lattice values are recorded in OverDefinedCache to reduce + /// memory overhead. + typedef SmallDenseMap, LVILatticeVal, 4> + ValueCacheEntryTy; /// This is all of the cached information for all values, /// mapped from Value* to key information. std::map ValueCache; - + /// This tracks, on a per-block basis, the set of values that are - /// over-defined at the end of that block. 
This is required - /// for cache updating. - typedef std::pair, Value*> OverDefinedPairTy; - DenseSet OverDefinedCache; + /// over-defined at the end of that block. + typedef DenseMap, SmallPtrSet> + OverDefinedCacheTy; + OverDefinedCacheTy OverDefinedCache; /// Keep track of all blocks that we have ever seen, so we /// don't spend time removing unused blocks from our caches. @@ -357,9 +366,13 @@ namespace { void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { SeenBlocks.insert(BB); - lookup(Val)[BB] = Result; + + // Insert over-defined values into their own cache to reduce memory + // overhead. if (Result.isOverdefined()) - OverDefinedCache.insert(std::make_pair(BB, Val)); + OverDefinedCache[BB].insert(Val); + else + lookup(Val)[BB] = Result; } LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); @@ -382,11 +395,39 @@ namespace { Instruction *BBI); void solve(); - + ValueCacheEntryTy &lookup(Value *V) { return ValueCache[LVIValueHandle(V, this)]; } + bool isOverdefined(Value *V, BasicBlock *BB) const { + auto ODI = OverDefinedCache.find(BB); + + if (ODI == OverDefinedCache.end()) + return false; + + return ODI->second.count(V); + } + + bool hasCachedValueInfo(Value *V, BasicBlock *BB) { + if (isOverdefined(V, BB)) + return true; + + LVIValueHandle ValHandle(V, this); + auto I = ValueCache.find(ValHandle); + if (I == ValueCache.end()) + return false; + + return I->second.count(BB); + } + + LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) { + if (isOverdefined(V, BB)) + return LVILatticeVal::getOverdefined(); + + return lookup(V)[BB]; + } + public: /// This is the query interface to determine the lattice /// value for the specified Value* at the end of the specified block. @@ -402,15 +443,15 @@ namespace { /// value for the specified Value* that is true on the specified edge. LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB, Instruction *CxtI = nullptr); - + /// This is the update interface to inform the cache that an edge from /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc. void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); - + /// This is part of the update interface to inform the cache /// that a block has been deleted. void eraseBlock(BasicBlock *BB); - + /// clear - Empty the cache. void clear() { SeenBlocks.clear(); @@ -425,15 +466,17 @@ namespace { } // end anonymous namespace void LVIValueHandle::deleted() { - typedef std::pair, Value*> OverDefinedPairTy; - - SmallVector ToErase; - for (const OverDefinedPairTy &P : Parent->OverDefinedCache) - if (P.second == getValPtr()) - ToErase.push_back(P); - for (const OverDefinedPairTy &P : ToErase) - Parent->OverDefinedCache.erase(P); - + SmallVector, 4> ToErase; + for (auto &I : Parent->OverDefinedCache) { + SmallPtrSetImpl &ValueSet = I.second; + if (ValueSet.count(getValPtr())) + ValueSet.erase(getValPtr()); + if (ValueSet.empty()) + ToErase.push_back(I.first); + } + for (auto &BB : ToErase) + Parent->OverDefinedCache.erase(BB); + // This erasure deallocates *this, so it MUST happen after we're done // using any and all members of *this. 
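The value-handle logic just above walks the reworked OverDefinedCache, which now maps each block to the set of values that are over-defined there and drops a block's entry once its set becomes empty. A standalone analog using std containers (the patch itself uses DenseMap and SmallPtrSet); the names here are illustrative:

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    // Analog of the reworked OverDefinedCache: block -> set of over-defined
    // values, with empty per-block sets erased eagerly, as in deleted().
    using OverDefinedCache = std::map<std::string, std::set<std::string>>;

    void eraseValue(OverDefinedCache &Cache, const std::string &V) {
      std::vector<std::string> ToErase;
      for (auto &Entry : Cache) {
        Entry.second.erase(V);
        if (Entry.second.empty())
          ToErase.push_back(Entry.first);
      }
      for (const auto &BB : ToErase)
        Cache.erase(BB);
    }

    int main() {
      OverDefinedCache Cache{{"bb1", {"v1", "v2"}}, {"bb2", {"v1"}}};
      eraseValue(Cache, "v1");
      assert(Cache.count("bb1") && !Cache.count("bb2")); // bb2's set emptied
      return 0;
    }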
   Parent->ValueCache.erase(*this);
@@ -446,15 +489,11 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
     return;
   SeenBlocks.erase(I);
 
-  SmallVector<OverDefinedPairTy, 4> ToErase;
-  for (const OverDefinedPairTy& P : OverDefinedCache)
-    if (P.first == BB)
-      ToErase.push_back(P);
-  for (const OverDefinedPairTy &P : ToErase)
-    OverDefinedCache.erase(P);
+  auto ODI = OverDefinedCache.find(BB);
+  if (ODI != OverDefinedCache.end())
+    OverDefinedCache.erase(ODI);
 
-  for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
-       I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+  for (auto I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
     I->second.erase(BB);
 }
 
@@ -466,7 +505,8 @@ void LazyValueInfoCache::solve() {
     if (solveBlockValue(e.second, e.first)) {
       // The work item was completely processed.
       assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
-      assert(lookup(e.second).count(e.first) && "Result should be in cache!");
+      assert(hasCachedValueInfo(e.second, e.first) &&
+             "Result should be in cache!");
 
       BlockValueStack.pop();
       BlockValueSet.erase(e);
@@ -482,11 +522,7 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
   if (isa<Constant>(Val))
     return true;
 
-  LVIValueHandle ValHandle(Val, this);
-  std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I =
-    ValueCache.find(ValHandle);
-  if (I == ValueCache.end()) return false;
-  return I->second.count(BB);
+  return hasCachedValueInfo(Val, BB);
 }
 
 LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
@@ -495,17 +531,36 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
     return LVILatticeVal::get(VC);
 
   SeenBlocks.insert(BB);
-  return lookup(Val)[BB];
+  return getCachedValueInfo(Val, BB);
+}
+
+static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
+  switch (BBI->getOpcode()) {
+  default: break;
+  case Instruction::Load:
+  case Instruction::Call:
+  case Instruction::Invoke:
+    if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
+      if (isa<IntegerType>(BBI->getType())) {
+        ConstantRange Result = getConstantRangeFromMetadata(*Ranges);
+        return LVILatticeVal::getRange(Result);
+      }
+    break;
+  };
+  // Nothing known - Note that we do not want overdefined here. We may know
+  // something else about the value and not having range metadata shouldn't
+  // cause us to throw away those facts.
+  return LVILatticeVal();
 }
 
 bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
   if (isa<Constant>(Val))
     return true;
 
-  if (lookup(Val).count(BB)) {
+  if (hasCachedValueInfo(Val, BB)) {
     // If we have a cached value, use that.
     DEBUG(dbgs() << "  reuse BB '" << BB->getName()
-                 << "' val=" << lookup(Val)[BB] << '\n');
+                 << "' val=" << getCachedValueInfo(Val, BB) << '\n');
 
     // Since we're reusing a cached value, we don't need to update the
    // OverDefinedCache. The cache will have been properly updated whenever the
@@ -516,7 +571,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
   // Hold off inserting this value into the Cache in case we have to return
   // false and come back later.
   LVILatticeVal Res;
-  
+
   Instruction *BBI = dyn_cast<Instruction>(Val);
   if (!BBI || BBI->getParent() != BB) {
     if (!solveBlockValueNonLocal(Res, Val, BB))
@@ -532,12 +587,18 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
     return true;
   }
 
-  if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
-    Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+  // If this value is a nonnull pointer, record its range and bail out.
+ PointerType *PT = dyn_cast(BBI->getType()); + if (PT && isKnownNonNull(BBI)) { + Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT)); insertResult(Val, BB, Res); return true; } + // If this is an instruction which supports range metadata, return the + // implied range. TODO: This should be an intersection, not a union. + Res.mergeIn(getFromRangeMetadata(BBI), DL); + // We can only analyze the definitions of certain classes of instructions // (integral binops and casts at the moment), so bail if this isn't one. LVILatticeVal Result; @@ -661,7 +722,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, PointerType *PTy = cast(Val->getType()); Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy)); } - + BBLV = Result; return true; } @@ -674,7 +735,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV, BBLV = Result; return true; } - + bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB) { LVILatticeVal Result; // Start Undefined. @@ -700,7 +761,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV, if (Result.isOverdefined()) { DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined because of pred.\n"); - + BBLV = Result; return true; } @@ -765,7 +826,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, BBLV.markOverdefined(); return true; } - + ConstantRange LHSRange = LHSVal.getConstantRange(); ConstantRange RHSRange(1); IntegerType *ResultTy = cast(BBI->getType()); @@ -819,7 +880,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, case Instruction::Or: Result.markConstantRange(LHSRange.binaryOr(RHSRange)); break; - + // Unhandled instructions are overdefined. default: DEBUG(dbgs() << " compute BB '" << BB->getName() @@ -827,7 +888,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV, Result.markOverdefined(); break; } - + BBLV = Result; return true; } @@ -877,7 +938,7 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI, /// Val is not constrained on the edge. static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, BasicBlock *BBTo, LVILatticeVal &Result) { - // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we + // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we // know that v != 0. if (BranchInst *BI = dyn_cast(BBFrom->getTerminator())) { // If this is a conditional branch and only one successor goes to BBTo, then @@ -887,7 +948,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, bool isTrueDest = BI->getSuccessor(0) == BBTo; assert(BI->getSuccessor(!isTrueDest) == BBTo && "BBTo isn't a successor of BBFrom"); - + // If V is the condition of the branch itself, then we know exactly what // it is. if (BI->getCondition() == Val) { @@ -895,7 +956,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom, Type::getInt1Ty(Val->getContext()), isTrueDest)); return true; } - + // If the condition of the branch is an equality comparison, we may be // able to infer the value. 
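On getFromRangeMetadata above: !range metadata on a load, call, or invoke seeds the lattice with a constant range, while the absence of metadata deliberately yields "undefined" rather than "overdefined", so later facts can still refine the value. A standalone model of that contract in plain C++17; the half-open pair stands in for ConstantRange:

    #include <cassert>
    #include <cstdint>
    #include <optional>
    #include <utility>

    // Model of the contract: optional [Lo, Hi) metadata seeds a lattice value;
    // missing metadata leaves the value Undefined, not Overdefined.
    struct Lattice {
      enum Kind { Undefined, ConstantRange, Overdefined } K = Undefined;
      int64_t Lo = 0, Hi = 0; // valid when K == ConstantRange
    };

    Lattice fromRangeMetadata(
        const std::optional<std::pair<int64_t, int64_t>> &MD) {
      if (MD)
        return {Lattice::ConstantRange, MD->first, MD->second};
      return {}; // nothing known; deliberately not Overdefined
    }

    int main() {
      Lattice L = fromRangeMetadata(std::make_pair(int64_t(0), int64_t(256)));
      assert(L.K == Lattice::ConstantRange && L.Hi == 256);
      assert(fromRangeMetadata(std::nullopt).K == Lattice::Undefined);
      return 0;
    }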
if (ICmpInst *ICI = dyn_cast(BI->getCondition())) @@ -997,7 +1058,7 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '" << BB->getName() << "'\n"); - + assert(BlockValueStack.empty() && BlockValueSet.empty()); pushBlockValue(std::make_pair(BB, V)); @@ -1014,6 +1075,8 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) { << CxtI->getName() << "'\n"); LVILatticeVal Result; + if (auto *I = dyn_cast(V)) + Result = getFromRangeMetadata(I); mergeAssumeBlockValueConstantRange(V, Result, CxtI); DEBUG(dbgs() << " Result = " << Result << "\n"); @@ -1025,7 +1088,7 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '" << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); - + LVILatticeVal Result; if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) { solve(); @@ -1040,24 +1103,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc) { - // When an edge in the graph has been threaded, values that we could not - // determine a value for before (i.e. were marked overdefined) may be possible - // to solve now. We do NOT try to proactively update these values. Instead, - // we clear their entries from the cache, and allow lazy updating to recompute - // them when needed. - + // When an edge in the graph has been threaded, values that we could not + // determine a value for before (i.e. were marked overdefined) may be + // possible to solve now. We do NOT try to proactively update these values. + // Instead, we clear their entries from the cache, and allow lazy updating to + // recompute them when needed. + // The updating process is fairly simple: we need to drop cached info // for all values that were marked overdefined in OldSucc, and for those same // values in any successor of OldSucc (except NewSucc) in which they were // also marked overdefined. std::vector worklist; worklist.push_back(OldSucc); - - DenseSet ClearSet; - for (OverDefinedPairTy &P : OverDefinedCache) - if (P.first == OldSucc) - ClearSet.insert(P.second); - + + auto I = OverDefinedCache.find(OldSucc); + if (I == OverDefinedCache.end()) + return; // Nothing to process here. + SmallVector ValsToClear(I->second.begin(), I->second.end()); + // Use a worklist to perform a depth-first search of OldSucc's successors. // NOTE: We do not need a visited list since any blocks we have already // visited will have had their overdefined markers cleared already, and we @@ -1065,32 +1128,31 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, while (!worklist.empty()) { BasicBlock *ToUpdate = worklist.back(); worklist.pop_back(); - + // Skip blocks only accessible through NewSucc. if (ToUpdate == NewSucc) continue; - + bool changed = false; - for (Value *V : ClearSet) { + for (Value *V : ValsToClear) { // If a value was marked overdefined in OldSucc, and is here too... - DenseSet::iterator OI = - OverDefinedCache.find(std::make_pair(ToUpdate, V)); - if (OI == OverDefinedCache.end()) continue; + auto OI = OverDefinedCache.find(ToUpdate); + if (OI == OverDefinedCache.end()) + continue; + SmallPtrSetImpl &ValueSet = OI->second; + if (!ValueSet.count(V)) + continue; - // Remove it from the caches. 
- ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)]; - ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); + ValueSet.erase(V); + if (ValueSet.empty()) + OverDefinedCache.erase(OI); - assert(CI != Entry.end() && "Couldn't find entry to update?"); - Entry.erase(CI); - OverDefinedCache.erase(OI); - - // If we removed anything, then we potentially need to update + // If we removed anything, then we potentially need to update // blocks successors too. changed = true; } if (!changed) continue; - + worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate)); } } @@ -1158,7 +1220,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB, } /// Determine whether the specified value is known to be a -/// constant on the specified edge. Return null if not. +/// constant on the specified edge. Return null if not. Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB, Instruction *CxtI) { @@ -1190,26 +1252,26 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True; return LazyValueInfo::Unknown; } - + if (Result.isConstantRange()) { ConstantInt *CI = dyn_cast(C); if (!CI) return LazyValueInfo::Unknown; - + ConstantRange CR = Result.getConstantRange(); if (Pred == ICmpInst::ICMP_EQ) { if (!CR.contains(CI->getValue())) return LazyValueInfo::False; - + if (CR.isSingleElement() && CR.contains(CI->getValue())) return LazyValueInfo::True; } else if (Pred == ICmpInst::ICMP_NE) { if (!CR.contains(CI->getValue())) return LazyValueInfo::True; - + if (CR.isSingleElement() && CR.contains(CI->getValue())) return LazyValueInfo::False; } - + // Handle more complex predicates. ConstantRange TrueValues = ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); @@ -1219,7 +1281,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, return LazyValueInfo::False; return LazyValueInfo::Unknown; } - + if (Result.isNotConstant()) { // If this is an equality comparison, we can try to fold it knowing that // "V != C1". @@ -1240,7 +1302,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C, } return LazyValueInfo::Unknown; } - + return LazyValueInfo::Unknown; } @@ -1266,20 +1328,69 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C, if (Ret != Unknown) return Ret; - // TODO: Move this logic inside getValueAt so that it can be cached rather - // than re-queried on each call. This would also allow us to merge the - // underlying lattice values to get more information + // Note: The following bit of code is somewhat distinct from the rest of LVI; + // LVI as a whole tries to compute a lattice value which is conservatively + // correct at a given location. In this case, we have a predicate which we + // weren't able to prove about the merged result, and we're pushing that + // predicate back along each incoming edge to see if we can prove it + // separately for each input. As a motivating example, consider: + // bb1: + // %v1 = ... ; constantrange<1, 5> + // br label %merge + // bb2: + // %v2 = ... ; constantrange<10, 20> + // br label %merge + // merge: + // %phi = phi [%v1, %v2] ; constantrange<1,20> + // %pred = icmp eq i32 %phi, 8 + // We can't tell from the lattice value for '%phi' that '%pred' is false + // along each path, but by checking the predicate over each input separately, + // we can. + // We limit the search to one step backwards from the current BB and value. 
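A standalone model of the PHI reasoning described in the comment above, using the same numbers: the merged range [1,20) for %phi cannot decide `icmp eq %phi, 8`, but testing the predicate against each incoming range separately proves it false on every edge. Plain C++, with a half-open interval standing in for constantrange:

    #include <cassert>
    #include <cstdint>

    struct Range { int64_t Lo, Hi; };        // half-open [Lo, Hi)
    enum Tristate { False, True, Unknown };

    // Decide `x == C` for all x in R, if possible.
    Tristate eqVsRange(const Range &R, int64_t C) {
      if (C < R.Lo || C >= R.Hi) return False;      // C lies outside the range
      if (R.Lo == C && R.Hi == C + 1) return True;  // range is exactly {C}
      return Unknown;
    }

    int main() {
      Range Incoming[] = {{1, 5}, {10, 20}};  // %v1 and %v2 in the example
      Tristate Baseline = eqVsRange(Incoming[0], 8);
      for (const Range &R : Incoming)
        if (eqVsRange(R, 8) != Baseline) { Baseline = Unknown; break; }
      assert(Baseline == False);              // provably false on every edge

      Range Merged{1, 20};                    // the lattice value for %phi
      assert(eqVsRange(Merged, 8) == Unknown);// merged info alone is too weak
      return 0;
    }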
+ // We could consider extending this to search further backwards through the + // CFG and/or value graph, but there are non-obvious compile time vs quality + // tradeoffs. if (CxtI) { - // For a comparison where the V is outside this block, it's possible - // that we've branched on it before. Look to see if the value is known - // on all incoming edges. BasicBlock *BB = CxtI->getParent(); + + // Function entry or an unreachable block. Bail to avoid confusing + // analysis below. pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - if (PI != PE && - (!isa(V) || - cast(V)->getParent() != BB)) { + if (PI == PE) + return Unknown; + + // If V is a PHI node in the same block as the context, we need to ask + // questions about the predicate as applied to the incoming value along + // each edge. This is useful for eliminating cases where the predicate is + // known along all incoming edges. + if (auto *PHI = dyn_cast(V)) + if (PHI->getParent() == BB) { + Tristate Baseline = Unknown; + for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) { + Value *Incoming = PHI->getIncomingValue(i); + BasicBlock *PredBB = PHI->getIncomingBlock(i); + // Note that PredBB may be BB itself. + Tristate Result = getPredicateOnEdge(Pred, Incoming, C, PredBB, BB, + CxtI); + + // Keep going as long as we've seen a consistent known result for + // all inputs. + Baseline = (i == 0) ? Result /* First iteration */ + : (Baseline == Result ? Baseline : Unknown); /* All others */ + if (Baseline == Unknown) + break; + } + if (Baseline != Unknown) + return Baseline; + } + + // For a comparison where the V is outside this block, it's possible + // that we've branched on it before. Look to see if the value is known + // on all incoming edges. + if (!isa(V) || + cast(V)->getParent() != BB) { // For predecessor edge, determine if the comparison is true or false - // on that edge. If they're all true or all false, we can conclude + // on that edge. If they're all true or all false, we can conclude // the value of the comparison in this block. Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI); if (Baseline != Unknown) { diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp deleted file mode 100644 index 991a0e3e2752..000000000000 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the LibCallAliasAnalysis class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LibCallAliasAnalysis.h" -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/Pass.h" -using namespace llvm; - -// Register this pass... 
-char LibCallAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa", - "LibCall Alias Analysis", false, true, false) - -FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { - return new LibCallAliasAnalysis(LCI); -} - -LibCallAliasAnalysis::~LibCallAliasAnalysis() { - delete LCI; -} - -void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AliasAnalysis::getAnalysisUsage(AU); - AU.setPreservesAll(); // Does not transform code -} - -bool LibCallAliasAnalysis::runOnFunction(Function &F) { - // set up super class - InitializeAliasAnalysis(this, &F.getParent()->getDataLayout()); - return false; -} - -/// AnalyzeLibCallDetails - Given a call to a function with the specified -/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call -/// vs the specified pointer/size. -AliasAnalysis::ModRefResult -LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, - ImmutableCallSite CS, - const MemoryLocation &Loc) { - // If we have a function, check to see what kind of mod/ref effects it - // has. Start by including any info globally known about the function. - AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior; - if (MRInfo == NoModRef) return MRInfo; - - // If that didn't tell us that the function is 'readnone', check to see - // if we have detailed info and if 'P' is any of the locations we know - // about. - const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; - if (Details == nullptr) - return MRInfo; - - // If the details array is of the 'DoesNot' kind, we only know something if - // the pointer is a match for one of the locations in 'Details'. If we find a - // match, we can prove some interactions cannot happen. - // - if (FI->DetailsType == LibCallFunctionInfo::DoesNot) { - // Find out if the pointer refers to a known location. - for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { - const LibCallLocationInfo &LocInfo = - LCI->getLocationInfo(Details[i].LocationID); - LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); - if (Res != LibCallLocationInfo::Yes) continue; - - // If we find a match against a location that we 'do not' interact with, - // learn this info into MRInfo. - return ModRefResult(MRInfo & ~Details[i].MRInfo); - } - return MRInfo; - } - - // If the details are of the 'DoesOnly' sort, we know something if the pointer - // is a match for one of the locations in 'Details'. Also, if we can prove - // that the pointers is *not* one of the locations in 'Details', we know that - // the call is NoModRef. - assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly); - - // Find out if the pointer refers to a known location. - bool NoneMatch = true; - for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { - const LibCallLocationInfo &LocInfo = - LCI->getLocationInfo(Details[i].LocationID); - LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); - if (Res == LibCallLocationInfo::No) continue; - - // If we don't know if this pointer points to the location, then we have to - // assume it might alias in some case. - if (Res == LibCallLocationInfo::Unknown) { - NoneMatch = false; - continue; - } - - // If we know that this pointer definitely is pointing into the location, - // merge in this information. 
- return ModRefResult(MRInfo & Details[i].MRInfo); - } - - // If we found that the pointer is guaranteed to not match any of the - // locations in our 'DoesOnly' rule, then we know that the pointer must point - // to some other location. Since the libcall doesn't mod/ref any other - // locations, return NoModRef. - if (NoneMatch) - return NoModRef; - - // Otherwise, return any other info gained so far. - return MRInfo; -} - -// getModRefInfo - Check to see if the specified callsite can clobber the -// specified memory object. -// -AliasAnalysis::ModRefResult -LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { - ModRefResult MRInfo = ModRef; - - // If this is a direct call to a function that LCI knows about, get the - // information about the runtime function. - if (LCI) { - if (const Function *F = CS.getCalledFunction()) { - if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) { - MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc)); - if (MRInfo == NoModRef) return NoModRef; - } - } - } - - // The AliasAnalysis base class has some smarts, lets use them. - return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc)); -} diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp deleted file mode 100644 index 003c81e87b60..000000000000 --- a/lib/Analysis/LibCallSemantics.cpp +++ /dev/null @@ -1,89 +0,0 @@ -//===- LibCallSemantics.cpp - Describe library semantics ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements interfaces that can be used to describe language -// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM -// optimizers. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/Function.h" -using namespace llvm; - -/// This impl pointer in ~LibCallInfo is actually a StringMap. This -/// helper does the cast. -static StringMap *getMap(void *Ptr) { - return static_cast *>(Ptr); -} - -LibCallInfo::~LibCallInfo() { - delete getMap(Impl); -} - -const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { - // Get location info on the first call. - if (NumLocations == 0) - NumLocations = getLocationInfo(Locations); - - assert(LocID < NumLocations && "Invalid location ID!"); - return Locations[LocID]; -} - - -/// Return the LibCallFunctionInfo object corresponding to -/// the specified function if we have it. If not, return null. -const LibCallFunctionInfo * -LibCallInfo::getFunctionInfo(const Function *F) const { - StringMap *Map = getMap(Impl); - - /// If this is the first time we are querying for this info, lazily construct - /// the StringMap to index it. - if (!Map) { - Impl = Map = new StringMap(); - - const LibCallFunctionInfo *Array = getFunctionInfoArray(); - if (!Array) return nullptr; - - // We now have the array of entries. Populate the StringMap. - for (unsigned i = 0; Array[i].Name; ++i) - (*Map)[Array[i].Name] = Array+i; - } - - // Look up this function in the string map. - return Map->lookup(F->getName()); -} - -/// See if the given exception handling personality function is one that we -/// understand. 
If so, return a description of it; otherwise return Unknown. -EHPersonality llvm::classifyEHPersonality(const Value *Pers) { - const Function *F = dyn_cast(Pers->stripPointerCasts()); - if (!F) - return EHPersonality::Unknown; - return StringSwitch(F->getName()) - .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) - .Case("__gxx_personality_v0", EHPersonality::GNU_CXX) - .Case("__gcc_personality_v0", EHPersonality::GNU_C) - .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) - .Case("_except_handler3", EHPersonality::MSVC_X86SEH) - .Case("_except_handler4", EHPersonality::MSVC_X86SEH) - .Case("__C_specific_handler", EHPersonality::MSVC_Win64SEH) - .Case("__CxxFrameHandler3", EHPersonality::MSVC_CXX) - .Default(EHPersonality::Unknown); -} - -bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { - EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn()); - // We can't simplify any invokes to nounwind functions if the personality - // function wants to catch asynch exceptions. The nounwind attribute only - // implies that the function does not throw synchronous exceptions. - return !isAsynchronousEHPersonality(Personality); -} diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 0b9308a573a0..2dfb09c95ad6 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -49,6 +49,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LegacyPassManager.h" @@ -98,12 +99,13 @@ namespace { void visitInsertElementInst(InsertElementInst &I); void visitUnreachableInst(UnreachableInst &I); - Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const; - Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, SmallPtrSetImpl &Visited) const; public: Module *Mod; + const DataLayout *DL; AliasAnalysis *AA; AssumptionCache *AC; DominatorTree *DT; @@ -121,7 +123,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -165,7 +167,7 @@ INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", false, true) @@ -178,7 +180,8 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR", // bool Lint::runOnFunction(Function &F) { Mod = F.getParent(); - AA = &getAnalysis(); + DL = &F.getParent()->getDataLayout(); + AA = &getAnalysis().getAAResults(); AC = &getAnalysis().getAssumptionCache(F); DT = &getAnalysis().getDomTree(); TLI = &getAnalysis().getTLI(); @@ -200,12 +203,11 @@ void Lint::visitFunction(Function &F) { void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); - const DataLayout &DL = CS->getModule()->getDataLayout(); visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr, MemRef::Callee); - if (Function *F = dyn_cast(findValue(Callee, DL, + if (Function *F = dyn_cast(findValue(Callee, /*OffsetOk=*/false))) { Assert(CS.getCallingConv() == 
F->getCallingConv(), "Undefined behavior: Caller and callee calling convention differ", @@ -232,7 +234,7 @@ void Lint::visitCallSite(CallSite CS) { for (; AI != AE; ++AI) { Value *Actual = *AI; if (PI != PE) { - Argument *Formal = PI++; + Argument *Formal = &*PI++; Assert(Formal->getType() == Actual->getType(), "Undefined behavior: Call argument type mismatches " "callee parameter type", @@ -253,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) { if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { Type *Ty = cast(Formal->getType())->getElementType(); - visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), - DL.getABITypeAlignment(Ty), Ty, + visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), + DL->getABITypeAlignment(Ty), Ty, MemRef::Read | MemRef::Write); } } @@ -264,7 +266,7 @@ void Lint::visitCallSite(CallSite CS) { if (CS.isCall() && cast(CS.getInstruction())->isTailCall()) for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { - Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true); + Value *Obj = findValue(*AI, /*OffsetOk=*/true); Assert(!isa(Obj), "Undefined behavior: Call with \"tail\" keyword references " "alloca", @@ -291,7 +293,7 @@ void Lint::visitCallSite(CallSite CS) { // overlap is not distinguished from the case where nothing is known. uint64_t Size = 0; if (const ConstantInt *Len = - dyn_cast(findValue(MCI->getLength(), DL, + dyn_cast(findValue(MCI->getLength(), /*OffsetOk=*/false))) if (Len->getValue().isIntN(32)) Size = Len->getValue().getZExtValue(); @@ -343,13 +345,6 @@ void Lint::visitCallSite(CallSite CS) { visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0, nullptr, MemRef::Read | MemRef::Write); break; - - case Intrinsic::eh_begincatch: - visitEHBeginCatch(II); - break; - case Intrinsic::eh_endcatch: - visitEHEndCatch(II); - break; } } @@ -367,8 +362,7 @@ void Lint::visitReturnInst(ReturnInst &I) { "Unusual: Return statement in function with noreturn attribute", &I); if (Value *V = I.getReturnValue()) { - Value *Obj = - findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true); + Value *Obj = findValue(V, /*OffsetOk=*/true); Assert(!isa(Obj), "Unusual: Returning alloca value", &I); } } @@ -383,8 +377,7 @@ void Lint::visitMemoryReference(Instruction &I, if (Size == 0) return; - Value *UnderlyingObject = - findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true); + Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); Assert(!isa(UnderlyingObject), "Undefined behavior: Null pointer dereference", &I); Assert(!isa(UnderlyingObject), @@ -423,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I, // Check for buffer overflows and misalignment. // Only handles memory references that read/write something simple like an // alloca instruction or a global variable. - auto &DL = I.getModule()->getDataLayout(); int64_t Offset = 0; - if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) { + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) { // OK, so the access is to a constant offset from Ptr. Check that Ptr is // something we can handle and if so extract the size of this base object // along with its alignment. 
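On the Lint changes above: the pass now caches the module's DataLayout once in runOnFunction, so helpers such as findValue no longer thread a DL parameter through every call. A minimal sketch of the pattern, assuming this revision's legacy pass API; DLCachingPass is a made-up name, not part of the patch:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Pass.h"

    namespace {
    struct DLCachingPass : public llvm::FunctionPass {
      static char ID;
      const llvm::DataLayout *DL = nullptr;
      DLCachingPass() : llvm::FunctionPass(ID) {}

      bool runOnFunction(llvm::Function &F) override {
        // Cached once here; every visit* helper can then use DL directly.
        DL = &F.getParent()->getDataLayout();
        return false;
      }
    };
    char DLCachingPass::ID = 0;
    }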
@@ -435,20 +427,20 @@ void Lint::visitMemoryReference(Instruction &I, if (AllocaInst *AI = dyn_cast(Base)) { Type *ATy = AI->getAllocatedType(); if (!AI->isArrayAllocation() && ATy->isSized()) - BaseSize = DL.getTypeAllocSize(ATy); + BaseSize = DL->getTypeAllocSize(ATy); BaseAlign = AI->getAlignment(); if (BaseAlign == 0 && ATy->isSized()) - BaseAlign = DL.getABITypeAlignment(ATy); + BaseAlign = DL->getABITypeAlignment(ATy); } else if (GlobalVariable *GV = dyn_cast(Base)) { // If the global may be defined differently in another compilation unit // then don't warn about funky memory accesses. if (GV->hasDefinitiveInitializer()) { Type *GTy = GV->getType()->getElementType(); if (GTy->isSized()) - BaseSize = DL.getTypeAllocSize(GTy); + BaseSize = DL->getTypeAllocSize(GTy); BaseAlign = GV->getAlignment(); if (BaseAlign == 0 && GTy->isSized()) - BaseAlign = DL.getABITypeAlignment(GTy); + BaseAlign = DL->getABITypeAlignment(GTy); } } @@ -462,7 +454,7 @@ void Lint::visitMemoryReference(Instruction &I, // Accesses that say that the memory is more aligned than it is are not // defined. if (Align == 0 && Ty && Ty->isSized()) - Align = DL.getABITypeAlignment(Ty); + Align = DL->getABITypeAlignment(Ty); Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), "Undefined behavior: Memory reference address is misaligned", &I); } @@ -470,13 +462,13 @@ void Lint::visitMemoryReference(Instruction &I, void Lint::visitLoadInst(LoadInst &I) { visitMemoryReference(I, I.getPointerOperand(), - AA->getTypeStoreSize(I.getType()), I.getAlignment(), + DL->getTypeStoreSize(I.getType()), I.getAlignment(), I.getType(), MemRef::Read); } void Lint::visitStoreInst(StoreInst &I) { visitMemoryReference(I, I.getPointerOperand(), - AA->getTypeStoreSize(I.getOperand(0)->getType()), + DL->getTypeStoreSize(I.getOperand(0)->getType()), I.getAlignment(), I.getOperand(0)->getType(), MemRef::Write); } @@ -492,208 +484,26 @@ void Lint::visitSub(BinaryOperator &I) { } void Lint::visitLShr(BinaryOperator &I) { - if (ConstantInt *CI = dyn_cast( - findValue(I.getOperand(1), I.getModule()->getDataLayout(), - /*OffsetOk=*/false))) + if (ConstantInt *CI = dyn_cast(findValue(I.getOperand(1), + /*OffsetOk=*/false))) Assert(CI->getValue().ult(cast(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } void Lint::visitAShr(BinaryOperator &I) { - if (ConstantInt *CI = dyn_cast(findValue( - I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + if (ConstantInt *CI = + dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) Assert(CI->getValue().ult(cast(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } void Lint::visitShl(BinaryOperator &I) { - if (ConstantInt *CI = dyn_cast(findValue( - I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) + if (ConstantInt *CI = + dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) Assert(CI->getValue().ult(cast(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } -static bool -allPredsCameFromLandingPad(BasicBlock *BB, - SmallSet &VisitedBlocks) { - VisitedBlocks.insert(BB); - if (BB->isLandingPad()) - return true; - // If we find a block with no predecessors, the search failed. 
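On the visitLShr/visitAShr/visitShl hunks above: the lint rule itself is unchanged (a shift count is only defined when it is strictly less than the bit width of the shifted type); only the shape of the findValue call changed. A minimal standalone model of the check:

    #include <cassert>
    #include <cstdint>

    // Model of the rule behind "Undefined result: Shift count out of range";
    // the patch expresses it as CI->getValue().ult(BitWidth).
    bool shiftCountInRange(uint64_t Count, unsigned BitWidth) {
      return Count < BitWidth;
    }

    int main() {
      assert(shiftCountInRange(31, 32));
      assert(!shiftCountInRange(32, 32)); // would be flagged by the lint
      return 0;
    }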
- if (pred_empty(BB)) - return false; - for (BasicBlock *Pred : predecessors(BB)) { - if (VisitedBlocks.count(Pred)) - continue; - if (!allPredsCameFromLandingPad(Pred, VisitedBlocks)) - return false; - } - return true; -} - -static bool -allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin, - IntrinsicInst **SecondBeginCatch, - SmallSet &VisitedBlocks) { - VisitedBlocks.insert(BB); - for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) { - IntrinsicInst *IC = dyn_cast(I); - if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) - return true; - // If we find another begincatch while looking for an endcatch, - // that's also an error. - if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) { - *SecondBeginCatch = IC; - return false; - } - } - - // If we reach a block with no successors while searching, the - // search has failed. - if (succ_empty(BB)) - return false; - // Otherwise, search all of the successors. - for (BasicBlock *Succ : successors(BB)) { - if (VisitedBlocks.count(Succ)) - continue; - if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch, - VisitedBlocks)) - return false; - } - return true; -} - -void Lint::visitEHBeginCatch(IntrinsicInst *II) { - // The checks in this function make a potentially dubious assumption about - // the CFG, namely that any block involved in a catch is only used for the - // catch. This will very likely be true of IR generated by a front end, - // but it may cease to be true, for example, if the IR is run through a - // pass which combines similar blocks. - // - // In general, if we encounter a block the isn't dominated by the catch - // block while we are searching the catch block's successors for a call - // to end catch intrinsic, then it is possible that it will be legal for - // a path through this block to never reach a call to llvm.eh.endcatch. - // An analogous statement could be made about our search for a landing - // pad among the catch block's predecessors. - // - // What is actually required is that no path is possible at runtime that - // reaches a call to llvm.eh.begincatch without having previously visited - // a landingpad instruction and that no path is possible at runtime that - // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch - // (mentally adjusting for the fact that in reality these calls will be - // removed before code generation). - // - // Because this is a lint check, we take a pessimistic approach and warn if - // the control flow is potentially incorrect. - - SmallSet VisitedBlocks; - BasicBlock *CatchBB = II->getParent(); - - // The begin catch must occur in a landing pad block or all paths - // to it must have come from a landing pad. - Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), - "llvm.eh.begincatch may be reachable without passing a landingpad", - II); - - // Reset the visited block list. - VisitedBlocks.clear(); - - IntrinsicInst *SecondBeginCatch = nullptr; - - // This has to be called before it is asserted. Otherwise, the first assert - // below can never be hit. 
- bool EndCatchFound = allSuccessorsReachEndCatch( - CatchBB, std::next(static_cast(II)), - &SecondBeginCatch, VisitedBlocks); - Assert( - SecondBeginCatch == nullptr, - "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch", - II, SecondBeginCatch); - Assert(EndCatchFound, - "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", - II); -} - -static bool allPredCameFromBeginCatch( - BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin, - IntrinsicInst **SecondEndCatch, SmallSet &VisitedBlocks) { - VisitedBlocks.insert(BB); - // Look for a begincatch in this block. - for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE; - ++RI) { - IntrinsicInst *IC = dyn_cast(&*RI); - if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) - return true; - // If we find another end catch before we find a begin catch, that's - // an error. - if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) { - *SecondEndCatch = IC; - return false; - } - // If we encounter a landingpad instruction, the search failed. - if (isa(*RI)) - return false; - } - // If while searching we find a block with no predeccesors, - // the search failed. - if (pred_empty(BB)) - return false; - // Search any predecessors we haven't seen before. - for (BasicBlock *Pred : predecessors(BB)) { - if (VisitedBlocks.count(Pred)) - continue; - if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch, - VisitedBlocks)) - return false; - } - return true; -} - -void Lint::visitEHEndCatch(IntrinsicInst *II) { - // The check in this function makes a potentially dubious assumption about - // the CFG, namely that any block involved in a catch is only used for the - // catch. This will very likely be true of IR generated by a front end, - // but it may cease to be true, for example, if the IR is run through a - // pass which combines similar blocks. - // - // In general, if we encounter a block the isn't post-dominated by the - // end catch block while we are searching the end catch block's predecessors - // for a call to the begin catch intrinsic, then it is possible that it will - // be legal for a path to reach the end catch block without ever having - // called llvm.eh.begincatch. - // - // What is actually required is that no path is possible at runtime that - // reaches a call to llvm.eh.endcatch without having previously visited - // a call to llvm.eh.begincatch (mentally adjusting for the fact that in - // reality these calls will be removed before code generation). - // - // Because this is a lint check, we take a pessimistic approach and warn if - // the control flow is potentially incorrect. - - BasicBlock *EndCatchBB = II->getParent(); - - // Alls paths to the end catch call must pass through a begin catch call. - - // If llvm.eh.begincatch wasn't called in the current block, we'll use this - // lambda to recursively look for it in predecessors. - SmallSet VisitedBlocks; - IntrinsicInst *SecondEndCatch = nullptr; - - // This has to be called before it is asserted. Otherwise, the first assert - // below can never be hit. 
-  bool BeginCatchFound =
-      allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II),
-                                &SecondEndCatch, VisitedBlocks);
-  Assert(
-      SecondEndCatch == nullptr,
-      "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch",
-      II, SecondEndCatch);
-  Assert(BeginCatchFound,
-         "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch",
-         II);
-}
-
 static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
                    AssumptionCache *AC) {
   // Assume undef could be zero.
@@ -777,25 +587,23 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {
 }

 void Lint::visitExtractElementInst(ExtractElementInst &I) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(
-          findValue(I.getIndexOperand(), I.getModule()->getDataLayout(),
-                    /*OffsetOk=*/false)))
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+                                                        /*OffsetOk=*/false)))
     Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
            "Undefined result: extractelement index out of range", &I);
 }

 void Lint::visitInsertElementInst(InsertElementInst &I) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(
-          findValue(I.getOperand(2), I.getModule()->getDataLayout(),
-                    /*OffsetOk=*/false)))
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+                                                        /*OffsetOk=*/false)))
     Assert(CI->getValue().ult(I.getType()->getNumElements()),
            "Undefined result: insertelement index out of range", &I);
 }

 void Lint::visitUnreachableInst(UnreachableInst &I) {
   // This isn't undefined behavior, it's merely suspicious.
-  Assert(&I == I.getParent()->begin() ||
-             std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+  Assert(&I == &I.getParent()->front() ||
+             std::prev(I.getIterator())->mayHaveSideEffects(),
          "Unusual: unreachable immediately preceded by instruction without "
          "side effects",
          &I);
@@ -808,13 +616,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {
 /// Most analysis passes don't require this logic, because instcombine
 /// will simplify most of these kinds of things away. But it's a goal of
 /// this Lint pass to be useful even on non-optimized IR.
-Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const {
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
   SmallPtrSet<Value *, 4> Visited;
-  return findValueImpl(V, DL, OffsetOk, Visited);
+  return findValueImpl(V, OffsetOk, Visited);
 }

 /// findValueImpl - Implementation helper for findValue.
-Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
                            SmallPtrSetImpl<Value *> &Visited) const {
   // Detect self-referential values.
   if (!Visited.insert(V).second)
@@ -825,17 +633,18 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
   // TODO: Look through eliminable cast pairs.
   // TODO: Look through calls with unique return values.
   // TODO: Look through vector insert/extract/shuffle.
-  V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts();
+  V = OffsetOk ? GetUnderlyingObject(V, *DL) : V->stripPointerCasts();
   if (LoadInst *L = dyn_cast<LoadInst>(V)) {
-    BasicBlock::iterator BBI = L;
+    BasicBlock::iterator BBI = L->getIterator();
     BasicBlock *BB = L->getParent();
     SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
     for (;;) {
       if (!VisitedBlocks.insert(BB).second)
         break;
-      if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
-                                              BB, BBI, 6, AA))
-        return findValueImpl(U, DL, OffsetOk, Visited);
+      if (Value *U =
+              FindAvailableLoadedValue(L->getPointerOperand(),
+                                       BB, BBI, DefMaxInstsToScan, AA))
+        return findValueImpl(U, OffsetOk, Visited);
       if (BBI != BB->begin()) break;
       BB = BB->getUniquePredecessor();
       if (!BB) break;
@@ -844,38 +653,38 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,
   } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
     if (Value *W = PN->hasConstantValue())
       if (W != V)
-        return findValueImpl(W, DL, OffsetOk, Visited);
+        return findValueImpl(W, OffsetOk, Visited);
   } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
-    if (CI->isNoopCast(DL))
-      return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited);
+    if (CI->isNoopCast(*DL))
+      return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
   } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
     if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
                                      Ex->getIndices()))
       if (W != V)
-        return findValueImpl(W, DL, OffsetOk, Visited);
+        return findValueImpl(W, OffsetOk, Visited);
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
     // Same as above, but for ConstantExpr instead of Instruction.
     if (Instruction::isCast(CE->getOpcode())) {
       if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
                                CE->getOperand(0)->getType(), CE->getType(),
-                               DL.getIntPtrType(V->getType())))
-        return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited);
+                               DL->getIntPtrType(V->getType())))
+        return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
     } else if (CE->getOpcode() == Instruction::ExtractValue) {
       ArrayRef<unsigned> Indices = CE->getIndices();
       if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
         if (W != V)
-          return findValueImpl(W, DL, OffsetOk, Visited);
+          return findValueImpl(W, OffsetOk, Visited);
     }
   }

   // As a last resort, try SimplifyInstruction or constant folding.
   if (Instruction *Inst = dyn_cast<Instruction>(V)) {
-    if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC))
-      return findValueImpl(W, DL, OffsetOk, Visited);
+    if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
+      return findValueImpl(W, OffsetOk, Visited);
   } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
-    if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI))
+    if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI))
       if (W != V)
-        return findValueImpl(W, DL, OffsetOk, Visited);
+        return findValueImpl(W, OffsetOk, Visited);
   }

   return V;
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 624c5a18d679..4b2fa3c6505a 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -118,7 +118,8 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
   // from/to. If so, the previous load or store would have already trapped,
   // so there is no harm doing an extra load (also, CSE will later eliminate
   // the load entirely).
-  BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+  BasicBlock::iterator BBI = ScanFrom->getIterator(),
+                       E = ScanFrom->getParent()->begin();

   // We can at least always strip pointer casts even though we can't use the
   // base here.
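The hunk that follows turns the hard-coded scan budget of 6 into the DefMaxInstsToScan option that the Lint change above already passes to FindAvailableLoadedValue. As a minimal standalone sketch of the bounded backward scan being parameterized (plain C++ with invented Inst and findAvailableValue stand-ins, not the LLVM API):

    #include <cstddef>
    #include <optional>
    #include <string>
    #include <vector>

    // Toy stand-in for an instruction; the real scan walks llvm::Instruction.
    struct Inst {
      bool IsStore = false;
      std::string Addr;        // address written, when IsStore
      std::string StoredValue; // value written, when IsStore
    };

    // Walk backwards from position Pos, looking for a store that makes the
    // value at Addr available. Give up once Limit instructions have been
    // examined (the role DefMaxInstsToScan plays below), or when a store
    // that may clobber Addr is seen. The real routine also reuses prior
    // loads and consults alias analysis; this models only the scan budget.
    std::optional<std::string> findAvailableValue(const std::vector<Inst> &BB,
                                                  std::size_t Pos,
                                                  const std::string &Addr,
                                                  unsigned Limit) {
      unsigned Scanned = 0;
      while (Pos > 0) {
        const Inst &I = BB[--Pos];
        if (++Scanned > Limit)
          return std::nullopt; // scan budget exhausted
        if (I.IsStore && I.Addr == Addr)
          return I.StoredValue; // the stored value is available here
        if (I.IsStore)
          return std::nullopt; // may write the same memory; be conservative
      }
      return std::nullopt;
    }

Since the limit becomes a cl::opt, it should also be tunable without rebuilding, e.g. opt -available-load-scan-limit=12; the flag name is taken from the hunk below, and reaching it through clang would presumably require -mllvm.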
@@ -161,6 +162,18 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
   return false;
 }

+/// DefMaxInstsToScan - the default number of maximum instructions
+/// to scan in the block, used by FindAvailableLoadedValue().
+/// FindAvailableLoadedValue() was introduced in r60148, to improve jump
+/// threading in part by eliminating partially redundant loads.
+/// At that point, the value of MaxInstsToScan was already set to '6'
+/// without documented explanation.
+cl::opt<unsigned>
+llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
+  cl::desc("Use this to specify the default maximum number of instructions "
+           "to scan backward from a given instruction, when searching for "
+           "available loaded value"));
+
 /// \brief Scan the ScanBB block backwards to see if we have the value at the
 /// memory address *Ptr locally available within a small number of instructions.
 ///
@@ -199,7 +212,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
   while (ScanFrom != ScanBB->begin()) {
     // We must ignore debug info directives when counting (otherwise they
     // would affect codegen).
-    Instruction *Inst = --ScanFrom;
+    Instruction *Inst = &*--ScanFrom;

     if (isa<DbgInfoIntrinsic>(Inst))
       continue;
@@ -246,9 +259,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,

       // If we have alias analysis and it says the store won't modify the loaded
       // value, ignore the store.
-      if (AA &&
-          (AA->getModRefInfo(SI, StrippedPtr, AccessSize) &
-           AliasAnalysis::Mod) == 0)
+      if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0)
         continue;

       // Otherwise the store that may or may not alias the pointer, bail out.
@@ -261,8 +272,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
       // If alias analysis claims that it really won't modify the load,
       // ignore it.
       if (AA &&
-          (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) &
-           AliasAnalysis::Mod) == 0)
+          (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0)
         continue;

       // May modify the pointer, bail out.
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index becbae4c5b50..d7896ade3543 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -58,12 +58,12 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold(
 /// Maximum SIMD width.
 const unsigned VectorizerParams::MaxVectorWidth = 64;

-/// \brief We collect interesting dependences up to this threshold.
-static cl::opt<unsigned> MaxInterestingDependence(
-    "max-interesting-dependences", cl::Hidden,
-    cl::desc("Maximum number of interesting dependences collected by "
-             "loop-access analysis (default = 100)"),
-    cl::init(100));
+/// \brief We collect dependences up to this threshold.
+static cl::opt<unsigned>
+    MaxDependences("max-dependences", cl::Hidden,
+                   cl::desc("Maximum number of dependences collected by "
+                            "loop-access analysis (default = 100)"),
+                   cl::init(100));

 bool VectorizerParams::isInterleaveForced() {
   return ::VectorizationInterleave.getNumOccurrences() > 0;
@@ -87,11 +87,10 @@ Value *llvm::stripIntegerCast(Value *V) {
   return V;
 }

-const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
+const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                             const ValueToValueMap &PtrToStride,
                                             Value *Ptr, Value *OrigPtr) {
-
-  const SCEV *OrigSCEV = SE->getSCEV(Ptr);
+  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

   // If there is an entry in the map return the SCEV of the pointer with the
   // symbolic stride replaced by one.
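The next hunk replaces the unconditional stride rewrite with a SCEV predicate: rather than assuming a symbolic stride is 1, the analysis records a Stride == 1 predicate in the PredicatedScalarEvolution and leaves it to clients to emit a runtime guard. A source-level illustration of the situation being handled (a hypothetical kernel, not code from this patch):

    // The stride is only known at run time, so Y[i * Stride] is not an
    // affine access unless Stride is pinned down. Under the predicated SCEV,
    // LAA can analyze the loop assuming Stride == 1; a vectorized version is
    // then valid only behind a runtime test of that predicate, with this
    // scalar loop kept as the fallback.
    void saxpy_strided(float *X, float *Y, float A, int N, int Stride) {
      for (int i = 0; i < N; ++i)
        Y[i * Stride] = A * X[i * Stride] + Y[i * Stride];
    }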
@@ -108,36 +107,82 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
     ValueToValueMap RewriteMap;
     RewriteMap[StrideVal] = One;

-    const SCEV *ByOne =
-        SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
-    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
+    ScalarEvolution *SE = PSE.getSE();
+    const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
+    const auto *CT =
+        static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
+
+    PSE.addPredicate(*SE->getEqualPredicate(U, CT));
+    auto *Expr = PSE.getSCEV(Ptr);
+
+    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
                  << "\n");
-    return ByOne;
+    return Expr;
   }

   // Otherwise, just return the SCEV of the original pointer.
-  return SE->getSCEV(Ptr);
+  return OrigSCEV;
 }

 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                     unsigned DepSetId, unsigned ASId,
-                                    const ValueToValueMap &Strides) {
+                                    const ValueToValueMap &Strides,
+                                    PredicatedScalarEvolution &PSE) {
   // Get the stride replaced scev.
-  const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
+  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
   assert(AR && "Invalid addrec expression");
+  ScalarEvolution *SE = PSE.getSE();
   const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
+
+  const SCEV *ScStart = AR->getStart();
   const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
-  Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId,
-                        Sc);
+  const SCEV *Step = AR->getStepRecurrence(*SE);
+
+  // For expressions with negative step, the upper bound is ScStart and the
+  // lower bound is ScEnd.
+  if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) {
+    if (CStep->getValue()->isNegative())
+      std::swap(ScStart, ScEnd);
+  } else {
+    // Fallback case: the step is not constant, but we can still
+    // get the upper and lower bounds of the interval by using min/max
+    // expressions.
+    ScStart = SE->getUMinExpr(ScStart, ScEnd);
+    ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
+  }
+
+  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
 }

-bool RuntimePointerChecking::needsChecking(
-    const CheckingPtrGroup &M, const CheckingPtrGroup &N,
-    const SmallVectorImpl<int> *PtrPartition) const {
+SmallVector<RuntimePointerChecking::PointerCheck, 4>
+RuntimePointerChecking::generateChecks() const {
+  SmallVector<PointerCheck, 4> Checks;
+
+  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
+    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
+      const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
+      const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
+
+      if (needsChecking(CGI, CGJ))
+        Checks.push_back(std::make_pair(&CGI, &CGJ));
+    }
+  }
+  return Checks;
+}
+
+void RuntimePointerChecking::generateChecks(
+    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
+  assert(Checks.empty() && "Checks is not empty");
+  groupChecks(DepCands, UseDependencies);
+  Checks = generateChecks();
+}
+
+bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
+                                           const CheckingPtrGroup &N) const {
   for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
     for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
-      if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
+      if (needsChecking(M.Members[I], N.Members[J]))
         return true;
   return false;
 }
@@ -204,8 +249,31 @@ void RuntimePointerChecking::groupChecks(

   CheckingGroups.clear();

+  // If we need to check two pointers to the same underlying object
+  // with a non-constant difference, we shouldn't perform any pointer
+  // grouping with those pointers. This is because we can easily get
+  // into cases where the resulting check would return false, even when
+  // the accesses are safe.
+  //
+  // The following example shows this:
+  // for (i = 0; i < 1000; ++i)
+  //   a[5000 + i * m] = a[i] + a[i + 9000]
+  //
+  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
+  // (0, 10000) which is always false. However, if m is 1, there is no
+  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
+  // us to perform an accurate check in this case.
+  //
+  // The above case requires that we have an UnknownDependence between
+  // accesses to the same underlying object. This cannot happen unless
+  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
+  // is also false. In this case we will use the fallback path and create
+  // separate checking groups for all pointers.
+
   // If we don't have the dependency partitions, construct a new
-  // checking pointer group for each pointer.
+  // checking pointer group for each pointer. This is also required
+  // for correctness, because in this case we can have checking between
+  // pointers to the same underlying object.
   if (!UseDependencies) {
     for (unsigned I = 0; I < Pointers.size(); ++I)
       CheckingGroups.push_back(CheckingPtrGroup(I, *this));
@@ -222,7 +290,7 @@ void RuntimePointerChecking::groupChecks(
   // don't process them twice.
   SmallSet<unsigned, 2> Seen;

-  // Go through all equivalence classes, get the the "pointer check groups"
+  // Go through all equivalence classes, get the "pointer check groups"
   // and add them to the overall solution. We use the order in which accesses
   // appear in 'Pointers' to enforce determinism.
for (unsigned I = 0; I < Pointers.size(); ++I) { @@ -280,8 +348,14 @@ void RuntimePointerChecking::groupChecks( } } -bool RuntimePointerChecking::needsChecking( - unsigned I, unsigned J, const SmallVectorImpl *PtrPartition) const { +bool RuntimePointerChecking::arePointersInSamePartition( + const SmallVectorImpl &PtrToPartition, unsigned PtrIdx1, + unsigned PtrIdx2) { + return (PtrToPartition[PtrIdx1] != -1 && + PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]); +} + +bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { const PointerInfo &PointerI = Pointers[I]; const PointerInfo &PointerJ = Pointers[J]; @@ -297,85 +371,45 @@ bool RuntimePointerChecking::needsChecking( if (PointerI.AliasSetId != PointerJ.AliasSetId) return false; - // If PtrPartition is set omit checks between pointers of the same partition. - // Partition number -1 means that the pointer is used in multiple partitions. - // In this case we can't omit the check. - if (PtrPartition && (*PtrPartition)[I] != -1 && - (*PtrPartition)[I] == (*PtrPartition)[J]) - return false; - return true; } -void RuntimePointerChecking::print( - raw_ostream &OS, unsigned Depth, - const SmallVectorImpl *PtrPartition) const { - - OS.indent(Depth) << "Run-time memory checks:\n"; - +void RuntimePointerChecking::printChecks( + raw_ostream &OS, const SmallVectorImpl &Checks, + unsigned Depth) const { unsigned N = 0; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) - for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) { - OS.indent(Depth) << "Check " << N++ << ":\n"; - OS.indent(Depth + 2) << "Comparing group " << I << ":\n"; + for (const auto &Check : Checks) { + const auto &First = Check.first->Members, &Second = Check.second->Members; - for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) { - OS.indent(Depth + 2) - << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")" - << "\n"; - } + OS.indent(Depth) << "Check " << N++ << ":\n"; - OS.indent(Depth + 2) << "Against group " << J << ":\n"; + OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n"; + for (unsigned K = 0; K < First.size(); ++K) + OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n"; - for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) { - OS.indent(Depth + 2) - << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n"; - if (PtrPartition) - OS << " (Partition: " - << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")" - << "\n"; - } - } - - OS.indent(Depth) << "Grouped accesses:\n"; - for (unsigned I = 0; I < CheckingGroups.size(); ++I) { - OS.indent(Depth + 2) << "Group " << I << ":\n"; - OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low - << " High: " << *CheckingGroups[I].High << ")\n"; - for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) { - OS.indent(Depth + 6) << "Member: " - << *Pointers[CheckingGroups[I].Members[J]].Expr - << "\n"; - } + OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n"; + for (unsigned K = 0; K < Second.size(); ++K) + OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n"; } } -unsigned RuntimePointerChecking::getNumberOfChecks( - const SmallVectorImpl *PtrPartition) const { +void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { - unsigned NumPartitions = CheckingGroups.size(); - unsigned CheckCount = 0; + 
OS.indent(Depth) << "Run-time memory checks:\n"; + printChecks(OS, Checks, Depth); - for (unsigned I = 0; I < NumPartitions; ++I) - for (unsigned J = I + 1; J < NumPartitions; ++J) - if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) - CheckCount++; - return CheckCount; -} + OS.indent(Depth) << "Grouped accesses:\n"; + for (unsigned I = 0; I < CheckingGroups.size(); ++I) { + const auto &CG = CheckingGroups[I]; -bool RuntimePointerChecking::needsAnyChecking( - const SmallVectorImpl *PtrPartition) const { - unsigned NumPointers = Pointers.size(); - - for (unsigned I = 0; I < NumPointers; ++I) - for (unsigned J = I + 1; J < NumPointers; ++J) - if (needsChecking(I, J, PtrPartition)) - return true; - return false; + OS.indent(Depth + 2) << "Group " << &CG << ":\n"; + OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High + << ")\n"; + for (unsigned J = 0; J < CG.Members.size(); ++J) { + OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr + << "\n"; + } + } } namespace { @@ -390,9 +424,10 @@ public: typedef SmallPtrSet MemAccessInfoSet; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA) - : DL(Dl), AST(*AA), LI(LI), DepCands(DA), - IsRTCheckAnalysisNeeded(false) {} + MemoryDepChecker::DepCandidates &DA, + PredicatedScalarEvolution &PSE) + : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), + PSE(PSE) {} /// \brief Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -435,7 +470,7 @@ public: /// We decided that no dependence analysis would be used. Reset the state. void resetDepChecks(MemoryDepChecker &DepChecker) { CheckDeps.clear(); - DepChecker.clearInterestingDependences(); + DepChecker.clearDependences(); } MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } @@ -477,14 +512,18 @@ private: /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared /// while this remains set if we have potentially dependent accesses. bool IsRTCheckAnalysisNeeded; + + /// The SCEV predicate containing all the SCEV-related assumptions. + PredicatedScalarEvolution &PSE; }; } // end anonymous namespace /// \brief Check whether a pointer can participate in a runtime bounds check. -static bool hasComputableBounds(ScalarEvolution *SE, - const ValueToValueMap &Strides, Value *Ptr) { - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); +static bool hasComputableBounds(PredicatedScalarEvolution &PSE, + const ValueToValueMap &Strides, Value *Ptr, + Loop *L) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) return false; @@ -527,11 +566,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, else ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr) && + if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) { + isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) { // The id of the dependence set. unsigned DepId; @@ -545,7 +584,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // Each access has its own dependence set. 
       DepId = RunningDepId++;

-      RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);
+      RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);

       DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
     } else {
@@ -599,9 +638,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
   }

   if (NeedRTCheck && CanDoRT)
-    RtCheck.groupChecks(DepCands, IsDepCheckNeeded);
+    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

-  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
+  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                << " pointer comparisons.\n");

   RtCheck.Need = NeedRTCheck;
@@ -706,6 +745,11 @@ void AccessAnalysis::processMemAccesses() {
         GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
         DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
         for (Value *UnderlyingObj : TempObjects) {
+          // nullptr never aliases; don't join sets for pointers that have
+          // "null" in their UnderlyingObjects list.
+          if (isa<ConstantPointerNull>(UnderlyingObj))
+            continue;
+
           UnderlyingObjToAccessMap::iterator Prev =
             ObjToLastAccess.find(UnderlyingObj);
           if (Prev != ObjToLastAccess.end())
@@ -775,20 +819,20 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
 }

 /// \brief Check whether the access through \p Ptr has a constant stride.
-int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
-                       const ValueToValueMap &StridesMap) {
-  const Type *Ty = Ptr->getType();
+int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
+                       const Loop *Lp, const ValueToValueMap &StridesMap) {
+  Type *Ty = Ptr->getType();
   assert(Ty->isPointerTy() && "Unexpected non-ptr");

   // Make sure that the pointer does not point to aggregate types.
-  const PointerType *PtrTy = cast<PointerType>(Ty);
+  auto *PtrTy = cast<PointerType>(Ty);
   if (PtrTy->getElementType()->isAggregateType()) {
     DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr
                  << "\n");
     return 0;
   }

-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);
+  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
   if (!AR) {
@@ -811,16 +855,16 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
   // to access the pointer value "0" which is undefined behavior in address
   // space 0, therefore we can also vectorize this case.
   bool IsInBoundsGEP = isInBoundsGep(Ptr);
-  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp);
+  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp);
   bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
   if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
     DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
-          << *Ptr << " SCEV: " << *PtrScev << "\n");
+                 << *Ptr << " SCEV: " << *PtrScev << "\n");
     return 0;
   }

   // Check the step is constant.
-  const SCEV *Step = AR->getStepRecurrence(*SE);
+  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

   // Calculate the pointer stride and check if it is constant.
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
@@ -832,7 +876,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
   auto &DL = Lp->getHeader()->getModule()->getDataLayout();
   int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());

-  const APInt &APStepVal = C->getValue()->getValue();
+  const APInt &APStepVal = C->getAPInt();

   // Huge step value - give up.
if (APStepVal.getBitWidth() > 64) @@ -872,15 +916,15 @@ bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { llvm_unreachable("unexpected DepType!"); } -bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { +bool MemoryDepChecker::Dependence::isBackward() const { switch (Type) { case NoDep: case Forward: + case ForwardButPreventsForwarding: + case Unknown: return false; case BackwardVectorizable: - case Unknown: - case ForwardButPreventsForwarding: case Backward: case BackwardVectorizableButPreventsForwarding: return true; @@ -889,17 +933,21 @@ bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { } bool MemoryDepChecker::Dependence::isPossiblyBackward() const { + return isBackward() || Type == Unknown; +} + +bool MemoryDepChecker::Dependence::isForward() const { switch (Type) { - case NoDep: case Forward: case ForwardButPreventsForwarding: - return false; + return true; + case NoDep: case Unknown: case BackwardVectorizable: case Backward: case BackwardVectorizableButPreventsForwarding: - return true; + return false; } llvm_unreachable("unexpected DepType!"); } @@ -999,11 +1047,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); - const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); + const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr); + const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr); - int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides); - int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides); + int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides); + int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -1020,12 +1068,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, std::swap(StrideAPtr, StrideBPtr); } - const SCEV *Dist = SE->getMinusSCEV(Sink, Src); + const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); + << "(Induction step: " << StrideAPtr << ")\n"); DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " - << *InstMap[BIdx] << ": " << *Dist << "\n"); + << *InstMap[BIdx] << ": " << *Dist << "\n"); // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in @@ -1048,7 +1096,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, unsigned TypeByteSize = DL.getTypeAllocSize(ATy); // Negative distances are not plausible dependencies. - const APInt &Val = C->getValue()->getValue(); + const APInt &Val = C->getAPInt(); if (Val.isNegative()) { bool IsTrueDataDependence = (AIsWrite && !BIsWrite); if (IsTrueDataDependence && @@ -1064,7 +1112,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, // Could be improved to assert type sizes are the same (i32 == float, etc). 
if (Val == 0) { if (ATy == BTy) - return Dependence::NoDep; + return Dependence::Forward; DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n"); return Dependence::Unknown; } @@ -1203,22 +1251,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, isDependent(*A.first, A.second, *B.first, B.second, Strides); SafeForVectorization &= Dependence::isSafeForVectorization(Type); - // Gather dependences unless we accumulated MaxInterestingDependence + // Gather dependences unless we accumulated MaxDependences // dependences. In that case return as soon as we find the first // unsafe dependence. This puts a limit on this quadratic // algorithm. - if (RecordInterestingDependences) { - if (Dependence::isInterestingDependence(Type)) - InterestingDependences.push_back( - Dependence(A.second, B.second, Type)); + if (RecordDependences) { + if (Type != Dependence::NoDep) + Dependences.push_back(Dependence(A.second, B.second, Type)); - if (InterestingDependences.size() >= MaxInterestingDependence) { - RecordInterestingDependences = false; - InterestingDependences.clear(); + if (Dependences.size() >= MaxDependences) { + RecordDependences = false; + Dependences.clear(); DEBUG(dbgs() << "Too many dependences, stopped recording\n"); } } - if (!RecordInterestingDependences && !SafeForVectorization) + if (!RecordDependences && !SafeForVectorization) return false; } ++OI; @@ -1227,8 +1274,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, } } - DEBUG(dbgs() << "Total Interesting Dependences: " - << InterestingDependences.size() << "\n"); + DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); return SafeForVectorization; } @@ -1298,10 +1344,10 @@ bool LoopAccessInfo::canAnalyzeLoop() { } // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(LoopAccessReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(LoopAccessReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); return false; } @@ -1370,7 +1416,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { if (it->mayWriteToMemory()) { StoreInst *St = dyn_cast(it); if (!St) { - emitAnalysis(LoopAccessReport(it) << + emitAnalysis(LoopAccessReport(&*it) << "instruction cannot be vectorized"); CanVecMem = false; return; @@ -1402,7 +1448,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses); + AA, LI, DependentAccesses, PSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1453,7 +1499,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. 
bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) { + if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1483,7 +1529,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides); if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " @@ -1510,6 +1556,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { PtrRtChecking.reset(); PtrRtChecking.Need = true; + auto *SE = PSE.getSE(); CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); @@ -1552,7 +1599,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) { } bool LoopAccessInfo::isUniform(Value *V) const { - return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); + return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop)); } // FIXME: this function is currently a duplicate of the one in @@ -1566,86 +1613,115 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V, return nullptr; } -std::pair LoopAccessInfo::addRuntimeCheck( - Instruction *Loc, const SmallVectorImpl *PtrPartition) const { - if (!PtrRtChecking.Need) - return std::make_pair(nullptr, nullptr); +namespace { +/// \brief IR Values for the lower and upper bounds of a pointer evolution. We +/// need to use value-handles because SCEV expansion can invalidate previously +/// expanded values. Thus expansion of a pointer can invalidate the bounds for +/// a previous one. +struct PointerBounds { + TrackingVH Start; + TrackingVH End; +}; +} // end anonymous namespace - SmallVector, 2> Starts; - SmallVector, 2> Ends; +/// \brief Expand code for the lower and upper bound of the pointer group \p CG +/// in \p TheLoop. \return the values for the bounds. +static PointerBounds +expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, + Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, + const RuntimePointerChecking &PtrRtChecking) { + Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue; + const SCEV *Sc = SE->getSCEV(Ptr); + + if (SE->isLoopInvariant(Sc, TheLoop)) { + DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr + << "\n"); + return {Ptr, Ptr}; + } else { + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + LLVMContext &Ctx = Loc->getContext(); + + // Use this type for pointer arithmetic. + Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); + Value *Start = nullptr, *End = nullptr; + + DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); + Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); + End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); + DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); + return {Start, End}; + } +} + +/// \brief Turns a collection of checks into a collection of expanded upper and +/// lower bounds for both pointers in the check. 
+static SmallVector, 4> expandBounds( + const SmallVectorImpl &PointerChecks, + Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp, + const RuntimePointerChecking &PtrRtChecking) { + SmallVector, 4> ChecksWithBounds; + + // Here we're relying on the SCEV Expander's cache to only emit code for the + // same bounds once. + std::transform( + PointerChecks.begin(), PointerChecks.end(), + std::back_inserter(ChecksWithBounds), + [&](const RuntimePointerChecking::PointerCheck &Check) { + PointerBounds + First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking), + Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking); + return std::make_pair(First, Second); + }); + + return ChecksWithBounds; +} + +std::pair LoopAccessInfo::addRuntimeChecks( + Instruction *Loc, + const SmallVectorImpl &PointerChecks) + const { + auto *SE = PSE.getSE(); + SCEVExpander Exp(*SE, DL, "induction"); + auto ExpandedChecks = + expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); LLVMContext &Ctx = Loc->getContext(); - SCEVExpander Exp(*SE, DL, "induction"); Instruction *FirstInst = nullptr; - - for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { - const RuntimePointerChecking::CheckingPtrGroup &CG = - PtrRtChecking.CheckingGroups[i]; - Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue; - const SCEV *Sc = SE->getSCEV(Ptr); - - if (SE->isLoopInvariant(Sc, TheLoop)) { - DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr - << "\n"); - Starts.push_back(Ptr); - Ends.push_back(Ptr); - } else { - unsigned AS = Ptr->getType()->getPointerAddressSpace(); - - // Use this type for pointer arithmetic. - Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); - Value *Start = nullptr, *End = nullptr; - - DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); - Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc); - End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc); - DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n"); - Starts.push_back(Start); - Ends.push_back(End); - } - } - IRBuilder<> ChkBuilder(Loc); // Our instructions might fold to a constant. 
Value *MemoryRuntimeCheck = nullptr; - for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { - for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) { - const RuntimePointerChecking::CheckingPtrGroup &CGI = - PtrRtChecking.CheckingGroups[i]; - const RuntimePointerChecking::CheckingPtrGroup &CGJ = - PtrRtChecking.CheckingGroups[j]; - if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition)) - continue; + for (const auto &Check : ExpandedChecks) { + const PointerBounds &A = Check.first, &B = Check.second; + // Check if two pointers (A and B) conflict where conflict is computed as: + // start(A) <= end(B) && start(B) <= end(A) + unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); + unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); - unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); - unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); + assert((AS0 == B.End->getType()->getPointerAddressSpace()) && + (AS1 == A.End->getType()->getPointerAddressSpace()) && + "Trying to bounds check pointers with different address spaces"); - assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && - (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && - "Trying to bounds check pointers with different address spaces"); + Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); + Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); - Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); - Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); + Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); + Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); + Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc"); + Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc"); - Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc"); - Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc"); - Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc"); - Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc"); - - Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); - FirstInst = getFirstInst(FirstInst, Cmp0, Loc); - Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); - FirstInst = getFirstInst(FirstInst, Cmp1, Loc); - Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); + FirstInst = getFirstInst(FirstInst, Cmp0, Loc); + Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); + FirstInst = getFirstInst(FirstInst, Cmp1, Loc); + Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); + FirstInst = getFirstInst(FirstInst, IsConflict, Loc); + if (MemoryRuntimeCheck) { + IsConflict = + ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx"); FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - if (MemoryRuntimeCheck) { - IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, - "conflict.rdx"); - FirstInst = getFirstInst(FirstInst, IsConflict, Loc); - } - MemoryRuntimeCheck = IsConflict; } + MemoryRuntimeCheck = IsConflict; } if (!MemoryRuntimeCheck) @@ -1661,12 +1737,20 @@ std::pair LoopAccessInfo::addRuntimeCheck( return std::make_pair(FirstInst, Check); } +std::pair +LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const { + if (!PtrRtChecking.Need) + return std::make_pair(nullptr, nullptr); + + return addRuntimeChecks(Loc, PtrRtChecking.getChecks()); +} + LoopAccessInfo::LoopAccessInfo(Loop *L, 
ScalarEvolution *SE, const DataLayout &DL, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), + : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { @@ -1685,14 +1769,14 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { if (Report) OS.indent(Depth) << "Report: " << Report->str() << "\n"; - if (auto *InterestingDependences = DepChecker.getInterestingDependences()) { - OS.indent(Depth) << "Interesting Dependences:\n"; - for (auto &Dep : *InterestingDependences) { + if (auto *Dependences = DepChecker.getDependences()) { + OS.indent(Depth) << "Dependences:\n"; + for (auto &Dep : *Dependences) { Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions()); OS << "\n"; } } else - OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; + OS.indent(Depth) << "Too many dependences, not recorded\n"; // List the pair of accesses need run-time checks to prove independence. PtrRtChecking.print(OS, Depth); @@ -1701,6 +1785,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { OS.indent(Depth) << "Store to invariant address was " << (StoreToLoopInvariantAddress ? "" : "not ") << "found in loop.\n"; + + OS.indent(Depth) << "SCEV assumptions:\n"; + PSE.getUnionPredicate().print(OS, Depth); } const LoopAccessInfo & @@ -1714,8 +1801,8 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) { if (!LAI) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - LAI = llvm::make_unique(L, SE, DL, TLI, AA, DT, LI, - Strides); + LAI = + llvm::make_unique(L, SE, DL, TLI, AA, DT, LI, Strides); #ifndef NDEBUG LAI->NumSymbolicStrides = Strides.size(); #endif @@ -1737,10 +1824,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const { } bool LoopAccessAnalysis::runOnFunction(Function &F) { - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); auto *TLIP = getAnalysisIfAvailable(); TLI = TLIP ? &TLIP->getTLI() : nullptr; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); @@ -1748,8 +1835,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) { } void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -1761,8 +1848,8 @@ static const char laa_name[] = "Loop Access Analysis"; #define LAA_NAME "loop-accesses" INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 6b6faf8a66c3..9ab9eead584f 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -102,8 +102,8 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed, return false; if (I->mayReadFromMemory()) return false; - // The landingpad instruction is immobile. 
-  if (isa<LandingPadInst>(I))
+  // EH block instructions are immobile.
+  if (I->isEHPad())
     return false;
   // Determine the insertion point, unless one was given.
   if (!InsertPt) {
@@ -120,6 +120,13 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,

   // Hoist.
   I->moveBefore(InsertPt);
+
+  // There is a possibility of hoisting this instruction above some arbitrary
+  // condition. Any metadata defined on it can be control dependent on this
+  // condition. Conservatively strip it here so that we don't give any wrong
+  // information to the optimizer.
+  I->dropUnknownNonDebugMetadata();
+
   Changed = true;
   return true;
 }
@@ -172,7 +179,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {
 bool Loop::isLCSSAForm(DominatorTree &DT) const {
   for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
     BasicBlock *BB = *BI;
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) {
+      // Tokens can't be used in PHI nodes and live-out tokens prevent loop
+      // optimizations, so for the purposes of considering LCSSA form, we
+      // can ignore them.
+      if (I->getType()->isTokenTy())
+        continue;
+
       for (Use &U : I->uses()) {
         Instruction *UI = cast<Instruction>(U.getUser());
         BasicBlock *UserBB = UI->getParent();
@@ -188,11 +201,21 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {
             DT.isReachableFromEntry(UserBB))
           return false;
       }
+    }
   }

   return true;
 }

+bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const {
+  if (!isLCSSAForm(DT))
+    return false;
+
+  return std::all_of(begin(), end(), [&](const Loop *L) {
+    return L->isRecursivelyLCSSAForm(DT);
+  });
+}
+
 /// isLoopSimplifyForm - Return true if the Loop is in the form that
 /// the LoopSimplify form transforms loops to, which is sometimes called
 /// normal form.
@@ -211,15 +234,23 @@ bool Loop::isSafeToClone() const {
     if (isa<IndirectBrInst>((*I)->getTerminator()))
       return false;

-    if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator()))
+    if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
       if (II->cannotDuplicate())
         return false;
+      // Return false if any loop blocks contain invokes to EH-pads other than
+      // landingpads; we don't know how to split those edges yet.
+      auto *FirstNonPHI = II->getUnwindDest()->getFirstNonPHI();
+      if (FirstNonPHI->isEHPad() && !isa<LandingPadInst>(FirstNonPHI))
+        return false;
+    }

     for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
       if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
         if (CI->cannotDuplicate())
           return false;
       }
+      if (BI->getType()->isTokenTy() && BI->isUsedOutsideOfBlock(*I))
+        return false;
     }
   }
   return true;
@@ -602,14 +633,12 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
   return NearLoop;
 }

-/// updateUnloop - The last backedge has been removed from a loop--now the
-/// "unloop". Find a new parent for the blocks contained within unloop and
-/// update the loop tree. We don't necessarily have valid dominators at this
-/// point, but LoopInfo is still valid except for the removal of this loop.
-///
-/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without
-/// checking first is illegal.
+LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
+  analyze(DomTree);
+}
+
 void LoopInfo::updateUnloop(Loop *Unloop) {
+  Unloop->markUnlooped();

   // First handle the special case of no parent loop to simplify the algorithm.
   if (!Unloop->getParentLoop()) {
@@ -675,7 +704,7 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) {
   // objects. I don't want to add that kind of complexity until the scope of
   // the problem is better understood.
   LoopInfo LI;
-  LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F));
+  LI.analyze(AM->getResult<DominatorTreeAnalysis>(F));
   return LI;
 }

@@ -685,6 +714,20 @@ PreservedAnalyses LoopPrinterPass::run(Function &F,
   return PreservedAnalyses::all();
 }

+PrintLoopPass::PrintLoopPass() : OS(dbgs()) {}
+PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner)
+    : OS(OS), Banner(Banner) {}
+
+PreservedAnalyses PrintLoopPass::run(Loop &L) {
+  OS << Banner;
+  for (auto *Block : L.blocks())
+    if (Block)
+      Block->print(OS);
+    else
+      OS << "Printing <null> block";
+  return PreservedAnalyses::all();
+}
+
 //===----------------------------------------------------------------------===//
 // LoopInfo implementation
 //
@@ -698,7 +741,7 @@ INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information",

 bool LoopInfoWrapperPass::runOnFunction(Function &) {
   releaseMemory();
-  LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
+  LI.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());
   return false;
 }
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index e9fcf02118b9..dc424734dd56 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/IR/IRPrintingPasses.h"
 #include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Timer.h"
 #include "llvm/Support/raw_ostream.h"
@@ -27,35 +28,26 @@ namespace {

 /// PrintLoopPass - Print a Function corresponding to a Loop.
 ///
-class PrintLoopPass : public LoopPass {
-private:
-  std::string Banner;
-  raw_ostream &Out; // raw_ostream to print on.
+class PrintLoopPassWrapper : public LoopPass {
+  PrintLoopPass P;

 public:
   static char ID;
-  PrintLoopPass(const std::string &B, raw_ostream &o)
-      : LoopPass(ID), Banner(B), Out(o) {}
+  PrintLoopPassWrapper() : LoopPass(ID) {}
+  PrintLoopPassWrapper(raw_ostream &OS, const std::string &Banner)
+      : LoopPass(ID), P(OS, Banner) {}

   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
   }

   bool runOnLoop(Loop *L, LPPassManager &) override {
-    Out << Banner;
-    for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
-         b != be;
-         ++b) {
-      if (*b)
-        (*b)->print(Out);
-      else
-        Out << "Printing <null> block";
-    }
+    P.run(*L);
     return false;
   }
 };

-char PrintLoopPass::ID = 0;
+char PrintLoopPassWrapper::ID = 0;
 }

 //===----------------------------------------------------------------------===//
@@ -66,81 +58,34 @@ char LPPassManager::ID = 0;

 LPPassManager::LPPassManager()
   : FunctionPass(ID), PMDataManager() {
-  skipThisLoop = false;
-  redoThisLoop = false;
   LI = nullptr;
   CurrentLoop = nullptr;
 }

-/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
-void LPPassManager::deleteLoopFromQueue(Loop *L) {
+// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
+Loop &LPPassManager::addLoop(Loop *ParentLoop) {
+  // Create a new loop. LI will take ownership.
+  Loop *L = new Loop();

-  LI->updateUnloop(L);
+  // Insert into the loop nest and the loop queue.
+  if (!ParentLoop) {
+    // This is the top level loop.
+    LI->addTopLevelLoop(L);
+    LQ.push_front(L);
+    return *L;
+  }

-  // Notify passes that the loop is being deleted.
-  deleteSimpleAnalysisLoop(L);
-
-  // If L is current loop then skip rest of the passes and let
-  // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
-  // and continue applying other passes on CurrentLoop.
-  if (CurrentLoop == L)
-    skipThisLoop = true;
-
-  delete L;
-
-  if (skipThisLoop)
-    return;
-
-  for (std::deque<Loop *>::iterator I = LQ.begin(),
-         E = LQ.end(); I != E; ++I) {
-    if (*I == L) {
-      LQ.erase(I);
+  ParentLoop->addChildLoop(L);
+  // Insert L into the loop queue after the parent loop.
+  for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) {
+    if (*I == L->getParentLoop()) {
+      // deque does not support insert after.
+      ++I;
+      LQ.insert(I, 1, L);
       break;
     }
   }
-}
-
-// Inset loop into loop nest (LoopInfo) and loop queue (LQ).
-void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
-
-  assert (CurrentLoop != L && "Cannot insert CurrentLoop");
-
-  // Insert into loop nest
-  if (ParentLoop)
-    ParentLoop->addChildLoop(L);
-  else
-    LI->addTopLevelLoop(L);
-
-  insertLoopIntoQueue(L);
-}
-
-void LPPassManager::insertLoopIntoQueue(Loop *L) {
-  // Insert L into loop queue
-  if (L == CurrentLoop)
-    redoLoop(L);
-  else if (!L->getParentLoop())
-    // This is top level loop.
-    LQ.push_front(L);
-  else {
-    // Insert L after the parent loop.
-    for (std::deque<Loop *>::iterator I = LQ.begin(),
-           E = LQ.end(); I != E; ++I) {
-      if (*I == L->getParentLoop()) {
-        // deque does not support insert after.
-        ++I;
-        LQ.insert(I, 1, L);
-        break;
-      }
-    }
-  }
-}
-
-// Reoptimize this loop. LPPassManager will re-insert this loop into the
-// queue. This allows LoopPass to change loop nest for the loop. This
-// utility may send LPPassManager into infinite loops so use caution.
-void LPPassManager::redoLoop(Loop *L) {
-  assert (CurrentLoop == L && "Can redo only CurrentLoop");
-  redoThisLoop = true;
+  return *L;
 }

 /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
@@ -230,10 +175,7 @@ bool LPPassManager::runOnFunction(Function &F) {

   // Walk Loops
   while (!LQ.empty()) {
-    CurrentLoop = LQ.back();
-    skipThisLoop = false;
-    redoThisLoop = false;
-
+    CurrentLoop = LQ.back();
     // Run all passes on the current Loop.
     for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
       LoopPass *P = getContainedPass(Index);
@@ -253,11 +195,15 @@ bool LPPassManager::runOnFunction(Function &F) {

       if (Changed)
         dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
-                     skipThisLoop ? "<deleted>" :
-                                    CurrentLoop->getHeader()->getName());
+                     CurrentLoop->isUnloop()
+                         ? "<deleted>"
+                         : CurrentLoop->getHeader()->getName());
       dumpPreservedSet(P);

-      if (!skipThisLoop) {
+      if (CurrentLoop->isUnloop()) {
+        // Notify passes that the loop is being deleted.
+        deleteSimpleAnalysisLoop(CurrentLoop);
+      } else {
         // Manually check that this loop is still healthy. This is done
         // instead of relying on LoopInfo::verifyLoop since LoopInfo
         // is a function pass and it's really expensive to verify every
@@ -276,12 +222,12 @@ bool LPPassManager::runOnFunction(Function &F) {
       removeNotPreservedAnalysis(P);
       recordAvailableAnalysis(P);
-      removeDeadPasses(P,
-                       skipThisLoop ? "<deleted>" :
-                                      CurrentLoop->getHeader()->getName(),
+      removeDeadPasses(P, CurrentLoop->isUnloop()
+                              ? "<deleted>"
+                              : CurrentLoop->getHeader()->getName(),
                        ON_LOOP_MSG);

-      if (skipThisLoop)
+      if (CurrentLoop->isUnloop())
         // Do not run other passes on this loop.
         break;
     }

     // If the loop was deleted, release all the loop passes. This frees up
     // some memory, and avoids trouble with the pass manager trying to call
     // verifyAnalysis on them.
-    if (skipThisLoop)
+    if (CurrentLoop->isUnloop()) {
       for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
         Pass *P = getContainedPass(Index);
         freePass(P, "", ON_LOOP_MSG);
       }
+      delete CurrentLoop;
+    }

     // Pop the loop from queue after running all passes.
     LQ.pop_back();
-
-    if (redoThisLoop)
-      LQ.push_back(CurrentLoop);
   }

   // Finalization
@@ -327,7 +272,7 @@ void LPPassManager::dumpPassStructure(unsigned Offset) {

 Pass *LoopPass::createPrinterPass(raw_ostream &O,
                                   const std::string &Banner) const {
-  return new PrintLoopPass(Banner, O);
+  return new PrintLoopPassWrapper(O, Banner);
 }

 // Check if this pass is suitable for the current LPPassManager, if
diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile
index 4af6d350a645..93fd7f9bdd93 100644
--- a/lib/Analysis/Makefile
+++ b/lib/Analysis/Makefile
@@ -9,7 +9,6 @@ LEVEL = ../..

 LIBRARYNAME = LLVMAnalysis
-DIRS = IPA
 BUILD_ARCHIVE = 1

 include $(LEVEL)/Makefile.common
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index da3b829b6d31..078cefe51807 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -49,7 +49,7 @@ namespace {
     void print(raw_ostream &OS, const Module * = nullptr) const override;

     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequiredTransitive<AliasAnalysis>();
+      AU.addRequiredTransitive<AAResultsWrapperPass>();
       AU.addRequiredTransitive<MemoryDependenceAnalysis>();
       AU.setPreservesAll();
     }
@@ -96,7 +96,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
   // All this code uses non-const interfaces because MemDep is not
   // const-friendly, though nothing is actually modified.
-  for (auto &I : inst_range(F)) {
+  for (auto &I : instructions(F)) {
     Instruction *Inst = &I;

     if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
@@ -135,7 +135,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
 }

 void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
-  for (const auto &I : inst_range(*F)) {
+  for (const auto &I : instructions(*F)) {
     const Instruction *Inst = &I;

     DepSetMap::const_iterator DI = Deps.find(Inst);
diff --git a/lib/Analysis/MemDerefPrinter.cpp b/lib/Analysis/MemDerefPrinter.cpp
index fa292a28ec87..36f1424c8cf9 100644
--- a/lib/Analysis/MemDerefPrinter.cpp
+++ b/lib/Analysis/MemDerefPrinter.cpp
@@ -22,7 +22,8 @@ using namespace llvm;

 namespace {
   struct MemDerefPrinter : public FunctionPass {
-    SmallVector<Value *, 4> Vec;
+    SmallVector<Value *, 4> Deref;
+    SmallPtrSet<Value *, 4> DerefAndAligned;

     static char ID; // Pass identification, replacement for typeid
     MemDerefPrinter() : FunctionPass(ID) {
@@ -34,7 +35,8 @@ namespace {
     bool runOnFunction(Function &F) override;
     void print(raw_ostream &OS, const Module * = nullptr) const override;
     void releaseMemory() override {
-      Vec.clear();
+      Deref.clear();
+      DerefAndAligned.clear();
     }
   };
 }
@@ -51,11 +53,13 @@ FunctionPass *llvm::createMemDerefPrinter() {

 bool MemDerefPrinter::runOnFunction(Function &F) {
   const DataLayout &DL = F.getParent()->getDataLayout();
-  for (auto &I: inst_range(F)) {
+  for (auto &I: instructions(F)) {
     if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
       Value *PO = LI->getPointerOperand();
       if (isDereferenceablePointer(PO, DL))
-        Vec.push_back(PO);
+        Deref.push_back(PO);
+      if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL))
+        DerefAndAligned.insert(PO);
     }
   }
   return false;
@@ -63,8 +67,12 @@ bool MemDerefPrinter::runOnFunction(Function &F) {

 void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
   OS << "The following are dereferenceable:\n";
-  for (auto &V: Vec) {
+  for (Value *V: Deref) {
     V->print(OS);
+    if (DerefAndAligned.count(V))
+
OS << "\t(aligned)"; + else + OS << "\t(unaligned)"; OS << "\n\n"; } } diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 8ddac8ffb971..b19ecadd3161 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -31,7 +31,7 @@ using namespace llvm; #define DEBUG_TYPE "memory-builtins" -enum AllocType { +enum AllocType : uint8_t { OpNewLike = 1<<0, // allocates; never returns null MallocLike = 1<<1 | OpNewLike, // allocates; may return null CallocLike = 1<<2, // allocates + bzero @@ -62,6 +62,14 @@ static const AllocFnsTy AllocationFnData[] = { {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long) {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {LibFunc::msvc_new_int, OpNewLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::msvc_new_int_nothrow, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {LibFunc::msvc_new_longlong, OpNewLike, 1, 0, -1}, // new(unsigned long long) + {LibFunc::msvc_new_longlong_nothrow, MallocLike, 2, 0, -1}, // new(unsigned long long, nothrow) + {LibFunc::msvc_new_array_int, OpNewLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::msvc_new_array_int_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {LibFunc::msvc_new_array_longlong, OpNewLike, 1, 0, -1}, // new[](unsigned long long) + {LibFunc::msvc_new_array_longlong_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned long long, nothrow) {LibFunc::calloc, CallocLike, 2, 0, 1}, {LibFunc::realloc, ReallocLike, 2, 1, -1}, {LibFunc::reallocf, ReallocLike, 2, 1, -1}, @@ -107,18 +115,13 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) return nullptr; - unsigned i = 0; - bool found = false; - for ( ; i < array_lengthof(AllocationFnData); ++i) { - if (AllocationFnData[i].Func == TLIFn) { - found = true; - break; - } - } - if (!found) + const AllocFnsTy *FnData = + std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData), + [TLIFn](const AllocFnsTy &Fn) { return Fn.Func == TLIFn; }); + + if (FnData == std::end(AllocationFnData)) return nullptr; - const AllocFnsTy *FnData = &AllocationFnData[i]; if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) return nullptr; @@ -184,13 +187,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, return getAllocationData(V, AllocLike, TLI, LookThroughBitCast); } -/// \brief Tests if a value is a call or invoke to a library function that -/// reallocates memory (such as realloc). -bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, - bool LookThroughBitCast) { - return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); -} - /// \brief Tests if a value is a call or invoke to a library function that /// allocates memory and never returns null (such as operator new). 
bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, @@ -313,14 +309,26 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { unsigned ExpectedNumParams; if (TLIFn == LibFunc::free || TLIFn == LibFunc::ZdlPv || // operator delete(void*) - TLIFn == LibFunc::ZdaPv) // operator delete[](void*) + TLIFn == LibFunc::ZdaPv || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*) ExpectedNumParams = 1; else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint) TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong) TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint) TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong) - TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) + TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint) + TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong) + TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint) + TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) + TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) + TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) ExpectedNumParams = 2; else return nullptr; @@ -621,7 +629,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { // always generate code immediately before the instruction being // processed, so that the generated code dominates the same BBs - Instruction *PrevInsertPoint = Builder.GetInsertPoint(); + BuilderTy::InsertPointGuard Guard(Builder); if (Instruction *I = dyn_cast(V)) Builder.SetInsertPoint(I); @@ -650,9 +658,6 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { Result = unknown(); } - if (PrevInsertPoint) - Builder.SetInsertPoint(PrevInsertPoint); - // Don't reuse CacheIt since it may be invalid at this point. 
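
Editorial sketch: the ObjectSizeOffsetEvaluator hunk above trades manual save/restore of the builder position for an RAII guard. A minimal illustration of the pattern, outside the patch (the helper name is hypothetical; the guard type is the real IRBuilder API):

    void emitBefore(llvm::IRBuilder<> &Builder, llvm::Instruction *I,
                    llvm::Value *A, llvm::Value *B) {
      // The guard records the current insertion block/point and debug loc.
      llvm::IRBuilder<>::InsertPointGuard Guard(Builder);
      Builder.SetInsertPoint(I);       // temporarily emit just before I
      Builder.CreateAdd(A, B, "tmp");  // lands at the redirected position
    }                                  // old insertion point restored here

Because the guard restores the position on every exit path, the removed PrevInsertPoint bookkeeping and its null check become unnecessary.
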
CacheMap[V] = Result; return Result; @@ -742,7 +747,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { // compute offset/size for each PHI incoming pointer for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) { - Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt()); + Builder.SetInsertPoint(&*PHI.getIncomingBlock(i)->getFirstInsertionPt()); SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i)); if (!bothKnown(EdgeData)) { diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 782a67bf72d5..3e80bfe1fdfb 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -22,7 +22,9 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/OrderedBasicBlock.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -49,7 +51,11 @@ STATISTIC(NumCacheCompleteNonLocalPtr, "Number of block queries that were completely cached"); // Limit for the number of instructions to scan in a block. -static const unsigned int BlockScanLimit = 100; + +static cl::opt<unsigned> BlockScanLimit( "memdep-block-scan-limit", cl::Hidden, cl::init(100), cl::desc("The number of instructions to scan in a block in memory " "dependency analysis (default = 100)")); // Limit on the number of memdep results to process. static const unsigned int NumResultsLimit = 100; @@ -60,7 +66,8 @@ char MemoryDependenceAnalysis::ID = 0; INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) @@ -87,15 +94,17 @@ void MemoryDependenceAnalysis::releaseMemory() { void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired<AssumptionCacheTracker>(); - AU.addRequiredTransitive<AliasAnalysis>(); + AU.addRequiredTransitive<AAResultsWrapperPass>(); + AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); } bool MemoryDependenceAnalysis::runOnFunction(Function &F) { - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); DominatorTreeWrapperPass *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DT = DTWP ?
&DTWP->getDomTree() : nullptr; + TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); return false; } @@ -118,43 +127,43 @@ static void RemoveFromReverseMap(DenseMap<Instruction *, -static AliasAnalysis::ModRefResult -GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { +static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, + const TargetLibraryInfo &TLI) { if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { if (LI->isUnordered()) { Loc = MemoryLocation::get(LI); - return AliasAnalysis::Ref; + return MRI_Ref; } if (LI->getOrdering() == Monotonic) { Loc = MemoryLocation::get(LI); - return AliasAnalysis::ModRef; + return MRI_ModRef; } Loc = MemoryLocation(); - return AliasAnalysis::ModRef; + return MRI_ModRef; } if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { if (SI->isUnordered()) { Loc = MemoryLocation::get(SI); - return AliasAnalysis::Mod; + return MRI_Mod; } if (SI->getOrdering() == Monotonic) { Loc = MemoryLocation::get(SI); - return AliasAnalysis::ModRef; + return MRI_ModRef; } Loc = MemoryLocation(); - return AliasAnalysis::ModRef; + return MRI_ModRef; } if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) { Loc = MemoryLocation::get(V); - return AliasAnalysis::ModRef; + return MRI_ModRef; } - if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) { + if (const CallInst *CI = isFreeCall(Inst, &TLI)) { // calls to free() deallocate the entire structure Loc = MemoryLocation(CI->getArgOperand(0)); - return AliasAnalysis::Mod; + return MRI_Mod; } if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -170,7 +179,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return AliasAnalysis::Mod; + return MRI_Mod; case Intrinsic::invariant_end: II->getAAMetadata(AAInfo); Loc = MemoryLocation( @@ -178,7 +187,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo); // These intrinsics don't really modify the memory, but returning Mod // will allow them to be handled conservatively. - return AliasAnalysis::Mod; + return MRI_Mod; default: break; } @@ -186,10 +195,10 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { // Otherwise, just do the coarse-grained thing that always works. if (Inst->mayWriteToMemory()) - return AliasAnalysis::ModRef; + return MRI_ModRef; if (Inst->mayReadFromMemory()) - return AliasAnalysis::Ref; - return AliasAnalysis::NoModRef; + return MRI_Ref; + return MRI_NoModRef; } /// getCallSiteDependencyFrom - Private helper for finding the local @@ -207,14 +216,14 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, if (!Limit) return MemDepResult::getUnknown(); - Instruction *Inst = --ScanIt; + Instruction *Inst = &*--ScanIt; // If this inst is a memory op, get the pointer it accessed MemoryLocation Loc; - AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); + ModRefInfo MR = GetLocation(Inst, Loc, *TLI); if (Loc.Ptr) { // A simple instruction. - if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef) + if (AA->getModRefInfo(CS, Loc) != MRI_NoModRef) return MemDepResult::getClobber(Inst); continue; } @@ -224,10 +233,10 @@ if (isa<DbgInfoIntrinsic>(Inst)) continue; // If these two calls do not interfere, look past it. switch (AA->getModRefInfo(CS, InstCS)) { - case AliasAnalysis::NoModRef: + case MRI_NoModRef: // If the two calls are the same, return InstCS as a Def, so that // CS can be found redundant and eliminated.
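
An orientation note on the enum used throughout these hunks: the mechanical rewrite from AliasAnalysis::ModRefResult to the free-standing ModRefInfo keeps the bit-flag encoding, which (quoting the 3.8-era llvm/Analysis/AliasAnalysis.h from memory, so verify against the header) is:

    enum ModRefInfo {
      MRI_NoModRef = 0,
      MRI_Ref = 1,
      MRI_Mod = 2,
      MRI_ModRef = MRI_Ref | MRI_Mod
    };
    // This encoding is why tests in these hunks, such as !(MR & MRI_Mod)
    // and MR != MRI_NoModRef, remain plain integer bit tests.
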
- if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) && + if (isReadOnlyCall && !(MR & MRI_Mod) && CS.getInstruction()->isIdenticalToWhenDefined(Inst)) return MemDepResult::getDef(Inst); @@ -241,7 +250,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // If we could not obtain a pointer for the instruction and the instruction // touches memory then assume that this is a dependency. - if (MR != AliasAnalysis::NoModRef) + if (MR != MRI_NoModRef) return MemDepResult::getClobber(Inst); } @@ -371,6 +380,75 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst) { + if (QueryInst != nullptr) { + if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { + MemDepResult invariantGroupDependency = + getInvariantGroupPointerDependency(LI, BB); + + if (invariantGroupDependency.isDef()) + return invariantGroupDependency; + } + } + return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst); +} + +MemDepResult +MemoryDependenceAnalysis::getInvariantGroupPointerDependency(LoadInst *LI, + BasicBlock *BB) { + Value *LoadOperand = LI->getPointerOperand(); + // It is not safe to walk the use list of a global value, because function + // passes aren't allowed to look outside their functions. + if (isa<GlobalValue>(LoadOperand)) + return MemDepResult::getUnknown(); + + auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group); + if (!InvariantGroupMD) + return MemDepResult::getUnknown(); + + MemDepResult Result = MemDepResult::getUnknown(); + llvm::SmallSet<Value *, 14> Seen; + // Queue to process all pointers that are equivalent to load operand. + llvm::SmallVector<Value *, 8> LoadOperandsQueue; + LoadOperandsQueue.push_back(LoadOperand); + while (!LoadOperandsQueue.empty()) { + Value *Ptr = LoadOperandsQueue.pop_back_val(); + if (isa<GlobalValue>(Ptr)) + continue; + + if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) { + if (!Seen.count(BCI->getOperand(0))) { + LoadOperandsQueue.push_back(BCI->getOperand(0)); + Seen.insert(BCI->getOperand(0)); + } + } + + for (Use &Us : Ptr->uses()) { + auto *U = dyn_cast<Instruction>(Us.getUser()); + if (!U || U == LI || !DT->dominates(U, LI)) + continue; + + if (auto *BCI = dyn_cast<BitCastInst>(U)) { + if (!Seen.count(BCI)) { + LoadOperandsQueue.push_back(BCI); + Seen.insert(BCI); + } + continue; + } + // If we hit a load/store with the same invariant.group metadata (and the + // same pointer operand), we can assume that the value pointed to by the + // pointer operand didn't change. + if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB && + U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) + return MemDepResult::getDef(U); + } + } + return Result; +} + +MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom( + const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, + BasicBlock *BB, Instruction *QueryInst) { + const Value *MemLocBase = nullptr; int64_t MemLocOffset = 0; unsigned Limit = BlockScanLimit; @@ -416,9 +494,15 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( const DataLayout &DL = BB->getModule()->getDataLayout(); + // Create a numbered basic block to lazily compute and cache instruction + // positions inside a BB. This is used to provide fast queries for relative + // position between two instructions in a BB and can be used by + // AliasAnalysis::callCapturesBefore. + OrderedBasicBlock OBB(BB); + // Walk backwards through the basic block, looking for dependencies.
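
Before the scan loop resumes below, a sketch of how a client typically consumes this analysis under the legacy pass manager (the helper is hypothetical; the MemDepResult accessors are the real API):

    void inspect(llvm::MemoryDependenceAnalysis &MD, llvm::LoadInst *LI,
                 llvm::raw_ostream &OS) {
      llvm::MemDepResult Dep = MD.getDependency(LI); // in-block query
      if (Dep.isDef())            // found the unique access LI depends on,
        Dep.getInst()->print(OS); // e.g. a store through an invariant.group
                                  // pointer located by the walk above
      else if (Dep.isNonLocal())
        OS << "dependency lies outside the query block\n";
    }
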
while (ScanIt != BB->begin()) { - Instruction *Inst = --ScanIt; + Instruction *Inst = &*--ScanIt; if (IntrinsicInst *II = dyn_cast(Inst)) // Debug intrinsics don't (and can't) cause dependencies. @@ -567,7 +651,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. - if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef) + if (AA->getModRefInfo(SI, MemLoc) == MRI_NoModRef) continue; // Ok, this store might clobber the query pointer. Check to see if it is @@ -594,7 +678,6 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( // a subsequent bitcast of the malloc call result. There can be stores to // the malloced memory between the malloc call and its bitcast uses, and we // need to continue scanning until the malloc call. - const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); if (isa(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL); @@ -616,17 +699,17 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom( continue; // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. - AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); + ModRefInfo MR = AA->getModRefInfo(Inst, MemLoc); // If necessary, perform additional analysis. - if (MR == AliasAnalysis::ModRef) - MR = AA->callCapturesBefore(Inst, MemLoc, DT); + if (MR == MRI_ModRef) + MR = AA->callCapturesBefore(Inst, MemLoc, DT, &OBB); switch (MR) { - case AliasAnalysis::NoModRef: + case MRI_NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; - case AliasAnalysis::Mod: + case MRI_Mod: return MemDepResult::getClobber(Inst); - case AliasAnalysis::Ref: + case MRI_Ref: // If the call is known to never store to the pointer, and if this is a // load query, we can safely ignore it (scan past it). if (isLoad) @@ -677,20 +760,20 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { LocalCache = MemDepResult::getNonFuncLocal(); } else { MemoryLocation MemLoc; - AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); + ModRefInfo MR = GetLocation(QueryInst, MemLoc, *TLI); if (MemLoc.Ptr) { // If we can do a pointer scan, make it happen. - bool isLoad = !(MR & AliasAnalysis::Mod); + bool isLoad = !(MR & MRI_Mod); if (IntrinsicInst *II = dyn_cast(QueryInst)) isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; - LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, - QueryParent, QueryInst); + LocalCache = getPointerDependencyFrom( + MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst); } else if (isa(QueryInst) || isa(QueryInst)) { CallSite QueryCS(QueryInst); bool isReadOnly = AA->onlyReadsMemory(QueryCS); - LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, - QueryParent); + LocalCache = getCallSiteDependencyFrom( + QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent); } else // Non-memory instruction. LocalCache = MemDepResult::getUnknown(); @@ -813,7 +896,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { BasicBlock::iterator ScanPos = DirtyBB->end(); if (ExistingResult) { if (Instruction *Inst = ExistingResult->getResult().getInst()) { - ScanPos = Inst; + ScanPos = Inst->getIterator(); // We're removing QueryInst's use of Inst. 
RemoveFromReverseMap(ReverseNonLocalDeps, Inst, QueryCS.getInstruction()); @@ -952,11 +1035,11 @@ MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock( assert(ExistingResult->getResult().getInst()->getParent() == BB && "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; - ScanPos = ExistingResult->getResult().getInst(); + ScanPos = ExistingResult->getResult().getInst()->getIterator(); // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); - RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); + RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey); } else { ++NumUncacheNonLocalPtr; } @@ -1507,7 +1590,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // the entire block to get to this point. MemDepResult NewDirtyVal; if (!RemInst->isTerminator()) - NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); + NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator()); ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); if (ReverseDepIt != ReverseLocalDeps.end()) { @@ -1614,7 +1697,6 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); - AA->deleteValue(RemInst); DEBUG(verifyRemoved(RemInst)); } /// verifyRemoved - Verify that the specified instruction does not occur diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp deleted file mode 100644 index 322a9a80de4c..000000000000 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ /dev/null @@ -1,95 +0,0 @@ -//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the default implementation of the Alias Analysis interface -// that simply returns "I don't know" for all queries. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { - /// NoAA - This class implements the -no-aa pass, which always returns "I - /// don't know" for alias queries. NoAA is unlike other alias analysis - /// implementations, in that it does not chain to a previous analysis. As - /// such it doesn't follow many of the rules that other alias analyses must. - /// - struct NoAA : public ImmutablePass, public AliasAnalysis { - static char ID; // Class identification, replacement for typeinfo - NoAA() : ImmutablePass(ID) { - initializeNoAAPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const override {} - - bool doInitialization(Module &M) override { - // Note: NoAA does not call InitializeAliasAnalysis because it's - // special and does not support chaining. 
- DL = &M.getDataLayout(); - return true; - } - - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override { - return MayAlias; - } - - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { - return UnknownModRefBehavior; - } - ModRefBehavior getModRefBehavior(const Function *F) override { - return UnknownModRefBehavior; - } - - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override { - return false; - } - ModRefResult getArgModRefInfo(ImmutableCallSite CS, - unsigned ArgIdx) override { - return ModRef; - } - - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override { - return ModRef; - } - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override { - return ModRef; - } - - void deleteValue(Value *V) override {} - void addEscapingUse(Use &U) override {} - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *ID) override { - if (ID == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - }; -} // End of anonymous namespace - -// Register this pass... -char NoAA::ID = 0; -INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", - "No Alias Analysis (always returns 'may' alias)", - true, true, true) - -ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Analysis/ObjCARCAliasAnalysis.cpp similarity index 53% rename from lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp rename to lib/Analysis/ObjCARCAliasAnalysis.cpp index 3893aab76b2a..25f660ffe221 100644 --- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp +++ b/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -18,66 +18,46 @@ /// used. Naive LLVM IR transformations which would otherwise be /// behavior-preserving may break these assumptions. /// +/// TODO: Theoretically we could check for dependencies between objc_* calls +/// and FMRB_OnlyAccessesArgumentPointees calls or other well-behaved calls. +/// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" -#include "ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/IR/Function.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/PassAnalysisSupport.h" #include "llvm/PassSupport.h" #define DEBUG_TYPE "objc-arc-aa" -namespace llvm { - class Function; - class Value; -} - using namespace llvm; using namespace llvm::objcarc; -// Register this pass... 
-char ObjCARCAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa", - "ObjC-ARC-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createObjCARCAliasAnalysisPass() { - return new ObjCARCAliasAnalysis(); -} - -bool ObjCARCAliasAnalysis::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; -} - -void -ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); -} - -AliasResult ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { +AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { if (!EnableARCOpts) - return AliasAnalysis::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB); // First, strip off no-ops, including ObjC-specific no-ops, and try making a // precise alias query. const Value *SA = GetRCIdentityRoot(LocA.Ptr); const Value *SB = GetRCIdentityRoot(LocB.Ptr); AliasResult Result = - AliasAnalysis::alias(MemoryLocation(SA, LocA.Size, LocA.AATags), - MemoryLocation(SB, LocB.Size, LocB.AATags)); + AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags), + MemoryLocation(SB, LocB.Size, LocB.AATags)); if (Result != MayAlias) return Result; // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *UA = GetUnderlyingObjCPtr(SA, *DL); - const Value *UB = GetUnderlyingObjCPtr(SB, *DL); + const Value *UA = GetUnderlyingObjCPtr(SA, DL); + const Value *UB = GetUnderlyingObjCPtr(SB, DL); if (UA != SA || UB != SB) { - Result = AliasAnalysis::alias(MemoryLocation(UA), MemoryLocation(UB)); + Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB)); // We can't use MustAlias or PartialAlias results here because // GetUnderlyingObjCPtr may return an offsetted pointer value. if (Result == NoAlias) @@ -89,55 +69,47 @@ AliasResult ObjCARCAliasAnalysis::alias(const MemoryLocation &LocA, return MayAlias; } -bool ObjCARCAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { +bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) { if (!EnableARCOpts) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); // First, strip off no-ops, including ObjC-specific no-ops, and try making // a precise alias query. const Value *S = GetRCIdentityRoot(Loc.Ptr); - if (AliasAnalysis::pointsToConstantMemory( + if (AAResultBase::pointsToConstantMemory( MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal)) return true; // If that failed, climb to the underlying object, including climbing through // ObjC-specific no-ops, and try making an imprecise alias query. - const Value *U = GetUnderlyingObjCPtr(S, *DL); + const Value *U = GetUnderlyingObjCPtr(S, DL); if (U != S) - return AliasAnalysis::pointsToConstantMemory(MemoryLocation(U), OrLocal); + return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal); // If that failed, fail. We don't need to chain here, since that's covered // by the earlier precise query. return false; } -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { - // We have nothing to do. Just chain to the next AliasAnalysis. 
- return AliasAnalysis::getModRefBehavior(CS); -} - -AliasAnalysis::ModRefBehavior -ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) { +FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) { if (!EnableARCOpts) - return AliasAnalysis::getModRefBehavior(F); + return AAResultBase::getModRefBehavior(F); switch (GetFunctionClass(F)) { case ARCInstKind::NoopCast: - return DoesNotAccessMemory; + return FMRB_DoesNotAccessMemory; default: break; } - return AliasAnalysis::getModRefBehavior(F); + return AAResultBase::getModRefBehavior(F); } -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { +ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { if (!EnableARCOpts) - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); switch (GetBasicARCInstKind(CS.getInstruction())) { case ARCInstKind::Retain: @@ -151,18 +123,48 @@ ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, // These functions don't access any memory visible to the compiler. // Note that this doesn't include objc_retainBlock, because it updates // pointers when it copies block data. - return NoModRef; + return MRI_NoModRef; default: break; } - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); } -AliasAnalysis::ModRefResult -ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { - // TODO: Theoretically we could check for dependencies between objc_* calls - // and OnlyAccessesArgumentPointees calls or other well-behaved calls. - return AliasAnalysis::getModRefInfo(CS1, CS2); +ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> *AM) { + return ObjCARCAAResult(F.getParent()->getDataLayout(), + AM->getResult<TargetLibraryAnalysis>(F)); +} + +char ObjCARCAA::PassID; + +char ObjCARCAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCAAWrapperPass, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ObjCARCAAWrapperPass, "objc-arc-aa", + "ObjC-ARC-Based Alias Analysis", false, true) + +ImmutablePass *llvm::createObjCARCAAWrapperPass() { + return new ObjCARCAAWrapperPass(); +} + +ObjCARCAAWrapperPass::ObjCARCAAWrapperPass() : ImmutablePass(ID) { + initializeObjCARCAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool ObjCARCAAWrapperPass::doInitialization(Module &M) { + Result.reset(new ObjCARCAAResult( + M.getDataLayout(), getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); + return false; +} + +bool ObjCARCAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; +} + +void ObjCARCAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); } diff --git a/lib/Analysis/ObjCARCAnalysisUtils.cpp b/lib/Analysis/ObjCARCAnalysisUtils.cpp new file mode 100644 index 000000000000..e3e74aa249da --- /dev/null +++ b/lib/Analysis/ObjCARCAnalysisUtils.cpp @@ -0,0 +1,28 @@ +//===- ObjCARCAnalysisUtils.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements common infrastructure for libLLVMObjCARCOpts.a, which +// implements several scalar transformations over the LLVM intermediate +// representation, including the C bindings for that library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; +using namespace llvm::objcarc; + +/// \brief A handy option to enable/disable all ARC Optimizations. +bool llvm::objcarc::EnableARCOpts; +static cl::opt +EnableARCOptimizations("enable-objc-arc-opts", + cl::desc("enable/disable all ARC Optimizations"), + cl::location(EnableARCOpts), + cl::init(true)); diff --git a/lib/Transforms/ObjCARC/ARCInstKind.cpp b/lib/Analysis/ObjCARCInstKind.cpp similarity index 99% rename from lib/Transforms/ObjCARC/ARCInstKind.cpp rename to lib/Analysis/ObjCARCInstKind.cpp index afb873a355a7..133b63513c87 100644 --- a/lib/Transforms/ObjCARC/ARCInstKind.cpp +++ b/lib/Analysis/ObjCARCInstKind.cpp @@ -19,7 +19,9 @@ /// //===----------------------------------------------------------------------===// -#include "ObjCARC.h" +#include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/IR/Intrinsics.h" using namespace llvm; @@ -91,7 +93,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { .Default(ARCInstKind::CallOrUser); // One argument. - const Argument *A0 = AI++; + const Argument *A0 = &*AI++; if (AI == AE) // Argument is a pointer. if (PointerType *PTy = dyn_cast(A0->getType())) { @@ -129,7 +131,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { } // Two arguments, first is i8**. - const Argument *A1 = AI++; + const Argument *A1 = &*AI++; if (AI == AE) if (PointerType *PTy = dyn_cast(A0->getType())) if (PointerType *Pte = dyn_cast(PTy->getElementType())) diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp new file mode 100644 index 000000000000..0f0016f22cc0 --- /dev/null +++ b/lib/Analysis/OrderedBasicBlock.cpp @@ -0,0 +1,85 @@ +//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the OrderedBasicBlock class. OrderedBasicBlock +// maintains an interface where clients can query if one instruction comes +// before another in a BasicBlock. Since BasicBlock currently lacks a reliable +// way to query relative position between instructions one can use +// OrderedBasicBlock to do such queries. OrderedBasicBlock is lazily built on a +// source BasicBlock and maintains an internal Instruction -> Position map. A +// OrderedBasicBlock instance should be discarded whenever the source +// BasicBlock changes. +// +// It's currently used by the CaptureTracker in order to find relative +// positions of a pair of instructions inside a BasicBlock. 
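
A sketch of the intended usage, ahead of the implementation below (the wrapper function is hypothetical; the OrderedBasicBlock API is the one defined in this file):

    #include "llvm/Analysis/OrderedBasicBlock.h"
    #include "llvm/IR/Instruction.h"

    // Build once per unchanged block, query many times: the first query walks
    // and numbers instructions until A or B is found; later queries are just
    // map lookups.
    bool comesFirst(const llvm::Instruction *A, const llvm::Instruction *B) {
      llvm::OrderedBasicBlock OBB(A->getParent());
      return OBB.dominates(A, B); // true iff A precedes B inside the block
    }
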
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/IR/Instruction.h" +using namespace llvm; + +OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB) + : NextInstPos(0), BB(BasicB) { + LastInstFound = BB->end(); +} + +/// \brief Given no cached results, find if \p A comes before \p B in \p BB. +/// Cache and number out instruction while walking \p BB. +bool OrderedBasicBlock::comesBefore(const Instruction *A, + const Instruction *B) { + const Instruction *Inst = nullptr; + assert(!(LastInstFound == BB->end() && NextInstPos != 0) && + "Instruction supposed to be in NumberedInsts"); + + // Start the search with the instruction found in the last lookup round. + auto II = BB->begin(); + auto IE = BB->end(); + if (LastInstFound != IE) + II = std::next(LastInstFound); + + // Number all instructions up to the point where we find 'A' or 'B'. + for (; II != IE; ++II) { + Inst = cast(II); + NumberedInsts[Inst] = NextInstPos++; + if (Inst == A || Inst == B) + break; + } + + assert(II != IE && "Instruction not found?"); + assert((Inst == A || Inst == B) && "Should find A or B"); + LastInstFound = II; + return Inst == A; +} + +/// \brief Find out whether \p A dominates \p B, meaning whether \p A +/// comes before \p B in \p BB. This is a simplification that considers +/// cached instruction positions and ignores other basic blocks, being +/// only relevant to compare relative instructions positions inside \p BB. +bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) { + assert(A->getParent() == B->getParent() && + "Instructions must be in the same basic block!"); + + // First we lookup the instructions. If they don't exist, lookup will give us + // back ::end(). If they both exist, we compare the numbers. Otherwise, if NA + // exists and NB doesn't, it means NA must come before NB because we would + // have numbered NB as well if it didn't. The same is true for NB. If it + // exists, but NA does not, NA must come after it. If neither exist, we need + // to number the block and cache the results (by calling comesBefore). 
+ auto NAI = NumberedInsts.find(A); + auto NBI = NumberedInsts.find(B); + if (NAI != NumberedInsts.end() && NBI != NumberedInsts.end()) + return NAI->second < NBI->second; + if (NAI != NumberedInsts.end()) + return true; + if (NBI != NumberedInsts.end()) + return false; + + return comesBefore(A, B); +} diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 8cd85348fdcc..f59d26730327 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -21,6 +21,9 @@ #include #include #include +#ifndef NDEBUG +#include "llvm/Analysis/RegionPrinter.h" +#endif using namespace llvm; @@ -103,6 +106,12 @@ void RegionInfo::recalculate(Function &F, DominatorTree *DT_, calculate(F); } +#ifndef NDEBUG +void RegionInfo::view() { viewRegion(this); } + +void RegionInfo::viewOnly() { viewRegionOnly(this); } +#endif + //===----------------------------------------------------------------------===// // RegionInfoPass implementation // diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index d7f510984881..acb218d5fea0 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -20,6 +20,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include "llvm/IR/LegacyPassManager.h" +#endif using namespace llvm; @@ -55,25 +58,22 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } }; -template<> -struct DOTGraphTraits : public DOTGraphTraits { +template <> +struct DOTGraphTraits : public DOTGraphTraits { DOTGraphTraits (bool isSimple = false) : DOTGraphTraits(isSimple) {} - static std::string getGraphName(RegionInfoPass *DT) { - return "Region Graph"; - } + static std::string getGraphName(const RegionInfo *) { return "Region Graph"; } - std::string getNodeLabel(RegionNode *Node, RegionInfoPass *G) { - RegionInfo &RI = G->getRegionInfo(); - return DOTGraphTraits::getNodeLabel(Node, - reinterpret_cast(RI.getTopLevelRegion())); + std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { + return DOTGraphTraits::getNodeLabel( + Node, reinterpret_cast(G->getTopLevelRegion())); } std::string getEdgeAttributes(RegionNode *srcNode, - GraphTraits::ChildIteratorType CI, RegionInfoPass *G) { - RegionInfo &RI = G->getRegionInfo(); + GraphTraits::ChildIteratorType CI, + RegionInfo *G) { RegionNode *destNode = *CI; if (srcNode->isSubRegion() || destNode->isSubRegion()) @@ -83,7 +83,7 @@ struct DOTGraphTraits : public DOTGraphTraits { BasicBlock *srcBB = srcNode->getNodeAs(); BasicBlock *destBB = destNode->getNodeAs(); - Region *R = RI.getRegionFor(destBB); + Region *R = G->getRegionFor(destBB); while (R && R->getParent()) if (R->getParent()->getEntry() == destBB) @@ -91,7 +91,7 @@ struct DOTGraphTraits : public DOTGraphTraits { else break; - if (R->getEntry() == destBB && R->contains(srcBB)) + if (R && R->getEntry() == destBB && R->contains(srcBB)) return "constraint=false"; return ""; @@ -99,8 +99,7 @@ struct DOTGraphTraits : public DOTGraphTraits { // Print the cluster of the subregions. This groups the single basic blocks // and adds a different background color for each group. 
- static void printRegionCluster(const Region &R, - GraphWriter &GW, + static void printRegionCluster(const Region &R, GraphWriter &GW, unsigned depth = 0) { raw_ostream &O = GW.getOStream(); O.indent(2 * depth) << "subgraph cluster_" << static_cast(&R) @@ -132,50 +131,81 @@ struct DOTGraphTraits : public DOTGraphTraits { O.indent(2 * depth) << "}\n"; } - static void addCustomGraphFeatures(const RegionInfoPass* RIP, - GraphWriter &GW) { - const RegionInfo &RI = RIP->getRegionInfo(); + static void addCustomGraphFeatures(const RegionInfo *G, + GraphWriter &GW) { raw_ostream &O = GW.getOStream(); O << "\tcolorscheme = \"paired12\"\n"; - printRegionCluster(*RI.getTopLevelRegion(), GW, 4); + printRegionCluster(*G->getTopLevelRegion(), GW, 4); } }; } //end namespace llvm namespace { -struct RegionViewer - : public DOTGraphTraitsViewer { +struct RegionInfoPassGraphTraits { + static RegionInfo *getGraph(RegionInfoPass *RIP) { + return &RIP->getRegionInfo(); + } +}; + +struct RegionPrinter + : public DOTGraphTraitsPrinter { static char ID; - RegionViewer() : DOTGraphTraitsViewer("reg", ID){ + RegionPrinter() + : DOTGraphTraitsPrinter("reg", ID) { + initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionPrinter::ID = 0; + +struct RegionOnlyPrinter + : public DOTGraphTraitsPrinter { + static char ID; + RegionOnlyPrinter() + : DOTGraphTraitsPrinter("reg", ID) { + initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionOnlyPrinter::ID = 0; + +struct RegionViewer + : public DOTGraphTraitsViewer { + static char ID; + RegionViewer() + : DOTGraphTraitsViewer("reg", ID) { initializeRegionViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionViewer::ID = 0; struct RegionOnlyViewer - : public DOTGraphTraitsViewer { + : public DOTGraphTraitsViewer { static char ID; - RegionOnlyViewer() : DOTGraphTraitsViewer("regonly", ID) { + RegionOnlyViewer() + : DOTGraphTraitsViewer("regonly", ID) { initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); } }; char RegionOnlyViewer::ID = 0; -struct RegionPrinter - : public DOTGraphTraitsPrinter { - static char ID; - RegionPrinter() : - DOTGraphTraitsPrinter("reg", ID) { - initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); - } -}; -char RegionPrinter::ID = 0; } //end anonymous namespace INITIALIZE_PASS(RegionPrinter, "dot-regions", "Print regions of function to 'dot' file", true, true) +INITIALIZE_PASS( + RegionOnlyPrinter, "dot-regions-only", + "Print regions of function to 'dot' file (with no function bodies)", true, + true) + INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", true, true) @@ -183,25 +213,12 @@ INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", "View regions of function (with no function bodies)", true, true) -namespace { - -struct RegionOnlyPrinter - : public DOTGraphTraitsPrinter { - static char ID; - RegionOnlyPrinter() : - DOTGraphTraitsPrinter("reg", ID) { - initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); - } -}; +FunctionPass *llvm::createRegionPrinterPass() { return new RegionPrinter(); } +FunctionPass *llvm::createRegionOnlyPrinterPass() { + return new RegionOnlyPrinter(); } -char RegionOnlyPrinter::ID = 0; -INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", - "Print regions of function to 'dot' file " - "(with no function bodies)", - true, true) - FunctionPass* llvm::createRegionViewerPass() { return new RegionViewer(); } @@ -210,11 +227,41 @@ FunctionPass* llvm::createRegionOnlyViewerPass() { return new 
RegionOnlyViewer(); } -FunctionPass* llvm::createRegionPrinterPass() { - return new RegionPrinter(); +#ifndef NDEBUG +static void viewRegionInfo(RegionInfo *RI, bool ShortNames) { + assert(RI && "Argument must be non-null"); + + llvm::Function *F = RI->getTopLevelRegion()->getEntry()->getParent(); + std::string GraphName = DOTGraphTraits::getGraphName(RI); + + llvm::ViewGraph(RI, "reg", ShortNames, + Twine(GraphName) + " for '" + F->getName() + "' function"); } -FunctionPass* llvm::createRegionOnlyPrinterPass() { - return new RegionOnlyPrinter(); +static void invokeFunctionPass(const Function *F, FunctionPass *ViewerPass) { + assert(F && "Argument must be non-null"); + assert(!F->isDeclaration() && "Function must have an implementation"); + + // The viewer and analysis passes do not modify anything, so we can safely + // remove the const qualifier + auto NonConstF = const_cast(F); + + llvm::legacy::FunctionPassManager FPM(NonConstF->getParent()); + FPM.add(ViewerPass); + FPM.doInitialization(); + FPM.run(*NonConstF); + FPM.doFinalization(); } +void llvm::viewRegion(RegionInfo *RI) { viewRegionInfo(RI, false); } + +void llvm::viewRegion(const Function *F) { + invokeFunctionPass(F, createRegionViewerPass()); +} + +void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); } + +void llvm::viewRegionOnly(const Function *F) { + invokeFunctionPass(F, createRegionOnlyViewerPass()); +} +#endif diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 9c7c1754e387..34074efd1ceb 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -83,11 +83,13 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SaveAndRestore.h" #include using namespace llvm; @@ -114,16 +116,6 @@ static cl::opt VerifySCEV("verify-scev", cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); -INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", - "Scalar Evolution Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", - "Scalar Evolution Analysis", false, true) -char ScalarEvolution::ID = 0; - //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -132,12 +124,11 @@ char ScalarEvolution::ID = 0; // Implementation of the SCEV class. // -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SCEV::dump() const { print(dbgs()); dbgs() << '\n'; } -#endif void SCEV::print(raw_ostream &OS) const { switch (static_cast(getSCEVType())) { @@ -303,7 +294,7 @@ bool SCEV::isNonConstantNegative() const { if (!SC) return false; // Return true if the value is negative, this matches things like (-42 * V). 
- return SC->getValue()->getValue().isNegative(); + return SC->getAPInt().isNegative(); } SCEVCouldNotCompute::SCEVCouldNotCompute() : @@ -455,179 +446,179 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { //===----------------------------------------------------------------------===// namespace { - /// SCEVComplexityCompare - Return true if the complexity of the LHS is less - /// than the complexity of the RHS. This comparator is used to canonicalize - /// expressions. - class SCEVComplexityCompare { - const LoopInfo *const LI; - public: - explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} +/// SCEVComplexityCompare - Return true if the complexity of the LHS is less +/// than the complexity of the RHS. This comparator is used to canonicalize +/// expressions. +class SCEVComplexityCompare { + const LoopInfo *const LI; +public: + explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} - // Return true or false if LHS is less than, or at least RHS, respectively. - bool operator()(const SCEV *LHS, const SCEV *RHS) const { - return compare(LHS, RHS) < 0; - } + // Return true or false if LHS is less than, or at least RHS, respectively. + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + return compare(LHS, RHS) < 0; + } - // Return negative, zero, or positive, if LHS is less than, equal to, or - // greater than RHS, respectively. A three-way result allows recursive - // comparisons to be more efficient. - int compare(const SCEV *LHS, const SCEV *RHS) const { - // Fast-path: SCEVs are uniqued so we can do a quick equality check. - if (LHS == RHS) - return 0; + // Return negative, zero, or positive, if LHS is less than, equal to, or + // greater than RHS, respectively. A three-way result allows recursive + // comparisons to be more efficient. + int compare(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; - // Primarily, sort the SCEVs by their getSCEVType(). - unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); - if (LType != RType) - return (int)LType - (int)RType; + // Primarily, sort the SCEVs by their getSCEVType(). + unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; - // Aside from the getSCEVType() ordering, the particular ordering - // isn't very important except that it's beneficial to be consistent, - // so that (a + b) and (b + a) don't end up as different expressions. - switch (static_cast(LType)) { - case scUnknown: { - const SCEVUnknown *LU = cast(LHS); - const SCEVUnknown *RU = cast(RHS); + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (static_cast(LType)) { + case scUnknown: { + const SCEVUnknown *LU = cast(LHS); + const SCEVUnknown *RU = cast(RHS); - // Sort SCEVUnknown values with some loose heuristics. TODO: This is - // not as complete as it could be. - const Value *LV = LU->getValue(), *RV = RU->getValue(); + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); - // Order pointer values after integer values. This helps SCEVExpander - // form GEPs. 
- bool LIsPointer = LV->getType()->isPointerTy(), - RIsPointer = RV->getType()->isPointerTy(); - if (LIsPointer != RIsPointer) - return (int)LIsPointer - (int)RIsPointer; + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; - // Compare getValueID values. - unsigned LID = LV->getValueID(), - RID = RV->getValueID(); - if (LID != RID) - return (int)LID - (int)RID; + // Compare getValueID values. + unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; - // Sort arguments by their position. - if (const Argument *LA = dyn_cast(LV)) { - const Argument *RA = cast(RV); - unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); - return (int)LArgNo - (int)RArgNo; - } - - // For instructions, compare their loop depth, and their operand - // count. This is pretty loose. - if (const Instruction *LInst = dyn_cast(LV)) { - const Instruction *RInst = cast(RV); - - // Compare loop depths. - const BasicBlock *LParent = LInst->getParent(), - *RParent = RInst->getParent(); - if (LParent != RParent) { - unsigned LDepth = LI->getLoopDepth(LParent), - RDepth = LI->getLoopDepth(RParent); - if (LDepth != RDepth) - return (int)LDepth - (int)RDepth; - } - - // Compare the number of operands. - unsigned LNumOps = LInst->getNumOperands(), - RNumOps = RInst->getNumOperands(); - return (int)LNumOps - (int)RNumOps; - } - - return 0; + // Sort arguments by their position. + if (const Argument *LA = dyn_cast(LV)) { + const Argument *RA = cast(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; } - case scConstant: { - const SCEVConstant *LC = cast(LHS); - const SCEVConstant *RC = cast(RHS); + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. + if (const Instruction *LInst = dyn_cast(LV)) { + const Instruction *RInst = cast(RV); - // Compare constant values. - const APInt &LA = LC->getValue()->getValue(); - const APInt &RA = RC->getValue()->getValue(); - unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); - if (LBitWidth != RBitWidth) - return (int)LBitWidth - (int)RBitWidth; - return LA.ult(RA) ? -1 : 1; - } - - case scAddRecExpr: { - const SCEVAddRecExpr *LA = cast(LHS); - const SCEVAddRecExpr *RA = cast(RHS); - - // Compare addrec loop depths. - const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); - if (LLoop != RLoop) { - unsigned LDepth = LLoop->getLoopDepth(), - RDepth = RLoop->getLoopDepth(); + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); if (LDepth != RDepth) return (int)LDepth - (int)RDepth; } - // Addrec complexity grows with operand count. - unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; - - // Lexicographically compare. - for (unsigned i = 0; i != LNumOps; ++i) { - long X = compare(LA->getOperand(i), RA->getOperand(i)); - if (X != 0) - return X; - } - - return 0; - } - - case scAddExpr: - case scMulExpr: - case scSMaxExpr: - case scUMaxExpr: { - const SCEVNAryExpr *LC = cast(LHS); - const SCEVNAryExpr *RC = cast(RHS); - - // Lexicographically compare n-ary expressions. 
- unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; - - for (unsigned i = 0; i != LNumOps; ++i) { - if (i >= RNumOps) - return 1; - long X = compare(LC->getOperand(i), RC->getOperand(i)); - if (X != 0) - return X; - } + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); return (int)LNumOps - (int)RNumOps; } - case scUDivExpr: { - const SCEVUDivExpr *LC = cast(LHS); - const SCEVUDivExpr *RC = cast(RHS); + return 0; + } - // Lexicographically compare udiv expressions. - long X = compare(LC->getLHS(), RC->getLHS()); + case scConstant: { + const SCEVConstant *LC = cast(LHS); + const SCEVConstant *RC = cast(RHS); + + // Compare constant values. + const APInt &LA = LC->getAPInt(); + const APInt &RA = RC->getAPInt(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; + } + + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast(LHS); + const SCEVAddRecExpr *RA = cast(RHS); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. + for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); if (X != 0) return X; - return compare(LC->getRHS(), RC->getRHS()); } - case scTruncate: - case scZeroExtend: - case scSignExtend: { - const SCEVCastExpr *LC = cast(LHS); - const SCEVCastExpr *RC = cast(RHS); - - // Compare cast expressions by operand. - return compare(LC->getOperand(), RC->getOperand()); - } - - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - } - llvm_unreachable("Unknown SCEV kind!"); + return 0; } - }; -} + + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast(LHS); + const SCEVNAryExpr *RC = cast(RHS); + + // Lexicographically compare n-ary expressions. + unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); + if (X != 0) + return X; + } + return (int)LNumOps - (int)RNumOps; + } + + case scUDivExpr: { + const SCEVUDivExpr *LC = cast(LHS); + const SCEVUDivExpr *RC = cast(RHS); + + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); + } + + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast(LHS); + const SCEVCastExpr *RC = cast(RHS); + + // Compare cast expressions by operand. 
+ return compare(LC->getOperand(), RC->getOperand()); + } + + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + llvm_unreachable("Unknown SCEV kind!"); + } +}; +} // end anonymous namespace /// GroupByComplexity - Given a list of SCEV objects, order them by their /// complexity, and group objects of the same complexity together by value. @@ -675,24 +666,22 @@ static void GroupByComplexity(SmallVectorImpl &Ops, } } -namespace { -struct FindSCEVSize { - int Size; - FindSCEVSize() : Size(0) {} - - bool follow(const SCEV *S) { - ++Size; - // Keep looking at all operands of S. - return true; - } - bool isDone() const { - return false; - } -}; -} - // Returns the size of the SCEV S. static inline int sizeOfSCEV(const SCEV *S) { + struct FindSCEVSize { + int Size; + FindSCEVSize() : Size(0) {} + + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. + return true; + } + bool isDone() const { + return false; + } + }; + FindSCEVSize F; SCEVTraversal ST(F); ST.visitAll(S); @@ -771,8 +760,8 @@ public: void visitConstant(const SCEVConstant *Numerator) { if (const SCEVConstant *D = dyn_cast(Denominator)) { - APInt NumeratorVal = Numerator->getValue()->getValue(); - APInt DenominatorVal = D->getValue()->getValue(); + APInt NumeratorVal = Numerator->getAPInt(); + APInt DenominatorVal = D->getAPInt(); uint32_t NumeratorBW = NumeratorVal.getBitWidth(); uint32_t DenominatorBW = DenominatorVal.getBitWidth(); @@ -792,17 +781,15 @@ public: void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { const SCEV *StartQ, *StartR, *StepQ, *StepR; - assert(Numerator->isAffine() && "Numerator should be affine"); + if (!Numerator->isAffine()) + return cannotDivide(Numerator); divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); // Bail out if the types do not match. Type *Ty = Denominator->getType(); if (Ty != StartQ->getType() || Ty != StartR->getType() || - Ty != StepQ->getType() || Ty != StepR->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + Ty != StepQ->getType() || Ty != StepR->getType()) + return cannotDivide(Numerator); Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), Numerator->getNoWrapFlags()); Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), @@ -818,11 +805,8 @@ public: divide(SE, Op, Denominator, &Q, &R); // Bail out if types do not match. - if (Ty != Q->getType() || Ty != R->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (Ty != Q->getType() || Ty != R->getType()) + return cannotDivide(Numerator); Qs.push_back(Q); Rs.push_back(R); @@ -845,11 +829,8 @@ public: bool FoundDenominatorTerm = false; for (const SCEV *Op : Numerator->operands()) { // Bail out if types do not match. - if (Ty != Op->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (Ty != Op->getType()) + return cannotDivide(Numerator); if (FoundDenominatorTerm) { Qs.push_back(Op); @@ -865,11 +846,8 @@ public: } // Bail out if types do not match. 
- if (Ty != Q->getType()) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (Ty != Q->getType()) + return cannotDivide(Numerator); FoundDenominatorTerm = true; Qs.push_back(Q); @@ -884,11 +862,8 @@ public: return; } - if (!isa(Denominator)) { - Quotient = Zero; - Remainder = Numerator; - return; - } + if (!isa(Denominator)) + return cannotDivide(Numerator); // The Remainder is obtained by replacing Denominator by 0 in Numerator. ValueToValueMap RewriteMap; @@ -908,15 +883,12 @@ public: // Quotient is (Numerator - Remainder) divided by Denominator. const SCEV *Q, *R; const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); - if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) { - // This SCEV does not seem to simplify: fail the division here. - Quotient = Zero; - Remainder = Numerator; - return; - } + // This SCEV does not seem to simplify: fail the division here. + if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) + return cannotDivide(Numerator); divide(SE, Diff, Denominator, &Q, &R); - assert(R == Zero && - "(Numerator - Remainder) should evenly divide Denominator"); + if (R != Zero) + return cannotDivide(Numerator); Quotient = Q; } @@ -924,11 +896,18 @@ private: SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator) : SE(S), Denominator(Denominator) { - Zero = SE.getConstant(Denominator->getType(), 0); - One = SE.getConstant(Denominator->getType(), 1); + Zero = SE.getZero(Denominator->getType()); + One = SE.getOne(Denominator->getType()); - // By default, we don't know how to divide Expr by Denominator. - // Providing the default here simplifies the rest of the code. + // We generally do not know how to divide Expr by Denominator. We + // initialize the division to a "cannot divide" state to simplify the rest + // of the code. + cannotDivide(Numerator); + } + + // Convenience function for giving up on the division. We set the quotient to + // be equal to zero and the remainder to be equal to the numerator. + void cannotDivide(const SCEV *Numerator) { Quotient = Zero; Remainder = Numerator; } @@ -1151,8 +1130,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // If the input value is a chrec scev, truncate the chrec's operands. if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { SmallVector Operands; - for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) - Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); + for (const SCEV *Op : AddRec->operands()) + Operands.push_back(getTruncateExpr(Op, Ty)); return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } @@ -1287,7 +1266,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, // `Step`: // 1. NSW/NUW flags on the step increment. 
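Aside: the hunk above folds every repeated "Quotient = Zero; Remainder = Numerator; return;" block into one cannotDivide() helper; the void-returning "return cannotDivide(N);" idiom makes each bail-out a single line while preserving the invariant Numerator == Quotient * Denominator + Remainder. A toy version of the convention with plain integers and a hypothetical ToyDivision type:

#include <cassert>

struct ToyDivision {
  long Quotient = 0, Remainder = 0;

  // Giving up still leaves a consistent result: Q = 0, R = Numerator.
  void cannotDivide(long Numerator) {
    Quotient = 0;
    Remainder = Numerator;
  }

  void divide(long Numerator, long Denominator) {
    if (Denominator == 0)
      return cannotDivide(Numerator); // single-line bail-out, as in the patch
    Quotient = Numerator / Denominator;
    Remainder = Numerator % Denominator;
  }
};

int main() {
  ToyDivision D;
  D.divide(17, 5);
  assert(D.Quotient == 3 && D.Remainder == 2);
  D.divide(17, 0); // "cannot divide"
  assert(D.Quotient == 0 && D.Remainder == 17);
}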
- const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); + auto PreStartFlags = + ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW); + const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags); const SCEVAddRecExpr *PreAR = dyn_cast( SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); @@ -1322,9 +1303,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty, ExtendOpTraits::getOverflowLimitForStep(Step, &Pred, SE); if (OverflowLimit && - SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { + SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) return PreStart; - } + return nullptr; } @@ -1390,24 +1371,22 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start, if (!StartC) return false; - APInt StartAI = StartC->getValue()->getValue(); + APInt StartAI = StartC->getAPInt(); for (unsigned Delta : {-2, -1, 1, 2}) { const SCEV *PreStart = getConstant(StartAI - Delta); + FoldingSetNodeID ID; + ID.AddInteger(scAddRecExpr); + ID.AddPointer(PreStart); + ID.AddPointer(Step); + ID.AddPointer(L); + void *IP = nullptr; + const auto *PreAR = + static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + // Give up if we don't already have the add recurrence we need because // actually constructing an add recurrence is relatively expensive. - const SCEVAddRecExpr *PreAR = [&]() { - FoldingSetNodeID ID; - ID.AddInteger(scAddRecExpr); - ID.AddPointer(PreStart); - ID.AddPointer(Step); - ID.AddPointer(L); - void *IP = nullptr; - return static_cast( - this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); - }(); - if (PreAR && PreAR->getNoWrapFlags(WrapType)) { // proves (2) const SCEV *DeltaS = getConstant(StartC->getType(), Delta); ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; @@ -1578,6 +1557,18 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, } } + if (auto *SA = dyn_cast(Op)) { + // zext((A + B + ...)) --> (zext(A) + zext(B) + ...) + if (SA->getNoWrapFlags(SCEV::FlagNUW)) { + // If the addition does not unsign overflow then we can, by definition, + // commute the zero extension with the addition operation. + SmallVector Ops; + for (const auto *Op : SA->operands()) + Ops.push_back(getZeroExtendExpr(Op, Ty)); + return getAddExpr(Ops, SCEV::FlagNUW); + } + } + // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; @@ -1635,14 +1626,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, } // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 - if (auto SA = dyn_cast(Op)) { + if (auto *SA = dyn_cast(Op)) { if (SA->getNumOperands() == 2) { - auto SC1 = dyn_cast(SA->getOperand(0)); - auto SMul = dyn_cast(SA->getOperand(1)); + auto *SC1 = dyn_cast(SA->getOperand(0)); + auto *SMul = dyn_cast(SA->getOperand(1)); if (SMul && SC1) { - if (auto SC2 = dyn_cast(SMul->getOperand(0))) { - const APInt &C1 = SC1->getValue()->getValue(); - const APInt &C2 = SC2->getValue()->getValue(); + if (auto *SC2 = dyn_cast(SMul->getOperand(0))) { + const APInt &C1 = SC1->getAPInt(); + const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) return getAddExpr(getSignExtendExpr(SC1, Ty), @@ -1650,6 +1641,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, } } } + + // sext((A + B + ...)) --> (sext(A) + sext(B) + ...) 
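Aside: the new zext-over-add fold above (and its sext dual below) is justified purely by arithmetic: when the narrow addition provably cannot wrap, widening commutes with it. An exhaustive 8-bit check of the unsigned identity, as a standalone sketch:

#include <cassert>
#include <cstdint>

// Checks the identity behind zext((A + B)<nuw>) --> zext(A) + zext(B):
// if an 8-bit unsigned add does not wrap, zero-extension distributes
// over it.
int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      if (A + B > 255)
        continue; // would wrap: the nuw precondition fails, no claim made
      uint16_t Wide = uint16_t(uint8_t(A + B));  // zext(A + B)
      assert(Wide == uint16_t(A) + uint16_t(B)); // zext(A) + zext(B)
    }
}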
+ if (SA->getNoWrapFlags(SCEV::FlagNSW)) { + // If the addition does not sign overflow then we can, by definition, + // commute the sign extension with the addition operation. + SmallVector Ops; + for (const auto *Op : SA->operands()) + Ops.push_back(getSignExtendExpr(Op, Ty)); + return getAddExpr(Ops, SCEV::FlagNSW); + } } // If the input value is a chrec scev, and we can prove that the value // did not overflow the old, smaller, value, we can sign extend all of the @@ -1754,16 +1755,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // If Start and Step are constants, check if we can apply this // transformation: // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 - auto SC1 = dyn_cast(Start); - auto SC2 = dyn_cast(Step); + auto *SC1 = dyn_cast(Start); + auto *SC2 = dyn_cast(Step); if (SC1 && SC2) { - const APInt &C1 = SC1->getValue()->getValue(); - const APInt &C2 = SC2->getValue()->getValue(); + const APInt &C1 = SC1->getAPInt(); + const APInt &C2 = SC2->getAPInt(); if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) && C2.isPowerOf2()) { Start = getSignExtendExpr(Start, Ty); - const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step, - L, AR->getNoWrapFlags()); + const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, + AR->getNoWrapFlags()); return getAddExpr(Start, getSignExtendExpr(NewAR, Ty)); } } @@ -1798,7 +1799,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, // Sign-extend negative constants. if (const SCEVConstant *SC = dyn_cast(Op)) - if (SC->getValue()->getValue().isNegative()) + if (SC->getAPInt().isNegative()) return getSignExtendExpr(Op, Ty); // Peel off a truncate cast. @@ -1876,7 +1877,7 @@ CollectAddOperandsWithScales(DenseMap &M, // Pull a buried constant out to the outside. if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) Interesting = true; - AccumulatedConstant += Scale * C->getValue()->getValue(); + AccumulatedConstant += Scale * C->getAPInt(); } // Next comes everything else. We're especially interested in multiplies @@ -1885,7 +1886,7 @@ CollectAddOperandsWithScales(DenseMap &M, const SCEVMulExpr *Mul = dyn_cast(Ops[i]); if (Mul && isa(Mul->getOperand(0))) { APInt NewScale = - Scale * cast(Mul->getOperand(0))->getValue()->getValue(); + Scale * cast(Mul->getOperand(0))->getAPInt(); if (Mul->getNumOperands() == 2 && isa(Mul->getOperand(1))) { // A multiplication of a constant with another add; recurse. const SCEVAddExpr *Add = cast(Mul->getOperand(1)); @@ -1898,8 +1899,7 @@ CollectAddOperandsWithScales(DenseMap &M, // the map. SmallVector MulOps(Mul->op_begin()+1, Mul->op_end()); const SCEV *Key = SE.getMulExpr(MulOps); - std::pair::iterator, bool> Pair = - M.insert(std::make_pair(Key, NewScale)); + auto Pair = M.insert(std::make_pair(Key, NewScale)); if (Pair.second) { NewOps.push_back(Pair.first->first); } else { @@ -1927,22 +1927,15 @@ CollectAddOperandsWithScales(DenseMap &M, return Interesting; } -namespace { - struct APIntCompare { - bool operator()(const APInt &LHS, const APInt &RHS) const { - return LHS.ult(RHS); - } - }; -} - // We're trying to construct a SCEV of type `Type' with `Ops' as operands and // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of // can't-overflow flags for the operation if possible. 
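Aside: the C1 < C2 rewrites in this hunk (sext(C1 + C2*x) --> C1 + sext(C2*x), and the addrec form sext{C1,+,C2} --> C1 + sext{0,+,C2}) rely on C2 being a power of two larger than C1, so adding C1 to a multiple of C2 can never carry across the narrow type's sign bit. A brute-force 8-bit spot check with C1 = 1, C2 = 4 (relies on two's-complement narrowing, which C++20 guarantees):

#include <cassert>
#include <cstdint>

int main() {
  const int C1 = 1, C2 = 4;
  for (int X = -128; X < 128; ++X) {
    int8_t Prod = static_cast<int8_t>(C2 * X);    // C2*x, wrapped to 8 bits
    int8_t Sum  = static_cast<int8_t>(Prod + C1); // C1 + C2*x in 8 bits
    int SExtSum  = Sum;  // sext(C1 + C2*x)
    int SExtProd = Prod; // sext(C2*x)
    assert(SExtSum == C1 + SExtProd);
  }
}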
static SCEV::NoWrapFlags StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, const SmallVectorImpl &Ops, - SCEV::NoWrapFlags OldFlags) { + SCEV::NoWrapFlags Flags) { using namespace std::placeholders; + typedef OverflowingBinaryOperator OBO; bool CanAnalyze = Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; @@ -1951,18 +1944,42 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; SCEV::NoWrapFlags SignOrUnsignWrap = - ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask); + ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. - auto IsKnownNonNegative = - std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1); + auto IsKnownNonNegative = [&](const SCEV *S) { + return SE->isKnownNonNegative(S); + }; - if (SignOrUnsignWrap == SCEV::FlagNSW && - std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative)) - return ScalarEvolution::setFlags(OldFlags, - (SCEV::NoWrapFlags)SignOrUnsignMask); + if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative)) + Flags = + ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); - return OldFlags; + SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); + + if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr && + Ops.size() == 2 && isa(Ops[0])) { + + // (A + C) --> (A + C) if the addition does not sign overflow + // (A + C) --> (A + C) if the addition does not unsign overflow + + const APInt &C = cast(Ops[0])->getAPInt(); + if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { + auto NSWRegion = + ConstantRange::makeNoWrapRegion(Instruction::Add, C, OBO::NoSignedWrap); + if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); + } + if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { + auto NUWRegion = + ConstantRange::makeNoWrapRegion(Instruction::Add, C, + OBO::NoUnsignedWrap); + if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + } + } + + return Flags; } /// getAddExpr - Get a canonical add expression, or something simpler if @@ -1980,10 +1997,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, "SCEVAddExpr operand types don't match!"); #endif - Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); - // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); + + Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); // If there are any constants, fold them together. unsigned Idx = 0; @@ -1992,8 +2009,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
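Aside: StrengthenNoWrapFlags above now proves NSW/NUW for (C + A) by asking whether the known range of A sits inside the no-wrap region of "add C", via ConstantRange::makeNoWrapRegion. The idea in miniature, with 8-bit integers standing in for ConstantRange and a hypothetical inNoSignedWrapRegion helper:

#include <cassert>
#include <cstdint>

// For a fixed constant C, the signed no-wrap region of "x + C" in 8 bits
// is exactly the set of x where the mathematical sum stays in [-128, 127].
bool inNoSignedWrapRegion(int X, int C) {
  int Sum = X + C; // exact, computed in int
  return Sum >= -128 && Sum <= 127;
}

int main() {
  for (int C = -128; C < 128; ++C)
    for (int X = -128; X < 128; ++X) {
      int8_t Wrapped = static_cast<int8_t>(X + C); // 8-bit add (may wrap)
      assert((Wrapped == X + C) == inNoSignedWrapRegion(X, C));
    }
}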
- Ops[0] = getConstant(LHSC->getValue()->getValue() + - RHSC->getValue()->getValue()); + Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt()); if (Ops.size() == 2) return Ops[0]; Ops.erase(Ops.begin()+1); // Erase the folded element LHSC = cast(Ops[0]); @@ -2063,8 +2079,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, break; } LargeMulOps.push_back(T->getOperand()); - } else if (const SCEVConstant *C = - dyn_cast(M->getOperand(j))) { + } else if (const auto *C = dyn_cast(M->getOperand(j))) { LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); } else { Ok = false; @@ -2123,24 +2138,28 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Ops.data(), Ops.size(), APInt(BitWidth, 1), *this)) { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; + // Some interesting folding opportunity is present, so its worthwhile to // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. std::map, APIntCompare> MulOpLists; - for (SmallVectorImpl::const_iterator I = NewOps.begin(), - E = NewOps.end(); I != E; ++I) - MulOpLists[M.find(*I)->second].push_back(*I); + for (const SCEV *NewOp : NewOps) + MulOpLists[M.find(NewOp)->second].push_back(NewOp); // Re-generate the operands list. Ops.clear(); if (AccumulatedConstant != 0) Ops.push_back(getConstant(AccumulatedConstant)); - for (std::map, APIntCompare>::iterator - I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) - if (I->first != 0) - Ops.push_back(getMulExpr(getConstant(I->first), - getAddExpr(I->second))); + for (auto &MulOp : MulOpLists) + if (MulOp.first != 0) + Ops.push_back(getMulExpr(getConstant(MulOp.first), + getAddExpr(MulOp.second))); if (Ops.empty()) - return getConstant(Ty, 0); + return getZero(Ty); if (Ops.size() == 1) return Ops[0]; return getAddExpr(Ops); @@ -2168,7 +2187,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); InnerMul = getMulExpr(MulOps); } - const SCEV *One = getConstant(Ty, 1); + const SCEV *One = getOne(Ty); const SCEV *AddOne = getAddExpr(One, InnerMul); const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); if (Ops.size() == 2) return OuterMul; @@ -2279,8 +2298,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, AddRec->op_end()); for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); ++OtherIdx) - if (const SCEVAddRecExpr *OtherAddRec = - dyn_cast(Ops[OtherIdx])) + if (const auto *OtherAddRec = dyn_cast(Ops[OtherIdx])) if (OtherAddRec->getLoop() == AddRecLoop) { for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { @@ -2388,10 +2406,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, "SCEVMulExpr operand types don't match!"); #endif - Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); - // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); + + Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); // If there are any constants, fold them together. unsigned Idx = 0; @@ -2410,9 +2428,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, ++Idx; while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
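Aside: the MulOpLists rework earlier in this hunk groups add operands by their constant scale (a map keyed with APIntCompare) before rebuilding the expression, so each distinct scale produces exactly one multiply. A toy version of that grouping step, with strings standing in for SCEV operands:

#include <cassert>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Groups 2*a + 3*b + 2*c into 2*(a + c) + 3*b.
int main() {
  std::vector<std::pair<std::string, long>> ScaledOps = {
      {"a", 2}, {"b", 3}, {"c", 2}};

  std::map<long, std::vector<std::string>> ByScale;
  for (auto &Op : ScaledOps)
    ByScale[Op.second].push_back(Op.first); // one bucket per constant scale

  assert(ByScale[2].size() == 2); // {a, c} share the scale 2
  assert(ByScale[3].size() == 1); // {b}
}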
- ConstantInt *Fold = ConstantInt::get(getContext(), - LHSC->getValue()->getValue() * - RHSC->getValue()->getValue()); + ConstantInt *Fold = + ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt()); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; @@ -2433,23 +2450,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) { SmallVector NewOps; bool AnyFolded = false; - for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), - E = Add->op_end(); I != E; ++I) { - const SCEV *Mul = getMulExpr(Ops[0], *I); + for (const SCEV *AddOp : Add->operands()) { + const SCEV *Mul = getMulExpr(Ops[0], AddOp); if (!isa(Mul)) AnyFolded = true; NewOps.push_back(Mul); } if (AnyFolded) return getAddExpr(NewOps); - } - else if (const SCEVAddRecExpr * - AddRec = dyn_cast(Ops[1])) { + } else if (const auto *AddRec = dyn_cast(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. SmallVector Operands; - for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(), - E = AddRec->op_end(); I != E; ++I) { - Operands.push_back(getMulExpr(Ops[0], *I)); - } + for (const SCEV *AddRecOp : AddRec->operands()) + Operands.push_back(getMulExpr(Ops[0], AddRecOp)); + return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); } @@ -2560,7 +2573,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, SmallVector AddRecOps; for (int x = 0, xe = AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { - const SCEV *Term = getConstant(Ty, 0); + const SCEV *Term = getZero(Ty); for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), @@ -2638,11 +2651,11 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // its operands. // TODO: Generalize this to non-constants by using known-bits information. Type *Ty = LHS->getType(); - unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); + unsigned LZ = RHSC->getAPInt().countLeadingZeros(); unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; // For non-power-of-two values, effectively round the value up to the // nearest power of two. - if (!RHSC->getValue()->getValue().isPowerOf2()) + if (!RHSC->getAPInt().isPowerOf2()) ++MaxShiftAmt; IntegerType *ExtTy = IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); @@ -2650,18 +2663,17 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, if (const SCEVConstant *Step = dyn_cast(AR->getStepRecurrence(*this))) { // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. - const APInt &StepInt = Step->getValue()->getValue(); - const APInt &DivInt = RHSC->getValue()->getValue(); + const APInt &StepInt = Step->getAPInt(); + const APInt &DivInt = RHSC->getAPInt(); if (!StepInt.urem(DivInt) && getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { SmallVector Operands; - for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) - Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); - return getAddRecExpr(Operands, AR->getLoop(), - SCEV::FlagNW); + for (const SCEV *Op : AR->operands()) + Operands.push_back(getUDivExpr(Op, RHS)); + return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); } /// Get a canonical UDivExpr for a recurrence. /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). 
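Aside: the {X,+,N}/C --> {X/C,+,N/C} fold above is legal when the division distributes exactly over the recurrence; the code checks N % C == 0 plus a zero-extension identity to rule out wrapping. A simplified numeric check under the stronger assumption that C divides both the start X and the step N:

#include <cassert>

int main() {
  const long X = 6, N = 4, C = 2; // C divides X and C divides N
  for (long I = 0; I < 1000; ++I)
    assert((X + N * I) / C == X / C + (N / C) * I);
}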
Safe when C%N=0. @@ -2672,7 +2684,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), AR->getLoop(), SCEV::FlagAnyWrap)) { - const APInt &StartInt = StartC->getValue()->getValue(); + const APInt &StartInt = StartC->getAPInt(); const APInt &StartRem = StartInt.urem(StepInt); if (StartRem != 0) LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, @@ -2682,8 +2694,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast(LHS)) { SmallVector Operands; - for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); + for (const SCEV *Op : M->operands()) + Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) // Find an operand that's safely divisible. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { @@ -2700,8 +2712,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. if (const SCEVAddExpr *A = dyn_cast(LHS)) { SmallVector Operands; - for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) - Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); + for (const SCEV *Op : A->operands()) + Operands.push_back(getZeroExtendExpr(Op, ExtTy)); if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { Operands.clear(); for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { @@ -2739,8 +2751,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { - APInt A = C1->getValue()->getValue().abs(); - APInt B = C2->getValue()->getValue().abs(); + APInt A = C1->getAPInt().abs(); + APInt B = C2->getAPInt().abs(); uint32_t ABW = A.getBitWidth(); uint32_t BBW = B.getBitWidth(); @@ -2769,8 +2781,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, if (const SCEVConstant *RHSCst = dyn_cast(RHS)) { // If the mulexpr multiplies by a constant, then that constant must be the // first element of the mulexpr. - if (const SCEVConstant *LHSCst = - dyn_cast(Mul->getOperand(0))) { + if (const auto *LHSCst = dyn_cast(Mul->getOperand(0))) { if (LHSCst == RHSCst) { SmallVector Operands; Operands.append(Mul->op_begin() + 1, Mul->op_end()); @@ -2782,10 +2793,10 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, // check. APInt Factor = gcd(LHSCst, RHSCst); if (!Factor.isIntN(1)) { - LHSCst = cast( - getConstant(LHSCst->getValue()->getValue().udiv(Factor))); - RHSCst = cast( - getConstant(RHSCst->getValue()->getValue().udiv(Factor))); + LHSCst = + cast(getConstant(LHSCst->getAPInt().udiv(Factor))); + RHSCst = + cast(getConstant(RHSCst->getAPInt().udiv(Factor))); SmallVector Operands; Operands.push_back(LHSCst); Operands.append(Mul->op_begin() + 1, Mul->op_end()); @@ -2859,22 +2870,19 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, // Canonicalize nested AddRecs in by nesting them in order of loop depth. if (const SCEVAddRecExpr *NestedAR = dyn_cast(Operands[0])) { const Loop *NestedLoop = NestedAR->getLoop(); - if (L->contains(NestedLoop) ? - (L->getLoopDepth() < NestedLoop->getLoopDepth()) : - (!NestedLoop->contains(L) && - DT->dominates(L->getHeader(), NestedLoop->getHeader()))) { + if (L->contains(NestedLoop) + ? 
(L->getLoopDepth() < NestedLoop->getLoopDepth()) + : (!NestedLoop->contains(L) && + DT.dominates(L->getHeader(), NestedLoop->getHeader()))) { SmallVector NestedOperands(NestedAR->op_begin(), NestedAR->op_end()); Operands[0] = NestedAR->getStart(); // AddRecs require their operands be loop-invariant with respect to their // loops. Don't perform this transformation if it would break this // requirement. - bool AllInvariant = true; - for (unsigned i = 0, e = Operands.size(); i != e; ++i) - if (!isLoopInvariant(Operands[i], L)) { - AllInvariant = false; - break; - } + bool AllInvariant = all_of( + Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); }); + if (AllInvariant) { // Create a recurrence for the outer loop with the same step size. // @@ -2884,12 +2892,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); - AllInvariant = true; - for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) - if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { - AllInvariant = false; - break; - } + AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) { + return isLoopInvariant(Op, NestedLoop); + }); + if (AllInvariant) { // Ok, both add recurrences are valid after the transformation. // @@ -2936,10 +2942,11 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr, // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP // instruction to its SCEV, because the Instruction may be guarded by control // flow and the no-overflow bits may not be valid for the expression in any - // context. + // context. This can be fixed similarly to how these flags are handled for + // adds. SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - const SCEV *TotalOffset = getConstant(IntPtrTy, 0); + const SCEV *TotalOffset = getZero(IntPtrTy); // The address space is unimportant. The first thing we do on CurTy is getting // its element type. Type *CurTy = PointerType::getUnqual(PointeeType); @@ -2996,7 +3003,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3005,9 +3012,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! - ConstantInt *Fold = ConstantInt::get(getContext(), - APIntOps::smax(LHSC->getValue()->getValue(), - RHSC->getValue()->getValue())); + ConstantInt *Fold = ConstantInt::get( + getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; @@ -3100,7 +3106,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { #endif // Sort by complexity, this groups all similar expression types together. - GroupByComplexity(Ops, LI); + GroupByComplexity(Ops, &LI); // If there are any constants, fold them together. unsigned Idx = 0; @@ -3109,9 +3115,8 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { assert(Idx < Ops.size()); while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { // We found two constants, fold them together! 
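Aside: both max folds here follow the same shape as the add/mul constant folding: scan the complexity-sorted operand list and repeatedly merge adjacent constants with APIntOps::smax/umax. The same loop over plain longs:

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<long> Ops = {7, 42, -3, 42, 5};
  long Folded = Ops[0];
  for (std::size_t I = 1; I < Ops.size(); ++I)
    Folded = std::max(Folded, Ops[I]); // "found two constants, fold them"
  assert(Folded == 42);
}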
- ConstantInt *Fold = ConstantInt::get(getContext(), - APIntOps::umax(LHSC->getValue()->getValue(), - RHSC->getValue()->getValue())); + ConstantInt *Fold = ConstantInt::get( + getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); Ops[0] = getConstant(Fold); Ops.erase(Ops.begin()+1); // Erase the folded element if (Ops.size() == 1) return Ops[0]; @@ -3200,8 +3205,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { // We can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. - return getConstant(IntTy, - F->getParent()->getDataLayout().getTypeAllocSize(AllocTy)); + return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); } const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, @@ -3211,9 +3215,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. return getConstant( - IntTy, - F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset( - FieldNo)); + IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); } const SCEV *ScalarEvolution::getUnknown(Value *V) { @@ -3255,7 +3257,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const { /// for which isSCEVable must return true. uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - return F->getParent()->getDataLayout().getTypeSizeInBits(Ty); + return getDataLayout().getTypeSizeInBits(Ty); } /// getEffectiveSCEVType - Return a type with the same bitwidth as @@ -3265,20 +3267,20 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - if (Ty->isIntegerTy()) { + if (Ty->isIntegerTy()) return Ty; - } // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - return F->getParent()->getDataLayout().getIntPtrType(Ty); + return getDataLayout().getIntPtrType(Ty); } const SCEV *ScalarEvolution::getCouldNotCompute() { - return &CouldNotCompute; + return CouldNotCompute.get(); } -namespace { + +bool ScalarEvolution::checkValidity(const SCEV *S) const { // Helper class working with SCEVTraversal to figure out if a SCEV contains // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne // is set iff if find such SCEVUnknown. 
@@ -3300,9 +3302,7 @@ namespace { } bool isDone() const { return FindOne; } }; -} -bool ScalarEvolution::checkValidity(const SCEV *S) const { FindInvalidSCEVUnknown F; SCEVTraversal ST(F); ST.visitAll(S); @@ -3315,35 +3315,39 @@ bool ScalarEvolution::checkValidity(const SCEV *S) const { const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + const SCEV *S = getExistingSCEV(V); + if (S == nullptr) { + S = createSCEV(V); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); + } + return S; +} + +const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { + assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); + ValueExprMapType::iterator I = ValueExprMap.find_as(V); if (I != ValueExprMap.end()) { const SCEV *S = I->second; if (checkValidity(S)) return S; - else - ValueExprMap.erase(I); + ValueExprMap.erase(I); } - const SCEV *S = createSCEV(V); - - // The process of creating a SCEV for V may have caused other SCEVs - // to have been created, so it's necessary to insert the new entry - // from scratch, rather than trying to remember the insert position - // above. - ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); - return S; + return nullptr; } /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V /// -const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { +const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, + SCEV::NoWrapFlags Flags) { if (const SCEVConstant *VC = dyn_cast(V)) return getConstant( cast(ConstantExpr::getNeg(VC->getValue()))); Type *Ty = V->getType(); Ty = getEffectiveSCEVType(Ty); - return getMulExpr(V, - getConstant(cast(Constant::getAllOnesValue(Ty)))); + return getMulExpr( + V, getConstant(cast(Constant::getAllOnesValue(Ty))), Flags); } /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V @@ -3362,15 +3366,40 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1. const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags) { - assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW"); - // Fast path: X - X --> 0. if (LHS == RHS) - return getConstant(LHS->getType(), 0); + return getZero(LHS->getType()); - // X - Y --> X + -Y. - // X -(nsw || nuw) Y --> X + -Y. - return getAddExpr(LHS, getNegativeSCEV(RHS)); + // We represent LHS - RHS as LHS + (-1)*RHS. This transformation + // makes it so that we cannot make much use of NUW. + auto AddFlags = SCEV::FlagAnyWrap; + const bool RHSIsNotMinSigned = + !getSignedRange(RHS).getSignedMin().isMinSignedValue(); + if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { + // Let M be the minimum representable signed value. Then (-1)*RHS + // signed-wraps if and only if RHS is M. That can happen even for + // a NSW subtraction because e.g. (-1)*M signed-wraps even though + // -1 - M does not. So to transfer NSW from LHS - RHS to LHS + + // (-1)*RHS, we need to prove that RHS != M. + // + // If LHS is non-negative and we know that LHS - RHS does not + // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap + // either by proving that RHS > M or that LHS >= 0. + if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) { + AddFlags = SCEV::FlagNSW; + } + } + + // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS - + // RHS is NSW and LHS >= 0. 
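Aside: the getSCEV/getExistingSCEV split above separates the cache probe (which may return nothing) from creation, because creating a SCEV can itself populate the map, forcing the insert to happen from scratch. The pattern in miniature, with a hypothetical Cache type mapping ints to strings:

#include <cassert>
#include <string>
#include <unordered_map>
#include <utility>

struct Cache {
  std::unordered_map<int, std::string> Map;

  // Lookup half: usable on its own, nullptr on a miss.
  const std::string *getExisting(int V) {
    auto It = Map.find(V);
    return It == Map.end() ? nullptr : &It->second;
  }

  // Create half: inserts from scratch rather than reusing a stale
  // insertion position, since creation may have touched the map.
  const std::string &get(int V) {
    if (const std::string *S = getExisting(V))
      return *S;
    std::string Created = "expr(" + std::to_string(V) + ")";
    return Map.emplace(V, std::move(Created)).first->second;
  }
};

int main() {
  Cache C;
  assert(C.getExisting(1) == nullptr);
  assert(C.get(1) == "expr(1)");
  assert(C.getExisting(1) != nullptr); // now cached
}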
+ // + // The difficulty here is that the NSW flag may have been proven + // relative to a loop that is to be found in a recurrence in LHS and + // not in RHS. Applying NSW to (-1)*M may then let the NSW have a + // larger scope than intended. + auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap; + + return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags); } /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the @@ -3513,16 +3542,14 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { if (const SCEVCastExpr *Cast = dyn_cast(V)) { return getPointerBase(Cast->getOperand()); - } - else if (const SCEVNAryExpr *NAry = dyn_cast(V)) { + } else if (const SCEVNAryExpr *NAry = dyn_cast(V)) { const SCEV *PtrOp = nullptr; - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - if ((*I)->getType()->isPointerTy()) { + for (const SCEV *NAryOp : NAry->operands()) { + if (NAryOp->getType()->isPointerTy()) { // Cannot find the base of an expression with multiple pointer operands. if (PtrOp) return V; - PtrOp = *I; + PtrOp = NAryOp; } } if (!PtrOp) @@ -3558,8 +3585,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { if (!Visited.insert(I).second) continue; - ValueExprMapType::iterator It = - ValueExprMap.find_as(static_cast(I)); + auto It = ValueExprMap.find_as(static_cast(I)); if (It != ValueExprMap.end()) { const SCEV *Old = It->second; @@ -3587,165 +3613,476 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { } } -/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in -/// a loop header, making it a potential recurrence, or it doesn't. -/// -const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { - if (const Loop *L = LI->getLoopFor(PN->getParent())) - if (L->getHeader() == PN->getParent()) { - // The loop may have multiple entrances or multiple exits; we can analyze - // this phi as an addrec if it has a unique entry value and a unique - // backedge value. - Value *BEValueV = nullptr, *StartValueV = nullptr; - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { - Value *V = PN->getIncomingValue(i); - if (L->contains(PN->getIncomingBlock(i))) { - if (!BEValueV) { - BEValueV = V; - } else if (BEValueV != V) { - BEValueV = nullptr; +namespace { +class SCEVInitRewriter : public SCEVRewriteVisitor { +public: + static const SCEV *rewrite(const SCEV *Scev, const Loop *L, + ScalarEvolution &SE) { + SCEVInitRewriter Rewriter(L, SE); + const SCEV *Result = Rewriter.visit(Scev); + return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); + } + + SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L), Valid(true) {} + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + Valid = false; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + // Only allow AddRecExprs for this loop. + if (Expr->getLoop() == L) + return Expr->getStart(); + Valid = false; + return Expr; + } + + bool isValid() { return Valid; } + +private: + const Loop *L; + bool Valid; +}; + +class SCEVShiftRewriter : public SCEVRewriteVisitor { +public: + static const SCEV *rewrite(const SCEV *Scev, const Loop *L, + ScalarEvolution &SE) { + SCEVShiftRewriter Rewriter(L, SE); + const SCEV *Result = Rewriter.visit(Scev); + return Rewriter.isValid() ? 
Result : SE.getCouldNotCompute(); + } + + SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE) + : SCEVRewriteVisitor(SE), L(L), Valid(true) {} + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + // Only allow AddRecExprs for this loop. + if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) + Valid = false; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (Expr->getLoop() == L && Expr->isAffine()) + return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE)); + Valid = false; + return Expr; + } + bool isValid() { return Valid; } + +private: + const Loop *L; + bool Valid; +}; +} // end anonymous namespace + +const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { + const Loop *L = LI.getLoopFor(PN->getParent()); + if (!L || L->getHeader() != PN->getParent()) + return nullptr; + + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. + Value *BEValueV = nullptr, *StartValueV = nullptr; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = nullptr; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = nullptr; + break; + } + } + if (BEValueV && StartValueV) { + // While we are analyzing this PHI node, handle its value symbolically. + const SCEV *SymbolicName = getUnknown(PN); + assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && + "PHI node already processed?"); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); + + // Using this symbolic name for the PHI, analyze the value coming around + // the back-edge. + const SCEV *BEValue = getSCEV(BEValueV); + + // NOTE: If BEValue is loop invariant, we know that the PHI node just + // has a special value for the first iteration of the loop. + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, then we found a simple induction variable! + if (const SCEVAddExpr *Add = dyn_cast(BEValue)) { + // If there is a single occurrence of the symbolic value, replace it + // with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (Add->getOperand(i) == SymbolicName) + if (FoundIndex == e) { + FoundIndex = i; break; } - } else if (!StartValueV) { - StartValueV = V; - } else if (StartValueV != V) { - StartValueV = nullptr; - break; + + if (FoundIndex != Add->getNumOperands()) { + // Create an add with everything but the specified operand. + SmallVector Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // This is not a valid addrec if the step amount is varying each + // loop iteration, but is not itself an addrec in this loop. + if (isLoopInvariant(Accum, L) || + (isa(Accum) && + cast(Accum)->getLoop() == L)) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + + // If the increment doesn't overflow, then neither the addrec nor + // the post-increment will overflow. 
+ if (const AddOperator *OBO = dyn_cast(BEValueV)) { + if (OBO->getOperand(0) == PN) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); + } + } else if (GEPOperator *GEP = dyn_cast(BEValueV)) { + // If the increment is an inbounds GEP, then we know the address + // space cannot be wrapped around. We cannot make any guarantee + // about signed or unsigned overflow because pointers are + // unsigned but we may have a negative index from the base + // pointer. We can guarantee that no unsigned wrap occurs if the + // indices form a positive value. + if (GEP->isInBounds() && GEP->getOperand(0) == PN) { + Flags = setFlags(Flags, SCEV::FlagNW); + + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); + if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) + Flags = setFlags(Flags, SCEV::FlagNUW); + } + + // We cannot transfer nuw and nsw flags from subtraction + // operations -- sub nuw X, Y is not the same as add nuw X, -Y + // for instance. + } + + const SCEV *StartVal = getSCEV(StartValueV); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); + + // Since the no-wrap flags are on the increment, they apply to the + // post-incremented value as well. + if (isLoopInvariant(Accum, L)) + (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + return PHISCEV; } } - if (BEValueV && StartValueV) { - // While we are analyzing this PHI node, handle its value symbolically. - const SCEV *SymbolicName = getUnknown(PN); - assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && - "PHI node already processed?"); - ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); - - // Using this symbolic name for the PHI, analyze the value coming around - // the back-edge. - const SCEV *BEValue = getSCEV(BEValueV); - - // NOTE: If BEValue is loop invariant, we know that the PHI node just - // has a special value for the first iteration of the loop. - - // If the value coming around the backedge is an add with the symbolic - // value we just inserted, then we found a simple induction variable! - if (const SCEVAddExpr *Add = dyn_cast(BEValue)) { - // If there is a single occurrence of the symbolic value, replace it - // with a recurrence. - unsigned FoundIndex = Add->getNumOperands(); - for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (Add->getOperand(i) == SymbolicName) - if (FoundIndex == e) { - FoundIndex = i; - break; - } - - if (FoundIndex != Add->getNumOperands()) { - // Create an add with everything but the specified operand. - SmallVector Ops; - for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) - if (i != FoundIndex) - Ops.push_back(Add->getOperand(i)); - const SCEV *Accum = getAddExpr(Ops); - - // This is not a valid addrec if the step amount is varying each - // loop iteration, but is not itself an addrec in this loop. - if (isLoopInvariant(Accum, L) || - (isa(Accum) && - cast(Accum)->getLoop() == L)) { - SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; - - // If the increment doesn't overflow, then neither the addrec nor - // the post-increment will overflow. 
- if (const AddOperator *OBO = dyn_cast(BEValueV)) { - if (OBO->getOperand(0) == PN) { - if (OBO->hasNoUnsignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNUW); - if (OBO->hasNoSignedWrap()) - Flags = setFlags(Flags, SCEV::FlagNSW); - } - } else if (GEPOperator *GEP = dyn_cast(BEValueV)) { - // If the increment is an inbounds GEP, then we know the address - // space cannot be wrapped around. We cannot make any guarantee - // about signed or unsigned overflow because pointers are - // unsigned but we may have a negative index from the base - // pointer. We can guarantee that no unsigned wrap occurs if the - // indices form a positive value. - if (GEP->isInBounds() && GEP->getOperand(0) == PN) { - Flags = setFlags(Flags, SCEV::FlagNW); - - const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); - if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) - Flags = setFlags(Flags, SCEV::FlagNUW); - } - - // We cannot transfer nuw and nsw flags from subtraction - // operations -- sub nuw X, Y is not the same as add nuw X, -Y - // for instance. - } - - const SCEV *StartVal = getSCEV(StartValueV); - const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); - - // Since the no-wrap flags are on the increment, they apply to the - // post-incremented value as well. - if (isLoopInvariant(Accum, L)) - (void)getAddRecExpr(getAddExpr(StartVal, Accum), - Accum, L, Flags); - - // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - ForgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - return PHISCEV; - } - } - } else if (const SCEVAddRecExpr *AddRec = - dyn_cast(BEValue)) { - // Otherwise, this could be a loop like this: - // i = 0; for (j = 1; ..; ++j) { .... i = j; } - // In this case, j = {1,+,1} and BEValue is j. - // Because the other in-value of i (0) fits the evolution of BEValue - // i really is an addrec evolution. - if (AddRec->getLoop() == L && AddRec->isAffine()) { - const SCEV *StartVal = getSCEV(StartValueV); - - // If StartVal = j.start - j.stride, we can use StartVal as the - // initial step of the addrec evolution. - if (StartVal == getMinusSCEV(AddRec->getOperand(0), - AddRec->getOperand(1))) { - // FIXME: For constant StartVal, we should be able to infer - // no-wrap flags. - const SCEV *PHISCEV = - getAddRecExpr(StartVal, AddRec->getOperand(1), L, - SCEV::FlagAnyWrap); - - // Okay, for the entire analysis of this edge we assumed the PHI - // to be symbolic. We now need to go back and purge all of the - // entries for the scalars that use the symbolic expression. - ForgetSymbolicName(PN, SymbolicName); - ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; - return PHISCEV; - } - } + } else { + // Otherwise, this could be a loop like this: + // i = 0; for (j = 1; ..; ++j) { .... i = j; } + // In this case, j = {1,+,1} and BEValue is j. + // Because the other in-value of i (0) fits the evolution of BEValue + // i really is an addrec evolution. 
+ // + // We can generalize this saying that i is the shifted value of BEValue + // by one iteration: + // PHI(f(0), f({1,+,1})) --> f({0,+,1}) + const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); + const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this); + if (Shifted != getCouldNotCompute() && + Start != getCouldNotCompute()) { + const SCEV *StartVal = getSCEV(StartValueV); + if (Start == StartVal) { + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; + return Shifted; } } } + } + + return nullptr; +} + +// Checks if the SCEV S is available at BB. S is considered available at BB +// if S can be materialized at BB without introducing a fault. +static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, + BasicBlock *BB) { + struct CheckAvailable { + bool TraversalDone = false; + bool Available = true; + + const Loop *L = nullptr; // The loop BB is in (can be nullptr) + BasicBlock *BB = nullptr; + DominatorTree &DT; + + CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT) + : L(L), BB(BB), DT(DT) {} + + bool setUnavailable() { + TraversalDone = true; + Available = false; + return false; + } + + bool follow(const SCEV *S) { + switch (S->getSCEVType()) { + case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: + case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: + // These expressions are available if their operand(s) is/are. + return true; + + case scAddRecExpr: { + // We allow add recurrences that are on the loop BB is in, or some + // outer loop. This guarantees availability because the value of the + // add recurrence at BB is simply the "current" value of the induction + // variable. We can relax this in the future; for instance an add + // recurrence on a sibling dominating loop is also available at BB. + const auto *ARLoop = cast(S)->getLoop(); + if (L && (ARLoop == L || ARLoop->contains(L))) + return true; + + return setUnavailable(); + } + + case scUnknown: { + // For SCEVUnknown, we check for simple dominance. + const auto *SU = cast(S); + Value *V = SU->getValue(); + + if (isa(V)) + return false; + + if (isa(V) && DT.dominates(cast(V), BB)) + return false; + + return setUnavailable(); + } + + case scUDivExpr: + case scCouldNotCompute: + // We do not try to smart about these at all. + return setUnavailable(); + } + llvm_unreachable("switch should be fully covered!"); + } + + bool isDone() { return TraversalDone; } + }; + + CheckAvailable CA(L, BB, DT); + SCEVTraversal ST(CA); + + ST.visitAll(S); + return CA.Available; +} + +// Try to match a control flow sequence that branches out at BI and merges back +// at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful +// match. 
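Aside: SCEVInitRewriter/SCEVShiftRewriter formalize the comment above: the backedge value is the PHI "shifted" by one iteration, so PHI(f(0), f({1,+,1})) is f({0,+,1}). A direct simulation of the loop in the comment ("i = 0; for (j = 1; ..; ++j) i = j;") with an illustrative affine f(j) = 3*j + 5:

#include <cassert>

long f(long J) { return 3 * J + 5; }

int main() {
  long I = f(0); // start value coming into the loop header
  for (long J = 1; J <= 100; ++J) {
    // At the top of iteration J, the PHI holds f(J - 1): f evaluated on
    // the shifted recurrence {0,+,1}.
    assert(I == f(J - 1));
    I = f(J); // backedge value: f evaluated on {1,+,1}
  }
}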
+static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, + Value *&C, Value *&LHS, Value *&RHS) { + C = BI->getCondition(); + + BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0)); + BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1)); + + if (!LeftEdge.isSingleEdge()) + return false; + + assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()"); + + Use &LeftUse = Merge->getOperandUse(0); + Use &RightUse = Merge->getOperandUse(1); + + if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) { + LHS = LeftUse; + RHS = RightUse; + return true; + } + + if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) { + LHS = RightUse; + RHS = LeftUse; + return true; + } + + return false; +} + +const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) { + if (PN->getNumIncomingValues() == 2) { + const Loop *L = LI.getLoopFor(PN->getParent()); + + // We don't want to break LCSSA, even in a SCEV expression tree. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (LI.getLoopFor(PN->getIncomingBlock(i)) != L) + return nullptr; + + // Try to match + // + // br %cond, label %left, label %right + // left: + // br label %merge + // right: + // br label %merge + // merge: + // V = phi [ %x, %left ], [ %y, %right ] + // + // as "select %cond, %x, %y" + + BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock(); + assert(IDom && "At least the entry block should dominate PN"); + + auto *BI = dyn_cast(IDom->getTerminator()); + Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr; + + if (BI && BI->isConditional() && + BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) && + IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) && + IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent())) + return createNodeForSelectOrPHI(PN, Cond, LHS, RHS); + } + + return nullptr; +} + +const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { + if (const SCEV *S = createAddRecFromPHI(PN)) + return S; + + if (const SCEV *S = createNodeFromSelectLikePHI(PN)) + return S; // If the PHI has a single incoming value, follow that value, unless the // PHI's incoming blocks are in a different loop, in which case doing so // risks breaking LCSSA form. Instcombine would normally zap these, but // it doesn't have DominatorTree information, so it may miss cases. - if (Value *V = - SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC)) - if (LI->replacementPreservesLCSSAForm(PN, V)) + if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC)) + if (LI.replacementPreservesLCSSAForm(PN, V)) return getSCEV(V); // If it's not a loop phi, we can't handle it yet. return getUnknown(PN); } +const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, + Value *Cond, + Value *TrueVal, + Value *FalseVal) { + // Handle "constant" branch or select. This can occur for instance when a + // loop pass transforms an inner loop and moves on to process the outer loop. + if (auto *CI = dyn_cast(Cond)) + return getSCEV(CI->isOne() ? TrueVal : FalseVal); + + // Try to match some simple smax or umax patterns. + auto *ICI = dyn_cast(Cond); + if (!ICI) + return getUnknown(I); + + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + switch (ICI->getPredicate()) { + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + // a >s b ? a+x : b+x -> smax(a, b)+x + // a >s b ? 
b+x : a+x -> smin(a, b)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { + const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType()); + const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getSMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getSMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + std::swap(LHS, RHS); + // fall through + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + // a >u b ? a+x : b+x -> umax(a, b)+x + // a >u b ? b+x : a+x -> umin(a, b)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, RS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(LS, RS), LDiff); + LDiff = getMinusSCEV(LA, RS); + RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMinExpr(LS, RS), LDiff); + } + break; + case ICmpInst::ICMP_NE: + // n != 0 ? n+x : 1+x -> umax(n, 1)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && + isa(RHS) && cast(RHS)->isZero()) { + const SCEV *One = getOne(I->getType()); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, LS); + const SCEV *RDiff = getMinusSCEV(RA, One); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + case ICmpInst::ICMP_EQ: + // n == 0 ? 1+x : n+x -> umax(n, 1)+x + if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && + isa(RHS) && cast(RHS)->isZero()) { + const SCEV *One = getOne(I->getType()); + const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); + const SCEV *LA = getSCEV(TrueVal); + const SCEV *RA = getSCEV(FalseVal); + const SCEV *LDiff = getMinusSCEV(LA, One); + const SCEV *RDiff = getMinusSCEV(RA, LS); + if (LDiff == RDiff) + return getAddExpr(getUMaxExpr(One, LS), LDiff); + } + break; + default: + break; + } + + return getUnknown(I); +} + /// createNodeForGEP - Expand GEP instructions into add and multiply /// operations. This allows them to be analyzed by regular SCEV code. /// @@ -3769,7 +4106,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { if (const SCEVConstant *C = dyn_cast(S)) - return C->getValue()->getValue().countTrailingZeros(); + return C->getAPInt().countTrailingZeros(); if (const SCEVTruncateExpr *T = dyn_cast(S)) return std::min(GetMinTrailingZeros(T->getOperand()), @@ -3834,8 +4171,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { // For a SCEVUnknown, ask ValueTracking. 
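Aside: the select lowering above rests on identities like "a >s b ? a+x : b+x == smax(a, b) + x", detected by checking that the two arm-minus-operand differences agree (LDiff == RDiff). The identity itself, checked exhaustively over 8-bit operand values (widened to int so the +x in the test never wraps):

#include <algorithm>
#include <cassert>
#include <initializer_list>

int main() {
  for (int A = -128; A < 128; ++A)
    for (int B = -128; B < 128; ++B)
      for (int X : {-7, 0, 13}) {
        int Select = (A > B) ? A + X : B + X;
        assert(Select == std::max(A, B) + X);
      }
}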
unsigned BitWidth = getTypeSizeInBits(U->getType()); APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, - F->getParent()->getDataLayout(), 0, AC, nullptr, DT); + computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC, + nullptr, &DT); return Zeros.countTrailingOnes(); } @@ -3846,26 +4183,9 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { /// GetRangeFromMetadata - Helper method to assign a range to V from /// metadata present in the IR. static Optional GetRangeFromMetadata(Value *V) { - if (Instruction *I = dyn_cast(V)) { - if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) { - ConstantRange TotalRange( - cast(I->getType())->getBitWidth(), false); - - unsigned NumRanges = MD->getNumOperands() / 2; - assert(NumRanges >= 1); - - for (unsigned i = 0; i < NumRanges; ++i) { - ConstantInt *Lower = - mdconst::extract(MD->getOperand(2 * i + 0)); - ConstantInt *Upper = - mdconst::extract(MD->getOperand(2 * i + 1)); - ConstantRange Range(Lower->getValue(), Upper->getValue()); - TotalRange = TotalRange.unionWith(Range); - } - - return TotalRange; - } - } + if (Instruction *I = dyn_cast(V)) + if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) + return getConstantRangeFromMetadata(*MD); return None; } @@ -3887,7 +4207,7 @@ ScalarEvolution::getRange(const SCEV *S, return I->second; if (const SCEVConstant *C = dyn_cast(S)) - return setRange(C, SignHint, ConstantRange(C->getValue()->getValue())); + return setRange(C, SignHint, ConstantRange(C->getAPInt())); unsigned BitWidth = getTypeSizeInBits(S->getType()); ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); @@ -3965,9 +4285,8 @@ ScalarEvolution::getRange(const SCEV *S, if (AddRec->getNoWrapFlags(SCEV::FlagNUW)) if (const SCEVConstant *C = dyn_cast(AddRec->getStart())) if (!C->getValue()->isZero()) - ConservativeResult = - ConservativeResult.intersectWith( - ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); + ConservativeResult = ConservativeResult.intersectWith( + ConstantRange(C->getAPInt(), APInt(BitWidth, 0))); // If there's no signed wrap, and all the operands have the same sign or // zero, the value won't ever change sign. @@ -4065,18 +4384,18 @@ ScalarEvolution::getRange(const SCEV *S, // Split here to avoid paying the compile-time cost of calling both // computeKnownBits and ComputeNumSignBits. This restriction can be lifted // if needed. - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); - computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT); + computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT); if (Ones != ~Zeros + 1) ConservativeResult = ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)); } else { assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && "generalize as needed!"); - unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT); + unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT); if (NS > 1) ConservativeResult = ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -4089,8 +4408,64 @@ ScalarEvolution::getRange(const SCEV *S, return setRange(S, SignHint, ConservativeResult); } -/// createSCEV - We know that there is no SCEV for the specified value. -/// Analyze the expression. 
+SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { + if (isa(V)) return SCEV::FlagAnyWrap; + const BinaryOperator *BinOp = cast(V); + + // Return early if there are no flags to propagate to the SCEV. + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + if (BinOp->hasNoUnsignedWrap()) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + if (BinOp->hasNoSignedWrap()) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); + if (Flags == SCEV::FlagAnyWrap) { + return SCEV::FlagAnyWrap; + } + + // Here we check that BinOp is in the header of the innermost loop + // containing BinOp, since we only deal with instructions in the loop + // header. The actual loop we need to check later will come from an add + // recurrence, but getting that requires computing the SCEV of the operands, + // which can be expensive. This check we can do cheaply to rule out some + // cases early. + Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent()); + if (innermostContainingLoop == nullptr || + innermostContainingLoop->getHeader() != BinOp->getParent()) + return SCEV::FlagAnyWrap; + + // Only proceed if we can prove that BinOp does not yield poison. + if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap; + + // At this point we know that if V is executed, then it does not wrap + // according to at least one of NSW or NUW. If V is not executed, then we do + // not know if the calculation that V represents would wrap. Multiple + // instructions can map to the same SCEV. If we apply NSW or NUW from V to + // the SCEV, we must guarantee no wrapping for that SCEV also when it is + // derived from other instructions that map to the same SCEV. We cannot make + // that guarantee for cases where V is not executed. So we need to find the + // loop that V is considered in relation to and prove that V is executed for + // every iteration of that loop. That implies that the value that V + // calculates does not wrap anywhere in the loop, so then we can apply the + // flags to the SCEV. + // + // We check isLoopInvariant to disambiguate in case we are adding two + // recurrences from different loops, so that we know which loop to prove + // that V is executed in. + for (int OpIndex = 0; OpIndex < 2; ++OpIndex) { + const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex)); + if (auto *AddRec = dyn_cast(Op)) { + const int OtherOpIndex = 1 - OpIndex; + const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex)); + if (isLoopInvariant(OtherOp, AddRec->getLoop()) && + isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop())) + return Flags; + } + } + return SCEV::FlagAnyWrap; +} + +/// createSCEV - We know that there is no SCEV for the specified value. Analyze +/// the expression. /// const SCEV *ScalarEvolution::createSCEV(Value *V) { if (!isSCEVable(V->getType())) @@ -4104,14 +4479,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // reachable. Such instructions don't matter, and they aren't required // to obey basic rules for definitions dominating uses which this // analysis depends on. - if (!DT->isReachableFromEntry(I->getParent())) + if (!DT.isReachableFromEntry(I->getParent())) return getUnknown(V); } else if (ConstantExpr *CE = dyn_cast(V)) Opcode = CE->getOpcode(); else if (ConstantInt *CI = dyn_cast(V)) return getConstant(CI); else if (isa(V)) - return getConstant(V->getType(), 0); + return getZero(V->getType()); else if (GlobalAlias *GA = dyn_cast(V)) return GA->mayBeOverridden() ? 
getUnknown(V) : getSCEV(GA->getAliasee()); else @@ -4126,47 +4501,79 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // because it leads to N-1 getAddExpr calls for N ultimate operands. // Instead, gather up all the operands and make a single getAddExpr call. // LLVM IR canonical form means we need only traverse the left operands. - // - // Don't apply this instruction's NSW or NUW flags to the new - // expression. The instruction may be guarded by control flow that the - // no-wrap behavior depends on. Non-control-equivalent instructions can be - // mapped to the same SCEV expression, and it would be incorrect to transfer - // NSW/NUW semantics to those operations. SmallVector AddOps; - AddOps.push_back(getSCEV(U->getOperand(1))); - for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { - unsigned Opcode = Op->getValueID() - Value::InstructionVal; - if (Opcode != Instruction::Add && Opcode != Instruction::Sub) + for (Value *Op = U;; Op = U->getOperand(0)) { + U = dyn_cast(Op); + unsigned Opcode = U ? U->getOpcode() : 0; + if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) { + assert(Op != V && "V should be an add"); + AddOps.push_back(getSCEV(Op)); break; - U = cast(Op); - const SCEV *Op1 = getSCEV(U->getOperand(1)); + } + + if (auto *OpSCEV = getExistingSCEV(U)) { + AddOps.push_back(OpSCEV); + break; + } + + // If a NUW or NSW flag can be applied to the SCEV for this + // addition, then compute the SCEV for this addition by itself + // with a separate call to getAddExpr. We need to do that + // instead of pushing the operands of the addition onto AddOps, + // since the flags are only known to apply to this particular + // addition - they may not apply to other additions that can be + // formed with operands from AddOps. + const SCEV *RHS = getSCEV(U->getOperand(1)); + SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U); + if (Flags != SCEV::FlagAnyWrap) { + const SCEV *LHS = getSCEV(U->getOperand(0)); + if (Opcode == Instruction::Sub) + AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); + else + AddOps.push_back(getAddExpr(LHS, RHS, Flags)); + break; + } + if (Opcode == Instruction::Sub) - AddOps.push_back(getNegativeSCEV(Op1)); + AddOps.push_back(getNegativeSCEV(RHS)); else - AddOps.push_back(Op1); + AddOps.push_back(RHS); } - AddOps.push_back(getSCEV(U->getOperand(0))); return getAddExpr(AddOps); } + case Instruction::Mul: { - // Don't transfer NSW/NUW for the same reason as AddExpr. 
SmallVector MulOps; - MulOps.push_back(getSCEV(U->getOperand(1))); - for (Value *Op = U->getOperand(0); - Op->getValueID() == Instruction::Mul + Value::InstructionVal; - Op = U->getOperand(0)) { - U = cast(Op); + for (Value *Op = U;; Op = U->getOperand(0)) { + U = dyn_cast(Op); + if (!U || U->getOpcode() != Instruction::Mul) { + assert(Op != V && "V should be a mul"); + MulOps.push_back(getSCEV(Op)); + break; + } + + if (auto *OpSCEV = getExistingSCEV(U)) { + MulOps.push_back(OpSCEV); + break; + } + + SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U); + if (Flags != SCEV::FlagAnyWrap) { + MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)), + getSCEV(U->getOperand(1)), Flags)); + break; + } + MulOps.push_back(getSCEV(U->getOperand(1))); } - MulOps.push_back(getSCEV(U->getOperand(0))); return getMulExpr(MulOps); } case Instruction::UDiv: return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::Sub: - return getMinusSCEV(getSCEV(U->getOperand(0)), - getSCEV(U->getOperand(1))); + return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)), + getNoWrapFlagsFromUB(U)); case Instruction::And: // For an expression like x&255 that merely masks off the high bits, // use zext(trunc(x)) as the SCEV expression. @@ -4185,8 +4592,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { unsigned TZ = A.countTrailingZeros(); unsigned BitWidth = A.getBitWidth(); APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - computeKnownBits(U->getOperand(0), KnownZero, KnownOne, - F->getParent()->getDataLayout(), 0, AC, nullptr, DT); + computeKnownBits(U->getOperand(0), KnownZero, KnownOne, getDataLayout(), + 0, &AC, nullptr, &DT); APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -4286,9 +4693,18 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { if (SA->getValue().uge(BitWidth)) break; + // It is currently not resolved how to interpret NSW for left + // shift by BitWidth - 1, so we avoid applying flags in that + // case. Remove this check (or this comment) once the situation + // is resolved. See + // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html + // and http://reviews.llvm.org/D8890 . + auto Flags = SCEV::FlagAnyWrap; + if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U); + Constant *X = ConstantInt::get(getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); - return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); + return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags); } break; @@ -4363,94 +4779,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { return createNodeForPHI(cast(U)); case Instruction::Select: - // This could be a smax or umax that was lowered earlier. - // Try to recover it. - if (ICmpInst *ICI = dyn_cast(U->getOperand(0))) { - Value *LHS = ICI->getOperand(0); - Value *RHS = ICI->getOperand(1); - switch (ICI->getPredicate()) { - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: - std::swap(LHS, RHS); - // fall through - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: - // a >s b ? a+x : b+x -> smax(a, b)+x - // a >s b ? 
b+x : a+x -> smin(a, b)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType())) { - const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType()); - const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, RS); - if (LDiff == RDiff) - return getAddExpr(getSMaxExpr(LS, RS), LDiff); - LDiff = getMinusSCEV(LA, RS); - RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getSMinExpr(LS, RS), LDiff); - } - break; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: - std::swap(LHS, RHS); - // fall through - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: - // a >u b ? a+x : b+x -> umax(a, b)+x - // a >u b ? b+x : a+x -> umin(a, b)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType())) { - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); - const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, RS); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(LS, RS), LDiff); - LDiff = getMinusSCEV(LA, RS); - RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getUMinExpr(LS, RS), LDiff); - } - break; - case ICmpInst::ICMP_NE: - // n != 0 ? n+x : 1+x -> umax(n, 1)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType()) && - isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(U->getType(), 1); - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, LS); - const SCEV *RDiff = getMinusSCEV(RA, One); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(One, LS), LDiff); - } - break; - case ICmpInst::ICMP_EQ: - // n == 0 ? 1+x : n+x -> umax(n, 1)+x - if (getTypeSizeInBits(LHS->getType()) <= - getTypeSizeInBits(U->getType()) && - isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { - const SCEV *One = getConstant(U->getType(), 1); - const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); - const SCEV *LA = getSCEV(U->getOperand(1)); - const SCEV *RA = getSCEV(U->getOperand(2)); - const SCEV *LDiff = getMinusSCEV(LA, One); - const SCEV *RDiff = getMinusSCEV(RA, LS); - if (LDiff == RDiff) - return getAddExpr(getUMaxExpr(One, LS), LDiff); - } - break; - default: - break; - } - } + // U can also be a select constant expr, which we let fall through. Since + // createNodeForSelect only works for a condition that is an `ICmpInst`, and + // constant expressions cannot have instructions as operands, we'd have + // returned getUnknown for select constant expressions anyway. + if (isa<Instruction>(U)) + return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0), + U->getOperand(1), U->getOperand(2)); default: // We cannot analyze this expression. break; @@ -4534,8 +4869,7 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L, return 1; // Get the trip count from the BE count by adding 1. - const SCEV *TCMul = getAddExpr(ExitCount, - getConstant(ExitCount->getType(), 1)); + const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType())); // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt // to factor simple cases.
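The removed cases are pure integer identities, now handled in createNodeForSelectOrPHI. A self-contained check (plain C++, exhaustive at 8 bits; the offset X is an arbitrary example value, and this is an illustration rather than SCEV code):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t X = 0x2A; // arbitrary offset 'x'
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B) {
      // a >u b ? a+x : b+x  ->  umax(a, b)+x
      uint8_t Sel = (uint8_t)(A > B ? A + X : B + X);
      assert(Sel == (uint8_t)(std::max(A, B) + X));
      // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
      uint8_t SelNZ = (uint8_t)(A != 0 ? A + X : 1 + X);
      assert(SelNZ == (uint8_t)(std::max(A, 1u) + X));
    }
  return 0;
}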
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul)) @@ -4610,10 +4944,10 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { if (!Pair.second) return Pair.first->second; - // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it + // computeBackedgeTakenCount may allocate memory for its result. Inserting it // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result // must be cleared in this scope. - BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L); + BackedgeTakenInfo Result = computeBackedgeTakenCount(L); if (Result.getExact(this) != getCouldNotCompute()) { assert(isLoopInvariant(Result.getExact(this), L) && @@ -4666,7 +5000,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { } // Re-lookup the insert position, since the call to - // ComputeBackedgeTakenCount above could result in a + // computeBackedgeTakenCount above could result in a // recursive call to getBackedgeTakenInfo (on a different // loop), which would invalidate the iterator computed // earlier. @@ -4744,12 +5078,12 @@ void ScalarEvolution::forgetValue(Value *V) { } /// getExact - Get the exact loop backedge taken count considering all loop -/// exits. A computable result can only be return for loops with a single exit. -/// Returning the minimum taken count among all exits is incorrect because one -/// of the loop's exit limit's may have been skipped. HowFarToZero assumes that -/// the limit of each loop test is never skipped. This is a valid assumption as -/// long as the loop exits via that test. For precise results, it is the -/// caller's responsibility to specify the relevant loop exit using +/// exits. A computable result can only be returned for loops with a single +/// exit. Returning the minimum taken count among all exits is incorrect +/// because one of the loop's exit limits may have been skipped. HowFarToZero +/// assumes that the limit of each loop test is never skipped. This is a valid +/// assumption as long as the loop exits via that test. For precise results, it +/// is the caller's responsibility to specify the relevant loop exit using /// getExact(ExitingBlock, SE). const SCEV * ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { @@ -4847,10 +5181,10 @@ void ScalarEvolution::BackedgeTakenInfo::clear() { delete[] ExitNotTaken.getNextExit(); } -/// ComputeBackedgeTakenCount - Compute the number of times the backedge +/// computeBackedgeTakenCount - Compute the number of times the backedge /// of the specified loop will execute. ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { +ScalarEvolution::computeBackedgeTakenCount(const Loop *L) { SmallVector<BasicBlock *, 8> ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -4864,7 +5198,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // and compute maxBECount. for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitBB = ExitingBlocks[i]; - ExitLimit EL = ComputeExitLimit(L, ExitBB); + ExitLimit EL = computeExitLimit(L, ExitBB); // 1. For each exit that can be computed, add an entry to ExitCounts. // CouldComputeBECount is true only if all exits can be computed. @@ -4885,7 +5219,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is // considered greater than any computable EL.Max.
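As a rough illustration of the aggregation in this function (simplified: the must-exit/may-exit distinction is folded down to a single max, CouldNotCompute is modeled as an empty optional, and the counts are made up):

#include <algorithm>
#include <cassert>
#include <optional>
#include <vector>

struct ToyExitLimit {
  std::optional<unsigned> Exact; // empty models CouldNotCompute
  std::optional<unsigned> Max;
  bool DominatesLatch;
};

int main() {
  std::vector<ToyExitLimit> Exits = {
      {8, 8, true},                // countable exit dominating the latch
      {std::nullopt, 100, false}}; // early exit with no exact count
  bool CouldComputeBECount = true;
  std::optional<unsigned> MaxBECount;
  for (const auto &EL : Exits) {
    if (!EL.Exact)
      CouldComputeBECount = false; // one uncountable exit spoils the exact count
    if (EL.Max && EL.DominatesLatch)
      MaxBECount = std::max(MaxBECount.value_or(0), *EL.Max);
  }
  assert(!CouldComputeBECount && MaxBECount && *MaxBECount == 8);
  return 0;
}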
if (EL.Max != getCouldNotCompute() && Latch && - DT->dominates(ExitBB, Latch)) { + DT.dominates(ExitBB, Latch)) { if (!MustExitMaxBECount) MustExitMaxBECount = EL.Max; else { @@ -4906,13 +5240,11 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount); } -/// ComputeExitLimit - Compute the number of times the backedge of the specified -/// loop will execute if it exits via the specified block. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { +ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { - // Okay, we've chosen an exiting block. See what condition causes us to - // exit at this block and remember the exit block and whether all other targets + // Okay, we've chosen an exiting block. See what condition causes us to exit + // at this block and remember the exit block and whether all other targets // lead to the loop header. bool MustExecuteLoopHeader = true; BasicBlock *Exit = nullptr; @@ -4952,8 +5284,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { if (!Pred) return getCouldNotCompute(); TerminatorInst *PredTerm = Pred->getTerminator(); - for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { - BasicBlock *PredSucc = PredTerm->getSuccessor(i); + for (const BasicBlock *PredSucc : PredTerm->successors()) { if (PredSucc == BB) continue; // If the predecessor has a successor that isn't BB and isn't @@ -4976,19 +5307,19 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { if (BranchInst *BI = dyn_cast(Term)) { assert(BI->isConditional() && "If unconditional, it can't be in loop!"); // Proceed to the next level to examine the exit condition expression. - return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), + return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), BI->getSuccessor(1), /*ControlsExit=*/IsOnlyExit); } if (SwitchInst *SI = dyn_cast(Term)) - return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit, + return computeExitLimitFromSingleExitSwitch(L, SI, Exit, /*ControlsExit=*/IsOnlyExit); return getCouldNotCompute(); } -/// ComputeExitLimitFromCond - Compute the number of times the +/// computeExitLimitFromCond - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of ExitCond, TBB, and FBB. /// @@ -4997,7 +5328,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { /// condition is true and can infer that failing to meet the condition prior to /// integer wraparound results in undefined behavior. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, +ScalarEvolution::computeExitLimitFromCond(const Loop *L, Value *ExitCond, BasicBlock *TBB, BasicBlock *FBB, @@ -5007,9 +5338,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, if (BO->getOpcode() == Instruction::And) { // Recurse on the operands of the and. 
bool EitherMayExit = L->contains(TBB); - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); @@ -5042,9 +5373,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, if (BO->getOpcode() == Instruction::Or) { // Recurse on the operands of the or. bool EitherMayExit = L->contains(FBB); - ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, + ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, ControlsExit && !EitherMayExit); - ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, + ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, ControlsExit && !EitherMayExit); const SCEV *BECount = getCouldNotCompute(); const SCEV *MaxBECount = getCouldNotCompute(); @@ -5079,7 +5410,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, // With an icmp, it may be feasible to compute an exact backedge-taken count. // Proceed to the next level to examine the icmp. if (ICmpInst *ExitCondICmp = dyn_cast(ExitCond)) - return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); + return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); // Check for a constant condition. These are normally stripped out by // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -5091,18 +5422,15 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, return getCouldNotCompute(); else // The backedge is never taken. - return getConstant(CI->getType(), 0); + return getZero(CI->getType()); } // If it's not an integer or pointer comparison then compute it the hard way. - return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } -/// ComputeExitLimitFromICmp - Compute the number of times the -/// backedge of the specified loop will execute if its exit condition -/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, +ScalarEvolution::computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond, BasicBlock *TBB, BasicBlock *FBB, @@ -5119,11 +5447,16 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, if (LoadInst *LI = dyn_cast(ExitCond->getOperand(0))) if (Constant *RHS = dyn_cast(ExitCond->getOperand(1))) { ExitLimit ItCnt = - ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond); + computeLoadConstantCompareExitLimit(LI, RHS, L, Cond); if (ItCnt.hasAnyInfo()) return ItCnt; } + ExitLimit ShiftEL = computeShiftCompareExitLimit( + ExitCond->getOperand(0), ExitCond->getOperand(1), L, Cond); + if (ShiftEL.hasAnyInfo()) + return ShiftEL; + const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); @@ -5149,7 +5482,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, if (AddRec->getLoop() == L) { // Form the constant range. 
ConstantRange CompRange( - ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); + ICmpInst::makeConstantRange(Cond, RHSC->getAPInt())); const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; @@ -5183,21 +5516,13 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, break; } default: -#if 0 - dbgs() << "ComputeBackedgeTakenCount "; - if (ExitCond->getOperand(0)->getType()->isUnsigned()) - dbgs() << "[unsigned] "; - dbgs() << *LHS << " " - << Instruction::getOpcodeName(Instruction::ICmp) - << " " << *RHS << "\n"; -#endif break; } - return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); + return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); } ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L, +ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch, BasicBlock *ExitingBlock, bool ControlsExit) { @@ -5230,11 +5555,11 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, return cast<ConstantInt>(Val)->getValue(); } -/// ComputeLoadConstantCompareExitLimit - Given an exit condition of +/// computeLoadConstantCompareExitLimit - Given an exit condition of /// 'icmp op load X, cst', try to see if we can compute the backedge /// execution count. ScalarEvolution::ExitLimit -ScalarEvolution::ComputeLoadConstantCompareExitLimit( +ScalarEvolution::computeLoadConstantCompareExitLimit( LoadInst *LI, Constant *RHS, const Loop *L, @@ -5303,11 +5628,6 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( Result = ConstantExpr::getICmp(predicate, Result, RHS); if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure if (cast<ConstantInt>(Result)->getValue().isMinValue()) { -#if 0 - dbgs() << "\n***\n*** Computed loop count " << *ItCst - << "\n*** From global " << *GV << "*** BB: " << *L->getHeader() - << "***\n"; -#endif ++NumArrayLenItCounts; return getConstant(ItCst); // Found terminating iteration! } @@ -5315,6 +5635,149 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( return getCouldNotCompute(); } +ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( + Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { + ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV); + if (!RHS) + return getCouldNotCompute(); + + const BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return getCouldNotCompute(); + + const BasicBlock *Predecessor = L->getLoopPredecessor(); + if (!Predecessor) + return getCouldNotCompute(); + + // Return true if V is of the form "LHS `shift_op` <positive constant>". + // Return LHS in OutLHS and shift_op in OutOpCode. + auto MatchPositiveShift = + [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) { + + using namespace PatternMatch; + + ConstantInt *ShiftAmt; + if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) + OutOpCode = Instruction::LShr; + else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) + OutOpCode = Instruction::AShr; + else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) + OutOpCode = Instruction::Shl; + else + return false; + + return ShiftAmt->getValue().isStrictlyPositive(); + }; + + // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in + // + // loop: + // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] + // %iv.shifted = lshr i32 %iv, <positive constant> + // + // Return true on a successful match. Return the corresponding PHI node (%iv + // above) in PNOut and the opcode of the shift operation in OpCodeOut.
+ auto MatchShiftRecurrence = + [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { + Optional<Instruction::BinaryOps> PostShiftOpCode; + + { + Instruction::BinaryOps OpC; + Value *V; + + // If we encounter a shift instruction, "peel off" the shift operation, + // and remember that we did so. Later when we inspect %iv's backedge + // value, we will make sure that the backedge value uses the same + // operation. + // + // Note: the peeled shift operation does not have to be the same + // instruction as the one feeding into the PHI's backedge value. We only + // really care about it being the same *kind* of shift instruction -- + // that's all that is required for our later inferences to hold. + if (MatchPositiveShift(LHS, V, OpC)) { + PostShiftOpCode = OpC; + LHS = V; + } + } + + PNOut = dyn_cast<PHINode>(LHS); + if (!PNOut || PNOut->getParent() != L->getHeader()) + return false; + + Value *BEValue = PNOut->getIncomingValueForBlock(Latch); + Value *OpLHS; + + return + // The backedge value for the PHI node must be a shift by a positive + // amount + MatchPositiveShift(BEValue, OpLHS, OpCodeOut) && + + // of the PHI node itself + OpLHS == PNOut && + + // and the kind of shift should match the kind of shift we peeled + // off, if any. + (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut); + }; + + PHINode *PN; + Instruction::BinaryOps OpCode; + if (!MatchShiftRecurrence(LHS, PN, OpCode)) + return getCouldNotCompute(); + + const DataLayout &DL = getDataLayout(); + + // The key rationale for this optimization is that for some kinds of shift + // recurrences, the value of the recurrence "stabilizes" to either 0 or -1 + // within a finite number of iterations. If the condition guarding the + // backedge (in the sense that the backedge is taken if the condition is true) + // is false for the value the shift recurrence stabilizes to, then we know + // that the backedge is taken only a finite number of times. + + ConstantInt *StableValue = nullptr; + switch (OpCode) { + default: + llvm_unreachable("Impossible case!"); + + case Instruction::AShr: { + // {K,ashr,<positive>} stabilizes to signum(K) in at most + // bitwidth(K) iterations. + Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); + bool KnownZero, KnownOne; + ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr, + Predecessor->getTerminator(), &DT); + auto *Ty = cast<IntegerType>(RHS->getType()); + if (KnownZero) + StableValue = ConstantInt::get(Ty, 0); + else if (KnownOne) + StableValue = ConstantInt::get(Ty, -1, true); + else + return getCouldNotCompute(); + + break; + } + case Instruction::LShr: + case Instruction::Shl: + // Both {K,lshr,<positive>} and {K,shl,<positive>} + // stabilize to 0 in at most bitwidth(K) iterations. + StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0); + break; + } + + auto *Result = + ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI); + assert(Result->getType()->isIntegerTy(1) && + "Otherwise cannot be an operand to a branch instruction"); + + if (Result->isZeroValue()) { + unsigned BitWidth = getTypeSizeInBits(RHS->getType()); + const SCEV *UpperBound = + getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); + return ExitLimit(getCouldNotCompute(), UpperBound); + } + + return getCouldNotCompute(); +} /// CanConstantFold - Return true if we can constant fold an instruction of the /// specified type, assuming that all operands were constants.
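The "stabilizes within bitwidth(K) iterations" claims above can be checked directly. A standalone sketch over every 8-bit start value (plain C++, not SCEV code; assumes arithmetic right shift for signed operands, which C++20 guarantees and mainstream compilers have long provided):

#include <cassert>
#include <cstdint>

int main() {
  for (int K = -128; K <= 127; ++K) {
    uint8_t U = (uint8_t)K; // {K,lshr,1} reaches 0 within 8 steps
    int8_t S = (int8_t)K;   // {K,ashr,1} reaches signum-like 0 or -1
    uint8_t L = (uint8_t)K; // {K,shl,1} reaches 0 within 8 steps
    for (int I = 0; I < 8; ++I) {
      U = (uint8_t)(U >> 1);
      S = (int8_t)(S >> 1);
      L = (uint8_t)(L << 1);
    }
    assert(U == 0 && L == 0 && S == (K < 0 ? -1 : 0));
  }
  return 0;
}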
@@ -5356,12 +5819,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. PHINode *PHI = nullptr; - for (Instruction::op_iterator OpI = UseInst->op_begin(), - OpE = UseInst->op_end(); OpI != OpE; ++OpI) { + for (Value *Op : UseInst->operands()) { + if (isa(Op)) continue; - if (isa(*OpI)) continue; - - Instruction *OpInst = dyn_cast(*OpI); + Instruction *OpInst = dyn_cast(Op); if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr; PHINode *P = dyn_cast(OpInst); @@ -5395,9 +5856,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { Instruction *I = dyn_cast(V); if (!I || !canConstantEvolve(I, L)) return nullptr; - if (PHINode *PN = dyn_cast(I)) { + if (PHINode *PN = dyn_cast(I)) return PN; - } // Record non-constant instructions contained by the loop. DenseMap PHIMap; @@ -5454,6 +5914,30 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, TLI); } + +// If every incoming value to PN except the one for BB is a specific Constant, +// return that, else return nullptr. +static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) { + Constant *IncomingVal = nullptr; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingBlock(i) == BB) + continue; + + auto *CurrentVal = dyn_cast(PN->getIncomingValue(i)); + if (!CurrentVal) + return nullptr; + + if (IncomingVal != CurrentVal) { + if (IncomingVal) + return nullptr; + IncomingVal = CurrentVal; + } + } + + return IncomingVal; +} + /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is /// in the header of its containing loop, we know the loop executes a /// constant number of times, and the PHI node is just a recurrence @@ -5462,8 +5946,7 @@ Constant * ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs, const Loop *L) { - DenseMap::const_iterator I = - ConstantEvolutionLoopExitValue.find(PN); + auto I = ConstantEvolutionLoopExitValue.find(PN); if (I != ConstantEvolutionLoopExitValue.end()) return I->second; @@ -5476,22 +5959,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, BasicBlock *Header = L->getHeader(); assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); - // Since the loop is canonicalized, the PHI node must have two entries. One - // entry must be a constant (coming in from outside of the loop), and the - // second must be derived from the same PHI. - bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = nullptr; - for (BasicBlock::iterator I = Header->begin(); - (PHI = dyn_cast(I)); ++I) { - Constant *StartCST = - dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return nullptr; + + for (auto &I : *Header) { + PHINode *PHI = dyn_cast(&I); + if (!PHI) break; + auto *StartCST = getOtherIncomingValue(PHI, Latch); if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } if (!CurrentIterVals.count(PN)) return RetVal = nullptr; - Value *BEValue = PN->getIncomingValue(SecondIsBackedge); + Value *BEValue = PN->getIncomingValueForBlock(Latch); // Execute the loop symbolically to determine the exit value. 
if (BEs.getActiveBits() >= 32) @@ -5499,7 +5981,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, unsigned NumIterations = BEs.getZExtValue(); // must be in range unsigned IterationNum = 0; - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); for (; ; ++IterationNum) { if (IterationNum == NumIterations) return RetVal = CurrentIterVals[PN]; // Got exit value! @@ -5508,7 +5990,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap NextIterVals; Constant *NextPHI = - EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); + EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); if (!NextPHI) return nullptr; // Couldn't evaluate! NextIterVals[PN] = NextPHI; @@ -5519,23 +6001,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // cease to be able to evaluate one of them or if they stop evolving, // because that doesn't necessarily prevent us from computing PN. SmallVector, 8> PHIsToCompute; - for (DenseMap::const_iterator - I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ - PHINode *PHI = dyn_cast(I->first); + for (const auto &I : CurrentIterVals) { + PHINode *PHI = dyn_cast(I.first); if (!PHI || PHI == PN || PHI->getParent() != Header) continue; - PHIsToCompute.push_back(std::make_pair(PHI, I->second)); + PHIsToCompute.emplace_back(PHI, I.second); } // We use two distinct loops because EvaluateExpression may invalidate any // iterators into CurrentIterVals. - for (SmallVectorImpl >::const_iterator - I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) { - PHINode *PHI = I->first; + for (const auto &I : PHIsToCompute) { + PHINode *PHI = I.first; Constant *&NextPHI = NextIterVals[PHI]; if (!NextPHI) { // Not already computed. - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); + Value *BEValue = PHI->getIncomingValueForBlock(Latch); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); } - if (NextPHI != I->second) + if (NextPHI != I.second) StoppedEvolving = false; } @@ -5548,12 +6028,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, } } -/// ComputeExitCountExhaustively - If the loop is known to execute a -/// constant number of times (the condition evolves only from constants), -/// try to evaluate a few iterations of the loop until we get the exit -/// condition gets a value of ExitWhen (true or false). If we cannot -/// evaluate the trip count of the loop, return getCouldNotCompute(). -const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, +const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); @@ -5567,14 +6042,14 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, BasicBlock *Header = L->getHeader(); assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); - // One entry must be a constant (coming in from outside of the loop), and the - // second must be derived from the same PHI. 
- bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = nullptr; - for (BasicBlock::iterator I = Header->begin(); - (PHI = dyn_cast(I)); ++I) { - Constant *StartCST = - dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Should follow from NumIncomingValues == 2!"); + + for (auto &I : *Header) { + PHINode *PHI = dyn_cast(&I); + if (!PHI) + break; + auto *StartCST = getOtherIncomingValue(PHI, Latch); if (!StartCST) continue; CurrentIterVals[PHI] = StartCST; } @@ -5585,10 +6060,10 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // the loop symbolically to determine when the condition gets a value of // "ExitWhen". unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ - ConstantInt *CondVal = dyn_cast_or_null( - EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI)); + auto *CondVal = dyn_cast_or_null( + EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -5605,20 +6080,17 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // calling EvaluateExpression on them because that may invalidate iterators // into CurrentIterVals. SmallVector PHIsToCompute; - for (DenseMap::const_iterator - I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ - PHINode *PHI = dyn_cast(I->first); + for (const auto &I : CurrentIterVals) { + PHINode *PHI = dyn_cast(I.first); if (!PHI || PHI->getParent() != Header) continue; PHIsToCompute.push_back(PHI); } - for (SmallVectorImpl::const_iterator I = PHIsToCompute.begin(), - E = PHIsToCompute.end(); I != E; ++I) { - PHINode *PHI = *I; + for (PHINode *PHI : PHIsToCompute) { Constant *&NextPHI = NextIterVals[PHI]; if (NextPHI) continue; // Already computed! - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); + Value *BEValue = PHI->getIncomingValueForBlock(Latch); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); } CurrentIterVals.swap(NextIterVals); } @@ -5638,22 +6110,22 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, /// In the case that a relevant loop exit value cannot be computed, the /// original value V is returned. const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { + SmallVector, 2> &Values = + ValuesAtScopes[V]; // Check to see if we've folded this expression at this loop before. - SmallVector, 2> &Values = ValuesAtScopes[V]; - for (unsigned u = 0; u < Values.size(); u++) { - if (Values[u].first == L) - return Values[u].second ? Values[u].second : V; - } - Values.push_back(std::make_pair(L, static_cast(nullptr))); + for (auto &LS : Values) + if (LS.first == L) + return LS.second ? LS.second : V; + + Values.emplace_back(L, nullptr); + // Otherwise compute it. 
const SCEV *C = computeSCEVAtScope(V, L); - SmallVector, 2> &Values2 = ValuesAtScopes[V]; - for (unsigned u = Values2.size(); u > 0; u--) { - if (Values2[u - 1].first == L) { - Values2[u - 1].second = C; + for (auto &LS : reverse(ValuesAtScopes[V])) + if (LS.first == L) { + LS.second = C; break; } - } return C; } @@ -5763,7 +6235,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // exit value from the loop without using SCEVs. if (const SCEVUnknown *SU = dyn_cast(V)) { if (Instruction *I = dyn_cast(SU->getValue())) { - const Loop *LI = (*this->LI)[I->getParent()]; + const Loop *LI = this->LI[I->getParent()]; if (LI && LI->getParentLoop() == L) // Looking for loop exit value. if (PHINode *PN = dyn_cast(I)) if (PN->getParent() == LI->getHeader()) { @@ -5777,9 +6249,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Okay, we know how many times the containing loop executes. If // this is a constant evolving PHI node, get the final value at // the specified iteration number. - Constant *RV = getConstantEvolutionLoopExitValue(PN, - BTCC->getValue()->getValue(), - LI); + Constant *RV = + getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI); if (RV) return getSCEV(RV); } } @@ -5791,8 +6262,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (CanConstantFold(I)) { SmallVector Operands; bool MadeImprovement = false; - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - Value *Op = I->getOperand(i); + for (Value *Op : I->operands()) { if (Constant *C = dyn_cast(Op)) { Operands.push_back(C); continue; @@ -5821,16 +6291,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if getSCEVAtScope actually made an improvement. if (MadeImprovement) { Constant *C = nullptr; - const DataLayout &DL = F->getParent()->getDataLayout(); + const DataLayout &DL = getDataLayout(); if (const CmpInst *CI = dyn_cast(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], - Operands[1], DL, TLI); + Operands[1], DL, &TLI); else if (const LoadInst *LI = dyn_cast(I)) { if (!LI->isVolatile()) C = ConstantFoldLoadFromConstPtr(Operands[0], DL); } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, - DL, TLI); + DL, &TLI); if (!C) return V; return getSCEV(C); } @@ -6021,10 +6491,10 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { return std::make_pair(CNC, CNC); } - uint32_t BitWidth = LC->getValue()->getValue().getBitWidth(); - const APInt &L = LC->getValue()->getValue(); - const APInt &M = MC->getValue()->getValue(); - const APInt &N = NC->getValue()->getValue(); + uint32_t BitWidth = LC->getAPInt().getBitWidth(); + const APInt &L = LC->getAPInt(); + const APInt &M = MC->getAPInt(); + const APInt &N = NC->getAPInt(); APInt Two(BitWidth, 2); APInt Four(BitWidth, 4); @@ -6103,10 +6573,6 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { const SCEVConstant *R1 = dyn_cast(Roots.first); const SCEVConstant *R2 = dyn_cast(Roots.second); if (R1 && R2) { -#if 0 - dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1 - << " sol#2: " << *R2 << "\n"; -#endif // Pick the smallest positive root value. 
if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT, @@ -6160,7 +6626,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // For negative steps (counting down to zero): // N = Start/-Step // First compute the unsigned distance from zero in the direction of Step. - bool CountDown = StepC->getValue()->getValue().isNegative(); + bool CountDown = StepC->getAPInt().isNegative(); const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start); // Handle unitary steps, which cannot wraparound. @@ -6185,13 +6651,53 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // done by counting and comparing the number of trailing zeros of Step and // Distance. if (!CountDown) { - const APInt &StepV = StepC->getValue()->getValue(); + const APInt &StepV = StepC->getAPInt(); // StepV.isPowerOf2() returns true if StepV is a positive power of two. It // also returns true if StepV is maximally negative (eg, INT_MIN), but that // case is not handled as this code is guarded by !CountDown. if (StepV.isPowerOf2() && - GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) - return getUDivExactExpr(Distance, Step); + GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) { + // Here we've constrained the equation to be of the form + // + // 2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W) ... (0) + // + // where we're operating on a W bit wide integer domain and k is + // non-negative. The smallest unsigned solution for X is the trip count. + // + // (0) is equivalent to: + // + // 2^(N + k) * Distance' - 2^N * X = L * 2^W + // <=> 2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N + // <=> 2^k * Distance' - X = L * 2^(W - N) + // <=> 2^k * Distance' = L * 2^(W - N) + X ... (1) + // + // The smallest X satisfying (1) is unsigned remainder of dividing the LHS + // by 2^(W - N). + // + // <=> X = 2^k * Distance' URem 2^(W - N) ... (2) + // + // E.g. say we're solving + // + // 2 * Val = 2 * X (in i8) ... (3) + // + // then from (2), we get X = Val URem i8 128 (k = 0 in this case). + // + // Note: It is tempting to solve (3) by setting X = Val, but Val is not + // necessarily the smallest unsigned value of X that satisfies (3). + // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3) + // is i8 1, not i8 -127 + + const auto *ModuloResult = getUDivExactExpr(Distance, Step); + + // Since SCEV does not have a URem node, we construct one using a truncate + // and a zero extend. + + unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros(); + auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); + auto *WideTy = Distance->getType(); + + return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); + } } // If the condition controls loop exit (the loop exits only if the expression @@ -6207,8 +6713,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) { // Then, try to solve the above equation provided that Start is constant. if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) - return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), - -StartC->getValue()->getValue(), + return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(), *this); return getCouldNotCompute(); } @@ -6226,7 +6731,7 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { // already. If so, the backedge will execute zero times.
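The "smallest X" subtlety worked through above is easy to confirm at 8 bits. A standalone sketch for Step = 2 (so N = 1, W = 8; plain C++, illustration only): the smallest unsigned solution of Step*X == Step*Val (mod 2^W) is Val urem 2^(W-N), not Val itself, exactly as in the i8 -127 example:

#include <cassert>

int main() {
  for (unsigned Val = 0; Val <= 0xFF; ++Val) {
    unsigned Smallest = ~0u;
    for (unsigned X = 0; X <= 0xFF; ++X)
      if (((2 * X) & 0xFF) == ((2 * Val) & 0xFF)) {
        Smallest = X; // first hit is the smallest solution
        break;
      }
    assert(Smallest == Val % 128); // Val urem 2^(W - N)
  }
  assert(0x81u % 128 == 1); // Val = i8 -127: smallest X is 1, not -127
  return 0;
}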
if (const SCEVConstant *C = dyn_cast(V)) { if (!C->getValue()->isNullValue()) - return getConstant(C->getType(), 0); + return getZero(C->getType()); return getCouldNotCompute(); // Otherwise it will loop infinitely. } @@ -6251,7 +6756,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { // A loop's header is defined to be a block that dominates the loop. // If the header has a unique predecessor outside the loop, it must be // a block that has exactly one successor that can reach the loop. - if (Loop *L = LI->getLoopFor(BB)) + if (Loop *L = LI.getLoopFor(BB)) return std::make_pair(L->getLoopPredecessor(), L->getHeader()); return std::pair(); @@ -6267,13 +6772,20 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) { // Quick check to see if they are the same SCEV. if (A == B) return true; + auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) { + // Not all instructions that are "identical" compute the same value. For + // instance, two distinct alloca instructions allocating the same type are + // identical and do not read memory; but compute distinct values. + return A->isIdenticalTo(B) && (isa(A) || isa(A)); + }; + // Otherwise, if they're both SCEVUnknown, it's possible that they hold // two different instructions with the same value. Check for this case. if (const SCEVUnknown *AU = dyn_cast(A)) if (const SCEVUnknown *BU = dyn_cast(B)) if (const Instruction *AI = dyn_cast(AU->getValue())) if (const Instruction *BI = dyn_cast(BU->getValue())) - if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory()) + if (ComputesEqualValues(AI, BI)) return true; // Otherwise assume they may have a different value. @@ -6324,7 +6836,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, // If there's a constant operand, canonicalize comparisons with boundary // cases, and canonicalize *-or-equal comparisons to regular comparisons. if (const SCEVConstant *RC = dyn_cast(RHS)) { - const APInt &RA = RC->getValue()->getValue(); + const APInt &RA = RC->getAPInt(); switch (Pred) { default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); case ICmpInst::ICMP_EQ: @@ -6515,16 +7027,14 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, Pred = ICmpInst::ICMP_ULT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { - LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, - SCEV::FlagNUW); + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS); Pred = ICmpInst::ICMP_ULT; Changed = true; } break; case ICmpInst::ICMP_UGE: if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { - RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, - SCEV::FlagNUW); + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); Pred = ICmpInst::ICMP_UGT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { @@ -6612,10 +7122,140 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, if (LeftGuarded && RightGuarded) return true; + if (isKnownPredicateViaSplitting(Pred, LHS, RHS)) + return true; + // Otherwise see what can be done with known constant ranges. 
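The unsigned canonicalizations in this hunk rest on identities that hold whenever the decremented side cannot wrap, which is what the unsigned-min checks guarantee. A standalone exhaustive check at 8 bits (plain C++, illustration only):

#include <cassert>

int main() {
  for (unsigned A = 0; A <= 0xFF; ++A)
    for (unsigned B = 0; B <= 0xFF; ++B) {
      if (A >= 1) // a u<= b  <=>  (a - 1) u< b, since a - 1 cannot wrap
        assert((A <= B) == (A - 1 < B));
      if (B >= 1) // a u>= b  <=>  a u> (b - 1), since b - 1 cannot wrap
        assert((A >= B) == (A > B - 1));
    }
  return 0;
}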
return isKnownPredicateWithRanges(Pred, LHS, RHS); } +bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, + bool &Increasing) { + bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing); + +#ifndef NDEBUG + // Verify an invariant: inverting the predicate should turn a monotonically + // increasing change to a monotonically decreasing one, and vice versa. + bool IncreasingSwapped; + bool ResultSwapped = isMonotonicPredicateImpl( + LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped); + + assert(Result == ResultSwapped && "should be able to analyze both!"); + if (ResultSwapped) + assert(Increasing == !IncreasingSwapped && + "monotonicity should flip as we flip the predicate"); +#endif + + return Result; +} + +bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, + ICmpInst::Predicate Pred, + bool &Increasing) { + + // A zero step value for LHS means the induction variable is essentially a + // loop invariant value. We don't really depend on the predicate actually + // flipping from false to true (for increasing predicates, and the other way + // around for decreasing predicates), all we care about is that *if* the + // predicate changes then it only changes from false to true. + // + // A zero step value in itself is not very useful, but there may be places + // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be + // as general as possible. + + switch (Pred) { + default: + return false; // Conservative answer + + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (!LHS->getNoWrapFlags(SCEV::FlagNUW)) + return false; + + Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE; + return true; + + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: { + if (!LHS->getNoWrapFlags(SCEV::FlagNSW)) + return false; + + const SCEV *Step = LHS->getStepRecurrence(*this); + + if (isKnownNonNegative(Step)) { + Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE; + return true; + } + + if (isKnownNonPositive(Step)) { + Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE; + return true; + } + + return false; + } + + } + + llvm_unreachable("switch has default clause!"); +} + +bool ScalarEvolution::isLoopInvariantPredicate( + ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, + ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS, + const SCEV *&InvariantRHS) { + + // If there is a loop-invariant, force it into the RHS, otherwise bail out. + if (!isLoopInvariant(RHS, L)) { + if (!isLoopInvariant(LHS, L)) + return false; + + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + + const SCEVAddRecExpr *ArLHS = dyn_cast(LHS); + if (!ArLHS || ArLHS->getLoop() != L) + return false; + + bool Increasing; + if (!isMonotonicPredicate(ArLHS, Pred, Increasing)) + return false; + + // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to + // true as the loop iterates, and the backedge is control dependent on + // "ArLHS `Pred` RHS" == true then we can reason as follows: + // + // * if the predicate was false in the first iteration then the predicate + // is never evaluated again, since the loop exits without taking the + // backedge. 
+ // * if the predicate was true in the first iteration then it will + // continue to be true for all future iterations since it is + // monotonically increasing. + // + // For both the above possibilities, we can replace the loop varying + // predicate with its value on the first iteration of the loop (which is + // loop invariant). + // + // A similar reasoning applies for a monotonically decreasing predicate, by + // replacing true with false and false with true in the above two bullets. + + auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred); + + if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) + return false; + + InvariantPred = Pred; + InvariantLHS = ArLHS->getStart(); + InvariantRHS = RHS; + return true; +} + bool ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) { @@ -6690,6 +7330,84 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred, return false; } +bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { + + // Match Result to (X + Y) where Y is a constant integer. + // Return Y via OutY. + auto MatchBinaryAddToConst = + [this](const SCEV *Result, const SCEV *X, APInt &OutY, + SCEV::NoWrapFlags ExpectedFlags) { + const SCEV *NonConstOp, *ConstOp; + SCEV::NoWrapFlags FlagsPresent; + + if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) || + !isa<SCEVConstant>(ConstOp) || NonConstOp != X) + return false; + + OutY = cast<SCEVConstant>(ConstOp)->getAPInt(); + return (FlagsPresent & ExpectedFlags) == ExpectedFlags; + }; + + APInt C; + + switch (Pred) { + default: + break; + + case ICmpInst::ICMP_SGE: + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLE: + // X s<= (X + C) if C >= 0 + if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative()) + return true; + + // (X + C) s<= X if C <= 0 + if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && + !C.isStrictlyPositive()) + return true; + break; + + case ICmpInst::ICMP_SGT: + std::swap(LHS, RHS); + case ICmpInst::ICMP_SLT: + // X s< (X + C) if C > 0 + if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && + C.isStrictlyPositive()) + return true; + + // (X + C) s< X if C < 0 + if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative()) + return true; + break; + } + + return false; +} + +bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, + const SCEV *LHS, + const SCEV *RHS) { + if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate) + return false; + + // Allowing an arbitrary number of activations of isKnownPredicateViaSplitting on + // the stack can result in exponential time complexity. + SaveAndRestore<bool> Restore(ProvingSplitPredicate, true); + + // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L + // + // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use + // isKnownPredicate. isKnownPredicate is more powerful, but also more + // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the + // interesting cases seen in practice. We can consider "upgrading" L >= 0 to + // use isKnownPredicate later if needed. + return isKnownNonNegative(RHS) && + isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) && + isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS); +} + /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is /// protected by a conditional between LHS and RHS. This is used to /// eliminate casts.
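The X s<= (X + C) / (X + C) s<= X rules above only need the nsw precondition. An exhaustive standalone check at 8 bits (plain C++; the explicit no-signed-wrap test stands in for the FlagNSW requirement):

#include <cassert>

int main() {
  for (int X = -128; X <= 127; ++X)
    for (int C = -128; C <= 127; ++C) {
      int Sum = X + C; // exact sum, then test whether it fits in i8
      if (Sum < -128 || Sum > 127)
        continue; // would wrap: the nsw precondition fails
      if (C >= 0)
        assert(X <= Sum); // X s<= (X + C) if C >= 0
      if (C <= 0)
        assert(Sum <= X); // (X + C) s<= X if C <= 0
    }
  return 0;
}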
@@ -6715,46 +7433,49 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, LoopContinuePredicate->getSuccessor(0) != L->getHeader())) return true; + // We don't want more than one activation of the following loops on the stack + // -- that can lead to O(n!) time complexity. + if (WalkingBEDominatingConds) + return false; + + SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true); + + // See if we can exploit a trip count to prove the predicate. + const auto &BETakenInfo = getBackedgeTakenInfo(L); + const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this); + if (LatchBECount != getCouldNotCompute()) { + // We know that Latch branches back to the loop header exactly + // LatchBECount times. This means the backedge condition at Latch is + // equivalent to "{0,+,1} u< LatchBECount". + Type *Ty = LatchBECount->getType(); + auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW); + const SCEV *LoopCounter = + getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags); + if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter, + LatchBECount)) + return true; + } + // Check conditions due to any @llvm.assume intrinsics. - for (auto &AssumeVH : AC->assumptions()) { + for (auto &AssumeVH : AC.assumptions()) { if (!AssumeVH) continue; auto *CI = cast<CallInst>(AssumeVH); - if (!DT->dominates(CI, Latch->getTerminator())) + if (!DT.dominates(CI, Latch->getTerminator())) continue; if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) return true; } - struct ClearWalkingBEDominatingCondsOnExit { - ScalarEvolution &SE; - - explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE) - : SE(SE){}; - - ~ClearWalkingBEDominatingCondsOnExit() { - SE.WalkingBEDominatingConds = false; - } - }; - - // We don't want more than one activation of the following loop on the stack - // -- that can lead to O(n!) time complexity. - if (WalkingBEDominatingConds) - return false; - - WalkingBEDominatingConds = true; - ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this); - // If the loop is not reachable from the entry block, we risk running into an // infinite loop as we walk up into the dom tree. These loops do not matter // anyway, so we just return a conservative answer when we see them. - if (!DT->isReachableFromEntry(L->getHeader())) + if (!DT.isReachableFromEntry(L->getHeader())) return false; - for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()]; - DTN != HeaderDTN; - DTN = DTN->getIDom()) { + for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()]; + DTN != HeaderDTN; DTN = DTN->getIDom()) { assert(DTN && "should reach the loop header before reaching the root!"); @@ -6778,7 +7499,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L, // We're constructively (and conservatively) enumerating edges within the // loop body that dominate the latch. The dominator tree better agree // with us on this: - assert(DT->dominates(DominatingEdge, Latch) && "should be!"); + assert(DT.dominates(DominatingEdge, Latch) && "should be!"); if (isImpliedCond(Pred, LHS, RHS, Condition, BB != ContinuePredicate->getSuccessor(0))) @@ -6823,11 +7544,11 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, } // Check conditions due to any @llvm.assume intrinsics.
-  for (auto &AssumeVH : AC->assumptions()) {
+  for (auto &AssumeVH : AC.assumptions()) {
     if (!AssumeVH)
       continue;
     auto *CI = cast<CallInst>(AssumeVH);
-    if (!DT->dominates(CI, L->getHeader()))
+    if (!DT.dominates(CI, L->getHeader()))
       continue;
 
     if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
@@ -6837,6 +7558,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
   return false;
 }
 
+namespace {
 /// RAII wrapper to prevent recursive application of isImpliedCond.
 /// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
 /// currently evaluating isImpliedCond.
@@ -6854,6 +7576,7 @@ struct MarkPendingLoopPredicate {
       LoopPreds.erase(Cond);
   }
 };
+} // end anonymous namespace
 
 /// isImpliedCond - Test whether the condition described by Pred, LHS,
 /// and RHS is true whenever the given Cond value evaluates to true.
@@ -6892,6 +7615,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
   const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
   const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));
 
+  return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
+}
+
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                    const SCEV *RHS,
+                                    ICmpInst::Predicate FoundPred,
+                                    const SCEV *FoundLHS,
+                                    const SCEV *FoundRHS) {
   // Balance the types.
   if (getTypeSizeInBits(LHS->getType()) <
       getTypeSizeInBits(FoundLHS->getType())) {
@@ -6947,6 +7678,13 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
                                    RHS, LHS, FoundLHS, FoundRHS);
   }
 
+  // Unsigned comparison is the same as signed comparison when both the
+  // operands are non-negative.
+  if (CmpInst::isUnsigned(FoundPred) &&
+      CmpInst::getSignedPredicate(FoundPred) == Pred &&
+      isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
+    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);
+
   // Check if we can make progress by sharpening ranges.
   if (FoundPred == ICmpInst::ICMP_NE &&
       (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {
@@ -6970,7 +7708,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
       APInt Min = ICmpInst::isSigned(Pred)
                       ? getSignedRange(V).getSignedMin()
                       : getUnsignedRange(V).getUnsignedMin();
 
-      if (Min == C->getValue()->getValue()) {
+      if (Min == C->getAPInt()) {
         // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
         // This is true even if (Min + 1) wraps around -- in case of
         // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).
@@ -7021,6 +7759,149 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
   return false;
 }
 
+bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
+                                     const SCEV *&L, const SCEV *&R,
+                                     SCEV::NoWrapFlags &Flags) {
+  const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
+  if (!AE || AE->getNumOperands() != 2)
+    return false;
+
+  L = AE->getOperand(0);
+  R = AE->getOperand(1);
+  Flags = AE->getNoWrapFlags();
+  return true;
+}
+
+bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
+                                                const SCEV *More,
+                                                APInt &C) {
+  // We avoid subtracting expressions here because this function is usually
+  // fairly deep in the call stack (i.e. is called many times).
+
+  if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
+    const auto *LAR = cast<SCEVAddRecExpr>(Less);
+    const auto *MAR = cast<SCEVAddRecExpr>(More);
+
+    if (LAR->getLoop() != MAR->getLoop())
+      return false;
+
+    // We look at affine expressions only; not for correctness but to keep
+    // getStepRecurrence cheap.
+    if (!LAR->isAffine() || !MAR->isAffine())
+      return false;
+
+    if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
+      return false;
+
+    Less = LAR->getStart();
+    More = MAR->getStart();
+
+    // fall through
+  }
+
+  if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
+    const auto &M = cast<SCEVConstant>(More)->getAPInt();
+    const auto &L = cast<SCEVConstant>(Less)->getAPInt();
+    C = M - L;
+    return true;
+  }
+
+  const SCEV *L, *R;
+  SCEV::NoWrapFlags Flags;
+  if (splitBinaryAdd(Less, L, R, Flags))
+    if (const auto *LC = dyn_cast<SCEVConstant>(L))
+      if (R == More) {
+        C = -(LC->getAPInt());
+        return true;
+      }
+
+  if (splitBinaryAdd(More, L, R, Flags))
+    if (const auto *LC = dyn_cast<SCEVConstant>(L))
+      if (R == Less) {
+        C = LC->getAPInt();
+        return true;
+      }
+
+  return false;
+}
+
+bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
+    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+    const SCEV *FoundLHS, const SCEV *FoundRHS) {
+  if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
+    return false;
+
+  const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!AddRecLHS)
+    return false;
+
+  const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
+  if (!AddRecFoundLHS)
+    return false;
+
+  // We'd like to let SCEV reason about control dependencies, so we constrain
+  // both the inequalities to be about add recurrences on the same loop.  This
+  // way we can use isLoopEntryGuardedByCond later.
+
+  const Loop *L = AddRecFoundLHS->getLoop();
+  if (L != AddRecLHS->getLoop())
+    return false;
+
+  //  FoundLHS u< FoundRHS u< -C  =>  (FoundLHS + C) u< (FoundRHS + C) ... (1)
+  //
+  //  FoundLHS s< FoundRHS s< INT_MIN - C  =>  (FoundLHS + C) s< (FoundRHS + C)
+  //                                                                  ... (2)
+  //
+  // Informal proof for (2), assuming (1) [*]:
+  //
+  // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... (3)[**]
+  //
+  // Then
+  //
+  //   FoundLHS s< FoundRHS s< INT_MIN - C
+  //   <=>  (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C      [ using (3) ]
+  //   <=>  (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C)    [ using (1) ]
+  //   <=>  (FoundLHS + INT_MIN + C + INT_MIN) s<
+  //        (FoundRHS + INT_MIN + C + INT_MIN)                      [ using (3) ]
+  //   <=>  FoundLHS + C s< FoundRHS + C
+  //
+  // [*]: (1) can be proved by ruling out overflow.
+  //
+  // [**]: This can be proved by analyzing all four possibilities:
+  //    (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and
+  //    (A s>= 0, B s>= 0).
+  //
+  // Note:
+  // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C"
+  // will not sign underflow.  For instance, say FoundLHS = (i8 -128), FoundRHS
+  // = (i8 -127) and C = (i8 -100).  Then INT_MIN - C = (i8 -28), and FoundRHS
+  // s< (INT_MIN - C).  Lack of sign overflow / underflow in "FoundRHS + C" is
+  // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
+  // C)".
+
+  APInt LDiff, RDiff;
+  if (!computeConstantDifference(FoundLHS, LHS, LDiff) ||
+      !computeConstantDifference(FoundRHS, RHS, RDiff) ||
+      LDiff != RDiff)
+    return false;
+
+  if (LDiff == 0)
+    return true;
+
+  APInt FoundRHSLimit;
+
+  if (Pred == CmpInst::ICMP_ULT) {
+    FoundRHSLimit = -RDiff;
+  } else {
+    assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
+    FoundRHSLimit =
+        APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff;
+  }
+
+  // Try to prove (1) or (2), as needed.
+  return isLoopEntryGuardedByCond(L, Pred, FoundRHS,
+                                  getConstant(FoundRHSLimit));
+}
+
 /// isImpliedCondOperands - Test whether the condition described by Pred,
 /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
 /// and FoundRHS is true.
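The [**] step above, (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)), is also easy to confirm exhaustively at 8 bits; a standalone sketch (plain C++, independent of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B) {
      // Adding INT_MIN modulo 2^8 biases [-128, 127] onto [0, 255].
      uint8_t UA = (uint8_t)(A + 128);
      uint8_t UB = (uint8_t)(B + 128);
      assert((A < B) == (UA < UB));
    }
  return 0;
}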
@@ -7031,6 +7912,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
   if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))
     return true;
 
+  if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
+    return true;
+
   return isImpliedCondOperandsHelper(Pred, LHS, RHS,
                                      FoundLHS, FoundRHS) ||
          // ~x < ~y --> x > y
@@ -7043,17 +7927,13 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
 /// If Expr computes ~A, return A else return nullptr
 static const SCEV *MatchNotExpr(const SCEV *Expr) {
   const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
-  if (!Add || Add->getNumOperands() != 2) return nullptr;
-
-  const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0));
-  if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue()))
+  if (!Add || Add->getNumOperands() != 2 ||
+      !Add->getOperand(0)->isAllOnesValue())
     return nullptr;
 
   const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
-  if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr;
-
-  const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0));
-  if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue()))
+  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
+      !AddRHS->getOperand(0)->isAllOnesValue())
     return nullptr;
 
   return AddRHS->getOperand(1);
@@ -7067,8 +7947,7 @@ static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
   const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
   if (!MaxExpr) return false;
 
-  auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate);
-  return It != MaxExpr->op_end();
+  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
 }
 
 
@@ -7084,6 +7963,38 @@ static bool IsMinConsistingOf(ScalarEvolution &SE,
   return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
 }
 
+static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
+                                           ICmpInst::Predicate Pred,
+                                           const SCEV *LHS, const SCEV *RHS) {
+
+  // If both sides are affine addrecs for the same loop, with equal
+  // steps, and we know the recurrences don't wrap, then we only
+  // need to check the predicate on the starting values.
+
+  if (!ICmpInst::isRelational(Pred))
+    return false;
+
+  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+  if (!LAR)
+    return false;
+  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+  if (!RAR)
+    return false;
+  if (LAR->getLoop() != RAR->getLoop())
+    return false;
+  if (!LAR->isAffine() || !RAR->isAffine())
+    return false;
+
+  if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE))
+    return false;
+
+  SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ?
+                         SCEV::FlagNSW : SCEV::FlagNUW;
+  if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW))
+    return false;
+
+  return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart());
+}
 
 /// Is LHS `Pred` RHS true by virtue of LHS or RHS being a Min or Max
 /// expression?
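A toy model of IsKnownPredicateViaAddRecStart just above: two no-wrap affine recurrences over the same loop with equal steps keep the order of their starts on every iteration, so comparing the starts is enough. A standalone sketch with illustrative constants (plain C++):

#include <cassert>
#include <cstdint>

int main() {
  // {4,+,3} s< {10,+,3}: same loop, same step, assumed no-wrap.
  int64_t LHS = 4, RHS = 10;
  const int64_t Step = 3;
  for (int Iter = 0; Iter < 1000; ++Iter) {
    assert(LHS < RHS); // the relation between the starts is preserved
    LHS += Step;
    RHS += Step;
  }
  return 0;
}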
@@ -7129,7 +8040,9 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
   auto IsKnownPredicateFull =
       [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
     return isKnownPredicateWithRanges(Pred, LHS, RHS) ||
-           IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS);
+           IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) ||
+           IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) ||
+           isKnownPredicateViaNoOverflow(Pred, LHS, RHS);
   };
 
   switch (Pred) {
@@ -7185,7 +8098,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
       !isa<SCEVConstant>(AddLHS->getOperand(0)))
     return false;
 
-  APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue();
+  APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
 
   // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the
   // antecedent "`FoundLHS` `Pred` `FoundRHS`".
@@ -7194,13 +8107,12 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
 
   // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
   // for `LHS`:
-  APInt Addend =
-      cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue();
+  APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt();
   ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
 
   // We can also compute the range of values for `LHS` that satisfy the
   // consequent, "`LHS` `Pred` `RHS`":
-  APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue();
+  APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();
   ConstantRange SatisfyingLHSRange =
       ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS);
 
@@ -7217,7 +8129,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
   if (NoWrap) return false;
 
   unsigned BitWidth = getTypeSizeInBits(RHS->getType());
-  const SCEV *One = getConstant(Stride->getType(), 1);
+  const SCEV *One = getOne(Stride->getType());
 
   if (IsSigned) {
     APInt MaxRHS = getSignedRange(RHS).getSignedMax();
@@ -7246,7 +8158,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
   if (NoWrap) return false;
 
   unsigned BitWidth = getTypeSizeInBits(RHS->getType());
-  const SCEV *One = getConstant(Stride->getType(), 1);
+  const SCEV *One = getOne(Stride->getType());
 
   if (IsSigned) {
     APInt MinRHS = getSignedRange(RHS).getSignedMin();
@@ -7271,7 +8183,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
 // stride and presence of the equality in the comparison.
 const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,
                                             bool Equality) {
-  const SCEV *One = getConstant(Step->getType(), 1);
+  const SCEV *One = getOne(Step->getType());
   Delta = Equality ?
              getAddExpr(Delta, Step) :
              getAddExpr(Delta, getMinusSCEV(Step, One));
   return getUDivExpr(Delta, Step);
@@ -7324,7 +8236,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     // overflow, in which case if RHS - Start is a constant, we don't need to
     // do a max operation since we can just figure it out statically
     if (NoWrap && isa<SCEVConstant>(Diff)) {
-      APInt D = dyn_cast<SCEVConstant>(Diff)->getValue()->getValue();
+      APInt D = dyn_cast<SCEVConstant>(Diff)->getAPInt();
       if (D.isNegative())
         End = Start;
     } else
@@ -7405,7 +8317,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
     // overflow, in which case if RHS - Start is a constant, we don't need to
     // do a max operation since we can just figure it out statically
     if (NoWrap && isa<SCEVConstant>(Diff)) {
-      APInt D = dyn_cast<SCEVConstant>(Diff)->getValue()->getValue();
+      APInt D = dyn_cast<SCEVConstant>(Diff)->getAPInt();
       if (!D.isNegative())
         End = Start;
     } else
@@ -7460,23 +8372,20 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
     if (!SC->getValue()->isZero()) {
       SmallVector<const SCEV *, 4> Operands(op_begin(), op_end());
-      Operands[0] = SE.getConstant(SC->getType(), 0);
+      Operands[0] = SE.getZero(SC->getType());
       const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),
                                              getNoWrapFlags(FlagNW));
-      if (const SCEVAddRecExpr *ShiftedAddRec =
-            dyn_cast<SCEVAddRecExpr>(Shifted))
+      if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))
         return ShiftedAddRec->getNumIterationsInRange(
-            Range.subtract(SC->getValue()->getValue()), SE);
+            Range.subtract(SC->getAPInt()), SE);
       // This is strange and shouldn't happen.
       return SE.getCouldNotCompute();
     }
 
   // The only time we can solve this is when we have all constant indices.
   // Otherwise, we cannot determine the overflow conditions.
-  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
-    if (!isa<SCEVConstant>(getOperand(i)))
-      return SE.getCouldNotCompute();
-
+  if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); }))
+    return SE.getCouldNotCompute();
 
   // Okay at this point we know that all elements of the chrec are constants
   // and that the start element is zero.
@@ -7485,7 +8394,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
   // iteration exits.
   unsigned BitWidth = SE.getTypeSizeInBits(getType());
   if (!Range.contains(APInt(BitWidth, 0)))
-    return SE.getConstant(getType(), 0);
+    return SE.getZero(getType());
 
   if (isAffine()) {
     // If this is an affine expression then we have this situation:
@@ -7496,7 +8405,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
     // If A is negative then the lower of the range is the last possible loop
     // value.  Also note that we already checked for a full range.
     APInt One(BitWidth,1);
-    APInt A = cast<SCEVConstant>(getOperand(1))->getValue()->getValue();
+    APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();
     APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();
 
     // The exit value should be (End+A)/A.
@@ -7528,15 +8437,13 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
                                              FlagAnyWrap);
 
     // Next, solve the constructed addrec
-    std::pair<const SCEV *, const SCEV *> Roots =
-      SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
+    auto Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);
     const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);
     const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);
     if (R1) {
       // Pick the smallest positive root value.
-      if (ConstantInt *CB =
-          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
-                                                      R1->getValue(),
-                                                      R2->getValue()))) {
+      if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp(
+              ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {
         if (!CB->getZExtValue())
           std::swap(R1, R2);   // R1 is the minimum root now.
 
@@ -7549,7 +8456,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
       if (Range.contains(R1Val->getValue())) {
         // The next iteration must be out of the range...
         ConstantInt *NextVal =
-                ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1);
+            ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);
 
         R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
         if (!Range.contains(R1Val->getValue()))
@@ -7560,7 +8467,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
       // If R1 was not in the range, then it is a good return value.  Make
       // sure that R1-1 WAS in the range though, just in case.
       ConstantInt *NextVal =
-             ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1);
+          ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);
       R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);
       if (Range.contains(R1Val->getValue()))
         return R1;
@@ -7644,9 +8551,84 @@ struct SCEVCollectTerms {
   }
   bool isDone() const { return false; }
 };
+
+// Check if a SCEV contains an AddRecExpr.
+struct SCEVHasAddRec {
+  bool &ContainsAddRec;
+
+  SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) {
+    ContainsAddRec = false;
+  }
+
+  bool follow(const SCEV *S) {
+    if (isa<SCEVAddRecExpr>(S)) {
+      ContainsAddRec = true;
+
+      // Stop recursion: once we collected a term, do not walk its operands.
+      return false;
+    }
+
+    // Keep looking.
+    return true;
+  }
+  bool isDone() const { return false; }
+};
+
+// Find factors that are multiplied with an expression that (possibly as a
+// subexpression) contains an AddRecExpr. In the expression:
+//
+//   8 * (100 +  %p * %q * (%a + {0, +, 1}_loop))
+//
+// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)"
+// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array
+// size parameters as they form a product with an induction variable.
+//
+// This collector expects all array size parameters to be in the same MulExpr.
+// It might be necessary to later add support for collecting parameters that
+// are spread over different nested MulExpr.
+struct SCEVCollectAddRecMultiplies {
+  SmallVectorImpl<const SCEV *> &Terms;
+  ScalarEvolution &SE;
+
+  SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T,
+                              ScalarEvolution &SE)
+      : Terms(T), SE(SE) {}
+
+  bool follow(const SCEV *S) {
+    if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
+      bool HasAddRec = false;
+      SmallVector<const SCEV *, 0> Operands;
+      for (auto Op : Mul->operands()) {
+        if (isa<SCEVUnknown>(Op)) {
+          Operands.push_back(Op);
+        } else {
+          bool ContainsAddRec;
+          SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
+          visitAll(Op, ContainsAddRecVisitor);
+          HasAddRec |= ContainsAddRec;
+        }
+      }
+      if (Operands.size() == 0)
+        return true;
+
+      if (!HasAddRec)
+        return false;
+
+      Terms.push_back(SE.getMulExpr(Operands));
+      // Stop recursion: once we collected a term, do not walk its operands.
+      return false;
+    }
+
+    // Keep looking.
+    return true;
+  }
+  bool isDone() const { return false; }
+};
 }
 
-/// Find parametric terms in this SCEVAddRecExpr.
+/// Find parametric terms in this SCEVAddRecExpr. We first look for parameters
+/// in two places:
+///   1) The strides of AddRec expressions.
+///   2) Unknowns that are multiplied with AddRec expressions.
 void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
                                              SmallVectorImpl<const SCEV *> &Terms) {
   SmallVector<const SCEV *, 4> Strides;
@@ -7669,6 +8651,9 @@ void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
       for (const SCEV *T : Terms)
         dbgs() << *T << "\n";
     });
+
+  SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
+  visitAll(Expr, MulCollector);
 }
 
 static bool findArrayDimensionsRec(ScalarEvolution &SE,
@@ -7718,30 +8703,28 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
   return true;
 }
 
-namespace {
-struct FindParameter {
-  bool FoundParameter;
-  FindParameter() : FoundParameter(false) {}
-
-  bool follow(const SCEV *S) {
-    if (isa<SCEVUnknown>(S)) {
-      FoundParameter = true;
-      // Stop recursion: we found a parameter.
-      return false;
-    }
-    // Keep looking.
-    return true;
-  }
-  bool isDone() const {
-    // Stop recursion if we have found a parameter.
-    return FoundParameter;
-  }
-};
-}
-
 // Returns true when S contains at least a SCEVUnknown parameter.
 static inline bool containsParameters(const SCEV *S) {
+  struct FindParameter {
+    bool FoundParameter;
+    FindParameter() : FoundParameter(false) {}
+
+    bool follow(const SCEV *S) {
+      if (isa<SCEVUnknown>(S)) {
+        FoundParameter = true;
+        // Stop recursion: we found a parameter.
+        return false;
+      }
+      // Keep looking.
+      return true;
+    }
+    bool isDone() const {
+      // Stop recursion if we have found a parameter.
+      return FoundParameter;
+    }
+  };
+
   FindParameter F;
   SCEVTraversal<FindParameter> ST(F);
   ST.visitAll(S);
@@ -7829,11 +8812,13 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
 
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
 
-  // Divide all terms by the element size.
+  // Try to divide all terms by the element size. If a term is not divisible
+  // by the element size, keep the original term.
   for (const SCEV *&Term : Terms) {
     const SCEV *Q, *R;
     SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
-    Term = Q;
+    if (!Q->isZero())
+      Term = Q;
   }
 
   SmallVector<const SCEV *, 4> NewTerms;
@@ -7875,7 +8860,7 @@ void ScalarEvolution::computeAccessFunctions(
   if (Sizes.empty())
     return;
 
-  if (auto AR = dyn_cast<SCEVAddRecExpr>(Expr))
+  if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
     if (!AR->isAffine())
       return;
 
@@ -8059,58 +9044,55 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
 // ScalarEvolution Class Implementation
 //===----------------------------------------------------------------------===//
 
-ScalarEvolution::ScalarEvolution()
-    : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64),
-      LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) {
-  initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
+ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
+                                 AssumptionCache &AC, DominatorTree &DT,
+                                 LoopInfo &LI)
+    : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
+      CouldNotCompute(new SCEVCouldNotCompute()),
+      WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+      ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64),
+      FirstUnknown(nullptr) {}
+
+ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
+    : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI),
+      CouldNotCompute(std::move(Arg.CouldNotCompute)),
+      ValueExprMap(std::move(Arg.ValueExprMap)),
+      WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
+      BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
+      ConstantEvolutionLoopExitValue(
+          std::move(Arg.ConstantEvolutionLoopExitValue)),
+      ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
+      LoopDispositions(std::move(Arg.LoopDispositions)),
+      BlockDispositions(std::move(Arg.BlockDispositions)),
+      UnsignedRanges(std::move(Arg.UnsignedRanges)),
+      SignedRanges(std::move(Arg.SignedRanges)),
+      UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
+      UniquePreds(std::move(Arg.UniquePreds)),
+      SCEVAllocator(std::move(Arg.SCEVAllocator)),
+      FirstUnknown(Arg.FirstUnknown) {
+  Arg.FirstUnknown = nullptr;
+}
 
-bool ScalarEvolution::runOnFunction(Function &F) {
-  this->F = &F;
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  return false;
-}
-
-void ScalarEvolution::releaseMemory() {
+ScalarEvolution::~ScalarEvolution() {
   // Iterate through all the SCEVUnknown instances and call their
   // destructors, so that they release their references to their values.
-  for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
-    U->~SCEVUnknown();
+  for (SCEVUnknown *U = FirstUnknown; U;) {
+    SCEVUnknown *Tmp = U;
+    U = U->Next;
+    Tmp->~SCEVUnknown();
+  }
   FirstUnknown = nullptr;
 
   ValueExprMap.clear();
 
   // Free any extra memory created for ExitNotTakenInfo in the unlikely event
   // that a loop had multiple computable exits.
-  for (DenseMap<const Loop *, BackedgeTakenInfo>::iterator I =
-         BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end();
-       I != E; ++I) {
-    I->second.clear();
-  }
+  for (auto &BTCI : BackedgeTakenCounts)
+    BTCI.second.clear();
 
   assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
   assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
-
-  BackedgeTakenCounts.clear();
-  ConstantEvolutionLoopExitValue.clear();
-  ValuesAtScopes.clear();
-  LoopDispositions.clear();
-  BlockDispositions.clear();
-  UnsignedRanges.clear();
-  SignedRanges.clear();
-  UniqueSCEVs.clear();
-  SCEVAllocator.Reset();
-}
-
-void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesAll();
-  AU.addRequired<AssumptionCacheTracker>();
-  AU.addRequiredTransitive<LoopInfoWrapperPass>();
-  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
-  AU.addRequired<TargetLibraryInfoWrapperPass>();
+  assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
 }
 
 bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
@@ -8152,7 +9134,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   OS << "\n";
 }
 
-void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
+void ScalarEvolution::print(raw_ostream &OS) const {
   // ScalarEvolution's implementation of the print method is to print
   // out SCEV values of all instructions that are interesting.  Doing
   // this potentially causes it to create new SCEV objects though,
@@ -8162,13 +9144,13 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
 
   OS << "Classifying expressions for: ";
-  F->printAsOperand(OS, /*PrintType=*/false);
+  F.printAsOperand(OS, /*PrintType=*/false);
   OS << "\n";
-  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
-    if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) {
-      OS << *I << '\n';
+  for (Instruction &I : instructions(F))
+    if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
+      OS << I << '\n';
       OS << "  -->  ";
-      const SCEV *SV = SE.getSCEV(&*I);
+      const SCEV *SV = SE.getSCEV(&I);
       SV->print(OS);
       if (!isa<SCEVCouldNotCompute>(SV)) {
         OS << " U: ";
@@ -8177,7 +9159,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
         SE.getSignedRange(SV).print(OS);
       }
 
-      const Loop *L = LI->getLoopFor((*I).getParent());
+      const Loop *L = LI.getLoopFor(I.getParent());
 
       const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
       if (AtUse != SV) {
@@ -8205,9 +9187,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
   }
 
   OS << "Determining loop execution counts for: ";
-  F->printAsOperand(OS, /*PrintType=*/false);
+  F.printAsOperand(OS, /*PrintType=*/false);
   OS << "\n";
-  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+  for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I)
     PrintLoopInfo(OS, &SE, *I);
 }
 
@@ -8260,9 +9242,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
 
     // This recurrence is variant w.r.t. L if any of its operands
    // are variant.
-    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
-         I != E; ++I)
-      if (!isLoopInvariant(*I, L))
+    for (auto *Op : AR->operands())
+      if (!isLoopInvariant(Op, L))
         return LoopVariant;
 
     // Otherwise it's loop-invariant.
@@ -8272,11 +9253,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
   case scMulExpr:
   case scUMaxExpr:
   case scSMaxExpr: {
-    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
     bool HasVarying = false;
-    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-         I != E; ++I) {
-      LoopDisposition D = getLoopDisposition(*I, L);
+    for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
+      LoopDisposition D = getLoopDisposition(Op, L);
       if (D == LoopVariant)
         return LoopVariant;
       if (D == LoopComputable)
@@ -8300,7 +9279,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
     // invariant if they are not contained in the specified loop.
     // Instructions are never considered invariant in the function body
    // (null loop) because they are defined within the "loop".
-    if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
+    if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
       return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;
     return LoopInvariant;
   case scCouldNotCompute:
@@ -8351,7 +9330,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
     // produces the addrec's value is a PHI, and a PHI effectively properly
    // dominates its entire containing block.
     const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
-    if (!DT->dominates(AR->getLoop()->getHeader(), BB))
+    if (!DT.dominates(AR->getLoop()->getHeader(), BB))
       return DoesNotDominateBlock;
   }
   // FALL THROUGH into SCEVNAryExpr handling.
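For context, the dispositions computed above back the public loop-invariance and dominance queries that client passes actually call; a hedged sketch of typical use (the helper name is illustrative, not part of this patch):

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Conservative check that S could be materialized in BB: it must be
// invariant with respect to L, and its operands must be available at BB.
static bool safeToMaterializeAt(ScalarEvolution &SE, const SCEV *S,
                                const Loop *L, const BasicBlock *BB) {
  return SE.isLoopInvariant(S, L) && SE.dominates(S, BB);
}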
@@ -8361,9 +9340,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
   case scSMaxExpr: {
     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
     bool Proper = true;
-    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-         I != E; ++I) {
-      BlockDisposition D = getBlockDisposition(*I, BB);
+    for (const SCEV *NAryOp : NAry->operands()) {
+      BlockDisposition D = getBlockDisposition(NAryOp, BB);
       if (D == DoesNotDominateBlock)
         return DoesNotDominateBlock;
       if (D == DominatesBlock)
@@ -8388,7 +9366,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
         dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
       if (I->getParent() == BB)
         return DominatesBlock;
-      if (DT->properlyDominates(I->getParent(), BB))
+      if (DT.properlyDominates(I->getParent(), BB))
         return ProperlyDominatesBlock;
       return DoesNotDominateBlock;
     }
@@ -8407,24 +9385,22 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
   return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
 }
 
-namespace {
-// Search for a SCEV expression node within an expression tree.
-// Implements SCEVTraversal::Visitor.
-struct SCEVSearch {
-  const SCEV *Node;
-  bool IsFound;
-
-  SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
-
-  bool follow(const SCEV *S) {
-    IsFound |= (S == Node);
-    return !IsFound;
-  }
-  bool isDone() const { return IsFound; }
-};
-}
-
 bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
+  // Search for a SCEV expression node within an expression tree.
+  // Implements SCEVTraversal::Visitor.
+  struct SCEVSearch {
+    const SCEV *Node;
+    bool IsFound;
+
+    SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
+
+    bool follow(const SCEV *S) {
+      IsFound |= (S == Node);
+      return !IsFound;
+    }
+    bool isDone() const { return IsFound; }
+  };
+
   SCEVSearch Search(Op);
   visitAll(S, Search);
   return Search.IsFound;
@@ -8463,43 +9439,39 @@ static void replaceSubString(std::string &Str, StringRef From, StringRef To) {
 /// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.
 static void
 getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) {
-  for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) {
-    getLoopBackedgeTakenCounts(*I, Map, SE); // recurse.
+  std::string &S = Map[L];
+  if (S.empty()) {
+    raw_string_ostream OS(S);
+    SE.getBackedgeTakenCount(L)->print(OS);
 
-    std::string &S = Map[L];
-    if (S.empty()) {
-      raw_string_ostream OS(S);
-      SE.getBackedgeTakenCount(L)->print(OS);
-
-      // false and 0 are semantically equivalent. This can happen in dead loops.
-      replaceSubString(OS.str(), "false", "0");
-      // Remove wrap flags, their use in SCEV is highly fragile.
-      // FIXME: Remove this when SCEV gets smarter about them.
-      replaceSubString(OS.str(), "<nw>", "");
-      replaceSubString(OS.str(), "<nsw>", "");
-      replaceSubString(OS.str(), "<nuw>", "");
-    }
+    // false and 0 are semantically equivalent. This can happen in dead loops.
+    replaceSubString(OS.str(), "false", "0");
+    // Remove wrap flags, their use in SCEV is highly fragile.
+    // FIXME: Remove this when SCEV gets smarter about them.
+    replaceSubString(OS.str(), "<nw>", "");
+    replaceSubString(OS.str(), "<nsw>", "");
+    replaceSubString(OS.str(), "<nuw>", "");
  }
+
+  for (auto *R : reverse(*L))
+    getLoopBackedgeTakenCounts(R, Map, SE); // recurse.
 }
 
-void ScalarEvolution::verifyAnalysis() const {
-  if (!VerifySCEV)
-    return;
-
+void ScalarEvolution::verify() const {
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
 
   // Gather stringified backedge taken counts for all loops using SCEV's caches.
   // FIXME: It would be much better to store actual values instead of strings,
   //        but SCEV pointers will change if we drop the caches.
   VerifyMap BackedgeDumpsOld, BackedgeDumpsNew;
-  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
+  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
     getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE);
 
-  // Gather stringified backedge taken counts for all loops without using
-  // SCEV's caches.
-  SE.releaseMemory();
-  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
-    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE);
+  // Gather stringified backedge taken counts for all loops using a fresh
+  // ScalarEvolution object.
+  ScalarEvolution SE2(F, TLI, AC, DT, LI);
+  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)
    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);
 
   // Now compare whether they're the same with and without caches. This allows
   // verifying that no pass changed the cache.
@@ -8532,3 +9504,238 @@ void ScalarEvolution::verifyAnalysis() const {
 
   // TODO: Verify more things.
 }
+
+char ScalarEvolutionAnalysis::PassID;
+
+ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
+                                             AnalysisManager<Function> *AM) {
+  return ScalarEvolution(F, AM->getResult<TargetLibraryAnalysis>(F),
+                         AM->getResult<AssumptionAnalysis>(F),
+                         AM->getResult<DominatorTreeAnalysis>(F),
+                         AM->getResult<LoopAnalysis>(F));
+}
+
+PreservedAnalyses
+ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> *AM) {
+  AM->getResult<ScalarEvolutionAnalysis>(F).print(OS);
+  return PreservedAnalyses::all();
+}
+
+INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution",
+                      "Scalar Evolution Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution",
+                    "Scalar Evolution Analysis", false, true)
+char ScalarEvolutionWrapperPass::ID = 0;
+
+ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) {
+  initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) {
+  SE.reset(new ScalarEvolution(
+      F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+      getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+      getAnalysis<LoopInfoWrapperPass>().getLoopInfo()));
+  return false;
+}
+
+void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); }
+
+void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const {
+  SE->print(OS);
+}
+
+void ScalarEvolutionWrapperPass::verifyAnalysis() const {
+  if (!VerifySCEV)
+    return;
+
+  SE->verify();
+}
+
+void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<AssumptionCacheTracker>();
+  AU.addRequiredTransitive<LoopInfoWrapperPass>();
+  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
+  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
+}
+
+const SCEVPredicate *
+ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS,
+                                   const SCEVConstant *RHS) {
+  FoldingSetNodeID ID;
+  // Unique this node based on the arguments
+  ID.AddInteger(SCEVPredicate::P_Equal);
+  ID.AddPointer(LHS);
+  ID.AddPointer(RHS);
+  void *IP = nullptr;
+  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP))
+    return S;
+  SCEVEqualPredicate *Eq = new (SCEVAllocator)
+      SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS);
+  UniquePreds.InsertNode(Eq, IP);
+  return Eq;
+}
+
+namespace {
+class SCEVPredicateRewriter
+    : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
+public:
+  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE,
+                             SCEVUnionPredicate &A) {
+    SCEVPredicateRewriter Rewriter(SE, A);
+    return Rewriter.visit(Scev);
+  }
+
+  SCEVPredicateRewriter(ScalarEvolution &SE, SCEVUnionPredicate &P)
+      : SCEVRewriteVisitor(SE), P(P) {}
+
+  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+    auto ExprPreds = P.getPredicatesForExpr(Expr);
+    for (auto *Pred : ExprPreds)
+      if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
+        if (IPred->getLHS() == Expr)
+          return IPred->getRHS();
+
+    return Expr;
+  }
+
+private:
+  SCEVUnionPredicate &P;
+};
+} // end anonymous namespace
+
+const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *Scev,
+                                                   SCEVUnionPredicate &Preds) {
+  return SCEVPredicateRewriter::rewrite(Scev, *this, Preds);
+}
+
+/// SCEV predicates
+SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID,
+                             SCEVPredicateKind Kind)
+    : FastID(ID), Kind(Kind) {}
+
+SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID,
+                                       const SCEVUnknown *LHS,
+                                       const SCEVConstant *RHS)
+    : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {}
+
+bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const {
+  const auto *Op = dyn_cast<SCEVEqualPredicate>(N);
+
+  if (!Op)
+    return false;
+
+  return Op->LHS == LHS && Op->RHS == RHS;
+}
+
+bool SCEVEqualPredicate::isAlwaysTrue() const { return false; }
+
+const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; }
+
+void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const {
+  OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
+}
+
+/// Union predicates don't get cached so create a dummy set ID for it.
+SCEVUnionPredicate::SCEVUnionPredicate()
+    : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {}
+
+bool SCEVUnionPredicate::isAlwaysTrue() const {
+  return all_of(Preds,
+                [](const SCEVPredicate *I) { return I->isAlwaysTrue(); });
+}
+
+ArrayRef<const SCEVPredicate *>
+SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) {
+  auto I = SCEVToPreds.find(Expr);
+  if (I == SCEVToPreds.end())
+    return ArrayRef<const SCEVPredicate *>();
+  return I->second;
+}
+
+bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const {
+  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N))
+    return all_of(Set->Preds,
+                  [this](const SCEVPredicate *I) { return this->implies(I); });
+
+  auto ScevPredsIt = SCEVToPreds.find(N->getExpr());
+  if (ScevPredsIt == SCEVToPreds.end())
+    return false;
+  auto &SCEVPreds = ScevPredsIt->second;
+
+  return any_of(SCEVPreds,
+                [N](const SCEVPredicate *I) { return I->implies(N); });
+}
+
+const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; }
+
+void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const {
+  for (auto Pred : Preds)
+    Pred->print(OS, Depth);
+}
+
+void SCEVUnionPredicate::add(const SCEVPredicate *N) {
+  if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) {
+    for (auto Pred : Set->Preds)
+      add(Pred);
+    return;
+  }
+
+  if (implies(N))
+    return;
+
+  const SCEV *Key = N->getExpr();
+  assert(Key && "Only SCEVUnionPredicate doesn't have an "
+                "associated expression!");
+
+  SCEVToPreds[Key].push_back(N);
+  Preds.push_back(N);
+}
+
+PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE)
+    : SE(SE), Generation(0) {}
+
+const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
+  const SCEV *Expr = SE.getSCEV(V);
+  RewriteEntry &Entry = RewriteMap[Expr];
+
+  // If we already have an entry and the version matches, return it.
+  if (Entry.second && Generation == Entry.first)
+    return Entry.second;
+
+  // We found an entry but it's stale.  Rewrite the stale entry
+  // according to the current predicate.
+  if (Entry.second)
+    Expr = Entry.second;
+
+  const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds);
+  Entry = {Generation, NewSCEV};
+
+  return NewSCEV;
+}
+
+void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) {
+  if (Preds.implies(&Pred))
+    return;
+  Preds.add(&Pred);
+  updateGeneration();
+}
+
+const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const {
+  return Preds;
+}
+
+void PredicatedScalarEvolution::updateGeneration() {
+  // If the generation number wrapped, recompute everything.
+  if (++Generation == 0) {
+    for (auto &II : RewriteMap) {
+      const SCEV *Rewritten = II.second.second;
+      II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)};
+    }
+  }
+}
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 6bc0d85a61f9..2e50c80c4e73 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -19,125 +19,42 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 using namespace llvm;
 
-namespace {
-  /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
-  /// implementation that uses ScalarEvolution to answer queries.
-  class ScalarEvolutionAliasAnalysis : public FunctionPass,
-                                       public AliasAnalysis {
-    ScalarEvolution *SE;
-
-  public:
-    static char ID; // Class identification, replacement for typeinfo
-    ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) {
-      initializeScalarEvolutionAliasAnalysisPass(
-          *PassRegistry::getPassRegistry());
-    }
-
-    /// getAdjustedAnalysisPointer - This method is used when a pass implements
-    /// an analysis interface through multiple inheritance.  If needed, it
-    /// should override this to adjust the this pointer as needed for the
-    /// specified pass info.
-    void *getAdjustedAnalysisPointer(AnalysisID PI) override {
-      if (PI == &AliasAnalysis::ID)
-        return (AliasAnalysis*)this;
-      return this;
-    }
-
-  private:
-    void getAnalysisUsage(AnalysisUsage &AU) const override;
-    bool runOnFunction(Function &F) override;
-    AliasResult alias(const MemoryLocation &LocA,
-                      const MemoryLocation &LocB) override;
-
-    Value *GetBaseValue(const SCEV *S);
-  };
-}  // End of anonymous namespace
-
-// Register this pass...
-char ScalarEvolutionAliasAnalysis::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
-                   "ScalarEvolution-based Alias Analysis", false, true, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
-INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
-                    "ScalarEvolution-based Alias Analysis", false, true, false)
-
-FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
-  return new ScalarEvolutionAliasAnalysis();
-}
-
-void
-ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequiredTransitive<ScalarEvolution>();
-  AU.setPreservesAll();
-  AliasAnalysis::getAnalysisUsage(AU);
-}
-
-bool
-ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
-  InitializeAliasAnalysis(this, &F.getParent()->getDataLayout());
-  SE = &getAnalysis<ScalarEvolution>();
-  return false;
-}
-
-/// GetBaseValue - Given an expression, try to find a
-/// base value.  Return null if none was found.
-Value *
-ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
-  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
-    // In an addrec, assume that the base will be in the start, rather
-    // than the step.
-    return GetBaseValue(AR->getStart());
-  } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
-    // If there's a pointer operand, it'll be sorted at the end of the list.
-    const SCEV *Last = A->getOperand(A->getNumOperands()-1);
-    if (Last->getType()->isPointerTy())
-      return GetBaseValue(Last);
-  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
-    // This is a leaf node.
-    return U->getValue();
-  }
-  // No identified object found.
-  return nullptr;
-}
-
-AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
-                                                const MemoryLocation &LocB) {
+AliasResult SCEVAAResult::alias(const MemoryLocation &LocA,
+                                const MemoryLocation &LocB) {
   // If either of the memory references is empty, it doesn't matter what the
   // pointer values are. This allows the code below to ignore this special
   // case.
   if (LocA.Size == 0 || LocB.Size == 0)
     return NoAlias;
 
-  // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
-  const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
-  const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+  // This is SCEVAAResult. Get the SCEVs!
+  const SCEV *AS = SE.getSCEV(const_cast<Value *>(LocA.Ptr));
+  const SCEV *BS = SE.getSCEV(const_cast<Value *>(LocB.Ptr));
 
   // If they evaluate to the same expression, it's a MustAlias.
-  if (AS == BS) return MustAlias;
+  if (AS == BS)
+    return MustAlias;
 
   // If something is known about the difference between the two addresses,
   // see if it's enough to prove a NoAlias.
-  if (SE->getEffectiveSCEVType(AS->getType()) ==
-      SE->getEffectiveSCEVType(BS->getType())) {
-    unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+  if (SE.getEffectiveSCEVType(AS->getType()) ==
+      SE.getEffectiveSCEVType(BS->getType())) {
+    unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());
     APInt ASizeInt(BitWidth, LocA.Size);
     APInt BSizeInt(BitWidth, LocB.Size);
 
     // Compute the difference between the two pointers.
-    const SCEV *BA = SE->getMinusSCEV(BS, AS);
+    const SCEV *BA = SE.getMinusSCEV(BS, AS);
 
     // Test whether the difference is known to be great enough that memory of
     // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
    // are non-zero, which is special-cased above.
-    if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
-        (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+    if (ASizeInt.ule(SE.getUnsignedRange(BA).getUnsignedMin()) &&
+        (-BSizeInt).uge(SE.getUnsignedRange(BA).getUnsignedMax()))
       return NoAlias;
 
     // Folding the subtraction while preserving range information can be tricky
@@ -145,13 +62,13 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
     // and try again to see if things fold better that way.
 
     // Compute the difference between the two pointers.
-    const SCEV *AB = SE->getMinusSCEV(AS, BS);
+    const SCEV *AB = SE.getMinusSCEV(AS, BS);
 
     // Test whether the difference is known to be great enough that memory of
     // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
     // are non-zero, which is special-cased above.
-    if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
-        (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+    if (BSizeInt.ule(SE.getUnsignedRange(AB).getUnsignedMin()) &&
+        (-ASizeInt).uge(SE.getUnsignedRange(AB).getUnsignedMax()))
       return NoAlias;
   }
 
@@ -170,5 +87,62 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,
     return NoAlias;
 
   // Forward the query to the next analysis.
-  return AliasAnalysis::alias(LocA, LocB);
+  return AAResultBase::alias(LocA, LocB);
+}
+
+/// Given an expression, try to find a base value.
+///
+/// Returns null if none was found.
+Value *SCEVAAResult::GetBaseValue(const SCEV *S) {
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    // In an addrec, assume that the base will be in the start, rather
+    // than the step.
+    return GetBaseValue(AR->getStart());
+  } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    // If there's a pointer operand, it'll be sorted at the end of the list.
+    const SCEV *Last = A->getOperand(A->getNumOperands() - 1);
+    if (Last->getType()->isPointerTy())
+      return GetBaseValue(Last);
+  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // This is a leaf node.
+    return U->getValue();
+  }
+  // No identified object found.
+  return nullptr;
+}
+
+SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> *AM) {
+  return SCEVAAResult(AM->getResult<TargetLibraryAnalysis>(F),
+                      AM->getResult<ScalarEvolutionAnalysis>(F));
+}
+
+char SCEVAA::PassID;
+
+char SCEVAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SCEVAAWrapperPass, "scev-aa",
+                      "ScalarEvolution-based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(SCEVAAWrapperPass, "scev-aa",
+                    "ScalarEvolution-based Alias Analysis", false, true)
+
+FunctionPass *llvm::createSCEVAAWrapperPass() {
+  return new SCEVAAWrapperPass();
+}
+
+SCEVAAWrapperPass::SCEVAAWrapperPass() : FunctionPass(ID) {
+  initializeSCEVAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool SCEVAAWrapperPass::runOnFunction(Function &F) {
+  Result.reset(
+      new SCEVAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+                       getAnalysis<ScalarEvolutionWrapperPass>().getSE()));
+  return false;
+}
+
+void SCEVAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<ScalarEvolutionWrapperPass>();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
 }
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index fee2a2d0d183..921403ddc0fd 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -63,7 +63,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
       // Create a new cast, and leave the old cast in place in case
      // it is being used as an insert point.  Clear its operand
      // so that it doesn't hold anything live.
-      Ret = CastInst::Create(Op, V, Ty, "", IP);
+      Ret = CastInst::Create(Op, V, Ty, "", &*IP);
       Ret->takeName(CI);
       CI->replaceAllUsesWith(Ret);
       CI->setOperand(0, UndefValue::get(V->getType()));
@@ -75,17 +75,39 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
 
   // Create a new cast.
   if (!Ret)
-    Ret = CastInst::Create(Op, V, Ty, V->getName(), IP);
+    Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
 
   // We assert at the end of the function since IP might point to an
   // instruction with different dominance properties than a cast
   // (an invoke for example) and not dominate BIP (but the cast does).
-  assert(SE.DT->dominates(Ret, BIP));
+  assert(SE.DT.dominates(Ret, &*BIP));
 
   rememberInstruction(Ret);
   return Ret;
 }
 
+static BasicBlock::iterator findInsertPointAfter(Instruction *I,
+                                                 BasicBlock *MustDominate) {
+  BasicBlock::iterator IP = ++I->getIterator();
+  if (auto *II = dyn_cast<InvokeInst>(I))
+    IP = II->getNormalDest()->begin();
+
+  while (isa<PHINode>(IP))
+    ++IP;
+
+  while (IP->isEHPad()) {
+    if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
+      ++IP;
+    } else if (isa<CatchSwitchInst>(IP)) {
+      IP = MustDominate->getFirstInsertionPt();
+    } else {
+      llvm_unreachable("unexpected eh pad!");
+    }
+  }
+
+  return IP;
+}
+
 /// InsertNoopCastOfTo - Insert a cast of V to the specified type,
 /// which must be possible with a noop cast, doing what we can to share
 /// the casts.
@@ -135,19 +157,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
     while ((isa<BitCastInst>(IP) &&
             isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
             cast<BitCastInst>(IP)->getOperand(0) != A) ||
-           isa<DbgInfoIntrinsic>(IP) ||
-           isa<LandingPadInst>(IP))
+           isa<DbgInfoIntrinsic>(IP))
       ++IP;
     return ReuseOrCreateCast(A, Ty, Op, IP);
   }
 
   // Cast the instruction immediately after the instruction.
   Instruction *I = cast<Instruction>(V);
-  BasicBlock::iterator IP = I; ++IP;
-  if (InvokeInst *II = dyn_cast<InvokeInst>(I))
-    IP = II->getNormalDest()->begin();
-  while (isa<PHINode>(IP) || isa<LandingPadInst>(IP))
-    ++IP;
+  BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
   return ReuseOrCreateCast(I, Ty, Op, IP);
 }
 
@@ -174,7 +191,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
       ScanLimit++;
       if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
           IP->getOperand(1) == RHS)
-        return IP;
+        return &*IP;
       if (IP == BlockBegin) break;
     }
   }
@@ -184,13 +201,13 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
   BuilderType::InsertPointGuard Guard(Builder);
 
   // Move the insertion point out of as many loops as we can.
-  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
     if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
     BasicBlock *Preheader = L->getLoopPreheader();
     if (!Preheader) break;
 
     // Ok, move up a level.
-    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+    Builder.SetInsertPoint(Preheader->getTerminator());
   }
 
   // If we haven't found this binop, insert it.
@@ -229,19 +246,15 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
   // Check for divisibility.
   if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
     ConstantInt *CI =
-      ConstantInt::get(SE.getContext(),
-                       C->getValue()->getValue().sdiv(
-                                                  FC->getValue()->getValue()));
+        ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
     // If the quotient is zero and the remainder is non-zero, reject
     // the value at this scale. It will be considered for subsequent
     // smaller scales.
     if (!CI->isZero()) {
       const SCEV *Div = SE.getConstant(CI);
       S = Div;
-      Remainder =
-        SE.getAddExpr(Remainder,
-                      SE.getConstant(C->getValue()->getValue().srem(
-                                                  FC->getValue()->getValue())));
+      Remainder = SE.getAddExpr(
+          Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
       return true;
     }
   }
@@ -254,10 +267,9 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
     // of the given factor. If so, we can factor it.
     const SCEVConstant *FC = cast<SCEVConstant>(Factor);
     if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
-      if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
+      if (!C->getAPInt().srem(FC->getAPInt())) {
         SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
-        NewMulOps[0] = SE.getConstant(
-            C->getValue()->getValue().sdiv(FC->getValue()->getValue()));
+        NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
         S = SE.getMulExpr(NewMulOps);
         return true;
       }
@@ -402,8 +414,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
     if (!ElSize->isZero()) {
       SmallVector<const SCEV *, 8> NewOps;
-      for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-        const SCEV *Op = Ops[i];
+      for (const SCEV *Op : Ops) {
         const SCEV *Remainder = SE.getConstant(Ty, 0);
         if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
           // Op now has ElSize factored out.
@@ -414,7 +425,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
         } else {
           // The operand was not divisible, so add it to the list of operands
           // we'll scan next iteration.
-          NewOps.push_back(Ops[i]);
+          NewOps.push_back(Op);
         }
       }
       // If we made any changes, update Ops.
@@ -483,7 +494,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                 Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
 
     assert(!isa<Instruction>(V) ||
-           SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint()));
+           SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
 
     // Expand the operands for a plain byte offset.
     Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
@@ -508,7 +519,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
         ScanLimit++;
         if (IP->getOpcode() == Instruction::GetElementPtr &&
             IP->getOperand(0) == V && IP->getOperand(1) == Idx)
-          return IP;
+          return &*IP;
         if (IP == BlockBegin) break;
       }
     }
@@ -517,13 +528,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     BuilderType::InsertPointGuard Guard(Builder);
 
     // Move the insertion point out of as many loops as we can.
-    while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
       if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
       BasicBlock *Preheader = L->getLoopPreheader();
       if (!Preheader) break;
 
       // Ok, move up a level.
-      Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+      Builder.SetInsertPoint(Preheader->getTerminator());
     }
 
     // Emit a GEP.
@@ -537,16 +548,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
 
   // Move the insertion point out of as many loops as we can.
-  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
     if (!L->isLoopInvariant(V)) break;
 
-    bool AnyIndexNotLoopInvariant = false;
-    for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
-         E = GepIndices.end(); I != E; ++I)
-      if (!L->isLoopInvariant(*I)) {
-        AnyIndexNotLoopInvariant = true;
-        break;
-      }
+    bool AnyIndexNotLoopInvariant =
+        std::any_of(GepIndices.begin(), GepIndices.end(),
+                    [L](Value *Op) { return !L->isLoopInvariant(Op); });
+
     if (AnyIndexNotLoopInvariant)
       break;
 
@@ -554,7 +562,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     if (!Preheader) break;
 
     // Ok, move up a level.
-    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+    Builder.SetInsertPoint(Preheader->getTerminator());
   }
 
   // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
@@ -563,9 +571,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   Value *Casted = V;
   if (V->getType() != PTy)
     Casted = InsertNoopCastOfTo(Casted, PTy);
-  Value *GEP = Builder.CreateGEP(OriginalElTy, Casted,
-                                 GepIndices,
-                                 "scevgep");
+  Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
   Ops.push_back(SE.getUnknown(GEP));
   rememberInstruction(GEP);
 
@@ -593,8 +599,7 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
 /// expression, according to PickMostRelevantLoop.
 const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
   // Test whether we've already computed the most relevant loop for this SCEV.
-  std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
-    RelevantLoops.insert(std::make_pair(S, nullptr));
+  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
   if (!Pair.second)
     return Pair.first->second;
 
@@ -603,7 +608,7 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
     return nullptr;
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
-      return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
     // A non-instruction has no relevant loops.
     return nullptr;
   }
@@ -611,9 +616,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
     const Loop *L = nullptr;
     if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
       L = AR->getLoop();
-    for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
-         I != E; ++I)
-      L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+    for (const SCEV *Op : N->operands())
+      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
     return RelevantLoops[N] = L;
   }
   if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
     const Loop *Result = getRelevantLoop(C->getOperand());
     return RelevantLoops[C] = Result;
   }
   if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
-    const Loop *Result =
-      PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
-                           getRelevantLoop(D->getRHS()),
-                           *SE.DT);
+    const Loop *Result = PickMostRelevantLoop(
+        getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
     return RelevantLoops[D] = Result;
   }
   llvm_unreachable("Unexpected SCEV type!");
@@ -679,13 +681,12 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
 
   // Sort by loop. Use a stable sort so that constants follow non-constants and
   // pointer operands precede non-pointer operands.
-  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));
 
   // Emit instructions to add all the operands.  Hoist as much as possible
Hoist as much as possible // out of loops, and form meaningful getelementptrs where possible. Value *Sum = nullptr; - for (SmallVectorImpl >::iterator - I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) { const Loop *CurLoop = I->first; const SCEV *Op = I->second; if (!Sum) { @@ -747,14 +748,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); // Sort by loop. Use a stable sort so that constants follow non-constants. - std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT)); // Emit instructions to mul all the operands. Hoist as much as possible // out of loops. Value *Prod = nullptr; - for (SmallVectorImpl >::iterator - I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) { - const SCEV *Op = I->second; + for (const auto &I : OpsAndLoops) { + const SCEV *Op = I.second; if (!Prod) { // This is the first operand. Just expand it. Prod = expand(Op); @@ -788,7 +788,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { Value *LHS = expandCodeFor(S->getLHS(), Ty); if (const SCEVConstant *SC = dyn_cast(S->getRHS())) { - const APInt &RHS = SC->getValue()->getValue(); + const APInt &RHS = SC->getAPInt(); if (RHS.isPowerOf2()) return InsertBinop(Instruction::LShr, LHS, ConstantInt::get(Ty, RHS.logBase2())); @@ -834,7 +834,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, for (User::op_iterator OI = IncV->op_begin()+1, OE = IncV->op_end(); OI != OE; ++OI) if (Instruction *OInst = dyn_cast(OI)) - if (!SE.DT->dominates(OInst, IVIncInsertPos)) + if (!SE.DT.dominates(OInst, IVIncInsertPos)) return false; } // Advance to the next instruction. @@ -873,19 +873,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, case Instruction::Add: case Instruction::Sub: { Instruction *OInst = dyn_cast(IncV->getOperand(1)); - if (!OInst || SE.DT->dominates(OInst, InsertPos)) + if (!OInst || SE.DT.dominates(OInst, InsertPos)) return dyn_cast(IncV->getOperand(0)); return nullptr; } case Instruction::BitCast: return dyn_cast(IncV->getOperand(0)); case Instruction::GetElementPtr: - for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); - I != E; ++I) { + for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) { if (isa(*I)) continue; if (Instruction *OInst = dyn_cast(*I)) { - if (!SE.DT->dominates(OInst, InsertPos)) + if (!SE.DT.dominates(OInst, InsertPos)) return nullptr; } if (allowScale) { @@ -912,13 +911,16 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, /// it available to other uses in this loop. Recursively hoist any operands, /// until we reach a value that dominates InsertPos. bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { - if (SE.DT->dominates(IncV, InsertPos)) + if (SE.DT.dominates(IncV, InsertPos)) return true; // InsertPos must itself dominate IncV so that IncV's new position satisfies // its existing users. - if (isa(InsertPos) - || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) + if (isa(InsertPos) || + !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) + return false; + + if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) return false; // Check that the chain of IV operands leading back to Phi can be hoisted. 
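hoistIVInc, whose body continues in the next hunk, records the chain of IV operands and then moves the chain in reverse so that every operand lands before its user. A hedged sketch of that reverse-order move using only Instruction::moveBefore (hoistChain is a hypothetical helper, not part of the patch):

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"

// Chain is filled from the user toward its operands; visiting it in
// reverse places definitions before their uses at InsertPos.
static void hoistChain(llvm::SmallVectorImpl<llvm::Instruction *> &Chain,
                       llvm::Instruction *InsertPos) {
  for (auto I = Chain.rbegin(), E = Chain.rend(); I != E; ++I)
    (*I)->moveBefore(InsertPos);
}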
@@ -930,11 +932,10 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { // IncV is safe to hoist. IVIncs.push_back(IncV); IncV = Oper; - if (SE.DT->dominates(IncV, InsertPos)) + if (SE.DT.dominates(IncV, InsertPos)) break; } - for (SmallVectorImpl::reverse_iterator I = IVIncs.rbegin(), - E = IVIncs.rend(); I != E; ++I) { + for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) { (*I)->moveBefore(InsertPos); } return true; @@ -1002,7 +1003,7 @@ static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, } /// \brief Check whether we can cheaply express the requested SCEV in terms of -/// the available PHI SCEV by truncation and/or invertion of the step. +/// the available PHI SCEV by truncation and/or inversion of the step. static bool canBeCheaplyTransformed(ScalarEvolution &SE, const SCEVAddRecExpr *Phi, const SCEVAddRecExpr *Requested, @@ -1084,12 +1085,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Only try partially matching scevs that need truncation and/or // step-inversion if we know this loop is outside the current loop. - bool TryNonMatchingSCEV = IVIncInsertLoop && - SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); + bool TryNonMatchingSCEV = + IVIncInsertLoop && + SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *PN = dyn_cast(I); ++I) { - if (!SE.isSCEVable(PN->getType())) + for (auto &I : *L->getHeader()) { + auto *PN = dyn_cast(&I); + if (!PN || !SE.isSCEVable(PN->getType())) continue; const SCEVAddRecExpr *PhiSCEV = dyn_cast(SE.getSCEV(PN)); @@ -1142,7 +1144,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // Potentially, move the increment. We have made sure in // isExpandedAddRecExprPHI or hoistIVInc that this is possible. if (L == IVIncInsertLoop) - hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); + hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); // Ok, the add recurrence looks usable. // Remember this PHI, even in post-inc mode. @@ -1167,13 +1169,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, PostIncLoops.clear(); // Expand code for the start value. - Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, - L->getHeader()->begin()); + Value *StartV = + expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front()); // StartV must be hoisted into L's preheader to dominate the new phi. assert(!isa(StartV) || - SE.DT->properlyDominates(cast(StartV)->getParent(), - L->getHeader())); + SE.DT.properlyDominates(cast(StartV)->getParent(), + L->getHeader())); // Expand code for the step value. Do this before creating the PHI so that PHI // reuse code doesn't see an incomplete PHI. @@ -1185,7 +1187,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (useSubtract) Step = SE.getNegativeSCEV(Step); // Expand the step somewhere that dominates the loop header. - Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if // we actually do emit an addition. 
It does not apply if we emit a @@ -1249,9 +1251,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (PostIncLoops.count(L)) { PostIncLoopSet Loops; Loops.insert(L); - Normalized = - cast(TransformForPostIncUse(Normalize, S, nullptr, - nullptr, Loops, SE, *SE.DT)); + Normalized = cast(TransformForPostIncUse( + Normalize, S, nullptr, nullptr, Loops, SE, SE.DT)); } // Strip off any non-loop-dominating component from the addrec start. @@ -1301,9 +1302,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // For an expansion to use the postinc form, the client must call // expandCodeFor with an InsertPoint that is either outside the PostIncLoop // or dominated by IVIncInsertPos. - if (isa(Result) - && !SE.DT->dominates(cast(Result), - Builder.GetInsertPoint())) { + if (isa(Result) && + !SE.DT.dominates(cast(Result), + &*Builder.GetInsertPoint())) { // The induction variable's postinc expansion does not dominate this use. // IVUsers tries to prevent this case, so it is rare. However, it can // happen when an IVUser outside the loop is not dominated by the latch @@ -1321,7 +1322,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { { // Expand the step somewhere that dominates the loop header. BuilderType::InsertPointGuard Guard(Builder); - StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); } Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); } @@ -1395,13 +1396,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = - std::next(BasicBlock::iterator(cast(V))); - BuilderType::InsertPointGuard Guard(Builder); - while (isa(NewInsertPt) || isa(NewInsertPt) || - isa(NewInsertPt)) - ++NewInsertPt; + findInsertPointAfter(cast(V), Builder.GetInsertBlock()); V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, - NewInsertPt); + &*NewInsertPt); return V; } @@ -1442,7 +1439,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock *Header = L->getHeader(); pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", - Header->begin()); + &Header->front()); rememberInstruction(CanonicalIV); SmallSet PredSeen; @@ -1587,7 +1584,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, Instruction *IP) { - Builder.SetInsertPoint(IP->getParent(), IP); + assert(IP); + Builder.SetInsertPoint(IP); return expandCodeFor(SH, Ty); } @@ -1605,8 +1603,8 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { Value *SCEVExpander::expand(const SCEV *S) { // Compute an insertion point for this SCEV object. Hoist the instructions // as far out in the loop nest as possible. - Instruction *InsertPt = Builder.GetInsertPoint(); - for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; + Instruction *InsertPt = &*Builder.GetInsertPoint(); + for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());; L = L->getParentLoop()) if (SE.isLoopInvariant(S, L)) { if (!L) break; @@ -1616,30 +1614,29 @@ Value *SCEVExpander::expand(const SCEV *S) { // LSR sets the insertion point for AddRec start/step values to the // block start to simplify value reuse, even though it's an invalid // position. SCEVExpander must correct for this in all cases. 
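Many hunks in this revision, including the one that follows, prepend &* to calls like getFirstInsertionPt(): with this tree's ilist changes those calls yield a BasicBlock::iterator rather than an Instruction*, and the iterator no longer converts implicitly. A small sketch of the round-trip, assuming this revision's iterator-returning API:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static llvm::Instruction *firstInsertionPoint(llvm::BasicBlock &BB) {
  llvm::BasicBlock::iterator It = BB.getFirstInsertionPt();
  llvm::Instruction *I = &*It; // iterator -> pointer, as in the hunks below
  return I;                    // I->getIterator() goes back the other way
}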
- InsertPt = L->getHeader()->getFirstInsertionPt(); + InsertPt = &*L->getHeader()->getFirstInsertionPt(); } } else { // If the SCEV is computable at this level, insert it into the header // after the PHIs (and after any other instructions that we've inserted // there) so that it is guaranteed to dominate any user inside the loop. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) - InsertPt = L->getHeader()->getFirstInsertionPt(); + InsertPt = &*L->getHeader()->getFirstInsertionPt(); while (InsertPt != Builder.GetInsertPoint() && (isInsertedInstruction(InsertPt) || isa(InsertPt))) { - InsertPt = std::next(BasicBlock::iterator(InsertPt)); + InsertPt = &*std::next(InsertPt->getIterator()); } break; } // Check to see if we already expanded this here. - std::map, TrackingVH >::iterator - I = InsertedExpressions.find(std::make_pair(S, InsertPt)); + auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; BuilderType::InsertPointGuard Guard(Builder); - Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); + Builder.SetInsertPoint(InsertPt); // Expand the expression into instructions. Value *V = visit(S); @@ -1677,8 +1674,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, // Emit code for it. BuilderType::InsertPointGuard Guard(Builder); - PHINode *V = cast(expandCodeFor(H, nullptr, - L->getHeader()->begin())); + PHINode *V = + cast(expandCodeFor(H, nullptr, &L->getHeader()->front())); return V; } @@ -1694,10 +1691,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. SmallVector Phis; - for (BasicBlock::iterator I = L->getHeader()->begin(); - PHINode *Phi = dyn_cast(I); ++I) { - Phis.push_back(Phi); + for (auto &I : *L->getHeader()) { + if (auto *PN = dyn_cast(&I)) + Phis.push_back(PN); + else + break; } + if (TTI) std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) { // Put pointers at the back and make sure pointer < pointer = false. @@ -1711,13 +1711,23 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, DenseMap ExprToIVMap; // Process phis from wide to narrow. Map wide phis to their truncation // so narrow phis can reuse them. - for (SmallVectorImpl::const_iterator PIter = Phis.begin(), - PEnd = Phis.end(); PIter != PEnd; ++PIter) { - PHINode *Phi = *PIter; + for (PHINode *Phi : Phis) { + auto SimplifyPHINode = [&](PHINode *PN) -> Value * { + if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC)) + return V; + if (!SE.isSCEVable(PN->getType())) + return nullptr; + auto *Const = dyn_cast(SE.getSCEV(PN)); + if (!Const) + return nullptr; + return Const->getValue(); + }; // Fold constant phis. They may be congruent to other constant phis and // would confuse the logic below that expects proper IVs. 
-    if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) {
+    if (Value *V = SimplifyPHINode(Phi)) {
+      if (V->getType() != Phi->getType())
+        continue;
       Phi->replaceAllUsesWith(V);
       DeadInsts.emplace_back(Phi);
       ++NumElim;
@@ -1784,7 +1794,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
         if (OrigInc->getType() != IsomorphicInc->getType()) {
           Instruction *IP = nullptr;
           if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
-            IP = PN->getParent()->getFirstInsertionPt();
+            IP = &*PN->getParent()->getFirstInsertionPt();
           else
             IP = OrigInc->getNextNode();
@@ -1802,7 +1812,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
       ++NumElim;
       Value *NewIV = OrigPhiRef;
       if (OrigPhiRef->getType() != Phi->getType()) {
-        IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt());
+        IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
         Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
         NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
       }
@@ -1812,8 +1822,46 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
   return NumElim;
 }
+Value *SCEVExpander::findExistingExpansion(const SCEV *S,
+                                           const Instruction *At, Loop *L) {
+  using namespace llvm::PatternMatch;
+
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  // Look for a suitable value in simple conditions at the loop exits.
+  for (BasicBlock *BB : ExitingBlocks) {
+    ICmpInst::Predicate Pred;
+    Instruction *LHS, *RHS;
+    BasicBlock *TrueBB, *FalseBB;
+
+    if (!match(BB->getTerminator(),
+               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
+                    TrueBB, FalseBB)))
+      continue;
+
+    if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
+      return LHS;
+
+    if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
+      return RHS;
+  }
+
+  // There is potential to make this significantly smarter, but this simple
+  // heuristic already gets some interesting cases.
+
+  // Cannot find a suitable value.
+  return nullptr;
+}
+
 bool SCEVExpander::isHighCostExpansionHelper(
-    const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) {
+    const SCEV *S, Loop *L, const Instruction *At,
+    SmallPtrSetImpl<const SCEV *> &Processed) {
+
+  // If we can find an existing value for this SCEV available at the point "At"
+  // then consider the expression cheap.
+  if (At && findExistingExpansion(S, At, L) != nullptr)
+    return false;
   // Zero/One operand expressions
   switch (S->getSCEVType()) {
@@ -1821,14 +1869,14 @@ bool SCEVExpander::isHighCostExpansionHelper(
   case scConstant:
     return false;
   case scTruncate:
-    return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L,
-                                     Processed);
+    return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(),
+                                     L, At, Processed);
   case scZeroExtend:
     return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(),
-                                     L, Processed);
+                                     L, At, Processed);
   case scSignExtend:
     return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(),
-                                     L, Processed);
+                                     L, At, Processed);
   }
   if (!Processed.insert(S).second)
@@ -1836,10 +1884,10 @@ bool SCEVExpander::isHighCostExpansionHelper(
   if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
     // If the divisor is a power of two and the SCEV type fits in a native
-    // integer, consider the divison cheap irrespective of whether it occurs in
+    // integer, consider the division cheap irrespective of whether it occurs in
     // the user code since it can be lowered into a right shift.
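The comment above ends on the observation that a power-of-two unsigned division lowers to a right shift; visitUDivExpr earlier in this file emits exactly that via Instruction::LShr and APInt::logBase2(). The underlying arithmetic fact, x udiv 2^k == x lshr k, in a tiny self-contained form before the hunk resumes (shiftAmountForUDiv is a hypothetical helper):

#include "llvm/ADT/APInt.h"

// Returns the shift amount k such that dividing by Divisor equals
// shifting right by k, or -1 when Divisor is not a power of two.
static int shiftAmountForUDiv(const llvm::APInt &Divisor) {
  if (!Divisor.isPowerOf2())
    return -1;
  return static_cast<int>(Divisor.logBase2()); // x / 2^k == x >> k (unsigned)
}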
     if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS()))
-      if (SC->getValue()->getValue().isPowerOf2()) {
+      if (SC->getAPInt().isPowerOf2()) {
         const DataLayout &DL =
             L->getHeader()->getParent()->getParent()->getDataLayout();
         unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth();
@@ -1855,22 +1903,14 @@ bool SCEVExpander::isHighCostExpansionHelper(
     if (!ExitingBB)
       return true;
-    BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
-    if (!ExitingBI || !ExitingBI->isConditional())
+    // At the beginning of this function we already tried to find an existing
+    // value for plain 'S'. Now try to look up 'S + 1', since it is a common
+    // pattern involving division. This is just a simple search heuristic.
+    if (!At)
+      At = &ExitingBB->back();
+    if (!findExistingExpansion(
+            SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
       return true;
-
-    ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition());
-    if (!OrigCond)
-      return true;
-
-    const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1));
-    RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1));
-    if (RHS != S) {
-      const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0));
-      LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1));
-      if (LHS != S)
-        return true;
-    }
   }
   // HowManyLessThans uses a Max expression whenever the loop is not guarded by
@@ -1882,11 +1922,9 @@ bool SCEVExpander::isHighCostExpansionHelper(
   // BackedgeTakenCount. They may already exist in program code, and if not,
   // they are not too expensive to rematerialize.
   if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
-    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-         I != E; ++I) {
-      if (isHighCostExpansionHelper(*I, L, Processed))
+    for (auto *Op : NAry->operands())
+      if (isHighCostExpansionHelper(Op, L, At, Processed))
         return true;
-    }
   }
   // If we haven't recognized an expensive SCEV pattern, assume it's an
@@ -1894,6 +1932,43 @@ bool SCEVExpander::isHighCostExpansionHelper(
   return false;
 }
+Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
+                                            Instruction *IP) {
+  assert(IP);
+  switch (Pred->getKind()) {
+  case SCEVPredicate::P_Union:
+    return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
+  case SCEVPredicate::P_Equal:
+    return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
+  }
+  llvm_unreachable("Unknown SCEV predicate type");
+}
+
+Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
+                                          Instruction *IP) {
+  Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
+  Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);
+
+  Builder.SetInsertPoint(IP);
+  auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
+  return I;
+}
+
+Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
+                                          Instruction *IP) {
+  auto *BoolType = IntegerType::get(IP->getContext(), 1);
+  Value *Check = ConstantInt::getNullValue(BoolType);
+
+  // Loop over all checks in this set.
+  for (auto Pred : Union->getPredicates()) {
+    auto *NextCheck = expandCodeForPredicate(Pred, IP);
+    Builder.SetInsertPoint(IP);
+    Check = Builder.CreateOr(Check, NextCheck);
+  }
+
+  return Check;
+}
+
 namespace {
 // Search for a SCEV subexpression that is not safe to expand.
Any expression // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index b238fe43cc60..b7fd5d506175 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -109,7 +109,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { SmallVector Operands; const Loop *L = AR->getLoop(); // The addrec conceptually uses its operands at loop entry. - Instruction *LUser = L->getHeader()->begin(); + Instruction *LUser = &L->getHeader()->front(); // Transform each operand. for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) { diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp index a5fca3e79b37..029997adab9e 100644 --- a/lib/Analysis/ScopedNoAliasAA.cpp +++ b/lib/Analysis/ScopedNoAliasAA.cpp @@ -32,22 +32,23 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" + using namespace llvm; // A handy option for disabling scoped no-alias functionality. The same effect // can also be achieved by stripping the associated metadata tags from IR, but // this option is sometimes more convenient. -static cl::opt -EnableScopedNoAlias("enable-scoped-noalias", cl::init(true)); +static cl::opt EnableScopedNoAlias("enable-scoped-noalias", + cl::init(true)); namespace { /// AliasScopeNode - This is a simple wrapper around an MDNode which provides @@ -57,7 +58,7 @@ class AliasScopeNode { const MDNode *Node; public: - AliasScopeNode() : Node(0) {} + AliasScopeNode() : Node(nullptr) {} explicit AliasScopeNode(const MDNode *N) : Node(N) {} /// getNode - Get the MDNode for this AliasScopeNode. @@ -70,79 +71,74 @@ public: return dyn_cast_or_null(Node->getOperand(1)); } }; +} // end of anonymous namespace -/// ScopedNoAliasAA - This is a simple alias analysis -/// implementation that uses scoped-noalias metadata to answer queries. -class ScopedNoAliasAA : public ImmutablePass, public AliasAnalysis { -public: - static char ID; // Class identification, replacement for typeinfo - ScopedNoAliasAA() : ImmutablePass(ID) { - initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry()); - } +AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { + if (!EnableScopedNoAlias) + return AAResultBase::alias(LocA, LocB); - bool doInitialization(Module &M) override; + // Get the attached MDNodes. + const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope; - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. 
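SCEVExpander::findExistingExpansion, added earlier in this patch, leans on llvm::PatternMatch to destructure a conditional branch over an integer compare. The same matcher call, isolated into a standalone sketch (splitExitCondition is a hypothetical wrapper around the exact pattern the patch uses):

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

// Binds LHS/RHS when BB ends in: br i1 (icmp pred %lhs, %rhs), %t, %f.
static bool splitExitCondition(llvm::BasicBlock *BB, llvm::Instruction *&LHS,
                               llvm::Instruction *&RHS) {
  using namespace llvm::PatternMatch;
  llvm::ICmpInst::Predicate Pred;
  llvm::BasicBlock *TrueBB, *FalseBB;
  return match(BB->getTerminator(),
               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
                    TrueBB, FalseBB));
}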
- void *getAdjustedAnalysisPointer(const void *PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } + const MDNode *ANoAlias = LocA.AATags.NoAlias, *BNoAlias = LocB.AATags.NoAlias; -protected: - bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; - void collectMDInDomain(const MDNode *List, const MDNode *Domain, - SmallPtrSetImpl &Nodes) const; + if (!mayAliasInScopes(AScopes, BNoAlias)) + return NoAlias; -private: - void getAnalysisUsage(AnalysisUsage &AU) const override; - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) override; - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; - ModRefBehavior getModRefBehavior(const Function *F) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override; -}; -} // End of anonymous namespace + if (!mayAliasInScopes(BScopes, ANoAlias)) + return NoAlias; -// Register this pass... -char ScopedNoAliasAA::ID = 0; -INITIALIZE_AG_PASS(ScopedNoAliasAA, AliasAnalysis, "scoped-noalias", - "Scoped NoAlias Alias Analysis", false, true, false) - -ImmutablePass *llvm::createScopedNoAliasAAPass() { - return new ScopedNoAliasAA(); + // If they may alias, chain to the next AliasAnalysis. + return AAResultBase::alias(LocA, LocB); } -bool ScopedNoAliasAA::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; +ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { + if (!EnableScopedNoAlias) + return AAResultBase::getModRefInfo(CS, Loc); + + if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( + LLVMContext::MD_noalias))) + return MRI_NoModRef; + + if (!mayAliasInScopes( + CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + Loc.AATags.NoAlias)) + return MRI_NoModRef; + + return AAResultBase::getModRefInfo(CS, Loc); } -void -ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); +ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + if (!EnableScopedNoAlias) + return AAResultBase::getModRefInfo(CS1, CS2); + + if (!mayAliasInScopes( + CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) + return MRI_NoModRef; + + if (!mayAliasInScopes( + CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), + CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) + return MRI_NoModRef; + + return AAResultBase::getModRefInfo(CS1, CS2); } -void -ScopedNoAliasAA::collectMDInDomain(const MDNode *List, const MDNode *Domain, - SmallPtrSetImpl &Nodes) const { +void ScopedNoAliasAAResult::collectMDInDomain( + const MDNode *List, const MDNode *Domain, + SmallPtrSetImpl &Nodes) const { for (unsigned i = 0, ie = List->getNumOperands(); i != ie; ++i) if (const MDNode *MD = dyn_cast(List->getOperand(i))) if (AliasScopeNode(MD).getDomain() == Domain) Nodes.insert(MD); } -bool -ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes, - const MDNode *NoAlias) const { +bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes, + const MDNode *NoAlias) const { if (!Scopes || !NoAlias) return true; @@ -177,76 +173,40 @@ ScopedNoAliasAA::mayAliasInScopes(const MDNode 
*Scopes, return true; } -AliasResult ScopedNoAliasAA::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { - if (!EnableScopedNoAlias) - return AliasAnalysis::alias(LocA, LocB); - - // Get the attached MDNodes. - const MDNode *AScopes = LocA.AATags.Scope, - *BScopes = LocB.AATags.Scope; - - const MDNode *ANoAlias = LocA.AATags.NoAlias, - *BNoAlias = LocB.AATags.NoAlias; - - if (!mayAliasInScopes(AScopes, BNoAlias)) - return NoAlias; - - if (!mayAliasInScopes(BScopes, ANoAlias)) - return NoAlias; - - // If they may alias, chain to the next AliasAnalysis. - return AliasAnalysis::alias(LocA, LocB); +ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F, + AnalysisManager *AM) { + return ScopedNoAliasAAResult(AM->getResult(F)); } -bool ScopedNoAliasAA::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +char ScopedNoAliasAA::PassID; + +char ScopedNoAliasAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(ScopedNoAliasAAWrapperPass, "scoped-noalias", + "Scoped NoAlias Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ScopedNoAliasAAWrapperPass, "scoped-noalias", + "Scoped NoAlias Alias Analysis", false, true) + +ImmutablePass *llvm::createScopedNoAliasAAWrapperPass() { + return new ScopedNoAliasAAWrapperPass(); } -AliasAnalysis::ModRefBehavior -ScopedNoAliasAA::getModRefBehavior(ImmutableCallSite CS) { - return AliasAnalysis::getModRefBehavior(CS); +ScopedNoAliasAAWrapperPass::ScopedNoAliasAAWrapperPass() : ImmutablePass(ID) { + initializeScopedNoAliasAAWrapperPassPass(*PassRegistry::getPassRegistry()); } -AliasAnalysis::ModRefBehavior -ScopedNoAliasAA::getModRefBehavior(const Function *F) { - return AliasAnalysis::getModRefBehavior(F); +bool ScopedNoAliasAAWrapperPass::doInitialization(Module &M) { + Result.reset(new ScopedNoAliasAAResult( + getAnalysis().getTLI())); + return false; } -AliasAnalysis::ModRefResult -ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { - if (!EnableScopedNoAlias) - return AliasAnalysis::getModRefInfo(CS, Loc); - - if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( - LLVMContext::MD_noalias))) - return NoModRef; - - if (!mayAliasInScopes( - CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), - Loc.AATags.NoAlias)) - return NoModRef; - - return AliasAnalysis::getModRefInfo(CS, Loc); +bool ScopedNoAliasAAWrapperPass::doFinalization(Module &M) { + Result.reset(); + return false; } -AliasAnalysis::ModRefResult -ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { - if (!EnableScopedNoAlias) - return AliasAnalysis::getModRefInfo(CS1, CS2); - - if (!mayAliasInScopes( - CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), - CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return NoModRef; - - if (!mayAliasInScopes( - CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), - CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) - return NoModRef; - - return AliasAnalysis::getModRefInfo(CS1, CS2); +void ScopedNoAliasAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); } - diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index edd82f5fe296..f5a927b80525 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -328,17 +328,17 @@ void SparseSolver::Solve(Function &F) { void 
 SparseSolver::Print(Function &F, raw_ostream &OS) const {
   OS << "\nFUNCTION: " << F.getName() << "\n";
-  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
-    if (!BBExecutable.count(BB))
+  for (auto &BB : F) {
+    if (!BBExecutable.count(&BB))
       OS << "INFEASIBLE: ";
     OS << "\t";
-    if (BB->hasName())
-      OS << BB->getName() << ":\n";
+    if (BB.hasName())
+      OS << BB.getName() << ":\n";
     else
       OS << "; anon bb\n";
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-      LatticeFunc->PrintValue(getLatticeState(I), OS);
-      OS << *I << "\n";
+    for (auto &I : BB) {
+      LatticeFunc->PrintValue(getLatticeState(&I), OS);
+      OS << I << "\n";
     }
     OS << "\n";
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index 635c50ca6e53..e00f4aed07fc 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -61,10 +61,19 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
   }
 #endif // !NDEBUG
+  if (T.getArch() == Triple::r600 ||
+      T.getArch() == Triple::amdgcn) {
+    TLI.setUnavailable(LibFunc::ldexp);
+    TLI.setUnavailable(LibFunc::ldexpf);
+    TLI.setUnavailable(LibFunc::ldexpl);
+  }
+
   // There are no library implementations of memcpy and memset for AMD gpus and
   // these can be difficult to lower in the backend.
   if (T.getArch() == Triple::r600 ||
-      T.getArch() == Triple::amdgcn) {
+      T.getArch() == Triple::amdgcn ||
+      T.getArch() == Triple::wasm32 ||
+      T.getArch() == Triple::wasm64) {
     TLI.setUnavailable(LibFunc::memcpy);
     TLI.setUnavailable(LibFunc::memset);
     TLI.setUnavailable(LibFunc::memset_pattern16);
@@ -72,13 +81,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
   }
   // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later.
+  // All versions of watchOS support it.
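The TargetLibraryInfo hunks above all follow one pattern inside initialize(): inspect the llvm::Triple, then mark library routines unavailable for targets whose C runtime lacks them (ldexp* on AMD GPUs, mem* on wasm, memset_pattern16 outside Darwin). That shape in isolation, before the Darwin version checks resume below (disableWasmMemRoutines is a hypothetical helper restating the wasm hunk):

#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"

static void disableWasmMemRoutines(llvm::TargetLibraryInfoImpl &TLI,
                                   const llvm::Triple &T) {
  // Mirrors the hunk above: wasm has no library memcpy/memset to call.
  if (T.getArch() == llvm::Triple::wasm32 ||
      T.getArch() == llvm::Triple::wasm64) {
    TLI.setUnavailable(llvm::LibFunc::memcpy);
    TLI.setUnavailable(llvm::LibFunc::memset);
  }
}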
if (T.isMacOSX()) { if (T.isMacOSXVersionLT(10, 5)) TLI.setUnavailable(LibFunc::memset_pattern16); } else if (T.isiOS()) { if (T.isOSVersionLT(3, 0)) TLI.setUnavailable(LibFunc::memset_pattern16); - } else { + } else if (!T.isWatchOS()) { TLI.setUnavailable(LibFunc::memset_pattern16); } @@ -286,8 +296,13 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } break; case Triple::IOS: + case Triple::TvOS: + case Triple::WatchOS: TLI.setUnavailable(LibFunc::exp10l); - if (T.isOSVersionLT(7, 0)) { + if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) || + (T.isOSVersionLT(9, 0) && + (T.getArch() == Triple::x86 || + T.getArch() == Triple::x86_64)))) { TLI.setUnavailable(LibFunc::exp10); TLI.setUnavailable(LibFunc::exp10f); } else { @@ -311,12 +326,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and // Linux (GLIBC): // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html - // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c + // http://svn.freebsd.org/base/head/lib/libc/string/ffsl.c // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html switch (T.getOS()) { case Triple::Darwin: case Triple::MacOSX: case Triple::IOS: + case Triple::TvOS: + case Triple::WatchOS: case Triple::FreeBSD: case Triple::Linux: break; @@ -325,9 +342,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } // ffsll is available on at least FreeBSD and Linux (GLIBC): - // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c + // http://svn.freebsd.org/base/head/lib/libc/string/ffsll.c // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html switch (T.getOS()) { + case Triple::Darwin: + case Triple::MacOSX: + case Triple::IOS: + case Triple::TvOS: + case Triple::WatchOS: case Triple::FreeBSD: case Triple::Linux: break; @@ -335,6 +357,16 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc::ffsll); } + // The following functions are available on at least FreeBSD: + // http://svn.freebsd.org/base/head/lib/libc/string/fls.c + // http://svn.freebsd.org/base/head/lib/libc/string/flsl.c + // http://svn.freebsd.org/base/head/lib/libc/string/flsll.c + if (!T.isOSFreeBSD()) { + TLI.setUnavailable(LibFunc::fls); + TLI.setUnavailable(LibFunc::flsl); + TLI.setUnavailable(LibFunc::flsll); + } + // The following functions are available on at least Linux: if (!T.isOSLinux()) { TLI.setUnavailable(LibFunc::dunder_strdup); diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp index 7d1c3fbef68a..9c1d3fd4f582 100644 --- a/lib/Analysis/TargetTransformInfo.cpp +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -46,30 +46,37 @@ TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) { return *this; } -unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, - Type *OpTy) const { - return TTIImpl->getOperationCost(Opcode, Ty, OpTy); +int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, + Type *OpTy) const { + int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, - int NumArgs) const { - return TTIImpl->getCallCost(FTy, NumArgs); +int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const { + int Cost = TTIImpl->getCallCost(FTy, NumArgs); + 
assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getCallCost(const Function *F, - ArrayRef Arguments) const { - return TTIImpl->getCallCost(F, Arguments); +int TargetTransformInfo::getCallCost(const Function *F, + ArrayRef Arguments) const { + int Cost = TTIImpl->getCallCost(F, Arguments); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments) const { - return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); +int TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const { + int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getUserCost(const User *U) const { - return TTIImpl->getUserCost(U); +int TargetTransformInfo::getUserCost(const User *U) const { + int Cost = TTIImpl->getUserCost(U); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } bool TargetTransformInfo::hasBranchDivergence() const { @@ -106,14 +113,20 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale, AddrSpace); } -bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, - int Consecutive) const { - return TTIImpl->isLegalMaskedStore(DataType, Consecutive); +bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const { + return TTIImpl->isLegalMaskedStore(DataType); } -bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, - int Consecutive) const { - return TTIImpl->isLegalMaskedLoad(DataType, Consecutive); +bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { + return TTIImpl->isLegalMaskedLoad(DataType); +} + +bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { + return TTIImpl->isLegalMaskedGather(DataType); +} + +bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { + return TTIImpl->isLegalMaskedGather(DataType); } int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, @@ -121,8 +134,10 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const { - return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, - Scale, AddrSpace); + int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale, AddrSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -153,6 +168,10 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c return TTIImpl->enableAggressiveInterleaving(LoopHasReductions); } +bool TargetTransformInfo::enableInterleavedAccessVectorization() const { + return TTIImpl->enableInterleavedAccessVectorization(); +} + TargetTransformInfo::PopcntSupportKind TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { return TTIImpl->getPopcntSupport(IntTyWidthInBit); @@ -162,22 +181,30 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { return TTIImpl->haveFastSqrt(Ty); } -unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const { - return TTIImpl->getFPOpCost(Ty); +int TargetTransformInfo::getFPOpCost(Type *Ty) const { + int Cost = TTIImpl->getFPOpCost(Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned 
TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { - return TTIImpl->getIntImmCost(Imm, Ty); +int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCost(Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) const { - return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) const { - return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { @@ -192,81 +219,122 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const { return TTIImpl->getMaxInterleaveFactor(VF); } -unsigned TargetTransformInfo::getArithmeticInstrCost( +int TargetTransformInfo::getArithmeticInstrCost( unsigned Opcode, Type *Ty, OperandValueKind Opd1Info, OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo) const { - return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, - Opd1PropInfo, Opd2PropInfo); + int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Opd1PropInfo, Opd2PropInfo); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, - int Index, Type *SubTp) const { - return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); +int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index, + Type *SubTp) const { + int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) const { - return TTIImpl->getCastInstrCost(Opcode, Dst, Src); +int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { - return TTIImpl->getCFInstrCost(Opcode); +int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { + int Cost = TTIImpl->getCFInstrCost(Opcode); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) const { - return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); +int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) const { - 
return TTIImpl->getVectorInstrCost(Opcode, Val, Index); +int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - return TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) const { - return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + int Cost = + TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getInterleavedMemoryOpCost( +int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) const { + int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, + Alignment); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace) const { - return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, + Alignment, AddressSpace); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned -TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys) const { - return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); -} - -unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys) const { - return TTIImpl->getCallInstrCost(F, RetTy, Tys); + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + ArrayRef Args) const { + int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + +int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, + ArrayRef Tys) const { + int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return TTIImpl->getNumberOfParts(Tp); } -unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp, - bool IsComplex) const { - return TTIImpl->getAddressComputationCost(Tp, IsComplex); +int TargetTransformInfo::getAddressComputationCost(Type *Tp, + bool IsComplex) const { + int Cost = 
TTIImpl->getAddressComputationCost(Tp, IsComplex); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } -unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, - bool IsPairwiseForm) const { - return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); +int TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwiseForm) const { + int Cost = TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; } unsigned @@ -284,9 +352,9 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic( return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); } -bool TargetTransformInfo::hasCompatibleFunctionAttributes( - const Function *Caller, const Function *Callee) const { - return TTIImpl->hasCompatibleFunctionAttributes(Caller, Callee); +bool TargetTransformInfo::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + return TTIImpl->areInlineCompatible(Caller, Callee); } TargetTransformInfo::Concept::~Concept() {} @@ -294,16 +362,16 @@ TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} TargetIRAnalysis::TargetIRAnalysis( - std::function TTICallback) + std::function TTICallback) : TTICallback(TTICallback) {} -TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) { +TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F) { return TTICallback(F); } char TargetIRAnalysis::PassID; -TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) { +TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) { return Result(F.getParent()->getDataLayout()); } @@ -327,7 +395,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass( *PassRegistry::getPassRegistry()); } -TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) { +TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) { TTI = TIRA.run(F); return *TTI; } diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 4e9c6f678ebd..805f3efb0814 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -121,15 +121,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/Constants.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" -#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; // A handy option for disabling TBAA functionality. The same effect can also be @@ -138,199 +136,138 @@ using namespace llvm; static cl::opt EnableTBAA("enable-tbaa", cl::init(true)); namespace { - /// TBAANode - This is a simple wrapper around an MDNode which provides a - /// higher-level interface by hiding the details of how alias analysis - /// information is encoded in its operands. - class TBAANode { - const MDNode *Node; +/// TBAANode - This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. 
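Every TargetTransformInfo forwarding function in the hunks above was converted from unsigned to int and now validates its result the same way. The recurring wrapper shape, reduced to a dependency-free sketch (ImplT and getWidgetCost are placeholders, not real TTI API):

#include <cassert>

struct ImplT {
  int getWidgetCost(int N) const { return 2 * N; } // stand-in cost model
};

// Forward to the implementation, then reject negative costs, exactly as
// the TargetTransformInfo wrappers above now do.
static int getWidgetCost(const ImplT &Impl, int N) {
  int Cost = Impl.getWidgetCost(N);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}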
+class TBAANode { + const MDNode *Node; - public: - TBAANode() : Node(nullptr) {} - explicit TBAANode(const MDNode *N) : Node(N) {} +public: + TBAANode() : Node(nullptr) {} + explicit TBAANode(const MDNode *N) : Node(N) {} - /// getNode - Get the MDNode for this TBAANode. - const MDNode *getNode() const { return Node; } + /// getNode - Get the MDNode for this TBAANode. + const MDNode *getNode() const { return Node; } - /// getParent - Get this TBAANode's Alias tree parent. - TBAANode getParent() const { - if (Node->getNumOperands() < 2) - return TBAANode(); - MDNode *P = dyn_cast_or_null(Node->getOperand(1)); - if (!P) - return TBAANode(); - // Ok, this node has a valid parent. Return it. - return TBAANode(P); - } + /// getParent - Get this TBAANode's Alias tree parent. + TBAANode getParent() const { + if (Node->getNumOperands() < 2) + return TBAANode(); + MDNode *P = dyn_cast_or_null(Node->getOperand(1)); + if (!P) + return TBAANode(); + // Ok, this node has a valid parent. Return it. + return TBAANode(P); + } - /// TypeIsImmutable - Test if this TBAANode represents a type for objects - /// which are not modified (by any means) in the context where this - /// AliasAnalysis is relevant. - bool TypeIsImmutable() const { - if (Node->getNumOperands() < 3) - return false; - ConstantInt *CI = mdconst::dyn_extract(Node->getOperand(2)); - if (!CI) - return false; - return CI->getValue()[0]; - } - }; + /// TypeIsImmutable - Test if this TBAANode represents a type for objects + /// which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 3) + return false; + ConstantInt *CI = mdconst::dyn_extract(Node->getOperand(2)); + if (!CI) + return false; + return CI->getValue()[0]; + } +}; - /// This is a simple wrapper around an MDNode which provides a - /// higher-level interface by hiding the details of how alias analysis - /// information is encoded in its operands. - class TBAAStructTagNode { - /// This node should be created with createTBAAStructTagNode. - const MDNode *Node; +/// This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAAStructTagNode { + /// This node should be created with createTBAAStructTagNode. + const MDNode *Node; - public: - explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} +public: + explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} - /// Get the MDNode for this TBAAStructTagNode. - const MDNode *getNode() const { return Node; } + /// Get the MDNode for this TBAAStructTagNode. + const MDNode *getNode() const { return Node; } - const MDNode *getBaseType() const { - return dyn_cast_or_null(Node->getOperand(0)); - } - const MDNode *getAccessType() const { - return dyn_cast_or_null(Node->getOperand(1)); - } - uint64_t getOffset() const { - return mdconst::extract(Node->getOperand(2))->getZExtValue(); - } - /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for - /// objects which are not modified (by any means) in the context where this - /// AliasAnalysis is relevant. 
- bool TypeIsImmutable() const { - if (Node->getNumOperands() < 4) - return false; - ConstantInt *CI = mdconst::dyn_extract(Node->getOperand(3)); - if (!CI) - return false; - return CI->getValue()[0]; - } - }; + const MDNode *getBaseType() const { + return dyn_cast_or_null(Node->getOperand(0)); + } + const MDNode *getAccessType() const { + return dyn_cast_or_null(Node->getOperand(1)); + } + uint64_t getOffset() const { + return mdconst::extract(Node->getOperand(2))->getZExtValue(); + } + /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for + /// objects which are not modified (by any means) in the context where this + /// AliasAnalysis is relevant. + bool TypeIsImmutable() const { + if (Node->getNumOperands() < 4) + return false; + ConstantInt *CI = mdconst::dyn_extract(Node->getOperand(3)); + if (!CI) + return false; + return CI->getValue()[0]; + } +}; - /// This is a simple wrapper around an MDNode which provides a - /// higher-level interface by hiding the details of how alias analysis - /// information is encoded in its operands. - class TBAAStructTypeNode { - /// This node should be created with createTBAAStructTypeNode. - const MDNode *Node; +/// This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAAStructTypeNode { + /// This node should be created with createTBAAStructTypeNode. + const MDNode *Node; - public: - TBAAStructTypeNode() : Node(nullptr) {} - explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} +public: + TBAAStructTypeNode() : Node(nullptr) {} + explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} - /// Get the MDNode for this TBAAStructTypeNode. - const MDNode *getNode() const { return Node; } + /// Get the MDNode for this TBAAStructTypeNode. + const MDNode *getNode() const { return Node; } - /// Get this TBAAStructTypeNode's field in the type DAG with - /// given offset. Update the offset to be relative to the field type. - TBAAStructTypeNode getParent(uint64_t &Offset) const { - // Parent can be omitted for the root node. - if (Node->getNumOperands() < 2) - return TBAAStructTypeNode(); + /// Get this TBAAStructTypeNode's field in the type DAG with + /// given offset. Update the offset to be relative to the field type. + TBAAStructTypeNode getParent(uint64_t &Offset) const { + // Parent can be omitted for the root node. + if (Node->getNumOperands() < 2) + return TBAAStructTypeNode(); - // Fast path for a scalar type node and a struct type node with a single - // field. - if (Node->getNumOperands() <= 3) { - uint64_t Cur = Node->getNumOperands() == 2 - ? 0 - : mdconst::extract(Node->getOperand(2)) - ->getZExtValue(); - Offset -= Cur; - MDNode *P = dyn_cast_or_null(Node->getOperand(1)); - if (!P) - return TBAAStructTypeNode(); - return TBAAStructTypeNode(P); - } - - // Assume the offsets are in order. We return the previous field if - // the current offset is bigger than the given offset. - unsigned TheIdx = 0; - for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { - uint64_t Cur = mdconst::extract(Node->getOperand(Idx + 1)) - ->getZExtValue(); - if (Cur > Offset) { - assert(Idx >= 3 && - "TBAAStructTypeNode::getParent should have an offset match!"); - TheIdx = Idx - 2; - break; - } - } - // Move along the last field. 
- if (TheIdx == 0) - TheIdx = Node->getNumOperands() - 2; - uint64_t Cur = mdconst::extract(Node->getOperand(TheIdx + 1)) - ->getZExtValue(); + // Fast path for a scalar type node and a struct type node with a single + // field. + if (Node->getNumOperands() <= 3) { + uint64_t Cur = Node->getNumOperands() == 2 + ? 0 + : mdconst::extract(Node->getOperand(2)) + ->getZExtValue(); Offset -= Cur; - MDNode *P = dyn_cast_or_null(Node->getOperand(TheIdx)); + MDNode *P = dyn_cast_or_null(Node->getOperand(1)); if (!P) return TBAAStructTypeNode(); return TBAAStructTypeNode(P); } - }; -} -namespace { - /// TypeBasedAliasAnalysis - This is a simple alias analysis - /// implementation that uses TypeBased to answer queries. - class TypeBasedAliasAnalysis : public ImmutablePass, - public AliasAnalysis { - public: - static char ID; // Class identification, replacement for typeinfo - TypeBasedAliasAnalysis() : ImmutablePass(ID) { - initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); + // Assume the offsets are in order. We return the previous field if + // the current offset is bigger than the given offset. + unsigned TheIdx = 0; + for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { + uint64_t Cur = mdconst::extract(Node->getOperand(Idx + 1)) + ->getZExtValue(); + if (Cur > Offset) { + assert(Idx >= 3 && + "TBAAStructTypeNode::getParent should have an offset match!"); + TheIdx = Idx - 2; + break; + } } - - bool doInitialization(Module &M) override; - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - void *getAdjustedAnalysisPointer(const void *PI) override { - if (PI == &AliasAnalysis::ID) - return (AliasAnalysis*)this; - return this; - } - - bool Aliases(const MDNode *A, const MDNode *B) const; - bool PathAliases(const MDNode *A, const MDNode *B) const; - - private: - void getAnalysisUsage(AnalysisUsage &AU) const override; - AliasResult alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) override; - bool pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) override; - ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; - ModRefBehavior getModRefBehavior(const Function *F) override; - ModRefResult getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) override; - ModRefResult getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) override; - }; -} // End of anonymous namespace - -// Register this pass... -char TypeBasedAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", - "Type-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { - return new TypeBasedAliasAnalysis(); -} - -bool TypeBasedAliasAnalysis::doInitialization(Module &M) { - InitializeAliasAnalysis(this, &M.getDataLayout()); - return true; -} - -void -TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AliasAnalysis::getAnalysisUsage(AU); + // Move along the last field. 
+ if (TheIdx == 0) + TheIdx = Node->getNumOperands() - 2; + uint64_t Cur = mdconst::extract(Node->getOperand(TheIdx + 1)) + ->getZExtValue(); + Offset -= Cur; + MDNode *P = dyn_cast_or_null(Node->getOperand(TheIdx)); + if (!P) + return TBAAStructTypeNode(); + return TBAAStructTypeNode(P); + } +}; } /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat @@ -342,145 +279,36 @@ static bool isStructPathTBAA(const MDNode *MD) { return isa(MD->getOperand(0)) && MD->getNumOperands() >= 3; } -/// Aliases - Test whether the type represented by A may alias the -/// type represented by B. -bool -TypeBasedAliasAnalysis::Aliases(const MDNode *A, - const MDNode *B) const { - // Make sure that both MDNodes are struct-path aware. - if (isStructPathTBAA(A) && isStructPathTBAA(B)) - return PathAliases(A, B); - - // Keep track of the root node for A and B. - TBAANode RootA, RootB; - - // Climb the tree from A to see if we reach B. - for (TBAANode T(A); ; ) { - if (T.getNode() == B) - // B is an ancestor of A. - return true; - - RootA = T; - T = T.getParent(); - if (!T.getNode()) - break; - } - - // Climb the tree from B to see if we reach A. - for (TBAANode T(B); ; ) { - if (T.getNode() == A) - // A is an ancestor of B. - return true; - - RootB = T; - T = T.getParent(); - if (!T.getNode()) - break; - } - - // Neither node is an ancestor of the other. - - // If they have different roots, they're part of different potentially - // unrelated type systems, so we must be conservative. - if (RootA.getNode() != RootB.getNode()) - return true; - - // If they have the same root, then we've proved there's no alias. - return false; -} - -/// Test whether the struct-path tag represented by A may alias the -/// struct-path tag represented by B. -bool -TypeBasedAliasAnalysis::PathAliases(const MDNode *A, - const MDNode *B) const { - // Verify that both input nodes are struct-path aware. - assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); - assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); - - // Keep track of the root node for A and B. - TBAAStructTypeNode RootA, RootB; - TBAAStructTagNode TagA(A), TagB(B); - - // TODO: We need to check if AccessType of TagA encloses AccessType of - // TagB to support aggregate AccessType. If yes, return true. - - // Start from the base type of A, follow the edge with the correct offset in - // the type DAG and adjust the offset until we reach the base type of B or - // until we reach the Root node. - // Compare the adjusted offset once we have the same base. - - // Climb the type DAG from base type of A to see if we reach base type of B. - const MDNode *BaseA = TagA.getBaseType(); - const MDNode *BaseB = TagB.getBaseType(); - uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); - for (TBAAStructTypeNode T(BaseA); ; ) { - if (T.getNode() == BaseB) - // Base type of A encloses base type of B, check if the offsets match. - return OffsetA == OffsetB; - - RootA = T; - // Follow the edge with the correct offset, OffsetA will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetA); - if (!T.getNode()) - break; - } - - // Reset OffsetA and climb the type DAG from base type of B to see if we reach - // base type of A. - OffsetA = TagA.getOffset(); - for (TBAAStructTypeNode T(BaseB); ; ) { - if (T.getNode() == BaseA) - // Base type of B encloses base type of A, check if the offsets match. 
- return OffsetA == OffsetB; - - RootB = T; - // Follow the edge with the correct offset, OffsetB will be adjusted to - // be relative to the field type. - T = T.getParent(OffsetB); - if (!T.getNode()) - break; - } - - // Neither node is an ancestor of the other. - - // If they have different roots, they're part of different potentially - // unrelated type systems, so we must be conservative. - if (RootA.getNode() != RootB.getNode()) - return true; - - // If they have the same root, then we've proved there's no alias. - return false; -} - -AliasResult TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA, - const MemoryLocation &LocB) { +AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) { if (!EnableTBAA) - return AliasAnalysis::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB); // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must // be conservative. const MDNode *AM = LocA.AATags.TBAA; - if (!AM) return AliasAnalysis::alias(LocA, LocB); + if (!AM) + return AAResultBase::alias(LocA, LocB); const MDNode *BM = LocB.AATags.TBAA; - if (!BM) return AliasAnalysis::alias(LocA, LocB); + if (!BM) + return AAResultBase::alias(LocA, LocB); // If they may alias, chain to the next AliasAnalysis. if (Aliases(AM, BM)) - return AliasAnalysis::alias(LocA, LocB); + return AAResultBase::alias(LocA, LocB); // Otherwise return a definitive result. return NoAlias; } -bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, - bool OrLocal) { +bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, + bool OrLocal) { if (!EnableTBAA) - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); const MDNode *M = Loc.AATags.TBAA; - if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + if (!M) + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. @@ -488,80 +316,82 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) return true; - return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + return AAResultBase::pointsToConstantMemory(Loc, OrLocal); } -AliasAnalysis::ModRefBehavior -TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { +FunctionModRefBehavior +TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) { if (!EnableTBAA) - return AliasAnalysis::getModRefBehavior(CS); + return AAResultBase::getModRefBehavior(CS); - ModRefBehavior Min = UnknownModRefBehavior; + FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; // If this is an "immutable" type, we can assume the call doesn't write // to memory. if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) - Min = OnlyReadsMemory; + Min = FMRB_OnlyReadsMemory; - return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); + return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min); } -AliasAnalysis::ModRefBehavior -TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { +FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) { // Functions don't have metadata. Just chain to the next implementation. 
- return AliasAnalysis::getModRefBehavior(F); + return AAResultBase::getModRefBehavior(F); } -AliasAnalysis::ModRefResult -TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, - const MemoryLocation &Loc) { +ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS, + const MemoryLocation &Loc) { if (!EnableTBAA) - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); if (const MDNode *L = Loc.AATags.TBAA) if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(L, M)) - return NoModRef; + return MRI_NoModRef; - return AliasAnalysis::getModRefInfo(CS, Loc); + return AAResultBase::getModRefInfo(CS, Loc); } -AliasAnalysis::ModRefResult -TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, - ImmutableCallSite CS2) { +ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { if (!EnableTBAA) - return AliasAnalysis::getModRefInfo(CS1, CS2); + return AAResultBase::getModRefInfo(CS1, CS2); if (const MDNode *M1 = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (const MDNode *M2 = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) if (!Aliases(M1, M2)) - return NoModRef; + return MRI_NoModRef; - return AliasAnalysis::getModRefInfo(CS1, CS2); + return AAResultBase::getModRefInfo(CS1, CS2); } bool MDNode::isTBAAVtableAccess() const { if (!isStructPathTBAA(this)) { - if (getNumOperands() < 1) return false; + if (getNumOperands() < 1) + return false; if (MDString *Tag1 = dyn_cast(getOperand(0))) { - if (Tag1->getString() == "vtable pointer") return true; + if (Tag1->getString() == "vtable pointer") + return true; } return false; } // For struct-path aware TBAA, we use the access type of the tag. - if (getNumOperands() < 2) return false; + if (getNumOperands() < 2) + return false; MDNode *Tag = cast_or_null(getOperand(1)); - if (!Tag) return false; + if (!Tag) + return false; if (MDString *Tag1 = dyn_cast(Tag->getOperand(0))) { - if (Tag1->getString() == "vtable pointer") return true; + if (Tag1->getString() == "vtable pointer") + return true; } - return false; + return false; } MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { @@ -575,9 +405,11 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B); if (StructPath) { A = cast_or_null(A->getOperand(1)); - if (!A) return nullptr; + if (!A) + return nullptr; B = cast_or_null(B->getOperand(1)); - if (!B) return nullptr; + if (!B) + return nullptr; } SmallSetVector PathA; @@ -604,7 +436,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { int IB = PathB.size() - 1; MDNode *Ret = nullptr; - while (IA >= 0 && IB >=0) { + while (IA >= 0 && IB >= 0) { if (PathA[IA] == PathB[IB]) Ret = PathA[IA]; else @@ -644,3 +476,147 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { N.NoAlias = getMetadata(LLVMContext::MD_noalias); } +/// Aliases - Test whether the type represented by A may alias the +/// type represented by B. +bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { + // Make sure that both MDNodes are struct-path aware. + if (isStructPathTBAA(A) && isStructPathTBAA(B)) + return PathAliases(A, B); + + // Keep track of the root node for A and B. + TBAANode RootA, RootB; + + // Climb the tree from A to see if we reach B. + for (TBAANode T(A);;) { + if (T.getNode() == B) + // B is an ancestor of A. 
+ return true; + + RootA = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Climb the tree from B to see if we reach A. + for (TBAANode T(B);;) { + if (T.getNode() == A) + // A is an ancestor of B. + return true; + + RootB = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. + return false; +} + +/// Test whether the struct-path tag represented by A may alias the +/// struct-path tag represented by B. +bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const { + // Verify that both input nodes are struct-path aware. + assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); + assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); + + // Keep track of the root node for A and B. + TBAAStructTypeNode RootA, RootB; + TBAAStructTagNode TagA(A), TagB(B); + + // TODO: We need to check if AccessType of TagA encloses AccessType of + // TagB to support aggregate AccessType. If yes, return true. + + // Start from the base type of A, follow the edge with the correct offset in + // the type DAG and adjust the offset until we reach the base type of B or + // until we reach the Root node. + // Compare the adjusted offset once we have the same base. + + // Climb the type DAG from base type of A to see if we reach base type of B. + const MDNode *BaseA = TagA.getBaseType(); + const MDNode *BaseB = TagB.getBaseType(); + uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); + for (TBAAStructTypeNode T(BaseA);;) { + if (T.getNode() == BaseB) + // Base type of A encloses base type of B, check if the offsets match. + return OffsetA == OffsetB; + + RootA = T; + // Follow the edge with the correct offset, OffsetA will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetA); + if (!T.getNode()) + break; + } + + // Reset OffsetA and climb the type DAG from base type of B to see if we reach + // base type of A. + OffsetA = TagA.getOffset(); + for (TBAAStructTypeNode T(BaseB);;) { + if (T.getNode() == BaseA) + // Base type of B encloses base type of A, check if the offsets match. + return OffsetA == OffsetB; + + RootB = T; + // Follow the edge with the correct offset, OffsetB will be adjusted to + // be relative to the field type. + T = T.getParent(OffsetB); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. 
+  return false;
+}
+
+TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> *AM) {
+  return TypeBasedAAResult(AM->getResult<TargetLibraryAnalysis>(F));
+}
+
+char TypeBasedAA::PassID;
+
+char TypeBasedAAWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TypeBasedAAWrapperPass, "tbaa",
+                      "Type-Based Alias Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis",
+                    false, true)
+
+ImmutablePass *llvm::createTypeBasedAAWrapperPass() {
+  return new TypeBasedAAWrapperPass();
+}
+
+TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {
+  initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+bool TypeBasedAAWrapperPass::doInitialization(Module &M) {
+  Result.reset(new TypeBasedAAResult(
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI()));
+  return false;
+}
+
+bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
+  Result.reset();
+  return false;
+}
+
+void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index fa0d7798cae9..314ec9c1886e 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -43,7 +44,7 @@ const unsigned MaxDepth = 6;
 
 /// Enable an experimental feature to leverage information about dominating
 /// conditions to compute known bits.  The individual options below control how
-/// hard we search. The defaults are choosen to be fairly aggressive. If you
+/// hard we search. The defaults are chosen to be fairly aggressive. If you
 /// run into compile time problems when testing, scale them back and report
 /// your findings.
 static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
@@ -58,12 +59,12 @@ static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
 /// conditions?
 static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
                                                    cl::Hidden,
-                                                   cl::init(20000));
+                                                   cl::init(20));
 
 // Controls the number of uses of the value searched for possible
 // dominating comparisons.
 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
-                                              cl::Hidden, cl::init(2000));
+                                              cl::Hidden, cl::init(20));
 
 // If true, don't consider only compares whose only use is a branch.
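// ---- Editor's aside (illustrative sketch, not part of the patch) ----------
// The struct-path TBAA tags that TypeBasedAAResult consumes above are plain
// MDNodes; MDBuilder provides helpers to build them. The helper name
// buildExampleTag is hypothetical, but createTBAARoot,
// createTBAAScalarTypeNode and createTBAAStructTagNode are the MDBuilder
// entry points as of this era; treat the exact signatures as assumptions.
#include "llvm/IR/MDBuilder.h"

static llvm::MDNode *buildExampleTag(llvm::LLVMContext &Ctx) {
  llvm::MDBuilder MDB(Ctx);
  llvm::MDNode *Root = MDB.createTBAARoot("Simple C/C++ TBAA");
  llvm::MDNode *Char = MDB.createTBAAScalarTypeNode("omnipotent char", Root);
  llvm::MDNode *Int  = MDB.createTBAAScalarTypeNode("int", Char);
  // Access-tag operands: [0] base type, [1] access type, [2] offset - exactly
  // what TBAAStructTagNode::getBaseType/getAccessType/getOffset read above.
  return MDB.createTBAAStructTagNode(Int, Int, /*Offset=*/0);
}
// ----------------------------------------------------------------------------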
 static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use",
@@ -185,6 +186,25 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
   return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));
 }
 
+bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
+                              AssumptionCache *AC, const Instruction *CxtI,
+                              const DominatorTree *DT) {
+  bool NonNegative, Negative;
+  ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT);
+  return NonNegative;
+}
+
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+                            const Query &Q);
+
+bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+                           AssumptionCache *AC, const Instruction *CxtI,
+                           const DominatorTree *DT) {
+  return ::isKnownNonEqual(V1, V2, DL, Query(AC,
+                                             safeCxtI(V1, safeCxtI(V2, CxtI)),
+                                             DT));
+}
+
 static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
                               unsigned Depth, const Query &Q);
 
@@ -320,7 +340,7 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
   }
 
   // If low bits are zero in either operand, output low known-0 bits.
-  // Also compute a conserative estimate for high known-0 bits.
+  // Also compute a conservative estimate for high known-0 bits.
   // More trickiness is possible, but this is sufficient for the
   // interesting case of alignment computation.
   KnownOne.clearAllBits();
@@ -347,26 +367,30 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
 }
 
 void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
-                                             APInt &KnownZero) {
+                                             APInt &KnownZero,
+                                             APInt &KnownOne) {
   unsigned BitWidth = KnownZero.getBitWidth();
   unsigned NumRanges = Ranges.getNumOperands() / 2;
   assert(NumRanges >= 1);
 
-  // Use the high end of the ranges to find leading zeros.
-  unsigned MinLeadingZeros = BitWidth;
+  KnownZero.setAllBits();
+  KnownOne.setAllBits();
+
   for (unsigned i = 0; i < NumRanges; ++i) {
     ConstantInt *Lower =
         mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
     ConstantInt *Upper =
         mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
     ConstantRange Range(Lower->getValue(), Upper->getValue());
-    if (Range.isWrappedSet())
-      MinLeadingZeros = 0; // -1 has no zeros
-    unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros();
-    MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros);
-  }
-  KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
+
+    // The first CommonPrefixBits of all values in Range are equal.
+    unsigned CommonPrefixBits =
+        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
+
+    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
+    KnownOne &= Range.getUnsignedMax() & Mask;
+    KnownZero &= ~Range.getUnsignedMax() & Mask;
+  }
 }
 
 static bool isEphemeralValueOf(Instruction *I, const Value *E) {
@@ -374,20 +398,20 @@
   SmallPtrSet<const Value *, 32> Visited;
   SmallPtrSet<const Value *, 16> EphValues;
 
+  // The instruction defining an assumption's condition itself is always
+  // considered ephemeral to that assumption (even if it has other
+  // non-ephemeral users). See r246696's test case for an example.
+  if (std::find(I->op_begin(), I->op_end(), E) != I->op_end())
+    return true;
+
   while (!WorkSet.empty()) {
     const Value *V = WorkSet.pop_back_val();
     if (!Visited.insert(V).second)
       continue;
 
     // If all uses of this value are ephemeral, then so is this value.
-    bool FoundNEUse = false;
-    for (const User *I : V->users())
-      if (!EphValues.count(I)) {
-        FoundNEUse = true;
-        break;
-      }
-
-    if (!FoundNEUse) {
+    if (std::all_of(V->user_begin(), V->user_end(),
+                    [&](const User *U) { return EphValues.count(U); })) {
       if (V == E)
         return true;
 
@@ -447,7 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
     for (BasicBlock::const_iterator I =
            std::next(BasicBlock::const_iterator(Q.CxtI)),
                      IE(Inv); I != IE; ++I)
-      if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+      if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
        return false;
 
     return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -464,14 +488,14 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {
     // of the block); the common case is that the assume will come first.
     for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
          IE = Inv->getParent()->end(); I != IE; ++I)
-      if (I == Q.CxtI)
+      if (&*I == Q.CxtI)
         return true;
 
     // The context must come first...
     for (BasicBlock::const_iterator I =
            std::next(BasicBlock::const_iterator(Q.CxtI)),
                      IE(Inv); I != IE; ++I)
-      if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I))
+      if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
         return false;
 
     return !isEphemeralValueOf(Inv, Q.CxtI);
@@ -601,6 +625,11 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
   if (!Q.DT || !Q.CxtI)
     return;
   Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
+  // The context instruction might be in a statically unreachable block.  If
+  // so, asking dominator queries may yield surprising results.  (e.g. the
+  // block may not have a dom tree node)
+  if (!Q.DT->isReachableFromEntry(Cxt->getParent()))
+    return;
 
   // Avoid useless work
   if (auto VI = dyn_cast<Instruction>(V))
@@ -647,7 +676,9 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
     // instruction.  Finding a condition where one path dominates the context
     // isn't enough because both the true and false cases could merge before
     // the context instruction we're actually interested in.  Instead, we need
-    // to ensure that the taken *edge* dominates the context instruction.
+    // to ensure that the taken *edge* dominates the context instruction.  We
+    // know that the edge must be reachable since we started from a reachable
+    // block.
     BasicBlock *BB0 = BI->getSuccessor(0);
     BasicBlockEdge Edge(BI->getParent(), BB0);
     if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
@@ -941,6 +972,90 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
   }
 }
 
+// Compute known bits from a shift operator, including those with a
+// non-constant shift amount. KnownZero and KnownOne are the outputs of this
+// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the
+// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific
+// functors that, given the known-zero or known-one bits respectively, and a
+// shift amount, compute the implied known-zero or known-one bits of the shift
+// operator's result respectively for that shift amount. The results from
+// calling KZF and KOF are conservatively combined for all permitted shift
+// amounts.
+template <typename KZFunctor, typename KOFunctor>
+static void computeKnownBitsFromShiftOperator(Operator *I,
+              APInt &KnownZero, APInt &KnownOne,
+              APInt &KnownZero2, APInt &KnownOne2,
+              const DataLayout &DL, unsigned Depth, const Query &Q,
+              KZFunctor KZF, KOFunctor KOF) {
+  unsigned BitWidth = KnownZero.getBitWidth();
+
+  if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+    unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q);
+    KnownZero = KZF(KnownZero, ShiftAmt);
+    KnownOne = KOF(KnownOne, ShiftAmt);
+    return;
+  }
+
+  computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q);
+
+  // Note: We cannot use KnownZero.getLimitedValue() here, because if
+  // BitWidth > 64 and any upper bits are known, we'll end up returning the
+  // limit value (which implies all bits are known).
+  uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue();
+  uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue();
+
+  // It would be more-clearly correct to use the two temporaries for this
+  // calculation. Reusing the APInts here to prevent unnecessary allocations.
+  KnownZero.clearAllBits(), KnownOne.clearAllBits();
+
+  // If we know the shifter operand is nonzero, we can sometimes infer more
+  // known bits. However this is expensive to compute, so be lazy about it and
+  // only compute it when absolutely necessary.
+  Optional<bool> ShifterOperandIsNonZero;
+
+  // Early exit if we can't constrain any well-defined shift amount.
+  if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) {
+    ShifterOperandIsNonZero =
+        isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+    if (!*ShifterOperandIsNonZero)
+      return;
+  }
+
+  computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q);
+
+  KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
+    // Combine the shifted known input bits only for those shift amounts
+    // compatible with its known constraints.
+    if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
+      continue;
+    if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
+      continue;
+    // If we know the shifter is nonzero, we may be able to infer more known
+    // bits. This check is sunk down as far as possible to avoid the expensive
+    // call to isKnownNonZero if the cheaper checks above fail.
+    if (ShiftAmt == 0) {
+      if (!ShifterOperandIsNonZero.hasValue())
+        ShifterOperandIsNonZero =
+            isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q);
+      if (*ShifterOperandIsNonZero)
+        continue;
+    }
+
+    KnownZero &= KZF(KnownZero2, ShiftAmt);
+    KnownOne &= KOF(KnownOne2, ShiftAmt);
+  }
+
+  // If there are no compatible shift amounts, then we've proven that the shift
+  // amount must be >= the BitWidth, and the result is undefined. We could
+  // return anything we'd like, but we need to make sure the sets of known bits
+  // stay disjoint (it should be better for some other code to actually
+  // propagate the undef than to pick a value here using known bits).
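// Editor's aside (illustrative sketch, not part of the patch): the loop above
// intersects the KZF/KOF results over every shift amount compatible with the
// shifter's known bits. A self-contained 8-bit model of the same combining
// rule, specialized to shl and omitting the isKnownNonZero refinement; every
// name below is hypothetical:
#include <cstdint>

static void toyShlKnownBits(uint8_t AmtKnownZero, uint8_t AmtKnownOne,
                            uint8_t KZ2, uint8_t KO2,   // known bits of operand 0
                            uint8_t &KZ, uint8_t &KO) { // known bits of the shl
  KZ = KO = 0xFF; // start from "all bits known" and intersect
  for (unsigned Amt = 0; Amt < 8; ++Amt) {
    if ((Amt & ~AmtKnownZero) != Amt)
      continue; // Amt has a one where the amount is known to be zero
    if ((Amt | AmtKnownOne) != Amt)
      continue; // Amt has a zero where the amount is known to be one
    KZ &= uint8_t((KZ2 << Amt) | ((1u << Amt) - 1)); // vacated low bits are zero
    KO &= uint8_t(KO2 << Amt);
  }
  if (KZ & KO) // no feasible amount: mirrors the disjointness reset below
    KZ = KO = 0;
}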
+ if ((KnownZero & KnownOne) != 0) + KnownZero.clearAllBits(), KnownOne.clearAllBits(); +} + static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, APInt &KnownOne, const DataLayout &DL, unsigned Depth, const Query &Q) { @@ -951,7 +1066,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, default: break; case Instruction::Load: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); break; case Instruction::And: { // If either the LHS or the RHS are Zero, the result is zero. @@ -962,6 +1077,22 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; + + // and(x, add (x, -1)) is a common idiom that always clears the low bit; + // here we handle the more general case of adding any odd number by + // matching the form add(x, add(x, y)) where y is odd. + // TODO: This could be generalized to clearing any bit set in y where the + // following bit is known to be unset in y. + Value *Y = nullptr; + if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), + m_Value(Y))) || + match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), + m_Value(Y)))) { + APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); + computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q); + if (KnownOne3.countTrailingOnes() > 0) + KnownZero |= APInt::getLowBitsSet(BitWidth, 1); + } break; } case Instruction::Or: { @@ -1050,7 +1181,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() || + SrcTy->isFloatingPointTy()) && // TODO: For now, not handling conversions like: // (bitcast i64 %x to <2 x i32>) !I->getType()->isVectorTy()) { @@ -1077,48 +1209,54 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); break; } - case Instruction::Shl: + case Instruction::Shl: { // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero <<= ShiftAmt; - KnownOne <<= ShiftAmt; - KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 - } + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return (KnownZero << ShiftAmt) | + APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0. + }; + + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return KnownOne << ShiftAmt; + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::LShr: + } + case Instruction::LShr: { // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::lshr(KnownZero, ShiftAmt) | + // High bits known zero. + APInt::getHighBitsSet(BitWidth, ShiftAmt); + }; - // Unsigned shift right. 
- computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - // high bits known zero. - KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - } + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::lshr(KnownOne, ShiftAmt); + }; + + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; - case Instruction::AShr: + } + case Instruction::AShr: { // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - // Compute the new bits that are at the top now. - uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); + auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { + return APIntOps::ashr(KnownZero, ShiftAmt); + }; - // Signed shift right. - computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); - KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); - KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); + auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { + return APIntOps::ashr(KnownOne, ShiftAmt); + }; - APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); - if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. - KnownZero |= HighBits; - else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. - KnownOne |= HighBits; - } + computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, + KnownZero2, KnownOne2, DL, Depth, Q, + KZF, KOF); break; + } case Instruction::Sub: { bool NSW = cast(I)->hasNoSignedWrap(); computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, @@ -1336,13 +1474,19 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, case Instruction::Call: case Instruction::Invoke: if (MDNode *MD = cast(I)->getMetadata(LLVMContext::MD_range)) - computeKnownBitsFromRangeMetadata(*MD, KnownZero); + computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne); // If a range metadata is attached to this IntrinsicInst, intersect the // explicit range specified by the metadata and the implicit range of // the intrinsic. if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::bswap: + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + KnownZero |= KnownZero2.byteSwap(); + KnownOne |= KnownOne2.byteSwap(); + break; case Intrinsic::ctlz: case Intrinsic::cttz: { unsigned LowBits = Log2_32(BitWidth)+1; @@ -1353,8 +1497,24 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, break; } case Intrinsic::ctpop: { - unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, + Depth + 1, Q); + // We can bound the space the count needs. Also, bits known to be zero + // can't contribute to the population. + unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); + unsigned LeadingZeros = + APInt(BitWidth, BitsPossiblySet).countLeadingZeros(); + assert(LeadingZeros <= BitWidth); + KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros); + KnownOne &= ~KnownZero; + // TODO: we could bound KnownOne using the lower bound on the number + // of bits which might be set provided by popcnt KnownOne2. 
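// Editor's aside: a worked instance of the ctpop bound above. For a 32-bit
// value with 24 bits already known zero, BitsPossiblySet = 32 - 24 = 8, and
// APInt(32, 8).countLeadingZeros() == 28, so the high 28 bits of the
// population count are known zero: any count <= 8 fits in the low 4 bits.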
+ break; + } + case Intrinsic::fabs: { + Type *Ty = II->getType(); + APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits()); + KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit); break; } case Intrinsic::x86_sse42_crc32_64_64: @@ -1394,6 +1554,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, } } +static unsigned getAlignment(const Value *V, const DataLayout &DL) { + unsigned Align = 0; + if (auto *GO = dyn_cast(V)) { + Align = GO->getAlignment(); + if (Align == 0) { + if (auto *GVar = dyn_cast(GO)) { + Type *ObjectType = GVar->getType()->getElementType(); + if (ObjectType->isSized()) { + // If the object is defined in the current Module, we'll be giving + // it the preferred alignment. Otherwise, we have to assume that it + // may only have the minimum ABI alignment. + if (GVar->isStrongDefinitionForLinker()) + Align = DL.getPreferredAlignment(GVar); + else + Align = DL.getABITypeAlignment(ObjectType); + } + } + } + } else if (const Argument *A = dyn_cast(V)) { + Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; + + if (!Align && A->hasStructRetAttr()) { + // An sret parameter has at least the ABI alignment of the return type. + Type *EltTy = cast(A->getType())->getElementType(); + if (EltTy->isSized()) + Align = DL.getABITypeAlignment(EltTy); + } + } else if (const AllocaInst *AI = dyn_cast(V)) + Align = AI->getAlignment(); + else if (auto CS = ImmutableCallSite(V)) + Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex); + else if (const LoadInst *LI = dyn_cast(V)) + if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { + ConstantInt *CI = mdconst::extract(MD->getOperand(0)); + Align = CI->getLimitedValue(); + } + + return Align; +} + /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. /// @@ -1416,8 +1616,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned BitWidth = KnownZero.getBitWidth(); assert((V->getType()->isIntOrIntVectorTy() || + V->getType()->isFPOrFPVectorTy() || V->getType()->getScalarType()->isPointerTy()) && - "Not integer or pointer type!"); + "Not integer, floating point, or pointer type!"); assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) && (!V->getType()->isIntOrIntVectorTy() || V->getType()->getScalarSizeInBits() == BitWidth) && @@ -1454,59 +1655,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, return; } - // The address of an aligned GlobalValue has trailing zeros. - if (auto *GO = dyn_cast(V)) { - unsigned Align = GO->getAlignment(); - if (Align == 0) { - if (auto *GVar = dyn_cast(GO)) { - Type *ObjectType = GVar->getType()->getElementType(); - if (ObjectType->isSized()) { - // If the object is defined in the current Module, we'll be giving - // it the preferred alignment. Otherwise, we have to assume that it - // may only have the minimum ABI alignment. - if (GVar->isStrongDefinitionForLinker()) - Align = DL.getPreferredAlignment(GVar); - else - Align = DL.getABITypeAlignment(ObjectType); - } - } - } - if (Align > 0) - KnownZero = APInt::getLowBitsSet(BitWidth, - countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - return; - } - - if (Argument *A = dyn_cast(V)) { - unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; - - if (!Align && A->hasStructRetAttr()) { - // An sret parameter has at least the ABI alignment of the return type. 
- Type *EltTy = cast(A->getType())->getElementType(); - if (EltTy->isSized()) - Align = DL.getABITypeAlignment(EltTy); - } - - if (Align) - KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); - else - KnownZero.clearAllBits(); - KnownOne.clearAllBits(); - - // Don't give up yet... there might be an assumption that provides more - // information... - computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); - - // Or a dominating condition for that matter - if (EnableDomConditions && Depth <= DomConditionsMaxDepth) - computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, - Depth, Q); - return; - } - // Start out not knowing anything. KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -1525,6 +1673,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (Operator *I = dyn_cast(V)) computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q); + + // Aligned pointers have trailing zeros - refine KnownZero set + if (V->getType()->isPointerTy()) { + unsigned Align = getAlignment(V, DL); + if (Align) + KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); + } + // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition // strictly refines KnownZero and KnownOne. Therefore, we run them after // computeKnownBitsFromOperator. @@ -1812,6 +1968,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q); if (XKnownNegative) return true; + + // If the shifter operand is a constant, and all of the bits shifted + // out are known to be zero, and X is known non-zero then at least one + // non-zero bit must remain. + if (ConstantInt *Shift = dyn_cast(Y)) { + APInt KnownZero(BitWidth, 0); + APInt KnownOne(BitWidth, 0); + computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); + + auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); + // Is there a known one in the portion not shifted out? + if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) + return true; + // Are all the bits to be shifted out known zero? + if (KnownZero.countTrailingOnes() >= ShiftVal) + return isKnownNonZero(X, DL, Depth, Q); + } } // div exact can only produce a zero if the dividend is zero. else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { @@ -1871,6 +2044,26 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, isKnownNonZero(SI->getFalseValue(), DL, Depth, Q)) return true; } + // PHI + else if (PHINode *PN = dyn_cast(V)) { + // Try and detect a recurrence that monotonically increases from a + // starting value, as these are common as induction variables. + if (PN->getNumIncomingValues() == 2) { + Value *Start = PN->getIncomingValue(0); + Value *Induction = PN->getIncomingValue(1); + if (isa(Induction) && !isa(Start)) + std::swap(Start, Induction); + if (ConstantInt *C = dyn_cast(Start)) { + if (!C->isZero() && !C->isNegative()) { + ConstantInt *X; + if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) || + match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) && + !X->isNegative()) + return true; + } + } + } + } if (!BitWidth) return false; APInt KnownZero(BitWidth, 0); @@ -1879,6 +2072,51 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth, return KnownOne != 0; } +/// Return true if V2 == V1 + X, where X is known non-zero. 
+static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL,
+                           const Query &Q) {
+  BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
+  if (!BO || BO->getOpcode() != Instruction::Add)
+    return false;
+  Value *Op = nullptr;
+  if (V2 == BO->getOperand(0))
+    Op = BO->getOperand(1);
+  else if (V2 == BO->getOperand(1))
+    Op = BO->getOperand(0);
+  else
+    return false;
+  return isKnownNonZero(Op, DL, 0, Q);
+}
+
+/// Return true if it is known that V1 != V2.
+static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+                            const Query &Q) {
+  if (V1->getType()->isVectorTy() || V1 == V2)
+    return false;
+  if (V1->getType() != V2->getType())
+    // We can't look through casts yet.
+    return false;
+  if (isAddOfNonZero(V1, V2, DL, Q) || isAddOfNonZero(V2, V1, DL, Q))
+    return true;
+
+  if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) {
+    // Are any known bits in V1 contradictory to known bits in V2? If V1
+    // has a known zero where V2 has a known one, they must not be equal.
+    auto BitWidth = Ty->getBitWidth();
+    APInt KnownZero1(BitWidth, 0);
+    APInt KnownOne1(BitWidth, 0);
+    computeKnownBits(V1, KnownZero1, KnownOne1, DL, 0, Q);
+    APInt KnownZero2(BitWidth, 0);
+    APInt KnownOne2(BitWidth, 0);
+    computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, Q);
+
+    auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1);
+    if (OppositeBits.getBoolValue())
+      return true;
+  }
+  return false;
+}
+
 /// Return true if 'V & Mask' is known to be zero.  We use this predicate to
 /// simplify operations downstream. Mask is known to be zero for bits that V
 /// cannot have.
@@ -2545,7 +2783,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
     }
 
     // This insert value inserts something else than what we are looking for.
-    // See if the (aggregrate) value inserted into has the value we are
+    // See if the (aggregate) value inserted into has the value we are
    // looking for, then.
    if (*req_idx != *i)
      return FindInsertedValue(I->getAggregateOperand(), idx_range,
@@ -2560,7 +2798,7 @@
  }
 
  if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
-    // If we're extracting a value from an aggregrate that was extracted from
+    // If we're extracting a value from an aggregate that was extracted from
    // something else, we can extract from that something else directly instead.
    // However, we will need to chain I's indices with the requested indices.
@@ -2935,20 +3173,42 @@ static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,
   return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);
 }
 
-/// Return true if Value is always a dereferenceable pointer.
-///
+static bool isAligned(const Value *Base, APInt Offset, unsigned Align,
+                      const DataLayout &DL) {
+  APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL));
+
+  if (!BaseAlign) {
+    Type *Ty = Base->getType()->getPointerElementType();
+    if (!Ty->isSized())
+      return false;
+    BaseAlign = DL.getABITypeAlignment(Ty);
+  }
+
+  APInt Alignment(Offset.getBitWidth(), Align);
+
+  assert(Alignment.isPowerOf2() && "must be a power of 2!");
+  return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1));
+}
+
+static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) {
+  Type *Ty = Base->getType();
+  assert(Ty->isSized() && "must be sized");
+  APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0);
+  return isAligned(Base, Offset, Align, DL);
+}
+
 /// Test if V is always a pointer to allocated and suitably aligned memory for
 /// a simple load or store.
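// Editor's aside: a numeric instance of the isAligned check above. With a
// base known to be 16-byte aligned, a constant Offset of 8, and a required
// Align of 8, BaseAlign.uge(Alignment) holds (16 >= 8) and
// (Offset & (Alignment - 1)) == (8 & 7) == 0, so the access is accepted as
// 8-byte aligned.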
-static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI, - SmallPtrSetImpl &Visited) { +static bool isDereferenceableAndAlignedPointer( + const Value *V, unsigned Align, const DataLayout &DL, + const Instruction *CtxI, const DominatorTree *DT, + const TargetLibraryInfo *TLI, SmallPtrSetImpl &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. - // These are obviously ok. - if (isa(V)) return true; + // These are obviously ok if aligned. + if (isa(V)) + return isAligned(V, Align, DL); // It's not always safe to follow a bitcast, for example: // bitcast i8* (alloca i8) to i32* @@ -2963,21 +3223,22 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, if (STy->isSized() && DTy->isSized() && (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) && (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) - return isDereferenceablePointer(BC->getOperand(0), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); } // Global variables which can't collapse to null are ok. if (const GlobalVariable *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); + if (!GV->hasExternalWeakLinkage()) + return isAligned(V, Align, DL); // byval arguments are okay. if (const Argument *A = dyn_cast(V)) if (A->hasByValAttr()) - return true; - + return isAligned(V, Align, DL); + if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI)) - return true; + return isAligned(V, Align, DL); // For GEPs, determine if the indexing lands within the allocated object. if (const GEPOperator *GEP = dyn_cast(V)) { @@ -2985,61 +3246,79 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, Type *Ty = VTy->getPointerElementType(); const Value *Base = GEP->getPointerOperand(); - // Conservatively require that the base pointer be fully dereferenceable. + // Conservatively require that the base pointer be fully dereferenceable + // and aligned. if (!Visited.insert(Base).second) return false; - if (!isDereferenceablePointer(Base, DL, CtxI, - DT, TLI, Visited)) + if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI, + Visited)) return false; - + APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0); if (!GEP->accumulateConstantOffset(DL, Offset)) return false; - - // Check if the load is within the bounds of the underlying object. + + // Check if the load is within the bounds of the underlying object + // and offset is aligned. 
uint64_t LoadSize = DL.getTypeStoreSize(Ty); Type *BaseType = Base->getType()->getPointerElementType(); - return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)); + assert(isPowerOf2_32(Align) && "must be a power of 2!"); + return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) && + !(Offset & APInt(Offset.getBitWidth(), Align-1)); } // For gc.relocate, look through relocations if (const IntrinsicInst *I = dyn_cast(V)) if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) { GCRelocateOperands RelocateInst(I); - return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer( + RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited); } if (const AddrSpaceCastInst *ASC = dyn_cast(V)) - return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI, - DT, TLI, Visited); + return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL, + CtxI, DT, TLI, Visited); // If we don't know, assume the worst. return false; } -bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, - const Instruction *CtxI, - const DominatorTree *DT, - const TargetLibraryInfo *TLI) { +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, + const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { // When dereferenceability information is provided by a dereferenceable // attribute, we know exactly how many bytes are dereferenceable. If we can // determine the exact offset to the attributed variable, we can use that // information here. Type *VTy = V->getType(); Type *Ty = VTy->getPointerElementType(); + + // Require ABI alignment for loads without alignment specification + if (Align == 0) + Align = DL.getABITypeAlignment(Ty); + if (Ty->isSized()) { APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0); const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); - + if (Offset.isNonNegative()) - if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, - CtxI, DT, TLI)) + if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) && + isAligned(BV, Offset, Align, DL)) return true; } SmallPtrSet Visited; - return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited); + return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI, + Visited); +} + +bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, + const Instruction *CtxI, + const DominatorTree *DT, + const TargetLibraryInfo *TLI) { + return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI); } bool llvm::isSafeToSpeculativelyExecute(const Value *V, @@ -3089,10 +3368,15 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, const LoadInst *LI = cast(Inst); if (!LI->isUnordered() || // Speculative load may create a race that did not exist in the source. - LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) + LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeThread) || + // Speculative load may load data from dirty regions. 
+ LI->getParent()->getParent()->hasFnAttribute( + Attribute::SanitizeAddress)) return false; const DataLayout &DL = LI->getModule()->getDataLayout(); - return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI); + return isDereferenceableAndAlignedPointer( + LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast(Inst)) { @@ -3147,16 +3431,27 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, case Instruction::Switch: case Instruction::Unreachable: case Instruction::Fence: - case Instruction::LandingPad: case Instruction::AtomicRMW: case Instruction::AtomicCmpXchg: + case Instruction::LandingPad: case Instruction::Resume: + case Instruction::CatchSwitch: + case Instruction::CatchPad: + case Instruction::CatchRet: + case Instruction::CleanupPad: + case Instruction::CleanupRet: return false; // Misc instructions which have effects } } +bool llvm::mayBeMemoryDependent(const Instruction &I) { + return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I); +} + /// Return true if we know that the specified value is never null. bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + // Alloca never returns null, malloc might. if (isa(V)) return true; @@ -3164,9 +3459,12 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { if (const Argument *A = dyn_cast(V)) return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); - // Global values are not null unless extern weak. + // A global variable in address space 0 is non null unless extern weak. + // Other address spaces may have null as a valid address for a global, + // so we can't assume anything. if (const GlobalValue *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); + return !GV->hasExternalWeakLinkage() && + GV->getType()->getAddressSpace() == 0; // A Load tagged w/nonnull metadata is never null. if (const LoadInst *LI = dyn_cast(V)) @@ -3186,6 +3484,8 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { static bool isKnownNonNullFromDominatingCondition(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { + assert(V->getType()->isPointerTy() && "V must be pointer type"); + unsigned NumUsesExplored = 0; for (auto U : V->users()) { // Avoid massive lists @@ -3316,40 +3616,339 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS, return OverflowResult::MayOverflow; } -static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred, +static OverflowResult computeOverflowForSignedAdd( + Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL, + AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { + if (Add && Add->hasNoSignedWrap()) { + return OverflowResult::NeverOverflows; + } + + bool LHSKnownNonNegative, LHSKnownNegative; + bool RHSKnownNonNegative, RHSKnownNegative; + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, + AC, CxtI, DT); + + if ((LHSKnownNonNegative && RHSKnownNegative) || + (LHSKnownNegative && RHSKnownNonNegative)) { + // The sign bits are opposite: this CANNOT overflow. + return OverflowResult::NeverOverflows; + } + + // The remaining code needs Add to be available. Early returns if not so. 
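// Editor's aside: the opposite-sign case above is the standard two's
// complement argument. When sign(LHS) != sign(RHS), the true sum always fits;
// e.g. for i8, LHS in [-128, -1] and RHS in [0, 127] gives LHS + RHS in
// [-128, 126], which never leaves the representable range.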
+ if (!Add) + return OverflowResult::MayOverflow; + + // If the sign of Add is the same as at least one of the operands, this add + // CANNOT overflow. This is particularly useful when the sum is + // @llvm.assume'ed non-negative rather than proved so from analyzing its + // operands. + bool LHSOrRHSKnownNonNegative = + (LHSKnownNonNegative || RHSKnownNonNegative); + bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative); + if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { + bool AddKnownNonNegative, AddKnownNegative; + ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL, + /*Depth=*/0, AC, CxtI, DT); + if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) || + (AddKnownNegative && LHSOrRHSKnownNegative)) { + return OverflowResult::NeverOverflows; + } + } + + return OverflowResult::MayOverflow; +} + +OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), + Add, DL, AC, CxtI, DT); +} + +OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS, + const DataLayout &DL, + AssumptionCache *AC, + const Instruction *CxtI, + const DominatorTree *DT) { + return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); +} + +bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { + // FIXME: This conservative implementation can be relaxed. E.g. most + // atomic operations are guaranteed to terminate on most platforms + // and most functions terminate. + + return !I->isAtomic() && // atomics may never succeed on some platforms + !isa(I) && // could throw and might not terminate + !isa(I) && // might not terminate and could throw to + // non-successor (see bug 24185 for details). + !isa(I) && // has no successors + !isa(I); // has no successors +} + +bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, + const Loop *L) { + // The loop header is guaranteed to be executed for every iteration. + // + // FIXME: Relax this constraint to cover all basic blocks that are + // guaranteed to be executed at every iteration. + if (I->getParent() != L->getHeader()) return false; + + for (const Instruction &LI : *L->getHeader()) { + if (&LI == I) return true; + if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; + } + llvm_unreachable("Instruction not contained in its own parent basic block."); +} + +bool llvm::propagatesFullPoison(const Instruction *I) { + switch (I->getOpcode()) { + case Instruction::Add: + case Instruction::Sub: + case Instruction::Xor: + case Instruction::Trunc: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + // These operations all propagate poison unconditionally. Note that poison + // is not any particular value, so xor or subtraction of poison with + // itself still yields poison, not zero. + return true; + + case Instruction::AShr: + case Instruction::SExt: + // For these operations, one bit of the input is replicated across + // multiple output bits. A replicated poison bit is still poison. + return true; + + case Instruction::Shl: { + // Left shift *by* a poison value is poison. The number of + // positions to shift is unsigned, so no negative values are + // possible there. Left shift by zero places preserves poison. So + // it only remains to consider left shift of poison by a positive + // number of places. 
+    //
+    // A left shift by a positive number of places leaves the lowest order bit
+    // non-poisoned. However, if such a shift has a no-wrap flag, then we can
+    // make the poison operand violate that flag, yielding a fresh full-poison
+    // value.
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap();
+  }
+
+  case Instruction::Mul: {
+    // A multiplication by zero yields a non-poison zero result, so we need to
+    // rule out zero as an operand. Conservatively, multiplication by a
+    // non-zero constant is not multiplication by zero.
+    //
+    // Multiplication by a non-zero constant can leave some bits
+    // non-poisoned. For example, a multiplication by 2 leaves the lowest
+    // order bit unpoisoned. So we need to consider that.
+    //
+    // Multiplication by 1 preserves poison. If the multiplication has a
+    // no-wrap flag, then we can make the poison operand violate that flag
+    // when multiplied by any integer other than 0 and 1.
+    auto *OBO = cast<OverflowingBinaryOperator>(I);
+    if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) {
+      for (Value *V : OBO->operands()) {
+        if (auto *CI = dyn_cast<ConstantInt>(V)) {
+          // A ConstantInt cannot yield poison, so we can assume that it is
+          // the other operand that is poison.
+          return !CI->isZero();
+        }
+      }
+    }
+    return false;
+  }
+
+  case Instruction::GetElementPtr:
+    // A GEP implicitly represents a sequence of additions, subtractions,
+    // truncations, sign extensions and multiplications. The multiplications
+    // are by the non-zero sizes of some set of types, so we do not have to be
+    // concerned with multiplication by zero. If the GEP is in-bounds, then
+    // these operations are implicitly no-signed-wrap so poison is propagated
+    // by the arguments above for Add, Sub, Trunc, SExt and Mul.
+    return cast<GEPOperator>(I)->isInBounds();
+
+  default:
+    return false;
+  }
+}
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
+  switch (I->getOpcode()) {
+    case Instruction::Store:
+      return cast<StoreInst>(I)->getPointerOperand();
+
+    case Instruction::Load:
+      return cast<LoadInst>(I)->getPointerOperand();
+
+    case Instruction::AtomicCmpXchg:
+      return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+    case Instruction::AtomicRMW:
+      return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+      return I->getOperand(1);
+
+    default:
+      return nullptr;
+  }
+}
+
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+  // We currently only look for uses of poison values within the same basic
+  // block, as that makes it easier to guarantee that the uses will be
+  // executed given that PoisonI is executed.
+  //
+  // FIXME: Expand this to consider uses beyond the same basic block. To do
+  // this, look out for the distinction between post-dominance and strong
+  // post-dominance.
+  const BasicBlock *BB = PoisonI->getParent();
+
+  // Set of instructions that we have proved will yield poison if PoisonI
+  // does.
+  SmallSet<const Value *, 16> YieldsPoison;
+  YieldsPoison.insert(PoisonI);
+
+  for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end();
+       I != E; ++I) {
+    if (&*I != PoisonI) {
+      const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I);
+      if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+        return false;
+    }
+
+    // Mark poison that propagates from I through uses of I.
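// Editor's aside: the two helpers above compose as follows on a typical
// pattern (hypothetical IR, with %p playing the role of PoisonI below):
//   %q = add nsw i32 %p, 1   ; propagatesFullPoison -> %q is full poison too
//   %r = sdiv i32 %x, %q     ; getGuaranteedNonFullPoisonOp -> returns %q
// If %r executes after %p in the same block, executing it with %q full
// poison would be undefined, so the scan below may conclude that %p cannot
// have been full poison.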
+
+const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) {
+  switch (I->getOpcode()) {
+  case Instruction::Store:
+    return cast<StoreInst>(I)->getPointerOperand();
+
+  case Instruction::Load:
+    return cast<LoadInst>(I)->getPointerOperand();
+
+  case Instruction::AtomicCmpXchg:
+    return cast<AtomicCmpXchgInst>(I)->getPointerOperand();
+
+  case Instruction::AtomicRMW:
+    return cast<AtomicRMWInst>(I)->getPointerOperand();
+
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+    return I->getOperand(1);
+
+  default:
+    return nullptr;
+  }
+}
+
+bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
+  // We currently only look for uses of poison values within the same basic
+  // block, as that makes it easier to guarantee that the uses will be
+  // executed given that PoisonI is executed.
+  //
+  // FIXME: Expand this to consider uses beyond the same basic block. To do
+  // this, look out for the distinction between post-dominance and strong
+  // post-dominance.
+  const BasicBlock *BB = PoisonI->getParent();
+
+  // Set of instructions that we have proved will yield poison if PoisonI
+  // does.
+  SmallSet<const Value *, 16> YieldsPoison;
+  YieldsPoison.insert(PoisonI);
+
+  for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end();
+       I != E; ++I) {
+    if (&*I != PoisonI) {
+      const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I);
+      if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true;
+      if (!isGuaranteedToTransferExecutionToSuccessor(&*I))
+        return false;
+    }
+
+    // Mark poison that propagates from I through uses of I.
+    if (YieldsPoison.count(&*I)) {
+      for (const User *User : I->users()) {
+        const Instruction *UserI = cast<Instruction>(User);
+        if (UserI->getParent() == BB && propagatesFullPoison(UserI))
+          YieldsPoison.insert(User);
+      }
+    }
+  }
+  return false;
+}
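+
+// Example (illustrative only, all in one basic block):
+//   %idx = add nsw i32 %a, %b                          ; PoisonI
+//   %gep = getelementptr inbounds i32, i32* %p, i32 %idx
+//   store i32 0, i32* %gep
+// The inbounds GEP propagates full poison to %gep, and %gep is the store's
+// guaranteed-non-poison pointer operand, so the walk above returns true.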
+
+static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+  if (FMF.noNaNs())
+    return true;
+
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return !C->isNaN();
+  return false;
+}
+
+static bool isKnownNonZero(Value *V) {
+  if (auto *C = dyn_cast<ConstantFP>(V))
+    return !C->isZero();
+  return false;
+}
+
-static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
+static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
+                                              FastMathFlags FMF,
                                               Value *CmpLHS, Value *CmpRHS,
                                               Value *TrueVal, Value *FalseVal,
                                               Value *&LHS, Value *&RHS) {
   LHS = CmpLHS;
   RHS = CmpRHS;
 
-  // (icmp X, Y) ? X : Y
-  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
-    switch (Pred) {
-    default: return SPF_UNKNOWN; // Equality.
-    case ICmpInst::ICMP_UGT:
-    case ICmpInst::ICMP_UGE: return SPF_UMAX;
-    case ICmpInst::ICMP_SGT:
-    case ICmpInst::ICMP_SGE: return SPF_SMAX;
-    case ICmpInst::ICMP_ULT:
-    case ICmpInst::ICMP_ULE: return SPF_UMIN;
-    case ICmpInst::ICMP_SLT:
-    case ICmpInst::ICMP_SLE: return SPF_SMIN;
+  // If the predicate is an "or-equal" (FP) predicate, then signed zeroes may
+  // return inconsistent results between implementations.
+  //   (0.0 <= -0.0) ? 0.0 : -0.0 // Returns 0.0
+  //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1)
+  // Therefore we behave conservatively and only proceed if at least one of the
+  // operands is known to not be zero, or if we don't care about signed zeroes.
+  switch (Pred) {
+  default: break;
+  case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE:
+    if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
+        !isKnownNonZero(CmpRHS))
+      return {SPF_UNKNOWN, SPNB_NA, false};
+  }
+
+  SelectPatternNaNBehavior NaNBehavior = SPNB_NA;
+  bool Ordered = false;
+
+  // When given one NaN and one non-NaN input:
+  //  - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input.
+  //  - A simple C99 (a < b ? a : b) construction will return 'b' (as the
+  //    ordered comparison fails), which could be NaN or non-NaN.
+  // so here we discover exactly what NaN behavior is required/accepted.
+  if (CmpInst::isFPPredicate(Pred)) {
+    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF);
+    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF);
+
+    if (LHSSafe && RHSSafe) {
+      // Both operands are known non-NaN.
+      NaNBehavior = SPNB_RETURNS_ANY;
+    } else if (CmpInst::isOrdered(Pred)) {
+      // An ordered comparison will return false when given a NaN, so it
+      // returns the RHS.
+      Ordered = true;
+      if (LHSSafe)
+        // LHS is non-NaN, so if RHS is NaN then NaN will be returned.
+        NaNBehavior = SPNB_RETURNS_NAN;
+      else if (RHSSafe)
+        NaNBehavior = SPNB_RETURNS_OTHER;
+      else
+        // Completely unsafe.
+        return {SPF_UNKNOWN, SPNB_NA, false};
+    } else {
+      Ordered = false;
+      // An unordered comparison will return true when given a NaN, so it
+      // returns the LHS.
+      if (LHSSafe)
+        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned.
+        NaNBehavior = SPNB_RETURNS_OTHER;
+      else if (RHSSafe)
+        NaNBehavior = SPNB_RETURNS_NAN;
+      else
+        // Completely unsafe.
+        return {SPF_UNKNOWN, SPNB_NA, false};
     }
   }
 
-  // (icmp X, Y) ? Y : X
   if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
+    std::swap(CmpLHS, CmpRHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+    if (NaNBehavior == SPNB_RETURNS_NAN)
+      NaNBehavior = SPNB_RETURNS_OTHER;
+    else if (NaNBehavior == SPNB_RETURNS_OTHER)
+      NaNBehavior = SPNB_RETURNS_NAN;
+    Ordered = !Ordered;
+  }
+
+  // ([if]cmp X, Y) ? X : Y
+  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {
     switch (Pred) {
-    default: return SPF_UNKNOWN; // Equality.
+    default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.
     case ICmpInst::ICMP_UGT:
-    case ICmpInst::ICMP_UGE: return SPF_UMIN;
+    case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};
     case ICmpInst::ICMP_SGT:
-    case ICmpInst::ICMP_SGE: return SPF_SMIN;
+    case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};
     case ICmpInst::ICMP_ULT:
-    case ICmpInst::ICMP_ULE: return SPF_UMAX;
+    case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};
     case ICmpInst::ICMP_SLT:
-    case ICmpInst::ICMP_SLE: return SPF_SMAX;
+    case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false};
+    case FCmpInst::FCMP_UGT:
+    case FCmpInst::FCMP_UGE:
+    case FCmpInst::FCMP_OGT:
+    case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered};
+    case FCmpInst::FCMP_ULT:
+    case FCmpInst::FCMP_ULE:
+    case FCmpInst::FCMP_OLT:
+    case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};
     }
   }
@@ -3360,13 +3959,13 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
       // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
       // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
       if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
-        return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS;
+        return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
       }
 
       // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
       // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
       if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
-        return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS;
+        return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
       }
     }
@@ -3377,24 +3976,36 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,
           match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
         LHS = TrueVal;
         RHS = FalseVal;
-        return SPF_SMIN;
+        return {SPF_SMIN, SPNB_NA, false};
       }
     }
   }
 
   // TODO: (X > 4) ? X : 5   -->  (X >= 5) ? X : 5  -->  MAX(X, 5)
 
-  return SPF_UNKNOWN;
+  return {SPF_UNKNOWN, SPNB_NA, false};
 }
 
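+// Illustrative note (an exposition aid, not from the original patch):
+// lookThroughCast below lets the matcher handle selects whose compare runs
+// on a widened operand, e.g. "select (fcmp olt (fpext float %a to double),
+// 2.0), ..." by translating the double constant back to float so min/max
+// matching can proceed at the original type.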
-static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
-                                 Instruction::CastOps *CastOp) {
+static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
+                              Instruction::CastOps *CastOp) {
   CastInst *CI = dyn_cast<CastInst>(V1);
   Constant *C = dyn_cast<Constant>(V2);
-  if (!CI || !C)
+  CastInst *CI2 = dyn_cast<CastInst>(V2);
+  if (!CI)
     return nullptr;
   *CastOp = CI->getOpcode();
 
+  if (CI2) {
+    // If V1 and V2 are both the same cast from the same type, we can look
+    // through V1.
+    if (CI2->getOpcode() == CI->getOpcode() &&
+        CI2->getSrcTy() == CI->getSrcTy())
+      return CI2->getOperand(0);
+    return nullptr;
+  } else if (!C) {
+    return nullptr;
+  }
+
   if (isa<SExtInst>(CI) && CmpI->isSigned()) {
     Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());
     // This is only valid if the truncated value can be sign-extended
@@ -3409,39 +4020,200 @@ static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,
   if (isa<ZExtInst>(CI))
     return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned());
 
+  if (isa<FPToUIInst>(CI))
+    return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true);
+
+  if (isa<FPToSIInst>(CI))
+    return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true);
+
+  if (isa<UIToFPInst>(CI))
+    return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true);
+
+  if (isa<SIToFPInst>(CI))
+    return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true);
+
+  if (isa<FPTruncInst>(CI))
+    return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true);
+
+  if (isa<FPExtInst>(CI))
+    return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true);
+
   return nullptr;
 }
 
-SelectPatternFlavor llvm::matchSelectPattern(Value *V,
+SelectPatternResult llvm::matchSelectPattern(Value *V,
                                              Value *&LHS, Value *&RHS,
                                              Instruction::CastOps *CastOp) {
   SelectInst *SI = dyn_cast<SelectInst>(V);
-  if (!SI) return SPF_UNKNOWN;
+  if (!SI) return {SPF_UNKNOWN, SPNB_NA, false};
 
-  ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition());
-  if (!CmpI) return SPF_UNKNOWN;
+  CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition());
+  if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false};
 
-  ICmpInst::Predicate Pred = CmpI->getPredicate();
+  CmpInst::Predicate Pred = CmpI->getPredicate();
   Value *CmpLHS = CmpI->getOperand(0);
   Value *CmpRHS = CmpI->getOperand(1);
   Value *TrueVal = SI->getTrueValue();
   Value *FalseVal = SI->getFalseValue();
+  FastMathFlags FMF;
+  if (isa<FCmpInst>(CmpI))
+    FMF = CmpI->getFastMathFlags();
 
   // Bail out early.
   if (CmpI->isEquality())
-    return SPF_UNKNOWN;
+    return {SPF_UNKNOWN, SPNB_NA, false};
 
   // Deal with type mismatches.
   if (CastOp && CmpLHS->getType() != TrueVal->getType()) {
-    if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
-      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp))
+      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   cast<CastInst>(TrueVal)->getOperand(0), C,
                                   LHS, RHS);
-    if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
-      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS,
+    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp))
+      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,
                                   C, cast<CastInst>(FalseVal)->getOperand(0),
                                   LHS, RHS);
   }
-  return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal,
+  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,
                               LHS, RHS);
 }
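+
+// Usage sketch (illustrative only; S is assumed to point at a select
+// "%s = select i1 %c, i32 %a, i32 %b" with "%c = icmp ugt i32 %a, %b"):
+//   Value *L, *R;
+//   SelectPatternResult SPR = matchSelectPattern(S, L, R);
+//   // SPR.Flavor == SPF_UMAX; L is %a, R is %b.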
+
+ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) {
+  const unsigned NumRanges = Ranges.getNumOperands() / 2;
+  assert(NumRanges >= 1 && "Must have at least one range!");
+  assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
+
+  auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
+  auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
+
+  ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
+
+  for (unsigned i = 1; i < NumRanges; ++i) {
+    auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+    auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
+
+    // Note: unionWith will potentially create a range that contains values not
+    // contained in any of the original N ranges.
+    CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
+  }
+
+  return CR;
+}
+
+/// Return true if "icmp Pred LHS RHS" is always true.
+static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+                            const DataLayout &DL, unsigned Depth,
+                            AssumptionCache *AC, const Instruction *CxtI,
+                            const DominatorTree *DT) {
+  assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!");
+  if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS)
+    return true;
+
+  switch (Pred) {
+  default:
+    return false;
+
+  case CmpInst::ICMP_SLE: {
+    const APInt *C;
+
+    // LHS s<= LHS +_{nsw} C   if C >= 0
+    if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))))
+      return !C->isNegative();
+    return false;
+  }
+
+  case CmpInst::ICMP_ULE: {
+    const APInt *C;
+
+    // LHS u<= LHS +_{nuw} C   for any C
+    if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
+      return true;
+
+    // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
+    auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X,
+                                       const APInt *&CA, const APInt *&CB) {
+      if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
+          match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
+        return true;
+
+      // If X & C == 0 then (X | C) == X +_{nuw} C
+      if (match(A, m_Or(m_Value(X), m_APInt(CA))) &&
+          match(B, m_Or(m_Specific(X), m_APInt(CB)))) {
+        unsigned BitWidth = CA->getBitWidth();
+        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+        computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT);
+
+        if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB)
+          return true;
+      }
+
+      return false;
+    };
+
+    Value *X;
+    const APInt *CLHS, *CRHS;
+    if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
+      return CLHS->ule(*CRHS);
+
+    return false;
+  }
+  }
+}
+
+/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
+/// ALHS ARHS" is true.
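+/// For example (illustrative only): "%x s< %y" implies "%x s< %z" when
+/// %z = add nsw i32 %y, 1, since BLHS s<= ALHS holds trivially and
+/// isTruePredicate proves ARHS s<= BRHS from the nsw add of a non-negative
+/// constant.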
+static bool isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS,
+                                  Value *ARHS, Value *BLHS, Value *BRHS,
+                                  const DataLayout &DL, unsigned Depth,
+                                  AssumptionCache *AC, const Instruction *CxtI,
+                                  const DominatorTree *DT) {
+  switch (Pred) {
+  default:
+    return false;
+
+  case CmpInst::ICMP_SLT:
+  case CmpInst::ICMP_SLE:
+    return isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI,
+                           DT) &&
+           isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI,
+                           DT);
+
+  case CmpInst::ICMP_ULT:
+  case CmpInst::ICMP_ULE:
+    return isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI,
+                           DT) &&
+           isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI,
+                           DT);
+  }
+}
+
+bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL,
+                              unsigned Depth, AssumptionCache *AC,
+                              const Instruction *CxtI,
+                              const DominatorTree *DT) {
+  assert(LHS->getType() == RHS->getType() && "mismatched type");
+  Type *OpTy = LHS->getType();
+  assert(OpTy->getScalarType()->isIntegerTy(1));
+
+  // LHS ==> RHS by definition
+  if (LHS == RHS) return true;
+
+  if (OpTy->isVectorTy())
+    // TODO: extending the code below to handle vectors
+    return false;
+  assert(OpTy->isIntegerTy(1) && "implied by above");
+
+  ICmpInst::Predicate APred, BPred;
+  Value *ALHS, *ARHS;
+  Value *BLHS, *BRHS;
+
+  if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) ||
+      !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS))))
+    return false;
+
+  if (APred == BPred)
+    return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC,
+                                 CxtI, DT);
+
+  return false;
+}
diff --git a/lib/Analysis/VectorUtils.cpp b/lib/Analysis/VectorUtils.cpp
index 8c671ef0ef0e..4b244ec5e1f6 100644
--- a/lib/Analysis/VectorUtils.cpp
+++ b/lib/Analysis/VectorUtils.cpp
@@ -11,13 +11,20 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Value.h"
+#include "llvm/IR/Constants.h"
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// \brief Identify if the intrinsic is trivially vectorizable.
 /// This method returns true if the intrinsic's argument types are all
@@ -79,7 +86,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
 ///    d) call should only reads memory.
 /// If all these condition is met then return ValidIntrinsicID
 /// else return not_intrinsic.
-llvm::Intrinsic::ID
+Intrinsic::ID
 llvm::checkUnaryFloatSignature(const CallInst &I,
                                Intrinsic::ID ValidIntrinsicID) {
   if (I.getNumArgOperands() != 1 ||
@@ -98,7 +105,7 @@ llvm::checkUnaryFloatSignature(const CallInst &I,
 ///    d) call should only reads memory.
 /// If all these condition is met then return ValidIntrinsicID
 /// else return not_intrinsic.
-llvm::Intrinsic::ID
+Intrinsic::ID
 llvm::checkBinaryFloatSignature(const CallInst &I,
                                 Intrinsic::ID ValidIntrinsicID) {
   if (I.getNumArgOperands() != 2 ||
@@ -114,8 +121,8 @@ llvm::checkBinaryFloatSignature(const CallInst &I,
 /// \brief Returns intrinsic ID for call.
 /// For the input call instruction it finds mapping intrinsic and returns
 /// its ID, in case it does not found it return not_intrinsic.
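+/// For example (illustrative only): a read-only call to "float @sinf(float)"
+/// that the TargetLibraryInfo recognizes maps to Intrinsic::sin via
+/// checkUnaryFloatSignature.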
-llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
-                                                const TargetLibraryInfo *TLI) {
+Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI,
+                                          const TargetLibraryInfo *TLI) {
   // If we have an intrinsic call, check if it is trivially vectorizable.
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
     Intrinsic::ID ID = II->getIntrinsicID();
@@ -228,8 +235,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
       cast<PointerType>(Gep->getType()->getScalarType())->getElementType());
 
   // Walk backwards and try to peel off zeros.
-  while (LastOperand > 1 &&
-         match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) {
+  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
     // Find the type we're currently indexing into.
     gep_type_iterator GEPTI = gep_type_begin(Gep);
     std::advance(GEPTI, LastOperand - 1);
@@ -247,8 +253,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
 /// \brief If the argument is a GEP, then returns the operand identified by
 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
 /// operand, it returns that instead.
-llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
-                                      Loop *Lp) {
+Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
   if (!GEP)
     return Ptr;
@@ -265,8 +270,8 @@ llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,
 }
 
 /// \brief If a value has only one user that is a CastInst, return it.
-llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
-  llvm::Value *UniqueCast = nullptr;
+Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
+  Value *UniqueCast = nullptr;
   for (User *U : Ptr->users()) {
     CastInst *CI = dyn_cast<CastInst>(U);
     if (CI && CI->getType() == Ty) {
@@ -281,16 +286,15 @@ llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {
 
 /// \brief Get the stride of a pointer access in a loop. Looks for symbolic
 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
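+/// For example (illustrative only): for an access "a[i * %n]", the pointer's
+/// SCEV is a recurrence whose step is "%n * sizeof(element)"; the element
+/// size is divided out below and the loop-invariant %n is returned.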
-llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
-                                        Loop *Lp) {
-  const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
   if (!PtrTy || PtrTy->isAggregateType())
     return nullptr;
 
   // Try to remove a gep instruction to make the pointer (actually index at this
   // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
   // pointer, otherwise, we are analyzing the index.
-  llvm::Value *OrigPtr = Ptr;
+  Value *OrigPtr = Ptr;
 
   // The size of the pointer access.
   int64_t PtrAccessSize = 1;
@@ -320,8 +324,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
   if (M->getOperand(0)->getSCEVType() != scConstant)
     return nullptr;
 
-  const APInt &APStepVal =
-      cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
+  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
 
   // Huge step value - give up.
   if (APStepVal.getBitWidth() > 64)
@@ -346,7 +349,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
   if (!U)
     return nullptr;
 
-  llvm::Value *Stride = U->getValue();
+  Value *Stride = U->getValue();
   if (!Lp->isLoopInvariant(Stride))
     return nullptr;
 
@@ -361,7 +364,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,
 /// \brief Given a vector and an element number, see if the scalar value is
 /// already around as a register, for example if it were inserted then extracted
 /// from the vector.
-llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
+Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
   assert(V->getType()->isVectorTy() && "Not looking at a vector?");
   VectorType *VTy = cast<VectorType>(V->getType());
   unsigned Width = VTy->getNumElements();
@@ -399,14 +402,166 @@ llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {
   // Extract a value from a vector add operation with a constant zero.
   Value *Val = nullptr;
   Constant *Con = nullptr;
-  if (match(V,
-            llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val),
-                                      llvm::PatternMatch::m_Constant(Con)))) {
+  if (match(V, m_Add(m_Value(Val), m_Constant(Con))))
     if (Constant *Elt = Con->getAggregateElement(EltNo))
       if (Elt->isNullValue())
         return findScalarElement(Val, EltNo);
-  }
 
   // Otherwise, we don't know.
   return nullptr;
 }
+
+/// \brief Get splat value if the input is a splat vector or return nullptr.
+/// This function is not fully general. It checks only 2 cases:
+/// the input value is (1) a splat constants vector or (2) a sequence
+/// of instructions that broadcast a single value into a vector.
+///
+const llvm::Value *llvm::getSplatValue(const Value *V) {
+
+  if (auto *C = dyn_cast<Constant>(V))
+    if (isa<VectorType>(V->getType()))
+      return C->getSplatValue();
+
+  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
+  if (!ShuffleInst)
+    return nullptr;
+  // All-zero (or undef) shuffle mask elements.
+  for (int MaskElt : ShuffleInst->getShuffleMask())
+    if (MaskElt != 0 && MaskElt != -1)
+      return nullptr;
+  // The first shuffle source is 'insertelement' with index 0.
+  auto *InsertEltInst =
+      dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
+  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
+      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue())
+    return nullptr;
+
+  return InsertEltInst->getOperand(1);
+}
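+
+// Example (illustrative only): getSplatValue returns %f for
+//   %ins   = insertelement <4 x float> undef, float %f, i32 0
+//   %splat = shufflevector <4 x float> %ins, <4 x float> undef,
+//                          <4 x i32> zeroinitializer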
+
+MapVector<Instruction *, uint64_t>
+llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
+                               const TargetTransformInfo *TTI) {
+
+  // DemandedBits will give us every value's live-out bits. But we want
+  // to ensure no extra casts would need to be inserted, so every DAG
+  // of connected values must have the same minimum bitwidth.
+  EquivalenceClasses<Value *> ECs;
+  SmallVector<Value *, 16> Worklist;
+  SmallPtrSet<Value *, 4> Roots;
+  SmallPtrSet<Value *, 16> Visited;
+  DenseMap<Value *, uint64_t> DBits;
+  SmallPtrSet<Instruction *, 4> InstructionSet;
+  MapVector<Instruction *, uint64_t> MinBWs;
+
+  // Determine the roots. We work bottom-up, from truncs or icmps.
+  bool SeenExtFromIllegalType = false;
+  for (auto *BB : Blocks)
+    for (auto &I : *BB) {
+      InstructionSet.insert(&I);
+
+      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
+          !TTI->isTypeLegal(I.getOperand(0)->getType()))
+        SeenExtFromIllegalType = true;
+
+      // Only deal with non-vector integers up to 64-bits wide.
+      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
+          !I.getType()->isVectorTy() &&
+          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
+        // Don't make work for ourselves. If we know the loaded type is legal,
+        // don't add it to the worklist.
+        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
+          continue;
+
+        Worklist.push_back(&I);
+        Roots.insert(&I);
+      }
+    }
+  // Early exit.
+  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
+    return MinBWs;
+
+  // Now proceed breadth-first, unioning values together.
+  while (!Worklist.empty()) {
+    Value *Val = Worklist.pop_back_val();
+    Value *Leader = ECs.getOrInsertLeaderValue(Val);
+
+    if (Visited.count(Val))
+      continue;
+    Visited.insert(Val);
+
+    // Non-instructions terminate a chain successfully.
+    if (!isa<Instruction>(Val))
+      continue;
+    Instruction *I = cast<Instruction>(Val);
+
+    // If we encounter a type that is larger than 64 bits, we can't represent
+    // it so bail out.
+    if (DB.getDemandedBits(I).getBitWidth() > 64)
+      return MapVector<Instruction *, uint64_t>();
+
+    uint64_t V = DB.getDemandedBits(I).getZExtValue();
+    DBits[Leader] |= V;
+
+    // Casts, loads and instructions outside of our range terminate a chain
+    // successfully.
+    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
+        !InstructionSet.count(I))
+      continue;
+
+    // Unsafe casts terminate a chain unsuccessfully. We can't do anything
+    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
+    // transform anything that relies on them.
+    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
+        !I->getType()->isIntegerTy()) {
+      DBits[Leader] |= ~0ULL;
+      continue;
+    }
+
+    // We don't modify the types of PHIs. Reductions will already have been
+    // truncated if possible, and inductions' sizes will have been chosen by
+    // indvars.
+    if (isa<PHINode>(I))
+      continue;
+
+    if (DBits[Leader] == ~0ULL)
+      // All bits demanded, no point continuing.
+      continue;
+
+    for (Value *O : cast<User>(I)->operands()) {
+      ECs.unionSets(Leader, O);
+      Worklist.push_back(O);
+    }
+  }
+
+  // Now we've discovered all values, walk them to see if there are
+  // any users we didn't see. If there are, we can't optimize that
+  // chain.
+  for (auto &I : DBits)
+    for (auto *U : I.first->users())
+      if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
+        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
+
+  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
+    uint64_t LeaderDemandedBits = 0;
+    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
+      LeaderDemandedBits |= DBits[*MI];
+
+    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
+                     llvm::countLeadingZeros(LeaderDemandedBits);
+    // Round up to a power of 2
+    if (!isPowerOf2_64((uint64_t)MinBW))
+      MinBW = NextPowerOf2(MinBW);
+    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
+      if (!isa<Instruction>(*MI))
+        continue;
+      Type *Ty = (*MI)->getType();
+      if (Roots.count(*MI))
+        Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
+      if (MinBW < Ty->getScalarSizeInBits())
+        MinBWs[cast<Instruction>(*MI)] = MinBW;
+    }
+  }
+
+  return MinBWs;
+}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 5c4bab734b2b..26eca230bb31 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -105,7 +105,7 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
     Pair[1] += hexDigitValue(*Buffer);
   }
   Pair[0] = 0;
-  for (int i=0; i<16; i++, Buffer++) {
+  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
     Pair[0] *= 16;
     Pair[0] += hexDigitValue(*Buffer);
   }
@@ -523,9 +523,14 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(zeroinitializer);
   KEYWORD(undef);
   KEYWORD(null);
+  KEYWORD(none);
   KEYWORD(to);
+  KEYWORD(caller);
+  KEYWORD(within);
+  KEYWORD(from);
   KEYWORD(tail);
   KEYWORD(musttail);
+  KEYWORD(notail);
   KEYWORD(target);
   KEYWORD(triple);
   KEYWORD(unwind);
@@ -586,6 +591,10 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(preserve_mostcc);
   KEYWORD(preserve_allcc);
   KEYWORD(ghccc);
+  KEYWORD(x86_intrcc);
+  KEYWORD(hhvmcc);
+  KEYWORD(hhvm_ccc);
+  KEYWORD(cxx_fast_tlscc);
   KEYWORD(cc);
   KEYWORD(c);
@@ -601,6 +610,8 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(convergent);
   KEYWORD(dereferenceable);
   KEYWORD(dereferenceable_or_null);
+  KEYWORD(inaccessiblememonly);
+  KEYWORD(inaccessiblemem_or_argmemonly);
   KEYWORD(inlinehint);
   KEYWORD(inreg);
   KEYWORD(jumptable);
@@ -613,6 +624,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(noduplicate);
   KEYWORD(noimplicitfloat);
   KEYWORD(noinline);
+  KEYWORD(norecurse);
   KEYWORD(nonlazybind);
   KEYWORD(nonnull);
   KEYWORD(noredzone);
@@ -690,6 +702,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   TYPEKEYWORD("label",     Type::getLabelTy(Context));
   TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
   TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
+  TYPEKEYWORD("token",     Type::getTokenTy(Context));
 #undef TYPEKEYWORD
 
   // Keywords for instructions.
@@ -749,6 +762,11 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(extractvalue, ExtractValue); INSTKEYWORD(insertvalue, InsertValue); INSTKEYWORD(landingpad, LandingPad); + INSTKEYWORD(cleanupret, CleanupRet); + INSTKEYWORD(catchret, CatchRet); + INSTKEYWORD(catchswitch, CatchSwitch); + INSTKEYWORD(catchpad, CatchPad); + INSTKEYWORD(cleanuppad, CleanupPad); #undef INSTKEYWORD #define DWKEYWORD(TYPE, TOKEN) \ @@ -763,6 +781,7 @@ lltok::Kind LLLexer::LexIdentifier() { DWKEYWORD(VIRTUALITY, DwarfVirtuality); DWKEYWORD(LANG, DwarfLang); DWKEYWORD(OP, DwarfOp); + DWKEYWORD(MACINFO, DwarfMacinfo); #undef DWKEYWORD if (Keyword.startswith("DIFlag")) { diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 1c6e7bd18d0e..3471a2dbd05c 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -13,6 +13,7 @@ #include "LLParser.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/CallingConv.h" @@ -48,6 +49,32 @@ bool LLParser::Run() { ValidateEndOfModule(); } +bool LLParser::parseStandaloneConstantValue(Constant *&C, + const SlotMapping *Slots) { + restoreParsingState(Slots); + Lex.Lex(); + + Type *Ty = nullptr; + if (ParseType(Ty) || parseConstantValue(Ty, C)) + return true; + if (Lex.getKind() != lltok::Eof) + return Error(Lex.getLoc(), "expected end of string"); + return false; +} + +void LLParser::restoreParsingState(const SlotMapping *Slots) { + if (!Slots) + return; + NumberedVals = Slots->GlobalValues; + NumberedMetadata = Slots->MetadataNodes; + for (const auto &I : Slots->NamedTypes) + NamedTypes.insert( + std::make_pair(I.getKey(), std::make_pair(I.second, LocTy()))); + for (const auto &I : Slots->Types) + NumberedTypes.insert( + std::make_pair(I.first, std::make_pair(I.second, LocTy()))); +} + /// ValidateEndOfModule - Do final validity and sanity checks at the end of the /// module. bool LLParser::ValidateEndOfModule() { @@ -158,7 +185,7 @@ bool LLParser::ValidateEndOfModule() { // Look for intrinsic functions and CallInst that need to be upgraded for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) - UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove + UpgradeCallsToIntrinsic(&*FI++); // must be post-increment, as we remove UpgradeDebugInfo(*M); @@ -169,6 +196,10 @@ bool LLParser::ValidateEndOfModule() { // the mapping from LLParser as it doesn't need it anymore. 
Slots->GlobalValues = std::move(NumberedVals); Slots->MetadataNodes = std::move(NumberedMetadata); + for (const auto &I : NamedTypes) + Slots->NamedTypes.insert(std::make_pair(I.getKey(), I.second.first)); + for (const auto &I : NumberedTypes) + Slots->Types.insert(std::make_pair(I.first, I.second.first)); return false; } @@ -647,6 +678,12 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L, return Error(NameLoc, "symbol with local linkage must have default visibility"); + Type *Ty; + LocTy ExplicitTypeLoc = Lex.getLoc(); + if (ParseType(Ty) || + ParseToken(lltok::comma, "expected comma after alias's type")) + return true; + Constant *Aliasee; LocTy AliaseeLoc = Lex.getLoc(); if (Lex.getKind() != lltok::kw_bitcast && @@ -669,11 +706,35 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L, auto *PTy = dyn_cast(AliaseeType); if (!PTy) return Error(AliaseeLoc, "An alias must have pointer type"); + unsigned AddrSpace = PTy->getAddressSpace(); + + if (Ty != PTy->getElementType()) + return Error( + ExplicitTypeLoc, + "explicit pointee type doesn't match operand's pointee type"); + + GlobalValue *GVal = nullptr; + + // See if the alias was forward referenced, if so, prepare to replace the + // forward reference. + if (!Name.empty()) { + GVal = M->getNamedValue(Name); + if (GVal) { + if (!ForwardRefVals.erase(Name)) + return Error(NameLoc, "redefinition of global '@" + Name + "'"); + } + } else { + auto I = ForwardRefValIDs.find(NumberedVals.size()); + if (I != ForwardRefValIDs.end()) { + GVal = I->second.first; + ForwardRefValIDs.erase(I); + } + } // Okay, create the alias but do not insert it into the module yet. std::unique_ptr GA( - GlobalAlias::create(PTy, (GlobalValue::LinkageTypes)Linkage, Name, - Aliasee, /*Parent*/ nullptr)); + GlobalAlias::create(Ty, AddrSpace, (GlobalValue::LinkageTypes)Linkage, + Name, Aliasee, /*Parent*/ nullptr)); GA->setThreadLocalMode(TLM); GA->setVisibility((GlobalValue::VisibilityTypes)Visibility); GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass); @@ -682,27 +743,17 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned L, if (Name.empty()) NumberedVals.push_back(GA.get()); - // See if this value already exists in the symbol table. If so, it is either - // a redefinition or a definition of a forward reference. - if (GlobalValue *Val = M->getNamedValue(Name)) { - // See if this was a redefinition. If so, there is no entry in - // ForwardRefVals. - std::map >::iterator - I = ForwardRefVals.find(Name); - if (I == ForwardRefVals.end()) - return Error(NameLoc, "redefinition of global named '@" + Name + "'"); - - // Otherwise, this was a definition of forward ref. Verify that types - // agree. - if (Val->getType() != GA->getType()) - return Error(NameLoc, - "forward reference and definition of alias have different types"); + if (GVal) { + // Verify that types agree. + if (GVal->getType() != GA->getType()) + return Error( + ExplicitTypeLoc, + "forward reference and definition of alias have different types"); // If they agree, just RAUW the old value with the alias and remove the // forward ref info. - Val->replaceAllUsesWith(GA.get()); - Val->eraseFromParent(); - ForwardRefVals.erase(I); + GVal->replaceAllUsesWith(GA.get()); + GVal->eraseFromParent(); } // Insert into the module, we know its name won't collide now. 
@@ -767,12 +818,11 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, if (!Name.empty()) { GVal = M->getNamedValue(Name); if (GVal) { - if (!ForwardRefVals.erase(Name) || !isa(GVal)) + if (!ForwardRefVals.erase(Name)) return Error(NameLoc, "redefinition of global '@" + Name + "'"); } } else { - std::map >::iterator - I = ForwardRefValIDs.find(NumberedVals.size()); + auto I = ForwardRefValIDs.find(NumberedVals.size()); if (I != ForwardRefValIDs.end()) { GVal = I->second.first; ForwardRefValIDs.erase(I); @@ -903,14 +953,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, } // Target-dependent attributes: case lltok::StringConstant: { - std::string Attr = Lex.getStrVal(); - Lex.Lex(); - std::string Val; - if (EatIfPresent(lltok::equal) && - ParseStringConstant(Val)) + if (ParseStringAttribute(B)) return true; - - B.addAttribute(Attr, Val); continue; } @@ -951,6 +995,10 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; case lltok::kw_convergent: B.addAttribute(Attribute::Convergent); break; + case lltok::kw_inaccessiblememonly: + B.addAttribute(Attribute::InaccessibleMemOnly); break; + case lltok::kw_inaccessiblemem_or_argmemonly: + B.addAttribute(Attribute::InaccessibleMemOrArgMemOnly); break; case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break; case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; @@ -963,6 +1011,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; + case lltok::kw_norecurse: B.addAttribute(Attribute::NoRecurse); break; case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break; case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; @@ -1015,6 +1064,17 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, // GlobalValue Reference/Resolution Routines. //===----------------------------------------------------------------------===// +static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy, + const std::string &Name) { + if (auto *FT = dyn_cast(PTy->getElementType())) + return Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); + else + return new GlobalVariable(*M, PTy->getElementType(), false, + GlobalValue::ExternalWeakLinkage, nullptr, Name, + nullptr, GlobalVariable::NotThreadLocal, + PTy->getAddressSpace()); +} + /// GetGlobalVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. @@ -1033,8 +1093,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, // If this is a forward reference for the value, see if we already created a // forward ref record. if (!Val) { - std::map >::iterator - I = ForwardRefVals.find(Name); + auto I = ForwardRefVals.find(Name); if (I != ForwardRefVals.end()) Val = I->second.first; } @@ -1048,15 +1107,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty, } // Otherwise, create a new forward reference for this value and remember it. 
- GlobalValue *FwdVal; - if (FunctionType *FT = dyn_cast(PTy->getElementType())) - FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M); - else - FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, nullptr, Name, - nullptr, GlobalVariable::NotThreadLocal, - PTy->getAddressSpace()); - + GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, Name); ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); return FwdVal; } @@ -1073,8 +1124,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { // If this is a forward reference for the value, see if we already created a // forward ref record. if (!Val) { - std::map >::iterator - I = ForwardRefValIDs.find(ID); + auto I = ForwardRefValIDs.find(ID); if (I != ForwardRefValIDs.end()) Val = I->second.first; } @@ -1088,13 +1138,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { } // Otherwise, create a new forward reference for this value and remember it. - GlobalValue *FwdVal; - if (FunctionType *FT = dyn_cast(PTy->getElementType())) - FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M); - else - FwdVal = new GlobalVariable(*M, PTy->getElementType(), false, - GlobalValue::ExternalWeakLinkage, nullptr, ""); - + GlobalValue *FwdVal = createGlobalFwdRef(M, PTy, ""); ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); return FwdVal; } @@ -1217,6 +1261,19 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { ParseToken(lltok::rparen, "expected ')' in address space"); } +/// ParseStringAttribute +/// := StringConstant +/// := StringConstant '=' StringConstant +bool LLParser::ParseStringAttribute(AttrBuilder &B) { + std::string Attr = Lex.getStrVal(); + Lex.Lex(); + std::string Val; + if (EatIfPresent(lltok::equal) && ParseStringConstant(Val)) + return true; + B.addAttribute(Attr, Val); + return false; +} + /// ParseOptionalParamAttrs - Parse a potentially empty list of parameter attributes. bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { bool HaveError = false; @@ -1228,6 +1285,11 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { switch (Token) { default: // End of attributes. return HaveError; + case lltok::StringConstant: { + if (ParseStringAttribute(B)) + return true; + continue; + } case lltok::kw_align: { unsigned Alignment; if (ParseOptionalAlignment(Alignment)) @@ -1309,6 +1371,11 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { switch (Token) { default: // End of attributes. return HaveError; + case lltok::StringConstant: { + if (ParseStringAttribute(B)) + return true; + continue; + } case lltok::kw_dereferenceable: { uint64_t Bytes; if (ParseOptionalDerefAttrBytes(lltok::kw_dereferenceable, Bytes)) @@ -1323,6 +1390,13 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { B.addDereferenceableOrNullAttr(Bytes); continue; } + case lltok::kw_align: { + unsigned Alignment; + if (ParseOptionalAlignment(Alignment)) + return true; + B.addAlignmentAttr(Alignment); + continue; + } case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break; @@ -1330,7 +1404,6 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; // Error handling. 
- case lltok::kw_align: case lltok::kw_byval: case lltok::kw_inalloca: case lltok::kw_nest: @@ -1473,6 +1546,10 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'preserve_mostcc' /// ::= 'preserve_allcc' /// ::= 'ghccc' +/// ::= 'x86_intrcc' +/// ::= 'hhvmcc' +/// ::= 'hhvm_ccc' +/// ::= 'cxx_fast_tlscc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { @@ -1501,6 +1578,10 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_preserve_mostcc:CC = CallingConv::PreserveMost; break; case lltok::kw_preserve_allcc: CC = CallingConv::PreserveAll; break; case lltok::kw_ghccc: CC = CallingConv::GHC; break; + case lltok::kw_x86_intrcc: CC = CallingConv::X86_INTR; break; + case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break; + case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; + case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break; case lltok::kw_cc: { Lex.Lex(); return ParseUInt32(CC); @@ -1883,7 +1964,59 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, return false; } +/// ParseOptionalOperandBundles +/// ::= /*empty*/ +/// ::= '[' OperandBundle [, OperandBundle ]* ']' +/// +/// OperandBundle +/// ::= bundle-tag '(' ')' +/// ::= bundle-tag '(' Type Value [, Type Value ]* ')' +/// +/// bundle-tag ::= String Constant +bool LLParser::ParseOptionalOperandBundles( + SmallVectorImpl &BundleList, PerFunctionState &PFS) { + LocTy BeginLoc = Lex.getLoc(); + if (!EatIfPresent(lltok::lsquare)) + return false; + while (Lex.getKind() != lltok::rsquare) { + // If this isn't the first operand bundle, we need a comma. + if (!BundleList.empty() && + ParseToken(lltok::comma, "expected ',' in input list")) + return true; + + std::string Tag; + if (ParseStringConstant(Tag)) + return true; + + if (ParseToken(lltok::lparen, "expected '(' in operand bundle")) + return true; + + std::vector Inputs; + while (Lex.getKind() != lltok::rparen) { + // If this isn't the first input, we need a comma. + if (!Inputs.empty() && + ParseToken(lltok::comma, "expected ',' in input list")) + return true; + + Type *Ty = nullptr; + Value *Input = nullptr; + if (ParseType(Ty) || ParseValue(Ty, Input, PFS)) + return true; + Inputs.push_back(Input); + } + + BundleList.emplace_back(std::move(Tag), std::move(Inputs)); + + Lex.Lex(); // Lex the ')'. + } + + if (BundleList.empty()) + return Error(BeginLoc, "operand bundle set must not be empty"); + + Lex.Lex(); // Lex the ']'. + return false; +} /// ParseArgumentList - Parse the argument list for a function type or function /// prototype. @@ -2146,31 +2279,29 @@ LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f, : P(p), F(f), FunctionNumber(functionNumber) { // Insert unnamed arguments into the NumberedVals list. - for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); - AI != E; ++AI) - if (!AI->hasName()) - NumberedVals.push_back(AI); + for (Argument &A : F.args()) + if (!A.hasName()) + NumberedVals.push_back(&A); } LLParser::PerFunctionState::~PerFunctionState() { // If there were any forward referenced non-basicblock values, delete them. 
- for (std::map >::iterator - I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I) - if (!isa(I->second.first)) { - I->second.first->replaceAllUsesWith( - UndefValue::get(I->second.first->getType())); - delete I->second.first; - I->second.first = nullptr; - } - for (std::map >::iterator - I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I) - if (!isa(I->second.first)) { - I->second.first->replaceAllUsesWith( - UndefValue::get(I->second.first->getType())); - delete I->second.first; - I->second.first = nullptr; - } + for (const auto &P : ForwardRefVals) { + if (isa(P.second.first)) + continue; + P.second.first->replaceAllUsesWith( + UndefValue::get(P.second.first->getType())); + delete P.second.first; + } + + for (const auto &P : ForwardRefValIDs) { + if (isa(P.second.first)) + continue; + P.second.first->replaceAllUsesWith( + UndefValue::get(P.second.first->getType())); + delete P.second.first; + } } bool LLParser::PerFunctionState::FinishFunction() { @@ -2189,16 +2320,15 @@ bool LLParser::PerFunctionState::FinishFunction() { /// GetVal - Get a value with the specified name or ID, creating a /// forward reference record if needed. This can return null if the value /// exists but does not have the right type. -Value *LLParser::PerFunctionState::GetVal(const std::string &Name, - Type *Ty, LocTy Loc) { +Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Type *Ty, + LocTy Loc) { // Look this name up in the normal function symbol table. Value *Val = F.getValueSymbolTable().lookup(Name); // If this is a forward reference for the value, see if we already created a // forward ref record. if (!Val) { - std::map >::iterator - I = ForwardRefVals.find(Name); + auto I = ForwardRefVals.find(Name); if (I != ForwardRefVals.end()) Val = I->second.first; } @@ -2222,25 +2352,24 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name, // Otherwise, create a new forward reference for this value and remember it. Value *FwdVal; - if (Ty->isLabelTy()) + if (Ty->isLabelTy()) { FwdVal = BasicBlock::Create(F.getContext(), Name, &F); - else + } else { FwdVal = new Argument(Ty, Name); + } ForwardRefVals[Name] = std::make_pair(FwdVal, Loc); return FwdVal; } -Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, - LocTy Loc) { +Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, LocTy Loc) { // Look this name up in the normal function symbol table. Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr; // If this is a forward reference for the value, see if we already created a // forward ref record. if (!Val) { - std::map >::iterator - I = ForwardRefValIDs.find(ID); + auto I = ForwardRefValIDs.find(ID); if (I != ForwardRefValIDs.end()) Val = I->second.first; } @@ -2263,10 +2392,11 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty, // Otherwise, create a new forward reference for this value and remember it. 
Value *FwdVal; - if (Ty->isLabelTy()) + if (Ty->isLabelTy()) { FwdVal = BasicBlock::Create(F.getContext(), "", &F); - else + } else { FwdVal = new Argument(Ty); + } ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc); return FwdVal; @@ -2295,14 +2425,15 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, return P.Error(NameLoc, "instruction expected to be numbered '%" + Twine(NumberedVals.size()) + "'"); - std::map >::iterator FI = - ForwardRefValIDs.find(NameID); + auto FI = ForwardRefValIDs.find(NameID); if (FI != ForwardRefValIDs.end()) { - if (FI->second.first->getType() != Inst->getType()) + Value *Sentinel = FI->second.first; + if (Sentinel->getType() != Inst->getType()) return P.Error(NameLoc, "instruction forward referenced with type '" + getTypeString(FI->second.first->getType()) + "'"); - FI->second.first->replaceAllUsesWith(Inst); - delete FI->second.first; + + Sentinel->replaceAllUsesWith(Inst); + delete Sentinel; ForwardRefValIDs.erase(FI); } @@ -2311,14 +2442,15 @@ bool LLParser::PerFunctionState::SetInstName(int NameID, } // Otherwise, the instruction had a name. Resolve forward refs and set it. - std::map >::iterator - FI = ForwardRefVals.find(NameStr); + auto FI = ForwardRefVals.find(NameStr); if (FI != ForwardRefVals.end()) { - if (FI->second.first->getType() != Inst->getType()) + Value *Sentinel = FI->second.first; + if (Sentinel->getType() != Inst->getType()) return P.Error(NameLoc, "instruction forward referenced with type '" + getTypeString(FI->second.first->getType()) + "'"); - FI->second.first->replaceAllUsesWith(Inst); - delete FI->second.first; + + Sentinel->replaceAllUsesWith(Inst); + delete Sentinel; ForwardRefVals.erase(FI); } @@ -2421,6 +2553,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { case lltok::kw_null: ID.Kind = ValID::t_Null; break; case lltok::kw_undef: ID.Kind = ValID::t_Undef; break; case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break; + case lltok::kw_none: ID.Kind = ValID::t_None; break; case lltok::lbrace: { // ValID ::= '{' ConstVector '}' @@ -2430,9 +2563,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rbrace, "expected end of struct constant")) return true; - ID.ConstantStructElts = new Constant*[Elts.size()]; + ID.ConstantStructElts = make_unique(Elts.size()); ID.UIntVal = Elts.size(); - memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0])); + memcpy(ID.ConstantStructElts.get(), Elts.data(), + Elts.size() * sizeof(Elts[0])); ID.Kind = ValID::t_ConstantStruct; return false; } @@ -2451,8 +2585,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return true; if (isPackedStruct) { - ID.ConstantStructElts = new Constant*[Elts.size()]; - memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0])); + ID.ConstantStructElts = make_unique(Elts.size()); + memcpy(ID.ConstantStructElts.get(), Elts.data(), + Elts.size() * sizeof(Elts[0])); ID.UIntVal = Elts.size(); ID.Kind = ValID::t_PackedConstantStruct; return false; @@ -2891,7 +3026,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { } } - SmallPtrSet Visited; + SmallPtrSet Visited; if (!Indices.empty() && !Ty->isSized(&Visited)) return Error(ID.Loc, "base element of getelementptr must be sized"); @@ -3066,6 +3201,11 @@ struct DwarfTagField : public MDUnsignedField { DwarfTagField(dwarf::Tag DefaultTag) : MDUnsignedField(DefaultTag, dwarf::DW_TAG_hi_user) {} }; +struct DwarfMacinfoTypeField : public MDUnsignedField { + DwarfMacinfoTypeField() : MDUnsignedField(0, 
dwarf::DW_MACINFO_vendor_ext) {} + DwarfMacinfoTypeField(dwarf::MacinfoRecordType DefaultType) + : MDUnsignedField(DefaultType, dwarf::DW_MACINFO_vendor_ext) {} +}; struct DwarfAttEncodingField : public MDUnsignedField { DwarfAttEncodingField() : MDUnsignedField(0, dwarf::DW_ATE_hi_user) {} }; @@ -3157,6 +3297,26 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfTagField &Result) { return false; } +template <> +bool LLParser::ParseMDField(LocTy Loc, StringRef Name, + DwarfMacinfoTypeField &Result) { + if (Lex.getKind() == lltok::APSInt) + return ParseMDField(Loc, Name, static_cast(Result)); + + if (Lex.getKind() != lltok::DwarfMacinfo) + return TokError("expected DWARF macinfo type"); + + unsigned Macinfo = dwarf::getMacinfo(Lex.getStrVal()); + if (Macinfo == dwarf::DW_MACINFO_invalid) + return TokError( + "invalid DWARF macinfo type" + Twine(" '") + Lex.getStrVal() + "'"); + assert(Macinfo <= Result.Max && "Expected valid DWARF macinfo type"); + + Result.assign(Macinfo); + Lex.Lex(); + return false; +} + template <> bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfVirtualityField &Result) { @@ -3569,8 +3729,11 @@ bool LLParser::ParseDIFile(MDNode *&Result, bool IsDistinct) { /// isOptimized: true, flags: "-O2", runtimeVersion: 1, /// splitDebugFilename: "abc.debug", emissionKind: 1, /// enums: !1, retainedTypes: !2, subprograms: !3, -/// globals: !4, imports: !5, dwoId: 0x0abcd) +/// globals: !4, imports: !5, macros: !6, dwoId: 0x0abcd) bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { + if (!IsDistinct) + return Lex.Error("missing 'distinct', required for !DICompileUnit"); + #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ REQUIRED(language, DwarfLangField, ); \ REQUIRED(file, MDField, (/* AllowNull */ false)); \ @@ -3585,16 +3748,16 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { OPTIONAL(subprograms, MDField, ); \ OPTIONAL(globals, MDField, ); \ OPTIONAL(imports, MDField, ); \ + OPTIONAL(macros, MDField, ); \ OPTIONAL(dwoId, MDUnsignedField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS - Result = GET_OR_DISTINCT(DICompileUnit, - (Context, language.Val, file.Val, producer.Val, - isOptimized.Val, flags.Val, runtimeVersion.Val, - splitDebugFilename.Val, emissionKind.Val, enums.Val, - retainedTypes.Val, subprograms.Val, globals.Val, - imports.Val, dwoId.Val)); + Result = DICompileUnit::getDistinct( + Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val, + runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val, + retainedTypes.Val, subprograms.Val, globals.Val, imports.Val, macros.Val, + dwoId.Val); return false; } @@ -3604,9 +3767,10 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { /// isDefinition: true, scopeLine: 8, containingType: !3, /// virtuality: DW_VIRTUALTIY_pure_virtual, /// virtualIndex: 10, flags: 11, -/// isOptimized: false, function: void ()* @_Z3foov, -/// templateParams: !4, declaration: !5, variables: !6) +/// isOptimized: false, templateParams: !4, declaration: !5, +/// variables: !6) bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { + auto Loc = Lex.getLoc(); #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ OPTIONAL(scope, MDField, ); \ OPTIONAL(name, MDStringField, ); \ @@ -3622,19 +3786,23 @@ bool LLParser::ParseDISubprogram(MDNode *&Result, bool IsDistinct) { OPTIONAL(virtualIndex, MDUnsignedField, (0, UINT32_MAX)); \ OPTIONAL(flags, DIFlagField, ); \ OPTIONAL(isOptimized, MDBoolField, ); \ - OPTIONAL(function, MDConstant, ); 
\ OPTIONAL(templateParams, MDField, ); \ OPTIONAL(declaration, MDField, ); \ OPTIONAL(variables, MDField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS + if (isDefinition.Val && !IsDistinct) + return Lex.Error( + Loc, + "missing 'distinct', required for !DISubprogram when 'isDefinition'"); + Result = GET_OR_DISTINCT( - DISubprogram, (Context, scope.Val, name.Val, linkageName.Val, file.Val, - line.Val, type.Val, isLocal.Val, isDefinition.Val, - scopeLine.Val, containingType.Val, virtuality.Val, - virtualIndex.Val, flags.Val, isOptimized.Val, function.Val, - templateParams.Val, declaration.Val, variables.Val)); + DISubprogram, + (Context, scope.Val, name.Val, linkageName.Val, file.Val, line.Val, + type.Val, isLocal.Val, isDefinition.Val, scopeLine.Val, + containingType.Val, virtuality.Val, virtualIndex.Val, flags.Val, + isOptimized.Val, templateParams.Val, declaration.Val, variables.Val)); return false; } @@ -3685,6 +3853,39 @@ bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) { return false; } +/// ParseDIMacro: +/// ::= !DIMacro(macinfo: type, line: 9, name: "SomeMacro", value: "SomeValue") +bool LLParser::ParseDIMacro(MDNode *&Result, bool IsDistinct) { +#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ + REQUIRED(type, DwarfMacinfoTypeField, ); \ + REQUIRED(line, LineField, ); \ + REQUIRED(name, MDStringField, ); \ + OPTIONAL(value, MDStringField, ); + PARSE_MD_FIELDS(); +#undef VISIT_MD_FIELDS + + Result = GET_OR_DISTINCT(DIMacro, + (Context, type.Val, line.Val, name.Val, value.Val)); + return false; +} + +/// ParseDIMacroFile: +/// ::= !DIMacroFile(line: 9, file: !2, nodes: !3) +bool LLParser::ParseDIMacroFile(MDNode *&Result, bool IsDistinct) { +#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ + OPTIONAL(type, DwarfMacinfoTypeField, (dwarf::DW_MACINFO_start_file)); \ + REQUIRED(line, LineField, ); \ + REQUIRED(file, MDField, ); \ + OPTIONAL(nodes, MDField, ); + PARSE_MD_FIELDS(); +#undef VISIT_MD_FIELDS + + Result = GET_OR_DISTINCT(DIMacroFile, + (Context, type.Val, line.Val, file.Val, nodes.Val)); + return false; +} + + /// ParseDIModule: /// ::= !DIModule(scope: !0, name: "SomeModule", configMacros: "-DNDEBUG", /// includePath: "/usr/include", isysroot: "/") @@ -3762,24 +3963,25 @@ bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) { } /// ParseDILocalVariable: -/// ::= !DILocalVariable(tag: DW_TAG_arg_variable, scope: !0, name: "foo", +/// ::= !DILocalVariable(arg: 7, scope: !0, name: "foo", +/// file: !1, line: 7, type: !2, arg: 2, flags: 7) +/// ::= !DILocalVariable(scope: !0, name: "foo", /// file: !1, line: 7, type: !2, arg: 2, flags: 7) bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) { #define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ - REQUIRED(tag, DwarfTagField, ); \ REQUIRED(scope, MDField, (/* AllowNull */ false)); \ OPTIONAL(name, MDStringField, ); \ + OPTIONAL(arg, MDUnsignedField, (0, UINT16_MAX)); \ OPTIONAL(file, MDField, ); \ OPTIONAL(line, LineField, ); \ OPTIONAL(type, MDField, ); \ - OPTIONAL(arg, MDUnsignedField, (0, UINT16_MAX)); \ OPTIONAL(flags, DIFlagField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS Result = GET_OR_DISTINCT(DILocalVariable, - (Context, tag.Val, scope.Val, name.Val, file.Val, - line.Val, type.Val, arg.Val, flags.Val)); + (Context, scope.Val, name.Val, file.Val, line.Val, + type.Val, arg.Val, flags.Val)); return false; } @@ -3969,13 +4171,11 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, V = PFS->GetVal(ID.StrVal, Ty, ID.Loc); return V == nullptr; case ValID::t_InlineAsm: { 
- PointerType *PTy = dyn_cast(Ty); - FunctionType *FTy = - PTy ? dyn_cast(PTy->getElementType()) : nullptr; - if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) + if (!ID.FTy || !InlineAsm::Verify(ID.FTy, ID.StrVal2)) return Error(ID.Loc, "invalid type for inline asm constraint string"); - V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, - (ID.UIntVal>>1)&1, (InlineAsm::AsmDialect(ID.UIntVal>>2))); + V = InlineAsm::get(ID.FTy, ID.StrVal, ID.StrVal2, ID.UIntVal & 1, + (ID.UIntVal >> 1) & 1, + (InlineAsm::AsmDialect(ID.UIntVal >> 2))); return false; } case ValID::t_GlobalName: @@ -4035,6 +4235,11 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, return Error(ID.Loc, "invalid type for null constant"); V = Constant::getNullValue(Ty); return false; + case ValID::t_None: + if (!Ty->isTokenTy()) + return Error(ID.Loc, "invalid type for none constant"); + V = Constant::getNullValue(Ty); + return false; case ValID::t_Constant: if (ID.ConstantVal->getType() != Ty) return Error(ID.Loc, "constant expression type mismatch"); @@ -4056,8 +4261,8 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, return Error(ID.Loc, "element " + Twine(i) + " of struct initializer doesn't match struct element type"); - V = ConstantStruct::get(ST, makeArrayRef(ID.ConstantStructElts, - ID.UIntVal)); + V = ConstantStruct::get( + ST, makeArrayRef(ID.ConstantStructElts.get(), ID.UIntVal)); } else return Error(ID.Loc, "constant expression type mismatch"); return false; @@ -4065,11 +4270,35 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, llvm_unreachable("Invalid ValID"); } +bool LLParser::parseConstantValue(Type *Ty, Constant *&C) { + C = nullptr; + ValID ID; + auto Loc = Lex.getLoc(); + if (ParseValID(ID, /*PFS=*/nullptr)) + return true; + switch (ID.Kind) { + case ValID::t_APSInt: + case ValID::t_APFloat: + case ValID::t_Undef: + case ValID::t_Constant: + case ValID::t_ConstantStruct: + case ValID::t_PackedConstantStruct: { + Value *V; + if (ConvertValIDToValue(Ty, ID, V, /*PFS=*/nullptr)) + return true; + assert(isa(V) && "Expected a constant value"); + C = cast(V); + return false; + } + default: + return Error(Loc, "expected a constant value"); + } +} + bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) { V = nullptr; ValID ID; - return ParseValID(ID, PFS) || - ConvertValIDToValue(Ty, ID, V, PFS); + return ParseValID(ID, PFS) || ConvertValIDToValue(Ty, ID, V, PFS); } bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) { @@ -4242,8 +4471,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (!FunctionName.empty()) { // If this was a definition of a forward reference, remove the definition // from the forward reference table and fill in the forward ref. - std::map >::iterator FRVI = - ForwardRefVals.find(FunctionName); + auto FRVI = ForwardRefVals.find(FunctionName); if (FRVI != ForwardRefVals.end()) { Fn = M->getFunction(FunctionName); if (!Fn) @@ -4265,8 +4493,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { } else { // If this is a definition of a forward referenced function, make sure the // types agree. 
-    std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator I
-      = ForwardRefValIDs.find(NumberedVals.size());
+    auto I = ForwardRefValIDs.find(NumberedVals.size());
     if (I != ForwardRefValIDs.end()) {
       Fn = cast<Function>(I->second.first);
       if (Fn->getType() != PFT)
@@ -4498,6 +4725,11 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_indirectbr: return ParseIndirectBr(Inst, PFS);
   case lltok::kw_invoke:     return ParseInvoke(Inst, PFS);
   case lltok::kw_resume:     return ParseResume(Inst, PFS);
+  case lltok::kw_cleanupret: return ParseCleanupRet(Inst, PFS);
+  case lltok::kw_catchret:   return ParseCatchRet(Inst, PFS);
+  case lltok::kw_catchswitch: return ParseCatchSwitch(Inst, PFS);
+  case lltok::kw_catchpad:   return ParseCatchPad(Inst, PFS);
+  case lltok::kw_cleanuppad: return ParseCleanupPad(Inst, PFS);
   // Binary Operators.
   case lltok::kw_add:
   case lltok::kw_sub:
@@ -4580,6 +4812,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
   case lltok::kw_call:     return ParseCall(Inst, PFS, CallInst::TCK_None);
   case lltok::kw_tail:     return ParseCall(Inst, PFS, CallInst::TCK_Tail);
   case lltok::kw_musttail: return ParseCall(Inst, PFS, CallInst::TCK_MustTail);
+  case lltok::kw_notail:   return ParseCall(Inst, PFS, CallInst::TCK_NoTail);
   // Memory.
   case lltok::kw_alloca:   return ParseAlloc(Inst, PFS);
   case lltok::kw_load:     return ParseLoad(Inst, PFS);
@@ -4798,15 +5031,15 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   LocTy RetTypeLoc;
   ValID CalleeID;
   SmallVector<ParamInfo, 16> ArgList;
+  SmallVector<OperandBundleDef, 2> BundleList;

   BasicBlock *NormalBB, *UnwindBB;
-  if (ParseOptionalCallingConv(CC) ||
-      ParseOptionalReturnAttrs(RetAttrs) ||
+  if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
       ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
-      ParseValID(CalleeID) ||
-      ParseParameterList(ArgList, PFS) ||
+      ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) ||
       ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
                                  NoBuiltinLoc) ||
+      ParseOptionalOperandBundles(BundleList, PFS) ||
       ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
       ParseTypeAndBasicBlock(NormalBB, PFS) ||
       ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
@@ -4829,6 +5062,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
     Ty = FunctionType::get(RetType, ParamTypes, false);
   }

+  CalleeID.FTy = Ty;
+
   // Look up the callee.
   Value *Callee;
   if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
@@ -4880,7 +5115,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
   // Finish off the Attribute and check them
   AttributeSet PAL = AttributeSet::get(Context, Attrs);

-  InvokeInst *II = InvokeInst::Create(Ty, Callee, NormalBB, UnwindBB, Args);
+  InvokeInst *II =
+      InvokeInst::Create(Ty, Callee, NormalBB, UnwindBB, Args, BundleList);
   II->setCallingConv(CC);
   II->setAttributes(PAL);
   ForwardRefAttrGroups[II] = FwdRefAttrGrps;
@@ -4900,6 +5136,183 @@ bool LLParser::ParseResume(Instruction *&Inst, PerFunctionState &PFS) {
   return false;
 }

+bool LLParser::ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
+                                  PerFunctionState &PFS) {
+  if (ParseToken(lltok::lsquare, "expected '[' in catchpad/cleanuppad"))
+    return true;
+
+  while (Lex.getKind() != lltok::rsquare) {
+    // If this isn't the first argument, we need a comma.
+    if (!Args.empty() &&
+        ParseToken(lltok::comma, "expected ',' in argument list"))
+      return true;
+
+    // Parse the argument.
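+    // Each argument is written as 'Type Value'; metadata operands are routed
+    // through ParseMetadataAsValue below. A sketch of the surface syntax this
+    // accepts (hypothetical IR, names invented for illustration):
+    //   %cp = catchpad within %cs [i8* %exn.slot, i32 4]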
+    LocTy ArgLoc;
+    Type *ArgTy = nullptr;
+    if (ParseType(ArgTy, ArgLoc))
+      return true;
+
+    Value *V;
+    if (ArgTy->isMetadataTy()) {
+      if (ParseMetadataAsValue(V, PFS))
+        return true;
+    } else {
+      if (ParseValue(ArgTy, V, PFS))
+        return true;
+    }
+    Args.push_back(V);
+  }
+
+  Lex.Lex();  // Lex the ']'.
+  return false;
+}
+
+/// ParseCleanupRet
+///   ::= 'cleanupret' from Value unwind ('to' 'caller' | TypeAndValue)
+bool LLParser::ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *CleanupPad = nullptr;
+
+  if (ParseToken(lltok::kw_from, "expected 'from' after cleanupret"))
+    return true;
+
+  if (ParseValue(Type::getTokenTy(Context), CleanupPad, PFS))
+    return true;
+
+  if (ParseToken(lltok::kw_unwind, "expected 'unwind' in cleanupret"))
+    return true;
+
+  BasicBlock *UnwindBB = nullptr;
+  if (Lex.getKind() == lltok::kw_to) {
+    Lex.Lex();
+    if (ParseToken(lltok::kw_caller, "expected 'caller' in cleanupret"))
+      return true;
+  } else {
+    if (ParseTypeAndBasicBlock(UnwindBB, PFS)) {
+      return true;
+    }
+  }
+
+  Inst = CleanupReturnInst::Create(CleanupPad, UnwindBB);
+  return false;
+}
+
+/// ParseCatchRet
+///   ::= 'catchret' from Parent Value 'to' TypeAndValue
+bool LLParser::ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *CatchPad = nullptr;
+
+  if (ParseToken(lltok::kw_from, "expected 'from' after catchret"))
+    return true;
+
+  if (ParseValue(Type::getTokenTy(Context), CatchPad, PFS))
+    return true;
+
+  BasicBlock *BB;
+  if (ParseToken(lltok::kw_to, "expected 'to' in catchret") ||
+      ParseTypeAndBasicBlock(BB, PFS))
+    return true;
+
+  Inst = CatchReturnInst::Create(CatchPad, BB);
+  return false;
+}
+
+/// ParseCatchSwitch
+///   ::= 'catchswitch' within Parent
+bool LLParser::ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *ParentPad;
+  LocTy BBLoc;
+
+  if (ParseToken(lltok::kw_within, "expected 'within' after catchswitch"))
+    return true;
+
+  if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar &&
+      Lex.getKind() != lltok::LocalVarID)
+    return TokError("expected scope value for catchswitch");
+
+  if (ParseValue(Type::getTokenTy(Context), ParentPad, PFS))
+    return true;
+
+  if (ParseToken(lltok::lsquare, "expected '[' with catchswitch labels"))
+    return true;
+
+  SmallVector<BasicBlock *, 32> Table;
+  do {
+    BasicBlock *DestBB;
+    if (ParseTypeAndBasicBlock(DestBB, PFS))
+      return true;
+    Table.push_back(DestBB);
+  } while (EatIfPresent(lltok::comma));
+
+  if (ParseToken(lltok::rsquare, "expected ']' after catchswitch labels"))
+    return true;
+
+  if (ParseToken(lltok::kw_unwind,
+                 "expected 'unwind' after catchswitch scope"))
+    return true;
+
+  BasicBlock *UnwindBB = nullptr;
+  if (EatIfPresent(lltok::kw_to)) {
+    if (ParseToken(lltok::kw_caller, "expected 'caller' in catchswitch"))
+      return true;
+  } else {
+    if (ParseTypeAndBasicBlock(UnwindBB, PFS))
+      return true;
+  }
+
+  auto *CatchSwitch =
+      CatchSwitchInst::Create(ParentPad, UnwindBB, Table.size());
+  for (BasicBlock *DestBB : Table)
+    CatchSwitch->addHandler(DestBB);
+  Inst = CatchSwitch;
+  return false;
+}
+
+/// ParseCatchPad
+///   ::= 'catchpad' within Parent ParamList
+bool LLParser::ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *CatchSwitch = nullptr;
+
+  if (ParseToken(lltok::kw_within, "expected 'within' after catchpad"))
+    return true;
+
+  if (Lex.getKind() != lltok::LocalVar && Lex.getKind() != lltok::LocalVarID)
+    return TokError("expected scope value for catchpad");
+
+  if (ParseValue(Type::getTokenTy(Context), CatchSwitch, PFS))
+    return true;
+
+  SmallVector<Value *, 8> Args;
+  if (ParseExceptionArgs(Args, PFS))
+    return true;
+
+  Inst = CatchPadInst::Create(CatchSwitch, Args);
+  return false;
+}
+
+/// ParseCleanupPad
+///   ::= 'cleanuppad' within Parent ParamList
+bool LLParser::ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *ParentPad = nullptr;
+
+  if (ParseToken(lltok::kw_within, "expected 'within' after cleanuppad"))
+    return true;
+
+  if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar &&
+      Lex.getKind() != lltok::LocalVarID)
+    return TokError("expected scope value for cleanuppad");
+
+  if (ParseValue(Type::getTokenTy(Context), ParentPad, PFS))
+    return true;
+
+  SmallVector<Value *, 8> Args;
+  if (ParseExceptionArgs(Args, PFS))
+    return true;
+
+  Inst = CleanupPadInst::Create(ParentPad, Args);
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // Binary Operators.
 //===----------------------------------------------------------------------===//
@@ -5196,12 +5609,14 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
 }

 /// ParseCall
-///   ::= 'call' OptionalCallingConv OptionalAttrs Type Value
-///       ParameterList OptionalAttrs
-///   ::= 'tail' 'call' OptionalCallingConv OptionalAttrs Type Value
-///       ParameterList OptionalAttrs
-///   ::= 'musttail' 'call' OptionalCallingConv OptionalAttrs Type Value
-///       ParameterList OptionalAttrs
+///   ::= 'call' OptionalFastMathFlags OptionalCallingConv
+///           OptionalAttrs Type Value ParameterList OptionalAttrs
+///   ::= 'tail' 'call' OptionalFastMathFlags OptionalCallingConv
+///           OptionalAttrs Type Value ParameterList OptionalAttrs
+///   ::= 'musttail' 'call' OptionalFastMathFlags OptionalCallingConv
+///           OptionalAttrs Type Value ParameterList OptionalAttrs
+///   ::= 'notail' 'call' OptionalFastMathFlags OptionalCallingConv
+///           OptionalAttrs Type Value ParameterList OptionalAttrs
 bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
                          CallInst::TailCallKind TCK) {
   AttrBuilder RetAttrs, FnAttrs;
@@ -5212,20 +5627,29 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   LocTy RetTypeLoc;
   ValID CalleeID;
   SmallVector<ParamInfo, 16> ArgList;
+  SmallVector<OperandBundleDef, 2> BundleList;
   LocTy CallLoc = Lex.getLoc();

-  if ((TCK != CallInst::TCK_None &&
-       ParseToken(lltok::kw_call, "expected 'tail call'")) ||
-      ParseOptionalCallingConv(CC) ||
-      ParseOptionalReturnAttrs(RetAttrs) ||
+  if (TCK != CallInst::TCK_None &&
+      ParseToken(lltok::kw_call,
+                 "expected 'tail call', 'musttail call', or 'notail call'"))
+    return true;
+
+  FastMathFlags FMF = EatFastMathFlagsIfPresent();
+
+  if (ParseOptionalCallingConv(CC) || ParseOptionalReturnAttrs(RetAttrs) ||
       ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
       ParseValID(CalleeID) ||
       ParseParameterList(ArgList, PFS, TCK == CallInst::TCK_MustTail,
                          PFS.getFunction().isVarArg()) ||
-      ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false,
-                                 BuiltinLoc))
+      ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, BuiltinLoc) ||
+      ParseOptionalOperandBundles(BundleList, PFS))
     return true;

+  if (FMF.any() && !RetType->isFPOrFPVectorTy())
+    return Error(CallLoc, "fast-math-flags specified for call without "
+                          "floating-point scalar or vector return type");
+
   // If RetType is a non-function pointer type, then this is the short syntax
   // for the call, which means that RetType is just the return type.  Infer the
   // rest of the function argument types from the arguments that are present.
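// Taken together, the added productions let ParseCall accept forms like the
// following (illustrative IR only, not part of this patch):
//   %r = notail call fast float @fma(float %a, float %b, float %c)
//   call void @g() [ "deopt"(i32 0, i32 1) ]
// The fast-math-flags check above rejects 'fast' and friends on calls whose
// return type is not a floating-point scalar or vector.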
@@ -5242,6 +5666,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
     Ty = FunctionType::get(RetType, ParamTypes, false);
   }

+  CalleeID.FTy = Ty;
+
   // Look up the callee.
   Value *Callee;
   if (ConvertValIDToValue(PointerType::getUnqual(Ty), CalleeID, Callee, &PFS))
@@ -5293,9 +5719,11 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
   // Finish off the Attribute and check them
   AttributeSet PAL = AttributeSet::get(Context, Attrs);

-  CallInst *CI = CallInst::Create(Ty, Callee, Args);
+  CallInst *CI = CallInst::Create(Ty, Callee, Args, BundleList);
   CI->setTailCallKind(TCK);
   CI->setCallingConv(CC);
+  if (FMF.any())
+    CI->setFastMathFlags(FMF);
   CI->setAttributes(PAL);
   ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
   Inst = CI;
@@ -5614,7 +6042,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
     Indices.push_back(Val);
   }

-  SmallPtrSet<Type*, 4> Visited;
+  SmallPtrSet<Type *, 4> Visited;
   if (!Indices.empty() && !Ty->isSized(&Visited))
     return Error(Loc, "base element of getelementptr must be sized");

diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 6e57b3e0667d..f61a5e5e3a38 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -46,29 +46,32 @@ namespace llvm {
   /// or a symbolic (%var) reference.  This is just a discriminated union.
   struct ValID {
     enum {
-      t_LocalID, t_GlobalID,      // ID in UIntVal.
-      t_LocalName, t_GlobalName,  // Name in StrVal.
-      t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
-      t_Null, t_Undef, t_Zero,    // No value.
-      t_EmptyArray,               // No value:  []
-      t_Constant,                 // Value in ConstantVal.
-      t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
-      t_ConstantStruct,           // Value in ConstantStructElts.
-      t_PackedConstantStruct      // Value in ConstantStructElts.
-    } Kind;
+      t_LocalID, t_GlobalID,           // ID in UIntVal.
+      t_LocalName, t_GlobalName,       // Name in StrVal.
+      t_APSInt, t_APFloat,             // Value in APSIntVal/APFloatVal.
+      t_Null, t_Undef, t_Zero, t_None, // No value.
+      t_EmptyArray,                    // No value:  []
+      t_Constant,                      // Value in ConstantVal.
+      t_InlineAsm,                     // Value in FTy/StrVal/StrVal2/UIntVal.
+      t_ConstantStruct,                // Value in ConstantStructElts.
+      t_PackedConstantStruct           // Value in ConstantStructElts.
+    } Kind = t_LocalID;

     LLLexer::LocTy Loc;
     unsigned UIntVal;
+    FunctionType *FTy = nullptr;
     std::string StrVal, StrVal2;
     APSInt APSIntVal;
-    APFloat APFloatVal;
+    APFloat APFloatVal{0.0};
     Constant *ConstantVal;
-    Constant **ConstantStructElts;
+    std::unique_ptr<Constant *[]> ConstantStructElts;

-    ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
-    ~ValID() {
-      if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
-        delete [] ConstantStructElts;
+    ValID() = default;
+    ValID(const ValID &RHS)
+        : Kind(RHS.Kind), Loc(RHS.Loc), UIntVal(RHS.UIntVal), FTy(RHS.FTy),
+          StrVal(RHS.StrVal), StrVal2(RHS.StrVal2), APSIntVal(RHS.APSIntVal),
+          APFloatVal(RHS.APFloatVal), ConstantVal(RHS.ConstantVal) {
+      assert(!RHS.ConstantStructElts);
     }

     bool operator<(const ValID &RHS) const {
@@ -143,6 +146,8 @@ namespace llvm {
           Slots(Slots), BlockAddressPFS(nullptr) {}
     bool Run();

+    bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots);
+
     LLVMContext &getContext() { return Context; }

   private:
@@ -154,6 +159,10 @@ namespace llvm {
       return Error(Lex.getLoc(), Msg);
     }

+    /// Restore the internal name and slot mappings using the mappings that
+    /// were created at an earlier parsing stage.
+    void restoreParsingState(const SlotMapping *Slots);
+
     /// GetGlobalVal - Get a value with the specified name or ID, creating a
     /// forward reference record if needed.  This can return null if the value
     /// exists but does not have the right type.
@@ -210,6 +219,8 @@ namespace llvm {
       return ParseUInt64(Val);
     }

+    bool ParseStringAttribute(AttrBuilder &B);
+
     bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
     bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
     bool parseOptionalUnnamedAddr(bool &UnnamedAddr) {
@@ -343,10 +354,12 @@ namespace llvm {
     bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
                              PerFunctionState *PFS);

+    bool parseConstantValue(Type *Ty, Constant *&C);
     bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
     bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
       return ParseValue(Ty, V, &PFS);
     }
+
     bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
                     PerFunctionState &PFS) {
       Loc = Lex.getLoc();
@@ -381,6 +394,13 @@ namespace llvm {
                             bool IsMustTailCall = false,
                             bool InVarArgsFunc = false);

+    bool
+    ParseOptionalOperandBundles(SmallVectorImpl<OperandBundleDef> &BundleList,
+                                PerFunctionState &PFS);
+
+    bool ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
+                            PerFunctionState &PFS);
+
     // Constant Parsing.
     bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
     bool ParseGlobalValue(Type *Ty, Constant *&V);
@@ -441,6 +461,11 @@ namespace llvm {
     bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
     bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
+    bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);

     bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
                          unsigned OperandType);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 691f085f0c9f..29a7f16d3c20 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -49,10 +49,14 @@ namespace lltok {
     kw_external, kw_thread_local, kw_localdynamic, kw_initialexec,
     kw_localexec, kw_zeroinitializer,
-    kw_undef, kw_null,
+    kw_undef, kw_null, kw_none,
     kw_to,
+    kw_caller,
+    kw_within,
+    kw_from,
     kw_tail,
     kw_musttail,
+    kw_notail,
     kw_target,
     kw_triple,
     kw_unwind,
@@ -96,6 +100,9 @@ namespace lltok {
     kw_webkit_jscc, kw_anyregcc, kw_preserve_mostcc, kw_preserve_allcc,
     kw_ghccc,
+    kw_x86_intrcc,
+    kw_hhvmcc, kw_hhvm_ccc,
+    kw_cxx_fast_tlscc,

     // Attributes:
     kw_attributes,
@@ -109,6 +116,8 @@ namespace lltok {
     kw_convergent,
     kw_dereferenceable,
     kw_dereferenceable_or_null,
+    kw_inaccessiblememonly,
+    kw_inaccessiblemem_or_argmemonly,
     kw_inlinehint,
     kw_inreg,
     kw_jumptable,
@@ -121,6 +130,7 @@ namespace lltok {
     kw_noduplicate,
     kw_noimplicitfloat,
     kw_noinline,
+    kw_norecurse,
     kw_nonlazybind,
     kw_nonnull,
     kw_noredzone,
@@ -177,7 +187,8 @@ namespace lltok {
     kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter,

     kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_resume,
-    kw_unreachable,
+    kw_unreachable, kw_cleanupret, kw_catchswitch, kw_catchret, kw_catchpad,
+    kw_cleanuppad,

     kw_alloca, kw_load, kw_store, kw_fence, kw_cmpxchg, kw_atomicrmw,
     kw_getelementptr,
@@ -209,6 +220,7 @@ namespace lltok {
     DwarfLang,     // DW_LANG_foo
     DwarfOp,       // DW_OP_foo
     DIFlag,        // DIFlagFoo
+    DwarfMacinfo,  // DW_MACINFO_foo

     // Type valued tokens (TyVal).
    Type,

diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index 9145a54f2a7b..4e55e62ecf5c 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -66,3 +66,15 @@ std::unique_ptr<Module> llvm::parseAssemblyString(StringRef AsmString,
   MemoryBufferRef F(AsmString, "<string>");
   return parseAssembly(F, Err, Context, Slots);
 }
+
+Constant *llvm::parseConstantValue(StringRef Asm, SMDiagnostic &Err,
+                                   const Module &M, const SlotMapping *Slots) {
+  SourceMgr SM;
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Asm);
+  SM.AddNewSourceBuffer(std::move(Buf), SMLoc());
+  Constant *C;
+  if (LLParser(Asm, SM, Err, const_cast<Module *>(&M))
+          .parseStandaloneConstantValue(C, Slots))
+    return nullptr;
+  return C;
+}
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 289c76e85b4b..385c18a40006 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//

 #include "llvm-c/BitReader.h"
+#include "llvm-c/Core.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
@@ -22,12 +23,25 @@ using namespace llvm;
 /* Builds a module from the bitcode in the specified memory buffer, returning a
    reference to the module via the OutModule parameter. Returns 0 on success.
    Optionally returns a human-readable error message via OutMessage. */
-LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf,
-                          LLVMModuleRef *OutModule, char **OutMessage) {
+LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutModule,
+                          char **OutMessage) {
   return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule,
                                    OutMessage);
 }

+LLVMBool LLVMParseBitcode2(LLVMMemoryBufferRef MemBuf,
+                           LLVMModuleRef *OutModule) {
+  return LLVMParseBitcodeInContext2(wrap(&getGlobalContext()), MemBuf,
+                                    OutModule);
+}
+
+static void diagnosticHandler(const DiagnosticInfo &DI, void *C) {
+  auto *Message = reinterpret_cast<std::string *>(C);
+  raw_string_ostream Stream(*Message);
+  DiagnosticPrinterRawOStream DP(Stream);
+  DI.print(DP);
+}
+
 LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
                                    LLVMMemoryBufferRef MemBuf,
                                    LLVMModuleRef *OutModule,
@@ -35,18 +49,36 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
   MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef();
   LLVMContext &Ctx = *unwrap(ContextRef);

+  LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
+      Ctx.getDiagnosticHandler();
+  void *OldDiagnosticContext = Ctx.getDiagnosticContext();
   std::string Message;
-  raw_string_ostream Stream(Message);
-  DiagnosticPrinterRawOStream DP(Stream);
+  Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true);
+
+  ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(Buf, Ctx);
+
+  Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true);

-  ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(
-      Buf, Ctx, [&](const DiagnosticInfo &DI) { DI.print(DP); });
   if (ModuleOrErr.getError()) {
-    if (OutMessage) {
-      Stream.flush();
+    if (OutMessage)
       *OutMessage = strdup(Message.c_str());
-    }
-    *OutModule = wrap((Module*)nullptr);
+    *OutModule = wrap((Module *)nullptr);
+    return 1;
+  }
+
+  *OutModule = wrap(ModuleOrErr.get().release());
+  return 0;
+}
+
+LLVMBool LLVMParseBitcodeInContext2(LLVMContextRef ContextRef,
+                                    LLVMMemoryBufferRef MemBuf,
+                                    LLVMModuleRef *OutModule) {
+  MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef();
+  LLVMContext &Ctx = *unwrap(ContextRef);
+
+  ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(Buf,
Ctx); + if (ModuleOrErr.getError()) { + *OutModule = wrap((Module *)nullptr); return 1; } @@ -59,26 +91,50 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, Optionally returns a human-readable error message via OutMessage. */ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, - LLVMModuleRef *OutM, - char **OutMessage) { + LLVMModuleRef *OutM, char **OutMessage) { + LLVMContext &Ctx = *unwrap(ContextRef); + LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler = + Ctx.getDiagnosticHandler(); + void *OldDiagnosticContext = Ctx.getDiagnosticContext(); + std::string Message; + Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true); std::unique_ptr Owner(unwrap(MemBuf)); ErrorOr> ModuleOrErr = - getLazyBitcodeModule(std::move(Owner), *unwrap(ContextRef)); + getLazyBitcodeModule(std::move(Owner), Ctx); Owner.release(); + Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true); - if (std::error_code EC = ModuleOrErr.getError()) { + if (ModuleOrErr.getError()) { *OutM = wrap((Module *)nullptr); if (OutMessage) - *OutMessage = strdup(EC.message().c_str()); + *OutMessage = strdup(Message.c_str()); return 1; } *OutM = wrap(ModuleOrErr.get().release()); return 0; +} +LLVMBool LLVMGetBitcodeModuleInContext2(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM) { + LLVMContext &Ctx = *unwrap(ContextRef); + std::unique_ptr Owner(unwrap(MemBuf)); + + ErrorOr> ModuleOrErr = + getLazyBitcodeModule(std::move(Owner), Ctx); + Owner.release(); + + if (ModuleOrErr.getError()) { + *OutM = wrap((Module *)nullptr); + return 1; + } + + *OutM = wrap(ModuleOrErr.get().release()); + return 0; } LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, @@ -87,20 +143,7 @@ LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, OutMessage); } -/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */ -LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, - LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage) { - return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf, - reinterpret_cast(OutMP), - OutMessage); -} - -/* Deprecated: Use LLVMGetBitcodeModule instead. 
*/ -LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, - LLVMModuleProviderRef *OutMP, - char **OutMessage) { - return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf, - OutMP, OutMessage); +LLVMBool LLVMGetBitcodeModule2(LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM) { + return LLVMGetBitcodeModuleInContext2(LLVMGetGlobalContext(), MemBuf, OutM); } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index c04e8b9f1f37..2e670d584ecc 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/ManagedStatic.h" @@ -93,35 +94,35 @@ public: void resolveConstantForwardRefs(); }; -class BitcodeReaderMDValueList { +class BitcodeReaderMetadataList { unsigned NumFwdRefs; bool AnyFwdRefs; unsigned MinFwdRef; unsigned MaxFwdRef; - std::vector MDValuePtrs; + std::vector MetadataPtrs; LLVMContext &Context; public: - BitcodeReaderMDValueList(LLVMContext &C) + BitcodeReaderMetadataList(LLVMContext &C) : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {} // vector compatibility methods - unsigned size() const { return MDValuePtrs.size(); } - void resize(unsigned N) { MDValuePtrs.resize(N); } - void push_back(Metadata *MD) { MDValuePtrs.emplace_back(MD); } - void clear() { MDValuePtrs.clear(); } - Metadata *back() const { return MDValuePtrs.back(); } - void pop_back() { MDValuePtrs.pop_back(); } - bool empty() const { return MDValuePtrs.empty(); } + unsigned size() const { return MetadataPtrs.size(); } + void resize(unsigned N) { MetadataPtrs.resize(N); } + void push_back(Metadata *MD) { MetadataPtrs.emplace_back(MD); } + void clear() { MetadataPtrs.clear(); } + Metadata *back() const { return MetadataPtrs.back(); } + void pop_back() { MetadataPtrs.pop_back(); } + bool empty() const { return MetadataPtrs.empty(); } Metadata *operator[](unsigned i) const { - assert(i < MDValuePtrs.size()); - return MDValuePtrs[i]; + assert(i < MetadataPtrs.size()); + return MetadataPtrs[i]; } void shrinkTo(unsigned N) { assert(N <= size() && "Invalid shrinkTo request!"); - MDValuePtrs.resize(N); + MetadataPtrs.resize(N); } Metadata *getValueFwdRef(unsigned Idx); @@ -131,17 +132,27 @@ public: class BitcodeReader : public GVMaterializer { LLVMContext &Context; - DiagnosticHandlerFunction DiagnosticHandler; Module *TheModule = nullptr; std::unique_ptr Buffer; std::unique_ptr StreamFile; BitstreamCursor Stream; + // Next offset to start scanning for lazy parsing of function bodies. uint64_t NextUnreadBit = 0; + // Last function offset found in the VST. + uint64_t LastFunctionBlockBit = 0; bool SeenValueSymbolTable = false; + uint64_t VSTOffset = 0; + // Contains an arbitrary and optional string identifying the bitcode producer + std::string ProducerIdentification; + // Number of module level metadata records specified by the + // MODULE_CODE_METADATA_VALUES record. + unsigned NumModuleMDs = 0; + // Support older bitcode without the MODULE_CODE_METADATA_VALUES record. 
+ bool SeenModuleValuesRecord = false; std::vector TypeList; BitcodeReaderValueList ValueList; - BitcodeReaderMDValueList MDValueList; + BitcodeReaderMetadataList MetadataList; std::vector ComdatList; SmallVector InstructionList; @@ -157,7 +168,7 @@ class BitcodeReader : public GVMaterializer { /// is thus not represented here. As such all indices are off by one. std::vector MAttributes; - /// \brief The set of attribute groups. + /// The set of attribute groups. std::map MAttributeGroups; /// While parsing a function body, this is a list of the basic blocks for the @@ -208,23 +219,24 @@ class BitcodeReader : public GVMaterializer { /// (e.g.) blockaddress forward references. bool WillMaterializeAllForwardRefs = false; - /// Functions that have block addresses taken. This is usually empty. - SmallPtrSet BlockAddressesTaken; - /// True if any Metadata block has been materialized. bool IsMetadataMaterialized = false; bool StripDebugInfo = false; + /// Functions that need to be matched with subprograms when upgrading old + /// metadata. + SmallDenseMap FunctionsWithSPs; + + std::vector BundleTags; + public: std::error_code error(BitcodeError E, const Twine &Message); std::error_code error(BitcodeError E); std::error_code error(const Twine &Message); - BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler); - BitcodeReader(LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler); + BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context); + BitcodeReader(LLVMContext &Context); ~BitcodeReader() override { freeState(); } std::error_code materializeForwardReferencedFunctions(); @@ -233,11 +245,9 @@ public: void releaseBuffer(); - bool isDematerializable(const GlobalValue *GV) const override; std::error_code materialize(GlobalValue *GV) override; - std::error_code materializeModule(Module *M) override; + std::error_code materializeModule() override; std::vector getIdentifiedStructTypes() const override; - void dematerialize(GlobalValue *GV) override; /// \brief Main interface to parsing a bitcode buffer. /// \returns true if an error occurred. @@ -249,6 +259,9 @@ public: /// \returns true if an error occurred. ErrorOr parseTriple(); + /// Cheap mechanism to just extract the identification block out of bitcode. + ErrorOr parseIdentificationBlock(); + static uint64_t decodeSignRotatedValue(uint64_t V); /// Materialize any deferred Metadata block. @@ -256,7 +269,20 @@ public: void setStripDebugInfo() override; + /// Save the mapping between the metadata values and the corresponding + /// value id that were recorded in the MetadataList during parsing. If + /// OnlyTempMD is true, then only record those entries that are still + /// temporary metadata. This interface is used when metadata linking is + /// performed as a postpass, such as during function importing. + void saveMetadataList(DenseMap &MetadataToIDs, + bool OnlyTempMD) override; + private: + /// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the + // ProducerIdentification data member, and do some basic enforcement on the + // "epoch" encoded in the bitcode. 
+ std::error_code parseBitcodeVersion(); + std::vector IdentifiedStructTypes; StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name); StructType *createIdentifiedStructType(LLVMContext &Context); @@ -268,7 +294,7 @@ private: return ValueList.getValueFwdRef(ID, Ty); } Metadata *getFnMetadataByID(unsigned ID) { - return MDValueList.getValueFwdRef(ID); + return MetadataList.getValueFwdRef(ID); } BasicBlock *getBasicBlock(unsigned ID) const { if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID @@ -351,21 +377,28 @@ private: /// a corresponding error code. std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); std::error_code parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); - std::error_code parseModule(bool Resume, bool ShouldLazyLoadMetadata = false); + std::error_code parseModule(uint64_t ResumeBit, + bool ShouldLazyLoadMetadata = false); std::error_code parseAttributeBlock(); std::error_code parseAttributeGroupBlock(); std::error_code parseTypeTable(); std::error_code parseTypeTableBody(); + std::error_code parseOperandBundleTags(); - std::error_code parseValueSymbolTable(); + ErrorOr recordValue(SmallVectorImpl &Record, + unsigned NameIndex, Triple &TT); + std::error_code parseValueSymbolTable(uint64_t Offset = 0); std::error_code parseConstants(); + std::error_code rememberAndSkipFunctionBodies(); std::error_code rememberAndSkipFunctionBody(); /// Save the positions of the Metadata blocks and skip parsing the blocks. std::error_code rememberAndSkipMetadata(); std::error_code parseFunctionBody(Function *F); std::error_code globalCleanup(); std::error_code resolveGlobalAndAliasInits(); - std::error_code parseMetadata(); + std::error_code parseMetadata(bool ModuleLevel = false); + std::error_code parseMetadataKinds(); + std::error_code parseMetadataKindRecord(SmallVectorImpl &Record); std::error_code parseMetadataAttachment(Function &F); ErrorOr parseModuleTriple(); std::error_code parseUseLists(); @@ -376,6 +409,94 @@ private: Function *F, DenseMap::iterator DeferredFunctionInfoIterator); }; + +/// Class to manage reading and parsing function summary index bitcode +/// files/sections. +class FunctionIndexBitcodeReader { + DiagnosticHandlerFunction DiagnosticHandler; + + /// Eventually points to the function index built during parsing. + FunctionInfoIndex *TheIndex = nullptr; + + std::unique_ptr Buffer; + std::unique_ptr StreamFile; + BitstreamCursor Stream; + + /// \brief Used to indicate whether we are doing lazy parsing of summary data. + /// + /// If false, the summary section is fully parsed into the index during + /// the initial parse. Otherwise, if true, the caller is expected to + /// invoke \a readFunctionSummary for each summary needed, and the summary + /// section is thus parsed lazily. + bool IsLazy = false; + + /// Used to indicate whether caller only wants to check for the presence + /// of the function summary bitcode section. All blocks are skipped, + /// but the SeenFuncSummary boolean is set. + bool CheckFuncSummaryPresenceOnly = false; + + /// Indicates whether we have encountered a function summary section + /// yet during parsing, used when checking if file contains function + /// summary section. + bool SeenFuncSummary = false; + + /// \brief Map populated during function summary section parsing, and + /// consumed during ValueSymbolTable parsing. + /// + /// Used to correlate summary records with VST entries. 
For the per-module + /// index this maps the ValueID to the parsed function summary, and + /// for the combined index this maps the summary record's bitcode + /// offset to the function summary (since in the combined index the + /// VST records do not hold value IDs but rather hold the function + /// summary record offset). + DenseMap> SummaryMap; + + /// Map populated during module path string table parsing, from the + /// module ID to a string reference owned by the index's module + /// path string table, used to correlate with combined index function + /// summary records. + DenseMap ModuleIdMap; + +public: + std::error_code error(BitcodeError E, const Twine &Message); + std::error_code error(BitcodeError E); + std::error_code error(const Twine &Message); + + FunctionIndexBitcodeReader(MemoryBuffer *Buffer, + DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false, + bool CheckFuncSummaryPresenceOnly = false); + FunctionIndexBitcodeReader(DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy = false, + bool CheckFuncSummaryPresenceOnly = false); + ~FunctionIndexBitcodeReader() { freeState(); } + + void freeState(); + + void releaseBuffer(); + + /// Check if the parser has encountered a function summary section. + bool foundFuncSummary() { return SeenFuncSummary; } + + /// \brief Main interface to parsing a bitcode buffer. + /// \returns true if an error occurred. + std::error_code parseSummaryIndexInto(std::unique_ptr Streamer, + FunctionInfoIndex *I); + + /// \brief Interface for parsing a function summary lazily. + std::error_code parseFunctionSummary(std::unique_ptr Streamer, + FunctionInfoIndex *I, + size_t FunctionSummaryOffset); + +private: + std::error_code parseModule(); + std::error_code parseValueSymbolTable(); + std::error_code parseEntireSummary(); + std::error_code parseModuleStringTable(); + std::error_code initStream(std::unique_ptr Streamer); + std::error_code initStreamFromBuffer(); + std::error_code initLazyStream(std::unique_ptr Streamer); +}; } // namespace BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC, @@ -397,43 +518,51 @@ static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler, return error(DiagnosticHandler, EC, EC.message()); } -static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler, +static std::error_code error(LLVMContext &Context, std::error_code EC, const Twine &Message) { - return error(DiagnosticHandler, - make_error_code(BitcodeError::CorruptedBitcode), Message); + return error([&](const DiagnosticInfo &DI) { Context.diagnose(DI); }, EC, + Message); +} + +static std::error_code error(LLVMContext &Context, std::error_code EC) { + return error(Context, EC, EC.message()); +} + +static std::error_code error(LLVMContext &Context, const Twine &Message) { + return error(Context, make_error_code(BitcodeError::CorruptedBitcode), + Message); } std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) { - return ::error(DiagnosticHandler, make_error_code(E), Message); + if (!ProducerIdentification.empty()) { + return ::error(Context, make_error_code(E), + Message + " (Producer: '" + ProducerIdentification + + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"); + } + return ::error(Context, make_error_code(E), Message); } std::error_code BitcodeReader::error(const Twine &Message) { - return ::error(DiagnosticHandler, - make_error_code(BitcodeError::CorruptedBitcode), Message); + if (!ProducerIdentification.empty()) { + return ::error(Context, 
make_error_code(BitcodeError::CorruptedBitcode), + Message + " (Producer: '" + ProducerIdentification + + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"); + } + return ::error(Context, make_error_code(BitcodeError::CorruptedBitcode), + Message); } std::error_code BitcodeReader::error(BitcodeError E) { - return ::error(DiagnosticHandler, make_error_code(E)); + return ::error(Context, make_error_code(E)); } -static DiagnosticHandlerFunction getDiagHandler(DiagnosticHandlerFunction F, - LLVMContext &C) { - if (F) - return F; - return [&C](const DiagnosticInfo &DI) { C.diagnose(DI); }; -} +BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context) + : Context(Context), Buffer(Buffer), ValueList(Context), + MetadataList(Context) {} -BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler) - : Context(Context), - DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)), - Buffer(Buffer), ValueList(Context), MDValueList(Context) {} - -BitcodeReader::BitcodeReader(LLVMContext &Context, - DiagnosticHandlerFunction DiagnosticHandler) - : Context(Context), - DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)), - Buffer(nullptr), ValueList(Context), MDValueList(Context) {} +BitcodeReader::BitcodeReader(LLVMContext &Context) + : Context(Context), Buffer(nullptr), ValueList(Context), + MetadataList(Context) {} std::error_code BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) @@ -472,7 +601,7 @@ void BitcodeReader::freeState() { Buffer = nullptr; std::vector().swap(TypeList); ValueList.clear(); - MDValueList.clear(); + MetadataList.clear(); std::vector().swap(ComdatList); std::vector().swap(MAttributes); @@ -779,6 +908,8 @@ void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) { OldV->replaceAllUsesWith(V); delete PrevVal; } + + return; } @@ -904,7 +1035,7 @@ void BitcodeReaderValueList::resolveConstantForwardRefs() { } } -void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) { +void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) { if (Idx == size()) { push_back(MD); return; @@ -913,7 +1044,7 @@ void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) { if (Idx >= size()) resize(Idx+1); - TrackingMDRef &OldMD = MDValuePtrs[Idx]; + TrackingMDRef &OldMD = MetadataPtrs[Idx]; if (!OldMD) { OldMD.reset(MD); return; @@ -925,11 +1056,11 @@ void BitcodeReaderMDValueList::assignValue(Metadata *MD, unsigned Idx) { --NumFwdRefs; } -Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { +Metadata *BitcodeReaderMetadataList::getValueFwdRef(unsigned Idx) { if (Idx >= size()) resize(Idx + 1); - if (Metadata *MD = MDValuePtrs[Idx]) + if (Metadata *MD = MetadataPtrs[Idx]) return MD; // Track forward refs to be resolved later. @@ -944,11 +1075,11 @@ Metadata *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { // Create and return a placeholder, which will later be RAUW'd. Metadata *MD = MDNode::getTemporary(Context, None).release(); - MDValuePtrs[Idx].reset(MD); + MetadataPtrs[Idx].reset(MD); return MD; } -void BitcodeReaderMDValueList::tryToResolveCycles() { +void BitcodeReaderMetadataList::tryToResolveCycles() { if (!AnyFwdRefs) // Nothing to do. return; @@ -959,7 +1090,7 @@ void BitcodeReaderMDValueList::tryToResolveCycles() { // Resolve any cycles. 
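  // Forward references were handed out as temporary nodes by getValueFwdRef();
  // now that the whole block has been read, each uniquable node in the forward
  // reference window can be resolved. A self-referential pair such as
  // !0 = !{!1} and !1 = !{!0} (illustrative) only becomes uniqued here.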
for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) { - auto &MD = MDValuePtrs[I]; + auto &MD = MetadataPtrs[I]; auto *N = dyn_cast_or_null(MD); if (!N) continue; @@ -1102,6 +1233,10 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::Cold; case bitc::ATTR_KIND_CONVERGENT: return Attribute::Convergent; + case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: + return Attribute::InaccessibleMemOnly; + case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: + return Attribute::InaccessibleMemOrArgMemOnly; case bitc::ATTR_KIND_INLINE_HINT: return Attribute::InlineHint; case bitc::ATTR_KIND_IN_REG: @@ -1126,6 +1261,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::NoImplicitFloat; case bitc::ATTR_KIND_NO_INLINE: return Attribute::NoInline; + case bitc::ATTR_KIND_NO_RECURSE: + return Attribute::NoRecurse; case bitc::ATTR_KIND_NON_LAZY_BIND: return Attribute::NonLazyBind; case bitc::ATTR_KIND_NON_NULL: @@ -1360,6 +1497,9 @@ std::error_code BitcodeReader::parseTypeTableBody() { case bitc::TYPE_CODE_X86_MMX: // X86_MMX ResultTy = Type::getX86_MMXTy(Context); break; + case bitc::TYPE_CODE_TOKEN: // TOKEN + ResultTy = Type::getTokenTy(Context); + break; case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width] if (Record.size() < 1) return error("Invalid record"); @@ -1524,7 +1664,107 @@ std::error_code BitcodeReader::parseTypeTableBody() { } } -std::error_code BitcodeReader::parseValueSymbolTable() { +std::error_code BitcodeReader::parseOperandBundleTags() { + if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID)) + return error("Invalid record"); + + if (!BundleTags.empty()) + return error("Invalid multiple blocks"); + + SmallVector Record; + + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Tags are implicitly mapped to integers by their order. + + if (Stream.readRecord(Entry.ID, Record) != bitc::OPERAND_BUNDLE_TAG) + return error("Invalid record"); + + // OPERAND_BUNDLE_TAG: [strchr x N] + BundleTags.emplace_back(); + if (convertToString(Record, 0, BundleTags.back())) + return error("Invalid record"); + Record.clear(); + } +} + +/// Associate a value with its name from the given index in the provided record. +ErrorOr BitcodeReader::recordValue(SmallVectorImpl &Record, + unsigned NameIndex, Triple &TT) { + SmallString<128> ValueName; + if (convertToString(Record, NameIndex, ValueName)) + return error("Invalid record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size() || !ValueList[ValueID]) + return error("Invalid record"); + Value *V = ValueList[ValueID]; + + StringRef NameStr(ValueName.data(), ValueName.size()); + if (NameStr.find_first_of(0) != StringRef::npos) + return error("Invalid value name"); + V->setName(NameStr); + auto *GO = dyn_cast(V); + if (GO) { + if (GO->getComdat() == reinterpret_cast(1)) { + if (TT.isOSBinFormatMachO()) + GO->setComdat(nullptr); + else + GO->setComdat(TheModule->getOrInsertComdat(V->getName())); + } + } + return V; +} + +/// Parse the value symbol table at either the current parsing location or +/// at the given bit offset if provided. 
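+/// For example (hypothetical numbers): for a module-level VST recorded at
+/// word offset 40, parseValueSymbolTable(40) saves the current bit position,
+/// jumps to bit 40 * 32, reads the table (recording per-function body
+/// offsets), and then jumps back so lazy materialization can proceed.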
+std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) { + uint64_t CurrentBit; + // Pass in the Offset to distinguish between calling for the module-level + // VST (where we want to jump to the VST offset) and the function-level + // VST (where we don't). + if (Offset > 0) { + // Save the current parsing location so we can jump back at the end + // of the VST read. + CurrentBit = Stream.GetCurrentBitNo(); + Stream.JumpToBit(Offset * 32); +#ifndef NDEBUG + // Do some checking if we are in debug mode. + BitstreamEntry Entry = Stream.advance(); + assert(Entry.Kind == BitstreamEntry::SubBlock); + assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID); +#else + // In NDEBUG mode ignore the output so we don't get an unused variable + // warning. + Stream.advance(); +#endif + } + + // Compute the delta between the bitcode indices in the VST (the word offset + // to the word-aligned ENTER_SUBBLOCK for the function block, and that + // expected by the lazy reader. The reader's EnterSubBlock expects to have + // already read the ENTER_SUBBLOCK code (size getAbbrevIDWidth) and BlockID + // (size BlockIDWidth). Note that we access the stream's AbbrevID width here + // just before entering the VST subblock because: 1) the EnterSubBlock + // changes the AbbrevID width; 2) the VST block is nested within the same + // outer MODULE_BLOCK as the FUNCTION_BLOCKs and therefore have the same + // AbbrevID width before calling EnterSubBlock; and 3) when we want to + // jump to the FUNCTION_BLOCK using this offset later, we don't want + // to rely on the stream's AbbrevID width being that of the MODULE_BLOCK. + unsigned FuncBitcodeOffsetDelta = + Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return error("Invalid record"); @@ -1542,6 +1782,8 @@ std::error_code BitcodeReader::parseValueSymbolTable() { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: + if (Offset > 0) + Stream.JumpToBit(CurrentBit); return std::error_code(); case BitstreamEntry::Record: // The interesting case. @@ -1554,23 +1796,39 @@ std::error_code BitcodeReader::parseValueSymbolTable() { default: // Default behavior: unknown type. break; case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] - if (convertToString(Record, 1, ValueName)) - return error("Invalid record"); - unsigned ValueID = Record[0]; - if (ValueID >= ValueList.size() || !ValueList[ValueID]) - return error("Invalid record"); - Value *V = ValueList[ValueID]; + ErrorOr ValOrErr = recordValue(Record, 1, TT); + if (std::error_code EC = ValOrErr.getError()) + return EC; + ValOrErr.get(); + break; + } + case bitc::VST_CODE_FNENTRY: { + // VST_FNENTRY: [valueid, offset, namechar x N] + ErrorOr ValOrErr = recordValue(Record, 2, TT); + if (std::error_code EC = ValOrErr.getError()) + return EC; + Value *V = ValOrErr.get(); - V->setName(StringRef(ValueName.data(), ValueName.size())); - if (auto *GO = dyn_cast(V)) { - if (GO->getComdat() == reinterpret_cast(1)) { - if (TT.isOSBinFormatMachO()) - GO->setComdat(nullptr); - else - GO->setComdat(TheModule->getOrInsertComdat(V->getName())); - } + auto *GO = dyn_cast(V); + if (!GO) { + // If this is an alias, need to get the actual Function object + // it aliases, in order to set up the DeferredFunctionInfo entry below. 
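+        // (An alias shares its aliasee's body offset, so the lazy
+        // materialization map is keyed on the underlying Function.)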
+ auto *GA = dyn_cast(V); + if (GA) + GO = GA->getBaseObject(); + assert(GO); } - ValueName.clear(); + + uint64_t FuncWordOffset = Record[1]; + Function *F = dyn_cast(GO); + assert(F); + uint64_t FuncBitOffset = FuncWordOffset * 32; + DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; + // Set the LastFunctionBlockBit to point to the last function block. + // Later when parsing is resumed after function materialization, + // we can simply skip that last function block. + if (FuncBitOffset > LastFunctionBlockBit) + LastFunctionBlockBit = FuncBitOffset; break; } case bitc::VST_CODE_BBENTRY: { @@ -1588,19 +1846,51 @@ std::error_code BitcodeReader::parseValueSymbolTable() { } } +/// Parse a single METADATA_KIND record, inserting result in MDKindMap. +std::error_code +BitcodeReader::parseMetadataKindRecord(SmallVectorImpl &Record) { + if (Record.size() < 2) + return error("Invalid record"); + + unsigned Kind = Record[0]; + SmallString<8> Name(Record.begin() + 1, Record.end()); + + unsigned NewKind = TheModule->getMDKindID(Name.str()); + if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second) + return error("Conflicting METADATA_KIND records"); + return std::error_code(); +} + static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; } -std::error_code BitcodeReader::parseMetadata() { +/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing +/// module level metadata. +std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { IsMetadataMaterialized = true; - unsigned NextMDValueNo = MDValueList.size(); + unsigned NextMetadataNo = MetadataList.size(); + if (ModuleLevel && SeenModuleValuesRecord) { + // Now that we are parsing the module level metadata, we want to restart + // the numbering of the MD values, and replace temp MD created earlier + // with their real values. If we saw a METADATA_VALUE record then we + // would have set the MetadataList size to the number specified in that + // record, to support parsing function-level metadata first, and we need + // to reset back to 0 to fill the MetadataList in with the parsed module + // The function-level metadata parsing should have reset the MetadataList + // size back to the value reported by the METADATA_VALUE record, saved in + // NumModuleMDs. + assert(NumModuleMDs == MetadataList.size() && + "Expected MetadataList to only contain module level values"); + NextMetadataNo = 0; + } if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) return error("Invalid record"); SmallVector Record; - auto getMD = - [&](unsigned ID) -> Metadata *{ return MDValueList.getValueFwdRef(ID); }; + auto getMD = [&](unsigned ID) -> Metadata * { + return MetadataList.getValueFwdRef(ID); + }; auto getMDOrNull = [&](unsigned ID) -> Metadata *{ if (ID) return getMD(ID - 1); @@ -1624,7 +1914,10 @@ std::error_code BitcodeReader::parseMetadata() { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - MDValueList.tryToResolveCycles(); + MetadataList.tryToResolveCycles(); + assert((!(ModuleLevel && SeenModuleValuesRecord) || + NumModuleMDs == MetadataList.size()) && + "Inconsistent bitcode: METADATA_VALUES mismatch"); return std::error_code(); case BitstreamEntry::Record: // The interesting case. 
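// Every operand reference in the record cases below goes through
// MetadataList.getValueFwdRef(), so a record may name metadata that has not
// been parsed yet; a temporary node stands in until tryToResolveCycles() runs
// at EndBlock. An illustrative textual equivalent (not from this patch):
//   !2 = !{!3}        ; parsed first, !3 materializes as a temporary
//   !3 = !{!"leaf"}   ; parsed later, the temporary is replaced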
@@ -1652,7 +1945,8 @@ std::error_code BitcodeReader::parseMetadata() { unsigned Size = Record.size(); NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name); for (unsigned i = 0; i != Size; ++i) { - MDNode *MD = dyn_cast_or_null(MDValueList.getValueFwdRef(Record[i])); + MDNode *MD = + dyn_cast_or_null(MetadataList.getValueFwdRef(Record[i])); if (!MD) return error("Invalid record"); NMD->addOperand(MD); @@ -1669,7 +1963,7 @@ std::error_code BitcodeReader::parseMetadata() { // If this isn't a LocalAsMetadata record, we're dropping it. This used // to be legal, but there's no upgrade path. auto dropRecord = [&] { - MDValueList.assignValue(MDNode::get(Context, None), NextMDValueNo++); + MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++); }; if (Record.size() != 2) { dropRecord(); @@ -1682,9 +1976,9 @@ std::error_code BitcodeReader::parseMetadata() { break; } - MDValueList.assignValue( + MetadataList.assignValue( LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_OLD_NODE: { @@ -1699,7 +1993,7 @@ std::error_code BitcodeReader::parseMetadata() { if (!Ty) return error("Invalid record"); if (Ty->isMetadataTy()) - Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); + Elts.push_back(MetadataList.getValueFwdRef(Record[i + 1])); else if (!Ty->isVoidTy()) { auto *MD = ValueAsMetadata::get(ValueList.getValueFwdRef(Record[i + 1], Ty)); @@ -1709,7 +2003,7 @@ std::error_code BitcodeReader::parseMetadata() { } else Elts.push_back(nullptr); } - MDValueList.assignValue(MDNode::get(Context, Elts), NextMDValueNo++); + MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++); break; } case bitc::METADATA_VALUE: { @@ -1720,9 +2014,9 @@ std::error_code BitcodeReader::parseMetadata() { if (Ty->isMetadataTy() || Ty->isVoidTy()) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_DISTINCT_NODE: @@ -1732,10 +2026,10 @@ std::error_code BitcodeReader::parseMetadata() { SmallVector Elts; Elts.reserve(Record.size()); for (unsigned ID : Record) - Elts.push_back(ID ? MDValueList.getValueFwdRef(ID - 1) : nullptr); - MDValueList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts) - : MDNode::get(Context, Elts), - NextMDValueNo++); + Elts.push_back(ID ? MetadataList.getValueFwdRef(ID - 1) : nullptr); + MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts) + : MDNode::get(Context, Elts), + NextMetadataNo++); break; } case bitc::METADATA_LOCATION: { @@ -1744,13 +2038,13 @@ std::error_code BitcodeReader::parseMetadata() { unsigned Line = Record[1]; unsigned Column = Record[2]; - MDNode *Scope = cast(MDValueList.getValueFwdRef(Record[3])); + MDNode *Scope = cast(MetadataList.getValueFwdRef(Record[3])); Metadata *InlinedAt = - Record[4] ? MDValueList.getValueFwdRef(Record[4] - 1) : nullptr; - MDValueList.assignValue( + Record[4] ? MetadataList.getValueFwdRef(Record[4] - 1) : nullptr; + MetadataList.assignValue( GET_OR_DISTINCT(DILocation, Record[0], (Context, Line, Column, Scope, InlinedAt)), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_GENERIC_DEBUG: { @@ -1766,63 +2060,65 @@ std::error_code BitcodeReader::parseMetadata() { auto *Header = getMDString(Record[3]); SmallVector DwarfOps; for (unsigned I = 4, E = Record.size(); I != E; ++I) - DwarfOps.push_back(Record[I] ? 
MDValueList.getValueFwdRef(Record[I] - 1) - : nullptr); - MDValueList.assignValue(GET_OR_DISTINCT(GenericDINode, Record[0], - (Context, Tag, Header, DwarfOps)), - NextMDValueNo++); + DwarfOps.push_back( + Record[I] ? MetadataList.getValueFwdRef(Record[I] - 1) : nullptr); + MetadataList.assignValue( + GET_OR_DISTINCT(GenericDINode, Record[0], + (Context, Tag, Header, DwarfOps)), + NextMetadataNo++); break; } case bitc::METADATA_SUBRANGE: { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DISubrange, Record[0], (Context, Record[1], unrotateSign(Record[2]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_ENUMERATOR: { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue(GET_OR_DISTINCT(DIEnumerator, Record[0], - (Context, unrotateSign(Record[1]), - getMDString(Record[2]))), - NextMDValueNo++); + MetadataList.assignValue( + GET_OR_DISTINCT( + DIEnumerator, Record[0], + (Context, unrotateSign(Record[1]), getMDString(Record[2]))), + NextMetadataNo++); break; } case bitc::METADATA_BASIC_TYPE: { if (Record.size() != 6) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIBasicType, Record[0], (Context, Record[1], getMDString(Record[2]), Record[3], Record[4], Record[5])), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_DERIVED_TYPE: { if (Record.size() != 12) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIDerivedType, Record[0], (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), Record[4], getMDOrNull(Record[5]), getMDOrNull(Record[6]), Record[7], Record[8], Record[9], Record[10], getMDOrNull(Record[11]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_COMPOSITE_TYPE: { if (Record.size() != 16) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DICompositeType, Record[0], (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), Record[4], @@ -1831,17 +2127,17 @@ std::error_code BitcodeReader::parseMetadata() { getMDOrNull(Record[11]), Record[12], getMDOrNull(Record[13]), getMDOrNull(Record[14]), getMDString(Record[15]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_SUBROUTINE_TYPE: { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DISubroutineType, Record[0], (Context, Record[1], getMDOrNull(Record[2]))), - NextMDValueNo++); + NextMetadataNo++); break; } @@ -1849,12 +2145,12 @@ std::error_code BitcodeReader::parseMetadata() { if (Record.size() != 6) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIModule, Record[0], (Context, getMDOrNull(Record[1]), - getMDString(Record[2]), getMDString(Record[3]), - getMDString(Record[4]), getMDString(Record[5]))), - NextMDValueNo++); + getMDString(Record[2]), getMDString(Record[3]), + getMDString(Record[4]), getMDString(Record[5]))), + NextMetadataNo++); break; } @@ -1862,180 +2158,218 @@ std::error_code BitcodeReader::parseMetadata() { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIFile, Record[0], (Context, getMDString(Record[1]), getMDString(Record[2]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_COMPILE_UNIT: { - if (Record.size() 
< 14 || Record.size() > 15) + if (Record.size() < 14 || Record.size() > 16) return error("Invalid record"); - MDValueList.assignValue( - GET_OR_DISTINCT( - DICompileUnit, Record[0], - (Context, Record[1], getMDOrNull(Record[2]), - getMDString(Record[3]), Record[4], getMDString(Record[5]), - Record[6], getMDString(Record[7]), Record[8], - getMDOrNull(Record[9]), getMDOrNull(Record[10]), - getMDOrNull(Record[11]), getMDOrNull(Record[12]), - getMDOrNull(Record[13]), Record.size() == 14 ? 0 : Record[14])), - NextMDValueNo++); + // Ignore Record[0], which indicates whether this compile unit is + // distinct. It's always distinct. + MetadataList.assignValue( + DICompileUnit::getDistinct( + Context, Record[1], getMDOrNull(Record[2]), + getMDString(Record[3]), Record[4], getMDString(Record[5]), + Record[6], getMDString(Record[7]), Record[8], + getMDOrNull(Record[9]), getMDOrNull(Record[10]), + getMDOrNull(Record[11]), getMDOrNull(Record[12]), + getMDOrNull(Record[13]), + Record.size() <= 15 ? 0 : getMDOrNull(Record[15]), + Record.size() <= 14 ? 0 : Record[14]), + NextMetadataNo++); break; } case bitc::METADATA_SUBPROGRAM: { - if (Record.size() != 19) + if (Record.size() != 18 && Record.size() != 19) return error("Invalid record"); - MDValueList.assignValue( - GET_OR_DISTINCT( - DISubprogram, Record[0], - (Context, getMDOrNull(Record[1]), getMDString(Record[2]), - getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], - getMDOrNull(Record[6]), Record[7], Record[8], Record[9], - getMDOrNull(Record[10]), Record[11], Record[12], Record[13], - Record[14], getMDOrNull(Record[15]), getMDOrNull(Record[16]), - getMDOrNull(Record[17]), getMDOrNull(Record[18]))), - NextMDValueNo++); + bool HasFn = Record.size() == 19; + DISubprogram *SP = GET_OR_DISTINCT( + DISubprogram, + Record[0] || Record[8], // All definitions should be distinct. + (Context, getMDOrNull(Record[1]), getMDString(Record[2]), + getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], + getMDOrNull(Record[6]), Record[7], Record[8], Record[9], + getMDOrNull(Record[10]), Record[11], Record[12], Record[13], + Record[14], getMDOrNull(Record[15 + HasFn]), + getMDOrNull(Record[16 + HasFn]), getMDOrNull(Record[17 + HasFn]))); + MetadataList.assignValue(SP, NextMetadataNo++); + + // Upgrade sp->function mapping to function->sp mapping. + if (HasFn && Record[15]) { + if (auto *CMD = dyn_cast(getMDOrNull(Record[15]))) + if (auto *F = dyn_cast(CMD->getValue())) { + if (F->isMaterializable()) + // Defer until materialized; unmaterialized functions may not have + // metadata. 
+ FunctionsWithSPs[F] = SP; + else if (!F->empty()) + F->setSubprogram(SP); + } + } break; } case bitc::METADATA_LEXICAL_BLOCK: { if (Record.size() != 5) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DILexicalBlock, Record[0], (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3], Record[4])), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_LEXICAL_BLOCK_FILE: { if (Record.size() != 4) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DILexicalBlockFile, Record[0], (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3])), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_NAMESPACE: { if (Record.size() != 5) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DINamespace, Record[0], (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), getMDString(Record[3]), Record[4])), - NextMDValueNo++); + NextMetadataNo++); + break; + } + case bitc::METADATA_MACRO: { + if (Record.size() != 5) + return error("Invalid record"); + + MetadataList.assignValue( + GET_OR_DISTINCT(DIMacro, Record[0], + (Context, Record[1], Record[2], + getMDString(Record[3]), getMDString(Record[4]))), + NextMetadataNo++); + break; + } + case bitc::METADATA_MACRO_FILE: { + if (Record.size() != 5) + return error("Invalid record"); + + MetadataList.assignValue( + GET_OR_DISTINCT(DIMacroFile, Record[0], + (Context, Record[1], Record[2], + getMDOrNull(Record[3]), getMDOrNull(Record[4]))), + NextMetadataNo++); break; } case bitc::METADATA_TEMPLATE_TYPE: { if (Record.size() != 3) return error("Invalid record"); - MDValueList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter, - Record[0], - (Context, getMDString(Record[1]), - getMDOrNull(Record[2]))), - NextMDValueNo++); + MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter, + Record[0], + (Context, getMDString(Record[1]), + getMDOrNull(Record[2]))), + NextMetadataNo++); break; } case bitc::METADATA_TEMPLATE_VALUE: { if (Record.size() != 5) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DITemplateValueParameter, Record[0], (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), getMDOrNull(Record[4]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_GLOBAL_VAR: { if (Record.size() != 11) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIGlobalVariable, Record[0], (Context, getMDOrNull(Record[1]), getMDString(Record[2]), getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], getMDOrNull(Record[6]), Record[7], Record[8], getMDOrNull(Record[9]), getMDOrNull(Record[10]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_LOCAL_VAR: { // 10th field is for the obseleted 'inlinedAt:' field. - if (Record.size() != 9 && Record.size() != 10) + if (Record.size() < 8 || Record.size() > 10) return error("Invalid record"); - MDValueList.assignValue( + // 2nd field used to be an artificial tag, either DW_TAG_auto_variable or + // DW_TAG_arg_variable. 
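Every METADATA_* case in this region funnels through the same helper: Record[0] carries the node's distinctness flag, and GET_OR_DISTINCT dispatches to the uniqued or the distinct constructor accordingly. The macro is defined near the top of parseMetadata() and #undef'd at its end (visible in a later hunk); its shape is essentially:

    #define GET_OR_DISTINCT(CLASS, DISTINCT, ARGS)                             \
      (DISTINCT ? CLASS::getDistinct ARGS : CLASS::get ARGS)

So GET_OR_DISTINCT(DISubrange, Record[0], (Context, ...)) expands to Record[0] ? DISubrange::getDistinct(Context, ...) : DISubrange::get(Context, ...), which is why most cases need no explicit uniqued/distinct branching of their own; METADATA_COMPILE_UNIT above is the exception, since compile units are now forced distinct regardless of the flag.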
+ bool HasTag = Record.size() > 8; + MetadataList.assignValue( GET_OR_DISTINCT(DILocalVariable, Record[0], - (Context, Record[1], getMDOrNull(Record[2]), - getMDString(Record[3]), getMDOrNull(Record[4]), - Record[5], getMDOrNull(Record[6]), Record[7], - Record[8])), - NextMDValueNo++); + (Context, getMDOrNull(Record[1 + HasTag]), + getMDString(Record[2 + HasTag]), + getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag], + getMDOrNull(Record[5 + HasTag]), Record[6 + HasTag], + Record[7 + HasTag])), + NextMetadataNo++); break; } case bitc::METADATA_EXPRESSION: { if (Record.size() < 1) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIExpression, Record[0], (Context, makeArrayRef(Record).slice(1))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_OBJC_PROPERTY: { if (Record.size() != 8) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIObjCProperty, Record[0], (Context, getMDString(Record[1]), getMDOrNull(Record[2]), Record[3], getMDString(Record[4]), getMDString(Record[5]), Record[6], getMDOrNull(Record[7]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_IMPORTED_ENTITY: { if (Record.size() != 6) return error("Invalid record"); - MDValueList.assignValue( + MetadataList.assignValue( GET_OR_DISTINCT(DIImportedEntity, Record[0], (Context, Record[1], getMDOrNull(Record[2]), getMDOrNull(Record[3]), Record[4], getMDString(Record[5]))), - NextMDValueNo++); + NextMetadataNo++); break; } case bitc::METADATA_STRING: { std::string String(Record.begin(), Record.end()); llvm::UpgradeMDStringConstant(String); Metadata *MD = MDString::get(Context, String); - MDValueList.assignValue(MD, NextMDValueNo++); + MetadataList.assignValue(MD, NextMetadataNo++); break; } case bitc::METADATA_KIND: { - if (Record.size() < 2) - return error("Invalid record"); - - unsigned Kind = Record[0]; - SmallString<8> Name(Record.begin()+1, Record.end()); - - unsigned NewKind = TheModule->getMDKindID(Name.str()); - if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second) - return error("Conflicting METADATA_KIND records"); + // Support older bitcode files that had METADATA_KIND records in a + // block with METADATA_BLOCK_ID. + if (std::error_code EC = parseMetadataKindRecord(Record)) + return EC; break; } } @@ -2043,6 +2377,43 @@ std::error_code BitcodeReader::parseMetadata() { #undef GET_OR_DISTINCT } +/// Parse the metadata kinds out of the METADATA_KIND_BLOCK. +std::error_code BitcodeReader::parseMetadataKinds() { + if (Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + // Read all the records. + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + unsigned Code = Stream.readRecord(Entry.ID, Record); + switch (Code) { + default: // Default behavior: ignore. + break; + case bitc::METADATA_KIND: { + if (std::error_code EC = parseMetadataKindRecord(Record)) + return EC; + break; + } + } + } +} + /// Decode a signed value stored with the sign bit in the LSB for dense VBR /// encoding. 
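The encoding that decodeSignRotatedValue() below undoes rotates the sign into bit 0, so small-magnitude negative numbers stay small and remain cheap under VBR. A standalone round-trip sketch may make the scheme concrete (the helper names here are ours, not the reader's; INT64_MIN has no positive counterpart and is encoded as the otherwise-unused value 1):

    #include <cassert>
    #include <cstdint>

    static uint64_t encodeSignRotated(int64_t V) {
      if (V >= 0)
        return uint64_t(V) << 1;        // 0, 1, 2, ... -> 0, 2, 4, ...
      if (V == INT64_MIN)
        return 1;                       // special case: "-0" means MININT
      return (uint64_t(-V) << 1) | 1;   // -1, -2, ... -> 3, 5, ...
    }

    static int64_t decodeSignRotated(uint64_t V) {
      if ((V & 1) == 0)
        return int64_t(V >> 1);         // even values are non-negative
      if (V != 1)
        return -int64_t(V >> 1);        // odd values are negative
      return INT64_MIN;                 // 1 decodes to INT64_MIN
    }

    int main() {
      for (int64_t X : {int64_t(0), int64_t(-1), int64_t(42), INT64_MIN})
        assert(decodeSignRotated(encodeSignRotated(X)) == X);
    }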
uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { @@ -2410,11 +2781,12 @@ std::error_code BitcodeReader::parseConstants() { Type *SelectorTy = Type::getInt1Ty(Context); - // If CurTy is a vector of length n, then Record[0] must be a - // vector. Otherwise, it must be a single bit. + // The selector might be an i1 or an + // Get the type from the ValueList before getting a forward ref. if (VectorType *VTy = dyn_cast(CurTy)) - SelectorTy = VectorType::get(Type::getInt1Ty(Context), - VTy->getNumElements()); + if (Value *V = ValueList[Record[0]]) + if (SelectorTy != V->getType()) + SelectorTy = VectorType::get(SelectorTy, VTy->getNumElements()); V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], SelectorTy), @@ -2567,9 +2939,6 @@ std::error_code BitcodeReader::parseConstants() { if (!Fn) return error("Invalid record"); - // Don't let Fn get dematerialized. - BlockAddressesTaken.insert(Fn); - // If the function is already parsed we can insert the block address right // away. BasicBlock *BB; @@ -2584,7 +2953,7 @@ std::error_code BitcodeReader::parseConstants() { return error("Invalid ID"); ++BBI; } - BB = BBI; + BB = &*BBI; } else { // Otherwise insert a placeholder and remember it so it can be inserted // when the function is parsed. @@ -2652,7 +3021,7 @@ std::error_code BitcodeReader::parseUseLists() { V = ValueList[ID]; unsigned NumUses = 0; SmallDenseMap Order; - for (const Use &U : V->uses()) { + for (const Use &U : V->materialized_uses()) { if (++NumUses > Record.size()) break; Order[&U] = Record[NumUses - 1]; @@ -2688,7 +3057,7 @@ std::error_code BitcodeReader::materializeMetadata() { for (uint64_t BitPos : DeferredMetadataInfo) { // Move the bit stream to the saved position. Stream.JumpToBit(BitPos); - if (std::error_code EC = parseMetadata()) + if (std::error_code EC = parseMetadata(true)) return EC; } DeferredMetadataInfo.clear(); @@ -2697,6 +3066,25 @@ std::error_code BitcodeReader::materializeMetadata() { void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; } +void BitcodeReader::saveMetadataList( + DenseMap &MetadataToIDs, bool OnlyTempMD) { + for (unsigned ID = 0; ID < MetadataList.size(); ++ID) { + Metadata *MD = MetadataList[ID]; + auto *N = dyn_cast_or_null(MD); + // Save all values if !OnlyTempMD, otherwise just the temporary metadata. + if (!OnlyTempMD || (N && N->isTemporary())) { + // Will call this after materializing each function, in order to + // handle remapping of the function's instructions/metadata. + // See if we already have an entry in that case. + if (OnlyTempMD && MetadataToIDs.count(MD)) { + assert(MetadataToIDs[MD] == ID && "Inconsistent metadata value id"); + continue; + } + MetadataToIDs[MD] = ID; + } + } +} + /// When we see the block for a function body, remember where it is and then /// skip it. This lets us lazily deserialize the functions. std::error_code BitcodeReader::rememberAndSkipFunctionBody() { @@ -2709,6 +3097,9 @@ std::error_code BitcodeReader::rememberAndSkipFunctionBody() { // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); + assert( + (DeferredFunctionInfo[Fn] == 0 || DeferredFunctionInfo[Fn] == CurBit) && + "Mismatch between VST and scanned function offsets"); DeferredFunctionInfo[Fn] = CurBit; // Skip over the function block for now. @@ -2741,10 +3132,91 @@ std::error_code BitcodeReader::globalCleanup() { return std::error_code(); } -std::error_code BitcodeReader::parseModule(bool Resume, +/// Support for lazy parsing of function bodies. 
This is required if we +/// either have an old bitcode file without a VST forward declaration record, +/// or if we have an anonymous function being materialized, since anonymous +/// functions do not have a name and are therefore not in the VST. +std::error_code BitcodeReader::rememberAndSkipFunctionBodies() { + Stream.JumpToBit(NextUnreadBit); + + if (Stream.AtEndOfStream()) + return error("Could not find function in stream"); + + if (!SeenFirstFunctionBody) + return error("Trying to materialize functions before seeing function blocks"); + + // An old bitcode file with the symbol table at the end would have + // finished the parse greedily. + assert(SeenValueSymbolTable); + + SmallVector Record; + + while (1) { + BitstreamEntry Entry = Stream.advance(); + switch (Entry.Kind) { + default: + return error("Expect SubBlock"); + case BitstreamEntry::SubBlock: + switch (Entry.ID) { + default: + return error("Expect function block"); + case bitc::FUNCTION_BLOCK_ID: + if (std::error_code EC = rememberAndSkipFunctionBody()) + return EC; + NextUnreadBit = Stream.GetCurrentBitNo(); + return std::error_code(); + } + } + } +} + +std::error_code BitcodeReader::parseBitcodeVersion() { + if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID)) + return error("Invalid record"); + + // Read all the records. + SmallVector Record; + while (1) { + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + default: + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + unsigned BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { + default: // Default behavior: reject + return error("Invalid value"); + case bitc::IDENTIFICATION_CODE_STRING: { // IDENTIFICATION: [strchr x + // N] + convertToString(Record, 0, ProducerIdentification); + break; + } + case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#] + unsigned epoch = (unsigned)Record[0]; + if (epoch != bitc::BITCODE_CURRENT_EPOCH) { + return error( + Twine("Incompatible epoch: Bitcode '") + Twine(epoch) + + "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'"); + } + } + } + } +} + +std::error_code BitcodeReader::parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata) { - if (Resume) - Stream.JumpToBit(NextUnreadBit); + if (ResumeBit) + Stream.JumpToBit(ResumeBit); else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); @@ -2785,9 +3257,23 @@ std::error_code BitcodeReader::parseModule(bool Resume, return EC; break; case bitc::VALUE_SYMTAB_BLOCK_ID: - if (std::error_code EC = parseValueSymbolTable()) - return EC; - SeenValueSymbolTable = true; + if (!SeenValueSymbolTable) { + // Either this is an old form VST without function index and an + // associated VST forward declaration record (which would have caused + // the VST to be jumped to and parsed before it was encountered + // normally in the stream), or there were no function blocks to + // trigger an earlier parsing of the VST. + assert(VSTOffset == 0 || FunctionsWithBodies.empty()); + if (std::error_code EC = parseValueSymbolTable()) + return EC; + SeenValueSymbolTable = true; + } else { + // We must have had a VST forward declaration record, which caused + // the parser to jump to and parse the VST earlier. 
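Abstracting away the bitstream, the deferral described above works like this: DeferredFunctionInfo maps each function to the bit offset of its body, with 0 meaning "not located yet". New bitcode pre-populates the offsets from VST entries; old bitcode, and anonymous functions that have no VST entry, fall back to scanning forward one function block at a time. A toy model of that loop (types and names here are ours, under the assumption that scanning always advances by exactly one body):

    #include <cassert>
    #include <cstdint>
    #include <deque>
    #include <map>
    #include <string>

    struct LazyBodyIndex {
      std::map<std::string, uint64_t> DeferredFunctionInfo;      // 0 = unknown
      std::deque<std::pair<std::string, uint64_t>> UnscannedBodies; // "stream"

      // Fallback path, analogous to rememberAndSkipFunctionBodies().
      void scanNextBody() {
        assert(!UnscannedBodies.empty() && "Could not find function in stream");
        auto [Name, Bit] = UnscannedBodies.front();
        UnscannedBodies.pop_front();
        DeferredFunctionInfo[Name] = Bit;
      }

      uint64_t materializeOffset(const std::string &Name) {
        uint64_t &Bit = DeferredFunctionInfo[Name];
        while (Bit == 0)        // same loop shape as findFunctionInStream()
          scanNextBody();
        return Bit;
      }
    };

    int main() {
      LazyBodyIndex Idx;
      Idx.UnscannedBodies = {{"f", 128}, {"g", 512}};
      assert(Idx.materializeOffset("g") == 512); // scans past f's body first
    }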
+ assert(VSTOffset > 0); + if (Stream.SkipBlock()) + return error("Invalid record"); + } break; case bitc::CONSTANTS_BLOCK_ID: if (std::error_code EC = parseConstants()) @@ -2802,7 +3288,11 @@ std::error_code BitcodeReader::parseModule(bool Resume, break; } assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata"); - if (std::error_code EC = parseMetadata()) + if (std::error_code EC = parseMetadata(true)) + return EC; + break; + case bitc::METADATA_KIND_BLOCK_ID: + if (std::error_code EC = parseMetadataKinds()) return EC; break; case bitc::FUNCTION_BLOCK_ID: @@ -2815,8 +3305,39 @@ std::error_code BitcodeReader::parseModule(bool Resume, SeenFirstFunctionBody = true; } + if (VSTOffset > 0) { + // If we have a VST forward declaration record, make sure we + // parse the VST now if we haven't already. It is needed to + // set up the DeferredFunctionInfo vector for lazy reading. + if (!SeenValueSymbolTable) { + if (std::error_code EC = + BitcodeReader::parseValueSymbolTable(VSTOffset)) + return EC; + SeenValueSymbolTable = true; + // Fall through so that we record the NextUnreadBit below. + // This is necessary in case we have an anonymous function that + // is later materialized. Since it will not have a VST entry we + // need to fall back to the lazy parse to find its offset. + } else { + // If we have a VST forward declaration record, but have already + // parsed the VST (just above, when the first function body was + // encountered here), then we are resuming the parse after + // materializing functions. The ResumeBit points to the + // start of the last function block recorded in the + // DeferredFunctionInfo map. Skip it. + if (Stream.SkipBlock()) + return error("Invalid record"); + continue; + } + } + + // Support older bitcode files that did not have the function + // index in the VST, nor a VST forward declaration record, as + // well as anonymous functions that do not have VST entries. + // Build the DeferredFunctionInfo vector on the fly. if (std::error_code EC = rememberAndSkipFunctionBody()) return EC; + // Suspend parsing when we reach the function bodies. Subsequent // materialization calls will resume it when necessary. If the bitcode // file is old, the symbol table will be at the end instead and will not @@ -2830,6 +3351,10 @@ std::error_code BitcodeReader::parseModule(bool Resume, if (std::error_code EC = parseUseLists()) return EC; break; + case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: + if (std::error_code EC = parseOperandBundleTags()) + return EC; + break; } continue; @@ -2840,7 +3365,8 @@ std::error_code BitcodeReader::parseModule(bool Resume, // Read a record. - switch (Stream.readRecord(Entry.ID, Record)) { + auto BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { default: break; // Default behavior, ignore unknown content. 
case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] if (Record.size() < 1) @@ -3012,11 +3538,14 @@ std::error_code BitcodeReader::parseModule(bool Resume, auto *FTy = dyn_cast(Ty); if (!FTy) return error("Invalid type for value"); + auto CC = static_cast(Record[1]); + if (CC & ~CallingConv::MaxID) + return error("Invalid calling convention ID"); Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule); - Func->setCallingConv(static_cast(Record[1])); + Func->setCallingConv(CC); bool isProto = Record[2]; uint64_t RawLinkage = Record[3]; Func->setLinkage(getDecodedLinkage(RawLinkage)); @@ -3079,35 +3608,51 @@ std::error_code BitcodeReader::parseModule(bool Resume, } break; } - // ALIAS: [alias type, aliasee val#, linkage] - // ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass] - case bitc::MODULE_CODE_ALIAS: { - if (Record.size() < 3) + // ALIAS: [alias type, addrspace, aliasee val#, linkage] + // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, dllstorageclass] + case bitc::MODULE_CODE_ALIAS: + case bitc::MODULE_CODE_ALIAS_OLD: { + bool NewRecord = BitCode == bitc::MODULE_CODE_ALIAS; + if (Record.size() < (3 + (unsigned)NewRecord)) return error("Invalid record"); - Type *Ty = getTypeByID(Record[0]); + unsigned OpNum = 0; + Type *Ty = getTypeByID(Record[OpNum++]); if (!Ty) return error("Invalid record"); - auto *PTy = dyn_cast(Ty); - if (!PTy) - return error("Invalid type for value"); - auto *NewGA = - GlobalAlias::create(PTy, getDecodedLinkage(Record[2]), "", TheModule); + unsigned AddrSpace; + if (!NewRecord) { + auto *PTy = dyn_cast(Ty); + if (!PTy) + return error("Invalid type for value"); + Ty = PTy->getElementType(); + AddrSpace = PTy->getAddressSpace(); + } else { + AddrSpace = Record[OpNum++]; + } + + auto Val = Record[OpNum++]; + auto Linkage = Record[OpNum++]; + auto *NewGA = GlobalAlias::create( + Ty, AddrSpace, getDecodedLinkage(Linkage), "", TheModule); // Old bitcode files didn't have visibility field. // Local linkage must have default visibility. - if (Record.size() > 3 && !NewGA->hasLocalLinkage()) - // FIXME: Change to an error if non-default in 4.0. - NewGA->setVisibility(getDecodedVisibility(Record[3])); - if (Record.size() > 4) - NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[4])); + if (OpNum != Record.size()) { + auto VisInd = OpNum++; + if (!NewGA->hasLocalLinkage()) + // FIXME: Change to an error if non-default in 4.0. 
+ NewGA->setVisibility(getDecodedVisibility(Record[VisInd])); + } + if (OpNum != Record.size()) + NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++])); else - upgradeDLLImportExportLinkage(NewGA, Record[2]); - if (Record.size() > 5) - NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[5])); - if (Record.size() > 6) - NewGA->setUnnamedAddr(Record[6]); + upgradeDLLImportExportLinkage(NewGA, Linkage); + if (OpNum != Record.size()) + NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++])); + if (OpNum != Record.size()) + NewGA->setUnnamedAddr(Record[OpNum++]); ValueList.push_back(NewGA); - AliasInits.push_back(std::make_pair(NewGA, Record[1])); + AliasInits.push_back(std::make_pair(NewGA, Val)); break; } /// MODULE_CODE_PURGEVALS: [numvals] @@ -3117,11 +3662,52 @@ std::error_code BitcodeReader::parseModule(bool Resume, return error("Invalid record"); ValueList.shrinkTo(Record[0]); break; + /// MODULE_CODE_VSTOFFSET: [offset] + case bitc::MODULE_CODE_VSTOFFSET: + if (Record.size() < 1) + return error("Invalid record"); + VSTOffset = Record[0]; + break; + /// MODULE_CODE_METADATA_VALUES: [numvals] + case bitc::MODULE_CODE_METADATA_VALUES: + if (Record.size() < 1) + return error("Invalid record"); + assert(!IsMetadataMaterialized); + // This record contains the number of metadata values in the module-level + // METADATA_BLOCK. It is used to support lazy parsing of metadata as + // a postpass, where we will parse function-level metadata first. + // This is needed because the ids of metadata are assigned implicitly + // based on their ordering in the bitcode, with the function-level + // metadata ids starting after the module-level metadata ids. Otherwise, + // we would have to parse the module-level metadata block to prime the + // MetadataList when we are lazy loading metadata during function + // importing. Initialize the MetadataList size here based on the + // record value, regardless of whether we are doing lazy metadata + // loading, so that we have consistent handling and assertion + // checking in parseMetadata for module-level metadata. + NumModuleMDs = Record[0]; + SeenModuleValuesRecord = true; + assert(MetadataList.size() == 0); + MetadataList.resize(NumModuleMDs); + break; } Record.clear(); } } +/// Helper to read the header common to all bitcode files. +static bool hasValidBitcodeHeader(BitstreamCursor &Stream) { + // Sniff for the signature. + if (Stream.Read(8) != 'B' || + Stream.Read(8) != 'C' || + Stream.Read(4) != 0x0 || + Stream.Read(4) != 0xC || + Stream.Read(4) != 0xE || + Stream.Read(4) != 0xD) + return false; + return true; +} + std::error_code BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, Module *M, bool ShouldLazyLoadMetadata) { @@ -3131,12 +3717,7 @@ BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, return EC; // Sniff for the signature. 
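For reference, the signature sniffed by hasValidBitcodeHeader() is the four bytes 'B', 'C', 0xC0, 0xDE. The two pairs of Read(4) calls consume the 0xC0 and 0xDE bytes nibble-by-nibble, low nibble first, which is why the values checked are 0x0, 0xC then 0xE, 0xD. On plain bytes the same check is simply:

    #include <cstddef>
    #include <cstdint>

    // Standalone restatement of the magic check (names are ours).
    static bool startsWithBitcodeMagic(const uint8_t *Buf, size_t Size) {
      return Size >= 4 && Buf[0] == 'B' && Buf[1] == 'C' &&
             Buf[2] == 0xC0 && Buf[3] == 0xDE;
    }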
- if (Stream.Read(8) != 'B' || - Stream.Read(8) != 'C' || - Stream.Read(4) != 0x0 || - Stream.Read(4) != 0xC || - Stream.Read(4) != 0xE || - Stream.Read(4) != 0xD) + if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily @@ -3153,8 +3734,13 @@ BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, if (Entry.Kind != BitstreamEntry::SubBlock) return error("Malformed block"); + if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { + parseBitcodeVersion(); + continue; + } + if (Entry.ID == bitc::MODULE_BLOCK_ID) - return parseModule(false, ShouldLazyLoadMetadata); + return parseModule(0, ShouldLazyLoadMetadata); if (Stream.SkipBlock()) return error("Invalid record"); @@ -3204,12 +3790,7 @@ ErrorOr BitcodeReader::parseTriple() { return EC; // Sniff for the signature. - if (Stream.Read(8) != 'B' || - Stream.Read(8) != 'C' || - Stream.Read(4) != 0x0 || - Stream.Read(4) != 0xC || - Stream.Read(4) != 0xE || - Stream.Read(4) != 0xD) + if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily @@ -3239,6 +3820,41 @@ ErrorOr BitcodeReader::parseTriple() { } } +ErrorOr BitcodeReader::parseIdentificationBlock() { + if (std::error_code EC = initStream(nullptr)) + return EC; + + // Sniff for the signature. + if (!hasValidBitcodeHeader(Stream)) + return error("Invalid bitcode signature"); + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. + while (1) { + BitstreamEntry Entry = Stream.advance(); + switch (Entry.Kind) { + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + + case BitstreamEntry::SubBlock: + if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { + if (std::error_code EC = parseBitcodeVersion()) + return EC; + return ProducerIdentification; + } + // Ignore other sub-blocks. + if (Stream.SkipBlock()) + return error("Malformed block"); + continue; + case BitstreamEntry::Record: + Stream.skipRecord(Entry.ID); + continue; + } + } +} + /// Parse metadata attachments. std::error_code BitcodeReader::parseMetadataAttachment(Function &F) { if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID)) @@ -3274,7 +3890,7 @@ std::error_code BitcodeReader::parseMetadataAttachment(Function &F) { auto K = MDKindMap.find(Record[I]); if (K == MDKindMap.end()) return error("Invalid ID"); - Metadata *MD = MDValueList.getValueFwdRef(Record[I + 1]); + Metadata *MD = MetadataList.getValueFwdRef(Record[I + 1]); F.setMetadata(K->second, cast(MD)); } continue; @@ -3288,7 +3904,7 @@ std::error_code BitcodeReader::parseMetadataAttachment(Function &F) { MDKindMap.find(Kind); if (I == MDKindMap.end()) return error("Invalid ID"); - Metadata *Node = MDValueList.getValueFwdRef(Record[i + 1]); + Metadata *Node = MetadataList.getValueFwdRef(Record[i + 1]); if (isa(Node)) // Drop the attachment. This used to be legal, but there's no // upgrade path. 
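Both the old inline METADATA_KIND handling and the new parseMetadataKindRecord()/parseMetadataKinds() path feed the same structure: MDKindMap translates the kind numbers stored in this particular bitcode file into the loading module's kind IDs, and the attachment parser below rejects IDs it has never registered. Reduced to its essentials (hypothetical standalone types, not the reader's):

    #include <map>

    struct KindRemapper {
      std::map<unsigned, unsigned> MDKindMap; // bitcode kind -> module kind

      // Mirrors the "Conflicting METADATA_KIND records" check.
      bool registerKind(unsigned BitcodeKind, unsigned ModuleKind) {
        return MDKindMap.insert({BitcodeKind, ModuleKind}).second;
      }
      // Returns -1 on the "Invalid ID" error path.
      int remap(unsigned BitcodeKind) const {
        auto It = MDKindMap.find(BitcodeKind);
        return It == MDKindMap.end() ? -1 : int(It->second);
      }
    };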
@@ -3303,17 +3919,17 @@ std::error_code BitcodeReader::parseMetadataAttachment(Function &F) { } } -static std::error_code typeCheckLoadStoreInst(DiagnosticHandlerFunction DH, - Type *ValType, Type *PtrType) { +static std::error_code typeCheckLoadStoreInst(Type *ValType, Type *PtrType) { + LLVMContext &Context = PtrType->getContext(); if (!isa(PtrType)) - return error(DH, "Load/Store operand is not a pointer type"); + return error(Context, "Load/Store operand is not a pointer type"); Type *ElemType = cast(PtrType)->getElementType(); if (ValType && ValType != ElemType) - return error(DH, "Explicit load/store type does not match pointee type of " - "pointer operand"); + return error(Context, "Explicit load/store type does not match pointee " + "type of pointer operand"); if (!PointerType::isLoadableOrStorableType(ElemType)) - return error(DH, "Cannot load/store from pointer"); + return error(Context, "Cannot load/store from pointer"); return std::error_code(); } @@ -3324,11 +3940,11 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); - unsigned ModuleMDValueListSize = MDValueList.size(); + unsigned ModuleMetadataListSize = MetadataList.size(); // Add all the function arguments to the value table. - for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - ValueList.push_back(I); + for (Argument &I : F->args()) + ValueList.push_back(&I); unsigned NextValueNo = ValueList.size(); BasicBlock *CurBB = nullptr; @@ -3344,6 +3960,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { return nullptr; }; + std::vector OperandBundles; + // Read all the records. SmallVector Record; while (1) { @@ -3452,8 +4070,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { unsigned ScopeID = Record[2], IAID = Record[3]; MDNode *Scope = nullptr, *IA = nullptr; - if (ScopeID) Scope = cast(MDValueList.getValueFwdRef(ScopeID-1)); - if (IAID) IA = cast(MDValueList.getValueFwdRef(IAID-1)); + if (ScopeID) + Scope = cast(MetadataList.getValueFwdRef(ScopeID - 1)); + if (IAID) + IA = cast(MetadataList.getValueFwdRef(IAID - 1)); LastLoc = DebugLoc::get(Line, Col, Scope, IA); I->setDebugLoc(LastLoc); I = nullptr; @@ -3515,7 +4135,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { CurBB->getInstList().push_back(Temp); } } else { - I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy); + auto CastOp = (Instruction::CastOps)Opc; + if (!CastInst::castIsValid(CastOp, Op, ResTy)) + return error("Invalid cast"); + I = CastInst::Create(CastOp, Op, ResTy); } InstructionList.push_back(I); break; @@ -3811,6 +4434,110 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { } break; } + case bitc::FUNC_CODE_INST_CLEANUPRET: { // CLEANUPRET: [val] or [val,bb#] + if (Record.size() != 1 && Record.size() != 2) + return error("Invalid record"); + unsigned Idx = 0; + Value *CleanupPad = + getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + if (!CleanupPad) + return error("Invalid record"); + BasicBlock *UnwindDest = nullptr; + if (Record.size() == 2) { + UnwindDest = getBasicBlock(Record[Idx++]); + if (!UnwindDest) + return error("Invalid record"); + } + + I = CleanupReturnInst::Create(CleanupPad, UnwindDest); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_CATCHRET: { // CATCHRET: [val,bb#] + if (Record.size() != 2) + return error("Invalid record"); + unsigned Idx = 0; + Value *CatchPad = + getValue(Record, Idx++, NextValueNo, 
Type::getTokenTy(Context)); + if (!CatchPad) + return error("Invalid record"); + BasicBlock *BB = getBasicBlock(Record[Idx++]); + if (!BB) + return error("Invalid record"); + + I = CatchReturnInst::Create(CatchPad, BB); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_CATCHSWITCH: { // CATCHSWITCH: [tok,num,(bb)*,bb?] + // We must have, at minimum, the outer scope and the number of arguments. + if (Record.size() < 2) + return error("Invalid record"); + + unsigned Idx = 0; + + Value *ParentPad = + getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + + unsigned NumHandlers = Record[Idx++]; + + SmallVector Handlers; + for (unsigned Op = 0; Op != NumHandlers; ++Op) { + BasicBlock *BB = getBasicBlock(Record[Idx++]); + if (!BB) + return error("Invalid record"); + Handlers.push_back(BB); + } + + BasicBlock *UnwindDest = nullptr; + if (Idx + 1 == Record.size()) { + UnwindDest = getBasicBlock(Record[Idx++]); + if (!UnwindDest) + return error("Invalid record"); + } + + if (Record.size() != Idx) + return error("Invalid record"); + + auto *CatchSwitch = + CatchSwitchInst::Create(ParentPad, UnwindDest, NumHandlers); + for (BasicBlock *Handler : Handlers) + CatchSwitch->addHandler(Handler); + I = CatchSwitch; + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_CATCHPAD: + case bitc::FUNC_CODE_INST_CLEANUPPAD: { // [tok,num,(ty,val)*] + // We must have, at minimum, the outer scope and the number of arguments. + if (Record.size() < 2) + return error("Invalid record"); + + unsigned Idx = 0; + + Value *ParentPad = + getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); + + unsigned NumArgOperands = Record[Idx++]; + + SmallVector Args; + for (unsigned Op = 0; Op != NumArgOperands; ++Op) { + Value *Val; + if (getValueTypePair(Record, Idx, NextValueNo, Val)) + return error("Invalid record"); + Args.push_back(Val); + } + + if (Record.size() != Idx) + return error("Invalid record"); + + if (BitCode == bitc::FUNC_CODE_INST_CLEANUPPAD) + I = CleanupPadInst::Create(ParentPad, Args); + else + I = CatchPadInst::Create(ParentPad, Args); + InstructionList.push_back(I); + break; + } case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] // Check magic if ((Record[0] >> 16) == SWITCH_INST_MAGIC) { @@ -3973,10 +4700,11 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { } } - I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops); + I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops, OperandBundles); + OperandBundles.clear(); InstructionList.push_back(I); - cast(I) - ->setCallingConv(static_cast(~(1U << 13) & CCInfo)); + cast(I)->setCallingConv( + static_cast(CallingConv::MaxID & CCInfo)); cast(I)->setAttributes(PAL); break; } @@ -4081,6 +4809,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { uint64_t AlignRecord = Record[3]; const uint64_t InAllocaMask = uint64_t(1) << 5; const uint64_t ExplicitTypeMask = uint64_t(1) << 6; + // Reserve bit 7 for SwiftError flag. 
+ // const uint64_t SwiftErrorMask = uint64_t(1) << 7; const uint64_t FlagMask = InAllocaMask | ExplicitTypeMask; bool InAlloca = AlignRecord & InAllocaMask; Type *Ty = getTypeByID(Record[0]); @@ -4115,8 +4845,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { Type *Ty = nullptr; if (OpNum + 3 == Record.size()) Ty = getTypeByID(Record[OpNum++]); - if (std::error_code EC = - typeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType())) + if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType())) return EC; if (!Ty) Ty = cast(Op->getType())->getElementType(); @@ -4140,8 +4869,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { Type *Ty = nullptr; if (OpNum + 5 == Record.size()) Ty = getTypeByID(Record[OpNum++]); - if (std::error_code EC = - typeCheckLoadStoreInst(DiagnosticHandler, Ty, Op->getType())) + if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType())) return EC; if (!Ty) Ty = cast(Op->getType())->getElementType(); @@ -4175,8 +4903,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { OpNum + 2 != Record.size()) return error("Invalid record"); - if (std::error_code EC = typeCheckLoadStoreInst( - DiagnosticHandler, Val->getType(), Ptr->getType())) + if (std::error_code EC = + typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return EC; unsigned Align; if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align)) @@ -4199,8 +4927,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { OpNum + 4 != Record.size()) return error("Invalid record"); - if (std::error_code EC = typeCheckLoadStoreInst( - DiagnosticHandler, Val->getType(), Ptr->getType())) + if (std::error_code EC = + typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return EC; AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == NotAtomic || Ordering == Acquire || @@ -4237,8 +4965,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { return error("Invalid record"); SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]); - if (std::error_code EC = typeCheckLoadStoreInst( - DiagnosticHandler, Cmp->getType(), Ptr->getType())) + if (std::error_code EC = + typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) return EC; AtomicOrdering FailureOrdering; if (Record.size() < 7) @@ -4299,7 +5027,7 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_CALL: { - // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] + // CALL: [paramattrs, cc, fmf, fnty, fnid, arg0, arg1...] 
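The CCInfo word unpacked in the CALL case below packs several flags around the calling convention. A sketch of the layout, assuming the CALL_* bit positions this revision defines in LLVMBitCodes.h (CALL_TAIL = 0, CALL_CCONV = 1, CALL_MUSTTAIL = 14, CALL_EXPLICIT_TYPE = 15, CALL_NOTAIL = 16, CALL_FMF = 17); the convention itself occupies bits 1-10, hence the 0x7ff mask used in the code:

    #include <cstdint>

    struct CallFlags {
      bool Tail, MustTail, NoTail, HasExplicitType, HasFMF;
      unsigned CConv;
    };

    static CallFlags unpackCCInfo(uint64_t CCInfo) {
      CallFlags F;
      F.Tail            = CCInfo & (1u << 0);   // CALL_TAIL
      F.CConv           = (CCInfo & 0x7ff) >> 1; // CALL_CCONV, 10 bits
      F.MustTail        = CCInfo & (1u << 14);  // CALL_MUSTTAIL
      F.HasExplicitType = CCInfo & (1u << 15);  // CALL_EXPLICIT_TYPE
      F.NoTail          = CCInfo & (1u << 16);  // CALL_NOTAIL
      F.HasFMF          = CCInfo & (1u << 17);  // CALL_FMF
      return F;
    }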
if (Record.size() < 3) return error("Invalid record"); @@ -4307,8 +5035,15 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { AttributeSet PAL = getAttributes(Record[OpNum++]); unsigned CCInfo = Record[OpNum++]; + FastMathFlags FMF; + if ((CCInfo >> bitc::CALL_FMF) & 1) { + FMF = getDecodedFastMathFlags(Record[OpNum++]); + if (!FMF.any()) + return error("Fast math flags indicator set for call with no FMF"); + } + FunctionType *FTy = nullptr; - if (CCInfo >> 15 & 1 && + if (CCInfo >> bitc::CALL_EXPLICIT_TYPE & 1 && !(FTy = dyn_cast(getTypeByID(Record[OpNum++])))) return error("Explicit call type is not a function type"); @@ -4354,17 +5089,26 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { } } - I = CallInst::Create(FTy, Callee, Args); + I = CallInst::Create(FTy, Callee, Args, OperandBundles); + OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( - static_cast((~(1U << 14) & CCInfo) >> 1)); + static_cast((0x7ff & CCInfo) >> bitc::CALL_CCONV)); CallInst::TailCallKind TCK = CallInst::TCK_None; - if (CCInfo & 1) + if (CCInfo & 1 << bitc::CALL_TAIL) TCK = CallInst::TCK_Tail; - if (CCInfo & (1 << 14)) + if (CCInfo & (1 << bitc::CALL_MUSTTAIL)) TCK = CallInst::TCK_MustTail; + if (CCInfo & (1 << bitc::CALL_NOTAIL)) + TCK = CallInst::TCK_NoTail; cast(I)->setTailCallKind(TCK); cast(I)->setAttributes(PAL); + if (FMF.any()) { + if (!isa(I)) + return error("Fast-math-flags specified for call without " + "floating-point scalar or vector return type"); + I->setFastMathFlags(FMF); + } break; } case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] @@ -4379,6 +5123,28 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { InstructionList.push_back(I); break; } + + case bitc::FUNC_CODE_OPERAND_BUNDLE: { + // A call or an invoke can be optionally prefixed with some variable + // number of operand bundle blocks. These blocks are read into + // OperandBundles and consumed at the next call or invoke instruction. + + if (Record.size() < 1 || Record[0] >= BundleTags.size()) + return error("Invalid record"); + + std::vector Inputs; + + unsigned OpNum = 1; + while (OpNum != Record.size()) { + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return error("Invalid record"); + Inputs.push_back(Op); + } + + OperandBundles.emplace_back(BundleTags[Record[0]], std::move(Inputs)); + continue; + } } // Add instruction to end of current BB. If there is no current BB, reject @@ -4387,6 +5153,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { delete I; return error("Invalid instruction with no BB"); } + if (!OperandBundles.empty()) { + delete I; + return error("Operand bundles found with no consumer"); + } CurBB->getInstList().push_back(I); // If this was a terminator instruction, move to the next block. @@ -4402,6 +5172,9 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) { OutOfRecordLoop: + if (!OperandBundles.empty()) + return error("Operand bundles found with no consumer"); + // Check the function list for unresolved values. if (Argument *A = dyn_cast(ValueList.back())) { if (!A->getParent()) { @@ -4421,7 +5194,7 @@ OutOfRecordLoop: // Trim the value list down to the size it was before we parsed this function. 
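The FUNC_CODE_OPERAND_BUNDLE case above follows an accumulate-and-consume protocol: bundle records stash their data in a side vector, the next call or invoke attaches and clears it, and leftovers at the next non-call instruction or at function end are the "Operand bundles found with no consumer" error. A toy model of that protocol (hypothetical types):

    #include <string>
    #include <vector>

    struct Bundle { std::string Tag; std::vector<int> Inputs; };

    struct FunctionParserModel {
      std::vector<Bundle> OperandBundles;

      void onBundleRecord(Bundle B) { OperandBundles.push_back(std::move(B)); }

      void onCallRecord() {
        // Build the call/invoke with OperandBundles attached...
        OperandBundles.clear(); // ...then consume them.
      }

      // False means "Operand bundles found with no consumer".
      bool onOtherInstruction() const { return OperandBundles.empty(); }
    };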
ValueList.shrinkTo(ModuleValueListSize); - MDValueList.shrinkTo(ModuleMDValueListSize); + MetadataList.shrinkTo(ModuleMetadataListSize); std::vector().swap(FunctionBBs); return std::error_code(); } @@ -4431,11 +5204,14 @@ std::error_code BitcodeReader::findFunctionInStream( Function *F, DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { - if (Stream.AtEndOfStream()) - return error("Could not find function in stream"); - // ParseModule will parse the next body in the stream and set its - // position in the DeferredFunctionInfo map. - if (std::error_code EC = parseModule(true)) + // This is the fallback handling for the old format bitcode that + // didn't contain the function index in the VST, or when we have + // an anonymous function which would not have a VST entry. + // Assert that we have one of those two cases. + assert(VSTOffset == 0 || !F->hasName()); + // Parse the next body in the stream and set its position in the + // DeferredFunctionInfo map. + if (std::error_code EC = rememberAndSkipFunctionBodies()) return EC; } return std::error_code(); @@ -4448,8 +5224,12 @@ std::error_code BitcodeReader::findFunctionInStream( void BitcodeReader::releaseBuffer() { Buffer.release(); } std::error_code BitcodeReader::materialize(GlobalValue *GV) { - if (std::error_code EC = materializeMetadata()) - return EC; + // In older bitcode we must materialize the metadata before parsing + // any functions, in order to set up the MetadataList properly. + if (!SeenModuleValuesRecord) { + if (std::error_code EC = materializeMetadata()) + return EC; + } Function *F = dyn_cast(GV); // If it's not a function or is already material, ignore the request. @@ -4476,7 +5256,8 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) { // Upgrade any old intrinsic calls in the function. for (auto &I : UpgradedIntrinsics) { - for (auto UI = I.first->user_begin(), UE = I.first->user_end(); UI != UE;) { + for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end(); + UI != UE;) { User *U = *UI; ++UI; if (CallInst *CI = dyn_cast(U)) @@ -4484,41 +5265,16 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) { } } + // Finish fn->subprogram upgrade for materialized functions. + if (DISubprogram *SP = FunctionsWithSPs.lookup(F)) + F->setSubprogram(SP); + // Bring in any functions that this function forward-referenced via // blockaddresses. return materializeForwardReferencedFunctions(); } -bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { - const Function *F = dyn_cast(GV); - if (!F || F->isDeclaration()) - return false; - - // Dematerializing F would leave dangling references that wouldn't be - // reconnected on re-materialization. - if (BlockAddressesTaken.count(F)) - return false; - - return DeferredFunctionInfo.count(const_cast(F)); -} - -void BitcodeReader::dematerialize(GlobalValue *GV) { - Function *F = dyn_cast(GV); - // If this function isn't dematerializable, this is a noop. - if (!F || !isDematerializable(F)) - return; - - assert(DeferredFunctionInfo.count(F) && "No info to read function later?"); - - // Just forget the function body, we can remat it later. 
- F->dropAllReferences(); - F->setIsMaterializable(true); -} - -std::error_code BitcodeReader::materializeModule(Module *M) { - assert(M == TheModule && - "Can only Materialize the Module this BitcodeReader is attached to."); - +std::error_code BitcodeReader::materializeModule() { if (std::error_code EC = materializeMetadata()) return EC; @@ -4527,16 +5283,16 @@ std::error_code BitcodeReader::materializeModule(Module *M) { // Iterate over the module, deserializing any functions that are still on // disk. - for (Module::iterator F = TheModule->begin(), E = TheModule->end(); - F != E; ++F) { - if (std::error_code EC = materialize(F)) + for (Function &F : *TheModule) { + if (std::error_code EC = materialize(&F)) return EC; } - // At this point, if there are any function bodies, the current bit is - // pointing to the END_BLOCK record after them. Now make sure the rest - // of the bits in the module have been read. - if (NextUnreadBit) - parseModule(true); + // At this point, if there are any function bodies, parse the rest of + // the bits in the module past the last function block we have recorded + // through either lazy scanning or the VST. + if (LastFunctionBlockBit || NextUnreadBit) + parseModule(LastFunctionBlockBit > NextUnreadBit ? LastFunctionBlockBit + : NextUnreadBit); // Check that all block address forward references got resolved (as we // promised above). @@ -4561,7 +5317,7 @@ std::error_code BitcodeReader::materializeModule(Module *M) { for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++) UpgradeInstWithTBAATag(InstsWithTBAATag[I]); - UpgradeDebugInfo(*M); + UpgradeDebugInfo(*TheModule); return std::error_code(); } @@ -4622,6 +5378,416 @@ BitcodeReader::initLazyStream(std::unique_ptr Streamer) { return std::error_code(); } +std::error_code FunctionIndexBitcodeReader::error(BitcodeError E, + const Twine &Message) { + return ::error(DiagnosticHandler, make_error_code(E), Message); +} + +std::error_code FunctionIndexBitcodeReader::error(const Twine &Message) { + return ::error(DiagnosticHandler, + make_error_code(BitcodeError::CorruptedBitcode), Message); +} + +std::error_code FunctionIndexBitcodeReader::error(BitcodeError E) { + return ::error(DiagnosticHandler, make_error_code(E)); +} + +FunctionIndexBitcodeReader::FunctionIndexBitcodeReader( + MemoryBuffer *Buffer, DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy, bool CheckFuncSummaryPresenceOnly) + : DiagnosticHandler(DiagnosticHandler), Buffer(Buffer), IsLazy(IsLazy), + CheckFuncSummaryPresenceOnly(CheckFuncSummaryPresenceOnly) {} + +FunctionIndexBitcodeReader::FunctionIndexBitcodeReader( + DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy, + bool CheckFuncSummaryPresenceOnly) + : DiagnosticHandler(DiagnosticHandler), Buffer(nullptr), IsLazy(IsLazy), + CheckFuncSummaryPresenceOnly(CheckFuncSummaryPresenceOnly) {} + +void FunctionIndexBitcodeReader::freeState() { Buffer = nullptr; } + +void FunctionIndexBitcodeReader::releaseBuffer() { Buffer.release(); } + +// Specialized value symbol table parser used when reading function index +// blocks where we don't actually create global values. +// At the end of this routine the function index is populated with a map +// from function name to FunctionInfo. The function info contains +// the function block's bitcode offset as well as the offset into the +// function summary section. 
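Reduced to its data shape, what this VST pass populates looks roughly like the following (hypothetical standalone types): each entry pairs a function name with an offset, either the function body's bit offset (per-module VST_CODE_FNENTRY) or the summary record's offset (combined-index VST_CODE_COMBINED_FNENTRY), plus the parsed summary when reading eagerly. A name can map to several entries, since a COMDAT may yield multiple function infos for one name (see the lookup loop in readFunctionSummary later in this patch):

    #include <cstdint>
    #include <map>
    #include <memory>
    #include <string>

    struct FunctionSummaryStub { unsigned InstCount = 0; };

    struct FunctionInfoStub {
      uint64_t BitcodeOffset = 0;                  // body or summary offset
      std::unique_ptr<FunctionSummaryStub> Summary; // null when IsLazy
    };

    using FunctionInfoMapStub = std::multimap<std::string, FunctionInfoStub>;

    static void addFunctionInfo(FunctionInfoMapStub &M, const std::string &Name,
                                FunctionInfoStub Info) {
      M.emplace(Name, std::move(Info));
    }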
+std::error_code FunctionIndexBitcodeReader::parseValueSymbolTable() { + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + // Read all the records for this value table. + SmallString<128> ValueName; + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records). + break; + case bitc::VST_CODE_FNENTRY: { + // VST_FNENTRY: [valueid, offset, namechar x N] + if (convertToString(Record, 2, ValueName)) + return error("Invalid record"); + unsigned ValueID = Record[0]; + uint64_t FuncOffset = Record[1]; + std::unique_ptr FuncInfo = + llvm::make_unique(FuncOffset); + if (foundFuncSummary() && !IsLazy) { + DenseMap>::iterator SMI = + SummaryMap.find(ValueID); + assert(SMI != SummaryMap.end() && "Summary info not found"); + FuncInfo->setFunctionSummary(std::move(SMI->second)); + } + TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo)); + + ValueName.clear(); + break; + } + case bitc::VST_CODE_COMBINED_FNENTRY: { + // VST_FNENTRY: [offset, namechar x N] + if (convertToString(Record, 1, ValueName)) + return error("Invalid record"); + uint64_t FuncSummaryOffset = Record[0]; + std::unique_ptr FuncInfo = + llvm::make_unique(FuncSummaryOffset); + if (foundFuncSummary() && !IsLazy) { + DenseMap>::iterator SMI = + SummaryMap.find(FuncSummaryOffset); + assert(SMI != SummaryMap.end() && "Summary info not found"); + FuncInfo->setFunctionSummary(std::move(SMI->second)); + } + TheIndex->addFunctionInfo(ValueName, std::move(FuncInfo)); + + ValueName.clear(); + break; + } + } + } +} + +// Parse just the blocks needed for function index building out of the module. +// At the end of this routine the function Index is populated with a map +// from function name to FunctionInfo. The function info contains +// either the parsed function summary information (when parsing summaries +// eagerly), or just to the function summary record's offset +// if parsing lazily (IsLazy). +std::error_code FunctionIndexBitcodeReader::parseModule() { + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return error("Invalid record"); + + // Read the function index for this module. + while (1) { + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + + case BitstreamEntry::SubBlock: + if (CheckFuncSummaryPresenceOnly) { + if (Entry.ID == bitc::FUNCTION_SUMMARY_BLOCK_ID) { + SeenFuncSummary = true; + // No need to parse the rest since we found the summary. + return std::error_code(); + } + if (Stream.SkipBlock()) + return error("Invalid record"); + continue; + } + switch (Entry.ID) { + default: // Skip unknown content. + if (Stream.SkipBlock()) + return error("Invalid record"); + break; + case bitc::BLOCKINFO_BLOCK_ID: + // Need to parse these to get abbrev ids (e.g. 
for VST) + if (Stream.ReadBlockInfoBlock()) + return error("Malformed block"); + break; + case bitc::VALUE_SYMTAB_BLOCK_ID: + if (std::error_code EC = parseValueSymbolTable()) + return EC; + break; + case bitc::FUNCTION_SUMMARY_BLOCK_ID: + SeenFuncSummary = true; + if (IsLazy) { + // Lazy parsing of summary info, skip it. + if (Stream.SkipBlock()) + return error("Invalid record"); + } else if (std::error_code EC = parseEntireSummary()) + return EC; + break; + case bitc::MODULE_STRTAB_BLOCK_ID: + if (std::error_code EC = parseModuleStringTable()) + return EC; + break; + } + continue; + + case BitstreamEntry::Record: + Stream.skipRecord(Entry.ID); + continue; + } + } +} + +// Eagerly parse the entire function summary block (i.e. for all functions +// in the index). This populates the FunctionSummary objects in +// the index. +std::error_code FunctionIndexBitcodeReader::parseEntireSummary() { + if (Stream.EnterSubBlock(bitc::FUNCTION_SUMMARY_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. The record format depends on whether this + // is a per-module index or a combined index file. In the per-module + // case the records contain the associated value's ID for correlation + // with VST entries. In the combined index the correlation is done + // via the bitcode offset of the summary records (which were saved + // in the combined index VST entries). The records also contain + // information used for ThinLTO renaming and importing. + Record.clear(); + uint64_t CurRecordBit = Stream.GetCurrentBitNo(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore. + break; + // FS_PERMODULE_ENTRY: [valueid, islocal, instcount] + case bitc::FS_CODE_PERMODULE_ENTRY: { + unsigned ValueID = Record[0]; + bool IsLocal = Record[1]; + unsigned InstCount = Record[2]; + std::unique_ptr FS = + llvm::make_unique(InstCount); + FS->setLocalFunction(IsLocal); + // The module path string ref set in the summary must be owned by the + // index's module string table. Since we don't have a module path + // string table section in the per-module index, we create a single + // module path string table entry with an empty (0) ID to take + // ownership. + FS->setModulePath( + TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); + SummaryMap[ValueID] = std::move(FS); + } + // FS_COMBINED_ENTRY: [modid, instcount] + case bitc::FS_CODE_COMBINED_ENTRY: { + uint64_t ModuleId = Record[0]; + unsigned InstCount = Record[1]; + std::unique_ptr FS = + llvm::make_unique(InstCount); + FS->setModulePath(ModuleIdMap[ModuleId]); + SummaryMap[CurRecordBit] = std::move(FS); + } + } + } + llvm_unreachable("Exit infinite loop"); +} + +// Parse the module string table block into the Index. +// This populates the ModulePathStringTable map in the index. 
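The mapping built by the routine below has two halves: the index owns each interned module path keyed by its numeric module ID, and ModuleIdMap lets later FS_COMBINED_ENTRY records resolve their modid back to that owned string. A sketch of the shape (hypothetical types; std::map node stability makes the stored pointers safe):

    #include <cstdint>
    #include <map>
    #include <string>

    struct ModuleStringTableModel {
      std::map<std::string, uint64_t> ModulePathStringTable; // owned paths
      std::map<uint64_t, const std::string *> ModuleIdMap;   // modid -> path

      // One MST_CODE_ENTRY: [modid, namechar x N].
      void addEntry(uint64_t ModuleId, const std::string &Path) {
        auto It = ModulePathStringTable.emplace(Path, ModuleId).first;
        ModuleIdMap[ModuleId] = &It->first;
      }
    };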
+std::error_code FunctionIndexBitcodeReader::parseModuleStringTable() { + if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + + SmallString<128> ModulePath; + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return std::error_code(); + case BitstreamEntry::Record: + // The interesting case. + break; + } + + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore. + break; + case bitc::MST_CODE_ENTRY: { + // MST_ENTRY: [modid, namechar x N] + if (convertToString(Record, 1, ModulePath)) + return error("Invalid record"); + uint64_t ModuleId = Record[0]; + StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId); + ModuleIdMap[ModuleId] = ModulePathInMap; + ModulePath.clear(); + break; + } + } + } + llvm_unreachable("Exit infinite loop"); +} + +// Parse the function info index from the bitcode streamer into the given index. +std::error_code FunctionIndexBitcodeReader::parseSummaryIndexInto( + std::unique_ptr Streamer, FunctionInfoIndex *I) { + TheIndex = I; + + if (std::error_code EC = initStream(std::move(Streamer))) + return EC; + + // Sniff for the signature. + if (!hasValidBitcodeHeader(Stream)) + return error("Invalid bitcode signature"); + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. + while (1) { + if (Stream.AtEndOfStream()) { + // We didn't really read a proper Module block. + return error("Malformed block"); + } + + BitstreamEntry Entry = + Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); + + if (Entry.Kind != BitstreamEntry::SubBlock) + return error("Malformed block"); + + // If we see a MODULE_BLOCK, parse it to find the blocks needed for + // building the function summary index. + if (Entry.ID == bitc::MODULE_BLOCK_ID) + return parseModule(); + + if (Stream.SkipBlock()) + return error("Invalid record"); + } +} + +// Parse the function information at the given offset in the buffer into +// the index. Used to support lazy parsing of function summaries from the +// combined index during importing. +// TODO: This function is not yet complete as it won't have a consumer +// until ThinLTO function importing is added. +std::error_code FunctionIndexBitcodeReader::parseFunctionSummary( + std::unique_ptr Streamer, FunctionInfoIndex *I, + size_t FunctionSummaryOffset) { + TheIndex = I; + + if (std::error_code EC = initStream(std::move(Streamer))) + return EC; + + // Sniff for the signature. + if (!hasValidBitcodeHeader(Stream)) + return error("Invalid bitcode signature"); + + Stream.JumpToBit(FunctionSummaryOffset); + + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + default: + return error("Malformed block"); + case BitstreamEntry::Record: + // The expected case. + break; + } + + // TODO: Read a record. This interface will be completed when ThinLTO + // importing is added so that it can be tested. 
+ SmallVector Record; + switch (Stream.readRecord(Entry.ID, Record)) { + case bitc::FS_CODE_COMBINED_ENTRY: + default: + return error("Invalid record"); + } + + return std::error_code(); +} + +std::error_code +FunctionIndexBitcodeReader::initStream(std::unique_ptr Streamer) { + if (Streamer) + return initLazyStream(std::move(Streamer)); + return initStreamFromBuffer(); +} + +std::error_code FunctionIndexBitcodeReader::initStreamFromBuffer() { + const unsigned char *BufPtr = (const unsigned char *)Buffer->getBufferStart(); + const unsigned char *BufEnd = BufPtr + Buffer->getBufferSize(); + + if (Buffer->getBufferSize() & 3) + return error("Invalid bitcode signature"); + + // If we have a wrapper header, parse it and ignore the non-bc file contents. + // The magic number is 0x0B17C0DE stored in little endian. + if (isBitcodeWrapper(BufPtr, BufEnd)) + if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) + return error("Invalid bitcode wrapper header"); + + StreamFile.reset(new BitstreamReader(BufPtr, BufEnd)); + Stream.init(&*StreamFile); + + return std::error_code(); +} + +std::error_code FunctionIndexBitcodeReader::initLazyStream( + std::unique_ptr Streamer) { + // Check and strip off the bitcode wrapper; BitstreamReader expects never to + // see it. + auto OwnedBytes = + llvm::make_unique(std::move(Streamer)); + StreamingMemoryObject &Bytes = *OwnedBytes; + StreamFile = llvm::make_unique(std::move(OwnedBytes)); + Stream.init(&*StreamFile); + + unsigned char buf[16]; + if (Bytes.readBytes(buf, 16, 0) != 16) + return error("Invalid bitcode signature"); + + if (!isBitcode(buf, buf + 16)) + return error("Invalid bitcode signature"); + + if (isBitcodeWrapper(buf, buf + 4)) { + const unsigned char *bitcodeStart = buf; + const unsigned char *bitcodeEnd = buf + 16; + SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false); + Bytes.dropLeadingBytes(bitcodeStart - buf); + Bytes.setKnownObjectSize(bitcodeEnd - bitcodeStart); + } + return std::error_code(); +} + namespace { class BitcodeErrorCategoryType : public std::error_category { const char *name() const LLVM_NOEXCEPT override { @@ -4669,7 +5835,7 @@ getBitcodeModuleImpl(std::unique_ptr Streamer, StringRef Name, if (MaterializeAll) { // Read in the entire module, and destroy the BitcodeReader. - if (std::error_code EC = M->materializeAllPermanently()) + if (std::error_code EC = M->materializeAll()) return cleanupOnError(EC); } else { // Resolve forward references from blockaddresses. 
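The wrapper header skipped in initStreamFromBuffer() and initLazyStream() above is, assuming the layout used by LLVM's isBitcodeWrapper()/SkipBitcodeWrapperHeader(), five little-endian 32-bit words: Magic (0x0B17C0DE), Version, BitcodeOffset, BitcodeSize, CPUType; the raw 'BC\xC0\xDE' stream then begins at BitcodeOffset. A byte-level sketch (names are ours):

    #include <cstddef>
    #include <cstdint>

    static uint32_t readLE32(const uint8_t *P) {
      return uint32_t(P[0]) | uint32_t(P[1]) << 8 | uint32_t(P[2]) << 16 |
             uint32_t(P[3]) << 24;
    }

    // Advances Begin past a wrapper header if one is present and sane.
    static bool skipWrapperHeader(const uint8_t *&Begin, const uint8_t *End) {
      if (End - Begin < 20 || readLE32(Begin) != 0x0B17C0DE)
        return false; // not wrapped; Begin already points at raw bitcode
      uint32_t Offset = readLE32(Begin + 8);
      uint32_t Size = readLE32(Begin + 12);
      if (uint64_t(Offset) + Size > uint64_t(End - Begin))
        return false; // malformed wrapper
      Begin += Offset; // jump to the embedded bitcode
      return true;
    }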
@@ -4690,10 +5856,8 @@ getBitcodeModuleImpl(std::unique_ptr<DataStreamer> Streamer, StringRef Name,
 static ErrorOr<std::unique_ptr<Module>>
 getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
                          LLVMContext &Context, bool MaterializeAll,
-                         DiagnosticHandlerFunction DiagnosticHandler,
                          bool ShouldLazyLoadMetadata = false) {
-  BitcodeReader *R =
-      new BitcodeReader(Buffer.get(), Context, DiagnosticHandler);
+  BitcodeReader *R = new BitcodeReader(Buffer.get(), Context);
 
   ErrorOr<std::unique_ptr<Module>> Ret =
       getBitcodeModuleImpl(nullptr, Buffer->getBufferIdentifier(), R, Context,
@@ -4705,41 +5869,124 @@ getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
   return Ret;
 }
 
-ErrorOr<std::unique_ptr<Module>> llvm::getLazyBitcodeModule(
-    std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
-    DiagnosticHandlerFunction DiagnosticHandler, bool ShouldLazyLoadMetadata) {
+ErrorOr<std::unique_ptr<Module>>
+llvm::getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
+                           LLVMContext &Context, bool ShouldLazyLoadMetadata) {
   return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false,
-                                  DiagnosticHandler, ShouldLazyLoadMetadata);
+                                  ShouldLazyLoadMetadata);
 }
 
-ErrorOr<std::unique_ptr<Module>> llvm::getStreamedBitcodeModule(
-    StringRef Name, std::unique_ptr<DataStreamer> Streamer,
-    LLVMContext &Context, DiagnosticHandlerFunction DiagnosticHandler) {
+ErrorOr<std::unique_ptr<Module>>
+llvm::getStreamedBitcodeModule(StringRef Name,
+                               std::unique_ptr<DataStreamer> Streamer,
+                               LLVMContext &Context) {
   std::unique_ptr<Module> M = make_unique<Module>(Name, Context);
-  BitcodeReader *R = new BitcodeReader(Context, DiagnosticHandler);
+  BitcodeReader *R = new BitcodeReader(Context);
 
   return getBitcodeModuleImpl(std::move(Streamer), Name, R, Context, false,
                               false);
 }
 
-ErrorOr<std::unique_ptr<Module>>
-llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context,
-                       DiagnosticHandlerFunction DiagnosticHandler) {
+ErrorOr<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
+                                                        LLVMContext &Context) {
   std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
-  return getLazyBitcodeModuleImpl(std::move(Buf), Context, true,
-                                  DiagnosticHandler);
+  return getLazyBitcodeModuleImpl(std::move(Buf), Context, true);
   // TODO: Restore the use-lists to the in-memory state when the bitcode was
   // written. We must defer until the Module has been fully materialized.
 }
 
-std::string
-llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context,
-                             DiagnosticHandlerFunction DiagnosticHandler) {
+std::string llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer,
+                                         LLVMContext &Context) {
   std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
-  auto R = llvm::make_unique<BitcodeReader>(Buf.release(), Context,
-                                            DiagnosticHandler);
+  auto R = llvm::make_unique<BitcodeReader>(Buf.release(), Context);
   ErrorOr<std::string> Triple = R->parseTriple();
   if (Triple.getError())
     return "";
   return Triple.get();
 }
+
+std::string llvm::getBitcodeProducerString(MemoryBufferRef Buffer,
+                                           LLVMContext &Context) {
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+  BitcodeReader R(Buf.release(), Context);
+  ErrorOr<std::string> ProducerString = R.parseIdentificationBlock();
+  if (ProducerString.getError())
+    return "";
+  return ProducerString.get();
+}
+
+// Parse the specified bitcode buffer, returning the function info index.
+// If IsLazy is false, parse the entire function summary into
+// the index. Otherwise skip the function summary section, and only create
+// an index object with a map from function name to function summary offset.
+// The index is used to perform lazy function summary reading later.
+ErrorOr<std::unique_ptr<FunctionInfoIndex>>
+llvm::getFunctionInfoIndex(MemoryBufferRef Buffer,
+                           DiagnosticHandlerFunction DiagnosticHandler,
+                           bool IsLazy) {
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+  FunctionIndexBitcodeReader R(Buf.get(), DiagnosticHandler, IsLazy);
+
+  auto Index = llvm::make_unique<FunctionInfoIndex>();
+
+  auto cleanupOnError = [&](std::error_code EC) {
+    R.releaseBuffer(); // Never take ownership on error.
+    return EC;
+  };
+
+  if (std::error_code EC = R.parseSummaryIndexInto(nullptr, Index.get()))
+    return cleanupOnError(EC);
+
+  Buf.release(); // The FunctionIndexBitcodeReader owns it now.
+  return std::move(Index);
+}
+
+// Check if the given bitcode buffer contains a function summary block.
+bool llvm::hasFunctionSummary(MemoryBufferRef Buffer,
+                              DiagnosticHandlerFunction DiagnosticHandler) {
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+  FunctionIndexBitcodeReader R(Buf.get(), DiagnosticHandler, false, true);
+
+  auto cleanupOnError = [&](std::error_code EC) {
+    R.releaseBuffer(); // Never take ownership on error.
+    return false;
+  };
+
+  if (std::error_code EC = R.parseSummaryIndexInto(nullptr, nullptr))
+    return cleanupOnError(EC);
+
+  Buf.release(); // The FunctionIndexBitcodeReader owns it now.
+  return R.foundFuncSummary();
+}
+
+// This method supports lazy reading of function summary data from the combined
+// index during ThinLTO function importing. When reading the combined index
+// file, getFunctionInfoIndex is first invoked with IsLazy=true.
+// Then this method is called for each function considered for importing,
+// to parse the summary information for the given function name into
+// the index.
+std::error_code llvm::readFunctionSummary(
+    MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler,
+    StringRef FunctionName, std::unique_ptr<FunctionInfoIndex> Index) {
+  std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
+  FunctionIndexBitcodeReader R(Buf.get(), DiagnosticHandler);
+
+  auto cleanupOnError = [&](std::error_code EC) {
+    R.releaseBuffer(); // Never take ownership on error.
+    return EC;
+  };
+
+  // Lookup the given function name in the FunctionMap, which may
+  // contain a list of function infos in the case of a COMDAT. Walk through
+  // and parse each function summary info at the function summary offset
+  // recorded when parsing the value symbol table.
+  for (const auto &FI : Index->getFunctionInfoList(FunctionName)) {
+    size_t FunctionSummaryOffset = FI->bitcodeIndex();
+    if (std::error_code EC =
+            R.parseFunctionSummary(nullptr, Index.get(), FunctionSummaryOffset))
+      return cleanupOnError(EC);
+  }
+
+  Buf.release(); // The FunctionIndexBitcodeReader owns it now.
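[Editor's aside: the comment above describes the intended lazy-reading protocol, and the in-code TODO notes that no consumer exists yet in this patch. As a rough sketch of the call sequence a future ThinLTO importer might use — hypothetical driver code, where `CombinedIndexBuf` is a MemoryBufferRef for the combined index file and `DiagHandler` is a DiagnosticHandlerFunction, using only APIs introduced here:]

// Build a lazy index first: function summaries are skipped, only the
// name -> summary-offset map from the VST is populated.
ErrorOr<std::unique_ptr<FunctionInfoIndex>> IndexOrErr =
    llvm::getFunctionInfoIndex(CombinedIndexBuf, DiagHandler, /*IsLazy=*/true);
if (std::error_code EC = IndexOrErr.getError())
  return EC;
std::unique_ptr<FunctionInfoIndex> Index = std::move(IndexOrErr.get());

// For each import candidate, materialize just that function's summary.
// Note that readFunctionSummary takes the index by value, consuming the
// unique_ptr; a caller looping over many names would need to thread the
// pointer through accordingly.
if (std::error_code EC = llvm::readFunctionSummary(
        CombinedIndexBuf, DiagHandler, "candidate_fn", std::move(Index)))
  return EC;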
+ return std::error_code(); +} diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 1a70ba5ac127..a1f87863757b 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -13,14 +13,18 @@ #include "llvm/Bitcode/ReaderWriter.h" #include "ValueEnumerator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitstreamWriter.h" #include "llvm/Bitcode/LLVMBitCodes.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/UseListOrder.h" @@ -174,6 +178,10 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; + case Attribute::InaccessibleMemOnly: + return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY; + case Attribute::InaccessibleMemOrArgMemOnly: + return bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY; case Attribute::InlineHint: return bitc::ATTR_KIND_INLINE_HINT; case Attribute::InReg: @@ -198,6 +206,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT; case Attribute::NoInline: return bitc::ATTR_KIND_NO_INLINE; + case Attribute::NoRecurse: + return bitc::ATTR_KIND_NO_RECURSE; case Attribute::NonLazyBind: return bitc::ATTR_KIND_NON_LAZY_BIND; case Attribute::NonNull: @@ -405,6 +415,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; + case Type::TokenTyID: Code = bitc::TYPE_CODE_TOKEN; break; case Type::IntegerTyID: // INTEGER: [width] Code = bitc::TYPE_CODE_INTEGER; @@ -573,10 +584,41 @@ static void writeComdats(const ValueEnumerator &VE, BitstreamWriter &Stream) { } } -// Emit top-level description of module, including target triple, inline asm, -// descriptors for global variables, and function prototype info. -static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, - BitstreamWriter &Stream) { +/// Write a record that will eventually hold the word offset of the +/// module-level VST. For now the offset is 0, which will be backpatched +/// after the real VST is written. Returns the bit offset to backpatch. +static uint64_t WriteValueSymbolTableForwardDecl(const ValueSymbolTable &VST, + BitstreamWriter &Stream) { + if (VST.empty()) + return 0; + + // Write a placeholder value in for the offset of the real VST, + // which is written after the function blocks so that it can include + // the offset of each function. The placeholder offset will be + // updated when the real VST is written. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET)); + // Blocks are 32-bit aligned, so we can use a 32-bit word offset to + // hold the real VST offset. Must use fixed instead of VBR as we don't + // know how many VBR chunks to reserve ahead of time. 
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(Abbv); + + // Emit the placeholder + uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0}; + Stream.EmitRecordWithAbbrev(VSTOffsetAbbrev, Vals); + + // Compute and return the bit offset to the placeholder, which will be + // patched when the real VST is written. We can simply subtract the 32-bit + // fixed size from the current bit number to get the location to backpatch. + return Stream.GetCurrentBitNo() - 32; +} + +/// Emit top-level description of module, including target triple, inline asm, +/// descriptors for global variables, and function prototype info. +/// Returns the bit offset to backpatch with the location of the real VST. +static uint64_t WriteModuleInfo(const Module *M, const ValueEnumerator &VE, + BitstreamWriter &Stream) { // Emit various pieces of data attached to a module. if (!M->getTargetTriple().empty()) WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(), @@ -725,7 +767,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Emit the alias information. for (const GlobalAlias &A : M->aliases()) { // ALIAS: [alias type, aliasee val#, linkage, visibility] - Vals.push_back(VE.getTypeID(A.getType())); + Vals.push_back(VE.getTypeID(A.getValueType())); + Vals.push_back(A.getType()->getAddressSpace()); Vals.push_back(VE.getValueID(A.getAliasee())); Vals.push_back(getEncodedLinkage(A)); Vals.push_back(getEncodedVisibility(A)); @@ -736,6 +779,25 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse); Vals.clear(); } + + // Write a record indicating the number of module-level metadata IDs + // This is needed because the ids of metadata are assigned implicitly + // based on their ordering in the bitcode, with the function-level + // metadata ids starting after the module-level metadata ids. For + // function importing where we lazy load the metadata as a postpass, + // we want to avoid parsing the module-level metadata before parsing + // the imported functions. 
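[Editor's aside: the backpatch contract introduced above is easy to misread, so here is a condensed restatement. This is illustrative only — in the patch the emission and the patching are split between WriteValueSymbolTableForwardDecl and WriteValueSymbolTable — and it assumes the `Stream`, `VSTOffsetAbbrev`, and `BitcodeStartBit` names used there.]

// Emit the placeholder: the record's single operand is a Fixed(32) field,
// so it occupies exactly the 32 bits before the current position.
uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0}; // 0 = not yet known
Stream.EmitRecordWithAbbrev(VSTOffsetAbbrev, Vals);
uint64_t VSTOffsetPlaceholder = Stream.GetCurrentBitNo() - 32;

// ... after all function blocks, just before the real VST is written ...
uint64_t VSTOffsetBits = Stream.GetCurrentBitNo() - BitcodeStartBit;
assert((VSTOffsetBits & 31) == 0 && "VST block not 32-bit aligned");
Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffsetBits / 32);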
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_METADATA_VALUES)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + unsigned MDValsAbbrev = Stream.EmitAbbrev(Abbv); + Vals.push_back(VE.numMDs()); + Stream.EmitRecord(bitc::MODULE_CODE_METADATA_VALUES, Vals, MDValsAbbrev); + Vals.clear(); + + uint64_t VSTOffsetPlaceholder = + WriteValueSymbolTableForwardDecl(M->getValueSymbolTable(), Stream); + return VSTOffsetPlaceholder; } static uint64_t GetOptimizationFlags(const Value *V) { @@ -943,7 +1005,8 @@ static void WriteDICompileUnit(const DICompileUnit *N, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { - Record.push_back(N->isDistinct()); + assert(N->isDistinct() && "Expected distinct compile units"); + Record.push_back(/* IsDistinct */ true); Record.push_back(N->getSourceLanguage()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawProducer())); @@ -958,6 +1021,7 @@ static void WriteDICompileUnit(const DICompileUnit *N, Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get())); Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get())); Record.push_back(N->getDWOId()); + Record.push_back(VE.getMetadataOrNullID(N->getMacros().get())); Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev); Record.clear(); @@ -982,7 +1046,6 @@ static void WriteDISubprogram(const DISubprogram *N, const ValueEnumerator &VE, Record.push_back(N->getVirtualIndex()); Record.push_back(N->getFlags()); Record.push_back(N->isOptimized()); - Record.push_back(VE.getMetadataOrNullID(N->getRawFunction())); Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getDeclaration())); Record.push_back(VE.getMetadataOrNullID(N->getVariables().get())); @@ -1034,6 +1097,33 @@ static void WriteDINamespace(const DINamespace *N, const ValueEnumerator &VE, Record.clear(); } +static void WriteDIMacro(const DIMacro *N, const ValueEnumerator &VE, + BitstreamWriter &Stream, + SmallVectorImpl &Record, unsigned Abbrev) { + Record.push_back(N->isDistinct()); + Record.push_back(N->getMacinfoType()); + Record.push_back(N->getLine()); + Record.push_back(VE.getMetadataOrNullID(N->getRawName())); + Record.push_back(VE.getMetadataOrNullID(N->getRawValue())); + + Stream.EmitRecord(bitc::METADATA_MACRO, Record, Abbrev); + Record.clear(); +} + +static void WriteDIMacroFile(const DIMacroFile *N, const ValueEnumerator &VE, + BitstreamWriter &Stream, + SmallVectorImpl &Record, + unsigned Abbrev) { + Record.push_back(N->isDistinct()); + Record.push_back(N->getMacinfoType()); + Record.push_back(N->getLine()); + Record.push_back(VE.getMetadataOrNullID(N->getFile())); + Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); + + Stream.EmitRecord(bitc::METADATA_MACRO_FILE, Record, Abbrev); + Record.clear(); +} + static void WriteDIModule(const DIModule *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { @@ -1100,7 +1190,6 @@ static void WriteDILocalVariable(const DILocalVariable *N, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); - Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); @@ -1310,16 +1399,15 @@ static void WriteMetadataAttachment(const Function &F, Record.clear(); } - for 
(Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) { MDs.clear(); - I->getAllMetadataOtherThanDebugLoc(MDs); + I.getAllMetadataOtherThanDebugLoc(MDs); // If no metadata, ignore instruction. if (MDs.empty()) continue; - Record.push_back(VE.getInstructionID(I)); + Record.push_back(VE.getInstructionID(&I)); for (unsigned i = 0, e = MDs.size(); i != e; ++i) { Record.push_back(MDs[i].first); @@ -1342,7 +1430,7 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { if (Names.empty()) return; - Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); + Stream.EnterSubblock(bitc::METADATA_KIND_BLOCK_ID, 3); for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) { Record.push_back(MDKindID); @@ -1356,6 +1444,33 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { Stream.ExitBlock(); } +static void WriteOperandBundleTags(const Module *M, BitstreamWriter &Stream) { + // Write metadata kinds + // + // OPERAND_BUNDLE_TAGS_BLOCK_ID : N x OPERAND_BUNDLE_TAG + // + // OPERAND_BUNDLE_TAG - [strchr x N] + + SmallVector Tags; + M->getOperandBundleTags(Tags); + + if (Tags.empty()) + return; + + Stream.EnterSubblock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID, 3); + + SmallVector Record; + + for (auto Tag : Tags) { + Record.append(Tag.begin(), Tag.end()); + + Stream.EmitRecord(bitc::OPERAND_BUNDLE_TAG, Record, 0); + Record.clear(); + } + + Stream.ExitBlock(); +} + static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); @@ -1664,6 +1779,23 @@ static bool PushValueAndType(const Value *V, unsigned InstID, return false; } +static void WriteOperandBundles(BitstreamWriter &Stream, ImmutableCallSite CS, + unsigned InstID, ValueEnumerator &VE) { + SmallVector Record; + LLVMContext &C = CS.getInstruction()->getContext(); + + for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { + const auto &Bundle = CS.getOperandBundleAt(i); + Record.push_back(C.getOperandBundleTagID(Bundle.getTagName())); + + for (auto &Input : Bundle.Inputs) + PushValueAndType(Input, InstID, Record, VE); + + Stream.EmitRecord(bitc::FUNC_CODE_OPERAND_BUNDLE, Record); + Record.clear(); + } +} + /// pushValue - Like PushValueAndType, but where the type of the value is /// omitted (perhaps it was already encoded in an earlier operand). 
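[Editor's aside: several abbreviations in this patch (the identification string, VST entries, module path strings) use the bitstream's Char6 encoding. For reference, a small sketch of the 6-bit alphabet; this mirrors BitCodeAbbrevOp::isChar6/EncodeChar6 and is shown only for illustration — see also getStringEncoding below, which decides when Char6 is applicable.]

#include <cassert>

// Char6 packs [a-zA-Z0-9._] into 6 bits: 'a'-'z' -> 0-25, 'A'-'Z' -> 26-51,
// '0'-'9' -> 52-61, '.' -> 62, '_' -> 63. Any other byte forces a wider
// fixed-width encoding for the whole string.
static unsigned encodeChar6(char C) {
  if (C >= 'a' && C <= 'z') return C - 'a';
  if (C >= 'A' && C <= 'Z') return C - 'A' + 26;
  if (C >= '0' && C <= '9') return C - '0' + 52;
  if (C == '.') return 62;
  assert(C == '_' && "not a Char6 character");
  return 63;
}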
static void pushValue(const Value *V, unsigned InstID, @@ -1806,10 +1938,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); pushValue(SI.getCondition(), InstID, Vals, VE); Vals.push_back(VE.getValueID(SI.getDefaultDest())); - for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); - i != e; ++i) { - Vals.push_back(VE.getValueID(i.getCaseValue())); - Vals.push_back(VE.getValueID(i.getCaseSuccessor())); + for (SwitchInst::ConstCaseIt Case : SI.cases()) { + Vals.push_back(VE.getValueID(Case.getCaseValue())); + Vals.push_back(VE.getValueID(Case.getCaseSuccessor())); } } break; @@ -1826,6 +1957,10 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, const InvokeInst *II = cast(&I); const Value *Callee = II->getCalledValue(); FunctionType *FTy = II->getFunctionType(); + + if (II->hasOperandBundles()) + WriteOperandBundles(Stream, II, InstID, VE); + Code = bitc::FUNC_CODE_INST_INVOKE; Vals.push_back(VE.getAttributeID(II->getAttributes())); @@ -1851,6 +1986,49 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Code = bitc::FUNC_CODE_INST_RESUME; PushValueAndType(I.getOperand(0), InstID, Vals, VE); break; + case Instruction::CleanupRet: { + Code = bitc::FUNC_CODE_INST_CLEANUPRET; + const auto &CRI = cast(I); + pushValue(CRI.getCleanupPad(), InstID, Vals, VE); + if (CRI.hasUnwindDest()) + Vals.push_back(VE.getValueID(CRI.getUnwindDest())); + break; + } + case Instruction::CatchRet: { + Code = bitc::FUNC_CODE_INST_CATCHRET; + const auto &CRI = cast(I); + pushValue(CRI.getCatchPad(), InstID, Vals, VE); + Vals.push_back(VE.getValueID(CRI.getSuccessor())); + break; + } + case Instruction::CleanupPad: + case Instruction::CatchPad: { + const auto &FuncletPad = cast(I); + Code = isa(FuncletPad) ? bitc::FUNC_CODE_INST_CATCHPAD + : bitc::FUNC_CODE_INST_CLEANUPPAD; + pushValue(FuncletPad.getParentPad(), InstID, Vals, VE); + + unsigned NumArgOperands = FuncletPad.getNumArgOperands(); + Vals.push_back(NumArgOperands); + for (unsigned Op = 0; Op != NumArgOperands; ++Op) + PushValueAndType(FuncletPad.getArgOperand(Op), InstID, Vals, VE); + break; + } + case Instruction::CatchSwitch: { + Code = bitc::FUNC_CODE_INST_CATCHSWITCH; + const auto &CatchSwitch = cast(I); + + pushValue(CatchSwitch.getParentPad(), InstID, Vals, VE); + + unsigned NumHandlers = CatchSwitch.getNumHandlers(); + Vals.push_back(NumHandlers); + for (const BasicBlock *CatchPadBB : CatchSwitch.handlers()) + Vals.push_back(VE.getValueID(CatchPadBB)); + + if (CatchSwitch.hasUnwindDest()) + Vals.push_back(VE.getValueID(CatchSwitch.getUnwindDest())); + break; + } case Instruction::Unreachable: Code = bitc::FUNC_CODE_INST_UNREACHABLE; AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; @@ -1902,6 +2080,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, assert(AlignRecord < 1 << 5 && "alignment greater than 1 << 64"); AlignRecord |= AI.isUsedWithInAlloca() << 5; AlignRecord |= 1 << 6; + // Reserve bit 7 for SwiftError flag. 
+ // AlignRecord |= AI.isSwiftError() << 7; Vals.push_back(AlignRecord); break; } @@ -1971,11 +2151,23 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, const CallInst &CI = cast(I); FunctionType *FTy = CI.getFunctionType(); + if (CI.hasOperandBundles()) + WriteOperandBundles(Stream, &CI, InstID, VE); + Code = bitc::FUNC_CODE_INST_CALL; Vals.push_back(VE.getAttributeID(CI.getAttributes())); - Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()) | - unsigned(CI.isMustTailCall()) << 14 | 1 << 15); + + unsigned Flags = GetOptimizationFlags(&I); + Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV | + unsigned(CI.isTailCall()) << bitc::CALL_TAIL | + unsigned(CI.isMustTailCall()) << bitc::CALL_MUSTTAIL | + 1 << bitc::CALL_EXPLICIT_TYPE | + unsigned(CI.isNoTailCall()) << bitc::CALL_NOTAIL | + unsigned(Flags != 0) << bitc::CALL_FMF); + if (Flags != 0) + Vals.push_back(Flags); + Vals.push_back(VE.getTypeID(FTy)); PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee @@ -2008,56 +2200,149 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.clear(); } -// Emit names for globals/functions etc. -static void WriteValueSymbolTable(const ValueSymbolTable &VST, - const ValueEnumerator &VE, - BitstreamWriter &Stream) { - if (VST.empty()) return; +enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 }; + +/// Determine the encoding to use for the given string name and length. +static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) { + bool isChar6 = true; + for (const char *C = Str, *E = C + StrLen; C != E; ++C) { + if (isChar6) + isChar6 = BitCodeAbbrevOp::isChar6(*C); + if ((unsigned char)*C & 128) + // don't bother scanning the rest. + return SE_Fixed8; + } + if (isChar6) + return SE_Char6; + else + return SE_Fixed7; +} + +/// Emit names for globals/functions etc. The VSTOffsetPlaceholder, +/// BitcodeStartBit and FunctionIndex are only passed for the module-level +/// VST, where we are including a function bitcode index and need to +/// backpatch the VST forward declaration record. +static void WriteValueSymbolTable( + const ValueSymbolTable &VST, const ValueEnumerator &VE, + BitstreamWriter &Stream, uint64_t VSTOffsetPlaceholder = 0, + uint64_t BitcodeStartBit = 0, + DenseMap> *FunctionIndex = + nullptr) { + if (VST.empty()) { + // WriteValueSymbolTableForwardDecl should have returned early as + // well. Ensure this handling remains in sync by asserting that + // the placeholder offset is not set. + assert(VSTOffsetPlaceholder == 0); + return; + } + + if (VSTOffsetPlaceholder > 0) { + // Get the offset of the VST we are writing, and backpatch it into + // the VST forward declaration record. + uint64_t VSTOffset = Stream.GetCurrentBitNo(); + // The BitcodeStartBit was the stream offset of the actual bitcode + // (e.g. excluding any initial darwin header). + VSTOffset -= BitcodeStartBit; + assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); + Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32); + } + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); + // For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY + // records, which are not used in the per-function VSTs. + unsigned FnEntry8BitAbbrev; + unsigned FnEntry7BitAbbrev; + unsigned FnEntry6BitAbbrev; + if (VSTOffsetPlaceholder > 0) { + // 8-bit fixed-width VST_FNENTRY function strings. 
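[Editor's aside: to make getStringEncoding's three-way split concrete, a few sample classifications under its rules — illustrative names, not values from the patch:]

// All characters in the Char6 alphabet -> SE_Char6.
assert(getStringEncoding("main", 4) == SE_Char6);
// '$' is 7-bit ASCII but outside the Char6 alphabet -> SE_Fixed7.
assert(getStringEncoding("_Z3foo$1", 8) == SE_Fixed7);
// Any byte with the high bit set forces full 8-bit units -> SE_Fixed8.
assert(getStringEncoding("caf\xC3\xA9", 5) == SE_Fixed8);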
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 7-bit fixed width VST_FNENTRY function strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); + FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 6-bit char6 VST_FNENTRY function strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv); + } + // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. SmallVector NameVals; - for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end(); - SI != SE; ++SI) { - - const ValueName &Name = *SI; - + for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. - bool is7Bit = true; - bool isChar6 = true; - for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength(); - C != E; ++C) { - if (isChar6) - isChar6 = BitCodeAbbrevOp::isChar6(*C); - if ((unsigned char)*C & 128) { - is7Bit = false; - break; // don't bother scanning the rest. - } - } + StringEncoding Bits = + getStringEncoding(Name.getKeyData(), Name.getKeyLength()); unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; + NameVals.push_back(VE.getValueID(Name.getValue())); + + Function *F = dyn_cast(Name.getValue()); + if (!F) { + // If value is an alias, need to get the aliased base object to + // see if it is a function. + auto *GA = dyn_cast(Name.getValue()); + if (GA && GA->getBaseObject()) + F = dyn_cast(GA->getBaseObject()); + } // VST_ENTRY: [valueid, namechar x N] + // VST_FNENTRY: [valueid, funcoffset, namechar x N] // VST_BBENTRY: [bbid, namechar x N] unsigned Code; - if (isa(SI->getValue())) { + if (isa(Name.getValue())) { Code = bitc::VST_CODE_BBENTRY; - if (isChar6) + if (Bits == SE_Char6) AbbrevToUse = VST_BBENTRY_6_ABBREV; + } else if (F && !F->isDeclaration()) { + // Must be the module-level VST, where we pass in the Index and + // have a VSTOffsetPlaceholder. The function-level VST should not + // contain any Function symbols. + assert(FunctionIndex); + assert(VSTOffsetPlaceholder > 0); + + // Save the word offset of the function (from the start of the + // actual bitcode written to the stream). 
+ assert(FunctionIndex->count(F) == 1); + uint64_t BitcodeIndex = + (*FunctionIndex)[F]->bitcodeIndex() - BitcodeStartBit; + assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); + NameVals.push_back(BitcodeIndex / 32); + + Code = bitc::VST_CODE_FNENTRY; + AbbrevToUse = FnEntry8BitAbbrev; + if (Bits == SE_Char6) + AbbrevToUse = FnEntry6BitAbbrev; + else if (Bits == SE_Fixed7) + AbbrevToUse = FnEntry7BitAbbrev; } else { Code = bitc::VST_CODE_ENTRY; - if (isChar6) + if (Bits == SE_Char6) AbbrevToUse = VST_ENTRY_6_ABBREV; - else if (is7Bit) + else if (Bits == SE_Fixed7) AbbrevToUse = VST_ENTRY_7_ABBREV; } - NameVals.push_back(VE.getValueID(SI->getValue())); - for (const char *P = Name.getKeyData(), - *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P) - NameVals.push_back((unsigned char)*P); + for (const auto P : Name.getKey()) + NameVals.push_back((unsigned char)P); // Emit the finished record. Stream.EmitRecord(Code, NameVals, AbbrevToUse); @@ -2066,6 +2351,66 @@ static void WriteValueSymbolTable(const ValueSymbolTable &VST, Stream.ExitBlock(); } +/// Emit function names and summary offsets for the combined index +/// used by ThinLTO. +static void WriteCombinedValueSymbolTable(const FunctionInfoIndex &Index, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); + + // 8-bit fixed-width VST_COMBINED_FNENTRY function strings. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + unsigned FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 7-bit fixed width VST_COMBINED_FNENTRY function strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); + unsigned FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv); + + // 6-bit char6 VST_COMBINED_FNENTRY function strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + unsigned FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv); + + // FIXME: We know if the type names can use 7-bit ascii. + SmallVector NameVals; + + for (const auto &FII : Index) { + for (const auto &FI : FII.getValue()) { + NameVals.push_back(FI->bitcodeIndex()); + + StringRef FuncName = FII.first(); + + // Figure out the encoding to use for the name. + StringEncoding Bits = getStringEncoding(FuncName.data(), FuncName.size()); + + // VST_COMBINED_FNENTRY: [funcsumoffset, namechar x N] + unsigned AbbrevToUse = FnEntry8BitAbbrev; + if (Bits == SE_Char6) + AbbrevToUse = FnEntry6BitAbbrev; + else if (Bits == SE_Fixed7) + AbbrevToUse = FnEntry7BitAbbrev; + + for (const auto P : FuncName) + NameVals.push_back((unsigned char)P); + + // Emit the finished record. 
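[Editor's aside: the function offsets in VST_CODE_FNENTRY and the VST offset record rely on the same invariant — blocks begin on 32-bit boundaries, so a bit position can be stored as a word index. A compact restatement, where `FuncStartBit` stands in for the value captured at the top of WriteFunction and `Cursor` for a reader-side BitstreamCursor; both names are placeholders:]

// Writer side: take the block's start bit relative to the start of the
// bitcode proper (BitcodeStartBit skips any darwin wrapper); it is always
// a multiple of 32, so only the word count is recorded.
uint64_t RelBit = FuncStartBit - BitcodeStartBit;
assert((RelBit & 31) == 0 && "function block not 32-bit aligned");
uint64_t WordOffset = RelBit / 32;

// Reader side: scale the stored word index back to a bit position before
// seeking to the function block.
Cursor.JumpToBit(WordOffset * 32);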
+ Stream.EmitRecord(bitc::VST_CODE_COMBINED_FNENTRY, NameVals, AbbrevToUse); + NameVals.clear(); + } + } + Stream.ExitBlock(); +} + static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order, BitstreamWriter &Stream) { assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); @@ -2100,9 +2445,34 @@ static void WriteUseListBlock(const Function *F, ValueEnumerator &VE, Stream.ExitBlock(); } -/// WriteFunction - Emit a function body to the module stream. -static void WriteFunction(const Function &F, ValueEnumerator &VE, - BitstreamWriter &Stream) { +/// \brief Save information for the given function into the function index. +/// +/// At a minimum this saves the bitcode index of the function record that +/// was just written. However, if we are emitting function summary information, +/// for example for ThinLTO, then a \a FunctionSummary object is created +/// to hold the provided summary information. +static void SaveFunctionInfo( + const Function &F, + DenseMap> &FunctionIndex, + unsigned NumInsts, uint64_t BitcodeIndex, bool EmitFunctionSummary) { + std::unique_ptr FuncSummary; + if (EmitFunctionSummary) { + FuncSummary = llvm::make_unique(NumInsts); + FuncSummary->setLocalFunction(F.hasLocalLinkage()); + } + FunctionIndex[&F] = + llvm::make_unique(BitcodeIndex, std::move(FuncSummary)); +} + +/// Emit a function body to the module stream. +static void WriteFunction( + const Function &F, ValueEnumerator &VE, BitstreamWriter &Stream, + DenseMap> &FunctionIndex, + bool EmitFunctionSummary) { + // Save the bitcode index of the start of this function block for recording + // in the VST. + uint64_t BitcodeIndex = Stream.GetCurrentBitNo(); + Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4); VE.incorporateFunction(F); @@ -2128,6 +2498,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; + unsigned NumInsts = 0; // Finally, emit all the instructions, in order. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -2135,6 +2506,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, I != E; ++I) { WriteInstruction(*I, InstID, VE, Stream, Vals); + if (!isa(I)) + ++NumInsts; + if (!I->getType()->isVoidTy()) ++InstID; @@ -2171,6 +2545,9 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, WriteUseListBlock(&F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); + + SaveFunctionInfo(F, FunctionIndex, NumInsts, BitcodeIndex, + EmitFunctionSummary); } // Emit blockinfo, which defines the standard abbreviations etc. @@ -2348,9 +2725,183 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } +/// Write the module path strings, currently only used when generating +/// a combined index file. +static void WriteModStrings(const FunctionInfoIndex &I, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::MODULE_STRTAB_BLOCK_ID, 3); + + // TODO: See which abbrev sizes we actually need to emit + + // 8-bit fixed-width MST_ENTRY strings. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); + unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv); + + // 7-bit fixed width MST_ENTRY strings. 
+ Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); + unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv); + + // 6-bit char6 MST_ENTRY strings. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + for (const StringMapEntry &MPSE : I.modPathStringEntries()) { + StringEncoding Bits = + getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); + unsigned AbbrevToUse = Abbrev8Bit; + if (Bits == SE_Char6) + AbbrevToUse = Abbrev6Bit; + else if (Bits == SE_Fixed7) + AbbrevToUse = Abbrev7Bit; + + NameVals.push_back(MPSE.getValue()); + + for (const auto P : MPSE.getKey()) + NameVals.push_back((unsigned char)P); + + // Emit the finished record. + Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse); + NameVals.clear(); + } + Stream.ExitBlock(); +} + +// Helper to emit a single function summary record. +static void WritePerModuleFunctionSummaryRecord( + SmallVector &NameVals, FunctionSummary *FS, unsigned ValueID, + unsigned FSAbbrev, BitstreamWriter &Stream) { + assert(FS); + NameVals.push_back(ValueID); + NameVals.push_back(FS->isLocalFunction()); + NameVals.push_back(FS->instCount()); + + // Emit the finished record. + Stream.EmitRecord(bitc::FS_CODE_PERMODULE_ENTRY, NameVals, FSAbbrev); + NameVals.clear(); +} + +/// Emit the per-module function summary section alongside the rest of +/// the module's bitcode. +static void WritePerModuleFunctionSummary( + DenseMap> &FunctionIndex, + const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3); + + // Abbrev for FS_CODE_PERMODULE_ENTRY. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_PERMODULE_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // islocal + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + unsigned FSAbbrev = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + for (auto &I : FunctionIndex) { + // Skip anonymous functions. We will emit a function summary for + // any aliases below. + if (!I.first->hasName()) + continue; + + WritePerModuleFunctionSummaryRecord( + NameVals, I.second->functionSummary(), + VE.getValueID(M->getValueSymbolTable().lookup(I.first->getName())), + FSAbbrev, Stream); + } + + for (const GlobalAlias &A : M->aliases()) { + if (!A.getBaseObject()) + continue; + const Function *F = dyn_cast(A.getBaseObject()); + if (!F || F->isDeclaration()) + continue; + + assert(FunctionIndex.count(F) == 1); + WritePerModuleFunctionSummaryRecord( + NameVals, FunctionIndex[F]->functionSummary(), + VE.getValueID(M->getValueSymbolTable().lookup(A.getName())), FSAbbrev, + Stream); + } + + Stream.ExitBlock(); +} + +/// Emit the combined function summary section into the combined index +/// file. +static void WriteCombinedFunctionSummary(const FunctionInfoIndex &I, + BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::FUNCTION_SUMMARY_BLOCK_ID, 3); + + // Abbrev for FS_CODE_COMBINED_ENTRY. 
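[Editor's aside: for quick reference, the shape of the two summary records defined in this block, paraphrased from the emission code above; this is a reading aid, not an additional format guarantee.]

// FS_CODE_PERMODULE_ENTRY: [valueid(VBR8), islocal(Fixed1), instcount(VBR8)]
//   valueid   - module-level value ID of the function (resolved via the VST)
//   islocal   - FunctionSummary::isLocalFunction()
//   instcount - non-debug instruction count gathered in WriteFunction
//
// FS_CODE_COMBINED_ENTRY: [modid(VBR8), instcount(VBR8)]
//   modid     - module ID assigned by the MODULE_STRTAB_BLOCK (MST_CODE_ENTRY)
//   instcount - as above, carried over from the per-module summary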
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_CODE_COMBINED_ENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount + unsigned FSAbbrev = Stream.EmitAbbrev(Abbv); + + SmallVector NameVals; + for (const auto &FII : I) { + for (auto &FI : FII.getValue()) { + FunctionSummary *FS = FI->functionSummary(); + assert(FS); + + NameVals.push_back(I.getModuleId(FS->modulePath())); + NameVals.push_back(FS->instCount()); + + // Record the starting offset of this summary entry for use + // in the VST entry. Add the current code size since the + // reader will invoke readRecord after the abbrev id read. + FI->setBitcodeIndex(Stream.GetCurrentBitNo() + Stream.GetAbbrevIDWidth()); + + // Emit the finished record. + Stream.EmitRecord(bitc::FS_CODE_COMBINED_ENTRY, NameVals, FSAbbrev); + NameVals.clear(); + } + } + + Stream.ExitBlock(); +} + +// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the +// current llvm version, and a record for the epoch number. +static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5); + + // Write the "user readable" string identifying the bitcode producer + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); + auto StringAbbrev = Stream.EmitAbbrev(Abbv); + WriteStringRecord(bitc::IDENTIFICATION_CODE_STRING, + "LLVM" LLVM_VERSION_STRING, StringAbbrev, Stream); + + // Write the epoch version + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); + auto EpochAbbrev = Stream.EmitAbbrev(Abbv); + SmallVector Vals = {bitc::BITCODE_CURRENT_EPOCH}; + Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev); + Stream.ExitBlock(); +} + /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream, - bool ShouldPreserveUseListOrder) { + bool ShouldPreserveUseListOrder, + uint64_t BitcodeStartBit, bool EmitFunctionSummary) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); SmallVector Vals; @@ -2377,7 +2928,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream, // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. - WriteModuleInfo(M, VE, Stream); + uint64_t VSTOffsetPlaceholder = WriteModuleInfo(M, VE, Stream); // Emit constants. WriteModuleConstants(VE, Stream); @@ -2388,17 +2939,25 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream, // Emit metadata. WriteModuleMetadataStore(M, Stream); - // Emit names for globals/functions etc. - WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); - // Emit module-level use-lists. if (VE.shouldPreserveUseListOrder()) WriteUseListBlock(nullptr, VE, Stream); + WriteOperandBundleTags(M, Stream); + // Emit function bodies. + DenseMap> FunctionIndex; for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) if (!F->isDeclaration()) - WriteFunction(*F, VE, Stream); + WriteFunction(*F, VE, Stream, FunctionIndex, EmitFunctionSummary); + + // Need to write after the above call to WriteFunction which populates + // the summary information in the index. 
+ if (EmitFunctionSummary) + WritePerModuleFunctionSummary(FunctionIndex, M, VE, Stream); + + WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream, + VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex); Stream.ExitBlock(); } @@ -2473,10 +3032,22 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl &Buffer, Buffer.push_back(0); } +/// Helper to write the header common to all bitcode files. +static void WriteBitcodeHeader(BitstreamWriter &Stream) { + // Emit the file header. + Stream.Emit((unsigned)'B', 8); + Stream.Emit((unsigned)'C', 8); + Stream.Emit(0x0, 4); + Stream.Emit(0xC, 4); + Stream.Emit(0xE, 4); + Stream.Emit(0xD, 4); +} + /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, - bool ShouldPreserveUseListOrder) { + bool ShouldPreserveUseListOrder, + bool EmitFunctionSummary) { SmallVector Buffer; Buffer.reserve(256*1024); @@ -2489,17 +3060,20 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, // Emit the module into the buffer. { BitstreamWriter Stream(Buffer); + // Save the start bit of the actual bitcode, in case there is space + // saved at the start for the darwin header above. The reader stream + // will start at the bitcode, and we need the offset of the VST + // to line up. + uint64_t BitcodeStartBit = Stream.GetCurrentBitNo(); // Emit the file header. - Stream.Emit((unsigned)'B', 8); - Stream.Emit((unsigned)'C', 8); - Stream.Emit(0x0, 4); - Stream.Emit(0xC, 4); - Stream.Emit(0xE, 4); - Stream.Emit(0xD, 4); + WriteBitcodeHeader(Stream); + + WriteIdentificationBlock(M, Stream); // Emit the module. - WriteModule(M, Stream, ShouldPreserveUseListOrder); + WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit, + EmitFunctionSummary); } if (TT.isOSDarwin()) @@ -2508,3 +3082,38 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); } + +// Write the specified function summary index to the given raw output stream, +// where it will be written in a new bitcode block. This is used when +// writing the combined index file for ThinLTO. +void llvm::WriteFunctionSummaryToFile(const FunctionInfoIndex &Index, + raw_ostream &Out) { + SmallVector Buffer; + Buffer.reserve(256 * 1024); + + BitstreamWriter Stream(Buffer); + + // Emit the bitcode header. + WriteBitcodeHeader(Stream); + + Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + + SmallVector Vals; + unsigned CurVersion = 1; + Vals.push_back(CurVersion); + Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); + + // Write the module paths in the combined index. + WriteModStrings(Index, Stream); + + // Write the function summary combined index records. + WriteCombinedFunctionSummary(Index, Stream); + + // Need a special VST writer for the combined index (we don't have a + // real VST and real values when this is invoked). 
+ WriteCombinedValueSymbolTable(Index, Stream); + + Stream.ExitBlock(); + + Out.write((char *)&Buffer.front(), Buffer.size()); +} diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp index 3165743576ec..24de99a34d33 100644 --- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -19,7 +19,7 @@ using namespace llvm; PreservedAnalyses BitcodeWriterPass::run(Module &M) { - WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder, EmitFunctionSummary); return PreservedAnalyses::all(); } @@ -27,17 +27,21 @@ namespace { class WriteBitcodePass : public ModulePass { raw_ostream &OS; // raw_ostream to print on bool ShouldPreserveUseListOrder; + bool EmitFunctionSummary; public: static char ID; // Pass identification, replacement for typeid - explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder) + explicit WriteBitcodePass(raw_ostream &o, bool ShouldPreserveUseListOrder, + bool EmitFunctionSummary) : ModulePass(ID), OS(o), - ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {} + ShouldPreserveUseListOrder(ShouldPreserveUseListOrder), + EmitFunctionSummary(EmitFunctionSummary) {} const char *getPassName() const override { return "Bitcode Writer"; } bool runOnModule(Module &M) override { - WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder); + WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder, + EmitFunctionSummary); return false; } }; @@ -46,6 +50,8 @@ namespace { char WriteBitcodePass::ID = 0; ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str, - bool ShouldPreserveUseListOrder) { - return new WriteBitcodePass(Str, ShouldPreserveUseListOrder); + bool ShouldPreserveUseListOrder, + bool EmitFunctionSummary) { + return new WriteBitcodePass(Str, ShouldPreserveUseListOrder, + EmitFunctionSummary); } diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 44dd604f8823..e07563b5a390 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -87,15 +87,9 @@ static OrderMap orderModule(const Module &M) { if (!isa(A.getAliasee())) orderValue(A.getAliasee(), OM); for (const Function &F : M) { - if (F.hasPrefixData()) - if (!isa(F.getPrefixData())) - orderValue(F.getPrefixData(), OM); - if (F.hasPrologueData()) - if (!isa(F.getPrologueData())) - orderValue(F.getPrologueData(), OM); - if (F.hasPersonalityFn()) - if (!isa(F.getPersonalityFn())) - orderValue(F.getPersonalityFn(), OM); + for (const Use &U : F.operands()) + if (!isa(U.get())) + orderValue(U.get(), OM); } OM.LastGlobalConstantID = OM.size(); @@ -273,12 +267,8 @@ static UseListOrderStack predictUseListOrder(const Module &M) { for (const GlobalAlias &A : M.aliases()) predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack); for (const Function &F : M) { - if (F.hasPrefixData()) - predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack); - if (F.hasPrologueData()) - predictValueUseListOrder(F.getPrologueData(), nullptr, OM, Stack); - if (F.hasPersonalityFn()) - predictValueUseListOrder(F.getPersonalityFn(), nullptr, OM, Stack); + for (const Use &U : F.operands()) + predictValueUseListOrder(U.get(), nullptr, OM, Stack); } return Stack; @@ -321,20 +311,10 @@ ValueEnumerator::ValueEnumerator(const Module &M, for (const GlobalAlias &GA : M.aliases()) EnumerateValue(GA.getAliasee()); - // Enumerate the prefix data constants. + // Enumerate any optional Function data. 
for (const Function &F : M) - if (F.hasPrefixData()) - EnumerateValue(F.getPrefixData()); - - // Enumerate the prologue data constants. - for (const Function &F : M) - if (F.hasPrologueData()) - EnumerateValue(F.getPrologueData()); - - // Enumerate the personality functions. - for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->hasPersonalityFn()) - EnumerateValue(I->getPersonalityFn()); + for (const Use &U : F.operands()) + EnumerateValue(U.get()); // Enumerate the metadata type. // @@ -425,7 +405,7 @@ unsigned ValueEnumerator::getValueID(const Value *V) const { void ValueEnumerator::dump() const { print(dbgs(), ValueMap, "Default"); dbgs() << '\n'; - print(dbgs(), MDValueMap, "MetaData"); + print(dbgs(), MetadataMap, "MetaData"); dbgs() << '\n'; } @@ -512,10 +492,8 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { /// Insert all of the values referenced by named metadata in the specified /// module. void ValueEnumerator::EnumerateNamedMetadata(const Module &M) { - for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), - E = M.named_metadata_end(); - I != E; ++I) - EnumerateNamedMDNode(I); + for (const auto &I : M.named_metadata()) + EnumerateNamedMDNode(&I); } void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { @@ -544,7 +522,7 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { // EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph. // // Return early if there's already an ID. - if (!MDValueMap.insert(std::make_pair(MD, 0)).second) + if (!MetadataMap.insert(std::make_pair(MD, 0)).second) return; // Visit operands first to minimize RAUW. @@ -557,10 +535,10 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { HasDILocation |= isa(MD); HasGenericDINode |= isa(MD); - // Replace the dummy ID inserted above with the correct one. MDValueMap may + // Replace the dummy ID inserted above with the correct one. MetadataMap may // have changed by inserting operands, so we need a fresh lookup here. MDs.push_back(MD); - MDValueMap[MD] = MDs.size(); + MetadataMap[MD] = MDs.size(); } /// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata @@ -568,12 +546,12 @@ void ValueEnumerator::EnumerateMetadata(const Metadata *MD) { void ValueEnumerator::EnumerateFunctionLocalMetadata( const LocalAsMetadata *Local) { // Check to see if it's already in! - unsigned &MDValueID = MDValueMap[Local]; - if (MDValueID) + unsigned &MetadataID = MetadataMap[Local]; + if (MetadataID) return; MDs.push_back(Local); - MDValueID = MDs.size(); + MetadataID = MDs.size(); EnumerateValue(Local->getValue()); @@ -729,23 +707,20 @@ void ValueEnumerator::incorporateFunction(const Function &F) { NumModuleMDs = MDs.size(); // Adding function arguments to the value table. - for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); - I != E; ++I) - EnumerateValue(I); + for (const auto &I : F.args()) + EnumerateValue(&I); FirstFuncConstantID = Values.size(); // Add all function-level constants to the value table. 
- for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) - for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); - OI != E; ++OI) { - if ((isa(*OI) && !isa(*OI)) || - isa(*OI)) - EnumerateValue(*OI); + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) + for (const Use &OI : I.operands()) { + if ((isa(OI) && !isa(OI)) || isa(OI)) + EnumerateValue(OI); } - BasicBlocks.push_back(BB); - ValueMap[BB] = BasicBlocks.size(); + BasicBlocks.push_back(&BB); + ValueMap[&BB] = BasicBlocks.size(); } // Optimize the constant layout. @@ -759,18 +734,17 @@ void ValueEnumerator::incorporateFunction(const Function &F) { SmallVector FnLocalMDVector; // Add all of the instructions. - for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { - for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); - OI != E; ++OI) { - if (auto *MD = dyn_cast(&*OI)) + for (const BasicBlock &BB : F) { + for (const Instruction &I : BB) { + for (const Use &OI : I.operands()) { + if (auto *MD = dyn_cast(&OI)) if (auto *Local = dyn_cast(MD->getMetadata())) // Enumerate metadata after the instructions they might refer to. FnLocalMDVector.push_back(Local); } - if (!I->getType()->isVoidTy()) - EnumerateValue(I); + if (!I.getType()->isVoidTy()) + EnumerateValue(&I); } } @@ -784,7 +758,7 @@ void ValueEnumerator::purgeFunction() { for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) ValueMap.erase(Values[i].first); for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i) - MDValueMap.erase(MDs[i]); + MetadataMap.erase(MDs[i]); for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) ValueMap.erase(BasicBlocks[i]); @@ -797,8 +771,8 @@ void ValueEnumerator::purgeFunction() { static void IncorporateFunctionInfoGlobalBBIDs(const Function *F, DenseMap &IDMap) { unsigned Counter = 0; - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - IDMap[BB] = ++Counter; + for (const BasicBlock &BB : *F) + IDMap[&BB] = ++Counter; } /// getGlobalBasicBlockID - This returns the function-specific ID for the diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 92d166e3ba92..9fb8325150e9 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -63,7 +63,7 @@ private: std::vector MDs; SmallVector FunctionLocalMDs; typedef DenseMap MetadataMapType; - MetadataMapType MDValueMap; + MetadataMapType MetadataMap; bool HasMDString; bool HasDILocation; bool HasGenericDINode; @@ -93,7 +93,7 @@ private: /// before incorporation. unsigned NumModuleValues; - /// When a function is incorporated, this is the size of the MDValues list + /// When a function is incorporated, this is the size of the Metadatas list /// before incorporation. 
unsigned NumModuleMDs; @@ -117,8 +117,9 @@ public: return ID - 1; } unsigned getMetadataOrNullID(const Metadata *MD) const { - return MDValueMap.lookup(MD); + return MetadataMap.lookup(MD); } + unsigned numMDs() const { return MDs.size(); } bool hasMDString() const { return HasMDString; } bool hasDILocation() const { return HasDILocation; } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 5fe4c4bcaec4..4060db74a9b7 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -142,16 +142,15 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { assert(!State); State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); - bool IsReturnBlock = (!BB->empty() && BB->back().isReturn()); + bool IsReturnBlock = BB->isReturnBlock(); std::vector &KillIndices = State->GetKillIndices(); std::vector &DefIndices = State->GetDefIndices(); // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + for (const auto &LI : (*SI)->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; State->UnionGroups(Reg, 0); KillIndices[Reg] = BB->size(); @@ -365,9 +364,11 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers - // defined in a call must not be changed (ABI). + // defined in a call must not be changed (ABI). Inline assembly may + // reference either system calls or the register directly. Skip it until we + // can tell user specified registers from compiler-specified. if (MI->isCall() || MI->hasExtraDefRegAllocReq() || - TII->isPredicated(MI)) { + TII->isPredicated(MI) || MI->isInlineAsm()) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -429,6 +430,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // If MI's uses have special allocation requirement, don't allow // any use registers to be changed. Also assume all registers // used in a call must not be changed (ABI). + // Inline Assembly register uses also cannot be safely changed. // FIXME: The issue with predicated instruction is more complex. We are being // conservatively here because the kill markers cannot be trusted after // if-conversion: @@ -444,7 +446,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // changed. bool Special = MI->isCall() || MI->hasExtraSrcRegAllocReq() || - TII->isPredicated(MI); + TII->isPredicated(MI) || MI->isInlineAsm(); // Scan the register uses for this instruction and update // live-ranges, groups and RegRefs. @@ -509,15 +511,8 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { // Check all references that need rewriting for Reg. For each, use // the corresponding register class to narrow the set of registers // that are appropriate for renaming. 
- std::pair::iterator, - std::multimap::iterator> - Range = State->GetRegRefs().equal_range(Reg); - for (std::multimap::iterator Q = Range.first, - QE = Range.second; Q != QE; ++Q) { - const TargetRegisterClass *RC = Q->second.RC; + for (const auto &Q : make_range(State->GetRegRefs().equal_range(Reg))) { + const TargetRegisterClass *RC = Q.second.RC; if (!RC) continue; BitVector RCBV = TRI->getAllocatableSet(MF, RC); @@ -685,9 +680,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also // defines 'NewReg' via an early-clobber operand. - auto Range = RegRefs.equal_range(Reg); - for (auto Q = Range.first, QE = Range.second; Q != QE; ++Q) { - auto UseMI = Q->second.Operand->getParent(); + for (const auto &Q : make_range(RegRefs.equal_range(Reg))) { + MachineInstr *UseMI = Q.second.Operand->getParent(); int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI); if (Idx == -1) continue; @@ -698,6 +692,20 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( } } + // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining + // 'Reg' is an early-clobber define and that instruction also uses + // 'NewReg'. + for (const auto &Q : make_range(RegRefs.equal_range(Reg))) { + if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber()) + continue; + + MachineInstr *DefMI = Q.second.Operand->getParent(); + if (DefMI->readsRegister(NewReg, TRI)) { + DEBUG(dbgs() << "(ec)"); + goto next_super_reg; + } + } + // Record that 'Reg' can be renamed to 'NewReg'. RenameMap.insert(std::pair(Reg, NewReg)); } @@ -920,23 +928,16 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Update the references to the old register CurrReg to // refer to the new register NewReg. - std::pair::iterator, - std::multimap::iterator> - Range = RegRefs.equal_range(CurrReg); - for (std::multimap::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) { - Q->second.Operand->setReg(NewReg); + for (const auto &Q : make_range(RegRefs.equal_range(CurrReg))) { + Q.second.Operand->setReg(NewReg); // If the SU for the instruction being updated has debug // information related to the anti-dependency register, make // sure to update that as well. - const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; + const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()]; if (!SU) continue; for (DbgValueVector::iterator DVI = DbgValues.begin(), DVE = DbgValues.end(); DVI != DVE; ++DVI) - if (DVI->second == Q->second.Operand->getParent()) + if (DVI->second == Q.second.Operand->getParent()) UpdateDbgValue(DVI->first, AntiDepReg, NewReg); } diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index dc9bcff56121..40451c0d6c19 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -29,12 +29,13 @@ using namespace llvm; // Compare VirtRegMap::getRegAllocPref(). 
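[Editor's aside: the recurring change in this file is one modernization — wrapping a multimap's equal_range iterator pair in llvm::make_range so it can drive a range-based for loop. A standalone sketch of the idiom with generic names, not code from the patch:]

#include "llvm/ADT/iterator_range.h"
#include <map>
#include <string>

void visitRefs(const std::multimap<unsigned, std::string> &RegRefs,
               unsigned Reg) {
  // equal_range returns a {first, second} iterator pair; make_range adapts
  // it to something a range-based for loop accepts directly.
  for (const auto &Entry : llvm::make_range(RegRefs.equal_range(Reg)))
    (void)Entry.second; // use the mapped value here
}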
AllocationOrder::AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo) + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix) : Pos(0) { const MachineFunction &MF = VRM.getMachineFunction(); const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo(); Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); - TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM); + TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix); rewind(); DEBUG({ diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 02b2d9250bc8..2aee3a63a2b1 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -24,6 +24,7 @@ namespace llvm { class RegisterClassInfo; class VirtRegMap; +class LiveRegMatrix; class LLVM_LIBRARY_VISIBILITY AllocationOrder { SmallVector<MCPhysReg, 8> Hints; @@ -37,7 +38,8 @@ public: /// @param RegClassInfo Information about reserved and allocatable registers. AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, - const RegisterClassInfo &RegClassInfo); + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix); /// Get the allocation order without reordered hints. ArrayRef<MCPhysReg> getOrder() const { return Order; } diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 98d4c8afc7b9..75579a2b4559 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -25,6 +26,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" @@ -515,7 +517,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { if (isa<DbgInfoIntrinsic>(BBI)) continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || - !isSafeToSpeculativelyExecute(BBI)) + !isSafeToSpeculativelyExecute(&*BBI)) return false; } @@ -643,3 +645,97 @@ bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) { return !GS.IsCompared; } + +static void collectFuncletMembers( + DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet, + const MachineBasicBlock *MBB) { + // Add this MBB to our funclet. + auto P = FuncletMembership.insert(std::make_pair(MBB, Funclet)); + + // Don't revisit blocks. + if (!P.second) { + assert(P.first->second == Funclet && "MBB is part of two funclets!"); + return; + } + + bool IsReturn = false; + int NumTerminators = 0; + for (const MachineInstr &MI : MBB->terminators()) { + IsReturn |= MI.isReturn(); + ++NumTerminators; + } + assert((!IsReturn || NumTerminators == 1) && + "Expected only one terminator when a return is present!"); + + // Returns are boundaries where funclet transfer can occur, don't follow + // successors. + if (IsReturn) + return; + + for (const MachineBasicBlock *SMBB : MBB->successors()) + if (!SMBB->isEHPad()) + collectFuncletMembers(FuncletMembership, Funclet, SMBB); +} + +DenseMap<const MachineBasicBlock *, int> +llvm::getFuncletMembership(const MachineFunction &MF) { + DenseMap<const MachineBasicBlock *, int> FuncletMembership; + + // We don't have anything to do if there aren't any EH pads.
+ if (!MF.getMMI().hasEHFunclets()) + return FuncletMembership; + + int EntryBBNumber = MF.front().getNumber(); + bool IsSEH = isAsynchronousEHPersonality( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SmallVector<const MachineBasicBlock *, 16> FuncletBlocks; + SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks; + SmallVector<const MachineBasicBlock *, 16> SEHCatchPads; + SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors; + for (const MachineBasicBlock &MBB : MF) { + if (MBB.isEHFuncletEntry()) { + FuncletBlocks.push_back(&MBB); + } else if (IsSEH && MBB.isEHPad()) { + SEHCatchPads.push_back(&MBB); + } else if (MBB.pred_empty()) { + UnreachableBlocks.push_back(&MBB); + } + + MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); + // CatchPads are not funclets for SEH so do not consider CatchRet to + // transfer control to another funclet. + if (MBBI->getOpcode() != TII->getCatchReturnOpcode()) + continue; + + // FIXME: SEH CatchPads are not necessarily in the parent function: + // they could be inside a finally block. + const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB(); + const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB(); + CatchRetSuccessors.push_back( + {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()}); + } + + // We don't have anything to do if there aren't any EH pads. + if (FuncletBlocks.empty()) + return FuncletMembership; + + // Identify all the basic blocks reachable from the function entry. + collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front()); + // All blocks not part of a funclet are in the parent function. + for (const MachineBasicBlock *MBB : UnreachableBlocks) + collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); + // Next, identify all the blocks inside the funclets. + for (const MachineBasicBlock *MBB : FuncletBlocks) + collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB); + // SEH CatchPads aren't really funclets, handle them separately. + for (const MachineBasicBlock *MBB : SEHCatchPads) + collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); + // Finally, identify all the targets of a catchret.
+ for (std::pair<const MachineBasicBlock *, int> CatchRetPair : + CatchRetSuccessors) + collectFuncletMembers(FuncletMembership, CatchRetPair.second, + CatchRetPair.first); + return FuncletMembership; +} diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 0bad7954b980..ade2d7105b88 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -73,7 +73,6 @@ void ARMException::endFunction(const MachineFunction *MF) { const Function *Per = nullptr; if (F->hasPersonalityFn()) Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); - assert(!MMI->getPersonality() || Per == MMI->getPersonality()); bool forceEmitPersonality = F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && F->needsUnwindTableEntry(); @@ -115,9 +114,7 @@ void ARMException::emitTypeInfos(unsigned TTypeEncoding) { Entry = TypeInfos.size(); } - for (std::vector<const GlobalValue *>::const_reverse_iterator - I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { - const GlobalValue *GV = *I; + for (const GlobalValue *GV : reverse(TypeInfos)) { if (VerboseAsm) Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); Asm->EmitTTypeReference(GV, TTypeEncoding); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 125047e7bbb5..be7eafbeb83d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -135,11 +135,14 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { return *TM.getObjFileLowering(); } -/// getDataLayout - Return information about data layout. const DataLayout &AsmPrinter::getDataLayout() const { - return *TM.getDataLayout(); + return MMI->getModule()->getDataLayout(); } +// Do not use the cached DataLayout because some clients use it without a Module +// (llvm-dsymutil, llvm-dwarfdump). +unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); } + const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); return MF->getSubtarget(); @@ -193,10 +196,18 @@ bool AsmPrinter::doInitialization(Module &M) { unsigned Major, Minor, Update; TT.getOSVersion(Major, Minor, Update); // If there is a version specified, Major will be non-zero. - if (Major) - OutStreamer->EmitVersionMin((TT.isMacOSX() ? - MCVM_OSXVersionMin : MCVM_IOSVersionMin), - Major, Minor, Update); + if (Major) { + MCVersionMinType VersionType; + if (TT.isWatchOS()) + VersionType = MCVM_WatchOSVersionMin; + else if (TT.isTvOS()) + VersionType = MCVM_TvOSVersionMin; + else if (TT.isMacOSX()) + VersionType = MCVM_OSXVersionMin; + else + VersionType = MCVM_IOSVersionMin; + OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); + } } // Allow the target to emit any magic that it wants at the start of the file.
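The funclet-membership code added to Analysis.cpp above is, at heart, a graph coloring by flood fill: seed a color at the function entry and at every funclet entry, then propagate it across non-EH-pad successor edges, stopping at returns. A minimal standalone sketch of that idea, using std::map and a toy Block type as stand-ins for DenseMap and MachineBasicBlock (hypothetical types, not the LLVM API):

#include <map>
#include <vector>

struct Block {
  int Number;
  bool IsReturn = false;
  bool IsEHPad = false;
  std::vector<const Block *> Succs;
};

static void collectMembers(std::map<const Block *, int> &Membership,
                           int Funclet, const Block *B) {
  if (!Membership.insert({B, Funclet}).second)
    return;                  // already colored; never revisit a block
  if (B->IsReturn)
    return;                  // returns bound the funclet; don't follow successors
  for (const Block *S : B->Succs)
    if (!S->IsEHPad)         // funclet entries are seeded with their own color
      collectMembers(Membership, Funclet, S);
}

Seeding collectMembers once per funclet entry, with that entry's block number as the color, reproduces the membership map that getFuncletMembership builds.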
@@ -224,28 +235,20 @@ bool AsmPrinter::doInitialization(Module &M) { TM.getTargetFeatureString())); OutStreamer->AddComment("Start of file scope inline assembly"); OutStreamer->AddBlankLine(); - EmitInlineAsm(M.getModuleInlineAsm()+"\n", *STI, TM.Options.MCOptions); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", + OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions); OutStreamer->AddComment("End of file scope inline assembly"); OutStreamer->AddBlankLine(); } if (MAI->doesSupportDebugInformation()) { - bool skip_dwarf = false; - if (TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { + bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); + if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this), DbgTimerName, CodeViewLineTablesGroupName)); - // FIXME: Don't emit DWARF debug info if there's at least one function - // with AddressSanitizer instrumentation. - // This is a band-aid fix for PR22032. - for (auto &F : M.functions()) { - if (F.hasFnAttribute(Attribute::SanitizeAddress)) { - skip_dwarf = true; - break; - } - } } - if (!skip_dwarf) { + if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { DD = new DwarfDebug(this, &M); Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); } @@ -340,8 +343,51 @@ MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { return TM.getSymbol(GV, *Mang); } +static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName()); +} + +static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName()); +} + +/// EmitEmulatedTLSControlVariable - Emit the control variable for an emulated TLS variable. +void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV, + MCSymbol *EmittedSym, + bool AllZeroInitValue) { + MCSection *TLSVarSection = getObjFileLowering().getDataSection(); + OutStreamer->SwitchSection(TLSVarSection); + MCSymbol *GVSym = getSymbol(GV); + EmitLinkage(GV, EmittedSym); // same linkage as GV + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + unsigned AlignLog = getGVAlignmentLog2(GV, DL); + unsigned WordSize = DL.getPointerSize(); + unsigned Alignment = DL.getPointerABIAlignment(); + EmitAlignment(Log2_32(Alignment)); + OutStreamer->EmitLabel(EmittedSym); + OutStreamer->EmitIntValue(Size, WordSize); + OutStreamer->EmitIntValue((1 << AlignLog), WordSize); + OutStreamer->EmitIntValue(0, WordSize); + if (GV->hasInitializer() && !AllZeroInitValue) { + OutStreamer->EmitSymbolValue( + getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize); + } else + OutStreamer->EmitIntValue(0, WordSize); + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->emitELFSize(cast(EmittedSym), + MCConstantExpr::create(4 * WordSize, OutContext)); + OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable. +} + /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + bool IsEmuTLSVar = + GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal && + TM.Options.EmulatedTLS; + assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && + "No emulated TLS variables in the common section"); + if (GV->hasInitializer()) { // Check to see if this is a special global used by LLVM, if so, emit it. 
if (EmitSpecialLLVMGlobal(GV)) @@ -352,7 +398,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GlobalGOTEquivs.count(getSymbol(GV))) return; - if (isVerbose()) { + if (isVerbose() && !IsEmuTLSVar) { + // When printing the control variable __emutls_v.*, + // we don't need to print the original TLS variable name. GV->printAsOperand(OutStreamer->GetCommentOS(), /*PrintType=*/false, GV->getParent()); OutStreamer->GetCommentOS() << '\n'; @@ -360,7 +408,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } MCSymbol *GVSym = getSymbol(GV); - EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); + MCSymbol *EmittedSym = IsEmuTLSVar ? + getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSControlSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedSym. + + EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. return; @@ -371,17 +424,29 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { "' is already defined"); if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *DL = TM.getDataLayout(); - uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). - unsigned AlignLog = getGVAlignmentLog2(GV, *DL); + unsigned AlignLog = getGVAlignmentLog2(GV, DL); + + bool AllZeroInitValue = false; + const Constant *InitValue = GV->getInitializer(); + if (isa(InitValue)) + AllZeroInitValue = true; + else { + const ConstantInt *InitIntValue = dyn_cast(InitValue); + if (InitIntValue && InitIntValue->isZero()) + AllZeroInitValue = true; + } + if (IsEmuTLSVar) + EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue); for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); @@ -390,6 +455,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { + assert(!(IsEmuTLSVar && GVKind.isCommon()) && + "No emulated TLS variables in the common section"); if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. unsigned Align = 1 << AlignLog; @@ -434,12 +501,21 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; } - MCSection *TheSection = + if (IsEmuTLSVar && AllZeroInitValue) + return; // No need of initialization values. + + MCSymbol *EmittedInitSym = IsEmuTLSVar ? + getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSInitSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedInitSym. + + MCSection *TheSection = IsEmuTLSVar ? + getObjFileLowering().getReadOnlySection() : getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); // Handle the zerofill directive on darwin, which is a special form of BSS // emission. 
- if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { + if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. // .globl _foo @@ -459,7 +535,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // TLOF class. This will also make it more obvious that stuff like // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho // specific code. - if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) { // Emit the .tbss symbol MCSymbol *MangSym = OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); @@ -473,7 +549,8 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitAlignment(AlignLog, GV); OutStreamer->EmitLabel(MangSym); - EmitGlobalConstant(GV->getInitializer()); + EmitGlobalConstant(GV->getParent()->getDataLayout(), + GV->getInitializer()); } OutStreamer->AddBlankLine(); @@ -490,7 +567,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer - unsigned PtrSize = DL->getPointerTypeSize(GV->getType()); + unsigned PtrSize = DL.getPointerTypeSize(GV->getType()); OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize); OutStreamer->EmitIntValue(0, PtrSize); @@ -502,16 +579,18 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer->SwitchSection(TheSection); - EmitLinkage(GV, GVSym); + // emutls_t.* symbols are only used in the current compilation unit. + if (!IsEmuTLSVar) + EmitLinkage(GV, EmittedInitSym); EmitAlignment(AlignLog, GV); - OutStreamer->EmitLabel(GVSym); + OutStreamer->EmitLabel(EmittedInitSym); - EmitGlobalConstant(GV->getInitializer()); + EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 - OutStreamer->emitELFSize(cast(GVSym), + OutStreamer->emitELFSize(cast(EmittedInitSym), MCConstantExpr::create(Size, OutContext)); OutStreamer->AddBlankLine(); @@ -545,7 +624,7 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the prefix data. if (F->hasPrefixData()) - EmitGlobalConstant(F->getPrefixData()); + EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData()); // Emit the CurrentFnSym. This is a virtual function to allow targets to // do their wild and crazy things as required. @@ -580,7 +659,7 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the prologue data. if (F->hasPrologueData()) - EmitGlobalConstant(F->getPrologueData()); + EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -640,19 +719,27 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// that is an implicit def. 
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - OutStreamer->AddComment(Twine("implicit-def: ") + - MMI->getContext().getRegisterInfo()->getName(RegNo)); + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "implicit-def: " + << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo()); + + OutStreamer->AddComment(OS.str()); OutStreamer->AddBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { - std::string Str = "kill:"; + std::string Str; + raw_string_ostream OS(Str); + OS << "kill:"; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &Op = MI->getOperand(i); assert(Op.isReg() && "KILL instruction must have only register operands"); - Str += ' '; - Str += AP.MMI->getContext().getRegisterInfo()->getName(Op.getReg()); - Str += (Op.isDef() ? "<def>" : "<kill>"); + OS << ' ' + << PrintReg(Op.getReg(), + AP.MF->getSubtarget().getRegisterInfo()) + << (Op.isDef() ? "<def>" : "<kill>"); } AP.OutStreamer->AddComment(Str); AP.OutStreamer->AddBlankLine(); @@ -688,6 +775,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; + for (unsigned i = 0; i < Expr->getNumElements(); ++i) { + if (Deref) { + // We currently don't support extra Offsets or derefs after the first + // one. Bail out early instead of emitting an incorrect comment. + OS << " [complex expression]"; + AP.OutStreamer->emitRawComment(OS.str()); + return true; + } + uint64_t Op = Expr->getElement(i); + if (Op == dwarf::DW_OP_deref) { + Deref = true; + continue; + } else if (Op == dwarf::DW_OP_bit_piece) { + // There can't be any operands after this in a valid expression. + break; + } + uint64_t ExtraOffset = Expr->getElement(i++); + if (Op == dwarf::DW_OP_plus) + Offset += ExtraOffset; + else { + assert(Op == dwarf::DW_OP_minus); + Offset -= ExtraOffset; + } + } + // Register or immediate value. Register 0 means undef. if (MI->getOperand(0).isFPImm()) { APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); @@ -727,7 +839,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } if (Deref) OS << '['; - OS << AP.MMI->getContext().getRegisterInfo()->getName(Reg); + OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); } if (Deref) @@ -888,7 +1000,7 @@ void AsmPrinter::EmitFunctionBody() { EmitFunctionBodyEnd(); if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - MAI->hasDotTypeDotSizeDirective()) { + MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { // Create a symbol for the end of function. CurrentFnEnd = createTempSymbol("func_end"); OutStreamer->EmitLabel(CurrentFnEnd); @@ -1047,20 +1159,17 @@ bool AsmPrinter::doFinalization(Module &M) { // Output stubs for external and common global variables. MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { - OutStreamer->SwitchSection(TLOF.getDataRelSection()); - const DataLayout *DL = TM.getDataLayout(); + OutStreamer->SwitchSection(TLOF.getDataSection()); + const DataLayout &DL = M.getDataLayout(); for (const auto &Stub : Stubs) { OutStreamer->EmitLabel(Stub.first); OutStreamer->EmitSymbolValue(Stub.second.getPointer(), - DL->getPointerSize()); + DL.getPointerSize()); } } } - // Make sure we wrote out everything we need. - OutStreamer->Flush(); - // Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) { NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, @@ -1103,10 +1212,29 @@ bool AsmPrinter::doFinalization(Module &M) { else assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); + // Set the symbol type to function if the alias has a function type. + // This affects codegen when the aliasee is not a function. + if (Alias.getType()->getPointerElementType()->isFunctionTy()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); + EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee())); + + // If the aliasee does not correspond to a symbol in the output, i.e. the + // alias is not of an object or the aliased object is private, then set the + // size of the alias symbol from the type of the alias. We don't do this in + // other situations as the alias and aliasee having differing types but same + // size may be intentional. + const GlobalObject *BaseObject = Alias.getBaseObject(); + if (MAI->hasDotTypeDotSizeDirective() && Alias.getValueType()->isSized() && + (!BaseObject || BaseObject->hasPrivateLinkage())) { + const DataLayout &DL = M.getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(Alias.getValueType()); + OutStreamer->emitELFSize(cast(Name), + MCConstantExpr::create(Size, OutContext)); + } } GCModuleInfo *MI = getAnalysisIfAvailable(); @@ -1120,16 +1248,16 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit __morestack address if needed for indirect calls. if (MMI->usesMorestackAddr()) { - MCSection *ReadOnlySection = - getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly(), - /*C=*/nullptr); + MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( + getDataLayout(), SectionKind::getReadOnly(), + /*C=*/nullptr); OutStreamer->SwitchSection(ReadOnlySection); MCSymbol *AddrSymbol = OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); OutStreamer->EmitLabel(AddrSymbol); - unsigned PtrSize = TM.getDataLayout()->getPointerSize(0); + unsigned PtrSize = M.getDataLayout().getPointerSize(0); OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), PtrSize); } @@ -1169,7 +1297,7 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { CurExceptionSym = nullptr; bool NeedsLocalForSize = MAI->needsLocalForSize(); if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || - NeedsLocalForSize) { + MMI->hasEHFunclets() || NeedsLocalForSize) { CurrentFnBegin = createTempSymbol("func_begin"); if (NeedsLocalForSize) CurrentFnSymForSize = CurrentFnBegin; @@ -1206,14 +1334,14 @@ void AsmPrinter::EmitConstantPool() { const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - SectionKind Kind = - CPE.getSectionKind(TM.getDataLayout()); + SectionKind Kind = CPE.getSectionKind(&getDataLayout()); const Constant *C = nullptr; if (!CPE.isMachineConstantPoolEntry()) C = CPE.Val.ConstVal; - MCSection *S = getObjFileLowering().getSectionForConstant(Kind, C); + MCSection *S = + getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C); // The number of sections are small, just do a linear search from the // last section to the first. 
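For reference, the control variable that EmitEmulatedTLSControlVariable lays out a few hunks earlier is four pointer-sized words: allocation size, alignment, a zeroed slot owned by the emutls runtime, and a pointer to the __emutls_t.* initializer image (or null when the initial value is all zeros). A sketch of the equivalent in-memory view, read directly off the EmitIntValue/EmitSymbolValue sequence in that hunk; the struct and field names here are illustrative, not from LLVM:

#include <cstdint>

// Mirrors the four words emitted for __emutls_v.<name>.
struct EmuTLSControl {
  uintptr_t Size;        // DL.getTypeAllocSize of the TLS variable
  uintptr_t Align;       // 1 << AlignLog
  uintptr_t Reserved;    // emitted as 0; filled in lazily by the runtime
  const void *InitImage; // __emutls_t.<name>, or null for zero-init
};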
@@ -1260,14 +1388,13 @@ void AsmPrinter::EmitConstantPool() { OutStreamer->EmitZeros(NewOffset - Offset); Type *Ty = CPE.getType(); - Offset = NewOffset + - TM.getDataLayout()->getTypeAllocSize(Ty); + Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty); OutStreamer->EmitLabel(Sym); if (CPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); else - EmitGlobalConstant(CPE.Val.ConstVal); + EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal); } } } @@ -1276,7 +1403,7 @@ void AsmPrinter::EmitConstantPool() { /// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { - const DataLayout *DL = MF->getTarget().getDataLayout(); + const DataLayout &DL = MF->getDataLayout(); const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); if (!MJTI) return; if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; @@ -1296,8 +1423,7 @@ void AsmPrinter::EmitJumpTableInfo() { OutStreamer->SwitchSection(ReadOnlySection); } - EmitAlignment(Log2_32( - MJTI->getEntryAlignment(*TM.getDataLayout()))); + EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL))); // Jump tables in code sections are marked with a data_region directive // where that's supported. @@ -1335,7 +1461,7 @@ void AsmPrinter::EmitJumpTableInfo() { // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. The // second label is actually referenced by the code. - if (JTInDiffSection && DL->hasLinkerPrivateGlobalPrefix()) + if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix()) // FIXME: This doesn't have to have any specific name, just any randomly // named and numbered 'l' label would work. Simplify GetJTISymbol. OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); @@ -1409,8 +1535,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); - unsigned EntrySize = - MJTI->getEntrySize(*TM.getDataLayout()); + unsigned EntrySize = MJTI->getEntrySize(getDataLayout()); OutStreamer->EmitValue(Value, EntrySize); } @@ -1435,7 +1560,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { assert(GV->hasInitializer() && "Not a special LLVM global!"); if (GV->getName() == "llvm.global_ctors") { - EmitXXStructorList(GV->getInitializer(), /* isCtor */ true); + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ true); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1447,7 +1573,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } if (GV->getName() == "llvm.global_dtors") { - EmitXXStructorList(GV->getInitializer(), /* isCtor */ false); + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ false); if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { @@ -1485,7 +1612,8 @@ struct Structor { /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. -void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { +void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, + bool isCtor) { // Should be an array of '{ int, void ()* }' structs. The first value is the // init priority. 
if (!isa(List)) return; @@ -1520,8 +1648,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *DL = TM.getDataLayout(); - unsigned Align = Log2_32(DL->getPointerPrefAlignment()); + unsigned Align = Log2_32(DL.getPointerPrefAlignment()); std::stable_sort(Structors.begin(), Structors.end(), [](const Structor &L, const Structor &R) { return L.Priority < R.Priority; }); @@ -1542,7 +1669,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { OutStreamer->SwitchSection(OutputSection); if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) EmitAlignment(Align); - EmitXXStructor(S.Func); + EmitXXStructor(DL, S.Func); } } @@ -1621,8 +1748,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { if (GV) - NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), - NumBits); + NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits); if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. @@ -1668,7 +1794,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) if (C != CE) return lowerConstant(C); @@ -1682,11 +1808,9 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &DL = *TM.getDataLayout(); - // Generate a symbolic expression for the byte address - APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); - cast(CE)->accumulateConstantOffset(DL, OffsetAI); + APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0); + cast(CE)->accumulateConstantOffset(getDataLayout(), OffsetAI); const MCExpr *Base = lowerConstant(CE->getOperand(0)); if (!OffsetAI) @@ -1707,7 +1831,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { return lowerConstant(CE->getOperand(0)); case Instruction::IntToPtr: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. @@ -1718,7 +1842,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } case Instruction::PtrToInt: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. @@ -1769,10 +1893,13 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { } } -static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP, +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, + AsmPrinter &AP, const Constant *BaseCV = nullptr, uint64_t Offset = 0); +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); + /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. 
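The .fill aggregation above rests on a simple invariant: a constant may be emitted as a single fill directive only if every byte of its encoding is identical. A minimal byte-level version of the check (the isRepeatedByteSequence overloads in the patch work on Constant nodes rather than raw bytes):

#include <cstddef>
#include <cstdint>

// Returns the repeated byte value, or -1 if the bytes are not all identical.
static int repeatedByte(const uint8_t *Data, size_t Size) {
  if (Size == 0)
    return -1;
  for (size_t I = 1; I != Size; ++I)
    if (Data[I] != Data[0])
      return -1;
  return Data[0]; // suitable for a single .fill of Size bytes
}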
@@ -1789,9 +1916,9 @@ static int isRepeatedByteSequence(const ConstantDataSequential *V) { /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. -static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { +static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) { if (const ConstantInt *CI = dyn_cast(V)) { - uint64_t Size = TM.getDataLayout()->getTypeAllocSizeInBits(V->getType()); + uint64_t Size = DL.getTypeAllocSizeInBits(V->getType()); assert(Size % 8 == 0); // Extend the element to take zero padding into account. @@ -1806,7 +1933,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { // byte. assert(CA->getNumOperands() != 0 && "Should be a CAZ"); Constant *Op0 = CA->getOperand(0); - int Byte = isRepeatedByteSequence(Op0, TM); + int Byte = isRepeatedByteSequence(Op0, DL); if (Byte == -1) return -1; @@ -1823,15 +1950,14 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { return -1; } -static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, - AsmPrinter &AP){ +static void emitGlobalConstantDataSequential(const DataLayout &DL, + const ConstantDataSequential *CDS, + AsmPrinter &AP) { // See if we can aggregate this into a .fill, if so, emit it as such. - int Value = isRepeatedByteSequence(CDS, AP.TM); + int Value = isRepeatedByteSequence(CDS, DL); if (Value != -1) { - uint64_t Bytes = - AP.TM.getDataLayout()->getTypeAllocSize( - CDS->getType()); + uint64_t Bytes = DL.getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) return AP.OutStreamer->EmitFill(Bytes, Value); @@ -1851,37 +1977,11 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), ElementByteSize); } - } else if (ElementByteSize == 4) { - // FP Constants are printed as integer constants to avoid losing - // precision. - assert(CDS->getElementType()->isFloatTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - float F; - uint32_t I; - }; - - F = CDS->getElementAsFloat(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 4); - } } else { - assert(CDS->getElementType()->isDoubleTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - double F; - uint64_t I; - }; - - F = CDS->getElementAsDouble(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 8); - } + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) + emitGlobalConstantFP(cast(CDS->getElementAsConstant(I)), AP); } - const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CDS->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); @@ -1890,12 +1990,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, } -static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, +static void emitGlobalConstantArray(const DataLayout &DL, + const ConstantArray *CA, AsmPrinter &AP, const Constant *BaseCV, uint64_t Offset) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
- int Value = isRepeatedByteSequence(CA, AP.TM); - const DataLayout &DL = *AP.TM.getDataLayout(); + int Value = isRepeatedByteSequence(CA, DL); if (Value != -1) { uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); @@ -1903,17 +2003,17 @@ static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP, } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { - emitGlobalConstantImpl(CA->getOperand(i), AP, BaseCV, Offset); + emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset); Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); } } } -static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { +static void emitGlobalConstantVector(const DataLayout &DL, + const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - emitGlobalConstantImpl(CV->getOperand(i), AP); + emitGlobalConstantImpl(DL, CV->getOperand(i), AP); - const DataLayout &DL = *AP.TM.getDataLayout(); unsigned Size = DL.getTypeAllocSize(CV->getType()); unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); @@ -1921,21 +2021,21 @@ static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { AP.OutStreamer->EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP, +static void emitGlobalConstantStruct(const DataLayout &DL, + const ConstantStruct *CS, AsmPrinter &AP, const Constant *BaseCV, uint64_t Offset) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *DL = AP.TM.getDataLayout(); - unsigned Size = DL->getTypeAllocSize(CS->getType()); - const StructLayout *Layout = DL->getStructLayout(CS->getType()); + unsigned Size = DL.getTypeAllocSize(CS->getType()); + const StructLayout *Layout = DL.getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); // Print the actual field value. - emitGlobalConstantImpl(Field, AP, BaseCV, Offset+SizeSoFar); + emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar); // Check if padding is needed and insert one or more 0s. - uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); + uint64_t FieldSize = DL.getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; @@ -1974,8 +2074,7 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. - if (AP.TM.getDataLayout()->isBigEndian() && - !CFP->getType()->isPPC_FP128Ty()) { + if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) @@ -1993,13 +2092,13 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { } // Emit the tail padding for the long double. 
- const DataLayout &DL = *AP.TM.getDataLayout(); + const DataLayout &DL = AP.getDataLayout(); AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) - DL.getTypeStoreSize(CFP->getType())); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { - const DataLayout *DL = AP.TM.getDataLayout(); + const DataLayout &DL = AP.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); // Copy the value as we may massage the layout for constants whose bit width @@ -2016,7 +2115,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Big endian: // * Record the extra bits to emit. // * Realign the raw data to emit the chunks of 64-bits. - if (DL->isBigEndian()) { + if (DL.isBigEndian()) { // Basically the structure of the raw data is a chunk of 64-bits cells: // 0 1 BitWidth / 64 // [chunk1][chunk2] ... [chunkN]. @@ -2037,7 +2136,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // quantities at a time. const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i]; + uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i]; AP.OutStreamer->EmitIntValue(Val, 8); } @@ -2045,8 +2144,7 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { // Emit the extra bits after the 64-bits chunks. // Emit a directive that fills the expected size. - uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize( - CI->getType()); + uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType()); Size -= (BitWidth / 64) * 8; assert(Size && Size * 8 >= ExtraBitsSize && (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) @@ -2094,7 +2192,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, if (!AP.GlobalGOTEquivs.count(GOTEquivSym)) return; - const GlobalValue *BaseGV = dyn_cast(BaseCst); + const GlobalValue *BaseGV = dyn_cast_or_null(BaseCst); if (!BaseGV) return; @@ -2149,10 +2247,10 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); } -static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, - const Constant *BaseCV, uint64_t Offset) { - const DataLayout *DL = AP.TM.getDataLayout(); - uint64_t Size = DL->getTypeAllocSize(CV->getType()); +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, + AsmPrinter &AP, const Constant *BaseCV, + uint64_t Offset) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); // Globals with sub-elements such as combinations of arrays and structs // are handled recursively by emitGlobalConstantImpl. Keep track of the @@ -2189,32 +2287,32 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, } if (const ConstantDataSequential *CDS = dyn_cast(CV)) - return emitGlobalConstantDataSequential(CDS, AP); + return emitGlobalConstantDataSequential(DL, CDS, AP); if (const ConstantArray *CVA = dyn_cast(CV)) - return emitGlobalConstantArray(CVA, AP, BaseCV, Offset); + return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset); if (const ConstantStruct *CVS = dyn_cast(CV)) - return emitGlobalConstantStruct(CVS, AP, BaseCV, Offset); + return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset); if (const ConstantExpr *CE = dyn_cast(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). 
if (CE->getOpcode() == Instruction::BitCast) - return emitGlobalConstantImpl(CE->getOperand(0), AP); + return emitGlobalConstantImpl(DL, CE->getOperand(0), AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. - Constant *New = ConstantFoldConstantExpression(CE, *DL); + Constant *New = ConstantFoldConstantExpression(CE, DL); if (New && New != CE) - return emitGlobalConstantImpl(New, AP); + return emitGlobalConstantImpl(DL, New, AP); } } if (const ConstantVector *V = dyn_cast(CV)) - return emitGlobalConstantVector(V, AP); + return emitGlobalConstantVector(DL, V, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. @@ -2230,11 +2328,10 @@ static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP, } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const Constant *CV) { - uint64_t Size = - TM.getDataLayout()->getTypeAllocSize(CV->getType()); +void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); if (Size) - emitGlobalConstantImpl(CV, *this); + emitGlobalConstantImpl(DL, CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. @@ -2272,10 +2369,10 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { /// GetCPISymbol - Return the symbol for the specified constant pool entry. MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { - const DataLayout *DL = TM.getDataLayout(); - return OutContext.getOrCreateSymbol - (Twine(DL->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber()) - + "_" + Twine(CPID)); + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "CPI" + Twine(getFunctionNumber()) + "_" + + Twine(CPID)); } /// GetJTISymbol - Return the symbol for the specified jump table entry. @@ -2286,10 +2383,10 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { /// GetJTSetSymbol - Return the symbol for the specified jump table .set /// FIXME: privatize to AsmPrinter. MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { - const DataLayout *DL = TM.getDataLayout(); - return OutContext.getOrCreateSymbol - (Twine(DL->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" + - Twine(UID) + "_set_" + Twine(MBBID)); + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + Twine(getFunctionNumber()) + "_" + + Twine(UID) + "_set_" + Twine(MBBID)); } MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, @@ -2301,7 +2398,7 @@ MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, /// Return the MCSymbol for the specified ExternalSymbol. MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; - Mangler::getNameWithPrefix(NameStr, Sym, *TM.getDataLayout()); + Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout()); return OutContext.getOrCreateSymbol(NameStr); } @@ -2376,6 +2473,14 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. 
void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { + // End the previous funclet and start a new one. + if (MBB.isEHFuncletEntry()) { + for (const HandlerInfo &HI : Handlers) { + HI.Handler->endFunclet(); + HI.Handler->beginFunclet(MBB); + } + } + // Emit an alignment directive for this block, if needed. if (unsigned Align = MBB.getAlignment()) EmitAlignment(Align); @@ -2389,20 +2494,28 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { if (isVerbose()) OutStreamer->AddComment("Block address taken"); - for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) - OutStreamer->EmitLabel(Sym); + // MBBs can have their address taken as part of CodeGen without having + // their corresponding BB's address taken in IR + if (BB->hasAddressTaken()) + for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) + OutStreamer->EmitLabel(Sym); } // Print some verbose block comments. if (isVerbose()) { - if (const BasicBlock *BB = MBB.getBasicBlock()) - if (BB->hasName()) - OutStreamer->AddComment("%" + BB->getName()); + if (const BasicBlock *BB = MBB.getBasicBlock()) { + if (BB->hasName()) { + BB->printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, BB->getModule()); + OutStreamer->GetCommentOS() << '\n'; + } + } emitBasicBlockLoopComments(MBB, LI, *this); } // Print the main label for the block. - if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) { + if (MBB.pred_empty() || + (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) { if (isVerbose()) { // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); @@ -2440,7 +2553,7 @@ bool AsmPrinter:: isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. - if (MBB->isLandingPad() || MBB->pred_empty()) + if (MBB->isEHPad() || MBB->pred_empty()) return false; // If there isn't exactly one predecessor, it can't be a fall through. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index ad180b6667c0..504c5d283cba 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -47,7 +47,7 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { OutStreamer->EmitSLEB128IntValue(Value); } -/// EmitULEB128 - emit the specified signed leb128 value. +/// EmitULEB128 - emit the specified unsigned leb128 value. void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) @@ -56,18 +56,6 @@ void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, OutStreamer->EmitULEB128IntValue(Value, PadTo); } -/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. 
-void AsmPrinter::EmitCFAByte(unsigned Val) const { - if (isVerbose()) { - if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset + 64) - OutStreamer->AddComment("DW_CFA_offset + Reg (" + - Twine(Val - dwarf::DW_CFA_offset) + ")"); - else - OutStreamer->AddComment(dwarf::CallFrameString(Val)); - } - OutStreamer->EmitIntValue(Val, 1); -} - static const char *DecodeDWARFEncoding(unsigned Encoding) { switch (Encoding) { case dwarf::DW_EH_PE_absptr: @@ -134,7 +122,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: - return TM.getDataLayout()->getPointerSize(); + return MF->getDataLayout().getPointerSize(); case dwarf::DW_EH_PE_udata2: return 2; case dwarf::DW_EH_PE_udata4: @@ -228,6 +216,9 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpDefCfaOffset: OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); break; + case MCCFIInstruction::OpAdjustCfaOffset: + OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset()); + break; case MCCFIInstruction::OpDefCfa: OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); break; @@ -246,6 +237,12 @@ void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { case MCCFIInstruction::OpSameValue: OutStreamer->EmitCFISameValue(Inst.getRegister()); break; + case MCCFIInstruction::OpGnuArgsSize: + OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset()); + break; + case MCCFIInstruction::OpEscape: + OutStreamer->EmitCFIEscape(Inst.getValues()); + break; } } @@ -284,17 +281,10 @@ void AsmPrinter::emitDwarfDIE(const DIE &Die) const { } } -void -AsmPrinter::emitDwarfAbbrevs(const std::vector& Abbrevs) const { - // For each abbrevation. - for (const DIEAbbrev *Abbrev : Abbrevs) { - // Emit the abbrevations code (base 1 index.) - EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); +void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const { + // Emit the abbreviations code (base 1 index.) + EmitULEB128(Abbrev.getNumber(), "Abbreviation Code"); - // Emit the abbreviations data. - Abbrev->Emit(this); - } - - // Mark end of abbreviations. - EmitULEB128(0, "EOM(3)"); + // Emit the abbreviations data. + Abbrev.Emit(this); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h index f1efe9d835e0..e59961f85769 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h +++ b/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -19,6 +19,7 @@ namespace llvm { +class MachineBasicBlock; class MachineFunction; class MachineInstr; class MCSymbol; @@ -50,6 +51,11 @@ public: /// beginFunction at all. virtual void endFunction(const MachineFunction *MF) = 0; + /// \brief Emit target-specific EH funclet machinery. + virtual void beginFunclet(const MachineBasicBlock &MBB, + MCSymbol *Sym = nullptr) {} + virtual void endFunclet() {} + /// \brief Process beginning of an instruction. virtual void beginInstruction(const MachineInstr *MI) = 0; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 793e62960dd6..4171657b5285 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -127,19 +127,13 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, std::unique_ptr Parser( createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI)); - // Create a temporary copy of the original STI because the parser may modify - // it. 
For example, when switching between arm and thumb mode. If the target - // needs to emit code to return to the original state it can do so in - // emitInlineAsmEnd(). - MCSubtargetInfo TmpSTI = STI; - // We create a new MCInstrInfo here since we might be at the module level // and not have a MachineFunction to initialize the TargetInstrInfo from and // we only need MCInstrInfo for asm parsing. We create one unconditionally // because it's not subtarget dependent. std::unique_ptr MII(TM.getTarget().createMCInstrInfo()); std::unique_ptr TAP(TM.getTarget().createMCAsmParser( - TmpSTI, *Parser, *MII, MCOptions)); + STI, *Parser, *MII, MCOptions)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" " we don't have an asm parser for this target\n"); @@ -154,7 +148,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, // Don't implicitly switch to the text section before the asm. int Res = Parser->Run(/*NoInitialTextSection*/ true, /*NoFinalize*/ true); - emitInlineAsmEnd(STI, &TmpSTI); + emitInlineAsmEnd(STI, &TAP->getSTI()); if (Res && !HasDiagHandler) report_fatal_error("Error parsing inline asm\n"); } @@ -512,9 +506,9 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { /// for their own strange codes. void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const { - const DataLayout *DL = TM.getDataLayout(); if (!strcmp(Code, "private")) { - OS << DL->getPrivateGlobalPrefix(); + const DataLayout &DL = MF->getDataLayout(); + OS << DL.getPrivateGlobalPrefix(); } else if (!strcmp(Code, "comment")) { OS << MAI->getCommentString(); } else if (!strcmp(Code, "uid")) { diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h index 0cc829fffc54..df1997bcb72c 100644 --- a/lib/CodeGen/AsmPrinter/ByteStreamer.h +++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -24,16 +24,19 @@ namespace llvm { class ByteStreamer { - public: - virtual ~ByteStreamer() {} + protected: + ~ByteStreamer() = default; + ByteStreamer(const ByteStreamer&) = default; + ByteStreamer() = default; + public: // For now we're just handling the calls we need for dwarf emission/hashing. 
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 46dbc7693698..bf794f7f70f6 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -86,7 +86,7 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
   AP->EmitULEB128(0, "EOM(2)");
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEAbbrev::print(raw_ostream &O) {
   O << "Abbreviation @"
     << format("0x%lx", (long)(intptr_t)this)
@@ -104,12 +104,13 @@ void DIEAbbrev::print(raw_ostream &O) {
       << '\n';
   }
 }
+
+LLVM_DUMP_METHOD
 void DIEAbbrev::dump() { print(dbgs()); }
-#endif
 
 DIEAbbrev DIE::generateAbbrev() const {
   DIEAbbrev Abbrev(Tag, hasChildren());
-  for (const DIEValue &V : Values)
+  for (const DIEValue &V : values())
     Abbrev.AddAttribute(V.getAttribute(), V.getForm());
   return Abbrev;
 }
@@ -144,36 +145,35 @@ DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const {
   return DIEValue();
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
+static void printValues(raw_ostream &O, const DIEValueList &Values,
+                        StringRef Type, unsigned Size, unsigned IndentCount) {
+  O << Type << ": Size: " << Size << "\n";
+
+  unsigned I = 0;
+  const std::string Indent(IndentCount, ' ');
+  for (const auto &V : Values.values()) {
+    O << Indent;
+    O << "Blk[" << I++ << "]";
+    O << "  " << dwarf::FormEncodingString(V.getForm()) << " ";
+    V.print(O);
+    O << "\n";
+  }
+}
+
+LLVM_DUMP_METHOD
 void DIE::print(raw_ostream &O, unsigned IndentCount) const {
   const std::string Indent(IndentCount, ' ');
-  bool isBlock = getTag() == 0;
+  O << Indent << "Die: " << format("0x%lx", (long)(intptr_t) this)
+    << ", Offset: " << Offset << ", Size: " << Size << "\n";
 
-  if (!isBlock) {
-    O << Indent
-      << "Die: "
-      << format("0x%lx", (long)(intptr_t)this)
-      << ", Offset: " << Offset
-      << ", Size: " << Size << "\n";
-
-    O << Indent
-      << dwarf::TagString(getTag())
-      << " "
-      << dwarf::ChildrenString(hasChildren()) << "\n";
-  } else {
-    O << "Size: " << Size << "\n";
-  }
+  O << Indent << dwarf::TagString(getTag()) << " "
+    << dwarf::ChildrenString(hasChildren()) << "\n";
 
   IndentCount += 2;
-  unsigned I = 0;
-  for (const auto &V : Values) {
+  for (const auto &V : values()) {
     O << Indent;
-
-    if (!isBlock)
-      O << dwarf::AttributeString(V.getAttribute());
-    else
-      O << "Blk[" << I++ << "]";
-
+    O << dwarf::AttributeString(V.getAttribute());
     O << "  " << dwarf::FormEncodingString(V.getForm()) << " ";
     V.print(O);
     O << "\n";
@@ -183,13 +183,13 @@ void DIE::print(raw_ostream &O, unsigned IndentCount) const {
   for (const auto &Child : children())
     Child.print(O, IndentCount + 4);
 
-  if (!isBlock) O << "\n";
+  O << "\n";
 }
 
+LLVM_DUMP_METHOD
 void DIE::dump() {
   print(dbgs());
 }
-#endif
 
 void DIEValue::EmitValue(const AsmPrinter *AP) const {
   switch (Ty) {
@@ -215,7 +215,7 @@ unsigned DIEValue::SizeOf(const AsmPrinter *AP) const {
   llvm_unreachable("Unknown DIE kind");
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEValue::print(raw_ostream &O) const {
   switch (Ty) {
   case isNone:
@@ -228,10 +228,10 @@ void DIEValue::print(raw_ostream &O) const {
   }
 }
 
+LLVM_DUMP_METHOD
 void DIEValue::dump() const {
   print(dbgs());
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEInteger Implementation
 //===----------------------------------------------------------------------===//
@@ -264,7 +264,8 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
   case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
   case dwarf::DW_FORM_addr:
-    Size = Asm->getDataLayout().getPointerSize(); break;
+    Size = Asm->getPointerSize();
+    break;
   case dwarf::DW_FORM_ref_addr:
     Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
     break;
@@ -294,21 +295,21 @@ unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer);
   case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
   case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
-  case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize();
+  case dwarf::DW_FORM_addr:
+    return AP->getPointerSize();
   case dwarf::DW_FORM_ref_addr:
     if (AP->OutStreamer->getContext().getDwarfVersion() == 2)
-      return AP->getDataLayout().getPointerSize();
+      return AP->getPointerSize();
     return sizeof(int32_t);
   default: llvm_unreachable("DIE Value form not supported yet");
   }
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEInteger::print(raw_ostream &O) const {
   O << "Int: " << (int64_t)Integer << "  0x";
   O.write_hex(Integer);
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEExpr Implementation
@@ -326,12 +327,11 @@ unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getPointerSize();
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIELabel Implementation
@@ -352,12 +352,11 @@ unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getPointerSize();
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEDelta Implementation
@@ -375,14 +374,13 @@ unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getPointerSize();
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEDelta::print(raw_ostream &O) const {
   O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEString Implementation
@@ -431,11 +429,10 @@ unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   return DIEInteger(S.getOffset()).SizeOf(AP, Form);
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEString::print(raw_ostream &O) const {
   O << "String: " << S.getString();
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEEntry Implementation
@@ -472,15 +469,14 @@ unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
   const DwarfDebug *DD = AP->getDwarfDebug();
   assert(DD && "Expected Dwarf Debug info to be available");
   if (DD->getDwarfVersion() == 2)
-    return AP->getDataLayout().getPointerSize();
+    return AP->getPointerSize();
   return sizeof(int32_t);
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEEntry::print(raw_ostream &O) const {
   O << format("Die: 0x%lx", (long)(intptr_t)&Entry);
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIETypeSignature Implementation
@@ -491,11 +487,10 @@ void DIETypeSignature::EmitValue(const AsmPrinter *Asm,
   Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8);
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIETypeSignature::print(raw_ostream &O) const {
   O << format("Type Unit: 0x%lx", Unit->getTypeSignature());
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIELoc Implementation
@@ -505,7 +500,7 @@ void DIETypeSignature::print(raw_ostream &O) const {
 ///
 unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const {
   if (!Size) {
-    for (const auto &V : Values)
+    for (const auto &V : values())
       Size += V.SizeOf(AP);
   }
 
@@ -525,7 +520,7 @@ void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
     Asm->EmitULEB128(Size); break;
   }
 
-  for (const auto &V : Values)
+  for (const auto &V : values())
     V.EmitValue(Asm);
 }
 
@@ -543,12 +538,10 @@ unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   }
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIELoc::print(raw_ostream &O) const {
-  O << "ExprLoc: ";
-  DIE::print(O, 5);
+  printValues(O, *this, "ExprLoc", Size, 5);
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIEBlock Implementation
@@ -558,7 +551,7 @@ void DIELoc::print(raw_ostream &O) const {
 ///
 unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const {
   if (!Size) {
-    for (const auto &V : Values)
+    for (const auto &V : values())
       Size += V.SizeOf(AP);
   }
 
@@ -576,7 +569,7 @@ void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
   case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break;
   }
 
-  for (const auto &V : Values)
+  for (const auto &V : values())
     V.EmitValue(Asm);
 }
 
@@ -592,12 +585,10 @@ unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
   }
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIEBlock::print(raw_ostream &O) const {
-  O << "Blk: ";
-  DIE::print(O, 5);
+  printValues(O, *this, "Blk", Size, 5);
 }
-#endif
 
 //===----------------------------------------------------------------------===//
 // DIELocList Implementation
@@ -608,7 +599,7 @@ unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
     return 4;
   if (Form == dwarf::DW_FORM_sec_offset)
     return 4;
-  return AP->getDataLayout().getPointerSize();
+  return AP->getPointerSize();
 }
 
 /// EmitValue - Emit label value.
@@ -619,6 +610,5 @@ void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
   AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf());
 }
 
-#ifndef NDEBUG
+LLVM_DUMP_METHOD
 void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; }
-#endif
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 5e60156fdfc9..02010654a6f4 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -469,38 +469,6 @@ void DIEHash::computeHash(const DIE &Die) {
   Hash.update(makeArrayRef((uint8_t)'\0'));
 }
 
-/// This is based on the type signature computation given in section 7.27 of the
-/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE
-/// with the exception that we are hashing only the context and the name of the
-/// type.
-uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
-
-  // Add the contexts to the hash. We won't be computing the ODR hash for
-  // function local types so it's safe to use the generic context hashing
-  // algorithm here.
-  // FIXME: If we figure out how to account for linkage in some way we could
-  // actually do this with a slight modification to the parent hash algorithm.
-  if (const DIE *Parent = Die.getParent())
-    addParentContext(*Parent);
-
-  // Add the current DIE information.
-
-  // Add the DWARF tag of the DIE.
-  addULEB128(Die.getTag());
-
-  // Add the name of the type to the hash.
-  addString(getDIEStringAttr(Die, dwarf::DW_AT_name));
-
-  // Now get the result.
-  MD5::MD5Result Result;
-  Hash.final(Result);
-
-  // ... take the least significant 8 bytes and return those. Our MD5
-  // implementation always returns its results in little endian, swap bytes
-  // appropriately.
-  return support::endian::read64le(Result + 8);
-}
-
 /// This is based on the type signature computation given in section 7.27 of the
 /// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
 /// with the inclusion of the full CU and all top level CU entities.
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index 833ca0276fdb..44f0ce88523d 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -84,9 +84,6 @@ class DIEHash {
 public:
   DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
 
-  /// \brief Computes the ODR signature.
-  uint64_t computeDIEODRSignature(const DIE &Die);
-
   /// \brief Computes the CU signature.
   uint64_t computeCUSignature(const DIE &Die);
 
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index afffa839a606..bbe53249a084 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -9,6 +9,8 @@
 
 #ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
 #define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H
+
+#include "DebugLocStream.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
@@ -17,7 +19,6 @@
 namespace llvm {
 class AsmPrinter;
-class DebugLocStream;
 
 /// \brief This struct describes location entries emitted in the .debug_loc
 /// section.
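The DIEHash hunk above deletes the unused ODR-signature variant; the scheme that survives (and the DwarfDebug::makeTypeSignature() that appears later in this patch) hashes an identifier with MD5 and keeps only the least significant 64 bits. A minimal sketch of that computation, mirroring the deleted body:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MD5.h"

using namespace llvm;

// Hash the identifier and keep the low 64 bits. LLVM's MD5 returns its
// result in little-endian order, so the last eight bytes hold the least
// significant half of the digest.
static uint64_t typeSignature(StringRef Identifier) {
  MD5 Hash;
  Hash.update(Identifier);
  MD5::MD5Result Result;
  Hash.final(Result);
  return support::endian::read64le(Result + 8);
}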
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index f8cdde203187..4ad3e1867328 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -41,7 +41,7 @@ void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die,
   DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags));
 }
 
-void DwarfAccelTable::ComputeBucketCount(void) {
+void DwarfAccelTable::ComputeBucketCount() {
   // First get the number of unique hashes.
   std::vector<unsigned> uniques(Data.size());
   for (size_t i = 0, e = Data.size(); i < e; ++i)
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 2c212c7ecee1..6665c16159a0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -78,12 +78,11 @@ void DwarfCFIException::endModule() {
     return;
 
   // Emit references to all used personality functions
-  const std::vector<const Function *> &Personalities = MMI->getPersonalities();
-  for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
-    if (!Personalities[i])
+  for (const Function *Personality : MMI->getPersonalities()) {
+    if (!Personality)
       continue;
-    MCSymbol *Sym = Asm->getSymbol(Personalities[i]);
-    TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->TM, Sym);
+    MCSymbol *Sym = Asm->getSymbol(Personality);
+    TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym);
   }
 }
 
@@ -108,7 +107,6 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
   const Function *Per = nullptr;
   if (F->hasPersonalityFn())
     Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
-  assert(!MMI->getPersonality() || Per == MMI->getPersonality());
 
   // Emit a personality function even when there are no landing pads
   bool forceEmitPersonality =
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index fc54a2925beb..725063a8177b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -151,28 +151,33 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
     DIELoc *Loc = new (DIEValueAllocator) DIELoc;
     const MCSymbol *Sym = Asm->getSymbol(Global);
     if (Global->isThreadLocal()) {
-      // FIXME: Make this work with -gsplit-dwarf.
-      unsigned PointerSize = Asm->getDataLayout().getPointerSize();
-      assert((PointerSize == 4 || PointerSize == 8) &&
-             "Add support for other sizes if necessary");
-      // Based on GCC's support for TLS:
-      if (!DD->useSplitDwarf()) {
-        // 1) Start with a constNu of the appropriate pointer size
-        addUInt(*Loc, dwarf::DW_FORM_data1,
-                PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
-        // 2) containing the (relocated) offset of the TLS variable
-        //    within the module's TLS block.
-        addExpr(*Loc, dwarf::DW_FORM_udata,
-                Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+      if (Asm->TM.Options.EmulatedTLS) {
+        // TODO: add debug info for emulated thread local mode.
       } else {
-        addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
-        addUInt(*Loc, dwarf::DW_FORM_udata,
-                DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+        // FIXME: Make this work with -gsplit-dwarf.
+        unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+        assert((PointerSize == 4 || PointerSize == 8) &&
+               "Add support for other sizes if necessary");
+        // Based on GCC's support for TLS:
+        if (!DD->useSplitDwarf()) {
+          // 1) Start with a constNu of the appropriate pointer size
+          addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4
+                                                  ? dwarf::DW_OP_const4u
+                                                  : dwarf::DW_OP_const8u);
+          // 2) containing the (relocated) offset of the TLS variable
+          //    within the module's TLS block.
+          addExpr(*Loc, dwarf::DW_FORM_udata,
+                  Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+        } else {
+          addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+          addUInt(*Loc, dwarf::DW_FORM_udata,
+                  DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+        }
+        // 3) followed by an OP to make the debugger do a TLS lookup.
+        addUInt(*Loc, dwarf::DW_FORM_data1,
+                DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+                                      : dwarf::DW_OP_form_tls_address);
       }
-      // 3) followed by an OP to make the debugger do a TLS lookup.
-      addUInt(*Loc, dwarf::DW_FORM_data1,
-              DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
-                                    : dwarf::DW_OP_form_tls_address);
     } else {
       DD->addArangeLabel(SymbolCU(this, Sym));
       addOpAddress(*Loc, Sym);
@@ -338,9 +343,9 @@ void DwarfCompileUnit::constructScopeDIE(
   // Skip imported directives in gmlt-like data.
   if (!includeMinimalInlineScopes()) {
     // There is no need to emit empty lexical block DIE.
-    for (const auto &E : DD->findImportedEntitiesForScope(DS))
+    for (const auto *IE : ImportedEntities[DS])
       Children.push_back(
-          constructImportedEntityDIE(cast<DIImportedEntity>(E.second)));
+          constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
   }
 
   // If there are only other scopes as children, put them directly in the
@@ -435,6 +440,9 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
   addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
           getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
   addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
+  if (IA->getDiscriminator())
+    addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
+            IA->getDiscriminator());
 
   // Add name to the name table, we do this here because we're guaranteed
   // to have concrete versions of our DW_TAG_inlined_subprogram nodes.
@@ -517,8 +525,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
       unsigned FrameReg = 0;
       const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
       int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
-      assert(Expr != DV.getExpression().end() &&
-             "Wrong number of expressions");
+      assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
       DwarfExpr.AddMachineRegIndirect(FrameReg, Offset);
       DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
       ++Expr;
@@ -597,8 +604,8 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
   return ObjectPointer;
 }
 
-void
-DwarfCompileUnit::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
+void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
+    LexicalScope *Scope) {
   DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()];
   if (AbsDef)
     return;
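For readers following the TLS hunk above: the location expression being assembled is a short DWARF opcode sequence. The following illustrative helper (not an LLVM API; the real code routes this through DIELoc/addUInt/addExpr with a relocated symbol rather than a raw offset) shows the shape of what is emitted:

#include "llvm/Support/Dwarf.h"
#include <cstdint>
#include <vector>

using namespace llvm;

// For a 4- or 8-byte pointer, the non-split-DWARF expression is:
//   DW_OP_const<N>u <offset of the variable in the TLS block>
//   DW_OP_form_tls_address      (or DW_OP_GNU_push_tls_address when
//                                tuning for GDB, or when DWARF < 3)
std::vector<uint64_t> tlsLocationOps(unsigned PointerSize, uint64_t Offset,
                                     bool UseGNUTLSOpcode) {
  std::vector<uint64_t> Ops;
  Ops.push_back(PointerSize == 4 ? dwarf::DW_OP_const4u
                                 : dwarf::DW_OP_const8u);
  Ops.push_back(Offset); // the real code emits a relocated symbol here
  Ops.push_back(UseGNUTLSOpcode ? dwarf::DW_OP_GNU_push_tls_address
                                : dwarf::DW_OP_form_tls_address);
  return Ops;
}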
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 509c9432bcbf..2e2846790cc1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -39,6 +39,12 @@ class DwarfCompileUnit : public DwarfUnit {
   /// The start of the unit within its section.
   MCSymbol *LabelBegin;
 
+  typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList;
+  typedef llvm::DenseMap<const MDNode *, ImportedEntityList>
+      ImportedEntityMap;
+
+  ImportedEntityMap ImportedEntities;
+
   /// GlobalNames - A map of globally visible named entities for this unit.
   StringMap<const DIE *> GlobalNames;
 
@@ -98,6 +104,10 @@ public:
 
   unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override;
 
+  void addImportedEntity(const DIImportedEntity* IE) {
+    ImportedEntities[IE->getScope()].push_back(IE);
+  }
+
   /// addRange - Add an address range to the list of ranges for this unit.
   void addRange(RangeSpan Range);
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7d03a3930d7d..3466f3469f1c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
@@ -104,6 +105,14 @@
 DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
                             clEnumVal(Disable, "Disabled"), clEnumValEnd),
                  cl::init(Default));
 
+static cl::opt<DefaultOnOff>
+DwarfLinkageNames("dwarf-linkage-names", cl::Hidden,
+                  cl::desc("Emit DWARF linkage-name attributes."),
+                  cl::values(clEnumVal(Default, "Default for platform"),
+                             clEnumVal(Enable, "Enabled"),
+                             clEnumVal(Disable, "Disabled"), clEnumValEnd),
+                  cl::init(Default));
+
 static const char *const DWARFGroupName = "DWARF Emission";
 static const char *const DbgTimerName = "DWARF Debug Writer";
 
@@ -176,9 +185,9 @@ const DIType *DbgVariable::getType() const {
     if (tag == dwarf::DW_TAG_pointer_type)
       subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
 
-    auto Elements = cast<DICompositeTypeBase>(subType)->getElements();
+    auto Elements = cast<DICompositeType>(subType)->getElements();
     for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
-      auto *DT = cast<DIDerivedTypeBase>(Elements[i]);
+      auto *DT = cast<DIDerivedType>(Elements[i]);
       if (getName() == DT->getName())
         return resolve(DT->getBaseType());
     }
@@ -194,45 +203,67 @@ static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
 DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
     : Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()),
       PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator),
-      UsedNonDefaultText(false),
       SkeletonHolder(A, "skel_string", DIEValueAllocator),
       IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
-      IsPS4(Triple(A->getTargetTriple()).isPS4()),
       AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                        dwarf::DW_FORM_data4)),
       AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                       dwarf::DW_FORM_data4)),
       AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
                                            dwarf::DW_FORM_data4)),
-      AccelTypes(TypeAtoms) {
+      AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
 
   CurFn = nullptr;
   CurMI = nullptr;
+  Triple TT(Asm->getTargetTriple());
 
-  // Turn on accelerator tables for Darwin by default, pubnames by
-  // default for non-Darwin/PS4, and handle split dwarf.
+  // Make sure we know our "debugger tuning."  The target option takes
+  // precedence; fall back to triple-based defaults.
+  if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
+    DebuggerTuning = Asm->TM.Options.DebuggerTuning;
+  else if (IsDarwin || TT.isOSFreeBSD())
+    DebuggerTuning = DebuggerKind::LLDB;
+  else if (TT.isPS4CPU())
+    DebuggerTuning = DebuggerKind::SCE;
+  else
+    DebuggerTuning = DebuggerKind::GDB;
+
+  // Turn on accelerator tables for LLDB by default.
   if (DwarfAccelTables == Default)
-    HasDwarfAccelTables = IsDarwin;
+    HasDwarfAccelTables = tuneForLLDB();
   else
    HasDwarfAccelTables = DwarfAccelTables == Enable;
 
+  // Handle split DWARF. Off by default for now.
   if (SplitDwarf == Default)
     HasSplitDwarf = false;
   else
     HasSplitDwarf = SplitDwarf == Enable;
 
+  // Pubnames/pubtypes on by default for GDB.
   if (DwarfPubSections == Default)
-    HasDwarfPubSections = !IsDarwin && !IsPS4;
+    HasDwarfPubSections = tuneForGDB();
  else
     HasDwarfPubSections = DwarfPubSections == Enable;
 
+  // SCE does not use linkage names.
+  if (DwarfLinkageNames == Default)
+    UseLinkageNames = !tuneForSCE();
+  else
+    UseLinkageNames = DwarfLinkageNames == Enable;
+
   unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
   DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
                                     : MMI->getModule()->getDwarfVersion();
+  // Use dwarf 4 by default if nothing is requested.
+  DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
 
-  // Darwin and PS4 use the standard TLS opcode (defined in DWARF 3).
-  // Everybody else uses GNU's.
-  UseGNUTLSOpcode = !(IsDarwin || IsPS4) || DwarfVersion < 3;
+  // Work around a GDB bug. GDB doesn't support the standard opcode;
+  // SCE doesn't support GNU's; LLDB prefers the standard opcode, which
+  // is defined as of DWARF 3.
+  // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented
+  // https://sourceware.org/bugzilla/show_bug.cgi?id=11616
+  UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3;
 
   Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
 
@@ -300,18 +331,6 @@ void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) {
   }
 }
 
-/// isSubprogramContext - Return true if Context is either a subprogram
-/// or another context nested inside a subprogram.
-bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
-  if (!Context)
-    return false;
-  if (isa<DISubprogram>(Context))
-    return true;
-  if (auto *T = dyn_cast<DIType>(Context))
-    return isSubprogramContext(resolve(T->getScope()));
-  return false;
-}
-
 /// Check whether we should create a DIE for the given Scope, return true
 /// if we don't create a DIE (the corresponding DIE is null).
 bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
@@ -416,6 +435,16 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
   else
     NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
 
+  if (DIUnit->getDWOId()) {
+    // This CU is either a clang module DWO or a skeleton CU.
+    NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8,
+                  DIUnit->getDWOId());
+    if (!DIUnit->getSplitDebugFilename().empty())
+      // This is a prefabricated skeleton CU.
+      NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name,
+                      DIUnit->getSplitDebugFilename());
+  }
+
   CUMap.insert(std::make_pair(DIUnit, &NewCU));
   CUDieMap.insert(std::make_pair(&Die, &NewCU));
   return NewCU;
 }
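The constructor hunk above introduces "debugger tuning": one explicit choice of debugger (overridable via TargetOptions, otherwise derived from the triple) from which several feature defaults follow. A self-contained sketch of the mapping the diff encodes (illustrative types; the real DebuggerKind lives in llvm/Target/TargetOptions.h):

#include <string>

enum class DebuggerKind { Default, GDB, LLDB, SCE };

struct TuningDefaults {
  bool AccelTables;  // LLDB wants Apple accelerator tables
  bool PubSections;  // GDB wants pubnames/pubtypes
  bool LinkageNames; // SCE does not want DW_AT_linkage_name
};

// Mirrors the defaults above: HasDwarfAccelTables = tuneForLLDB(),
// HasDwarfPubSections = tuneForGDB(), UseLinkageNames = !tuneForSCE().
TuningDefaults defaultsFor(DebuggerKind K) {
  return {/*AccelTables=*/K == DebuggerKind::LLDB,
          /*PubSections=*/K == DebuggerKind::GDB,
          /*LinkageNames=*/K != DebuggerKind::SCE};
}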
@@ -436,8 +465,6 @@ void DwarfDebug::beginModule() {
 
   const Module *M = MMI->getModule();
 
-  FunctionDIs = makeSubprogramMap(*M);
-
   NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
   if (!CU_Nodes)
     return;
@@ -449,12 +476,7 @@ void DwarfDebug::beginModule() {
     auto *CUNode = cast<DICompileUnit>(N);
     DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
     for (auto *IE : CUNode->getImportedEntities())
-      ScopesWithImportedEntities.push_back(std::make_pair(IE->getScope(), IE));
-    // Stable sort to preserve the order of appearance of imported entities.
-    // This is to avoid out-of-order processing of interdependent declarations
-    // within the same scope, e.g. { namespace A = base; namespace B = A; }
-    std::stable_sort(ScopesWithImportedEntities.begin(),
-                     ScopesWithImportedEntities.end(), less_first());
+      CU.addImportedEntity(IE);
     for (auto *GV : CUNode->getGlobalVariables())
       CU.getOrCreateGlobalVariableDIE(GV);
     for (auto *SP : CUNode->getSubprograms())
@@ -467,7 +489,10 @@ void DwarfDebug::beginModule() {
     for (auto *Ty : CUNode->getRetainedTypes()) {
       // The retained types array by design contains pointers to
       // MDNodes rather than DIRefs. Unique them here.
-      CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef())));
+      DIType *RT = cast<DIType>(resolve(Ty->getRef()));
+      if (!RT->isExternalTypeRef())
+        // There is no point in force-emitting a forward declaration.
+        CU.getOrCreateTypeDIE(RT);
     }
     // Emit imported_modules last so that the relevant context is already
     // available.
@@ -1061,12 +1086,8 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
   for (const auto &MBB : *MF)
     for (const auto &MI : MBB)
       if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
-          MI.getDebugLoc()) {
-        // Did the target forget to set the FrameSetup flag for CFI insns?
-        assert(!MI.isCFIInstruction() &&
-               "First non-frame-setup instruction is a CFI instruction.");
+          MI.getDebugLoc())
         return MI.getDebugLoc();
-      }
   return DebugLoc();
 }
 
@@ -1079,8 +1100,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
   if (!MMI->hasDebugInfo())
     return;
 
-  auto DI = FunctionDIs.find(MF->getFunction());
-  if (DI == FunctionDIs.end())
+  auto DI = MF->getFunction()->getSubprogram();
+  if (!DI)
     return;
 
   // Grab the lexical scopes for the function, if we don't have any of those
@@ -1127,7 +1148,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
       // The first mention of a function argument gets the CurrentFnBegin
       // label, so arguments are visible when breaking at function entry.
       const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
-      if (DIVar->getTag() == dwarf::DW_TAG_arg_variable &&
+      if (DIVar->isParameter() &&
          getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
         LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
         if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
@@ -1171,7 +1192,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
          "endFunction should be called with the same function as beginFunction");
 
   if (!MMI->hasDebugInfo() || LScopes.empty() ||
-      !FunctionDIs.count(MF->getFunction())) {
+      !MF->getFunction()->getSubprogram()) {
     // If we don't have a lexical scope for this function then there will
     // be a hole in the range information. Keep note of this by setting the
     // previously used section to nullptr.
@@ -1863,7 +1884,7 @@ void DwarfDebug::emitDebugLineDWO() {
   assert(useSplitDwarf() && "No split dwarf?");
   Asm->OutStreamer->SwitchSection(
       Asm->getObjFileLowering().getDwarfLineDWOSection());
-  SplitTypeUnitFileTable.Emit(*Asm->OutStreamer);
+  SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams());
 }
 
 // Emit the .debug_str.dwo section for separated dwarf. This contains the
@@ -1884,7 +1905,7 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
   return &SplitTypeUnitFileTable;
 }
 
-static uint64_t makeTypeSignature(StringRef Identifier) {
+uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) {
   MD5 Hash;
   Hash.update(Identifier);
   // ... take the least significant 8 bytes and return those. Our MD5
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 01f34c6eb81c..4c613a905450 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -33,6 +33,7 @@
 #include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MachineLocation.h"
 #include "llvm/Support/Allocator.h"
+#include "llvm/Target/TargetOptions.h"
 #include <memory>
 
 namespace llvm {
@@ -48,24 +49,6 @@ class DwarfTypeUnit;
 class DwarfUnit;
 class MachineModuleInfo;
 
-//===----------------------------------------------------------------------===//
-/// This class is used to record source line correspondence.
-class SrcLineInfo {
-  unsigned Line;     // Source line number.
-  unsigned Column;   // Source column.
-  unsigned SourceID; // Source ID number.
-  MCSymbol *Label;   // Label in code ID number.
-public:
-  SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
-      : Line(L), Column(C), SourceID(S), Label(label) {}
-
-  // Accessors
-  unsigned getLine() const { return Line; }
-  unsigned getColumn() const { return Column; }
-  unsigned getSourceID() const { return SourceID; }
-  MCSymbol *getLabel() const { return Label; }
-};
-
 //===----------------------------------------------------------------------===//
 /// This class is used to track local variable information.
 ///
@@ -127,14 +110,14 @@ public:
   // Accessors.
   const DILocalVariable *getVariable() const { return Var; }
   const DILocation *getInlinedAt() const { return IA; }
-  const ArrayRef<const DIExpression *> getExpression() const { return Expr; }
+  ArrayRef<const DIExpression *> getExpression() const { return Expr; }
   void setDIE(DIE &D) { TheDIE = &D; }
   DIE *getDIE() const { return TheDIE; }
   void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
   unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
   StringRef getName() const { return Var->getName(); }
   const MachineInstr *getMInsn() const { return MInsn; }
-  const ArrayRef<int> getFrameIndex() const { return FrameIndex; }
+  ArrayRef<int> getFrameIndex() const { return FrameIndex; }
 
   void addMMIEntry(const DbgVariable &V) {
     assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
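A side note on the accessor hunk just above: dropping the top-level `const` from a by-value `ArrayRef` return is a deliberate cleanup. `ArrayRef` is a non-owning (pointer, length) view, so returning it by value is already cheap, and `const` on a by-value return type adds nothing. A minimal sketch of the resulting pattern, with an illustrative type rather than the LLVM `DbgVariable`:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

struct Var {
  llvm::SmallVector<int, 4> FrameIndices;
  // Return a cheap view by value; callers must not outlive the owner.
  llvm::ArrayRef<int> getFrameIndices() const { return FrameIndices; }
};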
@@ -156,7 +139,8 @@ public:
 
   // Translate tag to proper Dwarf tag.
   dwarf::Tag getTag() const {
-    if (Var->getTag() == dwarf::DW_TAG_arg_variable)
+    // FIXME: Why don't we just infer this tag and store it all along?
+    if (Var->isParameter())
       return dwarf::DW_TAG_formal_parameter;
 
     return dwarf::DW_TAG_variable;
@@ -282,11 +266,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// Holders for the various debug information flags that we might need to
   /// have exposed. See accessor functions below for description.
 
-  /// Holder for imported entities.
-  typedef SmallVector<std::pair<const MDNode *, const MDNode *>, 32>
-      ImportedEntityMap;
-  ImportedEntityMap ScopesWithImportedEntities;
-
   /// Map from MDNodes for user-defined types to the type units that
   /// describe them.
   DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
@@ -298,16 +277,12 @@ class DwarfDebug : public AsmPrinterHandler {
   /// Whether to emit the pubnames/pubtypes sections.
   bool HasDwarfPubSections;
 
-  /// Whether or not to use AT_ranges for compilation units.
-  bool HasCURanges;
-
-  /// Whether we emitted a function into a section other than the
-  /// default text.
-  bool UsedNonDefaultText;
-
   /// Whether to use the GNU TLS opcode (instead of the standard opcode).
   bool UseGNUTLSOpcode;
 
+  /// Whether to emit DW_AT_[MIPS_]linkage_name.
+  bool UseLinkageNames;
+
   /// Version of dwarf we're emitting.
   unsigned DwarfVersion;
 
@@ -338,7 +313,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// True iff there are multiple CUs in this module.
   bool SingleCU;
   bool IsDarwin;
-  bool IsPS4;
 
   AddressPool AddrPool;
 
@@ -347,7 +321,8 @@ class DwarfDebug : public AsmPrinterHandler {
   DwarfAccelTable AccelNamespace;
   DwarfAccelTable AccelTypes;
 
-  DenseMap<const Function *, DISubprogram *> FunctionDIs;
+  // Identify a debugger for "tuning" the debug info.
+  DebuggerKind DebuggerTuning;
 
   MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
 
@@ -372,12 +347,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// Construct a DIE for this abstract scope.
   void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
 
-  /// Compute the size and offset of a DIE given an incoming Offset.
-  unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
-
-  /// Compute the size and offset of all the DIEs.
-  void computeSizeAndOffsets();
-
   /// Collect info for variables that were optimized out.
   void collectDeadVariables();
 
@@ -443,9 +412,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// Emit visible names into a debug ranges section.
   void emitDebugRanges();
 
-  /// Emit inline info using custom format.
-  void emitDebugInlineInfo();
-
   /// DWARF 5 Experimental Split Dwarf Emitters
 
   /// Initialize common features of skeleton units.
@@ -456,10 +422,6 @@ class DwarfDebug : public AsmPrinterHandler {
   /// section.
   DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
 
-  /// Construct the split debug info compile unit for the debug info
-  /// section.
-  DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU);
-
   /// Emit the debug info dwo section.
   void emitDebugInfoDWO();
 
@@ -544,6 +506,9 @@ public:
   /// Process end of an instruction.
   void endInstruction() override;
 
+  /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits.
+  static uint64_t makeTypeSignature(StringRef Identifier);
+
   /// Add a DIE to the set of types that we're going to pull into
   /// type units.
   void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
@@ -558,10 +523,22 @@ public:
     SymSize[Sym] = Size;
   }
 
+  /// Returns whether to emit DW_AT_[MIPS_]linkage_name.
+  bool useLinkageNames() const { return UseLinkageNames; }
+
   /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the
   /// standard DW_OP_form_tls_address opcode
   bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; }
 
+  /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger.
+  ///
+  /// Returns whether we are "tuning" for a given debugger.
+  /// @{
+  bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; }
+  bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; }
+  bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; }
+  /// @}
+
   // Experimental DWARF5 features.
 
   /// Returns whether or not to emit tables that dwarf consumers can
@@ -604,9 +581,6 @@ public:
   DwarfCompileUnit *lookupUnit(const DIE *CU) const {
     return CUDieMap.lookup(CU);
   }
-
-  /// isSubprogramContext - Return true if Context is either a subprogram
-  /// or another context nested inside a subprogram.
-  bool isSubprogramContext(const MDNode *Context);
 
   void addSubprogramNames(const DISubprogram *SP, DIE &Die);
 
@@ -622,14 +596,6 @@ public:
 
   const MachineFunction *getCurrentFunction() const { return CurFn; }
 
-  iterator_range<ImportedEntityMap::const_iterator>
-  findImportedEntitiesForScope(const MDNode *Scope) const {
-    return make_range(std::equal_range(
-        ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(),
-        std::pair<const MDNode *, const MDNode *>(Scope, nullptr),
-        less_first()));
-  }
-
   /// A helper function to check whether the DIE for a given Scope is
   /// going to be null.
   bool isLexicalScopeDIENull(LexicalScope *Scope);
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index a2799b8d6300..7b5b831da166 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -211,12 +211,15 @@ bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr,
     return AddMachineRegPiece(MachineReg, SizeInBits,
                               getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
   }
-  case dwarf::DW_OP_plus: {
-    // [DW_OP_reg,Offset,DW_OP_plus,DW_OP_deref] --> [DW_OP_breg,Offset].
+  case dwarf::DW_OP_plus:
+  case dwarf::DW_OP_minus: {
+    // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
+    // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
    auto N = I.getNext();
     if (N != E && N->getOp() == dwarf::DW_OP_deref) {
       unsigned Offset = I->getArg(0);
-      ValidReg = AddMachineRegIndirect(MachineReg, Offset);
+      ValidReg = AddMachineRegIndirect(
+          MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
       std::advance(I, 2);
       break;
     } else
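The DwarfExpression hunks here deal with an asymmetry in DWARF: there is a single-opcode form for adding a constant (DW_OP_plus_uconst) but no DW_OP_minus_uconst, so subtracting a constant must be spelled as a push followed by a subtract. A small sketch of the two emission patterns, using illustrative helpers over a plain opcode vector:

#include "llvm/Support/Dwarf.h"
#include <cstdint>
#include <vector>

using namespace llvm;

// Add a constant: one opcode with an inline ULEB operand.
void emitAddConst(std::vector<uint64_t> &Ops, uint64_t N) {
  Ops.push_back(dwarf::DW_OP_plus_uconst);
  Ops.push_back(N);
}

// Subtract a constant: no DW_OP_minus_uconst exists, so push N with
// DW_OP_constu and then apply DW_OP_minus -- exactly what the
// AddExpression() case added below does.
void emitSubConst(std::vector<uint64_t> &Ops, uint64_t N) {
  Ops.push_back(dwarf::DW_OP_constu);
  Ops.push_back(N);
  Ops.push_back(dwarf::DW_OP_minus);
}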
@@ -255,6 +258,12 @@ void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I,
       EmitOp(dwarf::DW_OP_plus_uconst);
       EmitUnsigned(I->getArg(0));
       break;
+    case dwarf::DW_OP_minus:
+      // There is no OP_minus_uconst.
+      EmitOp(dwarf::DW_OP_constu);
+      EmitUnsigned(I->getArg(0));
+      EmitOp(dwarf::DW_OP_minus);
+      break;
     case dwarf::DW_OP_deref:
       EmitOp(dwarf::DW_OP_deref);
       break;
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 355582298e5e..d75fea5d8c8a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -192,18 +192,19 @@ void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
                  DIEInteger(1));
 }
 
-void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
                         Optional<dwarf::Form> Form, uint64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(false, Integer);
   Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer));
 }
 
-void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) {
+void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form,
+                        uint64_t Integer) {
   addUInt(Block, (dwarf::Attribute)0, Form, Integer);
 }
 
-void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute,
+void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
                         Optional<dwarf::Form> Form, int64_t Integer) {
   if (!Form)
     Form = DIEInteger::BestForm(true, Integer);
@@ -222,9 +223,10 @@ void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
                 DIEString(DU->getStringPool().getEntry(*Asm, String)));
 }
 
-DIE::value_iterator DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute,
-                                        dwarf::Form Form,
-                                        const MCSymbol *Label) {
+DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die,
+                                                 dwarf::Attribute Attribute,
+                                                 dwarf::Form Form,
+                                                 const MCSymbol *Label) {
   return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label));
 }
 
@@ -277,6 +279,13 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
                dwarf::DW_FORM_ref_sig8, DIETypeSignature(Type));
 }
 
+void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+                                    StringRef Identifier) {
+  uint64_t Signature = DD->makeTypeSignature(Identifier);
+  Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8,
+               DIEInteger(Signature));
+}
+
 void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
                             DIEEntry Entry) {
   const DIE *DieCU = Die.getUnitOrNull();
@@ -292,8 +301,6 @@ void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
 }
 
 DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) {
-  assert(Tag != dwarf::DW_TAG_auto_variable &&
-         Tag != dwarf::DW_TAG_arg_variable);
   DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag));
   if (N)
     insertDIE(N, &Die);
@@ -445,7 +452,7 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
 
   // Find the __forwarding field and the variable field in the __Block_byref
   // struct.
-  DINodeArray Fields = cast<DICompositeTypeBase>(TmpTy)->getElements();
+  DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements();
   const DIDerivedType *varField = nullptr;
   const DIDerivedType *forwardingField = nullptr;
 
@@ -506,34 +513,35 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
 
 /// Return true if type encoding is unsigned.
 static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
-  if (auto *DTy = dyn_cast<DIDerivedTypeBase>(Ty)) {
+  if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+    // FIXME: Enums without a fixed underlying type have unknown signedness
+    // here, leading to incorrectly emitted constants.
+    if (CTy->getTag() == dwarf::DW_TAG_enumeration_type)
+      return false;
+
+    // (Pieces of) aggregate types that get hacked apart by SROA may be
+    // represented by a constant. Encode them as unsigned bytes.
+    return true;
+  }
+
+  if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
     dwarf::Tag T = (dwarf::Tag)Ty->getTag();
     // Encode pointer constants as unsigned bytes. This is used at least for
     // null pointer constant emission.
-    // (Pieces of) aggregate types that get hacked apart by SROA may also be
-    // represented by a constant. Encode them as unsigned bytes.
     // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
     // here, but accept them for now due to a bug in SROA producing bogus
     // dbg.values.
-    if (T == dwarf::DW_TAG_array_type ||
-        T == dwarf::DW_TAG_class_type ||
-        T == dwarf::DW_TAG_pointer_type ||
+    if (T == dwarf::DW_TAG_pointer_type ||
         T == dwarf::DW_TAG_ptr_to_member_type ||
         T == dwarf::DW_TAG_reference_type ||
-        T == dwarf::DW_TAG_rvalue_reference_type ||
-        T == dwarf::DW_TAG_structure_type ||
-        T == dwarf::DW_TAG_union_type)
+        T == dwarf::DW_TAG_rvalue_reference_type)
       return true;
     assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
            T == dwarf::DW_TAG_volatile_type ||
-           T == dwarf::DW_TAG_restrict_type ||
-           T == dwarf::DW_TAG_enumeration_type);
-    if (DITypeRef Deriv = DTy->getBaseType())
-      return isUnsignedDIType(DD, DD->resolve(Deriv));
-    // FIXME: Enums without a fixed underlying type have unknown signedness
-    // here, leading to incorrectly emitted constants.
-    assert(DTy->getTag() == dwarf::DW_TAG_enumeration_type);
-    return false;
+           T == dwarf::DW_TAG_restrict_type);
+    DITypeRef Deriv = DTy->getBaseType();
+    assert(Deriv && "Expected valid base type");
+    return isUnsignedDIType(DD, DD->resolve(Deriv));
   }
 
   auto *BTy = cast<DIBasicType>(Ty);
@@ -659,7 +667,7 @@ void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
 }
 
 void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) {
-  if (!LinkageName.empty())
+  if (!LinkageName.empty() && DD->useLinkageNames())
     addString(Die,
               DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
                                          : dwarf::DW_AT_MIPS_linkage_name,
@@ -685,6 +693,8 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
     return getOrCreateNameSpace(NS);
   if (auto *SP = dyn_cast<DISubprogram>(Context))
     return getOrCreateSubprogramDIE(SP);
+  if (auto *M = dyn_cast<DIModule>(Context))
+    return getOrCreateModule(M);
   return getDIE(Context);
 }
 
@@ -700,7 +710,8 @@ DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
 
   constructTypeDIE(TyDIE, cast<DICompositeType>(Ty));
 
-  updateAcceleratorTables(Context, Ty, TyDIE);
+  if (!Ty->isExternalTypeRef())
+    updateAcceleratorTables(Context, Ty, TyDIE);
   return &TyDIE;
 }
 
@@ -753,7 +764,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
                                         const DIType *Ty, const DIE &TyDIE) {
   if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
     bool IsImplementation = 0;
-    if (auto *CT = dyn_cast<DICompositeTypeBase>(Ty)) {
+    if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
       // A runtime language of 0 actually means C/C++ and that any
       // non-negative value is some version of Objective-C/C++.
       IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete();
@@ -795,8 +806,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
 
   // Reverse iterate over our list to go from the outermost construct to the
   // innermost.
-  for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I) {
-    const DIScope *Ctx = *I;
+  for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) {
     StringRef Name = Ctx->getName();
     if (Name.empty() && isa<DINamespace>(Ctx))
       Name = "(anonymous namespace)";
@@ -843,7 +853,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
   // Add size if non-zero (derived types might be zero-sized.)
   if (Size && Tag != dwarf::DW_TAG_pointer_type
-           && Tag != dwarf::DW_TAG_ptr_to_member_type)
+           && Tag != dwarf::DW_TAG_ptr_to_member_type
+           && Tag != dwarf::DW_TAG_reference_type
+           && Tag != dwarf::DW_TAG_rvalue_reference_type)
     addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
 
   if (Tag == dwarf::DW_TAG_ptr_to_member_type)
@@ -899,6 +911,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
 }
 
 void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
+  if (CTy->isExternalTypeRef()) {
+    StringRef Identifier = CTy->getIdentifier();
+    assert(!Identifier.empty() && "external type ref without identifier");
+    addFlag(Buffer, dwarf::DW_AT_declaration);
+    return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier);
+  }
+
   // Add name if not anonymous or intermediate type.
   StringRef Name = CTy->getName();
 
@@ -1134,6 +1153,14 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
            "definition DIE was created in "
            "getOrCreateSubprogramDIE");
     DeclLinkageName = SPDecl->getLinkageName();
+    unsigned DeclID =
+        getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory());
+    unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory());
+    if (DeclID != DefID)
+      addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID);
+
+    if (SP->getLine() != SPDecl->getLine())
+      addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine());
   }
 
   // Add function template parameters.
@@ -1180,11 +1207,10 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
                                         Language == dwarf::DW_LANG_ObjC))
     addFlag(SPDie, dwarf::DW_AT_prototyped);
 
-  const DISubroutineType *SPTy = SP->getType();
-  assert(SPTy->getTag() == dwarf::DW_TAG_subroutine_type &&
-         "the type of a subprogram should be a subroutine");
+  DITypeRefArray Args;
+  if (const DISubroutineType *SPTy = SP->getType())
+    Args = SPTy->getTypeArray();
 
-  auto Args = SPTy->getTypeArray();
   // Add a return type. If this is a type like a C/C++ void type we don't add a
   // return type.
   if (Args.size())
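The retyping of addUInt/addSInt/addLabel above, from DIE to DIEValueList, is the usual trick of widening a helper to the common base so one overload serves DIE, DIELoc, and DIEBlock alike. A toy sketch of the shape of that refactor, with illustrative names rather than the LLVM classes:

#include <cstdint>
#include <utility>
#include <vector>

// Common base owning the attribute/value list.
struct ValueList {
  std::vector<std::pair<int, uint64_t>> Values; // (attribute, integer)
  void addValue(int Attr, uint64_t V) { Values.emplace_back(Attr, V); }
};

struct Die : ValueList {};      // stands in for DIE
struct LocBlock : ValueList {}; // stands in for DIELoc/DIEBlock

// One helper, typed on the base, works for every list-bearing node.
void addUInt(ValueList &List, int Attr, uint64_t V) {
  List.addValue(Attr, V);
}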
   /// Returns the DIE map slot for the specified debug variable.
   ///
   /// We delegate the request to DwarfDebug when the MDNode can be part of the
@@ -186,14 +176,14 @@ public:
   void addFlag(DIE &Die, dwarf::Attribute Attribute);
 
   /// Add an unsigned integer attribute data and value.
-  void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
-               uint64_t Integer);
+  void addUInt(DIEValueList &Die, dwarf::Attribute Attribute,
+               Optional<dwarf::Form> Form, uint64_t Integer);
 
-  void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer);
+  void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer);
 
   /// Add an signed integer attribute data and value.
-  void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
-               int64_t Integer);
+  void addSInt(DIEValueList &Die, dwarf::Attribute Attribute,
+               Optional<dwarf::Form> Form, int64_t Integer);
 
   void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
 
@@ -206,8 +196,10 @@ public:
   void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str);
 
   /// Add a Dwarf label attribute data and value.
-  DIE::value_iterator addLabel(DIE &Die, dwarf::Attribute Attribute,
-                               dwarf::Form Form, const MCSymbol *Label);
+  DIEValueList::value_iterator addLabel(DIEValueList &Die,
+                                        dwarf::Attribute Attribute,
+                                        dwarf::Form Form,
+                                        const MCSymbol *Label);
 
   void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
 
@@ -228,7 +220,11 @@ public:
   /// Add a DIE attribute data and value.
   void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry);
 
+  /// Add a type's DW_AT_signature and set the declaration flag.
   void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type);
+  /// Add an attribute containing the type signature for a unique identifier.
+  void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
+                           StringRef Identifier);
 
   /// Add block data.
   void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 49ef8d3ddc8f..e24dcb1bffd4 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -662,9 +662,8 @@ void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
     Entry = TypeInfos.size();
   }
 
-  for (std::vector<const GlobalValue *>::const_reverse_iterator
-         I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
-    const GlobalValue *GV = *I;
+  for (const GlobalValue *GV : make_range(TypeInfos.rbegin(),
+                                          TypeInfos.rend())) {
     if (VerboseAsm)
       Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--));
     Asm->EmitTTypeReference(GV, TTypeEncoding);
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index e42e082acbf9..c6a0e9d0524c 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -76,10 +76,6 @@ protected:
                             SmallVectorImpl<ActionEntry> &Actions,
                             SmallVectorImpl<unsigned> &FirstActions);
 
-  /// Return `true' if this is a call to a function marked `nounwind'. Return
-  /// `false' otherwise.
-  bool callToNoUnwindFunction(const MachineInstr *MI);
-
   void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
                      RangeMapType &PadMap);
 
@@ -131,6 +127,10 @@ public:
   void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
   void beginInstruction(const MachineInstr *MI) override {}
   void endInstruction() override {}
+
+  /// Return `true' if this is a call to a function marked `nounwind'. Return
+  /// `false' otherwise.
+  static bool callToNoUnwindFunction(const MachineInstr *MI);
 };
 }
 
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index eb9e4c10daf4..6a023b998b32 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -48,7 +48,7 @@ void llvm::linkErlangGCPrinter() {}
 void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
                                      AsmPrinter &AP) {
   MCStreamer &OS = *AP.OutStreamer;
-  unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+  unsigned IntPtrSize = M.getDataLayout().getPointerSize();
 
   // Put this in a custom .note section.
   OS.SwitchSection(
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 2ceec61ab5ca..c09ef6adea69 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -93,7 +93,7 @@ void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info,
 ///
 void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
                                             AsmPrinter &AP) {
-  unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize();
+  unsigned IntPtrSize = M.getDataLayout().getPointerSize();
 
   AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection());
   EmitCamlGlobal(M, AP, "code_end");
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 6610ac78f8c4..c2c0f84e5c92 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -27,15 +27,15 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
   auto *Scope = cast<DIScope>(S);
   StringRef Dir = Scope->getDirectory(),
             Filename = Scope->getFilename();
-  char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
-  if (Result)
-    return Result;
+  std::string &Filepath =
+      DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
+  if (!Filepath.empty())
+    return Filepath;
 
   // Clang emits directory and relative filename info into the IR, but CodeView
   // operates on full paths.  We could change Clang to emit full paths too, but
   // that would increase the IR size and probably not needed for other users.
   // For now, just concatenate and canonicalize the path here.
-  std::string Filepath;
   if (Filename.find(':') == 1)
     Filepath = Filename;
   else
@@ -74,8 +74,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
   while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
     Filepath.erase(Cursor, 1);
 
-  Result = strdup(Filepath.c_str());
-  return StringRef(Result);
+  return Filepath;
 }
 
 void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
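The getFullFilepath() change above swaps a raw `char *` cache (filled with strdup and freed in a destructor, removed further below) for a map of std::string, where an empty slot doubles as "not computed yet". A minimal sketch of the resulting caching pattern, with hypothetical names:

#include <map>
#include <string>
#include <utility>

// operator[] default-constructs the slot; an empty string means the
// path has not been computed. No strdup, no manual free on teardown.
std::string &fullPath(
    std::map<std::pair<std::string, std::string>, std::string> &Cache,
    const std::string &Dir, const std::string &File) {
  std::string &Path = Cache[std::make_pair(Dir, File)];
  if (Path.empty())
    Path = Dir + "/" + File; // the real code also canonicalizes separators
  return Path;
}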
Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName)); Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(), diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h index 43d1a432712e..78068e07c16f 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -98,7 +98,7 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { } } FileNameRegistry; - typedef std::map, char *> + typedef std::map, std::string> DirAndFilenameToFilepathMapTy; DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap; StringRef getFullFilepath(const MDNode *S); @@ -116,14 +116,6 @@ class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { public: WinCodeViewLineTables(AsmPrinter *Asm); - ~WinCodeViewLineTables() override { - for (DirAndFilenameToFilepathMapTy::iterator - I = DirAndFilenameToFilepathMap.begin(), - E = DirAndFilenameToFilepathMap.end(); - I != E; ++I) - free(I->second); - } - void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} /// \brief Emit the COFF section that holds the line table information. diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp index a2b9316aa875..48b7104f24c3 100644 --- a/lib/CodeGen/AsmPrinter/WinException.cpp +++ b/lib/CodeGen/AsmPrinter/WinException.cpp @@ -30,6 +30,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCWin64EH.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -37,6 +38,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; WinException::WinException(AsmPrinter *A) : EHStreamer(A) { @@ -62,9 +64,9 @@ void WinException::beginFunction(const MachineFunction *MF) { // If any landing pads survive, we need an EH table. bool hasLandingPads = !MMI->getLandingPads().empty(); + bool hasEHFunclets = MMI->hasEHFunclets(); const Function *F = MF->getFunction(); - const Function *ParentF = MMI->getWinEHParent(F); shouldEmitMoves = Asm->needsSEHMoves(); @@ -78,49 +80,23 @@ void WinException::beginFunction(const MachineFunction *MF) { F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && F->needsUnwindTableEntry(); - shouldEmitPersonality = forceEmitPersonality || (hasLandingPads && - PerEncoding != dwarf::DW_EH_PE_omit && Per); + shouldEmitPersonality = + forceEmitPersonality || ((hasLandingPads || hasEHFunclets) && + PerEncoding != dwarf::DW_EH_PE_omit && Per); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); shouldEmitLSDA = shouldEmitPersonality && LSDAEncoding != dwarf::DW_EH_PE_omit; - // If we're not using CFI, we don't want the CFI or the personality. If - // WinEHPrepare outlined something, we should emit the LSDA. + // If we're not using CFI, we don't want the CFI or the personality, but we + // might want EH tables if we had EH pads. 
if (!Asm->MAI->usesWindowsCFI()) { - bool HasOutlinedChildren = - F->hasFnAttribute("wineh-parent") && F == ParentF; - shouldEmitLSDA = HasOutlinedChildren; + shouldEmitLSDA = hasEHFunclets; shouldEmitPersonality = false; return; } - // If this was an outlined handler, we need to define the label corresponding - // to the offset of the parent frame relative to the stack pointer after the - // prologue. - if (F != ParentF) { - WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); - auto I = FuncInfo.CatchHandlerParentFrameObjOffset.find(F); - if (I != FuncInfo.CatchHandlerParentFrameObjOffset.end()) { - MCSymbol *HandlerTypeParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(F->getName())); - - // Emit a symbol assignment. - Asm->OutStreamer->EmitAssignment( - HandlerTypeParentFrameOffset, - MCConstantExpr::create(I->second, Asm->OutContext)); - } - } - - if (shouldEmitMoves || shouldEmitPersonality) - Asm->OutStreamer->EmitWinCFIStartProc(Asm->CurrentFnSym); - - if (shouldEmitPersonality) { - const MCSymbol *PersHandlerSym = - TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); - } + beginFunclet(MF->front(), Asm->CurrentFnSym); } /// endFunction - Gather and emit post-function exception information. @@ -134,43 +110,158 @@ void WinException::endFunction(const MachineFunction *MF) { if (F->hasPersonalityFn()) Per = classifyEHPersonality(F->getPersonalityFn()); - // Get rid of any dead landing pads if we're not using a Windows EH scheme. In - // Windows EH schemes, the landing pad is not actually reachable. It only - // exists so that we can emit the right table data. - if (!isMSVCEHPersonality(Per)) + // Get rid of any dead landing pads if we're not using funclets. In funclet + // schemes, the landing pad is not actually reachable. It only exists so + // that we can emit the right table data. + if (!isFuncletEHPersonality(Per)) MMI->TidyLandingPads(); + endFunclet(); + + // endFunclet will emit the necessary .xdata tables for x64 SEH. + if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets()) + return; + if (shouldEmitPersonality || shouldEmitLSDA) { Asm->OutStreamer->PushSection(); - if (shouldEmitMoves || shouldEmitPersonality) { - // Emit an UNWIND_INFO struct describing the prologue. - Asm->OutStreamer->EmitWinEHHandlerData(); - } else { - // Just switch sections to the right xdata section. This use of - // CurrentFnSym assumes that we only emit the LSDA when ending the parent - // function. - MCSection *XData = WinEH::UnwindEmitter::getXDataSection( - Asm->CurrentFnSym, Asm->OutContext); - Asm->OutStreamer->SwitchSection(XData); - } + // Just switch sections to the right xdata section. This use of CurrentFnSym + // assumes that we only emit the LSDA when ending the parent function. + MCSection *XData = WinEH::UnwindEmitter::getXDataSection(Asm->CurrentFnSym, + Asm->OutContext); + Asm->OutStreamer->SwitchSection(XData); // Emit the tables appropriate to the personality function in use. If we // don't recognize the personality, assume it uses an Itanium-style LSDA. 
if (Per == EHPersonality::MSVC_Win64SEH)
-      emitCSpecificHandlerTable();
+      emitCSpecificHandlerTable(MF);
     else if (Per == EHPersonality::MSVC_X86SEH)
       emitExceptHandlerTable(MF);
     else if (Per == EHPersonality::MSVC_CXX)
       emitCXXFrameHandler3Table(MF);
+    else if (Per == EHPersonality::CoreCLR)
+      emitCLRExceptionTable(MF);
     else
       emitExceptionTable();

     Asm->OutStreamer->PopSection();
   }
+}

+/// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock.
+static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
+                                   const MachineBasicBlock *MBB) {
+  if (!MBB)
+    return nullptr;
+
+  assert(MBB->isEHFuncletEntry());
+
+  // Give catches and cleanups a name based on their parent function and
+  // their funclet entry block's number.
+  const MachineFunction *MF = MBB->getParent();
+  const Function *F = MF->getFunction();
+  StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+  MCContext &Ctx = MF->getContext();
+  StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
+  return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
+                               Twine(MBB->getNumber()) + "@?0?" +
+                               FuncLinkageName + "@4HA");
+}
+
+void WinException::beginFunclet(const MachineBasicBlock &MBB,
+                                MCSymbol *Sym) {
+  CurrentFuncletEntry = &MBB;
+
+  const Function *F = Asm->MF->getFunction();
+  // If a symbol was not provided for the funclet, invent one.
+  if (!Sym) {
+    Sym = getMCSymbolForMBB(Asm, &MBB);
+
+    // Describe our funclet symbol as a function with internal linkage.
+    Asm->OutStreamer->BeginCOFFSymbolDef(Sym);
+    Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+    Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+                                         << COFF::SCT_COMPLEX_TYPE_SHIFT);
+    Asm->OutStreamer->EndCOFFSymbolDef();
+
+    // We want our funclet's entry point to be aligned such that no nops will
+    // be present after the label.
+    Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
+                       F);
+
+    // Now that we've emitted the alignment directive, point at our funclet.
+    Asm->OutStreamer->EmitLabel(Sym);
+  }
+
+  // Mark 'Sym' as starting our funclet.
   if (shouldEmitMoves || shouldEmitPersonality)
+    Asm->OutStreamer->EmitWinCFIStartProc(Sym);
+
+  if (shouldEmitPersonality) {
+    const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+    const Function *PerFn = nullptr;
+
+    // Determine which personality routine we are using for this funclet.
+    if (F->hasPersonalityFn())
+      PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+    const MCSymbol *PersHandlerSym =
+        TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI);
+
+    // Classify the personality routine so that we may reason about it.
+    EHPersonality Per = EHPersonality::Unknown;
+    if (F->hasPersonalityFn())
+      Per = classifyEHPersonality(F->getPersonalityFn());
+
+    // Do not emit a .seh_handler directive if it is a C++ cleanup funclet.
+    if (Per != EHPersonality::MSVC_CXX ||
+        !CurrentFuncletEntry->isCleanupFuncletEntry())
+      Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
+  }
+}
+
+void WinException::endFunclet() {
+  // No funclet to process? Great, we have nothing to do.
+  if (!CurrentFuncletEntry)
+    return;
+
+  if (shouldEmitMoves || shouldEmitPersonality) {
+    const Function *F = Asm->MF->getFunction();
+    EHPersonality Per = EHPersonality::Unknown;
+    if (F->hasPersonalityFn())
+      Per = classifyEHPersonality(F->getPersonalityFn());
+
+    // The .seh_handlerdata directive implicitly switches section; push the
+    // current section so that we may return to it.
+ Asm->OutStreamer->PushSection(); + + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + + if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality && + !CurrentFuncletEntry->isCleanupFuncletEntry()) { + // If this is a C++ catch funclet (or the parent function), + // emit a reference to the LSDA for the parent function. + StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( + Twine("$cppxdata$", FuncLinkageName)); + Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); + } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() && + !CurrentFuncletEntry->isEHFuncletEntry()) { + // If this is the parent function in Win64 SEH, emit the LSDA immediately + // following .seh_handlerdata. + emitCSpecificHandlerTable(Asm->MF); + } + + // Switch back to the previous section now that we are done writing to + // .xdata. + Asm->OutStreamer->PopSection(); + + // Emit a .seh_endproc directive to mark the end of the function. Asm->OutStreamer->EmitWinCFIEndProc(); + } + + // Let's make sure we don't try to end the same funclet twice. + CurrentFuncletEntry = nullptr; } const MCExpr *WinException::create32bitRef(const MCSymbol *Value) { @@ -188,6 +279,202 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) { return create32bitRef(Asm->getSymbol(GV)); } +const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) { + return MCBinaryExpr::createAdd(create32bitRef(Label), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +const MCExpr *WinException::getOffset(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createSub( + MCSymbolRefExpr::create(OffsetOf, Asm->OutContext), + MCSymbolRefExpr::create(OffsetFrom, Asm->OutContext), Asm->OutContext); +} + +const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createAdd(getOffset(OffsetOf, OffsetFrom), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +int WinException::getFrameIndexOffset(int FrameIndex, + const WinEHFuncInfo &FuncInfo) { + const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); + unsigned UnusedReg; + if (Asm->MAI->usesWindowsCFI()) + return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg); + // For 32-bit, offsets should be relative to the end of the EH registration + // node. For 64-bit, it's relative to SP at the end of the prologue. + assert(FuncInfo.EHRegNodeEndOffset != INT_MAX); + int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); + Offset += FuncInfo.EHRegNodeEndOffset; + return Offset; +} + +namespace { + +/// Top-level state used to represent unwind to caller +const int NullState = -1; + +struct InvokeStateChange { + /// EH Label immediately after the last invoke in the previous state, or + /// nullptr if the previous state was the null state. + const MCSymbol *PreviousEndLabel; + + /// EH label immediately before the first invoke in the new state, or nullptr + /// if the new state is the null state. + const MCSymbol *NewStartLabel; + + /// State of the invoke following NewStartLabel, or NullState to indicate + /// the presence of calls which may unwind to caller. + int NewState; +}; + +/// Iterator that reports all the invoke state changes in a range of machine +/// basic blocks. 
Changes to the null state are reported whenever a call that
+/// may unwind to caller is encountered. The MBB range is expected to be an
+/// entire function or funclet, and the start and end of the range are treated
+/// as being in the NullState even if there's not an unwind-to-caller call
+/// before the first invoke or after the last one (i.e., the first state change
+/// reported is the first change to something other than NullState, and a
+/// change back to NullState is always reported at the end of iteration).
+class InvokeStateChangeIterator {
+  InvokeStateChangeIterator(const WinEHFuncInfo &EHInfo,
+                            MachineFunction::const_iterator MFI,
+                            MachineFunction::const_iterator MFE,
+                            MachineBasicBlock::const_iterator MBBI,
+                            int BaseState)
+      : EHInfo(EHInfo), MFI(MFI), MFE(MFE), MBBI(MBBI), BaseState(BaseState) {
+    LastStateChange.PreviousEndLabel = nullptr;
+    LastStateChange.NewStartLabel = nullptr;
+    LastStateChange.NewState = BaseState;
+    scan();
+  }
+
+public:
+  static iterator_range<InvokeStateChangeIterator>
+  range(const WinEHFuncInfo &EHInfo, MachineFunction::const_iterator Begin,
+        MachineFunction::const_iterator End, int BaseState = NullState) {
+    // Reject empty ranges to simplify bookkeeping by ensuring that we can get
+    // the end of the last block.
+    assert(Begin != End);
+    auto BlockBegin = Begin->begin();
+    auto BlockEnd = std::prev(End)->end();
+    return make_range(
+        InvokeStateChangeIterator(EHInfo, Begin, End, BlockBegin, BaseState),
+        InvokeStateChangeIterator(EHInfo, End, End, BlockEnd, BaseState));
+  }
+
+  // Iterator methods.
+  bool operator==(const InvokeStateChangeIterator &O) const {
+    assert(BaseState == O.BaseState);
+    // Must be visiting same block.
+    if (MFI != O.MFI)
+      return false;
+    // Must be visiting same instr.
+    if (MBBI != O.MBBI)
+      return false;
+    // At end of block/instr iteration, we can still have two distinct states:
+    // one to report the final EndLabel, and another indicating the end of the
+    // state change iteration. Check for CurrentEndLabel equality to
+    // distinguish these.
+    return CurrentEndLabel == O.CurrentEndLabel;
+  }
+
+  bool operator!=(const InvokeStateChangeIterator &O) const {
+    return !operator==(O);
+  }
+  InvokeStateChange &operator*() { return LastStateChange; }
+  InvokeStateChange *operator->() { return &LastStateChange; }
+  InvokeStateChangeIterator &operator++() { return scan(); }
+
+private:
+  InvokeStateChangeIterator &scan();
+
+  const WinEHFuncInfo &EHInfo;
+  const MCSymbol *CurrentEndLabel = nullptr;
+  MachineFunction::const_iterator MFI;
+  MachineFunction::const_iterator MFE;
+  MachineBasicBlock::const_iterator MBBI;
+  InvokeStateChange LastStateChange;
+  bool VisitingInvoke = false;
+  int BaseState;
+};
+
+} // end anonymous namespace
+
+InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
+  bool IsNewBlock = false;
+  for (; MFI != MFE; ++MFI, IsNewBlock = true) {
+    if (IsNewBlock)
+      MBBI = MFI->begin();
+    for (auto MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+      const MachineInstr &MI = *MBBI;
+      if (!VisitingInvoke && LastStateChange.NewState != BaseState &&
+          MI.isCall() && !EHStreamer::callToNoUnwindFunction(&MI)) {
+        // Indicate a change of state to the null state. We don't have
+        // start/end EH labels handy but the caller won't expect them for
+        // null state regions.
+        LastStateChange.PreviousEndLabel = CurrentEndLabel;
+        LastStateChange.NewStartLabel = nullptr;
+        LastStateChange.NewState = BaseState;
+        CurrentEndLabel = nullptr;
+        // Don't re-visit this instr on the next scan
+        ++MBBI;
+        return *this;
+      }
+
+      // All other state changes are at EH labels before/after invokes.
+      if (!MI.isEHLabel())
+        continue;
+      MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+      if (Label == CurrentEndLabel) {
+        VisitingInvoke = false;
+        continue;
+      }
+      auto InvokeMapIter = EHInfo.LabelToStateMap.find(Label);
+      // Ignore EH labels that aren't the ones inserted before an invoke
+      if (InvokeMapIter == EHInfo.LabelToStateMap.end())
+        continue;
+      auto &StateAndEnd = InvokeMapIter->second;
+      int NewState = StateAndEnd.first;
+      // Keep track of the fact that we're between EH start/end labels so
+      // we know not to treat the invoke we'll see as unwinding to caller.
+      VisitingInvoke = true;
+      if (NewState == LastStateChange.NewState) {
+        // The state isn't actually changing here. Record the new end and
+        // keep going.
+        CurrentEndLabel = StateAndEnd.second;
+        continue;
+      }
+      // Found a state change to report
+      LastStateChange.PreviousEndLabel = CurrentEndLabel;
+      LastStateChange.NewStartLabel = Label;
+      LastStateChange.NewState = NewState;
+      // Start keeping track of the new current end
+      CurrentEndLabel = StateAndEnd.second;
+      // Don't re-visit this instr on the next scan
+      ++MBBI;
+      return *this;
+    }
+  }
+  // Iteration hit the end of the block range.
+  if (LastStateChange.NewState != BaseState) {
+    // Report the end of the last new state
+    LastStateChange.PreviousEndLabel = CurrentEndLabel;
+    LastStateChange.NewStartLabel = nullptr;
+    LastStateChange.NewState = BaseState;
+    // Leave CurrentEndLabel non-null to distinguish this state from end.
+    assert(CurrentEndLabel != nullptr);
+    return *this;
+  }
+  // We've reported all state changes and hit the end state.
+  CurrentEndLabel = nullptr;
+  return *this;
+}
+
 /// Emit the language-specific data that __C_specific_handler expects. This
 /// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
 /// up after faults with __try, __except, and __finally. The typeinfo values
@@ -216,135 +503,156 @@ const MCExpr *WinException::create32bitRef(const GlobalValue *GV) {
 /// imagerel32 LabelLPad; // Zero means __finally.
 /// } Entries[NumEntries];
 /// };
-void WinException::emitCSpecificHandlerTable() {
-  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
+  auto &OS = *Asm->OutStreamer;
+  MCContext &Ctx = Asm->OutContext;
+  const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();

-  // Simplifying assumptions for first implementation:
-  // - Cleanups are not implemented.
-  // - Filters are not implemented.
+  bool VerboseAsm = OS.isVerboseAsm();
+  auto AddComment = [&](const Twine &Comment) {
+    if (VerboseAsm)
+      OS.AddComment(Comment);
+  };

-  // The Itanium LSDA table sorts similar landing pads together to simplify the
-  // actions table, but we don't need that.
-  SmallVector<const LandingPadInfo *, 4> LandingPads;
-  LandingPads.reserve(PadInfos.size());
-  for (const auto &LP : PadInfos)
-    LandingPads.push_back(&LP);
+  // Emit a label assignment with the SEH frame offset so we can use it for
+  // llvm.x86.seh.recoverfp.
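+  // For illustration only (the exact symbol spelling is an assumption, not
+  // taken from this patch): for a function whose linkage name is "f", the
+  // assignment emitted below would print roughly as
+  //   "f$parent_frame_offset" = 16
+  // and llvm.x86.seh.recoverfp adds that constant to the establisher frame
+  // pointer to recover the parent function's frame.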
+ StringRef FLinkageName = + GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + const MCExpr *MCOffset = + MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); - // Compute label ranges for call sites as we would for the Itanium LSDA, but - // use an all zero action table because we aren't using these actions. - SmallVector FirstActions; - FirstActions.resize(LandingPads.size()); - SmallVector CallSites; - computeCallSiteTable(CallSites, LandingPads, FirstActions); + // Use the assembler to compute the number of table entries through label + // difference and division. + MCSymbol *TableBegin = + Ctx.createTempSymbol("lsda_begin", /*AlwaysAddSuffix=*/true); + MCSymbol *TableEnd = + Ctx.createTempSymbol("lsda_end", /*AlwaysAddSuffix=*/true); + const MCExpr *LabelDiff = getOffset(TableEnd, TableBegin); + const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx); + const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx); + AddComment("Number of call sites"); + OS.EmitValue(EntryCount, 4); - MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); - MCSymbol *EHFuncEndSym = Asm->getFunctionEnd(); + OS.EmitLabel(TableBegin); - // Emit the number of table entries. - unsigned NumEntries = 0; - for (const CallSiteEntry &CSE : CallSites) { - if (!CSE.LPad) - continue; // Ignore gaps. - NumEntries += CSE.LPad->SEHHandlers.size(); + // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only + // models exceptions from invokes. LLVM also allows arbitrary reordering of + // the code, so our tables end up looking a bit different. Rather than + // trying to match MSVC's tables exactly, we emit a denormalized table. For + // each range of invokes in the same state, we emit table entries for all + // the actions that would be taken in that state. This means our tables are + // slightly bigger, which is OK. + const MCSymbol *LastStartLabel = nullptr; + int LastEHState = -1; + // Break out before we enter into a finally funclet. + // FIXME: We need to emit separate EH tables for cleanups. + MachineFunction::const_iterator End = MF->end(); + MachineFunction::const_iterator Stop = std::next(MF->begin()); + while (Stop != End && !Stop->isEHFuncletEntry()) + ++Stop; + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, MF->begin(), Stop)) { + // Emit all the actions for the state we just transitioned out of + // if it was not the null state + if (LastEHState != -1) + emitSEHActionsForRange(FuncInfo, LastStartLabel, + StateChange.PreviousEndLabel, LastEHState); + LastStartLabel = StateChange.NewStartLabel; + LastEHState = StateChange.NewState; } - Asm->OutStreamer->EmitIntValue(NumEntries, 4); - // If there are no actions, we don't need to iterate again. - if (NumEntries == 0) - return; + OS.EmitLabel(TableEnd); +} - // Emit the four-label records for each call site entry. The table has to be - // sorted in layout order, and the call sites should already be sorted. - for (const CallSiteEntry &CSE : CallSites) { - // Ignore gaps. Unlike the Itanium model, unwinding through a frame without - // an EH table entry will propagate the exception rather than terminating - // the program. 
- if (!CSE.LPad) - continue; - const LandingPadInfo *LPad = CSE.LPad; +void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State) { + auto &OS = *Asm->OutStreamer; + MCContext &Ctx = Asm->OutContext; - // Compute the label range. We may reuse the function begin and end labels - // rather than forming new ones. - const MCExpr *Begin = - create32bitRef(CSE.BeginLabel ? CSE.BeginLabel : EHFuncBeginSym); - const MCExpr *End; - if (CSE.EndLabel) { - // The interval is half-open, so we have to add one to include the return - // address of the last invoke in the range. - End = MCBinaryExpr::createAdd(create32bitRef(CSE.EndLabel), - MCConstantExpr::create(1, Asm->OutContext), - Asm->OutContext); + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + assert(BeginLabel && EndLabel); + while (State != -1) { + const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State]; + const MCExpr *FilterOrFinally; + const MCExpr *ExceptOrNull; + auto *Handler = UME.Handler.get(); + if (UME.IsFinally) { + FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler)); + ExceptOrNull = MCConstantExpr::create(0, Ctx); } else { - End = create32bitRef(EHFuncEndSym); + // For an except, the filter can be 1 (catch-all) or a function + // label. + FilterOrFinally = UME.Filter ? create32bitRef(UME.Filter) + : MCConstantExpr::create(1, Ctx); + ExceptOrNull = create32bitRef(Handler->getSymbol()); } - // Emit an entry for each action. - for (SEHHandler Handler : LPad->SEHHandlers) { - Asm->OutStreamer->EmitValue(Begin, 4); - Asm->OutStreamer->EmitValue(End, 4); + AddComment("LabelStart"); + OS.EmitValue(getLabelPlusOne(BeginLabel), 4); + AddComment("LabelEnd"); + OS.EmitValue(getLabelPlusOne(EndLabel), 4); + AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction" + : "CatchAll"); + OS.EmitValue(FilterOrFinally, 4); + AddComment(UME.IsFinally ? "Null" : "ExceptionHandler"); + OS.EmitValue(ExceptOrNull, 4); - // Emit the filter or finally function pointer, if present. Otherwise, - // emit '1' to indicate a catch-all. - const Function *F = Handler.FilterOrFinally; - if (F) - Asm->OutStreamer->EmitValue(create32bitRef(Asm->getSymbol(F)), 4); - else - Asm->OutStreamer->EmitIntValue(1, 4); - - // Emit the recovery address, if present. Otherwise, this must be a - // finally. 
- const BlockAddress *BA = Handler.RecoverBA; - if (BA) - Asm->OutStreamer->EmitValue( - create32bitRef(Asm->GetBlockAddressSymbol(BA)), 4); - else - Asm->OutStreamer->EmitIntValue(0, 4); - } + assert(UME.ToState < State && "states should decrease"); + State = UME.ToState; } } void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { const Function *F = MF->getFunction(); - const Function *ParentF = MMI->getWinEHParent(F); auto &OS = *Asm->OutStreamer; - WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(ParentF); + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); - StringRef ParentLinkageName = - GlobalValue::getRealLinkageName(ParentF->getName()); + StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + SmallVector, 4> IPToStateTable; MCSymbol *FuncInfoXData = nullptr; if (shouldEmitPersonality) { - FuncInfoXData = Asm->OutContext.getOrCreateSymbol( - Twine("$cppxdata$", ParentLinkageName)); - OS.EmitValue(create32bitRef(FuncInfoXData), 4); - - extendIP2StateTable(MF, ParentF, FuncInfo); - - // Defer emission until we've visited the parent function and all the catch - // handlers. Cleanups don't contribute to the ip2state table, so don't count - // them. - if (ParentF != F && !FuncInfo.CatchHandlerMaxState.count(F)) - return; - ++FuncInfo.NumIPToStateFuncsVisited; - if (FuncInfo.NumIPToStateFuncsVisited != FuncInfo.CatchHandlerMaxState.size()) - return; + // If we're 64-bit, emit a pointer to the C++ EH data, and build a map from + // IPs to state numbers. + FuncInfoXData = + Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", FuncLinkageName)); + computeIP2StateTable(MF, FuncInfo, IPToStateTable); } else { - FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(ParentLinkageName); - emitEHRegistrationOffsetLabel(FuncInfo, ParentLinkageName); + FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(FuncLinkageName); } + int UnwindHelpOffset = 0; + if (Asm->MAI->usesWindowsCFI()) + UnwindHelpOffset = + getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo); + MCSymbol *UnwindMapXData = nullptr; MCSymbol *TryBlockMapXData = nullptr; MCSymbol *IPToStateXData = nullptr; - if (!FuncInfo.UnwindMap.empty()) + if (!FuncInfo.CxxUnwindMap.empty()) UnwindMapXData = Asm->OutContext.getOrCreateSymbol( - Twine("$stateUnwindMap$", ParentLinkageName)); + Twine("$stateUnwindMap$", FuncLinkageName)); if (!FuncInfo.TryBlockMap.empty()) - TryBlockMapXData = Asm->OutContext.getOrCreateSymbol( - Twine("$tryMap$", ParentLinkageName)); - if (!FuncInfo.IPToStateList.empty()) - IPToStateXData = Asm->OutContext.getOrCreateSymbol( - Twine("$ip2state$", ParentLinkageName)); + TryBlockMapXData = + Asm->OutContext.getOrCreateSymbol(Twine("$tryMap$", FuncLinkageName)); + if (!IPToStateTable.empty()) + IPToStateXData = + Asm->OutContext.getOrCreateSymbol(Twine("$ip2state$", FuncLinkageName)); + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; // FuncInfo { // uint32_t MagicNumber @@ -363,17 +671,38 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
OS.EmitValueToAlignment(4); OS.EmitLabel(FuncInfoXData); - OS.EmitIntValue(0x19930522, 4); // MagicNumber - OS.EmitIntValue(FuncInfo.UnwindMap.size(), 4); // MaxState - OS.EmitValue(create32bitRef(UnwindMapXData), 4); // UnwindMap - OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); // NumTryBlocks - OS.EmitValue(create32bitRef(TryBlockMapXData), 4); // TryBlockMap - OS.EmitIntValue(FuncInfo.IPToStateList.size(), 4); // IPMapEntries - OS.EmitValue(create32bitRef(IPToStateXData), 4); // IPToStateMap - if (Asm->MAI->usesWindowsCFI()) - OS.EmitIntValue(FuncInfo.UnwindHelpFrameOffset, 4); // UnwindHelp - OS.EmitIntValue(0, 4); // ESTypeList - OS.EmitIntValue(1, 4); // EHFlags + + AddComment("MagicNumber"); + OS.EmitIntValue(0x19930522, 4); + + AddComment("MaxState"); + OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4); + + AddComment("UnwindMap"); + OS.EmitValue(create32bitRef(UnwindMapXData), 4); + + AddComment("NumTryBlocks"); + OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); + + AddComment("TryBlockMap"); + OS.EmitValue(create32bitRef(TryBlockMapXData), 4); + + AddComment("IPMapEntries"); + OS.EmitIntValue(IPToStateTable.size(), 4); + + AddComment("IPToStateXData"); + OS.EmitValue(create32bitRef(IPToStateXData), 4); + + if (Asm->MAI->usesWindowsCFI()) { + AddComment("UnwindHelp"); + OS.EmitIntValue(UnwindHelpOffset, 4); + } + + AddComment("ESTypeList"); + OS.EmitIntValue(0, 4); + + AddComment("EHFlags"); + OS.EmitIntValue(1, 4); // UnwindMapEntry { // int32_t ToState; @@ -381,9 +710,14 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // }; if (UnwindMapXData) { OS.EmitLabel(UnwindMapXData); - for (const WinEHUnwindMapEntry &UME : FuncInfo.UnwindMap) { - OS.EmitIntValue(UME.ToState, 4); // ToState - OS.EmitValue(create32bitRef(UME.Cleanup), 4); // Action + for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) { + MCSymbol *CleanupSym = + getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast()); + AddComment("ToState"); + OS.EmitIntValue(UME.ToState, 4); + + AddComment("Action"); + OS.EmitValue(create32bitRef(CleanupSym), 4); } } @@ -398,33 +732,49 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { OS.EmitLabel(TryBlockMapXData); SmallVector HandlerMaps; for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { - WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; - MCSymbol *HandlerMapXData = nullptr; + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = nullptr; if (!TBME.HandlerArray.empty()) HandlerMapXData = Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$") .concat(Twine(I)) .concat("$") - .concat(ParentLinkageName)); - + .concat(FuncLinkageName)); HandlerMaps.push_back(HandlerMapXData); - int CatchHigh = -1; - for (WinEHHandlerType &HT : TBME.HandlerArray) - CatchHigh = - std::max(CatchHigh, FuncInfo.CatchHandlerMaxState[HT.Handler]); + // TBMEs should form intervals. 
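+    // Illustrative numbering (made up, not from this patch): for
+    //   try { try { ... } catch (A) { ... } } catch (B) { ... }
+    // the states might be 0 = outer try body, 1 = inner try body,
+    // 2 = catch (A), 3 = catch (B), giving TBMEs
+    //   { TryLow=1, TryHigh=1, CatchHigh=2 } and
+    //   { TryLow=0, TryHigh=2, CatchHigh=3 },
+    // which satisfy the interval asserts below.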
+ assert(0 <= TBME.TryLow && "bad trymap interval"); + assert(TBME.TryLow <= TBME.TryHigh && "bad trymap interval"); + assert(TBME.TryHigh < TBME.CatchHigh && "bad trymap interval"); + assert(TBME.CatchHigh < int(FuncInfo.CxxUnwindMap.size()) && + "bad trymap interval"); - assert(TBME.TryLow <= TBME.TryHigh); - OS.EmitIntValue(TBME.TryLow, 4); // TryLow - OS.EmitIntValue(TBME.TryHigh, 4); // TryHigh - OS.EmitIntValue(CatchHigh, 4); // CatchHigh - OS.EmitIntValue(TBME.HandlerArray.size(), 4); // NumCatches - OS.EmitValue(create32bitRef(HandlerMapXData), 4); // HandlerArray + AddComment("TryLow"); + OS.EmitIntValue(TBME.TryLow, 4); + + AddComment("TryHigh"); + OS.EmitIntValue(TBME.TryHigh, 4); + + AddComment("CatchHigh"); + OS.EmitIntValue(TBME.CatchHigh, 4); + + AddComment("NumCatches"); + OS.EmitIntValue(TBME.HandlerArray.size(), 4); + + AddComment("HandlerArray"); + OS.EmitValue(create32bitRef(HandlerMapXData), 4); + } + + // All funclets use the same parent frame offset currently. + unsigned ParentFrameOffset = 0; + if (shouldEmitPersonality) { + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + ParentFrameOffset = TFI->getWinEHParentFrameOffset(*MF); } for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { - WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; MCSymbol *HandlerMapXData = HandlerMaps[I]; if (!HandlerMapXData) continue; @@ -438,32 +788,34 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { OS.EmitLabel(HandlerMapXData); for (const WinEHHandlerType &HT : TBME.HandlerArray) { // Get the frame escape label with the offset of the catch object. If - // the index is -1, then there is no catch object, and we should emit an - // offset of zero, indicating that no copy will occur. + // the index is INT_MAX, then there is no catch object, and we should + // emit an offset of zero, indicating that no copy will occur. 
const MCExpr *FrameAllocOffsetRef = nullptr; - if (HT.CatchObjRecoverIdx >= 0) { - MCSymbol *FrameAllocOffset = - Asm->OutContext.getOrCreateFrameAllocSymbol( - GlobalValue::getRealLinkageName(ParentF->getName()), - HT.CatchObjRecoverIdx); - FrameAllocOffsetRef = MCSymbolRefExpr::create( - FrameAllocOffset, MCSymbolRefExpr::VK_None, Asm->OutContext); + if (HT.CatchObj.FrameIndex != INT_MAX) { + int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo); + FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext); } else { FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext); } - OS.EmitIntValue(HT.Adjectives, 4); // Adjectives - OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); // Type - OS.EmitValue(FrameAllocOffsetRef, 4); // CatchObjOffset - OS.EmitValue(create32bitRef(HT.Handler), 4); // Handler + MCSymbol *HandlerSym = + getMCSymbolForMBB(Asm, HT.Handler.dyn_cast()); + + AddComment("Adjectives"); + OS.EmitIntValue(HT.Adjectives, 4); + + AddComment("Type"); + OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); + + AddComment("CatchObjOffset"); + OS.EmitValue(FrameAllocOffsetRef, 4); + + AddComment("Handler"); + OS.EmitValue(create32bitRef(HandlerSym), 4); if (shouldEmitPersonality) { - MCSymbol *ParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol( - GlobalValue::getRealLinkageName(HT.Handler->getName())); - const MCSymbolRefExpr *ParentFrameOffsetRef = MCSymbolRefExpr::create( - ParentFrameOffset, Asm->OutContext); - OS.EmitValue(ParentFrameOffsetRef, 4); // ParentFrameOffset + AddComment("ParentFrameOffset"); + OS.EmitIntValue(ParentFrameOffset, 4); } } } @@ -475,87 +827,65 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { // }; if (IPToStateXData) { OS.EmitLabel(IPToStateXData); - for (auto &IPStatePair : FuncInfo.IPToStateList) { - OS.EmitValue(create32bitRef(IPStatePair.first), 4); // IP - OS.EmitIntValue(IPStatePair.second, 4); // State + for (auto &IPStatePair : IPToStateTable) { + AddComment("IP"); + OS.EmitValue(IPStatePair.first, 4); + AddComment("ToState"); + OS.EmitIntValue(IPStatePair.second, 4); } } } -void WinException::extendIP2StateTable(const MachineFunction *MF, - const Function *ParentF, - WinEHFuncInfo &FuncInfo) { - const Function *F = MF->getFunction(); +void WinException::computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl> &IPToStateTable) { - // The Itanium LSDA table sorts similar landing pads together to simplify the - // actions table, but we don't need that. - SmallVector LandingPads; - const std::vector &PadInfos = MMI->getLandingPads(); - LandingPads.reserve(PadInfos.size()); - for (const auto &LP : PadInfos) - LandingPads.push_back(&LP); - - RangeMapType PadMap; - computePadMap(LandingPads, PadMap); - - // The end label of the previous invoke or nounwind try-range. - MCSymbol *LastLabel = Asm->getFunctionBegin(); - - // Whether there is a potentially throwing instruction (currently this means - // an ordinary call) between the end of the previous try-range and now. - bool SawPotentiallyThrowing = false; - - int LastEHState = -2; - - // The parent function and the catch handlers contribute to the 'ip2state' - // table. - - // Include ip2state entries for the beginning of the main function and - // for catch handler functions. 
- if (F == ParentF) { - FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); - LastEHState = -1; - } else if (FuncInfo.HandlerBaseState.count(F)) { - FuncInfo.IPToStateList.push_back( - std::make_pair(LastLabel, FuncInfo.HandlerBaseState[F])); - LastEHState = FuncInfo.HandlerBaseState[F]; - } - for (const auto &MBB : *MF) { - for (const auto &MI : MBB) { - if (!MI.isEHLabel()) { - if (MI.isCall()) - SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); - continue; + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + // Find the end of the funclet + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + break; } + } - // End of the previous try-range? - MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); - if (BeginLabel == LastLabel) - SawPotentiallyThrowing = false; + // Don't emit ip2state entries for cleanup funclets. Any interesting + // exceptional actions in cleanups must be handled in a separate IR + // function. + if (FuncletStart->isCleanupFuncletEntry()) + continue; - // Beginning of a new try-range? - RangeMapType::const_iterator L = PadMap.find(BeginLabel); - if (L == PadMap.end()) - // Nope, it was just some random label. - continue; + MCSymbol *StartLabel; + int BaseState; + if (FuncletStart == MF->begin()) { + BaseState = NullState; + StartLabel = Asm->getFunctionBegin(); + } else { + auto *FuncletPad = + cast(FuncletStart->getBasicBlock()->getFirstNonPHI()); + assert(FuncInfo.FuncletBaseStateMap.count(FuncletPad) != 0); + BaseState = FuncInfo.FuncletBaseStateMap.find(FuncletPad)->second; + StartLabel = getMCSymbolForMBB(Asm, &*FuncletStart); + } + assert(StartLabel && "need local function start label"); + IPToStateTable.push_back( + std::make_pair(create32bitRef(StartLabel), BaseState)); - const PadRange &P = L->second; - const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && - "Inconsistent landing pad map!"); - - // FIXME: Should this be using FuncInfo.HandlerBaseState? - if (SawPotentiallyThrowing && LastEHState != -1) { - FuncInfo.IPToStateList.push_back(std::make_pair(LastLabel, -1)); - SawPotentiallyThrowing = false; - LastEHState = -1; - } - - if (LandingPad->WinEHState != LastEHState) - FuncInfo.IPToStateList.push_back( - std::make_pair(BeginLabel, LandingPad->WinEHState)); - LastEHState = LandingPad->WinEHState; - LastLabel = LandingPad->EndLabels[P.RangeIndex]; + for (const auto &StateChange : InvokeStateChangeIterator::range( + FuncInfo, FuncletStart, FuncletEnd, BaseState)) { + // Compute the label to report as the start of this entry; use the EH + // start label for the invoke if we have one, otherwise (this is a call + // which may unwind to our caller and does not have an EH start label, so) + // use the previous end label. + const MCSymbol *ChangeLabel = StateChange.NewStartLabel; + if (!ChangeLabel) + ChangeLabel = StateChange.PreviousEndLabel; + // Emit an entry indicating that PCs after 'Label' have this EH state. + IPToStateTable.push_back( + std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState)); + // FIXME: assert that NewState is between CatchLow and CatchHigh. } } } @@ -566,15 +896,15 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, // registration in order to recover the parent frame pointer. 
Now that we know // we've code generated the parent, we can emit the label assignment that // those helpers use to get the offset of the registration node. - assert(FuncInfo.EHRegNodeEscapeIndex != INT_MAX && - "no EH reg node localescape index"); + MCContext &Ctx = Asm->OutContext; MCSymbol *ParentFrameOffset = - Asm->OutContext.getOrCreateParentFrameOffsetSymbol(FLinkageName); - MCSymbol *RegistrationOffsetSym = Asm->OutContext.getOrCreateFrameAllocSymbol( - FLinkageName, FuncInfo.EHRegNodeEscapeIndex); - const MCExpr *RegistrationOffsetSymRef = - MCSymbolRefExpr::create(RegistrationOffsetSym, Asm->OutContext); - Asm->OutStreamer->EmitAssignment(ParentFrameOffset, RegistrationOffsetSymRef); + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + unsigned UnusedReg; + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + int64_t Offset = TFI->getFrameIndexReference( + *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg); + const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); } /// Emit the language-specific data that _except_handler3 and 4 expect. This is @@ -585,7 +915,13 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { const Function *F = MF->getFunction(); StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName()); - WinEHFuncInfo &FuncInfo = MMI->getWinEHFuncInfo(F); + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); // Emit the __ehtable label that we use for llvm.x86.seh.lsda. @@ -611,58 +947,290 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) { // // Only the EHCookieOffset field appears to vary, and it appears to be the // offset from the final saved SP value to the retaddr. + AddComment("GSCookieOffset"); OS.EmitIntValue(-2, 4); + AddComment("GSCookieXOROffset"); OS.EmitIntValue(0, 4); // FIXME: Calculate. + AddComment("EHCookieOffset"); OS.EmitIntValue(9999, 4); + AddComment("EHCookieXOROffset"); OS.EmitIntValue(0, 4); BaseState = -2; } - // Build a list of pointers to LandingPadInfos and then sort by WinEHState. - const std::vector &PadInfos = MMI->getLandingPads(); - SmallVector LPads; - LPads.reserve((PadInfos.size())); - for (const LandingPadInfo &LPInfo : PadInfos) - LPads.push_back(&LPInfo); - std::sort(LPads.begin(), LPads.end(), - [](const LandingPadInfo *L, const LandingPadInfo *R) { - return L->WinEHState < R->WinEHState; - }); - - // For each action in each lpad, emit one of these: - // struct ScopeTableEntry { - // int32_t EnclosingLevel; - // int32_t (__cdecl *Filter)(); - // void *HandlerOrFinally; - // }; - // - // The "outermost" action will use BaseState as its enclosing level. Each - // other action will refer to the previous state as its enclosing level. 
- int CurState = 0; - for (const LandingPadInfo *LPInfo : LPads) { - int EnclosingLevel = BaseState; - assert(CurState + int(LPInfo->SEHHandlers.size()) - 1 == - LPInfo->WinEHState && - "gaps in the SEH scope table"); - for (auto I = LPInfo->SEHHandlers.rbegin(), E = LPInfo->SEHHandlers.rend(); - I != E; ++I) { - const SEHHandler &Handler = *I; - const BlockAddress *BA = Handler.RecoverBA; - const Function *F = Handler.FilterOrFinally; - assert(F && "cannot catch all in 32-bit SEH without filter function"); - const MCExpr *FilterOrNull = - create32bitRef(BA ? Asm->getSymbol(F) : nullptr); - const MCExpr *ExceptOrFinally = create32bitRef( - BA ? Asm->GetBlockAddressSymbol(BA) : Asm->getSymbol(F)); - - OS.EmitIntValue(EnclosingLevel, 4); - OS.EmitValue(FilterOrNull, 4); - OS.EmitValue(ExceptOrFinally, 4); - - // The next state unwinds to this state. - EnclosingLevel = CurState; - CurState++; - } + assert(!FuncInfo.SEHUnwindMap.empty()); + for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) { + auto *Handler = UME.Handler.get(); + const MCSymbol *ExceptOrFinally = + UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol(); + // -1 is usually the base state for "unwind to caller", but for + // _except_handler4 it's -2. Do that replacement here if necessary. + int ToState = UME.ToState == -1 ? BaseState : UME.ToState; + AddComment("ToState"); + OS.EmitIntValue(ToState, 4); + AddComment(UME.IsFinally ? "Null" : "FilterFunction"); + OS.EmitValue(create32bitRef(UME.Filter), 4); + AddComment(UME.IsFinally ? "FinallyFunclet" : "ExceptionHandler"); + OS.EmitValue(create32bitRef(ExceptOrFinally), 4); + } +} + +static int getRank(const WinEHFuncInfo &FuncInfo, int State) { + int Rank = 0; + while (State != -1) { + ++Rank; + State = FuncInfo.ClrEHUnwindMap[State].Parent; + } + return Rank; +} + +static int getAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) { + int LeftRank = getRank(FuncInfo, Left); + int RightRank = getRank(FuncInfo, Right); + + while (LeftRank < RightRank) { + Right = FuncInfo.ClrEHUnwindMap[Right].Parent; + --RightRank; + } + + while (RightRank < LeftRank) { + Left = FuncInfo.ClrEHUnwindMap[Left].Parent; + --LeftRank; + } + + while (Left != Right) { + Left = FuncInfo.ClrEHUnwindMap[Left].Parent; + Right = FuncInfo.ClrEHUnwindMap[Right].Parent; + } + + return Left; +} + +void WinException::emitCLRExceptionTable(const MachineFunction *MF) { + // CLR EH "states" are really just IDs that identify handlers/funclets; + // states, handlers, and funclets all have 1:1 mappings between them, and a + // handler/funclet's "state" is its index in the ClrEHUnwindMap. + MCStreamer &OS = *Asm->OutStreamer; + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + MCSymbol *FuncBeginSym = Asm->getFunctionBegin(); + MCSymbol *FuncEndSym = Asm->getFunctionEnd(); + + // A ClrClause describes a protected region. + struct ClrClause { + const MCSymbol *StartLabel; // Start of protected region + const MCSymbol *EndLabel; // End of protected region + int State; // Index of handler protecting the protected region + int EnclosingState; // Index of funclet enclosing the protected region + }; + SmallVector Clauses; + + // Build a map from handler MBBs to their corresponding states (i.e. their + // indices in the ClrEHUnwindMap). 
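+  // A made-up example of the numbering this relies on: for
+  //   try { try { ... } fault { ... } } catch (C) { ... }
+  // the map could be state 0 = catch C (Parent = -1) and state 1 = the
+  // fault (Parent = 0); every entry's Parent is a smaller index than the
+  // entry itself, which the assert below checks.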
+ int NumStates = FuncInfo.ClrEHUnwindMap.size(); + assert(NumStates > 0 && "Don't need exception table!"); + DenseMap HandlerStates; + for (int State = 0; State < NumStates; ++State) { + MachineBasicBlock *HandlerBlock = + FuncInfo.ClrEHUnwindMap[State].Handler.get(); + HandlerStates[HandlerBlock] = State; + // Use this loop through all handlers to verify our assumption (used in + // the MinEnclosingState computation) that ancestors have lower state + // numbers than their descendants. + assert(FuncInfo.ClrEHUnwindMap[State].Parent < State && + "ill-formed state numbering"); + } + // Map the main function to the NullState. + HandlerStates[&MF->front()] = NullState; + + // Write out a sentinel indicating the end of the standard (Windows) xdata + // and the start of the additional (CLR) info. + OS.EmitIntValue(0xffffffff, 4); + // Write out the number of funclets + OS.EmitIntValue(NumStates, 4); + + // Walk the machine blocks/instrs, computing and emitting a few things: + // 1. Emit a list of the offsets to each handler entry, in lexical order. + // 2. Compute a map (EndSymbolMap) from each funclet to the symbol at its end. + // 3. Compute the list of ClrClauses, in the required order (inner before + // outer, earlier before later; the order by which a forward scan with + // early termination will find the innermost enclosing clause covering + // a given address). + // 4. A map (MinClauseMap) from each handler index to the index of the + // outermost funclet/function which contains a try clause targeting the + // key handler. This will be used to determine IsDuplicate-ness when + // emitting ClrClauses. The NullState value is used to indicate that the + // top-level function contains a try clause targeting the key handler. + // HandlerStack is a stack of (PendingStartLabel, PendingState) pairs for + // try regions we entered before entering the PendingState try but which + // we haven't yet exited. + SmallVector, 4> HandlerStack; + // EndSymbolMap and MinClauseMap are maps described above. + std::unique_ptr EndSymbolMap(new MCSymbol *[NumStates]); + SmallVector MinClauseMap((size_t)NumStates, NumStates); + + // Visit the root function and each funclet. + + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + int FuncletState = HandlerStates[&*FuncletStart]; + // Find the end of the funclet + MCSymbol *EndSymbol = FuncEndSym; + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + EndSymbol = getMCSymbolForMBB(Asm, &*FuncletEnd); + break; + } + } + // Emit the function/funclet end and, if this is a funclet (and not the + // root function), record it in the EndSymbolMap. + OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4); + if (FuncletState != NullState) { + // Record the end of the handler. + EndSymbolMap[FuncletState] = EndSymbol; + } + + // Walk the state changes in this function/funclet and compute its clauses. + // Funclets always start in the null state. 
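+    // Sketch of the walk on made-up states: suppose a funclet reports the
+    // transitions null -> 1, 1 -> 0, 0 -> null, where state 1 is a child of
+    // state 0. The first transition opens state 1's region; 1 -> 0 closes
+    // the clause for state 1 while its ancestor 0 stays open; and the final
+    // transition back to null closes state 0's clause, leaving HandlerStack
+    // empty again as asserted after the loop.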
+ const MCSymbol *CurrentStartLabel = nullptr; + int CurrentState = NullState; + assert(HandlerStack.empty()); + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) { + // Close any try regions we're not still under + int AncestorState = + getAncestor(FuncInfo, CurrentState, StateChange.NewState); + while (CurrentState != AncestorState) { + assert(CurrentState != NullState && "Failed to find ancestor!"); + // Close the pending clause + Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel, + CurrentState, FuncletState}); + // Now the parent handler is current + CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].Parent; + // Pop the new start label from the handler stack if we've exited all + // descendants of the corresponding handler. + if (HandlerStack.back().second == CurrentState) + CurrentStartLabel = HandlerStack.pop_back_val().first; + } + + if (StateChange.NewState != CurrentState) { + // For each clause we're starting, update the MinClauseMap so we can + // know which is the topmost funclet containing a clause targeting + // it. + for (int EnteredState = StateChange.NewState; + EnteredState != CurrentState; + EnteredState = FuncInfo.ClrEHUnwindMap[EnteredState].Parent) { + int &MinEnclosingState = MinClauseMap[EnteredState]; + if (FuncletState < MinEnclosingState) + MinEnclosingState = FuncletState; + } + // Save the previous current start/label on the stack and update to + // the newly-current start/state. + HandlerStack.emplace_back(CurrentStartLabel, CurrentState); + CurrentStartLabel = StateChange.NewStartLabel; + CurrentState = StateChange.NewState; + } + } + assert(HandlerStack.empty()); + } + + // Now emit the clause info, starting with the number of clauses. + OS.EmitIntValue(Clauses.size(), 4); + for (ClrClause &Clause : Clauses) { + // Emit a CORINFO_EH_CLAUSE : + /* + struct CORINFO_EH_CLAUSE + { + CORINFO_EH_CLAUSE_FLAGS Flags; // actually a CorExceptionFlag + DWORD TryOffset; + DWORD TryLength; // actually TryEndOffset + DWORD HandlerOffset; + DWORD HandlerLength; // actually HandlerEndOffset + union + { + DWORD ClassToken; // use for catch clauses + DWORD FilterOffset; // use for filter clauses + }; + }; + + enum CORINFO_EH_CLAUSE_FLAGS + { + CORINFO_EH_CLAUSE_NONE = 0, + CORINFO_EH_CLAUSE_FILTER = 0x0001, // This clause is for a filter + CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause + CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause + }; + typedef enum CorExceptionFlag + { + COR_ILEXCEPTION_CLAUSE_NONE, + COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // This is a filter clause + COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This is a finally clause + COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // This is a fault clause + COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This + // clause was duplicated + // to a funclet which was + // pulled out of line + } CorExceptionFlag; + */ + // Add 1 to the start/end of the EH clause; the IP associated with a + // call when the runtime does its scan is the IP of the next instruction + // (the one to which control will return after the call), so we need + // to add 1 to the end of the clause to cover that offset. We also add + // 1 to the start of the clause to make sure that the ranges reported + // for all clauses are disjoint. 
Note that we'll need some additional + // logic when machine traps are supported, since in that case the IP + // that the runtime uses is the offset of the faulting instruction + // itself; if such an instruction immediately follows a call but the + // two belong to different clauses, we'll need to insert a nop between + // them so the runtime can distinguish the point to which the call will + // return from the point at which the fault occurs. + + const MCExpr *ClauseBegin = + getOffsetPlusOne(Clause.StartLabel, FuncBeginSym); + const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym); + + const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State]; + MachineBasicBlock *HandlerBlock = Entry.Handler.get(); + MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock); + const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym); + MCSymbol *EndSym = EndSymbolMap[Clause.State]; + const MCExpr *HandlerEnd = getOffset(EndSym, FuncBeginSym); + + uint32_t Flags = 0; + switch (Entry.HandlerType) { + case ClrHandlerType::Catch: + // Leaving bits 0-2 clear indicates catch. + break; + case ClrHandlerType::Filter: + Flags |= 1; + break; + case ClrHandlerType::Finally: + Flags |= 2; + break; + case ClrHandlerType::Fault: + Flags |= 4; + break; + } + if (Clause.EnclosingState != MinClauseMap[Clause.State]) { + // This is a "duplicate" clause; the handler needs to be entered from a + // frame above the one holding the invoke. + assert(Clause.EnclosingState > MinClauseMap[Clause.State]); + Flags |= 8; + } + OS.EmitIntValue(Flags, 4); + + // Write the clause start/end + OS.EmitValue(ClauseBegin, 4); + OS.EmitValue(ClauseEnd, 4); + + // Write out the handler start/end + OS.EmitValue(HandlerBegin, 4); + OS.EmitValue(HandlerEnd, 4); + + // Write out the type token or filter offset + assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters"); + OS.EmitIntValue(Entry.TypeToken, 4); } } diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h index 669c9cc366ba..acb301016910 100644 --- a/lib/CodeGen/AsmPrinter/WinException.h +++ b/lib/CodeGen/AsmPrinter/WinException.h @@ -21,6 +21,7 @@ class Function; class GlobalValue; class MachineFunction; class MCExpr; +class Value; struct WinEHFuncInfo; class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { @@ -36,7 +37,14 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// True if this is a 64-bit target and we should use image relative offsets. bool useImageRel32 = false; - void emitCSpecificHandlerTable(); + /// Pointer to the current funclet entry BB. + const MachineBasicBlock *CurrentFuncletEntry = nullptr; + + void emitCSpecificHandlerTable(const MachineFunction *MF); + + void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State); /// Emit the EH table data for 32-bit and 64-bit functions using /// the __CxxFrameHandler3 personality. @@ -47,8 +55,11 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { /// tables. void emitExceptHandlerTable(const MachineFunction *MF); - void extendIP2StateTable(const MachineFunction *MF, const Function *ParentF, - WinEHFuncInfo &FuncInfo); + void emitCLRExceptionTable(const MachineFunction *MF); + + void computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl> &IPToStateTable); /// Emits the label used with llvm.x86.seh.recoverfp, which is used by /// outlined funclets. 
@@ -57,6 +68,16 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
   const MCExpr *create32bitRef(const MCSymbol *Value);
   const MCExpr *create32bitRef(const GlobalValue *GV);
+  const MCExpr *getLabelPlusOne(const MCSymbol *Label);
+  const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom);
+  const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf,
+                                 const MCSymbol *OffsetFrom);
+
+  /// Gets the offset that we should use in a table for a stack object with the
+  /// given index. For targets using CFI (Win64, etc), this is relative to the
+  /// established SP at the end of the prologue. For targets without CFI (Win32
+  /// only), it is relative to the frame pointer.
+  int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo);

 public:
   //===--------------------------------------------------------------------===//
@@ -74,6 +95,10 @@ public:
   /// Gather and emit post-function exception information.
   void endFunction(const MachineFunction *) override;
+
+  /// \brief Emit target-specific EH funclet machinery.
+  void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override;
+  void endFunclet() override;
 };
 }
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 530ab46db03b..d12fdb246984 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -8,10 +8,14 @@
 //===----------------------------------------------------------------------===//
 //
 // This file contains a pass (at IR level) to replace atomic instructions with
-// either (intrinsic-based) load-linked/store-conditional loops or AtomicCmpXchg.
+// target-specific instructions which implement the same semantics in a way
+// that better fits the target backend. This can include the use of either
+// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or
+// type coercions.
//
//===----------------------------------------------------------------------===//

+#include "llvm/CodeGen/AtomicExpandUtils.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
@@ -20,6 +24,7 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetSubtargetInfo.h"
@@ -44,13 +49,17 @@ namespace {
   private:
     bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
                                bool IsStore, bool IsLoad);
-    bool expandAtomicLoad(LoadInst *LI);
+    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
+    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
+    bool tryExpandAtomicLoad(LoadInst *LI);
     bool expandAtomicLoadToLL(LoadInst *LI);
     bool expandAtomicLoadToCmpXchg(LoadInst *LI);
+    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
     bool expandAtomicStore(StoreInst *SI);
     bool tryExpandAtomicRMW(AtomicRMWInst *AI);
-    bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
-    bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI);
+    bool expandAtomicOpToLLSC(
+        Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+        std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool isIdempotentRMW(AtomicRMWInst *AI);
     bool simplifyIdempotentRMW(AtomicRMWInst *AI);
@@ -108,7 +117,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
         FenceOrdering = RMWI->getOrdering();
         RMWI->setOrdering(Monotonic);
         IsStore = IsLoad = true;
-      } else if (CASI && !TLI->hasLoadLinkedStoreConditional() &&
+      } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
                  (isAtLeastRelease(CASI->getSuccessOrdering()) ||
                   isAtLeastAcquire(CASI->getSuccessOrdering()))) {
         // If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -126,10 +135,28 @@
       }
     }

-    if (LI && TLI->shouldExpandAtomicLoadInIR(LI)) {
-      MadeChange |= expandAtomicLoad(LI);
-    } else if (SI && TLI->shouldExpandAtomicStoreInIR(SI)) {
-      MadeChange |= expandAtomicStore(SI);
+    if (LI) {
+      if (LI->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
+        LI = convertAtomicLoadToIntegerType(LI);
+        assert(LI->getType()->isIntegerTy() && "invariant broken");
+        MadeChange = true;
+      }
+
+      MadeChange |= tryExpandAtomicLoad(LI);
+    } else if (SI) {
+      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
+        // TODO: add a TLI hook to control this so that each target can
+        // convert to lowering the original type one at a time.
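+        // A minimal IR sketch of the conversion done below (names and types
+        // are illustrative):
+        //   store atomic float %f, float* %p seq_cst, align 4
+        // becomes
+        //   %1 = bitcast float %f to i32
+        //   %2 = bitcast float* %p to i32*
+        //   store atomic i32 %1, i32* %2 seq_cst, align 4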
+        SI = convertAtomicStoreToIntegerType(SI);
+        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
+               "invariant broken");
+        MadeChange = true;
+      }
+
+      if (TLI->shouldExpandAtomicStoreInIR(SI))
+        MadeChange |= expandAtomicStore(SI);
     } else if (RMWI) {
       // There are two different ways of expanding RMW instructions:
       // - into a load if it is idempotent
@@ -141,7 +168,7 @@
       } else {
         MadeChange |= tryExpandAtomicRMW(RMWI);
       }
-    } else if (CASI && TLI->hasLoadLinkedStoreConditional()) {
+    } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) {
       MadeChange |= expandAtomicCmpXchg(CASI);
     }
   }
@@ -169,11 +196,56 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
   return (LeadingFence || TrailingFence);
 }

-bool AtomicExpand::expandAtomicLoad(LoadInst *LI) {
-  if (TLI->hasLoadLinkedStoreConditional())
+/// Get the iX type with the same bitwidth as T.
+IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
+                                                       const DataLayout &DL) {
+  EVT VT = TLI->getValueType(DL, T);
+  unsigned BitWidth = VT.getStoreSizeInBits();
+  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
+  return IntegerType::get(T->getContext(), BitWidth);
+}
+
+/// Convert an atomic load of a non-integral type to an integer load of the
+/// equivalent bitwidth. See the function comment on
+/// convertAtomicStoreToIntegerType for background.
+LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
+  auto *M = LI->getModule();
+  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
+                                            M->getDataLayout());
+
+  IRBuilder<> Builder(LI);
+
+  Value *Addr = LI->getPointerOperand();
+  Type *PT = PointerType::get(NewTy,
+                              Addr->getType()->getPointerAddressSpace());
+  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
+
+  auto *NewLI = Builder.CreateLoad(NewAddr);
+  NewLI->setAlignment(LI->getAlignment());
+  NewLI->setVolatile(LI->isVolatile());
+  NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
+  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
+
+  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
+  LI->replaceAllUsesWith(NewVal);
+  LI->eraseFromParent();
+  return NewLI;
+}
+
+bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
+  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC:
+    return expandAtomicOpToLLSC(
+        LI, LI->getPointerOperand(), LI->getOrdering(),
+        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
     return expandAtomicLoadToLL(LI);
-  else
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
     return expandAtomicLoadToCmpXchg(LI);
+  }
+  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
 }

 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
@@ -184,6 +256,7 @@
   // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
   Value *Val = TLI->emitLoadLinked(Builder, LI->getPointerOperand(),
                                    LI->getOrdering());
+  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -209,6 +282,35 @@ bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
   return true;
 }

+/// Convert an atomic store of a non-integral type to an integer store of the
+/// equivalent bitwidth. We used to not support floating point or vector
+/// atomics in the IR at all.
The backends learned to deal with the bitcast +/// idiom because that was the only way of expressing the notion of an atomic +/// float or vector store. The long term plan is to teach each backend to +/// instruction select from the original atomic store, but as a migration +/// mechanism, we convert back to the old format which the backends understand. +/// Each backend will need individual work to recognize the new format. +StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { + IRBuilder<> Builder(SI); + auto *M = SI->getModule(); + Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), + M->getDataLayout()); + Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); + + Value *Addr = SI->getPointerOperand(); + Type *PT = PointerType::get(NewTy, + Addr->getType()->getPointerAddressSpace()); + Value *NewAddr = Builder.CreateBitCast(Addr, PT); + + StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); + NewSI->setAlignment(SI->getAlignment()); + NewSI->setVolatile(SI->isVolatile()); + NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope()); + DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); + SI->eraseFromParent(); + return NewSI; +} + bool AtomicExpand::expandAtomicStore(StoreInst *SI) { // This function is only called on atomic stores that are too large to be // atomic if implemented as a native store. So we replace them by an @@ -226,23 +328,15 @@ bool AtomicExpand::expandAtomicStore(StoreInst *SI) { return tryExpandAtomicRMW(AI); } -bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { - switch (TLI->shouldExpandAtomicRMWInIR(AI)) { - case TargetLoweringBase::AtomicRMWExpansionKind::None: - return false; - case TargetLoweringBase::AtomicRMWExpansionKind::LLSC: { - assert(TLI->hasLoadLinkedStoreConditional() && - "TargetLowering requested we expand AtomicRMW instruction into " - "load-linked/store-conditional combos, but such instructions aren't " - "supported"); - - return expandAtomicRMWToLLSC(AI); - } - case TargetLoweringBase::AtomicRMWExpansionKind::CmpXChg: { - return expandAtomicRMWToCmpXchg(AI); - } - } - llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); +static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, + Value *Loaded, Value *NewVal, + AtomicOrdering MemOpOrder, + Value *&Success, Value *&NewLoaded) { + Value* Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Success = Builder.CreateExtractValue(Pair, 1, "success"); + NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); } /// Emit IR to implement the given atomicrmw operation on values in registers, @@ -282,10 +376,28 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, } } -bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { - AtomicOrdering MemOpOrder = AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); +bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { + switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + case TargetLoweringBase::AtomicExpansionKind::None: + return false; + case TargetLoweringBase::AtomicExpansionKind::LLSC: + return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(), + [&](IRBuilder<> &Builder, Value *Loaded) { + return performAtomicOp(AI->getOperation(), + Builder, Loaded, + AI->getValOperand()); + }); + case TargetLoweringBase::AtomicExpansionKind::CmpXChg: + return expandAtomicRMWToCmpXchg(AI,
createCmpXchgInstFun); + default: + llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); + } +} + +bool AtomicExpand::expandAtomicOpToLLSC( + Instruction *I, Value *Addr, AtomicOrdering MemOpOrder, + std::function<Value *(IRBuilder<> &, Value *)> PerformOp) { + BasicBlock *BB = I->getParent(); Function *F = BB->getParent(); LLVMContext &Ctx = F->getContext(); @@ -303,11 +415,11 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { // atomicrmw.end: // fence? // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end"); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); + // This grabs the DebugLoc from I. + IRBuilder<> Builder(I); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we might want a fence too. It's easiest to just remove @@ -320,8 +432,7 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { Builder.SetInsertPoint(LoopBB); Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); - Value *NewVal = - performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + Value *NewVal = PerformOp(Builder, Loaded); Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); @@ -331,72 +442,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) { Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - AI->replaceAllUsesWith(Loaded); - AI->eraseFromParent(); - - return true; -} - -bool AtomicExpand::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI) { - AtomicOrdering MemOpOrder = - AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); - Value *Addr = AI->getPointerOperand(); - BasicBlock *BB = AI->getParent(); - Function *F = BB->getParent(); - LLVMContext &Ctx = F->getContext(); - - // Given: atomicrmw some_op iN* %addr, iN %incr ordering - // - // The standard expansion we produce is: - // [...] - // %init_loaded = load atomic iN* %addr - // br label %loop - // loop: - // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] - // %new = some_op iN %loaded, %incr - // %pair = cmpxchg iN* %addr, iN %loaded, iN %new - // %new_loaded = extractvalue { iN, i1 } %pair, 0 - // %success = extractvalue { iN, i1 } %pair, 1 - // br i1 %success, label %atomicrmw.end, label %loop - // atomicrmw.end: - // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); - BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - - // This grabs the DebugLoc from AI. - IRBuilder<> Builder(AI); - - // The split call above "helpfully" added a branch at the end of BB (to the - // wrong place), but we want a load. It's easiest to just remove - // the branch entirely. - std::prev(BB->end())->eraseFromParent(); - Builder.SetInsertPoint(BB); - LoadInst *InitLoaded = Builder.CreateLoad(Addr); - // Atomics require at least natural alignment. - InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); - Builder.CreateBr(LoopBB); - - // Start the main loop block now that we've taken care of the preliminaries.
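(To make the new PerformOp parameter concrete: a hypothetical additional call site could look like the sketch below. The two real callers are tryExpandAtomicLoad, which passes an identity lambda, and tryExpandAtomicRMW, which wraps performAtomicOp; this exact call does not appear in the patch.)

    // Hypothetical caller: expand an atomic add through the generic LL/SC
    // loop, computing the new value from the load-linked result.
    expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
                         [&](IRBuilder<> &Builder, Value *Loaded) {
                           return Builder.CreateAdd(Loaded, AI->getValOperand());
                         });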
- Builder.SetInsertPoint(LoopBB); - PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); - Loaded->addIncoming(InitLoaded, BB); - - Value *NewVal = - performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); - - Value *Pair = Builder.CreateAtomicCmpXchg( - Addr, Loaded, NewVal, MemOpOrder, - AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); - Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); - Loaded->addIncoming(NewLoaded, LoopBB); - - Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); - Builder.CreateCondBr(Success, ExitBB, LoopBB); - - Builder.SetInsertPoint(ExitBB, ExitBB->begin()); - - AI->replaceAllUsesWith(NewLoaded); - AI->eraseFromParent(); + I->replaceAllUsesWith(Loaded); + I->eraseFromParent(); return true; } @@ -424,7 +471,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // %loaded = @load.linked(%addr) // %should_store = icmp eq %loaded, %desired // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.failure + // label %cmpxchg.nostore // cmpxchg.trystore: // %stored = @store_conditional(%new, %addr) // %success = icmp eq i32 %stored, 0 @@ -432,6 +479,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // cmpxchg.success: // fence? // br label %cmpxchg.end + // cmpxchg.nostore: + // @load_linked_fail_balance()? + // br label %cmpxchg.failure // cmpxchg.failure: // fence? // br label %cmpxchg.end @@ -440,9 +490,10 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] - BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); + BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); - auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); @@ -466,7 +517,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). - Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); + Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); Builder.SetInsertPoint(TryStoreBB); Value *StoreSuccess = TLI->emitStoreConditional( @@ -482,6 +533,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { /*IsLoad=*/true); Builder.CreateBr(ExitBB); + Builder.SetInsertPoint(NoStoreBB); + // In the failing case, where we don't execute the store-conditional, the + // target might want to balance out the load-linked with a dedicated + // instruction (e.g., on ARM, clearing the exclusive monitor). 
+ TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); + Builder.CreateBr(FailureBB); + Builder.SetInsertPoint(FailureBB); TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, /*IsLoad=*/true); @@ -556,9 +614,77 @@ bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { - if (TLI->shouldExpandAtomicLoadInIR(ResultingLoad)) - expandAtomicLoad(ResultingLoad); + tryExpandAtomicLoad(ResultingLoad); return true; } return false; } + +bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg) { + assert(AI); + + AtomicOrdering MemOpOrder = + AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // %init_loaded = load atomic iN* %addr + // br label %loop + // loop: + // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] + // %new = some_op iN %loaded, %incr + // %pair = cmpxchg iN* %addr, iN %loaded, iN %new + // %new_loaded = extractvalue { iN, i1 } %pair, 0 + // %success = extractvalue { iN, i1 } %pair, 1 + // br i1 %success, label %atomicrmw.end, label %loop + // atomicrmw.end: + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we want a load. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + LoadInst *InitLoaded = Builder.CreateLoad(Addr); + // Atomics require at least natural alignment. + InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. 
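(The loop being assembled in llvm::expandAtomicRMWToCmpXchg here has the same semantics as this portable C++; an illustration only, not part of the patch.)

    #include <atomic>

    // Portable analogue of the cmpxchg expansion: recompute the new value and
    // retry the compare-and-swap until no other thread intervened.
    int atomicAdd(std::atomic<int> &A, int Incr) {
      int Loaded = A.load(std::memory_order_relaxed);   // %init_loaded
      while (!A.compare_exchange_strong(                // %pair = cmpxchg
                 Loaded, Loaded + Incr,                 // %new = add %loaded, %incr
                 std::memory_order_seq_cst)) {
        // On failure, compare_exchange_strong refreshes Loaded (the phi).
      }
      return Loaded;                                    // the old value
    }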
+ Builder.SetInsertPoint(LoopBB); + PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + Loaded->addIncoming(InitLoaded, BB); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *NewLoaded = nullptr; + Value *Success = nullptr; + + CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder, + Success, NewLoaded); + assert(Success && NewLoaded); + + Loaded->addIncoming(NewLoaded, LoopBB); + + Builder.CreateCondBr(Success, ExitBB, LoopBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + AI->replaceAllUsesWith(NewLoaded); + AI->eraseFromParent(); + + return true; +} diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index db00910cd018..a67e194356d8 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -33,6 +33,6 @@ cl::opt<unsigned> cl::desc("Threshold for partial unrolling"), cl::Hidden); -BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, Function &F) +BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 618266731c06..604feeddd355 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -12,7 +12,8 @@ // it then removes. // // Note that this pass must be run after register allocation, it cannot handle -// SSA form. +// SSA form. It also must handle virtual registers for targets that emit virtual +// ISA (e.g. NVPTX). // //===----------------------------------------------------------------------===// @@ -20,6 +21,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -95,7 +97,7 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { // TailMerge can create jump into if branches that make CFG irreducible for // HW that requires structurized CFG. bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && - PassConfig->getEnableTailMerge(); + PassConfig->getEnableTailMerge(); BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, getAnalysis<MachineBlockFrequencyInfo>(), getAnalysis<MachineBranchProbabilityInfo>()); @@ -132,6 +134,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { // Remove the block. MF->erase(MBB); + FuncletMembership.erase(MBB); } /// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def @@ -150,9 +153,13 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - ImpDefRegs.insert(*SubRegs); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - ImpDefRegs.insert(*SubRegs); + SubRegs.isValid(); ++SubRegs) + ImpDefRegs.insert(*SubRegs); + } else { + ImpDefRegs.insert(Reg); + } ++I; } if (ImpDefRegs.empty()) @@ -163,8 +170,7 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!TII->isUnpredicatedTerminator(I)) return false; // See if it uses any of the implicitly defined registers.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); + for (const MachineOperand &MO : I->operands()) { if (!MO.isReg() || !MO.isUse()) continue; unsigned Reg = MO.getReg(); @@ -208,14 +214,17 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Fix CFG. The later algorithms expect it to be right. bool MadeChange = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) { - MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr; + for (MachineBasicBlock &MBB : MF) { + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector<MachineOperand, 4> Cond; - if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true)) - MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); - MadeChange |= OptimizeImpDefsBlock(MBB); + if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, true)) + MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); + MadeChange |= OptimizeImpDefsBlock(&MBB); } + // Recalculate funclet membership. + FuncletMembership = getFuncletMembership(MF); + bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { MadeChangeThisIteration = TailMergeBlocks(MF); @@ -235,12 +244,9 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, // Walk the function to find jump tables that are live. BitVector JTIsLive(JTI->getJumpTables().size()); - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); - BB != E; ++BB) { - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); - I != E; ++I) - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - MachineOperand &Op = I->getOperand(op); + for (const MachineBasicBlock &BB : MF) { + for (const MachineInstr &I : BB) + for (const MachineOperand &Op : I.operands()) { if (!Op.isJTI()) continue; // Remember that this JT is live. @@ -365,7 +371,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } // Back past possible debugging pseudos at beginning of block. This matters // when one block differs from the other only by whether debugging pseudos - // are present at the beginning. (This way, the various checks later for + // are present at the beginning. (This way, the various checks later for // I1==MBB1->begin() work as expected.) if (I1 == MBB1->begin() && I2 != MBB2->begin()) { --I2; @@ -426,7 +432,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineFunction &MF = *CurMBB.getParent(); // Create the fall-through block. - MachineFunction::iterator MBBI = &CurMBB; + MachineFunction::iterator MBBI = CurMBB.getIterator(); MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); @@ -445,6 +451,11 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // For targets that use the register scavenger, we must maintain LiveIns. MaintainLiveIns(&CurMBB, NewMBB); + // Add the new block to the funclet.
+ const auto &FuncletI = FuncletMembership.find(&CurMBB); + if (FuncletI != FuncletMembership.end()) + FuncletMembership[NewMBB] = FuncletI->second; + return NewMBB; } @@ -479,7 +490,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { - MachineBasicBlock *NextBB = I; + MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); @@ -549,14 +560,23 @@ static unsigned CountTerminators(MachineBasicBlock *MBB, /// and decide if it would be profitable to merge those tails. Return the /// length of the common tail and iterators to the first common instruction /// in each block. -static bool ProfitableToMerge(MachineBasicBlock *MBB1, - MachineBasicBlock *MBB2, - unsigned minCommonTailLength, - unsigned &CommonTailLen, - MachineBasicBlock::iterator &I1, - MachineBasicBlock::iterator &I2, - MachineBasicBlock *SuccBB, - MachineBasicBlock *PredBB) { +static bool +ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, + unsigned minCommonTailLength, unsigned &CommonTailLen, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB, + DenseMap<const MachineBasicBlock *, int> &FuncletMembership) { + // It is never profitable to tail-merge blocks from two different funclets. + if (!FuncletMembership.empty()) { + auto Funclet1 = FuncletMembership.find(MBB1); + assert(Funclet1 != FuncletMembership.end()); + auto Funclet2 = FuncletMembership.find(MBB2); + assert(Funclet2 != FuncletMembership.end()); + if (Funclet1->second != Funclet2->second) + return false; + } + CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); if (CommonTailLen == 0) return false; @@ -600,12 +620,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); - if (EffectiveTailLen >= 2 && - MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize) && - (I1 == MBB1->begin() || I2 == MBB2->begin())) - return true; - - return false; + return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() && + (I1 == MBB1->begin() || I2 == MBB2->begin()); } /// ComputeSameTails - Look through all the blocks in MergePotentials that have @@ -634,7 +650,8 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash, if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), minCommonTailLength, CommonTailLen, TrialBBI1, TrialBBI2, - SuccBB, PredBB)) { + SuccBB, PredBB, + FuncletMembership)) { if (CommonTailLen > maxCommonTailLength) { SameTails.clear(); maxCommonTailLength = CommonTailLen; @@ -776,7 +793,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, if (MBBICommon->mayLoad() || MBBICommon->mayStore()) if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) - MBBICommon->clearMemRefs(); + MBBICommon->dropMemRefs(); ++MBBI; ++MBBICommon; @@ -840,8 +857,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, // block, which we can't jump to), we can treat all blocks with this same // tail at once. Use PredBB if that is one of the possibilities, as that // will not introduce any extra branches.
- MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()-> - getParent()->begin(); + MachineBasicBlock *EntryBB = + &MergePotentials.front().getBlock()->getParent()->front(); unsigned commonTailIndex = SameTails.size(); // If there are two blocks, check to see if one can be made to fall through // into the other. @@ -917,12 +934,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // First find blocks with no successors. MergePotentials.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E && MergePotentials.size() < TailMergeThreshold; ++I) { - if (TriedMerging.count(I)) - continue; - if (I->succ_empty()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); + for (MachineBasicBlock &MBB : MF) { + if (MergePotentials.size() == TailMergeThreshold) + break; + if (!TriedMerging.count(&MBB) && MBB.succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(&MBB), &MBB)); } // If this is a large problem, avoid visiting the same basic blocks @@ -958,13 +974,13 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { I != E; ++I) { if (I->pred_size() < 2) continue; SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; - MachineBasicBlock *IBB = I; - MachineBasicBlock *PredBB = std::prev(I); + MachineBasicBlock *IBB = &*I; + MachineBasicBlock *PredBB = &*std::prev(I); MergePotentials.clear(); - for (MachineBasicBlock::pred_iterator P = I->pred_begin(), - E2 = I->pred_end(); - P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) { - MachineBasicBlock *PBB = *P; + for (MachineBasicBlock *PBB : I->predecessors()) { + if (MergePotentials.size() == TailMergeThreshold) + break; + if (TriedMerging.count(PBB)) continue; @@ -977,7 +993,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { continue; // Skip blocks which may jump to a landing pad. Can't tail merge these. - if (PBB->getLandingPadSuccessor()) + if (PBB->hasEHPadSuccessor()) continue; MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -990,18 +1006,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { if (TII->ReverseBranchCondition(NewCond)) continue; // This is the QBB case described above - if (!FBB) - FBB = std::next(MachineFunction::iterator(PBB)); + if (!FBB) { + auto Next = ++PBB->getIterator(); + if (Next != MF.end()) + FBB = &*Next; + } } // Failing case: the only way IBB can be reached from PBB is via // exception handling. Happens for landing pads. Would be nice to have // a bit in the edge so we didn't have to do all this. - if (IBB->isLandingPad()) { - MachineFunction::iterator IP = PBB; IP++; + if (IBB->isEHPad()) { + MachineFunction::iterator IP = ++PBB->getIterator(); MachineBasicBlock *PredNextBB = nullptr; if (IP != MF.end()) - PredNextBB = IP; + PredNextBB = &*IP; if (!TBB) { if (IBB != PredNextBB) // fallthrough continue; @@ -1027,7 +1046,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { NewCond, dl); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), PBB)); } } @@ -1042,7 +1061,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Reinsert an unconditional branch if needed. The 1 below can occur as a // result of removing blocks in TryTailMergeBlocks.
- PredBB = std::prev(I); // this may have been changed in TryTailMergeBlocks + PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks if (MergePotentials.size() == 1 && MergePotentials.begin()->getBlock() != PredBB) FixTail(MergePotentials.begin()->getBlock(), IBB, TII); @@ -1080,13 +1099,19 @@ void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) { if (TailMBB.succ_size() <= 1) return; - auto MaxEdgeFreq = *std::max_element(EdgeFreqLs.begin(), EdgeFreqLs.end()); - uint64_t Scale = MaxEdgeFreq.getFrequency() / UINT32_MAX + 1; + auto SumEdgeFreq = + std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0)) + .getFrequency(); auto EdgeFreq = EdgeFreqLs.begin(); - for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); - SuccI != SuccE; ++SuccI, ++EdgeFreq) - TailMBB.setSuccWeight(SuccI, EdgeFreq->getFrequency() / Scale); + if (SumEdgeFreq > 0) { + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) { + auto Prob = BranchProbability::getBranchProbability( + EdgeFreq->getFrequency(), SumEdgeFreq); + TailMBB.setSuccProbability(SuccI, Prob); + } + } } //===----------------------------------------------------------------------===// @@ -1098,10 +1123,12 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { // Make sure blocks are numbered in order MF.RenumberBlocks(); + // Renumbering blocks alters funclet membership, recalculate it. + FuncletMembership = getFuncletMembership(MF); for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; + MachineBasicBlock *MBB = &*I++; MadeChange |= OptimizeBlock(MBB); // If it is dead, remove it. @@ -1111,6 +1138,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { ++NumDeadBlocks; } } + return MadeChange; } @@ -1167,20 +1195,31 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { MachineFunction &MF = *MBB->getParent(); ReoptimizeBlock: - MachineFunction::iterator FallThrough = MBB; + MachineFunction::iterator FallThrough = MBB->getIterator(); ++FallThrough; + // Make sure MBB and FallThrough belong to the same funclet. + bool SameFunclet = true; + if (!FuncletMembership.empty() && FallThrough != MF.end()) { + auto MBBFunclet = FuncletMembership.find(MBB); + assert(MBBFunclet != FuncletMembership.end()); + auto FallThroughFunclet = FuncletMembership.find(&*FallThrough); + assert(FallThroughFunclet != FuncletMembership.end()); + SameFunclet = MBBFunclet->second == FallThroughFunclet->second; + } + // If this block is empty, make everyone use its fall-through, not the block // explicitly. Landing pads should not do this since the landing-pad table // points to this block. Blocks with their addresses taken shouldn't be // optimized away. - if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) { + if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() && + SameFunclet) { // Dead block? Leave for cleanup later. if (MBB->pred_empty()) return MadeChange; if (FallThrough == MF.end()) { // TODO: Simplify preds to not branch here if possible! - } else if (FallThrough->isLandingPad()) { + } else if (FallThrough->isEHPad()) { // Don't rewrite to a landing pad fallthough. That could lead to the case // where a BB jumps to more than one landing pad. // TODO: Is it ever worth rewriting predecessors which don't already @@ -1190,12 +1229,12 @@ ReoptimizeBlock: // instead. 
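(For intuition on the arithmetic in setCommonTailEdgeWeights above: each successor edge now receives frequency_i / sum(frequencies) rather than a value scaled against UINT32_MAX. A self-contained sketch, with plain integers standing in for BlockFrequency:)

    #include <cstdint>
    #include <vector>

    // Each edge probability is its frequency over the sum of all edge
    // frequencies; if the sum is zero, the probabilities are left untouched.
    std::vector<double> edgeProbabilities(const std::vector<uint64_t> &Freqs) {
      uint64_t Sum = 0;
      for (uint64_t F : Freqs)
        Sum += F;                                   // the std::accumulate above
      std::vector<double> Probs;
      if (Sum > 0)
        for (uint64_t F : Freqs)
          Probs.push_back(double(F) / double(Sum)); // BranchProbability(F, Sum)
      return Probs;
    }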
while (!MBB->pred_empty()) { MachineBasicBlock *Pred = *(MBB->pred_end()-1); - Pred->ReplaceUsesOfBlockWith(MBB, FallThrough); + Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough); } // If MBB was the target of a jump table, update jump tables to go to the // fallthrough instead. if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) - MJTI->ReplaceMBBInJumpTables(MBB, FallThrough); + MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough); MadeChange = true; } return MadeChange; @@ -1237,7 +1276,7 @@ ReoptimizeBlock: // AnalyzeBranch. if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && PrevBB.succ_size() == 1 && - !MBB->hasAddressTaken() && !MBB->isLandingPad()) { + !MBB->hasAddressTaken() && !MBB->isEHPad()) { DEBUG(dbgs() << "\nMerging into block: " << PrevBB << "From MBB: " << *MBB); // Remove redundant DBG_VALUEs first. @@ -1333,7 +1372,7 @@ ReoptimizeBlock: TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); // Move this block to the end of the function. - MBB->moveAfter(--MF.end()); + MBB->moveAfter(&MF.back()); MadeChange = true; ++NumBranchOpts; return MadeChange; @@ -1371,7 +1410,7 @@ ReoptimizeBlock: // other blocks across it. if (CurTBB && CurCond.empty() && !CurFBB && IsBranchOnlyBlock(MBB) && CurTBB != MBB && - !MBB->hasAddressTaken()) { + !MBB->hasAddressTaken() && !MBB->isEHPad()) { DebugLoc dl = getBranchDebugLoc(*MBB); // This block may contain just an unconditional branch. Because there can // be 'non-branch terminators' in the block, try removing the branch and @@ -1468,14 +1507,11 @@ ReoptimizeBlock: // see if it has a fall-through into its successor. bool CurFallsThru = MBB->canFallThrough(); - if (!MBB->isLandingPad()) { + if (!MBB->isEHPad()) { // Check all the predecessors of this block. If one of them has no fall // throughs, move this block right after it. - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - E = MBB->pred_end(); PI != E; ++PI) { + for (MachineBasicBlock *PredBB : MBB->predecessors()) { // Analyze the branch at the end of the pred. - MachineBasicBlock *PredBB = *PI; - MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector<MachineOperand, 4> PredCond; if (PredBB != MBB && !PredBB->canFallThrough() && @@ -1493,8 +1529,7 @@ ReoptimizeBlock: // B elsewhere // next: if (CurFallsThru) { - MachineBasicBlock *NextBB = - std::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = &*std::next(MBB->getIterator()); CurCond.clear(); TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); } @@ -1507,11 +1542,9 @@ ReoptimizeBlock: if (!CurFallsThru) { // Check all successors to see if we can move this block before it. - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - E = MBB->succ_end(); SI != E; ++SI) { + for (MachineBasicBlock *SuccBB : MBB->successors()) { // Analyze the branch at the end of the block before the succ. - MachineBasicBlock *SuccBB = *SI; - MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev; + MachineFunction::iterator SuccPrev = --SuccBB->getIterator(); // If this block doesn't already fall-through to that successor, and if // the succ doesn't already have a block that can fall through into it, // and if the successor isn't an EH destination, we can arrange for the // fallthrough to happen.
if (SuccBB != MBB && &*SuccPrev != MBB && !SuccPrev->canFallThrough() && !CurUnAnalyzable && - !SuccBB->isLandingPad()) { + !SuccBB->isEHPad()) { MBB->moveBefore(SuccBB); MadeChange = true; goto ReoptimizeBlock; @@ -1531,10 +1564,18 @@ ReoptimizeBlock: // removed, move this block to the end of the function. MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector<MachineOperand, 4> PrevCond; + // We're looking for cases where PrevBB could possibly fall through to + // FallThrough, but if FallThrough is an EH pad that wouldn't be useful + // so here we skip over any EH pads so we might have a chance to find + // a branch target from PrevBB. + while (FallThrough != MF.end() && FallThrough->isEHPad()) + ++FallThrough; + // Now check to see if the current block is sitting between PrevBB and + // a block to which it could fall through. if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && - PrevBB.isSuccessor(FallThrough)) { - MBB->moveAfter(--MF.end()); + PrevBB.isSuccessor(&*FallThrough)) { + MBB->moveAfter(&MF.back()); MadeChange = true; return MadeChange; } @@ -1553,7 +1594,7 @@ ReoptimizeBlock: bool BranchFolder::HoistCommonCode(MachineFunction &MF) { bool MadeChange = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; + MachineBasicBlock *MBB = &*I++; MadeChange |= HoistCommonCodeInSuccs(MBB); } @@ -1564,15 +1605,23 @@ bool BranchFolder::HoistCommonCode(MachineFunction &MF) { /// its 'true' successor. static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, MachineBasicBlock *TrueBB) { - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - E = BB->succ_end(); SI != E; ++SI) { - MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock *SuccBB : BB->successors()) if (SuccBB != TrueBB) return SuccBB; - } return nullptr; } +template <class Container> +static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, + Container &Set) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Set.insert(*AI); + } else { + Set.insert(Reg); + } +} + /// findHoistingInsertPosAndDeps - Find the location to move common instructions /// in successors to. The location is usually just before the terminator, /// however if the terminator is a conditional branch and its previous /// instruction is the flag setting instruction, the previous instruction is /// the preferred location. This function also gathers uses and defs of the /// instructions from the insertion point to the end of the block. The data is /// used by HoistCommonCodeInSuccs to ensure safety. @@ -1590,16 +1639,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (!TII->isUnpredicatedTerminator(Loc)) return MBB->end(); - for (unsigned i = 0, e = Loc->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = Loc->getOperand(i); + for (const MachineOperand &MO : Loc->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (!MO.isDead()) // Don't try to hoist code in the rare case the terminator defines a @@ -1608,8 +1655,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // If the terminator defines a register, make sure we don't hoist // the instruction whose def might be clobbered by the terminator.
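(Usage sketch for the new addRegAndItsAliases helper, mirroring the call sites around it; hypothetical fragment, assuming a MachineInstr *Term and the TRI pointer already in scope. Any set-like Container with insert() works, e.g. the SmallSet and SmallPtrSet uses nearby.)

    // Collect every register a terminator reads, physical aliases included,
    // so later code won't hoist a def of any of them past the terminator.
    SmallSet<unsigned, 4> Uses;
    for (const MachineOperand &MO : Term->operands())
      if (MO.isReg() && MO.isUse() && MO.getReg())
        addRegAndItsAliases(MO.getReg(), TRI, Uses);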
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1626,8 +1672,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, --PI; bool IsDef = false; - for (unsigned i = 0, e = PI->getNumOperands(); !IsDef && i != e; ++i) { - const MachineOperand &MO = PI->getOperand(i); + for (const MachineOperand &MO : PI->operands()) { // If PI has a regmask operand, it is probably a call. Separate away. if (MO.isRegMask()) return Loc; @@ -1636,8 +1681,10 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, unsigned Reg = MO.getReg(); if (!Reg) continue; - if (Uses.count(Reg)) + if (Uses.count(Reg)) { IsDef = true; + break; + } } if (!IsDef) // The condition setting instruction is not just before the conditional @@ -1657,23 +1704,22 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, // Find out what registers are live. Note this routine is ignoring other live // registers which are only used by instructions in successor blocks. - for (unsigned i = 0, e = PI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = PI->getOperand(i); + for (const MachineOperand &MO : PI->operands()) { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; if (MO.isUse()) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Uses.insert(*AI); + addRegAndItsAliases(Reg, TRI, Uses); } else { if (Uses.erase(Reg)) { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Uses.erase(*SubRegs); // Use sub-registers to be conservative + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Uses.erase(*SubRegs); // Use sub-registers to be conservative + } } - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - Defs.insert(*AI); + addRegAndItsAliases(Reg, TRI, Defs); } } @@ -1737,8 +1783,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; bool IsSafe = true; - for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { - MachineOperand &MO = TIB->getOperand(i); + for (MachineOperand &MO : TIB->operands()) { // Don't attempt to hoist instructions with register masks. if (MO.isRegMask()) { IsSafe = false; @@ -1793,28 +1838,29 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { break; // Remove kills from LocalDefsSet, these registers had short live ranges. - for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { - MachineOperand &MO = TIB->getOperand(i); + for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isUse() || !MO.isKill()) continue; unsigned Reg = MO.getReg(); if (!Reg || !LocalDefsSet.count(Reg)) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.erase(*AI); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.erase(*AI); + } else { + LocalDefsSet.erase(Reg); + } } // Track local defs so we can update liveins. 
- for (unsigned i = 0, e = TIB->getNumOperands(); i != e; ++i) { - MachineOperand &MO = TIB->getOperand(i); + for (const MachineOperand &MO : TIB->operands()) { if (!MO.isReg() || !MO.isDef() || MO.isDead()) continue; unsigned Reg = MO.getReg(); if (!Reg) continue; LocalDefs.push_back(Reg); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - LocalDefsSet.insert(*AI); + addRegAndItsAliases(Reg, TRI, LocalDefsSet); } HasDups = true; diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 46c05dc0600a..d759d53e27f2 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -54,6 +54,7 @@ namespace llvm { typedef std::vector<MergePotentialsElt>::iterator MPIterator; std::vector<MergePotentialsElt> MergePotentials; SmallPtrSet<const MachineBasicBlock*, 16> TriedMerging; + DenseMap<const MachineBasicBlock *, int> FuncletMembership; class SameTailElt { MPIterator MPIter; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index eb7552970d3f..a078c3c707a0 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,3 +1,8 @@ +set(system_libs) +if(CMAKE_HOST_UNIX AND LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD) + set(system_libs ${system_libs} pthread) +endif() + add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp AllocationOrder.cpp @@ -20,7 +25,9 @@ add_llvm_library(LLVMCodeGen ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp + LiveDebugValues.cpp FaultMaps.cpp + FuncletLayout.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCRootLowering.cpp @@ -80,6 +87,7 @@ add_llvm_library(LLVMCodeGen OptimizePHIs.cpp PHIElimination.cpp PHIEliminationUtils.cpp + ParallelCG.cpp Passes.cpp PeepholeOptimizer.cpp PostRASchedulerList.cpp @@ -128,6 +136,8 @@ add_llvm_library(LLVMCodeGen ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen/PBQP + + LINK_LIBS ${system_libs} ) add_dependencies(LLVMCodeGen intrinsics_gen) diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index d08fae09323c..abc655ac34ca 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -24,6 +25,7 @@ using namespace llvm; void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, MachineFunction &MF, + VirtRegMap *VRM, const MachineLoopInfo &MLI, const MachineBlockFrequencyInfo &MBFI, VirtRegAuxInfo::NormalizingFn norm) { @@ -31,7 +33,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, << "********** Function: " << MF.getName() << '\n'); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm); + VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) @@ -74,7 +76,10 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, // Check if all values in LI are rematerializable static bool isRematerializable(const LiveInterval &LI, const LiveIntervals &LIS, + VirtRegMap *VRM, const TargetInstrInfo &TII) { + unsigned Reg = LI.reg; + unsigned Original = VRM ?
VRM->getOriginal(Reg) : 0; for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { const VNInfo *VNI = *I; @@ -86,6 +91,36 @@ static bool isRematerializable(const LiveInterval &LI, MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); assert(MI && "Dead valno in interval"); + // Trace copies introduced by live range splitting. The inline + // spiller can rematerialize through these copies, so the spill + // weight must reflect this. + if (VRM) { + while (MI->isFullCopy()) { + // The copy destination must match the interval register. + if (MI->getOperand(0).getReg() != Reg) + return false; + + // Get the source register. + Reg = MI->getOperand(1).getReg(); + + // If the original (pre-splitting) registers match, this + // copy came from a split. + if (!TargetRegisterInfo::isVirtualRegister(Reg) || + VRM->getOriginal(Reg) != Original) + return false; + + // Follow the copy live-in value. + const LiveInterval &SrcLI = LIS.getInterval(Reg); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); + VNI = SrcQ.valueIn(); + assert(VNI && "Copy from non-existing value"); + if (VNI->isPHIDef()) + return false; + MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); + } + } + if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis())) return false; } @@ -188,7 +223,7 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { // it is a preferred candidate for spilling. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. - if (isRematerializable(li, LIS, *MF.getSubtarget().getInstrInfo())) + if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo())) totalWeight *= 0.5F; li.weight = normalize(totalWeight, li.getSize(), numInstr); diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index fb29b1db7a43..23c0d542560e 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -32,6 +32,7 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, CallOrPrologue(Unknown) { // No stack is used. StackOffset = 0; + MaxStackArgAlign = 1; clearByValRegsInfo(); UsedRegs.resize((TRI.getNumRegs()+31)/32); @@ -192,6 +193,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, MVT VT, CCAssignFn Fn) { unsigned SavedStackOffset = StackOffset; + unsigned SavedMaxStackArgAlign = MaxStackArgAlign; unsigned NumLocs = Locs.size(); // Set the 'inreg' flag if it is used for this calling convention. @@ -223,6 +225,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, // as allocated so that future queries don't return the same registers, i.e. // when i64 and f64 are both passed in GPRs.
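(A compressed model of the copy-chain walk added to isRematerializable in the CalcSpillWeights hunk above; illustrative only, with a hypothetical MachineRegisterInfo lookup standing in for the LiveIntervals-based query the real code performs.)

    // Follow full COPY instructions back to the instruction that actually
    // defines the value; that instruction is then tested for remat.
    while (MI->isFullCopy()) {
      unsigned Src = MI->getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Src))
        break;                     // physreg source: cannot trace further
      MI = MRI->getVRegDef(Src);   // assumed: MRI is a MachineRegisterInfo*
    }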
StackOffset = SavedStackOffset; + MaxStackArgAlign = SavedMaxStackArgAlign; Locs.resize(NumLocs); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 155c5ecec772..dc13b5b11d30 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -29,6 +29,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeExpandISelPseudosPass(Registry); initializeExpandPostRAPass(Registry); initializeFinalizeMachineBundlesPass(Registry); + initializeFuncletLayoutPass(Registry); initializeGCMachineCodeAnalysisPass(Registry); initializeGCModuleInfoPass(Registry); initializeIfConverterPass(Registry); @@ -66,6 +67,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeSlotIndexesPass(Registry); initializeStackColoringPass(Registry); initializeStackMapLivenessPass(Registry); + initializeLiveDebugValuesPass(Registry); initializeStackProtectorPass(Registry); initializeStackSlotColoringPass(Registry); initializeTailDuplicatePassPass(Registry); diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 6ab6acc03722..5844124d8565 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -63,6 +64,9 @@ STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumAndsAdded, + "Number of and mask instructions added to form ext loads"); +STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); @@ -109,25 +113,18 @@ static cl::opt<bool> StressExtLdPromotion( namespace { typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; -struct TypeIsSExt { - Type *Ty; - bool IsSExt; - TypeIsSExt(Type *Ty, bool IsSExt) : Ty(Ty), IsSExt(IsSExt) {} -}; +typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy; class TypePromotionTransaction; class CodeGenPrepare : public FunctionPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// transformation profitability. const TargetMachine *TM; const TargetLowering *TLI; const TargetTransformInfo *TTI; const TargetLibraryInfo *TLInfo; - /// CurInstIterator - As we scan instructions optimizing them, this is the - /// next instruction to optimize. Xforms that can invalidate this should - /// update it. + /// As we scan instructions optimizing them, this is the next instruction + /// to optimize. Transforms that can invalidate this should update it. BasicBlock::iterator CurInstIterator; /// Keeps track of non-local addresses that have been sunk into a block. @@ -141,10 +138,10 @@ class TypePromotionTransaction; /// promotion for the current function. InstrToOrigTy PromotedInsts; - /// ModifiedDT - If CFG is modified in anyway. + /// True if CFG is modified in any way. bool ModifiedDT; - /// OptSize - True if optimizing for size. + /// True if optimizing for size. bool OptSize; /// DataLayout for the Function being processed.
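(What the new TypeIsSExt typedef buys, as a self-contained sketch: PointerIntPair is the real LLVM ADT used above, while the surrounding names are illustrative. The bool rides in the low alignment bits of the pointer, so each map entry stays pointer-sized instead of carrying a separate struct field.)

    #include "llvm/ADT/PointerIntPair.h"

    static int Dummy;
    // (pointer, IsSExt) packed into one pointer-sized value; int* stands in
    // for Type* here so the sketch compiles on its own.
    llvm::PointerIntPair<int *, 1, bool> TypeAndSExt(&Dummy, /*IsSExt=*/true);
    int *Ptr = TypeAndSExt.getPointer();  // &Dummy
    bool SExt = TypeAndSExt.getInt();     // true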
@@ -167,30 +164,33 @@ class TypePromotionTransaction; } private: - bool EliminateFallThrough(Function &F); - bool EliminateMostlyEmptyBlocks(Function &F); - bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; - void EliminateMostlyEmptyBlock(BasicBlock *BB); - bool OptimizeBlock(BasicBlock &BB, bool& ModifiedDT); - bool OptimizeInst(Instruction *I, bool& ModifiedDT); - bool OptimizeMemoryInst(Instruction *I, Value *Addr, + bool eliminateFallThrough(Function &F); + bool eliminateMostlyEmptyBlocks(Function &F); + bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; + void eliminateMostlyEmptyBlock(BasicBlock *BB); + bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); + bool optimizeInst(Instruction *I, bool& ModifiedDT); + bool optimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy, unsigned AS); - bool OptimizeInlineAsmInst(CallInst *CS); - bool OptimizeCallInst(CallInst *CI, bool& ModifiedDT); - bool MoveExtToFormExtLoad(Instruction *&I); - bool OptimizeExtUses(Instruction *I); - bool OptimizeSelectInst(SelectInst *SI); - bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); - bool OptimizeExtractElementInst(Instruction *Inst); - bool DupRetToEnableTailCallOpts(BasicBlock *BB); - bool PlaceDbgValues(Function &F); + bool optimizeInlineAsmInst(CallInst *CS); + bool optimizeCallInst(CallInst *CI, bool& ModifiedDT); + bool moveExtToFormExtLoad(Instruction *&I); + bool optimizeExtUses(Instruction *I); + bool optimizeLoadExt(LoadInst *I); + bool optimizeSelectInst(SelectInst *SI); + bool optimizeShuffleVectorInst(ShuffleVectorInst *SI); + bool optimizeSwitchInst(SwitchInst *CI); + bool optimizeExtractElementInst(Instruction *Inst); + bool dupRetToEnableTailCallOpts(BasicBlock *BB); + bool placeDbgValues(Function &F); bool sinkAndCmp(Function &F); - bool ExtLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, + bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl<Instruction *> &Exts, unsigned CreatedInstCost); bool splitBranchCondition(Function &F); bool simplifyOffsetableRelocate(Instruction &I); + void stripInvariantGroupMetadata(Instruction &I); }; } @@ -218,7 +218,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { TLI = TM->getSubtargetImpl(F)->getTargetLowering(); TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); - OptSize = F.hasFnAttribute(Attribute::OptimizeForSize); + OptSize = F.optForSize(); /// This optimization identifies DIV instructions that can be /// profitably bypassed and carried out with a shorter, faster divide. @@ -231,12 +231,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Eliminate blocks that contain only PHI nodes and an // unconditional branch. - EverMadeChange |= EliminateMostlyEmptyBlocks(F); + EverMadeChange |= eliminateMostlyEmptyBlocks(F); // llvm.dbg.value is far away from the value then iSel may not be able // handle it properly. iSel will drop llvm.dbg.value if it can not // find a node corresponding to the value.
- EverMadeChange |= PlaceDbgValues(F); + EverMadeChange |= placeDbgValues(F); // If there is a mask, compare against zero, and branch that can be combined // into a single target instruction, push the mask and compare into branch @@ -251,9 +251,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) { while (MadeChange) { MadeChange = false; for (Function::iterator I = F.begin(); I != F.end(); ) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; bool ModifiedDTOnIteration = false; - MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration); + MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); // Restart BB iteration if the dominator tree of the Function was changed if (ModifiedDTOnIteration) @@ -296,7 +296,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // Merge pairs of basic blocks with unconditional branches, connected by // a single edge. if (EverMadeChange || MadeChange) - MadeChange |= EliminateFallThrough(F); + MadeChange |= eliminateFallThrough(F); EverMadeChange |= MadeChange; } @@ -314,14 +314,14 @@ bool CodeGenPrepare::runOnFunction(Function &F) { return EverMadeChange; } -/// EliminateFallThrough - Merge basic blocks which are connected -/// by a single edge, where one of the basic blocks has a single successor -/// pointing to the other basic block, which has a single predecessor. -bool CodeGenPrepare::EliminateFallThrough(Function &F) { +/// Merge basic blocks which are connected by a single edge, where one of the +/// basic blocks has a single successor pointing to the other basic block, +/// which has a single predecessor. +bool CodeGenPrepare::eliminateFallThrough(Function &F) { bool Changed = false; // Scan all of the blocks in the function, except for the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; // If the destination block has a single pred, then this is a trivial // edge, just collapse it. BasicBlock *SinglePred = BB->getSinglePredecessor(); @@ -342,22 +342,21 @@ bool CodeGenPrepare::EliminateFallThrough(Function &F) { BB->moveBefore(&BB->getParent()->getEntryBlock()); // We have erased a block. Update the iterator. - I = BB; + I = BB->getIterator(); } } return Changed; } -/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes, -/// debug info directives, and an unconditional branch. Passes before isel -/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for -/// isel. Start by eliminating these blocks so we can split them the way we -/// want them. -bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { +/// Eliminate blocks that contain only PHI nodes, debug info directives, and an +/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split +/// edges in ways that are non-optimal for isel. Start by eliminating these +/// blocks so we can split them the way we want them. +bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { bool MadeChange = false; // Note that this intentionally skips the entry block. for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; // If this block doesn't end with an uncond branch, ignore it. BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || !BI->isUnconditional()) continue; // If the instruction before the branch (skipping debug info) isn't a phi // node, then other stuff is happening here.
- BasicBlock::iterator BBI = BI; + BasicBlock::iterator BBI = BI->getIterator(); if (BBI != BB->begin()) { --BBI; while (isa<DbgInfoIntrinsic>(BBI)) { @@ -383,19 +382,19 @@ bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) { if (DestBB == BB) continue; - if (!CanMergeBlocks(BB, DestBB)) + if (!canMergeBlocks(BB, DestBB)) continue; - EliminateMostlyEmptyBlock(BB); + eliminateMostlyEmptyBlock(BB); MadeChange = true; } return MadeChange; } -/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a -/// single uncond branch between them, and BB contains no other non-phi +/// Return true if we can merge BB into DestBB if there is a single +/// unconditional branch between them, and BB contains no other non-phi /// instructions. -bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, +bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const { // We only want to eliminate blocks whose phi nodes are used by phi nodes in // the successor. If there are more complex condition (e.g. preheaders), @@ -461,9 +460,9 @@ bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB, } -/// EliminateMostlyEmptyBlock - Eliminate a basic block that have only phi's and -/// an unconditional branch in it. -void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { +/// Eliminate a basic block that has only phi's and an unconditional branch in +/// it. +void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { BranchInst *BI = cast<BranchInst>(BB->getTerminator()); BasicBlock *DestBB = BI->getSuccessor(0); @@ -594,6 +593,14 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, continue; } + if (RelocatedBase->getParent() != ToReplace->getParent()) { + // Base and derived relocates are in different basic blocks. + // In this case transform is only valid when base dominates derived + // relocate. However it would be too expensive to check dominance + // for each such relocate, so we skip the whole transformation. + continue; + } + Value *Base = ThisRelocate.getBasePtr(); auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.getDerivedPtr()); if (!Derived || Derived->getPointerOperand() != Base) @@ -631,21 +638,20 @@ simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase, // In this case, we can not find the bitcast any more. So we insert a new bitcast // no matter there is already one or not. In this way, we can handle all cases, and // the extra bitcast should be optimized away in later passes. - Instruction *ActualRelocatedBase = RelocatedBase; + Value *ActualRelocatedBase = RelocatedBase; if (RelocatedBase->getType() != Base->getType()) { ActualRelocatedBase = - cast<Instruction>(Builder.CreateBitCast(RelocatedBase, Base->getType())); + Builder.CreateBitCast(RelocatedBase, Base->getType()); } Value *Replacement = Builder.CreateGEP( Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); - Instruction *ReplacementInst = cast<Instruction>(Replacement); Replacement->takeName(ToReplace); // If the newly generated derived pointer's type does not match the original derived // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
- Instruction *ActualReplacement = ReplacementInst; - if (ReplacementInst->getType() != ToReplace->getType()) { + Value *ActualReplacement = Replacement; + if (Replacement->getType() != ToReplace->getType()) { ActualReplacement = - cast(Builder.CreateBitCast(ReplacementInst, ToReplace->getType())); + Builder.CreateBitCast(Replacement, ToReplace->getType()); } ToReplace->replaceAllUsesWith(ActualReplacement); ToReplace->eraseFromParent(); @@ -723,6 +729,12 @@ static bool SinkCast(CastInst *CI) { // Preincrement use iterator so we don't invalidate it. ++UI; + // If the block selected to receive the cast is an EH pad that does not + // allow non-PHI instructions before the terminator, we can't sink the + // cast. + if (UserBB->getTerminator()->isEHPad()) + continue; + // If this user is in the same block as the cast, don't change the cast. if (UserBB == DefBB) continue; @@ -731,9 +743,9 @@ static bool SinkCast(CastInst *CI) { if (!InsertedCast) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - InsertedCast = - CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "", - InsertPt); + assert(InsertPt != UserBB->end()); + InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), + CI->getType(), "", &*InsertPt); } // Replace a use of the cast with a use of the new cast. @@ -751,10 +763,9 @@ static bool SinkCast(CastInst *CI) { return MadeChange; } -/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop -/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), -/// sink it into user blocks to reduce the number of virtual -/// registers that must be created and coalesced. +/// If the specified cast instruction is a noop copy (e.g. it's casting from +/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to +/// reduce the number of virtual registers that must be created and coalesced. /// /// Return true if any changes are made. /// @@ -789,8 +800,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, return SinkCast(CI); } -/// CombineUAddWithOverflow - try to combine CI into a call to the -/// llvm.uadd.with.overflow intrinsic if possible. +/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if +/// possible. /// /// Return true if any changes were made. static bool CombineUAddWithOverflow(CmpInst *CI) { @@ -818,7 +829,7 @@ static bool CombineUAddWithOverflow(CmpInst *CI) { assert(*AddI->user_begin() == CI && "expected!"); #endif - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); auto *InsertPt = AddI->hasOneUse() ? CI : AddI; @@ -836,16 +847,16 @@ static bool CombineUAddWithOverflow(CmpInst *CI) { return true; } -/// SinkCmpExpression - Sink the given CmpInst into user blocks to reduce -/// the number of virtual registers that must be created and coalesced. This is -/// a clear win except on targets with multiple condition code registers -/// (PowerPC), where it might lose; some adjustment may be wanted there. +/// Sink the given CmpInst into user blocks to reduce the number of virtual +/// registers that must be created and coalesced. This is a clear win except on +/// targets with multiple condition code registers (PowerPC), where it might +/// lose; some adjustment may be wanted there. /// /// Return true if any changes are made. 
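// Aside: the icmp/add pair that CombineUAddWithOverflow folds into
// @llvm.uadd.with.overflow computes nothing more exotic than this (a hedged
// sketch in plain C++; uaddWithOverflow is an illustrative name, not an
// LLVM API):

#include <cassert>
#include <cstdint>
#include <limits>

static bool uaddWithOverflow(uint32_t a, uint32_t b, uint32_t &sum) {
  sum = a + b;     // wraps modulo 2^32, like the IR `add`
  return sum < a;  // true iff the add wrapped: the intrinsic's overflow bit
}

int main() {
  uint32_t s;
  assert(!uaddWithOverflow(1, 2, s) && s == 3);
  assert(uaddWithOverflow(std::numeric_limits<uint32_t>::max(), 1, s) && s == 0);
  return 0;
}

// Packaging the two results into one intrinsic lets isel pick a single
// add-then-branch-on-carry sequence instead of an add plus a separate compare.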
static bool SinkCmpExpression(CmpInst *CI) { BasicBlock *DefBB = CI->getParent(); - /// InsertedCmp - Only insert a cmp in each block once. + /// Only insert a cmp in each block once. DenseMap InsertedCmps; bool MadeChange = false; @@ -872,10 +883,10 @@ static bool SinkCmpExpression(CmpInst *CI) { if (!InsertedCmp) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); InsertedCmp = - CmpInst::Create(CI->getOpcode(), - CI->getPredicate(), CI->getOperand(0), - CI->getOperand(1), "", InsertPt); + CmpInst::Create(CI->getOpcode(), CI->getPredicate(), + CI->getOperand(0), CI->getOperand(1), "", &*InsertPt); } // Replace a use of the cmp with a use of the new cmp. @@ -903,8 +914,8 @@ static bool OptimizeCmpExpression(CmpInst *CI) { return false; } -/// isExtractBitsCandidateUse - Check if the candidates could -/// be combined with shift instruction, which includes: +/// Check if the candidates could be combined with a shift instruction, which +/// includes: /// 1. Truncate instruction /// 2. And instruction and the imm is a mask of the low bits: /// imm & (imm+1) == 0 @@ -922,8 +933,7 @@ static bool isExtractBitsCandidateUse(Instruction *User) { return true; } -/// SinkShiftAndTruncate - sink both shift and truncate instruction -/// to the use of truncate's BB. +/// Sink both shift and truncate instruction to the use of truncate's BB. static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap &InsertedShifts, @@ -970,20 +980,22 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, if (!InsertedShift && !InsertedTrunc) { BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); + assert(InsertPt != TruncUserBB->end()); // Sink the shift if (ShiftI->getOpcode() == Instruction::AShr) - InsertedShift = - BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); else - InsertedShift = - BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); // Sink the trunc BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); TruncInsertPt++; + assert(TruncInsertPt != TruncUserBB->end()); InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, - TruncI->getType(), "", TruncInsertPt); + TruncI->getType(), "", &*TruncInsertPt); MadeChange = true; @@ -993,10 +1005,10 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, return MadeChange; } -/// OptimizeExtractBits - sink the shift *right* instruction into user blocks if -/// the uses could potentially be combined with this shift instruction and -/// generate BitExtract instruction. It will only be applied if the architecture -/// supports BitExtract instruction. Here is an example: +/// Sink the shift *right* instruction into user blocks if the uses could +/// potentially be combined with this shift instruction and generate BitExtract +/// instruction. It will only be applied if the architecture supports BitExtract +/// instruction. 
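// Aside: the "mask of the low bits" test quoted above, imm & (imm+1) == 0, is
// worth seeing on concrete values (plain C++ sketch; isLowBitMask is an
// illustrative name). The BitExtract example from the doc comment resumes
// just below.

#include <cassert>
#include <cstdint>

static bool isLowBitMask(uint64_t m) {
  // m looks like 0b000...0111...1 exactly when m+1 is a power of two, i.e.
  // adding one carries out of the contiguous low run in a single step.
  return (m & (m + 1)) == 0;
}

int main() {
  assert(isLowBitMask(0x0) && isLowBitMask(0x1) && isLowBitMask(0xFF));
  assert(isLowBitMask(0x7FFF));
  assert(!isLowBitMask(0xF0) && !isLowBitMask(0x5));
  return 0;
}

// An `and` with such a mask (or a truncate, which is the same thing at a
// narrower width) is precisely what a bit-extract instruction can absorb
// together with the shift being sunk.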
Here is an example: /// BB1: /// %x.extract.shift = lshr i64 %arg1, 32 /// BB2: @@ -1067,13 +1079,14 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, if (!InsertedShift) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); if (ShiftI->getOpcode() == Instruction::AShr) - InsertedShift = - BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); else - InsertedShift = - BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt); + InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); MadeChange = true; } @@ -1089,10 +1102,10 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, return MadeChange; } -// ScalarizeMaskedLoad() translates masked load intrinsic, like +// Translate a masked load intrinsic like // <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, // <16 x i1> %mask, <16 x i32> %passthru) -// to a chain of basic blocks, whith loading element one-by-one if +// to a chain of basic blocks, with loading element one-by-one if // the appropriate mask bit is set // // %1 = bitcast i8* %addr to i32* @@ -1126,35 +1139,68 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, // static void ScalarizeMaskedLoad(CallInst *CI) { Value *Ptr = CI->getArgOperand(0); - Value *Src0 = CI->getArgOperand(3); + Value *Alignment = CI->getArgOperand(1); Value *Mask = CI->getArgOperand(2); - VectorType *VecType = dyn_cast(CI->getType()); - Type *EltTy = VecType->getElementType(); + Value *Src0 = CI->getArgOperand(3); + unsigned AlignVal = cast(Alignment)->getZExtValue(); + VectorType *VecType = dyn_cast(CI->getType()); assert(VecType && "Unexpected return type of masked load intrinsic"); + Type *EltTy = CI->getType()->getVectorElementType(); + IRBuilder<> Builder(CI->getContext()); Instruction *InsertPt = CI; BasicBlock *IfBlock = CI->getParent(); BasicBlock *CondBlock = nullptr; BasicBlock *PrevIfBlock = CI->getParent(); - Builder.SetInsertPoint(InsertPt); + Builder.SetInsertPoint(InsertPt); Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + // Short-cut if the mask is all-true. + bool IsAllOnesMask = isa(Mask) && + cast(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. 
+  AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8);
 
   // Bitcast %addr from i8* to EltTy*
   Type *NewPtrType =
     EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
   Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
+  unsigned VectorWidth = VecType->getNumElements();
 
   Value *UndefVal = UndefValue::get(VecType);
 
   // The result vector
   Value *VResult = UndefVal;
 
+  if (isa<ConstantVector>(Mask)) {
+    for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+      if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue())
+        continue;
+      Value *Gep =
+          Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+      LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal);
+      VResult = Builder.CreateInsertElement(VResult, Load,
+                                            Builder.getInt32(Idx));
+    }
+    Value *NewI = Builder.CreateSelect(Mask, VResult, Src0);
+    CI->replaceAllUsesWith(NewI);
+    CI->eraseFromParent();
+    return;
+  }
+
   PHINode *Phi = nullptr;
   Value *PrevPhi = UndefVal;
-  unsigned VectorWidth = VecType->getNumElements();
 
   for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
     // Fill the "else" block, created in the previous iteration
@@ -1182,16 +1228,17 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
     //  %Elt = load i32* %EltAddr
     //  VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
     //
-    CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
+    CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load");
     Builder.SetInsertPoint(InsertPt);
 
     Value *Gep =
         Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
-    LoadInst* Load = Builder.CreateLoad(Gep, false);
+    LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
     VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
 
     // Create "else" block, fill it in the next iteration
-    BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
+    BasicBlock *NewIfBlock =
+        CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
     Builder.SetInsertPoint(InsertPt);
     Instruction *OldBr = IfBlock->getTerminator();
     BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr);
@@ -1208,7 +1255,7 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
   CI->eraseFromParent();
 }
 
-// ScalarizeMaskedStore() translates masked store intrinsic, like
+// Translate a masked store intrinsic, like
 //  void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
 //                               <16 x i1> %mask)
 // to a chain of basic blocks, that stores element one-by-one if
@@ -1237,34 +1284,61 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
 // br label %else2
 //   . . .
 static void ScalarizeMaskedStore(CallInst *CI) {
-  Value *Ptr  = CI->getArgOperand(1);
   Value *Src = CI->getArgOperand(0);
+  Value *Ptr  = CI->getArgOperand(1);
+  Value *Alignment = CI->getArgOperand(2);
   Value *Mask = CI->getArgOperand(3);
+  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
   VectorType *VecType = dyn_cast<VectorType>(Src->getType());
-  Type *EltTy = VecType->getElementType();
-
   assert(VecType && "Unexpected data type in masked store intrinsic");
+  Type *EltTy = VecType->getElementType();
+
   IRBuilder<> Builder(CI->getContext());
   Instruction *InsertPt = CI;
   BasicBlock *IfBlock = CI->getParent();
   Builder.SetInsertPoint(InsertPt);
   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
 
+  // Short-cut if the mask is all-true.
+  bool IsAllOnesMask = isa<ConstantVector>(Mask) &&
+    cast<ConstantVector>(Mask)->isAllOnesValue();
+
+  if (IsAllOnesMask) {
+    Builder.CreateAlignedStore(Src, Ptr, AlignVal);
+    CI->eraseFromParent();
+    return;
+  }
+
+  // Adjust alignment for the scalar instruction.
+ AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8); // Bitcast %addr fron i8* to EltTy* Type *NewPtrType = EltTy->getPointerTo(cast(Ptr->getType())->getAddressSpace()); Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); - unsigned VectorWidth = VecType->getNumElements(); + + if (isa(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { // Fill the "else" block, created in the previous iteration // // %mask_1 = extractelement <16 x i1> %mask, i32 Idx // %to_store = icmp eq i1 %mask_1, true - // br i1 %to_load, label %cond.store, label %else + // br i1 %to_store, label %cond.store, label %else // Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, @@ -1276,13 +1350,259 @@ static void ScalarizeMaskedStore(CallInst *CI) { // %EltAddr = getelementptr i32* %1, i32 0 // %store i32 %OneElt, i32* %EltAddr // - BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + BasicBlock *CondBlock = + IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); Builder.SetInsertPoint(InsertPt); - + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); Value *Gep = Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); - Builder.CreateStore(OneElt, Gep); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +// Translate a masked gather intrinsic like +// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, +// <16 x i1> %Mask, <16 x i32> %Src) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> %Mask, i32 0 +// % ToLoad0 = icmp eq i1 % Mask0, true +// br i1 % ToLoad0, label %cond.load, label %else +// +// cond.load: +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// % Load0 = load i32, i32* % Ptr0, align 4 +// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 +// br label %else +// +// else: +// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] +// % Mask1 = extractelement <16 x i1> %Mask, i32 1 +// % ToLoad1 = icmp eq i1 % Mask1, true +// br i1 % ToLoad1, label %cond.load1, label %else2 +// +// cond.load1: +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// % Load1 = load i32, i32* % Ptr1, align 4 +// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 +// br label %else2 +// . . . 
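// Aside: stripped of the control-flow plumbing, every one of these masked
// expansions computes a simple per-lane rule. A gather, modeled in plain C++
// (an ordinary loop stands in for the chain of cond.load blocks; all names
// are illustrative):

#include <array>
#include <cassert>
#include <cstddef>

int main() {
  std::array<int, 4> data = {10, 20, 30, 40};
  std::array<const int *, 4> ptrs = {&data[3], &data[2], &data[1], &data[0]};
  std::array<bool, 4> mask = {true, false, true, false};
  std::array<int, 4> passthru = {-1, -2, -3, -4};
  std::array<int, 4> result;
  for (std::size_t i = 0; i < ptrs.size(); ++i)
    result[i] = mask[i] ? *ptrs[i] : passthru[i]; // one cond.load per lane
  assert(result == (std::array<int, 4>{40, -2, 20, -4}));
  return 0;
}

// A masked load is the special case where the lane pointers are consecutive
// offsets from one base. The constant-mask fast paths in the code above are
// this loop with the `if` resolved at compile time; the general path spells
// the same loop out as cond.load/else blocks stitched together with PHIs.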
+// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src +// ret <16 x i32> %Result +static void ScalarizeMaskedGather(CallInst *CI) { + Value *Ptrs = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + VectorType *VecType = dyn_cast(CI->getType()); + + assert(VecType && "Unexpected return type of masked load intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + unsigned AlignVal = cast(Alignment)->getZExtValue(); + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + unsigned VectorWidth = VecType->getNumElements(); + + // Shorten the way if the mask is a vector of constants. + bool IsConstMask = isa(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, + "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, + Builder.getInt32(Idx), + "Res" + Twine(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %ToLoad1 = icmp eq i1 %Mask1, true + // br i1 %ToLoad1, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = Builder.CreateExtractElement(Mask, + Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToLoad" + Twine(Idx)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, + "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), + "Res" + Twine(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); +} + +// Translate a masked scatter 
intrinsic, like
+// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4,
+//                                  <16 x i1> %Mask)
+// to a chain of basic blocks, that stores element one-by-one if
+// the appropriate mask bit is set.
+//
+// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
+// % Mask0 = extractelement <16 x i1> % Mask, i32 0
+// % ToStore0 = icmp eq i1 % Mask0, true
+// br i1 %ToStore0, label %cond.store, label %else
+//
+// cond.store:
+// % Elt0 = extractelement <16 x i32> %Src, i32 0
+// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
+// store i32 %Elt0, i32* % Ptr0, align 4
+// br label %else
+//
+// else:
+// % Mask1 = extractelement <16 x i1> % Mask, i32 1
+// % ToStore1 = icmp eq i1 % Mask1, true
+// br i1 % ToStore1, label %cond.store1, label %else2
+//
+// cond.store1:
+// % Elt1 = extractelement <16 x i32> %Src, i32 1
+// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
+// store i32 % Elt1, i32* % Ptr1, align 4
+// br label %else2
+//   . . .
+static void ScalarizeMaskedScatter(CallInst *CI) {
+  Value *Src = CI->getArgOperand(0);
+  Value *Ptrs = CI->getArgOperand(1);
+  Value *Alignment = CI->getArgOperand(2);
+  Value *Mask = CI->getArgOperand(3);
+
+  assert(isa<VectorType>(Src->getType()) &&
+         "Unexpected data type in masked scatter intrinsic");
+  assert(isa<VectorType>(Ptrs->getType()) &&
+         isa<PointerType>(Ptrs->getType()->getVectorElementType()) &&
+         "Vector of pointers is expected in masked scatter intrinsic");
+
+  IRBuilder<> Builder(CI->getContext());
+  Instruction *InsertPt = CI;
+  BasicBlock *IfBlock = CI->getParent();
+  Builder.SetInsertPoint(InsertPt);
+  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue();
+  unsigned VectorWidth = Src->getType()->getVectorNumElements();
+
+  // Shorten the way if the mask is a vector of constants.
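// Aside: the scatter is the store-side twin of the gather above; its per-lane
// rule, modeled the same way (plain C++, illustrative names). Lanes may alias
// each other, which is why the expansion keeps the stores in lane order:

#include <array>
#include <cassert>
#include <cstddef>

int main() {
  std::array<int, 4> dst = {0, 0, 0, 0};
  std::array<int, 4> src = {10, 20, 30, 40};
  std::array<int *, 4> ptrs = {&dst[0], &dst[1], &dst[2], &dst[2]};
  std::array<bool, 4> mask = {true, false, true, true};
  for (std::size_t i = 0; i < ptrs.size(); ++i)
    if (mask[i])
      *ptrs[i] = src[i];  // one cond.store block per set mask bit
  // Lanes 2 and 3 alias: the later lane wins, as the block chain guarantees.
  assert(dst == (std::array<int, 4>{10, 0, 40, 0}));
  return 0;
}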
+ bool IsConstMask = isa(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx + // % ToStore = icmp eq i1 % Mask1, true + // br i1 % ToStore, label %cond.store, label %else + // + Value *Predicate = Builder.CreateExtractElement(Mask, + Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = + Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToStore" + Twine(Idx)); + + // Create "cond" block + // + // % Elt1 = extractelement <16 x i32> %Src, i32 1 + // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 + // %store i32 % Elt1, i32* % Ptr1 + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); // Create "else" block, fill it in the next iteration BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); @@ -1295,7 +1615,86 @@ static void ScalarizeMaskedStore(CallInst *CI) { CI->eraseFromParent(); } -bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { +/// If counting leading or trailing zeros is an expensive operation and a zero +/// input is defined, add a check for zero to avoid calling the intrinsic. +/// +/// We want to transform: +/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) +/// +/// into: +/// entry: +/// %cmpz = icmp eq i64 %A, 0 +/// br i1 %cmpz, label %cond.end, label %cond.false +/// cond.false: +/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) +/// br label %cond.end +/// cond.end: +/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] +/// +/// If the transform is performed, return true and set ModifiedDT to true. +static bool despeculateCountZeros(IntrinsicInst *CountZeros, + const TargetLowering *TLI, + const DataLayout *DL, + bool &ModifiedDT) { + if (!TLI || !DL) + return false; + + // If a zero input is undefined, it doesn't make sense to despeculate that. + if (match(CountZeros->getOperand(1), m_One())) + return false; + + // If it's cheap to speculate, there's nothing to do. + auto IntrinsicID = CountZeros->getIntrinsicID(); + if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) || + (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz())) + return false; + + // Only handle legal scalar cases. Anything else requires too much work. + Type *Ty = CountZeros->getType(); + unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); + if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize()) + return false; + + // The intrinsic will be sunk behind a compare against zero and branch. + BasicBlock *StartBlock = CountZeros->getParent(); + BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); + + // Create another block after the count zero intrinsic. 
A PHI will be added + // in this block to select the result of the intrinsic or the bit-width + // constant if the input to the intrinsic is zero. + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); + BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); + + // Set up a builder to create a compare, conditional branch, and PHI. + IRBuilder<> Builder(CountZeros->getContext()); + Builder.SetInsertPoint(StartBlock->getTerminator()); + Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); + + // Replace the unconditional branch that was created by the first split with + // a compare against zero and a conditional branch. + Value *Zero = Constant::getNullValue(Ty); + Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); + Builder.CreateCondBr(Cmp, EndBlock, CallBlock); + StartBlock->getTerminator()->eraseFromParent(); + + // Create a PHI in the end block to select either the output of the intrinsic + // or the bit width of the operand. + Builder.SetInsertPoint(&EndBlock->front()); + PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); + CountZeros->replaceAllUsesWith(PN); + Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); + PN->addIncoming(BitWidth, StartBlock); + PN->addIncoming(CountZeros, CallBlock); + + // We are explicitly handling the zero case, so we can set the intrinsic's + // undefined zero argument to 'true'. This will also prevent reprocessing the + // intrinsic; we only despeculate when a zero input is defined. + CountZeros->setArgOperand(1, Builder.getTrue()); + ModifiedDT = true; + return true; +} + +bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { BasicBlock *BB = CI->getParent(); // Lower inline assembly if we can. @@ -1311,7 +1710,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return true; } // Sink address computing for memory operands into the block. - if (OptimizeInlineAsmInst(CI)) + if (optimizeInlineAsmInst(CI)) return true; } @@ -1372,14 +1771,14 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { // Substituting this can cause recursive simplifications, which can // invalidate our iterator. Use a WeakVH to hold onto it in case this // happens. - WeakVH IterHandle(CurInstIterator); + WeakVH IterHandle(&*CurInstIterator); replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); // If the iterator instruction was recursively deleted, start over at the // start of the block. 
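// Aside: what despeculateCountZeros builds is the IR form of this function
// (plain C++; __builtin_ctzll is the GCC/Clang builtin, standing in for
// @llvm.cttz with the zero-is-undef flag set):

#include <cassert>
#include <cstdint>

static uint64_t despeculatedCttz(uint64_t x) {
  if (x == 0)                 // %cmpz = icmp eq i64 %A, 0; br i1 %cmpz, ...
    return 64;                // the PHI incoming from the entry block
  return __builtin_ctzll(x);  // cond.false: a zero input never reaches it
}

int main() {
  assert(despeculatedCttz(0) == 64);
  assert(despeculatedCttz(8) == 3);
  assert(despeculatedCttz(1ULL << 63) == 63);
  return 0;
}

// Because the zero case is now handled by an explicit branch, the intrinsic's
// second argument can be flipped to true, which is also what prevents the
// pass from reprocessing the same call.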
- if (IterHandle != CurInstIterator) { + if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { CurInstIterator = BB->begin(); SunkAddrs.clear(); } @@ -1387,7 +1786,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { } case Intrinsic::masked_load: { // Scalarize unsupported vector masked load - if (!TTI->isLegalMaskedLoad(CI->getType(), 1)) { + if (!TTI->isLegalMaskedLoad(CI->getType())) { ScalarizeMaskedLoad(CI); ModifiedDT = true; return true; @@ -1395,13 +1794,29 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return false; } case Intrinsic::masked_store: { - if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(), 1)) { + if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { ScalarizeMaskedStore(CI); ModifiedDT = true; return true; } return false; } + case Intrinsic::masked_gather: { + if (!TTI->isLegalMaskedGather(CI->getType())) { + ScalarizeMaskedGather(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_scatter: { + if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { + ScalarizeMaskedScatter(CI); + ModifiedDT = true; + return true; + } + return false; + } case Intrinsic::aarch64_stlxr: case Intrinsic::aarch64_stxr: { ZExtInst *ExtVal = dyn_cast(CI->getArgOperand(0)); @@ -1415,6 +1830,15 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { InsertedInsts.insert(ExtVal); return true; } + case Intrinsic::invariant_group_barrier: + II->replaceAllUsesWith(II->getArgOperand(0)); + II->eraseFromParent(); + return true; + + case Intrinsic::cttz: + case Intrinsic::ctlz: + // If counting zeros is expensive, try to avoid it. + return despeculateCountZeros(II, TLI, DL, ModifiedDT); } if (TLI) { @@ -1426,7 +1850,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { Type *AccessTy; if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace)) while (!PtrOps.empty()) - if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) + if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) return true; } } @@ -1447,9 +1871,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { return false; } -/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return -/// instructions to the predecessor to enable tail call optimizations. The -/// case it is currently looking for is: +/// Look for opportunities to duplicate return instructions to the predecessor +/// to enable tail call optimizations. The case it is currently looking for is: /// @code /// bb0: /// %tmp0 = tail call i32 @f0() @@ -1478,7 +1901,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI, bool& ModifiedDT) { /// %tmp2 = tail call i32 @f2() /// ret i32 %tmp2 /// @endcode -bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { +bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { if (!TLI) return false; @@ -1597,7 +2020,7 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) { namespace { -/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode +/// This is an extended version of TargetLowering::AddrMode /// which holds actual Value*'s for register values. struct ExtAddrMode : public TargetLowering::AddrMode { Value *BaseReg; @@ -1709,10 +2132,10 @@ class TypePromotionTransaction { public: /// \brief Record the position of \p Inst. 
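// Aside: a toy of the ExtAddrMode record introduced above: an x86-style
// base + index*scale + displacement shape that the matcher tries to fill in
// from IR. (Plain C++; only a subset of the real struct's fields, and every
// name here is illustrative.)

#include <cassert>
#include <cstdint>

struct ToyAddrMode {
  int64_t BaseOffs = 0;    // constant displacement folded from GEPs/adds
  uint64_t BaseReg = 0;    // value feeding the base register
  uint64_t ScaledReg = 0;  // value feeding the index register
  int64_t Scale = 0;       // 0 means "no index in use yet"

  uint64_t effectiveAddress() const {
    return BaseReg + ScaledReg * static_cast<uint64_t>(Scale) +
           static_cast<uint64_t>(BaseOffs);
  }
};

int main() {
  ToyAddrMode AM;
  AM.BaseReg = 0x1000;  // e.g. an incoming pointer
  AM.ScaledReg = 5;     // e.g. a loop index
  AM.Scale = 8;         // folded from `shl i64 %i, 3` (Scale = 1 << 3)
  AM.BaseOffs = 16;     // folded from a constant GEP offset
  assert(AM.effectiveAddress() == 0x1000 + 5 * 8 + 16);
  return 0;
}

// Each match step either claims one of these slots for a value or gives up,
// which is why so much of the matcher below is snapshot-and-rollback.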
InsertionHandler(Instruction *Inst) { - BasicBlock::iterator It = Inst; + BasicBlock::iterator It = Inst->getIterator(); HasPrevInstruction = (It != (Inst->getParent()->begin())); if (HasPrevInstruction) - Point.PrevInst = --It; + Point.PrevInst = &*--It; else Point.BB = Inst->getParent(); } @@ -1724,7 +2147,7 @@ class TypePromotionTransaction { Inst->removeFromParent(); Inst->insertAfter(Point.PrevInst); } else { - Instruction *Position = Point.BB->getFirstInsertionPt(); + Instruction *Position = &*Point.BB->getFirstInsertionPt(); if (Inst->getParent()) Inst->moveBefore(Position); else @@ -1797,7 +2220,7 @@ class TypePromotionTransaction { Value *Val = Inst->getOperand(It); OriginalValues.push_back(Val); // Set a dummy one. - // We could use OperandSetter here, but that would implied an overhead + // We could use OperandSetter here, but that would imply an overhead // that we are not willing to pay. Inst->setOperand(It, UndefValue::get(Val->getType())); } @@ -2111,7 +2534,7 @@ class AddressingModeMatcher { unsigned AddrSpace; Instruction *MemoryInst; - /// AddrMode - This is the addressing mode that we're building up. This is + /// This is the addressing mode that we're building up. This is /// part of the return value of this addressing mode matching stuff. ExtAddrMode &AddrMode; @@ -2122,9 +2545,8 @@ class AddressingModeMatcher { /// The ongoing transaction where every action should be registered. TypePromotionTransaction &TPT; - /// IgnoreProfitability - This is set to true when we should not do - /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode - /// always returns true. + /// This is set to true when we should not do profitability checks. + /// When true, IsProfitableToFoldIntoAddressingMode always returns true. bool IgnoreProfitability; AddressingModeMatcher(SmallVectorImpl &AMI, @@ -2143,7 +2565,7 @@ class AddressingModeMatcher { } public: - /// Match - Find the maximal addressing mode that a load/store of V can fold, + /// Find the maximal addressing mode that a load/store of V can fold, /// give an access type of AccessTy. This returns a list of involved /// instructions in AddrModeInsts. /// \p InsertedInsts The instructions inserted by other CodeGenPrepare @@ -2161,32 +2583,32 @@ public: bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS, MemoryInst, Result, InsertedInsts, - PromotedInsts, TPT).MatchAddr(V, 0); + PromotedInsts, TPT).matchAddr(V, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); return Result; } private: - bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); - bool MatchAddr(Value *V, unsigned Depth); - bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, + bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); + bool matchAddr(Value *V, unsigned Depth); + bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, bool *MovedAway = nullptr); - bool IsProfitableToFoldIntoAddressingMode(Instruction *I, + bool isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter); - bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); - bool IsPromotionProfitable(unsigned NewCost, unsigned OldCost, + bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); + bool isPromotionProfitable(unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const; }; -/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode. 
+/// Try adding ScaleReg*Scale to the current addressing mode. /// Return true and update AddrMode if this addr mode is legal for the target, /// false if not. -bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, +bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth) { // If Scale is 1, then this is the same as adding ScaleReg to the addressing // mode. Just process that directly. if (Scale == 1) - return MatchAddr(ScaleReg, Depth); + return matchAddr(ScaleReg, Depth); // If the scale is 0, it takes nothing to add this. if (Scale == 0) @@ -2233,9 +2655,9 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale, return true; } -/// MightBeFoldableInst - This is a little filter, which returns true if an -/// addressing computation involving I might be folded into a load/store -/// accessing it. This doesn't need to be perfect, but needs to accept at least +/// This is a little filter, which returns true if an addressing computation +/// involving I might be folded into a load/store accessing it. +/// This doesn't need to be perfect, but needs to accept at least /// the set of instructions that MatchOperationAddr can. static bool MightBeFoldableInst(Instruction *I) { switch (I->getOpcode()) { @@ -2301,9 +2723,7 @@ class TypePromotionHelper { /// \brief Utility function to determine if \p OpIdx should be promoted when /// promoting \p Inst. static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { - if (isa(Inst) && OpIdx == 0) - return false; - return true; + return !(isa(Inst) && OpIdx == 0); } /// \brief Utility function to promote the operand of \p Ext when this @@ -2413,8 +2833,7 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, Value *OpndVal = Inst->getOperand(0); // Check if we can use this operand in the extension. - // If the type is larger than the result type of the extension, - // we cannot. + // If the type is larger than the result type of the extension, we cannot. if (!OpndVal->getType()->isIntegerTy() || OpndVal->getType()->getIntegerBitWidth() > ConsideredExtType->getIntegerBitWidth()) @@ -2433,18 +2852,16 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst, // #1 get the type of the operand and check the kind of the extended bits. const Type *OpndType; InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); - if (It != PromotedInsts.end() && It->second.IsSExt == IsSExt) - OpndType = It->second.Ty; + if (It != PromotedInsts.end() && It->second.getInt() == IsSExt) + OpndType = It->second.getPointer(); else if ((IsSExt && isa(Opnd)) || (!IsSExt && isa(Opnd))) OpndType = Opnd->getOperand(0)->getType(); else return false; - // #2 check that the truncate just drop extended bits. - if (Inst->getType()->getIntegerBitWidth() >= OpndType->getIntegerBitWidth()) - return true; - - return false; + // #2 check that the truncate just drops extended bits. + return Inst->getType()->getIntegerBitWidth() >= + OpndType->getIntegerBitWidth(); } TypePromotionHelper::Action TypePromotionHelper::getAction( @@ -2553,7 +2970,7 @@ Value *TypePromotionHelper::promoteOperandForOther( } TPT.replaceAllUsesWith(ExtOpnd, Trunc); - // Restore the operand of Ext (which has been replace by the previous call + // Restore the operand of Ext (which has been replaced by the previous call // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. 
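// Aside: canGetThrough's rule for truncates, "the truncate just drops
// extended bits", checked on concrete values (plain C++; the casts mirror
// sext/trunc at the named widths):

#include <cassert>
#include <cstdint>

int main() {
  int8_t x = -5;
  int32_t wide = x;                  // sext i8 -> i32, the recorded promotion
  int16_t viaTrunc = (int16_t)wide;  // trunc i32 -> i16; 16 >= 8, so legal
  int16_t direct = x;                // sext i8 -> i16 computed directly
  assert(viaTrunc == direct);
  return 0;
}

// Had the truncated width dipped below the original 8 bits, the truncate
// would chop into the payload rather than the copies of the sign bit; the
// bit-width comparison in the code above is exactly that guard.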
TPT.setOperand(Ext, 0, ExtOpnd); } @@ -2631,8 +3048,7 @@ Value *TypePromotionHelper::promoteOperandForOther( return ExtOpnd; } -/// IsPromotionProfitable - Check whether or not promoting an instruction -/// to a wider type was profitable. +/// Check whether or not promoting an instruction to a wider type is profitable. /// \p NewCost gives the cost of extension instructions created by the /// promotion. /// \p OldCost gives the cost of extension instructions before the promotion @@ -2640,7 +3056,7 @@ Value *TypePromotionHelper::promoteOperandForOther( /// matched in the addressing mode the promotion. /// \p PromotedOperand is the value that has been promoted. /// \return True if the promotion is profitable, false otherwise. -bool AddressingModeMatcher::IsPromotionProfitable( +bool AddressingModeMatcher::isPromotionProfitable( unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); // The cost of the new extensions is greater than the cost of the @@ -2656,9 +3072,9 @@ bool AddressingModeMatcher::IsPromotionProfitable( return isPromotedInstructionLegal(TLI, DL, PromotedOperand); } -/// MatchOperationAddr - Given an instruction or constant expr, see if we can -/// fold the operation into the addressing mode. If so, update the addressing -/// mode and return true, otherwise return false without modifying AddrMode. +/// Given an instruction or constant expr, see if we can fold the operation +/// into the addressing mode. If so, update the addressing mode and return +/// true, otherwise return false without modifying AddrMode. /// If \p MovedAway is not NULL, it contains the information of whether or /// not AddrInst has to be folded into the addressing mode on success. /// If \p MovedAway == true, \p AddrInst will not be part of the addressing @@ -2667,7 +3083,7 @@ bool AddressingModeMatcher::IsPromotionProfitable( /// This state can happen when AddrInst is a sext, since it may be moved away. /// Therefore, AddrInst may not be valid when MovedAway is true and it must /// not be referenced anymore. -bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, +bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth, bool *MovedAway) { // Avoid exponential behavior on extremely deep expression trees. @@ -2680,13 +3096,13 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, switch (Opcode) { case Instruction::PtrToInt: // PtrToInt is always a noop, as we know that the int type is pointer sized. - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); case Instruction::IntToPtr: { auto AS = AddrInst->getType()->getPointerAddressSpace(); auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); // This inttoptr is a no-op if the integer type is pointer sized. if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); return false; } case Instruction::BitCast: @@ -2698,14 +3114,14 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, // and we don't want to mess around with them. Assume it knows what it // is doing. 
AddrInst->getOperand(0)->getType() != AddrInst->getType()) - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); return false; case Instruction::AddrSpaceCast: { unsigned SrcAS = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) - return MatchAddr(AddrInst->getOperand(0), Depth); + return matchAddr(AddrInst->getOperand(0), Depth); return false; } case Instruction::Add: { @@ -2719,8 +3135,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TypePromotionTransaction::ConstRestorationPt LastKnownGood = TPT.getRestorationPoint(); - if (MatchAddr(AddrInst->getOperand(1), Depth+1) && - MatchAddr(AddrInst->getOperand(0), Depth+1)) + if (matchAddr(AddrInst->getOperand(1), Depth+1) && + matchAddr(AddrInst->getOperand(0), Depth+1)) return true; // Restore the old addr mode info. @@ -2729,8 +3145,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, TPT.rollback(LastKnownGood); // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. - if (MatchAddr(AddrInst->getOperand(0), Depth+1) && - MatchAddr(AddrInst->getOperand(1), Depth+1)) + if (matchAddr(AddrInst->getOperand(0), Depth+1) && + matchAddr(AddrInst->getOperand(1), Depth+1)) return true; // Otherwise we definitely can't merge the ADD in. @@ -2752,7 +3168,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (Opcode == Instruction::Shl) Scale = 1LL << Scale; - return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth); + return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); } case Instruction::GetElementPtr: { // Scan the GEP. We check it if it contains constant offsets and at most @@ -2791,7 +3207,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, if (ConstantOffset == 0 || TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { // Check to see if we can fold the base pointer in too. - if (MatchAddr(AddrInst->getOperand(0), Depth+1)) + if (matchAddr(AddrInst->getOperand(0), Depth+1)) return true; } AddrMode.BaseOffs -= ConstantOffset; @@ -2806,7 +3222,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, AddrMode.BaseOffs += ConstantOffset; // Match the base operand of the GEP. - if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) { + if (!matchAddr(AddrInst->getOperand(0), Depth+1)) { // If it couldn't be matched, just stuff the value in a register. if (AddrMode.HasBaseReg) { AddrMode = BackupAddrMode; @@ -2818,7 +3234,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, } // Match the remaining variable portion of the GEP. - if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, + if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, Depth)) { // If it couldn't be matched, try stuffing the base into a register // instead of matching it, and retrying the match of the scale. @@ -2829,7 +3245,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, AddrMode.HasBaseReg = true; AddrMode.BaseReg = AddrInst->getOperand(0); AddrMode.BaseOffs += ConstantOffset; - if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), + if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, Depth)) { // If even that didn't work, bail. 
AddrMode = BackupAddrMode; @@ -2879,12 +3295,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, ExtAddrMode BackupAddrMode = AddrMode; unsigned OldSize = AddrModeInsts.size(); - if (!MatchAddr(PromotedOperand, Depth) || - // The total of the new cost is equals to the cost of the created + if (!matchAddr(PromotedOperand, Depth) || + // The total of the new cost is equal to the cost of the created // instructions. - // The total of the old cost is equals to the cost of the extension plus + // The total of the old cost is equal to the cost of the extension plus // what we have saved in the addressing mode. - !IsPromotionProfitable(CreatedInstsCost, + !isPromotionProfitable(CreatedInstsCost, ExtCost + (AddrModeInsts.size() - OldSize), PromotedOperand)) { AddrMode = BackupAddrMode; @@ -2899,12 +3315,12 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode, return false; } -/// MatchAddr - If we can, try to add the value of 'Addr' into the current -/// addressing mode. If Addr can't be added to AddrMode this returns false and -/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type -/// or intptr_t for the target. +/// If we can, try to add the value of 'Addr' into the current addressing mode. +/// If Addr can't be added to AddrMode this returns false and leaves AddrMode +/// unmodified. This assumes that Addr is either a pointer type or intptr_t +/// for the target. /// -bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { +bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { // Start a transaction at this point that we will rollback if the matching // fails. TypePromotionTransaction::ConstRestorationPt LastKnownGood = @@ -2929,8 +3345,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { // Check to see if it is possible to fold this operation. bool MovedAway = false; - if (MatchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { - // This instruction may have been move away. If so, there is nothing + if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { + // This instruction may have been moved away. If so, there is nothing // to check here. if (MovedAway) return true; @@ -2938,7 +3354,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { // *profitable* to do so. We use a simple cost model to avoid increasing // register pressure too much. if (I->hasOneUse() || - IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { + isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { AddrModeInsts.push_back(I); return true; } @@ -2950,7 +3366,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { TPT.rollback(LastKnownGood); } } else if (ConstantExpr *CE = dyn_cast(Addr)) { - if (MatchOperationAddr(CE, CE->getOpcode(), Depth)) + if (matchOperationAddr(CE, CE->getOpcode(), Depth)) return true; TPT.rollback(LastKnownGood); } else if (isa(Addr)) { @@ -2983,9 +3399,8 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) { return false; } -/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified -/// inline asm call are due to memory operands. If so, return true, otherwise -/// return false. +/// Check to see if all uses of OpVal by the specified inline asm call are due +/// to memory operands. If so, return true, otherwise return false. 
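// Aside: the Add and GEP cases above share one idiom: copy the half-built
// state, match speculatively, restore wholesale on failure. Reduced to its
// skeleton in plain C++ (ToyMode/tryFold are illustrative names; in the
// patch the pair is BackupAddrMode/AddrMode, with TPT.rollback covering the
// promotion side):

#include <cassert>

struct ToyMode { int baseOffs = 0; bool hasBaseReg = false; };

static bool tryFold(ToyMode &mode, bool deeperMatchSucceeds) {
  ToyMode backup = mode;  // ExtAddrMode BackupAddrMode = AddrMode;
  mode.baseOffs += 8;     // speculative edits made while matching
  mode.hasBaseReg = true;
  if (deeperMatchSucceeds)
    return true;          // keep the speculative edits
  mode = backup;          // AddrMode = BackupAddrMode; (plus TPT.rollback)
  return false;
}

int main() {
  ToyMode m;
  assert(!tryFold(m, false) && m.baseOffs == 0 && !m.hasBaseReg);
  assert(tryFold(m, true) && m.baseOffs == 8 && m.hasBaseReg);
  return 0;
}

// The transaction object exists because promotions create real instructions;
// those need undoing too, not just the plain-old-data addressing record.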
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetMachine &TM) { const Function *F = CI->getParent()->getParent(); @@ -3011,8 +3426,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, return true; } -/// FindAllMemoryUses - Recursively walk all the uses of I until we find a -/// memory use. If we find an obviously non-foldable instruction, return true. +/// Recursively walk all the uses of I until we find a memory use. +/// If we find an obviously non-foldable instruction, return true. /// Add the ultimately found memory instructions to MemoryUses. static bool FindAllMemoryUses( Instruction *I, @@ -3059,11 +3474,11 @@ static bool FindAllMemoryUses( return false; } -/// ValueAlreadyLiveAtInst - Retrn true if Val is already known to be live at -/// the use site that we're folding it into. If so, there is no cost to -/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values -/// that we know are live at the instruction already. -bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, +/// Return true if Val is already known to be live at the use site that we're +/// folding it into. If so, there is no cost to include it in the addressing +/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the +/// instruction already. +bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, Value *KnownLive2) { // If Val is either of the known-live values, we know it is live! if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) @@ -3085,11 +3500,11 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, return Val->isUsedInBasicBlock(MemoryInst->getParent()); } -/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing -/// mode of the machine to fold the specified instruction into a load or store -/// that ultimately uses it. However, the specified instruction has multiple -/// uses. Given this, it may actually increase register pressure to fold it -/// into the load. For example, consider this code: +/// It is possible for the addressing mode of the machine to fold the specified +/// instruction into a load or store that ultimately uses it. +/// However, the specified instruction has multiple uses. +/// Given this, it may actually increase register pressure to fold it +/// into the load. For example, consider this code: /// /// X = ... /// Y = X+1 @@ -3107,7 +3522,7 @@ bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, /// X was live across 'load Z' for other reasons, we actually *would* want to /// fold the addressing mode in the Z case. This would make Y die earlier. bool AddressingModeMatcher:: -IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, +isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) { if (IgnoreProfitability) return true; @@ -3124,9 +3539,9 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, // If the BaseReg or ScaledReg was referenced by the previous addrmode, their // lifetime wasn't extended by adding this instruction. 
- if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) BaseReg = nullptr; - if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) ScaledReg = nullptr; // If folding this instruction (and it's subexprs) didn't extend any live @@ -3171,7 +3586,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, MemoryInst, Result, InsertedInsts, PromotedInsts, TPT); Matcher.IgnoreProfitability = true; - bool Success = Matcher.MatchAddr(Address, 0); + bool Success = Matcher.matchAddr(Address, 0); (void)Success; assert(Success && "Couldn't select *anything*?"); // The match was to check the profitability, the changes made are not @@ -3192,7 +3607,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, } // end anonymous namespace -/// IsNonLocalValue - Return true if the specified values are defined in a +/// Return true if the specified values are defined in a /// different basic block than BB. static bool IsNonLocalValue(Value *V, BasicBlock *BB) { if (Instruction *I = dyn_cast(V)) @@ -3200,16 +3615,15 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) { return false; } -/// OptimizeMemoryInst - Load and Store Instructions often have -/// addressing modes that can do significant amounts of computation. As such, -/// instruction selection will try to get the load or store to do as much -/// computation as possible for the program. The problem is that isel can only -/// see within a single block. As such, we sink as much legal addressing mode -/// stuff into the block as possible. +/// Load and Store Instructions often have addressing modes that can do +/// significant amounts of computation. As such, instruction selection will try +/// to get the load or store to do as much computation as possible for the +/// program. The problem is that isel can only see within a single block. As +/// such, we sink as much legal addressing mode work into the block as possible. /// /// This method is used to optimize both load/store and inline asms with memory /// operands. -bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, +bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, unsigned AddrSpace) { Value *Repl = Addr; @@ -3530,12 +3944,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, if (Repl->use_empty()) { // This can cause recursive deletion, which can invalidate our iterator. // Use a WeakVH to hold onto it in case this happens. - WeakVH IterHandle(CurInstIterator); + WeakVH IterHandle(&*CurInstIterator); BasicBlock *BB = CurInstIterator->getParent(); RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); - if (IterHandle != CurInstIterator) { + if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { // If the iterator instruction was recursively deleted, start over at the // start of the block. CurInstIterator = BB->begin(); @@ -3546,10 +3960,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return true; } -/// OptimizeInlineAsmInst - If there are any memory operands, use -/// OptimizeMemoryInst to sink their address computing into the block when -/// possible / profitable. 
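// Aside: both call sites of the WeakVH guard (here and in optimizeCallInst)
// use the same trick: hold a self-invalidating handle on the current
// instruction, run a simplification that may recursively delete it, then test
// the handle instead of a possibly dangling iterator. std::weak_ptr gives a
// faithful toy of it (plain C++; LLVM's WeakVH nulls itself when its Value
// dies):

#include <cassert>
#include <memory>

int main() {
  auto inst = std::make_shared<int>(42);  // stands in for &*CurInstIterator
  std::weak_ptr<int> handle = inst;       // WeakVH IterHandle(&*CurInstIterator)
  inst.reset();                           // recursive simplification deletes it
  if (handle.expired()) {
    // The handle no longer matches the iterator: restart the scan from
    // BB->begin(), as the patch does after resetting CurInstIterator.
  }
  assert(handle.expired());
  return 0;
}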
-bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { +/// If there are any memory operands, use OptimizeMemoryInst to sink their +/// address computing into the block when possible / profitable. +bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { bool MadeChange = false; const TargetRegisterInfo *TRI = @@ -3566,7 +3979,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) { if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.isIndirect) { Value *OpVal = CS->getArgOperand(ArgNo++); - MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); + MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); } else if (OpInfo.Type == InlineAsm::isInput) ArgNo++; } @@ -3646,7 +4059,7 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { /// %add = add nuw i64 %zext, 4 /// \encode /// Thanks to the promotion, we can match zext(load i32*) to i64. -bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, +bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, Instruction *&Inst, const SmallVectorImpl &Exts, unsigned CreatedInstsCost = 0) { @@ -3696,7 +4109,7 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, } // The promotion is profitable. // Check if it exposes an ext(load). - (void)ExtLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); + (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost); if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || // If we have created a new extension, i.e., now we have two // extensions. We must make sure one of them is merged with @@ -3713,13 +4126,13 @@ bool CodeGenPrepare::ExtLdPromotion(TypePromotionTransaction &TPT, return false; } -/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same -/// basic block as the load, unless conditions are unfavorable. This allows -/// SelectionDAG to fold the extend into the load. +/// Move a zext or sext fed by a load into the same basic block as the load, +/// unless conditions are unfavorable. This allows SelectionDAG to fold the +/// extend into the load. /// \p I[in/out] the extension may be modified during the process if some /// promotions apply. /// -bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { +bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) { // Try to promote a chain of computation if it allows to form // an extended load. TypePromotionTransaction TPT; @@ -3730,7 +4143,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { // Look for a load being extended. 
LoadInst *LI = nullptr; Instruction *OldExt = I; - bool HasPromoted = ExtLdPromotion(TPT, LI, I, Exts); + bool HasPromoted = extLdPromotion(TPT, LI, I, Exts); if (!LI || !I) { assert(!HasPromoted && !LI && "If we did not match any load instruction " "the code must remain the same"); @@ -3780,7 +4193,7 @@ bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *&I) { return true; } -bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { +bool CodeGenPrepare::optimizeExtUses(Instruction *I) { BasicBlock *DefBB = I->getParent(); // If the result of a {s|z}ext and its source are both live out, rewrite all @@ -3838,7 +4251,8 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { if (!InsertedTrunc) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt); + assert(InsertPt != UserBB->end()); + InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); InsertedInsts.insert(InsertedTrunc); } @@ -3851,9 +4265,202 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) { return MadeChange; } -/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be -/// turned into an explicit branch. -static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { +// Find loads whose uses only use some of the loaded value's bits. Add an "and" +// just after the load if the target can fold this into one extload instruction, +// with the hope of eliminating some of the other later "and" instructions using +// the loaded value. "and"s that are made trivially redundant by the insertion +// of the new "and" are removed by this function, while others (e.g. those whose +// path from the load goes through a phi) are left for isel to potentially +// remove. +// +// For example: +// +// b0: +// x = load i32 +// ... +// b1: +// y = and x, 0xff +// z = use y +// +// becomes: +// +// b0: +// x = load i32 +// x' = and x, 0xff +// ... +// b1: +// z = use x' +// +// whereas: +// +// b0: +// x1 = load i32 +// ... +// b1: +// x2 = load i32 +// ... +// b2: +// x = phi x1, x2 +// y = and x, 0xff +// +// becomes (after a call to optimizeLoadExt for each load): +// +// b0: +// x1 = load i32 +// x1' = and x1, 0xff +// ... +// b1: +// x2 = load i32 +// x2' = and x2, 0xff +// ... +// b2: +// x = phi x1', x2' +// y = and x, 0xff +// + +bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { + + if (!Load->isSimple() || + !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) + return false; + + // Skip loads we've already transformed or have no reason to transform. + if (Load->hasOneUse()) { + User *LoadUser = *Load->user_begin(); + if (cast(LoadUser)->getParent() == Load->getParent() && + !dyn_cast(LoadUser)) + return false; + } + + // Look at all uses of Load, looking through phis, to determine how many bits + // of the loaded value are needed. + SmallVector WorkList; + SmallPtrSet Visited; + SmallVector AndsToMaybeRemove; + for (auto *U : Load->users()) + WorkList.push_back(cast(U)); + + EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); + unsigned BitWidth = LoadResultVT.getSizeInBits(); + APInt DemandBits(BitWidth, 0); + APInt WidestAndBits(BitWidth, 0); + + while (!WorkList.empty()) { + Instruction *I = WorkList.back(); + WorkList.pop_back(); + + // Break use-def graph loops. + if (!Visited.insert(I).second) + continue; + + // For a PHI node, push all of its users. 
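The worklist that follows walks the load's transitive users, looking through phis, and unions the bits each user demands; the And/Shl/Trunc switch below supplies the per-user masks. As a standalone model of that arithmetic, with plain 64-bit integers in place of APInt (the Use struct and its fields are invented for this sketch):

#include <cassert>
#include <cstdint>
#include <vector>

struct Use {
  enum Kind { And, Shl, Trunc } K;
  uint64_t Imm; // and-mask, shift amount, or truncated bit width
};

uint64_t demandedBits(const std::vector<Use> &Uses, unsigned BitWidth) {
  assert(BitWidth >= 1 && BitWidth <= 64);
  uint64_t AllOnes = ~0ULL >> (64 - BitWidth);
  uint64_t Demand = 0;
  for (const Use &U : Uses) {
    switch (U.K) {
    case Use::And: // only bits kept by the mask are observable
      Demand |= U.Imm;
      break;
    case Use::Shl: // bits shifted out the top are never observed
      assert(U.Imm < BitWidth);
      Demand |= AllOnes >> U.Imm;
      break;
    case Use::Trunc: // only the low Imm bits survive the truncate
      assert(U.Imm >= 1 && U.Imm <= BitWidth);
      Demand |= ~0ULL >> (64 - U.Imm);
      break;
    }
  }
  return Demand & AllOnes;
}

With a single user `and x, 0xff`, this returns 0xff, and an i8-sized mask is exactly the case the pass can turn into a zero-extending narrow load.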
+ if (auto *Phi = dyn_cast(I)) { + for (auto *U : Phi->users()) + WorkList.push_back(cast(U)); + continue; + } + + switch (I->getOpcode()) { + case llvm::Instruction::And: { + auto *AndC = dyn_cast(I->getOperand(1)); + if (!AndC) + return false; + APInt AndBits = AndC->getValue(); + DemandBits |= AndBits; + // Keep track of the widest and mask we see. + if (AndBits.ugt(WidestAndBits)) + WidestAndBits = AndBits; + if (AndBits == WidestAndBits && I->getOperand(0) == Load) + AndsToMaybeRemove.push_back(I); + break; + } + + case llvm::Instruction::Shl: { + auto *ShlC = dyn_cast(I->getOperand(1)); + if (!ShlC) + return false; + uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); + auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt); + DemandBits |= ShlDemandBits; + break; + } + + case llvm::Instruction::Trunc: { + EVT TruncVT = TLI->getValueType(*DL, I->getType()); + unsigned TruncBitWidth = TruncVT.getSizeInBits(); + auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth); + DemandBits |= TruncBits; + break; + } + + default: + return false; + } + } + + uint32_t ActiveBits = DemandBits.getActiveBits(); + // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the + // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, + // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but + // (and (load x) 1) is not matched as a single instruction, rather as a LDR + // followed by an AND. + // TODO: Look into removing this restriction by fixing backends to either + // return false for isLoadExtLegal for i1 or have them select this pattern to + // a single instruction. + // + // Also avoid hoisting if we didn't see any ands with the exact DemandBits + // mask, since these are the only ands that will be removed by isel. + if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) || + WidestAndBits != DemandBits) + return false; + + LLVMContext &Ctx = Load->getType()->getContext(); + Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); + EVT TruncVT = TLI->getValueType(*DL, TruncTy); + + // Reject cases that won't be matched as extloads. + if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || + !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) + return false; + + IRBuilder<> Builder(Load->getNextNode()); + auto *NewAnd = dyn_cast( + Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); + + // Replace all uses of load with new and (except for the use of load in the + // new and itself). + Load->replaceAllUsesWith(NewAnd); + NewAnd->setOperand(0, Load); + + // Remove any and instructions that are now redundant. + for (auto *And : AndsToMaybeRemove) + // Check that the and mask is the same as the one we decided to put on the + // new and. + if (cast(And->getOperand(1))->getValue() == DemandBits) { + And->replaceAllUsesWith(NewAnd); + if (&*CurInstIterator == And) + CurInstIterator = std::next(And->getIterator()); + And->eraseFromParent(); + ++NumAndUses; + } + + ++NumAndsAdded; + return true; +} + +/// Check if V (an operand of a select instruction) is an expensive instruction +/// that is only used once. +static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { + auto *I = dyn_cast(V); + // If it's safe to speculatively execute, then it should not have side + // effects; therefore, it's safe to sink and possibly *not* execute. 
+ return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && + TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; +} + +/// Returns true if a SelectInst should be turned into an explicit branch. +static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, + SelectInst *SI) { // FIXME: This should use the same heuristics as IfConversion to determine // whether a select is better represented as a branch. This requires that // branch probability metadata is preserved for the select, which is not the @@ -3861,28 +4468,36 @@ static bool isFormingBranchFromSelectProfitable(SelectInst *SI) { CmpInst *Cmp = dyn_cast(SI->getCondition()); - // If the branch is predicted right, an out of order CPU can avoid blocking on - // the compare. Emit cmovs on compares with a memory operand as branches to - // avoid stalls on the load from memory. If the compare has more than one use - // there's probably another cmov or setcc around so it's not worth emitting a - // branch. - if (!Cmp) + // If a branch is predictable, an out-of-order CPU can avoid blocking on its + // comparison condition. If the compare has more than one use, there's + // probably another cmov or setcc around, so it's not worth emitting a branch. + if (!Cmp || !Cmp->hasOneUse()) return false; Value *CmpOp0 = Cmp->getOperand(0); Value *CmpOp1 = Cmp->getOperand(1); - // We check that the memory operand has one use to avoid uses of the loaded - // value directly after the compare, making branches unprofitable. - return Cmp->hasOneUse() && - ((isa(CmpOp0) && CmpOp0->hasOneUse()) || - (isa(CmpOp1) && CmpOp1->hasOneUse())); + // Emit "cmov on compare with a memory operand" as a branch to avoid stalls + // on a load from memory. But if the load is used more than once, do not + // change the select to a branch because the load is probably needed + // regardless of whether the branch is taken or not. + if ((isa(CmpOp0) && CmpOp0->hasOneUse()) || + (isa(CmpOp1) && CmpOp1->hasOneUse())) + return true; + + // If either operand of the select is expensive and only needed on one side + // of the select, we should form a branch. + if (sinkSelectOperand(TTI, SI->getTrueValue()) || + sinkSelectOperand(TTI, SI->getFalseValue())) + return true; + + return false; } /// If we have a SelectInst that will likely profit from branch prediction, /// turn it into a branch. -bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { +bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); // Can we convert the 'select' to CF ? @@ -3902,34 +4517,97 @@ bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) { // We have efficient codegen support for the select instruction. // Check if it is profitable to keep this 'select'. if (!TLI->isPredictableSelectExpensive() || - !isFormingBranchFromSelectProfitable(SI)) + !isFormingBranchFromSelectProfitable(TTI, SI)) return false; } ModifiedDT = true; + // Transform a sequence like this: + // start: + // %cmp = cmp uge i32 %a, %b + // %sel = select i1 %cmp, i32 %c, i32 %d + // + // Into: + // start: + // %cmp = cmp uge i32 %a, %b + // br i1 %cmp, label %select.true, label %select.false + // select.true: + // br label %select.end + // select.false: + // br label %select.end + // select.end: + // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] + // + // In addition, we may sink instructions that produce %c or %d from + // the entry block into the destination(s) of the new branch. 
+ // If the true or false blocks do not contain a sunken instruction, that + // block and its branch may be optimized away. In that case, one side of the + // first branch will point directly to select.end, and the corresponding PHI + // predecessor block will be the start block. + // First, we split the block containing the select into 2 blocks. BasicBlock *StartBlock = SI->getParent(); BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); - BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); - // Create a new block serving as the landing pad for the branch. - BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid", - NextBlock->getParent(), NextBlock); - - // Move the unconditional branch from the block with the select in it into our - // landing pad block. + // Delete the unconditional branch that was just created by the split. StartBlock->getTerminator()->eraseFromParent(); - BranchInst::Create(NextBlock, SmallBlock); + + // These are the new basic blocks for the conditional branch. + // At least one will become an actual new basic block. + BasicBlock *TrueBlock = nullptr; + BasicBlock *FalseBlock = nullptr; + + // Sink expensive instructions into the conditional blocks to avoid executing + // them speculatively. + if (sinkSelectOperand(TTI, SI->getTrueValue())) { + TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + auto *TrueInst = cast(SI->getTrueValue()); + TrueInst->moveBefore(TrueBranch); + } + if (sinkSelectOperand(TTI, SI->getFalseValue())) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + auto *FalseInst = cast(SI->getFalseValue()); + FalseInst->moveBefore(FalseBranch); + } + + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new input value to the PHI. + if (TrueBlock == FalseBlock) { + assert(TrueBlock == nullptr && + "Unexpected basic block transform while optimizing select"); + + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + BranchInst::Create(EndBlock, FalseBlock); + } // Insert the real conditional branch based on the original condition. - BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI); + // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that side of the branch goes to the end block + // directly and the path originates from the start block from the point of + // view of the new PHI. + if (TrueBlock == nullptr) { + BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI); + TrueBlock = StartBlock; + } else if (FalseBlock == nullptr) { + BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI); + FalseBlock = StartBlock; + } else { + BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI); + } // The select itself is replaced with a PHI Node. 
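Before the PHI-creation code below, it is worth seeing why the sinking pays off. A standalone C++ illustration, where expensive() is an invented stand-in for any costly, single-use operand:

#include <cmath>

static double expensive(double X) { return std::sqrt(X) + std::log1p(X); }

// As a select: both operands are computed before the condition is consulted.
double viaSelect(bool C, double X) {
  double T = expensive(X); // paid even when C is false
  return C ? T : 0.0;
}

// As a branch with the operand sunk (the "select.true.sink" block above):
// the cost is paid only on the path that actually needs the value.
double viaBranch(bool C, double X) {
  if (C)
    return expensive(X);
  return 0.0;
}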
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin()); + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); PN->takeName(SI); - PN->addIncoming(SI->getTrueValue(), StartBlock); - PN->addIncoming(SI->getFalseValue(), SmallBlock); + PN->addIncoming(SI->getTrueValue(), TrueBlock); + PN->addIncoming(SI->getFalseValue(), FalseBlock); + SI->replaceAllUsesWith(PN); SI->eraseFromParent(); @@ -3955,7 +4633,7 @@ static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases /// it's often worth sinking a shufflevector splat down to its use so that /// codegen can spot all lanes are identical. -bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { +bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { BasicBlock *DefBB = SVI->getParent(); // Only do this xform if variable vector shifts are particularly expensive. @@ -3987,9 +4665,10 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { if (!InsertedShuffle) { BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); - InsertedShuffle = new ShuffleVectorInst(SVI->getOperand(0), - SVI->getOperand(1), - SVI->getOperand(2), "", InsertPt); + assert(InsertPt != UserBB->end()); + InsertedShuffle = + new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), + SVI->getOperand(2), "", &*InsertPt); } UI->replaceUsesOfWith(SVI, InsertedShuffle); @@ -4005,6 +4684,49 @@ bool CodeGenPrepare::OptimizeShuffleVectorInst(ShuffleVectorInst *SVI) { return MadeChange; } +bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { + if (!TLI || !DL) + return false; + + Value *Cond = SI->getCondition(); + Type *OldType = Cond->getType(); + LLVMContext &Context = Cond->getContext(); + MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType)); + unsigned RegWidth = RegType.getSizeInBits(); + + if (RegWidth <= cast(OldType)->getBitWidth()) + return false; + + // If the register width is greater than the type width, expand the condition + // of the switch instruction and each case constant to the width of the + // register. By widening the type of the switch condition, subsequent + // comparisons (for case comparisons) will not need to be extended to the + // preferred register width, so we will potentially eliminate N-1 extends, + // where N is the number of cases in the switch. + auto *NewType = Type::getIntNTy(Context, RegWidth); + + // Zero-extend the switch condition and case constants unless the switch + // condition is a function argument that is already being sign-extended. + // In that case, we can avoid an unnecessary mask/extension by sign-extending + // everything instead. + Instruction::CastOps ExtType = Instruction::ZExt; + if (auto *Arg = dyn_cast(Cond)) + if (Arg->hasSExtAttr()) + ExtType = Instruction::SExt; + + auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); + ExtInst->insertBefore(SI); + SI->setCondition(ExtInst); + for (SwitchInst::CaseIt Case : SI->cases()) { + APInt NarrowConst = Case.getCaseValue()->getValue(); + APInt WideConst = (ExtType == Instruction::ZExt) ? + NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); + Case.setValue(ConstantInt::get(Context, WideConst)); + } + + return true; +} + namespace { /// \brief Helper class to promote a scalar operation to a vector one. /// This class is used to move downward extractelement transition. 
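Returning to optimizeSwitchInst above: the condition is extended once, and every case constant is widened to match, so per-case comparisons happen at register width with no per-case extends. A standalone sketch of the constant-widening step, assuming i8 cases and a 32-bit register type:

#include <cstdint>
#include <vector>

enum class Ext { Zero, Sign }; // mirrors the pass's ZExt/SExt choice

std::vector<uint32_t> widenCases(const std::vector<uint8_t> &Cases, Ext E) {
  std::vector<uint32_t> Wide;
  Wide.reserve(Cases.size());
  for (uint8_t C : Cases)
    Wide.push_back(E == Ext::Zero
                       ? uint32_t(C)                    // zext: pad with zeros
                       : uint32_t(int32_t(int8_t(C)))); // sext: replicate sign
  return Wide;
}

Sign extension is chosen only when the condition is an argument already carrying a signext attribute, which avoids inserting a mask or extra extension in front of the switch.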
@@ -4138,7 +4860,7 @@ class VectorPromoteHelper { /// \brief Generate a constant vector with \p Val with the same /// number of elements as the transition. /// \p UseSplat defines whether or not \p Val should be replicated - /// accross the whole vector. + /// across the whole vector. /// In other words, if UseSplat == true, we generate , /// otherwise we generate a vector with as many undef as possible: /// where \p Val is only @@ -4320,7 +5042,7 @@ void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { /// Some targets can do store(extractelement) with one instruction. /// Try to push the extractelement towards the stores when the target /// has this feature and this is profitable. -bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { +bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { unsigned CombineCost = UINT_MAX; if (DisableStoreExtract || !TLI || (!StressStoreExtract && @@ -4372,7 +5094,7 @@ bool CodeGenPrepare::OptimizeExtractElementInst(Instruction *Inst) { return false; } -bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { +bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { // Bail out if we inserted the instruction to prevent optimizations from // stepping on each other's toes. if (InsertedInsts.count(I)) @@ -4413,8 +5135,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { TargetLowering::TypeExpandInteger) { return SinkCast(CI); } else { - bool MadeChange = MoveExtToFormExtLoad(I); - return MadeChange | OptimizeExtUses(I); + bool MadeChange = moveExtToFormExtLoad(I); + return MadeChange | optimizeExtUses(I); } } return false; @@ -4425,17 +5147,21 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { return OptimizeCmpExpression(CI); if (LoadInst *LI = dyn_cast(I)) { + stripInvariantGroupMetadata(*LI); if (TLI) { + bool Modified = optimizeLoadExt(LI); unsigned AS = LI->getPointerAddressSpace(); - return OptimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + return Modified; } return false; } if (StoreInst *SI = dyn_cast(I)) { + stripInvariantGroupMetadata(*SI); if (TLI) { unsigned AS = SI->getPointerAddressSpace(); - return OptimizeMemoryInst(I, SI->getOperand(1), + return optimizeMemoryInst(I, SI->getOperand(1), SI->getOperand(0)->getType(), AS); } return false; @@ -4460,23 +5186,26 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { GEPI->replaceAllUsesWith(NC); GEPI->eraseFromParent(); ++NumGEPsElim; - OptimizeInst(NC, ModifiedDT); + optimizeInst(NC, ModifiedDT); return true; } return false; } if (CallInst *CI = dyn_cast(I)) - return OptimizeCallInst(CI, ModifiedDT); + return optimizeCallInst(CI, ModifiedDT); if (SelectInst *SI = dyn_cast(I)) - return OptimizeSelectInst(SI); + return optimizeSelectInst(SI); if (ShuffleVectorInst *SVI = dyn_cast(I)) - return OptimizeShuffleVectorInst(SVI); + return optimizeShuffleVectorInst(SVI); + + if (auto *Switch = dyn_cast(I)) + return optimizeSwitchInst(Switch); if (isa(I)) - return OptimizeExtractElementInst(I); + return optimizeExtractElementInst(I); return false; } @@ -4484,17 +5213,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I, bool& ModifiedDT) { // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. 
-bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
+bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
   SunkAddrs.clear();
   bool MadeChange = false;

   CurInstIterator = BB.begin();
   while (CurInstIterator != BB.end()) {
-    MadeChange |= OptimizeInst(CurInstIterator++, ModifiedDT);
+    MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
     if (ModifiedDT)
       return true;
   }
-  MadeChange |= DupRetToEnableTailCallOpts(&BB);
+  MadeChange |= dupRetToEnableTailCallOpts(&BB);

   return MadeChange;
 }

@@ -4502,12 +5231,12 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB, bool& ModifiedDT) {
 // llvm.dbg.value is far away from the value then iSel may not be able
 // to handle it properly. iSel will drop llvm.dbg.value if it can not
 // find a node corresponding to the value.
-bool CodeGenPrepare::PlaceDbgValues(Function &F) {
+bool CodeGenPrepare::placeDbgValues(Function &F) {
   bool MadeChange = false;
   for (BasicBlock &BB : F) {
     Instruction *PrevNonDbgInst = nullptr;
     for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
-      Instruction *Insn = BI++;
+      Instruction *Insn = &*BI++;
       DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
       // Leave dbg.values that refer to an alloca alone. These
       // intrinsics describe the address of a variable (= the alloca)
@@ -4521,10 +5250,14 @@
       Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());

       if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
+        // If VI is a phi in a block with an EHPad terminator, we can't insert
+        // after it.
+        if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+          continue;
         DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
         DVI->removeFromParent();
         if (isa<PHINode>(VI))
-          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
+          DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
         else
           DVI->insertAfter(VI);
         MadeChange = true;
@@ -4548,7 +5281,7 @@ bool CodeGenPrepare::sinkAndCmp(Function &F) {
     return false;
   bool MadeChange = false;
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
-    BasicBlock *BB = I++;
+    BasicBlock *BB = &*I++;

     // Does this BB end with the following?
     //   %andVal = and %val, #single-bit-set
@@ -4671,6 +5404,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
     if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
       continue;

+    auto *Br1 = cast<BranchInst>(BB.getTerminator());
+    if (Br1->getMetadata(LLVMContext::MD_unpredictable))
+      continue;
+
     unsigned Opc;
     Value *Cond1, *Cond2;
     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
@@ -4697,7 +5434,6 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
     // Update original basic block by using the first condition directly by the
     // branch instruction and removing the no longer needed and/or instruction.
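What splitBranchCondition produces for the `and` case, sketched as a standalone C++ function before the diff continues below (the `or` case is the mirror image; block names and the TBB/FBB callbacks are illustrative, not the pass's code):

// In IR terms:
//   bb:       br i1 (and i1 %c1, %c2), label %tbb, label %fbb
// becomes
//   bb:       br i1 %c1, label %bb.split, label %fbb
//   bb.split: br i1 %c2, label %tbb, label %fbb
void splitAndBranch(bool C1, bool C2, void (&TBB)(), void (&FBB)()) {
  if (C1) {   // first branch tests %c1 alone
    if (C2) { // second branch tests %c2
      TBB();
      return;
    }
  }
  FBB();
}

Two cheap, individually predictable branches can beat one branch fed by a materialized `and`, which is also why the new MD_unpredictable check above bails out when profiling says the condition will not predict well.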
- auto *Br1 = cast(BB.getTerminator()); Br1->setCondition(Cond1); LogicOp->eraseFromParent(); @@ -4828,3 +5564,8 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) { } return MadeChange; } + +void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) { + if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group)) + I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID()); +} diff --git a/lib/CodeGen/CoreCLRGC.cpp b/lib/CodeGen/CoreCLRGC.cpp index 28c97ba71bd9..ff7c0d5dc0ac 100644 --- a/lib/CodeGen/CoreCLRGC.cpp +++ b/lib/CodeGen/CoreCLRGC.cpp @@ -38,9 +38,9 @@ public: UsesMetadata = false; CustomRoots = false; } - Optional isGCManagedPointer(const Value *V) const override { + Optional isGCManagedPointer(const Type *Ty) const override { // Method is only valid on pointer typed values. - PointerType *PT = cast(V->getType()); + const PointerType *PT = cast(Ty); // We pick addrspace(1) as our GC managed heap. return (1 == PT->getAddressSpace()); } diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index dba280fd5aa2..c924ba30c8a1 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -52,14 +52,13 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { // Clear "do not change" set. KeepRegs.reset(); - bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); + bool IsReturnBlock = BB->isReturnBlock(); // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + for (const auto &LI : (*SI)->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { unsigned Reg = *AI; Classes[Reg] = reinterpret_cast(-1); KillIndices[Reg] = BBSize; diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index 0a188c0935ad..af6b6a392b75 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -31,10 +31,39 @@ #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; -DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], +// -------------------------------------------------------------------- +// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp + +namespace { + DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { + return (Inp << DFA_MAX_RESOURCES) | FuncUnits; + } + + /// Return the DFAInput for an instruction class input vector. + /// This function is used in both DFAPacketizer.cpp and in + /// DFAPacketizerEmitter.cpp. + DFAInput getDFAInsnInput(const std::vector &InsnClass) { + DFAInput InsnInput = 0; + assert ((InsnClass.size() <= DFA_MAX_RESTERMS) && + "Exceeded maximum number of DFA terms"); + for (auto U : InsnClass) + InsnInput = addDFAFuncUnits(InsnInput, U); + return InsnInput; + } +} +// -------------------------------------------------------------------- + +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, + const DFAStateInput (*SIT)[2], const unsigned *SET): InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), - DFAStateEntryTable(SET) {} + DFAStateEntryTable(SET) { + // Make sure DFA types are large enough for the number of terms & resources. 
+ assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); + assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); +} // @@ -60,26 +89,42 @@ void DFAPacketizer::ReadTable(unsigned int state) { DFAStateInputTable[i][1]; } +// +// getInsnInput - Return the DFAInput for an instruction class. +// +DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { + // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. + DFAInput InsnInput = 0; + unsigned i = 0; + for (const InstrStage *IS = InstrItins->beginStage(InsnClass), + *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) { + InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); + assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); + } + return InsnInput; +} + +// getInsnInput - Return the DFAInput for an instruction class input vector. +DFAInput DFAPacketizer::getInsnInput(const std::vector &InsnClass) { + return getDFAInsnInput(InsnClass); +} // canReserveResources - Check if the resources occupied by a MCInstrDesc // are available in the current state. bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); - const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); - unsigned FuncUnits = IS->getUnits(); - UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); return (CachedTable.count(StateTrans) != 0); } - // reserveResources - Reserve the resources occupied by a MCInstrDesc and // change the current state to reflect that change. void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { unsigned InsnClass = MID->getSchedClass(); - const llvm::InstrStage *IS = InstrItins->beginStage(InsnClass); - unsigned FuncUnits = IS->getUnits(); - UnsignPair StateTrans = UnsignPair(CurrentState, FuncUnits); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); ReadTable(CurrentState); assert(CachedTable.count(StateTrans) != 0); CurrentState = CachedTable[StateTrans]; @@ -104,32 +149,35 @@ namespace llvm { // DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides // Schedule method to build the dependence graph. class DefaultVLIWScheduler : public ScheduleDAGInstrs { +private: + AliasAnalysis *AA; public: DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, - bool IsPostRA); + AliasAnalysis *AA); // Schedule - Actual scheduling work. void schedule() override; }; } DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, - MachineLoopInfo &MLI, bool IsPostRA) - : ScheduleDAGInstrs(MF, &MLI, IsPostRA) { + MachineLoopInfo &MLI, + AliasAnalysis *AA) + : ScheduleDAGInstrs(MF, &MLI), AA(AA) { CanHandleTerminators = true; } void DefaultVLIWScheduler::schedule() { // Build the scheduling graph. 
-  buildSchedGraph(nullptr);
+  buildSchedGraph(AA);
 }

 // VLIWPacketizerList Ctor
 VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF,
-                                       MachineLoopInfo &MLI, bool IsPostRA)
-    : MF(MF) {
+                                       MachineLoopInfo &MLI, AliasAnalysis *AA)
+    : MF(MF), AA(AA) {
   TII = MF.getSubtarget().getInstrInfo();
   ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
-  VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, IsPostRA);
+  VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, AA);
 }

 // VLIWPacketizerList Dtor
@@ -147,7 +195,7 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
                                    MachineInstr *MI) {
   if (CurrentPacketMIs.size() > 1) {
     MachineInstr *MIFirst = CurrentPacketMIs.front();
-    finalizeBundle(*MBB, MIFirst, MI);
+    finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator());
   }
   CurrentPacketMIs.clear();
   ResourceTracker->clearResources();
@@ -191,7 +239,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
       // Ask DFA if machine resource is available for MI.
       bool ResourceAvail = ResourceTracker->canReserveResources(MI);

-      if (ResourceAvail) {
+      if (ResourceAvail && shouldAddToPacket(MI)) {
         // Dependency check for MI with instructions in CurrentPacketMIs.
         for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(),
              VE = CurrentPacketMIs.end(); VI != VE; ++VI) {
@@ -210,7 +258,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
         } // !isLegalToPacketizeTogether.
       } // For all instructions in CurrentPacketMIs.
       } else {
-        // End the packet if resource is not available.
+        // End the packet if resource is not available, or if the instruction
+        // should not be added to the current packet.
         endPacket(MBB, MI);
       }

diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 941129b5cc95..b11b49717c45 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -101,26 +101,22 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
   // Loop over all instructions in all blocks, from bottom to top, so that it's
   // more likely that chains of dependent but ultimately dead instructions will
   // be cleaned up.
-  for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend();
-       I != E; ++I) {
-    MachineBasicBlock *MBB = &*I;
-
+  for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) {
    // Start out assuming that reserved registers are live out of this block.
     LivePhysRegs = MRI->getReservedRegs();

    // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
     // live across blocks, but some targets (x86) can have flags live out of a
     // block.
-    for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
-         E = MBB->succ_end(); S != E; S++)
-      for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
-           LI != (*S)->livein_end(); LI++)
-        LivePhysRegs.set(*LI);
+    for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(),
+         E = MBB.succ_end(); S != E; S++)
+      for (const auto &LI : (*S)->liveins())
+        LivePhysRegs.set(LI.PhysReg);

     // Now scan the instructions and delete dead ones, tracking physreg
     // liveness as we go.
-    for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
-         MIE = MBB->rend(); MII != MIE; ) {
+    for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(),
+         MIE = MBB.rend(); MII != MIE; ) {
       MachineInstr *MI = &*MII;

       // If the instruction is dead, delete it!
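The bottom-up scan above can be restated as a standalone sketch: registers are plain ints, a std::set stands in for the pass's LivePhysRegs bit vector, and register aliasing, reserved registers, and debug values are deliberately omitted:

#include <set>
#include <vector>

struct Inst {
  std::vector<int> Defs;   // registers written
  std::vector<int> Uses;   // registers read
  bool SideEffects = false;
};

// Walk the block bottom-up; delete an instruction when nothing it defines is
// live below it and it has no side effects, then update liveness and continue.
void eliminateDead(std::vector<Inst> &Block, std::set<int> Live) {
  for (int i = int(Block.size()) - 1; i >= 0; --i) {
    const Inst &I = Block[i];
    bool Dead = !I.SideEffects && !I.Defs.empty();
    for (int D : I.Defs)
      if (Live.count(D))
        Dead = false; // some later instruction reads this def
    if (Dead) {
      Block.erase(Block.begin() + i);
      continue; // liveness is unchanged: the deleted defs were not live
    }
    for (int D : I.Defs)
      Live.erase(D);  // a def ends the live range above it
    for (int U : I.Uses)
      Live.insert(U); // a use starts one
  }
}

Scanning bottom-up is what lets whole chains die in one pass: once the last user is deleted, its operands' defs become dead in the same traversal.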
@@ -132,7 +128,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { MI->eraseFromParentAndMarkDBGValuesForRemoval(); AnyChanges = true; ++NumDeletes; - MIE = MBB->rend(); + MIE = MBB.rend(); // MII is now pointing to the next instruction to process, // so don't increment it. continue; diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index e019dfbc8f7d..eae78a950d9a 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -16,7 +16,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -192,9 +192,9 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { if (Resumes.empty()) return false; - // Check the personality, don't do anything if it's for MSVC. + // Check the personality, don't do anything if it's funclet-based. EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn()); - if (isMSVCEHPersonality(Pers)) + if (isFuncletEHPersonality(Pers)) return false; LLVMContext &Ctx = Fn.getContext(); diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index fbc4d97c4987..f3536d74111e 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -538,11 +538,11 @@ void SSAIfConv::convertIf(SmallVectorImpl &RemovedBlocks) { // Fix up the CFG, temporarily leave Head without any successors. Head->removeSuccessor(TBB); - Head->removeSuccessor(FBB); + Head->removeSuccessor(FBB, true); if (TBB != Tail) - TBB->removeSuccessor(Tail); + TBB->removeSuccessor(Tail, true); if (FBB != Tail) - FBB->removeSuccessor(Tail); + FBB->removeSuccessor(Tail, true); // Fix up Head's terminators. // It should become a single branch or a fallthrough. diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 5b09cf1a0fd7..c550008da025 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -375,9 +375,8 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // This is the entry block. if (MBB->pred_empty()) { - for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), - e = MBB->livein_end(); i != e; ++i) { - for (int rx : regIndices(*i)) { + for (const auto &LI : MBB->liveins()) { + for (int rx : regIndices(LI.PhysReg)) { // Treat function live-ins as if they were defined just before the first // instruction. Usually, function arguments are set up immediately // before the call. @@ -559,12 +558,11 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { MachineInstr *UndefMI = UndefReads.back().first; unsigned OpIdx = UndefReads.back().second; - for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); - I != E; ++I) { + for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { // Update liveness, including the current instruction's defs. - LiveRegSet.stepBackward(*I); + LiveRegSet.stepBackward(I); - if (UndefMI == &*I) { + if (UndefMI == &I) { if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); @@ -733,12 +731,13 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { // If no relevant registers are used in the function, we can skip it // completely. 
bool anyregs = false; - for (TargetRegisterClass::const_iterator I = RC->begin(), E = RC->end(); - I != E; ++I) - if (MF->getRegInfo().isPhysRegUsed(*I)) { + const MachineRegisterInfo &MRI = mf.getRegInfo(); + for (unsigned Reg : *RC) { + if (MRI.isPhysRegUsed(Reg)) { anyregs = true; break; } + } if (!anyregs) return false; // Initialize the AliasMap on the first use. @@ -752,7 +751,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { AliasMap[*AI].push_back(i); } - MachineBasicBlock *Entry = MF->begin(); + MachineBasicBlock *Entry = &*MF->begin(); ReversePostOrderTraversal RPOT(Entry); SmallVector Loops; for (ReversePostOrderTraversal::rpo_iterator @@ -761,22 +760,19 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { enterBasicBlock(MBB); if (SeenUnknownBackEdge) Loops.push_back(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) - visitInstr(I); + for (MachineInstr &MI : *MBB) + visitInstr(&MI); processUndefReads(MBB); leaveBasicBlock(MBB); } // Visit all the loop blocks again in order to merge DomainValues from // back-edges. - for (unsigned i = 0, e = Loops.size(); i != e; ++i) { - MachineBasicBlock *MBB = Loops[i]; + for (MachineBasicBlock *MBB : Loops) { enterBasicBlock(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) - if (!I->isDebugValue()) - processDefs(I, false); + for (MachineInstr &MI : *MBB) + if (!MI.isDebugValue()) + processDefs(&MI, false); processUndefReads(MBB); leaveBasicBlock(MBB); } diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index 55e809e24278..90ddac94f93b 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -50,7 +50,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { // Iterate through each instruction in the function, looking for pseudos. for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE; ) { MachineInstr *MI = MBBI++; @@ -63,7 +63,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { // The expansion may involve new basic blocks. if (NewMBB != MBB) { MBB = NewMBB; - I = NewMBB; + I = NewMBB->getIterator(); MBBI = NewMBB->begin(); MBBE = NewMBB->end(); } diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp new file mode 100644 index 000000000000..8b2f505ff028 --- /dev/null +++ b/lib/CodeGen/FuncletLayout.cpp @@ -0,0 +1,55 @@ +//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic block placement transformations which result in +// funclets being contiguous. 
+// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +#define DEBUG_TYPE "funclet-layout" + +namespace { +class FuncletLayout : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + FuncletLayout() : MachineFunctionPass(ID) { + initializeFuncletLayoutPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; +}; +} + +char FuncletLayout::ID = 0; +char &llvm::FuncletLayoutID = FuncletLayout::ID; +INITIALIZE_PASS(FuncletLayout, "funclet-layout", + "Contiguously Lay Out Funclets", false, false) + +bool FuncletLayout::runOnMachineFunction(MachineFunction &F) { + DenseMap FuncletMembership = + getFuncletMembership(F); + if (FuncletMembership.empty()) + return false; + + F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) { + auto FuncletX = FuncletMembership.find(&X); + auto FuncletY = FuncletMembership.find(&Y); + assert(FuncletX != FuncletMembership.end()); + assert(FuncletY != FuncletMembership.end()); + return FuncletX->second < FuncletY->second; + }); + + // Conservatively assume we changed something. + return true; +} diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp index d8edd7e4063f..484d31737b2e 100644 --- a/lib/CodeGen/GCRootLowering.cpp +++ b/lib/CodeGen/GCRootLowering.cpp @@ -158,7 +158,7 @@ static bool InsertRootInitializers(Function &F, AllocaInst **Roots, // Search for initializers in the initial BB. SmallPtrSet InitedRoots; - for (; !CouldBecomeSafePoint(IP); ++IP) + for (; !CouldBecomeSafePoint(&*IP); ++IP) if (StoreInst *SI = dyn_cast(IP)) if (AllocaInst *AI = dyn_cast(SI->getOperand(1)->stripPointerCasts())) @@ -320,7 +320,9 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { RI = FI->removeStackRoot(RI); } else { - RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num); + unsigned FrameReg; // FIXME: surely GCRoot ought to store the + // register that the offset is from? + RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg); ++RI; } } diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 6f9e8394081e..dd9a84086181 100644 --- a/lib/CodeGen/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -108,10 +108,9 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, // FIXME: this could be a transitional option, and we probably need to remove // it if only we are sure this optimization could always benefit all targets. -static cl::opt +static cl::opt EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, - cl::desc("Enable global merge pass on external linkage"), - cl::init(false)); + cl::desc("Enable global merge pass on external linkage")); STATISTIC(NumMerged, "Number of globals merged"); namespace { @@ -129,11 +128,14 @@ namespace { /// FIXME: This could learn about optsize, and be used in the cost model. bool OnlyOptimizeForSize; + /// Whether we should merge global variables that have external linkage. + bool MergeExternalGlobals; + bool doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const; /// \brief Merge everything in \p Globals for which the corresponding bit /// in \p GlobalSet is set. 
- bool doMerge(SmallVectorImpl &Globals, + bool doMerge(const SmallVectorImpl &Globals, const BitVector &GlobalSet, Module &M, bool isConst, unsigned AddrSpace) const; @@ -158,9 +160,11 @@ namespace { static char ID; // Pass identification, replacement for typeid. explicit GlobalMerge(const TargetMachine *TM = nullptr, unsigned MaximalOffset = 0, - bool OnlyOptimizeForSize = false) + bool OnlyOptimizeForSize = false, + bool MergeExternalGlobals = false) : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset), - OnlyOptimizeForSize(OnlyOptimizeForSize) { + OnlyOptimizeForSize(OnlyOptimizeForSize), + MergeExternalGlobals(MergeExternalGlobals) { initializeGlobalMergePass(*PassRegistry::getPassRegistry()); } @@ -189,14 +193,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { auto &DL = M.getDataLayout(); // FIXME: Find better heuristics - std::stable_sort( - Globals.begin(), Globals.end(), - [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { - Type *Ty1 = cast(GV1->getType())->getElementType(); - Type *Ty2 = cast(GV2->getType())->getElementType(); - - return (DL.getTypeAllocSize(Ty1) < DL.getTypeAllocSize(Ty2)); - }); + std::stable_sort(Globals.begin(), Globals.end(), + [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + return DL.getTypeAllocSize(GV1->getValueType()) < + DL.getTypeAllocSize(GV2->getValueType()); + }); // If we want to just blindly group all globals together, do so. if (!GlobalMergeGroupByUse) { @@ -207,7 +208,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, // If we want to be smarter, look at all uses of each global, to try to // discover all sets of globals used together, and how many times each of - // these sets occured. + // these sets occurred. // // Keep this reasonably efficient, by having an append-only list of all sets // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of @@ -302,8 +303,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Function *ParentFn = I->getParent()->getParent(); // If we're only optimizing for size, ignore non-minsize functions. - if (OnlyOptimizeForSize && - !ParentFn->hasFnAttribute(Attribute::MinSize)) + if (OnlyOptimizeForSize && !ParentFn->optForMinSize()) continue; size_t UGSIdx = GlobalUsesByFunction[ParentFn]; @@ -406,15 +406,14 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, return Changed; } -bool GlobalMerge::doMerge(SmallVectorImpl &Globals, +bool GlobalMerge::doMerge(const SmallVectorImpl &Globals, const BitVector &GlobalSet, Module &M, bool isConst, unsigned AddrSpace) const { + assert(Globals.size() > 1); Type *Int32Ty = Type::getInt32Ty(M.getContext()); auto &DL = M.getDataLayout(); - assert(Globals.size() > 1); - DEBUG(dbgs() << " Trying to merge set, starts with #" << GlobalSet.find_first() << "\n"); @@ -425,58 +424,44 @@ bool GlobalMerge::doMerge(SmallVectorImpl &Globals, std::vector Tys; std::vector Inits; - bool HasExternal = false; - GlobalVariable *TheFirstExternal = 0; for (j = i; j != -1; j = GlobalSet.find_next(j)) { - Type *Ty = Globals[j]->getType()->getElementType(); + Type *Ty = Globals[j]->getValueType(); MergedSize += DL.getTypeAllocSize(Ty); if (MergedSize > MaxOffset) { break; } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); - - if (Globals[j]->hasExternalLinkage() && !HasExternal) { - HasExternal = true; - TheFirstExternal = Globals[j]; - } } - // If merged variables doesn't have external linkage, we needn't to expose - // the symbol after merging. 
- GlobalValue::LinkageTypes Linkage = HasExternal - ? GlobalValue::ExternalLinkage - : GlobalValue::InternalLinkage; - StructType *MergedTy = StructType::get(M.getContext(), Tys); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); - // If merged variables have external linkage, we use symbol name of the - // first variable merged as the suffix of global symbol name. This would - // be able to avoid the link-time naming conflict for globalm symbols. GlobalVariable *MergedGV = new GlobalVariable( - M, MergedTy, isConst, Linkage, MergedInit, - HasExternal ? "_MergedGlobals_" + TheFirstExternal->getName() - : "_MergedGlobals", - nullptr, GlobalVariable::NotThreadLocal, AddrSpace); + M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit, + "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace); - for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k)) { + for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); std::string Name = Globals[k]->getName(); Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), - ConstantInt::get(Int32Ty, idx++) + ConstantInt::get(Int32Ty, idx), }; Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); - if (Linkage != GlobalValue::InternalLinkage) { - // Generate a new alias... - auto *PTy = cast(GEP->getType()); - GlobalAlias::create(PTy, Linkage, Name, GEP, &M); + // When the linkage is not internal we must emit an alias for the original + // variable name as it may be accessed from another object. On non-Mach-O + // we can also emit an alias for internal linkage as it's safe to do so. + // It's not safe on Mach-O as the alias (and thus the portion of the + // MergedGlobals variable) may be dead stripped at link time. + if (Linkage != GlobalValue::InternalLinkage || + !TM->getTargetTriple().isOSBinFormatMachO()) { + GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M); } NumMerged++; @@ -535,61 +520,57 @@ bool GlobalMerge::doInitialization(Module &M) { setMustKeepGlobalVariables(M); // Grab all non-const globals. - for (Module::global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) { + for (auto &GV : M.globals()) { // Merge is safe for "normal" internal or external globals only - if (I->isDeclaration() || I->isThreadLocal() || I->hasSection()) + if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection()) continue; - if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) && - !I->hasInternalLinkage()) + if (!(MergeExternalGlobals && GV.hasExternalLinkage()) && + !GV.hasInternalLinkage()) continue; - PointerType *PT = dyn_cast(I->getType()); + PointerType *PT = dyn_cast(GV.getType()); assert(PT && "Global variable is not a pointer!"); unsigned AddressSpace = PT->getAddressSpace(); // Ignore fancy-aligned globals for now. - unsigned Alignment = DL.getPreferredAlignment(I); - Type *Ty = I->getType()->getElementType(); + unsigned Alignment = DL.getPreferredAlignment(&GV); + Type *Ty = GV.getValueType(); if (Alignment > DL.getABITypeAlignment(Ty)) continue; // Ignore all 'special' globals. 
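The shape of doMerge's output, modeled in standalone C++ before the diff resumes below: the anonymous struct plays the role of the _MergedGlobals variable, field accesses correspond to the constant inbounds GEPs, and the references stand in for the GlobalAlias symbols kept for externally visible names. The names here are illustrative:

#include <cstdio>

// Before the pass: two small globals, two symbols, two base addresses.
// After: one contiguous allocation addressed at fixed offsets.
static struct {
  int X;
  int Y;
} _MergedGlobals;

static int &X = _MergedGlobals.X; // alias keeps the original name usable
static int &Y = _MergedGlobals.Y;

int main() {
  X = 1;
  Y = 2;
  // One base register can now reach both values at constant offsets.
  std::printf("%d %d\n", _MergedGlobals.X, _MergedGlobals.Y);
  return 0;
}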
- if (I->getName().startswith("llvm.") || - I->getName().startswith(".llvm.")) + if (GV.getName().startswith("llvm.") || + GV.getName().startswith(".llvm.")) continue; // Ignore all "required" globals: - if (isMustKeepGlobalVariable(I)) + if (isMustKeepGlobalVariable(&GV)) continue; if (DL.getTypeAllocSize(Ty) < MaxOffset) { - if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal()) - BSSGlobals[AddressSpace].push_back(I); - else if (I->isConstant()) - ConstGlobals[AddressSpace].push_back(I); + if (TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) + BSSGlobals[AddressSpace].push_back(&GV); + else if (GV.isConstant()) + ConstGlobals[AddressSpace].push_back(&GV); else - Globals[AddressSpace].push_back(I); + Globals[AddressSpace].push_back(&GV); } } - for (DenseMap >::iterator - I = Globals.begin(), E = Globals.end(); I != E; ++I) - if (I->second.size() > 1) - Changed |= doMerge(I->second, M, false, I->first); + for (auto &P : Globals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, false, P.first); - for (DenseMap >::iterator - I = BSSGlobals.begin(), E = BSSGlobals.end(); I != E; ++I) - if (I->second.size() > 1) - Changed |= doMerge(I->second, M, false, I->first); + for (auto &P : BSSGlobals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, false, P.first); if (EnableGlobalMergeOnConst) - for (DenseMap >::iterator - I = ConstGlobals.begin(), E = ConstGlobals.end(); I != E; ++I) - if (I->second.size() > 1) - Changed |= doMerge(I->second, M, true, I->first); + for (auto &P : ConstGlobals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, true, P.first); return Changed; } @@ -604,6 +585,9 @@ bool GlobalMerge::doFinalization(Module &M) { } Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset, - bool OnlyOptimizeForSize) { - return new GlobalMerge(TM, Offset, OnlyOptimizeForSize); + bool OnlyOptimizeForSize, + bool MergeExternalByDefault) { + bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ? 
+ MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE); + return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal); } diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index ee0532bfc630..c38c9d22266e 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include using namespace llvm; @@ -190,10 +191,10 @@ namespace { private: bool ReverseBranchCondition(BBInfo &BBI); bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - const BranchProbability &Prediction) const; + BranchProbability Prediction) const; bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - const BranchProbability &Prediction) const; + BranchProbability Prediction) const; bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, unsigned &Dups1, unsigned &Dups2) const; void ScanInstructions(BBInfo &BBI); @@ -218,7 +219,7 @@ namespace { bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Cycle, unsigned Extra, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, Prediction); } @@ -227,7 +228,7 @@ namespace { unsigned TCycle, unsigned TExtra, MachineBasicBlock &FBB, unsigned FCycle, unsigned FExtra, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { return TCycle > 0 && FCycle > 0 && TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, Prediction); @@ -462,11 +463,11 @@ bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { /// getNextBlock - Returns the next block in the function blocks ordering. If /// it is the end, returns NULL. static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { - MachineFunction::iterator I = BB; + MachineFunction::iterator I = BB->getIterator(); MachineFunction::iterator E = BB->getParent()->end(); if (++I == E) return nullptr; - return I; + return &*I; } /// ValidSimple - Returns true if the 'true' block (along with its @@ -474,7 +475,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { /// number of instructions that the ifcvt would need to duplicate if performed /// in Dups. bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -501,7 +502,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, /// if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, bool FalseBranch, unsigned &Dups, - const BranchProbability &Prediction) const { + BranchProbability Prediction) const { Dups = 0; if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) return false; @@ -530,10 +531,10 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB; if (!TExit && blockAlwaysFallThrough(TrueBBI)) { - MachineFunction::iterator I = TrueBBI.BB; + MachineFunction::iterator I = TrueBBI.BB->getIterator(); if (++I == TrueBBI.BB->getParent()->end()) return false; - TExit = I; + TExit = &*I; } return TExit && TExit == FalseBBI.BB; } @@ -948,10 +949,8 @@ void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, /// candidates. 
void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector &Tokens) { - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *BB = I; - AnalyzeBlock(BB, Tokens); - } + for (auto &BB : MF) + AnalyzeBlock(&BB, Tokens); // Sort to favor more complex ifcvt scheme. std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); @@ -961,14 +960,14 @@ void IfConverter::AnalyzeBlocks(MachineFunction &MF, /// that all the intervening blocks are empty (given BB can fall through to its /// next block). static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { - MachineFunction::iterator PI = BB; + MachineFunction::iterator PI = BB->getIterator(); MachineFunction::iterator I = std::next(PI); - MachineFunction::iterator TI = ToBB; + MachineFunction::iterator TI = ToBB->getIterator(); MachineFunction::iterator E = BB->getParent()->end(); while (I != TI) { // Check isSuccessor to avoid case where the next block is empty, but // it's not a successor. - if (I == E || !I->empty() || !PI->isSuccessor(I)) + if (I == E || !I->empty() || !PI->isSuccessor(&*I)) return false; PI = I++; } @@ -1114,7 +1113,7 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB); + BBI.BB->removeSuccessor(CvtBBI->BB, true); } else { RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); @@ -1153,28 +1152,6 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { return true; } -/// Scale down weights to fit into uint32_t. NewTrue is the new weight -/// for successor TrueBB, and NewFalse is the new weight for successor -/// FalseBB. -static void ScaleWeights(uint64_t NewTrue, uint64_t NewFalse, - MachineBasicBlock *MBB, - const MachineBasicBlock *TrueBB, - const MachineBasicBlock *FalseBB, - const MachineBranchProbabilityInfo *MBPI) { - uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; - uint32_t Scale = (NewMax / UINT32_MAX) + 1; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); - SI != SE; ++SI) { - if (*SI == TrueBB) - MBB->setSuccWeight(SI, (uint32_t)(NewTrue / Scale)); - else if (*SI == FalseBB) - MBB->setSuccWeight(SI, (uint32_t)(NewFalse / Scale)); - else - MBB->setSuccWeight(SI, MBPI->getEdgeWeight(MBB, SI) / Scale); - } -} - /// IfConvertTriangle - If convert a triangle sub-CFG. /// bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { @@ -1231,16 +1208,14 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { DontKill.clear(); bool HasEarlyExit = CvtBBI->FalseBB != nullptr; - uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0; - uint32_t WeightScale = 0; + BranchProbability CvtNext, CvtFalse, BBNext, BBCvt; if (HasEarlyExit) { - // Get weights before modifying CvtBBI->BB and BBI.BB. - CvtNext = MBPI->getEdgeWeight(CvtBBI->BB, NextBBI->BB); - CvtFalse = MBPI->getEdgeWeight(CvtBBI->BB, CvtBBI->FalseBB); - BBNext = MBPI->getEdgeWeight(BBI.BB, NextBBI->BB); - BBCvt = MBPI->getEdgeWeight(BBI.BB, CvtBBI->BB); - SumWeight = MBPI->getSumForBlock(CvtBBI->BB, WeightScale); + // Get probabilities before modifying CvtBBI->BB and BBI.BB. 
+ CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB); + CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB); + BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB); + BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB); } if (CvtBBI->BB->pred_size() > 1) { @@ -1251,7 +1226,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. - BBI.BB->removeSuccessor(CvtBBI->BB); + BBI.BB->removeSuccessor(CvtBBI->BB, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); @@ -1268,22 +1243,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) llvm_unreachable("Unable to reverse branch condition!"); - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); - BBI.BB->addSuccessor(CvtBBI->FalseBB); - // Update the edge weight for both CvtBBI->FalseBB and NextBBI. - // New_Weight(BBI.BB, NextBBI->BB) = - // Weight(BBI.BB, NextBBI->BB) * getSumForBlock(CvtBBI->BB) + - // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, NextBBI->BB) - // New_Weight(BBI.BB, CvtBBI->FalseBB) = - // Weight(BBI.BB, CvtBBI->BB) * Weight(CvtBBI->BB, CvtBBI->FalseBB) - uint64_t NewNext = BBNext * SumWeight + (BBCvt * CvtNext) / WeightScale; - uint64_t NewFalse = (BBCvt * CvtFalse) / WeightScale; - // We need to scale down all weights of BBI.BB to fit uint32_t. - // Here BBI.BB is connected to CvtBBI->FalseBB and will fall through to - // the next block. - ScaleWeights(NewNext, NewFalse, BBI.BB, getNextBlock(BBI.BB), - CvtBBI->FalseBB, MBPI); + // Update the edge probability for both CvtBBI->FalseBB and NextBBI. + // NewNext = New_Prob(BBI.BB, NextBBI->BB) = + // Prob(BBI.BB, NextBBI->BB) + + // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB) + // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) = + // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB) + auto NewTrueBB = getNextBlock(BBI.BB); + auto NewNext = BBNext + BBCvt * CvtNext; + auto NewTrueBBIter = + std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB); + if (NewTrueBBIter != BBI.BB->succ_end()) + BBI.BB->setSuccProbability(NewTrueBBIter, NewNext); + + auto NewFalse = BBCvt * CvtFalse; + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); + BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse); } // Merge in the 'false' block if the 'false' block has no other @@ -1526,7 +1502,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, MergeBlocks(BBI, TailBBI); TailBBI.IsDone = true; } else { - BBI.BB->addSuccessor(TailBB); + BBI.BB->addSuccessor(TailBB, BranchProbability::getOne()); InsertUncondBranch(BBI.BB, TailBB, TII); BBI.HasFallThrough = false; } @@ -1536,7 +1512,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // which can happen here if TailBB is unanalyzable and is merged, so // explicitly remove BBI1 and BBI2 as successors. BBI.BB->removeSuccessor(BBI1->BB); - BBI.BB->removeSuccessor(BBI2->BB); + BBI.BB->removeSuccessor(BBI2->BB, true); RemoveExtraEdges(BBI); // Update block info. 
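The probability update in the IfConvertTriangle hunk above is easier to see with concrete numbers. Below is a minimal standalone sketch, using plain doubles in place of LLVM's fixed-point llvm::BranchProbability; the variable names mirror the hunk, and the example edge weights are made up for illustration.

#include <cstdio>

int main() {
  // Sketch only: the real code reads these via MBPI->getEdgeProbability().
  double BBNext = 0.3;   // Prob(BBI.BB -> NextBBI->BB)
  double BBCvt = 0.7;    // Prob(BBI.BB -> CvtBBI->BB)
  double CvtNext = 0.6;  // Prob(CvtBBI->BB -> NextBBI->BB)
  double CvtFalse = 0.4; // Prob(CvtBBI->BB -> CvtBBI->FalseBB)

  // After the triangle is if-converted, CvtBBI->BB is folded into BBI.BB and
  // its outgoing probability mass is redistributed onto BBI.BB's edges:
  double NewNext = BBNext + BBCvt * CvtNext; // 0.3 + 0.7 * 0.6 = 0.72
  double NewFalse = BBCvt * CvtFalse;        // 0.7 * 0.4       = 0.28

  // The two results sum to 1, so no renormalization is needed afterwards.
  std::printf("NewNext=%.2f NewFalse=%.2f sum=%.2f\n", NewNext, NewFalse,
              NewNext + NewFalse);
}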
@@ -1686,25 +1662,94 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
   ToBBI.BB->splice(ToBBI.BB->end(), FromBBI.BB,
                    FromBBI.BB->begin(), FromBBI.BB->end());
 
-  std::vector Succs(FromBBI.BB->succ_begin(),
-                    FromBBI.BB->succ_end());
+  // Force normalizing the successors' probabilities of ToBBI.BB to convert all
+  // unknown probabilities into known ones.
+  // FIXME: This usage is too tricky and in the future we would like to
+  // eliminate all unknown probabilities in MBB.
+  ToBBI.BB->normalizeSuccProbs();
+
+  SmallVector FromSuccs(FromBBI.BB->succ_begin(),
+                        FromBBI.BB->succ_end());
   MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
   MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
+  // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when
+  // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB.
+  auto To2FromProb = BranchProbability::getZero();
+  if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) {
+    To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB);
+    // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the
+    // edge probability being merged to other edges when this edge is removed
+    // later.
+    ToBBI.BB->setSuccProbability(
+        std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB),
+        BranchProbability::getZero());
+  }
 
-  for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
-    MachineBasicBlock *Succ = Succs[i];
+  for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) {
+    MachineBasicBlock *Succ = FromSuccs[i];
     // Fallthrough edge can't be transferred.
     if (Succ == FallThrough)
       continue;
+
+    auto NewProb = BranchProbability::getZero();
+    if (AddEdges) {
+      // Calculate the edge probability for the edge from ToBBI.BB to Succ,
+      // which is a portion of the edge probability from FromBBI.BB to Succ. The
+      // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if
+      // FromBBI.BB is a successor of ToBBI.BB; see the comment below for the
+      // exception).
+      NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ);
+
+      // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This
+      // only happens when if-converting a diamond CFG and FromBBI.BB is the
+      // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we
+      // could just use the probabilities on FromBBI.BB's out-edges when adding
+      // new successors.
+      if (!To2FromProb.isZero())
+        NewProb *= To2FromProb;
+    }
+
     FromBBI.BB->removeSuccessor(Succ);
-    if (AddEdges && !ToBBI.BB->isSuccessor(Succ))
-      ToBBI.BB->addSuccessor(Succ);
+
+    if (AddEdges) {
+      // If the edge from ToBBI.BB to Succ already exists, update the
+      // probability of this edge by adding NewProb to it. An example is shown
+      // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we
+      // don't have to set C as A's successor as it already is. We only need to
+      // update the edge probability on A->C. Note that B will not be
+      // immediately removed from A's successors. It is possible that B->D is
+      // not removed either if D is a fallthrough of B. Later the edge A->D
+      // (generated here) and B->D will be combined into one edge. To maintain
+      // correct edge probability of this combined edge, we need to set the edge
+      // probability of A->B to zero, which is already done above. The edge
+      // probability on A->D is calculated by scaling the original probability
+      // on A->B by the probability of B->D.
+ // + // Before ifcvt: After ifcvt (assume B->D is kept): + // + // A A + // /| /|\ + // / B / B| + // | /| | || + // |/ | | |/ + // C D C D + // + if (ToBBI.BB->isSuccessor(Succ)) + ToBBI.BB->setSuccProbability( + std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ), + MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb); + else + ToBBI.BB->addSuccessor(Succ, NewProb); + } } // Now FromBBI always falls through to the next block! if (NBB && !FromBBI.BB->isSuccessor(NBB)) FromBBI.BB->addSuccessor(NBB); + // Normalize the probabilities of ToBBI.BB's successors with all adjustment + // we've done above. + ToBBI.BB->normalizeSuccProbs(); + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); FromBBI.Predicate.clear(); diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp index 93e04876a8f3..39c1b9fb9a66 100644 --- a/lib/CodeGen/ImplicitNullChecks.cpp +++ b/lib/CodeGen/ImplicitNullChecks.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -107,6 +108,98 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; }; + +/// \brief Detect re-ordering hazards and dependencies. +/// +/// This class keeps track of defs and uses, and can be queried if a given +/// machine instruction can be re-ordered from after the machine instructions +/// seen so far to before them. +class HazardDetector { + DenseSet RegDefs; + DenseSet RegUses; + const TargetRegisterInfo &TRI; + bool hasSeenClobber; + +public: + explicit HazardDetector(const TargetRegisterInfo &TRI) : + TRI(TRI), hasSeenClobber(false) {} + + /// \brief Make a note of \p MI for later queries to isSafeToHoist. + /// + /// May clobber this HazardDetector instance. \see isClobbered. + void rememberInstruction(MachineInstr *MI); + + /// \brief Return true if it is safe to hoist \p MI from after all the + /// instructions seen so far (via rememberInstruction) to before it. + bool isSafeToHoist(MachineInstr *MI); + + /// \brief Return true if this instance of HazardDetector has been clobbered + /// (i.e. has no more useful information). + /// + /// A HazardDetecter is clobbered when it sees a construct it cannot + /// understand, and it would have to return a conservative answer for all + /// future queries. Having a separate clobbered state lets the client code + /// bail early, without making queries about all of the future instructions + /// (which would have returned the most conservative answer anyway). + /// + /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector + /// is an error. + bool isClobbered() { return hasSeenClobber; } +}; +} + + +void HazardDetector::rememberInstruction(MachineInstr *MI) { + assert(!isClobbered() && + "Don't add instructions to a clobbered hazard detector"); + + if (MI->mayStore() || MI->hasUnmodeledSideEffects()) { + hasSeenClobber = true; + return; + } + + for (auto *MMO : MI->memoperands()) { + // Right now we don't want to worry about LLVM's memory model. 
+ if (!MMO->isUnordered()) { + hasSeenClobber = true; + return; + } + } + + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.getReg()) + continue; + + if (MO.isDef()) + RegDefs.insert(MO.getReg()); + else + RegUses.insert(MO.getReg()); + } +} + +bool HazardDetector::isSafeToHoist(MachineInstr *MI) { + assert(!isClobbered() && "isSafeToHoist cannot do anything useful!"); + + // Right now we don't want to worry about LLVM's memory model. This can be + // made more precise later. + for (auto *MMO : MI->memoperands()) + if (!MMO->isUnordered()) + return false; + + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.getReg()) { + for (unsigned Reg : RegDefs) + if (TRI.regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-write or read-after-write + + if (MO.isDef()) + for (unsigned Reg : RegUses) + if (TRI.regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-read + } + } + + return true; } bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) { @@ -132,10 +225,10 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( MachineBasicBlock &MBB, SmallVectorImpl &NullCheckList) { typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate; - MDNode *BranchMD = - MBB.getBasicBlock() - ? MBB.getBasicBlock()->getTerminator()->getMetadata("make.implicit") - : nullptr; + MDNode *BranchMD = nullptr; + if (auto *BB = MBB.getBasicBlock()) + BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit); + if (!BranchMD) return false; @@ -188,7 +281,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // // we want to end up with // - // Def = TrappingLoad (%RAX + ), LblNull + // Def = FaultingLoad (%RAX + ), LblNull // jmp LblNotNull ;; explicit or fallthrough // // LblNotNull: @@ -199,38 +292,34 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( // LblNull: // callq throw_NullPointerException // + // + // To see why this is legal, consider the two possibilities: + // + // 1. %RAX is null: since we constrain to be less than PageSize, the + // load instruction dereferences the null page, causing a segmentation + // fault. + // + // 2. %RAX is not null: in this case we know that the load cannot fault, as + // otherwise the load would've faulted in the original program too and the + // original program would've been undefined. + // + // This reasoning cannot be extended to justify hoisting through arbitrary + // control flow. For instance, in the example below (in pseudo-C) + // + // if (ptr == null) { throw_npe(); unreachable; } + // if (some_cond) { return 42; } + // v = ptr->field; // LD + // ... + // + // we cannot (without code duplication) use the load marked "LD" to null check + // ptr -- clause (2) above does not apply in this case. In the above program + // the safety of ptr->field can be dependent on some_cond; and, for instance, + // ptr could be some non-null invalid reference that never gets loaded from + // because some_cond is always true. unsigned PointerReg = MBP.LHS.getReg(); - // As we scan NotNullSucc for a suitable load instruction, we keep track of - // the registers defined and used by the instructions we scan past. This bit - // of information lets us decide if it is legal to hoist the load instruction - // we find (if we do find such an instruction) to before NotNullSucc. - DenseSet RegDefs, RegUses; - - // Returns true if it is safe to reorder MI to before NotNullSucc. - auto IsSafeToHoist = [&](MachineInstr *MI) { - // Right now we don't want to worry about LLVM's memory model. 
This can be - // made more precise later. - for (auto *MMO : MI->memoperands()) - if (!MMO->isUnordered()) - return false; - - for (auto &MO : MI->operands()) { - if (MO.isReg() && MO.getReg()) { - for (unsigned Reg : RegDefs) - if (TRI->regsOverlap(Reg, MO.getReg())) - return false; // We found a write-after-write or read-after-write - - if (MO.isDef()) - for (unsigned Reg : RegUses) - if (TRI->regsOverlap(Reg, MO.getReg())) - return false; // We found a write-after-read - } - } - - return true; - }; + HazardDetector HD(*TRI); for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; ++MII) { @@ -238,37 +327,16 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( unsigned BaseReg, Offset; if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg && - Offset < PageSize && MI->getDesc().getNumDefs() == 1 && - IsSafeToHoist(MI)) { + Offset < PageSize && MI->getDesc().getNumDefs() <= 1 && + HD.isSafeToHoist(MI)) { NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc, NullSucc); return true; } - // MI did not match our criteria for conversion to a trapping load. Check - // if we can continue looking. - - if (MI->mayStore() || MI->hasUnmodeledSideEffects()) + HD.rememberInstruction(MI); + if (HD.isClobbered()) return false; - - for (auto *MMO : MI->memoperands()) - // Right now we don't want to worry about LLVM's memory model. - if (!MMO->isUnordered()) - return false; - - // It _may_ be okay to reorder a later load instruction across MI. Make a - // note of its operands so that we can make the legality check if we find a - // suitable load instruction: - - for (auto &MO : MI->operands()) { - if (!MO.isReg() || !MO.getReg()) - continue; - - if (MO.isDef()) - RegDefs.insert(MO.getReg()); - else - RegUses.insert(MO.getReg()); - } } return false; @@ -281,14 +349,19 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks( MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB, MCSymbol *HandlerLabel) { + const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for + // all targets. 
+ DebugLoc DL; unsigned NumDefs = LoadMI->getDesc().getNumDefs(); - assert(NumDefs == 1 && "other cases unhandled!"); - (void)NumDefs; + assert(NumDefs <= 1 && "other cases unhandled!"); - unsigned DefReg = LoadMI->defs().begin()->getReg(); - assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 && - "expected exactly one def!"); + unsigned DefReg = NoRegister; + if (NumDefs != 0) { + DefReg = LoadMI->defs().begin()->getReg(); + assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 && + "expected exactly one def!"); + } auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg) .addSym(HandlerLabel) diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 9989f233d09c..e31013266bc7 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -141,7 +141,7 @@ public: InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) : MF(mf), LIS(pass.getAnalysis()), LSS(pass.getAnalysis()), - AA(&pass.getAnalysis()), + AA(&pass.getAnalysis().getAAResults()), MDT(pass.getAnalysis()), Loops(pass.getAnalysis()), VRM(vrm), MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), @@ -329,8 +329,8 @@ static raw_ostream &operator<<(raw_ostream &OS, if (SVI.KillsSource) OS << " kill"; OS << " deps["; - for (unsigned i = 0, e = SVI.Deps.size(); i != e; ++i) - OS << ' ' << SVI.Deps[i]->id << '@' << SVI.Deps[i]->def; + for (VNInfo *Dep : SVI.Deps) + OS << ' ' << Dep->id << '@' << Dep->def; OS << " ]"; if (SVI.DefMI) OS << " def: " << *SVI.DefMI; @@ -383,9 +383,8 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg); unsigned SpillDepth = ~0u; - for (TinyPtrVector::iterator DepI = Deps->begin(), - DepE = Deps->end(); DepI != DepE; ++DepI) { - SibValueMap::iterator DepSVI = SibValues.find(*DepI); + for (VNInfo *Dep : *Deps) { + SibValueMap::iterator DepSVI = SibValues.find(Dep); assert(DepSVI != SibValues.end() && "Dependent value not in SibValues"); SibValueInfo &DepSV = DepSVI->second; if (!DepSV.SpillMBB) @@ -566,12 +565,11 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, // Create entries for all the PHIs. Don't add them to the worklist, we // are processing all of them in one go here. - for (unsigned i = 0, e = PHIs.size(); i != e; ++i) - SibValues.insert(std::make_pair(PHIs[i], SibValueInfo(Reg, PHIs[i]))); + for (VNInfo *PHI : PHIs) + SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI))); // Add every PHI as a dependent of all the non-PHIs. - for (unsigned i = 0, e = NonPHIs.size(); i != e; ++i) { - VNInfo *NonPHI = NonPHIs[i]; + for (VNInfo *NonPHI : NonPHIs) { // Known value? Try an insertion. 
std::tie(SVI, Inserted) = SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); @@ -654,8 +652,7 @@ void InlineSpiller::analyzeSiblingValues() { return; LiveInterval &OrigLI = LIS.getInterval(Original); - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), VE = LI.vni_end(); VI != VE; ++VI) { @@ -831,9 +828,8 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { if (VNI->isPHIDef()) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)); + for (MachineBasicBlock *P : MBB->predecessors()) { + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P)); if (PVNI) WorkList.push_back(std::make_pair(LI, PVNI)); } @@ -920,8 +916,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); + for (const auto &OpPair : Ops) { + MachineOperand &MO = OpPair.first->getOperand(OpPair.second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewVReg); MO.setIsKill(); @@ -944,8 +940,7 @@ void InlineSpiller::reMaterializeAll() { // Try to remat before all uses of snippets. bool anyRemat = false; - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (MachineRegisterInfo::reg_bundle_iterator RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); @@ -963,8 +958,7 @@ void InlineSpiller::reMaterializeAll() { return; // Remove any values that were completely rematted. - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { LiveInterval &LI = LIS.getInterval(Reg); for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); I != E; ++I) { @@ -989,8 +983,7 @@ void InlineSpiller::reMaterializeAll() { // Get rid of deleted and empty intervals. unsigned ResultPos = 0; - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { - unsigned Reg = RegsToSpill[i]; + for (unsigned Reg : RegsToSpill) { if (!LIS.hasInterval(Reg)) continue; @@ -1098,9 +1091,9 @@ foldMemoryOperand(ArrayRef > Ops, // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector FoldOps; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - unsigned Idx = Ops[i].second; - assert(MI == Ops[i].first && "Instruction conflict during operand folding"); + for (const auto &OpPair : Ops) { + unsigned Idx = OpPair.second; + assert(MI == OpPair.first && "Instruction conflict during operand folding"); MachineOperand &MO = MI->getOperand(Idx); if (MO.isImplicit()) { ImpReg = MO.getReg(); @@ -1139,7 +1132,7 @@ foldMemoryOperand(ArrayRef > Ops, continue; MIBundleOperands::PhysRegInfo RI = MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); - if (RI.Defines) + if (RI.FullyDefined) continue; // FoldMI does not define this physreg. Remove the LI segment. assert(MO->isDead() && "Cannot fold physreg def"); @@ -1152,10 +1145,9 @@ foldMemoryOperand(ArrayRef > Ops, // Insert any new instructions other than FoldMI into the LIS maps. 
assert(!MIS.empty() && "Unexpected empty span of instructions!"); - for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end(); - MII != End; ++MII) - if (&*MII != FoldMI) - LIS.InsertMachineInstrInMaps(&*MII); + for (MachineInstr &MI : MIS) + if (&MI != FoldMI) + LIS.InsertMachineInstrInMaps(&MI); // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. @@ -1301,11 +1293,11 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Rewrite instruction operands. bool hasLiveDef = false; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); + for (const auto &OpPair : Ops) { + MachineOperand &MO = OpPair.first->getOperand(OpPair.second); MO.setReg(NewVReg); if (MO.isUse()) { - if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) + if (!OpPair.first->isRegTiedToDefOperand(OpPair.second)) MO.setIsKill(); } else { if (!MO.isDead()) @@ -1335,14 +1327,14 @@ void InlineSpiller::spillAll() { VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot); assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]), + for (unsigned Reg : RegsToSpill) + StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg), StackInt->getValNumInfo(0)); DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); // Spill around uses of all RegsToSpill. - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - spillAroundUses(RegsToSpill[i]); + for (unsigned Reg : RegsToSpill) + spillAroundUses(Reg); // Hoisted spills may cause dead code. if (!DeadDefs.empty()) { @@ -1351,9 +1343,9 @@ void InlineSpiller::spillAll() { } // Finally delete the SnippetCopies. - for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + for (unsigned Reg : RegsToSpill) { for (MachineRegisterInfo::reg_instr_iterator - RI = MRI.reg_instr_begin(RegsToSpill[i]), E = MRI.reg_instr_end(); + RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { MachineInstr *MI = &*(RI++); assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); @@ -1364,8 +1356,8 @@ void InlineSpiller::spillAll() { } // Delete all spilled registers. 
- for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - Edit->eraseVirtReg(RegsToSpill[i]); + for (unsigned Reg : RegsToSpill) + Edit->eraseVirtReg(Reg); } void InlineSpiller::spill(LiveRangeEdit &edit) { diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index fd5749bfefa0..f8cc24724580 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -144,7 +144,8 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { PrevPos = Start; } - MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); + MachineFunction::const_iterator MFI = + MF->getBlockNumbered(MBBNum)->getIterator(); BlockInterference *BI = &Blocks[MBBNum]; ArrayRef RegMaskSlots; ArrayRef RegMaskBits; diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp index 53c8adc05d77..724f1d61abe2 100644 --- a/lib/CodeGen/InterleavedAccessPass.cpp +++ b/lib/CodeGen/InterleavedAccessPass.cpp @@ -52,7 +52,7 @@ using namespace llvm; static cl::opt LowerInterleavedAccesses( "lower-interleaved-accesses", cl::desc("Enable lowering interleaved accesses to intrinsics"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); static unsigned MaxFactor; // The maximum supported interleave factor. @@ -271,7 +271,7 @@ bool InterleavedAccess::runOnFunction(Function &F) { SmallVector DeadInsts; bool Changed = false; - for (auto &I : inst_range(F)) { + for (auto &I : instructions(F)) { if (LoadInst *LI = dyn_cast(&I)) Changed |= lowerInterleavedLoad(LI, DeadInsts); diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 2c95e9e7d0d3..2962f8701625 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -35,24 +35,24 @@ static void EnsureFunctionExists(Module &M, const char *Name, M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false)); } -static void EnsureFPIntrinsicsExist(Module &M, Function *Fn, +static void EnsureFPIntrinsicsExist(Module &M, Function &Fn, const char *FName, const char *DName, const char *LDName) { // Insert definitions for all the floating point types. - switch((int)Fn->arg_begin()->getType()->getTypeID()) { + switch((int)Fn.arg_begin()->getType()->getTypeID()) { case Type::FloatTyID: - EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(), + EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(), Type::getFloatTy(M.getContext())); break; case Type::DoubleTyID: - EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(), + EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(), Type::getDoubleTy(M.getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(), - Fn->arg_begin()->getType()); + EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(), + Fn.arg_begin()->getType()); break; } } @@ -67,7 +67,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, Type *RetTy) { // If we haven't already looked up this function, check to see if the // program already contains a function with this name. - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); // Get or insert the definition now. 
std::vector ParamTys; for (ArgIt I = ArgBegin; I != ArgEnd; ++I) @@ -75,7 +75,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, Constant* FCache = M->getOrInsertFunction(NewFn, FunctionType::get(RetTy, ParamTys, false)); - IRBuilder<> Builder(CI->getParent(), CI); + IRBuilder<> Builder(CI->getParent(), CI->getIterator()); SmallVector Args(ArgBegin, ArgEnd); CallInst *NewCI = Builder.CreateCall(FCache, Args); NewCI->setName(CI->getName()); @@ -94,20 +94,20 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, void IntrinsicLowering::AddPrototypes(Module &M) { LLVMContext &Context = M.getContext(); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (I->isDeclaration() && !I->use_empty()) - switch (I->getIntrinsicID()) { + for (auto &F : M) + if (F.isDeclaration() && !F.use_empty()) + switch (F.getIntrinsicID()) { default: break; case Intrinsic::setjmp: - EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(), + EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(), Type::getInt32Ty(M.getContext())); break; case Intrinsic::longjmp: - EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(), + EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(), Type::getVoidTy(M.getContext())); break; case Intrinsic::siglongjmp: - EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(), + EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(), Type::getVoidTy(M.getContext())); break; case Intrinsic::memcpy: @@ -132,31 +132,31 @@ void IntrinsicLowering::AddPrototypes(Module &M) { DL.getIntPtrType(Context), nullptr); break; case Intrinsic::sqrt: - EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl"); + EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl"); break; case Intrinsic::sin: - EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl"); + EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl"); break; case Intrinsic::cos: - EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl"); + EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl"); break; case Intrinsic::pow: - EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl"); + EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl"); break; case Intrinsic::log: - EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl"); + EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl"); break; case Intrinsic::log2: - EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l"); + EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l"); break; case Intrinsic::log10: - EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l"); + EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l"); break; case Intrinsic::exp: - EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl"); + EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl"); break; case Intrinsic::exp2: - EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l"); + EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l"); break; } } @@ -167,8 +167,8 @@ static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); - - IRBuilder<> Builder(IP->getParent(), IP); + + IRBuilder<> Builder(IP); switch(BitSize) { default: llvm_unreachable("Unhandled type size of value to byteswap!"); @@ -268,7 +268,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL }; - IRBuilder<> Builder(IP->getParent(), IP); + IRBuilder<> Builder(IP); unsigned BitSize = 
V->getType()->getPrimitiveSizeInBits(); unsigned WordSize = (BitSize + 63) / 64; @@ -301,7 +301,7 @@ static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { /// instruction IP. static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { - IRBuilder<> Builder(IP->getParent(), IP); + IRBuilder<> Builder(IP); unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); for (unsigned i = 1; i < BitSize; i <<= 1) { @@ -338,7 +338,7 @@ static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, } void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { - IRBuilder<> Builder(CI->getParent(), CI); + IRBuilder<> Builder(CI); LLVMContext &Context = CI->getContext(); const Function *Callee = CI->getCalledFunction(); @@ -424,6 +424,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } + case Intrinsic::get_dynamic_area_offset: + errs() << "WARNING: this target does not support the custom llvm.get." + "dynamic.area.offset. It is being lowered to a constant 0\n"; + // Just lower it to a constant 0 because for most targets + // @llvm.get.dynamic.area.offset is lowered to zero. + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0)); + break; case Intrinsic::returnaddress: case Intrinsic::frameaddress: errs() << "WARNING: this target does not support the llvm." @@ -589,7 +596,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { return false; // Okay, we can do this xform, do so now. - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); Value *Op = CI->getArgOperand(0); diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt index 18ed77607c6a..69b6a0f380aa 100644 --- a/lib/CodeGen/LLVMBuild.txt +++ b/lib/CodeGen/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG MIRParser type = Library name = CodeGen parent = Libraries -required_libraries = Analysis Core Instrumentation MC Scalar Support Target TransformUtils +required_libraries = Analysis BitReader BitWriter Core Instrumentation MC Scalar Support Target TransformUtils diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 37299eb664cf..1c27377feee7 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -82,7 +82,7 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, } TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(BasicTTIImpl(this, F)); }); } @@ -125,9 +125,10 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, PM.add(new MachineFunctionAnalysis(*TM, MFInitializer)); // Enable FastISel with -fast, but allow that to be overridden. + TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); if (EnableFastISelOption == cl::BOU_TRUE || (TM->getOptLevel() == CodeGenOpt::None && - EnableFastISelOption != cl::BOU_FALSE)) + TM->getO0WantsFastISel())) TM->setFastISel(true); // Ask the target for an isel. 
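The mask table that LowerCTPOP (in the IntrinsicLowering.cpp hunks above) feeds into the IR it emits drives a standard word-parallel population count: each round adds adjacent i-bit fields into a field of width 2*i. A minimal sketch of the same reduction on a host uint64_t; popcount64 is illustrative and not part of the patch.

#include <cstdint>
#include <cstdio>

static unsigned popcount64(uint64_t V) {
  // Same mask progression LowerCTPOP uses for a 64-bit word.
  static const uint64_t MaskValues[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
  unsigned ct = 0;
  for (unsigned i = 1; i <= 32; i <<= 1, ++ct)
    V = (V & MaskValues[ct]) + ((V >> i) & MaskValues[ct]);
  return static_cast<unsigned>(V);
}

int main() {
  std::printf("%u %u %u\n", popcount64(0), popcount64(0xFF),
              popcount64(~0ULL)); // prints: 0 8 64
}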
@@ -202,6 +203,7 @@ bool LLVMTargetMachine::addPassesToEmitFile( Triple T(getTargetTriple().str()); AsmStreamer.reset(getTarget().createMCObjectStreamer( T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); break; } @@ -254,6 +256,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, const MCSubtargetInfo &STI = *getMCSubtargetInfo(); std::unique_ptr AsmStreamer(getTarget().createMCObjectStreamer( T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + Options.MCOptions.MCIncrementalLinkerCompatible, /*DWARFMustBeAtTheEnd*/ true)); // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp new file mode 100644 index 000000000000..98d30b95dd2d --- /dev/null +++ b/lib/CodeGen/LiveDebugValues.cpp @@ -0,0 +1,405 @@ +//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This pass implements a data flow analysis that propagates debug location +/// information by inserting additional DBG_VALUE instructions into the machine +/// instruction stream. The pass internally builds debug location liveness +/// ranges to determine the points where additional DBG_VALUEs need to be +/// inserted. +/// +/// This is a separate pass from DbgValueHistoryCalculator to facilitate +/// testing and improve modularity. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "live-debug-values" + +STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); + +namespace { + +class LiveDebugValues : public MachineFunctionPass { + +private: + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + typedef std::pair + InlinedVariable; + + /// A potentially inlined instance of a variable. + struct DebugVariable { + const DILocalVariable *Var; + const DILocation *InlinedAt; + + DebugVariable(const DILocalVariable *_var, const DILocation *_inlinedAt) + : Var(_var), InlinedAt(_inlinedAt) {} + + bool operator==(const DebugVariable &DV) const { + return (Var == DV.Var) && (InlinedAt == DV.InlinedAt); + } + }; + + /// Member variables and functions for Range Extension across basic blocks. + struct VarLoc { + DebugVariable Var; + const MachineInstr *MI; // MachineInstr should be a DBG_VALUE instr. + + VarLoc(DebugVariable _var, const MachineInstr *_mi) : Var(_var), MI(_mi) {} + + bool operator==(const VarLoc &V) const; + }; + + typedef std::list VarLocList; + typedef SmallDenseMap VarLocInMBB; + + bool OLChanged; // OutgoingLocs got changed for this bb. + bool MBBJoined; // The MBB was joined. 
+
+  void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges);
+  void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges);
+  void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges,
+                              VarLocInMBB &OutLocs);
+  void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs);
+
+  void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs);
+
+  bool ExtendRanges(MachineFunction &MF);
+
+public:
+  static char ID;
+
+  /// Default construct and initialize the pass.
+  LiveDebugValues();
+
+  /// Tell the pass manager which passes we depend on and what
+  /// information we preserve.
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// Print to ostream with a message.
+  void printVarLocInMBB(const VarLocInMBB &V, const char *msg,
+                        raw_ostream &Out) const;
+
+  /// Calculate the liveness information for the given machine function.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // namespace
+
+//===----------------------------------------------------------------------===//
+//            Implementation
+//===----------------------------------------------------------------------===//
+
+char LiveDebugValues::ID = 0;
+char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
+INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis",
+                false, false)
+
+/// Default construct and initialize the pass.
+LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) {
+  initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry());
+}
+
+/// Tell the pass manager which passes we depend on and what information we
+/// preserve.
+void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+// \brief If @MI is a DBG_VALUE with a debug value described by a defined
+// register, returns the number of this register; otherwise, returns 0.
+static unsigned isDescribedByReg(const MachineInstr &MI) {
+  assert(MI.isDebugValue());
+  assert(MI.getNumOperands() == 4);
+  // If the location of the variable is described using a register (directly or
+  // indirectly), that register is always the first operand.
+  return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
+// \brief This function takes two DBG_VALUE instructions and returns true
+// if their offsets are equal; otherwise returns false.
+static bool areOffsetsEqual(const MachineInstr &MI1, const MachineInstr &MI2) {
+  assert(MI1.isDebugValue());
+  assert(MI1.getNumOperands() == 4);
+
+  assert(MI2.isDebugValue());
+  assert(MI2.getNumOperands() == 4);
+
+  if (!MI1.isIndirectDebugValue() && !MI2.isIndirectDebugValue())
+    return true;
+
+  // Check if both MIs are indirect and they are equal.
+ if (MI1.isIndirectDebugValue() && MI2.isIndirectDebugValue()) + return MI1.getOperand(1).getImm() == MI2.getOperand(1).getImm(); + + return false; +} + +//===----------------------------------------------------------------------===// +// Debug Range Extension Implementation +//===----------------------------------------------------------------------===// + +void LiveDebugValues::printVarLocInMBB(const VarLocInMBB &V, const char *msg, + raw_ostream &Out) const { + Out << "Printing " << msg << ":\n"; + for (const auto &L : V) { + Out << "MBB: " << L.first->getName() << ":\n"; + for (const auto &VLL : L.second) { + Out << " Var: " << VLL.Var.Var->getName(); + Out << " MI: "; + (*VLL.MI).dump(); + Out << "\n"; + } + } + Out << "\n"; +} + +bool LiveDebugValues::VarLoc::operator==(const VarLoc &V) const { + return (Var == V.Var) && (isDescribedByReg(*MI) == isDescribedByReg(*V.MI)) && + (areOffsetsEqual(*MI, *V.MI)); +} + +/// End all previous ranges related to @MI and start a new range from @MI +/// if it is a DBG_VALUE instr. +void LiveDebugValues::transferDebugValue(MachineInstr &MI, + VarLocList &OpenRanges) { + if (!MI.isDebugValue()) + return; + const DILocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + DebugVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); + + // End all previous ranges of Var. + OpenRanges.erase( + std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](const VarLoc &V) { return (Var == V.Var); }), + OpenRanges.end()); + + // Add Var to OpenRanges from this DBG_VALUE. + // TODO: Currently handles DBG_VALUE which has only reg as location. + if (isDescribedByReg(MI)) { + VarLoc V(Var, &MI); + OpenRanges.push_back(std::move(V)); + } +} + +/// A definition of a register may mark the end of a range. +void LiveDebugValues::transferRegisterDef(MachineInstr &MI, + VarLocList &OpenRanges) { + for (const MachineOperand &MO : MI.operands()) { + if (!(MO.isReg() && MO.isDef() && MO.getReg() && + TRI->isPhysicalRegister(MO.getReg()))) + continue; + // Remove ranges of all aliased registers. + for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) + OpenRanges.erase(std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](const VarLoc &V) { + return (*RAI == + isDescribedByReg(*V.MI)); + }), + OpenRanges.end()); + } +} + +/// Terminate all open ranges at the end of the current basic block. +void LiveDebugValues::transferTerminatorInst(MachineInstr &MI, + VarLocList &OpenRanges, + VarLocInMBB &OutLocs) { + const MachineBasicBlock *CurMBB = MI.getParent(); + if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) + return; + + if (OpenRanges.empty()) + return; + + if (OutLocs.find(CurMBB) == OutLocs.end()) { + // Create space for new Outgoing locs entries. + VarLocList VLL; + OutLocs.insert(std::make_pair(CurMBB, std::move(VLL))); + } + auto OL = OutLocs.find(CurMBB); + assert(OL != OutLocs.end()); + VarLocList &VLL = OL->second; + + for (auto OR : OpenRanges) { + // Copy OpenRanges to OutLocs, if not already present. + assert(OR.MI->isDebugValue()); + DEBUG(dbgs() << "Add to OutLocs: "; OR.MI->dump();); + if (std::find_if(VLL.begin(), VLL.end(), + [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) { + VLL.push_back(std::move(OR)); + OLChanged = true; + } + } + OpenRanges.clear(); +} + +/// This routine creates OpenRanges and OutLocs. 
+void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges, + VarLocInMBB &OutLocs) { + transferDebugValue(MI, OpenRanges); + transferRegisterDef(MI, OpenRanges); + transferTerminatorInst(MI, OpenRanges, OutLocs); +} + +/// This routine joins the analysis results of all incoming edges in @MBB by +/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same +/// source variable in all the predecessors of @MBB reside in the same location. +void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, + VarLocInMBB &InLocs) { + DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); + + MBBJoined = false; + + VarLocList InLocsT; // Temporary incoming locations. + + // For all predecessors of this MBB, find the set of VarLocs that can be + // joined. + for (auto p : MBB.predecessors()) { + auto OL = OutLocs.find(p); + // Join is null in case of empty OutLocs from any of the pred. + if (OL == OutLocs.end()) + return; + + // Just copy over the Out locs to incoming locs for the first predecessor. + if (p == *MBB.pred_begin()) { + InLocsT = OL->second; + continue; + } + + // Join with this predecessor. + VarLocList &VLL = OL->second; + InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(), + [&](VarLoc &ILT) { + return (std::find_if(VLL.begin(), VLL.end(), + [&](const VarLoc &V) { + return (ILT == V); + }) == VLL.end()); + }), + InLocsT.end()); + } + + if (InLocsT.empty()) + return; + + if (InLocs.find(&MBB) == InLocs.end()) { + // Create space for new Incoming locs entries. + VarLocList VLL; + InLocs.insert(std::make_pair(&MBB, std::move(VLL))); + } + auto IL = InLocs.find(&MBB); + assert(IL != InLocs.end()); + VarLocList &ILL = IL->second; + + // Insert DBG_VALUE instructions, if not already inserted. + for (auto ILT : InLocsT) { + if (std::find_if(ILL.begin(), ILL.end(), [&](const VarLoc &I) { + return (ILT == I); + }) == ILL.end()) { + // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a + // new range is started for the var from the mbb's beginning by inserting + // a new DBG_VALUE. transfer() will end this range however appropriate. + const MachineInstr *DMI = ILT.MI; + MachineInstr *MI = + BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(), + DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0, + DMI->getDebugVariable(), DMI->getDebugExpression()); + if (DMI->isIndirectDebugValue()) + MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); + DEBUG(dbgs() << "Inserted: "; MI->dump();); + ++NumInserted; + MBBJoined = true; // rerun transfer(). + + VarLoc V(ILT.Var, MI); + ILL.push_back(std::move(V)); + } + } +} + +/// Calculate the liveness information for the given machine function and +/// extend ranges across basic blocks. +bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { + + DEBUG(dbgs() << "\nDebug Range Extension\n"); + + bool Changed = false; + OLChanged = MBBJoined = false; + + VarLocList OpenRanges; // Ranges that are open until end of bb. + VarLocInMBB OutLocs; // Ranges that exist beyond bb. + VarLocInMBB InLocs; // Ranges that are incoming after joining. + + std::deque BBWorklist; + + // Initialize every mbb with OutLocs. + for (auto &MBB : MF) + for (auto &MI : MBB) + transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs())); + + // Construct a worklist of MBBs. 
+ for (auto &MBB : MF) + BBWorklist.push_back(&MBB); + + // Perform join() and transfer() using the worklist until the ranges converge + // Ranges have converged when the worklist is empty. + while (!BBWorklist.empty()) { + MachineBasicBlock *MBB = BBWorklist.front(); + BBWorklist.pop_front(); + + join(*MBB, OutLocs, InLocs); + + if (MBBJoined) { + Changed = true; + for (auto &MI : *MBB) + transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs())); + + if (OLChanged) { + OLChanged = false; + for (auto s : MBB->successors()) + if (std::find(BBWorklist.begin(), BBWorklist.end(), s) == + BBWorklist.end()) // add if not already present. + BBWorklist.push_back(s); + } + } + } + DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs())); + return Changed; +} + +bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + + bool Changed = false; + + Changed |= ExtendRanges(MF); + + return Changed; +} diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 15715513452d..6dac7dbd15bf 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -91,9 +91,7 @@ public: bool dominates(MachineBasicBlock *MBB) { if (LBlocks.empty()) LS.getMachineBasicBlocks(DL, LBlocks); - if (LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB)) - return true; - return false; + return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB); } }; } // end anonymous namespace @@ -512,7 +510,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { bool Changed = false; for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE; ++MFI) { - MachineBasicBlock *MBB = MFI; + MachineBasicBlock *MBB = &*MFI; for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); MBBI != MBBE;) { if (!MBBI->isDebugValue()) { @@ -536,65 +534,49 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { return Changed; } -void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, - LiveRange *LR, const VNInfo *VNI, - SmallVectorImpl *Kills, +/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a +/// data-flow analysis to propagate them beyond basic block boundaries. +void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR, + const VNInfo *VNI, SmallVectorImpl *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { - SmallVector Todo; - Todo.push_back(Idx); - do { - SlotIndex Start = Todo.pop_back_val(); - MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); - SlotIndex Stop = LIS.getMBBEndIdx(MBB); - LocMap::iterator I = locInts.find(Start); + SlotIndex Start = Idx; + MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start); + SlotIndex Stop = LIS.getMBBEndIdx(MBB); + LocMap::iterator I = locInts.find(Start); - // Limit to VNI's live range. - bool ToEnd = true; - if (LR && VNI) { - LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); - if (!Segment || Segment->valno != VNI) { - if (Kills) - Kills->push_back(Start); - continue; - } - if (Segment->end < Stop) - Stop = Segment->end, ToEnd = false; + // Limit to VNI's live range. 
+ bool ToEnd = true; + if (LR && VNI) { + LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); + if (!Segment || Segment->valno != VNI) { + if (Kills) + Kills->push_back(Start); + return; } + if (Segment->end < Stop) + Stop = Segment->end, ToEnd = false; + } - // There could already be a short def at Start. - if (I.valid() && I.start() <= Start) { - // Stop when meeting a different location or an already extended interval. - Start = Start.getNextSlot(); - if (I.value() != LocNo || I.stop() != Start) - continue; - // This is a one-slot placeholder. Just skip it. - ++I; - } + // There could already be a short def at Start. + if (I.valid() && I.start() <= Start) { + // Stop when meeting a different location or an already extended interval. + Start = Start.getNextSlot(); + if (I.value() != LocNo || I.stop() != Start) + return; + // This is a one-slot placeholder. Just skip it. + ++I; + } - // Limited by the next def. - if (I.valid() && I.start() < Stop) - Stop = I.start(), ToEnd = false; - // Limited by VNI's live range. - else if (!ToEnd && Kills) - Kills->push_back(Stop); - - if (Start >= Stop) - continue; + // Limited by the next def. + if (I.valid() && I.start() < Stop) + Stop = I.start(), ToEnd = false; + // Limited by VNI's live range. + else if (!ToEnd && Kills) + Kills->push_back(Stop); + if (Start < Stop) I.insert(Start, Stop, LocNo); - - // If we extended to the MBB end, propagate down the dominator tree. - if (!ToEnd) - continue; - const std::vector &Children = - MDT.getNode(MBB)->getChildren(); - for (unsigned i = 0, e = Children.size(); i != e; ++i) { - MachineBasicBlock *MBB = Children[i]->getBlock(); - if (UVS.dominates(MBB)) - Todo.push_back(LIS.getMBBStartIdx(MBB)); - } - } while (!Todo.empty()); } void @@ -763,7 +745,7 @@ static void removeDebugValues(MachineFunction &mf) { bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; - if (!FunctionDIs.count(mf.getFunction())) { + if (!mf.getFunction()->getSubprogram()) { removeDebugValues(mf); return false; } @@ -1004,11 +986,11 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, SlotIndex Stop = I.stop(); unsigned LocNo = I.value(); DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo); - MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); - SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); + SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(MBB, Start, LocNo, LIS, TII); + insertDebugValue(&*MBB, Start, LocNo, LIS, TII); // This interval may span multiple basic blocks. // Insert a DBG_VALUE into each one. 
while(Stop > MBBEnd) { @@ -1016,9 +998,9 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, Start = MBBEnd; if (++MBB == MFEnd) break; - MBBEnd = LIS.getMBBEndIdx(MBB); + MBBEnd = LIS.getMBBEndIdx(&*MBB); DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); - insertDebugValue(MBB, Start, LocNo, LIS, TII); + insertDebugValue(&*MBB, Start, LocNo, LIS, TII); } DEBUG(dbgs() << '\n'); if (MBB == MFEnd) @@ -1047,7 +1029,6 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { } bool LiveDebugVariables::doInitialization(Module &M) { - FunctionDIs = makeSubprogramMap(M); return Pass::doInitialization(M); }
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index 694aa1770c9c..3d36f4d2494a 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -33,7 +33,6 @@ class VirtRegMap; class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass { void *pImpl; - DenseMap<const Function *, DISubprogram *> FunctionDIs; public: static char ID; // Pass identification, replacement for typeid
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index d75e4417cb03..efad36ffa3f1 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> @@ -865,7 +864,7 @@ void LiveInterval::constructMainRangeFromSubranges( // - If any of the subranges is live at a point the main liverange has to be // live too, conversely if no subrange is live the main range mustn't be // live either. - // We do this by scannig through all the subranges simultaneously creating new + // We do this by scanning through all the subranges simultaneously creating new // segments in the main range as segment starts/ends come up in the subranges. assert(hasSubRanges() && "expected subranges to be present"); assert(segments.empty() && valnos.empty() && "expected empty main range"); @@ -889,7 +888,7 @@ void LiveInterval::constructMainRangeFromSubranges( Segment CurrentSegment; bool ConstructingSegment = false; bool NeedVNIFixup = false; - unsigned ActiveMask = 0; + LaneBitmask ActiveMask = 0; SlotIndex Pos = First; while (true) { SlotIndex NextPos = Last; enum { NOTHING, BEGIN_SEGMENT, END_SEGMENT, } Event = NOTHING; // Which subregister lanes are affected by the current event. - unsigned EventMask = 0; + LaneBitmask EventMask = 0; // Whether a BEGIN_SEGMENT is also a valno definition point. bool IsDef = false; // Find the next begin or end of a subrange segment. Combine masks if we @@ -1066,7 +1065,7 @@ void LiveInterval::print(raw_ostream &OS) const { super::print(OS); // Print subranges for (const SubRange &SR : subranges()) { - OS << format(" L%04X ", SR.LaneMask) << SR; + OS << " L" << PrintLaneMask(SR.LaneMask) << ' ' << SR; } } @@ -1101,8 +1100,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { super::verify(); // Make sure SubRanges are fine and LaneMasks are disjoint. - unsigned Mask = 0; - unsigned MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u; + LaneBitmask Mask = 0; + LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u; for (const SubRange &SR : subranges()) { // Subrange lanemask should be disjoint from any previous subrange masks. assert((Mask & SR.LaneMask) == 0); @@ -1110,6 +1109,8 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const { // Subrange masks should be contained in the maximum lane mask for the vreg. assert((Mask & ~MaxMask) == 0); + // Empty subranges must be removed. + assert(!SR.empty()); SR.verify(); // Main liverange should cover subrange. @@ -1370,11 +1371,42 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { return EqClass.getNumClasses(); } -void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], - MachineRegisterInfo &MRI) { - assert(LIV[0] && "LIV[0] must be set"); - LiveInterval &LI = *LIV[0]; +template <typename LiveRangeT, typename EqClassesT> +static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[], + EqClassesT VNIClasses) { + // Move segments to new intervals. + LiveRange::iterator J = LR.begin(), E = LR.end(); + while (J != E && VNIClasses[J->valno->id] == 0) + ++J; + for (LiveRange::iterator I = J; I != E; ++I) { + if (unsigned eq = VNIClasses[I->valno->id]) { + assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) && + "New intervals should be empty"); + SplitLRs[eq-1]->segments.push_back(*I); + } else + *J++ = *I; + } + LR.segments.erase(J, E); + // Transfer VNInfos to their new owners and renumber them. + unsigned j = 0, e = LR.getNumValNums(); + while (j != e && VNIClasses[j] == 0) + ++j; + for (unsigned i = j; i != e; ++i) { + VNInfo *VNI = LR.getValNumInfo(i); + if (unsigned eq = VNIClasses[i]) { + VNI->id = SplitLRs[eq-1]->getNumValNums(); + SplitLRs[eq-1]->valnos.push_back(VNI); + } else { + VNI->id = j; + LR.valnos[j++] = VNI; + } + } + LR.valnos.resize(j); +} + +void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], + MachineRegisterInfo &MRI) { // Rewrite instructions. for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), RE = MRI.reg_end(); RI != RE;) { @@ -1396,38 +1428,41 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], // NULL. If the use is tied to a def, VNI will be the defined value. if (!VNI) continue; - MO.setReg(LIV[getEqClass(VNI)]->reg); + if (unsigned EqClass = getEqClass(VNI)) + MO.setReg(LIV[EqClass-1]->reg); } - // Move runs to new intervals. - LiveInterval::iterator J = LI.begin(), E = LI.end(); - while (J != E && EqClass[J->valno->id] == 0) - ++J; - for (LiveInterval::iterator I = J; I != E; ++I) { - if (unsigned eq = EqClass[I->valno->id]) { - assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && - "New intervals should be empty"); - LIV[eq]->segments.push_back(*I); - } else - *J++ = *I; - } - // TODO: do not cheat anymore by simply cleaning all subranges - LI.clearSubRanges(); - LI.segments.erase(J, E); - - // Transfer VNInfos to their new owners and renumber them. - unsigned j = 0, e = LI.getNumValNums(); - while (j != e && EqClass[j] == 0) - ++j; - for (unsigned i = j; i != e; ++i) { - VNInfo *VNI = LI.getValNumInfo(i); - if (unsigned eq = EqClass[i]) { - VNI->id = LIV[eq]->getNumValNums(); - LIV[eq]->valnos.push_back(VNI); - } else { - VNI->id = j; - LI.valnos[j++] = VNI; + // Distribute subregister liveranges. + if (LI.hasSubRanges()) { + unsigned NumComponents = EqClass.getNumClasses(); + SmallVector<unsigned, 8> VNIMapping; + SmallVector<SubRange*, 8> SubRanges; + BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + for (LiveInterval::SubRange &SR : LI.subranges()) { + // Create new subranges in the split intervals and construct a mapping + // for the VNInfos in the subrange. + unsigned NumValNos = SR.valnos.size(); + VNIMapping.clear(); + VNIMapping.reserve(NumValNos); + SubRanges.clear(); + SubRanges.resize(NumComponents-1, nullptr); + for (unsigned I = 0; I < NumValNos; ++I) { + const VNInfo &VNI = *SR.valnos[I]; + const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def); + assert(MainRangeVNI != nullptr + && "SubRange def must have corresponding main range def"); + unsigned ComponentNum = getEqClass(MainRangeVNI); + VNIMapping.push_back(ComponentNum); + if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) { + SubRanges[ComponentNum-1] + = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask); + } + } + DistributeRange(SR, SubRanges.data(), VNIMapping); } + LI.removeEmptySubRanges(); } - LI.valnos.resize(j); + + // Distribute main liverange. + DistributeRange(LI, LIV, EqClass); }
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index c00b010e763b..9451d92bd7ae 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -48,7 +47,7 @@ char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveVariables) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) @@ -76,8 +75,8 @@ cl::opt<bool> UseSegmentSetForPhysRegs( void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); // LiveVariables isn't really required by this analysis, it is only required // here to make sure it is live during TwoAddressInstructionPass and // PHIElimination. This is temporary. @@ -124,7 +123,7 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MRI = &MF->getRegInfo(); TRI = MF->getSubtarget().getRegisterInfo(); TII = MF->getSubtarget().getInstrInfo(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); Indexes = &getAnalysis<SlotIndexes>(); DomTree = &getAnalysis<MachineDominatorTree>(); @@ -198,9 +197,16 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) { void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); assert(LI.empty() && "Should only compute empty intervals."); + bool ShouldTrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(LI.reg); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); - LRCalc->calculate(LI, MRI->shouldTrackSubRegLiveness(LI.reg)); - computeDeadValues(LI, nullptr); + LRCalc->calculate(LI, ShouldTrackSubRegLiveness); + bool SeparatedComponents = computeDeadValues(LI, nullptr); + if (SeparatedComponents) { + assert(ShouldTrackSubRegLiveness + && "Separated components should only occur for unused subreg defs"); + SmallVector<LiveInterval*, 8> SplitLIs; + splitSeparateComponents(LI, SplitLIs); + } } void LiveIntervals::computeVirtRegs() { @@ -216,19 +222,31 @@ void LiveIntervals::computeRegMasks() { RegMaskBlocks.resize(MF->getNumBlockIDs()); // Find all instructions with regmask operands.
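// A minimal illustrative sketch (not part of the patch): the splitting
// pattern the hunks above introduce. ConnectedVNInfoEqClasses::Classify()
// partitions a live interval's value numbers into connected components, and
// the new Distribute() overload moves each nonzero class into a freshly
// created interval; LiveIntervals::splitSeparateComponents(), added later in
// this patch, wraps exactly this call sequence. 'splitIntoComponents' is a
// hypothetical name for the sketch.
static void splitIntoComponents(LiveIntervals &LIS, MachineRegisterInfo &MRI,
                                LiveInterval &LI) {
  ConnectedVNInfoEqClasses ConEQ(LIS);
  unsigned NumComp = ConEQ.Classify(&LI);
  if (NumComp <= 1)
    return; // The liveness already forms a single connected component.
  SmallVector<LiveInterval *, 8> SplitLIs;
  const TargetRegisterClass *RC = MRI.getRegClass(LI.reg);
  for (unsigned I = 1; I < NumComp; ++I) {
    // Equivalence class 0 stays in LI; every other class gets its own vreg.
    unsigned NewVReg = MRI.createVirtualRegister(RC);
    SplitLIs.push_back(&LIS.createEmptyInterval(NewVReg));
  }
  ConEQ.Distribute(LI, SplitLIs.data(), MRI);
}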
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - std::pair &RMB = RegMaskBlocks[MBB->getNumber()]; + for (MachineBasicBlock &MBB : *MF) { + std::pair &RMB = RegMaskBlocks[MBB.getNumber()]; RMB.first = RegMaskSlots.size(); - for (MachineBasicBlock::iterator MI = MBB->begin(), ME = MBB->end(); - MI != ME; ++MI) - for (const MachineOperand &MO : MI->operands()) { + + // Some block starts, such as EH funclets, create masks. + if (const uint32_t *Mask = MBB.getBeginClobberMask(TRI)) { + RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + + for (MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { if (!MO.isRegMask()) continue; - RegMaskSlots.push_back(Indexes->getInstructionIndex(MI).getRegSlot()); - RegMaskBits.push_back(MO.getRegMask()); + RegMaskSlots.push_back(Indexes->getInstructionIndex(&MI).getRegSlot()); + RegMaskBits.push_back(MO.getRegMask()); } + } + + // Some block ends, such as funclet returns, create masks. + if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) { + RegMaskSlots.push_back(Indexes->getMBBEndIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + // Compute the number of register mask instructions in this block. RMB.second = RegMaskSlots.size() - RMB.first; } @@ -296,18 +314,17 @@ void LiveIntervals::computeLiveInRegUnits() { // Check all basic blocks for live-ins. for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); MFI != MFE; ++MFI) { - const MachineBasicBlock *MBB = MFI; + const MachineBasicBlock *MBB = &*MFI; // We only care about ABI blocks: Entry + landing pads. - if ((MFI != MF->begin() && !MBB->isLandingPad()) || MBB->livein_empty()) + if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty()) continue; // Create phi-defs at Begin for all live-in registers. SlotIndex Begin = Indexes->getMBBStartIdx(MBB); DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber()); - for (MachineBasicBlock::livein_iterator LII = MBB->livein_begin(), - LIE = MBB->livein_end(); LII != LIE; ++LII) { - for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { + for (const auto &LI : MBB->liveins()) { + for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; LiveRange *LR = RegUnitRanges[Unit]; if (!LR) { @@ -396,9 +413,6 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, } } -/// shrinkToUses - After removing some uses of a register, shrink its live -/// range to just the remaining uses. This method does not compute reaching -/// defs for new uses, and it doesn't remove dead defs. bool LiveIntervals::shrinkToUses(LiveInterval *li, SmallVectorImpl *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); @@ -406,9 +420,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, && "Can only shrink virtual registers"); // Shrink subregister live ranges. + bool NeedsCleanup = false; for (LiveInterval::SubRange &S : li->subranges()) { shrinkToUses(S, li->reg); + if (S.empty()) + NeedsCleanup = true; } + if (NeedsCleanup) + li->removeEmptySubRanges(); // Find all the values used, including PHI kills. 
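// Illustrative sketch (an assumption about intent, not from the patch): why
// the shrinkToUses() hunk above defers subrange removal. Erasing a subrange
// while iterating LI.subranges() would invalidate the iteration, so
// emptiness is only noted during the scan and removeEmptySubRanges() runs
// once afterwards, preserving the new verifier invariant that no empty
// subrange survives.
static void pruneEmptySubRanges(LiveInterval &LI) {
  bool NeedsCleanup = false;
  for (const LiveInterval::SubRange &S : LI.subranges())
    if (S.empty())
      NeedsCleanup = true; // Defer removal until iteration has finished.
  if (NeedsCleanup)
    LI.removeEmptySubRanges();
}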
ShrinkToUsesWorkList WorkList; @@ -456,7 +475,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, bool LiveIntervals::computeDeadValues(LiveInterval &LI, SmallVectorImpl *dead) { - bool PHIRemoved = false; + bool MayHaveSplitComponents = false; for (auto VNI : LI.valnos) { if (VNI->isUnused()) continue; @@ -466,10 +485,13 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, // Is the register live before? Otherwise we may have to add a read-undef // flag for subregister defs. - if (MRI->shouldTrackSubRegLiveness(LI.reg)) { + bool DeadBeforeDef = false; + unsigned VReg = LI.reg; + if (MRI->shouldTrackSubRegLiveness(VReg)) { if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) { MachineInstr *MI = getInstructionFromIndex(Def); - MI->addRegisterDefReadUndef(LI.reg); + MI->setRegisterDefReadUndef(VReg); + DeadBeforeDef = true; } } @@ -480,19 +502,27 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI, VNI->markUnused(); LI.removeSegment(I); DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n"); - PHIRemoved = true; + MayHaveSplitComponents = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(Def); assert(MI && "No instruction defining live value"); - MI->addRegisterDead(LI.reg, TRI); + MI->addRegisterDead(VReg, TRI); + + // If we have a dead def that is completely separate from the rest of + // the liverange then we rewrite it to use a different VReg to not violate + // the rule that the liveness of a virtual register forms a connected + // component. This should only happen if subregister liveness is tracked. + if (DeadBeforeDef) + MayHaveSplitComponents = true; + if (dead && MI->allDefsAreDead()) { DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); dead->push_back(MI); } } } - return PHIRemoved; + return MayHaveSplitComponents; } void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) @@ -512,8 +542,8 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) // Maybe the operand is for a subregister we don't care about. unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - unsigned SubRegMask = TRI->getSubRegIndexLaneMask(SubReg); - if ((SubRegMask & SR.LaneMask) == 0) + LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg); + if ((LaneMask & SR.LaneMask) == 0) continue; } // We only need to visit each instruction once. @@ -712,7 +742,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0 // are actually never written by %vreg2. After assignment the // flag at the read instruction is invalid. - unsigned DefinedLanesMask; + LaneBitmask DefinedLanesMask; if (!SRs.empty()) { // Compute a mask of lanes that are defined. DefinedLanesMask = 0; @@ -736,7 +766,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { continue; if (MO.isUse()) { // Reading any undefined lanes? 
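// A small sketch (hypothetical helper) of exactly the test that follows:
// during kill-flag computation a use cannot be marked killed if it reads
// any lane outside DefinedLanesMask, the set of lanes actually written by
// the subregister defs seen so far. (Assumption: at this revision
// LaneBitmask is a plain 'unsigned' typedef, so the unsigned -> LaneBitmask
// changes throughout these hunks document intent rather than change
// behavior.)
static bool readsUndefinedLanes(LaneBitmask UseMask,
                                LaneBitmask DefinedLanesMask) {
  return (UseMask & ~DefinedLanesMask) != 0;
}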
- unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); if ((UseMask & ~DefinedLanesMask) != 0) goto CancelKill; } else if (MO.getSubReg() == 0) { @@ -944,7 +974,7 @@ public: LiveInterval &LI = LIS.getInterval(Reg); if (LI.hasSubRanges()) { unsigned SubReg = MO.getSubReg(); - unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); for (LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & LaneMask) == 0) continue; @@ -968,7 +998,7 @@ public: private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. - void updateRange(LiveRange &LR, unsigned Reg, unsigned LaneMask) { + void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { if (!Updated.insert(&LR).second) return; DEBUG({ @@ -976,7 +1006,7 @@ private: if (TargetRegisterInfo::isVirtualRegister(Reg)) { dbgs() << PrintReg(Reg); if (LaneMask != 0) - dbgs() << format(" L%04X", LaneMask); + dbgs() << " L" << PrintLaneMask(LaneMask); } else { dbgs() << PrintRegUnit(Reg, &TRI); } @@ -1098,7 +1128,7 @@ private: /// Hoist kill to NewIdx, then scan for last kill between NewIdx and /// OldIdx. /// - void handleMoveUp(LiveRange &LR, unsigned Reg, unsigned LaneMask) { + void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { // First look for a kill at OldIdx. LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); LiveRange::iterator E = LR.end(); @@ -1175,7 +1205,7 @@ private: } // Return the last use of reg between NewIdx and OldIdx. - SlotIndex findLastUseBefore(unsigned Reg, unsigned LaneMask) { + SlotIndex findLastUseBefore(unsigned Reg, LaneBitmask LaneMask) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { SlotIndex LastUse = NewIdx; @@ -1255,7 +1285,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, const MachineBasicBlock::iterator End, const SlotIndex endIdx, LiveRange &LR, const unsigned Reg, - const unsigned LaneMask) { + LaneBitmask LaneMask) { LiveInterval::iterator LII = LR.find(endIdx); SlotIndex lastUseIdx; if (LII != LR.end() && LII->start < endIdx) @@ -1282,7 +1312,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, continue; unsigned SubReg = MO.getSubReg(); - unsigned Mask = TRI->getSubRegIndexLaneMask(SubReg); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); if ((Mask & LaneMask) == 0) continue; @@ -1412,3 +1442,20 @@ void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { } LI.removeEmptySubRanges(); } + +void LiveIntervals::splitSeparateComponents(LiveInterval &LI, + SmallVectorImpl &SplitLIs) { + ConnectedVNInfoEqClasses ConEQ(*this); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp <= 1) + return; + DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); + unsigned Reg = LI.reg; + const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); + for (unsigned I = 1; I < NumComp; ++I) { + unsigned NewVReg = MRI->createVirtualRegister(RegClass); + LiveInterval &NewLI = createEmptyInterval(NewVReg); + SplitLIs.push_back(&NewLI); + } + ConEQ.Distribute(LI, SplitLIs.data(), *MRI); +} diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp index cbd98e3f3450..efbbcbe23e15 100644 --- a/lib/CodeGen/LivePhysRegs.cpp +++ b/lib/CodeGen/LivePhysRegs.cpp @@ -68,7 +68,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { /// Simulates liveness when stepping forward over an 
instruction(bundle): Remove /// killed-uses, add defs. This is not the recommended way, because it depends -/// on accurate kill flags. If possible use stepBackwards() instead of this +/// on accurate kill flags. If possible use stepBackward() instead of this /// function. void LivePhysRegs::stepForward(const MachineInstr &MI, SmallVectorImpl<std::pair<unsigned, const MachineInstr*>> &Clobbers) { @@ -128,8 +128,8 @@ void LivePhysRegs::dump() const { /// Add live-in registers of basic block \p MBB to \p LiveRegs. static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { - for (unsigned Reg : make_range(MBB.livein_begin(), MBB.livein_end())) - LiveRegs.addReg(Reg); + for (const auto &LI : MBB.liveins()) + LiveRegs.addReg(LI.PhysReg); } /// Add pristine registers to the given \p LiveRegs. This function removes @@ -147,11 +147,19 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, } void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB, - bool AddPristines) { - if (AddPristines) { + bool AddPristinesAndCSRs) { + if (AddPristinesAndCSRs) { const MachineFunction &MF = *MBB->getParent(); addPristines(*this, MF, *TRI); + if (!MBB->isReturnBlock()) { + // The return block has no successors whose live-ins we could merge + // below. So instead we add the callee saved registers manually. + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) + addReg(*I); + } } + + // To get the live-outs we simply merge the live-ins of all successors. for (const MachineBasicBlock *Succ : MBB->successors()) ::addLiveIns(*this, *Succ); }
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index bb2877ae31a8..c408615d42e2 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -64,23 +64,23 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { unsigned SubReg = MO.getSubReg(); if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) { - unsigned Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) - : MRI->getMaxLaneMaskForVReg(Reg); + LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); // If this is the first time we see a subregister def, initialize // subranges by creating a copy of the main range. if (!LI.hasSubRanges() && !LI.empty()) { - unsigned ClassMask = MRI->getMaxLaneMaskForVReg(Reg); + LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg); LI.createSubRangeFrom(*Alloc, ClassMask, LI); } for (LiveInterval::SubRange &S : LI.subranges()) { // A mask for subregs common to the existing subrange and current def. - unsigned Common = S.LaneMask & Mask; + LaneBitmask Common = S.LaneMask & Mask; if (Common == 0) continue; // A mask for subregs covered by the subrange but not the current def. - unsigned LRest = S.LaneMask & ~Mask; + LaneBitmask LRest = S.LaneMask & ~Mask; LiveInterval::SubRange *CommonRange; if (LRest != 0) { // Split current subrange into Common and LRest ranges. @@ -138,7 +138,8 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { } -void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, + LaneBitmask Mask) { // Visit all operands that read Reg. This may include partial defs.
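// Sketch (hypothetical helper, not part of the patch) of the lane-overlap
// filter applied by extendToUses() below: a subregister use only extends a
// subrange whose lane mask intersects the lanes the operand actually reads,
// while a full-register operand (SubReg == 0) reads every lane. This is a
// different test from the kill-flag check sketched earlier: there we asked
// whether a use reads lanes nobody defined; here we ask whether a use is
// relevant to one particular subrange at all.
static bool useOverlapsSubRange(const TargetRegisterInfo &TRI, unsigned SubReg,
                                LaneBitmask SubRangeMask) {
  if (SubReg == 0)
    return true;
  LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubReg);
  return (UseMask & SubRangeMask) != 0;
}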
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { @@ -157,7 +158,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, unsigned Mask) { continue; unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { - unsigned SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); // Ignore uses not covering the current subrange. if ((SubRegMask & Mask) == 0) continue; diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 34d99534834b..ff38c68820f1 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -129,7 +129,7 @@ class LiveRangeCalc { /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveRange &LR, unsigned Reg, unsigned LaneMask); + void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask); /// Reset Map and Seen fields. void resetLiveOutMap(); diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 08bbe0c3f379..5ce364ae661e 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -226,7 +226,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI, return true; const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); unsigned SubReg = MO.getSubReg(); - unsigned LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); for (const LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill()) return true; @@ -349,8 +349,9 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, ToShrink.pop_back(); if (foldAsLoad(LI, Dead)) continue; + unsigned VReg = LI->reg; if (TheDelegate) - TheDelegate->LRE_WillShrinkVirtReg(LI->reg); + TheDelegate->LRE_WillShrinkVirtReg(VReg); if (!LIS.shrinkToUses(LI, &Dead)) continue; @@ -360,7 +361,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, // them results in incorrect code. bool BeingSpilled = false; for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { - if (LI->reg == RegsBeingSpilled[i]) { + if (VReg == RegsBeingSpilled[i]) { BeingSpilled = true; break; } @@ -370,29 +371,21 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, // LI may have been separated, create new intervals. LI->RenumberValues(); - ConnectedVNInfoEqClasses ConEQ(LIS); - unsigned NumComp = ConEQ.Classify(LI); - if (NumComp <= 1) - continue; - ++NumFracRanges; - bool IsOriginal = VRM && VRM->getOriginal(LI->reg) == LI->reg; - DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); - SmallVector Dups(1, LI); - for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createEmptyIntervalFrom(LI->reg)); + SmallVector SplitLIs; + LIS.splitSeparateComponents(*LI, SplitLIs); + if (!SplitLIs.empty()) + ++NumFracRanges; + + unsigned Original = VRM ? VRM->getOriginal(VReg) : 0; + for (const LiveInterval *SplitLI : SplitLIs) { // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. 
- if (IsOriginal) - VRM->setIsSplitFromReg(Dups.back()->reg, 0); + if (Original != VReg && Original != 0) + VRM->setIsSplitFromReg(SplitLI->reg, Original); if (TheDelegate) - TheDelegate->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); + TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg); } - ConEQ.Distribute(&Dups[0], MRI); - DEBUG({ - for (unsigned i = 0; i != NumComp; ++i) - dbgs() << '\t' << *Dups[i] << '\n'; - }); } } @@ -411,7 +404,7 @@ void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, const MachineLoopInfo &Loops, const MachineBlockFrequencyInfo &MBFI) { - VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); + VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI); for (unsigned I = 0, Size = size(); I < Size; ++I) { LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg)) diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 9ea031d38d29..7ee87c1e650f 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -15,12 +15,11 @@ #include "RegisterCoalescer.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -49,7 +48,6 @@ void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getSubtarget().getRegisterInfo(); - MRI = &MF.getRegInfo(); LIS = &getAnalysis(); VRM = &getAnalysis(); @@ -78,7 +76,7 @@ bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval, if (VRegInterval.hasSubRanges()) { for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { unsigned Unit = (*Units).first; - unsigned Mask = (*Units).second; + LaneBitmask Mask = (*Units).second; for (LiveInterval::SubRange &S : VRegInterval.subranges()) { if (S.LaneMask & Mask) { if (Func(Unit, S)) @@ -101,7 +99,6 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << ':'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); - MRI->setPhysRegUsed(PhysReg); foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, const LiveRange &Range) { diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index b355393e76f7..06b86d82daf1 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -522,11 +522,15 @@ void LiveVariables::runOnInstr(MachineInstr *MI, continue; unsigned MOReg = MO.getReg(); if (MO.isUse()) { - MO.setIsKill(false); + if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && + MRI->isReserved(MOReg))) + MO.setIsKill(false); if (MO.readsReg()) UseRegs.push_back(MOReg); } else /*MO.isDef()*/ { - MO.setIsDead(false); + if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && + MRI->isReserved(MOReg))) + MO.setIsDead(false); DefRegs.push_back(MOReg); } } @@ -559,11 +563,10 @@ void LiveVariables::runOnInstr(MachineInstr *MI, void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { // Mark live-in registers as live-in. 
SmallVector Defs; - for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(), - EE = MBB->livein_end(); II != EE; ++II) { - assert(TargetRegisterInfo::isPhysicalRegister(*II) && + for (const auto &LI : MBB->liveins()) { + assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) && "Cannot have a live-in virtual register!"); - HandlePhysRegDef(*II, nullptr, Defs); + HandlePhysRegDef(LI.PhysReg, nullptr, Defs); } // Loop over all of the instructions, processing them. @@ -599,14 +602,12 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->isLandingPad()) + if (SuccMBB->isEHPad()) continue; - for (MachineBasicBlock::livein_iterator LI = SuccMBB->livein_begin(), - LE = SuccMBB->livein_end(); LI != LE; ++LI) { - unsigned LReg = *LI; - if (!TRI->isInAllocatableClass(LReg)) + for (const auto &LI : SuccMBB->liveins()) { + if (!TRI->isInAllocatableClass(LI.PhysReg)) // Ignore other live-ins, e.g. those that are live into landing pads. - LiveOuts.insert(LReg); + LiveOuts.insert(LI.PhysReg); } } @@ -640,7 +641,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // function. This guarantees that we will see the definition of a virtual // register before its uses due to dominance properties of SSA (except for PHI // nodes, which are treated as a special case). - MachineBasicBlock *Entry = MF->begin(); + MachineBasicBlock *Entry = &MF->front(); SmallPtrSet Visited; for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) { diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index 837842914b4c..eb60005764c0 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -325,7 +325,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { // Sort the frame references by local offset array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); - MachineBasicBlock *Entry = Fn.begin(); + MachineBasicBlock *Entry = &Fn.front(); unsigned BaseReg = 0; int64_t BaseOffset = 0; diff --git a/lib/CodeGen/MIRParser/LLVMBuild.txt b/lib/CodeGen/MIRParser/LLVMBuild.txt index 04ae72290f93..2852124786e3 100644 --- a/lib/CodeGen/MIRParser/LLVMBuild.txt +++ b/lib/CodeGen/MIRParser/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = MIRParser parent = CodeGen -required_libraries = Core Support Target AsmParser CodeGen +required_libraries = AsmParser CodeGen Core MC Support Target diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp index 482c33ae2235..28f9d4e298f9 100644 --- a/lib/CodeGen/MIRParser/MILexer.cpp +++ b/lib/CodeGen/MIRParser/MILexer.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "MILexer.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include @@ -54,15 +55,132 @@ public: } // end anonymous namespace +MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { + this->Kind = Kind; + this->Range = Range; + return *this; +} + +MIToken &MIToken::setStringValue(StringRef StrVal) { + StringValue = StrVal; + return *this; +} + +MIToken &MIToken::setOwnedStringValue(std::string StrVal) { + StringValueStorage = std::move(StrVal); + StringValue = StringValueStorage; + return *this; +} + +MIToken &MIToken::setIntegerValue(APSInt IntVal) { + 
this->IntVal = std::move(IntVal); + return *this; +} + /// Skip the leading whitespace characters and return the updated cursor. static Cursor skipWhitespace(Cursor C) { - while (isspace(C.peek())) + while (isblank(C.peek())) C.advance(); return C; } +static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } + +/// Skip a line comment and return the updated cursor. +static Cursor skipComment(Cursor C) { + if (C.peek() != ';') + return C; + while (!isNewlineChar(C.peek()) && !C.isEOF()) + C.advance(); + return C; +} + +/// Return true if the given character satisfies the following regular +/// expression: [-a-zA-Z$._0-9] static bool isIdentifierChar(char C) { - return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.'; + return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || + C == '$'; +} + +/// Unescapes the given string value. +/// +/// Expects the string value to be quoted. +static std::string unescapeQuotedString(StringRef Value) { + assert(Value.front() == '"' && Value.back() == '"'); + Cursor C = Cursor(Value.substr(1, Value.size() - 2)); + + std::string Str; + Str.reserve(C.remaining().size()); + while (!C.isEOF()) { + char Char = C.peek(); + if (Char == '\\') { + if (C.peek(1) == '\\') { + // Two '\' become one + Str += '\\'; + C.advance(2); + continue; + } + if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { + Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); + C.advance(3); + continue; + } + } + Str += Char; + C.advance(); + } + return Str; +} + +/// Lex a string constant using the following regular expression: \"[^\"]*\" +static Cursor lexStringConstant( + Cursor C, + function_ref ErrorCallback) { + assert(C.peek() == '"'); + for (C.advance(); C.peek() != '"'; C.advance()) { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '\"'"); + return None; + } + } + C.advance(); + return C; +} + +static Cursor lexName( + Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength, + function_ref ErrorCallback) { + auto Range = C; + C.advance(PrefixLength); + if (C.peek() == '"') { + if (Cursor R = lexStringConstant(C, ErrorCallback)) { + StringRef String = Range.upto(R); + Token.reset(Type, String) + .setOwnedStringValue( + unescapeQuotedString(String.drop_front(PrefixLength))); + return R; + } + Token.reset(MIToken::Error, Range.remaining()); + return Range; + } + while (isIdentifierChar(C.peek())) + C.advance(); + Token.reset(Type, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(PrefixLength)); + return C; +} + +static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) { + if (C.peek() != 'i' || !isdigit(C.peek(1))) + return None; + auto Range = C; + C.advance(); // Skip 'i' + while (isdigit(C.peek())) + C.advance(); + Token.reset(MIToken::IntegerType, Range.upto(C)); + return C; } static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { @@ -70,32 +188,70 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("_", MIToken::underscore) .Case("implicit", MIToken::kw_implicit) .Case("implicit-def", MIToken::kw_implicit_define) + .Case("def", MIToken::kw_def) .Case("dead", MIToken::kw_dead) .Case("killed", MIToken::kw_killed) .Case("undef", MIToken::kw_undef) + .Case("internal", MIToken::kw_internal) + .Case("early-clobber", MIToken::kw_early_clobber) + .Case("debug-use", MIToken::kw_debug_use) + .Case("tied-def", MIToken::kw_tied_def) + .Case("frame-setup", MIToken::kw_frame_setup) + 
.Case("debug-location", MIToken::kw_debug_location) + .Case(".cfi_same_value", MIToken::kw_cfi_same_value) + .Case(".cfi_offset", MIToken::kw_cfi_offset) + .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) + .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) + .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) + .Case("blockaddress", MIToken::kw_blockaddress) + .Case("target-index", MIToken::kw_target_index) + .Case("half", MIToken::kw_half) + .Case("float", MIToken::kw_float) + .Case("double", MIToken::kw_double) + .Case("x86_fp80", MIToken::kw_x86_fp80) + .Case("fp128", MIToken::kw_fp128) + .Case("ppc_fp128", MIToken::kw_ppc_fp128) + .Case("target-flags", MIToken::kw_target_flags) + .Case("volatile", MIToken::kw_volatile) + .Case("non-temporal", MIToken::kw_non_temporal) + .Case("invariant", MIToken::kw_invariant) + .Case("align", MIToken::kw_align) + .Case("stack", MIToken::kw_stack) + .Case("got", MIToken::kw_got) + .Case("jump-table", MIToken::kw_jump_table) + .Case("constant-pool", MIToken::kw_constant_pool) + .Case("call-entry", MIToken::kw_call_entry) + .Case("liveout", MIToken::kw_liveout) + .Case("address-taken", MIToken::kw_address_taken) + .Case("landing-pad", MIToken::kw_landing_pad) + .Case("liveins", MIToken::kw_liveins) + .Case("successors", MIToken::kw_successors) .Default(MIToken::Identifier); } static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { - if (!isalpha(C.peek()) && C.peek() != '_') + if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') return None; auto Range = C; while (isIdentifierChar(C.peek())) C.advance(); auto Identifier = Range.upto(C); - Token = MIToken(getIdentifierKind(Identifier), Identifier); + Token.reset(getIdentifierKind(Identifier), Identifier) + .setStringValue(Identifier); return C; } static Cursor maybeLexMachineBasicBlock( Cursor C, MIToken &Token, function_ref ErrorCallback) { - if (!C.remaining().startswith("%bb.")) + bool IsReference = C.remaining().startswith("%bb."); + if (!IsReference && !C.remaining().startswith("bb.")) return None; auto Range = C; - C.advance(4); // Skip '%bb.' + unsigned PrefixLength = IsReference ? 4 : 3; + C.advance(PrefixLength); // Skip '%bb.' or 'bb.' if (!isdigit(C.peek())) { - Token = MIToken(MIToken::Error, C.remaining()); + Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), "expected a number after '%bb.'"); return C; } @@ -103,26 +259,103 @@ static Cursor maybeLexMachineBasicBlock( while (isdigit(C.peek())) C.advance(); StringRef Number = NumberRange.upto(C); - unsigned StringOffset = 4 + Number.size(); // Drop '%bb.' + unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.' if (C.peek() == '.') { C.advance(); // Skip '.' ++StringOffset; while (isIdentifierChar(C.peek())) C.advance(); } - Token = MIToken(MIToken::MachineBasicBlock, Range.upto(C), APSInt(Number), - StringOffset); + Token.reset(IsReference ? 
MIToken::MachineBasicBlock + : MIToken::MachineBasicBlockLabel, + Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); return C; } +static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); + return C; +} + +static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + StringRef Number = NumberRange.upto(C); + unsigned StringOffset = Rule.size() + Number.size(); + if (C.peek() == '.') { + C.advance(); + ++StringOffset; + while (isIdentifierChar(C.peek())) + C.advance(); + } + Token.reset(Kind, Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); + return C; +} + +static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); +} + +static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { + return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); +} + +static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); +} + +static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%const.", MIToken::ConstantPoolItem); +} + +static Cursor maybeLexIRBlock( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + const StringRef Rule = "%ir-block."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); + return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); +} + +static Cursor maybeLexIRValue( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + const StringRef Rule = "%ir."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRValue); + return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); +} + static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { auto Range = C; C.advance(); // Skip '%' auto NumberRange = C; while (isdigit(C.peek())) C.advance(); - Token = MIToken(MIToken::VirtualRegister, Range.upto(C), - APSInt(NumberRange.upto(C))); + Token.reset(MIToken::VirtualRegister, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); return C; } @@ -135,41 +368,112 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) { C.advance(); // Skip '%' while (isIdentifierChar(C.peek())) C.advance(); - Token = MIToken(MIToken::NamedRegister, Range.upto(C), - /*StringOffset=*/1); // Drop the '%' + Token.reset(MIToken::NamedRegister, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' return C; } -static Cursor maybeLexGlobalValue(Cursor C, MIToken &Token) { +static Cursor maybeLexGlobalValue( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { if (C.peek() != '@') return None; + if (!isdigit(C.peek(1))) + return lexName(C, Token, 
MIToken::NamedGlobalValue, /*PrefixLength=*/1, + ErrorCallback); auto Range = C; - C.advance(); // Skip the '@' - // TODO: add support for quoted names. - if (!isdigit(C.peek())) { - while (isIdentifierChar(C.peek())) - C.advance(); - Token = MIToken(MIToken::NamedGlobalValue, Range.upto(C), - /*StringOffset=*/1); // Drop the '@' - return C; - } + C.advance(1); // Skip the '@' auto NumberRange = C; while (isdigit(C.peek())) C.advance(); - Token = - MIToken(MIToken::GlobalValue, Range.upto(C), APSInt(NumberRange.upto(C))); + Token.reset(MIToken::GlobalValue, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); return C; } -static Cursor maybeLexIntegerLiteral(Cursor C, MIToken &Token) { +static Cursor maybeLexExternalSymbol( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + if (C.peek() != '$') + return None; + return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, + ErrorCallback); +} + +static bool isValidHexFloatingPointPrefix(char C) { + return C == 'H' || C == 'K' || C == 'L' || C == 'M'; +} + +static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) { + if (C.peek() != '0' || C.peek(1) != 'x') + return None; + Cursor Range = C; + C.advance(2); // Skip '0x' + if (isValidHexFloatingPointPrefix(C.peek())) + C.advance(); + while (isxdigit(C.peek())) + C.advance(); + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { + C.advance(); + // Skip over [0-9]*([eE][-+]?[0-9]+)? + while (isdigit(C.peek())) + C.advance(); + if ((C.peek() == 'e' || C.peek() == 'E') && + (isdigit(C.peek(1)) || + ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { + C.advance(2); + while (isdigit(C.peek())) + C.advance(); + } + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) return None; auto Range = C; C.advance(); while (isdigit(C.peek())) C.advance(); + if (C.peek() == '.') + return lexFloatingPointLiteral(Range, C, Token); StringRef StrVal = Range.upto(C); - Token = MIToken(MIToken::IntegerLiteral, StrVal, APSInt(StrVal)); + Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); + return C; +} + +static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { + return StringSwitch(Identifier) + .Case("!tbaa", MIToken::md_tbaa) + .Case("!alias.scope", MIToken::md_alias_scope) + .Case("!noalias", MIToken::md_noalias) + .Case("!range", MIToken::md_range) + .Default(MIToken::Error); +} + +static Cursor maybeLexExlaim( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + if (C.peek() != '!') + return None; + auto Range = C; + C.advance(1); + if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { + Token.reset(MIToken::exclaim, Range.upto(C)); + return C; + } + while (isIdentifierChar(C.peek())) + C.advance(); + StringRef StrVal = Range.upto(C); + Token.reset(getMetadataKeywordKind(StrVal), StrVal); + if (Token.isError()) + ErrorCallback(Token.location(), + "use of unknown metadata keyword '" + StrVal + "'"); return C; } @@ -181,44 +485,119 @@ static MIToken::TokenKind symbolToken(char C) { return MIToken::equal; case ':': return MIToken::colon; + case '(': + return MIToken::lparen; + case ')': + return MIToken::rparen; + case '{': + return MIToken::lbrace; + case '}': + return MIToken::rbrace; + case '+': + return MIToken::plus; + case '-': + 
return MIToken::minus; default: return MIToken::Error; } } static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { - auto Kind = symbolToken(C.peek()); + MIToken::TokenKind Kind; + unsigned Length = 1; + if (C.peek() == ':' && C.peek(1) == ':') { + Kind = MIToken::coloncolon; + Length = 2; + } else + Kind = symbolToken(C.peek()); if (Kind == MIToken::Error) return None; auto Range = C; + C.advance(Length); + Token.reset(Kind, Range.upto(C)); + return C; +} + +static Cursor maybeLexNewline(Cursor C, MIToken &Token) { + if (!isNewlineChar(C.peek())) + return None; + auto Range = C; C.advance(); - Token = MIToken(Kind, Range.upto(C)); + Token.reset(MIToken::Newline, Range.upto(C)); + return C; +} + +static Cursor maybeLexEscapedIRValue( + Cursor C, MIToken &Token, + function_ref ErrorCallback) { + if (C.peek() != '`') + return None; + auto Range = C; + C.advance(); + auto StrRange = C; + while (C.peek() != '`') { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '`'"); + Token.reset(MIToken::Error, Range.remaining()); + return C; + } + C.advance(); + } + StringRef Value = StrRange.upto(C); + C.advance(); + Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); return C; } StringRef llvm::lexMIToken( StringRef Source, MIToken &Token, function_ref ErrorCallback) { - auto C = skipWhitespace(Cursor(Source)); + auto C = skipComment(skipWhitespace(Cursor(Source))); if (C.isEOF()) { - Token = MIToken(MIToken::Eof, C.remaining()); + Token.reset(MIToken::Eof, C.remaining()); return C.remaining(); } - if (Cursor R = maybeLexIdentifier(C, Token)) + if (Cursor R = maybeLexIntegerType(C, Token)) return R.remaining(); if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) return R.remaining(); + if (Cursor R = maybeLexIdentifier(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexJumpTableIndex(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexFixedStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexConstantPoolItem(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) + return R.remaining(); if (Cursor R = maybeLexRegister(C, Token)) return R.remaining(); - if (Cursor R = maybeLexGlobalValue(C, Token)) + if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) return R.remaining(); - if (Cursor R = maybeLexIntegerLiteral(C, Token)) + if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexNumericalLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) return R.remaining(); if (Cursor R = maybeLexSymbol(C, Token)) return R.remaining(); + if (Cursor R = maybeLexNewline(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) + return R.remaining(); - Token = MIToken(MIToken::Error, C.remaining()); + Token.reset(MIToken::Error, C.remaining()); ErrorCallback(C.location(), Twine("unexpected character '") + Twine(C.peek()) + "'"); return C.remaining(); diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h index 55460b56e7d6..ff54aa3554d8 100644 --- a/lib/CodeGen/MIRParser/MILexer.h +++ 
b/lib/CodeGen/MIRParser/MILexer.h @@ -30,50 +30,119 @@ struct MIToken { // Markers Eof, Error, + Newline, // Tokens with no info. comma, equal, underscore, colon, + coloncolon, + exclaim, + lparen, + rparen, + lbrace, + rbrace, + plus, + minus, // Keywords kw_implicit, kw_implicit_define, + kw_def, kw_dead, kw_killed, kw_undef, + kw_internal, + kw_early_clobber, + kw_debug_use, + kw_tied_def, + kw_frame_setup, + kw_debug_location, + kw_cfi_same_value, + kw_cfi_offset, + kw_cfi_def_cfa_register, + kw_cfi_def_cfa_offset, + kw_cfi_def_cfa, + kw_blockaddress, + kw_target_index, + kw_half, + kw_float, + kw_double, + kw_x86_fp80, + kw_fp128, + kw_ppc_fp128, + kw_target_flags, + kw_volatile, + kw_non_temporal, + kw_invariant, + kw_align, + kw_stack, + kw_got, + kw_jump_table, + kw_constant_pool, + kw_call_entry, + kw_liveout, + kw_address_taken, + kw_landing_pad, + kw_liveins, + kw_successors, + + // Named metadata keywords + md_tbaa, + md_alias_scope, + md_noalias, + md_range, // Identifier tokens Identifier, + IntegerType, NamedRegister, + MachineBasicBlockLabel, MachineBasicBlock, + StackObject, + FixedStackObject, NamedGlobalValue, GlobalValue, + ExternalSymbol, // Other tokens IntegerLiteral, - VirtualRegister + FloatingPointLiteral, + VirtualRegister, + ConstantPoolItem, + JumpTableIndex, + NamedIRBlock, + IRBlock, + NamedIRValue, + IRValue, + QuotedIRValue // `` }; private: TokenKind Kind; - unsigned StringOffset; StringRef Range; + StringRef StringValue; + std::string StringValueStorage; APSInt IntVal; public: - MIToken(TokenKind Kind, StringRef Range, unsigned StringOffset = 0) - : Kind(Kind), StringOffset(StringOffset), Range(Range) {} + MIToken() : Kind(Error) {} - MIToken(TokenKind Kind, StringRef Range, const APSInt &IntVal, - unsigned StringOffset = 0) - : Kind(Kind), StringOffset(StringOffset), Range(Range), IntVal(IntVal) {} + MIToken &reset(TokenKind Kind, StringRef Range); + + MIToken &setStringValue(StringRef StrVal); + MIToken &setOwnedStringValue(std::string StrVal); + MIToken &setIntegerValue(APSInt IntVal); TokenKind kind() const { return Kind; } bool isError() const { return Kind == Error; } + bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; } + + bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; } + bool isRegister() const { return Kind == NamedRegister || Kind == underscore || Kind == VirtualRegister; @@ -81,7 +150,14 @@ public: bool isRegisterFlag() const { return Kind == kw_implicit || Kind == kw_implicit_define || - Kind == kw_dead || Kind == kw_killed || Kind == kw_undef; + Kind == kw_def || Kind == kw_dead || Kind == kw_killed || + Kind == kw_undef || Kind == kw_internal || + Kind == kw_early_clobber || Kind == kw_debug_use; + } + + bool isMemoryOperandFlag() const { + return Kind == kw_volatile || Kind == kw_non_temporal || + Kind == kw_invariant; } bool is(TokenKind K) const { return Kind == K; } @@ -90,13 +166,19 @@ public: StringRef::iterator location() const { return Range.begin(); } - StringRef stringValue() const { return Range.drop_front(StringOffset); } + StringRef range() const { return Range; } + + /// Return the token's string value. 
+ StringRef stringValue() const { return StringValue; } const APSInt &integerValue() const { return IntVal; } bool hasIntegerValue() const { return Kind == IntegerLiteral || Kind == MachineBasicBlock || - Kind == GlobalValue || Kind == VirtualRegister; + Kind == MachineBasicBlockLabel || Kind == StackObject || + Kind == FixedStackObject || Kind == GlobalValue || + Kind == VirtualRegister || Kind == ConstantPoolItem || + Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue; } };
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c00011288a60..f2f6584fb6c8 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -14,12 +14,20 @@ #include "MIParser.h" #include "MILexer.h" #include "llvm/ADT/StringMap.h" +#include "llvm/AsmParser/Parser.h" #include "llvm/AsmParser/SlotMapping.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -30,15 +38,20 @@ using namespace llvm; namespace { /// A wrapper struct around the 'MachineOperand' struct that includes a source -/// range. -struct MachineOperandWithLocation { +/// range and other attributes. +struct ParsedMachineOperand { MachineOperand Operand; StringRef::iterator Begin; StringRef::iterator End; + Optional<unsigned> TiedDefIdx; - MachineOperandWithLocation(const MachineOperand &Operand, - StringRef::iterator Begin, StringRef::iterator End) - : Operand(Operand), Begin(Begin), End(End) {} + ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin, + StringRef::iterator End, Optional<unsigned> &TiedDefIdx) + : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) { + if (TiedDefIdx) + assert(Operand.isReg() && Operand.isUse() && + "Only used register operands can be tied"); + } }; class MIParser { @@ -58,6 +71,16 @@ class MIParser { StringMap<const uint32_t *> Names2RegMasks; /// Maps from subregister names to subregister indices. StringMap<unsigned> Names2SubRegIndices; + /// Maps from slot numbers to function's unnamed basic blocks. + DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks; + /// Maps from slot numbers to function's unnamed values. + DenseMap<unsigned, const Value *> Slots2Values; + /// Maps from target index names to target indices. + StringMap<int> Names2TargetIndices; + /// Maps from direct target flag names to the direct target flag values. + StringMap<unsigned> Names2DirectTargetFlags; + /// Maps from bitmask target flag names to the bitmask target flag values. + StringMap<unsigned> Names2BitmaskTargetFlags; public: MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, @@ -76,19 +99,66 @@ public: /// This function always returns true.
bool error(StringRef::iterator Loc, const Twine &Msg); + bool + parseBasicBlockDefinitions(DenseMap &MBBSlots); + bool parseBasicBlocks(); bool parse(MachineInstr *&MI); - bool parseMBB(MachineBasicBlock *&MBB); - bool parseNamedRegister(unsigned &Reg); + bool parseStandaloneMBB(MachineBasicBlock *&MBB); + bool parseStandaloneNamedRegister(unsigned &Reg); + bool parseStandaloneVirtualRegister(unsigned &Reg); + bool parseStandaloneStackObject(int &FI); + bool parseStandaloneMDNode(MDNode *&Node); + + bool + parseBasicBlockDefinition(DenseMap &MBBSlots); + bool parseBasicBlock(MachineBasicBlock &MBB); + bool parseBasicBlockLiveins(MachineBasicBlock &MBB); + bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); bool parseRegister(unsigned &Reg); bool parseRegisterFlag(unsigned &Flags); bool parseSubRegisterIndex(unsigned &SubReg); - bool parseRegisterOperand(MachineOperand &Dest, bool IsDef = false); + bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx); + bool parseRegisterOperand(MachineOperand &Dest, + Optional &TiedDefIdx, bool IsDef = false); bool parseImmediateOperand(MachineOperand &Dest); + bool parseIRConstant(StringRef::iterator Loc, StringRef Source, + const Constant *&C); + bool parseIRConstant(StringRef::iterator Loc, const Constant *&C); + bool parseTypedImmediateOperand(MachineOperand &Dest); + bool parseFPImmediateOperand(MachineOperand &Dest); bool parseMBBReference(MachineBasicBlock *&MBB); bool parseMBBOperand(MachineOperand &Dest); + bool parseStackFrameIndex(int &FI); + bool parseStackObjectOperand(MachineOperand &Dest); + bool parseFixedStackFrameIndex(int &FI); + bool parseFixedStackObjectOperand(MachineOperand &Dest); + bool parseGlobalValue(GlobalValue *&GV); bool parseGlobalAddressOperand(MachineOperand &Dest); - bool parseMachineOperand(MachineOperand &Dest); + bool parseConstantPoolIndexOperand(MachineOperand &Dest); + bool parseJumpTableIndexOperand(MachineOperand &Dest); + bool parseExternalSymbolOperand(MachineOperand &Dest); + bool parseMDNode(MDNode *&Node); + bool parseMetadataOperand(MachineOperand &Dest); + bool parseCFIOffset(int &Offset); + bool parseCFIRegister(unsigned &Reg); + bool parseCFIOperand(MachineOperand &Dest); + bool parseIRBlock(BasicBlock *&BB, const Function &F); + bool parseBlockAddressOperand(MachineOperand &Dest); + bool parseTargetIndexOperand(MachineOperand &Dest); + bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); + bool parseMachineOperand(MachineOperand &Dest, + Optional &TiedDefIdx); + bool parseMachineOperandAndTargetFlags(MachineOperand &Dest, + Optional &TiedDefIdx); + bool parseOffset(int64_t &Offset); + bool parseAlignment(unsigned &Alignment); + bool parseOperandsOffset(MachineOperand &Op); + bool parseIRValue(const Value *&V); + bool parseMemoryOperandFlag(unsigned &Flags); + bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); + bool parseMachinePointerInfo(MachinePointerInfo &Dest); + bool parseMachineMemoryOperand(MachineMemOperand *&Dest); private: /// Convert the integer literal in the current token into an unsigned integer. @@ -96,15 +166,31 @@ private: /// Return true if an error occurred. bool getUnsigned(unsigned &Result); + /// Convert the integer literal in the current token into an uint64. + /// + /// Return true if an error occurred. + bool getUint64(uint64_t &Result); + + /// If the current token is of the given kind, consume it and return false. + /// Otherwise report an error and return true. 
+ bool expectAndConsume(MIToken::TokenKind TokenKind); + + /// If the current token is of the given kind, consume it and return true. + /// Otherwise return false. + bool consumeIfPresent(MIToken::TokenKind TokenKind); + void initNames2InstrOpCodes(); /// Try to convert an instruction name to an opcode. Return true if the /// instruction name is invalid. bool parseInstrName(StringRef InstrName, unsigned &OpCode); - bool parseInstruction(unsigned &OpCode); + bool parseInstruction(unsigned &OpCode, unsigned &Flags); - bool verifyImplicitOperands(ArrayRef Operands, + bool assignRegisterTies(MachineInstr &MI, + ArrayRef Operands); + + bool verifyImplicitOperands(ArrayRef Operands, const MCInstrDesc &MCID); void initNames2Regs(); @@ -126,6 +212,34 @@ private: /// /// Return 0 if the name isn't a subregister index class. unsigned getSubRegIndex(StringRef Name); + + const BasicBlock *getIRBlock(unsigned Slot); + const BasicBlock *getIRBlock(unsigned Slot, const Function &F); + + const Value *getIRValue(unsigned Slot); + + void initNames2TargetIndices(); + + /// Try to convert a name of target index to the corresponding target index. + /// + /// Return true if the name isn't a name of a target index. + bool getTargetIndex(StringRef Name, int &Index); + + void initNames2DirectTargetFlags(); + + /// Try to convert a name of a direct target flag to the corresponding + /// target flag. + /// + /// Return true if the name isn't a name of a direct flag. + bool getDirectTargetFlag(StringRef Name, unsigned &Flag); + + void initNames2BitmaskTargetFlags(); + + /// Try to convert a name of a bitmask target flag to the corresponding + /// target flag. + /// + /// Return true if the name isn't a name of a bitmask target flag. + bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag); }; } // end anonymous namespace @@ -134,7 +248,7 @@ MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, StringRef Source, const PerFunctionMIParsingState &PFS, const SlotMapping &IRSlots) : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), - Token(MIToken::Error, StringRef()), PFS(PFS), IRSlots(IRSlots) {} + PFS(PFS), IRSlots(IRSlots) {} void MIParser::lex() { CurrentSource = lexMIToken( @@ -146,49 +260,378 @@ bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); - Error = SMDiagnostic( - SM, SMLoc(), - SM.getMemoryBuffer(SM.getMainFileID())->getBufferIdentifier(), 1, - Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), Source, None, None); + const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID()); + if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) { + // Create an ordinary diagnostic when the source manager's buffer is the + // source string. + Error = SM.GetMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg); + return true; + } + // Create a diagnostic for a YAML string literal. 
+ Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1, + Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), + Source, None, None); return true; } -bool MIParser::parse(MachineInstr *&MI) { - lex(); +static const char *toString(MIToken::TokenKind TokenKind) { + switch (TokenKind) { + case MIToken::comma: + return "','"; + case MIToken::equal: + return "'='"; + case MIToken::colon: + return "':'"; + case MIToken::lparen: + return "'('"; + case MIToken::rparen: + return "')'"; + default: + return "<unknown token>"; + } +} - // Parse any register operands before '=' - // TODO: Allow parsing of multiple operands before '=' - MachineOperand MO = MachineOperand::CreateImm(0); - SmallVector<MachineOperandWithLocation, 8> Operands; - if (Token.isRegister() || Token.isRegisterFlag()) { - auto Loc = Token.location(); - if (parseRegisterOperand(MO, /*IsDef=*/true)) +bool MIParser::expectAndConsume(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return error(Twine("expected ") + toString(TokenKind)); + lex(); + return false; +} + +bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return false; + lex(); + return true; +} + +bool MIParser::parseBasicBlockDefinition( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + assert(Token.is(MIToken::MachineBasicBlockLabel)); + unsigned ID = 0; + if (getUnsigned(ID)) + return true; + auto Loc = Token.location(); + auto Name = Token.stringValue(); + lex(); + bool HasAddressTaken = false; + bool IsLandingPad = false; + unsigned Alignment = 0; + BasicBlock *BB = nullptr; + if (consumeIfPresent(MIToken::lparen)) { + do { + // TODO: Report an error when the same attribute is specified more than once. + switch (Token.kind()) { + case MIToken::kw_address_taken: + HasAddressTaken = true; + lex(); + break; + case MIToken::kw_landing_pad: + IsLandingPad = true; + lex(); + break; + case MIToken::kw_align: + if (parseAlignment(Alignment)) + return true; + break; + case MIToken::IRBlock: + // TODO: Report an error when both a name and an IR block are specified. + if (parseIRBlock(BB, *MF.getFunction())) + return true; + lex(); + break; + default: + break; + } + } while (consumeIfPresent(MIToken::comma)); + if (expectAndConsume(MIToken::rparen)) return true; - Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location())); - if (Token.isNot(MIToken::equal)) - return error("expected '='"); + } + if (expectAndConsume(MIToken::colon)) + return true; + + if (!Name.empty()) { + BB = dyn_cast_or_null<BasicBlock>( + MF.getFunction()->getValueSymbolTable().lookup(Name)); + if (!BB) + return error(Loc, Twine("basic block '") + Name + + "' is not defined in the function '" + + MF.getName() + "'"); + } + auto *MBB = MF.CreateMachineBasicBlock(BB); + MF.insert(MF.end(), MBB); + bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second; + if (!WasInserted) + return error(Loc, Twine("redefinition of machine basic block with id #") + + Twine(ID)); + if (Alignment) + MBB->setAlignment(Alignment); + if (HasAddressTaken) + MBB->setHasAddressTaken(); + MBB->setIsEHPad(IsLandingPad); + return false; +} + +bool MIParser::parseBasicBlockDefinitions( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + lex(); + // Skip until the first machine basic block.
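// A condensed, self-contained sketch (hypothetical MiniParser type, not the
// patch's API) of the recursive-descent convention the helpers above
// establish: a boolean 'true' always signals an error, consumeIfPresent()
// drives optional punctuation, and comma-separated lists become do/while
// loops, the same shape as parseBasicBlockDefinition()'s attribute list.
struct MiniParser {
  MIToken Token;                // current lookahead token
  void lex();                   // advance to the next token
  bool parseItem();             // parse one list element; true on error
  bool consumeIfPresent(MIToken::TokenKind Kind) {
    if (Token.isNot(Kind))
      return false;
    lex();
    return true;
  }
  bool parseCommaSeparatedList() {
    do {
      if (parseItem())
        return true;
    } while (consumeIfPresent(MIToken::comma));
    return false;
  }
};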
+  while (Token.is(MIToken::Newline))
+    lex();
+  if (Token.isErrorOrEOF())
+    return Token.isError();
+  if (Token.isNot(MIToken::MachineBasicBlockLabel))
+    return error("expected a basic block definition before instructions");
+  unsigned BraceDepth = 0;
+  do {
+    if (parseBasicBlockDefinition(MBBSlots))
+      return true;
+    bool IsAfterNewline = false;
+    // Skip until the next machine basic block.
+    while (true) {
+      if ((Token.is(MIToken::MachineBasicBlockLabel) && IsAfterNewline) ||
+          Token.isErrorOrEOF())
+        break;
+      else if (Token.is(MIToken::MachineBasicBlockLabel))
+        return error("basic block definition should be located at the start of "
+                     "the line");
+      else if (consumeIfPresent(MIToken::Newline)) {
+        IsAfterNewline = true;
+        continue;
+      }
+      IsAfterNewline = false;
+      if (Token.is(MIToken::lbrace))
+        ++BraceDepth;
+      if (Token.is(MIToken::rbrace)) {
+        if (!BraceDepth)
+          return error("extraneous closing brace ('}')");
+        --BraceDepth;
+      }
+      lex();
+    }
+    // Verify that we closed all of the '{' at the end of a file or a block.
+    if (!Token.isError() && BraceDepth)
+      return error("expected '}'"); // FIXME: Report a note that shows '{'.
+  } while (!Token.isErrorOrEOF());
+  return Token.isError();
+}
+
+bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
+  assert(Token.is(MIToken::kw_liveins));
+  lex();
+  if (expectAndConsume(MIToken::colon))
+    return true;
+  if (Token.isNewlineOrEOF()) // Allow an empty list of liveins.
+    return false;
+  do {
+    if (Token.isNot(MIToken::NamedRegister))
+      return error("expected a named register");
+    unsigned Reg = 0;
+    if (parseRegister(Reg))
+      return true;
+    MBB.addLiveIn(Reg);
+    lex();
+  } while (consumeIfPresent(MIToken::comma));
+  return false;
+}
+
+bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
+  assert(Token.is(MIToken::kw_successors));
+  lex();
+  if (expectAndConsume(MIToken::colon))
+    return true;
+  if (Token.isNewlineOrEOF()) // Allow an empty list of successors.
+    return false;
+  do {
+    if (Token.isNot(MIToken::MachineBasicBlock))
+      return error("expected a machine basic block reference");
+    MachineBasicBlock *SuccMBB = nullptr;
+    if (parseMBBReference(SuccMBB))
+      return true;
+    lex();
+    unsigned Weight = 0;
+    if (consumeIfPresent(MIToken::lparen)) {
+      if (Token.isNot(MIToken::IntegerLiteral))
+        return error("expected an integer literal after '('");
+      if (getUnsigned(Weight))
+        return true;
+      lex();
+      if (expectAndConsume(MIToken::rparen))
+        return true;
+    }
+    MBB.addSuccessor(SuccMBB, BranchProbability::getRaw(Weight));
+  } while (consumeIfPresent(MIToken::comma));
+  MBB.normalizeSuccProbs();
+  return false;
+}
+
+bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) {
+  // Skip the definition.
+  assert(Token.is(MIToken::MachineBasicBlockLabel));
+  lex();
+  if (consumeIfPresent(MIToken::lparen)) {
+    while (Token.isNot(MIToken::rparen) && !Token.isErrorOrEOF())
+      lex();
+    consumeIfPresent(MIToken::rparen);
+  }
+  consumeIfPresent(MIToken::colon);
+
+  // Parse the liveins and successors.
+  // N.B.: Multiple lists of successors and liveins are allowed and they're
+  // merged into one.
+  // Example:
+  //   liveins: %edi
+  //   liveins: %esi
+  //
+  // is equivalent to
+  //   liveins: %edi, %esi
+  while (true) {
+    if (Token.is(MIToken::kw_successors)) {
+      if (parseBasicBlockSuccessors(MBB))
+        return true;
+    } else if (Token.is(MIToken::kw_liveins)) {
+      if (parseBasicBlockLiveins(MBB))
+        return true;
+    } else if (consumeIfPresent(MIToken::Newline)) {
+      continue;
+    } else
+      break;
+    if (!Token.isNewlineOrEOF())
+      return error("expected line break at the end of a list");
     lex();
   }

-  unsigned OpCode;
-  if (Token.isError() || parseInstruction(OpCode))
+  // Parse the instructions.
+  bool IsInBundle = false;
+  MachineInstr *PrevMI = nullptr;
+  while (true) {
+    if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof))
+      return false;
+    else if (consumeIfPresent(MIToken::Newline))
+      continue;
+    if (consumeIfPresent(MIToken::rbrace)) {
+      // The first parsing pass should verify that all closing '}' have an
+      // opening '{'.
+      assert(IsInBundle);
+      IsInBundle = false;
+      continue;
+    }
+    MachineInstr *MI = nullptr;
+    if (parse(MI))
+      return true;
+    MBB.insert(MBB.end(), MI);
+    if (IsInBundle) {
+      PrevMI->setFlag(MachineInstr::BundledSucc);
+      MI->setFlag(MachineInstr::BundledPred);
+    }
+    PrevMI = MI;
+    if (Token.is(MIToken::lbrace)) {
+      if (IsInBundle)
+        return error("nested instruction bundles are not allowed");
+      lex();
+      // This instruction is the start of the bundle.
+      MI->setFlag(MachineInstr::BundledSucc);
+      IsInBundle = true;
+      if (!Token.is(MIToken::Newline))
+        // The next instruction can be on the same line.
+        continue;
+    }
+    assert(Token.isNewlineOrEOF() && "MI is not fully parsed");
+    lex();
+  }
+  return false;
+}
+
+bool MIParser::parseBasicBlocks() {
+  lex();
+  // Skip until the first machine basic block.
+  while (Token.is(MIToken::Newline))
+    lex();
+  if (Token.isErrorOrEOF())
+    return Token.isError();
+  // The first parsing pass should have verified that this token is a MBB label
+  // in the 'parseBasicBlockDefinitions' method.
+  assert(Token.is(MIToken::MachineBasicBlockLabel));
+  do {
+    MachineBasicBlock *MBB = nullptr;
+    if (parseMBBReference(MBB))
+      return true;
+    if (parseBasicBlock(*MBB))
+      return true;
+    // The method 'parseBasicBlock' should parse the whole block until the next
+    // block or the end of file.
+    assert(Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof));
+  } while (Token.isNot(MIToken::Eof));
+  return false;
+}
+
+bool MIParser::parse(MachineInstr *&MI) {
+  // Parse any register operands before '='
+  MachineOperand MO = MachineOperand::CreateImm(0);
+  SmallVector<ParsedMachineOperand, 8> Operands;
+  while (Token.isRegister() || Token.isRegisterFlag()) {
+    auto Loc = Token.location();
+    Optional<unsigned> TiedDefIdx;
+    if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true))
+      return true;
+    Operands.push_back(
+        ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+    if (Token.isNot(MIToken::comma))
+      break;
+    lex();
+  }
+  if (!Operands.empty() && expectAndConsume(MIToken::equal))
     return true;

-  // TODO: Parse the instruction flags and memory operands.
+  unsigned OpCode, Flags = 0;
+  if (Token.isError() || parseInstruction(OpCode, Flags))
+    return true;

   // Parse the remaining machine operands.
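+  // A fully parsed instruction looks like (illustrative operands and
+  // metadata ids):
+  //
+  //   %eax = MOV32rm %rdi, 1, _, 0, _, debug-location !12 :: (load 4 from %ir.x)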
-  while (Token.isNot(MIToken::Eof)) {
+  while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
+         Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
     auto Loc = Token.location();
-    if (parseMachineOperand(MO))
+    Optional<unsigned> TiedDefIdx;
+    if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx))
       return true;
-    Operands.push_back(MachineOperandWithLocation(MO, Loc, Token.location()));
-    if (Token.is(MIToken::Eof))
+    Operands.push_back(
+        ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx));
+    if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
+        Token.is(MIToken::lbrace))
       break;
     if (Token.isNot(MIToken::comma))
       return error("expected ',' before the next machine operand");
     lex();
   }

+  DebugLoc DebugLocation;
+  if (Token.is(MIToken::kw_debug_location)) {
+    lex();
+    if (Token.isNot(MIToken::exclaim))
+      return error("expected a metadata node after 'debug-location'");
+    MDNode *Node = nullptr;
+    if (parseMDNode(Node))
+      return true;
+    DebugLocation = DebugLoc(Node);
+  }
+
+  // Parse the machine memory operands.
+  SmallVector<MachineMemOperand *, 2> MemOperands;
+  if (Token.is(MIToken::coloncolon)) {
+    lex();
+    while (!Token.isNewlineOrEOF()) {
+      MachineMemOperand *MemOp = nullptr;
+      if (parseMachineMemoryOperand(MemOp))
+        return true;
+      MemOperands.push_back(MemOp);
+      if (Token.isNewlineOrEOF())
+        break;
+      if (Token.isNot(MIToken::comma))
+        return error("expected ',' before the next machine memory operand");
+      lex();
+    }
+  }
+
   const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode);
   if (!MCID.isVariadic()) {
     // FIXME: Move the implicit operand verification to the machine verifier.
@@ -197,13 +640,22 @@ bool MIParser::parse(MachineInstr *&MI) {
   }

   // TODO: Check for extraneous machine operands.
-  MI = MF.CreateMachineInstr(MCID, DebugLoc(), /*NoImplicit=*/true);
+  MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
+  MI->setFlags(Flags);
   for (const auto &Operand : Operands)
     MI->addOperand(MF, Operand.Operand);
+  if (assignRegisterTies(*MI, Operands))
+    return true;
+  if (MemOperands.empty())
+    return false;
+  MachineInstr::mmo_iterator MemRefs =
+      MF.allocateMemRefsArray(MemOperands.size());
+  std::copy(MemOperands.begin(), MemOperands.end(), MemRefs);
+  MI->setMemRefs(MemRefs, MemRefs + MemOperands.size());
   return false;
 }

-bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
+bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) {
   lex();
   if (Token.isNot(MIToken::MachineBasicBlock))
     return error("expected a machine basic block reference");
@@ -216,18 +668,52 @@ bool MIParser::parseMBB(MachineBasicBlock *&MBB) {
   return false;
 }

-bool MIParser::parseNamedRegister(unsigned &Reg) {
+bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
   lex();
   if (Token.isNot(MIToken::NamedRegister))
     return error("expected a named register");
   if (parseRegister(Reg))
-    return 0;
+    return true;
   lex();
   if (Token.isNot(MIToken::Eof))
     return error("expected end of string after the register reference");
   return false;
 }

+bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) {
+  lex();
+  if (Token.isNot(MIToken::VirtualRegister))
+    return error("expected a virtual register");
+  if (parseRegister(Reg))
+    return true;
+  lex();
+  if (Token.isNot(MIToken::Eof))
+    return error("expected end of string after the register reference");
+  return false;
+}
+
+bool MIParser::parseStandaloneStackObject(int &FI) {
+  lex();
+  if (Token.isNot(MIToken::StackObject))
+    return error("expected a stack object");
+  if (parseStackFrameIndex(FI))
+    return true;
+  if (Token.isNot(MIToken::Eof))
+    return error("expected end of string after the stack object reference");
+  return false;
+}
+
+bool MIParser::parseStandaloneMDNode(MDNode *&Node) {
+  lex();
+  if (Token.isNot(MIToken::exclaim))
+    return error("expected a metadata node");
+  if (parseMDNode(Node))
+    return true;
+  if (Token.isNot(MIToken::Eof))
+    return error("expected end of string after the metadata node");
+  return false;
+}
+
 static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
   assert(MO.isImplicit());
   return MO.isDef() ? "implicit-def" : "implicit";
@@ -239,8 +725,18 @@ static std::string getRegisterName(const TargetRegisterInfo *TRI,
   return StringRef(TRI->getName(Reg)).lower();
 }

-bool MIParser::verifyImplicitOperands(
-    ArrayRef<MachineOperandWithLocation> Operands, const MCInstrDesc &MCID) {
+/// Return true if the parsed machine operands contain a given machine operand.
+static bool isImplicitOperandIn(const MachineOperand &ImplicitOperand,
+                                ArrayRef<ParsedMachineOperand> Operands) {
+  for (const auto &I : Operands) {
+    if (ImplicitOperand.isIdenticalTo(I.Operand))
+      return true;
+  }
+  return false;
+}
+
+bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
+                                      const MCInstrDesc &MCID) {
   if (MCID.isCall())
     // We can't verify call instructions as they can contain arbitrary implicit
     // register and register mask operands.
@@ -249,48 +745,32 @@ bool MIParser::verifyImplicitOperands(
   // Gather all the expected implicit operands.
   SmallVector<MachineOperand, 4> ImplicitOperands;
   if (MCID.ImplicitDefs)
-    for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
+    for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs)
       ImplicitOperands.push_back(
           MachineOperand::CreateReg(*ImpDefs, true, true));
   if (MCID.ImplicitUses)
-    for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
+    for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses)
      ImplicitOperands.push_back(
           MachineOperand::CreateReg(*ImpUses, false, true));

   const auto *TRI = MF.getSubtarget().getRegisterInfo();
   assert(TRI && "Expected target register info");
-  size_t I = ImplicitOperands.size(), J = Operands.size();
-  while (I) {
-    --I;
-    if (J) {
-      --J;
-      const auto &ImplicitOperand = ImplicitOperands[I];
-      const auto &Operand = Operands[J].Operand;
-      if (ImplicitOperand.isIdenticalTo(Operand))
-        continue;
-      if (Operand.isReg() && Operand.isImplicit()) {
-        return error(Operands[J].Begin,
-                     Twine("expected an implicit register operand '") +
-                         printImplicitRegisterFlag(ImplicitOperand) + " %" +
-                         getRegisterName(TRI, ImplicitOperand.getReg()) + "'");
-      }
-    }
-    // TODO: Fix source location when Operands[J].end is right before '=', i.e:
-    // insead of reporting an error at this location:
-    // %eax = MOV32r0
-    //                ^
-    // report the error at the following location:
-    // %eax = MOV32r0
-    //               ^
-    return error(J < Operands.size() ? Operands[J].End : Token.location(),
+  for (const auto &I : ImplicitOperands) {
+    if (isImplicitOperandIn(I, Operands))
+      continue;
+    return error(Operands.empty() ? Token.location() : Operands.back().End,
                  Twine("missing implicit register operand '") +
-                     printImplicitRegisterFlag(ImplicitOperands[I]) + " %" +
-                     getRegisterName(TRI, ImplicitOperands[I].getReg()) + "'");
+                     printImplicitRegisterFlag(I) + " %" +
+                     getRegisterName(TRI, I.getReg()) + "'");
   }
   return false;
 }

-bool MIParser::parseInstruction(unsigned &OpCode) {
+bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
+  if (Token.is(MIToken::kw_frame_setup)) {
+    Flags |= MachineInstr::FrameSetup;
+    lex();
+  }
   if (Token.isNot(MIToken::Identifier))
     return error("expected a machine instruction");
   StringRef InstrName = Token.stringValue();
@@ -330,6 +810,7 @@ bool MIParser::parseRegister(unsigned &Reg) {
 }

 bool MIParser::parseRegisterFlag(unsigned &Flags) {
+  const unsigned OldFlags = Flags;
   switch (Token.kind()) {
   case MIToken::kw_implicit:
     Flags |= RegState::Implicit;
@@ -337,6 +818,9 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
   case MIToken::kw_implicit_define:
     Flags |= RegState::ImplicitDefine;
     break;
+  case MIToken::kw_def:
+    Flags |= RegState::Define;
+    break;
   case MIToken::kw_dead:
     Flags |= RegState::Dead;
     break;
@@ -346,11 +830,22 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
   case MIToken::kw_undef:
     Flags |= RegState::Undef;
     break;
-  // TODO: report an error when we specify the same flag more than once.
-  // TODO: parse the other register flags.
+  case MIToken::kw_internal:
+    Flags |= RegState::InternalRead;
+    break;
+  case MIToken::kw_early_clobber:
+    Flags |= RegState::EarlyClobber;
+    break;
+  case MIToken::kw_debug_use:
+    Flags |= RegState::Debug;
+    break;
   default:
     llvm_unreachable("The current token should be a register flag");
   }
+  if (OldFlags == Flags)
+    // We know that the same flag is specified more than once when the flags
+    // weren't modified.
+    return error("duplicate '" + Token.stringValue() + "' register flag");
   lex();
   return false;
 }
@@ -368,7 +863,59 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
   return false;
 }

-bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
+bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
+  if (!consumeIfPresent(MIToken::kw_tied_def))
+    return error("expected 'tied-def' after '('");
+  if (Token.isNot(MIToken::IntegerLiteral))
+    return error("expected an integer literal after 'tied-def'");
+  if (getUnsigned(TiedDefIdx))
+    return true;
+  lex();
+  if (expectAndConsume(MIToken::rparen))
+    return true;
+  return false;
+}
+
+bool MIParser::assignRegisterTies(MachineInstr &MI,
+                                  ArrayRef<ParsedMachineOperand> Operands) {
+  SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs;
+  for (unsigned I = 0, E = Operands.size(); I != E; ++I) {
+    if (!Operands[I].TiedDefIdx)
+      continue;
+    // The parser ensures that this operand is a register use, so we just have
+    // to check the tied-def operand.
+    unsigned DefIdx = Operands[I].TiedDefIdx.getValue();
+    if (DefIdx >= E)
+      return error(Operands[I].Begin,
+                   Twine("use of invalid tied-def operand index '" +
+                         Twine(DefIdx) + "'; instruction has only ") +
+                       Twine(E) + " operands");
+    const auto &DefOperand = Operands[DefIdx].Operand;
+    if (!DefOperand.isReg() || !DefOperand.isDef())
+      // FIXME: add note with the def operand.
+      return error(Operands[I].Begin,
+                   Twine("use of invalid tied-def operand index '") +
+                       Twine(DefIdx) + "'; the operand #" + Twine(DefIdx) +
+                       " isn't a defined register");
+    // Check that the tied-def operand wasn't tied elsewhere.
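+    // (An operand may be tied to at most one other operand, so a def index
+    // that is already paired is reported as an error below.)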
+    for (const auto &TiedPair : TiedRegisterPairs) {
+      if (TiedPair.first == DefIdx)
+        return error(Operands[I].Begin,
+                     Twine("the tied-def operand #") + Twine(DefIdx) +
+                         " is already tied with another register operand");
+    }
+    TiedRegisterPairs.push_back(std::make_pair(DefIdx, I));
+  }
+  // FIXME: Verify that for non INLINEASM instructions, the def and use tied
+  // indices must be less than tied max.
+  for (const auto &TiedPair : TiedRegisterPairs)
+    MI.tieOperands(TiedPair.first, TiedPair.second);
+  return false;
+}
+
+bool MIParser::parseRegisterOperand(MachineOperand &Dest,
+                                    Optional<unsigned> &TiedDefIdx,
+                                    bool IsDef) {
   unsigned Reg;
   unsigned Flags = IsDef ? RegState::Define : 0;
   while (Token.isRegisterFlag()) {
@@ -385,10 +932,17 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest, bool IsDef) {
     if (parseSubRegisterIndex(SubReg))
       return true;
   }
+  if ((Flags & RegState::Define) == 0 && consumeIfPresent(MIToken::lparen)) {
+    unsigned Idx;
+    if (parseRegisterTiedDefIndex(Idx))
+      return true;
+    TiedDefIdx = Idx;
+  }
   Dest = MachineOperand::CreateReg(
       Reg, Flags & RegState::Define, Flags & RegState::Implicit,
       Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef,
-      /*isEarlyClobber=*/false, SubReg);
+      Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug,
+      Flags & RegState::InternalRead);
   return false;
 }

@@ -396,13 +950,55 @@ bool MIParser::parseImmediateOperand(MachineOperand &Dest) {
   assert(Token.is(MIToken::IntegerLiteral));
   const APSInt &Int = Token.integerValue();
   if (Int.getMinSignedBits() > 64)
-    // TODO: Replace this with an error when we can parse CIMM Machine Operands.
-    llvm_unreachable("Can't parse large integer literals yet!");
+    return error("integer literal is too large to be an immediate operand");
   Dest = MachineOperand::CreateImm(Int.getExtValue());
   lex();
   return false;
 }

+bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
+                               const Constant *&C) {
+  auto Source = StringValue.str(); // The source has to be null terminated.
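+  // This parses the IR constant part of operands such as 'i32 42' or
+  // 'float 3.25' (illustrative values).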
+  SMDiagnostic Err;
+  C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(),
+                         &IRSlots);
+  if (!C)
+    return error(Loc + Err.getColumnNo(), Err.getMessage());
+  return false;
+}
+
+bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
+  if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C))
+    return true;
+  lex();
+  return false;
+}
+
+bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::IntegerType));
+  auto Loc = Token.location();
+  lex();
+  if (Token.isNot(MIToken::IntegerLiteral))
+    return error("expected an integer literal");
+  const Constant *C = nullptr;
+  if (parseIRConstant(Loc, C))
+    return true;
+  Dest = MachineOperand::CreateCImm(cast<ConstantInt>(C));
+  return false;
+}
+
+bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
+  auto Loc = Token.location();
+  lex();
+  if (Token.isNot(MIToken::FloatingPointLiteral))
+    return error("expected a floating point literal");
+  const Constant *C = nullptr;
+  if (parseIRConstant(Loc, C))
+    return true;
+  Dest = MachineOperand::CreateFPImm(cast<ConstantFP>(C));
+  return false;
+}
+
 bool MIParser::getUnsigned(unsigned &Result) {
   assert(Token.hasIntegerValue() && "Expected a token with an integer value");
   const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
@@ -414,7 +1010,8 @@ bool MIParser::getUnsigned(unsigned &Result) {
 }

 bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
-  assert(Token.is(MIToken::MachineBasicBlock));
+  assert(Token.is(MIToken::MachineBasicBlock) ||
+         Token.is(MIToken::MachineBasicBlockLabel));
   unsigned Number;
   if (getUnsigned(Number))
     return true;
@@ -438,16 +1035,66 @@ bool MIParser::parseMBBOperand(MachineOperand &Dest) {
   return false;
 }

-bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+bool MIParser::parseStackFrameIndex(int &FI) {
+  assert(Token.is(MIToken::StackObject));
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto ObjectInfo = PFS.StackObjectSlots.find(ID);
+  if (ObjectInfo == PFS.StackObjectSlots.end())
+    return error(Twine("use of undefined stack object '%stack.") + Twine(ID) +
+                 "'");
+  StringRef Name;
+  if (const auto *Alloca =
+          MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second))
+    Name = Alloca->getName();
+  if (!Token.stringValue().empty() && Token.stringValue() != Name)
+    return error(Twine("the name of the stack object '%stack.") + Twine(ID) +
+                 "' isn't '" + Token.stringValue() + "'");
+  lex();
+  FI = ObjectInfo->second;
+  return false;
+}
+
+bool MIParser::parseStackObjectOperand(MachineOperand &Dest) {
+  int FI;
+  if (parseStackFrameIndex(FI))
+    return true;
+  Dest = MachineOperand::CreateFI(FI);
+  return false;
+}
+
+bool MIParser::parseFixedStackFrameIndex(int &FI) {
+  assert(Token.is(MIToken::FixedStackObject));
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto ObjectInfo = PFS.FixedStackObjectSlots.find(ID);
+  if (ObjectInfo == PFS.FixedStackObjectSlots.end())
+    return error(Twine("use of undefined fixed stack object '%fixed-stack.") +
+                 Twine(ID) + "'");
+  lex();
+  FI = ObjectInfo->second;
+  return false;
+}
+
+bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) {
+  int FI;
+  if (parseFixedStackFrameIndex(FI))
+    return true;
+  Dest = MachineOperand::CreateFI(FI);
+  return false;
+}
+
+bool MIParser::parseGlobalValue(GlobalValue *&GV) {
   switch (Token.kind()) {
   case MIToken::NamedGlobalValue: {
-    auto Name = Token.stringValue();
     const Module *M = MF.getFunction()->getParent();
-    if (const auto *GV = M->getNamedValue(Name)) {
-      Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
-      break;
-    }
-    return error(Twine("use of undefined global value '@") + Name + "'");
+    GV = M->getNamedValue(Token.stringValue());
+    if (!GV)
+      return error(Twine("use of undefined global value '") + Token.range() +
+                   "'");
+    break;
   }
   case MIToken::GlobalValue: {
     unsigned GVIdx;
@@ -456,36 +1103,323 @@ bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
     if (GVIdx >= IRSlots.GlobalValues.size())
       return error(Twine("use of undefined global value '@") + Twine(GVIdx) +
                    "'");
-    Dest = MachineOperand::CreateGA(IRSlots.GlobalValues[GVIdx],
-                                    /*Offset=*/0);
+    GV = IRSlots.GlobalValues[GVIdx];
     break;
   }
   default:
     llvm_unreachable("The current token should be a global value");
   }
-  // TODO: Parse offset and target flags.
+  return false;
+}
+
+bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) {
+  GlobalValue *GV = nullptr;
+  if (parseGlobalValue(GV))
+    return true;
+  lex();
+  Dest = MachineOperand::CreateGA(GV, /*Offset=*/0);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseConstantPoolIndexOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::ConstantPoolItem));
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto ConstantInfo = PFS.ConstantPoolSlots.find(ID);
+  if (ConstantInfo == PFS.ConstantPoolSlots.end())
+    return error("use of undefined constant '%const." + Twine(ID) + "'");
+  lex();
+  Dest = MachineOperand::CreateCPI(ID, /*Offset=*/0);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseJumpTableIndexOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::JumpTableIndex));
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto JumpTableEntryInfo = PFS.JumpTableSlots.find(ID);
+  if (JumpTableEntryInfo == PFS.JumpTableSlots.end())
+    return error("use of undefined jump table '%jump-table." + Twine(ID) + "'");
+  lex();
+  Dest = MachineOperand::CreateJTI(JumpTableEntryInfo->second);
+  return false;
+}
+
+bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::ExternalSymbol));
+  const char *Symbol = MF.createExternalSymbolName(Token.stringValue());
+  lex();
+  Dest = MachineOperand::CreateES(Symbol);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseMDNode(MDNode *&Node) {
+  assert(Token.is(MIToken::exclaim));
+  auto Loc = Token.location();
+  lex();
+  if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+    return error("expected metadata id after '!'");
+  unsigned ID;
+  if (getUnsigned(ID))
+    return true;
+  auto NodeInfo = IRSlots.MetadataNodes.find(ID);
+  if (NodeInfo == IRSlots.MetadataNodes.end())
+    return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'");
+  lex();
+  Node = NodeInfo->second.get();
+  return false;
+}
+
+bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
+  MDNode *Node = nullptr;
+  if (parseMDNode(Node))
+    return true;
+  Dest = MachineOperand::CreateMetadata(Node);
+  return false;
+}
+
+bool MIParser::parseCFIOffset(int &Offset) {
+  if (Token.isNot(MIToken::IntegerLiteral))
+    return error("expected a cfi offset");
+  if (Token.integerValue().getMinSignedBits() > 32)
+    return error("expected a 32 bit integer (the cfi offset is too large)");
+  Offset = (int)Token.integerValue().getExtValue();
   lex();
   return false;
 }

-bool MIParser::parseMachineOperand(MachineOperand &Dest) {
+bool MIParser::parseCFIRegister(unsigned &Reg) {
+  if (Token.isNot(MIToken::NamedRegister))
+    return error("expected a cfi register");
+  unsigned LLVMReg;
+  if (parseRegister(LLVMReg))
+    return true;
+  const auto *TRI = MF.getSubtarget().getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  int DwarfReg = TRI->getDwarfRegNum(LLVMReg, true);
+  if (DwarfReg < 0)
+    return error("invalid DWARF register");
+  Reg = (unsigned)DwarfReg;
+  lex();
+  return false;
+}
+
+bool MIParser::parseCFIOperand(MachineOperand &Dest) {
+  auto Kind = Token.kind();
+  lex();
+  auto &MMI = MF.getMMI();
+  int Offset;
+  unsigned Reg;
+  unsigned CFIIndex;
+  switch (Kind) {
+  case MIToken::kw_cfi_same_value:
+    if (parseCFIRegister(Reg))
+      return true;
+    CFIIndex =
+        MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
+    break;
+  case MIToken::kw_cfi_offset:
+    if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+        parseCFIOffset(Offset))
+      return true;
+    CFIIndex =
+        MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
+    break;
+  case MIToken::kw_cfi_def_cfa_register:
+    if (parseCFIRegister(Reg))
+      return true;
+    CFIIndex =
+        MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+    break;
+  case MIToken::kw_cfi_def_cfa_offset:
+    if (parseCFIOffset(Offset))
+      return true;
+    // NB: MCCFIInstruction::createDefCfaOffset negates the offset.
+    CFIIndex = MMI.addFrameInst(
+        MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
+    break;
+  case MIToken::kw_cfi_def_cfa:
+    if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
+        parseCFIOffset(Offset))
+      return true;
+    // NB: MCCFIInstruction::createDefCfa negates the offset.
+    CFIIndex =
+        MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
+    break;
+  default:
+    // TODO: Parse the other CFI operands.
+    llvm_unreachable("The current token should be a cfi operand");
+  }
+  Dest = MachineOperand::CreateCFIIndex(CFIIndex);
+  return false;
+}
+
+bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
+  switch (Token.kind()) {
+  case MIToken::NamedIRBlock: {
+    BB = dyn_cast_or_null<BasicBlock>(
+        F.getValueSymbolTable().lookup(Token.stringValue()));
+    if (!BB)
+      return error(Twine("use of undefined IR block '") + Token.range() + "'");
+    break;
+  }
+  case MIToken::IRBlock: {
+    unsigned SlotNumber = 0;
+    if (getUnsigned(SlotNumber))
+      return true;
+    BB = const_cast<BasicBlock *>(getIRBlock(SlotNumber, F));
+    if (!BB)
+      return error(Twine("use of undefined IR block '%ir-block.") +
+                   Twine(SlotNumber) + "'");
+    break;
+  }
+  default:
+    llvm_unreachable("The current token should be an IR block reference");
+  }
+  return false;
+}
+
+bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::kw_blockaddress));
+  lex();
+  if (expectAndConsume(MIToken::lparen))
+    return true;
+  if (Token.isNot(MIToken::GlobalValue) &&
+      Token.isNot(MIToken::NamedGlobalValue))
+    return error("expected a global value");
+  GlobalValue *GV = nullptr;
+  if (parseGlobalValue(GV))
+    return true;
+  auto *F = dyn_cast<Function>(GV);
+  if (!F)
+    return error("expected an IR function reference");
+  lex();
+  if (expectAndConsume(MIToken::comma))
+    return true;
+  BasicBlock *BB = nullptr;
+  if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock))
+    return error("expected an IR block reference");
+  if (parseIRBlock(BB, *F))
+    return true;
+  lex();
+  if (expectAndConsume(MIToken::rparen))
+    return true;
+  Dest = MachineOperand::CreateBA(BlockAddress::get(F, BB), /*Offset=*/0);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::kw_target_index));
+  lex();
+  if (expectAndConsume(MIToken::lparen))
+    return true;
+  if (Token.isNot(MIToken::Identifier))
+    return error("expected the name of the target index");
+  int Index = 0;
+  if (getTargetIndex(Token.stringValue(), Index))
+    return error("use of undefined target index '" + Token.stringValue() +
+                 "'");
+  lex();
+  if (expectAndConsume(MIToken::rparen))
+    return true;
+  Dest = MachineOperand::CreateTargetIndex(unsigned(Index), /*Offset=*/0);
+  if (parseOperandsOffset(Dest))
+    return true;
+  return false;
+}
+
+bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
+  assert(Token.is(MIToken::kw_liveout));
+  const auto *TRI = MF.getSubtarget().getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs());
+  lex();
+  if (expectAndConsume(MIToken::lparen))
+    return true;
+  while (true) {
+    if (Token.isNot(MIToken::NamedRegister))
+      return error("expected a named register");
+    unsigned Reg = 0;
+    if (parseRegister(Reg))
+      return true;
+    lex();
+    Mask[Reg / 32] |= 1U << (Reg % 32);
+    // TODO: Report an error if the same register is used more than once.
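+    // The whole operand has the form 'liveout(%rax, %rbx)' (illustrative
+    // registers); entries are comma separated until the closing ')'.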
+    if (Token.isNot(MIToken::comma))
+      break;
+    lex();
+  }
+  if (expectAndConsume(MIToken::rparen))
+    return true;
+  Dest = MachineOperand::CreateRegLiveOut(Mask);
+  return false;
+}
+
+bool MIParser::parseMachineOperand(MachineOperand &Dest,
+                                   Optional<unsigned> &TiedDefIdx) {
   switch (Token.kind()) {
   case MIToken::kw_implicit:
   case MIToken::kw_implicit_define:
+  case MIToken::kw_def:
   case MIToken::kw_dead:
   case MIToken::kw_killed:
   case MIToken::kw_undef:
+  case MIToken::kw_internal:
+  case MIToken::kw_early_clobber:
+  case MIToken::kw_debug_use:
   case MIToken::underscore:
   case MIToken::NamedRegister:
   case MIToken::VirtualRegister:
-    return parseRegisterOperand(Dest);
+    return parseRegisterOperand(Dest, TiedDefIdx);
   case MIToken::IntegerLiteral:
     return parseImmediateOperand(Dest);
+  case MIToken::IntegerType:
+    return parseTypedImmediateOperand(Dest);
+  case MIToken::kw_half:
+  case MIToken::kw_float:
+  case MIToken::kw_double:
+  case MIToken::kw_x86_fp80:
+  case MIToken::kw_fp128:
+  case MIToken::kw_ppc_fp128:
+    return parseFPImmediateOperand(Dest);
   case MIToken::MachineBasicBlock:
     return parseMBBOperand(Dest);
+  case MIToken::StackObject:
+    return parseStackObjectOperand(Dest);
+  case MIToken::FixedStackObject:
+    return parseFixedStackObjectOperand(Dest);
   case MIToken::GlobalValue:
   case MIToken::NamedGlobalValue:
     return parseGlobalAddressOperand(Dest);
+  case MIToken::ConstantPoolItem:
+    return parseConstantPoolIndexOperand(Dest);
+  case MIToken::JumpTableIndex:
+    return parseJumpTableIndexOperand(Dest);
+  case MIToken::ExternalSymbol:
+    return parseExternalSymbolOperand(Dest);
+  case MIToken::exclaim:
+    return parseMetadataOperand(Dest);
+  case MIToken::kw_cfi_same_value:
+  case MIToken::kw_cfi_offset:
+  case MIToken::kw_cfi_def_cfa_register:
+  case MIToken::kw_cfi_def_cfa_offset:
+  case MIToken::kw_cfi_def_cfa:
+    return parseCFIOperand(Dest);
+  case MIToken::kw_blockaddress:
+    return parseBlockAddressOperand(Dest);
+  case MIToken::kw_target_index:
+    return parseTargetIndexOperand(Dest);
+  case MIToken::kw_liveout:
+    return parseLiveoutRegisterMaskOperand(Dest);
   case MIToken::Error:
     return true;
   case MIToken::Identifier:
@@ -496,12 +1430,314 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest) {
   }
   // fallthrough
   default:
-    // TODO: parse the other machine operands.
+    // FIXME: Parse the MCSymbol machine operand.
     return error("expected a machine operand");
   }
   return false;
 }

+bool MIParser::parseMachineOperandAndTargetFlags(
+    MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) {
+  unsigned TF = 0;
+  bool HasTargetFlags = false;
+  if (Token.is(MIToken::kw_target_flags)) {
+    HasTargetFlags = true;
+    lex();
+    if (expectAndConsume(MIToken::lparen))
+      return true;
+    if (Token.isNot(MIToken::Identifier))
+      return error("expected the name of the target flag");
+    if (getDirectTargetFlag(Token.stringValue(), TF)) {
+      if (getBitmaskTargetFlag(Token.stringValue(), TF))
+        return error("use of undefined target flag '" + Token.stringValue() +
+                     "'");
+    }
+    lex();
+    while (Token.is(MIToken::comma)) {
+      lex();
+      if (Token.isNot(MIToken::Identifier))
+        return error("expected the name of the target flag");
+      unsigned BitFlag = 0;
+      if (getBitmaskTargetFlag(Token.stringValue(), BitFlag))
+        return error("use of undefined target flag '" + Token.stringValue() +
+                     "'");
+      // TODO: Report an error when using a duplicate bit target flag.
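+      // The operand form is 'target-flags(<direct flag>[, <bit flags>...])'
+      // followed by the operand itself; flag names are target specific, e.g.
+      // 'target-flags(aarch64-page) @var' (illustrative).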
+      TF |= BitFlag;
+      lex();
+    }
+    if (expectAndConsume(MIToken::rparen))
+      return true;
+  }
+  auto Loc = Token.location();
+  if (parseMachineOperand(Dest, TiedDefIdx))
+    return true;
+  if (!HasTargetFlags)
+    return false;
+  if (Dest.isReg())
+    return error(Loc, "register operands can't have target flags");
+  Dest.setTargetFlags(TF);
+  return false;
+}
+
+bool MIParser::parseOffset(int64_t &Offset) {
+  if (Token.isNot(MIToken::plus) && Token.isNot(MIToken::minus))
+    return false;
+  StringRef Sign = Token.range();
+  bool IsNegative = Token.is(MIToken::minus);
+  lex();
+  if (Token.isNot(MIToken::IntegerLiteral))
+    return error("expected an integer literal after '" + Sign + "'");
+  if (Token.integerValue().getMinSignedBits() > 64)
+    return error("expected 64-bit integer (too large)");
+  Offset = Token.integerValue().getExtValue();
+  if (IsNegative)
+    Offset = -Offset;
+  lex();
+  return false;
+}
+
+bool MIParser::parseAlignment(unsigned &Alignment) {
+  assert(Token.is(MIToken::kw_align));
+  lex();
+  if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned())
+    return error("expected an integer literal after 'align'");
+  if (getUnsigned(Alignment))
+    return true;
+  lex();
+  return false;
+}
+
+bool MIParser::parseOperandsOffset(MachineOperand &Op) {
+  int64_t Offset = 0;
+  if (parseOffset(Offset))
+    return true;
+  Op.setOffset(Offset);
+  return false;
+}
+
+bool MIParser::parseIRValue(const Value *&V) {
+  switch (Token.kind()) {
+  case MIToken::NamedIRValue: {
+    V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
+    break;
+  }
+  case MIToken::IRValue: {
+    unsigned SlotNumber = 0;
+    if (getUnsigned(SlotNumber))
+      return true;
+    V = getIRValue(SlotNumber);
+    break;
+  }
+  case MIToken::NamedGlobalValue:
+  case MIToken::GlobalValue: {
+    GlobalValue *GV = nullptr;
+    if (parseGlobalValue(GV))
+      return true;
+    V = GV;
+    break;
+  }
+  case MIToken::QuotedIRValue: {
+    const Constant *C = nullptr;
+    if (parseIRConstant(Token.location(), Token.stringValue(), C))
+      return true;
+    V = C;
+    break;
+  }
+  default:
+    llvm_unreachable("The current token should be an IR value reference");
+  }
+  if (!V)
+    return error(Twine("use of undefined IR value '") + Token.range() + "'");
+  return false;
+}
+
+bool MIParser::getUint64(uint64_t &Result) {
+  assert(Token.hasIntegerValue());
+  if (Token.integerValue().getActiveBits() > 64)
+    return error("expected 64-bit integer (too large)");
+  Result = Token.integerValue().getZExtValue();
+  return false;
+}
+
+bool MIParser::parseMemoryOperandFlag(unsigned &Flags) {
+  const unsigned OldFlags = Flags;
+  switch (Token.kind()) {
+  case MIToken::kw_volatile:
+    Flags |= MachineMemOperand::MOVolatile;
+    break;
+  case MIToken::kw_non_temporal:
+    Flags |= MachineMemOperand::MONonTemporal;
+    break;
+  case MIToken::kw_invariant:
+    Flags |= MachineMemOperand::MOInvariant;
+    break;
+  // TODO: parse the target specific memory operand flags.
+  default:
+    llvm_unreachable("The current token should be a memory operand flag");
+  }
+  if (OldFlags == Flags)
+    // We know that the same flag is specified more than once when the flags
+    // weren't modified.
+ return error("duplicate '" + Token.stringValue() + "' memory operand flag"); + lex(); + return false; +} + +bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { + switch (Token.kind()) { + case MIToken::kw_stack: + PSV = MF.getPSVManager().getStack(); + break; + case MIToken::kw_got: + PSV = MF.getPSVManager().getGOT(); + break; + case MIToken::kw_jump_table: + PSV = MF.getPSVManager().getJumpTable(); + break; + case MIToken::kw_constant_pool: + PSV = MF.getPSVManager().getConstantPool(); + break; + case MIToken::FixedStackObject: { + int FI; + if (parseFixedStackFrameIndex(FI)) + return true; + PSV = MF.getPSVManager().getFixedStack(FI); + // The token was already consumed, so use return here instead of break. + return false; + } + case MIToken::kw_call_entry: { + lex(); + switch (Token.kind()) { + case MIToken::GlobalValue: + case MIToken::NamedGlobalValue: { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + PSV = MF.getPSVManager().getGlobalValueCallEntry(GV); + break; + } + case MIToken::ExternalSymbol: + PSV = MF.getPSVManager().getExternalSymbolCallEntry( + MF.createExternalSymbolName(Token.stringValue())); + break; + default: + return error( + "expected a global value or an external symbol after 'call-entry'"); + } + break; + } + default: + llvm_unreachable("The current token should be pseudo source value"); + } + lex(); + return false; +} + +bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { + if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) || + Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) || + Token.is(MIToken::FixedStackObject) || Token.is(MIToken::kw_call_entry)) { + const PseudoSourceValue *PSV = nullptr; + if (parseMemoryPseudoSourceValue(PSV)) + return true; + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(PSV, Offset); + return false; + } + if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) && + Token.isNot(MIToken::GlobalValue) && + Token.isNot(MIToken::NamedGlobalValue) && + Token.isNot(MIToken::QuotedIRValue)) + return error("expected an IR value reference"); + const Value *V = nullptr; + if (parseIRValue(V)) + return true; + if (!V->getType()->isPointerTy()) + return error("expected a pointer IR value"); + lex(); + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(V, Offset); + return false; +} + +bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { + if (expectAndConsume(MIToken::lparen)) + return true; + unsigned Flags = 0; + while (Token.isMemoryOperandFlag()) { + if (parseMemoryOperandFlag(Flags)) + return true; + } + if (Token.isNot(MIToken::Identifier) || + (Token.stringValue() != "load" && Token.stringValue() != "store")) + return error("expected 'load' or 'store' memory operation"); + if (Token.stringValue() == "load") + Flags |= MachineMemOperand::MOLoad; + else + Flags |= MachineMemOperand::MOStore; + lex(); + + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected the size integer literal after memory operation"); + uint64_t Size; + if (getUint64(Size)) + return true; + lex(); + + const char *Word = Flags & MachineMemOperand::MOLoad ? 
"from" : "into"; + if (Token.isNot(MIToken::Identifier) || Token.stringValue() != Word) + return error(Twine("expected '") + Word + "'"); + lex(); + + MachinePointerInfo Ptr = MachinePointerInfo(); + if (parseMachinePointerInfo(Ptr)) + return true; + unsigned BaseAlignment = Size; + AAMDNodes AAInfo; + MDNode *Range = nullptr; + while (consumeIfPresent(MIToken::comma)) { + switch (Token.kind()) { + case MIToken::kw_align: + if (parseAlignment(BaseAlignment)) + return true; + break; + case MIToken::md_tbaa: + lex(); + if (parseMDNode(AAInfo.TBAA)) + return true; + break; + case MIToken::md_alias_scope: + lex(); + if (parseMDNode(AAInfo.Scope)) + return true; + break; + case MIToken::md_noalias: + lex(); + if (parseMDNode(AAInfo.NoAlias)) + return true; + break; + case MIToken::md_range: + lex(); + if (parseMDNode(Range)) + return true; + break; + // TODO: Report an error on duplicate metadata nodes. + default: + return error("expected 'align' or '!tbaa' or '!alias.scope' or " + "'!noalias' or '!range'"); + } + } + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = + MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range); + return false; +} + void MIParser::initNames2InstrOpCodes() { if (!Names2InstrOpCodes.empty()) return; @@ -583,18 +1819,162 @@ unsigned MIParser::getSubRegIndex(StringRef Name) { return SubRegInfo->getValue(); } -bool llvm::parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, - MachineFunction &MF, StringRef Src, - const PerFunctionMIParsingState &PFS, - const SlotMapping &IRSlots, SMDiagnostic &Error) { - return MIParser(SM, MF, Error, Src, PFS, IRSlots).parse(MI); +static void initSlots2BasicBlocks( + const Function &F, + DenseMap &Slots2BasicBlocks) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (auto &BB : F) { + if (BB.hasName()) + continue; + int Slot = MST.getLocalSlot(&BB); + if (Slot == -1) + continue; + Slots2BasicBlocks.insert(std::make_pair(unsigned(Slot), &BB)); + } +} + +static const BasicBlock *getIRBlockFromSlot( + unsigned Slot, + const DenseMap &Slots2BasicBlocks) { + auto BlockInfo = Slots2BasicBlocks.find(Slot); + if (BlockInfo == Slots2BasicBlocks.end()) + return nullptr; + return BlockInfo->second; +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot) { + if (Slots2BasicBlocks.empty()) + initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks); + return getIRBlockFromSlot(Slot, Slots2BasicBlocks); +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { + if (&F == MF.getFunction()) + return getIRBlock(Slot); + DenseMap CustomSlots2BasicBlocks; + initSlots2BasicBlocks(F, CustomSlots2BasicBlocks); + return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks); +} + +static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, + DenseMap &Slots2Values) { + int Slot = MST.getLocalSlot(V); + if (Slot == -1) + return; + Slots2Values.insert(std::make_pair(unsigned(Slot), V)); +} + +/// Creates the mapping from slot numbers to function's unnamed IR values. 
+static void initSlots2Values(const Function &F,
+                             DenseMap<unsigned, const Value *> &Slots2Values) {
+  ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false);
+  MST.incorporateFunction(F);
+  for (const auto &Arg : F.args())
+    mapValueToSlot(&Arg, MST, Slots2Values);
+  for (const auto &BB : F) {
+    mapValueToSlot(&BB, MST, Slots2Values);
+    for (const auto &I : BB)
+      mapValueToSlot(&I, MST, Slots2Values);
+  }
+}
+
+const Value *MIParser::getIRValue(unsigned Slot) {
+  if (Slots2Values.empty())
+    initSlots2Values(*MF.getFunction(), Slots2Values);
+  auto ValueInfo = Slots2Values.find(Slot);
+  if (ValueInfo == Slots2Values.end())
+    return nullptr;
+  return ValueInfo->second;
+}
+
+void MIParser::initNames2TargetIndices() {
+  if (!Names2TargetIndices.empty())
+    return;
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Indices = TII->getSerializableTargetIndices();
+  for (const auto &I : Indices)
+    Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getTargetIndex(StringRef Name, int &Index) {
+  initNames2TargetIndices();
+  auto IndexInfo = Names2TargetIndices.find(Name);
+  if (IndexInfo == Names2TargetIndices.end())
+    return true;
+  Index = IndexInfo->second;
+  return false;
+}
+
+void MIParser::initNames2DirectTargetFlags() {
+  if (!Names2DirectTargetFlags.empty())
+    return;
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+  for (const auto &I : Flags)
+    Names2DirectTargetFlags.insert(
+        std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) {
+  initNames2DirectTargetFlags();
+  auto FlagInfo = Names2DirectTargetFlags.find(Name);
+  if (FlagInfo == Names2DirectTargetFlags.end())
+    return true;
+  Flag = FlagInfo->second;
+  return false;
+}
+
+void MIParser::initNames2BitmaskTargetFlags() {
+  if (!Names2BitmaskTargetFlags.empty())
+    return;
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "Expected target instruction info");
+  auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
+  for (const auto &I : Flags)
+    Names2BitmaskTargetFlags.insert(
+        std::make_pair(StringRef(I.second), I.first));
+}
+
+bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
+  initNames2BitmaskTargetFlags();
+  auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
+  if (FlagInfo == Names2BitmaskTargetFlags.end())
+    return true;
+  Flag = FlagInfo->second;
+  return false;
+}
+
+bool llvm::parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
+                                             PerFunctionMIParsingState &PFS,
+                                             const SlotMapping &IRSlots,
+                                             SMDiagnostic &Error) {
+  SourceMgr SM;
+  SM.AddNewSourceBuffer(
+      MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
+      SMLoc());
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+      .parseBasicBlockDefinitions(PFS.MBBSlots);
+}
+
+bool llvm::parseMachineInstructions(MachineFunction &MF, StringRef Src,
+                                    const PerFunctionMIParsingState &PFS,
+                                    const SlotMapping &IRSlots,
+                                    SMDiagnostic &Error) {
+  SourceMgr SM;
+  SM.AddNewSourceBuffer(
+      MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false),
+      SMLoc());
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseBasicBlocks();
 }

 bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
                              MachineFunction &MF, StringRef Src,
                              const PerFunctionMIParsingState &PFS,
                              const SlotMapping &IRSlots, SMDiagnostic &Error) {
-  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseMBB(MBB);
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMBB(MBB);
 }

 bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
@@ -602,5 +1982,30 @@ bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
                                        const PerFunctionMIParsingState &PFS,
                                        const SlotMapping &IRSlots,
                                        SMDiagnostic &Error) {
-  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseNamedRegister(Reg);
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+      .parseStandaloneNamedRegister(Reg);
+}
+
+bool llvm::parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
+                                         MachineFunction &MF, StringRef Src,
+                                         const PerFunctionMIParsingState &PFS,
+                                         const SlotMapping &IRSlots,
+                                         SMDiagnostic &Error) {
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+      .parseStandaloneVirtualRegister(Reg);
+}
+
+bool llvm::parseStackObjectReference(int &FI, SourceMgr &SM,
+                                     MachineFunction &MF, StringRef Src,
+                                     const PerFunctionMIParsingState &PFS,
+                                     const SlotMapping &IRSlots,
+                                     SMDiagnostic &Error) {
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots)
+      .parseStandaloneStackObject(FI);
+}
+
+bool llvm::parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
+                       StringRef Src, const PerFunctionMIParsingState &PFS,
+                       const SlotMapping &IRSlots, SMDiagnostic &Error) {
+  return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMDNode(Node);
+}

diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
index fca4c4e6f885..8aef704ab36c 100644
--- a/lib/CodeGen/MIRParser/MIParser.h
+++ b/lib/CodeGen/MIRParser/MIParser.h
@@ -19,9 +19,11 @@
 namespace llvm {

+class BasicBlock;
 class MachineBasicBlock;
 class MachineInstr;
 class MachineFunction;
+class MDNode;
 struct SlotMapping;
 class SMDiagnostic;
 class SourceMgr;
@@ -29,11 +31,42 @@ class SourceMgr;

 struct PerFunctionMIParsingState {
   DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
   DenseMap<unsigned, unsigned> VirtualRegisterSlots;
+  DenseMap<unsigned, int> FixedStackObjectSlots;
+  DenseMap<unsigned, int> StackObjectSlots;
+  DenseMap<unsigned, unsigned> ConstantPoolSlots;
+  DenseMap<unsigned, unsigned> JumpTableSlots;
 };

-bool parseMachineInstr(MachineInstr *&MI, SourceMgr &SM, MachineFunction &MF,
-                       StringRef Src, const PerFunctionMIParsingState &PFS,
-                       const SlotMapping &IRSlots, SMDiagnostic &Error);
+/// Parse the machine basic block definitions, and skip the machine
+/// instructions.
+///
+/// This function runs the first parsing pass on the machine function's body.
+/// It parses only the machine basic block definitions and creates the machine
+/// basic blocks in the given machine function.
+///
+/// The machine instructions aren't parsed during the first pass because not
+/// all of the machine basic blocks are defined yet - that makes it impossible
+/// to resolve the machine basic block references.
+///
+/// Return true if an error occurred.
+bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src,
+                                       PerFunctionMIParsingState &PFS,
+                                       const SlotMapping &IRSlots,
+                                       SMDiagnostic &Error);
+
+/// Parse the machine instructions.
+///
+/// This function runs the second parsing pass on the machine function's body.
+/// It skips the machine basic block definitions and parses only the machine
+/// instructions and basic block attributes like liveins and successors.
+///
+/// The second parsing pass assumes that the first parsing pass already ran
+/// on the given source string.
+///
+/// Return true if an error occurred.
+bool parseMachineInstructions(MachineFunction &MF, StringRef Src,
+                              const PerFunctionMIParsingState &PFS,
+                              const SlotMapping &IRSlots, SMDiagnostic &Error);

 bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM,
                        MachineFunction &MF, StringRef Src,
@@ -46,6 +79,21 @@ bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM,
                                  const SlotMapping &IRSlots,
                                  SMDiagnostic &Error);

+bool parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM,
+                                   MachineFunction &MF, StringRef Src,
+                                   const PerFunctionMIParsingState &PFS,
+                                   const SlotMapping &IRSlots,
+                                   SMDiagnostic &Error);
+
+bool parseStackObjectReference(int &FI, SourceMgr &SM, MachineFunction &MF,
+                               StringRef Src,
+                               const PerFunctionMIParsingState &PFS,
+                               const SlotMapping &IRSlots, SMDiagnostic &Error);
+
+bool parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF,
+                 StringRef Src, const PerFunctionMIParsingState &PFS,
+                 const SlotMapping &IRSlots, SMDiagnostic &Error);
+
 } // end namespace llvm

 #endif

diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 16b0e1655891..422efbc5ce57 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -20,8 +20,10 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/MIRYamlMapping.h"
 #include "llvm/IR/BasicBlock.h"
@@ -95,30 +97,53 @@ public:
   /// Return true if error occurred.
   bool initializeMachineFunction(MachineFunction &MF);

-  /// Initialize the machine basic block using it's YAML representation.
-  ///
-  /// Return true if an error occurred.
-  bool initializeMachineBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
-                                   const yaml::MachineBasicBlock &YamlMBB,
-                                   const PerFunctionMIParsingState &PFS);
+  bool initializeRegisterInfo(MachineFunction &MF,
+                              const yaml::MachineFunction &YamlMF,
+                              PerFunctionMIParsingState &PFS);

-  bool
-  initializeRegisterInfo(const MachineFunction &MF,
-                         MachineRegisterInfo &RegInfo,
-                         const yaml::MachineFunction &YamlMF,
-                         DenseMap<unsigned, unsigned> &VirtualRegisterSlots);
+  void inferRegisterInfo(MachineFunction &MF,
+                         const yaml::MachineFunction &YamlMF);

-  bool initializeFrameInfo(MachineFrameInfo &MFI,
-                           const yaml::MachineFunction &YamlMF);
+  bool initializeFrameInfo(MachineFunction &MF,
+                           const yaml::MachineFunction &YamlMF,
+                           PerFunctionMIParsingState &PFS);
+
+  bool parseCalleeSavedRegister(MachineFunction &MF,
+                                PerFunctionMIParsingState &PFS,
+                                std::vector<CalleeSavedInfo> &CSIInfo,
+                                const yaml::StringValue &RegisterSource,
+                                int FrameIdx);
+
+  bool parseStackObjectsDebugInfo(MachineFunction &MF,
+                                  PerFunctionMIParsingState &PFS,
+                                  const yaml::MachineStackObject &Object,
+                                  int FrameIdx);
+
+  bool initializeConstantPool(MachineConstantPool &ConstantPool,
+                              const yaml::MachineFunction &YamlMF,
+                              const MachineFunction &MF,
+                              DenseMap<unsigned, unsigned> &ConstantPoolSlots);
+
+  bool initializeJumpTableInfo(MachineFunction &MF,
+                               const yaml::MachineJumpTable &YamlJTI,
+                               PerFunctionMIParsingState &PFS);

 private:
+  bool parseMDNode(MDNode *&Node, const yaml::StringValue &Source,
+                   MachineFunction &MF, const PerFunctionMIParsingState &PFS);
+
+  bool parseMBBReference(MachineBasicBlock *&MBB,
+                         const yaml::StringValue &Source, MachineFunction &MF,
+                         const PerFunctionMIParsingState &PFS);
+
   /// Return a MIR diagnostic converted from an MI string diagnostic.
   SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error,
                                     SMRange SourceRange);

-  /// Return a MIR diagnostic converted from an LLVM assembly diagnostic.
-  SMDiagnostic diagFromLLVMAssemblyDiag(const SMDiagnostic &Error,
-                                        SMRange SourceRange);
+  /// Return a MIR diagnostic converted from a diagnostic located in a YAML
+  /// block scalar string.
+  SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
+                                       SMRange SourceRange);

   /// Create an empty function with the given name.
   void createDummyFunction(StringRef Name, Module &M);
@@ -200,7 +225,7 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
     M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error,
                       Context, &IRSlots);
     if (!M) {
-      reportDiagnostic(diagFromLLVMAssemblyDiag(Error, BSN->getSourceRange()));
+      reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange()));
       return M;
     }
     In.nextDocument();
@@ -261,88 +286,56 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
   MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
   MF.setHasInlineAsm(YamlMF.HasInlineAsm);
   PerFunctionMIParsingState PFS;
-  if (initializeRegisterInfo(MF, MF.getRegInfo(), YamlMF,
-                             PFS.VirtualRegisterSlots))
+  if (initializeRegisterInfo(MF, YamlMF, PFS))
     return true;
-  if (initializeFrameInfo(*MF.getFrameInfo(), YamlMF))
-    return true;
-
-  const auto &F = *MF.getFunction();
-  for (const auto &YamlMBB : YamlMF.BasicBlocks) {
-    const BasicBlock *BB = nullptr;
-    const yaml::StringValue &Name = YamlMBB.Name;
-    if (!Name.Value.empty()) {
-      BB = dyn_cast_or_null<BasicBlock>(
-          F.getValueSymbolTable().lookup(Name.Value));
-      if (!BB)
-        return error(Name.SourceRange.Start,
-                     Twine("basic block '") + Name.Value +
-                         "' is not defined in the function '" + MF.getName() +
-                         "'");
-    }
-    auto *MBB = MF.CreateMachineBasicBlock(BB);
-    MF.insert(MF.end(), MBB);
-    bool WasInserted =
-        PFS.MBBSlots.insert(std::make_pair(YamlMBB.ID, MBB)).second;
-    if (!WasInserted)
-      return error(Twine("redefinition of machine basic block with id #") +
-                   Twine(YamlMBB.ID));
-  }
-
-  if (YamlMF.BasicBlocks.empty())
-    return error(Twine("machine function '") + Twine(MF.getName()) +
-                 "' requires at least one machine basic block in its body");
-  // Initialize the machine basic blocks after creating them all so that the
-  // machine instructions parser can resolve the MBB references.
-  unsigned I = 0;
-  for (const auto &YamlMBB : YamlMF.BasicBlocks) {
-    if (initializeMachineBasicBlock(MF, *MF.getBlockNumbered(I++), YamlMBB,
-                                    PFS))
+  if (!YamlMF.Constants.empty()) {
+    auto *ConstantPool = MF.getConstantPool();
+    assert(ConstantPool && "Constant pool must be created");
+    if (initializeConstantPool(*ConstantPool, YamlMF, MF,
+                               PFS.ConstantPoolSlots))
       return true;
   }
-  return false;
-}

-bool MIRParserImpl::initializeMachineBasicBlock(
-    MachineFunction &MF, MachineBasicBlock &MBB,
-    const yaml::MachineBasicBlock &YamlMBB,
-    const PerFunctionMIParsingState &PFS) {
-  MBB.setAlignment(YamlMBB.Alignment);
-  if (YamlMBB.AddressTaken)
-    MBB.setHasAddressTaken();
-  MBB.setIsLandingPad(YamlMBB.IsLandingPad);
   SMDiagnostic Error;
-  // Parse the successors.
-  for (const auto &MBBSource : YamlMBB.Successors) {
-    MachineBasicBlock *SuccMBB = nullptr;
-    if (parseMBBReference(SuccMBB, SM, MF, MBBSource.Value, PFS, IRSlots,
-                          Error))
-      return error(Error, MBBSource.SourceRange);
-    // TODO: Report an error when adding the same successor more than once.
-    MBB.addSuccessor(SuccMBB);
+  if (parseMachineBasicBlockDefinitions(MF, YamlMF.Body.Value.Value, PFS,
+                                        IRSlots, Error)) {
+    reportDiagnostic(
+        diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange));
+    return true;
   }
-  // Parse the liveins.
-  for (const auto &LiveInSource : YamlMBB.LiveIns) {
-    unsigned Reg = 0;
-    if (parseNamedRegisterReference(Reg, SM, MF, LiveInSource.Value, PFS,
-                                    IRSlots, Error))
-      return error(Error, LiveInSource.SourceRange);
-    MBB.addLiveIn(Reg);
-  }
-  // Parse the instructions.
- for (const auto &MISource : YamlMBB.Instructions) { - MachineInstr *MI = nullptr; - if (parseMachineInstr(MI, SM, MF, MISource.Value, PFS, IRSlots, Error)) - return error(Error, MISource.SourceRange); - MBB.insert(MBB.end(), MI); + + if (MF.empty()) + return error(Twine("machine function '") + Twine(MF.getName()) + + "' requires at least one machine basic block in its body"); + // Initialize the frame information after creating all the MBBs so that the + // MBB references in the frame information can be resolved. + if (initializeFrameInfo(MF, YamlMF, PFS)) + return true; + // Initialize the jump table after creating all the MBBs so that the MBB + // references can be resolved. + if (!YamlMF.JumpTableInfo.Entries.empty() && + initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS)) + return true; + // Parse the machine instructions after creating all of the MBBs so that the + // parser can resolve the MBB references. + if (parseMachineInstructions(MF, YamlMF.Body.Value.Value, PFS, IRSlots, + Error)) { + reportDiagnostic( + diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); + return true; } + inferRegisterInfo(MF, YamlMF); + // FIXME: This is a temporary workaround until the reserved registers can be + // serialized. + MF.getRegInfo().freezeReservedRegs(MF); + MF.verify(); return false; } -bool MIRParserImpl::initializeRegisterInfo( - const MachineFunction &MF, MachineRegisterInfo &RegInfo, - const yaml::MachineFunction &YamlMF, - DenseMap &VirtualRegisterSlots) { +bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); assert(RegInfo.isSSA()); if (!YamlMF.IsSSA) RegInfo.leaveSSA(); @@ -351,6 +344,7 @@ bool MIRParserImpl::initializeRegisterInfo( RegInfo.invalidateLiveness(); RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness); + SMDiagnostic Error; // Parse the virtual register information. for (const auto &VReg : YamlMF.VirtualRegisters) { const auto *RC = getRegClass(MF, VReg.Class.Value); @@ -359,15 +353,71 @@ bool MIRParserImpl::initializeRegisterInfo( Twine("use of undefined register class '") + VReg.Class.Value + "'"); unsigned Reg = RegInfo.createVirtualRegister(RC); - // TODO: Report an error when the same virtual register with the same ID is - // redefined. - VirtualRegisterSlots.insert(std::make_pair(VReg.ID, Reg)); + if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg)) + .second) + return error(VReg.ID.SourceRange.Start, + Twine("redefinition of virtual register '%") + + Twine(VReg.ID.Value) + "'"); + if (!VReg.PreferredRegister.Value.empty()) { + unsigned PreferredReg = 0; + if (parseNamedRegisterReference(PreferredReg, SM, MF, + VReg.PreferredRegister.Value, PFS, + IRSlots, Error)) + return error(Error, VReg.PreferredRegister.SourceRange); + RegInfo.setSimpleHint(Reg, PreferredReg); + } } + + // Parse the liveins. + for (const auto &LiveIn : YamlMF.LiveIns) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, LiveIn.Register.Value, PFS, + IRSlots, Error)) + return error(Error, LiveIn.Register.SourceRange); + unsigned VReg = 0; + if (!LiveIn.VirtualRegister.Value.empty()) { + if (parseVirtualRegisterReference( + VReg, SM, MF, LiveIn.VirtualRegister.Value, PFS, IRSlots, Error)) + return error(Error, LiveIn.VirtualRegister.SourceRange); + } + RegInfo.addLiveIn(Reg, VReg); + } + + // Parse the callee saved register mask. 
+ BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size()); + if (!YamlMF.CalleeSavedRegisters) + return false; + for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, RegSource.Value, PFS, IRSlots, + Error)) + return error(Error, RegSource.SourceRange); + CalleeSavedRegisterMask[Reg] = true; + } + RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip()); return false; } -bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, - const yaml::MachineFunction &YamlMF) { +void MIRParserImpl::inferRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF) { + if (YamlMF.CalleeSavedRegisters) + return; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + MF.getRegInfo().addPhysRegsUsedFromRegMask(MO.getRegMask()); + } + } + } +} + +bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS) { + MachineFrameInfo &MFI = *MF.getFrameInfo(); + const Function &F = *MF.getFunction(); const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); @@ -383,7 +433,20 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); MFI.setHasVAStart(YamlMFI.HasVAStart); MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); + if (!YamlMFI.SavePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS)) + return true; + MFI.setSavePoint(MBB); + } + if (!YamlMFI.RestorePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS)) + return true; + MFI.setRestorePoint(MBB); + } + std::vector CSIInfo; // Initialize the fixed frame objects. for (const auto &Object : YamlMF.FixedStackObjects) { int ObjectIdx; @@ -393,24 +456,187 @@ bool MIRParserImpl::initializeFrameInfo(MachineFrameInfo &MFI, else ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); MFI.setObjectAlignment(ObjectIdx, Object.Alignment); - // TODO: Store the mapping between fixed object IDs and object indices to - // parse fixed stack object references correctly. + if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value, + ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of fixed stack object '%fixed-stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + ObjectIdx)) + return true; } // Initialize the ordinary frame objects. 
for (const auto &Object : YamlMF.StackObjects) { int ObjectIdx; + const AllocaInst *Alloca = nullptr; + const yaml::StringValue &Name = Object.Name; + if (!Name.Value.empty()) { + Alloca = dyn_cast_or_null( + F.getValueSymbolTable().lookup(Name.Value)); + if (!Alloca) + return error(Name.SourceRange.Start, + "alloca instruction named '" + Name.Value + + "' isn't defined in the function '" + F.getName() + + "'"); + } if (Object.Type == yaml::MachineStackObject::VariableSized) - ObjectIdx = - MFI.CreateVariableSizedObject(Object.Alignment, /*Alloca=*/nullptr); + ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca); else ObjectIdx = MFI.CreateStackObject( Object.Size, Object.Alignment, - Object.Type == yaml::MachineStackObject::SpillSlot); + Object.Type == yaml::MachineStackObject::SpillSlot, Alloca); MFI.setObjectOffset(ObjectIdx, Object.Offset); - // TODO: Store the mapping between object IDs and object indices to parse - // stack object references correctly. + if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of stack object '%stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + ObjectIdx)) + return true; + if (Object.LocalOffset) + MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue()); + if (parseStackObjectsDebugInfo(MF, PFS, Object, ObjectIdx)) + return true; } + MFI.setCalleeSavedInfo(CSIInfo); + if (!CSIInfo.empty()) + MFI.setCalleeSavedInfoValid(true); + + // Initialize the various stack object references after initializing the + // stack objects. + if (!YamlMFI.StackProtector.Value.empty()) { + SMDiagnostic Error; + int FI; + if (parseStackObjectReference(FI, SM, MF, YamlMFI.StackProtector.Value, PFS, + IRSlots, Error)) + return error(Error, YamlMFI.StackProtector.SourceRange); + MFI.setStackProtectorIndex(FI); + } + return false; +} + +bool MIRParserImpl::parseCalleeSavedRegister( + MachineFunction &MF, PerFunctionMIParsingState &PFS, + std::vector &CSIInfo, + const yaml::StringValue &RegisterSource, int FrameIdx) { + if (RegisterSource.Value.empty()) + return false; + unsigned Reg = 0; + SMDiagnostic Error; + if (parseNamedRegisterReference(Reg, SM, MF, RegisterSource.Value, PFS, + IRSlots, Error)) + return error(Error, RegisterSource.SourceRange); + CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx)); + return false; +} + +/// Verify that given node is of a certain type. Return true on error. +template +static bool typecheckMDNode(T *&Result, MDNode *Node, + const yaml::StringValue &Source, + StringRef TypeString, MIRParserImpl &Parser) { + if (!Node) + return false; + Result = dyn_cast(Node); + if (!Result) + return Parser.error(Source.SourceRange.Start, + "expected a reference to a '" + TypeString + + "' metadata node"); + return false; +} + +bool MIRParserImpl::parseStackObjectsDebugInfo( + MachineFunction &MF, PerFunctionMIParsingState &PFS, + const yaml::MachineStackObject &Object, int FrameIdx) { + // Debug information can only be attached to stack objects; Fixed stack + // objects aren't supported. 
+ assert(FrameIdx >= 0 && "Expected a stack object frame index"); + MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr; + if (parseMDNode(Var, Object.DebugVar, MF, PFS) || + parseMDNode(Expr, Object.DebugExpr, MF, PFS) || + parseMDNode(Loc, Object.DebugLoc, MF, PFS)) + return true; + if (!Var && !Expr && !Loc) + return false; + DILocalVariable *DIVar = nullptr; + DIExpression *DIExpr = nullptr; + DILocation *DILoc = nullptr; + if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) || + typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) || + typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this)) + return true; + MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); + return false; +} + +bool MIRParserImpl::parseMDNode(MDNode *&Node, const yaml::StringValue &Source, + MachineFunction &MF, + const PerFunctionMIParsingState &PFS) { + if (Source.Value.empty()) + return false; + SMDiagnostic Error; + if (llvm::parseMDNode(Node, SM, MF, Source.Value, PFS, IRSlots, Error)) + return error(Error, Source.SourceRange); + return false; +} + +bool MIRParserImpl::initializeConstantPool( + MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF, + const MachineFunction &MF, + DenseMap &ConstantPoolSlots) { + const auto &M = *MF.getFunction()->getParent(); + SMDiagnostic Error; + for (const auto &YamlConstant : YamlMF.Constants) { + const Constant *Value = dyn_cast_or_null( + parseConstantValue(YamlConstant.Value.Value, Error, M)); + if (!Value) + return error(Error, YamlConstant.Value.SourceRange); + unsigned Alignment = + YamlConstant.Alignment + ? YamlConstant.Alignment + : M.getDataLayout().getPrefTypeAlignment(Value->getType()); + unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment); + if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index)) + .second) + return error(YamlConstant.ID.SourceRange.Start, + Twine("redefinition of constant pool item '%const.") + + Twine(YamlConstant.ID.Value) + "'"); + } + return false; +} + +bool MIRParserImpl::initializeJumpTableInfo( + MachineFunction &MF, const yaml::MachineJumpTable &YamlJTI, + PerFunctionMIParsingState &PFS) { + MachineJumpTableInfo *JTI = MF.getOrCreateJumpTableInfo(YamlJTI.Kind); + for (const auto &Entry : YamlJTI.Entries) { + std::vector Blocks; + for (const auto &MBBSource : Entry.Blocks) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, MBBSource.Value, MF, PFS)) + return true; + Blocks.push_back(MBB); + } + unsigned Index = JTI->createJumpTableIndex(Blocks); + if (!PFS.JumpTableSlots.insert(std::make_pair(Entry.ID.Value, Index)) + .second) + return error(Entry.ID.SourceRange.Start, + Twine("redefinition of jump table entry '%jump-table.") + + Twine(Entry.ID.Value) + "'"); + } + return false; +} + +bool MIRParserImpl::parseMBBReference(MachineBasicBlock *&MBB, + const yaml::StringValue &Source, + MachineFunction &MF, + const PerFunctionMIParsingState &PFS) { + SMDiagnostic Error; + if (llvm::parseMBBReference(MBB, SM, MF, Source.Value, PFS, IRSlots, Error)) + return error(Error, Source.SourceRange); return false; } @@ -430,8 +656,8 @@ SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error, Error.getFixIts()); } -SMDiagnostic MIRParserImpl::diagFromLLVMAssemblyDiag(const SMDiagnostic &Error, - SMRange SourceRange) { +SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error, + SMRange SourceRange) { assert(SourceRange.isValid()); // Translate the location of the 
error from the location in the llvm IR string diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp index d5cf9244199e..175cb0d51437 100644 --- a/lib/CodeGen/MIRPrinter.cpp +++ b/lib/CodeGen/MIRPrinter.cpp @@ -14,13 +14,20 @@ #include "MIRPrinter.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/YAMLTraits.h" @@ -31,11 +38,38 @@ using namespace llvm; namespace { +/// This structure describes how to print out stack object references. +struct FrameIndexOperand { + std::string Name; + unsigned ID; + bool IsFixed; + + FrameIndexOperand(StringRef Name, unsigned ID, bool IsFixed) + : Name(Name.str()), ID(ID), IsFixed(IsFixed) {} + + /// Return an ordinary stack object reference. + static FrameIndexOperand create(StringRef Name, unsigned ID) { + return FrameIndexOperand(Name, ID, /*IsFixed=*/false); + } + + /// Return a fixed stack object reference. + static FrameIndexOperand createFixed(unsigned ID) { + return FrameIndexOperand("", ID, /*IsFixed=*/true); + } +}; + +} // end anonymous namespace + +namespace llvm { + /// This class prints out the machine functions using the MIR serialization /// format. class MIRPrinter { raw_ostream &OS; DenseMap RegisterMaskIds; + /// Maps from stack object indices to operand indices which will be used when + /// printing frame index machine operands. 
+  DenseMap<int, FrameIndexOperand> StackObjectOperandMapping;
 
 public:
   MIRPrinter(raw_ostream &OS) : OS(OS) {}
@@ -44,11 +78,16 @@ public:
   void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo,
                const TargetRegisterInfo *TRI);
-  void convert(yaml::MachineFrameInfo &YamlMFI, const MachineFrameInfo &MFI);
-  void convert(ModuleSlotTracker &MST, yaml::MachineBasicBlock &YamlMBB,
-               const MachineBasicBlock &MBB);
+  void convert(ModuleSlotTracker &MST, yaml::MachineFrameInfo &YamlMFI,
+               const MachineFrameInfo &MFI);
+  void convert(yaml::MachineFunction &MF,
+               const MachineConstantPool &ConstantPool);
+  void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI,
+               const MachineJumpTableInfo &JTI);
   void convertStackObjects(yaml::MachineFunction &MF,
-                           const MachineFrameInfo &MFI);
+                           const MachineFrameInfo &MFI, MachineModuleInfo &MMI,
+                           ModuleSlotTracker &MST,
+                           const TargetRegisterInfo *TRI);
 
 private:
   void initRegisterMaskIds(const MachineFunction &MF);
@@ -60,18 +99,32 @@ class MIPrinter {
   raw_ostream &OS;
   ModuleSlotTracker &MST;
   const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds;
+  const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping;
 
 public:
   MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST,
-            const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds)
-      : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds) {}
+            const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds,
+            const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping)
+      : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds),
+        StackObjectOperandMapping(StackObjectOperandMapping) {}
+
+  void print(const MachineBasicBlock &MBB);
 
   void print(const MachineInstr &MI);
   void printMBBReference(const MachineBasicBlock &MBB);
-  void print(const MachineOperand &Op, const TargetRegisterInfo *TRI);
+  void printIRBlockReference(const BasicBlock &BB);
+  void printIRValueReference(const Value &V);
+  void printStackObjectReference(int FrameIndex);
+  void printOffset(int64_t Offset);
+  void printTargetFlags(const MachineOperand &Op);
+  void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+             unsigned I, bool ShouldPrintRegisterTies, bool IsDef = false);
+  void print(const MachineMemOperand &Op);
+
+  void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
 };
 
-} // end anonymous namespace
+} // end namespace llvm
 
 namespace llvm {
 namespace yaml {
@@ -103,6 +156,12 @@ static void printReg(unsigned Reg, raw_ostream &OS,
     llvm_unreachable("Can't print this kind of register yet");
 }
 
+static void printReg(unsigned Reg, yaml::StringValue &Dest,
+                     const TargetRegisterInfo *TRI) {
+  raw_string_ostream OS(Dest.Value);
+  printReg(Reg, OS, TRI);
+}
+
 void MIRPrinter::print(const MachineFunction &MF) {
   initRegisterMaskIds(MF);
 
@@ -112,23 +171,25 @@ void MIRPrinter::print(const MachineFunction &MF) {
   YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
   YamlMF.HasInlineAsm = MF.hasInlineAsm();
   convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
-  convert(YamlMF.FrameInfo, *MF.getFrameInfo());
-  convertStackObjects(YamlMF, *MF.getFrameInfo());
-
-  int I = 0;
   ModuleSlotTracker MST(MF.getFunction()->getParent());
+  MST.incorporateFunction(*MF.getFunction());
+  convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo());
+  convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST,
+                      MF.getSubtarget().getRegisterInfo());
+  if (const auto *ConstantPool = MF.getConstantPool())
+    convert(YamlMF, *ConstantPool);
+  if (const auto *JumpTableInfo = MF.getJumpTableInfo())
+    convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
+  raw_string_ostream StrOS(YamlMF.Body.Value.Value);
+  bool IsNewlineNeeded = false;
   for (const auto &MBB : MF) {
-    // TODO: Allow printing of non sequentially numbered MBBs.
-    // This is currently needed as the basic block references get their index
-    // from MBB.getNumber(), thus it should be sequential so that the parser can
-    // map back to the correct MBBs when parsing the output.
-    assert(MBB.getNumber() == I++ &&
-           "Can't print MBBs that aren't sequentially numbered");
-    (void)I;
-    yaml::MachineBasicBlock YamlMBB;
-    convert(MST, YamlMBB, MBB);
-    YamlMF.BasicBlocks.push_back(YamlMBB);
+    if (IsNewlineNeeded)
+      StrOS << "\n";
+    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+        .print(MBB);
+    IsNewlineNeeded = true;
   }
+  StrOS.flush();
   yaml::Output Out(OS);
   Out << YamlMF;
 }
@@ -147,11 +208,38 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
     VReg.ID = I;
     VReg.Class =
         StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower();
+    unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
+    if (PreferredReg)
+      printReg(PreferredReg, VReg.PreferredRegister, TRI);
     MF.VirtualRegisters.push_back(VReg);
   }
+
+  // Print the live ins.
+  for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) {
+    yaml::MachineFunctionLiveIn LiveIn;
+    printReg(I->first, LiveIn.Register, TRI);
+    if (I->second)
+      printReg(I->second, LiveIn.VirtualRegister, TRI);
+    MF.LiveIns.push_back(LiveIn);
+  }
+  // The used physical register mask is printed as an inverted callee saved
+  // register mask.
+  const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask();
+  if (UsedPhysRegMask.none())
+    return;
+  std::vector<yaml::FlowStringValue> CalleeSavedRegisters;
+  for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) {
+    if (!UsedPhysRegMask[I]) {
+      yaml::FlowStringValue Reg;
+      printReg(I, Reg, TRI);
+      CalleeSavedRegisters.push_back(Reg);
+    }
+  }
+  MF.CalleeSavedRegisters = CalleeSavedRegisters;
 }
 
-void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI,
+void MIRPrinter::convert(ModuleSlotTracker &MST,
+                         yaml::MachineFrameInfo &YamlMFI,
                          const MachineFrameInfo &MFI) {
   YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken();
   YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken();
@@ -166,10 +254,23 @@ void MIRPrinter::convert(yaml::MachineFrameInfo &YamlMFI,
   YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment();
   YamlMFI.HasVAStart = MFI.hasVAStart();
   YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc();
+  if (MFI.getSavePoint()) {
+    raw_string_ostream StrOS(YamlMFI.SavePoint.Value);
+    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+        .printMBBReference(*MFI.getSavePoint());
+  }
+  if (MFI.getRestorePoint()) {
+    raw_string_ostream StrOS(YamlMFI.RestorePoint.Value);
+    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+        .printMBBReference(*MFI.getRestorePoint());
+  }
 }
 
 void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
-                                     const MachineFrameInfo &MFI) {
+                                     const MachineFrameInfo &MFI,
+                                     MachineModuleInfo &MMI,
+                                     ModuleSlotTracker &MST,
+                                     const TargetRegisterInfo *TRI) {
   // Process fixed stack objects.
   unsigned ID = 0;
   for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
@@ -177,7 +278,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
       continue;
 
     yaml::FixedMachineStackObject YamlObject;
-    YamlObject.ID = ID++;
+    YamlObject.ID = ID;
     YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
                           ? yaml::FixedMachineStackObject::SpillSlot
                           : yaml::FixedMachineStackObject::DefaultType;
@@ -187,8 +288,8 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
     YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
     YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
     MF.FixedStackObjects.push_back(YamlObject);
-    // TODO: Store the mapping between fixed object IDs and object indices to
-    // print the fixed stack object references correctly.
+    StackObjectOperandMapping.insert(
+        std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
   }
 
   // Process ordinary stack objects.
@@ -198,7 +299,10 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
       continue;
 
     yaml::MachineStackObject YamlObject;
-    YamlObject.ID = ID++;
+    YamlObject.ID = ID;
+    if (const auto *Alloca = MFI.getObjectAllocation(I))
+      YamlObject.Name.Value =
+          Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>";
     YamlObject.Type = MFI.isSpillSlotObjectIndex(I)
                           ? yaml::MachineStackObject::SpillSlot
                           : MFI.isVariableSizedObjectIndex(I)
@@ -209,47 +313,100 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
     YamlObject.Alignment = MFI.getObjectAlignment(I);
     MF.StackObjects.push_back(YamlObject);
-    // TODO: Store the mapping between object IDs and object indices to print
-    // the stack object references correctly.
+    StackObjectOperandMapping.insert(std::make_pair(
+        I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
+  }
+
+  for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
+    yaml::StringValue Reg;
+    printReg(CSInfo.getReg(), Reg, TRI);
+    auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx());
+    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+           "Invalid stack object index");
+    const FrameIndexOperand &StackObject = StackObjectInfo->second;
+    if (StackObject.IsFixed)
+      MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+    else
+      MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+  }
+  for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
+    auto LocalObject = MFI.getLocalFrameObjectMap(I);
+    auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first);
+    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+           "Invalid stack object index");
+    const FrameIndexOperand &StackObject = StackObjectInfo->second;
+    assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
+    MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
+  }
+
+  // Print the stack object references in the frame information class after
+  // converting the stack objects.
+  if (MFI.hasStackProtectorIndex()) {
+    raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value);
+    MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+        .printStackObjectReference(MFI.getStackProtectorIndex());
+  }
+
+  // Print the debug variable information.
+  for (MachineModuleInfo::VariableDbgInfo &DebugVar :
+       MMI.getVariableDbgInfo()) {
+    auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
+    assert(StackObjectInfo != StackObjectOperandMapping.end() &&
+           "Invalid stack object index");
+    const FrameIndexOperand &StackObject = StackObjectInfo->second;
+    assert(!StackObject.IsFixed && "Expected a non-fixed stack object");
+    auto &Object = MF.StackObjects[StackObject.ID];
+    {
+      raw_string_ostream StrOS(Object.DebugVar.Value);
+      DebugVar.Var->printAsOperand(StrOS, MST);
+    }
+    {
+      raw_string_ostream StrOS(Object.DebugExpr.Value);
+      DebugVar.Expr->printAsOperand(StrOS, MST);
+    }
+    {
+      raw_string_ostream StrOS(Object.DebugLoc.Value);
+      DebugVar.Loc->printAsOperand(StrOS, MST);
+    }
+  }
+}
+
+void MIRPrinter::convert(yaml::MachineFunction &MF,
+                         const MachineConstantPool &ConstantPool) {
+  unsigned ID = 0;
+  for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) {
+    // TODO: Serialize target specific constant pool entries.
+    if (Constant.isMachineConstantPoolEntry())
+      llvm_unreachable("Can't print target specific constant pool entries yet");
+
+    yaml::MachineConstantPoolValue YamlConstant;
+    std::string Str;
+    raw_string_ostream StrOS(Str);
+    Constant.Val.ConstVal->printAsOperand(StrOS);
+    YamlConstant.ID = ID++;
+    YamlConstant.Value = StrOS.str();
+    YamlConstant.Alignment = Constant.getAlignment();
+    MF.Constants.push_back(YamlConstant);
   }
 }
 
 void MIRPrinter::convert(ModuleSlotTracker &MST,
-                         yaml::MachineBasicBlock &YamlMBB,
-                         const MachineBasicBlock &MBB) {
-  assert(MBB.getNumber() >= 0 && "Invalid MBB number");
-  YamlMBB.ID = (unsigned)MBB.getNumber();
-  // TODO: Serialize unnamed BB references.
-  if (const auto *BB = MBB.getBasicBlock())
-    YamlMBB.Name.Value = BB->hasName() ? BB->getName() : "<unnamed bb>";
-  else
-    YamlMBB.Name.Value = "<null>";
-  YamlMBB.Alignment = MBB.getAlignment();
-  YamlMBB.AddressTaken = MBB.hasAddressTaken();
-  YamlMBB.IsLandingPad = MBB.isLandingPad();
-  for (const auto *SuccMBB : MBB.successors()) {
+                         yaml::MachineJumpTable &YamlJTI,
+                         const MachineJumpTableInfo &JTI) {
+  YamlJTI.Kind = JTI.getEntryKind();
+  unsigned ID = 0;
+  for (const auto &Table : JTI.getJumpTables()) {
     std::string Str;
-    raw_string_ostream StrOS(Str);
-    MIPrinter(StrOS, MST, RegisterMaskIds).printMBBReference(*SuccMBB);
-    YamlMBB.Successors.push_back(StrOS.str());
-  }
-  // Print the live in registers.
-  const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
-  assert(TRI && "Expected target register info");
-  for (auto I = MBB.livein_begin(), E = MBB.livein_end(); I != E; ++I) {
-    std::string Str;
-    raw_string_ostream StrOS(Str);
-    printReg(*I, StrOS, TRI);
-    YamlMBB.LiveIns.push_back(StrOS.str());
-  }
-  // Print the machine instructions.
-  YamlMBB.Instructions.reserve(MBB.size());
-  std::string Str;
-  for (const auto &MI : MBB) {
-    raw_string_ostream StrOS(Str);
-    MIPrinter(StrOS, MST, RegisterMaskIds).print(MI);
-    YamlMBB.Instructions.push_back(StrOS.str());
-    Str.clear();
+    yaml::MachineJumpTable::Entry Entry;
+    Entry.ID = ID++;
+    for (const auto *MBB : Table.MBBs) {
+      raw_string_ostream StrOS(Str);
+      MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
+          .printMBBReference(*MBB);
+      Entry.Blocks.push_back(StrOS.str());
+      Str.clear();
+    }
+    YamlJTI.Entries.push_back(Entry);
   }
 }
 
@@ -260,26 +417,137 @@ void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) {
     RegisterMaskIds.insert(std::make_pair(Mask, I++));
 }
 
+void MIPrinter::print(const MachineBasicBlock &MBB) {
+  assert(MBB.getNumber() >= 0 && "Invalid MBB number");
+  OS << "bb." << MBB.getNumber();
+  bool HasAttributes = false;
+  if (const auto *BB = MBB.getBasicBlock()) {
+    if (BB->hasName()) {
+      OS << "." << BB->getName();
+    } else {
+      HasAttributes = true;
+      OS << " (";
+      int Slot = MST.getLocalSlot(BB);
+      if (Slot == -1)
+        OS << "<ir-block badref>";
+      else
+        OS << (Twine("%ir-block.") + Twine(Slot)).str();
+    }
+  }
+  if (MBB.hasAddressTaken()) {
+    OS << (HasAttributes ? ", " : " (");
+    OS << "address-taken";
+    HasAttributes = true;
+  }
+  if (MBB.isEHPad()) {
+    OS << (HasAttributes ? ", " : " (");
+    OS << "landing-pad";
+    HasAttributes = true;
+  }
+  if (MBB.getAlignment()) {
+    OS << (HasAttributes ? ", " : " (");
+    OS << "align " << MBB.getAlignment();
+    HasAttributes = true;
+  }
+  if (HasAttributes)
+    OS << ")";
+  OS << ":\n";
+
+  bool HasLineAttributes = false;
+  // Print the successors
+  if (!MBB.succ_empty()) {
+    OS.indent(2) << "successors: ";
+    for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) {
+      if (I != MBB.succ_begin())
+        OS << ", ";
+      printMBBReference(**I);
+      if (MBB.hasSuccessorProbabilities())
+        OS << '(' << MBB.getSuccProbability(I) << ')';
+    }
+    OS << "\n";
+    HasLineAttributes = true;
+  }
+
+  // Print the live in registers.
+  const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+  assert(TRI && "Expected target register info");
+  if (!MBB.livein_empty()) {
+    OS.indent(2) << "liveins: ";
+    bool First = true;
+    for (const auto &LI : MBB.liveins()) {
+      if (!First)
+        OS << ", ";
+      First = false;
+      printReg(LI.PhysReg, OS, TRI);
+      if (LI.LaneMask != ~0u)
+        OS << ':' << PrintLaneMask(LI.LaneMask);
+    }
+    OS << "\n";
+    HasLineAttributes = true;
+  }
+
+  if (HasLineAttributes)
+    OS << "\n";
+  bool IsInBundle = false;
+  for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) {
+    const MachineInstr &MI = *I;
+    if (IsInBundle && !MI.isInsideBundle()) {
+      OS.indent(2) << "}\n";
+      IsInBundle = false;
+    }
+    OS.indent(IsInBundle ? 4 : 2);
+    print(MI);
+    if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) {
+      OS << " {";
+      IsInBundle = true;
+    }
+    OS << "\n";
+  }
+  if (IsInBundle)
+    OS.indent(2) << "}\n";
+}
+
+/// Return true when an instruction has tied register that can't be determined
+/// by the instruction's descriptor.
+static bool hasComplexRegisterTies(const MachineInstr &MI) {
+  const MCInstrDesc &MCID = MI.getDesc();
+  for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+    const auto &Operand = MI.getOperand(I);
+    if (!Operand.isReg() || Operand.isDef())
+      // Ignore the defined registers as MCID marks only the uses as tied.
+      continue;
+    int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO);
+    int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1;
+    if (ExpectedTiedIdx != TiedIdx)
+      return true;
+  }
+  return false;
+}
+
 void MIPrinter::print(const MachineInstr &MI) {
   const auto &SubTarget = MI.getParent()->getParent()->getSubtarget();
   const auto *TRI = SubTarget.getRegisterInfo();
   assert(TRI && "Expected target register info");
   const auto *TII = SubTarget.getInstrInfo();
   assert(TII && "Expected target instruction info");
+  if (MI.isCFIInstruction())
+    assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
 
+  bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
   unsigned I = 0, E = MI.getNumOperands();
   for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
          !MI.getOperand(I).isImplicit();
        ++I) {
     if (I)
       OS << ", ";
-    print(MI.getOperand(I), TRI);
+    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, /*IsDef=*/true);
   }
 
   if (I)
     OS << " = ";
+  if (MI.getFlag(MachineInstr::FrameSetup))
+    OS << "frame-setup ";
   OS << TII->getName(MI.getOpcode());
-  // TODO: Print the instruction flags, machine mem operands.
   if (I < E)
     OS << ' ';
 
@@ -287,9 +555,27 @@ void MIPrinter::print(const MachineInstr &MI) {
   for (; I < E; ++I) {
     if (NeedComma)
       OS << ", ";
-    print(MI.getOperand(I), TRI);
+    print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies);
     NeedComma = true;
   }
+
+  if (MI.getDebugLoc()) {
+    if (NeedComma)
+      OS << ',';
+    OS << " debug-location ";
+    MI.getDebugLoc()->printAsOperand(OS, MST);
+  }
+
+  if (!MI.memoperands_empty()) {
+    OS << " :: ";
+    bool NeedComma = false;
+    for (const auto *Op : MI.memoperands()) {
+      if (NeedComma)
+        OS << ", ";
+      print(*Op);
+      NeedComma = true;
+    }
+  }
 }
 
 void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
@@ -300,32 +586,225 @@ void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) {
   }
 }
 
-void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
+static void printIRSlotNumber(raw_ostream &OS, int Slot) {
+  if (Slot == -1)
+    OS << "<badref>";
+  else
+    OS << Slot;
+}
+
+void MIPrinter::printIRBlockReference(const BasicBlock &BB) {
+  OS << "%ir-block.";
+  if (BB.hasName()) {
+    printLLVMNameWithoutPrefix(OS, BB.getName());
+    return;
+  }
+  const Function *F = BB.getParent();
+  int Slot;
+  if (F == MST.getCurrentFunction()) {
+    Slot = MST.getLocalSlot(&BB);
+  } else {
+    ModuleSlotTracker CustomMST(F->getParent(),
+                                /*ShouldInitializeAllMetadata=*/false);
+    CustomMST.incorporateFunction(*F);
+    Slot = CustomMST.getLocalSlot(&BB);
+  }
+  printIRSlotNumber(OS, Slot);
+}
+
+void MIPrinter::printIRValueReference(const Value &V) {
+  if (isa<GlobalValue>(V)) {
+    V.printAsOperand(OS, /*PrintType=*/false, MST);
+    return;
+  }
+  if (isa<Constant>(V)) {
+    // Machine memory operands can load/store to/from constant value pointers.
+    OS << '`';
+    V.printAsOperand(OS, /*PrintType=*/true, MST);
+    OS << '`';
+    return;
+  }
+  OS << "%ir.";
+  if (V.hasName()) {
+    printLLVMNameWithoutPrefix(OS, V.getName());
+    return;
+  }
+  printIRSlotNumber(OS, MST.getLocalSlot(&V));
+}
+
+void MIPrinter::printStackObjectReference(int FrameIndex) {
+  auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex);
+  assert(ObjectInfo != StackObjectOperandMapping.end() &&
+         "Invalid frame index");
+  const FrameIndexOperand &Operand = ObjectInfo->second;
+  if (Operand.IsFixed) {
+    OS << "%fixed-stack." << Operand.ID;
+    return;
+  }
+  OS << "%stack." << Operand.ID;
+  if (!Operand.Name.empty())
+    OS << '.' << Operand.Name;
+}
+
+void MIPrinter::printOffset(int64_t Offset) {
+  if (Offset == 0)
+    return;
+  if (Offset < 0) {
+    OS << " - " << -Offset;
+    return;
+  }
+  OS << " + " << Offset;
+}
+
+static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) {
+  auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+  for (const auto &I : Flags) {
+    if (I.first == TF) {
+      return I.second;
+    }
+  }
+  return nullptr;
+}
+
+void MIPrinter::printTargetFlags(const MachineOperand &Op) {
+  if (!Op.getTargetFlags())
+    return;
+  const auto *TII =
+      Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo();
+  assert(TII && "expected instruction info");
+  auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags());
+  OS << "target-flags(";
+  const bool HasDirectFlags = Flags.first;
+  const bool HasBitmaskFlags = Flags.second;
+  if (!HasDirectFlags && !HasBitmaskFlags) {
+    OS << ") ";
+    return;
+  }
+  if (HasDirectFlags) {
+    if (const auto *Name = getTargetFlagName(TII, Flags.first))
+      OS << Name;
+    else
+      OS << "<unknown target flag>";
+  }
+  if (!HasBitmaskFlags) {
+    OS << ") ";
+    return;
+  }
+  bool IsCommaNeeded = HasDirectFlags;
+  unsigned BitMask = Flags.second;
+  auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags();
+  for (const auto &Mask : BitMasks) {
+    // Check if the flag's bitmask has the bits of the current mask set.
+    if ((BitMask & Mask.first) == Mask.first) {
+      if (IsCommaNeeded)
+        OS << ", ";
+      IsCommaNeeded = true;
+      OS << Mask.second;
+      // Clear the bits which were serialized from the flag's bitmask.
+      BitMask &= ~(Mask.first);
+    }
+  }
+  if (BitMask) {
+    // When the resulting flag's bitmask isn't zero, we know that we didn't
+    // serialize all of the bit flags.
+    if (IsCommaNeeded)
+      OS << ", ";
+    OS << "<unknown bitmask target flag>";
+  }
+  OS << ") ";
+}
+
+static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  assert(TII && "expected instruction info");
+  auto Indices = TII->getSerializableTargetIndices();
+  for (const auto &I : Indices) {
+    if (I.first == Index) {
+      return I.second;
+    }
+  }
+  return nullptr;
+}
+
+void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
+                      unsigned I, bool ShouldPrintRegisterTies, bool IsDef) {
+  printTargetFlags(Op);
   switch (Op.getType()) {
   case MachineOperand::MO_Register:
-    // TODO: Print the other register flags.
     if (Op.isImplicit())
       OS << (Op.isDef() ? "implicit-def " : "implicit ");
+    else if (!IsDef && Op.isDef())
+      // Print the 'def' flag only when the operand is defined after '='.
+      OS << "def ";
+    if (Op.isInternalRead())
+      OS << "internal ";
    if (Op.isDead())
      OS << "dead ";
    if (Op.isKill())
      OS << "killed ";
    if (Op.isUndef())
      OS << "undef ";
+    if (Op.isEarlyClobber())
+      OS << "early-clobber ";
+    if (Op.isDebug())
+      OS << "debug-use ";
     printReg(Op.getReg(), OS, TRI);
     // Print the sub register.
     if (Op.getSubReg() != 0)
       OS << ':' << TRI->getSubRegIndexName(Op.getSubReg());
+    if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
+      OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
     break;
   case MachineOperand::MO_Immediate:
     OS << Op.getImm();
     break;
+  case MachineOperand::MO_CImmediate:
+    Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+    break;
+  case MachineOperand::MO_FPImmediate:
+    Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST);
+    break;
   case MachineOperand::MO_MachineBasicBlock:
     printMBBReference(*Op.getMBB());
     break;
+  case MachineOperand::MO_FrameIndex:
+    printStackObjectReference(Op.getIndex());
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    OS << "%const." << Op.getIndex();
+    printOffset(Op.getOffset());
+    break;
+  case MachineOperand::MO_TargetIndex: {
+    OS << "target-index(";
+    if (const auto *Name = getTargetIndexName(
+            *Op.getParent()->getParent()->getParent(), Op.getIndex()))
+      OS << Name;
+    else
+      OS << "<unknown>";
+    OS << ')';
+    printOffset(Op.getOffset());
+    break;
+  }
+  case MachineOperand::MO_JumpTableIndex:
+    OS << "%jump-table." << Op.getIndex();
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    OS << '$';
+    printLLVMNameWithoutPrefix(OS, Op.getSymbolName());
+    printOffset(Op.getOffset());
+    break;
   case MachineOperand::MO_GlobalAddress:
     Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST);
-    // TODO: Print offset and target flags.
+    printOffset(Op.getOffset());
+    break;
+  case MachineOperand::MO_BlockAddress:
+    OS << "blockaddress(";
+    Op.getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false,
+                                                        MST);
+    OS << ", ";
+    printIRBlockReference(*Op.getBlockAddress()->getBasicBlock());
+    OS << ')';
+    printOffset(Op.getOffset());
     break;
   case MachineOperand::MO_RegisterMask: {
     auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask());
@@ -335,9 +814,157 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI) {
       llvm_unreachable("Can't print this machine register mask yet.");
     break;
   }
+  case MachineOperand::MO_RegisterLiveOut: {
+    const uint32_t *RegMask = Op.getRegLiveOut();
+    OS << "liveout(";
+    bool IsCommaNeeded = false;
+    for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) {
+      if (RegMask[Reg / 32] & (1U << (Reg % 32))) {
+        if (IsCommaNeeded)
+          OS << ", ";
+        printReg(Reg, OS, TRI);
+        IsCommaNeeded = true;
+      }
+    }
+    OS << ")";
+    break;
+  }
+  case MachineOperand::MO_Metadata:
+    Op.getMetadata()->printAsOperand(OS, MST);
+    break;
+  case MachineOperand::MO_MCSymbol:
+    OS << "<mcsymbol " << *Op.getMCSymbol() << ">";
+    break;
+  case MachineOperand::MO_CFIIndex: {
+    const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI();
+    print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI);
+    break;
+  }
+  }
+}
+
+void MIPrinter::print(const MachineMemOperand &Op) {
+  OS << '(';
+  // TODO: Print operand's target specific flags.
+  if (Op.isVolatile())
+    OS << "volatile ";
+  if (Op.isNonTemporal())
+    OS << "non-temporal ";
+  if (Op.isInvariant())
+    OS << "invariant ";
+  if (Op.isLoad())
+    OS << "load ";
+  else {
+    assert(Op.isStore() && "Non load machine operand must be a store");
+    OS << "store ";
+  }
+  OS << Op.getSize() << (Op.isLoad() ? " from " : " into ");
+  if (const Value *Val = Op.getValue()) {
+    printIRValueReference(*Val);
+  } else {
+    const PseudoSourceValue *PVal = Op.getPseudoValue();
+    assert(PVal && "Expected a pseudo source value");
+    switch (PVal->kind()) {
+    case PseudoSourceValue::Stack:
+      OS << "stack";
+      break;
+    case PseudoSourceValue::GOT:
+      OS << "got";
+      break;
+    case PseudoSourceValue::JumpTable:
+      OS << "jump-table";
+      break;
+    case PseudoSourceValue::ConstantPool:
+      OS << "constant-pool";
+      break;
+    case PseudoSourceValue::FixedStack:
+      printStackObjectReference(
+          cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex());
+      break;
+    case PseudoSourceValue::GlobalValueCallEntry:
+      OS << "call-entry ";
+      cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand(
+          OS, /*PrintType=*/false, MST);
+      break;
+    case PseudoSourceValue::ExternalSymbolCallEntry:
+      OS << "call-entry $";
+      printLLVMNameWithoutPrefix(
+          OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
+      break;
+    }
+  }
+  printOffset(Op.getOffset());
+  if (Op.getBaseAlignment() != Op.getSize())
+    OS << ", align " << Op.getBaseAlignment();
+  auto AAInfo = Op.getAAInfo();
+  if (AAInfo.TBAA) {
+    OS << ", !tbaa ";
+    AAInfo.TBAA->printAsOperand(OS, MST);
+  }
+  if (AAInfo.Scope) {
+    OS << ", !alias.scope ";
+    AAInfo.Scope->printAsOperand(OS, MST);
+  }
+  if (AAInfo.NoAlias) {
+    OS << ", !noalias ";
+    AAInfo.NoAlias->printAsOperand(OS, MST);
+  }
+  if (Op.getRanges()) {
+    OS << ", !range ";
+    Op.getRanges()->printAsOperand(OS, MST);
+  }
+  OS << ')';
+}
+
+static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
+                             const TargetRegisterInfo *TRI) {
+  int Reg = TRI->getLLVMRegNum(DwarfReg, true);
+  if (Reg == -1) {
+    OS << "<badreg>";
+    return;
+  }
+  printReg(Reg, OS, TRI);
+}
+
+void MIPrinter::print(const MCCFIInstruction &CFI,
+                      const TargetRegisterInfo *TRI) {
+  switch (CFI.getOperation()) {
+  case MCCFIInstruction::OpSameValue:
+    OS << ".cfi_same_value ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    break;
+  case MCCFIInstruction::OpOffset:
+    OS << ".cfi_offset ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    OS << ", " << CFI.getOffset();
+    break;
+  case MCCFIInstruction::OpDefCfaRegister:
+    OS << ".cfi_def_cfa_register ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    break;
+  case MCCFIInstruction::OpDefCfaOffset:
+    OS << ".cfi_def_cfa_offset ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    OS << CFI.getOffset();
+    break;
+  case MCCFIInstruction::OpDefCfa:
+    OS << ".cfi_def_cfa ";
+    if (CFI.getLabel())
+      OS << "<mcsymbol> ";
+    printCFIRegister(CFI.getRegister(), OS, TRI);
+    OS << ", " << CFI.getOffset();
+    break;
   default:
-    // TODO: Print the other machine operands.
-    llvm_unreachable("Can't print this machine operand at the moment");
+    // TODO: Print the other CFI Operations.
+    OS << "<unserializable cfi operation>";
+    break;
   }
 }
 
diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp
index 13d61e65d7e0..8e7566a4e46b 100644
--- a/lib/CodeGen/MIRPrintingPass.cpp
+++ b/lib/CodeGen/MIRPrintingPass.cpp
@@ -40,7 +40,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
     MachineFunctionPass::getAnalysisUsage(AU);
   }
 
-  virtual bool runOnMachineFunction(MachineFunction &MF) override {
+  bool runOnMachineFunction(MachineFunction &MF) override {
     std::string Str;
     raw_string_ostream StrOS(Str);
     printMIR(StrOS, MF);
@@ -48,7 +48,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
     return false;
   }
 
-  virtual bool doFinalization(Module &M) override {
+  bool doFinalization(Module &M) override {
     printMIR(OS, M);
     OS << MachineFunctions;
     return false;
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 5d3f7ebaed29..76099f28499b 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/ModuleSlotTracker.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/Support/DataTypes.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -38,22 +39,21 @@ using namespace llvm;
 
 #define DEBUG_TYPE "codegen"
 
-MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
-  : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
-    AddressTaken(false), CachedMCSymbol(nullptr) {
+MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
+    : BB(B), Number(-1), xParent(&MF) {
   Insts.Parent = this;
 }
 
 MachineBasicBlock::~MachineBasicBlock() {
 }
 
-/// getSymbol - Return the MCSymbol for this basic block.
-///
+/// Return the MCSymbol for this basic block.
 MCSymbol *MachineBasicBlock::getSymbol() const {
   if (!CachedMCSymbol) {
     const MachineFunction *MF = getParent();
     MCContext &Ctx = MF->getContext();
     const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+    assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
     CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
                                            Twine(MF->getFunctionNumber()) +
                                            "_" + Twine(getNumber()));
@@ -68,9 +68,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
   return OS;
 }
 
-/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
-/// parent pointer of the MBB, the MBB numbering, and any instructions in the
-/// MBB to be on the right operand list for registers.
+/// When an MBB is added to an MF, we need to update the parent pointer of the
+/// MBB, the MBB numbering, and any instructions in the MBB to be on the right
+/// operand list for registers.
 ///
 /// MBBs start out as #-1. When a MBB is added to a MachineFunction, it
 /// gets the next available unique MBB number. If it is removed from a
@@ -91,10 +91,8 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
   N->Number = -1;
 }
 
-
-/// addNodeToList (MI) - When we add an instruction to a basic block
-/// list, we update its parent pointer and add its operands from reg use/def
-/// lists if appropriate.
+/// When we add an instruction to a basic block list, we update its parent
+/// pointer and add its operands from reg use/def lists if appropriate.
 void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
   assert(!N->getParent() && "machine instruction already in a basic block");
   N->setParent(Parent);
@@ -105,9 +103,8 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
   N->AddRegOperandsToUseLists(MF->getRegInfo());
 }
 
-/// removeNodeFromList (MI) - When we remove an instruction from a basic block
-/// list, we update its parent pointer and remove its operands from reg use/def
-/// lists if appropriate.
+/// When we remove an instruction from a basic block list, we update its parent
+/// pointer and remove its operands from reg use/def lists if appropriate.
 void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
   assert(N->getParent() && "machine instruction not in a basic block");
 
@@ -118,23 +115,22 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
   N->setParent(nullptr);
 }
 
-/// transferNodesFromList (MI) - When moving a range of instructions from one
-/// MBB list to another, we need to update the parent pointers and the use/def
-/// lists.
+/// When moving a range of instructions from one MBB list to another, we need to
+/// update the parent pointers and the use/def lists.
 void ilist_traits<MachineInstr>::
-transferNodesFromList(ilist_traits<MachineInstr> &fromList,
-                      ilist_iterator<MachineInstr> first,
-                      ilist_iterator<MachineInstr> last) {
-  assert(Parent->getParent() == fromList.Parent->getParent() &&
+transferNodesFromList(ilist_traits<MachineInstr> &FromList,
+                      ilist_iterator<MachineInstr> First,
+                      ilist_iterator<MachineInstr> Last) {
+  assert(Parent->getParent() == FromList.Parent->getParent() &&
         "MachineInstr parent mismatch!");
 
   // Splice within the same MBB -> no change.
-  if (Parent == fromList.Parent) return;
+  if (Parent == FromList.Parent) return;
 
   // If splicing between two blocks within the same function, just update the
   // parent pointers.
-  for (; first != last; ++first)
-    first->setParent(Parent);
+  for (; First != Last; ++First)
+    First->setParent(Parent);
 }
 
 void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
@@ -208,11 +204,18 @@ const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
   if (succ_size() > 2)
     return nullptr;
   for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
-    if ((*I)->isLandingPad())
+    if ((*I)->isEHPad())
       return *I;
   return nullptr;
 }
 
+bool MachineBasicBlock::hasEHPadSuccessor() const {
+  for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+    if ((*I)->isEHPad())
+      return true;
+  return false;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void MachineBasicBlock::dump() const {
   print(dbgs());
@@ -271,7 +274,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
     LBB->printAsOperand(OS, /*PrintType=*/false, MST);
     Comma = ", ";
   }
-  if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
+  if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; }
   if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
   if (Alignment)
     OS << Comma << "Align " << Alignment << " (" << (1u << Alignment)
@@ -283,8 +286,11 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
   if (!livein_empty()) {
     if (Indexes) OS << '\t';
     OS << "    Live Ins:";
-    for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
-      OS << ' ' << PrintReg(*I, TRI);
+    for (const auto &LI : make_range(livein_begin(), livein_end())) {
+      OS << ' ' << PrintReg(LI.PhysReg, TRI);
+      if (LI.LaneMask != ~0u)
+        OS << ':' << PrintLaneMask(LI.LaneMask);
+    }
     OS << '\n';
   }
   // Print the preds of this block according to the CFG.
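 // A minimal illustrative sketch of the lane-mask-aware live-in list used in
 // the hunk above; assumptions: the RegisterMaskPair field names follow the
 // r256633 headers, and `MBB` is a hypothetical block pointer, not a name from
 // this patch:
 //
 //   for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins()) {
 //     MCPhysReg PhysReg = LI.PhysReg; // physical register that is live-in
 //     LaneBitmask Mask = LI.LaneMask; // ~0u means every lane is live
 //     if (Mask != ~0u)
 //       dbgs() << PrintReg(PhysReg) << " is only partially live-in\n";
 //   }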
@@ -298,8 +304,8 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
 
   for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) {
     if (Indexes) {
-      if (Indexes->hasIndex(I))
-        OS << Indexes->getInstructionIndex(I);
+      if (Indexes->hasIndex(&*I))
+        OS << Indexes->getInstructionIndex(&*I);
       OS << '\t';
     }
     OS << '\t';
@@ -314,35 +320,63 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
     OS << "    Successors according to CFG:";
     for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) {
       OS << " BB#" << (*SI)->getNumber();
-      if (!Weights.empty())
-        OS << '(' << *getWeightIterator(SI) << ')';
+      if (!Probs.empty())
+        OS << '(' << *getProbabilityIterator(SI) << ')';
     }
     OS << '\n';
   }
 }
 
-void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const {
+void MachineBasicBlock::printAsOperand(raw_ostream &OS,
+                                       bool /*PrintType*/) const {
   OS << "BB#" << getNumber();
 }
 
-void MachineBasicBlock::removeLiveIn(unsigned Reg) {
-  std::vector<unsigned>::iterator I =
-    std::find(LiveIns.begin(), LiveIns.end(), Reg);
-  if (I != LiveIns.end())
+void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
+  LiveInVector::iterator I = std::find_if(
+      LiveIns.begin(), LiveIns.end(),
+      [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+  if (I == LiveIns.end())
+    return;
+
+  I->LaneMask &= ~LaneMask;
+  if (I->LaneMask == 0)
     LiveIns.erase(I);
 }
 
-bool MachineBasicBlock::isLiveIn(unsigned Reg) const {
-  livein_iterator I = std::find(livein_begin(), livein_end(), Reg);
-  return I != livein_end();
+bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
+  livein_iterator I = std::find_if(
+      LiveIns.begin(), LiveIns.end(),
+      [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+  return I != livein_end() && (I->LaneMask & LaneMask) != 0;
+}
+
+void MachineBasicBlock::sortUniqueLiveIns() {
+  std::sort(LiveIns.begin(), LiveIns.end(),
+            [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) {
+              return LI0.PhysReg < LI1.PhysReg;
+            });
+  // Liveins are sorted by physreg now we can merge their lanemasks.
+  LiveInVector::const_iterator I = LiveIns.begin();
+  LiveInVector::const_iterator J;
+  LiveInVector::iterator Out = LiveIns.begin();
+  for (; I != LiveIns.end(); ++Out, I = J) {
+    unsigned PhysReg = I->PhysReg;
+    LaneBitmask LaneMask = I->LaneMask;
+    for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J)
+      LaneMask |= J->LaneMask;
+    Out->PhysReg = PhysReg;
+    Out->LaneMask = LaneMask;
+  }
+  LiveIns.erase(Out, LiveIns.end());
 }
 
 unsigned
-MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
+MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) {
   assert(getParent() && "MBB must be inserted in function");
   assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg");
   assert(RC && "Register class is required");
-  assert((isLandingPad() || this == &getParent()->front()) &&
+  assert((isEHPad() || this == &getParent()->front()) &&
          "Only the entry block and landing pads can have physreg live ins");
 
   bool LiveIn = isLiveIn(PhysReg);
@@ -370,12 +404,11 @@ MachineBasicBlock::addLiveIn(unsigned PhysReg, const TargetRegisterClass *RC) {
 }
 
 void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) {
-  getParent()->splice(NewAfter, this);
+  getParent()->splice(NewAfter->getIterator(), getIterator());
 }
 
 void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
-  MachineFunction::iterator BBI = NewBefore;
-  getParent()->splice(++BBI, this);
+  getParent()->splice(++NewBefore->getIterator(), getIterator());
 }
 
 void MachineBasicBlock::updateTerminator() {
@@ -385,7 +418,7 @@ void MachineBasicBlock::updateTerminator() {
   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
   SmallVector<MachineOperand, 4> Cond;
-  DebugLoc dl;  // FIXME: this is nowhere
+  DebugLoc DL;  // FIXME: this is nowhere
   bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
   (void) B;
   assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -400,7 +433,7 @@ void MachineBasicBlock::updateTerminator() {
     // its layout successor, insert a branch. First we have to locate the
     // only non-landing-pad successor, as that is the fallthrough block.
     for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
-      if ((*SI)->isLandingPad())
+      if ((*SI)->isEHPad())
         continue;
       assert(!TBB && "Found more than one non-landing-pad successor!");
       TBB = *SI;
@@ -414,7 +447,7 @@ void MachineBasicBlock::updateTerminator() {
       // Finally update the unconditional successor to be reached via a branch
       // if it would not be reached by fallthrough.
       if (!isLayoutSuccessor(TBB))
-        TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+        TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
     }
   } else {
     if (FBB) {
@@ -425,10 +458,10 @@ void MachineBasicBlock::updateTerminator() {
         if (TII->ReverseBranchCondition(Cond))
           return;
         TII->RemoveBranch(*this);
-        TII->InsertBranch(*this, FBB, nullptr, Cond, dl);
+        TII->InsertBranch(*this, FBB, nullptr, Cond, DL);
       } else if (isLayoutSuccessor(FBB)) {
         TII->RemoveBranch(*this);
-        TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
+        TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
       }
     } else {
       // Walk through the successors and find the successor which is not
@@ -436,7 +469,7 @@ void MachineBasicBlock::updateTerminator() {
      // as the fallthrough successor.
MachineBasicBlock *FallthroughBB = nullptr; for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { - if ((*SI)->isLandingPad() || *SI == TBB) + if ((*SI)->isEHPad() || *SI == TBB) continue; assert(!FallthroughBB && "Found more than one fallthrough successor."); FallthroughBB = *SI; @@ -445,14 +478,14 @@ void MachineBasicBlock::updateTerminator() { // We fallthrough to the same basic block as the conditional jump // targets. Remove the conditional jump, leaving unconditional // fallthrough. - // FIXME: This does not seem like a reasonable pattern to support, but it - // has been seen in the wild coming out of degenerate ARM test cases. + // FIXME: This does not seem like a reasonable pattern to support, but + // it has been seen in the wild coming out of degenerate ARM test cases. TII->RemoveBranch(*this); // Finally update the unconditional successor to be reached via a branch // if it would not be reached by fallthrough. if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, nullptr, Cond, dl); + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); return; } @@ -461,55 +494,69 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); } else if (!isLayoutSuccessor(FallthroughBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl); + TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL); } } } } -void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) { +void MachineBasicBlock::validateSuccProbs() const { +#ifndef NDEBUG + int64_t Sum = 0; + for (auto Prob : Probs) + Sum += Prob.getNumerator(); + // Due to precision issue, we assume that the sum of probabilities is one if + // the difference between the sum of their numerators and the denominator is + // no greater than the number of successors. + assert((uint64_t)std::abs(Sum - BranchProbability::getDenominator()) <= + Probs.size() && + "The sum of successors's probabilities exceeds one."); +#endif // NDEBUG +} - // If we see non-zero value for the first time it means we actually use Weight - // list, so we fill all Weights with 0's. - if (weight != 0 && Weights.empty()) - Weights.resize(Successors.size()); +void MachineBasicBlock::addSuccessor(MachineBasicBlock *Succ, + BranchProbability Prob) { + // Probability list is either empty (if successor list isn't empty, this means + // disabled optimization) or has the same size as successor list. + if (!(Probs.empty() && !Successors.empty())) + Probs.push_back(Prob); + Successors.push_back(Succ); + Succ->addPredecessor(this); +} - if (weight != 0 || !Weights.empty()) - Weights.push_back(weight); +void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) { + // We need to make sure probability list is either empty or has the same size + // of successor list. When this function is called, we can safely delete all + // probability in the list. 
+ Probs.clear(); + Successors.push_back(Succ); + Succ->addPredecessor(this); +} - Successors.push_back(succ); - succ->addPredecessor(this); - } - -void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { - succ->removePredecessor(this); - succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); - assert(I != Successors.end() && "Not a current successor!"); - - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(I); - Weights.erase(WI); - } - - Successors.erase(I); +void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ, + bool NormalizeSuccProbs) { + succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ); + removeSuccessor(I, NormalizeSuccProbs); } MachineBasicBlock::succ_iterator -MachineBasicBlock::removeSuccessor(succ_iterator I) { +MachineBasicBlock::removeSuccessor(succ_iterator I, bool NormalizeSuccProbs) { assert(I != Successors.end() && "Not a current successor!"); - // If Weight list is empty it means we don't use it (disabled optimization). - if (!Weights.empty()) { - weight_iterator WI = getWeightIterator(I); - Weights.erase(WI); + // If probability list is empty it means we don't use it (disabled + // optimization). + if (!Probs.empty()) { + probability_iterator WI = getProbabilityIterator(I); + Probs.erase(WI); + if (NormalizeSuccProbs) + normalizeSuccProbs(); } (*I)->removePredecessor(this); @@ -537,74 +584,77 @@ void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, } } assert(OldI != E && "Old is not a successor of this block"); - Old->removePredecessor(this); // If New isn't already a successor, let it take Old's place. if (NewI == E) { + Old->removePredecessor(this); New->addPredecessor(this); *OldI = New; return; } // New is already a successor. - // Update its weight instead of adding a duplicate edge. - if (!Weights.empty()) { - weight_iterator OldWI = getWeightIterator(OldI); - *getWeightIterator(NewI) += *OldWI; - Weights.erase(OldWI); + // Update its probability instead of adding a duplicate edge. + if (!Probs.empty()) { + auto ProbIter = getProbabilityIterator(NewI); + if (!ProbIter->isUnknown()) + *ProbIter += *getProbabilityIterator(OldI); } - Successors.erase(OldI); + removeSuccessor(OldI); } -void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { - Predecessors.push_back(pred); +void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) { + Predecessors.push_back(Pred); } -void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { - pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred); +void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) { + pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } -void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { - if (this == fromMBB) +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) { + if (this == FromMBB) return; - while (!fromMBB->succ_empty()) { - MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t Weight = 0; + while (!FromMBB->succ_empty()) { + MachineBasicBlock *Succ = *FromMBB->succ_begin(); - // If Weight list is empty it means we don't use it (disabled optimization). - if (!fromMBB->Weights.empty()) - Weight = *fromMBB->Weights.begin(); + // If probability list is empty it means we don't use it (disabled optimization). 
+ if (!FromMBB->Probs.empty()) { + auto Prob = *FromMBB->Probs.begin(); + addSuccessor(Succ, Prob); + } else + addSuccessorWithoutProb(Succ); - addSuccessor(Succ, Weight); - fromMBB->removeSuccessor(Succ); + FromMBB->removeSuccessor(Succ); } } void -MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { - if (this == fromMBB) +MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) { + if (this == FromMBB) return; - while (!fromMBB->succ_empty()) { - MachineBasicBlock *Succ = *fromMBB->succ_begin(); - uint32_t Weight = 0; - if (!fromMBB->Weights.empty()) - Weight = *fromMBB->Weights.begin(); - addSuccessor(Succ, Weight); - fromMBB->removeSuccessor(Succ); + while (!FromMBB->succ_empty()) { + MachineBasicBlock *Succ = *FromMBB->succ_begin(); + if (!FromMBB->Probs.empty()) { + auto Prob = *FromMBB->Probs.begin(); + addSuccessor(Succ, Prob); + } else + addSuccessorWithoutProb(Succ); + FromMBB->removeSuccessor(Succ); // Fix up any PHI nodes in the successor. for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { MachineOperand &MO = MI->getOperand(i); - if (MO.getMBB() == fromMBB) + if (MO.getMBB() == FromMBB) MO.setMBB(this); } } + normalizeSuccProbs(); } bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { @@ -621,14 +671,14 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { } bool MachineBasicBlock::canFallThrough() { - MachineFunction::iterator Fallthrough = this; + MachineFunction::iterator Fallthrough = getIterator(); ++Fallthrough; // If FallthroughBlock is off the end of the function, it can't fall through. if (Fallthrough == getParent()->end()) return false; // If FallthroughBlock isn't a successor, no fallthrough is possible. - if (!isSuccessor(Fallthrough)) + if (!isSuccessor(&*Fallthrough)) return false; // Analyze the branches, if any, at the end of the block. @@ -666,11 +716,11 @@ MachineBasicBlock * MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { // Splitting the critical edge to a landing pad block is non-trivial. Don't do // it in this generic function. - if (Succ->isLandingPad()) + if (Succ->isEHPad()) return nullptr; MachineFunction *MF = getParent(); - DebugLoc dl; // FIXME: this is nowhere + DebugLoc DL; // FIXME: this is nowhere // Performance might be harmed on HW that implements branching using exec mask // where both sides of the branches are always executed. 
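For intuition, the successor hunks above replace the old parallel Weights vector with a Probs vector that must stay index-aligned with Successors, and removals may renormalize what remains. Here is a minimal standalone sketch of that invariant, not code from this patch: LLVM's fixed-point BranchProbability is simplified to raw numerators over a fixed denominator, and the names are invented for the sketch.

#include <cassert>
#include <cstdint>
#include <vector>

// Simplified stand-in for MachineBasicBlock's successor bookkeeping: Probs
// must either be empty (probability tracking disabled) or stay the same
// length as Successors, and removals may renormalize the remainder.
struct Block {
  static const uint32_t Denom = 1u << 31; // fixed common denominator

  std::vector<Block *> Successors;
  std::vector<uint32_t> Probs; // numerators over Denom, parallel to Successors

  void addSuccessor(Block *Succ, uint32_t Num) {
    Successors.push_back(Succ);
    Probs.push_back(Num);
    assert(Probs.size() == Successors.size() && "lists out of sync");
  }

  void removeSuccessor(size_t I, bool Normalize) {
    Successors.erase(Successors.begin() + I);
    Probs.erase(Probs.begin() + I);
    if (Normalize)
      normalizeSuccProbs();
  }

  // Rescale the remaining numerators so they sum to Denom again.
  void normalizeSuccProbs() {
    uint64_t Sum = 0;
    for (uint32_t N : Probs)
      Sum += N;
    if (Sum == 0)
      return;
    for (uint32_t &N : Probs)
      N = static_cast<uint32_t>(uint64_t(N) * Denom / Sum);
  }
};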
@@ -719,7 +769,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (LV) for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) { - MachineInstr *MI = I; + MachineInstr *MI = &*I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { if (!OI->isReg() || OI->getReg() == 0 || @@ -739,7 +789,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (LIS) { for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) { - MachineInstr *MI = I; + MachineInstr *MI = &*I; for (MachineInstr::mop_iterator OI = MI->operands_begin(), OE = MI->operands_end(); OI != OE; ++OI) { @@ -761,7 +811,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (Indexes) { for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) - Terminators.push_back(I); + Terminators.push_back(&*I); } updateTerminator(); @@ -770,7 +820,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { SmallVector NewTerminators; for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); I != E; ++I) - NewTerminators.push_back(I); + NewTerminators.push_back(&*I); for (SmallVectorImpl::iterator I = Terminators.begin(), E = Terminators.end(); I != E; ++I) { @@ -784,17 +834,16 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); - MF->getSubtarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, - dl); + TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL); if (Indexes) { for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); I != E; ++I) { // Some instructions may have been moved to NMBB by updateTerminator(), // so we first remove any instruction that already has an index. - if (Indexes->hasIndex(I)) - Indexes->removeMachineInstrFromMaps(I); - Indexes->insertMachineInstrInMaps(I); + if (Indexes->hasIndex(&*I)) + Indexes->removeMachineInstrFromMaps(&*I); + Indexes->insertMachineInstrInMaps(&*I); } } } @@ -808,9 +857,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { i->getOperand(ni+1).setMBB(NMBB); // Inherit live-ins from the successor - for (MachineBasicBlock::livein_iterator I = Succ->livein_begin(), - E = Succ->livein_end(); I != E; ++I) - NMBB->addLiveIn(*I); + for (const auto &LI : Succ->liveins()) + NMBB->addLiveIn(LI); // Update LiveVariables. const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -822,7 +870,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) - LV->getVarInfo(Reg).Kills.push_back(I); + LV->getVarInfo(Reg).Kills.push_back(&*I); DEBUG(dbgs() << "Restored terminator kill: " << *I); break; } @@ -834,10 +882,10 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (LIS) { // After splitting the edge and updating SlotIndexes, live intervals may be // in one of two situations, depending on whether this block was the last in - // the function. If the original block was the last in the function, all live - // intervals will end prior to the beginning of the new split block. If the - // original block was not at the end of the function, all live intervals will - // extend to the end of the new split block. + // the function. 
If the original block was the last in the function, all + // live intervals will end prior to the beginning of the new split block. If + // the original block was not at the end of the function, all live intervals + // will extend to the end of the new split block. bool isLastMBB = std::next(MachineFunction::iterator(NMBB)) == getParent()->end(); @@ -861,7 +909,8 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LiveInterval &LI = LIS->getInterval(Reg); VNInfo *VNI = LI.getVNInfoAt(PrevIndex); - assert(VNI && "PHI sources should be live out of their predecessors."); + assert(VNI && + "PHI sources should be live out of their predecessors."); LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); } } @@ -941,7 +990,7 @@ static void unbundleSingleMI(MachineInstr *MI) { MachineBasicBlock::instr_iterator MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) { - unbundleSingleMI(I); + unbundleSingleMI(&*I); return Insts.erase(I); } @@ -964,25 +1013,22 @@ MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) { return Insts.insert(I, MI); } -/// removeFromParent - This method unlinks 'this' from the containing function, -/// and returns it, but does not delete it. +/// This method unlinks 'this' from the containing function, and returns it, but +/// does not delete it. MachineBasicBlock *MachineBasicBlock::removeFromParent() { assert(getParent() && "Not embedded in a function!"); getParent()->remove(this); return this; } - -/// eraseFromParent - This method unlinks 'this' from the containing function, -/// and deletes it. +/// This method unlinks 'this' from the containing function, and deletes it. void MachineBasicBlock::eraseFromParent() { assert(getParent() && "Not embedded in a function!"); getParent()->erase(this); } - -/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to -/// 'Old', change the code and CFG so that it branches to 'New' instead. +/// Given a machine basic block that branched to 'Old', change the code and CFG +/// so that it branches to 'New' instead. void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New) { assert(Old != New && "Cannot replace self with self!"); @@ -1004,46 +1050,44 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, replaceSuccessor(Old, New); } -/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the -/// CFG to be inserted. If we have proven that MBB can only branch to DestA and -/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be -/// null. +/// Various pieces of code can cause excess edges in the CFG to be inserted. If +/// we have proven that MBB can only branch to DestA and DestB, remove any other +/// MBB successors from the CFG. DestA and DestB can be null. /// /// Besides DestA and DestB, retain other edges leading to LandingPads /// (currently there can be only one; we don't check or require that here). /// Note it is possible that DestA and/or DestB are LandingPads. bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, MachineBasicBlock *DestB, - bool isCond) { + bool IsCond) { // The values of DestA and DestB frequently come from a call to the // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial // values from there. // // 1. If both DestA and DestB are null, then the block ends with no branches // (it falls through to its successor). - // 2. If DestA is set, DestB is null, and isCond is false, then the block ends + // 2. 
If DestA is set, DestB is null, and IsCond is false, then the block ends // with only an unconditional branch. - // 3. If DestA is set, DestB is null, and isCond is true, then the block ends + // 3. If DestA is set, DestB is null, and IsCond is true, then the block ends // with a conditional branch that falls through to a successor (DestB). - // 4. If DestA and DestB is set and isCond is true, then the block ends with a + // 4. If DestA and DestB are set and IsCond is true, then the block ends with a // conditional branch followed by an unconditional branch. DestA is the // 'true' destination and DestB is the 'false' destination. bool Changed = false; - MachineFunction::iterator FallThru = - std::next(MachineFunction::iterator(this)); + MachineFunction::iterator FallThru = std::next(getIterator()); if (!DestA && !DestB) { // Block falls through to successor. - DestA = FallThru; - DestB = FallThru; + DestA = &*FallThru; + DestB = &*FallThru; } else if (DestA && !DestB) { - if (isCond) + if (IsCond) // Block ends in conditional jump that falls through to successor. - DestB = FallThru; + DestB = &*FallThru; } else { - assert(DestA && DestB && isCond && + assert(DestA && DestB && IsCond && "CFG in a bad state. Cannot correct CFG edges"); } @@ -1054,7 +1098,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, while (SI != succ_end()) { const MachineBasicBlock *MBB = *SI; if (!SeenMBBs.insert(MBB).second || - (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) { + (MBB != DestA && MBB != DestB && !MBB->isEHPad())) { // This is a superfluous edge, remove it. SI = removeSuccessor(SI); Changed = true; @@ -1063,11 +1107,13 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, } } + if (Changed) + normalizeSuccProbs(); return Changed; } -/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping -/// any DBG_VALUE instructions. Return UnknownLoc if there is none. +/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE +/// instructions. Return UnknownLoc if there is none. DebugLoc MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { DebugLoc DL; @@ -1083,40 +1129,55 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { return DL; } -/// getSuccWeight - Return weight of the edge from this block to MBB. -/// -uint32_t MachineBasicBlock::getSuccWeight(const_succ_iterator Succ) const { - if (Weights.empty()) - return 0; +/// Return probability of the edge from this block to MBB. +BranchProbability +MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const { + if (Probs.empty()) + return BranchProbability(1, succ_size()); - return *getWeightIterator(Succ); + const auto &Prob = *getProbabilityIterator(Succ); + if (Prob.isUnknown()) { + // For unknown probabilities, collect the sum of all known ones, and evenly + // distribute the complement of the sum to each unknown probability. + unsigned KnownProbNum = 0; + auto Sum = BranchProbability::getZero(); + for (auto &P : Probs) { + if (!P.isUnknown()) { + Sum += P; + KnownProbNum++; + } + } + return Sum.getCompl() / (Probs.size() - KnownProbNum); + } else + return Prob; } -/// Set successor weight of a given iterator.
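For intuition, the unknown-probability handling in getSuccProbability() above amounts to the following arithmetic, sketched here with plain doubles instead of BranchProbability (names are invented for the sketch):

#include <cstddef>
#include <vector>

// Each entry is an edge probability in [0, 1], or -1.0 meaning "unknown".
double succProbability(const std::vector<double> &Probs, size_t Idx) {
  if (Probs[Idx] >= 0.0)
    return Probs[Idx]; // known probabilities are returned as-is
  double KnownSum = 0.0;
  size_t KnownNum = 0;
  for (double P : Probs)
    if (P >= 0.0) {
      KnownSum += P;
      ++KnownNum;
    }
  // Distribute the complement of the known sum evenly over unknown edges.
  return (1.0 - KnownSum) / double(Probs.size() - KnownNum);
}

With {0.5, unknown, unknown}, each unknown edge reports (1 - 0.5) / 2 = 0.25, matching the Sum.getCompl() / (Probs.size() - KnownProbNum) expression above.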
+void MachineBasicBlock::setSuccProbability(succ_iterator I, + BranchProbability Prob) { + assert(!Prob.isUnknown()); + if (Probs.empty()) return; - *getWeightIterator(I) = weight; + *getProbabilityIterator(I) = Prob; } -/// getWeightIterator - Return wight iterator corresonding to the I successor -/// iterator -MachineBasicBlock::weight_iterator MachineBasicBlock:: -getWeightIterator(MachineBasicBlock::succ_iterator I) { - assert(Weights.size() == Successors.size() && "Async weight list!"); - size_t index = std::distance(Successors.begin(), I); - assert(index < Weights.size() && "Not a current successor!"); - return Weights.begin() + index; -} - -/// getWeightIterator - Return wight iterator corresonding to the I successor -/// iterator -MachineBasicBlock::const_weight_iterator MachineBasicBlock:: -getWeightIterator(MachineBasicBlock::const_succ_iterator I) const { - assert(Weights.size() == Successors.size() && "Async weight list!"); +/// Return probability iterator corresponding to the I successor iterator +MachineBasicBlock::const_probability_iterator +MachineBasicBlock::getProbabilityIterator( + MachineBasicBlock::const_succ_iterator I) const { + assert(Probs.size() == Successors.size() && "Async probability list!"); const size_t index = std::distance(Successors.begin(), I); - assert(index < Weights.size() && "Not a current successor!"); - return Weights.begin() + index; + assert(index < Probs.size() && "Not a current successor!"); + return Probs.begin() + index; +} + +/// Return probability iterator corresponding to the I successor iterator. +MachineBasicBlock::probability_iterator +MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) { + assert(Probs.size() == Successors.size() && "Async probability list!"); + const size_t index = std::distance(Successors.begin(), I); + assert(index < Probs.size() && "Not a current successor!"); + return Probs.begin() + index; } /// Return whether (physical) register "Reg" has been defined and not killed @@ -1138,33 +1199,33 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, do { --I; - MachineOperandIteratorBase::PhysRegInfo Analysis = + MachineOperandIteratorBase::PhysRegInfo Info = ConstMIOperands(I).analyzePhysReg(Reg, TRI); - if (Analysis.Defines) - // Outputs happen after inputs so they take precedence if both are - // present. - return Analysis.DefinesDead ? LQR_Dead : LQR_Live; + // Defs happen after uses so they take precedence if both are present. - if (Analysis.Kills || Analysis.Clobbers) - // Register killed, so isn't live. + // Register is dead after a dead def of the full register. + if (Info.DeadDef) return LQR_Dead; - - else if (Analysis.ReadsOverlap) - // Defined or read without a previous kill - live. - return Analysis.Reads ? LQR_Live : LQR_OverlappingLive; - + // Register is (at least partially) live after a def. + if (Info.Defined) + return LQR_Live; + // Register is dead after a full kill or clobber and no def. + if (Info.Killed || Info.Clobbered) + return LQR_Dead; + // Register must be live if we read it. + if (Info.Read) + return LQR_Live; } while (I != begin() && --N > 0); } // Did we get to the start of the block? if (I == begin()) { // If so, the register's state is definitely defined by the live-in state. - for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); - RAI.isValid(); ++RAI) { + for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid(); + ++RAI) if (isLiveIn(*RAI)) - return (*RAI == Reg) ?
LQR_Live : LQR_OverlappingLive; - } + return LQR_Live; return LQR_Dead; } @@ -1176,16 +1237,14 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // If this is the last insn in the block, don't search forwards. if (I != end()) { for (++I; I != end() && N > 0; ++I, --N) { - MachineOperandIteratorBase::PhysRegInfo Analysis = + MachineOperandIteratorBase::PhysRegInfo Info = ConstMIOperands(I).analyzePhysReg(Reg, TRI); - if (Analysis.ReadsOverlap) - // Used, therefore must have been live. - return (Analysis.Reads) ? - LQR_Live : LQR_OverlappingLive; - - else if (Analysis.Clobbers || Analysis.Defines) - // Defined (but not read) therefore cannot have been live. + // Register is live when we read it here. + if (Info.Read) + return LQR_Live; + // Register is dead if we can fully overwrite or clobber it here. + if (Info.FullyDefined || Info.Clobbered) return LQR_Dead; } } @@ -1193,3 +1252,17 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, // At this point we have no idea of the liveness of the register. return LQR_Unknown; } + +const uint32_t * +MachineBasicBlock::getBeginClobberMask(const TargetRegisterInfo *TRI) const { + // EH funclet entry does not preserve any registers. + return isEHFuncletEntry() ? TRI->getNoPreservedMask() : nullptr; +} + +const uint32_t * +MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const { + // If we see a return block with successors, this must be a funclet return, + // which does not preserve any registers. If there are no successors, we don't + // care what kind of return it is, putting a mask after it is a no-op. + return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr; +} diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 9151d99089d6..9119e31bdb3c 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -57,7 +57,7 @@ struct GraphTraits { static inline const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) { - return G->getFunction()->begin(); + return &G->getFunction()->front(); } static ChildIteratorType child_begin(const NodeType *N) { @@ -143,7 +143,7 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { MachineLoopInfo &MLI = getAnalysis(); if (!MBFI) MBFI.reset(new ImplType); - MBFI->doFunction(&F, &MBPI, &MLI); + MBFI->calculate(F, MBPI, MLI); #ifndef NDEBUG if (ViewMachineBlockFreqPropagationDAG != GVDT_None) { view(); diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 2969bad4ff98..f5e305645011 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -51,7 +51,7 @@ using namespace llvm; #define DEBUG_TYPE "block-placement" STATISTIC(NumCondBranches, "Number of conditional branches"); -STATISTIC(NumUncondBranches, "Number of uncondittional branches"); +STATISTIC(NumUncondBranches, "Number of unconditional branches"); STATISTIC(CondBranchTakenFreq, "Potential frequency of taking conditional branches"); STATISTIC(UncondBranchTakenFreq, @@ -62,6 +62,11 @@ static cl::opt AlignAllBlock("align-all-blocks", "blocks in the function."), cl::init(0), cl::Hidden); +static cl::opt + AlignAllLoops("align-all-loops", + cl::desc("Force the alignment of all loops in the function."), + cl::init(0), cl::Hidden); + // FIXME: Find a good default for this flag and remove the flag. 
static cl::opt<unsigned> ExitBlockBias( "block-placement-exit-block-bias", cl::desc("Block frequency percentage a loop exit block needs " @@ -81,6 +86,29 @@ static cl::opt<unsigned> OutlineOptionalThreshold( "instruction count below this threshold"), cl::init(4), cl::Hidden); +static cl::opt<unsigned> LoopToColdBlockRatio( + "loop-to-cold-block-ratio", + cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " + "(frequency of block) is greater than this ratio"), + cl::init(5), cl::Hidden); + +static cl::opt<bool> + PreciseRotationCost("precise-rotation-cost", + cl::desc("Model the cost of loop rotation more " + "precisely by using profile data."), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> MisfetchCost( + "misfetch-cost", + cl::desc("Cost that models the probabilistic risk of an instruction " + "misfetch due to a jump, compared to falling through, whose cost " + "is zero."), + cl::init(1), cl::Hidden); + +static cl::opt<unsigned> JumpInstCost("jump-inst-cost", + cl::desc("Cost of jump instructions."), + cl::init(1), cl::Hidden); + namespace { class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. @@ -246,9 +274,12 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet &LoopBlockSet); MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L, const BlockFilterSet &LoopBlockSet); + BlockFilterSet collectLoopBlockSet(MachineFunction &F, MachineLoop &L); void buildLoopChains(MachineFunction &F, MachineLoop &L); void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, const BlockFilterSet &LoopBlockSet); + void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L, + const BlockFilterSet &LoopBlockSet); void buildCFGChains(MachineFunction &F); public: @@ -354,31 +385,56 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, const BranchProbability HotProb(4, 5); // 80% MachineBasicBlock *BestSucc = nullptr; - // FIXME: Due to the performance of the probability and weight routines in - // the MBPI analysis, we manually compute probabilities using the edge - // weights. This is suboptimal as it means that the somewhat subtle - // definition of edge weight semantics is encoded here as well. We should - // improve the MBPI interface to efficiently support query patterns such as - // this. - uint32_t BestWeight = 0; - uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); - DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); - for (MachineBasicBlock *Succ : BB->successors()) { - if (BlockFilter && !BlockFilter->count(Succ)) - continue; - BlockChain &SuccChain = *BlockToChain[Succ]; - if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Already merged!\n"); - continue; - } - if (Succ != *SuccChain.begin()) { - DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); - continue; - } + auto BestProb = BranchProbability::getZero(); - uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ); - BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); + // Adjust edge probabilities by excluding edges pointing to blocks that are + // either not in BlockFilter or are already in the current chain. Consider the + // following CFG: + // + // --->A + // | / \ + // | B C + // | \ / \ + // ----D E + // + // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after + // A->C is chosen as a fall-through, D won't be selected as a successor of C + // due to CFG constraint (the probability of C->D is not greater than + // HotProb).
If we exclude E that is not in BlockFilter when calculating the + // probability of C->D, D will be selected and we will get A C D B as the + // layout of this loop. + auto AdjustedSumProb = BranchProbability::getOne(); + SmallVector Successors; + for (MachineBasicBlock *Succ : BB->successors()) { + bool SkipSucc = false; + if (BlockFilter && !BlockFilter->count(Succ)) { + SkipSucc = true; + } else { + BlockChain *SuccChain = BlockToChain[Succ]; + if (SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(Succ) + << " -> Already merged!\n"); + SkipSucc = true; + } else if (Succ != *SuccChain->begin()) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); + continue; + } + } + if (SkipSucc) + AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ); + else + Successors.push_back(Succ); + } + + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock *Succ : Successors) { + BranchProbability SuccProb; + uint32_t SuccProbN = MBPI->getEdgeProbability(BB, Succ).getNumerator(); + uint32_t SuccProbD = AdjustedSumProb.getNumerator(); + if (SuccProbN >= SuccProbD) + SuccProb = BranchProbability::getOne(); + else + SuccProb = BranchProbability(SuccProbN, SuccProbD); // If we outline optional branches, look whether Succ is unavoidable, i.e. // dominates all terminators of the MachineFunction. If it does, other @@ -406,6 +462,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints. + BlockChain &SuccChain = *BlockToChain[Succ]; if (SuccChain.LoopPredecessors != 0) { if (SuccProb < HotProb) { DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb @@ -415,8 +472,9 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, // Make sure that a hot successor doesn't have a globally more // important predecessor. + auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ); BlockFrequency CandidateEdgeFreq = - MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl(); + MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl(); bool BadCFGConflict = false; for (MachineBasicBlock *Pred : Succ->predecessors()) { if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || @@ -440,10 +498,10 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, << " (prob)" << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") << "\n"); - if (BestSucc && BestWeight >= SuccWeight) + if (BestSucc && BestProb >= SuccProb) continue; BestSucc = Succ; - BestWeight = SuccWeight; + BestProb = SuccProb; } return BestSucc; } @@ -505,14 +563,14 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( const BlockFilterSet *BlockFilter) { for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; ++I) { - if (BlockFilter && !BlockFilter->count(I)) + if (BlockFilter && !BlockFilter->count(&*I)) continue; - if (BlockToChain[I] != &PlacedChain) { + if (BlockToChain[&*I] != &PlacedChain) { PrevUnplacedBlockIt = I; // Now select the head of the chain to which the unplaced block belongs // as the block to place. This will force the entire chain to be placed, // and satisfies the requirements of merging chains. 
- return *BlockToChain[I]->begin(); + return *BlockToChain[&*I]->begin(); } } return nullptr; @@ -672,13 +730,8 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, MachineBasicBlock *OldExitingBB = ExitingBB; BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; bool HasLoopingSucc = false; - // FIXME: Due to the performance of the probability and weight routines in - // the MBPI analysis, we use the internal weights and manually compute the - // probabilities to avoid quadratic behavior. - uint32_t WeightScale = 0; - uint32_t SumWeight = MBPI->getSumForBlock(MBB, WeightScale); for (MachineBasicBlock *Succ : MBB->successors()) { - if (Succ->isLandingPad()) + if (Succ->isEHPad()) continue; if (Succ == MBB) continue; @@ -690,10 +743,10 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, continue; } - uint32_t SuccWeight = MBPI->getEdgeWeight(MBB, Succ); + auto SuccProb = MBPI->getEdgeProbability(MBB, Succ); if (LoopBlockSet.count(Succ)) { DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " - << getBlockName(Succ) << " (" << SuccWeight << ")\n"); + << getBlockName(Succ) << " (" << SuccProb << ")\n"); HasLoopingSucc = true; continue; } @@ -705,7 +758,6 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, BlocksExitingToOuterLoop.insert(MBB); } - BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; @@ -791,6 +843,188 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); } +/// \brief Attempt to rotate a loop based on profile data to reduce branch cost. +/// +/// With profile data, we can determine the cost in terms of missed fall through +/// opportunities when rotating a loop chain and select the best rotation. +/// Basically, there are three kinds of cost to consider for each rotation: +/// 1. The possibly missed fall through edge (if it exists) from BB out of +/// the loop to the loop header. +/// 2. The possibly missed fall through edges (if they exist) from the loop +/// exits to BB out of the loop. +/// 3. The missed fall through edge (if it exists) from the last BB to the +/// first BB in the loop chain. +/// Therefore, the cost for a given rotation is the sum of costs listed above. +/// We select the best rotation with the smallest cost. +void MachineBlockPlacement::rotateLoopWithProfile( + BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { + auto HeaderBB = L.getHeader(); + auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB); + auto RotationPos = LoopChain.end(); + + BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); + + // A utility lambda that scales up a block frequency by dividing it by a + // branch probability which is the reciprocal of the scale. + auto ScaleBlockFrequency = [](BlockFrequency Freq, + unsigned Scale) -> BlockFrequency { + if (Scale == 0) + return 0; + // Use operator / between BlockFrequency and BranchProbability to implement + // saturating multiplication. + return Freq / BranchProbability(1, Scale); + }; + + // Compute the cost of the missed fall-through edge to the loop header if the + // chain head is not the loop header. As we only consider natural loops with + // single header, this computation can be done only once. 
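// For intuition (numbers invented for illustration): with the default
// MisfetchCost = 1 and JumpInstCost = 1, a predecessor outside the loop with
// frequency 100 and a 1/2 edge probability into the header gives
// EdgeFreq = 50 and FallThruCost = ScaleBlockFrequency(50, 1) = 50; if the
// header is that predecessor's only successor, the unconditional jump adds
// another 50, so it contributes a candidate HeaderFallThroughCost of 100.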
+ BlockFrequency HeaderFallThroughCost(0); + for (auto *Pred : HeaderBB->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { + auto EdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB); + auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost); + // If the predecessor has only an unconditional jump to the header, we + // need to consider the cost of this jump. + if (Pred->succ_size() == 1) + FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost); + HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost); + } + } + + // Here we collect all exit blocks in the loop, and for each exit we find its + // hottest exit edge. For each loop rotation, we define the loop exit cost + // as the sum of frequencies of exit edges we collect here, excluding the exit + // edge from the tail of the loop chain. + SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq; + for (auto BB : LoopChain) { + auto LargestExitEdgeProb = BranchProbability::getZero(); + for (auto *Succ : BB->successors()) { + BlockChain *SuccChain = BlockToChain[Succ]; + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) { + auto SuccProb = MBPI->getEdgeProbability(BB, Succ); + LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb); + } + } + if (LargestExitEdgeProb > BranchProbability::getZero()) { + auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb; + ExitsWithFreq.emplace_back(BB, ExitFreq); + } + } + + // In this loop we iterate over every block in the loop chain and calculate + // the cost assuming the block is the head of the loop chain. When the loop + // ends, we should have found the best candidate as the loop chain's head. + for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()), + EndIter = LoopChain.end(); + Iter != EndIter; Iter++, TailIter++) { + // TailIter is used to track the tail of the loop chain if the block we are + // checking (pointed by Iter) is the head of the chain. + if (TailIter == LoopChain.end()) + TailIter = LoopChain.begin(); + + auto TailBB = *TailIter; + + // Calculate the cost by putting this BB to the top. + BlockFrequency Cost = 0; + + // If the current BB is not the loop header, we need to take into account + // the cost of the missed fall through edge from outside of the loop to the + // header. + if (Iter != HeaderIter) + Cost += HeaderFallThroughCost; + + // Collect the loop exit cost by summing up frequencies of all exit edges + // except the one from the chain tail. + for (auto &ExitWithFreq : ExitsWithFreq) + if (TailBB != ExitWithFreq.first) + Cost += ExitWithFreq.second; + + // The cost of breaking the once fall-through edge from the tail to the top + // of the loop chain. Here we need to consider three cases: + // 1. If the tail node has only one successor, then we will get an + // additional jmp instruction. So the cost here is (MisfetchCost + + // JumpInstCost) * tail node frequency. + // 2. If the tail node has two successors, then we may still get an + // additional jmp instruction if the layout successor after the loop + // chain is not its CFG successor. Note that the more frequently executed + // jmp instruction will be put ahead of the other one. Assume the + // frequencies of those two branches are x and y, where x is the frequency + // of the edge to the chain head, then the cost will be + // (x * MisfetchCost + min(x, y) * JumpInstCost) * tail node frequency. + // 3.
If the tail node has more than two successors (this rarely happens), + // we won't consider any additional cost. + if (TailBB->isSuccessor(*Iter)) { + auto TailBBFreq = MBFI->getBlockFreq(TailBB); + if (TailBB->succ_size() == 1) + Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(), + MisfetchCost + JumpInstCost); + else if (TailBB->succ_size() == 2) { + auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter); + auto TailToHeadFreq = TailBBFreq * TailToHeadProb; + auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2) + ? TailBBFreq * TailToHeadProb.getCompl() + : TailToHeadFreq; + Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) + + ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost); + } + } + + DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockNum(*Iter) + << " to the top: " << Cost.getFrequency() << "\n"); + + if (Cost < SmallestRotationCost) { + SmallestRotationCost = Cost; + RotationPos = Iter; + } + } + + if (RotationPos != LoopChain.end()) { + DEBUG(dbgs() << "Rotate loop by making " << getBlockNum(*RotationPos) + << " to the top\n"); + std::rotate(LoopChain.begin(), RotationPos, LoopChain.end()); + } +} + +/// \brief Collect blocks in the given loop that are to be placed. +/// +/// When profile data is available, exclude cold blocks from the returned set; +/// otherwise, collect all blocks in the loop. +MachineBlockPlacement::BlockFilterSet +MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) { + BlockFilterSet LoopBlockSet; + + // Filter cold blocks off from LoopBlockSet when profile data is available. + // Collect the sum of frequencies of incoming edges to the loop header from + // outside. If we treat the loop as a super block, this is the frequency of + // the loop. Then for each block in the loop, we calculate the ratio between + // its frequency and the frequency of the loop block. When it is too small, + // don't add it to the loop chain. If there are outer loops, then this block + // will be merged into the first outer loop chain for which this block is not + // cold anymore. This needs precise profile data and we only do this when + // profile data is available. + if (F.getFunction()->getEntryCount()) { + BlockFrequency LoopFreq(0); + for (auto LoopPred : L.getHeader()->predecessors()) + if (!L.contains(LoopPred)) + LoopFreq += MBFI->getBlockFreq(LoopPred) * + MBPI->getEdgeProbability(LoopPred, L.getHeader()); + + for (MachineBasicBlock *LoopBB : L.getBlocks()) { + auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency(); + if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio) + continue; + LoopBlockSet.insert(LoopBB); + } + } else + LoopBlockSet.insert(L.block_begin(), L.block_end()); + + return LoopBlockSet; +} + /// \brief Forms basic block chains from the natural loop structures. /// /// These chains are designed to preserve the existing *structure* of the code @@ -805,19 +1039,27 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, buildLoopChains(F, *InnerLoop); SmallVector BlockWorkList; - BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); + BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L); + + // Check if we have profile data for this function. If yes, we will rotate + // this loop by modeling costs more precisely which requires the profile data + // for better layout. + bool RotateLoopWithProfile = + PreciseRotationCost && F.getFunction()->getEntryCount(); // First check to see if there is an obviously preferable top block for the // loop. 
This will default to the header, but may end up as one of the // predecessors to the header if there is one which will result in strictly // fewer branches in the loop body. - MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet); + // When we use profile data to rotate the loop, this is unnecessary. + MachineBasicBlock *LoopTop = + RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet); // If we selected just the header for the loop top, look for a potentially // profitable exit block in the event that rotating the loop can eliminate // branches by placing an exit edge at the bottom. MachineBasicBlock *ExitingBB = nullptr; - if (LoopTop == L.getHeader()) + if (!RotateLoopWithProfile && LoopTop == L.getHeader()) ExitingBB = findBestLoopExit(F, L, LoopBlockSet); BlockChain &LoopChain = *BlockToChain[LoopTop]; @@ -828,7 +1070,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallPtrSet UpdatedPreds; assert(LoopChain.LoopPredecessors == 0); UpdatedPreds.insert(&LoopChain); - for (MachineBasicBlock *LoopBB : L.getBlocks()) { + + for (MachineBasicBlock *LoopBB : LoopBlockSet) { BlockChain &Chain = *BlockToChain[LoopBB]; if (!UpdatedPreds.insert(&Chain).second) continue; @@ -848,7 +1091,11 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, } buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); - rotateLoop(LoopChain, ExitingBB, LoopBlockSet); + + if (RotateLoopWithProfile) + rotateLoopWithProfile(LoopChain, L, LoopBlockSet); + else + rotateLoop(LoopChain, ExitingBB, LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -889,7 +1136,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // the assumptions of the remaining algorithm. SmallVector Cond; // For AnalyzeBranch. for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - MachineBasicBlock *BB = FI; + MachineBasicBlock *BB = &*FI; BlockChain *Chain = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); // Also, merge any blocks which we cannot reason about and must preserve @@ -900,8 +1147,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) break; - MachineFunction::iterator NextFI(std::next(FI)); - MachineBasicBlock *NextBB = NextFI; + MachineFunction::iterator NextFI = std::next(FI); + MachineBasicBlock *NextBB = &*NextFI; // Ensure that the layout successor is a viable block, as we know that // fallthrough is a possibility. assert(NextFI != FE && "Can't fallthrough past the last block."); @@ -1004,7 +1251,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Update the terminator of the previous block. if (ChainBB == *FunctionChain.begin()) continue; - MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(ChainBB)); + MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB)); // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this @@ -1035,14 +1282,16 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } // If PrevBB has a two-way branch, try to re-order the branches - // such that we branch to the successor with higher weight first. + // such that we branch to the successor with higher probability first. 
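// For example (numbers invented for illustration): if MBPI reports
// P(PrevBB->FBB) = 7/10 and P(PrevBB->TBB) = 3/10, the branch condition is
// reversed below so that the hotter edge to FBB becomes the primary branch
// target.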
if (TBB && !Cond.empty() && FBB && - MBPI->getEdgeWeight(PrevBB, FBB) > MBPI->getEdgeWeight(PrevBB, TBB) && + MBPI->getEdgeProbability(PrevBB, FBB) > + MBPI->getEdgeProbability(PrevBB, TBB) && !TII->ReverseBranchCondition(Cond)) { DEBUG(dbgs() << "Reverse order of the two branches: " << getBlockName(PrevBB) << "\n"); - DEBUG(dbgs() << " Edge weight: " << MBPI->getEdgeWeight(PrevBB, FBB) - << " vs " << MBPI->getEdgeWeight(PrevBB, TBB) << "\n"); + DEBUG(dbgs() << " Edge probability: " + << MBPI->getEdgeProbability(PrevBB, FBB) << " vs " + << MBPI->getEdgeProbability(PrevBB, TBB) << "\n"); DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); @@ -1064,13 +1313,14 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. + // FIXME: Use Function::optForSize(). if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) return; if (FunctionChain.begin() == FunctionChain.end()) return; // Empty chain. const BranchProbability ColdProb(1, 5); // 20% - BlockFrequency EntryFreq = MBFI->getBlockFreq(F.begin()); + BlockFrequency EntryFreq = MBFI->getBlockFreq(&F.front()); BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; for (MachineBasicBlock *ChainBB : FunctionChain) { if (ChainBB == *FunctionChain.begin()) @@ -1084,6 +1334,11 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!L) continue; + if (AlignAllLoops) { + ChainBB->setAlignment(AlignAllLoops); + continue; + } + unsigned Align = TLI->getPrefLoopAlignment(L); if (!Align) continue; // Don't care about loop alignment. @@ -1224,4 +1479,3 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { return false; } - diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 6fbc2be70486..cf6d4018cb70 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -28,91 +28,48 @@ char MachineBranchProbabilityInfo::ID = 0; void MachineBranchProbabilityInfo::anchor() { } -uint32_t MachineBranchProbabilityInfo:: -getSumForBlock(const MachineBasicBlock *MBB, uint32_t &Scale) const { - // First we compute the sum with 64-bits of precision, ensuring that cannot - // overflow by bounding the number of weights considered. Hopefully no one - // actually needs 2^32 successors. - assert(MBB->succ_size() < UINT32_MAX); - uint64_t Sum = 0; - Scale = 1; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - Sum += Weight; - } - - // If the computed sum fits in 32-bits, we're done. - if (Sum <= UINT32_MAX) - return Sum; - - // Otherwise, compute the scale necessary to cause the weights to fit, and - // re-sum with that scale applied. 
- assert((Sum / UINT32_MAX) < UINT32_MAX); - Scale = (Sum / UINT32_MAX) + 1; - Sum = 0; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - Sum += Weight / Scale; - } - assert(Sum <= UINT32_MAX); - return Sum; +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { + return Src->getSuccProbability(Dst); } -uint32_t MachineBranchProbabilityInfo:: -getEdgeWeight(const MachineBasicBlock *Src, - MachineBasicBlock::const_succ_iterator Dst) const { - uint32_t Weight = Src->getSuccWeight(Dst); - if (!Weight) - return DEFAULT_WEIGHT; - return Weight; -} - -uint32_t MachineBranchProbabilityInfo:: -getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { // This is a linear search. Try to use the const_succ_iterator version when // possible. - return getEdgeWeight(Src, std::find(Src->succ_begin(), Src->succ_end(), Dst)); + return getEdgeProbability(Src, + std::find(Src->succ_begin(), Src->succ_end(), Dst)); } bool MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - // FIXME: Compare against a static "hot" BranchProbability. - return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); + static BranchProbability HotProb(4, 5); + return getEdgeProbability(Src, Dst) > HotProb; } MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { - uint32_t MaxWeight = 0; + auto MaxProb = BranchProbability::getZero(); MachineBasicBlock *MaxSucc = nullptr; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - uint32_t Weight = getEdgeWeight(MBB, I); - if (Weight > MaxWeight) { - MaxWeight = Weight; + auto Prob = getEdgeProbability(MBB, I); + if (Prob > MaxProb) { + MaxProb = Prob; MaxSucc = *I; } } - if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) + static BranchProbability HotProb(4, 5); + if (getEdgeProbability(MBB, MaxSucc) >= HotProb) return MaxSucc; return nullptr; } -BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( - const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { - uint32_t Scale = 1; - uint32_t D = getSumForBlock(Src, Scale); - uint32_t N = getEdgeWeight(Src, Dst) / Scale; - - return BranchProbability(N, D); -} - raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability( raw_ostream &OS, const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 87aaaa0834cf..021707b7c3c7 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -57,7 +57,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addPreservedID(MachineLoopInfoID); AU.addRequired(); AU.addPreserved(); @@ -111,7 +111,7 @@ char &llvm::MachineCSEID = MachineCSE::ID; INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 
INITIALIZE_PASS_END(MachineCSE, "machine-cse", "Machine Common Subexpression Elimination", false, false) @@ -714,7 +714,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { TII = MF.getSubtarget().getInstrInfo(); TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis(); LookAheadLimit = TII->getMachineCSELookAheadLimit(); return PerformCSE(DT->getRootNode()); diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp index f33d0e6a28e5..fa43c4dfa05a 100644 --- a/lib/CodeGen/MachineCombiner.cpp +++ b/lib/CodeGen/MachineCombiner.cpp @@ -10,6 +10,7 @@ // The machine combiner pass uses machine trace metrics to ensure the combined // instructions does not lengthen the critical path or the resource depth. //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "machine-combiner" #include "llvm/ADT/Statistic.h" @@ -68,10 +69,10 @@ private: MachineTraceMetrics::Trace BlockTrace); bool improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, - MachineTraceMetrics::Trace BlockTrace, - SmallVectorImpl &InsInstrs, - DenseMap &InstrIdxForVirtReg, - bool NewCodeHasLessInsts); + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl &InsInstrs, + DenseMap &InstrIdxForVirtReg, + MachineCombinerPattern Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, @@ -122,9 +123,9 @@ unsigned MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, DenseMap &InstrIdxForVirtReg, MachineTraceMetrics::Trace BlockTrace) { - SmallVector InstrDepth; - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // For each instruction in the new sequence compute the depth based on the // operands. Use the trace information when possible. For new operands which @@ -180,8 +181,8 @@ MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, /// \returns Latency of \p NewRoot unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace) { - - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // Check each definition in NewRoot and compute the latency unsigned NewRootLatency = 0; @@ -202,62 +203,86 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, UseMO->findRegisterUseOperandIdx(MO.getReg())); } else { - LatencyOp = TSchedModel.computeInstrLatency(NewRoot->getOpcode()); + LatencyOp = TSchedModel.computeInstrLatency(NewRoot); } NewRootLatency = std::max(NewRootLatency, LatencyOp); } return NewRootLatency; } -/// True when the new instruction sequence does not lengthen the critical path -/// and the new sequence has less instructions or the new sequence improves the -/// critical path. +/// The combiner's goal may differ based on which pattern it is attempting +/// to optimize. +enum class CombinerObjective { + MustReduceDepth, // The data dependency chain must be improved. + Default // The critical path must not be lengthened. +}; + +static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { + // TODO: If C++ ever gets a real enum class, make this part of the + // MachineCombinerPattern class. 
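+ // For example (illustration, not from the source): the REASSOC patterns
+ // rewrite a serial chain like ((a + b) + c) + d into (a + b) + (c + d),
+ // cutting the dependency depth from 3 to 2, which is why they demand
+ // MustReduceDepth instead of the slack-based default check.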
+ switch (P) { + case MachineCombinerPattern::REASSOC_AX_BY: + case MachineCombinerPattern::REASSOC_AX_YB: + case MachineCombinerPattern::REASSOC_XA_BY: + case MachineCombinerPattern::REASSOC_XA_YB: + return CombinerObjective::MustReduceDepth; + default: + return CombinerObjective::Default; + } +} + /// The DAGCombine code sequence ends in MI (Machine Instruction) Root. /// The new code sequence ends in MI NewRoot. A necessary condition for the new /// sequence to replace the old sequence is that it cannot lengthen the critical -/// path. This is decided by the formula: -/// (NewRootDepth + NewRootLatency) <= (RootDepth + RootLatency + RootSlack)). -/// If the new sequence has an equal length critical path but does not reduce -/// the number of instructions (NewCodeHasLessInsts is false), then it is not -/// considered an improvement. The slack is the number of cycles Root can be -/// delayed before the critical patch becomes longer. +/// path. The definition of "improve" may be restricted by specifying that the +/// new path improves the data dependency chain (MustReduceDepth). bool MachineCombiner::improvesCriticalPathLen( MachineBasicBlock *MBB, MachineInstr *Root, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, DenseMap &InstrIdxForVirtReg, - bool NewCodeHasLessInsts) { - - assert(TSchedModel.hasInstrSchedModel() && "Missing machine model\n"); + MachineCombinerPattern Pattern) { + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); // NewRoot is the last instruction in the \p InsInstrs vector. - // Get depth and latency of NewRoot. unsigned NewRootIdx = InsInstrs.size() - 1; MachineInstr *NewRoot = InsInstrs[NewRootIdx]; - unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); - unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); - // Get depth, latency and slack of Root. + // Get depth and latency of NewRoot and Root. + unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; + dbgs() << " RootDepth: " << RootDepth << "\n"); + + // For a transform such as reassociation, the cost equation is + // conservatively calculated so that we must improve the depth (data + // dependency cycles) in the critical path to proceed with the transform. + // Being conservative also protects against inaccuracies in the underlying + // machine trace metrics and CPU models. + if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) + return NewRootDepth < RootDepth; + + // A more flexible cost calculation for the critical path includes the slack + // of the original code sequence. This may allow the transform to proceed + // even if the instruction depths (data dependency cycles) become worse. 
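+ // Worked example (numbers invented): with NewRootDepth = 6, RootDepth = 5,
+ // NewRootLatency = 2, RootLatency = 2 and RootSlack = 1, the check below is
+ // 6 + 2 <= 5 + 2 + 1, i.e. 8 <= 8, so the transform is accepted even though
+ // the depth regressed; under MustReduceDepth it would already have been
+ // rejected above, since 6 < 5 does not hold.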
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); unsigned RootLatency = TSchedModel.computeInstrLatency(Root); unsigned RootSlack = BlockTrace.getInstrSlack(Root); - DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; - dbgs() << " NewRootDepth: " << NewRootDepth - << " NewRootLatency: " << NewRootLatency << "\n"; - dbgs() << " RootDepth: " << RootDepth << " RootLatency: " << RootLatency - << " RootSlack: " << RootSlack << "\n"; - dbgs() << " NewRootDepth + NewRootLatency " + DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; + dbgs() << " RootLatency: " << RootLatency << "\n"; + dbgs() << " RootSlack: " << RootSlack << "\n"; + dbgs() << " NewRootDepth + NewRootLatency = " << NewRootDepth + NewRootLatency << "\n"; - dbgs() << " RootDepth + RootLatency + RootSlack " + dbgs() << " RootDepth + RootLatency + RootSlack = " << RootDepth + RootLatency + RootSlack << "\n";); unsigned NewCycleCount = NewRootDepth + NewRootLatency; unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; - if (NewCodeHasLessInsts) - return NewCycleCount <= OldCycleCount; - else - return NewCycleCount < OldCycleCount; + return NewCycleCount <= OldCycleCount; } /// helper routine to convert instructions into SC @@ -271,11 +296,14 @@ void MachineCombiner::instr2instrSC( InstrsSC.push_back(SC); } } + /// True when the new instructions do not increase resource length bool MachineCombiner::preservesResourceLen( MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs) { + if (!TSchedModel.hasInstrSchedModel()) + return true; // Compute current resource length @@ -310,7 +338,7 @@ bool MachineCombiner::preservesResourceLen( bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { if (OptSize && (NewSize < OldSize)) return true; - if (!TSchedModel.hasInstrSchedModel()) + if (!TSchedModel.hasInstrSchedModelOrItineraries()) return true; return false; } @@ -332,7 +360,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { auto &MI = *BlockIter++; DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); - SmallVector Patterns; + SmallVector Patterns; // The motivating example is: // // MUL Other MUL_op1 MUL_op2 Other @@ -358,54 +386,55 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { // mostly one pattern, and getMachineCombinerPatterns() can order patterns // based on an internal cost heuristic. - if (TII->getMachineCombinerPatterns(MI, Patterns)) { - for (auto P : Patterns) { - SmallVector InsInstrs; - SmallVector DelInstrs; - DenseMap InstrIdxForVirtReg; - if (!MinInstr) - MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); - MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + if (!TII->getMachineCombinerPatterns(MI, Patterns)) + continue; + + for (auto P : Patterns) { + SmallVector InsInstrs; + SmallVector DelInstrs; + DenseMap InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + unsigned NewInstCount = InsInstrs.size(); + unsigned OldInstCount = DelInstrs.size(); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. 
+ if (!NewInstCount) + continue; + + // Substitute when we optimize for codesize and the new sequence has + // fewer instructions OR + // the new sequence neither lengthens the critical path nor increases + // resource pressure. + if (doSubstitute(NewInstCount, OldInstCount) || + (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg, P) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + + Changed = true; + ++NumInstCombined; + + Traces->invalidate(MBB); Traces->verifyAnalysis(); - TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, - InstrIdxForVirtReg); - unsigned NewInstCount = InsInstrs.size(); - unsigned OldInstCount = DelInstrs.size(); - // Found pattern, but did not generate alternative sequence. - // This can happen e.g. when an immediate could not be materialized - // in a single instruction. - if (!NewInstCount) - continue; - // Substitute when we optimize for codesize and the new sequence has - // fewer instructions OR - // the new sequence neither lengthens the critical path nor increases - // resource pressure. - if (doSubstitute(NewInstCount, OldInstCount) || - (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, - InstrIdxForVirtReg, - NewInstCount < OldInstCount) && - preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { - for (auto *InstrPtr : InsInstrs) - MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); - for (auto *InstrPtr : DelInstrs) - InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); - - Changed = true; - ++NumInstCombined; - - Traces->invalidate(MBB); - Traces->verifyAnalysis(); - // Eagerly stop after the first pattern fires. - break; - } else { - // Cleanup instructions of the alternative code sequence. There is no - // use for them. - MachineFunction *MF = MBB->getParent(); - for (auto *InstrPtr : InsInstrs) - MF->DeleteMachineInstr(InstrPtr); - } - InstrIdxForVirtReg.clear(); + // Eagerly stop after the first pattern fires. + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. 
+ MachineFunction *MF = MBB->getParent(); + for (auto *InstrPtr : InsInstrs) + MF->DeleteMachineInstr(InstrPtr); } + InstrIdxForVirtReg.clear(); } } @@ -420,9 +449,8 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { TSchedModel.init(SchedModel, &STI, TII); MRI = &MF.getRegInfo(); Traces = &getAnalysis(); - MinInstr = 0; - - OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + MinInstr = nullptr; + OptSize = MF.getFunction()->optForSize(); DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 9856e70edaef..ca4bb1c6ad49 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionInitializer.h" @@ -26,6 +27,8 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -44,6 +47,11 @@ using namespace llvm; #define DEBUG_TYPE "codegen" +static cl::opt + AlignAllFunctions("align-all-functions", + cl::desc("Force the alignment of all functions."), + cl::init(0), cl::Hidden); + void MachineFunctionInitializer::anchor() {} //===----------------------------------------------------------------------===// @@ -79,12 +87,27 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. + // FIXME: Use Function::optForSize(). if (!Fn->hasFnAttribute(Attribute::OptimizeForSize)) Alignment = std::max(Alignment, STI->getTargetLowering()->getPrefFunctionAlignment()); + if (AlignAllFunctions) + Alignment = AlignAllFunctions; + FunctionNumber = FunctionNum; JumpTableInfo = nullptr; + + if (isFuncletEHPersonality(classifyEHPersonality( + F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) { + WinEHInfo = new (Allocator) WinEHFuncInfo(); + } + + assert(TM.isCompatibleDataLayout(getDataLayout()) && + "Can't create a MachineFunction using a Module with a " + "Target-incompatible DataLayout attached\n"); + + PSVManager = llvm::make_unique(); } MachineFunction::~MachineFunction() { @@ -117,6 +140,11 @@ MachineFunction::~MachineFunction() { JumpTableInfo->~MachineJumpTableInfo(); Allocator.Deallocate(JumpTableInfo); } + + if (WinEHInfo) { + WinEHInfo->~WinEHFuncInfo(); + Allocator.Deallocate(WinEHInfo); + } } const DataLayout &MachineFunction::getDataLayout() const { @@ -149,7 +177,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (MBB == nullptr) MBBI = begin(); else - MBBI = MBB; + MBBI = MBB->getIterator(); // Figure out the block number this should have. 
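The WinEHFuncInfo handling in the MachineFunction constructor and destructor above is the usual bump-allocator idiom: the arena hands out raw memory and never runs destructors, so the owner placement-news the object in and must invoke the destructor explicitly on teardown. A self-contained sketch; Arena and Info are invented stand-ins for BumpPtrAllocator and WinEHFuncInfo:

    #include <cstdlib>
    #include <new>

    struct Arena { // stand-in: a real bump allocator carves memory from slabs
      void *Allocate(std::size_t Size) { return std::malloc(Size); }
      void Deallocate(void *P) { std::free(P); }
    };

    struct Info { // stand-in for WinEHFuncInfo
      int State = -1;
      ~Info() {} // side tables would be released here
    };

    int main() {
      Arena Allocator;
      Info *I = new (Allocator.Allocate(sizeof(Info))) Info(); // placement new
      I->~Info();              // the arena never calls destructors itself
      Allocator.Deallocate(I); // a real bump allocator frees slabs en masse
      return 0;
    }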
unsigned BlockNo = 0; @@ -169,7 +197,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { if (MBBNumbering[BlockNo]) MBBNumbering[BlockNo]->setNumber(-1); - MBBNumbering[BlockNo] = MBBI; + MBBNumbering[BlockNo] = &*MBBI; MBBI->setNumber(BlockNo); } } @@ -322,6 +350,13 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, return std::make_pair(Result, Result + Num); } +const char *MachineFunction::createExternalSymbolName(StringRef Name) { + char *Dest = Allocator.Allocate(Name.size() + 1); + std::copy(Name.begin(), Name.end(), Dest); + Dest[Name.size()] = 0; + return Dest; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void MachineFunction::dump() const { print(dbgs()); @@ -593,10 +628,9 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { BV.set(*CSR); // Saved CSRs are not pristine. - const std::vector &CSI = getCalleeSavedInfo(); - for (std::vector::const_iterator I = CSI.begin(), - E = CSI.end(); I != E; ++I) - BV.reset(I->getReg()); + for (auto &I : getCalleeSavedInfo()) + for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) + BV.reset(*S); return BV; } @@ -801,42 +835,26 @@ Type *MachineConstantPoolEntry::getType() const { return Val.ConstVal->getType(); } - -unsigned MachineConstantPoolEntry::getRelocationInfo() const { +bool MachineConstantPoolEntry::needsRelocation() const { if (isMachineConstantPoolEntry()) - return Val.MachineCPVal->getRelocationInfo(); - return Val.ConstVal->getRelocationInfo(); + return true; + return Val.ConstVal->needsRelocation(); } SectionKind MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { - SectionKind Kind; - switch (getRelocationInfo()) { + if (needsRelocation()) + return SectionKind::getReadOnlyWithRel(); + switch (DL->getTypeAllocSize(getType())) { + case 4: + return SectionKind::getMergeableConst4(); + case 8: + return SectionKind::getMergeableConst8(); + case 16: + return SectionKind::getMergeableConst16(); default: - llvm_unreachable("Unknown section kind"); - case Constant::GlobalRelocations: - Kind = SectionKind::getReadOnlyWithRel(); - break; - case Constant::LocalRelocation: - Kind = SectionKind::getReadOnlyWithRelLocal(); - break; - case Constant::NoRelocation: - switch (DL->getTypeAllocSize(getType())) { - case 4: - Kind = SectionKind::getMergeableConst4(); - break; - case 8: - Kind = SectionKind::getMergeableConst8(); - break; - case 16: - Kind = SectionKind::getMergeableConst16(); - break; - default: - Kind = SectionKind::getReadOnly(); - break; - } + return SectionKind::getReadOnly(); } - return Kind; } MachineConstantPool::~MachineConstantPool() { diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index aaf06a70da74..05463fc6a1ef 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -13,11 +13,14 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/StackProtector.h" @@ -49,13 +52,16 @@ void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { // passes 
explicitly. This does not include setPreservesCFG, // because CodeGen overloads that to mean preserving the MachineBasicBlock // CFG in addition to the LLVM IR CFG. - AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); FunctionPass::getAnalysisUsage(AU); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index fdc4226ad926..1eb2edcd7cec 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -33,6 +33,7 @@ #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -43,6 +44,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +static cl::opt<bool> PrintWholeRegMask( + "print-whole-regmask", + cl::desc("Print the full contents of regmask operands in IR dumps"), + cl::init(true), cl::Hidden); + //===----------------------------------------------------------------------===// // MachineOperand Implementation //===----------------------------------------------------------------------===// @@ -407,9 +413,26 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, if (getOffset()) OS << "+" << getOffset(); OS << '>'; break; - case MachineOperand::MO_RegisterMask: - OS << "<regmask>"; + case MachineOperand::MO_RegisterMask: { + unsigned NumRegsInMask = 0; + unsigned NumRegsEmitted = 0; + OS << "<regmask"; + for (unsigned i = 0; i < TRI->getNumRegs(); ++i) { + unsigned MaskWord = i / 32; + unsigned MaskBit = i % 32; + if (getRegMask()[MaskWord] & (1 << MaskBit)) { + if (PrintWholeRegMask || NumRegsEmitted <= 10) { + OS << " " << PrintReg(i, TRI); + NumRegsEmitted++; + } + NumRegsInMask++; + } + } + if (NumRegsEmitted != NumRegsInMask) + OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more..."; + OS << ">"; break; + } case MachineOperand::MO_RegisterLiveOut: OS << "<regliveout>"; break; @@ -443,26 +466,28 @@ unsigned MachinePointerInfo::getAddrSpace() const { /// getConstantPool - Return a MachinePointerInfo record that refers to the /// constant pool. -MachinePointerInfo MachinePointerInfo::getConstantPool() { - return MachinePointerInfo(PseudoSourceValue::getConstantPool()); +MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getConstantPool()); } /// getFixedStack - Return a MachinePointerInfo record that refers to the /// specified FrameIndex.
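In the MO_RegisterMask printer above, the MaskWord/MaskBit arithmetic is the standard packed-bitvector probe: one bit per physical register, 32 per word, and a set bit means the register is preserved across the call. A minimal sketch with invented mask contents:

    #include <cstdint>
    #include <cstdio>

    static bool isPreserved(const uint32_t *Mask, unsigned Reg) {
      return Mask[Reg / 32] & (1u << (Reg % 32)); // MaskWord and MaskBit
    }

    int main() {
      const uint32_t Mask[2] = {0x00000005u, 0x00000001u}; // regs 0, 2, 32 set
      const unsigned Regs[] = {0, 1, 2, 32, 33};
      for (unsigned Reg : Regs)
        std::printf("reg %2u: %s\n", Reg,
                    isPreserved(Mask, Reg) ? "preserved" : "clobbered");
      return 0;
    }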
-MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) { - return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset); +MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, + int FI, int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); } -MachinePointerInfo MachinePointerInfo::getJumpTable() { - return MachinePointerInfo(PseudoSourceValue::getJumpTable()); +MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getJumpTable()); } -MachinePointerInfo MachinePointerInfo::getGOT() { - return MachinePointerInfo(PseudoSourceValue::getGOT()); +MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getGOT()); } -MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) { - return MachinePointerInfo(PseudoSourceValue::getStack(), Offset); +MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, + int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getStack(), Offset); } MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, @@ -606,10 +631,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { if (MCID->ImplicitDefs) - for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) + for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; + ++ImpDefs) addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) + for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses; + ++ImpUses) addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true)); } @@ -841,7 +868,7 @@ void MachineInstr::addMemOperand(MachineFunction &MF, bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { assert(!isBundledWithPred() && "Must be called on bundle header"); - for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) { + for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) { if (MII->getDesc().getFlags() & Mask) { if (Type == AnyInBundle) return true; @@ -865,13 +892,13 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, if (isBundle()) { // Both instructions are bundles, compare MIs inside the bundle. 
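The MachinePointerInfo hunks above move pseudo source values from process-wide singletons to a per-function PSVManager, so each factory gains a MachineFunction parameter. A hypothetical call-site migration (DAG stands for whatever SelectionDAG-like object the caller already has in scope):

    // Before: the PSVs were global, so no context was required.
    //   MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool();
    //
    // After: the PSVs belong to the function being compiled.
    //   MachineFunction &MF = DAG.getMachineFunction();
    //   MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);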
- MachineBasicBlock::const_instr_iterator I1 = *this; + MachineBasicBlock::const_instr_iterator I1 = getIterator(); MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end(); - MachineBasicBlock::const_instr_iterator I2 = *Other; + MachineBasicBlock::const_instr_iterator I2 = Other->getIterator(); MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end(); while (++I1 != E1 && I1->isInsideBundle()) { ++I2; - if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(I2, Check)) + if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(&*I2, Check)) return false; } } @@ -976,7 +1003,7 @@ unsigned MachineInstr::getNumExplicitOperands() const { void MachineInstr::bundleWithPred() { assert(!isBundledWithPred() && "MI is already bundled with its predecessor"); setFlag(BundledPred); - MachineBasicBlock::instr_iterator Pred = this; + MachineBasicBlock::instr_iterator Pred = getIterator(); --Pred; assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags"); Pred->setFlag(BundledSucc); @@ -985,7 +1012,7 @@ void MachineInstr::bundleWithPred() { void MachineInstr::bundleWithSucc() { assert(!isBundledWithSucc() && "MI is already bundled with its successor"); setFlag(BundledSucc); - MachineBasicBlock::instr_iterator Succ = this; + MachineBasicBlock::instr_iterator Succ = getIterator(); ++Succ; assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags"); Succ->setFlag(BundledPred); @@ -994,7 +1021,7 @@ void MachineInstr::bundleWithSucc() { void MachineInstr::unbundleFromPred() { assert(isBundledWithPred() && "MI isn't bundled with its predecessor"); clearFlag(BundledPred); - MachineBasicBlock::instr_iterator Pred = this; + MachineBasicBlock::instr_iterator Pred = getIterator(); --Pred; assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags"); Pred->clearFlag(BundledSucc); @@ -1003,7 +1030,7 @@ void MachineInstr::unbundleFromPred() { void MachineInstr::unbundleFromSucc() { assert(isBundledWithSucc() && "MI isn't bundled with its successor"); clearFlag(BundledSucc); - MachineBasicBlock::instr_iterator Succ = this; + MachineBasicBlock::instr_iterator Succ = getIterator(); ++Succ; assert(Succ->isBundledWithPred() && "Inconsistent bundle flags"); Succ->clearFlag(BundledPred); @@ -1139,7 +1166,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect( /// Return the number of instructions inside the MI bundle, not counting the /// header instruction. unsigned MachineInstr::getBundleSize() const { - MachineBasicBlock::const_instr_iterator I = this; + MachineBasicBlock::const_instr_iterator I = getIterator(); unsigned Size = 0; while (I->isBundledWithSucc()) ++Size, ++I; @@ -1501,6 +1528,10 @@ bool MachineInstr::hasUnmodeledSideEffects() const { return false; } +bool MachineInstr::isLoadFoldBarrier() const { + return mayStore() || isCall() || hasUnmodeledSideEffects(); +} + /// allDefsAreDead - Return true if all the defs of this instruction are dead. 
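The cluster of '= this' to '= getIterator()' changes above reflects the intrusive-list rework in this import: nodes no longer convert implicitly to their list iterators, and iterators no longer convert implicitly to pointers. Schematically (MI/MBB stand for any ilist node):

    // Before: implicit conversions in both directions.
    //   MachineBasicBlock::instr_iterator Pred = this;            // node -> iterator
    //   MachineBasicBlock *Prior = std::prev(MachineFunction::iterator(MBB));
    //
    // After: both directions are spelled out.
    //   MachineBasicBlock::instr_iterator Pred = getIterator();   // explicit
    //   MachineBasicBlock *Prior = &*std::prev(MBB->getIterator()); // iterator -> pointer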
/// bool MachineInstr::allDefsAreDead() const { @@ -1615,7 +1646,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, FirstOp = false; } - for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); @@ -1706,13 +1736,16 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, } bool HaveSemi = false; - const unsigned PrintableFlags = FrameSetup; + const unsigned PrintableFlags = FrameSetup | FrameDestroy; if (Flags & PrintableFlags) { if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " flags: "; if (Flags & FrameSetup) OS << "FrameSetup"; + + if (Flags & FrameDestroy) + OS << "FrameDestroy"; } if (!memoperands_empty()) { @@ -1755,7 +1788,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, DebugLoc InlinedAtDL(InlinedAt); if (InlinedAtDL && MF) { OS << " inlined @[ "; - InlinedAtDL.print(OS); + InlinedAtDL.print(OS); OS << " ]"; } } @@ -1902,11 +1935,11 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) { } } -void MachineInstr::addRegisterDefReadUndef(unsigned Reg) { +void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { for (MachineOperand &MO : operands()) { if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) continue; - MO.setIsUndef(); + MO.setIsUndef(IsUndef); } } diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index cd820ee1ac52..3eaf4c5dea0f 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -293,15 +293,17 @@ MachineOperandIteratorBase::PhysRegInfo MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI) { bool AllDefsDead = true; - PhysRegInfo PRI = {false, false, false, false, false, false}; + PhysRegInfo PRI = {false, false, false, false, false, false, false}; assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); for (; isValid(); ++*this) { MachineOperand &MO = deref(); - if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) - PRI.Clobbers = true; // Regmask clobbers Reg. + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) { + PRI.Clobbered = true; + continue; + } if (!MO.isReg()) continue; @@ -310,33 +312,28 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) continue; - bool IsRegOrSuperReg = MOReg == Reg || TRI->isSubRegister(MOReg, Reg); - bool IsRegOrOverlapping = MOReg == Reg || TRI->regsOverlap(MOReg, Reg); - - if (IsRegOrSuperReg && MO.readsReg()) { - // Reg or a super-reg is read, and perhaps killed also. - PRI.Reads = true; - PRI.Kills = MO.isKill(); - } - - if (IsRegOrOverlapping && MO.readsReg()) { - PRI.ReadsOverlap = true;// Reg or an overlapping register is read. - } - - if (!MO.isDef()) + if (!TRI->regsOverlap(MOReg, Reg)) continue; - if (IsRegOrSuperReg) { - PRI.Defines = true; // Reg or a super-register is defined. + bool Covered = TRI->isSuperRegisterEq(MOReg, Reg); + if (MO.readsReg()) { + PRI.Read = true; + if (Covered) { + PRI.FullyRead = true; + if (MO.isKill()) + PRI.Killed = true; + } + } else if (MO.isDef()) { + PRI.Defined = true; + if (Covered) + PRI.FullyDefined = true; if (!MO.isDead()) AllDefsDead = false; } - if (IsRegOrOverlapping) - PRI.Clobbers = true; // Reg or an overlapping reg is defined. } - if (AllDefsDead && PRI.Defines) - PRI.DefinesDead = true; // Reg or super-register was defined and was dead. 
+ if (AllDefsDead && PRI.FullyDefined) + PRI.DeadDef = true; return PRI; } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index e9ea5ed9648c..a8368e9c80d6 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -138,7 +138,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); @@ -153,7 +153,7 @@ namespace { } private: - /// CandidateInfo - Keep track of information about hoisting candidates. + /// Keep track of information about hoisting candidates. struct CandidateInfo { MachineInstr *MI; unsigned Def; @@ -162,149 +162,76 @@ namespace { : MI(mi), Def(def), FI(fi) {} }; - /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop - /// invariants out to the preheader. void HoistRegionPostRA(); - /// HoistPostRA - When an instruction is found to only use loop invariant - /// operands that is safe to hoist, this instruction is called to do the - /// dirty work. void HoistPostRA(MachineInstr *MI, unsigned Def); - /// ProcessMI - Examine the instruction for potentai LICM candidate. Also - /// gather register def and frame object update information. - void ProcessMI(MachineInstr *MI, - BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, - SmallSet &StoredFIs, + void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet &StoredFIs, SmallVectorImpl &Candidates); - /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the - /// current loop. void AddToLiveIns(unsigned Reg); - /// IsLICMCandidate - Returns true if the instruction may be a suitable - /// candidate for LICM. e.g. If the instruction is a call, then it's - /// obviously not safe to hoist it. bool IsLICMCandidate(MachineInstr &I); - /// IsLoopInvariantInst - Returns true if the instruction is loop - /// invariant. I.e., all virtual register operands are defined outside of - /// the loop, physical registers aren't accessed (explicitly or implicitly), - /// and the instruction is hoistable. - /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasLoopPHIUse - Return true if the specified instruction is used by any - /// phi node in the current loop. bool HasLoopPHIUse(const MachineInstr *MI) const; - /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' - /// and an use in the current loop, return true if the target considered - /// it 'high'. bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg) const; bool IsCheapInstruction(MachineInstr &MI) const; - /// CanCauseHighRegPressure - Visit BBs from header to current BB, - /// check if hoisting an instruction of the given cost matrix can cause high - /// register pressure. bool CanCauseHighRegPressure(const DenseMap &Cost, bool Cheap); - /// UpdateBackTraceRegPressure - Traverse the back trace from header to - /// the current block and update their register pressures to reflect the - /// effect of hoisting MI from the current block to the preheader. void UpdateBackTraceRegPressure(const MachineInstr *MI); - /// IsProfitableToHoist - Return true if it is potentially profitable to - /// hoist the given loop invariant. bool IsProfitableToHoist(MachineInstr &MI); - /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. - /// If not then a load from this mbb may not be safe to hoist. 
bool IsGuaranteedToExecute(MachineBasicBlock *BB); void EnterScope(MachineBasicBlock *MBB); void ExitScope(MachineBasicBlock *MBB); - /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given - /// dominator tree node if its a leaf or all of its children are done. Walk - /// up the dominator tree to destroy ancestors which are now done. - void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap); + void ExitScopeIfDone( + MachineDomTreeNode *Node, + DenseMap &OpenChildren, + DenseMap &ParentMap); - /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all - /// blocks dominated by the specified header block, and that are in the - /// current loop) in depth first order w.r.t the DominatorTree. This allows - /// us to visit definitions before uses, allowing us to hoist a loop body in - /// one pass without iteration. - /// void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); + void HoistRegion(MachineDomTreeNode *N, bool IsHeader); - /// SinkIntoLoop - Sink instructions into loops if profitable. This - /// especially tries to prevent register spills caused by register pressure - /// if there is little to no overhead moving instructions into loops. void SinkIntoLoop(); - /// InitRegPressure - Find all virtual register references that are liveout - /// of the preheader to initialize the starting "register pressure". Note - /// this does not count live through (livein but not used) registers. void InitRegPressure(MachineBasicBlock *BB); - /// calcRegisterCost - Calculate the additional register pressure that the - /// registers used in MI cause. - /// - /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to - /// figure out which usages are live-ins. - /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. DenseMap calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef); - /// UpdateRegPressure - Update estimate of register pressure after the - /// specified instruction. void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); - /// ExtractHoistableLoad - Unfold a load from the given machineinstr if - /// the load itself could be hoisted. Return the unfolded and hoistable - /// load, or null if the load couldn't be unfolded or if it wouldn't - /// be hoistable. MachineInstr *ExtractHoistableLoad(MachineInstr *MI); - /// LookForDuplicate - Find an instruction amount PrevMIs that is a - /// duplicate of MI. Return this instruction if it's found. - const MachineInstr *LookForDuplicate(const MachineInstr *MI, - std::vector &PrevMIs); + const MachineInstr * + LookForDuplicate(const MachineInstr *MI, + std::vector &PrevMIs); - /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on - /// the preheader that compute the same value. If it's found, do a RAU on - /// with the definition of the existing instruction rather than hoisting - /// the instruction to the preheader. - bool EliminateCSE(MachineInstr *MI, - DenseMap >::iterator &CI); + bool EliminateCSE( + MachineInstr *MI, + DenseMap>::iterator &CI); - /// MayCSE - Return true if the given instruction will be CSE'd if it's - /// hoisted out of the loop. bool MayCSE(MachineInstr *MI); - /// Hoist - When an instruction is found to only use loop invariant operands - /// that is safe to hoist, this instruction is called to do the dirty work. - /// It returns true if the instruction is hoisted. 
bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); - /// InitCSEMap - Initialize the CSE map with instructions that are in the - /// current loop preheader that may become duplicates of instructions that - /// are hoisted out of the loop. void InitCSEMap(MachineBasicBlock *BB); - /// getCurPreheader - Get the preheader for the current loop, splitting - /// a critical edge if needed. MachineBasicBlock *getCurPreheader(); }; } // end anonymous namespace @@ -315,12 +242,11 @@ INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) -/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most -/// loop that has a unique predecessor. +/// Test if the given loop is the outer-most loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { // Check whether this loop even has a unique predecessor. if (!CurLoop->getLoopPredecessor()) @@ -367,7 +293,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // Get our Loop information... MLI = &getAnalysis<MachineLoopInfo>(); DT = &getAnalysis<MachineDominatorTree>(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); while (!Worklist.empty()) { @@ -402,9 +328,12 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { return Changed; } -/// InstructionStoresToFI - Return true if instruction stores to the -/// specified frame. +/// Return true if instruction stores to the specified frame. static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { + // If we lost memory operands, conservatively assume that the instruction + // writes to all slots. + if (MI->memoperands_empty()) + return true; for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); o != oe; ++o) { if (!(*o)->isStore() || !(*o)->getPseudoValue()) @@ -418,7 +347,7 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { return false; } -/// ProcessMI - Examine the instruction for potentai LICM candidate. Also +/// Examine the instruction as a potential LICM candidate. Also /// gather register def and frame object update information. void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, @@ -506,8 +435,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } } -/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop -/// invariants out to the preheader. +/// Walk the specified region of the CFG and hoist loop invariants out to the +/// preheader. void MachineLICM::HoistRegionPostRA() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) @@ -529,15 +458,13 @@ void MachineLICM::HoistRegionPostRA() { // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop. const MachineLoop *ML = MLI->getLoopFor(BB); - if (ML && ML->getHeader()->isLandingPad()) continue; + if (ML && ML->getHeader()->isEHPad()) continue; // Conservatively treat live-ins as an external def. // FIXME: That means a reload that is reused in successor block(s) will not // be LICM'ed.
- for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), - E = BB->livein_end(); I != E; ++I) { - unsigned Reg = *I; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + for (const auto &LI : BB->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) PhysRegDefs.set(*AI); } @@ -601,8 +528,8 @@ void MachineLICM::HoistRegionPostRA() { } } -/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current -/// loop, and make sure it is not killed by any instructions in the loop. +/// Add register 'Reg' to the livein sets of BBs in the current loop, and make +/// sure it is not killed by any instructions in the loop. void MachineLICM::AddToLiveIns(unsigned Reg) { const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { @@ -622,9 +549,8 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { } } -/// HoistPostRA - When an instruction is found to only use loop invariant -/// operands that is safe to hoist, this instruction is called to do the -/// dirty work. +/// When an instruction is found to only use loop invariant operands that are +/// safe to hoist, this instruction is called to do the dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); @@ -646,8 +572,8 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { Changed = true; } -// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. -// If not then a load from this mbb may not be safe to hoist. +/// Check if this mbb is guaranteed to execute. If not, then a load from this mbb +/// may not be safe to hoist. bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; @@ -679,9 +605,9 @@ void MachineLICM::ExitScope(MachineBasicBlock *MBB) { BackTrace.pop_back(); } -/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given -/// dominator tree node if its a leaf or all of its children are done. Walk -/// up the dominator tree to destroy ancestors which are now done. +/// Destroy scope for the MBB that corresponds to the given dominator tree node +/// if it's a leaf or all of its children are done. Walk up the dominator tree to +/// destroy ancestors which are now done. void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren, DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap) { @@ -701,11 +627,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, } } -/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all -/// blocks dominated by the specified header block, and that are in the -/// current loop) in depth first order w.r.t the DominatorTree. This allows -/// us to visit definitions before uses, allowing us to hoist a loop body in -/// one pass without iteration. +/// Walk the specified loop in the CFG (defined by all blocks dominated by the +/// specified header block, and that are in the current loop) in depth first +/// order w.r.t the DominatorTree. This allows us to visit definitions before +/// uses, allowing us to hoist a loop body in one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); @@ -727,7 +652,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { // If the header of the loop containing this basic block is a landing pad, // then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB); - if (ML && ML->getHeader()->isLandingPad()) + if (ML && ML->getHeader()->isEHPad()) continue; // If this subregion is not in the top level loop at all, exit. @@ -786,6 +711,9 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { } } +/// Sink instructions into loops if profitable. This especially tries to prevent +/// register spills caused by register pressure if there is little to no +/// overhead moving instructions into loops. void MachineLICM::SinkIntoLoop() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) @@ -796,8 +724,8 @@ void MachineLICM::SinkIntoLoop() { I != Preheader->instr_end(); ++I) { // We need to ensure that we can safely move this instruction into the loop. // As such, it must not have side effects, e.g. those of a call. - if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I)) - Candidates.push_back(I); + if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) + Candidates.push_back(&*I); } for (MachineInstr *I : Candidates) { @@ -837,9 +765,9 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } -/// InitRegPressure - Find all virtual register references that are liveout of -/// the preheader to initialize the starting "register pressure". Note this -/// does not count live through (livein but not used) registers. +/// Find all virtual register references that are liveout of the preheader to +/// initialize the starting "register pressure". Note this does not count live +/// through (livein but not used) registers. void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { std::fill(RegPressure.begin(), RegPressure.end(), 0); @@ -858,8 +786,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); } -/// UpdateRegPressure - Update estimate of register pressure after the -/// specified instruction. +/// Update estimate of register pressure after the specified instruction. void MachineLICM::UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef) { auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); @@ -872,6 +799,12 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI, } } +/// Calculate the additional register pressure that the registers used in MI +/// cause. +/// +/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to +/// figure out which usages are live-ins. +/// FIXME: Figure out a way to consider 'RegSeen' from all code paths. DenseMap<unsigned, int> MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef) { @@ -915,23 +848,28 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, return Cost; } -/// isLoadFromGOTOrConstantPool - Return true if this machine instruction -/// loads from global offset table or constant pool. -static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { +/// Return true if this machine instruction loads from global offset table or +/// constant pool. +static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.mayLoad() && "Expected MI that loads!"); + + // If we lost memory operands, conservatively assume that the instruction + // reads from everything.
+ if (MI.memoperands_empty()) + return true; + for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), E = MI.memoperands_end(); I != E; ++I) { if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { - if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) + if (PSV->isGOT() || PSV->isConstantPool()) return true; } } return false; } -/// IsLICMCandidate - Returns true if the instruction may be a suitable -/// candidate for LICM. e.g. If the instruction is a call, then it's obviously -/// not safe to hoist it. +/// Returns true if the instruction may be a suitable candidate for LICM. +/// E.g., if the instruction is a call, then it's obviously not safe to hoist it. bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // Check if it's safe to move the instruction. bool DontMoveAcrossStore = true; @@ -944,16 +882,16 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // from constant memory are not safe to speculate all the time, for example // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. - if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) && + if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) && !IsGuaranteedToExecute(I.getParent())) return false; return true; } -/// IsLoopInvariantInst - Returns true if the instruction is loop -/// invariant. I.e., all virtual register operands are defined outside of the -/// loop, physical registers aren't accessed explicitly, and there are no side +/// Returns true if the instruction is loop invariant. +/// I.e., all virtual register operands are defined outside of the loop, +/// physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. /// bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { @@ -1007,8 +945,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasLoopPHIUse - Return true if the specified instruction is used by a -/// phi node and hoisting it could cause a copy to be inserted. +/// Return true if the specified instruction is used by a phi node and hoisting +/// it could cause a copy to be inserted. bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { SmallVector<const MachineInstr *, 4> Work(1, MI); do { @@ -1042,9 +980,8 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { return false; } -/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' -/// and an use in the current loop, return true if the target considered -/// it 'high'. +/// Compute operand latency between a def of 'Reg' and a use in the current +/// loop; return true if the target considers it high. bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg) const { if (MRI->use_nodbg_empty(Reg)) @@ -1074,8 +1011,8 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, return false; } -/// IsCheapInstruction - Return true if the instruction is marked "cheap" or -/// the operand latency between its def and a use is one or less. +/// Return true if the instruction is marked "cheap" or the operand latency +/// between its def and a use is one or less. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) return true; @@ -1099,9 +1036,8 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { return isCheap; } -/// CanCauseHighRegPressure - Visit BBs from header to current BB, check -/// if hoisting an instruction of the given cost matrix can cause high -/// register pressure.
+/// Visit BBs from header to current BB; check if hoisting an instruction of the +/// given cost matrix can cause high register pressure. bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { @@ -1124,9 +1060,9 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, return false; } -/// UpdateBackTraceRegPressure - Traverse the back trace from header to the -/// current block and update their register pressures to reflect the effect -/// of hoisting MI from the current block to the preheader. +/// Traverse the back trace from header to the current block and update their +/// register pressures to reflect the effect of hoisting MI from the current +/// block to the preheader. void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure. @@ -1139,8 +1075,8 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { RP[RPIdAndCost.first] += RPIdAndCost.second; } -/// IsProfitableToHoist - Return true if it is potentially profitable to hoist -/// the given loop invariant. +/// Return true if it is potentially profitable to hoist the given loop +/// invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; @@ -1230,6 +1166,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { return true; } +/// Unfold a load from the given machineinstr if the load itself could be +/// hoisted. Return the unfolded and hoistable load, or null if the load +/// couldn't be unfolded or if it wouldn't be hoistable. MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. if (MI->canFoldAsLoad()) @@ -1287,6 +1226,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { return NewMIs[0]; } +/// Initialize the CSE map with instructions that are in the current loop +/// preheader that may become duplicates of instructions that are hoisted +/// out of the loop. void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { const MachineInstr *MI = &*I; @@ -1295,6 +1237,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { } } +/// Find an instruction among PrevMIs that is a duplicate of MI. +/// Return this instruction if it's found. const MachineInstr* MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector<const MachineInstr *> &PrevMIs) { @@ -1306,6 +1250,10 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, return nullptr; } +/// Given a LICM'ed instruction, look for an instruction on the preheader that +/// computes the same value. If it's found, replace all uses of MI with the +/// definition of the existing instruction rather than hoisting the instruction +/// to the preheader. bool MachineLICM::EliminateCSE(MachineInstr *MI, DenseMap<unsigned, std::vector<const MachineInstr *> >::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate @@ -1363,8 +1311,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, return false; } -/// MayCSE - Return true if the given instruction will be CSE'd if it's -/// hoisted out of the loop. +/// Return true if the given instruction will be CSE'd if it's hoisted out of +/// the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); DenseMap >::iterator @@ -1377,9 +1325,9 @@ bool MachineLICM::MayCSE(MachineInstr *MI) { return LookForDuplicate(MI, CI->second) != nullptr; } -/// Hoist - When an instruction is found to use only loop invariant operands +/// When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. -/// +/// It returns true if the instruction is hoisted. bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { @@ -1441,6 +1389,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { return true; } +/// Get the preheader for the current loop, splitting a critical edge if needed. MachineBasicBlock *MachineLICM::getCurPreheader() { // Determine the block to which to hoist instructions. If we can't find a // suitable loop predecessor, we can't do any hoisting. diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index ce6abdd870b3..2f5c9e05cc7b 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -37,7 +37,7 @@ char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { releaseMemory(); - LI.Analyze(getAnalysis().getBase()); + LI.analyze(getAnalysis().getBase()); return false; } @@ -51,11 +51,11 @@ MachineBasicBlock *MachineLoop::getTopBlock() { MachineBasicBlock *TopMBB = getHeader(); MachineFunction::iterator Begin = TopMBB->getParent()->begin(); if (TopMBB != Begin) { - MachineBasicBlock *PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); + MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator()); while (contains(PriorMBB)) { TopMBB = PriorMBB; if (TopMBB == Begin) break; - PriorMBB = std::prev(MachineFunction::iterator(TopMBB)); + PriorMBB = &*std::prev(TopMBB->getIterator()); } } return TopMBB; @@ -65,11 +65,12 @@ MachineBasicBlock *MachineLoop::getBottomBlock() { MachineBasicBlock *BotMBB = getHeader(); MachineFunction::iterator End = BotMBB->getParent()->end(); if (BotMBB != std::prev(End)) { - MachineBasicBlock *NextMBB = std::next(MachineFunction::iterator(BotMBB)); + MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator()); while (contains(NextMBB)) { BotMBB = NextMBB; - if (BotMBB == std::next(MachineFunction::iterator(BotMBB))) break; - NextMBB = std::next(MachineFunction::iterator(BotMBB)); + if (BotMBB == &*std::next(BotMBB->getIterator())) + break; + NextMBB = &*std::next(BotMBB->getIterator()); } } return BotMBB; diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 6a206249d834..1956a701d8e6 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -9,12 +9,12 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -35,7 +35,7 @@ char MachineModuleInfo::ID = 0; MachineModuleInfoImpl::~MachineModuleInfoImpl() {} namespace llvm { 
-class MMIAddrLabelMapCallbackPtr : CallbackVH { +class MMIAddrLabelMapCallbackPtr final : CallbackVH { MMIAddrLabelMap *Map; public: MMIAddrLabelMapCallbackPtr() : Map(nullptr) {} @@ -209,9 +209,8 @@ bool MachineModuleInfo::doInitialization(Module &M) { CurCallSite = 0; CallsEHReturn = false; CallsUnwindInit = false; + HasEHFunclets = false; DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; - // Always emit some info, by default "no personality" info. - Personalities.push_back(nullptr); PersonalityTypeCache = EHPersonality::Unknown; AddrLabelSymbols = nullptr; TheModule = nullptr; @@ -249,6 +248,7 @@ void MachineModuleInfo::EndFunction() { FilterEnds.clear(); CallsEHReturn = false; CallsUnwindInit = false; + HasEHFunclets = false; VariableDbgInfos.clear(); } @@ -314,32 +314,11 @@ MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { return LandingPadLabel; } -/// addPersonality - Provide the personality function for the exception -/// information. -void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad, - const Function *Personality) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.Personality = Personality; - addPersonality(Personality); -} - void MachineModuleInfo::addPersonality(const Function *Personality) { for (unsigned i = 0; i < Personalities.size(); ++i) if (Personalities[i] == Personality) return; - - // If this is the first personality we're adding go - // ahead and add it at the beginning. - if (!Personalities[0]) - Personalities[0] = Personality; - else - Personalities.push_back(Personality); -} - -void MachineModuleInfo::addWinEHState(MachineBasicBlock *LandingPad, - int State) { - LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); - LP.WinEHState = State; + Personalities.push_back(Personality); } /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. @@ -481,56 +460,3 @@ try_next:; FilterIds.push_back(0); // terminator return FilterID; } - -/// getPersonality - Return the personality function for the current function. -const Function *MachineModuleInfo::getPersonality() const { - for (const LandingPadInfo &LPI : LandingPads) - if (LPI.Personality) - return LPI.Personality; - return nullptr; -} - -EHPersonality MachineModuleInfo::getPersonalityType() { - if (PersonalityTypeCache == EHPersonality::Unknown) { - if (const Function *F = getPersonality()) - PersonalityTypeCache = classifyEHPersonality(F); - } - return PersonalityTypeCache; -} - -/// getPersonalityIndex - Return unique index for current personality -/// function. NULL/first personality function should always get zero index. -unsigned MachineModuleInfo::getPersonalityIndex() const { - const Function* Personality = nullptr; - - // Scan landing pads. If there is at least one non-NULL personality - use it. - for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) - if (LandingPads[i].Personality) { - Personality = LandingPads[i].Personality; - break; - } - - for (unsigned i = 0, e = Personalities.size(); i < e; ++i) { - if (Personalities[i] == Personality) - return i; - } - - // This will happen if the current personality function is - // in the zero index. 
- return 0; -} - -const Function *MachineModuleInfo::getWinEHParent(const Function *F) const { - StringRef WinEHParentName = - F->getFnAttribute("wineh-parent").getValueAsString(); - if (WinEHParentName.empty() || WinEHParentName == F->getName()) - return F; - return F->getParent()->getFunction(WinEHParentName); -} - -WinEHFuncInfo &MachineModuleInfo::getWinEHFuncInfo(const Function *F) { - auto &Ptr = FuncInfoMap[getWinEHParent(F)]; - if (!Ptr) - Ptr.reset(new WinEHFuncInfo); - return *Ptr; -} diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index e883ce523134..03c82f46da63 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -27,13 +27,11 @@ void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF) : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true), TracksSubRegLiveness(false) { + unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); - UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); - - // Create the physreg use/def lists. - PhysRegUseDefLists.resize(getTargetRegisterInfo()->getNumRegs(), nullptr); + UsedPhysRegMask.resize(NumRegs); + PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]()); } /// setRegClass - Set the register class of the specified virtual register. @@ -117,6 +115,8 @@ void MachineRegisterInfo::clearVirtRegs() { } #endif VRegInfo.clear(); + for (auto &I : LiveIns) + I.second = 0; } void MachineRegisterInfo::verifyUseList(unsigned Reg) const { @@ -394,8 +394,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, } } -unsigned MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const -{ +LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const { // Lane masks are only defined for vregs. 
assert(TargetRegisterInfo::isVirtualRegister(Reg)); const TargetRegisterClass &TRC = *getRegClass(Reg); @@ -468,11 +467,8 @@ static bool isNoReturnDef(const MachineOperand &MO) { if (MF.getFunction()->hasFnAttribute(Attribute::UWTable)) return false; const Function *Called = getCalledFunction(MI); - if (Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) - || !Called->hasFnAttribute(Attribute::NoUnwind)) - return false; - - return true; + return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) || + !Called->hasFnAttribute(Attribute::NoUnwind)); } bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const { @@ -488,3 +484,15 @@ bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const { } return false; } + +bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const { + if (UsedPhysRegMask.test(PhysReg)) + return true; + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid(); + ++AliasReg) { + if (!reg_nodbg_empty(*AliasReg)) + return true; + } + return false; +} diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a48e54caf3fe..bcee15c7c75f 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -49,6 +49,11 @@ DumpCriticalPathLength("misched-dcpl", cl::Hidden, static cl::opt ViewMISchedDAGs("view-misched-dags", cl::Hidden, cl::desc("Pop up a window to show MISched dags after they are processed")); +/// In some situations a few uninteresting nodes depend on nearly all other +/// nodes in the graph, provide a cutoff to hide them. +static cl::opt ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, + cl::desc("Hide nodes with more predecessor/successor than cutoff")); + static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); @@ -106,7 +111,7 @@ public: void print(raw_ostream &O, const Module* = nullptr) const override; protected: - void scheduleRegions(ScheduleDAGInstrs &Scheduler); + void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags); }; /// MachineScheduler runs after coalescing and before register allocation. @@ -146,7 +151,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID; INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", "Machine Instruction Scheduler", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", @@ -161,7 +166,7 @@ void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequiredID(MachineDominatorsID); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -315,14 +320,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } else if (!mf.getSubtarget().enableMachineScheduler()) return false; - DEBUG(dbgs() << "Before MISsched:\n"; mf.print(dbgs())); + DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); // Initialize the context of the pass. 
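isPhysRegUsed above must look through register aliases: on x86, for example, a use of AX should also mark EAX used. The query pattern, schematically (TRI and reg_nodbg_empty are members of the surrounding class):

    // for (MCRegAliasIterator AliasReg(PhysReg, TRI, /*IncludeSelf=*/true);
    //      AliasReg.isValid(); ++AliasReg)
    //   if (!reg_nodbg_empty(*AliasReg)) // any non-debug def or use of an alias
    //     return true;                   // e.g. a use of AX marks EAX as used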
MF = &mf; MLI = &getAnalysis<MachineLoopInfo>(); MDT = &getAnalysis<MachineDominatorTree>(); PassConfig = &getAnalysis<TargetPassConfig>(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); LIS = &getAnalysis<LiveIntervals>(); @@ -335,7 +340,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); - scheduleRegions(*Scheduler); + scheduleRegions(*Scheduler, false); DEBUG(LIS->dump()); if (VerifyScheduling) @@ -363,7 +368,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { // Instantiate the selected scheduler for this target, function, and // optimization level. std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); - scheduleRegions(*Scheduler); + scheduleRegions(*Scheduler, true); if (VerifyScheduling) MF->verify(this, "After post machine scheduling."); @@ -383,15 +388,14 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, - const TargetInstrInfo *TII, - bool IsPostRA) { + const TargetInstrInfo *TII) { return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); } /// Main driver for both MachineScheduler and PostMachineScheduler. -void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, + bool FixKillFlags) { const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); - bool IsPostRA = Scheduler.isPostRA(); // Visit all machine basic blocks. // @@ -400,7 +404,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); MBB != MBBEnd; ++MBB) { - Scheduler.startBlock(MBB); + Scheduler.startBlock(&*MBB); #ifndef NDEBUG if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) continue; @@ -429,7 +433,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { // Avoid decrementing RegionEnd for blocks with no terminator. if (RegionEnd != MBB->end() || - isSchedBoundary(std::prev(RegionEnd), MBB, MF, TII, IsPostRA)) { + isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { --RegionEnd; // Count the boundary instruction. --RemainingInstrs; @@ -440,14 +444,14 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; for(;I != MBB->begin(); --I, --RemainingInstrs) { - if (isSchedBoundary(std::prev(I), MBB, MF, TII, IsPostRA)) + if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII)) break; if (!I->isDebugValue()) ++NumRegionInstrs; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler.enterRegion(MBB, I, RegionEnd, NumRegionInstrs); + Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == std::prev(RegionEnd)) { @@ -456,8 +460,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { Scheduler.exitRegion(); continue; } - DEBUG(dbgs() << "********** " << ((Scheduler.isPostRA()) ?
"PostRA " : "") - << "MI Scheduling **********\n"); + DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(dbgs() << MF->getName() << ":BB#" << MBB->getNumber() << " " << MBB->getName() << "\n From: " << *I << " To: "; @@ -484,11 +487,11 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler) { } assert(RemainingInstrs == 0 && "Instruction count mismatch!"); Scheduler.finishBlock(); - if (Scheduler.isPostRA()) { - // FIXME: Ideally, no further passes should rely on kill flags. However, - // thumb2 size reduction is currently an exception. - Scheduler.fixupKills(MBB); - } + // FIXME: Ideally, no further passes should rely on kill flags. However, + // thumb2 size reduction is currently an exception, so the PostMIScheduler + // needs to do this. + if (FixKillFlags) + Scheduler.fixupKills(&*MBB); } Scheduler.finalizeSchedule(); } @@ -499,7 +502,7 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { LLVM_DUMP_METHOD void ReadyQueue::dump() { - dbgs() << Name << ": "; + dbgs() << "Queue " << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -660,6 +663,9 @@ bool ScheduleDAGMI::checkSchedLimit() { /// does not consider liveness or register pressure. It is useful for PostRA /// scheduling and potentially other custom schedulers. void ScheduleDAGMI::schedule() { + DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); + // Build the DAG. buildSchedGraph(AA); @@ -682,7 +688,11 @@ void ScheduleDAGMI::schedule() { initQueues(TopRoots, BotRoots); bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; @@ -900,6 +910,13 @@ void ScheduleDAGMILive::initRegPressure() { updatePressureDiffs(LiveUses); } + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); // Cache the list of excess pressure sets in this region. This will also track @@ -976,18 +993,24 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { } // RegisterPressureTracker guarantees that readsReg is true for LiveUses. assert(VNI && "No live value at use."); - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - SUnit *SU = UI->SU; - DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " - << *SU->getInstr()); + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; // If this use comes before the reaching def, it cannot be a last use, so // descrease its pressure change. 
if (!SU->isScheduled && SU != &ExitSU) { LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(SU->getInstr())); - if (LRQ.valueIn() == VNI) - getPressureDiff(SU).addPressureChange(Reg, true, &MRI); + if (LRQ.valueIn() == VNI) { + PressureDiff &PDiff = getPressureDiff(SU); + PDiff.addPressureChange(Reg, true, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } } } } @@ -998,12 +1021,14 @@ void ScheduleDAGMILive::updatePressureDiffs(ArrayRef LiveUses) { /// only includes instructions that have DAG nodes, not scheduling boundaries. /// /// This is a skeletal driver, with all the functionality pushed into helpers, -/// so that it can be easilly extended by experimental schedulers. Generally, +/// so that it can be easily extended by experimental schedulers. Generally, /// implementing MachineSchedStrategy should be sufficient to implement a new /// scheduling algorithm. However, if a scheduler further subclasses /// ScheduleDAGMILive then it will want to override this virtual method in order /// to update any specialized state. void ScheduleDAGMILive::schedule() { + DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); buildDAGWithRegPressure(); Topo.InitDAGTopologicalSorting(); @@ -1017,8 +1042,16 @@ void ScheduleDAGMILive::schedule() { // This may initialize a DFSResult to be used for queue priority. SchedImpl->initialize(this); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + if (ShouldTrackPressure) { + dbgs() << " Pressure Diff : "; + getPressureDiff(&SU).dump(*TRI); + } + dbgs() << '\n'; + } + ); if (ViewMISchedDAGs) viewGraph(); // Initialize ready queues now that the DAG and priority data are finalized. @@ -1030,7 +1063,11 @@ void ScheduleDAGMILive::schedule() { } bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + assert(!SU->isScheduled && "Node already scheduled"); if (!checkSchedLimit()) break; @@ -1149,14 +1186,15 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { unsigned LiveOutHeight = DefSU->getHeight(); unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; // Visit all local users of the vreg def. - for (VReg2UseMap::iterator - UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { - if (UI->SU == &ExitSU) + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; + if (SU == &ExitSU) continue; // Only consider uses of the phi. LiveQueryResult LRQ = - LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); + LI.Query(LIS->getInstructionIndex(SU->getInstr())); if (!LRQ.valueIn()->isPHIDef()) continue; @@ -1164,10 +1202,10 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { // overestimate in strange cases. This allows cyclic latency to be // estimated as the minimum slack of the vreg's depth or height. 
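A short worked example for the computation that follows, with numbers invented for illustration: suppose the def's LiveOutDepth is 12 and LiveOutHeight is 9, and the using SU has depth 4 and height 10, with a DefSU latency of 2.

  // Requires <algorithm> for std::min; values are illustrative only.
  unsigned CyclicLatency = 12 - 4;  // depth slack: LiveOutDepth - depth = 8
  unsigned LiveInHeight = 10 + 2;   // SU height + DefSU->Latency = 12
  if (LiveInHeight > 9)             // compare against LiveOutHeight
    CyclicLatency = std::min(CyclicLatency, 12u - 9u); // min(8, 3) = 3
  else
    CyclicLatency = 0;              // no height slack, no cyclic latency

The reported cyclic latency is the minimum of the depth and height slacks, 3c in this example.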
unsigned CyclicLatency = 0; - if (LiveOutDepth > UI->SU->getDepth()) - CyclicLatency = LiveOutDepth - UI->SU->getDepth(); + if (LiveOutDepth > SU->getDepth()) + CyclicLatency = LiveOutDepth - SU->getDepth(); - unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; + unsigned LiveInHeight = SU->getHeight() + DefSU->Latency; if (LiveInHeight > LiveOutHeight) { if (LiveInHeight - LiveOutHeight < CyclicLatency) CyclicLatency = LiveInHeight - LiveOutHeight; @@ -1176,7 +1214,7 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() { CyclicLatency = 0; DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" - << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); + << SU->NodeNum << ") = " << CyclicLatency << "c\n"); if (CyclicLatency > MaxCyclicLatency) MaxCyclicLatency = CyclicLatency; } @@ -1203,6 +1241,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { // Update top scheduled pressure. TopRPTracker.advance(); assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); } } @@ -1225,6 +1268,11 @@ void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { SmallVector<unsigned, 8> LiveUses; BotRPTracker.recede(&LiveUses); assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + DEBUG( + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); updatePressureDiffs(LiveUses); } @@ -1349,25 +1397,49 @@ namespace { /// \brief Post-process the DAG to create cluster edges between instructions /// that may be fused by the processor into a single operation. class MacroFusion : public ScheduleDAGMutation { - const TargetInstrInfo *TII; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; public: - MacroFusion(const TargetInstrInfo *tii): TII(tii) {} + MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) + : TII(TII), TRI(TRI) {} void apply(ScheduleDAGMI *DAG) override; }; } // anonymous +/// Returns true if \p MI reads a register written by \p Other. +static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI, + const MachineInstr &Other) { + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Other.modifiesRegister(Reg, &TRI)) + return true; + } + return false; +} + /// \brief Callback from DAG postProcessing to create cluster edges to encourage /// fused operations. void MacroFusion::apply(ScheduleDAGMI *DAG) { // For now, assume targets can only fuse with the branch. - MachineInstr *Branch = DAG->ExitSU.getInstr(); + SUnit &ExitSU = DAG->ExitSU; + MachineInstr *Branch = ExitSU.getInstr(); if (!Branch) return; - for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { - SUnit *SU = &DAG->SUnits[--Idx]; - if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) + for (SUnit &SU : DAG->SUnits) { + // SUnits with successors can't be scheduled in front of the ExitSU. + if (!SU.Succs.empty()) + continue; + // We only care if the node writes to a register that the branch reads. + MachineInstr *Pred = SU.getInstr(); + if (!HasDataDep(TRI, *Branch, *Pred)) + continue; + + if (!TII.shouldScheduleAdjacent(Pred, Branch)) continue; // Create a single weak edge from SU to ExitSU.
The only effect is to cause @@ -1376,11 +1448,11 @@ void MacroFusion::apply(ScheduleDAGMI *DAG) { // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling // of SU, we could create an artificial edge from the deepest root, but it // hasn't been needed yet. - bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); + bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); (void)Success; assert(Success && "No DAG nodes should be reachable from ExitSU"); - DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); + DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n"); break; } } @@ -2277,7 +2349,7 @@ void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { Latency = Cand.SU->getDepth(); break; } - dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) << ":" << P.getUnitInc() << " "; @@ -2438,6 +2510,14 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, } } +void GenericScheduler::dumpPolicy() { + dbgs() << "GenericScheduler RegionPolicy: " + << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure + << " OnlyTopDown=" << RegionPolicy.OnlyTopDown + << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp + << "\n"; +} + /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic /// critical path by more cycles than it takes to drain the instruction buffer. /// We estimate an upper bounds on in-flight instructions as: @@ -2499,11 +2579,13 @@ static bool tryPressure(const PressureChange &TryP, const PressureChange &CandP, GenericSchedulerBase::SchedCandidate &TryCand, GenericSchedulerBase::SchedCandidate &Cand, - GenericSchedulerBase::CandReason Reason) { - int TryRank = TryP.getPSetOrMax(); - int CandRank = CandP.getPSetOrMax(); + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) { + unsigned TryPSet = TryP.getPSetOrMax(); + unsigned CandPSet = CandP.getPSetOrMax(); // If both candidates affect the same set, go with the smallest increase. - if (TryRank == CandRank) { + if (TryPSet == CandPSet) { return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, Reason); } @@ -2513,6 +2595,13 @@ static bool tryPressure(const PressureChange &TryP, Reason)) { return true; } + + int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : + std::numeric_limits<int>::max(); + + int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : + std::numeric_limits<int>::max(); + // If the candidates are decreasing pressure, reverse priority. if (TryP.getUnitInc() < 0) std::swap(TryRank, CandRank); @@ -2597,7 +2686,7 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, } } DEBUG(if (TryCand.RPDelta.Excess.isValid()) - dbgs() << " SU(" << TryCand.SU->NodeNum << ") " + dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") " << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); @@ -2615,13 +2704,15 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid exceeding the target's limit. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, - TryCand, Cand, RegExcess)) + TryCand, Cand, RegExcess, TRI, + DAG->MF)) return; // Avoid increasing the max critical pressure in the scheduled region.
if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, - TryCand, Cand, RegCritical)) + TryCand, Cand, RegCritical, TRI, + DAG->MF)) return; // For loops that are acyclic path limited, aggressively schedule for latency. @@ -2657,7 +2748,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid increasing the max pressure of the entire region. if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, - TryCand, Cand, RegMax)) + TryCand, Cand, RegMax, TRI, + DAG->MF)) return; // Avoid critical resource consumption and balance the schedule. @@ -2672,8 +2764,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand, // Avoid serializing long latency dependence chains. // For acyclic path limited loops, latency was already checked above. - if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited - && tryLatency(TryCand, Cand, Zone)) { + if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency && + !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) { return; } @@ -2727,12 +2819,12 @@ SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // efficient, but also provides the best heuristics for CriticalPSets. if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Bot ONLY1\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Top ONLY1\n"); return SU; } CandPolicy NoPolicy; @@ -2887,7 +2979,7 @@ static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) { if (EnableLoadCluster && DAG->TII->enableClusterLoads()) DAG->addMutation(make_unique(DAG->TII, DAG->TRI)); if (EnableMacroFusion) - DAG->addMutation(make_unique(DAG->TII)); + DAG->addMutation(make_unique(*DAG->TII, *DAG->TRI)); return DAG; } @@ -3254,12 +3346,10 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { } static bool isNodeHidden(const SUnit *Node) { - return (Node->Preds.size() > 10 || Node->Succs.size() > 10); - } - - static bool hasNodeAddressLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - return false; + if (ViewMISchedCutoff == 0) + return false; + return (Node->Preds.size() > ViewMISchedCutoff + || Node->Succs.size() > ViewMISchedCutoff); } /// If you want to override the dot attributes printed for a particular diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1b9be50068a9..5e6d6190c638 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -87,7 +87,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -150,7 +150,7 @@ INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineSinking, "machine-sink", "Machine code sinking", false, false) @@ -268,7 +268,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { PDT = &getAnalysis(); LI = &getAnalysis(); MBFI = UseBlockFreqInfo ? 
&getAnalysis() : nullptr; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); bool EverMadeChange = false; @@ -667,7 +667,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, // It's not safe to sink instructions to EH landing pad. Control flow into // landing pad is implicitly defined. - if (SuccToSinkTo && SuccToSinkTo->isLandingPad()) + if (SuccToSinkTo && SuccToSinkTo->isEHPad()) return nullptr; return SuccToSinkTo; @@ -686,7 +686,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore, if (!MI->isSafeToMove(AA, SawStore)) return false; - // Convergent operations may only be moved to control equivalent locations. + // Convergent operations may not be made control-dependent on additional + // values. if (MI->isConvergent()) return false; diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index d9a6b68462eb..f7edacd5ebaf 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -724,13 +724,12 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, // Update RegUnits to reflect live registers after UseMI. // First kills. - for (unsigned i = 0, e = Kills.size(); i != e; ++i) - for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + for (unsigned Kill : Kills) + for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units) RegUnits.erase(*Units); // Second, live defs. - for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { - unsigned DefOp = LiveDefOps[i]; + for (unsigned DefOp : LiveDefOps) { for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); Units.isValid(); ++Units) { LiveRegUnit &LRU = RegUnits[*Units]; @@ -756,8 +755,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { assert(TBI.HasValidInstrDepths && "Missing depth info"); assert(TBI.HasValidInstrHeights && "Missing height info"); unsigned MaxLen = 0; - for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { - const LiveInReg &LIR = TBI.LiveIns[i]; + for (const LiveInReg &LIR : TBI.LiveIns) { if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) continue; const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index ca35ec5fdcf8..cdcd8eb4fbdf 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" @@ -42,7 +43,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -204,18 +204,19 @@ namespace { void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); void visitMachineFunctionAfter(); + template void report(const char *msg, ilist_iterator I) { + report(msg, &*I); + } void report(const char *msg, const MachineFunction *MF); void report(const char *msg, const MachineBasicBlock *MBB); void report(const char *msg, const MachineInstr *MI); void report(const char *msg, const MachineOperand *MO, unsigned MONum); - void report(const char *msg, const MachineFunction *MF, - const LiveInterval &LI); - void report(const char *msg, const 
MachineBasicBlock *MBB, - const LiveInterval &LI); - void report(const char *msg, const MachineFunction *MF, - const LiveRange &LR, unsigned Reg, unsigned LaneMask); - void report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR, unsigned Reg, unsigned LaneMask); + + void report_context(const LiveInterval &LI) const; + void report_context(const LiveRange &LR, unsigned Reg, + LaneBitmask LaneMask) const; + void report_context(const LiveRange::Segment &S) const; + void report_context(const VNInfo &VNI) const; void verifyInlineAsm(const MachineInstr *MI); @@ -233,9 +234,11 @@ namespace { void verifyLiveRangeSegment(const LiveRange&, const LiveRange::const_iterator I, unsigned, unsigned); - void verifyLiveRange(const LiveRange&, unsigned, unsigned LaneMask = 0); + void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0); void verifyStackFrame(); + + void verifySlotIndexes() const; }; struct MachineVerifierPass : public MachineFunctionPass { @@ -273,6 +276,19 @@ void MachineFunction::verify(Pass *p, const char *Banner) const { .runOnMachineFunction(const_cast(*this)); } +void MachineVerifier::verifySlotIndexes() const { + if (Indexes == nullptr) + return; + + // Ensure the IdxMBB list is sorted by slot indexes. + SlotIndex Last; + for (SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(), + E = Indexes->MBBIndexEnd(); I != E; ++I) { + assert(!Last.isValid() || I->first > Last); + Last = I->first; + } +} + bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { foundErrors = 0; @@ -295,10 +311,12 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { Indexes = PASS->getAnalysisIfAvailable(); } + verifySlotIndexes(); + visitMachineFunctionBefore(); for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { - visitMachineBasicBlockBefore(MFI); + visitMachineBasicBlockBefore(&*MFI); // Keep track of the current bundle header. const MachineInstr *CurBundle = nullptr; // Do we expect the next instruction to be part of the same bundle? @@ -306,7 +324,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { - if (MBBI->getParent() != MFI) { + if (MBBI->getParent() != &*MFI) { report("Bad instruction parent pointer", MFI); errs() << "Instruction: " << *MBBI; continue; @@ -315,20 +333,22 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { // Check for consistent bundle flags. if (InBundle && !MBBI->isBundledWithPred()) report("Missing BundledPred flag, " - "BundledSucc was set on predecessor", MBBI); + "BundledSucc was set on predecessor", + &*MBBI); if (!InBundle && MBBI->isBundledWithPred()) report("BundledPred flag is set, " - "but BundledSucc not set on predecessor", MBBI); + "but BundledSucc not set on predecessor", + &*MBBI); // Is this a bundle header? 
if (!MBBI->isInsideBundle()) { if (CurBundle) visitMachineBundleAfter(CurBundle); - CurBundle = MBBI; + CurBundle = &*MBBI; visitMachineBundleBefore(CurBundle); } else if (!CurBundle) report("No bundle header", MBBI); - visitMachineInstrBefore(MBBI); + visitMachineInstrBefore(&*MBBI); for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { const MachineInstr &MI = *MBBI; const MachineOperand &Op = MI.getOperand(I); @@ -341,7 +361,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { visitMachineOperand(&Op, I); } - visitMachineInstrAfter(MBBI); + visitMachineInstrAfter(&*MBBI); // Was this the last bundled instruction? InBundle = MBBI->isBundledWithSucc(); @@ -350,7 +370,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { visitMachineBundleAfter(CurBundle); if (InBundle) report("BundledSucc flag set on last instruction in block", &MFI->back()); - visitMachineBasicBlockAfter(MFI); + visitMachineBasicBlockAfter(&*MFI); } visitMachineFunctionAfter(); @@ -375,7 +395,10 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { if (!foundErrors++) { if (Banner) errs() << "# " << Banner << '\n'; - MF->print(errs(), Indexes); + if (LiveInts != nullptr) + LiveInts->print(errs()); + else + MF->print(errs(), Indexes); } errs() << "*** Bad machine code: " << msg << " ***\n" << "- function: " << MF->getName() << "\n"; @@ -399,7 +422,8 @@ void MachineVerifier::report(const char *msg, const MachineInstr *MI) { errs() << "- instruction: "; if (Indexes && Indexes->hasIndex(MI)) errs() << Indexes->getInstructionIndex(MI) << '\t'; - MI->print(errs(), TM); + MI->print(errs(), /*SkipOpers=*/true); + errs() << '\n'; } void MachineVerifier::report(const char *msg, @@ -411,36 +435,24 @@ void MachineVerifier::report(const char *msg, errs() << "\n"; } -void MachineVerifier::report(const char *msg, const MachineFunction *MF, - const LiveInterval &LI) { - report(msg, MF); +void MachineVerifier::report_context(const LiveInterval &LI) const { errs() << "- interval: " << LI << '\n'; } -void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, - const LiveInterval &LI) { - report(msg, MBB); - errs() << "- interval: " << LI << '\n'; -} - -void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, - const LiveRange &LR, unsigned Reg, - unsigned LaneMask) { - report(msg, MBB); - errs() << "- liverange: " << LR << '\n'; +void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg, + LaneBitmask LaneMask) const { errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; if (LaneMask != 0) - errs() << "- lanemask: " << format("%04X\n", LaneMask); + errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; + errs() << "- liverange: " << LR << '\n'; } -void MachineVerifier::report(const char *msg, const MachineFunction *MF, - const LiveRange &LR, unsigned Reg, - unsigned LaneMask) { - report(msg, MF); - errs() << "- liverange: " << LR << '\n'; - errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; - if (LaneMask != 0) - errs() << "- lanemask: " << format("%04X\n", LaneMask); +void MachineVerifier::report_context(const LiveRange::Segment &S) const { + errs() << "- segment: " << S << '\n'; +} + +void MachineVerifier::report_context(const VNInfo &VNI) const { + errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n"; } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -507,11 +519,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { if (MRI->isSSA()) { // If 
this block has allocatable physical registers live-in, check that // it is an entry block or landing pad. - for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(), - LE = MBB->livein_end(); - LI != LE; ++LI) { - unsigned reg = *LI; - if (isAllocatable(reg) && !MBB->isLandingPad() && + for (const auto &LI : MBB->liveins()) { + if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() && MBB != MBB->getParent()->begin()) { report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB); } @@ -522,7 +531,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { SmallPtrSet LandingPadSuccs; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - if ((*I)->isLandingPad()) + if ((*I)->isEHPad()) LandingPadSuccs.insert(*I); if (!FunctionBlocks.count(*I)) report("MBB has successor that isn't part of the function.", MBB); @@ -547,10 +556,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { const MCAsmInfo *AsmInfo = TM->getMCAsmInfo(); const BasicBlock *BB = MBB->getBasicBlock(); + const Function *Fn = MF->getFunction(); if (LandingPadSuccs.size() > 1 && !(AsmInfo && AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj && - BB && isa(BB->getTerminator()))) + BB && isa(BB->getTerminator())) && + !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn()))) report("MBB has more than one landing pad successor", MBB); // Call AnalyzeBranch. If it succeeds, there several more conditions to check. @@ -562,7 +573,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { // check whether its answers match up with reality. if (!TBB && !FBB) { // Block falls through to its successor. - MachineFunction::const_iterator MBBI = MBB; + MachineFunction::const_iterator MBBI = MBB->getIterator(); ++MBBI; if (MBBI == MF->end()) { // It's possible that the block legitimately ends with a noreturn @@ -575,7 +586,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) { report("MBB exits via unconditional fall-through but doesn't have " "exactly one CFG successor!", MBB); - } else if (!MBB->isSuccessor(MBBI)) { + } else if (!MBB->isSuccessor(&*MBBI)) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } @@ -613,7 +624,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } } else if (TBB && !FBB && !Cond.empty()) { // Block conditionally branches somewhere, otherwise falls through. 
- MachineFunction::const_iterator MBBI = MBB; + MachineFunction::const_iterator MBBI = MBB->getIterator(); ++MBBI; if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); @@ -628,7 +639,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } else if (MBB->succ_size() != 2) { report("MBB exits via conditional branch/fall-through but doesn't have " "exactly two CFG successors!", MBB); - } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) { + } else if (!matchPair(MBB->succ_begin(), TBB, &*MBBI)) { report("MBB exits via conditional branch/fall-through but the CFG " "successors don't match the actual successors!", MBB); } @@ -680,13 +691,12 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { } regsLive.clear(); - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) { - if (!TargetRegisterInfo::isPhysicalRegister(*I)) { + for (const auto &LI : MBB->liveins()) { + if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) { report("MBB live-in list contains non-physical register", MBB); continue; } - for (MCSubRegIterator SubRegs(*I, TRI, /*IncludeSelf=*/true); + for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) regsLive.insert(*SubRegs); } @@ -822,9 +832,12 @@ void MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { const MachineInstr *MI = MO->getParent(); const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumDefs = MCID.getNumDefs(); + if (MCID.getOpcode() == TargetOpcode::PATCHPOINT) + NumDefs = (MONum == 0 && MO->isReg()) ? NumDefs : 0; // The first MCID.NumDefs operands must be explicit register defines - if (MONum < MCID.getNumDefs()) { + if (MONum < NumDefs) { const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; if (!MO->isReg()) report("Explicit definition must be a register", MO, MONum); @@ -972,13 +985,38 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { case MachineOperand::MO_FrameIndex: if (LiveStks && LiveStks->hasInterval(MO->getIndex()) && LiveInts && !LiveInts->isNotInMIMap(MI)) { - LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); + int FI = MO->getIndex(); + LiveInterval &LI = LiveStks->getInterval(FI); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (MI->mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { + + bool stores = MI->mayStore(); + bool loads = MI->mayLoad(); + // For a memory-to-memory move, we need to check if the frame + // index is used for storing or loading, by inspecting the + // memory operands. 
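Across this file the patch replaces the report() overloads that bundled all context into one call with small report_context() helpers. The resulting pattern, sketched here with an illustrative condition and variables rather than a specific verifier check:

  if (BrokenSegment) {
    report("Live segment is malformed", MBB); // location and headline first
    report_context(LR, Reg, LaneMask);        // then register/lanemask/range
    report_context(S);                        // then the offending segment
  }

Each helper appends "- key: value" context lines to the error, so a check attaches exactly the context it actually has.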
+ if (stores && loads) { + for (auto *MMO : MI->memoperands()) { + const PseudoSourceValue *PSV = MMO->getPseudoValue(); + if (PSV == nullptr) continue; + const FixedStackPseudoSourceValue *Value = + dyn_cast(PSV); + if (Value == nullptr) continue; + if (Value->getFrameIndex() != FI) continue; + + if (MMO->isStore()) + loads = false; + else + stores = false; + break; + } + if (loads == stores) + report("Missing fixed stack memoperand.", MI); + } + if (loads && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); errs() << "Live stack: " << LI << '\n'; } - if (MI->mayStore() && !LI.liveAt(Idx.getRegSlot())) { + if (stores && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); errs() << "Live stack: " << LI << '\n'; } @@ -1387,40 +1425,39 @@ void MachineVerifier::verifyLiveIntervals() { void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, const VNInfo *VNI, unsigned Reg, - unsigned LaneMask) { + LaneBitmask LaneMask) { if (VNI->isUnused()) return; const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); if (!DefVNI) { - report("Valno not live at def and not marked unused", MF, LR, Reg, - LaneMask); - errs() << "Valno #" << VNI->id << '\n'; + report("Value not live at VNInfo def and not marked unused", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } if (DefVNI != VNI) { - report("Live segment at def has different valno", MF, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def - << " where valno #" << DefVNI->id << " is live\n"; + report("Live segment at def has different VNInfo", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { - report("Invalid definition index", MF, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LR << '\n'; + report("Invalid VNInfo definition index", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } if (VNI->isPHIDef()) { if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MBB, LR, Reg, - LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def - << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; + report("PHIDef VNInfo is not defined at MBB start", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); } return; } @@ -1428,8 +1465,9 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, // Non-PHI def. const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { - report("No instruction at def index", MBB, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("No instruction at VNInfo def index", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); return; } @@ -1457,60 +1495,67 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, if (!hasDef) { report("Defining instruction does not modify register", MI); - errs() << "Valno #" << VNI->id << " in " << LR << '\n'; + report_context(LR, Reg, LaneMask); + report_context(*VNI); } // Early clobber defs begin at USE slots, but other defs must begin at // DEF slots. 
if (isEarlyClobber) { if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MBB, LR, - Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("Early clobber def must be at an early-clobber slot", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); } } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MBB, LR, Reg, LaneMask); - errs() << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + report("Non-PHI, non-early clobber def must be at a register slot", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); } } } void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const LiveRange::const_iterator I, - unsigned Reg, unsigned LaneMask) { + unsigned Reg, LaneBitmask LaneMask) +{ const LiveRange::Segment &S = *I; const VNInfo *VNI = S.valno; assert(VNI && "Live segment has no valno"); if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { - report("Foreign valno in live segment", MF, LR, Reg, LaneMask); - errs() << S << " has a bad valno\n"; + report("Foreign valno in live segment", MF); + report_context(LR, Reg, LaneMask); + report_context(S); + report_context(*VNI); } if (VNI->isUnused()) { - report("Live segment valno is marked unused", MF, LR, Reg, LaneMask); - errs() << S << '\n'; + report("Live segment valno is marked unused", MF); + report_context(LR, Reg, LaneMask); + report_context(S); } const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { - report("Bad start of live segment, no basic block", MF, LR, Reg, LaneMask); - errs() << S << '\n'; + report("Bad start of live segment, no basic block", MF); + report_context(LR, Reg, LaneMask); + report_context(S); return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); if (S.start != MBBStartIdx && S.start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + report("Live segment must begin at MBB entry or valno def", MBB); + report_context(LR, Reg, LaneMask); + report_context(S); } const MachineBasicBlock *EndMBB = LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { - report("Bad end of live segment, no basic block", MF, LR, Reg, LaneMask); - errs() << S << '\n'; + report("Bad end of live segment, no basic block", MF); + report_context(LR, Reg, LaneMask); + report_context(S); return; } @@ -1527,26 +1572,26 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, const MachineInstr *MI = LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + report("Live segment doesn't end at a valid instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); return; } // The block slot must refer to a basic block boundary. if (S.end.isBlock()) { - report("Live segment ends at B slot of an instruction", EndMBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + report("Live segment ends at B slot of an instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); } if (S.end.isDead()) { // Segment ends on the dead slot. // That means there must be a dead def. 
if (!SlotIndex::isSameInstr(S.start, S.end)) { - report("Live segment ending at dead slot spans instructions", EndMBB, LR, - Reg, LaneMask); - errs() << S << '\n'; + report("Live segment ending at dead slot spans instructions", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); } } @@ -1555,9 +1600,9 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, if (S.end.isEarlyClobber()) { if (I+1 == LR.end() || (I+1)->start != S.end) { report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", EndMBB, LR, Reg, - LaneMask); - errs() << S << '\n'; + "redefined by an EC def in the same instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); } } @@ -1587,14 +1632,15 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, !hasSubRegDef) { report("Instruction ending live segment doesn't read the register", MI); - errs() << S << " in " << LR << '\n'; + report_context(LR, Reg, LaneMask); + report_context(S); } } } } // Now check all the basic blocks in this live segment. - MachineFunction::const_iterator MFI = MBB; + MachineFunction::const_iterator MFI = MBB->getIterator(); // Is this live segment the beginning of a non-PHIDef VN? if (S.start == VNI->def && !VNI->isPHIDef()) { // Not live-in to any blocks. @@ -1604,10 +1650,10 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, ++MFI; } for (;;) { - assert(LiveInts->isLiveInToMBB(LR, MFI)); + assert(LiveInts->isLiveInToMBB(LR, &*MFI)); // We don't know how to track physregs into a landing pad. if (!TargetRegisterInfo::isVirtualRegister(Reg) && - MFI->isLandingPad()) { + MFI->isEHPad()) { if (&*MFI == EndMBB) break; ++MFI; @@ -1616,7 +1662,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // Is VNI a PHI-def in the current block? bool IsPHI = VNI->isPHIDef() && - VNI->def == LiveInts->getMBBStartIdx(MFI); + VNI->def == LiveInts->getMBBStartIdx(&*MFI); // Check that VNI is live-out of all predecessors. for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), @@ -1626,22 +1672,23 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, // All predecessors must have a live-out value. if (!PVNI) { - report("Register not marked live out of predecessor", *PI, LR, Reg, - LaneMask); - errs() << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " - << PEnd << '\n'; + report("Register not marked live out of predecessor", *PI); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + errs() << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " + << PEnd << '\n'; continue; } // Only PHI-defs can take different predecessor values. 
if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI, LR, Reg, - LaneMask); + report("Different value live out of predecessor", *PI); + report_context(LR, Reg, LaneMask); errs() << "Valno #" << PVNI->id << " live out of BB#" - << (*PI)->getNumber() << '@' << PEnd - << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << '\n'; + << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id + << " live into BB#" << MFI->getNumber() << '@' + << LiveInts->getMBBStartIdx(&*MFI) << '\n'; } } if (&*MFI == EndMBB) @@ -1651,7 +1698,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, } void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, - unsigned LaneMask) { + LaneBitmask LaneMask) { for (const VNInfo *VNI : LR.valnos) verifyLiveRangeValue(LR, VNI, Reg, LaneMask); @@ -1664,24 +1711,35 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { assert(TargetRegisterInfo::isVirtualRegister(Reg)); verifyLiveRange(LI, Reg); - unsigned Mask = 0; - unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + LaneBitmask Mask = 0; + LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); for (const LiveInterval::SubRange &SR : LI.subranges()) { - if ((Mask & SR.LaneMask) != 0) - report("Lane masks of sub ranges overlap in live interval", MF, LI); - if ((SR.LaneMask & ~MaxMask) != 0) - report("Subrange lanemask is invalid", MF, LI); + if ((Mask & SR.LaneMask) != 0) { + report("Lane masks of sub ranges overlap in live interval", MF); + report_context(LI); + } + if ((SR.LaneMask & ~MaxMask) != 0) { + report("Subrange lanemask is invalid", MF); + report_context(LI); + } + if (SR.empty()) { + report("Subrange must not be empty", MF); + report_context(SR, LI.reg, SR.LaneMask); + } Mask |= SR.LaneMask; verifyLiveRange(SR, LI.reg, SR.LaneMask); - if (!LI.covers(SR)) - report("A Subrange is not covered by the main range", MF, LI); + if (!LI.covers(SR)) { + report("A Subrange is not covered by the main range", MF); + report_context(LI); + } } // Check the LI only has one connected component. ConnectedVNInfoEqClasses ConEQ(*LiveInts); unsigned NumComp = ConEQ.Classify(&LI); if (NumComp > 1) { - report("Multiple connected components in live interval", MF, LI); + report("Multiple connected components in live interval", MF); + report_context(LI); for (unsigned comp = 0; comp != NumComp; ++comp) { errs() << comp << ": valnos"; for (LiveInterval::const_vni_iterator I = LI.vni_begin(), diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index d3433018004c..2c937926d0a7 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -548,7 +548,7 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) { bool PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, MachineLoopInfo *MLI) { - if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) + if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad()) return false; // Quick exit for basic blocks without PHIs. const MachineLoop *CurLoop = MLI ? 
MLI->getLoopFor(&MBB) : nullptr; diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp index 99bbad1cc280..4cabc3a8c1fd 100644 --- a/lib/CodeGen/PHIEliminationUtils.cpp +++ b/lib/CodeGen/PHIEliminationUtils.cpp @@ -28,7 +28,7 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, // Usually, we just want to insert the copy before the first terminator // instruction. However, for the edge going to a landing pad, we must insert // the copy before the call/invoke instruction. - if (!SuccMBB->isLandingPad()) + if (!SuccMBB->isEHPad()) return MBB->getFirstTerminator(); // Discover any defs/uses in this basic block. diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp new file mode 100644 index 000000000000..e73ba0296045 --- /dev/null +++ b/lib/CodeGen/ParallelCG.cpp @@ -0,0 +1,96 @@ +//===-- ParallelCG.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that can be used for parallel code generation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ParallelCG.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/thread.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SplitModule.h" + +using namespace llvm; + +static void codegen(Module *M, llvm::raw_pwrite_stream &OS, + const Target *TheTarget, StringRef CPU, StringRef Features, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL, + TargetMachine::CodeGenFileType FileType) { + std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( + M->getTargetTriple(), CPU, Features, Options, RM, CM, OL)); + + legacy::PassManager CodeGenPasses; + if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType)) + report_fatal_error("Failed to setup codegen"); + CodeGenPasses.run(*M); +} + +std::unique_ptr<Module> +llvm::splitCodeGen(std::unique_ptr<Module> M, + ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU, + StringRef Features, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, + TargetMachine::CodeGenFileType FileType) { + StringRef TripleStr = M->getTargetTriple(); + std::string ErrMsg; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!TheTarget) + report_fatal_error(Twine("Target not found: ") + ErrMsg); + + if (OSs.size() == 1) { + codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM, + OL, FileType); + return M; + } + + std::vector<thread> Threads; + SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) { + // We want to clone the module in a new context to multi-thread the codegen. + // We do it by serializing partition modules to bitcode (while still on the + // main thread, in order to avoid data races) and spinning up new threads + // which deserialize the partitions into separate contexts. + // FIXME: Provide a more direct way to do this in LLVM.
+ SmallVector<char, 0> BC; + raw_svector_ostream BCOS(BC); + WriteBitcodeToFile(MPart.get(), BCOS); + + llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()]; + Threads.emplace_back( + [TheTarget, CPU, Features, Options, RM, CM, OL, FileType, + ThreadOS](const SmallVector<char, 0> &BC) { + LLVMContext Ctx; + ErrorOr<std::unique_ptr<Module>> MOrErr = + parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()), + "<split-module>"), + Ctx); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); + + codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features, + Options, RM, CM, OL, FileType); + }, + // Pass BC using std::move to ensure that it gets moved rather than + // copied into the thread's context. + std::move(BC)); + }); + + for (thread &T : Threads) + T.join(); + + return {}; +} diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 024d166a4987..873f7125b82a 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -13,7 +13,11 @@ //===---------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/IR/IRPrintingPasses.h" @@ -52,9 +56,6 @@ static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, cl::desc("Disable Machine Common Subexpression Elimination")); -static cl::opt<cl::boolOrDefault> - EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, - cl::desc("enable the shrink-wrapping pass")); static cl::opt<cl::boolOrDefault> OptimizeRegAlloc( "optimize-regalloc", cl::Hidden, cl::desc("Enable optimized register allocation compilation path.")); @@ -95,10 +96,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with -// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it -// wouldn't be part of the standard pass pipeline, and the target would just add -// a PostRA scheduling pass wherever it wants. -static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, +// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). +// Targets can return true in targetSchedulesPostRAScheduling() and +// insert a PostRA scheduling pass wherever it wants. +cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); // Experimental option to run live interval analysis early.
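// Hedged background for the round-trip that follows (illustrative note,
// not part of the patch): an LLVMContext is not thread-safe, and every
// Module is bound to the context that created it, so a partition must be
// re-materialized in a private context before a worker thread may touch
// it. The shape of the per-thread step, with PartitionBuffer standing in
// for the serialized bitcode:
//
//   LLVMContext Ctx;                            // private context per worker
//   ErrorOr<std::unique_ptr<Module>> MOrErr =
//       parseBitcodeFile(PartitionBuffer, Ctx); // Module now bound to Ctx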
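A hedged usage sketch for the splitCodeGen() entry point added in ParallelCG.cpp above; M is assumed to be a std::unique_ptr<Module>, the two pre-opened output streams are assumptions, and the parameter values shown are illustrative defaults:

  std::vector<llvm::raw_pwrite_stream *> Streams = {&OS0, &OS1};
  M = llvm::splitCodeGen(std::move(M), Streams, /*CPU=*/"", /*Features=*/"",
                         llvm::TargetOptions(), llvm::Reloc::Default,
                         llvm::CodeModel::Default, llvm::CodeGenOpt::Default,
                         llvm::TargetMachine::CGFT_ObjectFile);

With one stream, splitCodeGen() codegens in place and hands the module back; with several, it consumes the module, emits one object per stream on its own thread, and returns null.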
@@ -188,6 +189,29 @@ char TargetPassConfig::ID = 0; char TargetPassConfig::EarlyTailDuplicateID = 0; char TargetPassConfig::PostRAMachineLICMID = 0; +namespace { +struct InsertedPass { + AnalysisID TargetPassID; + IdentifyingPassPtr InsertedPassID; + bool VerifyAfter; + bool PrintAfter; + + InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, + bool VerifyAfter, bool PrintAfter) + : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID), + VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {} + + Pass *getInsertedPass() const { + assert(InsertedPassID.isValid() && "Illegal Pass ID!"); + if (InsertedPassID.isInstance()) + return InsertedPassID.getInstance(); + Pass *NP = Pass::createPass(InsertedPassID.getID()); + assert(NP && "Pass ID not registered"); + return NP; + } +}; +} + namespace llvm { class PassConfigImpl { public: @@ -202,7 +226,7 @@ public: /// Store the pairs of <AnalysisID, IdentifyingPassPtr> of which the second pass /// is inserted after each instance of the first one. - SmallVector<std::pair<AnalysisID, IdentifyingPassPtr>, 4> InsertedPasses; + SmallVector<InsertedPass, 4> InsertedPasses; }; } // namespace llvm @@ -217,7 +241,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr), StopAfter(nullptr), Started(true), Stopped(false), AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), - DisableVerify(false), EnableTailMerge(true), EnableShrinkWrap(false) { + DisableVerify(false), EnableTailMerge(true) { Impl = new PassConfigImpl(); @@ -225,6 +249,10 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // including this pass itself. initializeCodeGen(*PassRegistry::getPassRegistry()); + // Also register alias analysis passes required by codegen passes. + initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + // Substitute Pseudo Pass IDs for real ones. substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); @@ -232,14 +260,15 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) /// Insert InsertedPassID pass after TargetPassID. void TargetPassConfig::insertPass(AnalysisID TargetPassID, - IdentifyingPassPtr InsertedPassID) { + IdentifyingPassPtr InsertedPassID, + bool VerifyAfter, bool PrintAfter) { assert(((!InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getID()) || (InsertedPassID.isInstance() && TargetPassID != InsertedPassID.getInstance()->getPassID())) && "Insert a pass after itself!"); - std::pair<AnalysisID, IdentifyingPassPtr> P(TargetPassID, InsertedPassID); - Impl->InsertedPasses.push_back(P); + Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter, + PrintAfter); } /// createPassConfig - Create a pass configuration object to be used by @@ -304,21 +333,9 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { } // Add the passes after the pass P if there is any.
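A hedged sketch of the extended insertPass() hook as seen from a target's pass config; the target and pass names are placeholders, not from this patch:

  void MyTargetPassConfig::addPreRegAlloc() {
    // Run MyTargetFixupPassID after PHI elimination without triggering
    // -verify-machineinstrs or -print-after-all for the inserted pass.
    insertPass(&PHIEliminationID, &MyTargetFixupPassID,
               /*VerifyAfter=*/false, /*PrintAfter=*/false);
  }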
- for (SmallVectorImpl >::iterator - I = Impl->InsertedPasses.begin(), - E = Impl->InsertedPasses.end(); - I != E; ++I) { - if ((*I).first == PassID) { - assert((*I).second.isValid() && "Illegal Pass ID!"); - Pass *NP; - if ((*I).second.isInstance()) - NP = (*I).second.getInstance(); - else { - NP = Pass::createPass((*I).second.getID()); - assert(NP && "Pass ID not registered"); - } - addPass(NP, false, false); - } + for (auto IP : Impl->InsertedPasses) { + if (IP.TargetPassID == PassID) + addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter); } } else { delete P; @@ -380,10 +397,10 @@ void TargetPassConfig::addIRPasses() { // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. if (UseCFLAA) - addPass(createCFLAliasAnalysisPass()); - addPass(createTypeBasedAliasAnalysisPass()); - addPass(createScopedNoAliasAAPass()); - addPass(createBasicAliasAnalysisPass()); + addPass(createCFLAAWrapperPass()); + addPass(createTypeBasedAAWrapperPass()); + addPass(createScopedNoAliasAAWrapperPass()); + addPass(createBasicAAWrapperPass()); // Before running any passes, run the verifier to determine if the input // coming from the front-end and/or optimizer is valid. @@ -461,7 +478,7 @@ void TargetPassConfig::addISelPrepare() { // Add both the safe stack and the stack protection passes: each of them will // only protect functions that have corresponding attributes. - addPass(createSafeStackPass()); + addPass(createSafeStackPass(TM)); addPass(createStackProtectorPass(TM)); if (PrintISelInput) @@ -539,8 +556,9 @@ void TargetPassConfig::addMachinePasses() { addPostRegAlloc(); // Insert prolog/epilog code. Eliminate abstract frame index references... - if (getEnableShrinkWrap()) + if (getOptLevel() != CodeGenOpt::None) addPass(&ShrinkWrapID); + addPass(&PrologEpilogCodeInserterID); /// Add passes that optimize machine instructions after register allocation. @@ -557,7 +575,10 @@ void TargetPassConfig::addMachinePasses() { addPass(&ImplicitNullChecksID); // Second pass scheduler. - if (getOptLevel() != CodeGenOpt::None) { + // Let Target optionally insert this pass by itself at some other + // point. + if (getOptLevel() != CodeGenOpt::None && + !TM->targetSchedulesPostRAScheduling()) { if (MISchedPostRA) addPass(&PostMachineSchedulerID); else @@ -576,7 +597,10 @@ void TargetPassConfig::addMachinePasses() { addPreEmitPass(); + addPass(&FuncletLayoutID, false); + addPass(&StackMapLivenessID, false); + addPass(&LiveDebugValuesID, false); AddingMachinePasses = false; } @@ -613,27 +637,12 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&MachineCSEID, false); addPass(&MachineSinkingID); - addPass(&PeepholeOptimizerID, false); + addPass(&PeepholeOptimizerID); // Clean-up the dead code that may have been generated by peephole // rewriting. addPass(&DeadMachineInstructionElimID); } -bool TargetPassConfig::getEnableShrinkWrap() const { - switch (EnableShrinkWrapOpt) { - case cl::BOU_UNSET: - return EnableShrinkWrap && getOptLevel() != CodeGenOpt::None; - // If EnableShrinkWrap is set, it takes precedence on whatever the - // target sets. The rational is that we assume we want to test - // something related to shrink-wrapping. 
- case cl::BOU_TRUE: - return true; - case cl::BOU_FALSE: - return false; - } - llvm_unreachable("Invalid shrink-wrapping state"); -} - //===---------------------------------------------------------------------===// /// Register Allocation Pass Configuration //===---------------------------------------------------------------------===// @@ -717,7 +726,8 @@ void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { addPass(&PHIEliminationID, false); addPass(&TwoAddressInstructionPassID, false); - addPass(RegAllocPass); + if (RegAllocPass) + addPass(RegAllocPass); } /// Add standard target-independent passes that are tightly coupled with @@ -748,25 +758,27 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // PreRA instruction scheduling. addPass(&MachineSchedulerID); - // Add the selected register allocation pass. - addPass(RegAllocPass); + if (RegAllocPass) { + // Add the selected register allocation pass. + addPass(RegAllocPass); - // Allow targets to change the register assignments before rewriting. - addPreRewrite(); + // Allow targets to change the register assignments before rewriting. + addPreRewrite(); - // Finally rewrite virtual registers. - addPass(&VirtRegRewriterID); + // Finally rewrite virtual registers. + addPass(&VirtRegRewriterID); - // Perform stack slot coloring and post-ra machine LICM. - // - // FIXME: Re-enable coloring with register when it's capable of adding - // kill markers. - addPass(&StackSlotColoringID); + // Perform stack slot coloring and post-ra machine LICM. + // + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + addPass(&StackSlotColoringID); - // Run post-ra machine LICM to hoist reloads / remats. - // - // FIXME: can this move into MachineLateOptimization? - addPass(&PostRAMachineLICMID); + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? + addPass(&PostRAMachineLICMID); + } } //===---------------------------------------------------------------------===// diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index ebe05e3f2731..52b42b624ee1 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -43,7 +43,7 @@ // - Optimize Loads: // // Loads that can be folded into a later instruction. A load is foldable -// if it loads to virtual registers and the virtual register defined has +// if it loads to virtual registers and the virtual register defined has // a single use. // // - Optimize Copies and Bitcast (more generally, target specific copies): @@ -98,6 +98,16 @@ static cl::opt DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); +static cl::opt DisableNAPhysCopyOpt( + "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), + cl::desc("Disable non-allocatable physical register copy optimization")); + +// Limit the number of PHI instructions to process +// in PeepholeOptimizer::getNextSource. 
+static cl::opt RewritePHILimit( + "rewrite-phi-limit", cl::Hidden, cl::init(10), + cl::desc("Limit the length of PHI chains to lookup")); + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); @@ -105,8 +115,11 @@ STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); +STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { + class ValueTrackerResult; + class PeepholeOptimizer : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -130,6 +143,10 @@ namespace { } } + /// \brief Track Def -> Use info used for rewriting copies. + typedef SmallDenseMap + RewriteMapTy; + private: bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, @@ -137,17 +154,38 @@ namespace { bool optimizeSelect(MachineInstr *MI, SmallPtrSetImpl &LocalMIs); bool optimizeCondBranch(MachineInstr *MI); - bool optimizeCopyOrBitcast(MachineInstr *MI); bool optimizeCoalescableCopy(MachineInstr *MI); bool optimizeUncoalescableCopy(MachineInstr *MI, SmallPtrSetImpl &LocalMIs); - bool findNextSource(unsigned &Reg, unsigned &SubReg); + bool findNextSource(unsigned Reg, unsigned SubReg, + RewriteMapTy &RewriteMap); bool isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); + + /// \brief If copy instruction \p MI is a virtual register copy, track it in + /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was + /// previously seen as a copy, replace the uses of this copy with the + /// previously seen copy's destination register. + bool foldRedundantCopy(MachineInstr *MI, + SmallSet &CopySrcRegs, + DenseMap &CopyMIs); + + /// \brief Is the register \p Reg a non-allocatable physical register? + bool isNAPhysCopy(unsigned Reg); + + /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical + /// register copy, track it in the \p NAPhysToVirtMIs map. If this + /// non-allocatable physical register was previously copied to a virtual + /// registered and hasn't been clobbered, the virt->phys copy can be + /// deleted. + bool foldRedundantNAPhysCopy( + MachineInstr *MI, + DenseMap &NAPhysToVirtMIs); + bool isLoadFoldable(MachineInstr *MI, SmallSet &FoldAsLoadDefCandidates); @@ -171,6 +209,69 @@ namespace { } }; + /// \brief Helper class to hold a reply for ValueTracker queries. Contains the + /// returned sources for a given search and the instructions where the sources + /// were tracked from. + class ValueTrackerResult { + private: + /// Track all sources found by one ValueTracker query. + SmallVector RegSrcs; + + /// Instruction using the sources in 'RegSrcs'. 
+ const MachineInstr *Inst; + + public: + ValueTrackerResult() : Inst(nullptr) {} + ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) { + addSource(Reg, SubReg); + } + + bool isValid() const { return getNumSources() > 0; } + + void setInst(const MachineInstr *I) { Inst = I; } + const MachineInstr *getInst() const { return Inst; } + + void clear() { + RegSrcs.clear(); + Inst = nullptr; + } + + void addSource(unsigned SrcReg, unsigned SrcSubReg) { + RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg)); + } + + void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { + assert(Idx < getNumSources() && "Reg pair source out of index"); + RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg); + } + + int getNumSources() const { return RegSrcs.size(); } + + unsigned getSrcReg(int Idx) const { + assert(Idx < getNumSources() && "Reg source out of index"); + return RegSrcs[Idx].Reg; + } + + unsigned getSrcSubReg(int Idx) const { + assert(Idx < getNumSources() && "SubReg source out of index"); + return RegSrcs[Idx].SubReg; + } + + bool operator==(const ValueTrackerResult &Other) { + if (Other.getInst() != getInst()) + return false; + + if (Other.getNumSources() != getNumSources()) + return false; + + for (int i = 0, e = Other.getNumSources(); i != e; ++i) + if (Other.getSrcReg(i) != getSrcReg(i) || + Other.getSrcSubReg(i) != getSrcSubReg(i)) + return false; + return true; + } + }; + /// \brief Helper class to track the possible sources of a value defined by /// a (chain of) copy related instructions. /// Given a definition (instruction and definition index), this class @@ -213,23 +314,25 @@ namespace { /// \brief Dispatcher to the right underlying implementation of /// getNextSource. - bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceImpl(); /// \brief Specialized version of getNextSource for Copy instructions. - bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromCopy(); /// \brief Specialized version of getNextSource for Bitcast instructions. - bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromBitcast(); /// \brief Specialized version of getNextSource for RegSequence /// instructions. - bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromRegSequence(); /// \brief Specialized version of getNextSource for InsertSubreg /// instructions. - bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromInsertSubreg(); /// \brief Specialized version of getNextSource for ExtractSubreg /// instructions. - bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromExtractSubreg(); /// \brief Specialized version of getNextSource for SubregToReg /// instructions. - bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); + ValueTrackerResult getNextSourceFromSubregToReg(); + /// \brief Specialized version of getNextSource for PHI instructions. + ValueTrackerResult getNextSourceFromPHI(); public: /// \brief Create a ValueTracker instance for the value defined by \p Reg. @@ -276,16 +379,10 @@ namespace { /// \brief Following the use-def chain, get the next available source /// for the tracked value. - /// When the returned value is not nullptr, \p SrcReg gives the register - /// that contain the tracked value. 
- /// \note The sub register index returned in \p SrcSubReg must be used - /// on \p SrcReg to access the actual value. - /// \return Unless the returned value is nullptr (i.e., no source found), - /// \p SrcReg gives the register of the next source used in the returned - /// instruction and \p SrcSubReg the sub-register index to be used on that - /// source to get the tracked value. When nullptr is returned, no - /// alternative source has been found. - const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); + /// \return A ValueTrackerResult containing a set of registers + /// and sub registers with tracked values. A ValueTrackerResult with + /// an empty set of registers means no source was found. + ValueTrackerResult getNextSource(); /// \brief Get the last register where the initial value can be found. /// Initially this is the register of the definition. @@ -303,11 +400,10 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", "Peephole Optimizations", false, false) -/// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads -/// a single register and writes a single register and it does not modify the -/// source, and if the source value is preserved as a sub-register of the -/// result, then replace all reachable uses of the source with the subreg of the -/// result. +/// If instruction is a copy-like instruction, i.e. it reads a single register +/// and writes a single register and it does not modify the source, and if the +/// source value is preserved as a sub-register of the result, then replace all +/// reachable uses of the source with the subreg of the result. /// /// Do not generate an EXTRACT that is used only in a debug use, as this changes /// the code. Since this code does not currently share EXTRACTs, just ignore all @@ -458,10 +554,10 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } -/// optimizeCmpInstr - If the instruction is a compare and the previous -/// instruction it's comparing against all ready sets (or could be modified to -/// set) the same flag as the compare, then we can remove the comparison and use -/// the flag from the previous instruction. +/// If the instruction is a compare and the previous instruction it's comparing +/// against already sets (or could be modified to set) the same flag as the +/// compare, then we can remove the comparison and use the flag from the +/// previous instruction. bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB) { // If this instruction is a comparison against zero and isn't comparing a @@ -506,88 +602,138 @@ bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { return TII->optimizeCondBranch(MI); } -/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) -/// share the same register file. -static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, - const TargetRegisterClass *DefRC, - unsigned DefSubReg, - const TargetRegisterClass *SrcRC, - unsigned SrcSubReg) { - // Same register class. - if (DefRC == SrcRC) - return true; - - // Both operands are sub registers. Check if they share a register class. - unsigned SrcIdx, DefIdx; - if (SrcSubReg && DefSubReg) - return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, - SrcIdx, DefIdx) != nullptr; - // At most one of the register is a sub register, make it Src to avoid - // duplicating the test. 
- if (!SrcSubReg) { - std::swap(DefSubReg, SrcSubReg); - std::swap(DefRC, SrcRC); - } - - // One of the register is a sub register, check if we can get a superclass. - if (SrcSubReg) - return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; - // Plain copy. - return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; -} - /// \brief Try to find the next source that share the same register file /// for the value defined by \p Reg and \p SubReg. -/// When true is returned, \p Reg and \p SubReg are updated with the -/// register number and sub-register index of the new source. +/// When true is returned, the \p RewriteMap can be used by the client to +/// retrieve all Def -> Use along the way up to the next source. Any found +/// Use that is not itself a key for another entry, is the next source to +/// use. During the search for the next source, multiple sources can be found +/// given multiple incoming sources of a PHI instruction. In this case, we +/// look in each PHI source for the next source; all found next sources must +/// share the same register file as \p Reg and \p SubReg. The client should +/// then be capable to rewrite all intermediate PHIs to get the next source. /// \return False if no alternative sources are available. True otherwise. -bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { +bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, + RewriteMapTy &RewriteMap) { // Do not try to find a new source for a physical register. // So far we do not have any motivating example for doing that. // Thus, instead of maintaining untested code, we will revisit that if // that changes at some point. if (TargetRegisterInfo::isPhysicalRegister(Reg)) return false; - const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); - unsigned DefSubReg = SubReg; - unsigned Src; - unsigned SrcSubReg; - bool ShouldRewrite = false; + SmallVector SrcToLook; + TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg); + SrcToLook.push_back(CurSrcPair); - // Follow the chain of copies until we reach the top of the use-def chain - // or find a more suitable source. - ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); - do { - unsigned CopySrcReg, CopySrcSubReg; - if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) - break; - Src = CopySrcReg; - SrcSubReg = CopySrcSubReg; + unsigned PHICount = 0; + while (!SrcToLook.empty() && PHICount < RewritePHILimit) { + TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val(); + // As explained above, do not handle physical registers + if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg)) + return false; - // Do not extend the live-ranges of physical registers as they add - // constraints to the register allocator. - // Moreover, if we want to extend the live-range of a physical register, - // unlike SSA virtual register, we will have to check that they are not - // redefine before the related use. - if (TargetRegisterInfo::isPhysicalRegister(Src)) - break; + CurSrcPair = Pair; + ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, + !DisableAdvCopyOpt, TII); + ValueTrackerResult Res; + bool ShouldRewrite = false; - const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); + do { + // Follow the chain of copies until we reach the top of the use-def chain + // or find a more suitable source. + Res = ValTracker.getNextSource(); + if (!Res.isValid()) + break; - // If this source does not incur a cross register bank copy, use it. 
- ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, - SrcSubReg); - } while (!ShouldRewrite); + // Insert the Def -> Use entry for the recently found source. + ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); + if (CurSrcRes.isValid()) { + assert(CurSrcRes == Res && "ValueTrackerResult found must match"); + // An existent entry with multiple sources is a PHI cycle we must avoid. + // Otherwise it's an entry with a valid next source we already found. + if (CurSrcRes.getNumSources() > 1) { + DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n"); + return false; + } + break; + } + RewriteMap.insert(std::make_pair(CurSrcPair, Res)); + + // ValueTrackerResult usually have one source unless it's the result from + // a PHI instruction. Add the found PHI edges to be looked up further. + unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs > 1) { + PHICount++; + for (unsigned i = 0; i < NumSrcs; ++i) + SrcToLook.push_back(TargetInstrInfo::RegSubRegPair( + Res.getSrcReg(i), Res.getSrcSubReg(i))); + break; + } + + CurSrcPair.Reg = Res.getSrcReg(0); + CurSrcPair.SubReg = Res.getSrcSubReg(0); + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. Moreover, if we want to extend + // the live-range of a physical register, unlike SSA virtual register, + // we will have to check that they aren't redefine before the related use. + if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + return false; + + const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); + ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, + CurSrcPair.SubReg); + } while (!ShouldRewrite); + + // Continue looking for new sources... + if (Res.isValid()) + continue; + + // Do not continue searching for a new source if the there's at least + // one use-def which cannot be rewritten. + if (!ShouldRewrite) + return false; + } + + if (PHICount >= RewritePHILimit) { + DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); + return false; + } // If we did not find a more suitable source, there is nothing to optimize. - if (!ShouldRewrite || Src == Reg) - return false; + return CurSrcPair.Reg != Reg; +} - Reg = Src; - SubReg = SrcSubReg; - return true; +/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are +/// guaranteed to have the same register class. This is necessary whenever we +/// successfully traverse a PHI instruction and find suitable sources coming +/// from its edges. By inserting a new PHI, we provide a rewritten PHI def +/// suitable to be used in a new COPY instruction. +static MachineInstr * +insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + const SmallVectorImpl &SrcRegs, + MachineInstr *OrigPHI) { + assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); + + const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); + unsigned NewVR = MRI->createVirtualRegister(NewRC); + MachineBasicBlock *MBB = OrigPHI->getParent(); + MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), + TII->get(TargetOpcode::PHI), NewVR); + + unsigned MBBOpIdx = 2; + for (auto RegPair : SrcRegs) { + MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); + MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB()); + // Since we're extended the lifetime of RegPair.Reg, clear the + // kill flags to account for that and make RegPair.Reg reaches + // the new PHI. 
+ MRI->clearKillFlags(RegPair.Reg); + MBBOpIdx += 2; + } + + return MIB; } namespace { @@ -624,7 +770,7 @@ public: /// This source defines the whole definition, i.e., /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). /// - /// The second and subsequent calls will return false, has there is only one + /// The second and subsequent calls will return false, as there is only one /// rewritable source. /// /// \return True if a rewritable source has been found, false otherwise. @@ -632,9 +778,9 @@ public: virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, unsigned &TrackReg, unsigned &TrackSubReg) { - // If CurrentSrcIdx == 1, this means this function has already been - // called once. CopyLike has one defintiion and one argument, thus, - // there is nothing else to rewrite. + // If CurrentSrcIdx == 1, this means this function has already been called + // once. CopyLike has one definition and one argument, thus, there is + // nothing else to rewrite. if (!CopyLike.isCopy() || CurrentSrcIdx == 1) return false; // This is the first call to getNextRewritableSource. @@ -653,7 +799,7 @@ public: /// \brief Rewrite the current source with \p NewReg and \p NewSubReg /// if possible. - /// \return True if the rewritting was possible, false otherwise. + /// \return True if the rewriting was possible, false otherwise. virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { if (!CopyLike.isCopy() || CurrentSrcIdx != 1) return false; @@ -662,6 +808,157 @@ public: MOSrc.setSubReg(NewSubReg); return true; } + + /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find + /// the new source to use for rewrite. If \p HandleMultipleSources is true and + /// multiple sources for a given \p Def are found along the way, we found a + /// PHI instructions that needs to be rewritten. + /// TODO: HandleMultipleSources should be removed once we test PHI handling + /// with coalescable copies. + TargetInstrInfo::RegSubRegPair + getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap, + bool HandleMultipleSources = true) { + + TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); + do { + ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); + // If there are no entries on the map, LookupSrc is the new source. + if (!Res.isValid()) + return LookupSrc; + + // There's only one source for this definition, keep searching... + unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs == 1) { + LookupSrc.Reg = Res.getSrcReg(0); + LookupSrc.SubReg = Res.getSrcSubReg(0); + continue; + } + + // TODO: Remove once multiple srcs w/ coalescable copies are supported. + if (!HandleMultipleSources) + break; + + // Multiple sources, recurse into each source to find a new source + // for it. Then, rewrite the PHI accordingly to its new edges. + SmallVector NewPHISrcs; + for (unsigned i = 0; i < NumSrcs; ++i) { + TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i), + Res.getSrcSubReg(i)); + NewPHISrcs.push_back( + getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); + } + + // Build the new PHI node and return its def register as the new source. 
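The recursion just shown in getNewSource, look up the current pair in the RewriteMap, follow single-source entries, and recurse over every edge of a multi-source (PHI) entry, can be modeled compactly. A runnable sketch with plain ints standing in for RegSubRegPair (illustrative only; a real rewriter would materialize a new PHI over the resolved edges and return its def):

    #include <iostream>
    #include <map>
    #include <vector>

    using RewriteMap = std::map<int, std::vector<int>>;

    int getNewSource(const RewriteMap &RM, int Def) {
      auto It = RM.find(Def);
      if (It == RM.end())
        return Def;                       // no entry: Def is the new source
      const std::vector<int> &Srcs = It->second;
      if (Srcs.size() == 1)
        return getNewSource(RM, Srcs[0]); // single source: keep walking
      // Multiple sources: resolve each PHI edge recursively. Here we only
      // print them; the LLVM code builds a fresh PHI from these values.
      for (int S : Srcs)
        std::cout << "phi edge resolves to " << getNewSource(RM, S) << '\n';
      return Def;
    }

    int main() {
      RewriteMap RM{{10, {7}}, {7, {3, 4}}, {3, {1}}};
      std::cout << "new source for 10: " << getNewSource(RM, 10) << '\n';
    }

The termination argument mirrors the patch: findNextSource refuses to record PHI cycles and bounds the number of PHIs visited, so the map walked here is acyclic.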
+ MachineInstr *OrigPHI = const_cast(Res.getInst()); + MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI); + DEBUG(dbgs() << "-- getNewSource\n"); + DEBUG(dbgs() << " Replacing: " << *OrigPHI); + DEBUG(dbgs() << " With: " << *NewPHI); + const MachineOperand &MODef = NewPHI->getOperand(0); + return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + + } while (1); + + return TargetInstrInfo::RegSubRegPair(0, 0); + } + + /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap + /// and create a new COPY instruction. More info about RewriteMap in + /// PeepholeOptimizer::findNextSource. Right now this is only used to handle + /// Uncoalescable copies, since they are copy like instructions that aren't + /// recognized by the register allocator. + virtual MachineInstr * + RewriteSource(TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap) { + return nullptr; + } +}; + +/// \brief Helper class to rewrite uncoalescable copy like instructions +/// into new COPY (coalescable friendly) instructions. +class UncoalescableRewriter : public CopyRewriter { +protected: + const TargetInstrInfo &TII; + MachineRegisterInfo &MRI; + /// The number of defs in the bitcast + unsigned NumDefs; + +public: + UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) + : CopyRewriter(MI), TII(TII), MRI(MRI) { + NumDefs = MI.getDesc().getNumDefs(); + } + + /// \brief Get the next rewritable def source (TrackReg, TrackSubReg) + /// All such sources need to be considered rewritable in order to + /// rewrite a uncoalescable copy-like instruction. This method return + /// each definition that must be checked if rewritable. + /// + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // Find the next non-dead definition and continue from there. + if (CurrentSrcIdx == NumDefs) + return false; + + while (CopyLike.getOperand(CurrentSrcIdx).isDead()) { + ++CurrentSrcIdx; + if (CurrentSrcIdx == NumDefs) + return false; + } + + // What we track are the alternative sources of the definition. + const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + + CurrentSrcIdx++; + return true; + } + + /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap + /// and create a new COPY instruction. More info about RewriteMap in + /// PeepholeOptimizer::findNextSource. Right now this is only used to handle + /// Uncoalescable copies, since they are copy like instructions that aren't + /// recognized by the register allocator. + MachineInstr * + RewriteSource(TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap) override { + assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + "We do not rewrite physical registers"); + + // Find the new source to use in the COPY rewrite. + TargetInstrInfo::RegSubRegPair NewSrc = + getNewSource(&MRI, &TII, Def, RewriteMap); + + // Insert the COPY. 
+ const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg); + unsigned NewVR = MRI.createVirtualRegister(DefRC); + + MachineInstr *NewCopy = + BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), + TII.get(TargetOpcode::COPY), NewVR) + .addReg(NewSrc.Reg, 0, NewSrc.SubReg); + + NewCopy->getOperand(0).setSubReg(Def.SubReg); + if (Def.SubReg) + NewCopy->getOperand(0).setIsUndef(); + + DEBUG(dbgs() << "-- RewriteSource\n"); + DEBUG(dbgs() << " Replacing: " << CopyLike); + DEBUG(dbgs() << " With: " << *NewCopy); + MRI.replaceRegWith(Def.Reg, NewVR); + MRI.clearKillFlags(NewVR); + + // We extended the lifetime of NewSrc.Reg, clear the kill flags to + // account for that. + MRI.clearKillFlags(NewSrc.Reg); + + return NewCopy; + } }; /// \brief Specialized rewriter for INSERT_SUBREG instruction. @@ -699,7 +996,7 @@ public: // partial definition. TrackReg = MODef.getReg(); if (MODef.getSubReg()) - // Bails if we have to compose sub-register indices. + // Bail if we have to compose sub-register indices. return false; TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); return true; @@ -740,7 +1037,7 @@ public: CurrentSrcIdx = 1; const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); SrcReg = MOExtractedReg.getReg(); - // If we have to compose sub-register indices, bails out. + // If we have to compose sub-register indices, bail out. if (MOExtractedReg.getSubReg()) return false; @@ -818,7 +1115,7 @@ public: } const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); SrcReg = MOInsertedReg.getReg(); - // If we have to compose sub-register indices, bails out. + // If we have to compose sub-register indices, bail out. if ((SrcSubReg = MOInsertedReg.getSubReg())) return false; @@ -828,7 +1125,7 @@ public: const MachineOperand &MODef = CopyLike.getOperand(0); TrackReg = MODef.getReg(); - // If we have to compose sub-registers, bails. + // If we have to compose sub-registers, bail. return MODef.getSubReg() == 0; } @@ -850,7 +1147,13 @@ public: /// \return A pointer to a dynamically allocated CopyRewriter or nullptr /// if no rewriter works for \p MI. static CopyRewriter *getCopyRewriter(MachineInstr &MI, - const TargetInstrInfo &TII) { + const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) { + // Handle uncoalescable copy-like instructions. + if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike())) + return new UncoalescableRewriter(MI, TII, MRI); + switch (MI.getOpcode()) { default: return nullptr; @@ -874,7 +1177,7 @@ static CopyRewriter *getCopyRewriter(MachineInstr &MI, /// the same register bank. /// New copies issued by this optimization are register allocator /// friendly. This optimization does not remove any copy as it may -/// overconstraint the register allocator, but replaces some operands +/// overconstrain the register allocator, but replaces some operands /// when possible. /// \pre isCoalescableCopy(*MI) is true. /// \return True, when \p MI has been rewritten. False otherwise. @@ -889,25 +1192,33 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { bool Changed = false; // Get the right rewriter for the current copy. - std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII)); - // If none exists, bails out. + std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); + // If none exists, bail out. if (!CpyRewriter) return false; // Rewrite each rewritable source. 
unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, TrackSubReg)) { - unsigned NewSrc = TrackReg; - unsigned NewSubReg = TrackSubReg; - // Try to find a more suitable source. - // If we failed to do so, or get the actual source, - // move to the next source. - if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) + // Keep track of PHI nodes and its incoming edges when looking for sources. + RewriteMapTy RewriteMap; + // Try to find a more suitable source. If we failed to do so, or get the + // actual source, move to the next source. + if (!findNextSource(TrackReg, TrackSubReg, RewriteMap)) continue; + + // Get the new source to rewrite. TODO: Only enable handling of multiple + // sources (PHIs) once we have a motivating example and testcases for it. + TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg); + TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource( + MRI, TII, TrackPair, RewriteMap, false /* multiple sources */); + if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0) + continue; + // Rewrite source. - if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { + if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { // We may have extended the live-range of NewSrc, account for that. - MRI->clearKillFlags(NewSrc); + MRI->clearKillFlags(NewSrc.Reg); Changed = true; } } @@ -936,61 +1247,53 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy( assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); // Check if we can rewrite all the values defined by this instruction. - SmallVector< - std::pair, - 4> RewritePairs; - for (const MachineOperand &MODef : MI->defs()) { - if (MODef.isDead()) - // We can ignore those. - continue; + SmallVector RewritePairs; + // Get the right rewriter for the current copy. + std::unique_ptr CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); + // If none exists, bail out. + if (!CpyRewriter) + return false; + // Rewrite each rewritable source by generating new COPYs. This works + // differently from optimizeCoalescableCopy since it first makes sure that all + // definitions can be rewritten. + RewriteMapTy RewriteMap; + unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg; + while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg, + CopyDefSubReg)) { // If a physical register is here, this is probably for a good reason. // Do not rewrite that. - if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg)) return false; // If we do not know how to rewrite this definition, there is no point // in trying to kill this instruction. - TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); - TargetInstrInfo::RegSubRegPair Src = Def; - if (!findNextSource(Src.Reg, Src.SubReg)) + TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg); + if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap)) return false; - RewritePairs.push_back(std::make_pair(Def, Src)); + + RewritePairs.push_back(Def); } + // The change is possible for all defs, do it. - for (const auto &PairDefSrc : RewritePairs) { - const auto &Def = PairDefSrc.first; - const auto &Src = PairDefSrc.second; + for (const auto &Def : RewritePairs) { // Rewrite the "copy" in a way the register coalescer understands. 
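optimizeUncoalescableCopy, whose rewrite loop continues below, is deliberately two-phase: it first proves that every definition has a rewritable source (returning early with no side effects otherwise), and only then materializes the new COPYs. A minimal sketch of that validate-then-apply shape, with illustrative types rather than the LLVM API:

    #include <iostream>
    #include <optional>
    #include <vector>

    // Toy stand-in for findNextSource: even defs have a source, odd do not.
    std::optional<int> findNextSource(int Def) {
      return Def % 2 == 0 ? std::optional<int>(Def / 2) : std::nullopt;
    }

    bool rewriteAll(const std::vector<int> &Defs) {
      std::vector<int> Planned;
      for (int D : Defs) {              // phase 1: plan, no side effects
        auto Src = findNextSource(D);
        if (!Src)
          return false;                 // any failure aborts before mutation
        Planned.push_back(*Src);
      }
      for (size_t I = 0; I < Defs.size(); ++I) // phase 2: apply
        std::cout << "rewrite " << Defs[I] << " -> " << Planned[I] << '\n';
      return true;
    }

    int main() {
      std::cout << rewriteAll({8, 4}) << '\n'; // succeeds, both rewritten
      std::cout << rewriteAll({8, 3}) << '\n'; // aborts, nothing rewritten
    }

Separating the phases is what makes it safe for the caller to erase the original instruction unconditionally once the apply loop has run.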
- assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && - "We do not rewrite physical registers"); - const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); - unsigned NewVR = MRI->createVirtualRegister(DefRC); - MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - NewVR).addReg(Src.Reg, 0, Src.SubReg); - NewCopy->getOperand(0).setSubReg(Def.SubReg); - if (Def.SubReg) - NewCopy->getOperand(0).setIsUndef(); + MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap); + assert(NewCopy && "Should be able to always generate a new copy"); LocalMIs.insert(NewCopy); - MRI->replaceRegWith(Def.Reg, NewVR); - MRI->clearKillFlags(NewVR); - // We extended the lifetime of Src. - // Clear the kill flags to account for that. - MRI->clearKillFlags(Src.Reg); } + // MI is now dead. MI->eraseFromParent(); ++NumUncoalescableCopies; return true; } -/// isLoadFoldable - Check whether MI is a candidate for folding into a later -/// instruction. We only fold loads to virtual registers and the virtual -/// register defined has a single use. +/// Check whether MI is a candidate for folding into a later instruction. +/// We only fold loads to virtual registers and the virtual register defined +/// has a single use. bool PeepholeOptimizer::isLoadFoldable( - MachineInstr *MI, - SmallSet &FoldAsLoadDefCandidates) { + MachineInstr *MI, SmallSet &FoldAsLoadDefCandidates) { if (!MI->canFoldAsLoad() || !MI->mayLoad()) return false; const MCInstrDesc &MCID = MI->getDesc(); @@ -1010,9 +1313,9 @@ bool PeepholeOptimizer::isLoadFoldable( return false; } -bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, - SmallSet &ImmDefRegs, - DenseMap &ImmDefMIs) { +bool PeepholeOptimizer::isMoveImmediate( + MachineInstr *MI, SmallSet &ImmDefRegs, + DenseMap &ImmDefMIs) { const MCInstrDesc &MCID = MI->getDesc(); if (!MI->isMoveImmediate()) return false; @@ -1028,23 +1331,26 @@ bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, return false; } -/// foldImmediate - Try folding register operands that are defined by move -/// immediate instructions, i.e. a trivial constant folding optimization, if +/// Try folding register operands that are defined by move immediate +/// instructions, i.e. a trivial constant folding optimization, if /// and only if the def and use are in the same BB. -bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, - SmallSet &ImmDefRegs, - DenseMap &ImmDefMIs) { +bool PeepholeOptimizer::foldImmediate( + MachineInstr *MI, MachineBasicBlock *MBB, SmallSet &ImmDefRegs, + DenseMap &ImmDefMIs) { for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isDef()) continue; + // Ignore dead implicit defs. + if (MO.isImplicit() && MO.isDead()) + continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (ImmDefRegs.count(Reg) == 0) continue; DenseMap::iterator II = ImmDefMIs.find(Reg); - assert(II != ImmDefMIs.end()); + assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { ++NumImmFold; return true; @@ -1053,6 +1359,117 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, return false; } +// FIXME: This is very simple and misses some cases which should be handled when +// motivating examples are found. 
+// +// The copy rewriting logic should look at uses as well as defs and be able to +// eliminate copies across blocks. +// +// Later copies that are subregister extracts will also not be eliminated since +// only the first copy is considered. +// +// e.g. +// %vreg1 = COPY %vreg0 +// %vreg2 = COPY %vreg0:sub1 +// +// Should replace %vreg2 uses with %vreg1:sub1 +bool PeepholeOptimizer::foldRedundantCopy( + MachineInstr *MI, SmallSet &CopySrcRegs, + DenseMap &CopyMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + unsigned SrcReg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return false; + + if (CopySrcRegs.insert(SrcReg).second) { + // First copy of this reg seen. + CopyMIs.insert(std::make_pair(SrcReg, MI)); + return false; + } + + MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second; + + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); + unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); + + // Can't replace different subregister extracts. + if (SrcSubReg != PrevSrcSubReg) + return false; + + unsigned PrevDstReg = PrevCopy->getOperand(0).getReg(); + + // Only replace if the copy register class is the same. + // + // TODO: If we have multiple copies to different register classes, we may want + // to track multiple copies of the same source register. + if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg)) + return false; + + MRI->replaceRegWith(DstReg, PrevDstReg); + + // Lifetime of the previous copy has been extended. + MRI->clearKillFlags(PrevDstReg); + return true; +} + +bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { + return TargetRegisterInfo::isPhysicalRegister(Reg) && + !MRI->isAllocatable(Reg); +} + +bool PeepholeOptimizer::foldRedundantNAPhysCopy( + MachineInstr *MI, DenseMap &NAPhysToVirtMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + if (DisableNAPhysCopyOpt) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + // %vreg = COPY %PHYSREG + // Avoid using a datastructure which can track multiple live non-allocatable + // phys->virt copies since LLVM doesn't seem to do this. + NAPhysToVirtMIs.insert({SrcReg, MI}); + return false; + } + + if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + return false; + + // %PHYSREG = COPY %vreg + auto PrevCopy = NAPhysToVirtMIs.find(DstReg); + if (PrevCopy == NAPhysToVirtMIs.end()) { + // We can't remove the copy: there was an intervening clobber of the + // non-allocatable physical register after the copy to virtual. + DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI + << '\n'); + return false; + } + + unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg(); + if (PrevDstReg == SrcReg) { + // Remove the virt->phys copy: we saw the virtual register definition, and + // the non-allocatable physical register's state hasn't changed since then. + DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n'); + ++NumNAPhysCopies; + return true; + } + + // Potential missed optimization opportunity: we saw a different virtual + // register get a copy of the non-allocatable physical register, and we only + // track one such copy. 
Avoid getting confused by this new non-allocatable + // physical register definition, and remove it from the tracked copies. + DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n'); + NAPhysToVirtMIs.erase(PrevCopy); + return false; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipOptnoneFunction(*MF.getFunction())) return false; @@ -1070,9 +1487,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = &*I; - + for (MachineBasicBlock &MBB : MF) { bool SeenMoveImm = false; // During this forward scan, at some point it needs to answer the question @@ -1086,8 +1501,19 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DenseMap ImmDefMIs; SmallSet FoldAsLoadDefCandidates; - for (MachineBasicBlock::iterator - MII = I->begin(), MIE = I->end(); MII != MIE; ) { + // Track when a non-allocatable physical register is copied to a virtual + // register so that useless moves can be removed. + // + // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG` + // without any intervening re-definition of %PHYSREG. + DenseMap NAPhysToVirtMIs; + + // Set of virtual registers that are copied from. + SmallSet CopySrcRegs; + DenseMap CopySrcMIs; + + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE; ) { MachineInstr *MI = &*MII; // We may be erasing MI below, increment MII now. ++MII; @@ -1097,20 +1523,60 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->isDebugValue()) continue; - // If there exists an instruction which belongs to the following - // categories, we will discard the load candidates. - if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || - MI->hasUnmodeledSideEffects()) { + // If we run into an instruction we can't fold across, discard + // the load candidates. + if (MI->isLoadFoldBarrier()) FoldAsLoadDefCandidates.clear(); + + if (MI->isPosition() || MI->isPHI()) + continue; + + if (!MI->isCopy()) { + for (const auto &Op : MI->operands()) { + // Visit all operands: definitions can be implicit or explicit. + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Op.isDef() && isNAPhysCopy(Reg)) { + const auto &Def = NAPhysToVirtMIs.find(Reg); + if (Def != NAPhysToVirtMIs.end()) { + // A new definition of the non-allocatable physical register + // invalidates previous copies. + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } else if (Op.isRegMask()) { + const uint32_t *RegMask = Op.getRegMask(); + for (auto &RegMI : NAPhysToVirtMIs) { + unsigned Def = RegMI.first; + if (MachineOperand::clobbersPhysReg(RegMask, Def)) { + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } + } + } + + if (MI->isImplicitDef() || MI->isKill()) + continue; + + if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) { + // Blow away all non-allocatable physical registers knowledge since we + // don't know what's correct anymore. + // + // FIXME: handle explicit asm clobbers. 
+ DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI + << '\n'); + NAPhysToVirtMIs.clear(); continue; } - if (MI->mayStore() || MI->isCall()) - FoldAsLoadDefCandidates.clear(); if ((isUncoalescableCopy(*MI) && optimizeUncoalescableCopy(MI, LocalMIs)) || - (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || + (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) || (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { // MI is deleted. LocalMIs.erase(MI); @@ -1129,17 +1595,26 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } + if (MI->isCopy() && + (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) || + foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) { + LocalMIs.erase(MI); + MI->eraseFromParent(); + Changed = true; + continue; + } + if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { SeenMoveImm = true; } else { - Changed |= optimizeExtInstr(MI, MBB, LocalMIs); + Changed |= optimizeExtInstr(MI, &MBB, LocalMIs); // optimizeExtInstr might have created new instructions after MI // and before the already incremented MII. Adjust MII so that the // next iteration sees the new instructions. MII = MI; ++MII; if (SeenMoveImm) - Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); + Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs); } // Check whether MI is a load candidate for folding into a later @@ -1190,8 +1665,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { return Changed; } -bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromCopy() { assert(Def->isCopy() && "Invalid definition"); // Copy instruction are supposed to be: Def = Src. // If someone breaks this assumption, bad things will happen everywhere. @@ -1199,30 +1673,27 @@ bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of src. - // Bails as we do not support composing subreg yet. - return false; + // Bails as we do not support composing subregs yet. + return ValueTrackerResult(); // Otherwise, we want the whole source. const MachineOperand &Src = Def->getOperand(1); - SrcReg = Src.getReg(); - SrcSubReg = Src.getSubReg(); - return true; + return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } -bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { assert(Def->isBitcast() && "Invalid definition"); // Bail if there are effects that a plain copy will not expose. if (Def->hasUnmodeledSideEffects()) - return false; + return ValueTrackerResult(); // Bitcasts with more than one def are not supported. if (Def->getDesc().getNumDefs() != 1) - return false; + return ValueTrackerResult(); if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) // If we look for a different subreg, it means we want a subreg of the src. - // Bails as we do not support composing subreg yet. - return false; + // Bails as we do not support composing subregs yet. + return ValueTrackerResult(); unsigned SrcIdx = Def->getNumOperands(); for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; @@ -1230,25 +1701,25 @@ bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, const MachineOperand &MO = Def->getOperand(OpIdx); if (!MO.isReg() || !MO.getReg()) continue; + // Ignore dead implicit defs. 
+ if (MO.isImplicit() && MO.isDead()) + continue; assert(!MO.isDef() && "We should have skipped all the definitions by now"); if (SrcIdx != EndOpIdx) // Multiple sources? - return false; + return ValueTrackerResult(); SrcIdx = OpIdx; } const MachineOperand &Src = Def->getOperand(SrcIdx); - SrcReg = Src.getReg(); - SrcSubReg = Src.getSubReg(); - return true; + return ValueTrackerResult(Src.getReg(), Src.getSubReg()); } -bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { assert((Def->isRegSequence() || Def->isRegSequenceLike()) && "Invalid definition"); if (Def->getOperand(DefIdx).getSubReg()) - // If we are composing subreg, bails out. + // If we are composing subregs, bail out. // The case we are checking is Def. = REG_SEQUENCE. // This should almost never happen as the SSA property is tracked at // the register level (as opposed to the subreg level). @@ -1262,16 +1733,16 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, // have this case. // If we can ascertain (or force) that this never happens, we could // turn that into an assertion. - return false; + return ValueTrackerResult(); if (!TII) // We could handle the REG_SEQUENCE here, but we do not want to // duplicate the code from the generic TII. - return false; + return ValueTrackerResult(); SmallVector RegSeqInputRegs; if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) - return false; + return ValueTrackerResult(); // We are looking at: // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... @@ -1279,41 +1750,38 @@ bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, for (auto &RegSeqInput : RegSeqInputRegs) { if (RegSeqInput.SubIdx == DefSubReg) { if (RegSeqInput.SubReg) - // Bails if we have to compose sub registers. - return false; + // Bail if we have to compose sub registers. + return ValueTrackerResult(); - SrcReg = RegSeqInput.Reg; - SrcSubReg = RegSeqInput.SubReg; - return true; + return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); } } // If the subreg we are tracking is super-defined by another subreg, // we could follow this value. However, this would require to compose // the subreg and we do not do that for now. - return false; + return ValueTrackerResult(); } -bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && "Invalid definition"); if (Def->getOperand(DefIdx).getSubReg()) - // If we are composing subreg, bails out. + // If we are composing subreg, bail out. // Same remark as getNextSourceFromRegSequence. // I.e., this may be turned into an assert. - return false; + return ValueTrackerResult(); if (!TII) // We could handle the REG_SEQUENCE here, but we do not want to // duplicate the code from the generic TII. - return false; + return ValueTrackerResult(); TargetInstrInfo::RegSubRegPair BaseReg; TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) - return false; + return ValueTrackerResult(); // We are looking at: // Def = INSERT_SUBREG v0, v1, sub1 @@ -1323,9 +1791,7 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, // #1 Check if the inserted register matches the required sub index. 
if (InsertedReg.SubIdx == DefSubReg) { - SrcReg = InsertedReg.Reg; - SrcSubReg = InsertedReg.SubReg; - return true; + return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg); } // #2 Otherwise, if the sub register we are looking for is not partial // defined by the inserted element, we can look through the main @@ -1333,10 +1799,10 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, const MachineOperand &MODef = Def->getOperand(DefIdx); // If the result register (Def) and the base register (v0) do not // have the same register class or if we have to compose - // subregisters, bails out. + // subregisters, bail out. if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || BaseReg.SubReg) - return false; + return ValueTrackerResult(); // Get the TRI and check if the inserted sub-register overlaps with the // sub-register we are tracking. @@ -1344,121 +1810,138 @@ bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, if (!TRI || (TRI->getSubRegIndexLaneMask(DefSubReg) & TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) - return false; + return ValueTrackerResult(); // At this point, the value is available in v0 via the same subreg // we used for Def. - SrcReg = BaseReg.Reg; - SrcSubReg = DefSubReg; - return true; + return ValueTrackerResult(BaseReg.Reg, DefSubReg); } -bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { assert((Def->isExtractSubreg() || Def->isExtractSubregLike()) && "Invalid definition"); // We are looking at: // Def = EXTRACT_SUBREG v0, sub0 - // Bails if we have to compose sub registers. + // Bail if we have to compose sub registers. // Indeed, if DefSubReg != 0, we would have to compose it with sub0. if (DefSubReg) - return false; + return ValueTrackerResult(); if (!TII) // We could handle the EXTRACT_SUBREG here, but we do not want to // duplicate the code from the generic TII. - return false; + return ValueTrackerResult(); TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) - return false; + return ValueTrackerResult(); - // Bails if we have to compose sub registers. + // Bail if we have to compose sub registers. // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. if (ExtractSubregInputReg.SubReg) - return false; + return ValueTrackerResult(); // Otherwise, the value is available in the v0.sub0. - SrcReg = ExtractSubregInputReg.Reg; - SrcSubReg = ExtractSubregInputReg.SubIdx; - return true; + return ValueTrackerResult(ExtractSubregInputReg.Reg, + ExtractSubregInputReg.SubIdx); } -bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { assert(Def->isSubregToReg() && "Invalid definition"); // We are looking at: // Def = SUBREG_TO_REG Imm, v0, sub0 - // Bails if we have to compose sub registers. + // Bail if we have to compose sub registers. // If DefSubReg != sub0, we would have to check that all the bits // we track are included in sub0 and if yes, we would have to // determine the right subreg in v0. if (DefSubReg != Def->getOperand(3).getImm()) - return false; - // Bails if we have to compose sub registers. + return ValueTrackerResult(); + // Bail if we have to compose sub registers. // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 
if (Def->getOperand(2).getSubReg()) - return false; + return ValueTrackerResult(); - SrcReg = Def->getOperand(2).getReg(); - SrcSubReg = Def->getOperand(3).getImm(); - return true; + return ValueTrackerResult(Def->getOperand(2).getReg(), + Def->getOperand(3).getImm()); } -bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { +/// \brief Explore each PHI incoming operand and return its sources +ValueTrackerResult ValueTracker::getNextSourceFromPHI() { + assert(Def->isPHI() && "Invalid definition"); + ValueTrackerResult Res; + + // If we look for a different subreg, bail as we do not support composing + // subregs yet. + if (Def->getOperand(0).getSubReg() != DefSubReg) + return ValueTrackerResult(); + + // Return all register sources for PHI instructions. + for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { + auto &MO = Def->getOperand(i); + assert(MO.isReg() && "Invalid PHI instruction"); + Res.addSource(MO.getReg(), MO.getSubReg()); + } + + return Res; +} + +ValueTrackerResult ValueTracker::getNextSourceImpl() { assert(Def && "This method needs a valid definition"); assert( (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); if (Def->isCopy()) - return getNextSourceFromCopy(SrcReg, SrcSubReg); + return getNextSourceFromCopy(); if (Def->isBitcast()) - return getNextSourceFromBitcast(SrcReg, SrcSubReg); + return getNextSourceFromBitcast(); // All the remaining cases involve "complex" instructions. - // Bails if we did not ask for the advanced tracking. + // Bail if we did not ask for the advanced tracking. if (!UseAdvancedTracking) - return false; + return ValueTrackerResult(); if (Def->isRegSequence() || Def->isRegSequenceLike()) - return getNextSourceFromRegSequence(SrcReg, SrcSubReg); + return getNextSourceFromRegSequence(); if (Def->isInsertSubreg() || Def->isInsertSubregLike()) - return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); + return getNextSourceFromInsertSubreg(); if (Def->isExtractSubreg() || Def->isExtractSubregLike()) - return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); + return getNextSourceFromExtractSubreg(); if (Def->isSubregToReg()) - return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); - return false; + return getNextSourceFromSubregToReg(); + if (Def->isPHI()) + return getNextSourceFromPHI(); + return ValueTrackerResult(); } -const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, - unsigned &SrcSubReg) { +ValueTrackerResult ValueTracker::getNextSource() { // If we reach a point where we cannot move up in the use-def chain, // there is nothing we can get. if (!Def) - return nullptr; + return ValueTrackerResult(); - const MachineInstr *PrevDef = nullptr; - // Try to find the next source. - if (getNextSourceImpl(SrcReg, SrcSubReg)) { + ValueTrackerResult Res = getNextSourceImpl(); + if (Res.isValid()) { // Update definition, definition index, and subregister for the // next call of getNextSource. // Update the current register. - Reg = SrcReg; - // Update the return value before moving up in the use-def chain. - PrevDef = Def; + bool OneRegSrc = Res.getNumSources() == 1; + if (OneRegSrc) + Reg = Res.getSrcReg(0); + // Update the result before moving up in the use-def chain + // with the instruction containing the last found sources. + Res.setInst(Def); + // If we can still move up in the use-def chain, move to the next - // defintion. - if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + // definition. 
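The new getNextSourceFromPHI relies on the PHI operand layout: operand 0 is the def, and the rest come in (value, predecessor block) pairs, which is why its loop starts at index 1 and steps by 2. In miniature, with integers standing in for registers and blocks:

    #include <cstdio>
    #include <vector>

    // Toy PHI: Operands[0] is the def; after that, operands come in
    // (value, predecessor-block) pairs.
    struct PHI {
      std::vector<int> Operands; // [def, v1, bb1, v2, bb2, ...]
    };

    std::vector<int> collectIncomingValues(const PHI &Phi) {
      std::vector<int> Sources;
      for (unsigned i = 1, e = Phi.Operands.size(); i < e; i += 2)
        Sources.push_back(Phi.Operands[i]); // skip the block operands
      return Sources;
    }

    int main() {
      PHI Phi{{100, 1, 10, 2, 20, 3, 30}};
      for (int V : collectIncomingValues(Phi))
        std::printf("incoming value: %d\n", V); // 1, 2, 3
    }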
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { Def = MRI.getVRegDef(Reg); DefIdx = MRI.def_begin(Reg).getOperandNo(); - DefSubReg = SrcSubReg; - return PrevDef; + DefSubReg = Res.getSrcSubReg(0); + return Res; } } // If we end up here, this means we will not be able to find another source - // for the next iteration. - // Make sure any new call to getNextSource bails out early by cutting the - // use-def chain. + // for the next iteration. Make sure any new call to getNextSource bails out + // early by cutting the use-def chain. Def = nullptr; - return PrevDef; + return Res; } diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 6f76116da1eb..b95dffd05c46 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -87,7 +87,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -196,7 +196,7 @@ SchedulePostRATDList::SchedulePostRATDList( const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl &CriticalPathRCs) - : ScheduleDAGInstrs(MF, &MLI, /*IsPostRA=*/true), AA(AA), EndIndex(0) { + : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) { const InstrItineraryData *InstrItins = MF.getSubtarget().getInstrItineraryData(); @@ -267,7 +267,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { TII = Fn.getSubtarget().getInstrInfo(); MachineLoopInfo &MLI = getAnalysis(); - AliasAnalysis *AA = &getAnalysis(); + AliasAnalysis *AA = &getAnalysis().getAAResults(); TargetPassConfig *PassConfig = &getAnalysis(); RegClassInfo.runOnMachineFunction(Fn); @@ -302,8 +302,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { CriticalPathRCs); // Loop over all of the basic blocks - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) { + for (auto &MBB : Fn) { #ifndef NDEBUG // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod if (DebugDiv > 0) { @@ -311,25 +310,25 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { if (bbcnt++ % DebugDiv != DebugMod) continue; dbgs() << "*** DEBUG scheduling " << Fn.getName() - << ":BB#" << MBB->getNumber() << " ***\n"; + << ":BB#" << MBB.getNumber() << " ***\n"; } #endif // Initialize register live-range state for scheduling in this block. - Scheduler.startBlock(MBB); + Scheduler.startBlock(&MBB); // Schedule each sequence of instructions not interrupted by a label // or anything else that effectively needs to shut down scheduling. - MachineBasicBlock::iterator Current = MBB->end(); - unsigned Count = MBB->size(), CurrentCount = Count; - for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { + MachineBasicBlock::iterator Current = MBB.end(); + unsigned Count = MBB.size(), CurrentCount = Count; + for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) { MachineInstr *MI = std::prev(I); --Count; // Calls are not scheduling boundaries before register allocation, but // post-ra we don't gain anything by scheduling across calls since we // don't need to worry about register pressure. 
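The scheduling loop that continues below walks each block bottom-up and cuts a new region at every boundary instruction (post-RA, calls included), scheduling whatever accumulated since the last cut; the boundary instruction itself is not scheduled. A compact model of that carving, counting the non-empty regions a block produces:

    #include <cstdio>
    #include <vector>

    struct Instr { bool IsBoundary; };

    int countRegions(const std::vector<Instr> &Block) {
      int Regions = 0;
      int Pending = 0; // instructions accumulated since the last cut
      for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
        if (I->IsBoundary) {
          if (Pending) ++Regions; // schedule what was gathered so far
          Pending = 0;            // the boundary itself is not scheduled
        } else {
          ++Pending;
        }
      }
      if (Pending) ++Regions; // region reaching the top of the block
      return Regions;
    }

    int main() {
      // Block top-to-bottom: i0, i1, CALL, i3 -> regions {i3} and {i0, i1}.
      std::vector<Instr> B = {{false}, {false}, {true}, {false}};
      std::printf("regions: %d\n", countRegions(B)); // 2
    }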
- if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count); + if (MI->isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) { + Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count); Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); @@ -343,9 +342,9 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Count -= MI->getBundleSize(); } assert(Count == 0 && "Instruction count mismatch!"); - assert((MBB->begin() == Current || CurrentCount != 0) && + assert((MBB.begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); - Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount); Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); @@ -355,7 +354,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.finishBlock(); // Update register kills - Scheduler.fixupKills(MBB); + Scheduler.fixupKills(&MBB); } return true; @@ -400,8 +399,12 @@ void SchedulePostRATDList::schedule() { } DEBUG(dbgs() << "********** List Scheduling **********\n"); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + dbgs() << '\n'; + } + ); AvailableQueue.initNodes(SUnits); ListScheduleTopDown(); diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 5f8194983484..d27ea2f51867 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -58,7 +58,7 @@ INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -96,7 +96,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { // This is a physreg implicit-def. // Look for the first instruction to use or define an alias. - MachineBasicBlock::instr_iterator UserMI = MI; + MachineBasicBlock::instr_iterator UserMI = MI->getIterator(); MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end(); bool Found = false; for (++UserMI; UserMI != UserE; ++UserMI) { @@ -151,7 +151,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) if (MBBI->isImplicitDef()) - WorkList.insert(MBBI); + WorkList.insert(&*MBBI); if (WorkList.empty()) continue; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 6ca69a124297..939c50027b02 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -71,8 +71,9 @@ private: // stack frame indexes. unsigned MinCSFrameIndex, MaxCSFrameIndex; - // Save and Restore blocks of the current function. - MachineBasicBlock *SaveBlock; + // Save and Restore blocks of the current function. Typically there is a + // single save block, unless Windows EH funclets are involved. + SmallVector SaveBlocks; SmallVector RestoreBlocks; // Flag to control whether to use the register scavenger to resolve @@ -91,9 +92,6 @@ private: int &SPAdj); void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); - - // Convenience for recognizing return blocks. 
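Several hunks here (UserMI = MI->getIterator(), WorkList.insert(&*MBBI), Scheduler.startBlock(&MBB)) make previously implicit iterator/pointer conversions explicit. The idiom in miniature, with std::list standing in for the machine IR's intrusive lists:

    #include <cstdio>
    #include <list>

    struct Node { int Id; };

    int main() {
      std::list<Node> L = {{1}, {2}, {3}};
      std::list<Node>::iterator It = L.begin();
      ++It;
      Node *P = &*It; // explicit conversion, as in WorkList.insert(&*MBBI)
      std::printf("node %d\n", P->Id); // node 2
    }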
- bool isReturnBlock(const MachineBasicBlock *MBB) const; }; } // namespace @@ -128,10 +126,6 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -bool PEI::isReturnBlock(const MachineBasicBlock* MBB) const { - return (MBB && !MBB->empty() && MBB->back().isReturn()); -} - /// Compute the set of return blocks void PEI::calculateSets(MachineFunction &Fn) { const MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -142,25 +136,25 @@ void PEI::calculateSets(MachineFunction &Fn) { // Use the points found by shrink-wrapping, if any. if (MFI->getSavePoint()) { - SaveBlock = MFI->getSavePoint(); + SaveBlocks.push_back(MFI->getSavePoint()); assert(MFI->getRestorePoint() && "Both restore and save must be set"); MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); // If RestoreBlock does not have any successor and is not a return block // then the end point is unreachable and we do not need to insert any // epilogue. - if (!RestoreBlock->succ_empty() || isReturnBlock(RestoreBlock)) + if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) RestoreBlocks.push_back(RestoreBlock); return; } // Save refs to entry and return blocks. - SaveBlock = Fn.begin(); - for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); - MBB != E; ++MBB) - if (isReturnBlock(MBB)) - RestoreBlocks.push_back(MBB); - - return; + SaveBlocks.push_back(&Fn.front()); + for (MachineBasicBlock &MBB : Fn) { + if (MBB.isEHFuncletEntry()) + SaveBlocks.push_back(&MBB); + if (MBB.isReturnBlock()) + RestoreBlocks.push_back(&MBB); + } } /// StackObjSet - A set of stack object indexes @@ -195,7 +189,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // place all spills in the entry block, all restores in return blocks. calculateSets(Fn); - // Add the code to save and restore the callee saved registers + // Add the code to save and restore the callee saved registers. if (!F->hasFnAttribute(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); @@ -237,6 +231,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { } delete RS; + SaveBlocks.clear(); RestoreBlocks.clear(); return true; } @@ -407,7 +402,7 @@ static void updateLiveness(MachineFunction &MF) { const MachineBasicBlock *CurBB = WorkList.pop_back_val(); // By construction, the region that is after the save point is // dominated by the Save and post-dominated by the Restore. - if (CurBB == Save) + if (CurBB == Save && Save != Restore) continue; // Enqueue all the successors not already visited. // Those are by construction either before Save or after Restore. @@ -419,10 +414,13 @@ static void updateLiveness(MachineFunction &MF) { const std::vector &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - for (MachineBasicBlock *MBB : Visited) + for (MachineBasicBlock *MBB : Visited) { + MCPhysReg Reg = CSI[i].getReg(); // Add the callee-saved register as live-in. // It's killed at the spill. - MBB->addLiveIn(CSI[i].getReg()); + if (!MBB->isLiveIn(Reg)) + MBB->addLiveIn(Reg); + } } } @@ -446,18 +444,20 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MachineBasicBlock::iterator I; // Spill using target interface. - I = SaveBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Insert the spill to the stack frame. 
- unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), - RC, TRI); + for (MachineBasicBlock *SaveBlock : SaveBlocks) { + I = SaveBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Insert the spill to the stack frame. + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), + RC, TRI); + } } + // Update the live-in information of all the blocks up to the save point. + updateLiveness(Fn); } - // Update the live-in information of all the blocks up to the save point. - updateLiveness(Fn); // Restore using target interface. for (MachineBasicBlock *MBB : RestoreBlocks) { @@ -500,7 +500,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { static inline void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, bool StackGrowsDown, int64_t &Offset, - unsigned &MaxAlign) { + unsigned &MaxAlign, unsigned Skew) { // If the stack grows down, add the object size to find the lowest address. if (StackGrowsDown) Offset += MFI->getObjectSize(FrameIdx); @@ -512,7 +512,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, MaxAlign = std::max(MaxAlign, Align); // Adjust to alignment boundary. - Offset = (Offset + Align - 1) / Align * Align; + Offset = RoundUpToAlignment(Offset, Align, Skew); if (StackGrowsDown) { DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); @@ -530,12 +530,12 @@ static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, SmallSet &ProtectedObjs, MachineFrameInfo *MFI, bool StackGrowsDown, - int64_t &Offset, unsigned &MaxAlign) { + int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { for (StackObjSet::const_iterator I = UnassignedObjs.begin(), E = UnassignedObjs.end(); I != E; ++I) { int i = *I; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); ProtectedObjs.insert(i); } } @@ -563,6 +563,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { && "Local area offset should be in direction of stack growth"); int64_t Offset = LocalAreaOffset; + // Skew to be applied to alignment. + unsigned Skew = TFI.getStackAlignmentSkew(Fn); + // If there are fixed sized objects that are preallocated in the local area, // non-fixed objects can't be allocated right at the start of local area. 
// We currently don't support filling in holes in between fixed sized @@ -593,7 +596,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = RoundUpToAlignment(Offset, Align); + Offset = RoundUpToAlignment(Offset, Align, Skew); MFI->setObjectOffset(i, -Offset); // Set the computed offset } @@ -602,7 +605,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { for (int i = MaxCSFI; i >= MinCSFI ; --i) { unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary - Offset = RoundUpToAlignment(Offset, Align); + Offset = RoundUpToAlignment(Offset, Align, Skew); MFI->setObjectOffset(i, Offset); Offset += MFI->getObjectSize(i); @@ -624,7 +627,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) - AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } // FIXME: Once this is working, then enable flag will change to a target @@ -635,7 +638,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned Align = MFI->getLocalFrameMaxAlign(); // Adjust to alignment boundary. - Offset = RoundUpToAlignment(Offset, Align); + Offset = RoundUpToAlignment(Offset, Align, Skew); DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); @@ -662,7 +665,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { StackObjSet AddrOfObjs; AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); // Assign large stack objects first. for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { @@ -695,11 +698,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { } AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, - Offset, MaxAlign); + Offset, MaxAlign, Skew); } // Then assign frame offsets to stack objects that are not used to spill @@ -719,7 +722,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { if (ProtectedObjs.count(i)) continue; - AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); } // Make sure the special register scavenging spill slot is closest to the @@ -729,7 +732,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { RS->getScavengingFrameIndices(SFIs); for (SmallVectorImpl::iterator I = SFIs.begin(), IE = SFIs.end(); I != IE; ++I) - AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); } if (!TFI.targetHandlesStackFrameRounding()) { @@ -754,7 +757,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // If the frame pointer is eliminated, all frame offsets will be relative to // SP not FP. Align to MaxAlign so this works. StackAlign = std::max(StackAlign, MaxAlign); - Offset = RoundUpToAlignment(Offset, StackAlign); + Offset = RoundUpToAlignment(Offset, StackAlign, Skew); } // Update frame info to pretend that this is part of the stack... 
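Every rounding step in this function now goes through the three-argument RoundUpToAlignment(Offset, Align, Skew). Its assumed semantics: return the smallest value at or above Offset that is congruent to Skew modulo Align, so a zero skew degenerates to ordinary alignment. A self-contained version with a worked example:

    #include <cstdint>
    #include <cstdio>

    // Assumed behavior of the skewed round-up used by the frame layout code.
    uint64_t roundUpToAlignment(uint64_t Value, uint64_t Align, uint64_t Skew) {
      Skew %= Align;
      return (Value + Align - 1 - Skew) / Align * Align + Skew;
    }

    int main() {
      // With Align = 16 and Skew = 4, valid offsets are 4, 20, 36, ...
      std::printf("%llu\n", (unsigned long long)roundUpToAlignment(21, 16, 4)); // 36
      // Skew = 0 is the classic round-up: 21 -> 32.
      std::printf("%llu\n", (unsigned long long)roundUpToAlignment(21, 16, 0)); // 32
    }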
@@ -771,18 +774,24 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); // Add prologue to the function... - TFI.emitPrologue(Fn, *SaveBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.emitPrologue(Fn, *SaveBlock); // Add epilogue to restore the callee-save registers in each exiting block. for (MachineBasicBlock *RestoreBlock : RestoreBlocks) TFI.emitEpilogue(Fn, *RestoreBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.inlineStackProbe(Fn, *SaveBlock); + // Emit additional code that is required to support segmented stacks, if // we've been asked for it. This, when linked with a runtime with support // for segmented stacks (libgcc is one), will result in allocating stack // space in small chunks instead of one large contiguous block. - if (Fn.shouldSplitStack()) - TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + if (Fn.shouldSplitStack()) { + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + } // Emit additional code that is required to explicitly handle the stack in // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The @@ -790,7 +799,8 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // different conditional check and another BIF for allocating more stack // space. if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) - TFI.adjustForHiPEPrologue(Fn, *SaveBlock); + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForHiPEPrologue(Fn, *SaveBlock); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical @@ -800,25 +810,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); if (!TFI.needsFrameIndexResolution(Fn)) return; - MachineModuleInfo &MMI = Fn.getMMI(); - const Function *F = Fn.getFunction(); - const Function *ParentF = MMI.getWinEHParent(F); - unsigned FrameReg; - if (F == ParentF) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); - // FIXME: This should be unconditional but we have bugs in the preparation - // pass. - if (FuncInfo.UnwindHelpFrameIdx != INT_MAX) - FuncInfo.UnwindHelpFrameOffset = TFI.getFrameIndexReferenceFromSP( - Fn, FuncInfo.UnwindHelpFrameIdx, FrameReg); - } else if (MMI.hasWinEHFuncInfo(F)) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(Fn.getFunction()); - auto I = FuncInfo.CatchHandlerParentFrameObjIdx.find(F); - if (I != FuncInfo.CatchHandlerParentFrameObjIdx.end()) - FuncInfo.CatchHandlerParentFrameObjOffset[F] = - TFI.getFrameIndexReferenceFromSP(Fn, I->second, FrameReg); - } - // Store SPAdj at exit of a basic block. SmallVector SPState; SPState.resize(Fn.getNumBlockIDs()); @@ -841,12 +832,12 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { } // Handle the unreachable blocks. - for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - if (Reachable.count(BB)) + for (auto &BB : Fn) { + if (Reachable.count(&BB)) // Already handled in DFS traversal. 
continue; int SPAdj = 0; - replaceFrameIndices(BB, Fn, SPAdj); + replaceFrameIndices(&BB, Fn, SPAdj); } } @@ -889,11 +880,11 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, if (!MI->getOperand(i).isFI()) continue; - // Frame indicies in debug values are encoded in a target independent + // Frame indices in debug values are encoded in a target independent // way with simply the frame index and offset rather than any // target-specific addressing mode. if (MI->isDebugValue()) { - assert(i == 0 && "Frame indicies can only appear as the first " + assert(i == 0 && "Frame indices can only appear as the first " "operand of a DBG_VALUE machine instruction"); unsigned Reg; MachineOperand &Offset = MI->getOperand(1); @@ -979,7 +970,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Run through the instructions and find any virtual registers. for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - RS->enterBasicBlock(BB); + RS->enterBasicBlock(&*BB); int SPAdj = 0; @@ -1026,12 +1017,8 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); - MachineRegisterInfo &MRI = Fn.getRegInfo(); Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); - // Make sure MRI now accounts this register as used. - MRI.setPhysRegUsed(ScratchReg); - // Because this instruction was processed by the RS before this // register was allocated, make sure that the RS now records the // register as being used. @@ -1044,7 +1031,7 @@ PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // problem because we need the spill code before I: Move I to just // prior to J. if (I != std::prev(J)) { - BB->splice(J, BB, I); + BB->splice(J, &*BB, I); // Before we move I, we need to prepare the RS to visit I again. // Specifically, RS will assert if it sees uses of registers that diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index b1c341d3a681..1f46417e61e7 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/IR/DerivedTypes.h" @@ -22,87 +23,38 @@ #include using namespace llvm; -namespace { -struct PSVGlobalsTy { - // PseudoSourceValues are immutable so don't need locking. - const PseudoSourceValue PSVs[4]; - sys::Mutex Lock; // Guards FSValues, but not the values inside it. 
-  std::map<int, const PseudoSourceValue *> FSValues;
-
-  PSVGlobalsTy() : PSVs() {}
-  ~PSVGlobalsTy() {
-    for (std::map<int, const PseudoSourceValue *>::iterator
-           I = FSValues.begin(), E = FSValues.end(); I != E; ++I) {
-      delete I->second;
-    }
-  }
-};
-
-static ManagedStatic<PSVGlobalsTy> PSVGlobals;
-
-} // anonymous namespace
-
-const PseudoSourceValue *PseudoSourceValue::getStack()
-{ return &PSVGlobals->PSVs[0]; }
-const PseudoSourceValue *PseudoSourceValue::getGOT()
-{ return &PSVGlobals->PSVs[1]; }
-const PseudoSourceValue *PseudoSourceValue::getJumpTable()
-{ return &PSVGlobals->PSVs[2]; }
-const PseudoSourceValue *PseudoSourceValue::getConstantPool()
-{ return &PSVGlobals->PSVs[3]; }
-
 static const char *const PSVNames[] = {
-  "Stack",
-  "GOT",
-  "JumpTable",
-  "ConstantPool"
-};
+    "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack",
+    "GlobalValueCallEntry", "ExternalSymbolCallEntry"};
 
-PseudoSourceValue::PseudoSourceValue(bool isFixed) : isFixed(isFixed) {}
+PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {}
 
 PseudoSourceValue::~PseudoSourceValue() {}
 
 void PseudoSourceValue::printCustom(raw_ostream &O) const {
-  O << PSVNames[this - PSVGlobals->PSVs];
-}
-
-const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
-  PSVGlobalsTy &PG = *PSVGlobals;
-  sys::ScopedLock locked(PG.Lock);
-  const PseudoSourceValue *&V = PG.FSValues[FI];
-  if (!V)
-    V = new FixedStackPseudoSourceValue(FI);
-  return V;
+  O << PSVNames[Kind];
 }
 
 bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const {
-  if (this == getStack())
+  if (isStack())
     return false;
-  if (this == getGOT() ||
-      this == getConstantPool() ||
-      this == getJumpTable())
+  if (isGOT() || isConstantPool() || isJumpTable())
     return true;
   llvm_unreachable("Unknown PseudoSourceValue!");
 }
 
-bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const {
-  if (this == getStack() ||
-      this == getGOT() ||
-      this == getConstantPool() ||
-      this == getJumpTable())
+bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+  if (isStack() || isGOT() || isConstantPool() || isJumpTable())
     return false;
   llvm_unreachable("Unknown PseudoSourceValue!");
 }
 
-bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
-  if (this == getGOT() ||
-      this == getConstantPool() ||
-      this == getJumpTable())
-    return false;
-  return true;
+bool PseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+  return !(isGOT() || isConstantPool() || isJumpTable());
 }
 
-bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{
+bool FixedStackPseudoSourceValue::isConstant(
+    const MachineFrameInfo *MFI) const {
   return MFI && MFI->isImmutableObjectIndex(FI);
 }
 
@@ -122,3 +74,69 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
 void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
   OS << "FixedStack" << FI;
 }
+
+CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind)
+    : PseudoSourceValue(Kind) {}
+
+bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const {
+  return false;
+}
+
+bool CallEntryPseudoSourceValue::isAliased(const MachineFrameInfo *) const {
+  return false;
+}
+
+bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const {
+  return false;
+}
+
+GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue(
+    const GlobalValue *GV)
+    : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {}
+
+ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES)
+    : CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {}
+
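The rewrite replaces pointer-identity tests against lazily allocated singletons with an explicit PSVKind tag carried by every value, so the predicates become plain kind tests. Reduced to its essentials (a sketch, not the real class hierarchy):

    #include <cstdio>

    enum PSVKind { Stack, GOT, JumpTable, ConstantPool, FixedStack,
                   GlobalValueCallEntry, ExternalSymbolCallEntry };

    struct PSV {
      PSVKind Kind;
      bool isStack() const { return Kind == Stack; }
      bool isGOT() const { return Kind == GOT; }
      bool isConstantPool() const { return Kind == ConstantPool; }
      bool isJumpTable() const { return Kind == JumpTable; }
      bool mayAlias() const { // mirrors the rewritten mayAlias above
        return !(isGOT() || isConstantPool() || isJumpTable());
      }
    };

    int main() {
      PSV V{GOT};
      std::printf("mayAlias: %d\n", V.mayAlias()); // 0
    }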
+PseudoSourceValueManager::PseudoSourceValueManager() + : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT), + JumpTablePSV(PseudoSourceValue::JumpTable), + ConstantPoolPSV(PseudoSourceValue::ConstantPool) {} + +const PseudoSourceValue *PseudoSourceValueManager::getStack() { + return &StackPSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getGOT() { return &GOTPSV; } + +const PseudoSourceValue *PseudoSourceValueManager::getConstantPool() { + return &ConstantPoolPSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() { + return &JumpTablePSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getFixedStack(int FI) { + std::unique_ptr &V = FSValues[FI]; + if (!V) + V = llvm::make_unique(FI); + return V.get(); +} + +const PseudoSourceValue * +PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { + std::unique_ptr &E = + GlobalCallEntries[GV]; + if (!E) + E = llvm::make_unique(GV); + return E.get(); +} + +const PseudoSourceValue * +PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { + std::unique_ptr &E = + ExternalCallEntries[ES]; + if (!E) + E = llvm::make_unique(ES); + return E.get(); +} diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 0090332a8123..cfe367d5115c 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -133,8 +133,8 @@ RABasic::RABasic(): MachineFunctionPass(ID) { void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addPreserved(); @@ -223,7 +223,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, SmallVector PhysRegSpillCands; // Check for an available register in this class. - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); while (unsigned PhysReg = Order.next()) { // Check for interference in PhysReg switch (Matrix->checkInterference(VirtReg, PhysReg)) { @@ -276,7 +276,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { getAnalysis(), getAnalysis()); - calculateSpillWeightsAndHints(*LIS, *MF, + calculateSpillWeightsAndHints(*LIS, *MF, VRM, getAnalysis(), getAnalysis()); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index fd3d4d78968b..f4c076fea0e7 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -799,10 +799,9 @@ void RAFast::AllocateBasicBlock() { MachineBasicBlock::iterator MII = MBB->begin(); // Add live-in registers as live. - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - if (MRI->isAllocatable(*I)) - definePhysReg(MII, *I, regReserved); + for (const auto &LI : MBB->liveins()) + if (MRI->isAllocatable(LI.PhysReg)) + definePhysReg(MII, LI.PhysReg, regReserved); SmallVector VirtDead; SmallVector Coalesced; @@ -986,10 +985,6 @@ void RAFast::AllocateBasicBlock() { } } - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - // Track registers defined by instruction - early clobbers and tied uses at // this point. 
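The PseudoSourceValueManager getters above share one get-or-create shape: index a map of unique_ptr slots, populate the slot on first use, and hand out a stable raw pointer that lives as long as the manager. A minimal standalone rendition, using std::make_unique where the tree uses llvm::make_unique:

    #include <cstdio>
    #include <map>
    #include <memory>

    struct FixedStackValue {
      explicit FixedStackValue(int FI) : FI(FI) {}
      int FI;
    };

    class Manager {
      std::map<int, std::unique_ptr<FixedStackValue>> FSValues;

    public:
      const FixedStackValue *getFixedStack(int FI) {
        std::unique_ptr<FixedStackValue> &V = FSValues[FI]; // empty slot on first lookup
        if (!V)
          V = std::make_unique<FixedStackValue>(FI);
        return V.get(); // stable as long as the manager lives
      }
    };

    int main() {
      Manager M;
      const FixedStackValue *A = M.getFixedStack(3);
      const FixedStackValue *B = M.getFixedStack(3);
      std::printf("same object: %d\n", A == B); // 1
    }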
UsedInInstr.clear(); @@ -1050,10 +1045,6 @@ void RAFast::AllocateBasicBlock() { killVirtReg(VirtDead[i]); VirtDead.clear(); - for (UsedInInstrSet::iterator - I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setRegUnitUsed(*I); - if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); Coalesced.push_back(MI); @@ -1103,12 +1094,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { AllocateBasicBlock(); } - // Add the clobber lists for all the instructions we skipped earlier. - for (const MCInstrDesc *Desc : SkippedInstrs) - if (const uint16_t *Defs = Desc->getImplicitDefs()) - while (*Defs) - MRI->setPhysRegUsed(*Defs++); - // All machine operands and other references to virtual registers have been // replaced. Remove the virtual registers. MRI->clearVirtRegs(); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 7ebcf7f54856..945cb9e2c993 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -86,6 +86,14 @@ static cl::opt EnableLocalReassignment( "may be compile time intensive"), cl::init(false)); +static cl::opt EnableDeferredSpilling( + "enable-deferred-spilling", cl::Hidden, + cl::desc("Instead of spilling a variable right away, defer the actual " + "code insertion to the end of the allocation. That way the " + "allocator might still find a suitable coloring for this " + "variable because of other evicted variables."), + cl::init(false)); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -157,6 +165,11 @@ class RAGreedy : public MachineFunctionPass, /// Live range will be spilled. No more splitting will be attempted. RS_Spill, + + /// Live range is in memory. Because of other evictions, it might get moved + /// in a register in the end. + RS_Memory, + /// There is nothing more we can do to this live range. Abort compilation /// if it can't be assigned. RS_Done @@ -414,6 +427,7 @@ const char *const RAGreedy::StageName[] = { "RS_Split", "RS_Split2", "RS_Spill", + "RS_Memory", "RS_Done" }; #endif @@ -447,8 +461,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -536,6 +550,13 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { // Unsplit ranges that couldn't be allocated immediately are deferred until // everything else has been allocated. Prio = Size; + } else if (ExtraRegInfo[Reg].Stage == RS_Memory) { + // Memory operand should be considered last. + // Change the priority such that Memory operand are assigned in + // the reverse order that they came in. + // TODO: Make this a member variable and probably do something about hints. + static unsigned MemOp = 0; + Prio = MemOp++; } else { // Giant live ranges fall back to the global assignment heuristic, which // prevents excessive spilling in pathological cases. 
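The RS_Memory branch above derives the priority from a monotonically increasing counter, and the allocator's priority queue is assumed to pop the highest priority first, so deferred ranges come back in reverse arrival order. The effect in isolation:

    #include <cstdio>
    #include <queue>
    #include <utility>

    int main() {
      // (Prio, id) pairs in a max-heap, modeling the allocator's queue.
      std::priority_queue<std::pair<unsigned, char>> Q;
      unsigned MemOp = 0;
      for (char Id : {'a', 'b', 'c'})
        Q.push({MemOp++, Id});
      while (!Q.empty()) {
        std::printf("%c ", Q.top().second); // c b a
        Q.pop();
      }
      std::printf("\n");
    }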
@@ -637,7 +658,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, //===----------------------------------------------------------------------===// unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); unsigned PhysReg; while ((PhysReg = Order.next())) { if (PhysReg == PrevReg) @@ -2450,7 +2471,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, unsigned Depth) { unsigned CostPerUseLimit = ~0u; // First try assigning a free register. - AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { // When NewVRegs is not empty, we may have made decisions such as evicting // a virtual register, go with the earlier decisions and use the physical @@ -2512,13 +2533,23 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, return PhysReg; // Finally spill VirtReg itself. - NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); - spiller().spill(LRE); - setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); + if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) { + // TODO: This is experimental and in particular, we do not model + // the live range splitting done by spilling correctly. + // We would need a deep integration with the spiller to do the + // right thing here. Anyway, that is still good for early testing. + setStage(VirtReg, RS_Memory); + DEBUG(dbgs() << "Do as if this register is in memory\n"); + NewVRegs.push_back(VirtReg.reg); + } else { + NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + spiller().spill(LRE); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); - if (VerifyEnabled) - MF->verify(this, "After spilling"); + if (VerifyEnabled) + MF->verify(this, "After spilling"); + } // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. 
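Stripped of allocator plumbing, the deferred-spilling change is a two-visit protocol: on the first would-be spill, mark the range as living in memory and requeue it; only spill for real if it comes up again. A sketch of that control flow, with an invented Stage enum mirroring the RS_* names:

    #include <cstdio>

    enum Stage { RS_New, RS_Memory, RS_Done };

    Stage selectOrSplit(Stage S, bool EnableDeferredSpilling) {
      if (EnableDeferredSpilling && S < RS_Memory) {
        std::printf("defer: pretend the register lives in memory\n");
        return RS_Memory; // requeued; evictions may still free a color for it
      }
      std::printf("spill for real\n");
      return RS_Done;
    }

    int main() {
      Stage S = RS_New;
      S = selectOrSplit(S, true); // first visit: defer
      S = selectOrSplit(S, true); // second visit: spill
    }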
@@ -2555,7 +2586,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { initializeCSRCost(); - calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); + calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); DEBUG(LIS->dump()); diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index eeff73d0f2a0..fd28b05ed80a 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -497,8 +498,8 @@ void PBQPRAConstraintList::anchor() {} void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesCFG(); - au.addRequired(); - au.addPreserved(); + au.addRequired(); + au.addPreserved(); au.addRequired(); au.addPreserved(); au.addRequired(); @@ -724,11 +725,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { MachineBlockFrequencyInfo &MBFI = getAnalysis(); - calculateSpillWeightsAndHints(LIS, MF, getAnalysis(), MBFI, - normalizePBQPSpillWeight); - VirtRegMap &VRM = getAnalysis(); + calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis(), + MBFI, normalizePBQPSpillWeight); + std::unique_ptr VRegSpiller(createInlineSpiller(*this, MF, VRM)); MF.getRegInfo().freezeReservedRegs(MF); @@ -805,33 +806,17 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } -namespace { -// A helper class for printing node and register info in a consistent way -class PrintNodeInfo { -public: - typedef PBQP::RegAlloc::PBQPRAGraph Graph; - typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; - - PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} - - void print(raw_ostream &OS) const { +/// Create Printable object for node and register info. +static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, + const PBQP::RegAlloc::PBQPRAGraph &G) { + return Printable([NId, &G](raw_ostream &OS) { const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); unsigned VReg = G.getNodeMetadata(NId).getVReg(); const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; - } - -private: - const Graph &G; - NodeId NId; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { - PR.print(OS); - return OS; + }); } -} // anonymous namespace void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { for (auto NId : nodeIds()) { diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index c911b9b47ea2..e7b32179bde5 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -32,7 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -93,7 +92,7 @@ namespace { /// A LaneMask to remember on which subregister live ranges we need to call /// shrinkToUses() later. - unsigned ShrinkMask; + LaneBitmask ShrinkMask; /// True if the main range of the currently coalesced intervals should be /// checked for smaller live intervals. @@ -164,15 +163,13 @@ namespace { /// LaneMask are split as necessary. 
@p LaneMask are the lanes that
    /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
    /// lanemasks already adjusted to the coalesced register.
-    /// @returns false if live range conflicts couldn't get resolved.
-    bool mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
-                           unsigned LaneMask, CoalescerPair &CP);
+    void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
+                           LaneBitmask LaneMask, CoalescerPair &CP);
 
     /// Join the liveranges of two subregisters. Joins @p RRange into
     /// @p LRange, @p RRange may be invalid afterwards.
-    /// @returns false if live range conflicts couldn't get resolved.
-    bool joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
-                          unsigned LaneMask, const CoalescerPair &CP);
+    void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
+                          LaneBitmask LaneMask, const CoalescerPair &CP);
 
     /// We found a non-trivially-coalescable copy. If the source value number is
     /// defined by a copy from the destination reg see if we can merge these two
@@ -224,30 +221,17 @@ namespace {
     /// Dst, we can drop \p Copy.
     bool applyTerminalRule(const MachineInstr &Copy) const;
 
-    /// Check whether or not \p LI is composed by multiple connected
-    /// components and if that is the case, fix that.
-    void splitNewRanges(LiveInterval *LI) {
-      ConnectedVNInfoEqClasses ConEQ(*LIS);
-      unsigned NumComps = ConEQ.Classify(LI);
-      if (NumComps <= 1)
-        return;
-      SmallVector<LiveInterval*, 8> NewComps(1, LI);
-      for (unsigned i = 1; i != NumComps; ++i) {
-        unsigned VReg = MRI->createVirtualRegister(MRI->getRegClass(LI->reg));
-        NewComps.push_back(&LIS->createEmptyInterval(VReg));
-      }
-
-      ConEQ.Distribute(&NewComps[0], *MRI);
-    }
-
     /// Wrapper method for \see LiveIntervals::shrinkToUses.
     /// This method does the proper fixing of the live-ranges when the afore
     /// mentioned method returns true.
     void shrinkToUses(LiveInterval *LI,
                       SmallVectorImpl<MachineInstr *> *Dead = nullptr) {
-      if (LIS->shrinkToUses(LI, Dead))
-        // We may have created multiple connected components, split them.
-        splitNewRanges(LI);
+      if (LIS->shrinkToUses(LI, Dead)) {
+        /// Check whether or not \p LI is composed by multiple connected
+        /// components and if that is the case, fix that.
+        SmallVector<LiveInterval*, 8> SplitLIs;
+        LIS->splitSeparateComponents(*LI, SplitLIs);
+      }
     }
 
   public:
@@ -275,7 +259,7 @@ INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
                     "Simple Register Coalescing", false, false)
 
@@ -453,7 +437,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
 
 void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
-  AU.addRequired<AliasAnalysis>();
+  AU.addRequired<AAResultsWrapperPass>();
   AU.addRequired<LiveIntervals>();
   AU.addPreserved<LiveIntervals>();
   AU.addPreserved<SlotIndexes>();
@@ -679,14 +663,18 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
   unsigned UseOpIdx;
   if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
     return false;
-  unsigned Op1, Op2, NewDstIdx;
-  if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
-    return false;
-  if (Op1 == UseOpIdx)
-    NewDstIdx = Op2;
-  else if (Op2 == UseOpIdx)
-    NewDstIdx = Op1;
-  else
+
+  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
+  // commutable operand which is expressed by 'CommuteAnyOperandIndex' value
+  // passed to the method.
That _other_ operand is chosen by + // the findCommutedOpIndices() method. + // + // That is obviously an area for improvement in case of instructions having + // more than 2 operands. For example, if some instruction has 3 commutable + // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3, + // op#2<->op#3) of commute transformation should be considered/tried here. + unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; + if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx)) return false; MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); @@ -719,7 +707,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, // At this point we have decided that it is legal to do this // transformation. Start by commuting the instruction. MachineBasicBlock *MBB = DefMI->getParent(); - MachineInstr *NewMI = TII->commuteInstruction(DefMI); + MachineInstr *NewMI = + TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx); if (!NewMI) return false; if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && @@ -804,7 +793,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); if (IntB.hasSubRanges()) { if (!IntA.hasSubRanges()) { - unsigned Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); IntA.createSubRangeFrom(Allocator, Mask, IntA); } SlotIndex AIdx = CopyIdx.getRegSlot(true); @@ -812,20 +801,21 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); assert(ASubValNo != nullptr); - unsigned AMask = SA.LaneMask; + LaneBitmask AMask = SA.LaneMask; for (LiveInterval::SubRange &SB : IntB.subranges()) { - unsigned BMask = SB.LaneMask; - unsigned Common = BMask & AMask; + LaneBitmask BMask = SB.LaneMask; + LaneBitmask Common = BMask & AMask; if (Common == 0) continue; - DEBUG( - dbgs() << format("\t\tCopy+Merge %04X into %04X\n", BMask, Common)); - unsigned BRest = BMask & ~AMask; + DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask) + << " into " << PrintLaneMask(Common) << '\n'); + LaneBitmask BRest = BMask & ~AMask; LiveInterval::SubRange *CommonRange; if (BRest != 0) { SB.LaneMask = BRest; - DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", BRest)); + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest) + << '\n'); // Duplicate SubRange for newly merged common stuff. CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB); } else { @@ -842,7 +832,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, AMask &= ~BMask; } if (AMask != 0) { - DEBUG(dbgs() << format("\t\tNew Lane %04X\n", AMask)); + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n'); LiveRange *NewRange = IntB.createSubRange(Allocator, AMask); VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo); @@ -1107,7 +1097,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { const LiveInterval &SrcLI = LIS->getInterval(SrcReg); // CopyMI is undef iff SrcReg is not live before the instruction. 
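The FIXME spanning this hunk describes the new findCommutedOpIndices contract: the caller pins one operand index and passes the CommuteAnyOperandIndex sentinel for the other, which the target either narrows to a concrete commutable partner or rejects. A toy instruction whose operands 1 and 2 form the commutable pair; the resolution logic here is illustrative, not LLVM's:

    #include <cstdio>

    constexpr unsigned CommuteAnyOperandIndex = ~0u;

    // Narrow any sentinel index to a concrete operand, then verify the pair.
    bool findCommutedOpIndices(unsigned &OpIdx1, unsigned &OpIdx2) {
      auto fixOne = [](unsigned &Idx, unsigned Concrete) {
        if (Idx == CommuteAnyOperandIndex) Idx = Concrete;
        return Idx == Concrete;
      };
      if (OpIdx1 == 1 || OpIdx2 == 2) return fixOne(OpIdx1, 1) && fixOne(OpIdx2, 2);
      if (OpIdx1 == 2 || OpIdx2 == 1) return fixOne(OpIdx1, 2) && fixOne(OpIdx2, 1);
      return false;
    }

    int main() {
      unsigned Use = 2, Other = CommuteAnyOperandIndex;
      if (findCommutedOpIndices(Use, Other))
        std::printf("commute operands %u and %u\n", Use, Other); // 2 and 1
    }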
if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { - unsigned SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); + LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { if ((SR.LaneMask & SrcMask) == 0) continue; @@ -1128,7 +1118,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { DstLI.MergeValueNumberInto(VNI, PrevVNI); // The affected subregister segments can be removed. - unsigned DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); + LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); for (LiveInterval::SubRange &SR : DstLI.subranges()) { if ((SR.LaneMask & DstMask) == 0) continue; @@ -1147,7 +1137,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { continue; const MachineInstr &MI = *MO.getParent(); SlotIndex UseIdx = LIS->getInstructionIndex(&MI); - unsigned UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); bool isLive; if (UseMask != ~0u && DstLI.hasSubRanges()) { isLive = false; @@ -1213,10 +1203,10 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { if (!DstInt->hasSubRanges()) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); - unsigned Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); } - unsigned Mask = TRI->getSubRegIndexLaneMask(SubIdx); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx); bool IsUndef = true; SlotIndex MIIdx = UseMI->isDebugValue() ? LIS->getSlotIndexes()->getIndexBefore(UseMI) @@ -1445,8 +1435,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { for (LiveInterval::SubRange &S : LI.subranges()) { if ((S.LaneMask & ShrinkMask) == 0) continue; - DEBUG(dbgs() << "Shrink LaneUses (Lane " - << format("%04X", S.LaneMask) << ")\n"); + DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) + << ")\n"); LIS->shrinkToUses(S, LI.reg); } LI.removeEmptySubRanges(); @@ -1644,7 +1634,7 @@ class JoinVals { const unsigned SubIdx; /// The LaneMask that this liverange will occupy the coalesced register. May /// be smaller than the lanemask produced by SubIdx when merging subranges. - const unsigned LaneMask; + const LaneBitmask LaneMask; /// This is true when joining sub register ranges, false when joining main /// ranges. @@ -1699,11 +1689,11 @@ class JoinVals { ConflictResolution Resolution; /// Lanes written by this def, 0 for unanalyzed values. - unsigned WriteLanes; + LaneBitmask WriteLanes; /// Lanes with defined values in this register. Other lanes are undef and /// safe to clobber. - unsigned ValidLanes; + LaneBitmask ValidLanes; /// Value in LI being redefined by this def. VNInfo *RedefVNI; @@ -1744,7 +1734,7 @@ class JoinVals { /// Compute the bitmask of lanes actually written by DefMI. /// Set Redef if there are any partial register definitions that depend on the /// previous value of the register. - unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; + LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; /// Find the ultimate value that VNI was copied from. std::pair followCopyChain(const VNInfo *VNI) const; @@ -1780,12 +1770,12 @@ class JoinVals { /// entry to TaintedVals. /// /// Returns false if the tainted lanes extend beyond the basic block. 
-  bool taintExtent(unsigned, unsigned, JoinVals&,
-                   SmallVectorImpl<std::pair<SlotIndex,unsigned> >&);
+  bool taintExtent(unsigned, LaneBitmask, JoinVals&,
+                   SmallVectorImpl<std::pair<SlotIndex,LaneBitmask> >&);
 
   /// Return true if MI uses any of the given Lanes from Reg.
   /// This does not include partial redefinitions of Reg.
-  bool usesLanes(const MachineInstr *MI, unsigned, unsigned, unsigned) const;
+  bool usesLanes(const MachineInstr *MI, unsigned, unsigned, LaneBitmask) const;
 
   /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
   /// be pruned:
@@ -1796,7 +1786,7 @@ class JoinVals {
   bool isPrunedValue(unsigned ValNo, JoinVals &Other);
 
 public:
-  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, unsigned LaneMask,
+  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
            SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
            LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
            bool TrackSubRegLiveness)
@@ -1822,8 +1812,8 @@ public:
 
   /// Removes subranges starting at copies that get removed. This sometimes
   /// happens when undefined subranges are copied around. These ranges contain
-  /// no usefull information and can be removed.
-  void pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask);
+  /// no useful information and can be removed.
+  void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
 
   /// Erase any machine instructions that have been coalesced away.
   /// Add erased instructions to ErasedInstrs.
@@ -1840,9 +1830,9 @@ public:
 };
 } // end anonymous namespace
 
-unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
+LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
   const {
-  unsigned L = 0;
+  LaneBitmask L = 0;
   for (const MachineOperand &MO : DefMI->operands()) {
     if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
       continue;
@@ -1879,7 +1869,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
     ValueIn = nullptr;
     for (const LiveInterval::SubRange &S : LI.subranges()) {
       // Transform lanemask to a mask in the joined live interval.
-      unsigned SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
+      LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
       if ((SMask & LaneMask) == 0)
         continue;
       LiveQueryResult LRQ = S.Query(Def);
@@ -1928,7 +1918,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   const MachineInstr *DefMI = nullptr;
   if (VNI->isPHIDef()) {
     // Conservatively assume that all lanes in a PHI are valid.
-    unsigned Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
+    LaneBitmask Lanes = SubRangeJoin ?
1 : TRI->getSubRegIndexLaneMask(SubIdx); V.ValidLanes = V.WriteLanes = Lanes; } else { DefMI = Indexes->getInstructionFromIndex(VNI->def); @@ -2190,8 +2180,8 @@ bool JoinVals::mapValues(JoinVals &Other) { } bool JoinVals:: -taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, - SmallVectorImpl > &TaintExtent) { +taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, + SmallVectorImpl > &TaintExtent) { VNInfo *VNI = LR.getValNumInfo(ValNo); MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); @@ -2230,7 +2220,7 @@ taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, } bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, - unsigned Lanes) const { + LaneBitmask Lanes) const { if (MI->isDebugValue()) return false; for (const MachineOperand &MO : MI->operands()) { @@ -2264,8 +2254,8 @@ bool JoinVals::resolveConflicts(JoinVals &Other) { // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the // join, those lanes will be tainted with a wrong value. Get the extent of // the tainted lanes. - unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; - SmallVector, 8> TaintExtent; + LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector, 8> TaintExtent; if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) // Tainted lanes would extend beyond the basic block. return false; @@ -2384,7 +2374,7 @@ void JoinVals::pruneValues(JoinVals &Other, } } -void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) +void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { // Look for values being erased. bool DidPrune = false; @@ -2401,7 +2391,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) // copied and we must remove that subrange value as well. VNInfo *ValueOut = Q.valueOutOrDead(); if (ValueOut != nullptr && Q.valueIn() == nullptr) { - DEBUG(dbgs() << "\t\tPrune sublane " << format("%04X", S.LaneMask) + DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) << " at " << Def << "\n"); LIS->pruneValue(S, Def, nullptr); DidPrune = true; @@ -2410,10 +2400,10 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, unsigned &ShrinkMask) continue; } // If a subrange ends at the copy, then a value was copied but only - // partially used later. Shrink the subregister range apropriately. + // partially used later. Shrink the subregister range appropriately. if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { - DEBUG(dbgs() << "\t\tDead uses at sublane " - << format("%04X", S.LaneMask) << " at " << Def << "\n"); + DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask) + << " at " << Def << "\n"); ShrinkMask |= S.LaneMask; } } @@ -2477,8 +2467,8 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl &ErasedInstrs, } } -bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, - unsigned LaneMask, +void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + LaneBitmask LaneMask, const CoalescerPair &CP) { SmallVector NewVNInfo; JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, @@ -2492,13 +2482,15 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, // ranges get mapped to the "overflow" lane mask bit which creates unexpected // interferences. 
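The unsigned-to-LaneBitmask migration running through these hunks is, at this revision, assumed to be a pure renaming: one bit per subregister lane in a plain 32-bit mask, with bitwise AND as the overlap test and a designated bit standing in for lanes that no longer fit. For example:

    #include <cstdio>

    typedef unsigned LaneBitmask; // assumed underlying type at this revision

    int main() {
      LaneBitmask SubLo = 0x3;   // hypothetical lanes covered by sub_lo
      LaneBitmask SubHi = 0xC;   // hypothetical lanes covered by sub_hi
      LaneBitmask Tracked = 0x2; // lanes the tracker is following
      // Disjointness test, as used when deciding whether an inserted
      // subregister clobbers the lanes being tracked:
      std::printf("overlaps lo: %d\n", (Tracked & SubLo) != 0); // 1
      std::printf("overlaps hi: %d\n", (Tracked & SubHi) != 0); // 0
    }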
if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { - DEBUG(dbgs() << "*** Couldn't join subrange!\n"); - return false; + // We already determined that it is legal to merge the intervals, so this + // should never fail. + llvm_unreachable("*** Couldn't join subrange!\n"); } if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) { - DEBUG(dbgs() << "*** Couldn't join subrange!\n"); - return false; + // We already determined that it is legal to merge the intervals, so this + // should never fail. + llvm_unreachable("*** Couldn't join subrange!\n"); } // The merging algorithm in LiveInterval::join() can't handle conflicting @@ -2521,36 +2513,37 @@ bool RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); if (EndPoints.empty()) - return true; + return; // Recompute the parts of the live range we had to remove because of // CR_Replace conflicts. DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " << LRange << '\n'); LIS->extendToIndices(LRange, EndPoints); - return true; } -bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, +void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, - unsigned LaneMask, CoalescerPair &CP) { + LaneBitmask LaneMask, + CoalescerPair &CP) { BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); for (LiveInterval::SubRange &R : LI.subranges()) { - unsigned RMask = R.LaneMask; + LaneBitmask RMask = R.LaneMask; // LaneMask of subregisters common to subrange R and ToMerge. - unsigned Common = RMask & LaneMask; + LaneBitmask Common = RMask & LaneMask; // There is nothing to do without common subregs. if (Common == 0) continue; - DEBUG(dbgs() << format("\t\tCopy+Merge %04X into %04X\n", RMask, Common)); + DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into " + << PrintLaneMask(Common) << '\n'); // LaneMask of subregisters contained in the R range but not in ToMerge, // they have to split into their own subrange. - unsigned LRest = RMask & ~LaneMask; + LaneBitmask LRest = RMask & ~LaneMask; LiveInterval::SubRange *CommonRange; if (LRest != 0) { R.LaneMask = LRest; - DEBUG(dbgs() << format("\t\tReduce Lane to %04X\n", LRest)); + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n'); // Duplicate SubRange for newly merged common stuff. CommonRange = LI.createSubRangeFrom(Allocator, Common, R); } else { @@ -2559,16 +2552,14 @@ bool RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, CommonRange = &R; } LiveRange RangeCopy(ToMerge, Allocator); - if (!joinSubRegRanges(*CommonRange, RangeCopy, Common, CP)) - return false; + joinSubRegRanges(*CommonRange, RangeCopy, Common, CP); LaneMask &= ~RMask; } if (LaneMask != 0) { - DEBUG(dbgs() << format("\t\tNew Lane %04X\n", LaneMask)); + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n'); LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); } - return true; } bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { @@ -2602,15 +2593,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // create initial subranges if necessary. unsigned DstIdx = CP.getDstIdx(); if (!LHS.hasSubRanges()) { - unsigned Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() - : TRI->getSubRegIndexLaneMask(DstIdx); + LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(DstIdx); // LHS must support subregs or we wouldn't be in this codepath. 
assert(Mask != 0); LHS.createSubRangeFrom(Allocator, Mask, LHS); } else if (DstIdx != 0) { // Transform LHS lanemasks to new register class if necessary. for (LiveInterval::SubRange &R : LHS.subranges()) { - unsigned Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); R.LaneMask = Mask; } } @@ -2619,41 +2610,21 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // Determine lanemasks of RHS in the coalesced register and merge subranges. unsigned SrcIdx = CP.getSrcIdx(); - bool Abort = false; if (!RHS.hasSubRanges()) { - unsigned Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() - : TRI->getSubRegIndexLaneMask(SrcIdx); - if (!mergeSubRangeInto(LHS, RHS, Mask, CP)) - Abort = true; + LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(SrcIdx); + mergeSubRangeInto(LHS, RHS, Mask, CP); } else { // Pair up subranges and merge. for (LiveInterval::SubRange &R : RHS.subranges()) { - unsigned Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); - if (!mergeSubRangeInto(LHS, R, Mask, CP)) { - Abort = true; - break; - } + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); + mergeSubRangeInto(LHS, R, Mask, CP); } } - if (Abort) { - // This shouldn't have happened :-( - // However we are aware of at least one existing problem where we - // can't merge subranges when multiple ranges end up in the - // "overflow bit" 32. As a workaround we drop all subregister ranges - // which means we loose some precision but are back to a well defined - // state. - assert(TargetRegisterInfo::isImpreciseLaneMask( - CP.getNewRC()->getLaneMask()) - && "SubRange merge should only fail when merging into bit 32."); - DEBUG(dbgs() << "\tSubrange join aborted!\n"); - LHS.clearSubRanges(); - RHS.clearSubRanges(); - } else { - DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); - LHSVals.pruneSubRegValues(LHS, ShrinkMask); - RHSVals.pruneSubRegValues(LHS, ShrinkMask); - } + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); } // The merging algorithm in LiveInterval::join() can't handle conflicting @@ -2799,7 +2770,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { !isTerminalReg(DstReg, Copy, MRI)) return false; - // DstReg is a terminal node. Check if it inteferes with any other + // DstReg is a terminal node. Check if it interferes with any other // copy involving SrcReg. 
   const MachineBasicBlock *OrigBB = Copy.getParent();
   const LiveInterval &DstLI = LIS->getInterval(DstReg);
@@ -2904,7 +2875,7 @@ void RegisterCoalescer::joinAllIntervals() {
   std::vector<MBBPriorityInfo> MBBs;
   MBBs.reserve(MF->size());
   for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
-    MachineBasicBlock *MBB = I;
+    MachineBasicBlock *MBB = &*I;
     MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
                                    JoinSplitEdges && isSplitEdge(MBB)));
   }
@@ -2943,7 +2914,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   TRI = STI.getRegisterInfo();
   TII = STI.getInstrInfo();
   LIS = &getAnalysis<LiveIntervals>();
-  AA = &getAnalysis<AliasAnalysis>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   Loops = &getAnalysis<MachineLoopInfo>();
   if (EnableGlobalCopies == cl::BOU_UNSET)
     JoinGlobalCopies = STI.enableJoinGlobalCopies();
@@ -2981,22 +2952,25 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
     if (MRI->recomputeRegClass(Reg)) {
       DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
                    << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
+      ++NumInflated;
+
       LiveInterval &LI = LIS->getInterval(Reg);
-      unsigned MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
-      if (MaxMask == 0) {
+      if (LI.hasSubRanges()) {
         // If the inflated register class does not support subregisters anymore
         // remove the subranges.
-        LI.clearSubRanges();
-      } else {
+        if (!MRI->shouldTrackSubRegLiveness(Reg)) {
+          LI.clearSubRanges();
+        } else {
 #ifndef NDEBUG
-        // If subranges are still supported, then the same subregs should still
-        // be supported.
-        for (LiveInterval::SubRange &S : LI.subranges()) {
-          assert ((S.LaneMask & ~MaxMask) == 0);
-        }
+          LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
+          // If subranges are still supported, then the same subregs
+          // should still be supported.
+          for (LiveInterval::SubRange &S : LI.subranges()) {
+            assert((S.LaneMask & ~MaxMask) == 0);
+          }
 #endif
+        }
       }
-      ++NumInflated;
     }
   }
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index c3786e552a13..8382b0912bde 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -59,12 +59,12 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
   dbgs() << "Max Pressure: ";
   dumpRegSetPressure(MaxSetPressure, TRI);
   dbgs() << "Live In: ";
-  for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i)
-    dbgs() << PrintVRegOrUnit(LiveInRegs[i], TRI) << " ";
+  for (unsigned Reg : LiveInRegs)
+    dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
   dbgs() << '\n';
   dbgs() << "Live Out: ";
-  for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i)
-    dbgs() << PrintVRegOrUnit(LiveOutRegs[i], TRI) << " ";
+  for (unsigned Reg : LiveOutRegs)
+    dbgs() << PrintVRegOrUnit(Reg, TRI) << " ";
   dbgs() << '\n';
 }
 
@@ -78,11 +78,13 @@ void RegPressureTracker::dump() const {
 }
 
 void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
+  const char *sep = "";
   for (const PressureChange &Change : *this) {
-    if (!Change.isValid() || Change.getUnitInc() == 0)
-      continue;
-    dbgs() << " " << TRI.getRegPressureSetName(Change.getPSet())
+    if (!Change.isValid())
+      break;
+    dbgs() << sep << TRI.getRegPressureSetName(Change.getPSet())
            << " " << Change.getUnitInc();
+    sep = " ";
   }
   dbgs() << '\n';
 }
@@ -90,8 +92,8 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
 
 /// Increase the current pressure as impacted by these registers and bump
 /// the high water mark if needed.
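The rewritten PressureDiff::dump() above uses the empty-first-separator idiom to print a delimited list without a leading or trailing separator. A small sketch of just that idiom, with stand-in data rather than the LLVM types:

#include <cstdio>

int main() {
  const int UnitIncs[] = {2, -1, 3};
  const char *sep = ""; // empty before the first item, a real separator after
  for (int Inc : UnitIncs) {
    std::printf("%sPSet %d", sep, Inc);
    sep = " ";
  }
  std::printf("\n"); // prints: "PSet 2 PSet -1 PSet 3"
  return 0;
}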
void RegPressureTracker::increaseRegPressure(ArrayRef RegUnits) { - for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { - PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]); + for (unsigned RegUnit : RegUnits) { + PSetIterator PSetI = MRI->getPressureSets(RegUnit); unsigned Weight = PSetI.getWeight(); for (; PSetI.isValid(); ++PSetI) { CurrSetPressure[*PSetI] += Weight; @@ -104,8 +106,8 @@ void RegPressureTracker::increaseRegPressure(ArrayRef RegUnits) { /// Simply decrease the current pressure as impacted by these registers. void RegPressureTracker::decreaseRegPressure(ArrayRef RegUnits) { - for (unsigned I = 0, E = RegUnits.size(); I != E; ++I) - decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I])); + for (unsigned RegUnit : RegUnits) + decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnit)); } /// Clear the result so it can be used for another round of pressure tracking. @@ -157,10 +159,22 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { LiveInRegs.clear(); } -const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const { +void LiveRegSet::init(const MachineRegisterInfo &MRI) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + unsigned NumRegUnits = TRI.getNumRegs(); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + Regs.setUniverse(NumRegUnits + NumVirtRegs); + this->NumRegUnits = NumRegUnits; +} + +void LiveRegSet::clear() { + Regs.clear(); +} + +static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) { if (TargetRegisterInfo::isVirtualRegister(Reg)) - return &LIS->getInterval(Reg); - return LIS->getCachedRegUnit(Reg); + return &LIS.getInterval(Reg); + return LIS.getCachedRegUnit(Reg); } void RegPressureTracker::reset() { @@ -176,8 +190,7 @@ void RegPressureTracker::reset() { else static_cast(P).reset(); - LiveRegs.PhysRegs.clear(); - LiveRegs.VirtRegs.clear(); + LiveRegs.clear(); UntiedDefs.clear(); } @@ -210,8 +223,7 @@ void RegPressureTracker::init(const MachineFunction *mf, P.MaxSetPressure = CurrSetPressure; - LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); - LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); + LiveRegs.init(*MRI); if (TrackUntiedDefs) UntiedDefs.setUniverse(MRI->getNumVirtRegs()); } @@ -250,14 +262,8 @@ void RegPressureTracker::closeTop() { static_cast(P).TopPos = CurrPos; assert(P.LiveInRegs.empty() && "inconsistent max pressure result"); - P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); - P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); - for (SparseSet::const_iterator I = - LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) - P.LiveInRegs.push_back(*I); - std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end()); - P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()), - P.LiveInRegs.end()); + P.LiveInRegs.reserve(LiveRegs.size()); + LiveRegs.appendTo(P.LiveInRegs); } /// Set the boundary for the bottom of the region and summarize live outs. 
@@ -268,21 +274,14 @@ void RegPressureTracker::closeBottom() { static_cast(P).BottomPos = CurrPos; assert(P.LiveOutRegs.empty() && "inconsistent max pressure result"); - P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); - P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); - for (SparseSet::const_iterator I = - LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) - P.LiveOutRegs.push_back(*I); - std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end()); - P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()), - P.LiveOutRegs.end()); + P.LiveOutRegs.reserve(LiveRegs.size()); + LiveRegs.appendTo(P.LiveOutRegs); } /// Finalize the region boundaries and record live ins and live outs. void RegPressureTracker::closeRegion() { if (!isTopClosed() && !isBottomClosed()) { - assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() && - "no region boundary"); + assert(LiveRegs.size() == 0 && "no region boundary"); return; } if (!isBottomClosed()) @@ -299,8 +298,7 @@ void RegPressureTracker::closeRegion() { void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); assert(isBottomClosed() && "need bottom-up tracking to intialize."); - for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) { - unsigned Reg = P.LiveOutRegs[i]; + for (unsigned Reg : P.LiveOutRegs) { if (TargetRegisterInfo::isVirtualRegister(Reg) && !RPTracker.hasUntiedDef(Reg)) { increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg)); @@ -315,71 +313,113 @@ static bool containsReg(ArrayRef RegUnits, unsigned RegUnit) { } namespace { -/// Collect this instruction's unique uses and defs into SmallVectors for -/// processing defs and uses in order. -/// -/// FIXME: always ignore tied opers -class RegisterOperands { - const TargetRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - bool IgnoreDead; +/// List of register defined and used by a machine instruction. +class RegisterOperands { public: SmallVector Uses; SmallVector Defs; SmallVector DeadDefs; - RegisterOperands(const TargetRegisterInfo *tri, - const MachineRegisterInfo *mri, bool ID = false): - TRI(tri), MRI(mri), IgnoreDead(ID) {} + void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, bool IgnoreDead = false); - /// Push this operand's register onto the correct vector. - void collect(const MachineOperand &MO) { + /// Use liveness information to find dead defs not marked with a dead flag + /// and move them to the DeadDefs vector. + void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); +}; + +/// Collect this instruction's unique uses and defs into SmallVectors for +/// processing defs and uses in order. +/// +/// FIXME: always ignore tied opers +class RegisterOperandsCollector { + RegisterOperands &RegOpers; + const TargetRegisterInfo &TRI; + const MachineRegisterInfo &MRI; + bool IgnoreDead; + + RegisterOperandsCollector(RegisterOperands &RegOpers, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, + bool IgnoreDead) + : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {} + + void collectInstr(const MachineInstr &MI) const { + for (ConstMIBundleOperands OperI(&MI); OperI.isValid(); ++OperI) + collectOperand(*OperI); + + // Remove redundant physreg dead defs. 
+    SmallVectorImpl<unsigned>::iterator I =
+        std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
+                       std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
+    RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+  }
+
+  /// Push this operand's register onto the correct vectors.
+  void collectOperand(const MachineOperand &MO) const {
     if (!MO.isReg() || !MO.getReg())
       return;
+    unsigned Reg = MO.getReg();
     if (MO.readsReg())
-      pushRegUnits(MO.getReg(), Uses);
+      pushRegUnits(Reg, RegOpers.Uses);
     if (MO.isDef()) {
       if (MO.isDead()) {
         if (!IgnoreDead)
-          pushRegUnits(MO.getReg(), DeadDefs);
-      }
-      else
-        pushRegUnits(MO.getReg(), Defs);
+          pushRegUnits(Reg, RegOpers.DeadDefs);
+      } else
+        pushRegUnits(Reg, RegOpers.Defs);
     }
   }
 
-protected:
-  void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) {
+  void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) const {
     if (TargetRegisterInfo::isVirtualRegister(Reg)) {
       if (containsReg(RegUnits, Reg))
         return;
       RegUnits.push_back(Reg);
-    }
-    else if (MRI->isAllocatable(Reg)) {
-      for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
+    } else if (MRI.isAllocatable(Reg)) {
+      for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) {
         if (containsReg(RegUnits, *Units))
           continue;
         RegUnits.push_back(*Units);
       }
     }
   }
+
+  friend class RegisterOperands;
 };
-} // namespace
 
-/// Collect physical and virtual register operands.
-static void collectOperands(const MachineInstr *MI,
-                            RegisterOperands &RegOpers) {
-  for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI)
-    RegOpers.collect(*OperI);
-
-  // Remove redundant physreg dead defs.
-  SmallVectorImpl<unsigned>::iterator I =
-      std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(),
-                     std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs));
-  RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end());
+void RegisterOperands::collect(const MachineInstr &MI,
+                               const TargetRegisterInfo &TRI,
+                               const MachineRegisterInfo &MRI,
+                               bool IgnoreDead) {
+  RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead);
+  Collector.collectInstr(MI);
 }
 
+void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
+                                      const LiveIntervals &LIS) {
+  SlotIndex SlotIdx = LIS.getInstructionIndex(&MI);
+  for (SmallVectorImpl<unsigned>::iterator RI = Defs.begin();
+       RI != Defs.end(); /*empty*/) {
+    unsigned Reg = *RI;
+    const LiveRange *LR = getLiveRange(LIS, Reg);
+    if (LR != nullptr) {
+      LiveQueryResult LRQ = LR->Query(SlotIdx);
+      if (LRQ.isDeadDef()) {
+        // LiveIntervals knows this is a dead def even though its
+        // MachineOperand is not flagged as such.
+        DeadDefs.push_back(Reg);
+        RI = Defs.erase(RI);
+        continue;
+      }
+    }
+    ++RI;
+  }
+}
+
+} // namespace
+
 /// Initialize an array of N PressureDiffs.
 void PressureDiffs::init(unsigned N) {
   Size = N;
@@ -399,7 +439,7 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
   int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight();
   for (; PSetI.isValid(); ++PSetI) {
     // Find an existing entry in the pressure diff for this PSet.
-    PressureDiff::iterator I = begin(), E = end();
+    PressureDiff::iterator I = nonconst_begin(), E = nonconst_end();
     for (; I != E && I->isValid(); ++I) {
       if (I->getPSet() >= *PSetI)
         break;
@@ -411,10 +451,20 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
     if (!I->isValid() || I->getPSet() != *PSetI) {
       PressureChange PTmp = PressureChange(*PSetI);
       for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J)
-        std::swap(*J,PTmp);
+        std::swap(*J, PTmp);
     }
     // Update the units for this pressure set.
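The new detectDeadDefs() above walks Defs while erasing entries it moves to DeadDefs. A sketch of that erase-while-iterating pattern, shown with std::vector in place of SmallVectorImpl (the iterator-returning erase() semantics are the same) and a stand-in liveness query:

#include <cassert>
#include <vector>

static bool isDeadDef(unsigned Reg) { return Reg % 2 == 0; } // stand-in query

int main() {
  std::vector<unsigned> Defs = {1, 2, 3, 4};
  std::vector<unsigned> DeadDefs;
  for (std::vector<unsigned>::iterator RI = Defs.begin();
       RI != Defs.end(); /*empty*/) {
    if (isDeadDef(*RI)) {
      DeadDefs.push_back(*RI);
      RI = Defs.erase(RI); // erase returns the next valid iterator
      continue;
    }
    ++RI;
  }
  assert(Defs.size() == 2 && DeadDefs.size() == 2);
  return 0;
}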
- I->setUnitInc(I->getUnitInc() + Weight); + unsigned NewUnitInc = I->getUnitInc() + Weight; + if (NewUnitInc != 0) { + I->setUnitInc(NewUnitInc); + } else { + // Remove entry + PressureDiff::iterator J; + for (J = std::next(I); J != E && J->isValid(); ++J, ++I) + *I = *J; + if (J != E) + *I = *J; + } } } @@ -423,18 +473,18 @@ static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, const MachineRegisterInfo *MRI) { assert(!PDiff.begin()->isValid() && "stale PDiff"); - for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i) - PDiff.addPressureChange(RegOpers.Defs[i], true, MRI); + for (unsigned Reg : RegOpers.Defs) + PDiff.addPressureChange(Reg, true, MRI); - for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i) - PDiff.addPressureChange(RegOpers.Uses[i], false, MRI); + for (unsigned Reg : RegOpers.Uses) + PDiff.addPressureChange(Reg, false, MRI); } /// Force liveness of registers. void RegPressureTracker::addLiveRegs(ArrayRef Regs) { - for (unsigned i = 0, e = Regs.size(); i != e; ++i) { - if (LiveRegs.insert(Regs[i])) - increaseRegPressure(Regs[i]); + for (unsigned Reg : Regs) { + if (LiveRegs.insert(Reg)) + increaseRegPressure(Reg); } } @@ -465,13 +515,9 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { /// registers that are both defined and used by the instruction. If a pressure /// difference pointer is provided record the changes is pressure caused by this /// instruction independent of liveness. -bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, +void RegPressureTracker::recede(SmallVectorImpl *LiveUses, PressureDiff *PDiff) { - // Check for the top of the analyzable region. - if (CurrPos == MBB->begin()) { - closeRegion(); - return false; - } + assert(CurrPos != MBB->begin()); if (!isBottomClosed()) closeBottom(); @@ -483,11 +529,8 @@ bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, do --CurrPos; while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + assert(!CurrPos->isDebugValue()); - if (CurrPos->isDebugValue()) { - closeRegion(); - return false; - } SlotIndex SlotIdx; if (RequireIntervals) SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); @@ -496,8 +539,11 @@ bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, if (RequireIntervals && isTopClosed()) static_cast(P).openTop(SlotIdx); - RegisterOperands RegOpers(TRI, MRI); - collectOperands(CurrPos, RegOpers); + const MachineInstr &MI = *CurrPos; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI); + if (RequireIntervals) + RegOpers.detectDeadDefs(MI, *LIS); if (PDiff) collectPDiff(*PDiff, RegOpers, MRI); @@ -508,37 +554,19 @@ bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, // Kill liveness at live defs. // TODO: consider earlyclobbers? - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; - bool DeadDef = false; - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); - if (LR) { - LiveQueryResult LRQ = LR->Query(SlotIdx); - DeadDef = LRQ.isDeadDef(); - } - } - if (DeadDef) { - // LiveIntervals knows this is a dead even though it's MachineOperand is - // not flagged as such. Since this register will not be recorded as - // live-out, increase its PDiff value to avoid underflowing pressure. 
- if (PDiff) - PDiff->addPressureChange(Reg, false, MRI); - } else { - if (LiveRegs.erase(Reg)) - decreaseRegPressure(Reg); - else - discoverLiveOut(Reg); - } + for (unsigned Reg : RegOpers.Defs) { + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); + else + discoverLiveOut(Reg); } // Generate liveness for uses. - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { if (!LiveRegs.contains(Reg)) { // Adjust liveouts if LiveIntervals are available. if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); if (LR) { LiveQueryResult LRQ = LR->Query(SlotIdx); if (!LRQ.isKill() && !LRQ.valueDefined()) @@ -552,24 +580,18 @@ bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, } } if (TrackUntiedDefs) { - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; + for (unsigned Reg : RegOpers.Defs) { if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) UntiedDefs.insert(Reg); } } - return true; } /// Advance across the current instruction. -bool RegPressureTracker::advance() { +void RegPressureTracker::advance() { assert(!TrackUntiedDefs && "unsupported mode"); - // Check for the bottom of the analyzable region. - if (CurrPos == MBB->end()) { - closeRegion(); - return false; - } + assert(CurrPos != MBB->end()); if (!isTopClosed()) closeTop(); @@ -585,11 +607,10 @@ bool RegPressureTracker::advance() { static_cast(P).openBottom(CurrPos); } - RegisterOperands RegOpers(TRI, MRI); - collectOperands(CurrPos, RegOpers); + RegisterOperands RegOpers; + RegOpers.collect(*CurrPos, *TRI, *MRI); - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { // Discover live-ins. bool isLive = LiveRegs.contains(Reg); if (!isLive) @@ -597,24 +618,21 @@ bool RegPressureTracker::advance() { // Kill liveness at last uses. bool lastUse = false; if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); lastUse = LR && LR->Query(SlotIdx).isKill(); - } - else { + } else { // Allocatable physregs are always single-use before register rewriting. lastUse = !TargetRegisterInfo::isVirtualRegister(Reg); } if (lastUse && isLive) { LiveRegs.erase(Reg); decreaseRegPressure(Reg); - } - else if (!lastUse && !isLive) + } else if (!lastUse && !isLive) increaseRegPressure(Reg); } // Generate liveness for defs. - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; + for (unsigned Reg : RegOpers.Defs) { if (LiveRegs.insert(Reg)) increaseRegPressure(Reg); } @@ -627,7 +645,6 @@ bool RegPressureTracker::advance() { do ++CurrPos; while (CurrPos != MBB->end() && CurrPos->isDebugValue()); - return true; } /// Find the max change in excess pressure across all sets. @@ -653,8 +670,7 @@ static void computeExcessPressureDelta(ArrayRef OldPressureVec, PDiff = 0; // Under the limit else PDiff = PNew - Limit; // Just exceeded limit. - } - else if (Limit > PNew) + } else if (Limit > PNew) PDiff = Limit - POld; // Just obeyed limit. if (PDiff) { @@ -719,34 +735,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); // Account for register pressure similar to RegPressureTracker::recede(). 
- RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true); - collectOperands(MI, RegOpers); - - // Boost max pressure for all dead defs together. - // Since CurrSetPressure and MaxSetPressure - increaseRegPressure(RegOpers.DeadDefs); - decreaseRegPressure(RegOpers.DeadDefs); + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true); + assert(RegOpers.DeadDefs.size() == 0); + if (RequireIntervals) + RegOpers.detectDeadDefs(*MI, *LIS); // Kill liveness at live defs. - for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = RegOpers.Defs[i]; - bool DeadDef = false; - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); - if (LR) { - SlotIndex SlotIdx = LIS->getInstructionIndex(MI); - LiveQueryResult LRQ = LR->Query(SlotIdx); - DeadDef = LRQ.isDeadDef(); - } - } - if (!DeadDef) { - if (!containsReg(RegOpers.Uses, Reg)) - decreaseRegPressure(Reg); - } + for (unsigned Reg : RegOpers.Defs) { + if (!containsReg(RegOpers.Uses, Reg)) + decreaseRegPressure(Reg); } // Generate liveness for uses. - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { if (!LiveRegs.contains(Reg)) increaseRegPressure(Reg); } @@ -853,7 +854,8 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, unsigned MNew = MOld; // Ignore DeadDefs here because they aren't captured by PressureChange. unsigned PNew = POld + PDiffI->getUnitInc(); - assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow"); + assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) + && "PSet overflow/underflow"); if (PNew > MOld) MNew = PNew; // Check if current pressure has exceeded the limit. @@ -892,19 +894,13 @@ getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, } /// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). -static bool findUseBetween(unsigned Reg, - SlotIndex PriorUseIdx, SlotIndex NextUseIdx, - const MachineRegisterInfo *MRI, +static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, + SlotIndex NextUseIdx, const MachineRegisterInfo &MRI, const LiveIntervals *LIS) { - for (MachineRegisterInfo::use_instr_nodbg_iterator - UI = MRI->use_instr_nodbg_begin(Reg), - UE = MRI->use_instr_nodbg_end(); UI != UE; ++UI) { - const MachineInstr* MI = &*UI; - if (MI->isDebugValue()) - continue; - SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot(); - if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) - return true; + for (const MachineInstr &MI : MRI.use_nodbg_instructions(Reg)) { + SlotIndex InstSlot = LIS->getInstructionIndex(&MI).getRegSlot(); + if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) + return true; } return false; } @@ -919,8 +915,8 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); // Account for register pressure similar to RegPressureTracker::recede(). - RegisterOperands RegOpers(TRI, MRI); - collectOperands(MI, RegOpers); + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI); // Kill liveness at last uses. Assume allocatable physregs are single-use // rather than checking LiveIntervals. 
@@ -928,21 +924,18 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { if (RequireIntervals) SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); - for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = RegOpers.Uses[i]; + for (unsigned Reg : RegOpers.Uses) { if (RequireIntervals) { // FIXME: allow the caller to pass in the list of vreg uses that remain // to be bottom-scheduled to avoid searching uses at each query. SlotIndex CurrIdx = getCurrSlot(); - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); if (LR) { LiveQueryResult LRQ = LR->Query(SlotIdx); - if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { + if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS)) decreaseRegPressure(Reg); - } } - } - else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { // Allocatable physregs are always single-use before register rewriting. decreaseRegPressure(Reg); } @@ -966,7 +959,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { /// This is expensive for an on-the-fly query because it calls /// bumpDownwardPressure to recompute the pressure sets based on current /// liveness. We don't yet have a fast version of downward pressure tracking -/// analagous to getUpwardPressureDelta. +/// analogous to getUpwardPressureDelta. void RegPressureTracker:: getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, ArrayRef CriticalPSets, diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 4176686d1f7f..8fa1bf74b7e2 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -31,9 +31,12 @@ using namespace llvm; #define DEBUG_TYPE "reg-scavenging" /// setUsed - Set the register units of this register as used. -void RegScavenger::setRegUsed(unsigned Reg) { - for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) - RegUnitsAvailable.reset(*RUI); +void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { + for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { + LaneBitmask UnitMask = (*RUI).second; + if (UnitMask == 0 || (LaneMask & UnitMask) != 0) + RegUnitsAvailable.reset((*RUI).first); + } } void RegScavenger::initRegState() { @@ -50,9 +53,8 @@ void RegScavenger::initRegState() { return; // Live-in registers are in use. - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) - setRegUsed(*I); + for (const auto &LI : MBB->liveins()) + setRegUsed(LI.PhysReg, LI.LaneMask); // Pristine CSRs are also unavailable. 
   const MachineFunction &MF = *MBB->getParent();
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 76a7fef58fcc..efde61ece639 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -372,7 +372,6 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
       dbgs() << "\n";
     }
   }
-  dbgs() << "\n";
 }
 #endif
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 390b6d25954e..fb82ab7a5555 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -13,12 +13,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/ADT/IntEqClasses.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -51,15 +51,11 @@ static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
 
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo *mli,
-                                     bool IsPostRAFlag, bool RemoveKillFlags,
-                                     LiveIntervals *lis)
-    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(lis),
-      IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
-      CanHandleTerminators(false), FirstDbgValue(nullptr) {
-  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
+                                     bool RemoveKillFlags)
+    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
+      RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
+      TrackLaneMasks(false), FirstDbgValue(nullptr) {
   DbgValues.clear();
-  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
-         "Virtual registers must be removed prior to PostRA scheduling");
 
   const TargetSubtargetInfo &ST = mf.getSubtarget();
   SchedModel.init(ST.getSchedModel(), &ST, TII);
@@ -230,11 +226,8 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
       if (TRI->isPhysicalRegister(Reg))
         Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
-      else {
-        assert(!IsPostRA && "Virtual register encountered after regalloc.");
-        if (MO.readsReg()) // ignore undef operands
-          addVRegUseDeps(&ExitSU, i);
-      }
+      else if (MO.readsReg()) // ignore undef operands
+        addVRegUseDeps(&ExitSU, i);
     }
   } else {
     // For others, e.g. fallthrough, conditional branch, assume the exit
@@ -242,11 +235,9 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
     assert(Uses.empty() && "Uses in set before adding deps?");
     for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
            SE = BB->succ_end(); SI != SE; ++SI)
-      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
-             E = (*SI)->livein_end(); I != E; ++I) {
-        unsigned Reg = *I;
-        if (!Uses.contains(Reg))
-          Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
+      for (const auto &LI : (*SI)->liveins()) {
+        if (!Uses.contains(LI.PhysReg))
+          Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
       }
   }
 }
@@ -371,6 +362,20 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
   }
 }
 
+LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
+{
+  unsigned Reg = MO.getReg();
+  // No point in tracking lanemasks if we don't have interesting subregisters.
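The new getLaneMaskForMO() beginning above picks a lane mask per operand: everything when the register class has no disjunct subregisters, the class's full mask for a whole-register operand, or the subregister's lanes otherwise. A sketch of that decision with the TargetRegisterInfo queries replaced by stand-in data (the names here are assumptions, not LLVM API):

#include <cassert>
#include <cstdint>

typedef uint32_t LaneBitmask;

struct FakeRegClass {
  bool HasDisjunctSubRegs;
  LaneBitmask ClassLaneMask;
};

static LaneBitmask laneMaskFor(const FakeRegClass &RC, unsigned SubReg,
                               LaneBitmask SubRegLaneMask) {
  // Without interesting subregisters, track the register as a whole.
  if (!RC.HasDisjunctSubRegs)
    return ~0u;
  // A full-register operand covers every lane of the class.
  if (SubReg == 0)
    return RC.ClassLaneMask;
  // A subregister operand covers only that subregister's lanes.
  return SubRegLaneMask;
}

int main() {
  FakeRegClass Simple = {false, 0x1};
  FakeRegClass Tuple = {true, 0xF};
  assert(laneMaskFor(Simple, 3, 0x2) == ~0u);
  assert(laneMaskFor(Tuple, 0, 0x0) == 0xF);
  assert(laneMaskFor(Tuple, 1, 0x3) == 0x3);
  return 0;
}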
+ const TargetRegisterClass &RC = *MRI.getRegClass(Reg); + if (!RC.HasDisjunctSubRegs) + return ~0u; + + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0) + return RC.getLaneMask(); + return TRI->getSubRegIndexLaneMask(SubReg); +} + /// addVRegDefDeps - Add register output and data dependencies from this SUnit /// to instructions that occur later in the same scheduling region if they read /// from or write to the virtual register defined at OperIdx. @@ -378,35 +383,106 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { /// TODO: Hoist loop induction variable increments. This has to be /// reevaluated. Generally, IV scheduling should be done before coalescing. void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); - // Singly defined vregs do not have output/anti dependencies. - // The current operand is a def, so we have at least one. - // Check here if there are any others... + LaneBitmask DefLaneMask; + LaneBitmask KillLaneMask; + if (TrackLaneMasks) { + bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); + DefLaneMask = getLaneMaskForMO(MO); + // If we have a flag, none of the lane values comes from an + // earlier instruction. + KillLaneMask = IsKill ? ~0u : DefLaneMask; + + // Clear undef flag, we'll re-add it later once we know which subregister + // Def is first. + MO.setIsUndef(false); + } else { + DefLaneMask = ~0u; + KillLaneMask = ~0u; + } + + if (MO.isDead()) { + assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && + "Dead defs should have no uses"); + } else { + // Add data dependence to all uses we found so far. + const TargetSubtargetInfo &ST = MF.getSubtarget(); + for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg), + E = CurrentVRegUses.end(); I != E; /*empty*/) { + LaneBitmask LaneMask = I->LaneMask; + // Ignore uses of other lanes. + if ((LaneMask & KillLaneMask) == 0) { + ++I; + continue; + } + + if ((LaneMask & DefLaneMask) != 0) { + SUnit *UseSU = I->SU; + MachineInstr *Use = UseSU->getInstr(); + SDep Dep(SU, SDep::Data, Reg); + Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, + I->OperandIndex)); + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); + } + + LaneMask &= ~KillLaneMask; + // If we found a Def for all lanes of this use, remove it from the list. + if (LaneMask != 0) { + I->LaneMask = LaneMask; + ++I; + } else + I = CurrentVRegUses.erase(I); + } + } + + // Shortcut: Singly defined vregs do not have output/anti dependencies. if (MRI.hasOneDef(Reg)) return; - // Add output dependence to the next nearest def of this vreg. + // Add output dependence to the next nearest defs of this vreg. // // Unless this definition is dead, the output dependence should be // transitively redundant with antidependencies from this definition's // uses. We're conservative for now until we have a way to guarantee the uses // are not eliminated sometime during scheduling. The output dependence edge // is also useful if output latency exceeds def-use latency. 
- VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI == VRegDefs.end()) - VRegDefs.insert(VReg2SUnit(Reg, SU)); - else { - SUnit *DefSU = DefI->SU; - if (DefSU != SU && DefSU != &ExitSU) { - SDep Dep(SU, SDep::Output, Reg); - Dep.setLatency( - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); - DefSU->addPred(Dep); - } - DefI->SU = SU; + LaneBitmask LaneMask = DefLaneMask; + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for other lanes. + if ((V2SU.LaneMask & LaneMask) == 0) + continue; + // Add an output dependence. + SUnit *DefSU = V2SU.SU; + // Ignore additional defs of the same lanes in one instruction. This can + // happen because lanemasks are shared for targets with too many + // subregisters. We also use some representration tricks/hacks where we + // add super-register defs/uses, to imply that although we only access parts + // of the reg we care about the full one. + if (DefSU == SU) + continue; + SDep Dep(SU, SDep::Output, Reg); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); + DefSU->addPred(Dep); + + // Update current definition. This can get tricky if the def was about a + // bigger lanemask before. We then have to shrink it and create a new + // VReg2SUnit for the non-overlapping part. + LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; + LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; + if (NonOverlapMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU)); + V2SU.SU = SU; + V2SU.LaneMask = OverlapMask; } + // If there was no CurrentVRegDefs entry for some lanes yet, create one. + if (LaneMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); } /// addVRegUseDeps - Add a register data dependency if the instruction that @@ -416,59 +492,34 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { /// /// TODO: Handle ExitSU "uses" properly. void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { - MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); - // Record this local VReg use. - VReg2UseMap::iterator UI = VRegUses.find(Reg); - for (; UI != VRegUses.end(); ++UI) { - if (UI->SU == SU) - break; + // Remember the use. Data dependencies will be added when we find the def. + LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; + CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); + + // Add antidependences to the following defs of the vreg. + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for unrelated lanes. + LaneBitmask PrevDefLaneMask = V2SU.LaneMask; + if ((PrevDefLaneMask & LaneMask) == 0) + continue; + if (V2SU.SU == SU) + continue; + + V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); } - if (UI == VRegUses.end()) - VRegUses.insert(VReg2SUnit(Reg, SU)); - - // Lookup this operand's reaching definition. - assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveQueryResult LRQ - = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); - VNInfo *VNI = LRQ.valueIn(); - - // VNI will be valid because MachineOperand::readsReg() is checked by caller. 
- assert(VNI && "No value to read by operand"); - MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); - // Phis and other noninstructions (after coalescing) have a NULL Def. - if (Def) { - SUnit *DefSU = getSUnit(Def); - if (DefSU) { - // The reaching Def lives within this scheduling region. - // Create a data dependence. - SDep dep(DefSU, SDep::Data, Reg); - // Adjust the dependence latency using operand def/use information, then - // allow the target to perform its own adjustments. - int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); - - const TargetSubtargetInfo &ST = MF.getSubtarget(); - ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); - SU->addPred(dep); - } - } - - // Add antidependence to the following def of the vreg it uses. - VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI != VRegDefs.end() && DefI->SU != SU) - DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); } /// Return true if MI is an instruction we are unable to reason about /// (like a call or something with unmodeled side effects). static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { - if (MI->isCall() || MI->hasUnmodeledSideEffects() || - (MI->hasOrderedMemoryRef() && - (!MI->mayLoad() || !MI->isInvariantLoad(AA)))) - return true; - return false; + return MI->isCall() || MI->hasUnmodeledSideEffects() || + (MI->hasOrderedMemoryRef() && + (!MI->mayLoad() || !MI->isInvariantLoad(AA))); } // This MI might have either incomplete info, or known to be unsafe @@ -508,7 +559,7 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, return false; } -/// This returns true if the two MIs need a chain edge betwee them. +/// This returns true if the two MIs need a chain edge between them. /// If these are not even memory operations, we still may need /// chain deps between them. The question really is - could /// these two MIs be reordered during scheduling from memory dependency @@ -670,7 +721,7 @@ static inline void addChainDependency(AliasAnalysis *AA, unsigned TrueMemOrderLatency = 0, bool isNormalMemory = false) { // If this is a false dependency, - // do not add the edge, but rememeber the rejected node. + // do not add the edge, but remember the rejected node. if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); @@ -685,7 +736,7 @@ static inline void addChainDependency(AliasAnalysis *AA, } } -/// Create an SUnit for each real instruction, numbered in top-down toplological +/// Create an SUnit for each real instruction, numbered in top-down topological /// order. The instruction order A < B, implies that no edge exists from B to A. /// /// Map each real instruction to its SUnit. @@ -743,17 +794,44 @@ void ScheduleDAGInstrs::initSUnits() { } } +void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (!MO.readsReg()) + continue; + if (TrackLaneMasks && !MO.isUse()) + continue; + + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // Record this local VReg use. 
+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, 0, SU)); + } +} + /// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, - PressureDiffs *PDiffs) { + PressureDiffs *PDiffs, + bool TrackLaneMasks) { const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : nullptr; + this->TrackLaneMasks = TrackLaneMasks; MISUnitMap.clear(); ScheduleDAG::clearDAG(); @@ -766,7 +844,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // We build scheduling units by walking a block's instruction list from bottom // to top. - // Remember where a generic side-effecting instruction is as we procede. + // Remember where a generic side-effecting instruction is as we proceed. SUnit *BarrierChain = nullptr, *AliasChain = nullptr; // Memory references to specific known memory locations are tracked @@ -787,10 +865,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.setUniverse(TRI->getNumRegs()); Uses.setUniverse(TRI->getNumRegs()); - assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); + assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs"); + assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses"); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + CurrentVRegDefs.setUniverse(NumVirtRegs); + CurrentVRegUses.setUniverse(NumVirtRegs); + VRegUses.clear(); - VRegDefs.setUniverse(MRI.getNumVirtRegs()); - VRegUses.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(NumVirtRegs); // Model data dependencies between instructions being scheduled and the // ExitSU. @@ -818,6 +900,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RPTracker->recede(/*LiveUses=*/nullptr, PDiff); assert(RPTracker->getPos() == std::prev(MII) && "RPTracker can't find MI"); + collectVRegUses(SU); } assert( @@ -835,7 +918,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (TRI->isPhysicalRegister(Reg)) addPhysRegDeps(SU, j); else { - assert(!IsPostRA && "Virtual register encountered!"); if (MO.isDef()) { HasVRegDef = true; addVRegDefDeps(SU, j); @@ -890,7 +972,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain = SU; // This is a barrier event that acts as a pivotal node in the DAG, // so it is safe to clear list of exposed nodes. 
- adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); RejectMemNodes.clear(); NonAliasMemDefs.clear(); @@ -903,27 +985,27 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes); } for (MapVector >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } - adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); PendingLoads.clear(); AliasMemDefs.clear(); @@ -937,7 +1019,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, BarrierChain->addPred(SDep(SU, SDep::Barrier)); UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); + getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); if (Objs.empty()) { // Treat all other stores conservatively. @@ -961,7 +1043,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, 0, true); // If we're not using AA, then we only need one store per object. @@ -986,7 +1068,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); @@ -996,15 +1078,15 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. 
if (AliasChain) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); } - adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, RejectMemNodes, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, TrueMemOrderLatency); } else if (MI->mayLoad()) { bool MayAlias = true; @@ -1012,7 +1094,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Invariant load, no chain dependencies needed! } else { UnderlyingObjectsVector Objs; - getUnderlyingObjectsForInstr(MI, MFI, Objs, *TM.getDataLayout()); + getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); if (Objs.empty()) { // A load with no underlying object. Depend on all @@ -1020,7 +1102,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (MapVector >::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes); PendingLoads.push_back(SU); @@ -1044,7 +1126,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, I->second[i], RejectMemNodes, 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); @@ -1052,11 +1134,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, NonAliasMemUses[V].push_back(SU); } if (MayAlias) - adjustChainDeps(AA, MFI, *TM.getDataLayout(), SU, &ExitSU, + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - addChainDependency(AAForDep, MFI, *TM.getDataLayout(), SU, AliasChain, + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); @@ -1068,7 +1150,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.clear(); Uses.clear(); - VRegDefs.clear(); + CurrentVRegDefs.clear(); + CurrentVRegUses.clear(); PendingLoads.clear(); } @@ -1080,11 +1163,9 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; + for (const auto &LI : (*SI)->liveins()) { // Repeat, for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); SubRegs.isValid(); ++SubRegs) LiveRegs.set(*SubRegs); } @@ -1103,7 +1184,7 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, // Once we set a kill flag on an instruction, we bail out, as otherwise we // might set it on too many operands. We will clear as many flags as we // can though. 
- MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator Begin = MI->getIterator(); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (Begin != End) { for (MachineOperand &MO : (--End)->operands()) { @@ -1237,7 +1318,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { toggleKillFlag(MI, MO); DEBUG(MI->dump()); DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) { - MachineBasicBlock::instr_iterator Begin = MI; + MachineBasicBlock::instr_iterator Begin = MI->getIterator(); MachineBasicBlock::instr_iterator End = getBundleEnd(MI); while (++Begin != End) DEBUG(Begin->dump()); diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index b2e4617720ff..1150d26e559b 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -43,9 +43,12 @@ namespace llvm { return (Node->NumPreds > 10 || Node->NumSuccs > 10); } - static bool hasNodeAddressLabel(const SUnit *Node, - const ScheduleDAG *Graph) { - return true; + static std::string getNodeIdentifierLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + std::string R; + raw_string_ostream OS(R); + OS << static_cast(Node); + return R; } /// If you want to override the dot attributes printed for a particular diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3b29306bb54a..0872d7a9a228 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -156,13 +156,16 @@ namespace { void deleteAndRecombine(SDNode *N); bool recursivelyDeleteUnusedNodes(SDNode *N); + /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo = true); + /// Replaces all uses of the results of one DAG node with new values. SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { return CombineTo(N, &Res, 1, AddTo); } + /// Replaces all uses of the results of one DAG node with new values. 
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true) { SDValue To[] = { Res0, Res1 }; @@ -233,18 +236,17 @@ namespace { SDValue visitADDE(SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitMUL(SDNode *N); + SDValue useDivRem(SDNode *N); SDValue visitSDIV(SDNode *N); SDValue visitUDIV(SDNode *N); - SDValue visitSREM(SDNode *N); - SDValue visitUREM(SDNode *N); + SDValue visitREM(SDNode *N); SDValue visitMULHU(SDNode *N); SDValue visitMULHS(SDNode *N); SDValue visitSMUL_LOHI(SDNode *N); SDValue visitUMUL_LOHI(SDNode *N); SDValue visitSMULO(SDNode *N); SDValue visitUMULO(SDNode *N); - SDValue visitSDIVREM(SDNode *N); - SDValue visitUDIVREM(SDNode *N); + SDValue visitIMINMAX(SDNode *N); SDValue visitAND(SDNode *N); SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); SDValue visitOR(SDNode *N); @@ -265,6 +267,7 @@ namespace { SDValue visitVSELECT(SDNode *N); SDValue visitSELECT_CC(SDNode *N); SDValue visitSETCC(SDNode *N); + SDValue visitSETCCE(SDNode *N); SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); @@ -298,6 +301,10 @@ namespace { SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); + + SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); + SDValue replaceStoreOfFPConstant(StoreSDNode *ST); + SDValue visitSTORE(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); @@ -312,9 +319,11 @@ namespace { SDValue visitMGATHER(SDNode *N); SDValue visitMSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); + SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); + SDValue visitFMULForFMACombine(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -338,14 +347,17 @@ namespace { unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); + SDValue combineRepeatedFPDivisors(SDNode *N); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); - SDValue BuildReciprocalEstimate(SDValue Op); - SDValue BuildRsqrtEstimate(SDValue Op); - SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations); - SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations); + SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); + SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -374,6 +386,10 @@ namespace { /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); + /// Do FindBetterChain for a store and any possibly adjacent stores on + /// consecutive chains. + bool findBetterNeighborChains(StoreSDNode *St); + /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. 
struct MemOpLink { @@ -388,19 +404,37 @@ unsigned SequenceNum; }; + /// This is a helper function for visitMUL to check the profitability + /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). + /// MulNode is the original multiply, AddNode is (add x, c1), + /// and ConstNode is c2. + bool isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode); + /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a /// constant build_vector of the stored constant values in Stores. SDValue getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL, ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, EVT Ty) const; + /// This is a helper function for visitAND and visitZERO_EXTEND. Returns + /// true if the (and (load x) c) pattern matches an extload. ExtVT returns + /// the type of the loaded value to be extended. LoadedVT returns the type + /// of the original loaded value. NarrowLoad returns whether the load would + /// need to be narrowed in order to match. + bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, + bool &NarrowLoad); + /// This is a helper function for MergeConsecutiveStores. When the source /// elements of the consecutive stores are all constants or all extracted /// vector elements, try to merge them into one larger store. /// \return True if a merged store was created. bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, - EVT MemVT, unsigned NumElem, + EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector); /// This is a helper function for MergeConsecutiveStores. @@ -409,7 +443,7 @@ void getStoreMergeAndAliasCandidates( StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); - + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. /// \return True if some memory operations were changed. @@ -427,9 +461,7 @@ DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { - auto *F = DAG.getMachineFunction().getFunction(); - ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || - F->hasFnAttribute(Attribute::MinSize); + ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); } /// Runs the dag combiner on all nodes in the work list @@ -606,6 +638,9 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, assert(Op.hasOneUse() && "Unknown reuse!"); assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + + const SDNodeFlags *Flags = Op.getNode()->getFlags(); + switch (Op.getOpcode()) { default: llvm_unreachable("Unknown code"); case ISD::ConstantFP: { @@ -623,12 +658,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), Flags); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), - Op.getOperand(0)); + Op.getOperand(0), Flags); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros.
assert(Options.UnsafeFPMath); @@ -640,7 +675,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, // fold (fneg (fsub A, B)) -> (fsub B, A) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0)); + Op.getOperand(1), Op.getOperand(0), Flags); case ISD::FMUL: case ISD::FDIV: @@ -652,13 +687,13 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), - Op.getOperand(1)); + Op.getOperand(1), Flags); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, Depth+1)); + LegalOperations, Depth+1), Flags); case ISD::FP_EXTEND: case ISD::FSIN: @@ -1216,9 +1251,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) - AddToWorklist(I); + for (SDNode &Node : DAG.allnodes()) + AddToWorklist(&Node); // Create a dummy node (which is not added to allnodes), that adds a reference // to the root node, preventing it from being deleted, and tracking any @@ -1333,16 +1367,18 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MUL: return visitMUL(N); case ISD::SDIV: return visitSDIV(N); case ISD::UDIV: return visitUDIV(N); - case ISD::SREM: return visitSREM(N); - case ISD::UREM: return visitUREM(N); + case ISD::SREM: + case ISD::UREM: return visitREM(N); case ISD::MULHU: return visitMULHU(N); case ISD::MULHS: return visitMULHS(N); case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); case ISD::SMULO: return visitSMULO(N); case ISD::UMULO: return visitUMULO(N); - case ISD::SDIVREM: return visitSDIVREM(N); - case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: return visitIMINMAX(N); case ISD::AND: return visitAND(N); case ISD::OR: return visitOR(N); case ISD::XOR: return visitXOR(N); @@ -1361,6 +1397,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VSELECT: return visitVSELECT(N); case ISD::SELECT_CC: return visitSELECT_CC(N); case ISD::SETCC: return visitSETCC(N); + case ISD::SETCCE: return visitSETCCE(N); case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); @@ -1408,6 +1445,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::MSCATTER: return visitMSCATTER(N); case ISD::MSTORE: return visitMSTORE(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); + case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); } return SDValue(); } @@ -1470,13 +1508,8 @@ SDValue DAGCombiner::combine(SDNode *N) { // Constant operands are canonicalized to RHS. 
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { SDValue Ops[] = {N1, N0}; - SDNode *CSENode; - if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) { - CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, - &BinNode->Flags); - } else { - CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); - } + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + N->getFlags()); if (CSENode) return SDValue(CSENode, 0); } @@ -1595,26 +1628,6 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } -static bool isNullConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isNullValue(); -} - -static bool isNullFPConstant(SDValue V) { - ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); - return Const != nullptr && Const->isZero() && !Const->isNegative(); -} - -static bool isAllOnesConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isAllOnesValue(); -} - -static bool isOneConstant(SDValue V) { - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isOne(); -} - /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a /// ConstantSDNode pointer; else return nullptr. static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { @@ -1721,22 +1734,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return SDValue(N, 0); // fold (a+b) -> (a|b) iff a and b share no bits. - if (VT.isInteger() && !VT.isVector()) { - APInt LHSZero, LHSOne; - APInt RHSZero, RHSOne; - DAG.computeKnownBits(N0, LHSZero, LHSOne); - - if (LHSZero.getBoolValue()) { - DAG.computeKnownBits(N1, RHSZero, RHSOne); - - // If all possibly-set bits on the LHS are clear on the RHS, return an OR. - // If all possibly-set bits on the RHS are clear on the LHS, return an OR. - if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ - if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); - } - } - } + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && @@ -1971,31 +1971,26 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, x) -> 0 + no borrow - if (N0 == N1) { - SDLoc DL(N); + if (N0 == N1) return CombineTo(N, DAG.getConstant(0, DL, VT), - DAG.getNode(ISD::CARRY_FALSE, DL, - MVT::Glue)); - } + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (isNullConstant(N1)) - return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e.
(xor x, -1) + no borrow if (isAllOnesConstant(N0)) - return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), - DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), - MVT::Glue)); + return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); return SDValue(); } @@ -2130,14 +2125,15 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || - isa<ConstantSDNode>(N0.getOperand(1)))) - return DAG.getNode(ISD::ADD, SDLoc(N), VT, - DAG.getNode(ISD::MUL, SDLoc(N0), VT, - N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, SDLoc(N1), VT, - N0.getOperand(1), N1)); + if (isConstantIntBuildVectorOrConstantInt(N1) && + N0.getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + isMulAddWithConstProfitable(N, N0, N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, SDLoc(N1), VT, + N0.getOperand(1), N1)); // reassociate mul if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) @@ -2146,6 +2142,88 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return SDValue(); } +/// Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: return false; // No libcall for vector types. + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != nullptr; +} + +/// Issue divrem if both quotient and remainder are needed. +SDValue DAGCombiner::useDivRem(SDNode *Node) { + if (Node->use_empty()) + return SDValue(); // This is a dead node, leave it alone. + + EVT VT = Node->getValueType(0); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + unsigned Opcode = Node->getOpcode(); + bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + // If DIVREM is going to get expanded into a libcall, + // but there is no libcall available, then don't combine. + if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && + !isDivRemLibcallAvailable(Node, isSigned, TLI)) + return SDValue(); + + // If div is legal, it's better to do the normal expansion + unsigned OtherOpcode = 0; + if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { + OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; + if (TLI.isOperationLegalOrCustom(Opcode, VT)) + return SDValue(); + } else { + OtherOpcode = isSigned ?
ISD::SDIV : ISD::UDIV; + if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) + return SDValue(); + } + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue combined; + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node || User->use_empty()) + continue; + // Convert the other matching node(s), too; + // otherwise, the DIVREM may get target-legalized into something + // target-specific that we won't be able to recognize. + unsigned UserOpc = User->getOpcode(); + if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) { + if (!combined) { + if (UserOpc == OtherOpcode) { + SDVTList VTs = DAG.getVTList(VT, VT); + combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); + } else if (UserOpc == DivRemOpc) { + combined = SDValue(User, 0); + } else { + assert(UserOpc == Opcode); + continue; + } + } + if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) + CombineTo(User, combined); + else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) + CombineTo(User, combined.getValue(1)); + } + } + return combined; +} + SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -2156,26 +2234,26 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; + SDLoc DL(N); + // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C); + return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); // fold (sdiv X, 1) -> X if (N1C && N1C->isOne()) return N0; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) { - SDLoc DL(N); + if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); - } + // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), - N0, N1); + return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize @@ -2186,18 +2264,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && (N1C->getAPIntValue().isPowerOf2() || (-N1C->getAPIntValue()).isPowerOf2())) { - // If dividing by powers of two is cheap, then don't perform the following - // fold. - if (TLI.isPow2SDivCheap()) - return SDValue(); - // Target-specific implementation of sdiv x, pow2. - SDValue Res = BuildSDIVPow2(N); - if (Res.getNode()) + if (SDValue Res = BuildSDIVPow2(N)) return Res; unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); - SDLoc DL(N); // Splat the sign bit into the register SDValue SGN = @@ -2228,15 +2299,23 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // If integer divide is expensive and we satisfy the requirements, emit an - // alternate sequence. - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildSDIV(N); - if (Op.getNode()) return Op; - } + // alternate sequence. Targets may check function attributes for size/speed + // trade-offs.
+ AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildSDIV(N)) + return Op; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM(). + if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2254,26 +2333,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; + SDLoc DL(N); + // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT, + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C)) return Folded; // fold (udiv x, (1 << c)) -> x >>u c - if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, getShiftAmountTy(N0.getValueType()))); - } + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDLoc DL(N); SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() @@ -2284,15 +2363,23 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } } } + // fold (udiv x, c) -> alternate - if (N1C && !TLI.isIntDivCheap()) { - SDValue Op = BuildUDIV(N); - if (Op.getNode()) return Op; - } + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildUDIV(N)) + return Op; + + // udiv, urem -> udivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM().
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; // undef / X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X / undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2300,102 +2387,83 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSREM(SDNode *N) { +// handles ISD::SREM and ISD::UREM +SDValue DAGCombiner::visitREM(SDNode *N) { + unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + bool isSigned = (Opcode == ISD::SREM); + SDLoc DL(N); - // fold (srem c1, c2) -> c1%c2 + // fold (rem c1, c2) -> c1%c2 ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT, - N0C, N1C)) + if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) return Folded; - // If we know the sign bits of both operands are zero, strength reduce to a - // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 - if (!VT.isVector()) { - if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); - } - // If X/C can be simplified by the division-by-constant logic, lower - // X%C to the equivalent of X-X/C*C. - if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); - AddToWorklist(Div.getNode()); - SDValue OptimizedDiv = combine(Div.getNode()); - if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); - AddToWorklist(Mul.getNode()); - return Sub; + if (isSigned) { + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. 
Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, DL, VT, N0, N1); } - } - - // undef % X -> 0 - if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); - // X % undef -> undef - if (N1.getOpcode() == ISD::UNDEF) - return N1; - - return SDValue(); -} - -SDValue DAGCombiner::visitUREM(SDNode *N) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - EVT VT = N->getValueType(0); - - // fold (urem c1, c2) -> c1%c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); - ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT, - N0C, N1C)) - return Folded; - // fold (urem x, pow2) -> (and x, pow2-1) - if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && - N1C->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, VT, N0, - DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); - } - // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) - if (N1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { - if (SHC->getAPIntValue().isPowerOf2()) { - SDLoc DL(N); - SDValue Add = - DAG.getNode(ISD::ADD, DL, VT, N1, + } else { + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT)); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::AND, DL, VT, N0, Add); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); + } } } } + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. - if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); + // To avoid mangling nodes, this simplification requires that the combine() + // call for the speculative DIV must not cause a DIVREM conversion. We guard + // against this by skipping the simplification if isIntDivCheap(). When + // div is not cheap, combine will not return a DIVREM. Regardless, + // checking cheapness here makes sense since the simplification results in + // fatter code. + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { + unsigned DivOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; + SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); AddToWorklist(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, - OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); + assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && + (OptimizedDiv.getOpcode() != ISD::SDIVREM)); + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); AddToWorklist(Mul.getNode()); return Sub; } } + // srem, urem -> sdivrem, udivrem + if (SDValue DivRem = useDivRem(N)) + return DivRem.getValue(1); + // undef % X -> 0 if (N0.getOpcode() == ISD::UNDEF) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // X % undef -> undef if (N1.getOpcode() == ISD::UNDEF) return N1; @@ -2532,8 +2600,8 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2563,8 +2631,8 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { } SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); - if (Res.getNode()) return Res; + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) + return Res; EVT VT = N->getValueType(0); SDLoc DL(N); @@ -2613,16 +2681,26 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitSDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); - if (Res.getNode()) return Res; +SDValue DAGCombiner::visitIMINMAX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); - return SDValue(); -} + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; -SDValue DAGCombiner::visitUDIVREM(SDNode *N) { - SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); - if (Res.getNode()) return Res; + // fold (op c1, c2) -> c1 op c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); + + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); return SDValue(); } @@ -2848,10 +2926,13 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), - LL, LR, Result); + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } } } @@ -2887,6 +2968,46 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, return SDValue(); } +bool
DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, + bool &NarrowLoad) { + uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); + + if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue())) + return false; + + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + LoadedVT = LoadN->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { + // ZEXTLOAD will match without needing to change the size of the value being + // loaded. + NarrowLoad = false; + return true; + } + + // Do not change the width of a volatile load. + if (LoadN->isVolatile()) + return false; + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) + return false; + + if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) + return false; + + NarrowLoad = true; + return true; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3079,16 +3200,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { : cast<LoadSDNode>(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { - uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); - if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT LoadedVT = LN0->getMemoryVT(); - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - - if (ExtVT == LoadedVT && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT))) { - + auto NarrowLoad = false; + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) { + if (!NarrowLoad) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), LN0->getBasePtr(), ExtVT, @@ -3096,14 +3213,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - - // Do not change the width of a volatile load. - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, - ExtVT))) { + } else { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); @@ -3142,10 +3252,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return Combined; // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) // fold (and (sra)) -> (and (srl)) when possible.
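A minimal standalone sketch of the mask test that isAndLoadExtLoad performs above (editorial illustration only, not part of the patch; plain C++ with no LLVM APIs, and narrowLoadBits is a hypothetical name): an AND constant can be folded into a zero-extending load only when it is a contiguous low-bit mask, and the count of its active bits gives the width of the narrowed load.

#include <cassert>
#include <cstdint>

// Returns the bit width of the narrow zextload implied by `mask`,
// or 0 when `mask` is not of the form (1 << n) - 1.
unsigned narrowLoadBits(uint64_t mask) {
  if (mask == 0)
    return 0;
  // A contiguous low-bit mask plus one is a power of two (0xFF + 1 == 0x100),
  // so it shares no set bits with the mask itself.
  if ((mask & (mask + 1)) != 0)
    return 0; // set bits are not contiguous from bit 0
  // Count the active bits, i.e. the index of the highest set bit plus one.
  unsigned bits = 0;
  while (bits < 64 && (mask >> bits))
    ++bits;
  return bits;
}

int main() {
  assert(narrowLoadBits(0xFF) == 8);    // (and (load x), 0xff) -> zextload i8
  assert(narrowLoadBits(0xFFFF) == 16); // (and (load x), 0xffff) -> zextload i16
  assert(narrowLoadBits(0xF0) == 0);    // not a low-bit mask: no fold
}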
@@ -3507,10 +3616,13 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { if (Result != ISD::SETCC_INVALID && (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && - TLI.isOperationLegal(ISD::SETCC, - getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), - LL, LR, Result); + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } } } @@ -3665,11 +3777,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return Combined; // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) - SDValue BSwap = MatchBSwapHWord(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) return BSwap; - BSwap = MatchBSwapHWordLow(N, N0, N1); - if (BSwap.getNode()) + if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) return BSwap; // reassociate or @@ -3690,10 +3800,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } } // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) @@ -3710,7 +3819,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { /// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { - if (isa<ConstantSDNode>(Op.getOperand(1))) { + if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); Op = Op.getOperand(0); } else { @@ -3727,105 +3836,106 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { } // Return true if we can prove that, whenever Neg and Pos are both in the -// range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that +// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: // // (or (shift1 X, Neg), (shift2 X, Pos)) // // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate -// in direction shift1 by Neg. The range [0, OpSize) means that we only need +// in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. -static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { - // If OpSize is a power of 2 then: +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { + // If EltSize is a power of 2 then: // - // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) - // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). + // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) + // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). // - // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check + // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check // for the stronger condition: // - // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] + // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] // - // for all Neg and Pos.
Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) + // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) // we can just replace Neg with Neg' for the rest of the function. // // In other cases we check for the even stronger condition: // - // Neg == OpSize - Pos [B] + // Neg == EltSize - Pos [B] // // for all Neg and Pos. Note that the (or ...) then invokes undefined - // behavior if Pos == 0 (and consequently Neg == OpSize). + // behavior if Pos == 0 (and consequently Neg == EltSize). // - // We could actually use [A] whenever OpSize is a power of 2, but the + // We could actually use [A] whenever EltSize is a power of 2, but the // only extra cases that it would match are those uninteresting ones // where Neg and Pos are never in range at the same time. E.g. for - // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) // as well as (sub 32, Pos), but: // // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) // // always invokes undefined behavior for 32-bit X. // - // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. + // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. unsigned MaskLoBits = 0; - if (Neg.getOpcode() == ISD::AND && - isPowerOf2_64(OpSize) && - Neg.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { - Neg = Neg.getOperand(0); - MaskLoBits = Log2_64(OpSize); + if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { + if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { + if (NegC->getAPIntValue() == EltSize - 1) { + Neg = Neg.getOperand(0); + MaskLoBits = Log2_64(EltSize); + } + } } // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) - return 0; - ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); + return false; + ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); if (!NegC) - return 0; + return false; SDValue NegOp1 = Neg.getOperand(1); - // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with + // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with // Pos'. The truncation is redundant for the purpose of the equality. - if (MaskLoBits && - Pos.getOpcode() == ISD::AND && - Pos.getOperand(1).getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) - Pos = Pos.getOperand(0); + if (MaskLoBits && Pos.getOpcode() == ISD::AND) + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + if (PosC->getAPIntValue() == EltSize - 1) + Pos = Pos.getOperand(0); // The condition we need is now: // - // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask + // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask // // If NegOp1 == Pos then we need: // - // OpSize & Mask == NegC & Mask + // EltSize & Mask == NegC & Mask // // (because "x & Mask" is a truncation and distributes through subtraction). APInt Width; if (Pos == NegOp1) Width = NegC->getAPIntValue(); + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
// Then the condition we want to prove becomes: // - // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask + // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask // // which, again because "x & Mask" is a truncation, becomes: // - // NegC & Mask == (OpSize - PosC) & Mask - // OpSize & Mask == (NegC + PosC) & Mask - else if (Pos.getOpcode() == ISD::ADD && - Pos.getOperand(0) == NegOp1 && - Pos.getOperand(1).getOpcode() == ISD::Constant) - Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + - NegC->getAPIntValue()); - else + // NegC & Mask == (EltSize - PosC) & Mask + // EltSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + Width = PosC->getAPIntValue() + NegC->getAPIntValue(); + else + return false; + } else return false; - // Now we just need to check that OpSize & Mask == Width & Mask. + // Now we just need to check that EltSize & Mask == Width & Mask. if (MaskLoBits) - // Opsize & Mask is 0 since Mask is Opsize - 1. + // EltSize & Mask is 0 since Mask is EltSize - 1. return Width.getLoBits(MaskLoBits) == 0; - return Width == OpSize; + return Width == EltSize; } // A subroutine of MatchRotate used once we have found an OR of two opposite @@ -3845,7 +3955,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg).getNode(); @@ -3888,10 +3998,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); std::swap(LHSShift, RHSShift); - std::swap(LHSMask , RHSMask ); + std::swap(LHSMask, RHSMask); } - unsigned OpSizeInBits = VT.getSizeInBits(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); SDValue RHSShiftArg = RHSShift.getOperand(0); @@ -3899,11 +4009,10 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - if (LHSShiftAmt.getOpcode() == ISD::Constant && - RHSShiftAmt.getOpcode() == ISD::Constant) { - uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); - uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); - if ((LShVal + RShVal) != OpSizeInBits) + if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { + uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != EltSizeInBits) return nullptr; SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, @@ -3911,18 +4020,23 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // If there is an AND of either shifted operand, apply it to the result.
if (LHSMask.getNode() || RHSMask.getNode()) { - APInt Mask = APInt::getAllOnesValue(OpSizeInBits); + APInt AllBits = APInt::getAllOnesValue(EltSizeInBits); + SDValue Mask = DAG.getConstant(AllBits, DL, VT); if (LHSMask.getNode()) { - APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); - Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; + APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, LHSMask, + DAG.getConstant(RHSBits, DL, VT))); } if (RHSMask.getNode()) { - APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); - Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; + APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, RHSMask, + DAG.getConstant(LHSBits, DL, VT))); } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT)); + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); } return Rot.getNode(); @@ -4112,10 +4226,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) - if (N0.getOpcode() == N1.getOpcode()) { - SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); - if (Tmp.getNode()) return Tmp; - } + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; // Simplify the expression using non-local knowledge. if (!VT.isVector() && @@ -4434,12 +4547,19 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); } - if (N1C && !N1C->isOpaque()) { - SDValue NewSHL = visitShiftByConstant(N, N1C); - if (NewSHL.getNode()) - return NewSHL; + // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) + if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + if (SDValue Folded = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); + } } + if (N1C && !N1C->isOpaque()) + if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + return NewSHL; + return SDValue(); } @@ -4583,11 +4703,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); - if (N1C && !N1C->isOpaque()) { - SDValue NewSRA = visitShiftByConstant(N, N1C); - if (NewSRA.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRA = visitShiftByConstant(N, N1C)) return NewSRA; - } return SDValue(); } @@ -4744,8 +4862,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { - SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); - if (NewOp1.getNode()) + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); } @@ -4754,15 +4871,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - if (N1C && !N1C->isOpaque()) { - SDValue NewSRL = visitShiftByConstant(N, N1C); - if (NewSRL.getNode()) + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRL = visitShiftByConstant(N, N1C)) return NewSRL; - } // Attempt to convert a srl of a load into a narrower zero-extending load.
- SDValue NarrowLoad = ReduceLoadWidth(N); - if (NarrowLoad.getNode()) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) return NarrowLoad; // Here is a common situation. We want to optimize: @@ -4973,6 +5087,79 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SimplifySelectOps(N, N1, N2)) return SDValue(N, 0); // Don't revisit N. + if (VT0 == MVT::i1) { + // The code in this block deals with the following 2 equivalences: + // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) + // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) + // The target can specify its preferred form with the + // shouldNormalizeToSelectSequence() callback. However we always transform + // to the right if we find the inner select already exists in the DAG, + // and we always transform to the left side if we know that we can further + // optimize the combination of the conditions. + bool normalizeToSequence + = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { + // Create the actual and node if we can generate good code for it. + if (!normalizeToSequence) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { + // Create the actual or node if we can generate good code for it. + if (!normalizeToSequence) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { // select x, y (fcmp lt x, y) -> fminnum x, y @@ -4990,10 +5177,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - SDValue FMinMax = - combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), - N1, N2, CC, TLI, DAG); - if (FMinMax) + if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, CC, + TLI, DAG)) return FMinMax; } @@ -5006,69 +5192,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return SimplifySelect(SDLoc(N), N0, N1, N2); } - if (VT0 == MVT::i1) { - if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { - // select (and Cond0, Cond1), X, Y - // -> select Cond0, (select Cond1, X, Y), Y - if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, - InnerSelect, N2); - } - // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) - if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { - SDValue Cond0 = N0->getOperand(0); - SDValue Cond1 = N0->getOperand(1); - SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), - N1.getValueType(), Cond1, N1, N2); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, - InnerSelect); - } - } - - // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y - if (N1->getOpcode() == ISD::SELECT) { - SDValue N1_0 = N1->getOperand(0); - SDValue N1_1 = N1->getOperand(1); - SDValue N1_2 = N1->getOperand(2); - if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { - // Create the actual and node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { - SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), - N0, N1_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, - N1_1, N2); - } - // Otherwise see if we can optimize the "and" to a better pattern. - if (SDValue Combined = visitANDLike(N0, N1_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1_1, N2); - } - } - // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y - if (N2->getOpcode() == ISD::SELECT) { - SDValue N2_0 = N2->getOperand(0); - SDValue N2_1 = N2->getOperand(1); - SDValue N2_2 = N2->getOperand(2); - if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { - // Create the actual or node if we can generate good code for it. - if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { - SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), - N0, N2_0); - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, - N1, N2_2); - } - // Otherwise see if we can optimize to a better pattern.
- if (SDValue Combined = visitORLike(N0, N2_0, N)) - return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, - N1, N2_2); - } - } - } - return SDValue(); } @@ -5523,8 +5646,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (N1.getOpcode() == ISD::CONCAT_VECTORS && N2.getOpcode() == ISD::CONCAT_VECTORS && ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { - SDValue CV = ConvertSelectToConcatVector(N, DAG); - if (CV.getNode()) + if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) return CV; } @@ -5580,7 +5702,20 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { SDLoc(N)); } -/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or +SDValue DAGCombiner::visitSETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + + // If Carry is false, fold to a regular SETCC. + if (Carry.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + + return SDValue(); +} + +/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or /// a build_vector of constants. /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). @@ -5837,8 +5972,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6024,7 +6158,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (!VT.isVector()) { EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); - if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) { SDLoc DL(N); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); SDValue SetCC = DAG.getSetCC(DL, SetCCVT, @@ -6120,8 +6255,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6133,32 +6267,45 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (truncate x)) -> (and x, mask) - if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { - + if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { - SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode *oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); // CombineTo deleted
the truncate, if needed, but not what's under it. AddToWorklist(oye); } - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return SDValue(N, 0); // Return N so it doesn't get rechecked! } - SDValue Op = N0.getOperand(0); - if (Op.getValueType().bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); - } else if (Op.getValueType().bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); - AddToWorklist(Op.getNode()); + EVT SrcVT = N0.getOperand(0).getValueType(); + EVT MinVT = N0.getValueType(); + + // Try to mask before the extension to avoid having to generate a larger mask, + // possibly over several sub-vectors. + if (SrcVT.bitsLT(VT)) { + if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { + SDValue Op = N0.getOperand(0); + Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + AddToWorklist(Op.getNode()); + return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + } + } + + if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { + SDValue Op = N0.getOperand(0); + if (SrcVT.bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } else if (SrcVT.bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } + return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); } - return DAG.getZeroExtendInReg(Op, SDLoc(N), - N0.getValueType().getScalarType()); } // Fold (zext (and (trunc x), cst)) -> (and x, cst), @@ -6219,6 +6366,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) + // Unless (and (load x) cst) will match as a zextload already and has + // additional users. if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && @@ -6229,9 +6378,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, - SetCCs, TLI); + if (!N0.hasOneUse()) { + if (N0.getOpcode() == ISD::AND) { + auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); + auto NarrowLoad = false; + EVT LoadResultTy = AndC->getValueType(0); + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) + DoXform = false; + } + if (DoXform) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI); + } if (DoXform) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -6378,8 +6538,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) if (N0.getOpcode() == ISD::TRUNCATE) { - SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); - if (NarrowLoad.getNode()) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { SDNode* oye = N0.getNode()->getOperand(0).getNode(); if (NarrowLoad.getNode() != N0.getNode()) { CombineTo(N0.getNode(), NarrowLoad); @@ -6546,8 +6705,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { // Watch out for shift count overflow though.
       if (Amt >= Mask.getBitWidth()) break;
       APInt NewMask = Mask << Amt;
-      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
-      if (SimplifyLHS.getNode())
+      if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
                            SimplifyLHS, V.getOperand(1));
     }
@@ -6736,8 +6894,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   unsigned VTBits = VT.getScalarType().getSizeInBits();
   unsigned EVTBits = EVT.getScalarType().getSizeInBits();
 
+  if (N0.isUndef())
+    return DAG.getUNDEF(VT);
+
   // fold (sext_in_reg c1) -> c1
-  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+  if (isConstantIntBuildVectorOrConstantInt(N0))
     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
 
   // If the input is already sign extended, just drop the extension.
@@ -6771,8 +6932,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
 
   // fold (sext_in_reg (load x)) -> (smaller sextload x)
   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
-  SDValue NarrowLoad = ReduceLoadWidth(N);
-  if (NarrowLoad.getNode())
+  if (SDValue NarrowLoad = ReduceLoadWidth(N))
     return NarrowLoad;
 
   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
@@ -6831,29 +6991,6 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
                        BSwap, N1);
   }
 
-  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
-  // into a build_vector.
-  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
-    SmallVector<SDValue, 8> Elts;
-    unsigned NumElts = N0->getNumOperands();
-    unsigned ShAmt = VTBits - EVTBits;
-
-    for (unsigned i = 0; i != NumElts; ++i) {
-      SDValue Op = N0->getOperand(i);
-      if (Op->getOpcode() == ISD::UNDEF) {
-        Elts.push_back(Op);
-        continue;
-      }
-
-      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
-      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
-      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
-                                     SDLoc(Op), Op.getValueType()));
-    }
-
-    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
-  }
-
   return SDValue();
 }
 
@@ -6999,9 +7136,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
   // fold (truncate (load x)) -> (smaller load x)
   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
-    SDValue Reduced = ReduceLoadWidth(N);
-    if (Reduced.getNode())
+    if (SDValue Reduced = ReduceLoadWidth(N))
       return Reduced;
+
     // Handle the case where the load remains an extending load even
     // after truncation.
     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
@@ -7107,6 +7244,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
   return SDValue();
 }
 
+static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
+  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
+  // and Lo parts; on big-endian machines it doesn't.
+  return DAG.getDataLayout().isBigEndian() ?
1 : 0; +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7173,6 +7316,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // + // For ppc_fp128: + // fold (bitcast (fneg x)) -> + // flipbit = signbit + // (xor (bitcast x) (build_pair flipbit, flipbit)) + // fold (bitcast (fabs x)) -> + // flipbit = (and (extract_element (bitcast x), 0), signbit) + // (xor (bitcast x) (build_pair flipbit, flipbit)) // This often reduces constant pool loads. if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && @@ -7183,6 +7334,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(NewConv.getNode()); SDLoc DL(N); + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + assert(VT.getSizeInBits() == 128); + SDValue SignBit = DAG.getConstant( + APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + SDValue FlipBit; + if (N0.getOpcode() == ISD::FNEG) { + FlipBit = SignBit; + AddToWorklist(FlipBit.getNode()); + } else { + assert(N0.getOpcode() == ISD::FABS); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(NewConv))); + AddToWorklist(Hi.getNode()); + FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); + AddToWorklist(FlipBit.getNode()); + } + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, @@ -7196,6 +7370,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (or (and (bitconvert x), sign), (and cst, (not sign))) // Note that we don't handle (copysign x, cst) because this can always be // folded to an fneg or fabs. 
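// ---- illustrative aside (not part of the patch) ----------------------------
// A ppc_fp128 value is the sum of two doubles, so negating it must flip the
// sign of *both* halves; that is why the same flip bit is paired into both
// i64 words of the BUILD_PAIR above. For fabs the flip bit is the Hi
// double's sign, so the signs flip only when the value is negative. A
// single-double sketch of the underlying xor trick:
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  double D = -2.25, Neg;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));
  Bits ^= 1ull << 63; // fneg as an integer xor of the sign bit
  std::memcpy(&Neg, &Bits, sizeof(Neg));
  assert(Neg == 2.25);
  return 0;
}
// ---- end aside --------------------------------------------------------------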
+ // + // For ppc_fp128: + // fold (bitcast (fcopysign cst, x)) -> + // flipbit = (and (extract_element + // (xor (bitcast cst), (bitcast x)), 0), + // signbit) + // (xor (bitcast cst) (build_pair flipbit, flipbit)) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { @@ -7224,6 +7405,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(X.getNode()); } + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, + N0.getOperand(0)); + AddToWorklist(Cst.getNode()); + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, + N0.getOperand(1)); + AddToWorklist(X.getNode()); + SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); + AddToWorklist(XorResult.getNode()); + SDValue XorResult64 = DAG.getNode( + ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(XorResult))); + AddToWorklist(XorResult64.getNode()); + SDValue FlipBit = + DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, + DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); + AddToWorklist(FlipBit.getNode()); + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); @@ -7240,11 +7445,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. - if (N0.getOpcode() == ISD::BUILD_PAIR) { - SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); - if (CombineLD.getNode()) + if (N0.getOpcode() == ISD::BUILD_PAIR) + if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) return CombineLD; - } // Remove double bitcasts from shuffles - this is often a legacy of // XformToShuffleWithZero being used to combine bitmaskings (of @@ -7257,10 +7460,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { ShuffleVectorSDNode *SVN = cast(N0); // If operands are a bitcast, peek through if it casts the original VT. - // If operands are a UNDEF or constant, just bitcast back to original VT. + // If operands are a constant, just bitcast back to original VT. auto PeekThroughBitcast = [&](SDValue Op) { if (Op.getOpcode() == ISD::BITCAST && - Op.getOperand(0)->getValueType(0) == VT) + Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) @@ -7431,28 +7634,34 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; - bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath); + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && - TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // Floating-point multiply-add without intermediate rounding. 
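// ---- illustrative aside (not part of the patch) ----------------------------
// The ppc_fp128 fcopysign form sketched above flips the sign of cst exactly
// when sign(cst) != sign(x), i.e. the flip bit is the sign bit of (cst ^ x).
// A scalar double analogue, checked against std::copysign:
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>
static double copysignViaXor(double Cst, double X) {
  uint64_t C, V;
  std::memcpy(&C, &Cst, sizeof(C));
  std::memcpy(&V, &X, sizeof(V));
  C ^= (C ^ V) & (1ull << 63); // flip iff the signs differ
  double R;
  std::memcpy(&R, &C, sizeof(R));
  return R;
}
int main() {
  assert(copysignViaXor(3.0, -1.0) == std::copysign(3.0, -1.0));
  assert(copysignViaXor(-3.0, 1.0) == 3.0);
  return 0;
}
// ---- end aside --------------------------------------------------------------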
- bool HasFMA = ((!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - UnsafeFPMath); + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); // Always prefer FMAD to FMA for precision. - unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (Aggressive && N0.getOpcode() == ISD::FMUL && + N1.getOpcode() == ISD::FMUL) { + if (N0.getNode()->use_size() > N1.getNode()->use_size()) + std::swap(N0, N1); + } + // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (N0.getOpcode() == ISD::FMUL && (Aggressive || N0->hasOneUse())) { @@ -7469,7 +7678,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); @@ -7495,7 +7704,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((UnsafeFPMath || HasFMAD) && Aggressive) { + if ((AllowFusion || HasFMAD) && Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) if (N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL) { @@ -7518,7 +7727,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N0)); } - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( @@ -7608,25 +7817,23 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDLoc SL(N); const TargetOptions &Options = DAG.getTarget().Options; - bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath); + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && - TLI.isOperationLegal(ISD::FMAD, VT)); + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); // Floating-point multiply-add without intermediate rounding. - bool HasFMA = ((!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) && - TLI.isFMAFasterThanFMulAndFAdd(VT) && - UnsafeFPMath); + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) return SDValue(); // Always prefer FMAD to FMA for precision. - unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); bool LookThroughFPExt = TLI.isFPExtFree(VT); @@ -7659,7 +7866,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. 
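// ---- illustrative aside (not part of the patch) ----------------------------
// AllowFusion gates these folds because fma(x, y, z) skips the intermediate
// rounding of x*y, so the fused form is not bit-identical to the separate
// multiply and add. A standalone case where the results differ on IEEE
// doubles:
#include <cmath>
#include <cstdio>
int main() {
  double X = 1.0 + std::ldexp(1.0, -27); // 1 + 2^-27
  double Unfused = X * X - 1.0;          // x*x rounds before the subtract
  double Fused = std::fma(X, X, -1.0);   // exact x*x feeds the subtract
  std::printf("unfused=%a fused=%a\n", Unfused, Fused); // last bits differ
  return 0;
}
// ---- end aside --------------------------------------------------------------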
- if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fsub (fpext (fmul x, y)), z) // -> (fma (fpext x), (fpext y), (fneg z)) if (N0.getOpcode() == ISD::FP_EXTEND) { @@ -7735,7 +7942,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // More folding opportunities when target permits. - if ((UnsafeFPMath || HasFMAD) && Aggressive) { + if ((AllowFusion || HasFMAD) && Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode && @@ -7765,7 +7972,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N21, N0)); } - if (UnsafeFPMath && LookThroughFPExt) { + if (AllowFusion && LookThroughFPExt) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) if (N0.getOpcode() == PreferredFusedOpcode) { @@ -7866,14 +8073,97 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return SDValue(); } +/// Try to perform FMA combining on a given FMUL node. +SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); + + const TargetOptions &Options = DAG.getTarget().Options; + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned PreferredFusedOpcode = HasFMAD ? 
ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + + // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) + // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) + auto FuseFADD = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFADD(N0, N1)) + return FMA; + if (SDValue FMA = FuseFADD(N1, N0)) + return FMA; + + // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) + // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) + // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) + // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) + auto FuseFSUB = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { + auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); + if (XC0 && XC0->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + Y); + if (XC0 && XC0->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFSUB(N0, N1)) + return FMA; + if (SDValue FMA = FuseFSUB(N1, N0)) + return FMA; + + return SDValue(); +} + SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast(N0); - ConstantFPSDNode *N1CFP = dyn_cast(N1); + bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) @@ -7882,23 +8172,23 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N0, N1); + return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, DL, VT, N1, N0); + return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations)); + GetNegatedExpression(N0, DAG, LegalOperations), Flags); // If 'unsafe math' is enabled, fold lots of things. 
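// ---- illustrative aside (not part of the patch) ----------------------------
// The FuseFADD/FuseFSUB folds above rest on distributing the multiply over
// the +/-1.0 term, e.g. (x + 1) * y == x*y + y == fma(x, y, y). A quick
// standalone check with values where every step is exact:
#include <cassert>
#include <cmath>
int main() {
  double X = 4.0, Y = 9.0;
  assert(std::fma(X, Y, Y) == (X + 1.0) * Y);  // (x + 1) * y
  assert(std::fma(X, Y, -Y) == (X - 1.0) * Y); // (x - 1) * y
  assert(std::fma(-X, Y, Y) == (1.0 - X) * Y); // (1 - x) * y
  return 0;
}
// ---- end aside --------------------------------------------------------------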
if (Options.UnsafeFPMath) { @@ -7907,14 +8197,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { bool AllowNewConst = (Level < AfterLegalizeDAG); // fold (fadd A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) + if (N1C->isZero()) + return N0; // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isa(N0.getOperand(1))) + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1)); + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, + Flags), + Flags); // If allowed, fold (fadd (fneg x), x) -> 0.0 if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) @@ -7929,64 +8222,64 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // of rounding steps. if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP00 = dyn_cast(N0.getOperand(0)); - ConstantFPSDNode *CFP01 = dyn_cast(N0.getOperand(1)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); } // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); } } if (N1.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); - ConstantFPSDNode *CFP11 = dyn_cast(N1.getOperand(1)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(1.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); } // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0), - DAG.getConstantFP(2.0, DL, VT)); - return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP); + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); } } if (N0.getOpcode() == ISD::FADD && AllowNewConst) { - 
ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) - if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { return DAG.getNode(ISD::FMUL, DL, VT, - N1, DAG.getConstantFP(3.0, DL, VT)); + N1, DAG.getConstantFP(3.0, DL, VT), Flags); } } if (N1.getOpcode() == ISD::FADD && AllowNewConst) { - ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { return DAG.getNode(ISD::FMUL, DL, VT, - N0, DAG.getConstantFP(3.0, DL, VT)); + N0, DAG.getConstantFP(3.0, DL, VT), Flags); } } @@ -7996,15 +8289,14 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - return DAG.getNode(ISD::FMUL, DL, VT, - N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT)); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), + DAG.getConstantFP(4.0, DL, VT), Flags); } } } // enable-unsafe-fp-math // FADD -> FMA combines: - SDValue Fused = visitFADDForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFADDForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8020,6 +8312,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { EVT VT = N->getValueType(0); SDLoc dl(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) @@ -8028,12 +8321,12 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, dl, VT, N0, N1); + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); // fold (fsub A, (fneg B)) -> (fadd A, B) if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, dl, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), Flags); // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { @@ -8068,8 +8361,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // FSUB -> FMA combines: - SDValue Fused = visitFSUBForFMACombine(N); - if (Fused) { + if (SDValue Fused = visitFSUBForFMACombine(N)) { AddToWorklist(Fused.getNode()); return Fused; } @@ -8085,6 +8377,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast(N)->Flags; // fold vector ops if (VT.isVector()) { @@ -8095,12 +8388,12 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, DL, VT, N0, N1); + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); // canonicalize constant to RHS if (isConstantFPBuildVectorOrConstantFP(N0) && !isConstantFPBuildVectorOrConstantFP(N1)) - return DAG.getNode(ISD::FMUL, DL, VT, N1, N0); + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) @@ -8129,8 +8422,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // the second operand of the outer multiply are constants. 
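// ---- illustrative aside (not part of the patch) ----------------------------
// The constant re-associations in this UnsafeFPMath block can change the
// rounded result, which is why they are not done by default: FP addition is
// not associative. A minimal standalone demonstration:
#include <cassert>
#include <cmath>
int main() {
  double Y = std::ldexp(1.0, 53);             // 2^53: ulp(Y) == 2.0
  assert((Y + 1.0) + 1.0 != Y + (1.0 + 1.0)); // 2^53 vs 2^53 + 2
  return 0;
}
// ---- end aside --------------------------------------------------------------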
if ((N1CFP && isConstOrConstSplatFP(N01)) || (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); } } } @@ -8139,16 +8432,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs // during an early run of DAGCombiner can prevent folding with fmuls // inserted during lowering. - if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { + if (N0.getOpcode() == ISD::FADD && + (N0.getOperand(0) == N0.getOperand(1)) && + N0.hasOneUse()) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); - SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1); - return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); } } // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, DL, VT, N0, N0); + return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) @@ -8163,10 +8458,17 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FMUL, DL, VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } + // FMUL -> FMA combines: + if (SDValue Fused = visitFMULForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + return SDValue(); } @@ -8193,66 +8495,145 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { if (N1CFP && N1CFP->isZero()) return N2; } + // TODO: The FMA node should have flags that propagate to these nodes. if (N0CFP && N0CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) - if (N0CFP && !N1CFP) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); - // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FMUL && - N0 == N2.getOperand(0) && - N2.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); - } + // TODO: FMA nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. 
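// ---- illustrative aside (not part of the patch) ----------------------------
// Unlike the reassociations above, fmul X, 2.0 -> fadd X, X needs no
// fast-math: multiplying by a power of two only adjusts the exponent, so
// both forms are exact for every finite X. A standalone spot-check:
#include <cassert>
#include <initializer_list>
int main() {
  for (double X : {1.0, 0.1, -3.5e-300, 6.02e23})
    assert(X + X == 2.0 * X);
  return 0;
}
// ---- end aside --------------------------------------------------------------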
+ SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + if (Options.UnsafeFPMath) { + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + &Flags), &Flags); + } - // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) - if (Options.UnsafeFPMath && - N0.getOpcode() == ISD::FMUL && N1CFP && - N0.getOperand(1).getOpcode() == ISD::ConstantFP) { - return DAG.getNode(ISD::FMA, dl, VT, - N0.getOperand(0), - DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), - N2); + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (N0.getOpcode() == ISD::FMUL && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + &Flags), + N2); + } } // (fma x, 1, y) -> (fadd x, y) // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { if (N1CFP->isExactlyValue(1.0)) + // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, dl, VT, N0, N2); if (N1CFP->isExactlyValue(-1.0) && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); AddToWorklist(RHSNeg.getNode()); + // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); } } - // (fma x, c, x) -> (fmul x, (c+1)) - if (Options.UnsafeFPMath && N1CFP && N0 == N2) + if (Options.UnsafeFPMath) { + // (fma x, c, x) -> (fmul x, (c+1)) + if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(1.0, dl, VT))); - - // (fma x, c, (fneg x)) -> (fmul x, (c-1)) - if (Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) - return DAG.getNode(ISD::FMUL, dl, VT, N0, - DAG.getNode(ISD::FADD, dl, VT, - N1, DAG.getConstantFP(-1.0, dl, VT))); + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, dl, VT), + &Flags), &Flags); + } + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, dl, VT), + &Flags), &Flags); + } + } return SDValue(); } +// Combine multiple FDIVs with the same divisor into multiple FMULs by the +// reciprocal. +// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) +// Notice that this is not always beneficial. One reason is different target +// may have different costs for FDIV and FMUL, so sometimes the cost of two +// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason +// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". +SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { + bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags *Flags = N->getFlags(); + if (!UnsafeMath && !Flags->hasAllowReciprocal()) + return SDValue(); + + // Skip if current node is a reciprocal. 
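// ---- illustrative aside (not part of the patch) ----------------------------
// What combineRepeatedFPDivisors buys: several divisions by the same value
// become one division plus cheap multiplies, at the cost of one extra
// rounding step per quotient, hence the reciprocal/unsafe-math gating. A
// standalone sketch of the rewrite:
#include <cstdio>
int main(int argc, char **) {
  double D = 3.0 + argc; // divisor the compiler cannot see through
  double A = 10.0, B = 20.0, C = 30.0;
  double Recip = 1.0 / D;                                     // the one FDIV
  std::printf("%g %g %g\n", A * Recip, B * Recip, C * Recip); // three FMULs
  return 0;
}
// ---- end aside --------------------------------------------------------------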
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  if (N0CFP && N0CFP->isExactlyValue(1.0))
+    return SDValue();
+
+  // Exit early if the target does not want this transform or if there can't
+  // possibly be enough uses of the divisor to make the transform worthwhile.
+  SDValue N1 = N->getOperand(1);
+  unsigned MinUses = TLI.combineRepeatedFPDivisors();
+  if (!MinUses || N1->use_size() < MinUses)
+    return SDValue();
+
+  // Find all FDIV users of the same divisor.
+  // Use a set because duplicates may be present in the user list.
+  SetVector<SDNode *> Users;
+  for (auto *U : N1->uses()) {
+    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
+      // This division is eligible for optimization only if global unsafe math
+      // is enabled or if this division allows reciprocal formation.
+      if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
+        Users.insert(U);
+    }
+  }
+
+  // Now that we have the actual number of divisor uses, make sure it meets
+  // the minimum threshold specified by the target.
+  if (Users.size() < MinUses)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
+  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
+
+  // Dividend / Divisor -> Dividend * Reciprocal
+  for (auto *U : Users) {
+    SDValue Dividend = U->getOperand(0);
+    if (Dividend != FPOne) {
+      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
+                                    Reciprocal, Flags);
+      CombineTo(U, NewNode);
+    } else if (U != Reciprocal.getNode()) {
+      // In the absence of fast-math-flags, this user node is always the
+      // same node as Reciprocal, but with FMF they may be different nodes.
+      CombineTo(U, Reciprocal);
+    }
+  }
+  return SDValue(N, 0); // N was replaced.
+}
+
 SDValue DAGCombiner::visitFDIV(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8261,6 +8642,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
   const TargetOptions &Options = DAG.getTarget().Options;
+  SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
 
   // fold vector ops
   if (VT.isVector())
@@ -8269,7 +8651,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
 
   // fold (fdiv c1, c2) -> c1/c2
   if (N0CFP && N1CFP)
-    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
+    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
 
   if (Options.UnsafeFPMath) {
     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
@@ -8288,28 +8670,30 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
-                          DAG.getConstantFP(Recip, DL, VT));
+                          DAG.getConstantFP(Recip, DL, VT), Flags);
     }
 
     // If this FDIV is part of a reciprocal square root, it may be folded
     // into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FP_ROUND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { - if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, @@ -8326,18 +8710,18 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SqrtOp.getNode()) { // We found a FSQRT, so try to make this fold: // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { - RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp); + if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1)) { + if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); } } @@ -8349,52 +8733,13 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (LHSNeg == 2 || RHSNeg == 2) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), - GetNegatedExpression(N1, DAG, LegalOperations)); + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); } } - // Combine multiple FDIVs with the same divisor into multiple FMULs by the - // reciprocal. - // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) - // Notice that this is not always beneficial. One reason is different target - // may have different costs for FDIV and FMUL, so sometimes the cost of two - // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason - // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". - if (Options.UnsafeFPMath) { - // Skip if current node is a reciprocal. - if (N0CFP && N0CFP->isExactlyValue(1.0)) - return SDValue(); - - // Find all FDIV users of the same divisor. - // Use a set because duplicates may be present in the user list. 
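// ---- illustrative aside (not part of the patch) ----------------------------
// The BuildRsqrtEstimate calls above stand for a target-provided
// low-precision 1/sqrt(x) estimate that is then refined; the usual
// refinement is the Newton-Raphson step e' = e * (1.5 - 0.5 * x * e * e).
// A standalone sketch with a made-up starting estimate:
#include <cmath>
#include <cstdio>
int main() {
  double X = 2.0;
  double E = 0.7; // stand-in for a hardware rsqrt estimate
  for (int i = 0; i < 3; ++i)
    E = E * (1.5 - 0.5 * X * E * E); // one Newton step per iteration
  std::printf("refined=%.17g exact=%.17g\n", E, 1.0 / std::sqrt(X));
  return 0;
}
// ---- end aside --------------------------------------------------------------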
- SetVector Users; - for (auto *U : N1->uses()) - if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) - Users.insert(U); - - if (TLI.combineRepeatedFPDivisors(Users.size())) { - SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // FIXME: This optimization requires some level of fast-math, so the - // created reciprocal node should at least have the 'allowReciprocal' - // fast-math-flag set. - SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1); - - // Dividend / Divisor -> Dividend * Reciprocal - for (auto *U : Users) { - SDValue Dividend = U->getOperand(0); - if (Dividend != FPOne) { - SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, - Reciprocal); - CombineTo(U, NewNode); - } else if (U != Reciprocal.getNode()) { - // In the absence of fast-math-flags, this user node is always the - // same node as Reciprocal, but with FMF they may be different nodes. - CombineTo(U, Reciprocal); - } - } - return SDValue(N, 0); // N was replaced. - } - } + if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) + return CombineRepeatedDivisors; return SDValue(); } @@ -8408,7 +8753,8 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, + &cast(N)->Flags); return SDValue(); } @@ -8417,20 +8763,25 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) return SDValue(); + // TODO: FSQRT nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) - SDValue RV = BuildRsqrtEstimate(N->getOperand(0)); + SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); if (!RV) return SDValue(); - + EVT VT = RV.getValueType(); SDLoc DL(N); - RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); AddToWorklist(RV.getNode()); // Unfortunately, RV is now NaN if the input was exactly 0. // Select out this case and force the answer to 0. SDValue Zero = DAG.getConstantFP(0.0, DL, VT); - EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + EVT CCVT = getSetCCResultType(VT); SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); AddToWorklist(ZeroCmp.getNode()); AddToWorklist(RV.getNode()); @@ -8439,6 +8790,22 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { ZeroCmp, Zero, RV); } +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + // Do not optimize out type conversion of f128 type yet. + // For some target like x86_64, configuration is changed + // to keep one f128 value in one SSE register, but + // instruction selection cannot handle FCOPYSIGN on + // SSE registers yet. 
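// ---- illustrative aside (not part of the patch) ----------------------------
// The visitFSQRT transform above computes sqrt(x) as x * (1/sqrt(x)), which
// is NaN at x == 0 because it evaluates 0 * inf; the emitted select forces
// that case back to 0. A standalone scalar model of the guard:
#include <cassert>
#include <cmath>
static double sqrtViaRsqrt(double X) {
  double RV = X * (1.0 / std::sqrt(X)); // stand-in for the estimate path
  return X == 0.0 ? 0.0 : RV;           // the select emitted by the combine
}
int main() {
  assert(sqrtViaRsqrt(4.0) == 2.0);
  assert(sqrtViaRsqrt(0.0) == 0.0); // without the select this is NaN
  return 0;
}
// ---- end aside --------------------------------------------------------------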
+  SDValue N1 = N->getOperand(1);
+  EVT N1VT = N1->getValueType(0);
+  EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+  return (N1.getOpcode() == ISD::FP_EXTEND ||
+          N1.getOpcode() == ISD::FP_ROUND) &&
+         (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+}
+
 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8482,7 +8849,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
 
   // copysign(x, fp_extend(y)) -> copysign(x, y)
   // copysign(x, fp_round(y)) -> copysign(x, y)
-  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                        N0, N1.getOperand(0));
 
@@ -8837,11 +9204,12 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
       APFloat CVal = CFP1->getValueAPF();
       CVal.changeSign();
       if (Level >= AfterLegalizeDAG &&
-          (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
-           TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
-        return DAG.getNode(
-            ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
-            DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+          (TLI.isFPImmLegal(CVal, VT) ||
+           TLI.isOperationLegal(ISD::ConstantFP, VT)))
+        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
+                                       N0.getOperand(1)),
+                           &cast<BinaryWithFlagsSDNode>(N0)->Flags);
     }
   }
 
@@ -8851,20 +9219,20 @@ SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
 
   if (N0CFP && N1CFP) {
     const APFloat &C0 = N0CFP->getValueAPF();
     const APFloat &C1 = N1CFP->getValueAPF();
-    return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
+    return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
   }
 
-  if (N0CFP) {
-    EVT VT = N->getValueType(0);
-    // Canonicalize to constant on RHS.
+  // Canonicalize to constant on RHS.
+  if (isConstantFPBuildVectorOrConstantFP(N0) &&
+      !isConstantFPBuildVectorOrConstantFP(N1))
     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
-  }
 
   return SDValue();
 }
@@ -8872,20 +9240,20 @@ SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
+  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
 
   if (N0CFP && N1CFP) {
     const APFloat &C0 = N0CFP->getValueAPF();
     const APFloat &C1 = N1CFP->getValueAPF();
-    return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
+    return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
   }
 
-  if (N0CFP) {
-    EVT VT = N->getValueType(0);
-    // Canonicalize to constant on RHS.
+  // Canonicalize to constant on RHS.
+  if (isConstantFPBuildVectorOrConstantFP(N0) &&
+      !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
-  }
 
   return SDValue();
 }
@@ -9034,8 +9402,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
       SDValue Op1 = TheXor->getOperand(1);
       if (Op0.getOpcode() == Op1.getOpcode()) {
         // Avoid missing important xor optimizations.
- SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode()) { + if (SDValue Tmp = visitXOR(TheXor)) { if (Tmp.getNode() != TheXor) { DEBUG(dbgs() << "\nReplacing.8 "; TheXor->dump(&DAG); @@ -9722,8 +10089,8 @@ struct LoadedSlice { void addSliceGain(const LoadedSlice &LS) { // Each slice saves a truncate. const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); - if (!TLI.isTruncateFree(LS.Inst->getValueType(0), - LS.Inst->getOperand(0).getValueType())) + if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), + LS.Inst->getValueType(0))) ++Truncates; // If there is a shift amount, this slice gets rid of it. if (LS.Shift) @@ -10625,30 +10992,109 @@ struct BaseIndexOffset { }; } // namespace +// This is a helper function for visitMUL to check the profitability +// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). +// MulNode is the original multiply, AddNode is (add x, c1), +// and ConstNode is c2. +// +// If the (add x, c1) has multiple uses, we could increase +// the number of adds if we make this transformation. +// It would only be worth doing this if we can remove a +// multiply in the process. Check for that here. +// To illustrate: +// (A + c1) * c3 +// (A + c2) * c3 +// We're checking for cases where we have common "c3 * A" expressions. +bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode) { + APInt Val; + + // If the add only has one use, this would be OK to do. + if (AddNode.getNode()->hasOneUse()) + return true; + + // Walk all the users of the constant with which we're multiplying. + for (SDNode *Use : ConstNode->uses()) { + + if (Use == MulNode) // This use is the one we're on right now. Skip it. + continue; + + if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. + SDNode *OtherOp; + SDNode *MulVar = AddNode.getOperand(0).getNode(); + + // OtherOp is what we're multiplying against the constant. + if (Use->getOperand(0) == ConstNode) + OtherOp = Use->getOperand(1).getNode(); + else + OtherOp = Use->getOperand(0).getNode(); + + // Check to see if multiply is with the same operand of our "add". + // + // ConstNode = CONST + // Use = ConstNode * A <-- visiting Use. OtherOp is A. + // ... + // AddNode = (A + c1) <-- MulVar is A. + // = AddNode * ConstNode <-- current visiting instruction. + // + // If we make this transformation, we will have a common + // multiply (ConstNode * A) that we can save. + if (OtherOp == MulVar) + return true; + + // Now check to see if a future expansion will give us a common + // multiply. + // + // ConstNode = CONST + // AddNode = (A + c1) + // ... = AddNode * ConstNode <-- current visiting instruction. + // ... + // OtherOp = (A + c2) + // Use = OtherOp * ConstNode <-- visiting Use. + // + // If we make this transformation, we will have a common + // multiply (CONST * A) after we also do the same transformation + // to the "t2" instruction. + if (OtherOp->getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && + OtherOp->getOperand(0).getNode() == MulVar) + return true; + } + } + + // Didn't find a case where this would be profitable. 
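// ---- illustrative aside (not part of the patch) ----------------------------
// The profitability walk above looks for a second multiply of the same
// variable by the same constant: once (A + c1) * C and (A + c2) * C are both
// distributed, A * C becomes a common subexpression and one multiply
// disappears. A standalone integer sketch of the payoff:
#include <cassert>
int main() {
  long A = 7, C = 13, c1 = 3, c2 = 5;
  long T = A * C;       // shared multiply after the transform
  long R1 = T + c1 * C; // was (A + c1) * C; c1*C folds to a constant
  long R2 = T + c2 * C; // was (A + c2) * C; c2*C folds to a constant
  assert(R1 == (A + c1) * C && R2 == (A + c2) * C);
  return 0;
}
// ---- end aside --------------------------------------------------------------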
+  return false;
+}
+
 SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, SDLoc SL,
                                                   ArrayRef<MemOpLink> Stores,
+                                                  SmallVectorImpl<SDValue> &Chains,
                                                   EVT Ty) const {
   SmallVector<SDValue, 8> BuildVector;
-  for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
-    BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
+  for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+    StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
+    Chains.push_back(St->getChain());
+    BuildVector.push_back(St->getValue());
+  }
 
   return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
 }
 
 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
-    unsigned NumElem, bool IsConstantSrc, bool UseVector) {
+    unsigned NumStores, bool IsConstantSrc, bool UseVector) {
   // Make sure we have something to merge.
-  if (NumElem < 2)
+  if (NumStores < 2)
     return false;
 
   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   unsigned LatestNodeUsed = 0;
 
-  for (unsigned i=0; i < NumElem; ++i) {
+  for (unsigned i=0; i < NumStores; ++i) {
     // Find a chain for the new wide-store operand. Notice that some
     // of the store nodes that we found may not be selected for inclusion
     // in the wide store. The chain we use needs to be the chain of the
@@ -10657,45 +11103,57 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
       LatestNodeUsed = i;
   }
 
+  SmallVector<SDValue, 8> Chains;
+
   // The latest Node in the DAG.
   LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
 
   SDLoc DL(StoreNodes[0].MemNode);
 
   SDValue StoredVal;
   if (UseVector) {
-    // Find a legal type for the vector store.
-    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
+    bool IsVec = MemVT.isVector();
+    unsigned Elts = NumStores;
+    if (IsVec) {
+      // When merging vector stores, get the total number of elements.
+      Elts *= MemVT.getVectorNumElements();
+    }
+    // Get the type for the merged vector store.
+    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
     assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+
     if (IsConstantSrc) {
-      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
+      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
     } else {
       SmallVector<SDValue, 8> Ops;
-      for (unsigned i = 0; i < NumElem ; ++i) {
+      for (unsigned i = 0; i < NumStores; ++i) {
         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
         SDValue Val = St->getValue();
-        // All of the operands of a BUILD_VECTOR must have the same type.
+        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
         if (Val.getValueType() != MemVT)
           return false;
         Ops.push_back(Val);
+        Chains.push_back(St->getChain());
      }
 
       // Build the extracted vector elements back into a vector.
-      StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
-    }
+      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
+                              DL, Ty, Ops);
+    }
   } else {
     // We should always use a vector store when merging extracted vector
     // elements, so this path implies a store of constants.
     assert(IsConstantSrc && "Merged vector elements should use vector store");
 
-    unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
     APInt StoreInt(SizeInBits, 0);
 
     // Construct a single integer constant which is made of the smaller
     // constant inputs.
     bool IsLE = DAG.getDataLayout().isLittleEndian();
-    for (unsigned i = 0; i < NumElem ; ++i) {
-      unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
+    for (unsigned i = 0; i < NumStores; ++i) {
+      unsigned Idx = IsLE ?
(NumStores - 1 - i) : i; StoreSDNode *St = cast(StoreNodes[Idx].MemNode); + Chains.push_back(St->getChain()); + SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { @@ -10712,7 +11170,10 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } - SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal, + assert(!Chains.empty()); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, @@ -10721,7 +11182,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // Replace the last store with the new store CombineTo(LatestOp, NewStore); // Erase all other stores. - for (unsigned i = 0; i < NumElem ; ++i) { + for (unsigned i = 0; i < NumStores; ++i) { if (StoreNodes[i].MemNode == LatestOp) continue; StoreSDNode *St = cast(StoreNodes[i].MemNode); @@ -10743,17 +11204,6 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( return true; } -static bool allowableAlignment(const SelectionDAG &DAG, - const TargetLowering &TLI, EVT EVTTy, - unsigned AS, unsigned Align) { - if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align)) - return true; - - Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty); - return (Align >= ABIAlignment); -} - void DAGCombiner::getStoreMergeAndAliasCandidates( StoreSDNode* St, SmallVectorImpl &StoreNodes, SmallVectorImpl &AliasLoadNodes) { @@ -10775,6 +11225,38 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( EVT MemVT = St->getMemoryVT(); unsigned Seq = 0; StoreSDNode *Index = St; + + + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + + if (UseAA) { + // Look at other users of the same chain. Stores on the same chain do not + // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized + // to be on the same chain, so don't bother looking at adjacent chains. + + SDValue Chain = St->getChain(); + for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { + if (StoreSDNode *OtherST = dyn_cast(*I)) { + if (I.getOperandNo() != 0) + continue; + + if (OtherST->isVolatile() || OtherST->isIndexed()) + continue; + + if (OtherST->getMemoryVT() != MemVT) + continue; + + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + + if (Ptr.equalBaseIndex(BasePtr)) + StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); + } + } + + return; + } + while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. if (Index != St && !SDValue(Index, 0)->hasOneUse()) @@ -10800,6 +11282,13 @@ void DAGCombiner::getStoreMergeAndAliasCandidates( if (Index->getMemoryVT() != MemVT) break; + // We do not allow under-aligned stores in order to prevent + // overriding stores. NOTE: this is a bad hack. Alignment SHOULD + // be irrelevant here; what MATTERS is that we not move memory + // operations that potentially overlap past each-other. + if (Index->getAlignment() < MemVT.getStoreSize()) + break; + // We found a potential memory operand to merge. StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); @@ -10844,8 +11333,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) return false; - // Don't merge vectors into wider inputs. 
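// ---- illustrative aside (not part of the patch) ----------------------------
// The StoreInt loop above packs the merged constants so the value stored at
// the lowest address lands in the lowest byte of the wide store; on a
// little-endian target that means shifting in the highest-address value
// first. A standalone sketch (assumes a little-endian host, e.g. x86):
#include <cassert>
#include <cstdint>
#include <cstring>
int main() {
  const uint8_t Vals[4] = {0x11, 0x22, 0x33, 0x44}; // four i8 stores
  uint32_t StoreInt = 0;
  for (int i = 3; i >= 0; --i)
    StoreInt = (StoreInt << 8) | Vals[i]; // 0x44332211
  uint8_t Mem[4];
  std::memcpy(Mem, &StoreInt, 4); // the single merged i32 store
  assert(std::memcmp(Mem, Vals, 4) == 0); // bytes land at the right addresses
  return 0;
}
// ---- end aside --------------------------------------------------------------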
- if (MemVT.isVector() || !MemVT.isSimple()) + if (!MemVT.isSimple()) return false; // Perform an early exit check. Do not bother looking at stored values that @@ -10854,9 +11342,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { bool IsLoadSrc = isa(StoredVal); bool IsConstantSrc = isa(StoredVal) || isa(StoredVal); - bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT); + bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc) + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) + return false; + + // Don't merge vectors into wider vectors if the source data comes from loads. + // TODO: This restriction can be lifted by using logic similar to the + // ExtractVecSrc case. + if (MemVT.isVector() && IsLoadSrc) return false; // Only look at ends of store sequences. @@ -10868,22 +11363,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // We need to make sure that these nodes do not interfere with // any of the store nodes. SmallVector AliasLoadNodes; - + // Save the StoreSDNodes that we find in the chain. SmallVector StoreNodes; getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); - + // Check if there is anything to merge. if (StoreNodes.size() < 2) return false; - // Sort the memory operands according to their distance from the base pointer. + // Sort the memory operands according to their distance from the + // base pointer. As a secondary criteria: make sure stores coming + // later in the code come first in the list. This is important for + // the non-UseAA case, because we're merging stores into the FINAL + // store along a chain which potentially contains aliasing stores. + // Thus, if there are multiple stores to the same address, the last + // one can be considered for merging but not the others. std::sort(StoreNodes.begin(), StoreNodes.end(), [](MemOpLink LHS, MemOpLink RHS) { return LHS.OffsetFromBase < RHS.OffsetFromBase || (LHS.OffsetFromBase == RHS.OffsetFromBase && - LHS.SequenceNum > RHS.SequenceNum); + LHS.SequenceNum < RHS.SequenceNum); }); // Scan the memory operations on the chain and find the first non-consecutive @@ -10900,15 +11401,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; } - bool Alias = false; // Check if this store interferes with any of the loads that we found. - for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) - if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { - Alias = true; - break; - } - // We found a load that alias with this store. Stop the sequence. - if (Alias) + // If we find a load that alias with this store. Stop the sequence. + if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(), + [&](LSBaseSDNode* Ldn) { + return isAlias(Ldn, StoreNodes[i].MemNode); + })) break; // Mark this node as useful. @@ -10919,6 +11417,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. if (IsConstantSrc) { @@ -10940,43 +11440,40 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find a legal type for the constant store. 
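// ---- illustrative aside (not part of the patch) ----------------------------
// The std::sort above orders merge candidates by offset from the common base
// and breaks ties by sequence number, so of several stores to one address
// only the intended one leads. An equivalent comparator over a plain struct:
#include <algorithm>
#include <cassert>
#include <vector>
struct MemOpLite { long OffsetFromBase; unsigned SequenceNum; };
int main() {
  std::vector<MemOpLite> Ops = {{8, 0}, {0, 2}, {0, 1}};
  std::sort(Ops.begin(), Ops.end(), [](MemOpLite L, MemOpLite R) {
    return L.OffsetFromBase < R.OffsetFromBase ||
           (L.OffsetFromBase == R.OffsetFromBase &&
            L.SequenceNum < R.SequenceNum);
  });
  assert(Ops[0].SequenceNum == 1 && Ops[2].OffsetFromBase == 8);
  return 0;
}
// ---- end aside --------------------------------------------------------------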
unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast; if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { LastLegalType = i+1; // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + } else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign)) { + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFast) && + IsFast) { LastLegalType = i + 1; } } - // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); - if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) { - LastLegalVectorType = i + 1; + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1, + FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); + if (TLI.isTypeLegal(Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + LastLegalVectorType = i + 1; } } - - // We only use vectors if the constant is known to be zero or the target - // allows it and the function is not marked with the noimplicitfloat - // attribute. - if (NoVectors) { - LastLegalVectorType = 0; - } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT, - LastLegalVectorType, - FirstStoreAS)) { - LastLegalVectorType = 0; - } - // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) return false; @@ -10990,27 +11487,36 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. - if (IsExtractVecEltSrc) { - unsigned NumElem = 0; + if (IsExtractVecSrc) { + unsigned NumStoresToMerge = 0; + bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { StoreSDNode *St = cast(StoreNodes[i].MemNode); - SDValue StoredVal = St->getValue(); + unsigned StoreValOpcode = St->getValue().getOpcode(); // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a vector. // It should be possible to handle mixed sources, but load sources need // more careful handling (see the block of code below that handles // consecutive loads). - if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && + StoreValOpcode != ISD::EXTRACT_SUBVECTOR) return false; // Find a legal type for the vector store. - EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + unsigned Elts = i + 1; + if (IsVec) { + // When merging vector stores, get the total number of elements. 
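// Sketch of the legality probe pattern used above, with hypothetical IsLegal
// and IsFast callbacks standing in for TLI.isTypeLegal and
// TLI.allowsMemoryAccess (whose IsFast out-parameter must also be checked,
// as the patch does): grow the candidate type one element at a time and
// remember the widest width that still works.
#include <functional>

unsigned lastLegalWidth(unsigned MaxElems,
                        const std::function<bool(unsigned)> &IsLegal,
                        const std::function<bool(unsigned)> &IsFast) {
  unsigned LastLegal = 0;
  for (unsigned NumElems = 1; NumElems <= MaxElems; ++NumElems)
    if (IsLegal(NumElems) && IsFast(NumElems))
      LastLegal = NumElems; // widest workable store type seen so far
  return LastLegal;
}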
+ Elts *= MemVT.getVectorNumElements(); + } + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; if (TLI.isTypeLegal(Ty) && - allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) - NumElem = i + 1; + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + NumStoresToMerge = i + 1; } - return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge, false, true); } @@ -11084,7 +11590,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { StartAddress = LoadNodes[0].OffsetFromBase; SDValue FirstChain = FirstLoad->getChain(); for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads much share the same chain. + // All loads must share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) break; @@ -11092,35 +11598,41 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; LastConsecutiveLoad = i; - // Find a legal type for the vector store. - EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); + EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); + bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) { + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) { LastLegalVectorType = i + 1; } // Find a legal type for the integer store. unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) && - allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) LastLegalIntegerType = i + 1; // Or check whether a truncstore and extload is legal. - else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); + TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS, - FirstStoreAlign) && - allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS, - FirstLoadAlign)) + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstLoadAS, FirstLoadAlign, &IsFastLd) && + IsFastLd) LastLegalIntegerType = i+1; } } @@ -11138,6 +11650,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (NumElem < 2) return false; + // Collect the chains from all merged stores. 
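// Standalone sketch of the consecutiveness test used when scanning the
// merged loads above: candidate i extends the run only if it sits exactly i
// elements past the first operand. Offsets are assumed to be already
// decomposed relative to a common base pointer.
#include <cstdint>
#include <vector>

unsigned lastConsecutive(const std::vector<int64_t> &OffsetsFromBase,
                         int64_t ElementSizeBytes) {
  unsigned Last = 0;
  for (unsigned i = 1; i < OffsetsFromBase.size(); ++i) {
    if (OffsetsFromBase[i] - OffsetsFromBase[0] !=
        ElementSizeBytes * static_cast<int64_t>(i))
      break;
    Last = i;
  }
  return Last; // index of the last operand contiguous with the first
}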
+ SmallVector MergeStoreChains; + MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); + // The latest Node in the DAG. unsigned LatestNodeUsed = 0; for (unsigned i=1; igetChain()); } LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; @@ -11155,34 +11673,33 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // to memory. EVT JointMemOpVT; if (UseVectorTy) { - JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); + JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); } else { unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); } SDLoc LoadDL(LoadNodes[0].MemNode); SDLoc StoreDL(StoreNodes[0].MemNode); + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. SDValue NewLoad = DAG.getLoad( JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + SDValue NewStoreChain = + DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + SDValue NewStore = DAG.getStore( - LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(), + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); - // Replace one of the loads with the new load. - LoadSDNode *Ld = cast(LoadNodes[0].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - - // Remove the rest of the load chains. - for (unsigned i = 1; i < NumElem ; ++i) { - // Replace all chain users of the old load nodes with the chain of the new - // load node. + // Transfer chain users from old loads to the new load. + for (unsigned i = 0; i < NumElem; ++i) { LoadSDNode *Ld = cast(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); } // Replace the last store with the new store. @@ -11200,6 +11717,114 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return true; } +SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { + SDLoc SL(ST); + SDValue ReplStore; + + // Replace the chain to avoid dependency. + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + } else { + ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), + ST->getMemOperand()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, SL, + MVT::Other, ST->getChain(), ReplStore); + + // Make sure the new and old chains are cleaned up. + AddToWorklist(Token.getNode()); + + // Don't add users to work list. + return CombineTo(ST, Token, false); +} + +SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { + SDValue Value = ST->getValue(); + if (Value.getOpcode() == ISD::TargetConstantFP) + return SDValue(); + + SDLoc DL(ST); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + const ConstantFPSDNode *CFP = cast(Value); + + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. 
+ + SDValue Tmp; + switch (CFP->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("Unknown FP type"); + case MVT::f16: // We don't do this for these yet. + case MVT::f80: + case MVT::f128: + case MVT::ppcf128: + return SDValue(); + case MVT::f32: + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + ; + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), SDLoc(CFP), + MVT::i32); + return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); + } + + return SDValue(); + case MVT::f64: + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + ; + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), SDLoc(CFP), MVT::i64); + return DAG.getStore(Chain, DL, Tmp, + Ptr, ST->getMemOperand()); + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + + SDValue St0 = DAG.getStore(Chain, DL, Lo, + Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, + ST->getAlignment(), AAInfo); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, DL, Hi, + Ptr, ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, + Alignment, AAInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + St0, St1); + } + + return SDValue(); + } +} + SDValue DAGCombiner::visitSTORE(SDNode *N) { StoreSDNode *ST = cast(N); SDValue Chain = ST->getChain(); @@ -11227,81 +11852,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) return Chain; - // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' - if (ConstantFPSDNode *CFP = dyn_cast(Value)) { - // NOTE: If the original store is volatile, this transform must not increase - // the number of stores. For example, on x86-32 an f64 can be stored in one - // processor operation but an i64 (which is not legal) requires two. So the - // transform should not be done in this case. - if (Value.getOpcode() != ISD::TargetConstantFP) { - SDValue Tmp; - switch (CFP->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unknown FP type"); - case MVT::f16: // We don't do this for these yet. - case MVT::f80: - case MVT::f128: - case MVT::ppcf128: - break; - case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - ; - Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
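// A minimal host-side sketch of the f64 splitting performed above: bitcast
// the constant to a 64-bit integer, take the two 32-bit halves, and swap
// them on big-endian targets so the memory image is unchanged. memcpy stands
// in for bitcastToAPInt; the second half is stored at Ptr + 4 with alignment
// MinAlign(Align, 4) in the real code.
#include <cstdint>
#include <cstring>
#include <utility>

void splitF64Store(double V, bool BigEndian, uint32_t Out[2]) {
  uint64_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits)); // type-pun without UB
  uint32_t Lo = static_cast<uint32_t>(Bits & 0xFFFFFFFFu);
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32);
  if (BigEndian)
    std::swap(Lo, Hi);
  Out[0] = Lo; // first 32-bit store
  Out[1] = Hi; // second 32-bit store, 4 bytes higher
}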
- bitcastToAPInt().getZExtValue(), SDLoc(CFP), - MVT::i32); - return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getMemOperand()); - } - break; - case MVT::f64: - if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { - ; - Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - getZExtValue(), SDLoc(CFP), MVT::i64); - return DAG.getStore(Chain, SDLoc(N), Tmp, - Ptr, ST->getMemOperand()); - } - - if (!ST->isVolatile() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { - // Many FP stores are not made apparent until after legalize, e.g. for - // argument passing. Since this is so common, custom legalize the - // 64-bit integer store into two 32-bit stores. - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); - SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); - if (DAG.getDataLayout().isBigEndian()) - std::swap(Lo, Hi); - - unsigned Alignment = ST->getAlignment(); - bool isVolatile = ST->isVolatile(); - bool isNonTemporal = ST->isNonTemporal(); - AAMDNodes AAInfo = ST->getAAInfo(); - - SDLoc DL(N); - - SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, - Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, - ST->getAlignment(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getConstant(4, DL, Ptr.getValueType())); - Alignment = MinAlign(Alignment, 4U); - SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, - Ptr, ST->getPointerInfo().getWithOffset(4), - isVolatile, isNonTemporal, - Alignment, AAInfo); - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, - St0, St1); - } - - break; - } - } - } - // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { @@ -11319,8 +11869,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try transforming a pair floating point load / store ops to integer // load / store ops. - SDValue NewST = TransformFPLoadStorePair(N); - if (NewST.getNode()) + if (SDValue NewST = TransformFPLoadStorePair(N)) return NewST; bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA @@ -11331,31 +11880,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { UseAA = false; #endif if (UseAA && ST->isUnindexed()) { - // Walk up chain skipping non-aliasing memory nodes. - SDValue BetterChain = FindBetterChain(N, Chain); + // FIXME: We should do this even without AA enabled. AA will just allow + // FindBetterChain to work in more situations. The problem with this is that + // any combine that expects memory operations to be on consecutive chains + // first needs to be updated to look for users of the same chain. - // If there is a better chain. - if (Chain != BetterChain) { - SDValue ReplStore; - - // Replace the chain to avoid dependency. - if (ST->isTruncatingStore()) { - ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemoryVT(), ST->getMemOperand()); - } else { - ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr, - ST->getMemOperand()); - } - - // Create token to keep both nodes around. - SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), - MVT::Other, Chain, ReplStore); - - // Make sure the new and old chains are cleaned up. - AddToWorklist(Token.getNode()); - - // Don't add users to work list. 
- return CombineTo(N, Token, false); + // Walk up chain skipping non-aliasing memory nodes, on this store and any + // adjacent stores. + if (findBetterNeighborChains(ST)) { + // replaceStoreChain uses CombineTo, which handled all of the worklist + // manipulation. Return the original node to not do anything else. + return SDValue(ST, 0); } } @@ -11440,6 +11975,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return SDValue(N, 0); } + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // + // Make sure to do this only after attempting to merge stores in order to + // avoid changing the types of some subset of stores due to visit order, + // preventing their merging. + if (isa(Value)) { + if (SDValue NewSt = replaceStoreOfFPConstant(ST)) + return NewSt; + } + return ReduceLoadOpStoreWidth(N); } @@ -11613,7 +12158,24 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } SDValue EltNo = N->getOperand(1); - bool ConstEltNo = isa(EltNo); + ConstantSDNode *ConstEltNo = dyn_cast(EltNo); + + // extract_vector_elt (build_vector x, y), 1 -> y + if (ConstEltNo && + InVec.getOpcode() == ISD::BUILD_VECTOR && + TLI.isTypeLegal(VT) && + (InVec.hasOneUse() || + TLI.aggressivelyPreferBuildVectorSources(VT))) { + SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); + EVT InEltVT = Elt.getValueType(); + + // Sometimes build_vector's scalar input types do not match result type. + if (NVT == InEltVT) + return Elt; + + // TODO: It may be useful to truncate if free if the build_vector implicitly + // converts. + } // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. // We only perform this optimization before the op legalization phase because @@ -11621,13 +12183,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // patterns. For example on AVX, extracting elements from a wide vector // without using extract_subvector. However, if we can find an underlying // scalar value, then we can always use that. - if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE - && ConstEltNo) { - int Elt = cast(EltNo)->getZExtValue(); + if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { int NumElem = VT.getVectorNumElements(); ShuffleVectorSDNode *SVOp = cast(InVec); // Find the new index to extract from. - int OrigElt = SVOp->getMaskElt(Elt); + int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); // Extracting an undef index is undef. if (OrigElt == -1) @@ -12183,12 +12743,90 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); } -SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { - // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of - // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector - // inputs come from at most two distinct vectors, turn this into a shuffle - // node. +// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR +// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at +// most two distinct vectors the same size as the result, attempt to turn this +// into a legal shuffle. +static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + int NumElts = VT.getVectorNumElements(); + int NumOpElts = OpVT.getVectorNumElements(); + SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); + SmallVector Mask; + + for (SDValue Op : N->ops()) { + // Peek through any bitcast. 
+ while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (Op.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // What vector are we extracting the subvector from and at what index? + SDValue ExtVec = Op.getOperand(0); + + // We want the EVT of the original extraction to correctly scale the + // extraction index. + EVT ExtVT = ExtVec.getValueType(); + + // Peek through any bitcast. + while (ExtVec.getOpcode() == ISD::BITCAST) + ExtVec = ExtVec.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (ExtVec.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (!isa(Op.getOperand(1))) + return SDValue(); + int ExtIdx = cast(Op.getOperand(1))->getZExtValue(); + + // Ensure that we are extracting a subvector from a vector the same + // size as the result. + if (ExtVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // Scale the subvector index to account for any bitcast. + int NumExtElts = ExtVT.getVectorNumElements(); + if (0 == (NumExtElts % NumElts)) + ExtIdx /= (NumExtElts / NumElts); + else if (0 == (NumElts % NumExtElts)) + ExtIdx *= (NumElts / NumExtElts); + else + return SDValue(); + + // At most we can reference 2 inputs in the final shuffle. + if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) { + SV0 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx); + } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) { + SV1 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx + NumElts); + } else { + return SDValue(); + } + } + + if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) return N->getOperand(0); @@ -12289,6 +12927,11 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; + // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. 
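// Sketch of the index rescaling above: when the extracted vector is seen
// through a bitcast, the subvector index is expressed in the pre-bitcast
// element count and must be converted to units of the result's elements.
// Returns nothing when the two element counts do not divide evenly, matching
// the bail-out in the patch.
#include <optional>

std::optional<int> scaleExtractIndex(int ExtIdx, int NumExtElts, int NumElts) {
  if (NumExtElts % NumElts == 0)
    return ExtIdx / (NumExtElts / NumElts); // bitcast widened the elements
  if (NumElts % NumExtElts == 0)
    return ExtIdx * (NumElts / NumExtElts); // bitcast narrowed the elements
  return std::nullopt;                      // incompatible counts: give up
}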
// Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -12503,7 +13146,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { std::all_of(SVN->getMask().begin() + NumElemsPerConcat, SVN->getMask().end(), [](int i) { return i == -1; })) { N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), - ArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); + makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); N1 = DAG.getUNDEF(ConcatVT); return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); } @@ -12981,6 +13624,21 @@ SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) + if (N0->getOpcode() == ISD::AND) { + ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); + if (AndConst && AndConst->getAPIntValue() == 0xffff) { + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), + N0.getOperand(0)); + } + } + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -13002,34 +13660,76 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (RHS.getOpcode() == ISD::BITCAST) RHS = RHS.getOperand(0); - if (RHS.getOpcode() == ISD::BUILD_VECTOR) { - SmallVector Indices; - unsigned NumElts = RHS.getNumOperands(); + if (RHS.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Elt = RHS.getOperand(i); - if (isAllOnesConstant(Elt)) + EVT RVT = RHS.getValueType(); + unsigned NumElts = RHS.getNumOperands(); + + // Attempt to create a valid clear mask, splitting the mask into + // sub elements and checking to see if each is + // all zeros or all ones - suitable for shuffle masking. + auto BuildClearMask = [&](int Split) { + int NumSubElts = NumElts * Split; + int NumSubBits = RVT.getScalarSizeInBits() / Split; + + SmallVector Indices; + for (int i = 0; i != NumSubElts; ++i) { + int EltIdx = i / Split; + int SubIdx = i % Split; + SDValue Elt = RHS.getOperand(EltIdx); + if (Elt.getOpcode() == ISD::UNDEF) { + Indices.push_back(-1); + continue; + } + + APInt Bits; + if (isa(Elt)) + Bits = cast(Elt)->getAPIntValue(); + else if (isa(Elt)) + Bits = cast(Elt)->getValueAPF().bitcastToAPInt(); + else + return SDValue(); + + // Extract the sub element from the constant bit mask. + if (DAG.getDataLayout().isBigEndian()) { + Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits); + } else { + Bits = Bits.lshr(SubIdx * NumSubBits); + } + + if (Split > 1) + Bits = Bits.trunc(NumSubBits); + + if (Bits.isAllOnesValue()) Indices.push_back(i); - else if (isNullConstant(Elt)) - Indices.push_back(NumElts+i); + else if (Bits == 0) + Indices.push_back(i + NumSubElts); else return SDValue(); } // Let's see if the target supports this vector_shuffle. - EVT RVT = RHS.getValueType(); - if (!TLI.isVectorClearMaskLegal(Indices, RVT)) + EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); + EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); + if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) return SDValue(); - // Return the new VECTOR_SHUFFLE node. 
- EVT EltVT = RVT.getVectorElementType(); - SmallVector ZeroOps(RVT.getVectorNumElements(), - DAG.getConstant(0, dl, EltVT)); - SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps); - LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); - SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); - return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); - } + SDValue Zero = DAG.getConstant(0, dl, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, + DAG.getBitcast(ClearVT, LHS), + Zero, &Indices[0])); + }; + + // Determine maximum split level (byte level masking). + int MaxSplit = 1; + if (RVT.getScalarSizeInBits() % 8 == 0) + MaxSplit = RVT.getScalarSizeInBits() / 8; + + for (int Split = 1; Split <= MaxSplit; ++Split) + if (RVT.getScalarSizeInBits() % Split == 0) + if (SDValue S = BuildClearMask(Split)) + return S; return SDValue(); } @@ -13041,60 +13741,17 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); + SDValue Ops[] = {LHS, RHS}; + // See if we can constant fold the vector operation. + if (SDValue Fold = DAG.FoldConstantVectorArithmetic( + N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + return Fold; + + // Try to convert a constant mask AND into a shuffle clear mask. if (SDValue Shuffle = XformToShuffleWithZero(N)) return Shuffle; - // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold - // this operation. - if (LHS.getOpcode() == ISD::BUILD_VECTOR && - RHS.getOpcode() == ISD::BUILD_VECTOR) { - // Check if both vectors are constants. If not bail out. - if (!(cast(LHS)->isConstant() && - cast(RHS)->isConstant())) - return SDValue(); - - SmallVector Ops; - for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { - SDValue LHSOp = LHS.getOperand(i); - SDValue RHSOp = RHS.getOperand(i); - - // Can't fold divide by zero. - if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || - N->getOpcode() == ISD::FDIV) { - if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP && - cast(RHSOp.getNode())->isZero())) - break; - } - - EVT VT = LHSOp.getValueType(); - EVT RVT = RHSOp.getValueType(); - if (RVT != VT) { - // Integer BUILD_VECTOR operands may have types larger than the element - // size (e.g., when the element type is not legal). Prior to type - // legalization, the types may not match between the two BUILD_VECTORS. - // Truncate one of the operands to make them match. - if (RVT.getSizeInBits() > VT.getSizeInBits()) { - RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp); - } else { - LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp); - VT = RVT; - } - } - SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, - LHSOp, RHSOp); - if (FoldOp.getOpcode() != ISD::UNDEF && - FoldOp.getOpcode() != ISD::Constant && - FoldOp.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(FoldOp); - AddToWorklist(FoldOp.getNode()); - } - - if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); - } - // Type legalization might introduce new shuffles in the DAG. // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) // -> (shuffle (VBinOp (A, B)), Undef, Mask). 
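// Standalone sketch of BuildClearMask's per-sub-element test above, assuming
// little-endian bit numbering and uint64_t lanes in place of APInt: each
// constant lane is cut into Split pieces, and every piece must be all-ones
// (keep the element) or all-zeros (take the zero vector) for the AND to
// become a shuffle-with-zero.
#include <cstdint>
#include <optional>
#include <vector>

std::optional<std::vector<int>>
buildClearMask(const std::vector<uint64_t> &Lanes, int Split, int LaneBits) {
  int SubBits = LaneBits / Split;
  uint64_t SubMask = (SubBits == 64) ? ~0ull : ((1ull << SubBits) - 1);
  int NumSubElts = static_cast<int>(Lanes.size()) * Split;
  std::vector<int> Indices;
  for (int i = 0; i != NumSubElts; ++i) {
    uint64_t Bits = (Lanes[i / Split] >> ((i % Split) * SubBits)) & SubMask;
    if (Bits == SubMask)
      Indices.push_back(i);              // all-ones: keep this sub-element
    else if (Bits == 0)
      Indices.push_back(i + NumSubElts); // all-zeros: pull from zero vector
    else
      return std::nullopt;               // mixed bits: not a clear mask
  }
  return Indices;
}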
@@ -13109,7 +13766,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { EVT VT = N->getValueType(0); SDValue UndefVector = LHS.getOperand(1); SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, - LHS.getOperand(0), RHS.getOperand(0)); + LHS.getOperand(0), RHS.getOperand(0), + N->getFlags()); AddUsersToWorklist(N); return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, &SVN0->getMask()[0]); @@ -13390,9 +14048,10 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset); AddToWorklist(CPIdx.getNode()); - return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, - false, false, Alignment); + return DAG.getLoad( + TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); } } @@ -13481,8 +14140,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // Get a SetCC of the condition // NOTE: Don't create a SETCC if it's not legal on this target. if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, - LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) { + TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) { SDValue Temp, SCC; // cast from setcc result type to select result type if (LegalTypes) { @@ -13514,51 +14172,6 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, } } - // Check to see if this is the equivalent of setcc - // FIXME: Turn all of these into setcc if setcc if setcc is legal - // otherwise, go ahead with the folds. - if (0 && isNullConstant(N3) && isOneConstant(N2)) { - EVT XType = N0.getValueType(); - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { - SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); - if (Res.getValueType() != VT) - Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); - return Res; - } - - // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) - if (isNullConstant(N1) && CC == ISD::SETEQ && - (!LegalOperations || - TLI.isOperationLegal(ISD::CTLZ, XType))) { - SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); - return DAG.getNode(ISD::SRL, DL, XType, Ctlz, - DAG.getConstant(Log2_32(XType.getSizeInBits()), - SDLoc(Ctlz), - getShiftAmountTy(Ctlz.getValueType()))); - } - // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) - if (isNullConstant(N1) && CC == ISD::SETGT) { - SDLoc DL(N0); - SDValue NegN0 = DAG.getNode(ISD::SUB, DL, - XType, DAG.getConstant(0, DL, XType), N0); - SDValue NotN0 = DAG.getNOT(DL, N0, XType); - return DAG.getNode(ISD::SRL, DL, XType, - DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), - DAG.getConstant(XType.getSizeInBits() - 1, DL, - getShiftAmountTy(XType))); - } - // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) - if (isAllOnesConstant(N1) && CC == ISD::SETGT) { - SDLoc DL(N0); - SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0, - DAG.getConstant(XType.getSizeInBits() - 1, DL, - getShiftAmountTy(N0.getValueType()))); - return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL, - XType)); - } - } - // Check to see if this is an integer abs. 
// select_cc setg[te] X, 0, X, -X -> // select_cc setgt X, -1, X, -X -> @@ -13666,7 +14279,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13690,16 +14303,16 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { // Newton iterations: Est = Est + Est (1 - Arg * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } } @@ -13716,31 +14329,32 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { /// X_{i+1} = X_i (1.5 - A X_i^2 / 2) /// As a result, we precompute A/2 prior to the iteration loop. SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. - SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(Est.getNode()); } return Est; @@ -13752,7 +14366,8 @@ SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, /// => /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, - unsigned Iterations) { + unsigned Iterations, + SDNodeFlags *Flags) { EVT VT = Arg.getValueType(); SDLoc DL(Arg); SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); @@ -13760,25 +14375,25 @@ SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { - SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); + SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); AddToWorklist(HalfEst.getNode()); 
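// Scalar sketches of the two Newton-Raphson refinements threaded with flags
// above, written with plain floats instead of SDNodes. recipStep mirrors
// BuildReciprocalEstimate (Est = Est + Est * (1 - Arg * Est)) and rsqrtStep
// mirrors BuildRsqrtNRTwoConst (Est = (-0.5 * Est) * (Arg * Est * Est - 3)).
inline float recipStep(float Arg, float Est) {
  return Est + Est * (1.0f - Arg * Est);
}

inline float rsqrtStep(float Arg, float Est) {
  return (-0.5f * Est) * (Arg * Est * Est - 3.0f);
}
// Each step roughly doubles the number of correct bits, which is why the
// iteration count is derived from the precision of the initial estimate.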
- Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); + Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); AddToWorklist(Est.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); AddToWorklist(Est.getNode()); } return Est; } -SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -13790,8 +14405,8 @@ SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { AddToWorklist(Est.getNode()); if (Iterations) { Est = UseOneConstNR ? - BuildRsqrtNROneConst(Op, Est, Iterations) : - BuildRsqrtNRTwoConst(Op, Est, Iterations); + BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : + BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); } return Est; } @@ -13955,14 +14570,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SDValue Chain = Chains.pop_back_val(); // For TokenFactor nodes, look at each operand and only continue up the - // chain until we find two aliases. If we've seen two aliases, assume we'll - // find more and revert to original chain since the xform is unlikely to be - // profitable. + // chain until we reach the depth limit. // // FIXME: The depth check could be made to return the last non-aliasing // chain we found before we hit a tokenfactor rather than the original // chain. - if (Depth > 6 || Aliases.size() == 2) { + if (Depth > TLI.getGatherAllAliasesMaxDepth()) { Aliases.clear(); Aliases.push_back(OriginalChain); return; @@ -14094,6 +14707,83 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } +bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.Base.getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.Base.getOpcode() == ISD::UNDEF) + return false; + + SmallVector ChainedStores; + ChainedStores.push_back(St); + + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + StoreSDNode *Index = St; + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. + if (Index != St && !SDValue(Index, 0)->hasOneUse()) + break; + + if (Index->isVolatile() || Index->isIndexed()) + break; + + // Find the base pointer and offset for this memory node. + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (!Ptr.equalBaseIndex(BasePtr)) + break; + + // Find the next memory operand in the chain. If the next operand in the + // chain is a store then move up and continue the scan with the next + // memory operand. If the next operand is a load save it and use alias + // information to check if it interferes with anything. 
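// Sketch of the chain walk started above, assuming a simplified store record
// that already carries its decomposed base pointer (BaseIndexOffset.Base in
// the patch) and whose PrevInChain link stands in for getChain(). Stores are
// chained newest-to-oldest; the walk stops at the first store with a
// different base, mirroring the equalBaseIndex test.
#include <cstdint>
#include <vector>

struct SimpleStore {
  const void *Base;
  int64_t Offset;
  SimpleStore *PrevInChain;
};

std::vector<SimpleStore *> collectChainedStores(SimpleStore *St) {
  std::vector<SimpleStore *> Chained{St};
  for (SimpleStore *I = St->PrevInChain; I && I->Base == St->Base;
       I = I->PrevInChain)
    Chained.push_back(I); // same base pointer: candidate for a better chain
  return Chained;
}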
+ SDNode *NextInChain = Index->getChain().getNode(); + while (true) { + if (StoreSDNode *STn = dyn_cast(NextInChain)) { + // We found a store node. Use it for the next iteration. + ChainedStores.push_back(STn); + Index = STn; + break; + } else if (LoadSDNode *Ldn = dyn_cast(NextInChain)) { + NextInChain = Ldn->getChain().getNode(); + continue; + } else { + Index = nullptr; + break; + } + } + } + + bool MadeChange = false; + SmallVector, 8> BetterChains; + + for (StoreSDNode *ChainedStore : ChainedStores) { + SDValue Chain = ChainedStore->getChain(); + SDValue BetterChain = FindBetterChain(ChainedStore, Chain); + + if (Chain != BetterChain) { + MadeChange = true; + BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); + } + } + + // Do all replacements after finding the replacements to make to avoid making + // the chains more complicated by introducing new TokenFactors. + for (auto Replacement : BetterChains) + replaceStoreChain(Replacement.first, Replacement.second); + + return MadeChange; +} + /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel) { diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 2b9ba2c1b534..cfbb20947acc 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -118,9 +118,9 @@ bool FastISel::lowerArguments() { for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - DenseMap::iterator VI = LocalValueMap.find(I); + DenseMap::iterator VI = LocalValueMap.find(&*I); assert(VI != LocalValueMap.end() && "Missed an argument?"); - FuncInfo.ValueMap[I] = VI->second; + FuncInfo.ValueMap[&*I] = VI->second; } return true; } @@ -611,7 +611,7 @@ bool FastISel::selectStackmap(const CallInst *I) { // have to worry about calling conventions and target-specific lowering code. // Instead we perform the call lowering right here. // - // CALLSEQ_START(0) + // CALLSEQ_START(0...) // STACKMAP(id, nbytes, ...) // CALLSEQ_END(0, 0) // @@ -647,8 +647,11 @@ bool FastISel::selectStackmap(const CallInst *I) { // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(0); + auto Builder = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)); + const MCInstrDesc &MCID = Builder.getInstr()->getDesc(); + for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I) + Builder.addImm(0); // Issue STACKMAP. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1100,13 +1103,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // The donothing intrinsic does, well, nothing. case Intrinsic::donothing: return true; - case Intrinsic::eh_actions: { - unsigned ResultReg = getRegForValue(UndefValue::get(II->getType())); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast(II); assert(DI->getVariable() && "Missing variable"); @@ -1326,12 +1322,38 @@ bool FastISel::selectBitCast(const User *I) { return true; } +// Remove local value instructions starting from the instruction after +// SavedLastLocalValue to the current function insert point. 
+void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) +{ + MachineInstr *CurLastLocalValue = getLastLocalValue(); + if (CurLastLocalValue != SavedLastLocalValue) { + // Find the first local value instruction to be deleted. + // This is the instruction after SavedLastLocalValue if it is non-NULL. + // Otherwise it's the first instruction in the block. + MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue); + if (SavedLastLocalValue) + ++FirstDeadInst; + else + FirstDeadInst = FuncInfo.MBB->getFirstNonPHI(); + setLastLocalValue(SavedLastLocalValue); + removeDeadCode(FirstDeadInst, FuncInfo.InsertPt); + } +} + bool FastISel::selectInstruction(const Instruction *I) { + MachineInstr *SavedLastLocalValue = getLastLocalValue(); // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. if (isa(I)) - if (!handlePHINodesInSuccessorBlocks(I->getParent())) + if (!handlePHINodesInSuccessorBlocks(I->getParent())) { + // PHI node handling may have generated local value instructions, + // even though it failed to handle all PHI nodes. + // We remove these instructions because SelectionDAGISel will generate + // them again. + removeDeadLocalValueCode(SavedLastLocalValue); return false; + } DbgLoc = I->getDebugLoc(); @@ -1348,7 +1370,7 @@ bool FastISel::selectInstruction(const Instruction *I) { LibInfo->hasOptimizedCodeGen(Func)) return false; - // Don't handle Intrinsic::trap if a trap funciton is specified. + // Don't handle Intrinsic::trap if a trap function is specified. if (F && F->getIntrinsicID() == Intrinsic::trap && Call->hasFnAttr("trap-func-name")) return false; @@ -1380,8 +1402,12 @@ bool FastISel::selectInstruction(const Instruction *I) { DbgLoc = DebugLoc(); // Undo phi node updates, because they will be added again by SelectionDAG. - if (isa(I)) + if (isa(I)) { + // PHI node handling may have generated local value instructions. + // We remove them because SelectionDAGISel will generate them again. + removeDeadLocalValueCode(SavedLastLocalValue); FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); + } return false; } @@ -1398,11 +1424,30 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, SmallVector(), DbgLoc); } - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), - MSucc->getBasicBlock()); - FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); + if (FuncInfo.BPI) { + auto BranchProbability = FuncInfo.BPI->getEdgeProbability( + FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock()); + FuncInfo.MBB->addSuccessor(MSucc, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(MSucc); +} + +void FastISel::finishCondBranch(const BasicBlock *BranchBB, + MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB) { + // Add TrueMBB as successor unless it is equal to the FalseMBB: This can + // happen in degenerate IR and MachineIR forbids to have a block twice in the + // successor/predecessor lists. + if (TrueMBB != FalseMBB) { + if (FuncInfo.BPI) { + auto BranchProbability = + FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB); + } + + fastEmitBranch(FalseMBB, DbgLoc); } /// Emit an FNeg operation. 
@@ -1864,21 +1909,18 @@ unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, unsigned Op0, - bool Op0IsKill, const ConstantFP *FPImm) { +unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + const ConstantFP *FPImm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); - Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) .addFPImm(FPImm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) .addFPImm(FPImm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); @@ -1912,35 +1954,6 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, unsigned Op1, - bool Op1IsKill, uint64_t Imm1, - uint64_t Imm2) { - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - unsigned ResultReg = createResultReg(RC); - Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); - Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); - - if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addImm(Imm1) - .addImm(Imm2); - else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, getKillRegState(Op0IsKill)) - .addReg(Op1, getKillRegState(Op1IsKill)) - .addImm(Imm1) - .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); - } - return ResultReg; -} - unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); @@ -1957,25 +1970,6 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, uint64_t Imm1, - uint64_t Imm2) { - unsigned ResultReg = createResultReg(RC); - const MCInstrDesc &II = TII.get(MachineInstOpcode); - - if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addImm(Imm1) - .addImm(Imm2); - else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1) - .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); - } - return ResultReg; -} - unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index cc306cbf5ae4..b62bd2bd63ee 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -87,6 +87,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); MachineModuleInfo &MMI = MF->getMMI(); + const TargetFrameLowering *TFI = 
MF->getSubtarget().getFrameLowering(); // Check whether the function can return without sret-demotion. SmallVector Outs; @@ -103,28 +104,29 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (const AllocaInst *AI = dyn_cast(I)) { - // Static allocas can be folded into the initial stack frame adjustment. - if (AI->isStaticAlloca()) { + Type *Ty = AI->getAllocatedType(); + unsigned Align = + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), + AI->getAlignment()); + unsigned StackAlign = TFI->getStackAlignment(); + + // Static allocas can be folded into the initial stack frame + // adjustment. For targets that don't realign the stack, don't + // do this if there is an extra alignment requirement. + if (AI->isStaticAlloca() && + (TFI->isStackRealignable() || (Align <= StackAlign))) { const ConstantInt *CUI = cast(AI->getArraySize()); - Type *Ty = AI->getAllocatedType(); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), - AI->getAlignment()); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); - } else { - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment( - AI->getAllocatedType()), - AI->getAlignment()); - unsigned StackAlign = - MF->getSubtarget().getFrameLowering()->getStackAlignment(); + // FIXME: Overaligned static allocas should be grouped into + // a single dynamic allocation instead of using a separate + // stack allocation for each one. if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. @@ -134,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Look for inline asm that clobbers the SP register. if (isa(I) || isa(I)) { - ImmutableCallSite CS(I); + ImmutableCallSite CS(&*I); if (isa(CS.getCalledValue())) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); @@ -163,7 +165,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, MF->getFrameInfo()->setHasVAStart(true); } - // If we have a musttail call in a variadic funciton, we need to ensure we + // If we have a musttail call in a variadic function, we need to ensure we // forward implicit register parameters. if (const auto *CI = dyn_cast(I)) { if (CI->isMustTailCall() && Fn->isVarArg()) @@ -172,10 +174,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark values used outside their block as exported, by allocating // a virtual register for them. - if (isUsedOutsideOfDefiningBlock(I)) - if (!isa(I) || - !StaticAllocaMap.count(cast(I))) - InitializeRegForValue(I); + if (isUsedOutsideOfDefiningBlock(&*I)) + if (!isa(I) || !StaticAllocaMap.count(cast(I))) + InitializeRegForValue(&*I); // Collect llvm.dbg.declare information. This is done now instead of // during the initial isel pass through the IR so that it is done @@ -205,15 +206,36 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } // Decide the preferred extend type for a value. 
- PreferredExtendType[I] = getPreferredExtendForValue(I); + PreferredExtendType[&*I] = getPreferredExtendForValue(&*I); } // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This // also creates the initial PHI MachineInstrs, though none of the input // operands are populated. for (BB = Fn->begin(); BB != EB; ++BB) { - MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); - MBBMap[BB] = MBB; + // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks + // are really data, and no instructions can live here. + if (BB->isEHPad()) { + const Instruction *I = BB->getFirstNonPHI(); + // If this is a non-landingpad EH pad, mark this function as using + // funclets. + // FIXME: SEH catchpads do not create funclets, so we could avoid setting + // this in such cases in order to improve frame layout. + if (!isa(I)) { + MMI.setHasEHFunclets(true); + MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + } + if (isa(I)) { + assert(&*BB->begin() == I && + "WinEHPrepare failed to remove PHIs from imaginary BBs"); + continue; + } + if (isa(I)) + assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs"); + } + + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB); + MBBMap[&*BB] = MBB; MF->push_back(MBB); // Transfer the address-taken flag. This is necessary because there could @@ -252,94 +274,64 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Mark landing pad blocks. SmallVector LPads; for (BB = Fn->begin(); BB != EB; ++BB) { - if (const auto *Invoke = dyn_cast(BB->getTerminator())) - MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad(); - if (BB->isLandingPad()) - LPads.push_back(BB->getLandingPadInst()); + const Instruction *FNP = BB->getFirstNonPHI(); + if (BB->isEHPad() && MBBMap.count(&*BB)) + MBBMap[&*BB]->setIsEHPad(); + if (const auto *LPI = dyn_cast(FNP)) + LPads.push_back(LPI); } - // If this is an MSVC EH personality, we need to do a bit more work. - EHPersonality Personality = EHPersonality::Unknown; - if (Fn->hasPersonalityFn()) - Personality = classifyEHPersonality(Fn->getPersonalityFn()); - if (!isMSVCEHPersonality(Personality)) + // If this personality uses funclets, we need to do a bit more work. + if (!Fn->hasPersonalityFn()) + return; + EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); + if (!isFuncletEHPersonality(Personality)) return; - if (Personality == EHPersonality::MSVC_Win64SEH || - Personality == EHPersonality::MSVC_X86SEH) { - addSEHHandlersForLPads(LPads); - } + // Calculate state numbers if we haven't already. + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (Personality == EHPersonality::MSVC_CXX) + calculateWinCXXEHStateNumbers(&fn, EHInfo); + else if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&fn, EHInfo); + else if (Personality == EHPersonality::CoreCLR) + calculateClrEHStateNumbers(&fn, EHInfo); - WinEHFuncInfo &EHInfo = MMI.getWinEHFuncInfo(&fn); - if (Personality == EHPersonality::MSVC_CXX) { - const Function *WinEHParentFn = MMI.getWinEHParent(&fn); - calculateWinCXXEHStateNumbers(WinEHParentFn, EHInfo); - } + calculateCatchReturnSuccessorColors(&fn, EHInfo); - // Copy the state numbers to LandingPadInfo for the current function, which - // could be a handler or the parent. This should happen for 32-bit SEH and - // C++ EH. 
- if (Personality == EHPersonality::MSVC_CXX || - Personality == EHPersonality::MSVC_X86SEH) { - for (const LandingPadInst *LP : LPads) { - MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; - MMI.addWinEHState(LPadMBB, EHInfo.LandingPadStateMap[LP]); - } - } -} - -void FunctionLoweringInfo::addSEHHandlersForLPads( - ArrayRef LPads) { - MachineModuleInfo &MMI = MF->getMMI(); - - // Iterate over all landing pads with llvm.eh.actions calls. - for (const LandingPadInst *LP : LPads) { - const IntrinsicInst *ActionsCall = - dyn_cast(LP->getNextNode()); - if (!ActionsCall || - ActionsCall->getIntrinsicID() != Intrinsic::eh_actions) - continue; - - // Parse the llvm.eh.actions call we found. - MachineBasicBlock *LPadMBB = MBBMap[LP->getParent()]; - SmallVector, 4> Actions; - parseEHActions(ActionsCall, Actions); - - // Iterate EH actions from most to least precedence, which means - // iterating in reverse. - for (auto I = Actions.rbegin(), E = Actions.rend(); I != E; ++I) { - ActionHandler *Action = I->get(); - if (auto *CH = dyn_cast(Action)) { - const auto *Filter = - dyn_cast(CH->getSelector()->stripPointerCasts()); - assert((Filter || CH->getSelector()->isNullValue()) && - "expected function or catch-all"); - const auto *RecoverBA = - cast(CH->getHandlerBlockOrFunc()); - MMI.addSEHCatchHandler(LPadMBB, Filter, RecoverBA); + // Map all BB references in the WinEH data to MBBs. + for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (H.CatchObj.Alloca) { + assert(StaticAllocaMap.count(H.CatchObj.Alloca)); + H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca]; } else { - assert(isa(Action)); - const auto *Fini = cast(Action->getHandlerBlockOrFunc()); - MMI.addSEHCleanupHandler(LPadMBB, Fini); + H.CatchObj.FrameIndex = INT_MAX; } + if (H.Handler) + H.Handler = MBBMap[H.Handler.get()]; } } + for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) + if (UME.Cleanup) + UME.Cleanup = MBBMap[UME.Cleanup.get()]; + for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { + const BasicBlock *BB = UME.Handler.get(); + UME.Handler = MBBMap[BB]; + } + for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { + const BasicBlock *BB = CME.Handler.get(); + CME.Handler = MBBMap[BB]; + } } /// clear - Clear out all the function-specific state. This returns this /// FunctionLoweringInfo to an empty state, ready to be used for a /// different function. void FunctionLoweringInfo::clear() { - assert(CatchInfoFound.size() == CatchInfoLost.size() && - "Not all catch info was assigned to a landing pad!"); - MBBMap.clear(); ValueMap.clear(); StaticAllocaMap.clear(); -#ifndef NDEBUG - CatchInfoLost.clear(); - CatchInfoFound.clear(); -#endif LiveOutRegInfo.clear(); VisitedBBs.clear(); ArgDbgValues.clear(); @@ -520,6 +512,17 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { return 0; } +unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( + const Value *CPI, const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + auto I = CatchPadExceptionPointers.insert({CPI, 0}); + unsigned &VReg = I.first->second; + if (I.second) + VReg = MRI.createVirtualRegister(RC); + assert(VReg && "null vreg in exception pointer table!"); + return VReg; +} + /// ComputeUsesVAFloatArgument - Determine if any floating-point values are /// being passed to this variadic function, and set the MachineModuleInfo's /// usesVAFloatArgument flag if so. 
This flag is used to emit an undefined @@ -547,10 +550,9 @@ void llvm::ComputeUsesVAFloatArgument(const CallInst &I, /// landingpad instruction and add them to the specified machine module info. void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, MachineBasicBlock *MBB) { - MMI.addPersonality( - MBB, - cast( - I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())); + if (const auto *PF = dyn_cast( + I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) + MMI.addPersonality(PF); if (I.isCleanup()) MMI.addCleanup(MBB); diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 5ec10308dc28..a1e2d410ab00 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC); + TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index fbc8f1e89f6e..f46767f6c4a1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -39,6 +39,10 @@ using namespace llvm; #define DEBUG_TYPE "legalizedag" +namespace { + +struct FloatSignAsInt; + //===----------------------------------------------------------------------===// /// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. This involves @@ -51,7 +55,6 @@ using namespace llvm; /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this /// will attempt merge setcc and brc instructions into brcc's. 
/// -namespace { class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; @@ -130,7 +133,11 @@ private: SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl &Results); - SDValue ExpandFCOPYSIGN(SDNode *Node); + void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const; + SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL, + SDValue NewIntValue) const; + SDValue ExpandFCOPYSIGN(SDNode *Node) const; + SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, SDLoc dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, @@ -138,6 +145,7 @@ private: SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, SDLoc dl); + SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl); SDValue ExpandBSWAP(SDValue Op, SDLoc dl); SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl); @@ -146,10 +154,11 @@ private: SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); - std::pair ExpandAtomic(SDNode *Node); - - void ExpandNode(SDNode *Node); + // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall + bool ExpandNode(SDNode *Node); + void ConvertNodeToLibcall(SDNode *Node); void PromoteNode(SDNode *Node); public: @@ -273,17 +282,30 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); if (Extend) { - SDValue Result = - DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, false, Alignment); + SDValue Result = DAG.getExtLoad( + ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT, + false, false, false, Alignment); return Result; } SDValue Result = - DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, false, - Alignment); + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + +/// Expands the Constant node to a load from the constant pool. +SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { + SDLoc dl(CP); + EVT VT = CP->getValueType(0); + SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), + TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast(CPIdx)->getAlignment(); + SDValue Result = + DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); return Result; } @@ -594,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, int SPFI = cast(StackPtr.getNode())->getIndex(); // Store the vector. - SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - false, false, 0); + SDValue Ch = DAG.getStore( + DAG.getEntryNode(), dl, Tmp1, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, 0); // Truncate or zero extend offset to target pointer type. - unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? 
ISD::TRUNCATE : ISD::ZERO_EXTEND; - Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); + Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); // Add the offset to the index. unsigned EltSize = EltVT.getSizeInBits()/8; Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, @@ -610,9 +632,9 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, false, false, 0); // Load the updated vector. - return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, - false, 0); + return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), SPFI), + false, false, false, 0); } @@ -728,14 +750,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { case TargetLowering::Legal: { // If this is an unaligned store and the target doesn't support it, // expand it. + EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); - if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment) - ExpandUnalignedStore(cast(Node), DAG, TLI, this); - } + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast(Node), DAG, TLI, this); break; } case TargetLowering::Custom: { @@ -839,20 +859,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); ReplaceNode(SDValue(Node, 0), Result); } else { - switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), - StVT.getSimpleVT())) { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { + EVT MemVT = ST->getMemoryVT(); unsigned AS = ST->getAddressSpace(); unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { - Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DL.getABITypeAlignment(Ty); - if (Align < ABIAlignment) - ExpandUnalignedStore(cast(Node), DAG, TLI, this); - } + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast(Node), DAG, TLI, this); break; } case TargetLowering::Custom: { @@ -895,17 +911,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { + EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
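// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the predicate that
// allowsMemoryAccess folds together in the hunks around here. Previously each
// call site asked whether misaligned accesses are allowed and, if not,
// compared the access alignment against the type's ABI alignment itself; now
// one query answers "is this access legal as-is?". The booleans below are
// stand-ins for the target and DataLayout queries.
static bool allowsAccess(bool TargetAllowsMisaligned, unsigned AccessAlign,
                         unsigned ABIAlign) {
  // Either the target tolerates misalignment, or the access is already at
  // least ABI-aligned; otherwise the legalizer must expand it.
  return TargetAllowsMisaligned || AccessAlign >= ABIAlign;
}

int main() {
  // A 2-byte-aligned access to a type with 4-byte ABI alignment on a strict
  // target must be expanded.
  bool MustExpand = !allowsAccess(false, 2, 4);
  return MustExpand ? 0 : 1;
}
// ---------------------------------------------------------------------------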
- if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), DAG, TLI, RVal, RChain); - } - } + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast(Node), DAG, TLI, RVal, RChain); break; } case TargetLowering::Custom: { @@ -1092,23 +1105,20 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Res.getValue(1); } } else { - // If this is an unaligned load and the target doesn't support - // it, expand it. + // If this is an unaligned load and the target doesn't support it, + // expand it. EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); unsigned Align = LD->getAlignment(); - if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { - Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty); - if (Align < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), DAG, TLI, Value, Chain); - } - } + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast(Node), DAG, TLI, Value, Chain); } break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) { + EVT DestVT = Node->getValueType(0); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { // If the source type is not legal, see if there is a legal extload to // an intermediate type that we can then extend further. EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); @@ -1127,6 +1137,23 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Load.getValue(1); break; } + + // Handle the special case of fp16 extloads. EXTLOAD doesn't have the + // normal undefined upper bits behavior to allow using an in-reg extend + // with the illegal FP type, so load as an integer and do the + // from-integer conversion. 
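// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: why the hunk below
// loads f16 as an integer and converts. A plain extending load would leave
// the upper bits of the wider FP register undefined, whereas FP16_TO_FP is a
// genuine conversion, so ZEXTLOAD + convert is exact. The standalone IEEE
// binary16 decoder here (normals, denormals, inf/NaN) makes the
// "conversion, not bit-extension" point concrete.
#include <cstdint>
#include <cstring>

static float halfToFloat(uint16_t H) {
  uint32_t Sign = (H >> 15) & 1, Exp = (H >> 10) & 0x1F, Frac = H & 0x3FF;
  uint32_t Out;
  if (Exp == 0x1F)          // Inf/NaN: widen the exponent field, keep payload.
    Out = (Sign << 31) | 0x7F800000u | (Frac << 13);
  else if (Exp != 0)        // Normal: rebias the exponent from 15 to 127.
    Out = (Sign << 31) | ((Exp + 112) << 23) | (Frac << 13);
  else if (Frac == 0)       // Signed zero.
    Out = Sign << 31;
  else {                    // Denormal: renormalize into a float normal.
    unsigned Shift = 0;
    while (!(Frac & 0x400)) { Frac <<= 1; ++Shift; }
    Frac &= 0x3FF;
    Out = (Sign << 31) | ((113 - Shift) << 23) | (Frac << 13);
  }
  float F;
  std::memcpy(&F, &Out, sizeof(F));
  return F;
}

int main() {
  // 0x3C00 is 1.0 in binary16; merely zero-extending those bits would yield a
  // tiny float denormal, which is exactly the bug the conversion avoids.
  return halfToFloat(0x3C00) == 1.0f ? 0 : 1;
}
// ---------------------------------------------------------------------------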
+ if (SrcVT.getScalarType() == MVT::f16) { + EVT ISrcVT = SrcVT.changeTypeToInteger(); + EVT IDestVT = DestVT.changeTypeToInteger(); + EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, + Chain, Ptr, ISrcVT, + LD->getMemOperand()); + Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); + Chain = Result.getValue(1); + break; + } } assert(!SrcVT.isVector() && @@ -1180,15 +1207,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal && + assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) - assert((TLI.getTypeAction(*DAG.getContext(), - Op.getValueType()) == TargetLowering::TypeLegal || - Op.getOpcode() == ISD::TargetConstant) && - "Unexpected illegal type!"); + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Op.getValueType()) || + Op.getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode @@ -1201,6 +1230,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STACKSAVE: Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + break; case ISD::VAARG: Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1229,7 +1262,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : - Node->getOpcode() == ISD::SETCC ? 2 : 1; + Node->getOpcode() == ISD::SETCC ? 2 : + Node->getOpcode() == ISD::SETCCE ? 3 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = @@ -1265,6 +1299,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::FRAME_TO_ARGS_OFFSET: case ISD::EH_SJLJ_SETJMP: case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_SETUP_DISPATCH: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -1281,6 +1316,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::READCYCLECOUNTER: + // READCYCLECOUNTER returns an i64, even if type legalization might have + // expanded that to several smaller types. 
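// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the hazard guarded by
// the hasPredecessorHelper() check added to
// ExpandExtractFromVectorThroughStack just below. Reusing a store's chain for
// a new load is only safe if the load's index does not (transitively) depend
// on that store; otherwise the rewrite forms a cycle. A DFS over operand
// edges with a shared visited set is the standard test. Node is a minimal
// stand-in for SDNode.
#include <cassert>
#include <set>
#include <vector>

struct Node { std::vector<Node *> Operands; };

// Returns true if Target is reachable from N through operand edges, i.e.
// Target is a (transitive) predecessor of N.
static bool dependsOn(Node *N, Node *Target, std::set<Node *> &Visited) {
  if (N == Target)
    return true;
  if (!Visited.insert(N).second)
    return false; // already explored from this node
  for (Node *Op : N->Operands)
    if (dependsOn(Op, Target, Visited))
      return true;
  return false;
}

int main() {
  Node Store, Add, Idx;
  Add.Operands = {&Store}; // index is computed from the store's value
  Idx.Operands = {&Add};
  std::set<Node *> Visited;
  assert(dependsOn(&Idx, &Store, Visited)); // unsafe: skip this store
  return 0;
}
// ---------------------------------------------------------------------------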
+ Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64); + break; case ISD::READ_REGISTER: case ISD::WRITE_REGISTER: // Named register is legal in the DAG, but blocked by register name @@ -1379,7 +1419,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Node); + if (ExpandNode(Node)) + return; + // FALL THROUGH + case TargetLowering::LibCall: + ConvertNodeToLibcall(Node); return; case TargetLowering::Promote: PromoteNode(Node); @@ -1419,6 +1463,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in // the vector. If all are expanded here, we don't want one store per vector // element. + + // Caches for hasPredecessorHelper + SmallPtrSet Visited; + SmallVector Worklist; + SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { @@ -1433,6 +1482,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) continue; + // If the index is dependent on the store we will introduce a cycle when + // creating the load (the load uses the index, and by replacing the chain + // we will make the index dependent on the load). + if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist)) + continue; + StackPtr = ST->getBasePtr(); Ch = SDValue(ST, 0); break; @@ -1490,7 +1545,8 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); int FI = cast(StackPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // First store the whole vector. SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, @@ -1528,7 +1584,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast(FIPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store of each element to the stack slot. SmallVector Stores; @@ -1568,69 +1625,143 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { false, false, false, 0); } -SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { - SDLoc dl(Node); - SDValue Tmp1 = Node->getOperand(0); - SDValue Tmp2 = Node->getOperand(1); +namespace { +/// Keeps track of state when getting the sign of a floating-point value as an +/// integer. +struct FloatSignAsInt { + EVT FloatVT; + SDValue Chain; + SDValue FloatPtr; + SDValue IntPtr; + MachinePointerInfo IntPointerInfo; + MachinePointerInfo FloatPointerInfo; + SDValue IntValue; + APInt SignMask; +}; +} - // Get the sign bit of the RHS. First obtain a value that has the same - // sign as the sign bit, i.e. negative if and only if the sign bit is 1. - SDValue SignBit; - EVT FloatVT = Tmp2.getValueType(); - EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits()); +/// Bitcast a floating-point value to an integer value. Only bitcast the part +/// containing the sign bit if the target has no integer value capable of +/// holding all bits of the floating-point value. 
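// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the byte-addressing
// trick getSignAsIntValue (below) relies on when no integer type can hold the
// whole float. The sign bit of an IEEE double lives in the highest-addressed
// byte on a little-endian host, so loading that single byte and testing bit 7
// recovers the sign; this mirrors the i8 load with SignMask = bit 7 built by
// the code below. Big-endian hosts read at offset zero instead.
#include <cstring>

static bool signBitViaByteLoad(double D) {
  unsigned char Bytes[sizeof(double)];
  std::memcpy(Bytes, &D, sizeof(D));
  // Assumes a little-endian host, matching the !isBigEndian() branch below.
  return (Bytes[sizeof(double) - 1] & 0x80) != 0;
}

int main() {
  return (!signBitViaByteLoad(1.0) && signBitViaByteLoad(-0.0)) ? 0 : 1;
}
// ---------------------------------------------------------------------------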
+void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, + SDLoc DL, SDValue Value) const { + EVT FloatVT = Value.getValueType(); + unsigned NumBits = FloatVT.getSizeInBits(); + State.FloatVT = FloatVT; + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + // Convert to an integer of the same size. if (TLI.isTypeLegal(IVT)) { - // Convert to an integer with the same sign bit. - SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2); - } else { - auto &DL = DAG.getDataLayout(); - // Store the float to memory, then load the sign part out as an integer. - MVT LoadTy = TLI.getPointerTy(DL); - // First create a temporary that is aligned for both the load and store. - SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); - // Then store the float to it. - SDValue Ch = - DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(), - false, false, 0); - if (DL.isBigEndian()) { - assert(FloatVT.isByteSized() && "Unsupported floating point type!"); - // Load out a legal integer with the same sign bit as the float. - SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), - false, false, false, 0); - } else { // Little endian - SDValue LoadPtr = StackPtr; - // The float may be wider than the integer we are going to load. Advance - // the pointer so that the loaded integer will contain the sign bit. - unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits(); - unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8; - LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(), LoadPtr, - DAG.getConstant(ByteOffset, dl, - LoadPtr.getValueType())); - // Load a legal integer containing the sign bit. - SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), - false, false, false, 0); - // Move the sign bit to the top bit of the loaded integer. - unsigned BitShift = LoadTy.getSizeInBits() - - (FloatVT.getSizeInBits() - 8 * ByteOffset); - assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); - if (BitShift) - SignBit = DAG.getNode( - ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift, dl, - TLI.getShiftAmountTy(SignBit.getValueType(), DL))); - } + State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); + State.SignMask = APInt::getSignBit(NumBits); + return; } - // Now get the sign bit proper, by seeing whether the value is negative. - SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()), - SignBit, - DAG.getConstant(0, dl, SignBit.getValueType()), - ISD::SETLT); - // Get the absolute value of the result. - SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1); - // Select between the nabs and abs value based on the sign bit of - // the input. - return DAG.getSelect(dl, AbsVal.getValueType(), SignBit, - DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal), - AbsVal); + + auto &DataLayout = DAG.getDataLayout(); + // Store the float to memory, then load the sign part out as an integer. + MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8); + // First create a temporary that is aligned for both the load and store. + SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); + int FI = cast(StackPtr.getNode())->getIndex(); + // Then store the float to it. 
+ State.FloatPtr = StackPtr; + MachineFunction &MF = DAG.getMachineFunction(); + State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); + State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, + State.FloatPointerInfo, false, false, 0); + + SDValue IntPtr; + if (DataLayout.isBigEndian()) { + assert(FloatVT.isByteSized() && "Unsupported floating point type!"); + // Load out a legal integer with the same sign bit as the float. + IntPtr = StackPtr; + State.IntPointerInfo = State.FloatPointerInfo; + } else { + // Advance the pointer so that the loaded byte will contain the sign bit. + unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; + IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, + DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); + State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, + ByteOffset); + } + + State.IntPtr = IntPtr; + State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, + IntPtr, State.IntPointerInfo, MVT::i8, + false, false, false, 0); + State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); +} + +/// Replace the integer value produced by getSignAsIntValue() with a new value +/// and cast the result back to a floating-point type. +SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, + SDLoc DL, SDValue NewIntValue) const { + if (!State.Chain) + return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); + + // Override the part containing the sign bit in the value stored on the stack. + SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, + State.IntPointerInfo, MVT::i8, false, false, + 0); + return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, + State.FloatPointerInfo, false, false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { + SDLoc DL(Node); + SDValue Mag = Node->getOperand(0); + SDValue Sign = Node->getOperand(1); + + // Get sign bit into an integer value. + FloatSignAsInt SignAsInt; + getSignAsIntValue(SignAsInt, DL, Sign); + + EVT IntVT = SignAsInt.IntValue.getValueType(); + SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); + SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue, + SignMask); + + // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) + EVT FloatVT = Mag.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) { + SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag); + SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue); + SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit, + DAG.getConstant(0, DL, IntVT), ISD::SETNE); + return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue); + } + + // Transform values to integer, copy the sign bit and transform back. + FloatSignAsInt MagAsInt; + getSignAsIntValue(MagAsInt, DL, Mag); + assert(SignAsInt.SignMask == MagAsInt.SignMask); + SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue, + ClearSignMask); + SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit); + + return modifySignAsInt(MagAsInt, DL, CopiedSign); +} + +SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { + SDLoc DL(Node); + SDValue Value = Node->getOperand(0); + + // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal. 
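// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the pure-integer form
// of ExpandFCOPYSIGN's fallback path above. Clear the magnitude's sign bit
// with ~SignMask, take the sign bit from the other operand, and OR the two
// together, which is exactly the ClearedSign | SignBit combination.
#include <cstdint>
#include <cstring>

static float copysignViaBits(float Mag, float Sign) {
  uint32_t M, S;
  std::memcpy(&M, &Mag, 4);
  std::memcpy(&S, &Sign, 4);
  const uint32_t SignMask = 0x80000000u;
  uint32_t R = (M & ~SignMask) | (S & SignMask);
  float Out;
  std::memcpy(&Out, &R, 4);
  return Out;
}

int main() {
  return copysignViaBits(3.5f, -1.0f) == -3.5f ? 0 : 1;
}
// ---------------------------------------------------------------------------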
+ EVT FloatVT = Value.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) { + SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT); + return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero); + } + + // Transform value to integer, clear the sign bit and transform back. + FloatSignAsInt ValueAsInt; + getSignAsIntValue(ValueAsInt, DL, Value); + EVT IntVT = ValueAsInt.IntValue.getValueType(); + SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue, + ClearSignMask); + return modifySignAsInt(ValueAsInt, DL, ClearedSign); } void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, @@ -1798,7 +1929,8 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, FrameIndexSDNode *StackPtrFI = cast(FIPtr); int SPFI = StackPtrFI->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); unsigned SlotSize = SlotVT.getSizeInBits(); @@ -1838,14 +1970,14 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { FrameIndexSDNode *StackPtrFI = cast(StackPtr); int SPFI = StackPtrFI->getIndex(); - SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0), - StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - Node->getValueType(0).getVectorElementType(), - false, false, 0); - return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), - false, false, false, 0); + SDValue Ch = DAG.getTruncStore( + DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), + Node->getValueType(0).getVectorElementType(), false, false, 0); + return DAG.getLoad( + Node->getValueType(0), dl, Ch, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, false, 0); } static bool @@ -2011,9 +2143,10 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); - return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + return DAG.getLoad( + VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); } SmallSet DefinedValues; @@ -2205,47 +2338,6 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// Return true if divmod libcall is available. -static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, - const TargetLowering &TLI) { - RTLIB::Libcall LC; - switch (Node->getSimpleValueType(0).SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; - case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; - } - - return TLI.getLibcallName(LC) != nullptr; -} - -/// Only issue divrem libcall if both quotient and remainder are needed. 
-static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { - // The other use might have been replaced with a divrem already. - unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; - unsigned OtherOpcode = 0; - if (isSigned) - OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; - else - OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; - - SDValue Op0 = Node->getOperand(0); - SDValue Op1 = Node->getOperand(1); - for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - if (User == Node) - continue; - if ((User->getOpcode() == OtherOpcode || User->getOpcode() == DivRemOpc) && - User->getOperand(0) == Op0 && - User->getOperand(1) == Op1) - return true; - } - return false; -} - /// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, @@ -2428,6 +2520,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, SDLoc dl) { + // TODO: Should any fast-math-flags be set for the created nodes? + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { // simple 32-bit [signed|unsigned] integer to float/double expansion @@ -2611,14 +2705,15 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Alignment = std::min(Alignment, 4u); SDValue FudgeInReg; if (DestVT == MVT::f32) - FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + FudgeInReg = DAG.getLoad( + MVT::f32, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); else { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, false, Alignment); + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); @@ -2713,6 +2808,31 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } +/// Open code the operations for BITREVERSE. +SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + SDValue Tmp, Tmp2; + Tmp = DAG.getConstant(0, dl, VT); + for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { + if (I < J) + Tmp2 = + DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); + else + Tmp2 = + DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); + + APInt Shift(Sz, 1); + Shift = Shift.shl(J); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); + } + + return Tmp; +} + /// Open code the operations for BSWAP of the specified operation. 
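// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: ExpandBITREVERSE's
// open-coded loop (added just above) run on a host integer. For each
// destination bit J = Sz-1-I, the value is shifted so that source bit I lands
// on bit J, masked down to that single bit, and ORed into the accumulator.
#include <cassert>
#include <cstdint>

static uint8_t bitreverse8(uint8_t Op) {
  const unsigned Sz = 8;
  uint8_t Tmp = 0;
  for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
    uint8_t Tmp2 =
        (I < J) ? (uint8_t)(Op << (J - I)) : (uint8_t)(Op >> (I - J));
    Tmp |= (uint8_t)(Tmp2 & (1u << J)); // keep only destination bit J
  }
  return Tmp;
}

int main() {
  assert(bitreverse8(0x01) == 0x80);
  assert(bitreverse8(0xB4) == 0x2D); // 1011'0100 -> 0010'1101
  return 0;
}
// ---------------------------------------------------------------------------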
SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); @@ -2865,16 +2985,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } } -std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { - unsigned Opc = Node->getOpcode(); - MVT VT = cast(Node)->getMemoryVT().getSimpleVT(); - RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); - - return ExpandChainLibCall(LC, Node, false); -} - -void SelectionDAGLegalize::ExpandNode(SDNode *Node) { +bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; @@ -2888,6 +2999,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); Results.push_back(Tmp1); break; + case ISD::BITREVERSE: + Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); + break; case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); break; @@ -2908,30 +3022,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // preserve the chain and be done. Results.push_back(Node->getOperand(0)); break; + case ISD::READCYCLECOUNTER: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.append(Node->getNumValues() - 1, + DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + break; case ISD::EH_SJLJ_SETJMP: // If the target didn't expand this, just return 'zero' and preserve the // chain. Results.push_back(DAG.getConstant(0, dl, MVT::i32)); Results.push_back(Node->getOperand(0)); break; - case ISD::ATOMIC_FENCE: { - // If the target didn't lower this, lower it to '__sync_synchronize()' call - // FIXME: handle "fence singlethread" more efficiently. - TargetLowering::ArgListTy Args; - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__sync_synchronize", - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); - - std::pair CallResult = TLI.LowerCallTo(CLI); - - Results.push_back(CallResult.second); - break; - } case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); @@ -2959,26 +3062,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Swap.getValue(1)); break; } - // By default, atomic intrinsics are marked Legal and lowered. Targets - // which don't support them directly, however, may want libcalls, in which - // case they mark them Expand, and we get here. - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_CMP_SWAP: { - std::pair Tmp = ExpandAtomic(Node); - Results.push_back(Tmp.first); - Results.push_back(Tmp.second); - break; - } case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and // splits out the success value as a comparison. 
Expanding the resulting @@ -3017,21 +3100,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; } - case ISD::TRAP: { - // If this operation is not supported, lower it to 'abort()' call - TargetLowering::ArgListTy Args; - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Node->getOperand(0)) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("abort", - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args), 0); - std::pair CallResult = TLI.LowerCallTo(CLI); - - Results.push_back(CallResult.second); - break; - } case ISD::FP_ROUND: case ISD::BITCAST: Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), @@ -3097,6 +3165,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Tmp1, ISD::SETLT); True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + // TODO: Should any fast-math-flags be set for the FSUB? False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, DAG.getNode(ISD::FSUB, dl, VT, Node->getOperand(0), Tmp1)); @@ -3106,57 +3175,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::VAARG: { - const Value *V = cast(Node->getOperand(2))->getValue(); - EVT VT = Node->getValueType(0); - Tmp1 = Node->getOperand(0); - Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); - - SDValue VAListLoad = - DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, false, 0); - SDValue VAList = VAListLoad; - - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - - VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(Align - 1, dl, - VAList.getValueType())); - - VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList, - DAG.getConstant(-(int64_t)Align, dl, - VAList.getValueType())); - } - - // Increment the pointer, VAList, to the next vaarg - Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - DAG.getConstant(DAG.getDataLayout().getTypeAllocSize( - VT.getTypeForEVT(*DAG.getContext())), - dl, VAList.getValueType())); - // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, - MachinePointerInfo(V), false, false, 0); - // Load the actual argument out of the pointer VAList - Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, false, 0)); + case ISD::VAARG: + Results.push_back(DAG.expandVAArg(Node)); Results.push_back(Results[0].getValue(1)); break; - } - case ISD::VACOPY: { - // This defaults to loading a pointer from the input and storing it to the - // output, returning the chain. - const Value *VD = cast(Node->getOperand(3))->getValue(); - const Value *VS = cast(Node->getOperand(4))->getValue(); - Tmp1 = DAG.getLoad(TLI.getPointerTy(DAG.getDataLayout()), dl, - Node->getOperand(0), Node->getOperand(2), - MachinePointerInfo(VS), false, false, false, 0); - Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), - MachinePointerInfo(VD), false, false, 0); - Results.push_back(Tmp1); + case ISD::VACOPY: + Results.push_back(DAG.expandVACopy(Node)); break; - } case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. 
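// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the pointer round-up
// that the removed open-coded VAARG expansion above performed (and that the
// shared DAG.expandVAArg presumably still performs): bump the list pointer to
// the next multiple of a power-of-two alignment via add-then-mask.
#include <cassert>
#include <cstdint>

static uint64_t alignUp(uint64_t Ptr, uint64_t Align) {
  assert((Align & (Align - 1)) == 0 && "Expected Align to be a power of 2");
  return (Ptr + Align - 1) & ~(Align - 1);
}

int main() {
  assert(alignUp(0x1001, 16) == 0x1010);
  assert(alignUp(0x1010, 16) == 0x1010); // already aligned: unchanged
  return 0;
}
// ---------------------------------------------------------------------------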
@@ -3302,28 +3327,24 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Node->getOperand(0)); } break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Results[0].getValue(0)); + break; case ISD::FCOPYSIGN: Results.push_back(ExpandFCOPYSIGN(Node)); break; case ISD::FNEG: // Expand Y = FNEG(X) -> Y = SUB -0.0, X Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); + // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, Node->getOperand(0)); Results.push_back(Tmp1); break; - case ISD::FABS: { - // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). - EVT VT = Node->getValueType(0); - Tmp1 = Node->getOperand(0); - Tmp2 = DAG.getConstantFP(0.0, dl, VT); - Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()), - Tmp1, Tmp2, ISD::SETUGT); - Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); - Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3); - Results.push_back(Tmp1); + case ISD::FABS: + Results.push_back(ExpandFABS(Node)); break; - } case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -3344,25 +3365,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } - case ISD::FMINNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, - RTLIB::FMIN_F80, RTLIB::FMIN_F128, - RTLIB::FMIN_PPCF128)); - break; - case ISD::FMAXNUM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, - RTLIB::FMAX_F80, RTLIB::FMAX_F128, - RTLIB::FMAX_PPCF128)); - break; - case ISD::FSQRT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_F128, - RTLIB::SQRT_PPCF128)); - break; case ISD::FSIN: case ISD::FCOS: { EVT VT = Node->getValueType(0); - bool isSIN = Node->getOpcode() == ISD::FSIN; // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / // fcos which share the same operand and both are used. if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || @@ -3370,137 +3375,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { && useSinCos(Node)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); - if (!isSIN) + if (Node->getOpcode() == ISD::FCOS) Tmp1 = Tmp1.getValue(1); Results.push_back(Tmp1); - } else if (isSIN) { - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_F128, - RTLIB::SIN_PPCF128)); - } else { - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_F128, - RTLIB::COS_PPCF128)); } break; } - case ISD::FSINCOS: - // Expand into sincos libcall. 
- ExpandSinCosLibCall(Node, Results); - break; - case ISD::FLOG: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); - break; - case ISD::FLOG2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); - break; - case ISD::FLOG10: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); - break; - case ISD::FEXP: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); - break; - case ISD::FEXP2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); - break; - case ISD::FTRUNC: - Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, - RTLIB::TRUNC_PPCF128)); - break; - case ISD::FFLOOR: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, - RTLIB::FLOOR_PPCF128)); - break; - case ISD::FCEIL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_F128, - RTLIB::CEIL_PPCF128)); - break; - case ISD::FRINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_F128, - RTLIB::RINT_PPCF128)); - break; - case ISD::FNEARBYINT: - Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_F128, - RTLIB::NEARBYINT_PPCF128)); - break; - case ISD::FROUND: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, - RTLIB::ROUND_F64, - RTLIB::ROUND_F80, - RTLIB::ROUND_F128, - RTLIB::ROUND_PPCF128)); - break; - case ISD::FPOWI: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_F128, - RTLIB::POWI_PPCF128)); - break; - case ISD::FPOW: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); - break; - case ISD::FDIV: - Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_F128, - RTLIB::DIV_PPCF128)); - break; - case ISD::FREM: - Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_F128, - RTLIB::REM_PPCF128)); - break; - case ISD::FMA: - Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_F128, - RTLIB::FMA_PPCF128)); - break; case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); - case ISD::FADD: - Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_F128, - RTLIB::ADD_PPCF128)); - break; - case ISD::FMUL: - Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, - RTLIB::MUL_F80, RTLIB::MUL_F128, - RTLIB::MUL_PPCF128)); - break; - case ISD::FP16_TO_FP: { - if (Node->getValueType(0) == MVT::f32) { - Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); - break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) != MVT::f32) { + // We can extend to types bigger than f32 in two steps without changing + // the result. 
Since "f16 -> f32" is much more commonly available, give + // CodeGen the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); } - - // We can extend to types bigger than f32 in two steps without changing the - // result. Since "f16 -> f32" is much more commonly available, give CodeGen - // the option of emitting that before resorting to a libcall. - SDValue Res = - DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); - Results.push_back( - DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); break; - } - case ISD::FP_TO_FP16: { + case ISD::FP_TO_FP16: if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); @@ -3512,16 +3407,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl)); Results.push_back( DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); - break; } } - - RTLIB::Libcall LC = - RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); - Results.push_back(ExpandLibCall(LC, Node, false)); break; - } case ISD::ConstantFP: { ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. @@ -3530,17 +3418,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } + case ISD::Constant: { + ConstantSDNode *CP = cast(Node); + Results.push_back(ExpandConstant(CP)); + break; + } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + const SDNodeFlags *Flags = &cast(Node)->Flags; Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); - Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); Results.push_back(Tmp1); - } else { - Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, - RTLIB::SUB_F80, RTLIB::SUB_F128, - RTLIB::SUB_PPCF128)); } break; } @@ -3564,29 +3454,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || - (isDivRemLibcallAvailable(Node, isSigned, TLI) && - // If div is legal, it's better to do the normal expansion - !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && - useDivRem(Node, isSigned, false))) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); + Results.push_back(Tmp1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SREM_I8, - RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128); - else - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UREM_I8, - RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128); - Results.push_back(Tmp1); + Results.push_back(Tmp1); + } break; } case ISD::UDIV: @@ -3594,23 +3472,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { bool isSigned = Node->getOpcode() == ISD::SDIV; unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); - SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || - (isDivRemLibcallAvailable(Node, isSigned, TLI) && - useDivRem(Node, isSigned, true))) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); - else if (isSigned) - Tmp1 = ExpandIntLibCall(Node, true, - RTLIB::SDIV_I8, - RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128); - else - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::UDIV_I8, - RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128); - Results.push_back(Tmp1); + Results.push_back(Tmp1); + } break; } case ISD::MULHU: @@ -3626,11 +3493,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1.getValue(1)); break; } - case ISD::SDIVREM: - case ISD::UDIVREM: - // Expand into divrem libcall - ExpandDivRemLibCall(Node, Results); - break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3673,14 +3535,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); - break; } - - Tmp1 = ExpandIntLibCall(Node, false, - RTLIB::MUL_I8, - RTLIB::MUL_I16, RTLIB::MUL_I32, - RTLIB::MUL_I64, RTLIB::MUL_I128); - Results.push_back(Tmp1); break; } case ISD::SADDO: @@ -3867,9 +3722,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, - MachinePointerInfo::getJumpTable(), MemVT, - false, false, false, 0); + SDValue LD = DAG.getExtLoad( + ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT, + false, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: @@ -4091,17 +3947,277 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } + // Replace the original node with the legalized result. 
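// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: the "X % Y ->
// X - (X/Y)*Y" rewrite kept above for targets where only division is legal.
// Truncating division makes the identity hold for negative operands as well
// (INT_MIN / -1 excluded, which is undefined for both forms).
#include <cassert>

static int remViaDiv(int X, int Y) {
  return X - (X / Y) * Y;
}

int main() {
  assert(remViaDiv(7, 3) == 7 % 3);
  assert(remViaDiv(-7, 3) == -7 % 3);
  assert(remViaDiv(7, -3) == 7 % -3);
  return 0;
}
// ---------------------------------------------------------------------------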
+ if (Results.empty()) + return false; + + ReplaceNode(Node, Results.data()); + return true; +} + +void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { + SmallVector Results; + SDLoc dl(Node); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + unsigned Opc = Node->getOpcode(); + switch (Opc) { + case ISD::ATOMIC_FENCE: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + // FIXME: handle "fence singlethread" more efficiently. + TargetLowering::ArgListTy Args; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + + std::pair CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + MVT VT = cast(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); + + std::pair Tmp = ExpandChainLibCall(LC, Node, false); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + std::pair CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + case ISD::FMINNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); + break; + case ISD::FMAXNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); + break; + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + break; + case ISD::FSINCOS: + // Expand into sincos libcall. 
+ ExpandSinCosLibCall(Node, Results); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); + break; + case ISD::FDIV: + Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); + break; + case ISD::FREM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128)); + break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128)); + break; + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); + break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) == MVT::f32) { + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + } + break; + case ISD::FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); + Results.push_back(ExpandLibCall(LC, Node, false)); + break; + } + case ISD::FSUB: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + 
RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + break; + case ISD::SREM: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128)); + break; + case ISD::UREM: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128)); + break; + case ISD::SDIV: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128)); + break; + case ISD::UDIV: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128)); + break; + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; + case ISD::MUL: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128)); + break; + } + // Replace the original node with the legalized result. if (!Results.empty()) ReplaceNode(Node, Results.data()); } +// Determine the vector type to use in place of an original scalar element when +// promoting equally sized vectors. +static MVT getPromotedVectorElementType(const TargetLowering &TLI, + MVT EltVT, MVT NewEltVT) { + unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits(); + MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt); + assert(TLI.isTypeLegal(MidVT) && "unexpected"); + return MidVT; +} + void SelectionDAGLegalize::PromoteNode(SDNode *Node) { SmallVector<SDValue, 8> Results; MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || - Node->getOpcode() == ISD::SETCC) { + Node->getOpcode() == ISD::SETCC || + Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { OVT = Node->getOperand(0).getSimpleValueType(); } if (Node->getOpcode() == ISD::BR_CC) @@ -4284,11 +4400,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FREM: case ISD::FMINNUM: case ISD::FMAXNUM: - case ISD::FCOPYSIGN: case ISD::FPOW: { Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); - Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, + Node->getFlags()); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, DAG.getIntPtrConstant(0, dl))); break; @@ -4303,12 +4419,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl))); break; } + case ISD::FCOPYSIGN: + case ISD::FPOWI: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = Node->getOperand(1); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + + // fcopysign doesn't change anything but the sign bit, so + // (fp_round (fcopysign (fpext a), b)) + // is as precise as + // (fp_round (fpext a)) + // which is a no-op. Mark it as a TRUNCating FP_ROUND.
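+ // e.g. when an f32 fcopysign is promoted through f64, the value still + // carries only f32 precision, so the final fp_round loses no bits; the + // 'isTrunc' flag below records exactly that (a clarifying note, not + // part of the upstream comment).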
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0, dl))); + Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); break; } case ISD::FFLOOR: @@ -4333,6 +4457,157 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2, DAG.getIntPtrConstant(0, dl))); break; } + case ISD::BUILD_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. v2i64 = build_vector i64:x, i64:y => v4i32 + // => + // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for build_vector"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) { + SDValue Op = Node->getOperand(I); + NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op)); + } + + SDLoc SL(Node); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + case ISD::EXTRACT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size. + // + // e.g. v2i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for extract_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Idx = Node->getOperand(1); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVec, TmpIdx); + NewOps.push_back(Elt); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps); + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec)); + break; + } + case ISD::INSERT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g.
v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // v2i32:casty = bitcast y:i64 + // + // v2i64 = bitcast + // (v4i32 insert_vector_elt + // (v4i32 insert_vector_elt v4i32:castx, + // (extract_vector_elt casty, 0), 2 * z), + // (extract_vector_elt casty, 1), (2 * z + 1)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for insert_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Val = Node->getOperand(1); + SDValue Idx = Node->getOperand(2); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + + SDValue NewVec = CastVec; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVal, IdxOffset); + + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT, + NewVec, Elt, InEltIdx); + } + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec)); + break; + } + case ISD::SCALAR_TO_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to different vector type with the same total bit size. + // + // e.g. v2i64 = scalar_to_vector x:i64 + // => + // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef) + // + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + SDValue Val = Node->getOperand(0); + SDLoc SL(Node); + + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + SDValue Undef = DAG.getUNDEF(MidVT); + + SmallVector<SDValue, 8> NewElts; + NewElts.push_back(CastVal); + for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I) + NewElts.push_back(Undef); + + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } } // Replace the original node with the legalized result. @@ -4356,7 +4631,7 @@ void SelectionDAG::Legalize() { for (auto NI = allnodes_end(); NI != allnodes_begin();) { --NI; - SDNode *N = NI; + SDNode *N = &*NI; if (N->use_empty() && N != getRoot().getNode()) { ++NI; DeleteNode(N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3c50a4155731..6c0193a76732 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -43,10 +43,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Result Float to Integer Conversion. +// Convert Float Results to Integer for Non-HW-supported Operations.
//===----------------------------------------------------------------------===// -void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -59,20 +59,26 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; - case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: - R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N)); + case ISD::Register: + case ISD::CopyFromReg: + case ISD::CopyToReg: + assert(isLegalInHWReg(N->getValueType(ResNo)) && + "Unsupported SoftenFloatRes opcode!"); + // Only when isLegalInHWReg, we can skip check of the operands. + R = SDValue(N, ResNo); break; + case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -84,7 +90,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -97,9 +103,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -107,11 +113,19 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } // If R is null, the sub-method took care of registering the result.
- if (R.getNode()) + if (R.getNode()) { SetSoftenedFloat(SDValue(N, ResNo), R); + ReplaceSoftenFloatResult(N, ResNo, R); + } + // Return true only if the node is changed, + // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. + return R.getNode() && R.getNode() != N; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); return BitConvertToInteger(N->getOperand(0)); } @@ -130,10 +144,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, we can load better from the constant pool. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), TLI.getTypeToTransformTo(*DAG.getContext(), - N->getValueType(0))); + CN->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -143,7 +161,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FABS can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -165,7 +186,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { @@ -178,7 +199,7 @@ ... RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { @@ -191,7 +212,7 @@ ... RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -203,10 +224,13 @@ ... RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations.
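+ // Roughly, in integer bits (a sketch for IEEE types, where SignMask has + // only the sign bit set): + // result = (bits(x) & ~SignMask) | (bits(y) & SignMask);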
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -263,7 +287,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { @@ -276,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { @@ -288,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { @@ -300,7 +324,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { @@ -312,7 +336,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { @@ -324,7 +348,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { @@ -336,7 +360,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { @@ -348,7 +372,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -362,7 +386,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -375,7 +399,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -387,10 +411,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FNEG can be implemented as native bitwise operations. 
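+ // Roughly, in integer bits (a sketch for IEEE types): bits(x) ^ SignMask.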
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X @@ -402,7 +429,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, dl).first; + NVT, Ops, false, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -418,11 +445,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SoftenFloatResult(Op.getNode(), 0); } + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { + Op = GetPromotedFloat(Op); + // If the promotion did the FP_EXTEND to the destination type for us, + // there's nothing left to do here. + if (Op.getValueType() == N->getValueType(0)) { + return BitConvertToInteger(Op); + } + } + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) Op = GetSoftenedFloat(Op); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -430,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); - SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, &Op, 1, + SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, false, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) return Res32; @@ -438,7 +474,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Res32, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -452,7 +488,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { @@ -465,7 +501,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -479,7 +515,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -492,7 +528,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { @@ -504,7 +540,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { @@ -516,7 +552,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { @@ -528,7 +564,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { @@ -540,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { @@ -553,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, SDLoc(N)).first; + NVT, Ops, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -568,10 +604,11 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, SDLoc(N)).first; + NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { + bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -586,7 +623,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -600,17 +638,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); - return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); + auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); + if (LegalInHWReg) + return ExtendNode; + return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -636,7 +681,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + if (N != NewVAARG.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); return NewVAARG; } @@ -665,12 +711,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, Signed, dl).first; + Op, Signed, dl).first; } //===----------------------------------------------------------------------===// -// Operand Float to Integer Conversion.. +// Convert Float Operand to Integer for Non-HW-supported Operations. //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -680,6 +726,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: + if (CanSkipSoftenFloatOperand(N, OpNo)) + return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -691,18 +739,27 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; - case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; - case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::STORE: + Res = SoftenFloatOp_STORE(N, OpNo); + // Do not try to analyze or soften this node again if the value is + // or can be held in a register. In that case, Res.getNode() should + // be equal to N. + if (Res.getNode() == N && + isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // Otherwise, we need to reanalyze and lower the new Res nodes. + break; } // If the result is null, the sub-method took care of registering results etc. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. 
+ // core about this to re-analyze. if (Res.getNode() == N) return true; @@ -713,6 +770,41 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return false; } +bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { + if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // When the operand type can be kept in registers, SoftenFloatResult + // will call ReplaceValueWith to replace all references and we can + // skip softening this operand. + switch (N->getOperand(OpNo).getOpcode()) { + case ISD::BITCAST: + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + case ISD::SELECT: + case ISD::SELECT_CC: + return true; + } + // For some opcodes, SoftenFloatResult handles all conversion of softening + // and replacing operands, so that there is no need to soften operands + // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + return true; + } + return false; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); @@ -730,7 +822,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; } @@ -747,7 +839,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -773,20 +865,33 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { + bool Signed = N->getOpcode() == ISD::FP_TO_SINT; + EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; -} + EVT NVT = EVT(); + SDLoc dl(N); + + // If the result is not legal, eg: fp -> i1, then it needs to be promoted to + // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly + // match, eg. we don't have fp -> i8 conversions. + // Look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; + ++IntVT) { + NVT = (MVT::SimpleValueType)IntVT; + // The type needs to be big enough to hold the result. + if (NVT.bitsGE(RVT)) + LC = Signed ?
RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); -SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { - EVT RVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + + // Truncate the result if the libcall returns a larger type. + return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -1028,7 +1133,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1102,7 +1207,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, 3, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1116,7 +1221,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1231,7 +1336,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, 2, false, + N->getValueType(0), Ops, false, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1310,7 +1415,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first; + Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1341,6 +1446,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, break; } + // TODO: Are there fast-math-flags to propagate to this FADD? Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), @@ -1494,7 +1600,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1511,6 +1617,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. + // TODO: Are there fast-math-flags to propagate to this FSUB? 
return DAG.getSelectCC(dl, N->getOperand(0), Tmp, DAG.getNode(ISD::ADD, dl, MVT::i32, DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, @@ -1527,7 +1634,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), false, dl).first; } @@ -1912,8 +2019,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Op0 = GetPromotedFloat(N->getOperand(0)); SDValue Op1 = GetPromotedFloat(N->getOperand(1)); - - return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags()); } SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 9f060a09a0f3..cd114d668e20 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -53,6 +53,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; + case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break; case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; @@ -65,16 +66,20 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; - case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break; + case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break; + case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; + case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N)); + break; case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: - case ISD::SMAX: + case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UMIN: - case ISD::UMAX: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break; + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; @@ -114,10 +119,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; case ISD::SDIV: - case ISD::SREM: Res = PromoteIntRes_SDIV(N); break; + case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; case ISD::UDIV: - case ISD::UREM: Res = PromoteIntRes_UDIV(N); break; + case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break; case ISD::SADDO: case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; @@ -180,7 +185,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { N->getChain(), N->getBasePtr(), N->getMemOperand(),
N->getOrdering(), N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -193,7 +198,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -316,6 +321,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); } +SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + return DAG.getNode( + ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); +} + SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. @@ -465,7 +483,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), N->getMemoryVT(), N->getMemOperand()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -475,20 +493,34 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); - SDValue Mask = N->getMask(); - EVT NewMaskVT = getSetCCResultType(NVT); - if (NewMaskVT != N->getMask().getValueType()) - Mask = PromoteTargetBoolean(Mask, NewMaskVT); SDLoc dl(N); - SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), - Mask, ExtSrc0, N->getMemoryVT(), + N->getMask(), ExtSrc0, N->getMemoryVT(), N->getMemOperand(), ISD::SEXTLOAD); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; } + +SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtSrc0 = GetPromotedInteger(N->getValue()); + assert(NVT == ExtSrc0.getValueType() && + "Gather result type and the passThru argument type should be the same"); + + SDLoc dl(N); + SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), + N->getIndex()}; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + /// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. @@ -534,14 +566,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { - // Sign extend the input. - SDValue LHS = SExtPromotedInteger(N->getOperand(0)); - SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); -} - SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); @@ -629,6 +653,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { LHS.getValueType(), LHS, RHS); } +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -770,14 +810,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { return Mul; } -SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { - // Zero extend the input. - SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); - SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); -} - SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0))); @@ -875,6 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { OpNo); break; case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N), OpNo); break; + case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N), + OpNo); break; + case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N), + OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; @@ -1143,56 +1179,49 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getMemoryVT(), N->getMemOperand()); } -SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { SDValue DataOp = N->getValue(); EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); - EVT MaskVT = Mask.getValueType(); SDLoc dl(N); bool TruncateStore = false; - if (!TLI.isTypeLegal(DataVT)) { - if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { - DataOp = GetPromotedInteger(DataOp); - if (!TLI.isTypeLegal(MaskVT)) - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); - TruncateStore = true; - } + if (OpNo == 2) { + // Mask comes before the data operand. If the data operand is legal, we just + // promote the mask. + // When the data operand has illegal type, we should legalize the data + // operand first. The mask will be promoted/split/widened according to + // the data operand type.
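+ // e.g. storing legal v4i32 data under a v4i1 mask: only the mask is + // promoted here, typically to the target's setcc result type for v4i32 + // (an illustrative case, assuming v4i1 is not a legal type).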
+ if (TLI.isTypeLegal(DataVT)) + Mask = PromoteTargetBoolean(Mask, DataVT); else { - assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector && - "Unexpected data legalization in MSTORE"); - DataOp = GetWidenedVector(DataOp); + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) + return PromoteIntOp_MSTORE(N, 3); + + else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) + return WidenVecOp_MSTORE(N, 3); - if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - Mask = GetWidenedVector(Mask); else { - EVT BoolVT = getSetCCResultType(DataOp.getValueType()); - - // We can't use ModifyToType() because we should fill the mask with - // zeroes - unsigned WidenNumElts = BoolVT.getVectorNumElements(); - unsigned MaskNumElts = MaskVT.getVectorNumElements(); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); - Ops[0] = Mask; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = ZeroVal; - - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); + return SplitVecOp_MSTORE(N, 3); } } + } else { // Data operand + assert(OpNo == 3 && "Unexpected operand for promotion"); + DataOp = GetPromotedInteger(DataOp); + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + TruncateStore = true; } - else - Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType()); + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, N->getMemoryVT(), N->getMemOperand(), TruncateStore); } -SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){ +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, + unsigned OpNo) { assert(OpNo == 2 && "Only know how to promote the mask!"); EVT DataVT = N->getValueType(0); SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); @@ -1201,6 +1230,31 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo) return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, + unsigned OpNo) { + + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValueType(0); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValue().getValueType(); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); @@ -1259,6 +1313,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; case ISD::Constant: ExpandIntRes_Constant(N,
Lo, Hi); break; case ISD::CTLZ_ZERO_UNDEF: @@ -1270,6 +1325,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; @@ -1763,12 +1819,6 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); } -void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi) { - SDValue Res = DisintegrateMERGE_VALUES(N, ResNo); - SplitInteger(Res, Lo, Hi); -} - void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -1834,6 +1884,14 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, } } +void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi); +} + void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -1918,8 +1976,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, Lo, Hi); } @@ -1934,8 +1991,7 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, Lo, Hi); } @@ -2055,7 +2111,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one.
ReplaceValueWith(SDValue(N, 1), Ch); } @@ -2096,11 +2152,21 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, - dl).first, + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc DL(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other); + SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0)); + Lo = R.getValue(0); + Hi = R.getValue(1); + ReplaceValueWith(SDValue(N, 1), R.getValue(2)); +} + void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); @@ -2166,7 +2232,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -2261,8 +2327,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo, - Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); return; } @@ -2352,7 +2417,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -2499,7 +2564,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -2525,7 +2590,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -2605,6 +2670,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; @@ -2732,6 +2798,47 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } + if (LHSHi == RHSHi) { + // Comparing the low bits is enough.
+ NewLHS = Tmp1; + NewRHS = SDValue(); + return; + } + + // Lower with SETCCE if the target supports it. + // FIXME: Make all targets support this, then remove the other lowering. + if (TLI.getOperationAction( + ISD::SETCCE, + TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) == + TargetLowering::Custom) { + // SETCCE can detect < and >= directly. For > and <=, flip operands and + // condition code. + bool FlipOperands = false; + switch (CCCode) { + case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; + case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break; + case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break; + case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break; + default: break; + } + if (FlipOperands) { + std::swap(LHSLo, RHSLo); + std::swap(LHSHi, RHSHi); + } + // Perform a wide subtraction, feeding the carry from the low part into + // SETCCE. The SETCCE operation is essentially looking at the high part of + // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or + // positive iff LHS >= RHS. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo); + SDValue Res = + DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()), + LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode)); + NewLHS = Res; + NewRHS = SDValue(); + return; + } + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ, false, DagCombineInfo, dl); @@ -2796,6 +2903,24 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { DAG.getCondCode(CCCode)), 0); } +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCE for the high. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // The value being shifted is legal, but the shift amount is too big. // It follows that either the result of the shift is undefined, or the @@ -2820,7 +2945,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2980,11 +3105,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? 
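// Roughly, for an expanded i64 -> f64 conversion (a sketch in C of the // fudge-factor trick the code below implements): // double r = (double)(int64_t)x + ((int64_t)x < 0 ? 0x1p64 : 0.0);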
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), - FudgePtr, - MachinePointerInfo::getConstantPool(), - MVT::f32, - false, false, false, Alignment); + SDValue Fudge = DAG.getExtLoad( + ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } @@ -2992,7 +3116,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first; + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 54cfaf570619..2a0b0aa44794 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -73,21 +73,20 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { // (for example because it was created but not used). In general, we cannot // distinguish between new nodes and deleted nodes. SmallVector NewNodes; - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { + for (SDNode &Node : DAG.allnodes()) { // Remember nodes marked NewNode - they are subject to extra checking below. - if (I->getNodeId() == NewNode) - NewNodes.push_back(I); + if (Node.getNodeId() == NewNode) + NewNodes.push_back(&Node); - for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) { - SDValue Res(I, i); + for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { + SDValue Res(&Node, i); bool Failed = false; unsigned Mapped = 0; if (ReplacedValues.find(Res) != ReplacedValues.end()) { Mapped |= 1; // Check that remapped values are only used by nodes marked NewNode. - for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end(); + for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); UI != UE; ++UI) if (UI.getUse().getResNo() == i) assert(UI->getNodeId() == NewNode && @@ -119,16 +118,16 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { if (WidenedVectors.find(Res) != WidenedVectors.end()) Mapped |= 128; - if (I->getNodeId() != Processed) { + if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes // marked NewNode too, since a deleted node may have been reallocated as // another node that has not been seen by the LegalizeTypes machinery. - if ((I->getNodeId() == NewNode && Mapped > 1) || - (I->getNodeId() != NewNode && Mapped != 0)) { + if ((Node.getNodeId() == NewNode && Mapped > 1) || + (Node.getNodeId() != NewNode && Mapped != 0)) { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) { + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; @@ -194,13 +193,12 @@ bool DAGTypeLegalizer::run() { // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess' // (and remembering them) if they are leaves and assigning 'Unanalyzed' if // non-leaves. 
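
// Scalar model of the ExpandIntOp_UINT_TO_FP path above (illustrative, not
// the LLVM code): convert as signed, then add back 2^64 (the "fudge factor"
// fetched from the constant pool) whenever the sign bit was set. A model
// like this can differ from a direct conversion in the last bit because it
// rounds twice; it only illustrates the shape of the expansion.
#include <cstdint>
static double modelUIntToFP(uint64_t X) {
  double SignedConv = (double)(int64_t)X; // the SINT_TO_FP conversion
  double Fudge = (X >> 63) ? 18446744073709551616.0 : 0.0; // 2^64 or 0.0
  return SignedConv + Fudge; // the final FADD
}
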
- for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { - if (I->getNumOperands() == 0) { - I->setNodeId(ReadyToProcess); - Worklist.push_back(I); + for (SDNode &Node : DAG.allnodes()) { + if (Node.getNumOperands() == 0) { + Node.setNodeId(ReadyToProcess); + Worklist.push_back(&Node); } else { - I->setNodeId(Unanalyzed); + Node.setNodeId(Unanalyzed); } } @@ -240,9 +238,13 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - SoftenFloatResult(N, i); - Changed = true; - goto NodeDone; + Changed = SoftenFloatResult(N, i); + if (Changed) + goto NodeDone; + // If not changed, the result type should be legally in register. + assert(isLegalInHWReg(ResultVT) && + "Unchanged SoftenFloatResult should be legal in register!"); + goto ScanOperands; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -409,40 +411,48 @@ NodeDone: // In a debug build, scan all the nodes to make sure we found them all. This // ensures that there are no cycles and that everything got processed. #ifndef NDEBUG - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { + for (SDNode &Node : DAG.allnodes()) { bool Failed = false; // Check that all result types are legal. - if (!IgnoreNodeResults(I)) - for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(I->getValueType(i))) { - dbgs() << "Result type " << i << " illegal!\n"; + // A value type is illegal if its TypeAction is not TypeLegal, + // and TLI.RegClassForVT does not have a register class for this type. + // For example, the x86_64 target has f128 that is not TypeLegal, + // to have softened operators, but it also has FR128 register class to + // pass and return f128 values. Hence a legalized node can have f128 type. + if (!IgnoreNodeResults(&Node)) + for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(Node.getValueType(i)) && + !TLI.isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(); Failed = true; } // Check that all operand types are legal. 
- for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i) - if (!IgnoreNodeResults(I->getOperand(i).getNode()) && - !isTypeLegal(I->getOperand(i).getValueType())) { - dbgs() << "Operand type " << i << " illegal!\n"; + for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && + !isTypeLegal(Node.getOperand(i).getValueType()) && + !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(); Failed = true; } - if (I->getNodeId() != Processed) { - if (I->getNodeId() == NewNode) + if (Node.getNodeId() != Processed) { + if (Node.getNodeId() == NewNode) dbgs() << "New node not analyzed?\n"; - else if (I->getNodeId() == Unanalyzed) + else if (Node.getNodeId() == Unanalyzed) dbgs() << "Unanalyzed node not noticed?\n"; - else if (I->getNodeId() > 0) + else if (Node.getNodeId() > 0) dbgs() << "Operand not processed?\n"; - else if (I->getNodeId() == ReadyToProcess) + else if (Node.getNodeId() == ReadyToProcess) dbgs() << "Not added to worklist?\n"; Failed = true; } if (Failed) { - I->dump(&DAG); dbgs() << "\n"; + Node.dump(&DAG); dbgs() << "\n"; llvm_unreachable(nullptr); } } @@ -751,13 +761,23 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + // f128 of x86_64 could be kept in SSE registers, + // but sometimes softened to i128. + assert((Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already converted to integer!"); + // Allow repeated calls to save f128 type nodes + // or any node with type that transforms to itself. + // Many operations on these types are not softened. + assert((!OpEntry.getNode()|| + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Node is already converted to integer!"); OpEntry = Result; } @@ -1042,23 +1062,22 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } SmallVector Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), - &Ops[0], NumOps, isSigned, dl).first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; } // ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to
@@ -1108,6 +1127,23 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
   return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
 }
 
+/// WidenTargetBoolean - Widen the given target boolean to a target boolean
+/// of the given type. The boolean vector is widened and then promoted to match
+/// the target boolean type of the given ValVT.
+SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
+                                             bool WithZeroes) {
+  SDLoc dl(Bool);
+  EVT BoolVT = Bool.getValueType();
+
+  assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
+         TLI.isTypeLegal(ValVT) &&
+         "Unexpected types in WidenTargetBoolean");
+  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
+                                ValVT.getVectorNumElements());
+  Bool = ModifyToType(Bool, WideVT, WithZeroes);
+  return PromoteTargetBoolean(Bool, ValVT);
+}
+
 /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
 /// bits in Hi.
 void DAGTypeLegalizer::SplitInteger(SDValue Op,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d1131a74cf17..8ba19f76797f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -72,6 +72,20 @@ private:
     return TLI.getTypeAction(*DAG.getContext(), VT) ==
            TargetLowering::TypeLegal;
   }
 
+  /// isSimpleLegalType - Return true if this is a simple legal type.
+  bool isSimpleLegalType(EVT VT) const {
+    return VT.isSimple() && TLI.isTypeLegal(VT);
+  }
+
+  /// isLegalInHWReg - Return true if this type can be passed in registers.
+  /// For example, x86_64's f128 should be legal in registers, with only
+  /// some operations converted to library calls or integer bitwise
+  /// operations.
+  bool isLegalInHWReg(EVT VT) const {
+    EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+    return VT == NVT && isSimpleLegalType(VT);
+  }
+
   EVT getSetCCResultType(EVT VT) const {
     return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
   }
@@ -173,6 +187,11 @@ private:
   std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
 
   SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
+
+  /// Modify the bit vector to match the SetCC result type of ValVT.
+  /// The bit vector is widened with zeroes when WithZeroes is true.
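
// For intuition, a scalar sketch of what WidenTargetBoolean above does when
// WithZeroes is set (illustrative, not the LLVM API): the padding lanes of
// the widened mask start out inactive instead of undefined. The caller is
// assumed to request WideNumElts >= Mask.size().
#include <cstddef>
#include <cstdint>
#include <vector>
static std::vector<std::uint8_t>
widenMaskWithZeroes(const std::vector<std::uint8_t> &Mask,
                    std::size_t WideNumElts) {
  std::vector<std::uint8_t> Wide(WideNumElts, 0); // padding lanes are inactive
  for (std::size_t i = 0; i != Mask.size(); ++i)
    Wide[i] = Mask[i]; // original lanes keep their values
  return Wide;
}
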
+ SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false); + void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -234,6 +253,7 @@ private: SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntRes_BITCAST(SDNode *N); SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BITREVERSE(SDNode *N); SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); SDValue PromoteIntRes_Constant(SDNode *N); SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); @@ -246,21 +266,22 @@ private: SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); + SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_SDIV(SDNode *N); SDValue PromoteIntRes_SELECT(SDNode *N); SDValue PromoteIntRes_VSELECT(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); SDValue PromoteIntRes_SETCC(SDNode *N); SDValue PromoteIntRes_SHL(SDNode *N); SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); SDValue PromoteIntRes_SRA(SDNode *N); SDValue PromoteIntRes_SRL(SDNode *N); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); - SDValue PromoteIntRes_UDIV(SDNode *N); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); @@ -276,7 +297,6 @@ private: SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); @@ -284,7 +304,6 @@ private: SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); - SDValue PromoteIntOp_VSETCC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_Shift(SDNode *N); SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); @@ -294,6 +313,8 @@ private: SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -312,8 +333,6 @@ private: // Integer Result Expansion. 
void ExpandIntegerResult(SDNode *N, unsigned ResNo); - void ExpandIntRes_MERGE_VALUES (SDNode *N, unsigned ResNo, - SDValue &Lo, SDValue &Hi); void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -322,6 +341,7 @@ private: void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -333,6 +353,7 @@ private: void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -354,12 +375,10 @@ private: // Integer Operand Expansion. bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); - SDValue ExpandIntOp_BITCAST(SDNode *N); SDValue ExpandIntOp_BR_CC(SDNode *N); - SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); - SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); SDValue ExpandIntOp_SELECT_CC(SDNode *N); SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_SETCCE(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -375,32 +394,48 @@ private: // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// GetSoftenedFloat - Given a processed operand Op which was converted to an - /// integer of the same size, this returns the integer. The integer contains - /// exactly the same bits as Op - only the type changed. For example, if Op - /// is an f32 which was softened to an i32, then this method returns an i32, - /// the bits of which coincide with those of Op. + /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer + /// if the Op is not supported in target HW and converted to the integer. + /// The integer contains exactly the same bits as Op - only the type changed. + /// For example, if Op is an f32 which was softened to an i32, then this method + /// returns an i32, the bits of which coincide with those of Op. + /// If the Op can be efficiently supported in target HW or the operand must + /// stay in a register, the Op is not converted to an integer. + /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { SDValue &SoftenedOp = SoftenedFloats[Op]; + if (!SoftenedOp.getNode() && + isSimpleLegalType(Op.getValueType())) + return Op; RemapValue(SoftenedOp); assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Result Float to Integer Conversion. - void SoftenFloatResult(SDNode *N, unsigned OpNo); + // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. 
+  void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) {
+    // When the result type can be kept in HW registers, the converted
+    // NewRes node could have the same type. We can save the effort of
+    // cloning every user of N in SoftenFloatOperand or other legalization
+    // functions by calling ReplaceValueWith here to update all users.
+    if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo)))
+      ReplaceValueWith(SDValue(N, ResNo), NewRes);
+  }
+
+  // Convert Float Results to Integer for Non-HW-supported Operations.
+  bool SoftenFloatResult(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
-  SDValue SoftenFloatRes_BITCAST(SDNode *N);
+  SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
-  SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
+  SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
-  SDValue SoftenFloatRes_FABS(SDNode *N);
+  SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FMINNUM(SDNode *N);
   SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
   SDValue SoftenFloatRes_FADD(SDNode *N);
   SDValue SoftenFloatRes_FCEIL(SDNode *N);
-  SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
+  SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FCOS(SDNode *N);
   SDValue SoftenFloatRes_FDIV(SDNode *N);
   SDValue SoftenFloatRes_FEXP(SDNode *N);
@@ -412,7 +447,7 @@ private:
   SDValue SoftenFloatRes_FMA(SDNode *N);
   SDValue SoftenFloatRes_FMUL(SDNode *N);
   SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
-  SDValue SoftenFloatRes_FNEG(SDNode *N);
+  SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
   SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
   SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
@@ -425,21 +460,25 @@ private:
   SDValue SoftenFloatRes_FSQRT(SDNode *N);
   SDValue SoftenFloatRes_FSUB(SDNode *N);
   SDValue SoftenFloatRes_FTRUNC(SDNode *N);
-  SDValue SoftenFloatRes_LOAD(SDNode *N);
-  SDValue SoftenFloatRes_SELECT(SDNode *N);
-  SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
+  SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
+  SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
+  SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_UNDEF(SDNode *N);
   SDValue SoftenFloatRes_VAARG(SDNode *N);
   SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
 
-  // Operand Float to Integer Conversion.
+  // Return true if we can skip softening the given operand or SDNode because
+  // it was softened before by SoftenFloatResult and references to the operand
+  // were replaced by ReplaceValueWith.
+  bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
+
+  // Convert Float Operand to Integer for Non-HW-supported Operations.
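
// Two of the softened operations declared above need no library call at all:
// once an f32 is carried as an i32 with identical bits, FNEG and FABS reduce
// to integer bitwise operations on the sign bit. A minimal sketch
// (illustrative, not the LLVM implementation):
#include <cstdint>
static uint32_t softenedFNeg(uint32_t Bits) { return Bits ^ 0x80000000u; } // flip sign
static uint32_t softenedFAbs(uint32_t Bits) { return Bits & 0x7fffffffu; } // clear sign
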
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); - SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); - SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -575,7 +614,6 @@ private: SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); - SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); SDValue ScalarizeVecRes_VSELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT(SDNode *N); SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); @@ -617,20 +655,18 @@ private: void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi); @@ -650,6 +686,7 @@ private: SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); + SDValue SplitVecOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Support: LegalizeVectorTypes.cpp @@ -680,8 +717,8 @@ private: SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); + SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); - SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); SDValue WidenVecRes_SELECT_CC(SDNode* N); SDValue WidenVecRes_SETCC(SDNode* N); @@ -693,6 +730,7 @@ private: SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); @@ -707,9 +745,11 @@ private: SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); + SDValue 
WidenVecOp_FCOPYSIGN(SDNode *N); //===--------------------------------------------------------------------===// // Vector Widening Utilities Support: LegalizeVectorTypes.cpp @@ -745,8 +785,10 @@ private: /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. - SDValue ModifyToType(SDValue InOp, EVT WidenVT); - + /// When FillWithZeroes is "on" the vector will be widened with + /// zeroes. + /// By default, the vector will be widened with undefined values. + SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false); //===--------------------------------------------------------------------===// // Generic Splitting: LegalizeTypesGeneric.cpp diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 14d8f7762086..593c346df770 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -53,12 +53,17 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: - // Convert the integer operand instead. - SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + case TargetLowering::TypeSoftenFloat: { + // Expand the floating point operand only if it was converted to integers. + // Otherwise, it is a legal type like f128 that can be saved in a register. + auto SoftenedOp = GetSoftenedFloat(InOp); + if (SoftenedOp == InOp) + break; + SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); @@ -161,7 +166,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { NOutVT.getTypeForEVT(*DAG.getContext())); SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); int SPFI = cast(StackPtr.getNode())->getIndex(); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); // Emit a store to the stack slot. SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 83d4ad5ea1f4..f61f631e2ff8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -105,6 +105,8 @@ class VectorLegalizer { SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); + SDValue ExpandBITREVERSE(SDValue Op); + SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); /// \brief Implements vector promotion. /// @@ -159,7 +161,7 @@ bool VectorLegalizer::Run() { DAG.AssignTopologicalOrder(); for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) - LegalizeOp(SDValue(I, 0)); + LegalizeOp(SDValue(&*I, 0)); // Finally, it's possible the root changed. Get the new root. 
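
// The VectorLegalizer::Run change above uses the &*I idiom: the node iterator
// dereferences to a node reference, so &*I recovers the raw pointer. A
// self-contained illustration with an assumed container (not SelectionDAG's
// node list):
#include <list>
static void visitAll(std::list<int> &Nodes) {
  for (auto I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
    int *P = &*I; // dereference the iterator, then take the element's address
    *P += 1;      // operate through the pointer, as LegalizeOp(SDValue(&*I, 0)) does
  }
}
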
SDValue OldRoot = DAG.getRoot(); @@ -218,9 +220,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { assert(Result.getValue(1).use_empty() && "There are still live users of the old chain!"); return LegalizeOp(Lowered); - } else { - return TranslateLegalizeResults(Op, Lowered); } + return TranslateLegalizeResults(Op, Lowered); } case TargetLowering::Expand: Changed = true; @@ -231,7 +232,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) - switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) { + switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); @@ -244,7 +245,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandStore(Op)); } - } else if (Op.getOpcode() == ISD::MSCATTER) + } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) HasVectorValue = true; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); @@ -265,6 +266,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UDIV: case ISD::SREM: case ISD::UREM: + case ISD::SDIVREM: + case ISD::UDIVREM: case ISD::FADD: case ISD::FSUB: case ISD::FMUL: @@ -279,6 +282,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ROTL: case ISD::ROTR: case ISD::BSWAP: + case ISD::BITREVERSE: case ISD::CTLZ: case ISD::CTTZ: case ISD::CTLZ_ZERO_UNDEF: @@ -298,6 +302,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FABS: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -338,9 +344,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::MSCATTER: QueryType = cast(Node)->getValue().getValueType(); break; + case ISD::MSTORE: + QueryType = cast(Node)->getValue().getValueType(); + break; } switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { + default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: Result = Promote(Op); Changed = true; @@ -411,7 +421,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) { Operands[j] = Op.getOperand(j); } - Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) @@ -708,6 +718,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return ExpandFNEG(Op); case ISD::SETCC: return UnrollVSETCC(Op); + case ISD::BITREVERSE: + return ExpandBITREVERSE(Op); + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -893,6 +908,25 @@ SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { return DAG.getNode(ISD::BITCAST, DL, VT, Op); } +SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { + EVT VT = Op.getValueType(); + + // If we have the scalar operation, it's probably cheaper to unroll it. + if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) + return DAG.UnrollVectorOp(Op.getNode()); + + // If we have the appropriate vector bit operations, it is better to use them + // than unrolling and expanding each component. 
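
// A scalar sketch of the mask-and-shift ladder that those vector SHL/SRL/
// AND/OR operations make possible (illustrative; the vector expansion itself
// is left to LegalizeDAG, as the code below notes):
#include <cstdint>
static uint32_t bitreverse32(uint32_t V) {
  V = ((V >> 1) & 0x55555555u) | ((V & 0x55555555u) << 1); // swap adjacent bits
  V = ((V >> 2) & 0x33333333u) | ((V & 0x33333333u) << 2); // swap bit pairs
  V = ((V >> 4) & 0x0F0F0F0Fu) | ((V & 0x0F0F0F0Fu) << 4); // swap nibbles
  V = ((V >> 8) & 0x00FF00FFu) | ((V & 0x00FF00FFu) << 8); // swap bytes
  return (V >> 16) | (V << 16);                            // swap half-words
}
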
+ if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || + !TLI.isOperationLegalOrCustom(ISD::AND, VT) || + !TLI.isOperationLegalOrCustom(ISD::OR, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + // Let LegalizeDAG handle this later. + return Op; +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. @@ -971,6 +1005,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { // Convert hi and lo to floats // Convert the hi part back to the upper values + // TODO: Can any fast-math-flags be set on these nodes? SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); @@ -984,12 +1019,23 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDLoc DL(Op); SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); + // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { + // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle. + unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; + if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) + return Op; + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 51cd6619f783..d0187d36dee2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -67,6 +67,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::ANY_EXTEND: + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: @@ -108,6 +109,12 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: case ISD::FPOW: case ISD::FREM: @@ -139,7 +146,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + LHS.getValueType(), LHS, RHS, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { @@ -228,7 +235,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->isInvariant(), N->getOriginalAlignment(), N->getAAInfo()); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
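
// Scalar model of the two-part ExpandUINT_TO_FLOAT scheme above
// (illustrative, not the LLVM code): split the unsigned value into halves,
// convert each half via a signed conversion, and recombine as
// fHI * 2^HW + fLO. Each half is exact in a double, so the only rounding
// happens at the final add.
#include <cstdint>
static double modelUIntToFPSplit(uint64_t X) {
  uint32_t HI = (uint32_t)(X >> 32);
  uint32_t LO = (uint32_t)X;
  double fHI = (double)(int64_t)HI * 4294967296.0; // SINT_TO_FP, then FMUL by 2^32
  double fLO = (double)(int64_t)LO;                // SINT_TO_FP of the low half
  return fHI + fLO;                                // the final FADD
}
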
ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); return Result; @@ -594,6 +601,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; @@ -613,6 +621,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast(N), Lo, Hi); break; + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: @@ -656,11 +665,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SUB: case ISD::MUL: case ISD::FADD: - case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: case ISD::SDIV: case ISD::UDIV: case ISD::FDIV: @@ -698,8 +708,10 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), RHSLo, RHSHi); SDLoc dl(N); - Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); - Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); + const SDNodeFlags *Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); } void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, @@ -870,6 +882,25 @@ void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); } +void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDLoc DL(N); + + SDValue RHSLo, RHSHi; + SDValue RHS = N->getOperand(1); + EVT RHSVT = RHS.getValueType(); + if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) + GetSplitVector(RHS, RHSLo, RHSHi); + else + std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); + + + Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); +} + void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; @@ -989,7 +1020,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(LD, 1), Ch); } @@ -1003,6 +1034,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue Ch = MLD->getChain(); SDValue Ptr = MLD->getBasePtr(); SDValue Mask = MLD->getMask(); + SDValue Src0 = MLD->getSrc0(); unsigned Alignment = MLD->getOriginalAlignment(); ISD::LoadExtType ExtType = MLD->getExtensionType(); @@ -1012,16 +1044,22 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 
Alignment/2 : Alignment; + // Split Mask operand SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue Src0 = MLD->getSrc0(); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MLD->getPointerInfo(), @@ -1049,7 +1087,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(MLD, 1), Ch); @@ -1064,20 +1102,33 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); + SDValue Index = MGT->getIndex(); unsigned Alignment = MGT->getOriginalAlignment(); + // Split Mask operand SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; + // Split MemoryVT std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue IndexHi, IndexLo; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(MGT->getIndex(), dl); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -1097,7 +1148,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(MGT, 1), Ch); } @@ -1357,6 +1408,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { Res = SplitVecOp_TruncateHelper(N); break; case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; + case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; case ISD::STORE: Res = SplitVecOp_STORE(cast(N), OpNo); break; @@ -1567,23 +1619,31 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Ptr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); unsigned Alignment = MGT->getOriginalAlignment(); SDValue MaskLo, MaskHi; - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue Src0Lo, Src0Hi; - std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(MGT->getValue(), dl); + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); SDValue IndexHi, IndexLo; - if (Index.getNode()) - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); else - IndexLo = IndexHi = Index; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -1609,7 +1669,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to // use the new one. 
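
// The Mask, Src0 and Index handling above all repeat one pattern, shown here
// as a single hypothetical helper (not an existing LLVM function; it assumes
// the DAGTypeLegalizer context and <tuple> for std::tie): reuse the halves
// the type legalizer already produced when the operand itself is being
// split, otherwise split it on the spot.
void DAGTypeLegalizer::getSplitHalves(SDValue Op, SDValue &Lo, SDValue &Hi) {
  if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSplitVector)
    GetSplitVector(Op, Lo, Hi); // halves already exist; just fetch them
  else
    std::tie(Lo, Hi) = DAG.SplitVector(Op, SDLoc(Op)); // split manually
}
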
ReplaceValueWith(SDValue(MGT, 1), Ch); @@ -1633,9 +1693,21 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; - GetSplitVector(Data, DataLo, DataHi); + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + SDValue MaskLo, MaskHi; - GetSplitVector(Mask, MaskLo, MaskHi); + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + + MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType()); + MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType()); // if Alignment is equal to the vector size, // take the half of it for the second part @@ -1680,25 +1752,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned Alignment = N->getOriginalAlignment(); SDLoc DL(N); + // Split all operands EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; - GetSplitVector(Data, DataLo, DataHi); - SDValue MaskLo, MaskHi; - GetSplitVector(Mask, MaskLo, MaskHi); - - SDValue PtrLo, PtrHi; - if (Ptr.getValueType().isVector()) // gather form vector of pointers - std::tie(PtrLo, PtrHi) = DAG.SplitVector(Ptr, DL); + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); else - PtrLo = PtrHi = Ptr; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); SDValue IndexHi, IndexLo; - if (Index.getNode()) - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); else - IndexLo = IndexHi = Index; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -1706,7 +1782,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsLo[] = {Ch, DataLo, MaskLo, PtrLo, IndexLo}; + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); @@ -1715,7 +1791,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsHi[] = {Ch, DataHi, MaskHi, PtrHi, IndexHi}; + SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi}; Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); @@ -1891,6 +1967,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); } +SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { + // The result (and the first input) has a legal vector type, but the second + // input needs splitting. 
+ return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); +} //===----------------------------------------------------------------------===// @@ -1938,6 +2019,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast(N)); break; + case ISD::MGATHER: + Res = WidenVecRes_MGATHER(cast(N)); + break; case ISD::ADD: case ISD::AND: @@ -1949,11 +2033,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::XOR: case ISD::FMINNUM: case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: Res = WidenVecRes_Binary(N); break; case ISD::FADD: - case ISD::FCOPYSIGN: case ISD::FMUL: case ISD::FPOW: case ISD::FSUB: @@ -1966,6 +2055,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::FCOPYSIGN: + Res = WidenVecRes_FCOPYSIGN(N); + break; + case ISD::FPOWI: Res = WidenVecRes_POWI(N); break; @@ -1989,6 +2082,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: @@ -2037,7 +2131,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { @@ -2048,6 +2142,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); + const SDNodeFlags *Flags = N->getFlags(); while (!TLI.isTypeLegal(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); @@ -2057,7 +2152,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // Operation doesn't trap so just widen as normal. SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); } // No legal vector version so unroll the vector operation and then widen. 
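
// The FCOPYSIGN unrolling above computes, per element, a value whose
// magnitude comes from the first operand and whose sign comes from the
// second. A scalar model of one lane at a time (illustrative, not the
// LLVM code):
#include <cmath>
static void copysignLanes(const double *Mag, const double *Sgn,
                          double *Out, int NumLanes) {
  for (int i = 0; i < NumLanes; ++i)
    Out[i] = std::copysign(Mag[i], Sgn[i]); // magnitude of Mag, sign of Sgn
}
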
@@ -2087,7 +2182,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { SDValue EOp2 = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); Idx += NumElts; CurNumElts -= NumElts; } @@ -2105,7 +2200,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, - EOp1, EOp2); + EOp1, EOp2, Flags); } CurNumElts = 0; } @@ -2195,7 +2290,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned Opcode = N->getOpcode(); unsigned InVTNumElts = InVT.getVectorNumElements(); - + const SDNodeFlags *Flags = N->getFlags(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); @@ -2203,7 +2298,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTNumElts == WidenNumElts) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); - return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); } } @@ -2224,7 +2319,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVec); - return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); } if (InVTNumElts % WidenNumElts == 0) { @@ -2234,7 +2329,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); - return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1)); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags); } } @@ -2250,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else - Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1)); + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags); } SDValue UndefVal = DAG.getUNDEF(EltVT); @@ -2260,6 +2355,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { + // If this is an FCOPYSIGN with same input types, we can treat it as a + // normal (can trap) binary op. + if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType()) + return WidenVecRes_BinaryCanTrap(N); + + // If the types are different, fall back to unrolling. 
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); +} + SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -2669,7 +2775,35 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), Mask, Src0, N->getMemoryVT(), N->getMemOperand(), ExtType); - // Legalized the chain result - switch anything that used the old chain to + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { + + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Mask = N->getMask(); + SDValue Src0 = GetWidenedVector(N->getValue()); + unsigned NumElts = WideVT.getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen the Index operand + SDValue Index = N->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); return Res; @@ -2831,7 +2965,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; + case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: @@ -2928,6 +3064,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { } } +SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { + // The result (and first input) is legal, but the second input is illegal. + // We can't do much to fix that, so just unroll and let the extracts off of + // the second input be widened as needed later. 
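
// Scalar model of why WidenVecRes_MGATHER above widens the mask with zeroes
// (WidenTargetBoolean(Mask, WideVT, true)): the padding lanes must stay
// inactive so the widened gather can never touch memory through them.
// Illustrative sketch, not the LLVM API:
#include <cstddef>
#include <cstdint>
static void modelMaskedGather(const int32_t *Base, const int32_t *Index,
                              const uint8_t *Mask, int32_t *Out,
                              std::size_t NumLanes) {
  for (std::size_t i = 0; i != NumLanes; ++i)
    Out[i] = Mask[i] ? Base[Index[i]] : 0; // zero-padded lanes never load
}
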
+ return DAG.UnrollVectorOp(N); +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert @@ -3070,6 +3213,34 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { false); } +SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can widen only data operand of mscatter"); + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue DataOp = MSC->getValue(); + SDValue Mask = MSC->getMask(); + + // Widen the value + SDValue WideVal = GetWidenedVector(DataOp); + EVT WideVT = WideVal.getValueType(); + unsigned NumElts = WideVal.getValueType().getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen index + SDValue Index = MSC->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + + SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), + MSC->getMemoryVT(), dl, Ops, + MSC->getMemOperand()); +} + SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); @@ -3533,7 +3704,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. -SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { +/// FillWithZeroes specifies that the vector should be widened with zeroes. +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, + bool FillWithZeroes) { // Note that InOp might have been widened so it might already have // the right width or it might need be narrowed. EVT InVT = InOp.getValueType(); @@ -3550,10 +3723,11 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { unsigned NumConcat = WidenNumElts / InNumElts; SmallVector<SDValue, 16> Ops(NumConcat); - SDValue UndefVal = DAG.getUNDEF(InVT); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) : + DAG.getUNDEF(InVT); Ops[0] = InOp; for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = UndefVal; + Ops[i] = FillVal; return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); } @@ -3573,8 +3747,9 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue UndefVal = DAG.getUNDEF(EltVT); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) - Ops[Idx] = UndefVal; + Ops[Idx] = FillVal; return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 6303422b9ae9..622e06f0da2a 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -49,7 +49,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) TII = STI.getInstrInfo(); ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); // This hard requirement could be relaxed, but for now - // do not let it procede.
+ // do not let it proceed. assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); unsigned NumRC = TRI->getNumRegClasses(); @@ -269,12 +269,12 @@ bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { } // Now see if there are no other dependencies - // to instructions alredy in the packet. + // to instructions already in the packet. for (unsigned i = 0, e = Packet.size(); i != e; ++i) for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), E = Packet[i]->Succs.end(); I != E; ++I) { // Since we do not add pseudos to packets, might as well - // ignor order deps. + // ignore order deps. if (I->isCtrl()) continue; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 34e1a7001082..62e7733ecd2b 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -440,7 +440,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -519,7 +519,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e9bd52034ffd..91024e672f9c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -141,8 +141,8 @@ private: /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. unsigned NumLiveRegs; - std::vector<SUnit*> LiveRegDefs; - std::vector<SUnit*> LiveRegGens; + std::unique_ptr<SUnit*[]> LiveRegDefs; + std::unique_ptr<SUnit*[]> LiveRegGens; // Collect interferences between physical register use/defs. // Each interference is an SUnit and set of physical registers. @@ -328,8 +328,8 @@ void ScheduleDAGRRList::Schedule() { NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence.
- LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr); - LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr); + LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); + LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); CallSeqEndForStart.clear(); assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); @@ -1206,7 +1206,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1218,7 +1218,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, /// CheckForLiveRegDef - Return true and update live register vector if the /// specified register def of the specified SUnit clobbers any "live" registers. static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, - std::vector<SUnit*> &LiveRegDefs, + SUnit **LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs, const TargetRegisterInfo *TRI) { @@ -1240,7 +1240,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, /// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered /// by RegMask, and add them to LRegs. static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, - std::vector<SUnit*> &LiveRegDefs, + ArrayRef<SUnit*> LiveRegDefs, SmallSet<unsigned, 4> &RegAdded, SmallVectorImpl<unsigned> &LRegs) { // Look at all live registers. Skip Reg0 and the special CallResource. @@ -1278,7 +1278,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) - CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(), RegAdded, LRegs, TRI); } @@ -1302,7 +1302,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) - CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else i += NumVals; @@ -1328,13 +1328,15 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { } } if (const uint32_t *RegMask = getNodeRegMask(Node)) - CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs); + CheckForLiveRegDefMasked(SU, RegMask, + makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), + RegAdded, LRegs); const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) - CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) - CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } return !LRegs.empty(); @@ -2718,7 +2720,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const uint16_t *ImpDefs + const MCPhysReg *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); const uint32_t *RegMask = getNodeRegMask(SU->getNode()); if(!ImpDefs && !RegMask) @@ -2737,7 +2739,7 @@ static bool
canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, return true; if (ImpDefs) - for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). @@ -2756,13 +2758,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const uint16_t *SUImpDefs = + const MCPhysReg *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); if (!SUImpDefs && !SURegMask) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 159c28cd2a61..5cc806668b12 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -86,12 +86,6 @@ namespace llvm { /// flagged together nodes with a single SUnit. void BuildSchedGraph(AliasAnalysis *AA); - /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is - /// CopyToReg and its only active data operands are CopyFromReg within a - /// single block loop. - /// - void InitVRegCycleFlag(SUnit *SU); - /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void InitNumRegDefsLeft(SUnit *SU); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 14f44ccc60ce..abbc48e10e46 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/APSInt.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -210,28 +211,6 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { return true; } -/// isScalarToVector - Return true if the specified node is a -/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low -/// element is not an undef. -bool ISD::isScalarToVector(const SDNode *N) { - if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) - return true; - - if (N->getOpcode() != ISD::BUILD_VECTOR) - return false; - if (N->getOperand(0).getOpcode() == ISD::UNDEF) - return false; - unsigned NumElems = N->getNumOperands(); - if (NumElems == 1) - return false; - for (unsigned i = 1; i < NumElems; ++i) { - SDValue V = N->getOperand(i); - if (V.getOpcode() != ISD::UNDEF) - return false; - } - return true; -} - /// allOperandsUndef - Return true if the node has at least one operand /// and all operands of the specified node are ISD::UNDEF. bool ISD::allOperandsUndef(const SDNode *N) { @@ -397,24 +376,21 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, ID.AddInteger(Op.getResNo()); } } + /// Add logical or fast math flag values to FoldingSetNodeID value. 
static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode, const SDNodeFlags *Flags) { - if (!Flags || !isBinOpWithFlags(Opcode)) + if (!isBinOpWithFlags(Opcode)) return; - unsigned RawFlags = Flags->getRawFlags(); - // If no flags are set, do not alter the ID. We must match the ID of nodes - // that were created without explicitly specifying flags. This also saves time - // and allows a gradual increase in API usage of the optional optimization - // flags. - if (RawFlags != 0) - ID.AddInteger(RawFlags); + unsigned RawFlags = 0; + if (Flags) + RawFlags = Flags->getRawFlags(); + ID.AddInteger(RawFlags); } static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) { - if (auto *Node = dyn_cast(N)) - AddNodeIDFlags(ID, Node->getOpcode(), &Node->Flags); + AddNodeIDFlags(ID, N->getOpcode(), N->getFlags()); } static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, @@ -624,9 +600,9 @@ void SelectionDAG::RemoveDeadNodes() { SmallVector DeadNodes; // Add all obviously-dead nodes to the DeadNodes worklist. - for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) - if (I->use_empty()) - DeadNodes.push_back(I); + for (SDNode &Node : allnodes()) + if (Node.use_empty()) + DeadNodes.push_back(&Node); RemoveDeadNodes(DeadNodes); @@ -766,6 +742,7 @@ static void VerifySDNode(SDNode *N) { void SelectionDAG::InsertNode(SDNode *N) { AllNodes.push_back(N); #ifndef NDEBUG + N->PersistentId = NextPersistentId++; VerifySDNode(N); #endif } @@ -929,7 +906,7 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), UpdateListeners(nullptr) { - AllNodes.push_back(&EntryNode); + InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); } @@ -950,7 +927,10 @@ void SelectionDAG::allnodes_clear() { assert(&*AllNodes.begin() == &EntryNode); AllNodes.remove(AllNodes.begin()); while (!AllNodes.empty()) - DeallocateNode(AllNodes.begin()); + DeallocateNode(&AllNodes.front()); +#ifndef NDEBUG + NextPersistentId = 0; +#endif } BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, @@ -1023,7 +1003,7 @@ void SelectionDAG::clear() { static_cast(nullptr)); EntryNode.UseList = nullptr; - AllNodes.push_back(&EntryNode); + InsertNode(&EntryNode); Root = getEntryNode(); DbgInfo->clear(); } @@ -1429,8 +1409,8 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, - TargetFlags); + SDNode *N = + new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1852,8 +1832,58 @@ SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); if (OpTy == ShTy || OpTy.isVector()) return Op; - ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? 
ISD::TRUNCATE : ISD::ZERO_EXTEND; - return getNode(Opcode, SDLoc(Op), ShTy, Op); + return getZExtOrTrunc(Op, SDLoc(Op), ShTy); +} + +SDValue SelectionDAG::expandVAArg(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + const Value *V = cast(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = + getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(Align - 1, dl, VAList.getValueType())); + + VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)Align, dl, VAList.getValueType())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(getDataLayout().getTypeAllocSize( + VT.getTypeForEVT(*getContext())), + dl, VAList.getValueType())); + // Store the incremented VAList to the legalized pointer + Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); + // Load the actual argument out of the pointer VAList + return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(), + false, false, false, 0); +} + +SDValue SelectionDAG::expandVACopy(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast(Node->getOperand(3))->getValue(); + const Value *VS = cast(Node->getOperand(4))->getValue(); + SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl, + Node->getOperand(0), Node->getOperand(2), + MachinePointerInfo(VS), false, false, false, 0); + return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); } /// CreateStackTemporary - Create a stack temporary, suitable for holding the @@ -1872,8 +1902,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { /// CreateStackTemporary - Create a stack temporary suitable for holding /// either of the specified value types. SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { - unsigned Bytes = std::max(VT1.getStoreSizeInBits(), - VT2.getStoreSizeInBits())/8; + unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); const DataLayout &DL = getDataLayout(); @@ -2255,7 +2284,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); + if (LD->getExtensionType() == ISD::NON_EXTLOAD) + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne); } break; } @@ -2564,6 +2594,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (Tmp == 1) return 1; // Early out. 
Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); return std::min(Tmp, Tmp2); + case ISD::SELECT_CC: + Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1); + return std::min(Tmp, Tmp2); case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -2679,7 +2714,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ const int rIndex = Items - 1 - cast(Op.getOperand(1))->getZExtValue(); - // If the sign portion ends in our element the substraction gives correct + // If the sign portion ends in our element the subtraction gives correct // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } @@ -2798,6 +2833,16 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } +bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { + assert(A.getValueType() == B.getValueType() && + "Values must have the same type"); + APInt AZero, AOne; + APInt BZero, BOne; + computeKnownBits(A, AZero, AOne); + computeKnownBits(B, BZero, BOne); + return (AZero | BZero).isAllOnesValue(); +} + /// getNode - Gets or creates the specified node. /// SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { @@ -2848,8 +2893,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); - else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), @@ -2954,44 +3001,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: { - EVT SVT = VT.getScalarType(); - EVT InVT = BV->getValueType(0); - EVT InSVT = InVT.getScalarType(); - - // Find legal integer scalar type for constant promotion and - // ensure that its scalar size is at least as large as source. - EVT LegalSVT = SVT; - if (SVT.isInteger()) { - LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT); - if (LegalSVT.bitsLT(SVT)) break; - } - - // Let the above scalar folding handle the folding of each element. - SmallVector Ops; - for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { - SDValue OpN = BV->getOperand(i); - EVT OpVT = OpN.getValueType(); - - // Build vector (integer) scalar operands may need implicit - // truncation - do this before constant folding. - if (OpVT.isInteger() && OpVT.bitsGT(InSVT)) - OpN = getNode(ISD::TRUNCATE, DL, InSVT, OpN); - - OpN = getNode(Opcode, DL, SVT, OpN); - - // Legalize the (integer) scalar constant if necessary. 
- if (LegalSVT != SVT) - OpN = getNode(ISD::ANY_EXTEND, DL, LegalSVT, OpN); - - if (OpN.getOpcode() != ISD::UNDEF && - OpN.getOpcode() != ISD::Constant && - OpN.getOpcode() != ISD::ConstantFP) - break; - Ops.push_back(OpN); - } - if (Ops.size() == VT.getVectorNumElements()) - return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); - break; + SDValue Ops = { Operand }; + if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return Fold; } } } @@ -3012,6 +3024,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid fpext node, dst < src!"); if (Operand.getOpcode() == ISD::UNDEF) return getUNDEF(VT); break; @@ -3019,12 +3033,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid SIGN_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid sext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid sext node, dst < src!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); else if (OpOpcode == ISD::UNDEF) @@ -3035,12 +3049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ZERO_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid zext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid zext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); @@ -3052,12 +3066,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid ANY_EXTEND!"); if (Operand.getValueType() == VT) return Operand; // noop extension - assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) && - "Invalid anyext node, dst < src!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid anyext node, dst < src!"); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ANY_EXTEND) @@ -3077,12 +3091,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid TRUNCATE!"); if (Operand.getValueType() == VT) return Operand; // noop truncate - assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) && - "Invalid truncate node, src < dst!"); assert((!VT.isVector() || VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); + assert(Operand.getValueType().bitsGT(VT) && + "Invalid truncate node, src < dst!"); if (OpOpcode == ISD::TRUNCATE) return 
getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || @@ -3135,8 +3149,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), - Operand.getNode()->getOperand(0)); + Operand.getNode()->getOperand(0), + &cast(Operand.getNode())->Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getNode()->getOperand(0); break; @@ -3182,6 +3198,10 @@ static std::pair FoldValue(unsigned Opcode, const APInt &C1, case ISD::SRA: return std::make_pair(C1.ashr(C2), true); case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); + case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); + case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); + case ISD::UMAX: return std::make_pair(C1.uge(C2) ? C1 : C2, true); case ISD::UDIV: if (!C2.getBoolValue()) break; @@ -3284,10 +3304,118 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); } +SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, + EVT VT, + ArrayRef Ops, + const SDNodeFlags *Flags) { + // If the opcode is a target-specific ISD node, there's nothing we can + // do here and the operand rules may not line up with the below, so + // bail early. + if (Opcode >= ISD::BUILTIN_OP_END) + return SDValue(); + + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? + if (!VT.isVector()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + + auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { + return !Op.getValueType().isVector() || + Op.getValueType().getVectorNumElements() == NumElts; + }; + + auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { + BuildVectorSDNode *BV = dyn_cast(Op); + return (Op.getOpcode() == ISD::UNDEF) || + (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant()); + }; + + // All operands must be vector types with the same number of elements as + // the result type and must be either UNDEF or a build vector of constant + // or UNDEF scalars. + if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) || + !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize)) + return SDValue(); + + // If we are comparing vectors, then the result needs to be a i1 boolean + // that is then sign-extended back to the legal result type. + EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType()); + + // Find legal integer scalar type for constant promotion and + // ensure that its scalar size is at least as large as source. + EVT LegalSVT = VT.getScalarType(); + if (LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(SVT)) + return SDValue(); + } + + // Constant fold each scalar lane separately. + SmallVector ScalarResults; + for (unsigned i = 0; i != NumElts; i++) { + SmallVector ScalarOps; + for (SDValue Op : Ops) { + EVT InSVT = Op.getValueType().getScalarType(); + BuildVectorSDNode *InBV = dyn_cast(Op); + if (!InBV) { + // We've checked that this is UNDEF or a constant of some kind. 
+ if (Op.isUndef()) + ScalarOps.push_back(getUNDEF(InSVT)); + else + ScalarOps.push_back(Op); + continue; + } + + SDValue ScalarOp = InBV->getOperand(i); + EVT ScalarVT = ScalarOp.getValueType(); + + // Build vector (integer) scalar operands may need implicit + // truncation - do this before constant folding. + if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) + ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); + + ScalarOps.push_back(ScalarOp); + } + + // Constant fold the scalar operands. + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); + + // Legalize the (integer) scalar constant if necessary. + if (LegalSVT != SVT) + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); + + // Scalar folding only succeeded if the result is a constant or UNDEF. + if (ScalarResult.getOpcode() != ISD::UNDEF && + ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) + return SDValue(); + ScalarResults.push_back(ScalarResult); + } + + assert(ScalarResults.size() == NumElts && + "Unexpected number of scalar results for BUILD_VECTOR"); + return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults); +} + SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags *Flags) { ConstantSDNode *N1C = dyn_cast(N1); ConstantSDNode *N2C = dyn_cast(N2); + ConstantFPSDNode *N1CFP = dyn_cast(N1); + ConstantFPSDNode *N2CFP = dyn_cast(N2); + + // Canonicalize constant to RHS if commutative. + if (isCommutativeBinOp(Opcode)) { + if (N1C && !N2C) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } else if (N1CFP && !N2CFP) { + std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } + } + switch (Opcode) { default: break; case ISD::TokenFactor: @@ -3356,6 +3484,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::MUL: case ISD::SDIV: case ISD::SREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: assert(VT.isInteger() && "This operator does not apply to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); @@ -3367,37 +3499,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, case ISD::FREM: if (getTarget().Options.UnsafeFPMath) { if (Opcode == ISD::FADD) { - // 0+x --> x - if (ConstantFPSDNode *CFP = dyn_cast(N1)) - if (CFP->getValueAPF().isZero()) - return N2; // x+0 --> x - if (ConstantFPSDNode *CFP = dyn_cast(N2)) - if (CFP->getValueAPF().isZero()) - return N1; + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; } else if (Opcode == ISD::FSUB) { // x-0 --> x - if (ConstantFPSDNode *CFP = dyn_cast(N2)) - if (CFP->getValueAPF().isZero()) - return N1; + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; } else if (Opcode == ISD::FMUL) { - ConstantFPSDNode *CFP = dyn_cast(N1); - SDValue V = N2; - - // If the first operand isn't the constant, try the second - if (!CFP) { - CFP = dyn_cast(N2); - V = N1; - } - - if (CFP) { - // 0*x --> 0 - if (CFP->isZero()) - return SDValue(CFP,0); - // 1*x --> x - if (CFP->isExactlyValue(1.0)) - return V; - } + // x*0 --> 0 + if (N2CFP && N2CFP->isZero()) + return N2; + // x*1 --> x + if (N2CFP && N2CFP->isExactlyValue(1.0)) + return N1; } } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); @@ -3457,7 +3572,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && 
VT.bitsLE(N1.getValueType()) && - isa(N2) && "Invalid FP_ROUND!"); + N2C && "Invalid FP_ROUND!"); if (N1.getValueType() == VT) return N1; // noop conversion. break; case ISD::AssertSext: @@ -3502,13 +3617,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SmallVector Ops; for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue Op = N1.getOperand(i); - if (Op.getValueType() != VT.getScalarType()) break; if (Op.getOpcode() == ISD::UNDEF) { - Ops.push_back(Op); + Ops.push_back(getUNDEF(VT.getScalarType())); continue; } if (ConstantSDNode *C = dyn_cast(Op)) { APInt Val = C->getAPIntValue(); + Val = Val.zextOrTrunc(VT.getScalarSizeInBits()); Ops.push_back(SignExtendInReg(Val)); continue; } @@ -3590,15 +3705,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, return N1.getOperand(N2C->getZExtValue()); // EXTRACT_ELEMENT of a constant int is also very common. - if (ConstantSDNode *C = dyn_cast(N1)) { + if (N1C) { unsigned ElementSize = VT.getSizeInBits(); unsigned Shift = ElementSize * N2C->getZExtValue(); - APInt ShiftedVal = C->getAPIntValue().lshr(Shift); + APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift); return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); } break; - case ISD::EXTRACT_SUBVECTOR: { - SDValue Index = N2; + case ISD::EXTRACT_SUBVECTOR: if (VT.isSimple() && N1.getValueType().isSimple()) { assert(VT.isVector() && N1.getValueType().isVector() && "Extract subvector VTs must be a vectors!"); @@ -3608,9 +3722,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); - if (isa(Index)) { - assert((VT.getVectorNumElements() + - cast(Index)->getZExtValue() + if (N2C) { + assert((VT.getVectorNumElements() + N2C->getZExtValue() <= N1.getValueType().getVectorNumElements()) && "Extract subvector overflow!"); } @@ -3621,29 +3734,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } break; } - } // Perform trivial constant folding. if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) return SV; - // Canonicalize constant to RHS if commutative. - if (N1C && !N2C && isCommutativeBinOp(Opcode)) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } - // Constant fold FP operations. bool HasFPExceptions = TLI->hasFloatingPointExceptions(); - ConstantFPSDNode *N1CFP = dyn_cast(N1); - ConstantFPSDNode *N2CFP = dyn_cast(N2); if (N1CFP) { - if (!N2CFP && isCommutativeBinOp(Opcode)) { - // Canonicalize constant to RHS if commutative. - std::swap(N1CFP, N2CFP); - std::swap(N1, N2); - } else if (N2CFP) { + if (N2CFP) { APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); APFloat::opStatus s; switch (Opcode) { @@ -3670,7 +3770,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } break; case ISD::FREM : - s = V1.mod(V2, APFloat::rmNearestTiesToEven); + s = V1.mod(V2); if (!HasFPExceptions || (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)) { return getConstantFP(V1, DL, VT); @@ -3795,7 +3895,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. 
- ConstantSDNode *N1C = dyn_cast(N1); switch (Opcode) { case ISD::FMA: { ConstantFPSDNode *N1CFP = dyn_cast(N1); @@ -3827,12 +3926,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, break; case ISD::SETCC: { // Use FoldSetCC to simplify SETCC's. - SDValue Simp = FoldSetCC(VT, N1, N2, cast(N3)->get(), DL); - if (Simp.getNode()) return Simp; + if (SDValue V = FoldSetCC(VT, N1, N2, cast(N3)->get(), DL)) + return V; + // Vector constant folding. + SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return V; break; } case ISD::SELECT: - if (N1C) { + if (ConstantSDNode *N1C = dyn_cast(N1)) { if (N1C->getZExtValue()) return N2; // select true, X, Y -> X return N3; // select false, X, Y -> Y @@ -4153,6 +4256,14 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, return true; } +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction()->optForMinSize(); + return MF.getFunction()->optForSize(); +} + static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, @@ -4173,7 +4284,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4286,7 +4397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4380,7 +4491,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptSize = shouldLowerMemFuncForSize(MF); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -4446,6 +4557,16 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } +static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, + unsigned AS) { + // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all + // pointer operands can be losslessly bitcasted to pointers of address space 0 + if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + report_fatal_error("cannot lower memory intrinsic in address space " + + Twine(AS)); + } +} + SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, @@ -4487,6 +4608,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, true, DstPtrInfo, SrcPtrInfo); } + checkAddrSpaceIsValidForLibcall(TLI, 
DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc // memcpy is not guaranteed to be safe. libc memcpys aren't required to // respect volatile, so they may do things like read or write memory @@ -4548,6 +4672,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. @@ -4605,6 +4732,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, return Result; } + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + // Emit a library call. Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; @@ -4872,10 +5001,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + int64_t Offset = 0) { // If this is FI+Offset, we can model it. if (const FrameIndexSDNode *FI = dyn_cast(Ptr)) - return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + FI->getIndex(), Offset); // If this is (FI+Offset1)+Offset2, we can model it. if (Ptr.getOpcode() != ISD::ADD || @@ -4884,20 +5015,22 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { return MachinePointerInfo(); int FI = cast(Ptr.getOperand(0))->getIndex(); - return MachinePointerInfo::getFixedStack(FI, Offset+ - cast(Ptr.getOperand(1))->getSExtValue()); + return MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI, + Offset + cast(Ptr.getOperand(1))->getSExtValue()); } /// InferPointerInfo - If the specified ptr/offset is a frame index, infer a /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + SDValue OffsetOp) { // If the 'Offset' value isn't a constant, we can't handle this. if (ConstantSDNode *OffsetNode = dyn_cast(OffsetOp)) - return InferPointerInfo(Ptr, OffsetNode->getSExtValue()); + return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); if (OffsetOp.getOpcode() == ISD::UNDEF) - return InferPointerInfo(Ptr); + return InferPointerInfo(DAG, Ptr); return MachinePointerInfo(); } @@ -4926,7 +5059,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr, Offset); + PtrInfo = InferPointerInfo(*this, Ptr, Offset); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5054,7 +5187,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, Flags |= MachineMemOperand::MONonTemporal; if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr); + PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5109,7 +5242,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, Flags |= MachineMemOperand::MONonTemporal; if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(Ptr); + PtrInfo = InferPointerInfo(*this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = @@ -5261,7 +5394,7 @@ SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - MaskedGatherSDNode *N = + MaskedGatherSDNode *N = new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), Ops, VTs, VT, MMO); CSEMap.InsertNode(N, IP); @@ -5317,12 +5450,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, - ArrayRef Ops) { + ArrayRef Ops, const SDNodeFlags *Flags) { unsigned NumOps = Ops.size(); switch (NumOps) { case 0: return getNode(Opcode, DL, VT); case 1: return getNode(Opcode, DL, VT, Ops[0]); - case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); default: break; } @@ -5656,7 +5789,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef Ops) { "Update with wrong number of operands"); // If no operands changed just return the input node. - if (Ops.empty() || std::equal(Ops.begin(), Ops.end(), N->op_begin())) + if (std::equal(Ops.begin(), Ops.end(), N->op_begin())) return N; // See if the modified node already exists. @@ -6451,13 +6584,13 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // Node Id fields for nodes At SortedPos and after will contain the // count of outstanding operands. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { - SDNode *N = I++; + SDNode *N = &*I++; checkForCycles(N, this); unsigned Degree = N->getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. N->setNodeId(DAGSize++); - allnodes_iterator Q = N; + allnodes_iterator Q(N); if (Q != SortedPos) SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); assert(SortedPos != AllNodes.end() && "Overran node list"); @@ -6470,8 +6603,8 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // Visit all the nodes. As we iterate, move nodes into sorted order, // such that by the time the end is reached all nodes will be sorted. - for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { - SDNode *N = I; + for (SDNode &Node : allnodes()) { + SDNode *N = &Node; checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. 
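An aside on the mechanical iterator changes visible in the AssignTopologicalOrder hunks on either side of this point: the patch replaces implicit conversions between node pointers and list iterators ("SDNode *N = I++;" becomes "SDNode *N = &*I++;", "allnodes_iterator Q = N;" becomes "allnodes_iterator Q(N);"), apparently so that ilist iterator/pointer conversions are always explicit. The underlying idiom - take the element's address and advance the iterator before mutating the list - is what keeps these traversals safe while nodes are inserted or removed. A minimal standalone sketch of that idiom, with std::list standing in for the intrusive node list (none of this is LLVM code):

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> Nodes = {1, 2, 3, 4};
      for (auto I = Nodes.begin(), E = Nodes.end(); I != E;) {
        auto Cur = I++;       // step past Cur before any mutation
        if (*Cur % 2 == 0)
          Nodes.erase(Cur);   // invalidates only Cur; I remains valid
      }
      assert(Nodes.size() == 2); // the odd elements survive
    }

The same reasoning applies when AllNodes.insert and AllNodes.remove reposition the current node during the topological sort below.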
@@ -6493,9 +6626,10 @@ unsigned SelectionDAG::AssignTopologicalOrder() { P->setNodeId(Degree); } } - if (I == SortedPos) { + if (&Node == SortedPos) { #ifndef NDEBUG - SDNode *S = ++I; + allnodes_iterator I(N); + SDNode *S = &*++I; dbgs() << "Overran sorted position:\n"; S->dumprFull(this); dbgs() << "\n"; dbgs() << "Checking if this is due to cycles\n"; @@ -6559,6 +6693,26 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { // SDNode Class //===----------------------------------------------------------------------===// +bool llvm::isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isNullValue(); +} + +bool llvm::isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + +bool llvm::isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +bool llvm::isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isOne(); +} + HandleSDNode::~HandleSDNode() { DropOperands(); } @@ -6772,6 +6926,12 @@ uint64_t SDNode::getConstantOperandVal(unsigned Num) const { return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); } +const SDNodeFlags *SDNode::getFlags() const { + if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) + return &FlagsNode->Flags; + return nullptr; +} + SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); @@ -6808,9 +6968,11 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { } switch (N->getOpcode()) { - default: - Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands)); + default: { + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, + N->getFlags())); break; + } case ISD::VSELECT: Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); break; @@ -7101,6 +7263,24 @@ BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); } +int32_t +BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const { + if (ConstantFPSDNode *CN = + dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { + bool IsExact; + APSInt IntVal(BitWidth); + APFloat APF = CN->getValueAPF(); + if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != + APFloat::opOK || + !IsExact) + return -1; + + return IntVal.exactLogBase2(); + } + return -1; +} + bool BuildVectorSDNode::isConstant() const { for (const SDValue &Op : op_values()) { unsigned Opc = Op.getOpcode(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2c3c0eb101a0..d2ea85ab4d22 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -63,6 +64,7 @@ #include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <utility> using namespace llvm; #define DEBUG_TYPE "isel" @@ -79,7 +81,7 @@ LimitFPPrecision("limit-float-precision", cl::init(0)); static cl::opt<bool> -EnableFMFInDAG("enable-fmf-dag",
cl::init(false), cl::Hidden, +EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, cl::desc("Enable fast-math-flags for DAG nodes")); // Limit the width of DAG chains. This is important in general to prevent @@ -196,6 +198,14 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && + ValueVT.bitsLT(PartEVT)) { + // For an FP value in an integer part, we need to truncate to the right + // width first. + PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); + } + if (PartEVT.isInteger() && ValueVT.isInteger()) { if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to @@ -319,9 +329,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - bool Smaller = ValueVT.bitsLE(PartEVT); - return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT, Val); + return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); } @@ -339,11 +347,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, } if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartEVT) { - bool Smaller = ValueVT.bitsLE(PartEVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, ValueVT.getScalarType(), Val); - } + ValueVT.getVectorElementType() != PartEVT) + Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } @@ -387,6 +392,12 @@ static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, assert(NumParts == 1 && "Do not know what to promote to!"); Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); } else { + if (ValueVT.isFloatingPoint()) { + // FP values need to be bitcast, then extended if they are being put + // into a larger container. + ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && ValueVT.isInteger() && "Unknown mismatch!"); @@ -520,9 +531,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - bool Smaller = PartEVT.bitsLE(ValueVT); - Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else{ // Vector -> scalar conversion. assert(ValueVT.getVectorNumElements() == 1 && @@ -531,9 +540,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - bool Smaller = ValueVT.bitsLE(PartVT); - Val = DAG.getNode((Smaller ? 
ISD::TRUNCATE : ISD::ANY_EXTEND), - DL, PartVT, Val); + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } Parts[0] = Val; @@ -595,8 +602,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; + for (EVT ValueVT : ValueVTs) { unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) @@ -907,7 +913,8 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); - if (!isa<TerminatorInst>(&I) && !HasTailCall) + if (!isa<TerminatorInst>(&I) && !HasTailCall && + !isStatepoint(&I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); CurInst = nullptr; @@ -943,14 +950,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); uint64_t Offset = DI->getOffset(); - // A dbg.value for an alloca is always indirect. - bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, IsIndirect, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, Val)) { SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), - IsIndirect, Offset, dl, DbgSDNodeOrder); + false, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1168,6 +1173,135 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { llvm_unreachable("Can't get register for value!"); } +void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Pers == EHPersonality::CoreCLR; + MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; + // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + CatchPadMBB->setIsEHFuncletEntry(); + + DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); +} + +void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { + // Update machine-CFG edge. + MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; + FuncInfo.MBB->addSuccessor(TargetMBB); + + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsSEH = isAsynchronousEHPersonality(Pers); + if (IsSEH) { + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (TargetMBB != NextBlock(FuncInfo.MBB) || + TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB))); + return; + } + + // Figure out the funclet membership for the catchret's successor. + // This will be used by the FuncletLayout pass to determine how to order the + // BB's. + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I]; + assert(SuccessorColor && "No parent funclet for catchret!"); + MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; + assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); + + // Create the terminator node.
+ SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB), + DAG.getBasicBlock(SuccessorColorMBB)); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { + // Don't emit any special code for the cleanuppad instruction. It just marks + // the start of a funclet. + FuncInfo.MBB->setIsEHFuncletEntry(); + FuncInfo.MBB->setIsCleanupFuncletEntry(); +} + +/// When an invoke or a cleanupret unwinds to the next EH pad, there are +/// many places it could ultimately go. In the IR, we have a single unwind +/// destination, but in the machine CFG, we enumerate all the possible blocks. +/// This function skips over imaginary basic blocks that hold catchswitch +/// instructions, and finds all the "real" machine +/// basic block destinations. As those destinations may not be successors of +/// EHPadBB, here we also calculate the edge probability to those destinations. +/// The passed-in Prob is the edge probability to EHPadBB. +static void findUnwindDestinations( + FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, + BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests) { + EHPersonality Personality = + classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + BasicBlock *NewEHPadBB = nullptr; + if (isa<LandingPadInst>(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + break; + } else if (isa<CleanupPadInst>(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + UnwindDests.back().first->setIsEHFuncletEntry(); + break; + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + // Add the catchpad handlers to the possible destinations. + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); + // For MSVC++ and the CLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + UnwindDests.back().first->setIsEHFuncletEntry(); + } + NewEHPadBB = CatchSwitch->getUnwindDest(); + } else { + continue; + } + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI && NewEHPadBB) + Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); + EHPadBB = NewEHPadBB; + } +} + +void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { + // Update successor info. + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + auto UnwindDest = I.getUnwindDest(); + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability UnwindDestProb = + (BPI && UnwindDest) + ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); + } + FuncInfo.MBB->normalizeSuccProbs(); + + // Create the terminator node.
+ SDValue Ret = + DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { + report_fatal_error("visitCatchSwitch not yet implemented!"); +} + void SelectionDAGBuilder::visitRet(const ReturnInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); @@ -1186,7 +1320,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); - SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DemoteReg, PtrValueVTs[0]); SDValue RetOp = getValue(I.getOperand(0)); SmallVector ValueVTs; @@ -1334,25 +1469,34 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, } /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. -uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const { +BranchProbability +SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { BranchProbabilityInfo *BPI = FuncInfo.BPI; - if (!BPI) - return 0; const BasicBlock *SrcBB = Src->getBasicBlock(); const BasicBlock *DstBB = Dst->getBasicBlock(); - return BPI->getEdgeWeight(SrcBB, DstBB); + if (!BPI) { + // If BPI is not available, set the default probability as 1 / N, where N is + // the number of successors. + auto SuccSize = std::max( + std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); + return BranchProbability(1, SuccSize); + } + return BPI->getEdgeProbability(SrcBB, DstBB); } -void SelectionDAGBuilder:: -addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, - uint32_t Weight /* = 0 */) { - if (!Weight) - Weight = getEdgeWeight(Src, Dst); - Src->addSuccessor(Dst, Weight); +void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, + MachineBasicBlock *Dst, + BranchProbability Prob) { + if (!FuncInfo.BPI) + Src->addSuccessorWithoutProb(Dst); + else { + if (Prob.isUnknown()) + Prob = getEdgeProbability(Src, Dst); + Src->addSuccessor(Dst, Prob); + } } - static bool InBlock(const Value *V, const BasicBlock *BB) { if (const Instruction *I = dyn_cast(V)) return I->getParent() == BB; @@ -1369,8 +1513,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - uint32_t TWeight, - uint32_t FWeight) { + BranchProbability TProb, + BranchProbability FProb) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1385,17 +1529,15 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, ISD::CondCode Condition; if (const ICmpInst *IC = dyn_cast(Cond)) { Condition = getICmpCondCode(IC->getPredicate()); - } else if (const FCmpInst *FC = dyn_cast(Cond)) { + } else { + const FCmpInst *FC = cast(Cond); Condition = getFCmpCondCode(FC->getPredicate()); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - } else { - (void)Condition; // silence warning. 
- llvm_unreachable("Unknown compare instruction"); } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, - TBB, FBB, CurBB, TWeight, FWeight); + TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); return; } @@ -1403,26 +1545,19 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), - nullptr, TBB, FBB, CurBB, TWeight, FWeight); + nullptr, TBB, FBB, CurBB, TProb, FProb); SwitchCases.push_back(CB); } -/// Scale down both weights to fit into uint32_t. -static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { - uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; - uint32_t Scale = (NewMax / UINT32_MAX) + 1; - NewTrue = NewTrue / Scale; - NewFalse = NewFalse / Scale; -} - /// FindMergedConditions - If Cond is an expression like void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - unsigned Opc, uint32_t TWeight, - uint32_t FWeight) { + Instruction::BinaryOps Opc, + BranchProbability TProb, + BranchProbability FProb) { // If this node is not part of the or/and tree, emit it as a branch. const Instruction *BOp = dyn_cast(Cond); if (!BOp || !(isa(BOp) || isa(BOp)) || @@ -1431,12 +1566,12 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, - TWeight, FWeight); + TProb, FProb); return; } // Create TmpBB after CurBB. - MachineFunction::iterator BBI = CurBB; + MachineFunction::iterator BBI(CurBB); MachineFunction &MF = DAG.getMachineFunction(); MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); CurBB->getParent()->insert(++BBI, TmpBB); @@ -1455,26 +1590,25 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // The requirement is that // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) // = TrueProb for original BB. - // Assuming the original weights are A and B, one choice is to set BB1's - // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice - // assumes that + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to + // A/(1+B) and 2B/(1+B). This choice assumes that // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. // Another choice is to assume TrueProb for BB1 equals to TrueProb for // TmpBB, but the math is more complicated. - uint64_t NewTrueWeight = TWeight; - uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + auto NewTrueProb = TProb / 2; + auto NewFalseProb = TProb / 2 + FProb; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + NewTrueProb, NewFalseProb); - NewTrueWeight = TWeight; - NewFalseWeight = 2 * (uint64_t)FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). + SmallVector Probs{TProb / 2, FProb}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. 
FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + Probs[0], Probs[1]); } else { assert(Opc == Instruction::And && "Unknown merge op!"); // Codegen X & Y as: @@ -1491,24 +1625,23 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // The requirement is that // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) // = FalseProb for original BB. - // Assuming the original weights are A and B, one choice is to set BB1's - // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice - // assumes that - // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to + // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == + // TrueProb for BB1 * FalseProb for TmpBB. - uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; - uint64_t NewFalseWeight = FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + auto NewTrueProb = TProb + FProb / 2; + auto NewFalseProb = FProb / 2; // Emit the LHS condition. FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + NewTrueProb, NewFalseProb); - NewTrueWeight = 2 * (uint64_t)TWeight; - NewFalseWeight = FWeight; - ScaleWeights(NewTrueWeight, NewFalseWeight); + // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). + SmallVector Probs{TProb, FProb / 2}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); // Emit the RHS condition into TmpBB. FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, - NewTrueWeight, NewFalseWeight); + Probs[0], Probs[1]); } } @@ -1585,12 +1718,14 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (!DAG.getTargetLoweringInfo().isJumpExpensive() && - BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { + Instruction::BinaryOps Opcode = BOp->getOpcode(); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && + !I.getMetadata(LLVMContext::MD_unpredictable) && + (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, - BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), - getEdgeWeight(BrMBB, Succ1MBB)); + Opcode, + getEdgeProbability(BrMBB, Succ0MBB), + getEdgeProbability(BrMBB, Succ1MBB)); // If the compares in later blocks need to use values not currently // exported from this block, export them now. This block should always // be the first entry. @@ -1669,11 +1804,12 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, } // Update successor info - addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); + addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); // TrueBB and FalseBB are always different unless the incoming IR is // degenerate. This only happens when running llc on weird IR. if (CB.TrueBB != CB.FalseBB) - addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); + addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb); + SwitchBB->normalizeSuccProbs(); // If the lhs block is the next block, invert the condition so that we can // fall through to the lhs instead of the rhs block. 
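
The splits above are chosen so that the merged two-block branch reproduces the original edge probabilities. A standalone numeric check of both identities, with doubles standing in for the fixed-point BranchProbability arithmetic:

    #include <cassert>
    #include <cstdio>

    // A is the original true probability, B the false probability (A + B == 1).
    int main() {
      double A = 0.7, B = 0.3;

      // Or case: BB1 gets {A/2, A/2 + B}; TmpBB gets {A/2, B} normalized,
      // i.e. {A/(1+B), 2B/(1+B)}. Requirement:
      //   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB) == A.
      double OrTrue = A / 2 + (A / 2 + B) * ((A / 2) / (A / 2 + B));
      assert(OrTrue > A - 1e-9 && OrTrue < A + 1e-9);

      // And case: BB1 gets {A + B/2, B/2}; TmpBB gets {A, B/2} normalized,
      // i.e. {2A/(1+A), B/(1+A)}. Requirement:
      //   FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB) == B.
      double AndFalse = B / 2 + (A + B / 2) * ((B / 2) / (A + B / 2));
      assert(AndFalse > B - 1e-9 && AndFalse < B + 1e-9);

      std::printf("or: %f == %f, and: %f == %f\n", OrTrue, A, AndFalse, B);
    }
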
@@ -1797,10 +1933,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, GuardPtr, MachinePointerInfo(IRGuard, 0), true, false, false, Align); - SDValue StackSlot = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), - StackSlotPtr, - MachinePointerInfo::getFixedStack(FI), - true, false, false, Align); + SDValue StackSlot = DAG.getLoad( + PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, + false, false, Align); // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); @@ -1837,7 +1973,7 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - nullptr, 0, false, getCurSDLoc(), false, false).second; + None, false, getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1884,8 +2020,9 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithWeight(SwitchBB, B.Default); - addSuccessorWithWeight(SwitchBB, MBB); + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + addSuccessorWithProb(SwitchBB, MBB, B.Prob); + SwitchBB->normalizeSuccProbs(); SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, MVT::Other, CopyTo, RangeCmp, @@ -1902,7 +2039,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, /// visitBitTestCase - this function produces one "bit test" void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, - uint32_t BranchWeightToNext, + BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { @@ -1938,10 +2075,14 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); } - // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. - addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); - // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. - addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); + // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. + addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); + // The branch probability from SwitchBB to NextMBB is BranchProbToNext. + addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); + // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is + // one as they are relative probabilities (and thus work more like weights), + // and hence we need to normalize them to let the sum of them become one. + SwitchBB->normalizeSuccProbs(); SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, MVT::Other, getControlRoot(), @@ -1958,9 +2099,10 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { MachineBasicBlock *InvokeMBB = FuncInfo.MBB; - // Retrieve successors. + // Retrieve successors. Look through artificial IR level blocks like + // catchswitch for successors. 
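
As the visitBitTestCase comment above notes, the added probabilities behave like relative weights, so normalizeSuccProbs only has to rescale them to sum to one. A toy version over doubles:

    #include <cstdio>
    #include <vector>

    // Rescale relative successor probabilities so they sum to one,
    // mirroring what MachineBasicBlock::normalizeSuccProbs must achieve.
    void normalize(std::vector<double> &Probs) {
      double Sum = 0;
      for (double P : Probs)
        Sum += P;
      if (Sum <= 0)
        return;  // nothing sensible to do
      for (double &P : Probs)
        P /= Sum;
    }

    int main() {
      std::vector<double> P{0.5, 0.75};  // e.g. ExtraProb and BranchProbToNext
      normalize(P);
      std::printf("%f %f\n", P[0], P[1]);  // 0.4 0.6
    }
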
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; - MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + const BasicBlock *EHPadBB = I.getSuccessor(1); const Value *Callee(I.getCalledValue()); const Function *Fn = dyn_cast(Callee); @@ -1975,14 +2117,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I, LandingPad); + visitPatchpoint(&I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I), LandingPad); + LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); break; } } else - LowerCallTo(&I, getValue(Callee), false, LandingPad); + LowerCallTo(&I, getValue(Callee), false, EHPadBB); // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. @@ -1992,9 +2134,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { CopyToExportRegsIfNeeded(&I); } - // Update successor info - addSuccessorWithWeight(InvokeMBB, Return); - addSuccessorWithWeight(InvokeMBB, LandingPad); + SmallVector, 1> UnwindDests; + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability EHPadBBProb = + BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests); + + // Update successor info. + addSuccessorWithProb(InvokeMBB, Return); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); + } + InvokeMBB->normalizeSuccProbs(); // Drop into normal successor. DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), @@ -2007,7 +2160,7 @@ void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { } void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { - assert(FuncInfo.MBB->isLandingPad() && + assert(FuncInfo.MBB->isEHPad() && "Call to landingpad not in landing pad!"); MachineBasicBlock *MBB = FuncInfo.MBB; @@ -2017,8 +2170,16 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.getExceptionPointerRegister() == 0 && - TLI.getExceptionSelectorRegister() == 0) + const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn(); + if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && + TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + return; + + // If landingpad's return type is token type, we don't create DAG nodes + // for its exception pointer and selector value. The extraction of exception + // pointer or selector value from token type landingpads is not currently + // supported. + if (LP.getType()->isTokenTy()) return; SmallVector ValueVTs; @@ -2074,8 +2235,7 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { // If this case has the same successor and is a neighbour, merge it into // the previous cluster. 
Clusters[DstIndex - 1].High = CaseVal; - Clusters[DstIndex - 1].Weight += CC.Weight; - assert(Clusters[DstIndex - 1].Weight >= CC.Weight && "Weight overflow!"); + Clusters[DstIndex - 1].Prob += CC.Prob; } else { std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], sizeof(Clusters[SrcIndex])); @@ -2109,8 +2269,9 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { continue; MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; - addSuccessorWithWeight(IndirectBrMBB, Succ); + addSuccessorWithProb(IndirectBrMBB, Succ); } + IndirectBrMBB->normalizeSuccProbs(); DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), MVT::Other, getControlRoot(), @@ -2119,7 +2280,8 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { if (DAG.getTarget().Options.TrapUnreachable) - DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitFSub(const User &I) { @@ -2260,6 +2422,10 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Condition = getFCmpCondCode(predicate); + + // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. + // FIXME: We should propagate the fast-math-flags to the DAG node itself for + // further optimization, but currently FMF is only applicable to binary nodes. if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), @@ -2284,27 +2450,74 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // Min/max matching is only viable if all output VTs are the same. if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { - Value *LHS, *RHS; - SelectPatternFlavor SPF = matchSelectPattern(const_cast(&I), LHS, RHS); - ISD::NodeType Opc = ISD::DELETED_NODE; - switch (SPF) { - case SPF_UMAX: Opc = ISD::UMAX; break; - case SPF_UMIN: Opc = ISD::UMIN; break; - case SPF_SMAX: Opc = ISD::SMAX; break; - case SPF_SMIN: Opc = ISD::SMIN; break; - default: break; - } - EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); - while (TLI.getTypeAction(Ctx, VT) == TargetLoweringBase::TypeSplitVector) + + // We care about the legality of the operation after it has been type + // legalized. + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && + VT != TLI.getTypeToTransformTo(Ctx, VT)) VT = TLI.getTypeToTransformTo(Ctx, VT); - if (Opc != ISD::DELETED_NODE && TLI.isOperationLegalOrCustom(Opc, VT) && - // If the underlying comparison instruction is used by any other instruction, - // the consumed instructions won't be destroyed, so it is not profitable - // to convert to a min/max. + // If the vselect is legal, assume we want to leave this as a vector setcc + + // vselect. Otherwise, if this is going to be scalarized, we want to see if + // min/max is legal on the scalar type. 
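
The cluster merge in sortAndRangeify above now simply adds probabilities; the old overflow assert on 32-bit weights is gone. A standalone sketch of the merge loop with toy clusters:

    #include <cstdio>
    #include <vector>

    // A sorted case cluster: value range [Low, High], destination block id,
    // and probability. Illustrative types, not LLVM's CaseCluster.
    struct ToyCluster { long Low, High; int BB; double Prob; };

    // Fold a cluster into its predecessor when it is adjacent
    // (High + 1 == Low) and targets the same block, summing probabilities.
    void mergeNeighbors(std::vector<ToyCluster> &Cs) {
      size_t Dst = 0;
      for (size_t Src = 0; Src < Cs.size(); ++Src) {
        if (Dst > 0 && Cs[Dst - 1].BB == Cs[Src].BB &&
            Cs[Dst - 1].High + 1 == Cs[Src].Low) {
          Cs[Dst - 1].High = Cs[Src].High;
          Cs[Dst - 1].Prob += Cs[Src].Prob;  // Prob += CC.Prob
        } else {
          Cs[Dst++] = Cs[Src];
        }
      }
      Cs.resize(Dst);
    }

    int main() {
      std::vector<ToyCluster> Cs{{1, 2, 7, 0.2}, {3, 5, 7, 0.3}, {9, 9, 8, 0.5}};
      mergeNeighbors(Cs);
      std::printf("%zu clusters, first prob %f\n", Cs.size(), Cs[0].Prob);
    }
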
+ bool UseScalarMinMax = VT.isVector() && + !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); + + Value *LHS, *RHS; + auto SPR = matchSelectPattern(const_cast(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPR.Flavor) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + case SPF_FMINNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; + case SPNB_RETURNS_ANY: { + if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) + Opc = ISD::FMINNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)) + Opc = ISD::FMINNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? + ISD::FMINNUM : ISD::FMINNAN; + break; + } + } + break; + case SPF_FMAXNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; + case SPNB_RETURNS_ANY: + + if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) + Opc = ISD::FMAXNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)) + Opc = ISD::FMAXNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? + ISD::FMAXNUM : ISD::FMAXNAN; + break; + } + break; + default: break; + } + + if (Opc != ISD::DELETED_NODE && + (TLI.isOperationLegalOrCustom(Opc, VT) || + (UseScalarMinMax && + TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && + // If the underlying comparison instruction is used by any other + // instruction, the consumed instructions won't be destroyed, so it is + // not profitable to convert to a min/max. cast(&I)->getCondition()->hasOneUse()) { OpCode = Opc; LHSVal = getValue(LHS); @@ -2920,7 +3133,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // throughout the function's lifetime. bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && - isDereferenceablePointer(SV, *DAG.getTarget().getDataLayout()); + isDereferenceablePointer(SV, DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); AAMDNodes AAInfo; @@ -2940,8 +3153,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile || NumValues > MaxParallelChains) // Serialize volatile loads with other side effects. Root = getRoot(); - else if (AA->pointsToConstantMemory( - MemoryLocation(SV, AA->getTypeStoreSize(Ty), AAInfo))) { + else if (AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3056,7 +3269,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { SDLoc sdl = getCurSDLoc(); - // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Value *PtrOperand = I.getArgOperand(1); SDValue Ptr = getValue(PtrOperand); SDValue Src0 = getValue(I.getArgOperand(0)); @@ -3080,63 +3293,70 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { setValue(&I, StoreNode); } -// Gather/scatter receive a vector of pointers. 
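
The opcode selection above encodes a small decision table: which node is picked depends on the matched pattern's NaN behavior and, when either behavior is acceptable, on which node is legal (checking the scalar type if the vector op would be scalarized). A standalone sketch of the FMIN side, with illustrative enums in place of SelectionDAG types:

    #include <cstdio>

    enum NaNBehavior { ReturnsNaN, ReturnsOther, ReturnsAny };
    enum Opcode { None, FMinNan, FMinNum };

    Opcode pickFMin(NaNBehavior NB, bool NumLegal, bool NanLegal) {
      switch (NB) {
      case ReturnsNaN:   return FMinNan;  // must propagate NaN
      case ReturnsOther: return FMinNum;  // must return the non-NaN operand
      case ReturnsAny:                    // either is fine: prefer what's legal
        if (NumLegal) return FMinNum;
        if (NanLegal) return FMinNan;
        return None;
      }
      return None;
    }

    int main() {
      std::printf("%d\n", pickFMin(ReturnsAny, false, true));  // FMinNan
    }
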
-// This vector of pointers may be represented as a base pointer + vector of
-// indices, it depends on GEP and instruction preceeding GEP
-// that calculates indices
-static bool getUniformBase(Value *& Ptr, SDValue& Base, SDValue& Index,
+// Get a uniform base for the Gather/Scatter intrinsic.
+// The first argument of the Gather/Scatter intrinsic is a vector of pointers.
+// We try to represent it as a base pointer + vector of indices.
+// Usually, the vector of pointers comes from a 'getelementptr' instruction.
+// The first operand of the GEP may be a single pointer or a vector of pointers.
+// Example:
+//   %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
+// or
+//   %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
+// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
+//
+// When the first GEP operand is a single pointer - it is the uniform base we
+// are looking for. If the first operand of the GEP is a splat vector - we
+// extract the splat value and use it as a uniform base.
+// In all other cases the function returns 'false'.
+//
+static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index,
                            SelectionDAGBuilder* SDB) {
-  assert (Ptr->getType()->isVectorTy() && "Uexpected pointer type");
-  GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(Ptr);
-  if (!Gep || Gep->getNumOperands() > 2)
-    return false;
-  ShuffleVectorInst *ShuffleInst =
-   dyn_cast<ShuffleVectorInst>(Gep->getPointerOperand());
-  if (!ShuffleInst || !ShuffleInst->getMask()->isNullValue() ||
-      cast<Instruction>(ShuffleInst->getOperand(0))->getOpcode() !=
-      Instruction::InsertElement)
-    return false;
-
-  Ptr = cast<Instruction>(ShuffleInst->getOperand(0))->getOperand(1);
   SelectionDAG& DAG = SDB->DAG;
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  // Check is the Ptr is inside current basic block
-  // If not, look for the shuffle instruction
-  if (SDB->findValue(Ptr))
-    Base = SDB->getValue(Ptr);
-  else if (SDB->findValue(ShuffleInst)) {
-    SDValue ShuffleNode = SDB->getValue(ShuffleInst);
-    SDLoc sdl = ShuffleNode;
-    Base = DAG.getNode(
-        ISD::EXTRACT_VECTOR_ELT, sdl,
-        ShuffleNode.getValueType().getScalarType(), ShuffleNode,
-        DAG.getConstant(0, sdl, TLI.getVectorIdxTy(DAG.getDataLayout())));
-    SDB->setValue(Ptr, Base);
-  }
-  else
+  LLVMContext &Context = *DAG.getContext();
+
+  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
+  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+  if (!GEP || GEP->getNumOperands() > 2)
     return false;
-  Value *IndexVal = Gep->getOperand(1);
-  if (SDB->findValue(IndexVal)) {
-    Index = SDB->getValue(IndexVal);
+  const Value *GEPPtr = GEP->getPointerOperand();
+  if (!GEPPtr->getType()->isVectorTy())
+    Ptr = GEPPtr;
+  else if (!(Ptr = getSplatValue(GEPPtr)))
+    return false;
 
-    if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
+  Value *IndexVal = GEP->getOperand(1);
+
+  // The operands of the GEP may be defined in another basic block.
+  // In this case we'll not find nodes for the operands.
+  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
+    return false;
+
+  Base = SDB->getValue(Ptr);
+  Index = SDB->getValue(IndexVal);
+
+  // Suppress sign extension.
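
The uniform-base test above hinges on recognizing a splat. A toy, self-contained version of that idea over plain integers standing in for the vector of base pointers (illustrative only; LLVM uses getSplatValue on IR values):

    #include <cstdio>
    #include <vector>

    // If every lane of the base-pointer vector is identical, a single
    // scalar base can stand for the whole vector, and the gather/scatter
    // collapses to base + index form.
    static bool getUniformBaseToy(const std::vector<long> &Lanes, long &Base) {
      if (Lanes.empty())
        return false;
      for (long L : Lanes)
        if (L != Lanes[0])
          return false;  // not a splat: no uniform base
      Base = Lanes[0];
      return true;
    }

    int main() {
      long Base = 0;
      std::vector<long> V{64, 64, 64, 64};
      std::printf("%d base=%ld\n", getUniformBaseToy(V, Base), Base);
    }
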
+ if (SExtInst* Sext = dyn_cast(IndexVal)) { + if (SDB->findValue(Sext->getOperand(0))) { IndexVal = Sext->getOperand(0); - if (SDB->findValue(IndexVal)) - Index = SDB->getValue(IndexVal); + Index = SDB->getValue(IndexVal); } - return true; } - return false; + if (!Index.getValueType().isVector()) { + unsigned GEPWidth = GEP->getType()->getVectorNumElements(); + EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); + SmallVector Ops(GEPWidth, Index); + Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + } + return true; } void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) - Value *Ptr = I.getArgOperand(1); + const Value *Ptr = I.getArgOperand(1); SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); @@ -3150,10 +3370,10 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; - Value *BasePtr = Ptr; + const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); - Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), MachineMemOperand::MOStore, VT.getStoreSize(), @@ -3190,7 +3410,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { SDValue InChain = DAG.getRoot(); if (AA->pointsToConstantMemory(MemoryLocation( - PtrOperand, AA->getTypeStoreSize(I.getType()), AAInfo))) { + PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. InChain = DAG.getEntryNode(); } @@ -3212,7 +3433,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDLoc sdl = getCurSDLoc(); // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) - Value *Ptr = I.getArgOperand(0); + const Value *Ptr = I.getArgOperand(0); SDValue Src0 = getValue(I.getArgOperand(3)); SDValue Mask = getValue(I.getArgOperand(2)); @@ -3229,12 +3450,13 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; - Value *BasePtr = Ptr; + const Value *BasePtr = Ptr; bool UniformBase = getUniformBase(BasePtr, Base, Index, this); bool ConstantMemory = false; if (UniformBase && - AA->pointsToConstantMemory( - MemoryLocation(BasePtr, AA->getTypeStoreSize(I.getType()), AAInfo))) { + AA->pointsToConstantMemory(MemoryLocation( + BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3511,6 +3733,8 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, SelectionDAG &DAG) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + // IntegerPartOfX = ((int32_t)(t0); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); @@ -3609,6 +3833,8 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, // // #define LOG2OFe 1.4426950f // t0 = Op * LOG2OFe + + // TODO: What fast-math-flags should be set here? 
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, getF32Constant(DAG, 0x3fb8aa3b, dl)); return getLimitedPrecisionExp2(t0, dl, DAG); @@ -3622,6 +3848,9 @@ static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3718,6 +3947,9 @@ static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3813,6 +4045,9 @@ static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); @@ -3922,6 +4157,7 @@ static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, } } + // TODO: What fast-math-flags should be set on the FMUL node? if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: @@ -3955,9 +4191,9 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, DL, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->hasFnAttribute(Attribute::OptimizeForSize) || - // If optimizing for size, don't insert too many multiplies. This - // inserts up to 5 multiplies. + if (!F->optForSize() || + // If optimizing for size, don't insert too many multiplies. + // This inserts up to 5 multiplies. countPopulation(Val) + Log2_32(Val) < 7) { // We use the simple binary decomposition method to generate the multiply // sequence. There are more optimal ways to do this (for example, @@ -3965,6 +4201,8 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, // the benefit of being both really simple and much better than a libcall. SDValue Res; // Logically starts equal to 1.0 SDValue CurSquare = LHS; + // TODO: Intrinsics should have fast-math-flags that propagate to these + // nodes. while (Val) { if (Val & 1) { if (Res.getNode()) @@ -3990,22 +4228,20 @@ static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } -// getTruncatedArgReg - Find underlying register used for an truncated -// argument. -static unsigned getTruncatedArgReg(const SDValue &N) { - if (N.getOpcode() != ISD::TRUNCATE) +// getUnderlyingArgReg - Find underlying register used for a truncated or +// bitcasted argument. 
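
ExpandPowI above relies on binary decomposition: square-and-multiply needs on the order of popcount(Val) + floor(log2(Val)) multiplies, which is what the "< 7" size budget counts. A standalone version of the same loop (the DAG form additionally skips the first and last redundant multiplies):

    #include <cstdio>

    double powi(double X, unsigned Val) {
      double Res = 1.0;          // logically starts equal to 1.0
      double CurSquare = X;      // X^1, then X^2, X^4, ...
      while (Val) {
        if (Val & 1)
          Res *= CurSquare;      // fold in this power of two
        CurSquare *= CurSquare;
        Val >>= 1;
      }
      return Res;
    }

    int main() {
      std::printf("%f\n", powi(2.0, 13));  // prints 8192
    }
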
+static unsigned getUnderlyingArgReg(const SDValue &N) { + switch (N.getOpcode()) { + case ISD::CopyFromReg: + return cast(N.getOperand(1))->getReg(); + case ISD::BITCAST: + case ISD::AssertZext: + case ISD::AssertSext: + case ISD::TRUNCATE: + return getUnderlyingArgReg(N.getOperand(0)); + default: return 0; - - const SDValue &Ext = N.getOperand(0); - if (Ext.getOpcode() == ISD::AssertZext || - Ext.getOpcode() == ISD::AssertSext) { - const SDValue &CFR = Ext.getOperand(0); - if (CFR.getOpcode() == ISD::CopyFromReg) - return cast(CFR.getOperand(1))->getReg(); - if (CFR.getOpcode() == ISD::TRUNCATE) - return getTruncatedArgReg(CFR); } - return 0; } /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function @@ -4033,11 +4269,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( Op = MachineOperand::CreateFI(FI); if (!Op && N.getNode()) { - unsigned Reg; - if (N.getOpcode() == ISD::CopyFromReg) - Reg = cast(N.getOperand(1))->getReg(); - else - Reg = getTruncatedArgReg(N); + unsigned Reg = getUnderlyingArgReg(N); if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); unsigned PR = RegInfo.getLiveInPhysReg(Reg); @@ -4145,14 +4377,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4169,12 +4393,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4189,14 +4407,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove: { - // FIXME: this definition of "user defined address space" is x86-specific - // Assert for address < 256 since we support only user defined address - // spaces. - assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() - < 256 && - cast(I.getArgOperand(1)->getType())->getAddressSpace() - < 256 && - "Unknown address space"); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -4238,33 +4448,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (const BitCastInst *BCI = dyn_cast(Address)) Address = BCI->getOperand(0); // Parameters are handled specially. - bool isParameter = Variable->getTag() == dwarf::DW_TAG_arg_variable || - isa(Address); - - const AllocaInst *AI = dyn_cast(Address); - - if (isParameter && !AI) { - FrameIndexSDNode *FINode = dyn_cast(N.getNode()); - if (FINode) - // Byval parameter. We have a frame index at this point. 
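
getUnderlyingArgReg above generalizes the old truncate-only walk to any chain of register-preserving wrappers. A toy model of the same recursion, with a hypothetical node struct in place of SDValue:

    #include <cstdio>

    // Walk through "transparent" wrappers (truncate, bitcast, assert-ext)
    // until a copy-from-reg is found; return its register, or 0 otherwise.
    enum Kind { CopyFromReg, Bitcast, Truncate, AssertExt, Other };
    struct ToyNode { Kind K; const ToyNode *Op; unsigned Reg; };

    unsigned underlyingArgReg(const ToyNode *N) {
      switch (N->K) {
      case CopyFromReg: return N->Reg;
      case Bitcast:
      case Truncate:
      case AssertExt:   return underlyingArgReg(N->Op);
      default:          return 0;
      }
    }

    int main() {
      ToyNode Copy{CopyFromReg, nullptr, 41};
      ToyNode Trunc{Truncate, &Copy, 0};
      ToyNode Cast{Bitcast, &Trunc, 0};
      std::printf("%u\n", underlyingArgReg(&Cast));  // prints 41
    }
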
- SDV = DAG.getFrameIndexDbgValue( - Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); - else { - // Address is an argument, so try to emit its dbg value using - // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, - N); - return nullptr; - } - } else if (AI) + bool isParameter = Variable->isParameter() || isa(Address); + auto FINode = dyn_cast(N.getNode()); + if (isParameter && FINode) { + // Byval parameter. We have a frame index at this point. + SDV = DAG.getFrameIndexDbgValue(Variable, Expression, + FINode->getIndex(), 0, dl, SDNodeOrder); + } else if (isa(Address)) { + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); + return nullptr; + } else { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); - else { - // Can't do anything with other non-AI cases yet. - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); - DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); - DEBUG(Address->dump()); - return nullptr; } DAG.AddDbgValue(SDV, N.getNode(), isParameter); } else { @@ -4315,12 +4513,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Check unused arguments map. N = UnusedArgNodeMap[V]; if (N.getNode()) { - // A dbg.value for an alloca is always indirect. - bool IsIndirect = isa(V) || Offset != 0; if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, - IsIndirect, N)) { + false, N)) { SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), - IsIndirect, Offset, dl, SDNodeOrder); + false, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4421,6 +4617,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getRoot(), getValue(I.getArgOperand(0)))); return nullptr; } + case Intrinsic::eh_sjlj_setup_dispatch: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, + getRoot())); + return nullptr; + } case Intrinsic::masked_gather: visitMaskedGather(I); @@ -4614,6 +4815,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { + // TODO: Intrinsic calls should have fast-math-flags. SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -4652,6 +4854,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(Res.getValue(1)); return nullptr; } + case Intrinsic::bitreverse: + setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return nullptr; case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -4693,6 +4900,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return nullptr; } + case Intrinsic::get_dynamic_area_offset: { + SDValue Op = getRoot(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // Result type for @llvm.get.dynamic.area.offset should match PtrTy for + // target. 
+ if (PtrTy != ResTy) + report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" + " intrinsic!"); + Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), + Op); + DAG.setRoot(Op); + setValue(&I, Res); + return nullptr; + } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); @@ -4743,8 +4965,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(Chain, sdl, Src, FIN, - MachinePointerInfo::getFixedStack(FI), + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI), true, false, 0); setValue(&I, Res); DAG.setRoot(Res); @@ -4946,9 +5168,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::clear_cache: return TLI.getClearCacheBuiltinName(); - case Intrinsic::eh_actions: - setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); - return nullptr; case Intrinsic::donothing: // ignore return nullptr; @@ -4965,9 +5184,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { visitStatepoint(I); return nullptr; } - case Intrinsic::experimental_gc_result_int: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: { visitGCResult(I); return nullptr; @@ -4978,7 +5194,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::instrprof_increment: llvm_unreachable("instrprof failed to lower an increment"); - + case Intrinsic::instrprof_value_profile: + llvm_unreachable("instrprof failed to lower a value profiling call"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); @@ -5032,19 +5249,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } - case Intrinsic::eh_begincatch: - case Intrinsic::eh_endcatch: - llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); + + case Intrinsic::eh_exceptionpointer: case Intrinsic::eh_exceptioncode: { - unsigned Reg = TLI.getExceptionPointerRegister(); - assert(Reg && "cannot get exception code on this platform"); + // Get the exception pointer vreg, copy from it, and resize it to fit. 
+ const auto *CPI = cast(I.getArgOperand(0)); MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); - assert(FuncInfo.MBB->isLandingPad() && "eh.exceptioncode in non-lpad"); - unsigned VReg = FuncInfo.MBB->addLiveIn(Reg, PtrRC); + unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); SDValue N = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); - N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + if (Intrinsic == Intrinsic::eh_exceptioncode) + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); setValue(&I, N); return nullptr; } @@ -5053,11 +5269,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); MCSymbol *BeginLabel = nullptr; - if (LandingPad) { + if (EHPadBB) { // Insert a label before the invoke call to mark the try range. This can be // used to detect deletion of the invoke via the MachineModuleInfo. BeginLabel = MMI.getContext().createTempSymbol(); @@ -5067,7 +5283,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, unsigned CallSiteIndex = MMI.getCurrentCallSite(); if (CallSiteIndex) { MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); - LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); + LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); // Now that the call site is handled, stop tracking it. MMI.setCurrentCallSite(0); @@ -5100,14 +5316,21 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(Result.second); } - if (LandingPad) { + if (EHPadBB) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. 
- MMI.addInvoke(LandingPad, BeginLabel, EndLabel); + if (MMI.hasEHFunclets()) { + assert(CLI.CS); + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + EHInfo->addIPToStateRange(cast(CLI.CS->getInstruction()), + BeginLabel, EndLabel); + } else { + MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + } } return Result; @@ -5115,7 +5338,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { PointerType *PT = cast(CS.getCalledValue()->getType()); FunctionType *FTy = cast(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5154,7 +5377,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) .setCallee(RetTy, FTy, Callee, std::move(Args), CS) .setTailCall(isTailCall); - std::pair Result = lowerInvokable(CLI, LandingPad); + std::pair Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); @@ -5978,7 +6201,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; if (OpInfo.ConstraintVT != Input.ConstraintVT) { - const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); std::pair MatchRC = TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, OpInfo.ConstraintVT); @@ -6037,10 +6260,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); - Chain = DAG.getStore(Chain, getCurSDLoc(), - OpInfo.CallOperand, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + Chain = DAG.getStore( + Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + false, false, 0); OpInfo.CallOperand = StackSlot; } @@ -6460,12 +6683,9 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// This is a helper for lowering intrinsics that follow a target calling /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. -std::pair -SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, - unsigned NumArgs, SDValue Callee, - Type *ReturnTy, - MachineBasicBlock *LandingPad, - bool IsPatchPoint) { +std::pair SelectionDAGBuilder::lowerCallOperands( + ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, + Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -6489,7 +6709,7 @@ SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs) .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - return lowerInvokable(CLI, LandingPad); + return lowerInvokable(CLI, EHPadBB); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6593,7 +6813,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 
void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad) { + const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 , // i32 , // i8* , @@ -6630,9 +6850,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); - std::pair Result = - lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, - LandingPad, true); + std::pair Result = lowerCallOperands( + CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); SDNode *CallEnd = Result.second.getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) @@ -6926,8 +7145,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); - else if (j != 0) + else if (j != 0) { MyFlags.Flags.setOrigAlign(1); + if (j == NumParts - 1) + MyFlags.Flags.setSplitEnd(); + } CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); @@ -6986,8 +7208,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { PtrVT)); SDValue L = CLI.DAG.getLoad( RetTys[i], CLI.DL, CLI.Chain, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, - false, false, 1); + MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), + DemoteStackIdx, Offsets[i]), + false, false, false, 1); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -7069,9 +7292,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { if (FastISel) return A->use_empty(); - const BasicBlock *Entry = A->getParent()->begin(); + const BasicBlock &Entry = A->getParent()->front(); for (const User *U : A->users()) - if (cast(U)->getParent() != Entry || isa(U)) + if (cast(U)->getParent() != &Entry || isa(U)) return false; // Use not in entry block. return true; @@ -7138,6 +7361,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // in the various CC lowering callbacks. Flags.setByVal(); } + if (F.getCallingConv() == CallingConv::X86_INTR) { + // IA Interrupt passes frame (1st parameter) by value in the stack. + if (Idx == 1) + Flags.setByVal(); + } if (Flags.isByVal() || Flags.isInAlloca()) { PointerType *Ty = cast(I->getType()); Type *ElementTy = Ty->getElementType(); @@ -7165,8 +7393,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 - else if (i > 0) + else if (i > 0) { MyFlags.Flags.setOrigAlign(1); + if (i == NumRegs - 1) + MyFlags.Flags.setSplitEnd(); + } Ins.push_back(MyFlags); } if (NeedsRegBlock && Value == NumValues - 1) @@ -7235,12 +7466,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // If this argument is unused then remember its value. It is used to generate // debugging information. if (I->use_empty() && NumValues) { - SDB->setUnusedArgValue(I, InVals[i]); + SDB->setUnusedArgValue(&*I, InVals[i]); // Also remember any frame index for use in FastISel. if (FrameIndexSDNode *FI = dyn_cast(InVals[i].getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } for (unsigned Val = 0; Val != NumValues; ++Val) { @@ -7270,18 +7501,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Note down frame index. 
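
The new SplitEnd flag above marks the last piece of a value split across several register-sized parts, complementing the existing Split flag on the first piece. A standalone sketch of the flag assignment loop (PartFlags is an illustrative stand-in for ISD::ArgFlagsTy):

    #include <cstdio>
    #include <vector>

    struct PartFlags { bool Split = false, SplitEnd = false; unsigned OrigAlign = 8; };

    // Part 0 of a multi-part value is tagged Split; every non-first part has
    // its original alignment forced to 1; the last part is also tagged
    // SplitEnd, mirroring the j == 0 / j == NumParts - 1 cases above.
    std::vector<PartFlags> flagParts(unsigned NumParts) {
      std::vector<PartFlags> Flags(NumParts);
      for (unsigned J = 0; J < NumParts; ++J) {
        if (NumParts > 1 && J == 0)
          Flags[J].Split = true;
        else if (J != 0) {
          Flags[J].OrigAlign = 1;
          if (J == NumParts - 1)
            Flags[J].SplitEnd = true;
        }
      }
      return Flags;
    }

    int main() {
      auto F = flagParts(3);
      std::printf("%d %d\n", F[0].Split, F[2].SplitEnd);  // 1 1
    }
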
if (FrameIndexSDNode *FI = dyn_cast(ArgValues[0].getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); - SDB->setValue(I, Res); + SDB->setValue(&*I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { if (LoadSDNode *LNode = dyn_cast(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast(LNode->getBasePtr().getNode())) - FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); } // If this argument is live outside of the entry block, insert a copy from @@ -7293,13 +7524,13 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // uses with vregs. unsigned Reg = cast(Res.getOperand(1))->getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - FuncInfo->ValueMap[I] = Reg; + FuncInfo->ValueMap[&*I] = Reg; continue; } } - if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { - FuncInfo->InitializeRegForValue(I); - SDB->CopyToExportRegsIfNeeded(I); + if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) { + FuncInfo->InitializeRegForValue(&*I); + SDB->CopyToExportRegsIfNeeded(&*I); } } @@ -7401,21 +7632,21 @@ AddSuccessorMBB(const BasicBlock *BB, // If SuccBB has not been created yet, create it. if (!SuccMBB) { MachineFunction *MF = ParentMBB->getParent(); - MachineFunction::iterator BBI = ParentMBB; + MachineFunction::iterator BBI(ParentMBB); SuccMBB = MF->CreateMachineBasicBlock(BB); MF->insert(++BBI, SuccMBB); } // Add it as a successor of ParentMBB. ParentMBB->addSuccessor( - SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); + SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); return SuccMBB; } MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { - MachineFunction::iterator I = MBB; + MachineFunction::iterator I(MBB); if (++I == FuncInfo.MF->end()) return nullptr; - return I; + return &*I; } /// During lowering new call nodes can be created (such as memset, etc.). @@ -7469,14 +7700,18 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, CaseCluster &JTCluster) { assert(First <= Last); - uint32_t Weight = 0; + auto Prob = BranchProbability::getZero(); unsigned NumCmps = 0; std::vector Table; - DenseMap JTWeights; + DenseMap JTProbs; + + // Initialize probabilities in JTProbs. + for (unsigned I = First; I <= Last; ++I) + JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); + for (unsigned I = First; I <= Last; ++I) { assert(Clusters[I].Kind == CC_Range); - Weight += Clusters[I].Weight; - assert(Weight >= Clusters[I].Weight && "Weight overflow!"); + Prob += Clusters[I].Prob; APInt Low = Clusters[I].Low->getValue(); APInt High = Clusters[I].High->getValue(); NumCmps += (Low == High) ? 
1 : 2; @@ -7491,10 +7726,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; for (uint64_t J = 0; J < ClusterSize; ++J) Table.push_back(Clusters[I].MBB); - JTWeights[Clusters[I].MBB] += Clusters[I].Weight; + JTProbs[Clusters[I].MBB] += Clusters[I].Prob; } - unsigned NumDests = JTWeights.size(); + unsigned NumDests = JTProbs.size(); if (isSuitableForBitTests(NumDests, NumCmps, Clusters[First].Low->getValue(), Clusters[Last].High->getValue())) { @@ -7513,9 +7748,10 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, for (MachineBasicBlock *Succ : Table) { if (Done.count(Succ)) continue; - addSuccessorWithWeight(JumpTableMBB, Succ, JTWeights[Succ]); + addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); Done.insert(Succ); } + JumpTableMBB->normalizeSuccProbs(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) @@ -7529,7 +7765,7 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, JTCases.emplace_back(std::move(JTH), std::move(JT)); JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, - JTCases.size() - 1, Weight); + JTCases.size() - 1, Prob); return true; } @@ -7707,19 +7943,29 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, .getSizeInBits(); assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); - if (Low.isNonNegative() && High.slt(BitWidth)) { - // Optimize the case where all the case values fit in a - // word without having to subtract minValue. In this case, - // we can optimize away the subtraction. + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. LowBound = APInt::getNullValue(Low.getBitWidth()); CmpRange = High; + ContiguousRange = false; } else { LowBound = Low; CmpRange = High - Low; } CaseBitsVector CBV; - uint32_t TotalWeight = 0; + auto TotalProb = BranchProbability::getZero(); for (unsigned i = First; i <= Last; ++i) { // Find the CaseBits for this destination. unsigned j; @@ -7727,39 +7973,40 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, if (CBV[j].BB == Clusters[i].MBB) break; if (j == CBV.size()) - CBV.push_back(CaseBits(0, Clusters[i].MBB, 0, 0)); + CBV.push_back( + CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); CaseBits *CB = &CBV[j]; - // Update Mask, Bits and ExtraWeight. + // Update Mask, Bits and ExtraProb. 
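
buildJumpTable above both fills the dense table (one slot per case value) and accumulates a per-destination probability, JTProbs, which then feeds the successor edges. A toy version of that construction:

    #include <cstdio>
    #include <map>
    #include <vector>

    // Illustrative case cluster: value range, destination block id, probability.
    struct ToyCluster { long Low, High; int BB; double Prob; };

    int main() {
      std::vector<ToyCluster> Cs{{0, 1, 10, 0.25}, {2, 2, 11, 0.25}, {3, 4, 10, 0.5}};
      std::vector<int> Table;
      std::map<int, double> JTProbs;
      for (const ToyCluster &C : Cs) {
        for (long V = C.Low; V <= C.High; ++V)
          Table.push_back(C.BB);    // one slot per case value
        JTProbs[C.BB] += C.Prob;    // accumulate per destination
      }
      std::printf("slots=%zu p(BB10)=%f\n", Table.size(), JTProbs[10]);  // 5, 0.75
    }
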
     uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
     uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
     assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
     CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
     CB->Bits += Hi - Lo + 1;
-    CB->ExtraWeight += Clusters[i].Weight;
-    TotalWeight += Clusters[i].Weight;
-    assert(TotalWeight >= Clusters[i].Weight && "Weight overflow!");
+    CB->ExtraProb += Clusters[i].Prob;
+    TotalProb += Clusters[i].Prob;
   }
 
   BitTestInfo BTI;
   std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
-    // Sort by weight first, number of bits second.
-    if (a.ExtraWeight != b.ExtraWeight)
-      return a.ExtraWeight > b.ExtraWeight;
+    // Sort by probability first, number of bits second.
+    if (a.ExtraProb != b.ExtraProb)
+      return a.ExtraProb > b.ExtraProb;
     return a.Bits > b.Bits;
   });
 
   for (auto &CB : CBV) {
     MachineBasicBlock *BitTestBB =
         FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
-    BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraWeight));
+    BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
   }
   BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
-                            SI->getCondition(), -1U, MVT::Other, false, nullptr,
-                            nullptr, std::move(BTI));
+                            SI->getCondition(), -1U, MVT::Other, false,
+                            ContiguousRange, nullptr, nullptr, std::move(BTI),
+                            TotalProb);
 
   BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
-                                    BitTestCases.size() - 1, TotalWeight);
+                                    BitTestCases.size() - 1, TotalProb);
   return true;
 }
@@ -7868,9 +8115,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
                                         MachineBasicBlock *DefaultMBB) {
   MachineFunction *CurMF = FuncInfo.MF;
   MachineBasicBlock *NextMBB = nullptr;
-  MachineFunction::iterator BBI = W.MBB;
+  MachineFunction::iterator BBI(W.MBB);
   if (++BBI != FuncInfo.MF->end())
-    NextMBB = BBI;
+    NextMBB = &*BBI;
 
   unsigned Size = W.LastCluster - W.FirstCluster + 1;
@@ -7906,13 +8153,16 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
                          ISD::SETEQ);
 
       // Update successor info.
-      // Both Small and Big will jump to Small.BB, so we sum up the weights.
-      addSuccessorWithWeight(SwitchMBB, Small.MBB, Small.Weight + Big.Weight);
-      addSuccessorWithWeight(
-          SwitchMBB, DefaultMBB,
-          // The default destination is the first successor in IR.
-          BPI ? BPI->getEdgeWeight(SwitchMBB->getBasicBlock(), (unsigned)0)
-              : 0);
+      // Both Small and Big will jump to Small.BB, so we sum up the
+      // probabilities.
+      addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob);
+      if (BPI)
+        addSuccessorWithProb(
+            SwitchMBB, DefaultMBB,
+            // The default destination is the first successor in IR.
+            BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
+      else
+        addSuccessorWithProb(SwitchMBB, DefaultMBB);
 
       // Insert the true branch.
       SDValue BrCond =
@@ -7929,17 +8179,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
   }
 
   if (TM.getOptLevel() != CodeGenOpt::None) {
-    // Order cases by weight so the most likely case will be checked first.
+    // Order cases by probability so the most likely case will be checked first.
     std::sort(W.FirstCluster, W.LastCluster + 1,
               [](const CaseCluster &a, const CaseCluster &b) {
-      return a.Weight > b.Weight;
+      return a.Prob > b.Prob;
     });
 
     // Rearrange the case blocks so that the last one falls through if possible
-    // without without changing the order of weights.
+    // without changing the order of probabilities.
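
The bit-test mask arithmetic above packs a contiguous case range into set bits: (-1ULL >> (63 - (Hi - Lo))) << Lo produces Hi - Lo + 1 ones starting at bit Lo. A standalone check of that expression:

    #include <cstdint>
    #include <cstdio>

    // One set bit per case value in [Lo, Hi] relative to the low bound.
    static uint64_t rangeMask(unsigned Lo, unsigned Hi) {
      return (~0ULL >> (63 - (Hi - Lo))) << Lo;  // same as the -1ULL form above
    }

    int main() {
      // Case values 2..4 relative to the low bound: bits 2, 3 and 4.
      std::printf("0x%llx\n", (unsigned long long)rangeMask(2, 4));  // 0x1c
    }
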
for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { --I; - if (I->Weight > W.LastCluster->Weight) + if (I->Prob > W.LastCluster->Prob) break; if (I->Kind == CC_Range && I->MBB == NextMBB) { std::swap(*I, *W.LastCluster); @@ -7948,12 +8198,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - // Compute total weight. - uint32_t UnhandledWeights = 0; - for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) { - UnhandledWeights += I->Weight; - assert(UnhandledWeights >= I->Weight && "Weight overflow!"); - } + // Compute total probability. + BranchProbability DefaultProb = W.DefaultProb; + BranchProbability UnhandledProbs = DefaultProb; + for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I) + UnhandledProbs += I->Prob; MachineBasicBlock *CurMBB = W.MBB; for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) { @@ -7967,6 +8216,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } + UnhandledProbs -= I->Prob; switch (I->Kind) { case CC_JumpTable: { @@ -7977,8 +8227,28 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // The jump block hasn't been inserted yet; insert it here. MachineBasicBlock *JumpMBB = JT->MBB; CurMF->insert(BBI, JumpMBB); - addSuccessorWithWeight(CurMBB, Fallthrough); - addSuccessorWithWeight(CurMBB, JumpMBB); + + auto JumpProb = I->Prob; + auto FallthroughProb = UnhandledProbs; + + // If the default statement is a target of the jump table, we evenly + // distribute the default probability to successors of CurMBB. Also + // update the probability on the edge from JumpMBB to Fallthrough. + for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(), + SE = JumpMBB->succ_end(); + SI != SE; ++SI) { + if (*SI == DefaultMBB) { + JumpProb += DefaultProb / 2; + FallthroughProb -= DefaultProb / 2; + JumpMBB->setSuccProbability(SI, DefaultProb / 2); + JumpMBB->normalizeSuccProbs(); + break; + } + } + + addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); + addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); + CurMBB->normalizeSuccProbs(); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. @@ -8004,8 +8274,17 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->Parent = CurMBB; BTB->Default = Fallthrough; - // If we're in the right place, emit the bit test header header right now. - if (CurMBB ==SwitchMBB) { + BTB->DefaultProb = UnhandledProbs; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the probability on the edge to Fallthrough to two + // successors of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Prob += DefaultProb / 2; + BTB->DefaultProb -= DefaultProb / 2; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); BTB->Emitted = true; } @@ -8028,10 +8307,9 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, RHS = I->High; } - // The false weight is the sum of all unhandled cases. - UnhandledWeights -= I->Weight; - CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Weight, - UnhandledWeights); + // The false probability is the sum of all unhandled cases. 
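A minimal sketch of the default-probability split performed in the jump-table case above (assumes the BranchProbability header from this tree; the operators used are exactly those exercised by the patch):

#include "llvm/Support/BranchProbability.h"
using llvm::BranchProbability;

// When the jump table can also reach the default destination, half of the
// default probability moves onto the jump-table edge and comes off the
// fallthrough edge, as in the lowerWorkItem hunk above.
static void splitDefaultProb(BranchProbability &JumpProb,
                             BranchProbability &FallthroughProb,
                             BranchProbability DefaultProb) {
  JumpProb += DefaultProb / 2;
  FallthroughProb -= DefaultProb / 2;
}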
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob, + UnhandledProbs); if (CurMBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8049,8 +8327,8 @@ unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, CaseClusterIt First, CaseClusterIt Last) { return std::count_if(First, Last + 1, [&](const CaseCluster &X) { - if (X.Weight != CC.Weight) - return X.Weight > CC.Weight; + if (X.Prob != CC.Prob) + return X.Prob > CC.Prob; // Ties are broken by comparing the case value. return X.Low->getValue().slt(CC.Low->getValue()); @@ -8066,24 +8344,24 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); - // Balance the tree based on branch weights to create a near-optimal (in terms - // of search time given key frequency) binary search tree. See e.g. Kurt + // Balance the tree based on branch probabilities to create a near-optimal (in + // terms of search time given key frequency) binary search tree. See e.g. Kurt // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). CaseClusterIt LastLeft = W.FirstCluster; CaseClusterIt FirstRight = W.LastCluster; - uint32_t LeftWeight = LastLeft->Weight; - uint32_t RightWeight = FirstRight->Weight; + auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; + auto RightProb = FirstRight->Prob + W.DefaultProb / 2; // Move LastLeft and FirstRight towards each other from opposite directions to - // find a partitioning of the clusters which balances the weight on both - // sides. If LeftWeight and RightWeight are equal, alternate which side is - // taken to ensure 0-weight nodes are distributed evenly. + // find a partitioning of the clusters which balances the probability on both + // sides. If LeftProb and RightProb are equal, alternate which side is + // taken to ensure 0-probability nodes are distributed evenly. unsigned I = 0; while (LastLeft + 1 < FirstRight) { - if (LeftWeight < RightWeight || (LeftWeight == RightWeight && (I & 1))) - LeftWeight += (++LastLeft)->Weight; + if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1))) + LeftProb += (++LastLeft)->Prob; else - RightWeight += (--FirstRight)->Weight; + RightProb += (--FirstRight)->Prob; I++; } @@ -8144,7 +8422,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, const ConstantInt *Pivot = PivotCluster->Low; // New blocks will be inserted immediately after the current one. - MachineFunction::iterator BBI = W.MBB; + MachineFunction::iterator BBI(W.MBB); ++BBI; // We will branch to the LHS if Value < Pivot. If LHS is a single cluster, @@ -8158,7 +8436,8 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, LeftMBB); - WorkList.push_back({LeftMBB, FirstLeft, LastLeft, W.GE, Pivot}); + WorkList.push_back( + {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. ExportFromCurrentBlock(Cond); } @@ -8173,14 +8452,15 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, } else { RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock()); FuncInfo.MF->insert(BBI, RightMBB); - WorkList.push_back({RightMBB, FirstRight, LastRight, Pivot, W.LT}); + WorkList.push_back( + {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2}); // Put Cond in a virtual register to make it available from the new blocks. 
ExportFromCurrentBlock(Cond); } // Create the CaseBlock record that will be used to lower the branch. CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB, - LeftWeight, RightWeight); + LeftProb, RightProb); if (W.MBB == SwitchMBB) visitSwitchCase(CB, SwitchMBB); @@ -8196,9 +8476,10 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { for (auto I : SI.cases()) { MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; const ConstantInt *CaseVal = I.getCaseValue(); - uint32_t Weight = - BPI ? BPI->getEdgeWeight(SI.getParent(), I.getSuccessorIndex()) : 0; - Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Weight)); + BranchProbability Prob = + BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) + : BranchProbability(1, SI.getNumCases() + 1); + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); } MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; @@ -8274,7 +8555,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchWorkList WorkList; CaseClusterIt First = Clusters.begin(); CaseClusterIt Last = Clusters.end() - 1; - WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr}); + auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); while (!WorkList.empty()) { SwitchWorkListItem W = WorkList.back(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 700675453fe7..49a3872d20c8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -17,6 +17,7 @@ #include "StatepointLowering.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -30,7 +31,6 @@ namespace llvm { class AddrSpaceCastInst; -class AliasAnalysis; class AllocaInst; class BasicBlock; class BitCastInst; @@ -154,39 +154,39 @@ private: unsigned JTCasesIndex; unsigned BTCasesIndex; }; - uint32_t Weight; + BranchProbability Prob; static CaseCluster range(const ConstantInt *Low, const ConstantInt *High, - MachineBasicBlock *MBB, uint32_t Weight) { + MachineBasicBlock *MBB, BranchProbability Prob) { CaseCluster C; C.Kind = CC_Range; C.Low = Low; C.High = High; C.MBB = MBB; - C.Weight = Weight; + C.Prob = Prob; return C; } static CaseCluster jumpTable(const ConstantInt *Low, const ConstantInt *High, unsigned JTCasesIndex, - uint32_t Weight) { + BranchProbability Prob) { CaseCluster C; C.Kind = CC_JumpTable; C.Low = Low; C.High = High; C.JTCasesIndex = JTCasesIndex; - C.Weight = Weight; + C.Prob = Prob; return C; } static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High, - unsigned BTCasesIndex, uint32_t Weight) { + unsigned BTCasesIndex, BranchProbability Prob) { CaseCluster C; C.Kind = CC_BitTests; C.Low = Low; C.High = High; C.BTCasesIndex = BTCasesIndex; - C.Weight = Weight; + C.Prob = Prob; return C; } }; @@ -198,13 +198,13 @@ private: uint64_t Mask; MachineBasicBlock* BB; unsigned Bits; - uint32_t ExtraWeight; + BranchProbability ExtraProb; CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, - uint32_t Weight): - Mask(mask), BB(bb), Bits(bits), ExtraWeight(Weight) { } + BranchProbability Prob): + Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { } - CaseBits() : Mask(0), BB(nullptr), Bits(0), ExtraWeight(0) {} 
+ CaseBits() : Mask(0), BB(nullptr), Bits(0) {} }; typedef std::vector<CaseBits> CaseBitsVector; @@ -217,13 +217,13 @@ private: /// blocks needed by multi-case switch statements. struct CaseBlock { CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs, - const Value *cmpmiddle, - MachineBasicBlock *truebb, MachineBasicBlock *falsebb, - MachineBasicBlock *me, - uint32_t trueweight = 0, uint32_t falseweight = 0) - : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), - TrueBB(truebb), FalseBB(falsebb), ThisBB(me), - TrueWeight(trueweight), FalseWeight(falseweight) { } + const Value *cmpmiddle, MachineBasicBlock *truebb, + MachineBasicBlock *falsebb, MachineBasicBlock *me, + BranchProbability trueprob = BranchProbability::getUnknown(), + BranchProbability falseprob = BranchProbability::getUnknown()) + : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs), + TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob), + FalseProb(falseprob) {} // CC - the condition code to use for the case block's setcc node ISD::CondCode CC; @@ -239,8 +239,8 @@ private: // ThisBB - the block into which to emit the code for the setcc and branches MachineBasicBlock *ThisBB; - // TrueWeight/FalseWeight - branch weights. - uint32_t TrueWeight, FalseWeight; + // TrueProb/FalseProb - branch probabilities. + BranchProbability TrueProb, FalseProb; }; struct JumpTable { @@ -272,32 +272,35 @@ private: struct BitTestCase { BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr, - uint32_t Weight): - Mask(M), ThisBB(T), TargetBB(Tr), ExtraWeight(Weight) { } + BranchProbability Prob): + Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { } uint64_t Mask; MachineBasicBlock *ThisBB; MachineBasicBlock *TargetBB; - uint32_t ExtraWeight; + BranchProbability ExtraProb; }; typedef SmallVector<BitTestCase, 3> BitTestInfo; struct BitTestBlock { - BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, MVT RgVT, bool E, - MachineBasicBlock* P, MachineBasicBlock* D, - BitTestInfo C): - First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - Parent(P), Default(D), Cases(std::move(C)) { } + BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT, + bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D, + BitTestInfo C, BranchProbability Pr) + : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), + ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)), + Prob(Pr) {} APInt First; APInt Range; const Value *SValue; unsigned Reg; MVT RegVT; bool Emitted; + bool ContiguousRange; MachineBasicBlock *Parent; MachineBasicBlock *Default; BitTestInfo Cases; + BranchProbability Prob; + BranchProbability DefaultProb; }; /// Minimum jump table density, in percent. @@ -339,6 +342,7 @@ private: CaseClusterIt LastCluster; const ConstantInt *GE; const ConstantInt *LT; + BranchProbability DefaultProb; }; typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; @@ -515,6 +519,7 @@ private: void resetPerFunctionState() { FailureMBB = nullptr; Guard = nullptr; + GuardReg = 0; } MachineBasicBlock *getParentMBB() { return ParentMBB; } @@ -592,10 +597,6 @@ public: /// FunctionLoweringInfo &FuncInfo; - /// OptLevel - What optimization level we're generating code for. /// - CodeGenOpt::Level OptLevel; - /// GFI - Garbage collection metadata for the function.
GCFunctionInfo *GFI; @@ -613,7 +614,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), - DAG(dag), FuncInfo(funcinfo), OptLevel(ol), + DAG(dag), FuncInfo(funcinfo), HasTailCall(false) { } @@ -692,19 +693,20 @@ public: void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, - MachineBasicBlock *SwitchBB, unsigned Opc, - uint32_t TW, uint32_t FW); + MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TW, + BranchProbability FW); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - uint32_t TW, uint32_t FW); + BranchProbability TW, BranchProbability FW); bool ShouldEmitAsBranches(const std::vector &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); std::pair lowerCallOperands( ImmutableCallSite CS, @@ -712,7 +714,7 @@ public: unsigned NumArgs, SDValue Callee, Type *ReturnTy, - MachineBasicBlock *LandingPad = nullptr, + const BasicBlock *EHPadBB = nullptr, bool IsPatchPoint = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the @@ -722,11 +724,11 @@ public: // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. void LowerStatepoint(ImmutableStatepoint Statepoint, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); private: - std::pair lowerInvokable( - TargetLowering::CallLoweringInfo &CLI, - MachineBasicBlock *LandingPad); + std::pair + lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB = nullptr); // Terminator instructions. 
void visitRet(const ReturnInst &I); @@ -734,11 +736,18 @@ private: void visitSwitch(const SwitchInst &I); void visitIndirectBr(const IndirectBrInst &I); void visitUnreachable(const UnreachableInst &I); + void visitCleanupRet(const CleanupReturnInst &I); + void visitCatchSwitch(const CatchSwitchInst &I); + void visitCatchRet(const CatchReturnInst &I); + void visitCatchPad(const CatchPadInst &I); + void visitCleanupPad(const CleanupPadInst &CPI); + + BranchProbability getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; + void addSuccessorWithProb( + MachineBasicBlock *Src, MachineBasicBlock *Dst, + BranchProbability Prob = BranchProbability::getUnknown()); - uint32_t getEdgeWeight(const MachineBasicBlock *Src, - const MachineBasicBlock *Dst) const; - void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, - uint32_t Weight = 0); public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); @@ -748,7 +757,7 @@ public: void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, - uint32_t BranchWeightToNext, + BranchProbability BranchProbToNext, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB); @@ -842,7 +851,7 @@ private: void visitVACopy(const CallInst &I); void visitStackmap(const CallInst &I); void visitPatchpoint(ImmutableCallSite CS, - MachineBasicBlock *LandingPad = nullptr); + const BasicBlock *EHPadBB = nullptr); // These three are implemented in StatepointLowering.cpp void visitStatepoint(const CallInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 5b9b18286fae..a1c6c4c1dd63 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -30,6 +31,11 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +static cl::opt +VerboseDAGDumping("dag-dump-verbose", cl::Hidden, + cl::desc("Display more information when dumping selection " + "DAG nodes.")); + std::string SDNode::getOperationName(const SelectionDAG *G) const { switch (getOpcode()) { default: @@ -102,6 +108,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH"; case ISD::ConstantPool: return "ConstantPool"; case ISD::TargetIndex: return "TargetIndex"; case ISD::ExternalSymbol: return "ExternalSymbol"; @@ -145,6 +152,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FABS: return "fabs"; case ISD::FMINNUM: return "fminnum"; case ISD::FMAXNUM: return "fmaxnum"; + case ISD::FMINNAN: return "fminnan"; + case ISD::FMAXNAN: return "fmaxnan"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; @@ -201,6 +210,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; + case ISD::SETCCE: return "setcce"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case 
ISD::SELECT_CC: return "select_cc"; @@ -273,6 +283,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CALLSEQ_START: return "callseq_start"; case ISD::CALLSEQ_END: return "callseq_end"; + // EH instructions + case ISD::CATCHRET: return "catchret"; + case ISD::CLEANUPRET: return "cleanupret"; + // Other operators case ISD::LOAD: return "load"; case ISD::STORE: return "store"; @@ -295,15 +309,17 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::LIFETIME_END: return "lifetime.end"; case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; + case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; // Bit manipulation + case ISD::BITREVERSE: return "bitreverse"; case ISD::BSWAP: return "bswap"; case ISD::CTPOP: return "ctpop"; case ISD::CTTZ: return "cttz"; case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; case ISD::CTLZ: return "ctlz"; case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; - + // Trampolines case ISD::INIT_TRAMPOLINE: return "init_trampoline"; case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; @@ -320,7 +336,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETO: return "seto"; case ISD::SETUO: return "setuo"; - case ISD::SETUEQ: return "setue"; + case ISD::SETUEQ: return "setueq"; case ISD::SETUGT: return "setugt"; case ISD::SETUGE: return "setuge"; case ISD::SETULT: return "setult"; @@ -352,6 +368,16 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } +static Printable PrintNodeId(const SDNode &Node) { + return Printable([&Node](raw_ostream &OS) { +#ifndef NDEBUG + OS << 't' << Node.PersistentId; +#else + OS << (const void*)&Node; +#endif + }); +} + void SDNode::dump() const { dump(nullptr); } void SDNode::dump(const SelectionDAG *G) const { print(dbgs(), G); @@ -359,8 +385,6 @@ void SDNode::dump(const SelectionDAG *G) const { } void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { - OS << (const void*)this << ": "; - for (unsigned i = 0, e = getNumValues(); i != e; ++i) { if (i) OS << ","; if (getValueType(i) == MVT::Other) @@ -368,7 +392,6 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { else OS << getValueType(i).getEVTString(); } - OS << " = " << getOperationName(G); } void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { @@ -523,48 +546,58 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { << ']'; } - if (unsigned Order = getIROrder()) - OS << " [ORD=" << Order << ']'; + if (VerboseDAGDumping) { + if (unsigned Order = getIROrder()) + OS << " [ORD=" << Order << ']'; - if (getNodeId() != -1) - OS << " [ID=" << getNodeId() << ']'; + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; - if (!G) - return; + if (!G) + return; - DILocation *L = getDebugLoc(); - if (!L) - return; + DILocation *L = getDebugLoc(); + if (!L) + return; - if (auto *Scope = L->getScope()) - OS << Scope->getFilename(); - else - OS << ""; - OS << ':' << L->getLine(); - if (unsigned C = L->getColumn()) - OS << ':' << C; + if (auto *Scope = L->getScope()) + OS << Scope->getFilename(); + else + OS << ""; + OS << ':' << L->getLine(); + if (unsigned C = L->getColumn()) + OS << ':' << C; + } +} + +/// Return true if this node is so simple that we should just print it inline +/// if it appears as an operand. 
+static bool shouldPrintInline(const SDNode &Node) { + if (Node.getOpcode() == ISD::EntryToken) + return false; + return Node.getNumOperands() == 0; } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { - for (const SDValue &Op : N->op_values()) + for (const SDValue &Op : N->op_values()) { + if (shouldPrintInline(*Op.getNode())) + continue; if (Op.getNode()->hasOneUse()) DumpNodes(Op.getNode(), indent+2, G); - else - dbgs() << "\n" << std::string(indent+2, ' ') - << (void*)Op.getNode() << ": "; + } - dbgs() << '\n'; dbgs().indent(indent); N->dump(G); } void SelectionDAG::dump() const { - dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) { - const SDNode *N = I; - if (!N->hasOneUse() && N != getRoot().getNode()) + const SDNode *N = &*I; + if (!N->hasOneUse() && N != getRoot().getNode() && + (!shouldPrintInline(*N) || N->use_empty())) DumpNodes(N, 2, this); } @@ -573,10 +606,30 @@ void SelectionDAG::dump() const { } void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + OS << PrintNodeId(*this) << ": "; print_types(OS, G); + OS << " = " << getOperationName(G); print_details(OS, G); } +static bool printOperand(raw_ostream &OS, const SelectionDAG *G, + const SDValue Value) { + if (!Value.getNode()) { + OS << ""; + return false; + } else if (shouldPrintInline(*Value.getNode())) { + OS << Value->getOperationName(G) << ':'; + Value->print_types(OS, G); + Value->print_details(OS, G); + return true; + } else { + OS << PrintNodeId(*Value.getNode()); + if (unsigned RN = Value.getResNo()) + OS << ':' << RN; + return false; + } +} + typedef SmallPtrSet VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { @@ -589,20 +642,13 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, // Having printed this SDNode, walk the children: for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - const SDNode *child = N->getOperand(i).getNode(); - if (i) OS << ","; OS << " "; - if (child->getNumOperands() == 0) { - // This child has no grandchildren; print it inline right here. - child->printr(OS, G); - once.insert(child); - } else { // Just the address. FIXME: also print the child's opcode. 
- OS << (const void*)child; - if (unsigned RN = N->getOperand(i).getResNo()) - OS << ":" << RN; - } + const SDValue Op = N->getOperand(i); + bool printedInline = printOperand(OS, G, Op); + if (printedInline) + once.insert(Op.getNode()); } OS << "\n"; @@ -664,12 +710,9 @@ void SDNode::dumprFull(const SelectionDAG *G) const { } void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { - print_types(OS, G); + printr(OS, G); for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (i) OS << ", "; else OS << " "; - OS << (void*)getOperand(i).getNode(); - if (unsigned RN = getOperand(i).getResNo()) - OS << ":" << RN; + printOperand(OS, G, getOperand(i)); } - print_details(OS, G); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 97ece8b9248a..853a21a15eb9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" @@ -263,13 +264,17 @@ namespace llvm { return; IS.OptLevel = NewOptLevel; IS.TM.setOptLevel(NewOptLevel); - SavedFastISel = IS.TM.Options.EnableFastISel; - if (NewOptLevel == CodeGenOpt::None) - IS.TM.setFastISel(true); DEBUG(dbgs() << "\nChanging optimization level for Function " << IS.MF->getFunction()->getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) { + IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); + DEBUG(dbgs() << "\tFastISel is " + << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") + << "\n"); + } } ~OptLevelChanger() { @@ -293,6 +298,11 @@ namespace llvm { const TargetLowering *TLI = IS->TLI; const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); + // Try first to see if the Target has its own way of selecting a scheduler + if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) { + return SchedulerCtor(IS, OptLevel); + } + if (OptLevel == CodeGenOpt::None || (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) @@ -350,8 +360,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, OptLevel(OL), DAGSize(0) { initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); - initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry()); - initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); initializeTargetLibraryInfoWrapperPassPass( *PassRegistry::getPassRegistry()); } @@ -363,13 +374,12 @@ SelectionDAGISel::~SelectionDAGISel() { } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); if (UseMBPI && OptLevel != CodeGenOpt::None) - AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -380,10 +390,10 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This is required for correctness, so it must be done at -O0. 
/// -static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { +static void SplitCriticalSideEffectEdges(Function &Fn) { // Loop for blocks with phi nodes. - for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { - PHINode *PN = dyn_cast(BB->begin()); + for (BasicBlock &BB : Fn) { + PHINode *PN = dyn_cast(BB.begin()); if (!PN) continue; ReprocessBlock: @@ -391,7 +401,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // are potentially trapping constant expressions. Constant expressions are // the only potentially trapping value that can occur as the argument to a // PHI. - for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast(I)); ++I) + for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast(I)); ++I) for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { ConstantExpr *CE = dyn_cast(PN->getIncomingValue(i)); if (!CE || !CE->canTrap()) continue; @@ -405,8 +415,8 @@ static void SplitCriticalSideEffectEdges(Function &Fn, AliasAnalysis *AA) { // Okay, we have to split this edge. SplitCriticalEdge( - Pred->getTerminator(), GetSuccessorNumber(Pred, BB), - CriticalEdgeSplittingOptions(AA).setMergeIdenticalEdges()); + Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), + CriticalEdgeSplittingOptions().setMergeIdenticalEdges()); goto ReprocessBlock; } } @@ -437,19 +447,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); LibInfo = &getAnalysis().getTLI(); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : nullptr; DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); - SplitCriticalSideEffectEdges(const_cast(Fn), AA); + SplitCriticalSideEffectEdges(const_cast(Fn)); CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) - FuncInfo->BPI = &getAnalysis(); + FuncInfo->BPI = &getAnalysis().getBPI(); else FuncInfo->BPI = nullptr; @@ -457,15 +467,50 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MF->setHasInlineAsm(false); + FuncInfo->SplitCSR = false; + SmallVector Returns; + + // We split CSR if the target supports it for the given function + // and the function has only return exits. + if (TLI->supportSplitCSR(MF)) { + FuncInfo->SplitCSR = true; + + // Collect all the return blocks. + for (const BasicBlock &BB : Fn) { + if (!succ_empty(&BB)) + continue; + + const TerminatorInst *Term = BB.getTerminator(); + if (isa(Term)) + continue; + if (isa(Term)) { + Returns.push_back(FuncInfo->MBBMap[&BB]); + continue; + } + + // Bail out if the exit block is not Return nor Unreachable. + FuncInfo->SplitCSR = false; + break; + } + } + + MachineBasicBlock *EntryMBB = &MF->front(); + if (FuncInfo->SplitCSR) + // This performs initialization so lowering for SplitCSR will be correct. + TLI->initializeSplitCSR(EntryMBB); + SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. - MachineBasicBlock *EntryMBB = MF->begin(); const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); + // Insert copies in the entry block and the return blocks. 
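A standalone restatement of the SplitCSR legality scan from the hunk above (assumes the LLVM IR headers from this tree; extracted for clarity, not the patch's literal code): callee-saved-register splitting stays enabled only if every block with no successors terminates in a return or an unreachable.

#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool hasOnlyReturnExits(const Function &Fn) {
  for (const BasicBlock &BB : Fn) {
    if (!succ_empty(&BB))
      continue; // only exit blocks matter
    const TerminatorInst *Term = BB.getTerminator();
    if (!isa<ReturnInst>(Term) && !isa<UnreachableInst>(Term))
      return false; // e.g. a resume-terminated exit disables SplitCSR
  }
  return true;
}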
+ if (FuncInfo->SplitCSR) + TLI->insertCopiesSplitCSR(EntryMBB, Returns); + DenseMap LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), @@ -882,7 +927,7 @@ void SelectionDAGISel::DoInstructionSelection() { // graph) and preceding back toward the beginning (the entry // node). while (ISelPosition != CurDAG->allnodes_begin()) { - SDNode *Node = --ISelPosition; + SDNode *Node = &*--ISelPosition; // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes, // but there are currently some corner cases that it misses. Also, this // makes it theoretically possible to disable the DAGCombiner. @@ -916,14 +961,47 @@ void SelectionDAGISel::DoInstructionSelection() { PostprocessISelDAG(); } +static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) { + for (const User *U : CPI->users()) { + if (const IntrinsicInst *EHPtrCall = dyn_cast(U)) { + Intrinsic::ID IID = EHPtrCall->getIntrinsicID(); + if (IID == Intrinsic::eh_exceptionpointer || + IID == Intrinsic::eh_exceptioncode) + return true; + } + } + return false; +} + /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. bool SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; - + const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn(); + const BasicBlock *LLVMBB = MBB->getBasicBlock(); const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); + // Catchpads have one live-in register, which typically holds the exception + // pointer or code. + if (const auto *CPI = dyn_cast(LLVMBB->getFirstNonPHI())) { + if (hasExceptionPointerOrCodeUser(CPI)) { + // Get or create the virtual register to hold the pointer or code. Mark + // the live in physreg and copy into the vreg. + MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); + assert(EHPhysReg && "target lacks exception pointer register"); + MBB->addLiveIn(EHPhysReg); + unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), + TII->get(TargetOpcode::COPY), VReg) + .addReg(EHPhysReg, RegState::Kill); + } + return true; + } + + if (!LLVMBB->isLandingPad()) + return true; + // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -935,52 +1013,12 @@ bool SelectionDAGISel::PrepareEHLandingPad() { BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); - // If this is an MSVC-style personality function, we need to split the landing - // pad into several BBs. - const BasicBlock *LLVMBB = MBB->getBasicBlock(); - const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); - MF->getMMI().addPersonality(MBB, cast(LPadInst->getParent() - ->getParent() - ->getPersonalityFn() - ->stripPointerCasts())); - EHPersonality Personality = MF->getMMI().getPersonalityType(); - - if (isMSVCEHPersonality(Personality)) { - SmallVector ClauseBBs; - const IntrinsicInst *ActionsCall = - dyn_cast(LLVMBB->getFirstInsertionPt()); - // Get all invoke BBs that unwind to this landingpad. 
- SmallVector InvokeBBs(MBB->pred_begin(), - MBB->pred_end()); - if (ActionsCall && ActionsCall->getIntrinsicID() == Intrinsic::eh_actions) { - // If this is a call to llvm.eh.actions followed by indirectbr, then we've - // run WinEHPrepare, and we should remove this block from the machine CFG. - // Mark the targets of the indirectbr as landingpads instead. - for (const BasicBlock *LLVMSucc : successors(LLVMBB)) { - MachineBasicBlock *ClauseBB = FuncInfo->MBBMap[LLVMSucc]; - // Add the edge from the invoke to the clause. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->addSuccessor(ClauseBB); - - // Mark the clause as a landing pad or MI passes will delete it. - ClauseBB->setIsLandingPad(); - } - } - - // Remove the edge from the invoke to the lpad. - for (MachineBasicBlock *InvokeBB : InvokeBBs) - InvokeBB->removeSuccessor(MBB); - - // Don't select instructions for the landingpad. - return false; - } - // Mark exception register as live in. - if (unsigned Reg = TLI->getExceptionPointerRegister()) + if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. - if (unsigned Reg = TLI->getExceptionSelectorRegister()) + if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); return true; @@ -992,9 +1030,9 @@ bool SelectionDAGISel::PrepareEHLandingPad() { static bool isFoldedOrDeadInstruction(const Instruction *I, FunctionLoweringInfo *FuncInfo) { return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. - !isa(I) && // Terminators aren't folded. + !isa(I) && // Terminators aren't folded. !isa(I) && // Debug instructions aren't folded. - !isa(I) && // Landingpad instructions aren't folded. + !I->isEHPad() && // EH pad instructions aren't folded. !FuncInfo->isExportedInst(I); // Exported instrs must be computed. } @@ -1143,17 +1181,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->VisitedBBs.insert(LLVMBB); } - BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); + BasicBlock::const_iterator const Begin = + LLVMBB->getFirstNonPHI()->getIterator(); BasicBlock::const_iterator const End = LLVMBB->end(); BasicBlock::const_iterator BI = End; FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + if (!FuncInfo->MBB) + continue; // Some blocks like catchpads have no code or MBB. FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); // Setup an EH landing-pad block. FuncInfo->ExceptionPointerVirtReg = 0; FuncInfo->ExceptionSelectorVirtReg = 0; - if (LLVMBB->isLandingPad()) + if (LLVMBB->isEHPad()) if (!PrepareEHLandingPad()) continue; @@ -1192,7 +1233,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { unsigned NumFastIselRemaining = std::distance(Begin, End); // Do FastISel on as many instructions as possible. for (; BI != Begin; --BI) { - const Instruction *Inst = std::prev(BI); + const Instruction *Inst = &*std::prev(BI); // If we no longer require this instruction, skip it. if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { @@ -1212,8 +1253,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // then see if there is a load right before the selected instructions. // Try to fold the load if so. 
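The backwards scan implemented just below, restated as a self-contained sketch (assumes the LLVM IR headers; FoldedOrDead stands in for isFoldedOrDeadInstruction): walk from the selected instruction toward the block start, skipping instructions that were folded away, to find the load-fold candidate.

#include <iterator>
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static const Instruction *
findFoldCandidate(const Instruction *Inst, const Instruction *Begin,
                  bool (*FoldedOrDead)(const Instruction *)) {
  const Instruction *BeforeInst = Inst;
  while (BeforeInst != Begin) {
    BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst));
    if (!FoldedOrDead(BeforeInst))
      break; // first live instruction before Inst
  }
  return BeforeInst;
}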
const Instruction *BeforeInst = Inst; - while (BeforeInst != Begin) { - BeforeInst = std::prev(BasicBlock::const_iterator(BeforeInst)); + while (BeforeInst != &*Begin) { + BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) break; } @@ -1245,7 +1286,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // For the purpose of debugging, just abort. report_fatal_error("FastISel didn't select the entire block"); - if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() && + !Inst->use_empty()) { unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) R = FuncInfo->CreateRegs(Inst->getType()); @@ -1253,7 +1295,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { bool HadTailCall = false; MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; - SelectBasicBlock(Inst, BI, HadTailCall); + SelectBasicBlock(Inst->getIterator(), BI, HadTailCall); // If the call was emitted as a tail call, we're done with the block. // We also need to delete any previously emitted instructions. @@ -1483,35 +1525,39 @@ SelectionDAGISel::FinishBasicBlock() { CodeGenAndEmitDAG(); } - uint32_t UnhandledWeight = 0; - for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) - UnhandledWeight += SDB->BitTestCases[i].Cases[j].ExtraWeight; - + BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob; for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { - UnhandledWeight -= SDB->BitTestCases[i].Cases[j].ExtraWeight; + UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb; // Set the current basic block to the mbb we wish to insert the code into FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - if (j+1 != ej) - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Cases[j+1].ThisBB, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); - else - SDB->visitBitTestCase(SDB->BitTestCases[i], - SDB->BitTestCases[i].Default, - UnhandledWeight, - SDB->BitTestCases[i].Reg, - SDB->BitTestCases[i].Cases[j], - FuncInfo->MBB); + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. + MachineBasicBlock *NextMBB; + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; + else if (j + 1 != ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; + else + NextMBB = SDB->BitTestCases[i].Default; + + SDB->visitBitTestCase(SDB->BitTestCases[i], + NextMBB, + UnhandledProb, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); CodeGenAndEmitDAG(); + + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + break; } // Update PHI Nodes @@ -1642,14 +1688,7 @@ SelectionDAGISel::FinishBasicBlock() { /// one preferred by the target. 
/// ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { - RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault(); - - if (!Ctor) { - Ctor = ISHeuristic; - RegisterScheduler::setDefault(Ctor); - } - - return Ctor(this, OptLevel); + return ISHeuristic(this, OptLevel); } //===----------------------------------------------------------------------===// @@ -1961,7 +2000,7 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { } /// GetVBR - decode a vbr encoding whose top bit is set. -LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { assert(Val >= 128 && "Not a VBR"); Val &= 127; // Remove first vbr bit. @@ -2287,7 +2326,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, } /// CheckSame - Implements OP_CheckSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SmallVectorImpl > &RecordedNodes) { @@ -2298,7 +2337,7 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckChildSame - Implements OP_CheckChildXSame. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SmallVectorImpl > &RecordedNodes, @@ -2310,20 +2349,20 @@ CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, } /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel) { return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. 
-LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, const SelectionDAGISel &SDISel, SDNode *N) { return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { uint16_t Opc = MatcherTable[MatcherIndex++]; @@ -2331,7 +2370,7 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N->getOpcode() == Opc; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2341,7 +2380,7 @@ CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL, unsigned ChildNo) { @@ -2351,14 +2390,14 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { return cast(N)->get() == (ISD::CondCode)MatcherTable[MatcherIndex++]; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; @@ -2369,7 +2408,7 @@ CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, return VT == MVT::iPTR && cast(N)->getVT() == TLI->getPointerTy(DL); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2380,7 +2419,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return C && C->getSExtValue() == Val; } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) @@ -2388,7 +2427,7 @@ CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t Val = MatcherTable[MatcherIndex++]; @@ -2401,7 +2440,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); } -LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const SelectionDAGISel &SDISel) { int64_t 
Val = MatcherTable[MatcherIndex++]; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 4df5ede388fc..2764688518c2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -80,9 +80,16 @@ namespace llvm { return true; } - static bool hasNodeAddressLabel(const SDNode *Node, - const SelectionDAG *Graph) { - return true; + static std::string getNodeIdentifierLabel(const SDNode *Node, + const SelectionDAG *Graph) { + std::string R; + raw_string_ostream OS(R); +#ifndef NDEBUG + OS << 't' << Node->PersistentId; +#else + OS << static_cast(Node); +#endif + return R; } /// If you want to override the dot attributes printed for a particular diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 34688df4765b..050ec2116c5d 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -95,6 +96,9 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); const unsigned FI = cast(SpillSlot)->getIndex(); + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + MFI->markAsStatepointSpillSlotObjectIndex(FI); + Builder.FuncInfo.StatepointStackSlots.push_back(FI); AllocatedStackSlots.push_back(true); return SpillSlot; @@ -105,8 +109,8 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, return Builder.DAG.getFrameIndex(FI, ValueType); } // Note: We deliberately choose to advance this only on the failing path. - // Doing so on the suceeding path involes a bit of complexity that caused a - // minor bug previously. Unless performance shows this matters, please + // Doing so on the succeeding path involves a bit of complexity that caused + // a minor bug previously. Unless performance shows this matters, please // keep this code as simple as possible. NextSlotToAllocate++; } @@ -119,7 +123,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, static Optional findPreviousSpillSlot(const Value *Val, SelectionDAGBuilder &Builder, int LookUpDepth) { - // Can not look any futher - give up now + // Can not look any further - give up now if (LookUpDepth <= 0) return Optional(); @@ -196,7 +200,7 @@ static Optional findPreviousSpillSlot(const Value *Val, /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. -/// This helps to avoid series of loads and stores that only serve to resuffle +/// This helps to avoid series of loads and stores that only serve to reshuffle /// values on the stack between calls. 
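Illustrative sketch of the slot-reuse policy documented above (simplified, hypothetical types, not the patch's code; the real implementation works on frame indexes and SDValues): remember the stack slot each value was spilled to at the previous statepoint and hand the same slot back, so consecutive safepoints do not reshuffle values between slots.

#include <map>

struct SpillSlotCache {
  std::map<const void *, int> PreviousSlot; // value identity -> frame index
  int allocate(const void *V, int FreshFI) {
    auto It = PreviousSlot.find(V);
    if (It != PreviousSlot.end())
      return It->second;       // reuse the slot from the last statepoint
    PreviousSlot[V] = FreshFI; // first spill: record the fresh slot
    return FreshFI;
  }
};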
static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { @@ -255,7 +259,7 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl &Bases, SmallVectorImpl &Relocs, SelectionDAGBuilder &Builder) { - // This is horribly ineffecient, but I don't care right now + // This is horribly inefficient, but I don't care right now SmallSet Seen; SmallVector NewBases, NewPtrs, NewRelocs; @@ -283,13 +287,29 @@ static void removeDuplicatesGCPtrs(SmallVectorImpl &Bases, /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result static SDNode * -lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, +lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, SelectionDAGBuilder &Builder, SmallVectorImpl &PendingExports) { ImmutableCallSite CS(ISP.getCallSite()); - SDValue ActualCallee = Builder.getValue(ISP.getCalledValue()); + SDValue ActualCallee; + + if (ISP.getNumPatchBytes() > 0) { + // If we've been asked to emit a nop sequence instead of a call instruction + // for this statepoint then don't lower the call target, but use a constant + // `null` instead. Not lowering the call target lets statepoint clients get + // away without providing a physical address for the symbolic call target at + // link time. + + const auto &TLI = Builder.DAG.getTargetLoweringInfo(); + const auto &DL = Builder.DAG.getDataLayout(); + + unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); + ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(), + TLI.getPointerTy(DL, AS)); + } else + ActualCallee = Builder.getValue(ISP.getCalledValue()); assert(CS.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); @@ -300,7 +320,7 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, DefTy, LandingPad, + ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB, false /* IsPatchPoint */); SDNode *CallEnd = CallEndVal.getNode(); @@ -317,25 +337,33 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, // ch, glue = callseq_end ch, glue // get_return_value ch, glue // - // get_return_value can either be a CopyFromReg to grab the return value from - // %RAX, or it can be a LOAD to load a value returned by reference via a stack - // slot. + // get_return_value can either be a sequence of CopyFromReg instructions + // to grab the return value from the return register(s), or it can be a LOAD + // to load a value returned by reference via a stack slot. - if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg || - CallEnd->getOpcode() == ISD::LOAD)) - CallEnd = CallEnd->getOperand(0).getNode(); + if (HasDef) { + if (CallEnd->getOpcode() == ISD::LOAD) + CallEnd = CallEnd->getOperand(0).getNode(); + else + while (CallEnd->getOpcode() == ISD::CopyFromReg) + CallEnd = CallEnd->getOperand(0).getNode(); + } assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!"); - if (HasDef) { - if (CS.isInvoke()) { - // Result value will be used in different basic block for invokes - // so we need to export it now. But statepoint call has a different type - // than the actuall call. It means that standart exporting mechanism will - // create register of the wrong type. 
So instead we need to create - register with correct type and save value into it manually. + // Export the result value if needed + const Instruction *GCResult = ISP.getGCResult(); + if (HasDef && GCResult) { + if (GCResult->getParent() != CS.getParent()) { + // Result value will be used in a different basic block so we need to + // export it now. + // Default exporting mechanism will not work here because statepoint call + // has a different type than the actual call. It means that by default + // llvm will create export register of the wrong type (always i32 in our + // case). So instead we need to create export register with correct type + // manually. // TODO: To eliminate this problem we can remove gc.result intrinsics - completelly and make statepoint call to return a tuple. + completely and make statepoint call to return a tuple. unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType()); RegsForValue RFV( *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(), @@ -347,8 +375,9 @@ lowerCallFromStatepoint(ImmutableStatepoint ISP, MachineBasicBlock *LandingPad, PendingExports.push_back(Chain); Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg; } else { - // The value of the statepoint itself will be the value of call itself. - // We'll replace the actually call node shortly. gc_result will grab + // Result value will be used in the same basic block. Don't export it or + // perform any explicit register copies. + // We'll replace the actual call node shortly. gc_result will grab // this value. Builder.setValue(CS.getInstruction(), ReturnValue); } @@ -411,7 +440,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // chaining stores one after another, this may allow // a bit more optimal scheduling for them Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - MachinePointerInfo::getFixedStack(Index), + MachinePointerInfo::getFixedStack( + Builder.DAG.getMachineFunction(), Index), false, false, 0); Builder.StatepointLowering.setLocation(Incoming, Loc); @@ -483,21 +513,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // to the GCStrategy from there (yet). GCStrategy &S = Builder.GFI->getStrategy(); for (const Value *V : Bases) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed base pointer found in statepoint"); } } for (const Value *V : Ptrs) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed derived pointer found in statepoint"); } } for (const Value *V : Relocations) { - auto Opt = S.isGCManagedPointer(V); + auto Opt = S.isGCManagedPointer(V->getType()); if (Opt.hasValue()) { assert(Opt.getValue() && "non gc managed pointer relocated"); } @@ -581,19 +611,21 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is the case for allocas - // and constants. For this values we can avoid emiting spill load while + // and constants. For these values we can avoid emitting spill load while // visiting corresponding gc_relocate. // Actually we do not need to record them in this map at all. - // We do this only to check that we are not relocating any unvisited value. + // We do this only to check that we are not relocating any unvisited + // value.
SpillMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. // Note that it would be incorrect to teach llvm that all relocates are - // uses of the corresponging values so that it would automatically + // uses of the corresponding values so that it would automatically // export them. Relocates of the spilled values does not use original // value. - if (StatepointSite.getCallSite().isInvoke()) + if (RelocateOpers.getUnderlyingCallSite().getParent() != + StatepointInstr->getParent()) Builder.ExportFromCurrentBlock(V); } } @@ -608,7 +640,7 @@ void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { } void SelectionDAGBuilder::LowerStatepoint( - ImmutableStatepoint ISP, MachineBasicBlock *LandingPad /*=nullptr*/) { + ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) { // The basic scheme here is that information about both the original call and // the safepoint is encoded in the CallInst. We create a temporary call and // lower it, then reverse engineer the calling sequence. @@ -620,14 +652,12 @@ void SelectionDAGBuilder::LowerStatepoint( ImmutableCallSite CS(ISP.getCallSite()); #ifndef NDEBUG - // Consistency check. Don't do this for invokes. It would be too - // expensive to preserve this information across different basic blocks - if (!CS.isInvoke()) { - for (const User *U : CS->users()) { - const CallInst *Call = cast<CallInst>(U); - if (isGCRelocate(Call)) - StatepointLowering.scheduleRelocCall(*Call); - } + // Consistency check. Check only relocates in the same basic block as their + // statepoint. + for (const User *U : CS->users()) { + const CallInst *Call = cast<CallInst>(U); + if (isGCRelocate(Call) && Call->getParent() == CS.getParent()) + StatepointLowering.scheduleRelocCall(*Call); } #endif @@ -648,7 +678,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Get call node, we will replace it later with statepoint SDNode *CallNode = - lowerCallFromStatepoint(ISP, LandingPad, *this, PendingExports); + lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. @@ -790,7 +820,7 @@ void SelectionDAGBuilder::LowerStatepoint( // Replace original call DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root - // Remove originall call node + // Remove original call node DAG.DeleteNode(CallNode); // DON'T set the root - under the assumption that it's already set past the @@ -809,8 +839,9 @@ void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { Instruction *I = cast<Instruction>(CI.getArgOperand(0)); assert(isStatepoint(I) && "first argument must be a statepoint token"); - if (isa<InvokeInst>(I)) { - // For invokes we should have stored call result in a virtual register. + if (I->getParent() != CI.getParent()) { + // Statepoint is in a different basic block so we should have stored call + // result in a virtual register. // We can not use default getValue() functionality to copy value from this // register because statepoint and actuall call return types can be // different, and getValue() will use CopyFromReg of the wrong type, @@ -833,9 +864,10 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { #ifndef NDEBUG // Consistency check - // We skip this check for invoke statepoints. It would be too expensive to - // preserve validation info through different basic blocks.
- if (!RelocateOpers.isTiedToInvoke()) { + // We skip this check for relocates not in the same basic block as their + // statepoint. It would be too expensive to preserve validation info through + // different basic blocks. + if (RelocateOpers.getStatepoint()->getParent() == CI.getParent()) { StatepointLowering.relocCallVisited(CI); } #endif @@ -862,13 +894,14 @@ void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { // Be conservative: flush all pending loads // TODO: Probably we can be less restrictive on this, - // it may allow more scheduling opprtunities + // it may allow more scheduling opportunities. SDValue Chain = getRoot(); SDValue SpillLoad = - DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(*DerivedPtrLocation), - false, false, false, 0); + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + *DerivedPtrLocation), + false, false, false, 0); // Again, be conservative, don't emit pending loads DAG.setRoot(SpillLoad.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index fbf651277c7f..c64d882d69a4 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -85,21 +85,22 @@ void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, + ArrayRef<SDValue> Ops, bool isSigned, SDLoc dl, bool doesNotReturn, bool isReturnValueUsed) const { TargetLowering::ArgListTy Args; - Args.reserve(NumOps); + Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (unsigned i = 0; i != NumOps; ++i) { - Entry.Node = Ops[i]; + for (SDValue Op : Ops) { + Entry.Node = Op; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); - Entry.isZExt = !shouldSignExtendTypeInLibCall(Ops[i].getValueType(), isSigned); + Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); Args.push_back(Entry); } + if (LC == RTLIB::UNKNOWN_LIBCALL) report_fatal_error("Unsupported library call operation!"); SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), @@ -115,9 +116,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, return LowerCallTo(CLI); } - -/// SoftenSetCCOperands - Soften the operands of a comparison. This code is -/// shared among BR_CC, SELECT_CC, and SETCC handlers. +/// Soften the operands of a comparison. This code is shared among BR_CC, +/// SELECT_CC, and SETCC handlers. void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, @@ -127,6 +127,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Expand into one or more soft-fp libcall(s). RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + bool ShouldInvertCC = false; switch (CCCode) { case ISD::SETEQ: case ISD::SETOEQ: @@ -166,34 +167,38 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; break; - default: + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ?
RTLIB::OLT_F64 : RTLIB::OLT_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + default: + // Invert CC for unordered comparisons + ShouldInvertCC = true; switch (CCCode) { - case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; - // Fallthrough - case ISD::SETUGT: - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : - (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; - break; - case ISD::SETUGE: - LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + case ISD::SETULT: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; break; - case ISD::SETULT: - LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : - (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; - break; case ISD::SETULE: - LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUGT: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; break; - case ISD::SETUEQ: - LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : - (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + case ISD::SETUGE: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; break; default: llvm_unreachable("Do not know how to soften this setcc!"); } @@ -201,17 +206,21 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. EVT RetVT = getCmpLibcallReturnType(); - SDValue Ops[2] = { NewLHS, NewRHS }; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, + SDValue Ops[2] = {NewLHS, NewRHS}; + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); + CCCode = getCmpLibcallCC(LC1); + if (ShouldInvertCC) + CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { SDValue Tmp = DAG.getNode( ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, dl).first; NewLHS = DAG.getNode( ISD::SETCC, dl, @@ -222,9 +231,8 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, } } -/// getJumpTableEncoding - Return the entry encoding for a jump table in the -/// current function. The returned value is a member of the -/// MachineJumpTableInfo::JTEntryKind enum. +/// Return the entry encoding for a jump table in the current function. The +/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. unsigned TargetLowering::getJumpTableEncoding() const { // In non-pic modes, just use the address of a block. if (getTargetMachine().getRelocationModel() != Reloc::PIC_) @@ -250,9 +258,8 @@ SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } -/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the -/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an -/// MCExpr. 
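// A worked example of the softenSetCCOperands rewrite above, assuming the
// usual libgcc comparison helpers for f32 (the helper names are the common
// RTLIB defaults, not spelled out in this patch). SETONE still expands to
// two ordered libcalls OR'ed together:
//
//   setone x, y   ==>   (__ltsf2(x, y) < 0) | (__gtsf2(x, y) > 0)
//
// while the new ShouldInvertCC path handles the remaining unordered
// predicates with a single ordered comparison whose condition is inverted
// once at the end, e.g.:
//
//   setult x, y   ==>   !(__gesf2(x, y) >= 0)
//
// which is equivalent because __gesf2 returns a negative value both when
// x < y and when the operands are unordered.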
+/// This returns the relocation base for the given PIC jumptable, the same as +/// getPICJumpTableRelocBase, but as an MCExpr. const MCExpr * TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,MCContext &Ctx) const{ @@ -279,10 +286,9 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // Optimization Methods //===----------------------------------------------------------------------===// -/// ShrinkDemandedConstant - Check to see if the specified operand of the -/// specified instruction is a constant integer. If so, check to see if there -/// are any bits set in the constant that are not demanded. If so, shrink the -/// constant and return true. +/// Check to see if the specified operand of the specified instruction is a +/// constant integer. If so, check to see if there are any bits set in the +/// constant that are not demanded. If so, shrink the constant and return true. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded) { SDLoc dl(Op); @@ -317,10 +323,9 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, return false; } -/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the -/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening -/// cast, but it could be generalized for targets with other types of -/// implicit widening casts. +/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. +/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be +/// generalized for targets with other types of implicit widening casts. bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, @@ -366,13 +371,13 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, return false; } -/// SimplifyDemandedBits - Look at Op. At this point, we know that only the -/// DemandedMask bits of the result of Op are ever used downstream. If we can -/// use this information to simplify Op, create a new simplified DAG node and -/// return true, returning the original and new nodes in Old and New. Otherwise, -/// analyze the expression and return a mask of KnownOne and KnownZero bits for -/// the expression (used to simplify the caller). The KnownZero/One bits may -/// only be accurate for those bits in the DemandedMask. +/// Look at Op. At this point, we know that only the DemandedMask bits of the +/// result of Op are ever used downstream. If we can use this information to +/// simplify Op, create a new simplified DAG node and return true, returning the +/// original and new nodes in Old and New. Otherwise, analyze the expression and +/// return a mask of KnownOne and KnownZero bits for the expression (used to +/// simplify the caller). The KnownZero/One bits may only be accurate for those +/// bits in the DemandedMask. bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, @@ -1061,7 +1066,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + Op.getOperand(0).getValueType() != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. EVT Ty = OpVTLegal ? 
Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. @@ -1120,9 +1127,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return false; } -/// computeKnownBitsForTargetNode - Determine which of the bits specified -/// in Mask are known to be either zero or one and return them in the -/// KnownZero/KnownOne bitsets. +/// Determine which of the bits specified in Mask are known to be either zero or +/// one and return them in the KnownZero/KnownOne bitsets. void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero, APInt &KnownOne, @@ -1137,9 +1143,8 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); } -/// ComputeNumSignBitsForTargetNode - This method can be implemented by -/// targets that want to expose additional information about sign bits to the -/// DAG Combiner. +/// This method can be implemented by targets that want to expose additional +/// information about sign bits to the DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, const SelectionDAG &, unsigned Depth) const { @@ -1152,10 +1157,8 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } -/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly -/// one bit set. This differs from computeKnownBits in that it doesn't need to -/// determine which bit is set. -/// +/// Test if the given value is known to have exactly one bit set. This differs +/// from computeKnownBits in that it doesn't need to determine which bit is set. static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { // A left-shift of a constant one will have exactly one bit set, because // shifting the bit off the end is undefined. @@ -1239,8 +1242,8 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { return CN->isNullValue(); } -/// SimplifySetCC - Try to simplify a setcc built with the specified operands -/// and cc. If it is unable to simplify it, return a null SDValue. +/// Try to simplify a setcc built with the specified operands and cc. If it is +/// unable to simplify it, return a null SDValue. 
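// Before SimplifySetCC below, a concrete instance of the demanded-bits
// helpers documented above (constants invented for illustration). With
// DemandedMask = 0x000F, ShrinkDemandedConstant rewrites
//
//   (X & 0xFFFF)   -->   (X & 0x000F)
//
// and since the new mask now covers every demanded bit, later demanded-bits
// reasoning can usually remove the AND entirely. ShrinkDemandedOp plays the
// same game on the operation width, e.g. turning an i64 add whose upper half
// is never used into zext(trunc(X) + trunc(Y)) when the casts are free.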
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, @@ -1270,7 +1273,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); - if (ConstantSDNode *N1C = dyn_cast(N1.getNode())) { + if (auto *N1C = dyn_cast(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an @@ -1335,7 +1338,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, PreExt = N0->getOperand(0); } else if (N0->getOpcode() == ISD::AND) { // DAGCombine turns costly ZExts into ANDs - if (ConstantSDNode *C = dyn_cast(N0->getOperand(1))) + if (auto *C = dyn_cast(N0->getOperand(1))) if ((C->getAPIntValue()+1).isPowerOf2()) { MinBits = C->getAPIntValue().countTrailingOnes(); PreExt = N0->getOperand(0); @@ -1345,7 +1348,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, MinBits = N0->getOperand(0).getValueSizeInBits(); PreExt = N0->getOperand(0); Signed = true; - } else if (LoadSDNode *LN0 = dyn_cast(N0)) { + } else if (auto *LN0 = dyn_cast(N0)) { // ZEXTLOAD / SEXTLOAD if (LN0->getExtensionType() == ISD::ZEXTLOAD) { MinBits = LN0->getMemoryVT().getSizeInBits(); @@ -1697,8 +1700,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); - if (ConstantSDNode *AndRHS = - dyn_cast(N0.getOperand(1))) { + if (auto *AndRHS = dyn_cast(N0.getOperand(1))) { EVT ShiftTy = DCI.isBeforeLegalize() ? getPointerTy(DL) : getShiftAmountTy(N0.getValueType(), DL); @@ -1728,8 +1730,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (X & -256) == 256 -> (X >> 8) == 1 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && N0.getOpcode() == ISD::AND && N0.hasOneUse()) { - if (ConstantSDNode *AndRHS = - dyn_cast(N0.getOperand(1))) { + if (auto *AndRHS = dyn_cast(N0.getOperand(1))) { const APInt &AndRHSC = AndRHS->getAPIntValue(); if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); @@ -1783,7 +1784,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Constant fold or commute setcc. SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); if (O.getNode()) return O; - } else if (ConstantFPSDNode *CFP = dyn_cast(N1.getNode())) { + } else if (auto *CFP = dyn_cast(N1.getNode())) { // If the RHS of an FP comparison is a constant, simplify it away in // some cases. if (CFP->getValueAPF().isNaN()) { @@ -1900,8 +1901,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // to be careful about increasing register pressure needlessly. 
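// A worked instance of the "(X & -256) == 256 -> (X >> 8) == 1" fold from
// the code above. -256 is 0xFFFFFF00 in i32, so the AND clears the low eight
// bits and the compare can only succeed when bits [31:8] of X hold the value
// 1. Shifting right by countTrailingZeros(0xFFFFFF00) == 8 gives
//
//   (X & 0xFFFFFF00) == 0x00000100   <=>   (srl X, 8) == 1
//
// with a smaller, often cheaper-to-materialize RHS constant; the guard
// (AndRHSC & C1) == C1 is what ensures no compared bit is thrown away by
// the mask.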
bool LegalRHSImm = false; - if (ConstantSDNode *RHSC = dyn_cast(N1)) { - if (ConstantSDNode *LHSR = dyn_cast(N0.getOperand(1))) { + if (auto *RHSC = dyn_cast(N1)) { + if (auto *LHSR = dyn_cast(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { return DAG.getSetCC(dl, VT, N0.getOperand(0), @@ -1924,7 +1925,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Turn (C1-X) == C2 --> X == C1-C2 - if (ConstantSDNode *SUBC = dyn_cast(N0.getOperand(0))) { + if (auto *SUBC = dyn_cast(N0.getOperand(0))) { if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { return DAG.getSetCC(dl, VT, N0.getOperand(1), @@ -2075,12 +2076,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return SDValue(); } -/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the -/// node is a GlobalAddress + offset. +/// Returns true (and the GlobalValue and the offset) if the node is a +/// GlobalAddress + offset. bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const { - if (isa(N)) { - GlobalAddressSDNode *GASD = cast(N); + if (auto *GASD = dyn_cast(N)) { GA = GASD->getGlobal(); Offset += GASD->getOffset(); return true; @@ -2090,14 +2090,12 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, SDValue N1 = N->getOperand(0); SDValue N2 = N->getOperand(1); if (isGAPlusOffset(N1.getNode(), GA, Offset)) { - ConstantSDNode *V = dyn_cast(N2); - if (V) { + if (auto *V = dyn_cast(N2)) { Offset += V->getSExtValue(); return true; } } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { - ConstantSDNode *V = dyn_cast(N1); - if (V) { + if (auto *V = dyn_cast(N1)) { Offset += V->getSExtValue(); return true; } @@ -2107,9 +2105,8 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, return false; } - -SDValue TargetLowering:: -PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { +SDValue TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { // Default implementation: no optimization. return SDValue(); } @@ -2159,9 +2156,9 @@ TargetLowering::getConstraintType(StringRef Constraint) const { return C_Unknown; } -/// LowerXConstraint - try to replace an X constraint, which matches anything, -/// with another that has more specific requirements based on the type of the -/// corresponding operand. +/// Try to replace an X constraint, which matches anything, with another that +/// has more specific requirements based on the type of the corresponding +/// operand. const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ if (ConstraintVT.isInteger()) return "r"; @@ -2170,8 +2167,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ return nullptr; } -/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops -/// vector. If it is invalid, don't add anything to Ops. +/// Lower the specified operand into the Ops vector. +/// If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector &Ops, @@ -2284,31 +2281,30 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, //===----------------------------------------------------------------------===// // Constraint Selection. -/// isMatchingInputConstraint - Return true of this is an input operand that is -/// a matching constraint like "4". 
+/// Return true if this is an input operand that is a matching constraint like +/// "4". bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { assert(!ConstraintCode.empty() && "No known constraint!"); return isdigit(static_cast<unsigned char>(ConstraintCode[0])); } -/// getMatchedOperand - If this is an input matching constraint, this method -/// returns the output operand it matches. +/// If this is an input matching constraint, this method returns the output +/// operand it matches. unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { assert(!ConstraintCode.empty() && "No known constraint!"); return atoi(ConstraintCode.c_str()); } - -/// ParseConstraints - Split up the constraint string from the inline -/// assembly value into the specific constraints and their prefixes, -/// and also tie in the associated operand values. +/// Split up the constraint string from the inline assembly value into the +/// specific constraints and their prefixes, and also tie in the associated +/// operand values. /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, ImmutableCallSite CS) const { - /// ConstraintOperands - Information about all of the constraints. + /// Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); unsigned maCount = 0; // Largest number of multiple alternative constraints. @@ -2483,16 +2479,13 @@ TargetLowering::ParseConstraints(const DataLayout &DL, " incompatible type!"); } } - } } return ConstraintOperands; } - -/// getConstraintGenerality - Return an integer indicating how general CT -/// is. +/// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { case TargetLowering::C_Other: @@ -2581,8 +2574,8 @@ TargetLowering::ConstraintWeight return weight; } -/// ChooseConstraint - If there are multiple different constraints that we -/// could pick for this operand (e.g. "imr") try to pick the 'best' one. +/// If there are multiple different constraints that we could pick for this +/// operand (e.g. "imr") try to pick the 'best' one. /// This is somewhat tricky: constraints fall into four classes: /// Other -> immediates and magic values /// Register -> one specific register @@ -2649,9 +2642,8 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, OpInfo.ConstraintType = BestType; } -/// ComputeConstraintToUse - Determines the constraint code and constraint -/// type to use for the specific AsmOperandInfo, setting -/// OpInfo.ConstraintCode and OpInfo.ConstraintType. +/// Determines the constraint code and constraint type to use for the specific +/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
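// A sketch of the input these routines see (the asm statement is invented
// for the example). Given
//
//   asm("add %2, %0" : "=r"(out) : "0"(out), "imr"(in));
//
// ParseConstraints produces one AsmOperandInfo per operand. For the "0"
// operand, isMatchingInputConstraint() is true (the code is a digit) and
// getMatchedOperand() returns 0, tying the input to the output register.
// For "imr", ChooseConstraint weighs the alternatives by generality,
// roughly: 'i' when the operand is a known immediate, otherwise the general
// register constraint 'r', with memory as the fallback for indirect values.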
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG) const { @@ -2717,6 +2709,16 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, return Mul; } +SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + std::vector<SDNode *> *Created) const { + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N,0); // Lower SDIV as SDIV + return SDValue(); +} + /// \brief Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. @@ -3036,3 +3038,46 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); return true; } + +//===----------------------------------------------------------------------===// +// Implementation of Emulated TLS Model +//===----------------------------------------------------------------------===// + +SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + // Access to address of TLS variable xyz is lowered to a function call: + // __emutls_get_address( address of global variable named "__emutls_v.xyz" ) + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext()); + SDLoc dl(GA); + + ArgListTy Args; + ArgListEntry Entry; + std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str(); + Module *VariableModule = const_cast<Module *>(GA->getGlobal()->getParent()); + StringRef EmuTlsVarName(NameString); + GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName); + if (!EmuTlsVar) + EmuTlsVar = dyn_cast_or_null<GlobalVariable>( + VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType)); + Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT); + Entry.Ty = VoidPtrType; + Args.push_back(Entry); + + SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()); + CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); + + // TLSADDR will be codegen'ed as a call. Inform MFI that function has calls. + // At least for X86 targets, maybe good for other targets too? + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setAdjustsStack(true); // Is this only for X86 target? + MFI->setHasCalls(true); + + assert((GA->getOffset() == 0) && + "Emulated TLS must have zero offset in GlobalAddressSDNode"); + return CallResult.first; } diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp index e7b2a8e72d2c..878eeeed0f6a 100644 --- a/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/lib/CodeGen/ShadowStackGCLowering.cpp @@ -112,7 +112,7 @@ public: case 1: // Find all 'return', 'resume', and 'unwind' instructions. while (StateBB != StateE) { - BasicBlock *CurBB = StateBB++; + BasicBlock *CurBB = &*StateBB++; // Branches and invokes do not escape, only unwind, resume, and return // do. @@ -120,7 +120,7 @@ public: if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) continue; - Builder.SetInsertPoint(TI->getParent(), TI); + Builder.SetInsertPoint(TI); return &Builder; } @@ -163,8 +163,8 @@ public: // Split the basic block containing the function call.
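// Stepping back to LowerToTLSEmulatedModel above, here is its effect in C
// terms (the variable name is invented; the __emutls names come from the
// code itself). A source-level access such as
//
//   __thread int xyz;
//   ... xyz = 42; ...
//
// is lowered as if it had been written
//
//   int *p = (int *)__emutls_get_address(&__emutls_v.xyz);
//   *p = 42;
//
// where __emutls_v.xyz is the control variable found or created by
// getOrInsertGlobal and __emutls_get_address is resolved by the runtime.
// This trades register-relative TLS access for a libcall, but needs no
// native TLS relocations from the target.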
BasicBlock *CallBB = CI->getParent(); - BasicBlock *NewBB = - CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); + BasicBlock *NewBB = CallBB->splitBasicBlock( + CI->getIterator(), CallBB->getName() + ".cont"); // Remove the unconditional branch inserted at the end of CallBB. CallBB->getInstList().pop_back(); @@ -184,7 +184,7 @@ public: delete CI; } - Builder.SetInsertPoint(RI->getParent(), RI); + Builder.SetInsertPoint(RI); return &Builder; } } diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp index 4463cc7d3c51..f8aa1e2b0b9a 100644 --- a/lib/CodeGen/ShrinkWrap.cpp +++ b/lib/CodeGen/ShrinkWrap.cpp @@ -43,9 +43,11 @@ // points must be in the same loop. // Property #3 is ensured via the MachineBlockFrequencyInfo. // -// If this pass found points matching all this properties, then -// MachineFrameInfo is updated this that information. +// If this pass found points matching all these properties, then +// MachineFrameInfo is updated with this information. //===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" // To check for profitability. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -61,11 +63,14 @@ #include "llvm/CodeGen/Passes.h" // To know about callee-saved. #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Debug.h" // To query the target about frame lowering. #include "llvm/Target/TargetFrameLowering.h" // To know about frame setup operation. #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" // To access TargetInstrInfo. #include "llvm/Target/TargetSubtargetInfo.h" @@ -78,6 +83,10 @@ STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); STATISTIC(NumCandidatesDropped, "Number of shrink-wrapping candidates dropped because of frequency"); +static cl::opt + EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, + cl::desc("enable the shrink-wrapping pass")); + namespace { /// \brief Class to determine where the safe point to insert the /// prologue and epilogue are. @@ -113,18 +122,38 @@ class ShrinkWrap : public MachineFunctionPass { unsigned FrameDestroyOpcode; /// Entry block. const MachineBasicBlock *Entry; + typedef SmallSetVector SetOfRegs; + /// Registers that need to be saved for the current function. + mutable SetOfRegs CurrentCSRs; + /// Current MachineFunction. + MachineFunction *MachineFunc; /// \brief Check if \p MI uses or defines a callee-saved register or /// a frame index. If this is the case, this means \p MI must happen /// after Save and before Restore. - bool useOrDefCSROrFI(const MachineInstr &MI) const; + bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; + + const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { + if (CurrentCSRs.empty()) { + BitVector SavedRegs; + const TargetFrameLowering *TFI = + MachineFunc->getSubtarget().getFrameLowering(); + + TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS); + + for (int Reg = SavedRegs.find_first(); Reg != -1; + Reg = SavedRegs.find_next(Reg)) + CurrentCSRs.insert((unsigned)Reg); + } + return CurrentCSRs; + } /// \brief Update the Save and Restore points such that \p MBB is in /// the region that is dominated by Save and post-dominated by Restore /// and Save and Restore still match the safe point definition. /// Such point may not exist and Save and/or Restore may be null after /// this call. 
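// On the caching in getCurrentCSRs above: determineCalleeSaves is not cheap,
// so the callee-saved set is computed lazily, once per function, and init()
// (below) clears CurrentCSRs so a stale set never leaks into the next
// function. The typical consumer is the regmask check added to
// useOrDefCSROrFI below, shaped like:
//
//   for (unsigned Reg : getCurrentCSRs(RS))
//     if (MO.clobbersPhysReg(Reg))
//       return true; // this regmask clobbers a CSR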
- void updateSaveRestorePoints(MachineBasicBlock &MBB); + void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); /// \brief Initialize the pass for \p MF. void init(MachineFunction &MF) { @@ -140,6 +169,8 @@ class ShrinkWrap : public MachineFunctionPass { FrameSetupOpcode = TII.getCallFrameSetupOpcode(); FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); Entry = &MF.front(); + CurrentCSRs.clear(); + MachineFunc = &MF; ++NumFunc; } @@ -148,6 +179,9 @@ class ShrinkWrap : public MachineFunctionPass { /// shrink-wrapping. bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } + /// \brief Check if shrink wrapping is enabled for this target and function. + static bool isShrinkWrapEnabled(const MachineFunction &MF); + public: static char ID; @@ -185,27 +219,34 @@ INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) -bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI) const { +bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, + RegScavenger *RS) const { if (MI.getOpcode() == FrameSetupOpcode || MI.getOpcode() == FrameDestroyOpcode) { DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); return true; } for (const MachineOperand &MO : MI.operands()) { - bool UseCSR = false; + bool UseOrDefCSR = false; if (MO.isReg()) { unsigned PhysReg = MO.getReg(); if (!PhysReg) continue; assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Unallocated register?!"); - UseCSR = RCI.getLastCalleeSavedAlias(PhysReg); + UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); + } else if (MO.isRegMask()) { + // Check if this regmask clobbers any of the CSRs. + for (unsigned Reg : getCurrentCSRs(RS)) { + if (MO.clobbersPhysReg(Reg)) { + UseOrDefCSR = true; + break; + } + } } - // TODO: Handle regmask more accurately. - // For now, be conservative about them. - if (UseCSR || MO.isFI() || MO.isRegMask()) { - DEBUG(dbgs() << "Use or define CSR(" << UseCSR << ") or FI(" << MO.isFI() - << "): " << MI << '\n'); + if (UseOrDefCSR || MO.isFI()) { + DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" + << MO.isFI() << "): " << MI << '\n'); return true; } } @@ -225,7 +266,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, return IDom; } -void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { +void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, + RegScavenger *RS) { // Get rid of the easy cases first. if (!Save) Save = &MBB; @@ -246,7 +288,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { // terminator. if (Restore == &MBB) { for (const MachineInstr &Terminator : MBB.terminators()) { - if (!useOrDefCSROrFI(Terminator)) + if (!useOrDefCSROrFI(Terminator, RS)) continue; // One of the terminator needs to happen before the restore point. if (MBB.succ_empty()) { @@ -277,7 +319,24 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { while (Save && Restore && (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || - MLI->getLoopFor(Save) != MLI->getLoopFor(Restore))) { + // Post-dominance is not enough in loops to ensure that all uses/defs + // are after the prologue and before the epilogue at runtime. + // E.g., + // while(1) { + // Save + // Restore + // if (...) + // break; + // use/def CSRs + // } + // All the uses/defs of CSRs are dominated by Save and post-dominated + // by Restore. 
However, the CSRs uses are still reachable after + // Restore and before Save are executed. + // + // For now, just push the restore/save points outside of loops. + // FIXME: Refine the criteria to still find interesting cases + // for loops. + MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { // Fix (A). if (!SaveDominatesRestore) { Save = MDT->findNearestCommonDominator(Save, Restore); @@ -288,35 +347,72 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB) { Restore = MPDT->findNearestCommonDominator(Restore, Save); // Fix (C). - if (Save && Restore && Save != Restore && - MLI->getLoopFor(Save) != MLI->getLoopFor(Restore)) { - if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) - // Push Save outside of this loop. - Save = FindIDom<>(*Save, Save->predecessors(), *MDT); - else + if (Save && Restore && + (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { + if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { + // Push Save outside of this loop if immediate dominator is different + // from save block. If immediate dominator is not different, bail out. + MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (IDom != Save) + Save = IDom; + else { + Save = nullptr; + break; + } + } else { + // If the loop does not exit, there is no point in looking + // for a post-dominator outside the loop. + SmallVector ExitBlocks; + MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks); // Push Restore outside of this loop. - Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + // Look for the immediate post-dominator of the loop exits. + MachineBasicBlock *IPdom = Restore; + for (MachineBasicBlock *LoopExitBB: ExitBlocks) { + IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT); + if (!IPdom) + break; + } + // If the immediate post-dominator is not in a less nested loop, + // then we are stuck in a program with an infinite loop. + // In that case, we will not find a safe point, hence, bail out. + if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore)) + Restore = IPdom; + else { + Restore = nullptr; + break; + } + } } } } bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { - if (MF.empty()) + if (MF.empty() || !isShrinkWrapEnabled(MF)) return false; + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); init(MF); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + std::unique_ptr RS( + TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); + for (MachineBasicBlock &MBB : MF) { DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() << '\n'); + if (MBB.isEHFuncletEntry()) { + DEBUG(dbgs() << "EH Funclets are not supported yet.\n"); + return false; + } + for (const MachineInstr &MI : MBB) { - if (!useOrDefCSROrFI(MI)) + if (!useOrDefCSROrFI(MI, RS.get())) continue; // Save (resp. restore) point must dominate (resp. post dominate) // MI. Look for the proper basic block for those. - updateSaveRestorePoints(MBB); + updateSaveRestorePoints(MBB, RS.get()); // If we are at a point where we cannot improve the placement of // save/restore instructions, just give up. 
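// The shape of function this pass improves, as an invented example. With
// prologue insertion at the entry, the early-exit path pays for the frame
// even though it touches nothing that needs one:
//
//   bb0: if (c) return 0;   // no CSR use/def, no frame index
//   bb1: ...clobbers a callee-saved register...
//   bb2: return x;
//
// Shrink-wrapping picks Save = bb1 (dominating every point flagged by
// useOrDefCSROrFI) and Restore = bb2 (post-dominating them), so the fast
// path through bb0 executes neither prologue nor epilogue.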
if (!ArePointsInteresting()) { @@ -368,7 +464,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { break; NewBB = Restore; } - updateSaveRestorePoints(*NewBB); + updateSaveRestorePoints(*NewBB, RS.get()); } while (Save && Restore); if (!ArePointsInteresting()) { @@ -386,3 +482,30 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { ++NumCandidates; return false; } + +bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + + switch (EnableShrinkWrapOpt) { + case cl::BOU_UNSET: + return TFI->enableShrinkWrapping(MF) && + // Windows with CFI has some limitations that make it impossible + // to use shrink-wrapping. + !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + // Sanitizers look at the value of the stack at the location + // of the crash. Since a crash can happen anywhere, the + // frame must be lowered before anything else happens for the + // sanitizers to be able to get a correct stack frame. + !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) || + MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) || + MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory)); + // If EnableShrinkWrap is set, it takes precedence over whatever the + // target sets. The rationale is that we assume we want to test + // something related to shrink-wrapping. + case cl::BOU_TRUE: + return true; + case cl::BOU_FALSE: + return false; } + llvm_unreachable("Invalid shrink-wrapping state"); +} diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index d236e1f5ab6f..e1f242a08de1 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -50,7 +50,7 @@ class SjLjEHPrepare : public FunctionPass { Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; - Constant *BuiltinSetjmpFn; + Constant *BuiltinSetupDispatchFn; Constant *FrameAddrFn; Constant *StackAddrFn; Constant *StackRestoreFn; @@ -112,7 +112,8 @@ bool SjLjEHPrepare::doInitialization(Module &M) { FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); - BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); + BuiltinSetupDispatchFn = + Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); @@ -178,8 +179,8 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, // values and replace the LPI with that aggregate. Type *LPadType = LPI->getType(); Value *LPadVal = UndefValue::get(LPadType); - IRBuilder<> Builder( - std::next(BasicBlock::iterator(cast<Instruction>(SelVal)))); + auto *SelI = cast<Instruction>(SelVal); + IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator())); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); @@ -190,7 +191,7 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, /// it with all of the data that we know at this point.
Value *SjLjEHPrepare::setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads) { - BasicBlock *EntryBB = F.begin(); + BasicBlock *EntryBB = &F.front(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. auto &DL = F.getParent()->getDataLayout(); unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy); FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", - EntryBB->begin()); + &EntryBB->front()); // Fill in the function context structure. for (unsigned I = 0, E = LPads.size(); I != E; ++I) { LandingPadInst *LPI = LPads[I]; - IRBuilder<> Builder(LPI->getParent()->getFirstInsertionPt()); + IRBuilder<> Builder(LPI->getParent(), + LPI->getParent()->getFirstInsertionPt()); // Reference the __data field. Value *FCData = @@ -250,21 +252,20 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { while (isa<AllocaInst>(AfterAllocaInsPt) && isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; + assert(AfterAllocaInsPt != F.front().end()); - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; - ++AI) { - Type *Ty = AI->getType(); + for (auto &AI : F.args()) { + Type *Ty = AI.getType(); // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction. Value *TrueValue = ConstantInt::getTrue(F.getContext()); Value *UndefValue = UndefValue::get(Ty); - Instruction *SI = SelectInst::Create(TrueValue, AI, UndefValue, - AI->getName() + ".tmp", - AfterAllocaInsPt); - AI->replaceAllUsesWith(SI); + Instruction *SI = SelectInst::Create( + TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt); + AI.replaceAllUsesWith(SI); // Reset the operand, because it was clobbered by the RAUW above. - SI->setOperand(1, AI); + SI->setOperand(1, &AI); } } @@ -279,7 +280,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. - Instruction *Inst = II; + Instruction *Inst = &*II; if (Inst->use_empty()) continue; if (Inst->hasOneUse() && @@ -360,7 +361,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, DemotePHIToStack(PN); // Move the landingpad instruction back to the top of the landing pad block. - LPI->moveBefore(UnwindBlock->begin()); + LPI->moveBefore(&UnwindBlock->front()); } } @@ -400,7 +401,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Value *FuncCtx = setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); - BasicBlock *EntryBB = F.begin(); + BasicBlock *EntryBB = &F.front(); IRBuilder<> Builder(EntryBB->getTerminator()); // Get a reference to the jump buffer. @@ -421,9 +422,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Val = Builder.CreateCall(StackAddrFn, {}, "sp"); Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = Builder.CreateBitCast(JBufPtr, Builder.getInt8PtrTy()); - Builder.CreateCall(BuiltinSetjmpFn, SetjmpArg); + // Call the setup_dispatch intrinsic. It fills in the rest of the jmpbuf. + Builder.CreateCall(BuiltinSetupDispatchFn, {}); // Store a pointer to the function context so that the back-end will know // where to look for it.
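// Roughly what the rewritten entry block looks like once
// setupEntryBlockAndCallSites has run with the change above (a sketch; the
// register function name assumes the pass's usual _Unwind_SjLj_* runtime,
// which this hunk does not show):
//
//   %fn_context = alloca { ... }                 ; function context
//   ...store frameaddress/stacksave results into the jmpbuf slots...
//   call void @llvm.eh.sjlj.setup.dispatch()
//   call void @_Unwind_SjLj_Register(i8* %ctx)
//
// The old code bitcast the jmpbuf and passed it to eh.sjlj.setjmp; the new
// setup_dispatch intrinsic takes no operands, which is why the bitcast and
// its argument disappear in this hunk.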
@@ -475,7 +475,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { continue; } Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(I); + StackAddr->insertAfter(&*I); Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); StoreStackAddr->insertAfter(StackAddr); } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 025ae70ed888..c9d23f67bdee 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -172,8 +172,8 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, // optionally includes an additional position prior to MBB->begin(), indicated // by the includeStart flag. This is done so that we can iterate MIs in a MBB // in parallel with SlotIndexes, but there should be a better way to do this. - IndexList::iterator ListB = startIdx.listEntry(); - IndexList::iterator ListI = endIdx.listEntry(); + IndexList::iterator ListB = startIdx.listEntry()->getIterator(); + IndexList::iterator ListI = endIdx.listEntry()->getIterator(); MachineBasicBlock::iterator MBBI = End; bool pastStart = false; while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) { diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 97a5424aa560..d30cfc27bf4b 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -36,7 +36,6 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" using namespace llvm; @@ -188,9 +187,9 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { BlockFrequencies.resize(mf.getNumBlockIDs()); MBFI = &getAnalysis(); setThreshold(MBFI->getEntryFreq()); - for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - unsigned Num = I->getNumber(); - BlockFrequencies[Num] = MBFI->getBlockFreq(I); + for (auto &I : mf) { + unsigned Num = I.getNumber(); + BlockFrequencies[Num] = MBFI->getBlockFreq(&I); } // We never change the function. diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index dab1dfe4f1f8..51dddabed2d9 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -56,6 +56,7 @@ void SplitAnalysis::clear() { SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); + // FIXME: Handle multiple EH pad successors. const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); std::pair &LSP = LastSplitPoint[Num]; SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); @@ -176,10 +177,11 @@ bool SplitAnalysis::calcLiveBlockInfo() { UseE = UseSlots.end(); // Loop over basic blocks where CurLI is live. - MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start); + MachineFunction::iterator MFI = + LIS.getMBBFromIndex(LVI->start)->getIterator(); for (;;) { BlockInfo BI; - BI.MBB = MFI; + BI.MBB = &*MFI; SlotIndex Start, Stop; std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); @@ -259,7 +261,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { if (LVI->start < Stop) ++MFI; else - MFI = LIS.getMBBFromIndex(LVI->start); + MFI = LIS.getMBBFromIndex(LVI->start)->getIterator(); } assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count"); @@ -275,8 +277,9 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { unsigned Count = 0; // Loop over basic blocks where li is live. 
- MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start); - SlotIndex Stop = LIS.getMBBEndIdx(MFI); + MachineFunction::const_iterator MFI = + LIS.getMBBFromIndex(LVI->start)->getIterator(); + SlotIndex Stop = LIS.getMBBEndIdx(&*MFI); for (;;) { ++Count; LVI = li->advanceTo(LVI, Stop); @@ -284,7 +287,7 @@ unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { return Count; do { ++MFI; - Stop = LIS.getMBBEndIdx(MFI); + Stop = LIS.getMBBEndIdx(&*MFI); } while (Stop <= LVI->start); } } @@ -864,9 +867,9 @@ bool SplitEditor::transferValues() { // This value has multiple defs in RegIdx, but it wasn't rematerialized, // so the live range is accurate. Add live-in blocks in [Start;End) to the // LiveInBlocks. - MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); SlotIndex BlockStart, BlockEnd; - std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(&*MBB); // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { @@ -875,7 +878,7 @@ bool SplitEditor::transferValues() { DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? if (BlockEnd <= End) - LRC.setLiveOutValue(MBB, VNI); + LRC.setLiveOutValue(&*MBB, VNI); // Skip to the next block for live-in. ++MBB; @@ -886,23 +889,23 @@ bool SplitEditor::transferValues() { assert(Start <= BlockStart && "Expected live-in block"); while (BlockStart < End) { DEBUG(dbgs() << ">BB#" << MBB->getNumber()); - BlockEnd = LIS.getMBBEndIdx(MBB); + BlockEnd = LIS.getMBBEndIdx(&*MBB); if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) - LRC.setLiveOutValue(MBB, VNI); // Live-out as well. + LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well. } else { // This block needs a live-in value. The last block covered may not // be live-out. if (End < BlockEnd) - LRC.addLiveInBlock(LR, MDT[MBB], End); + LRC.addLiveInBlock(LR, MDT[&*MBB], End); else { // Live-through, and we don't know the value. - LRC.addLiveInBlock(LR, MDT[MBB]); - LRC.setLiveOutValue(MBB, nullptr); + LRC.addLiveInBlock(LR, MDT[&*MBB]); + LRC.setLiveOutValue(&*MBB, nullptr); } } BlockStart = BlockEnd; @@ -1081,16 +1084,14 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { ConnectedVNInfoEqClasses ConEQ(LIS); for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - LiveInterval *li = &LIS.getInterval(Edit->get(i)); - unsigned NumComp = ConEQ.Classify(li); - if (NumComp <= 1) - continue; - DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); - SmallVector dups; - dups.push_back(li); - for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->createEmptyInterval()); - ConEQ.Distribute(&dups[0], MRI); + unsigned VReg = Edit->get(i); + LiveInterval &LI = LIS.getInterval(VReg); + SmallVector SplitLIs; + LIS.splitSeparateComponents(LI, SplitLIs); + unsigned Original = VRM.getOriginal(VReg); + for (LiveInterval *SplitLI : SplitLIs) + VRM.setIsSplitFromReg(SplitLI->reg, Original); + // The new intervals all map back to i. 
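// A sketch of what the splitSeparateComponents call above does (register
// numbers invented). If %vreg5 is left with two live ranges that share no
// values, say [a,b) and [c,d), the second component moves to a fresh
// register:
//
//   %vreg5 : [a,b)[c,d)   -->   %vreg5 : [a,b)   +   %vreg9 : [c,d)
//
// and VRM.setIsSplitFromReg records that %vreg9 descends from the original
// register, so later spill decisions can still trace its history.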
if (LRMap) LRMap->resize(Edit->size(), i); diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp index 116eef66c580..b3cd8b3d80bb 100644 --- a/lib/CodeGen/StackMaps.cpp +++ b/lib/CodeGen/StackMaps.cpp @@ -94,7 +94,9 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, default: llvm_unreachable("Unrecognized operand type."); case StackMaps::DirectMemRefOp: { - unsigned Size = AP.TM.getDataLayout()->getPointerSizeInBits(); + auto &DL = AP.MF->getDataLayout(); + + unsigned Size = DL.getPointerSizeInBits(); assert((Size % 8) == 0 && "Need pointer size in bytes."); Size /= 8; unsigned Reg = (++MOI)->getReg(); diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index bcea37a3aafa..db3fef524b30 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -373,7 +373,7 @@ bool StackProtector::InsertStackProtectors() { Value *StackGuardVar = nullptr; // The stack guard variable. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { - BasicBlock *BB = I++; + BasicBlock *BB = &*I++; ReturnInst *RI = dyn_cast(BB->getTerminator()); if (!RI) continue; @@ -433,7 +433,7 @@ bool StackProtector::InsertStackProtectors() { BasicBlock *FailBB = CreateFailBB(); // Split the basic block before the return instruction. - BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); // Update the dominator tree if we need to. if (DT && DT->isReachableFromEntry(BB)) { @@ -453,22 +453,20 @@ bool StackProtector::InsertStackProtectors() { LoadInst *LI1 = B.CreateLoad(StackGuardVar); LoadInst *LI2 = B.CreateLoad(AI); Value *Cmp = B.CreateICmpEQ(LI1, LI2); - unsigned SuccessWeight = - BranchProbabilityInfo::getBranchWeightStackProtector(true); - unsigned FailureWeight = - BranchProbabilityInfo::getBranchWeightStackProtector(false); + auto SuccessProb = + BranchProbabilityInfo::getBranchProbStackProtector(true); + auto FailureProb = + BranchProbabilityInfo::getBranchProbStackProtector(false); MDNode *Weights = MDBuilder(F->getContext()) - .createBranchWeights(SuccessWeight, FailureWeight); + .createBranchWeights(SuccessProb.getNumerator(), + FailureProb.getNumerator()); B.CreateCondBr(Cmp, NewBB, FailBB, Weights); } } // Return if we didn't modify any basic blocks. i.e., there are no return // statements in the function. 
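// The guard check built above, sketched as IR (the guard symbol and block
// names are illustrative). After splitBasicBlock:
//
//   %1 = load i8*, i8** @__stack_chk_guard   ; StackGuardVar
//   %2 = load i8*, i8** %StackGuardSlot      ; AI
//   %3 = icmp eq i8* %1, %2
//   br i1 %3, label %SP_return, label %FailBB, !prof !N
//
// The change swaps raw branch weights for BranchProbability numerators, so
// !N still encodes an overwhelmingly likely success edge, just derived from
// the same probability type used elsewhere in the analysis.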
- if (!HasPrologue) - return false; - - return true; + return HasPrologue; } /// CreateFailBB - Create a basic block to jump to when the stack protector diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index a5a175f2c8f0..51f4d0e68172 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -318,7 +318,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { if (NewFI == -1 || (NewFI == (int)SS)) continue; - const PseudoSourceValue *NewSV = PseudoSourceValue::getFixedStack(NewFI); + const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI); SmallVectorImpl &RefMMOs = SSRefs[SS]; for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) RefMMOs[i]->setValue(NewSV); diff --git a/lib/CodeGen/StatepointExampleGC.cpp b/lib/CodeGen/StatepointExampleGC.cpp index 95dfd75018c1..3f60e18fafa9 100644 --- a/lib/CodeGen/StatepointExampleGC.cpp +++ b/lib/CodeGen/StatepointExampleGC.cpp @@ -34,9 +34,9 @@ public: UsesMetadata = false; CustomRoots = false; } - Optional isGCManagedPointer(const Value *V) const override { + Optional isGCManagedPointer(const Type *Ty) const override { // Method is only valid on pointer typed values. - PointerType *PT = cast(V->getType()); + const PointerType *PT = cast(Ty); // For the sake of this example GC, we arbitrarily pick addrspace(1) as our // GC managed heap. We know that a pointer into this heap needs to be // updated and that no other pointer does. Note that addrspace(1) is used diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 237460cd9051..d2fbf533a787 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -59,7 +59,7 @@ TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); typedef std::vector > AvailableValsTy; namespace { - /// TailDuplicatePass - Perform tail duplication. + /// Perform tail duplication. class TailDuplicatePass : public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; @@ -69,11 +69,11 @@ namespace { std::unique_ptr RS; bool PreRegAlloc; - // SSAUpdateVRs - A list of virtual registers for which to update SSA form. + // A list of virtual registers for which to update SSA form. SmallVector SSAUpdateVRs; - // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of - // source virtual registers. + // For each virtual register in SSAUpdateVals keep a list of source virtual + // registers. DenseMap SSAUpdateVals; public: @@ -161,7 +161,7 @@ void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; SmallSetVector Preds(MBB->pred_begin(), MBB->pred_end()); MachineBasicBlock::iterator MI = MBB->begin(); @@ -207,7 +207,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { } } -/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup. +/// Tail duplicate the block and cleanup. bool TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, bool IsSimple, @@ -310,9 +310,9 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, return true; } -/// TailDuplicateBlocks - Look for small blocks that are unconditionally -/// branched to and do not fall through. Tail-duplicate their instructions -/// into their predecessors to eliminate (dynamic) branches. 
+/// Look for small blocks that are unconditionally branched to and do not fall +/// through. Tail-duplicate their instructions into their predecessors to +/// eliminate (dynamic) branches. bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { bool MadeChange = false; @@ -322,7 +322,7 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { } for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; + MachineBasicBlock *MBB = &*I++; if (NumTails == TailDupLimit) break; @@ -375,8 +375,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB, } } -/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for -/// SSA update. +/// Add a definition and source virtual registers pair for SSA update. void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, MachineBasicBlock *BB) { DenseMap<unsigned, AvailableValsTy>::iterator LI = SSAUpdateVals.find(OrigReg); @@ -390,9 +389,8 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, } } -/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. -/// Remember the source register that's contributed by PredBB and update SSA -/// update map. +/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the +/// source register that's contributed by PredBB and update SSA update map. void TailDuplicatePass::ProcessPHI( MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap<unsigned, unsigned> &LocalVRMap, @@ -422,7 +420,7 @@ void TailDuplicatePass::ProcessPHI( MI->eraseFromParent(); } -/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update +/// Duplicate a TailBB instruction to PredBB and update /// the source operands due to earlier PHI translation. void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB, @@ -459,9 +457,9 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, PredBB->insert(PredBB->instr_end(), NewMI); } -/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor -/// blocks, the successors have gained new predecessors. Update the PHI -/// instructions in them accordingly. +/// After FromBB is tail duplicated into its predecessor blocks, the successors +/// have gained new predecessors. Update the PHI instructions in them +/// accordingly. void TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, SmallVectorImpl<MachineBasicBlock *> &TDBBs, @@ -545,7 +543,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, } } -/// shouldTailDuplicate - Determine if it is profitable to duplicate this block. +/// Determine if it is profitable to duplicate this block. bool TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, bool IsSimple, @@ -563,6 +561,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && + // FIXME: Use Function::optForSize(). MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else @@ -584,30 +583,51 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // Check the instructions in the block to determine whether tail-duplication // is invalid or unlikely to be profitable. unsigned InstrCount = 0; - for (MachineBasicBlock::iterator I = TailBB.begin(); I != TailBB.end(); ++I) { + for (MachineInstr &MI : TailBB) { // Non-duplicable things shouldn't be tail-duplicated.
- if (I->isNotDuplicable()) + if (MI.isNotDuplicable()) return false; // Do not duplicate 'return' instructions if this is a pre-regalloc run. // A return may expand into a lot more instructions (e.g. reload of callee // saved registers) after PEI. - if (PreRegAlloc && I->isReturn()) + if (PreRegAlloc && MI.isReturn()) return false; // Avoid duplicating calls before register allocation. Calls present a // barrier to register allocation so duplicating them may end up increasing // spills. - if (PreRegAlloc && I->isCall()) + if (PreRegAlloc && MI.isCall()) return false; - if (!I->isPHI() && !I->isDebugValue()) + if (!MI.isPHI() && !MI.isDebugValue()) InstrCount += 1; if (InstrCount > MaxDuplicateCount) return false; } + // Check if any of the successors of TailBB has a PHI node in which the + // value corresponding to TailBB uses a subregister. + // If a phi node uses a register paired with a subregister, the actual + // "value type" of the phi may differ from the type of the register without + // any subregisters. Due to a bug, tail duplication may add a new operand + // without a necessary subregister, producing invalid code. This is + // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. + // Disable tail duplication for this case for now, until the problem is + // fixed. + for (auto SB : TailBB.successors()) { + for (auto &I : *SB) { + if (!I.isPHI()) + break; + unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); + assert(Idx != 0); + MachineOperand &PU = I.getOperand(Idx); + if (PU.getSubReg() != 0) + return false; + } + } + if (HasIndirectbr && PreRegAlloc) return true; @@ -620,7 +640,7 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, return canCompletelyDuplicateBB(TailBB); } -/// isSimpleBB - True if this BB has only one unconditional jump. +/// True if this BB has only one unconditional jump. bool TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { if (TailBB->succ_size() != 1) @@ -636,22 +656,16 @@ TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { static bool bothUsedInPHI(const MachineBasicBlock &A, SmallPtrSet<MachineBasicBlock *, 8> SuccsB) { - for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(), - SE = A.succ_end(); SI != SE; ++SI) { - MachineBasicBlock *BB = *SI; + for (MachineBasicBlock *BB : A.successors()) if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) return true; - } return false; } bool TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { - for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), - PE = BB.pred_end(); PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - + for (MachineBasicBlock *PredBB : BB.predecessors()) { if (PredBB->succ_size() > 1) return false; @@ -680,7 +694,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, PE = Preds.end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; - if (PredBB->getLandingPadSuccessor()) + if (PredBB->hasEHPadSuccessor()) continue; if (bothUsedInPHI(*PredBB, Succs)) @@ -696,7 +710,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, << "From simple Succ: " << *TailBB); MachineBasicBlock *NewTarget = *TailBB->succ_begin(); - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(PredBB)); + MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); // Make PredFBB explicit.
if (PredCond.empty()) @@ -731,19 +745,19 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, if (PredTBB) TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); - uint32_t Weight = MBPI->getEdgeWeight(PredBB, TailBB); - PredBB->removeSuccessor(TailBB); - unsigned NumSuccessors = PredBB->succ_size(); - assert(NumSuccessors <= 1); - if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget) - PredBB->addSuccessor(NewTarget, Weight); + if (!PredBB->isSuccessor(NewTarget)) + PredBB->replaceSuccessor(TailBB, NewTarget); + else { + PredBB->removeSuccessor(TailBB, true); + assert(PredBB->succ_size() <= 1); + } TDBBs.push_back(PredBB); } return Changed; } -/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, @@ -798,13 +812,12 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, RS->enterBasicBlock(PredBB); if (!PredBB->empty()) RS->forward(std::prev(PredBB->end())); - for (MachineBasicBlock::livein_iterator I = TailBB->livein_begin(), - E = TailBB->livein_end(); I != E; ++I) { - if (!RS->isRegUsed(*I, false)) + for (const auto &LI : TailBB->liveins()) { + if (!RS->isRegUsed(LI.PhysReg, false)) // If a register is previously livein to the tail but it's not live // at the end of predecessor BB, then it should be added to its // livein list. - PredBB->addLiveIn(*I); + PredBB->addLiveIn(LI); } } @@ -845,7 +858,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, "TailDuplicate called on block with multiple successors!"); for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I, MBPI->getEdgeWeight(TailBB, I)); + PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); Changed = true; ++NumTailDups; @@ -854,7 +867,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, // If TailBB was duplicated into all its predecessors except for the prior // block, which falls through unconditionally, move the contents of this // block into the prior block. - MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; SmallVector<MachineOperand, 4> PriorCond; // This has to check PrevBB->succ_size() because EH edges are ignored by @@ -960,8 +973,8 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, return Changed; } -/// RemoveDeadBlock - Remove the specified dead machine basic block from the -/// function, updating the CFG. +/// Remove the specified dead machine basic block from the function, updating +/// the CFG.
void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { assert(MBB->pred_empty() && "MBB must be dead!"); DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index f3cccd82a5c5..679ade185e1c 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -32,25 +33,22 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { return Attr.getValueAsString() == "true"; } -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. This is the default implementation -/// which is overridden for some targets. -int TargetFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getObjectOffset(FI) + MFI->getStackSize() - - getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); -} - +/// Returns the displacement from the frame register to the stack +/// frame of the specified index, along with the frame register used +/// (in output arg FrameReg). This is the default implementation which +/// is overridden for some targets. int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); // By default, assume all frame indices are referenced via whatever // getFrameRegister() says. The target can override this if it's doing // something different. FrameReg = RI->getFrameRegister(MF); - return getFrameIndexOffset(MF, FI); + + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); } bool TargetFrameLowering::needsFrameIndexResolution( @@ -84,3 +82,13 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.set(Reg); } } + +unsigned TargetFrameLowering::getStackAlignmentSkew( + const MachineFunction &MF) const { + // When HHVM function is called, the stack is skewed as the return address + // is removed from the stack before we enter the function. + if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM)) + return MF.getTarget().getPointerSize(); + + return 0; +} diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index 97ca0253d376..6eaf991ac700 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -118,23 +118,24 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, MBB->addSuccessor(NewDest); } -// commuteInstruction - The default implementation of this method just exchanges -// the two operands returned by findCommutedOpIndices. -MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, - bool NewMI) const { +MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned Idx1, + unsigned Idx2) const { const MCInstrDesc &MCID = MI->getDesc(); bool HasDef = MCID.getNumDefs(); if (HasDef && !MI->getOperand(0).isReg()) // No idea how to commute this instruction. Target should implement its own. 
return nullptr; - unsigned Idx1, Idx2; - if (!findCommutedOpIndices(MI, Idx1, Idx2)) { - assert(MI->isCommutable() && "Precondition violation: MI must be commutable."); - return nullptr; - } + unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1; + unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2; + assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) && + CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 && + "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands."); assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && "This only knows how to commute register operands so far"); + unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(Idx1).getReg(); unsigned Reg2 = MI->getOperand(Idx2).getReg(); @@ -184,9 +185,53 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, return MI; } -/// findCommutedOpIndices - If specified MI is commutable, return the two -/// operand indices that would swap value. Return true if the instruction -/// is not in a form which this routine understands. +MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose + // any commutable operand, which is done in findCommutedOpIndices() method + // called below. + if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) && + !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) { + assert(MI->isCommutable() && + "Precondition violation: MI must be commutable."); + return nullptr; + } + return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); +} + +bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1, + unsigned &ResultIdx2, + unsigned CommutableOpIdx1, + unsigned CommutableOpIdx2) { + if (ResultIdx1 == CommuteAnyOperandIndex && + ResultIdx2 == CommuteAnyOperandIndex) { + ResultIdx1 = CommutableOpIdx1; + ResultIdx2 = CommutableOpIdx2; + } else if (ResultIdx1 == CommuteAnyOperandIndex) { + if (ResultIdx2 == CommutableOpIdx1) + ResultIdx1 = CommutableOpIdx2; + else if (ResultIdx2 == CommutableOpIdx2) + ResultIdx1 = CommutableOpIdx1; + else + return false; + } else if (ResultIdx2 == CommuteAnyOperandIndex) { + if (ResultIdx1 == CommutableOpIdx1) + ResultIdx2 = CommutableOpIdx2; + else if (ResultIdx1 == CommutableOpIdx2) + ResultIdx2 = CommutableOpIdx1; + else + return false; + } else + // Check that the result operand indices match the given commutable + // operand indices. + return (ResultIdx1 == CommutableOpIdx1 && ResultIdx2 == CommutableOpIdx2) || + (ResultIdx1 == CommutableOpIdx2 && ResultIdx2 == CommutableOpIdx1); + + return true; +} + bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { @@ -196,10 +241,15 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isCommutable()) return false; + // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this // is not true, then the target must implement this. - SrcOpIdx1 = MCID.getNumDefs(); - SrcOpIdx2 = SrcOpIdx1 + 1; + unsigned CommutableOpIdx1 = MCID.getNumDefs(); + unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1; + if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, + CommutableOpIdx1, CommutableOpIdx2)) + return false; + if (!MI->getOperand(SrcOpIdx1).isReg() || !MI->getOperand(SrcOpIdx2).isReg()) // No idea. 
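// --- Editor's illustration, not part of the patch ---
// How fixCommutedOpIndices() resolves the CommuteAnyOperandIndex wildcard
// (hypothetical values; assume the instruction's commutable operands are
// operands 1 and 2):
//
//   unsigned Idx1 = TargetInstrInfo::CommuteAnyOperandIndex, Idx2 = 2;
//   fixCommutedOpIndices(Idx1, Idx2, 1, 2); // true; wildcard Idx1 becomes 1
//
//   unsigned Idx3 = 0, Idx4 = 2;
//   fixCommutedOpIndices(Idx3, Idx4, 1, 2); // false; (0, 2) is not a
//                                           // permutation of (1, 2)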
@@ -207,7 +257,6 @@ bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, return true; } - bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { if (!MI->isTerminator()) return false; @@ -315,7 +364,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); - if (!MF.getTarget().getDataLayout()->isLittleEndian()) { + if (!MF.getDataLayout().isLittleEndian()) { Offset = RC->getSize() - (Offset + Size); } return true; @@ -384,11 +433,6 @@ void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { llvm_unreachable("Not a MachO target"); } -bool TargetInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - ArrayRef<unsigned> Ops) const { - return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); -} - static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops, int FrameIndex, const TargetInstrInfo &TII) { @@ -489,10 +533,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, "Folded a use to a non-load!"); const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - Flags, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); NewMI->addMemOperand(MF, MMO); return NewMI; @@ -517,6 +560,217 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, return --Pos; } +bool TargetInstrInfo::hasReassociableOperands( + const MachineInstr &Inst, const MachineBasicBlock *MBB) const { + const MachineOperand &Op1 = Inst.getOperand(1); + const MachineOperand &Op2 = Inst.getOperand(2); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + // We need virtual register definitions for the operands that we will + // reassociate. + MachineInstr *MI1 = nullptr; + MachineInstr *MI2 = nullptr; + if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) + MI1 = MRI.getUniqueVRegDef(Op1.getReg()); + if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) + MI2 = MRI.getUniqueVRegDef(Op2.getReg()); + + // And they need to be in the trace (otherwise, they won't have a depth). + return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB; +} + +bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst, + bool &Commuted) const { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + unsigned AssocOpcode = Inst.getOpcode(); + + // If only one operand has the same opcode and it's the second source operand, + // the operands must be commuted. + Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; + if (Commuted) + std::swap(MI1, MI2); + + // 1. The previous instruction must be the same type as Inst. + // 2. The previous instruction must have virtual register definitions for its + // operands in the same basic block as Inst. + // 3. The previous instruction's result must only be used by Inst. + return MI1->getOpcode() == AssocOpcode && + hasReassociableOperands(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); +} + +// 1.
The operation must be associative and commutative. +// 2. The instruction must have virtual register definitions for its +// operands in the same basic block. +// 3. The instruction must have a reassociable sibling. +bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, + bool &Commuted) const { + return isAssociativeAndCommutative(Inst) && + hasReassociableOperands(Inst, Inst.getParent()) && + hasReassociableSibling(Inst, Commuted); +} + +// The concept of the reassociation pass is that these operations can benefit +// from this kind of transformation: +// +// A = ? op ? +// B = A op X (Prev) +// C = B op Y (Root) +// --> +// A = ? op ? +// B = X op Y +// C = A op B +// +// breaking the dependency between A and B, allowing them to be executed in +// parallel (or back-to-back in a pipeline) instead of depending on each other. + +// FIXME: This has the potential to be expensive (compile time) while not +// improving the code at all. Some ways to limit the overhead: +// 1. Track successful transforms; bail out if hit rate gets too low. +// 2. Only enable at -O3 or some other non-default optimization level. +// 3. Pre-screen pattern candidates here: if an operand of the previous +// instruction is known to not increase the critical path, then don't match +// that pattern. +bool TargetInstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + + bool Commute; + if (isReassociationCandidate(Root, Commute)) { + // We found a sequence of instructions that may be suitable for a + // reassociation of operands to increase ILP. Specify each commutation + // possibility for the Prev instruction in the sequence and let the + // machine combiner decide if changing the operands is worthwhile. + if (Commute) { + Patterns.push_back(MachineCombinerPattern::REASSOC_AX_YB); + Patterns.push_back(MachineCombinerPattern::REASSOC_XA_YB); + } else { + Patterns.push_back(MachineCombinerPattern::REASSOC_AX_BY); + Patterns.push_back(MachineCombinerPattern::REASSOC_XA_BY); + } + return true; + } + + return false; +} + +/// Attempt the reassociation transformation to reduce critical path length. +/// See the above comments before getMachineCombinerPatterns(). +void TargetInstrInfo::reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { + MachineFunction *MF = Root.getParent()->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); + + // This array encodes the operand index for each parameter because the + // operands may be commuted. Each row corresponds to a pattern value, + // and each column specifies the index of A, B, X, Y.
+ unsigned OpIdx[4][4] = { + { 1, 1, 2, 2 }, + { 1, 2, 2, 1 }, + { 2, 1, 1, 2 }, + { 2, 2, 1, 1 } + }; + + int Row; + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break; + case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break; + case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break; + case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break; + default: llvm_unreachable("unexpected MachineCombinerPattern"); + } + + MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]); + MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]); + MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]); + MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); + MachineOperand &OpC = Root.getOperand(0); + + unsigned RegA = OpA.getReg(); + unsigned RegB = OpB.getReg(); + unsigned RegX = OpX.getReg(); + unsigned RegY = OpY.getReg(); + unsigned RegC = OpC.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) + MRI.constrainRegClass(RegA, RC); + if (TargetRegisterInfo::isVirtualRegister(RegB)) + MRI.constrainRegClass(RegB, RC); + if (TargetRegisterInfo::isVirtualRegister(RegX)) + MRI.constrainRegClass(RegX, RC); + if (TargetRegisterInfo::isVirtualRegister(RegY)) + MRI.constrainRegClass(RegY, RC); + if (TargetRegisterInfo::isVirtualRegister(RegC)) + MRI.constrainRegClass(RegC, RC); + + // Create a new virtual register for the result of (X op Y) instead of + // recycling RegB because the MachineCombiner's computation of the critical + // path requires a new register definition rather than an existing one. + unsigned NewVR = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + + unsigned Opcode = Root.getOpcode(); + bool KillA = OpA.isKill(); + bool KillX = OpX.isKill(); + bool KillY = OpY.isKill(); + + // Create new instructions for insertion. + MachineInstrBuilder MIB1 = + BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) + .addReg(RegX, getKillRegState(KillX)) + .addReg(RegY, getKillRegState(KillY)); + MachineInstrBuilder MIB2 = + BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) + .addReg(RegA, getKillRegState(KillA)) + .addReg(NewVR, getKillRegState(true)); + + setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); + + // Record new instructions for insertion and old instructions for deletion. + InsInstrs.push_back(MIB1); + InsInstrs.push_back(MIB2); + DelInstrs.push_back(&Prev); + DelInstrs.push_back(&Root); +} + +void TargetInstrInfo::genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const { + MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); + + // Select the previous instruction in the sequence based on the input pattern. + MachineInstr *Prev = nullptr; + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: + case MachineCombinerPattern::REASSOC_XA_BY: + Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); + break; + case MachineCombinerPattern::REASSOC_AX_YB: + case MachineCombinerPattern::REASSOC_XA_YB: + Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); + break; + default: + break; + } + + assert(Prev && "Unknown pattern for machine combiner"); + + reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); + return; +} + /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot.
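// --- Editor's illustration, not part of the patch ---
// The REASSOC_* rewrite on a concrete sequence. With REASSOC_AX_BY, row 0 of
// OpIdx reads A and X from Prev's operands 1 and 2, and B and Y from Root's:
//
//   %vB = ADD %vA, %vX        ; Prev  (B = A op X)
//   %vC = ADD %vB, %vY        ; Root  (C = B op Y)
// becomes
//   %vNew = ADD %vX, %vY      ; MIB1 defines the fresh register NewVR
//   %vC   = ADD %vA, %vNew    ; MIB2
//
// The chain through %vB is gone, so the two new adds can issue in parallel;
// MachineCombiner then decides whether the shorter critical path pays off.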
@@ -661,6 +915,7 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { return 0; int SPAdj = MI->getOperand(0).getImm(); + SPAdj = TFI->alignSPAdjust(SPAdj); if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) || (StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode)) @@ -686,10 +941,7 @@ bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, // modification. const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI)) - return true; - - return false; + return MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI); } // Provide a global flag for disabling the PreRA hazard recognizer that targets diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index ecfd65931574..36a31c9d6461 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -247,13 +247,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; - Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; - Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; - Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; - Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; @@ -266,13 +262,9 @@ static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; - Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; - Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; - Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; - Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; @@ -501,10 +493,6 @@ RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { /// UNKNOWN_LIBCALL if there is none. RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOSINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F32_I16; if (RetVT == MVT::i32) return FPTOSINT_F32_I32; if (RetVT == MVT::i64) @@ -512,10 +500,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { if (RetVT == MVT::i128) return FPTOSINT_F32_I128; } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOSINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F64_I16; if (RetVT == MVT::i32) return FPTOSINT_F64_I32; if (RetVT == MVT::i64) @@ -551,10 +535,6 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { /// UNKNOWN_LIBCALL if there is none. 
RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOUINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F32_I16; if (RetVT == MVT::i32) return FPTOUINT_F32_I32; if (RetVT == MVT::i64) @@ -562,10 +542,6 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { if (RetVT == MVT::i128) return FPTOUINT_F32_I128; } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOUINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F64_I16; if (RetVT == MVT::i32) return FPTOUINT_F64_I32; if (RetVT == MVT::i64) @@ -758,17 +734,13 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { SelectIsExpensive = false; HasMultipleConditionRegisters = false; HasExtractBitsInsn = false; - IntDivIsCheap = false; FsqrtIsCheap = false; - Pow2SDivIsCheap = false; JumpIsExpensive = JumpIsExpensiveOverride; PredictableSelectIsExpensive = false; MaskAndBranchFoldingIsLegal = false; EnableExtLdPromotion = false; HasFloatingPointExceptions = true; StackPointerRegisterToSaveRestore = 0; - ExceptionPointerRegister = 0; - ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; BooleanFloatContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; @@ -778,6 +750,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { MinFunctionAlignment = 0; PrefFunctionAlignment = 0; PrefLoopAlignment = 0; + GatherAllAliasesMaxDepth = 6; MinStackArgumentAlignment = 1; InsertFencesForAtomic = false; MinimumJumpTableEntries = 4; @@ -814,6 +787,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); setOperationAction(ISD::FMINNUM, VT, Expand); setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FMINNAN, VT, Expand); + setOperationAction(ISD::FMAXNAN, VT, Expand); setOperationAction(ISD::FMAD, VT, Expand); setOperationAction(ISD::SMIN, VT, Expand); setOperationAction(ISD::SMAX, VT, Expand); @@ -828,6 +803,8 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SMULO, VT, Expand); setOperationAction(ISD::UMULO, VT, Expand); + setOperationAction(ISD::BITREVERSE, VT, Expand); + // These library functions default to expand. setOperationAction(ISD::FROUND, VT, Expand); @@ -838,11 +815,17 @@ void TargetLoweringBase::initActions() { setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); } + + // For most targets @llvm.get.dynamic.area.offset just returns 0. + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + // Most targets also ignore the @llvm.readcyclecounter intrinsic. + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand); + // ConstantFP nodes default to expand. Targets can either change this to // Legal, in which case all fp constants are legal, or use isFPImmLegal() // to optimize expansions for certain constants.
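// --- Editor's illustration, not part of the patch ---
// initActions() seeds every value type with conservative defaults that
// backends selectively override. For the BITREVERSE default added above, a
// target with native bit-reverse hardware would opt back in like so
// (hypothetical backend code inside its TargetLowering constructor):
//
//   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
//   // All other types keep Expand and are lowered to shift/mask sequences.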
@@ -1111,6 +1094,19 @@ MachineBasicBlock* TargetLoweringBase::emitPatchPoint(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + // We're handling multiple types of operands here: + // PATCHPOINT MetaArgs - live-in, read only, direct + // STATEPOINT Deopt Spill - live-through, read only, indirect + // STATEPOINT Deopt Alloca - live-through, read only, direct + // (We're currently conservative and mark the deopt slots read/write in + // practice.) + // STATEPOINT GC Spill - live-through, read/write, indirect + // STATEPOINT GC Alloca - live-through, read/write, direct + // The live-in vs live-through is handled already (the live through ones are + // all stack slots), but we need to handle the different type of stackmap + // operands and memory effects here. // MI changes inside this loop as we grow operands. for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) { @@ -1126,10 +1122,24 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, // Copy operands before the frame-index. for (unsigned i = 0; i < OperIdx; ++i) MIB.addOperand(MI->getOperand(i)); - // Add frame index operands: direct-mem-ref tag, #FI, offset. - MIB.addImm(StackMaps::DirectMemRefOp); - MIB.addOperand(MI->getOperand(OperIdx)); - MIB.addImm(0); + // Add frame index operands recognized by stackmaps.cpp + if (MFI.isStatepointSpillSlotObjectIndex(FI)) { + // indirect-mem-ref tag, size, #FI, offset. + // Used for spills inserted by StatepointLowering. This codepath is not + // used for patchpoints/stackmaps at all, for these spilling is done via + // foldMemoryOperand callback only. + assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity"); + MIB.addImm(StackMaps::IndirectMemRefOp); + MIB.addImm(MFI.getObjectSize(FI)); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + } else { + // direct-mem-ref tag, #FI, offset. + // Used by patchpoint, and direct alloca arguments to statepoints + MIB.addImm(StackMaps::DirectMemRefOp); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + } // Copy the operands after the frame index. for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i) MIB.addOperand(MI->getOperand(i)); @@ -1139,7 +1149,6 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!"); // Add a new memory operand for this FI. - const MachineFrameInfo &MFI = *MF.getFrameInfo(); assert(MFI.getObjectOffset(FI) != -1); unsigned Flags = MachineMemOperand::MOLoad; @@ -1148,8 +1157,8 @@ TargetLoweringBase::emitPatchPoint(MachineInstr *MI, Flags |= MachineMemOperand::MOVolatile; } MachineMemOperand *MMO = MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), Flags, - TM.getDataLayout()->getPointerSize(), MFI.getObjectAlignment(FI)); + MachinePointerInfo::getFixedStack(MF, FI), Flags, + MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI)); MIB->addMemOperand(MF, MMO); // Replace the instruction and update the operand index. @@ -1274,20 +1283,14 @@ void TargetLoweringBase::computeRegisterProperties( ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); } + // Decide how to handle f16. If the target does not have native f16 support, + // promote it to f32, because there are no f16 library calls (except for + // conversions). if (!isTypeLegal(MVT::f16)) { - // If the target has native f32 support, promote f16 operations to f32. If - // f32 is not supported, generate soft float library calls. 
- if (isTypeLegal(MVT::f32)) { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; - TransformToType[MVT::f16] = MVT::f32; - ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); - } else { - NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; - RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; - TransformToType[MVT::f16] = MVT::i16; - ValueTypeActions.setTypeAction(MVT::f16, TypeSoftenFloat); - } + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); } // Loop over all of the vector value types to see which need transformations. @@ -1528,6 +1531,29 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, return DL.getABITypeAlignment(Ty); } +bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, EVT VT, + unsigned AddrSpace, + unsigned Alignment, + bool *Fast) const { + // Check if the specified alignment is sufficient based on the data layout. + // TODO: While using the data layout works in practice, a better solution + // would be to implement this check directly (make this a virtual function). + // For example, the ABI alignment may change based on software platform while + // this function should only be affected by hardware implementation. + Type *Ty = VT.getTypeForEVT(Context); + if (Alignment >= DL.getABITypeAlignment(Ty)) { + // Assume that an access that meets the ABI-specified alignment is fast. + if (Fast != nullptr) + *Fast = true; + return true; + } + + // This is a misaligned access. + return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast); +} + + //===----------------------------------------------------------------------===// // TargetTransformInfo Helpers //===----------------------------------------------------------------------===// @@ -1546,6 +1572,11 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case Invoke: return 0; case Resume: return 0; case Unreachable: return 0; + case CleanupRet: return 0; + case CatchRet: return 0; + case CatchPad: return 0; + case CatchSwitch: return 0; + case CleanupPad: return 0; case Add: return ISD::ADD; case FAdd: return ISD::FADD; case Sub: return ISD::SUB; @@ -1603,13 +1634,13 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { llvm_unreachable("Unknown instruction type encountered!"); } -std::pair<unsigned, MVT> +std::pair<int, MVT> TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const { LLVMContext &C = Ty->getContext(); EVT MTy = getValueType(DL, Ty); - unsigned Cost = 1; + int Cost = 1; // We keep legalizing the type until we find a legal kind. We assume that // the only operation that costs anything is the split. After splitting // we need to handle two types. @@ -1622,11 +1653,28 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) Cost *= 2; + // Do not loop with f128 type. + if (MTy == LK.second) + return std::make_pair(Cost, MTy.getSimpleVT()); + // Keep legalizing the type. MTy = LK.second; } } +Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const { + if (!TM.getTargetTriple().isAndroid()) + return nullptr; + + // Android provides a libc function to retrieve the address of the current + // thread's unsafe stack pointer.
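// --- Editor's illustration, not part of the patch ---
// On Android, getSafeStackPointerLocation() materializes IR along these
// lines (sketch; the helper is the libc function named in the code below):
//
//   %unsafe_stack_ptr = call i8** @__safestack_pointer_address()
//
// i.e. a call returning the address of the current thread's unsafe stack
// pointer. On every other target the hook returns nullptr and the SafeStack
// pass falls back to its own default lookup scheme.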
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + Type *StackPtrTy = Type::getInt8PtrTy(M->getContext()); + Value *Fn = M->getOrInsertFunction("__safestack_pointer_address", + StackPtrTy->getPointerTo(0), nullptr); + return IRB.CreateCall(Fn); +} + //===----------------------------------------------------------------------===// // Loop Strength Reduction hooks //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 2f78763d8e02..58ae9cc53bda 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionCOFF.h" @@ -32,6 +33,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" @@ -58,9 +60,8 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( report_fatal_error("We do not support this DWARF encoding yet!"); } -void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, - const TargetMachine &TM, - const MCSymbol *Sym) const { +void TargetLoweringObjectFileELF::emitPersonalityValue( + MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const { SmallString<64> NameData("DW.ref."); NameData += Sym->getName(); MCSymbolELF *Label = @@ -72,9 +73,9 @@ void TargetLoweringObjectFileELF::emitPersonalityValue, unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS, Flags, 0, Label->getName()); - unsigned Size = TM.getDataLayout()->getPointerSize(); + unsigned Size = DL.getPointerSize(); Streamer.SwitchSection(Sec); - Streamer.EmitValueToAlignment(TM.getDataLayout()->getPointerABIAlignment()); + Streamer.EmitValueToAlignment(DL.getPointerABIAlignment()); Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); const MCExpr *E = MCConstantExpr::create(Size, getContext()); Streamer.emitELFSize(Label, E); @@ -232,14 +233,8 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) { return ".tdata"; if (Kind.isThreadBSS()) return ".tbss"; - if (Kind.isDataNoRel()) + if (Kind.isData()) return ".data"; - if (Kind.isDataRelLocal()) - return ".data.rel.local"; - if (Kind.isDataRel()) - return ".data.rel"; - if (Kind.isReadOnlyWithRelLocal()) - return ".data.rel.ro.local"; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); return ".data.rel.ro"; } @@ -282,8 +277,8 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, // We also need alignment here. // FIXME: this is getting the alignment of the character, not the // alignment of the global! - unsigned Align = - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)); + unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)); std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; Name = SizeSpec + utostr(Align); @@ -350,9 +345,8 @@ bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( /// Given a mergeable constant with the specified size and relocation /// information, return a section that it should be placed in.
-MCSection * -TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *TargetLoweringObjectFileELF::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { if (Kind.isMergeableConst4() && MergeableConst4Section) return MergeableConst4Section; if (Kind.isMergeableConst8() && MergeableConst8Section) @@ -362,7 +356,6 @@ TargetLoweringObjectFileELF::getSectionForConstant(SectionKind Kind, if (Kind.isReadOnly()) return ReadOnlySection; - if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection; assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); return DataRelROSection; } @@ -507,7 +500,7 @@ emitModuleFlags(MCStreamer &Streamer, // Get the section. MCSectionMachO *S = getContext().getMachOSection( - Segment, Section, TAA, StubSize, SectionKind::getDataNoRel()); + Segment, Section, TAA, StubSize, SectionKind::getData()); Streamer.SwitchSection(S); Streamer.EmitLabel(getContext(). getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); @@ -589,14 +582,16 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( // FIXME: Alignment check should be handled by section classifier. if (Kind.isMergeable1ByteCString() && - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) return CStringSection; // Do not put 16-bit arrays in the UString section if they have an // externally visible label, this runs into issues with certain linker // versions. if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && - TM.getDataLayout()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32) + GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) return UStringSection; // With MachO only variables whose corresponding symbol starts with 'l' or @@ -634,12 +629,11 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( return DataSection; } -MCSection * -TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *TargetLoweringObjectFileMachO::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { // If this constant requires a relocation, we have to put it in the data // segment, not in the text segment. - if (Kind.isDataRel() || Kind.isReadOnlyWithRel()) + if (Kind.isData() || Kind.isReadOnlyWithRel()) return ConstDataSection; if (Kind.isMergeableConst4()) @@ -706,7 +700,7 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( const MCSymbol *Sym, const MCValue &MV, int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const { - // Although MachO 32-bit targets do not explictly have a GOTPCREL relocation + // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation // as 64-bit do, we replace the GOT equivalent by accessing the final symbol // through a non_lazy_ptr stub instead. One advantage is that it allows the // computation of deltas to final external symbols. Example: @@ -740,7 +734,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( // non_lazy_ptr stubs.
SmallString<128> Name; StringRef Suffix = "$non_lazy_ptr"; - Name += DL->getPrivateGlobalPrefix(); + Name += MMI->getModule()->getDataLayout().getPrivateGlobalPrefix(); Name += Sym->getName(); Name += Suffix; MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); @@ -763,6 +757,29 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( return MCBinaryExpr::createSub(LHS, RHS, Ctx); } +static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, + const MCSection &Section) { + if (!AsmInfo.isSectionAtomizableBySymbols(Section)) + return true; + + // If it is not dead stripped, it is safe to use private labels. + const MCSectionMachO &SMO = cast<MCSectionMachO>(Section); + if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) + return true; + + return false; +} + +void TargetLoweringObjectFileMachO::getNameWithPrefix( + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM); + bool CannotUsePrivateLabel = + !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection); + Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); +} + //===----------------------------------------------------------------------===// // COFF //===----------------------------------------------------------------------===// @@ -918,7 +935,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( COMDATSymName, Selection); } else { SmallString<256> TmpData; - getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true, Mang, TM); + Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true); return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, Selection); } @@ -943,8 +960,9 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( } void TargetLoweringObjectFileCOFF::getNameWithPrefix( - SmallVectorImpl<char> &OutName, const GlobalValue *GV, - bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const { + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + bool CannotUsePrivateLabel = false; if (GV->hasPrivateLinkage() && ((isa<Function>(GV) && TM.getFunctionSections()) || (isa<GlobalVariable>(GV) && TM.getDataSections()))) @@ -1043,7 +1061,7 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( raw_string_ostream FlagOS(Flag); Mang.getNameWithPrefix(FlagOS, GV, false); FlagOS.flush(); - if (Flag[0] == DL->getGlobalPrefix()) + if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix()) OS << Flag.substr(1); else OS << Flag; diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 61a66b623928..0a7042ac3db5 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -11,13 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define DEBUG_TYPE "target-reg-info" using namespace llvm; @@ -34,54 +40,71 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
TargetRegisterInfo::~TargetRegisterInfo() {} -void PrintReg::print(raw_ostream &OS) const { - if (!Reg) - OS << "%noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); - else if (TRI && Reg < TRI->getNumRegs()) - OS << '%' << TRI->getName(Reg); - else - OS << "%physreg" << Reg; - if (SubIdx) { - if (TRI) - OS << ':' << TRI->getSubRegIndexName(SubIdx); +namespace llvm { + +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx) { + return Printable([Reg, TRI, SubIdx](raw_ostream &OS) { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); else - OS << ":sub(" << SubIdx << ')'; - } + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); + else + OS << ":sub(" << SubIdx << ')'; + } + }); } -void PrintRegUnit::print(raw_ostream &OS) const { - // Generic printout when TRI is missing. - if (!TRI) { - OS << "Unit~" << Unit; - return; - } +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } - // Check for invalid register units. - if (Unit >= TRI->getNumRegUnits()) { - OS << "BadUnit~" << Unit; - return; - } + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } - // Normal units have at least one root. - MCRegUnitRootIterator Roots(Unit, TRI); - assert(Roots.isValid() && "Unit has no roots."); - OS << TRI->getName(*Roots); - for (++Roots; Roots.isValid(); ++Roots) - OS << '~' << TRI->getName(*Roots); + // Normal units have at least one root. + MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); + }); } -void PrintVRegOrUnit::print(raw_ostream &OS) const { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); - return; - } - PrintRegUnit::print(OS); +Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + } else { + OS << PrintRegUnit(Unit, TRI); + } + }); } +Printable PrintLaneMask(LaneBitmask LaneMask) { + return Printable([LaneMask](raw_ostream &OS) { + OS << format("%08X", LaneMask); + }); +} + +} // End of llvm namespace + /// getAllocatableClass - Return the maximal subclass of the given register /// class that is allocatable, or NULL.
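// --- Editor's illustration, not part of the patch ---
// The refactoring above turns the Print* helpers into factories returning
// Printable, a thin wrapper around a printing lambda, so call sites can
// stream the result directly instead of constructing a printer object:
//
//   DEBUG(dbgs() << "assigned " << PrintReg(Reg, TRI) << " covering "
//                << PrintLaneMask(Mask) << '\n');
//
// Each helper captures its arguments by value in the lambda, so the returned
// Printable carries everything it needs to print later.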
const TargetRegisterClass * @@ -161,16 +184,24 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, const TargetRegisterInfo *TRI, const MVT::SimpleValueType SVT = MVT::SimpleValueType::Any) { + const MVT VT(SVT); for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) - if (unsigned Common = *A++ & *B++) - return TRI->getRegClass(I + countTrailingZeros(Common)); + if (unsigned Common = *A++ & *B++) { + const TargetRegisterClass *RC = + TRI->getRegClass(I + countTrailingZeros(Common)); + if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + return RC; + } return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const { + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT) const { // First take care of the trivial cases. if (A == B) return A; @@ -179,7 +210,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, // Register classes are ordered topologically, so the largest common // sub-class is the common sub-class with the smallest ID. - return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); } const TargetRegisterClass * @@ -260,13 +291,55 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, return BestRC; } +/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) +/// share the same register file. +static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, + const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) { + // Same register class. + if (DefRC == SrcRC) + return true; + + // Both operands are sub registers. Check if they share a register class. + unsigned SrcIdx, DefIdx; + if (SrcSubReg && DefSubReg) { + return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, + SrcIdx, DefIdx) != nullptr; + } + + // At most one of the registers is a sub register, make it Src to avoid + // duplicating the test. + if (!SrcSubReg) { + std::swap(DefSubReg, SrcSubReg); + std::swap(DefRC, SrcRC); + } + + // One of the registers is a sub register, check if we can get a superclass. + if (SrcSubReg) + return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; + + // Plain copy. + return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; +} + +bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const { + // If this source does not incur a cross register bank copy, use it. + return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg); +} + // Compute target-independent register allocator hints to help eliminate copies.
void TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, - const VirtRegMap *VRM) const { + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg); @@ -295,6 +368,26 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, Hints.push_back(Phys); } +bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { + return !MF.getFunction()->hasFnAttribute("no-realign-stack"); +} + +bool TargetRegisterInfo::needsStackRealignment( + const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const Function *F = MF.getFunction(); + unsigned StackAlign = TFI->getStackAlignment(); + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttribute(Attribute::StackAlignment)); + if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) { + if (canRealignStack(MF)) + return true; + DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n"); + } + return false; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index 299380d9268b..fc656396ade8 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -211,11 +211,9 @@ unsigned TargetSchedModel::computeOperandLatency( if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() && SchedModel.isComplete()) { - std::string Err; - raw_string_ostream ss(Err); - ss << "DefIdx " << DefIdx << " exceeds machine model writes for " - << *DefMI; - report_fatal_error(ss.str()); + errs() << "DefIdx " << DefIdx << " exceeds machine model writes for " + << *DefMI; + llvm_unreachable("incomplete machine model"); } #endif // FIXME: Automatically giving all implicit defs defaultDefLatency is diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 1e30821dc741..c6bae2434586 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -83,21 +83,20 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // The current basic block being processed. MachineBasicBlock *MBB; - // DistanceMap - Keep track the distance of a MI from the start of the - // current basic block. + // Keep track of the distance of a MI from the start of the current basic block. DenseMap<MachineInstr*, unsigned> DistanceMap; // Set of already processed instructions in the current block. SmallPtrSet<MachineInstr*, 8> Processed; - // SrcRegMap - A map from virtual registers to physical registers which are - // likely targets to be coalesced to due to copies from physical registers to - // virtual registers. e.g. v1024 = move r0. + // A map from virtual registers to physical registers which are likely targets + // to be coalesced to due to copies from physical registers to virtual + // registers. e.g. v1024 = move r0. DenseMap<unsigned, unsigned> SrcRegMap; - // DstRegMap - A map from virtual registers to physical registers which are - // likely targets to be coalesced to due to copies to physical registers from - // virtual registers. e.g. r1 = move v1024.
+ // A map from virtual registers to physical registers which are likely targets + // to be coalesced to due to copies to physical registers from virtual + // registers. e.g. r1 = move v1024. DenseMap DstRegMap; bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, @@ -110,8 +109,8 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, MachineInstr *MI, unsigned Dist); - bool commuteInstruction(MachineBasicBlock::iterator &mi, - unsigned RegB, unsigned RegC, unsigned Dist); + bool commuteInstruction(MachineInstr *MI, + unsigned RegBIdx, unsigned RegCIdx, unsigned Dist); bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); @@ -133,6 +132,11 @@ class TwoAddressInstructionPass : public MachineFunctionPass { unsigned SrcIdx, unsigned DstIdx, unsigned Dist, bool shouldOnlyCommute); + bool tryInstructionCommute(MachineInstr *MI, + unsigned DstOpIdx, + unsigned BaseOpIdx, + bool BaseOpKilled, + unsigned Dist); void scanUses(unsigned DstReg); void processCopy(MachineInstr *MI); @@ -151,7 +155,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); @@ -160,7 +164,7 @@ public: MachineFunctionPass::getAnalysisUsage(AU); } - /// runOnMachineFunction - Pass entry point. + /// Pass entry point. bool runOnMachineFunction(MachineFunction&) override; }; } // end anonymous namespace @@ -168,7 +172,7 @@ public: char TwoAddressInstructionPass::ID = 0; INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", "Two-Address instruction pass", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", "Two-Address instruction pass", false, false) @@ -176,10 +180,9 @@ char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); -/// sink3AddrInstruction - A two-address instruction has been converted to a -/// three-address instruction to avoid clobbering a register. Try to sink it -/// past the instruction that would kill the above mentioned register to reduce -/// register pressure. +/// A two-address instruction has been converted to a three-address instruction +/// to avoid clobbering a register. Try to sink it past the instruction that +/// would kill the above mentioned register to reduce register pressure. 
bool TwoAddressInstructionPass:: sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, MachineBasicBlock::iterator OldPos) { @@ -195,8 +198,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, unsigned DefReg = 0; SmallSet UseRegs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -231,10 +233,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, KillMI = LIS->getInstructionFromIndex(I->end); } if (!KillMI) { - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SavedReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = *UI; + for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) { if (!UseMO.isKill()) continue; KillMI = UseMO.getParent(); @@ -312,8 +311,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, return true; } -/// getSingleDef -- return the MachineInstr* if it is the single def of the Reg -/// in current BB. +/// Return the MachineInstr* if it is the single def of the Reg in current BB. static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { MachineInstr *Ret = nullptr; @@ -351,10 +349,10 @@ bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg, return false; } -/// noUseAfterLastDef - Return true if there are no intervening uses between the -/// last instruction in the MBB that defines the specified register and the -/// two-address instruction which is being processed. It also returns the last -/// def location by reference +/// Return true if there are no intervening uses between the last instruction +/// in the MBB that defines the specified register and the two-address +/// instruction which is being processed. It also returns the last def location +/// by reference. bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef) { LastDef = 0; @@ -375,9 +373,9 @@ bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, return !(LastUse > LastDef && LastUse < Dist); } -/// isCopyToReg - Return true if the specified MI is a copy instruction or -/// a extract_subreg instruction. It also returns the source and destination -/// registers and whether they are physical registers by reference. +/// Return true if the specified MI is a copy instruction or an extract_subreg +/// instruction. It also returns the source and destination registers and +/// whether they are physical registers by reference. static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, unsigned &SrcReg, unsigned &DstReg, bool &IsSrcPhys, bool &IsDstPhys) { @@ -397,8 +395,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, return true; } -/// isPLainlyKilled - Test if the given register value, which is used by the -// given instruction, is killed by the given instruction. +/// Test if the given register value, which is used by the +/// given instruction, is killed by the given instruction. 
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS) { if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && @@ -424,7 +422,7 @@ static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, return MI->killsRegister(Reg); } -/// isKilled - Test if the given register value, which is used by the given +/// Test if the given register value, which is used by the given /// instruction, is killed by the given instruction. This looks through /// coalescable copies to see if the original value is potentially not killed. /// @@ -472,8 +470,8 @@ static bool isKilled(MachineInstr &MI, unsigned Reg, } } -/// isTwoAddrUse - Return true if the specified MI uses the specified register -/// as a two-address use. If so, return the destination register by reference. +/// Return true if the specified MI uses the specified register as a two-address +/// use. If so, return the destination register by reference. static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) { const MachineOperand &MO = MI.getOperand(i); @@ -488,8 +486,8 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } -/// findOnlyInterestingUse - Given a register, if has a single in-basic block -/// use, return the use instruction if it's a copy or a two-address use. +/// Given a register, if it has a single in-basic block use, return the use +/// instruction if it's a copy or a two-address use. static MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, MachineRegisterInfo *MRI, @@ -516,8 +514,8 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, return nullptr; } -/// getMappedReg - Return the physical register the specified virtual register -/// might be mapped to. +/// Return the physical register the specified virtual register might be mapped +/// to. static unsigned getMappedReg(unsigned Reg, DenseMap &RegMap) { while (TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -531,8 +529,7 @@ getMappedReg(unsigned Reg, DenseMap &RegMap) { return 0; } -/// regsAreCompatible - Return true if the two registers are equal or aliased. -/// +/// Return true if the two registers are equal or aliased. static bool regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { if (RegA == RegB) @@ -543,8 +540,8 @@ regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { } -/// isProfitableToCommute - Return true if it's potentially profitable to commute -/// the two-address instruction that's being processed. +/// Return true if it's potentially profitable to commute the two-address +/// instruction that's being processed. bool TwoAddressInstructionPass:: isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, @@ -642,15 +639,15 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, return LastDefB && LastDefC && LastDefC > LastDefB; } -/// commuteInstruction - Commute a two-address instruction and update the basic -/// block, distance map, and live variables if needed. Return true if it is -/// successful. -bool TwoAddressInstructionPass:: -commuteInstruction(MachineBasicBlock::iterator &mi, - unsigned RegB, unsigned RegC, unsigned Dist) { - MachineInstr *MI = mi; +/// Commute a two-address instruction and update the basic block, distance map, +/// and live variables if needed. Return true if it is successful.
+bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, + unsigned RegBIdx, + unsigned RegCIdx, + unsigned Dist) { + unsigned RegC = MI->getOperand(RegCIdx).getReg(); DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); - MachineInstr *NewMI = TII->commuteInstruction(MI); + MachineInstr *NewMI = TII->commuteInstruction(MI, false, RegBIdx, RegCIdx); if (NewMI == nullptr) { DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); @@ -672,8 +669,8 @@ commuteInstruction(MachineBasicBlock::iterator &mi, return true; } -/// isProfitableToConv3Addr - Return true if it is profitable to convert the -/// given 2-address instruction to a 3-address one. +/// Return true if it is profitable to convert the given 2-address instruction +/// to a 3-address one. bool TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ // Look for situations like this: @@ -689,17 +686,18 @@ TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } -/// convertInstTo3Addr - Convert the specified two-address instruction into a -/// three address one. Return true if this transformation was successful. +/// Convert the specified two-address instruction into a three address one. +/// Return true if this transformation was successful. bool TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned RegA, unsigned RegB, unsigned Dist) { // FIXME: Why does convertToThreeAddress() need an iterator reference? - MachineFunction::iterator MFI = MBB; + MachineFunction::iterator MFI = MBB->getIterator(); MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV); - assert(MBB == MFI && "convertToThreeAddress changed iterator reference"); + assert(MBB->getIterator() == MFI && + "convertToThreeAddress changed iterator reference"); if (!NewMI) return false; @@ -730,8 +728,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, return true; } -/// scanUses - Scan forward recursively for only uses, update maps if the use -/// is a copy or a two-address instruction. +/// Scan forward recursively for only uses, update maps if the use is a copy or +/// a two-address instruction. void TwoAddressInstructionPass::scanUses(unsigned DstReg) { SmallVector VirtRegPairs; @@ -777,8 +775,8 @@ TwoAddressInstructionPass::scanUses(unsigned DstReg) { } } -/// processCopy - If the specified instruction is not yet processed, process it -/// if it's a copy. For a copy instruction, we find the physical registers the +/// If the specified instruction is not yet processed, process it if it's a +/// copy. For a copy instruction, we find the physical registers the /// source and destination registers might be mapped to. These are kept in /// point-to maps used to determine future optimizations. e.g. /// v1024 = mov r0 @@ -813,9 +811,9 @@ void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { return; } -/// rescheduleMIBelowKill - If there is one more local instruction that reads -/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill -/// instruction in order to eliminate the need for the copy. +/// If there is one more local instruction that reads 'Reg' and it kills 'Reg', +/// consider moving the instruction below the kill instruction in order to +/// eliminate the need for the copy.
bool TwoAddressInstructionPass:: rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -871,8 +869,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, SmallSet Uses; SmallSet Kills; SmallSet Defs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -914,8 +911,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, OtherMI->isBranch() || OtherMI->isTerminator()) // Don't move past calls, etc. return false; - for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OtherMI->getOperand(i); + for (const MachineOperand &MO : OtherMI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -984,8 +980,8 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, return true; } -/// isDefTooClose - Return true if the re-scheduling will put the given -/// instruction too close to the defs of its register dependencies. +/// Return true if the re-scheduling will put the given instruction too close +/// to the defs of its register dependencies. bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI) { for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { @@ -1004,10 +1000,9 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, return false; } -/// rescheduleKillAboveMI - If there is one more local instruction that reads -/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the -/// current two-address instruction in order to eliminate the need for the -/// copy. +/// If there is one more local instruction that reads 'Reg' and it kills 'Reg', +/// consider moving the kill instruction above the current two-address +/// instruction in order to eliminate the need for the copy. bool TwoAddressInstructionPass:: rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -1055,8 +1050,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, SmallSet Kills; SmallSet Defs; SmallSet LiveDefs; - for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = KillMI->getOperand(i); + for (const MachineOperand &MO : KillMI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -1094,8 +1088,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Don't move past calls, etc. return false; SmallVector OtherDefs; - for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = OtherMI->getOperand(i); + for (const MachineOperand &MO : OtherMI->operands()) { if (!MO.isReg()) continue; unsigned MOReg = MO.getReg(); @@ -1155,13 +1148,68 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, return true; }
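Both rescheduling routines above begin by bucketing the instruction's register operands into use/kill/def sets, so that the later "is it safe to move across this instruction" checks reduce to cheap set-membership tests. A self-contained sketch of that bucketing step; SimpleOp and std::set are stand-ins for MachineOperand and the pass's SmallSets:

#include <cstdio>
#include <set>
#include <vector>

// Toy stand-in for a register MachineOperand.
struct SimpleOp {
  unsigned Reg;
  bool IsUse;  // otherwise a def
  bool IsKill; // only meaningful for uses
};

int main() {
  // r1 = add r1(kill), r2  -- one def, one killed use, one plain use.
  std::vector<SimpleOp> Ops = {
      {1, false, false}, {1, true, true}, {2, true, false}};

  std::set<unsigned> Uses, Kills, Defs;
  for (const SimpleOp &MO : Ops) {
    if (MO.IsUse) {
      Uses.insert(MO.Reg);
      if (MO.IsKill)
        Kills.insert(MO.Reg);
    } else {
      Defs.insert(MO.Reg);
    }
  }

  // With the sets built, legality questions about moving past another
  // instruction become a handful of membership tests, which is how the
  // pass uses them.
  std::printf("uses=%zu kills=%zu defs=%zu\n", Uses.size(), Kills.size(),
              Defs.size());
  return 0;
}

-/// tryInstructionTransform - For the case where an instruction has a single -/// pair of tied register operands, attempt some transformations that may -/// either eliminate the tied operands or improve the opportunities for -/// coalescing away the register copy. Returns true if no copy needs to be -/// inserted to untie mi's operands (either because they were untied, or -/// because mi was rescheduled, and will be visited again later). If the -/// shouldOnlyCommute flag is true, only instruction commutation is attempted.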
+/// Tries to commute the operand 'BaseOpIdx' and some other operand in the +/// given machine instruction to improve opportunities for coalescing and +/// elimination of a register to register copy. +/// +/// 'DstOpIdx' specifies the index of the MI def operand. +/// 'BaseOpKilled' specifies if the register associated with 'BaseOpIdx' +/// operand is killed by the given instruction. +/// The 'Dist' argument provides the distance of MI from the start of the +/// current basic block and it is used to determine if it is profitable +/// to commute operands in the instruction. +/// +/// Returns true if the transformation happened. Otherwise, returns false. +bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, + unsigned DstOpIdx, + unsigned BaseOpIdx, + bool BaseOpKilled, + unsigned Dist) { + unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); + unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); + unsigned OpsNum = MI->getDesc().getNumOperands(); + unsigned OtherOpIdx = MI->getDesc().getNumDefs(); + for (; OtherOpIdx < OpsNum; OtherOpIdx++) { + // The call of findCommutedOpIndices below only checks if BaseOpIdx + // and OtherOpIdx are commutable; it does not really search for + // other commutable operands and does not change the values of passed + // variables. + if (OtherOpIdx == BaseOpIdx || + !TII->findCommutedOpIndices(MI, BaseOpIdx, OtherOpIdx)) + continue; + + unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); + bool AggressiveCommute = false; + + // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp + // operands. This makes the live ranges of DstOp and OtherOp joinable. + bool DoCommute = + !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + + if (!DoCommute && + isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) { + DoCommute = true; + AggressiveCommute = true; + } + + // If it's profitable to commute, try to do so. + if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return true; + } + } + return false; +} + +/// For the case where an instruction has a single pair of tied register +/// operands, attempt some transformations that may either eliminate the tied +/// operands or improve the opportunities for coalescing away the register copy. +/// Returns true if no copy needs to be inserted to untie mi's operands +/// (either because they were untied, or because mi was rescheduled, and will +/// be visited again later). If the shouldOnlyCommute flag is true, only +/// instruction commutation is attempted. bool TwoAddressInstructionPass:: tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -1181,51 +1229,18 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (TargetRegisterInfo::isVirtualRegister(regA)) scanUses(regA); - // Check if it is profitable to commute the operands. - unsigned SrcOp1, SrcOp2; - unsigned regC = 0; - unsigned regCIdx = ~0U; - bool TryCommute = false; - bool AggressiveCommute = false; - if (MI.isCommutable() && MI.getNumOperands() >= 3 && - TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { - if (SrcIdx == SrcOp1) - regCIdx = SrcOp2; - else if (SrcIdx == SrcOp2) - regCIdx = SrcOp1; - - if (regCIdx != ~0U) { - regC = MI.getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false)) - // If C dies but B does not, swap the B and C operands. - // This makes the live ranges of A and C joinable.
- TryCommute = true; - else if (isProfitableToCommute(regA, regB, regC, &MI, Dist)) { - TryCommute = true; - AggressiveCommute = true; - } - } - } + bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); // If the instruction is convertible to 3 Addr, instead // of returning try 3 Addr transformation aggressively and // use this variable to check later. Because it might be better. // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret` // instead of the following code. - // addl %esi, %edi - // movl %edi, %eax + // addl %esi, %edi + // movl %edi, %eax // ret - bool Commuted = false; - - // If it's profitable to commute, try to do so. - if (TryCommute && commuteInstruction(mi, regB, regC, Dist)) { - Commuted = true; - ++NumCommuted; - if (AggressiveCommute) - ++NumAggrCommuted; - if (!MI.isConvertibleTo3Addr()) - return false; - } + if (Commuted && !MI.isConvertibleTo3Addr()) + return false; if (shouldOnlyCommute) return false; @@ -1237,6 +1252,13 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, return true; } + // If we commuted, regB may have changed so we should re-sample it to avoid + // confusing the three address conversion below. + if (Commuted) { + regB = MI.getOperand(SrcIdx).getReg(); + regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); + } + if (MI.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. @@ -1348,10 +1370,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, SmallVector OrigRegs; if (LIS) { - for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), - MOE = MI.operands_end(); MOI != MOE; ++MOI) { - if (MOI->isReg()) - OrigRegs.push_back(MOI->getReg()); + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg()) + OrigRegs.push_back(MO.getReg()); } } @@ -1536,12 +1557,10 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, SrcRegMap[RegA] = RegB; } - if (AllUsesCopied) { if (!IsEarlyClobber) { // Replace other (un-tied) uses of regB with LastCopiedReg. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && MO.isUse()) { if (MO.isKill()) { @@ -1578,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, // regB is still used in this instruction, but a kill flag was // removed from a different tied use of regB, so now we need to add // a kill flag to one of the remaining uses of regB. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { MO.setIsKill(true); break; @@ -1588,8 +1606,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, } }
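The refactoring above moves the commute search into tryInstructionCommute, whose loop shape is: pin one tied operand, scan every other operand index for a commutable partner, and commute on the first hit that a profitability test accepts. The sketch below reproduces just that loop shape under toy assumptions; isCommutable and commute are stand-ins for the TII hooks findCommutedOpIndices and commuteInstruction, and the "larger value wins" heuristic is purely illustrative:

#include <cstdio>
#include <utility>
#include <vector>

// Placeholder for TII->findCommutedOpIndices: here any two distinct
// operand indices are commutable.
static bool isCommutable(unsigned A, unsigned B) { return A != B; }

// Placeholder commute: swap the two operand values and report success.
static bool commute(std::vector<unsigned> &Ops, unsigned A, unsigned B) {
  std::swap(Ops[A], Ops[B]);
  return true;
}

// Scan every candidate partner for BaseOpIdx and commute on the first
// operand that the (toy) profitability test accepts -- the same loop
// structure as tryInstructionCommute.
static bool tryCommute(std::vector<unsigned> &Ops, unsigned BaseOpIdx,
                       unsigned FirstNonDef) {
  for (unsigned OtherOpIdx = FirstNonDef; OtherOpIdx < Ops.size();
       ++OtherOpIdx) {
    if (OtherOpIdx == BaseOpIdx || !isCommutable(BaseOpIdx, OtherOpIdx))
      continue;
    bool Profitable = Ops[OtherOpIdx] > Ops[BaseOpIdx]; // toy heuristic
    if (Profitable && commute(Ops, BaseOpIdx, OtherOpIdx))
      return true;
  }
  return false;
}

int main() {
  std::vector<unsigned> Ops = {0, 7, 9}; // operand 0 is the def
  bool Did = tryCommute(Ops, /*BaseOpIdx=*/1, /*FirstNonDef=*/1);
  std::printf("commuted: %d, ops: %u %u %u\n", Did, Ops[0], Ops[1], Ops[2]);
  return 0;
}

-/// runOnMachineFunction - Reduce two-address instructions to two operands. -/// +/// Reduce two-address instructions to two operands.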
bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const TargetMachine &TM = MF->getTarget(); @@ -1599,7 +1616,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { InstrItins = MF->getSubtarget().getInstrItineraryData(); LV = getAnalysisIfAvailable(); LIS = getAnalysisIfAvailable(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); OptLevel = TM.getOptLevel(); bool MadeChange = false; @@ -1614,7 +1631,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { TiedOperandMap TiedOperands; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { - MBB = MBBI; + MBB = &*MBBI; unsigned Dist = 0; DistanceMap.clear(); SrcRegMap.clear(); @@ -1661,8 +1678,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { unsigned DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { - // The tied operands have been eliminated or shifted further down the - // block to ease elimination. Continue processing with 'nmi'. + // The tied operands have been eliminated or shifted further down + // the block to ease elimination. Continue processing with 'nmi'. TiedOperands.clear(); mi = nmi; continue; @@ -1671,9 +1688,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { } // Now iterate over the information collected above. - for (TiedOperandMap::iterator OI = TiedOperands.begin(), - OE = TiedOperands.end(); OI != OE; ++OI) { - processTiedPairs(mi, OI->second, Dist); + for (auto &TO : TiedOperands) { + processTiedPairs(mi, TO.second, Dist); DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index d393e103104d..8c9631e435bf 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -71,8 +71,8 @@ bool UnreachableBlockElim::runOnFunction(Function &F) { // in them. std::vector DeadBlocks; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - if (!Reachable.count(I)) { - BasicBlock *BB = I; + if (!Reachable.count(&*I)) { + BasicBlock *BB = &*I; DeadBlocks.push_back(BB); while (PHINode *PN = dyn_cast(BB->begin())) { PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); @@ -131,7 +131,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { // in them. std::vector DeadBlocks; for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - MachineBasicBlock *BB = I; + MachineBasicBlock *BB = &*I; // Test for deadness. if (!Reachable.count(BB)) { @@ -167,7 +167,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { // Cleanup PHI nodes. for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { - MachineBasicBlock *BB = I; + MachineBasicBlock *BB = &*I; // Prune unneeded PHI entries. 
SmallPtrSet preds(BB->pred_begin(), BB->pred_end()); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 2912bdd63426..bf1c0dce9e56 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -163,11 +163,12 @@ class VirtRegRewriter : public MachineFunctionPass { SlotIndexes *Indexes; LiveIntervals *LIS; VirtRegMap *VRM; - SparseSet PhysRegs; void rewrite(); void addMBBLiveIns(); bool readsUndefSubreg(const MachineOperand &MO) const; + void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; + public: static char ID; VirtRegRewriter() : MachineFunctionPass(ID) {} @@ -237,10 +238,52 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { return true; } +void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, + unsigned PhysReg) const { + assert(!LI.empty()); + assert(LI.hasSubRanges()); + + typedef std::pair SubRangeIteratorPair; + SmallVector SubRanges; + SlotIndex First; + SlotIndex Last; + for (const LiveInterval::SubRange &SR : LI.subranges()) { + SubRanges.push_back(std::make_pair(&SR, SR.begin())); + if (!First.isValid() || SR.segments.front().start < First) + First = SR.segments.front().start; + if (!Last.isValid() || SR.segments.back().end > Last) + Last = SR.segments.back().end; + } + + // Check all mbb start positions between First and Last while + // simultaneously advancing an iterator for each subrange. + for (SlotIndexes::MBBIndexIterator MBBI = Indexes->findMBBIndex(First); + MBBI != Indexes->MBBIndexEnd() && MBBI->first <= Last; ++MBBI) { + SlotIndex MBBBegin = MBBI->first; + // Advance all subrange iterators so that their end position is just + // behind MBBBegin (or the iterator is at the end). + LaneBitmask LaneMask = 0; + for (auto &RangeIterPair : SubRanges) { + const LiveInterval::SubRange *SR = RangeIterPair.first; + LiveInterval::const_iterator &SRI = RangeIterPair.second; + while (SRI != SR->end() && SRI->end <= MBBBegin) + ++SRI; + if (SRI == SR->end()) + continue; + if (SRI->start <= MBBBegin) + LaneMask |= SR->LaneMask; + } + if (LaneMask == 0) + continue; + MachineBasicBlock *MBB = MBBI->second; + MBB->addLiveIn(PhysReg, LaneMask); + } +} + // Compute MBB live-in lists from virtual register live ranges and their // assignments. void VirtRegRewriter::addMBBLiveIns() { - SmallVector LiveIn; for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); if (MRI->reg_nodbg_empty(VirtReg)) @@ -254,31 +297,18 @@ void VirtRegRewriter::addMBBLiveIns() { assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); if (LI.hasSubRanges()) { - for (LiveInterval::SubRange &S : LI.subranges()) { - for (const auto &Seg : S.segments) { - if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) - continue; - for (MCSubRegIndexIterator SR(PhysReg, TRI); SR.isValid(); ++SR) { - unsigned SubReg = SR.getSubReg(); - unsigned SubRegIndex = SR.getSubRegIndex(); - unsigned SubRegLaneMask = TRI->getSubRegIndexLaneMask(SubRegIndex); - if ((SubRegLaneMask & S.LaneMask) == 0) - continue; - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - LiveIn[i]->addLiveIn(SubReg); - } - } - LiveIn.clear(); - } - } + addLiveInsForSubRanges(LI, PhysReg); } else {
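addLiveInsForSubRanges above is essentially a k-way merge: one pass over the sorted MBB start indexes, one monotonic cursor per subrange, and a lane mask ORed together from every subrange whose current segment covers the block start. A self-contained sketch of the same merge over plain integer intervals; the Seg struct, lane mask values and block starts are all made up for illustration:

#include <cstdio>
#include <vector>

struct Seg { int Start, End; }; // half-open [Start, End), sorted, disjoint

int main() {
  // Two "subranges" with their lane masks, plus sorted block start points.
  std::vector<Seg> SubRange[2] = {{{0, 10}, {30, 50}}, {{5, 40}}};
  unsigned LaneMaskOf[2] = {0x1, 0x2};
  std::vector<int> BlockStarts = {0, 8, 20, 35, 60};

  size_t Iter[2] = {0, 0}; // one cursor per subrange, advanced monotonically
  for (int Begin : BlockStarts) {
    unsigned LaneMask = 0;
    for (int R = 0; R < 2; ++R) {
      // Skip segments that end at or before this block start.
      while (Iter[R] < SubRange[R].size() && SubRange[R][Iter[R]].End <= Begin)
        ++Iter[R];
      // If the current segment has already started, this subrange is
      // live-in at this block.
      if (Iter[R] < SubRange[R].size() && SubRange[R][Iter[R]].Start <= Begin)
        LaneMask |= LaneMaskOf[R];
    }
    if (LaneMask) // the real code calls MBB->addLiveIn(PhysReg, LaneMask)
      std::printf("block@%d live-in lanes: 0x%x\n", Begin, LaneMask);
  }
  return 0;
}

- // Scan the segments of LI.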
- for (const auto &Seg : LI.segments) { - if (!Indexes->findLiveInMBBs(Seg.start, Seg.end, LiveIn)) - continue; - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) - LiveIn[i]->addLiveIn(PhysReg); - LiveIn.clear(); + // Go over MBB begin positions and see if we have segments covering them. + // The following works because segments and the MBBIndex list are both + // sorted by slot indexes. + SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(); + for (const auto &Seg : LI) { + I = Indexes->advanceMBBIndex(I, Seg.start); + for (; I != Indexes->MBBIndexEnd() && I->first < Seg.end; ++I) { + MachineBasicBlock *MBB = I->second; + MBB->addLiveIn(PhysReg); + } } } } @@ -305,7 +335,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { assert(LI.liveAt(BaseIndex) && "Reads of completely dead register should be marked undef already"); unsigned SubRegIdx = MO.getSubReg(); - unsigned UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); // See if any of the relevant subregister liveranges is defined at this point. for (const LiveInterval::SubRange &SR : LI.subranges()) { if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex)) @@ -319,54 +349,15 @@ void VirtRegRewriter::rewrite() { SmallVector SuperDeads; SmallVector SuperDefs; SmallVector SuperKills; - SmallPtrSet NoReturnInsts; - - // Here we have a SparseSet to hold which PhysRegs are actually encountered - // in the MF we are about to iterate over so that later when we call - // setPhysRegUsed, we are only doing it for physRegs that were actually found - // in the program and not for all of the possible physRegs for the given - // target architecture. If the target has a lot of physRegs, then for a small - // program there will be a significant compile time reduction here. - PhysRegs.clear(); - PhysRegs.setUniverse(TRI->getNumRegs()); - - // The function with uwtable should guarantee that the stack unwinder - // can unwind the stack to the previous frame. Thus, we can't apply the - // noreturn optimization if the caller function has uwtable attribute. - bool HasUWTable = MF->getFunction()->hasFnAttribute(Attribute::UWTable); for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); MBBI != MBBE; ++MBBI) { DEBUG(MBBI->print(dbgs(), Indexes)); - bool IsExitBB = MBBI->succ_empty(); for (MachineBasicBlock::instr_iterator MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { - MachineInstr *MI = MII; + MachineInstr *MI = &*MII; ++MII; - // Check if this instruction is a call to a noreturn function. If this - // is a call to noreturn function and we don't need the stack unwinding - // functionality (i.e. this function does not have uwtable attribute and - // the callee function has the nounwind attribute), then we can ignore - // the definitions set by this instruction. - if (!HasUWTable && IsExitBB && MI->isCall()) { - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - MachineOperand &MO = *MOI; - if (!MO.isGlobal()) - continue; - const Function *Func = dyn_cast(MO.getGlobal()); - if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) || - // We need to keep correct unwind information - // even if the function will not return, since the - // runtime may need it. 
- !Func->hasFnAttribute(Attribute::NoUnwind)) - continue; - NoReturnInsts.insert(MI); - break; - } - } - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), MOE = MI->operands_end(); MOI != MOE; ++MOI) { MachineOperand &MO = *MOI; @@ -375,15 +366,6 @@ void VirtRegRewriter::rewrite() { if (MO.isRegMask()) MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - // If we encounter a VirtReg or PhysReg then get at the PhysReg and add - // it to the physreg bitset. Later we use only the PhysRegs that were - // actually encountered in the MF to populate the MRI's used physregs. - if (MO.isReg() && MO.getReg()) - PhysRegs.insert( - TargetRegisterInfo::isVirtualRegister(MO.getReg()) ? - VRM->getPhys(MO.getReg()) : - MO.getReg()); - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; unsigned VirtReg = MO.getReg(); @@ -418,14 +400,6 @@ void VirtRegRewriter::rewrite() { MO.setIsUndef(true); } else if (!MO.isDead()) { assert(MO.isDef()); - // Things get tricky when we ran out of lane mask bits and - // merged multiple lanes into the overflow bit: In this case - // our subregister liveness tracking isn't precise and we can't - // know what subregister parts are undefined, fall back to the - // implicit super-register def then. - unsigned LaneMask = TRI->getSubRegIndexLaneMask(SubReg); - if (TargetRegisterInfo::isImpreciseLaneMask(LaneMask)) - SuperDefs.push_back(PhysReg); } } @@ -470,29 +444,5 @@ void VirtRegRewriter::rewrite() { } } } - - // Tell MRI about physical registers in use. - if (NoReturnInsts.empty()) { - for (SparseSet::iterator - RegI = PhysRegs.begin(), E = PhysRegs.end(); RegI != E; ++RegI) - if (!MRI->reg_nodbg_empty(*RegI)) - MRI->setPhysRegUsed(*RegI); - } else { - for (SparseSet::iterator - I = PhysRegs.begin(), E = PhysRegs.end(); I != E; ++I) { - unsigned Reg = *I; - if (MRI->reg_nodbg_empty(Reg)) - continue; - // Check if this register has a use that will impact the rest of the - // code. Uses in debug and noreturn instructions do not impact the - // generated code. 
- for (MachineInstr &It : MRI->reg_nodbg_instructions(Reg)) { - if (!NoReturnInsts.count(&It)) { - MRI->setPhysRegUsed(Reg); - break; - } - } - } - } } diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp index 0d26ed333ca7..52fb922c935a 100644 --- a/lib/CodeGen/WinEHPrepare.cpp +++ b/lib/CodeGen/WinEHPrepare.cpp @@ -18,66 +18,40 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Triple.h" -#include "llvm/ADT/TinyPtrVector.h" -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/WinEHFuncInfo.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include +#include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; -using namespace llvm::PatternMatch; #define DEBUG_TYPE "winehprepare" +static cl::opt DisableDemotion( + "disable-demotion", cl::Hidden, + cl::desc( + "Clone multicolor basic blocks but do not demote cross funclet values"), + cl::init(false)); + +static cl::opt DisableCleanups( + "disable-cleanups", cl::Hidden, + cl::desc("Do not remove implausible terminators or other similar cleanups"), + cl::init(false)); + namespace { - -// This map is used to model frame variable usage during outlining, to -// construct a structure type to hold the frame variables in a frame -// allocation block, and to remap the frame variable allocas (including -// spill locations as needed) to GEPs that get the variable from the -// frame allocation structure. -typedef MapVector> FrameVarInfoMap; - -// TinyPtrVector cannot hold nullptr, so we need our own sentinel that isn't -// quite null. -AllocaInst *getCatchObjectSentinel() { - return static_cast(nullptr) + 1; -} - -typedef SmallSet VisitedBlockSet; - -class LandingPadActions; -class LandingPadMap; - -typedef DenseMap CatchHandlerMapTy; -typedef DenseMap CleanupHandlerMapTy; - + class WinEHPrepare : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. 
- WinEHPrepare(const TargetMachine *TM = nullptr) - : FunctionPass(ID) { - if (TM) - TheTriple = TM->getTargetTriple(); - } + WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {} bool runOnFunction(Function &Fn) override; @@ -90,264 +64,27 @@ public: } private: - bool prepareExceptionHandlers(Function &F, - SmallVectorImpl &LPads); - void identifyEHBlocks(Function &F, SmallVectorImpl &LPads); - void promoteLandingPadValues(LandingPadInst *LPad); - void demoteValuesLiveAcrossHandlers(Function &F, - SmallVectorImpl &LPads); - void findSEHEHReturnPoints(Function &F, - SetVector &EHReturnBlocks); - void findCXXEHReturnPoints(Function &F, - SetVector &EHReturnBlocks); - void getPossibleReturnTargets(Function *ParentF, Function *HandlerF, - SetVector &Targets); - void completeNestedLandingPad(Function *ParentFn, - LandingPadInst *OutlinedLPad, - const LandingPadInst *OriginalLPad, - FrameVarInfoMap &VarInfo); - Function *createHandlerFunc(Function *ParentFn, Type *RetTy, - const Twine &Name, Module *M, Value *&ParentFP); - bool outlineHandler(ActionHandler *Action, Function *SrcFn, - LandingPadInst *LPad, BasicBlock *StartBB, - FrameVarInfoMap &VarInfo); - void addStubInvokeToHandlerIfNeeded(Function *Handler); + void insertPHIStores(PHINode *OriginalPHI, AllocaInst *SpillSlot); + void + insertPHIStore(BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot, + SmallVectorImpl> &Worklist); + AllocaInst *insertPHILoads(PHINode *PN, Function &F); + void replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot, + DenseMap &Loads, Function &F); + bool prepareExplicitEH(Function &F); + void colorFunclets(Function &F); - void mapLandingPadBlocks(LandingPadInst *LPad, LandingPadActions &Actions); - CatchHandler *findCatchHandler(BasicBlock *BB, BasicBlock *&NextBB, - VisitedBlockSet &VisitedBlocks); - void findCleanupHandlers(LandingPadActions &Actions, BasicBlock *StartBB, - BasicBlock *EndBB); - - void processSEHCatchHandler(CatchHandler *Handler, BasicBlock *StartBB); - - Triple TheTriple; + void demotePHIsOnFunclets(Function &F); + void cloneCommonBlocks(Function &F); + void removeImplausibleInstructions(Function &F); + void cleanupPreparedFunclets(Function &F); + void verifyPreparedFunclets(Function &F); // All fields are reset by runOnFunction. - DominatorTree *DT = nullptr; - const TargetLibraryInfo *LibInfo = nullptr; EHPersonality Personality = EHPersonality::Unknown; - CatchHandlerMapTy CatchHandlerMap; - CleanupHandlerMapTy CleanupHandlerMap; - DenseMap LPadMaps; - SmallPtrSet NormalBlocks; - SmallPtrSet EHBlocks; - SetVector EHReturnBlocks; - // This maps landing pad instructions found in outlined handlers to - // the landing pad instruction in the parent function from which they - // were cloned. The cloned/nested landing pad is used as the key - // because the landing pad may be cloned into multiple handlers. - // This map will be used to add the llvm.eh.actions call to the nested - // landing pads after all handlers have been outlined. - DenseMap NestedLPtoOriginalLP; - - // This maps blocks in the parent function which are destinations of - // catch handlers to cloned blocks in (other) outlined handlers. This - // handles the case where a nested landing pads has a catch handler that - // returns to a handler function rather than the parent function. - // The original block is used as the key here because there should only - // ever be one handler function from which the cloned block is not pruned. 
- // The original block will be pruned from the parent function after all - // handlers have been outlined. This map will be used to adjust the - // return instructions of handlers which return to the block that was - // outlined into a handler. This is done after all handlers have been - // outlined but before the outlined code is pruned from the parent function. - DenseMap LPadTargetBlocks; - - // Map from outlined handler to call to parent local address. Only used for - // 32-bit EH. - DenseMap HandlerToParentFP; - - AllocaInst *SEHExceptionCodeSlot = nullptr; -}; - -class WinEHFrameVariableMaterializer : public ValueMaterializer { -public: - WinEHFrameVariableMaterializer(Function *OutlinedFn, Value *ParentFP, - FrameVarInfoMap &FrameVarInfo); - ~WinEHFrameVariableMaterializer() override {} - - Value *materializeValueFor(Value *V) override; - - void escapeCatchObject(Value *V); - -private: - FrameVarInfoMap &FrameVarInfo; - IRBuilder<> Builder; -}; - -class LandingPadMap { -public: - LandingPadMap() : OriginLPad(nullptr) {} - void mapLandingPad(const LandingPadInst *LPad); - - bool isInitialized() { return OriginLPad != nullptr; } - - bool isOriginLandingPadBlock(const BasicBlock *BB) const; - bool isLandingPadSpecificInst(const Instruction *Inst) const; - - void remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, - Value *SelectorValue) const; - -private: - const LandingPadInst *OriginLPad; - // We will normally only see one of each of these instructions, but - // if more than one occurs for some reason we can handle that. - TinyPtrVector ExtractedEHPtrs; - TinyPtrVector ExtractedSelectors; -}; - -class WinEHCloningDirectorBase : public CloningDirector { -public: - WinEHCloningDirectorBase(Function *HandlerFn, Value *ParentFP, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) - : Materializer(HandlerFn, ParentFP, VarInfo), - SelectorIDType(Type::getInt32Ty(HandlerFn->getContext())), - Int8PtrType(Type::getInt8PtrTy(HandlerFn->getContext())), - LPadMap(LPadMap), ParentFP(ParentFP) {} - - CloningAction handleInstruction(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - - virtual CloningAction handleBeginCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - virtual CloningAction handleEndCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - virtual CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) = 0; - virtual CloningAction handleIndirectBr(ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) = 0; - virtual CloningAction handleInvoke(ValueToValueMapTy &VMap, - const InvokeInst *Invoke, - BasicBlock *NewBB) = 0; - virtual CloningAction handleResume(ValueToValueMapTy &VMap, - const ResumeInst *Resume, - BasicBlock *NewBB) = 0; - virtual CloningAction handleCompare(ValueToValueMapTy &VMap, - const CmpInst *Compare, - BasicBlock *NewBB) = 0; - virtual CloningAction handleLandingPad(ValueToValueMapTy &VMap, - const LandingPadInst *LPad, - BasicBlock *NewBB) = 0; - - ValueMaterializer *getValueMaterializer() override { return &Materializer; } - -protected: - WinEHFrameVariableMaterializer Materializer; - Type *SelectorIDType; - Type *Int8PtrType; - LandingPadMap &LPadMap; - - /// The value representing the parent frame pointer. 
- Value *ParentFP; -}; - -class WinEHCatchDirector : public WinEHCloningDirectorBase { -public: - WinEHCatchDirector( - Function *CatchFn, Value *ParentFP, Value *Selector, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap, - DenseMap &NestedLPads, - DominatorTree *DT, SmallPtrSetImpl &EHBlocks) - : WinEHCloningDirectorBase(CatchFn, ParentFP, VarInfo, LPadMap), - CurrentSelector(Selector->stripPointerCasts()), - ExceptionObjectVar(nullptr), NestedLPtoOriginalLP(NestedLPads), - DT(DT), EHBlocks(EHBlocks) {} - - CloningAction handleBeginCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleIndirectBr(ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) override; - CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, - BasicBlock *NewBB) override; - CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, - BasicBlock *NewBB) override; - CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, - BasicBlock *NewBB) override; - CloningAction handleLandingPad(ValueToValueMapTy &VMap, - const LandingPadInst *LPad, - BasicBlock *NewBB) override; - - Value *getExceptionVar() { return ExceptionObjectVar; } - TinyPtrVector &getReturnTargets() { return ReturnTargets; } - -private: - Value *CurrentSelector; - - Value *ExceptionObjectVar; - TinyPtrVector ReturnTargets; - - // This will be a reference to the field of the same name in the WinEHPrepare - // object which instantiates this WinEHCatchDirector object. 
- DenseMap &NestedLPtoOriginalLP; - DominatorTree *DT; - SmallPtrSetImpl &EHBlocks; -}; - -class WinEHCleanupDirector : public WinEHCloningDirectorBase { -public: - WinEHCleanupDirector(Function *CleanupFn, Value *ParentFP, - FrameVarInfoMap &VarInfo, LandingPadMap &LPadMap) - : WinEHCloningDirectorBase(CleanupFn, ParentFP, VarInfo, - LPadMap) {} - - CloningAction handleBeginCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleEndCatch(ValueToValueMapTy &VMap, const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleTypeIdFor(ValueToValueMapTy &VMap, - const Instruction *Inst, - BasicBlock *NewBB) override; - CloningAction handleIndirectBr(ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) override; - CloningAction handleInvoke(ValueToValueMapTy &VMap, const InvokeInst *Invoke, - BasicBlock *NewBB) override; - CloningAction handleResume(ValueToValueMapTy &VMap, const ResumeInst *Resume, - BasicBlock *NewBB) override; - CloningAction handleCompare(ValueToValueMapTy &VMap, const CmpInst *Compare, - BasicBlock *NewBB) override; - CloningAction handleLandingPad(ValueToValueMapTy &VMap, - const LandingPadInst *LPad, - BasicBlock *NewBB) override; -}; - -class LandingPadActions { -public: - LandingPadActions() : HasCleanupHandlers(false) {} - - void insertCatchHandler(CatchHandler *Action) { Actions.push_back(Action); } - void insertCleanupHandler(CleanupHandler *Action) { - Actions.push_back(Action); - HasCleanupHandlers = true; - } - - bool includesCleanup() const { return HasCleanupHandlers; } - - SmallVectorImpl &actions() { return Actions; } - SmallVectorImpl::iterator begin() { return Actions.begin(); } - SmallVectorImpl::iterator end() { return Actions.end(); } - -private: - // Note that this class does not own the ActionHandler objects in this vector. - // The ActionHandlers are owned by the CatchHandlerMap and CleanupHandlerMap - // in the WinEHPrepare class. - SmallVector Actions; - bool HasCleanupHandlers; + DenseMap BlockColors; + MapVector> FuncletBlocks; }; } // end anonymous namespace @@ -361,2536 +98,987 @@ FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { } bool WinEHPrepare::runOnFunction(Function &Fn) { - // No need to prepare outlined handlers. - if (Fn.hasFnAttribute("wineh-parent")) - return false; - - SmallVector LPads; - SmallVector Resumes; - for (BasicBlock &BB : Fn) { - if (auto *LP = BB.getLandingPadInst()) - LPads.push_back(LP); - if (auto *Resume = dyn_cast(BB.getTerminator())) - Resumes.push_back(Resume); - } - - // No need to prepare functions that lack landing pads. - if (LPads.empty()) + if (!Fn.hasPersonalityFn()) return false; // Classify the personality to see what kind of preparation we need. Personality = classifyEHPersonality(Fn.getPersonalityFn()); - // Do nothing if this is not an MSVC personality. - if (!isMSVCEHPersonality(Personality)) + // Do nothing if this is not a funclet-based personality. + if (!isFuncletEHPersonality(Personality)) return false; - DT = &getAnalysis().getDomTree(); - LibInfo = &getAnalysis().getTLI(); - - // If there were any landing pads, prepareExceptionHandlers will make changes. 
- prepareExceptionHandlers(Fn, LPads); - return true; + return prepareExplicitEH(Fn); } bool WinEHPrepare::doFinalization(Module &M) { return false; } -void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); -} +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {} -static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, - Constant *&Selector, BasicBlock *&NextBB); - -// Finds blocks reachable from the starting set Worklist. Does not follow unwind -// edges or blocks listed in StopPoints. -static void findReachableBlocks(SmallPtrSetImpl &ReachableBBs, - SetVector &Worklist, - const SetVector *StopPoints) { - while (!Worklist.empty()) { - BasicBlock *BB = Worklist.pop_back_val(); - - // Don't cross blocks that we should stop at. - if (StopPoints && StopPoints->count(BB)) - continue; - - if (!ReachableBBs.insert(BB).second) - continue; // Already visited. - - // Don't follow unwind edges of invokes. - if (auto *II = dyn_cast(BB->getTerminator())) { - Worklist.insert(II->getNormalDest()); - continue; - } - - // Otherwise, follow all successors. - Worklist.insert(succ_begin(BB), succ_end(BB)); - } -} - -// Attempt to find an instruction where a block can be split before -// a call to llvm.eh.begincatch and its operands. If the block -// begins with the begincatch call or one of its adjacent operands -// the block will not be split. -static Instruction *findBeginCatchSplitPoint(BasicBlock *BB, - IntrinsicInst *II) { - // If the begincatch call is already the first instruction in the block, - // don't split. - Instruction *FirstNonPHI = BB->getFirstNonPHI(); - if (II == FirstNonPHI) - return nullptr; - - // If either operand is in the same basic block as the instruction and - // isn't used by another instruction before the begincatch call, include it - // in the split block. - auto *Op0 = dyn_cast(II->getOperand(0)); - auto *Op1 = dyn_cast(II->getOperand(1)); - - Instruction *I = II->getPrevNode(); - Instruction *LastI = II; - - while (I == Op0 || I == Op1) { - // If the block begins with one of the operands and there are no other - // instructions between the operand and the begincatch call, don't split. - if (I == FirstNonPHI) - return nullptr; - - LastI = I; - I = I->getPrevNode(); - } - - // If there is at least one instruction in the block before the begincatch - // call and its operands, split the block at either the begincatch or - // its operand. - return LastI; -} - -/// Find all points where exceptional control rejoins normal control flow via -/// llvm.eh.endcatch. Add them to the normal bb reachability worklist. -void WinEHPrepare::findCXXEHReturnPoints( - Function &F, SetVector &EHReturnBlocks) { - for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { - BasicBlock *BB = BBI; - for (Instruction &I : *BB) { - if (match(&I, m_Intrinsic())) { - Instruction *SplitPt = - findBeginCatchSplitPoint(BB, cast(&I)); - if (SplitPt) { - // Split the block before the llvm.eh.begincatch call to allow - // cleanup and catch code to be distinguished later. - // Do not update BBI because we still need to process the - // portion of the block that we are splitting off. - SplitBlock(BB, SplitPt, DT); - break; - } - } - if (match(&I, m_Intrinsic())) { - // Split the block after the call to llvm.eh.endcatch if there is - // anything other than an unconditional branch, or if the successor - // starts with a phi. 
- auto *Br = dyn_cast(I.getNextNode()); - if (!Br || !Br->isUnconditional() || - isa(Br->getSuccessor(0)->begin())) { - DEBUG(dbgs() << "splitting block " << BB->getName() - << " with llvm.eh.endcatch\n"); - BBI = SplitBlock(BB, I.getNextNode(), DT); - } - // The next BB is normal control flow. - EHReturnBlocks.insert(BB->getTerminator()->getSuccessor(0)); - break; - } - } - } -} - -static bool isCatchAllLandingPad(const BasicBlock *BB) { - const LandingPadInst *LP = BB->getLandingPadInst(); - if (!LP) - return false; - unsigned N = LP->getNumClauses(); - return (N > 0 && LP->isCatch(N - 1) && - isa(LP->getClause(N - 1))); -} - -/// Find all points where exceptions control rejoins normal control flow via -/// selector dispatch. -void WinEHPrepare::findSEHEHReturnPoints( - Function &F, SetVector &EHReturnBlocks) { - for (auto BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { - BasicBlock *BB = BBI; - // If the landingpad is a catch-all, treat the whole lpad as if it is - // reachable from normal control flow. - // FIXME: This is imprecise. We need a better way of identifying where a - // catch-all starts and cleanups stop. As far as LLVM is concerned, there - // is no difference. - if (isCatchAllLandingPad(BB)) { - EHReturnBlocks.insert(BB); - continue; - } - - BasicBlock *CatchHandler; - BasicBlock *NextBB; - Constant *Selector; - if (isSelectorDispatch(BB, CatchHandler, Selector, NextBB)) { - // Split the edge if there are multiple predecessors. This creates a place - // where we can insert EH recovery code. - if (!CatchHandler->getSinglePredecessor()) { - DEBUG(dbgs() << "splitting EH return edge from " << BB->getName() - << " to " << CatchHandler->getName() << '\n'); - BBI = CatchHandler = SplitCriticalEdge( - BB, std::find(succ_begin(BB), succ_end(BB), CatchHandler)); - } - EHReturnBlocks.insert(CatchHandler); - } - } -} - -void WinEHPrepare::identifyEHBlocks(Function &F, - SmallVectorImpl &LPads) { - DEBUG(dbgs() << "Demoting values live across exception handlers in function " - << F.getName() << '\n'); - - // Build a set of all non-exceptional blocks and exceptional blocks. - // - Non-exceptional blocks are blocks reachable from the entry block while - // not following invoke unwind edges. - // - Exceptional blocks are blocks reachable from landingpads. Analysis does - // not follow llvm.eh.endcatch blocks, which mark a transition from - // exceptional to normal control. - - if (Personality == EHPersonality::MSVC_CXX) - findCXXEHReturnPoints(F, EHReturnBlocks); - else - findSEHEHReturnPoints(F, EHReturnBlocks); - - DEBUG({ - dbgs() << "identified the following blocks as EH return points:\n"; - for (BasicBlock *BB : EHReturnBlocks) - dbgs() << " " << BB->getName() << '\n'; - }); - -// Join points should not have phis at this point, unless they are a -// landingpad, in which case we will demote their phis later. -#ifndef NDEBUG - for (BasicBlock *BB : EHReturnBlocks) - assert((BB->isLandingPad() || !isa(BB->begin())) && - "non-lpad EH return block has phi"); -#endif - - // Normal blocks are the blocks reachable from the entry block and all EH - // return points. - SetVector Worklist; - Worklist = EHReturnBlocks; - Worklist.insert(&F.getEntryBlock()); - findReachableBlocks(NormalBlocks, Worklist, nullptr); - DEBUG({ - dbgs() << "marked the following blocks as normal:\n"; - for (BasicBlock *BB : NormalBlocks) - dbgs() << " " << BB->getName() << '\n'; - }); - - // Exceptional blocks are the blocks reachable from landingpads that don't - // cross EH return points. 
- Worklist.clear(); - for (auto *LPI : LPads) - Worklist.insert(LPI->getParent()); - findReachableBlocks(EHBlocks, Worklist, &EHReturnBlocks); - DEBUG({ - dbgs() << "marked the following blocks as exceptional:\n"; - for (BasicBlock *BB : EHBlocks) - dbgs() << " " << BB->getName() << '\n'; - }); - -} - -/// Ensure that all values live into and out of exception handlers are stored -/// in memory. -/// FIXME: This falls down when values are defined in one handler and live into -/// another handler. For example, a cleanup defines a value used only by a -/// catch handler. -void WinEHPrepare::demoteValuesLiveAcrossHandlers( - Function &F, SmallVectorImpl &LPads) { - DEBUG(dbgs() << "Demoting values live across exception handlers in function " - << F.getName() << '\n'); - - // identifyEHBlocks() should have been called before this function. - assert(!NormalBlocks.empty()); - - // Try to avoid demoting EH pointer and selector values. They get in the way - // of our pattern matching. - SmallPtrSet EHVals; - for (BasicBlock &BB : F) { - LandingPadInst *LP = BB.getLandingPadInst(); - if (!LP) - continue; - EHVals.insert(LP); - for (User *U : LP->users()) { - auto *EI = dyn_cast(U); - if (!EI) - continue; - EHVals.insert(EI); - for (User *U2 : EI->users()) { - if (auto *PN = dyn_cast(U2)) - EHVals.insert(PN); - } - } - } - - SetVector ArgsToDemote; - SetVector InstrsToDemote; - for (BasicBlock &BB : F) { - bool IsNormalBB = NormalBlocks.count(&BB); - bool IsEHBB = EHBlocks.count(&BB); - if (!IsNormalBB && !IsEHBB) - continue; // Blocks that are neither normal nor EH are unreachable. - for (Instruction &I : BB) { - for (Value *Op : I.operands()) { - // Don't demote static allocas, constants, and labels. - if (isa(Op) || isa(Op) || isa(Op)) - continue; - auto *AI = dyn_cast(Op); - if (AI && AI->isStaticAlloca()) - continue; - - if (auto *Arg = dyn_cast(Op)) { - if (IsEHBB) { - DEBUG(dbgs() << "Demoting argument " << *Arg - << " used by EH instr: " << I << "\n"); - ArgsToDemote.insert(Arg); - } - continue; - } - - // Don't demote EH values. - auto *OpI = cast(Op); - if (EHVals.count(OpI)) - continue; - - BasicBlock *OpBB = OpI->getParent(); - // If a value is produced and consumed in the same BB, we don't need to - // demote it. - if (OpBB == &BB) - continue; - bool IsOpNormalBB = NormalBlocks.count(OpBB); - bool IsOpEHBB = EHBlocks.count(OpBB); - if (IsNormalBB != IsOpNormalBB || IsEHBB != IsOpEHBB) { - DEBUG({ - dbgs() << "Demoting instruction live in-out from EH:\n"; - dbgs() << "Instr: " << *OpI << '\n'; - dbgs() << "User: " << I << '\n'; - }); - InstrsToDemote.insert(OpI); - } - } - } - } - - // Demote values live into and out of handlers. - // FIXME: This demotion is inefficient. We should insert spills at the point - // of definition, insert one reload in each handler that uses the value, and - // insert reloads in the BB used to rejoin normal control flow. - Instruction *AllocaInsertPt = F.getEntryBlock().getFirstInsertionPt(); - for (Instruction *I : InstrsToDemote) - DemoteRegToStack(*I, false, AllocaInsertPt); - - // Demote arguments separately, and only for uses in EH blocks. 
- for (Argument *Arg : ArgsToDemote) { - auto *Slot = new AllocaInst(Arg->getType(), nullptr, - Arg->getName() + ".reg2mem", AllocaInsertPt); - SmallVector Users(Arg->user_begin(), Arg->user_end()); - for (User *U : Users) { - auto *I = dyn_cast(U); - if (I && EHBlocks.count(I->getParent())) { - auto *Reload = new LoadInst(Slot, Arg->getName() + ".reload", false, I); - U->replaceUsesOfWith(Arg, Reload); - } - } - new StoreInst(Arg, Slot, AllocaInsertPt); - } - - // Demote landingpad phis, as the landingpad will be removed from the machine - // CFG. - for (LandingPadInst *LPI : LPads) { - BasicBlock *BB = LPI->getParent(); - while (auto *Phi = dyn_cast(BB->begin())) - DemotePHIToStack(Phi, AllocaInsertPt); - } - - DEBUG(dbgs() << "Demoted " << InstrsToDemote.size() << " instructions and " - << ArgsToDemote.size() << " arguments for WinEHPrepare\n\n"); -} - -bool WinEHPrepare::prepareExceptionHandlers( - Function &F, SmallVectorImpl &LPads) { - // Don't run on functions that are already prepared. - for (LandingPadInst *LPad : LPads) { - BasicBlock *LPadBB = LPad->getParent(); - for (Instruction &Inst : *LPadBB) - if (match(&Inst, m_Intrinsic())) - return false; - } - - identifyEHBlocks(F, LPads); - demoteValuesLiveAcrossHandlers(F, LPads); - - // These containers are used to re-map frame variables that are used in - // outlined catch and cleanup handlers. They will be populated as the - // handlers are outlined. - FrameVarInfoMap FrameVarInfo; - - bool HandlersOutlined = false; - - Module *M = F.getParent(); - LLVMContext &Context = M->getContext(); - - // Create a new function to receive the handler contents. - PointerType *Int8PtrType = Type::getInt8PtrTy(Context); - Type *Int32Type = Type::getInt32Ty(Context); - Function *ActionIntrin = Intrinsic::getDeclaration(M, Intrinsic::eh_actions); - - if (isAsynchronousEHPersonality(Personality)) { - // FIXME: Switch the ehptr type to i32 and then switch this. - SEHExceptionCodeSlot = - new AllocaInst(Int8PtrType, nullptr, "seh_exception_code", - F.getEntryBlock().getFirstInsertionPt()); - } - - // In order to handle the case where one outlined catch handler returns - // to a block within another outlined catch handler that would otherwise - // be unreachable, we need to outline the nested landing pad before we - // outline the landing pad which encloses it. - if (!isAsynchronousEHPersonality(Personality)) - std::sort(LPads.begin(), LPads.end(), - [this](LandingPadInst *const &L, LandingPadInst *const &R) { - return DT->properlyDominates(R->getParent(), L->getParent()); - }); - - // This container stores the llvm.eh.recover and IndirectBr instructions - // that make up the body of each landing pad after it has been outlined. - // We need to defer the population of the target list for the indirectbr - // until all landing pads have been outlined so that we can handle the - // case of blocks in the target that are reached only from nested - // landing pads. - SmallVector, 4> LPadImpls; - - for (LandingPadInst *LPad : LPads) { - // Look for evidence that this landingpad has already been processed. - bool LPadHasActionList = false; - BasicBlock *LPadBB = LPad->getParent(); - for (Instruction &Inst : *LPadBB) { - if (match(&Inst, m_Intrinsic())) { - LPadHasActionList = true; - break; - } - } - - // If we've already outlined the handlers for this landingpad, - // there's nothing more to do here. 
- if (LPadHasActionList) - continue; - - // If either of the values in the aggregate returned by the landing pad is - // extracted and stored to memory, promote the stored value to a register. - promoteLandingPadValues(LPad); - - LandingPadActions Actions; - mapLandingPadBlocks(LPad, Actions); - - HandlersOutlined |= !Actions.actions().empty(); - for (ActionHandler *Action : Actions) { - if (Action->hasBeenProcessed()) - continue; - BasicBlock *StartBB = Action->getStartBlock(); - - // SEH doesn't do any outlining for catches. Instead, pass the handler - // basic block addr to llvm.eh.actions and list the block as a return - // target. - if (isAsynchronousEHPersonality(Personality)) { - if (auto *CatchAction = dyn_cast(Action)) { - processSEHCatchHandler(CatchAction, StartBB); - continue; - } - } - - outlineHandler(Action, &F, LPad, StartBB, FrameVarInfo); - } - - // Split the block after the landingpad instruction so that it is just a - // call to llvm.eh.actions followed by indirectbr. - assert(!isa(LPadBB->begin()) && "lpad phi not removed"); - SplitBlock(LPadBB, LPad->getNextNode(), DT); - // Erase the branch inserted by the split so we can insert indirectbr. - LPadBB->getTerminator()->eraseFromParent(); - - // Replace all extracted values with undef and ultimately replace the - // landingpad with undef. - SmallVector SEHCodeUses; - SmallVector EHUndefs; - for (User *U : LPad->users()) { - auto *E = dyn_cast(U); - if (!E) - continue; - assert(E->getNumIndices() == 1 && - "Unexpected operation: extracting both landing pad values"); - unsigned Idx = *E->idx_begin(); - assert((Idx == 0 || Idx == 1) && "unexpected index"); - if (Idx == 0 && isAsynchronousEHPersonality(Personality)) - SEHCodeUses.push_back(E); - else - EHUndefs.push_back(E); - } - for (Instruction *E : EHUndefs) { - E->replaceAllUsesWith(UndefValue::get(E->getType())); - E->eraseFromParent(); - } - LPad->replaceAllUsesWith(UndefValue::get(LPad->getType())); - - // Rewrite uses of the exception pointer to loads of an alloca. - while (!SEHCodeUses.empty()) { - Instruction *E = SEHCodeUses.pop_back_val(); - SmallVector Uses; - for (Use &U : E->uses()) - Uses.push_back(&U); - for (Use *U : Uses) { - auto *I = cast(U->getUser()); - if (isa(I)) - continue; - if (auto *Phi = dyn_cast(I)) - SEHCodeUses.push_back(Phi); - else - U->set(new LoadInst(SEHExceptionCodeSlot, "sehcode", false, I)); - } - E->replaceAllUsesWith(UndefValue::get(E->getType())); - E->eraseFromParent(); - } - - // Add a call to describe the actions for this landing pad. - std::vector ActionArgs; - for (ActionHandler *Action : Actions) { - // Action codes from docs are: 0 cleanup, 1 catch. - if (auto *CatchAction = dyn_cast(Action)) { - ActionArgs.push_back(ConstantInt::get(Int32Type, 1)); - ActionArgs.push_back(CatchAction->getSelector()); - // Find the frame escape index of the exception object alloca in the - // parent. 
- int FrameEscapeIdx = -1; - Value *EHObj = const_cast(CatchAction->getExceptionVar()); - if (EHObj && !isa(EHObj)) { - auto I = FrameVarInfo.find(EHObj); - assert(I != FrameVarInfo.end() && - "failed to map llvm.eh.begincatch var"); - FrameEscapeIdx = std::distance(FrameVarInfo.begin(), I); - } - ActionArgs.push_back(ConstantInt::get(Int32Type, FrameEscapeIdx)); - } else { - ActionArgs.push_back(ConstantInt::get(Int32Type, 0)); - } - ActionArgs.push_back(Action->getHandlerBlockOrFunc()); - } - CallInst *Recover = - CallInst::Create(ActionIntrin, ActionArgs, "recover", LPadBB); - - SetVector ReturnTargets; - for (ActionHandler *Action : Actions) { - if (auto *CatchAction = dyn_cast(Action)) { - const auto &CatchTargets = CatchAction->getReturnTargets(); - ReturnTargets.insert(CatchTargets.begin(), CatchTargets.end()); - } - } - IndirectBrInst *Branch = - IndirectBrInst::Create(Recover, ReturnTargets.size(), LPadBB); - for (BasicBlock *Target : ReturnTargets) - Branch->addDestination(Target); - - if (!isAsynchronousEHPersonality(Personality)) { - // C++ EH must repopulate the targets later to handle the case of - // targets that are reached indirectly through nested landing pads. - LPadImpls.push_back(std::make_pair(Recover, Branch)); - } - - } // End for each landingpad - - // If nothing got outlined, there is no more processing to be done. - if (!HandlersOutlined) - return false; - - // Replace any nested landing pad stubs with the correct action handler. - // This must be done before we remove unreachable blocks because it - // cleans up references to outlined blocks that will be deleted. - for (auto &LPadPair : NestedLPtoOriginalLP) - completeNestedLandingPad(&F, LPadPair.first, LPadPair.second, FrameVarInfo); - NestedLPtoOriginalLP.clear(); - - // Update the indirectbr instructions' target lists if necessary. - SetVector CheckedTargets; - SmallVector, 4> ActionList; - for (auto &LPadImplPair : LPadImpls) { - IntrinsicInst *Recover = cast(LPadImplPair.first); - IndirectBrInst *Branch = LPadImplPair.second; - - // Get a list of handlers called by - parseEHActions(Recover, ActionList); - - // Add an indirect branch listing possible successors of the catch handlers. - SetVector ReturnTargets; - for (const auto &Action : ActionList) { - if (auto *CA = dyn_cast(Action.get())) { - Function *Handler = cast(CA->getHandlerBlockOrFunc()); - getPossibleReturnTargets(&F, Handler, ReturnTargets); - } - } - ActionList.clear(); - // Clear any targets we already knew about. - for (unsigned int I = 0, E = Branch->getNumDestinations(); I < E; ++I) { - BasicBlock *KnownTarget = Branch->getDestination(I); - if (ReturnTargets.count(KnownTarget)) - ReturnTargets.remove(KnownTarget); - } - for (BasicBlock *Target : ReturnTargets) { - Branch->addDestination(Target); - // The target may be a block that we excepted to get pruned. - // If it is, it may contain a call to llvm.eh.endcatch. - if (CheckedTargets.insert(Target)) { - // Earlier preparations guarantee that all calls to llvm.eh.endcatch - // will be followed by an unconditional branch. - auto *Br = dyn_cast(Target->getTerminator()); - if (Br && Br->isUnconditional() && - Br != Target->getFirstNonPHIOrDbgOrLifetime()) { - Instruction *Prev = Br->getPrevNode(); - if (match(cast(Prev), m_Intrinsic())) - Prev->eraseFromParent(); - } - } - } - } - LPadImpls.clear(); - - F.addFnAttr("wineh-parent", F.getName()); - - // Delete any blocks that were only used by handlers that were outlined above. 
- removeUnreachableBlocks(F); - - BasicBlock *Entry = &F.getEntryBlock(); - IRBuilder<> Builder(F.getParent()->getContext()); - Builder.SetInsertPoint(Entry->getFirstInsertionPt()); - - Function *FrameEscapeFn = - Intrinsic::getDeclaration(M, Intrinsic::localescape); - Function *RecoverFrameFn = - Intrinsic::getDeclaration(M, Intrinsic::localrecover); - SmallVector AllocasToEscape; - - // Scan the entry block for an existing call to llvm.localescape. We need to - // keep escaping those objects. - for (Instruction &I : F.front()) { - auto *II = dyn_cast(&I); - if (II && II->getIntrinsicID() == Intrinsic::localescape) { - auto Args = II->arg_operands(); - AllocasToEscape.append(Args.begin(), Args.end()); - II->eraseFromParent(); - break; - } - } - - // Finally, replace all of the temporary allocas for frame variables used in - // the outlined handlers with calls to llvm.localrecover. - for (auto &VarInfoEntry : FrameVarInfo) { - Value *ParentVal = VarInfoEntry.first; - TinyPtrVector &Allocas = VarInfoEntry.second; - AllocaInst *ParentAlloca = cast(ParentVal); - - // FIXME: We should try to sink unescaped allocas from the parent frame into - // the child frame. If the alloca is escaped, we have to use the lifetime - // markers to ensure that the alloca is only live within the child frame. - - // Add this alloca to the list of things to escape. - AllocasToEscape.push_back(ParentAlloca); - - // Next replace all outlined allocas that are mapped to it. - for (AllocaInst *TempAlloca : Allocas) { - if (TempAlloca == getCatchObjectSentinel()) - continue; // Skip catch parameter sentinels. - Function *HandlerFn = TempAlloca->getParent()->getParent(); - llvm::Value *FP = HandlerToParentFP[HandlerFn]; - assert(FP); - - // FIXME: Sink this localrecover into the blocks where it is used. - Builder.SetInsertPoint(TempAlloca); - Builder.SetCurrentDebugLocation(TempAlloca->getDebugLoc()); - Value *RecoverArgs[] = { - Builder.CreateBitCast(&F, Int8PtrType, ""), FP, - llvm::ConstantInt::get(Int32Type, AllocasToEscape.size() - 1)}; - Instruction *RecoveredAlloca = - Builder.CreateCall(RecoverFrameFn, RecoverArgs); - - // Add a pointer bitcast if the alloca wasn't an i8. - if (RecoveredAlloca->getType() != TempAlloca->getType()) { - RecoveredAlloca->setName(Twine(TempAlloca->getName()) + ".i8"); - RecoveredAlloca = cast( - Builder.CreateBitCast(RecoveredAlloca, TempAlloca->getType())); - } - TempAlloca->replaceAllUsesWith(RecoveredAlloca); - TempAlloca->removeFromParent(); - RecoveredAlloca->takeName(TempAlloca); - delete TempAlloca; - } - } // End for each FrameVarInfo entry. - - // Insert 'call void (...)* @llvm.localescape(...)' at the end of the entry - // block. - Builder.SetInsertPoint(&F.getEntryBlock().back()); - Builder.CreateCall(FrameEscapeFn, AllocasToEscape); - - if (SEHExceptionCodeSlot) { - if (isAllocaPromotable(SEHExceptionCodeSlot)) { - SmallPtrSet UserBlocks; - for (User *U : SEHExceptionCodeSlot->users()) { - if (auto *Inst = dyn_cast(U)) - UserBlocks.insert(Inst->getParent()); - } - PromoteMemToReg(SEHExceptionCodeSlot, *DT); - // After the promotion, kill off dead instructions. 
- for (BasicBlock *BB : UserBlocks) - SimplifyInstructionsInBlock(BB, LibInfo); - } - } - - // Clean up the handler action maps we created for this function - DeleteContainerSeconds(CatchHandlerMap); - CatchHandlerMap.clear(); - DeleteContainerSeconds(CleanupHandlerMap); - CleanupHandlerMap.clear(); - HandlerToParentFP.clear(); - DT = nullptr; - LibInfo = nullptr; - SEHExceptionCodeSlot = nullptr; - EHBlocks.clear(); - NormalBlocks.clear(); - EHReturnBlocks.clear(); - - return HandlersOutlined; -} - -void WinEHPrepare::promoteLandingPadValues(LandingPadInst *LPad) { - // If the return values of the landing pad instruction are extracted and - // stored to memory, we want to promote the store locations to reg values. - SmallVector EHAllocas; - - // The landingpad instruction returns an aggregate value. Typically, its - // value will be passed to a pair of extract value instructions and the - // results of those extracts are often passed to store instructions. - // In unoptimized code the stored value will often be loaded and then stored - // again. - for (auto *U : LPad->users()) { - ExtractValueInst *Extract = dyn_cast(U); - if (!Extract) - continue; - - for (auto *EU : Extract->users()) { - if (auto *Store = dyn_cast(EU)) { - auto *AV = cast(Store->getPointerOperand()); - EHAllocas.push_back(AV); - } - } - } - - // We can't do this without a dominator tree. - assert(DT); - - if (!EHAllocas.empty()) { - PromoteMemToReg(EHAllocas, *DT); - EHAllocas.clear(); - } - - // After promotion, some extracts may be trivially dead. Remove them. - SmallVector Users(LPad->user_begin(), LPad->user_end()); - for (auto *U : Users) - RecursivelyDeleteTriviallyDeadInstructions(U); -} - -void WinEHPrepare::getPossibleReturnTargets(Function *ParentF, - Function *HandlerF, - SetVector &Targets) { - for (BasicBlock &BB : *HandlerF) { - // If the handler contains landing pads, check for any - // handlers that may return directly to a block in the - // parent function. - if (auto *LPI = BB.getLandingPadInst()) { - IntrinsicInst *Recover = cast(LPI->getNextNode()); - SmallVector, 4> ActionList; - parseEHActions(Recover, ActionList); - for (const auto &Action : ActionList) { - if (auto *CH = dyn_cast(Action.get())) { - Function *NestedF = cast(CH->getHandlerBlockOrFunc()); - getPossibleReturnTargets(ParentF, NestedF, Targets); - } - } - } - - auto *Ret = dyn_cast(BB.getTerminator()); - if (!Ret) - continue; - - // Handler functions must always return a block address. - BlockAddress *BA = cast(Ret->getReturnValue()); - - // If this is the handler for a nested landing pad, the - // return address may have been remapped to a block in the - // parent handler. We're not interested in those. - if (BA->getFunction() != ParentF) - continue; - - Targets.insert(BA->getBasicBlock()); - } -} - -void WinEHPrepare::completeNestedLandingPad(Function *ParentFn, - LandingPadInst *OutlinedLPad, - const LandingPadInst *OriginalLPad, - FrameVarInfoMap &FrameVarInfo) { - // Get the nested block and erase the unreachable instruction that was - // temporarily inserted as its terminator. - LLVMContext &Context = ParentFn->getContext(); - BasicBlock *OutlinedBB = OutlinedLPad->getParent(); - // If the nested landing pad was outlined before the landing pad that enclosed - // it, it will already be in outlined form. In that case, we just need to see - // if the returns and the enclosing branch instruction need to be updated. 
- IndirectBrInst *Branch = - dyn_cast(OutlinedBB->getTerminator()); - if (!Branch) { - // If the landing pad wasn't in outlined form, it should be a stub with - // an unreachable terminator. - assert(isa(OutlinedBB->getTerminator())); - OutlinedBB->getTerminator()->eraseFromParent(); - // That should leave OutlinedLPad as the last instruction in its block. - assert(&OutlinedBB->back() == OutlinedLPad); - } - - // The original landing pad will have already had its action intrinsic - // built by the outlining loop. We need to clone that into the outlined - // location. It may also be necessary to add references to the exception - // variables to the outlined handler in which this landing pad is nested - // and remap return instructions in the nested handlers that should return - // to an address in the outlined handler. - Function *OutlinedHandlerFn = OutlinedBB->getParent(); - BasicBlock::const_iterator II = OriginalLPad; - ++II; - // The instruction after the landing pad should now be a call to eh.actions. - const Instruction *Recover = II; - const IntrinsicInst *EHActions = cast(Recover); - - // Remap the return target in the nested handler. - SmallVector ActionTargets; - SmallVector, 4> ActionList; - parseEHActions(EHActions, ActionList); - for (const auto &Action : ActionList) { - auto *Catch = dyn_cast(Action.get()); - if (!Catch) - continue; - // The dyn_cast to function here selects C++ catch handlers and skips - // SEH catch handlers. - auto *Handler = dyn_cast(Catch->getHandlerBlockOrFunc()); - if (!Handler) - continue; - // Visit all the return instructions, looking for places that return - // to a location within OutlinedHandlerFn. - for (BasicBlock &NestedHandlerBB : *Handler) { - auto *Ret = dyn_cast(NestedHandlerBB.getTerminator()); - if (!Ret) - continue; - - // Handler functions must always return a block address. - BlockAddress *BA = cast(Ret->getReturnValue()); - // The original target will have been in the main parent function, - // but if it is the address of a block that has been outlined, it - // should be a block that was outlined into OutlinedHandlerFn. - assert(BA->getFunction() == ParentFn); - - // Ignore targets that aren't part of an outlined handler function. - if (!LPadTargetBlocks.count(BA->getBasicBlock())) - continue; - - // If the return value is the address ofF a block that we - // previously outlined into the parent handler function, replace - // the return instruction and add the mapped target to the list - // of possible return addresses. - BasicBlock *MappedBB = LPadTargetBlocks[BA->getBasicBlock()]; - assert(MappedBB->getParent() == OutlinedHandlerFn); - BlockAddress *NewBA = BlockAddress::get(OutlinedHandlerFn, MappedBB); - Ret->eraseFromParent(); - ReturnInst::Create(Context, NewBA, &NestedHandlerBB); - ActionTargets.push_back(NewBA); - } - } - ActionList.clear(); - - if (Branch) { - // If the landing pad was already in outlined form, just update its targets. - for (unsigned int I = Branch->getNumDestinations(); I > 0; --I) - Branch->removeDestination(I); - // Add the previously collected action targets. - for (auto *Target : ActionTargets) - Branch->addDestination(Target->getBasicBlock()); - } else { - // If the landing pad was previously stubbed out, fill in its outlined form. - IntrinsicInst *NewEHActions = cast(EHActions->clone()); - OutlinedBB->getInstList().push_back(NewEHActions); - - // Insert an indirect branch into the outlined landing pad BB. 
- IndirectBrInst *IBr = IndirectBrInst::Create(NewEHActions, 0, OutlinedBB); - // Add the previously collected action targets. - for (auto *Target : ActionTargets) - IBr->addDestination(Target->getBasicBlock()); - } -} - -// This function examines a block to determine whether the block ends with a -// conditional branch to a catch handler based on a selector comparison. -// This function is used both by the WinEHPrepare::findSelectorComparison() and -// WinEHCleanupDirector::handleTypeIdFor(). -static bool isSelectorDispatch(BasicBlock *BB, BasicBlock *&CatchHandler, - Constant *&Selector, BasicBlock *&NextBB) { - ICmpInst::Predicate Pred; - BasicBlock *TBB, *FBB; - Value *LHS, *RHS; - - if (!match(BB->getTerminator(), - m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)), TBB, FBB))) - return false; - - if (!match(LHS, - m_Intrinsic(m_Constant(Selector))) && - !match(RHS, m_Intrinsic(m_Constant(Selector)))) - return false; - - if (Pred == CmpInst::ICMP_EQ) { - CatchHandler = TBB; - NextBB = FBB; - return true; - } - - if (Pred == CmpInst::ICMP_NE) { - CatchHandler = FBB; - NextBB = TBB; - return true; - } - - return false; -} - -static bool isCatchBlock(BasicBlock *BB) { - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) { - if (match(cast(II), m_Intrinsic())) - return true; - } - return false; -} - -static BasicBlock *createStubLandingPad(Function *Handler) { - // FIXME: Finish this! - LLVMContext &Context = Handler->getContext(); - BasicBlock *StubBB = BasicBlock::Create(Context, "stub"); - Handler->getBasicBlockList().push_back(StubBB); - IRBuilder<> Builder(StubBB); - LandingPadInst *LPad = Builder.CreateLandingPad( - llvm::StructType::get(Type::getInt8PtrTy(Context), - Type::getInt32Ty(Context), nullptr), - 0); - // Insert a call to llvm.eh.actions so that we don't try to outline this lpad. - Function *ActionIntrin = - Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::eh_actions); - Builder.CreateCall(ActionIntrin, {}, "recover"); - LPad->setCleanup(true); - Builder.CreateUnreachable(); - return StubBB; -} - -// Cycles through the blocks in an outlined handler function looking for an -// invoke instruction and inserts an invoke of llvm.donothing with an empty -// landing pad if none is found. The code that generates the .xdata tables for -// the handler needs at least one landing pad to identify the parent function's -// personality. -void WinEHPrepare::addStubInvokeToHandlerIfNeeded(Function *Handler) { - ReturnInst *Ret = nullptr; - UnreachableInst *Unreached = nullptr; - for (BasicBlock &BB : *Handler) { - TerminatorInst *Terminator = BB.getTerminator(); - // If we find an invoke, there is nothing to be done. - auto *II = dyn_cast(Terminator); - if (II) - return; - // If we've already recorded a return instruction, keep looking for invokes. - if (!Ret) - Ret = dyn_cast(Terminator); - // If we haven't recorded an unreachable instruction, try this terminator. - if (!Unreached) - Unreached = dyn_cast(Terminator); - } - - // If we got this far, the handler contains no invokes. We should have seen - // at least one return or unreachable instruction. We'll insert an invoke of - // llvm.donothing ahead of that instruction. - assert(Ret || Unreached); - TerminatorInst *Term; - if (Ret) - Term = Ret; - else - Term = Unreached; - BasicBlock *OldRetBB = Term->getParent(); - BasicBlock *NewRetBB = SplitBlock(OldRetBB, Term, DT); - // SplitBlock adds an unconditional branch instruction at the end of the - // parent block. 
We want to replace that with an invoke call, so we can - // erase it now. - OldRetBB->getTerminator()->eraseFromParent(); - BasicBlock *StubLandingPad = createStubLandingPad(Handler); - Function *F = - Intrinsic::getDeclaration(Handler->getParent(), Intrinsic::donothing); - InvokeInst::Create(F, NewRetBB, StubLandingPad, None, "", OldRetBB); -} - -// FIXME: Consider sinking this into lib/Target/X86 somehow. TargetLowering -// usually doesn't build LLVM IR, so that's probably the wrong place. -Function *WinEHPrepare::createHandlerFunc(Function *ParentFn, Type *RetTy, - const Twine &Name, Module *M, - Value *&ParentFP) { - // x64 uses a two-argument prototype where the parent FP is the second - // argument. x86 uses no arguments, just the incoming EBP value. - LLVMContext &Context = M->getContext(); - Type *Int8PtrType = Type::getInt8PtrTy(Context); - FunctionType *FnType; - if (TheTriple.getArch() == Triple::x86_64) { - Type *ArgTys[2] = {Int8PtrType, Int8PtrType}; - FnType = FunctionType::get(RetTy, ArgTys, false); - } else { - FnType = FunctionType::get(RetTy, None, false); - } - - Function *Handler = - Function::Create(FnType, GlobalVariable::InternalLinkage, Name, M); - BasicBlock *Entry = BasicBlock::Create(Context, "entry"); - Handler->getBasicBlockList().push_front(Entry); - if (TheTriple.getArch() == Triple::x86_64) { - ParentFP = &(Handler->getArgumentList().back()); - } else { - assert(M); - Function *FrameAddressFn = - Intrinsic::getDeclaration(M, Intrinsic::frameaddress); - Function *RecoverFPFn = - Intrinsic::getDeclaration(M, Intrinsic::x86_seh_recoverfp); - IRBuilder<> Builder(&Handler->getEntryBlock()); - Value *EBP = - Builder.CreateCall(FrameAddressFn, {Builder.getInt32(1)}, "ebp"); - Value *ParentI8Fn = Builder.CreateBitCast(ParentFn, Int8PtrType); - ParentFP = Builder.CreateCall(RecoverFPFn, {ParentI8Fn, EBP}); - } - return Handler; -} - -bool WinEHPrepare::outlineHandler(ActionHandler *Action, Function *SrcFn, - LandingPadInst *LPad, BasicBlock *StartBB, - FrameVarInfoMap &VarInfo) { - Module *M = SrcFn->getParent(); - LLVMContext &Context = M->getContext(); - Type *Int8PtrType = Type::getInt8PtrTy(Context); - - // Create a new function to receive the handler contents. - Value *ParentFP; - Function *Handler; - if (Action->getType() == Catch) { - Handler = createHandlerFunc(SrcFn, Int8PtrType, SrcFn->getName() + ".catch", M, - ParentFP); - } else { - Handler = createHandlerFunc(SrcFn, Type::getVoidTy(Context), - SrcFn->getName() + ".cleanup", M, ParentFP); - } - Handler->setPersonalityFn(SrcFn->getPersonalityFn()); - HandlerToParentFP[Handler] = ParentFP; - Handler->addFnAttr("wineh-parent", SrcFn->getName()); - BasicBlock *Entry = &Handler->getEntryBlock(); - - // Generate a standard prolog to setup the frame recovery structure. 
- IRBuilder<> Builder(Context); - Builder.SetInsertPoint(Entry); - Builder.SetCurrentDebugLocation(LPad->getDebugLoc()); - - std::unique_ptr Director; - - ValueToValueMapTy VMap; - - LandingPadMap &LPadMap = LPadMaps[LPad]; - if (!LPadMap.isInitialized()) - LPadMap.mapLandingPad(LPad); - if (auto *CatchAction = dyn_cast(Action)) { - Constant *Sel = CatchAction->getSelector(); - Director.reset(new WinEHCatchDirector(Handler, ParentFP, Sel, VarInfo, - LPadMap, NestedLPtoOriginalLP, DT, - EHBlocks)); - LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), - ConstantInt::get(Type::getInt32Ty(Context), 1)); - } else { - Director.reset( - new WinEHCleanupDirector(Handler, ParentFP, VarInfo, LPadMap)); - LPadMap.remapEHValues(VMap, UndefValue::get(Int8PtrType), - UndefValue::get(Type::getInt32Ty(Context))); - } - - SmallVector Returns; - ClonedCodeInfo OutlinedFunctionInfo; - - // If the start block contains PHI nodes, we need to map them. - BasicBlock::iterator II = StartBB->begin(); - while (auto *PN = dyn_cast(II)) { - bool Mapped = false; - // Look for PHI values that we have already mapped (such as the selector). - for (Value *Val : PN->incoming_values()) { - if (VMap.count(Val)) { - VMap[PN] = VMap[Val]; - Mapped = true; - } - } - // If we didn't find a match for this value, map it as an undef. - if (!Mapped) { - VMap[PN] = UndefValue::get(PN->getType()); - } - ++II; - } - - // The landing pad value may be used by PHI nodes. It will ultimately be - // eliminated, but we need it in the map for intermediate handling. - VMap[LPad] = UndefValue::get(LPad->getType()); - - // Skip over PHIs and, if applicable, landingpad instructions. - II = StartBB->getFirstInsertionPt(); - - CloneAndPruneIntoFromInst(Handler, SrcFn, II, VMap, - /*ModuleLevelChanges=*/false, Returns, "", - &OutlinedFunctionInfo, Director.get()); - - // Move all the instructions in the cloned "entry" block into our entry block. - // Depending on how the parent function was laid out, the block that will - // correspond to the outlined entry block may not be the first block in the - // list. We can recognize it, however, as the cloned block which has no - // predecessors. Any other block wouldn't have been cloned if it didn't - // have a predecessor which was also cloned. - Function::iterator ClonedIt = std::next(Function::iterator(Entry)); - while (!pred_empty(ClonedIt)) - ++ClonedIt; - BasicBlock *ClonedEntryBB = ClonedIt; - assert(ClonedEntryBB); - Entry->getInstList().splice(Entry->end(), ClonedEntryBB->getInstList()); - ClonedEntryBB->eraseFromParent(); - - // Make sure we can identify the handler's personality later. - addStubInvokeToHandlerIfNeeded(Handler); - - if (auto *CatchAction = dyn_cast(Action)) { - WinEHCatchDirector *CatchDirector = - reinterpret_cast(Director.get()); - CatchAction->setExceptionVar(CatchDirector->getExceptionVar()); - CatchAction->setReturnTargets(CatchDirector->getReturnTargets()); - - // Look for blocks that are not part of the landing pad that we just - // outlined but terminate with a call to llvm.eh.endcatch and a - // branch to a block that is in the handler we just outlined. - // These blocks will be part of a nested landing pad that intends to - // return to an address in this handler. This case is best handled - // after both landing pads have been outlined, so for now we'll just - // save the association of the blocks in LPadTargetBlocks. The - // return instructions which are created from these branches will be - // replaced after all landing pads have been outlined. 
- for (const auto MapEntry : VMap) { - // VMap maps all values and blocks that were just cloned, but dead - // blocks which were pruned will map to nullptr. - if (!isa(MapEntry.first) || MapEntry.second == nullptr) - continue; - const BasicBlock *MappedBB = cast(MapEntry.first); - for (auto *Pred : predecessors(const_cast(MappedBB))) { - auto *Branch = dyn_cast(Pred->getTerminator()); - if (!Branch || !Branch->isUnconditional() || Pred->size() <= 1) - continue; - BasicBlock::iterator II = const_cast(Branch); - --II; - if (match(cast(II), m_Intrinsic())) { - // This would indicate that a nested landing pad wants to return - // to a block that is outlined into two different handlers. - assert(!LPadTargetBlocks.count(MappedBB)); - LPadTargetBlocks[MappedBB] = cast(MapEntry.second); - } - } - } - } // End if (CatchAction) - - Action->setHandlerBlockOrFunc(Handler); - - return true; -} - -/// This BB must end in a selector dispatch. All we need to do is pass the -/// handler block to llvm.eh.actions and list it as a possible indirectbr -/// target. -void WinEHPrepare::processSEHCatchHandler(CatchHandler *CatchAction, - BasicBlock *StartBB) { - BasicBlock *HandlerBB; - BasicBlock *NextBB; - Constant *Selector; - bool Res = isSelectorDispatch(StartBB, HandlerBB, Selector, NextBB); - if (Res) { - // If this was EH dispatch, this must be a conditional branch to the handler - // block. - // FIXME: Handle instructions in the dispatch block. Currently we drop them, - // leading to crashes if some optimization hoists stuff here. - assert(CatchAction->getSelector() && HandlerBB && - "expected catch EH dispatch"); - } else { - // This must be a catch-all. Split the block after the landingpad. - assert(CatchAction->getSelector()->isNullValue() && "expected catch-all"); - HandlerBB = SplitBlock(StartBB, StartBB->getFirstInsertionPt(), DT); - } - IRBuilder<> Builder(HandlerBB->getFirstInsertionPt()); - Function *EHCodeFn = Intrinsic::getDeclaration( - StartBB->getParent()->getParent(), Intrinsic::eh_exceptioncode); - Value *Code = Builder.CreateCall(EHCodeFn, {}, "sehcode"); - Code = Builder.CreateIntToPtr(Code, SEHExceptionCodeSlot->getAllocatedType()); - Builder.CreateStore(Code, SEHExceptionCodeSlot); - CatchAction->setHandlerBlockOrFunc(BlockAddress::get(HandlerBB)); - TinyPtrVector Targets(HandlerBB); - CatchAction->setReturnTargets(Targets); -} - -void LandingPadMap::mapLandingPad(const LandingPadInst *LPad) { - // Each instance of this class should only ever be used to map a single - // landing pad. - assert(OriginLPad == nullptr || OriginLPad == LPad); - - // If the landing pad has already been mapped, there's nothing more to do. - if (OriginLPad == LPad) - return; - - OriginLPad = LPad; - - // The landingpad instruction returns an aggregate value. Typically, its - // value will be passed to a pair of extract value instructions and the - // results of those extracts will have been promoted to reg values before - // this routine is called. 
- for (auto *U : LPad->users()) { - const ExtractValueInst *Extract = dyn_cast(U); - if (!Extract) - continue; - assert(Extract->getNumIndices() == 1 && - "Unexpected operation: extracting both landing pad values"); - unsigned int Idx = *(Extract->idx_begin()); - assert((Idx == 0 || Idx == 1) && - "Unexpected operation: extracting an unknown landing pad element"); - if (Idx == 0) { - ExtractedEHPtrs.push_back(Extract); - } else if (Idx == 1) { - ExtractedSelectors.push_back(Extract); - } - } -} - -bool LandingPadMap::isOriginLandingPadBlock(const BasicBlock *BB) const { - return BB->getLandingPadInst() == OriginLPad; -} - -bool LandingPadMap::isLandingPadSpecificInst(const Instruction *Inst) const { - if (Inst == OriginLPad) - return true; - for (auto *Extract : ExtractedEHPtrs) { - if (Inst == Extract) - return true; - } - for (auto *Extract : ExtractedSelectors) { - if (Inst == Extract) - return true; - } - return false; -} - -void LandingPadMap::remapEHValues(ValueToValueMapTy &VMap, Value *EHPtrValue, - Value *SelectorValue) const { - // Remap all landing pad extract instructions to the specified values. - for (auto *Extract : ExtractedEHPtrs) - VMap[Extract] = EHPtrValue; - for (auto *Extract : ExtractedSelectors) - VMap[Extract] = SelectorValue; -} - -static bool isLocalAddressCall(const Value *V) { - return match(const_cast(V), m_Intrinsic()); -} - -CloningDirector::CloningAction WinEHCloningDirectorBase::handleInstruction( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // If this is one of the boilerplate landing pad instructions, skip it. - // The instruction will have already been remapped in VMap. - if (LPadMap.isLandingPadSpecificInst(Inst)) - return CloningDirector::SkipInstruction; - - // Nested landing pads that have not already been outlined will be cloned as - // stubs, with just the landingpad instruction and an unreachable instruction. - // When all landingpads have been outlined, we'll replace this with the - // llvm.eh.actions call and indirect branch created when the landing pad was - // outlined. - if (auto *LPad = dyn_cast(Inst)) { - return handleLandingPad(VMap, LPad, NewBB); - } - - // Nested landing pads that have already been outlined will be cloned in their - // outlined form, but we need to intercept the ibr instruction to filter out - // targets that do not return to the handler we are outlining. - if (auto *IBr = dyn_cast(Inst)) { - return handleIndirectBr(VMap, IBr, NewBB); - } - - if (auto *Invoke = dyn_cast(Inst)) - return handleInvoke(VMap, Invoke, NewBB); - - if (auto *Resume = dyn_cast(Inst)) - return handleResume(VMap, Resume, NewBB); - - if (auto *Cmp = dyn_cast(Inst)) - return handleCompare(VMap, Cmp, NewBB); - - if (match(Inst, m_Intrinsic())) - return handleBeginCatch(VMap, Inst, NewBB); - if (match(Inst, m_Intrinsic())) - return handleEndCatch(VMap, Inst, NewBB); - if (match(Inst, m_Intrinsic())) - return handleTypeIdFor(VMap, Inst, NewBB); - - // When outlining llvm.localaddress(), remap that to the second argument, - // which is the FP of the parent. - if (isLocalAddressCall(Inst)) { - VMap[Inst] = ParentFP; - return CloningDirector::SkipInstruction; - } - - // Continue with the default cloning behavior. - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleLandingPad( - ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { - // If the instruction after the landing pad is a call to llvm.eh.actions - // the landing pad has already been outlined. 
In this case, we should - // clone it because it may return to a block in the handler we are - // outlining now that would otherwise be unreachable. The landing pads - // are sorted before outlining begins to enable this case to work - // properly. - const Instruction *NextI = LPad->getNextNode(); - if (match(NextI, m_Intrinsic())) - return CloningDirector::CloneInstruction; - - // If the landing pad hasn't been outlined yet, the landing pad we are - // outlining now does not dominate it and so it cannot return to a block - // in this handler. In that case, we can just insert a stub landing - // pad now and patch it up later. - Instruction *NewInst = LPad->clone(); - if (LPad->hasName()) - NewInst->setName(LPad->getName()); - // Save this correlation for later processing. - NestedLPtoOriginalLP[cast(NewInst)] = LPad; - VMap[LPad] = NewInst; - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(NewInst); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleBeginCatch( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // The argument to the call is some form of the first element of the - // landingpad aggregate value, but that doesn't matter. It isn't used - // here. - // The second argument is an outparameter where the exception object will be - // stored. Typically the exception object is a scalar, but it can be an - // aggregate when catching by value. - // FIXME: Leave something behind to indicate where the exception object lives - // for this handler. Should it be part of llvm.eh.actions? - assert(ExceptionObjectVar == nullptr && "Multiple calls to " - "llvm.eh.begincatch found while " - "outlining catch handler."); - ExceptionObjectVar = Inst->getOperand(1)->stripPointerCasts(); - if (isa(ExceptionObjectVar)) - return CloningDirector::SkipInstruction; - assert(cast(ExceptionObjectVar)->isStaticAlloca() && - "catch parameter is not static alloca"); - Materializer.escapeCatchObject(ExceptionObjectVar); - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction -WinEHCatchDirector::handleEndCatch(ValueToValueMapTy &VMap, - const Instruction *Inst, BasicBlock *NewBB) { - auto *IntrinCall = dyn_cast(Inst); - // It might be interesting to track whether or not we are inside a catch - // function, but that might make the algorithm more brittle than it needs - // to be. - - // The end catch call can occur in one of two places: either in a - // landingpad block that is part of the catch handlers exception mechanism, - // or at the end of the catch block. However, a catch-all handler may call - // end catch from the original landing pad. If the call occurs in a nested - // landing pad block, we must skip it and continue so that the landing pad - // gets cloned. - auto *ParentBB = IntrinCall->getParent(); - if (ParentBB->isLandingPad() && !LPadMap.isOriginLandingPadBlock(ParentBB)) - return CloningDirector::SkipInstruction; - - // If an end catch occurs anywhere else we want to terminate the handler - // with a return to the code that follows the endcatch call. If the - // next instruction is not an unconditional branch, we need to split the - // block to provide a clear target for the return instruction. 
- BasicBlock *ContinueBB; - auto Next = std::next(BasicBlock::const_iterator(IntrinCall)); - const BranchInst *Branch = dyn_cast(Next); - if (!Branch || !Branch->isUnconditional()) { - // We're interrupting the cloning process at this location, so the - // const_cast we're doing here will not cause a problem. - ContinueBB = SplitBlock(const_cast(ParentBB), - const_cast(cast(Next))); - } else { - ContinueBB = Branch->getSuccessor(0); - } - - ReturnInst::Create(NewBB->getContext(), BlockAddress::get(ContinueBB), NewBB); - ReturnTargets.push_back(ContinueBB); - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block so that - // the branch instruction will be skipped. - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleTypeIdFor( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - auto *IntrinCall = dyn_cast(Inst); - Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); - // This causes a replacement that will collapse the landing pad CFG based - // on the filter function we intend to match. - if (Selector == CurrentSelector) - VMap[Inst] = ConstantInt::get(SelectorIDType, 1); - else - VMap[Inst] = ConstantInt::get(SelectorIDType, 0); - // Tell the caller not to clone this instruction. - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction WinEHCatchDirector::handleIndirectBr( - ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) { - // If this indirect branch is not part of a landing pad block, just clone it. - const BasicBlock *ParentBB = IBr->getParent(); - if (!ParentBB->isLandingPad()) - return CloningDirector::CloneInstruction; - - // If it is part of a landing pad, we want to filter out target blocks - // that are not part of the handler we are outlining. - const LandingPadInst *LPad = ParentBB->getLandingPadInst(); - - // Save this correlation for later processing. - NestedLPtoOriginalLP[cast(VMap[LPad])] = LPad; - - // We should only get here for landing pads that have already been outlined. - assert(match(LPad->getNextNode(), m_Intrinsic())); - - // Copy the indirectbr, but only include targets that were previously - // identified as EH blocks and are dominated by the nested landing pad. - SetVector ReturnTargets; - for (int I = 0, E = IBr->getNumDestinations(); I < E; ++I) { - auto *TargetBB = IBr->getDestination(I); - if (EHBlocks.count(const_cast(TargetBB)) && - DT->dominates(ParentBB, TargetBB)) { - DEBUG(dbgs() << " Adding destination " << TargetBB->getName() << "\n"); - ReturnTargets.insert(TargetBB); - } - } - IndirectBrInst *NewBranch = - IndirectBrInst::Create(const_cast(IBr->getAddress()), - ReturnTargets.size(), NewBB); - for (auto *Target : ReturnTargets) - NewBranch->addDestination(const_cast(Target)); - - // The operands and targets of the branch instruction are remapped later - // because it is a terminator. Tell the cloning code to clone the - // blocks we just added to the target list. - return CloningDirector::CloneSuccessors; -} - -CloningDirector::CloningAction -WinEHCatchDirector::handleInvoke(ValueToValueMapTy &VMap, - const InvokeInst *Invoke, BasicBlock *NewBB) { - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction -WinEHCatchDirector::handleResume(ValueToValueMapTy &VMap, - const ResumeInst *Resume, BasicBlock *NewBB) { - // Resume instructions shouldn't be reachable from catch handlers. 
- // We still need to handle it, but it will be pruned. - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(new UnreachableInst(NewBB->getContext())); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction -WinEHCatchDirector::handleCompare(ValueToValueMapTy &VMap, - const CmpInst *Compare, BasicBlock *NewBB) { - const IntrinsicInst *IntrinCall = nullptr; - if (match(Compare->getOperand(0), m_Intrinsic())) { - IntrinCall = dyn_cast(Compare->getOperand(0)); - } else if (match(Compare->getOperand(1), - m_Intrinsic())) { - IntrinCall = dyn_cast(Compare->getOperand(1)); - } - if (IntrinCall) { - Value *Selector = IntrinCall->getArgOperand(0)->stripPointerCasts(); - // This causes a replacement that will collapse the landing pad CFG based - // on the filter function we intend to match. - if (Selector == CurrentSelector->stripPointerCasts()) { - VMap[Compare] = ConstantInt::get(SelectorIDType, 1); - } else { - VMap[Compare] = ConstantInt::get(SelectorIDType, 0); - } - return CloningDirector::SkipInstruction; - } - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleLandingPad( - ValueToValueMapTy &VMap, const LandingPadInst *LPad, BasicBlock *NewBB) { - // The MS runtime will terminate the process if an exception occurs in a - // cleanup handler, so we shouldn't encounter landing pads in the actual - // cleanup code, but they may appear in catch blocks. Depending on where - // we started cloning we may see one, but it will get dropped during dead - // block pruning. - Instruction *NewInst = new UnreachableInst(NewBB->getContext()); - VMap[LPad] = NewInst; - BasicBlock::InstListType &InstList = NewBB->getInstList(); - InstList.push_back(NewInst); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleBeginCatch( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // Cleanup code may flow into catch blocks or the catch block may be part - // of a branch that will be optimized away. We'll insert a return - // instruction now, but it may be pruned before the cloning process is - // complete. - ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleEndCatch( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // Cleanup handlers nested within catch handlers may begin with a call to - // eh.endcatch. We can just ignore that instruction. - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleTypeIdFor( - ValueToValueMapTy &VMap, const Instruction *Inst, BasicBlock *NewBB) { - // If we encounter a selector comparison while cloning a cleanup handler, - // we want to stop cloning immediately. Anything after the dispatch - // will be outlined into a different handler. - BasicBlock *CatchHandler; - Constant *Selector; - BasicBlock *NextBB; - if (isSelectorDispatch(const_cast(Inst->getParent()), - CatchHandler, Selector, NextBB)) { - ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); - return CloningDirector::StopCloningBB; - } - // If eg.typeid.for is called for any other reason, it can be ignored. 
- VMap[Inst] = ConstantInt::get(SelectorIDType, 0); - return CloningDirector::SkipInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleIndirectBr( - ValueToValueMapTy &VMap, - const IndirectBrInst *IBr, - BasicBlock *NewBB) { - // No special handling is required for cleanup cloning. - return CloningDirector::CloneInstruction; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleInvoke( - ValueToValueMapTy &VMap, const InvokeInst *Invoke, BasicBlock *NewBB) { - // All invokes in cleanup handlers can be replaced with calls. - SmallVector CallArgs(Invoke->op_begin(), Invoke->op_end() - 3); - // Insert a normal call instruction... - CallInst *NewCall = - CallInst::Create(const_cast(Invoke->getCalledValue()), CallArgs, - Invoke->getName(), NewBB); - NewCall->setCallingConv(Invoke->getCallingConv()); - NewCall->setAttributes(Invoke->getAttributes()); - NewCall->setDebugLoc(Invoke->getDebugLoc()); - VMap[Invoke] = NewCall; - - // Remap the operands. - llvm::RemapInstruction(NewCall, VMap, RF_None, nullptr, &Materializer); - - // Insert an unconditional branch to the normal destination. - BranchInst::Create(Invoke->getNormalDest(), NewBB); - - // The unwind destination won't be cloned into the new function, so - // we don't need to clean up its phi nodes. - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block. - return CloningDirector::CloneSuccessors; -} - -CloningDirector::CloningAction WinEHCleanupDirector::handleResume( - ValueToValueMapTy &VMap, const ResumeInst *Resume, BasicBlock *NewBB) { - ReturnInst::Create(NewBB->getContext(), nullptr, NewBB); - - // We just added a terminator to the cloned block. - // Tell the caller to stop processing the current basic block so that - // the branch instruction will be skipped. - return CloningDirector::StopCloningBB; -} - -CloningDirector::CloningAction -WinEHCleanupDirector::handleCompare(ValueToValueMapTy &VMap, - const CmpInst *Compare, BasicBlock *NewBB) { - if (match(Compare->getOperand(0), m_Intrinsic()) || - match(Compare->getOperand(1), m_Intrinsic())) { - VMap[Compare] = ConstantInt::get(SelectorIDType, 1); - return CloningDirector::SkipInstruction; - } - return CloningDirector::CloneInstruction; -} - -WinEHFrameVariableMaterializer::WinEHFrameVariableMaterializer( - Function *OutlinedFn, Value *ParentFP, FrameVarInfoMap &FrameVarInfo) - : FrameVarInfo(FrameVarInfo), Builder(OutlinedFn->getContext()) { - BasicBlock *EntryBB = &OutlinedFn->getEntryBlock(); - - // New allocas should be inserted in the entry block, but after the parent FP - // is established if it is an instruction. - Instruction *InsertPoint = EntryBB->getFirstInsertionPt(); - if (auto *FPInst = dyn_cast(ParentFP)) - InsertPoint = FPInst->getNextNode(); - Builder.SetInsertPoint(EntryBB, InsertPoint); -} - -Value *WinEHFrameVariableMaterializer::materializeValueFor(Value *V) { - // If we're asked to materialize a static alloca, we temporarily create an - // alloca in the outlined function and add this to the FrameVarInfo map. When - // all the outlining is complete, we'll replace these temporary allocas with - // calls to llvm.localrecover. - if (auto *AV = dyn_cast(V)) { - assert(AV->isStaticAlloca() && - "cannot materialize un-demoted dynamic alloca"); - AllocaInst *NewAlloca = dyn_cast(AV->clone()); - Builder.Insert(NewAlloca, AV->getName()); - FrameVarInfo[AV].push_back(NewAlloca); - return NewAlloca; - } - - if (isa(V) || isa(V)) { - Function *Parent = isa(V) - ? 
cast(V)->getParent()->getParent() - : cast(V)->getParent(); - errs() - << "Failed to demote instruction used in exception handler of function " - << GlobalValue::getRealLinkageName(Parent->getName()) << ":\n"; - errs() << " " << *V << '\n'; - report_fatal_error("WinEHPrepare failed to demote instruction"); - } - - // Don't materialize other values. - return nullptr; -} - -void WinEHFrameVariableMaterializer::escapeCatchObject(Value *V) { - // Catch parameter objects have to live in the parent frame. When we see a use - // of a catch parameter, add a sentinel to the multimap to indicate that it's - // used from another handler. This will prevent us from trying to sink the - // alloca into the handler and ensure that the catch parameter is present in - // the call to llvm.localescape. - FrameVarInfo[V].push_back(getCatchObjectSentinel()); -} - -// This function maps the catch and cleanup handlers that are reachable from the -// specified landing pad. The landing pad sequence will have this basic shape: -// -// -// -// -// -// -// -// -// ... -// -// Any of the cleanup slots may be absent. The cleanup slots may be occupied by -// any arbitrary control flow, but all paths through the cleanup code must -// eventually reach the next selector comparison and no path can skip to a -// different selector comparisons, though some paths may terminate abnormally. -// Therefore, we will use a depth first search from the start of any given -// cleanup block and stop searching when we find the next selector comparison. -// -// If the landingpad instruction does not have a catch clause, we will assume -// that any instructions other than selector comparisons and catch handlers can -// be ignored. In practice, these will only be the boilerplate instructions. -// -// The catch handlers may also have any control structure, but we are only -// interested in the start of the catch handlers, so we don't need to actually -// follow the flow of the catch handlers. The start of the catch handlers can -// be located from the compare instructions, but they can be skipped in the -// flow by following the contrary branch. -void WinEHPrepare::mapLandingPadBlocks(LandingPadInst *LPad, - LandingPadActions &Actions) { - unsigned int NumClauses = LPad->getNumClauses(); - unsigned int HandlersFound = 0; - BasicBlock *BB = LPad->getParent(); - - DEBUG(dbgs() << "Mapping landing pad: " << BB->getName() << "\n"); - - if (NumClauses == 0) { - findCleanupHandlers(Actions, BB, nullptr); - return; - } - - VisitedBlockSet VisitedBlocks; - - while (HandlersFound != NumClauses) { - BasicBlock *NextBB = nullptr; - - // Skip over filter clauses. - if (LPad->isFilter(HandlersFound)) { - ++HandlersFound; - continue; - } - - // See if the clause we're looking for is a catch-all. - // If so, the catch begins immediately. - Constant *ExpectedSelector = - LPad->getClause(HandlersFound)->stripPointerCasts(); - if (isa(ExpectedSelector)) { - // The catch all must occur last. - assert(HandlersFound == NumClauses - 1); - - // There can be additional selector dispatches in the call chain that we - // need to ignore. - BasicBlock *CatchBlock = nullptr; - Constant *Selector; - while (BB && isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) { - DEBUG(dbgs() << " Found extra catch dispatch in block " - << CatchBlock->getName() << "\n"); - BB = NextBB; - } - - // Add the catch handler to the action list. 
- CatchHandler *Action = nullptr; - if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) { - // If the CatchHandlerMap already has an entry for this BB, re-use it. - Action = CatchHandlerMap[BB]; - assert(Action->getSelector() == ExpectedSelector); - } else { - // We don't expect a selector dispatch, but there may be a call to - // llvm.eh.begincatch, which separates catch handling code from - // cleanup code in the same control flow. This call looks for the - // begincatch intrinsic. - Action = findCatchHandler(BB, NextBB, VisitedBlocks); - if (Action) { - // For C++ EH, check if there is any interesting cleanup code before - // we begin the catch. This is important because cleanups cannot - // rethrow exceptions but code called from catches can. For SEH, it - // isn't important if some finally code before a catch-all is executed - // out of line or after recovering from the exception. - if (Personality == EHPersonality::MSVC_CXX) - findCleanupHandlers(Actions, BB, BB); - } else { - // If an action was not found, it means that the control flows - // directly into the catch-all handler and there is no cleanup code. - // That's an expected situation and we must create a catch action. - // Since this is a catch-all handler, the selector won't actually - // appear in the code anywhere. ExpectedSelector here is the constant - // null ptr that we got from the landing pad instruction. - Action = new CatchHandler(BB, ExpectedSelector, nullptr); - CatchHandlerMap[BB] = Action; - } - } - Actions.insertCatchHandler(Action); - DEBUG(dbgs() << " Catch all handler at block " << BB->getName() << "\n"); - ++HandlersFound; - - // Once we reach a catch-all, don't expect to hit a resume instruction. - BB = nullptr; - break; - } - - CatchHandler *CatchAction = findCatchHandler(BB, NextBB, VisitedBlocks); - assert(CatchAction); - - // See if there is any interesting code executed before the dispatch. - findCleanupHandlers(Actions, BB, CatchAction->getStartBlock()); - - // When the source program contains multiple nested try blocks the catch - // handlers can get strung together in such a way that we can encounter - // a dispatch for a selector that we've already had a handler for. - if (CatchAction->getSelector()->stripPointerCasts() == ExpectedSelector) { - ++HandlersFound; - - // Add the catch handler to the action list. - DEBUG(dbgs() << " Found catch dispatch in block " - << CatchAction->getStartBlock()->getName() << "\n"); - Actions.insertCatchHandler(CatchAction); - } else { - // Under some circumstances optimized IR will flow unconditionally into a - // handler block without checking the selector. This can only happen if - // the landing pad has a catch-all handler and the handler for the - // preceeding catch clause is identical to the catch-call handler - // (typically an empty catch). In this case, the handler must be shared - // by all remaining clauses. - if (isa( - CatchAction->getSelector()->stripPointerCasts())) { - DEBUG(dbgs() << " Applying early catch-all handler in block " - << CatchAction->getStartBlock()->getName() - << " to all remaining clauses.\n"); - Actions.insertCatchHandler(CatchAction); - return; - } - - DEBUG(dbgs() << " Found extra catch dispatch in block " - << CatchAction->getStartBlock()->getName() << "\n"); - } - - // Move on to the block after the catch handler. - BB = NextBB; - } - - // If we didn't wind up in a catch-all, see if there is any interesting code - // executed before the resume. 
- findCleanupHandlers(Actions, BB, BB); - - // It's possible that some optimization moved code into a landingpad that - // wasn't - // previously being used for cleanup. If that happens, we need to execute - // that - // extra code from a cleanup handler. - if (Actions.includesCleanup() && !LPad->isCleanup()) - LPad->setCleanup(true); -} - -// This function searches starting with the input block for the next -// block that terminates with a branch whose condition is based on a selector -// comparison. This may be the input block. See the mapLandingPadBlocks -// comments for a discussion of control flow assumptions. -// -CatchHandler *WinEHPrepare::findCatchHandler(BasicBlock *BB, - BasicBlock *&NextBB, - VisitedBlockSet &VisitedBlocks) { - // See if we've already found a catch handler use it. - // Call count() first to avoid creating a null entry for blocks - // we haven't seen before. - if (CatchHandlerMap.count(BB) && CatchHandlerMap[BB] != nullptr) { - CatchHandler *Action = cast(CatchHandlerMap[BB]); - NextBB = Action->getNextBB(); - return Action; - } - - // VisitedBlocks applies only to the current search. We still - // need to consider blocks that we've visited while mapping other - // landing pads. - VisitedBlocks.insert(BB); - - BasicBlock *CatchBlock = nullptr; - Constant *Selector = nullptr; - - // If this is the first time we've visited this block from any landing pad - // look to see if it is a selector dispatch block. - if (!CatchHandlerMap.count(BB)) { - if (isSelectorDispatch(BB, CatchBlock, Selector, NextBB)) { - CatchHandler *Action = new CatchHandler(BB, Selector, NextBB); - CatchHandlerMap[BB] = Action; - return Action; - } - // If we encounter a block containing an llvm.eh.begincatch before we - // find a selector dispatch block, the handler is assumed to be - // reached unconditionally. This happens for catch-all blocks, but - // it can also happen for other catch handlers that have been combined - // with the catch-all handler during optimization. - if (isCatchBlock(BB)) { - PointerType *Int8PtrTy = Type::getInt8PtrTy(BB->getContext()); - Constant *NullSelector = ConstantPointerNull::get(Int8PtrTy); - CatchHandler *Action = new CatchHandler(BB, NullSelector, nullptr); - CatchHandlerMap[BB] = Action; - return Action; - } - } - - // Visit each successor, looking for the dispatch. - // FIXME: We expect to find the dispatch quickly, so this will probably - // work better as a breadth first search. - for (BasicBlock *Succ : successors(BB)) { - if (VisitedBlocks.count(Succ)) - continue; - - CatchHandler *Action = findCatchHandler(Succ, NextBB, VisitedBlocks); - if (Action) - return Action; - } - return nullptr; -} - -// These are helper functions to combine repeated code from findCleanupHandlers. -static void createCleanupHandler(LandingPadActions &Actions, - CleanupHandlerMapTy &CleanupHandlerMap, - BasicBlock *BB) { - CleanupHandler *Action = new CleanupHandler(BB); - CleanupHandlerMap[BB] = Action; - Actions.insertCleanupHandler(Action); - DEBUG(dbgs() << " Found cleanup code in block " - << Action->getStartBlock()->getName() << "\n"); -} - -static CallSite matchOutlinedFinallyCall(BasicBlock *BB, - Instruction *MaybeCall) { - // Look for finally blocks that Clang has already outlined for us. 
-static CallSite matchOutlinedFinallyCall(BasicBlock *BB,
-                                         Instruction *MaybeCall) {
-  // Look for finally blocks that Clang has already outlined for us.
-  //   %fp = call i8* @llvm.localaddress()
-  //   call void @"fin$parent"(iN 1, i8* %fp)
-  if (isLocalAddressCall(MaybeCall) && MaybeCall != BB->getTerminator())
-    MaybeCall = MaybeCall->getNextNode();
-  CallSite FinallyCall(MaybeCall);
-  if (!FinallyCall || FinallyCall.arg_size() != 2)
-    return CallSite();
-  if (!match(FinallyCall.getArgument(0), m_SpecificInt(1)))
-    return CallSite();
-  if (!isLocalAddressCall(FinallyCall.getArgument(1)))
-    return CallSite();
-  return FinallyCall;
-}
-
-static BasicBlock *followSingleUnconditionalBranches(BasicBlock *BB) {
-  // Skip blocks containing only a single unconditional branch.
-  while (BB->getFirstNonPHIOrDbg() == BB->getTerminator()) {
-    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
-    if (Br && Br->isUnconditional())
-      BB = Br->getSuccessor(0);
-    else
-      return BB;
-  }
-  return BB;
-}
-
-// This function searches starting with the input block for the next block
-// that contains code that is not part of a catch handler and would not be
-// eliminated during handler outlining.
-//
-void WinEHPrepare::findCleanupHandlers(LandingPadActions &Actions,
-                                       BasicBlock *StartBB,
-                                       BasicBlock *EndBB) {
-  // Here we will skip over the following:
-  //
-  // landing pad prolog:
-  //
-  // Unconditional branches
-  //
-  // Selector dispatch
-  //
-  // Resume pattern
-  //
-  // Anything else marks the start of an interesting block
-
-  BasicBlock *BB = StartBB;
-  // Anything other than an unconditional branch will kick us out of this loop
-  // one way or another.
-  while (BB) {
-    BB = followSingleUnconditionalBranches(BB);
-    // If we've already scanned this block, don't scan it again.  If it is
-    // a cleanup block, there will be an action in the CleanupHandlerMap.
-    // If we've scanned it and it is not a cleanup block, there will be a
-    // nullptr in the CleanupHandlerMap.  If we have not scanned it, there
-    // will be no entry in the CleanupHandlerMap.  We must call count() first
-    // to avoid creating a null entry for blocks we haven't scanned.
-    if (CleanupHandlerMap.count(BB)) {
-      if (auto *Action = CleanupHandlerMap[BB]) {
-        Actions.insertCleanupHandler(Action);
-        DEBUG(dbgs() << "  Found cleanup code in block "
-                     << Action->getStartBlock()->getName() << "\n");
-        // FIXME: This cleanup might chain into another, and we need to
-        //        discover that.
-        return;
-      } else {
-        // Here we handle the case where the cleanup handler map contains a
-        // value for this block but the value is a nullptr.  This means that
-        // we have previously analyzed the block and determined that it did
-        // not contain any cleanup code.  Based on the earlier analysis, we
-        // know the block must end in either an unconditional branch, a
-        // resume or a conditional branch that is predicated on a comparison
-        // with a selector.  Either the resume or the selector dispatch
-        // would terminate the search for cleanup code, so the unconditional
-        // branch is the only case for which we might need to continue
-        // searching.
-        BasicBlock *SuccBB = followSingleUnconditionalBranches(BB);
-        if (SuccBB == BB || SuccBB == EndBB)
-          return;
-        BB = SuccBB;
-        continue;
-      }
-    }
-
-    // Create an entry in the cleanup handler map for this block.  Initially
-    // we create an entry that says this isn't a cleanup block.  If we find
-    // cleanup code, the caller will replace this entry.
-    CleanupHandlerMap[BB] = nullptr;
-
-    TerminatorInst *Terminator = BB->getTerminator();
-
-    // Landing pad blocks have extra instructions we need to accept.
-    LandingPadMap *LPadMap = nullptr;
-    if (BB->isLandingPad()) {
-      LandingPadInst *LPad = BB->getLandingPadInst();
-      LPadMap = &LPadMaps[LPad];
-      if (!LPadMap->isInitialized())
-        LPadMap->mapLandingPad(LPad);
-    }
-
-    // Look for the bare resume pattern:
-    //   %lpad.val1 = insertvalue { i8*, i32 } undef, i8* %exn, 0
-    //   %lpad.val2 = insertvalue { i8*, i32 } %lpad.val1, i32 %sel, 1
-    //   resume { i8*, i32 } %lpad.val2
-    if (auto *Resume = dyn_cast<ResumeInst>(Terminator)) {
-      InsertValueInst *Insert1 = nullptr;
-      InsertValueInst *Insert2 = nullptr;
-      Value *ResumeVal = Resume->getOperand(0);
-      // If the resume value isn't a phi or landingpad value, it should be a
-      // series of insertions.  Identify them so we can avoid them when
-      // scanning for cleanups.
-      if (!isa<PHINode>(ResumeVal) && !isa<LandingPadInst>(ResumeVal)) {
-        Insert2 = dyn_cast<InsertValueInst>(ResumeVal);
-        if (!Insert2)
-          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-        Insert1 = dyn_cast<InsertValueInst>(Insert2->getAggregateOperand());
-        if (!Insert1)
-          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-      }
-      for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
-                                IE = BB->end();
-           II != IE; ++II) {
-        Instruction *Inst = II;
-        if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
-          continue;
-        if (Inst == Insert1 || Inst == Insert2 || Inst == Resume)
-          continue;
-        if (!Inst->hasOneUse() ||
-            (Inst->user_back() != Insert1 && Inst->user_back() != Insert2)) {
-          return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-        }
-      }
-      return;
-    }
-
-    BranchInst *Branch = dyn_cast<BranchInst>(Terminator);
-    if (Branch && Branch->isConditional()) {
-      // Look for the selector dispatch.
-      //   %2 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIf to i8*))
-      //   %matches = icmp eq i32 %sel, %2
-      //   br i1 %matches, label %catch14, label %eh.resume
-      CmpInst *Compare = dyn_cast<CmpInst>(Branch->getCondition());
-      if (!Compare || !Compare->isEquality())
-        return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-      for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(),
-                                IE = BB->end();
-           II != IE; ++II) {
-        Instruction *Inst = II;
-        if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
-          continue;
-        if (Inst == Compare || Inst == Branch)
-          continue;
-        if (match(Inst, m_Intrinsic<Intrinsic::eh_typeid_for>()))
-          continue;
-        return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-      }
-      // The selector dispatch block should always terminate our search.
-      assert(BB == EndBB);
-      return;
-    }
-
-    if (isAsynchronousEHPersonality(Personality)) {
-      // If this is a landingpad block, split the block at the first
-      // non-landing pad instruction.
-      Instruction *MaybeCall = BB->getFirstNonPHIOrDbg();
-      if (LPadMap) {
-        while (MaybeCall != BB->getTerminator() &&
-               LPadMap->isLandingPadSpecificInst(MaybeCall))
-          MaybeCall = MaybeCall->getNextNode();
-      }
-
-      // Look for outlined finally calls on x64, since those happen to match
-      // the prototype provided by the runtime.
-      if (TheTriple.getArch() == Triple::x86_64) {
-        if (CallSite FinallyCall = matchOutlinedFinallyCall(BB, MaybeCall)) {
-          Function *Fin = FinallyCall.getCalledFunction();
-          assert(Fin && "outlined finally call should be direct");
-          auto *Action = new CleanupHandler(BB);
-          Action->setHandlerBlockOrFunc(Fin);
-          Actions.insertCleanupHandler(Action);
-          CleanupHandlerMap[BB] = Action;
-          DEBUG(dbgs() << "  Found frontend-outlined finally call to "
-                       << Fin->getName() << " in block "
-                       << Action->getStartBlock()->getName() << "\n");
-
-          // Split the block if there were more interesting instructions and
-          // look for finally calls in the normal successor block.
-          BasicBlock *SuccBB = BB;
-          if (FinallyCall.getInstruction() != BB->getTerminator() &&
-              FinallyCall.getInstruction()->getNextNode() !=
-                  BB->getTerminator()) {
-            SuccBB =
-                SplitBlock(BB, FinallyCall.getInstruction()->getNextNode(),
-                           DT);
-          } else {
-            if (FinallyCall.isInvoke()) {
-              SuccBB = cast<InvokeInst>(FinallyCall.getInstruction())
-                           ->getNormalDest();
-            } else {
-              SuccBB = BB->getUniqueSuccessor();
-              assert(SuccBB &&
-                     "splitOutlinedFinallyCalls didn't insert a branch");
-            }
-          }
-          BB = SuccBB;
-          if (BB == EndBB)
-            return;
-          continue;
-        }
-      }
-    }
-
-    // Anything else is either a catch block or interesting cleanup code.
-    for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
-         II != IE; ++II) {
-      Instruction *Inst = II;
-      if (LPadMap && LPadMap->isLandingPadSpecificInst(Inst))
-        continue;
-      // Unconditional branches fall through to this loop.
-      if (Inst == Branch)
-        continue;
-      // If this is a catch block, there is no cleanup code to be found.
-      if (match(Inst, m_Intrinsic<Intrinsic::eh_begincatch>()))
-        return;
-      // If this is a nested landing pad, it may contain an endcatch call.
-      if (match(Inst, m_Intrinsic<Intrinsic::eh_endcatch>()))
-        return;
-      // Anything else makes this interesting cleanup code.
-      return createCleanupHandler(Actions, CleanupHandlerMap, BB);
-    }
-
-    // Only unconditional branches in empty blocks should get this far.
-    assert(Branch && Branch->isUnconditional());
-    if (BB == EndBB)
-      return;
-    BB = Branch->getSuccessor(0);
-  }
-}
-
-// This is a public function, declared in WinEHFuncInfo.h and is also
-// referenced by WinEHNumbering in FunctionLoweringInfo.cpp.
-void llvm::parseEHActions(
-    const IntrinsicInst *II,
-    SmallVectorImpl<std::unique_ptr<ActionHandler>> &Actions) {
-  assert(II->getIntrinsicID() == Intrinsic::eh_actions &&
-         "attempted to parse non eh.actions intrinsic");
-  for (unsigned I = 0, E = II->getNumArgOperands(); I != E;) {
-    uint64_t ActionKind =
-        cast<ConstantInt>(II->getArgOperand(I))->getZExtValue();
-    if (ActionKind == /*catch=*/1) {
-      auto *Selector = cast<Constant>(II->getArgOperand(I + 1));
-      ConstantInt *EHObjIndex = cast<ConstantInt>(II->getArgOperand(I + 2));
-      int64_t EHObjIndexVal = EHObjIndex->getSExtValue();
-      Constant *Handler = cast<Constant>(II->getArgOperand(I + 3));
-      I += 4;
-      auto CH = make_unique<CatchHandler>(/*BB=*/nullptr, Selector,
-                                          /*NextBB=*/nullptr);
-      CH->setHandlerBlockOrFunc(Handler);
-      CH->setExceptionVarIndex(EHObjIndexVal);
-      Actions.push_back(std::move(CH));
-    } else if (ActionKind == 0) {
-      Constant *Handler = cast<Constant>(II->getArgOperand(I + 1));
-      I += 2;
-      auto CH = make_unique<CleanupHandler>(/*BB=*/nullptr);
-      CH->setHandlerBlockOrFunc(Handler);
-      Actions.push_back(std::move(CH));
-    } else {
-      llvm_unreachable("Expected either a catch or cleanup handler!");
-    }
-  }
-  std::reverse(Actions.begin(), Actions.end());
-}
-
-namespace {
-struct WinEHNumbering {
-  WinEHNumbering(WinEHFuncInfo &FuncInfo)
-      : FuncInfo(FuncInfo), CurrentBaseState(-1), NextState(0) {}
-
-  WinEHFuncInfo &FuncInfo;
-  int CurrentBaseState;
-  int NextState;
-
-  SmallVector<std::unique_ptr<ActionHandler>, 4> HandlerStack;
-  SmallPtrSet<const Function *, 4> VisitedHandlers;
-
-  int currentEHNumber() const {
-    return HandlerStack.empty() ? CurrentBaseState
-                                : HandlerStack.back()->getEHState();
-  }
-
-  void createUnwindMapEntry(int ToState, ActionHandler *AH);
-  void createTryBlockMapEntry(int TryLow, int TryHigh,
-                              ArrayRef<CatchHandler *> Handlers);
-  void processCallSite(MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
-                       ImmutableCallSite CS);
-  void popUnmatchedActions(int FirstMismatch);
-  void calculateStateNumbers(const Function &F);
-  void findActionRootLPads(const Function &F);
-};
-}
-
-void WinEHNumbering::createUnwindMapEntry(int ToState, ActionHandler *AH) {
-  WinEHUnwindMapEntry UME;
+static int addUnwindMapEntry(WinEHFuncInfo &FuncInfo, int ToState,
+                             const BasicBlock *BB) {
+  CxxUnwindMapEntry UME;
   UME.ToState = ToState;
-  if (auto *CH = dyn_cast_or_null<CleanupHandler>(AH))
-    UME.Cleanup = cast<Function>(CH->getHandlerBlockOrFunc());
-  else
-    UME.Cleanup = nullptr;
-  FuncInfo.UnwindMap.push_back(UME);
+  UME.Cleanup = BB;
+  FuncInfo.CxxUnwindMap.push_back(UME);
+  return FuncInfo.getLastStateNumber();
 }
 
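The table addUnwindMapEntry builds is a parent-pointer chain: each state records the state it transitions to on unwind, plus an optional cleanup to run on the way. The following self-contained sketch models that walk; the type and member names are illustrative stand-ins for CxxUnwindMapEntry and WinEHFuncInfo, not LLVM's own declarations.

    #include <cassert>
    #include <vector>

    // Illustrative stand-ins for CxxUnwindMapEntry / WinEHFuncInfo.
    struct UnwindEntry {
      int ToState;         // state to transition to when this state unwinds
      const void *Cleanup; // cleanup funclet entry block, or nullptr
    };

    struct UnwindMap {
      std::vector<UnwindEntry> Entries;

      // Mirrors addUnwindMapEntry: appending a row allocates the next state,
      // and the new state's number is simply the row index.
      int add(int ToState, const void *Cleanup) {
        Entries.push_back({ToState, Cleanup});
        return static_cast<int>(Entries.size()) - 1;
      }

      // Unwinding from State out to TargetState visits every cleanup between.
      void unwind(int State, int TargetState) const {
        while (State != TargetState) {
          assert(State >= 0 && "walked past the caller state");
          // ... run Entries[State].Cleanup here ...
          State = Entries[State].ToState;
        }
      }
    };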
-void WinEHNumbering::createTryBlockMapEntry(int TryLow, int TryHigh,
-                                            ArrayRef<CatchHandler *> Handlers) {
-  // See if we already have an entry for this set of handlers.
-  // This is using iterators rather than a range-based for loop because
-  // if we find the entry we're looking for we'll need the iterator to
-  // erase it.
-  int NumHandlers = Handlers.size();
-  auto I = FuncInfo.TryBlockMap.begin();
-  auto E = FuncInfo.TryBlockMap.end();
-  for ( ; I != E; ++I) {
-    auto &Entry = *I;
-    if (Entry.HandlerArray.size() != (size_t)NumHandlers)
-      continue;
-    int N;
-    for (N = 0; N < NumHandlers; ++N) {
-      if (Entry.HandlerArray[N].Handler !=
-          Handlers[N]->getHandlerBlockOrFunc())
-        break; // breaks out of inner loop
-    }
-    // If all the handlers match, this is what we were looking for.
-    if (N == NumHandlers) {
-      break;
-    }
-  }
-
-  // If we found an existing entry for this set of handlers, extend the range
-  // but move the entry to the end of the map vector.  The order of entries
-  // in the map is critical to the way that the runtime finds handlers.
-  // FIXME: Depending on what has happened with block ordering, this may
-  //        incorrectly combine entries that should remain separate.
-  if (I != E) {
-    // Copy the existing entry.
-    WinEHTryBlockMapEntry Entry = *I;
-    Entry.TryLow = std::min(TryLow, Entry.TryLow);
-    Entry.TryHigh = std::max(TryHigh, Entry.TryHigh);
-    assert(Entry.TryLow <= Entry.TryHigh);
-    // Erase the old entry and add this one to the back.
-    FuncInfo.TryBlockMap.erase(I);
-    FuncInfo.TryBlockMap.push_back(Entry);
-    return;
-  }
-
-  // If we didn't find an entry, create a new one.
+static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow,
+                                int TryHigh, int CatchHigh,
+                                ArrayRef<const CatchPadInst *> Handlers) {
   WinEHTryBlockMapEntry TBME;
   TBME.TryLow = TryLow;
   TBME.TryHigh = TryHigh;
+  TBME.CatchHigh = CatchHigh;
   assert(TBME.TryLow <= TBME.TryHigh);
-  for (CatchHandler *CH : Handlers) {
+  for (const CatchPadInst *CPI : Handlers) {
     WinEHHandlerType HT;
-    if (CH->getSelector()->isNullValue()) {
-      HT.Adjectives = 0x40;
+    Constant *TypeInfo = cast<Constant>(CPI->getArgOperand(0));
+    if (TypeInfo->isNullValue())
       HT.TypeDescriptor = nullptr;
-    } else {
-      auto *GV = cast<GlobalVariable>(CH->getSelector()->stripPointerCasts());
-      // Selectors are always pointers to GlobalVariables with 'struct' type.
-      // The struct has two fields, adjectives and a type descriptor.
-      auto *CS = cast<ConstantStruct>(GV->getInitializer());
-      HT.Adjectives =
-          cast<ConstantInt>(CS->getAggregateElement(0U))->getZExtValue();
-      HT.TypeDescriptor =
-          cast<GlobalVariable>(CS->getAggregateElement(1)->stripPointerCasts());
-    }
-    HT.Handler = cast<Function>(CH->getHandlerBlockOrFunc());
-    HT.CatchObjRecoverIdx = CH->getExceptionVarIndex();
+    else
+      HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts());
+    HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue();
+    HT.Handler = CPI->getParent();
+    if (isa<ConstantPointerNull>(CPI->getArgOperand(2)))
+      HT.CatchObj.Alloca = nullptr;
+    else
+      HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2));
     TBME.HandlerArray.push_back(HT);
   }
   FuncInfo.TryBlockMap.push_back(TBME);
 }
 
-static void print_name(const Value *V) {
-#ifndef NDEBUG
-  if (!V) {
-    DEBUG(dbgs() << "null");
-    return;
-  }
-
-  if (const auto *F = dyn_cast<Function>(V))
-    DEBUG(dbgs() << F->getName());
-  else
-    DEBUG(V->dump());
-#endif
+static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CleanupPad) {
+  for (const User *U : CleanupPad->users())
+    if (const auto *CRI = dyn_cast<CleanupReturnInst>(U))
+      return CRI->getUnwindDest();
+  return nullptr;
 }
 
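A WinEHTryBlockMapEntry describes two adjacent state intervals: [TryLow, TryHigh] covers the guarded try body, and (TryHigh, CatchHigh] covers the catch funclets themselves, which is what lets a rethrow from a catch unwind past its own try. A hedged sketch of the interval tests a consumer of this table would perform, using illustrative types rather than LLVM's:

    // Illustrative mirror of a WinEHTryBlockMapEntry's state intervals.
    struct TryBlockEntry {
      int TryLow, TryHigh, CatchHigh;
    };

    // A state raised inside the guarded region selects this try block.
    inline bool stateInTryBody(const TryBlockEntry &E, int State) {
      return State >= E.TryLow && State <= E.TryHigh;
    }

    // States in (TryHigh, CatchHigh] belong to this try's catch funclets; an
    // exception escaping one of those catches must not re-enter the same try.
    inline bool stateInCatchFunclets(const TryBlockEntry &E, int State) {
      return State > E.TryHigh && State <= E.CatchHigh;
    }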
-void WinEHNumbering::processCallSite(
-    MutableArrayRef<std::unique_ptr<ActionHandler>> Actions,
-    ImmutableCallSite CS) {
-  DEBUG(dbgs() << "processCallSite (EH state = " << currentEHNumber()
-               << ") for: ");
-  print_name(CS ? CS.getCalledValue() : nullptr);
-  DEBUG(dbgs() << '\n');
-
-  DEBUG(dbgs() << "HandlerStack: \n");
-  for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
-    DEBUG(dbgs() << "  ");
-    print_name(HandlerStack[I]->getHandlerBlockOrFunc());
-    DEBUG(dbgs() << '\n');
-  }
-  DEBUG(dbgs() << "Actions: \n");
-  for (int I = 0, E = Actions.size(); I < E; ++I) {
-    DEBUG(dbgs() << "  ");
-    print_name(Actions[I]->getHandlerBlockOrFunc());
-    DEBUG(dbgs() << '\n');
-  }
-  int FirstMismatch = 0;
-  for (int E = std::min(HandlerStack.size(), Actions.size());
-       FirstMismatch < E; ++FirstMismatch) {
-    if (HandlerStack[FirstMismatch]->getHandlerBlockOrFunc() !=
-        Actions[FirstMismatch]->getHandlerBlockOrFunc())
-      break;
-  }
-
-  // Remove unmatched actions from the stack and process their EH states.
-  popUnmatchedActions(FirstMismatch);
-
-  DEBUG(dbgs() << "Pushing actions for CallSite: ");
-  print_name(CS ? CS.getCalledValue() : nullptr);
-  DEBUG(dbgs() << '\n');
-
-  bool LastActionWasCatch = false;
-  const LandingPadInst *LastRootLPad = nullptr;
-  for (size_t I = FirstMismatch; I != Actions.size(); ++I) {
-    // We can reuse eh states when pushing two catches for the same invoke.
-    bool CurrActionIsCatch = isa<CatchHandler>(Actions[I].get());
-    auto *Handler = cast<Function>(Actions[I]->getHandlerBlockOrFunc());
-    // Various conditions can lead to a handler being popped from the
-    // stack and re-pushed later.  That shouldn't create a new state.
-    // FIXME: Can code optimization lead to re-used handlers?
-    if (FuncInfo.HandlerEnclosedState.count(Handler)) {
-      // If we already assigned the state enclosed by this handler, re-use it.
-      Actions[I]->setEHState(FuncInfo.HandlerEnclosedState[Handler]);
+static void calculateStateNumbersForInvokes(const Function *Fn,
+                                            WinEHFuncInfo &FuncInfo) {
+  auto *F = const_cast<Function *>(Fn);
+  DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(*F);
+  for (BasicBlock &BB : *F) {
+    auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+    if (!II)
       continue;
+
+    auto &BBColors = BlockColors[&BB];
+    assert(BBColors.size() == 1 &&
+           "multi-color BB not removed by preparation");
+    BasicBlock *FuncletEntryBB = BBColors.front();
+
+    BasicBlock *FuncletUnwindDest;
+    auto *FuncletPad =
+        dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI());
+    assert(FuncletPad || FuncletEntryBB == &Fn->getEntryBlock());
+    if (!FuncletPad)
+      FuncletUnwindDest = nullptr;
+    else if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
+      FuncletUnwindDest = CatchPad->getCatchSwitch()->getUnwindDest();
+    else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(FuncletPad))
+      FuncletUnwindDest = getCleanupRetUnwindDest(CleanupPad);
+    else
+      llvm_unreachable("unexpected funclet pad!");
+
+    BasicBlock *InvokeUnwindDest = II->getUnwindDest();
+    int BaseState = -1;
+    if (FuncletUnwindDest == InvokeUnwindDest) {
+      auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad);
+      if (BaseStateI != FuncInfo.FuncletBaseStateMap.end())
+        BaseState = BaseStateI->second;
     }
-    const LandingPadInst *RootLPad = FuncInfo.RootLPad[Handler];
-    if (CurrActionIsCatch && LastActionWasCatch && RootLPad == LastRootLPad) {
-      DEBUG(dbgs() << "setEHState for handler to " << currentEHNumber()
-                   << "\n");
-      Actions[I]->setEHState(currentEHNumber());
+
+    if (BaseState != -1) {
+      FuncInfo.InvokeStateMap[II] = BaseState;
     } else {
-      DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber() << ", ");
-      print_name(Actions[I]->getHandlerBlockOrFunc());
-      DEBUG(dbgs() << ") with EH state " << NextState << "\n");
-      createUnwindMapEntry(currentEHNumber(), Actions[I].get());
-      DEBUG(dbgs() << "setEHState for handler to " << NextState << "\n");
-      Actions[I]->setEHState(NextState);
-      NextState++;
+      Instruction *PadInst = InvokeUnwindDest->getFirstNonPHI();
+      assert(FuncInfo.EHPadStateMap.count(PadInst) && "EH Pad has no state!");
+      FuncInfo.InvokeStateMap[II] = FuncInfo.EHPadStateMap[PadInst];
     }
-    HandlerStack.push_back(std::move(Actions[I]));
-    LastActionWasCatch = CurrActionIsCatch;
-    LastRootLPad = RootLPad;
-  }
-
-  // This is used to defer numbering states for a handler until after the
-  // last time it appears in an invoke action list.
-  if (CS.isInvoke()) {
-    for (int I = 0, E = HandlerStack.size(); I < E; ++I) {
-      auto *Handler = cast<Function>(HandlerStack[I]->getHandlerBlockOrFunc());
-      if (FuncInfo.LastInvoke[Handler] !=
-          cast<InvokeInst>(CS.getInstruction()))
-        continue;
-      FuncInfo.LastInvokeVisited[Handler] = true;
-      DEBUG(dbgs() << "Last invoke of ");
-      print_name(Handler);
-      DEBUG(dbgs() << " has been visited.\n");
-    }
-  }
-
-  DEBUG(dbgs() << "In EHState " << currentEHNumber() << " for CallSite: ");
-  print_name(CS ? CS.getCalledValue() : nullptr);
-  DEBUG(dbgs() << '\n');
-}
-
-void WinEHNumbering::popUnmatchedActions(int FirstMismatch) {
-  // Don't recurse while we are looping over the handler stack.  Instead,
-  // defer the numbering of the catch handlers until we are done popping.
-  SmallVector<CatchHandler *, 8> PoppedCatches;
-  for (int I = HandlerStack.size() - 1; I >= FirstMismatch; --I) {
-    std::unique_ptr<ActionHandler> Handler = HandlerStack.pop_back_val();
-    if (isa<CatchHandler>(Handler.get()))
-      PoppedCatches.push_back(cast<CatchHandler>(Handler.release()));
-  }
-
-  int TryHigh = NextState - 1;
-  int LastTryLowIdx = 0;
-  for (int I = 0, E = PoppedCatches.size(); I != E; ++I) {
-    CatchHandler *CH = PoppedCatches[I];
-    DEBUG(dbgs() << "Popped handler with state " << CH->getEHState() << "\n");
-    if (I + 1 == E ||
-        CH->getEHState() != PoppedCatches[I + 1]->getEHState()) {
-      int TryLow = CH->getEHState();
-      auto Handlers =
-          makeArrayRef(&PoppedCatches[LastTryLowIdx], I - LastTryLowIdx + 1);
-      DEBUG(dbgs() << "createTryBlockMapEntry(" << TryLow << ", " << TryHigh);
-      for (size_t J = 0; J < Handlers.size(); ++J) {
-        DEBUG(dbgs() << ", ");
-        print_name(Handlers[J]->getHandlerBlockOrFunc());
-      }
-      DEBUG(dbgs() << ")\n");
-      createTryBlockMapEntry(TryLow, TryHigh, Handlers);
-      LastTryLowIdx = I + 1;
-    }
-  }
-
-  for (CatchHandler *CH : PoppedCatches) {
-    if (auto *F = dyn_cast<Function>(CH->getHandlerBlockOrFunc())) {
-      if (FuncInfo.LastInvokeVisited[F]) {
-        DEBUG(dbgs() << "Assigning base state " << NextState << " to ");
-        print_name(F);
-        DEBUG(dbgs() << '\n');
-        FuncInfo.HandlerBaseState[F] = NextState;
-        DEBUG(dbgs() << "createUnwindMapEntry(" << currentEHNumber()
-                     << ", null)\n");
-        createUnwindMapEntry(currentEHNumber(), nullptr);
-        ++NextState;
-        calculateStateNumbers(*F);
-      } else {
-        DEBUG(dbgs() << "Deferring handling of ");
-        print_name(F);
-        DEBUG(dbgs() << " until last invoke visited.\n");
-      }
-    }
-    delete CH;
   }
 }
 
-void WinEHNumbering::calculateStateNumbers(const Function &F) {
-  auto I = VisitedHandlers.insert(&F);
-  if (!I.second)
-    return; // We've already visited this handler, don't renumber it.
-
-  int OldBaseState = CurrentBaseState;
-  if (FuncInfo.HandlerBaseState.count(&F)) {
-    CurrentBaseState = FuncInfo.HandlerBaseState[&F];
+// Given BB which ends in an unwind edge, return the EHPad that this BB
+// belongs to.  If the unwind edge came from an invoke, return null.
+static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
+                                                 Value *ParentPad) {
+  const TerminatorInst *TI = BB->getTerminator();
+  if (isa<InvokeInst>(TI))
+    return nullptr;
+  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) {
+    if (CatchSwitch->getParentPad() != ParentPad)
+      return nullptr;
+    return BB;
   }
-
-  size_t SavedHandlerStackSize = HandlerStack.size();
-
-  DEBUG(dbgs() << "Calculating state numbers for: " << F.getName() << '\n');
-  SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
-  for (const BasicBlock &BB : F) {
-    for (const Instruction &I : BB) {
-      const auto *CI = dyn_cast<CallInst>(&I);
-      if (!CI || CI->doesNotThrow())
-        continue;
-      processCallSite(None, CI);
-    }
-    const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
-    if (!II)
-      continue;
-    const LandingPadInst *LPI = II->getLandingPadInst();
-    auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
-    if (!ActionsCall)
-      continue;
-    parseEHActions(ActionsCall, ActionList);
-    if (ActionList.empty())
-      continue;
-    processCallSite(ActionList, II);
-    ActionList.clear();
-    FuncInfo.LandingPadStateMap[LPI] = currentEHNumber();
-    DEBUG(dbgs() << "Assigning state " << currentEHNumber()
-                 << " to landing pad at " << LPI->getParent()->getName()
-                 << '\n');
-  }
-
-  // Pop any actions that were pushed on the stack for this function.
-  popUnmatchedActions(SavedHandlerStackSize);
-
-  DEBUG(dbgs() << "Assigning max state " << NextState - 1 << " to "
-               << F.getName() << '\n');
-  FuncInfo.CatchHandlerMaxState[&F] = NextState - 1;
-
-  CurrentBaseState = OldBaseState;
+  assert(!TI->isEHPad() && "unexpected EHPad!");
+  auto *CleanupPad = cast<CleanupReturnInst>(TI)->getCleanupPad();
+  if (CleanupPad->getParentPad() != ParentPad)
+    return nullptr;
+  return CleanupPad->getParent();
 }
 
-// This function follows the same basic traversal as calculateStateNumbers
-// but it is necessary to identify the root landing pad associated
-// with each action before we start assigning state numbers.
-void WinEHNumbering::findActionRootLPads(const Function &F) {
-  auto I = VisitedHandlers.insert(&F);
-  if (!I.second)
-    return; // We've already visited this handler, don't revisit it.
+static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
+                                     const Instruction *FirstNonPHI,
+                                     int ParentState) {
+  const BasicBlock *BB = FirstNonPHI->getParent();
+  assert(BB->isEHPad() && "not a funclet!");
 
-  SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList;
-  for (const BasicBlock &BB : F) {
-    const auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
-    if (!II)
-      continue;
-    const LandingPadInst *LPI = II->getLandingPadInst();
-    auto *ActionsCall = dyn_cast<IntrinsicInst>(LPI->getNextNode());
-    if (!ActionsCall)
-      continue;
+  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+    assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+           "shouldn't revisit catch funclets!");
 
-    assert(ActionsCall->getIntrinsicID() == Intrinsic::eh_actions);
-    parseEHActions(ActionsCall, ActionList);
-    if (ActionList.empty())
-      continue;
-    for (int I = 0, E = ActionList.size(); I < E; ++I) {
-      if (auto *Handler =
-              dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc())) {
-        FuncInfo.LastInvoke[Handler] = II;
-        // Don't replace the root landing pad if we previously saw this
-        // handler in a different function.
-        if (FuncInfo.RootLPad.count(Handler) &&
-            FuncInfo.RootLPad[Handler]->getParent()->getParent() != &F)
-          continue;
-        DEBUG(dbgs() << "Setting root lpad for ");
-        print_name(Handler);
-        DEBUG(dbgs() << " to " << LPI->getParent()->getName() << '\n');
-        FuncInfo.RootLPad[Handler] = LPI;
+    SmallVector<const CatchPadInst *, 2> Handlers;
+    for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+      auto *CatchPad = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+      Handlers.push_back(CatchPad);
+    }
+    int TryLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+    FuncInfo.EHPadStateMap[CatchSwitch] = TryLow;
+    for (const BasicBlock *PredBlock : predecessors(BB))
+      if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+                                               CatchSwitch->getParentPad())))
+        calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+                                 TryLow);
+    int CatchLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr);
+
+    // catchpads are separate funclets in C++ EH due to the way rethrow works.
+    int TryHigh = CatchLow - 1;
+    for (const auto *CatchPad : Handlers) {
+      FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
+      for (const User *U : CatchPad->users()) {
+        const auto *UserI = cast<Instruction>(U);
+        if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
+          if (InnerCatchSwitch->getUnwindDest() ==
+              CatchSwitch->getUnwindDest())
+            calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
+        if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
+          if (getCleanupRetUnwindDest(InnerCleanupPad) ==
+              CatchSwitch->getUnwindDest())
+            calculateCXXStateNumbers(FuncInfo, UserI, CatchLow);
       }
     }
-    // Walk the actions again and look for nested handlers.  This has to
-    // happen after all of the actions have been processed in the current
-    // function.
-    for (int I = 0, E = ActionList.size(); I < E; ++I)
-      if (auto *Handler =
-              dyn_cast<Function>(ActionList[I]->getHandlerBlockOrFunc()))
-        findActionRootLPads(*Handler);
-    ActionList.clear();
+    int CatchHigh = FuncInfo.getLastStateNumber();
+    addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers);
+    DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n');
+    DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n');
+    DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh
+                 << '\n');
+  } else {
+    auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
+
+    // It's possible for a cleanup to be visited twice: it might have multiple
+    // cleanupret instructions.
+    if (FuncInfo.EHPadStateMap.count(CleanupPad))
+      return;
+
+    int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB);
+    FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+    DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+                 << BB->getName() << '\n');
+    for (const BasicBlock *PredBlock : predecessors(BB)) {
+      if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+                                               CleanupPad->getParentPad()))) {
+        calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+                                 CleanupState);
+      }
+    }
+    for (const User *U : CleanupPad->users()) {
+      const auto *UserI = cast<Instruction>(U);
+      if (UserI->isEHPad())
+        report_fatal_error("Cleanup funclets for the MSVC++ personality "
+                           "cannot contain exceptional actions");
+    }
   }
 }
 
-void llvm::calculateWinCXXEHStateNumbers(const Function *ParentFn,
+static int addSEHExcept(WinEHFuncInfo &FuncInfo, int ParentState,
+                        const Function *Filter, const BasicBlock *Handler) {
+  SEHUnwindMapEntry Entry;
+  Entry.ToState = ParentState;
+  Entry.IsFinally = false;
+  Entry.Filter = Filter;
+  Entry.Handler = Handler;
+  FuncInfo.SEHUnwindMap.push_back(Entry);
+  return FuncInfo.SEHUnwindMap.size() - 1;
+}
+
+static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState,
+                         const BasicBlock *Handler) {
+  SEHUnwindMapEntry Entry;
+  Entry.ToState = ParentState;
+  Entry.IsFinally = true;
+  Entry.Filter = nullptr;
+  Entry.Handler = Handler;
+  FuncInfo.SEHUnwindMap.push_back(Entry);
+  return FuncInfo.SEHUnwindMap.size() - 1;
+}
+
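Both helpers above append one row to the SEH unwind map and return its index as the new state, so state numbers double as row numbers. A compact stand-alone model of that convention, with stand-in types rather than the real SEHUnwindMapEntry:

    #include <vector>

    // Stand-in for SEHUnwindMapEntry: one row per __except filter or
    // __finally block.
    struct SEHEntry {
      int ToState;              // enclosing state, or -1 = unwind to caller
      bool IsFinally;           // true for __finally, false for __except
      const void *FilterOrNull; // filter function for __except, else nullptr
      const void *Handler;      // entry block of the handler funclet
    };

    // Mirrors addSEHExcept/addSEHFinally: the new state is the appended
    // row's index, so the map's size fixes the next state number.
    inline int addEntry(std::vector<SEHEntry> &Map, const SEHEntry &E) {
      Map.push_back(E);
      return static_cast<int>(Map.size()) - 1;
    }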
+static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
+                                     const Instruction *FirstNonPHI,
+                                     int ParentState) {
+  const BasicBlock *BB = FirstNonPHI->getParent();
+  assert(BB->isEHPad() && "not a funclet!");
+
+  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) {
+    assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 &&
+           "shouldn't revisit catch funclets!");
+
+    // Extract the filter function and the __except basic block and create a
+    // state for them.
+    assert(CatchSwitch->getNumHandlers() == 1 &&
+           "SEH doesn't have multiple handlers per __try");
+    const auto *CatchPad =
+        cast<CatchPadInst>((*CatchSwitch->handler_begin())->getFirstNonPHI());
+    const BasicBlock *CatchPadBB = CatchPad->getParent();
+    const Constant *FilterOrNull =
+        cast<Constant>(CatchPad->getArgOperand(0)->stripPointerCasts());
+    const Function *Filter = dyn_cast<Function>(FilterOrNull);
+    assert((Filter || FilterOrNull->isNullValue()) &&
+           "unexpected filter value");
+    int TryState = addSEHExcept(FuncInfo, ParentState, Filter, CatchPadBB);
+
+    // Everything in the __try block uses TryState as its parent state.
+    FuncInfo.EHPadStateMap[CatchSwitch] = TryState;
+    DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
+                 << CatchPadBB->getName() << '\n');
+    for (const BasicBlock *PredBlock : predecessors(BB))
+      if ((PredBlock = getEHPadFromPredecessor(PredBlock,
+                                               CatchSwitch->getParentPad())))
+        calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+                                 TryState);
+
+    // Everything in the __except block unwinds to ParentState, just like code
+    // outside the __try.
+    for (const User *U : CatchPad->users()) {
+      const auto *UserI = cast<Instruction>(U);
+      if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI))
+        if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest())
+          calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+      if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI))
+        if (getCleanupRetUnwindDest(InnerCleanupPad) ==
+            CatchSwitch->getUnwindDest())
+          calculateSEHStateNumbers(FuncInfo, UserI, ParentState);
+    }
+  } else {
+    auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI);
+
+    // It's possible for a cleanup to be visited twice: it might have multiple
+    // cleanupret instructions.
+    if (FuncInfo.EHPadStateMap.count(CleanupPad))
+      return;
+
+    int CleanupState = addSEHFinally(FuncInfo, ParentState, BB);
+    FuncInfo.EHPadStateMap[CleanupPad] = CleanupState;
+    DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB "
+                 << BB->getName() << '\n');
+    for (const BasicBlock *PredBlock : predecessors(BB))
+      if ((PredBlock =
+               getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad())))
+        calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(),
+                                 CleanupState);
+    for (const User *U : CleanupPad->users()) {
+      const auto *UserI = cast<Instruction>(U);
+      if (UserI->isEHPad())
+        report_fatal_error("Cleanup funclets for the SEH personality cannot "
+                           "contain exceptional actions");
+    }
+  }
+}
+
+static bool isTopLevelPadForMSVC(const Instruction *EHPad) {
+  if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(EHPad))
+    return isa<ConstantTokenNone>(CatchSwitch->getParentPad()) &&
+           CatchSwitch->unwindsToCaller();
+  if (auto *CleanupPad = dyn_cast<CleanupPadInst>(EHPad))
+    return isa<ConstantTokenNone>(CleanupPad->getParentPad()) &&
+           getCleanupRetUnwindDest(CleanupPad) == nullptr;
+  if (isa<CatchPadInst>(EHPad))
+    return false;
+  llvm_unreachable("unexpected EHPad!");
+}
+
+void llvm::calculateSEHStateNumbers(const Function *Fn,
+                                    WinEHFuncInfo &FuncInfo) {
+  // Don't compute state numbers twice.
+  if (!FuncInfo.SEHUnwindMap.empty())
+    return;
+
+  for (const BasicBlock &BB : *Fn) {
+    if (!BB.isEHPad())
+      continue;
+    const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+    if (!isTopLevelPadForMSVC(FirstNonPHI))
+      continue;
+    ::calculateSEHStateNumbers(FuncInfo, FirstNonPHI, -1);
+  }
+
+  calculateStateNumbersForInvokes(Fn, FuncInfo);
+}
+
+void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
                                          WinEHFuncInfo &FuncInfo) {
   // Return if it's already been done.
-  if (!FuncInfo.LandingPadStateMap.empty())
+  if (!FuncInfo.EHPadStateMap.empty())
     return;
 
-  WinEHNumbering Num(FuncInfo);
-  Num.findActionRootLPads(*ParentFn);
-  // The VisitedHandlers list is used by both findActionRootLPads and
-  // calculateStateNumbers, but both functions need to visit all handlers.
-  Num.VisitedHandlers.clear();
-  Num.calculateStateNumbers(*ParentFn);
-  // Pop everything on the handler stack.
-  // It may be necessary to call this more than once because a handler can
-  // be pushed on the stack as a result of clearing the stack.
-  while (!Num.HandlerStack.empty())
-    Num.processCallSite(None, ImmutableCallSite());
+  for (const BasicBlock &BB : *Fn) {
+    if (!BB.isEHPad())
+      continue;
+    const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+    if (!isTopLevelPadForMSVC(FirstNonPHI))
+      continue;
+    calculateCXXStateNumbers(FuncInfo, FirstNonPHI, -1);
+  }
+
+  calculateStateNumbersForInvokes(Fn, FuncInfo);
 }
+
+static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int ParentState,
+                           ClrHandlerType HandlerType, uint32_t TypeToken,
+                           const BasicBlock *Handler) {
+  ClrEHUnwindMapEntry Entry;
+  Entry.Parent = ParentState;
+  Entry.Handler = Handler;
+  Entry.HandlerType = HandlerType;
+  Entry.TypeToken = TypeToken;
+  FuncInfo.ClrEHUnwindMap.push_back(Entry);
+  return FuncInfo.ClrEHUnwindMap.size() - 1;
+}
+
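addClrEHHandler follows the same append-and-return-index convention, and the CoreCLR numbering below drives it from an explicit worklist of (pad, parent state) pairs instead of recursion. The same traversal shape in miniature, with a placeholder node type standing in for EH pads and a toy numbering rule (the real scheme differs):

    #include <tuple>
    #include <utility>
    #include <vector>

    struct Node {
      std::vector<Node *> Children; // stand-in for nested EH pads
    };

    // Assign each node a state derived from its parent's, without recursion.
    inline void numberNodes(Node *Root,
                            std::vector<std::pair<Node *, int>> &Out) {
      std::vector<std::pair<Node *, int>> Worklist;
      Worklist.emplace_back(Root, -1); // -1: sentinel "unwind to caller"
      while (!Worklist.empty()) {
        Node *N;
        int ParentState;
        std::tie(N, ParentState) = Worklist.back();
        Worklist.pop_back();
        int MyState = ParentState + 1; // toy numbering for illustration
        Out.emplace_back(N, MyState);
        for (Node *C : N->Children)
          Worklist.emplace_back(C, MyState); // children see parent's state
      }
    }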
+void llvm::calculateClrEHStateNumbers(const Function *Fn,
+                                      WinEHFuncInfo &FuncInfo) {
+  // Return if it's already been done.
+  if (!FuncInfo.EHPadStateMap.empty())
+    return;
+
+  SmallVector<std::pair<const Instruction *, int>, 8> Worklist;
+
+  // Each pad needs to be able to refer to its parent, so scan the function
+  // looking for top-level handlers and seed the worklist with them.
+  for (const BasicBlock &BB : *Fn) {
+    if (!BB.isEHPad())
+      continue;
+    if (BB.isLandingPad())
+      report_fatal_error("CoreCLR EH cannot use landingpads");
+    const Instruction *FirstNonPHI = BB.getFirstNonPHI();
+    if (!isTopLevelPadForMSVC(FirstNonPHI))
+      continue;
+    // Queue this with sentinel parent state -1 to mean unwind to caller.
+    Worklist.emplace_back(FirstNonPHI, -1);
+  }
+
+  while (!Worklist.empty()) {
+    const Instruction *Pad;
+    int ParentState;
+    std::tie(Pad, ParentState) = Worklist.pop_back_val();
+
+    Value *ParentPad;
+    int PredState;
+    if (const CleanupPadInst *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
+      // A cleanup can have multiple exits; don't re-process after the first.
+      if (FuncInfo.EHPadStateMap.count(Cleanup))
+        continue;
+      // The CoreCLR personality uses arity to distinguish faults from
+      // finallies.
+      const BasicBlock *PadBlock = Cleanup->getParent();
+      ClrHandlerType HandlerType =
+          (Cleanup->getNumOperands() ? ClrHandlerType::Fault
+                                     : ClrHandlerType::Finally);
+      int NewState =
+          addClrEHHandler(FuncInfo, ParentState, HandlerType, 0, PadBlock);
+      FuncInfo.EHPadStateMap[Cleanup] = NewState;
+      // Propagate the new state to all preds of the cleanup.
+      ParentPad = Cleanup->getParentPad();
+      PredState = NewState;
+    } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+      SmallVector<const CatchPadInst *, 2> Handlers;
+      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+        const auto *Catch = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI());
+        Handlers.push_back(Catch);
+      }
+      FuncInfo.EHPadStateMap[CatchSwitch] = ParentState;
+      int NewState = ParentState;
+      for (auto HandlerI = Handlers.rbegin(), HandlerE = Handlers.rend();
+           HandlerI != HandlerE; ++HandlerI) {
+        const CatchPadInst *Catch = *HandlerI;
+        const BasicBlock *PadBlock = Catch->getParent();
+        uint32_t TypeToken = static_cast<uint32_t>(
+            cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue());
+        NewState = addClrEHHandler(FuncInfo, NewState, ClrHandlerType::Catch,
+                                   TypeToken, PadBlock);
+        FuncInfo.EHPadStateMap[Catch] = NewState;
+      }
+      for (const auto *CatchPad : Handlers) {
+        for (const User *U : CatchPad->users()) {
+          const auto *UserI = cast<Instruction>(U);
+          if (UserI->isEHPad())
+            Worklist.emplace_back(UserI, ParentState);
+        }
+      }
+      PredState = NewState;
+      ParentPad = CatchSwitch->getParentPad();
+    } else {
+      llvm_unreachable("Unexpected EH pad");
+    }
+
+    // Queue all predecessors with the given state.
+    for (const BasicBlock *Pred : predecessors(Pad->getParent())) {
+      if ((Pred = getEHPadFromPredecessor(Pred, ParentPad)))
+        Worklist.emplace_back(Pred->getFirstNonPHI(), PredState);
+    }
+  }
+
+  calculateStateNumbersForInvokes(Fn, FuncInfo);
+}
+
+void WinEHPrepare::colorFunclets(Function &F) {
+  BlockColors = colorEHFunclets(F);
+
+  // Invert the map from BB to colors to color to BBs.
+  for (BasicBlock &BB : F) {
+    ColorVector &Colors = BlockColors[&BB];
+    for (BasicBlock *Color : Colors)
+      FuncletBlocks[Color].push_back(&BB);
+  }
+}
+
+void llvm::calculateCatchReturnSuccessorColors(const Function *Fn,
+                                               WinEHFuncInfo &FuncInfo) {
+  for (const BasicBlock &BB : *Fn) {
+    const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator());
+    if (!CatchRet)
+      continue;
+    // A 'catchret' returns to the outer scope's color.
+    Value *ParentPad = CatchRet->getParentPad();
+    const BasicBlock *Color;
+    if (isa<ConstantTokenNone>(ParentPad))
+      Color = &Fn->getEntryBlock();
+    else
+      Color = cast<Instruction>(ParentPad)->getParent();
+    // Record the catchret successor's funclet membership.
+    FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color;
+  }
+}
+
+void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
+  // Strip PHI nodes off of EH pads.
+  SmallVector<PHINode *, 16> PHINodes;
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+    BasicBlock *BB = &*FI++;
+    if (!BB->isEHPad())
+      continue;
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
+      Instruction *I = &*BI++;
+      auto *PN = dyn_cast<PHINode>(I);
+      // Stop at the first non-PHI.
+      if (!PN)
+        break;
+
+      AllocaInst *SpillSlot = insertPHILoads(PN, F);
+      if (SpillSlot)
+        insertPHIStores(PN, SpillSlot);
+
+      PHINodes.push_back(PN);
+    }
+  }
+
+  for (auto *PN : PHINodes) {
+    // There may be lingering uses on other EH PHIs being removed.
+    PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
+    PN->eraseFromParent();
+  }
+}
+
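demotePHIsOnFunclets performs the classic register-to-memory demotion: a PHI that would straddle a funclet boundary becomes a stack slot that every predecessor stores to and every former use reloads from. A toy model of just the data flow, not the LLVM API:

    // Toy model of PHI demotion: the "spill slot" stands in for the alloca.
    // Every predecessor stores its incoming value before transferring
    // control, and the block that used to begin with the PHI reloads the
    // slot instead of reading a PHI result.
    struct DemotedPHI {
      int SpillSlot = 0;

      void storeFromPred(int IncomingValue) { SpillSlot = IncomingValue; }
      int reloadAtUse() const { return SpillSlot; }
    };

The real transformation differs only in mechanics: insertPHIStores places the stores, insertPHILoads places the reloads, and unsplittable predecessors (catchswitch blocks) are deferred to a worklist, as the functions further below show.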
+void WinEHPrepare::cloneCommonBlocks(Function &F) {
+  // We need to clone all blocks which belong to multiple funclets.  Values
+  // are remapped throughout the funclet to propagate both the new
+  // instructions *and* the new basic blocks themselves.
+  for (auto &Funclets : FuncletBlocks) {
+    BasicBlock *FuncletPadBB = Funclets.first;
+    std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second;
+
+    std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone;
+    ValueToValueMapTy VMap;
+    for (BasicBlock *BB : BlocksInFunclet) {
+      ColorVector &ColorsForBB = BlockColors[BB];
+      // We don't need to do anything if the block is monochromatic.
+      size_t NumColorsForBB = ColorsForBB.size();
+      if (NumColorsForBB == 1)
+        continue;
+
+      DEBUG_WITH_TYPE("winehprepare-coloring",
+                      dbgs() << "  Cloning block \'" << BB->getName()
+                             << "\' for funclet \'" << FuncletPadBB->getName()
+                             << "\'.\n");
+
+      // Create a new basic block and copy instructions into it!
+      BasicBlock *CBB =
+          CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName()));
+      // Insert the clone immediately after the original to ensure determinism
+      // and to keep the same relative ordering of any funclet's blocks.
+      CBB->insertInto(&F, BB->getNextNode());
+
+      // Add basic block mapping.
+      VMap[BB] = CBB;
+
+      // Record delta operations that we need to perform to our color
+      // mappings.
+      Orig2Clone.emplace_back(BB, CBB);
+    }
+
+    // If nothing was cloned, we're done cloning in this funclet.
+    if (Orig2Clone.empty())
+      continue;
+
+    // Update our color mappings to reflect that one block has lost a color
+    // and another has gained a color.
+    for (auto &BBMapping : Orig2Clone) {
+      BasicBlock *OldBlock = BBMapping.first;
+      BasicBlock *NewBlock = BBMapping.second;
+
+      BlocksInFunclet.push_back(NewBlock);
+      ColorVector &NewColors = BlockColors[NewBlock];
+      assert(NewColors.empty() && "A new block should only have one color!");
+      NewColors.push_back(FuncletPadBB);
+
+      DEBUG_WITH_TYPE("winehprepare-coloring",
+                      dbgs() << "  Assigned color \'"
+                             << FuncletPadBB->getName() << "\' to block \'"
+                             << NewBlock->getName() << "\'.\n");
+
+      BlocksInFunclet.erase(
+          std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(),
+                      OldBlock),
+          BlocksInFunclet.end());
+      ColorVector &OldColors = BlockColors[OldBlock];
+      OldColors.erase(
+          std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB),
+          OldColors.end());
+
+      DEBUG_WITH_TYPE("winehprepare-coloring",
+                      dbgs() << "  Removed color \'"
+                             << FuncletPadBB->getName() << "\' from block \'"
+                             << OldBlock->getName() << "\'.\n");
+    }
+
+    // Loop over all of the instructions in this funclet, fixing up operand
+    // references as we go.  This uses VMap to do all the hard work.
+    for (BasicBlock *BB : BlocksInFunclet)
+      // Loop over all instructions, fixing each one as we find it...
+      for (Instruction &I : *BB)
+        RemapInstruction(&I, VMap,
+                         RF_IgnoreMissingEntries | RF_NoModuleLevelChanges);
+
+    auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) {
+      unsigned NumPreds = PN->getNumIncomingValues();
+      for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd;
+           ++PredIdx) {
+        BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx);
+        ColorVector &IncomingColors = BlockColors[IncomingBlock];
+        bool BlockInFunclet = IncomingColors.size() == 1 &&
+                              IncomingColors.front() == FuncletPadBB;
+        if (IsForOldBlock != BlockInFunclet)
+          continue;
+        PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false);
+        // Revisit the next entry.
+        --PredIdx;
+        --PredEnd;
+      }
+    };
+
+    for (auto &BBMapping : Orig2Clone) {
+      BasicBlock *OldBlock = BBMapping.first;
+      BasicBlock *NewBlock = BBMapping.second;
+      for (Instruction &OldI : *OldBlock) {
+        auto *OldPN = dyn_cast<PHINode>(&OldI);
+        if (!OldPN)
+          break;
+        UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true);
+      }
+      for (Instruction &NewI : *NewBlock) {
+        auto *NewPN = dyn_cast<PHINode>(&NewI);
+        if (!NewPN)
+          break;
+        UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false);
+      }
+    }
+
+    // Check to see if SuccBB has PHI nodes.  If so, we need to add entries
+    // to the PHI nodes for NewBB now.
+    for (auto &BBMapping : Orig2Clone) {
+      BasicBlock *OldBlock = BBMapping.first;
+      BasicBlock *NewBlock = BBMapping.second;
+      for (BasicBlock *SuccBB : successors(NewBlock)) {
+        for (Instruction &SuccI : *SuccBB) {
+          auto *SuccPN = dyn_cast<PHINode>(&SuccI);
+          if (!SuccPN)
+            break;
+
+          // Ok, we have a PHI node.  Figure out what the incoming value was
+          // for the OldBlock.
+          int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock);
+          if (OldBlockIdx == -1)
+            break;
+          Value *IV = SuccPN->getIncomingValue(OldBlockIdx);
+
+          // Remap the value if necessary.
+          if (auto *Inst = dyn_cast<Instruction>(IV)) {
+            ValueToValueMapTy::iterator I = VMap.find(Inst);
+            if (I != VMap.end())
+              IV = I->second;
+          }
+
+          SuccPN->addIncoming(IV, NewBlock);
+        }
+      }
+    }
+
+    for (ValueToValueMapTy::value_type VT : VMap) {
+      // If there were values defined in BB that are used outside the funclet,
+      // then we now have to update all uses of the value to use either the
+      // original value, the cloned value, or some PHI derived value.  This
+      // can require arbitrary PHI insertion, which we are prepared to do;
+      // clean these up now.
+      SmallVector<Use *, 16> UsesToRename;
+
+      auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first));
+      if (!OldI)
+        continue;
+      auto *NewI = cast<Instruction>(VT.second);
+      // Scan all uses of this instruction to see if it is used outside of its
+      // funclet, and if so, record them in UsesToRename.
+      for (Use &U : OldI->uses()) {
+        Instruction *UserI = cast<Instruction>(U.getUser());
+        BasicBlock *UserBB = UserI->getParent();
+        ColorVector &ColorsForUserBB = BlockColors[UserBB];
+        assert(!ColorsForUserBB.empty());
+        if (ColorsForUserBB.size() > 1 ||
+            *ColorsForUserBB.begin() != FuncletPadBB)
+          UsesToRename.push_back(&U);
+      }
+
+      // If there are no uses outside the block, we're done with this
+      // instruction.
+      if (UsesToRename.empty())
+        continue;
+
+      // We found a use of OldI outside of the funclet.  Rename all uses of
+      // OldI that are outside its funclet to be uses of the appropriate PHI
+      // node etc.
+      SSAUpdater SSAUpdate;
+      SSAUpdate.Initialize(OldI->getType(), OldI->getName());
+      SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
+      SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);
+
+      while (!UsesToRename.empty())
+        SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
+    }
+  }
+}
+
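The SSAUpdater sequence above is the standard recipe for splitting one definition into two: seed the updater with both definitions, then let RewriteUseAfterInsertions place PHIs wherever the two reaching definitions meet. Distilled into a stand-alone helper as a sketch (same LLVM API calls as the loop above, shown out of context):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"

    using namespace llvm;

    // Given an original definition and its clone, rewrite the recorded uses
    // so each sees whichever definition reaches it; SSAUpdater inserts PHIs
    // where the two definitions meet.
    static void rewriteClonedDef(Instruction *OldI, Instruction *NewI,
                                 SmallVectorImpl<Use *> &UsesToRename) {
      SSAUpdater SSAUpdate;
      SSAUpdate.Initialize(OldI->getType(), OldI->getName());
      SSAUpdate.AddAvailableValue(OldI->getParent(), OldI);
      SSAUpdate.AddAvailableValue(NewI->getParent(), NewI);
      while (!UsesToRename.empty())
        SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val());
    }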
+void WinEHPrepare::removeImplausibleInstructions(Function &F) {
+  // Remove implausible terminators and replace them with UnreachableInst.
+  for (auto &Funclet : FuncletBlocks) {
+    BasicBlock *FuncletPadBB = Funclet.first;
+    std::vector<BasicBlock *> &BlocksInFunclet = Funclet.second;
+    Instruction *FirstNonPHI = FuncletPadBB->getFirstNonPHI();
+    auto *FuncletPad = dyn_cast<FuncletPadInst>(FirstNonPHI);
+    auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad);
+    auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad);
+
+    for (BasicBlock *BB : BlocksInFunclet) {
+      for (Instruction &I : *BB) {
+        CallSite CS(&I);
+        if (!CS)
+          continue;
+
+        Value *FuncletBundleOperand = nullptr;
+        if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet))
+          FuncletBundleOperand = BU->Inputs.front();
+
+        if (FuncletBundleOperand == FuncletPad)
+          continue;
+
+        // Skip call sites which are nounwind intrinsics.
+        auto *CalledFn =
+            dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+        if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow())
+          continue;
+
+        // This call site was not part of this funclet, remove it.
+        if (CS.isInvoke()) {
+          // Remove the unwind edge if it was an invoke.
+          removeUnwindEdge(BB);
+          // Get a pointer to the new call.
+          BasicBlock::iterator CallI =
+              std::prev(BB->getTerminator()->getIterator());
+          auto *CI = cast<CallInst>(&*CallI);
+          changeToUnreachable(CI, /*UseLLVMTrap=*/false);
+        } else {
+          changeToUnreachable(&I, /*UseLLVMTrap=*/false);
+        }
+
+        // There are no more instructions in the block (except for
+        // unreachable), so we are done.
+        break;
+      }
+
+      TerminatorInst *TI = BB->getTerminator();
+      // CatchPadInst and CleanupPadInst can't transfer control to a
+      // ReturnInst.
+      bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad;
+      // The token consumed by a CatchReturnInst must match the funclet token.
+      bool IsUnreachableCatchret = false;
+      if (auto *CRI = dyn_cast<CatchReturnInst>(TI))
+        IsUnreachableCatchret = CRI->getCatchPad() != CatchPad;
+      // The token consumed by a CleanupReturnInst must match the funclet
+      // token.
+      bool IsUnreachableCleanupret = false;
+      if (auto *CRI = dyn_cast<CleanupReturnInst>(TI))
+        IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad;
+      if (IsUnreachableRet || IsUnreachableCatchret ||
+          IsUnreachableCleanupret) {
+        changeToUnreachable(TI, /*UseLLVMTrap=*/false);
+      } else if (isa<InvokeInst>(TI)) {
+        if (Personality == EHPersonality::MSVC_CXX && CleanupPad) {
+          // Invokes within a cleanuppad for the MSVC++ personality never
+          // transfer control to their unwind edge: the personality will
+          // terminate the program.
+          removeUnwindEdge(BB);
+        }
+      }
+    }
+  }
+}
+
+void WinEHPrepare::cleanupPreparedFunclets(Function &F) {
+  // Clean up some of the mess we made by removing useless PHI nodes, trivial
+  // branches, etc.
+  for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
+    BasicBlock *BB = &*FI++;
+    SimplifyInstructionsInBlock(BB);
+    ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
+    MergeBlockIntoPredecessor(BB);
+  }
+
+  // We might have some unreachable blocks after cleaning up some impossible
+  // control flow.
+  removeUnreachableBlocks(F);
+}
+
+void WinEHPrepare::verifyPreparedFunclets(Function &F) {
+  // Recolor the CFG to verify that all is well.
+  for (BasicBlock &BB : F) {
+    size_t NumColors = BlockColors[&BB].size();
+    assert(NumColors == 1 && "Expected monochromatic BB!");
+    if (NumColors == 0)
+      report_fatal_error("Uncolored BB!");
+    if (NumColors > 1)
+      report_fatal_error("Multicolor BB!");
+    if (!DisableDemotion) {
+      bool EHPadHasPHI = BB.isEHPad() && isa<PHINode>(BB.begin());
+      assert(!EHPadHasPHI && "EH Pad still has a PHI!");
+      if (EHPadHasPHI)
+        report_fatal_error("EH Pad still has a PHI!");
+    }
+  }
+}
+
+bool WinEHPrepare::prepareExplicitEH(Function &F) {
+  // Remove unreachable blocks.  It is not valuable to assign them a color
+  // and their existence can trick us into thinking values are alive when
+  // they are not.
+  removeUnreachableBlocks(F);
+
+  // Determine which blocks are reachable from which funclet entries.
+  colorFunclets(F);
+
+  cloneCommonBlocks(F);
+
+  if (!DisableDemotion)
+    demotePHIsOnFunclets(F);
+
+  if (!DisableCleanups) {
+    removeImplausibleInstructions(F);
+
+    cleanupPreparedFunclets(F);
+  }
+
+  verifyPreparedFunclets(F);
+
+  BlockColors.clear();
+  FuncletBlocks.clear();
+
+  return true;
+}
+
+// TODO: Share loads when one use dominates another, or when a catchpad exit
+// dominates uses (needs dominators).
+AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
+  BasicBlock *PHIBlock = PN->getParent();
+  AllocaInst *SpillSlot = nullptr;
+  Instruction *EHPad = PHIBlock->getFirstNonPHI();
+
+  if (!isa<TerminatorInst>(EHPad)) {
+    // If the EHPad isn't a terminator, then we can insert a load in this
+    // block that will dominate all uses.
+    SpillSlot = new AllocaInst(PN->getType(), nullptr,
+                               Twine(PN->getName(), ".wineh.spillslot"),
+                               &F.getEntryBlock().front());
+    Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+                            &*PHIBlock->getFirstInsertionPt());
+    PN->replaceAllUsesWith(V);
+    return SpillSlot;
+  }
+
+  // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert
+  // loads of the slot before every use.
+  DenseMap<BasicBlock *, Value *> Loads;
+  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+       UI != UE;) {
+    Use &U = *UI++;
+    auto *UsingInst = cast<Instruction>(U.getUser());
+    if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) {
+      // Use is on an EH pad phi.  Leave it alone; we'll insert loads and
+      // stores for it separately.
+      continue;
+    }
+    replaceUseWithLoad(PN, U, SpillSlot, Loads, F);
+  }
+  return SpillSlot;
+}
+
+// TODO: Improve store placement.  Inserting at def is probably good, but
+// need to be careful not to introduce interfering stores (needs liveness
+// analysis).
+// TODO: Identify related phi nodes that can share spill slots, and share
+// them (also needs liveness).
+void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI,
+                                   AllocaInst *SpillSlot) {
+  // Use a worklist of (Block, Value) pairs -- the given Value needs to be
+  // stored to the spill slot by the end of the given Block.
+  SmallVector<std::pair<BasicBlock *, Value *>, 4> Worklist;
+
+  Worklist.push_back({OriginalPHI->getParent(), OriginalPHI});
+
+  while (!Worklist.empty()) {
+    BasicBlock *EHBlock;
+    Value *InVal;
+    std::tie(EHBlock, InVal) = Worklist.pop_back_val();
+
+    PHINode *PN = dyn_cast<PHINode>(InVal);
+    if (PN && PN->getParent() == EHBlock) {
+      // The value is defined by another PHI we need to remove, with no room
+      // to insert a store after the PHI, so each predecessor needs to store
+      // its incoming value.
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+        Value *PredVal = PN->getIncomingValue(i);
+
+        // Undef can safely be skipped.
+        if (isa<UndefValue>(PredVal))
+          continue;
+
+        insertPHIStore(PN->getIncomingBlock(i), PredVal, SpillSlot, Worklist);
+      }
+    } else {
+      // We need to store InVal, which dominates EHBlock, but can't put a
+      // store in EHBlock, so we need to put stores in each predecessor.
+      for (BasicBlock *PredBlock : predecessors(EHBlock)) {
+        insertPHIStore(PredBlock, InVal, SpillSlot, Worklist);
+      }
+    }
+  }
+}
+
+void WinEHPrepare::insertPHIStore(
+    BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+    SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
+
+  if (PredBlock->isEHPad() &&
+      isa<CatchSwitchInst>(PredBlock->getFirstNonPHI())) {
+    // Pred is unsplittable, so we need to queue it on the worklist.
+    Worklist.push_back({PredBlock, PredVal});
+    return;
+  }
+
+  // Otherwise, insert the store at the end of the basic block.
+  new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator());
+}
+
+void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U,
+                                      AllocaInst *&SpillSlot,
+                                      DenseMap<BasicBlock *, Value *> &Loads,
+                                      Function &F) {
+  // Lazily create the spill slot.
+  if (!SpillSlot)
+    SpillSlot = new AllocaInst(V->getType(), nullptr,
+                               Twine(V->getName(), ".wineh.spillslot"),
+                               &F.getEntryBlock().front());
+
+  auto *UsingInst = cast<Instruction>(U.getUser());
+  if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) {
+    // If this is a PHI node, we can't insert a load of the value before
+    // the use.  Instead insert the load in the predecessor block
+    // corresponding to the incoming value.
+    //
+    // Note that if there are multiple edges from a basic block to this
+    // PHI node, we cannot have multiple loads.  The problem is that
+    // the resulting PHI node will have multiple values (from each load)
+    // coming in from the same block, which is illegal SSA form.
+    // For this reason, we keep track of and reuse loads we insert.
+    BasicBlock *IncomingBlock = UsingPHI->getIncomingBlock(U);
+    if (auto *CatchRet =
+            dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+      // Putting a load above a catchret and use on the phi would still leave
+      // a cross-funclet def/use.  We need to split the edge, change the
+      // catchret to target the new block, and put the load there.
+      BasicBlock *PHIBlock = UsingInst->getParent();
+      BasicBlock *NewBlock = SplitEdge(IncomingBlock, PHIBlock);
+      // SplitEdge gives us:
+      //   IncomingBlock:
+      //     ...
+      //     br label %NewBlock
+      //   NewBlock:
+      //     catchret label %PHIBlock
+      // But we need:
+      //   IncomingBlock:
+      //     ...
+      //     catchret label %NewBlock
+      //   NewBlock:
+      //     br label %PHIBlock
+      // So move the terminators to each others' blocks and swap their
+      // successors.
+      BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
+      Goto->removeFromParent();
+      CatchRet->removeFromParent();
+      IncomingBlock->getInstList().push_back(CatchRet);
+      NewBlock->getInstList().push_back(Goto);
+      Goto->setSuccessor(0, PHIBlock);
+      CatchRet->setSuccessor(NewBlock);
+      // Update the color mapping for the newly split edge.
+      ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock];
+      BlockColors[NewBlock] = ColorsForPHIBlock;
+      for (BasicBlock *FuncletPad : ColorsForPHIBlock)
+        FuncletBlocks[FuncletPad].push_back(NewBlock);
+      // Treat the new block as incoming for load insertion.
+      IncomingBlock = NewBlock;
+    }
+    Value *&Load = Loads[IncomingBlock];
+    // Insert the load into the predecessor block.
+    if (!Load)
+      Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+                          /*Volatile=*/false, IncomingBlock->getTerminator());
+
+    U.set(Load);
+  } else {
+    // Reload right before the old use.
+    auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+                              /*Volatile=*/false, UsingInst);
+    U.set(Load);
+  }
+}
+
+void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
+                                      MCSymbol *InvokeBegin,
+                                      MCSymbol *InvokeEnd) {
+  assert(InvokeStateMap.count(II) &&
+         "should get invoke with precomputed state");
+  LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II],
+                                                InvokeEnd);
+}
+
+WinEHFuncInfo::WinEHFuncInfo() {}
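addIPToStateRange keys the table by the invoke's begin label and stores the precomputed state together with the end label, so a consumer can recover [begin, end) -> state ranges. A minimal stand-alone model of that map, with MCSymbol replaced by an opaque pointer for illustration:

    #include <map>
    #include <utility>

    using Label = const void *; // stand-in for MCSymbol *

    // Mirrors LabelToStateMap: begin label -> (EH state, end label).
    using IPToStateMap = std::map<Label, std::pair<int, Label>>;

    inline void addRange(IPToStateMap &Map, Label Begin, Label End,
                         int State) {
      Map[Begin] = std::make_pair(State, End);
    }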
+void FieldListRecordBuilder::writeMethod(uint16_t OverloadCount,
+                                         TypeIndex MethodList, StringRef Name) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(TypeRecordKind::Method);
+  Builder.writeUInt16(OverloadCount);
+  Builder.writeTypeIndex(MethodList);
+  Builder.writeNullTerminatedString(Name);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeOneMethod(
+    MemberAccess Access, MethodKind Kind, MethodOptions Options, TypeIndex Type,
+    int32_t VTableSlotOffset, StringRef Name) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  uint16_t Flags = static_cast<uint16_t>(Access);
+  Flags |= static_cast<uint16_t>(Kind) << MethodKindShift;
+  Flags |= static_cast<uint16_t>(Options);
+
+  Builder.writeTypeRecordKind(TypeRecordKind::OneMethod);
+  Builder.writeUInt16(Flags);
+  Builder.writeTypeIndex(Type);
+  switch (Kind) {
+  case MethodKind::IntroducingVirtual:
+  case MethodKind::PureIntroducingVirtual:
+    assert(VTableSlotOffset >= 0);
+    Builder.writeInt32(VTableSlotOffset);
+    break;
+
+  default:
+    assert(VTableSlotOffset == -1);
+    break;
+  }
+
+  Builder.writeNullTerminatedString(Name);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeOneMethod(const MethodInfo &Method,
+                                            StringRef Name) {
+  writeOneMethod(Method.getAccess(), Method.getKind(), Method.getOptions(),
+                 Method.getType(), Method.getVTableSlotOffset(), Name);
+}
+
+void FieldListRecordBuilder::writeNestedType(TypeIndex Type, StringRef Name) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(TypeRecordKind::NestedType);
+  Builder.writeUInt16(0);
+  Builder.writeTypeIndex(Type);
+  Builder.writeNullTerminatedString(Name);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeStaticMember(MemberAccess Access,
+                                               TypeIndex Type, StringRef Name) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(TypeRecordKind::StaticMember);
+  Builder.writeUInt16(static_cast<uint16_t>(Access));
+  Builder.writeTypeIndex(Type);
+  Builder.writeNullTerminatedString(Name);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeIndirectVirtualBaseClass(
+    MemberAccess Access, TypeIndex Type, TypeIndex VirtualBasePointerType,
+    int64_t VirtualBasePointerOffset, uint64_t SlotIndex) {
+  writeVirtualBaseClass(TypeRecordKind::IndirectVirtualBaseClass, Access, Type,
+                        VirtualBasePointerType, VirtualBasePointerOffset,
+                        SlotIndex);
+}
+
+void FieldListRecordBuilder::writeVirtualBaseClass(
+    MemberAccess Access, TypeIndex Type, TypeIndex VirtualBasePointerType,
+    int64_t VirtualBasePointerOffset, uint64_t SlotIndex) {
+  writeVirtualBaseClass(TypeRecordKind::VirtualBaseClass, Access, Type,
+                        VirtualBasePointerType, VirtualBasePointerOffset,
+                        SlotIndex);
+}
+
+void FieldListRecordBuilder::writeVirtualBaseClass(
+    TypeRecordKind Kind, MemberAccess Access, TypeIndex Type,
+    TypeIndex VirtualBasePointerType, int64_t VirtualBasePointerOffset,
+    uint64_t SlotIndex) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(Kind);
+  Builder.writeUInt16(static_cast<uint16_t>(Access));
+  Builder.writeTypeIndex(Type);
+  Builder.writeTypeIndex(VirtualBasePointerType);
+  Builder.writeEncodedInteger(VirtualBasePointerOffset);
+  Builder.writeEncodedUnsignedInteger(SlotIndex);
+
+  finishSubRecord();
+}
+
+void FieldListRecordBuilder::writeVirtualFunctionTablePointer(TypeIndex Type) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  Builder.writeTypeRecordKind(TypeRecordKind::VirtualFunctionTablePointer);
+  Builder.writeUInt16(0);
+  Builder.writeTypeIndex(Type);
+
+  finishSubRecord();
+}
\ No newline at end of file
diff --git a/lib/Analysis/IPA/LLVMBuild.txt b/lib/DebugInfo/CodeView/LLVMBuild.txt
similarity index 78%
rename from lib/Analysis/IPA/LLVMBuild.txt
rename to lib/DebugInfo/CodeView/LLVMBuild.txt
index 980e91809b55..4db23376fce4 100644
--- a/lib/Analysis/IPA/LLVMBuild.txt
+++ b/lib/DebugInfo/CodeView/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Analysis/IPA/LLVMBuild.txt -------------------------*- Conf -*--===;
+;===- ./lib/DebugInfo/CodeView/LLVMBuild.txt -------------------*- Conf -*--===;
 ;
 ; The LLVM Compiler Infrastructure
 ;
@@ -17,7 +17,6 @@
 [component_0]
 type = Library
-name = IPA
-parent = Libraries
-library_name = ipa
-required_libraries = Analysis Core Support
+name = DebugInfoCodeView
+parent = DebugInfo
+required_libraries = Support
diff --git a/lib/DebugInfo/CodeView/Line.cpp b/lib/DebugInfo/CodeView/Line.cpp
new file mode 100644
index 000000000000..4cb766b5fd26
--- /dev/null
+++ b/lib/DebugInfo/CodeView/Line.cpp
@@ -0,0 +1,22 @@
+//===-- Line.cpp ----------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/Line.h"
+
+using namespace llvm;
+using namespace codeview;
+
+LineInfo::LineInfo(uint32_t StartLine, uint32_t EndLine, bool IsStatement) {
+  LineData = StartLine & StartLineMask;
+  uint32_t LineDelta = EndLine - StartLine;
+  LineData |= (LineDelta << EndLineDeltaShift) & EndLineDeltaMask;
+  if (IsStatement) {
+    LineData |= StatementFlag;
+  }
+}
diff --git a/lib/DebugInfo/CodeView/ListRecordBuilder.cpp b/lib/DebugInfo/CodeView/ListRecordBuilder.cpp
new file mode 100644
index 000000000000..69c7e87330e6
--- /dev/null
+++ b/lib/DebugInfo/CodeView/ListRecordBuilder.cpp
@@ -0,0 +1,31 @@
+//===-- ListRecordBuilder.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+ListRecordBuilder::ListRecordBuilder(TypeRecordKind Kind) : Builder(Kind) {}
+
+void ListRecordBuilder::finishSubRecord() {
+  // The builder starts at offset 2 in the actual CodeView buffer, so add an
+  // additional offset of 2 before computing the alignment.
+  uint32_t Remainder = (Builder.size() + 2) % 4;
+  if (Remainder != 0) {
+    for (int32_t PaddingBytesLeft = 4 - Remainder; PaddingBytesLeft > 0;
+         --PaddingBytesLeft) {
+      Builder.writeUInt8(0xf0 + PaddingBytesLeft);
+    }
+  }
+
+  // TODO: Split the list into multiple records if it's longer than 64KB, using
+  // a subrecord of TypeRecordKind::Index to chain the records together.
+  assert(Builder.size() < 65536);
+}
diff --git a/lib/DebugInfo/CodeView/Makefile b/lib/DebugInfo/CodeView/Makefile
new file mode 100644
index 000000000000..70e2bd011429
--- /dev/null
+++ b/lib/DebugInfo/CodeView/Makefile
@@ -0,0 +1,14 @@
+##===- lib/DebugInfo/CodeView/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMDebugInfoCodeView
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp b/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp
new file mode 100644
index 000000000000..9afce92eeb1d
--- /dev/null
+++ b/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp
@@ -0,0 +1,35 @@
+//===-- MemoryTypeTableBuilder.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+
+using namespace llvm;
+using namespace codeview;
+
+MemoryTypeTableBuilder::Record::Record(StringRef RData)
+    : Size(RData.size()), Data(new char[RData.size()]) {
+  memcpy(Data.get(), RData.data(), RData.size());
+}
+
+TypeIndex MemoryTypeTableBuilder::writeRecord(StringRef Data) {
+  auto I = HashedRecords.find(Data);
+  if (I != HashedRecords.end()) {
+    return I->second;
+  }
+
+  std::unique_ptr<Record> R(new Record(Data));
+
+  TypeIndex TI(static_cast<uint32_t>(Records.size()) +
+               TypeIndex::FirstNonSimpleIndex);
+  HashedRecords.insert(std::make_pair(StringRef(R->data(), R->size()), TI));
+  Records.push_back(std::move(R));
+
+  return TI;
+}
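Note the interning behavior of writeRecord(): records are keyed by their serialized bytes, so structurally identical types collapse to a single TypeIndex. A hedged sketch of the observable effect (Int32TI stands in for any existing type index, and the ModifierRecord constructor arguments are assumed from the accessors used by writeModifier()):

// Illustrative only, not part of the patch.
MemoryTypeTableBuilder Table;
ModifierRecord Const(Int32TI, ModifierOptions::Const); // assumed constructor
TypeIndex A = Table.writeModifier(Const); // serialized, hashed, stored
TypeIndex B = Table.writeModifier(Const); // same bytes, so same TypeIndex
assert(A == B); // indices at or above TypeIndex::FirstNonSimpleIndex (0x1000)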
diff --git a/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp b/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp
new file mode 100644
index 000000000000..889302556b2d
--- /dev/null
+++ b/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp
@@ -0,0 +1,49 @@
+//===-- MethodListRecordBuilder.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/MethodListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+MethodListRecordBuilder::MethodListRecordBuilder()
+    : ListRecordBuilder(TypeRecordKind::MethodList) {}
+
+void MethodListRecordBuilder::writeMethod(MemberAccess Access, MethodKind Kind,
+                                          MethodOptions Options, TypeIndex Type,
+                                          int32_t VTableSlotOffset) {
+  TypeRecordBuilder &Builder = getBuilder();
+
+  uint16_t Flags = static_cast<uint16_t>(Access);
+  Flags |= static_cast<uint16_t>(Kind) << MethodKindShift;
+  Flags |= static_cast<uint16_t>(Options);
+
+  Builder.writeUInt16(Flags);
+  Builder.writeUInt16(0);
+  Builder.writeTypeIndex(Type);
+  switch (Kind) {
+  case MethodKind::IntroducingVirtual:
+  case MethodKind::PureIntroducingVirtual:
+    assert(VTableSlotOffset >= 0);
+    Builder.writeInt32(VTableSlotOffset);
+    break;
+
+  default:
+    assert(VTableSlotOffset == -1);
+    break;
+  }
+
+  // TODO: Fail if too big?
+}
+
+void MethodListRecordBuilder::writeMethod(const MethodInfo &Method) {
+  writeMethod(Method.getAccess(), Method.getKind(), Method.getOptions(),
+              Method.getType(), Method.getVTableSlotOffset());
+}
diff --git a/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp b/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp
new file mode 100644
index 000000000000..cbf464fd7668
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp
@@ -0,0 +1,113 @@
+//===-- TypeRecordBuilder.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
+
+using namespace llvm;
+using namespace codeview;
+
+TypeRecordBuilder::TypeRecordBuilder(TypeRecordKind Kind) : Stream(Buffer),
+  Writer(Stream) {
+  writeTypeRecordKind(Kind);
+}
+
+StringRef TypeRecordBuilder::str() {
+  return StringRef(Buffer.data(), Buffer.size());
+}
+
+void TypeRecordBuilder::writeUInt8(uint8_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeInt16(int16_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeUInt16(uint16_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeInt32(int32_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeUInt32(uint32_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeInt64(int64_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeUInt64(uint64_t Value) {
+  Writer.write(Value);
+}
+
+void TypeRecordBuilder::writeEncodedInteger(int64_t Value) {
+  if (Value >= 0) {
+    writeEncodedUnsignedInteger(static_cast<uint64_t>(Value));
+  } else {
+    writeEncodedSignedInteger(Value);
+  }
+}
+
+void TypeRecordBuilder::writeEncodedSignedInteger(int64_t Value) {
+  if (Value >= std::numeric_limits<int8_t>::min() &&
+      Value <= std::numeric_limits<int8_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::SByte));
+    writeInt16(static_cast<int16_t>(Value));
+  } else if (Value >= std::numeric_limits<int16_t>::min() &&
+             Value <= std::numeric_limits<int16_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::Int16));
+    writeInt16(static_cast<int16_t>(Value));
+  } else if (Value >= std::numeric_limits<int32_t>::min() &&
+             Value <= std::numeric_limits<int32_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::Int32));
+    writeInt32(static_cast<int32_t>(Value));
+  } else {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::Int64));
+    writeInt64(Value);
+  }
+}
+
+void TypeRecordBuilder::writeEncodedUnsignedInteger(uint64_t Value) {
+  if (Value < static_cast<uint16_t>(TypeRecordKind::SByte)) {
+    writeUInt16(static_cast<uint16_t>(Value));
+  } else if (Value <= std::numeric_limits<uint16_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::UInt16));
+    writeUInt16(static_cast<uint16_t>(Value));
+  } else if (Value <= std::numeric_limits<uint32_t>::max()) {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::UInt32));
+    writeUInt32(static_cast<uint32_t>(Value));
+  } else {
+    writeUInt16(static_cast<uint16_t>(TypeRecordKind::UInt64));
+    writeUInt64(Value);
+  }
+}
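These helpers implement CodeView's variable-length numeric leaves: a value below 0x8000 is stored directly in the two-byte field, and anything larger gets a leaf-kind prefix followed by a fixed-width payload. Expected little-endian byte sequences, assuming the TypeRecordKind enumerators carry the usual LF_* numeric-leaf values (LF_USHORT = 0x8002, LF_ULONG = 0x8004):

// Illustrative only, not part of the patch.
TypeRecordBuilder B(TypeRecordKind::FieldList); // the kind itself: 2 bytes
B.writeEncodedUnsignedInteger(100);     // 64 00              (immediate)
B.writeEncodedUnsignedInteger(0x9000);  // 02 80 00 90        (UInt16 leaf)
B.writeEncodedUnsignedInteger(0x12345); // 04 80 45 23 01 00  (UInt32 leaf)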
+void TypeRecordBuilder::writeNullTerminatedString(const char *Value) {
+  assert(Value != nullptr);
+
+  size_t Length = strlen(Value);
+  Stream.write(Value, Length);
+  writeUInt8(0);
+}
+
+void TypeRecordBuilder::writeNullTerminatedString(StringRef Value) {
+  Stream.write(Value.data(), Value.size());
+  writeUInt8(0);
+}
+
+void TypeRecordBuilder::writeTypeIndex(TypeIndex TypeInd) {
+  writeUInt32(TypeInd.getIndex());
+}
+
+void TypeRecordBuilder::writeTypeRecordKind(TypeRecordKind Kind) {
+  writeUInt16(static_cast<uint16_t>(Kind));
+}
diff --git a/lib/DebugInfo/CodeView/TypeTableBuilder.cpp b/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
new file mode 100644
index 000000000000..4af5dcaf7228
--- /dev/null
+++ b/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
@@ -0,0 +1,217 @@
+//===-- TypeTableBuilder.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/MethodListRecordBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace codeview;
+
+namespace {
+
+const int PointerKindShift = 0;
+const int PointerModeShift = 5;
+const int PointerSizeShift = 13;
+
+const int ClassHfaKindShift = 11;
+const int ClassWindowsRTClassKindShift = 14;
+
+void writePointerBase(TypeRecordBuilder &Builder,
+                      const PointerRecordBase &Record) {
+  Builder.writeTypeIndex(Record.getReferentType());
+  uint32_t flags =
+      static_cast<uint32_t>(Record.getOptions()) |
+      (Record.getSize() << PointerSizeShift) |
+      (static_cast<uint32_t>(Record.getMode()) << PointerModeShift) |
+      (static_cast<uint32_t>(Record.getPointerKind()) << PointerKindShift);
+  Builder.writeUInt32(flags);
+}
+}
+
+TypeTableBuilder::TypeTableBuilder() {}
+
+TypeTableBuilder::~TypeTableBuilder() {}
+
+TypeIndex TypeTableBuilder::writeModifier(const ModifierRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Modifier);
+
+  Builder.writeTypeIndex(Record.getModifiedType());
+  Builder.writeUInt16(static_cast<uint16_t>(Record.getOptions()));
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeProcedure(const ProcedureRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Procedure);
+
+  Builder.writeTypeIndex(Record.getReturnType());
+  Builder.writeUInt8(static_cast<uint8_t>(Record.getCallConv()));
+  Builder.writeUInt8(static_cast<uint8_t>(Record.getOptions()));
+  Builder.writeUInt16(Record.getParameterCount());
+  Builder.writeTypeIndex(Record.getArgumentList());
+
+  return writeRecord(Builder);
+}
+
+TypeIndex
+TypeTableBuilder::writeMemberFunction(const MemberFunctionRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::MemberFunction);
+
+  Builder.writeTypeIndex(Record.getReturnType());
+  Builder.writeTypeIndex(Record.getClassType());
+  Builder.writeTypeIndex(Record.getThisType());
+  Builder.writeUInt8(static_cast<uint8_t>(Record.getCallConv()));
+  Builder.writeUInt8(static_cast<uint8_t>(Record.getOptions()));
+  Builder.writeUInt16(Record.getParameterCount());
+  Builder.writeTypeIndex(Record.getArgumentList());
+  Builder.writeInt32(Record.getThisPointerAdjustment());
+
+  return writeRecord(Builder);
+}
+
+TypeIndex
+TypeTableBuilder::writeArgumentList(const ArgumentListRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::ArgumentList);
+
+  Builder.writeUInt32(Record.getArgumentTypes().size());
+  for (TypeIndex TI : Record.getArgumentTypes()) {
+    Builder.writeTypeIndex(TI);
+  }
+
+  return writeRecord(Builder);
+}
+TypeIndex TypeTableBuilder::writePointer(const PointerRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Pointer);
+
+  writePointerBase(Builder, Record);
+
+  return writeRecord(Builder);
+}
+
+TypeIndex
+TypeTableBuilder::writePointerToMember(const PointerToMemberRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Pointer);
+
+  writePointerBase(Builder, Record);
+
+  Builder.writeTypeIndex(Record.getContainingType());
+  Builder.writeUInt16(static_cast<uint16_t>(Record.getRepresentation()));
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeArray(const ArrayRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Array);
+
+  Builder.writeTypeIndex(Record.getElementType());
+  Builder.writeTypeIndex(Record.getIndexType());
+  Builder.writeEncodedUnsignedInteger(Record.getSize());
+  Builder.writeNullTerminatedString(Record.getName());
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeAggregate(const AggregateRecord &Record) {
+  assert((Record.getKind() == TypeRecordKind::Structure) ||
+         (Record.getKind() == TypeRecordKind::Class) ||
+         (Record.getKind() == TypeRecordKind::Union));
+
+  TypeRecordBuilder Builder(Record.getKind());
+
+  Builder.writeUInt16(Record.getMemberCount());
+  uint16_t Flags =
+      static_cast<uint16_t>(Record.getOptions()) |
+      (static_cast<uint16_t>(Record.getHfa()) << ClassHfaKindShift) |
+      (static_cast<uint16_t>(Record.getWinRTKind())
+       << ClassWindowsRTClassKindShift);
+  Builder.writeUInt16(Flags);
+  Builder.writeTypeIndex(Record.getFieldList());
+  if (Record.getKind() != TypeRecordKind::Union) {
+    Builder.writeTypeIndex(Record.getDerivationList());
+    Builder.writeTypeIndex(Record.getVTableShape());
+  } else {
+    assert(Record.getDerivationList() == TypeIndex());
+    assert(Record.getVTableShape() == TypeIndex());
+  }
+  Builder.writeEncodedUnsignedInteger(Record.getSize());
+  Builder.writeNullTerminatedString(Record.getName());
+  if ((Record.getOptions() & ClassOptions::HasUniqueName) !=
+      ClassOptions::None) {
+    Builder.writeNullTerminatedString(Record.getUniqueName());
+  }
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeEnum(const EnumRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::Enum);
+
+  Builder.writeUInt16(Record.getMemberCount());
+  Builder.writeUInt16(static_cast<uint16_t>(Record.getOptions()));
+  Builder.writeTypeIndex(Record.getUnderlyingType());
+  Builder.writeTypeIndex(Record.getFieldList());
+  Builder.writeNullTerminatedString(Record.getName());
+  if ((Record.getOptions() & ClassOptions::HasUniqueName) !=
+      ClassOptions::None) {
+    Builder.writeNullTerminatedString(Record.getUniqueName());
+  }
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeBitField(const BitFieldRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::BitField);
+
+  Builder.writeTypeIndex(Record.getType());
+  Builder.writeUInt8(Record.getBitSize());
+  Builder.writeUInt8(Record.getBitOffset());
+
+  return writeRecord(Builder);
+}
+
+TypeIndex TypeTableBuilder::writeVirtualTableShape(
+    const VirtualTableShapeRecord &Record) {
+  TypeRecordBuilder Builder(TypeRecordKind::VirtualTableShape);
+
+  ArrayRef<VirtualTableSlotKind> Slots = Record.getSlots();
+
+  Builder.writeUInt16(Slots.size());
+  for (size_t SlotIndex = 0; SlotIndex < Slots.size(); SlotIndex += 2) {
+    uint8_t Byte = static_cast<uint8_t>(Slots[SlotIndex]) << 4;
+    if ((SlotIndex + 1) < Slots.size()) {
+      Byte |= static_cast<uint8_t>(Slots[SlotIndex + 1]);
+    }
+    Builder.writeUInt8(Byte);
+  }
+
+  return writeRecord(Builder);
+}
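Slot kinds in a vtable shape are 4-bit values packed two per byte, high nibble first. A sketch of the inverse direction, illustrative only and relying on the same layout assumption as the loop above:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

// Recover the raw 4-bit slot kinds from packed vtable-shape bytes.
llvm::SmallVector<uint8_t, 8> unpackSlots(llvm::ArrayRef<uint8_t> Bytes,
                                          size_t NumSlots) {
  llvm::SmallVector<uint8_t, 8> Slots;
  for (size_t I = 0; I < NumSlots; ++I) {
    uint8_t Byte = Bytes[I / 2];
    Slots.push_back(I % 2 == 0 ? (Byte >> 4) : (Byte & 0xF));
  }
  return Slots;
}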
+TypeIndex TypeTableBuilder::writeRecord(TypeRecordBuilder &Builder) {
+  return writeRecord(Builder.str());
+}
+
+TypeIndex TypeTableBuilder::writeFieldList(FieldListRecordBuilder &FieldList) {
+  // TODO: Split the list into multiple records if it's longer than 64KB, using
+  // a subrecord of TypeRecordKind::Index to chain the records together.
+  return writeRecord(FieldList.str());
+}
+
+TypeIndex
+TypeTableBuilder::writeMethodList(MethodListRecordBuilder &MethodList) {
+  // TODO: Split the list into multiple records if it's longer than 64KB, using
+  // a subrecord of TypeRecordKind::Index to chain the records together.
+  return writeRecord(MethodList.str());
+}
diff --git a/lib/DebugInfo/DWARF/CMakeLists.txt b/lib/DebugInfo/DWARF/CMakeLists.txt
index d5f8a6f24eec..7104c5f10391 100644
--- a/lib/DebugInfo/DWARF/CMakeLists.txt
+++ b/lib/DebugInfo/DWARF/CMakeLists.txt
@@ -10,9 +10,11 @@ add_llvm_library(LLVMDebugInfoDWARF
   DWARFDebugInfoEntry.cpp
   DWARFDebugLine.cpp
   DWARFDebugLoc.cpp
+  DWARFDebugMacro.cpp
   DWARFDebugRangeList.cpp
   DWARFFormValue.cpp
   DWARFTypeUnit.cpp
+  DWARFUnitIndex.cpp
   DWARFUnit.cpp
   SyntaxHighlighting.cpp
diff --git a/lib/DebugInfo/DWARF/DWARFContext.cpp b/lib/DebugInfo/DWARF/DWARFContext.cpp
index 96bcf15e0af0..a4195b75c47d 100644
--- a/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -12,6 +12,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
 #include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
@@ -126,6 +127,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
     getDebugFrame()->dump(OS);
   }
 
+  if (DumpType == DIDT_All || DumpType == DIDT_Macro) {
+    OS << "\n.debug_macinfo contents:\n";
+    getDebugMacro()->dump(OS);
+  }
+
   uint32_t offset = 0;
   if (DumpType == DIDT_All || DumpType == DIDT_Aranges) {
     OS << "\n.debug_aranges contents:\n";
@@ -155,6 +161,16 @@
     }
   }
 
+  if (DumpType == DIDT_All || DumpType == DIDT_CUIndex) {
+    OS << "\n.debug_cu_index contents:\n";
+    getCUIndex().dump(OS);
+  }
+
+  if (DumpType == DIDT_All || DumpType == DIDT_TUIndex) {
+    OS << "\n.debug_tu_index contents:\n";
+    getTUIndex().dump(OS);
+  }
+
   if (DumpType == DIDT_All || DumpType == DIDT_LineDwo) {
     OS << "\n.debug_line.dwo contents:\n";
     unsigned stmtOffset = 0;
@@ -250,6 +266,28 @@
         getStringSection(), isLittleEndian());
 }
 
+const DWARFUnitIndex &DWARFContext::getCUIndex() {
+  if (CUIndex)
+    return *CUIndex;
+
+  DataExtractor CUIndexData(getCUIndexSection(), isLittleEndian(), 0);
+
+  CUIndex = llvm::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
+  CUIndex->parse(CUIndexData);
+  return *CUIndex;
+}
+
+const DWARFUnitIndex &DWARFContext::getTUIndex() {
+  if (TUIndex)
+    return *TUIndex;
+
+  DataExtractor TUIndexData(getTUIndexSection(), isLittleEndian(), 0);
+
+  TUIndex = llvm::make_unique<DWARFUnitIndex>(DW_SECT_TYPES);
+  TUIndex->parse(TUIndexData);
+  return *TUIndex;
+}
+
 const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
   if (Abbrev)
     return Abbrev.get();
@@ -322,24 +360,37 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() {
   return DebugFrame.get();
 }
 
+const DWARFDebugMacro *DWARFContext::getDebugMacro() {
+  if (Macro)
+    return Macro.get();
+
+  DataExtractor MacinfoData(getMacinfoSection(), isLittleEndian(), 0);
+  Macro.reset(new DWARFDebugMacro());
+  Macro->parse(MacinfoData);
+  return Macro.get();
+}
+
 const DWARFLineTable *
 DWARFContext::getLineTableForUnit(DWARFUnit *U) {
   if (!Line)
     Line.reset(new DWARFDebugLine(&getLineSection().Relocs));
+
   const auto *UnitDIE = U->getUnitDIE();
   if (UnitDIE == nullptr)
     return nullptr;
+
   unsigned stmtOffset =
       UnitDIE->getAttributeValueAsSectionOffset(U, DW_AT_stmt_list, -1U);
   if (stmtOffset == -1U)
     return nullptr; // No line table for this compile unit.
+  stmtOffset += U->getLineTableOffset();
   // See if the line table is cached.
   if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
     return lt;
 
   // We have to parse it first.
-  DataExtractor lineData(getLineSection().Data, isLittleEndian(),
+  DataExtractor lineData(U->getLineSection(), isLittleEndian(),
                          U->getAddressByteSize());
   return Line->getOrParseLineTable(lineData, stmtOffset);
 }
@@ -556,10 +607,11 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
       continue;
 
     StringRef data;
+    section_iterator RelocatedSection = Section.getRelocatedSection();
     // Try to obtain an already relocated version of this section.
    // Else use the unrelocated section from the object file. We'll have to
     // apply relocations ourselves later.
-    if (!L || !L->getLoadedSectionContents(name,data))
+    if (!L || !L->getLoadedSectionContents(*RelocatedSection,data))
       Section.getContents(data);
 
     name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes.
@@ -591,6 +643,7 @@
         .Case("debug_frame", &DebugFrameSection)
         .Case("debug_str", &StringSection)
         .Case("debug_ranges", &RangeSection)
+        .Case("debug_macinfo", &MacinfoSection)
         .Case("debug_pubnames", &PubNamesSection)
         .Case("debug_pubtypes", &PubTypesSection)
         .Case("debug_gnu_pubnames", &GnuPubNamesSection)
@@ -607,6 +660,8 @@
         .Case("apple_namespaces", &AppleNamespacesSection.Data)
         .Case("apple_namespac", &AppleNamespacesSection.Data)
         .Case("apple_objc", &AppleObjCSection.Data)
+        .Case("debug_cu_index", &CUIndexSection)
+        .Case("debug_tu_index", &TUIndexSection)
         // Any more debug info sections go here.
         .Default(nullptr);
     if (SectionData) {
@@ -623,7 +678,6 @@
       TypesDWOSections[Section].Data = data;
     }
 
-    section_iterator RelocatedSection = Section.getRelocatedSection();
     if (RelocatedSection == Obj.section_end())
       continue;
 
@@ -634,7 +688,15 @@
     // If the section we're relocating was relocated already by the JIT,
     // then we used the relocated version above, so we do not need to process
     // relocations for it now.
-    if (L && L->getLoadedSectionContents(RelSecName,RelSecData))
+    if (L && L->getLoadedSectionContents(*RelocatedSection,RelSecData))
+      continue;
+
+    // In Mach-o files, the relocations do not need to be applied if
+    // there is no load offset to apply. The value read at the
+    // relocation point already factors in the section address
+    // (actually applying the relocations will produce wrong results
+    // as the section address will be added twice).
+    if (!L && isa<MachOObjectFile>(&Obj))
      continue;
 
     RelSecName = RelSecName.substr(
@@ -685,13 +747,19 @@
       }
       SymAddr = *SymAddrOrErr;
       // Also remember what section this symbol is in for later
-      Sym->getSection(RSec);
+      RSec = *Sym->getSection();
     } else if (auto *MObj = dyn_cast<MachOObjectFile>(&Obj)) {
       // MachO also has relocations that point to sections and
       // scattered relocations.
-      // FIXME: We are not handling scattered relocations, do we have to?
-      RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
-      SymAddr = RSec->getAddress();
+      auto RelocInfo = MObj->getRelocation(Reloc.getRawDataRefImpl());
+      if (MObj->isRelocationScattered(RelocInfo)) {
+        // FIXME: it's not clear how to correctly handle scattered
+        // relocations.
+        continue;
+      } else {
+        RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl());
+        SymAddr = RSec->getAddress();
+      }
     }
 
     // If we are given load addresses for the sections, we need to adjust:
@@ -699,12 +767,15 @@
     //   (Address of Section in File) +
     //   (Load Address of Section)
     if (L != nullptr && RSec != Obj.section_end()) {
-      // RSec is now either the section being targetted or the section
-      // containing the symbol being targetted. In either case,
+      // RSec is now either the section being targeted or the section
+      // containing the symbol being targeted. In either case,
       // we need to perform the same computation.
       StringRef SecName;
       RSec->getName(SecName);
-      SectionLoadAddress = L->getSectionLoadAddress(SecName);
+//      llvm::dbgs() << "Name: '" << SecName
+//                   << "', RSec: " << RSec->getRawDataRefImpl()
+//                   << ", Section: " << Section.getRawDataRefImpl() << "\n";
+      SectionLoadAddress = L->getSectionLoadAddress(*RSec);
       if (SectionLoadAddress != 0)
         SymAddr += SectionLoadAddress - RSec->getAddress();
     }
diff --git a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 5abbde4ac0fe..62d5e666aef9 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -139,7 +139,7 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
   std::string File;
   auto Color = syntax::Enumerator;
   if (attr == DW_AT_decl_file || attr == DW_AT_call_file) {
-    Color = syntax::String;
+    Color = syntax::String;
     if (const auto *LT = u->getContext().getLineTableForUnit(u))
       if (LT->getFileNameByIndex(
               formValue.getAsUnsignedConstant().getValue(),
diff --git a/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
new file mode 100644
index 000000000000..b825d4d25243
--- /dev/null
+++ b/lib/DebugInfo/DWARF/DWARFDebugMacro.cpp
@@ -0,0 +1,103 @@
+//===-- DWARFDebugMacro.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SyntaxHighlighting.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace dwarf;
+using namespace syntax;
+
+void DWARFDebugMacro::dump(raw_ostream &OS) const {
+  unsigned IndLevel = 0;
+  for (const Entry &E : Macros) {
+    // There should not be DW_MACINFO_end_file when IndLevel is Zero. However,
+    // this check handles the case of corrupted ".debug_macinfo" section.
+    if (IndLevel > 0)
+      IndLevel -= (E.Type == DW_MACINFO_end_file);
+    // Print indentation.
+    for (unsigned I = 0; I < IndLevel; I++)
+      OS << "  ";
+    IndLevel += (E.Type == DW_MACINFO_start_file);
+
+    WithColor(OS, syntax::Macro).get() << MacinfoString(E.Type);
+    switch (E.Type) {
+    default:
+      // Got a corrupted ".debug_macinfo" section (invalid macinfo type).
+      break;
+    case DW_MACINFO_define:
+    case DW_MACINFO_undef:
+      OS << " - lineno: " << E.Line;
+      OS << " macro: " << E.MacroStr;
+      break;
+    case DW_MACINFO_start_file:
+      OS << " - lineno: " << E.Line;
+      OS << " filenum: " << E.File;
+      break;
+    case DW_MACINFO_end_file:
+      break;
+    case DW_MACINFO_vendor_ext:
+      OS << " - constant: " << E.ExtConstant;
+      OS << " string: " << E.ExtStr;
+      break;
+    }
+    OS << "\n";
+  }
+}
+
+void DWARFDebugMacro::parse(DataExtractor data) {
+  uint32_t Offset = 0;
+  while (data.isValidOffset(Offset)) {
+    // A macro list entry consists of:
+    Entry E;
+    // 1. Macinfo type
+    E.Type = data.getULEB128(&Offset);
+
+    if (E.Type == 0) {
+      // Reached end of ".debug_macinfo" section.
+      return;
+    }
+
+    switch (E.Type) {
+    default:
+      // Got a corrupted ".debug_macinfo" section (invalid macinfo type).
+      // Push the corrupted entry to the list and halt parsing.
+      E.Type = DW_MACINFO_invalid;
+      Macros.push_back(E);
+      return;
+    case DW_MACINFO_define:
+    case DW_MACINFO_undef:
+      // 2. Source line
+      E.Line = data.getULEB128(&Offset);
+      // 3. Macro string
+      E.MacroStr = data.getCStr(&Offset);
+      break;
+    case DW_MACINFO_start_file:
+      // 2. Source line
+      E.Line = data.getULEB128(&Offset);
+      // 3. Source file id
+      E.File = data.getULEB128(&Offset);
+      break;
+    case DW_MACINFO_end_file:
+      break;
+    case DW_MACINFO_vendor_ext:
+      // 2. Vendor extension constant
+      E.ExtConstant = data.getULEB128(&Offset);
+      // 3. Vendor extension string
+      E.ExtStr = data.getCStr(&Offset);
+      break;
+    }
+
+    Macros.push_back(E);
+  }
+}
diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 53a676efaf3f..3dc58423df68 100644
--- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -18,7 +18,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
-#include <climits>
+#include <limits>
 using namespace llvm;
 using namespace dwarf;
 using namespace syntax;
@@ -110,7 +110,7 @@
 static const DWARFFormValue::FormClass DWARF4FormClasses[] = {
 
 bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
   // First, check DWARF4 form classes.
-  if (Form < ArrayRef<FormClass>(DWARF4FormClasses).size() &&
+  if (Form < makeArrayRef(DWARF4FormClasses).size() &&
       DWARF4FormClasses[Form] == FC)
     return true;
   // Check more forms from DWARF4 and DWARF5 proposals.
@@ -261,6 +261,12 @@
 DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr,
 
 bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
                                uint32_t *offset_ptr, const DWARFUnit *cu) {
+  return skipValue(form, debug_info_data, offset_ptr, cu->getVersion(),
+                   cu->getAddressByteSize());
+}
+bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
+                               uint32_t *offset_ptr, uint16_t Version,
+                               uint8_t AddrSize) {
   bool indirect = false;
   do {
     switch (form) {
@@ -295,10 +301,10 @@
 
     // Compile unit address sized values
     case DW_FORM_addr:
-      *offset_ptr += cu->getAddressByteSize();
+      *offset_ptr += AddrSize;
       return true;
     case DW_FORM_ref_addr:
-      *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
+      *offset_ptr += getRefAddrSize(AddrSize, Version);
       return true;
 
     // 0 byte values - implied from the form.
@@ -565,7 +571,7 @@ Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const {
 
 Optional<int64_t> DWARFFormValue::getAsSignedConstant() const {
   if ((!isFormClass(FC_Constant) && !isFormClass(FC_Flag)) ||
-      (Form == DW_FORM_udata && uint64_t(LLONG_MAX) < Value.uval))
+      (Form == DW_FORM_udata && uint64_t(std::numeric_limits<int64_t>::max()) < Value.uval))
     return None;
   switch (Form) {
   case DW_FORM_data4:
@@ -584,6 +590,6 @@
 Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const {
   if (!isFormClass(FC_Block) && !isFormClass(FC_Exprloc))
     return None;
-  return ArrayRef<uint8_t>(Value.data, Value.uval);
+  return makeArrayRef(Value.data, Value.uval);
 }
diff --git a/lib/DebugInfo/DWARF/DWARFUnit.cpp b/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 348476d72b60..92ca2d4c3ff0 100644
--- a/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -14,29 +14,37 @@
 #include "llvm/Support/Path.h"
 #include <cstdio>
 
-using namespace llvm;
+namespace llvm {
 using namespace dwarf;
 
 void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
   parseImpl(C, Section, C.getDebugAbbrev(), C.getRangeSection(),
             C.getStringSection(), StringRef(), C.getAddrSection(),
-            C.isLittleEndian());
+            C.getLineSection().Data, C.isLittleEndian());
 }
 
 void DWARFUnitSectionBase::parseDWO(DWARFContext &C,
-                                    const DWARFSection &DWOSection) {
+                                    const DWARFSection &DWOSection,
+                                    DWARFUnitIndex *Index) {
   parseImpl(C, DWOSection, C.getDebugAbbrevDWO(), C.getRangeDWOSection(),
             C.getStringDWOSection(), C.getStringOffsetDWOSection(),
-            C.getAddrSection(), C.isLittleEndian());
+            C.getAddrSection(), C.getLineDWOSection().Data, C.isLittleEndian());
 }
 
 DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
                      const DWARFDebugAbbrev *DA, StringRef RS, StringRef SS,
-                     StringRef SOS, StringRef AOS, bool LE,
-                     const DWARFUnitSectionBase &UnitSection)
+                     StringRef SOS, StringRef AOS, StringRef LS, bool LE,
+                     const DWARFUnitSectionBase &UnitSection,
+                     const DWARFUnitIndex::Entry *IndexEntry)
     : Context(DC), InfoSection(Section), Abbrev(DA), RangeSection(RS),
-      StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS),
-      isLittleEndian(LE), UnitSection(UnitSection) {
+      LineSection(LS), StringSection(SS), StringOffsetSection([&]() {
+        if (IndexEntry)
+          if (const auto *C = IndexEntry->getOffset(DW_SECT_STR_OFFSETS))
+            return SOS.slice(C->Offset, C->Offset + C->Length);
+        return SOS;
+      }()),
+      AddrOffsetSection(AOS), isLittleEndian(LE), UnitSection(UnitSection),
+      IndexEntry(IndexEntry) {
   clear();
 }
@@ -69,6 +77,17 @@ bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
   Length = debug_info.getU32(offset_ptr);
   Version = debug_info.getU16(offset_ptr);
   uint64_t AbbrOffset = debug_info.getU32(offset_ptr);
+  if (IndexEntry) {
+    if (AbbrOffset)
+      return false;
+    auto *UnitContrib = IndexEntry->getOffset();
+    if (!UnitContrib || UnitContrib->Length != (Length + 4))
+      return false;
+    auto *AbbrEntry = IndexEntry->getOffset(DW_SECT_ABBREV);
+    if (!AbbrEntry)
+      return false;
+    AbbrOffset = AbbrEntry->Offset;
+  }
   AddrSize = debug_info.getU8(offset_ptr);
 
   bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
@@ -375,3 +394,12 @@
     return DWARFDebugInfoEntryInlinedChain();
   return SubprogramDIE->getInlinedChainForAddress(ChainCU, Address);
 }
+
+const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
+                                        DWARFSectionKind Kind) {
+  if (Kind == DW_SECT_INFO)
+    return Context.getCUIndex();
+  assert(Kind == DW_SECT_TYPES);
+  return Context.getTUIndex();
+}
+}
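The unit index parsed in the next file is the DWP package lookup table. Its on-disk layout, reconstructed here for reference from the reads performed by parseImpl() below (a derived sketch, not a quotation from the DWARF spec):

// .debug_cu_index / .debug_tu_index layout as consumed by DWARFUnitIndex:
//
//   uint32_t Version, NumColumns, NumUnits, NumBuckets;  // 16-byte header
//   uint64_t Signatures[NumBuckets];  // hash table keyed by unit signature
//   uint32_t Indexes[NumBuckets];     // 1-based row -> unit, 0 means empty
//   uint32_t ColumnKinds[NumColumns]; // one DW_SECT_* kind per column
//   uint32_t Offsets[NumUnits][NumColumns];
//   uint32_t Sizes[NumUnits][NumColumns];
//
// The contribution of unit U to the section in column C is the half-open
// byte range [Offsets[U][C], Offsets[U][C] + Sizes[U][C]).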
diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
new file mode 100644
index 000000000000..96b316957dfd
--- /dev/null
+++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp
@@ -0,0 +1,168 @@
+//===-- DWARFUnitIndex.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+bool DWARFUnitIndex::Header::parse(DataExtractor IndexData,
+                                   uint32_t *OffsetPtr) {
+  if (!IndexData.isValidOffsetForDataOfSize(*OffsetPtr, 16))
+    return false;
+  Version = IndexData.getU32(OffsetPtr);
+  NumColumns = IndexData.getU32(OffsetPtr);
+  NumUnits = IndexData.getU32(OffsetPtr);
+  NumBuckets = IndexData.getU32(OffsetPtr);
+  return Version <= 2;
+}
+
+void DWARFUnitIndex::Header::dump(raw_ostream &OS) const {
+  OS << format("version = %u slots = %u\n\n", Version, NumBuckets);
+}
+
+bool DWARFUnitIndex::parse(DataExtractor IndexData) {
+  bool b = parseImpl(IndexData);
+  if (!b) {
+    // Make sure we don't try to dump anything
+    Header.NumBuckets = 0;
+    // Release any partially initialized data.
+    ColumnKinds.reset();
+    Rows.reset();
+  }
+  return b;
+}
+
+bool DWARFUnitIndex::parseImpl(DataExtractor IndexData) {
+  uint32_t Offset = 0;
+  if (!Header.parse(IndexData, &Offset))
+    return false;
+
+  if (!IndexData.isValidOffsetForDataOfSize(
+          Offset, Header.NumBuckets * (8 + 4) +
+                      (2 * Header.NumUnits + 1) * 4 * Header.NumColumns))
+    return false;
+
+  Rows = llvm::make_unique<Entry[]>(Header.NumBuckets);
+  auto Contribs =
+      llvm::make_unique<Entry::SectionContribution *[]>(Header.NumUnits);
+  ColumnKinds = llvm::make_unique<DWARFSectionKind[]>(Header.NumColumns);
+
+  // Read Hash Table of Signatures
+  for (unsigned i = 0; i != Header.NumBuckets; ++i)
+    Rows[i].Signature = IndexData.getU64(&Offset);
+
+  // Read Parallel Table of Indexes
+  for (unsigned i = 0; i != Header.NumBuckets; ++i) {
+    auto Index = IndexData.getU32(&Offset);
+    if (!Index)
+      continue;
+    Rows[i].Index = this;
+    Rows[i].Contributions =
+        llvm::make_unique<Entry::SectionContribution[]>(Header.NumColumns);
+    Contribs[Index - 1] = Rows[i].Contributions.get();
+  }
+
+  // Read the Column Headers
+  for (unsigned i = 0; i != Header.NumColumns; ++i) {
+    ColumnKinds[i] = static_cast<DWARFSectionKind>(IndexData.getU32(&Offset));
+    if (ColumnKinds[i] == InfoColumnKind) {
+      if (InfoColumn != -1)
+        return false;
+      InfoColumn = i;
+    }
+  }
+
+  if (InfoColumn == -1)
+    return false;
+
+  // Read Table of Section Offsets
+  for (unsigned i = 0; i != Header.NumUnits; ++i) {
+    auto *Contrib = Contribs[i];
+    for (unsigned i = 0; i != Header.NumColumns; ++i)
+      Contrib[i].Offset = IndexData.getU32(&Offset);
+  }
+
+  // Read Table of Section Sizes
+  for (unsigned i = 0; i != Header.NumUnits; ++i) {
+    auto *Contrib = Contribs[i];
+    for (unsigned i = 0; i != Header.NumColumns; ++i)
+      Contrib[i].Length = IndexData.getU32(&Offset);
+  }
+
+  return true;
+}
+
+StringRef DWARFUnitIndex::getColumnHeader(DWARFSectionKind DS) {
+#define CASE(DS)                                                               \
+  case DW_SECT_##DS:                                                           \
+    return #DS;
+  switch (DS) {
+    CASE(INFO);
+    CASE(TYPES);
+    CASE(ABBREV);
+    CASE(LINE);
+    CASE(LOC);
+    CASE(STR_OFFSETS);
+    CASE(MACINFO);
+    CASE(MACRO);
+  }
+  llvm_unreachable("unknown DWARFSectionKind");
+}
+
+void DWARFUnitIndex::dump(raw_ostream &OS) const {
+  if (!Header.NumBuckets)
+    return;
+
+  Header.dump(OS);
+  OS << "Index Signature         ";
+  for (unsigned i = 0; i != Header.NumColumns; ++i)
+    OS << ' ' << left_justify(getColumnHeader(ColumnKinds[i]), 24);
+  OS << "\n----- ------------------";
+  for (unsigned i = 0; i != Header.NumColumns; ++i)
+    OS << " ------------------------";
+  OS << '\n';
+  for (unsigned i = 0; i != Header.NumBuckets; ++i) {
+    auto &Row = Rows[i];
+    if (auto *Contribs = Row.Contributions.get()) {
+      OS << format("%5u 0x%016" PRIx64 " ", i + 1, Row.Signature);
+      for (unsigned i = 0; i != Header.NumColumns; ++i) {
+        auto &Contrib = Contribs[i];
+        OS << format("[0x%08x, 0x%08x) ", Contrib.Offset,
+                     Contrib.Offset + Contrib.Length);
+      }
+      OS << '\n';
+    }
+  }
+}
+
+const DWARFUnitIndex::Entry::SectionContribution *
+DWARFUnitIndex::Entry::getOffset(DWARFSectionKind Sec) const {
+  uint32_t i = 0;
+  for (; i != Index->Header.NumColumns; ++i)
+    if (Index->ColumnKinds[i] == Sec)
+      return &Contributions[i];
+  return nullptr;
+}
+const DWARFUnitIndex::Entry::SectionContribution *
+DWARFUnitIndex::Entry::getOffset() const {
+  return &Contributions[Index->InfoColumn];
+}
+
+const DWARFUnitIndex::Entry *
+DWARFUnitIndex::getFromOffset(uint32_t Offset) const {
+  for (uint32_t i = 0; i != Header.NumBuckets; ++i)
+    if (const auto &Contribs = Rows[i].Contributions)
+      if (Contribs[InfoColumn].Offset == Offset)
+        return &Rows[i];
+  return nullptr;
+}
+}
diff --git a/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp b/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
index a6b4c6549ca4..4f561d062b12 100644
--- a/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
+++ b/lib/DebugInfo/DWARF/SyntaxHighlighting.cpp
@@ -27,6 +27,7 @@ WithColor::WithColor(llvm::raw_ostream &OS, enum HighlightColor Type) : OS(OS) {
     case Tag:        OS.changeColor(llvm::raw_ostream::BLUE);    break;
     case Attribute:  OS.changeColor(llvm::raw_ostream::CYAN);    break;
     case Enumerator: OS.changeColor(llvm::raw_ostream::MAGENTA); break;
+    case Macro:      OS.changeColor(llvm::raw_ostream::RED);     break;
     }
   }
 }
diff --git a/lib/DebugInfo/DWARF/SyntaxHighlighting.h b/lib/DebugInfo/DWARF/SyntaxHighlighting.h
index 946a31308aa1..16e68351d5e1 100644
--- a/lib/DebugInfo/DWARF/SyntaxHighlighting.h
+++ b/lib/DebugInfo/DWARF/SyntaxHighlighting.h
@@ -17,7 +17,7 @@ namespace dwarf {
 namespace syntax {
 
 // Symbolic names for various syntax elements.
-enum HighlightColor { Address, String, Tag, Attribute, Enumerator };
+enum HighlightColor { Address, String, Tag, Attribute, Enumerator, Macro };
 
 /// An RAII object that temporarily switches an output stream to a
 /// specific color.
diff --git a/lib/DebugInfo/LLVMBuild.txt b/lib/DebugInfo/LLVMBuild.txt
index 7a8e8baec2c7..23a5a3db5628 100644
--- a/lib/DebugInfo/LLVMBuild.txt
+++ b/lib/DebugInfo/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
 
 [common]
-subdirectories = DWARF PDB
+subdirectories = CodeView DWARF PDB Symbolize
 
 [component_0]
 type = Group
diff --git a/lib/DebugInfo/Makefile b/lib/DebugInfo/Makefile
index 27a5e1f0f496..6072af314416 100644
--- a/lib/DebugInfo/Makefile
+++ b/lib/DebugInfo/Makefile
@@ -10,6 +10,6 @@
 LEVEL = ../..
 include $(LEVEL)/Makefile.config
 
-PARALLEL_DIRS := DWARF PDB
+PARALLEL_DIRS := CodeView DWARF PDB Symbolize
 
-include $(LEVEL)/Makefile.common
\ No newline at end of file
+include $(LEVEL)/Makefile.common
diff --git a/lib/DebugInfo/PDB/PDB.cpp b/lib/DebugInfo/PDB/PDB.cpp
index 13201bbaa641..613407eb1346 100644
--- a/lib/DebugInfo/PDB/PDB.cpp
+++ b/lib/DebugInfo/PDB/PDB.cpp
@@ -31,7 +31,7 @@ PDB_ErrorCode llvm::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
 
 PDB_ErrorCode llvm::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
                                    std::unique_ptr<IPDBSession> &Session) {
-// Create the correct concrete instance type based on the value of Type.
+  // Create the correct concrete instance type based on the value of Type.
 #if HAVE_DIA_SDK
   return DIASession::createFromExe(Path, Session);
 #endif
diff --git a/lib/DebugInfo/PDB/PDBContext.cpp b/lib/DebugInfo/PDB/PDBContext.cpp
index 83f27c7fa3d9..ca2ae6665ce8 100644
--- a/lib/DebugInfo/PDB/PDBContext.cpp
+++ b/lib/DebugInfo/PDB/PDBContext.cpp
@@ -21,24 +21,11 @@ using namespace llvm;
 using namespace llvm::object;
 
 PDBContext::PDBContext(const COFFObjectFile &Object,
-                       std::unique_ptr<IPDBSession> PDBSession,
-                       bool RelativeAddress)
+                       std::unique_ptr<IPDBSession> PDBSession)
     : DIContext(CK_PDB), Session(std::move(PDBSession)) {
-  if (!RelativeAddress) {
-    uint64_t ImageBase = 0;
-    if (Object.is64()) {
-      const pe32plus_header *Header = nullptr;
-      Object.getPE32PlusHeader(Header);
-      if (Header)
-        ImageBase = Header->ImageBase;
-    } else {
-      const pe32_header *Header = nullptr;
-      Object.getPE32Header(Header);
-      if (Header)
-        ImageBase = static_cast<uint64_t>(Header->ImageBase);
-    }
-    Session->setLoadAddress(ImageBase);
-  }
+  ErrorOr<uint64_t> ImageBase = Object.getImageBase();
+  if (ImageBase)
+    Session->setLoadAddress(ImageBase.get());
 }
 
 void PDBContext::dump(raw_ostream &OS, DIDumpType DumpType) {}
diff --git a/lib/DebugInfo/Symbolize/CMakeLists.txt b/lib/DebugInfo/Symbolize/CMakeLists.txt
new file mode 100644
index 000000000000..fe5c4bfc4321
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_library(LLVMSymbolize
+  DIPrinter.cpp
+  SymbolizableObjectFile.cpp
+  Symbolize.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/DebugInfo/Symbolize
+  )
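The DIPrinter added below formats DILineInfo in an addr2line-compatible way. A hypothetical usage sketch (the constructor signature with defaulted printing options is an assumption, since DIPrinter.h is not part of this hunk):

// Illustrative only, not part of the patch.
DILineInfo Info;            // e.g. returned by LLVMSymbolizer::symbolizeCode()
Info.FunctionName = "main"; // sample values for illustration
Info.FileName = "/src/a.c";
Info.Line = 42;
Info.Column = 7;
DIPrinter Printer(llvm::outs()); // assumed: options default to printing names
Printer << Info;                 // emits "main\n/src/a.c:42:7\n"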
diff --git a/lib/DebugInfo/Symbolize/DIPrinter.cpp b/lib/DebugInfo/Symbolize/DIPrinter.cpp
new file mode 100644
index 000000000000..c6bfbc07dcf3
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -0,0 +1,69 @@
+//===- lib/DebugInfo/Symbolize/DIPrinter.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DIPrinter class, which is responsible for printing
+// structures defined in DebugInfo/DIContext.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
+
+#include "llvm/DebugInfo/DIContext.h"
+
+namespace llvm {
+namespace symbolize {
+
+// By default, DILineInfo contains "<invalid>" for function/filename it
+// cannot fetch. We replace it to "??" to make our output closer to addr2line.
+static const char kDILineInfoBadString[] = "<invalid>";
+static const char kBadString[] = "??";
+
+void DIPrinter::printName(const DILineInfo &Info, bool Inlined) {
+  if (PrintFunctionNames) {
+    std::string FunctionName = Info.FunctionName;
+    if (FunctionName == kDILineInfoBadString)
+      FunctionName = kBadString;
+
+    StringRef Delimiter = (PrintPretty == true) ? " at " : "\n";
+    StringRef Prefix = (PrintPretty && Inlined) ? " (inlined by) " : "";
+    OS << Prefix << FunctionName << Delimiter;
+  }
+  std::string Filename = Info.FileName;
+  if (Filename == kDILineInfoBadString)
+    Filename = kBadString;
+  OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n";
+}
+
+DIPrinter &DIPrinter::operator<<(const DILineInfo &Info) {
+  printName(Info, false);
+  return *this;
+}
+
+DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) {
+  uint32_t FramesNum = Info.getNumberOfFrames();
+  if (FramesNum == 0) {
+    printName(DILineInfo(), false);
+    return *this;
+  }
+  for (uint32_t i = 0; i < FramesNum; i++)
+    printName(Info.getFrame(i), i > 0);
+  return *this;
+}
+
+DIPrinter &DIPrinter::operator<<(const DIGlobal &Global) {
+  std::string Name = Global.Name;
+  if (Name == kDILineInfoBadString)
+    Name = kBadString;
+  OS << Name << "\n";
+  OS << Global.Start << " " << Global.Size << "\n";
+  return *this;
+}
+
+}
+}
diff --git a/lib/DebugInfo/Symbolize/LLVMBuild.txt b/lib/DebugInfo/Symbolize/LLVMBuild.txt
new file mode 100644
index 000000000000..f9ec6b32f6d9
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/LLVMBuild.txt
@@ -0,0 +1,22 @@
+;===- ./lib/DebugInfo/Symbolize/LLVMBuild.txt ------------------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = Symbolize
+parent = DebugInfo
+required_libraries = DebugInfoDWARF DebugInfoPDB Object Support
diff --git a/lib/Analysis/IPA/Makefile b/lib/DebugInfo/Symbolize/Makefile
similarity index 74%
rename from lib/Analysis/IPA/Makefile
rename to lib/DebugInfo/Symbolize/Makefile
index b850c9ff7f44..17aac9396585 100644
--- a/lib/Analysis/IPA/Makefile
+++ b/lib/DebugInfo/Symbolize/Makefile
@@ -1,4 +1,4 @@
-##===- lib/Analysis/IPA/Makefile ---------------------------*- Makefile -*-===##
+##===- lib/DebugInfo/Symbolize/Makefile --------------------*- Makefile -*-===##
 #
 # The LLVM Compiler Infrastructure
 #
@@ -8,8 +8,8 @@
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../../..
-LIBRARYNAME = LLVMipa
-BUILD_ARCHIVE = 1
+LIBRARYNAME = LLVMSymbolize
+BUILD_ARCHIVE := 1
 
 include $(LEVEL)/Makefile.common
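One subtlety the next file handles is big-endian PowerPC64 ELF, where a function symbol points at a descriptor in the .opd section rather than at code; the first word of the descriptor is the real entry point. A standalone sketch of that indirection (illustrative only, mirroring the addSymbol() logic below):

#include "llvm/Support/DataExtractor.h"

// Given the .opd contents and its base address, map a descriptor address to
// the function's code address (the descriptor's first word).
uint64_t resolveOpdEntry(const llvm::DataExtractor &Opd, uint64_t OpdAddress,
                         uint64_t SymbolAddress) {
  uint32_t Offset = SymbolAddress - OpdAddress; // offset of the descriptor
  return Opd.getAddress(&Offset);               // first word = entry point
}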
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
new file mode 100644
index 000000000000..e31462459844
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -0,0 +1,254 @@
+//===-- SymbolizableObjectFile.cpp ----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of SymbolizableObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SymbolizableObjectFile.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+
+namespace llvm {
+namespace symbolize {
+
+using namespace object;
+
+static DILineInfoSpecifier
+getDILineInfoSpecifier(FunctionNameKind FNKind) {
+  return DILineInfoSpecifier(
+      DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
+}
+
+ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
+SymbolizableObjectFile::create(object::ObjectFile *Obj,
+                               std::unique_ptr<DIContext> DICtx) {
+  std::unique_ptr<SymbolizableObjectFile> res(
+      new SymbolizableObjectFile(Obj, std::move(DICtx)));
+  std::unique_ptr<DataExtractor> OpdExtractor;
+  uint64_t OpdAddress = 0;
+  // Find the .opd (function descriptor) section if any, for big-endian
+  // PowerPC64 ELF.
+  if (Obj->getArch() == Triple::ppc64) {
+    for (section_iterator Section : Obj->sections()) {
+      StringRef Name;
+      StringRef Data;
+      if (auto EC = Section->getName(Name))
+        return EC;
+      if (Name == ".opd") {
+        if (auto EC = Section->getContents(Data))
+          return EC;
+        OpdExtractor.reset(new DataExtractor(Data, Obj->isLittleEndian(),
+                                             Obj->getBytesInAddress()));
+        OpdAddress = Section->getAddress();
+        break;
+      }
+    }
+  }
+  std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
+      computeSymbolSizes(*Obj);
+  for (auto &P : Symbols)
+    res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
+
+  // If this is a COFF object and we didn't find any symbols, try the export
+  // table.
+  if (Symbols.empty()) {
+    if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
+      if (auto EC = res->addCoffExportSymbols(CoffObj))
+        return EC;
+  }
+  return std::move(res);
+}
+
+SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
+                                               std::unique_ptr<DIContext> DICtx)
+    : Module(Obj), DebugInfoContext(std::move(DICtx)) {}
+
+namespace {
+struct OffsetNamePair {
+  uint32_t Offset;
+  StringRef Name;
+  bool operator<(const OffsetNamePair &R) const {
+    return Offset < R.Offset;
+  }
+};
+}
+
+std::error_code SymbolizableObjectFile::addCoffExportSymbols(
+    const COFFObjectFile *CoffObj) {
+  // Get all export names and offsets.
+  std::vector<OffsetNamePair> ExportSyms;
+  for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
+    StringRef Name;
+    uint32_t Offset;
+    if (auto EC = Ref.getSymbolName(Name))
+      return EC;
+    if (auto EC = Ref.getExportRVA(Offset))
+      return EC;
+    ExportSyms.push_back(OffsetNamePair{Offset, Name});
+  }
+  if (ExportSyms.empty())
+    return std::error_code();
+
+  // Sort by ascending offset.
+  array_pod_sort(ExportSyms.begin(), ExportSyms.end());
+
+  // Approximate the symbol sizes by assuming they run to the next symbol.
+  // FIXME: This assumes all exports are functions.
+  uint64_t ImageBase = CoffObj->getImageBase();
+  for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
+    OffsetNamePair &Export = *I;
+    // FIXME: The last export has a one byte size now.
+    uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
+    uint64_t SymbolStart = ImageBase + Export.Offset;
+    uint64_t SymbolSize = NextOffset - Export.Offset;
+    SymbolDesc SD = {SymbolStart, SymbolSize};
+    Functions.insert(std::make_pair(SD, Export.Name));
+  }
+  return std::error_code();
+}
+std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
+                                                  uint64_t SymbolSize,
+                                                  DataExtractor *OpdExtractor,
+                                                  uint64_t OpdAddress) {
+  SymbolRef::Type SymbolType = Symbol.getType();
+  if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
+    return std::error_code();
+  ErrorOr<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
+  if (auto EC = SymbolAddressOrErr.getError())
+    return EC;
+  uint64_t SymbolAddress = *SymbolAddressOrErr;
+  if (OpdExtractor) {
+    // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
+    // function descriptors. The first word of the descriptor is a pointer to
+    // the function's code.
+    // For the purposes of symbolization, pretend the symbol's address is that
+    // of the function's code, not the descriptor.
+    uint64_t OpdOffset = SymbolAddress - OpdAddress;
+    uint32_t OpdOffset32 = OpdOffset;
+    if (OpdOffset == OpdOffset32 &&
+        OpdExtractor->isValidOffsetForAddress(OpdOffset32))
+      SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
+  }
+  ErrorOr<StringRef> SymbolNameOrErr = Symbol.getName();
+  if (auto EC = SymbolNameOrErr.getError())
+    return EC;
+  StringRef SymbolName = *SymbolNameOrErr;
+  // Mach-O symbol table names have leading underscore, skip it.
+  if (Module->isMachO() && SymbolName.size() > 0 && SymbolName[0] == '_')
+    SymbolName = SymbolName.drop_front();
+  // FIXME: If a function has an alias, there are two entries in the symbol
+  // table with the same address and size. Make sure we choose the correct one.
+  auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
+  SymbolDesc SD = { SymbolAddress, SymbolSize };
+  M.insert(std::make_pair(SD, SymbolName));
+  return std::error_code();
+}
+
+// Return true if this is a 32-bit x86 PE COFF module.
+bool SymbolizableObjectFile::isWin32Module() const {
+  auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
+  return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
+}
+
+uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
+  if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
+    return CoffObject->getImageBase();
+  return 0;
+}
+
+bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
+                                                    uint64_t Address,
+                                                    std::string &Name,
+                                                    uint64_t &Addr,
+                                                    uint64_t &Size) const {
+  const auto &SymbolMap = Type == SymbolRef::ST_Function ? Functions : Objects;
+  if (SymbolMap.empty())
+    return false;
+  SymbolDesc SD = { Address, Address };
+  auto SymbolIterator = SymbolMap.upper_bound(SD);
+  if (SymbolIterator == SymbolMap.begin())
+    return false;
+  --SymbolIterator;
+  if (SymbolIterator->first.Size != 0 &&
+      SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
+    return false;
+  Name = SymbolIterator->second.str();
+  Addr = SymbolIterator->first.Addr;
+  Size = SymbolIterator->first.Size;
+  return true;
+}
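getNameFromSymbolTable() resolves an address with an ordered-map upper_bound: take the last symbol starting at or before the address, then check that the address falls inside its extent. A self-contained sketch of the same idiom (illustrative, not part of the patch):

#include <cstdint>
#include <map>
#include <string>

// Symbols keyed by start address; value = (name, size). Size 0 means "extends
// to the next symbol", matching the SymbolDesc convention above.
bool lookup(const std::map<uint64_t, std::pair<std::string, uint64_t>> &Syms,
            uint64_t Address, std::string &Name) {
  auto It = Syms.upper_bound(Address); // first symbol strictly after Address
  if (It == Syms.begin())
    return false;                      // Address precedes every symbol
  --It;                                // last symbol at or before Address
  uint64_t Start = It->first, Size = It->second.second;
  if (Size != 0 && Start + Size <= Address)
    return false;                      // Address lies past the symbol's end
  Name = It->second.first;
  return true;
}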
+  return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
+         isa<DWARFContext>(DebugInfoContext.get());
+}
+
+DILineInfo SymbolizableObjectFile::symbolizeCode(uint64_t ModuleOffset,
+                                                 FunctionNameKind FNKind,
+                                                 bool UseSymbolTable) const {
+  DILineInfo LineInfo;
+  if (DebugInfoContext) {
+    LineInfo = DebugInfoContext->getLineInfoForAddress(
+        ModuleOffset, getDILineInfoSpecifier(FNKind));
+  }
+  // Override function name from symbol table if necessary.
+  if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
+    std::string FunctionName;
+    uint64_t Start, Size;
+    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+                               FunctionName, Start, Size)) {
+      LineInfo.FunctionName = FunctionName;
+    }
+  }
+  return LineInfo;
+}
+
+DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
+    uint64_t ModuleOffset, FunctionNameKind FNKind, bool UseSymbolTable) const {
+  DIInliningInfo InlinedContext;
+
+  if (DebugInfoContext)
+    InlinedContext = DebugInfoContext->getInliningInfoForAddress(
+        ModuleOffset, getDILineInfoSpecifier(FNKind));
+  // Make sure there is at least one frame in context.
+  if (InlinedContext.getNumberOfFrames() == 0)
+    InlinedContext.addFrame(DILineInfo());
+
+  // Override the function name in lower frame with name from symbol table.
+  if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
+    std::string FunctionName;
+    uint64_t Start, Size;
+    if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset,
+                               FunctionName, Start, Size)) {
+      InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
+          ->FunctionName = FunctionName;
+    }
+  }
+
+  return InlinedContext;
+}
+
+DIGlobal SymbolizableObjectFile::symbolizeData(uint64_t ModuleOffset) const {
+  DIGlobal Res;
+  getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset, Res.Name, Res.Start,
+                         Res.Size);
+  return Res;
+}
+
+} // namespace symbolize
+} // namespace llvm
+
diff --git a/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
new file mode 100644
index 000000000000..8583b6a36e63
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h
@@ -0,0 +1,82 @@
+//===-- SymbolizableObjectFile.h -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SymbolizableObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+#define LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
+
+#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
+#include <map>
+
+namespace llvm {
+class DataExtractor;
+}
+
+namespace llvm {
+namespace symbolize {
+
+class SymbolizableObjectFile : public SymbolizableModule {
+public:
+  static ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
+  create(object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx);
+
+  DILineInfo symbolizeCode(uint64_t ModuleOffset, FunctionNameKind FNKind,
+                           bool UseSymbolTable) const override;
+  DIInliningInfo symbolizeInlinedCode(uint64_t ModuleOffset,
+                                      FunctionNameKind FNKind,
+                                      bool UseSymbolTable) const override;
+  DIGlobal symbolizeData(uint64_t ModuleOffset) const override;
+
+  // Return true if this is a 32-bit x86 PE COFF module.
+  bool isWin32Module() const override;
+
+  // Returns the preferred base of the module, i.e. where the loader would place
+  // it in memory assuming there were no conflicts.
+  uint64_t getModulePreferredBase() const override;
+
+private:
+  bool shouldOverrideWithSymbolTable(FunctionNameKind FNKind,
+                                     bool UseSymbolTable) const;
+
+  bool getNameFromSymbolTable(object::SymbolRef::Type Type, uint64_t Address,
+                              std::string &Name, uint64_t &Addr,
+                              uint64_t &Size) const;
+  // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd
+  // (function descriptor) section and OpdExtractor refers to its contents.
+  std::error_code addSymbol(const object::SymbolRef &Symbol,
+                            uint64_t SymbolSize,
+                            DataExtractor *OpdExtractor = nullptr,
+                            uint64_t OpdAddress = 0);
+  std::error_code addCoffExportSymbols(const object::COFFObjectFile *CoffObj);
+
+  object::ObjectFile *Module;
+  std::unique_ptr<DIContext> DebugInfoContext;
+
+  struct SymbolDesc {
+    uint64_t Addr;
+    // If size is 0, assume that symbol occupies the whole memory range up to
+    // the following symbol.
+    uint64_t Size;
+    friend bool operator<(const SymbolDesc &s1, const SymbolDesc &s2) {
+      return s1.Addr < s2.Addr;
+    }
+  };
+  std::map<SymbolDesc, StringRef> Functions;
+  std::map<SymbolDesc, StringRef> Objects;
+
+  SymbolizableObjectFile(object::ObjectFile *Obj,
+                         std::unique_ptr<DIContext> DICtx);
+};
+
+} // namespace symbolize
+} // namespace llvm
+
+#endif // LLVM_LIB_DEBUGINFO_SYMBOLIZE_SYMBOLIZABLEOBJECTFILE_H
diff --git a/lib/DebugInfo/Symbolize/Symbolize.cpp b/lib/DebugInfo/Symbolize/Symbolize.cpp
new file mode 100644
index 000000000000..3da1963bb791
--- /dev/null
+++ b/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -0,0 +1,456 @@
+//===-- LLVMSymbolize.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation for LLVM symbolization library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+
+#include "SymbolizableObjectFile.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Config/config.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/PDB/PDB.h"
+#include "llvm/DebugInfo/PDB/PDBContext.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/MachO.h"
+#include "llvm/Object/MachOUniversal.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compression.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include <stdlib.h>
+
+#if defined(_MSC_VER)
+#include <Windows.h>
+#include <DbgHelp.h>
+#pragma comment(lib, "dbghelp.lib")
+
+// Windows.h conflicts with our COFF header definitions.
+#ifdef IMAGE_FILE_MACHINE_I386
+#undef IMAGE_FILE_MACHINE_I386
+#endif
+#endif
+
+namespace llvm {
+namespace symbolize {
+
+ErrorOr<DILineInfo> LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
+                                                  uint64_t ModuleOffset) {
+  auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
+  if (auto EC = InfoOrErr.getError())
+    return EC;
+  SymbolizableModule *Info = InfoOrErr.get();
+
+  // If the user is giving us relative addresses, add the preferred base of the
+  // object to the offset before we do the query. It's what DIContext expects.
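+  // For example (hypothetical numbers): with a preferred base of 0x400000, a
+  // relative offset of 0x1234 is looked up as the absolute address 0x401234.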
+  if (Opts.RelativeAddresses)
+    ModuleOffset += Info->getModulePreferredBase();
+
+  DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions,
+                                            Opts.UseSymbolTable);
+  if (Opts.Demangle)
+    LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
+  return LineInfo;
+}
+
+ErrorOr<DIInliningInfo>
+LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
+                                     uint64_t ModuleOffset) {
+  auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
+  if (auto EC = InfoOrErr.getError())
+    return EC;
+  SymbolizableModule *Info = InfoOrErr.get();
+
+  // If the user is giving us relative addresses, add the preferred base of the
+  // object to the offset before we do the query. It's what DIContext expects.
+  if (Opts.RelativeAddresses)
+    ModuleOffset += Info->getModulePreferredBase();
+
+  DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
+      ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable);
+  if (Opts.Demangle) {
+    for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
+      auto *Frame = InlinedContext.getMutableFrame(i);
+      Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
+    }
+  }
+  return InlinedContext;
+}
+
+ErrorOr<DIGlobal> LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
+                                                uint64_t ModuleOffset) {
+  auto InfoOrErr = getOrCreateModuleInfo(ModuleName);
+  if (auto EC = InfoOrErr.getError())
+    return EC;
+  SymbolizableModule *Info = InfoOrErr.get();
+
+  // If the user is giving us relative addresses, add the preferred base of
+  // the object to the offset before we do the query. It's what DIContext
+  // expects.
+  if (Opts.RelativeAddresses)
+    ModuleOffset += Info->getModulePreferredBase();
+
+  DIGlobal Global = Info->symbolizeData(ModuleOffset);
+  if (Opts.Demangle)
+    Global.Name = DemangleName(Global.Name, Info);
+  return Global;
+}
+
+void LLVMSymbolizer::flush() {
+  ObjectForUBPathAndArch.clear();
+  BinaryForPath.clear();
+  ObjectPairForPathArch.clear();
+  Modules.clear();
+}
+
+// For Path="/path/to/foo" and Basename="foo" assume that debug info is in
+// /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
+// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
+// /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
+static
+std::string getDarwinDWARFResourceForPath(
+    const std::string &Path, const std::string &Basename) {
+  SmallString<16> ResourceName = StringRef(Path);
+  if (sys::path::extension(Path) != ".dSYM") {
+    ResourceName += ".dSYM";
+  }
+  sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
+  sys::path::append(ResourceName, Basename);
+  return ResourceName.str();
+}
+
+static bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
+      MemoryBuffer::getFileOrSTDIN(Path);
+  if (!MB)
+    return false;
+  return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
+}
+
+static bool findDebugBinary(const std::string &OrigPath,
+                            const std::string &DebuglinkName, uint32_t CRCHash,
+                            std::string &Result) {
+  std::string OrigRealPath = OrigPath;
+#if defined(HAVE_REALPATH)
+  if (char *RP = realpath(OrigPath.c_str(), nullptr)) {
+    OrigRealPath = RP;
+    free(RP);
+  }
+#endif
+  SmallString<16> OrigDir(OrigRealPath);
+  llvm::sys::path::remove_filename(OrigDir);
+  SmallString<16> DebugPath = OrigDir;
+  // Try /path/to/original_binary/debuglink_name
+  llvm::sys::path::append(DebugPath, DebuglinkName);
+  if (checkFileCRC(DebugPath, CRCHash)) {
+    Result = DebugPath.str();
+    return true;
+  }
+  // Try /path/to/original_binary/.debug/debuglink_name
+  DebugPath = OrigRealPath;
+  llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
+  if (checkFileCRC(DebugPath, CRCHash)) {
+    Result = DebugPath.str();
+    return true;
+  }
+  // Try /usr/lib/debug/path/to/original_binary/debuglink_name
+  DebugPath = "/usr/lib/debug";
+  llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
+                          DebuglinkName);
+  if (checkFileCRC(DebugPath, CRCHash)) {
+    Result = DebugPath.str();
+    return true;
+  }
+  return false;
+}
+
+static bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
+                                    uint32_t &CRCHash) {
+  if (!Obj)
+    return false;
+  for (const SectionRef &Section : Obj->sections()) {
+    StringRef Name;
+    Section.getName(Name);
+    Name = Name.substr(Name.find_first_not_of("._"));
+    if (Name == "gnu_debuglink") {
+      StringRef Data;
+      Section.getContents(Data);
+      DataExtractor DE(Data, Obj->isLittleEndian(), 0);
+      uint32_t Offset = 0;
+      if (const char *DebugNameStr = DE.getCStr(&Offset)) {
+        // 4-byte align the offset.
+        Offset = (Offset + 3) & ~0x3;
+        if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
+          DebugName = DebugNameStr;
+          CRCHash = DE.getU32(&Offset);
+          return true;
+        }
+      }
+      break;
+    }
+  }
+  return false;
+}
+
+static
+bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
+                             const MachOObjectFile *Obj) {
+  ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
+  ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
+  if (dbg_uuid.empty() || bin_uuid.empty())
+    return false;
+  return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
+}
+
+ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
+    const MachOObjectFile *MachExeObj, const std::string &ArchName) {
+  // On Darwin we may find DWARF in a separate object file in the
+  // resource directory.
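+  // For a hypothetical ExePath "/tmp/a.out", the default candidate below is
+  // "/tmp/a.out.dSYM/Contents/Resources/DWARF/a.out"; each Opts.DsymHints
+  // directory is expanded the same way, and a candidate is only accepted if
+  // its Mach-O UUID matches the binary's.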
+  std::vector<std::string> DsymPaths;
+  StringRef Filename = sys::path::filename(ExePath);
+  DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename));
+  for (const auto &Path : Opts.DsymHints) {
+    DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename));
+  }
+  for (const auto &Path : DsymPaths) {
+    auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
+    if (!DbgObjOrErr)
+      continue;
+    ObjectFile *DbgObj = DbgObjOrErr.get();
+    const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
+    if (!MachDbgObj)
+      continue;
+    if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
+      return DbgObj;
+  }
+  return nullptr;
+}
+
+ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
+                                                  const ObjectFile *Obj,
+                                                  const std::string &ArchName) {
+  std::string DebuglinkName;
+  uint32_t CRCHash;
+  std::string DebugBinaryPath;
+  if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
+    return nullptr;
+  if (!findDebugBinary(Path, DebuglinkName, CRCHash, DebugBinaryPath))
+    return nullptr;
+  auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
+  if (!DbgObjOrErr)
+    return nullptr;
+  return DbgObjOrErr.get();
+}
+
+ErrorOr<LLVMSymbolizer::ObjectPair>
+LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
+                                      const std::string &ArchName) {
+  const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
+  if (I != ObjectPairForPathArch.end())
+    return I->second;
+
+  auto ObjOrErr = getOrCreateObject(Path, ArchName);
+  if (auto EC = ObjOrErr.getError()) {
+    ObjectPairForPathArch.insert(
+        std::make_pair(std::make_pair(Path, ArchName), EC));
+    return EC;
+  }
+
+  ObjectFile *Obj = ObjOrErr.get();
+  assert(Obj != nullptr);
+  ObjectFile *DbgObj = nullptr;
+
+  if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
+    DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
+  if (!DbgObj)
+    DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
+  if (!DbgObj)
+    DbgObj = Obj;
+  ObjectPair Res = std::make_pair(Obj, DbgObj);
+  ObjectPairForPathArch.insert(
+      std::make_pair(std::make_pair(Path, ArchName), Res));
+  return Res;
+}
+
+ErrorOr<ObjectFile *>
+LLVMSymbolizer::getOrCreateObject(const std::string &Path,
+                                  const std::string &ArchName) {
+  const auto &I = BinaryForPath.find(Path);
+  Binary *Bin = nullptr;
+  if (I == BinaryForPath.end()) {
+    ErrorOr<OwningBinary<Binary>> BinOrErr = createBinary(Path);
+    if (auto EC = BinOrErr.getError()) {
+      BinaryForPath.insert(std::make_pair(Path, EC));
+      return EC;
+    }
+    Bin = BinOrErr->getBinary();
+    BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get())));
+  } else if (auto EC = I->second.getError()) {
+    return EC;
+  } else {
+    Bin = I->second->getBinary();
+  }
+
+  assert(Bin != nullptr);
+
+  if (MachOUniversalBinary *UB = dyn_cast<MachOUniversalBinary>(Bin)) {
+    const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
+    if (I != ObjectForUBPathAndArch.end()) {
+      if (auto EC = I->second.getError())
+        return EC;
+      return I->second->get();
+    }
+    ErrorOr<std::unique_ptr<ObjectFile>> ObjOrErr =
+        UB->getObjectForArch(ArchName);
+    if (auto EC = ObjOrErr.getError()) {
+      ObjectForUBPathAndArch.insert(
+          std::make_pair(std::make_pair(Path, ArchName), EC));
+      return EC;
+    }
+    ObjectFile *Res = ObjOrErr->get();
+    ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName),
+                                                 std::move(ObjOrErr.get())));
+    return Res;
+  }
+  if (Bin->isObject()) {
+    return cast<ObjectFile>(Bin);
+  }
+  return object_error::arch_not_found;
+}
+
+ErrorOr<SymbolizableModule *>
+LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
+  const auto &I = Modules.find(ModuleName);
+  if (I != Modules.end()) {
+    auto &InfoOrErr = I->second;
+    if (auto EC = InfoOrErr.getError())
+      return EC;
+    return InfoOrErr->get();
+  }
+  std::string BinaryName = ModuleName;
+  std::string ArchName = Opts.DefaultArch;
+  size_t ColonPos = ModuleName.find_last_of(':');
+  // Verify that the substring after the colon forms a valid arch name.
+  if (ColonPos != std::string::npos) {
+    std::string ArchStr = ModuleName.substr(ColonPos + 1);
+    if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
+      BinaryName = ModuleName.substr(0, ColonPos);
+      ArchName = ArchStr;
+    }
+  }
+  auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
+  if (auto EC = ObjectsOrErr.getError()) {
+    // Failed to find valid object file.
+    Modules.insert(std::make_pair(ModuleName, EC));
+    return EC;
+  }
+  ObjectPair Objects = ObjectsOrErr.get();
+
+  std::unique_ptr<DIContext> Context;
+  if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
+    // If this is a COFF object, assume it contains PDB debug information. If
+    // we don't find any we will fall back to the DWARF case.
+    std::unique_ptr<IPDBSession> Session;
+    PDB_ErrorCode Error = loadDataForEXE(PDB_ReaderType::DIA,
+                                         Objects.first->getFileName(), Session);
+    if (Error == PDB_ErrorCode::Success) {
+      Context.reset(new PDBContext(*CoffObject, std::move(Session)));
+    }
+  }
+  if (!Context)
+    Context.reset(new DWARFContextInMemory(*Objects.second));
+  assert(Context);
+  auto InfoOrErr =
+      SymbolizableObjectFile::create(Objects.first, std::move(Context));
+  auto InsertResult =
+      Modules.insert(std::make_pair(ModuleName, std::move(InfoOrErr)));
+  assert(InsertResult.second);
+  if (auto EC = InsertResult.first->second.getError())
+    return EC;
+  return InsertResult.first->second->get();
+}
+
+// Undo these various manglings for Win32 extern "C" functions:
+// cdecl - _foo
+// stdcall - _foo@12
+// fastcall - @foo@12
+// vectorcall - foo@@12
+// These are all different linkage names for 'foo'.
+static StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
+  // Remove any '_' or '@' prefix.
+  char Front = SymbolName.empty() ? '\0' : SymbolName[0];
+  if (Front == '_' || Front == '@')
+    SymbolName = SymbolName.drop_front();
+
+  // Remove any '@[0-9]+' suffix.
+  if (Front != '?') {
+    size_t AtPos = SymbolName.rfind('@');
+    if (AtPos != StringRef::npos &&
+        std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
+                    [](char C) { return C >= '0' && C <= '9'; })) {
+      SymbolName = SymbolName.substr(0, AtPos);
+    }
+  }
+
+  // Remove any ending '@' for vectorcall.
+  if (SymbolName.endswith("@"))
+    SymbolName = SymbolName.drop_back();
+
+  return SymbolName;
+}
+
+#if !defined(_MSC_VER)
+// Assume that __cxa_demangle is provided by libcxxabi (except for Windows).
+extern "C" char *__cxa_demangle(const char *mangled_name, char *output_buffer,
+                                size_t *length, int *status);
+#endif
+
+std::string LLVMSymbolizer::DemangleName(const std::string &Name,
+                                         const SymbolizableModule *ModInfo) {
+#if !defined(_MSC_VER)
+  // We can spoil names of symbols with C linkage, so use a heuristic
+  // approach to check if the name should be demangled.
+  if (Name.substr(0, 2) == "_Z") {
+    int status = 0;
+    char *DemangledName = __cxa_demangle(Name.c_str(), nullptr, nullptr, &status);
+    if (status != 0)
+      return Name;
+    std::string Result = DemangledName;
+    free(DemangledName);
+    return Result;
+  }
+#else
+  if (!Name.empty() && Name.front() == '?') {
+    // Only do MSVC C++ demangling on symbols starting with '?'.
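+    // As a rough example, "?foo@@YAHH@Z" becomes "int foo(int)" once the
+    // UNDNAME_* flags below strip calling-convention and other specifiers.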
+ char DemangledName[1024] = {0}; + DWORD result = ::UnDecorateSymbolName( + Name.c_str(), DemangledName, 1023, + UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected + UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc + UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications + UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers + UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords + UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types + return (result == 0) ? Name : std::string(DemangledName); + } +#endif + if (ModInfo && ModInfo->isWin32Module()) + return std::string(demanglePE32ExternCFunc(Name)); + return Name; +} + +} // namespace symbolize +} // namespace llvm diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 67a1ca67e2f3..41c8da40346a 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -61,8 +61,7 @@ ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr M, void JITEventListener::anchor() {} -ExecutionEngine::ExecutionEngine(std::unique_ptr M) - : LazyFunctionCreator(nullptr) { +void ExecutionEngine::Init(std::unique_ptr M) { CompilingLazily = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; @@ -79,6 +78,16 @@ ExecutionEngine::ExecutionEngine(std::unique_ptr M) Modules.push_back(std::move(M)); } +ExecutionEngine::ExecutionEngine(std::unique_ptr M) + : DL(M->getDataLayout()), LazyFunctionCreator(nullptr) { + Init(std::move(M)); +} + +ExecutionEngine::ExecutionEngine(DataLayout DL, std::unique_ptr M) + : DL(std::move(DL)), LazyFunctionCreator(nullptr) { + Init(std::move(M)); +} + ExecutionEngine::~ExecutionEngine() { clearAllGlobalMappings(); } @@ -86,7 +95,7 @@ ExecutionEngine::~ExecutionEngine() { namespace { /// \brief Helper class which uses a value handler to automatically deletes the /// memory block when the GlobalVariable is destroyed. -class GVMemoryBlock : public CallbackVH { +class GVMemoryBlock final : public CallbackVH { GVMemoryBlock(const GlobalVariable *GV) : CallbackVH(const_cast(GV)) {} @@ -115,7 +124,7 @@ public: } // anonymous namespace char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) { - return GVMemoryBlock::Create(GV, *getDataLayout()); + return GVMemoryBlock::Create(GV, getDataLayout()); } void ExecutionEngine::addObjectFile(std::unique_ptr O) { @@ -187,7 +196,7 @@ std::string ExecutionEngine::getMangledName(const GlobalValue *GV) { const DataLayout &DL = GV->getParent()->getDataLayout().isDefault() - ? *getDataLayout() + ? getDataLayout() : GV->getParent()->getDataLayout(); Mangler::getNameWithPrefix(FullName, GV->getName(), DL); @@ -228,11 +237,10 @@ void ExecutionEngine::clearAllGlobalMappings() { void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { MutexGuard locked(lock); - for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) - EEState.RemoveMapping(getMangledName(FI)); - for (Module::global_iterator GI = M->global_begin(), GE = M->global_end(); - GI != GE; ++GI) - EEState.RemoveMapping(getMangledName(GI)); + for (Function &FI : *M) + EEState.RemoveMapping(getMangledName(&FI)); + for (GlobalVariable &GI : M->globals()) + EEState.RemoveMapping(getMangledName(&GI)); } uint64_t ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, @@ -333,7 +341,7 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, const std::vector &InputArgv) { Values.clear(); // Free the old contents. 
Values.reserve(InputArgv.size()); - unsigned PtrSize = EE->getDataLayout()->getPointerSize(); + unsigned PtrSize = EE->getDataLayout().getPointerSize(); Array = make_unique((InputArgv.size()+1)*PtrSize); DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array.get() << "\n"); @@ -408,7 +416,7 @@ void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) { #ifndef NDEBUG /// isTargetNullPtr - Return whether the target pointer stored at Loc is null. static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) { - unsigned PtrSize = EE->getDataLayout()->getPointerSize(); + unsigned PtrSize = EE->getDataLayout().getPointerSize(); for (unsigned i = 0; i < PtrSize; ++i) if (*(i + (uint8_t*)Loc)) return false; @@ -621,8 +629,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { break; case Type::VectorTyID: // if the whole vector is 'undef' just reserve memory for the value. - const VectorType* VTy = dyn_cast(C->getType()); - const Type *ElemTy = VTy->getElementType(); + auto* VTy = dyn_cast(C->getType()); + Type *ElemTy = VTy->getElementType(); unsigned int elemNum = VTy->getNumElements(); Result.AggregateVal.resize(elemNum); if (ElemTy->isIntegerTy()) @@ -641,8 +649,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Instruction::GetElementPtr: { // Compute the index GenericValue Result = getConstantValue(Op0); - APInt Offset(DL->getPointerSizeInBits(), 0); - cast(CE)->accumulateConstantOffset(*DL, Offset); + APInt Offset(DL.getPointerSizeInBits(), 0); + cast(CE)->accumulateConstantOffset(DL, Offset); char* tmp = (char*) Result.PointerVal; Result = PTOGV(tmp + Offset.getSExtValue()); @@ -729,16 +737,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { } case Instruction::PtrToInt: { GenericValue GV = getConstantValue(Op0); - uint32_t PtrWidth = DL->getTypeSizeInBits(Op0->getType()); + uint32_t PtrWidth = DL.getTypeSizeInBits(Op0->getType()); assert(PtrWidth <= 64 && "Bad pointer width"); GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal)); - uint32_t IntWidth = DL->getTypeSizeInBits(CE->getType()); + uint32_t IntWidth = DL.getTypeSizeInBits(CE->getType()); GV.IntVal = GV.IntVal.zextOrTrunc(IntWidth); return GV; } case Instruction::IntToPtr: { GenericValue GV = getConstantValue(Op0); - uint32_t PtrWidth = DL->getTypeSizeInBits(CE->getType()); + uint32_t PtrWidth = DL.getTypeSizeInBits(CE->getType()); GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth); assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width"); GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue())); @@ -860,8 +868,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FRem: - apfLHS.mod(APFloat(Sem, RHS.IntVal), - APFloat::rmNearestTiesToEven); + apfLHS.mod(APFloat(Sem, RHS.IntVal)); GV.IntVal = apfLHS.bitcastToAPInt(); break; } @@ -1040,7 +1047,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, GenericValue *Ptr, Type *Ty) { - const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty); + const unsigned StoreBytes = getDataLayout().getTypeStoreSize(Ty); switch (Ty->getTypeID()) { default: @@ -1080,7 +1087,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, break; } - if (sys::IsLittleEndianHost != getDataLayout()->isLittleEndian()) + if (sys::IsLittleEndianHost != getDataLayout().isLittleEndian()) // Host and target are different endian - reverse the stored bytes. 
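+    // E.g. (little-endian host, big-endian target): an i32 0x11223344 sits in
+    // host memory as 44 33 22 11; the reverse below yields 11 22 33 44.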
std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr); } @@ -1117,7 +1124,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr, Type *Ty) { - const unsigned LoadBytes = getDataLayout()->getTypeStoreSize(Ty); + const unsigned LoadBytes = getDataLayout().getTypeStoreSize(Ty); switch (Ty->getTypeID()) { case Type::IntegerTyID: @@ -1143,8 +1150,8 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, break; } case Type::VectorTyID: { - const VectorType *VT = cast(Ty); - const Type *ElemT = VT->getElementType(); + auto *VT = cast(Ty); + Type *ElemT = VT->getElementType(); const unsigned numElems = VT->getNumElements(); if (ElemT->isFloatTy()) { Result.AggregateVal.resize(numElems); @@ -1183,20 +1190,20 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { if (const ConstantVector *CP = dyn_cast(Init)) { unsigned ElementSize = - getDataLayout()->getTypeAllocSize(CP->getType()->getElementType()); + getDataLayout().getTypeAllocSize(CP->getType()->getElementType()); for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize); return; } if (isa(Init)) { - memset(Addr, 0, (size_t)getDataLayout()->getTypeAllocSize(Init->getType())); + memset(Addr, 0, (size_t)getDataLayout().getTypeAllocSize(Init->getType())); return; } if (const ConstantArray *CPA = dyn_cast(Init)) { unsigned ElementSize = - getDataLayout()->getTypeAllocSize(CPA->getType()->getElementType()); + getDataLayout().getTypeAllocSize(CPA->getType()->getElementType()); for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize); return; @@ -1204,7 +1211,7 @@ void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { if (const ConstantStruct *CPS = dyn_cast(Init)) { const StructLayout *SL = - getDataLayout()->getStructLayout(cast(CPS->getType())); + getDataLayout().getStructLayout(cast(CPS->getType())); for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i)); return; @@ -1349,7 +1356,7 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { InitializeMemory(GV->getInitializer(), GA); Type *ElTy = GV->getType()->getElementType(); - size_t GVSize = (size_t)getDataLayout()->getTypeAllocSize(ElTy); + size_t GVSize = (size_t)getDataLayout().getTypeAllocSize(ElTy); NumInitBytes += (unsigned)GVSize; ++NumGlobals; } diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 55ab5af2b909..ff7c4dce0d5d 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -29,7 +29,7 @@ using namespace llvm; DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef) -inline LLVMTargetMachineRef wrap(const TargetMachine *P) { +static LLVMTargetMachineRef wrap(const TargetMachine *P) { return reinterpret_cast(const_cast(P)); } @@ -210,35 +210,6 @@ LLVMBool LLVMCreateMCJITCompilerForModule( return 1; } -LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, - LLVMModuleProviderRef MP, - char **OutError) { - /* The module provider is now actually a module. 
*/ - return LLVMCreateExecutionEngineForModule(OutEE, - reinterpret_cast(MP), - OutError); -} - -LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, - LLVMModuleProviderRef MP, - char **OutError) { - /* The module provider is now actually a module. */ - return LLVMCreateInterpreterForModule(OutInterp, - reinterpret_cast(MP), - OutError); -} - -LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, - LLVMModuleProviderRef MP, - unsigned OptLevel, - char **OutError) { - /* The module provider is now actually a module. */ - return LLVMCreateJITCompilerForModule(OutJIT, - reinterpret_cast(MP), - OptLevel, OutError); -} - - void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) { delete unwrap(EE); } @@ -282,11 +253,6 @@ void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){ unwrap(EE)->addModule(std::unique_ptr(unwrap(M))); } -void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ - /* The module provider is now actually a module. */ - LLVMAddModule(EE, reinterpret_cast(MP)); -} - LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, LLVMModuleRef *OutMod, char **OutError) { Module *Mod = unwrap(M); @@ -295,14 +261,6 @@ LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, return 0; } -LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, - LLVMModuleProviderRef MP, - LLVMModuleRef *OutMod, char **OutError) { - /* The module provider is now actually a module. */ - return LLVMRemoveModule(EE, reinterpret_cast(MP), OutMod, - OutError); -} - LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, LLVMValueRef *OutFn) { if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) { @@ -318,7 +276,7 @@ void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, } LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) { - return wrap(unwrap(EE)->getDataLayout()); + return wrap(&unwrap(EE)->getDataLayout()); } LLVMTargetMachineRef diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index dbfa37e2b0da..1eb4f7d19342 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -593,7 +593,7 @@ static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, } static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2, - const Type *Ty, const bool val) { + Type *Ty, const bool val) { GenericValue Dest; if(Ty->isVectorTy()) { assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); @@ -788,7 +788,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { } static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2, - GenericValue Src3, const Type *Ty) { + GenericValue Src3, Type *Ty) { GenericValue Dest; if(Ty->isVectorTy()) { assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); @@ -805,7 +805,7 @@ static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2, void Interpreter::visitSelectInst(SelectInst &I) { ExecutionContext &SF = ECStack.back(); - const Type * Ty = I.getOperand(0)->getType(); + Type * Ty = I.getOperand(0)->getType(); GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Src3 = getOperandValue(I.getOperand(2), SF); @@ -968,7 +968,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) { unsigned NumElements = getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue(); - unsigned TypeSize = 
(size_t)TD.getTypeAllocSize(Ty); + unsigned TypeSize = (size_t)getDataLayout().getTypeAllocSize(Ty); // Avoid malloc-ing zero bytes, use max()... unsigned MemToAlloc = std::max(1U, NumElements * TypeSize); @@ -1000,7 +1000,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, for (; I != E; ++I) { if (StructType *STy = dyn_cast(*I)) { - const StructLayout *SLO = TD.getStructLayout(STy); + const StructLayout *SLO = getDataLayout().getStructLayout(STy); const ConstantInt *CPU = cast(I.getOperand()); unsigned Index = unsigned(CPU->getZExtValue()); @@ -1020,7 +1020,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, assert(BitWidth == 64 && "Invalid index type for getelementptr"); Idx = (int64_t)IdxGV.IntVal.getZExtValue(); } - Total += TD.getTypeAllocSize(ST->getElementType())*Idx; + Total += getDataLayout().getTypeAllocSize(ST->getElementType()) * Idx; } } @@ -1139,7 +1139,7 @@ void Interpreter::visitShl(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (Ty->isVectorTy()) { uint32_t src1Size = uint32_t(Src1.AggregateVal.size()); @@ -1166,7 +1166,7 @@ void Interpreter::visitLShr(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (Ty->isVectorTy()) { uint32_t src1Size = uint32_t(Src1.AggregateVal.size()); @@ -1193,7 +1193,7 @@ void Interpreter::visitAShr(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - const Type *Ty = I.getType(); + Type *Ty = I.getType(); if (Ty->isVectorTy()) { size_t src1Size = Src1.AggregateVal.size(); @@ -1237,10 +1237,10 @@ GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy, GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { - const Type *SrcTy = SrcVal->getType(); + Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcTy->isVectorTy()) { - const Type *DstVecTy = DstTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); unsigned DBitWidth = cast(DstVecTy)->getBitWidth(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal. 
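+    // E.g. sext of <4 x i16> to <4 x i32> sign-extends each of the four lanes
+    // independently in the loop below.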
@@ -1248,7 +1248,7 @@ GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy, for (unsigned i = 0; i < size; i++) Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.sext(DBitWidth); } else { - const IntegerType *DITy = cast(DstTy); + auto *DITy = cast(DstTy); unsigned DBitWidth = DITy->getBitWidth(); Dest.IntVal = Src.IntVal.sext(DBitWidth); } @@ -1257,10 +1257,10 @@ GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy, GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { - const Type *SrcTy = SrcVal->getType(); + Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcTy->isVectorTy()) { - const Type *DstVecTy = DstTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); unsigned DBitWidth = cast(DstVecTy)->getBitWidth(); unsigned size = Src.AggregateVal.size(); @@ -1269,7 +1269,7 @@ GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy, for (unsigned i = 0; i < size; i++) Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.zext(DBitWidth); } else { - const IntegerType *DITy = cast(DstTy); + auto *DITy = cast(DstTy); unsigned DBitWidth = DITy->getBitWidth(); Dest.IntVal = Src.IntVal.zext(DBitWidth); } @@ -1327,8 +1327,8 @@ GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcTy->getTypeID() == Type::VectorTyID) { - const Type *DstVecTy = DstTy->getScalarType(); - const Type *SrcVecTy = SrcTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); + Type *SrcVecTy = SrcTy->getScalarType(); uint32_t DBitWidth = cast(DstVecTy)->getBitWidth(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal. 
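+    // E.g. fptoui <2 x float> <1.5, 2.5> to <2 x i32> truncates each lane
+    // toward zero, yielding <1, 2>.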
@@ -1365,8 +1365,8 @@ GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcTy->getTypeID() == Type::VectorTyID) { - const Type *DstVecTy = DstTy->getScalarType(); - const Type *SrcVecTy = SrcTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); + Type *SrcVecTy = SrcTy->getScalarType(); uint32_t DBitWidth = cast(DstVecTy)->getBitWidth(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal @@ -1401,7 +1401,7 @@ GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { - const Type *DstVecTy = DstTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal Dest.AggregateVal.resize(size); @@ -1433,7 +1433,7 @@ GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { - const Type *DstVecTy = DstTy->getScalarType(); + Type *DstVecTy = DstTy->getScalarType(); unsigned size = Src.AggregateVal.size(); // the sizes of src and dst vectors must be equal Dest.AggregateVal.resize(size); @@ -1477,7 +1477,7 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy, GenericValue Dest, Src = getOperandValue(SrcVal, SF); assert(DstTy->isPointerTy() && "Invalid PtrToInt instruction"); - uint32_t PtrSize = TD.getPointerSizeInBits(); + uint32_t PtrSize = getDataLayout().getPointerSizeInBits(); if (PtrSize != Src.IntVal.getBitWidth()) Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize); @@ -1497,10 +1497,10 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy, (DstTy->getTypeID() == Type::VectorTyID)) { // vector src bitcast to vector dst or vector src bitcast to scalar dst or // scalar src bitcast to vector dst - bool isLittleEndian = TD.isLittleEndian(); + bool isLittleEndian = getDataLayout().isLittleEndian(); GenericValue TempDst, TempSrc, SrcVec; - const Type *SrcElemTy; - const Type *DstElemTy; + Type *SrcElemTy; + Type *DstElemTy; unsigned SrcBitSize; unsigned DstBitSize; unsigned SrcNum; @@ -2091,7 +2091,7 @@ void Interpreter::callFunction(Function *F, ArrayRef ArgVals) { } // Get pointers to first LLVM BB & Instruction in function. - StackFrame.CurBB = F->begin(); + StackFrame.CurBB = &F->front(); StackFrame.CurInst = StackFrame.CurBB->begin(); // Run through the function arguments and initialize their values... @@ -2103,7 +2103,7 @@ void Interpreter::callFunction(Function *F, ArrayRef ArgVals) { unsigned i = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI, ++i) - SetValue(AI, ArgVals[i], StackFrame); + SetValue(&*AI, ArgVals[i], StackFrame); // Handle varargs arguments... StackFrame.VarArgs.assign(ArgVals.begin()+i, ArgVals.end()); @@ -2121,27 +2121,5 @@ void Interpreter::run() { DEBUG(dbgs() << "About to interpret: " << I); visit(I); // Dispatch to one of the visit* methods... -#if 0 - // This is not safe, as visiting the instruction could lower it and free I. 
-DEBUG( - if (!isa(I) && !isa(I) && - I.getType() != Type::VoidTy) { - dbgs() << " --> "; - const GenericValue &Val = SF.Values[&I]; - switch (I.getType()->getTypeID()) { - default: llvm_unreachable("Invalid GenericValue Type"); - case Type::VoidTyID: dbgs() << "void"; break; - case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break; - case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break; - case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal); - break; - case Type::IntegerTyID: - dbgs() << "i" << Val.IntVal.getBitWidth() << " " - << Val.IntVal.toStringUnsigned(10) - << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n"; - break; - } - }); -#endif } } diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 9b44042d6144..441f0eb85721 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -178,7 +178,7 @@ static void *ffiValueFor(Type *Ty, const GenericValue &AV, } static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef ArgVals, - const DataLayout *TD, GenericValue &Result) { + const DataLayout &TD, GenericValue &Result) { ffi_cif cif; FunctionType *FTy = F->getFunctionType(); const unsigned NumArgs = F->arg_size(); @@ -198,7 +198,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef ArgVals, const unsigned ArgNo = A->getArgNo(); Type *ArgTy = FTy->getParamType(ArgNo); args[ArgNo] = ffiTypeFor(ArgTy); - ArgBytes += TD->getTypeStoreSize(ArgTy); + ArgBytes += TD.getTypeStoreSize(ArgTy); } SmallVector ArgData; @@ -210,7 +210,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef ArgVals, const unsigned ArgNo = A->getArgNo(); Type *ArgTy = FTy->getParamType(ArgNo); values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr); - ArgDataPtr += TD->getTypeStoreSize(ArgTy); + ArgDataPtr += TD.getTypeStoreSize(ArgTy); } Type *RetTy = FTy->getReturnType(); @@ -219,7 +219,7 @@ static bool ffiInvoke(RawFunc Fn, Function *F, ArrayRef ArgVals, if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) { SmallVector ret; if (RetTy->getTypeID() != Type::VoidTyID) - ret.resize(TD->getTypeStoreSize(RetTy)); + ret.resize(TD.getTypeStoreSize(RetTy)); ffi_call(&cif, Fn, ret.data(), values.data()); switch (RetTy->getTypeID()) { case Type::IntegerTyID: @@ -368,7 +368,7 @@ static GenericValue lle_X_sprintf(FunctionType *FT, case 'x': case 'X': if (HowLong >= 1) { if (HowLong == 1 && - TheInterpreter->getDataLayout()->getPointerSizeInBits() == 64 && + TheInterpreter->getDataLayout().getPointerSizeInBits() == 64 && sizeof(long) < sizeof(int64_t)) { // Make sure we use %lld with a 64 bit argument because we might be // compiling LLI on a 32 bit compiler. diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index f103c09659aa..bc7da2e4f6af 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -35,7 +35,7 @@ extern "C" void LLVMLinkInInterpreter() { } ExecutionEngine *Interpreter::create(std::unique_ptr M, std::string *ErrStr) { // Tell this Module to materialize everything and release the GVMaterializer. 
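+  // (A module freshly parsed from bitcode may still be lazily materialized;
+  // the interpreter walks function bodies directly, so force everything into
+  // memory up front.)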
- if (std::error_code EC = M->materializeAllPermanently()) { + if (std::error_code EC = M->materializeAll()) { if (ErrStr) *ErrStr = EC.message(); // We got an error, just return 0 @@ -49,16 +49,15 @@ ExecutionEngine *Interpreter::create(std::unique_ptr M, // Interpreter ctor - Initialize stuff // Interpreter::Interpreter(std::unique_ptr M) - : ExecutionEngine(std::move(M)), TD(Modules.back().get()) { + : ExecutionEngine(std::move(M)) { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); - setDataLayout(&TD); // Initialize the "backend" initializeExecutionEngine(); initializeExternalFunctions(); emitGlobals(); - IL = new IntrinsicLowering(TD); + IL = new IntrinsicLowering(getDataLayout()); } Interpreter::~Interpreter() { diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index f97664181a87..2e5a867a200f 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -26,7 +26,6 @@ namespace llvm { class IntrinsicLowering; -struct FunctionInfo; template class generic_gep_type_iterator; class ConstantExpr; typedef generic_gep_type_iterator gep_type_iterator; @@ -95,7 +94,6 @@ struct ExecutionContext { // class Interpreter : public ExecutionEngine, public InstVisitor { GenericValue ExitValue; // The return value of the called function - DataLayout TD; IntrinsicLowering *IL; // The runtime stack of executing code. The top of the stack is the current diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index f6944eea2e78..6cbebe98e7c9 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -65,12 +65,13 @@ MCJIT::createJIT(std::unique_ptr M, std::move(Resolver)); } -MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr tm, +MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr TM, std::shared_ptr MemMgr, std::shared_ptr Resolver) - : ExecutionEngine(std::move(M)), TM(std::move(tm)), Ctx(nullptr), - MemMgr(std::move(MemMgr)), Resolver(*this, std::move(Resolver)), - Dyld(*this->MemMgr, this->Resolver), ObjCache(nullptr) { + : ExecutionEngine(TM->createDataLayout(), std::move(M)), TM(std::move(TM)), + Ctx(nullptr), MemMgr(std::move(MemMgr)), + Resolver(*this, std::move(Resolver)), Dyld(*this->MemMgr, this->Resolver), + ObjCache(nullptr) { // FIXME: We are managing our modules, so we do not want the base class // ExecutionEngine to manage them as well. To avoid double destruction // of the first (and only) module added in ExecutionEngine constructor @@ -85,7 +86,6 @@ MCJIT::MCJIT(std::unique_ptr M, std::unique_ptr tm, Modules.clear(); OwnedModules.addModule(std::move(First)); - setDataLayout(TM->getDataLayout()); RegisterJITEventListener(JITEventListener::createGDBRegistrationListener()); } @@ -159,7 +159,6 @@ std::unique_ptr MCJIT::emitObject(Module *M) { // Initialize passes. 
PM.run(*M); // Flush the output buffer to get the generated code into memory - ObjStream.flush(); std::unique_ptr CompiledObjBuffer( new ObjectMemoryBuffer(std::move(ObjBufferSV))); @@ -193,7 +192,11 @@ void MCJIT::generateCodeForModule(Module *M) { if (ObjCache) ObjectToLoad = ObjCache->getObject(M); - M->setDataLayout(*TM->getDataLayout()); + if (M->getDataLayout().isDefault()) { + M->setDataLayout(getDataLayout()); + } else { + assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch"); + } // If the cache did not contain a suitable object, compile the object if (!ObjectToLoad) { @@ -265,7 +268,7 @@ void MCJIT::finalizeModule(Module *M) { RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) { SmallString<128> FullName; - Mangler::getNameWithPrefix(FullName, Name, *TM->getDataLayout()); + Mangler::getNameWithPrefix(FullName, Name, getDataLayout()); if (void *Addr = getPointerToGlobalIfAvailable(FullName)) return RuntimeDyld::SymbolInfo(static_cast( @@ -315,10 +318,12 @@ RuntimeDyld::SymbolInfo MCJIT::findSymbol(const std::string &Name, object::Archive *A = OB.getBinary(); // Look for our symbols in each Archive object::Archive::child_iterator ChildIt = A->findSym(Name); + if (std::error_code EC = ChildIt->getError()) + report_fatal_error(EC.message()); if (ChildIt != A->child_end()) { // FIXME: Support nested archives? ErrorOr> ChildBinOrErr = - ChildIt->getAsBinary(); + (*ChildIt)->getAsBinary(); if (ChildBinOrErr.getError()) continue; std::unique_ptr &ChildBin = ChildBinOrErr.get(); diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index a45173c2da8d..3c9d2fd50336 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -86,7 +86,7 @@ class MCJIT : public ExecutionEngine { ModulePtrSet::iterator begin_added() { return AddedModules.begin(); } ModulePtrSet::iterator end_added() { return AddedModules.end(); } iterator_range added() { - return iterator_range(begin_added(), end_added()); + return make_range(begin_added(), end_added()); } ModulePtrSet::iterator begin_loaded() { return LoadedModules.begin(); } @@ -223,12 +223,13 @@ public: /// FindFunctionNamed - Search all of the active modules to find the function that /// defines FnName. This is very slow operation and shouldn't be used for /// general code. - virtual Function *FindFunctionNamed(const char *FnName) override; + Function *FindFunctionNamed(const char *FnName) override; - /// FindGlobalVariableNamed - Search all of the active modules to find the global variable - /// that defines Name. This is very slow operation and shouldn't be used for - /// general code. - virtual GlobalVariable *FindGlobalVariableNamed(const char *Name, bool AllowInternal = false) override; + /// FindGlobalVariableNamed - Search all of the active modules to find the + /// global variable that defines Name. This is very slow operation and + /// shouldn't be used for general code. + GlobalVariable *FindGlobalVariableNamed(const char *Name, + bool AllowInternal = false) override; /// Sets the object manager that MCJIT should use to avoid compilation. 
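+  /// An ObjectCache that returns a previously emitted object for a Module
+  /// lets MCJIT skip codegen entirely on later runs; passing nullptr disables
+  /// caching.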
void setObjectCache(ObjectCache *manager) override; @@ -335,6 +336,6 @@ protected: bool CheckFunctionsOnly); }; -} // End llvm namespace +} // end llvm namespace -#endif +#endif // LLVM_LIB_EXECUTIONENGINE_MCJIT_MCJIT_H diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt index 99fe22c001da..a17f52e322e8 100644 --- a/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -2,6 +2,8 @@ add_llvm_library(LLVMOrcJIT ExecutionUtils.cpp IndirectionUtils.cpp NullResolver.cpp + OrcCBindings.cpp + OrcCBindingsStack.cpp OrcMCJITReplacement.cpp OrcTargetSupport.cpp diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index b439810ed330..34564e42b10f 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -19,6 +19,9 @@ namespace llvm { namespace orc { +void JITCompileCallbackManager::anchor() {} +void IndirectStubsManager::anchor() {} + Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) { Constant *AddrIntVal = ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr); @@ -37,7 +40,7 @@ GlobalVariable* createImplPointer(PointerType &PT, Module &M, return IP; } -void makeStub(Function &F, GlobalVariable &ImplPointer) { +void makeStub(Function &F, Value &ImplPointer) { assert(F.isDeclaration() && "Can't turn a definition into a stub."); assert(F.getParent() && "Function isn't in a module."); Module &M = *F.getParent(); @@ -61,9 +64,7 @@ class GlobalRenamer { public: static bool needsRenaming(const Value &New) { - if (!New.hasName() || New.getName().startswith("\01L")) - return true; - return false; + return !New.hasName() || New.getName().startswith("\01L"); } const std::string& getRename(const Value &Orig) { @@ -106,6 +107,9 @@ void makeAllSymbolsExternallyAccessible(Module &M) { for (auto &GV : M.globals()) raiseVisibilityOnValue(GV, Renamer); + + for (auto &A : M.aliases()) + raiseVisibilityOnValue(A, Renamer); } Function* cloneFunctionDecl(Module &Dst, const Function &F, @@ -121,7 +125,7 @@ Function* cloneFunctionDecl(Module &Dst, const Function &F, auto NewArgI = NewF->arg_begin(); for (auto ArgI = F.arg_begin(), ArgE = F.arg_end(); ArgI != ArgE; ++ArgI, ++NewArgI) - (*VMap)[ArgI] = NewArgI; + (*VMap)[&*ArgI] = &*NewArgI; } return NewF; @@ -177,5 +181,16 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV, nullptr, Materializer)); } +GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, + ValueToValueMapTy &VMap) { + assert(OrigA.getAliasee() && "Original alias doesn't have an aliasee?"); + auto *NewA = GlobalAlias::create(OrigA.getValueType(), + OrigA.getType()->getPointerAddressSpace(), + OrigA.getLinkage(), OrigA.getName(), &Dst); + NewA->copyAttributesFrom(&OrigA); + VMap[&OrigA] = NewA; + return NewA; +} + } // End namespace orc. } // End namespace llvm. diff --git a/lib/ExecutionEngine/Orc/OrcCBindings.cpp b/lib/ExecutionEngine/Orc/OrcCBindings.cpp new file mode 100644 index 000000000000..d2379cd441d5 --- /dev/null +++ b/lib/ExecutionEngine/Orc/OrcCBindings.cpp @@ -0,0 +1,97 @@ +//===----------- OrcCBindings.cpp - C bindings for the Orc APIs -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "OrcCBindingsStack.h" +#include "llvm-c/OrcBindings.h" + +using namespace llvm; + +LLVMOrcJITStackRef LLVMOrcCreateInstance(LLVMTargetMachineRef TM) { + TargetMachine *TM2(unwrap(TM)); + + Triple T(TM2->getTargetTriple()); + + auto CompileCallbackMgr = OrcCBindingsStack::createCompileCallbackMgr(T); + auto IndirectStubsMgrBuilder = + OrcCBindingsStack::createIndirectStubsMgrBuilder(T); + + OrcCBindingsStack *JITStack = + new OrcCBindingsStack(*TM2, std::move(CompileCallbackMgr), + IndirectStubsMgrBuilder); + + return wrap(JITStack); +} + +void LLVMOrcGetMangledSymbol(LLVMOrcJITStackRef JITStack, char **MangledName, + const char *SymbolName) { + OrcCBindingsStack &J = *unwrap(JITStack); + std::string Mangled = J.mangle(SymbolName); + *MangledName = new char[Mangled.size() + 1]; + strcpy(*MangledName, Mangled.c_str()); +} + +void LLVMOrcDisposeMangledSymbol(char *MangledName) { + delete[] MangledName; +} + +LLVMOrcTargetAddress +LLVMOrcCreateLazyCompileCallback(LLVMOrcJITStackRef JITStack, + LLVMOrcLazyCompileCallbackFn Callback, + void *CallbackCtx) { + OrcCBindingsStack &J = *unwrap(JITStack); + return J.createLazyCompileCallback(Callback, CallbackCtx); +} + +void LLVMOrcCreateIndirectStub(LLVMOrcJITStackRef JITStack, + const char *StubName, + LLVMOrcTargetAddress InitAddr) { + OrcCBindingsStack &J = *unwrap(JITStack); + J.createIndirectStub(StubName, InitAddr); +} + +void LLVMOrcSetIndirectStubPointer(LLVMOrcJITStackRef JITStack, + const char *StubName, + LLVMOrcTargetAddress NewAddr) { + OrcCBindingsStack &J = *unwrap(JITStack); + J.setIndirectStubPointer(StubName, NewAddr); +} + +LLVMOrcModuleHandle +LLVMOrcAddEagerlyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx) { + OrcCBindingsStack &J = *unwrap(JITStack); + Module *M(unwrap(Mod)); + return J.addIRModuleEager(M, SymbolResolver, SymbolResolverCtx); +} + +LLVMOrcModuleHandle +LLVMOrcAddLazilyCompiledIR(LLVMOrcJITStackRef JITStack, LLVMModuleRef Mod, + LLVMOrcSymbolResolverFn SymbolResolver, + void *SymbolResolverCtx) { + OrcCBindingsStack &J = *unwrap(JITStack); + Module *M(unwrap(Mod)); + return J.addIRModuleLazy(M, SymbolResolver, SymbolResolverCtx); +} + +void LLVMOrcRemoveModule(LLVMOrcJITStackRef JITStack, LLVMOrcModuleHandle H) { + OrcCBindingsStack &J = *unwrap(JITStack); + J.removeModule(H); +} + +LLVMOrcTargetAddress LLVMOrcGetSymbolAddress(LLVMOrcJITStackRef JITStack, + const char *SymbolName) { + OrcCBindingsStack &J = *unwrap(JITStack); + auto Sym = J.findSymbol(SymbolName, true); + return Sym.getAddress(); +} + +void LLVMOrcDisposeInstance(LLVMOrcJITStackRef JITStack) { + delete unwrap(JITStack); +} diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp new file mode 100644 index 000000000000..e519c7f30920 --- /dev/null +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp @@ -0,0 +1,43 @@ +//===-------- OrcCBindingsStack.cpp - Orc JIT stack for C bindings --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "OrcCBindingsStack.h" + +#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" +#include +#include + +using namespace llvm; + +std::unique_ptr +OrcCBindingsStack::createCompileCallbackMgr(Triple T) { + switch (T.getArch()) { + default: return nullptr; + + case Triple::x86_64: { + typedef orc::LocalJITCompileCallbackManager CCMgrT; + return llvm::make_unique(0); + } + } +} + +OrcCBindingsStack::IndirectStubsManagerBuilder +OrcCBindingsStack::createIndirectStubsMgrBuilder(Triple T) { + switch (T.getArch()) { + default: return nullptr; + + case Triple::x86_64: + return [](){ + return llvm::make_unique< + orc::LocalIndirectStubsManager>(); + }; + } +} diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h new file mode 100644 index 000000000000..2e17624ff474 --- /dev/null +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -0,0 +1,282 @@ +//===--- OrcCBindingsStack.h - Orc JIT stack for C bindings ---*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H +#define LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H + +#include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm-c/OrcBindings.h" + +namespace llvm { + +class OrcCBindingsStack; + +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(OrcCBindingsStack, LLVMOrcJITStackRef) +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef) + +class OrcCBindingsStack { +public: + + typedef orc::JITCompileCallbackManager CompileCallbackMgr; + typedef orc::ObjectLinkingLayer<> ObjLayerT; + typedef orc::IRCompileLayer CompileLayerT; + typedef orc::CompileOnDemandLayer CODLayerT; + + typedef std::function()> + CallbackManagerBuilder; + + typedef CODLayerT::IndirectStubsManagerBuilderT IndirectStubsManagerBuilder; + +private: + + class GenericHandle { + public: + virtual ~GenericHandle() {} + virtual orc::JITSymbol findSymbolIn(const std::string &Name, + bool ExportedSymbolsOnly) = 0; + virtual void removeModule() = 0; + }; + + template + class GenericHandleImpl : public GenericHandle { + public: + GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle) + : Layer(Layer), Handle(std::move(Handle)) {} + + orc::JITSymbol findSymbolIn(const std::string &Name, + bool ExportedSymbolsOnly) override { + return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly); + } + + void removeModule() override { + return Layer.removeModuleSet(Handle); + } + + private: + LayerT &Layer; + typename LayerT::ModuleSetHandleT Handle; + }; + + template + std::unique_ptr> + createGenericHandle(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle) { + return llvm::make_unique>(Layer, + std::move(Handle)); + } + +public: + + // We need a 'ModuleSetHandleT' to conform to the layer concept. 
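+  // Handles handed out through the C API are plain unsigned indices into the
+  // GenericHandles vector below; removeModule() recycles a slot by pushing
+  // its index onto FreeHandleIndexes for reuse by createHandle().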
+  typedef unsigned ModuleSetHandleT;
+
+  typedef unsigned ModuleHandleT;
+
+  static std::unique_ptr<CompileCallbackMgr> createCompileCallbackMgr(Triple T);
+  static IndirectStubsManagerBuilder createIndirectStubsMgrBuilder(Triple T);
+
+  OrcCBindingsStack(TargetMachine &TM,
+                    std::unique_ptr<CompileCallbackMgr> CCMgr,
+                    IndirectStubsManagerBuilder IndirectStubsMgrBuilder)
+    : DL(TM.createDataLayout()), CCMgr(std::move(CCMgr)),
+      ObjectLayer(),
+      CompileLayer(ObjectLayer, orc::SimpleCompiler(TM)),
+      CODLayer(CompileLayer,
+               [](Function &F) { std::set<Function*> S; S.insert(&F); return S; },
+               *this->CCMgr, std::move(IndirectStubsMgrBuilder), false),
+      IndirectStubsMgr(IndirectStubsMgrBuilder()),
+      CXXRuntimeOverrides([this](const std::string &S) { return mangle(S); }) {}
+
+  ~OrcCBindingsStack() {
+    // Run any destructors registered with __cxa_atexit.
+    CXXRuntimeOverrides.runDestructors();
+    // Run any IR destructors.
+    for (auto &DtorRunner : IRStaticDestructorRunners)
+      DtorRunner.runViaLayer(*this);
+  }
+
+  std::string mangle(StringRef Name) {
+    std::string MangledName;
+    {
+      raw_string_ostream MangledNameStream(MangledName);
+      Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
+    }
+    return MangledName;
+  }
+
+  template <typename PtrTy>
+  static PtrTy fromTargetAddress(orc::TargetAddress Addr) {
+    return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
+  }
+
+  orc::TargetAddress
+  createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback,
+                            void *CallbackCtx) {
+    auto CCInfo = CCMgr->getCompileCallback();
+    CCInfo.setCompileAction(
+      [=]() -> orc::TargetAddress {
+        return Callback(wrap(this), CallbackCtx);
+      });
+    return CCInfo.getAddress();
+  }
+
+  void createIndirectStub(StringRef StubName, orc::TargetAddress Addr) {
+    IndirectStubsMgr->createStub(StubName, Addr, JITSymbolFlags::Exported);
+  }
+
+  void setIndirectStubPointer(StringRef Name, orc::TargetAddress Addr) {
+    IndirectStubsMgr->updatePointer(Name, Addr);
+  }
+
+  std::shared_ptr<RuntimeDyld::SymbolResolver>
+  createResolver(LLVMOrcSymbolResolverFn ExternalResolver,
+                 void *ExternalResolverCtx) {
+    auto Resolver = orc::createLambdaResolver(
+      [this, ExternalResolver, ExternalResolverCtx](const std::string &Name) {
+        // Search order:
+        // 1. JIT'd symbols.
+        // 2. Runtime overrides.
+        // 3. External resolver (if present).
+
+        if (auto Sym = CODLayer.findSymbol(Name, true))
+          return RuntimeDyld::SymbolInfo(Sym.getAddress(),
+                                         Sym.getFlags());
+        if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name))
+          return Sym;
+
+        if (ExternalResolver)
+          return RuntimeDyld::SymbolInfo(ExternalResolver(Name.c_str(),
+                                                          ExternalResolverCtx),
+                                         llvm::JITSymbolFlags::Exported);
+
+        return RuntimeDyld::SymbolInfo(nullptr);
+      },
+      [](const std::string &Name) {
+        return RuntimeDyld::SymbolInfo(nullptr);
+      }
+    );
+
+    return std::shared_ptr<RuntimeDyld::SymbolResolver>(std::move(Resolver));
+  }
+
+  template <typename LayerT>
+  ModuleHandleT addIRModule(LayerT &Layer,
+                            Module *M,
+                            std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr,
+                            LLVMOrcSymbolResolverFn ExternalResolver,
+                            void *ExternalResolverCtx) {
+
+    // Attach a data-layout if one isn't already present.
+    if (M->getDataLayout().isDefault())
+      M->setDataLayout(DL);
+
+    // Record the static constructors and destructors. We have to do this before
+    // we hand over ownership of the module to the JIT.
+    std::vector<std::string> CtorNames, DtorNames;
+    for (auto Ctor : orc::getConstructors(*M))
+      CtorNames.push_back(mangle(Ctor.Func->getName()));
+    for (auto Dtor : orc::getDestructors(*M))
+      DtorNames.push_back(mangle(Dtor.Func->getName()));
+
+    // Create the resolver.
+    auto Resolver = createResolver(ExternalResolver, ExternalResolverCtx);
+
+    // Add the module to the JIT.
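// For reference, the ExternalResolver threaded through createResolver above
// is a plain C callback. A hypothetical client implementation follows; the
// name host_log and the exact signature are assumptions based on
// LLVMOrcSymbolResolverFn as declared in llvm-c/OrcBindings.h:
//
// \code
//   extern "C" uint64_t MyResolver(const char *Name, void *Ctx) {
//     if (strcmp(Name, "host_log") == 0)
//       return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(&host_log));
//     return 0; // Unknown symbol: resolution yields a null address.
//   }
// \endcode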
+    std::vector<Module*> S;
+    S.push_back(std::move(M));
+
+    auto LH = Layer.addModuleSet(std::move(S), std::move(MemMgr),
+                                 std::move(Resolver));
+    ModuleHandleT H = createHandle(Layer, LH);
+
+    // Run the static constructors, and save the static destructor runner for
+    // execution when the JIT is torn down.
+    orc::CtorDtorRunner<OrcCBindingsStack> CtorRunner(std::move(CtorNames), H);
+    CtorRunner.runViaLayer(*this);
+
+    IRStaticDestructorRunners.emplace_back(std::move(DtorNames), H);
+
+    return H;
+  }
+
+  ModuleHandleT addIRModuleEager(Module* M,
+                                 LLVMOrcSymbolResolverFn ExternalResolver,
+                                 void *ExternalResolverCtx) {
+    return addIRModule(CompileLayer, std::move(M),
+                       llvm::make_unique<SectionMemoryManager>(),
+                       std::move(ExternalResolver), ExternalResolverCtx);
+  }
+
+  ModuleHandleT addIRModuleLazy(Module* M,
+                                LLVMOrcSymbolResolverFn ExternalResolver,
+                                void *ExternalResolverCtx) {
+    return addIRModule(CODLayer, std::move(M), nullptr,
+                       std::move(ExternalResolver), ExternalResolverCtx);
+  }
+
+  void removeModule(ModuleHandleT H) {
+    GenericHandles[H]->removeModule();
+    GenericHandles[H] = nullptr;
+    FreeHandleIndexes.push_back(H);
+  }
+
+  orc::JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+    if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly))
+      return Sym;
+    return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
+  }
+
+  orc::JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name,
+                              bool ExportedSymbolsOnly) {
+    return GenericHandles[H]->findSymbolIn(Name, ExportedSymbolsOnly);
+  }
+
+private:
+
+  template <typename LayerT>
+  unsigned createHandle(LayerT &Layer,
+                        typename LayerT::ModuleSetHandleT Handle) {
+    unsigned NewHandle;
+    if (!FreeHandleIndexes.empty()) {
+      NewHandle = FreeHandleIndexes.back();
+      FreeHandleIndexes.pop_back();
+      GenericHandles[NewHandle] = createGenericHandle(Layer, std::move(Handle));
+      return NewHandle;
+    } else {
+      NewHandle = GenericHandles.size();
+      GenericHandles.push_back(createGenericHandle(Layer, std::move(Handle)));
+    }
+    return NewHandle;
+  }
+
+  DataLayout DL;
+  SectionMemoryManager CCMgrMemMgr;
+
+  std::unique_ptr<CompileCallbackMgr> CCMgr;
+  ObjLayerT ObjectLayer;
+  CompileLayerT CompileLayer;
+  CODLayerT CODLayer;
+
+  std::unique_ptr<orc::IndirectStubsManager> IndirectStubsMgr;
+
+  std::vector<std::unique_ptr<GenericHandle>> GenericHandles;
+  std::vector<unsigned> FreeHandleIndexes;
+
+  orc::LocalCXXRuntimeOverrides CXXRuntimeOverrides;
+  std::vector<orc::CtorDtorRunner<OrcCBindingsStack>> IRStaticDestructorRunners;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_EXECUTIONENGINE_ORC_ORCCBINDINGSSTACK_H
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 7dc5164c419a..38a27cff5b2f 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -137,25 +137,26 @@ public:
   }
 
   OrcMCJITReplacement(
-      std::shared_ptr<MCJITMemoryManager> MemMgr,
-      std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver,
-      std::unique_ptr<TargetMachine> TM)
-      : TM(std::move(TM)), MemMgr(*this, std::move(MemMgr)),
-        Resolver(*this), ClientResolver(std::move(ClientResolver)),
-        NotifyObjectLoaded(*this), NotifyFinalized(*this),
+      std::shared_ptr<MCJITMemoryManager> MemMgr,
+      std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver,
+      std::unique_ptr<TargetMachine> TM)
+      : ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)),
+        MemMgr(*this, std::move(MemMgr)), Resolver(*this),
+        ClientResolver(std::move(ClientResolver)), NotifyObjectLoaded(*this),
+        NotifyFinalized(*this),
         ObjectLayer(NotifyObjectLoaded, NotifyFinalized),
         CompileLayer(ObjectLayer, SimpleCompiler(*this->TM)),
-        LazyEmitLayer(CompileLayer) {
-    setDataLayout(this->TM->getDataLayout());
-  }
+        LazyEmitLayer(CompileLayer) {}
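// With the data layout now fixed at construction time (ExecutionEngine is
// handed TM->createDataLayout() above), a module added to the engine must
// either carry the default layout or one that matches. A sketch of the
// contract (hypothetical setup, not part of the patch):
//
// \code
//   auto M = llvm::make_unique<Module>("m", Ctx);
//   M->setDataLayout(TM->createDataLayout()); // OK: matches the engine.
//   // Any other non-default layout would trip the assert in addModule below.
//   EE->addModule(std::move(M));
// \endcode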
   void addModule(std::unique_ptr<Module> M) override {
 
     // If this module doesn't have a DataLayout attached then attach the
     // default.
-    if (M->getDataLayout().isDefault())
-      M->setDataLayout(*getDataLayout());
-
+    if (M->getDataLayout().isDefault()) {
+      M->setDataLayout(getDataLayout());
+    } else {
+      assert(M->getDataLayout() == getDataLayout() && "DataLayout Mismatch");
+    }
     Modules.push_back(std::move(M));
     std::vector<Module *> Ms;
     Ms.push_back(&*Modules.back());
@@ -174,12 +175,7 @@ public:
     std::tie(Obj, Buf) = O.takeBinary();
     std::vector<std::unique_ptr<object::ObjectFile>> Objs;
     Objs.push_back(std::move(Obj));
-    auto H =
-      ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
-
-    std::vector<std::unique_ptr<MemoryBuffer>> Bufs;
-    Bufs.push_back(std::move(Buf));
-    ObjectLayer.takeOwnershipOfBuffers(H, std::move(Bufs));
+    ObjectLayer.addObjectSet(std::move(Objs), &MemMgr, &Resolver);
   }
 
   void addArchive(object::OwningBinary<object::Archive> A) override {
@@ -234,6 +230,10 @@ public:
     CompileLayer.setObjectCache(NewCache);
   }
 
+  void setProcessAllSections(bool ProcessAllSections) override {
+    ObjectLayer.setProcessAllSections(ProcessAllSections);
+  }
+
 private:
 
   RuntimeDyld::SymbolInfo findMangledSymbol(StringRef Name) {
@@ -252,10 +252,12 @@ private:
       object::Archive *A = OB.getBinary();
       // Look for our symbols in each Archive
       object::Archive::child_iterator ChildIt = A->findSym(Name);
+      if (std::error_code EC = ChildIt->getError())
+        report_fatal_error(EC.message());
       if (ChildIt != A->child_end()) {
         // FIXME: Support nested archives?
         ErrorOr<std::unique_ptr<object::Binary>> ChildBinOrErr =
-            ChildIt->getAsBinary();
+            (*ChildIt)->getAsBinary();
         if (ChildBinOrErr.getError())
           continue;
         std::unique_ptr<object::Binary> &ChildBin = ChildBinOrErr.get();
@@ -289,7 +291,7 @@ private:
              "Incorrect number of Infos for Objects.");
       for (unsigned I = 0; I < Objects.size(); ++I)
         M.MemMgr.notifyObjectLoaded(&M, *Objects[I]);
-    };
+    }
 
   private:
     OrcMCJITReplacement &M;
@@ -310,7 +312,7 @@ private:
     std::string MangledName;
     {
       raw_string_ostream MangledNameStream(MangledName);
-      Mang.getNameWithPrefix(MangledNameStream, Name, *TM->getDataLayout());
+      Mang.getNameWithPrefix(MangledNameStream, Name, getDataLayout());
     }
     return MangledName;
   }
diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
index 258868aa64f6..b931f10b9d78 100644
--- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
+++ b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
@@ -1,137 +1,170 @@
+//===------- OrcTargetSupport.cpp - Target support utilities for Orc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
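// The rewritten writeResolverCode below replaces the old inline-asm approach
// with a fixed machine-code template into which two absolute addresses are
// spliced at known offsets (the movabsq operand at ReentryFnAddrOffset and
// the baked-in callback-manager pointer at CallbackMgrAddrOffset). The splice
// is a plain byte copy, sketched here with an illustrative local buffer:
//
// \code
//   uint8_t Buf[sizeof(ResolverCode)];
//   memcpy(Buf, ResolverCode, sizeof(ResolverCode));
//   memcpy(Buf + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
//   memcpy(Buf + CallbackMgrAddrOffset, &CallbackMgr, sizeof(CallbackMgr));
// \endcode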
+// +//===----------------------------------------------------------------------===// + #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h" +#include "llvm/Support/Process.h" #include -using namespace llvm::orc; - -namespace { - -uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM, - TargetAddress CallbackID) { - return JCBM->executeCompileCallback(CallbackID); -} - -} - namespace llvm { namespace orc { -const char* OrcX86_64::ResolverBlockName = "orc_resolver_block"; +void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, + void *CallbackMgr) { -void OrcX86_64::insertResolverBlock( - Module &M, JITCompileCallbackManagerBase &JCBM) { + const uint8_t ResolverCode[] = { + // resolver_entry: + 0x55, // 0x00: pushq %rbp + 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp + 0x50, // 0x04: pushq %rax + 0x53, // 0x05: pushq %rbx + 0x51, // 0x06: pushq %rcx + 0x52, // 0x07: pushq %rdx + 0x56, // 0x08: pushq %rsi + 0x57, // 0x09: pushq %rdi + 0x41, 0x50, // 0x0a: pushq %r8 + 0x41, 0x51, // 0x0c: pushq %r9 + 0x41, 0x52, // 0x0e: pushq %r10 + 0x41, 0x53, // 0x10: pushq %r11 + 0x41, 0x54, // 0x12: pushq %r12 + 0x41, 0x55, // 0x14: pushq %r13 + 0x41, 0x56, // 0x16: pushq %r14 + 0x41, 0x57, // 0x18: pushq %r15 + 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 20, %rsp + 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) + 0x48, 0x8d, 0x3d, 0x43, 0x00, 0x00, 0x00, // 0x26: leaq 67(%rip), %rdi + 0x48, 0x8b, 0x3f, // 0x2d: movq (%rdi), %rdi + 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi + 0x48, 0x83, 0xee, 0x06, // 0x34: subq $6, %rsi + 0x48, 0xb8, // 0x38: movabsq $0, %rax - // Trampoline code-sequence length, used to get trampoline address from return - // address. - const unsigned X86_64_TrampolineLength = 6; + // 0x3a: JIT re-entry fn addr: + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - // List of x86-64 GPRs to save. Note - RBP saved separately below. - std::array GPRs = {{ - "rax", "rbx", "rcx", "rdx", - "rsi", "rdi", "r8", "r9", - "r10", "r11", "r12", "r13", - "r14", "r15" - }}; + 0xff, 0xd0, // 0x42: callq *%rax + 0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp) + 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp) + 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq 20, %rsp + 0x41, 0x5f, // 0x54: popq %r15 + 0x41, 0x5e, // 0x56: popq %r14 + 0x41, 0x5d, // 0x58: popq %r13 + 0x41, 0x5c, // 0x5a: popq %r12 + 0x41, 0x5b, // 0x5c: popq %r11 + 0x41, 0x5a, // 0x5e: popq %r10 + 0x41, 0x59, // 0x60: popq %r9 + 0x41, 0x58, // 0x62: popq %r8 + 0x5f, // 0x64: popq %rdi + 0x5e, // 0x65: popq %rsi + 0x5a, // 0x66: popq %rdx + 0x59, // 0x67: popq %rcx + 0x5b, // 0x68: popq %rbx + 0x58, // 0x69: popq %rax + 0x5d, // 0x6a: popq %rbp + 0xc3, // 0x6b: retq + 0x00, 0x00, 0x00, 0x00, // 0x6c: - // Address of the executeCompileCallback function. - uint64_t CallbackAddr = - static_cast( - reinterpret_cast(executeCompileCallback)); - - std::ostringstream AsmStream; - Triple TT(M.getTargetTriple()); - - // Switch to text section. - if (TT.getOS() == Triple::Darwin) - AsmStream << ".section __TEXT,__text,regular,pure_instructions\n" - << ".align 4, 0x90\n"; - else - AsmStream << ".text\n" - << ".align 16, 0x90\n"; - - // Bake in a pointer to the callback manager immediately before the - // start of the resolver function. - AsmStream << "jit_callback_manager_addr:\n" - << " .quad " << &JCBM << "\n"; - - // Start the resolver function. 
- AsmStream << ResolverBlockName << ":\n" - << " pushq %rbp\n" - << " movq %rsp, %rbp\n"; - - // Store the GPRs. - for (const auto &GPR : GPRs) - AsmStream << " pushq %" << GPR << "\n"; - - // Store floating-point state with FXSAVE. - // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd - // number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add - // an extra 64 bits of padding to the FXSave area. - unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0; - unsigned FXSaveSize = 512 + Padding; - AsmStream << " subq $" << FXSaveSize << ", %rsp\n" - << " fxsave64 (%rsp)\n" - - // Load callback manager address, compute trampoline address, call JIT. - << " lea jit_callback_manager_addr(%rip), %rdi\n" - << " movq (%rdi), %rdi\n" - << " movq 0x8(%rbp), %rsi\n" - << " subq $" << X86_64_TrampolineLength << ", %rsi\n" - << " movabsq $" << CallbackAddr << ", %rax\n" - << " callq *%rax\n" - - // Replace the return to the trampoline with the return address of the - // compiled function body. - << " movq %rax, 0x8(%rbp)\n" - - // Restore the floating point state. - << " fxrstor64 (%rsp)\n" - << " addq $" << FXSaveSize << ", %rsp\n"; - - for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend())) - AsmStream << " popq %" << GPR << "\n"; - - // Restore original RBP and return to compiled function body. - AsmStream << " popq %rbp\n" - << " retq\n"; - - M.appendModuleInlineAsm(AsmStream.str()); -} - -OrcX86_64::LabelNameFtor -OrcX86_64::insertCompileCallbackTrampolines(Module &M, - TargetAddress ResolverBlockAddr, - unsigned NumCalls, - unsigned StartIndex) { - const char *ResolverBlockPtrName = "Lorc_resolve_block_addr"; - - std::ostringstream AsmStream; - Triple TT(M.getTargetTriple()); - - if (TT.getOS() == Triple::Darwin) - AsmStream << ".section __TEXT,__text,regular,pure_instructions\n" - << ".align 4, 0x90\n"; - else - AsmStream << ".text\n" - << ".align 16, 0x90\n"; - - AsmStream << ResolverBlockPtrName << ":\n" - << " .quad " << ResolverBlockAddr << "\n"; - - auto GetLabelName = - [=](unsigned I) { - std::ostringstream LabelStream; - LabelStream << "orc_jcc_" << (StartIndex + I); - return LabelStream.str(); + // 0x70: Callback mgr address. + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; - for (unsigned I = 0; I < NumCalls; ++I) - AsmStream << GetLabelName(I) << ":\n" - << " callq *" << ResolverBlockPtrName << "(%rip)\n"; + const unsigned ReentryFnAddrOffset = 0x3a; + const unsigned CallbackMgrAddrOffset = 0x70; + + memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); + memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, + sizeof(CallbackMgr)); +} - M.appendModuleInlineAsm(AsmStream.str()); +void OrcX86_64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + unsigned NumTrampolines) { - return GetLabelName; + unsigned OffsetToPtr = NumTrampolines * TrampolineSize; + + memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void*)); + + uint64_t *Trampolines = reinterpret_cast(TrampolineMem); + uint64_t CallIndirPCRel = 0xf1c40000000015ff; + + for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) + Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16); +} + +std::error_code OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, + unsigned MinStubs, + void *InitialPtrVal) { + // Stub format is: + // + // .section __orc_stubs + // stub1: + // jmpq *ptr1(%rip) + // .byte 0xC4 ; <- Invalid opcode padding. 
+ // .byte 0xF1 + // stub2: + // jmpq *ptr2(%rip) + // + // ... + // + // .section __orc_ptrs + // ptr1: + // .quad 0x0 + // ptr2: + // .quad 0x0 + // + // ... + + const unsigned StubSize = IndirectStubsInfo::StubSize; + + // Emit at least MinStubs, rounded up to fill the pages allocated. + unsigned PageSize = sys::Process::getPageSize(); + unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; + unsigned NumStubs = (NumPages * PageSize) / StubSize; + + // Allocate memory for stubs and pointers in one call. + std::error_code EC; + auto StubsMem = + sys::OwningMemoryBlock( + sys::Memory::allocateMappedMemory(2 * NumPages * PageSize, nullptr, + sys::Memory::MF_READ | + sys::Memory::MF_WRITE, + EC)); + + if (EC) + return EC; + + // Create separate MemoryBlocks representing the stubs and pointers. + sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); + sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + + NumPages * PageSize, + NumPages * PageSize); + + // Populate the stubs page stubs and mark it executable. + uint64_t *Stub = reinterpret_cast(StubsBlock.base()); + uint64_t PtrOffsetField = + static_cast(NumPages * PageSize - 6) << 16; + for (unsigned I = 0; I < NumStubs; ++I) + Stub[I] = 0xF1C40000000025ff | PtrOffsetField; + + if (auto EC = sys::Memory::protectMappedMemory(StubsBlock, + sys::Memory::MF_READ | + sys::Memory::MF_EXEC)) + return EC; + + // Initialize all pointers to point at FailureAddress. + void **Ptr = reinterpret_cast(PtrsBlock.base()); + for (unsigned I = 0; I < NumStubs; ++I) + Ptr[I] = InitialPtrVal; + + StubsInfo.NumStubs = NumStubs; + StubsInfo.StubsMem = std::move(StubsMem); + + return std::error_code(); } } // End namespace orc. diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 93287a3a4e71..a95f3bbe4179 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -41,20 +41,21 @@ void RuntimeDyldImpl::deregisterEHFrames() {} #ifndef NDEBUG static void dumpSectionMemory(const SectionEntry &S, StringRef State) { - dbgs() << "----- Contents of section " << S.Name << " " << State << " -----"; + dbgs() << "----- Contents of section " << S.getName() << " " << State + << " -----"; - if (S.Address == nullptr) { + if (S.getAddress() == nullptr) { dbgs() << "\n
\n"; return; } const unsigned ColsPerRow = 16; - uint8_t *DataAddr = S.Address; - uint64_t LoadAddr = S.LoadAddress; + uint8_t *DataAddr = S.getAddress(); + uint64_t LoadAddr = S.getLoadAddress(); unsigned StartPadding = LoadAddr & (ColsPerRow - 1); - unsigned BytesRemaining = S.Size; + unsigned BytesRemaining = S.getSize(); if (StartPadding) { dbgs() << "\n" << format("0x%016" PRIx64, @@ -82,30 +83,41 @@ static void dumpSectionMemory(const SectionEntry &S, StringRef State) { void RuntimeDyldImpl::resolveRelocations() { MutexGuard locked(lock); + // Print out the sections prior to relocation. + DEBUG( + for (int i = 0, e = Sections.size(); i != e; ++i) + dumpSectionMemory(Sections[i], "before relocations"); + ); + // First, resolve relocations associated with external symbols. resolveExternalSymbols(); - // Just iterate over the sections we have and resolve all the relocations - // in them. Gross overkill, but it gets the job done. - for (int i = 0, e = Sections.size(); i != e; ++i) { + // Iterate over all outstanding relocations + for (auto it = Relocations.begin(), e = Relocations.end(); it != e; ++it) { // The Section here (Sections[i]) refers to the section in which the // symbol for the relocation is located. The SectionID in the relocation // entry provides the section to which the relocation will be applied. - uint64_t Addr = Sections[i].LoadAddress; - DEBUG(dbgs() << "Resolving relocations Section #" << i << "\t" + int Idx = it->first; + uint64_t Addr = Sections[Idx].getLoadAddress(); + DEBUG(dbgs() << "Resolving relocations Section #" << Idx << "\t" << format("%p", (uintptr_t)Addr) << "\n"); - DEBUG(dumpSectionMemory(Sections[i], "before relocations")); - resolveRelocationList(Relocations[i], Addr); - DEBUG(dumpSectionMemory(Sections[i], "after relocations")); - Relocations.erase(i); + resolveRelocationList(it->second, Addr); } + Relocations.clear(); + + // Print out sections after relocation. + DEBUG( + for (int i = 0, e = Sections.size(); i != e; ++i) + dumpSectionMemory(Sections[i], "after relocations"); + ); + } void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress, uint64_t TargetAddress) { MutexGuard locked(lock); for (unsigned i = 0, e = Sections.size(); i != e; ++i) { - if (Sections[i].Address == LocalAddress) { + if (Sections[i].getAddress() == LocalAddress) { reassignSectionAddress(i, TargetAddress); return; } @@ -122,14 +134,10 @@ static std::error_code getOffset(const SymbolRef &Sym, SectionRef Sec, return std::error_code(); } -std::pair +RuntimeDyldImpl::ObjSectionToIDMap RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { MutexGuard locked(lock); - // Grab the first Section ID. We'll use this later to construct the underlying - // range for the returned LoadedObjectInfo. - unsigned SectionsAddedBeginIdx = Sections.size(); - // Save information about our target Arch = (Triple::ArchType)Obj.getArch(); IsTargetLittleEndian = Obj.isLittleEndian(); @@ -155,39 +163,56 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { ++I) { uint32_t Flags = I->getFlags(); - bool IsCommon = Flags & SymbolRef::SF_Common; - if (IsCommon) + if (Flags & SymbolRef::SF_Common) CommonSymbols.push_back(*I); else { object::SymbolRef::Type SymType = I->getType(); - if (SymType == object::SymbolRef::ST_Function || - SymType == object::SymbolRef::ST_Data || - SymType == object::SymbolRef::ST_Unknown) { + // Get symbol name. + ErrorOr NameOrErr = I->getName(); + Check(NameOrErr.getError()); + StringRef Name = *NameOrErr; + + // Compute JIT symbol flags. 
+ JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None; + if (Flags & SymbolRef::SF_Weak) + RTDyldSymFlags |= JITSymbolFlags::Weak; + if (Flags & SymbolRef::SF_Exported) + RTDyldSymFlags |= JITSymbolFlags::Exported; - ErrorOr NameOrErr = I->getName(); - Check(NameOrErr.getError()); - StringRef Name = *NameOrErr; - section_iterator SI = Obj.section_end(); - Check(I->getSection(SI)); + if (Flags & SymbolRef::SF_Absolute && + SymType != object::SymbolRef::ST_File) { + auto Addr = I->getAddress(); + Check(Addr.getError()); + uint64_t SectOffset = *Addr; + unsigned SectionID = AbsoluteSymbolSection; + + DEBUG(dbgs() << "\tType: " << SymType << " (absolute) Name: " << Name + << " SID: " << SectionID << " Offset: " + << format("%p", (uintptr_t)SectOffset) + << " flags: " << Flags << "\n"); + GlobalSymbolTable[Name] = + SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags); + } else if (SymType == object::SymbolRef::ST_Function || + SymType == object::SymbolRef::ST_Data || + SymType == object::SymbolRef::ST_Unknown || + SymType == object::SymbolRef::ST_Other) { + + ErrorOr SIOrErr = I->getSection(); + Check(SIOrErr.getError()); + section_iterator SI = *SIOrErr; if (SI == Obj.section_end()) continue; + // Get symbol offset. uint64_t SectOffset; Check(getOffset(*I, *SI, SectOffset)); - StringRef SectionData; - Check(SI->getContents(SectionData)); bool IsCode = SI->isText(); - unsigned SectionID = - findOrEmitSection(Obj, *SI, IsCode, LocalSections); + unsigned SectionID = findOrEmitSection(Obj, *SI, IsCode, LocalSections); + DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << " SID: " << SectionID << " Offset: " << format("%p", (uintptr_t)SectOffset) << " flags: " << Flags << "\n"); - JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None; - if (Flags & SymbolRef::SF_Weak) - RTDyldSymFlags |= JITSymbolFlags::Weak; - if (Flags & SymbolRef::SF_Exported) - RTDyldSymFlags |= JITSymbolFlags::Exported; GlobalSymbolTable[Name] = SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags); } @@ -231,9 +256,10 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { // Give the subclasses a chance to tie-up any loose ends. finalizeLoad(Obj, LocalSections); - unsigned SectionsAddedEndIdx = Sections.size(); +// for (auto E : LocalSections) +// llvm::dbgs() << "Added: " << E.first.getRawDataRefImpl() << " -> " << E.second << "\n"; - return std::make_pair(SectionsAddedBeginIdx, SectionsAddedEndIdx); + return LocalSections; } // A helper method for computeTotalAllocSize. 
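// Since loadObjectImpl now returns the section map itself, the
// LoadedObjectInfo helpers can answer load-address queries per SectionRef
// instead of scanning a [BeginIdx, EndIdx) range by name. A sketch of the
// lookup this enables (registerWithDebugger is a hypothetical helper):
//
// \code
//   std::unique_ptr<RuntimeDyld::LoadedObjectInfo> Info = Dyld.loadObject(Obj);
//   for (const object::SectionRef &Sec : Obj.sections())
//     if (uint64_t LoadAddr = Info->getSectionLoadAddress(Sec))
//       registerWithDebugger(Sec, LoadAddr);
// \endcode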
@@ -406,10 +432,9 @@ unsigned RuntimeDyldImpl::computeSectionStubBufSize(const ObjectFile &Obj, if (!(RelSecI == Section)) continue; - for (const RelocationRef &Reloc : SI->relocations()) { - (void)Reloc; - StubBufSize += StubSize; - } + for (const RelocationRef &Reloc : SI->relocations()) + if (relocationNeedsStub(Reloc)) + StubBufSize += StubSize; } // Get section data size and alignment @@ -492,7 +517,8 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, if (!Addr) report_fatal_error("Unable to allocate memory for common symbols!"); uint64_t Offset = 0; - Sections.push_back(SectionEntry("", Addr, CommonSize, 0)); + Sections.push_back( + SectionEntry("", Addr, CommonSize, CommonSize, 0)); memset(Addr, 0, CommonSize); DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID << " new addr: " @@ -524,6 +550,9 @@ void RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj, Offset += Size; Addr += Size; } + + if (Checker) + Checker->registerSection(Obj.getFileName(), SectionID); } unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, @@ -556,12 +585,20 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, uint8_t *Addr; const char *pData = nullptr; - // In either case, set the location of the unrelocated section in memory, - // since we still process relocations for it even if we're not applying them. - Check(Section.getContents(data)); - // Virtual sections have no data in the object image, so leave pData = 0 - if (!IsVirtual) + // If this section contains any bits (i.e. isn't a virtual or bss section), + // grab a reference to them. + if (!IsVirtual && !IsZeroInit) { + // In either case, set the location of the unrelocated section in memory, + // since we still process relocations for it even if we're not applying them. + Check(Section.getContents(data)); pData = data.data(); + } + + // Code section alignment needs to be at least as high as stub alignment or + // padding calculations may by incorrect when the section is remapped to a + // higher alignment. + if (IsCode) + Alignment = std::max(Alignment, getStubAlignment()); // Some sections, such as debug info, don't need to be loaded for execution. // Leave those where they are. @@ -606,7 +643,8 @@ unsigned RuntimeDyldImpl::emitSection(const ObjectFile &Obj, << " Allocate: " << Allocate << "\n"); } - Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData)); + Sections.push_back( + SectionEntry(Name, Addr, DataSize, Allocate, (uintptr_t)pData)); if (Checker) Checker->registerSection(Obj.getFileName(), SectionID); @@ -742,11 +780,11 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID, // Addr is a uint64_t because we can't assume the pointer width // of the target is the same as that of the host. Just use a generic // "big enough" type. 
- DEBUG(dbgs() << "Reassigning address for section " - << SectionID << " (" << Sections[SectionID].Name << "): " - << format("0x%016" PRIx64, Sections[SectionID].LoadAddress) << " -> " - << format("0x%016" PRIx64, Addr) << "\n"); - Sections[SectionID].LoadAddress = Addr; + DEBUG(dbgs() << "Reassigning address for section " << SectionID << " (" + << Sections[SectionID].getName() << "): " + << format("0x%016" PRIx64, Sections[SectionID].getLoadAddress()) + << " -> " << format("0x%016" PRIx64, Addr) << "\n"); + Sections[SectionID].setLoadAddress(Addr); } void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, @@ -754,7 +792,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { const RelocationEntry &RE = Relocs[i]; // Ignore relocations for sections that were not loaded - if (Sections[RE.SectionID].Address == nullptr) + if (Sections[RE.SectionID].getAddress() == nullptr) continue; resolveRelocation(RE, Value); } @@ -818,10 +856,11 @@ void RuntimeDyldImpl::resolveExternalSymbols() { // RuntimeDyld class implementation uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress( - StringRef SectionName) const { - for (unsigned I = BeginIdx; I != EndIdx; ++I) - if (RTDyld.Sections[I].Name == SectionName) - return RTDyld.Sections[I].LoadAddress; + const object::SectionRef &Sec) const { + + auto I = ObjSecToIDMap.find(Sec); + if (I != ObjSecToIDMap.end()) + return RTDyld.Sections[I->second].getLoadAddress(); return 0; } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp index 1dacc1393f2c..e5fab929ea29 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "RuntimeDyldCOFF.h" +#include "Targets/RuntimeDyldCOFFI386.h" #include "Targets/RuntimeDyldCOFFX86_64.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" @@ -24,12 +25,11 @@ using namespace llvm::object; namespace { -class LoadedCOFFObjectInfo +class LoadedCOFFObjectInfo final : public RuntimeDyld::LoadedObjectInfoHelper { public: - LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {} + LoadedCOFFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) + : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary getObjectForDebug(const ObjectFile &Obj) const override { @@ -48,6 +48,8 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, default: llvm_unreachable("Unsupported target for RuntimeDyldCOFF."); break; + case Triple::x86: + return make_unique(MemMgr, Resolver); case Triple::x86_64: return make_unique(MemMgr, Resolver); } @@ -55,10 +57,7 @@ llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch, std::unique_ptr RuntimeDyldCOFF::loadObject(const object::ObjectFile &O) { - unsigned SectionStartIdx, SectionEndIdx; - std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O); - return llvm::make_unique(*this, SectionStartIdx, - SectionEndIdx); + return llvm::make_unique(*this, loadObjectImpl(O)); } uint64_t RuntimeDyldCOFF::getSymbolOffset(const SymbolRef &Sym) { diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp index ae199b720223..58ce88a68f23 100644 --- 
a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -727,7 +727,7 @@ bool RuntimeDyldCheckerImpl::checkAllRulesInBuffer(StringRef RulePrefix, } bool RuntimeDyldCheckerImpl::isSymbolValid(StringRef Symbol) const { - if (getRTDyld().getSymbolLocalAddress(Symbol)) + if (getRTDyld().getSymbol(Symbol)) return true; return !!getRTDyld().Resolver.findSymbol(Symbol); } @@ -799,11 +799,10 @@ std::pair RuntimeDyldCheckerImpl::getSectionAddr( unsigned SectionID = SectionInfo->SectionID; uint64_t Addr; if (IsInsideLoad) - Addr = - static_cast( - reinterpret_cast(getRTDyld().Sections[SectionID].Address)); + Addr = static_cast(reinterpret_cast( + getRTDyld().Sections[SectionID].getAddress())); else - Addr = getRTDyld().Sections[SectionID].LoadAddress; + Addr = getRTDyld().Sections[SectionID].getLoadAddress(); return std::make_pair(Addr, std::string("")); } @@ -835,11 +834,11 @@ std::pair RuntimeDyldCheckerImpl::getStubAddrFor( uint64_t Addr; if (IsInsideLoad) { - uintptr_t SectionBase = - reinterpret_cast(getRTDyld().Sections[SectionID].Address); + uintptr_t SectionBase = reinterpret_cast( + getRTDyld().Sections[SectionID].getAddress()); Addr = static_cast(SectionBase) + StubOffset; } else { - uint64_t SectionBase = getRTDyld().Sections[SectionID].LoadAddress; + uint64_t SectionBase = getRTDyld().Sections[SectionID].getLoadAddress(); Addr = SectionBase + StubOffset; } @@ -855,16 +854,16 @@ RuntimeDyldCheckerImpl::getSubsectionStartingAt(StringRef Name) const { const auto &SymInfo = pos->second; uint8_t *SectionAddr = getRTDyld().getSectionAddress(SymInfo.getSectionID()); return StringRef(reinterpret_cast(SectionAddr) + - SymInfo.getOffset(), - getRTDyld().Sections[SymInfo.getSectionID()].Size - - SymInfo.getOffset()); + SymInfo.getOffset(), + getRTDyld().Sections[SymInfo.getSectionID()].getSize() - + SymInfo.getOffset()); } void RuntimeDyldCheckerImpl::registerSection( StringRef FilePath, unsigned SectionID) { StringRef FileName = sys::path::filename(FilePath); const SectionEntry &Section = getRTDyld().Sections[SectionID]; - StringRef SectionName = Section.Name; + StringRef SectionName = Section.getName(); Stubs[FileName][SectionName].SectionID = SectionID; } @@ -874,7 +873,7 @@ void RuntimeDyldCheckerImpl::registerStubMap( const RuntimeDyldImpl::StubMap &RTDyldStubs) { StringRef FileName = sys::path::filename(FilePath); const SectionEntry &Section = getRTDyld().Sections[SectionID]; - StringRef SectionName = Section.Name; + StringRef SectionName = Section.getName(); Stubs[FileName][SectionName].SectionID = SectionID; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 3787950b3b08..e09b71af18a5 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -66,7 +66,6 @@ public: static inline bool classof(const ELFObjectFile *v) { return v->isDyldType(); } - }; @@ -104,12 +103,11 @@ void DyldELFObject::updateSymbolAddress(const SymbolRef &SymRef, sym->st_value = static_cast(Addr); } -class LoadedELFObjectInfo +class LoadedELFObjectInfo final : public RuntimeDyld::LoadedObjectInfoHelper { public: - LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {} + LoadedELFObjectInfo(RuntimeDyldImpl &RTDyld, ObjSectionToIDMap ObjSecToIDMap) + : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary 
getObjectForDebug(const ObjectFile &Obj) const override; @@ -118,6 +116,7 @@ public: template std::unique_ptr> createRTDyldELFObject(MemoryBufferRef Buffer, + const ObjectFile &SourceObject, const LoadedELFObjectInfo &L, std::error_code &ec) { typedef typename ELFFile::Elf_Shdr Elf_Shdr; @@ -127,6 +126,7 @@ createRTDyldELFObject(MemoryBufferRef Buffer, llvm::make_unique>(Buffer, ec); // Iterate over all sections in the object. + auto SI = SourceObject.section_begin(); for (const auto &Sec : Obj->sections()) { StringRef SectionName; Sec.getName(SectionName); @@ -135,12 +135,13 @@ createRTDyldELFObject(MemoryBufferRef Buffer, Elf_Shdr *shdr = const_cast( reinterpret_cast(ShdrRef.p)); - if (uint64_t SecLoadAddr = L.getSectionLoadAddress(SectionName)) { + if (uint64_t SecLoadAddr = L.getSectionLoadAddress(*SI)) { // This assumes that the address passed in matches the target address // bitness. The template-based type cast handles everything else. shdr->sh_addr = static_cast(SecLoadAddr); } } + ++SI; } return Obj; @@ -158,16 +159,20 @@ OwningBinary createELFDebugObject(const ObjectFile &Obj, std::unique_ptr DebugObj; if (Obj.getBytesInAddress() == 4 && Obj.isLittleEndian()) { typedef ELFType ELF32LE; - DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), L, ec); + DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L, + ec); } else if (Obj.getBytesInAddress() == 4 && !Obj.isLittleEndian()) { typedef ELFType ELF32BE; - DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), L, ec); + DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L, + ec); } else if (Obj.getBytesInAddress() == 8 && !Obj.isLittleEndian()) { typedef ELFType ELF64BE; - DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), L, ec); + DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L, + ec); } else if (Obj.getBytesInAddress() == 8 && Obj.isLittleEndian()) { typedef ELFType ELF64LE; - DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), L, ec); + DebugObj = createRTDyldELFObject(Buffer->getMemBufferRef(), Obj, L, + ec); } else llvm_unreachable("Unexpected ELF format"); @@ -181,7 +186,7 @@ LoadedELFObjectInfo::getObjectForDebug(const ObjectFile &Obj) const { return createELFDebugObject(Obj, *this); } -} // namespace +} // anonymous namespace namespace llvm { @@ -193,9 +198,9 @@ RuntimeDyldELF::~RuntimeDyldELF() {} void RuntimeDyldELF::registerEHFrames() { for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = UnregisteredEHFrameSections[i]; - uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; - size_t EHFrameSize = Sections[EHFrameSID].Size; + uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); + size_t EHFrameSize = Sections[EHFrameSID].getSize(); MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } @@ -205,9 +210,9 @@ void RuntimeDyldELF::registerEHFrames() { void RuntimeDyldELF::deregisterEHFrames() { for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) { SID EHFrameSID = RegisteredEHFrameSections[i]; - uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; - size_t EHFrameSize = Sections[EHFrameSID].Size; + uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); + size_t EHFrameSize = 
Sections[EHFrameSID].getSize(); MemMgr.deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); } RegisteredEHFrameSections.clear(); @@ -215,10 +220,7 @@ void RuntimeDyldELF::deregisterEHFrames() { std::unique_ptr RuntimeDyldELF::loadObject(const object::ObjectFile &O) { - unsigned SectionStartIdx, SectionEndIdx; - std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O); - return llvm::make_unique(*this, SectionStartIdx, - SectionEndIdx); + return llvm::make_unique(*this, loadObjectImpl(O)); } void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, @@ -230,9 +232,10 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, llvm_unreachable("Relocation type not implemented yet!"); break; case ELF::R_X86_64_64: { - support::ulittle64_t::ref(Section.Address + Offset) = Value + Addend; + support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = + Value + Addend; DEBUG(dbgs() << "Writing " << format("%p", (Value + Addend)) << " at " - << format("%p\n", Section.Address + Offset)); + << format("%p\n", Section.getAddressWithOffset(Offset))); break; } case ELF::R_X86_64_32: @@ -242,23 +245,34 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, (Type == ELF::R_X86_64_32S && ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN))); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); - support::ulittle32_t::ref(Section.Address + Offset) = TruncatedAddr; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + TruncatedAddr; DEBUG(dbgs() << "Writing " << format("%p", TruncatedAddr) << " at " - << format("%p\n", Section.Address + Offset)); + << format("%p\n", Section.getAddressWithOffset(Offset))); + break; + } + case ELF::R_X86_64_PC8: { + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); + int64_t RealOffset = Value + Addend - FinalAddress; + assert(isInt<8>(RealOffset)); + int8_t TruncOffset = (RealOffset & 0xFF); + Section.getAddress()[Offset] = TruncOffset; break; } case ELF::R_X86_64_PC32: { - uint64_t FinalAddress = Section.LoadAddress + Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; assert(isInt<32>(RealOffset)); int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); - support::ulittle32_t::ref(Section.Address + Offset) = TruncOffset; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + TruncOffset; break; } case ELF::R_X86_64_PC64: { - uint64_t FinalAddress = Section.LoadAddress + Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int64_t RealOffset = Value + Addend - FinalAddress; - support::ulittle64_t::ref(Section.Address + Offset) = RealOffset; + support::ulittle64_t::ref(Section.getAddressWithOffset(Offset)) = + RealOffset; break; } } @@ -269,13 +283,16 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, uint32_t Type, int32_t Addend) { switch (Type) { case ELF::R_386_32: { - support::ulittle32_t::ref(Section.Address + Offset) = Value + Addend; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + Value + Addend; break; } case ELF::R_386_PC32: { - uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF); + uint32_t FinalAddress = + Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF; uint32_t RealOffset = Value + Addend - FinalAddress; - support::ulittle32_t::ref(Section.Address + Offset) = RealOffset; + support::ulittle32_t::ref(Section.getAddressWithOffset(Offset)) = + RealOffset; break; } default: @@ -289,11 
+306,12 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { - uint32_t *TargetPtr = reinterpret_cast(Section.Address + Offset); - uint64_t FinalAddress = Section.LoadAddress + Offset; + uint32_t *TargetPtr = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x" - << format("%llx", Section.Address + Offset) + << format("%llx", Section.getAddressWithOffset(Offset)) << " FinalAddress: 0x" << format("%llx", FinalAddress) << " Value: 0x" << format("%llx", Value) << " Type: 0x" << format("%x", Type) << " Addend: 0x" << format("%llx", Addend) @@ -305,7 +323,7 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, break; case ELF::R_AARCH64_ABS64: { uint64_t *TargetPtr = - reinterpret_cast(Section.Address + Offset); + reinterpret_cast(Section.getAddressWithOffset(Offset)); *TargetPtr = Value + Addend; break; } @@ -428,12 +446,13 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { // TODO: Add Thumb relocations. - uint32_t *TargetPtr = (uint32_t *)(Section.Address + Offset); - uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF); + uint32_t *TargetPtr = + reinterpret_cast(Section.getAddressWithOffset(Offset)); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset) & 0xFFFFFFFF; Value += Addend; DEBUG(dbgs() << "resolveARMRelocation, LocalAddress: " - << Section.Address + Offset + << Section.getAddressWithOffset(Offset) << " FinalAddress: " << format("%p", FinalAddress) << " Value: " << format("%x", Value) << " Type: " << format("%x", Type) << " Addend: " << format("%x", Addend) << "\n"); @@ -477,13 +496,14 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend) { - uint8_t *TargetPtr = Section.Address + Offset; + uint8_t *TargetPtr = Section.getAddressWithOffset(Offset); Value += Addend; DEBUG(dbgs() << "resolveMIPSRelocation, LocalAddress: " - << Section.Address + Offset << " FinalAddress: " - << format("%p", Section.LoadAddress + Offset) << " Value: " - << format("%x", Value) << " Type: " << format("%x", Type) + << Section.getAddressWithOffset(Offset) << " FinalAddress: " + << format("%p", Section.getLoadAddressWithOffset(Offset)) + << " Value: " << format("%x", Value) + << " Type: " << format("%x", Type) << " Addend: " << format("%x", Addend) << "\n"); uint32_t Insn = readBytesUnaligned(TargetPtr, 4); @@ -512,47 +532,47 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, writeBytesUnaligned(Insn, TargetPtr, 4); break; case ELF::R_MIPS_PC32: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); writeBytesUnaligned(Value - FinalAddress, (uint8_t *)TargetPtr, 4); break; } case ELF::R_MIPS_PC16: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffff0000; Insn |= ((Value - FinalAddress) >> 2) & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PC19_S2: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + 
uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xfff80000; Insn |= ((Value - (FinalAddress & ~0x3)) >> 2) & 0x7ffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PC21_S2: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffe00000; Insn |= ((Value - FinalAddress) >> 2) & 0x1fffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PC26_S2: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xfc000000; Insn |= ((Value - FinalAddress) >> 2) & 0x3ffffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PCHI16: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffff0000; Insn |= ((Value - FinalAddress + 0x8000) >> 16) & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); break; } case ELF::R_MIPS_PCLO16: { - uint32_t FinalAddress = (Section.LoadAddress + Offset); + uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset); Insn &= 0xffff0000; Insn |= (Value - FinalAddress) & 0xffff; writeBytesUnaligned(Insn, TargetPtr, 4); @@ -603,7 +623,8 @@ void RuntimeDyldELF::resolveMIPS64Relocation(const SectionEntry &Section, CalculatedValue, SymOffset, SectionID); } - applyMIPS64Relocation(Section.Address + Offset, CalculatedValue, RelType); + applyMIPS64Relocation(Section.getAddressWithOffset(Offset), CalculatedValue, + RelType); } int64_t @@ -613,13 +634,12 @@ RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section, uint64_t SymOffset, SID SectionID) { DEBUG(dbgs() << "evaluateMIPS64Relocation, LocalAddress: 0x" - << format("%llx", Section.Address + Offset) + << format("%llx", Section.getAddressWithOffset(Offset)) << " FinalAddress: 0x" - << format("%llx", Section.LoadAddress + Offset) + << format("%llx", Section.getLoadAddressWithOffset(Offset)) << " Value: 0x" << format("%llx", Value) << " Type: 0x" << format("%x", Type) << " Addend: 0x" << format("%llx", Addend) - << " SymOffset: " << format("%x", SymOffset) - << "\n"); + << " SymOffset: " << format("%x", SymOffset) << "\n"); switch (Type) { default: @@ -672,35 +692,35 @@ RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section, return Value + Addend - (GOTAddr + 0x7ff0); } case ELF::R_MIPS_PC16: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress) >> 2) & 0xffff; } case ELF::R_MIPS_PC32: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return Value + Addend - FinalAddress; } case ELF::R_MIPS_PC18_S3: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); - return ((Value + Addend - ((FinalAddress | 7) ^ 7)) >> 3) & 0x3ffff; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); + return ((Value + Addend - (FinalAddress & ~0x7)) >> 3) & 0x3ffff; } case ELF::R_MIPS_PC19_S2: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); - return ((Value + Addend - FinalAddress) >> 2) & 0x7ffff; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); + return ((Value + Addend - (FinalAddress & ~0x3)) >> 2) & 0x7ffff; } case ELF::R_MIPS_PC21_S2: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); 
return ((Value + Addend - FinalAddress) >> 2) & 0x1fffff; } case ELF::R_MIPS_PC26_S2: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress) >> 2) & 0x3ffffff; } case ELF::R_MIPS_PCHI16: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return ((Value + Addend - FinalAddress + 0x8000) >> 16) & 0xffff; } case ELF::R_MIPS_PCLO16: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); return (Value + Addend - FinalAddress) & 0xffff; } } @@ -769,7 +789,7 @@ void RuntimeDyldELF::findPPC64TOCSection(const ELFObjectFileBase &Obj, // relocation) without a .toc directive. In this case just use the // first section (which is usually the .odp) since the code won't // reference the .toc base directly. - Rel.SymbolName = NULL; + Rel.SymbolName = nullptr; Rel.SectionID = 0; // The TOC consists of sections .got, .toc, .tocbss, .plt in that @@ -842,8 +862,9 @@ void RuntimeDyldELF::findOPDEntrySection(const ELFObjectFileBase &Obj, if (Rel.Addend != (int64_t)TargetSymbolOffset) continue; - section_iterator tsi(Obj.section_end()); - check(TargetSymbol->getSection(tsi)); + ErrorOr TSIOrErr = TargetSymbol->getSection(); + check(TSIOrErr.getError()); + section_iterator tsi = *TSIOrErr; bool IsCode = tsi->isText(); Rel.SectionID = findOrEmitSection(Obj, (*tsi), IsCode, LocalSections); Rel.Addend = (intptr_t)Addend; @@ -884,10 +905,30 @@ static inline uint16_t applyPPChighesta (uint64_t value) { return ((value + 0x8000) >> 48) & 0xffff; } +void RuntimeDyldELF::resolvePPC32Relocation(const SectionEntry &Section, + uint64_t Offset, uint64_t Value, + uint32_t Type, int64_t Addend) { + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_PPC_ADDR16_LO: + writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); + break; + case ELF::R_PPC_ADDR16_HI: + writeInt16BE(LocalAddress, applyPPChi(Value + Addend)); + break; + case ELF::R_PPC_ADDR16_HA: + writeInt16BE(LocalAddress, applyPPCha(Value + Addend)); + break; + } +} + void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); switch (Type) { default: llvm_unreachable("Relocation type not implemented yet!"); @@ -929,17 +970,17 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc)); } break; case ELF::R_PPC64_REL16_LO: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt16BE(LocalAddress, applyPPClo(Delta)); } break; case ELF::R_PPC64_REL16_HI: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; writeInt16BE(LocalAddress, applyPPChi(Delta)); } break; case ELF::R_PPC64_REL16_HA: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); uint64_t Delta = Value - FinalAddress + Addend; 
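// Worked example of the @ha/@lo split used here (values illustrative): for
// Delta = 0x12348765, applyPPClo yields 0x8765 and applyPPCha yields
// ((0x12348765 + 0x8000) >> 16) & 0xffff = 0x1235. The +0x8000 bias
// compensates for the low half being sign-extended when consumed, so that
// (ha << 16) + (int16_t)lo = 0x12350000 - 0x789B = 0x12348765 round-trips.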
writeInt16BE(LocalAddress, applyPPCha(Delta)); } break; @@ -950,22 +991,22 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt32BE(LocalAddress, Result); } break; case ELF::R_PPC64_REL24: { - uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); int32_t delta = static_cast(Value - FinalAddress + Addend); - if (SignExtend32<24>(delta) != delta) + if (SignExtend32<26>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL24 overflow"); // Generates a 'bl
<address>' instruction
       writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC));
     } break;
     case ELF::R_PPC64_REL32: {
-      uint64_t FinalAddress = (Section.LoadAddress + Offset);
+      uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
       int32_t delta = static_cast<int32_t>(Value - FinalAddress + Addend);
       if (SignExtend32<32>(delta) != delta)
         llvm_unreachable("Relocation R_PPC64_REL32 overflow");
       writeInt32BE(LocalAddress, delta);
     } break;
     case ELF::R_PPC64_REL64: {
-      uint64_t FinalAddress = (Section.LoadAddress + Offset);
+      uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
       uint64_t Delta = Value - FinalAddress + Addend;
       writeInt64BE(LocalAddress, Delta);
     } break;
@@ -978,27 +1019,27 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section,
 void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section,
                                               uint64_t Offset, uint64_t Value,
                                               uint32_t Type, int64_t Addend) {
-  uint8_t *LocalAddress = Section.Address + Offset;
+  uint8_t *LocalAddress = Section.getAddressWithOffset(Offset);
   switch (Type) {
   default:
     llvm_unreachable("Relocation type not implemented yet!");
     break;
   case ELF::R_390_PC16DBL:
   case ELF::R_390_PLT16DBL: {
-    int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+    int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset);
     assert(int16_t(Delta / 2) * 2 == Delta && "R_390_PC16DBL overflow");
     writeInt16BE(LocalAddress, Delta / 2);
     break;
   }
   case ELF::R_390_PC32DBL:
   case ELF::R_390_PLT32DBL: {
-    int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+    int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset);
     assert(int32_t(Delta / 2) * 2 == Delta && "R_390_PC32DBL overflow");
     writeInt32BE(LocalAddress, Delta / 2);
     break;
   }
   case ELF::R_390_PC32: {
-    int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset);
+    int64_t Delta = (Value + Addend) - Section.getLoadAddressWithOffset(Offset);
     assert(int32_t(Delta) == Delta && "R_390_PC32 overflow");
     writeInt32BE(LocalAddress, Delta);
     break;
@@ -1072,6 +1113,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
     else
       llvm_unreachable("Mips ABI not handled");
     break;
+  case Triple::ppc:
+    resolvePPC32Relocation(Section, Offset, Value, Type, Addend);
+    break;
   case Triple::ppc64: // Fall through.
case Triple::ppc64le: resolvePPC64Relocation(Section, Offset, Value, Type, Addend); @@ -1085,7 +1129,7 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, } void *RuntimeDyldELF::computePlaceholderAddress(unsigned SectionID, uint64_t Offset) const { - return (void*)(Sections[SectionID].ObjAddress + Offset); + return (void *)(Sections[SectionID].getObjAddress() + Offset); } void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset, unsigned RelType, RelocationValueRef Value) { @@ -1096,6 +1140,29 @@ void RuntimeDyldELF::processSimpleRelocation(unsigned SectionID, uint64_t Offset addRelocationForSection(RE, Value.SectionID); } +uint32_t RuntimeDyldELF::getMatchingLoRelocation(uint32_t RelType, + bool IsLocal) const { + switch (RelType) { + case ELF::R_MICROMIPS_GOT16: + if (IsLocal) + return ELF::R_MICROMIPS_LO16; + break; + case ELF::R_MICROMIPS_HI16: + return ELF::R_MICROMIPS_LO16; + case ELF::R_MIPS_GOT16: + if (IsLocal) + return ELF::R_MIPS_LO16; + break; + case ELF::R_MIPS_HI16: + return ELF::R_MIPS_LO16; + case ELF::R_MIPS_PCHI16: + return ELF::R_MIPS_PCLO16; + default: + break; + } + return ELF::R_MIPS_NONE; +} + relocation_iterator RuntimeDyldELF::processRelocationRef( unsigned SectionID, relocation_iterator RelI, const ObjectFile &O, ObjSectionToIDMap &ObjSectionToID, StubMap &Stubs) { @@ -1136,8 +1203,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // TODO: Now ELF SymbolRef::ST_Debug = STT_SECTION, it's not obviously // and can be changed by another developers. Maybe best way is add // a new symbol type ST_Section to SymbolRef and use it. - section_iterator si(Obj.section_end()); - Symbol->getSection(si); + section_iterator si = *Symbol->getSection(); if (si == Obj.section_end()) llvm_unreachable("Symbol section not found, bad object file format!"); DEBUG(dbgs() << "\t\tThis is section symbol\n"); @@ -1178,24 +1244,28 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, + resolveRelocation(Section, Offset, + (uint64_t)Section.getAddressWithOffset(i->second), RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. 
DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); - RelocationEntry REmovz_g3(SectionID, StubTargetAddr - Section.Address, + RelocationEntry REmovz_g3(SectionID, + StubTargetAddr - Section.getAddress(), ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend); - RelocationEntry REmovk_g2(SectionID, StubTargetAddr - Section.Address + 4, + RelocationEntry REmovk_g2(SectionID, StubTargetAddr - + Section.getAddress() + 4, ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend); - RelocationEntry REmovk_g1(SectionID, StubTargetAddr - Section.Address + 8, + RelocationEntry REmovk_g1(SectionID, StubTargetAddr - + Section.getAddress() + 8, ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend); - RelocationEntry REmovk_g0(SectionID, - StubTargetAddr - Section.Address + 12, + RelocationEntry REmovk_g0(SectionID, StubTargetAddr - + Section.getAddress() + 12, ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend); if (Value.SymbolName) { @@ -1210,9 +1280,10 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSection(REmovk_g0, Value.SectionID); } resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, RelType, - 0); - Section.StubOffset += getMaxStubSize(); + reinterpret_cast(Section.getAddressWithOffset( + Section.getStubOffset())), + RelType, 0); + Section.advanceStubOffset(getMaxStubSize()); } } else if (Arch == Triple::arm) { if (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL || @@ -1224,26 +1295,29 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // Look for an existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, - RelType, 0); + resolveRelocation( + Section, Offset, + reinterpret_cast(Section.getAddressWithOffset(i->second)), + RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, - ELF::R_ARM_ABS32, Value.Addend); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); + RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(), + ELF::R_ARM_ABS32, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, RelType, - 0); - Section.StubOffset += getMaxStubSize(); + resolveRelocation(Section, Offset, reinterpret_cast( + Section.getAddressWithOffset( + Section.getStubOffset())), + RelType, 0); + Section.advanceStubOffset(getMaxStubSize()); } } else { uint32_t *Placeholder = @@ -1282,15 +1356,16 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( } else { // Create a new stub function. 
DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); // Creating Hi and Lo relocations for the filled stub instructions. - RelocationEntry REHi(SectionID, StubTargetAddr - Section.Address, - ELF::R_MIPS_HI16, Value.Addend); - RelocationEntry RELo(SectionID, StubTargetAddr - Section.Address + 4, - ELF::R_MIPS_LO16, Value.Addend); + RelocationEntry REHi(SectionID, StubTargetAddr - Section.getAddress(), + ELF::R_MIPS_HI16, Value.Addend); + RelocationEntry RELo(SectionID, + StubTargetAddr - Section.getAddress() + 4, + ELF::R_MIPS_LO16, Value.Addend); if (Value.SymbolName) { addRelocationForSymbol(REHi, Value.SymbolName); @@ -1301,21 +1376,39 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSection(RELo, Value.SectionID); } - RelocationEntry RE(SectionID, Offset, RelType, Section.StubOffset); + RelocationEntry RE(SectionID, Offset, RelType, Section.getStubOffset()); addRelocationForSection(RE, SectionID); - Section.StubOffset += getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); } + } else if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16) { + int64_t Addend = (Opcode & 0x0000ffff) << 16; + RelocationEntry RE(SectionID, Offset, RelType, Addend); + PendingRelocs.push_back(std::make_pair(Value, RE)); + } else if (RelType == ELF::R_MIPS_LO16 || RelType == ELF::R_MIPS_PCLO16) { + int64_t Addend = Value.Addend + SignExtend32<16>(Opcode & 0x0000ffff); + for (auto I = PendingRelocs.begin(); I != PendingRelocs.end();) { + const RelocationValueRef &MatchingValue = I->first; + RelocationEntry &Reloc = I->second; + if (MatchingValue == Value && + RelType == getMatchingLoRelocation(Reloc.RelType) && + SectionID == Reloc.SectionID) { + Reloc.Addend += Addend; + if (Value.SymbolName) + addRelocationForSymbol(Reloc, Value.SymbolName); + else + addRelocationForSection(Reloc, Value.SectionID); + I = PendingRelocs.erase(I); + } else + ++I; + } + RelocationEntry RE(SectionID, Offset, RelType, Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); } else { - // FIXME: Calculate correct addends for R_MIPS_HI16, R_MIPS_LO16, - // R_MIPS_PCHI16 and R_MIPS_PCLO16 relocations. - if (RelType == ELF::R_MIPS_HI16 || RelType == ELF::R_MIPS_PCHI16) - Value.Addend += (Opcode & 0x0000ffff) << 16; - else if (RelType == ELF::R_MIPS_LO16) - Value.Addend += (Opcode & 0x0000ffff); - else if (RelType == ELF::R_MIPS_32) + if (RelType == ELF::R_MIPS_32) Value.Addend += Opcode; - else if (RelType == ELF::R_MIPS_PCLO16) - Value.Addend += SignExtend32<16>((Opcode & 0x0000ffff)); else if (RelType == ELF::R_MIPS_PC16) Value.Addend += SignExtend32<18>((Opcode & 0x0000ffff) << 2); else if (RelType == ELF::R_MIPS_PC19_S2) @@ -1353,7 +1446,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // an external symbol (Symbol::ST_Unknown) or if the target address // is not within the signed 24-bits branch address. 
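The *HI16/*LO16 pairing above exists because a MIPS address is materialized in two halves and the hardware sign-extends the low half, so the high half must be carry-adjusted before the two relocations can be patched independently. A minimal sketch of that split, with hypothetical helper names rather than anything from RuntimeDyld:

#include <cassert>
#include <cstdint>

static uint32_t hi16(uint32_t Value) {
  // Add 0x8000 so that the implicit sign-extension of the low 16 bits
  // is compensated: (hi16(V) << 16) + (int16_t)lo16(V) == V.
  return (Value + 0x8000) >> 16;
}

static uint32_t lo16(uint32_t Value) { return Value & 0xffff; }

int main() {
  uint32_t V = 0x1234ffff; // low half is negative as an int16_t
  uint32_t Reassembled =
      (hi16(V) << 16) + static_cast<int16_t>(lo16(V)); // carry cancels out
  assert(Reassembled == V);
  return 0;
}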
SectionEntry &Section = Sections[SectionID]; - uint8_t *Target = Section.Address + Offset; + uint8_t *Target = Section.getAddressWithOffset(Offset); bool RangeOverflow = false; if (SymType != SymbolRef::ST_Unknown) { if (AbiVariant != 2) { @@ -1367,10 +1460,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( uint8_t SymOther = Symbol->getOther(); Value.Addend += ELF::decodePPC64LocalEntryOffset(SymOther); } - uint8_t *RelocTarget = Sections[Value.SectionID].Address + Value.Addend; + uint8_t *RelocTarget = + Sections[Value.SectionID].getAddressWithOffset(Value.Addend); int32_t delta = static_cast(Target - RelocTarget); - // If it is within 24-bits branch range, just set the branch target - if (SignExtend32<24>(delta) == delta) { + // If it is within 26-bits branch range, just set the branch target + if (SignExtend32<26>(delta) == delta) { RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); @@ -1387,23 +1481,25 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( if (i != Stubs.end()) { // Symbol function stub already created, just relocate to it resolveRelocation(Section, Offset, - (uint64_t)Section.Address + i->second, RelType, 0); + reinterpret_cast( + Section.getAddressWithOffset(i->second)), + RelType, 0); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); - Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset, - AbiVariant); - RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset()), + AbiVariant); + RelocationEntry RE(SectionID, StubTargetAddr - Section.getAddress(), ELF::R_PPC64_ADDR64, Value.Addend); // Generates the 64-bits address loads as exemplified in section // 4.5.1 in PPC64 ELF ABI. Note that the relocations need to // apply to the low part of the instructions, so we have to update // the offset according to the target endianness. - uint64_t StubRelocOffset = StubTargetAddr - Section.Address; + uint64_t StubRelocOffset = StubTargetAddr - Section.getAddress(); if (!IsTargetLittleEndian) StubRelocOffset += 2; @@ -1428,10 +1524,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSection(REl, Value.SectionID); } - resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, + resolveRelocation(Section, Offset, reinterpret_cast( + Section.getAddressWithOffset( + Section.getStubOffset())), RelType, 0); - Section.StubOffset += getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); } if (SymType == SymbolRef::ST_Unknown) { // Restore the TOC for external calls @@ -1450,11 +1547,11 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // These relocations are supposed to subtract the TOC address from // the final value. This does not fit cleanly into the RuntimeDyld // scheme, since there may be *two* sections involved in determining - // the relocation value (the section of the symbol refered to by the + // the relocation value (the section of the symbol referred to by the // relocation, and the TOC section associated with the current module). 
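The widened range test earlier in this hunk reflects how R_PPC64_REL24 is encoded: the instruction stores a 24-bit word offset, which covers a 26-bit byte displacement once shifted left by two. A small sketch of that check, assuming LLVM-style SignExtend32 semantics; the helper names are illustrative only:

#include <cassert>
#include <cstdint>

template <unsigned B> static int32_t signExtend32(uint32_t X) {
  // Same shape as LLVM's SignExtend32: shift the B-bit field to the top,
  // then arithmetic-shift it back down.
  return int32_t(X << (32 - B)) >> (32 - B);
}

static bool fitsInRel24(int64_t ByteDelta) {
  // Instructions are 4-byte aligned, so the low two bits must be zero, and
  // the value must survive a round-trip through 26 signed bits.
  return (ByteDelta & 0x3) == 0 &&
         signExtend32<26>(static_cast<uint32_t>(ByteDelta)) == ByteDelta;
}

int main() {
  assert(fitsInRel24(0x01fffffc));  // just inside the +/-32 MiB window
  assert(!fitsInRel24(0x02000000)); // one step outside
  return 0;
}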
// // Fortunately, these relocations are currently only ever generated - // refering to symbols that themselves reside in the TOC, which means + // referring to symbols that themselves reside in the TOC, which means // that the two sections are actually the same. Thus they cancel out // and we can immediately resolve the relocation right now. switch (RelType) { @@ -1511,16 +1608,17 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; if (i != Stubs.end()) { - StubAddress = uintptr_t(Section.Address) + i->second; + StubAddress = uintptr_t(Section.getAddressWithOffset(i->second)); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. DEBUG(dbgs() << " Create a new stub function\n"); - uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t BaseAddress = uintptr_t(Section.getAddress()); uintptr_t StubAlignment = getStubAlignment(); - StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) & - -StubAlignment; + StubAddress = + (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & + -StubAlignment; unsigned StubOffset = StubAddress - BaseAddress; Stubs[Value] = StubOffset; @@ -1531,7 +1629,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); - Section.StubOffset = StubOffset + getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); } if (RelType == ELF::R_390_GOTENT) @@ -1564,37 +1662,39 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( StubMap::const_iterator i = Stubs.find(Value); uintptr_t StubAddress; if (i != Stubs.end()) { - StubAddress = uintptr_t(Section.Address) + i->second; - DEBUG(dbgs() << " Stub function found\n"); + StubAddress = uintptr_t(Section.getAddress()) + i->second; + DEBUG(dbgs() << " Stub function found\n"); } else { - // Create a new stub function (equivalent to a PLT entry). - DEBUG(dbgs() << " Create a new stub function\n"); + // Create a new stub function (equivalent to a PLT entry). 
+ DEBUG(dbgs() << " Create a new stub function\n"); - uintptr_t BaseAddress = uintptr_t(Section.Address); - uintptr_t StubAlignment = getStubAlignment(); - StubAddress = (BaseAddress + Section.StubOffset + StubAlignment - 1) & - -StubAlignment; - unsigned StubOffset = StubAddress - BaseAddress; - Stubs[Value] = StubOffset; - createStubFunction((uint8_t *)StubAddress); + uintptr_t BaseAddress = uintptr_t(Section.getAddress()); + uintptr_t StubAlignment = getStubAlignment(); + StubAddress = + (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & + -StubAlignment; + unsigned StubOffset = StubAddress - BaseAddress; + Stubs[Value] = StubOffset; + createStubFunction((uint8_t *)StubAddress); - // Bump our stub offset counter - Section.StubOffset = StubOffset + getMaxStubSize(); + // Bump our stub offset counter + Section.advanceStubOffset(getMaxStubSize()); - // Allocate a GOT Entry - uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); + // Allocate a GOT Entry + uint64_t GOTOffset = allocateGOTEntries(SectionID, 1); - // The load of the GOT address has an addend of -4 - resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4); + // The load of the GOT address has an addend of -4 + resolveGOTOffsetRelocation(SectionID, StubOffset + 2, GOTOffset - 4); - // Fill in the value of the symbol we're targeting into the GOT - addRelocationForSymbol(computeGOTOffsetRE(SectionID,GOTOffset,0,ELF::R_X86_64_64), - Value.SymbolName); + // Fill in the value of the symbol we're targeting into the GOT + addRelocationForSymbol( + computeGOTOffsetRE(SectionID, GOTOffset, 0, ELF::R_X86_64_64), + Value.SymbolName); } // Make the target call a call into the stub table. resolveRelocation(Section, Offset, StubAddress, ELF::R_X86_64_PC32, - Addend); + Addend); } else { RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend, Value.Offset); @@ -1670,7 +1770,7 @@ uint64_t RuntimeDyldELF::allocateGOTEntries(unsigned SectionID, unsigned no) GOTSectionID = Sections.size(); // Reserve a section id. 
We'll allocate the section later // once we know the total size - Sections.push_back(SectionEntry(".got", 0, 0, 0)); + Sections.push_back(SectionEntry(".got", nullptr, 0, 0, 0)); } uint64_t StartOffset = CurrentGOTIndex * getGOTEntrySize(); CurrentGOTIndex += no; @@ -1693,6 +1793,10 @@ RelocationEntry RuntimeDyldELF::computeGOTOffsetRE(unsigned SectionID, uint64_t void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, ObjSectionToIDMap &SectionMap) { + if (IsMipsO32ABI) + if (!PendingRelocs.empty()) + report_fatal_error("Can't find matching LO16 reloc"); + // If necessary, allocate the global offset table if (GOTSectionID != 0) { // Allocate memory for the section @@ -1702,7 +1806,8 @@ void RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj, if (!Addr) report_fatal_error("Unable to allocate memory for GOT!"); - Sections[GOTSectionID] = SectionEntry(".got", Addr, TotalSize, 0); + Sections[GOTSectionID] = + SectionEntry(".got", Addr, TotalSize, TotalSize, 0); if (Checker) Checker->registerSection(Obj.getFileName(), GOTSectionID); @@ -1746,4 +1851,23 @@ bool RuntimeDyldELF::isCompatibleFile(const object::ObjectFile &Obj) const { return Obj.isELF(); } +bool RuntimeDyldELF::relocationNeedsStub(const RelocationRef &R) const { + if (Arch != Triple::x86_64) + return true; // Conservative answer + + switch (R.getType()) { + default: + return true; // Conservative answer + + case ELF::R_X86_64_GOTPCREL: + case ELF::R_X86_64_PC32: + case ELF::R_X86_64_PC64: + case ELF::R_X86_64_64: + // We know that these relocation types won't need a stub function. This list + // can be extended as needed. + return false; + } +} + } // namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 1a2552deed95..041811d3e285 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -43,6 +43,9 @@ class RuntimeDyldELF : public RuntimeDyldImpl { void resolveMIPSRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, uint32_t Type, int32_t Addend); + void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset, + uint64_t Value, uint32_t Type, int64_t Addend); + void resolvePPC64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend); @@ -120,6 +123,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl { // no particular advanced processing. void processSimpleRelocation(unsigned SectionID, uint64_t Offset, unsigned RelType, RelocationValueRef Value); + // Return the matching *LO16 relocation (Mips specific) + uint32_t getMatchingLoRelocation(uint32_t RelType, + bool IsLocal = false) const; + // The tentative ID for the GOT section unsigned GOTSectionID; @@ -135,12 +142,18 @@ class RuntimeDyldELF : public RuntimeDyldImpl { // A map to avoid duplicate got entries (Mips64 specific) StringMap<uint64_t> GOTSymbolOffsets; + // *HI16 relocations will be added for resolving when we find the matching + // *LO16 part. (Mips specific) + SmallVector<std::pair<RelocationValueRef, RelocationEntry>, 8> PendingRelocs; + // When a module is loaded we save the SectionID of the EH frame section // in a table until we receive a request to register all unregistered // EH frame sections with the memory manager.
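The allocateGOTEntries/finalizeLoad pair above is a bump allocator: it reserves a section ID up front, hands out offsets as index times entry size, and only materializes the backing memory once the total count is known. A stripped-down sketch of that bookkeeping (a hypothetical class, not the RuntimeDyldELF API):

#include <cassert>
#include <cstdint>

class GOTAllocator {
  uint64_t EntrySize;         // 8 on 64-bit targets
  uint64_t CurrentIndex = 0;

public:
  explicit GOTAllocator(uint64_t EntrySize) : EntrySize(EntrySize) {}

  // Reserve N consecutive entries; return the byte offset of the first.
  uint64_t allocate(unsigned N) {
    uint64_t StartOffset = CurrentIndex * EntrySize;
    CurrentIndex += N;
    return StartOffset;
  }

  // Total size to request from the memory manager at finalize time.
  uint64_t totalSize() const { return CurrentIndex * EntrySize; }
};

int main() {
  GOTAllocator GOT(8);
  assert(GOT.allocate(1) == 0);
  assert(GOT.allocate(2) == 8);
  assert(GOT.totalSize() == 24);
  return 0;
}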
SmallVector<SID, 2> UnregisteredEHFrameSections; SmallVector<SID, 2> RegisteredEHFrameSections; + bool relocationNeedsStub(const RelocationRef &R) const override; + public: RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr, RuntimeDyld::SymbolResolver &Resolver); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index e085a9296e8d..dafd3c8793c3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -30,6 +30,7 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include <map> +#include <unordered_map> #include <system_error> using namespace llvm; @@ -50,7 +51,6 @@ class Twine; /// SectionEntry - represents a section emitted into memory by the dynamic /// linker. class SectionEntry { -public: /// Name - section name. std::string Name; @@ -70,15 +70,54 @@ public: /// relocations (like ARM). uintptr_t StubOffset; + /// The total amount of space allocated for this section. This includes the + /// section size and the maximum amount of space that the stubs can occupy. + size_t AllocationSize; + /// ObjAddress - address of the section in the in-memory object file. Used /// for calculating relocations in some object formats (like MachO). uintptr_t ObjAddress; +public: SectionEntry(StringRef name, uint8_t *address, size_t size, - uintptr_t objAddress) + size_t allocationSize, uintptr_t objAddress) : Name(name), Address(address), Size(size), LoadAddress(reinterpret_cast<uintptr_t>(address)), StubOffset(size), - ObjAddress(objAddress) {} + AllocationSize(allocationSize), ObjAddress(objAddress) { + // AllocationSize is used only in asserts, prevent an "unused private field" + // warning: + (void)AllocationSize; + } + + StringRef getName() const { return Name; } + + uint8_t *getAddress() const { return Address; } + + /// \brief Return the address of this section with an offset. + uint8_t *getAddressWithOffset(unsigned OffsetBytes) const { + assert(OffsetBytes <= AllocationSize && "Offset out of bounds!"); + return Address + OffsetBytes; + } + + size_t getSize() const { return Size; } + + uint64_t getLoadAddress() const { return LoadAddress; } + void setLoadAddress(uint64_t LA) { LoadAddress = LA; } + + /// \brief Return the load address of this section with an offset. + uint64_t getLoadAddressWithOffset(unsigned OffsetBytes) const { + assert(OffsetBytes <= AllocationSize && "Offset out of bounds!"); + return LoadAddress + OffsetBytes; + } + + uintptr_t getStubOffset() const { return StubOffset; } + + void advanceStubOffset(unsigned StubSize) { + StubOffset += StubSize; + assert(StubOffset <= AllocationSize && "Not enough space allocated!"); + } + + uintptr_t getObjAddress() const { return ObjAddress; } }; /// RelocationEntry - used to represent relocations internally in the dynamic @@ -188,6 +227,8 @@ class RuntimeDyldImpl { friend class RuntimeDyld::LoadedObjectInfo; friend class RuntimeDyldCheckerImpl; protected: + static const unsigned AbsoluteSymbolSection = ~0U; + // The MemoryManager to load objects into. RuntimeDyld::MemoryManager &MemMgr; @@ -224,7 +265,7 @@ protected: // Relocations to sections already loaded. Indexed by SectionID which is the // source of the address. The target where the address will be written is // SectionID/Offset in the relocation itself. - DenseMap<unsigned, RelocationList> Relocations; + std::unordered_map<unsigned, RelocationList> Relocations; // Relocations to external symbols that are not yet resolved.
Symbols are // external when they aren't found in the global symbol table of all loaded @@ -269,11 +310,11 @@ protected: } uint64_t getSectionLoadAddress(unsigned SectionID) const { - return Sections[SectionID].LoadAddress; + return Sections[SectionID].getLoadAddress(); } uint8_t *getSectionAddress(unsigned SectionID) const { - return (uint8_t *)Sections[SectionID].Address; + return Sections[SectionID].getAddress(); } void writeInt16BE(uint8_t *Addr, uint16_t Value) { @@ -378,7 +419,12 @@ protected: const SectionRef &Section); // \brief Implementation of the generic part of the loadObject algorithm. - std::pair loadObjectImpl(const object::ObjectFile &Obj); + ObjSectionToIDMap loadObjectImpl(const object::ObjectFile &Obj); + + // \brief Return true if the relocation R may require allocating a stub. + virtual bool relocationNeedsStub(const RelocationRef &R) const { + return true; // Conservative answer + } public: RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr, @@ -407,6 +453,9 @@ public: if (pos == GlobalSymbolTable.end()) return nullptr; const auto &SymInfo = pos->second; + // Absolute symbols do not have a local address. + if (SymInfo.getSectionID() == AbsoluteSymbolSection) + return nullptr; return getSectionAddress(SymInfo.getSectionID()) + SymInfo.getOffset(); } @@ -417,8 +466,10 @@ public: if (pos == GlobalSymbolTable.end()) return nullptr; const auto &SymEntry = pos->second; - uint64_t TargetAddr = - getSectionLoadAddress(SymEntry.getSectionID()) + SymEntry.getOffset(); + uint64_t SectionAddr = 0; + if (SymEntry.getSectionID() != AbsoluteSymbolSection) + SectionAddr = getSectionLoadAddress(SymEntry.getSectionID()); + uint64_t TargetAddr = SectionAddr + SymEntry.getOffset(); return RuntimeDyld::SymbolInfo(TargetAddr, SymEntry.getFlags()); } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index c0741141757c..739e8d65dbf4 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -26,12 +26,12 @@ using namespace llvm::object; namespace { -class LoadedMachOObjectInfo +class LoadedMachOObjectInfo final : public RuntimeDyld::LoadedObjectInfoHelper { public: - LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld, unsigned BeginIdx, - unsigned EndIdx) - : LoadedObjectInfoHelper(RTDyld, BeginIdx, EndIdx) {} + LoadedMachOObjectInfo(RuntimeDyldImpl &RTDyld, + ObjSectionToIDMap ObjSecToIDMap) + : LoadedObjectInfoHelper(RTDyld, std::move(ObjSecToIDMap)) {} OwningBinary getObjectForDebug(const ObjectFile &Obj) const override { @@ -45,11 +45,47 @@ namespace llvm { int64_t RuntimeDyldMachO::memcpyAddend(const RelocationEntry &RE) const { unsigned NumBytes = 1 << RE.Size; - uint8_t *Src = Sections[RE.SectionID].Address + RE.Offset; + uint8_t *Src = Sections[RE.SectionID].getAddress() + RE.Offset; return static_cast(readBytesUnaligned(Src, NumBytes)); } +relocation_iterator RuntimeDyldMachO::processScatteredVANILLA( + unsigned SectionID, relocation_iterator RelI, + const ObjectFile &BaseObjT, + RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID) { + const MachOObjectFile &Obj = + static_cast(BaseObjT); + MachO::any_relocation_info RE = + Obj.getRelocation(RelI->getRawDataRefImpl()); + + SectionEntry &Section = Sections[SectionID]; + uint32_t RelocType = Obj.getAnyRelocationType(RE); + bool IsPCRel = Obj.getAnyRelocationPCRel(RE); + unsigned Size = Obj.getAnyRelocationLength(RE); + uint64_t Offset = RelI->getOffset(); + uint8_t *LocalAddress = 
Section.getAddressWithOffset(Offset); + unsigned NumBytes = 1 << Size; + int64_t Addend = readBytesUnaligned(LocalAddress, NumBytes); + + unsigned SymbolBaseAddr = Obj.getScatteredRelocationValue(RE); + section_iterator TargetSI = getSectionByAddress(Obj, SymbolBaseAddr); + assert(TargetSI != Obj.section_end() && "Can't find section for symbol"); + uint64_t SectionBaseAddr = TargetSI->getAddress(); + SectionRef TargetSection = *TargetSI; + bool IsCode = TargetSection.isText(); + uint32_t TargetSectionID = + findOrEmitSection(Obj, TargetSection, IsCode, ObjSectionToID); + + Addend -= SectionBaseAddr; + RelocationEntry R(SectionID, Offset, RelocType, Addend, IsPCRel, Size); + + addRelocationForSection(R, TargetSectionID); + + return ++RelI; +} + + RelocationValueRef RuntimeDyldMachO::getRelocationValueRef( const ObjectFile &BaseTObj, const relocation_iterator &RI, const RelocationEntry &RE, ObjSectionToIDMap &ObjSectionToID) { @@ -99,8 +135,8 @@ void RuntimeDyldMachO::makeValueAddendPCRel(RelocationValueRef &Value, void RuntimeDyldMachO::dumpRelocationToResolve(const RelocationEntry &RE, uint64_t Value) const { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint8_t *LocalAddress = Section.getAddress() + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddress() + RE.Offset; dbgs() << "resolveRelocation Section: " << RE.SectionID << " LocalAddress: " << format("%p", LocalAddress) @@ -147,10 +183,9 @@ void RuntimeDyldMachO::populateIndirectSymbolPointersSection( "Pointers section does not contain a whole number of stubs?"); DEBUG(dbgs() << "Populating pointer table section " - << Sections[PTSectionID].Name - << ", Section ID " << PTSectionID << ", " - << NumPTEntries << " entries, " << PTEntrySize - << " bytes each:\n"); + << Sections[PTSectionID].getName() << ", Section ID " + << PTSectionID << ", " << NumPTEntries << " entries, " + << PTEntrySize << " bytes each:\n"); for (unsigned i = 0; i < NumPTEntries; ++i) { unsigned SymbolIndex = @@ -204,7 +239,7 @@ void RuntimeDyldMachOCRTPBase::finalizeLoad(const ObjectFile &Obj, } template -unsigned char *RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P, +unsigned char *RuntimeDyldMachOCRTPBase::processFDE(uint8_t *P, int64_t DeltaForText, int64_t DeltaForEH) { typedef typename Impl::TargetPtrT TargetPtrT; @@ -213,7 +248,7 @@ unsigned char *RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P, << ", Delta for EH: " << DeltaForEH << "\n"); uint32_t Length = readBytesUnaligned(P, 4); P += 4; - unsigned char *Ret = P + Length; + uint8_t *Ret = P + Length; uint32_t Offset = readBytesUnaligned(P, 4); if (Offset == 0) // is a CIE return Ret; @@ -240,9 +275,9 @@ unsigned char *RuntimeDyldMachOCRTPBase::processFDE(unsigned char *P, } static int64_t computeDelta(SectionEntry *A, SectionEntry *B) { - int64_t ObjDistance = - static_cast(A->ObjAddress) - static_cast(B->ObjAddress); - int64_t MemDistance = A->LoadAddress - B->LoadAddress; + int64_t ObjDistance = static_cast(A->getObjAddress()) - + static_cast(B->getObjAddress()); + int64_t MemDistance = A->getLoadAddress() - B->getLoadAddress(); return ObjDistance - MemDistance; } @@ -265,14 +300,14 @@ void RuntimeDyldMachOCRTPBase::registerEHFrames() { if (ExceptTab) DeltaForEH = computeDelta(ExceptTab, EHFrame); - unsigned char *P = EHFrame->Address; - unsigned char *End = P + EHFrame->Size; + uint8_t *P = EHFrame->getAddress(); + uint8_t *End = P + EHFrame->getSize(); do { P = 
processFDE(P, DeltaForText, DeltaForEH); } while (P != End); - MemMgr.registerEHFrames(EHFrame->Address, EHFrame->LoadAddress, - EHFrame->Size); + MemMgr.registerEHFrames(EHFrame->getAddress(), EHFrame->getLoadAddress(), + EHFrame->getSize()); } UnregisteredEHFrameSections.clear(); } @@ -298,10 +333,7 @@ RuntimeDyldMachO::create(Triple::ArchType Arch, std::unique_ptr RuntimeDyldMachO::loadObject(const object::ObjectFile &O) { - unsigned SectionStartIdx, SectionEndIdx; - std::tie(SectionStartIdx, SectionEndIdx) = loadObjectImpl(O); - return llvm::make_unique(*this, SectionStartIdx, - SectionEndIdx); + return llvm::make_unique(*this, loadObjectImpl(O)); } } // end namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 0d7364f78597..c8ae47b0db22 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -79,6 +79,12 @@ protected: return RelocationEntry(SectionID, Offset, RelType, 0, IsPCRel, Size); } + /// Process a scattered vanilla relocation. + relocation_iterator processScatteredVANILLA( + unsigned SectionID, relocation_iterator RelI, + const ObjectFile &BaseObjT, + RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID); + /// Construct a RelocationValueRef representing the relocation target. /// For Symbols in known sections, this will return a RelocationValueRef /// representing a (SectionID, Offset) pair. @@ -140,7 +146,7 @@ private: Impl &impl() { return static_cast(*this); } const Impl &impl() const { return static_cast(*this); } - unsigned char *processFDE(unsigned char *P, int64_t DeltaForText, + unsigned char *processFDE(uint8_t *P, int64_t DeltaForText, int64_t DeltaForEH); public: diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h new file mode 100644 index 000000000000..fbfbb3285233 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h @@ -0,0 +1,201 @@ +//===--- RuntimeDyldCOFFI386.h --- COFF/X86_64 specific code ---*- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// COFF x86 support for MC-JIT runtime dynamic linker. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H +#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFI386_H + +#include "llvm/Object/COFF.h" +#include "llvm/Support/COFF.h" +#include "../RuntimeDyldCOFF.h" + +#define DEBUG_TYPE "dyld" + +namespace llvm { + +class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF { +public: + RuntimeDyldCOFFI386(RuntimeDyld::MemoryManager &MM, + RuntimeDyld::SymbolResolver &Resolver) + : RuntimeDyldCOFF(MM, Resolver) {} + + unsigned getMaxStubSize() override { + return 8; // 2-byte jmp instruction + 32-bit relative address + 2 byte pad + } + + unsigned getStubAlignment() override { return 1; } + + relocation_iterator processRelocationRef(unsigned SectionID, + relocation_iterator RelI, + const ObjectFile &Obj, + ObjSectionToIDMap &ObjSectionToID, + StubMap &Stubs) override { + auto Symbol = RelI->getSymbol(); + if (Symbol == Obj.symbol_end()) + report_fatal_error("Unknown symbol in relocation"); + + ErrorOr TargetNameOrErr = Symbol->getName(); + if (auto EC = TargetNameOrErr.getError()) + report_fatal_error(EC.message()); + StringRef TargetName = *TargetNameOrErr; + + auto Section = *Symbol->getSection(); + + uint64_t RelType = RelI->getType(); + uint64_t Offset = RelI->getOffset(); + +#if !defined(NDEBUG) + SmallString<32> RelTypeName; + RelI->getTypeName(RelTypeName); +#endif + DEBUG(dbgs() << "\t\tIn Section " << SectionID << " Offset " << Offset + << " RelType: " << RelTypeName << " TargetName: " << TargetName + << "\n"); + + unsigned TargetSectionID = -1; + if (Section == Obj.section_end()) { + RelocationEntry RE(SectionID, Offset, RelType, 0, -1, 0, 0, 0, false, 0); + addRelocationForSymbol(RE, TargetName); + } else { + TargetSectionID = + findOrEmitSection(Obj, *Section, Section->isText(), ObjSectionToID); + + switch (RelType) { + case COFF::IMAGE_REL_I386_ABSOLUTE: + // This relocation is ignored. + break; + case COFF::IMAGE_REL_I386_DIR32: + case COFF::IMAGE_REL_I386_DIR32NB: + case COFF::IMAGE_REL_I386_REL32: { + RelocationEntry RE = + RelocationEntry(SectionID, Offset, RelType, 0, TargetSectionID, + getSymbolOffset(*Symbol), 0, 0, false, 0); + addRelocationForSection(RE, TargetSectionID); + break; + } + case COFF::IMAGE_REL_I386_SECTION: { + RelocationEntry RE = + RelocationEntry(TargetSectionID, Offset, RelType, 0); + addRelocationForSection(RE, TargetSectionID); + break; + } + case COFF::IMAGE_REL_I386_SECREL: { + RelocationEntry RE = RelocationEntry(SectionID, Offset, RelType, + getSymbolOffset(*Symbol)); + addRelocationForSection(RE, TargetSectionID); + break; + } + default: + llvm_unreachable("unsupported relocation type"); + } + + } + + return ++RelI; + } + + void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { + const auto Section = Sections[RE.SectionID]; + uint8_t *Target = Section.getAddressWithOffset(RE.Offset); + + switch (RE.RelType) { + case COFF::IMAGE_REL_I386_ABSOLUTE: + // This relocation is ignored. + break; + case COFF::IMAGE_REL_I386_DIR32: { + // The target's 32-bit VA. + uint64_t Result = + RE.Sections.SectionA == static_cast(-1) + ? 
Value + : Sections[RE.Sections.SectionA].getLoadAddressWithOffset( + RE.Addend); + assert(static_cast(Result) <= INT32_MAX && + "relocation overflow"); + assert(static_cast(Result) >= INT32_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_DIR32" + << " TargetSection: " << RE.Sections.SectionA + << " Value: " << format("0x%08" PRIx32, Result) << '\n'); + writeBytesUnaligned(Result, Target, 4); + break; + } + case COFF::IMAGE_REL_I386_DIR32NB: { + // The target's 32-bit RVA. + // NOTE: use Section[0].getLoadAddress() as an approximation of ImageBase + uint64_t Result = + Sections[RE.Sections.SectionA].getLoadAddressWithOffset(RE.Addend) - + Sections[0].getLoadAddress(); + assert(static_cast(Result) <= INT32_MAX && + "relocation overflow"); + assert(static_cast(Result) >= INT32_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_DIR32NB" + << " TargetSection: " << RE.Sections.SectionA + << " Value: " << format("0x%08" PRIx32, Result) << '\n'); + writeBytesUnaligned(Result, Target, 4); + break; + } + case COFF::IMAGE_REL_I386_REL32: { + // 32-bit relative displacement to the target. + uint64_t Result = Sections[RE.Sections.SectionA].getLoadAddress() - + Section.getLoadAddress() + RE.Addend - 4 - RE.Offset; + assert(static_cast(Result) <= INT32_MAX && + "relocation overflow"); + assert(static_cast(Result) >= INT32_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_REL32" + << " TargetSection: " << RE.Sections.SectionA + << " Value: " << format("0x%08" PRIx32, Result) << '\n'); + writeBytesUnaligned(Result, Target, 4); + break; + } + case COFF::IMAGE_REL_I386_SECTION: + // 16-bit section index of the section that contains the target. + assert(static_cast(RE.SectionID) <= INT16_MAX && + "relocation overflow"); + assert(static_cast(RE.SectionID) >= INT16_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_SECTION Value: " << RE.SectionID + << '\n'); + writeBytesUnaligned(RE.SectionID, Target, 2); + break; + case COFF::IMAGE_REL_I386_SECREL: + // 32-bit offset of the target from the beginning of its section. + assert(static_cast(RE.Addend) <= INT32_MAX && + "relocation overflow"); + assert(static_cast(RE.Addend) >= INT32_MIN && + "relocation underflow"); + DEBUG(dbgs() << "\t\tOffset: " << RE.Offset + << " RelType: IMAGE_REL_I386_SECREL Value: " << RE.Addend + << '\n'); + writeBytesUnaligned(RE.Addend, Target, 2); + break; + default: + llvm_unreachable("unsupported relocation type"); + } + } + + void registerEHFrames() override {} + void deregisterEHFrames() override {} + + void finalizeLoad(const ObjectFile &Obj, + ObjSectionToIDMap &SectionMap) override {} +}; + +} + +#endif + diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h index 408227eb0f21..25f538d8f3da 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h @@ -62,7 +62,7 @@ public: // symbol in the target address space. 
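The IMAGE_REL_I386_REL32 case above stores the displacement from the end of the 4-byte fixup field to the target, which is why the computation subtracts an extra 4. A sketch of just that arithmetic, with illustrative names that are not part of the RuntimeDyld API:

#include <cassert>
#include <cstdint>

static uint32_t computeRel32(uint64_t TargetAddr, uint64_t FixupAddr) {
  // FixupAddr is where the 4-byte displacement itself is stored; the CPU
  // adds the displacement to the address *after* the field, hence the +4.
  return static_cast<uint32_t>(TargetAddr - (FixupAddr + 4));
}

int main() {
  // A call at 0x1000 whose displacement field starts at 0x1001,
  // targeting 0x2000: disp = 0x2000 - 0x1005.
  assert(computeRel32(0x2000, 0x1001) == 0xFFB);
  return 0;
}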
void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *Target = Section.Address + RE.Offset; + uint8_t *Target = Section.getAddressWithOffset(RE.Offset); switch (RE.RelType) { @@ -72,8 +72,7 @@ public: case COFF::IMAGE_REL_AMD64_REL32_3: case COFF::IMAGE_REL_AMD64_REL32_4: case COFF::IMAGE_REL_AMD64_REL32_5: { - uint32_t *TargetAddress = (uint32_t *)Target; - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); // Delta is the distance from the start of the reloc to the end of the // instruction with the reloc. uint64_t Delta = 4 + (RE.RelType - COFF::IMAGE_REL_AMD64_REL32); @@ -81,7 +80,7 @@ public: uint64_t Result = Value + RE.Addend; assert(((int64_t)Result <= INT32_MAX) && "Relocation overflow"); assert(((int64_t)Result >= INT32_MIN) && "Relocation underflow"); - *TargetAddress = Result; + writeBytesUnaligned(Result, Target, 4); break; } @@ -92,14 +91,12 @@ public: // within a 32 bit offset from the base. // // For now we just set these to zero. - uint32_t *TargetAddress = (uint32_t *)Target; - *TargetAddress = 0; + writeBytesUnaligned(0, Target, 4); break; } case COFF::IMAGE_REL_AMD64_ADDR64: { - uint64_t *TargetAddress = (uint64_t *)Target; - *TargetAddress = Value + RE.Addend; + writeBytesUnaligned(Value + RE.Addend, Target, 8); break; } @@ -119,8 +116,7 @@ public: symbol_iterator Symbol = RelI->getSymbol(); if (Symbol == Obj.symbol_end()) report_fatal_error("Unknown symbol in relocation"); - section_iterator SecI(Obj.section_end()); - Symbol->getSection(SecI); + section_iterator SecI = *Symbol->getSection(); // If there is no section, this must be an external reference. const bool IsExtern = SecI == Obj.section_end(); @@ -129,7 +125,7 @@ public: uint64_t Offset = RelI->getOffset(); uint64_t Addend = 0; SectionEntry &Section = Sections[SectionID]; - uintptr_t ObjTarget = Section.ObjAddress + Offset; + uintptr_t ObjTarget = Section.getObjAddress() + Offset; switch (RelType) { @@ -140,14 +136,14 @@ public: case COFF::IMAGE_REL_AMD64_REL32_4: case COFF::IMAGE_REL_AMD64_REL32_5: case COFF::IMAGE_REL_AMD64_ADDR32NB: { - uint32_t *Displacement = (uint32_t *)ObjTarget; - Addend = *Displacement; + uint8_t *Displacement = (uint8_t *)ObjTarget; + Addend = readBytesUnaligned(Displacement, 4); break; } case COFF::IMAGE_REL_AMD64_ADDR64: { - uint64_t *Displacement = (uint64_t *)ObjTarget; - Addend = *Displacement; + uint8_t *Displacement = (uint8_t *)ObjTarget; + Addend = readBytesUnaligned(Displacement, 8); break; } @@ -182,9 +178,9 @@ public: unsigned getStubAlignment() override { return 1; } void registerEHFrames() override { for (auto const &EHFrameSID : UnregisteredEHFrameSections) { - uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; - uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; - size_t EHFrameSize = Sections[EHFrameSID].Size; + uint8_t *EHFrameAddr = Sections[EHFrameSID].getAddress(); + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].getLoadAddress(); + size_t EHFrameSize = Sections[EHFrameSID].getSize(); MemMgr.registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); RegisteredEHFrameSections.push_back(EHFrameSID); } diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h index 7bf764114bae..dbca37747ce8 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h +++ 
b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h @@ -34,7 +34,7 @@ public: /// Extract the addend encoded in the instruction / memory location. int64_t decodeAddend(const RelocationEntry &RE) const { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); unsigned NumBytes = 1 << RE.Size; int64_t Addend = 0; // Verify that the relocation has the correct size and alignment. @@ -272,15 +272,14 @@ public: RelocationEntry RE(getRelocationEntry(SectionID, Obj, RelI)); RE.Addend = decodeAddend(RE); - RelocationValueRef Value( - getRelocationValueRef(Obj, RelI, RE, ObjSectionToID)); assert((ExplicitAddend == 0 || RE.Addend == 0) && "Relocation has "\ "ARM64_RELOC_ADDEND and embedded addend in the instruction."); - if (ExplicitAddend) { + if (ExplicitAddend) RE.Addend = ExplicitAddend; - Value.Offset = ExplicitAddend; - } + + RelocationValueRef Value( + getRelocationValueRef(Obj, RelI, RE, ObjSectionToID)); bool IsExtern = Obj.getPlainRelocationExternal(RelInfo); if (!IsExtern && RE.IsPCRel) @@ -305,7 +304,7 @@ public: DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); MachO::RelocationInfoType RelType = static_cast(RE.RelType); @@ -325,7 +324,7 @@ public: case MachO::ARM64_RELOC_BRANCH26: { assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_BRANCH26 not supported"); // Check if branch is in range. - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); int64_t PCRelVal = Value - FinalAddress + RE.Addend; encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal); break; @@ -334,7 +333,7 @@ public: case MachO::ARM64_RELOC_PAGE21: { assert(RE.IsPCRel && "not PCRel and ARM64_RELOC_PAGE21 not supported"); // Adjust for PC-relative relocation and offset. - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); int64_t PCRelVal = ((Value + RE.Addend) & (-4096)) - (FinalAddress & (-4096)); encodeAddend(LocalAddress, /*Size=*/4, RelType, PCRelVal); @@ -376,10 +375,10 @@ private: else { // FIXME: There must be a better way to do this then to check and fix the // alignment every time!!! 
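The ARM64_RELOC_PAGE21 case above computes what an ADRP instruction needs: the distance between the 4 KiB page of the target and the 4 KiB page of the PC, with the in-page bits masked off on both sides before subtracting. A sketch of the page arithmetic; the helper name is illustrative only:

#include <cassert>
#include <cstdint>

static int64_t page21Delta(uint64_t Target, uint64_t PC) {
  const uint64_t PageMask = ~uint64_t(4095); // align both down to 4 KiB
  return int64_t(Target & PageMask) - int64_t(PC & PageMask);
}

int main() {
  // Adjacent pages: the page delta is one full page even though the
  // byte delta is only 8.
  assert(page21Delta(0x2001004, 0x2000ffc) == 4096);
  // Same page: delta is zero regardless of the in-page offsets.
  assert(page21Delta(0x3000f00, 0x3000010) == 0);
  return 0;
}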
- uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t BaseAddress = uintptr_t(Section.getAddress()); uintptr_t StubAlignment = getStubAlignment(); uintptr_t StubAddress = - (BaseAddress + Section.StubOffset + StubAlignment - 1) & + (BaseAddress + Section.getStubOffset() + StubAlignment - 1) & -StubAlignment; unsigned StubOffset = StubAddress - BaseAddress; Stubs[Value] = StubOffset; @@ -392,7 +391,7 @@ private: addRelocationForSymbol(GOTRE, Value.SymbolName); else addRelocationForSection(GOTRE, Value.SectionID); - Section.StubOffset = StubOffset + getMaxStubSize(); + Section.advanceStubOffset(getMaxStubSize()); Offset = static_cast(StubOffset); } RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, Offset, diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h index 0a24bb2f5eae..7731df09bd21 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h @@ -35,7 +35,7 @@ public: int64_t decodeAddend(const RelocationEntry &RE) const { const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); switch (RE.RelType) { default: @@ -64,8 +64,10 @@ public: if (RelType == MachO::ARM_RELOC_HALF_SECTDIFF) return processHALFSECTDIFFRelocation(SectionID, RelI, Obj, ObjSectionToID); + else if (RelType == MachO::GENERIC_RELOC_VANILLA) + return processScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID); else - return ++++RelI; + return ++RelI; } RelocationEntry RE(getRelocationEntry(SectionID, Obj, RelI)); @@ -92,12 +94,12 @@ public: void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); // If the relocation is PC-relative, the value to be encoded is the // pointer difference. if (RE.IsPCRel) { - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); Value -= FinalAddress; // ARM PCRel relocations have an effective-PC offset of two instructions // (four bytes in Thumb mode, 8 bytes in ARM mode). @@ -130,8 +132,8 @@ public: break; } case MachO::ARM_RELOC_HALF_SECTDIFF: { - uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress; - uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress; + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); assert((Value == SectionABase || Value == SectionBBase) && "Unexpected HALFSECTDIFF relocation value."); Value = SectionABase - SectionBBase + RE.Addend; @@ -178,21 +180,21 @@ private: RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value); uint8_t *Addr; if (i != Stubs.end()) { - Addr = Section.Address + i->second; + Addr = Section.getAddressWithOffset(i->second); } else { // Create a new stub function. 
- Stubs[Value] = Section.StubOffset; - uint8_t *StubTargetAddr = - createStubFunction(Section.Address + Section.StubOffset); - RelocationEntry StubRE(RE.SectionID, StubTargetAddr - Section.Address, - MachO::GENERIC_RELOC_VANILLA, Value.Offset, false, - 2); + Stubs[Value] = Section.getStubOffset(); + uint8_t *StubTargetAddr = createStubFunction( + Section.getAddressWithOffset(Section.getStubOffset())); + RelocationEntry StubRE( + RE.SectionID, StubTargetAddr - Section.getAddress(), + MachO::GENERIC_RELOC_VANILLA, Value.Offset, false, 2); if (Value.SymbolName) addRelocationForSymbol(StubRE, Value.SymbolName); else addRelocationForSection(StubRE, Value.SectionID); - Addr = Section.Address + Section.StubOffset; - Section.StubOffset += getMaxStubSize(); + Addr = Section.getAddressWithOffset(Section.getStubOffset()); + Section.advanceStubOffset(getMaxStubSize()); } RelocationEntry TargetRE(RE.SectionID, RE.Offset, RE.RelType, 0, RE.IsPCRel, RE.Size); @@ -221,7 +223,7 @@ private: uint32_t RelocType = MachO.getAnyRelocationType(RE); bool IsPCRel = MachO.getAnyRelocationPCRel(RE); uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); int64_t Immediate = readBytesUnaligned(LocalAddress, 4); // Copy the whole instruction out. Immediate = ((Immediate >> 4) & 0xf000) | (Immediate & 0xfff); diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h index 569a078d7f3d..85059d70a3eb 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h @@ -47,8 +47,7 @@ public: return processSECTDIFFRelocation(SectionID, RelI, Obj, ObjSectionToID); else if (RelType == MachO::GENERIC_RELOC_VANILLA) - return processI386ScatteredVANILLA(SectionID, RelI, Obj, - ObjSectionToID); + return processScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID); llvm_unreachable("Unhandled scattered relocation."); } @@ -84,10 +83,10 @@ public: DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); if (RE.IsPCRel) { - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation. 
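The PC-relative adjustment above accounts for the ARM pipeline: reading the PC yields the fixup address plus two instructions, 8 bytes in ARM state and 4 in Thumb. A sketch of the resulting displacement computation, under those assumptions and with a hypothetical helper, not the RuntimeDyldMachOARM API:

#include <cassert>
#include <cstdint>

static int64_t pcRelValue(uint64_t Target, uint64_t FixupAddr, bool IsThumb) {
  // The effective PC is two instructions past the fixup location.
  uint64_t EffectivePC = FixupAddr + (IsThumb ? 4 : 8);
  return int64_t(Target - EffectivePC);
}

int main() {
  // A branch at 0x1000 to 0x1010 encodes 8 in ARM state but 12 in Thumb.
  assert(pcRelValue(0x1010, 0x1000, /*IsThumb=*/false) == 8);
  assert(pcRelValue(0x1010, 0x1000, /*IsThumb=*/true) == 12);
  return 0;
}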
} @@ -99,8 +98,8 @@ public: break; case MachO::GENERIC_RELOC_SECTDIFF: case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: { - uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress; - uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress; + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); assert((Value == SectionABase || Value == SectionBBase) && "Unexpected SECTDIFF relocation value."); Value = SectionABase - SectionBBase + RE.Addend; @@ -139,7 +138,7 @@ private: bool IsPCRel = Obj.getAnyRelocationPCRel(RE); unsigned Size = Obj.getAnyRelocationLength(RE); uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Section.Address + Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(Offset); unsigned NumBytes = 1 << Size; uint64_t Addend = readBytesUnaligned(LocalAddress, NumBytes); @@ -183,41 +182,6 @@ private: return ++RelI; } - relocation_iterator processI386ScatteredVANILLA( - unsigned SectionID, relocation_iterator RelI, - const ObjectFile &BaseObjT, - RuntimeDyldMachO::ObjSectionToIDMap &ObjSectionToID) { - const MachOObjectFile &Obj = - static_cast(BaseObjT); - MachO::any_relocation_info RE = - Obj.getRelocation(RelI->getRawDataRefImpl()); - - SectionEntry &Section = Sections[SectionID]; - uint32_t RelocType = Obj.getAnyRelocationType(RE); - bool IsPCRel = Obj.getAnyRelocationPCRel(RE); - unsigned Size = Obj.getAnyRelocationLength(RE); - uint64_t Offset = RelI->getOffset(); - uint8_t *LocalAddress = Section.Address + Offset; - unsigned NumBytes = 1 << Size; - int64_t Addend = readBytesUnaligned(LocalAddress, NumBytes); - - unsigned SymbolBaseAddr = Obj.getScatteredRelocationValue(RE); - section_iterator TargetSI = getSectionByAddress(Obj, SymbolBaseAddr); - assert(TargetSI != Obj.section_end() && "Can't find section for symbol"); - uint64_t SectionBaseAddr = TargetSI->getAddress(); - SectionRef TargetSection = *TargetSI; - bool IsCode = TargetSection.isText(); - uint32_t TargetSectionID = - findOrEmitSection(Obj, TargetSection, IsCode, ObjSectionToID); - - Addend -= SectionBaseAddr; - RelocationEntry R(SectionID, Offset, RelocType, Addend, IsPCRel, Size); - - addRelocationForSection(R, TargetSectionID); - - return ++RelI; - } - // Populate stubs in __jump_table section. 
void populateJumpTable(const MachOObjectFile &Obj, const SectionRef &JTSection, unsigned JTSectionID) { diff --git a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h index dd56e72f9144..2242295bc1ee 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h +++ b/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h @@ -39,6 +39,10 @@ public: static_cast(BaseObjT); MachO::any_relocation_info RelInfo = Obj.getRelocation(RelI->getRawDataRefImpl()); + uint32_t RelType = Obj.getAnyRelocationType(RelInfo); + + if (RelType == MachO::X86_64_RELOC_SUBTRACTOR) + return processSubtractRelocation(SectionID, RelI, Obj, ObjSectionToID); assert(!Obj.isRelocationScattered(RelInfo) && "Scattered relocations not supported on X86_64"); @@ -69,14 +73,14 @@ public: void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override { DEBUG(dumpRelocationToResolve(RE, Value)); const SectionEntry &Section = Sections[RE.SectionID]; - uint8_t *LocalAddress = Section.Address + RE.Offset; + uint8_t *LocalAddress = Section.getAddressWithOffset(RE.Offset); // If the relocation is PC-relative, the value to be encoded is the // pointer difference. if (RE.IsPCRel) { // FIXME: It seems this value needs to be adjusted by 4 for an effective // PC address. Is that expected? Only for branches, perhaps? - uint64_t FinalAddress = Section.LoadAddress + RE.Offset; + uint64_t FinalAddress = Section.getLoadAddressWithOffset(RE.Offset); Value -= FinalAddress + 4; } @@ -91,9 +95,17 @@ public: case MachO::X86_64_RELOC_BRANCH: writeBytesUnaligned(Value + RE.Addend, LocalAddress, 1 << RE.Size); break; + case MachO::X86_64_RELOC_SUBTRACTOR: { + uint64_t SectionABase = Sections[RE.Sections.SectionA].getLoadAddress(); + uint64_t SectionBBase = Sections[RE.Sections.SectionB].getLoadAddress(); + assert((Value == SectionABase || Value == SectionBBase) && + "Unexpected SUBTRACTOR relocation value."); + Value = SectionABase - SectionBBase + RE.Addend; + writeBytesUnaligned(Value, LocalAddress, 1 << RE.Size); + break; + } case MachO::X86_64_RELOC_GOT_LOAD: case MachO::X86_64_RELOC_GOT: - case MachO::X86_64_RELOC_SUBTRACTOR: case MachO::X86_64_RELOC_TLV: Error("Relocation type not implemented yet!"); } @@ -112,24 +124,65 @@ private: RuntimeDyldMachO::StubMap::const_iterator i = Stubs.find(Value); uint8_t *Addr; if (i != Stubs.end()) { - Addr = Section.Address + i->second; + Addr = Section.getAddressWithOffset(i->second); } else { - Stubs[Value] = Section.StubOffset; - uint8_t *GOTEntry = Section.Address + Section.StubOffset; - RelocationEntry GOTRE(RE.SectionID, Section.StubOffset, + Stubs[Value] = Section.getStubOffset(); + uint8_t *GOTEntry = Section.getAddressWithOffset(Section.getStubOffset()); + RelocationEntry GOTRE(RE.SectionID, Section.getStubOffset(), MachO::X86_64_RELOC_UNSIGNED, Value.Offset, false, 3); if (Value.SymbolName) addRelocationForSymbol(GOTRE, Value.SymbolName); else addRelocationForSection(GOTRE, Value.SectionID); - Section.StubOffset += 8; + Section.advanceStubOffset(8); Addr = GOTEntry; } RelocationEntry TargetRE(RE.SectionID, RE.Offset, MachO::X86_64_RELOC_UNSIGNED, RE.Addend, true, 2); resolveRelocation(TargetRE, (uint64_t)Addr); } + + relocation_iterator + processSubtractRelocation(unsigned SectionID, relocation_iterator RelI, + const ObjectFile &BaseObjT, + ObjSectionToIDMap &ObjSectionToID) { + const MachOObjectFile &Obj = + static_cast(BaseObjT); + MachO::any_relocation_info RE = + 
Obj.getRelocation(RelI->getRawDataRefImpl()); + + unsigned Size = Obj.getAnyRelocationLength(RE); + uint64_t Offset = RelI->getOffset(); + uint8_t *LocalAddress = Sections[SectionID].getAddressWithOffset(Offset); + unsigned NumBytes = 1 << Size; + + ErrorOr SubtrahendNameOrErr = RelI->getSymbol()->getName(); + if (auto EC = SubtrahendNameOrErr.getError()) + report_fatal_error(EC.message()); + auto SubtrahendI = GlobalSymbolTable.find(*SubtrahendNameOrErr); + unsigned SectionBID = SubtrahendI->second.getSectionID(); + uint64_t SectionBOffset = SubtrahendI->second.getOffset(); + int64_t Addend = + SignExtend64(readBytesUnaligned(LocalAddress, NumBytes), NumBytes * 8); + + ++RelI; + ErrorOr MinuendNameOrErr = RelI->getSymbol()->getName(); + if (auto EC = MinuendNameOrErr.getError()) + report_fatal_error(EC.message()); + auto MinuendI = GlobalSymbolTable.find(*MinuendNameOrErr); + unsigned SectionAID = MinuendI->second.getSectionID(); + uint64_t SectionAOffset = MinuendI->second.getOffset(); + + RelocationEntry R(SectionID, Offset, MachO::X86_64_RELOC_SUBTRACTOR, (uint64_t)Addend, + SectionAID, SectionAOffset, SectionBID, SectionBOffset, + false, Size); + + addRelocationForSection(R, SectionAID); + + return ++RelI; + } + }; } diff --git a/lib/ExecutionEngine/SectionMemoryManager.cpp b/lib/ExecutionEngine/SectionMemoryManager.cpp index 59860844e939..e2f220862cf7 100644 --- a/lib/ExecutionEngine/SectionMemoryManager.cpp +++ b/lib/ExecutionEngine/SectionMemoryManager.cpp @@ -15,6 +15,7 @@ #include "llvm/Config/config.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Process.h" namespace llvm { @@ -48,16 +49,27 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, // Look in the list of free memory regions and use a block there if one // is available. - for (int i = 0, e = MemGroup.FreeMem.size(); i != e; ++i) { - sys::MemoryBlock &MB = MemGroup.FreeMem[i]; - if (MB.size() >= RequiredSize) { - Addr = (uintptr_t)MB.base(); - uintptr_t EndOfBlock = Addr + MB.size(); + for (FreeMemBlock &FreeMB : MemGroup.FreeMem) { + if (FreeMB.Free.size() >= RequiredSize) { + Addr = (uintptr_t)FreeMB.Free.base(); + uintptr_t EndOfBlock = Addr + FreeMB.Free.size(); // Align the address. Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1); - // Store cutted free memory block. 
- MemGroup.FreeMem[i] = sys::MemoryBlock((void*)(Addr + Size), - EndOfBlock - Addr - Size); + + if (FreeMB.PendingPrefixIndex == (unsigned)-1) { + // The part of the block we're giving out to the user is now pending + MemGroup.PendingMem.push_back(sys::MemoryBlock((void *)Addr, Size)); + + // Remember this pending block, such that future allocations can just + // modify it rather than creating a new one + FreeMB.PendingPrefixIndex = MemGroup.PendingMem.size() - 1; + } else { + sys::MemoryBlock &PendingMB = MemGroup.PendingMem[FreeMB.PendingPrefixIndex]; + PendingMB = sys::MemoryBlock(PendingMB.base(), Addr + Size - (uintptr_t)PendingMB.base()); + } + + // Remember how much free space is now left in this block + FreeMB.Free = sys::MemoryBlock((void *)(Addr + Size), EndOfBlock - Addr - Size); return (uint8_t*)Addr; } } @@ -85,6 +97,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, // Save this address as the basis for our next request MemGroup.Near = MB; + // Remember that we allocated this memory MemGroup.AllocatedMem.push_back(MB); Addr = (uintptr_t)MB.base(); uintptr_t EndOfBlock = Addr + MB.size(); @@ -92,11 +105,18 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, // Align the address. Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1); + // The part of the block we're giving out to the user is now pending + MemGroup.PendingMem.push_back(sys::MemoryBlock((void *)Addr, Size)); + // The allocateMappedMemory may allocate much more memory than we need. In // this case, we store the unused memory as a free memory block. unsigned FreeSize = EndOfBlock-Addr-Size; - if (FreeSize > 16) - MemGroup.FreeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize)); + if (FreeSize > 16) { + FreeMemBlock FreeMB; + FreeMB.Free = sys::MemoryBlock((void*)(Addr + Size), FreeSize); + FreeMB.PendingPrefixIndex = (unsigned)-1; + MemGroup.FreeMem.push_back(FreeMB); + } // Return aligned address return (uint8_t*)Addr; @@ -107,9 +127,6 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) // FIXME: Should in-progress permissions be reverted if an error occurs? std::error_code ec; - // Don't allow free memory blocks to be used after setting protection flags. - CodeMem.FreeMem.clear(); - // Make code memory executable. 
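The free-block reuse above carves an aligned allocation out of a previously returned block, so the candidate address first has to be rounded up to the requested power-of-two alignment. The mask trick it relies on, as a standalone sketch with illustrative names:

#include <cassert>
#include <cstdint>

static uintptr_t alignUp(uintptr_t Addr, uintptr_t Alignment) {
  // Power-of-two alignment lets us mask instead of dividing.
  assert((Alignment & (Alignment - 1)) == 0 && "alignment must be 2^k");
  return (Addr + Alignment - 1) & ~(Alignment - 1);
}

int main() {
  assert(alignUp(0x1001, 16) == 0x1010);
  assert(alignUp(0x1010, 16) == 0x1010); // already aligned: unchanged
  return 0;
}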
ec = applyMemoryGroupPermissions(CodeMem, sys::Memory::MF_READ | sys::Memory::MF_EXEC); @@ -143,36 +160,62 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) return false; } +static sys::MemoryBlock trimBlockToPageSize(sys::MemoryBlock M) { + static const size_t PageSize = sys::Process::getPageSize(); + + size_t StartOverlap = + (PageSize - ((uintptr_t)M.base() % PageSize)) % PageSize; + + size_t TrimmedSize = M.size(); + TrimmedSize -= StartOverlap; + TrimmedSize -= TrimmedSize % PageSize; + + sys::MemoryBlock Trimmed((void *)((uintptr_t)M.base() + StartOverlap), TrimmedSize); + + assert(((uintptr_t)Trimmed.base() % PageSize) == 0); + assert((Trimmed.size() % PageSize) == 0); + assert(M.base() <= Trimmed.base() && Trimmed.size() <= M.size()); + + return Trimmed; +} + + std::error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup, unsigned Permissions) { + for (sys::MemoryBlock &MB : MemGroup.PendingMem) + if (std::error_code EC = sys::Memory::protectMappedMemory(MB, Permissions)) + return EC; - for (int i = 0, e = MemGroup.AllocatedMem.size(); i != e; ++i) { - std::error_code ec; - ec = - sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i], Permissions); - if (ec) { - return ec; - } + MemGroup.PendingMem.clear(); + + // Now go through free blocks and trim any of them that don't span the entire + // page because one of the pending blocks may have overlapped it. + for (FreeMemBlock &FreeMB : MemGroup.FreeMem) { + FreeMB.Free = trimBlockToPageSize(FreeMB.Free); + // We cleared the PendingMem list, so all these pointers are now invalid + FreeMB.PendingPrefixIndex = (unsigned)-1; } + // Remove all blocks which are now empty + MemGroup.FreeMem.erase( + std::remove_if(MemGroup.FreeMem.begin(), MemGroup.FreeMem.end(), + [](FreeMemBlock &FreeMB) { return FreeMB.Free.size() == 0; }), + MemGroup.FreeMem.end()); + return std::error_code(); } void SectionMemoryManager::invalidateInstructionCache() { - for (int i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i) - sys::Memory::InvalidateInstructionCache(CodeMem.AllocatedMem[i].base(), - CodeMem.AllocatedMem[i].size()); + for (sys::MemoryBlock &Block : CodeMem.PendingMem) + sys::Memory::InvalidateInstructionCache(Block.base(), Block.size()); } SectionMemoryManager::~SectionMemoryManager() { - for (unsigned i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i) - sys::Memory::releaseMappedMemory(CodeMem.AllocatedMem[i]); - for (unsigned i = 0, e = RWDataMem.AllocatedMem.size(); i != e; ++i) - sys::Memory::releaseMappedMemory(RWDataMem.AllocatedMem[i]); - for (unsigned i = 0, e = RODataMem.AllocatedMem.size(); i != e; ++i) - sys::Memory::releaseMappedMemory(RODataMem.AllocatedMem[i]); + for (MemoryGroup *Group : {&CodeMem, &RWDataMem, &RODataMem}) { + for (sys::MemoryBlock &Block : Group->AllocatedMem) + sys::Memory::releaseMappedMemory(Block); + } } } // namespace llvm - diff --git a/lib/Fuzzer/CMakeLists.txt b/lib/Fuzzer/CMakeLists.txt index 8b4d61905d00..d4d85041d218 100644 --- a/lib/Fuzzer/CMakeLists.txt +++ b/lib/Fuzzer/CMakeLists.txt @@ -17,10 +17,16 @@ if( LLVM_USE_SANITIZE_COVERAGE ) add_library(LLVMFuzzerNoMain STATIC $ ) + if( HAVE_LIBPTHREAD ) + target_link_libraries(LLVMFuzzerNoMain pthread) + endif() add_library(LLVMFuzzer STATIC FuzzerMain.cpp $ ) + if( HAVE_LIBPTHREAD ) + target_link_libraries(LLVMFuzzer pthread) + endif() if( LLVM_INCLUDE_TESTS ) add_subdirectory(test) diff --git a/lib/Fuzzer/FuzzerCrossOver.cpp b/lib/Fuzzer/FuzzerCrossOver.cpp index d93ce5cf4fb8..5203deaf9128 100644 --- 
a/lib/Fuzzer/FuzzerCrossOver.cpp +++ b/lib/Fuzzer/FuzzerCrossOver.cpp @@ -16,11 +16,11 @@ namespace fuzzer { // Cross Data1 and Data2, store the result (up to MaxOutSize bytes) in Out. -size_t CrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize) { +size_t MutationDispatcher::CrossOver(const uint8_t *Data1, size_t Size1, + const uint8_t *Data2, size_t Size2, + uint8_t *Out, size_t MaxOutSize) { assert(Size1 || Size2); - MaxOutSize = rand() % MaxOutSize + 1; + MaxOutSize = Rand(MaxOutSize) + 1; size_t OutPos = 0; size_t Pos1 = 0; size_t Pos2 = 0; @@ -34,7 +34,7 @@ size_t CrossOver(const uint8_t *Data1, size_t Size1, if (*InPos < InSize) { size_t InSizeLeft = InSize - *InPos; size_t MaxExtraSize = std::min(OutSizeLeft, InSizeLeft); - size_t ExtraSize = rand() % MaxExtraSize + 1; + size_t ExtraSize = Rand(MaxExtraSize) + 1; memcpy(Out + OutPos, Data + *InPos, ExtraSize); OutPos += ExtraSize; (*InPos) += ExtraSize; diff --git a/lib/Fuzzer/FuzzerDFSan.h b/lib/Fuzzer/FuzzerDFSan.h new file mode 100644 index 000000000000..eb206ec61ce8 --- /dev/null +++ b/lib/Fuzzer/FuzzerDFSan.h @@ -0,0 +1,61 @@ +//===- FuzzerDFSan.h - Internal header for the Fuzzer -----------*- C++ -* ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// DFSan interface. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_DFSAN_H +#define LLVM_FUZZER_DFSAN_H + +#define LLVM_FUZZER_SUPPORTS_DFSAN 0 +#if defined(__has_include) +# if __has_include(<sanitizer/dfsan_interface.h>) +# if defined (__linux__) +# undef LLVM_FUZZER_SUPPORTS_DFSAN +# define LLVM_FUZZER_SUPPORTS_DFSAN 1 +# include <sanitizer/dfsan_interface.h> +# endif // __linux__ +# endif +#endif // defined(__has_include) +
+#if LLVM_FUZZER_SUPPORTS_DFSAN + +extern "C" { +__attribute__((weak)) +dfsan_label dfsan_create_label(const char *desc, void *userdata); +__attribute__((weak)) +void dfsan_set_label(dfsan_label label, void *addr, size_t size); +__attribute__((weak)) +void dfsan_add_label(dfsan_label label, void *addr, size_t size); +__attribute__((weak)) +const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label); +__attribute__((weak)) +dfsan_label dfsan_read_label(const void *addr, size_t size); +} // extern "C" + +namespace fuzzer { +static bool ReallyHaveDFSan() { + return &dfsan_create_label != nullptr; +} +} // namespace fuzzer +#else +// When compiling with a compiler which does not support dfsan, +// this code is still expected to build (but not necessarily work).
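// Editorial aside (not part of this patch): the weak declarations earlier in this header let
// libFuzzer link whether or not the DFSan runtime is present; taking the address of a weak
// symbol yields null when nothing defines it, which is exactly what ReallyHaveDFSan() tests.
// A tiny self-contained sketch of the idiom; optional_runtime_hook is a made-up symbol:
extern "C" __attribute__((weak)) void optional_runtime_hook();

static bool HaveHook() { return &optional_runtime_hook != nullptr; }

static void MaybeCallHook() {
  if (HaveHook())
    optional_runtime_hook(); // Safe: something provided the symbol.
}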
+typedef unsigned short dfsan_label; +struct dfsan_label_info { + dfsan_label l1, l2; + const char *desc; + void *userdata; +}; +namespace fuzzer { +static bool ReallyHaveDFSan() { return false; } +} // namespace fuzzer + +#endif + +#endif // LLVM_FUZZER_DFSAN_H diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 0ee08e1d1624..dc5f8babbfe6 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -32,35 +32,42 @@ struct FlagDescription { int Default; int *IntFlag; const char **StrFlag; + unsigned int *UIntFlag; }; struct { #define FUZZER_FLAG_INT(Name, Default, Description) int Name; +#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) unsigned int Name; #define FUZZER_FLAG_STRING(Name, Description) const char *Name; #include "FuzzerFlags.def" #undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_UNSIGNED #undef FUZZER_FLAG_STRING } Flags; -static FlagDescription FlagDescriptions [] { +static const FlagDescription FlagDescriptions [] { #define FUZZER_FLAG_INT(Name, Default, Description) \ - { #Name, Description, Default, &Flags.Name, nullptr}, + {#Name, Description, Default, &Flags.Name, nullptr, nullptr}, +#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) \ + {#Name, Description, static_cast(Default), \ + nullptr, nullptr, &Flags.Name}, #define FUZZER_FLAG_STRING(Name, Description) \ - { #Name, Description, 0, nullptr, &Flags.Name }, + {#Name, Description, 0, nullptr, &Flags.Name, nullptr}, #include "FuzzerFlags.def" #undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_UNSIGNED #undef FUZZER_FLAG_STRING }; static const size_t kNumFlags = sizeof(FlagDescriptions) / sizeof(FlagDescriptions[0]); -static std::vector inputs; -static const char *ProgName; +static std::vector *Inputs; +static std::string *ProgName; static void PrintHelp() { Printf("Usage: %s [-flag1=val1 [-flag2=val2 ...] ] [dir1 [dir2 ...] ]\n", - ProgName); + ProgName->c_str()); Printf("\nFlags: (strictly in form -flag=value)\n"); size_t MaxFlagLen = 0; for (size_t F = 0; F < kNumFlags; F++) @@ -106,6 +113,12 @@ static bool ParseOneFlag(const char *Param) { if (Flags.verbosity >= 2) Printf("Flag: %s %d\n", Name, Val);; return true; + } else if (FlagDescriptions[F].UIntFlag) { + unsigned int Val = std::stoul(Str); + *FlagDescriptions[F].UIntFlag = Val; + if (Flags.verbosity >= 2) + Printf("Flag: %s %u\n", Name, Val); + return true; } else if (FlagDescriptions[F].StrFlag) { *FlagDescriptions[F].StrFlag = Str; if (Flags.verbosity >= 2) @@ -119,16 +132,20 @@ static bool ParseOneFlag(const char *Param) { } // We don't use any library to minimize dependencies. 
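// Editorial aside (not part of this patch): the comment above is the whole design, so a sketch
// of the strictly "-flag=value" scheme may help. This mirrors the FlagValue helper used
// elsewhere in this driver, but the body below is an illustration, not the patch's exact
// implementation.
#include <cstring>

// Returns a pointer to the value part if Param is "-Name=Value", else null.
static const char *FlagValueSketch(const char *Param, const char *Name) {
  if (Param[0] != '-')
    return nullptr;
  size_t Len = strlen(Name);
  if (strncmp(Param + 1, Name, Len) == 0 && Param[1 + Len] == '=')
    return Param + 1 + Len + 1; // Skip '-', the flag name, and '='.
  return nullptr;
}
// Usage: if (const char *V = FlagValueSketch(argv[i], "jobs")) ParseIt(V);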
-static void ParseFlags(int argc, char **argv) { +static void ParseFlags(const std::vector &Args) { for (size_t F = 0; F < kNumFlags; F++) { if (FlagDescriptions[F].IntFlag) *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default; + if (FlagDescriptions[F].UIntFlag) + *FlagDescriptions[F].UIntFlag = + static_cast(FlagDescriptions[F].Default); if (FlagDescriptions[F].StrFlag) *FlagDescriptions[F].StrFlag = nullptr; } - for (int A = 1; A < argc; A++) { - if (ParseOneFlag(argv[A])) continue; - inputs.push_back(argv[A]); + Inputs = new std::vector; + for (size_t A = 1; A < Args.size(); A++) { + if (ParseOneFlag(Args[A].c_str())) continue; + Inputs->push_back(Args[A]); } } @@ -151,7 +168,7 @@ static void WorkerThread(const std::string &Cmd, std::atomic *Counter, std::string ToRun = Cmd + " > " + Log + " 2>&1\n"; if (Flags.verbosity) Printf("%s", ToRun.c_str()); - int ExitCode = system(ToRun.c_str()); + int ExitCode = ExecuteCommand(ToRun.c_str()); if (ExitCode != 0) *HasErrors = true; std::lock_guard Lock(Mu); @@ -161,15 +178,15 @@ static void WorkerThread(const std::string &Cmd, std::atomic *Counter, } } -static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers, - int NumJobs) { +static int RunInMultipleProcesses(const std::vector &Args, + int NumWorkers, int NumJobs) { std::atomic Counter(0); std::atomic HasErrors(false); std::string Cmd; - for (int i = 0; i < argc; i++) { - if (FlagValue(argv[i], "jobs") || FlagValue(argv[i], "workers")) continue; - Cmd += argv[i]; - Cmd += " "; + for (auto &S : Args) { + if (FlagValue(S.c_str(), "jobs") || FlagValue(S.c_str(), "workers")) + continue; + Cmd += S + " "; } std::vector V; std::thread Pulse(PulseThread); @@ -181,36 +198,37 @@ static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers, return HasErrors ? 
1 : 0; } -std::vector ReadTokensFile(const char *TokensFilePath) { - if (!TokensFilePath) return {}; - std::string TokensFileContents = FileToString(TokensFilePath); - std::istringstream ISS(TokensFileContents); - std::vector Res = {std::istream_iterator{ISS}, - std::istream_iterator{}}; - Res.push_back(" "); - Res.push_back("\t"); - Res.push_back("\n"); - return Res; -} - -int ApplyTokens(const Fuzzer &F, const char *InputFilePath) { +int RunOneTest(Fuzzer *F, const char *InputFilePath) { Unit U = FileToVector(InputFilePath); - auto T = F.SubstituteTokens(U); - T.push_back(0); - Printf("%s", T.data()); + Unit PreciseSizedU(U); + assert(PreciseSizedU.size() == PreciseSizedU.capacity()); + F->ExecuteCallback(PreciseSizedU); return 0; } int FuzzerDriver(int argc, char **argv, UserCallback Callback) { - SimpleUserSuppliedFuzzer SUSF(Callback); + FuzzerRandomLibc Rand(0); + SimpleUserSuppliedFuzzer SUSF(&Rand, Callback); return FuzzerDriver(argc, argv, SUSF); } int FuzzerDriver(int argc, char **argv, UserSuppliedFuzzer &USF) { - using namespace fuzzer; + std::vector Args(argv, argv + argc); + return FuzzerDriver(Args, USF); +} - ProgName = argv[0]; - ParseFlags(argc, argv); +int FuzzerDriver(const std::vector &Args, UserCallback Callback) { + FuzzerRandomLibc Rand(0); + SimpleUserSuppliedFuzzer SUSF(&Rand, Callback); + return FuzzerDriver(Args, SUSF); +} + +int FuzzerDriver(const std::vector &Args, + UserSuppliedFuzzer &USF) { + using namespace fuzzer; + assert(!Args.empty()); + ProgName = new std::string(Args[0]); + ParseFlags(Args); if (Flags.help) { PrintHelp(); return 0; @@ -223,33 +241,70 @@ int FuzzerDriver(int argc, char **argv, UserSuppliedFuzzer &USF) { } if (Flags.workers > 0 && Flags.jobs > 0) - return RunInMultipleProcesses(argc, argv, Flags.workers, Flags.jobs); + return RunInMultipleProcesses(Args, Flags.workers, Flags.jobs); Fuzzer::FuzzingOptions Options; Options.Verbosity = Flags.verbosity; Options.MaxLen = Flags.max_len; Options.UnitTimeoutSec = Flags.timeout; + Options.MaxTotalTimeSec = Flags.max_total_time; Options.DoCrossOver = Flags.cross_over; Options.MutateDepth = Flags.mutate_depth; Options.ExitOnFirst = Flags.exit_on_first; Options.UseCounters = Flags.use_counters; + Options.UseIndirCalls = Flags.use_indir_calls; Options.UseTraces = Flags.use_traces; - Options.UseFullCoverageSet = Flags.use_full_coverage_set; + Options.ShuffleAtStartUp = Flags.shuffle; Options.PreferSmallDuringInitialShuffle = Flags.prefer_small_during_initial_shuffle; - Options.Tokens = ReadTokensFile(Flags.tokens); Options.Reload = Flags.reload; + Options.OnlyASCII = Flags.only_ascii; + Options.TBMDepth = Flags.tbm_depth; + Options.TBMWidth = Flags.tbm_width; + Options.OutputCSV = Flags.output_csv; if (Flags.runs >= 0) Options.MaxNumberOfRuns = Flags.runs; - if (!inputs.empty()) - Options.OutputCorpus = inputs[0]; + if (!Inputs->empty()) + Options.OutputCorpus = (*Inputs)[0]; if (Flags.sync_command) Options.SyncCommand = Flags.sync_command; Options.SyncTimeout = Flags.sync_timeout; + Options.ReportSlowUnits = Flags.report_slow_units; + if (Flags.artifact_prefix) + Options.ArtifactPrefix = Flags.artifact_prefix; + if (Flags.exact_artifact_path) + Options.ExactArtifactPath = Flags.exact_artifact_path; + std::vector Dictionary; + if (Flags.dict) + if (!ParseDictionaryFile(FileToString(Flags.dict), &Dictionary)) + return 1; + if (Flags.verbosity > 0 && !Dictionary.empty()) + Printf("Dictionary: %zd entries\n", Dictionary.size()); + Options.SaveArtifacts = !Flags.test_single_input; + Fuzzer F(USF, 
Options); - if (Flags.apply_tokens) - return ApplyTokens(F, Flags.apply_tokens); + for (auto &U: Dictionary) + USF.GetMD().AddWordToDictionary(U.data(), U.size()); + + // Timer + if (Flags.timeout > 0) + SetTimer(Flags.timeout / 2 + 1); + + if (Flags.test_single_input) { + RunOneTest(&F, Flags.test_single_input); + exit(0); + } + + if (Flags.save_minimized_corpus) { + Printf("The flag -save_minimized_corpus is deprecated; use -merge=1\n"); + exit(1); + } + + if (Flags.merge) { + F.Merge(*Inputs); + exit(0); + } unsigned Seed = Flags.seed; // Initialize Seed. @@ -257,35 +312,26 @@ int FuzzerDriver(int argc, char **argv, UserSuppliedFuzzer &USF) { Seed = time(0) * 10000 + getpid(); if (Flags.verbosity) Printf("Seed: %u\n", Seed); - srand(Seed); - - // Timer - if (Flags.timeout > 0) - SetTimer(Flags.timeout / 2 + 1); - - if (Flags.verbosity >= 2) { - Printf("Tokens: {"); - for (auto &T : Options.Tokens) - Printf("%s,", T.c_str()); - Printf("}\n"); - } + USF.GetRand().ResetSeed(Seed); F.RereadOutputCorpus(); - for (auto &inp : inputs) + for (auto &inp : *Inputs) if (inp != Options.OutputCorpus) F.ReadDir(inp, nullptr); if (F.CorpusSize() == 0) F.AddToCorpus(Unit()); // Can't fuzz empty corpus, so add an empty input. F.ShuffleAndMinimize(); - if (Flags.save_minimized_corpus) - F.SaveCorpus(); - F.Loop(Flags.iterations < 0 ? INT_MAX : Flags.iterations); + if (Flags.drill) + F.Drill(); + else + F.Loop(); + if (Flags.verbosity) Printf("Done %d runs in %zd second(s)\n", F.getTotalNumberOfRuns(), F.secondsSinceProcessStartUp()); - return 0; + exit(0); // Don't let F destroy itself. } } // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 742f672e2012..c2b506c3c8aa 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -11,16 +11,14 @@ // portability and independence. //===----------------------------------------------------------------------===// FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") -FUZZER_FLAG_INT(seed, 0, "Random seed. If 0, seed is generated.") -FUZZER_FLAG_INT(iterations, -1, - "Number of iterations of the fuzzer internal loop" - " (-1 for infinite iterations).") +FUZZER_FLAG_UNSIGNED(seed, 0, "Random seed. If 0, seed is generated.") FUZZER_FLAG_INT(runs, -1, "Number of individual test runs (-1 for infinite runs).") FUZZER_FLAG_INT(max_len, 64, "Maximum length of the test input.") FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") +FUZZER_FLAG_INT(shuffle, 1, "Shuffle inputs at startup") FUZZER_FLAG_INT( prefer_small_during_initial_shuffle, -1, "If 1, always prefer smaller inputs during the initial corpus shuffle." @@ -31,16 +29,15 @@ FUZZER_FLAG_INT( timeout, 1200, "Timeout in seconds (if positive). " "If one unit runs more than this number of seconds the process will abort.") +FUZZER_FLAG_INT(max_total_time, 0, "If positive, indicates the maximal total " + "time in seconds to run the fuzzer.") FUZZER_FLAG_INT(help, 0, "Print help.") -FUZZER_FLAG_INT( - save_minimized_corpus, 0, - "If 1, the minimized corpus is saved into the first input directory") +FUZZER_FLAG_INT(save_minimized_corpus, 0, "Deprecated. Use -merge=1") +FUZZER_FLAG_INT(merge, 0, "If 1, the 2-nd, 3-rd, etc corpora will be " + "merged into the 1-st corpus. 
Only interesting units will be taken.") FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters") +FUZZER_FLAG_INT(use_indir_calls, 1, "Use indirect caller-callee counters") FUZZER_FLAG_INT(use_traces, 0, "Experimental: use instruction traces") -FUZZER_FLAG_INT(use_full_coverage_set, 0, - "Experimental: Maximize the number of different full" - " coverage sets as opposed to maximizing the total coverage." - " This is potentially MUCH slower, but may discover more paths.") FUZZER_FLAG_INT(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" " this number of jobs in separate worker processes" " with stdout/stderr redirected to fuzz-JOB.log.") @@ -49,12 +46,29 @@ FUZZER_FLAG_INT(workers, 0, " If zero, \"min(jobs,NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload, 1, "Reload the main corpus periodically to get new units" - "discovered by other processes.") -FUZZER_FLAG_STRING(tokens, "Use the file with tokens (one token per line) to" - " fuzz a token based input language.") -FUZZER_FLAG_STRING(apply_tokens, "Read the given input file, substitute bytes " - " with tokens and write the result to stdout.") + " discovered by other processes.") FUZZER_FLAG_STRING(sync_command, "Execute an external command " "\"<sync_command> <test_corpus>\" " "to synchronize the test corpus.") FUZZER_FLAG_INT(sync_timeout, 600, "Minimum timeout between syncs.") +FUZZER_FLAG_INT(report_slow_units, 10, + "Report slowest units if they run for more than this number of seconds.") +FUZZER_FLAG_INT(only_ascii, 0, + "If 1, generate only ASCII (isprint+isspace) inputs.") +FUZZER_FLAG_STRING(dict, "Experimental. Use the dictionary file.") +FUZZER_FLAG_INT(tbm_depth, 5, "Apply at most this number of consecutive " + "trace-based-mutations (tbm).") +FUZZER_FLAG_INT(tbm_width, 5, "Apply at most this number of independent " + "trace-based-mutations (tbm)") +FUZZER_FLAG_STRING(test_single_input, "Use specified file as test input.") +FUZZER_FLAG_STRING(artifact_prefix, "Write fuzzing artifacts (crash, " + "timeout, or slow inputs) as " + "$(artifact_prefix)file") +FUZZER_FLAG_STRING(exact_artifact_path, + "Write the single artifact on failure (crash, timeout) " + "as $(exact_artifact_path). This overrides -artifact_prefix " + "and will not use checksum in the file name. Do not " + "use the same path for several parallel processes.") +FUZZER_FLAG_INT(drill, 0, "Experimental: fuzz using a single unit as the seed " + "corpus, then merge with the initial corpus") +FUZZER_FLAG_INT(output_csv, 0, "Enable pulse output in CSV format.") diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp index 85703c81841c..043fad396d51 100644 --- a/lib/Fuzzer/FuzzerIO.cpp +++ b/lib/Fuzzer/FuzzerIO.cpp @@ -15,13 +15,15 @@ #include #include #include +#include #include namespace fuzzer { static long GetEpoch(const std::string &Path) { struct stat St; - if (stat(Path.c_str(), &St)) return 0; + if (stat(Path.c_str(), &St)) + return 0; // Can't stat, be conservative.
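// Editorial aside (not part of this patch): GetEpoch feeds the -reload logic. ListFilesInDir
// only rescans a directory when its mtime is newer than the cached epoch, so returning 0 on a
// failed stat means "pretend nothing changed": a transiently unreadable corpus directory never
// triggers a spurious reload. That is the conservative choice the comment above refers to.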
return St.st_mtime; } @@ -29,12 +31,15 @@ static std::vector<std::string> ListFilesInDir(const std::string &Dir, long *Epoch) { std::vector<std::string> V; if (Epoch) { - auto E = GetEpoch(Dir.c_str()); + auto E = GetEpoch(Dir); if (*Epoch >= E) return V; *Epoch = E; } DIR *D = opendir(Dir.c_str()); - if (!D) return V; + if (!D) { + Printf("No such directory: %s; exiting\n", Dir.c_str()); + exit(1); + } while (auto E = readdir(D)) { if (E->d_type == DT_REG || E->d_type == DT_LNK) V.push_back(E->d_name); @@ -45,6 +50,10 @@ static std::vector<std::string> ListFilesInDir(const std::string &Dir, Unit FileToVector(const std::string &Path) { std::ifstream T(Path); + if (!T) { + Printf("No such directory: %s; exiting\n", Path.c_str()); + exit(1); + } return Unit((std::istreambuf_iterator<char>(T)), std::istreambuf_iterator<char>()); } @@ -60,8 +69,11 @@ void CopyFileToErr(const std::string &Path) { } void WriteToFile(const Unit &U, const std::string &Path) { - std::ofstream OF(Path); - OF.write((const char*)U.data(), U.size()); + // Use raw C interface because this function may be called from a sig handler. + FILE *Out = fopen(Path.c_str(), "w"); + if (!Out) return; + fwrite(U.data(), sizeof(U[0]), U.size(), Out); + fclose(Out); } void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, @@ -79,11 +91,6 @@ std::string DirPlusFile(const std::string &DirPath, return DirPath + "/" + FileName; } -void PrintFileAsBase64(const std::string &Path) { - std::string Cmd = "base64 -w 0 < " + Path + "; echo"; - ExecuteCommand(Cmd); -} - void Printf(const char *Fmt, ...) { va_list ap; va_start(ap, Fmt); diff --git a/lib/Fuzzer/FuzzerInterface.cpp b/lib/Fuzzer/FuzzerInterface.cpp index dcd4e746013c..bcd726fc08e4 100644 --- a/lib/Fuzzer/FuzzerInterface.cpp +++ b/lib/Fuzzer/FuzzerInterface.cpp @@ -14,14 +14,17 @@ #include "FuzzerInternal.h" namespace fuzzer { -size_t UserSuppliedFuzzer::BasicMutate(uint8_t *Data, size_t Size, - size_t MaxSize) { - return ::fuzzer::Mutate(Data, Size, MaxSize); -} -size_t UserSuppliedFuzzer::BasicCrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize) { - return ::fuzzer::CrossOver(Data1, Size1, Data2, Size2, Out, MaxOutSize); + +void FuzzerRandomLibc::ResetSeed(unsigned int seed) { srand(seed); } + +size_t FuzzerRandomLibc::Rand() { return rand(); } + +UserSuppliedFuzzer::UserSuppliedFuzzer(FuzzerRandomBase *Rand) + : Rand(Rand), MD(*Rand) {} + +UserSuppliedFuzzer::~UserSuppliedFuzzer() { + if (OwnRand) + delete Rand; } } // namespace fuzzer. diff --git a/lib/Fuzzer/FuzzerInterface.h b/lib/Fuzzer/FuzzerInterface.h index 3fd807afcfeb..65f1707ba922 100644 --- a/lib/Fuzzer/FuzzerInterface.h +++ b/lib/Fuzzer/FuzzerInterface.h @@ -18,10 +18,14 @@ #include #include +#include <string> +#include <vector> namespace fuzzer { +typedef std::vector<uint8_t> Unit; -typedef void (*UserCallback)(const uint8_t *Data, size_t Size); +/// Returns an int 0. Values other than zero are reserved for future use. +typedef int (*UserCallback)(const uint8_t *Data, size_t Size); /** Simple C-like interface with a single user-supplied callback. Usage: #\code #include "FuzzerInterface.h" -void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { DoStuffWithData(Data, Size); + return 0; } // Implement your own main() or use the one from FuzzerMain.cpp.
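// Editorial aside (not part of this patch): the smallest complete fuzz target under the
// int-returning callback convention shown above. The planted "bug" is contrived for
// illustration; link this against FuzzerMain.cpp or call FuzzerDriver from your own main().
#include <cstddef>
#include <cstdint>

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  if (Size >= 3 && Data[0] == 'F' && Data[1] == 'U' && Data[2] == 'Z')
    __builtin_trap(); // A planted crash for the fuzzer to find.
  return 0;           // Non-zero return values are reserved for future use.
}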
@@ -42,6 +47,79 @@ int main(int argc, char **argv) { */ int FuzzerDriver(int argc, char **argv, UserCallback Callback); +class FuzzerRandomBase { + public: + FuzzerRandomBase(){} + virtual ~FuzzerRandomBase(){}; + virtual void ResetSeed(unsigned int seed) = 0; + // Return a random number. + virtual size_t Rand() = 0; + // Return a random number in range [0,n). + size_t operator()(size_t n) { return n ? Rand() % n : 0; } + bool RandBool() { return Rand() % 2; } +}; + +class FuzzerRandomLibc : public FuzzerRandomBase { + public: + FuzzerRandomLibc(unsigned int seed) { ResetSeed(seed); } + void ResetSeed(unsigned int seed) override; + ~FuzzerRandomLibc() override {} + size_t Rand() override; +}; + +class MutationDispatcher { + public: + MutationDispatcher(FuzzerRandomBase &Rand); + ~MutationDispatcher(); + /// Indicate that we are about to start a new sequence of mutations. + void StartMutationSequence(); + /// Print the current sequence of mutations. + void PrintMutationSequence(); + /// Mutates data by shuffling bytes. + size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by erasing a byte. + size_t Mutate_EraseByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by inserting a byte. + size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by changing one byte. + size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); + /// Mutates data by changing one bit. + size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Mutates data by adding a word from the dictionary. + size_t Mutate_AddWordFromDictionary(uint8_t *Data, size_t Size, + size_t MaxSize); + + /// Tries to find an ASCII integer in Data, changes it to another ASCII int. + size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); + + /// CrossOver Data with some other element of the corpus. + size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Applies one of the above mutations. + /// Returns the new size of data which could be up to MaxSize. + size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); + + /// Creates a cross-over of two pieces of Data, returns its size. + size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, + size_t Size2, uint8_t *Out, size_t MaxOutSize); + + void AddWordToDictionary(const uint8_t *Word, size_t Size); + void SetCorpus(const std::vector<Unit> *Corpus); + + private: + FuzzerRandomBase &Rand; + struct Impl; + Impl *MDImpl; +}; + +// For backward compatibility only, deprecated. +static inline size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize, + FuzzerRandomBase &Rand) { + MutationDispatcher MD(Rand); + return MD.Mutate(Data, Size, MaxSize); +} + /** An abstract class that allows to use user-supplied mutators with libFuzzer. Usage: @@ -50,8 +128,9 @@ Usage: #include "FuzzerInterface.h" class MyFuzzer : public fuzzer::UserSuppliedFuzzer { public: + MyFuzzer(fuzzer::FuzzerRandomBase *Rand); // Must define the target function. - void TargetFunction(...) { ... } + int TargetFunction(...) { ...; return 0; } // Optionally define the mutator. size_t Mutate(...) { ... } // Optionally define the CrossOver method. @@ -66,33 +145,45 @@ int main(int argc, char **argv) { */ class UserSuppliedFuzzer { public: + UserSuppliedFuzzer(FuzzerRandomBase *Rand); /// Executes the target function on 'Size' bytes of 'Data'.
- virtual void TargetFunction(const uint8_t *Data, size_t Size) = 0; + virtual int TargetFunction(const uint8_t *Data, size_t Size) = 0; + virtual void StartMutationSequence() { MD.StartMutationSequence(); } + virtual void PrintMutationSequence() { MD.PrintMutationSequence(); } + virtual void SetCorpus(const std::vector *Corpus) { + MD.SetCorpus(Corpus); + } /// Mutates 'Size' bytes of data in 'Data' inplace into up to 'MaxSize' bytes, /// returns the new size of the data, which should be positive. virtual size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { - return BasicMutate(Data, Size, MaxSize); + return MD.Mutate(Data, Size, MaxSize); } /// Crosses 'Data1' and 'Data2', writes up to 'MaxOutSize' bytes into Out, /// returns the number of bytes written, which should be positive. virtual size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, size_t Size2, uint8_t *Out, size_t MaxOutSize) { - return BasicCrossOver(Data1, Size1, Data2, Size2, Out, MaxOutSize); + return MD.CrossOver(Data1, Size1, Data2, Size2, Out, MaxOutSize); } - virtual ~UserSuppliedFuzzer() {} + virtual ~UserSuppliedFuzzer(); - protected: - /// These can be called internally by Mutate and CrossOver. - size_t BasicMutate(uint8_t *Data, size_t Size, size_t MaxSize); - size_t BasicCrossOver(const uint8_t *Data1, size_t Size1, - const uint8_t *Data2, size_t Size2, - uint8_t *Out, size_t MaxOutSize); + FuzzerRandomBase &GetRand() { return *Rand; } + + MutationDispatcher &GetMD() { return MD; } + + private: + bool OwnRand = false; + FuzzerRandomBase *Rand; + MutationDispatcher MD; }; /// Runs the fuzzing with the UserSuppliedFuzzer. int FuzzerDriver(int argc, char **argv, UserSuppliedFuzzer &USF); +/// More C++-ish interface. +int FuzzerDriver(const std::vector &Args, UserSuppliedFuzzer &USF); +int FuzzerDriver(const std::vector &Args, UserCallback Callback); + } // namespace fuzzer #endif // LLVM_FUZZER_INTERFACE_H diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index c387fe7a7c60..e96a4bc35fe2 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -8,6 +8,10 @@ //===----------------------------------------------------------------------===// // Define the main class fuzzer::Fuzzer and most functions. //===----------------------------------------------------------------------===// + +#ifndef LLVM_FUZZER_INTERNAL_H +#define LLVM_FUZZER_INTERNAL_H + #include #include #include @@ -20,7 +24,6 @@ #include "FuzzerInterface.h" namespace fuzzer { -typedef std::vector Unit; using namespace std::chrono; std::string FileToString(const std::string &Path); @@ -33,25 +36,36 @@ void CopyFileToErr(const std::string &Path); std::string DirPlusFile(const std::string &DirPath, const std::string &FileName); -size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); - -size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, - size_t Size2, uint8_t *Out, size_t MaxOutSize); - void Printf(const char *Fmt, ...); void Print(const Unit &U, const char *PrintAfter = ""); void PrintASCII(const Unit &U, const char *PrintAfter = ""); std::string Hash(const Unit &U); void SetTimer(int Seconds); -void PrintFileAsBase64(const std::string &Path); -void ExecuteCommand(const std::string &Command); +std::string Base64(const Unit &U); +int ExecuteCommand(const std::string &Command); // Private copy of SHA1 implementation. static const int kSHA1NumBytes = 20; // Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'. 
void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out); +// Changes U to contain only ASCII (isprint+isspace) characters. +// Returns true iff U has been changed. +bool ToASCII(Unit &U); +bool IsASCII(const Unit &U); + int NumberOfCpuCores(); +int GetPid(); + +// Dictionary. + +// Parses one dictionary entry. +// If successful, writes the entry to Unit and returns true, +// otherwise returns false. +bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); +// Parses the dictionary file, fills Units, returns true iff all lines +// were parsed successfully. +bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units); class Fuzzer { public: @@ -59,27 +73,42 @@ class Fuzzer { int Verbosity = 1; int MaxLen = 0; int UnitTimeoutSec = 300; + int MaxTotalTimeSec = 0; bool DoCrossOver = true; int MutateDepth = 5; bool ExitOnFirst = false; bool UseCounters = false; + bool UseIndirCalls = true; bool UseTraces = false; bool UseFullCoverageSet = false; bool Reload = true; + bool ShuffleAtStartUp = true; int PreferSmallDuringInitialShuffle = -1; size_t MaxNumberOfRuns = ULONG_MAX; int SyncTimeout = 600; + int ReportSlowUnits = 10; + bool OnlyASCII = false; + int TBMDepth = 10; + int TBMWidth = 10; std::string OutputCorpus; std::string SyncCommand; - std::vector<std::string> Tokens; + std::string ArtifactPrefix = "./"; + std::string ExactArtifactPath; + bool SaveArtifacts = true; + bool PrintNEW = true; // Print a status line when new units are found; + bool OutputCSV = false; }; Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options); void AddToCorpus(const Unit &U) { Corpus.push_back(U); } - void Loop(size_t NumIterations); + size_t ChooseUnitIdxToMutate(); + const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; + void Loop(); + void Drill(); void ShuffleAndMinimize(); void InitializeTraceState(); size_t CorpusSize() const { return Corpus.size(); } void ReadDir(const std::string &Path, long *Epoch) { + Printf("Loading corpus: %s\n", Path.c_str()); ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch); } void RereadOutputCorpus(); @@ -95,25 +124,31 @@ class Fuzzer { static void StaticAlarmCallback(); - Unit SubstituteTokens(const Unit &U) const; + void ExecuteCallback(const Unit &U); + + // Merge Corpora[1:] into Corpora[0].
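// Editorial aside (not part of this patch): the Merge entry point declared just below
// implements the policy behind -merge=1. A self-contained sketch of that policy; MergeCorpora
// and its two callback parameters are illustrative stand-ins for Fuzzer::RunOne (which returns
// true iff the unit added new coverage) and WriteToOutputCorpus.
#include <cstdint>
#include <vector>

using UnitSketch = std::vector<uint8_t>;

static size_t MergeCorpora(const std::vector<UnitSketch> &Initial,
                           const std::vector<UnitSketch> &Extra,
                           bool (*RunOne)(const UnitSketch &),
                           void (*Keep)(const UnitSketch &)) {
  for (const UnitSketch &U : Initial)
    RunOne(U); // Warm-up: record the baseline coverage of the first corpus.
  size_t NumMerged = 0;
  for (const UnitSketch &U : Extra)
    if (RunOne(U)) { // Coverage still grew, so the unit is "interesting".
      Keep(U);
      ++NumMerged;
    }
  return NumMerged;
}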
+ void Merge(const std::vector &Corpora); private: void AlarmCallback(); - void ExecuteCallback(const Unit &U); - void MutateAndTestOne(Unit *U); - void ReportNewCoverage(size_t NewCoverage, const Unit &U); - size_t RunOne(const Unit &U); - void RunOneAndUpdateCorpus(const Unit &U); - size_t RunOneMaximizeTotalCoverage(const Unit &U); - size_t RunOneMaximizeFullCoverageSet(const Unit &U); - size_t RunOneMaximizeCoveragePairs(const Unit &U); + void MutateAndTestOne(); + void ReportNewCoverage(const Unit &U); + bool RunOne(const Unit &U); + void RunOneAndUpdateCorpus(Unit &U); void WriteToOutputCorpus(const Unit &U); - void WriteToCrash(const Unit &U, const char *Prefix); - void PrintStats(const char *Where, size_t Cov, const char *End = "\n"); - void PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter = ""); + void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); + void PrintStats(const char *Where, const char *End = "\n"); + void PrintStatusForNewUnit(const Unit &U); + void PrintUnitInASCII(const Unit &U, const char *PrintAfter = ""); void SyncCorpus(); + size_t RecordBlockCoverage(); + size_t RecordCallerCalleeCoverage(); + void PrepareCoverageBeforeRun(); + bool CheckCoverageAfterRun(); + + // Trace-based fuzzing: we run a unit with some kind of tracing // enabled and record potentially useful mutations. Then // We apply these mutations one by one to the unit and run it again. @@ -131,10 +166,10 @@ class Fuzzer { Unit CurrentUnit; size_t TotalNumberOfRuns = 0; + size_t TotalNumberOfExecutedTraceBasedMutations = 0; std::vector Corpus; std::unordered_set UnitHashesAddedToCorpus; - std::unordered_set FullCoverageSets; // For UseCounters std::vector CounterBitmap; @@ -151,17 +186,23 @@ class Fuzzer { system_clock::time_point UnitStartTime; long TimeOfLongestUnitInSeconds = 0; long EpochOfLastReadOfOutputCorpus = 0; + size_t LastRecordedBlockCoverage = 0; + size_t LastRecordedCallerCalleeCoverage = 0; }; class SimpleUserSuppliedFuzzer: public UserSuppliedFuzzer { public: - SimpleUserSuppliedFuzzer(UserCallback Callback) : Callback(Callback) {} - virtual void TargetFunction(const uint8_t *Data, size_t Size) { + SimpleUserSuppliedFuzzer(FuzzerRandomBase *Rand, UserCallback Callback) + : UserSuppliedFuzzer(Rand), Callback(Callback) {} + + virtual int TargetFunction(const uint8_t *Data, size_t Size) override { return Callback(Data, Size); } private: - UserCallback Callback; + UserCallback Callback = nullptr; }; }; // namespace fuzzer + +#endif // LLVM_FUZZER_INTERNAL_H diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 9ef47583cbb9..7ea82f4f15dd 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -10,10 +10,43 @@ //===----------------------------------------------------------------------===// #include "FuzzerInternal.h" -#include #include +#if defined(__has_include) +# if __has_include() +# include +# endif +#endif + +extern "C" { +// Re-declare some of the sanitizer functions as "weak" so that +// libFuzzer can be linked w/o the sanitizers and sanitizer-coverage +// (in which case it will complain at start-up time). 
+__attribute__((weak)) void __sanitizer_print_stack_trace(); +__attribute__((weak)) void __sanitizer_reset_coverage(); +__attribute__((weak)) size_t __sanitizer_get_total_unique_caller_callee_pairs(); +__attribute__((weak)) size_t __sanitizer_get_total_unique_coverage(); +__attribute__((weak)) +void __sanitizer_set_death_callback(void (*callback)(void)); +__attribute__((weak)) size_t __sanitizer_get_number_of_counters(); +__attribute__((weak)) +uintptr_t __sanitizer_update_counter_bitset_and_clear_counters(uint8_t *bitset); +} + namespace fuzzer { +static const size_t kMaxUnitSizeToPrint = 256; + +static void MissingWeakApiFunction(const char *FnName) { + Printf("ERROR: %s is not defined. Exiting.\n" + "Did you use -fsanitize-coverage=... to build your code?\n", FnName); + exit(1); +} + +#define CHECK_WEAK_API_FUNCTION(fn) \ + do { \ + if (!fn) \ + MissingWeakApiFunction(#fn); \ + } while (false) // Only one Fuzzer per process. static Fuzzer *F; @@ -27,17 +60,12 @@ Fuzzer::Fuzzer(UserSuppliedFuzzer &USF, FuzzingOptions Options) } void Fuzzer::SetDeathCallback() { + CHECK_WEAK_API_FUNCTION(__sanitizer_set_death_callback); __sanitizer_set_death_callback(StaticDeathCallback); } -void Fuzzer::PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter) { - if (Options.Tokens.empty()) { - PrintASCII(U, PrintAfter); - } else { - auto T = SubstituteTokens(U); - T.push_back(0); - Printf("%s%s", T.data(), PrintAfter); - } +void Fuzzer::PrintUnitInASCII(const Unit &U, const char *PrintAfter) { + PrintASCII(U, PrintAfter); } void Fuzzer::StaticDeathCallback() { @@ -47,9 +75,11 @@ void Fuzzer::StaticDeathCallback() { void Fuzzer::DeathCallback() { Printf("DEATH:\n"); - Print(CurrentUnit, "\n"); - PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); - WriteToCrash(CurrentUnit, "crash-"); + if (CurrentUnit.size() <= kMaxUnitSizeToPrint) { + Print(CurrentUnit, "\n"); + PrintUnitInASCII(CurrentUnit, "\n"); + } + WriteUnitToFileWithPrefix(CurrentUnit, "crash-"); } void Fuzzer::StaticAlarmCallback() { @@ -68,19 +98,49 @@ void Fuzzer::AlarmCallback() { Printf("ALARM: working on the last Unit for %zd seconds\n", Seconds); Printf(" and the timeout value is %d (use -timeout=N to change)\n", Options.UnitTimeoutSec); - Print(CurrentUnit, "\n"); - PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); - WriteToCrash(CurrentUnit, "timeout-"); + if (CurrentUnit.size() <= kMaxUnitSizeToPrint) { + Print(CurrentUnit, "\n"); + PrintUnitInASCII(CurrentUnit, "\n"); + } + WriteUnitToFileWithPrefix(CurrentUnit, "timeout-"); + Printf("==%d== ERROR: libFuzzer: timeout after %d seconds\n", GetPid(), + Seconds); + if (__sanitizer_print_stack_trace) + __sanitizer_print_stack_trace(); + Printf("SUMMARY: libFuzzer: timeout\n"); exit(1); } } -void Fuzzer::PrintStats(const char *Where, size_t Cov, const char *End) { - if (!Options.Verbosity) return; +void Fuzzer::PrintStats(const char *Where, const char *End) { size_t Seconds = secondsSinceProcessStartUp(); size_t ExecPerSec = (Seconds ? 
TotalNumberOfRuns / Seconds : 0); - Printf("#%zd\t%s cov %zd bits %zd units %zd exec/s %zd %s", TotalNumberOfRuns, - Where, Cov, TotalBits(), Corpus.size(), ExecPerSec, End); + + if (Options.OutputCSV) { + static bool csvHeaderPrinted = false; + if (!csvHeaderPrinted) { + csvHeaderPrinted = true; + Printf("runs,block_cov,bits,cc_cov,corpus,execs_per_sec,tbms,reason\n"); + } + Printf("%zd,%zd,%zd,%zd,%zd,%zd,%zd,%s\n", TotalNumberOfRuns, + LastRecordedBlockCoverage, TotalBits(), + LastRecordedCallerCalleeCoverage, Corpus.size(), ExecPerSec, + TotalNumberOfExecutedTraceBasedMutations, Where); + } + + if (!Options.Verbosity) + return; + Printf("#%zd\t%s", TotalNumberOfRuns, Where); + if (LastRecordedBlockCoverage) + Printf(" cov: %zd", LastRecordedBlockCoverage); + if (auto TB = TotalBits()) + Printf(" bits: %zd", TB); + if (LastRecordedCallerCalleeCoverage) + Printf(" indir: %zd", LastRecordedCallerCalleeCoverage); + Printf(" units: %zd exec/s: %zd", Corpus.size(), ExecPerSec); + if (TotalNumberOfExecutedTraceBasedMutations) + Printf(" tbm: %zd", TotalNumberOfExecutedTraceBasedMutations); + Printf("%s", End); } void Fuzzer::RereadOutputCorpus() { @@ -101,143 +161,127 @@ void Fuzzer::RereadOutputCorpus() { if (UnitHashesAddedToCorpus.insert(Hash(X)).second) { CurrentUnit.clear(); CurrentUnit.insert(CurrentUnit.begin(), X.begin(), X.end()); - size_t NewCoverage = RunOne(CurrentUnit); - if (NewCoverage) { + if (RunOne(CurrentUnit)) { Corpus.push_back(X); - if (Options.Verbosity >= 1) - PrintStats("RELOAD", NewCoverage); + PrintStats("RELOAD"); } } } } void Fuzzer::ShuffleAndMinimize() { - size_t MaxCov = 0; - bool PreferSmall = - (Options.PreferSmallDuringInitialShuffle == 1 || - (Options.PreferSmallDuringInitialShuffle == -1 && rand() % 2)); + bool PreferSmall = (Options.PreferSmallDuringInitialShuffle == 1 || + (Options.PreferSmallDuringInitialShuffle == -1 && + USF.GetRand().RandBool())); if (Options.Verbosity) Printf("PreferSmall: %d\n", PreferSmall); - PrintStats("READ ", 0); + PrintStats("READ "); std::vector NewCorpus; - std::random_shuffle(Corpus.begin(), Corpus.end()); - if (PreferSmall) - std::stable_sort( - Corpus.begin(), Corpus.end(), - [](const Unit &A, const Unit &B) { return A.size() < B.size(); }); + if (Options.ShuffleAtStartUp) { + std::random_shuffle(Corpus.begin(), Corpus.end(), USF.GetRand()); + if (PreferSmall) + std::stable_sort( + Corpus.begin(), Corpus.end(), + [](const Unit &A, const Unit &B) { return A.size() < B.size(); }); + } Unit &U = CurrentUnit; for (const auto &C : Corpus) { for (size_t First = 0; First < 1; First++) { U.clear(); size_t Last = std::min(First + Options.MaxLen, C.size()); U.insert(U.begin(), C.begin() + First, C.begin() + Last); - size_t NewCoverage = RunOne(U); - if (NewCoverage) { - MaxCov = NewCoverage; + if (Options.OnlyASCII) + ToASCII(U); + if (RunOne(U)) { NewCorpus.push_back(U); if (Options.Verbosity >= 2) - Printf("NEW0: %zd L %zd\n", NewCoverage, U.size()); + Printf("NEW0: %zd L %zd\n", LastRecordedBlockCoverage, U.size()); } } } Corpus = NewCorpus; for (auto &X : Corpus) UnitHashesAddedToCorpus.insert(Hash(X)); - PrintStats("INITED", MaxCov); + PrintStats("INITED"); } -size_t Fuzzer::RunOne(const Unit &U) { +bool Fuzzer::RunOne(const Unit &U) { UnitStartTime = system_clock::now(); TotalNumberOfRuns++; - size_t Res = 0; - if (Options.UseFullCoverageSet) - Res = RunOneMaximizeFullCoverageSet(U); - else - Res = RunOneMaximizeTotalCoverage(U); + + PrepareCoverageBeforeRun(); + ExecuteCallback(U); + bool Res = CheckCoverageAfterRun(); + 
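// Editorial aside (not part of this patch): the pulse check just below relies on the classic
// bit trick that N & (N - 1) clears the lowest set bit, so !(N & (N - 1)) is true exactly when
// N is 0 or a power of two. The "pulse " status line therefore appears at runs 1, 2, 4, 8, ...,
// exponentially less often as fuzzing proceeds. For example, 8 & 7 == 0 (print) while
// 12 & 11 == 8 (skip).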
auto UnitStopTime = system_clock::now(); auto TimeOfUnit = duration_cast(UnitStopTime - UnitStartTime).count(); - if (TimeOfUnit > TimeOfLongestUnitInSeconds) { + if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && + secondsSinceProcessStartUp() >= 2) + PrintStats("pulse "); + if (TimeOfUnit > TimeOfLongestUnitInSeconds && + TimeOfUnit >= Options.ReportSlowUnits) { TimeOfLongestUnitInSeconds = TimeOfUnit; - Printf("Longest unit: %zd s:\n", TimeOfLongestUnitInSeconds); - Print(U, "\n"); + Printf("Slowest unit: %zd s:\n", TimeOfLongestUnitInSeconds); + WriteUnitToFileWithPrefix(U, "slow-unit-"); } return Res; } -void Fuzzer::RunOneAndUpdateCorpus(const Unit &U) { +void Fuzzer::RunOneAndUpdateCorpus(Unit &U) { if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) return; - ReportNewCoverage(RunOne(U), U); -} - -static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) { - uintptr_t Res = 0; - for (uintptr_t i = 0; i < NumPCs; i++) { - Res = (Res + PCs[i]) * 7; - } - return Res; -} - -Unit Fuzzer::SubstituteTokens(const Unit &U) const { - Unit Res; - for (auto Idx : U) { - if (Idx < Options.Tokens.size()) { - std::string Token = Options.Tokens[Idx]; - Res.insert(Res.end(), Token.begin(), Token.end()); - } else { - Res.push_back(' '); - } - } - // FIXME: Apply DFSan labels. - return Res; + if (Options.OnlyASCII) + ToASCII(U); + if (RunOne(U)) + ReportNewCoverage(U); } void Fuzzer::ExecuteCallback(const Unit &U) { - if (Options.Tokens.empty()) { - USF.TargetFunction(U.data(), U.size()); - } else { - auto T = SubstituteTokens(U); - USF.TargetFunction(T.data(), T.size()); - } + const uint8_t *Data = U.data(); + uint8_t EmptyData; + if (!Data) + Data = &EmptyData; + int Res = USF.TargetFunction(Data, U.size()); + (void)Res; + assert(Res == 0); } -// Experimental. -// Fuly reset the current coverage state, run a single unit, -// compute a hash function from the full coverage set, -// return non-zero if the hash value is new. -// This produces tons of new units and as is it's only suitable for small tests, -// e.g. test/FullCoverageSetTest.cpp. FIXME: make it scale. 
-size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) { - __sanitizer_reset_coverage(); - ExecuteCallback(U); - uintptr_t *PCs; - uintptr_t NumPCs =__sanitizer_get_coverage_guards(&PCs); - if (FullCoverageSets.insert(HashOfArrayOfPCs(PCs, NumPCs)).second) - return FullCoverageSets.size(); - return 0; +size_t Fuzzer::RecordBlockCoverage() { + CHECK_WEAK_API_FUNCTION(__sanitizer_get_total_unique_coverage); + return LastRecordedBlockCoverage = __sanitizer_get_total_unique_coverage(); } -size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) { - size_t NumCounters = __sanitizer_get_number_of_counters(); +size_t Fuzzer::RecordCallerCalleeCoverage() { + if (!Options.UseIndirCalls) + return 0; + if (!__sanitizer_get_total_unique_caller_callee_pairs) + return 0; + return LastRecordedCallerCalleeCoverage = + __sanitizer_get_total_unique_caller_callee_pairs(); +} + +void Fuzzer::PrepareCoverageBeforeRun() { if (Options.UseCounters) { + size_t NumCounters = __sanitizer_get_number_of_counters(); CounterBitmap.resize(NumCounters); __sanitizer_update_counter_bitset_and_clear_counters(0); } - size_t OldCoverage = __sanitizer_get_total_unique_coverage(); - ExecuteCallback(U); - size_t NewCoverage = __sanitizer_get_total_unique_coverage(); + RecordBlockCoverage(); + RecordCallerCalleeCoverage(); +} + +bool Fuzzer::CheckCoverageAfterRun() { + size_t OldCoverage = LastRecordedBlockCoverage; + size_t NewCoverage = RecordBlockCoverage(); + size_t OldCallerCalleeCoverage = LastRecordedCallerCalleeCoverage; + size_t NewCallerCalleeCoverage = RecordCallerCalleeCoverage(); size_t NumNewBits = 0; if (Options.UseCounters) NumNewBits = __sanitizer_update_counter_bitset_and_clear_counters( CounterBitmap.data()); - - if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)) && Options.Verbosity) - PrintStats("pulse ", NewCoverage); - - if (NewCoverage > OldCoverage || NumNewBits) - return NewCoverage; - return 0; + return NewCoverage > OldCoverage || + NewCallerCalleeCoverage > OldCallerCalleeCoverage || NumNewBits; } void Fuzzer::WriteToOutputCorpus(const Unit &U) { @@ -246,13 +290,20 @@ void Fuzzer::WriteToOutputCorpus(const Unit &U) { WriteToFile(U, Path); if (Options.Verbosity >= 2) Printf("Written to %s\n", Path.c_str()); + assert(!Options.OnlyASCII || IsASCII(U)); } -void Fuzzer::WriteToCrash(const Unit &U, const char *Prefix) { - std::string Path = Prefix + Hash(U); +void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { + if (!Options.SaveArtifacts) + return; + std::string Path = Options.ArtifactPrefix + Prefix + Hash(U); + if (!Options.ExactArtifactPath.empty()) + Path = Options.ExactArtifactPath; // Overrides ArtifactPrefix. 
WriteToFile(U, Path); - Printf("CRASHED; file written to %s\nBase64: ", Path.c_str()); - PrintFileAsBase64(Path); + Printf("artifact_prefix='%s'; Test unit written to %s\n", + Options.ArtifactPrefix.c_str(), Path.c_str()); + if (U.size() <= kMaxUnitSizeToPrint) + Printf("Base64: %s\n", Base64(U).c_str()); } void Fuzzer::SaveCorpus() { @@ -264,70 +315,181 @@ void Fuzzer::SaveCorpus() { Options.OutputCorpus.c_str()); } -void Fuzzer::ReportNewCoverage(size_t NewCoverage, const Unit &U) { - if (!NewCoverage) return; - Corpus.push_back(U); - UnitHashesAddedToCorpus.insert(Hash(U)); - PrintStats("NEW ", NewCoverage, ""); +void Fuzzer::PrintStatusForNewUnit(const Unit &U) { + if (!Options.PrintNEW) + return; + PrintStats("NEW ", ""); if (Options.Verbosity) { - Printf(" L: %zd", U.size()); - if (U.size() < 30) { - Printf(" "); - PrintUnitInASCIIOrTokens(U, "\t"); - Print(U); - } + Printf(" L: %zd ", U.size()); + USF.PrintMutationSequence(); Printf("\n"); } +} + +void Fuzzer::ReportNewCoverage(const Unit &U) { + Corpus.push_back(U); + UnitHashesAddedToCorpus.insert(Hash(U)); + PrintStatusForNewUnit(U); WriteToOutputCorpus(U); if (Options.ExitOnFirst) exit(0); } -void Fuzzer::MutateAndTestOne(Unit *U) { +void Fuzzer::Merge(const std::vector &Corpora) { + if (Corpora.size() <= 1) { + Printf("Merge requires two or more corpus dirs\n"); + return; + } + auto InitialCorpusDir = Corpora[0]; + ReadDir(InitialCorpusDir, nullptr); + Printf("Merge: running the initial corpus '%s' of %d units\n", + InitialCorpusDir.c_str(), Corpus.size()); + for (auto &U : Corpus) + RunOne(U); + + std::vector ExtraCorpora(Corpora.begin() + 1, Corpora.end()); + + size_t NumTried = 0; + size_t NumMerged = 0; + for (auto &C : ExtraCorpora) { + Corpus.clear(); + ReadDir(C, nullptr); + Printf("Merge: merging the extra corpus '%s' of %zd units\n", C.c_str(), + Corpus.size()); + for (auto &U : Corpus) { + NumTried++; + if (RunOne(U)) { + WriteToOutputCorpus(U); + NumMerged++; + } + } + } + Printf("Merge: written %zd out of %zd units\n", NumMerged, NumTried); +} + +void Fuzzer::MutateAndTestOne() { + auto &U = CurrentUnit; + USF.StartMutationSequence(); + + U = ChooseUnitToMutate(); + for (int i = 0; i < Options.MutateDepth; i++) { StartTraceRecording(); - size_t Size = U->size(); - U->resize(Options.MaxLen); - size_t NewSize = USF.Mutate(U->data(), Size, U->size()); + size_t Size = U.size(); + U.resize(Options.MaxLen); + size_t NewSize = USF.Mutate(U.data(), Size, U.size()); assert(NewSize > 0 && "Mutator returned empty unit"); assert(NewSize <= (size_t)Options.MaxLen && "Mutator return overisized unit"); - U->resize(NewSize); - RunOneAndUpdateCorpus(*U); + U.resize(NewSize); + RunOneAndUpdateCorpus(U); size_t NumTraceBasedMutations = StopTraceRecording(); - for (size_t j = 0; j < NumTraceBasedMutations; j++) { - ApplyTraceBasedMutation(j, U); - RunOneAndUpdateCorpus(*U); + size_t TBMWidth = + std::min((size_t)Options.TBMWidth, NumTraceBasedMutations); + size_t TBMDepth = + std::min((size_t)Options.TBMDepth, NumTraceBasedMutations); + Unit BackUp = U; + for (size_t w = 0; w < TBMWidth; w++) { + U = BackUp; + for (size_t d = 0; d < TBMDepth; d++) { + TotalNumberOfExecutedTraceBasedMutations++; + ApplyTraceBasedMutation(USF.GetRand()(NumTraceBasedMutations), &U); + RunOneAndUpdateCorpus(U); + } } } } -void Fuzzer::Loop(size_t NumIterations) { - for (size_t i = 1; i <= NumIterations; i++) { - for (size_t J1 = 0; J1 < Corpus.size(); J1++) { - SyncCorpus(); - RereadOutputCorpus(); - if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) - 
return; - // First, simply mutate the unit w/o doing crosses. - CurrentUnit = Corpus[J1]; - MutateAndTestOne(&CurrentUnit); - // Now, cross with others. - if (Options.DoCrossOver && !Corpus[J1].empty()) { - for (size_t J2 = 0; J2 < Corpus.size(); J2++) { - CurrentUnit.resize(Options.MaxLen); - size_t NewSize = USF.CrossOver( - Corpus[J1].data(), Corpus[J1].size(), Corpus[J2].data(), - Corpus[J2].size(), CurrentUnit.data(), CurrentUnit.size()); - assert(NewSize > 0 && "CrossOver returned empty unit"); - assert(NewSize <= (size_t)Options.MaxLen && - "CrossOver return overisized unit"); - CurrentUnit.resize(NewSize); - MutateAndTestOne(&CurrentUnit); - } - } +// Returns an index of a random unit from the corpus to mutate. +// Hypothesis: units added to the corpus last are more likely to be interesting. +// This function gives more weight to the more recent units. +size_t Fuzzer::ChooseUnitIdxToMutate() { + size_t N = Corpus.size(); + size_t Total = (N + 1) * N / 2; + size_t R = USF.GetRand()(Total); + size_t IdxBeg = 0, IdxEnd = N; + // Binary search. + while (IdxEnd - IdxBeg >= 2) { + size_t Idx = IdxBeg + (IdxEnd - IdxBeg) / 2; + if (R > (Idx + 1) * Idx / 2) + IdxBeg = Idx; + else + IdxEnd = Idx; + } + assert(IdxBeg < N); + return IdxBeg; +} + +// Experimental search heuristic: drilling. +// - Read, shuffle, execute and minimize the corpus. +// - Choose one random unit. +// - Reset the coverage. +// - Start fuzzing as if the chosen unit was the only element of the corpus. +// - When done, reset the coverage again. +// - Merge the newly created corpus into the original one. +void Fuzzer::Drill() { + // The corpus is already read, shuffled, and minimized. + assert(!Corpus.empty()); + Options.PrintNEW = false; // Don't print NEW status lines when drilling. + + Unit U = ChooseUnitToMutate(); + + CHECK_WEAK_API_FUNCTION(__sanitizer_reset_coverage); + __sanitizer_reset_coverage(); + + std::vector<Unit> SavedCorpus; + SavedCorpus.swap(Corpus); + Corpus.push_back(U); + assert(Corpus.size() == 1); + RunOne(U); + PrintStats("DRILL "); + std::string SavedOutputCorpusPath; // Don't write new units while drilling. + SavedOutputCorpusPath.swap(Options.OutputCorpus); + Loop(); + + __sanitizer_reset_coverage(); + + PrintStats("REINIT"); + SavedOutputCorpusPath.swap(Options.OutputCorpus); + for (auto &U : SavedCorpus) + RunOne(U); + PrintStats("MERGE "); + Options.PrintNEW = true; + size_t NumMerged = 0; + for (auto &U : Corpus) { + if (RunOne(U)) { + PrintStatusForNewUnit(U); + NumMerged++; + WriteToOutputCorpus(U); } } + PrintStats("MERGED"); + if (NumMerged && Options.Verbosity) + Printf("Drilling discovered %zd new units\n", NumMerged); +} + +void Fuzzer::Loop() { + system_clock::time_point LastCorpusReload = system_clock::now(); + if (Options.DoCrossOver) + USF.SetCorpus(&Corpus); + while (true) { + SyncCorpus(); + auto Now = system_clock::now(); + if (duration_cast<seconds>(Now - LastCorpusReload).count()) { + RereadOutputCorpus(); + LastCorpusReload = Now; + } + if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) + break; + if (Options.MaxTotalTimeSec > 0 && + secondsSinceProcessStartUp() > + static_cast<size_t>(Options.MaxTotalTimeSec)) + break; + // Perform several mutations and runs.
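// Editorial aside (not part of this patch): ChooseUnitIdxToMutate above hands out
// N*(N+1)/2 equally likely "tickets", giving unit Idx roughly Idx + 1 of them, so the newest
// unit is mutated about N times as often as the oldest. The binary search just locates the
// unit whose cumulative ticket range around Idx*(Idx+1)/2 contains R. For example, with N = 4
// there are 10 tickets and the most recent unit holds 4 of them.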
+ MutateAndTestOne(); + } + + PrintStats("DONE ", "\n"); } void Fuzzer::SyncCorpus() { diff --git a/lib/Fuzzer/FuzzerMain.cpp b/lib/Fuzzer/FuzzerMain.cpp index c4dffb45d166..c5af5b059091 100644 --- a/lib/Fuzzer/FuzzerMain.cpp +++ b/lib/Fuzzer/FuzzerMain.cpp @@ -13,7 +13,7 @@ #include "FuzzerInternal.h" // This function should be defined by the user. -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); int main(int argc, char **argv) { return fuzzer::FuzzerDriver(argc, argv, LLVMFuzzerTestOneInput); diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp index f537fa90fd85..c3fa37a435d6 100644 --- a/lib/Fuzzer/FuzzerMutate.cpp +++ b/lib/Fuzzer/FuzzerMutate.cpp @@ -13,10 +13,42 @@ #include "FuzzerInternal.h" +#include + namespace fuzzer { -static char FlipRandomBit(char X) { - int Bit = rand() % 8; +struct Mutator { + size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); + const char *Name; +}; + +struct MutationDispatcher::Impl { + std::vector Dictionary; + std::vector Mutators; + std::vector CurrentMutatorSequence; + const std::vector *Corpus = nullptr; + + void Add(Mutator M) { Mutators.push_back(M); } + Impl() { + Add({&MutationDispatcher::Mutate_EraseByte, "EraseByte"}); + Add({&MutationDispatcher::Mutate_InsertByte, "InsertByte"}); + Add({&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"}); + Add({&MutationDispatcher::Mutate_ChangeBit, "ChangeBit"}); + Add({&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"}); + Add({&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"}); + Add({&MutationDispatcher::Mutate_CrossOver, "CrossOver"}); + } + void AddWordToDictionary(const uint8_t *Word, size_t Size) { + if (Dictionary.empty()) { + Add({&MutationDispatcher::Mutate_AddWordFromDictionary, "AddFromDict"}); + } + Dictionary.push_back(Unit(Word, Word + Size)); + } + void SetCorpus(const std::vector *Corpus) { this->Corpus = Corpus; } +}; + +static char FlipRandomBit(char X, FuzzerRandomBase &Rand) { + int Bit = Rand(8); char Mask = 1 << Bit; char R; if (X & (1 << Bit)) @@ -27,45 +59,167 @@ static char FlipRandomBit(char X) { return R; } -static char RandCh() { - if (rand() % 2) return rand(); +static char RandCh(FuzzerRandomBase &Rand) { + if (Rand.RandBool()) return Rand(256); const char *Special = "!*'();:@&=+$,/?%#[]123ABCxyz-`~."; - return Special[rand() % (sizeof(Special) - 1)]; + return Special[Rand(sizeof(Special) - 1)]; +} + +size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size, + size_t MaxSize) { + assert(Size); + size_t ShuffleAmount = Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size. + size_t ShuffleStart = Rand(Size - ShuffleAmount); + assert(ShuffleStart + ShuffleAmount <= Size); + std::random_shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount, + Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_EraseByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + assert(Size); + if (Size == 1) return 0; + size_t Idx = Rand(Size); + // Erase Data[Idx]. + memmove(Data + Idx, Data + Idx + 1, Size - Idx - 1); + return Size - 1; +} + +size_t MutationDispatcher::Mutate_InsertByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + if (Size == MaxSize) return 0; + size_t Idx = Rand(Size + 1); + // Insert new value at Data[Idx]. 
+ memmove(Data + Idx + 1, Data + Idx, Size - Idx); + Data[Idx] = RandCh(Rand); + return Size + 1; +} + +size_t MutationDispatcher::Mutate_ChangeByte(uint8_t *Data, size_t Size, + size_t MaxSize) { + size_t Idx = Rand(Size); + Data[Idx] = RandCh(Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size, + size_t MaxSize) { + size_t Idx = Rand(Size); + Data[Idx] = FlipRandomBit(Data[Idx], Rand); + return Size; +} + +size_t MutationDispatcher::Mutate_AddWordFromDictionary(uint8_t *Data, + size_t Size, + size_t MaxSize) { + auto &D = MDImpl->Dictionary; + assert(!D.empty()); + if (D.empty()) return 0; + const Unit &Word = D[Rand(D.size())]; + if (Size + Word.size() > MaxSize) return 0; + size_t Idx = Rand(Size + 1); + memmove(Data + Idx + Word.size(), Data + Idx, Size - Idx); + memcpy(Data + Idx, Word.data(), Word.size()); + return Size + Word.size(); +} + +size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, + size_t MaxSize) { + size_t B = Rand(Size); + while (B < Size && !isdigit(Data[B])) B++; + if (B == Size) return 0; + size_t E = B; + while (E < Size && isdigit(Data[E])) E++; + assert(B < E); + // now we have digits in [B, E). + // strtol and friends don't accept non-zero-terminated data, parse it manually. + uint64_t Val = Data[B] - '0'; + for (size_t i = B + 1; i < E; i++) + Val = Val * 10 + Data[i] - '0'; + + // Mutate the integer value. + switch(Rand(5)) { + case 0: Val++; break; + case 1: Val--; break; + case 2: Val /= 2; break; + case 3: Val *= 2; break; + case 4: Val = Rand(Val * Val); break; + default: assert(0); + } + // Just replace the bytes with the new ones, don't bother moving bytes. + for (size_t i = B; i < E; i++) { + size_t Idx = E + B - i - 1; + assert(Idx >= B && Idx < E); + Data[Idx] = (Val % 10) + '0'; + Val /= 10; + } + return Size; +} + +size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size, + size_t MaxSize) { + auto Corpus = MDImpl->Corpus; + if (!Corpus || Corpus->size() < 2 || Size == 0) return 0; + size_t Idx = Rand(Corpus->size()); + const Unit &Other = (*Corpus)[Idx]; + if (Other.empty()) return 0; + Unit U(MaxSize); + size_t NewSize = + CrossOver(Data, Size, Other.data(), Other.size(), U.data(), U.size()); + assert(NewSize > 0 && "CrossOver returned empty unit"); + assert(NewSize <= MaxSize && "CrossOver returned oversized unit"); + memcpy(Data, U.data(), NewSize); + return NewSize; +} + +void MutationDispatcher::StartMutationSequence() { + MDImpl->CurrentMutatorSequence.clear(); +} + +void MutationDispatcher::PrintMutationSequence() { + Printf("MS: %zd ", MDImpl->CurrentMutatorSequence.size()); + for (auto M : MDImpl->CurrentMutatorSequence) + Printf("%s-", M.Name); } // Mutates Data in place, returns new size. -size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { +size_t MutationDispatcher::Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { assert(MaxSize > 0); assert(Size <= MaxSize); if (Size == 0) { for (size_t i = 0; i < MaxSize; i++) - Data[i] = RandCh(); + Data[i] = RandCh(Rand); return MaxSize; } assert(Size > 0); - size_t Idx = rand() % Size; - switch (rand() % 3) { - case 0: - if (Size > 1) { - // Erase Data[Idx]. - memmove(Data + Idx, Data + Idx + 1, Size - Idx - 1); - Size = Size - 1; + // Some mutations may fail (e.g. can't insert more bytes if Size == MaxSize), + // in which case they will return 0. + // Try several times before returning un-mutated data.
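A note on Mutate_ChangeASCIIInteger above: the mutated value is written back right-to-left over the original digit span [B, E), so the unit keeps its length and surrounding bytes never move (excess high digits are simply dropped). A standalone sketch of that final step, not part of the patch:

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>

  // Write Val in decimal over Data[B, E), least significant digit last,
  // mirroring the loop at the end of Mutate_ChangeASCIIInteger.
  static void WriteDecimal(uint8_t *Data, size_t B, size_t E, uint64_t Val) {
    for (size_t i = B; i < E; i++) {
      size_t Idx = E + B - i - 1;   // walk the span right to left
      Data[Idx] = (Val % 10) + '0';
      Val /= 10;
    }
  }

  int main() {
    uint8_t Buf[] = "port=08080;";
    WriteDecimal(Buf, 5, 10, 16160);  // the "Val *= 2" case: 8080 -> 16160
    printf("%s\n", Buf);              // prints "port=16160;"
  }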
+ for (int Iter = 0; Iter < 10; Iter++) { + size_t MutatorIdx = Rand(MDImpl->Mutators.size()); + auto M = MDImpl->Mutators[MutatorIdx]; + size_t NewSize = (this->*(M.Fn))(Data, Size, MaxSize); + if (NewSize) { + MDImpl->CurrentMutatorSequence.push_back(M); + return NewSize; } - [[clang::fallthrough]]; - case 1: - if (Size < MaxSize) { - // Insert new value at Data[Idx]. - memmove(Data + Idx + 1, Data + Idx, Size - Idx); - Data[Idx] = RandCh(); - } - Data[Idx] = RandCh(); - break; - case 2: - Data[Idx] = FlipRandomBit(Data[Idx]); - break; } - assert(Size > 0); return Size; } +void MutationDispatcher::SetCorpus(const std::vector<Unit> *Corpus) { + MDImpl->SetCorpus(Corpus); +} + +void MutationDispatcher::AddWordToDictionary(const uint8_t *Word, size_t Size) { + MDImpl->AddWordToDictionary(Word, Size); +} + +MutationDispatcher::MutationDispatcher(FuzzerRandomBase &Rand) : Rand(Rand) { + MDImpl = new Impl; +} + +MutationDispatcher::~MutationDispatcher() { delete MDImpl; } + } // namespace fuzzer diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp index b2e1e956dfcf..8204a2ddc7c8 100644 --- a/lib/Fuzzer/FuzzerTraceState.cpp +++ b/lib/Fuzzer/FuzzerTraceState.cpp @@ -67,37 +67,34 @@ clang -fPIC -c -g -O2 -std=c++11 Fuzzer*.cpp clang++ -O0 -std=c++11 -fsanitize-coverage=edge,trace-cmp \ -fsanitize=dataflow \ - test/dfsan/DFSanSimpleCmpTest.cpp Fuzzer*.o - ./a.out + test/SimpleCmpTest.cpp Fuzzer*.o + ./a.out -use_traces=1 ) */ +#include "FuzzerDFSan.h" #include "FuzzerInternal.h" -#include #include #include #include +#if !LLVM_FUZZER_SUPPORTS_DFSAN +// Stubs for dfsan for platforms where dfsan does not exist and weak +// functions don't work. extern "C" { -__attribute__((weak)) -dfsan_label dfsan_create_label(const char *desc, void *userdata); -__attribute__((weak)) -void dfsan_set_label(dfsan_label label, void *addr, size_t size); -__attribute__((weak)) -void dfsan_add_label(dfsan_label label, void *addr, size_t size); -__attribute__((weak)) -const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label); -__attribute__((weak)) -dfsan_label dfsan_read_label(const void *addr, size_t size); +dfsan_label dfsan_create_label(const char *desc, void *userdata) { return 0; } +void dfsan_set_label(dfsan_label label, void *addr, size_t size) {} +void dfsan_add_label(dfsan_label label, void *addr, size_t size) {} +const struct dfsan_label_info *dfsan_get_label_info(dfsan_label label) { + return nullptr; +} +dfsan_label dfsan_read_label(const void *addr, size_t size) { return 0; } } // extern "C" +#endif // !LLVM_FUZZER_SUPPORTS_DFSAN namespace fuzzer { -static bool ReallyHaveDFSan() { - return &dfsan_create_label != nullptr; -} - // These values are copied from include/llvm/IR/InstrTypes.h. // We do not include the LLVM headers here to remain independent. // If these values ever change, an assertion in ComputeCmp will fail.
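Before moving on to FuzzerTraceState.cpp: the MutationDispatcher completed above is self-contained and can be driven outside the fuzzing loop, which is exactly how the unit tests later in this patch exercise it. A usage sketch (assumes the declarations from FuzzerInternal.h; not part of the patch):

  #include "FuzzerInternal.h"
  #include <cstdint>

  // Mutate a buffer in place a few times and report which mutators fired.
  void MutateBufferExample() {
    fuzzer::FuzzerRandomLibc Rand(0);   // libc rand()-based RNG, seed 0
    fuzzer::MutationDispatcher MD(Rand);
    uint8_t Buf[64] = {'a', 'b', 'c'};
    size_t Size = 3;
    MD.StartMutationSequence();
    for (int i = 0; i < 5; i++)
      Size = MD.Mutate(Buf, Size, sizeof(Buf));  // one random mutator per call
    MD.PrintMutationSequence();  // e.g. "MS: 5 InsertByte-ChangeBit-..."
  }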
@@ -138,7 +135,9 @@ static bool ComputeCmp(size_t CmpSize, size_t CmpType, uint64_t Arg1, if (CmpSize == 4) return ComputeCmp<uint32_t>(CmpType, Arg1, Arg2); if (CmpSize == 2) return ComputeCmp<uint16_t>(CmpType, Arg1, Arg2); if (CmpSize == 1) return ComputeCmp<uint8_t>(CmpType, Arg1, Arg2); - assert(0 && "unsupported type size"); + // Other size, == + if (CmpType == ICMP_EQ) return Arg1 == Arg2; + // assert(0 && "unsupported cmp and type size combination"); return true; } @@ -180,8 +179,13 @@ class TraceState { void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, uint64_t Arg1, uint64_t Arg2, dfsan_label L1, dfsan_label L2); - void TraceCmpCallback(size_t CmpSize, size_t CmpType, uint64_t Arg1, - uint64_t Arg2); + void DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits, uint64_t Val, + size_t NumCases, uint64_t *Cases, dfsan_label L); + void TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, + uint64_t Arg1, uint64_t Arg2); + + void TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, uint64_t Val, + size_t NumCases, uint64_t *Cases); int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData, size_t DataSize); @@ -191,9 +195,8 @@ class TraceState { Mutations.clear(); } - size_t StopTraceRecording() { + size_t StopTraceRecording(FuzzerRandomBase &Rand) { RecordingTraces = false; - std::random_shuffle(Mutations.begin(), Mutations.end()); return Mutations.size(); } @@ -207,7 +210,7 @@ class TraceState { } bool RecordingTraces = false; std::vector Mutations; - LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)] = {}; + LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)]; const Fuzzer::FuzzingOptions &Options; const Unit &CurrentUnit; }; @@ -255,36 +258,64 @@ void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, if (Options.Verbosity >= 3) - Printf("DFSAN: PC %lx S %zd T %zd A1 %llx A2 %llx R %d L1 %d L2 %d MU %zd\n", + Printf("DFSanCmpCallback: PC %lx S %zd T %zd A1 %llx A2 %llx R %d L1 %d L2 " + "%d MU %zd\n", PC, CmpSize, CmpType, Arg1, Arg2, Res, L1, L2, Mutations.size()); } +void TraceState::DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits, + uint64_t Val, size_t NumCases, + uint64_t *Cases, dfsan_label L) { + assert(ReallyHaveDFSan()); + if (!RecordingTraces) return; + if (!L) return; // Not actionable. + LabelRange LR = GetLabelRange(L); + size_t ValSize = ValSizeInBits / 8; + bool TryShort = IsTwoByteData(Val); + for (size_t i = 0; i < NumCases; i++) + TryShort &= IsTwoByteData(Cases[i]); + + for (size_t Pos = LR.Beg; Pos + ValSize <= LR.End; Pos++) + for (size_t i = 0; i < NumCases; i++) + Mutations.push_back({Pos, ValSize, Cases[i]}); + + if (TryShort) + for (size_t Pos = LR.Beg; Pos + 2 <= LR.End; Pos++) + for (size_t i = 0; i < NumCases; i++) + Mutations.push_back({Pos, 2, Cases[i]}); + + if (Options.Verbosity >= 3) + Printf("DFSanSwitchCallback: PC %lx Val %zd SZ %zd # %zd L %d: {%d, %d} " + "TryShort %d\n", + PC, Val, ValSize, NumCases, L, LR.Beg, LR.End, TryShort); +} + int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData, size_t DataSize) { int Res = 0; const uint8_t *Beg = CurrentUnit.data(); const uint8_t *End = Beg + CurrentUnit.size(); - for (const uint8_t *Cur = Beg; Cur < End; Cur += DataSize) { + for (const uint8_t *Cur = Beg; Cur < End; Cur++) { Cur = (uint8_t *)memmem(Cur, End - Cur, &PresentData, DataSize); if (!Cur) break; size_t Pos = Cur - Beg; assert(Pos < CurrentUnit.size()); + if (Mutations.size() > 100000U) return Res; // Just in case.
Mutations.push_back({Pos, DataSize, DesiredData}); Mutations.push_back({Pos, DataSize, DesiredData + 1}); Mutations.push_back({Pos, DataSize, DesiredData - 1}); - Cur += DataSize; Res++; } return Res; } -void TraceState::TraceCmpCallback(size_t CmpSize, size_t CmpType, uint64_t Arg1, - uint64_t Arg2) { +void TraceState::TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType, + uint64_t Arg1, uint64_t Arg2) { if (!RecordingTraces) return; int Added = 0; if (Options.Verbosity >= 3) - Printf("TraceCmp: %zd %zd\n", Arg1, Arg2); + Printf("TraceCmp %zd/%zd: %p %zd %zd\n", CmpSize, CmpType, PC, Arg1, Arg2); Added += TryToAddDesiredData(Arg1, Arg2, CmpSize); Added += TryToAddDesiredData(Arg2, Arg1, CmpSize); if (!Added && CmpSize == 4 && IsTwoByteData(Arg1) && IsTwoByteData(Arg2)) { @@ -293,16 +324,40 @@ void TraceState::TraceCmpCallback(size_t CmpSize, size_t CmpType, uint64_t Arg1, } } +void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, + uint64_t Val, size_t NumCases, + uint64_t *Cases) { + if (!RecordingTraces) return; + size_t ValSize = ValSizeInBits / 8; + bool TryShort = IsTwoByteData(Val); + for (size_t i = 0; i < NumCases; i++) + TryShort &= IsTwoByteData(Cases[i]); + + if (Options.Verbosity >= 3) + Printf("TraceSwitch: %p %zd # %zd; TryShort %d\n", PC, Val, NumCases, + TryShort); + + for (size_t i = 0; i < NumCases; i++) { + TryToAddDesiredData(Val, Cases[i], ValSize); + if (TryShort) + TryToAddDesiredData(Val, Cases[i], 2); + } + +} + static TraceState *TS; void Fuzzer::StartTraceRecording() { if (!TS) return; + if (ReallyHaveDFSan()) + for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) + dfsan_set_label(i + 1, &CurrentUnit[i], 1); TS->StartTraceRecording(); } size_t Fuzzer::StopTraceRecording() { if (!TS) return 0; - return TS->StopTraceRecording(); + return TS->StopTraceRecording(USF.GetRand()); } void Fuzzer::ApplyTraceBasedMutation(size_t Idx, Unit *U) { @@ -319,11 +374,19 @@ void Fuzzer::InitializeTraceState() { for (size_t i = 0; i < static_cast<size_t>(Options.MaxLen); i++) { dfsan_label L = dfsan_create_label("input", (void*)(i + 1)); // We assume that no one else has called dfsan_create_label before.
- assert(L == i + 1); - dfsan_set_label(L, &CurrentUnit[i], 1); + if (L != i + 1) { + Printf("DFSan labels are not starting from 1, exiting\n"); + exit(1); + } } } +static size_t InternalStrnlen(const char *S, size_t MaxLen) { + size_t Len = 0; + for (; Len < MaxLen && S[Len]; Len++) {} + return Len; +} + } // namespace fuzzer using fuzzer::TS; @@ -340,6 +403,13 @@ void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1, TS->DFSanCmpCallback(PC, CmpSize, Type, Arg1, Arg2, L1, L2); } +void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases, + dfsan_label L1, dfsan_label L2) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); + TS->DFSanSwitchCallback(PC, Cases[1], Val, Cases[0], Cases+2, L1); +} + void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2, size_t n, dfsan_label s1_label, dfsan_label s2_label, dfsan_label n_label) { @@ -354,12 +424,96 @@ void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2, TS->DFSanCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2, L1, L2); } +void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2, + size_t n, dfsan_label s1_label, + dfsan_label s2_label, dfsan_label n_label) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + n = std::min(n, fuzzer::InternalStrnlen(s1, n)); + n = std::min(n, fuzzer::InternalStrnlen(s2, n)); + // Simplification: handle only first 8 bytes. + memcpy(&S1, s1, std::min(n, sizeof(S1))); + memcpy(&S2, s2, std::min(n, sizeof(S2))); + dfsan_label L1 = dfsan_read_label(s1, n); + dfsan_label L2 = dfsan_read_label(s2, n); + TS->DFSanCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2, L1, L2); +} + +void dfsan_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2, + dfsan_label s1_label, dfsan_label s2_label) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + size_t Len1 = strlen(s1); + size_t Len2 = strlen(s2); + size_t N = std::min(Len1, Len2); + if (N <= 1) return; // Not interesting. + // Simplification: handle only first 8 bytes. + memcpy(&S1, s1, std::min(N, sizeof(S1))); + memcpy(&S2, s2, std::min(N, sizeof(S2))); + dfsan_label L1 = dfsan_read_label(s1, Len1); + dfsan_label L2 = dfsan_read_label(s2, Len2); + TS->DFSanCmpCallback(PC, N, fuzzer::ICMP_EQ, S1, S2, L1, L2); +} + +void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1, + const void *s2, size_t n) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + // Simplification: handle only first 8 bytes. + memcpy(&S1, s1, std::min(n, sizeof(S1))); + memcpy(&S2, s2, std::min(n, sizeof(S2))); + TS->TraceCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2); +} + +void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1, + const char *s2, size_t n) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + size_t Len1 = fuzzer::InternalStrnlen(s1, n); + size_t Len2 = fuzzer::InternalStrnlen(s2, n); + n = std::min(n, Len1); + n = std::min(n, Len2); + if (n <= 1) return; // Not interesting. + // Simplification: handle only first 8 bytes.
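Both trace_switch hooks above decode the compiler-emitted Cases table the same way: Cases[0] holds the number of case constants, Cases[1] the bit width of the switched value, and the constants themselves start at Cases[2]. A standalone sketch of that decoding (illustrative only, not part of the patch):

  #include <cstdint>
  #include <cstdio>

  // Decode the Cases table passed to __sanitizer_cov_trace_switch:
  //   Cases[0]    = number of case constants
  //   Cases[1]    = bit width of the switched value
  //   Cases[2...] = the case constants
  void DemoDecodeSwitchTable(uint64_t Val, const uint64_t *Cases) {
    uint64_t NumCases = Cases[0];
    uint64_t ValSizeInBits = Cases[1];
    printf("switch on %u-bit value %llu with %llu cases\n",
           (unsigned)ValSizeInBits, (unsigned long long)Val,
           (unsigned long long)NumCases);
    for (uint64_t i = 0; i < NumCases; i++)
      printf("  case %llu\n", (unsigned long long)Cases[2 + i]);
  }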
+ memcpy(&S1, s1, std::min(n, sizeof(S1))); + memcpy(&S2, s2, std::min(n, sizeof(S2))); + TS->TraceCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2); +} + +void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1, + const char *s2) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc); + uint64_t S1 = 0, S2 = 0; + size_t Len1 = strlen(s1); + size_t Len2 = strlen(s2); + size_t N = std::min(Len1, Len2); + if (N <= 1) return; // Not interesting. + // Simplification: handle only first 8 bytes. + memcpy(&S1, s1, std::min(N, sizeof(S1))); + memcpy(&S2, s2, std::min(N, sizeof(S2))); + TS->TraceCmpCallback(PC, N, fuzzer::ICMP_EQ, S1, S2); +} + +__attribute__((visibility("default"))) void __sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1, uint64_t Arg2) { if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); uint64_t CmpSize = (SizeAndType >> 32) / 8; uint64_t Type = (SizeAndType << 32) >> 32; - TS->TraceCmpCallback(CmpSize, Type, Arg1, Arg2); + TS->TraceCmpCallback(PC, CmpSize, Type, Arg1, Arg2); +} + +__attribute__((visibility("default"))) +void __sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases) { + if (!TS) return; + uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0)); + TS->TraceSwitchCallback(PC, Cases[1], Val, Cases[0], Cases + 2); } } // extern "C" diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp index e381c0406321..6c1133fffd37 100644 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ b/lib/Fuzzer/FuzzerUtil.cpp @@ -16,6 +16,7 @@ #include #include #include +#include <sstream> #include namespace fuzzer { @@ -51,7 +52,6 @@ static void AlarmHandler(int, siginfo_t *, void *) { void SetTimer(int Seconds) { struct itimerval T {{Seconds, 0}, {Seconds, 0}}; - Printf("SetTimer %d\n", Seconds); int Res = setitimer(ITIMER_REAL, &T, nullptr); assert(Res == 0); struct sigaction sigact; @@ -69,8 +69,131 @@ int NumberOfCpuCores() { return N; } -void ExecuteCommand(const std::string &Command) { - system(Command.c_str()); +int ExecuteCommand(const std::string &Command) { + return system(Command.c_str()); +} + +bool ToASCII(Unit &U) { + bool Changed = false; + for (auto &X : U) { + auto NewX = X; + NewX &= 127; + if (!isspace(NewX) && !isprint(NewX)) + NewX = ' '; + Changed |= NewX != X; + X = NewX; + } + return Changed; +} + +bool IsASCII(const Unit &U) { + for (auto X : U) + if (!(isprint(X) || isspace(X))) return false; + return true; +} + +bool ParseOneDictionaryEntry(const std::string &Str, Unit *U) { + U->clear(); + if (Str.empty()) return false; + size_t L = 0, R = Str.size() - 1; // We are parsing the range [L,R]. + // Skip spaces from both sides. + while (L < R && isspace(Str[L])) L++; + while (R > L && isspace(Str[R])) R--; + if (R - L < 2) return false; + // Check the closing " + if (Str[R] != '"') return false; + R--; + // Find the opening " + while (L < R && Str[L] != '"') L++; + if (L >= R) return false; + assert(Str[L] == '\"'); + L++; + assert(L <= R); + for (size_t Pos = L; Pos <= R; Pos++) { + uint8_t V = (uint8_t)Str[Pos]; + if (!isprint(V) && !isspace(V)) return false; + if (V == '\\') { + // Handle '\\' + if (Pos + 1 <= R && (Str[Pos + 1] == '\\' || Str[Pos + 1] == '"')) { + U->push_back(Str[Pos + 1]); + Pos++; + continue; + } + // Handle '\xAB' + if (Pos + 3 <= R && Str[Pos + 1] == 'x' + && isxdigit(Str[Pos + 2]) && isxdigit(Str[Pos + 3])) { + char Hex[] = "0xAA"; + Hex[2] = Str[Pos + 2]; + Hex[3] = Str[Pos + 3]; + U->push_back(strtol(Hex, nullptr, 16)); + Pos += 3; + continue; + } + return false; // Invalid escape.
+ } else { + // Any other character. + U->push_back(V); + } + } + return true; +} + +bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units) { + if (Text.empty()) { + Printf("ParseDictionaryFile: file does not exist or is empty\n"); + return false; + } + std::istringstream ISS(Text); + Units->clear(); + Unit U; + int LineNo = 0; + std::string S; + while (std::getline(ISS, S, '\n')) { + LineNo++; + size_t Pos = 0; + while (Pos < S.size() && isspace(S[Pos])) Pos++; // Skip spaces. + if (Pos == S.size()) continue; // Empty line. + if (S[Pos] == '#') continue; // Comment line. + if (ParseOneDictionaryEntry(S, &U)) { + Units->push_back(U); + } else { + Printf("ParseDictionaryFile: error in line %d\n\t\t%s\n", LineNo, + S.c_str()); + return false; + } + } + return true; +} + +int GetPid() { return getpid(); } + + +std::string Base64(const Unit &U) { + static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + std::string Res; + size_t i; + for (i = 0; i + 2 < U.size(); i += 3) { + uint32_t x = (U[i] << 16) + (U[i + 1] << 8) + U[i + 2]; + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += Table[(x >> 6) & 63]; + Res += Table[x & 63]; + } + if (i + 1 == U.size()) { + uint32_t x = (U[i] << 16); + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += "=="; + } else if (i + 2 == U.size()) { + uint32_t x = (U[i] << 16) + (U[i + 1] << 8); + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += Table[(x >> 6) & 63]; + Res += "="; + } + return Res; } } // namespace fuzzer diff --git a/lib/Fuzzer/cxx.dict b/lib/Fuzzer/cxx.dict new file mode 100644 index 000000000000..41350f47558b --- /dev/null +++ b/lib/Fuzzer/cxx.dict @@ -0,0 +1,122 @@ +"++" +"--" +"<<" +">>" +"+=" +"-=" +"*=" +"/=" +">>=" +"<<=" +"&=" +"|=" +"^=" +"%=" +"!=" +"&&" +"||" +"==" +">=" +"<=" +"->" +"alignas" +"alignof" +"and" +"and_eq" +"asm" +"auto" +"bitand" +"bitor" +"bool" +"break" +"case" +"catch" +"char" +"char16_t" +"char32_t" +"class" +"compl" +"concept" +"const" +"constexpr" +"const_cast" +"continue" +"decltype" +"default" +"delete" +"do" +"double" +"dynamic_cast" +"else" +"enum" +"explicit" +"export" +"extern" +"false" +"float" +"for" +"friend" +"goto" +"if" +"inline" +"int" +"long" +"mutable" +"namespace" +"new" +"noexcept" +"not" +"not_eq" +"nullptr" +"operator" +"or" +"or_eq" +"private" +"protected" +"public" +"register" +"reinterpret_cast" +"requires" +"return" +"short" +"signed" +"sizeof" +"static" +"static_assert" +"static_cast" +"struct" +"switch" +"template" +"this" +"thread_local" +"throw" +"true" +"try" +"typedef" +"typeid" +"typename" +"union" +"unsigned" +"using" +"virtual" +"void" +"volatile" +"wchar_t" +"while" +"xor" +"xor_eq" +"if" +"elif" +"else" +"endif" +"defined" +"ifdef" +"ifndef" +"define" +"undef" +"include" +"line" +"error" +"pragma" +"override" +"final" diff --git a/lib/Fuzzer/cxx_fuzzer_tokens.txt b/lib/Fuzzer/cxx_fuzzer_tokens.txt deleted file mode 100644 index f3c4f80e1467..000000000000 --- a/lib/Fuzzer/cxx_fuzzer_tokens.txt +++ /dev/null @@ -1,218 +0,0 @@ -# -## -` -~ -! -@ -$ -% -^ -& -* -( -) -_ -- -_ -= -+ -{ -} -[ -] -| -\ -, -. -/ -?
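For reference, ParseOneDictionaryEntry above accepts AFL-compatible dictionary entries: an optional name before the opening quote, a double-quoted value, the escapes \\, \" and \xAB, and # comment lines. A hypothetical dictionary file in this format (in the same spirit as cxx.dict above and test/dict1.txt below) looks like:

  # comment lines and blank lines are ignored
  "abc"
  kw1="thread_local"
  hex="\xDE\xAD"
  quote="\""
  backslash="\\"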
-> -< -; -: -' -" -++ --- -<< ->> -+= --= -*= -/= ->>= -<<= -&= -|= -^= -%= -!= -&& -|| -== ->= -<= --> -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -alignas -alignof -and -and_eq -asm -auto -bitand -bitor -bool -break -case -catch -char -char16_t -char32_t -class -compl -concept -const -constexpr -const_cast -continue -decltype -default -delete -do -double -dynamic_cast -else -enum -explicit -export -extern -false -float -for -friend -goto -if -inline -int -long -mutable -namespace -new -noexcept -not -not_eq -nullptr -operator -or -or_eq -private -protected -public -register -reinterpret_cast -requires -return -short -signed -sizeof -static -static_assert -static_cast -struct -switch -template -this -thread_local -throw -true -try -typedef -typeid -typename -union -unsigned -using -virtual -void -volatile -wchar_t -while -xor -xor_eq -if -elif -else -endif -defined -ifdef -ifndef -define -undef -include -line -error -pragma -override -final diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index a9acec15d4d3..674fcc3c9f8c 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -5,26 +5,41 @@ set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O0 -fsanitize-coverage=edge,indirect-calls") set(DFSanTests - DFSanMemcmpTest - DFSanSimpleCmpTest + MemcmpTest + SimpleCmpTest + StrcmpTest + StrncmpTest + SwitchTest ) set(Tests + CallerCalleeTest CounterTest - CxxTokensTest FourIndependentBranchesTest FullCoverageSetTest - InfiniteTest + MemcmpTest NullDerefTest + SimpleCmpTest + SimpleDictionaryTest + SimpleHashTest SimpleTest + StrcmpTest + StrncmpTest + SwitchTest TimeoutTest - ${DFSanTests} ) set(CustomMainTests UserSuppliedFuzzerTest ) +set(UninstrumentedTests + UninstrumentedTest + ) + +set(TraceBBTests + SimpleTest + ) set(TestBinaries) @@ -80,6 +95,17 @@ foreach(Test ${DFSanTests}) set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test}-DFSan) endforeach() +add_subdirectory(uninstrumented) + +foreach(Test ${UninstrumentedTests}) + set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test}-Uninstrumented) +endforeach() + +add_subdirectory(trace-bb) + +foreach(Test ${TraceBBTests}) + set(TestBinaries ${TestBinaries} LLVMFuzzer-${Test}-TraceBB) +endforeach() set_target_properties(${TestBinaries} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} diff --git a/lib/Fuzzer/test/CallerCalleeTest.cpp b/lib/Fuzzer/test/CallerCalleeTest.cpp new file mode 100644 index 000000000000..150b2fc04058 --- /dev/null +++ b/lib/Fuzzer/test/CallerCalleeTest.cpp @@ -0,0 +1,56 @@ +// Simple test for a fuzzer. +// Try to find the target using the indirect caller-callee pairs. 
+#include +#include +#include +#include +#include + +typedef void (*F)(); +static F t[256]; + +void f34() { + std::cerr << "BINGO\n"; + exit(1); +} +void f23() { t[(unsigned)'d'] = f34;} +void f12() { t[(unsigned)'c'] = f23;} +void f01() { t[(unsigned)'b'] = f12;} +void f00() {} + +static F t0[256] = { + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, + f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, f00, +}; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 4) return 0; + // Spoof the counters. + for (int i = 0; i < 200; i++) { + f23(); + f12(); + f01(); + } + memcpy(t, t0, sizeof(t)); + t[(unsigned)'a'] = f01; + t[Data[0]](); + t[Data[1]](); + t[Data[2]](); + t[Data[3]](); + return 0; +} + diff --git a/lib/Fuzzer/test/CounterTest.cpp b/lib/Fuzzer/test/CounterTest.cpp index 29ddb02ebaea..b61f419c4991 100644 --- a/lib/Fuzzer/test/CounterTest.cpp +++ b/lib/Fuzzer/test/CounterTest.cpp @@ -2,7 +2,7 @@ // executed many times. #include -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { int Num = 0; for (size_t i = 0; i < Size; i++) if (Data[i] == 'A' + i) @@ -11,4 +11,5 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { std::cerr << "BINGO!\n"; exit(1); } + return 0; } diff --git a/lib/Fuzzer/test/CxxTokensTest.cpp b/lib/Fuzzer/test/CxxTokensTest.cpp deleted file mode 100644 index 77d08b3d1055..000000000000 --- a/lib/Fuzzer/test/CxxTokensTest.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Simple test for a fuzzer. The fuzzer must find a sequence of C++ tokens. 
-#include -#include -#include -#include -#include - -static void Found() { - std::cout << "BINGO; Found the target, exiting\n"; - exit(1); -} - -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - // looking for "thread_local unsigned A;" - if (Size < 24) return; - if (0 == memcmp(&Data[0], "thread_local", 12)) - if (Data[12] == ' ') - if (0 == memcmp(&Data[13], "unsigned", 8)) - if (Data[21] == ' ') - if (Data[22] == 'A') - if (Data[23] == ';') - Found(); -} - diff --git a/lib/Fuzzer/test/DFSanMemcmpTest.cpp b/lib/Fuzzer/test/DFSanMemcmpTest.cpp deleted file mode 100644 index 510a24398005..000000000000 --- a/lib/Fuzzer/test/DFSanMemcmpTest.cpp +++ /dev/null @@ -1,12 +0,0 @@ -// Simple test for a fuzzer. The fuzzer must find a particular string. -#include -#include -#include -#include - -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size >= 8 && memcmp(Data, "01234567", 8) == 0) { - fprintf(stderr, "BINGO\n"); - exit(1); - } -} diff --git a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp index e0b7509b8d65..6007dd4a027b 100644 --- a/lib/Fuzzer/test/FourIndependentBranchesTest.cpp +++ b/lib/Fuzzer/test/FourIndependentBranchesTest.cpp @@ -4,7 +4,7 @@ #include #include -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { int bits = 0; if (Size > 0 && Data[0] == 'F') bits |= 1; if (Size > 1 && Data[1] == 'U') bits |= 2; @@ -14,5 +14,6 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { std::cerr << "BINGO!\n"; exit(1); } + return 0; } diff --git a/lib/Fuzzer/test/FullCoverageSetTest.cpp b/lib/Fuzzer/test/FullCoverageSetTest.cpp index 2c6ff98db005..a868084a0cee 100644 --- a/lib/Fuzzer/test/FullCoverageSetTest.cpp +++ b/lib/Fuzzer/test/FullCoverageSetTest.cpp @@ -4,7 +4,7 @@ #include #include -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { int bits = 0; if (Size > 0 && Data[0] == 'F') bits |= 1; if (Size > 1 && Data[1] == 'U') bits |= 2; @@ -16,5 +16,6 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { std::cerr << "BINGO!\n"; exit(1); } + return 0; } diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index 50f2f99760e5..8c0012708afc 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -2,6 +2,8 @@ #include "gtest/gtest.h" #include +using namespace fuzzer; + // For now, have LLVMFuzzerTestOneInput just to make it link. // Later we may want to make unittests that actually call LLVMFuzzerTestOneInput. 
extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { @@ -9,7 +11,8 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { } TEST(Fuzzer, CrossOver) { - using namespace fuzzer; + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); Unit A({0, 1, 2}), B({5, 6, 7}); Unit C; Unit Expected[] = { @@ -52,8 +55,8 @@ TEST(Fuzzer, CrossOver) { std::set FoundUnits, ExpectedUnitsWitThisLength; for (int Iter = 0; Iter < 3000; Iter++) { C.resize(Len); - size_t NewSize = CrossOver(A.data(), A.size(), B.data(), B.size(), - C.data(), C.size()); + size_t NewSize = MD.CrossOver(A.data(), A.size(), B.data(), B.size(), + C.data(), C.size()); C.resize(NewSize); FoundUnits.insert(C); } @@ -71,3 +74,302 @@ TEST(Fuzzer, Hash) { U.push_back('d'); EXPECT_EQ("81fe8bfe87576c3ecb22426f8e57847382917acf", fuzzer::Hash(U)); } + +typedef size_t (MutationDispatcher::*Mutator)(uint8_t *Data, size_t Size, + size_t MaxSize); + +void TestEraseByte(Mutator M, int NumIter) { + uint8_t REM0[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM1[8] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM2[8] = {0x00, 0x11, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM3[8] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x77}; + uint8_t REM4[8] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x66, 0x77}; + uint8_t REM5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x66, 0x77}; + uint8_t REM6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x77}; + uint8_t REM7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (MD.*M)(T, sizeof(T), sizeof(T)); + if (NewSize == 7 && !memcmp(REM0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(REM1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(REM2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(REM3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(REM4, T, 7)) FoundMask |= 1 << 4; + if (NewSize == 7 && !memcmp(REM5, T, 7)) FoundMask |= 1 << 5; + if (NewSize == 7 && !memcmp(REM6, T, 7)) FoundMask |= 1 << 6; + if (NewSize == 7 && !memcmp(REM7, T, 7)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, EraseByte1) { + TestEraseByte(&MutationDispatcher::Mutate_EraseByte, 100); +} +TEST(FuzzerMutate, EraseByte2) { + TestEraseByte(&MutationDispatcher::Mutate, 1000); +} + +void TestInsertByte(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + uint8_t INS0[8] = {0xF1, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS1[8] = {0x00, 0xF2, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS2[8] = {0x00, 0x11, 0xF3, 0x22, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS3[8] = {0x00, 0x11, 0x22, 0xF4, 0x33, 0x44, 0x55, 0x66}; + uint8_t INS4[8] = {0x00, 0x11, 0x22, 0x33, 0xF5, 0x44, 0x55, 0x66}; + uint8_t INS5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF6, 0x55, 0x66}; + uint8_t INS6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF7, 0x66}; + uint8_t INS7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF8}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (MD.*M)(T, 7, 8); + if (NewSize == 8 && !memcmp(INS0, T, 8)) FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(INS1, T, 8)) FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(INS2, T, 8)) FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(INS3, T, 8)) FoundMask 
|= 1 << 3; + if (NewSize == 8 && !memcmp(INS4, T, 8)) FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(INS5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(INS6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(INS7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, InsertByte1) { + TestInsertByte(&MutationDispatcher::Mutate_InsertByte, 1 << 15); +} +TEST(FuzzerMutate, InsertByte2) { + TestInsertByte(&MutationDispatcher::Mutate, 1 << 17); +} + +void TestChangeByte(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + uint8_t CH0[8] = {0xF0, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH1[8] = {0x00, 0xF1, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0x00, 0x11, 0xF2, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x22, 0xF3, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0xF4, 0x55, 0x66, 0x77}; + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0xF5, 0x66, 0x77}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0xF5, 0x77}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (MD.*M)(T, 8, 9); + if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, ChangeByte1) { + TestChangeByte(&MutationDispatcher::Mutate_ChangeByte, 1 << 15); +} +TEST(FuzzerMutate, ChangeByte2) { + TestChangeByte(&MutationDispatcher::Mutate, 1 << 17); +} + +void TestChangeBit(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + uint8_t CH0[8] = {0x01, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH1[8] = {0x00, 0x13, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH2[8] = {0x00, 0x11, 0x02, 0x33, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH3[8] = {0x00, 0x11, 0x22, 0x37, 0x44, 0x55, 0x66, 0x77}; + uint8_t CH4[8] = {0x00, 0x11, 0x22, 0x33, 0x54, 0x55, 0x66, 0x77}; + uint8_t CH5[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x54, 0x66, 0x77}; + uint8_t CH6[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x76, 0x77}; + uint8_t CH7[8] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0xF7}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[9] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77}; + size_t NewSize = (MD.*M)(T, 8, 9); + if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + if (NewSize == 8 && !memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + if (NewSize == 8 && !memcmp(CH4, T, 8)) FoundMask |= 1 << 4; + if (NewSize == 8 && !memcmp(CH5, T, 8)) FoundMask |= 1 << 5; + if (NewSize == 8 && !memcmp(CH6, T, 8)) FoundMask |= 1 << 6; + if (NewSize == 8 && !memcmp(CH7, T, 8)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, ChangeBit1) { + TestChangeBit(&MutationDispatcher::Mutate_ChangeBit, 1 << 16); +} +TEST(FuzzerMutate, 
ChangeBit2) { + TestChangeBit(&MutationDispatcher::Mutate, 1 << 18); +} + +void TestShuffleBytes(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x22, 0x11, 0x33, 0x44, 0x55, 0x66}; + uint8_t CH1[7] = {0x11, 0x00, 0x33, 0x22, 0x44, 0x55, 0x66}; + uint8_t CH2[7] = {0x00, 0x33, 0x11, 0x22, 0x44, 0x55, 0x66}; + uint8_t CH3[7] = {0x00, 0x11, 0x22, 0x44, 0x55, 0x66, 0x33}; + uint8_t CH4[7] = {0x00, 0x11, 0x22, 0x33, 0x55, 0x44, 0x66}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66}; + size_t NewSize = (MD.*M)(T, 7, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 7 && !memcmp(CH4, T, 7)) FoundMask |= 1 << 4; + } + EXPECT_EQ(FoundMask, 31); +} + +TEST(FuzzerMutate, ShuffleBytes1) { + TestShuffleBytes(&MutationDispatcher::Mutate_ShuffleBytes, 1 << 15); +} +TEST(FuzzerMutate, ShuffleBytes2) { + TestShuffleBytes(&MutationDispatcher::Mutate, 1 << 19); +} + +void TestAddWordFromDictionary(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD}; + uint8_t Word2[3] = {0xFF, 0xEE, 0xEF}; + MD.AddWordToDictionary(Word1, sizeof(Word1)); + MD.AddWordToDictionary(Word2, sizeof(Word2)); + int FoundMask = 0; + uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD}; + uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22}; + uint8_t CH2[7] = {0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22}; + uint8_t CH3[7] = {0xAA, 0xBB, 0xCC, 0xDD, 0x00, 0x11, 0x22}; + uint8_t CH4[6] = {0x00, 0x11, 0x22, 0xFF, 0xEE, 0xEF}; + uint8_t CH5[6] = {0x00, 0x11, 0xFF, 0xEE, 0xEF, 0x22}; + uint8_t CH6[6] = {0x00, 0xFF, 0xEE, 0xEF, 0x11, 0x22}; + uint8_t CH7[6] = {0xFF, 0xEE, 0xEF, 0x00, 0x11, 0x22}; + for (int i = 0; i < NumIter; i++) { + uint8_t T[7] = {0x00, 0x11, 0x22}; + size_t NewSize = (MD.*M)(T, 3, 7); + if (NewSize == 7 && !memcmp(CH0, T, 7)) FoundMask |= 1 << 0; + if (NewSize == 7 && !memcmp(CH1, T, 7)) FoundMask |= 1 << 1; + if (NewSize == 7 && !memcmp(CH2, T, 7)) FoundMask |= 1 << 2; + if (NewSize == 7 && !memcmp(CH3, T, 7)) FoundMask |= 1 << 3; + if (NewSize == 6 && !memcmp(CH4, T, 6)) FoundMask |= 1 << 4; + if (NewSize == 6 && !memcmp(CH5, T, 6)) FoundMask |= 1 << 5; + if (NewSize == 6 && !memcmp(CH6, T, 6)) FoundMask |= 1 << 6; + if (NewSize == 6 && !memcmp(CH7, T, 6)) FoundMask |= 1 << 7; + } + EXPECT_EQ(FoundMask, 255); +} + +TEST(FuzzerMutate, AddWordFromDictionary1) { + TestAddWordFromDictionary(&MutationDispatcher::Mutate_AddWordFromDictionary, + 1 << 15); +} + +TEST(FuzzerMutate, AddWordFromDictionary2) { + TestAddWordFromDictionary(&MutationDispatcher::Mutate, 1 << 15); +} + +void TestChangeASCIIInteger(Mutator M, int NumIter) { + FuzzerRandomLibc Rand(0); + MutationDispatcher MD(Rand); + + uint8_t CH0[8] = {'1', '2', '3', '4', '5', '6', '7', '7'}; + uint8_t CH1[8] = {'1', '2', '3', '4', '5', '6', '7', '9'}; + uint8_t CH2[8] = {'2', '4', '6', '9', '1', '3', '5', '6'}; + uint8_t CH3[8] = {'0', '6', '1', '7', '2', '8', '3', '9'}; + int FoundMask = 0; + for (int i = 0; i < NumIter; i++) { + uint8_t T[8] = {'1', '2', '3', '4', '5', '6', '7', '8'}; + size_t NewSize = (MD.*M)(T, 8, 8); + /**/ if (NewSize == 8 && !memcmp(CH0, T, 8)) FoundMask |= 1 << 0; + else if (NewSize == 8 && 
!memcmp(CH1, T, 8)) FoundMask |= 1 << 1; + else if (NewSize == 8 && !memcmp(CH2, T, 8)) FoundMask |= 1 << 2; + else if (NewSize == 8 && !memcmp(CH3, T, 8)) FoundMask |= 1 << 3; + else if (NewSize == 8) FoundMask |= 1 << 4; + } + EXPECT_EQ(FoundMask, 31); +} + +TEST(FuzzerMutate, ChangeASCIIInteger1) { + TestChangeASCIIInteger(&MutationDispatcher::Mutate_ChangeASCIIInteger, + 1 << 15); +} + +TEST(FuzzerMutate, ChangeASCIIInteger2) { + TestChangeASCIIInteger(&MutationDispatcher::Mutate, 1 << 15); +} + + +TEST(FuzzerDictionary, ParseOneDictionaryEntry) { + Unit U; + EXPECT_FALSE(ParseOneDictionaryEntry("", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry("\t ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" \" ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" zz\" ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" \"zz ", &U)); + EXPECT_FALSE(ParseOneDictionaryEntry(" \"\" ", &U)); + EXPECT_TRUE(ParseOneDictionaryEntry("\"a\"", &U)); + EXPECT_EQ(U, Unit({'a'})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"abc\"", &U)); + EXPECT_EQ(U, Unit({'a', 'b', 'c'})); + EXPECT_TRUE(ParseOneDictionaryEntry("abc=\"abc\"", &U)); + EXPECT_EQ(U, Unit({'a', 'b', 'c'})); + EXPECT_FALSE(ParseOneDictionaryEntry("\"\\\"", &U)); + EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\\\"", &U)); + EXPECT_EQ(U, Unit({'\\'})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xAB\"", &U)); + EXPECT_EQ(U, Unit({0xAB})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"\\xABz\\xDE\"", &U)); + EXPECT_EQ(U, Unit({0xAB, 'z', 0xDE})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"#\"", &U)); + EXPECT_EQ(U, Unit({'#'})); + EXPECT_TRUE(ParseOneDictionaryEntry("\"\\\"\"", &U)); + EXPECT_EQ(U, Unit({'"'})); +} + +TEST(FuzzerDictionary, ParseDictionaryFile) { + std::vector<Unit> Units; + EXPECT_FALSE(ParseDictionaryFile("zzz\n", &Units)); + EXPECT_FALSE(ParseDictionaryFile("", &Units)); + EXPECT_TRUE(ParseDictionaryFile("\n", &Units)); + EXPECT_EQ(Units.size(), 0U); + EXPECT_TRUE(ParseDictionaryFile("#zzzz a b c d\n", &Units)); + EXPECT_EQ(Units.size(), 0U); + EXPECT_TRUE(ParseDictionaryFile(" #zzzz\n", &Units)); + EXPECT_EQ(Units.size(), 0U); + EXPECT_TRUE(ParseDictionaryFile(" #zzzz\n", &Units)); + EXPECT_EQ(Units.size(), 0U); + EXPECT_TRUE(ParseDictionaryFile(" #zzzz\naaa=\"aa\"", &Units)); + EXPECT_EQ(Units, std::vector<Unit>({Unit({'a', 'a'})})); + EXPECT_TRUE( + ParseDictionaryFile(" #zzzz\naaa=\"aa\"\n\nabc=\"abc\"", &Units)); + EXPECT_EQ(Units, + std::vector<Unit>({Unit({'a', 'a'}), Unit({'a', 'b', 'c'})})); +} + +TEST(FuzzerUtil, Base64) { + EXPECT_EQ("", Base64({})); + EXPECT_EQ("YQ==", Base64({'a'})); + EXPECT_EQ("eA==", Base64({'x'})); + EXPECT_EQ("YWI=", Base64({'a', 'b'})); + EXPECT_EQ("eHk=", Base64({'x', 'y'})); + EXPECT_EQ("YWJj", Base64({'a', 'b', 'c'})); + EXPECT_EQ("eHl6", Base64({'x', 'y', 'z'})); + EXPECT_EQ("YWJjeA==", Base64({'a', 'b', 'c', 'x'})); + EXPECT_EQ("YWJjeHk=", Base64({'a', 'b', 'c', 'x', 'y'})); + EXPECT_EQ("YWJjeHl6", Base64({'a', 'b', 'c', 'x', 'y', 'z'})); +} diff --git a/lib/Fuzzer/test/InfiniteTest.cpp b/lib/Fuzzer/test/InfiniteTest.cpp deleted file mode 100644 index b6d174ffdc90..000000000000 --- a/lib/Fuzzer/test/InfiniteTest.cpp +++ /dev/null @@ -1,24 +0,0 @@ -// Simple test for a fuzzer. The fuzzer must find the string "Hi!".
-#include -#include -#include -#include - -static volatile int Sink; - -static volatile int One = 1; - -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size > 0 && Data[0] == 'H') { - Sink = 1; - if (Size > 1 && Data[1] == 'i') { - Sink = 2; - if (Size > 2 && Data[2] == '!') { - Sink = 2; - while (One) - ; - } - } - } -} - diff --git a/lib/Fuzzer/test/MemcmpTest.cpp b/lib/Fuzzer/test/MemcmpTest.cpp new file mode 100644 index 000000000000..47ce59e0d8f5 --- /dev/null +++ b/lib/Fuzzer/test/MemcmpTest.cpp @@ -0,0 +1,20 @@ +// Simple test for a fuzzer. The fuzzer must find a particular string. +#include +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + // TODO: check other sizes. + if (Size >= 8 && memcmp(Data, "01234567", 8) == 0) { + if (Size >= 12 && memcmp(Data + 8, "ABCD", 4) == 0) { + if (Size >= 14 && memcmp(Data + 12, "XY", 2) == 0) { + if (Size >= 16 && memcmp(Data + 14, "KLM", 3) == 0) { + fprintf(stderr, "BINGO\n"); + exit(1); + } + } + } + } + return 0; +} diff --git a/lib/Fuzzer/test/NullDerefTest.cpp b/lib/Fuzzer/test/NullDerefTest.cpp index 0cff6617a31d..200c56ccbbc9 100644 --- a/lib/Fuzzer/test/NullDerefTest.cpp +++ b/lib/Fuzzer/test/NullDerefTest.cpp @@ -7,7 +7,7 @@ static volatile int Sink; static volatile int *Null = 0; -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size > 0 && Data[0] == 'H') { Sink = 1; if (Size > 1 && Data[1] == 'i') { @@ -18,5 +18,6 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { } } } + return 0; } diff --git a/lib/Fuzzer/test/DFSanSimpleCmpTest.cpp b/lib/Fuzzer/test/SimpleCmpTest.cpp similarity index 85% rename from lib/Fuzzer/test/DFSanSimpleCmpTest.cpp rename to lib/Fuzzer/test/SimpleCmpTest.cpp index ee378146dae1..8568c737efb1 100644 --- a/lib/Fuzzer/test/DFSanSimpleCmpTest.cpp +++ b/lib/Fuzzer/test/SimpleCmpTest.cpp @@ -4,8 +4,8 @@ #include #include -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { - if (Size < 14) return; +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 14) return 0; uint64_t x = 0; int64_t y = 0; int z = 0; @@ -27,4 +27,5 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { Size, x, y, z, a); exit(1); } + return 0; } diff --git a/lib/Fuzzer/test/SimpleDictionaryTest.cpp b/lib/Fuzzer/test/SimpleDictionaryTest.cpp new file mode 100644 index 000000000000..b9cb2f0270a3 --- /dev/null +++ b/lib/Fuzzer/test/SimpleDictionaryTest.cpp @@ -0,0 +1,26 @@ +// Simple test for a fuzzer. 
+// The fuzzer must find a string based on dictionary words: +// "Elvis" +// "Presley" +#include +#include +#include +#include +#include + +static volatile int Zero = 0; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + const char *Expected = "ElvisPresley"; + if (Size < strlen(Expected)) return 0; + size_t Match = 0; + for (size_t i = 0; Expected[i]; i++) + if (Expected[i] + Zero == Data[i]) + Match++; + if (Match == strlen(Expected)) { + std::cout << "BINGO; Found the target, exiting\n"; + exit(1); + } + return 0; +} + diff --git a/lib/Fuzzer/test/SimpleHashTest.cpp b/lib/Fuzzer/test/SimpleHashTest.cpp new file mode 100644 index 000000000000..5bab3fa7f649 --- /dev/null +++ b/lib/Fuzzer/test/SimpleHashTest.cpp @@ -0,0 +1,37 @@ +// This test computes a checksum of the data (all but the last 4 bytes), +// and then compares the last 4 bytes with the computed value. +// A fuzzer with cmp traces is expected to defeat this check. +#include +#include +#include +#include + +// A modified jenkins_one_at_a_time_hash initialized by non-zero, +// so that simple_hash(0) != 0. See also +// https://en.wikipedia.org/wiki/Jenkins_hash_function +static uint32_t simple_hash(const uint8_t *Data, size_t Size) { + uint32_t Hash = 0x12039854; + for (uint32_t i = 0; i < Size; i++) { + Hash += Data[i]; + Hash += (Hash << 10); + Hash ^= (Hash >> 6); + } + Hash += (Hash << 3); + Hash ^= (Hash >> 11); + Hash += (Hash << 15); + return Hash; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size < 14) + return 0; + + uint32_t Hash = simple_hash(&Data[0], Size - 4); + uint32_t Want = reinterpret_cast<const uint32_t *>(&Data[Size - 4])[0]; + if (Hash != Want) + return 0; + fprintf(stderr, "BINGO; simple_hash defeated: %x == %x\n", (unsigned int)Hash, + (unsigned int)Want); + exit(1); + return 0; +} diff --git a/lib/Fuzzer/test/SimpleTest.cpp b/lib/Fuzzer/test/SimpleTest.cpp index a891635a7f14..04225a889f5d 100644 --- a/lib/Fuzzer/test/SimpleTest.cpp +++ b/lib/Fuzzer/test/SimpleTest.cpp @@ -1,4 +1,5 @@ // Simple test for a fuzzer. The fuzzer must find the string "Hi!". +#include #include #include #include @@ -6,7 +7,8 @@ static volatile int Sink; -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + assert(Data); if (Size > 0 && Data[0] == 'H') { Sink = 1; if (Size > 1 && Data[1] == 'i') { @@ -17,5 +19,6 @@ extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { } } } + return 0; } diff --git a/lib/Fuzzer/test/StrcmpTest.cpp b/lib/Fuzzer/test/StrcmpTest.cpp new file mode 100644 index 000000000000..835819ae2f45 --- /dev/null +++ b/lib/Fuzzer/test/StrcmpTest.cpp @@ -0,0 +1,29 @@ +// Break through a series of strcmp.
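SimpleHashTest above is only solvable once the fuzzer can defeat the checksum, but a valid input is easy to construct by hand: append the hash of the payload as the last four bytes. A sketch (reusing the simple_hash function from the test; not part of the patch):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>
  #include <vector>

  // simple_hash as defined in SimpleHashTest.cpp above.
  static uint32_t simple_hash(const uint8_t *Data, size_t Size) {
    uint32_t Hash = 0x12039854;
    for (uint32_t i = 0; i < Size; i++) {
      Hash += Data[i];
      Hash += (Hash << 10);
      Hash ^= (Hash >> 6);
    }
    Hash += (Hash << 3);
    Hash ^= (Hash >> 11);
    Hash += (Hash << 15);
    return Hash;
  }

  int main() {
    std::vector<uint8_t> U(14, 'x');  // >= 14 bytes, as the test requires
    uint32_t H = simple_hash(U.data(), U.size() - 4);
    // Storing the raw uint32_t bytes matches the reinterpret_cast read
    // in the test, independent of endianness.
    memcpy(U.data() + U.size() - 4, &H, sizeof(H));
    printf("crafted input passes with hash %x\n", H);
  }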
+#include +#include +#include +#include +#include + +bool Eq(const uint8_t *Data, size_t Size, const char *Str) { + char Buff[1024]; + size_t Len = strlen(Str); + if (Size < Len) return false; + if (Len >= sizeof(Buff)) return false; + memcpy(Buff, (char*)Data, Len); + Buff[Len] = 0; + int res = strcmp(Buff, Str); + return res == 0; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Eq(Data, Size, "AAA") && + Size >= 3 && Eq(Data + 3, Size - 3, "BBBB") && + Size >= 7 && Eq(Data + 7, Size - 7, "CCCCCC") && + Size >= 14 && Data[13] == 42 + ) { + fprintf(stderr, "BINGO\n"); + exit(1); + } + return 0; +} diff --git a/lib/Fuzzer/test/StrncmpTest.cpp b/lib/Fuzzer/test/StrncmpTest.cpp new file mode 100644 index 000000000000..55344d75e0b1 --- /dev/null +++ b/lib/Fuzzer/test/StrncmpTest.cpp @@ -0,0 +1,25 @@ +// Simple test for a fuzzer. The fuzzer must find a particular string. +#include +#include +#include +#include + +static volatile int sink; + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + // TODO: check other sizes. + char *S = (char*)Data; + if (Size >= 8 && strncmp(S, "123", 8)) + sink = 1; + if (Size >= 8 && strncmp(S, "01234567", 8) == 0) { + if (Size >= 12 && strncmp(S + 8, "ABCD", 4) == 0) { + if (Size >= 14 && strncmp(S + 12, "XY", 2) == 0) { + if (Size >= 16 && strncmp(S + 14, "KLM", 3) == 0) { + fprintf(stderr, "BINGO\n"); + exit(1); + } + } + } + } + return 0; +} diff --git a/lib/Fuzzer/test/SwitchTest.cpp b/lib/Fuzzer/test/SwitchTest.cpp new file mode 100644 index 000000000000..5de7fff74525 --- /dev/null +++ b/lib/Fuzzer/test/SwitchTest.cpp @@ -0,0 +1,55 @@ +// Simple test for a fuzzer. The fuzzer must find the interesting switch value. +#include +#include +#include +#include +#include + +static volatile int Sink; + +template <class T> +bool Switch(const uint8_t *Data, size_t Size) { + T X; + if (Size < sizeof(X)) return false; + memcpy(&X, Data, sizeof(X)); + switch (X) { + case 1: Sink = __LINE__; break; + case 101: Sink = __LINE__; break; + case 1001: Sink = __LINE__; break; + case 10001: Sink = __LINE__; break; + case 100001: Sink = __LINE__; break; + case 1000001: Sink = __LINE__; break; + case 10000001: Sink = __LINE__; break; + case 100000001: return true; + } + return false; +} + +bool ShortSwitch(const uint8_t *Data, size_t Size) { + short X; + if (Size < sizeof(short)) return false; + memcpy(&X, Data, sizeof(short)); + switch(X) { + case 42: Sink = __LINE__; break; + case 402: Sink = __LINE__; break; + case 4002: Sink = __LINE__; break; + case 5002: Sink = __LINE__; break; + case 7002: Sink = __LINE__; break; + case 9002: Sink = __LINE__; break; + case 14002: Sink = __LINE__; break; + case 21402: return true; + } + return false; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + if (Size >= 4 && Switch<int>(Data, Size) && + Size >= 12 && Switch<uint64_t>(Data + 4, Size - 4) && + Size >= 14 && ShortSwitch(Data + 12, 2) + ) { + fprintf(stderr, "BINGO; Found the target, exiting\n"); + exit(1); + } + return 0; +} + diff --git a/lib/Fuzzer/test/TimeoutTest.cpp b/lib/Fuzzer/test/TimeoutTest.cpp index d541c058b648..71790ded95a2 100644 --- a/lib/Fuzzer/test/TimeoutTest.cpp +++ b/lib/Fuzzer/test/TimeoutTest.cpp @@ -6,7 +6,7 @@ static volatile int Sink; -extern "C" void LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { if (Size > 0 && Data[0] == 'H') { Sink = 1; if (Size > 1 && Data[1] == 'i') { @@ -18,5 +18,6 @@ extern "C" void
LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { } } } + return 0; } diff --git a/lib/Fuzzer/test/UninstrumentedTest.cpp b/lib/Fuzzer/test/UninstrumentedTest.cpp new file mode 100644 index 000000000000..c1730198d83f --- /dev/null +++ b/lib/Fuzzer/test/UninstrumentedTest.cpp @@ -0,0 +1,8 @@ +// This test should not be instrumented. +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { + return 0; +} + diff --git a/lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp b/lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp index b46313dbafbf..59f83b57bfad 100644 --- a/lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp +++ b/lib/Fuzzer/test/UserSuppliedFuzzerTest.cpp @@ -14,9 +14,11 @@ static const uint64_t kMagic = 8860221463604ULL; class MyFuzzer : public fuzzer::UserSuppliedFuzzer { public: - void TargetFunction(const uint8_t *Data, size_t Size) { - if (Size <= 10) return; - if (memcmp(Data, &kMagic, sizeof(kMagic))) return; + MyFuzzer(fuzzer::FuzzerRandomBase *Rand) + : fuzzer::UserSuppliedFuzzer(Rand) {} + int TargetFunction(const uint8_t *Data, size_t Size) { + if (Size <= 10) return 0; + if (memcmp(Data, &kMagic, sizeof(kMagic))) return 0; // It's hard to get here w/o advanced fuzzing techniques (e.g. cmp tracing). // So, we simply 'fix' the data in the custom mutator. if (Data[8] == 'H') { @@ -27,6 +29,7 @@ class MyFuzzer : public fuzzer::UserSuppliedFuzzer { } } } + return 0; } // Custom mutator. virtual size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize) { @@ -35,13 +38,14 @@ class MyFuzzer : public fuzzer::UserSuppliedFuzzer { Size = sizeof(kMagic); // "Fix" the data, then mutate. memcpy(Data, &kMagic, std::min(MaxSize, sizeof(kMagic))); - return BasicMutate(Data + sizeof(kMagic), Size - sizeof(kMagic), - MaxSize - sizeof(kMagic)); + return fuzzer::UserSuppliedFuzzer::Mutate( + Data + sizeof(kMagic), Size - sizeof(kMagic), MaxSize - sizeof(kMagic)); } // No need to redefine CrossOver() here. 
}; int main(int argc, char **argv) { - MyFuzzer F; + fuzzer::FuzzerRandomLibc Rand(0); + MyFuzzer F(&Rand); fuzzer::FuzzerDriver(argc, argv, F); } diff --git a/lib/Fuzzer/test/dict1.txt b/lib/Fuzzer/test/dict1.txt new file mode 100644 index 000000000000..520d0cc7b7d8 --- /dev/null +++ b/lib/Fuzzer/test/dict1.txt @@ -0,0 +1,4 @@ +# Dictionary for SimpleDictionaryTest + +a="Elvis" +b="Presley" diff --git a/lib/Fuzzer/test/fuzzer-dfsan.test b/lib/Fuzzer/test/fuzzer-dfsan.test new file mode 100644 index 000000000000..982f143669d0 --- /dev/null +++ b/lib/Fuzzer/test/fuzzer-dfsan.test @@ -0,0 +1,22 @@ +CHECK1: BINGO +CHECK2: BINGO +CHECK3: BINGO +CHECK4: BINGO + +CHECK_DFSanCmpCallback: DFSanCmpCallback: PC +CHECK_DFSanSwitchCallback: DFSanSwitchCallback: PC + +RUN: not LLVMFuzzer-SimpleCmpTest-DFSan -use_traces=1 -seed=1 -runs=1000000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK1 +RUN: LLVMFuzzer-SimpleCmpTest-DFSan -use_traces=1 -seed=1 -runs=100 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback + +RUN: not LLVMFuzzer-MemcmpTest-DFSan -use_traces=1 -seed=1 -runs=10000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK2 +RUN: LLVMFuzzer-MemcmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback + +RUN: not LLVMFuzzer-StrncmpTest-DFSan -use_traces=1 -seed=1 -runs=10000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK3 +RUN: LLVMFuzzer-StrncmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback + +RUN: not LLVMFuzzer-StrcmpTest-DFSan -use_traces=1 -seed=1 -runs=10000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK3 +RUN: LLVMFuzzer-StrcmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback + +RUN: not LLVMFuzzer-SwitchTest-DFSan -use_traces=1 -seed=1 -runs=100000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK4 +RUN: LLVMFuzzer-SwitchTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanSwitchCallback diff --git a/lib/Fuzzer/test/fuzzer-drill.test b/lib/Fuzzer/test/fuzzer-drill.test new file mode 100644 index 000000000000..b2fc1fecd276 --- /dev/null +++ b/lib/Fuzzer/test/fuzzer-drill.test @@ -0,0 +1,8 @@ +CHECK: BINGO +RUN: rm -rf FourIndependentBranchesTestCORPUS +RUN: mkdir FourIndependentBranchesTestCORPUS +RUN: LLVMFuzzer-FourIndependentBranchesTest -seed=1 -runs=100000 FourIndependentBranchesTestCORPUS +RUN: not LLVMFuzzer-FourIndependentBranchesTest -runs=100000 -drill=1 -jobs=200 FourIndependentBranchesTestCORPUS 2>&1 | FileCheck %s +RUN: rm -rf FourIndependentBranchesTestCORPUS + + diff --git a/lib/Fuzzer/test/fuzzer-timeout.test b/lib/Fuzzer/test/fuzzer-timeout.test new file mode 100644 index 000000000000..c3a9e8a3a9e0 --- /dev/null +++ b/lib/Fuzzer/test/fuzzer-timeout.test @@ -0,0 +1,13 @@ +RUN: not LLVMFuzzer-TimeoutTest -timeout=1 2>&1 | FileCheck %s --check-prefix=TimeoutTest +TimeoutTest: ALARM: working on the last Unit for +TimeoutTest: Test unit written to ./timeout- +TimeoutTest: == ERROR: libFuzzer: timeout after +TimeoutTest: #0 +TimeoutTest: #1 +TimeoutTest: #2 +TimeoutTest: SUMMARY: libFuzzer: timeout + +RUN: not LLVMFuzzer-TimeoutTest -timeout=1 -test_single_input=%S/hi.txt 2>&1 | FileCheck %s --check-prefix=SingleInputTimeoutTest +SingleInputTimeoutTest: ALARM: working on the last Unit for +SingleInputTimeoutTest-NOT: Test unit written to ./timeout- + diff --git 
diff --git a/lib/Fuzzer/test/fuzzer-traces.test b/lib/Fuzzer/test/fuzzer-traces.test
new file mode 100644
index 000000000000..084cf30d6982
--- /dev/null
+++ b/lib/Fuzzer/test/fuzzer-traces.test
@@ -0,0 +1,19 @@
+CHECK: BINGO
+Done1000000: Done 1000000 runs in
+
+RUN: not LLVMFuzzer-SimpleCmpTest -use_traces=1 -seed=1 -runs=1000001 2>&1 | FileCheck %s
+
+RUN: not LLVMFuzzer-MemcmpTest -use_traces=1 -seed=4294967295 -runs=100000 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-MemcmpTest -seed=4294967295 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+
+RUN: not LLVMFuzzer-StrncmpTest -use_traces=1 -seed=1 -runs=100000 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-StrncmpTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+
+RUN: not LLVMFuzzer-StrcmpTest -use_traces=1 -seed=1 -runs=200000 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-StrcmpTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+
+RUN: not LLVMFuzzer-SwitchTest -use_traces=1 -seed=1 -runs=1000002 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SwitchTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+
+RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=1 -runs=100000 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SimpleHashTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test
index b8e672f0fec7..810410df6fc7 100644
--- a/lib/Fuzzer/test/fuzzer.test
+++ b/lib/Fuzzer/test/fuzzer.test
@@ -1,30 +1,32 @@
 CHECK: BINGO
+Done1000000: Done 1000000 runs in
 
-RUN: ./LLVMFuzzer-SimpleTest 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SimpleTest 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-NullDerefTest -test_single_input=%S/hi.txt 2>&1 | FileCheck %s --check-prefix=SingleInput
+SingleInput-NOT: Test unit written to ./crash-
 
-RUN: not ./LLVMFuzzer-InfiniteTest -timeout=2 2>&1 | FileCheck %s --check-prefix=InfiniteTest
-InfiniteTest: ALARM: working on the last Unit for
-InfiniteTest: CRASHED; file written to timeout
+RUN: LLVMFuzzer-SimpleCmpTest -max_total_time=1 2>&1 | FileCheck %s --check-prefix=MaxTotalTime
+MaxTotalTime: Done {{.*}} runs in {{.}} second(s)
 
-RUN: not ./LLVMFuzzer-TimeoutTest -timeout=5 2>&1 | FileCheck %s --check-prefix=TimeoutTest
-TimeoutTest: ALARM: working on the last Unit for
-TimeoutTest: CRASHED; file written to timeout
+RUN: not LLVMFuzzer-NullDerefTest 2>&1 | FileCheck %s --check-prefix=NullDerefTest
+NullDerefTest: Test unit written to ./crash-
+RUN: not LLVMFuzzer-NullDerefTest -artifact_prefix=ZZZ 2>&1 | FileCheck %s --check-prefix=NullDerefTestPrefix
+NullDerefTestPrefix: Test unit written to ZZZcrash-
+RUN: not LLVMFuzzer-NullDerefTest -artifact_prefix=ZZZ -exact_artifact_path=FOOBAR 2>&1 | FileCheck %s --check-prefix=NullDerefTestExactPath
+NullDerefTestExactPath: Test unit written to FOOBAR
 
-RUN: not ./LLVMFuzzer-NullDerefTest 2>&1 | FileCheck %s --check-prefix=NullDerefTest
-NullDerefTest: CRASHED; file written to crash-
+#not LLVMFuzzer-FullCoverageSetTest -timeout=15 -seed=1 -mutate_depth=2 -use_full_coverage_set=1 2>&1 | FileCheck %s
 
-RUN: not ./LLVMFuzzer-FullCoverageSetTest -timeout=15 -seed=1 -mutate_depth=2 -use_full_coverage_set=1 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s
 
-RUN: not ./LLVMFuzzer-FourIndependentBranchesTest -timeout=15 -seed=1 -use_full_coverage_set=1 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-CallerCalleeTest -cross_over=0 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-CallerCalleeTest -use_indir_calls=0 -cross_over=0 -max_len=6 -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
 
-RUN: not ./LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s
-RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest-DFSan -use_traces=1 -seed=1 -runs=1000000 -timeout=5 2>&1 | FileCheck %s
-RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -use_traces=1 -seed=1 -runs=1000000 -timeout=5 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-UserSuppliedFuzzerTest -seed=1 -timeout=15 2>&1 | FileCheck %s
 
-RUN: not ./LLVMFuzzer-DFSanMemcmpTest-DFSan -use_traces=1 -seed=1 -runs=100 -timeout=5 2>&1 | FileCheck %s
-
-RUN: not ./LLVMFuzzer-CxxTokensTest -seed=1 -timeout=15 -tokens=%S/../cxx_fuzzer_tokens.txt 2>&1 | FileCheck %s
-
-RUN: not ./LLVMFuzzer-UserSuppliedFuzzerTest -seed=1 -timeout=15 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-SimpleDictionaryTest -dict=%S/dict1.txt -seed=1 -runs=1000003 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SimpleDictionaryTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+RUN: not LLVMFuzzer-UninstrumentedTest-Uninstrumented 2>&1 | FileCheck %s --check-prefix=UNINSTRUMENTED
+UNINSTRUMENTED: ERROR: __sanitizer_set_death_callback is not defined. Exiting.
diff --git a/lib/Fuzzer/test/hi.txt b/lib/Fuzzer/test/hi.txt
new file mode 100644
index 000000000000..2f9031f0ec7b
--- /dev/null
+++ b/lib/Fuzzer/test/hi.txt
@@ -0,0 +1 @@
+Hi!
\ No newline at end of file
diff --git a/lib/Fuzzer/test/lit.cfg b/lib/Fuzzer/test/lit.cfg
index 834a16aefe73..2140a97668b3 100644
--- a/lib/Fuzzer/test/lit.cfg
+++ b/lib/Fuzzer/test/lit.cfg
@@ -5,10 +5,11 @@ config.test_format = lit.formats.ShTest(True)
 config.suffixes = ['.test']
 config.test_source_root = os.path.dirname(__file__)
 
-# Tweak PATH to include llvm tools dir.
+# Tweak PATH to include llvm tools dir and current exec dir.
 llvm_tools_dir = getattr(config, 'llvm_tools_dir', None)
 if (not llvm_tools_dir) or (not os.path.exists(llvm_tools_dir)):
   lit_config.fatal("Invalid llvm_tools_dir config attribute: %r" % llvm_tools_dir)
-path = os.path.pathsep.join((llvm_tools_dir, config.environment['PATH']))
+path = os.path.pathsep.join((llvm_tools_dir, config.test_exec_root,
+                             config.environment['PATH']))
 config.environment['PATH'] = path
diff --git a/lib/Fuzzer/test/merge.test b/lib/Fuzzer/test/merge.test
new file mode 100644
index 000000000000..57ecc141bbfe
--- /dev/null
+++ b/lib/Fuzzer/test/merge.test
@@ -0,0 +1,29 @@
+CHECK: BINGO
+
+RUN: rm -rf %tmp/T1 %tmp/T2
+RUN: mkdir -p %tmp/T1 %tmp/T2
+RUN: echo F..... > %tmp/T1/1
+RUN: echo .U.... > %tmp/T1/2
+RUN: echo ..Z... > %tmp/T1/3
+
+# T1 has 3 elements, T2 is empty.
+RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK1
+CHECK1: Merge: running the initial corpus {{.*}} of 3 units
+CHECK1: Merge: written 0 out of 0 units
+
+RUN: echo ...Z.. > %tmp/T2/1
+RUN: echo ....E. > %tmp/T2/2
+RUN: echo .....R > %tmp/T2/3
+RUN: echo F..... > %tmp/T2/a
+RUN: echo .U.... > %tmp/T2/b
+RUN: echo ..Z... > %tmp/T2/c
+
+# T1 has 3 elements, T2 has 6 elements, only 3 are new.
+RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK2
+CHECK2: Merge: running the initial corpus {{.*}} of 3 units
+CHECK2: Merge: written 3 out of 6 units
+
+# Now, T1 has 6 units and T2 has no new interesting units.
+RUN: LLVMFuzzer-FullCoverageSetTest -merge=1 %tmp/T1 %tmp/T2 2>&1 | FileCheck %s --check-prefix=CHECK3
+CHECK3: Merge: running the initial corpus {{.*}} of 6 units
+CHECK3: Merge: written 0 out of 6 units
diff --git a/lib/Fuzzer/test/trace-bb/CMakeLists.txt b/lib/Fuzzer/test/trace-bb/CMakeLists.txt
new file mode 100644
index 000000000000..99af019565b5
--- /dev/null
+++ b/lib/Fuzzer/test/trace-bb/CMakeLists.txt
@@ -0,0 +1,14 @@
+# These tests are not instrumented with coverage.
+
+set(CMAKE_CXX_FLAGS_RELEASE
+  "${LIBFUZZER_FLAGS_BASE} -fsanitize-coverage=edge,trace-bb")
+
+foreach(Test ${TraceBBTests})
+  add_executable(LLVMFuzzer-${Test}-TraceBB
+    ../${Test}.cpp
+    )
+  target_link_libraries(LLVMFuzzer-${Test}-TraceBB
+    LLVMFuzzer
+    )
+endforeach()
+
diff --git a/lib/Fuzzer/test/uninstrumented/CMakeLists.txt b/lib/Fuzzer/test/uninstrumented/CMakeLists.txt
new file mode 100644
index 000000000000..443ba3716f66
--- /dev/null
+++ b/lib/Fuzzer/test/uninstrumented/CMakeLists.txt
@@ -0,0 +1,14 @@
+# These tests are not instrumented with coverage.
+
+set(CMAKE_CXX_FLAGS_RELEASE
+  "${LIBFUZZER_FLAGS_BASE} -O0 -fno-sanitize=all")
+
+foreach(Test ${UninstrumentedTests})
+  add_executable(LLVMFuzzer-${Test}-Uninstrumented
+    ../${Test}.cpp
+    )
+  target_link_libraries(LLVMFuzzer-${Test}-Uninstrumented
+    LLVMFuzzer
+    )
+endforeach()
+
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index b553f11018c7..185db47f07e5 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -39,6 +39,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -102,17 +103,9 @@ static OrderMap orderModule(const Module *M) {
     orderValue(&A, OM);
   }
   for (const Function &F : *M) {
-    if (F.hasPrefixData())
-      if (!isa<GlobalValue>(F.getPrefixData()))
-        orderValue(F.getPrefixData(), OM);
-
-    if (F.hasPrologueData())
-      if (!isa<GlobalValue>(F.getPrologueData()))
-        orderValue(F.getPrologueData(), OM);
-
-    if (F.hasPersonalityFn())
-      if (!isa<GlobalValue>(F.getPersonalityFn()))
-        orderValue(F.getPersonalityFn(), OM);
+    for (const Use &U : F.operands())
+      if (!isa<GlobalValue>(U.get()))
+        orderValue(U.get(), OM);
 
     orderValue(&F, OM);
 
@@ -232,8 +225,7 @@ static UseListOrderStack predictUseListOrder(const Module *M) {
   // We want to visit the functions backward now so we can list function-local
   // constants in the last Function they're used in.  Module-level constants
   // have already been visited above.
-  for (auto I = M->rbegin(), E = M->rend(); I != E; ++I) {
-    const Function &F = *I;
+  for (const Function &F : make_range(M->rbegin(), M->rend())) {
     if (F.isDeclaration())
       continue;
     for (const BasicBlock &BB : F)
@@ -263,8 +255,8 @@ static UseListOrderStack predictUseListOrder(const Module *M) {
   for (const GlobalAlias &A : M->aliases())
     predictValueUseListOrder(A.getAliasee(), nullptr, OM, Stack);
   for (const Function &F : *M)
-    if (F.hasPrefixData())
-      predictValueUseListOrder(F.getPrefixData(), nullptr, OM, Stack);
+    for (const Use &U : F.operands())
+      predictValueUseListOrder(U.get(), nullptr, OM, Stack);
 
   return Stack;
 }
@@ -304,6 +296,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
   case CallingConv::AnyReg:        Out << "anyregcc"; break;
   case CallingConv::PreserveMost:  Out << "preserve_mostcc"; break;
   case CallingConv::PreserveAll:   Out << "preserve_allcc"; break;
+  case CallingConv::CXX_FAST_TLS:  Out << "cxx_fast_tlscc"; break;
   case CallingConv::GHC:           Out << "ghccc"; break;
   case CallingConv::X86_StdCall:   Out << "x86_stdcallcc"; break;
   case CallingConv::X86_FastCall:  Out << "x86_fastcallcc"; break;
@@ -320,6 +313,9 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
   case CallingConv::X86_64_Win64:  Out << "x86_64_win64cc"; break;
   case CallingConv::SPIR_FUNC:     Out << "spir_func"; break;
   case CallingConv::SPIR_KERNEL:   Out << "spir_kernel"; break;
+  case CallingConv::X86_INTR:      Out << "x86_intrcc"; break;
+  case CallingConv::HHVM:          Out << "hhvmcc"; break;
+  case CallingConv::HHVM_C:        Out << "hhvm_ccc"; break;
   }
 }
@@ -343,18 +339,8 @@ enum PrefixType {
   NoPrefix
 };
 
-/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
-/// prefixed with % (if the string only contains simple characters) or is
-/// surrounded with ""'s (if it has special chars in it). Print it out.
-static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+void llvm::printLLVMNameWithoutPrefix(raw_ostream &OS, StringRef Name) {
   assert(!Name.empty() && "Cannot get empty name!");
-  switch (Prefix) {
-  case NoPrefix: break;
-  case GlobalPrefix: OS << '@'; break;
-  case ComdatPrefix: OS << '$'; break;
-  case LabelPrefix: break;
-  case LocalPrefix: OS << '%'; break;
-  }
 
   // Scan the name to see if it needs quotes first.
   bool NeedsQuotes = isdigit(static_cast<unsigned char>(Name[0]));
@@ -386,9 +372,31 @@ static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
   OS << '"';
 }
 
-/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
-/// prefixed with % (if the string only contains simple characters) or is
-/// surrounded with ""'s (if it has special chars in it). Print it out.
+/// Turn the specified name into an 'LLVM name', which is either prefixed with %
+/// (if the string only contains simple characters) or is surrounded with ""'s
+/// (if it has special chars in it). Print it out.
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
+  switch (Prefix) {
+  case NoPrefix:
+    break;
+  case GlobalPrefix:
+    OS << '@';
+    break;
+  case ComdatPrefix:
+    OS << '$';
+    break;
+  case LabelPrefix:
+    break;
+  case LocalPrefix:
+    OS << '%';
+    break;
+  }
+  printLLVMNameWithoutPrefix(OS, Name);
+}
+
+/// Turn the specified name into an 'LLVM name', which is either prefixed with %
+/// (if the string only contains simple characters) or is surrounded with ""'s
+/// (if it has special chars in it). Print it out.
 static void PrintLLVMName(raw_ostream &OS, const Value *V) {
   PrintLLVMName(OS, V->getName(),
                 isa<GlobalValue>(V) ? GlobalPrefix : LocalPrefix);
 }
@@ -456,6 +464,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
   case Type::LabelTyID:     OS << "label"; return;
   case Type::MetadataTyID:  OS << "metadata"; return;
   case Type::X86_MMXTyID:   OS << "x86_mmx"; return;
+  case Type::TokenTyID:     OS << "token"; return;
   case Type::IntegerTyID:
     OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
     return;
@@ -691,8 +700,9 @@ void ModuleSlotTracker::incorporateFunction(const Function &F) {
   this->F = &F;
 }
 
-static SlotTracker *createSlotTracker(const Module *M) {
-  return new SlotTracker(M);
+int ModuleSlotTracker::getLocalSlot(const Value *V) {
+  assert(F && "No function incorporated");
+  return Machine->getLocalSlot(V);
 }
 
 static SlotTracker *createSlotTracker(const Value *V) {
@@ -802,7 +812,7 @@ void SlotTracker::processFunction() {
   for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
       AE = TheFunction->arg_end(); AI != AE; ++AI)
     if (!AI->hasName())
-      CreateFunctionSlot(AI);
+      CreateFunctionSlot(&*AI);
 
   ST_DEBUG("Inserting Instructions:\n");
 
@@ -1093,11 +1103,10 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     // the value back and get the same value.
     //
     bool ignored;
-    bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf;
     bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
     bool isInf = CFP->getValueAPF().isInfinity();
    bool isNaN = CFP->getValueAPF().isNaN();
-    if (!isHalf && !isInf && !isNaN) {
+    if (!isInf && !isNaN) {
       double Val = isDouble ? CFP->getValueAPF().convertToDouble() :
                               CFP->getValueAPF().convertToFloat();
       SmallString<128> StrVal;
@@ -1123,15 +1132,12 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     // x86, so we must not use these types.
     static_assert(sizeof(double) == sizeof(uint64_t),
                   "assuming that double is 64 bits!");
-    char Buffer[40];
     APFloat apf = CFP->getValueAPF();
-    // Halves and floats are represented in ASCII IR as double, convert.
+    // Floats are represented in ASCII IR as double, convert.
     if (!isDouble)
      apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
                   &ignored);
-    Out << "0x" <<
-            utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()),
-                          Buffer+40);
+    Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true);
     return;
   }
 
@@ -1139,60 +1145,32 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
   // These appear as a magic letter identifying the type, then a
   // fixed number of hex digits.
   Out << "0x";
-  // Bit position, in the current word, of the next nibble to print.
-  int shiftcount;
-
+  APInt API = CFP->getValueAPF().bitcastToAPInt();
   if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
     Out << 'K';
-    // api needed to prevent premature destruction
-    APInt api = CFP->getValueAPF().bitcastToAPInt();
-    const uint64_t* p = api.getRawData();
-    uint64_t word = p[1];
-    shiftcount = 12;
-    int width = api.getBitWidth();
-    for (int j=0; j<width; j+=4, shiftcount-=4) {
-      unsigned int nibble = (word>>shiftcount) & 15;
-      if (nibble < 10)
-        Out << (unsigned char)(nibble + '0');
-      else
-        Out << (unsigned char)(nibble - 10 + 'A');
-      if (shiftcount == 0 && j+4 < width) {
-        word = *p;
-        shiftcount = 64;
-        if (width-j-4 < 64)
-          shiftcount = width-j-4;
-      }
-    }
+    Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
+                                /*Upper=*/true);
+    Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
     return;
   } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) {
-    shiftcount = 60;
     Out << 'L';
+    Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
+    Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
   } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) {
-    shiftcount = 60;
     Out << 'M';
+    Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
+    Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
+                                /*Upper=*/true);
   } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) {
-    shiftcount = 12;
     Out << 'H';
+    Out << format_hex_no_prefix(API.getZExtValue(), 4,
+                                /*Upper=*/true);
   } else
     llvm_unreachable("Unsupported floating point type");
-  // api needed to prevent premature destruction
-  APInt api = CFP->getValueAPF().bitcastToAPInt();
-  const uint64_t* p = api.getRawData();
-  uint64_t word = *p;
-  int width = api.getBitWidth();
-  for (int j=0; j<width; j+=4, shiftcount-=4) {
-    unsigned int nibble = (word>>shiftcount) & 15;
-    if (nibble < 10)
-      Out << (unsigned char)(nibble + '0');
-    else
-      Out << (unsigned char)(nibble - 10 + 'A');
-    if (shiftcount == 0 && j+4 < width) {
-      word = *(++p);
-      shiftcount = 64;
-      if (width-j-4 < 64)
-        shiftcount = width-j-4;
-    }
-  }
   return;
 }
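The hand-rolled nibble loops above are replaced with the format_hex helpers from the llvm/Support/Format.h header this patch includes. A short sketch (not part of the patch) of the formatting behavior the new code relies on:

// Sketch, not from the patch: what the Format.h helpers produce.
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

void demo(llvm::raw_ostream &OS) {
  // format_hex prints a 0x prefix; Width pads the digits, Upper picks A-F.
  OS << llvm::format_hex(0x3ff0000000000000ULL, 0, /*Upper=*/true);
  // -> 0x3FF0000000000000
  // format_hex_no_prefix omits the 0x, so the 'K'/'L'/'M'/'H' magic letter
  // written by the caller stays directly in front of the digits.
  OS << llvm::format_hex_no_prefix(0x3c00ULL, 4, /*Upper=*/true);
  // -> 3C00
}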
@@ -1313,6 +1291,11 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     return;
   }
 
+  if (isa<ConstantTokenNone>(CV)) {
+    Out << "none";
+    return;
+  }
+
   if (isa<UndefValue>(CV)) {
     Out << "undef";
     return;
@@ -1326,10 +1309,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
     Out << " (";
 
     if (const GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
-      TypePrinter.print(
-          cast<PointerType>(GEP->getPointerOperandType()->getScalarType())
-              ->getElementType(),
-          Out);
+      TypePrinter.print(GEP->getSourceElementType(), Out);
       Out << ", ";
     }
 
@@ -1409,6 +1389,7 @@ struct MDFieldPrinter {
       : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) {
   }
   void printTag(const DINode *N);
+  void printMacinfoType(const DIMacroNode *N);
   void printString(StringRef Name, StringRef Value,
                    bool ShouldSkipEmpty = true);
   void printMetadata(StringRef Name, const Metadata *MD,
@@ -1431,6 +1412,14 @@ void MDFieldPrinter::printTag(const DINode *N) {
     Out << N->getTag();
 }
 
+void MDFieldPrinter::printMacinfoType(const DIMacroNode *N) {
+  Out << FS << "type: ";
+  if (const char *Type = dwarf::MacinfoString(N->getMacinfoType()))
+    Out << Type;
+  else
+    Out << N->getMacinfoType();
+}
+
 void MDFieldPrinter::printString(StringRef Name, StringRef Value,
                                  bool ShouldSkipEmpty) {
   if (ShouldSkipEmpty && Value.empty())
@@ -1656,6 +1645,7 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
   Printer.printMetadata("subprograms", N->getRawSubprograms());
   Printer.printMetadata("globals", N->getRawGlobalVariables());
   Printer.printMetadata("imports", N->getRawImportedEntities());
+  Printer.printMetadata("macros", N->getRawMacros());
   Printer.printInt("dwoId", N->getDWOId());
   Out << ")";
 }
@@ -1680,7 +1670,6 @@ static void writeDISubprogram(raw_ostream &Out, const DISubprogram *N,
   Printer.printInt("virtualIndex", N->getVirtualIndex());
   Printer.printDIFlags("flags", N->getFlags());
   Printer.printBool("isOptimized", N->isOptimized());
-  Printer.printMetadata("function", N->getRawFunction());
   Printer.printMetadata("templateParams", N->getRawTemplateParams());
   Printer.printMetadata("declaration", N->getRawDeclaration());
   Printer.printMetadata("variables", N->getRawVariables());
@@ -1725,6 +1714,29 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
   Out << ")";
 }
 
+static void writeDIMacro(raw_ostream &Out, const DIMacro *N,
+                         TypePrinting *TypePrinter, SlotTracker *Machine,
+                         const Module *Context) {
+  Out << "!DIMacro(";
+  MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+  Printer.printMacinfoType(N);
+  Printer.printInt("line", N->getLine());
+  Printer.printString("name", N->getName());
+  Printer.printString("value", N->getValue());
+  Out << ")";
+}
+
+static void writeDIMacroFile(raw_ostream &Out, const DIMacroFile *N,
+                             TypePrinting *TypePrinter, SlotTracker *Machine,
+                             const Module *Context) {
+  Out << "!DIMacroFile(";
+  MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+  Printer.printInt("line", N->getLine());
+  Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false);
+  Printer.printMetadata("nodes", N->getRawElements());
+  Out << ")";
+}
+
 static void writeDIModule(raw_ostream &Out, const DIModule *N,
                           TypePrinting *TypePrinter, SlotTracker *Machine,
                           const Module *Context) {
@@ -1789,11 +1801,8 @@ static void writeDILocalVariable(raw_ostream &Out, const DILocalVariable *N,
                                  SlotTracker *Machine, const Module *Context) {
   Out << "!DILocalVariable(";
   MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
-  Printer.printTag(N);
   Printer.printString("name", N->getName());
-  Printer.printInt("arg", N->getArg(),
-                   /* ShouldSkipZero */
-                   N->getTag() == dwarf::DW_TAG_auto_variable);
+  Printer.printInt("arg", N->getArg());
   Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
   Printer.printMetadata("file", N->getRawFile());
   Printer.printInt("line", N->getLine());
@@ -1998,6 +2007,7 @@ class AssemblyWriter {
   TypePrinting TypePrinter;
   AssemblyAnnotationWriter *AnnotationWriter;
   SetVector<const Comdat *> Comdats;
+  bool IsForDebug;
   bool ShouldPreserveUseListOrder;
   UseListOrderStack UseListOrders;
   SmallVector<StringRef, 8> MDNames;
@@ -2005,12 +2015,7 @@ public:
   /// Construct an AssemblyWriter with an external SlotTracker
   AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, const Module *M,
-                 AssemblyAnnotationWriter *AAW,
-                 bool ShouldPreserveUseListOrder = false);
-
-  /// Construct an AssemblyWriter with an internally allocated SlotTracker
-  AssemblyWriter(formatted_raw_ostream &o, const Module *M,
-                 AssemblyAnnotationWriter *AAW,
+                 AssemblyAnnotationWriter *AAW, bool IsForDebug,
                  bool ShouldPreserveUseListOrder = false);
 
   void printMDNodeBody(const MDNode *MD);
@@ -2020,6 +2025,7 @@ public:
   void writeOperand(const Value *Op, bool PrintType);
   void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
+  void writeOperandBundles(ImmutableCallSite CS);
   void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
   void writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
                           AtomicOrdering FailureOrdering,
@@ -2043,8 +2049,6 @@ public:
   void printUseLists(const Function *F);
 
 private:
-  void init();
-
   /// \brief Print out metadata attachments.
   void printMetadataAttachments(
       const SmallVectorImpl<std::pair<unsigned, MDNode *>> &MDs,
@@ -2060,7 +2064,12 @@ private:
 };
 } // namespace
 
-void AssemblyWriter::init() {
+AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+                               const Module *M, AssemblyAnnotationWriter *AAW,
+                               bool IsForDebug, bool ShouldPreserveUseListOrder)
+    : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW),
+      IsForDebug(IsForDebug),
+      ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
   if (!TheModule)
     return;
   TypePrinter.incorporateTypes(*TheModule);
@@ -2072,23 +2081,6 @@ void AssemblyWriter::init() {
       Comdats.insert(C);
 }
 
-AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
-                               const Module *M, AssemblyAnnotationWriter *AAW,
-                               bool ShouldPreserveUseListOrder)
-    : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW),
-      ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
-  init();
-}
-
-AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M,
-                               AssemblyAnnotationWriter *AAW,
-                               bool ShouldPreserveUseListOrder)
-    : Out(o), TheModule(M), SlotTrackerStorage(createSlotTracker(M)),
-      Machine(*SlotTrackerStorage), AnnotationWriter(AAW),
-      ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {
-  init();
-}
-
 void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
   if (!Operand) {
     Out << "<null operand!>";
@@ -2170,6 +2162,43 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
   WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
 }
 
+void AssemblyWriter::writeOperandBundles(ImmutableCallSite CS) {
+  if (!CS.hasOperandBundles())
+    return;
+
+  Out << " [ ";
+
+  bool FirstBundle = true;
+  for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) {
+    OperandBundleUse BU = CS.getOperandBundleAt(i);
+
+    if (!FirstBundle)
+      Out << ", ";
+    FirstBundle = false;
+
+    Out << '"';
+    PrintEscapedString(BU.getTagName(), Out);
+    Out << '"';
+
+    Out << '(';
+
+    bool FirstInput = true;
+    for (const auto &Input : BU.Inputs) {
+      if (!FirstInput)
+        Out << ", ";
+      FirstInput = false;
+
+      TypePrinter.print(Input->getType(), Out);
+      Out << " ";
+      WriteAsOperandInternal(Out, Input, &TypePrinter, &Machine, TheModule);
+    }
+
+    Out << ')';
+  }
+
+  Out << " ]";
+}
+
 void AssemblyWriter::printModule(const Module *M) {
   Machine.initialize();
 
@@ -2422,6 +2451,10 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
 
   Out << "alias ";
 
+  TypePrinter.print(GA->getValueType(), Out);
+
+  Out << ", ";
+
   const Constant *Aliasee = GA->getAliasee();
 
   if (!Aliasee) {
@@ -2536,28 +2569,26 @@ void AssemblyWriter::printFunction(const Function *F) {
   Machine.incorporateFunction(F);
 
   // Loop over the arguments, printing them...
-
-  unsigned Idx = 1;
-  if (!F->isDeclaration()) {
-    // If this isn't a declaration, print the argument names as well.
-    for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
-         I != E; ++I) {
+  if (F->isDeclaration() && !IsForDebug) {
+    // We're only interested in the type here - don't print argument names.
+    for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
       // Insert commas as we go... the first arg doesn't get a comma
-      if (I != F->arg_begin()) Out << ", ";
-      printArgument(I, Attrs, Idx);
-      Idx++;
+      if (I)
+        Out << ", ";
+      // Output type...
+      TypePrinter.print(FT->getParamType(I), Out);
+
+      if (Attrs.hasAttributes(I + 1))
+        Out << ' ' << Attrs.getAsString(I + 1);
     }
   } else {
-    // Otherwise, print the types from the function type.
-    for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) {
+    // The arguments are meaningful here, print them in detail.
+    unsigned Idx = 1;
+    for (const Argument &Arg : F->args()) {
       // Insert commas as we go... the first arg doesn't get a comma
-      if (i) Out << ", ";
-
-      // Output type...
-      TypePrinter.print(FT->getParamType(i), Out);
-
-      if (Attrs.hasAttributes(i+1))
-        Out << ' ' << Attrs.getAsString(i+1);
+      if (Idx != 1)
+        Out << ", ";
+      printArgument(&Arg, Attrs, Idx++);
     }
   }
 
@@ -2604,7 +2635,7 @@ void AssemblyWriter::printFunction(const Function *F) {
     Out << " {";
     // Output all of the function's basic blocks.
     for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
-      printBasicBlock(I);
+      printBasicBlock(&*I);
 
     // Output the function's use-lists.
     printUseLists(F);
@@ -2738,6 +2769,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       Out << "musttail ";
     else if (CI->isTailCall())
       Out << "tail ";
+    else if (CI->isNoTailCall())
+      Out << "notail ";
   }
 
   // Print out the opcode...
@@ -2850,8 +2883,50 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       writeOperand(LPI->getClause(i), true);
     }
+  } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(&I)) {
+    Out << " within ";
+    writeOperand(CatchSwitch->getParentPad(), /*PrintType=*/false);
+    Out << " [";
+    unsigned Op = 0;
+    for (const BasicBlock *PadBB : CatchSwitch->handlers()) {
+      if (Op > 0)
+        Out << ", ";
+      writeOperand(PadBB, /*PrintType=*/true);
+      ++Op;
+    }
+    Out << "] unwind ";
+    if (const BasicBlock *UnwindDest = CatchSwitch->getUnwindDest())
+      writeOperand(UnwindDest, /*PrintType=*/true);
+    else
+      Out << "to caller";
+  } else if (const auto *FPI = dyn_cast<FuncletPadInst>(&I)) {
+    Out << " within ";
+    writeOperand(FPI->getParentPad(), /*PrintType=*/false);
+    Out << " [";
+    for (unsigned Op = 0, NumOps = FPI->getNumArgOperands(); Op < NumOps;
+         ++Op) {
+      if (Op > 0)
+        Out << ", ";
+      writeOperand(FPI->getArgOperand(Op), /*PrintType=*/true);
+    }
+    Out << ']';
   } else if (isa<ReturnInst>(I) && !Operand) {
     Out << " void";
+  } else if (const auto *CRI = dyn_cast<CatchReturnInst>(&I)) {
+    Out << " from ";
+    writeOperand(CRI->getOperand(0), /*PrintType=*/false);
+
+    Out << " to ";
+    writeOperand(CRI->getOperand(1), /*PrintType=*/true);
+  } else if (const auto *CRI = dyn_cast<CleanupReturnInst>(&I)) {
+    Out << " from ";
+    writeOperand(CRI->getOperand(0), /*PrintType=*/false);
+
+    Out << " unwind ";
+    if (CRI->hasUnwindDest())
+      writeOperand(CRI->getOperand(1), /*PrintType=*/true);
+    else
+      Out << "to caller";
   } else if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
     // Print the calling convention being used.
     if (CI->getCallingConv() != CallingConv::C) {
@@ -2892,6 +2967,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     Out << ')';
     if (PAL.hasAttributes(AttributeSet::FunctionIndex))
       Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
+
+    writeOperandBundles(CI);
+
   } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
     Operand = II->getCalledValue();
     FunctionType *FTy = cast<FunctionType>(II->getFunctionType());
@@ -2926,6 +3004,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     if (PAL.hasAttributes(AttributeSet::FunctionIndex))
       Out << " #" << Machine.getAttributeGroupSlot(PAL.getFnAttributes());
 
+    writeOperandBundles(II);
+
     Out << "\n          to ";
     writeOperand(II->getNormalDest(), true);
     Out << " unwind ";
@@ -3138,29 +3218,23 @@ void AssemblyWriter::printUseLists(const Function *F) {
 // External Interface declarations
 //===----------------------------------------------------------------------===//
 
-void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
-  SlotTracker SlotTable(this->getParent());
-  formatted_raw_ostream OS(ROS);
-  AssemblyWriter W(OS, SlotTable, this->getParent(), AAW);
-  W.printFunction(this);
-}
-
 void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW,
-                   bool ShouldPreserveUseListOrder) const {
+                   bool ShouldPreserveUseListOrder, bool IsForDebug) const {
   SlotTracker SlotTable(this);
   formatted_raw_ostream OS(ROS);
-  AssemblyWriter W(OS, SlotTable, this, AAW, ShouldPreserveUseListOrder);
+  AssemblyWriter W(OS, SlotTable, this, AAW, IsForDebug,
+                   ShouldPreserveUseListOrder);
   W.printModule(this);
 }
 
-void NamedMDNode::print(raw_ostream &ROS) const {
+void NamedMDNode::print(raw_ostream &ROS, bool IsForDebug) const {
   SlotTracker SlotTable(getParent());
   formatted_raw_ostream OS(ROS);
-  AssemblyWriter W(OS, SlotTable, getParent(), nullptr);
+  AssemblyWriter W(OS, SlotTable, getParent(), nullptr, IsForDebug);
   W.printNamedMDNode(this);
 }
 
-void Comdat::print(raw_ostream &ROS) const {
+void Comdat::print(raw_ostream &ROS, bool /*IsForDebug*/) const {
   PrintLLVMName(ROS, getName(), ComdatPrefix);
   ROS << " = comdat ";
 
@@ -3185,7 +3259,7 @@ void Comdat::print(raw_ostream &ROS) const {
   ROS << '\n';
 }
 
-void Type::print(raw_ostream &OS) const {
+void Type::print(raw_ostream &OS, bool /*IsForDebug*/) const {
   TypePrinting TP;
   TP.print(const_cast<Type*>(this), OS);
 
@@ -3208,7 +3282,7 @@ static bool isReferencingMDNode(const Instruction &I) {
   return false;
 }
 
-void Value::print(raw_ostream &ROS) const {
+void Value::print(raw_ostream &ROS, bool IsForDebug) const {
   bool ShouldInitializeAllMetadata = false;
   if (auto *I = dyn_cast<Instruction>(this))
     ShouldInitializeAllMetadata = isReferencingMDNode(*I);
@@ -3216,10 +3290,11 @@
     ShouldInitializeAllMetadata = true;
 
   ModuleSlotTracker MST(getModuleFromVal(this), ShouldInitializeAllMetadata);
-  print(ROS, MST);
+  print(ROS, MST, IsForDebug);
 }
 
-void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
+void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST,
+                  bool IsForDebug) const {
   formatted_raw_ostream OS(ROS);
   SlotTracker EmptySlotTable(static_cast<const Module *>(nullptr));
   SlotTracker &SlotTable =
@@ -3231,14 +3306,14 @@ void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
   if (const Instruction *I = dyn_cast<Instruction>(this)) {
     incorporateFunction(I->getParent() ? I->getParent()->getParent() : nullptr);
-    AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr);
+    AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr, IsForDebug);
     W.printInstruction(*I);
   } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
     incorporateFunction(BB->getParent());
-    AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr);
+    AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr, IsForDebug);
     W.printBasicBlock(BB);
   } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
-    AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr);
+    AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr, IsForDebug);
     if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
       W.printGlobal(V);
    else if (const Function *F = dyn_cast<Function>(GV))
@@ -3261,7 +3336,7 @@ void Value::print(raw_ostream &ROS, ModuleSlotTracker &MST) const {
 
 /// Print without a type, skipping the TypePrinting object.
 ///
-/// \return \c true iff printing was succesful.
+/// \return \c true iff printing was successful.
 static bool printWithoutType(const Value &V, raw_ostream &O,
                              SlotTracker *Machine, const Module *M) {
   if (V.hasName() || isa<GlobalValue>(V) ||
@@ -3340,41 +3415,45 @@ void Metadata::printAsOperand(raw_ostream &OS, ModuleSlotTracker &MST,
   printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ true);
 }
 
-void Metadata::print(raw_ostream &OS, const Module *M) const {
+void Metadata::print(raw_ostream &OS, const Module *M,
+                     bool /*IsForDebug*/) const {
   ModuleSlotTracker MST(M, isa<MDNode>(this));
   printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
 }
 
 void Metadata::print(raw_ostream &OS, ModuleSlotTracker &MST,
-                     const Module *M) const {
+                     const Module *M, bool /*IsForDebug*/) const {
   printMetadataImpl(OS, *this, MST, M, /* OnlyAsOperand */ false);
 }
 
 // Value::dump - allow easy printing of Values from the debugger.
 LLVM_DUMP_METHOD
-void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
+void Value::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
 
 // Type::dump - allow easy printing of Types from the debugger.
 LLVM_DUMP_METHOD
-void Type::dump() const { print(dbgs()); dbgs() << '\n'; }
+void Type::dump() const { print(dbgs(), /*IsForDebug=*/true); dbgs() << '\n'; }
 
 // Module::dump() - Allow printing of Modules from the debugger.
 LLVM_DUMP_METHOD
-void Module::dump() const { print(dbgs(), nullptr); }
+void Module::dump() const {
+  print(dbgs(), nullptr,
+        /*ShouldPreserveUseListOrder=*/false, /*IsForDebug=*/true);
+}
 
 // \brief Allow printing of Comdats from the debugger.
 LLVM_DUMP_METHOD
-void Comdat::dump() const { print(dbgs()); }
+void Comdat::dump() const { print(dbgs(), /*IsForDebug=*/true); }
 
 // NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
 LLVM_DUMP_METHOD
-void NamedMDNode::dump() const { print(dbgs()); }
+void NamedMDNode::dump() const { print(dbgs(), /*IsForDebug=*/true); }
 
 LLVM_DUMP_METHOD
 void Metadata::dump() const { dump(nullptr); }
 
 LLVM_DUMP_METHOD
 void Metadata::dump(const Module *M) const {
-  print(dbgs(), M);
+  print(dbgs(), M, /*IsForDebug=*/true);
   dbgs() << '\n';
 }
diff --git a/lib/IR/AttributeImpl.h b/lib/IR/AttributeImpl.h
index 6f338ae835fa..659f9568b7c6 100644
--- a/lib/IR/AttributeImpl.h
+++ b/lib/IR/AttributeImpl.h
@@ -18,6 +18,7 @@
 
 #include "llvm/ADT/FoldingSet.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/Support/TrailingObjects.h"
 #include <string>
 
 namespace llvm {
@@ -141,13 +142,16 @@ public:
 /// \class
 /// \brief This class represents a group of attributes that apply to one
 /// element: function, return type, or parameter.
-class AttributeSetNode : public FoldingSetNode {
+class AttributeSetNode final
+    : public FoldingSetNode,
+      private TrailingObjects<AttributeSetNode, Attribute> {
+  friend TrailingObjects;
+
   unsigned NumAttrs; ///< Number of attributes in this node.
 
   AttributeSetNode(ArrayRef<Attribute> Attrs) : NumAttrs(Attrs.size()) {
     // There's memory after the node where we can store the entries in.
-    std::copy(Attrs.begin(), Attrs.end(),
-              reinterpret_cast<Attribute *>(this + 1));
+    std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<Attribute>());
   }
 
   // AttributesSetNode is uniqued, these should not be publicly available.
@@ -170,7 +174,7 @@ public:
   std::string getAsString(bool InAttrGrp) const;
 
   typedef const Attribute *iterator;
-  iterator begin() const { return reinterpret_cast<iterator>(this + 1); }
+  iterator begin() const { return getTrailingObjects<Attribute>(); }
   iterator end() const { return begin() + NumAttrs; }
 
   void Profile(FoldingSetNodeID &ID) const {
@@ -181,27 +185,29 @@ public:
       AttrList[I].Profile(ID);
   }
 };
-static_assert(
-    AlignOf<AttributeSetNode>::Alignment >= AlignOf<Attribute>::Alignment,
-    "Alignment is insufficient for objects appended to AttributeSetNode");
+
+typedef std::pair<unsigned, AttributeSetNode *> IndexAttrPair;
 
 //===----------------------------------------------------------------------===//
 /// \class
 /// \brief This class represents a set of attributes that apply to the function,
 /// return type, and parameters.
-class AttributeSetImpl : public FoldingSetNode {
+class AttributeSetImpl final
+    : public FoldingSetNode,
+      private TrailingObjects<AttributeSetImpl, IndexAttrPair> {
   friend class AttributeSet;
-
-public:
-  typedef std::pair<unsigned, AttributeSetNode *> IndexAttrPair;
+  friend TrailingObjects;
 
 private:
   LLVMContext &Context;
   unsigned NumAttrs; ///< Number of entries in this set.
 
+  // Helper fn for TrailingObjects class.
+  size_t numTrailingObjects(OverloadToken<IndexAttrPair>) { return NumAttrs; }
+
   /// \brief Return a pointer to the IndexAttrPair for the specified slot.
   const IndexAttrPair *getNode(unsigned Slot) const {
-    return reinterpret_cast<const IndexAttrPair *>(this + 1) + Slot;
+    return getTrailingObjects<IndexAttrPair>() + Slot;
   }
 
   // AttributesSet is uniqued, these should not be publicly available.
@@ -222,8 +228,7 @@ public:
     }
 #endif
     // There's memory after the node where we can store the entries in.
-    std::copy(Attrs.begin(), Attrs.end(),
-              reinterpret_cast<IndexAttrPair *>(this + 1));
+    std::copy(Attrs.begin(), Attrs.end(), getTrailingObjects<IndexAttrPair>());
   }
 
   /// \brief Get the context that created this AttributeSetImpl.
@@ -273,10 +278,6 @@ public:
   void dump() const;
 };
-static_assert(
-    AlignOf<AttributeSetImpl>::Alignment >=
-        AlignOf<AttributeSetImpl::IndexAttrPair>::Alignment,
-    "Alignment is insufficient for objects appended to AttributeSetImpl");
 
 } // end llvm namespace
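Both classes now obtain their variable-length tails through llvm::TrailingObjects instead of pointer arithmetic on this + 1. A compressed sketch (a hypothetical Node class, not code from the patch) of the pattern; the base computes the tail's size and alignment, which is why the hand-written static_asserts above could be deleted:

// Sketch, not from the patch: the TrailingObjects pattern adopted above.
#include "llvm/Support/TrailingObjects.h"
#include <new>

class Node final : private llvm::TrailingObjects<Node, int> {
  friend TrailingObjects;

  unsigned NumInts;
  explicit Node(unsigned N) : NumInts(N) {}

public:
  static Node *create(unsigned N) {
    // totalSizeToAlloc<int>(N) covers sizeof(Node) plus N trailing ints,
    // suitably aligned; the entries live immediately after the object.
    void *Mem = ::operator new(totalSizeToAlloc<int>(N));
    return new (Mem) Node(N);
  }
  int *ints() { return getTrailingObjects<int>(); }
};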
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 546a98670a29..bcf7dc365ce5 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
 #include "AttributeImpl.h"
 #include "LLVMContextImpl.h"
 #include "llvm/ADT/STLExtras.h"
@@ -120,28 +121,28 @@ Attribute::AttrKind Attribute::getKindAsEnum() const {
   if (!pImpl) return None;
   assert((isEnumAttribute() || isIntAttribute()) &&
          "Invalid attribute type to get the kind as an enum!");
-  return pImpl ? pImpl->getKindAsEnum() : None;
+  return pImpl->getKindAsEnum();
 }
 
 uint64_t Attribute::getValueAsInt() const {
   if (!pImpl) return 0;
   assert(isIntAttribute() &&
          "Expected the attribute to be an integer attribute!");
-  return pImpl ? pImpl->getValueAsInt() : 0;
+  return pImpl->getValueAsInt();
 }
 
 StringRef Attribute::getKindAsString() const {
   if (!pImpl) return StringRef();
   assert(isStringAttribute() &&
          "Invalid attribute type to get the kind as a string!");
-  return pImpl ? pImpl->getKindAsString() : StringRef();
+  return pImpl->getKindAsString();
 }
 
 StringRef Attribute::getValueAsString() const {
   if (!pImpl) return StringRef();
   assert(isStringAttribute() &&
          "Invalid attribute type to get the value as a string!");
-  return pImpl ? pImpl->getValueAsString() : StringRef();
+  return pImpl->getValueAsString();
 }
 
 bool Attribute::hasAttribute(AttrKind Kind) const {
@@ -198,6 +199,10 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "byval";
   if (hasAttribute(Attribute::Convergent))
     return "convergent";
+  if (hasAttribute(Attribute::InaccessibleMemOnly))
+    return "inaccessiblememonly";
+  if (hasAttribute(Attribute::InaccessibleMemOrArgMemOnly))
+    return "inaccessiblemem_or_argmemonly";
   if (hasAttribute(Attribute::InAlloca))
     return "inalloca";
   if (hasAttribute(Attribute::InlineHint))
@@ -232,6 +237,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "noredzone";
   if (hasAttribute(Attribute::NoReturn))
     return "noreturn";
+  if (hasAttribute(Attribute::NoRecurse))
+    return "norecurse";
   if (hasAttribute(Attribute::NoUnwind))
     return "nounwind";
   if (hasAttribute(Attribute::OptimizeNone))
@@ -442,6 +449,9 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
  case Attribute::JumpTable:       return 1ULL << 45;
   case Attribute::Convergent:      return 1ULL << 46;
   case Attribute::SafeStack:       return 1ULL << 47;
+  case Attribute::NoRecurse:       return 1ULL << 48;
+  case Attribute::InaccessibleMemOnly:         return 1ULL << 49;
+  case Attribute::InaccessibleMemOrArgMemOnly: return 1ULL << 50;
   case Attribute::Dereferenceable:
     llvm_unreachable("dereferenceable attribute not supported in raw format");
     break;
@@ -472,9 +482,8 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
   SmallVector<Attribute, 8> SortedAttrs(Attrs.begin(), Attrs.end());
   array_pod_sort(SortedAttrs.begin(), SortedAttrs.end());
 
-  for (SmallVectorImpl<Attribute>::iterator I = SortedAttrs.begin(),
-         E = SortedAttrs.end(); I != E; ++I)
-    I->Profile(ID);
+  for (Attribute Attr : SortedAttrs)
+    Attr.Profile(ID);
 
   void *InsertPoint;
   AttributeSetNode *PA =
@@ -484,8 +493,7 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
   // new one and insert it.
   if (!PA) {
     // Coallocate entries after the AttributeSetNode itself.
-    void *Mem = ::operator new(sizeof(AttributeSetNode) +
-                               sizeof(Attribute) * SortedAttrs.size());
+    void *Mem = ::operator new(totalSizeToAlloc<Attribute>(SortedAttrs.size()));
     PA = new (Mem) AttributeSetNode(SortedAttrs);
     pImpl->AttrsSetNodes.InsertNode(PA, InsertPoint);
   }
@@ -617,9 +625,8 @@ AttributeSet::getImpl(LLVMContext &C,
   // create a new one and insert it.
   if (!PA) {
     // Coallocate entries after the AttributeSetImpl itself.
-    void *Mem = ::operator new(sizeof(AttributeSetImpl) +
-                               sizeof(std::pair<unsigned, AttributeSetNode*>) *
-                               Attrs.size());
+    void *Mem = ::operator new(
+        AttributeSetImpl::totalSizeToAlloc<IndexAttrPair>(Attrs.size()));
     PA = new (Mem) AttributeSetImpl(C, Attrs);
     pImpl->AttrsLists.InsertNode(PA, InsertPoint);
   }
@@ -684,22 +691,26 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
     if (!B.contains(Kind))
       continue;
 
-    if (Kind == Attribute::Alignment)
-      Attrs.push_back(std::make_pair(Index, Attribute::
-                                     getWithAlignment(C, B.getAlignment())));
-    else if (Kind == Attribute::StackAlignment)
-      Attrs.push_back(std::make_pair(Index, Attribute::
-                              getWithStackAlignment(C, B.getStackAlignment())));
-    else if (Kind == Attribute::Dereferenceable)
-      Attrs.push_back(std::make_pair(Index,
-                                     Attribute::getWithDereferenceableBytes(C,
-                                       B.getDereferenceableBytes())));
-    else if (Kind == Attribute::DereferenceableOrNull)
-      Attrs.push_back(
-          std::make_pair(Index, Attribute::getWithDereferenceableOrNullBytes(
-                                    C, B.getDereferenceableOrNullBytes())));
-    else
-      Attrs.push_back(std::make_pair(Index, Attribute::get(C, Kind)));
+    Attribute Attr;
+    switch (Kind) {
+    case Attribute::Alignment:
+      Attr = Attribute::getWithAlignment(C, B.getAlignment());
+      break;
+    case Attribute::StackAlignment:
+      Attr = Attribute::getWithStackAlignment(C, B.getStackAlignment());
+      break;
+    case Attribute::Dereferenceable:
+      Attr = Attribute::getWithDereferenceableBytes(
+          C, B.getDereferenceableBytes());
+      break;
+    case Attribute::DereferenceableOrNull:
+      Attr = Attribute::getWithDereferenceableOrNullBytes(
+          C, B.getDereferenceableOrNullBytes());
+      break;
+    default:
+      Attr = Attribute::get(C, Kind);
+    }
+    Attrs.push_back(std::make_pair(Index, Attr));
   }
 
   // Add target-dependent (string) attributes.
@@ -713,9 +724,8 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
 AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
                                ArrayRef<Attribute::AttrKind> Kind) {
   SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
-  for (ArrayRef<Attribute::AttrKind>::iterator I = Kind.begin(),
-         E = Kind.end(); I != E; ++I)
-    Attrs.push_back(std::make_pair(Index, Attribute::get(C, *I)));
+  for (Attribute::AttrKind K : Kind)
+    Attrs.push_back(std::make_pair(Index, Attribute::get(C, K)));
   return get(C, Attrs);
 }
 
@@ -736,9 +746,8 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef<AttributeSet> Attrs) {
     if (!AS) continue;
     SmallVector<std::pair<unsigned, AttributeSetNode *>, 8>::iterator
       ANVI = AttrNodeVec.begin(), ANVE;
-    for (const AttributeSetImpl::IndexAttrPair
-             *AI = AS->getNode(0),
-             *AE = AS->getNode(AS->getNumAttributes());
+    for (const IndexAttrPair *AI = AS->getNode(0),
+                             *AE = AS->getNode(AS->getNumAttributes());
         AI != AE; ++AI) {
       ANVE = AttrNodeVec.end();
       while (ANVI != ANVE && ANVI->first <= AI->first)
@@ -770,6 +779,36 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index,
   return addAttributes(C, Index, AttributeSet::get(C, Index, B));
 }
 
+AttributeSet AttributeSet::addAttribute(LLVMContext &C,
+                                        ArrayRef<unsigned> Indices,
+                                        Attribute A) const {
+  unsigned I = 0, E = pImpl ? pImpl->getNumAttributes() : 0;
+  auto IdxI = Indices.begin(), IdxE = Indices.end();
+  SmallVector<AttributeSet, 4> AttrSet;
+
+  while (I != E && IdxI != IdxE) {
+    if (getSlotIndex(I) < *IdxI)
+      AttrSet.emplace_back(getSlotAttributes(I++));
+    else if (getSlotIndex(I) > *IdxI)
+      AttrSet.emplace_back(AttributeSet::get(C, std::make_pair(*IdxI++, A)));
+    else {
+      AttrBuilder B(getSlotAttributes(I), *IdxI);
+      B.addAttribute(A);
+      AttrSet.emplace_back(AttributeSet::get(C, *IdxI, B));
+      ++I;
+      ++IdxI;
+    }
+  }
+
+  while (I != E)
+    AttrSet.emplace_back(getSlotAttributes(I++));
+
+  while (IdxI != IdxE)
+    AttrSet.emplace_back(AttributeSet::get(C, std::make_pair(*IdxI++, A)));
+
+  return get(C, AttrSet);
+}
+
 AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index,
                                          AttributeSet Attrs) const {
   if (!pImpl) return Attrs;
@@ -955,17 +994,17 @@ AttributeSet AttributeSet::getFnAttributes() const {
 bool AttributeSet::hasAttribute(unsigned Index, Attribute::AttrKind Kind) const{
   AttributeSetNode *ASN = getAttributes(Index);
-  return ASN ? ASN->hasAttribute(Kind) : false;
+  return ASN && ASN->hasAttribute(Kind);
 }
 
 bool AttributeSet::hasAttribute(unsigned Index, StringRef Kind) const {
   AttributeSetNode *ASN = getAttributes(Index);
-  return ASN ? ASN->hasAttribute(Kind) : false;
+  return ASN && ASN->hasAttribute(Kind);
 }
 
 bool AttributeSet::hasAttributes(unsigned Index) const {
   AttributeSetNode *ASN = getAttributes(Index);
-  return ASN ? ASN->hasAttributes() : false;
+  return ASN && ASN->hasAttributes();
 }
 
 /// \brief Return true if the specified attribute is set for at least one
@@ -1111,6 +1150,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index)
 
 void AttrBuilder::clear() {
   Attrs.reset();
+  TargetDepAttrs.clear();
   Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
 }
 
@@ -1177,23 +1217,10 @@ AttrBuilder &AttrBuilder::removeAttributes(AttributeSet A, uint64_t Index) {
   for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
     Attribute Attr = *I;
     if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
-      Attribute::AttrKind Kind = I->getKindAsEnum();
-      Attrs[Kind] = false;
-
-      if (Kind == Attribute::Alignment)
-        Alignment = 0;
-      else if (Kind == Attribute::StackAlignment)
-        StackAlignment = 0;
-      else if (Kind == Attribute::Dereferenceable)
-        DerefBytes = 0;
-      else if (Kind == Attribute::DereferenceableOrNull)
-        DerefOrNullBytes = 0;
+      removeAttribute(Attr.getKindAsEnum());
     } else {
       assert(Attr.isStringAttribute() && "Invalid attribute type!");
-      std::map<std::string, std::string>::iterator
-        Iter = TargetDepAttrs.find(Attr.getKindAsString());
-      if (Iter != TargetDepAttrs.end())
-        TargetDepAttrs.erase(Iter);
+      removeAttribute(Attr.getKindAsString());
     }
   }
 
@@ -1322,8 +1349,7 @@ bool AttrBuilder::hasAttributes(AttributeSet A, uint64_t Index) const {
 
   assert(Slot != ~0U && "Couldn't find the index!");
 
-  for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot);
-       I != E; ++I) {
+  for (AttributeSet::iterator I = A.begin(Slot), E = A.end(Slot); I != E; ++I) {
     Attribute Attr = *I;
     if (Attr.isEnumAttribute() || Attr.isIntAttribute()) {
       if (Attrs[I->getKindAsEnum()])
@@ -1382,7 +1408,7 @@ AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
 //===----------------------------------------------------------------------===//
 
 /// \brief Which attributes cannot be applied to a type.
-AttrBuilder AttributeFuncs::typeIncompatible(const Type *Ty) {
+AttrBuilder AttributeFuncs::typeIncompatible(Type *Ty) {
   AttrBuilder Incompatible;
 
   if (!Ty->isIntegerTy())
@@ -1406,3 +1432,80 @@ AttrBuilder AttributeFuncs::typeIncompatible(const Type *Ty) {
 
   return Incompatible;
 }
+
+template <typename AttrClass>
+static bool isEqual(const Function &Caller, const Function &Callee) {
+  return Caller.getFnAttribute(AttrClass::getKind()) ==
+         Callee.getFnAttribute(AttrClass::getKind());
+}
+
+/// \brief Compute the logical AND of the attributes of the caller and the
+/// callee.
+///
+/// This function sets the caller's attribute to false if the callee's attribute
+/// is false.
+template <typename AttrClass>
+static void setAND(Function &Caller, const Function &Callee) {
+  if (AttrClass::isSet(Caller, AttrClass::getKind()) &&
+      !AttrClass::isSet(Callee, AttrClass::getKind()))
+    AttrClass::set(Caller, AttrClass::getKind(), false);
+}
+
+/// \brief Compute the logical OR of the attributes of the caller and the
+/// callee.
+///
+/// This function sets the caller's attribute to true if the callee's attribute
+/// is true.
+template <typename AttrClass>
+static void setOR(Function &Caller, const Function &Callee) {
+  if (!AttrClass::isSet(Caller, AttrClass::getKind()) &&
+      AttrClass::isSet(Callee, AttrClass::getKind()))
+    AttrClass::set(Caller, AttrClass::getKind(), true);
+}
+
+/// \brief If the inlined function had a higher stack protection level than the
+/// calling function, then bump up the caller's stack protection level.
+static void adjustCallerSSPLevel(Function &Caller, const Function &Callee) {
+  // If upgrading the SSP attribute, clear out the old SSP Attributes first.
+  // Having multiple SSP attributes doesn't actually hurt, but it adds useless
+  // clutter to the IR.
+  AttrBuilder B;
+  B.addAttribute(Attribute::StackProtect)
+      .addAttribute(Attribute::StackProtectStrong)
+      .addAttribute(Attribute::StackProtectReq);
+  AttributeSet OldSSPAttr = AttributeSet::get(Caller.getContext(),
+                                              AttributeSet::FunctionIndex,
+                                              B);
+
+  if (Callee.hasFnAttribute(Attribute::SafeStack)) {
+    Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+    Caller.addFnAttr(Attribute::SafeStack);
+  } else if (Callee.hasFnAttribute(Attribute::StackProtectReq) &&
+             !Caller.hasFnAttribute(Attribute::SafeStack)) {
+    Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+    Caller.addFnAttr(Attribute::StackProtectReq);
+  } else if (Callee.hasFnAttribute(Attribute::StackProtectStrong) &&
+             !Caller.hasFnAttribute(Attribute::SafeStack) &&
+             !Caller.hasFnAttribute(Attribute::StackProtectReq)) {
+    Caller.removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr);
+    Caller.addFnAttr(Attribute::StackProtectStrong);
+  } else if (Callee.hasFnAttribute(Attribute::StackProtect) &&
+             !Caller.hasFnAttribute(Attribute::SafeStack) &&
+             !Caller.hasFnAttribute(Attribute::StackProtectReq) &&
+             !Caller.hasFnAttribute(Attribute::StackProtectStrong))
+    Caller.addFnAttr(Attribute::StackProtect);
+}
+
+#define GET_ATTR_COMPAT_FUNC
+#include "AttributesCompatFunc.inc"
+
+bool AttributeFuncs::areInlineCompatible(const Function &Caller,
+                                         const Function &Callee) {
+  return hasCompatibleFnAttrs(Caller, Callee);
+}
+
+
+void AttributeFuncs::mergeAttributesForInlining(Function &Caller,
+                                                const Function &Callee) {
+  mergeFnAttrs(Caller, Callee);
+}
diff --git a/lib/IR/AttributesCompatFunc.td b/lib/IR/AttributesCompatFunc.td
new file mode 100644
index 000000000000..7c85b3da9ab6
--- /dev/null
+++ b/lib/IR/AttributesCompatFunc.td
@@ -0,0 +1 @@
+include "llvm/IR/Attributes.td"
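AttributesCompatFunc.inc is generated from the one-line .td file above; the isEqual/setAND/setOR templates expect small adapter classes from that generated code. A hypothetical sketch of the shape such an adapter could take (the generated names and layout are assumptions, not shown in this patch; only APIs already used above are called):

// Hypothetical adapter in the shape the templates above expect.
struct HypotheticalEnumAttr {
  static llvm::Attribute::AttrKind getKind() {
    return llvm::Attribute::NoImplicitFloat;  // example kind, an assumption
  }
  static bool isSet(const llvm::Function &Fn, llvm::Attribute::AttrKind Kind) {
    return Fn.hasFnAttribute(Kind);
  }
  static void set(llvm::Function &Fn, llvm::Attribute::AttrKind Kind, bool Val) {
    if (Val) {
      Fn.addFnAttr(Kind);
    } else {
      // Removal mirrors adjustCallerSSPLevel() above: build an AttrBuilder
      // and strip it from the function index.
      llvm::AttrBuilder B;
      B.addAttribute(Kind);
      Fn.removeAttributes(
          llvm::AttributeSet::FunctionIndex,
          llvm::AttributeSet::get(Fn.getContext(),
                                  llvm::AttributeSet::FunctionIndex, B));
    }
  }
};
// With such a class, setAND<HypotheticalEnumAttr>(Caller, Callee) clears the
// caller's attribute whenever the callee does not also carry it.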
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index f1c6ebd4846e..12c354c89b20 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Regex.h"
 #include <cstring>
 using namespace llvm;
 
@@ -92,8 +93,42 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
                                         F->arg_begin()->getType());
       return true;
     }
+    Regex vldRegex("^arm\\.neon\\.vld([1234]|[234]lane)\\.v[a-z0-9]*$");
+    if (vldRegex.match(Name)) {
+      auto fArgs = F->getFunctionType()->params();
+      SmallVector<Type *, 4> Tys(fArgs.begin(), fArgs.end());
+      // Can't use Intrinsic::getDeclaration here as the return types might
+      // then only be structurally equal.
+      FunctionType* fType = FunctionType::get(F->getReturnType(), Tys, false);
+      NewFn = Function::Create(fType, F->getLinkage(),
+                               "llvm." + Name + ".p0i8", F->getParent());
+      return true;
+    }
+    Regex vstRegex("^arm\\.neon\\.vst([1234]|[234]lane)\\.v[a-z0-9]*$");
+    if (vstRegex.match(Name)) {
+      static const Intrinsic::ID StoreInts[] = {Intrinsic::arm_neon_vst1,
+                                                Intrinsic::arm_neon_vst2,
+                                                Intrinsic::arm_neon_vst3,
+                                                Intrinsic::arm_neon_vst4};
+
+      static const Intrinsic::ID StoreLaneInts[] = {
+        Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
+        Intrinsic::arm_neon_vst4lane
+      };
+
+      auto fArgs = F->getFunctionType()->params();
+      Type *Tys[] = {fArgs[0], fArgs[1]};
+      if (Name.find("lane") == StringRef::npos)
+        NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                          StoreInts[fArgs.size() - 3], Tys);
+      else
+        NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                          StoreLaneInts[fArgs.size() - 5], Tys);
+      return true;
+    }
     break;
   }
+
   case 'c': {
     if (Name.startswith("ctlz.") && F->arg_size() == 1) {
       F->setName(Name + ".old");
@@ -129,7 +164,10 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         Name.startswith("x86.sse2.pcmpgt.") ||
         Name.startswith("x86.avx2.pcmpeq.") ||
         Name.startswith("x86.avx2.pcmpgt.") ||
+        Name.startswith("x86.avx2.vbroadcast") ||
+        Name.startswith("x86.avx2.pbroadcast") ||
         Name.startswith("x86.avx.vpermil.") ||
+        Name.startswith("x86.sse41.pmovsx") ||
         Name == "x86.avx.vinsertf128.pd.256" ||
         Name == "x86.avx.vinsertf128.ps.256" ||
         Name == "x86.avx.vinsertf128.si.256" ||
@@ -162,6 +200,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
         Name == "x86.avx2.pblendd.128" ||
         Name == "x86.avx2.pblendd.256" ||
         Name == "x86.avx2.vbroadcasti128" ||
+        Name == "x86.xop.vpcmov" ||
        (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
       NewFn = nullptr;
       return true;
@@ -325,7 +364,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   Function *F = CI->getCalledFunction();
   LLVMContext &C = CI->getContext();
   IRBuilder<> Builder(C);
-  Builder.SetInsertPoint(CI->getParent(), CI);
+  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
 
   assert(F && "Intrinsic call is not direct?");
 
@@ -351,7 +390,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         Name == "llvm.x86.avx.movnt.ps.256" ||
         Name == "llvm.x86.avx.movnt.pd.256") {
       IRBuilder<> Builder(C);
-      Builder.SetInsertPoint(CI->getParent(), CI);
+      Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
 
       Module *M = F->getParent();
       SmallVector<Metadata *, 1> Elts;
@@ -368,7 +407,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
                                 "cast");
       StoreInst *SI = Builder.CreateStore(Arg1, BC);
       SI->setMetadata(M->getMDKindID("nontemporal"), Node);
-      SI->setAlignment(16);
+      SI->setAlignment(32);
 
       // Remove intrinsic.
       CI->eraseFromParent();
@@ -419,6 +458,16 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       Rep = Builder.CreateCall(VPCOM, {CI->getArgOperand(0),
                                        CI->getArgOperand(1),
                                        Builder.getInt8(Imm)});
+    } else if (Name == "llvm.x86.xop.vpcmov") {
+      Value *Arg0 = CI->getArgOperand(0);
+      Value *Arg1 = CI->getArgOperand(1);
+      Value *Sel = CI->getArgOperand(2);
+      unsigned NumElts = CI->getType()->getVectorNumElements();
+      Constant *MinusOne = ConstantVector::getSplat(NumElts, Builder.getInt64(-1));
+      Value *NotSel = Builder.CreateXor(Sel, MinusOne);
+      Value *Sel0 = Builder.CreateAnd(Arg0, Sel);
+      Value *Sel1 = Builder.CreateAnd(Arg1, NotSel);
+      Rep = Builder.CreateOr(Sel0, Sel1);
     } else if (Name == "llvm.x86.sse42.crc32.64.8") {
       Function *CRC32 = Intrinsic::getDeclaration(F->getParent(),
                                                   Intrinsic::x86_sse42_crc32_32_8);
@@ -438,6 +487,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       for (unsigned I = 0; I < EltNum; ++I)
         Rep = Builder.CreateInsertElement(Rep, Load,
                                           ConstantInt::get(I32Ty, I));
+    } else if (Name.startswith("llvm.x86.sse41.pmovsx")) {
+      VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
+      VectorType *DstTy = cast<VectorType>(CI->getType());
+      unsigned NumDstElts = DstTy->getNumElements();
+
+      // Extract a subvector of the first NumDstElts lanes and sign extend.
+      SmallVector<int, 16> ShuffleMask;
+      for (int i = 0; i != (int)NumDstElts; ++i)
+        ShuffleMask.push_back(i);
+
+      Value *SV = Builder.CreateShuffleVector(
+          CI->getArgOperand(0), UndefValue::get(SrcTy), ShuffleMask);
+      Rep = Builder.CreateSExt(SV, DstTy);
     } else if (Name == "llvm.x86.avx2.vbroadcasti128") {
       // Replace vbroadcasts with a vector shuffle.
       Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
@@ -447,6 +509,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       const int Idxs[4] = { 0, 1, 0, 1 };
       Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
                                         Idxs);
+    } else if (Name.startswith("llvm.x86.avx2.pbroadcast") ||
+               Name.startswith("llvm.x86.avx2.vbroadcast")) {
+      // Replace vp?broadcasts with a vector shuffle.
+      Value *Op = CI->getArgOperand(0);
+      unsigned NumElts = CI->getType()->getVectorNumElements();
+      Type *MaskTy = VectorType::get(Type::getInt32Ty(C), NumElts);
+      Rep = Builder.CreateShuffleVector(Op, UndefValue::get(Op->getType()),
+                                        Constant::getNullValue(MaskTy));
     } else if (Name == "llvm.x86.sse2.psll.dq") {
       // 128-bit shift left specified in bits.
       unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
@@ -517,10 +587,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
       VectorType *VecTy = cast<VectorType>(CI->getType());
       unsigned NumElts = VecTy->getNumElements();
-      
+
       // Mask off the high bits of the immediate value; hardware ignores those.
       Imm = Imm & 1;
-      
+
       // Extend the second operand into a vector that is twice as big.
       Value *UndefV = UndefValue::get(Op1->getType());
      SmallVector<Constant *, 8> Idxs;
@@ -562,7 +632,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
       VectorType *VecTy = cast<VectorType>(CI->getType());
       unsigned NumElts = VecTy->getNumElements();
-      
+
       // Mask off the high bits of the immediate value; hardware ignores those.
@@ -627,6 +697,27 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   default:
     llvm_unreachable("Unknown function for CallInst upgrade.");
 
+  case Intrinsic::arm_neon_vld1:
+  case Intrinsic::arm_neon_vld2:
+  case Intrinsic::arm_neon_vld3:
+  case Intrinsic::arm_neon_vld4:
+  case Intrinsic::arm_neon_vld2lane:
+  case Intrinsic::arm_neon_vld3lane:
+  case Intrinsic::arm_neon_vld4lane:
+  case Intrinsic::arm_neon_vst1:
+  case Intrinsic::arm_neon_vst2:
+  case Intrinsic::arm_neon_vst3:
+  case Intrinsic::arm_neon_vst4:
+  case Intrinsic::arm_neon_vst2lane:
+  case Intrinsic::arm_neon_vst3lane:
+  case Intrinsic::arm_neon_vst4lane: {
+    SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
+                                 CI->arg_operands().end());
+    CI->replaceAllUsesWith(Builder.CreateCall(NewFn, Args));
+    CI->eraseFromParent();
+    return;
+  }
+
   case Intrinsic::ctlz:
   case Intrinsic::cttz:
     assert(CI->getNumArgOperands() == 1 &&
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 0a0449434a7b..f61276fd436b 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -21,6 +21,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Type.h"
 #include <algorithm>
+
 using namespace llvm;
 
 ValueSymbolTable *BasicBlock::getValueSymbolTable() {
@@ -35,8 +36,7 @@ LLVMContext &BasicBlock::getContext() const {
 
 // Explicit instantiation of SymbolTableListTraits since some of the methods
 // are not in the public header file...
-template class llvm::SymbolTableListTraits<Instruction, BasicBlock>;
-
+template class llvm::SymbolTableListTraits<Instruction>;
 
 BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
                        BasicBlock *InsertBefore)
@@ -56,7 +56,7 @@ void BasicBlock::insertInto(Function *NewParent, BasicBlock *InsertBefore) {
   assert(!Parent && "Already has a parent");
 
   if (InsertBefore)
-    NewParent->getBasicBlockList().insert(InsertBefore, this);
+    NewParent->getBasicBlockList().insert(InsertBefore->getIterator(), this);
   else
     NewParent->getBasicBlockList().push_back(this);
 }
@@ -91,26 +91,26 @@ void BasicBlock::setParent(Function *parent) {
 }
 
 void BasicBlock::removeFromParent() {
-  getParent()->getBasicBlockList().remove(this);
+  getParent()->getBasicBlockList().remove(getIterator());
 }
 
 iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() {
-  return getParent()->getBasicBlockList().erase(this);
+  return getParent()->getBasicBlockList().erase(getIterator());
 }
 
 /// Unlink this basic block from its current function and
 /// insert it into the function that MovePos lives in, right before MovePos.
 void BasicBlock::moveBefore(BasicBlock *MovePos) {
-  MovePos->getParent()->getBasicBlockList().splice(MovePos,
-                       getParent()->getBasicBlockList(), this);
+  MovePos->getParent()->getBasicBlockList().splice(
+      MovePos->getIterator(), getParent()->getBasicBlockList(), getIterator());
 }
 
 /// Unlink this basic block from its current function and
 /// insert it into the function that MovePos lives in, right after MovePos.
void BasicBlock::moveAfter(BasicBlock *MovePos) { - Function::iterator I = MovePos; - MovePos->getParent()->getBasicBlockList().splice(++I, - getParent()->getBasicBlockList(), this); + MovePos->getParent()->getBasicBlockList().splice( + ++MovePos->getIterator(), getParent()->getBasicBlockList(), + getIterator()); } const Module *BasicBlock::getModule() const { @@ -196,8 +196,8 @@ BasicBlock::iterator BasicBlock::getFirstInsertionPt() { if (!FirstNonPHI) return end(); - iterator InsertPt = FirstNonPHI; - if (isa(InsertPt)) ++InsertPt; + iterator InsertPt = FirstNonPHI->getIterator(); + if (InsertPt->isEHPad()) ++InsertPt; return InsertPt; } @@ -245,12 +245,12 @@ BasicBlock *BasicBlock::getSingleSuccessor() { BasicBlock *BasicBlock::getUniqueSuccessor() { succ_iterator SI = succ_begin(this), E = succ_end(this); - if (SI == E) return NULL; // No successors + if (SI == E) return nullptr; // No successors BasicBlock *SuccBB = *SI; ++SI; for (;SI != E; ++SI) { if (*SI != SuccBB) - return NULL; + return nullptr; // The same successor appears multiple times in the successor list. // This is OK. } @@ -333,6 +333,17 @@ void BasicBlock::removePredecessor(BasicBlock *Pred, } } +bool BasicBlock::canSplitPredecessors() const { + const Instruction *FirstNonPHI = getFirstNonPHI(); + if (isa(FirstNonPHI)) + return true; + // This is perhaps a little conservative because constructs like + // CleanupBlockInst are pretty easy to split. However, SplitBlockPredecessors + // cannot handle such things just yet. + if (FirstNonPHI->isEHPad()) + return false; + return true; +} /// This splits a basic block into two at the specified /// instruction. Note that all instructions BEFORE the specified iterator stay @@ -393,8 +404,7 @@ void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) { // Cope with being called on a BasicBlock that doesn't have a terminator // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this. return; - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = TI->getSuccessor(i); + for (BasicBlock *Succ : TI->successors()) { // N.B. Succ might not be a complete BasicBlock, so don't assume // that it ends with a non-phi instruction. for (iterator II = Succ->begin(), IE = Succ->end(); II != IE; ++II) { diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index aabeaefc0c7a..40b4ec65e22b 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -1,3 +1,7 @@ +set(LLVM_TARGET_DEFINITIONS AttributesCompatFunc.td) +tablegen(LLVM AttributesCompatFunc.inc -gen-attrs) +add_public_tablegen_target(AttributeCompatFuncTableGen) + add_llvm_library(LLVMCore AsmWriter.cpp Attributes.cpp @@ -32,13 +36,13 @@ add_llvm_library(LLVMCore MDBuilder.cpp Mangler.cpp Metadata.cpp - MetadataTracking.cpp Module.cpp Operator.cpp Pass.cpp PassManager.cpp PassRegistry.cpp Statepoint.cpp + FunctionInfo.cpp Type.cpp TypeFinder.cpp Use.cpp @@ -52,4 +56,12 @@ add_llvm_library(LLVMCore ${LLVM_MAIN_INCLUDE_DIR}/llvm/IR ) +# PR24785: Workaround for hanging compilation. 
+if( MSVC_VERSION EQUAL 1800) + set_property( + SOURCE Function.cpp + PROPERTY COMPILE_FLAGS "/Og-" + ) +endif() + add_dependencies(LLVMCore intrinsics_gen) diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 46bb20e0d1b7..ce3fe03e2df7 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -83,7 +83,7 @@ foldConstantCastPair( assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type"); assert(CastInst::isCast(opc) && "Invalid cast opcode"); - // The the types and opcodes for the two Cast constant expressions + // The types and opcodes for the two Cast constant expressions Type *SrcTy = Op->getOperand(0)->getType(); Type *MidTy = Op->getType(); Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode()); @@ -109,7 +109,7 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) { if (PointerType *PTy = dyn_cast(V->getType())) if (PointerType *DPTy = dyn_cast(DestTy)) if (PTy->getAddressSpace() == DPTy->getAddressSpace() - && DPTy->getElementType()->isSized()) { + && PTy->getElementType()->isSized()) { SmallVector IdxList; Value *Zero = Constant::getNullValue(Type::getInt32Ty(DPTy->getContext())); @@ -1187,7 +1187,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, (void)C3V.divide(C2V, APFloat::rmNearestTiesToEven); return ConstantFP::get(C1->getContext(), C3V); case Instruction::FRem: - (void)C3V.mod(C2V, APFloat::rmNearestTiesToEven); + (void)C3V.mod(C2V); return ConstantFP::get(C1->getContext(), C3V); } } @@ -1277,9 +1277,9 @@ static bool isMaybeZeroSizedType(Type *Ty) { } /// IdxCompare - Compare the two constants as though they were getelementptr -/// indices. This allows coersion of the types to be the same thing. +/// indices. This allows coercion of the types to be the same thing. /// -/// If the two constants are the "same" (after coersion), return 0. If the +/// If the two constants are the "same" (after coercion), return 0. If the /// first is less than the second, return -1, if the second is less than the /// first, return 1. If the constants are not integral, return -2. /// @@ -1685,7 +1685,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // Otherwise, for integer compare, pick the same value as the non-undef // operand, and fold it to true or false. if (isIntegerPredicate) - return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred)); + return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(Predicate)); // Choosing NaN for the undef will always make unordered comparison succeed // and ordered comparison fails. @@ -1869,7 +1869,8 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, } else { // Evaluate the relation between the two constants, per the predicate. int Result = -1; // -1 = unknown, 0 = known false, 1 = known true. - switch (evaluateICmpRelation(C1, C2, CmpInst::isSigned(pred))) { + switch (evaluateICmpRelation(C1, C2, + CmpInst::isSigned((CmpInst::Predicate)pred))) { default: llvm_unreachable("Unknown relational!"); case ICmpInst::BAD_ICMP_PREDICATE: break; // Couldn't determine anything about these constants. @@ -1950,8 +1951,10 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, // If the left hand side is an extension, try eliminating it. 
if (ConstantExpr *CE1 = dyn_cast(C1)) { - if ((CE1->getOpcode() == Instruction::SExt && ICmpInst::isSigned(pred)) || - (CE1->getOpcode() == Instruction::ZExt && !ICmpInst::isSigned(pred))){ + if ((CE1->getOpcode() == Instruction::SExt && + ICmpInst::isSigned((ICmpInst::Predicate)pred)) || + (CE1->getOpcode() == Instruction::ZExt && + !ICmpInst::isSigned((ICmpInst::Predicate)pred))){ Constant *CE1Op0 = CE1->getOperand(0); Constant *CE1Inverse = ConstantExpr::getTrunc(CE1, CE1Op0->getType()); if (CE1Inverse == CE1Op0) { @@ -1997,17 +2000,17 @@ static bool isInBoundsIndices(ArrayRef Idxs) { } /// \brief Test whether a given ConstantInt is in-range for a SequentialType. -static bool isIndexInRangeOfSequentialType(const SequentialType *STy, +static bool isIndexInRangeOfSequentialType(SequentialType *STy, const ConstantInt *CI) { - if (const PointerType *PTy = dyn_cast(STy)) - // Only handle pointers to sized types, not pointers to functions. - return PTy->getElementType()->isSized(); + // And indices are valid when indexing along a pointer + if (isa(STy)) + return true; uint64_t NumElements = 0; // Determine the number of elements in our sequential type. - if (const ArrayType *ATy = dyn_cast(STy)) + if (auto *ATy = dyn_cast(STy)) NumElements = ATy->getNumElements(); - else if (const VectorType *VTy = dyn_cast(STy)) + else if (auto *VTy = dyn_cast(STy)) NumElements = VTy->getNumElements(); assert((isa(STy) || NumElements > 0) && @@ -2178,7 +2181,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C, // dimension. NewIdxs.resize(Idxs.size()); uint64_t NumElements = 0; - if (const ArrayType *ATy = dyn_cast(Ty)) + if (auto *ATy = dyn_cast(Ty)) NumElements = ATy->getNumElements(); else NumElements = cast(Ty)->getNumElements(); diff --git a/lib/IR/ConstantRange.cpp b/lib/IR/ConstantRange.cpp index 91095cfe9eec..48f9b27a25ae 100644 --- a/lib/IR/ConstantRange.cpp +++ b/lib/IR/ConstantRange.cpp @@ -21,7 +21,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Instruction.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -125,6 +127,57 @@ ConstantRange ConstantRange::makeSatisfyingICmpRegion(CmpInst::Predicate Pred, .inverse(); } +ConstantRange ConstantRange::makeNoWrapRegion(Instruction::BinaryOps BinOp, + const APInt &C, + unsigned NoWrapKind) { + typedef OverflowingBinaryOperator OBO; + + // Computes the intersection of CR0 and CR1. It is different from + // intersectWith in that the ConstantRange returned will only contain elements + // in both CR0 and CR1 (i.e. SubsetIntersect(X, Y) is a *subset*, proper or + // not, of both X and Y). + auto SubsetIntersect = + [](const ConstantRange &CR0, const ConstantRange &CR1) { + return CR0.inverse().unionWith(CR1.inverse()).inverse(); + }; + + assert(BinOp >= Instruction::BinaryOpsBegin && + BinOp < Instruction::BinaryOpsEnd && "Binary operators only!"); + + assert((NoWrapKind == OBO::NoSignedWrap || + NoWrapKind == OBO::NoUnsignedWrap || + NoWrapKind == (OBO::NoUnsignedWrap | OBO::NoSignedWrap)) && + "NoWrapKind invalid!"); + + unsigned BitWidth = C.getBitWidth(); + if (BinOp != Instruction::Add) + // Conservative answer: empty set + return ConstantRange(BitWidth, false); + + if (C.isMinValue()) + // Full set: nothing signed / unsigned wraps when added to 0. 
+ return ConstantRange(BitWidth); + + ConstantRange Result(BitWidth); + + if (NoWrapKind & OBO::NoUnsignedWrap) + Result = SubsetIntersect(Result, + ConstantRange(APInt::getNullValue(BitWidth), -C)); + + if (NoWrapKind & OBO::NoSignedWrap) { + if (C.isStrictlyPositive()) + Result = SubsetIntersect( + Result, ConstantRange(APInt::getSignedMinValue(BitWidth), + APInt::getSignedMinValue(BitWidth) - C)); + else + Result = SubsetIntersect( + Result, ConstantRange(APInt::getSignedMinValue(BitWidth) - C, + APInt::getSignedMinValue(BitWidth))); + } + + return Result; +} + /// isFullSet - Return true if this set contains all of the elements possible /// for this data-type bool ConstantRange::isFullSet() const { diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index 308e6bde3d14..0898bf645385 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -53,6 +53,11 @@ bool Constant::isNegativeZeroValue() const { if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative()) return true; + if (const ConstantVector *CV = dyn_cast(this)) + if (ConstantFP *SplatCFP = dyn_cast_or_null(CV->getSplatValue())) + if (SplatCFP && SplatCFP->isZero() && SplatCFP->isNegative()) + return true; + // We've already handled true FP case; any other FP vectors can't represent -0.0. if (getType()->isFPOrFPVectorTy()) return false; @@ -68,6 +73,17 @@ bool Constant::isZeroValue() const { if (const ConstantFP *CFP = dyn_cast(this)) return CFP->isZero(); + // Equivalent for a vector of -0.0's. + if (const ConstantDataVector *CV = dyn_cast(this)) + if (ConstantFP *SplatCFP = dyn_cast_or_null(CV->getSplatValue())) + if (SplatCFP && SplatCFP->isZero()) + return true; + + if (const ConstantVector *CV = dyn_cast(this)) + if (ConstantFP *SplatCFP = dyn_cast_or_null(CV->getSplatValue())) + if (SplatCFP && SplatCFP->isZero()) + return true; + // Otherwise, just use +0.0. return isNullValue(); } @@ -81,8 +97,10 @@ bool Constant::isNullValue() const { if (const ConstantFP *CFP = dyn_cast(this)) return CFP->isZero() && !CFP->isNegative(); - // constant zero is zero for aggregates and cpnull is null for pointers. - return isa(this) || isa(this); + // constant zero is zero for aggregates, cpnull is null for pointers, none for + // tokens. + return isa(this) || isa(this) || + isa(this); } bool Constant::isAllOnesValue() const { @@ -204,6 +222,8 @@ Constant *Constant::getNullValue(Type *Ty) { case Type::ArrayTyID: case Type::VectorTyID: return ConstantAggregateZero::get(Ty); + case Type::TokenTyID: + return ConstantTokenNone::get(Ty->getContext()); default: // Function, Label, or Opaque type? llvm_unreachable("Cannot create a null constant of that type!"); @@ -410,32 +430,13 @@ bool Constant::isConstantUsed() const { return false; } +bool Constant::needsRelocation() const { + if (isa(this)) + return true; // Global reference. - -/// getRelocationInfo - This method classifies the entry according to -/// whether or not it may generate a relocation entry. This must be -/// conservative, so if it might codegen to a relocatable entry, it should say -/// so. The return values are: -/// -/// NoRelocation: This constant pool entry is guaranteed to never have a -/// relocation applied to it (because it holds a simple constant like -/// '4'). -/// LocalRelocation: This entry has relocations, but the entries are -/// guaranteed to be resolvable by the static linker, so the dynamic -/// linker will never see them. -/// GlobalRelocations: This entry may have arbitrary relocations. -/// -/// FIXME: This really should not be in IR. 
-Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { - if (const GlobalValue *GV = dyn_cast(this)) { - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - return LocalRelocation; // Local to this file/library. - return GlobalRelocations; // Global reference. - } - if (const BlockAddress *BA = dyn_cast(this)) - return BA->getFunction()->getRelocationInfo(); - + return BA->getFunction()->needsRelocation(); + // While raw uses of blockaddress need to be relocated, differences between // two of them don't when they are for labels in the same function. This is a // common idiom when creating a table for the indirect goto extension, so we @@ -444,20 +445,18 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const { if (CE->getOpcode() == Instruction::Sub) { ConstantExpr *LHS = dyn_cast(CE->getOperand(0)); ConstantExpr *RHS = dyn_cast(CE->getOperand(1)); - if (LHS && RHS && - LHS->getOpcode() == Instruction::PtrToInt && + if (LHS && RHS && LHS->getOpcode() == Instruction::PtrToInt && RHS->getOpcode() == Instruction::PtrToInt && isa(LHS->getOperand(0)) && isa(RHS->getOperand(0)) && cast(LHS->getOperand(0))->getFunction() == - cast(RHS->getOperand(0))->getFunction()) - return NoRelocation; + cast(RHS->getOperand(0))->getFunction()) + return false; } - PossibleRelocationsTy Result = NoRelocation; + bool Result = false; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - Result = std::max(Result, - cast(getOperand(i))->getRelocationInfo()); + Result |= cast(getOperand(i))->needsRelocation(); return Result; } @@ -797,10 +796,10 @@ Constant *ConstantAggregateZero::getElementValue(unsigned Idx) const { } unsigned ConstantAggregateZero::getNumElements() const { - const Type *Ty = getType(); - if (const auto *AT = dyn_cast(Ty)) + Type *Ty = getType(); + if (auto *AT = dyn_cast(Ty)) return AT->getNumElements(); - if (const auto *VT = dyn_cast(Ty)) + if (auto *VT = dyn_cast(Ty)) return VT->getNumElements(); return Ty->getStructNumElements(); } @@ -838,10 +837,10 @@ UndefValue *UndefValue::getElementValue(unsigned Idx) const { } unsigned UndefValue::getNumElements() const { - const Type *Ty = getType(); - if (const auto *AT = dyn_cast(Ty)) + Type *Ty = getType(); + if (auto *AT = dyn_cast(Ty)) return AT->getNumElements(); - if (const auto *VT = dyn_cast(Ty)) + if (auto *VT = dyn_cast(Ty)) return VT->getNumElements(); return Ty->getStructNumElements(); } @@ -858,6 +857,59 @@ static bool rangeOnlyContains(ItTy Start, ItTy End, EltTy Elt) { return true; } +template +static Constant *getIntSequenceIfElementsMatch(ArrayRef V) { + assert(!V.empty() && "Cannot get empty int sequence."); + + SmallVector Elts; + for (Constant *C : V) + if (auto *CI = dyn_cast(C)) + Elts.push_back(CI->getZExtValue()); + else + return nullptr; + return SequentialTy::get(V[0]->getContext(), Elts); +} + +template +static Constant *getFPSequenceIfElementsMatch(ArrayRef V) { + assert(!V.empty() && "Cannot get empty FP sequence."); + + SmallVector Elts; + for (Constant *C : V) + if (auto *CFP = dyn_cast(C)) + Elts.push_back(CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); + else + return nullptr; + return SequentialTy::getFP(V[0]->getContext(), Elts); +} + +template +static Constant *getSequenceIfElementsMatch(Constant *C, + ArrayRef V) { + // We speculatively build the elements here even if it turns out that there is + // a constantexpr or something else weird, since it is so uncommon for that to + // happen. 
+ if (ConstantInt *CI = dyn_cast(C)) { + if (CI->getType()->isIntegerTy(8)) + return getIntSequenceIfElementsMatch(V); + else if (CI->getType()->isIntegerTy(16)) + return getIntSequenceIfElementsMatch(V); + else if (CI->getType()->isIntegerTy(32)) + return getIntSequenceIfElementsMatch(V); + else if (CI->getType()->isIntegerTy(64)) + return getIntSequenceIfElementsMatch(V); + } else if (ConstantFP *CFP = dyn_cast(C)) { + if (CFP->getType()->isHalfTy()) + return getFPSequenceIfElementsMatch(V); + else if (CFP->getType()->isFloatTy()) + return getFPSequenceIfElementsMatch(V); + else if (CFP->getType()->isDoubleTy()) + return getFPSequenceIfElementsMatch(V); + } + + return nullptr; +} + ConstantArray::ConstantArray(ArrayType *T, ArrayRef V) : Constant(T, ConstantArrayVal, OperandTraits::op_end(this) - V.size(), @@ -875,6 +927,7 @@ Constant *ConstantArray::get(ArrayType *Ty, ArrayRef V) { return C; return Ty->getContext().pImpl->ArrayConstants.getOrCreate(Ty, V); } + Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef V) { // Empty arrays are canonicalized to ConstantAggregateZero. if (V.empty()) @@ -897,74 +950,8 @@ Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef V) { // Check to see if all of the elements are ConstantFP or ConstantInt and if // the element type is compatible with ConstantDataVector. If so, use it. - if (ConstantDataSequential::isElementTypeCompatible(C->getType())) { - // We speculatively build the elements here even if it turns out that there - // is a constantexpr or something else weird in the array, since it is so - // uncommon for that to happen. - if (ConstantInt *CI = dyn_cast(C)) { - if (CI->getType()->isIntegerTy(8)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(16)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(32)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(64)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::get(C->getContext(), Elts); - } - } - - if (ConstantFP *CFP = dyn_cast(C)) { - if (CFP->getType()->isFloatTy()) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantFP *CFP = dyn_cast(V[i])) - Elts.push_back( - CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::getFP(C->getContext(), Elts); - } else if (CFP->getType()->isDoubleTy()) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantFP *CFP = dyn_cast(V[i])) - Elts.push_back( - CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataArray::getFP(C->getContext(), Elts); - } - } - } 
+ if (ConstantDataSequential::isElementTypeCompatible(C->getType())) + return getSequenceIfElementsMatch(C, V); // Otherwise, we really do want to create a ConstantArray. return nullptr; @@ -1060,6 +1047,7 @@ Constant *ConstantVector::get(ArrayRef V) { VectorType *Ty = VectorType::get(V.front()->getType(), V.size()); return Ty->getContext().pImpl->VectorConstants.getOrCreate(Ty, V); } + Constant *ConstantVector::getImpl(ArrayRef V) { assert(!V.empty() && "Vectors can't be empty"); VectorType *T = VectorType::get(V.front()->getType(), V.size()); @@ -1085,74 +1073,8 @@ Constant *ConstantVector::getImpl(ArrayRef V) { // Check to see if all of the elements are ConstantFP or ConstantInt and if // the element type is compatible with ConstantDataVector. If so, use it. - if (ConstantDataSequential::isElementTypeCompatible(C->getType())) { - // We speculatively build the elements here even if it turns out that there - // is a constantexpr or something else weird in the array, since it is so - // uncommon for that to happen. - if (ConstantInt *CI = dyn_cast(C)) { - if (CI->getType()->isIntegerTy(8)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(16)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(32)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::get(C->getContext(), Elts); - } else if (CI->getType()->isIntegerTy(64)) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantInt *CI = dyn_cast(V[i])) - Elts.push_back(CI->getZExtValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::get(C->getContext(), Elts); - } - } - - if (ConstantFP *CFP = dyn_cast(C)) { - if (CFP->getType()->isFloatTy()) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantFP *CFP = dyn_cast(V[i])) - Elts.push_back( - CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::getFP(C->getContext(), Elts); - } else if (CFP->getType()->isDoubleTy()) { - SmallVector Elts; - for (unsigned i = 0, e = V.size(); i != e; ++i) - if (ConstantFP *CFP = dyn_cast(V[i])) - Elts.push_back( - CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); - else - break; - if (Elts.size() == V.size()) - return ConstantDataVector::getFP(C->getContext(), Elts); - } - } - } + if (ConstantDataSequential::isElementTypeCompatible(C->getType())) + return getSequenceIfElementsMatch(C, V); // Otherwise, the element type isn't compatible with ConstantDataVector, or // the operand list constants a ConstantExpr or something else strange. 
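// Both getImpl call sites above now funnel through getSequenceIfElementsMatch,
// which probes the first element's type once and runs a single templated
// packing loop in place of the four hand-copied loops deleted here. A
// standalone sketch of that dispatch-by-width pattern (illustrative types
// only, not LLVM API; one mismatching element aborts the whole attempt):
#include <cstdint>
#include <vector>

struct Elem { unsigned BitWidth; uint64_t Value; };

template <typename UIntTy>
static bool packAll(const std::vector<Elem> &V, std::vector<UIntTy> &Out) {
  for (const Elem &E : V) {
    if (E.BitWidth != sizeof(UIntTy) * 8)
      return false;                    // bail out; caller keeps the slow path
    Out.push_back(static_cast<UIntTy>(E.Value));
  }
  return true;
}

static bool packByWidth(const std::vector<Elem> &V) {
  if (V.empty())
    return false;
  switch (V[0].BitWidth) {             // probe one representative element
  case 8:  { std::vector<uint8_t>  Out; return packAll(V, Out); }
  case 16: { std::vector<uint16_t> Out; return packAll(V, Out); }
  case 32: { std::vector<uint32_t> Out; return packAll(V, Out); }
  case 64: { std::vector<uint64_t> Out; return packAll(V, Out); }
  default: return false;
  }
}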
@@ -1170,6 +1092,17 @@ Constant *ConstantVector::getSplat(unsigned NumElts, Constant *V) { return get(Elts); } +ConstantTokenNone *ConstantTokenNone::get(LLVMContext &Context) { + LLVMContextImpl *pImpl = Context.pImpl; + if (!pImpl->TheNoneToken) + pImpl->TheNoneToken.reset(new ConstantTokenNone(Context)); + return pImpl->TheNoneToken.get(); +} + +/// Remove the constant from the constant table. +void ConstantTokenNone::destroyConstantImpl() { + llvm_unreachable("You can't ConstantTokenNone->destroyConstantImpl()!"); +} // Utility function for determining if a ConstantExpr is a CastOp or not. This // can't be inline because we don't want to #include Instruction.h into @@ -1221,8 +1154,7 @@ ArrayRef ConstantExpr::getIndices() const { } unsigned ConstantExpr::getPredicate() const { - assert(isCompare()); - return ((const CompareConstantExpr*)this)->predicate; + return cast(this)->predicate; } /// getWithOperandReplaced - Return a constant expression identical to this @@ -1245,7 +1177,7 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { /// operands replaced with the specified values. The specified array must /// have the same number of operands as our current one. Constant *ConstantExpr::getWithOperands(ArrayRef Ops, Type *Ty, - bool OnlyIfReduced) const { + bool OnlyIfReduced, Type *SrcTy) const { assert(Ops.size() == getNumOperands() && "Operand count mismatch!"); // If no operands changed return self. @@ -1283,10 +1215,13 @@ Constant *ConstantExpr::getWithOperands(ArrayRef Ops, Type *Ty, case Instruction::ShuffleVector: return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2], OnlyIfReducedTy); - case Instruction::GetElementPtr: - return ConstantExpr::getGetElementPtr(nullptr, Ops[0], Ops.slice(1), - cast(this)->isInBounds(), - OnlyIfReducedTy); + case Instruction::GetElementPtr: { + auto *GEPO = cast(this); + assert(SrcTy || (Ops[0]->getType() == getOperand(0)->getType())); + return ConstantExpr::getGetElementPtr( + SrcTy ? SrcTy : GEPO->getSourceElementType(), Ops[0], Ops.slice(1), + GEPO->isInBounds(), OnlyIfReducedTy); + } case Instruction::ICmp: case Instruction::FCmp: return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1], @@ -2430,9 +2365,9 @@ StringRef ConstantDataSequential::getRawDataValues() const { /// formed with a vector or array of the specified element type. /// ConstantDataArray only works with normal float and int types that are /// stored densely in memory, not with things like i42 or x86_f80. -bool ConstantDataSequential::isElementTypeCompatible(const Type *Ty) { - if (Ty->isFloatTy() || Ty->isDoubleTy()) return true; - if (const IntegerType *IT = dyn_cast(Ty)) { +bool ConstantDataSequential::isElementTypeCompatible(Type *Ty) { + if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) return true; + if (auto *IT = dyn_cast(Ty)) { switch (IT->getBitWidth()) { case 8: case 16: @@ -2587,7 +2522,7 @@ Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { /// object. 
Constant *ConstantDataArray::getFP(LLVMContext &Context, ArrayRef Elts) { - Type *Ty = VectorType::get(Type::getHalfTy(Context), Elts.size()); + Type *Ty = ArrayType::get(Type::getHalfTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); return getImpl(StringRef(const_cast(Data), Elts.size() * 2), Ty); } @@ -2703,6 +2638,11 @@ Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { } if (ConstantFP *CFP = dyn_cast(V)) { + if (CFP->getType()->isHalfTy()) { + SmallVector Elts( + NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); + return getFP(V->getContext(), Elts); + } if (CFP->getType()->isFloatTy()) { SmallVector Elts( NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); @@ -2748,6 +2688,10 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { switch (getElementType()->getTypeID()) { default: llvm_unreachable("Accessor can only be used when element is float/double!"); + case Type::HalfTyID: { + auto EltVal = *reinterpret_cast(EltPtr); + return APFloat(APFloat::IEEEhalf, APInt(16, EltVal)); + } case Type::FloatTyID: { auto EltVal = *reinterpret_cast(EltPtr); return APFloat(APFloat::IEEEsingle, APInt(32, EltVal)); @@ -2782,7 +2726,8 @@ double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { /// Note that this has to compute a new constant to return, so it isn't as /// efficient as getElementAsInteger/Float/Double. Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { - if (getElementType()->isFloatTy() || getElementType()->isDoubleTy()) + if (getElementType()->isHalfTy() || getElementType()->isFloatTy() || + getElementType()->isDoubleTy()) return ConstantFP::get(getContext(), getElementAsAPFloat(Elt)); return ConstantInt::get(getElementType(), getElementAsInteger(Elt)); @@ -2872,6 +2817,11 @@ Value *ConstantFP::handleOperandChangeImpl(Value *From, Value *To, Use *U) { llvm_unreachable("Unsupported class for handleOperandChange()!"); } +Value *ConstantTokenNone::handleOperandChangeImpl(Value *From, Value *To, + Use *U) { + llvm_unreachable("Unsupported class for handleOperandChange()!"); +} + Value *UndefValue::handleOperandChangeImpl(Value *From, Value *To, Use *U) { llvm_unreachable("Unsupported class for handleOperandChange()!"); } @@ -3070,7 +3020,7 @@ Instruction *ConstantExpr::getAsInstruction() { case Instruction::ICmp: case Instruction::FCmp: return CmpInst::Create((Instruction::OtherOps)getOpcode(), - getPredicate(), Ops[0], Ops[1]); + (CmpInst::Predicate)getPredicate(), Ops[0], Ops[1]); default: assert(getNumOperands() == 2 && "Must be binary operator?"); diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h index f3ddcd78d265..13fcbd2ece10 100644 --- a/lib/IR/ConstantsContext.h +++ b/lib/IR/ConstantsContext.h @@ -179,6 +179,13 @@ public: /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + static bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::ExtractValue; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } }; /// InsertValueConstantExpr - This class is private to @@ -205,6 +212,13 @@ public: /// Transparently provide more efficient getOperand methods. 
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + static bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::InsertValue; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } }; /// GetElementPtrConstantExpr - This class is private to Constants.cpp, and is @@ -235,6 +249,13 @@ public: Type *getSourceElementType() const; /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + static bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::GetElementPtr; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } }; // CompareConstantExpr - This class is private to Constants.cpp, and is used @@ -257,6 +278,14 @@ public: } /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + static bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::ICmp || + CE->getOpcode() == Instruction::FCmp; + } + static bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } }; template <> @@ -373,41 +402,45 @@ template struct ConstantAggrKeyType { struct InlineAsmKeyType { StringRef AsmString; StringRef Constraints; + FunctionType *FTy; bool HasSideEffects; bool IsAlignStack; InlineAsm::AsmDialect AsmDialect; InlineAsmKeyType(StringRef AsmString, StringRef Constraints, - bool HasSideEffects, bool IsAlignStack, + FunctionType *FTy, bool HasSideEffects, bool IsAlignStack, InlineAsm::AsmDialect AsmDialect) - : AsmString(AsmString), Constraints(Constraints), + : AsmString(AsmString), Constraints(Constraints), FTy(FTy), HasSideEffects(HasSideEffects), IsAlignStack(IsAlignStack), AsmDialect(AsmDialect) {} InlineAsmKeyType(const InlineAsm *Asm, SmallVectorImpl &) : AsmString(Asm->getAsmString()), Constraints(Asm->getConstraintString()), - HasSideEffects(Asm->hasSideEffects()), + FTy(Asm->getFunctionType()), HasSideEffects(Asm->hasSideEffects()), IsAlignStack(Asm->isAlignStack()), AsmDialect(Asm->getDialect()) {} bool operator==(const InlineAsmKeyType &X) const { return HasSideEffects == X.HasSideEffects && IsAlignStack == X.IsAlignStack && AsmDialect == X.AsmDialect && - AsmString == X.AsmString && Constraints == X.Constraints; + AsmString == X.AsmString && Constraints == X.Constraints && + FTy == X.FTy; } bool operator==(const InlineAsm *Asm) const { return HasSideEffects == Asm->hasSideEffects() && IsAlignStack == Asm->isAlignStack() && AsmDialect == Asm->getDialect() && AsmString == Asm->getAsmString() && - Constraints == Asm->getConstraintString(); + Constraints == Asm->getConstraintString() && + FTy == Asm->getFunctionType(); } unsigned getHash() const { return hash_combine(AsmString, Constraints, HasSideEffects, IsAlignStack, - AsmDialect); + AsmDialect, FTy); } typedef ConstantInfo::TypeClass TypeClass; InlineAsm *create(TypeClass *Ty) const { - return new InlineAsm(Ty, AsmString, Constraints, HasSideEffects, + assert(PointerType::getUnqual(FTy) == Ty); + return new InlineAsm(FTy, AsmString, Constraints, HasSideEffects, IsAlignStack, AsmDialect); } }; diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 0eb88a967575..7f39c8085a69 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -262,6 +262,8 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) { return LLVMVectorTypeKind; case Type::X86_MMXTyID: return LLVMX86_MMXTypeKind; + case Type::TokenTyID: + return LLVMTokenTypeKind; } llvm_unreachable("Unhandled TypeID."); } @@ -366,6 +368,9 @@ LLVMTypeRef 
LLVMPPCFP128TypeInContext(LLVMContextRef C) { LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C) { return (LLVMTypeRef) Type::getX86_MMXTy(*unwrap(C)); } +LLVMTypeRef LLVMTokenTypeInContext(LLVMContextRef C) { + return (LLVMTypeRef) Type::getTokenTy(*unwrap(C)); +} LLVMTypeRef LLVMHalfType(void) { return LLVMHalfTypeInContext(LLVMGetGlobalContext()); @@ -1528,7 +1533,7 @@ LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) { Module::global_iterator I = Mod->global_begin(); if (I == Mod->global_end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) { @@ -1536,23 +1541,23 @@ LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) { Module::global_iterator I = Mod->global_end(); if (I == Mod->global_begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) { GlobalVariable *GV = unwrap(GlobalVar); - Module::global_iterator I = GV; + Module::global_iterator I(GV); if (++I == GV->getParent()->global_end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) { GlobalVariable *GV = unwrap(GlobalVar); - Module::global_iterator I = GV; + Module::global_iterator I(GV); if (I == GV->getParent()->global_begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } void LLVMDeleteGlobal(LLVMValueRef GlobalVar) { @@ -1639,7 +1644,8 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) { LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, const char *Name) { auto *PTy = cast(unwrap(Ty)); - return wrap(GlobalAlias::create(PTy, GlobalValue::ExternalLinkage, Name, + return wrap(GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + GlobalValue::ExternalLinkage, Name, unwrap(Aliasee), unwrap(M))); } @@ -1660,7 +1666,7 @@ LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) { Module::iterator I = Mod->begin(); if (I == Mod->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) { @@ -1668,23 +1674,23 @@ LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) { Module::iterator I = Mod->end(); if (I == Mod->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) { Function *Func = unwrap(Fn); - Module::iterator I = Func; + Module::iterator I(Func); if (++I == Func->getParent()->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) { Function *Func = unwrap(Fn); - Module::iterator I = Func; + Module::iterator I(Func); if (I == Func->getParent()->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } void LLVMDeleteFunction(LLVMValueRef Fn) { @@ -1779,14 +1785,14 @@ void LLVMGetParams(LLVMValueRef FnRef, LLVMValueRef *ParamRefs) { Function *Fn = unwrap(FnRef); for (Function::arg_iterator I = Fn->arg_begin(), E = Fn->arg_end(); I != E; I++) - *ParamRefs++ = wrap(I); + *ParamRefs++ = wrap(&*I); } LLVMValueRef LLVMGetParam(LLVMValueRef FnRef, unsigned index) { Function::arg_iterator AI = unwrap(FnRef)->arg_begin(); while (index --> 0) AI++; - return wrap(AI); + return wrap(&*AI); } LLVMValueRef LLVMGetParamParent(LLVMValueRef V) { @@ -1798,7 +1804,7 @@ LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) { Function::arg_iterator I = Func->arg_begin(); if (I == Func->arg_end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) { @@ 
-1806,23 +1812,23 @@ LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) { Function::arg_iterator I = Func->arg_end(); if (I == Func->arg_begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) { Argument *A = unwrap(Arg); - Function::arg_iterator I = A; + Function::arg_iterator I(A); if (++I == A->getParent()->arg_end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) { Argument *A = unwrap(Arg); - Function::arg_iterator I = A; + Function::arg_iterator I(A); if (I == A->getParent()->arg_begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) { @@ -1880,7 +1886,7 @@ unsigned LLVMCountBasicBlocks(LLVMValueRef FnRef) { void LLVMGetBasicBlocks(LLVMValueRef FnRef, LLVMBasicBlockRef *BasicBlocksRefs){ Function *Fn = unwrap(FnRef); for (Function::iterator I = Fn->begin(), E = Fn->end(); I != E; I++) - *BasicBlocksRefs++ = wrap(I); + *BasicBlocksRefs++ = wrap(&*I); } LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn) { @@ -1892,7 +1898,7 @@ LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) { Function::iterator I = Func->begin(); if (I == Func->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) { @@ -1900,23 +1906,23 @@ LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) { Function::iterator I = Func->end(); if (I == Func->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); - Function::iterator I = Block; + Function::iterator I(Block); if (++I == Block->getParent()->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) { BasicBlock *Block = unwrap(BB); - Function::iterator I = Block; + Function::iterator I(Block); if (I == Block->getParent()->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C, @@ -1968,7 +1974,7 @@ LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) { BasicBlock::iterator I = Block->begin(); if (I == Block->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) { @@ -1976,23 +1982,23 @@ LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) { BasicBlock::iterator I = Block->end(); if (I == Block->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) { Instruction *Instr = unwrap(Inst); - BasicBlock::iterator I = Instr; + BasicBlock::iterator I(Instr); if (++I == Instr->getParent()->end()) return nullptr; - return wrap(I); + return wrap(&*I); } LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) { Instruction *Instr = unwrap(Inst); - BasicBlock::iterator I = Instr; + BasicBlock::iterator I(Instr); if (I == Instr->getParent()->begin()) return nullptr; - return wrap(--I); + return wrap(&*--I); } void LLVMInstructionEraseFromParent(LLVMValueRef Inst) { @@ -2160,12 +2166,12 @@ void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block, LLVMValueRef Instr) { BasicBlock *BB = unwrap(Block); Instruction *I = Instr? 
unwrap(Instr) : (Instruction*) BB->end(); - unwrap(Builder)->SetInsertPoint(BB, I); + unwrap(Builder)->SetInsertPoint(BB, I->getIterator()); } void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) { Instruction *I = unwrap(Instr); - unwrap(Builder)->SetInsertPoint(I->getParent(), I); + unwrap(Builder)->SetInsertPoint(I->getParent(), I->getIterator()); } void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block) { @@ -2489,7 +2495,6 @@ LLVMValueRef LLVMBuildFree(LLVMBuilderRef B, LLVMValueRef PointerVal) { CallInst::CreateFree(unwrap(PointerVal), unwrap(B)->GetInsertBlock()))); } - LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal, const char *Name) { return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name)); @@ -2515,6 +2520,21 @@ static AtomicOrdering mapFromLLVMOrdering(LLVMAtomicOrdering Ordering) { llvm_unreachable("Invalid LLVMAtomicOrdering value!"); } +static LLVMAtomicOrdering mapToLLVMOrdering(AtomicOrdering Ordering) { + switch (Ordering) { + case NotAtomic: return LLVMAtomicOrderingNotAtomic; + case Unordered: return LLVMAtomicOrderingUnordered; + case Monotonic: return LLVMAtomicOrderingMonotonic; + case Acquire: return LLVMAtomicOrderingAcquire; + case Release: return LLVMAtomicOrderingRelease; + case AcquireRelease: return LLVMAtomicOrderingAcquireRelease; + case SequentiallyConsistent: + return LLVMAtomicOrderingSequentiallyConsistent; + } + + llvm_unreachable("Invalid AtomicOrdering value!"); +} + LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering Ordering, LLVMBool isSingleThread, const char *Name) { return wrap( @@ -2567,6 +2587,25 @@ void LLVMSetVolatile(LLVMValueRef MemAccessInst, LLVMBool isVolatile) { return cast(P)->setVolatile(isVolatile); } +LLVMAtomicOrdering LLVMGetOrdering(LLVMValueRef MemAccessInst) { + Value *P = unwrap(MemAccessInst); + AtomicOrdering O; + if (LoadInst *LI = dyn_cast(P)) + O = LI->getOrdering(); + else + O = cast(P)->getOrdering(); + return mapToLLVMOrdering(O); +} + +void LLVMSetOrdering(LLVMValueRef MemAccessInst, LLVMAtomicOrdering Ordering) { + Value *P = unwrap(MemAccessInst); + AtomicOrdering O = mapFromLLVMOrdering(Ordering); + + if (LoadInst *LI = dyn_cast(P)) + return LI->setOrdering(O); + return cast(P)->setOrdering(O); +} + /*--.. Casts ...............................................................--*/ LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef B, LLVMValueRef Val, diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 2a90e70af1a3..b7841fe2b85c 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -148,7 +148,7 @@ DICompileUnit *DIBuilder::createCompileUnit( CUNode = DICompileUnit::getDistinct( VMContext, Lang, DIFile::get(VMContext, Filename, Directory), Producer, isOptimized, Flags, RunTimeVer, SplitName, Kind, nullptr, - nullptr, nullptr, nullptr, nullptr, DWOId); + nullptr, nullptr, nullptr, nullptr, nullptr, DWOId); // Create a named metadata so that it is easier to find cu in a module. 
// Note that we only generate this when the caller wants to actually @@ -255,10 +255,12 @@ DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy, DITypeRef::get(Base)); } -DIDerivedType *DIBuilder::createReferenceType(unsigned Tag, DIType *RTy) { +DIDerivedType *DIBuilder::createReferenceType(unsigned Tag, DIType *RTy, + uint64_t SizeInBits, + uint64_t AlignInBits) { assert(RTy && "Unable to create reference type"); return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, - DITypeRef::get(RTy), 0, 0, 0, 0); + DITypeRef::get(RTy), SizeInBits, AlignInBits, 0, 0); } DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name, @@ -429,12 +431,23 @@ DICompositeType *DIBuilder::createUnionType( return R; } -DISubroutineType *DIBuilder::createSubroutineType(DIFile *File, - DITypeRefArray ParameterTypes, +DISubroutineType *DIBuilder::createSubroutineType(DITypeRefArray ParameterTypes, unsigned Flags) { return DISubroutineType::get(VMContext, Flags, ParameterTypes); } +DICompositeType *DIBuilder::createExternalTypeRef(unsigned Tag, DIFile *File, + StringRef UniqueIdentifier) { + assert(!UniqueIdentifier.empty() && "external type ref without uid"); + auto *CTy = + DICompositeType::get(VMContext, Tag, "", nullptr, 0, nullptr, nullptr, 0, + 0, 0, DINode::FlagExternalTypeRef, nullptr, 0, + nullptr, nullptr, UniqueIdentifier); + // Types with unique IDs need to be in the type map. + retainType(CTy); + return CTy; +} + DICompositeType *DIBuilder::createEnumerationType( DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DINodeArray Elements, @@ -590,18 +603,20 @@ DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl( .release(); } -DILocalVariable *DIBuilder::createLocalVariable( - unsigned Tag, DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo, - DIType *Ty, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { +static DILocalVariable *createLocalVariable( + LLVMContext &VMContext, + DenseMap> &PreservedVariables, + DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File, + unsigned LineNo, DIType *Ty, bool AlwaysPreserve, unsigned Flags) { // FIXME: Why getNonCompileUnitScope()? // FIXME: Why is "!Context" okay here? // FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT // the only valid scopes)? DIScope *Context = getNonCompileUnitScope(Scope); - auto *Node = DILocalVariable::get( - VMContext, Tag, cast_or_null(Context), Name, File, LineNo, - DITypeRef::get(Ty), ArgNo, Flags); + auto *Node = + DILocalVariable::get(VMContext, cast_or_null(Context), Name, + File, LineNo, DITypeRef::get(Ty), ArgNo, Flags); if (AlwaysPreserve) { // The optimizer may remove local variables. 
If there is an interest // to preserve variable info in such situation then stash it in a @@ -613,6 +628,23 @@ DILocalVariable *DIBuilder::createLocalVariable( return Node; } +DILocalVariable *DIBuilder::createAutoVariable(DIScope *Scope, StringRef Name, + DIFile *File, unsigned LineNo, + DIType *Ty, bool AlwaysPreserve, + unsigned Flags) { + return createLocalVariable(VMContext, PreservedVariables, Scope, Name, + /* ArgNo */ 0, File, LineNo, Ty, AlwaysPreserve, + Flags); +} + +DILocalVariable *DIBuilder::createParameterVariable( + DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File, + unsigned LineNo, DIType *Ty, bool AlwaysPreserve, unsigned Flags) { + assert(ArgNo && "Expected non-zero argument number for parameter"); + return createLocalVariable(VMContext, PreservedVariables, Scope, Name, ArgNo, + File, LineNo, Ty, AlwaysPreserve, Flags); +} + DIExpression *DIBuilder::createExpression(ArrayRef Addr) { return DIExpression::get(VMContext, Addr); } @@ -629,36 +661,37 @@ DIExpression *DIBuilder::createBitPieceExpression(unsigned OffsetInBytes, return DIExpression::get(VMContext, Addr); } -DISubprogram *DIBuilder::createFunction(DIScopeRef Context, StringRef Name, - StringRef LinkageName, DIFile *File, - unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, - unsigned ScopeLine, unsigned Flags, - bool isOptimized, Function *Fn, - MDNode *TParams, MDNode *Decl) { +DISubprogram *DIBuilder::createFunction( + DIScopeRef Context, StringRef Name, StringRef LinkageName, DIFile *File, + unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, + bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, + DITemplateParameterArray TParams, DISubprogram *Decl) { // dragonegg does not generate identifier for types, so using an empty map // to resolve the context should be fine. DITypeIdentifierMap EmptyMap; return createFunction(Context.resolve(EmptyMap), Name, LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, - Flags, isOptimized, Fn, TParams, Decl); + Flags, isOptimized, TParams, Decl); } -DISubprogram *DIBuilder::createFunction(DIScope *Context, StringRef Name, - StringRef LinkageName, DIFile *File, - unsigned LineNo, DISubroutineType *Ty, - bool isLocalToUnit, bool isDefinition, - unsigned ScopeLine, unsigned Flags, - bool isOptimized, Function *Fn, - MDNode *TParams, MDNode *Decl) { - assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type && - "function types should be subroutines"); - auto *Node = DISubprogram::get( - VMContext, DIScopeRef::get(getNonCompileUnitScope(Context)), Name, - LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, - nullptr, 0, 0, Flags, isOptimized, Fn, cast_or_null(TParams), - cast_or_null(Decl), - MDTuple::getTemporary(VMContext, None).release()); +template +static DISubprogram *getSubprogram(bool IsDistinct, Ts &&... 
Args) { + if (IsDistinct) + return DISubprogram::getDistinct(std::forward(Args)...); + return DISubprogram::get(std::forward(Args)...); +} + +DISubprogram *DIBuilder::createFunction( + DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, + unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, + bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, + DITemplateParameterArray TParams, DISubprogram *Decl) { + auto *Node = + getSubprogram(/* IsDistinct = */ isDefinition, VMContext, + DIScopeRef::get(getNonCompileUnitScope(Context)), Name, + LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, + ScopeLine, nullptr, 0, 0, Flags, isOptimized, TParams, Decl, + MDTuple::getTemporary(VMContext, None).release()); if (isDefinition) AllSubprograms.push_back(Node); @@ -670,12 +703,11 @@ DISubprogram *DIBuilder::createTempFunctionFwdDecl( DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized, - Function *Fn, MDNode *TParams, MDNode *Decl) { + DITemplateParameterArray TParams, DISubprogram *Decl) { return DISubprogram::getTemporary( VMContext, DIScopeRef::get(getNonCompileUnitScope(Context)), Name, LinkageName, File, LineNo, Ty, isLocalToUnit, isDefinition, - ScopeLine, nullptr, 0, 0, Flags, isOptimized, Fn, - cast_or_null(TParams), cast_or_null(Decl), + ScopeLine, nullptr, 0, 0, Flags, isOptimized, TParams, Decl, nullptr) .release(); } @@ -685,18 +717,16 @@ DIBuilder::createMethod(DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F, unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit, bool isDefinition, unsigned VK, unsigned VIndex, DIType *VTableHolder, unsigned Flags, - bool isOptimized, Function *Fn, MDNode *TParam) { - assert(Ty->getTag() == dwarf::DW_TAG_subroutine_type && - "function types should be subroutines"); + bool isOptimized, DITemplateParameterArray TParams) { assert(getNonCompileUnitScope(Context) && "Methods should have both a Context and a context that isn't " "the compile unit."); // FIXME: Do we want to use different scope/lines? - auto *SP = DISubprogram::get( - VMContext, DIScopeRef::get(cast(Context)), Name, LinkageName, F, - LineNo, Ty, isLocalToUnit, isDefinition, LineNo, - DITypeRef::get(VTableHolder), VK, VIndex, Flags, isOptimized, Fn, - cast_or_null(TParam), nullptr, nullptr); + auto *SP = getSubprogram( + /* IsDistinct = */ isDefinition, VMContext, + DIScopeRef::get(cast(Context)), Name, LinkageName, F, LineNo, Ty, + isLocalToUnit, isDefinition, LineNo, DITypeRef::get(VTableHolder), VK, + VIndex, Flags, isOptimized, TParams, nullptr, nullptr); if (isDefinition) AllSubprograms.push_back(SP); diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp index 4d867efe1b3d..5468f47bbfe6 100644 --- a/lib/IR/DataLayout.cpp +++ b/lib/IR/DataLayout.cpp @@ -41,6 +41,7 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { assert(!ST->isOpaque() && "Cannot get layout of opaque structs"); StructAlignment = 0; StructSize = 0; + IsPadded = false; NumElements = ST->getNumElements(); // Loop over each of the elements, placing them in memory. @@ -49,8 +50,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { unsigned TyAlign = ST->isPacked() ? 1 : DL.getABITypeAlignment(Ty); // Add padding if necessary to align the data element properly. 
- if ((StructSize & (TyAlign-1)) != 0) + if ((StructSize & (TyAlign-1)) != 0) { + IsPadded = true; StructSize = RoundUpToAlignment(StructSize, TyAlign); + } // Keep track of maximum alignment constraint. StructAlignment = std::max(TyAlign, StructAlignment); @@ -64,8 +67,10 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) { // Add padding to the end of the struct so that it could be put in an array // and all array elements would be aligned correctly. - if ((StructSize & (StructAlignment-1)) != 0) + if ((StructSize & (StructAlignment-1)) != 0) { + IsPadded = true; StructSize = RoundUpToAlignment(StructSize, StructAlignment); + } } @@ -461,8 +466,8 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType, return ABIInfo ? Alignments[i].ABIAlign : Alignments[i].PrefAlign; // The best match so far depends on what we're looking for. - if (AlignType == INTEGER_ALIGN && - Alignments[i].AlignType == INTEGER_ALIGN) { + if (AlignType == INTEGER_ALIGN && + Alignments[i].AlignType == INTEGER_ALIGN) { // The "best match" for integers is the smallest size that is larger than // the BitWidth requested. if (Alignments[i].TypeBitWidth > BitWidth && (BestMatchIdx == -1 || diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index 9646d1aa4d76..a2443becdd00 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -56,21 +56,6 @@ DISubprogram *llvm::getDISubprogram(const Function *F) { return nullptr; } -DICompositeTypeBase *llvm::getDICompositeType(DIType *T) { - if (auto *C = dyn_cast_or_null(T)) - return C; - - if (auto *D = dyn_cast_or_null(T)) { - // This function is currently used by dragonegg and dragonegg does - // not generate identifier for types, so using an empty map to resolve - // DerivedFrom should be fine. - DITypeIdentifierMap EmptyMap; - return getDICompositeType(D->getBaseType().resolve(EmptyMap)); - } - - return nullptr; -} - DITypeIdentifierMap llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) { DITypeIdentifierMap Map; @@ -164,20 +149,22 @@ void DebugInfoFinder::processType(DIType *DT) { if (!addType(DT)) return; processScope(DT->getScope().resolve(TypeIdentifierMap)); - if (auto *DCT = dyn_cast(DT)) { + if (auto *ST = dyn_cast(DT)) { + for (DITypeRef Ref : ST->getTypeArray()) + processType(Ref.resolve(TypeIdentifierMap)); + return; + } + if (auto *DCT = dyn_cast(DT)) { processType(DCT->getBaseType().resolve(TypeIdentifierMap)); - if (auto *ST = dyn_cast(DCT)) { - for (DITypeRef Ref : ST->getTypeArray()) - processType(Ref.resolve(TypeIdentifierMap)); - return; - } for (Metadata *D : DCT->getElements()) { if (auto *T = dyn_cast(D)) processType(T); else if (auto *SP = dyn_cast(D)) processSubprogram(SP); } - } else if (auto *DDT = dyn_cast(DT)) { + return; + } + if (auto *DDT = dyn_cast(DT)) { processType(DDT->getBaseType().resolve(TypeIdentifierMap)); } } @@ -313,6 +300,10 @@ bool DebugInfoFinder::addScope(DIScope *Scope) { bool llvm::stripDebugInfo(Function &F) { bool Changed = false; + if (F.getSubprogram()) { + Changed = true; + F.setSubprogram(nullptr); + } for (BasicBlock &BB : F) { for (Instruction &I : BB) { if (I.getDebugLoc()) { @@ -349,7 +340,7 @@ bool llvm::StripDebugInfo(Module &M) { for (Module::named_metadata_iterator NMI = M.named_metadata_begin(), NME = M.named_metadata_end(); NMI != NME;) { - NamedMDNode *NMD = NMI; + NamedMDNode *NMD = &*NMI; ++NMI; if (NMD->getName().startswith("llvm.dbg.")) { NMD->eraseFromParent(); @@ -372,21 +363,3 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) { return 
Val->getZExtValue(); return 0; } - -DenseMap -llvm::makeSubprogramMap(const Module &M) { - DenseMap R; - - NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu"); - if (!CU_Nodes) - return R; - - for (MDNode *N : CU_Nodes->operands()) { - auto *CUNode = cast(N); - for (auto *SP : CUNode->getSubprograms()) { - if (Function *F = SP->getFunction()) - R.insert(std::make_pair(F, SP)); - } - } - return R; -} diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 5e017488c1fb..58e0abdd577c 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -295,8 +295,7 @@ DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context, StorageType Storage, bool ShouldCreate) { DEFINE_GETIMPL_LOOKUP(DISubroutineType, (Flags, TypeArray)); - Metadata *Ops[] = {nullptr, nullptr, nullptr, nullptr, - TypeArray, nullptr, nullptr, nullptr}; + Metadata *Ops[] = {nullptr, nullptr, nullptr, TypeArray}; DEFINE_GETIMPL_STORE(DISubroutineType, (Flags), Ops); } @@ -316,22 +315,20 @@ DICompileUnit *DICompileUnit::getImpl( unsigned RuntimeVersion, MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *Subprograms, Metadata *GlobalVariables, - Metadata *ImportedEntities, uint64_t DWOId, + Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId, StorageType Storage, bool ShouldCreate) { + assert(Storage != Uniqued && "Cannot unique DICompileUnit"); assert(isCanonical(Producer) && "Expected canonical MDString"); assert(isCanonical(Flags) && "Expected canonical MDString"); assert(isCanonical(SplitDebugFilename) && "Expected canonical MDString"); - DEFINE_GETIMPL_LOOKUP( - DICompileUnit, - (SourceLanguage, File, getString(Producer), IsOptimized, getString(Flags), - RuntimeVersion, getString(SplitDebugFilename), EmissionKind, EnumTypes, - RetainedTypes, Subprograms, GlobalVariables, ImportedEntities, DWOId)); + Metadata *Ops[] = {File, Producer, Flags, SplitDebugFilename, EnumTypes, RetainedTypes, Subprograms, GlobalVariables, - ImportedEntities}; - DEFINE_GETIMPL_STORE( - DICompileUnit, - (SourceLanguage, IsOptimized, RuntimeVersion, EmissionKind, DWOId), Ops); + ImportedEntities, Macros}; + return storeImpl(new (ArrayRef(Ops).size()) DICompileUnit( + Context, Storage, SourceLanguage, IsOptimized, + RuntimeVersion, EmissionKind, DWOId, Ops), + Storage); } DISubprogram *DILocalScope::getSubprogram() const { @@ -345,34 +342,28 @@ DISubprogram *DISubprogram::getImpl( MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, - unsigned Flags, bool IsOptimized, Metadata *Function, - Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables, - StorageType Storage, bool ShouldCreate) { + unsigned Flags, bool IsOptimized, Metadata *TemplateParams, + Metadata *Declaration, Metadata *Variables, StorageType Storage, + bool ShouldCreate) { assert(isCanonical(Name) && "Expected canonical MDString"); assert(isCanonical(LinkageName) && "Expected canonical MDString"); DEFINE_GETIMPL_LOOKUP(DISubprogram, (Scope, getString(Name), getString(LinkageName), File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, Virtuality, VirtualIndex, Flags, - IsOptimized, Function, TemplateParams, Declaration, - Variables)); - Metadata *Ops[] = {File, Scope, Name, Name, - LinkageName, Type, ContainingType, Function, - TemplateParams, Declaration, Variables}; + IsOptimized, 
TemplateParams, Declaration, Variables)); + Metadata *Ops[] = {File, Scope, Name, Name, + LinkageName, Type, ContainingType, TemplateParams, + Declaration, Variables}; DEFINE_GETIMPL_STORE(DISubprogram, (Line, ScopeLine, Virtuality, VirtualIndex, Flags, IsLocalToUnit, IsDefinition, IsOptimized), Ops); } -Function *DISubprogram::getFunction() const { - // FIXME: Should this be looking through bitcasts? - return dyn_cast_or_null(getFunctionConstant()); -} - bool DISubprogram::describes(const Function *F) const { assert(F && "Invalid function"); - if (F == getFunction()) + if (F->getSubprogram() == this) return true; StringRef Name = getLinkageName(); if (Name.empty()) @@ -380,15 +371,13 @@ bool DISubprogram::describes(const Function *F) const { return F->getName() == Name; } -void DISubprogram::replaceFunction(Function *F) { - replaceFunction(F ? ConstantAsMetadata::get(F) - : static_cast(nullptr)); -} - DILexicalBlock *DILexicalBlock::getImpl(LLVMContext &Context, Metadata *Scope, Metadata *File, unsigned Line, unsigned Column, StorageType Storage, bool ShouldCreate) { + // Fixup column. + adjustColumn(Column); + assert(Scope && "Expected scope"); DEFINE_GETIMPL_LOOKUP(DILexicalBlock, (Scope, File, Line, Column)); Metadata *Ops[] = {File, Scope}; @@ -467,21 +456,21 @@ DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, Ops); } -DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, unsigned Tag, - Metadata *Scope, MDString *Name, - Metadata *File, unsigned Line, - Metadata *Type, unsigned Arg, - unsigned Flags, StorageType Storage, +DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, + MDString *Name, Metadata *File, + unsigned Line, Metadata *Type, + unsigned Arg, unsigned Flags, + StorageType Storage, bool ShouldCreate) { // 64K ought to be enough for any frontend. assert(Arg <= UINT16_MAX && "Expected argument number to fit in 16-bits"); assert(Scope && "Expected scope"); assert(isCanonical(Name) && "Expected canonical MDString"); - DEFINE_GETIMPL_LOOKUP(DILocalVariable, (Tag, Scope, getString(Name), File, - Line, Type, Arg, Flags)); + DEFINE_GETIMPL_LOOKUP(DILocalVariable, + (Scope, getString(Name), File, Line, Type, Arg, Flags)); Metadata *Ops[] = {Scope, Name, File, Type}; - DEFINE_GETIMPL_STORE(DILocalVariable, (Tag, Line, Arg, Flags), Ops); + DEFINE_GETIMPL_STORE(DILocalVariable, (Line, Arg, Flags), Ops); } DIExpression *DIExpression::getImpl(LLVMContext &Context, @@ -496,6 +485,7 @@ unsigned DIExpression::ExprOperand::getSize() const { case dwarf::DW_OP_bit_piece: return 3; case dwarf::DW_OP_plus: + case dwarf::DW_OP_minus: return 2; default: return 1; @@ -516,6 +506,7 @@ bool DIExpression::isValid() const { // Piece expressions must be at the end. 
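As an illustrative aside (not from this patch): the DW_OP_minus case added above slots into DIExpression's fixed operand-size table, where each opcode is followed by a known number of literal operands; that is what lets isValid() and the element iterator step from opcode to opcode. A minimal standalone sketch of that cursor, with the DWARF opcode values written out:

#include <cstdint>
#include <vector>

enum : uint64_t {
  OpDeref = 0x06,    // DW_OP_deref: opcode only
  OpMinus = 0x1c,    // DW_OP_minus: opcode + one literal operand (as used here)
  OpPlus = 0x22,     // DW_OP_plus: opcode + one literal operand (as used here)
  OpBitPiece = 0x9d  // DW_OP_bit_piece: opcode + offset + size
};

static unsigned opSize(uint64_t Op) {
  switch (Op) {
  case OpBitPiece: return 3;
  case OpPlus:
  case OpMinus:    return 2;
  default:         return 1;
  }
}

// Returns false if some opcode's operands would run off the end of the stream.
static bool wellFormed(const std::vector<uint64_t> &Expr) {
  for (size_t I = 0; I < Expr.size();) {
    unsigned N = opSize(Expr[I]);
    if (I + N > Expr.size())
      return false;
    I += N;
  }
  return true;
}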
return I->get() + I->getSize() == E->get(); case dwarf::DW_OP_plus: + case dwarf::DW_OP_minus: case dwarf::DW_OP_deref: break; } @@ -566,3 +557,24 @@ DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag, Metadata *Ops[] = {Scope, Entity, Name}; DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops); } + +DIMacro *DIMacro::getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, MDString *Name, MDString *Value, + StorageType Storage, bool ShouldCreate) { + assert(isCanonical(Name) && "Expected canonical MDString"); + DEFINE_GETIMPL_LOOKUP(DIMacro, + (MIType, Line, getString(Name), getString(Value))); + Metadata *Ops[] = { Name, Value }; + DEFINE_GETIMPL_STORE(DIMacro, (MIType, Line), Ops); +} + +DIMacroFile *DIMacroFile::getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, Metadata *File, + Metadata *Elements, StorageType Storage, + bool ShouldCreate) { + DEFINE_GETIMPL_LOOKUP(DIMacroFile, + (MIType, Line, File, Elements)); + Metadata *Ops[] = { File, Elements }; + DEFINE_GETIMPL_STORE(DIMacroFile, (MIType, Line), Ops); +} + diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index b8f77eda15a6..6426f76bbaa6 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -49,7 +49,7 @@ struct PassRemarksOpt { "' in -pass-remarks: " + RegexError, false); } - }; + } }; static PassRemarksOpt PassRemarksOptLoc; @@ -91,6 +91,8 @@ int llvm::getNextAvailablePluginDiagnosticKind() { return ++PluginKindID; } +const char *DiagnosticInfo::AlwaysPrint = ""; + DiagnosticInfoInlineAsm::DiagnosticInfoInlineAsm(const Instruction &I, const Twine &MsgStr, DiagnosticSeverity Severity) @@ -121,9 +123,17 @@ void DiagnosticInfoDebugMetadataVersion::print(DiagnosticPrinter &DP) const { } void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { - if (getFileName() && getLineNum() > 0) - DP << getFileName() << ":" << getLineNum() << ": "; - else if (getFileName()) + if (!FileName.empty()) { + DP << getFileName(); + if (LineNum > 0) + DP << ":" << getLineNum(); + DP << ": "; + } + DP << getMsg(); +} + +void DiagnosticInfoPGOProfile::print(DiagnosticPrinter &DP) const { + if (getFileName()) DP << getFileName() << ": "; DP << getMsg(); } @@ -166,8 +176,9 @@ bool DiagnosticInfoOptimizationRemarkMissed::isEnabled() const { } bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled() const { - return PassRemarksAnalysisOptLoc.Pattern && - PassRemarksAnalysisOptLoc.Pattern->match(getPassName()); + return getPassName() == DiagnosticInfo::AlwaysPrint || + (PassRemarksAnalysisOptLoc.Pattern && + PassRemarksAnalysisOptLoc.Pattern->match(getPassName())); } void DiagnosticInfoMIRParser::print(DiagnosticPrinter &DP) const { @@ -196,6 +207,24 @@ void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx, DiagnosticInfoOptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg)); } +void llvm::emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) { + Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisFPCommute(PassName, Fn, + DLoc, Msg)); +} + +void llvm::emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx, + const char *PassName, + const Function &Fn, + const DebugLoc &DLoc, + const Twine &Msg) { + Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisAliasing(PassName, Fn, + DLoc, Msg)); +} + bool DiagnosticInfoOptimizationFailure::isEnabled() const { // Only print warnings. 
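As an aside, AlwaysPrint above is an empty string used as a sentinel: isEnabled() compares the pass-name pointer against the sentinel's address, so only remarks that explicitly pass DiagnosticInfo::AlwaysPrint bypass the -pass-remarks-analysis regex, while an ordinary empty pass name does not. A toy illustration of the idiom (names hypothetical, not from this patch):

static const char *const AlwaysPrintSentinel = "";

// Pointer equality, not strcmp: only the sentinel object itself matches.
static bool isRemarkEnabled(const char *PassName,
                            bool (*matchesFilter)(const char *)) {
  if (PassName == AlwaysPrintSentinel)
    return true;
  return matchesFilter != nullptr && matchesFilter(PassName);
}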
return getSeverity() == DS_Warning; diff --git a/lib/IR/Dominators.cpp b/lib/IR/Dominators.cpp index b6a8bbcbe5fa..b9d4fb7de881 100644 --- a/lib/IR/Dominators.cpp +++ b/lib/IR/Dominators.cpp @@ -91,10 +91,10 @@ bool DominatorTree::dominates(const Instruction *Def, if (Def == User) return false; - // The value defined by an invoke dominates an instruction only if - // it dominates every instruction in UseBB. - // A PHI is dominated only if the instruction dominates every possible use - // in the UseBB. + // The value defined by an invoke dominates an instruction only if it + // dominates every instruction in UseBB. + // A PHI is dominated only if the instruction dominates every possible use in + // the UseBB. if (isa(Def) || isa(User)) return dominates(Def, UseBB); @@ -126,15 +126,15 @@ bool DominatorTree::dominates(const Instruction *Def, if (DefBB == UseBB) return false; - const InvokeInst *II = dyn_cast(Def); - if (!II) - return dominates(DefBB, UseBB); - // Invoke results are only usable in the normal destination, not in the // exceptional destination. - BasicBlock *NormalDest = II->getNormalDest(); - BasicBlockEdge E(DefBB, NormalDest); - return dominates(E, UseBB); + if (const auto *II = dyn_cast(Def)) { + BasicBlock *NormalDest = II->getNormalDest(); + BasicBlockEdge E(DefBB, NormalDest); + return dominates(E, UseBB); + } + + return dominates(DefBB, UseBB); } bool DominatorTree::dominates(const BasicBlockEdge &BBE, @@ -142,7 +142,8 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, // Assert that we have a single edge. We could handle them by simply // returning false, but since isSingleEdge is linear on the number of // edges, the callers can normally handle them more efficiently. - assert(BBE.isSingleEdge()); + assert(BBE.isSingleEdge() && + "This function is not efficient in handling multiple edges"); // If the BB the edge ends in doesn't dominate the use BB, then the // edge also doesn't. @@ -192,7 +193,8 @@ bool DominatorTree::dominates(const BasicBlockEdge &BBE, const Use &U) const { // Assert that we have a single edge. We could handle them by simply // returning false, but since isSingleEdge is linear on the number of // edges, the callers can normally handle them more efficiently. - assert(BBE.isSingleEdge()); + assert(BBE.isSingleEdge() && + "This function is not efficient in handling multiple edges"); Instruction *UserInst = cast(U.getUser()); // A PHI in the end of the edge is dominated by it. @@ -232,8 +234,8 @@ bool DominatorTree::dominates(const Instruction *Def, const Use &U) const { if (!isReachableFromEntry(DefBB)) return false; - // Invoke instructions define their return values on the edges - // to their normal successors, so we have to handle them specially. + // Invoke instructions define their return values on the edges to their normal + // successors, so we have to handle them specially. // Among other things, this means they don't dominate anything in // their own block, except possibly a phi, so we don't need to // walk the block in any case. diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index b50ad1262c69..cfb40b19c733 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -35,8 +35,8 @@ using namespace llvm; // Explicit instantiations of SymbolTableListTraits since some of the methods // are not in the public header file... 
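The restructured check above keeps the invariant that an invoke's result is only defined on the edge to its normal destination, so def-use dominance for invokes reduces to edge dominance. A compilable skeleton of just that dispatch, with stand-in types and stubbed-out dominance queries (an illustration, not the real DominatorTree API):

struct BB {};
struct Edge { BB *From; BB *To; };

struct DomTreeStub {
  bool dominates(BB *, BB *) const { return false; }         // block query (stub)
  bool dominates(const Edge &, BB *) const { return false; } // edge query (stub)
};

// Pass NormalDest only when the definition is an invoke.
static bool defDominatesUse(const DomTreeStub &DT, BB *DefBB, BB *UseBB,
                            BB *NormalDest) {
  if (NormalDest)
    return DT.dominates(Edge{DefBB, NormalDest}, UseBB);
  return DT.dominates(DefBB, UseBB);
}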
-template class llvm::SymbolTableListTraits; -template class llvm::SymbolTableListTraits; +template class llvm::SymbolTableListTraits; +template class llvm::SymbolTableListTraits; //===----------------------------------------------------------------------===// // Argument Implementation @@ -235,11 +235,11 @@ Type *Function::getReturnType() const { } void Function::removeFromParent() { - getParent()->getFunctionList().remove(this); + getParent()->getFunctionList().remove(getIterator()); } void Function::eraseFromParent() { - getParent()->getFunctionList().erase(this); + getParent()->getFunctionList().erase(getIterator()); } //===----------------------------------------------------------------------===// @@ -248,7 +248,7 @@ void Function::eraseFromParent() { Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name, Module *ParentModule) - : GlobalObject(PointerType::getUnqual(Ty), Value::FunctionVal, + : GlobalObject(Ty, Value::FunctionVal, OperandTraits::op_begin(this), 0, Linkage, name), Ty(Ty) { assert(FunctionType::isValidReturnType(getReturnType()) && @@ -279,9 +279,6 @@ Function::~Function() { // Remove the function from the on-the-side GC table. clearGC(); - - // FIXME: needed by operator delete - setFunctionNumOperands(1); } void Function::BuildLazyArguments() const { @@ -328,14 +325,15 @@ void Function::dropAllReferences() { while (!BasicBlocks.empty()) BasicBlocks.begin()->eraseFromParent(); - // Prefix and prologue data are stored in a side table. - setPrefixData(nullptr); - setPrologueData(nullptr); + // Drop uses of any optional data (real or placeholder). + if (getNumOperands()) { + User::dropAllReferences(); + setNumHungOffUseOperands(0); + setValueSubclassData(getSubclassDataFromValue() & ~0xe); + } // Metadata is stored in a side-table. clearMetadata(); - - setPersonalityFn(nullptr); } void Function::addAttribute(unsigned i, Attribute::AttrKind attr) { @@ -411,30 +409,26 @@ void Function::clearGC() { } } -/// copyAttributesFrom - copy all additional attributes (those not needed to -/// create a Function) from the Function Src to this one. +/// Copy all additional attributes (those not needed to create a Function) from +/// the Function Src to this one. void Function::copyAttributesFrom(const GlobalValue *Src) { - assert(isa(Src) && "Expected a Function!"); GlobalObject::copyAttributesFrom(Src); - const Function *SrcF = cast(Src); + const Function *SrcF = dyn_cast(Src); + if (!SrcF) + return; + setCallingConv(SrcF->getCallingConv()); setAttributes(SrcF->getAttributes()); if (SrcF->hasGC()) setGC(SrcF->getGC()); else clearGC(); - if (SrcF->hasPrefixData()) - setPrefixData(SrcF->getPrefixData()); - else - setPrefixData(nullptr); - if (SrcF->hasPrologueData()) - setPrologueData(SrcF->getPrologueData()); - else - setPrologueData(nullptr); if (SrcF->hasPersonalityFn()) setPersonalityFn(SrcF->getPersonalityFn()); - else - setPersonalityFn(nullptr); + if (SrcF->hasPrefixData()) + setPrefixData(SrcF->getPrefixData()); + if (SrcF->hasPrologueData()) + setPrologueData(SrcF->getPrologueData()); } /// \brief This does the actual lookup of an intrinsic ID which @@ -492,7 +486,10 @@ static std::string getMangledTypeStr(Type* Ty) { Result += "vararg"; // Ensure nested function types are distinguishable. 
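The copyAttributesFrom rewrite above now degrades gracefully: the base class copies what it owns, and each layer copies its own fields only when the source actually is that type (dyn_cast instead of cast plus an assert). A toy version of the shape, using dynamic_cast as a stand-in for LLVM's dyn_cast (illustrative types, not from this patch):

struct GlobalValueToy {
  int Visibility = 0;
  virtual ~GlobalValueToy() = default;
};
struct GlobalObjectToy : GlobalValueToy {
  unsigned Alignment = 0;
};

static void copyAttributes(GlobalObjectToy &Dst, const GlobalValueToy &Src) {
  Dst.Visibility = Src.Visibility; // base-class attributes: always copied
  if (const auto *GO = dynamic_cast<const GlobalObjectToy *>(&Src))
    Dst.Alignment = GO->Alignment; // derived attributes: only on type match
}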
Result += "f"; - } else if (Ty) + } else if (isa(Ty)) + Result += "v" + utostr(Ty->getVectorNumElements()) + + getMangledTypeStr(Ty->getVectorElementType()); + else if (Ty) Result += EVT::getEVT(Ty).getEVTString(); return Result; } @@ -541,22 +538,25 @@ enum IIT_Info { // Values from 16+ are only encodable with the inefficient encoding. IIT_V64 = 16, IIT_MMX = 17, - IIT_METADATA = 18, - IIT_EMPTYSTRUCT = 19, - IIT_STRUCT2 = 20, - IIT_STRUCT3 = 21, - IIT_STRUCT4 = 22, - IIT_STRUCT5 = 23, - IIT_EXTEND_ARG = 24, - IIT_TRUNC_ARG = 25, - IIT_ANYPTR = 26, - IIT_V1 = 27, - IIT_VARARG = 28, - IIT_HALF_VEC_ARG = 29, - IIT_SAME_VEC_WIDTH_ARG = 30, - IIT_PTR_TO_ARG = 31, - IIT_VEC_OF_PTRS_TO_ELT = 32, - IIT_I128 = 33 + IIT_TOKEN = 18, + IIT_METADATA = 19, + IIT_EMPTYSTRUCT = 20, + IIT_STRUCT2 = 21, + IIT_STRUCT3 = 22, + IIT_STRUCT4 = 23, + IIT_STRUCT5 = 24, + IIT_EXTEND_ARG = 25, + IIT_TRUNC_ARG = 26, + IIT_ANYPTR = 27, + IIT_V1 = 28, + IIT_VARARG = 29, + IIT_HALF_VEC_ARG = 30, + IIT_SAME_VEC_WIDTH_ARG = 31, + IIT_PTR_TO_ARG = 32, + IIT_VEC_OF_PTRS_TO_ELT = 33, + IIT_I128 = 34, + IIT_V512 = 35, + IIT_V1024 = 36 }; @@ -576,6 +576,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, case IIT_MMX: OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0)); return; + case IIT_TOKEN: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Token, 0)); + return; case IIT_METADATA: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Metadata, 0)); return; @@ -634,6 +637,14 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 64)); DecodeIITType(NextElt, Infos, OutputTable); return; + case IIT_V512: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 512)); + DecodeIITType(NextElt, Infos, OutputTable); + return; + case IIT_V1024: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1024)); + DecodeIITType(NextElt, Infos, OutputTable); + return; case IIT_PTR: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0)); DecodeIITType(NextElt, Infos, OutputTable); @@ -751,6 +762,7 @@ static Type *DecodeFixedType(ArrayRef &Infos, case IITDescriptor::Void: return Type::getVoidTy(Context); case IITDescriptor::VarArg: return Type::getVoidTy(Context); case IITDescriptor::MMX: return Type::getX86_MMXTy(Context); + case IITDescriptor::Token: return Type::getTokenTy(Context); case IITDescriptor::Metadata: return Type::getMetadataTy(Context); case IITDescriptor::Half: return Type::getHalfTy(Context); case IITDescriptor::Float: return Type::getFloatTy(Context); @@ -924,62 +936,68 @@ bool Function::callsFunctionThatReturnsTwice() const { return false; } +Constant *Function::getPersonalityFn() const { + assert(hasPersonalityFn() && getNumOperands()); + return cast(Op<0>()); +} + +void Function::setPersonalityFn(Constant *Fn) { + setHungoffOperand<0>(Fn); + setValueSubclassDataBit(3, Fn != nullptr); +} + Constant *Function::getPrefixData() const { - assert(hasPrefixData()); - const LLVMContextImpl::PrefixDataMapTy &PDMap = - getContext().pImpl->PrefixDataMap; - assert(PDMap.find(this) != PDMap.end()); - return cast(PDMap.find(this)->second->getReturnValue()); + assert(hasPrefixData() && getNumOperands()); + return cast(Op<1>()); } void Function::setPrefixData(Constant *PrefixData) { - if (!PrefixData && !hasPrefixData()) - return; - - unsigned SCData = getSubclassDataFromValue(); - LLVMContextImpl::PrefixDataMapTy &PDMap = getContext().pImpl->PrefixDataMap; - ReturnInst *&PDHolder = 
PDMap[this]; - if (PrefixData) { - if (PDHolder) - PDHolder->setOperand(0, PrefixData); - else - PDHolder = ReturnInst::Create(getContext(), PrefixData); - SCData |= (1<<1); - } else { - delete PDHolder; - PDMap.erase(this); - SCData &= ~(1<<1); - } - setValueSubclassData(SCData); + setHungoffOperand<1>(PrefixData); + setValueSubclassDataBit(1, PrefixData != nullptr); } Constant *Function::getPrologueData() const { - assert(hasPrologueData()); - const LLVMContextImpl::PrologueDataMapTy &SOMap = - getContext().pImpl->PrologueDataMap; - assert(SOMap.find(this) != SOMap.end()); - return cast(SOMap.find(this)->second->getReturnValue()); + assert(hasPrologueData() && getNumOperands()); + return cast(Op<2>()); } void Function::setPrologueData(Constant *PrologueData) { - if (!PrologueData && !hasPrologueData()) + setHungoffOperand<2>(PrologueData); + setValueSubclassDataBit(2, PrologueData != nullptr); +} + +void Function::allocHungoffUselist() { + // If we've already allocated a uselist, stop here. + if (getNumOperands()) return; - unsigned PDData = getSubclassDataFromValue(); - LLVMContextImpl::PrologueDataMapTy &PDMap = getContext().pImpl->PrologueDataMap; - ReturnInst *&PDHolder = PDMap[this]; - if (PrologueData) { - if (PDHolder) - PDHolder->setOperand(0, PrologueData); - else - PDHolder = ReturnInst::Create(getContext(), PrologueData); - PDData |= (1<<2); - } else { - delete PDHolder; - PDMap.erase(this); - PDData &= ~(1<<2); + allocHungoffUses(3, /*IsPhi=*/ false); + setNumHungOffUseOperands(3); + + // Initialize the uselist with placeholder operands to allow traversal. + auto *CPN = ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0)); + Op<0>().set(CPN); + Op<1>().set(CPN); + Op<2>().set(CPN); +} + +template +void Function::setHungoffOperand(Constant *C) { + if (C) { + allocHungoffUselist(); + Op().set(C); + } else if (getNumOperands()) { + Op().set( + ConstantPointerNull::get(Type::getInt1PtrTy(getContext(), 0))); } - setValueSubclassData(PDData); +} + +void Function::setValueSubclassDataBit(unsigned Bit, bool On) { + assert(Bit < 16 && "SubclassData contains only 16 bits"); + if (On) + setValueSubclassData(getSubclassDataFromValue() | (1 << Bit)); + else + setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit)); } void Function::setEntryCount(uint64_t Count) { @@ -997,22 +1015,3 @@ Optional Function::getEntryCount() const { } return None; } - -void Function::setPersonalityFn(Constant *C) { - if (!C) { - if (hasPersonalityFn()) { - // Note, the num operands is used to compute the offset of the operand, so - // the order here matters. Clearing the operand then clearing the num - // operands ensures we have the correct offset to the operand. - Op<0>().set(nullptr); - setFunctionNumOperands(0); - } - } else { - // Note, the num operands is used to compute the offset of the operand, so - // the order here matters. We need to set num operands to 1 first so that - // we get the correct offset to the first operand when we set it. - if (!hasPersonalityFn()) - setFunctionNumOperands(1); - Op<0>().set(C); - } -} diff --git a/lib/IR/FunctionInfo.cpp b/lib/IR/FunctionInfo.cpp new file mode 100644 index 000000000000..17a67bcf0472 --- /dev/null +++ b/lib/IR/FunctionInfo.cpp @@ -0,0 +1,67 @@ +//===-- FunctionInfo.cpp - Function Info Index ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
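As an aside, setValueSubclassDataBit above packs the has-personality/prefix/prologue flags into the value's 16-bit subclass-data word, one bit per optional payload. The helper in isolation, as a standalone sketch:

#include <cassert>
#include <cstdint>

class SubclassBits {
  uint16_t Bits = 0; // mirrors the 16-bit value subclass data
public:
  void setBit(unsigned Bit, bool On) {
    assert(Bit < 16 && "SubclassData contains only 16 bits");
    if (On)
      Bits = uint16_t(Bits | (1u << Bit));
    else
      Bits = uint16_t(Bits & ~(1u << Bit));
  }
  bool testBit(unsigned Bit) const { return (Bits >> Bit) & 1u; }
};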
+// +//===----------------------------------------------------------------------===// +// +// This file implements the function info index and summary classes for the +// IR library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/FunctionInfo.h" +#include "llvm/ADT/StringMap.h" +using namespace llvm; + +// Create the combined function index/summary from multiple +// per-module instances. +void FunctionInfoIndex::mergeFrom(std::unique_ptr<FunctionInfoIndex> Other, + uint64_t NextModuleId) { + + StringRef ModPath; + for (auto &OtherFuncInfoLists : *Other) { + std::string FuncName = OtherFuncInfoLists.getKey(); + FunctionInfoList &List = OtherFuncInfoLists.second; + + // Assert that the func info list only has one entry, since we shouldn't + // have duplicate names within a single per-module index. + assert(List.size() == 1); + std::unique_ptr<FunctionInfo> Info = std::move(List.front()); + + // Skip if there was no function summary section. + if (!Info->functionSummary()) + continue; + + // Add the module path string ref for this module if we haven't already + // saved a reference to it. + if (ModPath.empty()) + ModPath = + addModulePath(Info->functionSummary()->modulePath(), NextModuleId); + else + assert(ModPath == Info->functionSummary()->modulePath() && + "Each module in the combined map should have a unique ID"); + + // Note the module path string ref was copied above and is still owned by + // the original per-module index. Reset it to the new module path + // string reference owned by the combined index. + Info->functionSummary()->setModulePath(ModPath); + + // If it is a local function, rename it. + if (Info->functionSummary()->isLocalFunction()) { + // Any local functions are virtually renamed when being added to the + // combined index map, to disambiguate from other functions with + // the same name. The symbol table created for the combined index + // file should contain the renamed symbols. + FuncName = + FunctionInfoIndex::getGlobalNameForLocal(FuncName, NextModuleId); + } + + // Add new function info to existing list. There may be duplicates when + // combining FunctionMap entries, due to COMDAT functions. Any local + // functions were virtually renamed above.
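The renaming step in mergeFrom above keeps merged local symbols distinct by appending a per-module id to the name. A plausible standalone sketch of such a helper; the exact ".llvm." separator is an assumption for illustration, not taken from this patch:

#include <cstdint>
#include <string>

// Hypothetical: promote a module-local name to a combined-index-unique name.
static std::string globalNameForLocal(const std::string &Name,
                                      uint64_t ModuleId) {
  return Name + ".llvm." + std::to_string(ModuleId);
}

// e.g. globalNameForLocal("helper", 3) yields "helper.llvm.3"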
+ addFunctionInfo(FuncName, std::move(Info)); + } +} diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index 6ed589131725..35b8157751b6 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -448,7 +448,7 @@ static uint32_t branchDiv(uint64_t Numerator, uint64_t Divisor) { namespace { struct formatBranchInfo { - formatBranchInfo(const GCOVOptions &Options, uint64_t Count, uint64_t Total) + formatBranchInfo(const GCOV::Options &Options, uint64_t Count, uint64_t Total) : Options(Options), Count(Count), Total(Total) {} void print(raw_ostream &OS) const { @@ -460,7 +460,7 @@ struct formatBranchInfo { OS << "taken " << branchDiv(Count, Total) << "%"; } - const GCOVOptions &Options; + const GCOV::Options &Options; uint64_t Count; uint64_t Total; }; diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index 1d0282677bf7..6159f93faf89 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -32,15 +32,9 @@ bool GlobalValue::isMaterializable() const { return F->isMaterializable(); return false; } -bool GlobalValue::isDematerializable() const { - return getParent() && getParent()->isDematerializable(this); -} std::error_code GlobalValue::materialize() { return getParent()->materialize(this); } -void GlobalValue::dematerialize() { - getParent()->dematerialize(this); -} /// Override destroyConstantImpl to make sure it doesn't get called on /// GlobalValue's because they shouldn't be treated like other constants. @@ -97,10 +91,11 @@ void GlobalObject::setGlobalObjectSubClassData(unsigned Val) { } void GlobalObject::copyAttributesFrom(const GlobalValue *Src) { - const auto *GV = cast(Src); - GlobalValue::copyAttributesFrom(GV); - setAlignment(GV->getAlignment()); - setSection(GV->getSection()); + GlobalValue::copyAttributesFrom(Src); + if (const auto *GV = dyn_cast(Src)) { + setAlignment(GV->getAlignment()); + setSection(GV->getSection()); + } } const char *GlobalValue::getSection() const { @@ -147,9 +142,9 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, Constant *InitVal, const Twine &Name, ThreadLocalMode TLMode, unsigned AddressSpace, bool isExternallyInitialized) - : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, + : GlobalObject(Ty, Value::GlobalVariableVal, OperandTraits::op_begin(this), - InitVal != nullptr, Link, Name), + InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { setThreadLocalMode(TLMode); @@ -165,9 +160,9 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, const Twine &Name, GlobalVariable *Before, ThreadLocalMode TLMode, unsigned AddressSpace, bool isExternallyInitialized) - : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, + : GlobalObject(Ty, Value::GlobalVariableVal, OperandTraits::op_begin(this), - InitVal != nullptr, Link, Name), + InitVal != nullptr, Link, Name, AddressSpace), isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { setThreadLocalMode(TLMode); @@ -178,7 +173,7 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, } if (Before) - Before->getParent()->getGlobalList().insert(Before, this); + Before->getParent()->getGlobalList().insert(Before->getIterator(), this); else M.getGlobalList().push_back(this); } @@ -188,11 +183,11 @@ void GlobalVariable::setParent(Module *parent) { } void GlobalVariable::removeFromParent() { - getParent()->getGlobalList().remove(this); + getParent()->getGlobalList().remove(getIterator()); } void 
GlobalVariable::eraseFromParent() { - getParent()->getGlobalList().erase(this); + getParent()->getGlobalList().erase(getIterator()); } void GlobalVariable::setInitializer(Constant *InitVal) { @@ -216,14 +211,14 @@ void GlobalVariable::setInitializer(Constant *InitVal) { } } -/// copyAttributesFrom - copy all additional attributes (those not needed to -/// create a GlobalVariable) from the GlobalVariable Src to this one. +/// Copy all additional attributes (those not needed to create a GlobalVariable) +/// from the GlobalVariable Src to this one. void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { - assert(isa(Src) && "Expected a GlobalVariable!"); GlobalObject::copyAttributesFrom(Src); - const GlobalVariable *SrcVar = cast(Src); - setThreadLocalMode(SrcVar->getThreadLocalMode()); - setExternallyInitialized(SrcVar->isExternallyInitialized()); + if (const GlobalVariable *SrcVar = dyn_cast(Src)) { + setThreadLocalMode(SrcVar->getThreadLocalMode()); + setExternallyInitialized(SrcVar->isExternallyInitialized()); + } } @@ -231,35 +226,40 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { // GlobalAlias Implementation //===----------------------------------------------------------------------===// -GlobalAlias::GlobalAlias(PointerType *Ty, LinkageTypes Link, const Twine &Name, - Constant *Aliasee, Module *ParentModule) - : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) { +GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link, + const Twine &Name, Constant *Aliasee, + Module *ParentModule) + : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name, + AddressSpace) { Op<0>() = Aliasee; if (ParentModule) ParentModule->getAliasList().push_back(this); } -GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Link, - const Twine &Name, Constant *Aliasee, - Module *ParentModule) { - return new GlobalAlias(Ty, Link, Name, Aliasee, ParentModule); +GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, + LinkageTypes Link, const Twine &Name, + Constant *Aliasee, Module *ParentModule) { + return new GlobalAlias(Ty, AddressSpace, Link, Name, Aliasee, ParentModule); } -GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, Module *Parent) { - return create(Ty, Linkage, Name, nullptr, Parent); +GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + Module *Parent) { + return create(Ty, AddressSpace, Linkage, Name, nullptr, Parent); } -GlobalAlias *GlobalAlias::create(PointerType *Ty, LinkageTypes Linkage, - const Twine &Name, GlobalValue *Aliasee) { - return create(Ty, Linkage, Name, Aliasee, Aliasee->getParent()); +GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, + LinkageTypes Linkage, const Twine &Name, + GlobalValue *Aliasee) { + return create(Ty, AddressSpace, Linkage, Name, Aliasee, Aliasee->getParent()); } GlobalAlias *GlobalAlias::create(LinkageTypes Link, const Twine &Name, GlobalValue *Aliasee) { PointerType *PTy = Aliasee->getType(); - return create(PTy, Link, Name, Aliasee); + return create(PTy->getElementType(), PTy->getAddressSpace(), Link, Name, + Aliasee); } GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalValue *Aliasee) { @@ -271,11 +271,11 @@ void GlobalAlias::setParent(Module *parent) { } void GlobalAlias::removeFromParent() { - getParent()->getAliasList().remove(this); + getParent()->getAliasList().remove(getIterator()); } void GlobalAlias::eraseFromParent() { - 
getParent()->getAliasList().erase(this); + getParent()->getAliasList().erase(getIterator()); } void GlobalAlias::setAliasee(Constant *Aliasee) { diff --git a/lib/IR/IRBuilder.cpp b/lib/IR/IRBuilder.cpp index bddb278dee79..447412936335 100644 --- a/lib/IR/IRBuilder.cpp +++ b/lib/IR/IRBuilder.cpp @@ -247,18 +247,21 @@ CallInst *IRBuilderBase::CreateMaskedIntrinsic(Intrinsic::ID Id, return createCallHelper(TheFn, Ops, this, Name); } +template static std::vector getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes, - Value *ActualCallee, ArrayRef CallArgs, - ArrayRef DeoptArgs, ArrayRef GCArgs) { + Value *ActualCallee, uint32_t Flags, ArrayRef CallArgs, + ArrayRef TransitionArgs, ArrayRef DeoptArgs, + ArrayRef GCArgs) { std::vector Args; Args.push_back(B.getInt64(ID)); Args.push_back(B.getInt32(NumPatchBytes)); Args.push_back(ActualCallee); Args.push_back(B.getInt32(CallArgs.size())); - Args.push_back(B.getInt32((unsigned)StatepointFlags::None)); + Args.push_back(B.getInt32(Flags)); Args.insert(Args.end(), CallArgs.begin(), CallArgs.end()); - Args.push_back(B.getInt32(0 /* no transition args */)); + Args.push_back(B.getInt32(TransitionArgs.size())); + Args.insert(Args.end(), TransitionArgs.begin(), TransitionArgs.end()); Args.push_back(B.getInt32(DeoptArgs.size())); Args.insert(Args.end(), DeoptArgs.begin(), DeoptArgs.end()); Args.insert(Args.end(), GCArgs.begin(), GCArgs.end()); @@ -266,36 +269,78 @@ getStatepointArgs(IRBuilderBase &B, uint64_t ID, uint32_t NumPatchBytes, return Args; } -CallInst *IRBuilderBase::CreateGCStatepointCall( - uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, - ArrayRef CallArgs, ArrayRef DeoptArgs, - ArrayRef GCArgs, const Twine &Name) { +template +static CallInst *CreateGCStatepointCallCommon( + IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes, + Value *ActualCallee, uint32_t Flags, ArrayRef CallArgs, + ArrayRef TransitionArgs, ArrayRef DeoptArgs, ArrayRef GCArgs, + const Twine &Name) { // Extract out the type of the callee. 
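As an aside, getStatepointArgs above flattens the statepoint into one operand vector: fixed header fields first, then each variable-length section preceded by its own count, with the GC arguments trailing uncounted. The same layout rehearsed over plain integers, purely for illustration:

#include <cstdint>
#include <vector>

static std::vector<uint64_t>
buildStatepointOperands(uint64_t ID, uint32_t NumPatchBytes, uint64_t Callee,
                        uint32_t Flags, const std::vector<uint64_t> &CallArgs,
                        const std::vector<uint64_t> &TransitionArgs,
                        const std::vector<uint64_t> &DeoptArgs,
                        const std::vector<uint64_t> &GCArgs) {
  std::vector<uint64_t> Ops = {ID, NumPatchBytes, Callee,
                               (uint64_t)CallArgs.size(), Flags};
  Ops.insert(Ops.end(), CallArgs.begin(), CallArgs.end());
  Ops.push_back(TransitionArgs.size());
  Ops.insert(Ops.end(), TransitionArgs.begin(), TransitionArgs.end());
  Ops.push_back(DeoptArgs.size());
  Ops.insert(Ops.end(), DeoptArgs.begin(), DeoptArgs.end());
  Ops.insert(Ops.end(), GCArgs.begin(), GCArgs.end()); // no count: trailing
  return Ops;
}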
PointerType *FuncPtrType = cast(ActualCallee->getType()); assert(isa(FuncPtrType->getElementType()) && "actual callee must be a callable value"); - Module *M = BB->getParent()->getParent(); + Module *M = Builder->GetInsertBlock()->getParent()->getParent(); // Fill in the one generic type'd argument (the function is also vararg) Type *ArgTypes[] = { FuncPtrType }; Function *FnStatepoint = Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_statepoint, ArgTypes); - std::vector Args = getStatepointArgs( - *this, ID, NumPatchBytes, ActualCallee, CallArgs, DeoptArgs, GCArgs); - return createCallHelper(FnStatepoint, Args, this, Name); + std::vector Args = + getStatepointArgs(*Builder, ID, NumPatchBytes, ActualCallee, Flags, + CallArgs, TransitionArgs, DeoptArgs, GCArgs); + return createCallHelper(FnStatepoint, Args, Builder, Name); +} + +CallInst *IRBuilderBase::CreateGCStatepointCall( + uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, + ArrayRef CallArgs, ArrayRef DeoptArgs, + ArrayRef GCArgs, const Twine &Name) { + return CreateGCStatepointCallCommon( + this, ID, NumPatchBytes, ActualCallee, uint32_t(StatepointFlags::None), + CallArgs, None /* No Transition Args */, DeoptArgs, GCArgs, Name); +} + +CallInst *IRBuilderBase::CreateGCStatepointCall( + uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, uint32_t Flags, + ArrayRef CallArgs, ArrayRef TransitionArgs, + ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { + return CreateGCStatepointCallCommon( + this, ID, NumPatchBytes, ActualCallee, Flags, CallArgs, TransitionArgs, + DeoptArgs, GCArgs, Name); } CallInst *IRBuilderBase::CreateGCStatepointCall( uint64_t ID, uint32_t NumPatchBytes, Value *ActualCallee, ArrayRef CallArgs, ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { - std::vector VCallArgs; - for (auto &U : CallArgs) - VCallArgs.push_back(U.get()); - return CreateGCStatepointCall(ID, NumPatchBytes, ActualCallee, VCallArgs, - DeoptArgs, GCArgs, Name); + return CreateGCStatepointCallCommon( + this, ID, NumPatchBytes, ActualCallee, uint32_t(StatepointFlags::None), + CallArgs, None, DeoptArgs, GCArgs, Name); +} + +template +static InvokeInst *CreateGCStatepointInvokeCommon( + IRBuilderBase *Builder, uint64_t ID, uint32_t NumPatchBytes, + Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, + uint32_t Flags, ArrayRef InvokeArgs, ArrayRef TransitionArgs, + ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { + // Extract out the type of the callee. + PointerType *FuncPtrType = cast(ActualInvokee->getType()); + assert(isa(FuncPtrType->getElementType()) && + "actual callee must be a callable value"); + + Module *M = Builder->GetInsertBlock()->getParent()->getParent(); + // Fill in the one generic type'd argument (the function is also vararg) + Function *FnStatepoint = Intrinsic::getDeclaration( + M, Intrinsic::experimental_gc_statepoint, {FuncPtrType}); + + std::vector Args = + getStatepointArgs(*Builder, ID, NumPatchBytes, ActualInvokee, Flags, + InvokeArgs, TransitionArgs, DeoptArgs, GCArgs); + return createInvokeHelper(FnStatepoint, NormalDest, UnwindDest, Args, Builder, + Name); } InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( @@ -303,32 +348,30 @@ InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { - // Extract out the type of the callee. 
- PointerType *FuncPtrType = cast(ActualInvokee->getType()); - assert(isa(FuncPtrType->getElementType()) && - "actual callee must be a callable value"); + return CreateGCStatepointInvokeCommon( + this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, + uint32_t(StatepointFlags::None), InvokeArgs, None /* No Transition Args*/, + DeoptArgs, GCArgs, Name); +} - Module *M = BB->getParent()->getParent(); - // Fill in the one generic type'd argument (the function is also vararg) - Function *FnStatepoint = Intrinsic::getDeclaration( - M, Intrinsic::experimental_gc_statepoint, {FuncPtrType}); - - std::vector Args = getStatepointArgs( - *this, ID, NumPatchBytes, ActualInvokee, InvokeArgs, DeoptArgs, GCArgs); - return createInvokeHelper(FnStatepoint, NormalDest, UnwindDest, Args, this, - Name); +InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( + uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, + BasicBlock *NormalDest, BasicBlock *UnwindDest, uint32_t Flags, + ArrayRef InvokeArgs, ArrayRef TransitionArgs, + ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { + return CreateGCStatepointInvokeCommon( + this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, Flags, + InvokeArgs, TransitionArgs, DeoptArgs, GCArgs, Name); } InvokeInst *IRBuilderBase::CreateGCStatepointInvoke( uint64_t ID, uint32_t NumPatchBytes, Value *ActualInvokee, BasicBlock *NormalDest, BasicBlock *UnwindDest, ArrayRef InvokeArgs, ArrayRef DeoptArgs, ArrayRef GCArgs, const Twine &Name) { - std::vector VCallArgs; - for (auto &U : InvokeArgs) - VCallArgs.push_back(U.get()); - return CreateGCStatepointInvoke(ID, NumPatchBytes, ActualInvokee, NormalDest, - UnwindDest, VCallArgs, DeoptArgs, GCArgs, - Name); + return CreateGCStatepointInvokeCommon( + this, ID, NumPatchBytes, ActualInvokee, NormalDest, UnwindDest, + uint32_t(StatepointFlags::None), InvokeArgs, None, DeoptArgs, GCArgs, + Name); } CallInst *IRBuilderBase::CreateGCResult(Instruction *Statepoint, diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index aa9e0272ad10..15d3b830b8fc 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -24,23 +24,22 @@ using namespace llvm; InlineAsm::~InlineAsm() { } - -InlineAsm *InlineAsm::get(FunctionType *Ty, StringRef AsmString, +InlineAsm *InlineAsm::get(FunctionType *FTy, StringRef AsmString, StringRef Constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect) { - InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack, - asmDialect); - LLVMContextImpl *pImpl = Ty->getContext().pImpl; - return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key); + InlineAsmKeyType Key(AsmString, Constraints, FTy, hasSideEffects, + isAlignStack, asmDialect); + LLVMContextImpl *pImpl = FTy->getContext().pImpl; + return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(FTy), Key); } -InlineAsm::InlineAsm(PointerType *Ty, const std::string &asmString, +InlineAsm::InlineAsm(FunctionType *FTy, const std::string &asmString, const std::string &constraints, bool hasSideEffects, bool isAlignStack, AsmDialect asmDialect) - : Value(Ty, Value::InlineAsmVal), - AsmString(asmString), Constraints(constraints), - HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack), - Dialect(asmDialect) { + : Value(PointerType::getUnqual(FTy), Value::InlineAsmVal), + AsmString(asmString), Constraints(constraints), FTy(FTy), + HasSideEffects(hasSideEffects), IsAlignStack(isAlignStack), + Dialect(asmDialect) { // Do various checks on the constraint string and type. 
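The constructor change above stores the FunctionType on the InlineAsm value itself instead of re-deriving it from the pointee type of its pointer type, decoupling getFunctionType() from the pointer's element type. The shape of that change in miniature (toy types, not the real classes):

struct FunctionTypeToy {};

struct InlineAsmToy {
  FunctionTypeToy *FTy; // cached at construction
  explicit InlineAsmToy(FunctionTypeToy *T) : FTy(T) {}
  // Before: recovered via the pointer type's element type; after: a field read.
  FunctionTypeToy *getFunctionType() const { return FTy; }
};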
assert(Verify(getFunctionType(), constraints) && @@ -53,7 +52,7 @@ void InlineAsm::destroyConstant() { } FunctionType *InlineAsm::getFunctionType() const { - return cast(getType()->getElementType()); + return FTy; } ///Default constructor. @@ -160,6 +159,9 @@ bool InlineAsm::ConstraintInfo::Parse(StringRef Str, // If Operand N already has a matching input, reject this. An output // can't be constrained to the same value as multiple inputs. if (isMultipleAlternative) { + if (multipleAlternativeIndex >= + ConstraintsSoFar[N].multipleAlternatives.size()) + return true; InlineAsm::SubConstraintInfo &scInfo = ConstraintsSoFar[N].multipleAlternatives[multipleAlternativeIndex]; if (scInfo.MatchingInput != -1) @@ -291,4 +293,3 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { if (Ty->getNumParams() != NumInputs) return false; return true; } - diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index c57ba16cf6ca..a0bd2c9698e8 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -28,7 +28,7 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps, if (InsertBefore) { BasicBlock *BB = InsertBefore->getParent(); assert(BB && "Instruction to insert before is not in a basic block!"); - BB->getInstList().insert(InsertBefore, this); + BB->getInstList().insert(InsertBefore->getIterator(), this); } } @@ -62,33 +62,39 @@ Module *Instruction::getModule() { return getParent()->getModule(); } +Function *Instruction::getFunction() { return getParent()->getParent(); } + +const Function *Instruction::getFunction() const { + return getParent()->getParent(); +} void Instruction::removeFromParent() { - getParent()->getInstList().remove(this); + getParent()->getInstList().remove(getIterator()); } iplist::iterator Instruction::eraseFromParent() { - return getParent()->getInstList().erase(this); + return getParent()->getInstList().erase(getIterator()); } /// insertBefore - Insert an unlinked instructions into a basic block /// immediately before the specified instruction. void Instruction::insertBefore(Instruction *InsertPos) { - InsertPos->getParent()->getInstList().insert(InsertPos, this); + InsertPos->getParent()->getInstList().insert(InsertPos->getIterator(), this); } /// insertAfter - Insert an unlinked instructions into a basic block /// immediately after the specified instruction. void Instruction::insertAfter(Instruction *InsertPos) { - InsertPos->getParent()->getInstList().insertAfter(InsertPos, this); + InsertPos->getParent()->getInstList().insertAfter(InsertPos->getIterator(), + this); } /// moveBefore - Unlink this instruction from its current basic block and /// insert it into the basic block that MovePos lives in, right before /// MovePos. void Instruction::moveBefore(Instruction *MovePos) { - MovePos->getParent()->getInstList().splice(MovePos,getParent()->getInstList(), - this); + MovePos->getParent()->getInstList().splice( + MovePos->getIterator(), getParent()->getInstList(), getIterator()); } /// Set or clear the unsafe-algebra flag on this instruction, which must be an @@ -196,6 +202,10 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case Invoke: return "invoke"; case Resume: return "resume"; case Unreachable: return "unreachable"; + case CleanupRet: return "cleanupret"; + case CatchRet: return "catchret"; + case CatchPad: return "catchpad"; + case CatchSwitch: return "catchswitch"; // Standard binary operators... 
case Add: return "add"; @@ -256,6 +266,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case ExtractValue: return "extractvalue"; case InsertValue: return "insertvalue"; case LandingPad: return "landingpad"; + case CleanupPad: return "cleanuppad"; default: return " "; } @@ -285,11 +296,12 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, if (const CallInst *CI = dyn_cast(I1)) return CI->isTailCall() == cast(I2)->isTailCall() && CI->getCallingConv() == cast(I2)->getCallingConv() && - CI->getAttributes() == cast(I2)->getAttributes(); + CI->getAttributes() == cast(I2)->getAttributes() && + CI->hasIdenticalOperandBundleSchema(*cast(I2)); if (const InvokeInst *CI = dyn_cast(I1)) return CI->getCallingConv() == cast(I2)->getCallingConv() && - CI->getAttributes() == - cast(I2)->getAttributes(); + CI->getAttributes() == cast(I2)->getAttributes() && + CI->hasIdenticalOperandBundleSchema(*cast(I2)); if (const InsertValueInst *IVI = dyn_cast(I1)) return IVI->getIndices() == cast(I2)->getIndices(); if (const ExtractValueInst *EVI = dyn_cast(I1)) @@ -407,6 +419,8 @@ bool Instruction::mayReadFromMemory() const { case Instruction::Fence: // FIXME: refine definition of mayReadFromMemory case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: + case Instruction::CatchPad: + case Instruction::CatchRet: return true; case Instruction::Call: return !cast(this)->doesNotAccessMemory(); @@ -427,6 +441,8 @@ bool Instruction::mayWriteToMemory() const { case Instruction::VAArg: case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: + case Instruction::CatchPad: + case Instruction::CatchRet: return true; case Instruction::Call: return !cast(this)->onlyReadsMemory(); @@ -455,6 +471,10 @@ bool Instruction::isAtomic() const { bool Instruction::mayThrow() const { if (const CallInst *CI = dyn_cast(this)) return !CI->doesNotThrow(); + if (const auto *CRI = dyn_cast(this)) + return CRI->unwindsToCaller(); + if (const auto *CatchSwitch = dyn_cast(this)) + return CatchSwitch->unwindsToCaller(); return isa(this); } diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 86c921aeda8a..4ae2fd522b52 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -62,7 +62,10 @@ UnaryInstruction::~UnaryInstruction() { const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { if (Op1->getType() != Op2->getType()) return "both values to select must have same type"; - + + if (Op1->getType()->isTokenTy()) + return "select values cannot have token type"; + if (VectorType *VT = dyn_cast(Op0->getType())) { // Vector select. 
if (VT->getElementType() != Type::getInt1Ty(Op0->getContext())) @@ -84,6 +87,8 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) { // PHINode Class //===----------------------------------------------------------------------===// +void PHINode::anchor() {} + PHINode::PHINode(const PHINode &PN) : Instruction(PN.getType(), Instruction::PHI, nullptr, PN.getNumOperands()), ReservedSpace(PN.getNumOperands()) { @@ -223,9 +228,10 @@ CallInst::~CallInst() { } void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef Args, - const Twine &NameStr) { + ArrayRef Bundles, const Twine &NameStr) { this->FTy = FTy; - assert(getNumOperands() == Args.size() + 1 && "NumOperands not set up?"); + assert(getNumOperands() == Args.size() + CountBundleInputs(Bundles) + 1 && + "NumOperands not set up?"); Op<-1>() = Func; #ifndef NDEBUG @@ -240,6 +246,11 @@ void CallInst::init(FunctionType *FTy, Value *Func, ArrayRef Args, #endif std::copy(Args.begin(), Args.end(), op_begin()); + + auto It = populateBundleOperandInfos(Bundles, Args.size()); + (void)It; + assert(It + 1 == op_end() && "Should add up!"); + setName(NameStr); } @@ -281,11 +292,26 @@ CallInst::CallInst(const CallInst &CI) AttributeList(CI.AttributeList), FTy(CI.FTy) { setTailCallKind(CI.getTailCallKind()); setCallingConv(CI.getCallingConv()); - + std::copy(CI.op_begin(), CI.op_end(), op_begin()); + std::copy(CI.bundle_op_info_begin(), CI.bundle_op_info_end(), + bundle_op_info_begin()); SubclassOptionalData = CI.SubclassOptionalData; } +CallInst *CallInst::Create(CallInst *CI, ArrayRef OpB, + Instruction *InsertPt) { + std::vector Args(CI->arg_begin(), CI->arg_end()); + + auto *NewCI = CallInst::Create(CI->getCalledValue(), Args, OpB, CI->getName(), + InsertPt); + NewCI->setTailCallKind(CI->getTailCallKind()); + NewCI->setCallingConv(CI->getCallingConv()); + NewCI->SubclassOptionalData = CI->SubclassOptionalData; + NewCI->setAttributes(CI->getAttributes()); + return NewCI; +} + void CallInst::addAttribute(unsigned i, Attribute::AttrKind attr) { AttributeSet PAL = getAttributes(); PAL = PAL.addAttribute(getContext(), i, attr); @@ -320,6 +346,8 @@ void CallInst::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) { } bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { + assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!"); + if (AttributeList.hasAttribute(i, A)) return true; if (const Function *F = getCalledFunction()) @@ -327,6 +355,25 @@ bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { return false; } +bool CallInst::dataOperandHasImpliedAttr(unsigned i, + Attribute::AttrKind A) const { + + // There are getNumOperands() - 1 data operands. The last operand is the + // callee. + assert(i < getNumOperands() && "Data operand index out of bounds!"); + + // The attribute A can either be directly specified, if the operand in + // question is a call argument; or be indirectly implied by the kind of its + // containing operand bundle, if the operand is a bundle operand. 
+ + if (i < (getNumArgOperands() + 1)) + return paramHasAttr(i, A); + + assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && + "Must be either a call argument or an operand bundle!"); + return bundleOperandHasAttr(i - 1, A); +} + /// IsConstantOne - Return true only if val is constant int 1 static bool IsConstantOne(Value *val) { assert(val && "IsConstantOne does not work with nullptr val"); @@ -496,10 +543,12 @@ Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) { void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef Args, + ArrayRef Bundles, const Twine &NameStr) { this->FTy = FTy; - assert(getNumOperands() == 3 + Args.size() && "NumOperands not set up?"); + assert(getNumOperands() == 3 + Args.size() + CountBundleInputs(Bundles) && + "NumOperands not set up?"); Op<-3>() = Fn; Op<-2>() = IfNormal; Op<-1>() = IfException; @@ -516,6 +565,11 @@ void InvokeInst::init(FunctionType *FTy, Value *Fn, BasicBlock *IfNormal, #endif std::copy(Args.begin(), Args.end(), op_begin()); + + auto It = populateBundleOperandInfos(Bundles, Args.size()); + (void)It; + assert(It + 3 == op_end() && "Should add up!"); + setName(NameStr); } @@ -527,9 +581,24 @@ InvokeInst::InvokeInst(const InvokeInst &II) AttributeList(II.AttributeList), FTy(II.FTy) { setCallingConv(II.getCallingConv()); std::copy(II.op_begin(), II.op_end(), op_begin()); + std::copy(II.bundle_op_info_begin(), II.bundle_op_info_end(), + bundle_op_info_begin()); SubclassOptionalData = II.SubclassOptionalData; } +InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef OpB, + Instruction *InsertPt) { + std::vector Args(II->arg_begin(), II->arg_end()); + + auto *NewII = InvokeInst::Create(II->getCalledValue(), II->getNormalDest(), + II->getUnwindDest(), Args, OpB, + II->getName(), InsertPt); + NewII->setCallingConv(II->getCallingConv()); + NewII->SubclassOptionalData = II->SubclassOptionalData; + NewII->setAttributes(II->getAttributes()); + return NewII; +} + BasicBlock *InvokeInst::getSuccessorV(unsigned idx) const { return getSuccessor(idx); } @@ -543,12 +612,20 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) { bool InvokeInst::hasFnAttrImpl(Attribute::AttrKind A) const { if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A)) return true; + + // Operand bundles override attributes on the called function, but don't + // override attributes directly present on the invoke instruction. + if (isFnAttrDisallowedByOpBundle(A)) + return false; + if (const Function *F = getCalledFunction()) return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A); return false; } bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { + assert(i < (getNumArgOperands() + 1) && "Param index out of bounds!"); + if (AttributeList.hasAttribute(i, A)) return true; if (const Function *F = getCalledFunction()) @@ -556,6 +633,24 @@ bool InvokeInst::paramHasAttr(unsigned i, Attribute::AttrKind A) const { return false; } +bool InvokeInst::dataOperandHasImpliedAttr(unsigned i, + Attribute::AttrKind A) const { + // There are getNumOperands() - 3 data operands. The last three operands are + // the callee and the two successor basic blocks. + assert(i < (getNumOperands() - 2) && "Data operand index out of bounds!"); + + // The attribute A can either be directly specified, if the operand in + // question is an invoke argument; or be indirectly implied by the kind of its + // containing operand bundle, if the operand is a bundle operand. 
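As an aside, dataOperandHasImpliedAttr above splits one flat index space across two regions: indexes up to the argument count are call arguments, and everything past that (before the callee and successors) is an operand-bundle operand. The dispatch isolated as a sketch, with the counts passed in explicitly:

#include <cassert>

static bool dataOperandHasAttr(unsigned i, unsigned NumArgs,
                               unsigned NumBundleOperands,
                               bool (*paramHasAttr)(unsigned),
                               bool (*bundleOperandHasAttr)(unsigned)) {
  assert(i < 1 + NumArgs + NumBundleOperands && "index out of bounds");
  if (i < NumArgs + 1)
    return paramHasAttr(i);            // ordinary call argument
  return bundleOperandHasAttr(i - 1);  // operand-bundle operand
}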
+ + if (i < (getNumArgOperands() + 1)) + return paramHasAttr(i, A); + + assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && + "Must be either an invoke argument or an operand bundle!"); + return bundleOperandHasAttr(i - 1, A); +} + void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind attr) { AttributeSet PAL = getAttributes(); PAL = PAL.addAttribute(getContext(), i, attr); @@ -670,6 +765,223 @@ BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const { llvm_unreachable("ResumeInst has no successors!"); } +//===----------------------------------------------------------------------===// +// CleanupReturnInst Implementation +//===----------------------------------------------------------------------===// + +CleanupReturnInst::CleanupReturnInst(const CleanupReturnInst &CRI) + : TerminatorInst(CRI.getType(), Instruction::CleanupRet, + OperandTraits::op_end(this) - + CRI.getNumOperands(), + CRI.getNumOperands()) { + setInstructionSubclassData(CRI.getSubclassDataFromInstruction()); + Op<0>() = CRI.Op<0>(); + if (CRI.hasUnwindDest()) + Op<1>() = CRI.Op<1>(); +} + +void CleanupReturnInst::init(Value *CleanupPad, BasicBlock *UnwindBB) { + if (UnwindBB) + setInstructionSubclassData(getSubclassDataFromInstruction() | 1); + + Op<0>() = CleanupPad; + if (UnwindBB) + Op<1>() = UnwindBB; +} + +CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, + unsigned Values, Instruction *InsertBefore) + : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()), + Instruction::CleanupRet, + OperandTraits::op_end(this) - Values, + Values, InsertBefore) { + init(CleanupPad, UnwindBB); +} + +CleanupReturnInst::CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, + unsigned Values, BasicBlock *InsertAtEnd) + : TerminatorInst(Type::getVoidTy(CleanupPad->getContext()), + Instruction::CleanupRet, + OperandTraits::op_end(this) - Values, + Values, InsertAtEnd) { + init(CleanupPad, UnwindBB); +} + +BasicBlock *CleanupReturnInst::getSuccessorV(unsigned Idx) const { + assert(Idx == 0); + return getUnwindDest(); +} +unsigned CleanupReturnInst::getNumSuccessorsV() const { + return getNumSuccessors(); +} +void CleanupReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) { + assert(Idx == 0); + setUnwindDest(B); +} + +//===----------------------------------------------------------------------===// +// CatchReturnInst Implementation +//===----------------------------------------------------------------------===// +void CatchReturnInst::init(Value *CatchPad, BasicBlock *BB) { + Op<0>() = CatchPad; + Op<1>() = BB; +} + +CatchReturnInst::CatchReturnInst(const CatchReturnInst &CRI) + : TerminatorInst(Type::getVoidTy(CRI.getContext()), Instruction::CatchRet, + OperandTraits::op_begin(this), 2) { + Op<0>() = CRI.Op<0>(); + Op<1>() = CRI.Op<1>(); +} + +CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB, + Instruction *InsertBefore) + : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet, + OperandTraits::op_begin(this), 2, + InsertBefore) { + init(CatchPad, BB); +} + +CatchReturnInst::CatchReturnInst(Value *CatchPad, BasicBlock *BB, + BasicBlock *InsertAtEnd) + : TerminatorInst(Type::getVoidTy(BB->getContext()), Instruction::CatchRet, + OperandTraits::op_begin(this), 2, + InsertAtEnd) { + init(CatchPad, BB); +} + +BasicBlock *CatchReturnInst::getSuccessorV(unsigned Idx) const { + assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!"); + return getSuccessor(); +} +unsigned CatchReturnInst::getNumSuccessorsV() const { + 
  return getNumSuccessors();
+}
+void CatchReturnInst::setSuccessorV(unsigned Idx, BasicBlock *B) {
+  assert(Idx < getNumSuccessors() && "Successor # out of range for catchret!");
+  setSuccessor(B);
+}
+
+//===----------------------------------------------------------------------===//
+// CatchSwitchInst Implementation
+//===----------------------------------------------------------------------===//
+
+CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
+                                 unsigned NumReservedValues,
+                                 const Twine &NameStr,
+                                 Instruction *InsertBefore)
+    : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+                     InsertBefore) {
+  if (UnwindDest)
+    ++NumReservedValues;
+  init(ParentPad, UnwindDest, NumReservedValues + 1);
+  setName(NameStr);
+}
+
+CatchSwitchInst::CatchSwitchInst(Value *ParentPad, BasicBlock *UnwindDest,
+                                 unsigned NumReservedValues,
+                                 const Twine &NameStr, BasicBlock *InsertAtEnd)
+    : TerminatorInst(ParentPad->getType(), Instruction::CatchSwitch, nullptr, 0,
+                     InsertAtEnd) {
+  if (UnwindDest)
+    ++NumReservedValues;
+  init(ParentPad, UnwindDest, NumReservedValues + 1);
+  setName(NameStr);
+}
+
+CatchSwitchInst::CatchSwitchInst(const CatchSwitchInst &CSI)
+    : TerminatorInst(CSI.getType(), Instruction::CatchSwitch, nullptr,
+                     CSI.getNumOperands()) {
+  init(CSI.getParentPad(), CSI.getUnwindDest(), CSI.getNumOperands());
+  setNumHungOffUseOperands(ReservedSpace);
+  Use *OL = getOperandList();
+  const Use *InOL = CSI.getOperandList();
+  for (unsigned I = 1, E = ReservedSpace; I != E; ++I)
+    OL[I] = InOL[I];
+}
+
+void CatchSwitchInst::init(Value *ParentPad, BasicBlock *UnwindDest,
+                           unsigned NumReservedValues) {
+  assert(ParentPad && NumReservedValues);
+
+  ReservedSpace = NumReservedValues;
+  setNumHungOffUseOperands(UnwindDest ? 2 : 1);
+  allocHungoffUses(ReservedSpace);
+
+  Op<0>() = ParentPad;
+  if (UnwindDest) {
+    setInstructionSubclassData(getSubclassDataFromInstruction() | 1);
+    setUnwindDest(UnwindDest);
+  }
+}
+
+/// growOperands - This grows the operand list in response to a push_back
+/// style of operation; the reserved space roughly doubles each time.
+void CatchSwitchInst::growOperands(unsigned Size) { + unsigned NumOperands = getNumOperands(); + assert(NumOperands >= 1); + if (ReservedSpace >= NumOperands + Size) + return; + ReservedSpace = (NumOperands + Size / 2) * 2; + growHungoffUses(ReservedSpace); +} + +void CatchSwitchInst::addHandler(BasicBlock *Handler) { + unsigned OpNo = getNumOperands(); + growOperands(1); + assert(OpNo < ReservedSpace && "Growing didn't work!"); + setNumHungOffUseOperands(getNumOperands() + 1); + getOperandList()[OpNo] = Handler; +} + +BasicBlock *CatchSwitchInst::getSuccessorV(unsigned idx) const { + return getSuccessor(idx); +} +unsigned CatchSwitchInst::getNumSuccessorsV() const { + return getNumSuccessors(); +} +void CatchSwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) { + setSuccessor(idx, B); +} + +//===----------------------------------------------------------------------===// +// FuncletPadInst Implementation +//===----------------------------------------------------------------------===// +void FuncletPadInst::init(Value *ParentPad, ArrayRef Args, + const Twine &NameStr) { + assert(getNumOperands() == 1 + Args.size() && "NumOperands not set up?"); + std::copy(Args.begin(), Args.end(), op_begin()); + setParentPad(ParentPad); + setName(NameStr); +} + +FuncletPadInst::FuncletPadInst(const FuncletPadInst &FPI) + : Instruction(FPI.getType(), FPI.getOpcode(), + OperandTraits::op_end(this) - + FPI.getNumOperands(), + FPI.getNumOperands()) { + std::copy(FPI.op_begin(), FPI.op_end(), op_begin()); + setParentPad(FPI.getParentPad()); +} + +FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, + ArrayRef Args, unsigned Values, + const Twine &NameStr, Instruction *InsertBefore) + : Instruction(ParentPad->getType(), Op, + OperandTraits::op_end(this) - Values, Values, + InsertBefore) { + init(ParentPad, Args, NameStr); +} + +FuncletPadInst::FuncletPadInst(Instruction::FuncletPadOps Op, Value *ParentPad, + ArrayRef Args, unsigned Values, + const Twine &NameStr, BasicBlock *InsertAtEnd) + : Instruction(ParentPad->getType(), Op, + OperandTraits::op_end(this) - Values, Values, + InsertAtEnd) { + init(ParentPad, Args, NameStr); +} + //===----------------------------------------------------------------------===// // UnreachableInst Implementation //===----------------------------------------------------------------------===// @@ -1193,6 +1505,8 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering, // GetElementPtrInst Implementation //===----------------------------------------------------------------------===// +void GetElementPtrInst::anchor() {} + void GetElementPtrInst::init(Value *Ptr, ArrayRef IdxList, const Twine &Name) { assert(getNumOperands() == 1 + IdxList.size() && @@ -2029,7 +2343,7 @@ bool CastInst::isNoopCast(const DataLayout &DL) const { /// * %S = secondOpcode MidTy %F to DstTy /// The function returns a resultOpcode so these two casts can be replaced with: /// * %Replacement = resultOpcode %SrcTy %x to DstTy -/// If no such cast is permited, the function returns 0. +/// If no such cast is permitted, the function returns 0. unsigned CastInst::isEliminableCastPair( Instruction::CastOps firstOp, Instruction::CastOps secondOp, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, @@ -2037,7 +2351,7 @@ unsigned CastInst::isEliminableCastPair( // Define the 144 possibilities for these two cast instructions. The values // in this matrix determine what to do in a given situation and select the // case in the switch below. 
The rows correspond to firstOp, the columns - // correspond to secondOp. In looking at the table below, keep in mind + // correspond to secondOp. In looking at the table below, keep in mind // the following cast properties: // // Size Compare Source Destination @@ -2087,17 +2401,19 @@ unsigned CastInst::isEliminableCastPair( { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+ }; + // TODO: This logic could be encoded into the table above and handled in the + // switch below. // If either of the casts are a bitcast from scalar to vector, disallow the - // merging. However, bitcast of A->B->A are allowed. - bool isFirstBitcast = (firstOp == Instruction::BitCast); - bool isSecondBitcast = (secondOp == Instruction::BitCast); - bool chainedBitcast = (SrcTy == DstTy && isFirstBitcast && isSecondBitcast); + // merging. However, any pair of bitcasts are allowed. + bool IsFirstBitcast = (firstOp == Instruction::BitCast); + bool IsSecondBitcast = (secondOp == Instruction::BitCast); + bool AreBothBitcasts = IsFirstBitcast && IsSecondBitcast; - // Check if any of the bitcasts convert scalars<->vectors. - if ((isFirstBitcast && isa(SrcTy) != isa(MidTy)) || - (isSecondBitcast && isa(MidTy) != isa(DstTy))) - // Unless we are bitcasing to the original type, disallow optimizations. - if (!chainedBitcast) return 0; + // Check if any of the casts convert scalars <-> vectors. + if ((IsFirstBitcast && isa(SrcTy) != isa(MidTy)) || + (IsSecondBitcast && isa(MidTy) != isa(DstTy))) + if (!AreBothBitcasts) + return 0; int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin] [secondOp-Instruction::CastOpsBegin]; @@ -2966,9 +3282,8 @@ AddrSpaceCastInst::AddrSpaceCastInst( void CmpInst::anchor() {} -CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, - Value *LHS, Value *RHS, const Twine &Name, - Instruction *InsertBefore) +CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS, + Value *RHS, const Twine &Name, Instruction *InsertBefore) : Instruction(ty, op, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -2979,9 +3294,8 @@ CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, setName(Name); } -CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, - Value *LHS, Value *RHS, const Twine &Name, - BasicBlock *InsertAtEnd) +CmpInst::CmpInst(Type *ty, OtherOps op, Predicate predicate, Value *LHS, + Value *RHS, const Twine &Name, BasicBlock *InsertAtEnd) : Instruction(ty, op, OperandTraits::op_begin(this), OperandTraits::operands(this), @@ -2993,8 +3307,7 @@ CmpInst::CmpInst(Type *ty, OtherOps op, unsigned short predicate, } CmpInst * -CmpInst::Create(OtherOps Op, unsigned short predicate, - Value *S1, Value *S2, +CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore) { if (Op == Instruction::ICmp) { if (InsertBefore) @@ -3014,7 +3327,7 @@ CmpInst::Create(OtherOps Op, unsigned short predicate, } CmpInst * -CmpInst::Create(OtherOps Op, unsigned short predicate, Value *S1, Value *S2, +CmpInst::Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name, BasicBlock *InsertAtEnd) { if (Op == Instruction::ICmp) { return new ICmpInst(*InsertAtEnd, CmpInst::Predicate(predicate), @@ -3077,6 +3390,8 @@ CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) { } } +void ICmpInst::anchor() {} + ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) { switch (pred) { default: llvm_unreachable("Unknown icmp predicate!"); @@ -3196,7 +3511,24 @@ 
CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) { } } -bool CmpInst::isUnsigned(unsigned short predicate) { +CmpInst::Predicate CmpInst::getSignedPredicate(Predicate pred) { + assert(CmpInst::isUnsigned(pred) && "Call only with signed predicates!"); + + switch (pred) { + default: + llvm_unreachable("Unknown predicate!"); + case CmpInst::ICMP_ULT: + return CmpInst::ICMP_SLT; + case CmpInst::ICMP_ULE: + return CmpInst::ICMP_SLE; + case CmpInst::ICMP_UGT: + return CmpInst::ICMP_SGT; + case CmpInst::ICMP_UGE: + return CmpInst::ICMP_SGE; + } +} + +bool CmpInst::isUnsigned(Predicate predicate) { switch (predicate) { default: return false; case ICmpInst::ICMP_ULT: case ICmpInst::ICMP_ULE: case ICmpInst::ICMP_UGT: @@ -3204,7 +3536,7 @@ bool CmpInst::isUnsigned(unsigned short predicate) { } } -bool CmpInst::isSigned(unsigned short predicate) { +bool CmpInst::isSigned(Predicate predicate) { switch (predicate) { default: return false; case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: case ICmpInst::ICMP_SGT: @@ -3212,7 +3544,7 @@ bool CmpInst::isSigned(unsigned short predicate) { } } -bool CmpInst::isOrdered(unsigned short predicate) { +bool CmpInst::isOrdered(Predicate predicate) { switch (predicate) { default: return false; case FCmpInst::FCMP_OEQ: case FCmpInst::FCMP_ONE: case FCmpInst::FCMP_OGT: @@ -3221,7 +3553,7 @@ bool CmpInst::isOrdered(unsigned short predicate) { } } -bool CmpInst::isUnordered(unsigned short predicate) { +bool CmpInst::isUnordered(Predicate predicate) { switch (predicate) { default: return false; case FCmpInst::FCMP_UEQ: case FCmpInst::FCMP_UNE: case FCmpInst::FCMP_UGT: @@ -3230,7 +3562,7 @@ bool CmpInst::isUnordered(unsigned short predicate) { } } -bool CmpInst::isTrueWhenEqual(unsigned short predicate) { +bool CmpInst::isTrueWhenEqual(Predicate predicate) { switch(predicate) { default: return false; case ICMP_EQ: case ICMP_UGE: case ICMP_ULE: case ICMP_SGE: case ICMP_SLE: @@ -3238,7 +3570,7 @@ bool CmpInst::isTrueWhenEqual(unsigned short predicate) { } } -bool CmpInst::isFalseWhenEqual(unsigned short predicate) { +bool CmpInst::isFalseWhenEqual(Predicate predicate) { switch(predicate) { case ICMP_NE: case ICMP_UGT: case ICMP_ULT: case ICMP_SGT: case ICMP_SLT: case FCMP_FALSE: case FCMP_ONE: case FCMP_OGT: case FCMP_OLT: return true; @@ -3569,6 +3901,10 @@ AddrSpaceCastInst *AddrSpaceCastInst::cloneImpl() const { } CallInst *CallInst::cloneImpl() const { + if (hasOperandBundles()) { + unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo); + return new(getNumOperands(), DescriptorBytes) CallInst(*this); + } return new(getNumOperands()) CallInst(*this); } @@ -3613,11 +3949,31 @@ IndirectBrInst *IndirectBrInst::cloneImpl() const { } InvokeInst *InvokeInst::cloneImpl() const { + if (hasOperandBundles()) { + unsigned DescriptorBytes = getNumOperandBundles() * sizeof(BundleOpInfo); + return new(getNumOperands(), DescriptorBytes) InvokeInst(*this); + } return new(getNumOperands()) InvokeInst(*this); } ResumeInst *ResumeInst::cloneImpl() const { return new (1) ResumeInst(*this); } +CleanupReturnInst *CleanupReturnInst::cloneImpl() const { + return new (getNumOperands()) CleanupReturnInst(*this); +} + +CatchReturnInst *CatchReturnInst::cloneImpl() const { + return new (getNumOperands()) CatchReturnInst(*this); +} + +CatchSwitchInst *CatchSwitchInst::cloneImpl() const { + return new CatchSwitchInst(*this); +} + +FuncletPadInst *FuncletPadInst::cloneImpl() const { + return new (getNumOperands()) FuncletPadInst(*this); +} + UnreachableInst 
*UnreachableInst::cloneImpl() const { LLVMContext &Context = getContext(); return new UnreachableInst(Context); diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp index 6d799e4b9650..8848bcb71477 100644 --- a/lib/IR/LLVMContext.cpp +++ b/lib/IR/LLVMContext.cpp @@ -104,6 +104,39 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { assert(DereferenceableOrNullID == MD_dereferenceable_or_null && "dereferenceable_or_null kind id drifted"); (void)DereferenceableOrNullID; + + // Create the 'make.implicit' metadata kind. + unsigned MakeImplicitID = getMDKindID("make.implicit"); + assert(MakeImplicitID == MD_make_implicit && + "make.implicit kind id drifted"); + (void)MakeImplicitID; + + // Create the 'unpredictable' metadata kind. + unsigned UnpredictableID = getMDKindID("unpredictable"); + assert(UnpredictableID == MD_unpredictable && + "unpredictable kind id drifted"); + (void)UnpredictableID; + + // Create the 'invariant.group' metadata kind. + unsigned InvariantGroupId = getMDKindID("invariant.group"); + assert(InvariantGroupId == MD_invariant_group && + "invariant.group kind id drifted"); + (void)InvariantGroupId; + + // Create the 'align' metadata kind. + unsigned AlignID = getMDKindID("align"); + assert(AlignID == MD_align && "align kind id drifted"); + (void)AlignID; + + auto *DeoptEntry = pImpl->getOrInsertBundleTag("deopt"); + assert(DeoptEntry->second == LLVMContext::OB_deopt && + "deopt operand bundle id drifted!"); + (void)DeoptEntry; + + auto *FuncletEntry = pImpl->getOrInsertBundleTag("funclet"); + assert(FuncletEntry->second == LLVMContext::OB_funclet && + "funclet operand bundle id drifted!"); + (void)FuncletEntry; } LLVMContext::~LLVMContext() { delete pImpl; } @@ -193,6 +226,11 @@ static bool isDiagnosticEnabled(const DiagnosticInfo &DI) { if (!cast(DI).isEnabled()) return false; break; + case llvm::DK_OptimizationRemarkAnalysisFPCommute: + if (!cast(DI) + .isEnabled()) + return false; + break; default: break; } @@ -250,7 +288,7 @@ unsigned LLVMContext::getMDKindID(StringRef Name) const { .first->second; } -/// getHandlerNames - Populate client supplied smallvector using custome +/// getHandlerNames - Populate client-supplied smallvector using custom /// metadata name and ID. void LLVMContext::getMDKindNames(SmallVectorImpl &Names) const { Names.resize(pImpl->CustomMDKindNames.size()); @@ -258,3 +296,11 @@ void LLVMContext::getMDKindNames(SmallVectorImpl &Names) const { E = pImpl->CustomMDKindNames.end(); I != E; ++I) Names[I->second] = I->first(); } + +void LLVMContext::getOperandBundleTags(SmallVectorImpl &Tags) const { + pImpl->getOperandBundleTags(Tags); +} + +uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const { + return pImpl->getOperandBundleTagID(Tag); +} diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp index 1e2080770fcd..5239b4f7d84a 100644 --- a/lib/IR/LLVMContextImpl.cpp +++ b/lib/IR/LLVMContextImpl.cpp @@ -27,6 +27,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C) FloatTy(C, Type::FloatTyID), DoubleTy(C, Type::DoubleTyID), MetadataTy(C, Type::MetadataTyID), + TokenTy(C, Type::TokenTyID), X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID), PPC_FP128Ty(C, Type::PPC_FP128TyID), @@ -78,7 +79,7 @@ LLVMContextImpl::~LLVMContextImpl() { // unnecessary RAUW when nodes are still unresolved. 
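// A minimal usage sketch for the fixed metadata kind IDs registered above,
// assuming an existing Instruction *I: clients can use the LLVMContext::MD_*
// enumerators directly instead of re-interning the string on every lookup.
//
//   LLVMContext &Ctx = I->getContext();
//   assert(Ctx.getMDKindID("unpredictable") == LLVMContext::MD_unpredictable);
//   I->setMetadata(LLVMContext::MD_unpredictable, MDNode::get(Ctx, None));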
for (auto *I : DistinctMDNodes) I->dropAllReferences(); -#define HANDLE_MDNODE_LEAF(CLASS) \ +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ for (auto *I : CLASS##s) \ I->dropAllReferences(); #include "llvm/IR/Metadata.def" @@ -92,8 +93,8 @@ LLVMContextImpl::~LLVMContextImpl() { // Destroy MDNodes. for (MDNode *I : DistinctMDNodes) I->deleteAsSubclass(); -#define HANDLE_MDNODE_LEAF(CLASS) \ - for (CLASS *I : CLASS##s) \ +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ + for (CLASS * I : CLASS##s) \ delete I; #include "llvm/IR/Metadata.def" @@ -218,6 +219,23 @@ unsigned MDNodeOpsKey::calculateHash(ArrayRef Ops) { return hash_combine_range(Ops.begin(), Ops.end()); } +StringMapEntry *LLVMContextImpl::getOrInsertBundleTag(StringRef Tag) { + uint32_t NewIdx = BundleTagCache.size(); + return &*(BundleTagCache.insert(std::make_pair(Tag, NewIdx)).first); +} + +void LLVMContextImpl::getOperandBundleTags(SmallVectorImpl &Tags) const { + Tags.resize(BundleTagCache.size()); + for (const auto &T : BundleTagCache) + Tags[T.second] = T.first(); +} + +uint32_t LLVMContextImpl::getOperandBundleTagID(StringRef Tag) const { + auto I = BundleTagCache.find(Tag); + assert(I != BundleTagCache.end() && "Unknown tag!"); + return I->second; +} + // ConstantsContext anchors void UnaryConstantExpr::anchor() { } diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index cbbf11e334c4..a24114d0a0ae 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -458,67 +458,6 @@ template <> struct MDNodeKeyImpl { unsigned getHashValue() const { return hash_combine(Filename, Directory); } }; -template <> struct MDNodeKeyImpl { - unsigned SourceLanguage; - Metadata *File; - StringRef Producer; - bool IsOptimized; - StringRef Flags; - unsigned RuntimeVersion; - StringRef SplitDebugFilename; - unsigned EmissionKind; - Metadata *EnumTypes; - Metadata *RetainedTypes; - Metadata *Subprograms; - Metadata *GlobalVariables; - Metadata *ImportedEntities; - uint64_t DWOId; - - MDNodeKeyImpl(unsigned SourceLanguage, Metadata *File, StringRef Producer, - bool IsOptimized, StringRef Flags, unsigned RuntimeVersion, - StringRef SplitDebugFilename, unsigned EmissionKind, - Metadata *EnumTypes, Metadata *RetainedTypes, - Metadata *Subprograms, Metadata *GlobalVariables, - Metadata *ImportedEntities, uint64_t DWOId) - : SourceLanguage(SourceLanguage), File(File), Producer(Producer), - IsOptimized(IsOptimized), Flags(Flags), RuntimeVersion(RuntimeVersion), - SplitDebugFilename(SplitDebugFilename), EmissionKind(EmissionKind), - EnumTypes(EnumTypes), RetainedTypes(RetainedTypes), - Subprograms(Subprograms), GlobalVariables(GlobalVariables), - ImportedEntities(ImportedEntities), DWOId(DWOId) {} - MDNodeKeyImpl(const DICompileUnit *N) - : SourceLanguage(N->getSourceLanguage()), File(N->getRawFile()), - Producer(N->getProducer()), IsOptimized(N->isOptimized()), - Flags(N->getFlags()), RuntimeVersion(N->getRuntimeVersion()), - SplitDebugFilename(N->getSplitDebugFilename()), - EmissionKind(N->getEmissionKind()), EnumTypes(N->getRawEnumTypes()), - RetainedTypes(N->getRawRetainedTypes()), - Subprograms(N->getRawSubprograms()), - GlobalVariables(N->getRawGlobalVariables()), - ImportedEntities(N->getRawImportedEntities()), DWOId(N->getDWOId()) {} - - bool isKeyOf(const DICompileUnit *RHS) const { - return SourceLanguage == RHS->getSourceLanguage() && - File == RHS->getRawFile() && Producer == RHS->getProducer() && - IsOptimized == RHS->isOptimized() && Flags == RHS->getFlags() && - RuntimeVersion == RHS->getRuntimeVersion() && - 
SplitDebugFilename == RHS->getSplitDebugFilename() && - EmissionKind == RHS->getEmissionKind() && - EnumTypes == RHS->getRawEnumTypes() && - RetainedTypes == RHS->getRawRetainedTypes() && - Subprograms == RHS->getRawSubprograms() && - GlobalVariables == RHS->getRawGlobalVariables() && - ImportedEntities == RHS->getRawImportedEntities() && - DWOId == RHS->getDWOId(); - } - unsigned getHashValue() const { - return hash_combine(SourceLanguage, File, Producer, IsOptimized, Flags, - RuntimeVersion, SplitDebugFilename, EmissionKind, - EnumTypes, RetainedTypes, Subprograms, GlobalVariables, - ImportedEntities, DWOId); - } -}; - template <> struct MDNodeKeyImpl { Metadata *Scope; StringRef Name; @@ -534,7 +473,6 @@ template <> struct MDNodeKeyImpl { unsigned VirtualIndex; unsigned Flags; bool IsOptimized; - Metadata *Function; Metadata *TemplateParams; Metadata *Declaration; Metadata *Variables; @@ -544,15 +482,15 @@ template <> struct MDNodeKeyImpl { bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex, unsigned Flags, bool IsOptimized, - Metadata *Function, Metadata *TemplateParams, - Metadata *Declaration, Metadata *Variables) + Metadata *TemplateParams, Metadata *Declaration, + Metadata *Variables) : Scope(Scope), Name(Name), LinkageName(LinkageName), File(File), Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition), ScopeLine(ScopeLine), ContainingType(ContainingType), Virtuality(Virtuality), VirtualIndex(VirtualIndex), Flags(Flags), IsOptimized(IsOptimized), - Function(Function), TemplateParams(TemplateParams), - Declaration(Declaration), Variables(Variables) {} + TemplateParams(TemplateParams), Declaration(Declaration), + Variables(Variables) {} MDNodeKeyImpl(const DISubprogram *N) : Scope(N->getRawScope()), Name(N->getName()), LinkageName(N->getLinkageName()), File(N->getRawFile()), @@ -561,7 +499,6 @@ template <> struct MDNodeKeyImpl { ScopeLine(N->getScopeLine()), ContainingType(N->getRawContainingType()), Virtuality(N->getVirtuality()), VirtualIndex(N->getVirtualIndex()), Flags(N->getFlags()), IsOptimized(N->isOptimized()), - Function(N->getRawFunction()), TemplateParams(N->getRawTemplateParams()), Declaration(N->getRawDeclaration()), Variables(N->getRawVariables()) {} @@ -576,7 +513,6 @@ template <> struct MDNodeKeyImpl { Virtuality == RHS->getVirtuality() && VirtualIndex == RHS->getVirtualIndex() && Flags == RHS->getFlags() && IsOptimized == RHS->isOptimized() && - Function == RHS->getRawFunction() && TemplateParams == RHS->getRawTemplateParams() && Declaration == RHS->getRawDeclaration() && Variables == RHS->getRawVariables(); @@ -584,7 +520,7 @@ template <> struct MDNodeKeyImpl { unsigned getHashValue() const { return hash_combine(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition, ScopeLine, ContainingType, - Virtuality, VirtualIndex, Flags, IsOptimized, Function, + Virtuality, VirtualIndex, Flags, IsOptimized, TemplateParams, Declaration, Variables); } }; @@ -759,7 +695,6 @@ template <> struct MDNodeKeyImpl { }; template <> struct MDNodeKeyImpl { - unsigned Tag; Metadata *Scope; StringRef Name; Metadata *File; @@ -768,23 +703,23 @@ template <> struct MDNodeKeyImpl { unsigned Arg; unsigned Flags; - MDNodeKeyImpl(unsigned Tag, Metadata *Scope, StringRef Name, Metadata *File, - unsigned Line, Metadata *Type, unsigned Arg, unsigned Flags) - : Tag(Tag), Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), - Arg(Arg), Flags(Flags) {} + 
MDNodeKeyImpl(Metadata *Scope, StringRef Name, Metadata *File, unsigned Line,
+                Metadata *Type, unsigned Arg, unsigned Flags)
+      : Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), Arg(Arg),
+        Flags(Flags) {}
   MDNodeKeyImpl(const DILocalVariable *N)
-      : Tag(N->getTag()), Scope(N->getRawScope()), Name(N->getName()),
-        File(N->getRawFile()), Line(N->getLine()), Type(N->getRawType()),
-        Arg(N->getArg()), Flags(N->getFlags()) {}
+      : Scope(N->getRawScope()), Name(N->getName()), File(N->getRawFile()),
+        Line(N->getLine()), Type(N->getRawType()), Arg(N->getArg()),
+        Flags(N->getFlags()) {}

   bool isKeyOf(const DILocalVariable *RHS) const {
-    return Tag == RHS->getTag() && Scope == RHS->getRawScope() &&
-           Name == RHS->getName() && File == RHS->getRawFile() &&
-           Line == RHS->getLine() && Type == RHS->getRawType() &&
-           Arg == RHS->getArg() && Flags == RHS->getFlags();
+    return Scope == RHS->getRawScope() && Name == RHS->getName() &&
+           File == RHS->getRawFile() && Line == RHS->getLine() &&
+           Type == RHS->getRawType() && Arg == RHS->getArg() &&
+           Flags == RHS->getFlags();
   }
   unsigned getHashValue() const {
-    return hash_combine(Tag, Scope, Name, File, Line, Type, Arg, Flags);
+    return hash_combine(Scope, Name, File, Line, Type, Arg, Flags);
   }
 };

@@ -857,6 +792,49 @@ template <> struct MDNodeKeyImpl {
   }
 };

+template <> struct MDNodeKeyImpl<DIMacro> {
+  unsigned MIType;
+  unsigned Line;
+  StringRef Name;
+  StringRef Value;
+
+  MDNodeKeyImpl(unsigned MIType, unsigned Line, StringRef Name, StringRef Value)
+      : MIType(MIType), Line(Line), Name(Name), Value(Value) {}
+  MDNodeKeyImpl(const DIMacro *N)
+      : MIType(N->getMacinfoType()), Line(N->getLine()), Name(N->getName()),
+        Value(N->getValue()) {}
+
+  bool isKeyOf(const DIMacro *RHS) const {
+    return MIType == RHS->getMacinfoType() && Line == RHS->getLine() &&
+           Name == RHS->getName() && Value == RHS->getValue();
+  }
+  unsigned getHashValue() const {
+    return hash_combine(MIType, Line, Name, Value);
+  }
+};
+
+template <> struct MDNodeKeyImpl<DIMacroFile> {
+  unsigned MIType;
+  unsigned Line;
+  Metadata *File;
+  Metadata *Elements;
+
+  MDNodeKeyImpl(unsigned MIType, unsigned Line, Metadata *File,
+                Metadata *Elements)
+      : MIType(MIType), Line(Line), File(File), Elements(Elements) {}
+  MDNodeKeyImpl(const DIMacroFile *N)
+      : MIType(N->getMacinfoType()), Line(N->getLine()), File(N->getRawFile()),
+        Elements(N->getRawElements()) {}
+
+  bool isKeyOf(const DIMacroFile *RHS) const {
+    return MIType == RHS->getMacinfoType() && Line == RHS->getLine() &&
+           File == RHS->getRawFile() && Elements == RHS->getRawElements();
+  }
+  unsigned getHashValue() const {
+    return hash_combine(MIType, Line, File, Elements);
+  }
+};
+
 /// \brief DenseMapInfo for MDNode subclasses.
 template <class NodeTy> struct MDNodeInfo {
   typedef MDNodeKeyImpl<NodeTy> KeyTy;
@@ -953,7 +931,8 @@ public:
   DenseMap<Value *, ValueName *> ValueNames;
-#define HANDLE_MDNODE_LEAF(CLASS) DenseSet<CLASS *, CLASS##Info> CLASS##s;
+#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS)                                    \
+  DenseSet<CLASS *, CLASS##Info> CLASS##s;
 #include "llvm/IR/Metadata.def"

   // MDNodes may be uniqued or not uniqued.  When they're not uniqued, they
@@ -988,8 +967,10 @@ public:
   ConstantInt *TheTrueVal;
   ConstantInt *TheFalseVal;

+  std::unique_ptr<ConstantTokenNone> TheNoneToken;
+
   // Basic type instances.
-  Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy;
+  Type VoidTy, LabelTy, HalfTy, FloatTy, DoubleTy, MetadataTy, TokenTy;
   Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
   IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty;
@@ -1033,20 +1014,19 @@ public:
   /// instructions in different blocks at the same location.
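// A sketch of how the key structs above are consumed (mirroring getUniqued()
// in lib/IR/MetadataImpl.h): a key is built from candidate operands, hashed
// via getHashValue(), and compared against stored nodes via isKeyOf().
//
//   MDNodeKeyImpl<DIMacro> Key(MIType, Line, Name, Value);
//   auto It = Store.find_as(Key); // Store is a DenseSet<DIMacro *, DIMacroInfo>
//   DIMacro *Existing = It == Store.end() ? nullptr : *It;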
DenseMap, unsigned> DiscriminatorTable; - /// \brief Mapping from a function to its prefix data, which is stored as the - /// operand of an unparented ReturnInst so that the prefix data has a Use. - typedef DenseMap PrefixDataMapTy; - PrefixDataMapTy PrefixDataMap; - - /// \brief Mapping from a function to its prologue data, which is stored as - /// the operand of an unparented ReturnInst so that the prologue data has a - /// Use. - typedef DenseMap PrologueDataMapTy; - PrologueDataMapTy PrologueDataMap; - int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx); int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx); + /// \brief A set of interned tags for operand bundles. The StringMap maps + /// bundle tags to their IDs. + /// + /// \see LLVMContext::getOperandBundleTagID + StringMap BundleTagCache; + + StringMapEntry *getOrInsertBundleTag(StringRef Tag); + void getOperandBundleTags(SmallVectorImpl &Tags) const; + uint32_t getOperandBundleTagID(StringRef Tag) const; + LLVMContextImpl(LLVMContext &C); ~LLVMContextImpl(); diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 27d98a279fe2..f2e0c7d32c02 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -569,13 +569,33 @@ void PMTopLevelManager::collectLastUses(SmallVectorImpl &LastUses, AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { AnalysisUsage *AnUsage = nullptr; - DenseMap::iterator DMI = AnUsageMap.find(P); + auto DMI = AnUsageMap.find(P); if (DMI != AnUsageMap.end()) AnUsage = DMI->second; else { - AnUsage = new AnalysisUsage(); - P->getAnalysisUsage(*AnUsage); - AnUsageMap[P] = AnUsage; + // Look up the analysis usage from the pass instance (different instances + // of the same pass can produce different results), but unique the + // resulting object to reduce memory usage. This helps to greatly reduce + // memory usage when we have many instances of only a few pass types + // (e.g. instcombine, simplifycfg, etc...) which tend to share a fixed set + // of dependencies. + AnalysisUsage AU; + P->getAnalysisUsage(AU); + + AUFoldingSetNode* Node = nullptr; + FoldingSetNodeID ID; + AUFoldingSetNode::Profile(ID, AU); + void *IP = nullptr; + if (auto *N = UniqueAnalysisUsages.FindNodeOrInsertPos(ID, IP)) + Node = N; + else { + Node = new (AUFoldingSetNodeAllocator.Allocate()) AUFoldingSetNode(AU); + UniqueAnalysisUsages.InsertNode(Node, IP); + } + assert(Node && "cached analysis usage must be non null"); + + AnUsageMap[P] = &Node->AU; + AnUsage = &Node->AU;; } return AnUsage; } @@ -686,6 +706,10 @@ void PMTopLevelManager::schedulePass(Pass *P) { /// passes and all pass managers. If desired pass is not found /// then return NULL. Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { + // For immutable passes we have a direct mapping from ID to pass, so check + // that first. + if (Pass *P = ImmutablePassMap.lookup(AID)) + return P; // Check pass managers for (PMDataManager *PassManager : PassManagers) @@ -697,24 +721,6 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { if (Pass *P = IndirectPassManager->findAnalysisPass(AID, false)) return P; - // Check the immutable passes. Iterate in reverse order so that we find - // the most recently registered passes first. 
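// A sketch of the behavior that replaces the reverse scan deleted below:
// once addImmutablePass() has populated ImmutablePassMap, both the concrete
// pass ID and every analysis-group interface ID resolve in a single lookup.
//
//   if (Pass *P = ImmutablePassMap.lookup(AID))
//     return P; // covers the pass itself and its implemented interfaces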
- for (auto I = ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; - ++I) { - AnalysisID PI = (*I)->getPassID(); - if (PI == AID) - return *I; - - // If Pass not found then check the interfaces implemented by Immutable Pass - const PassInfo *PassInf = findAnalysisPassInfo(PI); - assert(PassInf && "Expected all immutable passes to be initialized"); - const std::vector &ImmPI = - PassInf->getInterfacesImplemented(); - for (const PassInfo *PI : ImmPI) - if (PI->getTypeInfo() == AID) - return *I; - } - return nullptr; } @@ -729,6 +735,24 @@ const PassInfo *PMTopLevelManager::findAnalysisPassInfo(AnalysisID AID) const { return PI; } +void PMTopLevelManager::addImmutablePass(ImmutablePass *P) { + P->initializePass(); + ImmutablePasses.push_back(P); + + // Add this pass to the map from its analysis ID. We clobber any prior runs + // of the pass in the map so that the last one added is the one found when + // doing lookups. + AnalysisID AID = P->getPassID(); + ImmutablePassMap[AID] = P; + + // Also add any interfaces implemented by the immutable pass to the map for + // fast lookup. + const PassInfo *PassInf = findAnalysisPassInfo(AID); + assert(PassInf && "Expected all immutable passes to be initialized"); + for (const PassInfo *ImmPI : PassInf->getInterfacesImplemented()) + ImmutablePassMap[ImmPI->getTypeInfo()] = P; +} + // Print passes managed by this top level manager. void PMTopLevelManager::dumpPasses() const { @@ -780,15 +804,8 @@ void PMTopLevelManager::initializeAllAnalysisInfo() { for (DenseMap::iterator DMI = LastUser.begin(), DME = LastUser.end(); DMI != DME; ++DMI) { - DenseMap >::iterator InvDMI = - InversedLastUser.find(DMI->second); - if (InvDMI != InversedLastUser.end()) { - SmallPtrSet &L = InvDMI->second; - L.insert(DMI->first); - } else { - SmallPtrSet L; L.insert(DMI->first); - InversedLastUser[DMI->second] = L; - } + SmallPtrSet &L = InversedLastUser[DMI->second]; + L.insert(DMI->first); } } @@ -801,10 +818,6 @@ PMTopLevelManager::~PMTopLevelManager() { for (SmallVectorImpl::iterator I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) delete *I; - - for (DenseMap::iterator DMI = AnUsageMap.begin(), - DME = AnUsageMap.end(); DMI != DME; ++DMI) - delete DMI->second; } //===----------------------------------------------------------------------===// @@ -989,31 +1002,28 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { // At the moment, this pass is the last user of all required passes. SmallVector LastUses; - SmallVector RequiredPasses; + SmallVector UsedPasses; SmallVector ReqAnalysisNotAvailable; unsigned PDepth = this->getDepth(); - collectRequiredAnalysis(RequiredPasses, - ReqAnalysisNotAvailable, P); - for (SmallVectorImpl::iterator I = RequiredPasses.begin(), - E = RequiredPasses.end(); I != E; ++I) { - Pass *PRequired = *I; + collectRequiredAndUsedAnalyses(UsedPasses, ReqAnalysisNotAvailable, P); + for (Pass *PUsed : UsedPasses) { unsigned RDepth = 0; - assert(PRequired->getResolver() && "Analysis Resolver is not set"); - PMDataManager &DM = PRequired->getResolver()->getPMDataManager(); + assert(PUsed->getResolver() && "Analysis Resolver is not set"); + PMDataManager &DM = PUsed->getResolver()->getPMDataManager(); RDepth = DM.getDepth(); if (PDepth == RDepth) - LastUses.push_back(PRequired); + LastUses.push_back(PUsed); else if (PDepth > RDepth) { // Let the parent claim responsibility of last use - TransferLastUses.push_back(PRequired); + TransferLastUses.push_back(PUsed); // Keep track of higher level analysis used by this manager. 
- HigherLevelAnalysis.push_back(PRequired); + HigherLevelAnalysis.push_back(PUsed); } else - llvm_unreachable("Unable to accommodate Required Pass"); + llvm_unreachable("Unable to accommodate Used Pass"); } // Set P as P's last user until someone starts using P. @@ -1030,10 +1040,8 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { } // Now, take care of required analyses that are not available. - for (SmallVectorImpl::iterator - I = ReqAnalysisNotAvailable.begin(), - E = ReqAnalysisNotAvailable.end() ;I != E; ++I) { - const PassInfo *PI = TPM->findAnalysisPassInfo(*I); + for (AnalysisID ID : ReqAnalysisNotAvailable) { + const PassInfo *PI = TPM->findAnalysisPassInfo(ID); Pass *AnalysisPass = PI->createPass(); this->addLowerLevelRequiredPass(P, AnalysisPass); } @@ -1048,30 +1056,29 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { } -/// Populate RP with analysis pass that are required by +/// Populate UP with analysis pass that are used or required by /// pass P and are available. Populate RP_NotAvail with analysis /// pass that are required by pass P but are not available. -void PMDataManager::collectRequiredAnalysis(SmallVectorImpl &RP, - SmallVectorImpl &RP_NotAvail, - Pass *P) { +void PMDataManager::collectRequiredAndUsedAnalyses( + SmallVectorImpl &UP, SmallVectorImpl &RP_NotAvail, + Pass *P) { AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); - const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet(); - for (AnalysisUsage::VectorType::const_iterator - I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) { - if (Pass *AnalysisPass = findAnalysisPass(*I, true)) - RP.push_back(AnalysisPass); - else - RP_NotAvail.push_back(*I); - } - const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet(); - for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(), - E = IDs.end(); I != E; ++I) { - if (Pass *AnalysisPass = findAnalysisPass(*I, true)) - RP.push_back(AnalysisPass); + for (const auto &UsedID : AnUsage->getUsedSet()) + if (Pass *AnalysisPass = findAnalysisPass(UsedID, true)) + UP.push_back(AnalysisPass); + + for (const auto &RequiredID : AnUsage->getRequiredSet()) + if (Pass *AnalysisPass = findAnalysisPass(RequiredID, true)) + UP.push_back(AnalysisPass); else - RP_NotAvail.push_back(*I); - } + RP_NotAvail.push_back(RequiredID); + + for (const auto &RequiredID : AnUsage->getRequiredTransitiveSet()) + if (Pass *AnalysisPass = findAnalysisPass(RequiredID, true)) + UP.push_back(AnalysisPass); + else + RP_NotAvail.push_back(RequiredID); } // All Required analyses should be available to the pass as it runs! 
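// A minimal sketch (with a hypothetical MyPass) of where the used and
// required sets consumed above come from:
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<DominatorTreeWrapperPass>();            // must be run
//     AU.addUsedIfAvailable<TargetLibraryInfoWrapperPass>(); // optional
//   }
//
// Required analyses that are unavailable land in RP_NotAvail and get
// scheduled as lower-level required passes; used-only analyses are merely
// recorded so last-user bookkeeping keeps them alive long enough.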
Here
@@ -1206,6 +1213,15 @@ void PMDataManager::dumpPreservedSet(const Pass *P) const {
   dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
 }

+void PMDataManager::dumpUsedSet(const Pass *P) const {
+  if (PassDebugging < Details)
+    return;
+
+  AnalysisUsage analysisUsage;
+  P->getAnalysisUsage(analysisUsage);
+  dumpAnalysisUsage("Used", P, analysisUsage.getUsedSet());
+}
+
 void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
                                       const AnalysisUsage::VectorType &Set) const {
   assert(PassDebugging >= Details);
@@ -1310,6 +1326,7 @@ bool BBPassManager::runOnFunction(Function &F) {
         dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG, I->getName());
       dumpPreservedSet(BP);
+      dumpUsedSet(BP);

       verifyPreservedAnalysis(BP);
       removeNotPreservedAnalysis(BP);
@@ -1524,6 +1541,7 @@ bool FPPassManager::runOnFunction(Function &F) {
     if (LocalChanged)
       dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
     dumpPreservedSet(FP);
+    dumpUsedSet(FP);

     verifyPreservedAnalysis(FP);
     removeNotPreservedAnalysis(FP);
@@ -1601,6 +1619,7 @@ MPPassManager::runOnModule(Module &M) {
         dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
       dumpPreservedSet(MP);
+      dumpUsedSet(MP);

       verifyPreservedAnalysis(MP);
       removeNotPreservedAnalysis(MP);
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
index b4c5ca7c6a12..4ce3ea2e9c04 100644
--- a/lib/IR/MDBuilder.cpp
+++ b/lib/IR/MDBuilder.cpp
@@ -36,8 +36,7 @@ MDNode *MDBuilder::createFPMath(float Accuracy) {

 MDNode *MDBuilder::createBranchWeights(uint32_t TrueWeight,
                                        uint32_t FalseWeight) {
-  uint32_t Weights[] = {TrueWeight, FalseWeight};
-  return createBranchWeights(Weights);
+  return createBranchWeights({TrueWeight, FalseWeight});
 }

 MDNode *MDBuilder::createBranchWeights(ArrayRef<uint32_t> Weights) {
@@ -53,14 +52,15 @@ MDNode *MDBuilder::createBranchWeights(ArrayRef<uint32_t> Weights) {
   return MDNode::get(Context, Vals);
 }

+MDNode *MDBuilder::createUnpredictable() {
+  return MDNode::get(Context, None);
+}
+
 MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) {
-  SmallVector<Metadata *, 2> Vals(2);
-  Vals[0] = createString("function_entry_count");
-  Type *Int64Ty = Type::getInt64Ty(Context);
-  Vals[1] = createConstant(ConstantInt::get(Int64Ty, Count));
-
-  return MDNode::get(Context, Vals);
+  Type *Int64Ty = Type::getInt64Ty(Context);
+  return MDNode::get(Context,
+                     {createString("function_entry_count"),
+                      createConstant(ConstantInt::get(Int64Ty, Count))});
 }

 MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
@@ -76,8 +76,7 @@ MDNode *MDBuilder::createRange(Constant *Lo, Constant *Hi) {
     return nullptr;

   // Return the range [Lo, Hi).
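// A usage sketch, assuming an LLVMContext Ctx and a LoadInst *LI: range
// metadata built here is half-open, so [0, 10) admits the values 0..9.
//
//   MDBuilder MDB(Ctx);
//   LI->setMetadata(LLVMContext::MD_range,
//                   MDB.createRange(APInt(32, 0), APInt(32, 10)));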
- Metadata *Range[2] = {createConstant(Lo), createConstant(Hi)}; - return MDNode::get(Context, Range); + return MDNode::get(Context, {createConstant(Lo), createConstant(Hi)}); } MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) { @@ -112,12 +111,10 @@ MDNode *MDBuilder::createTBAANode(StringRef Name, MDNode *Parent, bool isConstant) { if (isConstant) { Constant *Flags = ConstantInt::get(Type::getInt64Ty(Context), 1); - Metadata *Ops[3] = {createString(Name), Parent, createConstant(Flags)}; - return MDNode::get(Context, Ops); - } else { - Metadata *Ops[2] = {createString(Name), Parent}; - return MDNode::get(Context, Ops); + return MDNode::get(Context, + {createString(Name), Parent, createConstant(Flags)}); } + return MDNode::get(Context, {createString(Name), Parent}); } MDNode *MDBuilder::createAliasScopeDomain(StringRef Name) { @@ -125,8 +122,7 @@ MDNode *MDBuilder::createAliasScopeDomain(StringRef Name) { } MDNode *MDBuilder::createAliasScope(StringRef Name, MDNode *Domain) { - Metadata *Ops[2] = {createString(Name), Domain}; - return MDNode::get(Context, Ops); + return MDNode::get(Context, {createString(Name), Domain}); } /// \brief Return metadata for a tbaa.struct node with the given @@ -161,23 +157,19 @@ MDNode *MDBuilder::createTBAAStructTypeNode( MDNode *MDBuilder::createTBAAScalarTypeNode(StringRef Name, MDNode *Parent, uint64_t Offset) { ConstantInt *Off = ConstantInt::get(Type::getInt64Ty(Context), Offset); - Metadata *Ops[3] = {createString(Name), Parent, createConstant(Off)}; - return MDNode::get(Context, Ops); + return MDNode::get(Context, + {createString(Name), Parent, createConstant(Off)}); } /// \brief Return metadata for a TBAA tag node with the given /// base type, access type and offset relative to the base type. MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType, uint64_t Offset, bool IsConstant) { - Type *Int64 = Type::getInt64Ty(Context); + IntegerType *Int64 = Type::getInt64Ty(Context); + ConstantInt *Off = ConstantInt::get(Int64, Offset); if (IsConstant) { - Metadata *Ops[4] = {BaseType, AccessType, - createConstant(ConstantInt::get(Int64, Offset)), - createConstant(ConstantInt::get(Int64, 1))}; - return MDNode::get(Context, Ops); - } else { - Metadata *Ops[3] = {BaseType, AccessType, - createConstant(ConstantInt::get(Int64, Offset))}; - return MDNode::get(Context, Ops); + return MDNode::get(Context, {BaseType, AccessType, createConstant(Off), + createConstant(ConstantInt::get(Int64, 1))}); } + return MDNode::get(Context, {BaseType, AccessType, createConstant(Off)}); } diff --git a/lib/IR/Makefile b/lib/IR/Makefile index cc403f38dd8e..329cd6636e94 100644 --- a/lib/IR/Makefile +++ b/lib/IR/Makefile @@ -10,14 +10,20 @@ LEVEL = ../.. 
LIBRARYNAME = LLVMCore BUILD_ARCHIVE = 1 -BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/IR/Intrinsics.gen +BUILT_SOURCES = $(PROJ_OBJ_ROOT)/include/llvm/IR/Intrinsics.gen \ + $(PROJ_OBJ_ROOT)/include/llvm/IR/Attributes.inc \ + $(PROJ_OBJ_ROOT)/lib/IR/AttributesCompatFunc.inc include $(LEVEL)/Makefile.common GENFILE:=$(PROJ_OBJ_ROOT)/include/llvm/IR/Intrinsics.gen +ATTRINCFILE:=$(PROJ_OBJ_ROOT)/include/llvm/IR/Attributes.inc +ATTRCOMPATFUNCINCFILE:=$(PROJ_OBJ_ROOT)/lib/IR/AttributesCompatFunc.inc INTRINSICTD := $(PROJ_SRC_ROOT)/include/llvm/IR/Intrinsics.td INTRINSICTDS := $(wildcard $(PROJ_SRC_ROOT)/include/llvm/IR/Intrinsics*.td) +ATTRIBUTESTD := $(PROJ_SRC_ROOT)/include/llvm/IR/Attributes.td +ATTRCOMPATFUNCTD := $(PROJ_SRC_ROOT)/lib/IR/AttributesCompatFunc.td $(ObjDir)/Intrinsics.gen.tmp: $(ObjDir)/.dir $(INTRINSICTDS) $(LLVM_TBLGEN) $(Echo) Building Intrinsics.gen.tmp from Intrinsics.td @@ -28,6 +34,28 @@ $(GENFILE): $(ObjDir)/Intrinsics.gen.tmp $(PROJ_OBJ_ROOT)/include/llvm/IR/.dir $(EchoCmd) Updated Intrinsics.gen because Intrinsics.gen.tmp \ changed significantly. ) +$(ObjDir)/Attributes.inc.tmp: $(ObjDir)/.dir $(ATTRIBUTESTD) $(LLVM_TBLGEN) + $(Echo) Building Attributes.inc.tmp from $(ATTRIBUTESTD) + $(Verb) $(LLVMTableGen) $(call SYSPATH, $(ATTRIBUTESTD)) -o $(call SYSPATH, $@) -gen-attrs + +$(ATTRINCFILE): $(ObjDir)/Attributes.inc.tmp $(PROJ_OBJ_ROOT)/include/llvm/IR/.dir + $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \ + $(EchoCmd) Updated Attributes.inc because Attributes.inc.tmp \ + changed significantly. ) + +$(ObjDir)/AttributesCompatFunc.inc.tmp: $(ObjDir)/.dir $(ATTRCOMPATFUNCTD) $(LLVM_TBLGEN) + $(Echo) Building AttributesCompatFunc.inc.tmp from $(ATTRCOMPATFUNCTD) + $(Verb) $(LLVMTableGen) $(call SYSPATH, $(ATTRCOMPATFUNCTD)) -o $(call SYSPATH, $@) -gen-attrs + +$(ATTRCOMPATFUNCINCFILE): $(ObjDir)/AttributesCompatFunc.inc.tmp $(PROJ_OBJ_ROOT)/include/llvm/IR/.dir + $(Verb) $(CMP) -s $@ $< || ( $(CP) $< $@ && \ + $(EchoCmd) Updated AttributesCompatFunc.inc because AttributesCompatFunc.inc.tmp \ + changed significantly. 
) + install-local:: $(GENFILE) $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/IR/Intrinsics.gen $(Verb) $(DataInstall) $(GENFILE) $(DESTDIR)$(PROJ_includedir)/llvm/IR/Intrinsics.gen + +install-local:: $(ATTRINCFILE) + $(Echo) Installing $(DESTDIR)$(PROJ_includedir)/llvm/IR/Attributes.inc + $(Verb) $(DataInstall) $(ATTRINCFILE) $(DESTDIR)$(PROJ_includedir)/llvm/IR/Attributes.inc diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 1abcf0d18c91..ab1ba5e2035b 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -120,6 +120,38 @@ void MetadataAsValue::untrack() { MetadataTracking::untrack(MD); } +bool MetadataTracking::track(void *Ref, Metadata &MD, OwnerTy Owner) { + assert(Ref && "Expected live reference"); + assert((Owner || *static_cast(Ref) == &MD) && + "Reference without owner must be direct"); + if (auto *R = ReplaceableMetadataImpl::get(MD)) { + R->addRef(Ref, Owner); + return true; + } + return false; +} + +void MetadataTracking::untrack(void *Ref, Metadata &MD) { + assert(Ref && "Expected live reference"); + if (auto *R = ReplaceableMetadataImpl::get(MD)) + R->dropRef(Ref); +} + +bool MetadataTracking::retrack(void *Ref, Metadata &MD, void *New) { + assert(Ref && "Expected live reference"); + assert(New && "Expected live reference"); + assert(Ref != New && "Expected change"); + if (auto *R = ReplaceableMetadataImpl::get(MD)) { + R->moveRef(Ref, New, MD); + return true; + } + return false; +} + +bool MetadataTracking::isReplaceable(const Metadata &MD) { + return ReplaceableMetadataImpl::get(const_cast(MD)); +} + void ReplaceableMetadataImpl::addRef(void *Ref, OwnerTy Owner) { bool WasInserted = UseMap.insert(std::make_pair(Ref, std::make_pair(Owner, NextIndex))) @@ -239,6 +271,12 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) { } } +ReplaceableMetadataImpl *ReplaceableMetadataImpl::get(Metadata &MD) { + if (auto *N = dyn_cast(&MD)) + return N->Context.getReplaceableUses(); + return dyn_cast(&MD); +} + static Function *getLocalFunction(Value *V) { assert(V && "Expected value"); if (auto *A = dyn_cast(V)) @@ -517,7 +555,7 @@ void MDNode::decrementUnresolvedOperandCount() { resolve(); } -void MDNode::resolveCycles() { +void MDNode::resolveCycles(bool MDMaterialized) { if (isResolved()) return; @@ -530,6 +568,8 @@ void MDNode::resolveCycles() { if (!N) continue; + if (N->isTemporary() && !MDMaterialized) + continue; assert(!N->isTemporary() && "Expected all forward declarations to be resolved"); if (!N->isResolved()) @@ -545,6 +585,18 @@ static bool hasSelfReference(MDNode *N) { } MDNode *MDNode::replaceWithPermanentImpl() { + switch (getMetadataID()) { + default: + // If this type isn't uniquable, replace with a distinct node. + return replaceWithDistinctImpl(); + +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ + case CLASS##Kind: \ + break; +#include "llvm/IR/Metadata.def" + } + + // Even if this type is uniquable, self-references have to be distinct. if (hasSelfReference(this)) return replaceWithDistinctImpl(); return replaceWithUniquedImpl(); @@ -671,8 +723,8 @@ MDNode *MDNode::uniquify() { // Try to insert into uniquing store. 
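// A sketch of the forward-reference workflow that the uniquable/distinct
// split above supports, assuming Ctx and an already-built MDNode *Real:
//
//   TempMDTuple Temp = MDTuple::getTemporary(Ctx, None); // placeholder
//   Metadata *Ops[] = {Temp.get()};
//   MDNode *N = MDTuple::get(Ctx, Ops); // refers to the temporary
//   Temp->replaceAllUsesWith(Real);     // patches N's operand in place
//   N->resolveCycles();                 // N can now become fully resolved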
switch (getMetadataID()) { default: - llvm_unreachable("Invalid subclass of MDNode"); -#define HANDLE_MDNODE_LEAF(CLASS) \ + llvm_unreachable("Invalid or non-uniquable subclass of MDNode"); +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ case CLASS##Kind: { \ CLASS *SubclassThis = cast(this); \ std::integral_constant::value> \ @@ -687,8 +739,8 @@ MDNode *MDNode::uniquify() { void MDNode::eraseFromStore() { switch (getMetadataID()) { default: - llvm_unreachable("Invalid subclass of MDNode"); -#define HANDLE_MDNODE_LEAF(CLASS) \ + llvm_unreachable("Invalid or non-uniquable subclass of MDNode"); +#define HANDLE_MDNODE_LEAF_UNIQUABLE(CLASS) \ case CLASS##Kind: \ getContext().pImpl->CLASS##s.erase(cast(this)); \ break; @@ -941,6 +993,17 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) { return MDNode::get(A->getContext(), MDs); } +MDNode *MDNode::getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B) { + if (!A || !B) + return nullptr; + + ConstantInt *AVal = mdconst::extract(A->getOperand(0)); + ConstantInt *BVal = mdconst::extract(B->getOperand(0)); + if (AVal->getZExtValue() < BVal->getZExtValue()) + return A; + return B; +} + //===----------------------------------------------------------------------===// // NamedMDNode implementation. // @@ -1045,14 +1108,10 @@ MDNode *Instruction::getMetadataImpl(StringRef Kind) const { return getMetadataImpl(getContext().getMDKindID(Kind)); } -void Instruction::dropUnknownMetadata(ArrayRef KnownIDs) { +void Instruction::dropUnknownNonDebugMetadata(ArrayRef KnownIDs) { SmallSet KnownSet; KnownSet.insert(KnownIDs.begin(), KnownIDs.end()); - // Drop debug if needed - if (KnownSet.erase(LLVMContext::MD_dbg)) - DbgLoc = DebugLoc(); - if (!hasMetadataHashEntry()) return; // Nothing to remove! @@ -1077,7 +1136,7 @@ void Instruction::dropUnknownMetadata(ArrayRef KnownIDs) { } } -/// setMetadata - Set the metadata of of the specified kind to the specified +/// setMetadata - Set the metadata of the specified kind to the specified /// node. This updates/replaces metadata if already present, or removes it if /// Node is null. void Instruction::setMetadata(unsigned KindID, MDNode *Node) { @@ -1251,3 +1310,11 @@ void Function::clearMetadata() { getContext().pImpl->FunctionMetadata.erase(this); setHasMetadataHashEntry(false); } + +void Function::setSubprogram(DISubprogram *SP) { + setMetadata(LLVMContext::MD_dbg, SP); +} + +DISubprogram *Function::getSubprogram() const { + return cast_or_null(getMetadata(LLVMContext::MD_dbg)); +} diff --git a/lib/IR/MetadataImpl.h b/lib/IR/MetadataImpl.h index 662a50eb1bdc..b9137460bd20 100644 --- a/lib/IR/MetadataImpl.h +++ b/lib/IR/MetadataImpl.h @@ -26,6 +26,19 @@ static T *getUniqued(DenseSet &Store, return I == Store.end() ? nullptr : *I; } +template T *MDNode::storeImpl(T *N, StorageType Storage) { + switch (Storage) { + case Uniqued: + llvm_unreachable("Cannot unique without a uniquing-store"); + case Distinct: + N->storeDistinctInContext(); + break; + case Temporary: + break; + } + return N; +} + template T *MDNode::storeImpl(T *N, StorageType Storage, StoreT &Store) { switch (Storage) { diff --git a/lib/IR/MetadataTracking.cpp b/lib/IR/MetadataTracking.cpp deleted file mode 100644 index 47f0b9366d7d..000000000000 --- a/lib/IR/MetadataTracking.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===- MetadataTracking.cpp - Implement metadata tracking -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. 
See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements Metadata tracking.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/MetadataTracking.h"
-#include "llvm/IR/Metadata.h"
-
-using namespace llvm;
-
-ReplaceableMetadataImpl *ReplaceableMetadataImpl::get(Metadata &MD) {
-  if (auto *N = dyn_cast<MDNode>(&MD))
-    return N->Context.getReplaceableUses();
-  return dyn_cast<ValueAsMetadata>(&MD);
-}
-
-bool MetadataTracking::track(void *Ref, Metadata &MD, OwnerTy Owner) {
-  assert(Ref && "Expected live reference");
-  assert((Owner || *static_cast<Metadata **>(Ref) == &MD) &&
-         "Reference without owner must be direct");
-  if (auto *R = ReplaceableMetadataImpl::get(MD)) {
-    R->addRef(Ref, Owner);
-    return true;
-  }
-  return false;
-}
-
-void MetadataTracking::untrack(void *Ref, Metadata &MD) {
-  assert(Ref && "Expected live reference");
-  if (auto *R = ReplaceableMetadataImpl::get(MD))
-    R->dropRef(Ref);
-}
-
-bool MetadataTracking::retrack(void *Ref, Metadata &MD, void *New) {
-  assert(Ref && "Expected live reference");
-  assert(New && "Expected live reference");
-  assert(Ref != New && "Expected change");
-  if (auto *R = ReplaceableMetadataImpl::get(MD)) {
-    R->moveRef(Ref, New, MD);
-    return true;
-  }
-  return false;
-}
-
-bool MetadataTracking::isReplaceable(const Metadata &MD) {
-  return ReplaceableMetadataImpl::get(const_cast<Metadata &>(MD));
-}
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 043f74e12da3..ac578d6dba0f 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -29,6 +29,7 @@
 #include <algorithm>
 #include <cstdarg>
 #include <cstdlib>
+
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -37,9 +38,9 @@ using namespace llvm;
 // Explicit instantiations of SymbolTableListTraits since some of the methods
 // are not in the public header file.
-template class llvm::SymbolTableListTraits<Function, Module>;
-template class llvm::SymbolTableListTraits<GlobalVariable, Module>;
-template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
+template class llvm::SymbolTableListTraits<Function>;
+template class llvm::SymbolTableListTraits<GlobalVariable>;
+template class llvm::SymbolTableListTraits<GlobalAlias>;
 
 //===----------------------------------------------------------------------===//
 // Primitive Module methods.
@@ -81,7 +82,6 @@ RandomNumberGenerator *Module::createRNG(const Pass* P) const {
   return new RandomNumberGenerator(Salt);
 }
 
-
 /// getNamedValue - Return the first global value in the module with
 /// the specified name, of arbitrary type. This method returns null
 /// if a global with the specified name is not found.
@@ -102,6 +102,9 @@ void Module::getMDKindNames(SmallVectorImpl<StringRef> &Result) const {
   return Context.getMDKindNames(Result);
 }
 
+void Module::getOperandBundleTags(SmallVectorImpl<StringRef> &Result) const {
+  return Context.getOperandBundleTags(Result);
+}
 
 //===----------------------------------------------------------------------===//
 // Methods for easy access to the functions in the module.
@@ -274,7 +277,7 @@ NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
 /// delete it.
 void Module::eraseNamedMetadata(NamedMDNode *NMD) {
   static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
-  NamedMDList.erase(NMD);
+  NamedMDList.erase(NMD->getIterator());
 }
 
 bool Module::isValidModFlagBehavior(Metadata *MD, ModFlagBehavior &MFB) {
@@ -376,17 +379,11 @@ const DataLayout &Module::getDataLayout() const { return DL; }
 //
 void Module::setMaterializer(GVMaterializer *GVM) {
   assert(!Materializer &&
-         "Module already has a GVMaterializer. Call MaterializeAllPermanently"
+         "Module already has a GVMaterializer. Call materializeAll"
          " to clear it out before setting another one.");
   Materializer.reset(GVM);
 }
 
-bool Module::isDematerializable(const GlobalValue *GV) const {
-  if (Materializer)
-    return Materializer->isDematerializable(GV);
-  return false;
-}
-
 std::error_code Module::materialize(GlobalValue *GV) {
   if (!Materializer)
     return std::error_code();
@@ -394,23 +391,11 @@ std::error_code Module::materialize(GlobalValue *GV) {
   return Materializer->materialize(GV);
 }
 
-void Module::dematerialize(GlobalValue *GV) {
-  if (Materializer)
-    return Materializer->dematerialize(GV);
-}
-
 std::error_code Module::materializeAll() {
   if (!Materializer)
     return std::error_code();
-  return Materializer->materializeModule(this);
-}
-
-std::error_code Module::materializeAllPermanently() {
-  if (std::error_code EC = materializeAll())
-    return EC;
-
-  Materializer.reset();
-  return std::error_code();
+  std::unique_ptr<GVMaterializer> M = std::move(Materializer);
+  return M->materializeModule();
 }
 
 std::error_code Module::materializeMetadata() {
@@ -458,7 +443,14 @@ void Module::dropAllReferences() {
 unsigned Module::getDwarfVersion() const {
   auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("Dwarf Version"));
   if (!Val)
-    return dwarf::DWARF_VERSION;
+    return 0;
+  return cast<ConstantInt>(Val->getValue())->getZExtValue();
+}
+
+unsigned Module::getCodeViewFlag() const {
+  auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("CodeView"));
+  if (!Val)
+    return 0;
   return cast<ConstantInt>(Val->getValue())->getZExtValue();
 }
 
@@ -471,7 +463,7 @@ Comdat *Module::getOrInsertComdat(StringRef Name) {
 
 PICLevel::Level Module::getPICLevel() const {
   auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("PIC Level"));
-  if (Val == NULL)
+  if (!Val)
     return PICLevel::Default;
 
   return static_cast<PICLevel::Level>(
@@ -481,3 +473,15 @@ PICLevel::Level Module::getPICLevel() const {
 void Module::setPICLevel(PICLevel::Level PL) {
   addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL);
 }
+
+void Module::setMaximumFunctionCount(uint64_t Count) {
+  addModuleFlag(ModFlagBehavior::Error, "MaxFunctionCount", Count);
+}
+
+Optional<uint64_t> Module::getMaximumFunctionCount() {
+  auto *Val =
+      cast_or_null<ConstantAsMetadata>(getModuleFlag("MaxFunctionCount"));
+  if (!Val)
+    return None;
+  return cast<ConstantInt>(Val->getValue())->getZExtValue();
+}
diff --git a/lib/IR/Statepoint.cpp b/lib/IR/Statepoint.cpp
index 83ee611cc375..d45c1883ef9e 100644
--- a/lib/IR/Statepoint.cpp
+++ b/lib/IR/Statepoint.cpp
@@ -67,10 +67,7 @@ bool llvm::isGCResult(const ImmutableCallSite &CS) {
 bool llvm::isGCResult(const Value *inst) {
   if (const CallInst *call = dyn_cast<CallInst>(inst)) {
     if (Function *F = call->getCalledFunction()) {
-      return (F->getIntrinsicID() == Intrinsic::experimental_gc_result_int ||
-              F->getIntrinsicID() == Intrinsic::experimental_gc_result_float ||
-              F->getIntrinsicID() == Intrinsic::experimental_gc_result_ptr ||
-              F->getIntrinsicID() == Intrinsic::experimental_gc_result);
+      return F->getIntrinsicID() == Intrinsic::experimental_gc_result;
     }
   }
   return false;
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
index a18f98261abc..50573d8d688a 100644
--- a/lib/IR/SymbolTableListTraitsImpl.h
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -24,77 +24,73 @@ namespace llvm {
 /// setSymTabObject - This is called when (f.e.) the parent of a basic block
 /// changes. This requires us to remove all the instruction symtab entries from
 /// the current function and reinsert them into the new function.
-template <typename ValueSubClass, typename ItemParentClass>
-template <typename TPtr>
-void SymbolTableListTraits<ValueSubClass, ItemParentClass>
-::setSymTabObject(TPtr *Dest, TPtr Src) {
+template <typename ValueSubClass>
+template <typename TPtr>
+void SymbolTableListTraits<ValueSubClass>::setSymTabObject(TPtr *Dest,
+                                                           TPtr Src) {
   // Get the old symtab and value list before doing the assignment.
-  ValueSymbolTable *OldST = TraitsClass::getSymTab(getListOwner());
+  ValueSymbolTable *OldST = getSymTab(getListOwner());
 
   // Do it.
   *Dest = Src;
 
   // Get the new SymTab object.
-  ValueSymbolTable *NewST = TraitsClass::getSymTab(getListOwner());
+  ValueSymbolTable *NewST = getSymTab(getListOwner());
 
   // If there is nothing to do, quick exit.
   if (OldST == NewST) return;
 
   // Move all the elements from the old symtab to the new one.
-  iplist<ValueSubClass> &ItemList = TraitsClass::getList(getListOwner());
+  ListTy &ItemList = getList(getListOwner());
   if (ItemList.empty()) return;
 
   if (OldST) {
     // Remove all entries from the previous symtab.
-    for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
-         I != ItemList.end(); ++I)
+    for (auto I = ItemList.begin(); I != ItemList.end(); ++I)
       if (I->hasName())
         OldST->removeValueName(I->getValueName());
   }
 
   if (NewST) {
     // Add all of the items to the new symtab.
-    for (typename iplist<ValueSubClass>::iterator I = ItemList.begin();
-         I != ItemList.end(); ++I)
+    for (auto I = ItemList.begin(); I != ItemList.end(); ++I)
       if (I->hasName())
-        NewST->reinsertValue(I);
+        NewST->reinsertValue(&*I);
   }
 }
 
-template <typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass, ItemParentClass>
-::addNodeToList(ValueSubClass *V) {
+template <typename ValueSubClass>
+void SymbolTableListTraits<ValueSubClass>::addNodeToList(ValueSubClass *V) {
   assert(!V->getParent() && "Value already in a container!!");
   ItemParentClass *Owner = getListOwner();
   V->setParent(Owner);
   if (V->hasName())
-    if (ValueSymbolTable *ST = TraitsClass::getSymTab(Owner))
+    if (ValueSymbolTable *ST = getSymTab(Owner))
       ST->reinsertValue(V);
 }
 
-template <typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass, ItemParentClass>
-::removeNodeFromList(ValueSubClass *V) {
+template <typename ValueSubClass>
+void SymbolTableListTraits<ValueSubClass>::removeNodeFromList(
+    ValueSubClass *V) {
   V->setParent(nullptr);
   if (V->hasName())
-    if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
+    if (ValueSymbolTable *ST = getSymTab(getListOwner()))
      ST->removeValueName(V->getValueName());
 }
 
-template <typename ValueSubClass, typename ItemParentClass>
-void SymbolTableListTraits<ValueSubClass, ItemParentClass>
-::transferNodesFromList(ilist_traits<ValueSubClass> &L2,
-                        ilist_iterator<ValueSubClass> first,
-                        ilist_iterator<ValueSubClass> last) {
+template <typename ValueSubClass>
+void SymbolTableListTraits<ValueSubClass>::transferNodesFromList(
+    SymbolTableListTraits &L2, ilist_iterator<ValueSubClass> first,
+    ilist_iterator<ValueSubClass> last) {
   // We only have to do work here if transferring instructions between BBs
   ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
   if (NewIP == OldIP) return;  // No work to do at all...
 
   // We only have to update symbol table entries if we are transferring the
   // instructions to a different symtab object...
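A minimal standalone sketch of the reparenting pattern implemented above, using a hypothetical name-keyed SymTab rather than the real ValueSymbolTable types:

// Hypothetical illustration, not LLVM code: when an item's parent (and
// therefore its symbol table) changes, its entry must move tables.
#include <map>
#include <string>

struct SymTab { std::map<std::string, void *> Entries; };

inline void reparent(SymTab *OldST, SymTab *NewST, const std::string &Name,
                     void *Item) {
  if (OldST == NewST)
    return;                      // Same table: nothing to update.
  if (OldST)
    OldST->Entries.erase(Name);  // Remove the entry from the previous symtab.
  if (NewST)
    NewST->Entries[Name] = Item; // Reinsert it under the new parent.
}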
- ValueSymbolTable *NewST = TraitsClass::getSymTab(NewIP); - ValueSymbolTable *OldST = TraitsClass::getSymTab(OldIP); + ValueSymbolTable *NewST = getSymTab(NewIP); + ValueSymbolTable *OldST = getSymTab(OldIP); if (NewST != OldST) { for (; first != last; ++first) { ValueSubClass &V = *first; diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp index a9ca80034ca7..4c1baf52a58f 100644 --- a/lib/IR/Type.cpp +++ b/lib/IR/Type.cpp @@ -35,6 +35,7 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { case LabelTyID : return getLabelTy(C); case MetadataTyID : return getMetadataTy(C); case X86_MMXTyID : return getX86_MMXTy(C); + case TokenTyID : return getTokenTy(C); default: return nullptr; } @@ -42,16 +43,10 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) { /// getScalarType - If this is a vector type, return the element type, /// otherwise return this. -Type *Type::getScalarType() { - if (VectorType *VTy = dyn_cast(this)) +Type *Type::getScalarType() const { + if (auto *VTy = dyn_cast(this)) return VTy->getElementType(); - return this; -} - -const Type *Type::getScalarType() const { - if (const VectorType *VTy = dyn_cast(this)) - return VTy->getElementType(); - return this; + return const_cast(this); } /// isIntegerTy - Return true if this is an IntegerType of the specified width. @@ -74,8 +69,8 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const { // Vector -> Vector conversions are always lossless if the two vector types // have the same size, otherwise not. Also, 64-bit vector types can be // converted to x86mmx. - if (const VectorType *thisPTy = dyn_cast(this)) { - if (const VectorType *thatPTy = dyn_cast(Ty)) + if (auto *thisPTy = dyn_cast(this)) { + if (auto *thatPTy = dyn_cast(Ty)) return thisPTy->getBitWidth() == thatPTy->getBitWidth(); if (Ty->getTypeID() == Type::X86_MMXTyID && thisPTy->getBitWidth() == 64) @@ -83,7 +78,7 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const { } if (this->getTypeID() == Type::X86_MMXTyID) - if (const VectorType *thatPTy = dyn_cast(Ty)) + if (auto *thatPTy = dyn_cast(Ty)) if (thatPTy->getBitWidth() == 64) return true; @@ -91,8 +86,8 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const { // remaining and ptr->ptr. Just select the lossless conversions. Everything // else is not lossless. Conservatively assume we can't losslessly convert // between pointers with different address spaces. - if (const PointerType *PTy = dyn_cast(this)) { - if (const PointerType *OtherPTy = dyn_cast(Ty)) + if (auto *PTy = dyn_cast(this)) { + if (auto *OtherPTy = dyn_cast(Ty)) return PTy->getAddressSpace() == OtherPTy->getAddressSpace(); return false; } @@ -100,14 +95,12 @@ bool Type::canLosslesslyBitCastTo(Type *Ty) const { } bool Type::isEmptyTy() const { - const ArrayType *ATy = dyn_cast(this); - if (ATy) { + if (auto *ATy = dyn_cast(this)) { unsigned NumElements = ATy->getNumElements(); return NumElements == 0 || ATy->getElementType()->isEmptyTy(); } - const StructType *STy = dyn_cast(this); - if (STy) { + if (auto *STy = dyn_cast(this)) { unsigned NumElements = STy->getNumElements(); for (unsigned i = 0; i < NumElements; ++i) if (!STy->getElementType(i)->isEmptyTy()) @@ -144,7 +137,7 @@ unsigned Type::getScalarSizeInBits() const { /// is only valid on floating point types. If the FP type does not /// have a stable mantissa (e.g. ppc long double), this method returns -1. 
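For reference, a usage sketch of the mantissa-width query documented above. The expected constants (11/24/53 for half/float/double, -1 for ppc_fp128) follow from the IEEE formats plus the doc comment, and are stated here as assumptions about this revision:

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cassert>

void checkMantissaWidths(llvm::LLVMContext &Ctx) {
  assert(llvm::Type::getHalfTy(Ctx)->getFPMantissaWidth() == 11);
  assert(llvm::Type::getFloatTy(Ctx)->getFPMantissaWidth() == 24);
  assert(llvm::Type::getDoubleTy(Ctx)->getFPMantissaWidth() == 53);
  // ppc long double has no stable mantissa width, so the query returns -1.
  assert(llvm::Type::getPPC_FP128Ty(Ctx)->getFPMantissaWidth() == -1);
}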
int Type::getFPMantissaWidth() const { - if (const VectorType *VTy = dyn_cast(this)) + if (auto *VTy = dyn_cast(this)) return VTy->getElementType()->getFPMantissaWidth(); assert(isFloatingPointTy() && "Not a floating point type!"); if (getTypeID() == HalfTyID) return 11; @@ -159,65 +152,16 @@ int Type::getFPMantissaWidth() const { /// isSizedDerivedType - Derived types like structures and arrays are sized /// iff all of the members of the type are sized as well. Since asking for /// their size is relatively uncommon, move this operation out of line. -bool Type::isSizedDerivedType(SmallPtrSetImpl *Visited) const { - if (const ArrayType *ATy = dyn_cast(this)) +bool Type::isSizedDerivedType(SmallPtrSetImpl *Visited) const { + if (auto *ATy = dyn_cast(this)) return ATy->getElementType()->isSized(Visited); - if (const VectorType *VTy = dyn_cast(this)) + if (auto *VTy = dyn_cast(this)) return VTy->getElementType()->isSized(Visited); return cast(this)->isSized(Visited); } -//===----------------------------------------------------------------------===// -// Subclass Helper Methods -//===----------------------------------------------------------------------===// - -unsigned Type::getIntegerBitWidth() const { - return cast(this)->getBitWidth(); -} - -bool Type::isFunctionVarArg() const { - return cast(this)->isVarArg(); -} - -Type *Type::getFunctionParamType(unsigned i) const { - return cast(this)->getParamType(i); -} - -unsigned Type::getFunctionNumParams() const { - return cast(this)->getNumParams(); -} - -StringRef Type::getStructName() const { - return cast(this)->getName(); -} - -unsigned Type::getStructNumElements() const { - return cast(this)->getNumElements(); -} - -Type *Type::getStructElementType(unsigned N) const { - return cast(this)->getElementType(N); -} - -Type *Type::getSequentialElementType() const { - return cast(this)->getElementType(); -} - -uint64_t Type::getArrayNumElements() const { - return cast(this)->getNumElements(); -} - -unsigned Type::getVectorNumElements() const { - return cast(this)->getNumElements(); -} - -unsigned Type::getPointerAddressSpace() const { - return cast(getScalarType())->getAddressSpace(); -} - - //===----------------------------------------------------------------------===// // Primitive 'Type' data //===----------------------------------------------------------------------===// @@ -228,6 +172,7 @@ Type *Type::getHalfTy(LLVMContext &C) { return &C.pImpl->HalfTy; } Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; } Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; } Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; } +Type *Type::getTokenTy(LLVMContext &C) { return &C.pImpl->TokenTy; } Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; } Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; } Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; } @@ -345,7 +290,7 @@ FunctionType::FunctionType(Type *Result, ArrayRef Params, assert(isValidReturnType(Result) && "invalid return type for function"); setSubclassData(IsVarArgs); - SubTys[0] = const_cast(Result); + SubTys[0] = Result; for (unsigned i = 0, e = Params.size(); i != e; ++i) { assert(isValidArgumentType(Params[i]) && @@ -428,12 +373,14 @@ void StructType::setBody(ArrayRef Elements, bool isPacked) { if (isPacked) setSubclassData(getSubclassData() | SCDB_Packed); - unsigned NumElements = Elements.size(); - Type **Elts = getContext().pImpl->TypeAllocator.Allocate(NumElements); - 
memcpy(Elts, Elements.data(), sizeof(Elements[0]) * NumElements); - - ContainedTys = Elts; - NumContainedTys = NumElements; + NumContainedTys = Elements.size(); + + if (Elements.empty()) { + ContainedTys = nullptr; + return; + } + + ContainedTys = Elements.copy(getContext().pImpl->TypeAllocator).data(); } void StructType::setName(StringRef Name) { @@ -470,7 +417,6 @@ void StructType::setName(StringRef Name) { do { TempStr.resize(NameSize + 1); - TmpStream.resync(); TmpStream << getContext().pImpl->NamedStructTypesUniqueID++; IterBool = getContext().pImpl->NamedStructTypes.insert( @@ -556,13 +502,13 @@ StructType *StructType::create(StringRef Name, Type *type, ...) { return Ret; } -bool StructType::isSized(SmallPtrSetImpl *Visited) const { +bool StructType::isSized(SmallPtrSetImpl *Visited) const { if ((getSubclassData() & SCDB_IsSized) != 0) return true; if (isOpaque()) return false; - if (Visited && !Visited->insert(this).second) + if (Visited && !Visited->insert(const_cast(this)).second) return false; // Okay, our struct is sized if all of the elements are, but if one of the @@ -602,22 +548,19 @@ void StructType::setBody(Type *type, ...) { bool StructType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && - !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() && + !ElemTy->isTokenTy(); } /// isLayoutIdentical - Return true if this is layout identical to the /// specified struct. bool StructType::isLayoutIdentical(StructType *Other) const { if (this == Other) return true; - - if (isPacked() != Other->isPacked() || - getNumElements() != Other->getNumElements()) + + if (isPacked() != Other->isPacked()) return false; - if (!getNumElements()) - return true; - - return std::equal(element_begin(), element_end(), Other->element_begin()); + return elements() == Other->elements(); } /// getTypeByName - Return the type with the specified name, or null if there @@ -631,8 +574,8 @@ StructType *Module::getTypeByName(StringRef Name) const { // CompositeType Implementation //===----------------------------------------------------------------------===// -Type *CompositeType::getTypeAtIndex(const Value *V) { - if (StructType *STy = dyn_cast(this)) { +Type *CompositeType::getTypeAtIndex(const Value *V) const { + if (auto *STy = dyn_cast(this)) { unsigned Idx = (unsigned)cast(V)->getUniqueInteger().getZExtValue(); assert(indexValid(Idx) && "Invalid structure index!"); @@ -641,16 +584,18 @@ Type *CompositeType::getTypeAtIndex(const Value *V) { return cast(this)->getElementType(); } -Type *CompositeType::getTypeAtIndex(unsigned Idx) { - if (StructType *STy = dyn_cast(this)) { + +Type *CompositeType::getTypeAtIndex(unsigned Idx) const{ + if (auto *STy = dyn_cast(this)) { assert(indexValid(Idx) && "Invalid structure index!"); return STy->getElementType(Idx); } - + return cast(this)->getElementType(); } + bool CompositeType::indexValid(const Value *V) const { - if (const StructType *STy = dyn_cast(this)) { + if (auto *STy = dyn_cast(this)) { // Structure indexes require (vectors of) 32-bit integer constants. In the // vector case all of the indices must be equal. if (!V->getType()->getScalarType()->isIntegerTy(32)) @@ -667,7 +612,7 @@ bool CompositeType::indexValid(const Value *V) const { } bool CompositeType::indexValid(unsigned Idx) const { - if (const StructType *STy = dyn_cast(this)) + if (auto *STy = dyn_cast(this)) return Idx < STy->getNumElements(); // Sequential types can be indexed by any integer. 
return true; @@ -683,10 +628,9 @@ ArrayType::ArrayType(Type *ElType, uint64_t NumEl) NumElements = NumEl; } -ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) { - Type *ElementType = const_cast(elementType); +ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) { assert(isValidElementType(ElementType) && "Invalid type for array element!"); - + LLVMContextImpl *pImpl = ElementType->getContext().pImpl; ArrayType *&Entry = pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)]; @@ -698,7 +642,8 @@ ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) { bool ArrayType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && - !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy(); + !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() && + !ElemTy->isTokenTy(); } //===----------------------------------------------------------------------===// @@ -710,8 +655,7 @@ VectorType::VectorType(Type *ElType, unsigned NumEl) NumElements = NumEl; } -VectorType *VectorType::get(Type *elementType, unsigned NumElements) { - Type *ElementType = const_cast(elementType); +VectorType *VectorType::get(Type *ElementType, unsigned NumElements) { assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0"); assert(isValidElementType(ElementType) && "Element type of a VectorType must " "be an integer, floating point, or " @@ -761,13 +705,13 @@ PointerType::PointerType(Type *E, unsigned AddrSpace) assert(oldNCT == NumContainedTys && "bitfield written out of bounds?"); } -PointerType *Type::getPointerTo(unsigned addrs) { - return PointerType::get(this, addrs); +PointerType *Type::getPointerTo(unsigned addrs) const { + return PointerType::get(const_cast(this), addrs); } bool PointerType::isValidElementType(Type *ElemTy) { return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() && - !ElemTy->isMetadataTy(); + !ElemTy->isMetadataTy() && !ElemTy->isTokenTy(); } bool PointerType::isLoadableOrStorableType(Type *ElemTy) { diff --git a/lib/IR/TypeFinder.cpp b/lib/IR/TypeFinder.cpp index 7accc5bef535..b5bdab0865b6 100644 --- a/lib/IR/TypeFinder.cpp +++ b/lib/IR/TypeFinder.cpp @@ -44,19 +44,13 @@ void TypeFinder::run(const Module &M, bool onlyNamed) { for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) { incorporateType(FI->getType()); - if (FI->hasPrefixData()) - incorporateValue(FI->getPrefixData()); - - if (FI->hasPrologueData()) - incorporateValue(FI->getPrologueData()); - - if (FI->hasPersonalityFn()) - incorporateValue(FI->getPersonalityFn()); + for (const Use &U : FI->operands()) + incorporateValue(U.get()); // First incorporate the arguments. 
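A hedged usage sketch of the walker being modified here, assuming a populated llvm::Module `M`:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Support/raw_ostream.h"

// List every named struct type a module transitively references.
void dumpNamedStructs(llvm::Module &M) {
  llvm::TypeFinder Finder;
  Finder.run(M, /*onlyNamed=*/true); // Skip literal (unnamed) structs.
  for (llvm::StructType *ST : Finder)
    llvm::errs() << ST->getName() << "\n";
}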
for (Function::const_arg_iterator AI = FI->arg_begin(), AE = FI->arg_end(); AI != AE; ++AI) - incorporateValue(AI); + incorporateValue(&*AI); for (Function::const_iterator BB = FI->begin(), E = FI->end(); BB != E;++BB) @@ -85,7 +79,7 @@ void TypeFinder::run(const Module &M, bool onlyNamed) { for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), E = M.named_metadata_end(); I != E; ++I) { - const NamedMDNode *NMD = I; + const NamedMDNode *NMD = &*I; for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) incorporateMDNode(NMD->getOperand(i)); } diff --git a/lib/IR/User.cpp b/lib/IR/User.cpp index 522722d701ba..a75abe6938c9 100644 --- a/lib/IR/User.cpp +++ b/lib/IR/User.cpp @@ -87,22 +87,70 @@ void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) { Use::zap(OldOps, OldOps + OldNumUses, true); } + +// This is a private struct used by `User` to track the co-allocated descriptor +// section. +struct DescriptorInfo { + intptr_t SizeInBytes; +}; + +ArrayRef User::getDescriptor() const { + auto MutableARef = const_cast(this)->getDescriptor(); + return {MutableARef.begin(), MutableARef.end()}; +} + +MutableArrayRef User::getDescriptor() { + assert(HasDescriptor && "Don't call otherwise!"); + assert(!HasHungOffUses && "Invariant!"); + + auto *DI = reinterpret_cast(getIntrusiveOperands()) - 1; + assert(DI->SizeInBytes != 0 && "Should not have had a descriptor otherwise!"); + + return MutableArrayRef( + reinterpret_cast(DI) - DI->SizeInBytes, DI->SizeInBytes); +} + //===----------------------------------------------------------------------===// // User operator new Implementations //===----------------------------------------------------------------------===// -void *User::operator new(size_t Size, unsigned Us) { +void *User::allocateFixedOperandUser(size_t Size, unsigned Us, + unsigned DescBytes) { assert(Us < (1u << NumUserOperandsBits) && "Too many operands"); - void *Storage = ::operator new(Size + sizeof(Use) * Us); - Use *Start = static_cast(Storage); + + static_assert(sizeof(DescriptorInfo) % sizeof(void *) == 0, "Required below"); + + unsigned DescBytesToAllocate = + DescBytes == 0 ? 0 : (DescBytes + sizeof(DescriptorInfo)); + assert(DescBytesToAllocate % sizeof(void *) == 0 && + "We need this to satisfy alignment constraints for Uses"); + + uint8_t *Storage = static_cast( + ::operator new(Size + sizeof(Use) * Us + DescBytesToAllocate)); + Use *Start = reinterpret_cast(Storage + DescBytesToAllocate); Use *End = Start + Us; User *Obj = reinterpret_cast(End); Obj->NumUserOperands = Us; Obj->HasHungOffUses = false; + Obj->HasDescriptor = DescBytes != 0; Use::initTags(Start, End); + + if (DescBytes != 0) { + auto *DescInfo = reinterpret_cast(Storage + DescBytes); + DescInfo->SizeInBytes = DescBytes; + } + return Obj; } +void *User::operator new(size_t Size, unsigned Us) { + return allocateFixedOperandUser(Size, Us, 0); +} + +void *User::operator new(size_t Size, unsigned Us, unsigned DescBytes) { + return allocateFixedOperandUser(Size, Us, DescBytes); +} + void *User::operator new(size_t Size) { // Allocate space for a single Use* void *Storage = ::operator new(Size + sizeof(Use *)); @@ -110,6 +158,7 @@ void *User::operator new(size_t Size) { User *Obj = reinterpret_cast(HungOffOperandList + 1); Obj->NumUserOperands = 0; Obj->HasHungOffUses = true; + Obj->HasDescriptor = false; *HungOffOperandList = nullptr; return Obj; } @@ -123,11 +172,20 @@ void User::operator delete(void *Usr) { // use a Use[] allocated prior to the user. 
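The co-allocated memory layout that the operator new and operator delete overloads above must agree on, sketched as a comment (descriptor variant; inferred from the code, not normative):

// [ descriptor bytes ][ DescriptorInfo ][ Use #0 ... Use #N-1 ][ User object ]
// ^ Storage (what ::operator new returned)                     ^ this
//
// operator delete therefore walks back from the User through the Use array
// and the DescriptorInfo header to recover the original Storage pointer
// before freeing it.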
User *Obj = static_cast(Usr); if (Obj->HasHungOffUses) { + assert(!Obj->HasDescriptor && "not supported!"); + Use **HungOffOperandList = static_cast(Usr) - 1; // drop the hung off uses. Use::zap(*HungOffOperandList, *HungOffOperandList + Obj->NumUserOperands, /* Delete */ true); ::operator delete(HungOffOperandList); + } else if (Obj->HasDescriptor) { + Use *UseBegin = static_cast(Usr) - Obj->NumUserOperands; + Use::zap(UseBegin, UseBegin + Obj->NumUserOperands, /* Delete */ false); + + auto *DI = reinterpret_cast(UseBegin) - 1; + uint8_t *Storage = reinterpret_cast(DI) - DI->SizeInBytes; + ::operator delete(Storage); } else { Use *Storage = static_cast(Usr) - Obj->NumUserOperands; Use::zap(Storage, Storage + Obj->NumUserOperands, diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index f554d590284f..eb9deb6a07e1 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -314,6 +314,16 @@ void Value::takeName(Value *V) { } #ifndef NDEBUG +void Value::assertModuleIsMaterialized() const { + const GlobalValue *GV = dyn_cast(this); + if (!GV) + return; + const Module *M = GV->getParent(); + if (!M) + return; + assert(M->isMaterialized()); +} + static bool contains(SmallPtrSetImpl &Cache, ConstantExpr *Expr, Constant *C) { if (!Cache.insert(Expr).second) @@ -490,8 +500,7 @@ Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, return V; Offset = GEPOffset; V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast || - Operator::getOpcode(V) == Instruction::AddrSpaceCast) { + } else if (Operator::getOpcode(V) == Instruction::BitCast) { V = cast(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast(V)) { V = GA->getAliasee(); diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp index e10142de8232..deb6e7573e72 100644 --- a/lib/IR/ValueSymbolTable.cpp +++ b/lib/IR/ValueSymbolTable.cpp @@ -32,6 +32,24 @@ ValueSymbolTable::~ValueSymbolTable() { #endif } +ValueName *ValueSymbolTable::makeUniqueName(Value *V, + SmallString<256> &UniqueName) { + unsigned BaseSize = UniqueName.size(); + while (1) { + // Trim any suffix off and append the next number. + UniqueName.resize(BaseSize); + raw_svector_ostream S(UniqueName); + if (isa(V)) + S << "."; + S << ++LastUnique; + + // Try insert the vmap entry with this suffix. + auto IterBool = vmap.insert(std::make_pair(UniqueName, V)); + if (IterBool.second) + return &*IterBool.first; + } +} + // Insert a value into the symbol table with the specified name... // void ValueSymbolTable::reinsertValue(Value* V) { @@ -49,21 +67,8 @@ void ValueSymbolTable::reinsertValue(Value* V) { // The name is too already used, just free it so we can allocate a new name. V->getValueName()->Destroy(); - unsigned BaseSize = UniqueName.size(); - while (1) { - // Trim any suffix off and append the next number. - UniqueName.resize(BaseSize); - raw_svector_ostream(UniqueName) << "." << ++LastUnique; - - // Try insert the vmap entry with this suffix. - auto IterBool = vmap.insert(std::make_pair(UniqueName, V)); - if (IterBool.second) { - // Newly inserted name. Success! - V->setValueName(&*IterBool.first); - //DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n"); - return; - } - } + ValueName *VN = makeUniqueName(V, UniqueName); + V->setValueName(VN); } void ValueSymbolTable::removeValueName(ValueName *V) { @@ -86,20 +91,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) { // Otherwise, there is a naming conflict. Rename this value. 
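The suffix-probing loop that makeUniqueName() factors out can be sketched in isolation like this (hypothetical standalone code, not the LLVM implementation):

#include <map>
#include <string>

inline std::string makeUnique(std::map<std::string, void *> &Table,
                              const std::string &Base, void *V) {
  unsigned LastUnique = 0;
  std::string Candidate = Base;
  // Trim any previous suffix off and append the next number until the
  // insertion finally succeeds.
  while (!Table.emplace(Candidate, V).second)
    Candidate = Base + "." + std::to_string(++LastUnique);
  return Candidate;
}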
SmallString<256> UniqueName(Name.begin(), Name.end()); - - while (1) { - // Trim any suffix off and append the next number. - UniqueName.resize(Name.size()); - raw_svector_ostream(UniqueName) << ++LastUnique; - - // Try insert the vmap entry with this suffix. - auto IterBool = vmap.insert(std::make_pair(UniqueName, V)); - if (IterBool.second) { - // DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << - // "\n"); - return &*IterBool.first; - } - } + return makeUniqueName(V, UniqueName); } diff --git a/lib/IR/ValueTypes.cpp b/lib/IR/ValueTypes.cpp index d95de3989df0..f2932302af2a 100644 --- a/lib/IR/ValueTypes.cpp +++ b/lib/IR/ValueTypes.cpp @@ -19,6 +19,11 @@ #include "llvm/Support/ErrorHandling.h" using namespace llvm; +EVT EVT::changeExtendedTypeToInteger() const { + LLVMContext &Context = LLVMTy->getContext(); + return getIntegerVT(Context, getSizeInBits()); +} + EVT EVT::changeExtendedVectorElementTypeToInteger() const { LLVMContext &Context = LLVMTy->getContext(); EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits()); @@ -83,6 +88,10 @@ bool EVT::isExtended1024BitVector() const { return isExtendedVector() && getExtendedSizeInBits() == 1024; } +bool EVT::isExtended2048BitVector() const { + return isExtendedVector() && getExtendedSizeInBits() == 2048; +} + EVT EVT::getExtendedVectorElementType() const { assert(isExtended() && "Type is not extended!"); return EVT::getEVT(cast(LLVMTy)->getElementType()); @@ -134,6 +143,8 @@ std::string EVT::getEVTString() const { case MVT::v16i1: return "v16i1"; case MVT::v32i1: return "v32i1"; case MVT::v64i1: return "v64i1"; + case MVT::v512i1: return "v512i1"; + case MVT::v1024i1: return "v1024i1"; case MVT::v1i8: return "v1i8"; case MVT::v2i8: return "v2i8"; case MVT::v4i8: return "v4i8"; @@ -141,22 +152,29 @@ std::string EVT::getEVTString() const { case MVT::v16i8: return "v16i8"; case MVT::v32i8: return "v32i8"; case MVT::v64i8: return "v64i8"; + case MVT::v128i8: return "v128i8"; + case MVT::v256i8: return "v256i8"; case MVT::v1i16: return "v1i16"; case MVT::v2i16: return "v2i16"; case MVT::v4i16: return "v4i16"; case MVT::v8i16: return "v8i16"; case MVT::v16i16: return "v16i16"; case MVT::v32i16: return "v32i16"; + case MVT::v64i16: return "v64i16"; + case MVT::v128i16: return "v128i16"; case MVT::v1i32: return "v1i32"; case MVT::v2i32: return "v2i32"; case MVT::v4i32: return "v4i32"; case MVT::v8i32: return "v8i32"; case MVT::v16i32: return "v16i32"; + case MVT::v32i32: return "v32i32"; + case MVT::v64i32: return "v64i32"; case MVT::v1i64: return "v1i64"; case MVT::v2i64: return "v2i64"; case MVT::v4i64: return "v4i64"; case MVT::v8i64: return "v8i64"; case MVT::v16i64: return "v16i64"; + case MVT::v32i64: return "v32i64"; case MVT::v1i128: return "v1i128"; case MVT::v1f32: return "v1f32"; case MVT::v2f32: return "v2f32"; @@ -203,6 +221,8 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16); case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); + case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512); + case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024); case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2); case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4); @@ -210,22 +230,29 @@ Type 
*EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i8: return VectorType::get(Type::getInt8Ty(Context), 16); case MVT::v32i8: return VectorType::get(Type::getInt8Ty(Context), 32); case MVT::v64i8: return VectorType::get(Type::getInt8Ty(Context), 64); + case MVT::v128i8: return VectorType::get(Type::getInt8Ty(Context), 128); + case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256); case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1); case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2); case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4); case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8); case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16); case MVT::v32i16: return VectorType::get(Type::getInt16Ty(Context), 32); + case MVT::v64i16: return VectorType::get(Type::getInt16Ty(Context), 64); + case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128); case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1); case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2); case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4); case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8); case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16); + case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32); + case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64); case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1); case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2); case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4); case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); + case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32); case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 2a0a4ff393ed..81c87e4759b7 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -39,8 +39,7 @@ // only by the unwind edge of an invoke instruction. // * A landingpad instruction must be the first non-PHI instruction in the // block. -// * All landingpad instructions must use the same personality function with -// the same function. +// * Landingpad instructions must be in a function with a personality function. // * All other things that are tested by asserts spread about the code... // //===----------------------------------------------------------------------===// @@ -92,6 +91,16 @@ struct VerifierSupport { : OS(OS), M(nullptr), Broken(false) {} private: + template void Write(const ilist_iterator &I) { + Write(&*I); + } + + void Write(const Module *M) { + if (!M) + return; + OS << "; ModuleID = '" << M->getModuleIdentifier() << "'\n"; + } + void Write(const Value *V) { if (!V) return; @@ -184,6 +193,9 @@ class Verifier : public InstVisitor, VerifierSupport { /// \brief Track unresolved string-based type references. SmallDenseMap UnresolvedTypeRefs; + /// \brief The result type for a landingpad. + Type *LandingPadResultTy; + /// \brief Whether we've seen a call to @llvm.localescape in this function /// already. 
bool SawFrameEscape; @@ -192,9 +204,15 @@ class Verifier : public InstVisitor, VerifierSupport { /// given function and the largest index passed to llvm.localrecover. DenseMap> FrameEscapeInfo; + /// Cache of constants visited in search of ConstantExprs. + SmallPtrSet ConstantExprVisited; + + void checkAtomicMemAccessSize(const Module *M, Type *Ty, + const Instruction *I); public: explicit Verifier(raw_ostream &OS) - : VerifierSupport(OS), Context(nullptr), SawFrameEscape(false) {} + : VerifierSupport(OS), Context(nullptr), LandingPadResultTy(nullptr), + SawFrameEscape(false) {} bool verify(const Function &F) { M = F.getParent(); @@ -228,6 +246,7 @@ public: // FIXME: We strip const here because the inst visitor strips const. visit(const_cast(F)); InstsInThisBlock.clear(); + LandingPadResultTy = nullptr; SawFrameEscape = false; return !Broken; @@ -297,12 +316,12 @@ private: void visitFunction(const Function &F); void visitBasicBlock(BasicBlock &BB); void visitRangeMetadata(Instruction& I, MDNode* Range, Type* Ty); + void visitDereferenceableMetadata(Instruction& I, MDNode* MD); template bool isValidMetadataArray(const MDTuple &N); #define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N); #include "llvm/IR/Metadata.def" void visitDIScope(const DIScope &N); - void visitDIDerivedTypeBase(const DIDerivedTypeBase &N); void visitDIVariable(const DIVariable &N); void visitDILexicalBlockBase(const DILexicalBlockBase &N); void visitDITemplateParameter(const DITemplateParameter &N); @@ -379,7 +398,13 @@ private: void visitAllocaInst(AllocaInst &AI); void visitExtractValueInst(ExtractValueInst &EVI); void visitInsertValueInst(InsertValueInst &IVI); + void visitEHPadPredecessors(Instruction &I); void visitLandingPadInst(LandingPadInst &LPI); + void visitCatchPadInst(CatchPadInst &CPI); + void visitCatchReturnInst(CatchReturnInst &CatchReturn); + void visitCleanupPadInst(CleanupPadInst &CPI); + void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch); + void visitCleanupReturnInst(CleanupReturnInst &CRI); void VerifyCallSite(CallSite CS); void verifyMustTailCall(CallInst &CI); @@ -399,7 +424,8 @@ private: void VerifyFunctionMetadata( const SmallVector, 4> MDs); - void VerifyConstantExprBitcastType(const ConstantExpr *CE); + void visitConstantExprsRecursively(const Constant *EntryC); + void visitConstantExpr(const ConstantExpr *CE); void VerifyStatepoint(ImmutableCallSite CS); void verifyFrameRecoverIndices(); @@ -524,25 +550,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { } // Walk any aggregate initializers looking for bitcasts between address spaces - SmallPtrSet Visited; - SmallVector WorkStack; - WorkStack.push_back(cast(GV.getInitializer())); - - while (!WorkStack.empty()) { - const Value *V = WorkStack.pop_back_val(); - if (!Visited.insert(V).second) - continue; - - if (const User *U = dyn_cast(V)) { - WorkStack.append(U->op_begin(), U->op_end()); - } - - if (const ConstantExpr *CE = dyn_cast(V)) { - VerifyConstantExprBitcastType(CE); - if (Broken) - return; - } - } + visitConstantExprsRecursively(GV.getInitializer()); visitGlobalValue(GV); } @@ -556,7 +564,8 @@ void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) { void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, const GlobalAlias &GA, const Constant &C) { if (const auto *GV = dyn_cast(&C)) { - Assert(!GV->isDeclaration(), "Alias must point to a definition", &GA); + Assert(!GV->isDeclarationForLinker(), "Alias must point to a definition", + &GA); if (const auto *GA2 = 
dyn_cast(GV)) { Assert(Visited.insert(GA2).second, "Aliases cannot form a cycle", &GA); @@ -571,7 +580,7 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, } if (const auto *CE = dyn_cast(&C)) - VerifyConstantExprBitcastType(CE); + visitConstantExprsRecursively(CE); for (const Use &U : C.operands()) { Value *V = &*U; @@ -779,39 +788,10 @@ void Verifier::visitDIBasicType(const DIBasicType &N) { "invalid tag", &N); } -void Verifier::visitDIDerivedTypeBase(const DIDerivedTypeBase &N) { +void Verifier::visitDIDerivedType(const DIDerivedType &N) { // Common scope checks. visitDIScope(N); - Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope()); - Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N, - N.getBaseType()); - - // FIXME: Sink this into the subclass verifies. - if (!N.getFile() || N.getFile()->getFilename().empty()) { - // Check whether the filename is allowed to be empty. - uint16_t Tag = N.getTag(); - Assert( - Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || - Tag == dwarf::DW_TAG_pointer_type || - Tag == dwarf::DW_TAG_ptr_to_member_type || - Tag == dwarf::DW_TAG_reference_type || - Tag == dwarf::DW_TAG_rvalue_reference_type || - Tag == dwarf::DW_TAG_restrict_type || - Tag == dwarf::DW_TAG_array_type || - Tag == dwarf::DW_TAG_enumeration_type || - Tag == dwarf::DW_TAG_subroutine_type || - Tag == dwarf::DW_TAG_inheritance || Tag == dwarf::DW_TAG_friend || - Tag == dwarf::DW_TAG_structure_type || - Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef, - "derived/composite type requires a filename", &N, N.getFile()); - } -} - -void Verifier::visitDIDerivedType(const DIDerivedType &N) { - // Common derived type checks. - visitDIDerivedTypeBase(N); - Assert(N.getTag() == dwarf::DW_TAG_typedef || N.getTag() == dwarf::DW_TAG_pointer_type || N.getTag() == dwarf::DW_TAG_ptr_to_member_type || @@ -828,6 +808,10 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) { Assert(isTypeRef(N, N.getExtraData()), "invalid pointer to member type", &N, N.getExtraData()); } + + Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope()); + Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N, + N.getBaseType()); } static bool hasConflictingReferenceFlags(unsigned Flags) { @@ -845,27 +829,34 @@ void Verifier::visitTemplateParams(const MDNode &N, const Metadata &RawParams) { } void Verifier::visitDICompositeType(const DICompositeType &N) { - // Common derived type checks. - visitDIDerivedTypeBase(N); + // Common scope checks. 
+ visitDIScope(N); Assert(N.getTag() == dwarf::DW_TAG_array_type || N.getTag() == dwarf::DW_TAG_structure_type || N.getTag() == dwarf::DW_TAG_union_type || N.getTag() == dwarf::DW_TAG_enumeration_type || - N.getTag() == dwarf::DW_TAG_subroutine_type || N.getTag() == dwarf::DW_TAG_class_type, "invalid tag", &N); + Assert(isScopeRef(N, N.getScope()), "invalid scope", &N, N.getScope()); + Assert(isTypeRef(N, N.getBaseType()), "invalid base type", &N, + N.getBaseType()); + Assert(!N.getRawElements() || isa(N.getRawElements()), "invalid composite elements", &N, N.getRawElements()); Assert(isTypeRef(N, N.getRawVTableHolder()), "invalid vtable holder", &N, N.getRawVTableHolder()); - Assert(!N.getRawElements() || isa(N.getRawElements()), - "invalid composite elements", &N, N.getRawElements()); Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", &N); if (auto *Params = N.getRawTemplateParams()) visitTemplateParams(N, *Params); + + if (N.getTag() == dwarf::DW_TAG_class_type || + N.getTag() == dwarf::DW_TAG_union_type) { + Assert(N.getFile() && !N.getFile()->getFilename().empty(), + "class/union requires a filename", &N, N.getFile()); + } } void Verifier::visitDISubroutineType(const DISubroutineType &N) { @@ -885,6 +876,7 @@ void Verifier::visitDIFile(const DIFile &N) { } void Verifier::visitDICompileUnit(const DICompileUnit &N) { + Assert(N.isDistinct(), "compile units must be distinct", &N); Assert(N.getTag() == dwarf::DW_TAG_compile_unit, "invalid tag", &N); // Don't bother verifying the compilation directory or producer string @@ -928,6 +920,12 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) { Op); } } + if (auto *Array = N.getRawMacros()) { + Assert(isa(Array), "invalid macro list", &N, Array); + for (Metadata *Op : N.getMacros()->operands()) { + Assert(Op && isa(Op), "invalid macro ref", &N, Op); + } + } } void Verifier::visitDISubprogram(const DISubprogram &N) { @@ -937,13 +935,6 @@ void Verifier::visitDISubprogram(const DISubprogram &N) { Assert(isa(T), "invalid subroutine type", &N, T); Assert(isTypeRef(N, N.getRawContainingType()), "invalid containing type", &N, N.getRawContainingType()); - if (auto *RawF = N.getRawFunction()) { - auto *FMD = dyn_cast(RawF); - auto *F = FMD ? FMD->getValue() : nullptr; - auto *FT = F ? dyn_cast(F->getType()) : nullptr; - Assert(F && FT && isa(FT->getElementType()), - "invalid function", &N, F, FT); - } if (auto *Params = N.getRawTemplateParams()) visitTemplateParams(N, *Params); if (auto *S = N.getRawDeclaration()) { @@ -961,40 +952,8 @@ void Verifier::visitDISubprogram(const DISubprogram &N) { Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", &N); - auto *F = N.getFunction(); - if (!F) - return; - - // Check that all !dbg attachments lead to back to N (or, at least, another - // subprogram that describes the same function). - // - // FIXME: Check this incrementally while visiting !dbg attachments. - // FIXME: Only check when N is the canonical subprogram for F. - SmallPtrSet Seen; - for (auto &BB : *F) - for (auto &I : BB) { - // Be careful about using DILocation here since we might be dealing with - // broken code (this is the Verifier after all). - DILocation *DL = - dyn_cast_or_null(I.getDebugLoc().getAsMDNode()); - if (!DL) - continue; - if (!Seen.insert(DL).second) - continue; - - DILocalScope *Scope = DL->getInlinedAtScope(); - if (Scope && !Seen.insert(Scope).second) - continue; - - DISubprogram *SP = Scope ? 
Scope->getSubprogram() : nullptr; - if (SP && !Seen.insert(SP).second) - continue; - - // FIXME: Once N is canonical, check "SP == &N". - Assert(SP->describes(F), - "!dbg attachment points at wrong subprogram for function", &N, F, - &I, DL, Scope, SP); - } + if (N.isDefinition()) + Assert(N.isDistinct(), "subprogram definitions must be distinct", &N); } void Verifier::visitDILexicalBlockBase(const DILexicalBlockBase &N) { @@ -1020,6 +979,27 @@ void Verifier::visitDINamespace(const DINamespace &N) { Assert(isa(S), "invalid scope ref", &N, S); } +void Verifier::visitDIMacro(const DIMacro &N) { + Assert(N.getMacinfoType() == dwarf::DW_MACINFO_define || + N.getMacinfoType() == dwarf::DW_MACINFO_undef, + "invalid macinfo type", &N); + Assert(!N.getName().empty(), "anonymous macro", &N); +} + +void Verifier::visitDIMacroFile(const DIMacroFile &N) { + Assert(N.getMacinfoType() == dwarf::DW_MACINFO_start_file, + "invalid macinfo type", &N); + if (auto *F = N.getRawFile()) + Assert(isa(F), "invalid file", &N, F); + + if (auto *Array = N.getRawElements()) { + Assert(isa(Array), "invalid macro list", &N, Array); + for (Metadata *Op : N.getElements()->operands()) { + Assert(Op && isa(Op), "invalid macro ref", &N, Op); + } + } +} + void Verifier::visitDIModule(const DIModule &N) { Assert(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N); Assert(!N.getName().empty(), "anonymous module", &N); @@ -1075,9 +1055,7 @@ void Verifier::visitDILocalVariable(const DILocalVariable &N) { // Checks common to all variables. visitDIVariable(N); - Assert(N.getTag() == dwarf::DW_TAG_auto_variable || - N.getTag() == dwarf::DW_TAG_arg_variable, - "invalid tag", &N); + Assert(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N); Assert(N.getRawScope() && isa(N.getRawScope()), "local variable requires a valid scope", &N, N.getRawScope()); } @@ -1274,7 +1252,10 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, I->getKindAsEnum() == Attribute::OptimizeNone || I->getKindAsEnum() == Attribute::JumpTable || I->getKindAsEnum() == Attribute::Convergent || - I->getKindAsEnum() == Attribute::ArgMemOnly) { + I->getKindAsEnum() == Attribute::ArgMemOnly || + I->getKindAsEnum() == Attribute::NoRecurse || + I->getKindAsEnum() == Attribute::InaccessibleMemOnly || + I->getKindAsEnum() == Attribute::InaccessibleMemOrArgMemOnly) { if (!isFunction) { CheckFailed("Attribute '" + I->getAsString() + "' only applies to functions!", V); @@ -1365,7 +1346,7 @@ void Verifier::VerifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty, V); if (PointerType *PTy = dyn_cast(Ty)) { - SmallPtrSet Visited; + SmallPtrSet Visited; if (!PTy->getElementType()->isSized(&Visited)) { Assert(!Attrs.hasAttribute(Idx, Attribute::ByVal) && !Attrs.hasAttribute(Idx, Attribute::InAlloca), @@ -1444,6 +1425,18 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly)), "Attributes 'readnone and readonly' are incompatible!", V); + Assert( + !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InaccessibleMemOrArgMemOnly)), + "Attributes 'readnone and inaccessiblemem_or_argmemonly' are incompatible!", V); + + Assert( + !(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone) && + Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::InaccessibleMemOnly)), + "Attributes 'readnone and inaccessiblememonly' are incompatible!", V); + Assert( 
!(Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::NoInline) && Attrs.hasAttribute(AttributeSet::FunctionIndex, @@ -1501,7 +1494,35 @@ void Verifier::VerifyFunctionMetadata( } } -void Verifier::VerifyConstantExprBitcastType(const ConstantExpr *CE) { +void Verifier::visitConstantExprsRecursively(const Constant *EntryC) { + if (!ConstantExprVisited.insert(EntryC).second) + return; + + SmallVector Stack; + Stack.push_back(EntryC); + + while (!Stack.empty()) { + const Constant *C = Stack.pop_back_val(); + + // Check this constant expression. + if (const auto *CE = dyn_cast(C)) + visitConstantExpr(CE); + + // Visit all sub-expressions. + for (const Use &U : C->operands()) { + const auto *OpC = dyn_cast(U); + if (!OpC) + continue; + if (isa(OpC)) + continue; // Global values get visited separately. + if (!ConstantExprVisited.insert(OpC).second) + continue; + Stack.push_back(OpC); + } + } +} + +void Verifier::visitConstantExpr(const ConstantExpr *CE) { if (CE->getOpcode() != Instruction::BitCast) return; @@ -1554,17 +1575,11 @@ void Verifier::VerifyStatepoint(ImmutableCallSite CS) { &CI); const Value *Target = CS.getArgument(2); - const PointerType *PT = dyn_cast(Target->getType()); + auto *PT = dyn_cast(Target->getType()); Assert(PT && PT->getElementType()->isFunctionTy(), "gc.statepoint callee must be of function pointer type", &CI, Target); FunctionType *TargetFuncType = cast(PT->getElementType()); - if (NumPatchBytes) - Assert(isa(Target->stripPointerCasts()), - "gc.statepoint must have null as call target if number of patchable " - "bytes is non zero", - &CI); - const Value *NumCallArgsV = CS.getArgument(3); Assert(isa(NumCallArgsV), "gc.statepoint number of arguments to underlying call " @@ -1743,17 +1758,33 @@ void Verifier::visitFunction(const Function &F) { FT->getParamType(i)); Assert(I->getType()->isFirstClassType(), "Function arguments must have first-class types!", I); - if (!isLLVMdotName) + if (!isLLVMdotName) { Assert(!I->getType()->isMetadataTy(), "Function takes metadata but isn't an intrinsic", I, &F); + Assert(!I->getType()->isTokenTy(), + "Function takes token but isn't an intrinsic", I, &F); + } } + if (!isLLVMdotName) + Assert(!F.getReturnType()->isTokenTy(), + "Functions returns a token but isn't an intrinsic", &F); + // Get the function metadata attachments. SmallVector, 4> MDs; F.getAllMetadata(MDs); assert(F.hasMetadata() != MDs.empty() && "Bit out-of-sync"); VerifyFunctionMetadata(MDs); + // Check validity of the personality function + if (F.hasPersonalityFn()) { + auto *Per = dyn_cast(F.getPersonalityFn()->stripPointerCasts()); + if (Per) + Assert(Per->getParent() == F.getParent(), + "Referencing personality function in another module!", + &F, F.getParent(), Per, Per->getParent()); + } + if (F.isMaterializable()) { // Function has a body somewhere we can't see. Assert(MDs.empty(), "unmaterialized function cannot have metadata", &F, @@ -1782,13 +1813,27 @@ void Verifier::visitFunction(const Function &F) { } // Visit metadata attachments. - for (const auto &I : MDs) + for (const auto &I : MDs) { + // Verify that the attachment is legal. + switch (I.first) { + default: + break; + case LLVMContext::MD_dbg: + Assert(isa(I.second), + "function !dbg attachment must be a subprogram", &F, I.second); + break; + } + + // Verify the metadata itself. visitMDNode(*I.second); + } } // If this function is actually an intrinsic, verify that it is only used in // direct call/invokes, never having its "address taken". 
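A sketch of that address-taken check in isolation, assuming `F` comes from a fully materialized module (otherwise not all uses are visible):

#include "llvm/IR/Function.h"

bool intrinsicUsesAreValid(const llvm::Function &F) {
  if (!F.getIntrinsicID())
    return true;                 // Not an intrinsic: nothing to check.
  const llvm::User *U = nullptr;
  return !F.hasAddressTaken(&U); // On failure, U names the offending user.
}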
- if (F.getIntrinsicID()) { + // Only do this if the module is materialized, otherwise we don't have all the + // uses. + if (F.getIntrinsicID() && F.getParent()->isMaterialized()) { const User *U; if (F.hasAddressTaken(&U)) Assert(0, "Invalid user of intrinsic instruction!", U); @@ -1798,6 +1843,44 @@ void Verifier::visitFunction(const Function &F) { (F.isDeclaration() && F.hasExternalLinkage()) || F.hasAvailableExternallyLinkage(), "Function is marked as dllimport, but not external.", &F); + + auto *N = F.getSubprogram(); + if (!N) + return; + + // Check that all !dbg attachments lead to back to N (or, at least, another + // subprogram that describes the same function). + // + // FIXME: Check this incrementally while visiting !dbg attachments. + // FIXME: Only check when N is the canonical subprogram for F. + SmallPtrSet Seen; + for (auto &BB : F) + for (auto &I : BB) { + // Be careful about using DILocation here since we might be dealing with + // broken code (this is the Verifier after all). + DILocation *DL = + dyn_cast_or_null(I.getDebugLoc().getAsMDNode()); + if (!DL) + continue; + if (!Seen.insert(DL).second) + continue; + + DILocalScope *Scope = DL->getInlinedAtScope(); + if (Scope && !Seen.insert(Scope).second) + continue; + + DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr; + + // Scope and SP could be the same MDNode and we don't want to skip + // validation in that case + if (SP && ((Scope != SP) && !Seen.insert(SP).second)) + continue; + + // FIXME: Once N is canonical, check "SP == &N". + Assert(SP->describes(&F), + "!dbg attachment points at wrong subprogram for function", N, &F, + &I, DL, Scope, SP); + } } // verifyBasicBlock - Verify that a basic block is well formed... @@ -2194,6 +2277,9 @@ void Verifier::visitPHINode(PHINode &PN) { isa(--BasicBlock::iterator(&PN)), "PHI nodes not grouped at top of basic block!", &PN, PN.getParent()); + // Check that a PHI doesn't yield a Token. + Assert(!PN.getType()->isTokenTy(), "PHI nodes cannot have token type!"); + // Check that all of the values of the PHI node have the same type as the // result, and that the incoming blocks are really basic blocks. for (Value *IncValue : PN.incoming_values()) { @@ -2296,16 +2382,44 @@ void Verifier::VerifyCallSite(CallSite CS) { // Verify that there's no metadata unless it's a direct call to an intrinsic. if (CS.getCalledFunction() == nullptr || !CS.getCalledFunction()->getName().startswith("llvm.")) { - for (FunctionType::param_iterator PI = FTy->param_begin(), - PE = FTy->param_end(); PI != PE; ++PI) - Assert(!(*PI)->isMetadataTy(), + for (Type *ParamTy : FTy->params()) { + Assert(!ParamTy->isMetadataTy(), "Function has metadata parameter but isn't an intrinsic", I); + Assert(!ParamTy->isTokenTy(), + "Function has token parameter but isn't an intrinsic", I); + } } + // Verify that indirect calls don't return tokens. + if (CS.getCalledFunction() == nullptr) + Assert(!FTy->getReturnType()->isTokenTy(), + "Return type cannot be token for indirect call!"); + if (Function *F = CS.getCalledFunction()) if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID()) visitIntrinsicCallSite(ID, CS); + // Verify that a callsite has at most one "deopt" and one "funclet" operand + // bundle. 
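An illustrative query against the same operand-bundle API, assuming `CI` is a llvm::CallInst; the verifier code below rejects call sites where this count exceeds one:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"

unsigned countDeoptBundles(const llvm::CallInst &CI) {
  unsigned N = 0;
  for (unsigned i = 0, e = CI.getNumOperandBundles(); i != e; ++i)
    if (CI.getOperandBundleAt(i).getTagID() == llvm::LLVMContext::OB_deopt)
      ++N; // Count only bundles tagged "deopt".
  return N;
}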
+ bool FoundDeoptBundle = false, FoundFuncletBundle = false; + for (unsigned i = 0, e = CS.getNumOperandBundles(); i < e; ++i) { + OperandBundleUse BU = CS.getOperandBundleAt(i); + uint32_t Tag = BU.getTagID(); + if (Tag == LLVMContext::OB_deopt) { + Assert(!FoundDeoptBundle, "Multiple deopt operand bundles", I); + FoundDeoptBundle = true; + } + if (Tag == LLVMContext::OB_funclet) { + Assert(!FoundFuncletBundle, "Multiple funclet operand bundles", I); + FoundFuncletBundle = true; + Assert(BU.Inputs.size() == 1, + "Expected exactly one funclet bundle operand", I); + Assert(isa(BU.Inputs.front()), + "Funclet bundle operands should correspond to a FuncletPadInst", + I); + } + } + visitInstruction(*I); } @@ -2406,10 +2520,12 @@ void Verifier::visitCallInst(CallInst &CI) { void Verifier::visitInvokeInst(InvokeInst &II) { VerifyCallSite(&II); - // Verify that there is a landingpad instruction as the first non-PHI - // instruction of the 'unwind' destination. - Assert(II.getUnwindDest()->isLandingPad(), - "The unwind destination does not have a landingpad instruction!", &II); + // Verify that the first non-PHI instruction of the unwind destination is an + // exception handling instruction. + Assert( + II.getUnwindDest()->isEHPad(), + "The unwind destination does not have an exception handling instruction!", + &II); visitTerminatorInst(II); } @@ -2622,6 +2738,14 @@ void Verifier::visitRangeMetadata(Instruction& I, } } +void Verifier::checkAtomicMemAccessSize(const Module *M, Type *Ty, + const Instruction *I) { + unsigned Size = M->getDataLayout().getTypeSizeInBits(Ty); + Assert(Size >= 8, "atomic memory access' size must be byte-sized", Ty, I); + Assert(!(Size & (Size - 1)), + "atomic memory access' operand must have a power-of-two size", Ty, I); +} + void Verifier::visitLoadInst(LoadInst &LI) { PointerType *PTy = dyn_cast(LI.getOperand(0)->getType()); Assert(PTy, "Load operand must be a pointer.", &LI); @@ -2633,14 +2757,12 @@ void Verifier::visitLoadInst(LoadInst &LI) { "Load cannot have Release ordering", &LI); Assert(LI.getAlignment() != 0, "Atomic load must specify explicit alignment", &LI); - if (!ElTy->isPointerTy()) { - Assert(ElTy->isIntegerTy(), "atomic load operand must have integer type!", - &LI, ElTy); - unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert(Size >= 8 && !(Size & (Size - 1)), - "atomic load operand must be power-of-two byte-sized integer", &LI, - ElTy); - } + Assert(ElTy->isIntegerTy() || ElTy->isPointerTy() || + ElTy->isFloatingPointTy(), + "atomic load operand must have integer, pointer, or floating point " + "type!", + ElTy, &LI); + checkAtomicMemAccessSize(M, ElTy, &LI); } else { Assert(LI.getSynchScope() == CrossThread, "Non-atomic load cannot have SynchronizationScope specified", &LI); @@ -2662,14 +2784,12 @@ void Verifier::visitStoreInst(StoreInst &SI) { "Store cannot have Acquire ordering", &SI); Assert(SI.getAlignment() != 0, "Atomic store must specify explicit alignment", &SI); - if (!ElTy->isPointerTy()) { - Assert(ElTy->isIntegerTy(), - "atomic store operand must have integer type!", &SI, ElTy); - unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert(Size >= 8 && !(Size & (Size - 1)), - "atomic store operand must be power-of-two byte-sized integer", - &SI, ElTy); - } + Assert(ElTy->isIntegerTy() || ElTy->isPointerTy() || + ElTy->isFloatingPointTy(), + "atomic store operand must have integer, pointer, or floating point " + "type!", + ElTy, &SI); + checkAtomicMemAccessSize(M, ElTy, &SI); } else { Assert(SI.getSynchScope() == CrossThread, "Non-atomic 
store cannot have SynchronizationScope specified", &SI); @@ -2678,7 +2798,7 @@ void Verifier::visitStoreInst(StoreInst &SI) { } void Verifier::visitAllocaInst(AllocaInst &AI) { - SmallPtrSet Visited; + SmallPtrSet Visited; PointerType *PTy = AI.getType(); Assert(PTy->getAddressSpace() == 0, "Allocation instruction pointer not in the generic address space!", @@ -2716,9 +2836,7 @@ void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) { Type *ElTy = PTy->getElementType(); Assert(ElTy->isIntegerTy(), "cmpxchg operand must have integer type!", &CXI, ElTy); - unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert(Size >= 8 && !(Size & (Size - 1)), - "cmpxchg operand must be power-of-two byte-sized integer", &CXI, ElTy); + checkAtomicMemAccessSize(M, ElTy, &CXI); Assert(ElTy == CXI.getOperand(1)->getType(), "Expected value type does not match pointer operand type!", &CXI, ElTy); @@ -2737,10 +2855,7 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) { Type *ElTy = PTy->getElementType(); Assert(ElTy->isIntegerTy(), "atomicrmw operand must have integer type!", &RMWI, ElTy); - unsigned Size = ElTy->getPrimitiveSizeInBits(); - Assert(Size >= 8 && !(Size & (Size - 1)), - "atomicrmw operand must be power-of-two byte-sized integer", &RMWI, - ElTy); + checkAtomicMemAccessSize(M, ElTy, &RMWI); Assert(ElTy == RMWI.getOperand(1)->getType(), "Argument value type does not match pointer operand type!", &RMWI, ElTy); @@ -2777,23 +2892,62 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) { visitInstruction(IVI); } -void Verifier::visitLandingPadInst(LandingPadInst &LPI) { - BasicBlock *BB = LPI.getParent(); +void Verifier::visitEHPadPredecessors(Instruction &I) { + assert(I.isEHPad()); + BasicBlock *BB = I.getParent(); + Function *F = BB->getParent(); + + Assert(BB != &F->getEntryBlock(), "EH pad cannot be in entry block.", &I); + + if (auto *LPI = dyn_cast(&I)) { + // The landingpad instruction defines its parent as a landing pad block. The + // landing pad block may be branched to only by the unwind edge of an + // invoke. + for (BasicBlock *PredBB : predecessors(BB)) { + const auto *II = dyn_cast(PredBB->getTerminator()); + Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, + "Block containing LandingPadInst must be jumped to " + "only by the unwind edge of an invoke.", + LPI); + } + return; + } + if (auto *CPI = dyn_cast(&I)) { + if (!pred_empty(BB)) + Assert(BB->getUniquePredecessor() == CPI->getCatchSwitch()->getParent(), + "Block containg CatchPadInst must be jumped to " + "only by its catchswitch.", + CPI); + return; + } + + for (BasicBlock *PredBB : predecessors(BB)) { + TerminatorInst *TI = PredBB->getTerminator(); + if (auto *II = dyn_cast(TI)) { + Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB, + "EH pad must be jumped to via an unwind edge", &I, II); + } else if (!isa(TI) && !isa(TI)) { + Assert(false, "EH pad must be jumped to via an unwind edge", &I, TI); + } + } +} + +void Verifier::visitLandingPadInst(LandingPadInst &LPI) { // The landingpad instruction is ill-formed if it doesn't have any clauses and // isn't a cleanup. Assert(LPI.getNumClauses() > 0 || LPI.isCleanup(), "LandingPadInst needs at least one clause or to be a cleanup.", &LPI); - // The landingpad instruction defines its parent as a landing pad block. The - // landing pad block may be branched to only by the unwind edge of an invoke. 
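The landing-pad predecessor rule restated as a standalone predicate (a sketch, assuming `BB` is a landing pad block):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instructions.h"

bool hasOnlyInvokeUnwindPreds(llvm::BasicBlock *BB) {
  for (llvm::BasicBlock *Pred : llvm::predecessors(BB)) {
    auto *II = llvm::dyn_cast<llvm::InvokeInst>(Pred->getTerminator());
    // Every predecessor must reach BB via an invoke's unwind edge only.
    if (!II || II->getUnwindDest() != BB || II->getNormalDest() == BB)
      return false;
  }
  return true;
}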
- for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { - const InvokeInst *II = dyn_cast((*I)->getTerminator()); - Assert(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, - "Block containing LandingPadInst must be jumped to " - "only by the unwind edge of an invoke.", + visitEHPadPredecessors(LPI); + + if (!LandingPadResultTy) + LandingPadResultTy = LPI.getType(); + else + Assert(LandingPadResultTy == LPI.getType(), + "The landingpad instruction should have a consistent result type " + "inside a function.", &LPI); - } Function *F = LPI.getParent()->getParent(); Assert(F->hasPersonalityFn(), @@ -2820,6 +2974,132 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { visitInstruction(LPI); } +void Verifier::visitCatchPadInst(CatchPadInst &CPI) { + visitEHPadPredecessors(CPI); + + BasicBlock *BB = CPI.getParent(); + + Function *F = BB->getParent(); + Assert(F->hasPersonalityFn(), + "CatchPadInst needs to be in a function with a personality.", &CPI); + + Assert(isa(CPI.getParentPad()), + "CatchPadInst needs to be directly nested in a CatchSwitchInst.", + CPI.getParentPad()); + + // The catchpad instruction must be the first non-PHI instruction in the + // block. + Assert(BB->getFirstNonPHI() == &CPI, + "CatchPadInst not the first non-PHI instruction in the block.", &CPI); + + visitInstruction(CPI); +} + +void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) { + Assert(isa(CatchReturn.getOperand(0)), + "CatchReturnInst needs to be provided a CatchPad", &CatchReturn, + CatchReturn.getOperand(0)); + + visitTerminatorInst(CatchReturn); +} + +void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) { + visitEHPadPredecessors(CPI); + + BasicBlock *BB = CPI.getParent(); + + Function *F = BB->getParent(); + Assert(F->hasPersonalityFn(), + "CleanupPadInst needs to be in a function with a personality.", &CPI); + + // The cleanuppad instruction must be the first non-PHI instruction in the + // block. + Assert(BB->getFirstNonPHI() == &CPI, + "CleanupPadInst not the first non-PHI instruction in the block.", + &CPI); + + auto *ParentPad = CPI.getParentPad(); + Assert(isa(ParentPad) || isa(ParentPad) || + isa(ParentPad) || isa(ParentPad), + "CleanupPadInst has an invalid parent.", &CPI); + + User *FirstUser = nullptr; + BasicBlock *FirstUnwindDest = nullptr; + for (User *U : CPI.users()) { + BasicBlock *UnwindDest; + if (CleanupReturnInst *CRI = dyn_cast(U)) { + UnwindDest = CRI->getUnwindDest(); + } else if (isa(U) || isa(U)) { + continue; + } else if (CallSite(U)) { + continue; + } else { + Assert(false, "bogus cleanuppad use", &CPI); + } + + if (!FirstUser) { + FirstUser = U; + FirstUnwindDest = UnwindDest; + } else { + Assert( + UnwindDest == FirstUnwindDest, + "cleanupret instructions from the same cleanuppad must have the same " + "unwind destination", + FirstUser, U); + } + } + + visitInstruction(CPI); +} + +void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) { + visitEHPadPredecessors(CatchSwitch); + + BasicBlock *BB = CatchSwitch.getParent(); + + Function *F = BB->getParent(); + Assert(F->hasPersonalityFn(), + "CatchSwitchInst needs to be in a function with a personality.", + &CatchSwitch); + + // The catchswitch instruction must be the first non-PHI instruction in the + // block. 
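The "first non-PHI" requirement, as a small standalone predicate (sketch; assumes `BB` is the block holding the EH pad):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

bool ehPadLeadsItsBlock(llvm::BasicBlock *BB) {
  // Only PHI nodes may precede an EH pad in its block, so the pad itself
  // must be the first non-PHI instruction.
  const llvm::Instruction *I = BB->getFirstNonPHI();
  return I && I->isEHPad();
}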
+ Assert(BB->getFirstNonPHI() == &CatchSwitch, + "CatchSwitchInst not the first non-PHI instruction in the block.", + &CatchSwitch); + + if (BasicBlock *UnwindDest = CatchSwitch.getUnwindDest()) { + Instruction *I = UnwindDest->getFirstNonPHI(); + Assert(I->isEHPad() && !isa(I), + "CatchSwitchInst must unwind to an EH block which is not a " + "landingpad.", + &CatchSwitch); + } + + auto *ParentPad = CatchSwitch.getParentPad(); + Assert(isa(ParentPad) || isa(ParentPad) || + isa(ParentPad) || isa(ParentPad), + "CatchSwitchInst has an invalid parent.", ParentPad); + + visitTerminatorInst(CatchSwitch); +} + +void Verifier::visitCleanupReturnInst(CleanupReturnInst &CRI) { + Assert(isa(CRI.getOperand(0)), + "CleanupReturnInst needs to be provided a CleanupPad", &CRI, + CRI.getOperand(0)); + + if (BasicBlock *UnwindDest = CRI.getUnwindDest()) { + Instruction *I = UnwindDest->getFirstNonPHI(); + Assert(I->isEHPad() && !isa(I), + "CleanupReturnInst must unwind to an EH block which is not a " + "landingpad.", + &CRI); + } + + visitTerminatorInst(CRI); +} + void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { Instruction *Op = cast(I.getOperand(i)); // If we have an invalid invoke, don't try to compute the dominance. @@ -2835,6 +3115,19 @@ void Verifier::verifyDominatesUse(Instruction &I, unsigned i) { "Instruction does not dominate all uses!", Op, &I); } +void Verifier::visitDereferenceableMetadata(Instruction& I, MDNode* MD) { + Assert(I.getType()->isPointerTy(), "dereferenceable, dereferenceable_or_null " "apply only to pointer types", &I); + Assert(isa(I), + "dereferenceable, dereferenceable_or_null apply only to load" + " instructions, use attributes for calls or invokes", &I); + Assert(MD->getNumOperands() == 1, "dereferenceable, dereferenceable_or_null " "take one operand!", &I); + ConstantInt *CI = mdconst::dyn_extract(MD->getOperand(0)); + Assert(CI && CI->getType()->isIntegerTy(64), "dereferenceable, " "dereferenceable_or_null metadata value must be an i64!", &I); +} + /// verifyInstruction - Verify that an instruction is well formed.
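For contrast with the consumer-side checks in visitDereferenceableMetadata above, this is roughly what a well-formed producer looks like: a single i64 operand attached to a load of pointer type (markDereferenceable is an illustrative helper):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include <cstdint>
using namespace llvm;

static void markDereferenceable(LoadInst &LI, uint64_t Bytes) {
  LLVMContext &Ctx = LI.getContext();
  // Exactly one operand, and it must be an i64 -- the shape the verifier demands.
  Metadata *Ops[] = {ConstantAsMetadata::get(
      ConstantInt::get(Type::getInt64Ty(Ctx), Bytes))};
  LI.setMetadata(LLVMContext::MD_dereferenceable, MDNode::get(Ctx, Ops));
}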
/// void Verifier::visitInstruction(Instruction &I) { @@ -2903,7 +3196,7 @@ void Verifier::visitInstruction(Instruction &I) { " donothing or patchpoint", &I); Assert(F->getParent() == M, "Referencing function in another module!", - &I); + &I, M, F, F->getParent()); } else if (BasicBlock *OpBB = dyn_cast(I.getOperand(i))) { Assert(OpBB->getParent() == BB->getParent(), "Referring to a basic block in another function!", &I); @@ -2911,7 +3204,7 @@ Assert(OpArg->getParent() == BB->getParent(), "Referring to an argument in another function!", &I); } else if (GlobalValue *GV = dyn_cast(I.getOperand(i))) { - Assert(GV->getParent() == M, "Referencing global in another module!", &I); + Assert(GV->getParent() == M, "Referencing global in another module!", &I, M, GV, GV->getParent()); } else if (isa(I.getOperand(i))) { verifyDominatesUse(I, i); } else if (isa(I.getOperand(i))) { @@ -2922,22 +3215,7 @@ if (CE->getType()->isPtrOrPtrVectorTy()) { // If we have a ConstantExpr pointer, we need to see if it came from an // illegal bitcast (inttoptr ) - SmallVector Stack; - SmallPtrSet Visited; - Stack.push_back(CE); - - while (!Stack.empty()) { - const ConstantExpr *V = Stack.pop_back_val(); - if (!Visited.insert(V).second) - continue; - - VerifyConstantExprBitcastType(V); - - for (unsigned I = 0, N = V->getNumOperands(); I != N; ++I) { - if (ConstantExpr *Op = dyn_cast(V->getOperand(I))) - Stack.push_back(Op); - } - } + visitConstantExprsRecursively(CE); } } } @@ -2971,6 +3249,28 @@ void Verifier::visitInstruction(Instruction &I) { &I); } + if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable)) + visitDereferenceableMetadata(I, MD); + + if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable_or_null)) + visitDereferenceableMetadata(I, MD); + + if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) { + Assert(I.getType()->isPointerTy(), "align applies only to pointer types", + &I); + Assert(isa(I), "align applies only to load instructions, " + "use attributes for calls or invokes", &I); + Assert(AlignMD->getNumOperands() == 1, "align takes one operand!", &I); + ConstantInt *CI = mdconst::dyn_extract(AlignMD->getOperand(0)); + Assert(CI && CI->getType()->isIntegerTy(64), + "align metadata value must be an i64!", &I); + uint64_t Align = CI->getZExtValue(); + Assert(isPowerOf2_64(Align), + "align metadata value must be a power of 2!", &I); + Assert(Align <= Value::MaximumAlignment, + "alignment is larger than implementation defined limit", &I); + } + if (MDNode *N = I.getDebugLoc().getAsMDNode()) { Assert(isa(N), "invalid !dbg metadata attachment", &I, N); visitMDNode(*N); @@ -2998,6 +3298,7 @@ bool Verifier::VerifyIntrinsicType(Type *Ty, case IITDescriptor::Void: return !Ty->isVoidTy(); case IITDescriptor::VarArg: return true; case IITDescriptor::MMX: return !Ty->isX86_MMXTy(); + case IITDescriptor::Token: return !Ty->isTokenTy(); case IITDescriptor::Metadata: return !Ty->isMetadataTy(); case IITDescriptor::Half: return !Ty->isHalfTy(); case IITDescriptor::Float: return !Ty->isFloatTy(); @@ -3321,9 +3622,6 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { VerifyStatepoint(CS); break; - case Intrinsic::experimental_gc_result_int: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_ptr: case Intrinsic::experimental_gc_result: { Assert(CS.getParent()->getParent()->hasGC(), "Enclosing function does not use GC.", CS); @@ -3339,9
+3637,8 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // Assert that result type matches wrapped callee. const Value *Target = StatepointCS.getArgument(2); - const PointerType *PT = cast(Target->getType()); - const FunctionType *TargetFuncType = - cast(PT->getElementType()); + auto *PT = cast(Target->getType()); + auto *TargetFuncType = cast(PT->getElementType()); Assert(CS.getType() == TargetFuncType->getReturnType(), "gc.result result type does not match wrapped callee", CS); break; @@ -3352,19 +3649,16 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { // Check that this relocate is correctly tied to the statepoint // This is the case for relocate on the unwinding path of an invoke statepoint - if (ExtractValueInst *ExtractValue = - dyn_cast(CS.getArgOperand(0))) { - Assert(isa(ExtractValue->getAggregateOperand()), - "gc relocate on unwind path incorrectly linked to the statepoint", - CS); + if (LandingPadInst *LandingPad = + dyn_cast(CS.getArgOperand(0))) { const BasicBlock *InvokeBB = - ExtractValue->getParent()->getUniquePredecessor(); + LandingPad->getParent()->getUniquePredecessor(); // Landingpad relocates should have only one predecessor with invoke // statepoint terminator Assert(InvokeBB, "safepoints should have unique landingpads", - ExtractValue->getParent()); + LandingPad->getParent()); Assert(InvokeBB->getTerminator(), "safepoint block should be well formed", InvokeBB); Assert(isStatepoint(InvokeBB->getTerminator()), @@ -3448,6 +3742,12 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { "gc.relocate: relocating a pointer shouldn't change its address space", CS); break; } + case Intrinsic::eh_exceptioncode: + case Intrinsic::eh_exceptionpointer: { + Assert(isa(CS.getArgOperand(0)), + "eh.exceptionpointer argument must be a catchpad", CS); + break; + } }; } @@ -3598,7 +3898,7 @@ void Verifier::verifyTypeRefs() { for (auto *CU : CUs->operands()) if (auto Ts = cast(CU)->getRetainedTypes()) for (DIType *Op : Ts) - if (auto *T = dyn_cast(Op)) + if (auto *T = dyn_cast_or_null(Op)) if (auto *S = T->getRawIdentifier()) { UnresolvedTypeRefs.erase(S); TypeRefs.insert(std::make_pair(S, T)); diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp index 43fee65db7f5..9b243fc571d0 100644 --- a/lib/IRReader/IRReader.cpp +++ b/lib/IRReader/IRReader.cpp @@ -31,11 +31,11 @@ static const char *const TimeIRParsingName = "Parse IR"; static std::unique_ptr getLazyIRModule(std::unique_ptr Buffer, SMDiagnostic &Err, - LLVMContext &Context) { + LLVMContext &Context, bool ShouldLazyLoadMetadata) { if (isBitcode((const unsigned char *)Buffer->getBufferStart(), (const unsigned char *)Buffer->getBufferEnd())) { - ErrorOr> ModuleOrErr = - getLazyBitcodeModule(std::move(Buffer), Context); + ErrorOr> ModuleOrErr = getLazyBitcodeModule( + std::move(Buffer), Context, ShouldLazyLoadMetadata); if (std::error_code EC = ModuleOrErr.getError()) { Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, EC.message()); @@ -49,7 +49,8 @@ getLazyIRModule(std::unique_ptr Buffer, SMDiagnostic &Err, std::unique_ptr llvm::getLazyIRFileModule(StringRef Filename, SMDiagnostic &Err, - LLVMContext &Context) { + LLVMContext &Context, + bool ShouldLazyLoadMetadata) { ErrorOr> FileOrErr = MemoryBuffer::getFileOrSTDIN(Filename); if (std::error_code EC = FileOrErr.getError()) { @@ -58,7 +59,8 @@ std::unique_ptr llvm::getLazyIRFileModule(StringRef Filename, return nullptr; } - return getLazyIRModule(std::move(FileOrErr.get()), Err,
Context); + return getLazyIRModule(std::move(FileOrErr.get()), Err, Context, + ShouldLazyLoadMetadata); } std::unique_ptr llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err, diff --git a/lib/LTO/LLVMBuild.txt b/lib/LTO/LLVMBuild.txt index ea79d5e6a83a..7a0ad50fb94c 100644 --- a/lib/LTO/LLVMBuild.txt +++ b/lib/LTO/LLVMBuild.txt @@ -25,7 +25,6 @@ required_libraries = BitWriter CodeGen Core - IPA IPO InstCombine Linker diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 25ae4ac76e3c..6baaaa4b1395 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/ParallelCG.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/Config/config.h" #include "llvm/IR/Constants.h" @@ -63,47 +64,15 @@ const char* LTOCodeGenerator::getVersionString() { #endif } -static void handleLTODiagnostic(const DiagnosticInfo &DI) { - DiagnosticPrinterRawOStream DP(errs()); - DI.print(DP); - errs() << "\n"; -} - -LTOCodeGenerator::LTOCodeGenerator() - : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context), - handleLTODiagnostic) { +LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context) + : Context(Context), MergedModule(new Module("ld-temp.o", Context)), + TheLinker(new Linker(*MergedModule)) { initializeLTOPasses(); } -LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr Context) - : OwnedContext(std::move(Context)), Context(*OwnedContext), - IRLinker(new Module("ld-temp.o", *OwnedContext), handleLTODiagnostic) { - initializeLTOPasses(); -} +LTOCodeGenerator::~LTOCodeGenerator() {} -void LTOCodeGenerator::destroyMergedModule() { - if (OwnedModule) { - assert(IRLinker.getModule() == &OwnedModule->getModule() && - "The linker's module should be the same as the owned module"); - delete OwnedModule; - OwnedModule = nullptr; - } else if (IRLinker.getModule()) - IRLinker.deleteModule(); -} - -LTOCodeGenerator::~LTOCodeGenerator() { - destroyMergedModule(); - - delete TargetMach; - TargetMach = nullptr; - - for (std::vector::iterator I = CodegenOptions.begin(), - E = CodegenOptions.end(); - I != E; ++I) - free(*I); -} - -// Initialize LTO passes. Please keep this funciton in sync with +// Initialize LTO passes. Please keep this function in sync with // PassManagerBuilder::populateLTOPassManager(), and make sure all LTO // passes are initialized. 
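A usage sketch for the widened IRReader entry point above; passing true defers metadata materialization along with function bodies (the file name is illustrative):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include <memory>
using namespace llvm;

static std::unique_ptr<Module> loadForSymbolScan(LLVMContext &Ctx) {
  SMDiagnostic Err;
  return getLazyIRFileModule("input.bc", Err, Ctx,
                             /*ShouldLazyLoadMetadata=*/true);
}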
void LTOCodeGenerator::initializeLTOPasses() { @@ -120,11 +89,11 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeGlobalDCEPass(R); initializeArgPromotionPass(R); initializeJumpThreadingPass(R); - initializeSROAPass(R); + initializeSROALegacyPassPass(R); initializeSROA_DTPass(R); initializeSROA_SSAUpPass(R); initializeFunctionAttrsPass(R); - initializeGlobalsModRefPass(R); + initializeGlobalsAAWrapperPassPass(R); initializeLICMPass(R); initializeMergedLoadStoreMotionPass(R); initializeGVNPass(R); @@ -133,41 +102,39 @@ void LTOCodeGenerator::initializeLTOPasses() { initializeCFGSimplifyPassPass(R); } -bool LTOCodeGenerator::addModule(LTOModule *mod) { - assert(&mod->getModule().getContext() == &Context && +bool LTOCodeGenerator::addModule(LTOModule *Mod) { + assert(&Mod->getModule().getContext() == &Context && "Expected module in same context"); - bool ret = IRLinker.linkInModule(&mod->getModule()); + bool ret = TheLinker->linkInModule(Mod->takeModule()); - const std::vector &undefs = mod->getAsmUndefinedRefs(); + const std::vector &undefs = Mod->getAsmUndefinedRefs(); for (int i = 0, e = undefs.size(); i != e; ++i) AsmUndefinedRefs[undefs[i]] = 1; return !ret; } -void LTOCodeGenerator::setModule(LTOModule *Mod) { +void LTOCodeGenerator::setModule(std::unique_ptr Mod) { assert(&Mod->getModule().getContext() == &Context && "Expected module in same context"); - // Delete the old merged module. - destroyMergedModule(); AsmUndefinedRefs.clear(); - OwnedModule = Mod; - IRLinker.setModule(&Mod->getModule()); + MergedModule = Mod->takeModule(); + TheLinker = make_unique(*MergedModule); const std::vector &Undefs = Mod->getAsmUndefinedRefs(); for (int I = 0, E = Undefs.size(); I != E; ++I) AsmUndefinedRefs[Undefs[I]] = 1; } -void LTOCodeGenerator::setTargetOptions(TargetOptions options) { - Options = options; +void LTOCodeGenerator::setTargetOptions(TargetOptions Options) { + this->Options = Options; } -void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { - switch (debug) { +void LTOCodeGenerator::setDebugInfo(lto_debug_model Debug) { + switch (Debug) { case LTO_DEBUG_MODEL_NONE: EmitDwarfDebugInfo = false; return; @@ -179,176 +146,8 @@ void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { llvm_unreachable("Unknown debug format!"); } -void LTOCodeGenerator::setCodePICModel(lto_codegen_model model) { - switch (model) { - case LTO_CODEGEN_PIC_MODEL_STATIC: - case LTO_CODEGEN_PIC_MODEL_DYNAMIC: - case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: - case LTO_CODEGEN_PIC_MODEL_DEFAULT: - CodeModel = model; - return; - } - llvm_unreachable("Unknown PIC model!"); -} - -bool LTOCodeGenerator::writeMergedModules(const char *path, - std::string &errMsg) { - if (!determineTarget(errMsg)) - return false; - - // mark which symbols can not be internalized - applyScopeRestrictions(); - - // create output file - std::error_code EC; - tool_output_file Out(path, EC, sys::fs::F_None); - if (EC) { - errMsg = "could not open bitcode file for writing: "; - errMsg += path; - return false; - } - - // write bitcode to it - WriteBitcodeToFile(IRLinker.getModule(), Out.os(), ShouldEmbedUselists); - Out.os().close(); - - if (Out.os().has_error()) { - errMsg = "could not write bitcode file: "; - errMsg += path; - Out.os().clear_error(); - return false; - } - - Out.keep(); - return true; -} - -bool LTOCodeGenerator::compileOptimizedToFile(const char **name, - std::string &errMsg) { - // make unique temp .o file to put generated object file - SmallString<128> Filename; - int FD; - std::error_code EC = - 
sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filename); - if (EC) { - errMsg = EC.message(); - return false; - } - - // generate object file - tool_output_file objFile(Filename.c_str(), FD); - - bool genResult = compileOptimized(objFile.os(), errMsg); - objFile.os().close(); - if (objFile.os().has_error()) { - objFile.os().clear_error(); - sys::fs::remove(Twine(Filename)); - return false; - } - - objFile.keep(); - if (!genResult) { - sys::fs::remove(Twine(Filename)); - return false; - } - - NativeObjectPath = Filename.c_str(); - *name = NativeObjectPath.c_str(); - return true; -} - -std::unique_ptr -LTOCodeGenerator::compileOptimized(std::string &errMsg) { - const char *name; - if (!compileOptimizedToFile(&name, errMsg)) - return nullptr; - - // read .o file into memory buffer - ErrorOr> BufferOrErr = - MemoryBuffer::getFile(name, -1, false); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - sys::fs::remove(NativeObjectPath); - return nullptr; - } - - // remove temp files - sys::fs::remove(NativeObjectPath); - - return std::move(*BufferOrErr); -} - - -bool LTOCodeGenerator::compile_to_file(const char **name, - bool disableInline, - bool disableGVNLoadPRE, - bool disableVectorization, - std::string &errMsg) { - if (!optimize(disableInline, disableGVNLoadPRE, - disableVectorization, errMsg)) - return false; - - return compileOptimizedToFile(name, errMsg); -} - -std::unique_ptr -LTOCodeGenerator::compile(bool disableInline, bool disableGVNLoadPRE, - bool disableVectorization, std::string &errMsg) { - if (!optimize(disableInline, disableGVNLoadPRE, - disableVectorization, errMsg)) - return nullptr; - - return compileOptimized(errMsg); -} - -bool LTOCodeGenerator::determineTarget(std::string &errMsg) { - if (TargetMach) - return true; - - std::string TripleStr = IRLinker.getModule()->getTargetTriple(); - if (TripleStr.empty()) - TripleStr = sys::getDefaultTargetTriple(); - llvm::Triple Triple(TripleStr); - - // create target machine from info for merged modules - const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); - if (!march) - return false; - - // The relocation model is actually a static member of TargetMachine and - // needs to be set before the TargetMachine is instantiated. - Reloc::Model RelocModel = Reloc::Default; - switch (CodeModel) { - case LTO_CODEGEN_PIC_MODEL_STATIC: - RelocModel = Reloc::Static; - break; - case LTO_CODEGEN_PIC_MODEL_DYNAMIC: - RelocModel = Reloc::PIC_; - break; - case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: - RelocModel = Reloc::DynamicNoPIC; - break; - case LTO_CODEGEN_PIC_MODEL_DEFAULT: - // RelocModel is already the default, so leave it that way. - break; - } - - // Construct LTOModule, hand over ownership of module and target. Use MAttr as - // the default set of features. - SubtargetFeatures Features(MAttr); - Features.getDefaultSubtargetFeatures(Triple); - std::string FeatureStr = Features.getString(); - // Set a default CPU for Darwin triples. 
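The removed compileOptimizedToFile above (and its replacement later in this hunk) both follow the same temp-file protocol: create a unique file, stream into it, keep it on success, remove it on failure. The pattern in isolation (emitToTempObject is an illustrative helper):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ToolOutputFile.h"
using namespace llvm;

static bool emitToTempObject(ArrayRef<char> Bytes, SmallString<128> &Path) {
  int FD;
  if (sys::fs::createTemporaryFile("lto-llvm", "o", FD, Path))
    return false;
  tool_output_file Out(Path.c_str(), FD);
  Out.os().write(Bytes.data(), Bytes.size());
  Out.os().close();
  if (Out.os().has_error()) {
    Out.os().clear_error();
    sys::fs::remove(Twine(Path)); // failed write: clean up the temp file
    return false;
  }
  Out.keep(); // success: keep the file past the destructor
  return true;
}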
- if (MCpu.empty() && Triple.isOSDarwin()) { - if (Triple.getArch() == llvm::Triple::x86_64) - MCpu = "core2"; - else if (Triple.getArch() == llvm::Triple::x86) - MCpu = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) - MCpu = "cyclone"; - } - - CodeGenOpt::Level CGOptLevel; +void LTOCodeGenerator::setOptLevel(unsigned Level) { + OptLevel = Level; switch (OptLevel) { case 0: CGOptLevel = CodeGenOpt::None; @@ -363,10 +162,157 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) { CGOptLevel = CodeGenOpt::Aggressive; break; } +} - TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options, - RelocModel, CodeModel::Default, - CGOptLevel); +bool LTOCodeGenerator::writeMergedModules(const char *Path) { + if (!determineTarget()) + return false; + + // mark which symbols can not be internalized + applyScopeRestrictions(); + + // create output file + std::error_code EC; + tool_output_file Out(Path, EC, sys::fs::F_None); + if (EC) { + std::string ErrMsg = "could not open bitcode file for writing: "; + ErrMsg += Path; + emitError(ErrMsg); + return false; + } + + // write bitcode to it + WriteBitcodeToFile(MergedModule.get(), Out.os(), ShouldEmbedUselists); + Out.os().close(); + + if (Out.os().has_error()) { + std::string ErrMsg = "could not write bitcode file: "; + ErrMsg += Path; + emitError(ErrMsg); + Out.os().clear_error(); + return false; + } + + Out.keep(); + return true; +} + +bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) { + // make unique temp output file to put generated code + SmallString<128> Filename; + int FD; + + const char *Extension = + (FileType == TargetMachine::CGFT_AssemblyFile ? "s" : "o"); + + std::error_code EC = + sys::fs::createTemporaryFile("lto-llvm", Extension, FD, Filename); + if (EC) { + emitError(EC.message()); + return false; + } + + // generate object file + tool_output_file objFile(Filename.c_str(), FD); + + bool genResult = compileOptimized(&objFile.os()); + objFile.os().close(); + if (objFile.os().has_error()) { + objFile.os().clear_error(); + sys::fs::remove(Twine(Filename)); + return false; + } + + objFile.keep(); + if (!genResult) { + sys::fs::remove(Twine(Filename)); + return false; + } + + NativeObjectPath = Filename.c_str(); + *Name = NativeObjectPath.c_str(); + return true; +} + +std::unique_ptr +LTOCodeGenerator::compileOptimized() { + const char *name; + if (!compileOptimizedToFile(&name)) + return nullptr; + + // read .o file into memory buffer + ErrorOr> BufferOrErr = + MemoryBuffer::getFile(name, -1, false); + if (std::error_code EC = BufferOrErr.getError()) { + emitError(EC.message()); + sys::fs::remove(NativeObjectPath); + return nullptr; + } + + // remove temp files + sys::fs::remove(NativeObjectPath); + + return std::move(*BufferOrErr); +} + +bool LTOCodeGenerator::compile_to_file(const char **Name, bool DisableVerify, + bool DisableInline, + bool DisableGVNLoadPRE, + bool DisableVectorization) { + if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, + DisableVectorization)) + return false; + + return compileOptimizedToFile(Name); +} + +std::unique_ptr +LTOCodeGenerator::compile(bool DisableVerify, bool DisableInline, + bool DisableGVNLoadPRE, bool DisableVectorization) { + if (!optimize(DisableVerify, DisableInline, DisableGVNLoadPRE, + DisableVectorization)) + return nullptr; + + return compileOptimized(); +} + +bool LTOCodeGenerator::determineTarget() { + if (TargetMach) + return true; + + std::string TripleStr = MergedModule->getTargetTriple(); + if (TripleStr.empty()) { + 
TripleStr = sys::getDefaultTargetTriple(); + MergedModule->setTargetTriple(TripleStr); + } + llvm::Triple Triple(TripleStr); + + // create target machine from info for merged modules + std::string ErrMsg; + const Target *march = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!march) { + emitError(ErrMsg); + return false; + } + + // Construct LTOModule, hand over ownership of module and target. Use MAttr as + // the default set of features. + SubtargetFeatures Features(MAttr); + Features.getDefaultSubtargetFeatures(Triple); + FeatureStr = Features.getString(); + // Set a default CPU for Darwin triples. + if (MCpu.empty() && Triple.isOSDarwin()) { + if (Triple.getArch() == llvm::Triple::x86_64) + MCpu = "core2"; + else if (Triple.getArch() == llvm::Triple::x86) + MCpu = "yonah"; + else if (Triple.getArch() == llvm::Triple::aarch64) + MCpu = "cyclone"; + } + + TargetMach.reset(march->createTargetMachine(TripleStr, MCpu, FeatureStr, + Options, RelocModel, + CodeModel::Default, CGOptLevel)); return true; } @@ -453,7 +399,6 @@ static void accumulateAndSortLibcalls(std::vector &Libcalls, void LTOCodeGenerator::applyScopeRestrictions() { if (ScopeRestrictionsDone || !ShouldInternalize) return; - Module *mergedModule = IRLinker.getModule(); // Start off with a verification pass. legacy::PassManager passes; @@ -467,20 +412,17 @@ void LTOCodeGenerator::applyScopeRestrictions() { TargetLibraryInfoImpl TLII(Triple(TargetMach->getTargetTriple())); TargetLibraryInfo TLI(TLII); - accumulateAndSortLibcalls(Libcalls, TLI, *mergedModule, *TargetMach); + accumulateAndSortLibcalls(Libcalls, TLI, *MergedModule, *TargetMach); - for (Module::iterator f = mergedModule->begin(), - e = mergedModule->end(); f != e; ++f) - applyRestriction(*f, Libcalls, MustPreserveList, AsmUsed, Mangler); - for (Module::global_iterator v = mergedModule->global_begin(), - e = mergedModule->global_end(); v != e; ++v) - applyRestriction(*v, Libcalls, MustPreserveList, AsmUsed, Mangler); - for (Module::alias_iterator a = mergedModule->alias_begin(), - e = mergedModule->alias_end(); a != e; ++a) - applyRestriction(*a, Libcalls, MustPreserveList, AsmUsed, Mangler); + for (Function &f : *MergedModule) + applyRestriction(f, Libcalls, MustPreserveList, AsmUsed, Mangler); + for (GlobalVariable &v : MergedModule->globals()) + applyRestriction(v, Libcalls, MustPreserveList, AsmUsed, Mangler); + for (GlobalAlias &a : MergedModule->aliases()) + applyRestriction(a, Libcalls, MustPreserveList, AsmUsed, Mangler); GlobalVariable *LLVMCompilerUsed = - mergedModule->getGlobalVariable("llvm.compiler.used"); + MergedModule->getGlobalVariable("llvm.compiler.used"); findUsedValues(LLVMCompilerUsed, AsmUsed); if (LLVMCompilerUsed) LLVMCompilerUsed->eraseFromParent(); @@ -495,7 +437,7 @@ void LTOCodeGenerator::applyScopeRestrictions() { llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size()); LLVMCompilerUsed = - new llvm::GlobalVariable(*mergedModule, ATy, false, + new llvm::GlobalVariable(*MergedModule, ATy, false, llvm::GlobalValue::AppendingLinkage, llvm::ConstantArray::get(ATy, asmUsed2), "llvm.compiler.used"); @@ -506,21 +448,18 @@ void LTOCodeGenerator::applyScopeRestrictions() { passes.add(createInternalizePass(MustPreserveList)); // apply scope restrictions - passes.run(*mergedModule); + passes.run(*MergedModule); ScopeRestrictionsDone = true; } /// Optimize merged modules using various IPO passes -bool LTOCodeGenerator::optimize(bool DisableInline, +bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline, bool 
DisableGVNLoadPRE, - bool DisableVectorization, - std::string &errMsg) { - if (!this->determineTarget(errMsg)) + bool DisableVectorization) { + if (!this->determineTarget()) return false; - Module *mergedModule = IRLinker.getModule(); - // Mark which symbols can not be internalized this->applyScopeRestrictions(); @@ -528,7 +467,7 @@ bool LTOCodeGenerator::optimize(bool DisableInline, legacy::PassManager passes; // Add an appropriate DataLayout instance for this module... - mergedModule->setDataLayout(*TargetMach->getDataLayout()); + MergedModule->setDataLayout(TargetMach->createDataLayout()); passes.add( createTargetTransformInfoWrapperPass(TargetMach->getTargetIRAnalysis())); @@ -542,60 +481,57 @@ bool LTOCodeGenerator::optimize(bool DisableInline, PMB.Inliner = createFunctionInliningPass(); PMB.LibraryInfo = new TargetLibraryInfoImpl(TargetTriple); PMB.OptLevel = OptLevel; - PMB.VerifyInput = true; - PMB.VerifyOutput = true; + PMB.VerifyInput = !DisableVerify; + PMB.VerifyOutput = !DisableVerify; PMB.populateLTOPassManager(passes); // Run our queue of passes all at once now, efficiently. - passes.run(*mergedModule); + passes.run(*MergedModule); return true; } -bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out, - std::string &errMsg) { - if (!this->determineTarget(errMsg)) +bool LTOCodeGenerator::compileOptimized(ArrayRef Out) { + if (!this->determineTarget()) return false; - Module *mergedModule = IRLinker.getModule(); - - legacy::PassManager codeGenPasses; + legacy::PassManager preCodeGenPasses; // If the bitcode files contain ARC code and were compiled with optimization, // the ObjCARCContractPass must be run, so do it unconditionally here. - codeGenPasses.add(createObjCARCContractPass()); + preCodeGenPasses.add(createObjCARCContractPass()); + preCodeGenPasses.run(*MergedModule); - if (TargetMach->addPassesToEmitFile(codeGenPasses, out, - TargetMachine::CGFT_ObjectFile)) { - errMsg = "target file type not supported"; - return false; - } - - // Run the code generator, and write assembly file - codeGenPasses.run(*mergedModule); + // Do code generation. We need to preserve the module in case the client calls + // writeMergedModules() after compilation, but we only need to allow this at + // parallelism level 1. This is achieved by having splitCodeGen return the + // original module at parallelism level 1 which we then assign back to + // MergedModule. + MergedModule = + splitCodeGen(std::move(MergedModule), Out, MCpu, FeatureStr, Options, + RelocModel, CodeModel::Default, CGOptLevel, FileType); return true; } /// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging /// LTO problems. -void LTOCodeGenerator::setCodeGenDebugOptions(const char *options) { - for (std::pair o = getToken(options); - !o.first.empty(); o = getToken(o.second)) { - // ParseCommandLineOptions() expects argv[0] to be program name. Lazily add - // that. 
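The compileOptimized overload above hands code generation to splitCodeGen, one raw_pwrite_stream per codegen partition; with a single stream the module comes back unsplit, which is what lets MergedModule survive for a later writeMergedModules call. A minimal caller sketch (the two-stream wrapper is illustrative):

#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <memory>
using namespace llvm;

static std::unique_ptr<Module>
codegenTwoWay(std::unique_ptr<Module> M, raw_pwrite_stream &OS0,
              raw_pwrite_stream &OS1, StringRef CPU, StringRef Features,
              const TargetOptions &Opts) {
  raw_pwrite_stream *OSs[] = {&OS0, &OS1}; // two streams => two partitions
  return splitCodeGen(std::move(M), OSs, CPU, Features, Opts, Reloc::Default,
                      CodeModel::Default, CodeGenOpt::Default,
                      TargetMachine::CGFT_ObjectFile);
}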
- if (CodegenOptions.empty()) - CodegenOptions.push_back(strdup("libLLVMLTO")); - CodegenOptions.push_back(strdup(o.first.str().c_str())); - } +void LTOCodeGenerator::setCodeGenDebugOptions(const char *Options) { + for (std::pair o = getToken(Options); !o.first.empty(); + o = getToken(o.second)) + CodegenOptions.push_back(o.first); } void LTOCodeGenerator::parseCodeGenDebugOptions() { // if options were requested, set them - if (!CodegenOptions.empty()) - cl::ParseCommandLineOptions(CodegenOptions.size(), - const_cast(&CodegenOptions[0])); + if (!CodegenOptions.empty()) { + // ParseCommandLineOptions() expects argv[0] to be program name. + std::vector CodegenArgv(1, "libLLVMLTO"); + for (std::string &Arg : CodegenOptions) + CodegenArgv.push_back(Arg.c_str()); + cl::ParseCommandLineOptions(CodegenArgv.size(), CodegenArgv.data()); + } } void LTOCodeGenerator::DiagnosticHandler(const DiagnosticInfo &DI, @@ -645,3 +581,20 @@ LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler, Context.setDiagnosticHandler(LTOCodeGenerator::DiagnosticHandler, this, /* RespectFilters */ true); } + +namespace { +class LTODiagnosticInfo : public DiagnosticInfo { + const Twine &Msg; +public: + LTODiagnosticInfo(const Twine &DiagMsg, DiagnosticSeverity Severity=DS_Error) + : DiagnosticInfo(DK_Linker, Severity), Msg(DiagMsg) {} + void print(DiagnosticPrinter &DP) const override { DP << Msg; } +}; +} + +void LTOCodeGenerator::emitError(const std::string &ErrMsg) { + if (DiagHandler) + (*DiagHandler)(LTO_DS_ERROR, ErrMsg.c_str(), DiagContext); + else + Context.diagnose(LTODiagnosticInfo(ErrMsg)); +} diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index 53ed4175f8e3..409b94902332 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -91,106 +91,97 @@ bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer, return StringRef(Triple).startswith(TriplePrefix); } -LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options, - std::string &errMsg) { +std::string LTOModule::getProducerString(MemoryBuffer *Buffer) { + ErrorOr BCOrErr = + IRObjectFile::findBitcodeInMemBuffer(Buffer->getMemBufferRef()); + if (!BCOrErr) + return ""; + LLVMContext Context; + return getBitcodeProducerString(*BCOrErr, Context); +} + +ErrorOr> +LTOModule::createFromFile(LLVMContext &Context, const char *path, + TargetOptions options) { ErrorOr> BufferOrErr = MemoryBuffer::getFile(path); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - return nullptr; - } + if (std::error_code EC = BufferOrErr.getError()) + return EC; std::unique_ptr Buffer = std::move(BufferOrErr.get()); - return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg, - &getGlobalContext()); + return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } -LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size, - TargetOptions options, - std::string &errMsg) { - return createFromOpenFileSlice(fd, path, size, 0, options, errMsg); +ErrorOr> +LTOModule::createFromOpenFile(LLVMContext &Context, int fd, const char *path, + size_t size, TargetOptions options) { + return createFromOpenFileSlice(Context, fd, path, size, 0, options); } -LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path, - size_t map_size, off_t offset, - TargetOptions options, - std::string &errMsg) { +ErrorOr> +LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, + const char *path, size_t map_size, + off_t offset, TargetOptions options) { ErrorOr> BufferOrErr = 
MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - return nullptr; - } + if (std::error_code EC = BufferOrErr.getError()) + return EC; std::unique_ptr Buffer = std::move(BufferOrErr.get()); - return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg, - &getGlobalContext()); + return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } -LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path) { - return createInContext(mem, length, options, errMsg, path, - &getGlobalContext()); +ErrorOr> +LTOModule::createFromBuffer(LLVMContext &Context, const void *mem, + size_t length, TargetOptions options, + StringRef path) { + return createInContext(mem, length, options, path, &Context); } -LTOModule *LTOModule::createInLocalContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, - StringRef path) { - return createInContext(mem, length, options, errMsg, path, nullptr); +ErrorOr> +LTOModule::createInLocalContext(const void *mem, size_t length, + TargetOptions options, StringRef path) { + return createInContext(mem, length, options, path, nullptr); } -LTOModule *LTOModule::createInContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path, - LLVMContext *Context) { +ErrorOr> +LTOModule::createInContext(const void *mem, size_t length, + TargetOptions options, StringRef path, + LLVMContext *Context) { StringRef Data((const char *)mem, length); MemoryBufferRef Buffer(Data, path); - return makeLTOModule(Buffer, options, errMsg, Context); + return makeLTOModule(Buffer, options, Context); } -static std::unique_ptr parseBitcodeFileImpl(MemoryBufferRef Buffer, - LLVMContext &Context, - bool ShouldBeLazy, - std::string &ErrMsg) { +static ErrorOr> +parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context, + bool ShouldBeLazy) { // Find the buffer. ErrorOr MBOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer); - if (std::error_code EC = MBOrErr.getError()) { - ErrMsg = EC.message(); - return nullptr; - } - - std::function DiagnosticHandler = - [&ErrMsg](const DiagnosticInfo &DI) { - raw_string_ostream Stream(ErrMsg); - DiagnosticPrinterRawOStream DP(Stream); - DI.print(DP); - }; + if (std::error_code EC = MBOrErr.getError()) + return EC; if (!ShouldBeLazy) { // Parse the full file. - ErrorOr> M = - parseBitcodeFile(*MBOrErr, Context, DiagnosticHandler); - if (!M) - return nullptr; + ErrorOr> M = parseBitcodeFile(*MBOrErr, Context); + if (std::error_code EC = M.getError()) + return EC; return std::move(*M); } // Parse lazily. 
std::unique_ptr LightweightBuf = MemoryBuffer::getMemBuffer(*MBOrErr, false); - ErrorOr> M = - getLazyBitcodeModule(std::move(LightweightBuf), Context, - DiagnosticHandler, true /*ShouldLazyLoadMetadata*/); - if (!M) - return nullptr; + ErrorOr> M = getLazyBitcodeModule( + std::move(LightweightBuf), Context, true /*ShouldLazyLoadMetadata*/); + if (std::error_code EC = M.getError()) + return EC; return std::move(*M); } -LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, - TargetOptions options, std::string &errMsg, - LLVMContext *Context) { +ErrorOr> +LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetOptions options, + LLVMContext *Context) { std::unique_ptr OwnedContext; if (!Context) { OwnedContext = llvm::make_unique(); @@ -199,11 +190,12 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, // If we own a context, we know this is being used only for symbol // extraction, not linking. Be lazy in that case. - std::unique_ptr M = parseBitcodeFileImpl( - Buffer, *Context, - /* ShouldBeLazy */ static_cast(OwnedContext), errMsg); - if (!M) - return nullptr; + ErrorOr> MOrErr = + parseBitcodeFileImpl(Buffer, *Context, + /* ShouldBeLazy */ static_cast(OwnedContext)); + if (std::error_code EC = MOrErr.getError()) + return EC; + std::unique_ptr &M = *MOrErr; std::string TripleStr = M->getTargetTriple(); if (TripleStr.empty()) @@ -211,9 +203,10 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, llvm::Triple Triple(TripleStr); // find machine architecture for this module + std::string errMsg; const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); if (!march) - return nullptr; + return std::unique_ptr(nullptr); // construct LTOModule, hand over ownership of module and target SubtargetFeatures Features; @@ -232,25 +225,21 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr, options); - M->setDataLayout(*target->getDataLayout()); + M->setDataLayout(target->createDataLayout()); std::unique_ptr IRObj( new object::IRObjectFile(Buffer, std::move(M))); - LTOModule *Ret; + std::unique_ptr Ret; if (OwnedContext) - Ret = new LTOModule(std::move(IRObj), target, std::move(OwnedContext)); + Ret.reset(new LTOModule(std::move(IRObj), target, std::move(OwnedContext))); else - Ret = new LTOModule(std::move(IRObj), target); - - if (Ret->parseSymbols(errMsg)) { - delete Ret; - return nullptr; - } + Ret.reset(new LTOModule(std::move(IRObj), target)); + Ret->parseSymbols(); Ret->parseMetadata(); - return Ret; + return std::move(Ret); } /// Create a MemoryBuffer from a memory range with an optional name. @@ -583,9 +572,7 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, info.symbol = decl; } -/// parseSymbols - Parse the symbols from the module and model-level ASM and add -/// them to either the defined or undefined lists. 
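Callers of the refactored factories above now unwrap an ErrorOr instead of checking a string out-parameter. A sketch of the new idiom (the path and wrapper are illustrative):

#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <memory>
using namespace llvm;

static std::unique_ptr<LTOModule> loadLTOModule(LLVMContext &Ctx,
                                                TargetOptions Opts) {
  ErrorOr<std::unique_ptr<LTOModule>> MOrErr =
      LTOModule::createFromFile(Ctx, "input.o", Opts);
  if (std::error_code EC = MOrErr.getError()) {
    errs() << "cannot create LTO module: " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(*MOrErr);
}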
-bool LTOModule::parseSymbols(std::string &errMsg) { +void LTOModule::parseSymbols() { for (auto &Sym : IRFile->symbols()) { const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); uint32_t Flags = Sym.getFlags(); @@ -640,8 +627,6 @@ bool LTOModule::parseSymbols(std::string &errMsg) { NameAndAttributes info = u->getValue(); _symbols.push_back(info); } - - return false; } /// parseMetadata - Parse metadata from the module diff --git a/lib/LibDriver/LibDriver.cpp b/lib/LibDriver/LibDriver.cpp index b33a22ff0cf8..3ae543460745 100644 --- a/lib/LibDriver/LibDriver.cpp +++ b/lib/LibDriver/LibDriver.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // Defines an interface to a lib.exe-compatible driver that also understands -// bitcode files. Used by llvm-lib and lld-link2 /lib. +// bitcode files. Used by llvm-lib and lld-link /lib. // //===----------------------------------------------------------------------===// @@ -51,7 +51,7 @@ static const llvm::opt::OptTable::Info infoTable[] = { class LibOptTable : public llvm::opt::OptTable { public: - LibOptTable() : OptTable(infoTable, llvm::array_lengthof(infoTable), true) {} + LibOptTable() : OptTable(infoTable, true) {} }; } @@ -102,7 +102,7 @@ static Optional findInputFile(StringRef File, int llvm::libDriverMain(llvm::ArrayRef ArgsArr) { SmallVector NewArgs(ArgsArr.begin(), ArgsArr.end()); BumpPtrAllocator Alloc; - BumpPtrStringSaver Saver(Alloc); + StringSaver Saver(Alloc); cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine, NewArgs); ArgsArr = NewArgs; @@ -135,14 +135,13 @@ int llvm::libDriverMain(llvm::ArrayRef ArgsArr) { llvm::errs() << Arg->getValue() << ": no such file or directory\n"; return 1; } - Members.emplace_back(Saver.save(*Path), - llvm::sys::path::filename(Arg->getValue())); + Members.emplace_back(Saver.save(*Path)); } std::pair Result = llvm::writeArchive(getOutputPath(&Args, Members[0]), Members, /*WriteSymtab=*/true, object::Archive::K_GNU, - /*Deterministic*/ true); + /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin)); if (Result.second) { if (Result.first.empty()) diff --git a/lib/LibDriver/Options.td b/lib/LibDriver/Options.td index 0aa1affbebc9..5a56ef7468d4 100644 --- a/lib/LibDriver/Options.td +++ b/lib/LibDriver/Options.td @@ -12,6 +12,8 @@ class P : def libpath: P<"libpath", "Object file search path">; def out : P<"out", "Path to file to write output">; +def llvmlibthin : F<"llvmlibthin">; + //============================================================================== // The flags below do nothing. They are defined only for lib.exe compatibility. //============================================================================== diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt index f9d8e0925ae3..8916fb3f7251 100644 --- a/lib/Linker/CMakeLists.txt +++ b/lib/Linker/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMLinker + IRMover.cpp LinkModules.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp new file mode 100644 index 000000000000..fa6e37517fc4 --- /dev/null +++ b/lib/Linker/IRMover.cpp @@ -0,0 +1,1657 @@ +//===- lib/Linker/IRMover.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Linker/IRMover.h" +#include "LinkDiagnosticInfo.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/GVMaterializer.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/Transforms/Utils/Cloning.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// TypeMap implementation. +//===----------------------------------------------------------------------===// + +namespace { +class TypeMapTy : public ValueMapTypeRemapper { + /// This is a mapping from a source type to a destination type to use. + DenseMap MappedTypes; + + /// When checking to see if two subgraphs are isomorphic, we speculatively + /// add types to MappedTypes, but keep track of them here in case we need to + /// roll back. + SmallVector SpeculativeTypes; + + SmallVector SpeculativeDstOpaqueTypes; + + /// This is a list of non-opaque structs in the source module that are mapped + /// to an opaque struct in the destination module. + SmallVector SrcDefinitionsToResolve; + + /// This is the set of opaque types in the destination module that are + /// getting a body from the source module. + SmallPtrSet DstResolvedOpaqueTypes; + +public: + TypeMapTy(IRMover::IdentifiedStructTypeSet &DstStructTypesSet) + : DstStructTypesSet(DstStructTypesSet) {} + + IRMover::IdentifiedStructTypeSet &DstStructTypesSet; + /// Indicate that the specified type in the destination module is conceptually + /// equivalent to the specified type in the source module. + void addTypeMapping(Type *DstTy, Type *SrcTy); + + /// Produce a body for an opaque type in the dest module from a type + /// definition in the source module. + void linkDefinedTypeBodies(); + + /// Return the mapped type to use for the specified input type from the + /// source module. + Type *get(Type *SrcTy); + Type *get(Type *SrcTy, SmallPtrSet &Visited); + + void finishType(StructType *DTy, StructType *STy, ArrayRef ETypes); + + FunctionType *get(FunctionType *T) { + return cast(get((Type *)T)); + } + +private: + Type *remapType(Type *SrcTy) override { return get(SrcTy); } + + bool areTypesIsomorphic(Type *DstTy, Type *SrcTy); +}; +} + +void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) { + assert(SpeculativeTypes.empty()); + assert(SpeculativeDstOpaqueTypes.empty()); + + // Check to see if these types are recursively isomorphic and establish a + // mapping between them if so. + if (!areTypesIsomorphic(DstTy, SrcTy)) { + // Oops, they aren't isomorphic. Just discard this request by rolling out + // any speculative mappings we've established. + for (Type *Ty : SpeculativeTypes) + MappedTypes.erase(Ty); + + SrcDefinitionsToResolve.resize(SrcDefinitionsToResolve.size() - + SpeculativeDstOpaqueTypes.size()); + for (StructType *Ty : SpeculativeDstOpaqueTypes) + DstResolvedOpaqueTypes.erase(Ty); + } else { + for (Type *Ty : SpeculativeTypes) + if (auto *STy = dyn_cast(Ty)) + if (STy->hasName()) + STy->setName(""); + } + SpeculativeTypes.clear(); + SpeculativeDstOpaqueTypes.clear(); +} + +/// Recursively walk this pair of types, returning true if they are isomorphic, +/// false if they are not. +bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { + // Two types with differing kinds are clearly not isomorphic.
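The recursion that follows is easier to see on a toy representation first: memoize Src-to-Dst decisions, answer from the memo on revisits, and recurse over contained types; the caller rolls the memo back if the walk fails. A standalone sketch (ToyType is invented for illustration):

#include <cstddef>
#include <unordered_map>
#include <vector>

struct ToyType {
  int Kind;                     // analogous to Type::getTypeID()
  std::vector<ToyType *> Elems; // analogous to contained types
};

static bool isomorphic(ToyType *Dst, ToyType *Src,
                       std::unordered_map<ToyType *, ToyType *> &Mapped) {
  if (Dst->Kind != Src->Kind)
    return false;
  auto It = Mapped.find(Src);
  if (It != Mapped.end())
    return It->second == Dst; // already mapped: the mapping must agree
  if (Src->Elems.size() != Dst->Elems.size())
    return false;
  Mapped[Src] = Dst; // speculative entry; caller erases it on failure
  for (size_t I = 0; I != Src->Elems.size(); ++I)
    if (!isomorphic(Dst->Elems[I], Src->Elems[I], Mapped))
      return false;
  return true;
}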
+ if (DstTy->getTypeID() != SrcTy->getTypeID()) + return false; + + // If we have an entry in the MappedTypes table, then we have our answer. + Type *&Entry = MappedTypes[SrcTy]; + if (Entry) + return Entry == DstTy; + + // Two identical types are clearly isomorphic. Remember this + // non-speculatively. + if (DstTy == SrcTy) { + Entry = DstTy; + return true; + } + + // Okay, we have two types with identical kinds that we haven't seen before. + + // If this is an opaque struct type, special case it. + if (StructType *SSTy = dyn_cast(SrcTy)) { + // Mapping an opaque type to any struct, just keep the dest struct. + if (SSTy->isOpaque()) { + Entry = DstTy; + SpeculativeTypes.push_back(SrcTy); + return true; + } + + // Mapping a non-opaque source type to an opaque dest. If this is the first + // type that we're mapping onto this destination type then we succeed. Keep + // the dest, but fill it in later. If this is the second (different) type + // that we're trying to map onto the same opaque type then we fail. + if (cast(DstTy)->isOpaque()) { + // We can only map one source type onto the opaque destination type. + if (!DstResolvedOpaqueTypes.insert(cast(DstTy)).second) + return false; + SrcDefinitionsToResolve.push_back(SSTy); + SpeculativeTypes.push_back(SrcTy); + SpeculativeDstOpaqueTypes.push_back(cast(DstTy)); + Entry = DstTy; + return true; + } + } + + // If the number of subtypes disagree between the two types, then we fail. + if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes()) + return false; + + // Fail if any of the extra properties (e.g. array size) of the type disagree. + if (isa(DstTy)) + return false; // bitwidth disagrees. + if (PointerType *PT = dyn_cast(DstTy)) { + if (PT->getAddressSpace() != cast(SrcTy)->getAddressSpace()) + return false; + + } else if (FunctionType *FT = dyn_cast(DstTy)) { + if (FT->isVarArg() != cast(SrcTy)->isVarArg()) + return false; + } else if (StructType *DSTy = dyn_cast(DstTy)) { + StructType *SSTy = cast(SrcTy); + if (DSTy->isLiteral() != SSTy->isLiteral() || + DSTy->isPacked() != SSTy->isPacked()) + return false; + } else if (ArrayType *DATy = dyn_cast(DstTy)) { + if (DATy->getNumElements() != cast(SrcTy)->getNumElements()) + return false; + } else if (VectorType *DVTy = dyn_cast(DstTy)) { + if (DVTy->getNumElements() != cast(SrcTy)->getNumElements()) + return false; + } + + // Otherwise, we speculate that these two types will line up and recursively + // check the subelements. + Entry = DstTy; + SpeculativeTypes.push_back(SrcTy); + + for (unsigned I = 0, E = SrcTy->getNumContainedTypes(); I != E; ++I) + if (!areTypesIsomorphic(DstTy->getContainedType(I), + SrcTy->getContainedType(I))) + return false; + + // If everything seems to have lined up, then everything is great. + return true; +} + +void TypeMapTy::linkDefinedTypeBodies() { + SmallVector Elements; + for (StructType *SrcSTy : SrcDefinitionsToResolve) { + StructType *DstSTy = cast(MappedTypes[SrcSTy]); + assert(DstSTy->isOpaque()); + + // Map the body of the source type over to a new body for the dest type. + Elements.resize(SrcSTy->getNumElements()); + for (unsigned I = 0, E = Elements.size(); I != E; ++I) + Elements[I] = get(SrcSTy->getElementType(I)); + + DstSTy->setBody(Elements, SrcSTy->isPacked()); + DstStructTypesSet.switchToNonOpaque(DstSTy); + } + SrcDefinitionsToResolve.clear(); + DstResolvedOpaqueTypes.clear(); +} + +void TypeMapTy::finishType(StructType *DTy, StructType *STy, + ArrayRef ETypes) { + DTy->setBody(ETypes, STy->isPacked()); + + // Steal STy's name. 
+ if (STy->hasName()) { + SmallString<16> TmpName = STy->getName(); + STy->setName(""); + DTy->setName(TmpName); + } + + DstStructTypesSet.addNonOpaque(DTy); +} + +Type *TypeMapTy::get(Type *Ty) { + SmallPtrSet Visited; + return get(Ty, Visited); +} + +Type *TypeMapTy::get(Type *Ty, SmallPtrSet &Visited) { + // If we already have an entry for this type, return it. + Type **Entry = &MappedTypes[Ty]; + if (*Entry) + return *Entry; + + // These are types that LLVM itself will unique. + bool IsUniqued = !isa(Ty) || cast(Ty)->isLiteral(); + +#ifndef NDEBUG + if (!IsUniqued) { + for (auto &Pair : MappedTypes) { + assert(!(Pair.first != Ty && Pair.second == Ty) && + "mapping to a source type"); + } + } +#endif + + if (!IsUniqued && !Visited.insert(cast(Ty)).second) { + StructType *DTy = StructType::create(Ty->getContext()); + return *Entry = DTy; + } + + // If this is not a recursive type, then just map all of the elements and + // then rebuild the type from inside out. + SmallVector ElementTypes; + + // If there are no element types to map, then the type is itself. This is + // true for the anonymous {} struct, things like 'float', integers, etc. + if (Ty->getNumContainedTypes() == 0 && IsUniqued) + return *Entry = Ty; + + // Remap all of the elements, keeping track of whether any of them change. + bool AnyChange = false; + ElementTypes.resize(Ty->getNumContainedTypes()); + for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) { + ElementTypes[I] = get(Ty->getContainedType(I), Visited); + AnyChange |= ElementTypes[I] != Ty->getContainedType(I); + } + + // If we found our type while recursively processing stuff, just use it. + Entry = &MappedTypes[Ty]; + if (*Entry) { + if (auto *DTy = dyn_cast(*Entry)) { + if (DTy->isOpaque()) { + auto *STy = cast(Ty); + finishType(DTy, STy, ElementTypes); + } + } + return *Entry; + } + + // If all of the element types mapped directly over and the type is not + // a named struct, then the type is usable as-is. + if (!AnyChange && IsUniqued) + return *Entry = Ty; + + // Otherwise, rebuild a modified type. + switch (Ty->getTypeID()) { + default: + llvm_unreachable("unknown derived type to remap"); + case Type::ArrayTyID: + return *Entry = ArrayType::get(ElementTypes[0], + cast(Ty)->getNumElements()); + case Type::VectorTyID: + return *Entry = VectorType::get(ElementTypes[0], + cast(Ty)->getNumElements()); + case Type::PointerTyID: + return *Entry = PointerType::get(ElementTypes[0], + cast(Ty)->getAddressSpace()); + case Type::FunctionTyID: + return *Entry = FunctionType::get(ElementTypes[0], + makeArrayRef(ElementTypes).slice(1), + cast(Ty)->isVarArg()); + case Type::StructTyID: { + auto *STy = cast(Ty); + bool IsPacked = STy->isPacked(); + if (IsUniqued) + return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked); + + // If the type is opaque, we can just use it directly.
+ if (STy->isOpaque()) { + DstStructTypesSet.addOpaque(STy); + return *Entry = Ty; + } + + if (StructType *OldT = + DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) { + STy->setName(""); + return *Entry = OldT; + } + + if (!AnyChange) { + DstStructTypesSet.addNonOpaque(STy); + return *Entry = Ty; + } + + StructType *DTy = StructType::create(Ty->getContext()); + finishType(DTy, STy, ElementTypes); + return *Entry = DTy; + } + } +} + +LinkDiagnosticInfo::LinkDiagnosticInfo(DiagnosticSeverity Severity, + const Twine &Msg) + : DiagnosticInfo(DK_Linker, Severity), Msg(Msg) {} +void LinkDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; } + +//===----------------------------------------------------------------------===// +// IRLinker implementation. +//===----------------------------------------------------------------------===// + +namespace { +class IRLinker; + +/// Creates prototypes for functions that are lazily linked on the fly. This +/// speeds up linking for modules with many lazily linked functions of which +/// few get used. +class GlobalValueMaterializer final : public ValueMaterializer { + IRLinker *TheIRLinker; + +public: + GlobalValueMaterializer(IRLinker *TheIRLinker) : TheIRLinker(TheIRLinker) {} + Value *materializeDeclFor(Value *V) override; + void materializeInitFor(GlobalValue *New, GlobalValue *Old) override; + Metadata *mapTemporaryMetadata(Metadata *MD) override; + void replaceTemporaryMetadata(const Metadata *OrigMD, + Metadata *NewMD) override; + bool isMetadataNeeded(Metadata *MD) override; +}; + +class LocalValueMaterializer final : public ValueMaterializer { + IRLinker *TheIRLinker; + +public: + LocalValueMaterializer(IRLinker *TheIRLinker) : TheIRLinker(TheIRLinker) {} + Value *materializeDeclFor(Value *V) override; + void materializeInitFor(GlobalValue *New, GlobalValue *Old) override; + Metadata *mapTemporaryMetadata(Metadata *MD) override; + void replaceTemporaryMetadata(const Metadata *OrigMD, + Metadata *NewMD) override; + bool isMetadataNeeded(Metadata *MD) override; +}; + +/// This is responsible for keeping track of the state used for moving data +/// from SrcM to DstM. +class IRLinker { + Module &DstM; + Module &SrcM; + + std::function AddLazyFor; + + TypeMapTy TypeMap; + GlobalValueMaterializer GValMaterializer; + LocalValueMaterializer LValMaterializer; + + /// Mapping of values from what they used to be in Src, to what they are now + /// in DstM. ValueToValueMapTy is a ValueMap, which involves some overhead + /// due to the use of Value handles which the Linker doesn't actually need, + /// but this allows us to reuse the ValueMapper code. + ValueToValueMapTy ValueMap; + ValueToValueMapTy AliasValueMap; + + DenseSet ValuesToLink; + std::vector Worklist; + + void maybeAdd(GlobalValue *GV) { + if (ValuesToLink.insert(GV).second) + Worklist.push_back(GV); + } + + /// Set to true when all global value body linking is complete (including + /// lazy linking). Used to prevent metadata linking from creating new + /// references. + bool DoneLinkingBodies = false; + + bool HasError = false; + + /// Flag indicating that we are just linking metadata (after function + /// importing). + bool IsMetadataLinkingPostpass; + + /// Flags to pass to value mapper invocations. + RemapFlags ValueMapperFlags = RF_MoveDistinctMDs; + + /// Association between metadata values created during bitcode parsing and + /// the value id.
Used to correlate temporary metadata created during + /// function importing with the final metadata parsed during the subsequent + /// metadata linking postpass. + DenseMap MetadataToIDs; + + /// Association between metadata value id and temporary metadata that + /// remains unmapped after function importing. Saved during function + /// importing and consumed during the metadata linking postpass. + DenseMap *ValIDToTempMDMap; + + /// Set of subprogram metadata that does not need to be linked into the + /// destination module, because the functions were not imported directly + /// or via an inlined body in an imported function. + SmallPtrSet UnneededSubprograms; + + /// Handles cloning of global values from the source module into + /// the destination module, including setting the attributes and visibility. + GlobalValue *copyGlobalValueProto(const GlobalValue *SGV, bool ForDefinition); + + /// Helper method for setting a message and returning an error code. + bool emitError(const Twine &Message) { + SrcM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message)); + HasError = true; + return true; + } + + void emitWarning(const Twine &Message) { + SrcM.getContext().diagnose(LinkDiagnosticInfo(DS_Warning, Message)); + } + + /// Check whether we should be linking metadata from the source module. + bool shouldLinkMetadata() { + // ValIDToTempMDMap will be non-null when we are importing or otherwise want + // to link metadata lazily, and then when linking the metadata. + // We only want to return true for the former case. + return ValIDToTempMDMap == nullptr || IsMetadataLinkingPostpass; + } + + /// Given a global in the source module, return the global in the + /// destination module that is being linked to, if any. + GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) { + // If the source has no name it can't link. If it has local linkage, + // there is no name match-up going on. + if (!SrcGV->hasName() || SrcGV->hasLocalLinkage()) + return nullptr; + + // Otherwise see if we have a match in the destination module's symtab. + GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName()); + if (!DGV) + return nullptr; + + // If we found a global with the same name in the dest module, but it has + // internal linkage, we are really not doing any linkage here. + if (DGV->hasLocalLinkage()) + return nullptr; + + // Otherwise, we do in fact link to the destination global. + return DGV; + } + + void computeTypeMapping(); + + Constant *linkAppendingVarProto(GlobalVariable *DstGV, + const GlobalVariable *SrcGV); + + bool shouldLink(GlobalValue *DGV, GlobalValue &SGV); + Constant *linkGlobalValueProto(GlobalValue *GV, bool ForAlias); + + bool linkModuleFlagsMetadata(); + + void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src); + bool linkFunctionBody(Function &Dst, Function &Src); + void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src); + bool linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src); + + /// Functions that take care of cloning a specific global value type + /// into the destination module. + GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar); + Function *copyFunctionProto(const Function *SF); + GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA); + + void linkNamedMDNodes(); + + /// Populate the UnneededSubprograms set with the DISubprogram metadata + /// from the source module that we don't need to link into the dest module, + /// because the functions were not imported directly or via an inlined body + /// in an imported function.
+  void findNeededSubprograms(ValueToValueMapTy &ValueMap);
+
+  /// The value mapper leaves nulls in the list of subprograms for any
+  /// subprogram in the UnneededSubprograms map. Strip those out after
+  /// metadata linking.
+  void stripNullSubprograms();
+
+public:
+  IRLinker(Module &DstM, IRMover::IdentifiedStructTypeSet &Set, Module &SrcM,
+           ArrayRef<GlobalValue *> ValuesToLink,
+           std::function<void(GlobalValue &GV, IRMover::ValueAdder Add)>
+               AddLazyFor,
+           DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr,
+           bool IsMetadataLinkingPostpass = false)
+      : DstM(DstM), SrcM(SrcM), AddLazyFor(AddLazyFor), TypeMap(Set),
+        GValMaterializer(this), LValMaterializer(this),
+        IsMetadataLinkingPostpass(IsMetadataLinkingPostpass),
+        ValIDToTempMDMap(ValIDToTempMDMap) {
+    for (GlobalValue *GV : ValuesToLink)
+      maybeAdd(GV);
+
+    // If appropriate, tell the value mapper that it can expect to see
+    // temporary metadata.
+    if (!shouldLinkMetadata())
+      ValueMapperFlags = ValueMapperFlags | RF_HaveUnmaterializedMetadata;
+  }
+
+  bool run();
+  Value *materializeDeclFor(Value *V, bool ForAlias);
+  void materializeInitFor(GlobalValue *New, GlobalValue *Old, bool ForAlias);
+
+  /// Save the mapping between the given temporary metadata and its metadata
+  /// value id. Used to support metadata linking as a postpass for function
+  /// importing.
+  Metadata *mapTemporaryMetadata(Metadata *MD);
+
+  /// Replace any temporary metadata saved for the source metadata's id with
+  /// the new non-temporary metadata. Used when linking metadata as a postpass
+  /// for function importing.
+  void replaceTemporaryMetadata(const Metadata *OrigMD, Metadata *NewMD);
+
+  /// Indicates whether we need to map the given metadata into the destination
+  /// module. Used to prevent linking of metadata only needed by functions not
+  /// linked into the dest module.
+  bool isMetadataNeeded(Metadata *MD);
+};
+}
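+
+// Illustrative sketch, not part of the upstream patch: a client that wants
+// purely eager linking can drive IRLinker through IRMover with a no-op lazy
+// callback. Here Vals stands for a caller-chosen ArrayRef<GlobalValue *> of
+// definitions from SrcM (hypothetical):
+//
+//   IRMover Mover(DstM);
+//   bool Failed = Mover.move(SrcM, Vals,
+//                            [](GlobalValue &, IRMover::ValueAdder) {});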
+
+/// The LLVM SymbolTable class autorenames globals that conflict in the symbol
+/// table. This is good for all clients except for us. Go through the trouble
+/// to force this back.
+static void forceRenaming(GlobalValue *GV, StringRef Name) {
+  // If the global doesn't force its name or if it already has the right name,
+  // there is nothing for us to do.
+  if (GV->hasLocalLinkage() || GV->getName() == Name)
+    return;
+
+  Module *M = GV->getParent();
+
+  // If there is a conflict, rename the conflict.
+  if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
+    GV->takeName(ConflictGV);
+    ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
+    assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
+  } else {
+    GV->setName(Name); // Force the name back
+  }
+}
+
+Value *GlobalValueMaterializer::materializeDeclFor(Value *V) {
+  return TheIRLinker->materializeDeclFor(V, false);
+}
+
+void GlobalValueMaterializer::materializeInitFor(GlobalValue *New,
+                                                 GlobalValue *Old) {
+  TheIRLinker->materializeInitFor(New, Old, false);
+}
+
+Metadata *GlobalValueMaterializer::mapTemporaryMetadata(Metadata *MD) {
+  return TheIRLinker->mapTemporaryMetadata(MD);
+}
+
+void GlobalValueMaterializer::replaceTemporaryMetadata(const Metadata *OrigMD,
+                                                       Metadata *NewMD) {
+  TheIRLinker->replaceTemporaryMetadata(OrigMD, NewMD);
+}
+
+bool GlobalValueMaterializer::isMetadataNeeded(Metadata *MD) {
+  return TheIRLinker->isMetadataNeeded(MD);
+}
+
+Value *LocalValueMaterializer::materializeDeclFor(Value *V) {
+  return TheIRLinker->materializeDeclFor(V, true);
+}
+
+void LocalValueMaterializer::materializeInitFor(GlobalValue *New,
+                                                GlobalValue *Old) {
+  TheIRLinker->materializeInitFor(New, Old, true);
+}
+
+Metadata *LocalValueMaterializer::mapTemporaryMetadata(Metadata *MD) {
+  return TheIRLinker->mapTemporaryMetadata(MD);
+}
+
+void LocalValueMaterializer::replaceTemporaryMetadata(const Metadata *OrigMD,
+                                                      Metadata *NewMD) {
+  TheIRLinker->replaceTemporaryMetadata(OrigMD, NewMD);
+}
+
+bool LocalValueMaterializer::isMetadataNeeded(Metadata *MD) {
+  return TheIRLinker->isMetadataNeeded(MD);
+}
+
+Value *IRLinker::materializeDeclFor(Value *V, bool ForAlias) {
+  auto *SGV = dyn_cast<GlobalValue>(V);
+  if (!SGV)
+    return nullptr;
+
+  return linkGlobalValueProto(SGV, ForAlias);
+}
+
+void IRLinker::materializeInitFor(GlobalValue *New, GlobalValue *Old,
+                                  bool ForAlias) {
+  // If we already created the body, just return.
+  if (auto *F = dyn_cast<Function>(New)) {
+    if (!F->isDeclaration())
+      return;
+  } else if (auto *V = dyn_cast<GlobalVariable>(New)) {
+    if (V->hasInitializer())
+      return;
+  } else {
+    auto *A = cast<GlobalAlias>(New);
+    if (A->getAliasee())
+      return;
+  }
+
+  if (ForAlias || shouldLink(New, *Old))
+    linkGlobalValueBody(*New, *Old);
+}
+
+Metadata *IRLinker::mapTemporaryMetadata(Metadata *MD) {
+  if (!ValIDToTempMDMap)
+    return nullptr;
+  // If this temporary metadata has a value id recorded during function
+  // parsing, record that in the ValIDToTempMDMap if one was provided.
+  if (MetadataToIDs.count(MD)) {
+    unsigned Idx = MetadataToIDs[MD];
+    // Check if we created a temp MD when importing a different function from
+    // this module. If so, reuse the same temporary metadata; otherwise
+    // add this temporary metadata to the map.
+    if (!ValIDToTempMDMap->count(Idx)) {
+      MDNode *Node = cast<MDNode>(MD);
+      assert(Node->isTemporary());
+      (*ValIDToTempMDMap)[Idx] = Node;
+    }
+    return (*ValIDToTempMDMap)[Idx];
+  }
+  return nullptr;
+}
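+
+// Illustrative flow, not part of the upstream patch: during function
+// importing, a !dbg attachment can reference a DISubprogram that has not
+// been linked yet. mapTemporaryMetadata() above parks a temporary MDNode
+// for its value id; the metadata linking postpass later resolves it via
+// replaceTemporaryMetadata() below, RAUW'ing and deleting the temporary.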
+
+void IRLinker::replaceTemporaryMetadata(const Metadata *OrigMD,
+                                        Metadata *NewMD) {
+  if (!ValIDToTempMDMap)
+    return;
+#ifndef NDEBUG
+  auto *N = dyn_cast_or_null<MDNode>(NewMD);
+  assert(!N || !N->isTemporary());
+#endif
+  // If a mapping between metadata value ids and temporary metadata
+  // created during function importing was provided, and the source
+  // metadata has a value id recorded during metadata parsing, replace
+  // the temporary metadata with the final mapped metadata now.
+  if (MetadataToIDs.count(OrigMD)) {
+    unsigned Idx = MetadataToIDs[OrigMD];
+    // Nothing to do if we didn't need to create a temporary metadata during
+    // function importing.
+    if (!ValIDToTempMDMap->count(Idx))
+      return;
+    MDNode *TempMD = (*ValIDToTempMDMap)[Idx];
+    TempMD->replaceAllUsesWith(NewMD);
+    MDNode::deleteTemporary(TempMD);
+    ValIDToTempMDMap->erase(Idx);
+  }
+}
+
+bool IRLinker::isMetadataNeeded(Metadata *MD) {
+  // Currently only DISubprogram metadata is marked as being unneeded.
+  if (UnneededSubprograms.empty())
+    return true;
+  MDNode *Node = dyn_cast<MDNode>(MD);
+  if (!Node)
+    return true;
+  DISubprogram *SP = getDISubprogram(Node);
+  if (!SP)
+    return true;
+  return !UnneededSubprograms.count(SP);
+}
+
+/// Loop through the global variables in the src module and merge them into the
+/// dest module.
+GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) {
+  // No linking to be performed or linking from the source: simply create an
+  // identical version of the symbol over in the dest module... the
+  // initializer will be filled in later by LinkGlobalInits.
+  GlobalVariable *NewDGV =
+      new GlobalVariable(DstM, TypeMap.get(SGVar->getType()->getElementType()),
+                         SGVar->isConstant(), GlobalValue::ExternalLinkage,
+                         /*init*/ nullptr, SGVar->getName(),
+                         /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(),
+                         SGVar->getType()->getAddressSpace());
+  NewDGV->setAlignment(SGVar->getAlignment());
+  return NewDGV;
+}
+
+/// Link the function in the source module into the destination module if
+/// needed, setting up mapping information.
+Function *IRLinker::copyFunctionProto(const Function *SF) {
+  // If there is no linkage to be performed or we are linking from the source,
+  // bring SF over.
+  return Function::Create(TypeMap.get(SF->getFunctionType()),
+                          GlobalValue::ExternalLinkage, SF->getName(), &DstM);
+}
+
+/// Set up prototypes for any aliases that come over from the source module.
+GlobalValue *IRLinker::copyGlobalAliasProto(const GlobalAlias *SGA) {
+  // If there is no linkage to be performed or we're linking from the source,
+  // bring over SGA.
+  auto *Ty = TypeMap.get(SGA->getValueType());
+  return GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(),
+                             GlobalValue::ExternalLinkage, SGA->getName(),
+                             &DstM);
+}
+
+GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
+                                            bool ForDefinition) {
+  GlobalValue *NewGV;
+  if (auto *SGVar = dyn_cast<GlobalVariable>(SGV)) {
+    NewGV = copyGlobalVariableProto(SGVar);
+  } else if (auto *SF = dyn_cast<Function>(SGV)) {
+    NewGV = copyFunctionProto(SF);
+  } else {
+    if (ForDefinition)
+      NewGV = copyGlobalAliasProto(cast<GlobalAlias>(SGV));
+    else
+      NewGV = new GlobalVariable(
+          DstM, TypeMap.get(SGV->getType()->getElementType()),
+          /*isConstant*/ false, GlobalValue::ExternalLinkage,
+          /*init*/ nullptr, SGV->getName(),
+          /*insertbefore*/ nullptr, SGV->getThreadLocalMode(),
+          SGV->getType()->getAddressSpace());
+  }
+
+  if (ForDefinition)
+    NewGV->setLinkage(SGV->getLinkage());
+  else if (SGV->hasExternalWeakLinkage() || SGV->hasWeakLinkage() ||
+           SGV->hasLinkOnceLinkage())
+    NewGV->setLinkage(GlobalValue::ExternalWeakLinkage);
+
+  NewGV->copyAttributesFrom(SGV);
+  return NewGV;
+}
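+
+// Illustrative example, not part of the upstream patch: when a weak
+// definition is copied over only as a declaration, copyGlobalValueProto()
+// above downgrades it to extern_weak, e.g.
+//
+//   source module:       @g = weak global i32 0
+//   copied declaration:  @g = extern_weak global i32
+//
+// so the destination keeps tolerating a missing definition.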
+
+/// Loop over all of the linked values to compute type mappings. For example,
+/// if we link "extern Foo *x" and "Foo *x = NULL", then we have two struct
+/// types 'Foo' but one got renamed when the module was loaded into the same
+/// LLVMContext.
+void IRLinker::computeTypeMapping() {
+  for (GlobalValue &SGV : SrcM.globals()) {
+    GlobalValue *DGV = getLinkedToGlobal(&SGV);
+    if (!DGV)
+      continue;
+
+    if (!DGV->hasAppendingLinkage() || !SGV.hasAppendingLinkage()) {
+      TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+      continue;
+    }
+
+    // Unify the element type of appending arrays.
+    ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
+    ArrayType *SAT = cast<ArrayType>(SGV.getType()->getElementType());
+    TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
+  }
+
+  for (GlobalValue &SGV : SrcM)
+    if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
+      TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+
+  for (GlobalValue &SGV : SrcM.aliases())
+    if (GlobalValue *DGV = getLinkedToGlobal(&SGV))
+      TypeMap.addTypeMapping(DGV->getType(), SGV.getType());
+
+  // Incorporate types by name, scanning all the types in the source module.
+  // At this point, the destination module may have a type "%foo = { i32 }" for
+  // example. When the source module got loaded into the same LLVMContext, if
+  // it had the same type, it would have been renamed to "%foo.42 = { i32 }".
+  std::vector<StructType *> Types = SrcM.getIdentifiedStructTypes();
+  for (StructType *ST : Types) {
+    if (!ST->hasName())
+      continue;
+
+    // Check to see if there is a dot in the name followed by a digit.
+    size_t DotPos = ST->getName().rfind('.');
+    if (DotPos == 0 || DotPos == StringRef::npos ||
+        ST->getName().back() == '.' ||
+        !isdigit(static_cast<unsigned char>(ST->getName()[DotPos + 1])))
+      continue;
+
+    // Check to see if the destination module has a struct with the prefix
+    // name.
+    StructType *DST = DstM.getTypeByName(ST->getName().substr(0, DotPos));
+    if (!DST)
+      continue;
+
+    // Don't use it if this actually came from the source module. They're in
+    // the same LLVMContext after all. Also don't use it unless the type is
+    // actually used in the destination module. This can happen in situations
+    // like this:
+    //
+    //      Module A                         Module B
+    //      --------                         --------
+    //   %Z = type { %A }                %B = type { %C.1 }
+    //   %A = type { %B.1, [7 x i8] }    %C.1 = type { i8* }
+    //   %B.1 = type { %C }              %A.2 = type { %B.3, [5 x i8] }
+    //   %C = type { i8* }               %B.3 = type { %C.1 }
+    //
+    // When we link Module B with Module A, the '%B' in Module B is
+    // used. However, that would then use '%C.1'. But when we process '%C.1',
+    // we prefer to take the '%C' version. So we are then left with both
+    // '%C.1' and '%C' being used for the same types. This leads to some
+    // variables using one type and some using the other.
+    if (TypeMap.DstStructTypesSet.hasType(DST))
+      TypeMap.addTypeMapping(DST, ST);
+  }
+
+  // Now that we have discovered all of the type equivalences, get a body for
+  // any 'opaque' types in the dest module that are now resolved.
+  TypeMap.linkDefinedTypeBodies();
+}
+
+static void getArrayElements(const Constant *C,
+                             SmallVectorImpl<Constant *> &Dest) {
+  unsigned NumElements = cast<ArrayType>(C->getType())->getNumElements();
+
+  for (unsigned i = 0; i != NumElements; ++i)
+    Dest.push_back(C->getAggregateElement(i));
+}
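+
+// Illustrative example, not part of the upstream patch: appending linkage
+// concatenates array initializers, so linking
+//
+//   @llvm.used = appending global [1 x i8*] [i8* @a]   ; module A
+//   @llvm.used = appending global [1 x i8*] [i8* @b]   ; module B
+//
+// yields @llvm.used = appending global [2 x i8*] [i8* @a, i8* @b].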
+
+/// If there were any appending global variables, link them together now.
+/// Returns nullptr on error.
+Constant *IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
+                                          const GlobalVariable *SrcGV) {
+  Type *EltTy = cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()))
+                    ->getElementType();
+
+  StringRef Name = SrcGV->getName();
+  bool IsNewStructor = false;
+  bool IsOldStructor = false;
+  if (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") {
+    if (cast<StructType>(EltTy)->getNumElements() == 3)
+      IsNewStructor = true;
+    else
+      IsOldStructor = true;
+  }
+
+  PointerType *VoidPtrTy = Type::getInt8Ty(SrcGV->getContext())->getPointerTo();
+  if (IsOldStructor) {
+    auto &ST = *cast<StructType>(EltTy);
+    Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
+    EltTy = StructType::get(SrcGV->getContext(), Tys, false);
+  }
+
+  if (DstGV) {
+    ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+
+    if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) {
+      emitError(
+          "Linking globals named '" + SrcGV->getName() +
+          "': can only link appending global with another appending global!");
+      return nullptr;
+    }
+
+    // Check to see that the two arrays agree on type.
+    if (EltTy != DstTy->getElementType()) {
+      emitError("Appending variables with different element types!");
+      return nullptr;
+    }
+    if (DstGV->isConstant() != SrcGV->isConstant()) {
+      emitError("Appending variables linked with different const'ness!");
+      return nullptr;
+    }
+
+    if (DstGV->getAlignment() != SrcGV->getAlignment()) {
+      emitError(
+          "Appending variables with different alignment need to be linked!");
+      return nullptr;
+    }
+
+    if (DstGV->getVisibility() != SrcGV->getVisibility()) {
+      emitError(
+          "Appending variables with different visibility need to be linked!");
+      return nullptr;
+    }
+
+    if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr()) {
+      emitError(
+          "Appending variables with different unnamed_addr need to be linked!");
+      return nullptr;
+    }
+
+    if (StringRef(DstGV->getSection()) != SrcGV->getSection()) {
+      emitError(
+          "Appending variables with different section name need to be linked!");
+      return nullptr;
+    }
+  }
+
+  SmallVector<Constant *, 16> DstElements;
+  if (DstGV)
+    getArrayElements(DstGV->getInitializer(), DstElements);
+
+  SmallVector<Constant *, 16> SrcElements;
+  getArrayElements(SrcGV->getInitializer(), SrcElements);
+
+  if (IsNewStructor)
+    SrcElements.erase(
+        std::remove_if(SrcElements.begin(), SrcElements.end(),
+                       [this](Constant *E) {
+                         auto *Key = dyn_cast<GlobalValue>(
+                             E->getAggregateElement(2)->stripPointerCasts());
+                         if (!Key)
+                           return false;
+                         GlobalValue *DGV = getLinkedToGlobal(Key);
+                         return !shouldLink(DGV, *Key);
+                       }),
+        SrcElements.end());
+  uint64_t NewSize = DstElements.size() + SrcElements.size();
+  ArrayType *NewType = ArrayType::get(EltTy, NewSize);
+
+  // Create the new global variable.
+  GlobalVariable *NG = new GlobalVariable(
+      DstM, NewType, SrcGV->isConstant(), SrcGV->getLinkage(),
+      /*init*/ nullptr, /*name*/ "", DstGV, SrcGV->getThreadLocalMode(),
+      SrcGV->getType()->getAddressSpace());
+
+  NG->copyAttributesFrom(SrcGV);
+  forceRenaming(NG, SrcGV->getName());
+
+  Constant *Ret = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
+
+  // Stop recursion.
+  ValueMap[SrcGV] = Ret;
+
+  for (auto *V : SrcElements) {
+    Constant *NewV;
+    if (IsOldStructor) {
+      auto *S = cast<ConstantStruct>(V);
+      auto *E1 = MapValue(S->getOperand(0), ValueMap, ValueMapperFlags,
+                          &TypeMap, &GValMaterializer);
+      auto *E2 = MapValue(S->getOperand(1), ValueMap, ValueMapperFlags,
+                          &TypeMap, &GValMaterializer);
+      Value *Null = Constant::getNullValue(VoidPtrTy);
+      NewV =
+          ConstantStruct::get(cast<StructType>(EltTy), E1, E2, Null, nullptr);
+    } else {
+      NewV =
+          MapValue(V, ValueMap, ValueMapperFlags, &TypeMap, &GValMaterializer);
+    }
+    DstElements.push_back(NewV);
+  }
+
+  NG->setInitializer(ConstantArray::get(NewType, DstElements));
+
+  // Replace any uses of the two global variables with uses of the new
+  // global.
+  if (DstGV) {
+    DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
+    DstGV->eraseFromParent();
+  }
+
+  return Ret;
+}
+
+static bool useExistingDest(GlobalValue &SGV, GlobalValue *DGV,
+                            bool ShouldLink) {
+  if (!DGV)
+    return false;
+
+  if (SGV.isDeclaration())
+    return true;
+
+  if (DGV->isDeclarationForLinker() && !SGV.isDeclarationForLinker())
+    return false;
+
+  if (ShouldLink)
+    return false;
+
+  return true;
+}
+
+bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) {
+  // Already imported all the values. Just map to the Dest value
+  // in case it is referenced in the metadata.
+  if (IsMetadataLinkingPostpass) {
+    assert(!ValuesToLink.count(&SGV) &&
+           "Source value unexpectedly requested for link during metadata link");
+    return false;
+  }
+
+  if (ValuesToLink.count(&SGV))
+    return true;
+
+  if (SGV.hasLocalLinkage())
+    return true;
+
+  if (DGV && !DGV->isDeclaration())
+    return false;
+
+  if (SGV.hasAvailableExternallyLinkage())
+    return true;
+
+  if (DoneLinkingBodies)
+    return false;
+
+  AddLazyFor(SGV, [this](GlobalValue &GV) { maybeAdd(&GV); });
+  return ValuesToLink.count(&SGV);
+}
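+
+// Illustrative sketch, not part of the upstream patch: the AddLazyFor
+// callback consulted by shouldLink() above typically pulls in every member
+// of a referenced value's comdat group. LazyComdatMembers is a hypothetical
+// map maintained by the client:
+//
+//   auto AddLazyFor = [&](GlobalValue &GV, IRMover::ValueAdder Add) {
+//     if (const Comdat *SC = GV.getComdat())
+//       for (GlobalValue *Member : LazyComdatMembers[SC])
+//         Add(*Member);
+//   };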
+
+Constant *IRLinker::linkGlobalValueProto(GlobalValue *SGV, bool ForAlias) {
+  GlobalValue *DGV = getLinkedToGlobal(SGV);
+
+  bool ShouldLink = shouldLink(DGV, *SGV);
+
+  // The value may already have been mapped and just be missing from the map.
+  if (ShouldLink) {
+    auto I = ValueMap.find(SGV);
+    if (I != ValueMap.end())
+      return cast<Constant>(I->second);
+
+    I = AliasValueMap.find(SGV);
+    if (I != AliasValueMap.end())
+      return cast<Constant>(I->second);
+  }
+
+  DGV = nullptr;
+  if (ShouldLink || !ForAlias)
+    DGV = getLinkedToGlobal(SGV);
+
+  // Handle the ultra special appending linkage case first.
+  assert(!DGV || SGV->hasAppendingLinkage() == DGV->hasAppendingLinkage());
+  if (SGV->hasAppendingLinkage())
+    return linkAppendingVarProto(cast_or_null<GlobalVariable>(DGV),
+                                 cast<GlobalVariable>(SGV));
+
+  GlobalValue *NewGV;
+  if (useExistingDest(*SGV, DGV, ShouldLink)) {
+    NewGV = DGV;
+  } else {
+    // If we are done linking global value bodies (i.e. we are performing
+    // metadata linking), don't link in the global value due to this
+    // reference, simply map it to null.
+    if (DoneLinkingBodies)
+      return nullptr;
+
+    NewGV = copyGlobalValueProto(SGV, ShouldLink);
+    if (!ForAlias)
+      forceRenaming(NewGV, SGV->getName());
+  }
+  if (ShouldLink || ForAlias) {
+    if (const Comdat *SC = SGV->getComdat()) {
+      if (auto *GO = dyn_cast<GlobalObject>(NewGV)) {
+        Comdat *DC = DstM.getOrInsertComdat(SC->getName());
+        DC->setSelectionKind(SC->getSelectionKind());
+        GO->setComdat(DC);
+      }
+    }
+  }
+
+  if (!ShouldLink && ForAlias)
+    NewGV->setLinkage(GlobalValue::InternalLinkage);
+
+  Constant *C = NewGV;
+  if (DGV)
+    C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType()));
+
+  if (DGV && NewGV != DGV) {
+    DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType()));
+    DGV->eraseFromParent();
+  }
+
+  return C;
+}
+
+/// Update the initializers in the Dest module now that all globals that may be
+/// referenced are in Dest.
+void IRLinker::linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src) {
+  // Figure out what the initializer looks like in the dest module.
+  Dst.setInitializer(MapValue(Src.getInitializer(), ValueMap, ValueMapperFlags,
+                              &TypeMap, &GValMaterializer));
+}
+
+/// Copy the source function over into the dest function and fix up references
+/// to values. At this point we know that Dest is an external function, and
+/// that Src is not.
+bool IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
+  assert(Dst.isDeclaration() && !Src.isDeclaration());
+
+  // Materialize if needed.
+  if (std::error_code EC = Src.materialize())
+    return emitError(EC.message());
+
+  if (!shouldLinkMetadata())
+    // This is only supported for lazy links. Do after materialization of
+    // a function and before remapping metadata on instructions below
+    // in RemapInstruction, as the saved mapping is used to handle
+    // the temporary metadata hanging off instructions.
+    SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, true);
+
+  // Link in the prefix data.
+  if (Src.hasPrefixData())
+    Dst.setPrefixData(MapValue(Src.getPrefixData(), ValueMap, ValueMapperFlags,
+                               &TypeMap, &GValMaterializer));
+
+  // Link in the prologue data.
+  if (Src.hasPrologueData())
+    Dst.setPrologueData(MapValue(Src.getPrologueData(), ValueMap,
+                                 ValueMapperFlags, &TypeMap,
+                                 &GValMaterializer));
+
+  // Link in the personality function.
+  if (Src.hasPersonalityFn())
+    Dst.setPersonalityFn(MapValue(Src.getPersonalityFn(), ValueMap,
+                                  ValueMapperFlags, &TypeMap,
+                                  &GValMaterializer));
+
+  // Go through and convert function arguments over, remembering the mapping.
+  Function::arg_iterator DI = Dst.arg_begin();
+  for (Argument &Arg : Src.args()) {
+    DI->setName(Arg.getName()); // Copy the name over.
+
+    // Add a mapping to our mapping.
+    ValueMap[&Arg] = &*DI;
+    ++DI;
+  }
+
+  // Copy over the metadata attachments.
+  SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
+  Src.getAllMetadata(MDs);
+  for (const auto &I : MDs)
+    Dst.setMetadata(I.first, MapMetadata(I.second, ValueMap, ValueMapperFlags,
+                                         &TypeMap, &GValMaterializer));
+
+  // Splice the body of the source function into the dest function.
+  Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList());
+
+  // At this point, all of the instructions and values of the function are now
+  // copied over. The only problem is that they are still referencing values in
+  // the Source function as operands. Loop through all of the operands of the
+  // functions and patch them up to point to the local versions.
+  for (BasicBlock &BB : Dst)
+    for (Instruction &I : BB)
+      RemapInstruction(&I, ValueMap,
+                       RF_IgnoreMissingEntries | ValueMapperFlags, &TypeMap,
+                       &GValMaterializer);
+
+  // There is no need to map the arguments anymore.
+  for (Argument &Arg : Src.args())
+    ValueMap.erase(&Arg);
+
+  return false;
+}
+
+void IRLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) {
+  Constant *Aliasee = Src.getAliasee();
+  Constant *Val = MapValue(Aliasee, AliasValueMap, ValueMapperFlags, &TypeMap,
+                           &LValMaterializer);
+  Dst.setAliasee(Val);
+}
+
+bool IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
+  if (auto *F = dyn_cast<Function>(&Src))
+    return linkFunctionBody(cast<Function>(Dst), *F);
+  if (auto *GVar = dyn_cast<GlobalVariable>(&Src)) {
+    linkGlobalInit(cast<GlobalVariable>(Dst), *GVar);
+    return false;
+  }
+  linkAliasBody(cast<GlobalAlias>(Dst), cast<GlobalAlias>(Src));
+  return false;
+}
+
+void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) {
+  // Track unneeded nodes to make it simpler to handle the case
+  // where we are checking if an already-mapped SP is needed.
+  NamedMDNode *CompileUnits = SrcM.getNamedMetadata("llvm.dbg.cu");
+  if (!CompileUnits)
+    return;
+  for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
+    auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
+    assert(CU && "Expected valid compile unit");
+    for (auto *Op : CU->getSubprograms()) {
+      // Unless we were doing function importing and deferred metadata linking,
+      // any needed SPs should have been mapped as they would be reached
+      // from the function linked in (either on the function itself for linked
+      // function bodies, or from DILocation on inlined instructions).
+      assert(!(ValueMap.MD()[Op] && IsMetadataLinkingPostpass) &&
+             "DISubprogram shouldn't be mapped yet");
+      if (!ValueMap.MD()[Op])
+        UnneededSubprograms.insert(Op);
+    }
+  }
+  if (!IsMetadataLinkingPostpass)
+    return;
+  // In the case of metadata linking as a postpass (e.g. for function
+  // importing), see which DISubprogram MD from the source has an associated
+  // temporary metadata node, which means the SP was needed by an imported
+  // function.
+  for (auto MDI : MetadataToIDs) {
+    const MDNode *Node = dyn_cast<MDNode>(MDI.first);
+    if (!Node)
+      continue;
+    DISubprogram *SP = getDISubprogram(Node);
+    if (!SP || !ValIDToTempMDMap->count(MDI.second))
+      continue;
+    UnneededSubprograms.erase(SP);
+  }
+}
+
+// Squash null subprograms from compile unit subprogram lists.
+void IRLinker::stripNullSubprograms() {
+  NamedMDNode *CompileUnits = DstM.getNamedMetadata("llvm.dbg.cu");
+  if (!CompileUnits)
+    return;
+  for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
+    auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
+    assert(CU && "Expected valid compile unit");
+
+    SmallVector<Metadata *, 16> NewSPs;
+    NewSPs.reserve(CU->getSubprograms().size());
+    bool FoundNull = false;
+    for (DISubprogram *SP : CU->getSubprograms()) {
+      if (!SP) {
+        FoundNull = true;
+        continue;
+      }
+      NewSPs.push_back(SP);
+    }
+    if (FoundNull)
+      CU->replaceSubprograms(MDTuple::get(CU->getContext(), NewSPs));
+  }
+}
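+
+// Illustrative effect, not part of the upstream patch: if only f() was
+// imported, a compile unit whose subprogram list was
+//   !{!DISubprogram(name: "f"), !DISubprogram(name: "g")}
+// has g's entry mapped to null (isMetadataNeeded() rejects it) and is then
+// compacted by stripNullSubprograms() to !{!DISubprogram(name: "f")}.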
+
+/// Insert all of the named MDNodes in Src into the Dest module.
+void IRLinker::linkNamedMDNodes() {
+  findNeededSubprograms(ValueMap);
+  const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata();
+  for (const NamedMDNode &NMD : SrcM.named_metadata()) {
+    // Don't link module flags here. Do them separately.
+    if (&NMD == SrcModFlags)
+      continue;
+    NamedMDNode *DestNMD = DstM.getOrInsertNamedMetadata(NMD.getName());
+    // Add Src elements into Dest node.
+    for (const MDNode *op : NMD.operands())
+      DestNMD->addOperand(MapMetadata(
+          op, ValueMap, ValueMapperFlags | RF_NullMapMissingGlobalValues,
+          &TypeMap, &GValMaterializer));
+  }
+  stripNullSubprograms();
+}
+
+/// Merge the linker flags in Src into the Dest module.
+bool IRLinker::linkModuleFlagsMetadata() {
+  // If the source module has no module flags, we are done.
+  const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata();
+  if (!SrcModFlags)
+    return false;
+
+  // If the destination module doesn't have module flags yet, then just copy
+  // over the source module's flags.
+  NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata();
+  if (DstModFlags->getNumOperands() == 0) {
+    for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I)
+      DstModFlags->addOperand(SrcModFlags->getOperand(I));
+
+    return false;
+  }
+
+  // First build a map of the existing module flags and requirements.
+  DenseMap<MDString *, std::pair<MDNode *, unsigned>> Flags;
+  SmallSetVector<MDNode *, 16> Requirements;
+  for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) {
+    MDNode *Op = DstModFlags->getOperand(I);
+    ConstantInt *Behavior = mdconst::extract<ConstantInt>(Op->getOperand(0));
+    MDString *ID = cast<MDString>(Op->getOperand(1));
+
+    if (Behavior->getZExtValue() == Module::Require) {
+      Requirements.insert(cast<MDNode>(Op->getOperand(2)));
+    } else {
+      Flags[ID] = std::make_pair(Op, I);
+    }
+  }
+
+  // Merge in the flags from the source module, and also collect its set of
+  // requirements.
+  for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) {
+    MDNode *SrcOp = SrcModFlags->getOperand(I);
+    ConstantInt *SrcBehavior =
+        mdconst::extract<ConstantInt>(SrcOp->getOperand(0));
+    MDString *ID = cast<MDString>(SrcOp->getOperand(1));
+    MDNode *DstOp;
+    unsigned DstIndex;
+    std::tie(DstOp, DstIndex) = Flags.lookup(ID);
+    unsigned SrcBehaviorValue = SrcBehavior->getZExtValue();
+
+    // If this is a requirement, add it and continue.
+    if (SrcBehaviorValue == Module::Require) {
+      // If the destination module does not already have this requirement, add
+      // it.
+      if (Requirements.insert(cast<MDNode>(SrcOp->getOperand(2)))) {
+        DstModFlags->addOperand(SrcOp);
+      }
+      continue;
+    }
+
+    // If there is no existing flag with this ID, just add it.
+    if (!DstOp) {
+      Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands());
+      DstModFlags->addOperand(SrcOp);
+      continue;
+    }
+
+    // Otherwise, perform a merge.
+    ConstantInt *DstBehavior =
+        mdconst::extract<ConstantInt>(DstOp->getOperand(0));
+    unsigned DstBehaviorValue = DstBehavior->getZExtValue();
+
+    // If either flag has override behavior, handle it first.
+    if (DstBehaviorValue == Module::Override) {
+      // Diagnose inconsistent flags which both have override behavior.
+      if (SrcBehaviorValue == Module::Override &&
+          SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+        emitError("linking module flags '" + ID->getString() +
+                  "': IDs have conflicting override values");
+      }
+      continue;
+    } else if (SrcBehaviorValue == Module::Override) {
+      // Update the destination flag to that of the source.
+      DstModFlags->setOperand(DstIndex, SrcOp);
+      Flags[ID].first = SrcOp;
+      continue;
+    }
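+
+    // Illustrative examples, not part of the upstream patch (flag names are
+    // hypothetical):
+    //   !{i32 1, !"flag", i32 2}  - Error(1): both modules must agree
+    //   !{i32 5, !"list", !{...}} - Append(5): operand lists concatenate
+    //   !{i32 6, !"set",  !{...}} - AppendUnique(6): duplicates are dropped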
+
+    // Diagnose inconsistent merge behavior types.
+    if (SrcBehaviorValue != DstBehaviorValue) {
+      emitError("linking module flags '" + ID->getString() +
+                "': IDs have conflicting behaviors");
+      continue;
+    }
+
+    auto replaceDstValue = [&](MDNode *New) {
+      Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New};
+      MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps);
+      DstModFlags->setOperand(DstIndex, Flag);
+      Flags[ID].first = Flag;
+    };
+
+    // Perform the merge for standard behavior types.
+    switch (SrcBehaviorValue) {
+    case Module::Require:
+    case Module::Override:
+      llvm_unreachable("not possible");
+    case Module::Error: {
+      // Emit an error if the values differ.
+      if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+        emitError("linking module flags '" + ID->getString() +
+                  "': IDs have conflicting values");
+      }
+      continue;
+    }
+    case Module::Warning: {
+      // Emit a warning if the values differ.
+      if (SrcOp->getOperand(2) != DstOp->getOperand(2)) {
+        emitWarning("linking module flags '" + ID->getString() +
+                    "': IDs have conflicting values");
+      }
+      continue;
+    }
+    case Module::Append: {
+      MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+      MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+      SmallVector<Metadata *, 8> MDs;
+      MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands());
+      MDs.append(DstValue->op_begin(), DstValue->op_end());
+      MDs.append(SrcValue->op_begin(), SrcValue->op_end());
+
+      replaceDstValue(MDNode::get(DstM.getContext(), MDs));
+      break;
+    }
+    case Module::AppendUnique: {
+      SmallSetVector<Metadata *, 16> Elts;
+      MDNode *DstValue = cast<MDNode>(DstOp->getOperand(2));
+      MDNode *SrcValue = cast<MDNode>(SrcOp->getOperand(2));
+      Elts.insert(DstValue->op_begin(), DstValue->op_end());
+      Elts.insert(SrcValue->op_begin(), SrcValue->op_end());
+
+      replaceDstValue(MDNode::get(DstM.getContext(),
+                                  makeArrayRef(Elts.begin(), Elts.end())));
+      break;
+    }
+    }
+  }
+
+  // Check all of the requirements.
+  for (unsigned I = 0, E = Requirements.size(); I != E; ++I) {
+    MDNode *Requirement = Requirements[I];
+    MDString *Flag = cast<MDString>(Requirement->getOperand(0));
+    Metadata *ReqValue = Requirement->getOperand(1);
+
+    MDNode *Op = Flags[Flag].first;
+    if (!Op || Op->getOperand(2) != ReqValue) {
+      emitError("linking module flags '" + Flag->getString() +
+                "': does not have the required value");
+      continue;
+    }
+  }
+
+  return HasError;
+}
+
+// This function returns true if the triples match.
+static bool triplesMatch(const Triple &T0, const Triple &T1) {
+  // If vendor is apple, ignore the version number.
+  if (T0.getVendor() == Triple::Apple)
+    return T0.getArch() == T1.getArch() && T0.getSubArch() == T1.getSubArch() &&
+           T0.getVendor() == T1.getVendor() && T0.getOS() == T1.getOS();
+
+  return T0 == T1;
+}
+
+// This function returns the merged triple.
+static std::string mergeTriples(const Triple &SrcTriple,
+                                const Triple &DstTriple) {
+  // If vendor is apple, pick the triple with the larger version number.
+  if (SrcTriple.getVendor() == Triple::Apple)
+    if (DstTriple.isOSVersionLT(SrcTriple))
+      return SrcTriple.str();
+
+  return DstTriple.str();
+}
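+
+// Illustrative example, not part of the upstream patch: under the Apple
+// special case above, linking a module with triple
+// "x86_64-apple-macosx10.9.0" into one with "x86_64-apple-macosx10.11.0"
+// triggers no mismatch warning (triplesMatch ignores the version) and
+// mergeTriples keeps the destination triple, whose OS version is larger.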
+
+bool IRLinker::run() {
+  // Inherit the target data from the source module if the destination module
+  // doesn't have one already.
+  if (DstM.getDataLayout().isDefault())
+    DstM.setDataLayout(SrcM.getDataLayout());
+
+  if (SrcM.getDataLayout() != DstM.getDataLayout()) {
+    emitWarning("Linking two modules of different data layouts: '" +
+                SrcM.getModuleIdentifier() + "' is '" +
+                SrcM.getDataLayoutStr() + "' whereas '" +
+                DstM.getModuleIdentifier() + "' is '" +
+                DstM.getDataLayoutStr() + "'\n");
+  }
+
+  // Copy the target triple from the source to dest if the dest's is empty.
+  if (DstM.getTargetTriple().empty() && !SrcM.getTargetTriple().empty())
+    DstM.setTargetTriple(SrcM.getTargetTriple());
+
+  Triple SrcTriple(SrcM.getTargetTriple()), DstTriple(DstM.getTargetTriple());
+
+  if (!SrcM.getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple))
+    emitWarning("Linking two modules of different target triples: '" +
+                SrcM.getModuleIdentifier() + "' is '" + SrcM.getTargetTriple() +
+                "' whereas '" + DstM.getModuleIdentifier() + "' is '" +
+                DstM.getTargetTriple() + "'\n");
+
+  DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple));
+
+  // Append the module inline asm string.
+  if (!SrcM.getModuleInlineAsm().empty()) {
+    if (DstM.getModuleInlineAsm().empty())
+      DstM.setModuleInlineAsm(SrcM.getModuleInlineAsm());
+    else
+      DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" +
+                              SrcM.getModuleInlineAsm());
+  }
+
+  // Loop over all of the linked values to compute type mappings.
+  computeTypeMapping();
+
+  std::reverse(Worklist.begin(), Worklist.end());
+  while (!Worklist.empty()) {
+    GlobalValue *GV = Worklist.back();
+    Worklist.pop_back();
+
+    // Already mapped.
+    if (ValueMap.find(GV) != ValueMap.end() ||
+        AliasValueMap.find(GV) != AliasValueMap.end())
+      continue;
+
+    assert(!GV->isDeclaration());
+    MapValue(GV, ValueMap, ValueMapperFlags, &TypeMap, &GValMaterializer);
+    if (HasError)
+      return true;
+  }
+
+  // Note that we are done linking global value bodies. This prevents
+  // metadata linking from creating new references.
+  DoneLinkingBodies = true;
+
+  // Remap all of the named MDNodes in Src into the DstM module. We do this
+  // after linking GlobalValues so that MDNodes that reference GlobalValues
+  // are properly remapped.
+  if (shouldLinkMetadata()) {
+    // Even if just linking metadata we should link decls above in case
+    // any are referenced by metadata. IRLinker::shouldLink ensures that
+    // we don't actually link anything from source.
+    if (IsMetadataLinkingPostpass) {
+      // Ensure metadata is materialized before saving the metadata list.
+      if (SrcM.getMaterializer()->materializeMetadata())
+        return true;
+      SrcM.getMaterializer()->saveMetadataList(MetadataToIDs, false);
+    }
+
+    linkNamedMDNodes();
+
+    if (IsMetadataLinkingPostpass) {
+      // Handle anything left in the ValIDToTempMDMap, such as metadata nodes
+      // not reached by the dbg.cu NamedMD (i.e. only reached from
+      // instructions).
+      // Walk the MetadataToIDs once to find the set of new (imported) MD
+      // that still has corresponding temporary metadata, and invoke metadata
+      // mapping on each one.
+      for (auto MDI : MetadataToIDs) {
+        if (!ValIDToTempMDMap->count(MDI.second))
+          continue;
+        MapMetadata(MDI.first, ValueMap, ValueMapperFlags, &TypeMap,
+                    &GValMaterializer);
+      }
+      assert(ValIDToTempMDMap->empty());
+    }
+
+    // Merge the module flags into the DstM module.
+    if (linkModuleFlagsMetadata())
+      return true;
+  }
+
+  return false;
+}
+
+IRMover::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef<Type *> E, bool P)
+    : ETypes(E), IsPacked(P) {}
+
+IRMover::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST)
+    : ETypes(ST->elements()), IsPacked(ST->isPacked()) {}
+
+bool IRMover::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const {
+  if (IsPacked != That.IsPacked)
+    return false;
+  if (ETypes != That.ETypes)
+    return false;
+  return true;
+}
+
+bool IRMover::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const {
+  return !this->operator==(That);
+}
+
+StructType *IRMover::StructTypeKeyInfo::getEmptyKey() {
+  return DenseMapInfo<StructType *>::getEmptyKey();
+}
+
+StructType *IRMover::StructTypeKeyInfo::getTombstoneKey() {
+  return DenseMapInfo<StructType *>::getTombstoneKey();
+}
+
+unsigned IRMover::StructTypeKeyInfo::getHashValue(const KeyTy &Key) {
+  return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()),
+                      Key.IsPacked);
+}
+
+unsigned IRMover::StructTypeKeyInfo::getHashValue(const StructType *ST) {
+  return getHashValue(KeyTy(ST));
+}
+
+bool IRMover::StructTypeKeyInfo::isEqual(const KeyTy &LHS,
+                                         const StructType *RHS) {
+  if (RHS == getEmptyKey() || RHS == getTombstoneKey())
+    return false;
+  return LHS == KeyTy(RHS);
+}
+
+bool IRMover::StructTypeKeyInfo::isEqual(const StructType *LHS,
+                                         const StructType *RHS) {
+  if (RHS == getEmptyKey())
+    return LHS == getEmptyKey();
+
+  if (RHS == getTombstoneKey())
+    return LHS == getTombstoneKey();
+
+  return KeyTy(LHS) == KeyTy(RHS);
+}
+
+void IRMover::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) {
+  assert(!Ty->isOpaque());
+  NonOpaqueStructTypes.insert(Ty);
+}
+
+void IRMover::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) {
+  assert(!Ty->isOpaque());
+  NonOpaqueStructTypes.insert(Ty);
+  bool Removed = OpaqueStructTypes.erase(Ty);
+  (void)Removed;
+  assert(Removed);
+}
+
+void IRMover::IdentifiedStructTypeSet::addOpaque(StructType *Ty) {
+  assert(Ty->isOpaque());
+  OpaqueStructTypes.insert(Ty);
+}
+
+StructType *
+IRMover::IdentifiedStructTypeSet::findNonOpaque(ArrayRef<Type *> ETypes,
+                                                bool IsPacked) {
+  IRMover::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked);
+  auto I = NonOpaqueStructTypes.find_as(Key);
+  if (I == NonOpaqueStructTypes.end())
+    return nullptr;
+  return *I;
+}
+
+bool IRMover::IdentifiedStructTypeSet::hasType(StructType *Ty) {
+  if (Ty->isOpaque())
+    return OpaqueStructTypes.count(Ty);
+  auto I = NonOpaqueStructTypes.find(Ty);
+  if (I == NonOpaqueStructTypes.end())
+    return false;
+  return *I == Ty;
+}
+
+IRMover::IRMover(Module &M) : Composite(M) {
+  TypeFinder StructTypes;
+  StructTypes.run(M, true);
+  for (StructType *Ty : StructTypes) {
+    if (Ty->isOpaque())
+      IdentifiedStructTypes.addOpaque(Ty);
+    else
+      IdentifiedStructTypes.addNonOpaque(Ty);
+  }
+}
+
+bool IRMover::move(
+    Module &Src, ArrayRef<GlobalValue *> ValuesToLink,
+    std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
+    DenseMap<unsigned, MDNode *> *ValIDToTempMDMap,
+    bool IsMetadataLinkingPostpass) {
+  IRLinker TheIRLinker(Composite, IdentifiedStructTypes, Src, ValuesToLink,
+                       AddLazyFor, ValIDToTempMDMap, IsMetadataLinkingPostpass);
+  bool RetCode = TheIRLinker.run();
+  Composite.dropTriviallyDeadConstantArrays();
+  return RetCode;
+}
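+
+// Illustrative sketch, not part of the upstream patch: the identified-struct
+// bookkeeping above keys non-opaque structs by element types and packedness,
+// so structurally identical named structs unify:
+//
+//   LLVMContext Ctx;
+//   Type *I32 = Type::getInt32Ty(Ctx);
+//   IRMover::IdentifiedStructTypeSet Set;
+//   Set.addNonOpaque(StructType::create(Ctx, {I32, I32}, "pair"));
+//   // Finds "pair" again for any non-packed {i32, i32} key:
+//   StructType *ST = Set.findNonOpaque({I32, I32}, /*IsPacked=*/false);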
diff --git a/lib/Linker/LinkDiagnosticInfo.h b/lib/Linker/LinkDiagnosticInfo.h
new file mode 100644
index 000000000000..d91f19c69aac
--- /dev/null
+++ b/lib/Linker/LinkDiagnosticInfo.h
@@ -0,0 +1,25 @@
+//===- LinkDiagnosticInfo.h -------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_LINKER_LINK_DIAGNOSTIC_INFO_H
+#define LLVM_LIB_LINKER_LINK_DIAGNOSTIC_INFO_H
+
+#include "llvm/IR/DiagnosticInfo.h"
+
+namespace llvm {
+class LinkDiagnosticInfo : public DiagnosticInfo {
+  const Twine &Msg;
+
+public:
+  LinkDiagnosticInfo(DiagnosticSeverity Severity, const Twine &Msg);
+  void print(DiagnosticPrinter &DP) const override;
+};
+}
+
+#endif
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index f0906809ee48..9de3be412d75 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -12,447 +12,72 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Linker/Linker.h"
+#include "LinkDiagnosticInfo.h"
 #include "llvm-c/Linker.h"
-#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/TypeFinder.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include <cctype>
-#include <tuple>
 using namespace llvm;
-
-//===----------------------------------------------------------------------===//
-// TypeMap implementation.
-//===----------------------------------------------------------------------===//
-
 namespace {
-class TypeMapTy : public ValueMapTypeRemapper {
-  /// This is a mapping from a source type to a destination type to use.
-  DenseMap<Type *, Type *> MappedTypes;
-
-  /// When checking to see if two subgraphs are isomorphic, we speculatively
-  /// add types to MappedTypes, but keep track of them here in case we need to
-  /// roll back.
-  SmallVector<Type *, 16> SpeculativeTypes;
-
-  SmallVector<StructType *, 16> SpeculativeDstOpaqueTypes;
-
-  /// This is a list of non-opaque structs in the source module that are mapped
-  /// to an opaque struct in the destination module.
-  SmallVector<StructType *, 16> SrcDefinitionsToResolve;
-
-  /// This is the set of opaque types in the destination module that are
-  /// getting a body from the source module.
-  SmallPtrSet<StructType *, 16> DstResolvedOpaqueTypes;
-
-public:
-  TypeMapTy(Linker::IdentifiedStructTypeSet &DstStructTypesSet)
-      : DstStructTypesSet(DstStructTypesSet) {}
-
-  Linker::IdentifiedStructTypeSet &DstStructTypesSet;
-  /// Indicate that the specified type in the destination module is conceptually
-  /// equivalent to the specified type in the source module.
-  void addTypeMapping(Type *DstTy, Type *SrcTy);
-
-  /// Produce a body for an opaque type in the dest module from a type
-  /// definition in the source module.
-  void linkDefinedTypeBodies();
-
-  /// Return the mapped type to use for the specified input type from the
-  /// source module.
-  Type *get(Type *SrcTy);
-  Type *get(Type *SrcTy, SmallPtrSet<StructType *, 8> &Visited);
-
-  void finishType(StructType *DTy, StructType *STy, ArrayRef<Type *> ETypes);
-
-  FunctionType *get(FunctionType *T) {
-    return cast<FunctionType>(get((Type *)T));
-  }
-
-  /// Dump out the type map for debugging purposes.
-  void dump() const {
-    for (auto &Pair : MappedTypes) {
-      dbgs() << "TypeMap: ";
-      Pair.first->print(dbgs());
-      dbgs() << " => ";
-      Pair.second->print(dbgs());
-      dbgs() << '\n';
-    }
-  }
-
-private:
-  Type *remapType(Type *SrcTy) override { return get(SrcTy); }
-
-  bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
-};
-}
-
-void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
-  assert(SpeculativeTypes.empty());
-  assert(SpeculativeDstOpaqueTypes.empty());
-
-  // Check to see if these types are recursively isomorphic and establish a
-  // mapping between them if so.
-  if (!areTypesIsomorphic(DstTy, SrcTy)) {
-    // Oops, they aren't isomorphic. Just discard this request by rolling out
-    // any speculative mappings we've established.
-    for (Type *Ty : SpeculativeTypes)
-      MappedTypes.erase(Ty);
-
-    SrcDefinitionsToResolve.resize(SrcDefinitionsToResolve.size() -
-                                   SpeculativeDstOpaqueTypes.size());
-    for (StructType *Ty : SpeculativeDstOpaqueTypes)
-      DstResolvedOpaqueTypes.erase(Ty);
-  } else {
-    for (Type *Ty : SpeculativeTypes)
-      if (auto *STy = dyn_cast<StructType>(Ty))
-        if (STy->hasName())
-          STy->setName("");
-  }
-  SpeculativeTypes.clear();
-  SpeculativeDstOpaqueTypes.clear();
-}
-
-/// Recursively walk this pair of types, returning true if they are isomorphic,
-/// false if they are not.
-bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
-  // Two types with differing kinds are clearly not isomorphic.
-  if (DstTy->getTypeID() != SrcTy->getTypeID())
-    return false;
-
-  // If we have an entry in the MappedTypes table, then we have our answer.
-  Type *&Entry = MappedTypes[SrcTy];
-  if (Entry)
-    return Entry == DstTy;
-
-  // Two identical types are clearly isomorphic. Remember this
-  // non-speculatively.
-  if (DstTy == SrcTy) {
-    Entry = DstTy;
-    return true;
-  }
-
-  // Okay, we have two types with identical kinds that we haven't seen before.
-
-  // If this is an opaque struct type, special case it.
-  if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) {
-    // Mapping an opaque type to any struct, just keep the dest struct.
-    if (SSTy->isOpaque()) {
-      Entry = DstTy;
-      SpeculativeTypes.push_back(SrcTy);
-      return true;
-    }
-
-    // Mapping a non-opaque source type to an opaque dest. If this is the first
-    // type that we're mapping onto this destination type then we succeed. Keep
-    // the dest, but fill it in later. If this is the second (different) type
-    // that we're trying to map onto the same opaque type then we fail.
-    if (cast<StructType>(DstTy)->isOpaque()) {
-      // We can only map one source type onto the opaque destination type.
-      if (!DstResolvedOpaqueTypes.insert(cast<StructType>(DstTy)).second)
-        return false;
-      SrcDefinitionsToResolve.push_back(SSTy);
-      SpeculativeTypes.push_back(SrcTy);
-      SpeculativeDstOpaqueTypes.push_back(cast<StructType>(DstTy));
-      Entry = DstTy;
-      return true;
-    }
-  }
-
-  // If the number of subtypes disagree between the two types, then we fail.
-  if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
-    return false;
-
-  // Fail if any of the extra properties (e.g. array size) of the type disagree.
-  if (isa<IntegerType>(DstTy))
-    return false; // bitwidth disagrees.
-  if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
-    if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
-      return false;
-
-  } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
-    if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
-      return false;
-  } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
-    StructType *SSTy = cast<StructType>(SrcTy);
-    if (DSTy->isLiteral() != SSTy->isLiteral() ||
-        DSTy->isPacked() != SSTy->isPacked())
-      return false;
-  } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
-    if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
-      return false;
-  } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
-    if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
-      return false;
-  }
-
-  // Otherwise, we speculate that these two types will line up and recursively
-  // check the subelements.
-  Entry = DstTy;
-  SpeculativeTypes.push_back(SrcTy);
-
-  for (unsigned I = 0, E = SrcTy->getNumContainedTypes(); I != E; ++I)
-    if (!areTypesIsomorphic(DstTy->getContainedType(I),
-                            SrcTy->getContainedType(I)))
-      return false;
-
-  // If everything seems to have lined up, then everything is great.
-  return true;
-}
-
-void TypeMapTy::linkDefinedTypeBodies() {
-  SmallVector<Type *, 16> Elements;
-  for (StructType *SrcSTy : SrcDefinitionsToResolve) {
-    StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
-    assert(DstSTy->isOpaque());
-
-    // Map the body of the source type over to a new body for the dest type.
-    Elements.resize(SrcSTy->getNumElements());
-    for (unsigned I = 0, E = Elements.size(); I != E; ++I)
-      Elements[I] = get(SrcSTy->getElementType(I));
-
-    DstSTy->setBody(Elements, SrcSTy->isPacked());
-    DstStructTypesSet.switchToNonOpaque(DstSTy);
-  }
-  SrcDefinitionsToResolve.clear();
-  DstResolvedOpaqueTypes.clear();
-}
-
-void TypeMapTy::finishType(StructType *DTy, StructType *STy,
-                           ArrayRef<Type *> ETypes) {
-  DTy->setBody(ETypes, STy->isPacked());
-
-  // Steal STy's name.
-  if (STy->hasName()) {
-    SmallString<16> TmpName = STy->getName();
-    STy->setName("");
-    DTy->setName(TmpName);
-  }
-
-  DstStructTypesSet.addNonOpaque(DTy);
-}
-
-Type *TypeMapTy::get(Type *Ty) {
-  SmallPtrSet<StructType *, 8> Visited;
-  return get(Ty, Visited);
-}
-
-Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
-  // If we already have an entry for this type, return it.
-  Type **Entry = &MappedTypes[Ty];
-  if (*Entry)
-    return *Entry;
-
-  // These are types that LLVM itself will unique.
-  bool IsUniqued = !isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral();
-
-#ifndef NDEBUG
-  if (!IsUniqued) {
-    for (auto &Pair : MappedTypes) {
-      assert(!(Pair.first != Ty && Pair.second == Ty) &&
-             "mapping to a source type");
-    }
-  }
-#endif
-
-  if (!IsUniqued && !Visited.insert(cast<StructType>(Ty)).second) {
-    StructType *DTy = StructType::create(Ty->getContext());
-    return *Entry = DTy;
-  }
-
-  // If this is not a recursive type, then just map all of the elements and
-  // then rebuild the type from inside out.
-  SmallVector<Type *, 4> ElementTypes;
-
-  // If there are no element types to map, then the type is itself. This is
-  // true for the anonymous {} struct, things like 'float', integers, etc.
-  if (Ty->getNumContainedTypes() == 0 && IsUniqued)
-    return *Entry = Ty;
-
-  // Remap all of the elements, keeping track of whether any of them change.
-  bool AnyChange = false;
-  ElementTypes.resize(Ty->getNumContainedTypes());
-  for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) {
-    ElementTypes[I] = get(Ty->getContainedType(I), Visited);
-    AnyChange |= ElementTypes[I] != Ty->getContainedType(I);
-  }
-
-  // If we found our type while recursively processing stuff, just use it.
-  Entry = &MappedTypes[Ty];
-  if (*Entry) {
-    if (auto *DTy = dyn_cast<StructType>(*Entry)) {
-      if (DTy->isOpaque()) {
-        auto *STy = cast<StructType>(Ty);
-        finishType(DTy, STy, ElementTypes);
-      }
-    }
-    return *Entry;
-  }
-
-  // If all of the element types mapped directly over and the type is not
-  // a named struct, then the type is usable as-is.
-  if (!AnyChange && IsUniqued)
-    return *Entry = Ty;
-
-  // Otherwise, rebuild a modified type.
-  switch (Ty->getTypeID()) {
-  default:
-    llvm_unreachable("unknown derived type to remap");
-  case Type::ArrayTyID:
-    return *Entry = ArrayType::get(ElementTypes[0],
-                                   cast<ArrayType>(Ty)->getNumElements());
-  case Type::VectorTyID:
-    return *Entry = VectorType::get(ElementTypes[0],
-                                    cast<VectorType>(Ty)->getNumElements());
-  case Type::PointerTyID:
-    return *Entry = PointerType::get(ElementTypes[0],
-                                     cast<PointerType>(Ty)->getAddressSpace());
-  case Type::FunctionTyID:
-    return *Entry = FunctionType::get(ElementTypes[0],
-                                      makeArrayRef(ElementTypes).slice(1),
-                                      cast<FunctionType>(Ty)->isVarArg());
-  case Type::StructTyID: {
-    auto *STy = cast<StructType>(Ty);
-    bool IsPacked = STy->isPacked();
-    if (IsUniqued)
-      return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked);
-
-    // If the type is opaque, we can just use it directly.
-    if (STy->isOpaque()) {
-      DstStructTypesSet.addOpaque(STy);
-      return *Entry = Ty;
-    }
-
-    if (StructType *OldT =
-            DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) {
-      STy->setName("");
-      return *Entry = OldT;
-    }
-
-    if (!AnyChange) {
-      DstStructTypesSet.addNonOpaque(STy);
-      return *Entry = Ty;
-    }
-
-    StructType *DTy = StructType::create(Ty->getContext());
-    finishType(DTy, STy, ElementTypes);
-    return *Entry = DTy;
-  }
-  }
-}
-
-//===----------------------------------------------------------------------===//
-// ModuleLinker implementation.
-//===----------------------------------------------------------------------===//
-
-namespace {
-class ModuleLinker;
-
-/// Creates prototypes for functions that are lazily linked on the fly. This
-/// speeds up linking for modules with many lazily linked functions of which
-/// few get used.
-class ValueMaterializerTy : public ValueMaterializer {
-  TypeMapTy &TypeMap;
-  Module *DstM;
-  std::vector<GlobalValue *> &LazilyLinkGlobalValues;
-
-public:
-  ValueMaterializerTy(TypeMapTy &TypeMap, Module *DstM,
-                      std::vector<GlobalValue *> &LazilyLinkGlobalValues)
-      : ValueMaterializer(), TypeMap(TypeMap), DstM(DstM),
-        LazilyLinkGlobalValues(LazilyLinkGlobalValues) {}
-
-  Value *materializeValueFor(Value *V) override;
-};
-
-class LinkDiagnosticInfo : public DiagnosticInfo {
-  const Twine &Msg;
-
-public:
-  LinkDiagnosticInfo(DiagnosticSeverity Severity, const Twine &Msg);
-  void print(DiagnosticPrinter &DP) const override;
-};
-LinkDiagnosticInfo::LinkDiagnosticInfo(DiagnosticSeverity Severity,
-                                       const Twine &Msg)
-    : DiagnosticInfo(DK_Linker, Severity), Msg(Msg) {}
-void LinkDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; }
 
 /// This is an implementation class for the LinkModules function, which is the
 /// entrypoint for this file.
 class ModuleLinker {
-  Module *DstM, *SrcM;
+  IRMover &Mover;
+  Module &SrcM;
 
-  TypeMapTy TypeMap;
-  ValueMaterializerTy ValMaterializer;
-
-  /// Mapping of values from what they used to be in Src, to what they are now
-  /// in DstM. ValueToValueMapTy is a ValueMap, which involves some overhead
-  /// due to the use of Value handles which the Linker doesn't actually need,
-  /// but this allows us to reuse the ValueMapper code.
-  ValueToValueMapTy ValueMap;
-
-  struct AppendingVarInfo {
-    GlobalVariable *NewGV;   // New aggregate global in dest module.
-    const Constant *DstInit; // Old initializer from dest module.
-    const Constant *SrcInit; // Old initializer from src module.
-  };
-
-  std::vector<AppendingVarInfo> AppendingVars;
-
-  // Set of items not to link in from source.
-  SmallPtrSet<const GlobalValue *, 16> DoNotLinkFromSource;
-
-  // Vector of GlobalValues to lazily link in.
-  std::vector<GlobalValue *> LazilyLinkGlobalValues;
-
-  /// Functions that have replaced other functions.
-  SmallPtrSet<const Function *, 16> OverridingFunctions;
-
-  DiagnosticHandlerFunction DiagnosticHandler;
+  SetVector<GlobalValue *> ValuesToLink;
+  StringSet<> Internalize;
 
   /// For symbol clashes, prefer those from Src.
-  bool OverrideFromSrc;
+  unsigned Flags;
 
-public:
-  ModuleLinker(Module *dstM, Linker::IdentifiedStructTypeSet &Set, Module *srcM,
-               DiagnosticHandlerFunction DiagnosticHandler,
-               bool OverrideFromSrc)
-      : DstM(dstM), SrcM(srcM), TypeMap(Set),
-        ValMaterializer(TypeMap, DstM, LazilyLinkGlobalValues),
-        DiagnosticHandler(DiagnosticHandler), OverrideFromSrc(OverrideFromSrc) {
+  /// Function index passed into ModuleLinker for use in function
+  /// importing/exporting handling.
+  const FunctionInfoIndex *ImportIndex;
+
+  /// Functions to import from source module, all other functions are
+  /// imported as declarations instead of definitions.
+  DenseSet<const GlobalValue *> *FunctionsToImport;
+
+  /// Set to true if the given FunctionInfoIndex contains any functions
+  /// from this source module, in which case we must conservatively assume
+  /// that any of its functions may be imported into another module
+  /// as part of a different backend compilation process.
+  bool HasExportedFunctions = false;
+
+  /// Association between metadata value id and temporary metadata that
+  /// remains unmapped after function importing. Saved during function
+  /// importing and consumed during the metadata linking postpass.
+  DenseMap<unsigned, MDNode *> *ValIDToTempMDMap;
+
+  /// Used as the callback for lazy linking.
+  /// The mover has just hit GV and we have to decide if it, and other members
+  /// of the same comdat, should be linked. Every member to be linked is passed
+  /// to Add.
+  void addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add);
+
+  bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
+  bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
+  bool shouldInternalizeLinkedSymbols() {
+    return Flags & Linker::InternalizeLinkedSymbols;
   }
 
-  bool run();
+  /// Check if we should promote the given local value to global scope.
+  bool doPromoteLocalToGlobal(const GlobalValue *SGV);
 
-private:
   bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
                             const GlobalValue &Src);
 
-  /// Helper method for setting a message and returning an error code.
+  /// Should we have mover and linker error diag info?
   bool emitError(const Twine &Message) {
-    DiagnosticHandler(LinkDiagnosticInfo(DS_Error, Message));
+    SrcM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, Message));
     return true;
   }
 
-  void emitWarning(const Twine &Message) {
-    DiagnosticHandler(LinkDiagnosticInfo(DS_Warning, Message));
-  }
-
-  bool getComdatLeader(Module *M, StringRef ComdatName,
+  bool getComdatLeader(Module &M, StringRef ComdatName,
                        const GlobalVariable *&GVar);
   bool computeResultingSelectionKind(StringRef ComdatName,
                                      Comdat::SelectionKind Src,
@@ -463,17 +88,20 @@ private:
       ComdatsChosen;
   bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK,
                       bool &LinkFromSrc);
+  // Keep track of the global value members of each comdat in source.
+  DenseMap<const Comdat *, std::vector<GlobalValue *>> ComdatMembers;
 
   /// Given a global in the source module, return the global in the
   /// destination module that is being linked to, if any.
   GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) {
+    Module &DstM = Mover.getModule();
     // If the source has no name it can't link. If it has local linkage,
     // there is no name match-up going on.
-    if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+    if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(getLinkage(SrcGV)))
       return nullptr;
 
     // Otherwise see if we have a match in the destination module's symtab.
-    GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
+    GlobalValue *DGV = DstM.getNamedValue(getName(SrcGV));
     if (!DGV)
       return nullptr;
@@ -486,139 +114,237 @@ private:
     return DGV;
   }
 
-  void computeTypeMapping();
+  bool linkIfNeeded(GlobalValue &GV);
 
-  void upgradeMismatchedGlobalArray(StringRef Name);
-  void upgradeMismatchedGlobals();
+  /// Helper methods to check if we are importing from or potentially
+  /// exporting from the current source module.
+  bool isPerformingImport() const { return FunctionsToImport != nullptr; }
+  bool isModuleExporting() const { return HasExportedFunctions; }
 
-  bool linkAppendingVarProto(GlobalVariable *DstGV,
-                             const GlobalVariable *SrcGV);
+  /// If we are importing from the source module, checks if we should
+  /// import SGV as a definition, otherwise import as a declaration.
+  bool doImportAsDefinition(const GlobalValue *SGV);
 
-  bool linkGlobalValueProto(GlobalValue *GV);
-  bool linkModuleFlagsMetadata();
+  /// Get the name for SGV that should be used in the linked destination
+  /// module. Specifically, this handles the case where we need to rename
+  /// a local that is being promoted to global scope.
+  std::string getName(const GlobalValue *SGV);
 
-  void linkAppendingVarInit(const AppendingVarInfo &AVI);
+  /// Process globals so that they can be used in ThinLTO. This includes
+  /// promoting local variables so that they can be referenced externally by
+  /// thin lto imported globals and converting strong external globals to
+  /// available_externally.
+  void processGlobalsForThinLTO();
+  void processGlobalForThinLTO(GlobalValue &GV);
 
-  void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src);
-  bool linkFunctionBody(Function &Dst, Function &Src);
-  void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src);
-  bool linkGlobalValueBody(GlobalValue &Src);
+  /// Get the new linkage for SGV that should be used in the linked destination
+  /// module. Specifically, for ThinLTO importing or exporting it may need
+  /// to be adjusted.
+  GlobalValue::LinkageTypes getLinkage(const GlobalValue *SGV);
 
-  void linkNamedMDNodes();
-  void stripReplacedSubprograms();
+public:
+  ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags,
+               const FunctionInfoIndex *Index = nullptr,
+               DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
+               DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr)
+      : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index),
+        FunctionsToImport(FunctionsToImport),
+        ValIDToTempMDMap(ValIDToTempMDMap) {
+    assert((ImportIndex || !FunctionsToImport) &&
+           "Expect a FunctionInfoIndex when importing");
+    // If we have a FunctionInfoIndex but no function to import,
+    // then this is the primary module being compiled in a ThinLTO
+    // backend compilation, and we need to see if it has functions that
+    // may be exported to another backend compilation.
-/// The LLVM SymbolTable class autorenames globals that conflict in the symbol
-/// table. This is good for all clients except for us. Go through the trouble
-/// to force this back.
-static void forceRenaming(GlobalValue *GV, StringRef Name) {
-  // If the global doesn't force its name or if it already has the right name,
-  // there is nothing for us to do.
-  if (GV->hasLocalLinkage() || GV->getName() == Name)
-    return;
-
-  Module *M = GV->getParent();
-
-  // If there is a conflict, rename the conflict.
-  if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
-    GV->takeName(ConflictGV);
-    ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
-    assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
-  } else {
-    GV->setName(Name); // Force the name back
+bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
+  if (!isPerformingImport())
+    return false;
+  auto *GA = dyn_cast<GlobalAlias>(SGV);
+  if (GA) {
+    if (GA->hasWeakAnyLinkage())
+      return false;
+    const GlobalObject *GO = GA->getBaseObject();
+    if (!GO->hasLinkOnceODRLinkage())
+      return false;
+    return doImportAsDefinition(GO);
   }
-}
-
-/// Copy additional attributes (those not needed to construct a GlobalValue)
-/// from the SrcGV to the DestGV.
-static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
-  DestGV->copyAttributesFrom(SrcGV);
-  forceRenaming(DestGV, SrcGV->getName());
-}
-
-static bool isLessConstraining(GlobalValue::VisibilityTypes a,
-                               GlobalValue::VisibilityTypes b) {
-  if (a == GlobalValue::HiddenVisibility)
-    return false;
-  if (b == GlobalValue::HiddenVisibility)
+  // Always import GlobalVariable definitions, except for the special
+  // case of WeakAny, which is imported as an ExternalWeak declaration
+  // (see comments in ModuleLinker::getLinkage). The linkage changes
+  // described in ModuleLinker::getLinkage ensure the correct behavior (e.g.
+  // global variables with external linkage are transformed to
+  // available_externally definitions, which are ultimately turned into
+  // declarations after the EliminateAvailableExternally pass).
+  if (isa<GlobalVariable>(SGV) && !SGV->isDeclaration() &&
+      !SGV->hasWeakAnyLinkage())
     return true;
-  if (a == GlobalValue::ProtectedVisibility)
-    return false;
-  if (b == GlobalValue::ProtectedVisibility)
+  // Only import the function if it was requested for importing.
+  auto *SF = dyn_cast<Function>(SGV);
+  if (SF && FunctionsToImport->count(SF))
    return true;
+  // Otherwise no.
   return false;
 }

-/// Loop through the global variables in the src module and merge them into the
-/// dest module.
-static GlobalVariable *copyGlobalVariableProto(TypeMapTy &TypeMap, Module &DstM,
-                                               const GlobalVariable *SGVar) {
-  // No linking to be performed or linking from the source: simply create an
-  // identical version of the symbol over in the dest module... the
-  // initializer will be filled in later by LinkGlobalInits.
- GlobalVariable *NewDGV = new GlobalVariable( - DstM, TypeMap.get(SGVar->getType()->getElementType()), - SGVar->isConstant(), SGVar->getLinkage(), /*init*/ nullptr, - SGVar->getName(), /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(), - SGVar->getType()->getAddressSpace()); +bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) { + assert(SGV->hasLocalLinkage()); + // Both the imported references and the original local variable must + // be promoted. + if (!isPerformingImport() && !isModuleExporting()) + return false; - return NewDGV; + // Local const variables never need to be promoted unless they are address + // taken. The imported uses can simply use the clone created in this module. + // For now we are conservative in determining which variables are not + // address taken by checking the unnamed addr flag. To be more aggressive, + // the address taken information must be checked earlier during parsing + // of the module and recorded in the function index for use when importing + // from that module. + auto *GVar = dyn_cast(SGV); + if (GVar && GVar->isConstant() && GVar->hasUnnamedAddr()) + return false; + + // Eventually we only need to promote functions in the exporting module that + // are referenced by a potentially exported function (i.e. one that is in the + // function index). + return true; } -/// Link the function in the source module into the destination module if -/// needed, setting up mapping information. -static Function *copyFunctionProto(TypeMapTy &TypeMap, Module &DstM, - const Function *SF) { - // If there is no linkage to be performed or we are linking from the source, - // bring SF over. - return Function::Create(TypeMap.get(SF->getFunctionType()), SF->getLinkage(), - SF->getName(), &DstM); +std::string ModuleLinker::getName(const GlobalValue *SGV) { + // For locals that must be promoted to global scope, ensure that + // the promoted name uniquely identifies the copy in the original module, + // using the ID assigned during combined index creation. When importing, + // we rename all locals (not just those that are promoted) in order to + // avoid naming conflicts between locals imported from different modules. + if (SGV->hasLocalLinkage() && + (doPromoteLocalToGlobal(SGV) || isPerformingImport())) + return FunctionInfoIndex::getGlobalNameForLocal( + SGV->getName(), + ImportIndex->getModuleId(SGV->getParent()->getModuleIdentifier())); + return SGV->getName(); } -/// Set up prototypes for any aliases that come over from the source module. -static GlobalAlias *copyGlobalAliasProto(TypeMapTy &TypeMap, Module &DstM, - const GlobalAlias *SGA) { - // If there is no linkage to be performed or we're linking from the source, - // bring over SGA. 
-  auto *PTy = cast<PointerType>(TypeMap.get(SGA->getType()));
-  return GlobalAlias::create(PTy, SGA->getLinkage(), SGA->getName(), &DstM);
-}
-
-static GlobalValue *copyGlobalValueProto(TypeMapTy &TypeMap, Module &DstM,
-                                         const GlobalValue *SGV) {
-  GlobalValue *NewGV;
-  if (auto *SGVar = dyn_cast<GlobalVariable>(SGV))
-    NewGV = copyGlobalVariableProto(TypeMap, DstM, SGVar);
-  else if (auto *SF = dyn_cast<Function>(SGV))
-    NewGV = copyFunctionProto(TypeMap, DstM, SF);
-  else
-    NewGV = copyGlobalAliasProto(TypeMap, DstM, cast<GlobalAlias>(SGV));
-  copyGVAttributes(NewGV, SGV);
-  return NewGV;
-}
-
-Value *ValueMaterializerTy::materializeValueFor(Value *V) {
-  auto *SGV = dyn_cast<GlobalValue>(V);
-  if (!SGV)
-    return nullptr;
-
-  GlobalValue *DGV = copyGlobalValueProto(TypeMap, *DstM, SGV);
-
-  if (Comdat *SC = SGV->getComdat()) {
-    if (auto *DGO = dyn_cast<GlobalObject>(DGV)) {
-      Comdat *DC = DstM->getOrInsertComdat(SC->getName());
-      DGO->setComdat(DC);
-    }
+GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) {
+  // Any local variable that is referenced by an exported function needs
+  // to be promoted to global scope. Since we don't currently know which
+  // functions reference which local variables/functions, we must treat
+  // all as potentially exported if this module is exporting anything.
+  if (isModuleExporting()) {
+    if (SGV->hasLocalLinkage() && doPromoteLocalToGlobal(SGV))
+      return GlobalValue::ExternalLinkage;
+    return SGV->getLinkage();
   }

-  LazilyLinkGlobalValues.push_back(SGV);
-  return DGV;
+  // Otherwise, if we aren't importing, no linkage change is needed.
+  if (!isPerformingImport())
+    return SGV->getLinkage();
+
+  switch (SGV->getLinkage()) {
+  case GlobalValue::ExternalLinkage:
+    // External definitions are converted to available_externally
+    // definitions upon import, so that they are available for inlining
+    // and/or optimization, but are turned into declarations later
+    // during the EliminateAvailableExternally pass.
+    if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
+      return GlobalValue::AvailableExternallyLinkage;
+    // An imported external declaration stays external.
+    return SGV->getLinkage();
+
+  case GlobalValue::AvailableExternallyLinkage:
+    // An imported available_externally definition converts
+    // to external if imported as a declaration.
+    if (!doImportAsDefinition(SGV))
+      return GlobalValue::ExternalLinkage;
+    // An imported available_externally declaration stays that way.
+    return SGV->getLinkage();
+
+  case GlobalValue::LinkOnceAnyLinkage:
+  case GlobalValue::LinkOnceODRLinkage:
+    // These both stay the same when importing the definition.
+    // The ThinLTO pass will eventually force-import their definitions.
+    return SGV->getLinkage();
+
+  case GlobalValue::WeakAnyLinkage:
+    // Can't import weak_any definitions correctly, or we might change the
+    // program semantics, since the linker will pick the first weak_any
+    // definition and importing would change the order they are seen by the
+    // linker. The module linking caller needs to enforce this.
+    assert(!doImportAsDefinition(SGV));
+    // If imported as a declaration, it becomes external_weak.
+    return GlobalValue::ExternalWeakLinkage;
+
+  case GlobalValue::WeakODRLinkage:
+    // For weak_odr linkage, there is a guarantee that all copies will be
+    // equivalent, so the issue described above for weak_any does not exist,
+    // and the definition can be imported. It can be treated similarly
+    // to an imported externally visible global value.
+ if (doImportAsDefinition(SGV) && !dyn_cast(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + + case GlobalValue::AppendingLinkage: + // It would be incorrect to import an appending linkage variable, + // since it would cause global constructors/destructors to be + // executed multiple times. This should have already been handled + // by linkIfNeeded, and we will assert in shouldLinkFromSource + // if we try to import, so we simply return AppendingLinkage here + // as this helper is called more widely in getLinkedToGlobal. + return GlobalValue::AppendingLinkage; + + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + // If we are promoting the local to global scope, it is handled + // similarly to a normal externally visible global. + if (doPromoteLocalToGlobal(SGV)) { + if (doImportAsDefinition(SGV) && !dyn_cast(SGV)) + return GlobalValue::AvailableExternallyLinkage; + else + return GlobalValue::ExternalLinkage; + } + // A non-promoted imported local definition stays local. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + + case GlobalValue::ExternalWeakLinkage: + // External weak doesn't apply to definitions, must be a declaration. + assert(!doImportAsDefinition(SGV)); + // Linkage stays external_weak. + return SGV->getLinkage(); + + case GlobalValue::CommonLinkage: + // Linkage stays common on definitions. + // The ThinLTO pass will eventually force-import their definitions. + return SGV->getLinkage(); + } + + llvm_unreachable("unknown linkage type"); } -bool ModuleLinker::getComdatLeader(Module *M, StringRef ComdatName, +static GlobalValue::VisibilityTypes +getMinVisibility(GlobalValue::VisibilityTypes A, + GlobalValue::VisibilityTypes B) { + if (A == GlobalValue::HiddenVisibility || B == GlobalValue::HiddenVisibility) + return GlobalValue::HiddenVisibility; + if (A == GlobalValue::ProtectedVisibility || + B == GlobalValue::ProtectedVisibility) + return GlobalValue::ProtectedVisibility; + return GlobalValue::DefaultVisibility; +} + +bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName, const GlobalVariable *&GVar) { - const GlobalValue *GVal = M->getNamedValue(ComdatName); + const GlobalValue *GVal = M.getNamedValue(ComdatName); if (const auto *GA = dyn_cast_or_null(GVal)) { GVal = GA->getBaseObject(); if (!GVal) @@ -641,6 +367,7 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, Comdat::SelectionKind Dst, Comdat::SelectionKind &Result, bool &LinkFromSrc) { + Module &DstM = Mover.getModule(); // The ability to mix Comdat::SelectionKind::Any with // Comdat::SelectionKind::Largest is a behavior that comes from COFF. 
bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any || @@ -677,8 +404,8 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, getComdatLeader(SrcM, ComdatName, SrcGV)) return true; - const DataLayout &DstDL = DstM->getDataLayout(); - const DataLayout &SrcDL = SrcM->getDataLayout(); + const DataLayout &DstDL = DstM.getDataLayout(); + const DataLayout &SrcDL = SrcM.getDataLayout(); uint64_t DstSize = DstDL.getTypeAllocSize(DstGV->getType()->getPointerElementType()); uint64_t SrcSize = @@ -708,9 +435,10 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, bool ModuleLinker::getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &Result, bool &LinkFromSrc) { + Module &DstM = Mover.getModule(); Comdat::SelectionKind SSK = SrcC->getSelectionKind(); StringRef ComdatName = SrcC->getName(); - Module::ComdatSymTabType &ComdatSymTab = DstM->getComdatSymbolTable(); + Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable(); Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName); if (DstCI == ComdatSymTab.end()) { @@ -729,14 +457,17 @@ bool ModuleLinker::getComdatResult(const Comdat *SrcC, bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest, const GlobalValue &Src) { + // Should we unconditionally use the Src? - if (OverrideFromSrc) { + if (shouldOverrideFromSrc()) { LinkFromSrc = true; return false; } // We always have to add Src if it has appending linkage. if (Src.hasAppendingLinkage()) { + // Should have prevented importing for appending linkage in linkIfNeeded. + assert(!isPerformingImport()); LinkFromSrc = true; return false; } @@ -744,6 +475,28 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, bool SrcIsDeclaration = Src.isDeclarationForLinker(); bool DestIsDeclaration = Dest.isDeclarationForLinker(); + if (isPerformingImport()) { + if (isa(&Src)) { + // For functions, LinkFromSrc iff this is a function requested + // for importing. For variables, decide below normally. + LinkFromSrc = FunctionsToImport->count(&Src); + return false; + } + + // Check if this is an alias with an already existing definition + // in Dest, which must have come from a prior importing pass from + // the same Src module. Unlike imported function and variable + // definitions, which are imported as available_externally and are + // not definitions for the linker, that is not a valid linkage for + // imported aliases which must be definitions. Simply use the existing + // Dest copy. + if (isa(&Src) && !DestIsDeclaration) { + assert(isa(&Dest)); + LinkFromSrc = false; + return false; + } + } + if (SrcIsDeclaration) { // If Src is external or if both Src & Dest are external.. Just link the // external globals, we aren't adding anything. @@ -753,7 +506,12 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, return false; } // If the Dest is weak, use the source linkage. - LinkFromSrc = Dest.hasExternalWeakLinkage(); + if (Dest.hasExternalWeakLinkage()) { + LinkFromSrc = true; + return false; + } + // Link an available_externally over a declaration. + LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration(); return false; } @@ -808,730 +566,117 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, "': symbol multiply defined!"); } -/// Loop over all of the linked values to compute type mappings. For example, -/// if we link "extern Foo *x" and "Foo *x = NULL", then we have two struct -/// types 'Foo' but one got renamed when the module was loaded into the same -/// LLVMContext. 
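A subtle change in shouldLinkFromSource above: an available_externally body in Src is a declaration for the linker but not a plain declaration, so it now wins over a bare declaration in Dest. Expected outcomes for a few simple cases, ignoring dllimport, comdats, and weak-vs-weak resolution (a rough sketch inferred from the hunks above, not an exhaustive table):

    // Src                          Dest                         LinkFromSrc
    // plain declaration            definition                   false
    // available_externally def     plain declaration            true  (new)
    // plain declaration            external_weak declaration    true
    // definition                   plain declaration            true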
-void ModuleLinker::computeTypeMapping() { - for (GlobalValue &SGV : SrcM->globals()) { - GlobalValue *DGV = getLinkedToGlobal(&SGV); - if (!DGV) - continue; +bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { + GlobalValue *DGV = getLinkedToGlobal(&GV); - if (!DGV->hasAppendingLinkage() || !SGV.hasAppendingLinkage()) { - TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); - continue; + if (shouldLinkOnlyNeeded() && !(DGV && DGV->isDeclaration())) + return false; + + if (DGV && !GV.hasLocalLinkage() && !GV.hasAppendingLinkage()) { + auto *DGVar = dyn_cast(DGV); + auto *SGVar = dyn_cast(&GV); + if (DGVar && SGVar) { + if (DGVar->isDeclaration() && SGVar->isDeclaration() && + (!DGVar->isConstant() || !SGVar->isConstant())) { + DGVar->setConstant(false); + SGVar->setConstant(false); + } + if (DGVar->hasCommonLinkage() && SGVar->hasCommonLinkage()) { + unsigned Align = std::max(DGVar->getAlignment(), SGVar->getAlignment()); + SGVar->setAlignment(Align); + DGVar->setAlignment(Align); + } } - // Unify the element type of appending arrays. - ArrayType *DAT = cast(DGV->getType()->getElementType()); - ArrayType *SAT = cast(SGV.getType()->getElementType()); - TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType()); + GlobalValue::VisibilityTypes Visibility = + getMinVisibility(DGV->getVisibility(), GV.getVisibility()); + DGV->setVisibility(Visibility); + GV.setVisibility(Visibility); + + bool HasUnnamedAddr = GV.hasUnnamedAddr() && DGV->hasUnnamedAddr(); + DGV->setUnnamedAddr(HasUnnamedAddr); + GV.setUnnamedAddr(HasUnnamedAddr); } - for (GlobalValue &SGV : *SrcM) { - if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) - TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); - } + // Don't want to append to global_ctors list, for example, when we + // are importing for ThinLTO, otherwise the global ctors and dtors + // get executed multiple times for local variables (the latter causing + // double frees). + if (GV.hasAppendingLinkage() && isPerformingImport()) + return false; - for (GlobalValue &SGV : SrcM->aliases()) { - if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) - TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); - } + if (isPerformingImport() && !doImportAsDefinition(&GV)) + return false; - // Incorporate types by name, scanning all the types in the source module. - // At this point, the destination module may have a type "%foo = { i32 }" for - // example. When the source module got loaded into the same LLVMContext, if - // it had the same type, it would have been renamed to "%foo.42 = { i32 }". - std::vector Types = SrcM->getIdentifiedStructTypes(); - for (StructType *ST : Types) { - if (!ST->hasName()) - continue; + if (!DGV && !shouldOverrideFromSrc() && + (GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() || + GV.hasAvailableExternallyLinkage())) + return false; - // Check to see if there is a dot in the name followed by a digit. - size_t DotPos = ST->getName().rfind('.'); - if (DotPos == 0 || DotPos == StringRef::npos || - ST->getName().back() == '.' || - !isdigit(static_cast(ST->getName()[DotPos + 1]))) - continue; + if (GV.isDeclaration()) + return false; - // Check to see if the destination module has a struct with the prefix name. - StructType *DST = DstM->getTypeByName(ST->getName().substr(0, DotPos)); - if (!DST) - continue; - - // Don't use it if this actually came from the source module. They're in - // the same LLVMContext after all. Also don't use it unless the type is - // actually used in the destination module. 
This can happen in situations - // like this: - // - // Module A Module B - // -------- -------- - // %Z = type { %A } %B = type { %C.1 } - // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* } - // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] } - // %C = type { i8* } %B.3 = type { %C.1 } - // - // When we link Module B with Module A, the '%B' in Module B is - // used. However, that would then use '%C.1'. But when we process '%C.1', - // we prefer to take the '%C' version. So we are then left with both - // '%C.1' and '%C' being used for the same types. This leads to some - // variables using one type and some using the other. - if (TypeMap.DstStructTypesSet.hasType(DST)) - TypeMap.addTypeMapping(DST, ST); - } - - // Now that we have discovered all of the type equivalences, get a body for - // any 'opaque' types in the dest module that are now resolved. - TypeMap.linkDefinedTypeBodies(); -} - -static void upgradeGlobalArray(GlobalVariable *GV) { - ArrayType *ATy = cast(GV->getType()->getElementType()); - StructType *OldTy = cast(ATy->getElementType()); - assert(OldTy->getNumElements() == 2 && "Expected to upgrade from 2 elements"); - - // Get the upgraded 3 element type. - PointerType *VoidPtrTy = Type::getInt8Ty(GV->getContext())->getPointerTo(); - Type *Tys[3] = {OldTy->getElementType(0), OldTy->getElementType(1), - VoidPtrTy}; - StructType *NewTy = StructType::get(GV->getContext(), Tys, false); - - // Build new constants with a null third field filled in. - Constant *OldInitC = GV->getInitializer(); - ConstantArray *OldInit = dyn_cast(OldInitC); - if (!OldInit && !isa(OldInitC)) - // Invalid initializer; give up. - return; - std::vector Initializers; - if (OldInit && OldInit->getNumOperands()) { - Value *Null = Constant::getNullValue(VoidPtrTy); - for (Use &U : OldInit->operands()) { - ConstantStruct *Init = cast(U.get()); - Initializers.push_back(ConstantStruct::get( - NewTy, Init->getOperand(0), Init->getOperand(1), Null, nullptr)); - } - } - assert(Initializers.size() == ATy->getNumElements() && - "Failed to copy all array elements"); - - // Replace the old GV with a new one. - ATy = ArrayType::get(NewTy, Initializers.size()); - Constant *NewInit = ConstantArray::get(ATy, Initializers); - GlobalVariable *NewGV = new GlobalVariable( - *GV->getParent(), ATy, GV->isConstant(), GV->getLinkage(), NewInit, "", - GV, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(), - GV->isExternallyInitialized()); - NewGV->copyAttributesFrom(GV); - NewGV->takeName(GV); - assert(GV->use_empty() && "program cannot use initializer list"); - GV->eraseFromParent(); -} - -void ModuleLinker::upgradeMismatchedGlobalArray(StringRef Name) { - // Look for the global arrays. - auto *DstGV = dyn_cast_or_null(DstM->getNamedValue(Name)); - if (!DstGV) - return; - auto *SrcGV = dyn_cast_or_null(SrcM->getNamedValue(Name)); - if (!SrcGV) - return; - - // Check if the types already match. - auto *DstTy = cast(DstGV->getType()->getElementType()); - auto *SrcTy = - cast(TypeMap.get(SrcGV->getType()->getElementType())); - if (DstTy == SrcTy) - return; - - // Grab the element types. We can only upgrade an array of a two-field - // struct. Only bother if the other one has three-fields. 
- auto *DstEltTy = cast(DstTy->getElementType()); - auto *SrcEltTy = cast(SrcTy->getElementType()); - if (DstEltTy->getNumElements() == 2 && SrcEltTy->getNumElements() == 3) { - upgradeGlobalArray(DstGV); - return; - } - if (DstEltTy->getNumElements() == 3 && SrcEltTy->getNumElements() == 2) - upgradeGlobalArray(SrcGV); - - // We can't upgrade any other differences. -} - -void ModuleLinker::upgradeMismatchedGlobals() { - upgradeMismatchedGlobalArray("llvm.global_ctors"); - upgradeMismatchedGlobalArray("llvm.global_dtors"); -} - -/// If there were any appending global variables, link them together now. -/// Return true on error. -bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, - const GlobalVariable *SrcGV) { - - if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) - return emitError("Linking globals named '" + SrcGV->getName() + - "': can only link appending global with another appending global!"); - - ArrayType *DstTy = cast(DstGV->getType()->getElementType()); - ArrayType *SrcTy = - cast(TypeMap.get(SrcGV->getType()->getElementType())); - Type *EltTy = DstTy->getElementType(); - - // Check to see that they two arrays agree on type. - if (EltTy != SrcTy->getElementType()) - return emitError("Appending variables with different element types!"); - if (DstGV->isConstant() != SrcGV->isConstant()) - return emitError("Appending variables linked with different const'ness!"); - - if (DstGV->getAlignment() != SrcGV->getAlignment()) - return emitError( - "Appending variables with different alignment need to be linked!"); - - if (DstGV->getVisibility() != SrcGV->getVisibility()) - return emitError( - "Appending variables with different visibility need to be linked!"); - - if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr()) - return emitError( - "Appending variables with different unnamed_addr need to be linked!"); - - if (StringRef(DstGV->getSection()) != SrcGV->getSection()) - return emitError( - "Appending variables with different section name need to be linked!"); - - uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements(); - ArrayType *NewType = ArrayType::get(EltTy, NewSize); - - // Create the new global variable. - GlobalVariable *NG = - new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(), - DstGV->getLinkage(), /*init*/nullptr, /*name*/"", DstGV, - DstGV->getThreadLocalMode(), - DstGV->getType()->getAddressSpace()); - - // Propagate alignment, visibility and section info. - copyGVAttributes(NG, DstGV); - - AppendingVarInfo AVI; - AVI.NewGV = NG; - AVI.DstInit = DstGV->getInitializer(); - AVI.SrcInit = SrcGV->getInitializer(); - AppendingVars.push_back(AVI); - - // Replace any uses of the two global variables with uses of the new - // global. - ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType())); - - DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType())); - DstGV->eraseFromParent(); - - // Track the source variable so we don't try to link it. - DoNotLinkFromSource.insert(SrcGV); - - return false; -} - -bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { - GlobalValue *DGV = getLinkedToGlobal(SGV); - - // Handle the ultra special appending linkage case first. 
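The removed linkAppendingVarProto above only concatenates once every other property (constness, alignment, visibility, unnamed_addr, section) agrees; the merged global is simply a wider array of the shared element type. The size arithmetic in isolation, with hypothetical element counts and EltTy as in the removed code:

    // Two appending copies of llvm.global_ctors, with 3 and 2 entries:
    //   [3 x EltTy] in Dest, [2 x EltTy] in Src.
    uint64_t NewSize = 3 + 2;
    // The destination gets a single [5 x EltTy] array holding both halves.
    ArrayType *NewType = ArrayType::get(EltTy, NewSize);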
- if (DGV && DGV->hasAppendingLinkage()) - return linkAppendingVarProto(cast(DGV), - cast(SGV)); - - bool LinkFromSrc = true; - Comdat *C = nullptr; - GlobalValue::VisibilityTypes Visibility = SGV->getVisibility(); - bool HasUnnamedAddr = SGV->hasUnnamedAddr(); - - if (const Comdat *SC = SGV->getComdat()) { + if (const Comdat *SC = GV.getComdat()) { + bool LinkFromSrc; Comdat::SelectionKind SK; std::tie(SK, LinkFromSrc) = ComdatsChosen[SC]; - C = DstM->getOrInsertComdat(SC->getName()); - C->setSelectionKind(SK); - } else if (DGV) { - if (shouldLinkFromSource(LinkFromSrc, *DGV, *SGV)) - return true; - } - - if (!LinkFromSrc) { - // Track the source global so that we don't attempt to copy it over when - // processing global initializers. - DoNotLinkFromSource.insert(SGV); - - if (DGV) - // Make sure to remember this mapping. - ValueMap[SGV] = - ConstantExpr::getBitCast(DGV, TypeMap.get(SGV->getType())); - } - - if (DGV) { - Visibility = isLessConstraining(Visibility, DGV->getVisibility()) - ? DGV->getVisibility() - : Visibility; - HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr(); - } - - if (!LinkFromSrc && !DGV) - return false; - - GlobalValue *NewGV; - if (!LinkFromSrc) { - NewGV = DGV; - } else { - // If the GV is to be lazily linked, don't create it just yet. - // The ValueMaterializerTy will deal with creating it if it's used. - if (!DGV && !OverrideFromSrc && - (SGV->hasLocalLinkage() || SGV->hasLinkOnceLinkage() || - SGV->hasAvailableExternallyLinkage())) { - DoNotLinkFromSource.insert(SGV); - return false; - } - - NewGV = copyGlobalValueProto(TypeMap, *DstM, SGV); - - if (DGV && isa(DGV)) - if (auto *NewF = dyn_cast(NewGV)) - OverridingFunctions.insert(NewF); - } - - NewGV->setUnnamedAddr(HasUnnamedAddr); - NewGV->setVisibility(Visibility); - - if (auto *NewGO = dyn_cast(NewGV)) { - if (C) - NewGO->setComdat(C); - - if (DGV && DGV->hasCommonLinkage() && SGV->hasCommonLinkage()) - NewGO->setAlignment(std::max(DGV->getAlignment(), SGV->getAlignment())); - } - - if (auto *NewGVar = dyn_cast(NewGV)) { - auto *DGVar = dyn_cast_or_null(DGV); - auto *SGVar = dyn_cast(SGV); - if (DGVar && SGVar && DGVar->isDeclaration() && SGVar->isDeclaration() && - (!DGVar->isConstant() || !SGVar->isConstant())) - NewGVar->setConstant(false); - } - - // Make sure to remember this mapping. - if (NewGV != DGV) { - if (DGV) { - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType())); - DGV->eraseFromParent(); - } - ValueMap[SGV] = NewGV; - } - - return false; -} - -static void getArrayElements(const Constant *C, - SmallVectorImpl &Dest) { - unsigned NumElements = cast(C->getType())->getNumElements(); - - for (unsigned i = 0; i != NumElements; ++i) - Dest.push_back(C->getAggregateElement(i)); -} - -void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) { - // Merge the initializer. 
- SmallVector DstElements; - getArrayElements(AVI.DstInit, DstElements); - - SmallVector SrcElements; - getArrayElements(AVI.SrcInit, SrcElements); - - ArrayType *NewType = cast(AVI.NewGV->getType()->getElementType()); - - StringRef Name = AVI.NewGV->getName(); - bool IsNewStructor = - (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") && - cast(NewType->getElementType())->getNumElements() == 3; - - for (auto *V : SrcElements) { - if (IsNewStructor) { - Constant *Key = V->getAggregateElement(2); - if (DoNotLinkFromSource.count(Key)) - continue; - } - DstElements.push_back( - MapValue(V, ValueMap, RF_None, &TypeMap, &ValMaterializer)); - } - if (IsNewStructor) { - NewType = ArrayType::get(NewType->getElementType(), DstElements.size()); - AVI.NewGV->mutateType(PointerType::get(NewType, 0)); - } - - AVI.NewGV->setInitializer(ConstantArray::get(NewType, DstElements)); -} - -/// Update the initializers in the Dest module now that all globals that may be -/// referenced are in Dest. -void ModuleLinker::linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src) { - // Figure out what the initializer looks like in the dest module. - Dst.setInitializer(MapValue(Src.getInitializer(), ValueMap, RF_None, &TypeMap, - &ValMaterializer)); -} - -/// Copy the source function over into the dest function and fix up references -/// to values. At this point we know that Dest is an external function, and -/// that Src is not. -bool ModuleLinker::linkFunctionBody(Function &Dst, Function &Src) { - assert(Dst.isDeclaration() && !Src.isDeclaration()); - - // Materialize if needed. - if (std::error_code EC = Src.materialize()) - return emitError(EC.message()); - - // Link in the prefix data. - if (Src.hasPrefixData()) - Dst.setPrefixData(MapValue(Src.getPrefixData(), ValueMap, RF_None, &TypeMap, - &ValMaterializer)); - - // Link in the prologue data. - if (Src.hasPrologueData()) - Dst.setPrologueData(MapValue(Src.getPrologueData(), ValueMap, RF_None, - &TypeMap, &ValMaterializer)); - - // Link in the personality function. - if (Src.hasPersonalityFn()) - Dst.setPersonalityFn(MapValue(Src.getPersonalityFn(), ValueMap, RF_None, - &TypeMap, &ValMaterializer)); - - // Go through and convert function arguments over, remembering the mapping. - Function::arg_iterator DI = Dst.arg_begin(); - for (Argument &Arg : Src.args()) { - DI->setName(Arg.getName()); // Copy the name over. - - // Add a mapping to our mapping. - ValueMap[&Arg] = DI; - ++DI; - } - - // Copy over the metadata attachments. - SmallVector, 8> MDs; - Src.getAllMetadata(MDs); - for (const auto &I : MDs) - Dst.setMetadata(I.first, MapMetadata(I.second, ValueMap, RF_None, &TypeMap, - &ValMaterializer)); - - // Splice the body of the source function into the dest function. - Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList()); - - // At this point, all of the instructions and values of the function are now - // copied over. The only problem is that they are still referencing values in - // the Source function as operands. Loop through all of the operands of the - // functions and patch them up to point to the local versions. - for (BasicBlock &BB : Dst) - for (Instruction &I : BB) - RemapInstruction(&I, ValueMap, RF_IgnoreMissingEntries, &TypeMap, - &ValMaterializer); - - // There is no need to map the arguments anymore. 
- for (Argument &Arg : Src.args()) - ValueMap.erase(&Arg); - - Src.dematerialize(); - return false; -} - -void ModuleLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) { - Constant *Aliasee = Src.getAliasee(); - Constant *Val = - MapValue(Aliasee, ValueMap, RF_None, &TypeMap, &ValMaterializer); - Dst.setAliasee(Val); -} - -bool ModuleLinker::linkGlobalValueBody(GlobalValue &Src) { - Value *Dst = ValueMap[&Src]; - assert(Dst); - if (auto *F = dyn_cast(&Src)) - return linkFunctionBody(cast(*Dst), *F); - if (auto *GVar = dyn_cast(&Src)) { - linkGlobalInit(cast(*Dst), *GVar); + if (LinkFromSrc) + ValuesToLink.insert(&GV); return false; } - linkAliasBody(cast(*Dst), cast(Src)); + + bool LinkFromSrc = true; + if (DGV && shouldLinkFromSource(LinkFromSrc, *DGV, GV)) + return true; + if (LinkFromSrc) + ValuesToLink.insert(&GV); return false; } -/// Insert all of the named MDNodes in Src into the Dest module. -void ModuleLinker::linkNamedMDNodes() { - const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata(); - for (const NamedMDNode &NMD : SrcM->named_metadata()) { - // Don't link module flags here. Do them separately. - if (&NMD == SrcModFlags) - continue; - NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(NMD.getName()); - // Add Src elements into Dest node. - for (const MDNode *op : NMD.operands()) - DestNMD->addOperand( - MapMetadata(op, ValueMap, RF_None, &TypeMap, &ValMaterializer)); - } -} - -/// Drop DISubprograms that have been superseded. -/// -/// FIXME: this creates an asymmetric result: we strip functions from losing -/// subprograms in DstM, but leave losing subprograms in SrcM. -/// TODO: Remove this logic once the backend can correctly determine canonical -/// subprograms. -void ModuleLinker::stripReplacedSubprograms() { - // Avoid quadratic runtime by returning early when there's nothing to do. - if (OverridingFunctions.empty()) +void ModuleLinker::addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add) { + // Add these to the internalize list + if (!GV.hasLinkOnceLinkage()) return; - // Move the functions now, so the set gets cleared even on early returns. - auto Functions = std::move(OverridingFunctions); - OverridingFunctions.clear(); + if (shouldInternalizeLinkedSymbols()) + Internalize.insert(GV.getName()); + Add(GV); - // Drop functions from subprograms if they've been overridden by the new - // compile unit. - NamedMDNode *CompileUnits = DstM->getNamedMetadata("llvm.dbg.cu"); - if (!CompileUnits) + const Comdat *SC = GV.getComdat(); + if (!SC) return; - for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) { - auto *CU = cast(CompileUnits->getOperand(I)); - assert(CU && "Expected valid compile unit"); - - for (DISubprogram *SP : CU->getSubprograms()) { - if (!SP || !SP->getFunction() || !Functions.count(SP->getFunction())) - continue; - - // Prevent DebugInfoFinder from tagging this as the canonical subprogram, - // since the canonical one is in the incoming module. - SP->replaceFunction(nullptr); - } + for (GlobalValue *GV2 : ComdatMembers[SC]) { + if (!GV2->hasLocalLinkage() && shouldInternalizeLinkedSymbols()) + Internalize.insert(GV2->getName()); + Add(*GV2); } } -/// Merge the linker flags in Src into the Dest module. -bool ModuleLinker::linkModuleFlagsMetadata() { - // If the source module has no module flags, we are done. - const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata(); - if (!SrcModFlags) return false; - - // If the destination module doesn't have module flags yet, then just copy - // over the source module's flags. 
- NamedMDNode *DstModFlags = DstM->getOrInsertModuleFlagsMetadata(); - if (DstModFlags->getNumOperands() == 0) { - for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) - DstModFlags->addOperand(SrcModFlags->getOperand(I)); - - return false; +void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) { + if (GV.hasLocalLinkage() && + (doPromoteLocalToGlobal(&GV) || isPerformingImport())) { + GV.setName(getName(&GV)); + GV.setLinkage(getLinkage(&GV)); + if (!GV.hasLocalLinkage()) + GV.setVisibility(GlobalValue::HiddenVisibility); + if (isModuleExporting()) + ValuesToLink.insert(&GV); + return; } - - // First build a map of the existing module flags and requirements. - DenseMap> Flags; - SmallSetVector Requirements; - for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) { - MDNode *Op = DstModFlags->getOperand(I); - ConstantInt *Behavior = mdconst::extract(Op->getOperand(0)); - MDString *ID = cast(Op->getOperand(1)); - - if (Behavior->getZExtValue() == Module::Require) { - Requirements.insert(cast(Op->getOperand(2))); - } else { - Flags[ID] = std::make_pair(Op, I); - } - } - - // Merge in the flags from the source module, and also collect its set of - // requirements. - bool HasErr = false; - for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) { - MDNode *SrcOp = SrcModFlags->getOperand(I); - ConstantInt *SrcBehavior = - mdconst::extract(SrcOp->getOperand(0)); - MDString *ID = cast(SrcOp->getOperand(1)); - MDNode *DstOp; - unsigned DstIndex; - std::tie(DstOp, DstIndex) = Flags.lookup(ID); - unsigned SrcBehaviorValue = SrcBehavior->getZExtValue(); - - // If this is a requirement, add it and continue. - if (SrcBehaviorValue == Module::Require) { - // If the destination module does not already have this requirement, add - // it. - if (Requirements.insert(cast(SrcOp->getOperand(2)))) { - DstModFlags->addOperand(SrcOp); - } - continue; - } - - // If there is no existing flag with this ID, just add it. - if (!DstOp) { - Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands()); - DstModFlags->addOperand(SrcOp); - continue; - } - - // Otherwise, perform a merge. - ConstantInt *DstBehavior = - mdconst::extract(DstOp->getOperand(0)); - unsigned DstBehaviorValue = DstBehavior->getZExtValue(); - - // If either flag has override behavior, handle it first. - if (DstBehaviorValue == Module::Override) { - // Diagnose inconsistent flags which both have override behavior. - if (SrcBehaviorValue == Module::Override && - SrcOp->getOperand(2) != DstOp->getOperand(2)) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting override values"); - } - continue; - } else if (SrcBehaviorValue == Module::Override) { - // Update the destination flag to that of the source. - DstModFlags->setOperand(DstIndex, SrcOp); - Flags[ID].first = SrcOp; - continue; - } - - // Diagnose inconsistent merge behavior types. - if (SrcBehaviorValue != DstBehaviorValue) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting behaviors"); - continue; - } - - auto replaceDstValue = [&](MDNode *New) { - Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; - MDNode *Flag = MDNode::get(DstM->getContext(), FlagOps); - DstModFlags->setOperand(DstIndex, Flag); - Flags[ID].first = Flag; - }; - - // Perform the merge for standard behavior types. 
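For reference, the behaviors being merged here are the ones producers attach via the long-standing Module::addModuleFlag API. A minimal sketch of a conflict that trips the Error case handled just below; "wchar_size" is an arbitrary flag name chosen for illustration:

    // Both modules pin the same flag with Error behavior but different values.
    ModuleA.addModuleFlag(Module::Error, "wchar_size", 4);
    ModuleB.addModuleFlag(Module::Error, "wchar_size", 2);
    // Linking the two then diagnoses:
    //   linking module flags 'wchar_size': IDs have conflicting values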
- switch (SrcBehaviorValue) { - case Module::Require: - case Module::Override: llvm_unreachable("not possible"); - case Module::Error: { - // Emit an error if the values differ. - if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting values"); - } - continue; - } - case Module::Warning: { - // Emit a warning if the values differ. - if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { - emitWarning("linking module flags '" + ID->getString() + - "': IDs have conflicting values"); - } - continue; - } - case Module::Append: { - MDNode *DstValue = cast(DstOp->getOperand(2)); - MDNode *SrcValue = cast(SrcOp->getOperand(2)); - SmallVector MDs; - MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands()); - MDs.append(DstValue->op_begin(), DstValue->op_end()); - MDs.append(SrcValue->op_begin(), SrcValue->op_end()); - - replaceDstValue(MDNode::get(DstM->getContext(), MDs)); - break; - } - case Module::AppendUnique: { - SmallSetVector Elts; - MDNode *DstValue = cast(DstOp->getOperand(2)); - MDNode *SrcValue = cast(SrcOp->getOperand(2)); - Elts.insert(DstValue->op_begin(), DstValue->op_end()); - Elts.insert(SrcValue->op_begin(), SrcValue->op_end()); - - replaceDstValue(MDNode::get(DstM->getContext(), - makeArrayRef(Elts.begin(), Elts.end()))); - break; - } - } - } - - // Check all of the requirements. - for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { - MDNode *Requirement = Requirements[I]; - MDString *Flag = cast(Requirement->getOperand(0)); - Metadata *ReqValue = Requirement->getOperand(1); - - MDNode *Op = Flags[Flag].first; - if (!Op || Op->getOperand(2) != ReqValue) { - HasErr |= emitError("linking module flags '" + Flag->getString() + - "': does not have the required value"); - continue; - } - } - - return HasErr; + GV.setLinkage(getLinkage(&GV)); } -// This function returns true if the triples match. -static bool triplesMatch(const Triple &T0, const Triple &T1) { - // If vendor is apple, ignore the version number. - if (T0.getVendor() == Triple::Apple) - return T0.getArch() == T1.getArch() && - T0.getSubArch() == T1.getSubArch() && - T0.getVendor() == T1.getVendor() && - T0.getOS() == T1.getOS(); - - return T0 == T1; -} - -// This function returns the merged triple. -static std::string mergeTriples(const Triple &SrcTriple, const Triple &DstTriple) { - // If vendor is apple, pick the triple with the larger version number. - if (SrcTriple.getVendor() == Triple::Apple) - if (DstTriple.isOSVersionLT(SrcTriple)) - return SrcTriple.str(); - - return DstTriple.str(); +void ModuleLinker::processGlobalsForThinLTO() { + for (GlobalVariable &GV : SrcM.globals()) + processGlobalForThinLTO(GV); + for (Function &SF : SrcM) + processGlobalForThinLTO(SF); + for (GlobalAlias &GA : SrcM.aliases()) + processGlobalForThinLTO(GA); } bool ModuleLinker::run() { - assert(DstM && "Null destination module"); - assert(SrcM && "Null source module"); - - // Inherit the target data from the source module if the destination module - // doesn't have one already. 
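The triplesMatch and mergeTriples helpers being removed above give Apple triples special treatment: the OS version is ignored when matching, and the merge keeps the higher version. A small illustration of how they behaved, with made-up version numbers:

    Triple A("x86_64-apple-macosx10.9.0");
    Triple B("x86_64-apple-macosx10.11.0");
    // Same arch, vendor, and OS, so the version difference is not a mismatch.
    assert(triplesMatch(A, B));
    // The merged triple keeps the newer OS version.
    assert(mergeTriples(A, B) == "x86_64-apple-macosx10.11.0");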
- if (DstM->getDataLayout().isDefault()) - DstM->setDataLayout(SrcM->getDataLayout()); - - if (SrcM->getDataLayout() != DstM->getDataLayout()) { - emitWarning("Linking two modules of different data layouts: '" + - SrcM->getModuleIdentifier() + "' is '" + - SrcM->getDataLayoutStr() + "' whereas '" + - DstM->getModuleIdentifier() + "' is '" + - DstM->getDataLayoutStr() + "'\n"); - } - - // Copy the target triple from the source to dest if the dest's is empty. - if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty()) - DstM->setTargetTriple(SrcM->getTargetTriple()); - - Triple SrcTriple(SrcM->getTargetTriple()), DstTriple(DstM->getTargetTriple()); - - if (!SrcM->getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple)) - emitWarning("Linking two modules of different target triples: " + - SrcM->getModuleIdentifier() + "' is '" + - SrcM->getTargetTriple() + "' whereas '" + - DstM->getModuleIdentifier() + "' is '" + - DstM->getTargetTriple() + "'\n"); - - DstM->setTargetTriple(mergeTriples(SrcTriple, DstTriple)); - - // Append the module inline asm string. - if (!SrcM->getModuleInlineAsm().empty()) { - if (DstM->getModuleInlineAsm().empty()) - DstM->setModuleInlineAsm(SrcM->getModuleInlineAsm()); - else - DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+ - SrcM->getModuleInlineAsm()); - } - - // Loop over all of the linked values to compute type mappings. - computeTypeMapping(); - - ComdatsChosen.clear(); - for (const auto &SMEC : SrcM->getComdatSymbolTable()) { + for (const auto &SMEC : SrcM.getComdatSymbolTable()) { const Comdat &C = SMEC.getValue(); if (ComdatsChosen.count(&C)) continue; @@ -1542,233 +687,88 @@ bool ModuleLinker::run() { ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc); } - // Upgrade mismatched global arrays. - upgradeMismatchedGlobals(); + for (GlobalVariable &GV : SrcM.globals()) + if (const Comdat *SC = GV.getComdat()) + ComdatMembers[SC].push_back(&GV); + + for (Function &SF : SrcM) + if (const Comdat *SC = SF.getComdat()) + ComdatMembers[SC].push_back(&SF); + + for (GlobalAlias &GA : SrcM.aliases()) + if (const Comdat *SC = GA.getComdat()) + ComdatMembers[SC].push_back(&GA); // Insert all of the globals in src into the DstM module... without linking // initializers (which could refer to functions not yet mapped over). - for (GlobalVariable &GV : SrcM->globals()) - if (linkGlobalValueProto(&GV)) + for (GlobalVariable &GV : SrcM.globals()) + if (linkIfNeeded(GV)) return true; - // Link the functions together between the two modules, without doing function - // bodies... this just adds external function prototypes to the DstM - // function... We do this so that when we begin processing function bodies, - // all of the global values that may be referenced are available in our - // ValueMap. - for (Function &F :*SrcM) - if (linkGlobalValueProto(&F)) + for (Function &SF : SrcM) + if (linkIfNeeded(SF)) return true; - // If there were any aliases, link them now. 
- for (GlobalAlias &GA : SrcM->aliases()) - if (linkGlobalValueProto(&GA)) + for (GlobalAlias &GA : SrcM.aliases()) + if (linkIfNeeded(GA)) return true; - for (const AppendingVarInfo &AppendingVar : AppendingVars) - linkAppendingVarInit(AppendingVar); + processGlobalsForThinLTO(); - for (const auto &Entry : DstM->getComdatSymbolTable()) { - const Comdat &C = Entry.getValue(); - if (C.getSelectionKind() == Comdat::Any) + for (unsigned I = 0; I < ValuesToLink.size(); ++I) { + GlobalValue *GV = ValuesToLink[I]; + const Comdat *SC = GV->getComdat(); + if (!SC) continue; - const GlobalValue *GV = SrcM->getNamedValue(C.getName()); - if (GV) - MapValue(GV, ValueMap, RF_None, &TypeMap, &ValMaterializer); + for (GlobalValue *GV2 : ComdatMembers[SC]) + ValuesToLink.insert(GV2); } - // Strip replaced subprograms before mapping any metadata -- so that we're - // not changing metadata from the source module (note that - // linkGlobalValueBody() eventually calls RemapInstruction() and therefore - // MapMetadata()) -- but after linking global value protocols -- so that - // OverridingFunctions has been built. - stripReplacedSubprograms(); - - // Link in the function bodies that are defined in the source module into - // DstM. - for (Function &SF : *SrcM) { - // Skip if no body (function is external). - if (SF.isDeclaration()) - continue; - - // Skip if not linking from source. - if (DoNotLinkFromSource.count(&SF)) - continue; - - if (linkGlobalValueBody(SF)) - return true; + if (shouldInternalizeLinkedSymbols()) { + for (GlobalValue *GV : ValuesToLink) + Internalize.insert(GV->getName()); } - // Resolve all uses of aliases with aliasees. - for (GlobalAlias &Src : SrcM->aliases()) { - if (DoNotLinkFromSource.count(&Src)) - continue; - linkGlobalValueBody(Src); - } - - // Remap all of the named MDNodes in Src into the DstM module. We do this - // after linking GlobalValues so that MDNodes that reference GlobalValues - // are properly remapped. - linkNamedMDNodes(); - - // Merge the module flags into the DstM module. - if (linkModuleFlagsMetadata()) + if (Mover.move(SrcM, ValuesToLink.getArrayRef(), + [this](GlobalValue &GV, IRMover::ValueAdder Add) { + addLazyFor(GV, Add); + }, + ValIDToTempMDMap, false)) return true; - - // Update the initializers in the DstM module now that all globals that may - // be referenced are in DstM. - for (GlobalVariable &Src : SrcM->globals()) { - // Only process initialized GV's or ones not already in dest. - if (!Src.hasInitializer() || DoNotLinkFromSource.count(&Src)) - continue; - linkGlobalValueBody(Src); - } - - // Process vector of lazily linked in functions. 
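The ValuesToLink walk in run() above iterates by index rather than by iterator for a reason: insert() during the loop may grow the SetVector's underlying vector, which would invalidate iterators, while indices stay valid and newly appended members are still visited. The pattern reduced to its essentials; membersOfSameComdat is a hypothetical stand-in for the ComdatMembers lookup:

    // Worklist that may grow while being processed.
    SetVector<GlobalValue *> Worklist;
    for (unsigned I = 0; I < Worklist.size(); ++I) {
      GlobalValue *GV = Worklist[I];
      for (GlobalValue *Member : membersOfSameComdat(GV))
        Worklist.insert(Member); // may append; visited on a later iteration
    }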
- while (!LazilyLinkGlobalValues.empty()) { - GlobalValue *SGV = LazilyLinkGlobalValues.back(); - LazilyLinkGlobalValues.pop_back(); - - assert(!SGV->isDeclaration() && "users should not pass down decls"); - if (linkGlobalValueBody(*SGV)) - return true; + Module &DstM = Mover.getModule(); + for (auto &P : Internalize) { + GlobalValue *GV = DstM.getNamedValue(P.first()); + GV->setLinkage(GlobalValue::InternalLinkage); } return false; } -Linker::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef E, bool P) - : ETypes(E), IsPacked(P) {} +Linker::Linker(Module &M) : Mover(M) {} -Linker::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST) - : ETypes(ST->elements()), IsPacked(ST->isPacked()) {} - -bool Linker::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const { - if (IsPacked != That.IsPacked) - return false; - if (ETypes != That.ETypes) - return false; - return true; +bool Linker::linkInModule(std::unique_ptr Src, unsigned Flags, + const FunctionInfoIndex *Index, + DenseSet *FunctionsToImport, + DenseMap *ValIDToTempMDMap) { + ModuleLinker ModLinker(Mover, *Src, Flags, Index, FunctionsToImport, + ValIDToTempMDMap); + return ModLinker.run(); } -bool Linker::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const { - return !this->operator==(That); +bool Linker::linkInModuleForCAPI(Module &Src) { + ModuleLinker ModLinker(Mover, Src, 0, nullptr, nullptr); + return ModLinker.run(); } -StructType *Linker::StructTypeKeyInfo::getEmptyKey() { - return DenseMapInfo::getEmptyKey(); -} - -StructType *Linker::StructTypeKeyInfo::getTombstoneKey() { - return DenseMapInfo::getTombstoneKey(); -} - -unsigned Linker::StructTypeKeyInfo::getHashValue(const KeyTy &Key) { - return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()), - Key.IsPacked); -} - -unsigned Linker::StructTypeKeyInfo::getHashValue(const StructType *ST) { - return getHashValue(KeyTy(ST)); -} - -bool Linker::StructTypeKeyInfo::isEqual(const KeyTy &LHS, - const StructType *RHS) { - if (RHS == getEmptyKey() || RHS == getTombstoneKey()) - return false; - return LHS == KeyTy(RHS); -} - -bool Linker::StructTypeKeyInfo::isEqual(const StructType *LHS, - const StructType *RHS) { - if (RHS == getEmptyKey()) - return LHS == getEmptyKey(); - - if (RHS == getTombstoneKey()) - return LHS == getTombstoneKey(); - - return KeyTy(LHS) == KeyTy(RHS); -} - -void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) { - assert(!Ty->isOpaque()); - NonOpaqueStructTypes.insert(Ty); -} - -void Linker::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) { - assert(!Ty->isOpaque()); - NonOpaqueStructTypes.insert(Ty); - bool Removed = OpaqueStructTypes.erase(Ty); - (void)Removed; - assert(Removed); -} - -void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) { - assert(Ty->isOpaque()); - OpaqueStructTypes.insert(Ty); -} - -StructType * -Linker::IdentifiedStructTypeSet::findNonOpaque(ArrayRef ETypes, - bool IsPacked) { - Linker::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked); - auto I = NonOpaqueStructTypes.find_as(Key); - if (I == NonOpaqueStructTypes.end()) - return nullptr; - return *I; -} - -bool Linker::IdentifiedStructTypeSet::hasType(StructType *Ty) { - if (Ty->isOpaque()) - return OpaqueStructTypes.count(Ty); - auto I = NonOpaqueStructTypes.find(Ty); - if (I == NonOpaqueStructTypes.end()) - return false; - return *I == Ty; -} - -void Linker::init(Module *M, DiagnosticHandlerFunction DiagnosticHandler) { - this->Composite = M; - this->DiagnosticHandler = DiagnosticHandler; - - TypeFinder StructTypes; - 
StructTypes.run(*M, true); - for (StructType *Ty : StructTypes) { - if (Ty->isOpaque()) - IdentifiedStructTypes.addOpaque(Ty); - else - IdentifiedStructTypes.addNonOpaque(Ty); - } -} - -Linker::Linker(Module *M, DiagnosticHandlerFunction DiagnosticHandler) { - init(M, DiagnosticHandler); -} - -Linker::Linker(Module *M) { - init(M, [this](const DiagnosticInfo &DI) { - Composite->getContext().diagnose(DI); - }); -} - -Linker::~Linker() { -} - -void Linker::deleteModule() { - delete Composite; - Composite = nullptr; -} - -bool Linker::linkInModule(Module *Src, bool OverrideSymbols) { - ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, - DiagnosticHandler, OverrideSymbols); - bool RetCode = TheLinker.run(); - Composite->dropTriviallyDeadConstantArrays(); - return RetCode; -} - -void Linker::setModule(Module *Dst) { - init(Dst, DiagnosticHandler); +bool Linker::linkInMetadata(Module &Src, + DenseMap *ValIDToTempMDMap) { + SetVector ValuesToLink; + if (Mover.move( + Src, ValuesToLink.getArrayRef(), + [this](GlobalValue &GV, IRMover::ValueAdder Add) { assert(false); }, + ValIDToTempMDMap, true)) + return true; + return false; } //===----------------------------------------------------------------------===// @@ -1780,34 +780,58 @@ void Linker::setModule(Module *Dst) { /// true is returned and ErrorMsg (if not null) is set to indicate the problem. /// Upon failure, the Dest module could be in a modified state, and shouldn't be /// relied on to be consistent. -bool Linker::LinkModules(Module *Dest, Module *Src, - DiagnosticHandlerFunction DiagnosticHandler) { - Linker L(Dest, DiagnosticHandler); - return L.linkInModule(Src); +bool Linker::linkModules(Module &Dest, std::unique_ptr Src, + unsigned Flags) { + Linker L(Dest); + return L.linkInModule(std::move(Src), Flags); } -bool Linker::LinkModules(Module *Dest, Module *Src) { - Linker L(Dest); - return L.linkInModule(Src); +std::unique_ptr +llvm::renameModuleForThinLTO(std::unique_ptr M, + const FunctionInfoIndex *Index) { + std::unique_ptr RenamedModule( + new llvm::Module(M->getModuleIdentifier(), M->getContext())); + Linker L(*RenamedModule.get()); + if (L.linkInModule(std::move(M), llvm::Linker::Flags::None, Index)) + return nullptr; + return RenamedModule; } //===----------------------------------------------------------------------===// // C API. 
 //===----------------------------------------------------------------------===//

+static void diagnosticHandler(const DiagnosticInfo &DI, void *C) {
+  auto *Message = reinterpret_cast<std::string *>(C);
+  raw_string_ostream Stream(*Message);
+  DiagnosticPrinterRawOStream DP(Stream);
+  DI.print(DP);
+}
+
 LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
                          LLVMLinkerMode Unused, char **OutMessages) {
   Module *D = unwrap(Dest);
+  LLVMContext &Ctx = D->getContext();
+
+  LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
+      Ctx.getDiagnosticHandler();
+  void *OldDiagnosticContext = Ctx.getDiagnosticContext();
   std::string Message;
-  raw_string_ostream Stream(Message);
-  DiagnosticPrinterRawOStream DP(Stream);
+  Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true);

-  LLVMBool Result = Linker::LinkModules(
-      D, unwrap(Src), [&](const DiagnosticInfo &DI) { DI.print(DP); });
+  Linker L(*D);
+  Module *M = unwrap(Src);
+  LLVMBool Result = L.linkInModuleForCAPI(*M);

-  if (OutMessages && Result) {
-    Stream.flush();
+  Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true);
+
+  if (OutMessages && Result)
     *OutMessages = strdup(Message.c_str());
-  }
   return Result;
 }
+
+LLVMBool LLVMLinkModules2(LLVMModuleRef Dest, LLVMModuleRef Src) {
+  Module *D = unwrap(Dest);
+  std::unique_ptr<Module> M(unwrap(Src));
+  return Linker::linkModules(*D, std::move(M));
+}
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 6554d6a9e60e..8c015644d8ad 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_library(LLVMMC
   MCELFObjectTargetWriter.cpp
   MCELFStreamer.cpp
   MCExpr.cpp
+  MCFragment.cpp
   MCInst.cpp
   MCInstPrinter.cpp
   MCInstrAnalysis.cpp
diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp
index f7649fba6e89..9643b7594682 100644
--- a/lib/MC/ConstantPools.cpp
+++ b/lib/MC/ConstantPools.cpp
@@ -29,17 +29,17 @@ void ConstantPool::emitEntries(MCStreamer &Streamer) {
        I != E; ++I) {
     Streamer.EmitCodeAlignment(I->Size); // align naturally
     Streamer.EmitLabel(I->Label);
-    Streamer.EmitValue(I->Value, I->Size);
+    Streamer.EmitValue(I->Value, I->Size, I->Loc);
   }
   Streamer.EmitDataRegion(MCDR_DataRegionEnd);
   Entries.clear();
 }

 const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context,
-                                     unsigned Size) {
+                                     unsigned Size, SMLoc Loc) {
   MCSymbol *CPEntryLabel = Context.createTempSymbol();

-  Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size));
+  Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size, Loc));
   return MCSymbolRefExpr::create(CPEntryLabel, Context);
 }

@@ -90,8 +90,8 @@ void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {

 const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
                                                const MCExpr *Expr,
-                                               unsigned Size) {
+                                               unsigned Size, SMLoc Loc) {
   MCSection *Section = Streamer.getCurrentSection().first;
   return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext(),
-                                                   Size);
+                                                   Size, Loc);
 }
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index e925bc272dc8..e6552beefd01 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -33,6 +33,7 @@
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/StringSaver.h"
 #include <vector>
 using namespace llvm;

@@ -106,7 +107,9 @@ class ELFObjectWriter : public MCObjectWriter {
   /// @name Symbol Table Data
   /// @{

-  StringTableBuilder StrTabBuilder;
+  BumpPtrAllocator Alloc;
+  StringSaver VersionSymSaver{Alloc};
+  StringTableBuilder StrTabBuilder{StringTableBuilder::ELF};

   /// @}

@@ -157,9 +160,9 @@ class ELFObjectWriter : public MCObjectWriter {

   template <typename T> void write(T Val) {
     if (IsLittleEndian)
-      support::endian::Writer<support::little>(OS).write(Val);
+      support::endian::Writer<support::little>(getStream()).write(Val);
     else
-      support::endian::Writer<support::big>(OS).write(Val);
+      support::endian::Writer<support::big>(getStream()).write(Val);
   }

   void writeHeader(const MCAssembler &Asm);
@@ -232,7 +235,7 @@ class ELFObjectWriter : public MCObjectWriter {
 }

 void ELFObjectWriter::align(unsigned Alignment) {
-  uint64_t Padding = OffsetToAlignment(OS.tell(), Alignment);
+  uint64_t Padding = OffsetToAlignment(getStream().tell(), Alignment);
   WriteZeros(Padding);
 }

@@ -447,9 +450,6 @@ void ELFObjectWriter::writeSymbol(SymbolTableWriter &Writer,
                                   uint32_t StringIndex, ELFSymbolData &MSD,
                                   const MCAsmLayout &Layout) {
   const auto &Symbol = cast<MCSymbolELF>(*MSD.Symbol);
-  assert((!Symbol.getFragment() ||
-          (Symbol.getFragment()->getParent() == &Symbol.getSection())) &&
-         "The symbol's section doesn't match the fragment's symbol");
   const MCSymbolELF *Base =
       cast_or_null<MCSymbolELF>(Layout.getBaseSymbol(Symbol));

@@ -630,28 +630,36 @@ void ELFObjectWriter::recordRelocation(MCAssembler &Asm,
     // In general, ELF has no relocations for -B. It can only represent (A + C)
     // or (A + C - R). If B = R + K and the relocation is not pcrel, we can
     // replace B to implement it: (A - R - K + C)
-    if (IsPCRel)
-      Asm.getContext().reportFatalError(
+    if (IsPCRel) {
+      Asm.getContext().reportError(
           Fixup.getLoc(),
           "No relocation available to represent this relative expression");
+      return;
+    }

     const auto &SymB = cast<MCSymbolELF>(RefB->getSymbol());

-    if (SymB.isUndefined())
-      Asm.getContext().reportFatalError(
+    if (SymB.isUndefined()) {
+      Asm.getContext().reportError(
           Fixup.getLoc(),
           Twine("symbol '") + SymB.getName() +
               "' can not be undefined in a subtraction expression");
+      return;
+    }

     assert(!SymB.isAbsolute() && "Should have been folded");
     const MCSection &SecB = SymB.getSection();
-    if (&SecB != &FixupSection)
-      Asm.getContext().reportFatalError(
+    if (&SecB != &FixupSection) {
+      Asm.getContext().reportError(
           Fixup.getLoc(), "Cannot represent a difference across sections");
+      return;
+    }

-    if (::isWeak(SymB))
-      Asm.getContext().reportFatalError(
+    if (::isWeak(SymB)) {
+      Asm.getContext().reportError(
           Fixup.getLoc(), "Cannot represent a subtraction with a weak symbol");
+      return;
+    }

     uint64_t SymBOffset = Layout.getSymbolOffset(SymB);
     uint64_t K = SymBOffset - FixupOffset;
@@ -764,7 +772,7 @@ void ELFObjectWriter::computeSymbolTable(
   SymbolTableIndex = addToSectionTable(SymtabSection);

   align(SymtabSection->getAlignment());
-  uint64_t SecStart = OS.tell();
+  uint64_t SecStart = getStream().tell();

   // The first entry is the undefined symbol entry.
   Writer.writeSymbol(0, 0, 0, 0, 0, 0, false);
@@ -784,8 +792,10 @@ void ELFObjectWriter::computeSymbolTable(
         Renames.count(&Symbol)))
       continue;

-    if (Symbol.isTemporary() && Symbol.isUndefined())
-      Ctx.reportFatalError(SMLoc(), "Undefined temporary");
+    if (Symbol.isTemporary() && Symbol.isUndefined()) {
+      Ctx.reportError(SMLoc(), "Undefined temporary symbol");
+      continue;
+    }

     ELFSymbolData MSD;
     MSD.Symbol = cast<MCSymbolELF>(&Symbol);
@@ -850,13 +860,15 @@ void ELFObjectWriter::computeSymbolTable(
         Buf += Name.substr(0, Pos);
         unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1;
         Buf += Name.substr(Pos + Skip);
-        Name = Buf;
+        Name = VersionSymSaver.save(Buf.c_str());
       }
     }

     // Sections have their own string table
-    if (Symbol.getType() != ELF::STT_SECTION)
-      MSD.Name = StrTabBuilder.add(Name);
+    if (Symbol.getType() != ELF::STT_SECTION) {
+      MSD.Name = Name;
+      StrTabBuilder.add(Name);
+    }

     if (Local)
       LocalSymbolData.push_back(MSD);
@@ -878,7 +890,7 @@ void ELFObjectWriter::computeSymbolTable(
   for (const std::string &Name : FileNames)
     StrTabBuilder.add(Name);

-  StrTabBuilder.finalize(StringTableBuilder::ELF);
+  StrTabBuilder.finalize();

   for (const std::string &Name : FileNames)
     Writer.writeSymbol(StrTabBuilder.getOffset(Name),
@@ -911,7 +923,7 @@ void ELFObjectWriter::computeSymbolTable(
     assert(MSD.Symbol->getBinding() != ELF::STB_LOCAL);
   }

-  uint64_t SecEnd = OS.tell();
+  uint64_t SecEnd = getStream().tell();
   SectionOffsets[SymtabSection] = std::make_pair(SecStart, SecEnd);

   ArrayRef<uint32_t> ShndxIndexes = Writer.getShndxIndexes();
@@ -921,12 +933,12 @@ void ELFObjectWriter::computeSymbolTable(
   }
   assert(SymtabShndxSectionIndex != 0);

-  SecStart = OS.tell();
+  SecStart = getStream().tell();
   const MCSectionELF *SymtabShndxSection =
       SectionTable[SymtabShndxSectionIndex - 1];
   for (uint32_t Index : ShndxIndexes)
     write(Index);
-  SecEnd = OS.tell();
+  SecEnd = getStream().tell();
   SectionOffsets[SymtabShndxSection] = std::make_pair(SecStart, SecEnd);
 }

@@ -957,31 +969,6 @@ ELFObjectWriter::createRelocationSection(MCContext &Ctx,
   return RelaSection;
 }

-static SmallVector<char, 128>
-getUncompressedData(const MCAsmLayout &Layout,
-                    const MCSection::FragmentListType &Fragments) {
-  SmallVector<char, 128> UncompressedData;
-  for (const MCFragment &F : Fragments) {
-    const SmallVectorImpl<char> *Contents;
-    switch (F.getKind()) {
-    case MCFragment::FT_Data:
-      Contents = &cast<MCDataFragment>(F).getContents();
-      break;
-    case MCFragment::FT_Dwarf:
-      Contents = &cast<MCDwarfLineAddrFragment>(F).getContents();
-      break;
-    case MCFragment::FT_DwarfFrame:
-      Contents = &cast<MCDwarfCallFrameFragment>(F).getContents();
-      break;
-    default:
-      llvm_unreachable(
-          "Not expecting any other fragment types in a debug_* section");
-    }
-    UncompressedData.append(Contents->begin(), Contents->end());
-  }
-  return UncompressedData;
-}
-
 // Include the debug info compression header:
 // "ZLIB" followed by 8 bytes representing the uncompressed size of the section,
 // useful for consumers to preallocate a buffer to decompress into.
@@ -1016,27 +1003,29 @@ void ELFObjectWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
     return;
   }

-  // Gather the uncompressed data from all the fragments.
-  const MCSection::FragmentListType &Fragments = Section.getFragmentList();
-  SmallVector<char, 128> UncompressedData =
-      getUncompressedData(Layout, Fragments);
+  SmallVector<char, 128> UncompressedData;
+  raw_svector_ostream VecOS(UncompressedData);
+  raw_pwrite_stream &OldStream = getStream();
+  setStream(VecOS);
+  Asm.writeSectionData(&Section, Layout);
+  setStream(OldStream);

   SmallVector<char, 128> CompressedContents;
   zlib::Status Success = zlib::compress(
       StringRef(UncompressedData.data(), UncompressedData.size()),
       CompressedContents);
   if (Success != zlib::StatusOK) {
-    Asm.writeSectionData(&Section, Layout);
+    getStream() << UncompressedData;
     return;
   }

   if (!prependCompressionHeader(UncompressedData.size(), CompressedContents)) {
-    Asm.writeSectionData(&Section, Layout);
+    getStream() << UncompressedData;
     return;
   }
   Asm.getContext().renameELFSection(&Section,
                                     (".z" + SectionName.drop_front(1)).str());
-  OS << CompressedContents;
+  getStream() << CompressedContents;
 }

 void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
@@ -1061,8 +1050,13 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm,
                                        const MCSectionELF &Sec) {
   std::vector<ELFRelocationEntry> &Relocs = Relocations[&Sec];

-  // Sort the relocation entries. Most targets just sort by Offset, but some
-  // (e.g., MIPS) have additional constraints.
+  // We record relocations by pushing to the end of a vector. Reverse the vector
+  // to get the relocations in the order they were created.
+  // In most cases that is not important, but it can be for special sections
+  // (.eh_frame) or specific relocations (TLS optimizations on SystemZ).
+  std::reverse(Relocs.begin(), Relocs.end());
+
+  // Sort the relocation entries. MIPS needs this.
   TargetObjectWriter->sortRelocs(Asm, Relocs);

   for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
@@ -1100,7 +1094,7 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm,

 const MCSectionELF *ELFObjectWriter::createStringTable(MCContext &Ctx) {
   const MCSectionELF *StrtabSection = SectionTable[StringTableIndex - 1];
-  OS << StrTabBuilder.data();
+  getStream() << StrTabBuilder.data();
   return StrtabSection;
 }

@@ -1209,12 +1203,12 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm,
     align(Section.getAlignment());

     // Remember the offset into the file for this section.
- uint64_t SecStart = OS.tell(); + uint64_t SecStart = getStream().tell(); writeRelocations(Asm, *RelSection->getAssociatedSection()); - uint64_t SecEnd = OS.tell(); + uint64_t SecEnd = getStream().tell(); SectionOffsets[RelSection] = std::make_pair(SecStart, SecEnd); } { - uint64_t SecStart = OS.tell(); + uint64_t SecStart = getStream().tell(); const MCSectionELF *Sec = createStringTable(Ctx); - uint64_t SecEnd = OS.tell(); + uint64_t SecEnd = getStream().tell(); SectionOffsets[Sec] = std::make_pair(SecStart, SecEnd); } uint64_t NaturalAlignment = is64Bit() ? 8 : 4; align(NaturalAlignment); - const unsigned SectionHeaderOffset = OS.tell(); + const unsigned SectionHeaderOffset = getStream().tell(); // ... then the section header table ... writeSectionHeader(Layout, SectionIndexMap, SectionOffsets); @@ -1301,19 +1295,19 @@ void ELFObjectWriter::writeObject(MCAssembler &Asm, uint64_t Val = SectionHeaderOffset; if (sys::IsLittleEndianHost != IsLittleEndian) sys::swapByteOrder(Val); - OS.pwrite(reinterpret_cast(&Val), sizeof(Val), - offsetof(ELF::Elf64_Ehdr, e_shoff)); + getStream().pwrite(reinterpret_cast(&Val), sizeof(Val), + offsetof(ELF::Elf64_Ehdr, e_shoff)); NumSectionsOffset = offsetof(ELF::Elf64_Ehdr, e_shnum); } else { uint32_t Val = SectionHeaderOffset; if (sys::IsLittleEndianHost != IsLittleEndian) sys::swapByteOrder(Val); - OS.pwrite(reinterpret_cast(&Val), sizeof(Val), - offsetof(ELF::Elf32_Ehdr, e_shoff)); + getStream().pwrite(reinterpret_cast(&Val), sizeof(Val), + offsetof(ELF::Elf32_Ehdr, e_shoff)); NumSectionsOffset = offsetof(ELF::Elf32_Ehdr, e_shnum); } - OS.pwrite(reinterpret_cast(&NumSections), sizeof(NumSections), - NumSectionsOffset); + getStream().pwrite(reinterpret_cast(&NumSections), + sizeof(NumSections), NumSectionsOffset); } bool ELFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( diff --git a/lib/MC/MCAsmBackend.cpp b/lib/MC/MCAsmBackend.cpp index 36c65b7bcd49..fcf139b72537 100644 --- a/lib/MC/MCAsmBackend.cpp +++ b/lib/MC/MCAsmBackend.cpp @@ -16,6 +16,10 @@ MCAsmBackend::MCAsmBackend() : HasDataInCodeSupport(false) {} MCAsmBackend::~MCAsmBackend() {} +bool MCAsmBackend::getFixupKind(StringRef Name, MCFixupKind &MappedKind) const { + return false; +} + const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { static const MCFixupKindInfo Builtins[] = { {"FK_Data_1", 0, 8, 0}, diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 100dc7c3dc60..36e10b3c6a07 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -157,3 +157,9 @@ bool MCAsmInfo::isValidUnquotedName(StringRef Name) const { return true; } + +bool MCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const { + // FIXME: Does .section .bss/.data/.text work everywhere?? + return SectionName == ".text" || SectionName == ".data" || + (SectionName == ".bss" && !usesELFSectionDirectiveForBSS()); +} diff --git a/lib/MC/MCAsmInfoCOFF.cpp b/lib/MC/MCAsmInfoCOFF.cpp index 97fc76a9adb1..5b9dd2009f8b 100644 --- a/lib/MC/MCAsmInfoCOFF.cpp +++ b/lib/MC/MCAsmInfoCOFF.cpp @@ -37,8 +37,7 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() { UseIntegratedAssembler = true; - // FIXME: For now keep the previous behavior, AShr. Need to double-check - // other COFF-targeting assemblers and change this if necessary. + // At least MSVC inline-asm does AShr. 
UseLogicalShr = false; } diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index bb90ff2c350a..ae9486d3db4d 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -93,9 +93,4 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { UseIntegratedAssembler = true; SetDirectiveSuppressesReloc = true; - - // FIXME: For now keep the previous behavior, AShr, matching the previous - // behavior of as(1) (both -q and -Q: resp. LLVM and gas v1.38). - // If/when this changes, the AArch64 Darwin special case can go away. - UseLogicalShr = false; } diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 227c937e8d1b..c99ce7752b30 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -1,4 +1,4 @@ -//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===// +//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -29,9 +29,11 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include + using namespace llvm; namespace { @@ -78,6 +80,9 @@ public: } EmitCommentsAndEOL(); } + + void EmitSyntaxDirective() override; + void EmitCommentsAndEOL(); /// isVerboseAsm - Return true if this streamer supports verbose assembly at @@ -160,7 +165,7 @@ public: void EmitBytes(StringRef Data) override; void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc = SMLoc()) override; + SMLoc Loc = SMLoc()) override; void EmitIntValue(uint64_t Value, unsigned Size) override; void EmitULEB128Value(const MCExpr *Value) override; @@ -181,7 +186,7 @@ public: void EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit = 0) override; - bool EmitValueToOffset(const MCExpr *Offset, + void emitValueToOffset(const MCExpr *Offset, unsigned char Value = 0) override; void EmitFileDirective(StringRef Filename) override; @@ -207,6 +212,8 @@ public: void EmitCFISameValue(int64_t Register) override; void EmitCFIRelOffset(int64_t Register, int64_t Offset) override; void EmitCFIAdjustCfaOffset(int64_t Adjustment) override; + void EmitCFIEscape(StringRef Values) override; + void EmitCFIGnuArgsSize(int64_t Size) override; void EmitCFISignalFrame() override; void EmitCFIUndefined(int64_t Register) override; void EmitCFIRegister(int64_t Register1, int64_t Register2) override; @@ -233,6 +240,9 @@ public: void EmitBundleLock(bool AlignToEnd) override; void EmitBundleUnlock() override; + bool EmitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc Loc) override; + /// EmitRawText - If this file is backed by an assembly streamer, this dumps /// the specified string in the output .s file. This capability is /// indicated by the hasRawTextSupport() predicate. @@ -250,15 +260,9 @@ public: void MCAsmStreamer::AddComment(const Twine &T) { if (!IsVerboseAsm) return; - // Make sure that CommentStream is flushed. - CommentStream.flush(); - T.toVector(CommentToEmit); // Each comment goes on its own line. CommentToEmit.push_back('\n'); - - // Tell the comment stream that the vector changed underneath it. 
- CommentStream.resync(); } void MCAsmStreamer::EmitCommentsAndEOL() { @@ -267,7 +271,6 @@ void MCAsmStreamer::EmitCommentsAndEOL() { return; } - CommentStream.flush(); StringRef Comments = CommentToEmit; assert(Comments.back() == '\n' && @@ -282,8 +285,6 @@ void MCAsmStreamer::EmitCommentsAndEOL() { } while (!Comments.empty()); CommentToEmit.clear(); - // Tell the comment stream that the vector changed underneath it. - CommentStream.resync(); } static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { @@ -372,6 +373,8 @@ void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) { void MCAsmStreamer::EmitVersionMin(MCVersionMinType Kind, unsigned Major, unsigned Minor, unsigned Update) { switch (Kind) { + case MCVM_WatchOSVersionMin: OS << "\t.watchos_version_min"; break; + case MCVM_TvOSVersionMin: OS << "\t.tvos_version_min"; break; case MCVM_IOSVersionMin: OS << "\t.ios_version_min"; break; case MCVM_OSXVersionMin: OS << "\t.macosx_version_min"; break; } @@ -480,6 +483,14 @@ void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { EmitEOL(); } +void MCAsmStreamer::EmitSyntaxDirective() { + if (MAI->getAssemblerDialect() == 1) + OS << "\t.intel_syntax noprefix\n"; + // FIXME: Currently emit unprefix'ed registers. + // The intel_syntax directive has one optional argument + // with may have a value of prefix or noprefix. +} + void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { OS << "\t.def\t "; Symbol->print(OS, MAI); @@ -531,9 +542,6 @@ void MCAsmStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) { void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - // Common symbols do not belong to any actual section. - AssignSection(Symbol, nullptr); - OS << "\t.comm\t"; Symbol->print(OS, MAI); OS << ',' << Size; @@ -553,9 +561,6 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, /// @param Size - The size of the common symbol. void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlign) { - // Common symbols do not belong to any actual section. - AssignSection(Symbol, nullptr); - OS << "\t.lcomm\t"; Symbol->print(OS, MAI); OS << ',' << Size; @@ -579,7 +584,7 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, void MCAsmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { if (Symbol) - AssignSection(Symbol, Section); + AssignFragment(Symbol, &Section->getDummyFragment()); // Note: a .zerofill directive does not switch sections. OS << ".zerofill "; @@ -603,7 +608,7 @@ void MCAsmStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, // e.g. _a. void MCAsmStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) { - AssignSection(Symbol, Section); + AssignFragment(Symbol, &Section->getDummyFragment()); assert(Symbol && "Symbol shouldn't be NULL!"); // Instead of using the Section we'll just use the shortcut. 
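The flush()/resync() deletions in AddComment and EmitCommentsAndEOL above track the change that made raw_svector_ostream unbuffered: the stream now writes straight into the SmallString that backs it, so the buffer and the vector can no longer get out of sync. A minimal sketch of the resulting pattern, using only the public raw_ostream API:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/raw_ostream.h"

// Builds a comment string in place. With the unbuffered raw_svector_ostream,
// Out is always current; there is no separate buffer to flush or resync.
void buildComment(llvm::SmallString<128> &Out) {
  llvm::raw_svector_ostream OS(Out);
  OS << "comment " << 42; // lands in Out immediately
}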
@@ -654,7 +659,6 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) { OS << '"'; } - void MCAsmStreamer::EmitBytes(StringRef Data) { assert(getCurrentSection().first && "Cannot emit contents before setting section!"); @@ -685,7 +689,7 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) { } void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { + SMLoc Loc) { assert(Size <= 8 && "Invalid size"); assert(getCurrentSection().first && "Cannot emit contents before setting section!"); @@ -776,7 +780,6 @@ void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { EmitEOL(); } - /// EmitFill - Emit NumBytes bytes worth of the value specified by /// FillValue. This implements directives such as '.space'. void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) { @@ -856,17 +859,15 @@ void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, 1, MaxBytesToEmit); } -bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, +void MCAsmStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value) { // FIXME: Verify that Offset is associated with the current section. OS << ".org "; Offset->print(OS, MAI); OS << ", " << (unsigned)Value; EmitEOL(); - return false; } - void MCAsmStreamer::EmitFileDirective(StringRef Filename) { assert(MAI->hasSingleParameterDotFile()); OS << "\t.file\t"; @@ -1014,6 +1015,32 @@ void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) { EmitEOL(); } +static void PrintCFIEscape(llvm::formatted_raw_ostream &OS, StringRef Values) { + OS << "\t.cfi_escape "; + if (!Values.empty()) { + size_t e = Values.size() - 1; + for (size_t i = 0; i < e; ++i) + OS << format("0x%02x", uint8_t(Values[i])) << ", "; + OS << format("0x%02x", uint8_t(Values[e])); + } +} + +void MCAsmStreamer::EmitCFIEscape(StringRef Values) { + MCStreamer::EmitCFIEscape(Values); + PrintCFIEscape(OS, Values); + EmitEOL(); +} + +void MCAsmStreamer::EmitCFIGnuArgsSize(int64_t Size) { + MCStreamer::EmitCFIGnuArgsSize(Size); + + uint8_t Buffer[16] = { dwarf::DW_CFA_GNU_args_size }; + unsigned Len = encodeULEB128(Size, Buffer + 1) + 1; + + PrintCFIEscape(OS, StringRef((const char *)&Buffer[0], Len)); + EmitEOL(); +} + void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) { MCStreamer::EmitCFIDefCfaRegister(Register); OS << "\t.cfi_def_cfa_register "; @@ -1203,7 +1230,7 @@ void MCAsmStreamer::EmitWinCFIPushFrame(bool Code) { EmitEOL(); } -void MCAsmStreamer::EmitWinCFIEndProlog(void) { +void MCAsmStreamer::EmitWinCFIEndProlog() { MCStreamer::EmitWinCFIEndProlog(); OS << "\t.seh_endprologue"; @@ -1217,7 +1244,6 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst, SmallVector Fixups; raw_svector_ostream VecOS(Code); Emitter->encodeInstruction(Inst, VecOS, Fixups, STI); - VecOS.flush(); // If we are showing fixups, create symbolic markers in the encoded // representation. We do this by making a per-bit map to the fixup item index, @@ -1334,6 +1360,19 @@ void MCAsmStreamer::EmitBundleUnlock() { EmitEOL(); } +bool MCAsmStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc) { + OS << "\t.reloc "; + Offset.print(OS, MAI); + OS << ", " << Name; + if (Expr) { + OS << ", "; + Expr->print(OS, MAI); + } + EmitEOL(); + return false; +} + /// EmitRawText - If this file is backed by an assembly streamer, this dumps /// the specified string in the output .s file. This capability is /// indicated by the hasRawTextSupport() predicate. 
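The new EmitCFIGnuArgsSize above builds its `.cfi_escape` payload by writing the DW_CFA_GNU_args_size opcode followed by the size as an unsigned LEB128 value. As a reference for that byte format, here is a minimal standalone sketch of unsigned LEB128 encoding; encodeULEB128Sketch is a hypothetical name, the in-tree routine is llvm::encodeULEB128 from llvm/Support/LEB128.h.

#include <cstdint>
#include <vector>

// Each output byte carries 7 payload bits, low bits first; the high bit
// is set on every byte except the last to mark continuation.
static std::vector<uint8_t> encodeULEB128Sketch(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f; // low 7 bits of the remaining value
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // continuation bit: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out; // e.g. 624485 encodes as {0xE5, 0x8E, 0x26}
}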
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index f53b589e1aea..15e82fa49388 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -64,272 +64,11 @@ STATISTIC(RelaxedInstructions, "Number of relaxed instructions"); /* *** */ -MCAsmLayout::MCAsmLayout(MCAssembler &Asm) - : Assembler(Asm), LastValidFragment() - { - // Compute the section layout order. Virtual sections must go last. - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) - if (!it->isVirtualSection()) - SectionOrder.push_back(&*it); - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) - if (it->isVirtualSection()) - SectionOrder.push_back(&*it); -} - -bool MCAsmLayout::isFragmentValid(const MCFragment *F) const { - const MCSection *Sec = F->getParent(); - const MCFragment *LastValid = LastValidFragment.lookup(Sec); - if (!LastValid) - return false; - assert(LastValid->getParent() == Sec); - return F->getLayoutOrder() <= LastValid->getLayoutOrder(); -} - -void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) { - // If this fragment wasn't already valid, we don't need to do anything. - if (!isFragmentValid(F)) - return; - - // Otherwise, reset the last valid fragment to the previous fragment - // (if this is the first fragment, it will be NULL). - LastValidFragment[F->getParent()] = F->getPrevNode(); -} - -void MCAsmLayout::ensureValid(const MCFragment *F) const { - MCSection *Sec = F->getParent(); - MCFragment *Cur = LastValidFragment[Sec]; - if (!Cur) - Cur = Sec->begin(); - else - Cur = Cur->getNextNode(); - - // Advance the layout position until the fragment is valid. - while (!isFragmentValid(F)) { - assert(Cur && "Layout bookkeeping error"); - const_cast(this)->layoutFragment(Cur); - Cur = Cur->getNextNode(); - } -} - -uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { - ensureValid(F); - assert(F->Offset != ~UINT64_C(0) && "Address not set!"); - return F->Offset; -} - -// Simple getSymbolOffset helper for the non-varibale case. -static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbol &S, - bool ReportError, uint64_t &Val) { - if (!S.getFragment()) { - if (ReportError) - report_fatal_error("unable to evaluate offset to undefined symbol '" + - S.getName() + "'"); - return false; - } - Val = Layout.getFragmentOffset(S.getFragment()) + S.getOffset(); - return true; -} - -static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S, - bool ReportError, uint64_t &Val) { - if (!S.isVariable()) - return getLabelOffset(Layout, S, ReportError, Val); - - // If SD is a variable, evaluate it. 
- MCValue Target; - if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr)) - report_fatal_error("unable to evaluate offset for variable '" + - S.getName() + "'"); - - uint64_t Offset = Target.getConstant(); - - const MCSymbolRefExpr *A = Target.getSymA(); - if (A) { - uint64_t ValA; - if (!getLabelOffset(Layout, A->getSymbol(), ReportError, ValA)) - return false; - Offset += ValA; - } - - const MCSymbolRefExpr *B = Target.getSymB(); - if (B) { - uint64_t ValB; - if (!getLabelOffset(Layout, B->getSymbol(), ReportError, ValB)) - return false; - Offset -= ValB; - } - - Val = Offset; - return true; -} - -bool MCAsmLayout::getSymbolOffset(const MCSymbol &S, uint64_t &Val) const { - return getSymbolOffsetImpl(*this, S, false, Val); -} - -uint64_t MCAsmLayout::getSymbolOffset(const MCSymbol &S) const { - uint64_t Val; - getSymbolOffsetImpl(*this, S, true, Val); - return Val; -} - -const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const { - if (!Symbol.isVariable()) - return &Symbol; - - const MCExpr *Expr = Symbol.getVariableValue(); - MCValue Value; - if (!Expr->evaluateAsValue(Value, *this)) - llvm_unreachable("Invalid Expression"); - - const MCSymbolRefExpr *RefB = Value.getSymB(); - if (RefB) - Assembler.getContext().reportFatalError( - SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() + - "' could not be evaluated in a subtraction expression"); - - const MCSymbolRefExpr *A = Value.getSymA(); - if (!A) - return nullptr; - - const MCSymbol &ASym = A->getSymbol(); - const MCAssembler &Asm = getAssembler(); - if (ASym.isCommon()) { - // FIXME: we should probably add a SMLoc to MCExpr. - Asm.getContext().reportFatalError(SMLoc(), - "Common symbol " + ASym.getName() + - " cannot be used in assignment expr"); - } - - return &ASym; -} - -uint64_t MCAsmLayout::getSectionAddressSize(const MCSection *Sec) const { - // The size is the last fragment's end offset. - const MCFragment &F = Sec->getFragmentList().back(); - return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F); -} - -uint64_t MCAsmLayout::getSectionFileSize(const MCSection *Sec) const { - // Virtual sections have no file size. - if (Sec->isVirtualSection()) - return 0; - - // Otherwise, the file size is the same as the address space size. - return getSectionAddressSize(Sec); -} - -uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler, - const MCFragment *F, - uint64_t FOffset, uint64_t FSize) { - uint64_t BundleSize = Assembler.getBundleAlignSize(); - assert(BundleSize > 0 && - "computeBundlePadding should only be called if bundling is enabled"); - uint64_t BundleMask = BundleSize - 1; - uint64_t OffsetInBundle = FOffset & BundleMask; - uint64_t EndOfFragment = OffsetInBundle + FSize; - - // There are two kinds of bundling restrictions: - // - // 1) For alignToBundleEnd(), add padding to ensure that the fragment will - // *end* on a bundle boundary. - // 2) Otherwise, check if the fragment would cross a bundle boundary. If it - // would, add padding until the end of the bundle so that the fragment - // will start in a new one. - if (F->alignToBundleEnd()) { - // Three possibilities here: - // - // A) The fragment just happens to end at a bundle boundary, so we're good. - // B) The fragment ends before the current bundle boundary: pad it just - // enough to reach the boundary. - // C) The fragment ends after the current bundle boundary: pad it until it - // reaches the end of the next bundle boundary. 
- // - // Note: this code could be made shorter with some modulo trickery, but it's - // intentionally kept in its more explicit form for simplicity. - if (EndOfFragment == BundleSize) - return 0; - else if (EndOfFragment < BundleSize) - return BundleSize - EndOfFragment; - else { // EndOfFragment > BundleSize - return 2 * BundleSize - EndOfFragment; - } - } else if (OffsetInBundle > 0 && EndOfFragment > BundleSize) - return BundleSize - OffsetInBundle; - else - return 0; -} - -/* *** */ - -void ilist_node_traits::deleteNode(MCFragment *V) { - V->destroy(); -} - -MCFragment::MCFragment() : Kind(FragmentType(~0)), HasInstructions(false), - AlignToBundleEnd(false), BundlePadding(0) { -} - -MCFragment::~MCFragment() { } - -MCFragment::MCFragment(FragmentType Kind, bool HasInstructions, - uint8_t BundlePadding, MCSection *Parent) - : Kind(Kind), HasInstructions(HasInstructions), AlignToBundleEnd(false), - BundlePadding(BundlePadding), Parent(Parent), Atom(nullptr), - Offset(~UINT64_C(0)) { - if (Parent) - Parent->getFragmentList().push_back(this); -} - -void MCFragment::destroy() { - // First check if we are the sentinal. - if (Kind == FragmentType(~0)) { - delete this; - return; - } - - switch (Kind) { - case FT_Align: - delete cast(this); - return; - case FT_Data: - delete cast(this); - return; - case FT_CompactEncodedInst: - delete cast(this); - return; - case FT_Fill: - delete cast(this); - return; - case FT_Relaxable: - delete cast(this); - return; - case FT_Org: - delete cast(this); - return; - case FT_Dwarf: - delete cast(this); - return; - case FT_DwarfFrame: - delete cast(this); - return; - case FT_LEB: - delete cast(this); - return; - case FT_SafeSEH: - delete cast(this); - return; - } -} - -/* *** */ - MCAssembler::MCAssembler(MCContext &Context_, MCAsmBackend &Backend_, - MCCodeEmitter &Emitter_, MCObjectWriter &Writer_, - raw_ostream &OS_) + MCCodeEmitter &Emitter_, MCObjectWriter &Writer_) : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_), - OS(OS_), BundleAlignSize(0), RelaxAll(false), - SubsectionsViaSymbols(false), ELFHeaderEFlags(0) { + BundleAlignSize(0), RelaxAll(false), SubsectionsViaSymbols(false), + IncrementalLinkerCompatible(false), ELFHeaderEFlags(0) { VersionMinInfo.Major = 0; // Major version == 0 for "none specified" } @@ -347,6 +86,7 @@ void MCAssembler::reset() { BundleAlignSize = 0; RelaxAll = false; SubsectionsViaSymbols = false; + IncrementalLinkerCompatible = false; ELFHeaderEFlags = 0; LOHContainer.reset(); VersionMinInfo.Major = 0; @@ -358,6 +98,14 @@ void MCAssembler::reset() { getLOHContainer().reset(); } +bool MCAssembler::registerSection(MCSection &Section) { + if (Section.isRegistered()) + return false; + Sections.push_back(&Section); + Section.setIsRegistered(true); + return true; +} + bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const { if (ThumbFuncs.count(Symbol)) return true; @@ -404,7 +152,7 @@ const MCSymbol *MCAssembler::getAtom(const MCSymbol &S) const { return &S; // Absolute and undefined symbols have no defining atom. - if (!S.getFragment()) + if (!S.isInSection()) return nullptr; // Non-linker visible symbols in sections which can't be atomized have no @@ -426,8 +174,13 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, // probably merge the two into a single callback that tries to evaluate a // fixup and records a relocation if one is needed. 
const MCExpr *Expr = Fixup.getValue(); - if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup)) - getContext().reportFatalError(Fixup.getLoc(), "expected relocatable expression"); + if (!Expr->evaluateAsRelocatable(Target, &Layout, &Fixup)) { + getContext().reportError(Fixup.getLoc(), "expected relocatable expression"); + // Claim to have completely evaluated the fixup, to prevent any further + // processing from being done. + Value = 0; + return true; + } bool IsPCRel = Backend.getFixupKindInfo( Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; @@ -523,12 +276,19 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, case MCFragment::FT_Org: { const MCOrgFragment &OF = cast(F); - int64_t TargetLocation; - if (!OF.getOffset().evaluateAsAbsolute(TargetLocation, Layout)) + MCValue Value; + if (!OF.getOffset().evaluateAsValue(Value, Layout)) report_fatal_error("expected assembly-time absolute expression"); // FIXME: We need a way to communicate this error. uint64_t FragmentOffset = Layout.getFragmentOffset(&OF); + int64_t TargetLocation = Value.getConstant(); + if (const MCSymbolRefExpr *A = Value.getSymA()) { + uint64_t Val; + if (!Layout.getSymbolOffset(A->getSymbol(), Val)) + report_fatal_error("expected absolute expression"); + TargetLocation += Val; + } int64_t Size = TargetLocation - FragmentOffset; if (Size < 0 || Size >= 0x40000000) report_fatal_error("invalid .org offset '" + Twine(TargetLocation) + @@ -540,6 +300,8 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout, return cast(F).getContents().size(); case MCFragment::FT_DwarfFrame: return cast(F).getContents().size(); + case MCFragment::FT_Dummy: + llvm_unreachable("Should not have been added"); } llvm_unreachable("invalid fragment kind"); @@ -773,6 +535,8 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout, OW->writeBytes(CF.getContents()); break; } + case MCFragment::FT_Dummy: + llvm_unreachable("Should not have been added"); } assert(OW->getStream().tell() - Start == FragmentSize && @@ -786,15 +550,14 @@ void MCAssembler::writeSectionData(const MCSection *Sec, assert(Layout.getSectionFileSize(Sec) == 0 && "Invalid size for section!"); // Check that contents are only things legal inside a virtual section. - for (MCSection::const_iterator it = Sec->begin(), ie = Sec->end(); it != ie; - ++it) { - switch (it->getKind()) { + for (const MCFragment &F : *Sec) { + switch (F.getKind()) { default: llvm_unreachable("Invalid fragment in virtual section!"); case MCFragment::FT_Data: { // Check that we aren't trying to write a non-zero contents (or fixups) // into a virtual section. This is to support clients which use standard // directives to fill the contents of virtual sections. - const MCDataFragment &DF = cast(*it); + const MCDataFragment &DF = cast(F); assert(DF.fixup_begin() == DF.fixup_end() && "Cannot have fixups in virtual section!"); for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i) @@ -810,13 +573,13 @@ void MCAssembler::writeSectionData(const MCSection *Sec, case MCFragment::FT_Align: // Check that we aren't trying to write a non-zero value into a virtual // section. 
- assert((cast(it)->getValueSize() == 0 || - cast(it)->getValue() == 0) && + assert((cast(F).getValueSize() == 0 || + cast(F).getValue() == 0) && "Invalid align in virtual section!"); break; case MCFragment::FT_Fill: - assert((cast(it)->getValueSize() == 0 || - cast(it)->getValue() == 0) && + assert((cast(F).getValueSize() == 0 || + cast(F).getValue() == 0) && "Invalid fill in virtual section!"); break; } @@ -828,9 +591,8 @@ void MCAssembler::writeSectionData(const MCSection *Sec, uint64_t Start = getWriter().getStream().tell(); (void)Start; - for (MCSection::const_iterator it = Sec->begin(), ie = Sec->end(); it != ie; - ++it) - writeFragment(*this, Layout, *it); + for (const MCFragment &F : *Sec) + writeFragment(*this, Layout, F); assert(getWriter().getStream().tell() - Start == Layout.getSectionAddressSize(Sec)); @@ -854,23 +616,20 @@ std::pair MCAssembler::handleFixup(const MCAsmLayout &Layout, return std::make_pair(FixedValue, IsPCRel); } -void MCAssembler::Finish() { +void MCAssembler::layout(MCAsmLayout &Layout) { DEBUG_WITH_TYPE("mc-dump", { llvm::errs() << "assembler backend - pre-layout\n--\n"; dump(); }); - // Create the layout object. - MCAsmLayout Layout(*this); - // Create dummy fragments and assign section ordinals. unsigned SectionIndex = 0; - for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + for (MCSection &Sec : *this) { // Create dummy fragments to eliminate any empty sections, this simplifies // layout. - if (it->getFragmentList().empty()) - new MCDataFragment(&*it); + if (Sec.getFragmentList().empty()) + new MCDataFragment(&Sec); - it->setOrdinal(SectionIndex++); + Sec.setOrdinal(SectionIndex++); } // Assign layout order indices to sections and fragments. @@ -879,9 +638,8 @@ void MCAssembler::Finish() { Sec->setLayoutOrder(i); unsigned FragmentIndex = 0; - for (MCSection::iterator iFrag = Sec->begin(), iFragEnd = Sec->end(); - iFrag != iFragEnd; ++iFrag) - iFrag->setLayoutOrder(FragmentIndex++); + for (MCFragment &Frag : *Sec) + Frag.setLayoutOrder(FragmentIndex++); } // Layout until everything fits. @@ -899,17 +657,14 @@ void MCAssembler::Finish() { llvm::errs() << "assembler backend - final-layout\n--\n"; dump(); }); - uint64_t StartOffset = OS.tell(); - // Allow the object writer a chance to perform post-layout binding (for // example, to set the index fields in the symbol data). getWriter().executePostLayoutBinding(*this, Layout); // Evaluate and apply the fixups, generating relocation entries as necessary. - for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { - for (MCSection::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2; - ++it2) { - MCEncodedFragment *F = dyn_cast(it2); + for (MCSection &Sec : *this) { + for (MCFragment &Frag : Sec) { + MCEncodedFragment *F = dyn_cast(&Frag); // Data and relaxable fragments both have fixups. So only process // those here. // FIXME: Is there a better way to do this? MCEncodedFragmentWithFixups @@ -935,6 +690,15 @@ void MCAssembler::Finish() { } } } +} + +void MCAssembler::Finish() { + // Create the layout object. + MCAsmLayout Layout(*this); + layout(Layout); + + raw_ostream &OS = getWriter().getStream(); + uint64_t StartOffset = OS.tell(); // Write the object file. 
getWriter().writeObject(*this, Layout); @@ -960,9 +724,8 @@ bool MCAssembler::fragmentNeedsRelaxation(const MCRelaxableFragment *F, if (!getBackend().mayNeedRelaxation(F->getInst())) return false; - for (MCRelaxableFragment::const_fixup_iterator it = F->fixup_begin(), - ie = F->fixup_end(); it != ie; ++it) - if (fixupNeedsRelaxation(*it, F, Layout)) + for (const MCFixup &Fixup : F->getFixups()) + if (fixupNeedsRelaxation(Fixup, F, Layout)) return true; return false; @@ -991,7 +754,6 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout, SmallString<256> Code; raw_svector_ostream VecOS(Code); getEmitter().encodeInstruction(Relaxed, VecOS, Fixups, F.getSubtargetInfo()); - VecOS.flush(); // Update the fragment. F.setInst(Relaxed); @@ -1014,7 +776,6 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { encodeSLEB128(Value, OSE); else encodeULEB128(Value, OSE); - OSE.flush(); return OldSize != LF.getContents().size(); } @@ -1031,8 +792,8 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout, SmallString<8> &Data = DF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); - MCDwarfLineAddr::Encode(Context, LineDelta, AddrDelta, OSE); - OSE.flush(); + MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta, + AddrDelta, OSE); return OldSize != Data.size(); } @@ -1048,7 +809,6 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout, Data.clear(); raw_svector_ostream OSE(Data); MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE); - OSE.flush(); return OldSize != Data.size(); } @@ -1085,7 +845,7 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSection &Sec) { break; } if (RelaxedFrag && !FirstRelaxedFragment) - FirstRelaxedFragment = I; + FirstRelaxedFragment = &*I; } if (FirstRelaxedFragment) { Layout.invalidateFragmentsFrom(FirstRelaxedFragment); @@ -1113,158 +873,3 @@ void MCAssembler::finishLayout(MCAsmLayout &Layout) { Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin()); } } - -// Debugging methods - -namespace llvm { - -raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) { - OS << ""; - return OS; -} - -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void MCFragment::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "<"; - switch (getKind()) { - case MCFragment::FT_Align: OS << "MCAlignFragment"; break; - case MCFragment::FT_Data: OS << "MCDataFragment"; break; - case MCFragment::FT_CompactEncodedInst: - OS << "MCCompactEncodedInstFragment"; break; - case MCFragment::FT_Fill: OS << "MCFillFragment"; break; - case MCFragment::FT_Relaxable: OS << "MCRelaxableFragment"; break; - case MCFragment::FT_Org: OS << "MCOrgFragment"; break; - case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break; - case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break; - case MCFragment::FT_LEB: OS << "MCLEBFragment"; break; - case MCFragment::FT_SafeSEH: OS << "MCSafeSEHFragment"; break; - } - - OS << "(getBundlePadding()) << ">"; - - switch (getKind()) { - case MCFragment::FT_Align: { - const MCAlignFragment *AF = cast(this); - if (AF->hasEmitNops()) - OS << " (emit nops)"; - OS << "\n "; - OS << " Alignment:" << AF->getAlignment() - << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize() - << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">"; - break; - } - case MCFragment::FT_Data: { - const MCDataFragment *DF = cast(this); - OS << "\n "; - OS << " Contents:["; - const SmallVectorImpl &Contents = DF->getContents(); - for (unsigned i = 0, e = Contents.size(); i != 
e; ++i) { - if (i) OS << ","; - OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); - } - OS << "] (" << Contents.size() << " bytes)"; - - if (DF->fixup_begin() != DF->fixup_end()) { - OS << ",\n "; - OS << " Fixups:["; - for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(), - ie = DF->fixup_end(); it != ie; ++it) { - if (it != DF->fixup_begin()) OS << ",\n "; - OS << *it; - } - OS << "]"; - } - break; - } - case MCFragment::FT_CompactEncodedInst: { - const MCCompactEncodedInstFragment *CEIF = - cast(this); - OS << "\n "; - OS << " Contents:["; - const SmallVectorImpl &Contents = CEIF->getContents(); - for (unsigned i = 0, e = Contents.size(); i != e; ++i) { - if (i) OS << ","; - OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); - } - OS << "] (" << Contents.size() << " bytes)"; - break; - } - case MCFragment::FT_Fill: { - const MCFillFragment *FF = cast(this); - OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize() - << " Size:" << FF->getSize(); - break; - } - case MCFragment::FT_Relaxable: { - const MCRelaxableFragment *F = cast(this); - OS << "\n "; - OS << " Inst:"; - F->getInst().dump_pretty(OS); - break; - } - case MCFragment::FT_Org: { - const MCOrgFragment *OF = cast(this); - OS << "\n "; - OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue(); - break; - } - case MCFragment::FT_Dwarf: { - const MCDwarfLineAddrFragment *OF = cast(this); - OS << "\n "; - OS << " AddrDelta:" << OF->getAddrDelta() - << " LineDelta:" << OF->getLineDelta(); - break; - } - case MCFragment::FT_DwarfFrame: { - const MCDwarfCallFrameFragment *CF = cast(this); - OS << "\n "; - OS << " AddrDelta:" << CF->getAddrDelta(); - break; - } - case MCFragment::FT_LEB: { - const MCLEBFragment *LF = cast(this); - OS << "\n "; - OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned(); - break; - } - case MCFragment::FT_SafeSEH: { - const MCSafeSEHFragment *F = cast(this); - OS << "\n "; - OS << " Sym:" << F->getSymbol(); - break; - } - } - OS << ">"; -} - -void MCAssembler::dump() { - raw_ostream &OS = llvm::errs(); - - OS << "dump(); - } - OS << "],\n"; - OS << " Symbols:["; - - for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { - if (it != symbol_begin()) OS << ",\n "; - OS << "("; - it->dump(); - OS << ", Index:" << it->getIndex() << ", "; - OS << ")"; - } - OS << "]>\n"; -} -#endif diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index a85796cfbad9..b5ad518d0330 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCSymbolCOFF.h" #include "llvm/MC/MCSymbolELF.h" #include "llvm/MC/MCSymbolMachO.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" @@ -41,7 +42,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false), GenDwarfForAssembly(false), GenDwarfFileNumber(0), DwarfVersion(4), AllowTemporaryLabels(true), DwarfCompileUnitID(0), - AutoReset(DoAutoReset) { + AutoReset(DoAutoReset), HadError(false) { std::error_code EC = llvm::sys::fs::current_path(CompilationDir); if (EC) @@ -62,9 +63,6 @@ MCContext::~MCContext() { // NOTE: The symbols are all allocated out of a bump pointer allocator, // we don't need to free them here. - - // If the stream for the .secure_log_unique directive was created free it. 
- delete (raw_ostream *)SecureLog; } //===----------------------------------------------------------------------===// @@ -73,13 +71,11 @@ MCContext::~MCContext() { void MCContext::reset() { // Call the destructors so the fragments are freed - for (auto &I : ELFUniquingMap) - I.second->~MCSectionELF(); - for (auto &I : COFFUniquingMap) - I.second->~MCSectionCOFF(); - for (auto &I : MachOUniquingMap) - I.second->~MCSectionMachO(); + COFFAllocator.DestroyAll(); + ELFAllocator.DestroyAll(); + MachOAllocator.DestroyAll(); + MCSubtargetAllocator.DestroyAll(); UsedNames.clear(); Symbols.clear(); SectionSymbols.clear(); @@ -103,6 +99,8 @@ void MCContext::reset() { DwarfLocSeen = false; GenDwarfForAssembly = false; GenDwarfFileNumber = 0; + + HadError = false; } //===----------------------------------------------------------------------===// @@ -294,8 +292,8 @@ MCSectionMachO *MCContext::getMachOSection(StringRef Segment, StringRef Section, Begin = createTempSymbol(BeginSymName, false); // Otherwise, return a new section. - return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes, - Reserved2, Kind, Begin); + return Entry = new (MachOAllocator.Allocate()) MCSectionMachO( + Segment, Section, TypeAndAttributes, Reserved2, Kind, Begin); } void MCContext::renameELFSection(MCSectionELF *Section, StringRef Name) { @@ -322,7 +320,7 @@ MCSectionELF *MCContext::createELFRelSection(StringRef Name, unsigned Type, bool Inserted; std::tie(I, Inserted) = ELFRelSecNames.insert(std::make_pair(Name, true)); - return new (*this) + return new (ELFAllocator.Allocate()) MCSectionELF(I->getKey(), Type, Flags, SectionKind::getReadOnly(), EntrySize, Group, true, nullptr, Associated); } @@ -367,15 +365,15 @@ MCSectionELF *MCContext::getELFSection(StringRef Section, unsigned Type, if (BeginSymName) Begin = createTempSymbol(BeginSymName, false); - MCSectionELF *Result = - new (*this) MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, - GroupSym, UniqueID, Begin, Associated); + MCSectionELF *Result = new (ELFAllocator.Allocate()) + MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, GroupSym, UniqueID, + Begin, Associated); Entry.second = Result; return Result; } MCSectionELF *MCContext::createELFGroupSection(const MCSymbolELF *Group) { - MCSectionELF *Result = new (*this) + MCSectionELF *Result = new (ELFAllocator.Allocate()) MCSectionELF(".group", ELF::SHT_GROUP, 0, SectionKind::getReadOnly(), 4, Group, ~0, nullptr, nullptr); return Result; @@ -404,7 +402,7 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, Begin = createTempSymbol(BeginSymName, false); StringRef CachedName = Iter->first.SectionName; - MCSectionCOFF *Result = new (*this) MCSectionCOFF( + MCSectionCOFF *Result = new (COFFAllocator.Allocate()) MCSectionCOFF( CachedName, Characteristics, COMDATSymbol, Selection, Kind, Begin); Iter->second = Result; @@ -441,6 +439,10 @@ MCSectionCOFF *MCContext::getAssociativeCOFFSection(MCSectionCOFF *Sec, COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); } +MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) { + return *new (MCSubtargetAllocator.Allocate()) MCSubtargetInfo(STI); +} + //===----------------------------------------------------------------------===// // Dwarf Management //===----------------------------------------------------------------------===// @@ -472,14 +474,24 @@ void MCContext::finalizeDwarfSections(MCStreamer &MCOS) { [&](MCSection *Sec) { return !MCOS.mayHaveInstructions(*Sec); }); } -void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) const 
{ - // If we have a source manager and a location, use it. Otherwise just - // use the generic report_fatal_error(). - if (!SrcMgr || Loc == SMLoc()) +//===----------------------------------------------------------------------===// +// Error Reporting +//===----------------------------------------------------------------------===// + +void MCContext::reportError(SMLoc Loc, const Twine &Msg) { + HadError = true; + + // If we have a source manager use it. Otherwise just use the generic + // report_fatal_error(). + if (!SrcMgr) report_fatal_error(Msg, false); // Use the source manager to print the message. SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg); +} + +void MCContext::reportFatalError(SMLoc Loc, const Twine &Msg) { + reportError(Loc, Msg); // If we reached here, we are failing ungracefully. Run the interrupt handlers // to make sure any special cleanups get done, in particular that we remove diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 716d76a79fe3..82063fb74696 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -125,7 +125,6 @@ void LLVMDisasmDispose(LLVMDisasmContextRef DCR){ static void emitComments(LLVMDisasmContext *DC, formatted_raw_ostream &FormattedOS) { // Flush the stream before taking its content. - DC->CommentStream.flush(); StringRef Comments = DC->CommentsToEmit.str(); // Get the default information for printing a comment. const MCAsmInfo *MAI = DC->getAsmInfo(); @@ -147,7 +146,6 @@ static void emitComments(LLVMDisasmContext *DC, // Tell the comment stream that the vector changed underneath it. DC->CommentsToEmit.clear(); - DC->CommentStream.resync(); } /// \brief Gets latency information for \p Inst from the itinerary @@ -261,7 +259,6 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, return 0; case MCDisassembler::Success: { - Annotations.flush(); StringRef AnnotationsStr = Annotations.str(); SmallVector InsnStr; @@ -273,7 +270,6 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, emitLatency(DC, Inst); emitComments(DC, FormattedOS); - OS.flush(); assert(OutStringSize != 0 && "Output buffer cannot be zero size"); size_t OutputSize = std::min(OutStringSize-1, InsnStr.size()); diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index c84c4865f51e..a99ac4eca59e 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -27,27 +27,9 @@ #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" + using namespace llvm; -// Given a special op, return the address skip amount (in units of -// DWARF2_LINE_MIN_INSN_LENGTH. -#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE) - -// The maximum address skip amount that can be encoded with a special op. -#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255) - -// First special line opcode - leave room for the standard opcodes. -// Note: If you want to change this, you'll have to update the -// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit(). -#define DWARF2_LINE_OPCODE_BASE 13 - -// Minimum line offset in a special line info. opcode. This value -// was chosen to give a reasonable range of values. -#define DWARF2_LINE_BASE -5 - -// Range of line offsets in a special line info. opcode. 
-#define DWARF2_LINE_RANGE 14 - static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) { unsigned MinInsnLength = Context.getAsmInfo()->getMinInstAlignment(); if (MinInsnLength == 1) @@ -197,7 +179,8 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section, // // This emits the Dwarf file and the line tables. // -void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS) { +void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS, + MCDwarfLineTableParams Params) { MCContext &context = MCOS->getContext(); auto &LineTables = context.getMCDwarfLineTables(); @@ -212,14 +195,17 @@ void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS) { // Handle the rest of the Compile Units. for (const auto &CUIDTablePair : LineTables) - CUIDTablePair.second.EmitCU(MCOS); + CUIDTablePair.second.EmitCU(MCOS, Params); } -void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS) const { - MCOS.EmitLabel(Header.Emit(&MCOS, None).second); +void MCDwarfDwoLineTable::Emit(MCStreamer &MCOS, + MCDwarfLineTableParams Params) const { + MCOS.EmitLabel(Header.Emit(&MCOS, Params, None).second); } -std::pair MCDwarfLineTableHeader::Emit(MCStreamer *MCOS) const { +std::pair +MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, + MCDwarfLineTableParams Params) const { static const char StandardOpcodeLengths[] = { 0, // length of DW_LNS_copy 1, // length of DW_LNS_advance_pc @@ -234,9 +220,10 @@ std::pair MCDwarfLineTableHeader::Emit(MCStreamer *MCOS) 0, // length of DW_LNS_set_epilogue_begin 1 // DW_LNS_set_isa }; - assert(array_lengthof(StandardOpcodeLengths) == - (DWARF2_LINE_OPCODE_BASE - 1)); - return Emit(MCOS, StandardOpcodeLengths); + assert(array_lengthof(StandardOpcodeLengths) >= + (Params.DWARF2LineOpcodeBase - 1U)); + return Emit(MCOS, Params, makeArrayRef(StandardOpcodeLengths, + Params.DWARF2LineOpcodeBase - 1)); } static const MCExpr *forceExpAbs(MCStreamer &OS, const MCExpr* Expr) { @@ -256,9 +243,8 @@ static void emitAbsValue(MCStreamer &OS, const MCExpr *Value, unsigned Size) { } std::pair -MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, +MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, ArrayRef StandardOpcodeLengths) const { - MCContext &context = MCOS->getContext(); // Create a symbol at the beginning of the line table. @@ -293,8 +279,8 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, // Parameters of the state machine, are next. MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1); MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1); - MCOS->EmitIntValue(DWARF2_LINE_BASE, 1); - MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1); + MCOS->EmitIntValue(Params.DWARF2LineBase, 1); + MCOS->EmitIntValue(Params.DWARF2LineRange, 1); MCOS->EmitIntValue(StandardOpcodeLengths.size() + 1, 1); // Standard opcode lengths @@ -329,8 +315,9 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, return std::make_pair(LineStartSym, LineEndSym); } -void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS) const { - MCSymbol *LineEndSym = Header.Emit(MCOS).second; +void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS, + MCDwarfLineTableParams Params) const { + MCSymbol *LineEndSym = Header.Emit(MCOS, Params).second; // Put out the line tables. for (const auto &LineSec : MCLineSections.getMCLineEntries()) @@ -416,21 +403,31 @@ unsigned MCDwarfLineTableHeader::getFile(StringRef &Directory, } /// Utility function to emit the encoding to a streamer. 
-void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta, - uint64_t AddrDelta) { +void MCDwarfLineAddr::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params, + int64_t LineDelta, uint64_t AddrDelta) { MCContext &Context = MCOS->getContext(); SmallString<256> Tmp; raw_svector_ostream OS(Tmp); - MCDwarfLineAddr::Encode(Context, LineDelta, AddrDelta, OS); + MCDwarfLineAddr::Encode(Context, Params, LineDelta, AddrDelta, OS); MCOS->EmitBytes(OS.str()); } +/// Given a special op, return the address skip amount (in units of +/// DWARF2_LINE_MIN_INSN_LENGTH). +static uint64_t SpecialAddr(MCDwarfLineTableParams Params, uint64_t op) { + return (op - Params.DWARF2LineOpcodeBase) / Params.DWARF2LineRange; +} + /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas. -void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, - uint64_t AddrDelta, raw_ostream &OS) { +void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params, + int64_t LineDelta, uint64_t AddrDelta, + raw_ostream &OS) { uint64_t Temp, Opcode; bool NeedCopy = false; + // The maximum address skip amount that can be encoded with a special op. + uint64_t MaxSpecialAddrDelta = SpecialAddr(Params, 255); + // Scale the address delta by the minimum instruction length. AddrDelta = ScaleAddrDelta(Context, AddrDelta); @@ -438,7 +435,7 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the // end_sequence to emit the matrix entry. if (LineDelta == INT64_MAX) { - if (AddrDelta == MAX_SPECIAL_ADDR_DELTA) + if (AddrDelta == MaxSpecialAddrDelta) OS << char(dwarf::DW_LNS_const_add_pc); else if (AddrDelta) { OS << char(dwarf::DW_LNS_advance_pc); @@ -451,16 +448,16 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, } // Bias the line delta by the base. - Temp = LineDelta - DWARF2_LINE_BASE; + Temp = LineDelta - Params.DWARF2LineBase; // If the line increment is out of range of a special opcode, we must encode // it with DW_LNS_advance_line. - if (Temp >= DWARF2_LINE_RANGE) { + if (Temp >= Params.DWARF2LineRange) { OS << char(dwarf::DW_LNS_advance_line); encodeSLEB128(LineDelta, OS); LineDelta = 0; - Temp = 0 - DWARF2_LINE_BASE; + Temp = 0 - Params.DWARF2LineBase; NeedCopy = true; } @@ -471,19 +468,19 @@ void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, } // Bias the opcode by the special opcode base. - Temp += DWARF2_LINE_OPCODE_BASE; + Temp += Params.DWARF2LineOpcodeBase; // Avoid overflow when addr_delta is large. - if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) { + if (AddrDelta < 256 + MaxSpecialAddrDelta) { // Try using a special opcode. - Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE; + Opcode = Temp + AddrDelta * Params.DWARF2LineRange; if (Opcode <= 255) { OS << char(Opcode); return; } // Try using DW_LNS_const_add_pc followed by special op. 
- Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE; + Opcode = Temp + (AddrDelta - MaxSpecialAddrDelta) * Params.DWARF2LineRange; if (Opcode <= 255) { OS << char(dwarf::DW_LNS_const_add_pc); OS << char(Opcode); @@ -517,10 +514,14 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { MCOS->EmitULEB128IntValue(1); MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit); MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1); - EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4); - if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1 && - MCOS->getContext().getDwarfVersion() >= 3) { - EmitAbbrev(MCOS, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4); + EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, + context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4); + if (context.getGenDwarfSectionSyms().size() > 1 && + context.getDwarfVersion() >= 3) { + EmitAbbrev(MCOS, dwarf::DW_AT_ranges, + context.getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4); } else { EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); @@ -845,7 +846,7 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0); MCSymbol *AbbrevSectionSymbol = nullptr; MCSymbol *InfoSectionSymbol = nullptr; - MCSymbol *RangesSectionSymbol = NULL; + MCSymbol *RangesSectionSymbol = nullptr; // Create end symbols for each section, and remove empty sections MCOS->getContext().finalizeDwarfSections(*MCOS); @@ -998,38 +999,29 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, } namespace { - class FrameEmitterImpl { - int CFAOffset; - int InitialCFAOffset; - bool IsEH; - const MCSymbol *SectionStart; - public: - FrameEmitterImpl(bool isEH) - : CFAOffset(0), InitialCFAOffset(0), IsEH(isEH), SectionStart(nullptr) { - } +class FrameEmitterImpl { + int CFAOffset = 0; + int InitialCFAOffset = 0; + bool IsEH; + MCObjectStreamer &Streamer; - void setSectionStart(const MCSymbol *Label) { SectionStart = Label; } +public: + FrameEmitterImpl(bool IsEH, MCObjectStreamer &Streamer) + : IsEH(IsEH), Streamer(Streamer) {} - /// Emit the unwind information in a compact way. - void EmitCompactUnwind(MCObjectStreamer &streamer, - const MCDwarfFrameInfo &frame); + /// Emit the unwind information in a compact way. 
@@ -998,38 +999,29 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
 }
 
 namespace {
-  class FrameEmitterImpl {
-    int CFAOffset;
-    int InitialCFAOffset;
-    bool IsEH;
-    const MCSymbol *SectionStart;
-  public:
-    FrameEmitterImpl(bool isEH)
-        : CFAOffset(0), InitialCFAOffset(0), IsEH(isEH), SectionStart(nullptr) {
-    }
+class FrameEmitterImpl {
+  int CFAOffset = 0;
+  int InitialCFAOffset = 0;
+  bool IsEH;
+  MCObjectStreamer &Streamer;
 
-    void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
+public:
+  FrameEmitterImpl(bool IsEH, MCObjectStreamer &Streamer)
+      : IsEH(IsEH), Streamer(Streamer) {}
 
-    /// Emit the unwind information in a compact way.
-    void EmitCompactUnwind(MCObjectStreamer &streamer,
-                           const MCDwarfFrameInfo &frame);
+  /// Emit the unwind information in a compact way.
+  void EmitCompactUnwind(const MCDwarfFrameInfo &frame);
 
-    const MCSymbol &EmitCIE(MCObjectStreamer &streamer,
-                            const MCSymbol *personality,
-                            unsigned personalityEncoding,
-                            const MCSymbol *lsda,
-                            bool IsSignalFrame,
-                            unsigned lsdaEncoding,
-                            bool IsSimple);
-    MCSymbol *EmitFDE(MCObjectStreamer &streamer,
-                      const MCSymbol &cieStart,
-                      const MCDwarfFrameInfo &frame);
-    void EmitCFIInstructions(MCObjectStreamer &streamer,
-                             ArrayRef<MCCFIInstruction> Instrs,
-                             MCSymbol *BaseLabel);
-    void EmitCFIInstruction(MCObjectStreamer &Streamer,
-                            const MCCFIInstruction &Instr);
-  };
+  const MCSymbol &EmitCIE(const MCSymbol *personality,
+                          unsigned personalityEncoding, const MCSymbol *lsda,
+                          bool IsSignalFrame, unsigned lsdaEncoding,
+                          bool IsSimple);
+  void EmitFDE(const MCSymbol &cieStart, const MCDwarfFrameInfo &frame,
+               bool LastInSection, const MCSymbol &SectionStart);
+  void EmitCFIInstructions(ArrayRef<MCCFIInstruction> Instrs,
+                           MCSymbol *BaseLabel);
+  void EmitCFIInstruction(const MCCFIInstruction &Instr);
+};
 
 } // end anonymous namespace
 
@@ -1037,8 +1029,7 @@ static void emitEncodingByte(MCObjectStreamer &Streamer, unsigned Encoding) {
   Streamer.EmitIntValue(Encoding, 1);
 }
 
-void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
-                                          const MCCFIInstruction &Instr) {
+void FrameEmitterImpl::EmitCFIInstruction(const MCCFIInstruction &Instr) {
   int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
   auto *MRI = Streamer.getContext().getRegisterInfo();
 
@@ -1150,6 +1141,11 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
     Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
     return;
   }
+  case MCCFIInstruction::OpGnuArgsSize: {
+    Streamer.EmitIntValue(dwarf::DW_CFA_GNU_args_size, 1);
+    Streamer.EmitULEB128IntValue(Instr.getOffset());
+    return;
+  }
   case MCCFIInstruction::OpEscape:
     Streamer.EmitBytes(Instr.getValues());
     return;
@@ -1158,8 +1154,7 @@ void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
 }
 
 /// Emit frame instructions to describe the layout of the frame.
-void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
-                                           ArrayRef<MCCFIInstruction> Instrs,
+void FrameEmitterImpl::EmitCFIInstructions(ArrayRef<MCCFIInstruction> Instrs,
                                            MCSymbol *BaseLabel) {
   for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
     const MCCFIInstruction &Instr = Instrs[i];
@@ -1171,18 +1166,17 @@ void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
     if (BaseLabel && Label) {
       MCSymbol *ThisSym = Label;
       if (ThisSym != BaseLabel) {
-        streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
+        Streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
         BaseLabel = ThisSym;
       }
     }
 
-    EmitCFIInstruction(streamer, Instr);
+    EmitCFIInstruction(Instr);
   }
 }
 
 /// Emit the unwind information in a compact way.
-void FrameEmitterImpl::EmitCompactUnwind(MCObjectStreamer &Streamer,
-                                         const MCDwarfFrameInfo &Frame) {
+void FrameEmitterImpl::EmitCompactUnwind(const MCDwarfFrameInfo &Frame) {
   MCContext &Context = Streamer.getContext();
   const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
 
@@ -1254,39 +1248,39 @@ static unsigned getCIEVersion(bool IsEH, unsigned DwarfVersion) {
   case 3:
     return 3;
   case 4:
+  case 5:
     return 4;
   }
   llvm_unreachable("Unknown version");
 }
 
-const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
-                                          const MCSymbol *personality,
+const MCSymbol &FrameEmitterImpl::EmitCIE(const MCSymbol *personality,
                                           unsigned personalityEncoding,
                                           const MCSymbol *lsda,
                                           bool IsSignalFrame,
                                           unsigned lsdaEncoding,
                                           bool IsSimple) {
-  MCContext &context = streamer.getContext();
+  MCContext &context = Streamer.getContext();
   const MCRegisterInfo *MRI = context.getRegisterInfo();
   const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
 
   MCSymbol *sectionStart = context.createTempSymbol();
-  streamer.EmitLabel(sectionStart);
+  Streamer.EmitLabel(sectionStart);
 
   MCSymbol *sectionEnd = context.createTempSymbol();
 
   // Length
-  const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
-                                               *sectionEnd, 4);
-  emitAbsValue(streamer, Length, 4);
+  const MCExpr *Length =
+      MakeStartMinusEndExpr(Streamer, *sectionStart, *sectionEnd, 4);
+  emitAbsValue(Streamer, Length, 4);
 
   // CIE ID
   unsigned CIE_ID = IsEH ? 0 : -1;
-  streamer.EmitIntValue(CIE_ID, 4);
+  Streamer.EmitIntValue(CIE_ID, 4);
 
   // Version
   uint8_t CIEVersion = getCIEVersion(IsEH, context.getDwarfVersion());
-  streamer.EmitIntValue(CIEVersion, 1);
+  Streamer.EmitIntValue(CIEVersion, 1);
 
   // Augmentation String
   SmallString<8> Augmentation;
@@ -1299,31 +1293,31 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
       Augmentation += "R";
     if (IsSignalFrame)
       Augmentation += "S";
-    streamer.EmitBytes(Augmentation);
+    Streamer.EmitBytes(Augmentation);
   }
-  streamer.EmitIntValue(0, 1);
+  Streamer.EmitIntValue(0, 1);
 
   if (CIEVersion >= 4) {
     // Address Size
-    streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1);
+    Streamer.EmitIntValue(context.getAsmInfo()->getPointerSize(), 1);
 
     // Segment Descriptor Size
-    streamer.EmitIntValue(0, 1);
+    Streamer.EmitIntValue(0, 1);
   }
 
   // Code Alignment Factor
-  streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment());
+  Streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment());
 
   // Data Alignment Factor
-  streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
+  Streamer.EmitSLEB128IntValue(getDataAlignmentFactor(Streamer));
 
   // Return Address Register
   if (CIEVersion == 1) {
     assert(MRI->getRARegister() <= 255 &&
            "DWARF 2 encodes return_address_register in one byte");
-    streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), IsEH), 1);
+    Streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), IsEH), 1);
   } else {
-    streamer.EmitULEB128IntValue(
+    Streamer.EmitULEB128IntValue(
         MRI->getDwarfRegNum(MRI->getRARegister(), IsEH));
   }
 
@@ -1335,28 +1329,28 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
     // Personality Encoding
     augmentationLength += 1;
     // Personality
-    augmentationLength += getSizeForEncoding(streamer, personalityEncoding);
+    augmentationLength += getSizeForEncoding(Streamer, personalityEncoding);
   }
   if (lsda)
     augmentationLength += 1;
   // Encoding of the FDE pointers
   augmentationLength += 1;
 
-  streamer.EmitULEB128IntValue(augmentationLength);
+  Streamer.EmitULEB128IntValue(augmentationLength);
 
   // Augmentation Data (optional)
   if (personality) {
     // Personality Encoding
-    emitEncodingByte(streamer, personalityEncoding);
+    emitEncodingByte(Streamer, personalityEncoding);
     // Personality
-    EmitPersonality(streamer, *personality, personalityEncoding);
+    EmitPersonality(Streamer, *personality, personalityEncoding);
   }
 
   if (lsda)
-    emitEncodingByte(streamer, lsdaEncoding);
+    emitEncodingByte(Streamer, lsdaEncoding);
 
   // Encoding of the FDE pointers
-  emitEncodingByte(streamer, MOFI->getFDEEncoding());
+  emitEncodingByte(Streamer, MOFI->getFDEEncoding());
   }
 
   // Initial Instructions
@@ -1365,22 +1359,23 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
   if (!IsSimple) {
     const std::vector<MCCFIInstruction> &Instructions =
         MAI->getInitialFrameState();
-    EmitCFIInstructions(streamer, Instructions, nullptr);
+    EmitCFIInstructions(Instructions, nullptr);
   }
   InitialCFAOffset = CFAOffset;
 
   // Padding
-  streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
+  Streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize());
 
-  streamer.EmitLabel(sectionEnd);
+  Streamer.EmitLabel(sectionEnd);
   return *sectionStart;
 }
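EmitCIE above assembles the .eh_frame augmentation string one letter at a time. A minimal sketch of that ordering, assuming the conventional meaning of the letters ('z' announces the augmentation-length field; 'P', 'L', 'R', 'S' announce personality, LSDA encoding, FDE pointer encoding, and signal frames); buildAugmentation is an invented helper, not LLVM API:

    #include <iostream>
    #include <string>

    // Illustrative only: mirrors the order in which EmitCIE appends
    // augmentation letters for an EH CIE.
    std::string buildAugmentation(bool HasPersonality, bool HasLsda,
                                  bool IsSignalFrame) {
      std::string Aug = "z"; // augmentation data length follows
      if (HasPersonality)
        Aug += "P";
      if (HasLsda)
        Aug += "L";
      Aug += "R"; // FDE pointer encoding is always announced
      if (IsSignalFrame)
        Aug += "S";
      return Aug;
    }

    int main() {
      std::cout << buildAugmentation(true, true, false) << "\n"; // "zPLR"
    }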
-MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
-                                    const MCSymbol &cieStart,
-                                    const MCDwarfFrameInfo &frame) {
-  MCContext &context = streamer.getContext();
+void FrameEmitterImpl::EmitFDE(const MCSymbol &cieStart,
+                               const MCDwarfFrameInfo &frame,
+                               bool LastInSection,
+                               const MCSymbol &SectionStart) {
+  MCContext &context = Streamer.getContext();
   MCSymbol *fdeStart = context.createTempSymbol();
   MCSymbol *fdeEnd = context.createTempSymbol();
   const MCObjectFileInfo *MOFI = context.getObjectFileInfo();
@@ -1388,107 +1383,103 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
   CFAOffset = InitialCFAOffset;
 
   // Length
-  const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
-  emitAbsValue(streamer, Length, 4);
+  const MCExpr *Length = MakeStartMinusEndExpr(Streamer, *fdeStart, *fdeEnd, 0);
+  emitAbsValue(Streamer, Length, 4);
 
-  streamer.EmitLabel(fdeStart);
+  Streamer.EmitLabel(fdeStart);
 
   // CIE Pointer
   const MCAsmInfo *asmInfo = context.getAsmInfo();
   if (IsEH) {
-    const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
-                                                 0);
-    emitAbsValue(streamer, offset, 4);
+    const MCExpr *offset =
+        MakeStartMinusEndExpr(Streamer, cieStart, *fdeStart, 0);
+    emitAbsValue(Streamer, offset, 4);
   } else if (!asmInfo->doesDwarfUseRelocationsAcrossSections()) {
-    const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
-                                                 cieStart, 0);
-    emitAbsValue(streamer, offset, 4);
+    const MCExpr *offset =
+        MakeStartMinusEndExpr(Streamer, SectionStart, cieStart, 0);
+    emitAbsValue(Streamer, offset, 4);
   } else {
-    streamer.EmitSymbolValue(&cieStart, 4);
+    Streamer.EmitSymbolValue(&cieStart, 4);
   }
 
   // PC Begin
   unsigned PCEncoding =
       IsEH ? MOFI->getFDEEncoding() : (unsigned)dwarf::DW_EH_PE_absptr;
-  unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
-  emitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH);
+  unsigned PCSize = getSizeForEncoding(Streamer, PCEncoding);
+  emitFDESymbol(Streamer, *frame.Begin, PCEncoding, IsEH);
 
   // PC Range
-  const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
-                                              *frame.End, 0);
-  emitAbsValue(streamer, Range, PCSize);
+  const MCExpr *Range =
+      MakeStartMinusEndExpr(Streamer, *frame.Begin, *frame.End, 0);
+  emitAbsValue(Streamer, Range, PCSize);
 
   if (IsEH) {
     // Augmentation Data Length
     unsigned augmentationLength = 0;
 
     if (frame.Lsda)
-      augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
+      augmentationLength += getSizeForEncoding(Streamer, frame.LsdaEncoding);
 
-    streamer.EmitULEB128IntValue(augmentationLength);
+    Streamer.EmitULEB128IntValue(augmentationLength);
 
     // Augmentation Data
     if (frame.Lsda)
-      emitFDESymbol(streamer, *frame.Lsda, frame.LsdaEncoding, true);
+      emitFDESymbol(Streamer, *frame.Lsda, frame.LsdaEncoding, true);
   }
 
   // Call Frame Instructions
-  EmitCFIInstructions(streamer, frame.Instructions, frame.Begin);
+  EmitCFIInstructions(frame.Instructions, frame.Begin);
 
   // Padding
-  streamer.EmitValueToAlignment(PCSize);
+  // The size of a .eh_frame section has to be a multiple of the alignment
+  // since a null CIE is interpreted as the end. Old systems overaligned
+  // .eh_frame, so we do too and account for it in the last FDE.
+  unsigned Align = LastInSection ? asmInfo->getPointerSize() : PCSize;
+  Streamer.EmitValueToAlignment(Align);
 
-  return fdeEnd;
+  Streamer.EmitLabel(fdeEnd);
 }
 
 namespace {
-  struct CIEKey {
-    static const CIEKey getEmptyKey() {
-      return CIEKey(nullptr, 0, -1, false, false);
-    }
-    static const CIEKey getTombstoneKey() {
-      return CIEKey(nullptr, -1, 0, false, false);
-    }
+struct CIEKey {
+  static const CIEKey getEmptyKey() {
+    return CIEKey(nullptr, 0, -1, false, false);
+  }
+  static const CIEKey getTombstoneKey() {
+    return CIEKey(nullptr, -1, 0, false, false);
+  }
 
-    CIEKey(const MCSymbol *Personality_, unsigned PersonalityEncoding_,
-           unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_)
-        : Personality(Personality_), PersonalityEncoding(PersonalityEncoding_),
-          LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_),
-          IsSimple(IsSimple_) {}
-    const MCSymbol *Personality;
-    unsigned PersonalityEncoding;
-    unsigned LsdaEncoding;
-    bool IsSignalFrame;
-    bool IsSimple;
-  };
-}
+  CIEKey(const MCSymbol *Personality, unsigned PersonalityEncoding,
+         unsigned LsdaEncoding, bool IsSignalFrame, bool IsSimple)
+      : Personality(Personality), PersonalityEncoding(PersonalityEncoding),
+        LsdaEncoding(LsdaEncoding), IsSignalFrame(IsSignalFrame),
+        IsSimple(IsSimple) {}
+  const MCSymbol *Personality;
+  unsigned PersonalityEncoding;
+  unsigned LsdaEncoding;
+  bool IsSignalFrame;
+  bool IsSimple;
+};
+} // anonymous namespace
 
 namespace llvm {
-  template <>
-  struct DenseMapInfo<CIEKey> {
-    static CIEKey getEmptyKey() {
-      return CIEKey::getEmptyKey();
-    }
-    static CIEKey getTombstoneKey() {
-      return CIEKey::getTombstoneKey();
-    }
-    static unsigned getHashValue(const CIEKey &Key) {
-      return static_cast<unsigned>(hash_combine(Key.Personality,
-                                                Key.PersonalityEncoding,
-                                                Key.LsdaEncoding,
-                                                Key.IsSignalFrame,
-                                                Key.IsSimple));
-    }
-    static bool isEqual(const CIEKey &LHS,
-                        const CIEKey &RHS) {
-      return LHS.Personality == RHS.Personality &&
-             LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
-             LHS.LsdaEncoding == RHS.LsdaEncoding &&
-             LHS.IsSignalFrame == RHS.IsSignalFrame &&
-             LHS.IsSimple == RHS.IsSimple;
-    }
-  };
-}
+template <> struct DenseMapInfo<CIEKey> {
+  static CIEKey getEmptyKey() { return CIEKey::getEmptyKey(); }
+  static CIEKey getTombstoneKey() { return CIEKey::getTombstoneKey(); }
+  static unsigned getHashValue(const CIEKey &Key) {
+    return static_cast<unsigned>(
+        hash_combine(Key.Personality, Key.PersonalityEncoding, Key.LsdaEncoding,
+                     Key.IsSignalFrame, Key.IsSimple));
+  }
+  static bool isEqual(const CIEKey &LHS, const CIEKey &RHS) {
+    return LHS.Personality == RHS.Personality &&
+           LHS.PersonalityEncoding == RHS.PersonalityEncoding &&
+           LHS.LsdaEncoding == RHS.LsdaEncoding &&
+           LHS.IsSignalFrame == RHS.IsSignalFrame &&
+           LHS.IsSimple == RHS.IsSimple;
  }
+};
+} // namespace llvm
 
 void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
                                bool IsEH) {
@@ -1496,7 +1487,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
   MCContext &Context = Streamer.getContext();
   const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
-  FrameEmitterImpl Emitter(IsEH);
+  FrameEmitterImpl Emitter(IsEH, Streamer);
   ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getDwarfFrameInfos();
 
   // Emit the compact unwind info if available.
@@ -1514,7 +1505,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
       NeedsEHFrameSection |=
         Frame.CompactUnwindEncoding ==
           MOFI->getCompactUnwindDwarfEHFrameOnly();
-      Emitter.EmitCompactUnwind(Streamer, Frame);
+      Emitter.EmitCompactUnwind(Frame);
     }
   }
 
@@ -1527,23 +1518,15 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
   Streamer.SwitchSection(&Section);
   MCSymbol *SectionStart = Context.createTempSymbol();
   Streamer.EmitLabel(SectionStart);
-  Emitter.setSectionStart(SectionStart);
 
-  MCSymbol *FDEEnd = nullptr;
   DenseMap<CIEKey, const MCSymbol *> CIEStarts;
 
   const MCSymbol *DummyDebugKey = nullptr;
-  NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame();
-  for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
-    const MCDwarfFrameInfo &Frame = FrameArray[i];
-
-    // Emit the label from the previous iteration
-    if (FDEEnd) {
-      Streamer.EmitLabel(FDEEnd);
-      FDEEnd = nullptr;
-    }
-
-    if (!NeedsEHFrameSection && Frame.CompactUnwindEncoding !=
+  bool CanOmitDwarf = MOFI->getOmitDwarfIfHaveCompactUnwind();
+  for (auto I = FrameArray.begin(), E = FrameArray.end(); I != E;) {
+    const MCDwarfFrameInfo &Frame = *I;
+    ++I;
+    if (CanOmitDwarf && Frame.CompactUnwindEncoding !=
           MOFI->getCompactUnwindDwarfEHFrameOnly())
       // Don't generate an EH frame if we don't need one. I.e., it's taken care
       // of by the compact unwind encoding.
       continue;
 
     CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
               Frame.LsdaEncoding, Frame.IsSignalFrame, Frame.IsSimple);
     const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
     if (!CIEStart)
-      CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
-                                  Frame.PersonalityEncoding, Frame.Lsda,
-                                  Frame.IsSignalFrame,
-                                  Frame.LsdaEncoding,
-                                  Frame.IsSimple);
+      CIEStart = &Emitter.EmitCIE(Frame.Personality, Frame.PersonalityEncoding,
+                                  Frame.Lsda, Frame.IsSignalFrame,
+                                  Frame.LsdaEncoding, Frame.IsSimple);
 
-    FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
+    Emitter.EmitFDE(*CIEStart, Frame, I == E, *SectionStart);
   }
-
-  Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize());
-  if (FDEEnd)
-    Streamer.EmitLabel(FDEEnd);
 }
 
 void MCDwarfFrameEmitter::EmitAdvanceLoc(MCObjectStreamer &Streamer,
diff --git a/lib/MC/MCELFObjectTargetWriter.cpp b/lib/MC/MCELFObjectTargetWriter.cpp
index bc0ba85a8ff6..de645cac7370 100644
--- a/lib/MC/MCELFObjectTargetWriter.cpp
+++ b/lib/MC/MCELFObjectTargetWriter.cpp
@@ -29,23 +29,7 @@ bool MCELFObjectTargetWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
   return false;
 }
 
-// ELF doesn't require relocations to be in any order. We sort by the Offset,
-// just to match gnu as for easier comparison. The use type is an arbitrary way
-// of making the sort deterministic.
-static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) {
-  const ELFRelocationEntry &A = *AP;
-  const ELFRelocationEntry &B = *BP;
-  if (A.Offset != B.Offset)
-    return B.Offset - A.Offset;
-  if (B.Type != A.Type)
-    return A.Type - B.Type;
-  //llvm_unreachable("ELFRelocs might be unstable!");
-  return 0;
-}
-
-
 void MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm,
                                          std::vector<ELFRelocationEntry> &Relocs) {
-  array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel);
 }
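The rewritten MCDwarfFrameEmitter::Emit loop above deduplicates CIEs through a DenseMap keyed on CIEKey, so every FDE with the same personality/LSDA/signal-frame configuration reuses one CIE. A portable sketch of the same dedup pattern, using std::map and std::tuple in place of llvm::DenseMap and the DenseMapInfo<CIEKey> specialization; all names here are invented for illustration:

    #include <iostream>
    #include <map>
    #include <string>
    #include <tuple>
    #include <vector>

    // Hypothetical frame record; only the fields that select a CIE.
    struct Frame {
      std::string Personality;
      unsigned PersonalityEncoding = 0;
      unsigned LsdaEncoding = 0;
      bool IsSignalFrame = false;
      bool IsSimple = false;
    };

    using CIEKey = std::tuple<std::string, unsigned, unsigned, bool, bool>;

    int main() {
      std::vector<Frame> Frames = {
          {"__gxx_personality_v0", 0x9b, 0x1b, false, false},
          {"__gxx_personality_v0", 0x9b, 0x1b, false, false},
          {"", 0, 0xff, false, false},
      };

      std::map<CIEKey, int> CIEStarts; // key -> id of already-emitted CIE
      int NextCIE = 0;
      for (const Frame &F : Frames) {
        CIEKey Key{F.Personality, F.PersonalityEncoding, F.LsdaEncoding,
                   F.IsSignalFrame, F.IsSimple};
        auto It = CIEStarts.find(Key);
        if (It == CIEStarts.end())
          It = CIEStarts.emplace(Key, NextCIE++).first; // "emit" a new CIE
        std::cout << "FDE uses CIE #" << It->second << "\n"; // 0, 0, 1
      }
    }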
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index fe9ac21e17fc..06d161bccab4 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -68,7 +68,6 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF,
     EF->setBundlePadding(static_cast<uint8_t>(RequiredBundlePadding));
 
     Assembler.writeFragmentPadding(*EF, FSize, OW);
-    VecOS.flush();
     delete OW;
 
     DF->getContents().append(Code.begin(), Code.end());
@@ -87,20 +86,10 @@ void MCELFStreamer::mergeFragment(MCDataFragment *DF,
 }
 
 void MCELFStreamer::InitSections(bool NoExecStack) {
-  // This emulates the same behavior of GNU as. This makes it easier
-  // to compare the output as the major sections are in the same order.
   MCContext &Ctx = getContext();
   SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
   EmitCodeAlignment(4);
 
-  SwitchSection(Ctx.getObjectFileInfo()->getDataSection());
-  EmitCodeAlignment(4);
-
-  SwitchSection(Ctx.getObjectFileInfo()->getBSSSection());
-  EmitCodeAlignment(4);
-
-  SwitchSection(Ctx.getObjectFileInfo()->getTextSection());
-
   if (NoExecStack)
     SwitchSection(Ctx.getAsmInfo()->getNonexecutableStackSection(Ctx));
 }
@@ -112,7 +101,7 @@ void MCELFStreamer::EmitLabel(MCSymbol *S) {
   MCObjectStreamer::EmitLabel(Symbol);
 
   const MCSectionELF &Section =
-      static_cast<const MCSectionELF &>(Symbol->getSection());
+      static_cast<const MCSectionELF &>(*getCurrentSectionOnly());
   if (Section.getFlags() & ELF::SHF_TLS)
     Symbol->setType(ELF::STT_TLS);
 }
@@ -134,7 +123,7 @@ void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   llvm_unreachable("invalid assembler flag!");
 }
 
-// If bundle aligment is used and there are any instructions in the section, it
+// If bundle alignment is used and there are any instructions in the section, it
 // needs to be aligned to at least the bundle size.
 static void setSectionAlignmentForBundling(const MCAssembler &Assembler,
                                            MCSection *Section) {
@@ -312,13 +301,20 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
     Symbol->setType(ELF::STT_OBJECT);
 
   if (Symbol->getBinding() == ELF::STB_LOCAL) {
-    MCSection *Section = getAssembler().getContext().getELFSection(
+    MCSection &Section = *getAssembler().getContext().getELFSection(
         ".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+    MCSectionSubPair P = getCurrentSection();
+    SwitchSection(&Section);
 
-    AssignSection(Symbol, Section);
+    EmitValueToAlignment(ByteAlignment, 0, 1, 0);
+    EmitLabel(Symbol);
+    EmitZeros(Size);
 
-    struct LocalCommon L = {Symbol, Size, ByteAlignment};
-    LocalCommons.push_back(L);
+    // Update the maximum alignment of the section if necessary.
+    if (ByteAlignment > Section.getAlignment())
+      Section.setAlignment(ByteAlignment);
+
+    SwitchSection(P.first, P.second);
   } else {
     if(Symbol->declareCommon(Size, ByteAlignment))
       report_fatal_error("Symbol: " + Symbol->getName() +
@@ -344,7 +340,7 @@ void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
 }
 
 void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
-                                  const SMLoc &Loc) {
+                                  SMLoc Loc) {
   if (isBundleLocked())
     report_fatal_error("Emitting values inside a locked bundle is forbidden");
   fixSymbolsInTLSFixups(Value);
@@ -480,7 +476,6 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst,
   SmallString<256> Code;
   raw_svector_ostream VecOS(Code);
   Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
-  VecOS.flush();
 
   for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
     fixSymbolsInTLSFixups(Fixups[i].getValue());
@@ -603,7 +598,7 @@ void MCELFStreamer::EmitBundleUnlock() {
     report_fatal_error("Empty bundle-locked group is forbidden");
 
   // When the -mc-relax-all flag is used, we emit instructions to fragments
-  // stored on a stack. When the bundle unlock is emited, we pop a fragment
+  // stored on a stack. When the bundle unlock is emitted, we pop a fragment
   // from the stack a merge it to the one below.
   if (getAssembler().getRelaxAll()) {
     assert(!BundleGroups.empty() && "There are no bundle groups");
@@ -625,29 +620,6 @@ void MCELFStreamer::EmitBundleUnlock() {
   Sec.setBundleLockState(MCSection::NotBundleLocked);
 }
 
-void MCELFStreamer::Flush() {
-  for (std::vector<LocalCommon>::const_iterator i = LocalCommons.begin(),
-                                                e = LocalCommons.end();
-       i != e; ++i) {
-    const MCSymbol &Symbol = *i->Symbol;
-    uint64_t Size = i->Size;
-    unsigned ByteAlignment = i->ByteAlignment;
-    MCSection &Section = Symbol.getSection();
-
-    getAssembler().registerSection(Section);
-    new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &Section);
-
-    MCFragment *F = new MCFillFragment(0, 0, Size, &Section);
-    Symbol.setFragment(F);
-
-    // Update the maximum alignment of the section if necessary.
-    if (ByteAlignment > Section.getAlignment())
-      Section.setAlignment(ByteAlignment);
-  }
-
-  LocalCommons.clear();
-}
-
 void MCELFStreamer::FinishImpl() {
   // Ensure the last section gets aligned if necessary.
   MCSection *CurSection = getCurrentSectionOnly();
@@ -655,8 +627,6 @@ void MCELFStreamer::FinishImpl() {
 
   EmitFrames(nullptr);
 
-  Flush();
-
   this->MCObjectStreamer::FinishImpl();
 }
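With the LocalCommons list gone, EmitCommonSymbol above now materializes STB_LOCAL commons immediately: switch to .bss, align, define the label, reserve zeros, and raise the section alignment to the maximum seen. A small model of that bookkeeping, assuming power-of-two alignments; Bss and defineLocalCommon are invented names:

    #include <cstdint>
    #include <iostream>

    // Illustrative model of the new local-common path. Size tracks the end
    // offset of .bss; Alignment tracks the section's alignment.
    struct Bss {
      uint64_t Size = 0;
      unsigned Alignment = 1;
    };

    // ByteAlignment must be a power of two for the rounding mask below.
    uint64_t defineLocalCommon(Bss &S, uint64_t SymSize,
                               unsigned ByteAlignment) {
      // EmitValueToAlignment: round the offset up to the requested alignment.
      S.Size = (S.Size + ByteAlignment - 1) & ~uint64_t(ByteAlignment - 1);
      uint64_t SymbolOffset = S.Size; // EmitLabel defines the symbol here
      S.Size += SymSize;              // EmitZeros reserves the storage
      if (ByteAlignment > S.Alignment) // keep the maximum, as in the patch
        S.Alignment = ByteAlignment;
      return SymbolOffset;
    }

    int main() {
      Bss S;
      std::cout << defineLocalCommon(S, 3, 4) << "\n";  // 0
      std::cout << defineLocalCommon(S, 8, 16) << "\n"; // 16
      std::cout << S.Alignment << "\n";                 // 16
    }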
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index a30ceecc952b..0f26b38c29d7 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -43,7 +43,7 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
     const MCSymbol &Sym = SRE.getSymbol();
     // Parenthesize names that start with $ so that they don't look like
     // absolute names.
-    bool UseParens = Sym.getName()[0] == '$';
+    bool UseParens = Sym.getName().size() && Sym.getName()[0] == '$';
     if (UseParens) {
       OS << '(';
       Sym.print(OS, MAI);
@@ -202,6 +202,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
   case VK_SIZE: return "SIZE";
   case VK_WEAKREF: return "WEAKREF";
   case VK_ARM_NONE: return "none";
+  case VK_ARM_GOT_PREL: return "GOT_PREL";
   case VK_ARM_TARGET1: return "target1";
   case VK_ARM_TARGET2: return "target2";
   case VK_ARM_PREL31: return "prel31";
@@ -311,7 +312,6 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
     .Case("got", VK_GOT)
     .Case("gotoff", VK_GOTOFF)
     .Case("gotpcrel", VK_GOTPCREL)
-    .Case("got_prel", VK_GOTPCREL)
     .Case("gottpoff", VK_GOTTPOFF)
     .Case("indntpoff", VK_INDNTPOFF)
     .Case("ntpoff", VK_NTPOFF)
@@ -382,7 +382,15 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
     .Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO)
     .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI)
     .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA)
+    .Case("gdgot", VK_Hexagon_GD_GOT)
+    .Case("gdplt", VK_Hexagon_GD_PLT)
+    .Case("iegot", VK_Hexagon_IE_GOT)
+    .Case("ie", VK_Hexagon_IE)
+    .Case("ldgot", VK_Hexagon_LD_GOT)
+    .Case("ldplt", VK_Hexagon_LD_PLT)
+    .Case("pcrel", VK_Hexagon_PCREL)
     .Case("none", VK_ARM_NONE)
+    .Case("got_prel", VK_ARM_GOT_PREL)
     .Case("target1", VK_ARM_TARGET1)
     .Case("target2", VK_ARM_TARGET2)
     .Case("prel31", VK_ARM_PREL31)
@@ -477,7 +485,8 @@ static void AttemptToFoldSymbolOffsetDifference(
   if (!Asm->getWriter().isSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
     return;
 
-  if (SA.getFragment() == SB.getFragment()) {
+  if (SA.getFragment() == SB.getFragment() && !SA.isVariable() &&
+      !SB.isVariable()) {
     Addend += (SA.getOffset() - SB.getOffset());
 
     // Pointers to Thumb symbols need to have their low-bit set to allow
@@ -583,11 +592,6 @@ EvaluateSymbolicAdd(const MCAssembler *Asm, const MCAsmLayout *Layout,
   const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A;
   const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B;
 
-  // If we have a negated symbol, then we must have also have a non-negated
-  // symbol in order to encode the expression.
-  if (B && !A)
-    return false;
-
   Res = MCValue::get(A, B, Result_Cst);
   return true;
 }
@@ -606,7 +610,7 @@ bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const {
                                    true);
 }
 
-static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
+static bool canExpand(const MCSymbol &Sym, bool InSet) {
   const MCExpr *Expr = Sym.getVariableValue();
   const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
   if (Inner) {
@@ -616,9 +620,7 @@ static bool canExpand(const MCSymbol &Sym, const MCAssembler *Asm, bool InSet) {
   if (InSet)
     return true;
-  if (!Asm)
-    return false;
-  return !Asm->getWriter().isWeak(Sym);
+  return !Sym.isInSection();
 }
 
 bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
@@ -643,7 +645,7 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
 
     // Evaluate recursively if this is a variable.
     if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None &&
-        canExpand(Sym, Asm, InSet)) {
+        canExpand(Sym, InSet)) {
       bool IsMachO = SRE->hasSubsectionsViaSymbols();
       if (Sym.getVariableValue()->evaluateAsRelocatableImpl(
               Res, Asm, Layout, Fixup, Addrs, InSet || IsMachO)) {
@@ -739,7 +741,17 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
       case MCBinaryExpr::AShr: Result = LHS >> RHS; break;
       case MCBinaryExpr::Add:  Result = LHS + RHS; break;
       case MCBinaryExpr::And:  Result = LHS & RHS; break;
-      case MCBinaryExpr::Div:  Result = LHS / RHS; break;
+      case MCBinaryExpr::Div:
+        // Handle division by zero. gas just emits a warning and keeps going,
+        // we try to be stricter.
+        // FIXME: Currently the caller of this function has no way to understand
+        // we're bailing out because of 'division by zero'. Therefore, it will
+        // emit a 'expected relocatable expression' error. It would be nice to
+        // change this code to emit a better diagnostic.
+        if (RHS == 0)
+          return false;
+        Result = LHS / RHS;
+        break;
       case MCBinaryExpr::EQ:   Result = LHS == RHS; break;
       case MCBinaryExpr::GT:   Result = LHS > RHS; break;
       case MCBinaryExpr::GTE:  Result = LHS >= RHS; break;
@@ -765,45 +777,41 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
   llvm_unreachable("Invalid assembly expression kind!");
 }
 
-MCSection *MCExpr::findAssociatedSection() const {
+MCFragment *MCExpr::findAssociatedFragment() const {
   switch (getKind()) {
   case Target:
     // We never look through target specific expressions.
-    return cast<MCTargetExpr>(this)->findAssociatedSection();
+    return cast<MCTargetExpr>(this)->findAssociatedFragment();
 
   case Constant:
-    return MCSymbol::AbsolutePseudoSection;
+    return MCSymbol::AbsolutePseudoFragment;
 
   case SymbolRef: {
     const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this);
     const MCSymbol &Sym = SRE->getSymbol();
-
-    if (Sym.isDefined())
-      return &Sym.getSection();
-
-    return nullptr;
+    return Sym.getFragment();
   }
 
   case Unary:
-    return cast<MCUnaryExpr>(this)->getSubExpr()->findAssociatedSection();
+    return cast<MCUnaryExpr>(this)->getSubExpr()->findAssociatedFragment();
 
   case Binary: {
     const MCBinaryExpr *BE = cast<MCBinaryExpr>(this);
-    MCSection *LHS_S = BE->getLHS()->findAssociatedSection();
-    MCSection *RHS_S = BE->getRHS()->findAssociatedSection();
+    MCFragment *LHS_F = BE->getLHS()->findAssociatedFragment();
+    MCFragment *RHS_F = BE->getRHS()->findAssociatedFragment();
 
-    // If either section is absolute, return the other.
-    if (LHS_S == MCSymbol::AbsolutePseudoSection)
-      return RHS_S;
-    if (RHS_S == MCSymbol::AbsolutePseudoSection)
-      return LHS_S;
+    // If either is absolute, return the other.
+    if (LHS_F == MCSymbol::AbsolutePseudoFragment)
+      return RHS_F;
+    if (RHS_F == MCSymbol::AbsolutePseudoFragment)
+      return LHS_F;
 
     // Not always correct, but probably the best we can do without more context.
     if (BE->getOpcode() == MCBinaryExpr::Sub)
-      return MCSymbol::AbsolutePseudoSection;
+      return MCSymbol::AbsolutePseudoFragment;
 
-    // Otherwise, return the first non-null section.
-    return LHS_S ? LHS_S : RHS_S;
+    // Otherwise, return the first non-null fragment.
+    return LHS_F ? LHS_F : RHS_F;
   }
   }
diff --git a/lib/MC/MCFragment.cpp b/lib/MC/MCFragment.cpp
new file mode 100644
index 000000000000..efdb7049203a
--- /dev/null
+++ b/lib/MC/MCFragment.cpp
@@ -0,0 +1,458 @@
+//===- lib/MC/MCFragment.cpp - Assembler Fragment Implementation ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCFragment.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include
+using namespace llvm;
+
+MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
+    : Assembler(Asm), LastValidFragment() {
+  // Compute the section layout order. Virtual sections must go last.
+  for (MCSection &Sec : Asm)
+    if (!Sec.isVirtualSection())
+      SectionOrder.push_back(&Sec);
+  for (MCSection &Sec : Asm)
+    if (Sec.isVirtualSection())
+      SectionOrder.push_back(&Sec);
+}
+
+bool MCAsmLayout::isFragmentValid(const MCFragment *F) const {
+  const MCSection *Sec = F->getParent();
+  const MCFragment *LastValid = LastValidFragment.lookup(Sec);
+  if (!LastValid)
+    return false;
+  assert(LastValid->getParent() == Sec);
+  return F->getLayoutOrder() <= LastValid->getLayoutOrder();
+}
+
+void MCAsmLayout::invalidateFragmentsFrom(MCFragment *F) {
+  // If this fragment wasn't already valid, we don't need to do anything.
+  if (!isFragmentValid(F))
+    return;
+
+  // Otherwise, reset the last valid fragment to the previous fragment
+  // (if this is the first fragment, it will be NULL).
+  LastValidFragment[F->getParent()] = F->getPrevNode();
+}
+
+void MCAsmLayout::ensureValid(const MCFragment *F) const {
+  MCSection *Sec = F->getParent();
+  MCSection::iterator I;
+  if (MCFragment *Cur = LastValidFragment[Sec])
+    I = ++MCSection::iterator(Cur);
+  else
+    I = Sec->begin();
+
+  // Advance the layout position until the fragment is valid.
+  while (!isFragmentValid(F)) {
+    assert(I != Sec->end() && "Layout bookkeeping error");
+    const_cast<MCAsmLayout *>(this)->layoutFragment(&*I);
+    ++I;
+  }
+}
+
+uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
+  ensureValid(F);
+  assert(F->Offset != ~UINT64_C(0) && "Address not set!");
+  return F->Offset;
+}
+
+// Simple getSymbolOffset helper for the non-variable case.
+static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbol &S,
+                           bool ReportError, uint64_t &Val) {
+  if (!S.getFragment()) {
+    if (ReportError)
+      report_fatal_error("unable to evaluate offset to undefined symbol '" +
+                         S.getName() + "'");
+    return false;
+  }
+  Val = Layout.getFragmentOffset(S.getFragment()) + S.getOffset();
+  return true;
+}
+
+static bool getSymbolOffsetImpl(const MCAsmLayout &Layout, const MCSymbol &S,
+                                bool ReportError, uint64_t &Val) {
+  if (!S.isVariable())
+    return getLabelOffset(Layout, S, ReportError, Val);
+
+  // If SD is a variable, evaluate it.
+  MCValue Target;
+  if (!S.getVariableValue()->evaluateAsValue(Target, Layout))
+    report_fatal_error("unable to evaluate offset for variable '" +
+                       S.getName() + "'");
+
+  uint64_t Offset = Target.getConstant();
+
+  const MCSymbolRefExpr *A = Target.getSymA();
+  if (A) {
+    uint64_t ValA;
+    if (!getLabelOffset(Layout, A->getSymbol(), ReportError, ValA))
+      return false;
+    Offset += ValA;
+  }
+
+  const MCSymbolRefExpr *B = Target.getSymB();
+  if (B) {
+    uint64_t ValB;
+    if (!getLabelOffset(Layout, B->getSymbol(), ReportError, ValB))
+      return false;
+    Offset -= ValB;
+  }
+
+  Val = Offset;
+  return true;
+}
+
+bool MCAsmLayout::getSymbolOffset(const MCSymbol &S, uint64_t &Val) const {
+  return getSymbolOffsetImpl(*this, S, false, Val);
+}
+
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbol &S) const {
+  uint64_t Val;
+  getSymbolOffsetImpl(*this, S, true, Val);
+  return Val;
+}
+
+const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
+  if (!Symbol.isVariable())
+    return &Symbol;
+
+  const MCExpr *Expr = Symbol.getVariableValue();
+  MCValue Value;
+  if (!Expr->evaluateAsValue(Value, *this)) {
+    Assembler.getContext().reportError(
+        SMLoc(), "expression could not be evaluated");
+    return nullptr;
+  }
+
+  const MCSymbolRefExpr *RefB = Value.getSymB();
+  if (RefB) {
+    Assembler.getContext().reportError(
+        SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() +
+                     "' could not be evaluated in a subtraction expression");
+    return nullptr;
+  }
+
+  const MCSymbolRefExpr *A = Value.getSymA();
+  if (!A)
+    return nullptr;
+
+  const MCSymbol &ASym = A->getSymbol();
+  const MCAssembler &Asm = getAssembler();
+  if (ASym.isCommon()) {
+    // FIXME: we should probably add a SMLoc to MCExpr.
+    Asm.getContext().reportError(SMLoc(),
+                                 "Common symbol '" + ASym.getName() +
+                                     "' cannot be used in assignment expr");
+    return nullptr;
+  }
+
+  return &ASym;
+}
+
+uint64_t MCAsmLayout::getSectionAddressSize(const MCSection *Sec) const {
+  // The size is the last fragment's end offset.
+  const MCFragment &F = Sec->getFragmentList().back();
+  return getFragmentOffset(&F) + getAssembler().computeFragmentSize(*this, F);
+}
+
+uint64_t MCAsmLayout::getSectionFileSize(const MCSection *Sec) const {
+  // Virtual sections have no file size.
+  if (Sec->isVirtualSection())
+    return 0;
+
+  // Otherwise, the file size is the same as the address space size.
+  return getSectionAddressSize(Sec);
+}
+
+uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
+                                    const MCFragment *F,
+                                    uint64_t FOffset, uint64_t FSize) {
+  uint64_t BundleSize = Assembler.getBundleAlignSize();
+  assert(BundleSize > 0 &&
+         "computeBundlePadding should only be called if bundling is enabled");
+  uint64_t BundleMask = BundleSize - 1;
+  uint64_t OffsetInBundle = FOffset & BundleMask;
+  uint64_t EndOfFragment = OffsetInBundle + FSize;
+
+  // There are two kinds of bundling restrictions:
+  //
+  // 1) For alignToBundleEnd(), add padding to ensure that the fragment will
+  //    *end* on a bundle boundary.
+  // 2) Otherwise, check if the fragment would cross a bundle boundary. If it
+  //    would, add padding until the end of the bundle so that the fragment
+  //    will start in a new one.
+  if (F->alignToBundleEnd()) {
+    // Three possibilities here:
+    //
+    // A) The fragment just happens to end at a bundle boundary, so we're good.
+    // B) The fragment ends before the current bundle boundary: pad it just
+    //    enough to reach the boundary.
+    // C) The fragment ends after the current bundle boundary: pad it until it
+    //    reaches the end of the next bundle boundary.
+    //
+    // Note: this code could be made shorter with some modulo trickery, but it's
+    // intentionally kept in its more explicit form for simplicity.
+    if (EndOfFragment == BundleSize)
+      return 0;
+    else if (EndOfFragment < BundleSize)
+      return BundleSize - EndOfFragment;
+    else { // EndOfFragment > BundleSize
+      return 2 * BundleSize - EndOfFragment;
+    }
+  } else if (OffsetInBundle > 0 && EndOfFragment > BundleSize)
+    return BundleSize - OffsetInBundle;
+  else
+    return 0;
+}
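computeBundlePadding above distinguishes three align-to-bundle-end cases plus the boundary-crossing case. The following standalone restatement reproduces the same arithmetic so the cases can be unit-checked; bundlePadding is an illustrative rename, and BundleSize is assumed to be a power of two, as in the assembler:

    #include <cassert>
    #include <cstdint>

    // Standalone restatement of computeBundlePadding for checking the cases.
    uint64_t bundlePadding(uint64_t BundleSize, bool AlignToBundleEnd,
                           uint64_t FOffset, uint64_t FSize) {
      uint64_t OffsetInBundle = FOffset & (BundleSize - 1);
      uint64_t EndOfFragment = OffsetInBundle + FSize;
      if (AlignToBundleEnd) {
        if (EndOfFragment == BundleSize)
          return 0;                            // A: already ends on a boundary
        if (EndOfFragment < BundleSize)
          return BundleSize - EndOfFragment;   // B: pad up to the boundary
        return 2 * BundleSize - EndOfFragment; // C: pad into the next bundle
      }
      if (OffsetInBundle > 0 && EndOfFragment > BundleSize)
        return BundleSize - OffsetInBundle;    // would cross: start a new bundle
      return 0;
    }

    int main() {
      assert(bundlePadding(16, true, 0, 16) == 0);   // case A
      assert(bundlePadding(16, true, 0, 10) == 6);   // case B
      assert(bundlePadding(16, true, 12, 10) == 10); // case C: 32 - 22
      assert(bundlePadding(16, false, 12, 10) == 4); // crossing -> pad to 16
      return 0;
    }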
+ if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << Contents.size() << " bytes)"; + + if (DF->fixup_begin() != DF->fixup_end()) { + OS << ",\n "; + OS << " Fixups:["; + for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(), + ie = DF->fixup_end(); it != ie; ++it) { + if (it != DF->fixup_begin()) OS << ",\n "; + OS << *it; + } + OS << "]"; + } + break; + } + case MCFragment::FT_CompactEncodedInst: { + const MCCompactEncodedInstFragment *CEIF = + cast(this); + OS << "\n "; + OS << " Contents:["; + const SmallVectorImpl &Contents = CEIF->getContents(); + for (unsigned i = 0, e = Contents.size(); i != e; ++i) { + if (i) OS << ","; + OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF); + } + OS << "] (" << Contents.size() << " bytes)"; + break; + } + case MCFragment::FT_Fill: { + const MCFillFragment *FF = cast(this); + OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize() + << " Size:" << FF->getSize(); + break; + } + case MCFragment::FT_Relaxable: { + const MCRelaxableFragment *F = cast(this); + OS << "\n "; + OS << " Inst:"; + F->getInst().dump_pretty(OS); + break; + } + case MCFragment::FT_Org: { + const MCOrgFragment *OF = cast(this); + OS << "\n "; + OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue(); + break; + } + case MCFragment::FT_Dwarf: { + const MCDwarfLineAddrFragment *OF = cast(this); + OS << "\n "; + OS << " AddrDelta:" << OF->getAddrDelta() + << " LineDelta:" << OF->getLineDelta(); + break; + } + case MCFragment::FT_DwarfFrame: { + const MCDwarfCallFrameFragment *CF = cast(this); + OS << "\n "; + OS << " AddrDelta:" << CF->getAddrDelta(); + break; + } + case MCFragment::FT_LEB: { + const MCLEBFragment *LF = cast(this); + OS << "\n "; + OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned(); + break; + } + case MCFragment::FT_SafeSEH: { + const MCSafeSEHFragment *F = cast(this); + OS << "\n "; + OS << " Sym:" << F->getSymbol(); + break; + } + case MCFragment::FT_Dummy: + break; + } + OS << ">"; +} + +void MCAssembler::dump() { + raw_ostream &OS = llvm::errs(); + + OS << "dump(); + } + OS << "],\n"; + OS << " Symbols:["; + + for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) { + if (it != symbol_begin()) OS << ",\n "; + OS << "("; + it->dump(); + OS << ", Index:" << it->getIndex() << ", "; + OS << ")"; + } + OS << "]>\n"; +} +#endif diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp index 7ef69be66df6..5f829aeb339c 100644 --- a/lib/MC/MCInst.cpp +++ b/lib/MC/MCInst.cpp @@ -23,6 +23,8 @@ void MCOperand::print(raw_ostream &OS) const { OS << "Reg:" << getReg(); else if (isImm()) OS << "Imm:" << getImm(); + else if (isFPImm()) + OS << "FPImm:" << getFPImm(); else if (isExpr()) { OS << "Expr:(" << *getExpr() << ")"; } else if (isInst()) { diff --git a/lib/MC/MCInstrDesc.cpp b/lib/MC/MCInstrDesc.cpp index 5be2fa1b30b6..ee55f3eff3ac 100644 --- a/lib/MC/MCInstrDesc.cpp +++ b/lib/MC/MCInstrDesc.cpp @@ -53,7 +53,7 @@ bool MCInstrDesc::mayAffectControlFlow(const MCInst &MI, bool MCInstrDesc::hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI) const { - if (const uint16_t *ImpDefs = ImplicitDefs) + if (const MCPhysReg *ImpDefs = ImplicitDefs) for (; *ImpDefs; ++ImpDefs) if (*ImpDefs == Reg || (MRI && MRI->isSubRegister(Reg, *ImpDefs))) return true; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 53cd1317a3d7..21f7571eec4a 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ 
b/lib/MC/MCMachOStreamer.cpp @@ -60,6 +60,7 @@ public: /// state management void reset() override { + CreatedADWARFSection = false; HasSectionLabel.clear(); MCObjectStreamer::reset(); } @@ -180,8 +181,6 @@ void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - // isSymbolLinkerVisible uses the section. - AssignSection(Symbol, getCurrentSection().first); // We have to create a new fragment if this is an atom defining symbol, // fragments cannot span atoms. if (getAssembler().isSymbolLinkerVisible(*Symbol)) @@ -384,8 +383,6 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself. assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - AssignSection(Symbol, nullptr); - getAssembler().registerSymbol(*Symbol); Symbol->setExternal(true); Symbol->setCommon(Size, ByteAlignment); @@ -417,8 +414,6 @@ void MCMachOStreamer::EmitZerofill(MCSection *Section, MCSymbol *Symbol, if (ByteAlignment != 1) new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, Section); - AssignSection(Symbol, Section); - MCFragment *F = new MCFillFragment(0, 0, Size, Section); Symbol->setFragment(F); @@ -443,12 +438,11 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst, SmallString<256> Code; raw_svector_ostream VecOS(Code); getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI); - VecOS.flush(); // Add the fixups and data. - for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { - Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); - DF->getFixups().push_back(Fixups[i]); + for (MCFixup &Fixup : Fixups) { + Fixup.setOffset(Fixup.getOffset() + DF->getContents().size()); + DF->getFixups().push_back(Fixup); } DF->getContents().append(Code.begin(), Code.end()); } @@ -463,7 +457,8 @@ void MCMachOStreamer::FinishImpl() { // defining symbols. DenseMap DefiningSymbolMap; for (const MCSymbol &Symbol : getAssembler().symbols()) { - if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.getFragment()) { + if (getAssembler().isSymbolLinkerVisible(Symbol) && Symbol.isInSection() && + !Symbol.isVariable()) { // An atom defining symbol should never be internal to a fragment. assert(Symbol.getOffset() == 0 && "Invalid offset in atom defining symbol!"); @@ -473,14 +468,12 @@ void MCMachOStreamer::FinishImpl() { // Set the fragment atom associations by tracking the last seen atom defining // symbol. - for (MCAssembler::iterator it = getAssembler().begin(), - ie = getAssembler().end(); it != ie; ++it) { + for (MCSection &Sec : getAssembler()) { const MCSymbol *CurrentAtom = nullptr; - for (MCSection::iterator it2 = it->begin(), ie2 = it->end(); it2 != ie2; - ++it2) { - if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(it2)) + for (MCFragment &Frag : Sec) { + if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(&Frag)) CurrentAtom = Symbol; - it2->setAtom(CurrentAtom); + Frag.setAtom(CurrentAtom); } } @@ -493,6 +486,26 @@ MCStreamer *llvm::createMachOStreamer(MCContext &Context, MCAsmBackend &MAB, bool LabelSections) { MCMachOStreamer *S = new MCMachOStreamer(Context, MAB, OS, CE, DWARFMustBeAtTheEnd, LabelSections); + const Triple &TT = Context.getObjectFileInfo()->getTargetTriple(); + if (TT.isOSDarwin()) { + unsigned Major, Minor, Update; + TT.getOSVersion(Major, Minor, Update); + // If there is a version specified, Major will be non-zero. 
+ if (Major) { + MCVersionMinType VersionType; + if (TT.isWatchOS()) + VersionType = MCVM_WatchOSVersionMin; + else if (TT.isTvOS()) + VersionType = MCVM_TvOSVersionMin; + else if (TT.isMacOSX()) + VersionType = MCVM_OSXVersionMin; + else { + assert(TT.isiOS() && "Must only be iOS platform left"); + VersionType = MCVM_IOSVersionMin; + } + S->EmitVersionMin(VersionType, Major, Minor, Update); + } + } if (RelaxAll) S->getAssembler().setRelaxAll(true); return S; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 576827a72d56..028f2e955b21 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -16,6 +16,8 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/Support/COFF.h" + using namespace llvm; static bool useCompactUnwind(const Triple &T) { @@ -27,6 +29,10 @@ static bool useCompactUnwind(const Triple &T) { if (T.getArch() == Triple::aarch64) return true; + // armv7k always has it. + if (T.isWatchOS()) + return true; + // Use it on newer version of OS X. if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) return true; @@ -43,9 +49,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { // MachO SupportsWeakOmittedEHFrame = false; + EHFrameSection = Ctx->getMachOSection( + "__TEXT", "__eh_frame", + MachO::S_COALESCED | MachO::S_ATTR_NO_TOC | + MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); + if (T.isOSDarwin() && T.getArch() == Triple::aarch64) SupportsCompactUnwindWithoutEHFrame = true; + if (T.isWatchOS()) + OmitDwarfIfHaveCompactUnwind = true; + PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4; LSDAEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel; @@ -61,16 +76,15 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { MachO::S_ATTR_PURE_INSTRUCTIONS, SectionKind::getText()); DataSection // .data - = Ctx->getMachOSection("__DATA", "__data", 0, - SectionKind::getDataRel()); + = Ctx->getMachOSection("__DATA", "__data", 0, SectionKind::getData()); // BSSSection might not be expected initialized on msvc. BSSSection = nullptr; TLSDataSection // .tdata - = Ctx->getMachOSection("__DATA", "__thread_data", - MachO::S_THREAD_LOCAL_REGULAR, - SectionKind::getDataRel()); + = Ctx->getMachOSection("__DATA", "__thread_data", + MachO::S_THREAD_LOCAL_REGULAR, + SectionKind::getData()); TLSBSSSection // .tbss = Ctx->getMachOSection("__DATA", "__thread_bss", MachO::S_THREAD_LOCAL_ZEROFILL, @@ -78,14 +92,13 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { // TODO: Verify datarel below. 
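createMachOStreamer above now emits a version-min load command chosen from the target triple. A sketch of just that selection order (watchOS, then tvOS, then OS X, with iOS as the remaining case); the Platform struct and selectVersionMin stand in for llvm::Triple and are invented for this example:

    #include <cstdio>

    // Mirrors MCVersionMinType from the patch; values are illustrative.
    enum VersionMinType { WatchOSVersionMin, TvOSVersionMin, OSXVersionMin,
                          IOSVersionMin };

    // Invented stand-in for the triple queries used above.
    struct Platform {
      bool IsWatchOS, IsTvOS, IsMacOSX; // anything else is treated as iOS
    };

    VersionMinType selectVersionMin(const Platform &P) {
      if (P.IsWatchOS) return WatchOSVersionMin;
      if (P.IsTvOS)    return TvOSVersionMin;
      if (P.IsMacOSX)  return OSXVersionMin;
      return IOSVersionMin; // "Must only be iOS platform left"
    }

    int main() {
      Platform Watch{true, false, false};
      std::printf("%d\n", selectVersionMin(Watch)); // 0 == WatchOSVersionMin
    }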
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 576827a72d56..028f2e955b21 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -16,6 +16,8 @@
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Support/COFF.h"
+
 using namespace llvm;
 
 static bool useCompactUnwind(const Triple &T) {
@@ -27,6 +29,10 @@ static bool useCompactUnwind(const Triple &T) {
   if (T.getArch() == Triple::aarch64)
     return true;
 
+  // armv7k always has it.
+  if (T.isWatchOS())
+    return true;
+
   // Use it on newer version of OS X.
   if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
     return true;
@@ -43,9 +49,18 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
   // MachO
   SupportsWeakOmittedEHFrame = false;
 
+  EHFrameSection = Ctx->getMachOSection(
+      "__TEXT", "__eh_frame",
+      MachO::S_COALESCED | MachO::S_ATTR_NO_TOC |
+          MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT,
+      SectionKind::getReadOnly());
+
   if (T.isOSDarwin() && T.getArch() == Triple::aarch64)
     SupportsCompactUnwindWithoutEHFrame = true;
 
+  if (T.isWatchOS())
+    OmitDwarfIfHaveCompactUnwind = true;
+
   PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
     | dwarf::DW_EH_PE_sdata4;
   LSDAEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
@@ -61,16 +76,15 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
                            MachO::S_ATTR_PURE_INSTRUCTIONS,
                            SectionKind::getText());
   DataSection // .data
-    = Ctx->getMachOSection("__DATA", "__data", 0,
-                           SectionKind::getDataRel());
+      = Ctx->getMachOSection("__DATA", "__data", 0, SectionKind::getData());
 
   // BSSSection might not be expected initialized on msvc.
   BSSSection = nullptr;
 
   TLSDataSection // .tdata
-    = Ctx->getMachOSection("__DATA", "__thread_data",
-                           MachO::S_THREAD_LOCAL_REGULAR,
-                           SectionKind::getDataRel());
+      = Ctx->getMachOSection("__DATA", "__thread_data",
+                             MachO::S_THREAD_LOCAL_REGULAR,
+                             SectionKind::getData());
   TLSBSSSection // .tbss
     = Ctx->getMachOSection("__DATA", "__thread_bss",
                            MachO::S_THREAD_LOCAL_ZEROFILL,
@@ -78,14 +92,13 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
 
   // TODO: Verify datarel below.
   TLSTLVSection // .tlv
-    = Ctx->getMachOSection("__DATA", "__thread_vars",
-                           MachO::S_THREAD_LOCAL_VARIABLES,
-                           SectionKind::getDataRel());
+      = Ctx->getMachOSection("__DATA", "__thread_vars",
+                             MachO::S_THREAD_LOCAL_VARIABLES,
+                             SectionKind::getData());
 
-  TLSThreadInitSection
-    = Ctx->getMachOSection("__DATA", "__thread_init",
-                          MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
-                          SectionKind::getDataRel());
+  TLSThreadInitSection = Ctx->getMachOSection(
+      "__DATA", "__thread_init", MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+      SectionKind::getData());
 
   CStringSection // .cstring
     = Ctx->getMachOSection("__TEXT", "__cstring",
@@ -112,22 +125,35 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
     = Ctx->getMachOSection("__TEXT", "__const", 0,
                            SectionKind::getReadOnly());
 
-  TextCoalSection
-    = Ctx->getMachOSection("__TEXT", "__textcoal_nt",
-                           MachO::S_COALESCED |
-                           MachO::S_ATTR_PURE_INSTRUCTIONS,
-                           SectionKind::getText());
-  ConstTextCoalSection
-    = Ctx->getMachOSection("__TEXT", "__const_coal",
-                           MachO::S_COALESCED,
-                           SectionKind::getReadOnly());
+  // If the target is not powerpc, map the coal sections to the non-coal
+  // sections.
+  //
+  // "__TEXT/__textcoal_nt" => section "__TEXT/__text"
+  // "__TEXT/__const_coal"  => section "__TEXT/__const"
+  // "__DATA/__datacoal_nt" => section "__DATA/__data"
+  Triple::ArchType ArchTy = T.getArch();
+
+  if (ArchTy == Triple::ppc || ArchTy == Triple::ppc64) {
+    TextCoalSection
+      = Ctx->getMachOSection("__TEXT", "__textcoal_nt",
+                             MachO::S_COALESCED |
+                             MachO::S_ATTR_PURE_INSTRUCTIONS,
+                             SectionKind::getText());
+    ConstTextCoalSection
+      = Ctx->getMachOSection("__TEXT", "__const_coal",
+                             MachO::S_COALESCED,
+                             SectionKind::getReadOnly());
+    DataCoalSection = Ctx->getMachOSection(
+        "__DATA", "__datacoal_nt", MachO::S_COALESCED, SectionKind::getData());
+  } else {
+    TextCoalSection = TextSection;
+    ConstTextCoalSection = ReadOnlySection;
+    DataCoalSection = DataSection;
+  }
 
   ConstDataSection // .const_data
     = Ctx->getMachOSection("__DATA", "__const", 0,
                            SectionKind::getReadOnlyWithRel());
-  DataCoalSection
-    = Ctx->getMachOSection("__DATA","__datacoal_nt",
-                           MachO::S_COALESCED,
-                           SectionKind::getDataRel());
   DataCommonSection
     = Ctx->getMachOSection("__DATA","__common",
                            MachO::S_ZEROFILL,
@@ -147,21 +173,17 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
                            SectionKind::getMetadata());
 
   if (RelocM == Reloc::Static) {
-    StaticCtorSection
-      = Ctx->getMachOSection("__TEXT", "__constructor", 0,
-                             SectionKind::getDataRel());
-    StaticDtorSection
-      = Ctx->getMachOSection("__TEXT", "__destructor", 0,
-                             SectionKind::getDataRel());
+    StaticCtorSection = Ctx->getMachOSection("__TEXT", "__constructor", 0,
+                                             SectionKind::getData());
+    StaticDtorSection = Ctx->getMachOSection("__TEXT", "__destructor", 0,
+                                             SectionKind::getData());
   } else {
-    StaticCtorSection
-      = Ctx->getMachOSection("__DATA", "__mod_init_func",
-                             MachO::S_MOD_INIT_FUNC_POINTERS,
-                             SectionKind::getDataRel());
-    StaticDtorSection
-      = Ctx->getMachOSection("__DATA", "__mod_term_func",
-                             MachO::S_MOD_TERM_FUNC_POINTERS,
-                             SectionKind::getDataRel());
+    StaticCtorSection = Ctx->getMachOSection("__DATA", "__mod_init_func",
+                                             MachO::S_MOD_INIT_FUNC_POINTERS,
+                                             SectionKind::getData());
+    StaticDtorSection = Ctx->getMachOSection("__DATA", "__mod_term_func",
+                                             MachO::S_MOD_TERM_FUNC_POINTERS,
+                                             SectionKind::getData());
   }
 
   // Exception Handling.
@@ -176,9 +198,11 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
                            SectionKind::getReadOnly());
 
   if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
-    CompactUnwindDwarfEHFrameOnly = 0x04000000;
+    CompactUnwindDwarfEHFrameOnly = 0x04000000;  // UNWIND_X86_64_MODE_DWARF
   else if (T.getArch() == Triple::aarch64)
-    CompactUnwindDwarfEHFrameOnly = 0x03000000;
+    CompactUnwindDwarfEHFrameOnly = 0x03000000;  // UNWIND_ARM64_MODE_DWARF
+  else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb)
+    CompactUnwindDwarfEHFrameOnly = 0x04000000;  // UNWIND_ARM_MODE_DWARF
   }
 
   // Debug Information.
@@ -235,6 +259,12 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
   DwarfDebugInlineSection =
       Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG,
                            SectionKind::getMetadata());
+  DwarfCUIndexSection =
+      Ctx->getMachOSection("__DWARF", "__debug_cu_index", MachO::S_ATTR_DEBUG,
+                           SectionKind::getMetadata());
+  DwarfTUIndexSection =
+      Ctx->getMachOSection("__DWARF", "__debug_tu_index", MachO::S_ATTR_DEBUG,
+                           SectionKind::getMetadata());
   StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps",
                                          0, SectionKind::getMetadata());
 
@@ -258,7 +288,6 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
     FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
       ((CMModel == CodeModel::Large) ? dwarf::DW_EH_PE_sdata8
                                      : dwarf::DW_EH_PE_sdata4);
-    break;
   default:
     FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
@@ -391,17 +420,15 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
     break;
   }
 
+  unsigned EHSectionType = T.getArch() == Triple::x86_64
+                               ? ELF::SHT_X86_64_UNWIND
+                               : ELF::SHT_PROGBITS;
+
   // Solaris requires different flags for .eh_frame to seemingly every other
   // platform.
-  EHSectionType = ELF::SHT_PROGBITS;
-  EHSectionFlags = ELF::SHF_ALLOC;
-  if (T.isOSSolaris()) {
-    if (T.getArch() == Triple::x86_64)
-      EHSectionType = ELF::SHT_X86_64_UNWIND;
-    else
-      EHSectionFlags |= ELF::SHF_WRITE;
-  }
-
+  unsigned EHSectionFlags = ELF::SHF_ALLOC;
+  if (T.isOSSolaris() && T.getArch() != Triple::x86_64)
+    EHSectionFlags |= ELF::SHF_WRITE;
 
   // ELF
   BSSSection = Ctx->getELFSection(".bss", ELF::SHT_NOBITS,
@@ -423,18 +450,9 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
   TLSBSSSection = Ctx->getELFSection(
       ".tbss", ELF::SHT_NOBITS, ELF::SHF_ALLOC | ELF::SHF_TLS | ELF::SHF_WRITE);
 
-  DataRelSection = Ctx->getELFSection(".data.rel", ELF::SHT_PROGBITS,
-                                      ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
-  DataRelLocalSection = Ctx->getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
-                                           ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
   DataRelROSection = Ctx->getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
                                         ELF::SHF_ALLOC | ELF::SHF_WRITE);
 
-  DataRelROLocalSection = Ctx->getELFSection(
-      ".data.rel.ro.local", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
   MergeableConst4Section =
       Ctx->getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
                          ELF::SHF_ALLOC | ELF::SHF_MERGE, 4, "");
@@ -519,14 +537,28 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
   DwarfAddrSection =
       Ctx->getELFSection(".debug_addr", ELF::SHT_PROGBITS, 0, "addr_sec");
 
+  // DWP Sections
+  DwarfCUIndexSection =
+      Ctx->getELFSection(".debug_cu_index", ELF::SHT_PROGBITS, 0);
+  DwarfTUIndexSection =
+      Ctx->getELFSection(".debug_tu_index", ELF::SHT_PROGBITS, 0);
+
   StackMapSection =
       Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
 
   FaultMapSection =
       Ctx->getELFSection(".llvm_faultmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+
+  EHFrameSection =
+      Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
 }
 
 void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
+  EHFrameSection = Ctx->getCOFFSection(
+      ".eh_frame", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                       COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+      SectionKind::getData());
+
   bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb;
 
   CommDirectiveSupportsAlignment = true;
@@ -545,7 +577,7 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
   DataSection = Ctx->getCOFFSection(
       ".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
                    COFF::IMAGE_SCN_MEM_WRITE,
-      SectionKind::getDataRel());
+      SectionKind::getData());
   ReadOnlySection = Ctx->getCOFFSection(
       ".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
       SectionKind::getReadOnly());
@@ -563,21 +595,20 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
     StaticCtorSection = Ctx->getCOFFSection(
         ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                       COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
-        SectionKind::getDataRel());
+        SectionKind::getData());
     StaticDtorSection = Ctx->getCOFFSection(
         ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                       COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
-        SectionKind::getDataRel());
+        SectionKind::getData());
   }
 
   // FIXME: We're emitting LSDA info into a readonly section on COFF, even
   // though it contains relocatable pointers.  In PIC mode, this is probably a
   // big runtime hit for C++ apps.  Either the contents of the LSDA need to be
   // adjusted or this should be a data section.
-  assert(T.isOSWindows() && "Windows is the only supported COFF target");
   if (T.getArch() == Triple::x86_64) {
     // On Windows 64 with SEH, the LSDA is emitted into the .xdata section
-    LSDASection = 0;
+    LSDASection = nullptr;
   } else {
     LSDASection = Ctx->getCOFFSection(".gcc_except_table",
                                       COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -693,6 +724,16 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
                           COFF::IMAGE_SCN_MEM_DISCARDABLE |
                               COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                               COFF::IMAGE_SCN_MEM_READ,
                           SectionKind::getMetadata(), "addr_sec");
+  DwarfCUIndexSection = Ctx->getCOFFSection(
+      ".debug_cu_index",
+      COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+          COFF::IMAGE_SCN_MEM_READ,
+      SectionKind::getMetadata());
+  DwarfTUIndexSection = Ctx->getCOFFSection(
+      ".debug_tu_index",
+      COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+          COFF::IMAGE_SCN_MEM_READ,
+      SectionKind::getMetadata());
   DwarfAccelNamesSection = Ctx->getCOFFSection(
       ".apple_names",
      COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
@@ -720,11 +761,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
   PDataSection = Ctx->getCOFFSection(
       ".pdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
-      SectionKind::getDataRel());
+      SectionKind::getData());
 
   XDataSection = Ctx->getCOFFSection(
       ".xdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
-      SectionKind::getDataRel());
+      SectionKind::getData());
 
   SXDataSection = Ctx->getCOFFSection(".sxdata", COFF::IMAGE_SCN_LNK_INFO,
                                       SectionKind::getMetadata());
@@ -732,12 +773,12 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
   TLSDataSection = Ctx->getCOFFSection(
       ".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ |
                    COFF::IMAGE_SCN_MEM_WRITE,
-      SectionKind::getDataRel());
-
+      SectionKind::getData());
+
   StackMapSection = Ctx->getCOFFSection(".llvm_stackmaps",
                                         COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
                                             COFF::IMAGE_SCN_MEM_READ,
-                                       SectionKind::getReadOnly());
+                                        SectionKind::getReadOnly());
 }
 
 void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
@@ -752,6 +793,7 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
   CommDirectiveSupportsAlignment = true;
   SupportsWeakOmittedEHFrame = true;
   SupportsCompactUnwindWithoutEHFrame = false;
+  OmitDwarfIfHaveCompactUnwind = false;
 
   PersonalityEncoding = LSDAEncoding = FDECFIEncoding = TTypeEncoding =
       dwarf::DW_EH_PE_absptr;
@@ -767,25 +809,26 @@ void MCObjectFileInfo::InitMCObjectFileInfo(const Triple &TheTriple,
 
   TT = TheTriple;
 
-  Triple::ArchType Arch = TT.getArch();
-  // FIXME: Checking for Arch here to filter out bogus triples such as
-  // cellspu-apple-darwin. Perhaps we should fix in Triple?
-  if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
-       Arch == Triple::arm || Arch == Triple::thumb ||
-       Arch == Triple::aarch64 ||
-       Arch == Triple::ppc || Arch == Triple::ppc64 ||
-       Arch == Triple::UnknownArch) &&
-      TT.isOSBinFormatMachO()) {
+  switch (TT.getObjectFormat()) {
+  case Triple::MachO:
     Env = IsMachO;
     initMachOMCObjectFileInfo(TT);
-  } else if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
-              Arch == Triple::arm || Arch == Triple::thumb) &&
-             (TT.isOSWindows() && TT.getObjectFormat() == Triple::COFF)) {
+    break;
+  case Triple::COFF:
+    if (!TT.isOSWindows())
+      report_fatal_error(
+          "Cannot initialize MC for non-Windows COFF object files.");
+
     Env = IsCOFF;
     initCOFFMCObjectFileInfo(TT);
-  } else {
+    break;
+  case Triple::ELF:
     Env = IsELF;
     initELFMCObjectFileInfo(TT);
+    break;
+  case Triple::UnknownObjectFormat:
+    report_fatal_error("Cannot initialize MC for unknown object file format.");
+    break;
   }
 }
 
@@ -799,24 +842,3 @@ MCSection *MCObjectFileInfo::getDwarfTypesSection(uint64_t Hash) const {
   return Ctx->getELFSection(".debug_types", ELF::SHT_PROGBITS, ELF::SHF_GROUP,
                             0, utostr(Hash));
 }
-
-void MCObjectFileInfo::InitEHFrameSection() {
-  if (Env == IsMachO)
-    EHFrameSection =
-      Ctx->getMachOSection("__TEXT", "__eh_frame",
-                           MachO::S_COALESCED |
-                           MachO::S_ATTR_NO_TOC |
-                           MachO::S_ATTR_STRIP_STATIC_SYMS |
-                           MachO::S_ATTR_LIVE_SUPPORT,
-                           SectionKind::getReadOnly());
-  else if (Env == IsELF)
-    EHFrameSection =
-        Ctx->getELFSection(".eh_frame", EHSectionType, EHSectionFlags);
-  else
-    EHFrameSection =
-      Ctx->getCOFFSection(".eh_frame",
-                          COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
-                          COFF::IMAGE_SCN_MEM_READ |
-                          COFF::IMAGE_SCN_MEM_WRITE,
-                          SectionKind::getDataRel());
-}
return; + if (!F) { + F = new MCDataFragment(); + MCSection *CurSection = getCurrentSectionOnly(); + CurSection->getFragmentList().insert(CurInsertionPoint, F); + F->setParent(CurSection); } + for (MCSymbol *Sym : PendingLabels) { + Sym->setFragment(F); + Sym->setOffset(FOffset); + } + PendingLabels.clear(); } void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) { // If not assigned to the same (valid) fragment, fallback. - if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment()) { + if (!Hi->getFragment() || Hi->getFragment() != Lo->getFragment() || + Hi->isVariable() || Lo->isVariable()) { MCStreamer::emitAbsoluteSymbolDiff(Hi, Lo, Size); return; } @@ -93,7 +94,7 @@ MCFragment *MCObjectStreamer::getCurrentFragment() const { assert(getCurrentSectionOnly() && "No current section!"); if (CurInsertionPoint != getCurrentSectionOnly()->getFragmentList().begin()) - return std::prev(CurInsertionPoint); + return &*std::prev(CurInsertionPoint); return nullptr; } @@ -121,7 +122,7 @@ void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) { } void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { + SMLoc Loc) { MCStreamer::EmitValueImpl(Value, Size, Loc); MCDataFragment *DF = getOrCreateDataFragment(); flushPendingLabels(DF, DF->getContents().size()); @@ -155,7 +156,6 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { MCStreamer::EmitLabel(Symbol); getAssembler().registerSymbol(*Symbol); - assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!"); // If there is a current fragment, mark the symbol as pointing into it. // Otherwise queue the label and set its fragment pointer when we emit the @@ -276,7 +276,6 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst, raw_svector_ostream VecOS(Code); getAssembler().getEmitter().encodeInstruction(Inst, VecOS, IF->getFixups(), STI); - VecOS.flush(); IF->getContents().append(Code.begin(), Code.end()); } @@ -321,8 +320,10 @@ static const MCExpr *buildSymbolDiff(MCObjectStreamer &OS, const MCSymbol *A, return AddrDelta; } -static void emitDwarfSetLineAddr(MCObjectStreamer &OS, int64_t LineDelta, - const MCSymbol *Label, int PointerSize) { +static void emitDwarfSetLineAddr(MCObjectStreamer &OS, + MCDwarfLineTableParams Params, + int64_t LineDelta, const MCSymbol *Label, + int PointerSize) { // emit the sequence to set the address OS.EmitIntValue(dwarf::DW_LNS_extended_op, 1); OS.EmitULEB128IntValue(PointerSize + 1); @@ -330,7 +331,7 @@ static void emitDwarfSetLineAddr(MCObjectStreamer &OS, int64_t LineDelta, OS.EmitSymbolValue(Label, PointerSize); // emit the sequence for the LineDelta (from 1) and a zero address delta. 
- MCDwarfLineAddr::Emit(&OS, LineDelta, 0); + MCDwarfLineAddr::Emit(&OS, Params, LineDelta, 0); } void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, @@ -338,13 +339,15 @@ void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *Label, unsigned PointerSize) { if (!LastLabel) { - emitDwarfSetLineAddr(*this, LineDelta, Label, PointerSize); + emitDwarfSetLineAddr(*this, Assembler->getDWARFLinetableParams(), LineDelta, + Label, PointerSize); return; } const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel); int64_t Res; if (AddrDelta->evaluateAsAbsolute(Res, getAssembler())) { - MCDwarfLineAddr::Emit(this, LineDelta, Res); + MCDwarfLineAddr::Emit(this, Assembler->getDWARFLinetableParams(), LineDelta, + Res); return; } insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta)); @@ -388,26 +391,9 @@ void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment, cast(getCurrentFragment())->setEmitNops(true); } -bool MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, +void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value) { - int64_t Res; - if (Offset->evaluateAsAbsolute(Res, getAssembler())) { - insert(new MCOrgFragment(*Offset, Value)); - return false; - } - - MCSymbol *CurrentPos = getContext().createTempSymbol(); - EmitLabel(CurrentPos); - MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - const MCExpr *Ref = - MCSymbolRefExpr::create(CurrentPos, Variant, getContext()); - const MCExpr *Delta = - MCBinaryExpr::create(MCBinaryExpr::Sub, Offset, Ref, getContext()); - - if (!Delta->evaluateAsAbsolute(Res, getAssembler())) - return true; - EmitFill(Res, Value); - return false; + insert(new MCOrgFragment(*Offset, Value)); } // Associate GPRel32 fixup with data and resize data area @@ -430,19 +416,31 @@ void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) { DF->getContents().resize(DF->getContents().size() + 8, 0); } -void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) { - // FIXME: A MCFillFragment would be more memory efficient but MCExpr has - // problems evaluating expressions across multiple fragments. +bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name, + const MCExpr *Expr, SMLoc Loc) { + int64_t OffsetValue; + if (!Offset.evaluateAsAbsolute(OffsetValue)) + llvm_unreachable("Offset is not absolute"); + MCDataFragment *DF = getOrCreateDataFragment(); flushPendingLabels(DF, DF->getContents().size()); - DF->getContents().append(NumBytes, FillValue); + + MCFixupKind Kind; + if (!Assembler->getBackend().getFixupKind(Name, Kind)) + return true; + + if (Expr == nullptr) + Expr = + MCSymbolRefExpr::create(getContext().createTempSymbol(), getContext()); + DF->getFixups().push_back(MCFixup::create(OffsetValue, Expr, Kind, Loc)); + return false; } -void MCObjectStreamer::EmitZeros(uint64_t NumBytes) { +void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) { const MCSection *Sec = getCurrentSection().first; assert(Sec && "need a section"); unsigned ItemSize = Sec->isVirtualSection() ? 0 : 1; - insert(new MCFillFragment(0, ItemSize, NumBytes)); + insert(new MCFillFragment(FillValue, ItemSize, NumBytes)); } void MCObjectStreamer::FinishImpl() { @@ -451,7 +449,7 @@ void MCObjectStreamer::FinishImpl() { MCGenDwarfInfo::Emit(this); // Dump out the dwarf file & directory tables and line tables. 
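  // Note: the MCDwarfLineTableParams argument threaded through these Emit
  // calls carries the DWARF line-table special-opcode parameters (opcode
  // base, line base, line range), letting targets override the previously
  // hard-coded defaults.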
- MCDwarfLineTable::Emit(this); + MCDwarfLineTable::Emit(this, getAssembler().getDWARFLinetableParams()); flushPendingLabels(nullptr); getAssembler().Finish(); diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index 347903408737..e84f74ae81d6 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -33,8 +33,14 @@ bool MCObjectWriter::isSymbolRefDifferenceFullyResolved( if (!SA.getFragment() || !SB.getFragment()) return false; - return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, *SB.getFragment(), - InSet, false); + return isSymbolRefDifferenceFullyResolvedImpl(Asm, SA, SB, InSet); +} + +bool MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( + const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B, + bool InSet) const { + return isSymbolRefDifferenceFullyResolvedImpl(Asm, A, *B.getFragment(), InSet, + false); } bool MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index b983d9995f4d..36c192026856 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -436,7 +436,8 @@ StringRef AsmLexer::LexUntilEndOfLine() { return StringRef(TokStart, CurPtr-TokStart); } -const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) { +size_t AsmLexer::peekTokens(MutableArrayRef Buf, + bool ShouldSkipSpace) { const char *SavedTokStart = TokStart; const char *SavedCurPtr = CurPtr; bool SavedAtStartOfLine = isAtStartOfLine; @@ -446,7 +447,16 @@ const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) { SMLoc SavedErrLoc = getErrLoc(); SkipSpace = ShouldSkipSpace; - AsmToken Token = LexToken(); + + size_t ReadCount; + for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) { + AsmToken Token = LexToken(); + + Buf[ReadCount] = Token; + + if (Token.is(AsmToken::Eof)) + break; + } SetError(SavedErrLoc, SavedErr); @@ -455,7 +465,7 @@ const AsmToken AsmLexer::peekTok(bool ShouldSkipSpace) { CurPtr = SavedCurPtr; TokStart = SavedTokStart; - return Token; + return ReadCount; } bool AsmLexer::isAtStartOfComment(const char *Ptr) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 04d141389c92..646cbb43cae8 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -33,6 +33,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -251,14 +252,14 @@ private: bool parseStatement(ParseStatementInfo &Info, MCAsmParserSemaCallback *SI); void eatToEndOfLine(); - bool parseCppHashLineFilenameComment(const SMLoc &L); + bool parseCppHashLineFilenameComment(SMLoc L); void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, ArrayRef Parameters); bool expandMacro(raw_svector_ostream &OS, StringRef Body, ArrayRef Parameters, ArrayRef A, bool EnableAtPseudoVariable, - const SMLoc &L); + SMLoc L); /// \brief Are macros enabled in the parser? 
bool areMacrosEnabled() {return MacrosEnabledFlag;} @@ -342,6 +343,7 @@ private: enum DirectiveKind { DK_NO_DIRECTIVE, // Placeholder DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT, + DK_RELOC, DK_VALUE, DK_2BYTE, DK_LONG, DK_INT, DK_4BYTE, DK_QUAD, DK_8BYTE, DK_OCTA, DK_SINGLE, DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW, DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR, @@ -374,6 +376,7 @@ private: // ".ascii", ".asciz", ".string" bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); + bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc" bool parseDirectiveValue(unsigned Size); // ".byte", ".long", ... bool parseDirectiveOctaValue(); // ".octa" bool parseDirectiveRealValue(const fltSemantics &); // ".single", ... @@ -553,6 +556,8 @@ void AsmParser::Note(SMLoc L, const Twine &Msg, ArrayRef Ranges) { } bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef Ranges) { + if(getTargetParser().getTargetOptions().MCNoWarn) + return false; if (getTargetParser().getTargetOptions().MCFatalWarnings) return Error(L, Msg, Ranges); printMessage(L, SourceMgr::DK_Warning, Msg, Ranges); @@ -679,11 +684,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // so conservatively exclude them. Only do this if we're finalizing, though, // as otherwise we won't necessarilly have seen everything yet. if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) { - const MCContext::SymbolTable &Symbols = getContext().getSymbols(); - for (MCContext::SymbolTable::const_iterator i = Symbols.begin(), - e = Symbols.end(); - i != e; ++i) { - MCSymbol *Sym = i->getValue(); + for (const auto &TableEntry : getContext().getSymbols()) { + MCSymbol *Sym = TableEntry.getValue(); // Variable symbols may not be marked as defined, so check those // explicitly. If we know it's a variable, we have a definition for // the purposes of this check. @@ -691,9 +693,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // FIXME: We would really like to refer back to where the symbol was // first referenced for a source location. We need to add something // to track that. Currently, we just point to the end of the file. - printMessage( - getLexer().getLoc(), SourceMgr::DK_Error, - "assembler local symbol '" + Sym->getName() + "' not defined"); + return Error(getLexer().getLoc(), "assembler local symbol '" + + Sym->getName() + "' not defined"); } } @@ -702,7 +703,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { if (!HadError && !NoFinalize) Out.Finish(); - return HadError; + return HadError || getContext().hadError(); } void AsmParser::checkForValidSection() { @@ -865,11 +866,12 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. 
- if (Sym->isVariable() && isa(Sym->getVariableValue())) { + if (Sym->isVariable() && + isa(Sym->getVariableValue(/*SetUsed*/ false))) { if (Variant) return Error(EndLoc, "unexpected modifier on variable reference"); - Res = Sym->getVariableValue(); + Res = Sym->getVariableValue(/*SetUsed*/ false); return false; } @@ -1102,8 +1104,9 @@ bool AsmParser::parseAbsoluteExpression(int64_t &Res) { return false; } -unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K, - MCBinaryExpr::Opcode &Kind) { +static unsigned getDarwinBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind, + bool ShouldUseLogicalShr) { switch (K) { default: return 0; // not a binop. @@ -1155,7 +1158,7 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K, Kind = MCBinaryExpr::Shl; return 4; case AsmToken::GreaterGreater: - Kind = MAI.shouldUseLogicalShr() ? MCBinaryExpr::LShr : MCBinaryExpr::AShr; + Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr; return 4; // High Intermediate Precedence: +, - @@ -1179,6 +1182,89 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K, } } +static unsigned getGNUBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind, + bool ShouldUseLogicalShr) { + switch (K) { + default: + return 0; // not a binop. + + // Lowest Precedence: &&, || + case AsmToken::AmpAmp: + Kind = MCBinaryExpr::LAnd; + return 2; + case AsmToken::PipePipe: + Kind = MCBinaryExpr::LOr; + return 1; + + // Low Precedence: ==, !=, <>, <, <=, >, >= + case AsmToken::EqualEqual: + Kind = MCBinaryExpr::EQ; + return 3; + case AsmToken::ExclaimEqual: + case AsmToken::LessGreater: + Kind = MCBinaryExpr::NE; + return 3; + case AsmToken::Less: + Kind = MCBinaryExpr::LT; + return 3; + case AsmToken::LessEqual: + Kind = MCBinaryExpr::LTE; + return 3; + case AsmToken::Greater: + Kind = MCBinaryExpr::GT; + return 3; + case AsmToken::GreaterEqual: + Kind = MCBinaryExpr::GTE; + return 3; + + // Low Intermediate Precedence: +, - + case AsmToken::Plus: + Kind = MCBinaryExpr::Add; + return 4; + case AsmToken::Minus: + Kind = MCBinaryExpr::Sub; + return 4; + + // High Intermediate Precedence: |, &, ^ + // + // FIXME: gas seems to support '!' as an infix operator? + case AsmToken::Pipe: + Kind = MCBinaryExpr::Or; + return 5; + case AsmToken::Caret: + Kind = MCBinaryExpr::Xor; + return 5; + case AsmToken::Amp: + Kind = MCBinaryExpr::And; + return 5; + + // Highest Precedence: *, /, %, <<, >> + case AsmToken::Star: + Kind = MCBinaryExpr::Mul; + return 6; + case AsmToken::Slash: + Kind = MCBinaryExpr::Div; + return 6; + case AsmToken::Percent: + Kind = MCBinaryExpr::Mod; + return 6; + case AsmToken::LessLess: + Kind = MCBinaryExpr::Shl; + return 6; + case AsmToken::GreaterGreater: + Kind = ShouldUseLogicalShr ? MCBinaryExpr::LShr : MCBinaryExpr::AShr; + return 6; + } +} + +unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K, + MCBinaryExpr::Opcode &Kind) { + bool ShouldUseLogicalShr = MAI.shouldUseLogicalShr(); + return IsDarwin ? getDarwinBinOpPrecedence(K, Kind, ShouldUseLogicalShr) + : getGNUBinOpPrecedence(K, Kind, ShouldUseLogicalShr); +} + /// \brief Parse all binary operators with precedence >= 'Precedence'. /// Res contains the LHS of the expression on input. bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, @@ -1251,6 +1337,15 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // Treat '.' as a valid identifier in this context. 
Lex(); IDVal = "."; + } else if (Lexer.is(AsmToken::LCurly)) { + // Treat '{' as a valid identifier in this context. + Lex(); + IDVal = "{"; + + } else if (Lexer.is(AsmToken::RCurly)) { + // Treat '}' as a valid identifier in this context. + Lex(); + IDVal = "}"; } else if (parseIdentifier(IDVal)) { if (!TheCondState.Ignore) return TokError("unexpected token at start of statement"); @@ -1313,6 +1408,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, // See what kind of statement we have. switch (Lexer.getKind()) { case AsmToken::Colon: { + if (!getTargetParser().isLabel(ID)) + break; checkForValidSection(); // identifier ':' -> Label. @@ -1334,8 +1431,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, SI->LookupInlineAsmLabel(IDVal, getSourceManager(), IDLoc, true); assert(RewrittenLabel.size() && "We should have an internal name here."); - Info.AsmRewrites->push_back(AsmRewrite(AOK_Label, IDLoc, - IDVal.size(), RewrittenLabel)); + Info.AsmRewrites->emplace_back(AOK_Label, IDLoc, IDVal.size(), + RewrittenLabel); IDVal = RewrittenLabel; } Sym = getContext().getOrCreateSymbol(IDVal); @@ -1371,6 +1468,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, } case AsmToken::Equal: + if (!getTargetParser().equalIsAsmAssignment()) + break; // identifier '=' ... -> assignment statement Lex(); @@ -1599,6 +1698,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, return parseDirectiveError(IDLoc, true); case DK_WARNING: return parseDirectiveWarning(IDLoc); + case DK_RELOC: + return parseDirectiveReloc(IDLoc); } return Error(IDLoc, "unknown directive"); @@ -1613,12 +1714,14 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info, if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN")) return parseDirectiveMSAlign(IDLoc, Info); + if (ParsingInlineAsm && (IDVal == "even")) + Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4); checkForValidSection(); // Canonicalize the opcode to lower case. std::string OpcodeStr = IDVal.lower(); ParseInstructionInfo IInfo(Info.AsmRewrites); - bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, IDLoc, + bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID, Info.ParsedOperands); Info.ParseError = HadError; @@ -1703,7 +1806,7 @@ void AsmParser::eatToEndOfLine() { /// parseCppHashLineFilenameComment as this: /// ::= # number "filename" /// or just as a full line comment if it doesn't have a number and a string. -bool AsmParser::parseCppHashLineFilenameComment(const SMLoc &L) { +bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) { Lex(); // Eat the hash token. if (getLexer().isNot(AsmToken::Integer)) { @@ -1743,7 +1846,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { raw_ostream &OS = errs(); const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr(); - const SMLoc &DiagLoc = Diag.getLoc(); + SMLoc DiagLoc = Diag.getLoc(); unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); unsigned CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc); @@ -1802,7 +1905,7 @@ static bool isIdentifierChar(char c) { bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, ArrayRef Parameters, ArrayRef A, - bool EnableAtPseudoVariable, const SMLoc &L) { + bool EnableAtPseudoVariable, SMLoc L) { unsigned NParameters = Parameters.size(); bool HasVararg = NParameters ? 
Parameters.back().Vararg : false; if ((!IsDarwin || NParameters != 0) && NParameters != A.size()) @@ -1858,10 +1961,8 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, break; // Otherwise substitute with the token values, with spaces eliminated. - for (MCAsmMacroArgument::const_iterator it = A[Index].begin(), - ie = A[Index].end(); - it != ie; ++it) - OS << it->getString(); + for (const AsmToken &Token : A[Index]) + OS << Token.getString(); break; } } @@ -1897,15 +1998,13 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, } } else { bool VarargParameter = HasVararg && Index == (NParameters - 1); - for (MCAsmMacroArgument::const_iterator it = A[Index].begin(), - ie = A[Index].end(); - it != ie; ++it) + for (const AsmToken &Token : A[Index]) // We expect no quotes around the string's contents when // parsing for varargs. - if (it->getKind() != AsmToken::String || VarargParameter) - OS << it->getString(); + if (Token.getKind() != AsmToken::String || VarargParameter) + OS << Token.getString(); else - OS << it->getStringContents(); + OS << Token.getStringContents(); Pos += 1 + Argument.size(); } @@ -2371,6 +2470,51 @@ bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { return false; } +/// parseDirectiveReloc +/// ::= .reloc expression , identifier [ , expression ] +bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) { + const MCExpr *Offset; + const MCExpr *Expr = nullptr; + + SMLoc OffsetLoc = Lexer.getTok().getLoc(); + if (parseExpression(Offset)) + return true; + + // We can only deal with constant expressions at the moment. + int64_t OffsetValue; + if (!Offset->evaluateAsAbsolute(OffsetValue)) + return Error(OffsetLoc, "expression is not a constant value"); + + if (Lexer.isNot(AsmToken::Comma)) + return TokError("expected comma"); + Lexer.Lex(); + + if (Lexer.isNot(AsmToken::Identifier)) + return TokError("expected relocation name"); + SMLoc NameLoc = Lexer.getTok().getLoc(); + StringRef Name = Lexer.getTok().getIdentifier(); + Lexer.Lex(); + + if (Lexer.is(AsmToken::Comma)) { + Lexer.Lex(); + SMLoc ExprLoc = Lexer.getLoc(); + if (parseExpression(Expr)) + return true; + + MCValue Value; + if (!Expr->evaluateAsRelocatable(Value, nullptr, nullptr)) + return Error(ExprLoc, "expression must be relocatable"); + } + + if (Lexer.isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in .reloc directive"); + + if (getStreamer().EmitRelocDirective(*Offset, Name, Expr, DirectiveLoc)) + return Error(NameLoc, "unknown relocation name"); + + return false; +} + /// parseDirectiveValue /// ::= (.byte | .short | ... ) [ expression (, expression)* ] bool AsmParser::parseDirectiveValue(unsigned Size) { @@ -2617,7 +2761,6 @@ bool AsmParser::parseDirectiveOrg() { checkForValidSection(); const MCExpr *Offset; - SMLoc Loc = getTok().getLoc(); if (parseExpression(Offset)) return true; @@ -2636,13 +2779,7 @@ bool AsmParser::parseDirectiveOrg() { } Lex(); - - // Only limited forms of relocatable expressions are accepted here, it - // has to be relative to the current section. The streamer will return - // 'true' if the expression wasn't evaluatable. 
- if (getStreamer().EmitValueToOffset(Offset, FillExpr)) - return Error(Loc, "expected assembly-time absolute expression"); - + getStreamer().emitValueToOffset(Offset, FillExpr); return false; } @@ -2703,7 +2840,11 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) { Alignment = 1ULL << Alignment; } else { - // Reject alignments that aren't a power of two, for gas compatibility. + // Reject alignments that aren't either a power of two or zero, + // for gas compatibility. Alignment of zero is silently rounded + // up to one. + if (Alignment == 0) + Alignment = 1; if (!isPowerOf2_64(Alignment)) Error(AlignmentLoc, "alignment must be a power of 2"); } @@ -4269,6 +4410,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".err"] = DK_ERR; DirectiveKindMap[".error"] = DK_ERROR; DirectiveKindMap[".warning"] = DK_WARNING; + DirectiveKindMap[".reloc"] = DK_RELOC; } MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { @@ -4405,10 +4547,10 @@ bool AsmParser::parseDirectiveIrp(SMLoc DirectiveLoc) { SmallString<256> Buf; raw_svector_ostream OS(Buf); - for (MCAsmMacroArguments::iterator i = A.begin(), e = A.end(); i != e; ++i) { + for (const MCAsmMacroArgument &Arg : A) { // Note that the AtPseudoVariable is enabled for instantiations of .irp. // This is undocumented, but GAS seems to support it. - if (expandMacro(OS, M->Body, Parameter, *i, true, getTok().getLoc())) + if (expandMacro(OS, M->Body, Parameter, Arg, true, getTok().getLoc())) return true; } @@ -4488,10 +4630,10 @@ bool AsmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, if (!MCE) return Error(ExprLoc, "unexpected expression in _emit"); uint64_t IntValue = MCE->getValue(); - if (!isUIntN(8, IntValue) && !isIntN(8, IntValue)) + if (!isUInt<8>(IntValue) && !isInt<8>(IntValue)) return Error(ExprLoc, "literal value out of range for directive"); - Info.AsmRewrites->push_back(AsmRewrite(AOK_Emit, IDLoc, Len)); + Info.AsmRewrites->emplace_back(AOK_Emit, IDLoc, Len); return false; } @@ -4507,8 +4649,7 @@ bool AsmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { if (!isPowerOf2_64(IntValue)) return Error(ExprLoc, "literal value not a power of two greater then zero"); - Info.AsmRewrites->push_back( - AsmRewrite(AOK_Align, IDLoc, 5, Log2_64(IntValue))); + Info.AsmRewrites->emplace_back(AOK_Align, IDLoc, 5, Log2_64(IntValue)); return false; } @@ -4604,18 +4745,18 @@ bool AsmParser::parseMSInlineAsm( OutputDecls.push_back(OpDecl); OutputDeclsAddressOf.push_back(Operand.needAddressOf()); OutputConstraints.push_back(("=" + Operand.getConstraint()).str()); - AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size())); + AsmStrRewrites.emplace_back(AOK_Output, Start, SymName.size()); } else { InputDecls.push_back(OpDecl); InputDeclsAddressOf.push_back(Operand.needAddressOf()); InputConstraints.push_back(Operand.getConstraint().str()); - AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size())); + AsmStrRewrites.emplace_back(AOK_Input, Start, SymName.size()); } } // Consider implicit defs to be clobbers. Think of cpuid and push. - ArrayRef ImpDefs(Desc.getImplicitDefs(), - Desc.getNumImplicitDefs()); + ArrayRef ImpDefs(Desc.getImplicitDefs(), + Desc.getNumImplicitDefs()); ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end()); } @@ -4710,14 +4851,23 @@ bool AsmParser::parseMSInlineAsm( OS << ".byte"; break; case AOK_Align: { - unsigned Val = AR.Val; - OS << ".align " << Val; + // MS alignment directives are measured in bytes. 
If the native assembler + // measures alignment in bytes, we can pass it straight through. + OS << ".align"; + if (getContext().getAsmInfo()->getAlignmentIsInBytes()) + break; - // Skip the original immediate. + // Alignment is in log2 form, so print that instead and skip the original + // immediate. + unsigned Val = AR.Val; + OS << ' ' << Val; assert(Val < 10 && "Expected alignment less then 2^10."); AdditionalSkip = (Val < 4) ? 2 : Val < 7 ? 3 : 4; break; } + case AOK_EVEN: + OS << ".even"; + break; case AOK_DotOperator: // Insert the dot if the user omitted it. OS.flush(); @@ -4803,7 +4953,8 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, // FIXME: Diagnose assignment to protected identifier (e.g., register name). if (isSymbolUsedInExpression(Sym, Value)) return Parser.Error(EqualLoc, "Recursive use of '" + Name + "'"); - else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable()) + else if (Sym->isUndefined(/*SetUsed*/ false) && !Sym->isUsed() && + !Sym->isVariable()) ; // Allow redefinitions of undefined symbols only used in directives. else if (Sym->isVariable() && !Sym->isUsed() && allow_redef) ; // Allow redefinitions of variables that haven't yet been used. @@ -4815,15 +4966,8 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef, return Parser.Error(EqualLoc, "invalid reassignment of non-absolute variable '" + Name + "'"); - - // Don't count these checks as uses. - Sym->setUsed(false); } else if (Name == ".") { - if (Parser.getStreamer().EmitValueToOffset(Value, 0)) { - Parser.Error(EqualLoc, "expected absolute expression"); - Parser.eatToEndOfStatement(); - return true; - } + Parser.getStreamer().emitValueToOffset(Value, 0); return false; } else Sym = Parser.getContext().getOrCreateSymbol(Name); diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index f09bce005d6a..a4b2b195f710 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -98,11 +98,10 @@ class COFFAsmParser : public MCAsmParserExtension { SectionKind::getText()); } bool ParseSectionDirectiveData(StringRef, SMLoc) { - return ParseSectionSwitch(".data", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA - | COFF::IMAGE_SCN_MEM_READ - | COFF::IMAGE_SCN_MEM_WRITE, - SectionKind::getDataRel()); + return ParseSectionSwitch(".data", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getData()); } bool ParseSectionDirectiveBSS(StringRef, SMLoc) { return ParseSectionSwitch(".bss", @@ -153,7 +152,7 @@ static SectionKind computeSectionKind(unsigned Flags) { if (Flags & COFF::IMAGE_SCN_MEM_READ && (Flags & COFF::IMAGE_SCN_MEM_WRITE) == 0) return SectionKind::getReadOnly(); - return SectionKind::getDataRel(); + return SectionKind::getData(); } bool COFFAsmParser::ParseSectionFlags(StringRef FlagsString, unsigned* Flags) { diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index dc664e8a8f61..73e068a34391 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -8,10 +8,13 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include 
"llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCSectionMachO.h" @@ -38,6 +41,8 @@ class DarwinAsmParser : public MCAsmParserExtension { unsigned TAA = 0, unsigned ImplicitAlign = 0, unsigned StubSize = 0); + SMLoc LastVersionMinDirective; + public: DarwinAsmParser() {} @@ -164,9 +169,14 @@ public: addDirectiveHandler<&DarwinAsmParser::parseSectionDirectiveTLV>(".tlv"); addDirectiveHandler<&DarwinAsmParser::parseSectionDirectiveIdent>(".ident"); + addDirectiveHandler<&DarwinAsmParser::parseVersionMin>( + ".watchos_version_min"); + addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(".tvos_version_min"); addDirectiveHandler<&DarwinAsmParser::parseVersionMin>(".ios_version_min"); addDirectiveHandler<&DarwinAsmParser::parseVersionMin>( ".macosx_version_min"); + + LastVersionMinDirective = SMLoc(); } bool parseDirectiveDesc(StringRef, SMLoc); @@ -381,9 +391,8 @@ bool DarwinAsmParser::parseSectionSwitch(const char *Segment, // FIXME: Arch specific. bool isText = TAA & MachO::S_ATTR_PURE_INSTRUCTIONS; getStreamer().SwitchSection(getContext().getMachOSection( - Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() : SectionKind::getData())); // Set the implicit alignment, if any. // @@ -579,12 +588,34 @@ bool DarwinAsmParser::parseDirectiveSection(StringRef, SMLoc) { if (!ErrorStr.empty()) return Error(Loc, ErrorStr.c_str()); + // Issue a warning if the target is not powerpc and Section is a *coal* section. + Triple TT = getParser().getContext().getObjectFileInfo()->getTargetTriple(); + Triple::ArchType ArchTy = TT.getArch(); + + if (ArchTy != Triple::ppc && ArchTy != Triple::ppc64) { + StringRef NonCoalSection = StringSwitch(Section) + .Case("__textcoal_nt", "__text") + .Case("__const_coal", "__const") + .Case("__datacoal_nt", "__data") + .Default(Section); + + if (!Section.equals(NonCoalSection)) { + StringRef SectionVal(Loc.getPointer()); + size_t B = SectionVal.find(',') + 1, E = SectionVal.find(',', B); + SMLoc BLoc = SMLoc::getFromPointer(SectionVal.data() + B); + SMLoc ELoc = SMLoc::getFromPointer(SectionVal.data() + E); + getParser().Warning(Loc, "section \"" + Section + "\" is deprecated", + SMRange(BLoc, ELoc)); + getParser().Note(Loc, "change section name to \"" + NonCoalSection + + "\"", SMRange(BLoc, ELoc)); + } + } + // FIXME: Arch specific. bool isText = Segment == "__TEXT"; // FIXME: Hack. getStreamer().SwitchSection(getContext().getMachOSection( - Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() : SectionKind::getData())); return false; } @@ -636,17 +667,16 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { "environment variable unset."); // Open the secure log file if we haven't already. - raw_ostream *OS = getContext().getSecureLog(); + raw_fd_ostream *OS = getContext().getSecureLog(); if (!OS) { std::error_code EC; - OS = new raw_fd_ostream(SecureLogFile, EC, - sys::fs::F_Append | sys::fs::F_Text); - if (EC) { - delete OS; + auto NewOS = llvm::make_unique( + SecureLogFile, EC, sys::fs::F_Append | sys::fs::F_Text); + if (EC) return Error(IDLoc, Twine("can't open secure log file: ") + SecureLogFile + " (" + EC.message() + ")"); - } - getContext().setSecureLog(OS); + OS = NewOS.get(); + getContext().setSecureLog(std::move(NewOS)); } // Write the message. 
@@ -867,9 +897,11 @@ bool DarwinAsmParser::parseDirectiveDataRegionEnd(StringRef, SMLoc) { /// parseVersionMin /// ::= .ios_version_min major,minor[,update] /// ::= .macosx_version_min major,minor[,update] -bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc) { +bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc) { int64_t Major = 0, Minor = 0, Update = 0; int Kind = StringSwitch(Directive) + .Case(".watchos_version_min", MCVM_WatchOSVersionMin) + .Case(".tvos_version_min", MCVM_TvOSVersionMin) .Case(".ios_version_min", MCVM_IOSVersionMin) .Case(".macosx_version_min", MCVM_OSXVersionMin); // Get the major version number. @@ -902,6 +934,24 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc) { Lex(); } + const Triple &T = getContext().getObjectFileInfo()->getTargetTriple(); + Triple::OSType ExpectedOS = Triple::UnknownOS; + switch ((MCVersionMinType)Kind) { + case MCVM_WatchOSVersionMin: ExpectedOS = Triple::WatchOS; break; + case MCVM_TvOSVersionMin: ExpectedOS = Triple::TvOS; break; + case MCVM_IOSVersionMin: ExpectedOS = Triple::IOS; break; + case MCVM_OSXVersionMin: ExpectedOS = Triple::MacOSX; break; + } + if (T.getOS() != ExpectedOS) + Warning(Loc, Directive + " should only be used for " + + Triple::getOSTypeName(ExpectedOS) + " targets"); + + if (LastVersionMinDirective.isValid()) { + Warning(Loc, "overriding previous version_min directive"); + Note(LastVersionMinDirective, "previous definition is here"); + } + LastVersionMinDirective = Loc; + // We've parsed a correct version specifier, so send it to the streamer. getStreamer().EmitVersionMin((MCVersionMinType)Kind, Major, Minor, Update); diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 5f8a6039afd0..6cbcdec5e275 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -51,8 +51,6 @@ public: &ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel"); addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro"); - addDirectiveHandler< - &ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local"); addDirectiveHandler< &ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame"); addDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section"); @@ -81,8 +79,8 @@ public: // the best way for us to get access to it? 
bool ParseSectionDirectiveData(StringRef, SMLoc) { return ParseSectionSwitch(".data", ELF::SHT_PROGBITS, - ELF::SHF_WRITE |ELF::SHF_ALLOC, - SectionKind::getDataRel()); + ELF::SHF_WRITE | ELF::SHF_ALLOC, + SectionKind::getData()); } bool ParseSectionDirectiveText(StringRef, SMLoc) { return ParseSectionSwitch(".text", ELF::SHT_PROGBITS, @@ -113,9 +111,8 @@ public: } bool ParseSectionDirectiveDataRel(StringRef, SMLoc) { return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | - ELF::SHF_WRITE, - SectionKind::getDataRel()); + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getData()); } bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) { return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS, @@ -123,17 +120,10 @@ public: ELF::SHF_WRITE, SectionKind::getReadOnlyWithRel()); } - bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) { - return ParseSectionSwitch(".data.rel.ro.local", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | - ELF::SHF_WRITE, - SectionKind::getReadOnlyWithRelLocal()); - } bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) { return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS, - ELF::SHF_ALLOC | - ELF::SHF_WRITE, - SectionKind::getDataRel()); + ELF::SHF_ALLOC | ELF::SHF_WRITE, + SectionKind::getData()); } bool ParseDirectivePushSection(StringRef, SMLoc); bool ParseDirectivePopSection(StringRef, SMLoc); diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp index 795cc85ef547..e891bd2c6240 100644 --- a/lib/MC/MCParser/MCAsmLexer.cpp +++ b/lib/MC/MCParser/MCAsmLexer.cpp @@ -12,8 +12,8 @@ using namespace llvm; -MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), - TokStart(nullptr), SkipSpace(true) { +MCAsmLexer::MCAsmLexer() : TokStart(nullptr), SkipSpace(true) { + CurTok.emplace_back(AsmToken::Error, StringRef()); } MCAsmLexer::~MCAsmLexer() { diff --git a/lib/MC/MCParser/MCTargetAsmParser.cpp b/lib/MC/MCParser/MCTargetAsmParser.cpp index 60a3a3b59a3d..4e4b47805cd8 100644 --- a/lib/MC/MCParser/MCTargetAsmParser.cpp +++ b/lib/MC/MCParser/MCTargetAsmParser.cpp @@ -7,13 +7,26 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCTargetAsmParser.h" using namespace llvm; -MCTargetAsmParser::MCTargetAsmParser() - : AvailableFeatures(0), ParsingInlineAsm(false) +MCTargetAsmParser::MCTargetAsmParser(MCTargetOptions const &MCOptions, + const MCSubtargetInfo &STI) + : AvailableFeatures(0), ParsingInlineAsm(false), MCOptions(MCOptions), + STI(&STI) { } MCTargetAsmParser::~MCTargetAsmParser() { } + +MCSubtargetInfo &MCTargetAsmParser::copySTI() { + MCSubtargetInfo &STICopy = getContext().getSubtargetCopy(getSTI()); + STI = &STICopy; + return STICopy; +} + +const MCSubtargetInfo &MCTargetAsmParser::getSTI() const { + return *STI; +} diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp index 9152f2b42a48..dbd544a44ce3 100644 --- a/lib/MC/MCSection.cpp +++ b/lib/MC/MCSection.cpp @@ -21,7 +21,7 @@ using namespace llvm; MCSection::MCSection(SectionVariant V, SectionKind K, MCSymbol *Begin) : Begin(Begin), BundleGroupBeforeFirstInst(false), HasInstructions(false), - IsRegistered(false), Variant(V), Kind(K) {} + IsRegistered(false), DummyFragment(this), Variant(V), Kind(K) {} MCSymbol *MCSection::getEndSymbol(MCContext &Ctx) { if (!End) @@ -72,7 +72,7 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) { if (MI == SubsectionFragmentMap.end()) IP = end(); else - IP = MI->second; + IP = MI->second->getIterator(); if (!ExactMatch && 
Subsection != 0) { // The GNU as documentation claims that subsections have an alignment of 4, // although this appears not to be the case. diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index ce0b4f5fb411..b8373f40b8be 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index b4448d79a2d5..5a0bb7fe986f 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -27,12 +27,7 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, if (isUnique()) return false; - // FIXME: Does .section .bss/.data/.text work everywhere?? - if (Name == ".text" || Name == ".data" || - (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS())) - return true; - - return false; + return MAI.shouldOmitSectionDirective(Name); } static void printName(raw_ostream &OS, StringRef Name) { @@ -138,6 +133,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << "note"; else if (Type == ELF::SHT_PROGBITS) OS << "progbits"; + else if (Type == ELF::SHT_X86_64_UNWIND) + OS << "unwind"; if (EntrySize) { assert(Flags & ELF::SHF_MERGE); diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index c9f15914e4b1..879c6e5ff932 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -177,7 +177,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. TAAParsed = false; SmallVector SplitSpec; - Spec.split(SplitSpec, ","); + Spec.split(SplitSpec, ','); // Remove leading and trailing whitespace. auto GetEmptyOrTrim = [&SplitSpec](size_t Idx) -> StringRef { return SplitSpec.size() > Idx ? SplitSpec[Idx].trim() : StringRef(); @@ -235,7 +235,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. // The attribute list is a '+' separated list of attributes. SmallVector SectionAttrs; - Attrs.split(SectionAttrs, "+", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + Attrs.split(SectionAttrs, '+', /*MaxSplit=*/-1, /*KeepEmpty=*/false); for (StringRef &SectionAttr : SectionAttrs) { auto AttrDescriptorI = std::find_if( diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 7fbbbd95b560..836b40544642 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -107,8 +107,7 @@ void MCStreamer::EmitSLEB128IntValue(int64_t Value) { EmitBytes(OSE.str()); } -void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { +void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, SMLoc Loc) { EmitValueImpl(Value, Size, Loc); } @@ -189,11 +188,9 @@ void MCStreamer::InitSections(bool NoExecStack) { SwitchSection(getContext().getObjectFileInfo()->getTextSection()); } -void MCStreamer::AssignSection(MCSymbol *Symbol, MCSection *Section) { - if (Section) - Symbol->setSection(*Section); - else - Symbol->setUndefined(); +void MCStreamer::AssignFragment(MCSymbol *Symbol, MCFragment *Fragment) { + assert(Fragment); + Symbol->setFragment(Fragment); // As we emit symbols into a section, track the order so that they can // be sorted upon later. Zero is reserved to mean 'unemitted'. 
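Several hunks in this series replace the old reinterpret_cast of the integer 1
into a pseudo-section pointer with the address of a static dummy fragment. A
sketch of that sentinel idiom, using simplified stand-in types (Fragment and
Symbol here are illustrative, not the real MC classes):

    struct Fragment {};

    // Only the address of this object is ever used; it is never dereferenced.
    static Fragment AbsolutePseudoFragment;

    struct Symbol {
      Fragment *Frag = nullptr;

      void setAbsolute() { Frag = &AbsolutePseudoFragment; }
      bool isAbsolute() const { return Frag == &AbsolutePseudoFragment; }
      bool isDefined() const { return Frag != nullptr; }
    };

Compared with casting an integer constant, the sentinel stays type-safe and
its address cannot collide with a real allocation.
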
@@ -203,7 +200,8 @@ void MCStreamer::AssignSection(MCSymbol *Symbol, MCSection *Section) { void MCStreamer::EmitLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(getCurrentSection().first && "Cannot emit before setting section!"); - AssignSection(Symbol, getCurrentSection().first); + assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!"); + Symbol->setFragment(&getCurrentSectionOnly()->getDummyFragment()); MCTargetStreamer *TS = getTargetStreamer(); if (TS) @@ -361,6 +359,14 @@ void MCStreamer::EmitCFIEscape(StringRef Values) { CurFrame->Instructions.push_back(Instruction); } +void MCStreamer::EmitCFIGnuArgsSize(int64_t Size) { + MCSymbol *Label = EmitCFICommon(); + MCCFIInstruction Instruction = + MCCFIInstruction::createGnuArgsSize(Label, Size); + MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); + CurFrame->Instructions.push_back(Instruction); +} + void MCStreamer::EmitCFISignalFrame() { EnsureValidDwarfFrame(); MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo(); @@ -467,6 +473,8 @@ void MCStreamer::EmitWinEHHandlerData() { report_fatal_error("Chained unwind areas can't have handlers!"); } +void MCStreamer::EmitSyntaxDirective() {} + void MCStreamer::EmitWinCFIPushReg(unsigned Register) { EnsureValidWinFrameInfo(); @@ -679,8 +687,7 @@ void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol, void MCStreamer::ChangeSection(MCSection *, const MCExpr *) {} void MCStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {} void MCStreamer::EmitBytes(StringRef Data) {} -void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { +void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) { visitUsedExpr(*Value); } void MCStreamer::EmitULEB128Value(const MCExpr *Value) {} @@ -690,9 +697,7 @@ void MCStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, unsigned MaxBytesToEmit) {} void MCStreamer::EmitCodeAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) {} -bool MCStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - return false; -} +void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value) {} void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {} void MCStreamer::EmitBundleLock(bool AlignToEnd) {} void MCStreamer::FinishImpl() {} diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp index 9210cf544b16..dc864d3a17f8 100644 --- a/lib/MC/MCSubtargetInfo.cpp +++ b/lib/MC/MCSubtargetInfo.cpp @@ -32,8 +32,8 @@ void MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { CPUSchedModel = &MCSchedModel::GetDefaultSchedModel(); } -void MCSubtargetInfo::setDefaultFeatures(StringRef CPU) { - FeatureBits = getFeatures(CPU, "", ProcDesc, ProcFeatures); +void MCSubtargetInfo::setDefaultFeatures(StringRef CPU, StringRef FS) { + FeatureBits = getFeatures(CPU, FS, ProcDesc, ProcFeatures); } MCSubtargetInfo::MCSubtargetInfo( @@ -77,13 +77,12 @@ FeatureBitset MCSubtargetInfo::ApplyFeatureFlag(StringRef FS) { const MCSchedModel &MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { assert(ProcSchedModels && "Processor machine model not available!"); - unsigned NumProcs = ProcDesc.size(); -#ifndef NDEBUG - for (size_t i = 1; i < NumProcs; i++) { - assert(strcmp(ProcSchedModels[i - 1].Key, ProcSchedModels[i].Key) < 0 && - "Processor machine model table is not sorted"); - } -#endif + size_t NumProcs = ProcDesc.size(); + assert(std::is_sorted(ProcSchedModels, 
ProcSchedModels+NumProcs, + [](const SubtargetInfoKV &LHS, const SubtargetInfoKV &RHS) { + return strcmp(LHS.Key, RHS.Key) < 0; + }) && + "Processor machine model table is not sorted"); // Find entry const SubtargetInfoKV *Found = diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index 125380a9d140..ab3b8eb68322 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -16,8 +16,11 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -// Sentinel value for the absolute pseudo section. -MCSection *MCSymbol::AbsolutePseudoSection = reinterpret_cast(1); +// Only the address of this fragment is ever actually used. +static MCDummyFragment SentinelFragment(nullptr); + +// Sentinel value for the absolute pseudo fragment. +MCFragment *MCSymbol::AbsolutePseudoFragment = &SentinelFragment; void *MCSymbol::operator new(size_t s, const StringMapEntry *Name, MCContext &Ctx) { diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp index 1258d9e29f2e..465622715526 100644 --- a/lib/MC/MCTargetOptions.cpp +++ b/lib/MC/MCTargetOptions.cpp @@ -14,9 +14,10 @@ namespace llvm { MCTargetOptions::MCTargetOptions() : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false), - MCFatalWarnings(false), MCSaveTempLabels(false), - MCUseDwarfDirectory(false), ShowMCEncoding(false), ShowMCInst(false), - AsmVerbose(false), DwarfVersion(0), ABIName() {} + MCFatalWarnings(false), MCNoWarn(false), MCSaveTempLabels(false), + MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false), + ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false), + DwarfVersion(0), ABIName() {} StringRef MCTargetOptions::getABIName() const { return ABIName; diff --git a/lib/MC/MCWinEH.cpp b/lib/MC/MCWinEH.cpp index d5d9eadf39a0..83af203c7acb 100644 --- a/lib/MC/MCWinEH.cpp +++ b/lib/MC/MCWinEH.cpp @@ -49,10 +49,10 @@ static MCSection *getUnwindInfoSection(StringRef SecName, if (CodeSecName.startswith(".text$")) CodeSecName = CodeSecName.substr(6); - return Context.getCOFFSection( - (SecName + Twine('$') + CodeSecName).str(), - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, - SectionKind::getDataRel()); + return Context.getCOFFSection((SecName + Twine('$') + CodeSecName).str(), + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getData()); } } diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 8ce6127e3866..324385fa132a 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -78,7 +78,6 @@ uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S, dyn_cast(S.getVariableValue())) return C->getValue(); - MCValue Target; if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr)) report_fatal_error("unable to evaluate offset for variable '" + @@ -117,7 +116,8 @@ uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec, return OffsetToAlignment(EndAddr, NextSec.getAlignment()); } -void MachObjectWriter::writeHeader(unsigned NumLoadCommands, +void MachObjectWriter::writeHeader(MachO::HeaderFileType Type, + unsigned NumLoadCommands, unsigned LoadCommandsSize, bool SubsectionsViaSymbols) { uint32_t Flags = 0; @@ -128,7 +128,7 @@ void MachObjectWriter::writeHeader(unsigned NumLoadCommands, // struct mach_header (28 bytes) or // struct mach_header_64 (32 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(is64Bit() ? 
MachO::MH_MAGIC_64 : MachO::MH_MAGIC); @@ -136,29 +136,30 @@ void MachObjectWriter::writeHeader(unsigned NumLoadCommands, write32(TargetObjectWriter->getCPUType()); write32(TargetObjectWriter->getCPUSubtype()); - write32(MachO::MH_OBJECT); + write32(Type); write32(NumLoadCommands); write32(LoadCommandsSize); write32(Flags); if (is64Bit()) write32(0); // reserved - assert(OS.tell() - Start == - (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header))); + assert( + getStream().tell() - Start == + (is64Bit() ? sizeof(MachO::mach_header_64) : sizeof(MachO::mach_header))); } /// writeSegmentLoadCommand - Write a segment load command. /// /// \param NumSections The number of sections in this segment. /// \param SectionDataSize The total size of the sections. -void MachObjectWriter::writeSegmentLoadCommand(unsigned NumSections, - uint64_t VMSize, - uint64_t SectionDataStartOffset, - uint64_t SectionDataSize) { +void MachObjectWriter::writeSegmentLoadCommand( + StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize, + uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt, + uint32_t InitProt) { // struct segment_command (56 bytes) or // struct segment_command_64 (72 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; unsigned SegmentLoadCommandSize = @@ -169,31 +170,32 @@ void MachObjectWriter::writeSegmentLoadCommand(unsigned NumSections, NumSections * (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section))); - writeBytes("", 16); + assert(Name.size() <= 16); + writeBytes(Name, 16); if (is64Bit()) { - write64(0); // vmaddr + write64(VMAddr); // vmaddr write64(VMSize); // vmsize write64(SectionDataStartOffset); // file offset write64(SectionDataSize); // file size } else { - write32(0); // vmaddr + write32(VMAddr); // vmaddr write32(VMSize); // vmsize write32(SectionDataStartOffset); // file offset write32(SectionDataSize); // file size } // maxprot - write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); + write32(MaxProt); // initprot - write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); + write32(InitProt); write32(NumSections); write32(0); // flags - assert(OS.tell() - Start == SegmentLoadCommandSize); + assert(getStream().tell() - Start == SegmentLoadCommandSize); } -void MachObjectWriter::writeSection(const MCAssembler &Asm, - const MCAsmLayout &Layout, - const MCSection &Sec, uint64_t FileOffset, +void MachObjectWriter::writeSection(const MCAsmLayout &Layout, + const MCSection &Sec, uint64_t VMAddr, + uint64_t FileOffset, unsigned Flags, uint64_t RelocationsStart, unsigned NumRelocations) { uint64_t SectionSize = Layout.getSectionAddressSize(&Sec); @@ -208,24 +210,20 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm, // struct section (68 bytes) or // struct section_64 (80 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; writeBytes(Section.getSectionName(), 16); writeBytes(Section.getSegmentName(), 16); if (is64Bit()) { - write64(getSectionAddress(&Sec)); // address + write64(VMAddr); // address write64(SectionSize); // size } else { - write32(getSectionAddress(&Sec)); // address + write32(VMAddr); // address write32(SectionSize); // size } write32(FileOffset); - unsigned Flags = Section.getTypeAndAttributes(); - if (Section.hasInstructions()) - Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; - assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!"); 
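  // Mach-O section headers store alignment as a power-of-two exponent,
  // hence the preceding power-of-two assert and the Log2_32 conversion
  // written out below.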
write32(Log2_32(Section.getAlignment())); write32(NumRelocations ? RelocationsStart : 0); @@ -236,8 +234,8 @@ void MachObjectWriter::writeSection(const MCAssembler &Asm, if (is64Bit()) write32(0); // reserved3 - assert(OS.tell() - Start == (is64Bit() ? sizeof(MachO::section_64) : - sizeof(MachO::section))); + assert(getStream().tell() - Start == + (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section))); } void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset, @@ -246,7 +244,7 @@ void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t StringTableSize) { // struct symtab_command (24 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(MachO::LC_SYMTAB); @@ -256,7 +254,7 @@ void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset, write32(StringTableOffset); write32(StringTableSize); - assert(OS.tell() - Start == sizeof(MachO::symtab_command)); + assert(getStream().tell() - Start == sizeof(MachO::symtab_command)); } void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol, @@ -269,7 +267,7 @@ void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol, uint32_t NumIndirectSymbols) { // struct dysymtab_command (80 bytes) - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(MachO::LC_DYSYMTAB); @@ -293,7 +291,7 @@ void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol, write32(0); // locreloff write32(0); // nlocrel - assert(OS.tell() - Start == sizeof(MachO::dysymtab_command)); + assert(getStream().tell() - Start == sizeof(MachO::dysymtab_command)); } MachObjectWriter::MachSymbolData * @@ -389,7 +387,7 @@ void MachObjectWriter::writeNlist(MachSymbolData &MSD, void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset, uint32_t DataSize) { - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(Type); @@ -397,7 +395,7 @@ void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type, write32(DataOffset); write32(DataSize); - assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command)); + assert(getStream().tell() - Start == sizeof(MachO::linkedit_data_command)); } static unsigned ComputeLinkerOptionsLoadCommandSize( @@ -413,7 +411,7 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand( const std::vector &Options) { unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit()); - uint64_t Start = OS.tell(); + uint64_t Start = getStream().tell(); (void) Start; write32(MachO::LC_LINKER_OPTION); @@ -429,7 +427,7 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand( // Pad to a multiple of the pointer size. writeBytes("", OffsetToAlignment(BytesWritten, is64Bit() ? 
8 : 4)); - assert(OS.tell() - Start == Size); + assert(getStream().tell() - Start == Size); } void MachObjectWriter::recordRelocation(MCAssembler &Asm, @@ -458,9 +456,9 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) { if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS && Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS && Section.getType() != MachO::S_SYMBOL_STUBS) { - MCSymbol &Symbol = *it->Symbol; - report_fatal_error("indirect symbol '" + Symbol.getName() + - "' not in a symbol pointer or stub section"); + MCSymbol &Symbol = *it->Symbol; + report_fatal_error("indirect symbol '" + Symbol.getName() + + "' not in a symbol pointer or stub section"); } } @@ -522,7 +520,7 @@ void MachObjectWriter::computeSymbolTable( StringTable.add(Symbol.getName()); } - StringTable.finalize(StringTableBuilder::MachO); + StringTable.finalize(); // Build the symbol arrays but only for non-local symbols. // @@ -627,6 +625,18 @@ void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm, bindIndirectSymbols(Asm); } +bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( + const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B, + bool InSet) const { + // FIXME: We don't handle things like + // foo = . + // creating atoms. + if (A.isVariable() || B.isVariable()) + return false; + return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B, + InSet); +} + bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB, bool InSet, bool IsPCRel) const { @@ -746,7 +756,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, ++NumLoadCommands; LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit()); } - + // Compute the total size of the section data, as well as its file size and vm // size. uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) : @@ -776,18 +786,25 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, SectionDataFileSize += SectionDataPadding; // Write the prolog, starting with the header and load command... - writeHeader(NumLoadCommands, LoadCommandsSize, + writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize, Asm.getSubsectionsViaSymbols()); - writeSegmentLoadCommand(NumSections, VMSize, - SectionDataStart, SectionDataSize); + uint32_t Prot = + MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE; + writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart, + SectionDataSize, Prot, Prot); // ... and then the section headers. uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (const MCSection &Sec : Asm) { + for (const MCSection &Section : Asm) { + const auto &Sec = cast(Section); std::vector &Relocs = Relocations[&Sec]; unsigned NumRelocs = Relocs.size(); uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec); - writeSection(Asm, Layout, Sec, SectionStart, RelocTableEnd, NumRelocs); + unsigned Flags = Sec.getTypeAndAttributes(); + if (Sec.hasInstructions()) + Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS; + writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags, + RelocTableEnd, NumRelocs); RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info); } @@ -798,8 +815,22 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, assert(VersionInfo.Major < 65536 && "unencodable major target version"); uint32_t EncodedVersion = VersionInfo.Update | (VersionInfo.Minor << 8) | (VersionInfo.Major << 16); - write32(VersionInfo.Kind == MCVM_OSXVersionMin ? 
MachO::LC_VERSION_MIN_MACOSX : - MachO::LC_VERSION_MIN_IPHONEOS); + MachO::LoadCommandType LCType; + switch (VersionInfo.Kind) { + case MCVM_OSXVersionMin: + LCType = MachO::LC_VERSION_MIN_MACOSX; + break; + case MCVM_IOSVersionMin: + LCType = MachO::LC_VERSION_MIN_IPHONEOS; + break; + case MCVM_TvOSVersionMin: + LCType = MachO::LC_VERSION_MIN_TVOS; + break; + case MCVM_WatchOSVersionMin: + LCType = MachO::LC_VERSION_MIN_WATCHOS; + break; + } + write32(LCType); write32(sizeof(MachO::version_min_command)); write32(EncodedVersion); write32(0); // reserved. @@ -901,12 +932,12 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, // Write out the loh commands, if there is one. if (LOHSize) { #ifndef NDEBUG - unsigned Start = OS.tell(); + unsigned Start = getStream().tell(); #endif Asm.getLOHContainer().emit(*this, Layout); // Pad to a multiple of the pointer size. writeBytes("", OffsetToAlignment(LOHRawSize, is64Bit() ? 8 : 4)); - assert(OS.tell() - Start == LOHSize); + assert(getStream().tell() - Start == LOHSize); } // Write the symbol table data, if used. @@ -942,7 +973,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm, writeNlist(Entry, Layout); // Write the string table. - OS << StringTable.data(); + getStream() << StringTable.data(); } } diff --git a/lib/MC/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index 9de9363611e6..80e552287b3d 100644 --- a/lib/MC/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -8,35 +8,71 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/StringTableBuilder.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Support/COFF.h" #include "llvm/Support/Endian.h" +#include <vector> + using namespace llvm; -static bool compareBySuffix(StringRef a, StringRef b) { - size_t sizeA = a.size(); - size_t sizeB = b.size(); - size_t len = std::min(sizeA, sizeB); - for (size_t i = 0; i < len; ++i) { - char ca = a[sizeA - i - 1]; - char cb = b[sizeB - i - 1]; - if (ca != cb) - return ca > cb; - } - return sizeA > sizeB; +StringTableBuilder::StringTableBuilder(Kind K) : K(K) {} + +typedef std::pair<StringRef, size_t> StringPair; + +// Returns the character at Pos from end of a string. +static int charTailAt(StringPair *P, size_t Pos) { + StringRef S = P->first; + if (Pos >= S.size()) + return -1; + return (unsigned char)S[S.size() - Pos - 1]; } -void StringTableBuilder::finalize(Kind kind) { - SmallVector<StringRef, 8> Strings; +// Three-way radix quicksort. This is much faster than std::sort with strcmp +// because it does not compare characters that we already know the same. +static void multikey_qsort(StringPair **Begin, StringPair **End, int Pos) { +tailcall: + if (End - Begin <= 1) + return; + + // Partition items. Items in [Begin, P) are greater than the pivot, + // [P, Q) are the same as the pivot, and [Q, End) are less than the pivot. + int Pivot = charTailAt(*Begin, Pos); + StringPair **P = Begin; + StringPair **Q = End; + for (StringPair **R = Begin + 1; R < Q;) { + int C = charTailAt(*R, Pos); + if (C > Pivot) + std::swap(*P++, *R++); + else if (C < Pivot) + std::swap(*--Q, *R); + else + R++; + } + + multikey_qsort(Begin, P, Pos); + multikey_qsort(Q, End, Pos); + if (Pivot != -1) { + // qsort(P, Q, Pos + 1), but with tail call optimization.
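// At this point one partition pass is complete: [Begin, P) compares greater
// than the pivot, [P, Q) equal to it, and [Q, End) less than it, always at
// position Pos from the end of the string. The two unequal ranges recurse at
// the same Pos; the equal range moves on to Pos + 1, and the goto below
// reuses the current frame, so a run of strings sharing a long suffix costs
// constant stack instead of one frame per character. A pivot of -1 means the
// strings in the middle range have no characters left to compare, so no
// deeper pass is needed.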
+ Begin = P; + End = Q; + ++Pos; + goto tailcall; + } +} + +void StringTableBuilder::finalize() { + std::vector<std::pair<StringRef, size_t> *> Strings; Strings.reserve(StringIndexMap.size()); + for (std::pair<StringRef, size_t> &P : StringIndexMap) + Strings.push_back(&P); - for (auto i = StringIndexMap.begin(), e = StringIndexMap.end(); i != e; ++i) - Strings.push_back(i->getKey()); + if (!Strings.empty()) + multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0); - std::sort(Strings.begin(), Strings.end(), compareBySuffix); - - switch (kind) { + switch (K) { + case RAW: + break; case ELF: case MachO: // Start the table with a NUL byte. @@ -49,22 +85,25 @@ void StringTableBuilder::finalize(Kind kind) { } StringRef Previous; - for (StringRef s : Strings) { - if (kind == WinCOFF) - assert(s.size() > COFF::NameSize && "Short string in COFF string table!"); + for (std::pair<StringRef, size_t> *P : Strings) { + StringRef S = P->first; + if (K == WinCOFF) + assert(S.size() > COFF::NameSize && "Short string in COFF string table!"); - if (Previous.endswith(s)) { - StringIndexMap[s] = StringTable.size() - 1 - s.size(); + if (Previous.endswith(S)) { + P->second = StringTable.size() - S.size() - (K != RAW); continue; } - StringIndexMap[s] = StringTable.size(); - StringTable += s; - StringTable += '\x00'; - Previous = s; + P->second = StringTable.size(); + StringTable += S; + if (K != RAW) + StringTable += '\x00'; + Previous = S; } - switch (kind) { + switch (K) { + case RAW: case ELF: break; case MachO: @@ -75,14 +114,31 @@ void StringTableBuilder::finalize(Kind kind) { case WinCOFF: // Write the table size in the first word. assert(StringTable.size() <= std::numeric_limits<uint32_t>::max()); - uint32_t size = static_cast<uint32_t>(StringTable.size()); + uint32_t Size = static_cast<uint32_t>(StringTable.size()); support::endian::write<uint32_t, support::little, support::unaligned>( - StringTable.data(), size); + StringTable.data(), Size); break; } + + Size = StringTable.size(); } void StringTableBuilder::clear() { StringTable.clear(); StringIndexMap.clear(); } + +size_t StringTableBuilder::getOffset(StringRef S) const { + assert(isFinalized()); + auto I = StringIndexMap.find(S); + assert(I != StringIndexMap.end() && "String is not in table!"); + return I->second; +} + +size_t StringTableBuilder::add(StringRef S) { + assert(!isFinalized()); + auto P = StringIndexMap.insert(std::make_pair(S, Size)); + if (P.second) + Size += S.size() + (K != RAW); + return P.first->second; +} diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 76574e987cb1..b642f17f0e79 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -56,7 +56,7 @@ static inline bool isEnabled(StringRef Feature) { /// static void Split(std::vector<std::string> &V, StringRef S) { SmallVector<StringRef, 3> Tmp; - S.split(Tmp, ",", -1, false /* KeepEmpty */); + S.split(Tmp, ',', -1, false /* KeepEmpty */); V.assign(Tmp.begin(), Tmp.end()); } diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 56ef1c7a2735..a3820906b76b 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -32,8 +33,10 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/JamCRC.h" #include "llvm/Support/TimeValue.h" #include <cstdio> +#include <ctime> using namespace llvm; @@ -76,8 +79,6 @@ public: COFFSymbol(StringRef name); void 
set_name_offset(uint32_t Offset); - bool should_keep() const; - int64_t getIndex() const { return Index; } void setIndex(int Value) { Index = Value; @@ -125,7 +126,7 @@ public: COFF::header Header; sections Sections; symbols Symbols; - StringTableBuilder Strings; + StringTableBuilder Strings{StringTableBuilder::WinCOFF}; // Maps used during object file creation. section_map SectionMap; @@ -160,8 +161,6 @@ public: void SetSymbolName(COFFSymbol &S); void SetSectionName(COFFSection &S); - bool ExportSymbol(const MCSymbol &Symbol, MCAssembler &Asm); - bool IsPhysicalSection(COFFSection *S); // Entity writing methods. @@ -215,38 +214,6 @@ void COFFSymbol::set_name_offset(uint32_t Offset) { write_uint32_le(Data.Name + 4, Offset); } -/// logic to decide if the symbol should be reported in the symbol table -bool COFFSymbol::should_keep() const { - // no section means its external, keep it - if (!Section) - return true; - - // if it has relocations pointing at it, keep it - if (Relocations > 0) { - assert(Section->Number != -1 && "Sections with relocations must be real!"); - return true; - } - - // if this is a safeseh handler, keep it - if (MC && (cast(MC)->isSafeSEH())) - return true; - - // if the section its in is being droped, drop it - if (Section->Number == -1) - return false; - - // if it is the section symbol, keep it - if (Section->Symbol == this) - return true; - - // if its temporary, drop it - if (MC && MC->isTemporary()) - return false; - - // otherwise, keep it - return true; -} - //------------------------------------------------------------------------------ // Section class implementation @@ -392,7 +359,6 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &Symbol, MCAssembler &Assembler, const MCAsmLayout &Layout) { COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol); - SymbolMap[&Symbol] = coff_symbol; if (cast(Symbol).isWeakExternal()) { coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL; @@ -515,25 +481,6 @@ void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) { std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size()); } -bool WinCOFFObjectWriter::ExportSymbol(const MCSymbol &Symbol, - MCAssembler &Asm) { - // This doesn't seem to be right. Strings referred to from the .data section - // need symbols so they can be linked to code in the .text section right? - - // return Asm.isSymbolLinkerVisible(Symbol); - - // Non-temporary labels should always be visible to the linker. - if (!Symbol.isTemporary()) - return true; - - // Temporary variable symbols are invisible. - if (Symbol.isVariable()) - return false; - - // Absolute temporary labels are never visible. - return !Symbol.isAbsolute(); -} - bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) { return (S->Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0; @@ -663,7 +610,7 @@ void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, defineSection(static_cast(Section)); for (const MCSymbol &Symbol : Asm.symbols()) - if (ExportSymbol(Symbol, Asm)) + if (!Symbol.isTemporary()) DefineSymbol(Symbol, Asm, Layout); } @@ -674,7 +621,8 @@ bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl( // thunk to implement their /INCREMENTAL feature. Make sure we don't optimize // away any relocations to functions. 
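// Background for the check below: a COFF symbol type packs its base type in
// the low 4 bits and its derived ("complex") type in the next 4, so with
// COFF::SCT_COMPLEX_TYPE_SHIFT == 4 a function symbol stores
// IMAGE_SYM_DTYPE_FUNCTION (2) << 4 == 0x20, and shifting right by the same
// amount recovers the DTYPE for comparison.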
uint16_t Type = cast(SymA).getType(); - if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) + if (Asm.isIncrementalLinkerCompatible() && + (Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION) return false; return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB, InSet, IsPCRel); @@ -702,41 +650,49 @@ void WinCOFFObjectWriter::recordRelocation( const MCFixup &Fixup, MCValue Target, bool &IsPCRel, uint64_t &FixedValue) { assert(Target.getSymA() && "Relocation must reference a symbol!"); - const MCSymbol &Symbol = Target.getSymA()->getSymbol(); - const MCSymbol &A = Symbol; - if (!A.isRegistered()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + const MCSymbol &A = Target.getSymA()->getSymbol(); + if (!A.isRegistered()) { + Asm.getContext().reportError(Fixup.getLoc(), Twine("symbol '") + A.getName() + "' can not be undefined"); + return; + } + if (A.isTemporary() && A.isUndefined()) { + Asm.getContext().reportError(Fixup.getLoc(), + Twine("assembler label '") + A.getName() + + "' can not be undefined"); + return; + } MCSection *Section = Fragment->getParent(); // Mark this symbol as requiring an entry in the symbol table. assert(SectionMap.find(Section) != SectionMap.end() && "Section must already have been defined in executePostLayoutBinding!"); - assert(SymbolMap.find(&A) != SymbolMap.end() && - "Symbol must already have been defined in executePostLayoutBinding!"); COFFSection *coff_section = SectionMap[Section]; - COFFSymbol *coff_symbol = SymbolMap[&A]; const MCSymbolRefExpr *SymB = Target.getSymB(); bool CrossSection = false; if (SymB) { const MCSymbol *B = &SymB->getSymbol(); - if (!B->getFragment()) - Asm.getContext().reportFatalError( + if (!B->getFragment()) { + Asm.getContext().reportError( Fixup.getLoc(), Twine("symbol '") + B->getName() + "' can not be undefined in a subtraction expression"); + return; + } - if (!A.getFragment()) - Asm.getContext().reportFatalError( + if (!A.getFragment()) { + Asm.getContext().reportError( Fixup.getLoc(), - Twine("symbol '") + Symbol.getName() + + Twine("symbol '") + A.getName() + "' can not be undefined in a subtraction expression"); + return; + } - CrossSection = &Symbol.getSection() != &B->getSection(); + CrossSection = &A.getSection() != &B->getSection(); // Offset of the symbol in the section int64_t OffsetOfB = Layout.getSymbolOffset(*B); @@ -765,12 +721,19 @@ void WinCOFFObjectWriter::recordRelocation( Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment); // Turn relocations for temporary symbols into section relocations. - if (coff_symbol->MC->isTemporary() || CrossSection) { - Reloc.Symb = coff_symbol->Section->Symbol; - FixedValue += Layout.getFragmentOffset(coff_symbol->MC->getFragment()) + - coff_symbol->MC->getOffset(); - } else - Reloc.Symb = coff_symbol; + if (A.isTemporary() || CrossSection) { + MCSection *TargetSection = &A.getSection(); + assert( + SectionMap.find(TargetSection) != SectionMap.end() && + "Section must already have been defined in executePostLayoutBinding!"); + Reloc.Symb = SectionMap[TargetSection]->Symbol; + FixedValue += Layout.getSymbolOffset(A); + } else { + assert( + SymbolMap.find(&A) != SymbolMap.end() && + "Symbol must already have been defined in executePostLayoutBinding!"); + Reloc.Symb = SymbolMap[&A]; + } ++Reloc.Symb->Relocations; @@ -884,14 +847,10 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, // Update section number & offset for symbols that have them. 
if (Symbol->Section) Symbol->Data.SectionNumber = Symbol->Section->Number; - if (Symbol->should_keep()) { - Symbol->setIndex(Header.NumberOfSymbols++); - // Update auxiliary symbol info. - Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size(); - Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols; - } else { - Symbol->setIndex(-1); - } + Symbol->setIndex(Header.NumberOfSymbols++); + // Update auxiliary symbol info. + Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size(); + Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols; } // Build string table. @@ -899,16 +858,15 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, if (S->Name.size() > COFF::NameSize) Strings.add(S->Name); for (const auto &S : Symbols) - if (S->should_keep() && S->Name.size() > COFF::NameSize) + if (S->Name.size() > COFF::NameSize) Strings.add(S->Name); - Strings.finalize(StringTableBuilder::WinCOFF); + Strings.finalize(); // Set names. for (const auto &S : Sections) SetSectionName(*S); for (auto &S : Symbols) - if (S->should_keep()) - SetSymbolName(*S); + SetSymbolName(*S); // Fixup weak external references. for (auto &Symbol : Symbols) { @@ -948,7 +906,7 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, // Assign file offsets to COFF object file structures. - unsigned offset = 0; + unsigned offset = getInitialOffset(); if (UseBigObj) offset += COFF::Header32Size; @@ -1011,8 +969,23 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, Header.PointerToSymbolTable = offset; + // FIXME: Remove the #else branch and make the #if branch unconditional once + // LLVM's self host configuration is aware of /Brepro. +#if (ENABLE_TIMESTAMPS == 1) + // MS LINK expects to be able to use this timestamp to implement their + // /INCREMENTAL feature. + if (Asm.isIncrementalLinkerCompatible()) { + std::time_t Now = time(nullptr); + if (Now < 0 || !isUInt<32>(Now)) + Now = UINT32_MAX; + Header.TimeDateStamp = Now; + } else { + Header.TimeDateStamp = 0; + } +#else // We want a deterministic output. It looks like GNU as also writes 0 in here. Header.TimeDateStamp = 0; +#endif // Write it all to disk... WriteFileHeader(Header); @@ -1029,6 +1002,7 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, } } + SmallVector SectionContents; for (i = Sections.begin(), ie = Sections.end(), j = Asm.begin(), je = Asm.end(); (i != ie) && (j != je); ++i, ++j) { @@ -1037,20 +1011,47 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, continue; if ((*i)->Header.PointerToRawData != 0) { - assert(OS.tell() <= (*i)->Header.PointerToRawData && + assert(getStream().tell() <= (*i)->Header.PointerToRawData && "Section::PointerToRawData is insane!"); - unsigned SectionDataPadding = (*i)->Header.PointerToRawData - OS.tell(); + unsigned SectionDataPadding = + (*i)->Header.PointerToRawData - getStream().tell(); assert(SectionDataPadding < 4 && "Should only need at most three bytes of padding!"); WriteZeros(SectionDataPadding); + // Save the contents of the section to a temporary buffer, we need this + // to CRC the data before we dump it into the object file. + SectionContents.clear(); + raw_svector_ostream VecOS(SectionContents); + raw_pwrite_stream &OldStream = getStream(); + // Redirect the output stream to our buffer. + setStream(VecOS); + // Fill our buffer with the section data. Asm.writeSectionData(&*j, Layout); + // Reset the stream back to what it was before. 
+ setStream(OldStream); + + // Calculate our CRC with an initial value of '0', this is not how + // JamCRC is specified but it aligns with the expected output. + JamCRC JC(/*Init=*/0x00000000U); + JC.update(SectionContents); + + // Write the section contents to the object file. + getStream() << SectionContents; + + // Update the section definition auxiliary symbol to record the CRC. + COFFSection *Sec = SectionMap[&*j]; + COFFSymbol::AuxiliarySymbols &AuxSyms = Sec->Symbol->Aux; + assert(AuxSyms.size() == 1 && + AuxSyms[0].AuxType == ATSectionDefinition); + AuxSymbol &SecDef = AuxSyms[0]; + SecDef.Aux.SectionDefinition.CheckSum = JC.getCRC(); } if ((*i)->Relocations.size() > 0) { - assert(OS.tell() == (*i)->Header.PointerToRelocations && + assert(getStream().tell() == (*i)->Header.PointerToRelocations && "Section::PointerToRelocations is insane!"); if ((*i)->Relocations.size() >= 0xffff) { @@ -1071,14 +1072,14 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm, } } - assert(OS.tell() == Header.PointerToSymbolTable && + assert(getStream().tell() == Header.PointerToSymbolTable && "Header::PointerToSymbolTable is insane!"); for (auto &Symbol : Symbols) if (Symbol->getIndex() != -1) WriteSymbol(*Symbol); - OS.write(Strings.data().data(), Strings.data().size()); + getStream().write(Strings.data().data(), Strings.data().size()); } MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index 36dd691f07b8..a38b1a41a9b0 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -49,7 +49,6 @@ void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst, SmallString<256> Code; raw_svector_ostream VecOS(Code); getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI); - VecOS.flush(); // Add the fixups and data. 
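The section-checksum step above is self-contained enough to show in isolation. A minimal sketch of producing a section-definition CheckSum the same way the writer does, seeding JamCRC with 0 rather than its default all-ones value (the helper name is illustrative, not part of the patch):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/JamCRC.h"

// Sketch: CRC the raw bytes of a section as the COFF writer does above.
static uint32_t checksumSectionContents(llvm::ArrayRef<char> Contents) {
  llvm::JamCRC JC(/*Init=*/0x00000000U); // writer seeds with 0, not ~0
  JC.update(Contents);
  return JC.getCRC();
}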
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { @@ -123,29 +122,37 @@ void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { "Got non-COFF section in the COFF backend!"); if (CurSymbol) - FatalError("starting a new symbol definition without completing the " - "previous one"); + Error("starting a new symbol definition without completing the " + "previous one"); CurSymbol = Symbol; } void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { - if (!CurSymbol) - FatalError("storage class specified outside of symbol definition"); + if (!CurSymbol) { + Error("storage class specified outside of symbol definition"); + return; + } - if (StorageClass & ~COFF::SSC_Invalid) - FatalError("storage class value '" + Twine(StorageClass) + + if (StorageClass & ~COFF::SSC_Invalid) { + Error("storage class value '" + Twine(StorageClass) + "' out of range"); + return; + } getAssembler().registerSymbol(*CurSymbol); cast<MCSymbolCOFF>(CurSymbol)->setClass((uint16_t)StorageClass); } void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { - if (!CurSymbol) - FatalError("symbol type specified outside of a symbol definition"); + if (!CurSymbol) { + Error("symbol type specified outside of a symbol definition"); + return; + } - if (Type & ~0xffff) - FatalError("type value '" + Twine(Type) + "' out of range"); + if (Type & ~0xffff) { + Error("type value '" + Twine(Type) + "' out of range"); + return; + } getAssembler().registerSymbol(*CurSymbol); cast<MCSymbolCOFF>(CurSymbol)->setType((uint16_t)Type); @@ -153,7 +160,7 @@ void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) { void MCWinCOFFStreamer::EndCOFFSymbolDef() { if (!CurSymbol) - FatalError("ending symbol definition without starting one"); + Error("ending symbol definition without starting one"); CurSymbol = nullptr; } @@ -215,8 +222,6 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, Size = std::max(Size, static_cast<uint64_t>(ByteAlignment)); } - AssignSection(Symbol, nullptr); - getAssembler().registerSymbol(*Symbol); Symbol->setExternal(true); Symbol->setCommon(Size, ByteAlignment); @@ -228,7 +233,6 @@ void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, OS << " -aligncomm:\"" << Symbol->getName() << "\"," << Log2_32_Ceil(ByteAlignment); - OS.flush(); PushSection(); SwitchSection(MFI->getDrectveSection()); @@ -249,8 +253,6 @@ void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, getAssembler().registerSymbol(*Symbol); Symbol->setExternal(false); - AssignSection(Symbol, Section); - if (ByteAlignment != 1) new MCAlignFragment(ByteAlignment, /*Value=*/0, /*ValueSize=*/0, ByteAlignment, Section); @@ -287,9 +289,8 @@ void MCWinCOFFStreamer::FinishImpl() { MCObjectStreamer::FinishImpl(); } -LLVM_ATTRIBUTE_NORETURN -void MCWinCOFFStreamer::FatalError(const Twine &Msg) const { - getContext().reportFatalError(SMLoc(), Msg); +void MCWinCOFFStreamer::Error(const Twine &Msg) const { + getContext().reportError(SMLoc(), Msg); } } diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index d4821196a6cf..99b0650c8b7e 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -43,10 +43,10 @@ StringRef ArchiveMemberHeader::getName() const { return llvm::StringRef(Name, end); } -uint32_t ArchiveMemberHeader::getSize() const { +ErrorOr<uint32_t> ArchiveMemberHeader::getSize() const { uint32_t Ret; if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret)) - llvm_unreachable("Size is not a decimal number."); + return object_error::parse_failed; // Size is not a decimal number. 
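The ErrorOr<uint32_t> return on getSize above sets the pattern for the rest of this change: callers unwrap the value or propagate the failure rather than tripping llvm_unreachable on malformed input. A hypothetical caller, for illustration only:

#include "llvm/Object/Archive.h"
#include "llvm/Support/ErrorOr.h"

// Sketch of the unwrap-or-propagate idiom the accessors below rely on.
static llvm::ErrorOr<uint64_t>
memberDataSize(const llvm::object::ArchiveMemberHeader &H) {
  llvm::ErrorOr<uint32_t> Size = H.getSize();
  if (std::error_code EC = Size.getError())
    return EC; // a bad size field is now a parse error, not a crash
  return uint64_t(Size.get());
}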
return Ret; } @@ -82,22 +82,30 @@ unsigned ArchiveMemberHeader::getGID() const { return Ret; } -Archive::Child::Child(const Archive *Parent, const char *Start) +Archive::Child::Child(const Archive *Parent, StringRef Data, + uint16_t StartOfFile) + : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {} + +Archive::Child::Child(const Archive *Parent, const char *Start, + std::error_code *EC) : Parent(Parent) { if (!Start) return; - const ArchiveMemberHeader *Header = - reinterpret_cast<const ArchiveMemberHeader *>(Start); uint64_t Size = sizeof(ArchiveMemberHeader); - if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//") - Size += Header->getSize(); Data = StringRef(Start, Size); + if (!isThinMember()) { + ErrorOr<uint64_t> MemberSize = getRawSize(); + if ((*EC = MemberSize.getError())) + return; + Size += MemberSize.get(); + Data = StringRef(Start, Size); + } // Setup StartOfFile and PaddingBytes. StartOfFile = sizeof(ArchiveMemberHeader); // Don't include attached name. - StringRef Name = Header->getName(); + StringRef Name = getRawName(); if (Name.startswith("#1/")) { uint64_t NameSize; if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize)) @@ -106,25 +114,40 @@ Archive::Child::Child(const Archive *Parent, const char *Start) } } -uint64_t Archive::Child::getSize() const { - if (Parent->IsThin) - return getHeader()->getSize(); +ErrorOr<uint64_t> Archive::Child::getSize() const { + if (Parent->IsThin) { + ErrorOr<uint32_t> Size = getHeader()->getSize(); + if (std::error_code EC = Size.getError()) + return EC; + return Size.get(); + } return Data.size() - StartOfFile; } -uint64_t Archive::Child::getRawSize() const { - return getHeader()->getSize(); +ErrorOr<uint64_t> Archive::Child::getRawSize() const { + ErrorOr<uint32_t> Size = getHeader()->getSize(); + if (std::error_code EC = Size.getError()) + return EC; + return Size.get(); +} + +bool Archive::Child::isThinMember() const { + StringRef Name = getHeader()->getName(); + return Parent->IsThin && Name != "/" && Name != "//"; } ErrorOr<StringRef> Archive::Child::getBuffer() const { - if (!Parent->IsThin) - return StringRef(Data.data() + StartOfFile, getSize()); + if (!isThinMember()) { + ErrorOr<uint64_t> Size = getSize(); + if (std::error_code EC = Size.getError()) + return EC; + return StringRef(Data.data() + StartOfFile, Size.get()); + } ErrorOr<StringRef> Name = getName(); if (std::error_code EC = Name.getError()) return EC; - SmallString<128> FullName = - Parent->getMemoryBufferRef().getBufferIdentifier(); - sys::path::remove_filename(FullName); + SmallString<128> FullName = sys::path::parent_path( + Parent->getMemoryBufferRef().getBufferIdentifier()); sys::path::append(FullName, *Name); ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); if (std::error_code EC = Buf.getError()) @@ -133,7 +156,7 @@ ErrorOr<StringRef> Archive::Child::getBuffer() const { return Parent->ThinBuffers.back()->getBuffer(); } -Archive::Child Archive::Child::getNext() const { +ErrorOr<Child> Archive::Child::getNext() const { size_t SpaceToSkip = Data.size(); // If it's odd, add 1 to make it even. if (SpaceToSkip & 1) @@ -141,11 +164,19 @@ Archive::Child Archive::Child::getNext() const { const char *NextLoc = Data.data() + SpaceToSkip; - // Check to see if this is past the end of the archive. - if (NextLoc >= Parent->Data.getBufferEnd()) - return Child(Parent, nullptr); + // Check to see if this is at the end of the archive. + if (NextLoc == Parent->Data.getBufferEnd()) + return Child(Parent, nullptr, nullptr); - return Child(Parent, NextLoc); + // Check to see if this is past the end of the archive. 
+ if (NextLoc > Parent->Data.getBufferEnd()) + return object_error::parse_failed; + + std::error_code EC; + Child Ret(Parent, NextLoc, &EC); + if (EC) + return EC; + return Ret; } uint64_t Archive::Child::getChildOffset() const { @@ -168,17 +199,11 @@ ErrorOr Archive::Child::getName() const { std::size_t offset; if (name.substr(1).rtrim(" ").getAsInteger(10, offset)) llvm_unreachable("Long name offset is not an integer"); - const char *addr = Parent->StringTable->Data.begin() - + sizeof(ArchiveMemberHeader) - + offset; + // Verify it. - if (Parent->StringTable == Parent->child_end() - || addr < (Parent->StringTable->Data.begin() - + sizeof(ArchiveMemberHeader)) - || addr > (Parent->StringTable->Data.begin() - + sizeof(ArchiveMemberHeader) - + Parent->StringTable->getSize())) + if (offset >= Parent->StringTable.size()) return object_error::parse_failed; + const char *addr = Parent->StringTable.begin() + offset; // GNU long file names end with a "/\n". if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) { @@ -227,9 +252,13 @@ ErrorOr> Archive::create(MemoryBufferRef Source) { return std::move(Ret); } +void Archive::setFirstRegular(const Child &C) { + FirstRegularData = C.Data; + FirstRegularStartOfFile = C.StartOfFile; +} + Archive::Archive(MemoryBufferRef Source, std::error_code &ec) - : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()), - StringTable(child_end()), FirstRegular(child_end()) { + : Binary(Binary::ID_Archive, Source) { StringRef Buffer = Data.getBuffer(); // Check for sufficient magic. if (Buffer.startswith(ThinMagic)) { @@ -242,15 +271,26 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) } // Get the special members. - child_iterator i = child_begin(false); - child_iterator e = child_end(); + child_iterator I = child_begin(false); + if ((ec = I->getError())) + return; + child_iterator E = child_end(); - if (i == e) { + if (I == E) { ec = std::error_code(); return; } + const Child *C = &**I; - StringRef Name = i->getRawName(); + auto Increment = [&]() { + ++I; + if ((ec = I->getError())) + return true; + C = &**I; + return false; + }; + + StringRef Name = C->getRawName(); // Below is the pattern that is used to figure out the archive format // GNU archive format @@ -273,9 +313,13 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) if (Name == "__.SYMDEF") { Format = K_BSD; - SymbolTable = i; - ++i; - FirstRegular = i; + // We know that the symbol table is not an external file, so we just assert + // there is no error. + SymbolTable = *C->getBuffer(); + if (Increment()) + return; + setFirstRegular(*C); + ec = std::error_code(); return; } @@ -283,16 +327,19 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) if (Name.startswith("#1/")) { Format = K_BSD; // We know this is BSD, so getName will work since there is no string table. - ErrorOr NameOrErr = i->getName(); + ErrorOr NameOrErr = C->getName(); ec = NameOrErr.getError(); if (ec) return; Name = NameOrErr.get(); if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { - SymbolTable = i; - ++i; + // We know that the symbol table is not an external file, so we just + // assert there is no error. 
+ SymbolTable = *C->getBuffer(); + if (Increment()) + return; } - FirstRegular = i; + setFirstRegular(*C); return; } @@ -303,30 +350,36 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) bool has64SymTable = false; if (Name == "/" || Name == "/SYM64/") { - SymbolTable = i; + // We know that the symbol table is not an external file, so we just assert + // there is no error. + SymbolTable = *C->getBuffer(); if (Name == "/SYM64/") has64SymTable = true; - ++i; - if (i == e) { + if (Increment()) + return; + if (I == E) { ec = std::error_code(); return; } - Name = i->getRawName(); + Name = C->getRawName(); } if (Name == "//") { Format = has64SymTable ? K_MIPS64 : K_GNU; - StringTable = i; - ++i; - FirstRegular = i; + // The string table is never an external member, so we just assert on the + // ErrorOr. + StringTable = *C->getBuffer(); + if (Increment()) + return; + setFirstRegular(*C); ec = std::error_code(); return; } if (Name[0] != '/') { Format = has64SymTable ? K_MIPS64 : K_GNU; - FirstRegular = i; + setFirstRegular(*C); ec = std::error_code(); return; } @@ -337,23 +390,30 @@ Archive::Archive(MemoryBufferRef Source, std::error_code &ec) } Format = K_COFF; - SymbolTable = i; + // We know that the symbol table is not an external file, so we just assert + // there is no error. + SymbolTable = *C->getBuffer(); - ++i; - if (i == e) { - FirstRegular = i; + if (Increment()) + return; + + if (I == E) { + setFirstRegular(*C); ec = std::error_code(); return; } - Name = i->getRawName(); + Name = C->getRawName(); if (Name == "//") { - StringTable = i; - ++i; + // The string table is never an external member, so we just assert on the + // ErrorOr. + StringTable = *C->getBuffer(); + if (Increment()) + return; } - FirstRegular = i; + setFirstRegular(*C); ec = std::error_code(); } @@ -362,22 +422,25 @@ Archive::child_iterator Archive::child_begin(bool SkipInternal) const { return child_end(); if (SkipInternal) - return FirstRegular; + return Child(this, FirstRegularData, FirstRegularStartOfFile); const char *Loc = Data.getBufferStart() + strlen(Magic); - Child c(this, Loc); - return c; + std::error_code EC; + Child c(this, Loc, &EC); + if (EC) + return child_iterator(EC); + return child_iterator(c); } Archive::child_iterator Archive::child_end() const { - return Child(this, nullptr); + return Child(this, nullptr, nullptr); } StringRef Archive::Symbol::getName() const { return Parent->getSymbolTable().begin() + StringIndex; } -ErrorOr Archive::Symbol::getMember() const { +ErrorOr Archive::Symbol::getMember() const { const char *Buf = Parent->getSymbolTable().begin(); const char *Offsets = Buf; if (Parent->kind() == K_MIPS64) @@ -422,8 +485,11 @@ ErrorOr Archive::Symbol::getMember() const { } const char *Loc = Parent->getData().begin() + Offset; - child_iterator Iter(Child(Parent, Loc)); - return Iter; + std::error_code EC; + Child C(Parent, Loc, &EC); + if (EC) + return EC; + return C; } Archive::Symbol Archive::Symbol::getNext() const { @@ -506,12 +572,12 @@ Archive::symbol_iterator Archive::symbol_begin() const { } Archive::symbol_iterator Archive::symbol_end() const { - if (!hasSymbolTable()) - return symbol_iterator(Symbol(this, 0, 0)); return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); } uint32_t Archive::getNumberOfSymbols() const { + if (!hasSymbolTable()) + return 0; const char *buf = getSymbolTable().begin(); if (kind() == K_GNU) return read32be(buf); @@ -542,6 +608,4 @@ Archive::child_iterator Archive::findSym(StringRef name) const { return child_end(); } -bool 
Archive::hasSymbolTable() const { - return SymbolTable != child_end(); -} +bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } diff --git a/lib/Object/ArchiveWriter.cpp b/lib/Object/ArchiveWriter.cpp index a40901c924ea..c7343fdc171d 100644 --- a/lib/Object/ArchiveWriter.cpp +++ b/lib/Object/ArchiveWriter.cpp @@ -34,32 +34,32 @@ using namespace llvm; -NewArchiveIterator::NewArchiveIterator(object::Archive::child_iterator I, +NewArchiveIterator::NewArchiveIterator(const object::Archive::Child &OldMember, StringRef Name) - : IsNewMember(false), Name(Name), OldI(I) {} + : IsNewMember(false), Name(Name), OldMember(OldMember) {} -NewArchiveIterator::NewArchiveIterator(StringRef NewFilename, StringRef Name) - : IsNewMember(true), Name(Name), NewFilename(NewFilename) {} +NewArchiveIterator::NewArchiveIterator(StringRef FileName) + : IsNewMember(true), Name(FileName), OldMember(nullptr, nullptr, nullptr) {} StringRef NewArchiveIterator::getName() const { return Name; } bool NewArchiveIterator::isNewMember() const { return IsNewMember; } -object::Archive::child_iterator NewArchiveIterator::getOld() const { +const object::Archive::Child &NewArchiveIterator::getOld() const { assert(!IsNewMember); - return OldI; + return OldMember; } StringRef NewArchiveIterator::getNew() const { assert(IsNewMember); - return NewFilename; + return Name; } llvm::ErrorOr NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const { assert(IsNewMember); int NewFD; - if (auto EC = sys::fs::openFileForRead(NewFilename, NewFD)) + if (auto EC = sys::fs::openFileForRead(Name, NewFD)) return EC; assert(NewFD != -1); @@ -77,7 +77,7 @@ NewArchiveIterator::getFD(sys::fs::file_status &NewStatus) const { template static void printWithSpacePadding(raw_fd_ostream &OS, T Data, unsigned Size, - bool MayTruncate = false) { + bool MayTruncate = false) { uint64_t OldPos = OS.tell(); OS << Data; unsigned SizeSoFar = OS.tell() - OldPos; @@ -135,30 +135,56 @@ static void printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name, Out.write(uint8_t(0)); } +static bool useStringTable(bool Thin, StringRef Name) { + return Thin || Name.size() >= 16; +} + static void -printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, +printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin, StringRef Name, std::vector::iterator &StringMapIndexIter, const sys::TimeValue &ModTime, unsigned UID, unsigned GID, unsigned Perms, unsigned Size) { if (Kind == object::Archive::K_BSD) return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); - if (Name.size() < 16) + if (!useStringTable(Thin, Name)) return printGNUSmallMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size); Out << '/'; printWithSpacePadding(Out, *StringMapIndexIter++, 15); printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size); } -static void writeStringTable(raw_fd_ostream &Out, +// Compute the relative path from From to To. 
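// Worked examples (illustrative, POSIX-style separators):
//   computeRelativePath("out/lib.a", "src/foo.o")     == "../src/foo.o"
//   computeRelativePath("lib/a/lib.a", "lib/b/foo.o") == "../b/foo.o"
//   computeRelativePath("lib.a", "/abs/foo.o")        == "/abs/foo.o"
// (an absolute path on either side is returned unchanged)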
+static std::string computeRelativePath(StringRef From, StringRef To) { + if (sys::path::is_absolute(From) || sys::path::is_absolute(To)) + return To; + + StringRef DirFrom = sys::path::parent_path(From); + auto FromI = sys::path::begin(DirFrom); + auto ToI = sys::path::begin(To); + while (*FromI == *ToI) { + ++FromI; + ++ToI; + } + + SmallString<128> Relative; + for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI) + sys::path::append(Relative, ".."); + + for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI) + sys::path::append(Relative, *ToI); + + return Relative.str(); +} + +static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName, ArrayRef<NewArchiveIterator> Members, - std::vector<unsigned> &StringMapIndexes) { + std::vector<unsigned> &StringMapIndexes, + bool Thin) { unsigned StartOffset = 0; - for (ArrayRef<NewArchiveIterator>::iterator I = Members.begin(), - E = Members.end(); - I != E; ++I) { - StringRef Name = I->getName(); - if (Name.size() < 16) + for (const NewArchiveIterator &I : Members) { + StringRef Name = sys::path::filename(I.getName()); + if (!useStringTable(Thin, Name)) continue; if (StartOffset == 0) { printWithSpacePadding(Out, "//", 58); @@ -166,7 +192,13 @@ static void writeStringTable(raw_fd_ostream &Out, StartOffset = Out.tell(); } StringMapIndexes.push_back(Out.tell() - StartOffset); - Out << Name << "/\n"; + + if (Thin) + Out << computeRelativePath(ArcName, I.getName()); + else + Out << Name; + + Out << "/\n"; } if (StartOffset == 0) return; @@ -268,9 +300,11 @@ writeSymbolTable(raw_fd_ostream &Out, object::Archive::Kind Kind, return BodyStartOffset + 4; } -std::pair<StringRef, std::error_code> llvm::writeArchive( - StringRef ArcName, std::vector<NewArchiveIterator> &NewMembers, - bool WriteSymtab, object::Archive::Kind Kind, bool Deterministic) { +std::pair<StringRef, std::error_code> +llvm::writeArchive(StringRef ArcName, + std::vector<NewArchiveIterator> &NewMembers, + bool WriteSymtab, object::Archive::Kind Kind, + bool Deterministic, bool Thin) { SmallString<128> TmpArchive; int TmpArchiveFD; if (auto EC = sys::fs::createUniqueFile(ArcName + ".temp-archive-%%%%%%%.a", @@ -279,7 +313,10 @@ std::pair<StringRef, std::error_code> llvm::writeArchive( tool_output_file Output(TmpArchive, TmpArchiveFD); raw_fd_ostream &Out = Output.os(); - Out << "!<arch>\n"; + if (Thin) + Out << "!<thin>\n"; + else + Out << "!<arch>\n"; std::vector<unsigned> MemberOffsetRefs; @@ -309,9 +346,11 @@ std::pair<StringRef, std::error_code> llvm::writeArchive( Buffers.push_back(std::move(MemberBufferOrErr.get())); MemberRef = Buffers.back()->getMemBufferRef(); } else { - object::Archive::child_iterator OldMember = Member.getOld(); + const object::Archive::Child &OldMember = Member.getOld(); + assert((!Thin || OldMember.getParent()->isThin()) && + "Thin archives cannot refers to member of other archives"); ErrorOr<MemoryBufferRef> MemberBufferOrErr = - OldMember->getMemoryBufferRef(); + OldMember.getMemoryBufferRef(); if (auto EC = MemberBufferOrErr.getError()) return std::make_pair("", EC); MemberRef = MemberBufferOrErr.get(); @@ -330,7 +369,7 @@ std::pair<StringRef, std::error_code> llvm::writeArchive( std::vector<unsigned> StringMapIndexes; if (Kind != object::Archive::K_BSD) - writeStringTable(Out, NewMembers, StringMapIndexes); + writeStringTable(Out, ArcName, NewMembers, StringMapIndexes, Thin); unsigned MemberNum = 0; unsigned NewMemberNum = 0; @@ -358,26 +397,32 @@ std::pair<StringRef, std::error_code> llvm::writeArchive( GID = Status.getGroup(); Perms = Status.permissions(); } else { - object::Archive::child_iterator OldMember = I.getOld(); - ModTime = OldMember->getLastModified(); - UID = OldMember->getUID(); - GID = OldMember->getGID(); - Perms = OldMember->getAccessMode(); + const object::Archive::Child &OldMember = I.getOld(); + ModTime = 
OldMember.getLastModified(); + UID = OldMember.getUID(); + GID = OldMember.getGID(); + Perms = OldMember.getAccessMode(); } if (I.isNewMember()) { StringRef FileName = I.getNew(); const sys::fs::file_status &Status = NewMemberStatus[NewMemberNum++]; - printMemberHeader(Out, Kind, sys::path::filename(FileName), + printMemberHeader(Out, Kind, Thin, sys::path::filename(FileName), StringMapIndexIter, ModTime, UID, GID, Perms, Status.getSize()); } else { - object::Archive::child_iterator OldMember = I.getOld(); - printMemberHeader(Out, Kind, I.getName(), StringMapIndexIter, ModTime, - UID, GID, Perms, OldMember->getSize()); + const object::Archive::Child &OldMember = I.getOld(); + ErrorOr<uint64_t> Size = OldMember.getSize(); + if (std::error_code EC = Size.getError()) + return std::make_pair("", EC); + StringRef FileName = I.getName(); + printMemberHeader(Out, Kind, Thin, sys::path::filename(FileName), + StringMapIndexIter, ModTime, UID, GID, Perms, + Size.get()); } - Out << File.getBuffer(); + if (!Thin) + Out << File.getBuffer(); if (Out.tell() % 2) Out << '\n'; diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index 8f10143fccc3..2ac2ee51dc23 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_library(LLVMObject RecordStreamer.cpp SymbolicFile.cpp SymbolSize.cpp + FunctionIndexObjectFile.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Object diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index bcca9839b475..1f2111759a0e 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -171,6 +171,11 @@ ErrorOr<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const { if (std::error_code EC = getSection(SectionNumber, Section)) return EC; Result += Section->VirtualAddress; + + // The section VirtualAddress does not include ImageBase, and we want to + // return virtual addresses. 
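// E.g. with a PE32+ image based at 0x140000000 and a symbol in a section at
// RVA 0x1000, the result is now the VA 0x140001000 rather than the bare RVA;
// getSectionAddress below gets the same adjustment.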
+ Result += getImageBase(); + return Result; } @@ -178,10 +183,10 @@ SymbolRef::Type COFFObjectFile::getSymbolType(DataRefImpl Ref) const { COFFSymbolRef Symb = getCOFFSymbol(Ref); int32_t SectionNumber = Symb.getSectionNumber(); + if (Symb.getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) + return SymbolRef::ST_Function; if (Symb.isAnyUndefined()) return SymbolRef::ST_Unknown; - if (Symb.isFunctionDefinition()) - return SymbolRef::ST_Function; if (Symb.isCommon()) return SymbolRef::ST_Data; if (Symb.isFileRecord()) @@ -230,21 +235,17 @@ uint64_t COFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Ref) const { return Symb.getValue(); } -std::error_code -COFFObjectFile::getSymbolSection(DataRefImpl Ref, - section_iterator &Result) const { +ErrorOr +COFFObjectFile::getSymbolSection(DataRefImpl Ref) const { COFFSymbolRef Symb = getCOFFSymbol(Ref); - if (COFF::isReservedSectionNumber(Symb.getSectionNumber())) { - Result = section_end(); - } else { - const coff_section *Sec = nullptr; - if (std::error_code EC = getSection(Symb.getSectionNumber(), Sec)) - return EC; - DataRefImpl Ref; - Ref.p = reinterpret_cast(Sec); - Result = section_iterator(SectionRef(Ref, this)); - } - return std::error_code(); + if (COFF::isReservedSectionNumber(Symb.getSectionNumber())) + return section_end(); + const coff_section *Sec = nullptr; + if (std::error_code EC = getSection(Symb.getSectionNumber(), Sec)) + return EC; + DataRefImpl Ret; + Ret.p = reinterpret_cast(Sec); + return section_iterator(SectionRef(Ret, this)); } unsigned COFFObjectFile::getSymbolSectionID(SymbolRef Sym) const { @@ -266,7 +267,12 @@ std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref, uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const { const coff_section *Sec = toSec(Ref); - return Sec->VirtualAddress; + uint64_t Result = Sec->VirtualAddress; + + // The section VirtualAddress does not include ImageBase, and we want to + // return virtual addresses. + Result += getImageBase(); + return Result; } uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const { @@ -412,10 +418,18 @@ std::error_code COFFObjectFile::initSymbolTablePtr() { return std::error_code(); } +uint64_t COFFObjectFile::getImageBase() const { + if (PE32Header) + return PE32Header->ImageBase; + else if (PE32PlusHeader) + return PE32PlusHeader->ImageBase; + // This actually comes up in practice. + return 0; +} + // Returns the file offset for the given VA. std::error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const { - uint64_t ImageBase = PE32Header ? 
(uint64_t)PE32Header->ImageBase - : (uint64_t)PE32PlusHeader->ImageBase; + uint64_t ImageBase = getImageBase(); uint64_t Rva = Addr - ImageBase; assert(Rva <= UINT32_MAX); return getRvaPtr((uint32_t)Rva, Res); @@ -744,6 +758,8 @@ StringRef COFFObjectFile::getFileFormatName() const { return "COFF-x86-64"; case COFF::IMAGE_FILE_MACHINE_ARMNT: return "COFF-ARM"; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return "COFF-ARM64"; default: return "COFF-"; } @@ -757,6 +773,8 @@ unsigned COFFObjectFile::getArch() const { return Triple::x86_64; case COFF::IMAGE_FILE_MACHINE_ARMNT: return Triple::thumb; + case COFF::IMAGE_FILE_MACHINE_ARM64: + return Triple::aarch64; default: return Triple::UnknownArch; } diff --git a/lib/Object/COFFYAML.cpp b/lib/Object/COFFYAML.cpp index 9a24b531da9e..4c1fca19bf1b 100644 --- a/lib/Object/COFFYAML.cpp +++ b/lib/Object/COFFYAML.cpp @@ -56,6 +56,7 @@ void ScalarEnumerationTraits::enumeration( ECase(IMAGE_FILE_MACHINE_AMD64); ECase(IMAGE_FILE_MACHINE_ARM); ECase(IMAGE_FILE_MACHINE_ARMNT); + ECase(IMAGE_FILE_MACHINE_ARM64); ECase(IMAGE_FILE_MACHINE_EBC); ECase(IMAGE_FILE_MACHINE_I386); ECase(IMAGE_FILE_MACHINE_IA64); @@ -210,6 +211,7 @@ void ScalarBitSetTraits::bitset( void ScalarBitSetTraits::bitset( IO &IO, COFF::SectionCharacteristics &Value) { + BCase(IMAGE_SCN_TYPE_NOLOAD); BCase(IMAGE_SCN_TYPE_NO_PAD); BCase(IMAGE_SCN_CNT_CODE); BCase(IMAGE_SCN_CNT_INITIALIZED_DATA); diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp index 398e9e412994..62c27cc427a6 100644 --- a/lib/Object/ELF.cpp +++ b/lib/Object/ELF.cpp @@ -26,6 +26,7 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { } break; case ELF::EM_386: + case ELF::EM_IAMCU: switch (Type) { #include "llvm/Support/ELFRelocs/i386.def" default: diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp index 72c232c32870..4a4b2276f46b 100644 --- a/lib/Object/ELFYAML.cpp +++ b/lib/Object/ELFYAML.cpp @@ -193,6 +193,7 @@ ScalarEnumerationTraits::enumeration(IO &IO, ECase(EM_VIDEOCORE5) ECase(EM_78KOR) ECase(EM_56800EX) + ECase(EM_AMDGPU) #undef ECase } @@ -316,6 +317,25 @@ void ScalarBitSetTraits::bitset(IO &IO, BCase(EF_HEXAGON_ISA_V4) BCase(EF_HEXAGON_ISA_V5) break; + case ELF::EM_AVR: + BCase(EF_AVR_ARCH_AVR1) + BCase(EF_AVR_ARCH_AVR2) + BCase(EF_AVR_ARCH_AVR25) + BCase(EF_AVR_ARCH_AVR3) + BCase(EF_AVR_ARCH_AVR31) + BCase(EF_AVR_ARCH_AVR35) + BCase(EF_AVR_ARCH_AVR4) + BCase(EF_AVR_ARCH_AVR51) + BCase(EF_AVR_ARCH_AVR6) + BCase(EF_AVR_ARCH_AVRTINY) + BCase(EF_AVR_ARCH_XMEGA1) + BCase(EF_AVR_ARCH_XMEGA2) + BCase(EF_AVR_ARCH_XMEGA3) + BCase(EF_AVR_ARCH_XMEGA4) + BCase(EF_AVR_ARCH_XMEGA5) + BCase(EF_AVR_ARCH_XMEGA6) + BCase(EF_AVR_ARCH_XMEGA7) + break; default: llvm_unreachable("Unsupported architecture"); } @@ -382,6 +402,7 @@ void ScalarEnumerationTraits::enumeration( void ScalarBitSetTraits::bitset(IO &IO, ELFYAML::ELF_SHF &Value) { + const auto *Object = static_cast(IO.getContext()); #define BCase(X) IO.bitSetCase(Value, #X, ELF::X); BCase(SHF_WRITE) BCase(SHF_ALLOC) @@ -394,6 +415,17 @@ void ScalarBitSetTraits::bitset(IO &IO, BCase(SHF_OS_NONCONFORMING) BCase(SHF_GROUP) BCase(SHF_TLS) + switch(Object->Header.Machine) { + case ELF::EM_AMDGPU: + BCase(SHF_AMDGPU_HSA_GLOBAL) + BCase(SHF_AMDGPU_HSA_READONLY) + BCase(SHF_AMDGPU_HSA_CODE) + BCase(SHF_AMDGPU_HSA_AGENT) + break; + default: + // Nothing to do. 
+ break; + } #undef BCase } @@ -466,6 +498,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration( #include "llvm/Support/ELFRelocs/Hexagon.def" break; case ELF::EM_386: + case ELF::EM_IAMCU: #include "llvm/Support/ELFRelocs/i386.def" break; case ELF::EM_AARCH64: diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp index 7ca2f12f0924..7ecc3a19af9d 100644 --- a/lib/Object/Error.cpp +++ b/lib/Object/Error.cpp @@ -47,6 +47,8 @@ std::string _object_error_category::message(int EV) const { return "Invalid section index"; case object_error::bitcode_section_not_found: return "Bitcode section not found in object file"; + case object_error::elf_invalid_dynamic_table_size: + return "Invalid dynamic table size"; case object_error::macho_small_load_command: return "Mach-O load command with size < 8 bytes"; case object_error::macho_load_segment_too_many_sections: diff --git a/lib/Object/FunctionIndexObjectFile.cpp b/lib/Object/FunctionIndexObjectFile.cpp new file mode 100644 index 000000000000..fe111de1a9c8 --- /dev/null +++ b/lib/Object/FunctionIndexObjectFile.cpp @@ -0,0 +1,143 @@ +//===- FunctionIndexObjectFile.cpp - Function index file implementation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Part of the FunctionIndexObjectFile class implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/FunctionIndexObjectFile.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/FunctionInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; +using namespace object; + +FunctionIndexObjectFile::FunctionIndexObjectFile( + MemoryBufferRef Object, std::unique_ptr<FunctionInfoIndex> I) + : SymbolicFile(Binary::ID_FunctionIndex, Object), Index(std::move(I)) {} + +FunctionIndexObjectFile::~FunctionIndexObjectFile() {} + +std::unique_ptr<FunctionInfoIndex> FunctionIndexObjectFile::takeIndex() { + return std::move(Index); +} + +ErrorOr<MemoryBufferRef> +FunctionIndexObjectFile::findBitcodeInObject(const ObjectFile &Obj) { + for (const SectionRef &Sec : Obj.sections()) { + StringRef SecName; + if (std::error_code EC = Sec.getName(SecName)) + return EC; + if (SecName == ".llvmbc") { + StringRef SecContents; + if (std::error_code EC = Sec.getContents(SecContents)) + return EC; + return MemoryBufferRef(SecContents, Obj.getFileName()); + } + } + + return object_error::bitcode_section_not_found; +} + +ErrorOr<MemoryBufferRef> +FunctionIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) { + sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + switch (Type) { + case sys::fs::file_magic::bitcode: + return Object; + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::coff_object: { + ErrorOr<std::unique_ptr<ObjectFile>> ObjFile = + ObjectFile::createObjectFile(Object, Type); + if (!ObjFile) + return ObjFile.getError(); + return findBitcodeInObject(*ObjFile->get()); + } + default: + return object_error::invalid_file_type; + } +} + +// Looks for function index in the given memory buffer. +// returns true if found, else false. 
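For a sense of how the entry point exported by this new file is meant to be driven, here is a hypothetical consumer of getFunctionIndexForFile (defined near the bottom of the file); the handler body and function name are placeholders:

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/FunctionInfo.h"
#include "llvm/Object/FunctionIndexObjectFile.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical driver for the function-index reading API added here.
static void dumpFunctionIndex(llvm::StringRef Path) {
  auto DiagHandler = [](const llvm::DiagnosticInfo &DI) { /* report DI */ };
  llvm::ErrorOr<std::unique_ptr<llvm::FunctionInfoIndex>> IndexOrErr =
      llvm::getFunctionIndexForFile(Path, DiagHandler);
  if (std::error_code EC = IndexOrErr.getError()) {
    llvm::errs() << Path << ": " << EC.message() << "\n";
    return;
  }
  llvm::FunctionInfoIndex &Index = **IndexOrErr;
  (void)Index; // ... inspect the parsed index here.
}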
+bool FunctionIndexObjectFile::hasFunctionSummaryInMemBuffer( + MemoryBufferRef Object, DiagnosticHandlerFunction DiagnosticHandler) { + ErrorOr BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return false; + + return hasFunctionSummary(BCOrErr.get(), DiagnosticHandler); +} + +// Parse function index in the given memory buffer. +// Return new FunctionIndexObjectFile instance containing parsed +// function summary/index. +ErrorOr> +FunctionIndexObjectFile::create(MemoryBufferRef Object, + DiagnosticHandlerFunction DiagnosticHandler, + bool IsLazy) { + std::unique_ptr Index; + + ErrorOr BCOrErr = findBitcodeInMemBuffer(Object); + if (!BCOrErr) + return BCOrErr.getError(); + + ErrorOr> IOrErr = getFunctionInfoIndex( + BCOrErr.get(), DiagnosticHandler, IsLazy); + + if (std::error_code EC = IOrErr.getError()) + return EC; + + Index = std::move(IOrErr.get()); + + return llvm::make_unique(Object, std::move(Index)); +} + +// Parse the function summary information for function with the +// given name out of the given buffer. Parsed information is +// stored on the index object saved in this object. +std::error_code FunctionIndexObjectFile::findFunctionSummaryInMemBuffer( + MemoryBufferRef Object, DiagnosticHandlerFunction DiagnosticHandler, + StringRef FunctionName) { + sys::fs::file_magic Type = sys::fs::identify_magic(Object.getBuffer()); + switch (Type) { + case sys::fs::file_magic::bitcode: { + return readFunctionSummary(Object, DiagnosticHandler, FunctionName, + std::move(Index)); + } + default: + return object_error::invalid_file_type; + } +} + +// Parse the function index out of an IR file and return the function +// index object if found, or nullptr if not. +ErrorOr> +llvm::getFunctionIndexForFile(StringRef Path, + DiagnosticHandlerFunction DiagnosticHandler) { + ErrorOr> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + std::error_code EC = FileOrErr.getError(); + if (EC) + return EC; + MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef(); + ErrorOr> ObjOrErr = + object::FunctionIndexObjectFile::create(BufferRef, DiagnosticHandler); + EC = ObjOrErr.getError(); + if (EC) + return EC; + + object::FunctionIndexObjectFile &Obj = **ObjOrErr; + return Obj.takeIndex(); +} diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index 9f5132e9062c..c35c413b3c3b 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -219,6 +219,12 @@ uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { uint32_t Res = BasicSymbolRef::SF_None; if (GV->isDeclarationForLinker()) Res |= BasicSymbolRef::SF_Undefined; + else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage()) + Res |= BasicSymbolRef::SF_Hidden; + if (const GlobalVariable *GVar = dyn_cast(GV)) { + if (GVar->isConstant()) + Res |= BasicSymbolRef::SF_Const; + } if (GV->hasPrivateLinkage()) Res |= BasicSymbolRef::SF_FormatSpecific; if (!GV->hasLocalLinkage()) @@ -303,7 +309,7 @@ llvm::object::IRObjectFile::create(MemoryBufferRef Object, MemoryBuffer::getMemBuffer(BCOrErr.get(), false)); ErrorOr> MOrErr = - getLazyBitcodeModule(std::move(Buff), Context, nullptr, + getLazyBitcodeModule(std::move(Buff), Context, /*ShouldLazyLoadMetadata*/ true); if (std::error_code EC = MOrErr.getError()) return EC; diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 05900630c75c..d1f79b225ee4 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -278,7 +278,7 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, 
return; }
      LinkOptHintsLoadCmd = Load.Ptr;
-    } else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
+    } else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
               Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
      // Multiple dyldinfo load commands
      if (DyldInfoLoadCmd) {
@@ -401,6 +401,9 @@ SymbolRef::Type MachOObjectFile::getSymbolType(DataRefImpl Symb) const { case MachO::N_UNDF : return SymbolRef::ST_Unknown; case MachO::N_SECT :
+      section_iterator Sec = *getSymbolSection(Symb);
+      if (Sec->isData() || Sec->isBSS())
+        return SymbolRef::ST_Data;
      return SymbolRef::ST_Function;
  }
  return SymbolRef::ST_Other;
@@ -445,22 +448,18 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { return Result; }
-std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb,
-                                                  section_iterator &Res) const {
+ErrorOr<section_iterator>
+MachOObjectFile::getSymbolSection(DataRefImpl Symb) const {
  MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
  uint8_t index = Entry.n_sect;
-  if (index == 0) {
-    Res = section_end();
-  } else {
-    DataRefImpl DRI;
-    DRI.d.a = index - 1;
-    if (DRI.d.a >= Sections.size())
-      report_fatal_error("getSymbolSection: Invalid section index.");
-    Res = section_iterator(SectionRef(DRI, this));
-  }
-
-  return std::error_code();
+  if (index == 0)
+    return section_end();
+  DataRefImpl DRI;
+  DRI.d.a = index - 1;
+  if (DRI.d.a >= Sections.size())
+    report_fatal_error("getSymbolSection: Invalid section index.");
+  return section_iterator(SectionRef(DRI, this));
}

unsigned MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const {
@@ -487,9 +486,32 @@ uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { }

uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const {
-  if (is64Bit())
-    return getSection64(Sec).size;
-  return getSection(Sec).size;
+  // In the case of a malformed Mach-O file, where the section offset is past
+  // the end of the file or some part of the section size is past the end of
+  // the file, return a size of zero or a size that covers the rest of the
+  // file but does not extend past the end of the file.
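+  // Worked example (editor's sketch, illustrative values): in a 0x1000-byte
+  // file, a section claiming offset 0xF00 and size 0x400 is clamped to
+  // 0x1000 - 0xF00 = 0x100 bytes, and a section whose offset 0x1200 lies
+  // entirely past EOF gets size 0. Zero-fill section types are exempt below
+  // because they occupy no bytes in the file.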
+ uint32_t SectOffset, SectType; + uint64_t SectSize; + + if (is64Bit()) { + MachO::section_64 Sect = getSection64(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } else { + MachO::section Sect = getSection(Sec); + SectOffset = Sect.offset; + SectSize = Sect.size; + SectType = Sect.flags & MachO::SECTION_TYPE; + } + if (SectType == MachO::S_ZEROFILL || SectType == MachO::S_GB_ZEROFILL) + return SectSize; + uint64_t FileSize = getData().size(); + if (SectOffset > FileSize) + return 0; + if (FileSize - SectOffset < SectSize) + return FileSize - SectOffset; + return SectSize; } std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec, @@ -1136,8 +1158,7 @@ Triple MachOObjectFile::getThumbArch(uint32_t CPUType, uint32_t CPUSubType, } Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType, - const char **McpuDefault, - Triple *ThumbTriple) { + const char **McpuDefault, Triple *ThumbTriple) { Triple T = MachOObjectFile::getArch(CPUType, CPUSubType, McpuDefault); *ThumbTriple = MachOObjectFile::getThumbArch(CPUType, CPUSubType, McpuDefault); @@ -1212,8 +1233,8 @@ dice_iterator MachOObjectFile::end_dices() const { return dice_iterator(DiceRef(DRI, this)); } -ExportEntry::ExportEntry(ArrayRef T) - : Trie(T), Malformed(false), Done(false) { } +ExportEntry::ExportEntry(ArrayRef T) + : Trie(T), Malformed(false), Done(false) {} void ExportEntry::moveToFirst() { pushNode(0); @@ -1226,7 +1247,7 @@ void ExportEntry::moveToEnd() { } bool ExportEntry::operator==(const ExportEntry &Other) const { - // Common case, one at end, other iterating from begin. + // Common case, one at end, other iterating from begin. if (Done || Other.Done) return (Done == Other.Done); // Not equal if different stack sizes. @@ -1240,7 +1261,7 @@ bool ExportEntry::operator==(const ExportEntry &Other) const { if (Stack[i].Start != Other.Stack[i].Start) return false; } - return true; + return true; } uint64_t ExportEntry::readULEB128(const uint8_t *&Ptr) { @@ -1281,11 +1302,10 @@ uint32_t ExportEntry::nodeOffset() const { return Stack.back().Start - Trie.begin(); } -ExportEntry::NodeState::NodeState(const uint8_t *Ptr) - : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), - ImportName(nullptr), ChildCount(0), NextChildIndex(0), - ParentStringLength(0), IsExportNode(false) { -} +ExportEntry::NodeState::NodeState(const uint8_t *Ptr) + : Start(Ptr), Current(Ptr), Flags(0), Address(0), Other(0), + ImportName(nullptr), ChildCount(0), NextChildIndex(0), + ParentStringLength(0), IsExportNode(false) {} void ExportEntry::pushNode(uint64_t offset) { const uint8_t *Ptr = Trie.begin() + offset; @@ -1302,7 +1322,7 @@ void ExportEntry::pushNode(uint64_t offset) { } else { State.Address = readULEB128(State.Current); if (State.Flags & MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) - State.Other = readULEB128(State.Current); + State.Other = readULEB128(State.Current); } } State.ChildCount = *Children; @@ -1339,7 +1359,7 @@ void ExportEntry::pushDownUntilBottom() { // // There is one "export" node for each exported symbol. But because some // symbols may be a prefix of another symbol (e.g. _dup and _dup2), an export -// node may have child nodes too. +// node may have child nodes too. 
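// For instance (editor's sketch), exporting _dup and _dup2 gives a trie of
// the shape:
//
//   (root) --"_dup"--> [export node: _dup] --"2"--> [export node: _dup2]
//
// where the node for _dup is both an export node and an interior node with
// one child edge labeled "2".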
// // The algorithm for moveNext() is to keep moving down the leftmost unvisited // child until hitting a node with no children (which is an export node or @@ -1372,7 +1392,7 @@ void ExportEntry::moveNext() { Done = true; } -iterator_range +iterator_range MachOObjectFile::exports(ArrayRef Trie) { ExportEntry Start(Trie); if (Trie.size() == 0) @@ -1383,15 +1403,13 @@ MachOObjectFile::exports(ArrayRef Trie) { ExportEntry Finish(Trie); Finish.moveToEnd(); - return iterator_range(export_iterator(Start), - export_iterator(Finish)); + return make_range(export_iterator(Start), export_iterator(Finish)); } iterator_range MachOObjectFile::exports() const { return exports(getDyldInfoExportsTrie()); } - MachORebaseEntry::MachORebaseEntry(ArrayRef Bytes, bool is64Bit) : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), RemainingLoopCount(0), AdvanceAmount(0), RebaseType(0), @@ -1555,17 +1573,14 @@ MachOObjectFile::rebaseTable(ArrayRef Opcodes, bool is64) { MachORebaseEntry Finish(Opcodes, is64); Finish.moveToEnd(); - return iterator_range(rebase_iterator(Start), - rebase_iterator(Finish)); + return make_range(rebase_iterator(Start), rebase_iterator(Finish)); } iterator_range MachOObjectFile::rebaseTable() const { return rebaseTable(getDyldInfoRebaseOpcodes(), is64Bit()); } - -MachOBindEntry::MachOBindEntry(ArrayRef Bytes, bool is64Bit, - Kind BK) +MachOBindEntry::MachOBindEntry(ArrayRef Bytes, bool is64Bit, Kind BK) : Opcodes(Bytes), Ptr(Bytes.begin()), SegmentOffset(0), SegmentIndex(0), Ordinal(0), Flags(0), Addend(0), RemainingLoopCount(0), AdvanceAmount(0), BindType(0), PointerSize(is64Bit ? 8 : 4), @@ -1769,7 +1784,6 @@ int64_t MachOBindEntry::readSLEB128() { return Result; } - uint32_t MachOBindEntry::segmentIndex() const { return SegmentIndex; } uint64_t MachOBindEntry::segmentOffset() const { return SegmentOffset; } @@ -1810,8 +1824,7 @@ MachOObjectFile::bindTable(ArrayRef Opcodes, bool is64, MachOBindEntry Finish(Opcodes, is64, BKind); Finish.moveToEnd(); - return iterator_range(bind_iterator(Start), - bind_iterator(Finish)); + return make_range(bind_iterator(Start), bind_iterator(Finish)); } iterator_range MachOObjectFile::bindTable() const { @@ -1841,8 +1854,7 @@ MachOObjectFile::end_load_commands() const { iterator_range MachOObjectFile::load_commands() const { - return iterator_range(begin_load_commands(), - end_load_commands()); + return make_range(begin_load_commands(), end_load_commands()); } StringRef @@ -2207,66 +2219,66 @@ MachOObjectFile::getLinkOptHintsLoadCommand() const { } ArrayRef MachOObjectFile::getDyldInfoRebaseOpcodes() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.rebase_off)); - return ArrayRef(Ptr, DyldInfo.rebase_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast(getPtr(this, DyldInfo.rebase_off)); + return makeArrayRef(Ptr, DyldInfo.rebase_size); } ArrayRef MachOObjectFile::getDyldInfoBindOpcodes() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.bind_off)); - return ArrayRef(Ptr, DyldInfo.bind_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + 
reinterpret_cast(getPtr(this, DyldInfo.bind_off)); + return makeArrayRef(Ptr, DyldInfo.bind_size); } ArrayRef MachOObjectFile::getDyldInfoWeakBindOpcodes() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.weak_bind_off)); - return ArrayRef(Ptr, DyldInfo.weak_bind_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast(getPtr(this, DyldInfo.weak_bind_off)); + return makeArrayRef(Ptr, DyldInfo.weak_bind_size); } ArrayRef MachOObjectFile::getDyldInfoLazyBindOpcodes() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.lazy_bind_off)); - return ArrayRef(Ptr, DyldInfo.lazy_bind_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast(getPtr(this, DyldInfo.lazy_bind_off)); + return makeArrayRef(Ptr, DyldInfo.lazy_bind_size); } ArrayRef MachOObjectFile::getDyldInfoExportsTrie() const { - if (!DyldInfoLoadCmd) - return ArrayRef(); + if (!DyldInfoLoadCmd) + return None; - MachO::dyld_info_command DyldInfo - = getStruct(this, DyldInfoLoadCmd); - const uint8_t *Ptr = reinterpret_cast( - getPtr(this, DyldInfo.export_off)); - return ArrayRef(Ptr, DyldInfo.export_size); + MachO::dyld_info_command DyldInfo = + getStruct(this, DyldInfoLoadCmd); + const uint8_t *Ptr = + reinterpret_cast(getPtr(this, DyldInfo.export_off)); + return makeArrayRef(Ptr, DyldInfo.export_size); } ArrayRef MachOObjectFile::getUuid() const { if (!UuidLoadCmd) - return ArrayRef(); + return None; // Returning a pointer is fine as uuid doesn't need endian swapping. 
const char *Ptr = UuidLoadCmd + offsetof(MachO::uuid_command, uuid); - return ArrayRef(reinterpret_cast(Ptr), 16); + return makeArrayRef(reinterpret_cast(Ptr), 16); } StringRef MachOObjectFile::getStringTableData() const { @@ -2315,4 +2327,3 @@ ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer) { return EC; return std::move(Ret); } - diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index 1d0e69e4622d..a1c83b9b7f86 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -69,14 +69,14 @@ MachOUniversalBinary::ObjectForArch::ObjectForArch( ErrorOr> MachOUniversalBinary::ObjectForArch::getAsObjectFile() const { - if (Parent) { - StringRef ParentData = Parent->getData(); - StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - StringRef ObjectName = Parent->getFileName(); - MemoryBufferRef ObjBuffer(ObjectData, ObjectName); - return ObjectFile::createMachOObjectFile(ObjBuffer); - } - return object_error::parse_failed; + if (!Parent) + return object_error::parse_failed; + + StringRef ParentData = Parent->getData(); + StringRef ObjectData = ParentData.substr(Header.offset, Header.size); + StringRef ObjectName = Parent->getFileName(); + MemoryBufferRef ObjBuffer(ObjectData, ObjectName); + return ObjectFile::createMachOObjectFile(ObjBuffer); } ErrorOr> diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 5c4b7a67b2ad..b44c1a16fd08 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -98,8 +98,10 @@ void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect, LLVMSymbolIteratorRef Sym) { - if (std::error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect))) + ErrorOr SecOrErr = (*unwrap(Sym))->getSection(); + if (std::error_code ec = SecOrErr.getError()) report_fatal_error(ec.message()); + *unwrap(Sect) = *SecOrErr; } // ObjectFile Symbol iterators diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index f82edae89bc6..d12dc411361c 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -29,10 +29,10 @@ ObjectFile::ObjectFile(unsigned int Type, MemoryBufferRef Source) : SymbolicFile(Type, Source) {} bool SectionRef::containsSymbol(SymbolRef S) const { - section_iterator SymSec = getObject()->section_end(); - if (S.getSection(SymSec)) + ErrorOr SymSec = S.getSection(); + if (!SymSec) return false; - return *this == *SymSec; + return *this == **SymSec; } uint64_t ObjectFile::getSymbolValue(DataRefImpl Ref) const { diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index 854e68e40f4d..bf79dfb8da62 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolicFile.h" @@ -54,9 +56,10 @@ ErrorOr> SymbolicFile::createSymbolicFile( case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: case sys::fs::file_magic::macho_dsym_companion: case sys::fs::file_magic::macho_kext_bundle: - case sys::fs::file_magic::coff_import_library: case sys::fs::file_magic::pecoff_executable: return ObjectFile::createObjectFile(Object, Type); + case sys::fs::file_magic::coff_import_library: + return std::unique_ptr(new COFFImportFile(Object)); case sys::fs::file_magic::elf_relocatable: case sys::fs::file_magic::macho_object: 
case sys::fs::file_magic::coff_object: { diff --git a/lib/Option/Arg.cpp b/lib/Option/Arg.cpp index ac000736c1f3..c3de2d1a4965 100644 --- a/lib/Option/Arg.cpp +++ b/lib/Option/Arg.cpp @@ -13,6 +13,7 @@ #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" using namespace llvm; using namespace llvm::opt; @@ -43,23 +44,25 @@ Arg::~Arg() { } } -void Arg::dump() const { - llvm::errs() << "<"; +void Arg::print(raw_ostream& O) const { + O << "<"; - llvm::errs() << " Opt:"; - Opt.dump(); + O << " Opt:"; + Opt.print(O); - llvm::errs() << " Index:" << Index; + O << " Index:" << Index; - llvm::errs() << " Values: ["; + O << " Values: ["; for (unsigned i = 0, e = Values.size(); i != e; ++i) { - if (i) llvm::errs() << ", "; - llvm::errs() << "'" << Values[i] << "'"; + if (i) O << ", "; + O << "'" << Values[i] << "'"; } - llvm::errs() << "]>\n"; + O << "]>\n"; } +LLVM_DUMP_METHOD void Arg::dump() const { print(dbgs()); } + std::string Arg::getAsString(const ArgList &Args) const { SmallString<256> Res; llvm::raw_svector_ostream OS(Res); diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index a74ead6b3588..0826ef873195 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/Option.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -258,6 +259,21 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0, } } +void ArgList::AddAllArgs(ArgStringList &Output, + ArrayRef Ids) const { + for (const Arg *Arg : Args) { + for (OptSpecifier Id : Ids) { + if (Arg->getOption().matches(Id)) { + Arg->claim(); + Arg->render(*this, Output); + break; + } + } + } +} + +/// This 3-opt variant of AddAllArgs could be eliminated in favor of one +/// that accepts a single specifier, given the above which accepts any number. void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const { for (auto Arg: filtered(Id0, Id1, Id2)) { @@ -313,6 +329,15 @@ const char *ArgList::GetOrMakeJoinedArgString(unsigned Index, return MakeArgString(LHS + RHS); } +void ArgList::print(raw_ostream &O) const { + for (Arg *A : *this) { + O << "* "; + A->print(O); + } +} + +LLVM_DUMP_METHOD void ArgList::dump() const { print(dbgs()); } + // void InputArgList::releaseMemory() { diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp index e83536f2b572..09d4cebb83d0 100644 --- a/lib/Option/OptTable.cpp +++ b/lib/Option/OptTable.cpp @@ -84,11 +84,9 @@ static inline bool operator<(const OptTable::Info &I, const char *Name) { OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} -OptTable::OptTable(const Info *OptionInfos, unsigned NumOptionInfos, - bool IgnoreCase) - : OptionInfos(OptionInfos), NumOptionInfos(NumOptionInfos), - IgnoreCase(IgnoreCase), TheInputOptionID(0), TheUnknownOptionID(0), - FirstSearchableIndex(0) { +OptTable::OptTable(ArrayRef OptionInfos, bool IgnoreCase) + : OptionInfos(OptionInfos), IgnoreCase(IgnoreCase), TheInputOptionID(0), + TheUnknownOptionID(0), FirstSearchableIndex(0) { // Explicitly zero initialize the error to work around a bug in array // value-initialization on MinGW with gcc 4.3.5. 
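The new ArrayRef-based AddAllArgs overload above claims and renders every
argument that matches any of the given specifiers, preserving command-line
order. A minimal sketch of a caller (OPT_L and OPT_l are hypothetical option
IDs, not defined in this patch; real drivers take them from their generated
options table):

    #include "llvm/Option/ArgList.h"
    using namespace llvm::opt;

    // Hypothetical option IDs, stand-ins for tablegen'd values.
    enum HypotheticalID : unsigned { OPT_L = 1, OPT_l = 2 };

    static void forwardSearchPaths(const ArgList &Args,
                                   ArgStringList &CmdArgs) {
      // Claims each matching argument and appends its rendered form in the
      // order the arguments appeared on the command line.
      Args.AddAllArgs(CmdArgs, {OPT_L, OPT_l});
    }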
@@ -199,8 +197,8 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, if (isInput(PrefixesUnion, Str)) return new Arg(getOption(TheInputOptionID), Str, Index++, Str); - const Info *Start = OptionInfos + FirstSearchableIndex; - const Info *End = OptionInfos + getNumOptions(); + const Info *Start = OptionInfos.begin() + FirstSearchableIndex; + const Info *End = OptionInfos.end(); StringRef Name = StringRef(Str).ltrim(PrefixChars); // Search for the first next option which could be a prefix. diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp index 221414d79e77..ebf05aab764b 100644 --- a/lib/Option/Option.cpp +++ b/lib/Option/Option.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include @@ -35,10 +36,10 @@ Option::Option(const OptTable::Info *info, const OptTable *owner) } } -void Option::dump() const { - llvm::errs() << "<"; +void Option::print(raw_ostream &O) const { + O << "<"; switch (getKind()) { -#define P(N) case N: llvm::errs() << #N; break +#define P(N) case N: O << #N; break P(GroupClass); P(InputClass); P(UnknownClass); @@ -54,33 +55,35 @@ void Option::dump() const { } if (Info->Prefixes) { - llvm::errs() << " Prefixes:["; - for (const char * const *Pre = Info->Prefixes; *Pre != nullptr; ++Pre) { - llvm::errs() << '"' << *Pre << (*(Pre + 1) == nullptr ? "\"" : "\", "); + O << " Prefixes:["; + for (const char *const *Pre = Info->Prefixes; *Pre != nullptr; ++Pre) { + O << '"' << *Pre << (*(Pre + 1) == nullptr ? "\"" : "\", "); } - llvm::errs() << ']'; + O << ']'; } - llvm::errs() << " Name:\"" << getName() << '"'; + O << " Name:\"" << getName() << '"'; const Option Group = getGroup(); if (Group.isValid()) { - llvm::errs() << " Group:"; - Group.dump(); + O << " Group:"; + Group.print(O); } const Option Alias = getAlias(); if (Alias.isValid()) { - llvm::errs() << " Alias:"; - Alias.dump(); + O << " Alias:"; + Alias.print(O); } if (getKind() == MultiArgClass) - llvm::errs() << " NumArgs:" << getNumArgs(); + O << " NumArgs:" << getNumArgs(); - llvm::errs() << ">\n"; + O << ">\n"; } +void Option::dump() const { print(dbgs()); } + bool Option::matches(OptSpecifier Opt) const { // Aliases are never considered in matching, look through them. 
const Option Alias = getAlias(); diff --git a/lib/Passes/LLVMBuild.txt b/lib/Passes/LLVMBuild.txt index 3063fe3e5da5..a752f42dcedd 100644 --- a/lib/Passes/LLVMBuild.txt +++ b/lib/Passes/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Passes parent = Libraries -required_libraries = Analysis Core IPA IPO InstCombine Scalar Support TransformUtils Vectorize +required_libraries = Analysis Core IPO InstCombine Scalar Support TransformUtils Vectorize diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index ba7132050e9b..8ba81f72a717 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CGSCCPassManager.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" @@ -28,9 +29,14 @@ #include "llvm/IR/Verifier.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/IPO/StripDeadPrototypes.h" #include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" +#include "llvm/Transforms/Scalar/SROA.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" using namespace llvm; diff --git a/lib/Passes/PassRegistry.def b/lib/Passes/PassRegistry.def index d768a3ad1d2a..241a78927c77 100644 --- a/lib/Passes/PassRegistry.def +++ b/lib/Passes/PassRegistry.def @@ -27,10 +27,13 @@ MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) #ifndef MODULE_PASS #define MODULE_PASS(NAME, CREATE_PASS) #endif +MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) +MODULE_PASS("inferattrs", InferFunctionAttrsPass()) MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("print", PrintModulePass(dbgs())) MODULE_PASS("print-cg", LazyCallGraphPrinterPass(dbgs())) +MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) MODULE_PASS("verify", VerifierPass()) #undef MODULE_PASS @@ -54,6 +57,7 @@ FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) FUNCTION_ANALYSIS("loops", LoopAnalysis()) FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis()) +FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis()) FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) FUNCTION_ANALYSIS("targetir", TM ? 
TM->getTargetIRAnalysis() : TargetIRAnalysis()) @@ -62,6 +66,7 @@ FUNCTION_ANALYSIS("targetir", #ifndef FUNCTION_PASS #define FUNCTION_PASS(NAME, CREATE_PASS) #endif +FUNCTION_PASS("adce", ADCEPass()) FUNCTION_PASS("early-cse", EarlyCSEPass()) FUNCTION_PASS("instcombine", InstCombinePass()) FUNCTION_PASS("invalidate", InvalidateAllAnalysesPass()) @@ -71,7 +76,9 @@ FUNCTION_PASS("print", PrintFunctionPass(dbgs())) FUNCTION_PASS("print", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) FUNCTION_PASS("print", LoopPrinterPass(dbgs())) +FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) +FUNCTION_PASS("sroa", SROA()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify", DominatorTreeVerifierPass()) #undef FUNCTION_PASS diff --git a/lib/ProfileData/CoverageMapping.cpp b/lib/ProfileData/CoverageMapping.cpp index cf04fea8491d..55c0fb4792ef 100644 --- a/lib/ProfileData/CoverageMapping.cpp +++ b/lib/ProfileData/CoverageMapping.cpp @@ -181,18 +181,6 @@ void FunctionRecordIterator::skipOtherFiles() { *this = FunctionRecordIterator(); } -/// Get the function name from the record, removing the filename prefix if -/// necessary. -static StringRef getFuncNameWithoutPrefix(const CoverageMappingRecord &Record) { - StringRef FunctionName = Record.FunctionName; - if (Record.Filenames.empty()) - return FunctionName; - StringRef Filename = sys::path::filename(Record.Filenames[0]); - if (FunctionName.startswith(Filename)) - FunctionName = FunctionName.drop_front(Filename.size() + 1); - return FunctionName; -} - ErrorOr> CoverageMapping::load(CoverageMappingReader &CoverageReader, IndexedInstrProfReader &ProfileReader) { @@ -216,7 +204,11 @@ CoverageMapping::load(CoverageMappingReader &CoverageReader, assert(!Record.MappingRegions.empty() && "Function has no regions"); - FunctionRecord Function(getFuncNameWithoutPrefix(Record), Record.Filenames); + StringRef OrigFuncName = Record.FunctionName; + if (!Record.Filenames.empty()) + OrigFuncName = + getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]); + FunctionRecord Function(OrigFuncName, Record.Filenames); for (const auto &Region : Record.MappingRegions) { ErrorOr ExecutionCount = Ctx.evaluate(Region.Count); if (!ExecutionCount) diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp index 334a3f51ec9e..a0f82a0d4ede 100644 --- a/lib/ProfileData/CoverageMappingReader.cpp +++ b/lib/ProfileData/CoverageMappingReader.cpp @@ -290,36 +290,25 @@ std::error_code RawCoverageMappingReader::read() { return std::error_code(); } -namespace { +std::error_code InstrProfSymtab::create(SectionRef &Section) { + if (auto Err = Section.getContents(Data)) + return Err; + Address = Section.getAddress(); + return std::error_code(); +} -/// \brief A helper structure to access the data from a section -/// in an object file. 
-struct SectionData { - StringRef Data; - uint64_t Address; - - std::error_code load(SectionRef &Section) { - if (auto Err = Section.getContents(Data)) - return Err; - Address = Section.getAddress(); - return std::error_code(); - } - - std::error_code get(uint64_t Pointer, size_t Size, StringRef &Result) { - if (Pointer < Address) - return coveragemap_error::malformed; - auto Offset = Pointer - Address; - if (Offset + Size > Data.size()) - return coveragemap_error::malformed; - Result = Data.substr(Pointer - Address, Size); - return std::error_code(); - } -}; +StringRef InstrProfSymtab::getFuncName(uint64_t Pointer, size_t Size) { + if (Pointer < Address) + return StringRef(); + auto Offset = Pointer - Address; + if (Offset + Size > Data.size()) + return StringRef(); + return Data.substr(Pointer - Address, Size); } template -std::error_code readCoverageMappingData( - SectionData &ProfileNames, StringRef Data, +static std::error_code readCoverageMappingData( + InstrProfSymtab &ProfileNames, StringRef Data, std::vector &Records, std::vector &Filenames) { using namespace support; @@ -343,7 +332,7 @@ std::error_code readCoverageMappingData( // Skip past the function records, saving the start and end for later. const char *FunBuf = Buf; - Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t)); + Buf += NRecords * sizeof(coverage::CovMapFunctionRecord); const char *FunEnd = Buf; // Get the filenames. @@ -366,12 +355,15 @@ std::error_code readCoverageMappingData( // before reading the next map. Buf += alignmentAdjustment(Buf, 8); - while (FunBuf < FunEnd) { + auto CFR = + reinterpret_cast *>(FunBuf); + while ((const char *)CFR < FunEnd) { // Read the function information - T NamePtr = endian::readNext(FunBuf); - uint32_t NameSize = endian::readNext(FunBuf); - uint32_t DataSize = endian::readNext(FunBuf); - uint64_t FuncHash = endian::readNext(FunBuf); + T NamePtr = endian::byte_swap(CFR->NamePtr); + uint32_t NameSize = endian::byte_swap(CFR->NameSize); + uint32_t DataSize = endian::byte_swap(CFR->DataSize); + uint64_t FuncHash = endian::byte_swap(CFR->FuncHash); + CFR++; // Now use that to read the coverage data. if (CovBuf + DataSize > CovEnd) @@ -386,9 +378,9 @@ std::error_code readCoverageMappingData( continue; // Finally, grab the name and create a record. 
- StringRef FuncName; - if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName)) - return EC; + StringRef FuncName = ProfileNames.getFuncName(NamePtr, NameSize); + if (NameSize && FuncName.empty()) + return coveragemap_error::malformed; Records.push_back(BinaryCoverageReader::ProfileMappingRecord( CoverageMappingVersion(Version), FuncName, FuncHash, Mapping, FilenamesBegin, Filenames.size() - FilenamesBegin)); @@ -401,7 +393,7 @@ std::error_code readCoverageMappingData( static const char *TestingFormatMagic = "llvmcovmtestdata"; static std::error_code loadTestingFormat(StringRef Data, - SectionData &ProfileNames, + InstrProfSymtab &ProfileNames, StringRef &CoverageMapping, uint8_t &BytesInAddress, support::endianness &Endian) { @@ -420,14 +412,14 @@ static std::error_code loadTestingFormat(StringRef Data, if (Data.size() < 1) return coveragemap_error::truncated; N = 0; - ProfileNames.Address = + uint64_t Address = decodeULEB128(reinterpret_cast(Data.data()), &N); if (N > Data.size()) return coveragemap_error::malformed; Data = Data.substr(N); if (Data.size() < ProfileNamesSize) return coveragemap_error::malformed; - ProfileNames.Data = Data.substr(0, ProfileNamesSize); + ProfileNames.create(Data.substr(0, ProfileNamesSize), Address); CoverageMapping = Data.substr(ProfileNamesSize); return std::error_code(); } @@ -443,12 +435,10 @@ static ErrorOr lookupSection(ObjectFile &OF, StringRef Name) { return coveragemap_error::no_data_found; } -static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer, - SectionData &ProfileNames, - StringRef &CoverageMapping, - uint8_t &BytesInAddress, - support::endianness &Endian, - StringRef Arch) { +static std::error_code +loadBinaryFormat(MemoryBufferRef ObjectBuffer, InstrProfSymtab &ProfileNames, + StringRef &CoverageMapping, uint8_t &BytesInAddress, + support::endianness &Endian, StringRef Arch) { auto BinOrErr = object::createBinary(ObjectBuffer); if (std::error_code EC = BinOrErr.getError()) return EC; @@ -477,17 +467,18 @@ static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer, : support::endianness::big; // Look for the sections that we are interested in. - auto NamesSection = lookupSection(*OF, "__llvm_prf_names"); + auto NamesSection = lookupSection(*OF, getInstrProfNameSectionName(false)); if (auto EC = NamesSection.getError()) return EC; - auto CoverageSection = lookupSection(*OF, "__llvm_covmap"); + auto CoverageSection = + lookupSection(*OF, getInstrProfCoverageSectionName(false)); if (auto EC = CoverageSection.getError()) return EC; // Get the contents of the given sections. if (std::error_code EC = CoverageSection->getContents(CoverageMapping)) return EC; - if (std::error_code EC = ProfileNames.load(*NamesSection)) + if (std::error_code EC = ProfileNames.create(*NamesSection)) return EC; return std::error_code(); @@ -498,33 +489,33 @@ BinaryCoverageReader::create(std::unique_ptr &ObjectBuffer, StringRef Arch) { std::unique_ptr Reader(new BinaryCoverageReader()); - SectionData Profile; + InstrProfSymtab ProfileNames; StringRef Coverage; uint8_t BytesInAddress; support::endianness Endian; std::error_code EC; if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic)) // This is a special format used for testing. 
- EC = loadTestingFormat(ObjectBuffer->getBuffer(), Profile, Coverage, + EC = loadTestingFormat(ObjectBuffer->getBuffer(), ProfileNames, Coverage, BytesInAddress, Endian); else - EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Profile, Coverage, - BytesInAddress, Endian, Arch); + EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), ProfileNames, + Coverage, BytesInAddress, Endian, Arch); if (EC) return EC; if (BytesInAddress == 4 && Endian == support::endianness::little) EC = readCoverageMappingData( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else if (BytesInAddress == 4 && Endian == support::endianness::big) EC = readCoverageMappingData( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else if (BytesInAddress == 8 && Endian == support::endianness::little) EC = readCoverageMappingData( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else if (BytesInAddress == 8 && Endian == support::endianness::big) EC = readCoverageMappingData( - Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); else return coveragemap_error::malformed; if (EC) diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index 92822a71402f..f5acd23129dc 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -12,6 +12,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" @@ -28,28 +32,32 @@ class InstrProfErrorCategoryType : public std::error_category { return "Success"; case instrprof_error::eof: return "End of File"; + case instrprof_error::unrecognized_format: + return "Unrecognized instrumentation profile encoding format"; case instrprof_error::bad_magic: - return "Invalid profile data (bad magic)"; + return "Invalid instrumentation profile data (bad magic)"; case instrprof_error::bad_header: - return "Invalid profile data (file header is corrupt)"; + return "Invalid instrumentation profile data (file header is corrupt)"; case instrprof_error::unsupported_version: - return "Unsupported profiling format version"; + return "Unsupported instrumentation profile format version"; case instrprof_error::unsupported_hash_type: - return "Unsupported profiling hash"; + return "Unsupported instrumentation profile hash type"; case instrprof_error::too_large: return "Too much profile data"; case instrprof_error::truncated: return "Truncated profile data"; case instrprof_error::malformed: - return "Malformed profile data"; + return "Malformed instrumentation profile data"; case instrprof_error::unknown_function: return "No profile data available for function"; case instrprof_error::hash_mismatch: - return "Function hash mismatch"; + return "Function control flow change detected (hash mismatch)"; case instrprof_error::count_mismatch: - return "Function count mismatch"; + return "Function basic block count change detected (counter mismatch)"; case instrprof_error::counter_overflow: return "Counter overflow"; + case instrprof_error::value_site_count_mismatch: + 
return "Function value site count change detected (counter mismatch)"; } llvm_unreachable("A value of instrprof_error has no message."); } @@ -61,3 +69,415 @@ static ManagedStatic ErrorCategory; const std::error_category &llvm::instrprof_category() { return *ErrorCategory; } + +namespace llvm { + +std::string getPGOFuncName(StringRef RawFuncName, + GlobalValue::LinkageTypes Linkage, + StringRef FileName, + uint64_t Version LLVM_ATTRIBUTE_UNUSED) { + + // Function names may be prefixed with a binary '1' to indicate + // that the backend should not modify the symbols due to any platform + // naming convention. Do not include that '1' in the PGO profile name. + if (RawFuncName[0] == '\1') + RawFuncName = RawFuncName.substr(1); + + std::string FuncName = RawFuncName; + if (llvm::GlobalValue::isLocalLinkage(Linkage)) { + // For local symbols, prepend the main file name to distinguish them. + // Do not include the full path in the file name since there's no guarantee + // that it will stay the same, e.g., if the files are checked out from + // version control in different locations. + if (FileName.empty()) + FuncName = FuncName.insert(0, ":"); + else + FuncName = FuncName.insert(0, FileName.str() + ":"); + } + return FuncName; +} + +std::string getPGOFuncName(const Function &F, uint64_t Version) { + return getPGOFuncName(F.getName(), F.getLinkage(), F.getParent()->getName(), + Version); +} + +StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) { + if (FileName.empty()) + return PGOFuncName; + // Drop the file name including ':'. See also getPGOFuncName. + if (PGOFuncName.startswith(FileName)) + PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1); + return PGOFuncName; +} + +// \p FuncName is the string used as profile lookup key for the function. A +// symbol is created to hold the name. Return the legalized symbol name. +static std::string getPGOFuncNameVarName(StringRef FuncName, + GlobalValue::LinkageTypes Linkage) { + std::string VarName = getInstrProfNameVarPrefix(); + VarName += FuncName; + + if (!GlobalValue::isLocalLinkage(Linkage)) + return VarName; + + // Now fix up illegal chars in local VarName that may upset the assembler. + const char *InvalidChars = "-:<>\"'"; + size_t found = VarName.find_first_of(InvalidChars); + while (found != std::string::npos) { + VarName[found] = '_'; + found = VarName.find_first_of(InvalidChars, found + 1); + } + return VarName; +} + +GlobalVariable *createPGOFuncNameVar(Module &M, + GlobalValue::LinkageTypes Linkage, + StringRef FuncName) { + + // We generally want to match the function's linkage, but available_externally + // and extern_weak both have the wrong semantics, and anything that doesn't + // need to link across compilation units doesn't need to be visible at all. + if (Linkage == GlobalValue::ExternalWeakLinkage) + Linkage = GlobalValue::LinkOnceAnyLinkage; + else if (Linkage == GlobalValue::AvailableExternallyLinkage) + Linkage = GlobalValue::LinkOnceODRLinkage; + else if (Linkage == GlobalValue::InternalLinkage || + Linkage == GlobalValue::ExternalLinkage) + Linkage = GlobalValue::PrivateLinkage; + + auto *Value = ConstantDataArray::getString(M.getContext(), FuncName, false); + auto FuncNameVar = + new GlobalVariable(M, Value->getType(), true, Linkage, Value, + getPGOFuncNameVarName(FuncName, Linkage)); + + // Hide the symbol so that we correctly get a copy for each executable. 
+ if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) + FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); + + return FuncNameVar; +} + +GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) { + return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName); +} + +instrprof_error +InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, + uint64_t Weight) { + this->sortByTargetValues(); + Input.sortByTargetValues(); + auto I = ValueData.begin(); + auto IE = ValueData.end(); + instrprof_error Result = instrprof_error::success; + for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE; + ++J) { + while (I != IE && I->Value < J->Value) + ++I; + if (I != IE && I->Value == J->Value) { + uint64_t JCount = J->Count; + bool Overflowed; + if (Weight > 1) { + JCount = SaturatingMultiply(JCount, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + I->Count = SaturatingAdd(I->Count, JCount, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + ++I; + continue; + } + ValueData.insert(I, *J); + } + return Result; +} + +// Merge Value Profile data from Src record to this record for ValueKind. +// Scale merged value counts by \p Weight. +instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind, + InstrProfRecord &Src, + uint64_t Weight) { + uint32_t ThisNumValueSites = getNumValueSites(ValueKind); + uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); + if (ThisNumValueSites != OtherNumValueSites) + return instrprof_error::value_site_count_mismatch; + std::vector &ThisSiteRecords = + getValueSitesForKind(ValueKind); + std::vector &OtherSiteRecords = + Src.getValueSitesForKind(ValueKind); + instrprof_error Result = instrprof_error::success; + for (uint32_t I = 0; I < ThisNumValueSites; I++) + MergeResult(Result, + ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight)); + return Result; +} + +instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, + uint64_t Weight) { + // If the number of counters doesn't match we either have bad data + // or a hash collision. + if (Counts.size() != Other.Counts.size()) + return instrprof_error::count_mismatch; + + instrprof_error Result = instrprof_error::success; + + for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { + bool Overflowed; + uint64_t OtherCount = Other.Counts[I]; + if (Weight > 1) { + OtherCount = SaturatingMultiply(OtherCount, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + Counts[I] = SaturatingAdd(Counts[I], OtherCount, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + MergeResult(Result, mergeValueProfData(Kind, Other, Weight)); + + return Result; +} + +// Map indirect call target name hash to name string. 
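+// For example (editor's sketch): with a ValueMap sorted by hash, say
+//   { (MD5Hash("callee1"), "callee1"), (MD5Hash("callee2"), "callee2") },
+// remapValue(MD5Hash("callee2"), IPVK_IndirectCallTarget, &Map) locates the
+// entry with std::lower_bound and returns its mapped value, the pointer to
+// "callee2" cast to uint64_t; without a ValueMap the input value is
+// returned unchanged.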
+uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, + ValueMapType *ValueMap) { + if (!ValueMap) + return Value; + switch (ValueKind) { + case IPVK_IndirectCallTarget: { + auto Result = + std::lower_bound(ValueMap->begin(), ValueMap->end(), Value, + [](const std::pair &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != ValueMap->end()) + Value = (uint64_t)Result->second; + break; + } + } + return Value; +} + +void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site, + InstrProfValueData *VData, uint32_t N, + ValueMapType *ValueMap) { + for (uint32_t I = 0; I < N; I++) { + VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap); + } + std::vector &ValueSites = + getValueSitesForKind(ValueKind); + if (N == 0) + ValueSites.push_back(InstrProfValueSiteRecord()); + else + ValueSites.emplace_back(VData, VData + N); +} + +#define INSTR_PROF_COMMON_API_IMPL +#include "llvm/ProfileData/InstrProfData.inc" + +/*! + * \brief ValueProfRecordClosure Interface implementation for InstrProfRecord + * class. These C wrappers are used as adaptors so that C++ code can be + * invoked as callbacks. + */ +uint32_t getNumValueKindsInstrProf(const void *Record) { + return reinterpret_cast(Record)->getNumValueKinds(); +} + +uint32_t getNumValueSitesInstrProf(const void *Record, uint32_t VKind) { + return reinterpret_cast(Record) + ->getNumValueSites(VKind); +} + +uint32_t getNumValueDataInstrProf(const void *Record, uint32_t VKind) { + return reinterpret_cast(Record) + ->getNumValueData(VKind); +} + +uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK, + uint32_t S) { + return reinterpret_cast(R) + ->getNumValueDataForSite(VK, S); +} + +void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst, + uint32_t K, uint32_t S, + uint64_t (*Mapper)(uint32_t, uint64_t)) { + return reinterpret_cast(R)->getValueForSite( + Dst, K, S, Mapper); +} + +ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) { + ValueProfData *VD = + (ValueProfData *)(new (::operator new(TotalSizeInBytes)) ValueProfData()); + memset(VD, 0, TotalSizeInBytes); + return VD; +} + +static ValueProfRecordClosure InstrProfRecordClosure = { + 0, + getNumValueKindsInstrProf, + getNumValueSitesInstrProf, + getNumValueDataInstrProf, + getNumValueDataForSiteInstrProf, + 0, + getValueForSiteInstrProf, + allocValueProfDataInstrProf}; + +// Wrapper implementation using the closure mechanism. +uint32_t ValueProfData::getSize(const InstrProfRecord &Record) { + InstrProfRecordClosure.Record = &Record; + return getValueProfDataSize(&InstrProfRecordClosure); +} + +// Wrapper implementation using the closure mechanism. +std::unique_ptr +ValueProfData::serializeFrom(const InstrProfRecord &Record) { + InstrProfRecordClosure.Record = &Record; + + std::unique_ptr VPD( + serializeValueProfDataFrom(&InstrProfRecordClosure, nullptr)); + return VPD; +} + +void ValueProfRecord::deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap) { + Record.reserveSites(Kind, NumValueSites); + + InstrProfValueData *ValueData = getValueProfRecordValueData(this); + for (uint64_t VSite = 0; VSite < NumValueSites; ++VSite) { + uint8_t ValueDataCount = this->SiteCountArray[VSite]; + Record.addValueData(Kind, VSite, ValueData, ValueDataCount, VMap); + ValueData += ValueDataCount; + } +} + +// For writing/serializing, Old is the host endianness, and New is +// byte order intended on disk. 
For reading/deserialization, Old
+// is the on-disk source endianness, and New is the host endianness.
+void ValueProfRecord::swapBytes(support::endianness Old,
+                                support::endianness New) {
+  using namespace support;
+  if (Old == New)
+    return;
+
+  if (getHostEndianness() != Old) {
+    sys::swapByteOrder(NumValueSites);
+    sys::swapByteOrder(Kind);
+  }
+  uint32_t ND = getValueProfRecordNumValueData(this);
+  InstrProfValueData *VD = getValueProfRecordValueData(this);
+
+  // No need to swap byte array: SiteCountArray.
+  for (uint32_t I = 0; I < ND; I++) {
+    sys::swapByteOrder(VD[I].Value);
+    sys::swapByteOrder(VD[I].Count);
+  }
+  if (getHostEndianness() == Old) {
+    sys::swapByteOrder(NumValueSites);
+    sys::swapByteOrder(Kind);
+  }
+}
+
+void ValueProfData::deserializeTo(InstrProfRecord &Record,
+                                  InstrProfRecord::ValueMapType *VMap) {
+  if (NumValueKinds == 0)
+    return;
+
+  ValueProfRecord *VR = getFirstValueProfRecord(this);
+  for (uint32_t K = 0; K < NumValueKinds; K++) {
+    VR->deserializeTo(Record, VMap);
+    VR = getValueProfRecordNext(VR);
+  }
+}
+
+template <class T>
+static T swapToHostOrder(const unsigned char *&D, support::endianness Orig) {
+  using namespace support;
+  if (Orig == little)
+    return endian::readNext<T, little, unaligned>(D);
+  else
+    return endian::readNext<T, big, unaligned>(D);
+}
+
+static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) {
+  return std::unique_ptr<ValueProfData>(new (::operator new(TotalSize))
+                                            ValueProfData());
+}
+
+instrprof_error ValueProfData::checkIntegrity() {
+  if (NumValueKinds > IPVK_Last + 1)
+    return instrprof_error::malformed;
+  // Total size needs to be a multiple of quadword size.
+  if (TotalSize % sizeof(uint64_t))
+    return instrprof_error::malformed;
+
+  ValueProfRecord *VR = getFirstValueProfRecord(this);
+  for (uint32_t K = 0; K < this->NumValueKinds; K++) {
+    if (VR->Kind > IPVK_Last)
+      return instrprof_error::malformed;
+    VR = getValueProfRecordNext(VR);
+    if ((char *)VR - (char *)this > (ptrdiff_t)TotalSize)
+      return instrprof_error::malformed;
+  }
+  return instrprof_error::success;
+}
+
+ErrorOr<std::unique_ptr<ValueProfData>>
+ValueProfData::getValueProfData(const unsigned char *D,
+                                const unsigned char *const BufferEnd,
+                                support::endianness Endianness) {
+  using namespace support;
+  if (D + sizeof(ValueProfData) > BufferEnd)
+    return instrprof_error::truncated;
+
+  const unsigned char *Header = D;
+  uint32_t TotalSize = swapToHostOrder<uint32_t>(Header, Endianness);
+  if (D + TotalSize > BufferEnd)
+    return instrprof_error::too_large;
+
+  std::unique_ptr<ValueProfData> VPD = allocValueProfData(TotalSize);
+  memcpy(VPD.get(), D, TotalSize);
+  // Byte swap.
+ VPD->swapBytesToHost(Endianness); + + instrprof_error EC = VPD->checkIntegrity(); + if (EC != instrprof_error::success) + return EC; + + return std::move(VPD); +} + +void ValueProfData::swapBytesToHost(support::endianness Endianness) { + using namespace support; + if (Endianness == getHostEndianness()) + return; + + sys::swapByteOrder(TotalSize); + sys::swapByteOrder(NumValueKinds); + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + VR->swapBytes(Endianness, getHostEndianness()); + VR = getValueProfRecordNext(VR); + } +} + +void ValueProfData::swapBytesFromHost(support::endianness Endianness) { + using namespace support; + if (Endianness == getHostEndianness()) + return; + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + ValueProfRecord *NVR = getValueProfRecordNext(VR); + VR->swapBytes(getHostEndianness(), Endianness); + VR = NVR; + } + sys::swapByteOrder(TotalSize); + sys::swapByteOrder(NumValueKinds); +} + +} diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h deleted file mode 100644 index ebca7b22fbfb..000000000000 --- a/lib/ProfileData/InstrProfIndexed.h +++ /dev/null @@ -1,56 +0,0 @@ -//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Shared header for the instrumented profile data reader and writer. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H -#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H - -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MD5.h" - -namespace llvm { - -namespace IndexedInstrProf { -enum class HashT : uint32_t { - MD5, - - Last = MD5 -}; - -static inline uint64_t MD5Hash(StringRef Str) { - MD5 Hash; - Hash.update(Str); - llvm::MD5::MD5Result Result; - Hash.final(Result); - // Return the least significant 8 bytes. Our MD5 implementation returns the - // result in little endian, so we may need to swap bytes. 
- using namespace llvm::support; - return endian::read(Result); -} - -static inline uint64_t ComputeHash(HashT Type, StringRef K) { - switch (Type) { - case HashT::MD5: - return IndexedInstrProf::MD5Hash(K); - } - llvm_unreachable("Unhandled hash type"); -} - -const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" -const uint64_t Version = 2; -const HashT HashType = HashT::MD5; -} - -} // end namespace llvm - -#endif diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 8a529a000c53..5e83456822fd 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -13,7 +13,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfReader.h" -#include "InstrProfIndexed.h" #include "llvm/ADT/STLExtras.h" #include @@ -55,8 +54,10 @@ InstrProfReader::create(std::unique_ptr Buffer) { Result.reset(new RawInstrProfReader64(std::move(Buffer))); else if (RawInstrProfReader32::hasFormat(*Buffer)) Result.reset(new RawInstrProfReader32(std::move(Buffer))); - else + else if (TextInstrProfReader::hasFormat(*Buffer)) Result.reset(new TextInstrProfReader(std::move(Buffer))); + else + return instrprof_error::unrecognized_format; // Initialize the reader and return the result. if (std::error_code EC = initializeReader(*Result)) @@ -98,16 +99,98 @@ void InstrProfIterator::Increment() { *this = InstrProfIterator(); } +bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { + // Verify that this really looks like plain ASCII text by checking a + // 'reasonable' number of characters (up to profile magic size). + size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); + StringRef buffer = Buffer.getBufferStart(); + return count == 0 || + std::all_of(buffer.begin(), buffer.begin() + count, + [](char c) { return ::isprint(c) || ::isspace(c); }); +} + +std::error_code TextInstrProfReader::readHeader() { + Symtab.reset(new InstrProfSymtab()); + return success(); +} + +std::error_code +TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { + +#define CHECK_LINE_END(Line) \ + if (Line.is_at_end()) \ + return error(instrprof_error::truncated); +#define READ_NUM(Str, Dst) \ + if ((Str).getAsInteger(10, (Dst))) \ + return error(instrprof_error::malformed); +#define VP_READ_ADVANCE(Val) \ + CHECK_LINE_END(Line); \ + uint32_t Val; \ + READ_NUM((*Line), (Val)); \ + Line++; + + if (Line.is_at_end()) + return success(); + + uint32_t NumValueKinds; + if (Line->getAsInteger(10, NumValueKinds)) { + // No value profile data + return success(); + } + if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1) + return error(instrprof_error::malformed); + Line++; + + for (uint32_t VK = 0; VK < NumValueKinds; VK++) { + VP_READ_ADVANCE(ValueKind); + if (ValueKind > IPVK_Last) + return error(instrprof_error::malformed); + VP_READ_ADVANCE(NumValueSites); + if (!NumValueSites) + continue; + + Record.reserveSites(VK, NumValueSites); + for (uint32_t S = 0; S < NumValueSites; S++) { + VP_READ_ADVANCE(NumValueData); + + std::vector CurrentValues; + for (uint32_t V = 0; V < NumValueData; V++) { + CHECK_LINE_END(Line); + std::pair VD = Line->split(':'); + uint64_t TakenCount, Value; + if (VK == IPVK_IndirectCallTarget) { + Symtab->addFuncName(VD.first); + Value = IndexedInstrProf::ComputeHash(VD.first); + } else { + READ_NUM(VD.first, Value); + } + READ_NUM(VD.second, TakenCount); + CurrentValues.push_back({Value, TakenCount}); + Line++; + } + Record.addValueData(VK, S, 
CurrentValues.data(), NumValueData, nullptr); + } + } + return success(); + +#undef CHECK_LINE_END +#undef READ_NUM +#undef VP_READ_ADVANCE +} + std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { // Skip empty lines and comments. while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) ++Line; // If we hit EOF while looking for a name, we're done. - if (Line.is_at_end()) + if (Line.is_at_end()) { + Symtab->finalizeSymtab(); return error(instrprof_error::eof); + } // Read the function name. Record.Name = *Line++; + Symtab->addFuncName(Record.Name); // Read the function hash. if (Line.is_at_end()) @@ -136,57 +219,35 @@ std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { Record.Counts.push_back(Count); } + // Check if value profile data exists and read it if so. + if (std::error_code EC = readValueProfileData(Record)) + return EC; + + // This is needed to avoid two pass parsing because llvm-profdata + // does dumping while reading. + Symtab->finalizeSymtab(); return success(); } -template -static uint64_t getRawMagic(); - -template <> -uint64_t getRawMagic() { - return - uint64_t(255) << 56 | - uint64_t('l') << 48 | - uint64_t('p') << 40 | - uint64_t('r') << 32 | - uint64_t('o') << 24 | - uint64_t('f') << 16 | - uint64_t('r') << 8 | - uint64_t(129); -} - -template <> -uint64_t getRawMagic() { - return - uint64_t(255) << 56 | - uint64_t('l') << 48 | - uint64_t('p') << 40 | - uint64_t('r') << 32 | - uint64_t('o') << 24 | - uint64_t('f') << 16 | - uint64_t('R') << 8 | - uint64_t(129); -} - template bool RawInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { if (DataBuffer.getBufferSize() < sizeof(uint64_t)) return false; uint64_t Magic = *reinterpret_cast(DataBuffer.getBufferStart()); - return getRawMagic() == Magic || - sys::getSwappedBytes(getRawMagic()) == Magic; + return RawInstrProf::getMagic() == Magic || + sys::getSwappedBytes(RawInstrProf::getMagic()) == Magic; } template std::error_code RawInstrProfReader::readHeader() { if (!hasFormat(*DataBuffer)) return error(instrprof_error::bad_magic); - if (DataBuffer->getBufferSize() < sizeof(RawHeader)) + if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header)) return error(instrprof_error::bad_header); - auto *Header = - reinterpret_cast(DataBuffer->getBufferStart()); - ShouldSwapBytes = Header->Magic != getRawMagic(); + auto *Header = reinterpret_cast( + DataBuffer->getBufferStart()); + ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic(); return readHeader(*Header); } @@ -202,29 +263,38 @@ RawInstrProfReader::readNextHeader(const char *CurrentPos) { return instrprof_error::eof; // If there isn't enough space for another header, this is probably just // garbage at the end of the file. - if (CurrentPos + sizeof(RawHeader) > End) + if (CurrentPos + sizeof(RawInstrProf::Header) > End) return instrprof_error::malformed; // The writer ensures each profile is padded to start at an aligned address. if (reinterpret_cast(CurrentPos) % alignOf()) return instrprof_error::malformed; // The magic should have the same byte order as in the previous header. uint64_t Magic = *reinterpret_cast(CurrentPos); - if (Magic != swap(getRawMagic())) + if (Magic != swap(RawInstrProf::getMagic())) return instrprof_error::bad_magic; // There's another profile to read, so we need to process the header. 
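// File shape handled here (editor's sketch): raw profiles emitted by
// several processes may be concatenated into one file, each chunk starting
// with the same magic at an 8-byte-aligned offset:
//
//   [Header][Data...][Counters...][Names...][pad to 8][ValueData...]
//   [Header][Data...][Counters...][Names...][pad to 8][ValueData...] ...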
- auto *Header = reinterpret_cast(CurrentPos); + auto *Header = reinterpret_cast(CurrentPos); return readHeader(*Header); } -static uint64_t getRawVersion() { - return 1; +template +void RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { + for (const RawInstrProf::ProfileData *I = Data; I != DataEnd; ++I) { + StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize)); + Symtab.addFuncName(FunctionName); + const IntPtrT FPtr = swap(I->FunctionPointer); + if (!FPtr) + continue; + Symtab.mapAddress(FPtr, IndexedInstrProf::ComputeHash(FunctionName)); + } + Symtab.finalizeSymtab(); } template std::error_code -RawInstrProfReader::readHeader(const RawHeader &Header) { - if (swap(Header.Version) != getRawVersion()) +RawInstrProfReader::readHeader(const RawInstrProf::Header &Header) { + if (swap(Header.Version) != RawInstrProf::Version) return error(instrprof_error::unsupported_version); CountersDelta = swap(Header.CountersDelta); @@ -232,50 +302,69 @@ RawInstrProfReader::readHeader(const RawHeader &Header) { auto DataSize = swap(Header.DataSize); auto CountersSize = swap(Header.CountersSize); auto NamesSize = swap(Header.NamesSize); + auto ValueDataSize = swap(Header.ValueDataSize); + ValueKindLast = swap(Header.ValueKindLast); - ptrdiff_t DataOffset = sizeof(RawHeader); - ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize; + auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData); + auto PaddingSize = getNumPaddingBytes(NamesSize); + + ptrdiff_t DataOffset = sizeof(RawInstrProf::Header); + ptrdiff_t CountersOffset = DataOffset + DataSizeInBytes; ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize; - size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize; + ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize; + size_t ProfileSize = ValueDataOffset + ValueDataSize; auto *Start = reinterpret_cast(&Header); if (Start + ProfileSize > DataBuffer->getBufferEnd()) return error(instrprof_error::bad_header); - Data = reinterpret_cast(Start + DataOffset); + Data = reinterpret_cast *>( + Start + DataOffset); DataEnd = Data + DataSize; CountersStart = reinterpret_cast(Start + CountersOffset); NamesStart = Start + NamesOffset; + ValueDataStart = reinterpret_cast(Start + ValueDataOffset); ProfileEnd = Start + ProfileSize; + std::unique_ptr NewSymtab = make_unique(); + createSymtab(*NewSymtab.get()); + Symtab = std::move(NewSymtab); return success(); } template -std::error_code -RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { - if (Data == DataEnd) - if (std::error_code EC = readNextHeader(ProfileEnd)) - return EC; +std::error_code RawInstrProfReader::readName(InstrProfRecord &Record) { + Record.Name = StringRef(getName(Data->NamePtr), swap(Data->NameSize)); + if (Record.Name.data() < NamesStart || + Record.Name.data() + Record.Name.size() > + reinterpret_cast(ValueDataStart)) + return error(instrprof_error::malformed); + return success(); +} - // Get the raw data. 
- StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize)); +template +std::error_code RawInstrProfReader::readFuncHash( + InstrProfRecord &Record) { + Record.Hash = swap(Data->FuncHash); + return success(); +} + +template +std::error_code RawInstrProfReader::readRawCounts( + InstrProfRecord &Record) { uint32_t NumCounters = swap(Data->NumCounters); + IntPtrT CounterPtr = Data->CounterPtr; if (NumCounters == 0) return error(instrprof_error::malformed); - auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), NumCounters); + + auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters); + auto *NamesStartAsCounter = reinterpret_cast(NamesStart); // Check bounds. - auto *NamesStartAsCounter = reinterpret_cast(NamesStart); - if (RawName.data() < NamesStart || - RawName.data() + RawName.size() > DataBuffer->getBufferEnd() || - RawCounts.data() < CountersStart || + if (RawCounts.data() < CountersStart || RawCounts.data() + RawCounts.size() > NamesStartAsCounter) return error(instrprof_error::malformed); - // Store the data in Record, byte-swapping as necessary. - Record.Hash = swap(Data->FuncHash); - Record.Name = RawName; if (ShouldSwapBytes) { Record.Counts.clear(); Record.Counts.reserve(RawCounts.size()); @@ -284,8 +373,61 @@ RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { } else Record.Counts = RawCounts; + return success(); +} + +template +std::error_code +RawInstrProfReader::readValueProfilingData(InstrProfRecord &Record) { + + Record.clearValueData(); + CurValueDataSize = 0; + // Need to match the logic in value profile dumper code in compiler-rt: + uint32_t NumValueKinds = 0; + for (uint32_t I = 0; I < IPVK_Last + 1; I++) + NumValueKinds += (Data->NumValueSites[I] != 0); + + if (!NumValueKinds) + return success(); + + ErrorOr> VDataPtrOrErr = + ValueProfData::getValueProfData(ValueDataStart, + (const unsigned char *)ProfileEnd, + getDataEndianness()); + + if (VDataPtrOrErr.getError()) + return VDataPtrOrErr.getError(); + + VDataPtrOrErr.get()->deserializeTo(Record, &Symtab->getAddrHashMap()); + CurValueDataSize = VDataPtrOrErr.get()->getSize(); + return success(); +} + +template +std::error_code +RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { + if (atEnd()) + if (std::error_code EC = readNextHeader(ProfileEnd)) + return EC; + + // Read name and set it in Record. + if (std::error_code EC = readName(Record)) + return EC; + + // Read FuncHash and set it in Record. + if (std::error_code EC = readFuncHash(Record)) + return EC; + + // Read raw counts and set Record. + if (std::error_code EC = readRawCounts(Record)) + return EC; + + // Read value data and set Record. + if (std::error_code EC = readValueProfilingData(Record)) + return EC; + // Iterate. - ++Data; + advanceData(); return success(); } @@ -302,52 +444,112 @@ InstrProfLookupTrait::ComputeHash(StringRef K) { typedef InstrProfLookupTrait::data_type data_type; typedef InstrProfLookupTrait::offset_type offset_type; +bool InstrProfLookupTrait::readValueProfilingData( + const unsigned char *&D, const unsigned char *const End) { + ErrorOr> VDataPtrOrErr = + ValueProfData::getValueProfData(D, End, ValueProfDataEndianness); + + if (VDataPtrOrErr.getError()) + return false; + + VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr); + D += VDataPtrOrErr.get()->TotalSize; + + return true; +} + data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, offset_type N) { - // Check if the data is corrupt. If so, don't try to read it.
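readRawCounts() above trusts nothing in the memory-mapped buffer: the counter region must lie entirely between the start of the counters section and the start of the names section before it is copied out, byte-swapping if the writer used the other endianness. A reduced sketch of the same check, with swap64 standing in for the reader's swap() helper:

    #include <cstdint>
    #include <vector>

    static uint64_t swap64(uint64_t V) {
      uint64_t R = 0;
      for (int I = 0; I < 8; ++I) { R = (R << 8) | (V & 0xff); V >>= 8; }
      return R;
    }

    static bool readCounts(const uint64_t *CountersStart,
                           const uint64_t *NamesStartAsCounter,
                           const uint64_t *RawCounts, uint32_t NumCounters,
                           bool ShouldSwap, std::vector<uint64_t> &Out) {
      if (NumCounters == 0)
        return false; // malformed: every function has at least one counter
      if (RawCounts < CountersStart ||
          RawCounts + NumCounters > NamesStartAsCounter)
        return false; // counters would spill outside their section
      Out.assign(RawCounts, RawCounts + NumCounters);
      if (ShouldSwap)
        for (uint64_t &C : Out)
          C = swap64(C); // writer used the opposite byte order
      return true;
    }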
if (N % sizeof(uint64_t)) return data_type(); DataBuffer.clear(); - uint64_t NumCounts; - uint64_t NumEntries = N / sizeof(uint64_t); std::vector CounterBuffer; - for (uint64_t I = 0; I < NumEntries; I += NumCounts) { - using namespace support; - // The function hash comes first. + + using namespace support; + const unsigned char *End = D + N; + while (D < End) { + // Read hash. + if (D + sizeof(uint64_t) >= End) + return data_type(); uint64_t Hash = endian::readNext(D); - if (++I >= NumEntries) - return data_type(); - - // In v1, we have at least one count. - // Later, we have the number of counts. - NumCounts = (1 == FormatVersion) - ? NumEntries - I - : endian::readNext(D); - if (1 != FormatVersion) - ++I; - - // If we have more counts than data, this is bogus. - if (I + NumCounts > NumEntries) + // Initialize number of counters for FormatVersion == 1. + uint64_t CountsSize = N / sizeof(uint64_t) - 1; + // If format version is different then read the number of counters. + if (FormatVersion != 1) { + if (D + sizeof(uint64_t) > End) + return data_type(); + CountsSize = endian::readNext(D); + } + // Read counter values. + if (D + CountsSize * sizeof(uint64_t) > End) return data_type(); CounterBuffer.clear(); - for (unsigned J = 0; J < NumCounts; ++J) + CounterBuffer.reserve(CountsSize); + for (uint64_t J = 0; J < CountsSize; ++J) CounterBuffer.push_back(endian::readNext(D)); - DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer))); + DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer)); + + // Read value profiling data. + if (FormatVersion > 2 && !readValueProfilingData(D, End)) { + DataBuffer.clear(); + return data_type(); + } } return DataBuffer; } +template +std::error_code InstrProfReaderIndex::getRecords( + StringRef FuncName, ArrayRef &Data) { + auto Iter = HashTable->find(FuncName); + if (Iter == HashTable->end()) + return instrprof_error::unknown_function; + + Data = (*Iter); + if (Data.empty()) + return instrprof_error::malformed; + + return instrprof_error::success; +} + +template +std::error_code InstrProfReaderIndex::getRecords( + ArrayRef &Data) { + if (atEnd()) + return instrprof_error::eof; + + Data = *RecordIterator; + + if (Data.empty()) + return instrprof_error::malformed; + + return instrprof_error::success; +} + +template +InstrProfReaderIndex::InstrProfReaderIndex( + const unsigned char *Buckets, const unsigned char *const Payload, + const unsigned char *const Base, IndexedInstrProf::HashT HashType, + uint64_t Version) { + FormatVersion = Version; + HashTable.reset(HashTableImpl::Create( + Buckets, Payload, Base, + typename HashTableImpl::InfoType(HashType, Version))); + RecordIterator = HashTable->data_begin(); +} + bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { if (DataBuffer.getBufferSize() < 8) return false; using namespace support; uint64_t Magic = endian::read(DataBuffer.getBufferStart()); + // Verify that it's magical. return Magic == IndexedInstrProf::Magic; } @@ -360,71 +562,91 @@ std::error_code IndexedInstrProfReader::readHeader() { using namespace support; + auto *Header = reinterpret_cast(Cur); + Cur += sizeof(IndexedInstrProf::Header); + // Check the magic number. - uint64_t Magic = endian::readNext(Cur); + uint64_t Magic = endian::byte_swap(Header->Magic); if (Magic != IndexedInstrProf::Magic) return error(instrprof_error::bad_magic); // Read the version. 
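The ReadData() rewrite above walks the per-key byte region record by record: a little-endian u64 hash, then a u64 counter count (implied by the remaining size in format version 1), then the counters, with every read bounds-checked so corrupt data yields an empty result instead of running off the buffer. A simplified sketch, assuming version 1 stores a single record per key (ParsedRecord and readU64LE are illustrative names):

    #include <cstdint>
    #include <utility>
    #include <vector>

    struct ParsedRecord { uint64_t Hash; std::vector<uint64_t> Counts; };

    // Read one little-endian u64, advancing D; false on truncation.
    static bool readU64LE(const unsigned char *&D, const unsigned char *End,
                          uint64_t &V) {
      if (End - D < 8) return false;
      V = 0;
      for (int I = 7; I >= 0; --I) V = (V << 8) | D[I];
      D += 8;
      return true;
    }

    static bool parseRecords(const unsigned char *D, const unsigned char *End,
                             unsigned FormatVersion, uint64_t TotalWords,
                             std::vector<ParsedRecord> &Out) {
      while (D < End) {
        ParsedRecord R;
        if (!readU64LE(D, End, R.Hash)) return false;
        uint64_t NumCounts = TotalWords - 1; // version 1: all remaining words
        if (FormatVersion != 1 && !readU64LE(D, End, NumCounts))
          return false;                      // later versions: explicit count
        for (uint64_t I = 0; I < NumCounts; ++I) {
          uint64_t C;
          if (!readU64LE(D, End, C)) return false;
          R.Counts.push_back(C);
        }
        Out.push_back(std::move(R));
      }
      return true;
    }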
- FormatVersion = endian::readNext(Cur); + uint64_t FormatVersion = endian::byte_swap(Header->Version); if (FormatVersion > IndexedInstrProf::Version) return error(instrprof_error::unsupported_version); // Read the maximal function count. - MaxFunctionCount = endian::readNext(Cur); + MaxFunctionCount = + endian::byte_swap(Header->MaxFunctionCount); // Read the hash type and start offset. IndexedInstrProf::HashT HashType = static_cast( - endian::readNext(Cur)); + endian::byte_swap(Header->HashType)); if (HashType > IndexedInstrProf::HashT::Last) return error(instrprof_error::unsupported_hash_type); - uint64_t HashOffset = endian::readNext(Cur); + + uint64_t HashOffset = endian::byte_swap(Header->HashOffset); // The rest of the file is an on disk hash table. - Index.reset(InstrProfReaderIndex::Create( - Start + HashOffset, Cur, Start, - InstrProfLookupTrait(HashType, FormatVersion))); - // Set up our iterator for readNextRecord. - RecordIterator = Index->data_begin(); - + InstrProfReaderIndexBase *IndexPtr = nullptr; + IndexPtr = new InstrProfReaderIndex( + Start + HashOffset, Cur, Start, HashType, FormatVersion); + Index.reset(IndexPtr); return success(); } -std::error_code IndexedInstrProfReader::getFunctionCounts( - StringRef FuncName, uint64_t FuncHash, std::vector &Counts) { - auto Iter = Index->find(FuncName); - if (Iter == Index->end()) - return error(instrprof_error::unknown_function); +InstrProfSymtab &IndexedInstrProfReader::getSymtab() { + if (Symtab.get()) + return *Symtab.get(); + std::unique_ptr NewSymtab = make_unique(); + Index->populateSymtab(*NewSymtab.get()); + + Symtab = std::move(NewSymtab); + return *Symtab.get(); +} + +ErrorOr +IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName, + uint64_t FuncHash) { + ArrayRef Data; + std::error_code EC = Index->getRecords(FuncName, Data); + if (EC != instrprof_error::success) + return EC; // Found it. Look for counters with the right hash. - ArrayRef Data = (*Iter); - if (Data.empty()) - return error(instrprof_error::malformed); - for (unsigned I = 0, E = Data.size(); I < E; ++I) { // Check for a match and fill the vector if there is one. if (Data[I].Hash == FuncHash) { - Counts = Data[I].Counts; - return success(); + return std::move(Data[I]); } } return error(instrprof_error::hash_mismatch); } std::error_code -IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { - // Are we out of records? 
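getInstrProfRecord() above resolves a query in two steps: look the name up in the on-disk hash table, then scan that name's records for one with the matching structural hash, so a stale hash reports hash_mismatch rather than unknown_function. A sketch of that shape with a std::map standing in for the on-disk table:

    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    enum class Lookup { Success, UnknownFunction, HashMismatch };

    struct Record { uint64_t Hash; std::vector<uint64_t> Counts; };

    static Lookup
    findRecord(const std::map<std::string, std::vector<Record>> &Index,
               const std::string &Name, uint64_t Hash, const Record *&Out) {
      auto It = Index.find(Name);
      if (It == Index.end())
        return Lookup::UnknownFunction; // name never profiled
      for (const Record &R : It->second)
        if (R.Hash == Hash) { Out = &R; return Lookup::Success; }
      return Lookup::HashMismatch; // name known, but from a different build
    }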
- if (RecordIterator == Index->data_end()) - return error(instrprof_error::eof); +IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash, + std::vector &Counts) { + ErrorOr Record = getInstrProfRecord(FuncName, FuncHash); + if (std::error_code EC = Record.getError()) + return EC; - if ((*RecordIterator).empty()) - return error(instrprof_error::malformed); + Counts = Record.get().Counts; + return success(); +} +std::error_code IndexedInstrProfReader::readNextRecord( + InstrProfRecord &Record) { static unsigned RecordIndex = 0; - ArrayRef Data = (*RecordIterator); + + ArrayRef Data; + + std::error_code EC = Index->getRecords(Data); + if (EC != instrprof_error::success) + return error(EC); + Record = Data[RecordIndex++]; if (RecordIndex >= Data.size()) { - ++RecordIterator; + Index->advanceToNextKey(); RecordIndex = 0; } return success(); diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 2188543ed61c..9bb03e1e77a3 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -13,27 +13,29 @@ //===----------------------------------------------------------------------===// #include "llvm/ProfileData/InstrProfWriter.h" -#include "InstrProfIndexed.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/OnDiskHashTable.h" +#include using namespace llvm; namespace { +static support::endianness ValueProfDataEndianness = support::little; + class InstrProfRecordTrait { public: typedef StringRef key_type; typedef StringRef key_type_ref; - typedef const InstrProfWriter::CounterData *const data_type; - typedef const InstrProfWriter::CounterData *const data_type_ref; + typedef const InstrProfWriter::ProfilingData *const data_type; + typedef const InstrProfWriter::ProfilingData *const data_type_ref; typedef uint64_t hash_value_type; typedef uint64_t offset_type; static hash_value_type ComputeHash(key_type_ref K) { - return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K); + return IndexedInstrProf::ComputeHash(K); } static std::pair @@ -45,8 +47,15 @@ public: LE.write(N); offset_type M = 0; - for (const auto &Counts : *V) - M += (2 + Counts.second.size()) * sizeof(uint64_t); + for (const auto &ProfileData : *V) { + const InstrProfRecord &ProfRecord = ProfileData.second; + M += sizeof(uint64_t); // The function hash + M += sizeof(uint64_t); // The size of the Counts vector + M += ProfRecord.Counts.size() * sizeof(uint64_t); + + // Value data + M += ValueProfData::getSize(ProfileData.second); + } LE.write(M); return std::make_pair(N, M); @@ -60,50 +69,68 @@ public: offset_type) { using namespace llvm::support; endian::Writer LE(Out); + for (const auto &ProfileData : *V) { + const InstrProfRecord &ProfRecord = ProfileData.second; - for (const auto &Counts : *V) { - LE.write(Counts.first); - LE.write(Counts.second.size()); - for (uint64_t I : Counts.second) + LE.write(ProfileData.first); // Function hash + LE.write(ProfRecord.Counts.size()); + for (uint64_t I : ProfRecord.Counts) LE.write(I); + + // Write value data + std::unique_ptr VDataPtr = + ValueProfData::serializeFrom(ProfileData.second); + uint32_t S = VDataPtr->getSize(); + VDataPtr->swapBytesFromHost(ValueProfDataEndianness); + Out.write((const char *)VDataPtr.get(), S); } } }; } -std::error_code -InstrProfWriter::addFunctionCounts(StringRef FunctionName, - uint64_t FunctionHash, - ArrayRef Counters) { - auto &CounterData = FunctionData[FunctionName]; +// Internal interface for testing purpose 
only. +void InstrProfWriter::setValueProfDataEndianness( + support::endianness Endianness) { + ValueProfDataEndianness = Endianness; +} - auto Where = CounterData.find(FunctionHash); - if (Where == CounterData.end()) { +std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I, + uint64_t Weight) { + auto &ProfileDataMap = FunctionData[I.Name]; + + bool NewFunc; + ProfilingData::iterator Where; + std::tie(Where, NewFunc) = + ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord())); + InstrProfRecord &Dest = Where->second; + + instrprof_error Result; + if (NewFunc) { // We've never seen a function with this name and hash, add it. - CounterData[FunctionHash] = Counters; - // We keep track of the max function count as we go for simplicity. - if (Counters[0] > MaxFunctionCount) - MaxFunctionCount = Counters[0]; - return instrprof_error::success; + Dest = std::move(I); + // Fix up the name to avoid dangling reference. + Dest.Name = FunctionData.find(Dest.Name)->getKey(); + Result = instrprof_error::success; + if (Weight > 1) { + for (auto &Count : Dest.Counts) { + bool Overflowed; + Count = SaturatingMultiply(Count, Weight, &Overflowed); + if (Overflowed && Result == instrprof_error::success) { + Result = instrprof_error::counter_overflow; + } + } + } + } else { + // We're updating a function we've seen before. + Result = Dest.merge(I, Weight); } - // We're updating a function we've seen before. - auto &FoundCounters = Where->second; - // If the number of counters doesn't match we either have bad data or a hash - // collision. - if (FoundCounters.size() != Counters.size()) - return instrprof_error::count_mismatch; - - for (size_t I = 0, E = Counters.size(); I < E; ++I) { - if (FoundCounters[I] + Counters[I] < FoundCounters[I]) - return instrprof_error::counter_overflow; - FoundCounters[I] += Counters[I]; - } // We keep track of the max function count as we go for simplicity. - if (FoundCounters[0] > MaxFunctionCount) - MaxFunctionCount = FoundCounters[0]; + // Update this statistic no matter the result of the merge. + if (Dest.Counts[0] > MaxFunctionCount) + MaxFunctionCount = Dest.Counts[0]; - return instrprof_error::success; + return Result; } std::pair InstrProfWriter::writeImpl(raw_ostream &OS) { @@ -117,13 +144,23 @@ std::pair InstrProfWriter::writeImpl(raw_ostream &OS) { endian::Writer LE(OS); // Write the header. - LE.write(IndexedInstrProf::Magic); - LE.write(IndexedInstrProf::Version); - LE.write(MaxFunctionCount); - LE.write(static_cast(IndexedInstrProf::HashType)); + IndexedInstrProf::Header Header; + Header.Magic = IndexedInstrProf::Magic; + Header.Version = IndexedInstrProf::Version; + Header.MaxFunctionCount = MaxFunctionCount; + Header.HashType = static_cast(IndexedInstrProf::HashType); + Header.HashOffset = 0; + int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); + + // Only write out all the fields except 'HashOffset'. We need + // to remember the offset of that field to allow back patching + // later. + for (int I = 0; I < N - 1; I++) + LE.write(reinterpret_cast(&Header)[I]); // Save a space to write the hash table start location. uint64_t HashTableStartLoc = OS.tell(); + // Reserve the space for HashOffset field. LE.write(0); // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS); @@ -138,9 +175,65 @@ void InstrProfWriter::write(raw_fd_ostream &OS) { // Go back and fill in the hash table start. using namespace support; OS.seek(TableStart.first); + // Now patch the HashOffset field previously reserved.
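The writeImpl()/write() pair above writes every header field except HashOffset, remembers that field's stream position, and patches in the real table offset once the hash table has been emitted and its start is known. A self-contained sketch of the reserve-then-back-patch pattern using std::fstream (bytes are written host-endian here, unlike the writer's endian-aware output; file name and payload are illustrative):

    #include <cstdint>
    #include <fstream>

    int main() {
      std::fstream OS("profile.bin",
                      std::ios::out | std::ios::binary | std::ios::trunc);
      // Reserve space: a zero placeholder where the offset will live.
      uint64_t Placeholder = 0;
      std::fstream::pos_type PatchLoc = OS.tellp();
      OS.write(reinterpret_cast<const char *>(&Placeholder), 8);
      // Emit the variable-sized content; record where the table starts.
      uint64_t TableStart = static_cast<uint64_t>(OS.tellp());
      OS.write("table-bytes", 11); // stand-in for the real hash table
      // Back-patch: seek to the reserved slot, overwrite, restore position.
      std::fstream::pos_type EndPos = OS.tellp();
      OS.seekp(PatchLoc);
      OS.write(reinterpret_cast<const char *>(&TableStart), 8);
      OS.seekp(EndPos);
    }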
endian::Writer(OS).write(TableStart.second); } +static const char *ValueProfKindStr[] = { +#define VALUE_PROF_KIND(Enumerator, Value) #Enumerator, +#include "llvm/ProfileData/InstrProfData.inc" +}; + +void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func, + InstrProfSymtab &Symtab, + raw_fd_ostream &OS) { + OS << Func.Name << "\n"; + OS << "# Func Hash:\n" << Func.Hash << "\n"; + OS << "# Num Counters:\n" << Func.Counts.size() << "\n"; + OS << "# Counter Values:\n"; + for (uint64_t Count : Func.Counts) + OS << Count << "\n"; + + uint32_t NumValueKinds = Func.getNumValueKinds(); + if (!NumValueKinds) { + OS << "\n"; + return; + } + + OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n"; + for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) { + uint32_t NS = Func.getNumValueSites(VK); + if (!NS) + continue; + OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n"; + OS << "# NumValueSites:\n" << NS << "\n"; + for (uint32_t S = 0; S < NS; S++) { + uint32_t ND = Func.getNumValueDataForSite(VK, S); + OS << ND << "\n"; + std::unique_ptr VD = Func.getValueForSite(VK, S); + for (uint32_t I = 0; I < ND; I++) { + if (VK == IPVK_IndirectCallTarget) + OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n"; + else + OS << VD[I].Value << ":" << VD[I].Count << "\n"; + } + } + } + + OS << "\n"; +} + +void InstrProfWriter::writeText(raw_fd_ostream &OS) { + InstrProfSymtab Symtab; + for (const auto &I : FunctionData) + Symtab.addFuncName(I.getKey()); + Symtab.finalizeSymtab(); + + for (const auto &I : FunctionData) + for (const auto &Func : I.getValue()) + writeRecordInText(Func.second, Symtab, OS); +} + std::unique_ptr InstrProfWriter::writeBuffer() { std::string Data; llvm::raw_string_ostream OS(Data); diff --git a/lib/ProfileData/SampleProf.cpp b/lib/ProfileData/SampleProf.cpp index 920c48a24640..9ded757f2b28 100644 --- a/lib/ProfileData/SampleProf.cpp +++ b/lib/ProfileData/SampleProf.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" +using namespace llvm::sampleprof; using namespace llvm; namespace { @@ -27,17 +28,25 @@ class SampleProfErrorCategoryType : public std::error_category { case sampleprof_error::success: return "Success"; case sampleprof_error::bad_magic: - return "Invalid file format (bad magic)"; + return "Invalid sample profile data (bad magic)"; case sampleprof_error::unsupported_version: - return "Unsupported format version"; + return "Unsupported sample profile format version"; case sampleprof_error::too_large: return "Too much profile data"; case sampleprof_error::truncated: return "Truncated profile data"; case sampleprof_error::malformed: - return "Malformed profile data"; + return "Malformed sample profile data"; case sampleprof_error::unrecognized_format: - return "Unrecognized profile encoding format"; + return "Unrecognized sample profile encoding format"; + case sampleprof_error::unsupported_writing_format: + return "Profile encoding format unsupported for writing operations"; + case sampleprof_error::truncated_name_table: + return "Truncated function name table"; + case sampleprof_error::not_implemented: + return "Unimplemented feature"; + case sampleprof_error::counter_overflow: + return "Counter overflow"; } llvm_unreachable("A value of sampleprof_error has no message."); } @@ -49,3 +58,92 @@ static ManagedStatic ErrorCategory; const std::error_category &llvm::sampleprof_category() { return *ErrorCategory; } + +void LineLocation::print(raw_ostream &OS) const { + OS << 
LineOffset; + if (Discriminator > 0) + OS << "." << Discriminator; +} + +raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const LineLocation &Loc) { + Loc.print(OS); + return OS; +} + +void LineLocation::dump() const { print(dbgs()); } + +void CallsiteLocation::print(raw_ostream &OS) const { + LineLocation::print(OS); + OS << ": inlined callee: " << CalleeName; +} + +void CallsiteLocation::dump() const { print(dbgs()); } + +inline raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const CallsiteLocation &Loc) { + Loc.print(OS); + return OS; +} + +/// \brief Print the sample record to the stream \p OS indented by \p Indent. +void SampleRecord::print(raw_ostream &OS, unsigned Indent) const { + OS << NumSamples; + if (hasCalls()) { + OS << ", calls:"; + for (const auto &I : getCallTargets()) + OS << " " << I.first() << ":" << I.second; + } + OS << "\n"; +} + +void SampleRecord::dump() const { print(dbgs(), 0); } + +raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const SampleRecord &Sample) { + Sample.print(OS, 0); + return OS; +} + +/// \brief Print the samples collected for a function on stream \p OS. +void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const { + OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size() + << " sampled lines\n"; + + OS.indent(Indent); + if (BodySamples.size() > 0) { + OS << "Samples collected in the function's body {\n"; + SampleSorter SortedBodySamples(BodySamples); + for (const auto &SI : SortedBodySamples.get()) { + OS.indent(Indent + 2); + OS << SI->first << ": " << SI->second; + } + OS.indent(Indent); + OS << "}\n"; + } else { + OS << "No samples collected in the function's body\n"; + } + + OS.indent(Indent); + if (CallsiteSamples.size() > 0) { + OS << "Samples collected in inlined callsites {\n"; + SampleSorter SortedCallsiteSamples( + CallsiteSamples); + for (const auto &CS : SortedCallsiteSamples.get()) { + OS.indent(Indent + 2); + OS << CS->first << ": "; + CS->second.print(OS, Indent + 4); + } + OS << "}\n"; + } else { + OS << "No inlined callsites in this function\n"; + } +} + +raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS, + const FunctionSamples &FS) { + FS.print(OS); + return OS; +} + +void FunctionSamples::dump(void) const { print(dbgs(), 0); } diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp index b39bfd6e2ecd..93cd87bb82f8 100644 --- a/lib/ProfileData/SampleProfReader.cpp +++ b/lib/ProfileData/SampleProfReader.cpp @@ -8,133 +8,37 @@ //===----------------------------------------------------------------------===// // // This file implements the class that reads LLVM sample profiles. It -// supports two file formats: text and binary. The textual representation -// is useful for debugging and testing purposes. The binary representation -// is more compact, resulting in smaller file sizes. However, they can -// both be used interchangeably. +// supports three file formats: text, binary and gcov. // -// NOTE: If you are making changes to the file format, please remember -// to document them in the Clang documentation at -// tools/clang/docs/UsersManual.rst. +// The textual representation is useful for debugging and testing purposes. The +// binary representation is more compact, resulting in smaller file sizes. // -// Text format -// ----------- +// The gcov encoding is the one generated by GCC's AutoFDO profile creation +// tool (https://github.com/google/autofdo) // -// Sample profiles are written as ASCII text. 
The file is divided into -// sections, which correspond to each of the functions executed at runtime. -// Each section has the following format -// -// function1:total_samples:total_head_samples -// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] -// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] -// ... -// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] -// -// The file may contain blank lines between sections and within a -// section. However, the spacing within a single line is fixed. Additional -// spaces will result in an error while reading the file. -// -// Function names must be mangled in order for the profile loader to -// match them in the current translation unit. The two numbers in the -// function header specify how many total samples were accumulated in the -// function (first number), and the total number of samples accumulated -// in the prologue of the function (second number). This head sample -// count provides an indicator of how frequently the function is invoked. -// -// Each sampled line may contain several items. Some are optional (marked -// below): -// -// a. Source line offset. This number represents the line number -// in the function where the sample was collected. The line number is -// always relative to the line where symbol of the function is -// defined. So, if the function has its header at line 280, the offset -// 13 is at line 293 in the file. -// -// Note that this offset should never be a negative number. This could -// happen in cases like macros. The debug machinery will register the -// line number at the point of macro expansion. So, if the macro was -// expanded in a line before the start of the function, the profile -// converter should emit a 0 as the offset (this means that the optimizers -// will not be able to associate a meaningful weight to the instructions -// in the macro). -// -// b. [OPTIONAL] Discriminator. This is used if the sampled program -// was compiled with DWARF discriminator support -// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators). -// DWARF discriminators are unsigned integer values that allow the -// compiler to distinguish between multiple execution paths on the -// same source line location. -// -// For example, consider the line of code ``if (cond) foo(); else bar();``. -// If the predicate ``cond`` is true 80% of the time, then the edge -// into function ``foo`` should be considered to be taken most of the -// time. But both calls to ``foo`` and ``bar`` are at the same source -// line, so a sample count at that line is not sufficient. The -// compiler needs to know which part of that line is taken more -// frequently. -// -// This is what discriminators provide. In this case, the calls to -// ``foo`` and ``bar`` will be at the same line, but will have -// different discriminator values. This allows the compiler to correctly -// set edge weights into ``foo`` and ``bar``. -// -// c. Number of samples. This is an integer quantity representing the -// number of samples collected by the profiler at this source -// location. -// -// d. [OPTIONAL] Potential call targets and samples. If present, this -// line contains a call instruction. This models both direct and -// number of samples. For example, -// -// 130: 7 foo:3 bar:2 baz:7 -// -// The above means that at relative line offset 130 there is a call -// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``, -// with ``baz()`` being the relatively more frequently called target. 
+// All three encodings can be used interchangeably as an input sample profile. // //===----------------------------------------------------------------------===// #include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Regex.h" using namespace llvm::sampleprof; using namespace llvm; -/// \brief Print the samples collected for a function on stream \p OS. -/// -/// \param OS Stream to emit the output to. -void FunctionSamples::print(raw_ostream &OS) { - OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size() - << " sampled lines\n"; - for (const auto &SI : BodySamples) { - LineLocation Loc = SI.first; - const SampleRecord &Sample = SI.second; - OS << "\tline offset: " << Loc.LineOffset - << ", discriminator: " << Loc.Discriminator - << ", number of samples: " << Sample.getSamples(); - if (Sample.hasCalls()) { - OS << ", calls:"; - for (const auto &I : Sample.getCallTargets()) - OS << " " << I.first() << ":" << I.second; - } - OS << "\n"; - } - OS << "\n"; -} - /// \brief Dump the function profile for \p FName. /// /// \param FName Name of the function to print. /// \param OS Stream to emit the output to. void SampleProfileReader::dumpFunctionProfile(StringRef FName, raw_ostream &OS) { - OS << "Function: " << FName << ": "; - Profiles[FName].print(OS); + OS << "Function: " << FName << ": " << Profiles[FName]; } /// \brief Dump all the function profiles found on stream \p OS. @@ -143,6 +47,102 @@ void SampleProfileReader::dump(raw_ostream &OS) { dumpFunctionProfile(I.getKey(), OS); } +/// \brief Parse \p Input as function head. +/// +/// Parse one line of \p Input, and update function name in \p FName, +/// function's total sample count in \p NumSamples, function's entry +/// count in \p NumHeadSamples. +/// +/// \returns true if parsing is successful. +static bool ParseHead(const StringRef &Input, StringRef &FName, + uint64_t &NumSamples, uint64_t &NumHeadSamples) { + if (Input[0] == ' ') + return false; + size_t n2 = Input.rfind(':'); + size_t n1 = Input.rfind(':', n2 - 1); + FName = Input.substr(0, n1); + if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) + return false; + if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) + return false; + return true; +} + + +/// \brief Returns true if line offset \p L is legal (only has 16 bits). +static bool isOffsetLegal(unsigned L) { + return (L & 0xffff) == L; +} + +/// \brief Parse \p Input as line sample. +/// +/// \param Input input line. +/// \param IsCallsite true if the line represents an inlined callsite. +/// \param Depth the depth of the inline stack. +/// \param NumSamples total samples of the line/inlined callsite. +/// \param LineOffset line offset to the start of the function. +/// \param Discriminator discriminator of the line. +/// \param TargetCountMap map from indirect call target to count. +/// +/// returns true if parsing is successful. 
+static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth, + uint64_t &NumSamples, uint32_t &LineOffset, + uint32_t &Discriminator, StringRef &CalleeName, + DenseMap &TargetCountMap) { + for (Depth = 0; Input[Depth] == ' '; Depth++) + ; + if (Depth == 0) + return false; + + size_t n1 = Input.find(':'); + StringRef Loc = Input.substr(Depth, n1 - Depth); + size_t n2 = Loc.find('.'); + if (n2 == StringRef::npos) { + if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) + return false; + Discriminator = 0; + } else { + if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) + return false; + if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) + return false; + } + + StringRef Rest = Input.substr(n1 + 2); + if (Rest[0] >= '0' && Rest[0] <= '9') { + IsCallsite = false; + size_t n3 = Rest.find(' '); + if (n3 == StringRef::npos) { + if (Rest.getAsInteger(10, NumSamples)) + return false; + } else { + if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) + return false; + } + while (n3 != StringRef::npos) { + n3 += Rest.substr(n3).find_first_not_of(' '); + Rest = Rest.substr(n3); + n3 = Rest.find(' '); + StringRef pair = Rest; + if (n3 != StringRef::npos) { + pair = Rest.substr(0, n3); + } + size_t n4 = pair.find(':'); + uint64_t count; + if (pair.substr(n4 + 1).getAsInteger(10, count)) + return false; + TargetCountMap[pair.substr(0, n4)] = count; + } + } else { + IsCallsite = true; + size_t n3 = Rest.find_last_of(':'); + CalleeName = Rest.substr(0, n3); + if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) + return false; + } + return true; +} + /// \brief Load samples from a text file. /// /// See the documentation at the top of the file for an explanation of @@ -151,14 +151,13 @@ void SampleProfileReader::dump(raw_ostream &OS) { /// \returns true if the file was loaded successfully, false otherwise. std::error_code SampleProfileReaderText::read() { line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); + sampleprof_error Result = sampleprof_error::success; - // Read the profile of each function. Since each function may be - // mentioned more than once, and we are collecting flat profiles, - // accumulate samples as we parse them. - Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$"); - Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$"); - Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)"); - while (!LineIt.is_at_eof()) { + InlineCallStack InlineStack; + + for (; !LineIt.is_at_eof(); ++LineIt) { + if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') + continue; // Read the header of each function. // // Note that for function identifiers we are actually expecting @@ -171,63 +170,74 @@ std::error_code SampleProfileReaderText::read() { // // The only requirement we place on the identifier, then, is that it // should not begin with a number. - SmallVector Matches; - if (!HeadRE.match(*LineIt, &Matches)) { - reportParseError(LineIt.line_number(), - "Expected 'mangled_name:NUM:NUM', found " + *LineIt); - return sampleprof_error::malformed; - } - assert(Matches.size() == 4); - StringRef FName = Matches[1]; - unsigned NumSamples, NumHeadSamples; - Matches[2].getAsInteger(10, NumSamples); - Matches[3].getAsInteger(10, NumHeadSamples); - Profiles[FName] = FunctionSamples(); - FunctionSamples &FProfile = Profiles[FName]; - FProfile.addTotalSamples(NumSamples); - FProfile.addHeadSamples(NumHeadSamples); - ++LineIt; - - // Now read the body. The body of the function ends when we reach - // EOF or when we see the start of the next function. 
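ParseLine() above replaces the old regex matching with hand-rolled scanning of "offset[.discriminator]: samples [target:count ...]" lines, where the number of leading spaces encodes the inline-stack depth. A reduced sketch of the body-line case only (callsite lines are omitted, and std::stoul/std::stoull throw on malformed numbers where the real parser returns false):

    #include <cstdint>
    #include <map>
    #include <string>

    static bool parseBodyLine(const std::string &In, uint32_t &Depth,
                              uint32_t &LineOffset, uint32_t &Discriminator,
                              uint64_t &Samples,
                              std::map<std::string, uint64_t> &Targets) {
      Depth = 0;
      while (Depth < In.size() && In[Depth] == ' ') ++Depth;
      size_t Colon = In.find(':', Depth);
      if (Depth == 0 || Colon == std::string::npos)
        return false; // body lines are always indented
      std::string Loc = In.substr(Depth, Colon - Depth);
      size_t Dot = Loc.find('.');
      LineOffset = std::stoul(Loc.substr(0, Dot));
      Discriminator =
          Dot == std::string::npos ? 0 : std::stoul(Loc.substr(Dot + 1));
      size_t Pos = In.find_first_not_of(' ', Colon + 1);
      size_t Space = In.find(' ', Pos);
      Samples = std::stoull(In.substr(Pos, Space - Pos));
      // Optional "target:count" pairs follow the sample count.
      while (Space != std::string::npos) {
        Pos = In.find_first_not_of(' ', Space);
        if (Pos == std::string::npos) break;
        Space = In.find(' ', Pos);
        std::string Pair = In.substr(Pos, Space - Pos);
        size_t C = Pair.find(':');
        if (C == std::string::npos) return false;
        Targets[Pair.substr(0, C)] = std::stoull(Pair.substr(C + 1));
      }
      return true;
    }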
- while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) { - if (!LineSampleRE.match(*LineIt, &Matches)) { - reportParseError( - LineIt.line_number(), - "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt); + if ((*LineIt)[0] != ' ') { + uint64_t NumSamples, NumHeadSamples; + StringRef FName; + if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { + reportError(LineIt.line_number(), + "Expected 'mangled_name:NUM:NUM', found " + *LineIt); return sampleprof_error::malformed; } - assert(Matches.size() == 5); - unsigned LineOffset, NumSamples, Discriminator = 0; - Matches[1].getAsInteger(10, LineOffset); - if (Matches[2] != "") - Matches[2].getAsInteger(10, Discriminator); - Matches[3].getAsInteger(10, NumSamples); - - // If there are function calls in this line, generate a call sample - // entry for each call. - std::string CallsLine(Matches[4]); - while (CallsLine != "") { - SmallVector CallSample; - if (!CallSampleRE.match(CallsLine, &CallSample)) { - reportParseError(LineIt.line_number(), - "Expected 'mangled_name:NUM', found " + CallsLine); - return sampleprof_error::malformed; - } - StringRef CalledFunction = CallSample[1]; - unsigned CalledFunctionSamples; - CallSample[2].getAsInteger(10, CalledFunctionSamples); - FProfile.addCalledTargetSamples(LineOffset, Discriminator, - CalledFunction, CalledFunctionSamples); - CallsLine = CallSampleRE.sub("", CallsLine); + Profiles[FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FName]; + MergeResult(Result, FProfile.addTotalSamples(NumSamples)); + MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); + InlineStack.clear(); + InlineStack.push_back(&FProfile); + } else { + uint64_t NumSamples; + StringRef FName; + DenseMap TargetCountMap; + bool IsCallsite; + uint32_t Depth, LineOffset, Discriminator; + if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset, + Discriminator, FName, TargetCountMap)) { + reportError(LineIt.line_number(), + "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + + *LineIt); + return sampleprof_error::malformed; + } + if (IsCallsite) { + while (InlineStack.size() > Depth) { + InlineStack.pop_back(); + } + FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( + CallsiteLocation(LineOffset, Discriminator, FName)); + MergeResult(Result, FSamples.addTotalSamples(NumSamples)); + InlineStack.push_back(&FSamples); + } else { + while (InlineStack.size() > Depth) { + InlineStack.pop_back(); + } + FunctionSamples &FProfile = *InlineStack.back(); + for (const auto &name_count : TargetCountMap) { + MergeResult(Result, FProfile.addCalledTargetSamples( + LineOffset, Discriminator, name_count.first, + name_count.second)); + } + MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, + NumSamples)); } - - FProfile.addBodySamples(LineOffset, Discriminator, NumSamples); - ++LineIt; } } - return sampleprof_error::success; + return Result; +} + +bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { + bool result = false; + + // Check that the first non-comment line is a valid function header. 
+ line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); + if (!LineIt.is_at_eof()) { + if ((*LineIt)[0] != ' ') { + uint64_t NumSamples, NumHeadSamples; + StringRef FName; + result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); + } + } + + return result; } template ErrorOr SampleProfileReaderBinary::readNumber() { @@ -243,7 +253,7 @@ template ErrorOr SampleProfileReaderBinary::readNumber() { EC = sampleprof_error::success; if (EC) { - reportParseError(0, EC.message()); + reportError(0, EC.message()); return EC; } @@ -256,7 +266,7 @@ ErrorOr SampleProfileReaderBinary::readString() { StringRef Str(reinterpret_cast(Data)); if (Data + Str.size() + 1 > End) { EC = sampleprof_error::truncated; - reportParseError(0, EC.message()); + reportError(0, EC.message()); return EC; } @@ -264,62 +274,109 @@ ErrorOr SampleProfileReaderBinary::readString() { return Str; } +ErrorOr SampleProfileReaderBinary::readStringFromTable() { + std::error_code EC; + auto Idx = readNumber(); + if (std::error_code EC = Idx.getError()) + return EC; + if (*Idx >= NameTable.size()) + return sampleprof_error::truncated_name_table; + return NameTable[*Idx]; +} + +std::error_code +SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { + auto NumSamples = readNumber(); + if (std::error_code EC = NumSamples.getError()) + return EC; + FProfile.addTotalSamples(*NumSamples); + + // Read the samples in the body. + auto NumRecords = readNumber(); + if (std::error_code EC = NumRecords.getError()) + return EC; + + for (uint32_t I = 0; I < *NumRecords; ++I) { + auto LineOffset = readNumber(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + if (!isOffsetLegal(*LineOffset)) { + return std::error_code(); + } + + auto Discriminator = readNumber(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto NumSamples = readNumber(); + if (std::error_code EC = NumSamples.getError()) + return EC; + + auto NumCalls = readNumber(); + if (std::error_code EC = NumCalls.getError()) + return EC; + + for (uint32_t J = 0; J < *NumCalls; ++J) { + auto CalledFunction(readStringFromTable()); + if (std::error_code EC = CalledFunction.getError()) + return EC; + + auto CalledFunctionSamples = readNumber(); + if (std::error_code EC = CalledFunctionSamples.getError()) + return EC; + + FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + *CalledFunction, *CalledFunctionSamples); + } + + FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + } + + // Read all the samples for inlined function calls. 
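readNumber() in the binary sample reader decodes unsigned LEB128: seven payload bits per byte, with the continuation bit set on every byte except the last. A sketch of the decoder (the check against overflowing 64 bits is simplified relative to a production decoder):

    #include <cstdint>

    static bool decodeULEB128(const unsigned char *&P,
                              const unsigned char *End, uint64_t &Value) {
      Value = 0;
      unsigned Shift = 0;
      while (P != End) {
        unsigned char Byte = *P++;
        if (Shift >= 64)
          return false; // too many continuation bytes for 64 bits
        Value |= uint64_t(Byte & 0x7f) << Shift;
        if ((Byte & 0x80) == 0)
          return true; // continuation bit clear: this was the final byte
        Shift += 7;
      }
      return false; // ran off the end of the buffer: truncated
    }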
+ auto NumCallsites = readNumber(); + if (std::error_code EC = NumCallsites.getError()) + return EC; + + for (uint32_t J = 0; J < *NumCallsites; ++J) { + auto LineOffset = readNumber(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + auto Discriminator = readNumber(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( + CallsiteLocation(*LineOffset, *Discriminator, *FName)); + if (std::error_code EC = readProfile(CalleeProfile)) + return EC; + } + + return sampleprof_error::success; +} + std::error_code SampleProfileReaderBinary::read() { while (!at_eof()) { - auto FName(readString()); + auto NumHeadSamples = readNumber(); + if (std::error_code EC = NumHeadSamples.getError()) + return EC; + + auto FName(readStringFromTable()); if (std::error_code EC = FName.getError()) return EC; Profiles[*FName] = FunctionSamples(); FunctionSamples &FProfile = Profiles[*FName]; - auto Val = readNumber(); - if (std::error_code EC = Val.getError()) + FProfile.addHeadSamples(*NumHeadSamples); + + if (std::error_code EC = readProfile(FProfile)) return EC; - FProfile.addTotalSamples(*Val); - - Val = readNumber(); - if (std::error_code EC = Val.getError()) - return EC; - FProfile.addHeadSamples(*Val); - - // Read the samples in the body. - auto NumRecords = readNumber(); - if (std::error_code EC = NumRecords.getError()) - return EC; - for (unsigned I = 0; I < *NumRecords; ++I) { - auto LineOffset = readNumber(); - if (std::error_code EC = LineOffset.getError()) - return EC; - - auto Discriminator = readNumber(); - if (std::error_code EC = Discriminator.getError()) - return EC; - - auto NumSamples = readNumber(); - if (std::error_code EC = NumSamples.getError()) - return EC; - - auto NumCalls = readNumber(); - if (std::error_code EC = NumCalls.getError()) - return EC; - - for (unsigned J = 0; J < *NumCalls; ++J) { - auto CalledFunction(readString()); - if (std::error_code EC = CalledFunction.getError()) - return EC; - - auto CalledFunctionSamples = readNumber(); - if (std::error_code EC = CalledFunctionSamples.getError()) - return EC; - - FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, - *CalledFunction, - *CalledFunctionSamples); - } - - FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); - } } return sampleprof_error::success; @@ -343,6 +400,18 @@ std::error_code SampleProfileReaderBinary::readHeader() { else if (*Version != SPVersion()) return sampleprof_error::unsupported_version; + // Read the name table. 
+ auto Size = readNumber(); + if (std::error_code EC = Size.getError()) + return EC; + NameTable.reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + auto Name(readString()); + if (std::error_code EC = Name.getError()) + return EC; + NameTable.push_back(*Name); + } + return sampleprof_error::success; } @@ -353,6 +422,249 @@ bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) { return Magic == SPMagic(); } +std::error_code SampleProfileReaderGCC::skipNextWord() { + uint32_t dummy; + if (!GcovBuffer.readInt(dummy)) + return sampleprof_error::truncated; + return sampleprof_error::success; +} + +template ErrorOr SampleProfileReaderGCC::readNumber() { + if (sizeof(T) <= sizeof(uint32_t)) { + uint32_t Val; + if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits::max()) + return static_cast(Val); + } else if (sizeof(T) <= sizeof(uint64_t)) { + uint64_t Val; + if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits::max()) + return static_cast(Val); + } + + std::error_code EC = sampleprof_error::malformed; + reportError(0, EC.message()); + return EC; +} + +ErrorOr SampleProfileReaderGCC::readString() { + StringRef Str; + if (!GcovBuffer.readString(Str)) + return sampleprof_error::truncated; + return Str; +} + +std::error_code SampleProfileReaderGCC::readHeader() { + // Read the magic identifier. + if (!GcovBuffer.readGCDAFormat()) + return sampleprof_error::unrecognized_format; + + // Read the version number. Note - the GCC reader does not validate this + // version, but the profile creator generates v704. + GCOV::GCOVVersion version; + if (!GcovBuffer.readGCOVVersion(version)) + return sampleprof_error::unrecognized_format; + + if (version != GCOV::V704) + return sampleprof_error::unsupported_version; + + // Skip the empty integer. 
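The gcov reader's readNumber() template above dispatches on the width of the requested result type, reading a 32- or 64-bit stream word and range-checking it before the narrowing cast. A sketch of the same dispatch, with GcovLikeBuffer as a hypothetical stand-in for GCOVBuffer:

    #include <cstdint>
    #include <cstring>
    #include <limits>

    // Hypothetical byte-span reader; gcov words are little-endian, and a
    // little-endian host is assumed for the memcpy shortcut.
    struct GcovLikeBuffer {
      const unsigned char *P, *End;
      bool readInt(uint32_t &V) {
        if (End - P < 4) return false;
        std::memcpy(&V, P, 4); P += 4; return true;
      }
      bool readInt64(uint64_t &V) {
        if (End - P < 8) return false;
        std::memcpy(&V, P, 8); P += 8; return true;
      }
    };

    // Width-dispatching read with a range check before narrowing.
    template <typename T> bool readNumber(GcovLikeBuffer &B, T &Out) {
      if (sizeof(T) <= sizeof(uint32_t)) {
        uint32_t V;
        if (!B.readInt(V) || V > std::numeric_limits<T>::max()) return false;
        Out = static_cast<T>(V);
      } else {
        uint64_t V;
        if (!B.readInt64(V) || V > std::numeric_limits<T>::max()) return false;
        Out = static_cast<T>(V);
      }
      return true; // false would mean a truncated or out-of-range value
    }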
+ if (std::error_code EC = skipNextWord()) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { + uint32_t Tag; + if (!GcovBuffer.readInt(Tag)) + return sampleprof_error::truncated; + + if (Tag != Expected) + return sampleprof_error::malformed; + + if (std::error_code EC = skipNextWord()) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readNameTable() { + if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) + return EC; + + uint32_t Size; + if (!GcovBuffer.readInt(Size)) + return sampleprof_error::truncated; + + for (uint32_t I = 0; I < Size; ++I) { + StringRef Str; + if (!GcovBuffer.readString(Str)) + return sampleprof_error::truncated; + Names.push_back(Str); + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readFunctionProfiles() { + if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) + return EC; + + uint32_t NumFunctions; + if (!GcovBuffer.readInt(NumFunctions)) + return sampleprof_error::truncated; + + InlineCallStack Stack; + for (uint32_t I = 0; I < NumFunctions; ++I) + if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readOneFunctionProfile( + const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { + uint64_t HeadCount = 0; + if (InlineStack.size() == 0) + if (!GcovBuffer.readInt64(HeadCount)) + return sampleprof_error::truncated; + + uint32_t NameIdx; + if (!GcovBuffer.readInt(NameIdx)) + return sampleprof_error::truncated; + + StringRef Name(Names[NameIdx]); + + uint32_t NumPosCounts; + if (!GcovBuffer.readInt(NumPosCounts)) + return sampleprof_error::truncated; + + uint32_t NumCallsites; + if (!GcovBuffer.readInt(NumCallsites)) + return sampleprof_error::truncated; + + FunctionSamples *FProfile = nullptr; + if (InlineStack.size() == 0) { + // If this is a top function that we have already processed, do not + // update its profile again. This happens in the presence of + // function aliases. Since these aliases share the same function + // body, there will be identical replicated profiles for the + // original function. In this case, we simply do not bother updating + // the profile of the original function. + FProfile = &Profiles[Name]; + FProfile->addHeadSamples(HeadCount); + if (FProfile->getTotalSamples() > 0) + Update = false; + } else { + // Otherwise, we are reading an inlined instance. The top of the + // inline stack contains the profile of the caller. Insert this + // callee in the caller's CallsiteMap. + FunctionSamples *CallerProfile = InlineStack.front(); + uint32_t LineOffset = Offset >> 16; + uint32_t Discriminator = Offset & 0xffff; + FProfile = &CallerProfile->functionSamplesAt( + CallsiteLocation(LineOffset, Discriminator, Name)); + } + + for (uint32_t I = 0; I < NumPosCounts; ++I) { + uint32_t Offset; + if (!GcovBuffer.readInt(Offset)) + return sampleprof_error::truncated; + + uint32_t NumTargets; + if (!GcovBuffer.readInt(NumTargets)) + return sampleprof_error::truncated; + + uint64_t Count; + if (!GcovBuffer.readInt64(Count)) + return sampleprof_error::truncated; + + // The line location is encoded in the offset as: + // high 16 bits: line offset to the start of the function. + // low 16 bits: discriminator.
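As the comments above note, a callsite location in this encoding packs the line offset into the high 16 bits of a single 32-bit word and the DWARF discriminator into the low 16 bits, which is also why the text reader's isOffsetLegal() rejects offsets wider than 16 bits. A sketch of the pack/unpack pair:

    #include <cassert>
    #include <cstdint>

    static uint32_t packLocation(uint32_t LineOffset, uint32_t Discriminator) {
      assert(LineOffset <= 0xffffu && Discriminator <= 0xffffu);
      return (LineOffset << 16) | Discriminator;
    }

    static void unpackLocation(uint32_t Offset, uint32_t &LineOffset,
                               uint32_t &Discriminator) {
      LineOffset = Offset >> 16;       // high 16 bits
      Discriminator = Offset & 0xffff; // low 16 bits
    }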
+ uint32_t LineOffset = Offset >> 16; + uint32_t Discriminator = Offset & 0xffff; + + InlineCallStack NewStack; + NewStack.push_back(FProfile); + NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end()); + if (Update) { + // Walk up the inline stack, adding the samples on this line to + // the total sample count of the callers in the chain. + for (auto CallerProfile : NewStack) + CallerProfile->addTotalSamples(Count); + + // Update the body samples for the current profile. + FProfile->addBodySamples(LineOffset, Discriminator, Count); + } + + // Process the list of functions called at an indirect call site. + // These are all the targets that a function pointer (or virtual + // function) resolved at runtime. + for (uint32_t J = 0; J < NumTargets; J++) { + uint32_t HistVal; + if (!GcovBuffer.readInt(HistVal)) + return sampleprof_error::truncated; + + if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) + return sampleprof_error::malformed; + + uint64_t TargetIdx; + if (!GcovBuffer.readInt64(TargetIdx)) + return sampleprof_error::truncated; + StringRef TargetName(Names[TargetIdx]); + + uint64_t TargetCount; + if (!GcovBuffer.readInt64(TargetCount)) + return sampleprof_error::truncated; + + if (Update) { + FunctionSamples &TargetProfile = Profiles[TargetName]; + TargetProfile.addCalledTargetSamples(LineOffset, Discriminator, + TargetName, TargetCount); + } + } + } + + // Process all the inlined callers into the current function. These + // are all the callsites that were inlined into this function. + for (uint32_t I = 0; I < NumCallsites; I++) { + // The offset is encoded as: + // high 16 bits: line offset to the start of the function. + // low 16 bits: discriminator. + uint32_t Offset; + if (!GcovBuffer.readInt(Offset)) + return sampleprof_error::truncated; + InlineCallStack NewStack; + NewStack.push_back(FProfile); + NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end()); + if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) + return EC; + } + + return sampleprof_error::success; +} + +/// \brief Read a GCC AutoFDO profile. +/// +/// This format is generated by the Linux Perf conversion tool at +/// https://github.com/google/autofdo. +std::error_code SampleProfileReaderGCC::read() { + // Read the string table. + if (std::error_code EC = readNameTable()) + return EC; + + // Read the source profile. + if (std::error_code EC = readFunctionProfiles()) + return EC; + + return sampleprof_error::success; +} + +bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { + StringRef Magic(reinterpret_cast(Buffer.getBufferStart())); + return Magic == "adcg*704"; +} + /// \brief Prepare a memory buffer for the contents of \p Filename. /// /// \returns an error code indicating the status of the buffer. @@ -364,7 +676,7 @@ setupMemoryBuffer(std::string Filename) { auto Buffer = std::move(BufferOrErr.get()); // Sanity check the file. - if (Buffer->getBufferSize() > std::numeric_limits::max()) + if (Buffer->getBufferSize() > std::numeric_limits::max()) return sampleprof_error::too_large; return std::move(Buffer); @@ -384,13 +696,29 @@ SampleProfileReader::create(StringRef Filename, LLVMContext &C) { auto BufferOrError = setupMemoryBuffer(Filename); if (std::error_code EC = BufferOrError.getError()) return EC; + return create(BufferOrError.get(), C); +} - auto Buffer = std::move(BufferOrError.get()); +/// \brief Create a sample profile reader based on the format of the input data. 
+/// +/// \param B The memory buffer to create the reader from (assumes ownership). +/// +/// \param Reader The reader to instantiate according to \p Filename's format. +/// +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \returns an error code indicating the status of the created reader. +ErrorOr> +SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C) { std::unique_ptr Reader; - if (SampleProfileReaderBinary::hasFormat(*Buffer)) - Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C)); + if (SampleProfileReaderBinary::hasFormat(*B)) + Reader.reset(new SampleProfileReaderBinary(std::move(B), C)); + else if (SampleProfileReaderGCC::hasFormat(*B)) + Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); + else if (SampleProfileReaderText::hasFormat(*B)) + Reader.reset(new SampleProfileReaderText(std::move(B), C)); else - Reader.reset(new SampleProfileReaderText(std::move(Buffer), C)); + return sampleprof_error::unrecognized_format; if (std::error_code EC = Reader->readHeader()) return EC; diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp index c95267ad976b..51feee5ad7d1 100644 --- a/lib/ProfileData/SampleProfWriter.cpp +++ b/lib/ProfileData/SampleProfWriter.cpp @@ -30,16 +30,27 @@ using namespace llvm::sampleprof; using namespace llvm; /// \brief Write samples to a text file. -bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) { - if (S.empty()) - return true; +/// +/// Note: it may be tempting to implement this in terms of +/// FunctionSamples::print(). Please don't. The dump functionality is intended +/// for debugging and has no specified form. +/// +/// The format used here is more structured and deliberate because +/// it needs to be parsed by the SampleProfileReaderText class. +std::error_code SampleProfileWriterText::write(StringRef FName, + const FunctionSamples &S) { + auto &OS = *OutputStream; - OS << FName << ":" << S.getTotalSamples() << ":" << S.getHeadSamples() - << "\n"; + OS << FName << ":" << S.getTotalSamples(); + if (Indent == 0) + OS << ":" << S.getHeadSamples(); + OS << "\n"; - for (const auto &I : S.getBodySamples()) { - LineLocation Loc = I.first; - const SampleRecord &Sample = I.second; + SampleSorter SortedSamples(S.getBodySamples()); + for (const auto &I : SortedSamples.get()) { + LineLocation Loc = I->first; + const SampleRecord &Sample = I->second; + OS.indent(Indent + 1); if (Loc.Discriminator == 0) OS << Loc.LineOffset << ": "; else @@ -52,32 +63,89 @@ bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) { OS << "\n"; } - return true; + SampleSorter SortedCallsiteSamples( + S.getCallsiteSamples()); + Indent += 1; + for (const auto &I : SortedCallsiteSamples.get()) { + CallsiteLocation Loc = I->first; + const FunctionSamples &CalleeSamples = I->second; + OS.indent(Indent); + if (Loc.Discriminator == 0) + OS << Loc.LineOffset << ": "; + else + OS << Loc.LineOffset << "." 
<< Loc.Discriminator << ": "; + if (std::error_code EC = write(Loc.CalleeName, CalleeSamples)) + return EC; + } + Indent -= 1; + + return sampleprof_error::success; } -SampleProfileWriterBinary::SampleProfileWriterBinary(StringRef F, - std::error_code &EC) - : SampleProfileWriter(F, EC, sys::fs::F_None) { - if (EC) - return; +std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) { + const auto &ret = NameTable.find(FName); + if (ret == NameTable.end()) + return sampleprof_error::truncated_name_table; + encodeULEB128(ret->second, *OutputStream); + return sampleprof_error::success; +} - // Write the file header. +void SampleProfileWriterBinary::addName(StringRef FName) { + auto NextIdx = NameTable.size(); + NameTable.insert(std::make_pair(FName, NextIdx)); +} + +void SampleProfileWriterBinary::addNames(const FunctionSamples &S) { + // Add all the names in indirect call targets. + for (const auto &I : S.getBodySamples()) { + const SampleRecord &Sample = I.second; + for (const auto &J : Sample.getCallTargets()) + addName(J.first()); + } + + // Recursively add all the names for inlined callsites. + for (const auto &J : S.getCallsiteSamples()) { + CallsiteLocation Loc = J.first; + const FunctionSamples &CalleeSamples = J.second; + addName(Loc.CalleeName); + addNames(CalleeSamples); + } +} + +std::error_code SampleProfileWriterBinary::writeHeader( + const StringMap &ProfileMap) { + auto &OS = *OutputStream; + + // Write file magic identifier. encodeULEB128(SPMagic(), OS); encodeULEB128(SPVersion(), OS); + + // Generate the name table for all the functions referenced in the profile. + for (const auto &I : ProfileMap) { + addName(I.first()); + addNames(I.second); + } + + // Write out the name table. + encodeULEB128(NameTable.size(), OS); + for (auto N : NameTable) { + OS << N.first; + encodeULEB128(0, OS); + } + + return sampleprof_error::success; } -/// \brief Write samples to a binary file. -/// -/// \returns true if the samples were written successfully, false otherwise. -bool SampleProfileWriterBinary::write(StringRef FName, - const FunctionSamples &S) { - if (S.empty()) - return true; +std::error_code SampleProfileWriterBinary::writeBody(StringRef FName, + const FunctionSamples &S) { + auto &OS = *OutputStream; + + if (std::error_code EC = writeNameIdx(FName)) + return EC; - OS << FName; - encodeULEB128(0, OS); encodeULEB128(S.getTotalSamples(), OS); - encodeULEB128(S.getHeadSamples(), OS); + + // Emit all the body samples. encodeULEB128(S.getBodySamples().size(), OS); for (const auto &I : S.getBodySamples()) { LineLocation Loc = I.first; @@ -87,18 +155,38 @@ bool SampleProfileWriterBinary::write(StringRef FName, encodeULEB128(Sample.getSamples(), OS); encodeULEB128(Sample.getCallTargets().size(), OS); for (const auto &J : Sample.getCallTargets()) { - std::string Callee = J.first(); - unsigned CalleeSamples = J.second; - OS << Callee; - encodeULEB128(0, OS); + StringRef Callee = J.first(); + uint64_t CalleeSamples = J.second; + if (std::error_code EC = writeNameIdx(Callee)) + return EC; encodeULEB128(CalleeSamples, OS); } } - return true; + // Recursively emit all the callsite samples. 
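writeBody() above emits every integer as unsigned LEB128 and refers to function names by their index in the name table written up front, instead of repeating inline strings. A sketch of the encoder, the inverse of the decoder the reader uses:

    #include <cstdint>
    #include <vector>

    static void encodeULEB128(uint64_t Value,
                              std::vector<unsigned char> &Out) {
      do {
        unsigned char Byte = Value & 0x7f; // low seven payload bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // set the continuation bit: more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
    }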
+ encodeULEB128(S.getCallsiteSamples().size(), OS); + for (const auto &J : S.getCallsiteSamples()) { + CallsiteLocation Loc = J.first; + const FunctionSamples &CalleeSamples = J.second; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + if (std::error_code EC = writeBody(Loc.CalleeName, CalleeSamples)) + return EC; + } + + return sampleprof_error::success; } -/// \brief Create a sample profile writer based on the specified format. +/// \brief Write samples of a top-level function to a binary file. +/// +/// \returns an error code indicating the status of the samples written. +std::error_code SampleProfileWriterBinary::write(StringRef FName, + const FunctionSamples &S) { + encodeULEB128(S.getHeadSamples(), *OutputStream); + return writeBody(FName, S); +} + +/// \brief Create a sample profile file writer based on the specified format. /// /// \param Filename The file to create. /// @@ -110,12 +198,38 @@ bool SampleProfileWriterBinary::write(StringRef FName, const FunctionSamples &S) { ErrorOr> SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) { std::error_code EC; + std::unique_ptr OS; + if (Format == SPF_Binary) + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None)); + else + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text)); + if (EC) + return EC; + + return create(OS, Format); } + +/// \brief Create a sample profile stream writer based on the specified format. +/// +/// \param OS The output stream to store the profile data to. +/// +/// \param Writer The writer to instantiate according to the specified format. +/// +/// \param Format Encoding format for the profile file. +/// +/// \returns an error code indicating the status of the created writer. +ErrorOr> +SampleProfileWriter::create(std::unique_ptr &OS, + SampleProfileFormat Format) { + std::error_code EC; std::unique_ptr Writer; if (Format == SPF_Binary) - Writer.reset(new SampleProfileWriterBinary(Filename, EC)); + Writer.reset(new SampleProfileWriterBinary(OS)); else if (Format == SPF_Text) - Writer.reset(new SampleProfileWriterText(Filename, EC)); + Writer.reset(new SampleProfileWriterText(OS)); + else if (Format == SPF_GCC) + EC = sampleprof_error::unsupported_writing_format; else EC = sampleprof_error::unrecognized_format; diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 5d31225396d4..19b8221b60cb 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -767,6 +767,15 @@ APFloat::isLargest() const { && isSignificandAllOnes(); } +bool +APFloat::isInteger() const { + // This could be made more efficient; I'm going for obviously correct.
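APFloat::isInteger() above takes the route its comment announces: reject non-finite values, truncate a copy toward zero, and compare it with the original. The same structure for plain doubles:

    #include <cmath>

    static bool isIntegerValued(double X) {
      if (!std::isfinite(X))
        return false;            // NaN and infinities are not integers
      return std::trunc(X) == X; // round toward zero, then compare
    }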
+ if (!isFinite()) return false; + APFloat truncated = *this; + truncated.roundToIntegral(rmTowardZero); + return compare(truncated) == cmpEqual; +} + bool APFloat::bitwiseIsEqual(const APFloat &rhs) const { if (this == &rhs) @@ -777,18 +786,12 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { return false; if (category==fcZero || category==fcInfinity) return true; - else if (isFiniteNonZero() && exponent!=rhs.exponent) + + if (isFiniteNonZero() && exponent != rhs.exponent) return false; - else { - int i= partCount(); - const integerPart* p=significandParts(); - const integerPart* q=rhs.significandParts(); - for (; i>0; i--, p++, q++) { - if (*p != *q) - return false; - } - return true; - } + + return std::equal(significandParts(), significandParts() + partCount(), + rhs.significandParts()); } APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { @@ -847,6 +850,21 @@ APFloat::semanticsPrecision(const fltSemantics &semantics) { return semantics.precision; } +APFloat::ExponentType +APFloat::semanticsMaxExponent(const fltSemantics &semantics) +{ + return semantics.maxExponent; +} +APFloat::ExponentType +APFloat::semanticsMinExponent(const fltSemantics &semantics) +{ + return semantics.minExponent; +} +unsigned int +APFloat::semanticsSizeInBits(const fltSemantics &semantics) +{ + return semantics.sizeInBits; +} const integerPart * APFloat::significandParts() const @@ -1762,7 +1780,7 @@ APFloat::remainder(const APFloat &rhs) /* Normalized llvm frem (C fmod). This is not currently correct in all cases. */ APFloat::opStatus -APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) +APFloat::mod(const APFloat &rhs) { opStatus fs; fs = modSpecials(rhs); @@ -1787,10 +1805,10 @@ APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) rmNearestTiesToEven); assert(fs==opOK); // should always work - fs = V.multiply(rhs, rounding_mode); + fs = V.multiply(rhs, rmNearestTiesToEven); assert(fs==opOK || fs==opInexact); // should not overflow or underflow - fs = subtract(V, rounding_mode); + fs = subtract(V, rmNearestTiesToEven); assert(fs==opOK || fs==opInexact); // likewise if (isZero()) diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp index 6f7e341904b9..e7f3e1764c52 100644 --- a/lib/Support/BlockFrequency.cpp +++ b/lib/Support/BlockFrequency.cpp @@ -11,37 +11,35 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/BranchProbability.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; -BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { +BlockFrequency &BlockFrequency::operator*=(BranchProbability Prob) { Frequency = Prob.scale(Frequency); return *this; } -const BlockFrequency -BlockFrequency::operator*(const BranchProbability &Prob) const { +BlockFrequency BlockFrequency::operator*(BranchProbability Prob) const { BlockFrequency Freq(Frequency); Freq *= Prob; return Freq; } -BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) { +BlockFrequency &BlockFrequency::operator/=(BranchProbability Prob) { Frequency = Prob.scaleByInverse(Frequency); return *this; } -BlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const { +BlockFrequency BlockFrequency::operator/(BranchProbability Prob) const { BlockFrequency Freq(Frequency); Freq /= Prob; return Freq; } -BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) { +BlockFrequency 
&BlockFrequency::operator+=(BlockFrequency Freq) { uint64_t Before = Freq.Frequency; Frequency += Freq.Frequency; @@ -52,11 +50,25 @@ BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) { return *this; } -const BlockFrequency -BlockFrequency::operator+(const BlockFrequency &Prob) const { - BlockFrequency Freq(Frequency); - Freq += Prob; - return Freq; +BlockFrequency BlockFrequency::operator+(BlockFrequency Freq) const { + BlockFrequency NewFreq(Frequency); + NewFreq += Freq; + return NewFreq; +} + +BlockFrequency &BlockFrequency::operator-=(BlockFrequency Freq) { + // If underflow, set frequency to 0. + if (Frequency <= Freq.Frequency) + Frequency = 0; + else + Frequency -= Freq.Frequency; + return *this; +} + +BlockFrequency BlockFrequency::operator-(BlockFrequency Freq) const { + BlockFrequency NewFreq(Frequency); + NewFreq -= Freq; + return NewFreq; } BlockFrequency &BlockFrequency::operator>>=(const unsigned count) { diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp index 65878d6e3025..771d02c0aa3c 100644 --- a/lib/Support/BranchProbability.cpp +++ b/lib/Support/BranchProbability.cpp @@ -15,17 +15,58 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; +const uint32_t BranchProbability::D; + raw_ostream &BranchProbability::print(raw_ostream &OS) const { - return OS << N << " / " << D << " = " - << format("%g%%", ((double)N / D) * 100.0); + if (isUnknown()) + return OS << "?%"; + + // Get a percentage rounded to two decimal digits. This avoids + // implementation-defined rounding inside printf. + double Percent = rint(((double)N / D) * 100.0 * 100.0) / 100.0; + return OS << format("0x%08" PRIx32 " / 0x%08" PRIx32 " = %.2f%%", N, D, + Percent); } void BranchProbability::dump() const { print(dbgs()) << '\n'; } +BranchProbability::BranchProbability(uint32_t Numerator, uint32_t Denominator) { + assert(Denominator > 0 && "Denominator cannot be 0!"); + assert(Numerator <= Denominator && "Probability cannot be bigger than 1!"); + if (Denominator == D) + N = Numerator; + else { + uint64_t Prob64 = + (Numerator * static_cast(D) + Denominator / 2) / Denominator; + N = static_cast(Prob64); + } +} + +BranchProbability +BranchProbability::getBranchProbability(uint64_t Numerator, + uint64_t Denominator) { + assert(Numerator <= Denominator && "Probability cannot be bigger than 1!"); + // Scale down Denominator to fit in a 32-bit integer. + int Scale = 0; + while (Denominator > UINT32_MAX) { + Denominator >>= 1; + Scale++; + } + return BranchProbability(Numerator >> Scale, Denominator); +} + +// If ConstD is not zero, then replace D by ConstD so that division and modulo +// operations by D can be optimized, in case this function is not inlined by the +// compiler. +template static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) { + if (ConstD > 0) + D = ConstD; + assert(D && "divide by 0"); // Fast path for multiplying by 1.0. 
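// Two arithmetic patterns from the changes above, sketched standalone.
// First, BlockFrequency::operator-= clamps instead of wrapping: frequencies
// are unsigned, so underflow must saturate to zero.
#include <cassert>
#include <cstdint>

static uint64_t saturatingSub(uint64_t A, uint64_t B) {
  return A <= B ? 0 : A - B;
}

// Second, the BranchProbability constructor maps N/Denom onto the fixed
// denominator D, rounding to nearest via (N * D + Denom/2) / Denom, with
// the multiply widened to 64 bits so it cannot overflow. D = 1u << 31 is
// an assumption here, matching the fixed-point denominator this revision
// appears to use.
static uint32_t rescaleToFixedDenominator(uint32_t N, uint32_t Denom) {
  const uint64_t D = 1u << 31;
  assert(Denom > 0 && N <= Denom && "probability must lie in [0, 1]");
  return static_cast<uint32_t>(uint64_t(N) * D + Denom / 2 > 0
                                   ? (uint64_t(N) * D + Denom / 2) / Denom
                                   : 0);
}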
@@ -65,9 +106,9 @@ static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) { } uint64_t BranchProbability::scale(uint64_t Num) const { - return ::scale(Num, N, D); + return ::scale(Num, N, D); } uint64_t BranchProbability::scaleByInverse(uint64_t Num) const { - return ::scale(Num, D, N); + return ::scale<0>(Num, D, N); } diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index a8a4df51661f..75b3e89f9167 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -1,7 +1,8 @@ set(system_libs) if( NOT MSVC ) if( MINGW ) - set(system_libs ${system_libs} psapi shell32 ole32) + # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc. + set(system_libs ${system_libs} psapi shell32 ole32 uuid) elseif( CMAKE_HOST_UNIX ) if( HAVE_LIBRT ) set(system_libs ${system_libs} rt) @@ -59,6 +60,7 @@ add_llvm_library(LLVMSupport IntEqClasses.cpp IntervalMap.cpp IntrusiveRefCntPtr.cpp + JamCRC.cpp LEB128.cpp LineIterator.cpp Locale.cpp @@ -87,6 +89,7 @@ add_llvm_library(LLVMSupport StringRef.cpp SystemUtils.cpp TargetParser.cpp + ThreadPool.cpp Timer.cpp ToolOutputFile.cpp Triple.cpp diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 17fba95ebb2b..fdcdb03706de 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -120,7 +120,7 @@ public: void addOption(Option *O) { bool HadErrors = false; - if (O->ArgStr[0]) { + if (O->hasArgStr()) { // Add argument to the argument map! if (!OptionsMap.insert(std::make_pair(O->ArgStr, O)).second) { errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr @@ -151,12 +151,12 @@ public: } void removeOption(Option *O) { - SmallVector OptionNames; + SmallVector OptionNames; O->getExtraOptionNames(OptionNames); - if (O->ArgStr[0]) + if (O->hasArgStr()) OptionNames.push_back(O->ArgStr); for (auto Name : OptionNames) - OptionsMap.erase(StringRef(Name)); + OptionsMap.erase(Name); if (O->getFormattingFlag() == cl::Positional) for (auto Opt = PositionalOpts.begin(); Opt != PositionalOpts.end(); @@ -182,13 +182,13 @@ public: nullptr != ConsumeAfterOpt); } - void updateArgStr(Option *O, const char *NewName) { + void updateArgStr(Option *O, StringRef NewName) { if (!OptionsMap.insert(std::make_pair(NewName, O)).second) { errs() << ProgramName << ": CommandLine Error: Option '" << O->ArgStr << "' registered more than once!\n"; report_fatal_error("inconsistency in registered CommandLine options"); } - OptionsMap.erase(StringRef(O->ArgStr)); + OptionsMap.erase(O->ArgStr); } void printOptionValues(); @@ -227,7 +227,7 @@ void Option::addArgument() { void Option::removeArgument() { GlobalParser->removeOption(this); } -void Option::setArgStr(const char *S) { +void Option::setArgStr(StringRef S) { if (FullyInitialized) GlobalParser->updateArgStr(this, S); ArgStr = S; @@ -296,24 +296,23 @@ static Option *LookupNearestOption(StringRef Arg, ie = OptionsMap.end(); it != ie; ++it) { Option *O = it->second; - SmallVector OptionNames; + SmallVector OptionNames; O->getExtraOptionNames(OptionNames); - if (O->ArgStr[0]) + if (O->hasArgStr()) OptionNames.push_back(O->ArgStr); bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed; StringRef Flag = PermitValue ? 
LHS : Arg; - for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { - StringRef Name = OptionNames[i]; + for (auto Name : OptionNames) { unsigned Distance = StringRef(Name).edit_distance( Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance); if (!Best || Distance < BestDistance) { Best = O; BestDistance = Distance; if (RHS.empty() || !PermitValue) - NearestString = OptionNames[i]; + NearestString = Name; else - NearestString = (Twine(OptionNames[i]) + "=" + RHS).str(); + NearestString = (Twine(Name) + "=" + RHS).str(); } } } @@ -346,10 +345,7 @@ static bool CommaSeparateAndAddOccurrence(Option *Handler, unsigned pos, Value = Val; } - if (Handler->addOccurrence(pos, ArgName, Value, MultiArg)) - return true; - - return false; + return Handler->addOccurrence(pos, ArgName, Value, MultiArg); } /// ProvideOption - For Value, this differentiates between an empty value ("") @@ -799,7 +795,7 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar, // telling us. SmallVector newArgv; BumpPtrAllocator A; - BumpPtrStringSaver Saver(A); + StringSaver Saver(A); newArgv.push_back(Saver.save(progName)); // Parse the value of the environment variable into a "command line" @@ -822,7 +818,7 @@ void CommandLineParser::ParseCommandLineOptions(int argc, // Expand response files. SmallVector newArgv(argv, argv + argc); BumpPtrAllocator A; - BumpPtrStringSaver Saver(A); + StringSaver Saver(A); ExpandResponseFiles(Saver, TokenizeGNUCommandLine, newArgv); argv = &newArgv[0]; argc = static_cast(newArgv.size()); @@ -859,7 +855,7 @@ void CommandLineParser::ParseCommandLineOptions(int argc, "error - this positional option will never be matched, " "because it does not Require a value, and a " "cl::ConsumeAfter option is active!"); - } else if (UnboundedFound && !Opt->ArgStr[0]) { + } else if (UnboundedFound && !Opt->hasArgStr()) { // This option does not "require" a value... Make sure this option is // not specified after an option that eats all extra arguments, or this // one will never get any! @@ -1144,8 +1140,8 @@ bool Option::addOccurrence(unsigned pos, StringRef ArgName, StringRef Value, // getValueStr - Get the value description string, using "DefaultMsg" if nothing // has been specified yet. // -static const char *getValueStr(const Option &O, const char *DefaultMsg) { - if (O.ValueStr[0] == 0) +static StringRef getValueStr(const Option &O, StringRef DefaultMsg) { + if (O.ValueStr.empty()) return DefaultMsg; return O.ValueStr; } @@ -1155,7 +1151,7 @@ static const char *getValueStr(const Option &O, const char *DefaultMsg) { // // Return the width of the option tag for printing... -size_t alias::getOptionWidth() const { return std::strlen(ArgStr) + 6; } +size_t alias::getOptionWidth() const { return ArgStr.size() + 6; } static void printHelpStr(StringRef HelpStr, size_t Indent, size_t FirstLineIndentedBy) { @@ -1170,7 +1166,7 @@ static void printHelpStr(StringRef HelpStr, size_t Indent, // Print out the option for the alias. void alias::printOptionInfo(size_t GlobalWidth) const { outs() << " -" << ArgStr; - printHelpStr(HelpStr, GlobalWidth, std::strlen(ArgStr) + 6); + printHelpStr(HelpStr, GlobalWidth, ArgStr.size() + 6); } //===----------------------------------------------------------------------===// @@ -1182,9 +1178,9 @@ void alias::printOptionInfo(size_t GlobalWidth) const { // Return the width of the option tag for printing... 
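// LookupNearestOption above ranks candidates by StringRef::edit_distance.
// For illustration, the classic single-row Levenshtein computation that
// measure is based on (a sketch, not the in-tree implementation, which
// additionally supports an early-exit MaxEditDistance):
#include <algorithm>
#include <string>
#include <vector>

static unsigned editDistance(const std::string &A, const std::string &B) {
  std::vector<unsigned> Row(B.size() + 1);
  for (unsigned J = 0; J <= B.size(); ++J)
    Row[J] = J;
  for (unsigned I = 1; I <= A.size(); ++I) {
    unsigned Diag = Row[0]; // distance(A[0..I-1), B[0..J-1))
    Row[0] = I;
    for (unsigned J = 1; J <= B.size(); ++J) {
      unsigned Up = Row[J]; // distance(A[0..I-1), B[0..J))
      unsigned Cost = (A[I - 1] == B[J - 1]) ? 0 : 1;
      Row[J] = std::min({Up + 1,         // delete from A
                         Row[J - 1] + 1, // insert into A
                         Diag + Cost});  // substitute
      Diag = Up;
    }
  }
  return Row[B.size()];
}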
size_t basic_parser_impl::getOptionWidth(const Option &O) const { - size_t Len = std::strlen(O.ArgStr); + size_t Len = O.ArgStr.size(); if (const char *ValName = getValueName()) - Len += std::strlen(getValueStr(O, ValName)) + 3; + Len += getValueStr(O, ValName).size() + 3; return Len + 6; } @@ -1205,7 +1201,7 @@ void basic_parser_impl::printOptionInfo(const Option &O, void basic_parser_impl::printOptionName(const Option &O, size_t GlobalWidth) const { outs() << " -" << O.ArgStr; - outs().indent(GlobalWidth - std::strlen(O.ArgStr)); + outs().indent(GlobalWidth - O.ArgStr.size()); } // parser implementation @@ -1319,7 +1315,7 @@ unsigned generic_parser_base::findOption(const char *Name) { // Return the width of the option tag for printing... size_t generic_parser_base::getOptionWidth(const Option &O) const { if (O.hasArgStr()) { - size_t Size = std::strlen(O.ArgStr) + 6; + size_t Size = O.ArgStr.size() + 6; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) Size = std::max(Size, std::strlen(getOption(i)) + 8); return Size; @@ -1338,7 +1334,7 @@ void generic_parser_base::printOptionInfo(const Option &O, size_t GlobalWidth) const { if (O.hasArgStr()) { outs() << " -" << O.ArgStr; - printHelpStr(O.HelpStr, GlobalWidth, std::strlen(O.ArgStr) + 6); + printHelpStr(O.HelpStr, GlobalWidth, O.ArgStr.size() + 6); for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { size_t NumSpaces = GlobalWidth - strlen(getOption(i)) - 8; @@ -1346,7 +1342,7 @@ void generic_parser_base::printOptionInfo(const Option &O, outs().indent(NumSpaces) << " - " << getDescription(i) << '\n'; } } else { - if (O.HelpStr[0]) + if (!O.HelpStr.empty()) outs() << " " << O.HelpStr << '\n'; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { const char *Option = getOption(i); @@ -1365,7 +1361,7 @@ void generic_parser_base::printGenericOptionDiff( const Option &O, const GenericOptionValue &Value, const GenericOptionValue &Default, size_t GlobalWidth) const { outs() << " -" << O.ArgStr; - outs().indent(GlobalWidth - std::strlen(O.ArgStr)); + outs().indent(GlobalWidth - O.ArgStr.size()); unsigned NumOpts = getNumOptions(); for (unsigned i = 0; i != NumOpts; ++i) { @@ -1508,7 +1504,7 @@ public: outs() << "USAGE: " << GlobalParser->ProgramName << " [options]"; for (auto Opt : GlobalParser->PositionalOpts) { - if (Opt->ArgStr[0]) + if (Opt->hasArgStr()) outs() << " --" << Opt->ArgStr; outs() << " " << Opt->HelpStr; } diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index aba0f1ddeee8..3f4ef9da48f1 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -24,6 +24,12 @@ static ManagedStatic< sys::ThreadLocal > CurrentContext; struct CrashRecoveryContextImpl { + // When threads are disabled, this links up all active + // CrashRecoveryContextImpls. When threads are enabled there's one thread + // per CrashRecoveryContext and CurrentContext is a thread-local, so only one + // CrashRecoveryContextImpl is active per thread and this is always null. 
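// The single-threaded bookkeeping described in the comment above, reduced
// to its core: active crash-recovery contexts form an intrusive stack
// threaded through Next, and a pointer (a plain global here, a thread-local
// in-tree) tracks the innermost one. A sketch under those assumptions:
struct ContextImpl {
  static const ContextImpl *Current;
  const ContextImpl *Next;
  ContextImpl() : Next(Current) { Current = this; } // push on construction
  ~ContextImpl() { Current = Next; }                // pop on destruction
};
const ContextImpl *ContextImpl::Current = nullptr;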
+ const CrashRecoveryContextImpl *Next; + CrashRecoveryContext *CRC; std::string Backtrace; ::jmp_buf JumpBuffer; @@ -34,21 +40,26 @@ public: CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC), Failed(false), SwitchedThread(false) { + Next = CurrentContext->get(); CurrentContext->set(this); } ~CrashRecoveryContextImpl() { if (!SwitchedThread) - CurrentContext->erase(); + CurrentContext->set(Next); } /// \brief Called when the separate crash-recovery thread was finished, to /// indicate that we don't need to clear the thread-local CurrentContext. - void setSwitchedThread() { SwitchedThread = true; } + void setSwitchedThread() { +#if defined(LLVM_ENABLE_THREADS) && LLVM_ENABLE_THREADS != 0 + SwitchedThread = true; +#endif + } void HandleCrash() { // Eliminate the current context entry, to avoid re-entering in case the // cleanup code crashes. - CurrentContext->erase(); + CurrentContext->set(Next); assert(!Failed && "Crash recovery context already failed!"); Failed = true; @@ -65,7 +76,7 @@ public: static ManagedStatic gCrashRecoveryContextMutex; static bool gCrashRecoveryEnabled = false; -static ManagedStatic > +static ManagedStatic> tlIsRecoveringFromCrash; CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} @@ -73,7 +84,8 @@ CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} CrashRecoveryContext::~CrashRecoveryContext() { // Reclaim registered resources. CrashRecoveryContextCleanup *i = head; - tlIsRecoveringFromCrash->set(head); + const CrashRecoveryContext *PC = tlIsRecoveringFromCrash->get(); + tlIsRecoveringFromCrash->set(this); while (i) { CrashRecoveryContextCleanup *tmp = i; i = tmp->next; @@ -81,7 +93,7 @@ CrashRecoveryContext::~CrashRecoveryContext() { tmp->recoverResources(); delete tmp; } - tlIsRecoveringFromCrash->erase(); + tlIsRecoveringFromCrash->set(PC); CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; delete CRCI; @@ -232,7 +244,7 @@ void CrashRecoveryContext::Disable() { static const int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP }; -static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]); +static const unsigned NumSignals = array_lengthof(Signals); static struct sigaction PrevActions[NumSignals]; static void CrashRecoverySignalHandler(int Signal) { diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index 13a41557a8d2..7d7225671737 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -177,6 +177,23 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_MIPS_assumed_size: return "DW_AT_MIPS_assumed_size"; case DW_AT_lo_user: return "DW_AT_lo_user"; case DW_AT_hi_user: return "DW_AT_hi_user"; + case DW_AT_BORLAND_property_read: return "DW_AT_BORLAND_property_read"; + case DW_AT_BORLAND_property_write: return "DW_AT_BORLAND_property_write"; + case DW_AT_BORLAND_property_implements: return "DW_AT_BORLAND_property_implements"; + case DW_AT_BORLAND_property_index: return "DW_AT_BORLAND_property_index"; + case DW_AT_BORLAND_property_default: return "DW_AT_BORLAND_property_default"; + case DW_AT_BORLAND_Delphi_unit: return "DW_AT_BORLAND_Delphi_unit"; + case DW_AT_BORLAND_Delphi_class: return "DW_AT_BORLAND_Delphi_class"; + case DW_AT_BORLAND_Delphi_record: return "DW_AT_BORLAND_Delphi_record"; + case DW_AT_BORLAND_Delphi_metaclass: return "DW_AT_BORLAND_Delphi_metaclass"; + case DW_AT_BORLAND_Delphi_constructor: return "DW_AT_BORLAND_Delphi_constructor"; + case DW_AT_BORLAND_Delphi_destructor: return "DW_AT_BORLAND_Delphi_destructor"; + case 
DW_AT_BORLAND_Delphi_anonymous_method: return "DW_AT_BORLAND_Delphi_anonymous_method"; + case DW_AT_BORLAND_Delphi_interface: return "DW_AT_BORLAND_Delphi_interface"; + case DW_AT_BORLAND_Delphi_ABI: return "DW_AT_BORLAND_Delphi_ABI"; + case DW_AT_BORLAND_Delphi_return: return "DW_AT_BORLAND_Delphi_return"; + case DW_AT_BORLAND_Delphi_frameptr: return "DW_AT_BORLAND_Delphi_frameptr"; + case DW_AT_BORLAND_closure: return "DW_AT_BORLAND_closure"; case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized"; case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags"; case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa"; @@ -201,6 +218,7 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_GNU_addr_base: return "DW_AT_GNU_addr_base"; case DW_AT_GNU_pubnames: return "DW_AT_GNU_pubnames"; case DW_AT_GNU_pubtypes: return "DW_AT_GNU_pubtypes"; + case DW_AT_GNU_discriminator: return "DW_AT_GNU_discriminator"; } return nullptr; } @@ -373,6 +391,14 @@ const char *llvm::dwarf::ConventionString(unsigned Convention) { case DW_CC_nocall: return "DW_CC_nocall"; case DW_CC_lo_user: return "DW_CC_lo_user"; case DW_CC_hi_user: return "DW_CC_hi_user"; + case DW_CC_GNU_borland_fastcall_i386: return "DW_CC_GNU_borland_fastcall_i386"; + case DW_CC_BORLAND_safecall: return "DW_CC_BORLAND_safecall"; + case DW_CC_BORLAND_stdcall: return "DW_CC_BORLAND_stdcall"; + case DW_CC_BORLAND_pascal: return "DW_CC_BORLAND_pascal"; + case DW_CC_BORLAND_msfastcall: return "DW_CC_BORLAND_msfastcall"; + case DW_CC_BORLAND_msreturn: return "DW_CC_BORLAND_msreturn"; + case DW_CC_BORLAND_thiscall: return "DW_CC_BORLAND_thiscall"; + case DW_CC_BORLAND_fastcall: return "DW_CC_BORLAND_fastcall"; } return nullptr; } @@ -442,10 +468,21 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) { case DW_MACINFO_start_file: return "DW_MACINFO_start_file"; case DW_MACINFO_end_file: return "DW_MACINFO_end_file"; case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext"; + case DW_MACINFO_invalid: return "DW_MACINFO_invalid"; } return nullptr; } +unsigned llvm::dwarf::getMacinfo(StringRef MacinfoString) { + return StringSwitch(MacinfoString) + .Case("DW_MACINFO_define", DW_MACINFO_define) + .Case("DW_MACINFO_undef", DW_MACINFO_undef) + .Case("DW_MACINFO_start_file", DW_MACINFO_start_file) + .Case("DW_MACINFO_end_file", DW_MACINFO_end_file) + .Case("DW_MACINFO_vendor_ext", DW_MACINFO_vendor_ext) + .Default(DW_MACINFO_invalid); +} + const char *llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { case DW_CFA_nop: return "DW_CFA_nop"; diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index a25e21ae043e..2808bd34af06 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ErrorHandling.h" -#include "llvm-c/Core.h" +#include "llvm-c/ErrorHandling.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index 307ff09afedc..651e679f2cb5 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/Errc.h" +#include "llvm/Support/Signals.h" #include #if !defined(_MSC_VER) && !defined(__MINGW32__) @@ -34,10 +35,8 @@ FileOutputBuffer::~FileOutputBuffer() { sys::fs::remove(Twine(TempPath)); } -std::error_code 
-FileOutputBuffer::create(StringRef FilePath, size_t Size, - std::unique_ptr &Result, - unsigned Flags) { +ErrorOr> +FileOutputBuffer::create(StringRef FilePath, size_t Size, unsigned Flags) { // If file already exists, it must be a regular file (to be mappable). sys::fs::file_status Stat; std::error_code EC = sys::fs::status(FilePath, Stat); @@ -76,6 +75,8 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size, if (EC) return EC; + sys::RemoveFileOnSignal(TempFilePath); + #ifndef LLVM_ON_WIN32 // On Windows, CreateFileMapping (the mmap function on Windows) // automatically extends the underlying file. We don't need to @@ -95,10 +96,9 @@ FileOutputBuffer::create(StringRef FilePath, size_t Size, if (Ret) return std::error_code(errno, std::generic_category()); - Result.reset( + std::unique_ptr Buf( new FileOutputBuffer(std::move(MappedFile), FilePath, TempFilePath)); - - return std::error_code(); + return std::move(Buf); } std::error_code FileOutputBuffer::commit() { @@ -107,6 +107,8 @@ std::error_code FileOutputBuffer::commit() { // Rename file to final name. - return sys::fs::rename(Twine(TempPath), Twine(FinalPath)); + std::error_code EC = sys::fs::rename(Twine(TempPath), Twine(FinalPath)); + sys::DontRemoveFileOnSignal(TempPath); + return EC; } } // namespace diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index b8538ffe1f9f..bb0ec2defef9 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -232,9 +232,29 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { Buckets = AllocateBuckets(NumBuckets); NumNodes = 0; } + +FoldingSetImpl::FoldingSetImpl(FoldingSetImpl &&Arg) + : Buckets(Arg.Buckets), NumBuckets(Arg.NumBuckets), NumNodes(Arg.NumNodes) { + Arg.Buckets = nullptr; + Arg.NumBuckets = 0; + Arg.NumNodes = 0; +} + +FoldingSetImpl &FoldingSetImpl::operator=(FoldingSetImpl &&RHS) { + free(Buckets); // This may be null if the set is in a moved-from state. + Buckets = RHS.Buckets; + NumBuckets = RHS.NumBuckets; + NumNodes = RHS.NumNodes; + RHS.Buckets = nullptr; + RHS.NumBuckets = 0; + RHS.NumNodes = 0; + return *this; +} + FoldingSetImpl::~FoldingSetImpl() { free(Buckets); } + void FoldingSetImpl::clear() { // Set all but the last bucket to null pointers. 
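// FileOutputBuffer's commit protocol, sketched with the standard library:
// write into a temporary file, then rename it over the destination so
// readers never observe a half-written file. The fixed ".tmp" suffix is an
// assumption; the real code uses createUniqueFile plus the signal-time
// cleanup (RemoveFileOnSignal / DontRemoveFileOnSignal) added above.
#include <cstdio>
#include <fstream>
#include <string>

static bool writeAtomically(const std::string &Final, const std::string &Data) {
  std::string Temp = Final + ".tmp";
  {
    std::ofstream Out(Temp, std::ios::binary | std::ios::trunc);
    if (!Out.write(Data.data(), static_cast<std::streamsize>(Data.size())))
      return false;
  } // close (and flush) before renaming
  if (std::rename(Temp.c_str(), Final.c_str()) != 0) {
    std::remove(Temp.c_str()); // don't leave the temporary behind
    return false;
  }
  return true;
}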
memset(Buckets, 0, NumBuckets*sizeof(void*)); diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index a9b022041468..d0e1d50e8ccb 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -103,7 +103,7 @@ struct GraphSession { bool TryFindProgram(StringRef Names, std::string &ProgramPath) { raw_string_ostream Log(LogBuffer); SmallVector parts; - Names.split(parts, "|"); + Names.split(parts, '|'); for (auto Name : parts) { if (ErrorOr P = sys::findProgramByName(Name)) { ProgramPath = *P; @@ -189,61 +189,87 @@ bool llvm::DisplayGraph(StringRef FilenameRef, bool wait, return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg); } - enum PSViewerKind { PSV_None, PSV_OSXOpen, PSV_XDGOpen, PSV_Ghostview }; - PSViewerKind PSViewer = PSV_None; + enum ViewerKind { + VK_None, + VK_OSXOpen, + VK_XDGOpen, + VK_Ghostview, + VK_CmdStart + }; + ViewerKind Viewer = VK_None; #ifdef __APPLE__ - if (!PSViewer && S.TryFindProgram("open", ViewerPath)) - PSViewer = PSV_OSXOpen; + if (!Viewer && S.TryFindProgram("open", ViewerPath)) + Viewer = VK_OSXOpen; +#endif + if (!Viewer && S.TryFindProgram("gv", ViewerPath)) + Viewer = VK_Ghostview; + if (!Viewer && S.TryFindProgram("xdg-open", ViewerPath)) + Viewer = VK_XDGOpen; +#ifdef LLVM_ON_WIN32 + if (!Viewer && S.TryFindProgram("cmd", ViewerPath)) { + Viewer = VK_CmdStart; + } #endif - if (!PSViewer && S.TryFindProgram("gv", ViewerPath)) - PSViewer = PSV_Ghostview; - if (!PSViewer && S.TryFindProgram("xdg-open", ViewerPath)) - PSViewer = PSV_XDGOpen; - // PostScript graph generator + PostScript viewer + // PostScript or PDF graph generator + PostScript/PDF viewer std::string GeneratorPath; - if (PSViewer && + if (Viewer && (S.TryFindProgram(getProgramName(program), GeneratorPath) || S.TryFindProgram("dot|fdp|neato|twopi|circo", GeneratorPath))) { - std::string PSFilename = Filename + ".ps"; + std::string OutputFilename = + Filename + (Viewer == VK_CmdStart ? ".pdf" : ".ps"); std::vector args; args.push_back(GeneratorPath.c_str()); - args.push_back("-Tps"); + if (Viewer == VK_CmdStart) + args.push_back("-Tpdf"); + else + args.push_back("-Tps"); args.push_back("-Nfontname=Courier"); args.push_back("-Gsize=7.5,10"); args.push_back(Filename.c_str()); args.push_back("-o"); - args.push_back(PSFilename.c_str()); + args.push_back(OutputFilename.c_str()); args.push_back(nullptr); errs() << "Running '" << GeneratorPath << "' program... "; - if (ExecGraphViewer(GeneratorPath, args, Filename, wait, ErrMsg)) + if (ExecGraphViewer(GeneratorPath, args, Filename, true, ErrMsg)) return true; + // The lifetime of StartArg must include the call of ExecGraphViewer + // because the args are passed as vector of char*. + std::string StartArg; + args.clear(); args.push_back(ViewerPath.c_str()); - switch (PSViewer) { - case PSV_OSXOpen: + switch (Viewer) { + case VK_OSXOpen: args.push_back("-W"); - args.push_back(PSFilename.c_str()); + args.push_back(OutputFilename.c_str()); break; - case PSV_XDGOpen: + case VK_XDGOpen: wait = false; - args.push_back(PSFilename.c_str()); + args.push_back(OutputFilename.c_str()); break; - case PSV_Ghostview: + case VK_Ghostview: args.push_back("--spartan"); - args.push_back(PSFilename.c_str()); + args.push_back(OutputFilename.c_str()); break; - case PSV_None: + case VK_CmdStart: + args.push_back("/S"); + args.push_back("/C"); + StartArg = + (StringRef("start ") + (wait ? 
"/WAIT " : "") + OutputFilename).str(); + args.push_back(StartArg.c_str()); + break; + case VK_None: llvm_unreachable("Invalid viewer"); } args.push_back(nullptr); ErrMsg.clear(); - return ExecGraphViewer(ViewerPath, args, PSFilename, wait, ErrMsg); + return ExecGraphViewer(ViewerPath, args, OutputFilename, wait, ErrMsg); } // dotty diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 1bd1fe2bea0e..c0f9e0744b5e 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -368,8 +368,14 @@ StringRef sys::getHostCPUName() { // Broadwell: case 61: + case 71: return "broadwell"; + // Skylake: + case 78: + case 94: + return "skylake"; + case 28: // Most 45 nm Intel Atom processors case 38: // 45 nm Atom Lincroft case 39: // 32 nm Atom Medfield @@ -381,6 +387,8 @@ StringRef sys::getHostCPUName() { case 55: case 74: case 77: + case 90: + case 93: return "silvermont"; default: // Unknown family 6 CPU, try to guess. @@ -689,7 +697,7 @@ StringRef sys::getHostCPUName() { if (Lines[I].startswith("features")) { size_t Pos = Lines[I].find(":"); if (Pos != StringRef::npos) { - Lines[I].drop_front(Pos + 1).split(CPUFeatures, " "); + Lines[I].drop_front(Pos + 1).split(CPUFeatures, ' '); break; } } @@ -766,14 +774,17 @@ bool sys::getHostCPUFeatures(StringMap &Features) { // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // switch, then we have full AVX support. - bool HasAVX = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && - !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); - Features["avx"] = HasAVX; - Features["fma"] = HasAVX && (ECX >> 12) & 1; - Features["f16c"] = HasAVX && (ECX >> 29) & 1; + bool HasAVXSave = ((ECX >> 27) & 1) && ((ECX >> 28) & 1) && + !GetX86XCR0(&EAX, &EDX) && ((EAX & 0x6) == 0x6); + Features["avx"] = HasAVXSave; + Features["fma"] = HasAVXSave && (ECX >> 12) & 1; + Features["f16c"] = HasAVXSave && (ECX >> 29) & 1; + + // Only enable XSAVE if OS has enabled support for saving YMM state. + Features["xsave"] = HasAVXSave && (ECX >> 26) & 1; // AVX512 requires additional context to be saved by the OS. - bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); unsigned MaxExtLevel; GetX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX); @@ -783,15 +794,15 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["lzcnt"] = HasExtLeaf1 && ((ECX >> 5) & 1); Features["sse4a"] = HasExtLeaf1 && ((ECX >> 6) & 1); Features["prfchw"] = HasExtLeaf1 && ((ECX >> 8) & 1); - Features["xop"] = HasAVX && HasExtLeaf1 && ((ECX >> 11) & 1); - Features["fma4"] = HasAVX && HasExtLeaf1 && ((ECX >> 16) & 1); + Features["xop"] = HasExtLeaf1 && ((ECX >> 11) & 1) && HasAVXSave; + Features["fma4"] = HasExtLeaf1 && ((ECX >> 16) & 1) && HasAVXSave; Features["tbm"] = HasExtLeaf1 && ((ECX >> 21) & 1); bool HasLeaf7 = MaxLevel >= 7 && !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); // AVX2 is only supported if we have the OS save support from AVX. 
- Features["avx2"] = HasAVX && HasLeaf7 && (EBX >> 5) & 1; + Features["avx2"] = HasAVXSave && HasLeaf7 && ((EBX >> 5) & 1); Features["fsgsbase"] = HasLeaf7 && ((EBX >> 0) & 1); Features["bmi"] = HasLeaf7 && ((EBX >> 3) & 1); @@ -801,6 +812,8 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["rdseed"] = HasLeaf7 && ((EBX >> 18) & 1); Features["adx"] = HasLeaf7 && ((EBX >> 19) & 1); Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1); + // Enable protection keys + Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); // AVX512 is only supported if the OS supports the context save for it. Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save; @@ -811,6 +824,14 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; + bool HasLeafD = MaxLevel >= 0xd && + !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); + + // Only enable XSAVE if OS has enabled support for saving YMM state. + Features["xsaveopt"] = HasAVXSave && HasLeafD && ((EAX >> 0) & 1); + Features["xsavec"] = HasAVXSave && HasLeafD && ((EAX >> 1) & 1); + Features["xsaves"] = HasAVXSave && HasLeafD && ((EAX >> 3) & 1); + return true; } #elif defined(__linux__) && (defined(__arm__) || defined(__aarch64__)) @@ -832,7 +853,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { // Look for the CPU features. for (unsigned I = 0, E = Lines.size(); I != E; ++I) if (Lines[I].startswith("Features")) { - Lines[I].split(CPUFeatures, " "); + Lines[I].split(CPUFeatures, ' '); break; } diff --git a/lib/Support/JamCRC.cpp b/lib/Support/JamCRC.cpp new file mode 100644 index 000000000000..bc21c917d55b --- /dev/null +++ b/lib/Support/JamCRC.cpp @@ -0,0 +1,96 @@ +//===-- JamCRC.cpp - Cyclic Redundancy Check --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of JamCRC. +// +//===----------------------------------------------------------------------===// +// +// The implementation technique is the one mentioned in: +// D. V. Sarwate. 1988. Computation of cyclic redundancy checks via table +// look-up. Commun. 
ACM 31, 8 (August 1988) +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/JamCRC.h" + +using namespace llvm; + +static const uint32_t CRCTable[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +void JamCRC::update(ArrayRef Data) { + for (char Byte : Data) { + int TableIdx = (CRC ^ Byte) & 0xff; + CRC = CRCTable[TableIdx] ^ 
(CRC >> 8); + } +} diff --git a/lib/Support/Locale.cpp b/lib/Support/Locale.cpp index d5cb72b5db3a..53bc0e36d830 100644 --- a/lib/Support/Locale.cpp +++ b/lib/Support/Locale.cpp @@ -1,3 +1,4 @@ +#include "llvm/Config/llvm-config.h" #include "llvm/Support/Locale.h" #include "llvm/Support/Unicode.h" diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index b8fb2841e525..9868207b14ff 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Config/config.h" #include "llvm/Support/Atomic.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/MutexGuard.h" #include diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index d09ef3a4c0bc..faee10bb07cf 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -162,13 +162,14 @@ MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) { } ErrorOr> -MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize) { +MemoryBuffer::getFileOrSTDIN(const Twine &Filename, int64_t FileSize, + bool RequiresNullTerminator) { SmallString<256> NameBuf; StringRef NameRef = Filename.toStringRef(NameBuf); if (NameRef == "-") return getSTDIN(); - return getFile(Filename, FileSize); + return getFile(Filename, FileSize, RequiresNullTerminator); } ErrorOr> diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index cf467381db8c..4952f59fc24d 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -455,17 +455,15 @@ void append(SmallVectorImpl &path, const Twine &a, if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage)); if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage)); - for (SmallVectorImpl::const_iterator i = components.begin(), - e = components.end(); - i != e; ++i) { + for (auto &component : components) { bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]); - bool component_has_sep = !i->empty() && is_separator((*i)[0]); - bool is_root_name = has_root_name(*i); + bool component_has_sep = !component.empty() && is_separator(component[0]); + bool is_root_name = has_root_name(component); if (path_has_sep) { // Strip separators from beginning of component. - size_t loc = i->find_first_not_of(separators); - StringRef c = i->substr(loc); + size_t loc = component.find_first_not_of(separators); + StringRef c = component.substr(loc); // Append it. path.append(c.begin(), c.end()); @@ -477,7 +475,7 @@ void append(SmallVectorImpl &path, const Twine &a, path.push_back(preferred_separator); } - path.append(i->begin(), i->end()); + path.append(component.begin(), component.end()); } } @@ -661,8 +659,51 @@ bool is_absolute(const Twine &path) { return rootDir && rootName; } -bool is_relative(const Twine &path) { - return !is_absolute(path); +bool is_relative(const Twine &path) { return !is_absolute(path); } + +StringRef remove_leading_dotslash(StringRef Path) { + // Remove leading "./" (or ".//" or "././" etc.) + while (Path.size() > 2 && Path[0] == '.' && is_separator(Path[1])) { + Path = Path.substr(2); + while (Path.size() > 0 && is_separator(Path[0])) + Path = Path.substr(1); + } + return Path; +} + +static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot) { + SmallVector components; + + // Skip the root path, then look for traversal in the components. 
+ StringRef rel = path::relative_path(path); + for (StringRef C : llvm::make_range(path::begin(rel), path::end(rel))) { + if (C == ".") + continue; + if (remove_dot_dot) { + if (C == "..") { + if (!components.empty()) + components.pop_back(); + continue; + } + } + components.push_back(C); + } + + SmallString<256> buffer = path::root_path(path); + for (StringRef C : components) + path::append(buffer, C); + return buffer; +} + +bool remove_dots(SmallVectorImpl &path, bool remove_dot_dot) { + StringRef p(path.data(), path.size()); + + SmallString<256> result = remove_dots(p, remove_dot_dot); + if (result == path) + return false; + + path.swap(result); + return true; } } // end namespace path @@ -732,7 +773,9 @@ std::error_code createUniqueDirectory(const Twine &Prefix, true, 0, FS_Dir); } -std::error_code make_absolute(SmallVectorImpl &path) { +static std::error_code make_absolute(const Twine ¤t_directory, + SmallVectorImpl &path, + bool use_current_directory) { StringRef p(path.data(), path.size()); bool rootDirectory = path::has_root_directory(p), @@ -748,7 +791,9 @@ std::error_code make_absolute(SmallVectorImpl &path) { // All of the following conditions will need the current directory. SmallString<128> current_dir; - if (std::error_code ec = current_path(current_dir)) + if (use_current_directory) + current_directory.toVector(current_dir); + else if (std::error_code ec = current_path(current_dir)) return ec; // Relative path. Prepend the current directory. @@ -785,12 +830,22 @@ std::error_code make_absolute(SmallVectorImpl &path) { "occurred above!"); } -std::error_code create_directories(const Twine &Path, bool IgnoreExisting) { +std::error_code make_absolute(const Twine ¤t_directory, + SmallVectorImpl &path) { + return make_absolute(current_directory, path, true); +} + +std::error_code make_absolute(SmallVectorImpl &path) { + return make_absolute(Twine(), path, false); +} + +std::error_code create_directories(const Twine &Path, bool IgnoreExisting, + perms Perms) { SmallString<128> PathStorage; StringRef P = Path.toStringRef(PathStorage); // Be optimistic and try to create the directory - std::error_code EC = create_directory(P, IgnoreExisting); + std::error_code EC = create_directory(P, IgnoreExisting, Perms); // If we succeeded, or had any error other than the parent not existing, just // return it. 
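// The traversal remove_dots performs above, transcribed for plain
// '/'-separated relative paths: "." components are dropped and ".."
// cancels the most recent real component. A sketch under those
// assumptions; the in-tree version also carries the root path through.
#include <sstream>
#include <string>
#include <vector>

static std::string removeDotsSketch(const std::string &Path) {
  std::vector<std::string> Components;
  std::istringstream In(Path);
  std::string C;
  while (std::getline(In, C, '/')) {
    if (C.empty() || C == ".")
      continue; // skip empty and "." components
    if (C == "..") {
      if (!Components.empty())
        Components.pop_back(); // ".." cancels the previous component
      continue;
    }
    Components.push_back(C);
  }
  std::string Out;
  for (const std::string &Part : Components) {
    if (!Out.empty())
      Out += '/';
    Out += Part;
  }
  return Out; // e.g. "a/./b/../c" -> "a/c"
}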
if (EC != errc::no_such_file_or_directory) return EC; @@ -802,10 +857,10 @@ std::error_code create_directories(const Twine &Path, bool IgnoreExisting) { if (Parent.empty()) return EC; - if ((EC = create_directories(Parent))) + if ((EC = create_directories(Parent, IgnoreExisting, Perms))) return EC; - return create_directory(P, IgnoreExisting); + return create_directory(P, IgnoreExisting, Perms); } std::error_code copy_file(const Twine &From, const Twine &To) { @@ -889,8 +944,7 @@ std::error_code is_other(const Twine &Path, bool &Result) { } void directory_entry::replace_filename(const Twine &filename, file_status st) { - SmallString<128> path(Path.begin(), Path.end()); - path::remove_filename(path); + SmallString<128> path = path::parent_path(Path); path::append(path, filename); Path = path.str(); Status = st; @@ -940,7 +994,8 @@ file_magic identify_magic(StringRef Magic) { break; case '!': if (Magic.size() >= 8) - if (memcmp(Magic.data(),"!<arch>\n",8) == 0) + if (memcmp(Magic.data(), "!<arch>\n", 8) == 0 || + memcmp(Magic.data(), "!<thin>\n", 8) == 0) return file_magic::archive; break; @@ -1074,3 +1129,20 @@ std::error_code directory_entry::status(file_status &result) const { #if defined(LLVM_ON_WIN32) #include "Windows/Path.inc" #endif + +namespace llvm { +namespace sys { +namespace path { + +bool user_cache_directory(SmallVectorImpl<char> &Result, const Twine &Path1, + const Twine &Path2, const Twine &Path3) { + if (getUserCacheDir(Result)) { + append(Result, Path1, Path2, Path3); + return true; + } + return false; +} + +} // end namespace path +} // end namespace sys +} // end namespace llvm diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index f9f8cab9d933..05b3e31644bd 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/PrettyStackTrace.h" -#include "llvm-c/Core.h" +#include "llvm-c/ErrorHandling.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/Compiler.h" @@ -154,6 +154,20 @@ void llvm::EnablePrettyStackTrace() { #endif } +const void* llvm::SavePrettyStackState() { +#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) + return PrettyStackTraceHead; +#else + return nullptr; +#endif +} + +void llvm::RestorePrettyStackState(const void* Top) { +#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) + PrettyStackTraceHead = (const PrettyStackTraceEntry*)Top; +#endif +} + void LLVMEnablePrettyStackTrace() { EnablePrettyStackTrace(); } diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp index a11789372d93..3dc6b7c99d01 100644 --- a/lib/Support/Signals.cpp +++ b/lib/Support/Signals.cpp @@ -12,8 +12,21 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Support/Signals.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/StringSaver.h" +#include "llvm/Support/raw_ostream.h" +#include <vector> namespace llvm { using namespace sys; @@ -23,6 +36,131 @@ using namespace sys; //=== independent code.
//===----------------------------------------------------------------------===// +static ManagedStatic>> + CallBacksToRun; +void sys::RunSignalHandlers() { + if (!CallBacksToRun.isConstructed()) + return; + for (auto &I : *CallBacksToRun) + I.first(I.second); + CallBacksToRun->clear(); +} +} + +using namespace llvm; + +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool); + +/// Format a pointer value as hexadecimal. Zero pad it out so its always the +/// same width. +static FormattedNumber format_ptr(void *PC) { + // Each byte is two hex digits plus 2 for the 0x prefix. + unsigned PtrWidth = 2 + 2 * sizeof(void *); + return format_hex((uint64_t)PC, PtrWidth); +} + +static bool printSymbolizedStackTrace(void **StackTrace, int Depth, + llvm::raw_ostream &OS) + LLVM_ATTRIBUTE_USED; + +/// Helper that launches llvm-symbolizer and symbolizes a backtrace. +static bool printSymbolizedStackTrace(void **StackTrace, int Depth, + llvm::raw_ostream &OS) { + // FIXME: Subtract necessary number from StackTrace entries to turn return addresses + // into actual instruction addresses. + // Use llvm-symbolizer tool to symbolize the stack traces. + ErrorOr LLVMSymbolizerPathOrErr = + sys::findProgramByName("llvm-symbolizer"); + if (!LLVMSymbolizerPathOrErr) + return false; + const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; + // We don't know argv0 or the address of main() at this point, but try + // to guess it anyway (it's possible on some platforms). + std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr); + if (MainExecutableName.empty() || + MainExecutableName.find("llvm-symbolizer") != std::string::npos) + return false; + + BumpPtrAllocator Allocator; + StringSaver StrPool(Allocator); + std::vector Modules(Depth, nullptr); + std::vector Offsets(Depth, 0); + if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(), + MainExecutableName.c_str(), StrPool)) + return false; + int InputFD; + SmallString<32> InputFile, OutputFile; + sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile); + sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile); + FileRemover InputRemover(InputFile.c_str()); + FileRemover OutputRemover(OutputFile.c_str()); + + { + raw_fd_ostream Input(InputFD, true); + for (int i = 0; i < Depth; i++) { + if (Modules[i]) + Input << Modules[i] << " " << (void*)Offsets[i] << "\n"; + } + } + + StringRef InputFileStr(InputFile); + StringRef OutputFileStr(OutputFile); + StringRef StderrFileStr; + const StringRef *Redirects[] = {&InputFileStr, &OutputFileStr, + &StderrFileStr}; + const char *Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining", +#ifdef LLVM_ON_WIN32 + // Pass --relative-address on Windows so that we don't + // have to add ImageBase from PE file. + // FIXME: Make this the default for llvm-symbolizer. + "--relative-address", +#endif + "--demangle", nullptr}; + int RunResult = + sys::ExecuteAndWait(LLVMSymbolizerPath, Args, nullptr, Redirects); + if (RunResult != 0) + return false; + + // This report format is based on the sanitizer stack trace printer. See + // sanitizer_stacktrace_printer.cc in compiler-rt. 
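// printSymbolizedStackTrace feeds llvm-symbolizer "module offset" pairs
// produced by the platform-specific findModulesAndOffsets. On platforms
// with dladdr, one way to derive such a pair from a PC value looks like
// this (a sketch, not the in-tree code; link with -ldl on Linux):
#include <dlfcn.h>
#include <cstdint>

static bool moduleAndOffset(void *PC, const char *&Module, intptr_t &Offset) {
  Dl_info Info;
  if (!dladdr(PC, &Info) || !Info.dli_fname)
    return false;          // address not in any loaded module
  Module = Info.dli_fname; // path of the containing object
  Offset = reinterpret_cast<intptr_t>(PC) -
           reinterpret_cast<intptr_t>(Info.dli_fbase);
  return true;
}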
+ auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str()); + if (!OutputBuf) + return false; + StringRef Output = OutputBuf.get()->getBuffer(); + SmallVector Lines; + Output.split(Lines, "\n"); + auto CurLine = Lines.begin(); + int frame_no = 0; + for (int i = 0; i < Depth; i++) { + if (!Modules[i]) { + OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << '\n'; + continue; + } + // Read pairs of lines (function name and file/line info) until we + // encounter empty line. + for (;;) { + if (CurLine == Lines.end()) + return false; + StringRef FunctionName = *CurLine++; + if (FunctionName.empty()) + break; + OS << '#' << frame_no++ << ' ' << format_ptr(StackTrace[i]) << ' '; + if (!FunctionName.startswith("??")) + OS << FunctionName << ' '; + if (CurLine == Lines.end()) + return false; + StringRef FileLineInfo = *CurLine++; + if (!FileLineInfo.startswith("??")) + OS << FileLineInfo; + else + OS << "(" << Modules[i] << '+' << format_hex(Offsets[i], 0) << ")"; + OS << "\n"; + } + } + return true; } // Include the platform-specific parts of this class. diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index 56c3b0f5659f..e49d1cbe0637 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" @@ -33,9 +34,6 @@ #include using namespace llvm; -// CreateInfoOutputFile - Return a file stream to print our output on. -namespace llvm { extern raw_ostream *CreateInfoOutputFile(); } - /// -stats - Command line option to cause transformations to emit stats about /// what they did. /// @@ -144,20 +142,18 @@ void llvm::PrintStatistics() { if (Stats.Stats.empty()) return; // Get the stream to write to. - raw_ostream &OutStream = *CreateInfoOutputFile(); - PrintStatistics(OutStream); - delete &OutStream; // Close the file. + std::unique_ptr OutStream = CreateInfoOutputFile(); + PrintStatistics(*OutStream); + #else // Check if the -stats option is set instead of checking // !Stats.Stats.empty(). In release builds, Statistics operators // do nothing, so stats are never Registered. if (Enabled) { // Get the stream to write to. - raw_ostream &OutStream = *CreateInfoOutputFile(); - OutStream << "Statistics are disabled. " - << "Build with asserts or with -DLLVM_ENABLE_STATS\n"; - OutStream.flush(); - delete &OutStream; // Close the file. + std::unique_ptr OutStream = CreateInfoOutputFile(); + (*OutStream) << "Statistics are disabled. " + << "Build with asserts or with -DLLVM_ENABLE_STATS\n"; } #endif } diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index ddece087a9e7..7ecff2964c51 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -140,37 +140,44 @@ std::string StringRef::upper() const { /// \return - The index of the first occurrence of \arg Str, or npos if not /// found. 
size_t StringRef::find(StringRef Str, size_t From) const { - size_t N = Str.size(); - if (N > Length) + if (From > Length) return npos; + const char *Needle = Str.data(); + size_t N = Str.size(); + if (N == 0) + return From; + + size_t Size = Length - From; + if (Size < N) + return npos; + + const char *Start = Data + From; + const char *Stop = Start + (Size - N + 1); + // For short haystacks or unsupported needles fall back to the naive algorithm - if (Length < 16 || N > 255 || N == 0) { - for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i) - if (substr(i, N).equals(Str)) - return i; + if (Size < 16 || N > 255) { + do { + if (std::memcmp(Start, Needle, N) == 0) + return Start - Data; + ++Start; + } while (Start < Stop); return npos; } - if (From >= Length) - return npos; - // Build the bad char heuristic table, with uint8_t to reduce cache thrashing. uint8_t BadCharSkip[256]; std::memset(BadCharSkip, N, 256); for (unsigned i = 0; i != N-1; ++i) BadCharSkip[(uint8_t)Str[i]] = N-1-i; - unsigned Len = Length-From, Pos = From; - while (Len >= N) { - if (substr(Pos, N).equals(Str)) // See if this is the correct substring. - return Pos; + do { + if (std::memcmp(Start, Needle, N) == 0) + return Start - Data; // Otherwise skip the appropriate number of bytes. - uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]]; - Len -= Skip; - Pos += Skip; - } + Start += BadCharSkip[(uint8_t)Start[N-1]]; + } while (Start < Stop); return npos; } @@ -274,24 +281,56 @@ StringRef::size_type StringRef::find_last_not_of(StringRef Chars, } void StringRef::split(SmallVectorImpl &A, - StringRef Separators, int MaxSplit, + StringRef Separator, int MaxSplit, bool KeepEmpty) const { - StringRef rest = *this; + StringRef S = *this; - // rest.data() is used to distinguish cases like "a," that splits into - // "a" + "" and "a" that splits into "a" + 0. - for (int splits = 0; - rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit); - ++splits) { - std::pair p = rest.split(Separators); + // Count down from MaxSplit. When MaxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make MaxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (MaxSplit-- != 0) { + size_t Idx = S.find(Separator); + if (Idx == npos) + break; - if (KeepEmpty || p.first.size() != 0) - A.push_back(p.first); - rest = p.second; + // Push this split. + if (KeepEmpty || Idx > 0) + A.push_back(S.slice(0, Idx)); + + // Jump forward. + S = S.slice(Idx + Separator.size(), npos); } - // If we have a tail left, add it. - if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty)) - A.push_back(rest); + + // Push the tail. + if (KeepEmpty || !S.empty()) + A.push_back(S); +} + +void StringRef::split(SmallVectorImpl &A, char Separator, + int MaxSplit, bool KeepEmpty) const { + StringRef S = *this; + + // Count down from MaxSplit. When MaxSplit is -1, this will just split + // "forever". This doesn't support splitting more than 2^31 times + // intentionally; if we ever want that we can make MaxSplit a 64-bit integer + // but that seems unlikely to be useful. + while (MaxSplit-- != 0) { + size_t Idx = S.find(Separator); + if (Idx == npos) + break; + + // Push this split. + if (KeepEmpty || Idx > 0) + A.push_back(S.slice(0, Idx)); + + // Jump forward. + S = S.slice(Idx + 1, npos); + } + + // Push the tail. 
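// The rewritten find is Boyer-Moore-Horspool: memcmp at each candidate
// position, and on a mismatch skip by the distance from the window's last
// byte to that byte's final occurrence in the needle. The same algorithm
// as a standalone sketch (npos spelled as size_t(-1)):
#include <cstdint>
#include <cstring>

static size_t findHorspool(const char *Hay, size_t HayLen,
                           const char *Needle, size_t N) {
  if (N == 0 || N > HayLen || N > 255)
    return size_t(-1); // the real code falls back to a naive scan
  uint8_t Skip[256];
  std::memset(Skip, static_cast<int>(N), sizeof(Skip));
  for (size_t I = 0; I + 1 < N; ++I) // every needle byte except the last
    Skip[(uint8_t)Needle[I]] = static_cast<uint8_t>(N - 1 - I);
  const char *Start = Hay;
  const char *Stop = Hay + (HayLen - N + 1);
  do {
    if (std::memcmp(Start, Needle, N) == 0)
      return static_cast<size_t>(Start - Hay);
    Start += Skip[(uint8_t)Start[N - 1]];
  } while (Start < Stop);
  return size_t(-1);
}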
+ if (KeepEmpty || !S.empty()) + A.push_back(S); } //===----------------------------------------------------------------------===// diff --git a/lib/Support/StringSaver.cpp b/lib/Support/StringSaver.cpp index d6b84e53dccd..bbc1fd276266 100644 --- a/lib/Support/StringSaver.cpp +++ b/lib/Support/StringSaver.cpp @@ -11,7 +11,7 @@ using namespace llvm; -const char *StringSaver::saveImpl(StringRef S) { +const char *StringSaver::save(StringRef S) { char *P = Alloc.Allocate(S.size() + 1); memcpy(P, S.data(), S.size()); P[S.size()] = '\0'; diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp index 4d4c041b8435..337532edbbc6 100644 --- a/lib/Support/TargetParser.cpp +++ b/lib/Support/TargetParser.cpp @@ -16,9 +16,11 @@ #include "llvm/Support/TargetParser.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include using namespace llvm; +using namespace ARM; namespace { @@ -26,36 +28,19 @@ namespace { // features they correspond to (use getFPUFeatures). // FIXME: TableGen this. // The entries must appear in the order listed in ARM::FPUKind for correct indexing -struct { - const char * Name; +static const struct { + const char *NameCStr; + size_t NameLength; ARM::FPUKind ID; ARM::FPUVersion FPUVersion; ARM::NeonSupportLevel NeonSupport; ARM::FPURestriction Restriction; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } } FPUNames[] = { - { "invalid", ARM::FK_INVALID, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, - { "none", ARM::FK_NONE, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, - { "vfp", ARM::FK_VFP, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None}, - { "vfpv2", ARM::FK_VFPV2, ARM::FV_VFPV2, ARM::NS_None, ARM::FR_None}, - { "vfpv3", ARM::FK_VFPV3, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_None}, - { "vfpv3-fp16", ARM::FK_VFPV3_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_None}, - { "vfpv3-d16", ARM::FK_VFPV3_D16, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_D16}, - { "vfpv3-d16-fp16", ARM::FK_VFPV3_D16_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_D16}, - { "vfpv3xd", ARM::FK_VFPV3XD, ARM::FV_VFPV3, ARM::NS_None, ARM::FR_SP_D16}, - { "vfpv3xd-fp16", ARM::FK_VFPV3XD_FP16, ARM::FV_VFPV3_FP16, ARM::NS_None, ARM::FR_SP_D16}, - { "vfpv4", ARM::FK_VFPV4, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_None}, - { "vfpv4-d16", ARM::FK_VFPV4_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_D16}, - { "fpv4-sp-d16", ARM::FK_FPV4_SP_D16, ARM::FV_VFPV4, ARM::NS_None, ARM::FR_SP_D16}, - { "fpv5-d16", ARM::FK_FPV5_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_D16}, - { "fpv5-sp-d16", ARM::FK_FPV5_SP_D16, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_SP_D16}, - { "fp-armv8", ARM::FK_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_None, ARM::FR_None}, - { "neon", ARM::FK_NEON, ARM::FV_VFPV3, ARM::NS_Neon, ARM::FR_None}, - { "neon-fp16", ARM::FK_NEON_FP16, ARM::FV_VFPV3_FP16, ARM::NS_Neon, ARM::FR_None}, - { "neon-vfpv4", ARM::FK_NEON_VFPV4, ARM::FV_VFPV4, ARM::NS_Neon, ARM::FR_None}, - { "neon-fp-armv8", ARM::FK_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Neon, ARM::FR_None}, - { "crypto-neon-fp-armv8", - ARM::FK_CRYPTO_NEON_FP_ARMV8, ARM::FV_VFPV5, ARM::NS_Crypto, ARM::FR_None}, - { "softvfp", ARM::FK_SOFTVFP, ARM::FV_NONE, ARM::NS_None, ARM::FR_None}, +#define ARM_FPU(NAME, KIND, VERSION, NEON_SUPPORT, RESTRICTION) \ + { NAME, sizeof(NAME) - 1, KIND, VERSION, NEON_SUPPORT, RESTRICTION }, +#include "llvm/Support/ARMTargetParser.def" }; // List of canonical arch names (use getArchSynonym). @@ -66,165 +51,79 @@ struct { // of the triples and are not conforming with their official names. 
// Check to see if the expectation should be changed. // FIXME: TableGen this. -struct { - const char *Name; +static const struct { + const char *NameCStr; + size_t NameLength; + const char *CPUAttrCStr; + size_t CPUAttrLength; + const char *SubArchCStr; + size_t SubArchLength; + unsigned DefaultFPU; + unsigned ArchBaseExtensions; ARM::ArchKind ID; - const char *CPUAttr; // CPU class in build attributes. - const char *SubArch; // Sub-Arch name. ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes. + + StringRef getName() const { return StringRef(NameCStr, NameLength); } + + // CPU class in build attributes. + StringRef getCPUAttr() const { return StringRef(CPUAttrCStr, CPUAttrLength); } + + // Sub-Arch name. + StringRef getSubArch() const { return StringRef(SubArchCStr, SubArchLength); } } ARCHNames[] = { - { "invalid", ARM::AK_INVALID, nullptr, nullptr, ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv2", ARM::AK_ARMV2, "2", "v2", ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv2a", ARM::AK_ARMV2A, "2A", "v2a", ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv3", ARM::AK_ARMV3, "3", "v3", ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv3m", ARM::AK_ARMV3M, "3M", "v3m", ARMBuildAttrs::CPUArch::Pre_v4 }, - { "armv4", ARM::AK_ARMV4, "4", "v4", ARMBuildAttrs::CPUArch::v4 }, - { "armv4t", ARM::AK_ARMV4T, "4T", "v4t", ARMBuildAttrs::CPUArch::v4T }, - { "armv5t", ARM::AK_ARMV5T, "5T", "v5", ARMBuildAttrs::CPUArch::v5T }, - { "armv5te", ARM::AK_ARMV5TE, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE }, - { "armv5tej", ARM::AK_ARMV5TEJ, "5TEJ", "v5e", ARMBuildAttrs::CPUArch::v5TEJ }, - { "armv6", ARM::AK_ARMV6, "6", "v6", ARMBuildAttrs::CPUArch::v6 }, - { "armv6k", ARM::AK_ARMV6K, "6K", "v6k", ARMBuildAttrs::CPUArch::v6K }, - { "armv6t2", ARM::AK_ARMV6T2, "6T2", "v6t2", ARMBuildAttrs::CPUArch::v6T2 }, - { "armv6z", ARM::AK_ARMV6Z, "6Z", "v6z", ARMBuildAttrs::CPUArch::v6KZ }, - { "armv6zk", ARM::AK_ARMV6ZK, "6ZK", "v6zk", ARMBuildAttrs::CPUArch::v6KZ }, - { "armv6-m", ARM::AK_ARMV6M, "6-M", "v6m", ARMBuildAttrs::CPUArch::v6_M }, - { "armv6s-m", ARM::AK_ARMV6SM, "6S-M", "v6sm", ARMBuildAttrs::CPUArch::v6S_M }, - { "armv7-a", ARM::AK_ARMV7A, "7-A", "v7", ARMBuildAttrs::CPUArch::v7 }, - { "armv7-r", ARM::AK_ARMV7R, "7-R", "v7r", ARMBuildAttrs::CPUArch::v7 }, - { "armv7-m", ARM::AK_ARMV7M, "7-M", "v7m", ARMBuildAttrs::CPUArch::v7 }, - { "armv7e-m", ARM::AK_ARMV7EM, "7E-M", "v7em", ARMBuildAttrs::CPUArch::v7E_M }, - { "armv8-a", ARM::AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8 }, - { "armv8.1-a", ARM::AK_ARMV8_1A, "8.1-A", "v8.1a", ARMBuildAttrs::CPUArch::v8 }, - // Non-standard Arch names. 
- { "iwmmxt", ARM::AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE }, - { "iwmmxt2", ARM::AK_IWMMXT2, "iwmmxt2", "", ARMBuildAttrs::CPUArch::v5TE }, - { "xscale", ARM::AK_XSCALE, "xscale", "", ARMBuildAttrs::CPUArch::v5TE }, - { "armv5", ARM::AK_ARMV5, "5T", "v5", ARMBuildAttrs::CPUArch::v5T }, - { "armv5e", ARM::AK_ARMV5E, "5TE", "v5e", ARMBuildAttrs::CPUArch::v5TE }, - { "armv6j", ARM::AK_ARMV6J, "6J", "v6", ARMBuildAttrs::CPUArch::v6 }, - { "armv6hl", ARM::AK_ARMV6HL, "6-M", "v6hl", ARMBuildAttrs::CPUArch::v6_M }, - { "armv7", ARM::AK_ARMV7, "7", "v7", ARMBuildAttrs::CPUArch::v7 }, - { "armv7l", ARM::AK_ARMV7L, "7-L", "v7l", ARMBuildAttrs::CPUArch::v7 }, - { "armv7hl", ARM::AK_ARMV7HL, "7-L", "v7hl", ARMBuildAttrs::CPUArch::v7 }, - { "armv7s", ARM::AK_ARMV7S, "7-S", "v7s", ARMBuildAttrs::CPUArch::v7 } +#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \ + {NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \ + sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, ID, ARCH_ATTR}, +#include "llvm/Support/ARMTargetParser.def" }; + // List of Arch Extension names. // FIXME: TableGen this. -struct { - const char *Name; - ARM::ArchExtKind ID; +static const struct { + const char *NameCStr; + size_t NameLength; + unsigned ID; + const char *Feature; + const char *NegFeature; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } } ARCHExtNames[] = { - { "invalid", ARM::AEK_INVALID }, - { "crc", ARM::AEK_CRC }, - { "crypto", ARM::AEK_CRYPTO }, - { "fp", ARM::AEK_FP }, - { "idiv", ARM::AEK_HWDIV }, - { "mp", ARM::AEK_MP }, - { "simd", ARM::AEK_SIMD }, - { "sec", ARM::AEK_SEC }, - { "virt", ARM::AEK_VIRT }, - { "os", ARM::AEK_OS }, - { "iwmmxt", ARM::AEK_IWMMXT }, - { "iwmmxt2", ARM::AEK_IWMMXT2 }, - { "maverick", ARM::AEK_MAVERICK }, - { "xscale", ARM::AEK_XSCALE } +#define ARM_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE) \ + { NAME, sizeof(NAME) - 1, ID, FEATURE, NEGFEATURE }, +#include "llvm/Support/ARMTargetParser.def" }; + +// List of HWDiv names (use getHWDivSynonym) and which architectural +// features they correspond to (use getHWDivFeatures). +// FIXME: TableGen this. +static const struct { + const char *NameCStr; + size_t NameLength; + unsigned ID; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } +} HWDivNames[] = { +#define ARM_HW_DIV_NAME(NAME, ID) { NAME, sizeof(NAME) - 1, ID }, +#include "llvm/Support/ARMTargetParser.def" +}; + // List of CPU names and their arches. // The same CPU can have multiple arches and can be default on multiple arches. // When finding the Arch for a CPU, first-found prevails. Sort them accordingly. // When this becomes table-generated, we'd probably need two tables. // FIXME: TableGen this. -struct { - const char *Name; +static const struct { + const char *NameCStr; + size_t NameLength; ARM::ArchKind ArchID; - bool Default; + bool Default; // is $Name the default CPU for $ArchID ? 
+ unsigned DefaultExtensions; + + StringRef getName() const { return StringRef(NameCStr, NameLength); } } CPUNames[] = { - { "arm2", ARM::AK_ARMV2, true }, - { "arm3", ARM::AK_ARMV2A, true }, - { "arm6", ARM::AK_ARMV3, true }, - { "arm7m", ARM::AK_ARMV3M, true }, - { "arm8", ARM::AK_ARMV4, false }, - { "arm810", ARM::AK_ARMV4, false }, - { "strongarm", ARM::AK_ARMV4, true }, - { "strongarm110", ARM::AK_ARMV4, false }, - { "strongarm1100", ARM::AK_ARMV4, false }, - { "strongarm1110", ARM::AK_ARMV4, false }, - { "arm7tdmi", ARM::AK_ARMV4T, true }, - { "arm7tdmi-s", ARM::AK_ARMV4T, false }, - { "arm710t", ARM::AK_ARMV4T, false }, - { "arm720t", ARM::AK_ARMV4T, false }, - { "arm9", ARM::AK_ARMV4T, false }, - { "arm9tdmi", ARM::AK_ARMV4T, false }, - { "arm920", ARM::AK_ARMV4T, false }, - { "arm920t", ARM::AK_ARMV4T, false }, - { "arm922t", ARM::AK_ARMV4T, false }, - { "arm9312", ARM::AK_ARMV4T, false }, - { "arm940t", ARM::AK_ARMV4T, false }, - { "ep9312", ARM::AK_ARMV4T, false }, - { "arm10tdmi", ARM::AK_ARMV5T, true }, - { "arm1020t", ARM::AK_ARMV5T, false }, - { "arm9e", ARM::AK_ARMV5TE, false }, - { "arm946e-s", ARM::AK_ARMV5TE, false }, - { "arm966e-s", ARM::AK_ARMV5TE, false }, - { "arm968e-s", ARM::AK_ARMV5TE, false }, - { "arm10e", ARM::AK_ARMV5TE, false }, - { "arm1020e", ARM::AK_ARMV5TE, false }, - { "arm1022e", ARM::AK_ARMV5TE, true }, - { "iwmmxt", ARM::AK_ARMV5TE, false }, - { "xscale", ARM::AK_ARMV5TE, false }, - { "arm926ej-s", ARM::AK_ARMV5TEJ, true }, - { "arm1136jf-s", ARM::AK_ARMV6, true }, - { "arm1176j-s", ARM::AK_ARMV6K, false }, - { "arm1176jz-s", ARM::AK_ARMV6K, false }, - { "mpcore", ARM::AK_ARMV6K, false }, - { "mpcorenovfp", ARM::AK_ARMV6K, false }, - { "arm1176jzf-s", ARM::AK_ARMV6K, true }, - { "arm1176jzf-s", ARM::AK_ARMV6Z, true }, - { "arm1176jzf-s", ARM::AK_ARMV6ZK, true }, - { "arm1156t2-s", ARM::AK_ARMV6T2, true }, - { "arm1156t2f-s", ARM::AK_ARMV6T2, false }, - { "cortex-m0", ARM::AK_ARMV6M, true }, - { "cortex-m0plus", ARM::AK_ARMV6M, false }, - { "cortex-m1", ARM::AK_ARMV6M, false }, - { "sc000", ARM::AK_ARMV6M, false }, - { "cortex-a5", ARM::AK_ARMV7A, false }, - { "cortex-a7", ARM::AK_ARMV7A, false }, - { "cortex-a8", ARM::AK_ARMV7A, true }, - { "cortex-a9", ARM::AK_ARMV7A, false }, - { "cortex-a12", ARM::AK_ARMV7A, false }, - { "cortex-a15", ARM::AK_ARMV7A, false }, - { "cortex-a17", ARM::AK_ARMV7A, false }, - { "krait", ARM::AK_ARMV7A, false }, - { "cortex-r4", ARM::AK_ARMV7R, true }, - { "cortex-r4f", ARM::AK_ARMV7R, false }, - { "cortex-r5", ARM::AK_ARMV7R, false }, - { "cortex-r7", ARM::AK_ARMV7R, false }, - { "sc300", ARM::AK_ARMV7M, false }, - { "cortex-m3", ARM::AK_ARMV7M, true }, - { "cortex-m4", ARM::AK_ARMV7EM, true }, - { "cortex-m7", ARM::AK_ARMV7EM, false }, - { "cortex-a53", ARM::AK_ARMV8A, true }, - { "cortex-a57", ARM::AK_ARMV8A, false }, - { "cortex-a72", ARM::AK_ARMV8A, false }, - { "cyclone", ARM::AK_ARMV8A, false }, - { "generic", ARM::AK_ARMV8_1A, true }, - // Non-standard Arch names. 
- { "iwmmxt", ARM::AK_IWMMXT, true }, - { "xscale", ARM::AK_XSCALE, true }, - { "arm10tdmi", ARM::AK_ARMV5, true }, - { "arm1022e", ARM::AK_ARMV5E, true }, - { "arm1136j-s", ARM::AK_ARMV6J, true }, - { "arm1136jz-s", ARM::AK_ARMV6J, false }, - { "cortex-m0", ARM::AK_ARMV6SM, true }, - { "arm1176jzf-s", ARM::AK_ARMV6HL, true }, - { "cortex-a8", ARM::AK_ARMV7, true }, - { "cortex-a8", ARM::AK_ARMV7L, true }, - { "cortex-a8", ARM::AK_ARMV7HL, true }, - { "cortex-m4", ARM::AK_ARMV7EM, true }, - { "swift", ARM::AK_ARMV7S, true }, - // Invalid CPU - { "invalid", ARM::AK_INVALID, true } +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + { NAME, sizeof(NAME) - 1, ID, IS_DEFAULT, DEFAULT_EXT }, +#include "llvm/Support/ARMTargetParser.def" }; } // namespace @@ -233,33 +132,93 @@ struct { // Information by ID // ======================================================= // -const char *ARMTargetParser::getFPUName(unsigned FPUKind) { +StringRef llvm::ARM::getFPUName(unsigned FPUKind) { if (FPUKind >= ARM::FK_LAST) - return nullptr; - return FPUNames[FPUKind].Name; + return StringRef(); + return FPUNames[FPUKind].getName(); } -unsigned ARMTargetParser::getFPUVersion(unsigned FPUKind) { +unsigned llvm::ARM::getFPUVersion(unsigned FPUKind) { if (FPUKind >= ARM::FK_LAST) return 0; return FPUNames[FPUKind].FPUVersion; } -unsigned ARMTargetParser::getFPUNeonSupportLevel(unsigned FPUKind) { +unsigned llvm::ARM::getFPUNeonSupportLevel(unsigned FPUKind) { if (FPUKind >= ARM::FK_LAST) return 0; return FPUNames[FPUKind].NeonSupport; } -unsigned ARMTargetParser::getFPURestriction(unsigned FPUKind) { +unsigned llvm::ARM::getFPURestriction(unsigned FPUKind) { if (FPUKind >= ARM::FK_LAST) return 0; return FPUNames[FPUKind].Restriction; } -bool ARMTargetParser::getFPUFeatures(unsigned FPUKind, +unsigned llvm::ARM::getDefaultFPU(StringRef CPU, unsigned ArchKind) { + if (CPU == "generic") + return ARCHNames[ArchKind].DefaultFPU; + + return StringSwitch(CPU) +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, DEFAULT_FPU) +#include "llvm/Support/ARMTargetParser.def" + .Default(ARM::FK_INVALID); +} + +unsigned llvm::ARM::getDefaultExtensions(StringRef CPU, unsigned ArchKind) { + if (CPU == "generic") + return ARCHNames[ArchKind].ArchBaseExtensions; + + return StringSwitch(CPU) +#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \ + .Case(NAME, ARCHNames[ID].ArchBaseExtensions | DEFAULT_EXT) +#include "llvm/Support/ARMTargetParser.def" + .Default(ARM::AEK_INVALID); +} + +bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind, + std::vector &Features) { + + if (HWDivKind == ARM::AEK_INVALID) + return false; + + if (HWDivKind & ARM::AEK_HWDIVARM) + Features.push_back("+hwdiv-arm"); + else + Features.push_back("-hwdiv-arm"); + + if (HWDivKind & ARM::AEK_HWDIV) + Features.push_back("+hwdiv"); + else + Features.push_back("-hwdiv"); + + return true; +} + +bool llvm::ARM::getExtensionFeatures(unsigned Extensions, std::vector &Features) { + if (Extensions == ARM::AEK_INVALID) + return false; + + if (Extensions & ARM::AEK_CRC) + Features.push_back("+crc"); + else + Features.push_back("-crc"); + + if (Extensions & ARM::AEK_DSP) + Features.push_back("+dsp"); + else + Features.push_back("-dsp"); + + return getHWDivFeatures(Extensions, Features); +} + +bool llvm::ARM::getFPUFeatures(unsigned FPUKind, + std::vector &Features) { + if (FPUKind >= ARM::FK_LAST || FPUKind == ARM::FK_INVALID) return false; @@ -323,6 +282,7 @@ bool ARMTargetParser::getFPUFeatures(unsigned 
FPUKind, // crypto includes neon, so we handle this similarly to FPU version. switch (FPUNames[FPUKind].NeonSupport) { case ARM::NS_Crypto: + Features.push_back("+neon"); Features.push_back("+crypto"); break; case ARM::NS_Neon: @@ -338,88 +298,127 @@ bool ARMTargetParser::getFPUFeatures(unsigned FPUKind, return true; } -const char *ARMTargetParser::getArchName(unsigned ArchKind) { +StringRef llvm::ARM::getArchName(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) - return nullptr; - return ARCHNames[ArchKind].Name; + return StringRef(); + return ARCHNames[ArchKind].getName(); } -const char *ARMTargetParser::getCPUAttr(unsigned ArchKind) { +StringRef llvm::ARM::getCPUAttr(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) - return nullptr; - return ARCHNames[ArchKind].CPUAttr; + return StringRef(); + return ARCHNames[ArchKind].getCPUAttr(); } -const char *ARMTargetParser::getSubArch(unsigned ArchKind) { +StringRef llvm::ARM::getSubArch(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) - return nullptr; - return ARCHNames[ArchKind].SubArch; + return StringRef(); + return ARCHNames[ArchKind].getSubArch(); } -unsigned ARMTargetParser::getArchAttr(unsigned ArchKind) { +unsigned llvm::ARM::getArchAttr(unsigned ArchKind) { if (ArchKind >= ARM::AK_LAST) return ARMBuildAttrs::CPUArch::Pre_v4; return ARCHNames[ArchKind].ArchAttr; } -const char *ARMTargetParser::getArchExtName(unsigned ArchExtKind) { - if (ArchExtKind >= ARM::AEK_LAST) - return nullptr; - return ARCHExtNames[ArchExtKind].Name; +StringRef llvm::ARM::getArchExtName(unsigned ArchExtKind) { + for (const auto AE : ARCHExtNames) { + if (ArchExtKind == AE.ID) + return AE.getName(); + } + return StringRef(); } -const char *ARMTargetParser::getDefaultCPU(StringRef Arch) { +const char *llvm::ARM::getArchExtFeature(StringRef ArchExt) { + if (ArchExt.startswith("no")) { + StringRef ArchExtBase(ArchExt.substr(2)); + for (const auto AE : ARCHExtNames) { + if (AE.NegFeature && ArchExtBase == AE.getName()) + return AE.NegFeature; + } + } + for (const auto AE : ARCHExtNames) { + if (AE.Feature && ArchExt == AE.getName()) + return AE.Feature; + } + + return nullptr; +} + +StringRef llvm::ARM::getHWDivName(unsigned HWDivKind) { + for (const auto D : HWDivNames) { + if (HWDivKind == D.ID) + return D.getName(); + } + return StringRef(); +} + +StringRef llvm::ARM::getDefaultCPU(StringRef Arch) { unsigned AK = parseArch(Arch); if (AK == ARM::AK_INVALID) - return nullptr; + return StringRef(); // Look for multiple AKs to find the default for pair AK+Name. for (const auto CPU : CPUNames) { if (CPU.ArchID == AK && CPU.Default) - return CPU.Name; + return CPU.getName(); } - return nullptr; + + // If we can't find a default then target the architecture instead + return "generic"; } // ======================================================= // // Parsers // ======================================================= // -StringRef ARMTargetParser::getFPUSynonym(StringRef FPU) { - return StringSwitch(FPU) - .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported - .Case("vfp2", "vfpv2") - .Case("vfp3", "vfpv3") - .Case("vfp4", "vfpv4") - .Case("vfp3-d16", "vfpv3-d16") - .Case("vfp4-d16", "vfpv4-d16") - .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16") - .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16") - .Case("fp5-sp-d16", "fpv5-sp-d16") - .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16") - // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3. 
- .Case("neon-vfpv3", "neon") - .Default(FPU); +static StringRef getHWDivSynonym(StringRef HWDiv) { + return StringSwitch(HWDiv) + .Case("thumb,arm", "arm,thumb") + .Default(HWDiv); } -StringRef ARMTargetParser::getArchSynonym(StringRef Arch) { +static StringRef getFPUSynonym(StringRef FPU) { + return StringSwitch(FPU) + .Cases("fpa", "fpe2", "fpe3", "maverick", "invalid") // Unsupported + .Case("vfp2", "vfpv2") + .Case("vfp3", "vfpv3") + .Case("vfp4", "vfpv4") + .Case("vfp3-d16", "vfpv3-d16") + .Case("vfp4-d16", "vfpv4-d16") + .Cases("fp4-sp-d16", "vfpv4-sp-d16", "fpv4-sp-d16") + .Cases("fp4-dp-d16", "fpv4-dp-d16", "vfpv4-d16") + .Case("fp5-sp-d16", "fpv5-sp-d16") + .Cases("fp5-dp-d16", "fpv5-dp-d16", "fpv5-d16") + // FIXME: Clang uses it, but it's bogus, since neon defaults to vfpv3. + .Case("neon-vfpv3", "neon") + .Default(FPU); +} + +static StringRef getArchSynonym(StringRef Arch) { return StringSwitch(Arch) - .Case("v6sm", "v6s-m") - .Case("v6m", "v6-m") - .Case("v7a", "v7-a") - .Case("v7r", "v7-r") - .Case("v7m", "v7-m") - .Case("v7em", "v7e-m") - .Cases("v8", "v8a", "aarch64", "arm64", "v8-a") - .Case("v8.1a", "v8.1-a") - .Default(Arch); + .Case("v5", "v5t") + .Case("v5e", "v5te") + .Case("v6j", "v6") + .Case("v6hl", "v6k") + .Cases("v6m", "v6sm", "v6s-m", "v6-m") + .Cases("v6z", "v6zk", "v6kz") + .Cases("v7", "v7a", "v7hl", "v7l", "v7-a") + .Case("v7r", "v7-r") + .Case("v7m", "v7-m") + .Case("v7em", "v7e-m") + .Cases("v8", "v8a", "aarch64", "arm64", "v8-a") + .Case("v8.1a", "v8.1-a") + .Case("v8.2a", "v8.2-a") + .Default(Arch); } // MArch is expected to be of the form (arm|thumb)?(eb)?(v.+)?(eb)?, but // (iwmmxt|xscale)(eb)? is also permitted. If the former, return // "v.+", if the latter, return unmodified string, minus 'eb'. // If invalid, return empty string. -StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) { +StringRef llvm::ARM::getCanonicalArchName(StringRef Arch) { size_t offset = StringRef::npos; StringRef A = Arch; StringRef Error = ""; @@ -436,7 +435,7 @@ StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) { // AArch64 uses "_be", not "eb" suffix. if (A.find("eb") != StringRef::npos) return Error; - if (A.substr(offset,3) == "_be") + if (A.substr(offset, 3) == "_be") offset += 3; } @@ -456,7 +455,7 @@ StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) { // Only match non-marketing names if (offset != StringRef::npos) { - // Must start with 'vN'. + // Must start with 'vN'. if (A[0] != 'v' || !std::isdigit(A[1])) return Error; // Can't have an extra 'eb'. @@ -468,56 +467,64 @@ StringRef ARMTargetParser::getCanonicalArchName(StringRef Arch) { return A; } -unsigned ARMTargetParser::parseFPU(StringRef FPU) { +unsigned llvm::ARM::parseHWDiv(StringRef HWDiv) { + StringRef Syn = getHWDivSynonym(HWDiv); + for (const auto D : HWDivNames) { + if (Syn == D.getName()) + return D.ID; + } + return ARM::AEK_INVALID; +} + +unsigned llvm::ARM::parseFPU(StringRef FPU) { StringRef Syn = getFPUSynonym(FPU); for (const auto F : FPUNames) { - if (Syn == F.Name) + if (Syn == F.getName()) return F.ID; } return ARM::FK_INVALID; } // Allows partial match, ex. "v7a" matches "armv7a". 
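// [Editor's note, not part of the upstream patch] The endswith() comparison
// in parseArch() below is what implements this partial match: assuming the
// tables above, "v7a" is first mapped to "v7-a" by getArchSynonym() and then
// matches the tail of "armv7-a". A hedged usage sketch:
//
//   unsigned AK = llvm::ARM::parseArch("armv7a"); // expected: ARM::AK_ARMV7A
//   assert(AK != ARM::AK_INVALID);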
-unsigned ARMTargetParser::parseArch(StringRef Arch) {
+unsigned llvm::ARM::parseArch(StringRef Arch) {
   Arch = getCanonicalArchName(Arch);
   StringRef Syn = getArchSynonym(Arch);
   for (const auto A : ARCHNames) {
-    if (StringRef(A.Name).endswith(Syn))
+    if (A.getName().endswith(Syn))
       return A.ID;
   }
   return ARM::AK_INVALID;
 }

-unsigned ARMTargetParser::parseArchExt(StringRef ArchExt) {
+unsigned llvm::ARM::parseArchExt(StringRef ArchExt) {
   for (const auto A : ARCHExtNames) {
-    if (ArchExt == A.Name)
+    if (ArchExt == A.getName())
       return A.ID;
   }
   return ARM::AEK_INVALID;
 }

-unsigned ARMTargetParser::parseCPUArch(StringRef CPU) {
+unsigned llvm::ARM::parseCPUArch(StringRef CPU) {
   for (const auto C : CPUNames) {
-    if (CPU == C.Name)
+    if (CPU == C.getName())
       return C.ArchID;
   }
   return ARM::AK_INVALID;
 }

 // ARM, Thumb, AArch64
-unsigned ARMTargetParser::parseArchISA(StringRef Arch) {
+unsigned llvm::ARM::parseArchISA(StringRef Arch) {
   return StringSwitch<unsigned>(Arch)
       .StartsWith("aarch64", ARM::IK_AARCH64)
-      .StartsWith("arm64", ARM::IK_AARCH64)
-      .StartsWith("thumb", ARM::IK_THUMB)
-      .StartsWith("arm", ARM::IK_ARM)
+      .StartsWith("arm64", ARM::IK_AARCH64)
+      .StartsWith("thumb", ARM::IK_THUMB)
+      .StartsWith("arm", ARM::IK_ARM)
       .Default(ARM::EK_INVALID);
 }

 // Little/Big endian
-unsigned ARMTargetParser::parseArchEndian(StringRef Arch) {
-  if (Arch.startswith("armeb") ||
-      Arch.startswith("thumbeb") ||
+unsigned llvm::ARM::parseArchEndian(StringRef Arch) {
+  if (Arch.startswith("armeb") || Arch.startswith("thumbeb") ||
       Arch.startswith("aarch64_be"))
     return ARM::EK_BIG;

@@ -535,29 +542,29 @@ unsigned ARMTargetParser::parseArchEndian(StringRef Arch) {
 }

 // Profile A/R/M
-unsigned ARMTargetParser::parseArchProfile(StringRef Arch) {
+unsigned llvm::ARM::parseArchProfile(StringRef Arch) {
   Arch = getCanonicalArchName(Arch);
-  switch(parseArch(Arch)) {
+  switch (parseArch(Arch)) {
   case ARM::AK_ARMV6M:
   case ARM::AK_ARMV7M:
-  case ARM::AK_ARMV6SM:
   case ARM::AK_ARMV7EM:
     return ARM::PK_M;
   case ARM::AK_ARMV7R:
     return ARM::PK_R;
-  case ARM::AK_ARMV7:
   case ARM::AK_ARMV7A:
+  case ARM::AK_ARMV7K:
   case ARM::AK_ARMV8A:
   case ARM::AK_ARMV8_1A:
+  case ARM::AK_ARMV8_2A:
     return ARM::PK_A;
   }
   return ARM::PK_INVALID;
 }

 // Version number (ex. v7 = 7).
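// [Editor's note, not part of the upstream patch] A quick sketch of the
// mapping implemented below, assuming the cases shown:
// parseArchVersion("armv5te") yields 5, parseArchVersion("armv8.1a") yields 8,
// and anything unrecognized falls through to 0.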
-unsigned ARMTargetParser::parseArchVersion(StringRef Arch) {
+unsigned llvm::ARM::parseArchVersion(StringRef Arch) {
   Arch = getCanonicalArchName(Arch);
-  switch(parseArch(Arch)) {
+  switch (parseArch(Arch)) {
   case ARM::AK_ARMV2:
   case ARM::AK_ARMV2A:
     return 2;
@@ -567,36 +574,29 @@ unsigned ARMTargetParser::parseArchVersion(StringRef Arch) {
   case ARM::AK_ARMV4:
   case ARM::AK_ARMV4T:
     return 4;
-  case ARM::AK_ARMV5:
   case ARM::AK_ARMV5T:
   case ARM::AK_ARMV5TE:
   case ARM::AK_IWMMXT:
   case ARM::AK_IWMMXT2:
   case ARM::AK_XSCALE:
-  case ARM::AK_ARMV5E:
   case ARM::AK_ARMV5TEJ:
     return 5;
   case ARM::AK_ARMV6:
-  case ARM::AK_ARMV6J:
   case ARM::AK_ARMV6K:
   case ARM::AK_ARMV6T2:
-  case ARM::AK_ARMV6Z:
-  case ARM::AK_ARMV6ZK:
+  case ARM::AK_ARMV6KZ:
   case ARM::AK_ARMV6M:
-  case ARM::AK_ARMV6SM:
-  case ARM::AK_ARMV6HL:
     return 6;
-  case ARM::AK_ARMV7:
   case ARM::AK_ARMV7A:
   case ARM::AK_ARMV7R:
   case ARM::AK_ARMV7M:
-  case ARM::AK_ARMV7L:
-  case ARM::AK_ARMV7HL:
   case ARM::AK_ARMV7S:
   case ARM::AK_ARMV7EM:
+  case ARM::AK_ARMV7K:
     return 7;
   case ARM::AK_ARMV8A:
   case ARM::AK_ARMV8_1A:
+  case ARM::AK_ARMV8_2A:
     return 8;
   }
   return 0;
diff --git a/lib/Support/ThreadPool.cpp b/lib/Support/ThreadPool.cpp
new file mode 100644
index 000000000000..d4dcb2ee96df
--- /dev/null
+++ b/lib/Support/ThreadPool.cpp
@@ -0,0 +1,155 @@
+//==-- llvm/Support/ThreadPool.cpp - A ThreadPool implementation -*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a crude C++11 based thread pool.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ThreadPool.h"
+
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#if LLVM_ENABLE_THREADS
+
+// Default to std::thread::hardware_concurrency
+ThreadPool::ThreadPool() : ThreadPool(std::thread::hardware_concurrency()) {}
+
+ThreadPool::ThreadPool(unsigned ThreadCount)
+    : ActiveThreads(0), EnableFlag(true) {
+  // Create ThreadCount threads that will loop forever, wait on QueueCondition
+  // for tasks to be queued or the Pool to be destroyed.
+  Threads.reserve(ThreadCount);
+  for (unsigned ThreadID = 0; ThreadID < ThreadCount; ++ThreadID) {
+    Threads.emplace_back([&] {
+      while (true) {
+        PackagedTaskTy Task;
+        {
+          std::unique_lock<std::mutex> LockGuard(QueueLock);
+          // Wait for tasks to be pushed in the queue
+          QueueCondition.wait(LockGuard,
+                              [&] { return !EnableFlag || !Tasks.empty(); });
+          // Exit condition
+          if (!EnableFlag && Tasks.empty())
+            return;
+          // Yeah, we have a task, grab it and release the lock on the queue
+
+          // We first need to signal that we are active before popping the queue
+          // in order for wait() to properly detect that even if the queue is
+          // empty, there is still a task in flight.
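// [Editor's note, not part of the upstream patch] The block below bumps
// ActiveThreads while briefly taking CompletionLock; since wait() checks
// "Tasks.empty() && !ActiveThreads" under that same lock, a waiter can never
// observe the queue empty while a just-popped task is still unaccounted for.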
+          {
+            ++ActiveThreads;
+            std::unique_lock<std::mutex> LockGuard(CompletionLock);
+          }
+          Task = std::move(Tasks.front());
+          Tasks.pop();
+        }
+        // Run the task we just grabbed
+#ifndef _MSC_VER
+        Task();
+#else
+        Task(/* unused */ false);
+#endif
+
+        {
+          // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait()
+          std::unique_lock<std::mutex> LockGuard(CompletionLock);
+          --ActiveThreads;
+        }
+
+        // Notify task completion, in case someone waits on ThreadPool::wait()
+        CompletionCondition.notify_all();
+      }
+    });
+  }
+}
+
+void ThreadPool::wait() {
+  // Wait for all threads to complete and the queue to be empty
+  std::unique_lock<std::mutex> LockGuard(CompletionLock);
+  CompletionCondition.wait(LockGuard,
+                           [&] { return Tasks.empty() && !ActiveThreads; });
+}
+
+std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) {
+  /// Wrap the Task in a packaged_task to return a future object.
+  PackagedTaskTy PackagedTask(std::move(Task));
+  auto Future = PackagedTask.get_future();
+  {
+    // Lock the queue and push the new task
+    std::unique_lock<std::mutex> LockGuard(QueueLock);
+
+    // Don't allow enqueueing after disabling the pool
+    assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
+
+    Tasks.push(std::move(PackagedTask));
+  }
+  QueueCondition.notify_one();
+  return Future.share();
+}
+
+// The destructor joins all threads, waiting for completion.
+ThreadPool::~ThreadPool() {
+  {
+    std::unique_lock<std::mutex> LockGuard(QueueLock);
+    EnableFlag = false;
+  }
+  QueueCondition.notify_all();
+  for (auto &Worker : Threads)
+    Worker.join();
+}
+
+#else // LLVM_ENABLE_THREADS Disabled
+
+ThreadPool::ThreadPool() : ThreadPool(0) {}
+
+// No threads are launched, issue a warning if ThreadCount is not 0
+ThreadPool::ThreadPool(unsigned ThreadCount)
+    : ActiveThreads(0) {
+  if (ThreadCount) {
+    errs() << "Warning: request a ThreadPool with " << ThreadCount
+           << " threads, but LLVM_ENABLE_THREADS has been turned off\n";
+  }
+}
+
+void ThreadPool::wait() {
+  // Sequential implementation running the tasks
+  while (!Tasks.empty()) {
+    auto Task = std::move(Tasks.front());
+    Tasks.pop();
+#ifndef _MSC_VER
+    Task();
+#else
+    Task(/* unused */ false);
+#endif
+  }
+}
+
+std::shared_future<ThreadPool::VoidTy> ThreadPool::asyncImpl(TaskTy Task) {
+#ifndef _MSC_VER
+  // Get a Future with launch::deferred execution using std::async
+  auto Future = std::async(std::launch::deferred, std::move(Task)).share();
+  // Wrap the future so that both ThreadPool::wait() can operate and the
+  // returned future can be sync'ed on.
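// [Editor's note, not part of the upstream patch] std::launch::deferred runs
// the task only when someone calls get()/wait() on the future; queueing the
// wrapper below lets the sequential wait() above force that evaluation by
// invoking Future.get() for each queued entry.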
+  PackagedTaskTy PackagedTask([Future]() { Future.get(); });
+#else
+  auto Future = std::async(std::launch::deferred, std::move(Task), false).share();
+  PackagedTaskTy PackagedTask([Future](bool) -> bool { Future.get(); return false; });
+#endif
+  Tasks.push(std::move(PackagedTask));
+  return Future;
+}
+
+ThreadPool::~ThreadPool() {
+  wait();
+}
+
+#endif
diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp
index 136b93eceefa..94a4c011693c 100644
--- a/lib/Support/TimeValue.cpp
+++ b/lib/Support/TimeValue.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Config/config.h"

 namespace llvm {
+
 using namespace sys;

 const TimeValue::SecondsType
@@ -22,8 +23,7 @@ const TimeValue::SecondsType
 const TimeValue::SecondsType
   TimeValue::Win32ZeroTimeSeconds = -12591158400ULL;

-void
-TimeValue::normalize( void ) {
+void TimeValue::normalize() {
   if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
     do {
       seconds_++;
@@ -45,7 +45,7 @@ TimeValue::normalize( void ) {
   }
 }

-}
+} // namespace llvm

 /// Include the platform-specific portion of TimeValue class
 #ifdef LLVM_ON_UNIX
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index d7b65155d6ef..414f559f8f0e 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/Support/Timer.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
@@ -22,9 +23,6 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;

-// CreateInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
-
 // getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
 // of constructor/destructor ordering being unspecified by C++.  Basically the
 // problem is that a Statistic object gets destroyed, which ends up calling
@@ -52,28 +50,27 @@ namespace {
                    cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
 }

-// CreateInfoOutputFile - Return a file stream to print our output on.
-raw_ostream *llvm::CreateInfoOutputFile() {
+// Return a file stream to print our output on.
+std::unique_ptr<raw_ostream> llvm::CreateInfoOutputFile() {
   const std::string &OutputFilename = getLibSupportInfoOutputFilename();
   if (OutputFilename.empty())
-    return new raw_fd_ostream(2, false); // stderr.
+    return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
   if (OutputFilename == "-")
-    return new raw_fd_ostream(1, false); // stdout.
-
+    return llvm::make_unique<raw_fd_ostream>(1, false); // stdout.
+
   // Append mode is used because the info output file is opened and closed
   // each time -stats or -time-passes wants to print output to it. To
   // compensate for this, the test-suite Makefiles have code to delete the
   // info output file before running commands which write to it.
   std::error_code EC;
-  raw_ostream *Result = new raw_fd_ostream(OutputFilename, EC,
-                                           sys::fs::F_Append | sys::fs::F_Text);
+  auto Result = llvm::make_unique<raw_fd_ostream>(
+      OutputFilename, EC, sys::fs::F_Append | sys::fs::F_Text);
   if (!EC)
     return Result;
-
+
   errs() << "Error opening info-output-file '"
          << OutputFilename << " for appending!\n";
-  delete Result;
-  return new raw_fd_ostream(2, false); // stderr.
+  return llvm::make_unique<raw_fd_ostream>(2, false); // stderr.
 }

@@ -99,17 +96,13 @@ static TimerGroup *getDefaultTimerGroup() {
 //===----------------------------------------------------------------------===//

 void Timer::init(StringRef N) {
-  assert(!TG && "Timer already initialized");
-  Name.assign(N.begin(), N.end());
-  Started = false;
-  TG = getDefaultTimerGroup();
-  TG->addTimer(*this);
+  init(N, *getDefaultTimerGroup());
 }

 void Timer::init(StringRef N, TimerGroup &tg) {
   assert(!TG && "Timer already initialized");
   Name.assign(N.begin(), N.end());
-  Started = false;
+  Running = Triggered = false;
   TG = &tg;
   TG->addTimer(*this);
 }
@@ -142,25 +135,22 @@ TimeRecord TimeRecord::getCurrentTime(bool Start) {
   return Result;
 }

-static ManagedStatic<std::vector<Timer*> > ActiveTimers;
-
 void Timer::startTimer() {
-  Started = true;
-  ActiveTimers->push_back(this);
-  Time -= TimeRecord::getCurrentTime(true);
+  assert(!Running && "Cannot start a running timer");
+  Running = Triggered = true;
+  StartTime = TimeRecord::getCurrentTime(true);
 }

 void Timer::stopTimer() {
+  assert(Running && "Cannot stop a paused timer");
+  Running = false;
   Time += TimeRecord::getCurrentTime(false);
+  Time -= StartTime;
+}

-  if (ActiveTimers->back() == this) {
-    ActiveTimers->pop_back();
-  } else {
-    std::vector<Timer*>::iterator I =
-      std::find(ActiveTimers->begin(), ActiveTimers->end(), this);
-    assert(I != ActiveTimers->end() && "stop but no startTimer?");
-    ActiveTimers->erase(I);
-  }
+void Timer::clear() {
+  Running = Triggered = false;
+  Time = StartTime = TimeRecord();
 }

 static void printVal(double Val, double Total, raw_ostream &OS) {
@@ -278,8 +268,8 @@ void TimerGroup::removeTimer(Timer &T) {
   sys::SmartScopedLock<true> L(*TimerLock);

   // If the timer was started, move its data to TimersToPrint.
-  if (T.Started)
-    TimersToPrint.push_back(std::make_pair(T.Time, T.Name));
+  if (T.hasTriggered())
+    TimersToPrint.emplace_back(T.Time, T.Name);

   T.TG = nullptr;

@@ -292,10 +282,9 @@ void TimerGroup::removeTimer(Timer &T) {
   // them were started.
   if (FirstTimer || TimersToPrint.empty())
     return;
-
-  raw_ostream *OutStream = CreateInfoOutputFile();
+
+  std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
   PrintQueuedTimers(*OutStream);
-  delete OutStream;   // Close the file.
 }

 void TimerGroup::addTimer(Timer &T) {
@@ -314,8 +303,8 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
   std::sort(TimersToPrint.begin(), TimersToPrint.end());

   TimeRecord Total;
-  for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
-    Total += TimersToPrint[i].first;
+  for (auto &RecordNamePair : TimersToPrint)
+    Total += RecordNamePair.first;

   // Print out timing header.
   OS << "===" << std::string(73, '-') << "===\n";
@@ -365,12 +354,11 @@ void TimerGroup::print(raw_ostream &OS) {
   // See if any of our timers were started, if so add them to TimersToPrint and
   // reset them.
   for (Timer *T = FirstTimer; T; T = T->Next) {
-    if (!T->Started) continue;
-    TimersToPrint.push_back(std::make_pair(T->Time, T->Name));
+    if (!T->hasTriggered()) continue;
+    TimersToPrint.emplace_back(T->Time, T->Name);

     // Clear out the time.
-    T->Started = 0;
-    T->Time = TimeRecord();
+    T->clear();
   }

   // If any timers were started, print the group.
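[Editor's note, not part of the upstream patch] Statistic.cpp and Timer.cpp
above now share the unique_ptr-returning CreateInfoOutputFile(), so the old
"delete &OutStream" idiom disappears. A minimal sketch of the new call
pattern, assuming only the declarations visible in these hunks:

    {
      std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
      *OutStream << "some report\n";
    } // the stream is flushed and closed when the unique_ptr goes out of scope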
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index c6646fb101b7..3bb1116007ed 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -25,6 +25,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
   case aarch64_be:  return "aarch64_be";
   case arm:         return "arm";
   case armeb:       return "armeb";
+  case avr:         return "avr";
   case bpfel:       return "bpfel";
   case bpfeb:       return "bpfeb";
   case hexagon:     return "hexagon";
@@ -80,6 +81,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
   case thumb:
   case thumbeb:     return "arm";

+  case avr:         return "avr";
+
   case ppc64:
   case ppc64le:
   case ppc:         return "ppc";
@@ -124,8 +127,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
   case spir64:      return "spir";
   case kalimba:     return "kalimba";
   case shave:       return "shave";
-  case wasm32:      return "wasm32";
-  case wasm64:      return "wasm64";
+  case wasm32:
+  case wasm64:      return "wasm";
   }
 }

@@ -144,6 +147,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
   case MipsTechnologies: return "mti";
   case NVIDIA: return "nvidia";
   case CSR: return "csr";
+  case Myriad: return "myriad";
   }

   llvm_unreachable("Invalid VendorType!");
@@ -177,6 +181,9 @@ const char *Triple::getOSTypeName(OSType Kind) {
   case NVCL: return "nvcl";
   case AMDHSA: return "amdhsa";
   case PS4: return "ps4";
+  case ELFIAMCU: return "elfiamcu";
+  case TvOS: return "tvos";
+  case WatchOS: return "watchos";
   }

   llvm_unreachable("Invalid OSType");
@@ -196,6 +203,8 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
   case MSVC: return "msvc";
   case Itanium: return "itanium";
   case Cygnus: return "cygnus";
+  case AMDOpenCL: return "amdopencl";
+  case CoreCLR: return "coreclr";
   }

   llvm_unreachable("Invalid EnvironmentType!");
@@ -224,6 +233,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
     .Case("arm64", aarch64) // "arm64" is an alias for "aarch64"
     .Case("arm", arm)
     .Case("armeb", armeb)
+    .Case("avr", avr)
     .StartsWith("bpf", BPFArch)
     .Case("mips", mips)
     .Case("mipsel", mipsel)
@@ -265,8 +275,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
 }

 static Triple::ArchType parseARMArch(StringRef ArchName) {
-  unsigned ISA = ARMTargetParser::parseArchISA(ArchName);
-  unsigned ENDIAN = ARMTargetParser::parseArchEndian(ArchName);
+  unsigned ISA = ARM::parseArchISA(ArchName);
+  unsigned ENDIAN = ARM::parseArchEndian(ArchName);

   Triple::ArchType arch = Triple::UnknownArch;
   switch (ENDIAN) {
@@ -300,7 +310,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
     }
   }

-  ArchName = ARMTargetParser::getCanonicalArchName(ArchName);
+  ArchName = ARM::getCanonicalArchName(ArchName);
   if (ArchName.empty())
     return Triple::UnknownArch;

@@ -310,8 +320,8 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
     return Triple::UnknownArch;

   // Thumb only for v6m
-  unsigned Profile = ARMTargetParser::parseArchProfile(ArchName);
-  unsigned Version = ARMTargetParser::parseArchVersion(ArchName);
+  unsigned Profile = ARM::parseArchProfile(ArchName);
+  unsigned Version = ARM::parseArchVersion(ArchName);
   if (Profile == ARM::PK_M && Version == 6) {
     if (ENDIAN == ARM::EK_BIG)
       return Triple::thumbeb;
@@ -323,10 +333,7 @@ static Triple::ArchType parseARMArch(StringRef ArchName) {
 }

 static Triple::ArchType parseArch(StringRef ArchName) {
-  Triple::ArchType ARMArch(parseARMArch(ArchName));
-  Triple::ArchType BPFArch(parseBPFArch(ArchName));
-
-  return StringSwitch<Triple::ArchType>(ArchName)
+  auto AT = StringSwitch<Triple::ArchType>(ArchName)
     .Cases("i386", "i486", "i586", "i686", Triple::x86)
     // FIXME: Do we need to support these?
     .Cases("i786", "i886", "i986", Triple::x86)
@@ -336,9 +343,14 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("powerpc64le", Triple::ppc64le)
     .Case("xscale", Triple::arm)
     .Case("xscaleeb", Triple::armeb)
-    .StartsWith("arm", ARMArch)
-    .StartsWith("thumb", ARMArch)
-    .StartsWith("aarch64", ARMArch)
+    .Case("aarch64", Triple::aarch64)
+    .Case("aarch64_be", Triple::aarch64_be)
+    .Case("arm64", Triple::aarch64)
+    .Case("arm", Triple::arm)
+    .Case("armeb", Triple::armeb)
+    .Case("thumb", Triple::thumb)
+    .Case("thumbeb", Triple::thumbeb)
+    .Case("avr", Triple::avr)
     .Case("msp430", Triple::msp430)
     .Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
     .Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@@ -346,7 +358,6 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("mips64el", Triple::mips64el)
     .Case("r600", Triple::r600)
     .Case("amdgcn", Triple::amdgcn)
-    .StartsWith("bpf", BPFArch)
     .Case("hexagon", Triple::hexagon)
     .Case("s390x", Triple::systemz)
     .Case("sparc", Triple::sparc)
@@ -369,6 +380,18 @@ static Triple::ArchType parseArch(StringRef ArchName) {
     .Case("wasm32", Triple::wasm32)
     .Case("wasm64", Triple::wasm64)
     .Default(Triple::UnknownArch);
+
+  // Some architectures require special parsing logic just to compute the
+  // ArchType result.
+  if (AT == Triple::UnknownArch) {
+    if (ArchName.startswith("arm") || ArchName.startswith("thumb") ||
+        ArchName.startswith("aarch64"))
+      return parseARMArch(ArchName);
+    if (ArchName.startswith("bpf"))
+      return parseBPFArch(ArchName);
+  }
+
+  return AT;
 }

 static Triple::VendorType parseVendor(StringRef VendorName) {
@@ -384,6 +407,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) {
     .Case("mti", Triple::MipsTechnologies)
     .Case("nvidia", Triple::NVIDIA)
     .Case("csr", Triple::CSR)
+    .Case("myriad", Triple::Myriad)
     .Default(Triple::UnknownVendor);
 }

@@ -414,6 +438,9 @@ static Triple::OSType parseOS(StringRef OSName) {
     .StartsWith("nvcl", Triple::NVCL)
     .StartsWith("amdhsa", Triple::AMDHSA)
     .StartsWith("ps4", Triple::PS4)
+    .StartsWith("elfiamcu", Triple::ELFIAMCU)
+    .StartsWith("tvos", Triple::TvOS)
+    .StartsWith("watchos", Triple::WatchOS)
     .Default(Triple::UnknownOS);
 }

@@ -430,6 +457,8 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
     .StartsWith("msvc", Triple::MSVC)
     .StartsWith("itanium", Triple::Itanium)
     .StartsWith("cygnus", Triple::Cygnus)
+    .StartsWith("amdopencl", Triple::AMDOpenCL)
+    .StartsWith("coreclr", Triple::CoreCLR)
     .Default(Triple::UnknownEnvironment);
 }

@@ -442,7 +471,7 @@ static Triple::ObjectFormatType parseFormat(StringRef EnvironmentName) {
 }

 static Triple::SubArchType parseSubArch(StringRef SubArchName) {
-  StringRef ARMSubArch = ARMTargetParser::getCanonicalArchName(SubArchName);
+  StringRef ARMSubArch = ARM::getCanonicalArchName(SubArchName);

   // For now, this is the small part. Early return.
   if (ARMSubArch.empty())
@@ -453,14 +482,12 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
     .Default(Triple::NoSubArch);

   // ARM sub arch.
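// [Editor's note, not part of the upstream patch] With the synonym table
// rewrite earlier in this patch, a spelling such as "armv6zk" now
// canonicalizes to "v6zk", maps through the "v6kz" synonym, and lands on
// ARM::AK_ARMV6KZ, which the switch below folds into Triple::ARMSubArch_v6k
// (a sketch of one path, not exhaustive).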
-  switch(ARMTargetParser::parseArch(ARMSubArch)) {
+  switch(ARM::parseArch(ARMSubArch)) {
   case ARM::AK_ARMV4:
     return Triple::NoSubArch;
   case ARM::AK_ARMV4T:
     return Triple::ARMSubArch_v4t;
-  case ARM::AK_ARMV5:
   case ARM::AK_ARMV5T:
-  case ARM::AK_ARMV5E:
     return Triple::ARMSubArch_v5;
   case ARM::AK_ARMV5TE:
   case ARM::AK_IWMMXT:
@@ -469,24 +496,19 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
   case ARM::AK_ARMV5TEJ:
     return Triple::ARMSubArch_v5te;
   case ARM::AK_ARMV6:
-  case ARM::AK_ARMV6J:
-  case ARM::AK_ARMV6Z:
     return Triple::ARMSubArch_v6;
   case ARM::AK_ARMV6K:
-  case ARM::AK_ARMV6ZK:
-  case ARM::AK_ARMV6HL:
+  case ARM::AK_ARMV6KZ:
     return Triple::ARMSubArch_v6k;
   case ARM::AK_ARMV6T2:
     return Triple::ARMSubArch_v6t2;
   case ARM::AK_ARMV6M:
-  case ARM::AK_ARMV6SM:
     return Triple::ARMSubArch_v6m;
-  case ARM::AK_ARMV7:
   case ARM::AK_ARMV7A:
   case ARM::AK_ARMV7R:
-  case ARM::AK_ARMV7L:
-  case ARM::AK_ARMV7HL:
     return Triple::ARMSubArch_v7;
+  case ARM::AK_ARMV7K:
+    return Triple::ARMSubArch_v7k;
   case ARM::AK_ARMV7M:
     return Triple::ARMSubArch_v7m;
   case ARM::AK_ARMV7S:
@@ -497,6 +519,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
     return Triple::ARMSubArch_v8;
   case ARM::AK_ARMV8_1A:
     return Triple::ARMSubArch_v8_1a;
+  case ARM::AK_ARMV8_2A:
+    return Triple::ARMSubArch_v8_2a;
   default:
     return Triple::NoSubArch;
   }
@@ -514,20 +538,53 @@ static const char *getObjectFormatTypeName(Triple::ObjectFormatType Kind) {

 static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
   switch (T.getArch()) {
-  default:
-    break;
+  case Triple::UnknownArch:
+  case Triple::aarch64:
+  case Triple::arm:
+  case Triple::thumb:
+  case Triple::x86:
+  case Triple::x86_64:
+    if (T.isOSDarwin())
+      return Triple::MachO;
+    else if (T.isOSWindows())
+      return Triple::COFF;
+    return Triple::ELF;
+
+  case Triple::aarch64_be:
+  case Triple::amdgcn:
+  case Triple::amdil:
+  case Triple::amdil64:
+  case Triple::armeb:
+  case Triple::avr:
+  case Triple::bpfeb:
+  case Triple::bpfel:
   case Triple::hexagon:
+  case Triple::hsail:
+  case Triple::hsail64:
+  case Triple::kalimba:
+  case Triple::le32:
+  case Triple::le64:
   case Triple::mips:
-  case Triple::mipsel:
   case Triple::mips64:
   case Triple::mips64el:
-  case Triple::r600:
-  case Triple::amdgcn:
-  case Triple::sparc:
-  case Triple::sparcv9:
-  case Triple::systemz:
-  case Triple::xcore:
+  case Triple::mipsel:
+  case Triple::msp430:
+  case Triple::nvptx:
+  case Triple::nvptx64:
   case Triple::ppc64le:
+  case Triple::r600:
+  case Triple::shave:
+  case Triple::sparc:
+  case Triple::sparcel:
+  case Triple::sparcv9:
+  case Triple::spir:
+  case Triple::spir64:
+  case Triple::systemz:
+  case Triple::tce:
+  case Triple::thumbeb:
+  case Triple::wasm32:
+  case Triple::wasm64:
+  case Triple::xcore:
     return Triple::ELF;

   case Triple::ppc:
@@ -536,12 +593,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
       return Triple::MachO;
     return Triple::ELF;
   }
-
-  if (T.isOSDarwin())
-    return Triple::MachO;
-  else if (T.isOSWindows())
-    return Triple::COFF;
-  return Triple::ELF;
+  llvm_unreachable("unknown architecture");
 }

 /// \brief Construct a triple from the string representation provided.
@@ -549,14 +601,27 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
 /// This stores the string representation and parses the various pieces into
 /// enum members.
 Triple::Triple(const Twine &Str)
-    : Data(Str.str()),
-      Arch(parseArch(getArchName())),
-      SubArch(parseSubArch(getArchName())),
-      Vendor(parseVendor(getVendorName())),
-      OS(parseOS(getOSName())),
-      Environment(parseEnvironment(getEnvironmentName())),
-      ObjectFormat(parseFormat(getEnvironmentName())) {
-  if (ObjectFormat == Triple::UnknownObjectFormat)
+    : Data(Str.str()), Arch(UnknownArch), SubArch(NoSubArch),
+      Vendor(UnknownVendor), OS(UnknownOS), Environment(UnknownEnvironment),
+      ObjectFormat(UnknownObjectFormat) {
+  // Do minimal parsing by hand here.
+  SmallVector<StringRef, 4> Components;
+  StringRef(Data).split(Components, '-', /*MaxSplit*/ 3);
+  if (Components.size() > 0) {
+    Arch = parseArch(Components[0]);
+    SubArch = parseSubArch(Components[0]);
+    if (Components.size() > 1) {
+      Vendor = parseVendor(Components[1]);
+      if (Components.size() > 2) {
+        OS = parseOS(Components[2]);
+        if (Components.size() > 3) {
+          Environment = parseEnvironment(Components[3]);
+          ObjectFormat = parseFormat(Components[3]);
+        }
+      }
+    }
+  }
+  if (ObjectFormat == UnknownObjectFormat)
     ObjectFormat = getDefaultFormat(*this);
 }

@@ -601,7 +666,7 @@ std::string Triple::normalize(StringRef Str) {

   // Parse into components.
   SmallVector<StringRef, 4> Components;
-  Str.split(Components, "-");
+  Str.split(Components, '-');

   // If the first component corresponds to a known architecture, preferentially
   // use it for the architecture.  If the second component corresponds to a
@@ -889,6 +954,8 @@ bool Triple::getMacOSXVersion(unsigned &Major, unsigned &Minor,
       return false;
     break;
   case IOS:
+  case TvOS:
+  case WatchOS:
     // Ignore the version from the triple.  This is only handled because the
     // the clang driver combines OS X and IOS support into a common Darwin
     // toolchain that wants to know the OS X version number even when targeting
@@ -916,11 +983,38 @@ void Triple::getiOSVersion(unsigned &Major, unsigned &Minor,
     Micro = 0;
     break;
   case IOS:
+  case TvOS:
     getOSVersion(Major, Minor, Micro);
     // Default to 5.0 (or 7.0 for arm64).
     if (Major == 0)
       Major = (getArch() == aarch64) ? 7 : 5;
     break;
+  case WatchOS:
+    llvm_unreachable("conflicting triple info");
+  }
+}
+
+void Triple::getWatchOSVersion(unsigned &Major, unsigned &Minor,
+                               unsigned &Micro) const {
+  switch (getOS()) {
+  default: llvm_unreachable("unexpected OS for Darwin triple");
+  case Darwin:
+  case MacOSX:
+    // Ignore the version from the triple.  This is only handled because the
+    // the clang driver combines OS X and IOS support into a common Darwin
+    // toolchain that wants to know the iOS version number even when targeting
+    // OS X.
+    Major = 2;
+    Minor = 0;
+    Micro = 0;
+    break;
+  case WatchOS:
+    getOSVersion(Major, Minor, Micro);
+    if (Major == 0)
+      Major = 2;
+    break;
+  case IOS:
+    llvm_unreachable("conflicting triple info");
   }
 }

@@ -993,6 +1087,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
   case llvm::Triple::UnknownArch:
     return 0;

+  case llvm::Triple::avr:
   case llvm::Triple::msp430:
     return 16;

@@ -1062,6 +1157,7 @@ Triple Triple::get32BitArchVariant() const {
   case Triple::aarch64:
   case Triple::aarch64_be:
   case Triple::amdgcn:
+  case Triple::avr:
   case Triple::bpfel:
   case Triple::bpfeb:
   case Triple::msp430:
@@ -1116,6 +1212,7 @@ Triple Triple::get64BitArchVariant() const {
   case Triple::UnknownArch:
   case Triple::arm:
   case Triple::armeb:
+  case Triple::avr:
   case Triple::hexagon:
   case Triple::kalimba:
   case Triple::msp430:
@@ -1172,6 +1269,7 @@ Triple Triple::getBigEndianArchVariant() const {
   case Triple::amdgcn:
   case Triple::amdil64:
   case Triple::amdil:
+  case Triple::avr:
   case Triple::hexagon:
   case Triple::hsail64:
   case Triple::hsail:
@@ -1244,6 +1342,7 @@ Triple Triple::getLittleEndianArchVariant() const {
   case Triple::amdil64:
   case Triple::amdil:
   case Triple::arm:
+  case Triple::avr:
   case Triple::bpfel:
   case Triple::hexagon:
   case Triple::hsail64:
@@ -1281,10 +1380,10 @@ Triple Triple::getLittleEndianArchVariant() const {
   return T;
 }

-const char *Triple::getARMCPUForArch(StringRef MArch) const {
+StringRef Triple::getARMCPUForArch(StringRef MArch) const {
   if (MArch.empty())
     MArch = getArchName();
-  MArch = ARMTargetParser::getCanonicalArchName(MArch);
+  MArch = ARM::getCanonicalArchName(MArch);

   // Some defaults are forced.
   switch (getOS()) {
@@ -1296,15 +1395,21 @@ const char *Triple::getARMCPUForArch(StringRef MArch) const {
   case llvm::Triple::Win32:
     // FIXME: this is invalid for WindowsCE
     return "cortex-a9";
+  case llvm::Triple::MacOSX:
+  case llvm::Triple::IOS:
+  case llvm::Triple::WatchOS:
+    if (MArch == "v7k")
+      return "cortex-a7";
+    break;
   default:
     break;
   }

   if (MArch.empty())
-    return nullptr;
+    return StringRef();

-  const char *CPU = ARMTargetParser::getDefaultCPU(MArch);
-  if (CPU)
+  StringRef CPU = ARM::getDefaultCPU(MArch);
+  if (!CPU.empty())
     return CPU;

   // If no specific architecture version is requested, return the minimum CPU
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index c421ee84c2b7..d70319168b84 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -50,9 +50,8 @@ int getPosixProtectionFlags(unsigned Flags) {
     return PROT_READ | PROT_WRITE;
   case llvm::sys::Memory::MF_READ|llvm::sys::Memory::MF_EXEC:
     return PROT_READ | PROT_EXEC;
-  case llvm::sys::Memory::MF_READ |
-       llvm::sys::Memory::MF_WRITE |
-       llvm::sys::Memory::MF_EXEC:
+  case llvm::sys::Memory::MF_READ | llvm::sys::Memory::MF_WRITE |
+      llvm::sys::Memory::MF_EXEC:
     return PROT_READ | PROT_WRITE | PROT_EXEC;
   case llvm::sys::Memory::MF_EXEC:
 #if defined(__FreeBSD__)
@@ -153,6 +152,7 @@ Memory::releaseMappedMemory(MemoryBlock &M) {

 std::error_code
 Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
+  static const size_t PageSize = Process::getPageSize();
   if (M.Address == nullptr || M.Size == 0)
     return std::error_code();

@@ -161,7 +161,7 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {

   int Protect = getPosixProtectionFlags(Flags);

-  int Result = ::mprotect(M.Address, M.Size, Protect);
+  int Result = ::mprotect((void*)((uintptr_t)M.Address & ~(PageSize-1)), PageSize*((M.Size+PageSize-1)/PageSize), Protect);
   if (Result != 0)
     return std::error_code(errno, std::generic_category());

@@ -181,7 +181,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
                     std::string *ErrMsg) {
   if (NumBytes == 0) return MemoryBlock();

-  size_t PageSize = Process::getPageSize();
+  static const size_t PageSize = Process::getPageSize();
   size_t NumPages = (NumBytes+PageSize-1)/PageSize;

   int fd = -1;
@@ -265,15 +265,12 @@ bool Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) {
 }

 bool Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) {
-#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
   if (M.Address == 0 || M.Size == 0) return false;
   Memory::InvalidateInstructionCache(M.Address, M.Size);
+#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
   kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address,
     (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY);
   return KERN_SUCCESS == kr;
-#elif defined(__arm__) || defined(__aarch64__)
-  Memory::InvalidateInstructionCache(M.Address, M.Size);
-  return true;
 #else
   return true;
 #endif
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 973d010dcac1..d85c37ab3bfa 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -75,12 +75,12 @@ test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
   char fullpath[PATH_MAX];

   snprintf(fullpath, PATH_MAX, "%s/%s", dir, bin);
-  if (realpath(fullpath, ret) == NULL)
-    return (1);
+  if (!realpath(fullpath, ret))
+    return 1;
   if (stat(fullpath, &sb) != 0)
-    return (1);
+    return 1;

-  return (0);
+  return 0;
 }

 static char *
@@ -91,34 +91,34 @@ getprogpath(char ret[PATH_MAX], const char *bin)

   /* First approach: absolute path. */
   if (bin[0] == '/') {
     if (test_dir(ret, "/", bin) == 0)
-      return (ret);
-    return (NULL);
+      return ret;
+    return nullptr;
   }

   /* Second approach: relative path. */
-  if (strchr(bin, '/') != NULL) {
+  if (strchr(bin, '/')) {
     char cwd[PATH_MAX];
-    if (getcwd(cwd, PATH_MAX) == NULL)
-      return (NULL);
+    if (!getcwd(cwd, PATH_MAX))
+      return nullptr;
     if (test_dir(ret, cwd, bin) == 0)
-      return (ret);
-    return (NULL);
+      return ret;
+    return nullptr;
   }

   /* Third approach: $PATH */
-  if ((pv = getenv("PATH")) == NULL)
-    return (NULL);
+  if ((pv = getenv("PATH")) == nullptr)
+    return nullptr;
   s = pv = strdup(pv);
-  if (pv == NULL)
-    return (NULL);
-  while ((t = strsep(&s, ":")) != NULL) {
+  if (!pv)
+    return nullptr;
+  while ((t = strsep(&s, ":")) != nullptr) {
     if (test_dir(ret, t, bin) == 0) {
       free(pv);
-      return (ret);
+      return ret;
     }
   }
   free(pv);
-  return (NULL);
+  return nullptr;
 }
 #endif // __FreeBSD__ || __NetBSD__ || __FreeBSD_kernel__

@@ -153,8 +153,8 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
       return std::string(exe_path, len);
   } else {
     // Fall back to the classical detection.
-    if (getprogpath(exe_path, argv0) != NULL)
-      return exe_path;
+    if (getprogpath(exe_path, argv0))
+      return exe_path;
   }
 #elif defined(HAVE_DLFCN_H)
   // Use dladdr to get executable path if available.
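// [Editor's note, not part of the upstream patch] The getprogpath() changes
// above are purely stylistic (nullptr and unparenthesized returns); the
// detection order is unchanged: absolute path, then a path relative to the
// current directory, then each $PATH entry via strsep(). Hypothetical call,
// mirroring the fallback in getMainExecutable():
//
//   char exe_path[PATH_MAX];
//   if (getprogpath(exe_path, argv0)) // returns ret on success, else nullptr
//     return exe_path;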
@@ -219,11 +219,12 @@ std::error_code current_path(SmallVectorImpl<char> &result) {
   return std::error_code();
 }

-std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
+std::error_code create_directory(const Twine &path, bool IgnoreExisting,
+                                 perms Perms) {
   SmallString<128> path_storage;
   StringRef p = path.toNullTerminatedStringRef(path_storage);

-  if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
+  if (::mkdir(p.begin(), Perms) == -1) {
     if (errno != EEXIST || !IgnoreExisting)
       return std::error_code(errno, std::generic_category());
   }
@@ -324,6 +325,10 @@ std::error_code access(const Twine &Path, AccessMode Mode) {
   return std::error_code();
 }

+bool can_execute(const Twine &Path) {
+  return !access(Path, AccessMode::Execute);
+}
+
 bool equivalent(file_status A, file_status B) {
   assert(status_known(A) && status_known(B));
   return A.fs_st_dev == B.fs_st_dev &&
@@ -555,6 +560,54 @@ bool home_directory(SmallVectorImpl<char> &result) {
   return false;
 }

+static bool getDarwinConfDir(bool TempDir, SmallVectorImpl<char> &Result) {
+  #if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR)
+  // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
+  // macros defined in <unistd.h> on darwin >= 9
+  int ConfName = TempDir ? _CS_DARWIN_USER_TEMP_DIR
+                         : _CS_DARWIN_USER_CACHE_DIR;
+  size_t ConfLen = confstr(ConfName, nullptr, 0);
+  if (ConfLen > 0) {
+    do {
+      Result.resize(ConfLen);
+      ConfLen = confstr(ConfName, Result.data(), Result.size());
+    } while (ConfLen > 0 && ConfLen != Result.size());
+
+    if (ConfLen > 0) {
+      assert(Result.back() == 0);
+      Result.pop_back();
+      return true;
+    }
+
+    Result.clear();
+  }
+  #endif
+  return false;
+}
+
+static bool getUserCacheDir(SmallVectorImpl<char> &Result) {
+  // First try using XDS_CACHE_HOME env variable,
+  // as specified in XDG Base Directory Specification at
+  // http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
+  if (const char *XdsCacheDir = std::getenv("XDS_CACHE_HOME")) {
+    Result.clear();
+    Result.append(XdsCacheDir, XdsCacheDir + strlen(XdsCacheDir));
+    return true;
+  }
+
+  // Try Darwin configuration query
+  if (getDarwinConfDir(false, Result))
+    return true;
+
+  // Use "$HOME/.cache" if $HOME is available
+  if (home_directory(Result)) {
+    append(Result, ".cache");
+    return true;
+  }
+
+  return false;
+}
+
 static const char *getEnvTempDir() {
   // Check whether the temporary directory is specified by an environment
   // variable.
@@ -589,27 +642,8 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) {
     }
   }

-#if defined(_CS_DARWIN_USER_TEMP_DIR) && defined(_CS_DARWIN_USER_CACHE_DIR)
-  // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR.
-  // macros defined in <unistd.h> on darwin >= 9
-  int ConfName = ErasedOnReboot? _CS_DARWIN_USER_TEMP_DIR
-                               : _CS_DARWIN_USER_CACHE_DIR;
-  size_t ConfLen = confstr(ConfName, nullptr, 0);
-  if (ConfLen > 0) {
-    do {
-      Result.resize(ConfLen);
-      ConfLen = confstr(ConfName, Result.data(), Result.size());
-    } while (ConfLen > 0 && ConfLen != Result.size());
-
-    if (ConfLen > 0) {
-      assert(Result.back() == 0);
-      Result.pop_back();
-      return;
-    }
-
-    Result.clear();
-  }
-#endif
+  if (getDarwinConfDir(ErasedOnReboot, Result))
+    return;

   const char *RequestedDir = getDefaultTempDir(ErasedOnReboot);
   Result.append(RequestedDir, RequestedDir + strlen(RequestedDir));
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index df13bd221739..27083eeb072d 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -430,13 +430,18 @@ const char *Process::ResetColor() {
 #if !defined(HAVE_DECL_ARC4RANDOM) || !HAVE_DECL_ARC4RANDOM
 static unsigned GetRandomNumberSeed() {
   // Attempt to get the initial seed from /dev/urandom, if possible.
-  if (FILE *RandomSource = ::fopen("/dev/urandom", "r")) {
+  int urandomFD = open("/dev/urandom", O_RDONLY);
+
+  if (urandomFD != -1) {
     unsigned seed;
-    int count = ::fread((void *)&seed, sizeof(seed), 1, RandomSource);
-    ::fclose(RandomSource);
+    // Don't use a buffered read to avoid reading more data
+    // from /dev/urandom than we need.
+    int count = read(urandomFD, (void *)&seed, sizeof(seed));
+
+    close(urandomFD);

     // Return the seed if the read was successful.
-    if (count == 1)
+    if (count == sizeof(seed))
       return seed;
   }
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index 8947b62e4dc2..a8d1fe3c07d0 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -323,7 +323,6 @@ namespace llvm {

 ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
                       bool WaitUntilTerminates, std::string *ErrMsg) {
-#ifdef HAVE_SYS_WAIT_H
   struct sigaction Act, Old;
   assert(PI.Pid && "invalid pid to wait on, process not started?");

@@ -417,12 +416,6 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
     // signal during execution as opposed to failing to execute.
     WaitResult.ReturnCode = -2;
   }
-#else
-  if (ErrMsg)
-    *ErrMsg = "Program::Wait is not implemented on this platform yet!";
-  ProcessInfo WaitResult;
-  WaitResult.ReturnCode = -2;
-#endif
   return WaitResult;
 }

diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index bfe2a3a380ed..061cdb3da216 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -17,7 +17,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/Program.h"
@@ -25,7 +24,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <string>
-#include <vector>
 #if HAVE_EXECINFO_H
 # include <execinfo.h>         // For backtrace().
 #endif
@@ -58,8 +56,6 @@ static ManagedStatic<sys::SmartMutex<true> > SignalsMutex;
 static void (*InterruptFunction)() = nullptr;

 static ManagedStatic<std::vector<std::string>> FilesToRemove;
-static ManagedStatic<std::vector<std::pair<void (*)(void *), void *>>>
-    CallBacksToRun;

 // IntSigs - Signals that represent requested termination. There's no bug
 // or failure, or if there is, it's not our direct responsibility. For whatever
@@ -90,12 +86,11 @@ static unsigned NumRegisteredSignals = 0;
 static struct {
   struct sigaction SA;
   int SigNo;
-} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+} RegisteredSignalInfo[array_lengthof(IntSigs) + array_lengthof(KillSigs)];

 static void RegisterHandler(int Signal) {
-  assert(NumRegisteredSignals <
-         sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+  assert(NumRegisteredSignals < array_lengthof(RegisteredSignalInfo) &&
          "Out of space for signal handlers!");

   struct sigaction NewHandler;
@@ -117,7 +112,7 @@ static void RegisterHandlers() {
   // during handling an actual signal because you can't safely call new in a
   // signal handler.
   *SignalsMutex;
-  
+
   // If the handlers are already registered, we're done.
   if (NumRegisteredSignals != 0) return;

@@ -148,9 +143,6 @@ static void RemoveFilesToRemove() {
   // memory.
   std::vector<std::string>& FilesToRemoveRef = *FilesToRemove;
   for (unsigned i = 0, e = FilesToRemoveRef.size(); i != e; ++i) {
-    // We rely on a std::string implementation for which repeated calls to
-    // 'c_str()' don't allocate memory. We pre-call 'c_str()' on all of these
-    // strings to try to ensure this is safe.
     const char *path = FilesToRemoveRef[i].c_str();

     // Get the status so we can determine if it's a file or directory. If we
@@ -164,7 +156,7 @@ static void RemoveFilesToRemove() {
     // super-user permissions.
     if (!S_ISREG(buf.st_mode))
       continue;
-  
+
     // Otherwise, remove the file. We ignore any errors here as there is nothing
     // else we can do.
     unlink(path);
@@ -205,11 +197,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
   }

   // Otherwise if it is a fault (like SEGV) run any handler.
-  if (CallBacksToRun.isConstructed()) {
-    auto &CallBacksToRunRef = *CallBacksToRun;
-    for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i)
-      CallBacksToRunRef[i].first(CallBacksToRunRef[i].second);
-  }
+  llvm::sys::RunSignalHandlers();

 #ifdef __s390__
   // On S/390, certain signals are delivered with PSW Address pointing to
@@ -239,21 +227,7 @@ bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
                                    std::string* ErrMsg) {
   {
     sys::SmartScopedLock<true> Guard(*SignalsMutex);
-    std::vector<std::string>& FilesToRemoveRef = *FilesToRemove;
-    std::string *OldPtr =
-        FilesToRemoveRef.empty() ? nullptr : &FilesToRemoveRef[0];
-    FilesToRemoveRef.push_back(Filename);
-
-    // We want to call 'c_str()' on every std::string in this vector so that if
-    // the underlying implementation requires a re-allocation, it happens here
-    // rather than inside of the signal handler. If we see the vector grow, we
-    // have to call it on every entry. If it remains in place, we only need to
-    // call it on the latest one.
-    if (OldPtr == &FilesToRemoveRef[0])
-      FilesToRemoveRef.back().c_str();
-    else
-      for (unsigned i = 0, e = FilesToRemoveRef.size(); i != e; ++i)
-        FilesToRemoveRef[i].c_str();
+    FilesToRemove->push_back(Filename);
   }

   RegisterHandlers();
@@ -268,13 +242,6 @@ void llvm::sys::DontRemoveFileOnSignal(StringRef Filename) {
   std::vector<std::string>::iterator I = FilesToRemove->end();
   if (RI != FilesToRemove->rend())
     I = FilesToRemove->erase(RI.base()-1);
-
-  // We need to call c_str() on every element which would have been moved by
-  // the erase. These elements, in a C++98 implementation where c_str()
-  // requires a reallocation on the first call may have had the call to c_str()
-  // made on insertion become invalid by being copied down an element.
- for (std::vector::iterator E = FilesToRemove->end(); I != E; ++I) - I->c_str(); } /// AddSignalHandler - Add a function to be called when a signal is delivered @@ -285,10 +252,9 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { RegisterHandlers(); } -#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) - -#if HAVE_LINK_H && (defined(__linux__) || defined(__FreeBSD__) || \ - defined(__FreeBSD_kernel__) || defined(__NetBSD__)) +#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) && HAVE_LINK_H && \ + (defined(__linux__) || defined(__FreeBSD__) || \ + defined(__FreeBSD_kernel__) || defined(__NetBSD__)) struct DlIteratePhdrData { void **StackTrace; int depth; @@ -321,108 +287,27 @@ static int dl_iterate_phdr_cb(dl_phdr_info *info, size_t size, void *arg) { return 0; } +/// If this is an ELF platform, we can find all loaded modules and their virtual +/// addresses with dl_iterate_phdr. static bool findModulesAndOffsets(void **StackTrace, int Depth, const char **Modules, intptr_t *Offsets, - const char *MainExecutableName) { + const char *MainExecutableName, + StringSaver &StrPool) { DlIteratePhdrData data = {StackTrace, Depth, true, Modules, Offsets, MainExecutableName}; dl_iterate_phdr(dl_iterate_phdr_cb, &data); return true; } #else +/// This platform does not have dl_iterate_phdr, so we do not yet know how to +/// find all loaded DSOs. static bool findModulesAndOffsets(void **StackTrace, int Depth, const char **Modules, intptr_t *Offsets, - const char *MainExecutableName) { + const char *MainExecutableName, + StringSaver &StrPool) { return false; } -#endif - -static bool printSymbolizedStackTrace(void **StackTrace, int Depth, - llvm::raw_ostream &OS) { - // FIXME: Subtract necessary number from StackTrace entries to turn return addresses - // into actual instruction addresses. - // Use llvm-symbolizer tool to symbolize the stack traces. - ErrorOr LLVMSymbolizerPathOrErr = - sys::findProgramByName("llvm-symbolizer"); - if (!LLVMSymbolizerPathOrErr) - return false; - const std::string &LLVMSymbolizerPath = *LLVMSymbolizerPathOrErr; - // We don't know argv0 or the address of main() at this point, but try - // to guess it anyway (it's possible on some platforms). 
- std::string MainExecutableName = sys::fs::getMainExecutable(nullptr, nullptr); - if (MainExecutableName.empty() || - MainExecutableName.find("llvm-symbolizer") != std::string::npos) - return false; - - std::vector Modules(Depth, nullptr); - std::vector Offsets(Depth, 0); - if (!findModulesAndOffsets(StackTrace, Depth, Modules.data(), Offsets.data(), - MainExecutableName.c_str())) - return false; - int InputFD; - SmallString<32> InputFile, OutputFile; - sys::fs::createTemporaryFile("symbolizer-input", "", InputFD, InputFile); - sys::fs::createTemporaryFile("symbolizer-output", "", OutputFile); - FileRemover InputRemover(InputFile.c_str()); - FileRemover OutputRemover(OutputFile.c_str()); - - { - raw_fd_ostream Input(InputFD, true); - for (int i = 0; i < Depth; i++) { - if (Modules[i]) - Input << Modules[i] << " " << (void*)Offsets[i] << "\n"; - } - } - - StringRef InputFileStr(InputFile); - StringRef OutputFileStr(OutputFile); - StringRef StderrFileStr; - const StringRef *Redirects[] = {&InputFileStr, &OutputFileStr, - &StderrFileStr}; - const char *Args[] = {"llvm-symbolizer", "--functions=linkage", "--inlining", - "--demangle", nullptr}; - int RunResult = - sys::ExecuteAndWait(LLVMSymbolizerPath, Args, nullptr, Redirects); - if (RunResult != 0) - return false; - - auto OutputBuf = MemoryBuffer::getFile(OutputFile.c_str()); - if (!OutputBuf) - return false; - StringRef Output = OutputBuf.get()->getBuffer(); - SmallVector Lines; - Output.split(Lines, "\n"); - auto CurLine = Lines.begin(); - int frame_no = 0; - for (int i = 0; i < Depth; i++) { - if (!Modules[i]) { - OS << format("#%d %p\n", frame_no++, StackTrace[i]); - continue; - } - // Read pairs of lines (function name and file/line info) until we - // encounter empty line. - for (;;) { - if (CurLine == Lines.end()) - return false; - StringRef FunctionName = *CurLine++; - if (FunctionName.empty()) - break; - OS << format("#%d %p ", frame_no++, StackTrace[i]); - if (!FunctionName.startswith("??")) - OS << format("%s ", FunctionName.str().c_str()); - if (CurLine == Lines.end()) - return false; - StringRef FileLineInfo = *CurLine++; - if (!FileLineInfo.startswith("??")) - OS << format("%s", FileLineInfo.str().c_str()); - else - OS << format("(%s+%p)", Modules[i], (void *)Offsets[i]); - OS << "\n"; - } - } - return true; -} -#endif // defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) +#endif // defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) && ... // PrintStackTrace - In the case of a program crash or fault, print out a stack // trace so that the user has an indication of why and where we died. diff --git a/lib/Support/Unix/Unix.h b/lib/Support/Unix/Unix.h index e16a226a8eaf..871e612f6c16 100644 --- a/lib/Support/Unix/Unix.h +++ b/lib/Support/Unix/Unix.h @@ -29,6 +29,7 @@ #include #include #include +#include #ifdef HAVE_UNISTD_H #include @@ -43,22 +44,10 @@ #endif #include -#ifdef HAVE_SYS_WAIT_H -# include -#endif - #ifdef HAVE_DLFCN_H # include #endif -#ifndef WEXITSTATUS -# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8) -#endif - -#ifndef WIFEXITED -# define WIFEXITED(stat_val) (((stat_val) & 255) == 0) -#endif - /// This function builds an error message into \p ErrMsg using the \p prefix /// string and the Unix error number given by \p errnum. If errnum is -1, the /// default then the value of errno is used. 
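The doc comment above states the Unix MakeErrMsg contract: combine a caller-supplied prefix with the text for a Unix error number, defaulting to errno when -1 is passed. A hedged sketch of that pattern, with illustrative names rather than the LLVM helper itself:

#include <cerrno>
#include <cstring>
#include <string>

// Build "prefix: strerror(errnum)" into *ErrMsg; -1 means "use errno".
// Returns true so callers can write "return makeErrorMessage(...)" on failure.
static bool makeErrorMessage(std::string *ErrMsg, const std::string &Prefix,
                             int ErrNum = -1) {
  if (!ErrMsg)
    return true; // Caller did not ask for a message.
  if (ErrNum == -1)
    ErrNum = errno;
  // Note: strerror() is not thread-safe; production code would prefer
  // strerror_r() where available.
  *ErrMsg = Prefix + ": " + std::strerror(ErrNum);
  return true;
}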
diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp index facf8d927ecd..8d852a67c075 100644 --- a/lib/Support/Valgrind.cpp +++ b/lib/Support/Valgrind.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/Valgrind.h" #include "llvm/Config/config.h" +#include #if HAVE_VALGRIND_VALGRIND_H #include @@ -52,23 +53,3 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) { } #endif // !HAVE_VALGRIND_VALGRIND_H - -// These functions require no implementation, tsan just looks at the arguments -// they're called with. However, they are required to be weak as some other -// application or library may already be providing these definitions for the -// same reason we are. -extern "C" { -LLVM_ATTRIBUTE_WEAK void AnnotateHappensAfter(const char *file, int line, - const volatile void *cv); -void AnnotateHappensAfter(const char *file, int line, const volatile void *cv) { -} -LLVM_ATTRIBUTE_WEAK void AnnotateHappensBefore(const char *file, int line, - const volatile void *cv); -void AnnotateHappensBefore(const char *file, int line, - const volatile void *cv) {} -LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesBegin(const char *file, int line); -void AnnotateIgnoreWritesBegin(const char *file, int line) {} -LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesEnd(const char *file, int line); -void AnnotateIgnoreWritesEnd(const char *file, int line) {} -} - diff --git a/lib/Support/Windows/COM.inc b/lib/Support/Windows/COM.inc index 0c50d6f74ea3..54f3ecf28ec2 100644 --- a/lib/Support/Windows/COM.inc +++ b/lib/Support/Windows/COM.inc @@ -1,4 +1,4 @@ -//===- llvm/Support/Windows/COM.inc - Windows COM Implementation *- C++ -*-===// +//==- llvm/Support/Windows/COM.inc - Windows COM Implementation -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index d38f19749118..17418b015c75 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -76,14 +76,14 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, SmallVector filenameUnicode; if (std::error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) { SetLastError(ec.value()); - MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: "); + MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16"); return DynamicLibrary(); } HMODULE a_handle = LoadLibraryW(filenameUnicode.data()); if (a_handle == 0) { - MakeErrMsg(errMsg, std::string(filename) + ": Can't open : "); + MakeErrMsg(errMsg, std::string(filename) + ": Can't open"); return DynamicLibrary(); } diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc index 4b2ff2e2d324..7eab9ff3afd2 100644 --- a/lib/Support/Windows/Memory.inc +++ b/lib/Support/Windows/Memory.inc @@ -192,14 +192,14 @@ static DWORD getProtection(const void *addr) { bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) { if (!setRangeWritable(M.Address, M.Size)) { - return MakeErrMsg(ErrMsg, "Cannot set memory to writeable: "); + return MakeErrMsg(ErrMsg, "Cannot set memory to writeable"); } return true; } bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) { if (!setRangeExecutable(M.Address, M.Size)) { - return MakeErrMsg(ErrMsg, "Cannot set memory to executable: "); + return MakeErrMsg(ErrMsg, "Cannot set memory to executable"); } return true; } diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 72da7c5fec32..4e4841231eff 100644 --- a/lib/Support/Windows/Path.inc 
+++ b/lib/Support/Windows/Path.inc
@@ -182,7 +182,8 @@ std::error_code current_path(SmallVectorImpl<char> &result) { return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result); }
-std::error_code create_directory(const Twine &path, bool IgnoreExisting) {
+std::error_code create_directory(const Twine &path, bool IgnoreExisting,
+                                 perms Perms) {
 SmallVector<wchar_t, 128> path_utf16;
 if (std::error_code ec = widenPath(path, path_utf16))
@@ -252,17 +253,34 @@ std::error_code rename(const Twine &from, const Twine &to) { return ec; std::error_code ec = std::error_code();
+
+ // Retry while we see ERROR_ACCESS_DENIED.
+ // System scanners (e.g. an indexer) might open the source file when it is
+ // written and closed.
+ for (int i = 0; i < 2000; i++) {
+ // Try ReplaceFile first, as it is able to associate a new data stream with
+ // the destination even if the destination file is currently open.
+ if (::ReplaceFileW(wide_to.begin(), wide_from.begin(), NULL, 0, NULL, NULL))
+ return std::error_code();
+
+ // We get ERROR_FILE_NOT_FOUND if the destination file is missing.
+ // MoveFileEx can handle this case.
+ DWORD ReplaceError = ::GetLastError();
+ ec = mapWindowsError(ReplaceError);
+ if (ReplaceError != ERROR_ACCESS_DENIED &&
+ ReplaceError != ERROR_FILE_NOT_FOUND &&
+ ReplaceError != ERROR_SHARING_VIOLATION)
+ break;
+
 if (::MoveFileExW(wide_from.begin(), wide_to.begin(), MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
 return std::error_code();
- DWORD LastError = ::GetLastError();
- ec = mapWindowsError(LastError);
- if (LastError != ERROR_ACCESS_DENIED)
- break;
- // Retry MoveFile() at ACCESS_DENIED.
- // System scanners (eg. indexer) might open the source file when
- // It is written and closed.
+
+ DWORD MoveError = ::GetLastError();
+ ec = mapWindowsError(MoveError);
+ if (MoveError != ERROR_ACCESS_DENIED)
+ break;
+
 ::Sleep(1);
 }
@@ -301,6 +319,11 @@ std::error_code access(const Twine &Path, AccessMode Mode) { return std::error_code(); }
+bool can_execute(const Twine &Path) {
+ return !access(Path, AccessMode::Execute) ||
+ !access(Path + ".exe", AccessMode::Execute);
+}
+
 bool equivalent(file_status A, file_status B) { assert(status_known(A) && status_known(B)); return A.FileIndexHigh == B.FileIndexHigh &&
@@ -325,10 +348,12 @@ std::error_code equivalent(const Twine &A, const Twine &B, bool &result) {
 static bool isReservedName(StringRef path) {
 // This list of reserved names comes from MSDN, at:
 // http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx
- static const char *sReservedNames[] = { "nul", "con", "prn", "aux",
- "com1", "com2", "com3", "com4", "com5", "com6",
- "com7", "com8", "com9", "lpt1", "lpt2", "lpt3",
- "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" };
+ static const char *const sReservedNames[] = { "nul", "con", "prn", "aux",
+ "com1", "com2", "com3", "com4",
+ "com5", "com6", "com7", "com8",
+ "com9", "lpt1", "lpt2", "lpt3",
+ "lpt4", "lpt5", "lpt6", "lpt7",
+ "lpt8", "lpt9" };
 // First, check to see if this is a device namespace, which always
 // starts with \\.\, since device namespaces are not legal file paths.
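For illustration, the rule that isReservedName() enforces can be sketched standalone: the DOS device names match case-insensitively, while device-namespace paths (starting with \\.\) are exempt. The names below are hypothetical, not the patch's code, and the real function operates on an llvm::StringRef rather than std::string:

#include <algorithm>
#include <cctype>
#include <string>

static bool isReservedDeviceName(std::string Path) {
  // Device namespace paths (\\.\...) are never treated as reserved names.
  if (Path.compare(0, 4, "\\\\.\\") == 0)
    return false;
  // Windows reserves these stems case-insensitively (CON, con, Con, ...).
  std::transform(Path.begin(), Path.end(), Path.begin(),
                 [](unsigned char C) { return std::tolower(C); });
  static const char *const Reserved[] = {
      "nul", "con", "prn", "aux",
      "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
      "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9"};
  for (const char *Name : Reserved)
    if (Path == Name)
      return true;
  return false;
}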
@@ -643,9 +668,10 @@ std::error_code openFileForRead(const Twine &Name, int &ResultFD) { if (std::error_code EC = widenPath(Name, PathUTF16)) return EC;
- HANDLE H = ::CreateFileW(PathUTF16.begin(), GENERIC_READ,
- FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
- OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
+ HANDLE H =
+ ::CreateFileW(PathUTF16.begin(), GENERIC_READ,
+ FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
+ NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
 if (H == INVALID_HANDLE_VALUE) {
 DWORD LastError = ::GetLastError();
 std::error_code EC = mapWindowsError(LastError);
@@ -728,30 +754,31 @@ std::error_code openFileForWrite(const Twine &Name, int &ResultFD, } // end namespace fs
 namespace path {
-
-bool home_directory(SmallVectorImpl<char> &result) {
- wchar_t Path[MAX_PATH];
- if (::SHGetFolderPathW(0, CSIDL_APPDATA | CSIDL_FLAG_CREATE, 0,
- /*SHGFP_TYPE_CURRENT*/0, Path) != S_OK)
+static bool getKnownFolderPath(KNOWNFOLDERID folderId,
+ SmallVectorImpl<char> &result) {
+ wchar_t *path = nullptr;
+ if (::SHGetKnownFolderPath(folderId, KF_FLAG_CREATE, nullptr, &path) != S_OK)
 return false;
- if (UTF16ToUTF8(Path, ::wcslen(Path), result))
- return false;
-
- return true;
+ bool ok = !UTF16ToUTF8(path, ::wcslen(path), result);
+ ::CoTaskMemFree(path);
+ return ok;
}
-static bool getTempDirEnvVar(const char *Var, SmallVectorImpl<char> &Res) {
- SmallVector<wchar_t, 128> NameUTF16;
- if (windows::UTF8ToUTF16(Var, NameUTF16))
- return false;
+bool getUserCacheDir(SmallVectorImpl<char> &Result) {
+ return getKnownFolderPath(FOLDERID_LocalAppData, Result);
+}
+bool home_directory(SmallVectorImpl<char> &result) {
+ return getKnownFolderPath(FOLDERID_Profile, result);
+}
+
+static bool getTempDirEnvVar(const wchar_t *Var, SmallVectorImpl<char> &Res) {
 SmallVector<wchar_t, 1024> Buf;
 size_t Size = 1024;
 do {
 Buf.reserve(Size);
- Size = GetEnvironmentVariableW(NameUTF16.data(), Buf.data(), Buf.capacity());
+ Size = GetEnvironmentVariableW(Var, Buf.data(), Buf.capacity());
 if (Size == 0)
 return false;
@@ -759,14 +786,12 @@ static bool getTempDirEnvVar(const char *Var, SmallVectorImpl<char> &Res) { } while (Size > Buf.capacity()); Buf.set_size(Size);
- if (windows::UTF16ToUTF8(Buf.data(), Size, Res))
- return false;
- return true;
+ return !windows::UTF16ToUTF8(Buf.data(), Size, Res);
}
 static bool getTempDirEnvVar(SmallVectorImpl<char> &Res) {
- const char *EnvironmentVariables[] = {"TMP", "TEMP", "USERPROFILE"};
- for (const char *Env : EnvironmentVariables) {
+ const wchar_t *EnvironmentVariables[] = {L"TMP", L"TEMP", L"USERPROFILE"};
+ for (auto *Env : EnvironmentVariables) {
 if (getTempDirEnvVar(Env, Res))
 return true;
 }
@@ -777,13 +802,19 @@ void system_temp_directory(bool ErasedOnReboot, SmallVectorImpl<char> &Result) { (void)ErasedOnReboot; Result.clear();
- // Check whether the temporary directory is specified by an environment
- // variable.
- if (getTempDirEnvVar(Result))
+ // Check whether the temporary directory is specified by an environment var.
+ // This matches GetTempPath logic to some degree. GetTempPath is not used
+ // directly as it cannot handle env vars longer than 130 chars on Windows 7
+ // (fixed on Windows 8).
+ if (getTempDirEnvVar(Result)) {
+ assert(!Result.empty() && "Unexpected empty path");
+ native(Result); // Some Unix-like shells use Unix path separator in $TMP.
+ fs::make_absolute(Result); // Make it absolute if not already.
 return;
+ }
 // Fall back to a system default.
- const char *DefaultResult = "C:\\TEMP";
+ const char *DefaultResult = "C:\\Temp";
 Result.append(DefaultResult, DefaultResult + strlen(DefaultResult));
}
} // end namespace path
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index 8164956d1511..dae35a88132b 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -417,16 +417,23 @@ const char *Process::ResetColor() { return 0; }
+// Include GetLastError() in a fatal error message.
+static void ReportLastErrorFatal(const char *Msg) {
+ std::string ErrMsg;
+ MakeErrMsg(&ErrMsg, Msg);
+ report_fatal_error(ErrMsg);
+}
+
 unsigned Process::GetRandomNumber() {
 HCRYPTPROV HCPC;
 if (!::CryptAcquireContextW(&HCPC, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
- report_fatal_error("Could not acquire a cryptographic context");
+ ReportLastErrorFatal("Could not acquire a cryptographic context");
 ScopedCryptContext CryptoProvider(HCPC);
 unsigned Ret;
 if (!::CryptGenRandom(CryptoProvider, sizeof(Ret), reinterpret_cast<BYTE *>(&Ret)))
- report_fatal_error("Could not generate a random number");
+ ReportLastErrorFatal("Could not generate a random number");
 return Ret;
}
diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc
index c29d8729b1de..d4e14ddc6518 100644
--- a/lib/Support/Windows/Program.inc
+++ b/lib/Support/Windows/Program.inc
@@ -75,8 +75,15 @@ ErrorOr<std::string> sys::findProgramByName(StringRef Name, do { U16Result.reserve(Len);
- Len = ::SearchPathW(Path, c_str(U16Name),
- U16Ext.empty() ? nullptr : c_str(U16Ext),
+ // Let's attach the extension manually. That is needed for files
+ // with a dot in the name, like aaa.bbb. SearchPathW will not add the
+ // extension from its argument to such files because it thinks they
+ // already have one.
+ SmallVector<wchar_t, MAX_PATH> U16NameExt;
+ if (std::error_code EC =
+ windows::UTF8ToUTF16(Twine(Name + Ext).str(), U16NameExt))
+ return EC;
+
+ Len = ::SearchPathW(Path, c_str(U16NameExt),
 nullptr, U16Result.capacity(), U16Result.data(), nullptr);
 } while (Len > U16Result.capacity());
@@ -132,7 +139,7 @@ static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) { FILE_ATTRIBUTE_NORMAL, NULL); if (h == INVALID_HANDLE_VALUE) {
 MakeErrMsg(ErrMsg, fname + ": Can't open file for " +
- (fd ? "input: " : "output: "));
+ (fd ? "input" : "output"));
 }
 return h;
@@ -251,6 +258,14 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, return false; }
+ // can_execute may succeed by looking at Program + ".exe". CreateProcessW
+ // will implicitly add the .exe if we provide a command line without an
+ // executable path, but since we use an explicit executable, we have to add
+ // ".exe" ourselves.
+ SmallString<64> ProgramStorage;
+ if (!sys::fs::exists(Program))
+ Program = Twine(Program + ".exe").toStringRef(ProgramStorage);
+
 // Windows wants a command line, not an array of args, to pass to the new
 // process. We have to concatenate them all, while quoting the args that
 // have embedded spaces (or are empty).
@@ -416,7 +431,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, if (SecondsToWait) { if (!TerminateProcess(PI.ProcessHandle, 1)) { if (ErrMsg)
- MakeErrMsg(ErrMsg, "Failed to terminate timed-out program.");
+ MakeErrMsg(ErrMsg, "Failed to terminate timed-out program");
 // -2 indicates a crash or timeout as opposed to failure to execute.
WaitResult.ReturnCode = -2; @@ -441,7 +456,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, if (!rc) { SetLastError(err); if (ErrMsg) - MakeErrMsg(ErrMsg, "Failed getting status for program."); + MakeErrMsg(ErrMsg, "Failed getting status for program"); // -2 indicates a crash or timeout as opposed to failure to execute. WaitResult.ReturnCode = -2; diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index 5c8c23978ac9..d109a66d7035 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -14,7 +14,6 @@ #include #include #include -#include #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -136,6 +135,10 @@ typedef BOOL (WINAPI *fpSymGetLineFromAddr64)(HANDLE, DWORD64, PDWORD, PIMAGEHLP_LINE64); static fpSymGetLineFromAddr64 fSymGetLineFromAddr64; +typedef BOOL(WINAPI *fpSymGetModuleInfo64)(HANDLE hProcess, DWORD64 dwAddr, + PIMAGEHLP_MODULE64 ModuleInfo); +static fpSymGetModuleInfo64 fSymGetModuleInfo64; + typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64); static fpSymFunctionTableAccess64 fSymFunctionTableAccess64; @@ -145,6 +148,9 @@ static fpSymSetOptions fSymSetOptions; typedef BOOL (WINAPI *fpSymInitialize)(HANDLE, PCSTR, BOOL); static fpSymInitialize fSymInitialize; +typedef BOOL (WINAPI *fpEnumerateLoadedModules)(HANDLE,PENUMLOADED_MODULES_CALLBACK64,PVOID); +static fpEnumerateLoadedModules fEnumerateLoadedModules; + static bool load64BitDebugHelp(void) { HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll"); if (hLib) { @@ -156,14 +162,20 @@ static bool load64BitDebugHelp(void) { ::GetProcAddress(hLib, "SymGetSymFromAddr64"); fSymGetLineFromAddr64 = (fpSymGetLineFromAddr64) ::GetProcAddress(hLib, "SymGetLineFromAddr64"); + fSymGetModuleInfo64 = (fpSymGetModuleInfo64) + ::GetProcAddress(hLib, "SymGetModuleInfo64"); fSymFunctionTableAccess64 = (fpSymFunctionTableAccess64) ::GetProcAddress(hLib, "SymFunctionTableAccess64"); fSymSetOptions = (fpSymSetOptions)::GetProcAddress(hLib, "SymSetOptions"); fSymInitialize = (fpSymInitialize)::GetProcAddress(hLib, "SymInitialize"); + fEnumerateLoadedModules = (fpEnumerateLoadedModules) + ::GetProcAddress(hLib, "EnumerateLoadedModules64"); } return fStackWalk64 && fSymInitialize && fSymSetOptions; } +using namespace llvm; + // Forward declare. static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep); static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType); @@ -172,7 +184,6 @@ static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType); static void (*InterruptFunction)() = 0; static std::vector *FilesToRemove = NULL; -static std::vector > *CallBacksToRun = 0; static bool RegisteredUnhandledExceptionFilter = false; static bool CleanupExecuted = false; static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL; @@ -183,23 +194,106 @@ static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL; static CRITICAL_SECTION CriticalSection; static bool CriticalSectionInitialized = false; +enum { +#if defined(_M_X64) + NativeMachineType = IMAGE_FILE_MACHINE_AMD64 +#else + NativeMachineType = IMAGE_FILE_MACHINE_I386 +#endif +}; + +static bool printStackTraceWithLLVMSymbolizer(llvm::raw_ostream &OS, + HANDLE hProcess, HANDLE hThread, + STACKFRAME64 &StackFrameOrig, + CONTEXT *ContextOrig) { + // StackWalk64 modifies the incoming stack frame and context, so copy them. + STACKFRAME64 StackFrame = StackFrameOrig; + + // Copy the register context so that we don't modify it while we unwind. 
We + // could use InitializeContext + CopyContext, but that's only required to get + // at AVX registers, which typically aren't needed by StackWalk64. Reduce the + // flag set to indicate that there's less data. + CONTEXT Context = *ContextOrig; + Context.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER; + + static void *StackTrace[256]; + size_t Depth = 0; + while (fStackWalk64(NativeMachineType, hProcess, hThread, &StackFrame, + &Context, 0, fSymFunctionTableAccess64, + fSymGetModuleBase64, 0)) { + if (StackFrame.AddrFrame.Offset == 0) + break; + StackTrace[Depth++] = (void *)(uintptr_t)StackFrame.AddrPC.Offset; + if (Depth >= array_lengthof(StackTrace)) + break; + } + + return printSymbolizedStackTrace(&StackTrace[0], Depth, OS); +} + +namespace { +struct FindModuleData { + void **StackTrace; + int Depth; + const char **Modules; + intptr_t *Offsets; + StringSaver *StrPool; +}; +} + +static BOOL CALLBACK findModuleCallback(WIN32_ELMCB_PCSTR ModuleName, + DWORD64 ModuleBase, ULONG ModuleSize, + void *VoidData) { + FindModuleData *Data = (FindModuleData*)VoidData; + intptr_t Beg = ModuleBase; + intptr_t End = Beg + ModuleSize; + for (int I = 0; I < Data->Depth; I++) { + if (Data->Modules[I]) + continue; + intptr_t Addr = (intptr_t)Data->StackTrace[I]; + if (Beg <= Addr && Addr < End) { + Data->Modules[I] = Data->StrPool->save(ModuleName); + Data->Offsets[I] = Addr - Beg; + } + } + return TRUE; +} + +static bool findModulesAndOffsets(void **StackTrace, int Depth, + const char **Modules, intptr_t *Offsets, + const char *MainExecutableName, + StringSaver &StrPool) { + if (!fEnumerateLoadedModules) + return false; + FindModuleData Data; + Data.StackTrace = StackTrace; + Data.Depth = Depth; + Data.Modules = Modules; + Data.Offsets = Offsets; + Data.StrPool = &StrPool; + fEnumerateLoadedModules(GetCurrentProcess(), findModuleCallback, &Data); + return true; +} + static void PrintStackTraceForThread(llvm::raw_ostream &OS, HANDLE hProcess, HANDLE hThread, STACKFRAME64 &StackFrame, CONTEXT *Context) { - DWORD machineType; -#if defined(_M_X64) - machineType = IMAGE_FILE_MACHINE_AMD64; -#else - machineType = IMAGE_FILE_MACHINE_I386; -#endif - // Initialize the symbol handler. fSymSetOptions(SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES); fSymInitialize(hProcess, NULL, TRUE); + // Try llvm-symbolizer first. llvm-symbolizer knows how to deal with both PDBs + // and DWARF, so it should do a good job regardless of what debug info or + // linker is in use. + if (printStackTraceWithLLVMSymbolizer(OS, hProcess, hThread, StackFrame, + Context)) { + return; + } + while (true) { - if (!fStackWalk64(machineType, hProcess, hThread, &StackFrame, Context, 0, - fSymFunctionTableAccess64, fSymGetModuleBase64, 0)) { + if (!fStackWalk64(NativeMachineType, hProcess, hThread, &StackFrame, + Context, 0, fSymFunctionTableAccess64, + fSymGetModuleBase64, 0)) { break; } @@ -311,10 +405,7 @@ static void RegisterHandler() { // If we cannot load up the APIs (which would be unexpected as they should // exist on every version of Windows we support), we will bail out since // there would be nothing to report. 
- if (!load64BitDebugHelp()) { - assert(false && "These APIs should always be available"); - return; - } + assert(load64BitDebugHelp() && "These APIs should always be available"); if (RegisteredUnhandledExceptionFilter) { EnterCriticalSection(&CriticalSection); @@ -404,7 +495,6 @@ extern "C" VOID WINAPI RtlCaptureContext(PCONTEXT ContextRecord); #endif void llvm::sys::PrintStackTrace(raw_ostream &OS) { - STACKFRAME64 StackFrame = {}; CONTEXT Context = {}; ::RtlCaptureContext(&Context); @@ -436,8 +526,6 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) { /// to the process. The handler can have a cookie passed to it to identify /// what instance of the handler it is. void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { - if (CallBacksToRun == 0) - CallBacksToRun = new std::vector >(); CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie)); RegisterHandler(); LeaveCriticalSection(&CriticalSection); @@ -454,17 +542,12 @@ static void Cleanup() { CleanupExecuted = true; // FIXME: open files cannot be deleted. - if (FilesToRemove != NULL) while (!FilesToRemove->empty()) { llvm::sys::fs::remove(FilesToRemove->back()); FilesToRemove->pop_back(); } - - if (CallBacksToRun) - for (auto &I : *CallBacksToRun) - I.first(I.second); - + llvm::sys::RunSignalHandlers(); LeaveCriticalSection(&CriticalSection); } diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h index 5bb0b8d2d788..34d961b148d1 100644 --- a/lib/Support/Windows/WindowsSupport.h +++ b/lib/Support/Windows/WindowsSupport.h @@ -26,12 +26,13 @@ #undef _WIN32_WINNT #undef _WIN32_IE -// Require at least Windows XP(5.1) API. -#define _WIN32_WINNT 0x0501 -#define _WIN32_IE 0x0600 // MinGW at it again. +// Require at least Windows 7 API. +#define _WIN32_WINNT 0x0601 +#define _WIN32_IE 0x0800 // MinGW at it again. FIXME: verify if still needed. 
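The bump to _WIN32_WINNT 0x0601 above is what lets the Path.inc changes call SHGetKnownFolderPath, which only exists in the Vista-and-later API surface. A hypothetical compile-time guard for that assumption, not part of the patch, could look like:

#ifndef _WIN32_WINNT
#define _WIN32_WINNT 0x0601 // Windows 7 baseline, matching the header above.
#endif
#if _WIN32_WINNT < 0x0600
#error "This code assumes SHGetKnownFolderPath (Vista or newer)."
#endif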
#define WIN32_LEAN_AND_MEAN #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" // Get build system configuration settings @@ -47,13 +48,16 @@ inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { if (!ErrMsg) return true; char *buffer = NULL; + DWORD LastError = GetLastError(); DWORD R = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM, - NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL); + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_MAX_WIDTH_MASK, + NULL, LastError, 0, (LPSTR)&buffer, 1, NULL); if (R) - *ErrMsg = prefix + buffer; + *ErrMsg = prefix + ": " + buffer; else - *ErrMsg = prefix + "Unknown error"; + *ErrMsg = prefix + ": Unknown error"; + *ErrMsg += " (0x" + llvm::utohexstr(LastError) + ")"; LocalFree(buffer); return R != 0; diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp index d55da5ef1e4a..c4384cafff62 100644 --- a/lib/Support/YAMLParser.cpp +++ b/lib/Support/YAMLParser.cpp @@ -801,7 +801,7 @@ Token &Scanner::peekNext() { removeStaleSimpleKeyCandidates(); SimpleKey SK; - SK.Tok = TokenQueue.front(); + SK.Tok = TokenQueue.begin(); if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK) == SimpleKeys.end()) break; @@ -962,10 +962,8 @@ void Scanner::skip(uint32_t Distance) { bool Scanner::isBlankOrBreak(StringRef::iterator Position) { if (Position == End) return false; - if ( *Position == ' ' || *Position == '\t' - || *Position == '\r' || *Position == '\n') - return true; - return false; + return *Position == ' ' || *Position == '\t' || *Position == '\r' || + *Position == '\n'; } bool Scanner::consumeLineBreakIfPresent() { @@ -1163,7 +1161,7 @@ bool Scanner::scanFlowCollectionStart(bool IsSequence) { TokenQueue.push_back(T); // [ and { may begin a simple key. - saveSimpleKeyCandidate(TokenQueue.back(), Column - 1, false); + saveSimpleKeyCandidate(--TokenQueue.end(), Column - 1, false); // And may also be followed by a simple key. IsSimpleKeyAllowed = true; @@ -1326,7 +1324,7 @@ bool Scanner::scanFlowScalar(bool IsDoubleQuoted) { T.Range = StringRef(Start, Current - Start); TokenQueue.push_back(T); - saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; @@ -1404,7 +1402,7 @@ bool Scanner::scanPlainScalar() { TokenQueue.push_back(T); // Plain scalars can be simple keys. - saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; @@ -1439,7 +1437,7 @@ bool Scanner::scanAliasOrAnchor(bool IsAlias) { TokenQueue.push_back(T); // Alias and anchors can be simple keys. - saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; @@ -1669,7 +1667,7 @@ bool Scanner::scanTag() { TokenQueue.push_back(T); // Tags can be simple keys. 
- saveSimpleKeyCandidate(TokenQueue.back(), ColStart, false); + saveSimpleKeyCandidate(--TokenQueue.end(), ColStart, false); IsSimpleKeyAllowed = false; diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index 6b59a16514b1..2aa6e9b74683 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -332,17 +332,12 @@ std::unique_ptr Input::createHNodes(Node *N) { StringRef KeyStr = SN->getValue(StringStorage); if (!StringStorage.empty()) { // Copy string to permanent storage - unsigned Len = StringStorage.size(); - char *Buf = StringAllocator.Allocate(Len); - memcpy(Buf, &StringStorage[0], Len); - KeyStr = StringRef(Buf, Len); + KeyStr = StringStorage.str().copy(StringAllocator); } return llvm::make_unique(N, KeyStr); } else if (BlockScalarNode *BSN = dyn_cast(N)) { - StringRef Value = BSN->getValue(); - char *Buf = StringAllocator.Allocate(Value.size()); - memcpy(Buf, Value.data(), Value.size()); - return llvm::make_unique(N, StringRef(Buf, Value.size())); + StringRef ValueCopy = BSN->getValue().copy(StringAllocator); + return llvm::make_unique(N, ValueCopy); } else if (SequenceNode *SQ = dyn_cast(N)) { auto SQHNode = llvm::make_unique(N); for (Node &SN : *SQ) { @@ -365,10 +360,7 @@ std::unique_ptr Input::createHNodes(Node *N) { StringRef KeyStr = KeyScalar->getValue(StringStorage); if (!StringStorage.empty()) { // Copy string to permanent storage - unsigned Len = StringStorage.size(); - char *Buf = StringAllocator.Allocate(Len); - memcpy(Buf, &StringStorage[0], Len); - KeyStr = StringRef(Buf, Len); + KeyStr = StringStorage.str().copy(StringAllocator); } auto ValueHNode = this->createHNodes(KVN.getValue()); if (EC) diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 42f830bbf0fa..57c7ac32f559 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -517,7 +517,7 @@ raw_fd_ostream::raw_fd_ostream(StringRef Filename, std::error_code &EC, /// closes the file when the stream is destroyed. raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered) : raw_pwrite_stream(unbuffered), FD(fd), ShouldClose(shouldClose), - Error(false), UseAtomicWrites(false) { + Error(false) { if (FD < 0 ) { ShouldClose = false; return; @@ -568,21 +568,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { pos += Size; do { - ssize_t ret; - - // Check whether we should attempt to use atomic writes. - if (LLVM_LIKELY(!UseAtomicWrites)) { - ret = ::write(FD, Ptr, Size); - } else { - // Use ::writev() where available. -#if defined(HAVE_WRITEV) - const void *Addr = static_cast(Ptr); - struct iovec IOV = {const_cast(Addr), Size }; - ret = ::writev(FD, &IOV, 1); -#else - ret = ::write(FD, Ptr, Size); -#endif - } + ssize_t ret = ::write(FD, Ptr, Size); if (ret < 0) { // If it's a recoverable error, swallow it and retry the write. @@ -755,72 +741,15 @@ void raw_string_ostream::write_impl(const char *Ptr, size_t Size) { // raw_svector_ostream //===----------------------------------------------------------------------===// -// The raw_svector_ostream implementation uses the SmallVector itself as the -// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is -// always pointing past the end of the vector, but within the vector -// capacity. This allows raw_ostream to write directly into the correct place, -// and we only need to set the vector size when the data is flushed. 
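The removed comment above documented the old raw_svector_ostream trick of aiming the stream buffer into the SmallVector's spare capacity; the replacement below instead appends on every write and reports the vector size as the position. A minimal sketch of the simplified design, using a plain std::vector stand-in rather than the LLVM class:

#include <cstddef>
#include <cstring>
#include <vector>

class VectorStream {
  std::vector<char> &Buf; // Storage is owned by the caller.

public:
  explicit VectorStream(std::vector<char> &B) : Buf(B) {}
  // Unbuffered write: append directly, no separate stream buffer to sync.
  void write(const char *Ptr, size_t Size) {
    Buf.insert(Buf.end(), Ptr, Ptr + Size);
  }
  // Positioned overwrite; caller guarantees Offset + Size <= Buf.size().
  void pwrite(const char *Ptr, size_t Size, size_t Offset) {
    std::memcpy(Buf.data() + Offset, Ptr, Size);
  }
  size_t current_pos() const { return Buf.size(); } // Just the vector size.
};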
+uint64_t raw_svector_ostream::current_pos() const { return OS.size(); } -raw_svector_ostream::raw_svector_ostream(SmallVectorImpl &O, unsigned) - : OS(O) {} - -raw_svector_ostream::raw_svector_ostream(SmallVectorImpl &O) : OS(O) { - init(); -} - -void raw_svector_ostream::init() { - // Set up the initial external buffer. We make sure that the buffer has at - // least 128 bytes free; raw_ostream itself only requires 64, but we want to - // make sure that we don't grow the buffer unnecessarily on destruction (when - // the data is flushed). See the FIXME below. - OS.reserve(OS.size() + 128); - SetBuffer(OS.end(), OS.capacity() - OS.size()); -} - -raw_svector_ostream::~raw_svector_ostream() { - // FIXME: Prevent resizing during this flush(). - flush(); +void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { + OS.append(Ptr, Ptr + Size); } void raw_svector_ostream::pwrite_impl(const char *Ptr, size_t Size, uint64_t Offset) { - flush(); - memcpy(OS.begin() + Offset, Ptr, Size); -} - -/// resync - This is called when the SmallVector we're appending to is changed -/// outside of the raw_svector_ostream's control. It is only safe to do this -/// if the raw_svector_ostream has previously been flushed. -void raw_svector_ostream::resync() { - assert(GetNumBytesInBuffer() == 0 && "Didn't flush before mutating vector"); - - if (OS.capacity() - OS.size() < 64) - OS.reserve(OS.capacity() * 2); - SetBuffer(OS.end(), OS.capacity() - OS.size()); -} - -void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { - if (Ptr == OS.end()) { - // Grow the buffer to include the scratch area without copying. - size_t NewSize = OS.size() + Size; - assert(NewSize <= OS.capacity() && "Invalid write_impl() call!"); - OS.set_size(NewSize); - } else { - assert(!GetNumBytesInBuffer()); - OS.append(Ptr, Ptr + Size); - } - - OS.reserve(OS.size() + 64); - SetBuffer(OS.end(), OS.capacity() - OS.size()); -} - -uint64_t raw_svector_ostream::current_pos() const { - return OS.size(); -} - -StringRef raw_svector_ostream::str() { - flush(); - return StringRef(OS.begin(), OS.size()); + memcpy(OS.data() + Offset, Ptr, Size); } //===----------------------------------------------------------------------===// diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index c9a31b64cfd3..87a3422b32ab 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -673,6 +673,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { PrintFatalError(CurRec->getLoc(), "Undefined reference:'" + Name + "'\n"); } + + if (isa(getType())) { + if (BitsInit *BI = dyn_cast(LHS)) { + if (Init *NewInit = BI->convertInitializerTo(IntRecTy::get())) + return NewInit; + break; + } + } } break; } @@ -1633,7 +1641,7 @@ void Record::dump() const { errs() << *this; } raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) { OS << R.getNameInitAsString(); - const std::vector &TArgs = R.getTemplateArgs(); + ArrayRef TArgs = R.getTemplateArgs(); if (!TArgs.empty()) { OS << "<"; bool NeedComma = false; diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp index 07c538159dcb..f56b17acbfba 100644 --- a/lib/TableGen/SetTheory.cpp +++ b/lib/TableGen/SetTheory.cpp @@ -196,7 +196,7 @@ struct SequenceOp : public SetTheory::Operator { if (IntInit *II = dyn_cast(Expr->arg_begin()[2])) To = II->getValue(); else - PrintFatalError(Loc, "From must be an integer: " + Expr->getAsString()); + PrintFatalError(Loc, "To must be an integer: " + Expr->getAsString()); if (To < 0 || To >= (1 << 30)) 
PrintFatalError(Loc, "To out of range"); diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 5c36fda2e1ca..e5f6f165d13f 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -152,7 +152,7 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) { if (AddValue(CurRec, SubClass.RefRange.Start, Val)) return true; - const std::vector &TArgs = SC->getTemplateArgs(); + ArrayRef TArgs = SC->getTemplateArgs(); // Ensure that an appropriate number of template arguments are specified. if (TArgs.size() < SubClass.TemplateArgs.size()) @@ -228,7 +228,7 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC, CurMC->DefPrototypes.push_back(std::move(NewDef)); } - const std::vector &SMCTArgs = SMC->Rec.getTemplateArgs(); + ArrayRef SMCTArgs = SMC->Rec.getTemplateArgs(); // Ensure that an appropriate number of template arguments are // specified. @@ -1641,7 +1641,7 @@ std::vector TGParser::ParseValueList(Record *CurRec, Record *ArgsRec, RecTy *ItemType = EltTy; unsigned int ArgN = 0; if (ArgsRec && !EltTy) { - const std::vector &TArgs = ArgsRec->getTemplateArgs(); + ArrayRef TArgs = ArgsRec->getTemplateArgs(); if (TArgs.empty()) { TokError("template argument provided to non-template class"); return std::vector(); @@ -1662,7 +1662,7 @@ std::vector TGParser::ParseValueList(Record *CurRec, Record *ArgsRec, Lex.Lex(); // Eat the comma if (ArgsRec && !EltTy) { - const std::vector &TArgs = ArgsRec->getTemplateArgs(); + ArrayRef TArgs = ArgsRec->getTemplateArgs(); if (ArgN >= TArgs.size()) { TokError("too many template arguments"); return std::vector(); @@ -2313,13 +2313,11 @@ bool TGParser::ParseMultiClass() { return false; } -Record *TGParser:: -InstantiateMulticlassDef(MultiClass &MC, - Record *DefProto, - Init *&DefmPrefix, - SMRange DefmPrefixRange, - const std::vector &TArgs, - std::vector &TemplateVals) { +Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, + Init *&DefmPrefix, + SMRange DefmPrefixRange, + ArrayRef TArgs, + std::vector &TemplateVals) { // We need to preserve DefProto so it can be reused for later // instantiations, so create a new Record to inherit from it. @@ -2437,11 +2435,9 @@ InstantiateMulticlassDef(MultiClass &MC, return CurRec.release(); } -bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, - Record *CurRec, - SMLoc DefmPrefixLoc, - SMLoc SubClassLoc, - const std::vector &TArgs, +bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, Record *CurRec, + SMLoc DefmPrefixLoc, SMLoc SubClassLoc, + ArrayRef TArgs, std::vector &TemplateVals, bool DeleteArgs) { // Loop over all of the template arguments, setting them to the specified @@ -2540,7 +2536,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { std::vector &TemplateVals = Ref.TemplateArgs; // Verify that the correct number of template arguments were specified. - const std::vector &TArgs = MC->Rec.getTemplateArgs(); + ArrayRef TArgs = MC->Rec.getTemplateArgs(); if (TArgs.size() < TemplateVals.size()) return Error(SubClassLoc, "more template args specified than multiclass expects"); diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index d69d1f4572f9..8b41134d4ff1 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -135,17 +135,13 @@ private: // Parser methods. 
bool ParseObject(MultiClass *MC); bool ParseClass(); bool ParseMultiClass(); - Record *InstantiateMulticlassDef(MultiClass &MC, - Record *DefProto, - Init *&DefmPrefix, - SMRange DefmPrefixRange, - const std::vector &TArgs, + Record *InstantiateMulticlassDef(MultiClass &MC, Record *DefProto, + Init *&DefmPrefix, SMRange DefmPrefixRange, + ArrayRef TArgs, std::vector &TemplateVals); - bool ResolveMulticlassDefArgs(MultiClass &MC, - Record *DefProto, - SMLoc DefmPrefixLoc, - SMLoc SubClassLoc, - const std::vector &TArgs, + bool ResolveMulticlassDefArgs(MultiClass &MC, Record *DefProto, + SMLoc DefmPrefixLoc, SMLoc SubClassLoc, + ArrayRef TArgs, std::vector &TemplateVals, bool DeleteArgs); bool ResolveMulticlassDef(MultiClass &MC, diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 9a7d6c884db5..0bff9b592c15 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -32,6 +32,15 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable ARMv8 CRC-32 checksum instructions">; +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable ARMv8 PMUv3 Performance Monitors extension">; + +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Full FP16", [FeatureFPARMv8]>; + +def FeatureSPE : SubtargetFeature<"spe", "HasSPE", "true", + "Enable Statistical Profiling extension">; + /// Cyclone has register move instructions which are "free". def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -40,6 +49,15 @@ def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; +def FeatureStrictAlign : SubtargetFeature<"strict-align", + "StrictAlign", "true", + "Disallow all unaligned memory " + "access">; + +def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true", + "Reserve X18, making it unavailable " + "as a GPR">; + //===----------------------------------------------------------------------===// // Architectures. 
//
@@ -47,6 +65,9 @@ def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true", "Support ARM v8.1a instructions", [FeatureCRC]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+ "Support ARM v8.2a instructions", [HasV8_1aOps]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -70,19 +91,29 @@ include "AArch64SchedA53.td" include "AArch64SchedA57.td" include "AArch64SchedCyclone.td"
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+ "Cortex-A35 ARM processors",
+ [FeatureFPARMv8,
+ FeatureNEON,
+ FeatureCrypto,
+ FeatureCRC,
+ FeaturePerfMon]>;
+
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
 "Cortex-A53 ARM processors", [FeatureFPARMv8,
 FeatureNEON,
 FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
 "Cortex-A57 ARM processors", [FeatureFPARMv8,
 FeatureNEON,
 FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", "Cyclone",
@@ -90,12 +121,16 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", "Cyclone", FeatureNEON, FeatureCrypto, FeatureCRC,
+ FeaturePerfMon,
 FeatureZCRegMove,
 FeatureZCZeroing]>;
def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, FeatureNEON,
- FeatureCRC]>;
+ FeatureCRC,
+ FeaturePerfMon]>;
+// FIXME: Cortex-A35 is currently modelled as a Cortex-A53
+def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>;
// FIXME: Cortex-A72 is currently modelled as a Cortex-A57.
@@ -109,11 +144,13 @@ def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def GenericAsmParserVariant : AsmParserVariant {
 int Variant = 0;
 string Name = "generic";
+ string BreakCharacters = ".";
}
def AppleAsmParserVariant : AsmParserVariant {
 int Variant = 1;
 string Name = "apple-neon";
+ string BreakCharacters = ".";
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64A53Fix835769.cpp b/lib/Target/AArch64/AArch64A53Fix835769.cpp
index d7ef3f4ef653..d215d9e831c0 100644
--- a/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -122,7 +122,7 @@ AArch64A53Fix835769::runOnMachineFunction(MachineFunction &F) { static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB, const TargetInstrInfo *TII) { // Get the previous machine basic block in the function.
- MachineFunction::iterator MBBI = *MBB;
+ MachineFunction::iterator MBBI(MBB);
 // Can't go off top of function.
 if (MBBI == MBB->getParent()->begin())
@@ -131,7 +131,7 @@ static MachineBasicBlock *getBBFallenThrough(MachineBasicBlock *MBB, MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond;
- MachineBasicBlock *PrevBB = std::prev(MBBI);
+ MachineBasicBlock *PrevBB = &*std::prev(MBBI);
 for (MachineBasicBlock *S : MBB->predecessors())
 if (S == PrevBB && !TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond) && !TBB && !FBB)
@@ -151,10 +151,9 @@ static MachineInstr *getLastNonPseudo(MachineBasicBlock &MBB, // If there is no non-pseudo in the current block, loop back around and try // the previous block (if there is one).
while ((FMBB = getBBFallenThrough(FMBB, TII))) { - for (auto I = FMBB->rbegin(), E = FMBB->rend(); I != E; ++I) { - if (!I->isPseudo()) - return &*I; - } + for (MachineInstr &I : make_range(FMBB->rbegin(), FMBB->rend())) + if (!I.isPseudo()) + return &I; } // There was no previous non-pseudo in the fallen through blocks @@ -217,8 +216,8 @@ AArch64A53Fix835769::runOnBasicBlock(MachineBasicBlock &MBB) { ++Idx; } - DEBUG(dbgs() << "Scan complete, "<< Sequences.size() - << " occurences of pattern found.\n"); + DEBUG(dbgs() << "Scan complete, " << Sequences.size() + << " occurrences of pattern found.\n"); // Then update the basic block, inserting nops between the detected sequences. for (auto &MI : Sequences) { diff --git a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp index 9d6dbd641a16..79a84ad8c6c5 100644 --- a/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp +++ b/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp @@ -593,7 +593,6 @@ bool AArch64A57FPLoadBalancing::colorChain(Chain *G, Color C, if (Change) { Substs[MO.getReg()] = Reg; MO.setReg(Reg); - MRI->setPhysRegUsed(Reg); Changed = true; } diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp index 716e1a37b1f7..3afcdfb8b930 100644 --- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp +++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp @@ -57,6 +57,8 @@ EnableMerge("aarch64-type-promotion-merge", cl::Hidden, " the other."), cl::init(true)); +#define AARCH64_TYPE_PROMO_NAME "AArch64 Address Type Promotion" + //===----------------------------------------------------------------------===// // AArch64AddressTypePromotion //===----------------------------------------------------------------------===// @@ -76,7 +78,7 @@ public: } const char *getPassName() const override { - return "AArch64 Address Type Promotion"; + return AARCH64_TYPE_PROMO_NAME; } /// Iterate over the functions and promote the computation of interesting @@ -143,10 +145,10 @@ private: char AArch64AddressTypePromotion::ID = 0; INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion", - "AArch64 Type Promotion Pass", false, false) + AARCH64_TYPE_PROMO_NAME, false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion", - "AArch64 Type Promotion Pass", false, false) + AARCH64_TYPE_PROMO_NAME, false, false) FunctionPass *llvm::createAArch64AddressTypePromotionPass() { return new AArch64AddressTypePromotion(); diff --git a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp index 18d21fd38618..1644d71d2821 100644 --- a/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp +++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp @@ -61,6 +61,12 @@ STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used"); STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted"); STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted"); +namespace llvm { +void initializeAArch64AdvSIMDScalarPass(PassRegistry &); +} + +#define AARCH64_ADVSIMD_NAME "AdvSIMD Scalar Operation Optimization" + namespace { class AArch64AdvSIMDScalar : public MachineFunctionPass { MachineRegisterInfo *MRI; @@ -82,12 +88,14 @@ private: public: static char ID; // Pass identification, replacement for typeid. 
- explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {} + explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) { + initializeAArch64AdvSIMDScalarPass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &F) override; const char *getPassName() const override { - return "AdvSIMD Scalar Operation Optimization"; + return AARCH64_ADVSIMD_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -98,6 +106,9 @@ public: char AArch64AdvSIMDScalar::ID = 0; } // end anonymous namespace +INITIALIZE_PASS(AArch64AdvSIMDScalar, "aarch64-simd-scalar", + AARCH64_ADVSIMD_NAME, false, false) + static bool isGPR64(unsigned Reg, unsigned SubReg, const MachineRegisterInfo *MRI) { if (SubReg) @@ -381,7 +392,7 @@ bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) { // Just check things on a one-block-at-a-time basis. for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) - if (processMachineBasicBlock(I)) + if (processMachineBasicBlock(&*I)) Changed = true; return Changed; } diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp index d973234dd86a..a614f555a4e9 100644 --- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp +++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp @@ -45,6 +45,12 @@ BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), STATISTIC(NumSplit, "Number of basic blocks split"); STATISTIC(NumRelaxed, "Number of conditional branches relaxed"); +namespace llvm { +void initializeAArch64BranchRelaxationPass(PassRegistry &); +} + +#define AARCH64_BR_RELAX_NAME "AArch64 branch relaxation pass" + namespace { class AArch64BranchRelaxation : public MachineFunctionPass { /// BasicBlockInfo - Information about the offset and size of a single @@ -93,17 +99,22 @@ class AArch64BranchRelaxation : public MachineFunctionPass { public: static char ID; - AArch64BranchRelaxation() : MachineFunctionPass(ID) {} + AArch64BranchRelaxation() : MachineFunctionPass(ID) { + initializeAArch64BranchRelaxationPass(*PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "AArch64 branch relaxation pass"; + return AARCH64_BR_RELAX_NAME; } }; char AArch64BranchRelaxation::ID = 0; } +INITIALIZE_PASS(AArch64BranchRelaxation, "aarch64-branch-relax", + AARCH64_BR_RELAX_NAME, false, false) + /// verify - check BBOffsets, BBSizes, alignment of islands void AArch64BranchRelaxation::verify() { #ifndef NDEBUG @@ -131,14 +142,14 @@ void AArch64BranchRelaxation::dumpBBs() { /// into the block immediately after it. static bool BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; + MachineFunction::iterator MBBI(MBB); // Can't fall off end of function. - MachineBasicBlock *NextBB = std::next(MBBI); + auto NextBB = std::next(MBBI); if (NextBB == MBB->getParent()->end()) return false; for (MachineBasicBlock *S : MBB->successors()) - if (S == NextBB) + if (S == &*NextBB) return true; return false; @@ -216,9 +227,7 @@ AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) { // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; - ++MBBI; - MF->insert(MBBI, NewBB); + MF->insert(++OrigBB->getIterator(), NewBB); // Splice the instructions starting with MI over to NewBB. 
NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end()); @@ -421,7 +430,7 @@ bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) { MBB->replaceSuccessor(FBB, NewBB); NewBB->addSuccessor(FBB); } - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = &*std::next(MachineFunction::iterator(MBB)); DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << ", invert condition and change dest. to BB#" diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h index 1e2d1c3b93bd..bc44bc5f2461 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.h +++ b/lib/Target/AArch64/AArch64CallingConvention.h @@ -25,30 +25,28 @@ namespace { using namespace llvm; -static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, - AArch64::X3, AArch64::X4, AArch64::X5, - AArch64::X6, AArch64::X7}; -static const uint16_t HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, - AArch64::H3, AArch64::H4, AArch64::H5, - AArch64::H6, AArch64::H7}; -static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, - AArch64::S3, AArch64::S4, AArch64::S5, - AArch64::S6, AArch64::S7}; -static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, - AArch64::D3, AArch64::D4, AArch64::D5, - AArch64::D6, AArch64::D7}; -static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, - AArch64::Q3, AArch64::Q4, AArch64::Q5, - AArch64::Q6, AArch64::Q7}; +static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7}; +static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, + AArch64::H3, AArch64::H4, AArch64::H5, + AArch64::H6, AArch64::H7}; +static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, + AArch64::S3, AArch64::S4, AArch64::S5, + AArch64::S6, AArch64::S7}; +static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, + AArch64::D3, AArch64::D4, AArch64::D5, + AArch64::D6, AArch64::D7}; +static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, + AArch64::Q3, AArch64::Q4, AArch64::Q5, + AArch64::Q6, AArch64::Q7}; static bool finishStackBlock(SmallVectorImpl &PendingMembers, MVT LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State, unsigned SlotAlign) { unsigned Size = LocVT.getSizeInBits() / 8; - unsigned StackAlign = State.getMachineFunction() - .getTarget() - .getDataLayout() - ->getStackAlignment(); + unsigned StackAlign = + State.getMachineFunction().getDataLayout().getStackAlignment(); unsigned Align = std::min(ArgFlags.getOrigAlign(), StackAlign); for (auto &It : PendingMembers) { @@ -88,7 +86,7 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State) { // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. - ArrayRef RegList; + ArrayRef RegList; if (LocVT.SimpleTy == MVT::i64) RegList = XRegList; else if (LocVT.SimpleTy == MVT::f16) diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 815ebef177d8..388d64ec4e99 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -16,7 +16,7 @@ class CCIfAlign : CCIf; /// CCIfBigEndian - Match only if we're in big endian mode. 
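The BBHasFallthrough and splitBlockBeforeInstr changes above reflect ilist iterators no longer converting implicitly from node pointers: the code now constructs MachineFunction::iterator(MBB) or calls getIterator(), and turns an iterator back into a pointer with &*. A sketch of that round trip on a plain std::list, where the pointer-to-iterator step has to be a linear search (LLVM's intrusive list does it in O(1)):

#include <iterator>
#include <list>

std::list<int>::iterator toIterator(std::list<int> &L, int *Node) {
  // ilist offers Node->getIterator() for this; std::list has no
  // equivalent, so the stand-in scans.
  for (auto It = L.begin(), E = L.end(); It != E; ++It)
    if (&*It == Node)
      return It;
  return L.end();
}

int *nextNode(std::list<int> &L, int *Node) {
  auto It = std::next(toIterator(L, Node)); // explicit iterator arithmetic
  return It == L.end() ? nullptr : &*It;    // back to a pointer, as in &*NextBB
}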
class CCIfBigEndian : - CCIf<"State.getMachineFunction().getTarget().getDataLayout()->isBigEndian()", A>; + CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention @@ -279,6 +279,23 @@ def CSR_AArch64_TLS_Darwin FP, (sequence "Q%u", 0, 31))>; +// We can only handle a register pair with adjacent registers, the register pair +// should belong to the same class as well. Since the access function on the +// fast path calls a function that follows CSR_AArch64_TLS_Darwin, +// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. +def CSR_AArch64_CXX_TLS_Darwin + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), + (sequence "D%u", 0, 31))>; + +// CSRs that are handled by prologue, epilogue. +def CSR_AArch64_CXX_TLS_Darwin_PE + : CalleeSavedRegs<(add LR, FP)>; + +// CSRs that are handled explicitly via copies. +def CSR_AArch64_CXX_TLS_Darwin_ViaCopy + : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>; + // The ELF stub used for TLS-descriptor access saves every feasible // register. Only X0 and LR are clobbered. def CSR_AArch64_TLS_ELF diff --git a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp index 06ff9af37fd7..9310ac4a44a2 100644 --- a/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp +++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp @@ -117,10 +117,10 @@ struct LDTLSCleanup : public MachineFunctionPass { *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass); // Insert a copy from X0 to TLSBaseAddrReg for later. - MachineInstr *Next = I->getNextNode(); - MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), - TII->get(TargetOpcode::COPY), - *TLSBaseAddrReg).addReg(AArch64::X0); + MachineInstr *Copy = + BuildMI(*I->getParent(), ++I->getIterator(), I->getDebugLoc(), + TII->get(TargetOpcode::COPY), *TLSBaseAddrReg) + .addReg(AArch64::X0); return Copy; } diff --git a/lib/Target/AArch64/AArch64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp index efdb2e33a36e..78c239b11ef3 100644 --- a/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -168,6 +168,8 @@ namespace llvm { void initializeAArch64CollectLOHPass(PassRegistry &); } +#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)" + namespace { struct AArch64CollectLOH : public MachineFunctionPass { static char ID; @@ -178,7 +180,7 @@ struct AArch64CollectLOH : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "AArch64 Collect Linker Optimization Hint (LOH)"; + return AARCH64_COLLECT_LOH_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -220,12 +222,10 @@ typedef SmallVector MapIdToReg; char AArch64CollectLOH::ID = 0; INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh", - "AArch64 Collect Linker Optimization Hint (LOH)", false, - false) + AARCH64_COLLECT_LOH_NAME, false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh", - "AArch64 Collect Linker Optimization Hint (LOH)", false, - false) + AARCH64_COLLECT_LOH_NAME, false, false) /// Given a couple (MBB, reg) get the corresponding set of instruction from /// the given "sets". 
@@ -353,9 +353,17 @@ static void initReachingDef(const MachineFunction &MF, for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) { MapRegToId::const_iterator ItRegId = RegToId.find(*AI); - assert(ItRegId != RegToId.end() && - "Sub-register of an " - "involved register, not recorded as involved!"); + // If this alias has not been recorded, then it is not interesting + // for the current analysis. + // We can end up in this situation because of tuple registers. + // E.g., Let say we are interested in S1. When we register + // S1, we will also register its aliases and in particular + // the tuple Q1_Q2. + // Now, when we encounter Q1_Q2, we will look through its aliases + // and will find that S2 is not registered. + if (ItRegId == RegToId.end()) + continue; + BBKillSet.set(ItRegId->second); BBGen[ItRegId->second] = &MI; } @@ -523,6 +531,8 @@ static bool isCandidateStore(const MachineInstr *Instr) { switch (Instr->getOpcode()) { default: return false; + case AArch64::STRBBui: + case AArch64::STRHHui: case AArch64::STRBui: case AArch64::STRHui: case AArch64::STRWui: @@ -884,7 +894,8 @@ static void computeOthers(const InstrToInstrs &UseToDefs, bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri); // If the chain is three instructions long and ldr is the second element, // then this ldr must load form GOT, otherwise this is not a correct chain. - if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT) + if (L2 && !IsL2Add && + !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)) continue; SmallVector Args; MCLOHType Kind; @@ -1000,7 +1011,8 @@ static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId, DEBUG(dbgs() << "** Collect Involved Register\n"); for (const auto &MBB : MF) { for (const MachineInstr &MI : MBB) { - if (!canDefBePartOfLOH(&MI)) + if (!canDefBePartOfLOH(&MI) && + !isCandidateLoad(&MI) && !isCandidateStore(&MI)) continue; // Process defs diff --git a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp index b9e41c61defe..fc27bfee73d1 100644 --- a/lib/Target/AArch64/AArch64ConditionOptimizer.cpp +++ b/lib/Target/AArch64/AArch64ConditionOptimizer.cpp @@ -59,6 +59,7 @@ //===----------------------------------------------------------------------===// #include "AArch64.h" +#include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -153,13 +154,20 @@ MachineInstr *AArch64ConditionOptimizer::findSuitableCompare( case AArch64::SUBSXri: // cmn is an alias for adds with a dead destination register. 
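The computeOthers change above swaps an equality test on getTargetFlags() for a mask test: target flags can carry several bits at once, so equality against one flag value breaks as soon as a second bit is set. A tiny self-checking illustration; the flag values are invented, not the real AArch64II encoding:

#include <cstdint>

enum : uint8_t { MO_GOT = 1u << 0, MO_NC = 1u << 1 }; // illustrative bits

bool loadsFromGOT(uint8_t TargetFlags) {
  // Wrong:  TargetFlags == MO_GOT   (false once MO_NC is also set)
  // Right:
  return (TargetFlags & MO_GOT) != 0;
}

static_assert(((MO_GOT | MO_NC) & MO_GOT) != 0,
              "mask test still sees the flag in a combination");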
case AArch64::ADDSWri: - case AArch64::ADDSXri: - if (MRI->use_empty(I->getOperand(0).getReg())) - return I; - - DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n'); - return nullptr; - + case AArch64::ADDSXri: { + unsigned ShiftAmt = AArch64_AM::getShiftValue(I->getOperand(3).getImm()); + if (!I->getOperand(2).isImm()) { + DEBUG(dbgs() << "Immediate of cmp is symbolic, " << *I << '\n'); + return nullptr; + } else if (I->getOperand(2).getImm() << ShiftAmt >= 0xfff) { + DEBUG(dbgs() << "Immediate of cmp may be out of range, " << *I << '\n'); + return nullptr; + } else if (!MRI->use_empty(I->getOperand(0).getReg())) { + DEBUG(dbgs() << "Destination of cmp is not dead, " << *I << '\n'); + return nullptr; + } + return I; + } // Prevent false positive case like: // cmp w19, #0 // cinc w0, w19, gt diff --git a/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp index 2b0c92fe02d5..df1320fbd4c9 100644 --- a/lib/Target/AArch64/AArch64ConditionalCompares.cpp +++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp @@ -353,7 +353,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { MIOperands::PhysRegInfo PRI = MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI); - if (PRI.Reads) { + if (PRI.Read) { // The ccmp doesn't produce exactly the same flags as the original // compare, so reject the transform if there are uses of the flags // besides the terminators. @@ -362,7 +362,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) { return nullptr; } - if (PRI.Clobbers) { + if (PRI.Defined || PRI.Clobbered) { DEBUG(dbgs() << "Not convertible compare: " << *I); ++NumUnknNZCVDefs; return nullptr; @@ -567,8 +567,8 @@ void SSACCmpConv::convert(SmallVectorImpl &RemovedBlocks) { // All CmpBB instructions are moved into Head, and CmpBB is deleted. // Update the CFG first. updateTailPHIs(); - Head->removeSuccessor(CmpBB); - CmpBB->removeSuccessor(Tail); + Head->removeSuccessor(CmpBB, true); + CmpBB->removeSuccessor(Tail, true); Head->transferSuccessorsAndUpdatePHIs(CmpBB); DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc(); TII->RemoveBranch(*Head); @@ -786,13 +786,13 @@ void AArch64ConditionalCompares::updateDomTree( // convert() removes CmpBB which was previously dominated by Head. // CmpBB children should be transferred to Head. MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head); - for (unsigned i = 0, e = Removed.size(); i != e; ++i) { - MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + for (MachineBasicBlock *RemovedMBB : Removed) { + MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB); assert(Node != HeadNode && "Cannot erase the head node"); assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head"); while (Node->getNumChildren()) DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); - DomTree->eraseNode(Removed[i]); + DomTree->eraseNode(RemovedMBB); } } @@ -801,8 +801,8 @@ void AArch64ConditionalCompares::updateLoops(ArrayRef Removed) { if (!Loops) return; - for (unsigned i = 0, e = Removed.size(); i != e; ++i) - Loops->removeBlock(Removed[i]); + for (MachineBasicBlock *RemovedMBB : Removed) + Loops->removeBlock(RemovedMBB); } /// Invalidate MachineTraceMetrics before if-conversion. 
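The new findSuitableCompare guards bail out when the compare immediate is symbolic or sits at the top of the encodable range: ADDS/SUBS immediates are 12 bits with an optional LSL #12, and this pass later nudges the immediate, so a value at the boundary could become unencodable. A standalone version of that acceptance test, a sketch rather than the in-tree helper:

#include <cstdint>

// True when a cmp immediate (already split into value and shift, the shift
// being 0 or 12 on AArch64) leaves room for a +/-1 adjustment.
bool cmpImmediateSafeToAdjust(int64_t Imm, unsigned ShiftAmt) {
  if (Imm < 0)
    return false;                   // negative/symbolic: rejected above too
  return (Imm << ShiftAmt) < 0xfff; // strictly below 2^12 - 1 keeps slack
}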
@@ -899,7 +899,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) { Loops = getAnalysisIfAvailable(); Traces = &getAnalysis(); MinInstr = nullptr; - MinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize); + MinSize = MF.getFunction()->optForMinSize(); bool Changed = false; CmpConv.runOnMachineFunction(MF); diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp index 74fc167433f6..576cf4a74167 100644 --- a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp +++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp @@ -26,6 +26,12 @@ using namespace llvm; STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced"); +namespace llvm { +void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry &); +} + +#define AARCH64_DEAD_REG_DEF_NAME "AArch64 Dead register definitions" + namespace { class AArch64DeadRegisterDefinitions : public MachineFunctionPass { private: @@ -35,11 +41,14 @@ private: bool usesFrameIndex(const MachineInstr &MI); public: static char ID; // Pass identification, replacement for typeid. - explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {} + explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) { + initializeAArch64DeadRegisterDefinitionsPass( + *PassRegistry::getPassRegistry()); + } bool runOnMachineFunction(MachineFunction &F) override; - const char *getPassName() const override { return "Dead register definitions"; } + const char *getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -49,6 +58,9 @@ public: char AArch64DeadRegisterDefinitions::ID = 0; } // end anonymous namespace +INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs", + AARCH64_DEAD_REG_DEF_NAME, false, false) + bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg( unsigned Reg, const MachineInstr &MI) { for (const MachineOperand &MO : MI.implicit_operands()) diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index c2470f747a38..d24e42a93763 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -22,18 +22,26 @@ #include "llvm/Support/MathExtras.h" using namespace llvm; +namespace llvm { +void initializeAArch64ExpandPseudoPass(PassRegistry &); +} + +#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass" + namespace { class AArch64ExpandPseudo : public MachineFunctionPass { public: static char ID; - AArch64ExpandPseudo() : MachineFunctionPass(ID) {} + AArch64ExpandPseudo() : MachineFunctionPass(ID) { + initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry()); + } const AArch64InstrInfo *TII; bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "AArch64 pseudo instruction expansion pass"; + return AARCH64_EXPAND_PSEUDO_NAME; } private: @@ -45,6 +53,9 @@ private: char AArch64ExpandPseudo::ID = 0; } +INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo", + AARCH64_EXPAND_PSEUDO_NAME, false, false) + /// \brief Transfer implicit operands on the pseudo instruction to the /// instructions created from the expansion. 
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI, diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index 072819836bb3..0ac4b39b0357 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -523,7 +523,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) U = C; } - if (const PointerType *Ty = dyn_cast(Obj->getType())) + if (auto *Ty = dyn_cast(Obj->getType())) if (Ty->getAddressSpace() > 255) // Fast instruction selection doesn't support the special // address spaces. @@ -969,7 +969,7 @@ bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { // Cannot encode an offset register and an immediate offset in the same // instruction. Fold the immediate offset into the load/store instruction and - // emit an additonal add to take care of the offset register. + // emit an additional add to take care of the offset register. if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) RegisterOffsetNeedsLowering = true; @@ -1058,8 +1058,8 @@ void AArch64FastISel::addLoadStoreOperands(Address &Addr, // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size // and alignment should be based on the VT. MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI, Offset), Flags, - MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); + MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); // Now add the rest of the operands. MIB.addFrameIndex(FI).addImm(Offset); } else { @@ -1178,7 +1178,7 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, } // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1193,12 +1193,16 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, - AArch64_AM::LSL, ShiftVal, SetFlags, WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) { if (const auto *C = dyn_cast(SI->getOperand(1))) { AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; @@ -1214,12 +1218,15 @@ unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftType, ShiftVal, SetFlags, - WantResult); + ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftType, ShiftVal, SetFlags, + WantResult); + if (ResultReg) + return ResultReg; } } } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1323,6 +1330,10 @@ unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + // Don't deal with undefined shifts. 
+ if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrs, AArch64::SUBXrs }, { AArch64::ADDWrs, AArch64::ADDXrs } }, @@ -1360,6 +1371,9 @@ unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, if (RetVT != MVT::i32 && RetVT != MVT::i64) return 0; + if (ShiftImm >= 4) + return 0; + static const unsigned OpcTable[2][2][2] = { { { AArch64::SUBWrx, AArch64::SUBXrx }, { AArch64::ADDWrx, AArch64::ADDXrx } }, @@ -1542,7 +1556,7 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, return ResultReg; // Check if the mul can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (isMulPowOf2(RHS)) { const Value *MulLHS = cast(RHS)->getOperand(0); const Value *MulRHS = cast(RHS)->getOperand(1); @@ -1558,12 +1572,15 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(MulLHS); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } // Check if the shift can be folded into the instruction. - if (RHS->hasOneUse() && isValueAvailable(RHS)) + if (RHS->hasOneUse() && isValueAvailable(RHS)) { if (const auto *SI = dyn_cast(RHS)) if (const auto *C = dyn_cast(SI->getOperand(1))) { uint64_t ShiftVal = C->getZExtValue(); @@ -1571,9 +1588,12 @@ unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, if (!RHSReg) return 0; bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); - return emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, - RHSIsKill, ShiftVal); + ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, + RHSIsKill, ShiftVal); + if (ResultReg) + return ResultReg; } + } unsigned RHSReg = getRegForValue(RHS); if (!RHSReg) @@ -1646,6 +1666,11 @@ unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, { AArch64::ORRWrs, AArch64::ORRXrs }, { AArch64::EORWrs, AArch64::EORXrs } }; + + // Don't deal with undefined shifts. + if (ShiftImm >= RetVT.getSizeInBits()) + return 0; + const TargetRegisterClass *RC; unsigned Opc; switch (RetVT.SimpleTy) { @@ -2235,14 +2260,7 @@ bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { MIB.addImm(TestBit); MIB.addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - fastEmitBranch(FBB, DbgLoc); - + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2257,7 +2275,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - AArch64CC::CondCode CC = AArch64CC::NE; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && isValueAvailable(CI)) { // Try to optimize or fold the cmp. @@ -2289,7 +2306,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch // instruction. 
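The emitAddSub_rs and emitLogicalOp_rs helpers above now return 0 for shift amounts at or beyond the register width (undefined in the IR sense and unencodable in AArch64's shifted-register forms), and their callers were reworked from "return the fold" to "try the fold, fall back if it failed". A sketch of that control-flow pattern with invented helpers and register numbers:

#include <cstdint>

// Returns a result register, or 0 when the shifted-register fold is not
// possible; mirrors the new ShiftImm >= RetVT.getSizeInBits() guard.
unsigned tryEmitShiftedForm(unsigned BitWidth, uint64_t ShiftImm) {
  if (ShiftImm >= BitWidth)
    return 0;
  return 42; // pretend: virtual register holding the folded result
}

unsigned emitOp(unsigned BitWidth, uint64_t ShiftImm) {
  if (unsigned ResultReg = tryEmitShiftedForm(BitWidth, ShiftImm))
    return ResultReg; // fold succeeded
  return 7;           // pretend: generic register-register fallback
}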
- CC = getCompareCC(Predicate); + AArch64CC::CondCode CC = getCompareCC(Predicate); AArch64CC::CondCode ExtraCC = AArch64CC::AL; switch (Predicate) { default: @@ -2317,52 +2334,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); - return true; - } - } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { - MVT SrcVT; - if (TI->hasOneUse() && isValueAvailable(TI) && - isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { - unsigned CondReg = getRegForValue(TI->getOperand(0)); - if (!CondReg) - return false; - bool CondIsKill = hasTrivialKill(TI->getOperand(0)); - - // Issue an extract_subreg to get the lower 32-bits. - if (SrcVT == MVT::i64) { - CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill, - AArch64::sub_32); - CondIsKill = true; - } - - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = AArch64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); - - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } } else if (const auto *CI = dyn_cast(BI->getCondition())) { @@ -2371,34 +2343,31 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) .addMBB(Target); - // Obtain the branch weight and add the target to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - Target->getBasicBlock()); - FuncInfo.MBB->addSuccessor(Target, BranchWeight); + // Obtain the branch probability and add the target to the successor list. + if (FuncInfo.BPI) { + auto BranchProbability = FuncInfo.BPI->getEdgeProbability( + BI->getParent(), Target->getBasicBlock()); + FuncInfo.MBB->addSuccessor(Target, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(Target); return true; - } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { - // Fake request the condition, otherwise the intrinsic might be completely - // optimized away. - unsigned CondReg = getRegForValue(BI->getCondition()); - if (!CondReg) - return false; + } else { + AArch64CC::CondCode CC = AArch64CC::NE; + if (foldXALUIntrinsic(CC, I, BI->getCondition())) { + // Fake request the condition, otherwise the intrinsic might be completely + // optimized away. + unsigned CondReg = getRegForValue(BI->getCondition()); + if (!CondReg) + return false; - // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); + // Emit the branch. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. 
- uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); - return true; + finishCondBranch(BI->getParent(), TBB, FBB); + return true; + } } unsigned CondReg = getRegForValue(BI->getCondition()); @@ -2406,32 +2375,22 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { return false; bool CondRegIsKill = hasTrivialKill(BI->getCondition()); - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - emitICmp_ri(MVT::i32, CondReg, CondRegIsKill, 0); - + // i1 conditions come as i32 values, test the lowest bit with tb(n)z. + unsigned Opcode = AArch64::TBNZW; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = AArch64CC::EQ; + Opcode = AArch64::TBZW; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) + const MCInstrDesc &II = TII.get(Opcode); + unsigned ConstrainedCondReg + = constrainOperandRegClass(II, CondReg, II.getNumDefs()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) + .addImm(0) .addMBB(TBB); - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TBB, BranchWeight); - - fastEmitBranch(FBB, DbgLoc); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2447,8 +2406,8 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); // Make sure the CFG is up-to-date. - for (unsigned i = 0, e = BI->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[BI->getSuccessor(i)]); + for (auto *Succ : BI->successors()) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); return true; } @@ -2456,6 +2415,10 @@ bool AArch64FastISel::selectIndirectBr(const Instruction *I) { bool AArch64FastISel::selectCmp(const Instruction *I) { const CmpInst *CI = cast(I); + // Vectors of i1 are weird: bail out. + if (CI->getType()->isVectorTy()) + return false; + // Try to optimize or fold the cmp. CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); unsigned ResultReg = 0; @@ -2954,8 +2917,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, .addImm(NumBytes); // Process the args. 
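The rewritten selectBranch drops the compare-against-zero and branches on bit 0 of the i1-in-i32 condition directly with TBNZ, flipping to TBZ with swapped targets when the true block is the layout successor. A scalar model of the two opcodes' taken-branch condition:

#include <cstdint>

bool tbnzTaken(uint32_t Reg, unsigned Bit) { return (Reg >> Bit) & 1; }
bool tbzTaken(uint32_t Reg, unsigned Bit) { return !((Reg >> Bit) & 1); }

// The pass always tests Bit == 0 here, since fast-isel materializes an i1
// as the low bit of a 32-bit register.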
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; + for (CCValAssign &VA : ArgLocs) { const Value *ArgVal = CLI.OutVals[VA.getValNo()]; MVT ArgVT = OutVTs[VA.getValNo()]; @@ -3018,8 +2980,8 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(Addr.getOffset()), - MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); + MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), + MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); if (!emitStore(ArgVT, ArgReg, Addr, MMO)) return false; @@ -3318,8 +3280,8 @@ bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, return false; // Make sure nothing is in the way - BasicBlock::const_iterator Start = I; - BasicBlock::const_iterator End = II; + BasicBlock::const_iterator Start(I); + BasicBlock::const_iterator End(II); for (auto Itr = std::prev(Start); Itr != End; --Itr) { // We only expect extractvalue instructions between the intrinsic and the // instruction to be selected. @@ -3684,6 +3646,9 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (F.isVarArg()) return false; + if (TLI.supportSplitCSR(FuncInfo.MF)) + return false; + // Build a list of return value registers. SmallVector RetRegs; @@ -3763,8 +3728,8 @@ bool AArch64FastISel::selectRet(const Instruction *I) { MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::RET_ReallyLR)); - for (unsigned i = 0, e = RetRegs.size(); i != e; ++i) - MIB.addReg(RetRegs[i], RegState::Implicit); + for (unsigned RetReg : RetRegs) + MIB.addReg(RetReg, RegState::Implicit); return true; } diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index a76473f7e539..11ae8005370d 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -72,9 +72,9 @@ // // For most functions, some of the frame areas are empty. For those functions, // it may not be necessary to set up fp or bp: -// * A base pointer is definitly needed when there are both VLAs and local +// * A base pointer is definitely needed when there are both VLAs and local // variables with more-than-default alignment requirements. -// * A frame pointer is definitly needed when there are local variables with +// * A frame pointer is definitely needed when there are local variables with // more-than-default alignment requirements. // // In some cases when a base pointer is not strictly needed, it is generated @@ -216,11 +216,11 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves( if (CSI.empty()) return; - const DataLayout *TD = MF.getTarget().getDataLayout(); + const DataLayout &TD = MF.getDataLayout(); bool HasFP = hasFP(MF); // Calculate amount of bytes used for return address storing. - int stackGrowth = -TD->getPointerSize(0); + int stackGrowth = -TD.getPointerSize(0); // Calculate offsets. int64_t saveAreaOffset = (HasFP ? 
2 : 1) * stackGrowth; @@ -280,14 +280,17 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); - const AArch64RegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); bool HasFP = hasFP(MF); - DebugLoc DL = MBB.findDebugLoc(MBBI); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -354,7 +357,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (NumBytes && NeedsRealignment) { // Use the first callee-saved register as a scratch register. scratchSPReg = AArch64::X9; - MF.getRegInfo().setPhysRegUsed(scratchSPReg); } // If we're a leaf function, try using the red zone. @@ -400,8 +402,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, } if (needsFrameMoves) { - const DataLayout *TD = MF.getTarget().getDataLayout(); - const int StackGrowth = -TD->getPointerSize(0); + const DataLayout &TD = MF.getDataLayout(); + const int StackGrowth = -TD.getPointerSize(0); unsigned FramePtr = RegInfo->getFrameRegister(MF); // An example of the prologue: // @@ -513,33 +515,33 @@ static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) { return false; } -static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { +/// Checks whether the given instruction restores callee save registers +/// and if so returns how many. 
+static unsigned getNumCSRestores(MachineInstr &MI, const MCPhysReg *CSRegs) { unsigned RtIdx = 0; - if (MI->getOpcode() == AArch64::LDPXpost || - MI->getOpcode() == AArch64::LDPDpost) + switch (MI.getOpcode()) { + case AArch64::LDPXpost: + case AArch64::LDPDpost: RtIdx = 1; - - if (MI->getOpcode() == AArch64::LDPXpost || - MI->getOpcode() == AArch64::LDPDpost || - MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) { - if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) || - !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) || - MI->getOperand(RtIdx + 2).getReg() != AArch64::SP) - return false; - return true; + // FALLTHROUGH + case AArch64::LDPXi: + case AArch64::LDPDi: + if (!isCalleeSavedRegister(MI.getOperand(RtIdx).getReg(), CSRegs) || + !isCalleeSavedRegister(MI.getOperand(RtIdx + 1).getReg(), CSRegs) || + MI.getOperand(RtIdx + 2).getReg() != AArch64::SP) + return 0; + return 2; } - - return false; + return 0; } void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const AArch64InstrInfo *TII = - static_cast(MF.getSubtarget().getInstrInfo()); - const AArch64RegisterInfo *RegInfo = static_cast( - MF.getSubtarget().getRegisterInfo()); + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; if (MBB.end() != MBBI) { @@ -585,7 +587,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // ---------------------| --- | // | | | | // | CalleeSavedReg | | | - // | (NumRestores * 16) | | | + // | (NumRestores * 8) | | | // | | | | // ---------------------| | NumBytes // | | StackSize (StackAdjustUp) @@ -606,17 +608,17 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // Move past the restores of the callee-saved registers. MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator(); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - if (LastPopI != MBB.begin()) { - do { - ++NumRestores; - --LastPopI; - } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs)); - if (!isCSRestore(LastPopI, CSRegs)) { + MachineBasicBlock::iterator Begin = MBB.begin(); + while (LastPopI != Begin) { + --LastPopI; + unsigned Restores = getNumCSRestores(*LastPopI, CSRegs); + NumRestores += Restores; + if (Restores == 0) { ++LastPopI; - --NumRestores; + break; } } - NumBytes -= NumRestores * 16; + NumBytes -= NumRestores * 8; assert(NumBytes >= 0 && "Negative stack allocation size!?"); if (!hasFP(MF)) { @@ -634,15 +636,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // be able to save any instructions. if (NumBytes || MFI->hasVarSizedObjects()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, - -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags); -} - -/// getFrameIndexOffset - Returns the displacement from the frame register to -/// the stack frame of the specified index. 
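getNumCSRestores above counts restored registers (two per LDP) where the old isCSRestore counted instructions, so the epilogue's stack math changes from NumRestores * 16 to NumRestores * 8: one 8-byte slot per callee-saved X or D register. A worked check that the two accountings agree:

int restoredBytes(int NumLdpInstructions) {
  int NumRestoredRegs = 2 * NumLdpInstructions; // each LDP pops a pair
  return NumRestoredRegs * 8;                   // 8 bytes per register
}
// restoredBytes(3) == 48, the same as the old 3 instructions * 16 bytes.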
-int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - unsigned FrameReg; - return getFrameIndexReference(MF, FI, FrameReg); + -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags); } /// getFrameIndexReference - Provide a base+offset reference to an FI slot for @@ -739,9 +733,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( DebugLoc DL; assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - if (MI != MBB.end()) - DL = MI->getDebugLoc(); - for (unsigned i = 0; i < Count; i += 2) { unsigned idx = Count - i - 2; unsigned Reg1 = CSI[idx].getReg(); @@ -911,7 +902,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned NumFPRSpilled = 0; bool ExtraCSSpill = false; bool CanEliminateFrame = true; - DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:"); + DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); // Check pairs of consecutive callee-saved registers. diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index 731f031ff855..427afdf4acbf 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -37,7 +37,6 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; int resolveFrameIndexReference(const MachineFunction &MF, int FI, @@ -61,6 +60,11 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; + + /// Returns true if the target will correctly handle shrink wrapping. + bool enableShrinkWrapping(const MachineFunction &MF) const override { + return true; + } }; } // End llvm namespace diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 772e894f4f0a..6c868880bcac 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -34,7 +34,6 @@ using namespace llvm; namespace { class AArch64DAGToDAGISel : public SelectionDAGISel { - AArch64TargetMachine &TM; /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. 
@@ -45,7 +44,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { public: explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr), + : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), ForCodeSize(false) {} const char *getPassName() const override { @@ -53,9 +52,7 @@ public: } bool runOnMachineFunction(MachineFunction &MF) override { - ForCodeSize = - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || - MF.getFunction()->hasFnAttribute(Attribute::MinSize); + ForCodeSize = MF.getFunction()->optForSize(); Subtarget = &MF.getSubtarget(); return SelectionDAGISel::runOnMachineFunction(MF); } @@ -79,6 +76,21 @@ public: bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) { return SelectShiftedRegister(N, true, Reg, Shift); } + bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 1, Base, OffImm); + } + bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 2, Base, OffImm); + } + bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 4, Base, OffImm); + } + bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 8, Base, OffImm); + } + bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) { + return SelectAddrModeIndexed7S(N, 16, Base, OffImm); + } bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) { return SelectAddrModeIndexed(N, 1, Base, OffImm); } @@ -153,8 +165,7 @@ public: SDNode *SelectBitfieldExtractOp(SDNode *N); SDNode *SelectBitfieldInsertOp(SDNode *N); - - SDNode *SelectLIBM(SDNode *N); + SDNode *SelectBitfieldInsertInZeroOp(SDNode *N); SDNode *SelectReadRegister(SDNode *N); SDNode *SelectWriteRegister(SDNode *N); @@ -165,6 +176,8 @@ public: private: bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg, SDValue &Shift); + bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base, + SDValue &OffImm); bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base, SDValue &OffImm); bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base, @@ -422,7 +435,7 @@ static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { return true; } -// Helper for SelectOpcV64LaneV128 - Recogzine operatinos where one operand is a +// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a // high lane extract. static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, SDValue &LaneOp, int &LaneIdx) { @@ -572,7 +585,7 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, } // AArch64 mandates that the RHS of the operation must use the smallest - // register classs that could contain the size being extended from. Thus, + // register class that could contain the size being extended from. Thus, // if we're folding a (sext i8), we need the RHS to be a GPR32, even though // there might not be an actual 32-bit value in the program. We can // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. @@ -587,7 +600,7 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, /// need to create a real ADD instruction from it anyway and there's no point in /// folding it into the mem op. 
Theoretically, it shouldn't matter, but there's /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding -/// leads to duplaicated ADRP instructions. +/// leads to duplicated ADRP instructions. static bool isWorthFoldingADDlow(SDValue N) { for (auto Use : N->uses()) { if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && @@ -604,6 +617,51 @@ static bool isWorthFoldingADDlow(SDValue N) { return true; } +/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit +/// immediate" address. The "Size" argument is the size in bytes of the memory +/// reference, which determines the scale. +bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size, + SDValue &Base, + SDValue &OffImm) { + SDLoc dl(N); + const DataLayout &DL = CurDAG->getDataLayout(); + const TargetLowering *TLI = getTargetLowering(); + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); + return true; + } + + // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed + // selected here doesn't support labels/immediates, only base+offset. + + if (CurDAG->isBaseWithConstantOffset(N)) { + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int64_t RHSC = RHS->getSExtValue(); + unsigned Scale = Log2_32(Size); + if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) && + RHSC < (0x40 << Scale)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); + } + OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); + return true; + } + } + } + + // Base only. The address will be materialized into a register before + // the memory is accessed. + // add x0, Xbase, #offset + // stp x1, x2, [x0] + Base = N; + OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); + return true; +} + /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit /// immediate" address. The "Size" argument is the size in bytes of the memory /// reference, which determines the scale. @@ -867,7 +925,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, if (isa(RHS)) { int64_t ImmOff = (int64_t)cast(RHS)->getZExtValue(); unsigned Scale = Log2_32(Size); - // Skip the immediate can be seleced by load/store addressing mode. + // Skip the immediate can be selected by load/store addressing mode. // Also skip the immediate can be encoded by a single ADD (SUB is also // checked by using -ImmOff). if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || @@ -1034,6 +1092,8 @@ SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) { // it into an i64. DstVT = MVT::i32; } + } else if (VT == MVT::f16) { + Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; } else if (VT == MVT::f32) { Opcode = IsPre ? 
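SelectAddrModeIndexed7S above accepts a base plus a signed 7-bit immediate scaled by the access size: the byte offset must be a multiple of Size and fall in [-64*Size, 63*Size], the range LDP/STP-class instructions encode. A standalone version of that acceptance test (a sketch; Log2_32 is inlined for the power-of-two sizes used here):

#include <cstdint>

bool fitsIndexed7S(int64_t Offset, unsigned Size) {
  unsigned Scale = 0;
  while ((1u << Scale) < Size)
    ++Scale;                            // Log2_32(Size) for Size in {1,2,4,8,16}
  int64_t Lim = int64_t(0x40) << Scale; // 64 slots on each side
  return Offset % int64_t(Size) == 0 && Offset >= -Lim && Offset < Lim;
}
// For Size == 8 this accepts -512, -504, ..., 496, 504, matching the
// RHSC >= -(0x40 << Scale) && RHSC < (0x40 << Scale) bounds above.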
AArch64::LDRSpre : AArch64::LDRSpost; } else if (VT == MVT::f64 || VT.is64BitVector()) { @@ -1222,8 +1282,8 @@ SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, SDValue SuperReg = SDValue(Ld, 0); EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, - AArch64::qsub3 }; + static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); if (Narrow) @@ -1275,8 +1335,8 @@ SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); } else { EVT WideVT = RegSeq.getOperand(1)->getValueType(0); - static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2, - AArch64::qsub3 }; + static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, + AArch64::qsub2, AArch64::qsub3 }; for (unsigned i = 0; i < NumVecs; ++i) { SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); @@ -1420,7 +1480,7 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, // The resulting code will be at least as good as the original one // plus it may expose more opportunities for bitfield insert pattern. // FIXME: Currently we limit this to the bigger pattern, because - // some optimizations expect AND and not UBFM + // some optimizations expect AND and not UBFM. Opd0 = N->getOperand(0); } else return false; @@ -1852,6 +1912,7 @@ static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { /// Does this tree qualify as an attempt to move a bitfield into position, /// essentially "(and (shl VAL, N), Mask)". static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, + bool BiggerPattern, SDValue &Src, int &ShiftAmount, int &MaskWidth) { EVT VT = Op.getValueType(); @@ -1874,6 +1935,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, Op = Op.getOperand(0); } + // Don't match if the SHL has more than one use, since then we'll end up + // generating SHL+UBFIZ instead of just keeping SHL+AND. + if (!BiggerPattern && !Op.hasOneUse()) + return false; + uint64_t ShlImm; if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) return false; @@ -1887,7 +1953,11 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, // BFI encompasses sufficiently many nodes that it's worth inserting an extra // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL - // amount. + // amount. BiggerPattern is true when this pattern is being matched for BFI, + // BiggerPattern is false when this pattern is being matched for UBFIZ, in + // which case it is not profitable to insert an extra shift. 
+ if (ShlImm - ShiftAmount != 0 && !BiggerPattern) + return false; Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); return true; @@ -1904,7 +1974,8 @@ static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2 static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, SDValue &Src, unsigned &ImmR, - unsigned &ImmS, SelectionDAG *CurDAG) { + unsigned &ImmS, const APInt &UsefulBits, + SelectionDAG *CurDAG) { assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); // Set Opc @@ -1918,23 +1989,30 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, // Because of simplify-demanded-bits in DAGCombine, involved masks may not // have the expected shape. Try to undo that. - APInt UsefulBits; - getUsefulBits(SDValue(N, 0), UsefulBits); unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); - // OR is commutative, check both possibilities (does llvm provide a - // way to do that directely, e.g., via code matcher?) - SDValue OrOpd1Val = N->getOperand(1); - SDNode *OrOpd0 = N->getOperand(0).getNode(); - SDNode *OrOpd1 = N->getOperand(1).getNode(); - for (int i = 0; i < 2; - ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) { + // OR is commutative, check all combinations of operand order and values of + // BiggerPattern, i.e. + // Opd0, Opd1, BiggerPattern=false + // Opd1, Opd0, BiggerPattern=false + // Opd0, Opd1, BiggerPattern=true + // Opd1, Opd0, BiggerPattern=true + // Several of these combinations may match, so check with BiggerPattern=false + // first since that will produce better results by matching more instructions + // and/or inserting fewer extra instructions. + for (int I = 0; I < 4; ++I) { + + bool BiggerPattern = I / 2; + SDNode *OrOpd0 = N->getOperand(I % 2).getNode(); + SDValue OrOpd1Val = N->getOperand((I + 1) % 2); + SDNode *OrOpd1 = OrOpd1Val.getNode(); + unsigned BFXOpc; int DstLSB, Width; if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, - NumberOfIgnoredLowBits, true)) { + NumberOfIgnoredLowBits, BiggerPattern)) { // Check that the returned opcode is compatible with the pattern, // i.e., same type and zero extended (U and not S) if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || @@ -1952,8 +2030,9 @@ static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst, // If the mask on the insertee is correct, we have a BFXIL operation. We // can share the ImmR and ImmS values from the already-computed UBFM. - } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src, - DstLSB, Width)) { + } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), + BiggerPattern, + Src, DstLSB, Width)) { ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); ImmS = Width - 1; } else @@ -2003,11 +2082,18 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { unsigned Opc; unsigned LSB, MSB; SDValue Opd0, Opd1; + EVT VT = N->getValueType(0); + APInt NUsefulBits; + getUsefulBits(SDValue(N, 0), NUsefulBits); - if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG)) + // If all bits are not useful, just return UNDEF. 
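The rewritten isBitfieldInsertOpFromOr tries all four combinations of operand order and BiggerPattern in one indexed loop, ordered so the cheaper BiggerPattern=false matches win ties. The index decoding, spelled out as a runnable enumeration:

#include <cstdio>

int main() {
  for (int I = 0; I < 4; ++I) {
    bool BiggerPattern = I / 2; // 0, 0, 1, 1
    int Insertee = I % 2;       // operand 0, 1, 0, 1
    std::printf("try operand %d as insertee, BiggerPattern=%d\n", Insertee,
                int(BiggerPattern));
  }
}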
+ if (!NUsefulBits) + return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, VT); + + if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, NUsefulBits, + CurDAG)) return nullptr; - EVT VT = N->getValueType(0); SDLoc dl(N); SDValue Ops[] = { Opd0, Opd1, @@ -2016,58 +2102,37 @@ SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) { return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } -SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) { +/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the +/// equivalent of a left shift by a constant amount followed by an and masking +/// out a contiguous set of bits. +SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) { + if (N->getOpcode() != ISD::AND) + return nullptr; + EVT VT = N->getValueType(0); - unsigned Variant; unsigned Opc; - unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr }; + if (VT == MVT::i32) + Opc = AArch64::UBFMWri; + else if (VT == MVT::i64) + Opc = AArch64::UBFMXri; + else + return nullptr; - if (VT == MVT::f32) { - Variant = 0; - } else if (VT == MVT::f64) { - Variant = 1; - } else - return nullptr; // Unrecognized argument type. Fall back on default codegen. + SDValue Op0; + int DstLSB, Width; + if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, + Op0, DstLSB, Width)) + return nullptr; - // Pick the FRINTX variant needed to set the flags. - unsigned FRINTXOpc = FRINTXOpcs[Variant]; + // ImmR is the rotate right amount. + unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); + // ImmS is the most significant bit of the source to be moved. + unsigned ImmS = Width - 1; - switch (N->getOpcode()) { - default: - return nullptr; // Unrecognized libm ISD node. Fall back on default codegen. - case ISD::FCEIL: { - unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr }; - Opc = FRINTPOpcs[Variant]; - break; - } - case ISD::FFLOOR: { - unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr }; - Opc = FRINTMOpcs[Variant]; - break; - } - case ISD::FTRUNC: { - unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr }; - Opc = FRINTZOpcs[Variant]; - break; - } - case ISD::FROUND: { - unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr }; - Opc = FRINTAOpcs[Variant]; - break; - } - } - - SDLoc dl(N); - SDValue In = N->getOperand(0); - SmallVector Ops; - Ops.push_back(In); - - if (!TM.Options.UnsafeFPMath) { - SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In); - Ops.push_back(SDValue(FRINTX, 1)); - } - - return CurDAG->getMachineNode(Opc, dl, VT, Ops); + SDLoc DL(N); + SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), + CurDAG->getTargetConstant(ImmS, DL, VT)}; + return CurDAG->SelectNodeTo(N, Opc, VT, Ops); } bool @@ -2119,7 +2184,7 @@ AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, // into a single value to be used in the MRS/MSR instruction. 
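SelectBitfieldInsertInZeroOp encodes (x << LSB) & Mask as a UBFM with immr = (BitWidth - LSB) % BitWidth and imms = Width - 1, the UBFIZ alias. A self-contained check of that arithmetic for the 32-bit case, assuming a proper insert (LSB > 0, LSB + Width <= 32):

#include <cassert>
#include <cstdint>

uint32_t ubfiz32(uint32_t Src, unsigned LSB, unsigned Width) {
  assert(LSB > 0 && LSB + Width <= 32 && "sketch assumes a proper insert");
  unsigned ImmR = (32 - LSB) % 32;                 // rotate-right amount
  unsigned ImmS = Width - 1;                       // top source bit moved
  uint32_t Field = Src & ((1u << (ImmS + 1)) - 1); // low Width bits of Src
  // UBFM rotates the kept field right by ImmR; under the assert this equals
  // a plain left shift by LSB, since the wrapped-around bits are all zero.
  return (Field << (32 - ImmR)) | (Field >> ImmR);
}
// ubfiz32(0xAB, 8, 8) == 0xAB00, i.e. (0xAB << 8) & 0xFF00.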
static int getIntOperandFromRegisterString(StringRef RegString) { SmallVector Fields; - RegString.split(Fields, ":"); + RegString.split(Fields, ':'); if (Fields.size() == 1) return -1; @@ -2206,7 +2271,15 @@ SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) { assert (isa(N->getOperand(2)) && "Expected a constant integer expression."); uint64_t Immed = cast(N->getOperand(2))->getZExtValue(); - return CurDAG->getMachineNode(AArch64::MSRpstate, DL, MVT::Other, + unsigned State; + if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) { + assert(Immed < 2 && "Bad imm"); + State = AArch64::MSRpstateImm1; + } else { + assert(Immed < 16 && "Bad imm"); + State = AArch64::MSRpstateImm4; + } + return CurDAG->getMachineNode(State, DL, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32), CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0)); @@ -2279,6 +2352,8 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { case ISD::SRA: if (SDNode *I = SelectBitfieldExtractOp(Node)) return I; + if (SDNode *I = SelectBitfieldInsertInZeroOp(Node)) + return I; break; case ISD::OR: @@ -2802,6 +2877,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { break; } } + break; } case AArch64ISD::LD2post: { if (VT == MVT::v8i8) @@ -3214,14 +3290,6 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); break; } - - case ISD::FCEIL: - case ISD::FFLOOR: - case ISD::FTRUNC: - case ISD::FROUND: - if (SDNode *I = SelectLIBM(Node)) - return I; - break; } // Select the default instruction diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 3e8f46cf1ecd..9f5beff12100 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -40,23 +40,6 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); -namespace { -enum AlignMode { - StrictAlign, - NoStrictAlign -}; -} - -static cl::opt -Align(cl::desc("Load/store alignment support"), - cl::Hidden, cl::init(NoStrictAlign), - cl::values( - clEnumValN(StrictAlign, "aarch64-strict-align", - "Disallow all unaligned memory accesses"), - clEnumValN(NoStrictAlign, "aarch64-no-strict-align", - "Allow unaligned memory accesses"), - clEnumValEnd)); - // Place holder until extr generation is tested fully. static cl::opt EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden, @@ -76,6 +59,9 @@ cl::opt EnableAArch64ELFLocalDynamicTLSGeneration( cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +/// Value type used for condition codes. +static const MVT MVT_CC = MVT::i32; + AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -210,11 +196,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - // Exception handling. - // FIXME: These are guesses. Has this been defined yet? - setExceptionPointerRegister(AArch64::X0); - setExceptionSelectorRegister(AArch64::X1); - // Constant pool entries setOperationAction(ISD::ConstantPool, MVT::i64, Custom); @@ -234,6 +215,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // AArch64 lacks both left-rotate and popcount instructions. 
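SelectWriteRegister above picks between two MSR (immediate) encodings: most PSTATE fields take a 4-bit immediate, while PAN and UAO take a single bit, hence the Immed < 2 versus Immed < 16 asserts. A sketch of that width choice; the field tags are stand-ins for the AArch64PState values:

enum PStateField { DAIFSet, SPSel, PAN, UAO }; // illustrative subset

unsigned immediateBitsFor(PStateField F) {
  return (F == PAN || F == UAO) ? 1u : 4u; // 1-bit vs 4-bit encodings
}

bool immediateFits(PStateField F, unsigned long long Imm) {
  return Imm < (1ull << immediateBitsFor(F));
}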
setOperationAction(ISD::ROTL, MVT::i32, Expand); setOperationAction(ISD::ROTL, MVT::i64, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + } // AArch64 doesn't have {U|S}MUL_LOHI. setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); @@ -252,6 +237,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); @@ -315,6 +304,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::f16, Promote); setOperationAction(ISD::FMINNUM, MVT::f16, Promote); setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); + setOperationAction(ISD::FMINNAN, MVT::f16, Promote); + setOperationAction(ISD::FMAXNAN, MVT::f16, Promote); // v4f16 is also a storage-only type, so promote it to v4f32 when that is // known to be safe. @@ -403,10 +394,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::FRINT, Ty, Legal); setOperationAction(ISD::FTRUNC, Ty, Legal); setOperationAction(ISD::FROUND, Ty, Legal); + setOperationAction(ISD::FMINNUM, Ty, Legal); + setOperationAction(ISD::FMAXNUM, Ty, Legal); + setOperationAction(ISD::FMINNAN, Ty, Legal); + setOperationAction(ISD::FMAXNAN, Ty, Legal); } setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0. + // This requires the Performance Monitors extension. + if (Subtarget->hasPerfMon()) + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); + if (Subtarget->isTargetMachO()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret to avoid memory @@ -456,12 +456,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setIndexedLoadAction(im, MVT::i64, Legal); setIndexedLoadAction(im, MVT::f64, Legal); setIndexedLoadAction(im, MVT::f32, Legal); + setIndexedLoadAction(im, MVT::f16, Legal); setIndexedStoreAction(im, MVT::i8, Legal); setIndexedStoreAction(im, MVT::i16, Legal); setIndexedStoreAction(im, MVT::i32, Legal); setIndexedStoreAction(im, MVT::i64, Legal); setIndexedStoreAction(im, MVT::f64, Legal); setIndexedStoreAction(im, MVT::f32, Legal); + setIndexedStoreAction(im, MVT::f16, Legal); } // Trap. 
@@ -479,6 +481,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); + setTargetDAGCombine(ISD::FP_TO_SINT); + setTargetDAGCombine(ISD::FP_TO_UINT); + setTargetDAGCombine(ISD::FDIV); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::ANY_EXTEND); @@ -487,16 +493,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::BITCAST); setTargetDAGCombine(ISD::CONCAT_VECTORS); setTargetDAGCombine(ISD::STORE); + if (Subtarget->supportsAddressTopByteIgnored()) + setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT); setTargetDAGCombine(ISD::VSELECT); - setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4; @@ -512,10 +520,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setMinFunctionAlignment(2); - RequireStrictAlign = (Align == StrictAlign); - setHasExtractBitsInsn(true); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + if (Subtarget->hasNEON()) { // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to // silliness like this: @@ -646,6 +654,9 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand); setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand); setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand); + + // But we do support custom-lowering for FCOPYSIGN. + setOperationAction(ISD::FCOPYSIGN, VT.getSimpleVT(), Custom); } setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); @@ -686,6 +697,12 @@ void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) { for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT.getSimpleVT(), Legal); + // F[MIN|MAX][NUM|NAN] are available for all FP NEON types (not f16 though!). + if (VT.isFloatingPoint() && VT.getVectorElementType() != MVT::f16) + for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN, + ISD::FMINNUM, ISD::FMAXNUM}) + setOperationAction(Opcode, VT.getSimpleVT(), Legal); + if (Subtarget->isLittleEndian()) { for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { @@ -730,7 +747,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( break; } case ISD::INTRINSIC_W_CHAIN: { - ConstantSDNode *CN = cast(Op->getOperand(1)); + ConstantSDNode *CN = cast(Op->getOperand(1)); Intrinsic::ID IntID = static_cast(CN->getZExtValue()); switch (IntID) { default: return; @@ -780,6 +797,34 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL, return MVT::i64; } +bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + unsigned Align, + bool *Fast) const { + if (Subtarget->requiresStrictAlign()) + return false; + + // FIXME: This is mostly true for Cyclone, but not necessarily others. + if (Fast) { + // FIXME: Define an attribute for slow unaligned accesses instead of + // relying on the CPU type as a proxy. + // On Cyclone, unaligned 128-bit stores are slow. 
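+    // For instance (illustrative only): a 16-byte store with alignment 4 on
+    // Cyclone reports *Fast == false, while the same store with the
+    // clang-vector-extension alignment hint of 1 or 2 reports *Fast == true.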
+ *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 || + // See comments in performSTORECombine() for more details about + // these conditions. + + // Code that uses clang vector extensions can mark that it + // wants unaligned accesses to be treated as fast by + // underspecifying alignment to be 1 or 2. + Align <= 2 || + + // Disregard v2i64. Memcpy lowering produces those and splitting + // them regresses performance on micro-benchmarks and olden/bh. + VT == MVT::v2i64; + } + return true; +} + FastISel * AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const { @@ -809,9 +854,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::ADCS: return "AArch64ISD::ADCS"; case AArch64ISD::SBCS: return "AArch64ISD::SBCS"; case AArch64ISD::ANDS: return "AArch64ISD::ANDS"; + case AArch64ISD::CCMP: return "AArch64ISD::CCMP"; + case AArch64ISD::CCMN: return "AArch64ISD::CCMN"; + case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP"; case AArch64ISD::FCMP: return "AArch64ISD::FCMP"; - case AArch64ISD::FMIN: return "AArch64ISD::FMIN"; - case AArch64ISD::FMAX: return "AArch64ISD::FMAX"; case AArch64ISD::DUP: return "AArch64ISD::DUP"; case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8"; case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16"; @@ -931,8 +977,7 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); DebugLoc DL = MI->getDebugLoc(); - MachineFunction::iterator It = MBB; - ++It; + MachineFunction::iterator It = ++MBB->getIterator(); unsigned DestReg = MI->getOperand(0).getReg(); unsigned IfTrueReg = MI->getOperand(1).getReg(); @@ -1141,8 +1186,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // register to WZR/XZR if it ends up being unused. unsigned Opcode = AArch64ISD::SUBS; - if (RHS.getOpcode() == ISD::SUB && isa(RHS.getOperand(0)) && - cast(RHS.getOperand(0))->getZExtValue() == 0 && + if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags @@ -1156,8 +1200,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, // the absence of information about op2. Opcode = AArch64ISD::ADDS; RHS = RHS.getOperand(1); - } else if (LHS.getOpcode() == ISD::AND && isa(RHS) && - cast(RHS)->getZExtValue() == 0 && + } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) { // Similarly, (CMP (and X, Y), 0) can be implemented with a TST // (a.k.a. ANDS) except that the flags are only guaranteed to work for one @@ -1167,14 +1210,230 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, LHS = LHS.getOperand(0); } - return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS) + return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS) .getValue(1); } +/// \defgroup AArch64CCMP CMP;CCMP matching +/// +/// These functions deal with the formation of CMP;CCMP;... sequences. +/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of +/// a comparison. They set the NZCV flags to a predefined value if their +/// predicate is false. 
This allows us to express arbitrary conjunctions, for +/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))" +/// expressed as: +/// cmp A +/// ccmp B, inv(CB), CA +/// check for CB flags +/// +/// In general we can create code for arbitrary "... (and (and A B) C)" +/// sequences. We can also implement some "or" expressions, because "(or A B)" +/// is equivalent to "not (and (not A) (not B))" and we can implement some +/// negation operations: +/// We can negate the results of a single comparison by inverting the flags +/// used when the predicate fails and inverting the flags tested in the next +/// instruction; we can also negate the results of the whole previous +/// conditional compare sequence by inverting the flags tested in the next +/// instruction. However, there is no way to negate the result of a partial +/// sequence. +/// +/// Therefore on encountering an "or" expression we can negate the subtree on +/// one side and have to be able to push the negate to the leaves of the subtree +/// on the other side (see also the comments in code). As a complete example: +/// "or (or (setCA (cmp A)) (setCB (cmp B))) +/// (and (setCC (cmp C)) (setCD (cmp D)))" +/// is transformed to +/// "not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) +/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))" +/// and implemented as: +/// cmp C +/// ccmp D, inv(CD), CC +/// ccmp A, CA, inv(CD) +/// ccmp B, CB, inv(CA) +/// check for CB flags +/// A counterexample is "or (and A B) (and C D)" which cannot be implemented +/// by conditional compare sequences. +/// @{ + +/// Create a conditional comparison; use CCMP, CCMN or FCCMP as appropriate. +static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, + ISD::CondCode CC, SDValue CCOp, + SDValue Condition, unsigned NZCV, + SDLoc DL, SelectionDAG &DAG) { + unsigned Opcode = 0; + if (LHS.getValueType().isFloatingPoint()) + Opcode = AArch64ISD::FCCMP; + else if (RHS.getOpcode() == ISD::SUB) { + SDValue SubOp0 = RHS.getOperand(0); + if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // See emitComparison() on why we can only do this for SETEQ and SETNE. + Opcode = AArch64ISD::CCMN; + RHS = RHS.getOperand(1); + } + } + if (Opcode == 0) + Opcode = AArch64ISD::CCMP; + + SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); + return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); +} + +/// Returns true if @p Val is a tree of AND/OR/SETCC operations. +/// CanPushNegate is set to true if we can push a negate operation through +/// the tree in a way that we are left with AND operations and negate operations +/// at the leaves only, i.e. "not (or (or x y) z)" can be changed to +/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be +/// brought into such a form. +static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanPushNegate, + unsigned Depth = 0) { + if (!Val.hasOneUse()) + return false; + unsigned Opcode = Val->getOpcode(); + if (Opcode == ISD::SETCC) { + CanPushNegate = true; + return true; + } + // Protect against stack overflow.
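+  // The fixed cap below bounds the recursion: e.g. an "(and (and (and ...)))"
+  // chain nested more than 15 levels deep is rejected here and left to
+  // generic lowering instead.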
+ if (Depth > 15) + return false; + if (Opcode == ISD::AND || Opcode == ISD::OR) { + SDValue O0 = Val->getOperand(0); + SDValue O1 = Val->getOperand(1); + bool CanPushNegateL; + if (!isConjunctionDisjunctionTree(O0, CanPushNegateL, Depth+1)) + return false; + bool CanPushNegateR; + if (!isConjunctionDisjunctionTree(O1, CanPushNegateR, Depth+1)) + return false; + // We cannot push a negate through an AND operation (it would become an OR); + // however, we can change a (not (or x y)) to (and (not x) (not y)) if we can + // push the negate through the x/y subtrees. + CanPushNegate = (Opcode == ISD::OR) && CanPushNegateL && CanPushNegateR; + return true; + } + return false; +} + +/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain +/// of CCMP/FCCMP ops. See @ref AArch64CCMP. +/// Tries to transform the given i1 producing node @p Val to a series of compare +/// and conditional compare operations. @returns an NZCV flags producing node +/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if +/// the transformation was not possible. +/// On recursive invocations @p PushNegate may be set to true to have negation +/// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate +/// for the comparisons in the current subtree; @p Depth limits the search +/// depth to avoid stack overflow. +static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val, + AArch64CC::CondCode &OutCC, bool PushNegate = false, + SDValue CCOp = SDValue(), AArch64CC::CondCode Predicate = AArch64CC::AL, + unsigned Depth = 0) { + // We're at a tree leaf; produce a conditional comparison operation. + unsigned Opcode = Val->getOpcode(); + if (Opcode == ISD::SETCC) { + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get(); + bool isInteger = LHS.getValueType().isInteger(); + if (PushNegate) + CC = getSetCCInverse(CC, isInteger); + SDLoc DL(Val); + // Determine OutCC and handle FP special case. + if (isInteger) { + OutCC = changeIntCCToAArch64CC(CC); + } else { + assert(LHS.getValueType().isFloatingPoint()); + AArch64CC::CondCode ExtraCC; + changeFPCCToAArch64CC(CC, OutCC, ExtraCC); + // Surprisingly, some floating point conditions can't be tested with a + // single condition code. Construct an additional comparison in this case. + // See comment below on how we deal with OR conditions. + if (ExtraCC != AArch64CC::AL) { + SDValue ExtraCmp; + if (!CCOp.getNode()) + ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG); + else { + SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC); + // Note that we want the inverse of ExtraCC, so NZCV is not inverted. + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(ExtraCC); + ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, + NZCV, DL, DAG); + } + CCOp = ExtraCmp; + Predicate = AArch64CC::getInvertedCondCode(ExtraCC); + OutCC = AArch64CC::getInvertedCondCode(OutCC); + } + } + + // Produce a normal comparison if we are first in the chain. + if (!CCOp.getNode()) + return emitComparison(LHS, RHS, CC, DL, DAG); + // Otherwise produce a ccmp.
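+  // As an illustrative sketch (hypothetical registers), lowering
+  // "(and (setcc lt (cmp a, b)) (setcc gt (cmp c, d)))" comes out roughly as:
+  //   cmp  w2, w3
+  //   ccmp w0, w1, #<nzcv failing lt>, gt
+  //   cset w4, lt
+  // The right-hand compare is emitted first, and the NZCV immediate is
+  // chosen so the final test fails whenever the predicated compare is skipped.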
+ SDValue ConditionOp = DAG.getConstant(Predicate, DL, MVT_CC); + AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); + unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); + return emitConditionalComparison(LHS, RHS, CC, CCOp, ConditionOp, NZCV, DL, + DAG); + } else if ((Opcode != ISD::AND && Opcode != ISD::OR) || !Val->hasOneUse()) + return SDValue(); + + assert((Opcode == ISD::OR || !PushNegate) + && "Can only push negate through OR operation"); + + // Check if both sides can be transformed. + SDValue LHS = Val->getOperand(0); + SDValue RHS = Val->getOperand(1); + bool CanPushNegateL; + if (!isConjunctionDisjunctionTree(LHS, CanPushNegateL, Depth+1)) + return SDValue(); + bool CanPushNegateR; + if (!isConjunctionDisjunctionTree(RHS, CanPushNegateR, Depth+1)) + return SDValue(); + + // Do we need to negate our operands? + bool NegateOperands = Opcode == ISD::OR; + // We can negate the results of all previous operations by inverting the + // predicate flags, giving us a free negation for one side. For the other side + // we need to be able to push the negation to the leaves of the tree. + if (NegateOperands) { + if (!CanPushNegateL && !CanPushNegateR) + return SDValue(); + // Order the side where we can push the negate through to LHS. + if (!CanPushNegateL && CanPushNegateR) + std::swap(LHS, RHS); + } else { + bool NeedsNegOutL = LHS->getOpcode() == ISD::OR; + bool NeedsNegOutR = RHS->getOpcode() == ISD::OR; + if (NeedsNegOutL && NeedsNegOutR) + return SDValue(); + // Order the side where we need to negate the output flags to RHS so it + // gets emitted first. + if (NeedsNegOutL) + std::swap(LHS, RHS); + } + + // Emit RHS. If we want to negate the tree we only need to push a negate + // through if we are already in a PushNegate case; otherwise we can negate + // the "flags to test" afterwards. + AArch64CC::CondCode RHSCC; + SDValue CmpR = emitConjunctionDisjunctionTree(DAG, RHS, RHSCC, PushNegate, + CCOp, Predicate, Depth+1); + if (NegateOperands && !PushNegate) + RHSCC = AArch64CC::getInvertedCondCode(RHSCC); + // Emit LHS. We must push the negate through if we need to negate it. + SDValue CmpL = emitConjunctionDisjunctionTree(DAG, LHS, OutCC, NegateOperands, + CmpR, RHSCC, Depth+1); + // If we transformed an OR to an AND then we have to negate the result + // (or absorb a PushNegate resulting in a double negation). + if (Opcode == ISD::OR && !PushNegate) + OutCC = AArch64CC::getInvertedCondCode(OutCC); + return CmpL; +} + +/// @} + static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) { - SDValue Cmp; - AArch64CC::CondCode AArch64CC; if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { EVT VT = RHS.getValueType(); uint64_t C = RHSC->getZExtValue(); @@ -1229,47 +1488,56 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, } } } - // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. - // For the i8 operand, the largest immediate is 255, so this can be easily - // encoded in the compare instruction. For the i16 operand, however, the - // largest immediate cannot be encoded in the compare. - // Therefore, use a sign extending load and cmn to avoid materializing the -1 - // constant. For example, - // movz w1, #65535 - // ldrh w0, [x0, #0] - // cmp w0, w1 - // > - // ldrsh w0, [x0, #0] - // cmn w0, #1 - // Fundamental, we're relying on the property that (zext LHS) == (zext RHS)
The checks are in place to ensure - // both the LHS and RHS are truely zero extended and to make sure the - // transformation is profitable. + SDValue Cmp; + AArch64CC::CondCode AArch64CC; if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa(RHS)) { - if ((cast(RHS)->getZExtValue() >> 16 == 0) && - isa(LHS)) { - if (cast(LHS)->getExtensionType() == ISD::ZEXTLOAD && - cast(LHS)->getMemoryVT() == MVT::i16 && - LHS.getNode()->hasNUsesOfValue(1, 0)) { - int16_t ValueofRHS = cast(RHS)->getZExtValue(); - if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { - SDValue SExt = - DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, - DAG.getValueType(MVT::i16)); - Cmp = emitComparison(SExt, - DAG.getConstant(ValueofRHS, dl, - RHS.getValueType()), - CC, dl, DAG); - AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); - return Cmp; - } + const ConstantSDNode *RHSC = cast(RHS); + + // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095. + // For the i8 operand, the largest immediate is 255, so this can be easily + // encoded in the compare instruction. For the i16 operand, however, the + // largest immediate cannot be encoded in the compare. + // Therefore, use a sign extending load and cmn to avoid materializing the + // -1 constant. For example, + // movz w1, #65535 + // ldrh w0, [x0, #0] + // cmp w0, w1 + // > + // ldrsh w0, [x0, #0] + // cmn w0, #1 + // Fundamental, we're relying on the property that (zext LHS) == (zext RHS) + // if and only if (sext LHS) == (sext RHS). The checks are in place to + // ensure both the LHS and RHS are truly zero extended and to make sure the + // transformation is profitable. + if ((RHSC->getZExtValue() >> 16 == 0) && isa(LHS) && + cast(LHS)->getExtensionType() == ISD::ZEXTLOAD && + cast(LHS)->getMemoryVT() == MVT::i16 && + LHS.getNode()->hasNUsesOfValue(1, 0)) { + int16_t ValueofRHS = cast(RHS)->getZExtValue(); + if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) { + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, + DAG.getValueType(MVT::i16)); + Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, + RHS.getValueType()), + CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); + } + } + + if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) { + if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) { + if ((CC == ISD::SETNE) ^ RHSC->isNullValue()) + AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC); } } } - Cmp = emitComparison(LHS, RHS, CC, dl, DAG); - AArch64CC = changeIntCCToAArch64CC(CC); - AArch64cc = DAG.getConstant(AArch64CC, dl, MVT::i32); + + if (!Cmp) { + Cmp = emitComparison(LHS, RHS, CC, dl, DAG); + AArch64CC = changeIntCCToAArch64CC(CC); + } + AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); return Cmp; } @@ -1391,8 +1659,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG, RTLIB::Libcall Call) const { SmallVector Ops(Op->op_begin(), Op->op_end()); - return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false, - SDLoc(Op)).first; + return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first; } static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) { @@ -1571,8 +1838,8 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op, // precise. That doesn't take part in the LibCall so we can't directly use // LowerF128Call. 
SDValue SrcVal = Op.getOperand(0); - return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, - /*isSigned*/ false, SDLoc(Op)).first; + return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false, + SDLoc(Op)).first; } static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { @@ -1581,6 +1848,16 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { // in the cost tables. EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); + unsigned NumElts = InVT.getVectorNumElements(); + + // f16 vectors are promoted to f32 before a conversion. + if (InVT.getVectorElementType() == MVT::f16) { + MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); + SDLoc dl(Op); + return DAG.getNode( + Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); + } if (VT.getSizeInBits() < InVT.getSizeInBits()) { SDLoc dl(Op); @@ -1628,8 +1905,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); SmallVector Ops(Op->op_begin(), Op->op_end()); - return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false, - SDLoc(Op)).first; + return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first; } static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { @@ -1931,6 +2207,31 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } +SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, + SelectionDAG &DAG) const { + unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); + SDLoc dl(Op); + switch (IntNo) { + default: return SDValue(); // Don't custom lower most intrinsics. + case Intrinsic::aarch64_thread_pointer: { + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT); + } + case Intrinsic::aarch64_neon_smax: + return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_neon_umax: + return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_neon_smin: + return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::aarch64_neon_umin: + return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } +} + SDValue AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -2032,14 +2333,11 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return LowerFSINCOS(Op, DAG); case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: + return LowerINTRINSIC_WO_CHAIN(Op, DAG); } } -/// getFunctionAlignment - Return the Log2 alignment of this function. 
-unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const { - return 2; -} - //===----------------------------------------------------------------------===// // Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -2214,9 +2512,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments( break; } - ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - MemVT, false, false, false, 0); + ArgValue = DAG.getExtLoad( + ExtType, DL, VA.getLocVT(), Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + MemVT, false, false, false, 0); InVals.push_back(ArgValue); } @@ -2289,9 +2588,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) { unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); - SDValue Store = - DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 8), false, false, 0); + SDValue Store = DAG.getStore( + Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8), false, + false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT)); @@ -2318,9 +2618,10 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo, unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass); SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); - SDValue Store = - DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 16), false, false, 0); + SDValue Store = DAG.getStore( + Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16), + false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(16, DL, PtrVT)); @@ -2453,8 +2754,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true)); - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) - if (!ArgLocs[i].isRegLoc()) + for (const CCValAssign &ArgLoc : ArgLocs) + if (!ArgLoc.isRegLoc()) return false; } @@ -2758,7 +3059,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); DstAddr = DAG.getFrameIndex(FI, PtrVT); - DstInfo = MachinePointerInfo::getFixedStack(FI); + DstInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Make sure any stack arguments overlapping with where we're storing // are loaded before this eventual operation. Otherwise they'll be @@ -2768,7 +3070,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL); DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); - DstInfo = MachinePointerInfo::getStack(LocMemOffset); + DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(), + LocMemOffset); } if (Outs[i].Flags.isByVal()) { @@ -2802,9 +3105,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. 
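+  // InFlag threads glue through the CopyToReg sequence below so the scheduler
+  // cannot separate the argument copies from the call they feed.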
SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, - RegsToPass[i].second, InFlag); + for (auto &RegToPass : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first, + RegToPass.second, InFlag); InFlag = Chain.getValue(1); } @@ -2860,9 +3163,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Add argument registers to the end of the list so that they are known live // into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); + for (auto &RegToPass : RegsToPass) + Ops.push_back(DAG.getRegister(RegToPass.first, + RegToPass.second.getValueType())); // Add a register mask operand representing the call-preserved registers. const uint32_t *Mask; @@ -2968,6 +3271,19 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, Flag = Chain.getValue(1); RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); } + const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); + const MCPhysReg *I = + TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); + if (I) { + for (; *I; ++I) { + if (AArch64::GPR64RegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::i64)); + else if (AArch64::FPR64RegClass.contains(*I)) + RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64))); + else + llvm_unreachable("Unexpected register class in CSRsViaCopy!"); + } + } RetOps[0] = Chain; // Update chain. @@ -3010,11 +3326,12 @@ SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, unsigned char LoFlags = AArch64II::MO_PAGEOFF | AArch64II::MO_NC; SDValue Lo = DAG.getTargetConstantPool(GV, PtrVT, 0, 0, LoFlags); SDValue PoolAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); - SDValue GlobalAddr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), PoolAddr, - MachinePointerInfo::getConstantPool(), - /*isVolatile=*/ false, - /*isNonTemporal=*/ true, - /*isInvariant=*/ true, 8); + SDValue GlobalAddr = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), PoolAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + /*isVolatile=*/false, + /*isNonTemporal=*/true, + /*isInvariant=*/true, 8); if (GN->getOffset() != 0) return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalAddr, DAG.getConstant(GN->getOffset(), DL, PtrVT)); @@ -3087,8 +3404,9 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = - DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(), - false, true, true, 8); + DAG.getLoad(MVT::i64, DL, Chain, DescAddr, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, + true, true, 8); Chain = FuncTLVGet.getValue(1); MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); @@ -3160,6 +3478,10 @@ AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op, const GlobalAddressSDNode *GA = cast(Op); TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal()); + + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + if (!EnableAArch64ELFLocalDynamicTLSGeneration) { if (Model == TLSModel::LocalDynamic) Model = TLSModel::GeneralDynamic; @@ -3277,8 +3599,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch // instruction. 
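+  // A typical input looks like (illustrative IR):
+  //   %r  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+  //   %ov = extractvalue { i32, i1 } %r, 1
+  //   br i1 %ov, label %overflow, label %cont
+  // so LHS is the i1 overflow result (ResNo == 1) compared against 1.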
unsigned Opc = LHS.getOpcode(); - if (LHS.getResNo() == 1 && isa(RHS) && - cast(RHS)->isOne() && + if (LHS.getResNo() == 1 && isOneConstant(RHS) && (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) { assert((CC == ISD::SETEQ || CC == ISD::SETNE) && @@ -3392,17 +3713,11 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, SDValue In1 = Op.getOperand(0); SDValue In2 = Op.getOperand(1); EVT SrcVT = In2.getValueType(); - if (SrcVT != VT) { - if (SrcVT == MVT::f32 && VT == MVT::f64) - In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); - else if (SrcVT == MVT::f64 && VT == MVT::f32) - In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, - DAG.getIntPtrConstant(0, DL)); - else - // FIXME: Src type is different, bail out for now. Can VT really be a - // vector type? - return SDValue(); - } + + if (SrcVT.bitsLT(VT)) + In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); + else if (SrcVT.bitsGT(VT)) + In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL)); EVT VecVT; EVT EltVT; @@ -3410,7 +3725,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op, SDValue VecVal1, VecVal2; if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) { EltVT = MVT::i32; - VecVT = MVT::v4i32; + VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32); EltMask = 0x80000000ULL; if (!VT.isVector()) { @@ -3571,32 +3886,6 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { } } -/// A SELECT_CC operation is really some kind of max or min if both values being -/// compared are, in some sense, equal to the results in either case. However, -/// it is permissible to compare f32 values and produce directly extended f64 -/// values. -/// -/// Extending the comparison operands would also be allowed, but is less likely -/// to happen in practice since their use is right here. Note that truncate -/// operations would *not* be semantically equivalent. -static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) { - if (Cmp == Result) - return (Cmp.getValueType() == MVT::f32 || - Cmp.getValueType() == MVT::f64); - - ConstantFPSDNode *CCmp = dyn_cast(Cmp); - ConstantFPSDNode *CResult = dyn_cast(Result); - if (CCmp && CResult && Cmp.getValueType() == MVT::f32 && - Result.getValueType() == MVT::f64) { - bool Lossy; - APFloat CmpVal = CCmp->getValueAPF(); - CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy); - return CResult->getValueAPF().bitwiseIsEqual(CmpVal); - } - - return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp; -} - SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal, SDLoc dl, @@ -3614,7 +3903,13 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, } } - // Handle integers first. + // Also handle f16, for which we need to do a f32 comparison. + if (LHS.getValueType() == MVT::f16) { + LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS); + RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS); + } + + // Next, handle integers. if (LHS.getValueType().isInteger()) { assert((LHS.getValueType() == RHS.getValueType()) && (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64)); @@ -3637,9 +3932,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, } else if (TVal.getOpcode() == ISD::XOR) { // If TVal is a NOT we want to swap TVal and FVal so that we can match // with a CSINV rather than a CSEL. 
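+    // CSINV Wd, Wn, Wm, cond yields Wn when cond holds and ~Wm otherwise, so
+    // swapping the arms and inverting the condition moves the NOT onto the
+    // operand CSINV inverts for free.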
- ConstantSDNode *CVal = dyn_cast(TVal.getOperand(1)); - - if (CVal && CVal->isAllOnesValue()) { + if (isAllOnesConstant(TVal.getOperand(1))) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); @@ -3647,9 +3940,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, } else if (TVal.getOpcode() == ISD::SUB) { // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so // that we can match with a CSNEG rather than a CSEL. - ConstantSDNode *CVal = dyn_cast(TVal.getOperand(0)); - - if (CVal && CVal->isNullValue()) { + if (isNullConstant(TVal.getOperand(0))) { std::swap(TVal, FVal); std::swap(CTVal, CFVal); CC = ISD::getSetCCInverse(CC, true); @@ -4109,46 +4400,57 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue ARMcc; unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); + + // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which + // is "undef". We wanted 0, so CSEL it directly. + SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64), + ISD::SETEQ, dl, DAG); + SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32); + HiBitsForLo = + DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64), + HiBitsForLo, CCVal, Cmp); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, DAG.getConstant(VTBits, dl, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), - ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); + SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue LoForNormalShift = + DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo); - SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); - SDValue Lo = - DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); + Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, + dl, DAG); + CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); + SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift, + LoForNormalShift, CCVal, Cmp); // AArch64 shifts larger than the register width are wrapped rather than // clamped, so we can't just emit "hi >> x". - SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue TrueValHi = Opc == ISD::SRA - ? DAG.getNode(Opc, dl, VT, ShOpHi, - DAG.getConstant(VTBits - 1, dl, - MVT::i64)) - : DAG.getConstant(0, dl, VT); - SDValue Hi = - DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp); + SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue HiForBigShift = + Opc == ISD::SRA + ? 
DAG.getNode(Opc, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, dl, MVT::i64)) + : DAG.getConstant(0, dl, VT); + SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift, + HiForNormalShift, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); } + /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i64 values and take a 2 x i64 value to shift plus a shift amount. SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -4156,31 +4458,41 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); - SDValue ARMcc; assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, DAG.getConstant(VTBits, dl, MVT::i64), ShAmt); - SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + + // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which + // is "undef". We wanted 0, so CSEL it directly. + SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64), + ISD::SETEQ, dl, DAG); + SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32); + LoBitsForHi = + DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64), + LoBitsForHi, CCVal, Cmp); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt, DAG.getConstant(VTBits, dl, MVT::i64)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue HiForNormalShift = + DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); - SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), - ISD::SETGE, dl, DAG); - SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); - SDValue Hi = - DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp); + Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE, + dl, DAG); + CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32); + SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift, + HiForNormalShift, CCVal, Cmp); // AArch64 shifts of larger than register sizes are wrapped rather than // clamped, so we can't just emit "lo << a" if a is too big. - SDValue TrueValLo = DAG.getConstant(0, dl, VT); - SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Lo = - DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp); + SDValue LoForBigShift = DAG.getConstant(0, dl, VT); + SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift, + LoForNormalShift, CCVal, Cmp); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -4362,8 +4674,7 @@ void AArch64TargetLowering::LowerAsmOperandForConstraint( // Validate and return a target constant for them if we can. case 'z': { // 'z' maps to xzr or wzr so it needs an input of 0. 
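+    // Hypothetical usage sketch: asm volatile("str %x0, [%1]" :: "z"(0LL), "r"(p));
+    // lets the backend encode xzr directly as the store source.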
- ConstantSDNode *C = dyn_cast(Op); - if (!C || C->getZExtValue() != 0) + if (!isNullConstant(Op)) return; if (Op.getValueType() == MVT::i64) @@ -5653,11 +5964,10 @@ static SDValue NormalizeBuildVector(SDValue Op, return Op; SmallVector Ops; - for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) { - SDValue Lane = Op.getOperand(I); - if (Lane.getOpcode() == ISD::Constant) { + for (SDValue Lane : Op->ops()) { + if (auto *CstLane = dyn_cast(Lane)) { APInt LowBits(EltTy.getSizeInBits(), - cast(Lane)->getZExtValue()); + CstLane->getZExtValue()); Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32); } Ops.push_back(Lane); @@ -5997,8 +6307,7 @@ FailedModImm: // Empirical tests suggest this is rarely worth it for vectors of length <= 2. if (NumElts >= 4) { - SDValue shuffle = ReconstructShuffle(Op, DAG); - if (shuffle != SDValue()) + if (SDValue shuffle = ReconstructShuffle(Op, DAG)) return shuffle; } @@ -6017,7 +6326,10 @@ FailedModImm: // a) Avoid a RMW dependency on the full vector register, and // b) Allow the register coalescer to fold away the copy if the // value is already in an S or D register. - if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) { + // Do not do this for UNDEF/LOAD nodes because we have better patterns + // for those avoiding the SCALAR_TO_VECTOR/BUILD_VECTOR. + if (Op0.getOpcode() != ISD::UNDEF && Op0.getOpcode() != ISD::LOAD && + (ElemSize == 32 || ElemSize == 64)) { unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub; MachineSDNode *N = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0, @@ -6123,24 +6435,11 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, unsigned Val = Cst->getZExtValue(); unsigned Size = Op.getValueType().getSizeInBits(); - if (Val == 0) { - switch (Size) { - case 8: - return DAG.getTargetExtractSubreg(AArch64::bsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 16: - return DAG.getTargetExtractSubreg(AArch64::hsub, dl, Op.getValueType(), - Op.getOperand(0)); - case 32: - return DAG.getTargetExtractSubreg(AArch64::ssub, dl, Op.getValueType(), - Op.getOperand(0)); - case 64: - return DAG.getTargetExtractSubreg(AArch64::dsub, dl, Op.getValueType(), - Op.getOperand(0)); - default: - llvm_unreachable("Unexpected vector type in extract_subvector!"); - } - } + + // This will get lowered to an appropriate EXTRACT_SUBREG in ISel. + if (Val == 0) + return Op; + // If this is extracting the upper 64-bits of a 128-bit vector, we match // that directly. if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64) @@ -6213,26 +6512,20 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { /// 0 <= Value <= ElementBits for a long left shift. static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + int64_t ElementBits = VT.getVectorElementType().getSizeInBits(); if (!getVShiftImm(Op, ElementBits, Cnt)) return false; return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits); } /// isVShiftRImm - Check if this is a valid build_vector for the immediate -/// operand of a vector shift right operation. For a shift opcode, the value -/// is positive, but for an intrinsic the value count must be negative. The -/// absolute value must be in the range: -/// 1 <= |Value| <= ElementBits for a right shift; or -/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 
-static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, - int64_t &Cnt) { +/// operand of a vector shift right operation. The value must be in the range: +/// 1 <= Value <= ElementBits for a right shift. +static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + int64_t ElementBits = VT.getVectorElementType().getSizeInBits(); if (!getVShiftImm(Op, ElementBits, Cnt)) return false; - if (isIntrinsic) - Cnt = -Cnt; return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits)); } @@ -6261,8 +6554,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op, case ISD::SRA: case ISD::SRL: // Right shift immediate - if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) && - Cnt < EltSize) { + if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) { unsigned Opc = (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR; return DAG.getNode(Opc, DL, VT, Op.getOperand(0), @@ -6451,7 +6743,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::aarch64_neon_ld4r: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; @@ -6477,7 +6769,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += DL.getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); @@ -6720,10 +7012,10 @@ bool AArch64TargetLowering::lowerInterleavedLoad( const DataLayout &DL = LI->getModule()->getDataLayout(); VectorType *VecTy = Shuffles[0]->getType(); - unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); + unsigned VecSize = DL.getTypeSizeInBits(VecTy); - // Skip illegal vector types. - if (VecSize != 64 && VecSize != 128) + // Skip if we do not have NEON and skip illegal vector types. + if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128)) return false; // A pointer vector can not be the return type of the ldN intrinsics. Need to @@ -6806,10 +7098,10 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); - // Skip illegal vector types. - if (SubVecSize != 64 && SubVecSize != 128) + // Skip if we do not have NEON and skip illegal vector types. + if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128)) return false; Value *Op0 = SVI->getOperand(0); @@ -7228,8 +7520,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { // First try to optimize away the conversion when it's conditionally from // a constant. Vectors only.
- SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG); - if (Res != SDValue()) + if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG)) return Res; EVT VT = N->getValueType(0); @@ -7242,7 +7533,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, // If the result of an integer load is only used by an integer-to-float // conversion, use a fp load instead and a AdvSIMD scalar {S|U}CVTF instead. - // This eliminates an "integer-to-vector-move UOP and improve throughput. + // This eliminates an "integer-to-vector-move" UOP and improves throughput. SDValue N0 = N->getOperand(0); if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. @@ -7265,6 +7556,134 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Fold a floating-point multiply by power of two into floating-point to +/// fixed-point conversion. +static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { + if (!Subtarget->hasNEON()) + return SDValue(); + + SDValue Op = N->getOperand(0); + if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL) + return SDValue(); + + SDValue ConstVec = Op->getOperand(1); + if (!isa(ConstVec)) + return SDValue(); + + MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); + uint32_t FloatBits = FloatTy.getSizeInBits(); + if (FloatBits != 32 && FloatBits != 64) + return SDValue(); + + MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); + uint32_t IntBits = IntTy.getSizeInBits(); + if (IntBits != 16 && IntBits != 32 && IntBits != 64) + return SDValue(); + + // Avoid conversions where iN is larger than the float (e.g., float -> i64). + if (IntBits > FloatBits) + return SDValue(); + + BitVector UndefElements; + BuildVectorSDNode *BV = cast(ConstVec); + int32_t Bits = IntBits == 64 ? 64 : 32; + int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1); + if (C == -1 || C == 0 || C > Bits) + return SDValue(); + + MVT ResTy; + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + switch (NumLanes) { + default: + return SDValue(); + case 2: + ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64; + break; + case 4: + ResTy = MVT::v4i32; + break; + } + + SDLoc DL(N); + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; + unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs + : Intrinsic::aarch64_neon_vcvtfp2fxu; + SDValue FixConv = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy, + DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), + Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32)); + // We can handle smaller integers by generating an extra trunc. + if (IntBits < FloatBits) + FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv); + + return FixConv; +} + +/// Fold a floating-point divide by power of two into fixed-point to +/// floating-point conversion. 
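+/// For example (sketch): (fdiv (sint_to_fp v2i32:$a), splat(8.0)) becomes the
+/// fixed-point conversion (int_aarch64_neon_vcvtfxs2fp $a, #3), folding the
+/// divide by 2^3 into the instruction's fractional-bits operand.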
+static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { + if (!Subtarget->hasNEON()) + return SDValue(); + + SDValue Op = N->getOperand(0); + unsigned Opc = Op->getOpcode(); + if (!Op.getValueType().isVector() || + (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP)) + return SDValue(); + + SDValue ConstVec = N->getOperand(1); + if (!isa(ConstVec)) + return SDValue(); + + MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); + int32_t IntBits = IntTy.getSizeInBits(); + if (IntBits != 16 && IntBits != 32 && IntBits != 64) + return SDValue(); + + MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); + int32_t FloatBits = FloatTy.getSizeInBits(); + if (FloatBits != 32 && FloatBits != 64) + return SDValue(); + + // Avoid conversions where iN is larger than the float (e.g., i64 -> float). + if (IntBits > FloatBits) + return SDValue(); + + BitVector UndefElements; + BuildVectorSDNode *BV = cast(ConstVec); + int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1); + if (C == -1 || C == 0 || C > FloatBits) + return SDValue(); + + MVT ResTy; + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + switch (NumLanes) { + default: + return SDValue(); + case 2: + ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64; + break; + case 4: + ResTy = MVT::v4i32; + break; + } + + SDLoc DL(N); + SDValue ConvInput = Op.getOperand(0); + bool IsSigned = Opc == ISD::SINT_TO_FP; + if (IntBits < FloatBits) + ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, + ResTy, ConvInput); + + unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp + : Intrinsic::aarch64_neon_vcvtfxu2fp; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), + DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput, + DAG.getConstant(C, DL, MVT::i32)); +} + /// An EXTR instruction is made up of two shifts, ORed together. This helper /// searches for and classifies those shifts. 
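+/// For example (sketch, i64): (or (shl x, 48), (srl y, 16)) is EXTR x, y, #16,
+/// i.e. the low 64 bits of the 128-bit concatenation x:y shifted right by 16.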
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, @@ -7964,7 +8383,6 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_vcvtfxs2fp: case Intrinsic::aarch64_neon_vcvtfxu2fp: return tryCombineFixedPointConvert(N, DCI, DAG); - break; case Intrinsic::aarch64_neon_saddv: return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG); case Intrinsic::aarch64_neon_uaddv: @@ -7978,10 +8396,16 @@ static SDValue performIntrinsicCombine(SDNode *N, case Intrinsic::aarch64_neon_umaxv: return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG); case Intrinsic::aarch64_neon_fmax: - return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0), + return DAG.getNode(ISD::FMAXNAN, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_fmin: - return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0), + return DAG.getNode(ISD::FMINNAN, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2)); + case Intrinsic::aarch64_neon_fmaxnm: + return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2)); + case Intrinsic::aarch64_neon_fminnm: + return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2)); case Intrinsic::aarch64_neon_smull: case Intrinsic::aarch64_neon_umull: @@ -8141,7 +8565,7 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { unsigned Alignment = std::min(OrigAlignment, EltOffset); // Create scalar stores. This is at least as good as the code sequence for a - // split unaligned store wich is a dup.s, ext.b, and two stores. + // split unaligned store which is a dup.s, ext.b, and two stores. // Most of the time the three stores should be replaced by store pair // instructions (stp). SDLoc DL(St); @@ -8162,10 +8586,9 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) { return NewST1; } -static SDValue performSTORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - SelectionDAG &DAG, - const AArch64Subtarget *Subtarget) { +static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + SelectionDAG &DAG, + const AArch64Subtarget *Subtarget) { if (!DCI.isBeforeLegalize()) return SDValue(); @@ -8173,15 +8596,17 @@ static SDValue performSTORECombine(SDNode *N, if (S->isVolatile()) return SDValue(); + // FIXME: The logic for deciding if an unaligned store should be split should + // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be + // a call to that function here. + // Cyclone has bad performance on unaligned 16B stores when crossing line and // page boundaries. We want to split such stores. if (!Subtarget->isCyclone()) return SDValue(); - // Don't split at Oz. - MachineFunction &MF = DAG.getMachineFunction(); - bool IsMinSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize); - if (IsMinSize) + // Don't split at -Oz. + if (DAG.getMachineFunction().getFunction()->optForMinSize()) return SDValue(); SDValue StVal = S->getValue(); @@ -8204,8 +8629,7 @@ static SDValue performSTORECombine(SDNode *N, // If we get a splat of a scalar convert this vector store to a store of // scalars. They will be merged into store pairs thereby removing two // instructions. 
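+  // E.g. (sketch): a v4i32 splat store becomes four 32-bit str instructions at
+  // offsets 0, 4, 8 and 12, which the pairing pass then folds into two stp's.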
-  SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S);
-  if (ReplacedSplat != SDValue())
+  if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S))
     return ReplacedSplat;
 
   SDLoc DL(S);
@@ -8326,6 +8750,299 @@ static SDValue performPostLD1Combine(SDNode *N,
   return SDValue();
 }
 
+/// Simplify \p Addr given that the top byte of it is ignored by HW during
+/// address translation.
+static bool performTBISimplification(SDValue Addr,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     SelectionDAG &DAG) {
+  APInt DemandedMask = APInt::getLowBitsSet(64, 56);
+  APInt KnownZero, KnownOne;
+  TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(),
+                                        DCI.isBeforeLegalizeOps());
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, KnownZero, KnownOne, TLO)) {
+    DCI.CommitTargetLoweringOpt(TLO);
+    return true;
+  }
+  return false;
+}
+
+static SDValue performSTORECombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   SelectionDAG &DAG,
+                                   const AArch64Subtarget *Subtarget) {
+  SDValue Split = split16BStores(N, DCI, DAG, Subtarget);
+  if (Split.getNode())
+    return Split;
+
+  if (Subtarget->supportsAddressTopByteIgnored() &&
+      performTBISimplification(N->getOperand(2), DCI, DAG))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
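As a standalone illustration (not part of the patch) of what "top byte ignored" buys here: only bits [55:0] of an address take part in translation, so SimplifyDemandedBits is free to rewrite whatever feeds bits [63:56].

#include <cassert>
#include <cstdint>

// Model of the demanded-bits mask above: APInt::getLowBitsSet(64, 56).
static uint64_t demandedAddressBits(uint64_t Addr) {
  return Addr & ((UINT64_C(1) << 56) - 1); // keep bits [55:0]
}

int main() {
  uint64_t Plain  = UINT64_C(0x0000123456789ABC);
  uint64_t Tagged = UINT64_C(0xAB00123456789ABC); // tag in bits [63:56]
  assert(demandedAddressBits(Tagged) == demandedAddressBits(Plain));
  return 0;
}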
+/// This function handles the log2-shuffle pattern produced by the
+/// LoopVectorizer for the across vector reduction. It consists of
+/// log2(NumVectorElements) steps and, in each step, 2^(s) elements
+/// are reduced, where s is an induction variable from 0 to
+/// log2(NumVectorElements).
+static SDValue tryMatchAcrossLaneShuffleForReduction(SDNode *N, SDValue OpV,
+                                                     unsigned Op,
+                                                     SelectionDAG &DAG) {
+  EVT VTy = OpV->getOperand(0).getValueType();
+  if (!VTy.isVector())
+    return SDValue();
+
+  int NumVecElts = VTy.getVectorNumElements();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (NumVecElts != 4)
+      return SDValue();
+  } else {
+    if (NumVecElts != 4 && NumVecElts != 8 && NumVecElts != 16)
+      return SDValue();
+  }
+
+  int NumExpectedSteps = APInt(8, NumVecElts).logBase2();
+  SDValue PreOp = OpV;
+  // Iterate over each step of the across vector reduction.
+  for (int CurStep = 0; CurStep != NumExpectedSteps; ++CurStep) {
+    SDValue CurOp = PreOp.getOperand(0);
+    SDValue Shuffle = PreOp.getOperand(1);
+    if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE) {
+      // Try to swap the 1st and 2nd operand as add and min/max instructions
+      // are commutative.
+      CurOp = PreOp.getOperand(1);
+      Shuffle = PreOp.getOperand(0);
+      if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
+        return SDValue();
+    }
+
+    // Check if the input vector is fed by the operator we want to handle,
+    // except the last step; the very first input vector is not necessarily
+    // the same operator we are handling.
+    if (CurOp.getOpcode() != Op && (CurStep != (NumExpectedSteps - 1)))
+      return SDValue();
+
+    // Check if it forms one step of the across vector reduction.
+    // E.g.,
+    //   %cur = add %1, %0
+    //   %shuffle = vector_shuffle %cur, <2, 3, u, u>
+    //   %pre = add %cur, %shuffle
+    if (Shuffle.getOperand(0) != CurOp)
+      return SDValue();
+
+    int NumMaskElts = 1 << CurStep;
+    ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Shuffle)->getMask();
+    // Check mask values in each step.
+    // We expect the shuffle mask in each step follows a specific pattern
+    // denoted here by the <M, U, U, ...> form, where M is a sequence of
+    // integers starting from NumMaskElts, increasing by 1, and the number of
+    // integers in M should be NumMaskElts. U is a sequence of UNDEFs and the
+    // number of undefs in U should be NumVecElts - NumMaskElts.
+    // E.g., for <8 x i16>, mask values in each step should be:
+    //   step 0 : <1,u,u,u,u,u,u,u>
+    //   step 1 : <2,3,u,u,u,u,u,u>
+    //   step 2 : <4,5,6,7,u,u,u,u>
+    for (int i = 0; i < NumVecElts; ++i)
+      if ((i < NumMaskElts && Mask[i] != (NumMaskElts + i)) ||
+          (i >= NumMaskElts && !(Mask[i] < 0)))
+        return SDValue();
+
+    PreOp = CurOp;
+  }
+  unsigned Opcode;
+  bool IsIntrinsic = false;
+
+  switch (Op) {
+  default:
+    llvm_unreachable("Unexpected operator for across vector reduction");
+  case ISD::ADD:
+    Opcode = AArch64ISD::UADDV;
+    break;
+  case ISD::SMAX:
+    Opcode = AArch64ISD::SMAXV;
+    break;
+  case ISD::UMAX:
+    Opcode = AArch64ISD::UMAXV;
+    break;
+  case ISD::SMIN:
+    Opcode = AArch64ISD::SMINV;
+    break;
+  case ISD::UMIN:
+    Opcode = AArch64ISD::UMINV;
+    break;
+  case ISD::FMAXNUM:
+    Opcode = Intrinsic::aarch64_neon_fmaxnmv;
+    IsIntrinsic = true;
+    break;
+  case ISD::FMINNUM:
+    Opcode = Intrinsic::aarch64_neon_fminnmv;
+    IsIntrinsic = true;
+    break;
+  }
+  SDLoc DL(N);
+
+  return IsIntrinsic
+             ? DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
+                           DAG.getConstant(Opcode, DL, MVT::i32), PreOp)
+             : DAG.getNode(
+                   ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
+                   DAG.getNode(Opcode, DL, PreOp.getSimpleValueType(), PreOp),
+                   DAG.getConstant(0, DL, MVT::i64));
+}
+
+/// Target-specific DAG combine for the across vector min/max reductions.
+/// This function specifically handles the final clean-up step of the vector
+/// min/max reductions produced by the LoopVectorizer. It is the log2-shuffle
+/// pattern, which narrows down and finds the final min/max value from all
+/// elements of the vector.
+/// For example, for a <16 x i8> vector :
+///   %svn0 = vector_shuffle %0, undef<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u>
+///   %smax0 = smax %0, %svn0
+///   %svn1 = vector_shuffle %smax0, undef<4,5,6,7,u,u,u,u,u,u,u,u,u,u,u,u>
+///   %smax1 = smax %smax0, %svn1
+///   %svn2 = vector_shuffle %smax1, undef<2,3,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+///   %smax2 = smax %smax1, %svn2
+///   %svn3 = vector_shuffle %smax2, undef<1,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+///   %sc = setcc %smax2, %svn3, gt
+///   %n0 = extract_vector_elt %sc, #0
+///   %n1 = extract_vector_elt %smax2, #0
+///   %n2 = extract_vector_elt %smax2, #1
+///   %result = select %n0, %n1, %n2
+/// becomes :
+///   %1 = smaxv %0
+///   %result = extract_vector_elt %1, 0
+static SDValue
+performAcrossLaneMinMaxReductionCombine(SDNode *N, SelectionDAG &DAG,
+                                        const AArch64Subtarget *Subtarget) {
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue IfTrue = N->getOperand(1);
+  SDValue IfFalse = N->getOperand(2);
+
+  // Check if the SELECT merges up the final result of the min/max
+  // from a vector.
+  if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      IfTrue.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+      IfFalse.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+
+  // Expect N0 is fed by SETCC.
+  SDValue SetCC = N0.getOperand(0);
+  EVT SetCCVT = SetCC.getValueType();
+  if (SetCC.getOpcode() != ISD::SETCC || !SetCCVT.isVector() ||
+      SetCCVT.getVectorElementType() != MVT::i1)
+    return SDValue();
+
+  SDValue VectorOp = SetCC.getOperand(0);
+  unsigned Op = VectorOp->getOpcode();
+  // Check if the input vector is fed by the operator we want to handle.
+  if (Op != ISD::SMAX && Op != ISD::UMAX && Op != ISD::SMIN &&
+      Op != ISD::UMIN && Op != ISD::FMAXNUM && Op != ISD::FMINNUM)
+    return SDValue();
+
+  EVT VTy = VectorOp.getValueType();
+  if (!VTy.isVector())
+    return SDValue();
+
+  if (VTy.getSizeInBits() < 64)
+    return SDValue();
+
+  EVT EltTy = VTy.getVectorElementType();
+  if (Op == ISD::FMAXNUM || Op == ISD::FMINNUM) {
+    if (EltTy != MVT::f32)
+      return SDValue();
+  } else {
+    if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+      return SDValue();
+  }
+
+  // Check if extracting from the same vector.
+  // For example,
+  //   %sc = setcc %vector, %svn1, gt
+  //   %n0 = extract_vector_elt %sc, #0
+  //   %n1 = extract_vector_elt %vector, #0
+  //   %n2 = extract_vector_elt %vector, #1
+  if (!(VectorOp == IfTrue->getOperand(0) &&
+        VectorOp == IfFalse->getOperand(0)))
+    return SDValue();
+
+  // Check if the condition code is matched with the operator type.
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+  if ((Op == ISD::SMAX && CC != ISD::SETGT && CC != ISD::SETGE) ||
+      (Op == ISD::UMAX && CC != ISD::SETUGT && CC != ISD::SETUGE) ||
+      (Op == ISD::SMIN && CC != ISD::SETLT && CC != ISD::SETLE) ||
+      (Op == ISD::UMIN && CC != ISD::SETULT && CC != ISD::SETULE) ||
+      (Op == ISD::FMAXNUM && CC != ISD::SETOGT && CC != ISD::SETOGE &&
+       CC != ISD::SETUGT && CC != ISD::SETUGE && CC != ISD::SETGT &&
+       CC != ISD::SETGE) ||
+      (Op == ISD::FMINNUM && CC != ISD::SETOLT && CC != ISD::SETOLE &&
+       CC != ISD::SETULT && CC != ISD::SETULE && CC != ISD::SETLT &&
+       CC != ISD::SETLE))
+    return SDValue();
+
+  // Expect to check only lane 0 from the vector SETCC.
+  if (!isNullConstant(N0.getOperand(1)))
+    return SDValue();
+
+  // Expect to extract the true value from lane 0.
+  if (!isNullConstant(IfTrue.getOperand(1)))
+    return SDValue();
+
+  // Expect to extract the false value from lane 1.
+  if (!isOneConstant(IfFalse.getOperand(1)))
+    return SDValue();
+
+  return tryMatchAcrossLaneShuffleForReduction(N, SetCC, Op, DAG);
+}
+
+/// Target-specific DAG combine for the across vector add reduction.
+/// This function specifically handles the final clean-up step of the vector
+/// add reduction produced by the LoopVectorizer. It is the log2-shuffle
+/// pattern, which adds all elements of a vector together.
+/// For example, for a <4 x i32> vector :
+///   %1 = vector_shuffle %0, <2,3,u,u>
+///   %2 = add %0, %1
+///   %3 = vector_shuffle %2, <1,u,u,u>
+///   %4 = add %2, %3
+///   %result = extract_vector_elt %4, 0
+/// becomes :
+///   %0 = uaddv %0
+///   %result = extract_vector_elt %0, 0
+static SDValue
+performAcrossLaneAddReductionCombine(SDNode *N, SelectionDAG &DAG,
+                                     const AArch64Subtarget *Subtarget) {
+  if (!Subtarget->hasNEON())
+    return SDValue();
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // Check if the input vector is fed by the ADD.
+  if (N0->getOpcode() != ISD::ADD)
+    return SDValue();
+
+  // The vector extract idx must be constant zero because we only expect the
+  // final result of the reduction to be placed in lane 0.
+  if (!isNullConstant(N1))
+    return SDValue();
+
+  EVT VTy = N0.getValueType();
+  if (!VTy.isVector())
+    return SDValue();
+
+  EVT EltTy = VTy.getVectorElementType();
+  if (EltTy != MVT::i32 && EltTy != MVT::i16 && EltTy != MVT::i8)
+    return SDValue();
+
+  if (VTy.getSizeInBits() < 64)
+    return SDValue();
+
+  return tryMatchAcrossLaneShuffleForReduction(N, N0, ISD::ADD, DAG);
+}
+
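A scalar model (illustrative only, not part of the patch) of the log2-shuffle pattern that both reduction combines hand off to tryMatchAcrossLaneShuffleForReduction: each step combines the vector with a copy of itself shifted down by half the remaining width, leaving the across-vector result in lane 0.

#include <cassert>

int main() {
  int V[4] = {1, 2, 3, 4}; // stands in for a <4 x i32> input
  // First shuffle <2,3,u,u> + add, then shuffle <1,u,u,u> + add.
  for (int Off = 2; Off >= 1; Off /= 2)
    for (int i = 0; i + Off < 4; ++i)
      V[i] += V[i + Off];
  assert(V[0] == 10); // lane 0 now holds the uaddv-style total
  return 0;
}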
 /// Target-specific DAG combine function for NEON load/store intrinsics
 /// to merge base address updates.
 static SDValue performNEONPostLDSTCombine(SDNode *N,
@@ -8751,10 +9468,10 @@ static SDValue performBRCONDCombine(SDNode *N,
   if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
     return SDValue();
 
-  if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue())
+  if (isNullConstant(LHS))
     std::swap(LHS, RHS);
 
-  if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue())
+  if (!isNullConstant(RHS))
     return SDValue();
 
   if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
@@ -8868,75 +9585,6 @@ static SDValue performSelectCombine(SDNode *N,
   return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
 }
 
-/// performSelectCCCombine - Target-specific DAG combining for ISD::SELECT_CC
-/// to match FMIN/FMAX patterns.
-static SDValue performSelectCCCombine(SDNode *N, SelectionDAG &DAG) {
-  // Try to use FMIN/FMAX instructions for FP selects like "x < y ? x : y".
-  // Unless the NoNaNsFPMath option is set, be careful about NaNs:
-  // vmax/vmin return NaN if either operand is a NaN;
-  // only do the transformation when it matches that behavior.
-
-  SDValue CondLHS = N->getOperand(0);
-  SDValue CondRHS = N->getOperand(1);
-  SDValue LHS = N->getOperand(2);
-  SDValue RHS = N->getOperand(3);
-  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
-
-  unsigned Opcode;
-  bool IsReversed;
-  if (selectCCOpsAreFMaxCompatible(CondLHS, LHS) &&
-      selectCCOpsAreFMaxCompatible(CondRHS, RHS)) {
-    IsReversed = false; // x CC y ? x : y
-  } else if (selectCCOpsAreFMaxCompatible(CondRHS, LHS) &&
-             selectCCOpsAreFMaxCompatible(CondLHS, RHS)) {
-    IsReversed = true; // x CC y ? y : x
-  } else {
-    return SDValue();
-  }
-
-  bool IsUnordered = false, IsOrEqual;
-  switch (CC) {
-  default:
-    return SDValue();
-  case ISD::SETULT:
-  case ISD::SETULE:
-    IsUnordered = true;
-  case ISD::SETOLT:
-  case ISD::SETOLE:
-  case ISD::SETLT:
-  case ISD::SETLE:
-    IsOrEqual = (CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE);
-    Opcode = IsReversed ? AArch64ISD::FMAX : AArch64ISD::FMIN;
-    break;
-
-  case ISD::SETUGT:
-  case ISD::SETUGE:
-    IsUnordered = true;
-  case ISD::SETOGT:
-  case ISD::SETOGE:
-  case ISD::SETGT:
-  case ISD::SETGE:
-    IsOrEqual = (CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE);
-    Opcode = IsReversed ? AArch64ISD::FMIN : AArch64ISD::FMAX;
-    break;
-  }
-
-  // If LHS is NaN, an ordered comparison will be false and the result will be
-  // the RHS, but FMIN(NaN, RHS) = FMAX(NaN, RHS) = NaN. Avoid this by checking
-  // that LHS != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
-  if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
-    return SDValue();
-
-  // For xxx-or-equal comparisons, "+0 <= -0" and "-0 >= +0" will both be true,
-  // but FMIN will return -0, and FMAX will return +0. So FMIN/FMAX can only be
-  // used for unsafe math or if one of the operands is known to be nonzero.
-  if (IsOrEqual && !DAG.getTarget().Options.UnsafeFPMath &&
-      !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
-    return SDValue();
-
-  return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS);
-}
-
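Background for the deletion above (a sketch, not part of the patch): the removed combine produced AArch64 FMIN/FMAX, which, like the ISD::FMINNAN/FMAXNAN nodes now emitted for the intrinsics, propagate NaN operands, whereas fminnum-style ops return the non-NaN operand. That difference is why the combine needed its NaN guards.

#include <cassert>
#include <cmath>
#include <limits>

// NaN-propagating minimum, matching FMINNAN-style semantics.
static double minPropagateNaN(double A, double B) {
  if (std::isnan(A) || std::isnan(B))
    return std::numeric_limits<double>::quiet_NaN();
  return A < B ? A : B;
}

int main() {
  double NaN = std::numeric_limits<double>::quiet_NaN();
  assert(std::isnan(minPropagateNaN(NaN, 1.0))); // FMINNAN-style: NaN wins
  assert(std::fmin(NaN, 1.0) == 1.0);            // FMINNUM-style: number wins
  return 0;
}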
 /// Get rid of unnecessary NVCASTs (that don't change the type).
 static SDValue performNVCASTCombine(SDNode *N) {
   if (N->getValueType(0) == N->getOperand(0).getValueType())
@@ -8961,6 +9609,11 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:
     return performIntToFpCombine(N, DAG, Subtarget);
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+    return performFpToIntCombine(N, DAG, Subtarget);
+  case ISD::FDIV:
+    return performFDivCombine(N, DAG, Subtarget);
   case ISD::OR:
     return performORCombine(N, DCI, Subtarget);
   case ISD::INTRINSIC_WO_CHAIN:
@@ -8973,12 +9626,18 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performBitcastCombine(N, DCI, DAG);
   case ISD::CONCAT_VECTORS:
     return performConcatVectorsCombine(N, DCI, DAG);
-  case ISD::SELECT:
-    return performSelectCombine(N, DCI);
+  case ISD::SELECT: {
+    SDValue RV = performSelectCombine(N, DCI);
+    if (!RV.getNode())
+      RV = performAcrossLaneMinMaxReductionCombine(N, DAG, Subtarget);
+    return RV;
+  }
   case ISD::VSELECT:
     return performVSelectCombine(N, DCI.DAG);
-  case ISD::SELECT_CC:
-    return performSelectCCCombine(N, DCI.DAG);
+  case ISD::LOAD:
+    if (performTBISimplification(N->getOperand(1), DCI, DAG))
+      return SDValue(N, 0);
+    break;
   case ISD::STORE:
     return performSTORECombine(N, DCI, DAG, Subtarget);
   case AArch64ISD::BRCOND:
@@ -8991,6 +9650,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performNVCASTCombine(N);
   case ISD::INSERT_VECTOR_ELT:
     return performPostLD1Combine(N, DCI, true);
+  case ISD::EXTRACT_VECTOR_ELT:
+    return performAcrossLaneAddReductionCombine(N, DAG, Subtarget);
   case ISD::INTRINSIC_VOID:
   case ISD::INTRINSIC_W_CHAIN:
     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -9157,6 +9818,20 @@ static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
 }
 
+static void ReplaceReductionResults(SDNode *N,
+                                    SmallVectorImpl<SDValue> &Results,
+                                    SelectionDAG &DAG, unsigned InterOp,
+                                    unsigned AcrossOp) {
+  EVT LoVT, HiVT;
+  SDValue Lo, Hi;
+  SDLoc dl(N);
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+  SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
+  SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
+  Results.push_back(SplitVal);
+}
+
 void AArch64TargetLowering::ReplaceNodeResults(
     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
   switch (N->getOpcode()) {
@@ -9165,6 +9840,24 @@ void AArch64TargetLowering::ReplaceNodeResults(
   case ISD::BITCAST:
     ReplaceBITCASTResults(N, Results, DAG);
     return;
+  case AArch64ISD::SADDV:
+    ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
+    return;
+  case AArch64ISD::UADDV:
+    ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
+    return;
+  case AArch64ISD::SMINV:
+    ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
+    return;
+  case AArch64ISD::UMINV:
+    ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
+    return;
+  case AArch64ISD::SMAXV:
+    ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
+    return;
+  case AArch64ISD::UMAXV:
+    ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
+    return;
   case ISD::FP_TO_UINT:
   case ISD::FP_TO_SINT:
     assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
@@ -9177,10 +9870,10 @@ bool AArch64TargetLowering::useLoadStackGuardNode() const {
   return true;
 }
 
-bool AArch64TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
+unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
   // reciprocal if there are three or more FDIVs.
-  return NumUsers > 2;
+  return 3;
 }
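A scalar sketch (illustrative only) of the threshold just returned: with three or more divisions by one divisor, a single reciprocal plus multiplies is the cheaper shape. The divisor here is a power of two, so the results are bit-exact.

#include <cassert>

int main() {
  double A = 1.0, B = 3.0, C = 5.0, D = 8.0; // three FDIVs by the same D
  double Recip = 1.0 / D;                    // one division...
  assert(A * Recip == A / D);                // ...then three multiplies
  assert(B * Recip == B / D);
  assert(C * Recip == C / D);
  return 0;
}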
 
 TargetLoweringBase::LegalizeTypeAction
@@ -9206,20 +9899,21 @@ bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 // Loads and stores less than 128-bits are already atomic; ones above that
 // are doomed anyway, so defer to the default libcall and blame the OS when
 // things go wrong.
-bool AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return Size == 128;
+  return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
 }
 
 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= 128 ? AtomicRMWExpansionKind::LLSC
-                     : AtomicRMWExpansionKind::None;
+  return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
 }
 
-bool AArch64TargetLowering::hasLoadLinkedStoreConditional() const {
+bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+    AtomicCmpXchgInst *AI) const {
   return true;
 }
 
@@ -9258,6 +9952,13 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
       cast<PointerType>(Addr->getType())->getElementType());
 }
 
+void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
+    IRBuilder<> &Builder) const {
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Builder.CreateCall(
+      llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
+}
+
 Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
                                                    Value *Val, Value *Addr,
                                                    AtomicOrdering Ord) const {
@@ -9294,3 +9995,70 @@ bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
     Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
   return Ty->isArrayTy();
 }
+
+bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
+                                                            EVT) const {
+  return false;
+}
+
+Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+  if (!Subtarget->isTargetAndroid())
+    return TargetLowering::getSafeStackPointerLocation(IRB);
+
+  // Android provides a fixed TLS slot for the SafeStack pointer. See the
+  // definition of TLS_SLOT_SAFESTACK in
+  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
+  const unsigned TlsOffset = 0x48;
+  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+  Function *ThreadPointerFunc =
+      Intrinsic::getDeclaration(M, Intrinsic::aarch64_thread_pointer);
+  return IRB.CreatePointerCast(
+      IRB.CreateConstGEP1_32(IRB.CreateCall(ThreadPointerFunc), TlsOffset),
+      Type::getInt8PtrTy(IRB.getContext())->getPointerTo(0));
+}
+
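The 0x48 above is simply a TLS slot index scaled by the pointer size; a tiny check (illustrative only, with the slot number taken as an assumption from bionic's TLS layout rather than from this patch):

#include <cassert>

int main() {
  const unsigned TlsSlotSafeStack = 9; // assumed TLS_SLOT_SAFESTACK index
  const unsigned PointerBytes = 8;     // AArch64 pointer size in bytes
  assert(TlsSlotSafeStack * PointerBytes == 0x48);
  return 0;
}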
+void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+  // Update IsSplitCSR in AArch64FunctionInfo.
+  AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
+  AFI->setIsSplitCSR(true);
+}
+
+void AArch64TargetLowering::insertCopiesSplitCSR(
+    MachineBasicBlock *Entry,
+    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+  if (!IStart)
+    return;
+
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+  for (const MCPhysReg *I = IStart; *I; ++I) {
+    const TargetRegisterClass *RC = nullptr;
+    if (AArch64::GPR64RegClass.contains(*I))
+      RC = &AArch64::GPR64RegClass;
+    else if (AArch64::FPR64RegClass.contains(*I))
+      RC = &AArch64::FPR64RegClass;
+    else
+      llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+    unsigned NewVR = MRI->createVirtualRegister(RC);
+    // Create copy from CSR to a virtual register.
+    // FIXME: this currently does not emit CFI pseudo-instructions, it works
+    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+    // nounwind. If we want to generalize this later, we may need to emit
+    // CFI pseudo-instructions.
+    assert(Entry->getParent()->getFunction()->hasFnAttribute(
+               Attribute::NoUnwind) &&
+           "Function should be nounwind in insertCopiesSplitCSR!");
+    Entry->addLiveIn(*I);
+    BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+            NewVR)
+        .addReg(*I);
+
+    for (auto *Exit : Exits)
+      BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+              *I)
+          .addReg(NewVR);
+  }
+}
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index c73ce1e54b3e..e99616c94068 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
 #define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
 
+#include "AArch64.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/IR/CallingConv.h"
@@ -58,13 +59,14 @@ enum NodeType : unsigned {
   SBCS,
   ANDS,
 
+  // Conditional compares. Operands: left,right,falsecc,cc,flags
+  CCMP,
+  CCMN,
+  FCCMP,
+
   // Floating point comparison
   FCMP,
 
-  // Floating point max and min instructions.
-  FMAX,
-  FMIN,
-
   // Scalar extract
   EXTR,
 
@@ -217,8 +219,6 @@ class AArch64Subtarget;
 class AArch64TargetMachine;
 
 class AArch64TargetLowering : public TargetLowering {
-  bool RequireStrictAlign;
-
 public:
   explicit AArch64TargetLowering(const TargetMachine &TM,
                                  const AArch64Subtarget &STI);
@@ -226,46 +226,35 @@ public:
   /// Selects the correct CCAssignFn for a given CallingConvention value.
   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
 
-  /// computeKnownBitsForTargetNode - Determine which of the bits specified in
-  /// Mask are known to be either zero or one and return them in the
-  /// KnownZero/KnownOne bitsets.
+  /// Determine which of the bits specified in Mask are known to be either zero
+  /// or one and return them in the KnownZero/KnownOne bitsets.
   void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
                                      APInt &KnownOne, const SelectionDAG &DAG,
                                      unsigned Depth = 0) const override;
 
   MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
 
-  /// allowsMisalignedMemoryAccesses - Returns true if the target allows
-  /// unaligned memory accesses of the specified type.
+  /// Returns true if the target allows unaligned memory accesses of the
+  /// specified type.
   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
                                       unsigned Align = 1,
-                                      bool *Fast = nullptr) const override {
-    if (RequireStrictAlign)
-      return false;
-    // FIXME: True for Cyclone, but not necessary others.
-    if (Fast)
-      *Fast = true;
-    return true;
-  }
+                                      bool *Fast = nullptr) const override;
 
-  /// LowerOperation - Provide custom lowering hooks for some operations.
+  /// Provide custom lowering hooks for some operations.
   SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
 
   const char *getTargetNodeName(unsigned Opcode) const override;
 
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
-  /// getFunctionAlignment - Return the Log2 alignment of this function.
-  unsigned getFunctionAlignment(const Function *F) const;
-
   /// Returns true if a cast between SrcAS and DestAS is a noop.
   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
     // Addrspacecasts are always noops.
     return true;
   }
 
-  /// createFastISel - This method returns a target specific FastISel object,
-  /// or null if the target does not support "fast" ISel.
+  /// This method returns a target specific FastISel object, or null if the
+  /// target does not support "fast" ISel.
   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                            const TargetLibraryInfo *libInfo) const override;
 
@@ -273,11 +262,11 @@ public:
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
 
-  /// isShuffleMaskLegal - Return true if the given shuffle mask can be
-  /// codegen'd directly, or if it should be stack expanded.
+  /// Return true if the given shuffle mask can be codegen'd directly, or if it
+  /// should be stack expanded.
   bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
 
-  /// getSetCCResultType - Return the ISD::SETCC ValueType
+  /// Return the ISD::SETCC ValueType.
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
 
@@ -322,8 +311,8 @@ public:
                           bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
                           MachineFunction &MF) const override;
 
-  /// isLegalAddressingMode - Return true if the addressing mode represented
-  /// by AM is legal for this target, for a load/store of the specified type.
+  /// Return true if the addressing mode represented by AM is legal for this
+  /// target, for a load/store of the specified type.
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                              unsigned AS) const override;
 
@@ -335,10 +324,9 @@ public:
   int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                            unsigned AS) const override;
 
-  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
-  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
-  /// expanded to FMAs when this method returns true, otherwise fmuladd is
-  /// expanded to fmul + fadd.
+  /// Return true if an FMA operation is faster than a pair of fmul and fadd
+  /// instructions. fmuladd intrinsics will be expanded to FMAs when this
+  /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
   bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
@@ -351,25 +339,65 @@ public:
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                          Type *Ty) const override;
 
-  bool hasLoadLinkedStoreConditional() const override;
   Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                         AtomicOrdering Ord) const override;
   Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                               Value *Addr, AtomicOrdering Ord) const override;
 
-  bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
+  TargetLoweringBase::AtomicExpansionKind
+  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
   bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-  TargetLoweringBase::AtomicRMWExpansionKind
+  TargetLoweringBase::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
+  bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+
   bool useLoadStackGuardNode() const override;
   TargetLoweringBase::LegalizeTypeAction
   getPreferredVectorAction(EVT VT) const override;
 
+  /// If the target has a standard location for the unsafe stack pointer,
+  /// returns the address of that location. Otherwise, returns nullptr.
+  Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
+
+  /// If a physical register, this returns the register that receives the
+  /// exception address on entry to an EH pad.
+  unsigned
+  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
+    // FIXME: This is a guess. Has this been defined yet?
+    return AArch64::X0;
+  }
+
+  /// If a physical register, this returns the register that receives the
+  /// exception typeid on entry to a landing pad.
+  unsigned
+  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
+    // FIXME: This is a guess. Has this been defined yet?
+    return AArch64::X1;
+  }
+
+  bool isCheapToSpeculateCttz() const override {
+    return true;
+  }
+
+  bool isCheapToSpeculateCtlz() const override {
+    return true;
+  }
+
+  bool supportSplitCSR(MachineFunction *MF) const override {
+    return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+           MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+  }
+  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+  void insertCopiesSplitCSR(
+      MachineBasicBlock *Entry,
+      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
 private:
   bool isExtFreeImpl(const Instruction *Ext) const override;
 
-  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+  /// Keep a pointer to the AArch64Subtarget around so that we can
   /// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget; @@ -392,6 +420,8 @@ private: SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + bool isEligibleForTailCallOptimization( SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, bool isCalleeStructRet, bool isCallerStructRet, @@ -470,7 +500,7 @@ private: SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector *Created) const override; - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; ConstraintType getConstraintType(StringRef Constraint) const override; unsigned getRegisterByName(const char* RegName, EVT VT, @@ -516,6 +546,8 @@ private: bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + + bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override; }; namespace AArch64 { diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 3f2e772a90c4..6ac2175e5035 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -248,6 +248,12 @@ def simm7s16 : Operand { let PrintMethod = "printImmScale<16>"; } +def am_indexed7s8 : ComplexPattern; +def am_indexed7s16 : ComplexPattern; +def am_indexed7s32 : ComplexPattern; +def am_indexed7s64 : ComplexPattern; +def am_indexed7s128 : ComplexPattern; + class AsmImmRange : AsmOperandClass { let Name = "Imm" # Low # "_" # High; let DiagnosticType = "InvalidImm" # Low # "_" # High; @@ -346,9 +352,11 @@ class fixedpoint_i64 let ParserMatchClass = Imm1_64Operand; } +def fixedpoint_f16_i32 : fixedpoint_i32; def fixedpoint_f32_i32 : fixedpoint_i32; def fixedpoint_f64_i32 : fixedpoint_i32; +def fixedpoint_f16_i64 : fixedpoint_i64; def fixedpoint_f32_i64 : fixedpoint_i64; def fixedpoint_f64_i64 : fixedpoint_i64; @@ -402,6 +410,7 @@ def vecshiftR64Narrow : Operand, ImmLeaf; def Imm0_7Operand : AsmImmRange<0, 7>; def Imm0_15Operand : AsmImmRange<0, 15>; def Imm0_31Operand : AsmImmRange<0, 31>; @@ -525,6 +534,20 @@ def imm0_31 : Operand, ImmLeaf, ImmLeaf { + let ParserMatchClass = Imm0_31Operand; +} + +// imm0_1 predicate - True if the immediate is in the range [0,1] +def imm0_1 : Operand, ImmLeaf { + let ParserMatchClass = Imm0_1Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand, ImmLeaf, ImmLeaf, ImmLeaf; +}]> { + let ParserMatchClass = Imm0_15Operand; +} // An arithmetic shifter operand: // {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr @@ -690,6 +715,17 @@ class arith_extended_reg32to64 : Operand, } // Floating-point immediate. 
+def fpimm16 : Operand, + PatLeaf<(f16 fpimm), [{ + return AArch64_AM::getFP16Imm(N->getValueAPF()) != -1; + }], SDNodeXFormgetValueAPF(); + uint32_t enc = AArch64_AM::getFP16Imm(InVal); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>> { + let ParserMatchClass = FPImmOperand; + let PrintMethod = "printFPImmOperand"; +} def fpimm32 : Operand, PatLeaf<(f32 fpimm), [{ return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1; @@ -822,7 +858,7 @@ class RtSystemI // model patterns with sufficiently fine granularity let mayStore = 1, mayLoad = 1, hasSideEffects = 1 in class HintI - : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "", + : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#"\t$imm", "", [(int_aarch64_hint imm0_127:$imm)]>, Sched<[WriteHint]> { bits <7> imm; @@ -875,6 +911,25 @@ def msr_sysreg_op : Operand { let PrintMethod = "printMSRSystemRegister"; } +def PSBHintOperand : AsmOperandClass { + let Name = "PSBHint"; + let ParserMethod = "tryParsePSBHint"; +} +def psbhint_op : Operand { + let ParserMatchClass = PSBHintOperand; + let PrintMethod = "printPSBHintOp"; + let MCOperandPredicate = [{ + // Check, if operand is valid, to fix exhaustive aliasing in disassembly. + // "psb" is an alias to "hint" only for certain values of CRm:Op2 fields. + if (!MCOp.isImm()) + return false; + bool ValidNamed; + (void)AArch64PSBHint::PSBHintMapper().toString(MCOp.getImm(), + STI.getFeatureBits(), ValidNamed); + return ValidNamed; + }]; +} + class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg), "mrs", "\t$Rt, $systemreg"> { bits<16> systemreg; @@ -890,19 +945,19 @@ class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt), let Inst{20-5} = systemreg; } -def SystemPStateFieldOperand : AsmOperandClass { - let Name = "SystemPStateField"; +def SystemPStateFieldWithImm0_15Operand : AsmOperandClass { + let Name = "SystemPStateFieldWithImm0_15"; let ParserMethod = "tryParseSysReg"; } -def pstatefield_op : Operand { - let ParserMatchClass = SystemPStateFieldOperand; +def pstatefield4_op : Operand { + let ParserMatchClass = SystemPStateFieldWithImm0_15Operand; let PrintMethod = "printSystemPStateField"; } let Defs = [NZCV] in -class MSRpstateI - : SimpleSystemI<0, (ins pstatefield_op:$pstate_field, imm0_15:$imm), - "msr", "\t$pstate_field, $imm">, +class MSRpstateImm0_15 + : SimpleSystemI<0, (ins pstatefield4_op:$pstatefield, imm0_15:$imm), + "msr", "\t$pstatefield, $imm">, Sched<[WriteSys]> { bits<6> pstatefield; bits<4> imm; @@ -913,6 +968,37 @@ class MSRpstateI let Inst{7-5} = pstatefield{2-0}; let DecoderMethod = "DecodeSystemPStateInstruction"; + // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns + // Fail the decoder should attempt to decode the instruction as MSRI. 
+ let hasCompleteDecoder = 0; +} + +def SystemPStateFieldWithImm0_1Operand : AsmOperandClass { + let Name = "SystemPStateFieldWithImm0_1"; + let ParserMethod = "tryParseSysReg"; +} +def pstatefield1_op : Operand { + let ParserMatchClass = SystemPStateFieldWithImm0_1Operand; + let PrintMethod = "printSystemPStateField"; +} + +let Defs = [NZCV] in +class MSRpstateImm0_1 + : SimpleSystemI<0, (ins pstatefield1_op:$pstatefield, imm0_1:$imm), + "msr", "\t$pstatefield, $imm">, + Sched<[WriteSys]> { + bits<6> pstatefield; + bit imm; + let Inst{20-19} = 0b00; + let Inst{18-16} = pstatefield{5-3}; + let Inst{15-9} = 0b0100000; + let Inst{8} = imm; + let Inst{7-5} = pstatefield{2-0}; + + let DecoderMethod = "DecodeSystemPStateInstruction"; + // MSRpstateI aliases with MSRI. When the MSRpstateI decoder method returns + // Fail the decoder should attempt to decode the instruction as MSRI. + let hasCompleteDecoder = 0; } // SYS and SYSL generic system instructions. @@ -1341,7 +1427,7 @@ multiclass Shift shift_type, string asm, SDNode OpNode> { } class ShiftAlias - : InstAlias; class BaseMulAccum opc, RegisterClass multype, @@ -1407,13 +1493,13 @@ class MulHi opc, string asm, SDNode OpNode> } class MulAccumWAlias - : InstAlias; class MulAccumXAlias - : InstAlias; class WideMulAccumAlias - : InstAlias; class BaseCRC32 sz, bit C, RegisterClass StreamReg, @@ -1643,7 +1729,7 @@ class BaseAddSubEReg64 - : InstAlias; @@ -1701,10 +1787,10 @@ multiclass AddSub sub Rd, Rn, imm - def : InstAlias(NAME # "Wri") GPR32sp:$Rd, GPR32sp:$Rn, addsub_shifted_imm32_neg:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64sp:$Rd, GPR64sp:$Rn, addsub_shifted_imm64_neg:$imm), 0>; @@ -1776,43 +1862,43 @@ multiclass AddSubS subs Rd, Rn, imm - def : InstAlias(NAME # "Wri") GPR32:$Rd, GPR32sp:$Rn, addsub_shifted_imm32_neg:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64:$Rd, GPR64sp:$Rn, addsub_shifted_imm64_neg:$imm), 0>; // Compare aliases - def : InstAlias(NAME#"Wri") + def : InstAlias(NAME#"Wri") WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>; - def : InstAlias(NAME#"Xri") + def : InstAlias(NAME#"Xri") XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>; - def : InstAlias(NAME#"Wrx") + def : InstAlias(NAME#"Wrx") WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; - def : InstAlias(NAME#"Xrx") + def : InstAlias(NAME#"Xrx") XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>; - def : InstAlias(NAME#"Xrx64") + def : InstAlias(NAME#"Xrx64") XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>; - def : InstAlias(NAME#"Wrs") + def : InstAlias(NAME#"Wrs") WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>; - def : InstAlias(NAME#"Xrs") + def : InstAlias(NAME#"Xrs") XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>; // Support negative immediates, e.g. cmp Rn, -imm -> cmn Rn, imm - def : InstAlias(NAME#"Wri") + def : InstAlias(NAME#"Wri") WZR, GPR32sp:$src, addsub_shifted_imm32_neg:$imm), 0>; - def : InstAlias(NAME#"Xri") + def : InstAlias(NAME#"Xri") XZR, GPR64sp:$src, addsub_shifted_imm64_neg:$imm), 0>; // Compare shorthands - def : InstAlias(NAME#"Wrs") + def : InstAlias(NAME#"Wrs") WZR, GPR32:$src1, GPR32:$src2, 0), 5>; - def : InstAlias(NAME#"Xrs") + def : InstAlias(NAME#"Xrs") XZR, GPR64:$src1, GPR64:$src2, 0), 5>; - def : InstAlias(NAME#"Wrx") + def : InstAlias(NAME#"Wrx") WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>; - def : InstAlias(NAME#"Xrx64") + def : InstAlias(NAME#"Xrx64") XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>; // Register/register aliases with no shift when SP is not used. 
@@ -1998,7 +2084,7 @@ class BaseLogicalSReg opc, bit N, RegisterClass regtype, // Aliases for register+register logical instructions. class LogicalRegAlias - : InstAlias; multiclass LogicalImm opc, string mnemonic, SDNode OpNode, @@ -2017,10 +2103,10 @@ multiclass LogicalImm opc, string mnemonic, SDNode OpNode, let Inst{31} = 1; } - def : InstAlias(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn, logical_imm32_not:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn, logical_imm64_not:$imm), 0>; } @@ -2039,10 +2125,10 @@ multiclass LogicalImmS opc, string mnemonic, SDNode OpNode, } } // end Defs = [NZCV] - def : InstAlias(NAME # "Wri") GPR32:$Rd, GPR32:$Rn, logical_imm32_not:$imm), 0>; - def : InstAlias(NAME # "Xri") GPR64:$Rd, GPR64:$Rn, logical_imm64_not:$imm), 0>; } @@ -2105,9 +2191,12 @@ multiclass LogicalRegS opc, bit N, string mnemonic, //--- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsImm - : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $imm, $nzcv, $cond", "", []>, +class BaseCondComparisonImm + : I<(outs), (ins regtype:$Rn, immtype:$imm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $imm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, immtype:$imm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2127,19 +2216,13 @@ class BaseCondSetFlagsImm let Inst{3-0} = nzcv; } -multiclass CondSetFlagsImm { - def Wi : BaseCondSetFlagsImm { - let Inst{31} = 0; - } - def Xi : BaseCondSetFlagsImm { - let Inst{31} = 1; - } -} - let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseCondSetFlagsReg - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseCondComparisonReg + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", + [(set NZCV, (OpNode regtype:$Rn, regtype:$Rm, (i32 imm:$nzcv), + (i32 imm:$cond), NZCV))]>, Sched<[WriteI, ReadI, ReadI]> { let Uses = [NZCV]; let Defs = [NZCV]; @@ -2159,11 +2242,19 @@ class BaseCondSetFlagsReg let Inst{3-0} = nzcv; } -multiclass CondSetFlagsReg { - def Wr : BaseCondSetFlagsReg { +multiclass CondComparison { + // immediate operand variants + def Wi : BaseCondComparisonImm { let Inst{31} = 0; } - def Xr : BaseCondSetFlagsReg { + def Xi : BaseCondComparisonImm { + let Inst{31} = 1; + } + // register operand variants + def Wr : BaseCondComparisonReg { + let Inst{31} = 0; + } + def Xr : BaseCondComparisonReg { let Inst{31} = 1; } } @@ -2328,7 +2419,7 @@ multiclass LoadUI sz, bit V, bits<2> opc, RegisterClass regtype, asm, pattern>, Sched<[WriteLD]>; - def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2340,7 +2431,7 @@ multiclass StoreUI sz, bit V, bits<2> opc, RegisterClass regtype, asm, pattern>, Sched<[WriteST]>; - def : InstAlias(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2508,7 +2599,7 @@ class LoadStore8RO sz, bit V, bits<2> opc, RegisterClass regtype, } class ROInstAlias - : InstAlias; multiclass Load8RO sz, bit V, bits<2> opc, RegisterClass regtype, @@ -2934,7 +3025,7 @@ multiclass LoadUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, (ins GPR64sp:$Rn, simm9:$offset), asm, pattern>, Sched<[WriteLD]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -2946,7 +3037,7 @@ multiclass StoreUnscaled sz, bit V, bits<2> opc, RegisterClass regtype, asm, pattern>, Sched<[WriteST]>; - def : InstAlias(NAME # "i") regtype:$Rt, 
GPR64sp:$Rn, 0)>; } @@ -2958,7 +3049,7 @@ multiclass PrefetchUnscaled sz, bit V, bits<2> opc, string asm, asm, pat>, Sched<[WriteLD]>; - def : InstAlias(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>; } @@ -2993,7 +3084,7 @@ multiclass LoadUnprivileged sz, bit V, bits<2> opc, (ins GPR64sp:$Rn, simm9:$offset), asm>, Sched<[WriteLD]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -3005,7 +3096,7 @@ multiclass StoreUnprivileged sz, bit V, bits<2> opc, asm>, Sched<[WriteST]>; - def : InstAlias(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>; } @@ -3136,7 +3227,7 @@ multiclass LoadPairOffset opc, bit V, RegisterClass regtype, (ins GPR64sp:$Rn, indextype:$offset), asm>, Sched<[WriteLD, WriteLDHi]>; - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, GPR64sp:$Rn, 0)>; } @@ -3151,7 +3242,7 @@ multiclass StorePairOffset opc, bit V, RegisterClass regtype, asm>, Sched<[WriteSTP]>; - def : InstAlias(NAME # "i") regtype:$Rt, regtype:$Rt2, GPR64sp:$Rn, 0)>; } @@ -3230,8 +3321,8 @@ class LoadPairPostIdx opc, bit V, RegisterClass regtype, let mayStore = 1, mayLoad = 0 in class StorePairPostIdx opc, bit V, RegisterClass regtype, Operand idxtype, string asm> - : BaseLoadStorePairPostIdx, Sched<[WriteAdr, WriteSTP]>; @@ -3477,6 +3568,20 @@ class BaseFPToInteger type, bits<2> rmode, bits<3> opcode, multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, SDPatternOperator OpN> { + // Unscaled half-precision to 32-bit + def UWHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR32, asm, + [(set GPR32:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 0; // 32-bit GPR flag + let Predicates = [HasFullFP16]; + } + + // Unscaled half-precision to 64-bit + def UXHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, GPR64, asm, + [(set GPR64:$Rd, (OpN FPR16:$Rn))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + // Unscaled single-precision to 32-bit def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm, [(set GPR32:$Rd, (OpN FPR32:$Rn))]> { @@ -3504,6 +3609,25 @@ multiclass FPToIntegerUnscaled rmode, bits<3> opcode, string asm, multiclass FPToIntegerScaled rmode, bits<3> opcode, string asm, SDPatternOperator OpN> { + // Scaled half-precision to 32-bit + def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32, + fixedpoint_f16_i32, asm, + [(set GPR32:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i32:$scale)))]> { + let Inst{31} = 0; // 32-bit GPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + + // Scaled half-precision to 64-bit + def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64, + fixedpoint_f16_i64, asm, + [(set GPR64:$Rd, (OpN (fmul FPR16:$Rn, + fixedpoint_f16_i64:$scale)))]> { + let Inst{31} = 1; // 64-bit GPR flag + let Predicates = [HasFullFP16]; + } + // Scaled single-precision to 32-bit def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32, fixedpoint_f32_i32, asm, @@ -3553,7 +3677,7 @@ class BaseIntegerToFP Rd; bits<5> Rn; bits<6> scale; - let Inst{30-23} = 0b00111100; + let Inst{30-24} = 0b0011110; let Inst{21-17} = 0b00001; let Inst{16} = isUnsigned; let Inst{15-10} = scale; @@ -3570,7 +3694,7 @@ class BaseIntegerToFPUnscaled Rd; bits<5> Rn; bits<6> scale; - let Inst{30-23} = 0b00111100; + let Inst{30-24} = 0b0011110; let Inst{21-17} = 0b10001; let Inst{16} = isUnsigned; let Inst{15-10} = 0b000000; @@ -3580,33 +3704,55 @@ class BaseIntegerToFPUnscaled { // Unscaled + def UWHri: BaseIntegerToFPUnscaled { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + 
let Predicates = [HasFullFP16]; + } + def UWSri: BaseIntegerToFPUnscaled { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def UWDri: BaseIntegerToFPUnscaled { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def UXHri: BaseIntegerToFPUnscaled { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; } def UXSri: BaseIntegerToFPUnscaled { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def UXDri: BaseIntegerToFPUnscaled { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag } // Scaled + def SWHri: BaseIntegerToFP { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let scale{5} = 1; + let Predicates = [HasFullFP16]; + } + def SWSri: BaseIntegerToFP { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag let scale{5} = 1; } @@ -3615,16 +3761,25 @@ multiclass IntegerToFP { (fdiv (node GPR32:$Rn), fixedpoint_f64_i32:$scale))]> { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag let scale{5} = 1; } + def SXHri: BaseIntegerToFP { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + def SXSri: BaseIntegerToFP { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def SXDri: BaseIntegerToFP { (fdiv (node GPR64:$Rn), fixedpoint_f64_i64:$scale))]> { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag } } @@ -3654,7 +3809,7 @@ class BaseUnscaledConversion rmode, bits<3> opcode, Sched<[WriteFCopy]> { bits<5> Rd; bits<5> Rn; - let Inst{30-23} = 0b00111100; + let Inst{30-24} = 0b0011110; let Inst{21} = 1; let Inst{20-19} = rmode; let Inst{18-16} = opcode; @@ -3704,26 +3859,49 @@ class BaseUnscaledConversionFromHigh rmode, bits<3> opcode, } - multiclass UnscaledConversion { + def WHr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR16, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def XHr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR16, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> { let Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag + } + + def HWr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR32, asm> { + let Inst{31} = 0; // 32-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; + } + + def HXr : BaseUnscaledConversion<0b00, 0b110, FPR16, GPR64, asm> { + let Inst{31} = 1; // 64-bit GPR flag + let Inst{23-22} = 0b11; // 16-bit FPR flag + let Predicates = [HasFullFP16]; } def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> { let 
Inst{31} = 0; // 32-bit GPR flag - let Inst{22} = 0; // 32-bit FPR flag + let Inst{23-22} = 0b00; // 32-bit FPR flag } def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> { let Inst{31} = 1; // 64-bit GPR flag - let Inst{22} = 1; // 64-bit FPR flag + let Inst{23-22} = 0b01; // 64-bit FPR flag } def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128, @@ -3796,7 +3974,7 @@ class BaseSingleOperandFPData opcode, RegisterClass regtype, Sched<[WriteF]> { bits<5> Rd; bits<5> Rn; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21-19} = 0b100; let Inst{18-15} = opcode; let Inst{14-10} = 0b10000; @@ -3806,12 +3984,17 @@ class BaseSingleOperandFPData opcode, RegisterClass regtype, multiclass SingleOperandFPData opcode, string asm, SDPatternOperator node = null_frag> { + def Hr : BaseSingleOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Sr : BaseSingleOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Dr : BaseSingleOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } @@ -3828,7 +4011,7 @@ class BaseTwoOperandFPData opcode, RegisterClass regtype, bits<5> Rd; bits<5> Rn; bits<5> Rm; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-12} = opcode; @@ -3839,28 +4022,41 @@ class BaseTwoOperandFPData opcode, RegisterClass regtype, multiclass TwoOperandFPData opcode, string asm, SDPatternOperator node = null_frag> { + def Hrr : BaseTwoOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Srr : BaseTwoOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Drr : BaseTwoOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } multiclass TwoOperandFPDataNeg opcode, string asm, SDNode node> { + def Hrr : BaseTwoOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Srr : BaseTwoOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Drr : BaseTwoOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } @@ -3878,7 +4074,7 @@ class BaseThreeOperandFPData Rn; bits<5> Rm; bits<5> Ra; - let Inst{31-23} = 0b000111110; + let Inst{31-24} = 0b00011111; let Inst{21} = isNegated; let Inst{20-16} = Rm; let Inst{15} = isSub; @@ -3889,16 +4085,23 @@ class BaseThreeOperandFPData { + def Hrrr : BaseThreeOperandFPData { + let Inst{23-22} = 0b11; // 16-bit size flag + let Predicates = [HasFullFP16]; + } + def Srrr : BaseThreeOperandFPData { - let Inst{22} = 0; // 32-bit size flag + let Inst{23-22} = 0b00; // 32-bit size flag } def Drrr : BaseThreeOperandFPData { - let Inst{22} = 1; // 64-bit size flag + let Inst{23-22} = 0b01; // 64-bit size flag } } @@ -3913,7 +4116,7 @@ class BaseOneOperandFPComparison, Sched<[WriteFCmp]> { bits<5> Rn; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{15-10} = 0b001000; @@ -3932,7 +4135,7 @@ class BaseTwoOperandFPComparison { bits<5> Rm; bits<5> Rn; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-10} = 0b001000; @@ -3944,24 +4147,36 @@ class BaseTwoOperandFPComparison { let Defs = [NZCV] in { + def Hrr : 
BaseTwoOperandFPComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + + def Hri : BaseOneOperandFPComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + def Srr : BaseTwoOperandFPComparison { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Sri : BaseOneOperandFPComparison { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Drr : BaseTwoOperandFPComparison { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } def Dri : BaseOneOperandFPComparison { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } } // Defs = [NZCV] } @@ -3971,17 +4186,20 @@ multiclass FPComparison - : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond), - asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>, +class BaseFPCondComparison pat> + : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm32_0_15:$nzcv, ccode:$cond), + mnemonic, "\t$Rn, $Rm, $nzcv, $cond", "", pat>, Sched<[WriteFCmp]> { + let Uses = [NZCV]; + let Defs = [NZCV]; + bits<5> Rn; bits<5> Rm; bits<4> nzcv; bits<4> cond; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-12} = cond; @@ -3991,16 +4209,24 @@ class BaseFPCondComparison { - let Defs = [NZCV], Uses = [NZCV] in { - def Srr : BaseFPCondComparison { - let Inst{22} = 0; +multiclass FPCondComparison { + def Hrr : BaseFPCondComparison { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; } - def Drr : BaseFPCondComparison { - let Inst{22} = 1; + def Srr : BaseFPCondComparison { + let Inst{23-22} = 0b00; + } + + def Drr : BaseFPCondComparison { + let Inst{23-22} = 0b01; } - } // Defs = [NZCV], Uses = [NZCV] } //--- @@ -4019,7 +4245,7 @@ class BaseFPCondSelect bits<5> Rm; bits<4> cond; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-16} = Rm; let Inst{15-12} = cond; @@ -4030,12 +4256,17 @@ class BaseFPCondSelect multiclass FPCondSelect { let Uses = [NZCV] in { + def Hrrr : BaseFPCondSelect { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + def Srrr : BaseFPCondSelect { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Drrr : BaseFPCondSelect { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } } // Uses = [NZCV] } @@ -4050,7 +4281,7 @@ class BaseFPMoveImmediate Sched<[WriteFImm]> { bits<5> Rd; bits<8> imm; - let Inst{31-23} = 0b000111100; + let Inst{31-24} = 0b00011110; let Inst{21} = 1; let Inst{20-13} = imm; let Inst{12-5} = 0b10000000; @@ -4058,12 +4289,17 @@ class BaseFPMoveImmediate } multiclass FPMoveImmediate { + def Hi : BaseFPMoveImmediate { + let Inst{23-22} = 0b11; + let Predicates = [HasFullFP16]; + } + def Si : BaseFPMoveImmediate { - let Inst{22} = 0; + let Inst{23-22} = 0b00; } def Di : BaseFPMoveImmediate { - let Inst{22} = 1; + let Inst{23-22} = 0b01; } } } // end of 'let Predicates = [HasFPARMv8]' @@ -4079,7 +4315,7 @@ let Predicates = [HasNEON] in { //---------------------------------------------------------------------------- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVector size, bits<5> opcode, +class BaseSIMDThreeSameVector size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, @@ -4093,8 +4329,7 @@ class BaseSIMDThreeSameVector size, bits<5> opcode, let Inst{30} = Q; let Inst{29} = U; let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -4103,7 +4338,7 @@ class 
BaseSIMDThreeSameVector size, bits<5> opcode, } let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVectorTied size, bits<5> opcode, +class BaseSIMDThreeSameVectorTied size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, @@ -4117,8 +4352,7 @@ class BaseSIMDThreeSameVectorTied size, bits<5> opcode, let Inst{30} = Q; let Inst{29} = U; let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -4129,25 +4363,25 @@ class BaseSIMDThreeSameVectorTied size, bits<5> opcode, // All operand sizes distinguished in the encoding. multiclass SIMDThreeSameVector opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128, asm, ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; } @@ -4155,49 +4389,49 @@ multiclass SIMDThreeSameVector opc, string asm, // As above, but D sized elements unsupported. 
multiclass SIMDThreeSameVectorBHS opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; } multiclass SIMDThreeSameVectorBHSTied opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; @@ -4206,54 +4440,80 @@ multiclass SIMDThreeSameVectorBHSTied opc, string asm, // As above, but only B sized elements supported. multiclass SIMDThreeSameVectorB opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; } -// As above, but only S and D sized floating point elements supported. -multiclass SIMDThreeSameVectorFP opc, +// As above, but only floating point elements supported. 
+multiclass SIMDThreeSameVectorFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -multiclass SIMDThreeSameVectorFPCmp opc, +multiclass SIMDThreeSameVectorFPCmp opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -multiclass SIMDThreeSameVectorFPTied opc, +multiclass SIMDThreeSameVectorFPTied opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$dst), + (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$dst), + (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2f64 
V128:$dst), (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; @@ -4262,16 +4522,16 @@ multiclass SIMDThreeSameVectorFPTied opc, // As above, but D and B sized elements unsupported. multiclass SIMDThreeSameVectorHS opc, string asm, SDPatternOperator OpNode> { - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; } @@ -4279,10 +4539,10 @@ multiclass SIMDThreeSameVectorHS opc, string asm, // Logical three vector ops share opcode bits, and only use B sized elements. multiclass SIMDLogicalThreeVector size, string asm, SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>; @@ -4303,11 +4563,11 @@ multiclass SIMDLogicalThreeVector size, string asm, multiclass SIMDLogicalThreeVectorTied size, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64, + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64, asm, ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128, + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128, asm, ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), @@ -4347,8 +4607,8 @@ multiclass SIMDLogicalThreeVectorTied size, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in class BaseSIMDTwoSameVector size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list pattern> + bits<2> size2, RegisterOperand regtype, string asm, + string dstkind, string srckind, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "{\t$Rd" # dstkind # ", $Rn" # srckind # "|" # dstkind # "\t$Rd, $Rn}", "", pattern>, @@ -4360,7 +4620,9 @@ class BaseSIMDTwoSameVector size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -4369,8 +4631,9 @@ class BaseSIMDTwoSameVector size, bits<5> opcode, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in class BaseSIMDTwoSameVectorTied size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list pattern> + bits<2> size2, RegisterOperand regtype, + string asm, string dstkind, string srckind, + list pattern> : I<(outs regtype:$dst), 
(ins regtype:$Rd, regtype:$Rn), asm, "{\t$Rd" # dstkind # ", $Rn" # srckind # "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, @@ -4382,7 +4645,9 @@ class BaseSIMDTwoSameVectorTied size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -4392,22 +4657,22 @@ class BaseSIMDTwoSameVectorTied size, bits<5> opcode, // Supports B, H, and S element sizes. multiclass SIMDTwoVectorBHS opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } @@ -4450,49 +4715,49 @@ multiclass SIMDVectorLShiftLongBySizeBHS { // Supports all element sizes. 
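[Editor's note, not part of the patch] The half-precision vector forms added to the three-same multiclasses above become reachable from C once the FullFP16 feature is enabled. A hedged sketch using ACLE NEON intrinsics; it assumes a toolchain that accepts -march=armv8.2-a+fp16 and exposes the _f16 intrinsics in <arm_neon.h>, and the exact instruction selected is up to the backend:

    #include <arm_neon.h>

    float16x8_t add_fp16(float16x8_t a, float16x8_t b) {
      // Expected to select the new ".8h" three-same form (FADD v.8h).
      return vaddq_f16(a, b);
    }
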
multiclass SIMDLongTwoVector opc, string asm, SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".4h", ".8b", [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".8h", ".16b", [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".2s", ".4h", [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".4s", ".8h", [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".1d", ".2s", [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".2d", ".4s", [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } multiclass SIMDLongTwoVectorTied opc, string asm, SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, + def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, asm, ".4h", ".8b", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, + def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, asm, ".8h", ".16b", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, + def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, asm, ".2s", ".4h", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, + def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, asm, ".4s", ".8h", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, + def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, asm, ".1d", ".2s", [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, + def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, asm, ".2d", ".4s", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v4i32 V128:$Rn)))]>; @@ -4501,50 +4766,50 @@ multiclass SIMDLongTwoVectorTied opc, string asm, // Supports all element sizes, except 1xD. 
multiclass SIMDTwoVectorBHSDTied opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>; } multiclass SIMDTwoVectorBHSD opc, string asm, SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; } @@ -4553,10 +4818,10 @@ multiclass SIMDTwoVectorBHSD opc, string asm, // Supports only B element sizes. 
multiclass SIMDTwoVectorB size, bits<5> opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; @@ -4565,16 +4830,16 @@ multiclass SIMDTwoVectorB size, bits<5> opc, string asm, // Supports only B and H element sizes. multiclass SIMDTwoVectorBH opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>; } @@ -4583,13 +4848,21 @@ multiclass SIMDTwoVectorBH opc, string asm, // as an extra opcode bit. multiclass SIMDTwoVectorFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } @@ -4597,10 +4870,10 @@ multiclass SIMDTwoVectorFP opc, string asm, // Supports only S element size. 
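[Editor's note, not part of the patch] The two-same-vector multiclasses above, and the FP-to-int/int-to-FP converts just below, gain the same half-precision variants. A sketch under the same toolchain assumptions as the previous example:

    #include <arm_neon.h>

    float16x8_t abs_fp16(float16x8_t v) { return vabsq_f16(v); }      // FABS .8h
    int16x8_t   cvt_fp16(float16x8_t v) { return vcvtq_s16_f16(v); }  // FCVTZS .8h
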
multiclass SIMDTwoVectorS opc, string asm, SDPatternOperator OpNode> { - def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } @@ -4608,26 +4881,42 @@ multiclass SIMDTwoVectorS opc, string asm, multiclass SIMDTwoVectorFPToInt opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } multiclass SIMDTwoVectorIntToFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; } @@ -4706,10 +4995,10 @@ multiclass SIMDMixedTwoVector opc, string asm, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; } -class BaseSIMDCmpTwoVector size, bits<5> opcode, - RegisterOperand regtype, - string asm, string kind, string zero, - ValueType dty, ValueType sty, SDNode OpNode> +class BaseSIMDCmpTwoVector size, bits<2> size2, + bits<5> opcode, RegisterOperand regtype, string asm, + string kind, string zero, ValueType dty, + ValueType sty, SDNode OpNode> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero # "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "", @@ -4722,7 +5011,9 @@ class BaseSIMDCmpTwoVector size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = 
Rn; @@ -4732,54 +5023,74 @@ class BaseSIMDCmpTwoVector size, bits<5> opcode, // Comparisons support all element sizes, except 1xD. multiclass SIMDCmpTwoVector opc, string asm, SDNode OpNode> { - def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64, + def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64, asm, ".8b", "0", v8i8, v8i8, OpNode>; - def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128, + def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128, asm, ".16b", "0", v16i8, v16i8, OpNode>; - def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64, + def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64, asm, ".4h", "0", v4i16, v4i16, OpNode>; - def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128, + def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128, asm, ".8h", "0", v8i16, v8i16, OpNode>; - def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64, + def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64, asm, ".2s", "0", v2i32, v2i32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128, + def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128, asm, ".4s", "0", v4i32, v4i32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128, + def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128, asm, ".2d", "0", v2i64, v2i64, OpNode>; } -// FP Comparisons support only S and D element sizes. +// FP Comparisons support only S and D element sizes (and H for v8.2a). multiclass SIMDFPCmpTwoVector opc, string asm, SDNode OpNode> { - def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64, + asm, ".4h", "0.0", + v4i16, v4f16, OpNode>; + def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128, + asm, ".8h", "0.0", + v8i16, v8f16, OpNode>; + } // Predicates = [HasNEON, HasFullFP16] + def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64, asm, ".2s", "0.0", v2i32, v2f32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128, + def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128, asm, ".4s", "0.0", v4i32, v4f32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128, + def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128, asm, ".2d", "0.0", v2i64, v2f64, OpNode>; - def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } + def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } + def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; - def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; - def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; } @@ -5325,7 +5636,7 @@ multiclass SIMDZipVectoropc, string asm, //---------------------------------------------------------------------------- let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDThreeScalar size, bits<5> opcode, +class BaseSIMDThreeScalar size, bits<5> opcode, RegisterClass regtype, string asm, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, @@ -5337,8 +5648,7 @@ class BaseSIMDThreeScalar size, bits<5> opcode, let Inst{31-30} = 0b01; let Inst{29} = U; let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; + let 
Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -5369,17 +5679,17 @@ class BaseSIMDThreeScalarTied size, bit R, bits<5> opcode, multiclass SIMDThreeScalarD opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; } multiclass SIMDThreeScalarBHSD opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; - def v1i8 : BaseSIMDThreeScalar; + def v1i32 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; + def v1i8 : BaseSIMDThreeScalar; def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (!cast(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; @@ -5389,9 +5699,9 @@ multiclass SIMDThreeScalarBHSD opc, string asm, multiclass SIMDThreeScalarHS opc, string asm, SDPatternOperator OpNode> { - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; } multiclass SIMDThreeScalarHSTied opc, string asm, @@ -5404,26 +5714,34 @@ multiclass SIMDThreeScalarHSTied opc, string asm, asm, []>; } -multiclass SIMDThreeScalarSD opc, string asm, +multiclass SIMDFPThreeScalar opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] } def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; } -multiclass SIMDThreeScalarFPCmp opc, string asm, +multiclass SIMDThreeScalarFPCmp opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] } def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), @@ -5482,7 +5800,7 @@ multiclass SIMDThreeScalarMixedTiedHS opc, string asm, //---------------------------------------------------------------------------- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalar size, bits<5> opcode, +class BaseSIMDTwoScalar size, bits<2> size2, bits<5> opcode, RegisterClass regtype, RegisterClass regtype2, string asm, list pat> : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, @@ -5494,7 +5812,9 @@ class BaseSIMDTwoScalar size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -5523,7 +5843,7 @@ class BaseSIMDTwoScalarTied size, bits<5> opcode, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDCmpTwoScalar size, bits<5> opcode, +class BaseSIMDCmpTwoScalar size, bits<2> size2, bits<5> opcode, RegisterClass regtype, string asm, string zero> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn, #" # zero, "", []>, @@ -5534,7 +5854,9 @@ class BaseSIMDCmpTwoScalar size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -5556,21 +5878,28 @@ class SIMDInexactCvtTwoScalar opcode, 
string asm> multiclass SIMDCmpTwoScalarD opc, string asm, SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i64rz : BaseSIMDCmpTwoScalar; def : Pat<(v1i64 (OpNode FPR64:$Rn)), (!cast(NAME # v1i64rz) FPR64:$Rn)>; } -multiclass SIMDCmpTwoScalarSD opc, string asm, +multiclass SIMDFPCmpTwoScalar opc, string asm, SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; - def v1i32rz : BaseSIMDCmpTwoScalar; + def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i32rz : BaseSIMDCmpTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16rz : BaseSIMDCmpTwoScalar; + } - def : InstAlias(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; - def : InstAlias(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>; + } def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), (!cast(NAME # v1i64rz) FPR64:$Rn)>; @@ -5578,35 +5907,42 @@ multiclass SIMDCmpTwoScalarSD opc, string asm, multiclass SIMDTwoScalarD opc, string asm, SDPatternOperator OpNode = null_frag> { - def v1i64 : BaseSIMDTwoScalar; def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), (!cast(NAME # "v1i64") FPR64:$Rn)>; } -multiclass SIMDTwoScalarSD opc, string asm> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; +multiclass SIMDFPTwoScalar opc, string asm> { + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1f16 : BaseSIMDTwoScalar; + } } -multiclass SIMDTwoScalarCVTSD opc, string asm, +multiclass SIMDFPTwoScalarCVT opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16 : BaseSIMDTwoScalar; + } } multiclass SIMDTwoScalarBHSD opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; } def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), @@ -5633,10 +5969,10 @@ multiclass SIMDTwoScalarBHSDTied opc, string asm, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in multiclass SIMDTwoScalarMixedBHS opc, string asm, SDPatternOperator OpNode = null_frag> { - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; } //---------------------------------------------------------------------------- @@ -5668,10 +6004,14 @@ multiclass SIMDPairwiseScalarD opc, string asm> { asm, ".2d">; } -multiclass SIMDPairwiseScalarSD opc, string asm> { - def v2i32p : BaseSIMDPairwiseScalar opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64, + asm, ".2h">; + } + def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64, asm, ".2s">; - def v2i64p : BaseSIMDPairwiseScalar; } @@ -5727,8 +6067,16 @@ multiclass SIMDAcrossLanesHSD opcode, string asm> { asm, ".4s", []>; } -multiclass SIMDAcrossLanesS opcode, bit sz1, string asm, +multiclass SIMDFPAcrossLanes opcode, bit sz1, string asm, Intrinsic intOp> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64, + asm, ".4h", + [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>; + def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128, + 
asm, ".8h", + [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, asm, ".4s", [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; @@ -5925,7 +6273,7 @@ class SIMDInsMainMovAlias : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" # - # "|" # size #" $dst$idx, $src$idx2}", + # "|" # size #"\t$dst$idx, $src$idx2}", (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>; @@ -6215,7 +6563,7 @@ multiclass SIMDScalarCPY { // AdvSIMD modified immediate instructions //---------------------------------------------------------------------------- -class BaseSIMDModifiedImm pattern> : I, @@ -6227,16 +6575,17 @@ class BaseSIMDModifiedImm pattern> - : BaseSIMDModifiedImm pattern> - : BaseSIMDModifiedImm b15_b12, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<2> shift; @@ -6284,7 +6633,7 @@ class BaseSIMDModifiedImmVectorShiftTied b15_b12, class BaseSIMDModifiedImmVectorShiftHalf b15_b12, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<2> shift; @@ -6349,7 +6698,7 @@ multiclass SIMDModifiedImmVectorShiftTied hw_cmode, class SIMDModifiedImmMoveMSL cmode, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<1> shift; @@ -6357,18 +6706,18 @@ class SIMDModifiedImmMoveMSL cmode, let Inst{12} = shift; } -class SIMDModifiedImmVectorNoShift cmode, +class SIMDModifiedImmVectorNoShift cmode, RegisterOperand vectype, Operand imm_type, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { let Inst{15-12} = cmode; } class SIMDModifiedImmScalarNoShift cmode, string asm, list pattern> - : BaseSIMDModifiedImm { let Inst{15-12} = cmode; let DecoderMethod = "DecodeModImmInstruction"; @@ -6438,8 +6787,36 @@ class BaseSIMDIndexedTied size, bits<4> opc, let Inst{4-0} = Rd; } -multiclass SIMDFPIndexedSD opc, string asm, - SDPatternOperator OpNode> { +multiclass SIMDFPIndexed opc, string asm, + SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc, + V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4f16 V64:$Rd), + (OpNode (v4f16 V64:$Rn), + (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8f16 V128:$Rd), + (OpNode (v8f16 V128:$Rn), + (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, @@ -6476,6 +6853,21 @@ multiclass SIMDFPIndexedSD opc, string asm, let Inst{21} = 0; } + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", + [(set (f16 FPR16Op:$Rd), + (OpNode (f16 FPR16Op:$Rn), + (f16 (vector_extract (v8f16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v1i32_indexed : 
BaseSIMDIndexed<1, U, 1, 0b10, opc, FPR32Op, FPR32Op, V128, VectorIndexS, asm, ".s", "", "", ".s", @@ -6501,7 +6893,7 @@ multiclass SIMDFPIndexedSD opc, string asm, } } -multiclass SIMDFPIndexedSDTiedPatterns { +multiclass SIMDFPIndexedTiedPatterns { // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar. def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (AArch64duplane32 (v4f32 V128:$Rm), @@ -6553,7 +6945,28 @@ multiclass SIMDFPIndexedSDTiedPatterns { V128:$Rm, VectorIndexD:$idx)>; } -multiclass SIMDFPIndexedSDTied opc, string asm> { +multiclass SIMDFPIndexedTied opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, asm, ".2s", ".2s", ".2s", ".s", []> { @@ -6580,6 +6993,16 @@ multiclass SIMDFPIndexedSDTied opc, string asm> { let Inst{21} = 0; } + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, FPR32Op, FPR32Op, V128, VectorIndexS, @@ -7117,7 +7540,13 @@ class BaseSIMDScalarShiftTied opc, bits<7> fixed_imm, } -multiclass SIMDScalarRShiftSD opc, string asm> { +multiclass SIMDFPScalarRShift opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def h : BaseSIMDScalarShift { + let Inst{19-16} = imm{3-0}; + } + } // Predicates = [HasNEON, HasFullFP16] def s : BaseSIMDScalarShift { let Inst{20-16} = imm{4-0}; @@ -7297,6 +7726,23 @@ class BaseSIMDVectorShiftTied opc, bits<7> fixed_imm, multiclass SIMDVectorRShiftSD opc, string asm, Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", @@ -7322,8 +7768,26 @@ multiclass SIMDVectorRShiftSD opc, string asm, } } -multiclass SIMDVectorRShiftSDToFP opc, string asm, +multiclass SIMDVectorRShiftToFP opc, string asm, Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, 
{0,0,1,?,?,?,?},
+                                        V128, V128, vecshiftR16,
+                                        asm, ".8h", ".8h",
+      [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> {
+    bits<4> imm;
+    let Inst{19-16} = imm;
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
+
   def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
                                         V64, V64, vecshiftR32,
                                         asm, ".2s", ".2s",
@@ -8604,9 +9068,8 @@ let Predicates = [HasNEON, HasV8_1a] in {
 class BaseSIMDThreeSameVectorTiedR0<bit Q, bit U, bits<2> size, bits<5> opcode,
                                     RegisterOperand regtype, string asm,
                                     string kind, list<dag> pattern>
-  : BaseSIMDThreeSameVectorTied<Q, U, size, opcode, regtype, asm, kind,
+  : BaseSIMDThreeSameVectorTied<Q, U, {size,0}, opcode, regtype, asm, kind,
     pattern> {
-  let Inst{21}=0;
 }
 multiclass SIMDThreeSameVectorSQRDMLxHTiedHS<bit U, bits<5> opc, string asm,
                                              SDPatternOperator Accum> {
@@ -9041,6 +9504,7 @@ def : TokenAlias<".8H", ".8h">;
 def : TokenAlias<".4S", ".4s">;
 def : TokenAlias<".2D", ".2d">;
 def : TokenAlias<".1Q", ".1q">;
+def : TokenAlias<".2H", ".2h">;
 def : TokenAlias<".B", ".b">;
 def : TokenAlias<".H", ".h">;
 def : TokenAlias<".S", ".s">;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index c0b3f2c60916..3ef3c8b840cb 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64InstrInfo.h"
-#include "AArch64MachineCombinerPattern.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
@@ -533,6 +532,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                              CC);
 }
 
+/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
+static bool canBeExpandedToORR(const MachineInstr *MI, unsigned BitSize) {
+  uint64_t Imm = MI->getOperand(1).getImm();
+  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+  uint64_t Encoding;
+  return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
+}
+
 // FIXME: this implementation should be micro-architecture dependent, so a
 // micro-architecture target hook should be introduced here in future.
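[Editor's note, not part of the patch] A note on the bit trick in canBeExpandedToORR above: the double shift zero-extends the low BitSize bits of the immediate before handing it to the logical-immediate encoder. A standalone model; the helper name is illustrative:

    #include <cassert>
    #include <cstdint>

    // Keep only the low BitSize bits of Imm, zero-extended. Both shifts are
    // well defined for BitSize in {32, 64}, since the amount stays below 64.
    uint64_t truncToBitSize(uint64_t Imm, unsigned BitSize) {
      return Imm << (64 - BitSize) >> (64 - BitSize);
    }

    int main() {
      assert(truncToBitSize(0xffffffff00ff00ffULL, 32) == 0x0000000000ff00ffULL);
      assert(truncToBitSize(0xffffffff00ff00ffULL, 64) == 0xffffffff00ff00ffULL);
      return 0;
    }
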
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const { @@ -573,6 +580,12 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const { case AArch64::ORRWrr: case AArch64::ORRXrr: return true; + // If MOVi32imm or MOVi64imm can be expanded into ORRWri or + // ORRXri, it is as cheap as MOV + case AArch64::MOVi32imm: + return canBeExpandedToORR(MI, 32); + case AArch64::MOVi64imm: + return canBeExpandedToORR(MI, 64); } llvm_unreachable("Unknown opcode to check as cheap as a move!"); @@ -1379,42 +1392,34 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( Width = 1; Scale = 1; break; - case AArch64::LDRXui: - case AArch64::STRXui: - Scale = Width = 8; - break; - case AArch64::LDRWui: - case AArch64::STRWui: - Scale = Width = 4; - break; - case AArch64::LDRBui: - case AArch64::STRBui: - Scale = Width = 1; - break; - case AArch64::LDRHui: - case AArch64::STRHui: - Scale = Width = 2; - break; - case AArch64::LDRSui: - case AArch64::STRSui: - Scale = Width = 4; - break; - case AArch64::LDRDui: - case AArch64::STRDui: - Scale = Width = 8; - break; case AArch64::LDRQui: case AArch64::STRQui: Scale = Width = 16; break; - case AArch64::LDRBBui: - case AArch64::STRBBui: - Scale = Width = 1; + case AArch64::LDRXui: + case AArch64::LDRDui: + case AArch64::STRXui: + case AArch64::STRDui: + Scale = Width = 8; break; + case AArch64::LDRWui: + case AArch64::LDRSui: + case AArch64::STRWui: + case AArch64::STRSui: + Scale = Width = 4; + break; + case AArch64::LDRHui: case AArch64::LDRHHui: + case AArch64::STRHui: case AArch64::STRHHui: Scale = Width = 2; break; + case AArch64::LDRBui: + case AArch64::LDRBBui: + case AArch64::STRBui: + case AArch64::STRBBui: + Scale = Width = 1; + break; }; BaseReg = LdSt->getOperand(1).getReg(); @@ -1445,23 +1450,43 @@ bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First, MachineInstr *Second) const { - // Cyclone can fuse CMN, CMP followed by Bcc. - - // FIXME: B0 can also fuse: - // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ. - if (Second->getOpcode() != AArch64::Bcc) - return false; - switch (First->getOpcode()) { - default: - return false; - case AArch64::SUBSWri: - case AArch64::ADDSWri: - case AArch64::ANDSWri: - case AArch64::SUBSXri: - case AArch64::ADDSXri: - case AArch64::ANDSXri: - return true; + if (Subtarget.isCyclone()) { + // Cyclone can fuse CMN, CMP, TST followed by Bcc. + unsigned SecondOpcode = Second->getOpcode(); + if (SecondOpcode == AArch64::Bcc) { + switch (First->getOpcode()) { + default: + return false; + case AArch64::SUBSWri: + case AArch64::ADDSWri: + case AArch64::ANDSWri: + case AArch64::SUBSXri: + case AArch64::ADDSXri: + case AArch64::ANDSXri: + return true; + } + } + // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ. 
+ if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX || + SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) { + switch (First->getOpcode()) { + default: + return false; + case AArch64::ADDWri: + case AArch64::ADDXri: + case AArch64::ANDWri: + case AArch64::ANDXri: + case AArch64::EORWri: + case AArch64::EORXri: + case AArch64::ORRWri: + case AArch64::ORRXri: + case AArch64::SUBWri: + case AArch64::SUBXri: + return true; + } + } } + return false; } MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue( @@ -1814,7 +1839,7 @@ void AArch64InstrInfo::storeRegToStackSlot( MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); unsigned Opc = 0; @@ -1911,7 +1936,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI)); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI); MachineMemOperand *MMO = MF.getMachineMemOperand( PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); @@ -2226,11 +2251,19 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, case AArch64::LDPDi: case AArch64::STPXi: case AArch64::STPDi: + case AArch64::LDNPXi: + case AArch64::LDNPDi: + case AArch64::STNPXi: + case AArch64::STNPDi: + ImmIdx = 3; IsSigned = true; Scale = 8; break; case AArch64::LDPQi: case AArch64::STPQi: + case AArch64::LDNPQi: + case AArch64::STNPQi: + ImmIdx = 3; IsSigned = true; Scale = 16; break; @@ -2238,6 +2271,11 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, case AArch64::LDPSi: case AArch64::STPWi: case AArch64::STPSi: + case AArch64::LDNPWi: + case AArch64::LDNPSi: + case AArch64::STNPWi: + case AArch64::STNPSi: + ImmIdx = 3; IsSigned = true; Scale = 4; break; @@ -2457,7 +2495,7 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, bool AArch64InstrInfo::getMachineCombinerPatterns( MachineInstr &Root, - SmallVectorImpl &Patterns) const { + SmallVectorImpl &Patterns) const { unsigned Opc = Root.getOpcode(); MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; @@ -2485,76 +2523,76 @@ bool AArch64InstrInfo::getMachineCombinerPatterns( "ADDWrr does not have register operands"); if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDW_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDW_OP2); + Patterns.push_back(MachineCombinerPattern::MULADDW_OP2); Found = true; } break; case AArch64::ADDXrr: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDX_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDX_OP2); + Patterns.push_back(MachineCombinerPattern::MULADDX_OP2); Found = true; } break; 
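[Editor's note, not part of the patch] Stepping back to the shouldScheduleAdjacent change completed above: it now recognizes two Cyclone fusion shapes, a flag-setting compare followed by Bcc, and an immediate-form ALU op followed by CBZ/CBNZ. A hedged C++ illustration of source that tends to produce such pairs; the exact codegen is not guaranteed by the patch:

    // At -O2 on AArch64, the first test typically lowers to a SUBSWri/cmp
    // feeding a Bcc (first fusion shape), and the second to an ANDWri/tst
    // feeding a conditional or compare-and-branch (second fusion shape).
    int classify(unsigned x) {
      if (x == 1)           // subs/cmp ... ; b.eq
        return 1;
      if ((x & 0xff) == 0)  // and/tst ... ; cbz / b.eq
        return 2;
      return 0;
    }
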
case AArch64::SUBWrr: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBW_OP2); + Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2); Found = true; } break; case AArch64::SUBXrr: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1); Found = true; } if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBX_OP2); + Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2); Found = true; } break; case AArch64::ADDWri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDWI_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1); Found = true; } break; case AArch64::ADDXri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULADDXI_OP1); + Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1); Found = true; } break; case AArch64::SUBWri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr, AArch64::WZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBWI_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1); Found = true; } break; case AArch64::SUBXri: if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr, AArch64::XZR)) { - Patterns.push_back(MachineCombinerPattern::MC_MULSUBXI_OP1); + Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1); Found = true; } break; @@ -2661,7 +2699,7 @@ static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, /// this function generates the instructions that could replace the /// original code sequence void AArch64InstrInfo::genAlternativeCodeSequence( - MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern, + MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg) const { @@ -2677,13 +2715,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( default: // signal error. 
break; - case MachineCombinerPattern::MC_MULADDW_OP1: - case MachineCombinerPattern::MC_MULADDX_OP1: + case MachineCombinerPattern::MULADDW_OP1: + case MachineCombinerPattern::MULADDX_OP1: // MUL I=A,B,0 // ADD R,I,C // ==> MADD R,A,B,C // --- Create(MADD); - if (Pattern == MachineCombinerPattern::MC_MULADDW_OP1) { + if (Pattern == MachineCombinerPattern::MULADDW_OP1) { Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -2692,13 +2730,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; - case MachineCombinerPattern::MC_MULADDW_OP2: - case MachineCombinerPattern::MC_MULADDX_OP2: + case MachineCombinerPattern::MULADDW_OP2: + case MachineCombinerPattern::MULADDX_OP2: // MUL I=A,B,0 // ADD R,C,I // ==> MADD R,A,B,C // --- Create(MADD); - if (Pattern == MachineCombinerPattern::MC_MULADDW_OP2) { + if (Pattern == MachineCombinerPattern::MULADDW_OP2) { Opc = AArch64::MADDWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -2707,8 +2745,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MC_MULADDWI_OP1: - case MachineCombinerPattern::MC_MULADDXI_OP1: { + case MachineCombinerPattern::MULADDWI_OP1: + case MachineCombinerPattern::MULADDXI_OP1: { // MUL I=A,B,0 // ADD R,I,Imm // ==> ORR V, ZR, Imm @@ -2716,7 +2754,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *OrrRC; unsigned BitSize, OrrOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MC_MULADDWI_OP1) { + if (Pattern == MachineCombinerPattern::MULADDWI_OP1) { OrrOpc = AArch64::ORRWri; OrrRC = &AArch64::GPR32spRegClass; BitSize = 32; @@ -2751,8 +2789,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } break; } - case MachineCombinerPattern::MC_MULSUBW_OP1: - case MachineCombinerPattern::MC_MULSUBX_OP1: { + case MachineCombinerPattern::MULSUBW_OP1: + case MachineCombinerPattern::MULSUBX_OP1: { // MUL I=A,B,0 // SUB R,I, C // ==> SUB V, 0, C @@ -2760,7 +2798,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *SubRC; unsigned SubOpc, ZeroReg; - if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP1) { + if (Pattern == MachineCombinerPattern::MULSUBW_OP1) { SubOpc = AArch64::SUBWrr; SubRC = &AArch64::GPR32spRegClass; ZeroReg = AArch64::WZR; @@ -2784,13 +2822,13 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); break; } - case MachineCombinerPattern::MC_MULSUBW_OP2: - case MachineCombinerPattern::MC_MULSUBX_OP2: + case MachineCombinerPattern::MULSUBW_OP2: + case MachineCombinerPattern::MULSUBX_OP2: // MUL I=A,B,0 // SUB R,C,I // ==> MSUB R,A,B,C (computes C - A*B) // --- Create(MSUB); - if (Pattern == MachineCombinerPattern::MC_MULSUBW_OP2) { + if (Pattern == MachineCombinerPattern::MULSUBW_OP2) { Opc = AArch64::MSUBWrrr; RC = &AArch64::GPR32RegClass; } else { @@ -2799,8 +2837,8 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genMadd(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; - case MachineCombinerPattern::MC_MULSUBWI_OP1: - case MachineCombinerPattern::MC_MULSUBXI_OP1: { + case MachineCombinerPattern::MULSUBWI_OP1: + case MachineCombinerPattern::MULSUBXI_OP1: { // MUL I=A,B,0 // SUB R,I, Imm // ==> ORR V, ZR, -Imm @@ -2808,7 +2846,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( // --- Create(MADD); const TargetRegisterClass *OrrRC; unsigned BitSize, OrrOpc, 
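[Editor's note, not part of the patch] The MULADDWI_OP1 case being built above rewrites a multiply feeding an add-immediate into a register-only madd by first materializing the immediate with ORR from the zero register (the combiner only fires when the immediate encodes as a logical immediate). A scalar model of why the rewrite is exact; function names are illustrative:

    #include <cstdint>

    // Before: mul I, A, B ; add R, I, #Imm
    uint32_t before(uint32_t A, uint32_t B, uint32_t Imm) {
      uint32_t I = A * B;
      return I + Imm;
    }

    // After: orr V, wzr, #Imm ; madd R, A, B, V
    // Both compute A*B + Imm in 32-bit modular arithmetic, so the rewrite is
    // exact; it pays one extra ORR but exposes the MADD accumulator form.
    uint32_t after(uint32_t A, uint32_t B, uint32_t Imm) {
      uint32_t V = 0u | Imm; // ORRWri with WZR materializes the immediate
      return A * B + V;      // MADDWrrr: Rd = Rn * Rm + Ra
    }
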
ZeroReg;
-  if (Pattern == MachineCombinerPattern::MC_MULSUBWI_OP1) {
+  if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
     OrrOpc = AArch64::ORRWri;
     OrrRC = &AArch64::GPR32spRegClass;
     BitSize = 32;
@@ -2944,3 +2982,34 @@ bool AArch64InstrInfo::optimizeCondBranch(MachineInstr *MI) const {
   MI->eraseFromParent();
   return true;
 }
+
+std::pair<unsigned, unsigned>
+AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+  const unsigned Mask = AArch64II::MO_FRAGMENT;
+  return std::make_pair(TF & Mask, TF & ~Mask);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+  using namespace AArch64II;
+  static const std::pair<unsigned, const char *> TargetFlags[] = {
+      {MO_PAGE, "aarch64-page"},
+      {MO_PAGEOFF, "aarch64-pageoff"},
+      {MO_G3, "aarch64-g3"},
+      {MO_G2, "aarch64-g2"},
+      {MO_G1, "aarch64-g1"},
+      {MO_G0, "aarch64-g0"},
+      {MO_HI12, "aarch64-hi12"}};
+  return makeArrayRef(TargetFlags);
+}
+
+ArrayRef<std::pair<unsigned, const char *>>
+AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
+  using namespace AArch64II;
+  static const std::pair<unsigned, const char *> TargetFlags[] = {
+      {MO_GOT, "aarch64-got"},
+      {MO_NC, "aarch64-nc"},
+      {MO_TLS, "aarch64-tls"},
+      {MO_CONSTPOOL, "aarch64-constant-pool"}};
+  return makeArrayRef(TargetFlags);
+}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index 68c2a2882580..ae02822a32e6 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -167,13 +167,13 @@ public:
   /// for an instruction chain ending in <Root>. All potential patterns are
   /// listed in the <Patterns> array.
   bool getMachineCombinerPatterns(MachineInstr &Root,
-                  SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns)
+                  SmallVectorImpl<MachineCombinerPattern> &Patterns)
       const override;
   /// When getMachineCombinerPatterns() finds patterns, this function generates
   /// the instructions that could replace the original code sequence
   void genAlternativeCodeSequence(
-      MachineInstr &Root, MachineCombinerPattern::MC_PATTERN Pattern,
+      MachineInstr &Root, MachineCombinerPattern Pattern,
       SmallVectorImpl<MachineInstr *> &InsInstrs,
       SmallVectorImpl<MachineInstr *> &DelInstrs,
       DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
@@ -181,6 +181,14 @@ public:
   bool useMachineCombiner() const override;
 
   bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
+
+  std::pair<unsigned, unsigned>
+  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+  ArrayRef<std::pair<unsigned, const char *>>
+  getSerializableDirectMachineOperandTargetFlags() const override;
+  ArrayRef<std::pair<unsigned, const char *>>
+  getSerializableBitmaskMachineOperandTargetFlags() const override;
+
 private:
   void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
                              MachineBasicBlock *TBB,
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index fa1a46acba84..d02bc9ff394d 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -16,6 +16,8 @@
 //
 def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                                  AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
+                                 AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
 def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                                AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">;
 def HasNEON          : Predicate<"Subtarget->hasNEON()">,
@@ -24,6 +26,12 @@ def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
                                  AssemblerPredicate<"FeatureCrypto", "crypto">;
 def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                  AssemblerPredicate<"FeatureCRC", "crc">;
+def HasPerfMon       : Predicate<"Subtarget->hasPerfMon()">;
+def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
+                                 AssemblerPredicate<"FeatureFullFP16",
"fullfp16">; +def HasSPE : Predicate<"Subtarget->hasSPE()">, + AssemblerPredicate<"FeatureSPE", "spe">; + def IsLE : Predicate<"Subtarget->isLittleEndian()">; def IsBE : Predicate<"!Subtarget->isLittleEndian()">; def IsCyclone : Predicate<"Subtarget->isCyclone()">; @@ -66,6 +74,20 @@ def SDT_AArch64CSel : SDTypeProfile<1, 4, SDTCisSameAs<0, 2>, SDTCisInt<3>, SDTCisVT<4, i32>]>; +def SDT_AArch64CCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisInt<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; +def SDT_AArch64FCCMP : SDTypeProfile<1, 5, + [SDTCisVT<0, i32>, + SDTCisFP<1>, + SDTCisSameAs<1, 2>, + SDTCisInt<3>, + SDTCisInt<4>, + SDTCisVT<5, i32>]>; def SDT_AArch64FCmp : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>; @@ -160,13 +182,14 @@ def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut, def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>; def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>; +def AArch64ccmp : SDNode<"AArch64ISD::CCMP", SDT_AArch64CCMP>; +def AArch64ccmn : SDNode<"AArch64ISD::CCMN", SDT_AArch64CCMP>; +def AArch64fccmp : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>; + def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>; def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>; -def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>; -def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>; - def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>; def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>; def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>; @@ -361,6 +384,9 @@ def : InstAlias<"wfi", (HINT 0b011)>; def : InstAlias<"sev", (HINT 0b100)>; def : InstAlias<"sevl", (HINT 0b101)>; +// v8.2a Statistical Profiling extension +def : InstAlias<"psb $op", (HINT psbhint_op:$op)>, Requires<[HasSPE]>; + // As far as LLVM is concerned this writes to the system's exclusive monitors. let mayLoad = 1, mayStore = 1 in def CLREX : CRmSystemI; @@ -383,12 +409,17 @@ def : InstAlias<"isb", (ISB 0xf)>; def MRS : MRSI; def MSR : MSRI; -def MSRpstate: MSRpstateI; +def MSRpstateImm1 : MSRpstateImm0_1; +def MSRpstateImm4 : MSRpstateImm0_15; // The thread pointer (on Linux, at least, where this has been implemented) is // TPIDR_EL0. def : Pat<(AArch64threadpointer), (MRS 0xde82)>; +// The cycle counter PMC register is PMCCNTR_EL0. +let Predicates = [HasPerfMon] in +def : Pat<(readcyclecounter), (MRS 0xdce8)>; + // Generic system instructions def SYSxt : SystemXtI<0, "sys">; def SYSLxt : SystemLXtI<1, "sysl">; @@ -595,10 +626,12 @@ def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm), (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>; def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm), (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>; +let AddedComplexity = 1 in { def : Pat<(sub GPR32sp:$R2, arith_extended_reg32:$R3), (SUBSWrx GPR32sp:$R2, arith_extended_reg32:$R3)>; def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64:$R3), (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64:$R3)>; +} // Because of the immediate format for add/sub-imm instructions, the // expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1). 
@@ -823,7 +856,7 @@ defm AND : LogicalReg<0b00, 0, "and", and>;
 defm BIC : LogicalReg<0b00, 1, "bic",
                       BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 defm EON : LogicalReg<0b10, 1, "eon",
-                      BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
+                      BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
 defm EOR : LogicalReg<0b10, 0, "eor", xor>;
 defm ORN : LogicalReg<0b01, 1, "orn",
                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
@@ -1020,13 +1053,10 @@ def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
 def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
 
 //===----------------------------------------------------------------------===//
-// Conditionally set flags instructions.
+// Conditional comparison instructions.
 //===----------------------------------------------------------------------===//
-defm CCMN : CondSetFlagsImm<0, "ccmn">;
-defm CCMP : CondSetFlagsImm<1, "ccmp">;
-
-defm CCMN : CondSetFlagsReg<0, "ccmn">;
-defm CCMP : CondSetFlagsReg<1, "ccmp">;
+defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
+defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
 
 //===----------------------------------------------------------------------===//
 // Conditional select instructions.
@@ -2421,6 +2451,26 @@ defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvt
 defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
 }
 
+multiclass FPToIntegerPats<SDNode to_int, SDNode round, string INST> {
+  def : Pat<(i32 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
+  def : Pat<(i64 (to_int (round f32:$Rn))),
+            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
+  def : Pat<(i32 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
+  def : Pat<(i64 (to_int (round f64:$Rn))),
+            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
+}
+
+defm : FPToIntegerPats<fp_to_sint, fceil,  "FCVTPS">;
+defm : FPToIntegerPats<fp_to_uint, fceil,  "FCVTPU">;
+defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">;
+defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">;
+defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">;
+defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
+defm : FPToIntegerPats<fp_to_sint, frnd,   "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, frnd,   "FCVTAU">;
+
 //===----------------------------------------------------------------------===//
 // Scaled integer to floating point conversion instructions.
 //===----------------------------------------------------------------------===//
@@ -2466,14 +2516,7 @@ defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
 def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
           (FRINTNDr FPR64:$Rn)>;
 
-// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior
-// in the C spec. Setting hasSideEffects ensures it is not DCE'd.
-//
-// TODO: We should really model the FPSR flags correctly. This is really ugly.
-let hasSideEffects = 1 in { defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>; -} - defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>; let SchedRW = [WriteFDiv] in { @@ -2488,23 +2531,23 @@ defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>; let SchedRW = [WriteFDiv] in { defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>; } -defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>; -defm FMAX : TwoOperandFPData<0b0100, "fmax", AArch64fmax>; -defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>; -defm FMIN : TwoOperandFPData<0b0101, "fmin", AArch64fmin>; +defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", fmaxnum>; +defm FMAX : TwoOperandFPData<0b0100, "fmax", fmaxnan>; +defm FMINNM : TwoOperandFPData<0b0111, "fminnm", fminnum>; +defm FMIN : TwoOperandFPData<0b0101, "fmin", fminnan>; let SchedRW = [WriteFMul] in { defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>; defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>; } defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>; -def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (fmaxnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (fminnan (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMINDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>; -def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), +def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>; //===----------------------------------------------------------------------===// @@ -2556,7 +2599,7 @@ defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>; //===----------------------------------------------------------------------===// defm FCCMPE : FPCondComparison<1, "fccmpe">; -defm FCCMP : FPCondComparison<0, "fccmp">; +defm FCCMP : FPCondComparison<0, "fccmp", AArch64fccmp>; //===----------------------------------------------------------------------===// // Floating point conditional select instruction. @@ -2589,6 +2632,40 @@ defm FMOV : FPMoveImmediate<"fmov">; // Advanced SIMD two vector instructions. //===----------------------------------------------------------------------===// +defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", + int_aarch64_neon_uabd>; +// Match UABDL in log2-shuffle patterns. 
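
The log2-shuffle patterns that follow match the branch-free absolute-value expansion applied to a widened difference. The underlying scalar identity, as a sketch of mine rather than code from the patch:

```cpp
// With s = d >> 15 (arithmetic shift, so s is 0 or -1), (d + s) ^ s == |d|.
// Applied per lane to d = zext(a) - zext(b), this is exactly the
// ashr/add/sub/xor DAG the patterns below recognize and replace with uabdl.
#include <cstdint>

uint16_t absdiff_u8(uint8_t a, uint8_t b) {
  int16_t d = (int16_t)(a - b);   // zext + sub, as in the matched DAG
  int16_t s = (int16_t)(d >> 15); // sign mask: 0 for d >= 0, -1 for d < 0
  return (uint16_t)((d + s) ^ s); // |a - b|, what uabdl computes per lane
}
```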
+def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), + (v8i16 (add (sub (zext (v8i8 V64:$opA)), + (zext (v8i8 V64:$opB))), + (AArch64vashr v8i16:$src, (i32 15))))), + (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>; +def : Pat<(xor (v8i16 (AArch64vashr v8i16:$src, (i32 15))), + (v8i16 (add (sub (zext (extract_high_v16i8 V128:$opA)), + (zext (extract_high_v16i8 V128:$opB))), + (AArch64vashr v8i16:$src, (i32 15))))), + (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>; +def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))), + (v4i32 (add (sub (zext (v4i16 V64:$opA)), + (zext (v4i16 V64:$opB))), + (AArch64vashr v4i32:$src, (i32 31))))), + (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>; +def : Pat<(xor (v4i32 (AArch64vashr v4i32:$src, (i32 31))), + (v4i32 (add (sub (zext (extract_high_v8i16 V128:$opA)), + (zext (extract_high_v8i16 V128:$opB))), + (AArch64vashr v4i32:$src, (i32 31))))), + (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>; +def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))), + (v2i64 (add (sub (zext (v2i32 V64:$opA)), + (zext (v2i32 V64:$opB))), + (AArch64vashr v2i64:$src, (i32 63))))), + (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>; +def : Pat<(xor (v2i64 (AArch64vashr v2i64:$src, (i32 63))), + (v2i64 (add (sub (zext (extract_high_v4i32 V128:$opA)), + (zext (extract_high_v4i32 V128:$opB))), + (AArch64vashr v2i64:$src, (i32 63))))), + (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>; + defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>; def : Pat<(xor (v8i8 (AArch64vashr V64:$src, (i32 7))), (v8i8 (add V64:$src, (AArch64vashr V64:$src, (i32 7))))), @@ -2780,29 +2857,29 @@ defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; -defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>; -defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>; -defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; -defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; -defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>; -defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>; -defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>; -defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>; +defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; +defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; +defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; +defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>; +defm FADD : 
SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; +defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; +defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; +defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>; +defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; +defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. -defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla", +defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls", +defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; // The following def pats catch the case where the LHS of an FMA is negated. @@ -2816,11 +2893,11 @@ def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; -defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; +defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", @@ -2833,9 +2910,9 @@ defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>; defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>; defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>; defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>; -defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>; +defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>; defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>; -defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>; +defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>; defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>; defm SQRDMULH : 
SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>; @@ -2852,9 +2929,9 @@ defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>; defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>; defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>; defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>; -defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>; +defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>; defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>; -defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>; +defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>; defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>; defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>; defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>; @@ -2879,54 +2956,6 @@ defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn", BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >; defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>; -def : Pat<(v8i8 (smin V64:$Rn, V64:$Rm)), - (SMINv8i8 V64:$Rn, V64:$Rm)>; -def : Pat<(v4i16 (smin V64:$Rn, V64:$Rm)), - (SMINv4i16 V64:$Rn, V64:$Rm)>; -def : Pat<(v2i32 (smin V64:$Rn, V64:$Rm)), - (SMINv2i32 V64:$Rn, V64:$Rm)>; -def : Pat<(v16i8 (smin V128:$Rn, V128:$Rm)), - (SMINv16i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i16 (smin V128:$Rn, V128:$Rm)), - (SMINv8i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i32 (smin V128:$Rn, V128:$Rm)), - (SMINv4i32 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i8 (smax V64:$Rn, V64:$Rm)), - (SMAXv8i8 V64:$Rn, V64:$Rm)>; -def : Pat<(v4i16 (smax V64:$Rn, V64:$Rm)), - (SMAXv4i16 V64:$Rn, V64:$Rm)>; -def : Pat<(v2i32 (smax V64:$Rn, V64:$Rm)), - (SMAXv2i32 V64:$Rn, V64:$Rm)>; -def : Pat<(v16i8 (smax V128:$Rn, V128:$Rm)), - (SMAXv16i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i16 (smax V128:$Rn, V128:$Rm)), - (SMAXv8i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i32 (smax V128:$Rn, V128:$Rm)), - (SMAXv4i32 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i8 (umin V64:$Rn, V64:$Rm)), - (UMINv8i8 V64:$Rn, V64:$Rm)>; -def : Pat<(v4i16 (umin V64:$Rn, V64:$Rm)), - (UMINv4i16 V64:$Rn, V64:$Rm)>; -def : Pat<(v2i32 (umin V64:$Rn, V64:$Rm)), - (UMINv2i32 V64:$Rn, V64:$Rm)>; -def : Pat<(v16i8 (umin V128:$Rn, V128:$Rm)), - (UMINv16i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i16 (umin V128:$Rn, V128:$Rm)), - (UMINv8i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i32 (umin V128:$Rn, V128:$Rm)), - (UMINv4i32 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i8 (umax V64:$Rn, V64:$Rm)), - (UMAXv8i8 V64:$Rn, V64:$Rm)>; -def : Pat<(v4i16 (umax V64:$Rn, V64:$Rm)), - (UMAXv4i16 V64:$Rn, V64:$Rm)>; -def : Pat<(v2i32 (umax V64:$Rn, V64:$Rm)), - (UMAXv2i32 V64:$Rn, V64:$Rm)>; -def : Pat<(v16i8 (umax V128:$Rn, V128:$Rm)), - (UMAXv16i8 V128:$Rn, V128:$Rm)>; -def : Pat<(v8i16 (umax V128:$Rn, V128:$Rm)), - (UMAXv8i16 V128:$Rn, V128:$Rm)>; -def : Pat<(v4i32 (umax V128:$Rn, V128:$Rm)), - (UMAXv4i32 V128:$Rn, V128:$Rm)>; def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm), (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>; @@ -3052,6 +3081,14 @@ def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # "|cmlt.2d\t$dst, $src1, $src2}", (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmle.4h\t$dst, $src1, $src2}", + (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.8h, 
$src1.8h, $src2.8h" # + "|fcmle.8h\t$dst, $src1, $src2}", + (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # "|fcmle.2s\t$dst, $src1, $src2}", (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3062,6 +3099,14 @@ def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # "|fcmle.2d\t$dst, $src1, $src2}", (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmlt.4h\t$dst, $src1, $src2}", + (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmlt.8h\t$dst, $src1, $src2}", + (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # "|fcmlt.2s\t$dst, $src1, $src2}", (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3072,6 +3117,14 @@ def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # "|fcmlt.2d\t$dst, $src1, $src2}", (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # + "|facle.4h\t$dst, $src1, $src2}", + (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # + "|facle.8h\t$dst, $src1, $src2}", + (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # "|facle.2s\t$dst, $src1, $src2}", (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3082,6 +3135,14 @@ def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # "|facle.2d\t$dst, $src1, $src2}", (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # + "|faclt.4h\t$dst, $src1, $src2}", + (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # + "|faclt.8h\t$dst, $src1, $src2}", + (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # "|faclt.2s\t$dst, $src1, $src2}", (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3103,19 +3164,19 @@ defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; -defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>; +defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FABD64 FPR64:$Rn, FPR64:$Rm)>; -defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", +defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", int_aarch64_neon_facge>; -defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", +defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", int_aarch64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; -defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>; -defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>; -defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>; +defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", 
AArch64fcmeq>; +defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>; +defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>; defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; @@ -3198,35 +3259,35 @@ defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>; defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>; defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>; defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>; -defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; -defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>; -defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; -defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>; -defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; -defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">; -defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">; -defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">; -defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">; -defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">; -defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">; -defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">; -defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">; +defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>; +defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>; +defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>; +defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>; +defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>; +defm FCVTAS : SIMDFPTwoScalar< 0, 0, 0b11100, "fcvtas">; +defm FCVTAU : SIMDFPTwoScalar< 1, 0, 0b11100, "fcvtau">; +defm FCVTMS : SIMDFPTwoScalar< 0, 0, 0b11011, "fcvtms">; +defm FCVTMU : SIMDFPTwoScalar< 1, 0, 0b11011, "fcvtmu">; +defm FCVTNS : SIMDFPTwoScalar< 0, 0, 0b11010, "fcvtns">; +defm FCVTNU : SIMDFPTwoScalar< 1, 0, 0b11010, "fcvtnu">; +defm FCVTPS : SIMDFPTwoScalar< 0, 1, 0b11010, "fcvtps">; +defm FCVTPU : SIMDFPTwoScalar< 1, 1, 0b11010, "fcvtpu">; def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">; -defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">; -defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">; -defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">; -defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">; -defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">; +defm FCVTZS : SIMDFPTwoScalar< 0, 1, 0b11011, "fcvtzs">; +defm FCVTZU : SIMDFPTwoScalar< 1, 1, 0b11011, "fcvtzu">; +defm FRECPE : SIMDFPTwoScalar< 0, 1, 0b11101, "frecpe">; +defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; +defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">; defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>; +defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", 
int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", int_aarch64_neon_suqadd>; -defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>; +defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", int_aarch64_neon_usqadd>; @@ -3390,8 +3451,6 @@ defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw", BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>; defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", int_aarch64_neon_uabd>; -defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", - int_aarch64_neon_uabd>; defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl", BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>; defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw", @@ -3449,8 +3508,8 @@ defm : Neon_mulacc_widen_patterns< // Patterns for 64-bit pmull def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm), (PMULLv1i64 V64:$Rn, V64:$Rm)>; -def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)), - (vector_extract (v2i64 V128:$Rm), (i64 1))), +def : Pat<(int_aarch64_neon_pmull64 (extractelt (v2i64 V128:$Rn), (i64 1)), + (extractelt (v2i64 V128:$Rm), (i64 1))), (PMULLv2i64 V128:$Rn, V128:$Rm)>; // CodeGen patterns for addhn and subhn instructions, which can actually be @@ -3593,11 +3652,11 @@ defm CPY : SIMDScalarCPY<"cpy">; //---------------------------------------------------------------------------- defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; -defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">; -defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">; -defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">; -defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">; -defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">; +defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; +defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; +defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; +defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; +defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; def : Pat<(v2i64 (AArch64saddv V128:$Rn)), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), @@ -3713,12 +3772,12 @@ defm : DUPWithTruncPats; multiclass DUPWithTrunci64Pats { - def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn), + def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn), imm:$idx))))), (DUP V128:$Rn, (IdxXFORM imm:$idx))>; - def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn), - imm:$idx))))), + def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn), + imm:$idx))))), (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>; } @@ -3747,6 +3806,13 @@ def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16), def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))), (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>; +def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn), + VectorIndexB:$idx)))), i8), + (i64 (SMOVvi8to64 V128:$Rn, 
VectorIndexB:$idx))>; +def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn), + VectorIndexH:$idx)))), i16), + (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>; + // Extracting i8 or i16 elements will have the zero-extend transformed to // an 'and' mask by type legalization since neither i8 nor i16 are legal types // for AArch64. Match these patterns here since UMOV already zeroes out the high @@ -3784,6 +3850,11 @@ def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (i64 FPR64:$Rn), dsub))>; +def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; + def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), @@ -3949,10 +4020,10 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; -defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; -defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; -defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; +defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; +defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; +defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; +defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; // Patterns for across-vector intrinsics, that have a node equivalent, that // returns a vector (with only the low lane defined) instead of a scalar. 
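
For orientation, the across-lanes definitions above (FMAXNMV and friends) reduce every lane of a vector to a single scalar. A small usage illustration with the standard ACLE intrinsics from arm_neon.h; the snippet is mine, not part of the patch:

```cpp
// One across-lanes instruction folds a whole vector to a scalar result in
// the low lane, which is what the SIMDFPAcrossLanes multiclasses select.
#include <arm_neon.h>

float max_across(float32x4_t v) { return vmaxnmvq_f32(v); } // fmaxnmv s0, v0.4s
float min_across(float32x4_t v) { return vminnmvq_f32(v); } // fminnmv s0, v0.4s
```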
@@ -4199,15 +4270,23 @@ def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; // AdvSIMD FMOV -def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, +def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, "fmov", ".2d", [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, +def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, "fmov", ".2s", [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, +def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, "fmov", ".4s", [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +let Predicates = [HasNEON, HasFullFP16] in { +def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, + "fmov", ".4h", + [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; +def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, + "fmov", ".8h", + [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +} // Predicates = [HasNEON, HasFullFP16] // AdvSIMD MOVI @@ -4235,7 +4314,7 @@ def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; // The movi_edit node has the immediate value already encoded, so we use // a plain imm0_255 in the pattern let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, +def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, simdimmtype10, "movi", ".2d", [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; @@ -4296,10 +4375,10 @@ def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; // Per byte: 8b & 16b -def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, +def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, "movi", ".8b", [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; -def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, +def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, "movi", ".16b", [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; @@ -4340,8 +4419,8 @@ def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s", //---------------------------------------------------------------------------- let hasSideEffects = 0 in { - defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">; - defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">; + defm FMLA : SIMDFPIndexedTied<0, 0b0001, "fmla">; + defm FMLS : SIMDFPIndexedTied<0, 0b0101, "fmls">; } // NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the @@ -4349,18 +4428,18 @@ let hasSideEffects = 0 in { // On the other hand, there are quite a few valid combinatorial options due to // the commutativity of multiplication and the fact that (-x) * y = x * (-y). 
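
To spell out the combinatorics behind the note above before the pattern instantiations that follow: a fused multiply-add whose product has one negated factor can be written four ways, all of which are the same value, so each distinct DAG shape needs its own FMLS pattern. A standalone check of that algebra (my sketch, assuming only IEEE fma semantics):

```cpp
// All four spellings of c - a*b as a single fused op; the products here are
// exact, so the equalities hold bit-for-bit.
#include <cmath>
#include <cassert>

int main() {
  double a = 3.0, b = 4.0, c = 5.0;
  assert(std::fma(-a, b, c) == std::fma(b, -a, c)); // commuted product
  assert(std::fma(-a, b, c) == std::fma(a, -b, c)); // (-x)*y == x*(-y)
  assert(std::fma(-a, b, c) == std::fma(-b, a, c));
  assert(std::fma(-a, b, c) == c - a * b);
  return 0;
}
```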
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
            TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+defm : SIMDFPIndexedTiedPatterns<"FMLA",
            TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
            TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
            TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
            TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+defm : SIMDFPIndexedTiedPatterns<"FMLS",
            TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
 
 multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
@@ -4424,7 +4503,9 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
                          (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                          V128:$Rm, VectorIndexS:$idx)>;
   def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
-                         (vector_extract (v2f32 (fneg V64:$Rm)),
+                         (vector_extract (v4f32 (insert_subvector undef,
+                                                 (v2f32 (fneg V64:$Rm)),
+                                                 (i32 0))),
                                          VectorIndexS:$idx))),
             (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
@@ -4442,8 +4523,8 @@ defm : FMLSIndexedAfterNegPatterns<
 defm : FMLSIndexedAfterNegPatterns<
        TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
 
-defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
-defm FMUL  : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>;
+defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
+defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", fmul>;
 
 def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
           (FMULv2i32_indexed V64:$Rn,
@@ -4497,10 +4578,10 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
 //----------------------------------------------------------------------------
 // AdvSIMD scalar shift instructions
 //----------------------------------------------------------------------------
-defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">;
-defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">;
-defm SCVTF  : SIMDScalarRShiftSD<0, 0b11100, "scvtf">;
-defm UCVTF  : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">;
+defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
+defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
+defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
+defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
 // Codegen patterns for the above. We don't put these directly on the
 // instructions because TableGen's type inference can't handle the truth.
 // Having the same base pattern for fp <--> int totally freaks it out.
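
On the scalar-shift converts just renamed: the immediate operand makes these fixed-point conversions. In plain C terms, scvtf with #fbits divides by 2^fbits after converting, and fcvtzs with #fbits multiplies by 2^fbits before truncating. A semantics sketch of mine, not code from the patch:

```cpp
// Reference semantics for the scaled conversions encoded by
// SIMDFPScalarRShift; the #fbits immediate is the power of two.
#include <cstdint>
#include <cmath>

double scvtf_fixed(int64_t x, unsigned fbits) {
  return (double)x / std::exp2((double)fbits);    // scvtf d0, x0, #fbits
}
int64_t fcvtzs_fixed(double x, unsigned fbits) {
  return (int64_t)(x * std::exp2((double)fbits)); // fcvtzs x0, d0, #fbits
}
```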
@@ -4573,7 +4654,7 @@ defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", //---------------------------------------------------------------------------- defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; -defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", +defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", int_aarch64_neon_vcvtfxs2fp>; defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", int_aarch64_neon_rshrn>; @@ -4608,7 +4689,7 @@ defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; -defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", +defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", int_aarch64_neon_vcvtfxu2fp>; defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", int_aarch64_neon_uqrshrn>; @@ -5133,10 +5214,10 @@ def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>; def : Pat<(i64 (anyext GPR32:$src)), (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>; -// When we need to explicitly zero-extend, we use an unsigned bitfield move -// instruction (UBFM) on the enclosing super-reg. +// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and +// then assert the extension has happened. def : Pat<(i64 (zext GPR32:$src)), - (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>; + (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; // To sign extend, we use a signed bitfield move instruction (SBFM) on the // containing super-reg. @@ -5801,6 +5882,21 @@ def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 (REV16v16i8 FPR128:$src))>; } +def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; +def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))), + (EXTRACT_SUBREG V128:$Rn, dsub)>; + def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))), (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>; def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))), @@ -5852,6 +5948,45 @@ def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>; +// Patterns for nontemporal/no-allocate stores. +// We have to resort to tricks to turn a single-input store into a store pair, +// because there is no single-input nontemporal store, only STNP. 
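
Before the patterns themselves, a rough picture of the trick the comment describes: the 128-bit value is split into its two 64-bit halves and stored with one STNP, which carries the no-allocate hint. The C below only illustrates the data movement; the actual lowering keeps both halves in FP registers:

```cpp
// Conceptual expansion of a 128-bit nontemporal store to [x0]:
//   d0 = low half of q0   (EXTRACT_SUBREG ..., dsub)
//   d1 = lane 1 of q0     (CPYi64 ..., (i64 1))
//   stnp d0, d1, [x0]     (STNPDi: pair store with nontemporal hint)
#include <cstring>

void nt_store_128(const unsigned char (&v)[16], unsigned char *p) {
  std::memcpy(p, v, 8);         // first register of the pair: low half
  std::memcpy(p + 8, v + 8, 8); // second register of the pair: high half
}
```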
+let Predicates = [IsLE] in {
+let AddedComplexity = 15 in {
+class NTStore128Pat<ValueType VT> :
+  Pat<(nontemporalstore (VT FPR128:$Rt),
+        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
+      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
+              (CPYi64 FPR128:$Rt, (i64 1)),
+              GPR64sp:$Rn, simm7s8:$offset)>;
+
+def : NTStore128Pat<v2i64>;
+def : NTStore128Pat<v4i32>;
+def : NTStore128Pat<v8i16>;
+def : NTStore128Pat<v16i8>;
+
+class NTStore64Pat<ValueType VT> :
+  Pat<(nontemporalstore (VT FPR64:$Rt),
+        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
+              (CPYi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
+              GPR64sp:$Rn, simm7s4:$offset)>;
+
+// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
+def : NTStore64Pat<v1f64>;
+def : NTStore64Pat<v1i64>;
+def : NTStore64Pat<v2i32>;
+def : NTStore64Pat<v4i16>;
+def : NTStore64Pat<v8i8>;
+
+def : Pat<(nontemporalstore GPR64:$Rt,
+            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
+          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 0, 31), sub_32),
+                  GPR64sp:$Rn, simm7s4:$offset)>;
+} // AddedComplexity=15
+} // Predicates = [IsLE]
+
 // Tail call return handling. These are all compiler pseudo-instructions,
 // so no encoding information or anything like that.
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 82f77a77ab5e..566aa2c9a9ba 100644
--- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -41,54 +41,85 @@ STATISTIC(NumPostFolded, "Number of post-index updates folded");
 STATISTIC(NumPreFolded, "Number of pre-index updates folded");
 STATISTIC(NumUnscaledPairCreated,
           "Number of load/store from unscaled generated");
+STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
+STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
+STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
 
 static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit",
                                    cl::init(20), cl::Hidden);
 
-// Place holder while testing unscaled load/store combining
-static cl::opt<bool> EnableAArch64UnscaledMemOp(
-    "aarch64-unscaled-mem-op", cl::Hidden,
-    cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true));
+namespace llvm {
+void initializeAArch64LoadStoreOptPass(PassRegistry &);
+}
+
+#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
 
 namespace {
+
+typedef struct LdStPairFlags {
+  // If a matching instruction is found, MergeForward is set to true if the
+  // merge is to remove the first instruction and replace the second with
+  // a pair-wise insn, and false if the reverse is true.
+  bool MergeForward;
+
+  // SExtIdx gives the index of the result of the load pair that must be
+  // extended. The value of SExtIdx assumes that the paired load produces the
+  // value in this order: (I, returned iterator), i.e., -1 means no value has
+  // to be extended, 0 means I, and 1 means the returned iterator.
+ int SExtIdx; + + LdStPairFlags() : MergeForward(false), SExtIdx(-1) {} + + void setMergeForward(bool V = true) { MergeForward = V; } + bool getMergeForward() const { return MergeForward; } + + void setSExtIdx(int V) { SExtIdx = V; } + int getSExtIdx() const { return SExtIdx; } + +} LdStPairFlags; + struct AArch64LoadStoreOpt : public MachineFunctionPass { static char ID; - AArch64LoadStoreOpt() : MachineFunctionPass(ID) {} + AArch64LoadStoreOpt() : MachineFunctionPass(ID) { + initializeAArch64LoadStoreOptPass(*PassRegistry::getPassRegistry()); + } const AArch64InstrInfo *TII; const TargetRegisterInfo *TRI; + const AArch64Subtarget *Subtarget; // Scan the instructions looking for a load/store that can be combined // with the current instruction into a load/store pair. // Return the matching instruction if one is found, else MBB->end(). - // If a matching instruction is found, MergeForward is set to true if the - // merge is to remove the first instruction and replace the second with - // a pair-wise insn, and false if the reverse is true. - // \p SExtIdx[out] gives the index of the result of the load pair that - // must be extended. The value of SExtIdx assumes that the paired load - // produces the value in this order: (I, returned iterator), i.e., - // -1 means no value has to be extended, 0 means I, and 1 means the - // returned iterator. MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, int &SExtIdx, + LdStPairFlags &Flags, unsigned Limit); + + // Scan the instructions looking for a store that writes to the address from + // which the current load instruction reads. Return true if one is found. + bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit, + MachineBasicBlock::iterator &StoreI); + // Merge the two instructions indicated into a single pair-wise instruction. // If MergeForward is true, erase the first instruction and fold its // operation into the second. If false, the reverse. Return the instruction // following the first instruction (which may change during processing). - // \p SExtIdx index of the result that must be extended for a paired load. - // -1 means none, 0 means I, and 1 means Paired. MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, bool MergeForward, - int SExtIdx); + MachineBasicBlock::iterator Paired, + const LdStPairFlags &Flags); + + // Promote the load that reads directly from the address stored to. + MachineBasicBlock::iterator + promoteLoadFromStore(MachineBasicBlock::iterator LoadI, + MachineBasicBlock::iterator StoreI); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using // pre or post indexed addressing with writeback. Scan forwards. MachineBasicBlock::iterator findMatchingUpdateInsnForward(MachineBasicBlock::iterator I, unsigned Limit, - int Value); + int UnscaledOffset); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using @@ -96,97 +127,177 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { MachineBasicBlock::iterator findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I, unsigned Limit); - // Merge a pre-index base register update into a ld/st instruction. 
- MachineBasicBlock::iterator - mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update); + // Find an instruction that updates the base register of the ld/st + // instruction. + bool isMatchingUpdateInsn(MachineInstr *MemMI, MachineInstr *MI, + unsigned BaseReg, int Offset); - // Merge a post-index base register update into a ld/st instruction. + // Merge a pre- or post-index base register update into a ld/st instruction. MachineBasicBlock::iterator - mergePostIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update); + mergeUpdateInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update, bool IsPreIdx); - bool optimizeBlock(MachineBasicBlock &MBB); + // Find and merge foldable ldr/str instructions. + bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI); + + // Find and promote load instructions which read directly from store. + bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI); + + // Check if converting two narrow loads into a single wider load with + // bitfield extracts could be enabled. + bool enableNarrowLdMerge(MachineFunction &Fn); + + bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt); bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "AArch64 load / store optimization pass"; + return AARCH64_LOAD_STORE_OPT_NAME; } - -private: - int getMemSize(MachineInstr *MemMI); }; char AArch64LoadStoreOpt::ID = 0; } // namespace -static bool isUnscaledLdst(unsigned Opc) { +INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt", + AARCH64_LOAD_STORE_OPT_NAME, false, false) + +static bool isUnscaledLdSt(unsigned Opc) { switch (Opc) { default: return false; case AArch64::STURSi: - return true; case AArch64::STURDi: - return true; case AArch64::STURQi: - return true; + case AArch64::STURBBi: + case AArch64::STURHHi: case AArch64::STURWi: - return true; case AArch64::STURXi: - return true; case AArch64::LDURSi: - return true; case AArch64::LDURDi: - return true; case AArch64::LDURQi: - return true; case AArch64::LDURWi: - return true; case AArch64::LDURXi: - return true; case AArch64::LDURSWi: + case AArch64::LDURHHi: + case AArch64::LDURBBi: + case AArch64::LDURSBWi: + case AArch64::LDURSHWi: return true; } } -// Size in bytes of the data moved by an unscaled load or store -int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { - switch (MemMI->getOpcode()) { +static bool isUnscaledLdSt(MachineInstr *MI) { + return isUnscaledLdSt(MI->getOpcode()); +} + +static unsigned getBitExtrOpcode(MachineInstr *MI) { + switch (MI->getOpcode()) { default: - llvm_unreachable("Opcode has unknown size!"); - case AArch64::STRSui: - case AArch64::STURSi: - return 4; - case AArch64::STRDui: - case AArch64::STURDi: - return 8; - case AArch64::STRQui: - case AArch64::STURQi: - return 16; - case AArch64::STRWui: - case AArch64::STURWi: - return 4; - case AArch64::STRXui: - case AArch64::STURXi: - return 8; + llvm_unreachable("Unexpected opcode."); + case AArch64::LDRBBui: + case AArch64::LDURBBi: + case AArch64::LDRHHui: + case AArch64::LDURHHi: + return AArch64::UBFMWri; + case AArch64::LDRSBWui: + case AArch64::LDURSBWi: + case AArch64::LDRSHWui: + case AArch64::LDURSHWi: + return AArch64::SBFMWri; + } +} + +static bool isNarrowStore(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64::STRBBui: + case AArch64::STURBBi: + case AArch64::STRHHui: + case AArch64::STURHHi: + return true; + } +} + +static bool 
isNarrowStore(MachineInstr *MI) { + return isNarrowStore(MI->getOpcode()); +} + +static bool isNarrowLoad(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64::LDRHHui: + case AArch64::LDURHHi: + case AArch64::LDRBBui: + case AArch64::LDURBBi: + case AArch64::LDRSHWui: + case AArch64::LDURSHWi: + case AArch64::LDRSBWui: + case AArch64::LDURSBWi: + return true; + } +} + +static bool isNarrowLoad(MachineInstr *MI) { + return isNarrowLoad(MI->getOpcode()); +} + +// Scaling factor for unscaled load or store. +static int getMemScale(MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + llvm_unreachable("Opcode has unknown scale!"); + case AArch64::LDRBBui: + case AArch64::LDURBBi: + case AArch64::LDRSBWui: + case AArch64::LDURSBWi: + case AArch64::STRBBui: + case AArch64::STURBBi: + return 1; + case AArch64::LDRHHui: + case AArch64::LDURHHi: + case AArch64::LDRSHWui: + case AArch64::LDURSHWi: + case AArch64::STRHHui: + case AArch64::STURHHi: + return 2; case AArch64::LDRSui: case AArch64::LDURSi: + case AArch64::LDRSWui: + case AArch64::LDURSWi: + case AArch64::LDRWui: + case AArch64::LDURWi: + case AArch64::STRSui: + case AArch64::STURSi: + case AArch64::STRWui: + case AArch64::STURWi: + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPWi: + case AArch64::STPSi: + case AArch64::STPWi: return 4; case AArch64::LDRDui: case AArch64::LDURDi: + case AArch64::LDRXui: + case AArch64::LDURXi: + case AArch64::STRDui: + case AArch64::STURDi: + case AArch64::STRXui: + case AArch64::STURXi: + case AArch64::LDPDi: + case AArch64::LDPXi: + case AArch64::STPDi: + case AArch64::STPXi: return 8; case AArch64::LDRQui: case AArch64::LDURQi: + case AArch64::STRQui: + case AArch64::STURQi: + case AArch64::LDPQi: + case AArch64::STPQi: return 16; - case AArch64::LDRWui: - case AArch64::LDURWi: - return 4; - case AArch64::LDRXui: - case AArch64::LDURXi: - return 8; - case AArch64::LDRSWui: - case AArch64::LDURSWi: - return 4; } } @@ -203,6 +314,10 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc, case AArch64::STURDi: case AArch64::STRQui: case AArch64::STURQi: + case AArch64::STRBBui: + case AArch64::STURBBi: + case AArch64::STRHHui: + case AArch64::STURHHi: case AArch64::STRWui: case AArch64::STURWi: case AArch64::STRXui: @@ -219,11 +334,23 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc, case AArch64::STURSi: case AArch64::LDRSui: case AArch64::LDURSi: + case AArch64::LDRHHui: + case AArch64::LDURHHi: + case AArch64::LDRBBui: + case AArch64::LDURBBi: return Opc; case AArch64::LDRSWui: return AArch64::LDRWui; case AArch64::LDURSWi: return AArch64::LDURWi; + case AArch64::LDRSBWui: + return AArch64::LDRBBui; + case AArch64::LDRSHWui: + return AArch64::LDRHHui; + case AArch64::LDURSBWi: + return AArch64::LDURBBi; + case AArch64::LDURSHWi: + return AArch64::LDURHHi; } } @@ -240,6 +367,14 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { case AArch64::STRQui: case AArch64::STURQi: return AArch64::STPQi; + case AArch64::STRBBui: + return AArch64::STRHHui; + case AArch64::STRHHui: + return AArch64::STRWui; + case AArch64::STURBBi: + return AArch64::STURHHi; + case AArch64::STURHHi: + return AArch64::STURWi; case AArch64::STRWui: case AArch64::STURWi: return AArch64::STPWi; @@ -264,6 +399,48 @@ static unsigned getMatchingPairOpcode(unsigned Opc) { case AArch64::LDRSWui: case AArch64::LDURSWi: return AArch64::LDPSWi; + case AArch64::LDRHHui: + case AArch64::LDRSHWui: + return AArch64::LDRWui; + case AArch64::LDURHHi: + case AArch64::LDURSHWi: + return 
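
The getMemScale/getBitExtrOpcode helpers above serve the pass's new narrow-load merge: two adjacent narrow loads become one wider load plus bitfield extracts. A source-level picture of the payoff follows; the assembly in the comments is illustrative, and the register numbers are hypothetical:

```cpp
// Before the pass (little-endian):   After merging:
//   ldrh w8, [x0]                      ldr  w8, [x0]
//   ldrh w9, [x0, #2]                  ubfx w9, w8, #16, #16  // UBFMWri
//                                      and  w8, w8, #0xffff   // low half
#include <cstdint>

uint32_t sum_adjacent_halves(const uint16_t *p) {
  return (uint32_t)p[0] + (uint32_t)p[1];
}
```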
AArch64::LDURWi; + case AArch64::LDRBBui: + case AArch64::LDRSBWui: + return AArch64::LDRHHui; + case AArch64::LDURBBi: + case AArch64::LDURSBWi: + return AArch64::LDURHHi; + } +} + +static unsigned isMatchingStore(MachineInstr *LoadInst, + MachineInstr *StoreInst) { + unsigned LdOpc = LoadInst->getOpcode(); + unsigned StOpc = StoreInst->getOpcode(); + switch (LdOpc) { + default: + llvm_unreachable("Unsupported load instruction!"); + case AArch64::LDRBBui: + return StOpc == AArch64::STRBBui || StOpc == AArch64::STRHHui || + StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; + case AArch64::LDURBBi: + return StOpc == AArch64::STURBBi || StOpc == AArch64::STURHHi || + StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; + case AArch64::LDRHHui: + return StOpc == AArch64::STRHHui || StOpc == AArch64::STRWui || + StOpc == AArch64::STRXui; + case AArch64::LDURHHi: + return StOpc == AArch64::STURHHi || StOpc == AArch64::STURWi || + StOpc == AArch64::STURXi; + case AArch64::LDRWui: + return StOpc == AArch64::STRWui || StOpc == AArch64::STRXui; + case AArch64::LDURWi: + return StOpc == AArch64::STURWi || StOpc == AArch64::STURXi; + case AArch64::LDRXui: + return StOpc == AArch64::STRXui; + case AArch64::LDURXi: + return StOpc == AArch64::STURXi; } } @@ -277,6 +454,10 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { return AArch64::STRDpre; case AArch64::STRQui: return AArch64::STRQpre; + case AArch64::STRBBui: + return AArch64::STRBBpre; + case AArch64::STRHHui: + return AArch64::STRHHpre; case AArch64::STRWui: return AArch64::STRWpre; case AArch64::STRXui: @@ -287,12 +468,38 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { return AArch64::LDRDpre; case AArch64::LDRQui: return AArch64::LDRQpre; + case AArch64::LDRBBui: + return AArch64::LDRBBpre; + case AArch64::LDRHHui: + return AArch64::LDRHHpre; case AArch64::LDRWui: return AArch64::LDRWpre; case AArch64::LDRXui: return AArch64::LDRXpre; case AArch64::LDRSWui: return AArch64::LDRSWpre; + case AArch64::LDPSi: + return AArch64::LDPSpre; + case AArch64::LDPSWi: + return AArch64::LDPSWpre; + case AArch64::LDPDi: + return AArch64::LDPDpre; + case AArch64::LDPQi: + return AArch64::LDPQpre; + case AArch64::LDPWi: + return AArch64::LDPWpre; + case AArch64::LDPXi: + return AArch64::LDPXpre; + case AArch64::STPSi: + return AArch64::STPSpre; + case AArch64::STPDi: + return AArch64::STPDpre; + case AArch64::STPQi: + return AArch64::STPQpre; + case AArch64::STPWi: + return AArch64::STPWpre; + case AArch64::STPXi: + return AArch64::STPXpre; } } @@ -306,6 +513,10 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { return AArch64::STRDpost; case AArch64::STRQui: return AArch64::STRQpost; + case AArch64::STRBBui: + return AArch64::STRBBpost; + case AArch64::STRHHui: + return AArch64::STRHHpost; case AArch64::STRWui: return AArch64::STRWpost; case AArch64::STRXui: @@ -316,19 +527,111 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { return AArch64::LDRDpost; case AArch64::LDRQui: return AArch64::LDRQpost; + case AArch64::LDRBBui: + return AArch64::LDRBBpost; + case AArch64::LDRHHui: + return AArch64::LDRHHpost; case AArch64::LDRWui: return AArch64::LDRWpost; case AArch64::LDRXui: return AArch64::LDRXpost; case AArch64::LDRSWui: return AArch64::LDRSWpost; + case AArch64::LDPSi: + return AArch64::LDPSpost; + case AArch64::LDPSWi: + return AArch64::LDPSWpost; + case AArch64::LDPDi: + return AArch64::LDPDpost; + case AArch64::LDPQi: + return AArch64::LDPQpost; + case AArch64::LDPWi: + return AArch64::LDPWpost; + case AArch64::LDPXi: + 
return AArch64::LDPXpost; + case AArch64::STPSi: + return AArch64::STPSpost; + case AArch64::STPDi: + return AArch64::STPDpost; + case AArch64::STPQi: + return AArch64::STPQpost; + case AArch64::STPWi: + return AArch64::STPWpost; + case AArch64::STPXi: + return AArch64::STPXpost; } } +static bool isPairedLdSt(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: + return false; + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPWi: + case AArch64::LDPXi: + case AArch64::STPSi: + case AArch64::STPDi: + case AArch64::STPQi: + case AArch64::STPWi: + case AArch64::STPXi: + return true; + } +} + +static const MachineOperand &getLdStRegOp(const MachineInstr *MI, + unsigned PairedRegOp = 0) { + assert(PairedRegOp < 2 && "Unexpected register operand idx."); + unsigned Idx = isPairedLdSt(MI) ? PairedRegOp : 0; + return MI->getOperand(Idx); +} + +static const MachineOperand &getLdStBaseOp(const MachineInstr *MI) { + unsigned Idx = isPairedLdSt(MI) ? 2 : 1; + return MI->getOperand(Idx); +} + +static const MachineOperand &getLdStOffsetOp(const MachineInstr *MI) { + unsigned Idx = isPairedLdSt(MI) ? 3 : 2; + return MI->getOperand(Idx); +} + +static bool isLdOffsetInRangeOfSt(MachineInstr *LoadInst, + MachineInstr *StoreInst) { + assert(isMatchingStore(LoadInst, StoreInst) && "Expect only matched ld/st."); + int LoadSize = getMemScale(LoadInst); + int StoreSize = getMemScale(StoreInst); + int UnscaledStOffset = isUnscaledLdSt(StoreInst) + ? getLdStOffsetOp(StoreInst).getImm() + : getLdStOffsetOp(StoreInst).getImm() * StoreSize; + int UnscaledLdOffset = isUnscaledLdSt(LoadInst) + ? getLdStOffsetOp(LoadInst).getImm() + : getLdStOffsetOp(LoadInst).getImm() * LoadSize; + return (UnscaledStOffset <= UnscaledLdOffset) && + (UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize)); +} + +// Copy MachineMemOperands from Op0 and Op1 to a new array assigned to MI. +static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, + MachineInstr *Op1) { + assert(MI->memoperands_empty() && "expected a new machineinstr"); + size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) + + (Op1->memoperands_end() - Op1->memoperands_begin()); + + MachineFunction *MF = MI->getParent()->getParent(); + MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); + MachineSDNode::mmo_iterator MemEnd = + std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); + MemEnd = std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); + MI->setMemRefs(MemBegin, MemEnd); +} + MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, - bool MergeForward, int SExtIdx) { + const LdStPairFlags &Flags) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -338,25 +641,26 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, if (NextI == Paired) ++NextI; + int SExtIdx = Flags.getSExtIdx(); unsigned Opc = SExtIdx == -1 ? I->getOpcode() : getMatchingNonSExtOpcode(I->getOpcode()); - bool IsUnscaled = isUnscaledLdst(Opc); - int OffsetStride = - IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1; + bool IsUnscaled = isUnscaledLdSt(Opc); + int OffsetStride = IsUnscaled ? 
getMemScale(I) : 1; + bool MergeForward = Flags.getMergeForward(); unsigned NewOpc = getMatchingPairOpcode(Opc); // Insert our new paired instruction after whichever of the paired // instructions MergeForward indicates. MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. - MachineOperand &BaseRegOp = - MergeForward ? Paired->getOperand(1) : I->getOperand(1); + const MachineOperand &BaseRegOp = + MergeForward ? getLdStBaseOp(Paired) : getLdStBaseOp(I); // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI, *Rt2MI; - if (I->getOperand(2).getImm() == - Paired->getOperand(2).getImm() + OffsetStride) { + if (getLdStOffsetOp(I).getImm() == + getLdStOffsetOp(Paired).getImm() + OffsetStride) { RtMI = Paired; Rt2MI = I; // Here we swapped the assumption made for SExtIdx. @@ -368,18 +672,135 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, RtMI = I; Rt2MI = Paired; } - // Handle Unscaled - int OffsetImm = RtMI->getOperand(2).getImm(); - if (IsUnscaled && EnableAArch64UnscaledMemOp) - OffsetImm /= OffsetStride; + + int OffsetImm = getLdStOffsetOp(RtMI).getImm(); + + if (isNarrowLoad(Opc)) { + // Change the scaled offset from small to large type. + if (!IsUnscaled) { + assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); + OffsetImm /= 2; + } + MachineInstr *RtNewDest = MergeForward ? I : Paired; + // When merging small (< 32 bit) loads for big-endian targets, the order of + // the component parts gets swapped. + if (!Subtarget->isLittleEndian()) + std::swap(RtMI, Rt2MI); + // Construct the new load instruction. + MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2; + NewMemMI = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(NewOpc)) + .addOperand(getLdStRegOp(RtNewDest)) + .addOperand(BaseRegOp) + .addImm(OffsetImm); + + // Copy MachineMemOperands from the original loads. + concatenateMemOperands(NewMemMI, I, Paired); + + DEBUG( + dbgs() + << "Creating the new load and extract. Replacing instructions:\n "); + DEBUG(I->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(Paired->print(dbgs())); + DEBUG(dbgs() << " with instructions:\n "); + DEBUG((NewMemMI)->print(dbgs())); + + int Width = getMemScale(I) == 1 ? 8 : 16; + int LSBLow = 0; + int LSBHigh = Width; + int ImmsLow = LSBLow + Width - 1; + int ImmsHigh = LSBHigh + Width - 1; + MachineInstr *ExtDestMI = MergeForward ? Paired : I; + if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) { + // Create the bitfield extract for high bits. + BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(getBitExtrOpcode(Rt2MI))) + .addOperand(getLdStRegOp(Rt2MI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(LSBHigh) + .addImm(ImmsHigh); + // Create the bitfield extract for low bits. + if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { + // For unsigned, prefer to use AND for low bits. 
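The preference noted in the comment above has a simple justification: an unsigned bitfield extract whose field starts at bit 0 computes exactly a mask of the low bits, so an ANDWri and a UBFX at lsb 0 are interchangeable there. A minimal sketch of the equivalence (illustrative C++ written for this note, not code from the patch; extractUnsigned stands in for what UBFMWri produces):

    #include <cassert>
    #include <cstdint>

    // What an unsigned bitfield extract (ubfx dst, src, #Lsb, #Width) computes.
    static uint32_t extractUnsigned(uint32_t Src, unsigned Lsb, unsigned Width) {
      return (Src >> Lsb) & ((1u << Width) - 1);
    }

    int main() {
      uint32_t Src = 0xBEEF1234;
      // Low halfword: ubfx at lsb 0 and "and dst, src, #0xffff" agree ...
      assert(extractUnsigned(Src, 0, 16) == (Src & 0xffff));
      // ... while the high halfword genuinely needs the shift (lsb 16).
      assert(extractUnsigned(Src, 16, 16) == 0xBEEFu);
      return 0;
    }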
+ BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(AArch64::ANDWri)) + .addOperand(getLdStRegOp(RtMI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(ImmsLow); + } else { + BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(getBitExtrOpcode(RtMI))) + .addOperand(getLdStRegOp(RtMI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(LSBLow) + .addImm(ImmsLow); + } + } else { + // Create the bitfield extract for low bits. + if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) { + // For unsigned, prefer to use AND for low bits. + BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(AArch64::ANDWri)) + .addOperand(getLdStRegOp(RtMI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(ImmsLow); + } else { + BitExtMI1 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(getBitExtrOpcode(RtMI))) + .addOperand(getLdStRegOp(RtMI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(LSBLow) + .addImm(ImmsLow); + } + + // Create the bitfield extract for high bits. + BitExtMI2 = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(getBitExtrOpcode(Rt2MI))) + .addOperand(getLdStRegOp(Rt2MI)) + .addReg(getLdStRegOp(RtNewDest).getReg()) + .addImm(LSBHigh) + .addImm(ImmsHigh); + } + DEBUG(dbgs() << " "); + DEBUG((BitExtMI1)->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG((BitExtMI2)->print(dbgs())); + DEBUG(dbgs() << "\n"); + + // Erase the old instructions. + I->eraseFromParent(); + Paired->eraseFromParent(); + return NextI; + } // Construct the new instruction. - MachineInstrBuilder MIB = BuildMI(*I->getParent(), InsertionPoint, - I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(RtMI->getOperand(0)) - .addOperand(Rt2MI->getOperand(0)) - .addOperand(BaseRegOp) - .addImm(OffsetImm); + MachineInstrBuilder MIB; + if (isNarrowStore(Opc)) { + // Change the scaled offset from small to large type. + if (!IsUnscaled) { + assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge"); + OffsetImm /= 2; + } + MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(NewOpc)) + .addOperand(getLdStRegOp(I)) + .addOperand(BaseRegOp) + .addImm(OffsetImm); + // Copy MachineMemOperands from the original stores. 
+ concatenateMemOperands(MIB, I, Paired); + } else { + // Handle Unscaled + if (IsUnscaled) + OffsetImm /= OffsetStride; + MIB = BuildMI(*I->getParent(), InsertionPoint, I->getDebugLoc(), + TII->get(NewOpc)) + .addOperand(getLdStRegOp(RtMI)) + .addOperand(getLdStRegOp(Rt2MI)) + .addOperand(BaseRegOp) + .addImm(OffsetImm); + } + (void)MIB; // FIXME: Do we need/want to copy the mem operands from the source @@ -439,13 +860,112 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, return NextI; } +MachineBasicBlock::iterator +AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI, + MachineBasicBlock::iterator StoreI) { + MachineBasicBlock::iterator NextI = LoadI; + ++NextI; + + int LoadSize = getMemScale(LoadI); + int StoreSize = getMemScale(StoreI); + unsigned LdRt = getLdStRegOp(LoadI).getReg(); + unsigned StRt = getLdStRegOp(StoreI).getReg(); + bool IsStoreXReg = TRI->getRegClass(AArch64::GPR64RegClassID)->contains(StRt); + + assert((IsStoreXReg || + TRI->getRegClass(AArch64::GPR32RegClassID)->contains(StRt)) && + "Unexpected RegClass"); + + MachineInstr *BitExtMI; + if (LoadSize == StoreSize && (LoadSize == 4 || LoadSize == 8)) { + // Remove the load if the destination register of the load is the same as + // the register holding the stored value. + if (StRt == LdRt && LoadSize == 8) { + DEBUG(dbgs() << "Remove load instruction:\n "); + DEBUG(LoadI->print(dbgs())); + DEBUG(dbgs() << "\n"); + LoadI->eraseFromParent(); + return NextI; + } + // Replace the load with a mov if the load and store are the same size. + BitExtMI = + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), + TII->get(IsStoreXReg ? AArch64::ORRXrs : AArch64::ORRWrs), LdRt) + .addReg(IsStoreXReg ? AArch64::XZR : AArch64::WZR) + .addReg(StRt) + .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); + } else { + // FIXME: Currently we disable this transformation in big-endian targets as + // performance and correctness are verified only in little-endian. + if (!Subtarget->isLittleEndian()) + return NextI; + bool IsUnscaled = isUnscaledLdSt(LoadI); + assert(IsUnscaled == isUnscaledLdSt(StoreI) && "Unsupported ld/st match"); + assert(LoadSize <= StoreSize && "Invalid load size"); + int UnscaledLdOffset = IsUnscaled + ? getLdStOffsetOp(LoadI).getImm() + : getLdStOffsetOp(LoadI).getImm() * LoadSize; + int UnscaledStOffset = IsUnscaled + ? getLdStOffsetOp(StoreI).getImm() + : getLdStOffsetOp(StoreI).getImm() * StoreSize; + int Width = LoadSize * 8; + int Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); + int Imms = Immr + Width - 1; + unsigned DestReg = IsStoreXReg + ? TRI->getMatchingSuperReg(LdRt, AArch64::sub_32, + &AArch64::GPR64RegClass) + : LdRt; + + assert((UnscaledLdOffset >= UnscaledStOffset && + (UnscaledLdOffset + LoadSize) <= UnscaledStOffset + StoreSize) && + "Invalid offset"); + + Immr = 8 * (UnscaledLdOffset - UnscaledStOffset); + Imms = Immr + Width - 1; + if (UnscaledLdOffset == UnscaledStOffset) { + uint32_t AndMaskEncoded = ((IsStoreXReg ? 1 : 0) << 12) // N + | ((Immr) << 6) // immr + | ((Imms) << 0) // imms + ; + + BitExtMI = + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), + TII->get(IsStoreXReg ? AArch64::ANDXri : AArch64::ANDWri), + DestReg) + .addReg(StRt) + .addImm(AndMaskEncoded); + } else { + BitExtMI = + BuildMI(*LoadI->getParent(), LoadI, LoadI->getDebugLoc(), + TII->get(IsStoreXReg ?
AArch64::UBFMXri : AArch64::UBFMWri), + DestReg) + .addReg(StRt) + .addImm(Immr) + .addImm(Imms); + } + } + + DEBUG(dbgs() << "Promoting load by replacing :\n "); + DEBUG(StoreI->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG(LoadI->print(dbgs())); + DEBUG(dbgs() << " with instructions:\n "); + DEBUG(StoreI->print(dbgs())); + DEBUG(dbgs() << " "); + DEBUG((BitExtMI)->print(dbgs())); + DEBUG(dbgs() << "\n"); + + // Erase the old instructions. + LoadI->eraseFromParent(); + return NextI; +} + /// trackRegDefsUses - Remember what registers the specified instruction uses /// and modifies. -static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, +static void trackRegDefsUses(const MachineInstr *MI, BitVector &ModifiedRegs, BitVector &UsedRegs, const TargetRegisterInfo *TRI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); + for (const MachineOperand &MO : MI->operands()) { if (MO.isRegMask()) ModifiedRegs.setBitsNotInMask(MO.getRegMask()); @@ -464,16 +984,12 @@ static void trackRegDefsUses(MachineInstr *MI, BitVector &ModifiedRegs, } static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { - if (!IsUnscaled && (Offset > 63 || Offset < -64)) - return false; - if (IsUnscaled) { - // Convert the byte-offset used by unscaled into an "element" offset used - // by the scaled pair load/store instructions. - int ElemOffset = Offset / OffsetStride; - if (ElemOffset > 63 || ElemOffset < -64) - return false; - } - return true; + // Convert the byte-offset used by unscaled into an "element" offset used + // by the scaled pair load/store instructions. + if (IsUnscaled) + Offset /= OffsetStride; + + return Offset <= 63 && Offset >= -64; } // Do alignment, specialized to power of 2 and for signed ints, @@ -507,12 +1023,65 @@ static bool mayAlias(MachineInstr *MIa, return false; } +bool AArch64LoadStoreOpt::findMatchingStore( + MachineBasicBlock::iterator I, unsigned Limit, + MachineBasicBlock::iterator &StoreI) { + MachineBasicBlock::iterator E = I->getParent()->begin(); + MachineBasicBlock::iterator MBBI = I; + MachineInstr *FirstMI = I; + unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); + + // Track which registers have been modified and used between the first insn + // and the second insn. + BitVector ModifiedRegs, UsedRegs; + ModifiedRegs.resize(TRI->getNumRegs()); + UsedRegs.resize(TRI->getNumRegs()); + + for (unsigned Count = 0; MBBI != E && Count < Limit;) { + --MBBI; + MachineInstr *MI = MBBI; + // Skip DBG_VALUE instructions. Otherwise debug info can affect the + // optimization by changing how far we scan. + if (MI->isDebugValue()) + continue; + // Now that we know this is a real instruction, count it. + ++Count; + + // If the load instruction reads directly from the address to which the + // store instruction writes and the stored value is not modified, we can + // promote the load. Since we do not handle stores with pre-/post-index, + // it's unnecessary to check if BaseReg is modified by the store itself. + if (MI->mayStore() && isMatchingStore(FirstMI, MI) && + BaseReg == getLdStBaseOp(MI).getReg() && + isLdOffsetInRangeOfSt(FirstMI, MI) && + !ModifiedRegs[getLdStRegOp(MI).getReg()]) { + StoreI = MBBI; + return true; + } + + if (MI->isCall()) + return false; + + // Update modified / uses register lists. + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + + // Otherwise, if the base register is modified, we have no match, so + // return early. 
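The scan above accepts a store only when isLdOffsetInRangeOfSt holds, and promoteLoadFromStore then derives the extract position from the same normalized byte offsets. A self-contained model of both computations (illustrative C++, not the pass's API; little-endian layout is assumed, as in the patch):

    #include <cassert>

    // The load's byte range must lie inside the store's byte range
    // (the shape of isLdOffsetInRangeOfSt, offsets already in bytes).
    static bool loadCoveredByStore(int LdOff, int LdSize, int StOff, int StSize) {
      return StOff <= LdOff && LdOff + LdSize <= StOff + StSize;
    }

    // Bit position of the loaded bytes inside the stored value on a
    // little-endian target: the Immr/lsb fed to the UBFM above.
    static int extractLsb(int LdOff, int StOff) { return 8 * (LdOff - StOff); }

    int main() {
      // str w1, [x0, #4] writes bytes [4, 8); ldrh w2, [x0, #6] reads [6, 8).
      assert(loadCoveredByStore(/*LdOff=*/6, /*LdSize=*/2, /*StOff=*/4, /*StSize=*/4));
      // The halfword sits 16 bits up, so the load becomes lsr w2, w1, #16
      // (a UBFM with immr = 16 and imms = 16 + 16 - 1 = 31).
      assert(extractLsb(6, 4) == 16);
      // A 2-byte load at offset 7 would read past the store and is rejected.
      assert(!loadCoveredByStore(7, 2, 4, 4));
      return 0;
    }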
+ if (ModifiedRegs[BaseReg]) + return false; + + // If we encounter a store aliased with the load, return early. + if (MI->mayStore() && mayAlias(FirstMI, MI, TII)) + return false; + } + return false; +} + /// findMatchingInsn - Scan the instructions looking for a load/store that can /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &MergeForward, int &SExtIdx, - unsigned Limit) { + LdStPairFlags &Flags, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; @@ -520,21 +1089,27 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, unsigned Opc = FirstMI->getOpcode(); bool MayLoad = FirstMI->mayLoad(); - bool IsUnscaled = isUnscaledLdst(Opc); - unsigned Reg = FirstMI->getOperand(0).getReg(); - unsigned BaseReg = FirstMI->getOperand(1).getReg(); - int Offset = FirstMI->getOperand(2).getImm(); + bool IsUnscaled = isUnscaledLdSt(FirstMI); + unsigned Reg = getLdStRegOp(FirstMI).getReg(); + unsigned BaseReg = getLdStBaseOp(FirstMI).getReg(); + int Offset = getLdStOffsetOp(FirstMI).getImm(); + bool IsNarrowStore = isNarrowStore(Opc); + + // For narrow stores, find only the case where the stored value is WZR. + if (IsNarrowStore && Reg != AArch64::WZR) + return E; // Early exit if the first instruction modifies the base register. // e.g., ldr x0, [x0] - // Early exit if the offset if not possible to match. (6 bits of positive - // range, plus allow an extra one in case we find a later insn that matches - // with Offset-1 if (FirstMI->modifiesRegister(BaseReg, TRI)) return E; - int OffsetStride = - IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1; - if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride)) + + // Early exit if the offset is not possible to match. (6 bits of positive + // range, plus allow an extra one in case we find a later insn that matches + // with Offset-1) + int OffsetStride = IsUnscaled ? getMemScale(FirstMI) : 1; + if (!(isNarrowLoad(Opc) || IsNarrowStore) && + !inBoundsForPair(IsUnscaled, Offset, OffsetStride)) return E; // Track which registers have been modified and used between the first insn @@ -557,18 +1132,19 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, ++Count; bool CanMergeOpc = Opc == MI->getOpcode(); - SExtIdx = -1; + Flags.setSExtIdx(-1); if (!CanMergeOpc) { bool IsValidLdStrOpc; unsigned NonSExtOpc = getMatchingNonSExtOpcode(Opc, &IsValidLdStrOpc); - if (!IsValidLdStrOpc) - continue; + assert(IsValidLdStrOpc && + "Given Opc should be a Load or Store with an immediate"); // Opc will be the first instruction in the pair. - SExtIdx = NonSExtOpc == (unsigned)Opc ? 1 : 0; + Flags.setSExtIdx(NonSExtOpc == (unsigned)Opc ? 1 : 0); CanMergeOpc = NonSExtOpc == getMatchingNonSExtOpcode(MI->getOpcode()); } - if (CanMergeOpc && MI->getOperand(2).isImm()) { + if (CanMergeOpc && getLdStOffsetOp(MI).isImm()) { + assert(MI->mayLoadOrStore() && "Expected memory operation."); // If we've found another instruction with the same opcode, check to see // if the base and offset are compatible with our starting instruction. // These instructions all have scaled immediate operands, so we just @@ -579,8 +1155,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Pairwise instructions have a 7-bit signed offset field. Single insns // have a 12-bit unsigned offset field.
To be a valid combine, the // final offset must be in range. - unsigned MIBaseReg = MI->getOperand(1).getReg(); - int MIOffset = MI->getOperand(2).getImm(); + unsigned MIBaseReg = getLdStBaseOp(MI).getReg(); + int MIOffset = getLdStOffsetOp(MI).getImm(); if (BaseReg == MIBaseReg && ((Offset == MIOffset + OffsetStride) || (Offset + OffsetStride == MIOffset))) { int MinOffset = Offset < MIOffset ? Offset : MIOffset; @@ -591,30 +1167,43 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, return E; // If the resultant immediate offset of merging these instructions // is out of range for a pairwise instruction, bail and keep looking. - bool MIIsUnscaled = isUnscaledLdst(MI->getOpcode()); - if (!inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { + bool MIIsUnscaled = isUnscaledLdSt(MI); + bool IsNarrowLoad = isNarrowLoad(MI->getOpcode()); + if (!IsNarrowLoad && + !inBoundsForPair(MIIsUnscaled, MinOffset, OffsetStride)) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - if (MI->mayLoadOrStore()) - MemInsns.push_back(MI); + MemInsns.push_back(MI); continue; } - // If the alignment requirements of the paired (scaled) instruction - // can't express the offset of the unscaled input, bail and keep - // looking. - if (IsUnscaled && EnableAArch64UnscaledMemOp && - (alignTo(MinOffset, OffsetStride) != MinOffset)) { - trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - if (MI->mayLoadOrStore()) + + if (IsNarrowLoad || IsNarrowStore) { + // If the alignment requirements of the scaled wide load/store + // instruction can't express the offset of the scaled narrow + // input, bail and keep looking. + if (!IsUnscaled && alignTo(MinOffset, 2) != MinOffset) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); MemInsns.push_back(MI); - continue; + continue; + } + } else { + // If the alignment requirements of the paired (scaled) instruction + // can't express the offset of the unscaled input, bail and keep + // looking. + if (IsUnscaled && (alignTo(MinOffset, OffsetStride) != MinOffset)) { + trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); + MemInsns.push_back(MI); + continue; + } } // If the destination register of the loads is the same register, bail // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. - if (MayLoad && Reg == MI->getOperand(0).getReg()) { + // For narrow stores, allow only when the stored value is the same + // (i.e., WZR). + if ((MayLoad && Reg == getLdStRegOp(MI).getReg()) || + (IsNarrowStore && Reg != getLdStRegOp(MI).getReg())) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); - if (MI->mayLoadOrStore()) - MemInsns.push_back(MI); + MemInsns.push_back(MI); continue; } @@ -622,10 +1211,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // the two instructions and none of the instructions between the second // and first alias with the second, we can combine the second into the // first. 
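The inBoundsForPair checks in this loop encode a single constraint: pair instructions carry a 7-bit signed offset in units of the access size, so unscaled byte offsets are divided by the stride first. A minimal model with concrete values (illustrative C++, not code from the patch):

    #include <cassert>

    // Pair forms take a 7-bit signed *element* offset. Unscaled (LDUR/STUR)
    // candidates carry byte offsets, so normalize those first; scaled
    // candidates already encode element offsets (stride 1 here).
    static bool inBoundsForPairModel(bool IsUnscaled, int Offset, int OffsetStride) {
      if (IsUnscaled)
        Offset /= OffsetStride;
      return Offset <= 63 && Offset >= -64;
    }

    int main() {
      // ldr x0, [x2, #504]: scaled immediate 63 (504 / 8), still pairable.
      assert(inBoundsForPairModel(/*IsUnscaled=*/false, 63, /*OffsetStride=*/1));
      // ldur x0, [x2, #520]: 520 / 8 == 65 elements, out of ldp range.
      assert(!inBoundsForPairModel(/*IsUnscaled=*/true, 520, /*OffsetStride=*/8));
      return 0;
    }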
- if (!ModifiedRegs[MI->getOperand(0).getReg()] && - !(MI->mayLoad() && UsedRegs[MI->getOperand(0).getReg()]) && + if (!ModifiedRegs[getLdStRegOp(MI).getReg()] && + !(MI->mayLoad() && UsedRegs[getLdStRegOp(MI).getReg()]) && !mayAlias(MI, MemInsns, TII)) { - MergeForward = false; + Flags.setMergeForward(false); return MBBI; } @@ -633,11 +1222,10 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. - if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && - !(FirstMI->mayLoad() && - UsedRegs[FirstMI->getOperand(0).getReg()]) && + if (!ModifiedRegs[getLdStRegOp(FirstMI).getReg()] && + !(MayLoad && UsedRegs[getLdStRegOp(FirstMI).getReg()]) && !mayAlias(FirstMI, MemInsns, TII)) { - MergeForward = true; + Flags.setMergeForward(true); return MBBI; } // Unable to combine these instructions due to interference in between. @@ -666,8 +1254,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, } MachineBasicBlock::iterator -AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Update) { +AArch64LoadStoreOpt::mergeUpdateInsn(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Update, + bool IsPreIdx) { assert((Update->getOpcode() == AArch64::ADDXri || Update->getOpcode() == AArch64::SUBXri) && "Unexpected base register update instruction to merge!"); @@ -680,20 +1269,36 @@ AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, int Value = Update->getOperand(2).getImm(); assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into pre-indexed load / store"); + "Can't merge 1 << 12 offset into pre-/post-indexed load / store"); if (Update->getOpcode() == AArch64::SUBXri) Value = -Value; - unsigned NewOpc = getPreIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(Update->getOperand(0)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); + unsigned NewOpc = IsPreIdx ? getPreIndexedOpcode(I->getOpcode()) + : getPostIndexedOpcode(I->getOpcode()); + MachineInstrBuilder MIB; + if (!isPairedLdSt(I)) { + // Non-paired instruction. + MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .addOperand(getLdStRegOp(Update)) + .addOperand(getLdStRegOp(I)) + .addOperand(getLdStBaseOp(I)) + .addImm(Value); + } else { + // Paired instruction. 
+ int Scale = getMemScale(I); + MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) + .addOperand(getLdStRegOp(Update)) + .addOperand(getLdStRegOp(I, 0)) + .addOperand(getLdStRegOp(I, 1)) + .addOperand(getLdStBaseOp(I)) + .addImm(Value / Scale); + } (void)MIB; - DEBUG(dbgs() << "Creating pre-indexed load/store."); + if (IsPreIdx) + DEBUG(dbgs() << "Creating pre-indexed load/store."); + else + DEBUG(dbgs() << "Creating post-indexed load/store."); DEBUG(dbgs() << " Replacing instructions:\n "); DEBUG(I->print(dbgs())); DEBUG(dbgs() << " "); @@ -709,51 +1314,9 @@ AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I, return NextI; } -MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn( - MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) { - assert((Update->getOpcode() == AArch64::ADDXri || - Update->getOpcode() == AArch64::SUBXri) && - "Unexpected base register update instruction to merge!"); - MachineBasicBlock::iterator NextI = I; - // Return the instruction following the merged instruction, which is - // the instruction following our unmerged load. Unless that's the add/sub - // instruction we're merging, in which case it's the one after that. - if (++NextI == Update) - ++NextI; - - int Value = Update->getOperand(2).getImm(); - assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 && - "Can't merge 1 << 12 offset into post-indexed load / store"); - if (Update->getOpcode() == AArch64::SUBXri) - Value = -Value; - - unsigned NewOpc = getPostIndexedOpcode(I->getOpcode()); - MachineInstrBuilder MIB = - BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc)) - .addOperand(Update->getOperand(0)) - .addOperand(I->getOperand(0)) - .addOperand(I->getOperand(1)) - .addImm(Value); - (void)MIB; - - DEBUG(dbgs() << "Creating post-indexed load/store."); - DEBUG(dbgs() << " Replacing instructions:\n "); - DEBUG(I->print(dbgs())); - DEBUG(dbgs() << " "); - DEBUG(Update->print(dbgs())); - DEBUG(dbgs() << " with instruction:\n "); - DEBUG(((MachineInstr *)MIB)->print(dbgs())); - DEBUG(dbgs() << "\n"); - - // Erase the old instructions for the block. - I->eraseFromParent(); - Update->eraseFromParent(); - - return NextI; -} - -static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, - int Offset) { +bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr *MemMI, + MachineInstr *MI, + unsigned BaseReg, int Offset) { switch (MI->getOpcode()) { default: break; @@ -769,44 +1332,65 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg, // Watch out for 1 << 12 shifted value. if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm())) break; - // If the instruction has the base register as source and dest and the - // immediate will fit in a signed 9-bit integer, then we have a match. - if (MI->getOperand(0).getReg() == BaseReg && - MI->getOperand(1).getReg() == BaseReg && - MI->getOperand(2).getImm() <= 255 && - MI->getOperand(2).getImm() >= -256) { - // If we have a non-zero Offset, we check that it matches the amount - // we're adding to the register. - if (!Offset || Offset == MI->getOperand(2).getImm()) - return true; + + // The update instruction source and destination register must be the + // same as the load/store base register. 
+ if (MI->getOperand(0).getReg() != BaseReg || + MI->getOperand(1).getReg() != BaseReg) + break; + + bool IsPairedInsn = isPairedLdSt(MemMI); + int UpdateOffset = MI->getOperand(2).getImm(); + // For non-paired load/store instructions, the immediate must fit in a + // signed 9-bit integer. + if (!IsPairedInsn && (UpdateOffset > 255 || UpdateOffset < -256)) + break; + + // For paired load/store instructions, the immediate must be a multiple of + // the scaling factor. The scaled offset must also fit into a signed 7-bit + // integer. + if (IsPairedInsn) { + int Scale = getMemScale(MemMI); + if (UpdateOffset % Scale != 0) + break; + + int ScaledOffset = UpdateOffset / Scale; + if (ScaledOffset > 64 || ScaledOffset < -64) + break; } + + // If we have a non-zero Offset, we check that it matches the amount + // we're adding to the register. + if (!Offset || Offset == MI->getOperand(2).getImm()) + return true; break; } return false; } MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( - MachineBasicBlock::iterator I, unsigned Limit, int Value) { + MachineBasicBlock::iterator I, unsigned Limit, int UnscaledOffset) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr *MemMI = I; MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm() * - TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); + unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); + int MIUnscaledOffset = getLdStOffsetOp(MemMI).getImm() * getMemScale(MemMI); - // If the base register overlaps the destination register, we can't + // Scan forward looking for post-index opportunities. Updating instructions + // can't be formed if the memory instruction doesn't have the offset we're + // looking for. + if (MIUnscaledOffset != UnscaledOffset) + return E; + + // If the base register overlaps a destination register, we can't // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; - - // Scan forward looking for post-index opportunities. - // Updating instructions can't be formed if the memory insn already - // has an offset other than the value we're looking for. - if (Offset != Value) - return E; + bool IsPairedInsn = isPairedLdSt(MemMI); + for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { + unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + } // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. @@ -825,7 +1409,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward( ++Count; // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, Value)) + if (isMatchingUpdateInsn(I, MI, BaseReg, UnscaledOffset)) return MBBI; // Update the status of what the instruction clobbered and used. 
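The hunks on either side of this point rely on the same immediate arithmetic, so it is worth spelling out: the add/sub being folded always carries a byte offset; non-paired writeback forms accept a signed 9-bit byte offset, while paired forms accept a signed 7-bit offset in units of the access size. A self-contained sketch of that check (the range constants mirror the patch; illustrative C++, not LLVM code):

    #include <cassert>

    // Can "add/sub Xn, Xn, #UpdateOffset" fold into a load/store writeback?
    static bool updateOffsetFoldable(bool IsPaired, int UpdateOffset, int Scale) {
      if (!IsPaired)
        return UpdateOffset <= 255 && UpdateOffset >= -256; // signed 9-bit bytes
      if (UpdateOffset % Scale != 0) // paired immediates are scaled
        return false;
      int ScaledOffset = UpdateOffset / Scale;
      return ScaledOffset <= 64 && ScaledOffset >= -64;
    }

    int main() {
      // ldp x0, x1, [x2]; add x2, x2, #32: foldable, the MI immediate is 32 / 8.
      assert(updateOffsetFoldable(/*IsPaired=*/true, 32, /*Scale=*/8));
      // add x2, x2, #20 is not a multiple of 8, so no paired writeback form.
      assert(!updateOffsetFoldable(true, 20, 8));
      // ldr x0, [x2]; add x2, x2, #200 fits the 9-bit range for ldr post-index.
      assert(updateOffsetFoldable(/*IsPaired=*/false, 200, /*Scale=*/8));
      return 0;
    }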
@@ -845,21 +1429,22 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( MachineBasicBlock::iterator E = I->getParent()->end(); MachineInstr *MemMI = I; MachineBasicBlock::iterator MBBI = I; - const MachineFunction &MF = *MemMI->getParent()->getParent(); - unsigned DestReg = MemMI->getOperand(0).getReg(); - unsigned BaseReg = MemMI->getOperand(1).getReg(); - int Offset = MemMI->getOperand(2).getImm(); - unsigned RegSize = TII->getRegClass(MemMI->getDesc(), 0, TRI, MF)->getSize(); + unsigned BaseReg = getLdStBaseOp(MemMI).getReg(); + int Offset = getLdStOffsetOp(MemMI).getImm(); // If the load/store is the first instruction in the block, there's obviously // not any matching update. Ditto if the memory offset isn't zero. if (MBBI == B || Offset != 0) return E; - // If the base register overlaps the destination register, we can't + // If the base register overlaps a destination register, we can't // merge the update. - if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) - return E; + bool IsPairedInsn = isPairedLdSt(MemMI); + for (unsigned i = 0, e = IsPairedInsn ? 2 : 1; i != e; ++i) { + unsigned DestReg = getLdStRegOp(MemMI, i).getReg(); + if (DestReg == BaseReg || TRI->isSubRegister(BaseReg, DestReg)) + return E; + } // Track which registers have been modified and used between the first insn // (inclusive) and the second insn. @@ -878,7 +1463,7 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( ++Count; // If we found a match, return it. - if (isMatchingUpdateInsn(MI, BaseReg, RegSize)) + if (isMatchingUpdateInsn(I, MI, BaseReg, Offset)) return MBBI; // Update the status of what the instruction clobbered and used. @@ -892,17 +1477,101 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward( return E; } -bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { +bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore( + MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = MBBI; + // If this is a volatile load, don't mess with it. + if (MI->hasOrderedMemoryRef()) + return false; + + // Make sure this is a reg+imm. + // FIXME: It is possible to extend it to handle reg+reg cases. + if (!getLdStOffsetOp(MI).isImm()) + return false; + + // Look backward up to ScanLimit instructions. + MachineBasicBlock::iterator StoreI; + if (findMatchingStore(MBBI, ScanLimit, StoreI)) { + ++NumLoadsFromStoresPromoted; + // Promote the load. Keeping the iterator straight is a + // pain, so we let the merge routine tell us what the next instruction + // is after it's done mucking about. + MBBI = promoteLoadFromStore(MBBI, StoreI); + return true; + } + return false; +} + +bool AArch64LoadStoreOpt::tryToMergeLdStInst( + MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = MBBI; + MachineBasicBlock::iterator E = MI->getParent()->end(); + // If this is a volatile load/store, don't mess with it. + if (MI->hasOrderedMemoryRef()) + return false; + + // Make sure this is a reg+imm (as opposed to an address reloc). + if (!getLdStOffsetOp(MI).isImm()) + return false; + + // Check if this load/store has a hint to avoid pair formation. + // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. + if (TII->isLdStPairSuppressed(MI)) + return false; + + // Look ahead up to ScanLimit instructions for a pairable instruction. 
+ LdStPairFlags Flags; + MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, Flags, ScanLimit); + if (Paired != E) { + if (isNarrowLoad(MI)) { + ++NumNarrowLoadsPromoted; + } else if (isNarrowStore(MI)) { + ++NumZeroStoresPromoted; + } else { + ++NumPairCreated; + if (isUnscaledLdSt(MI)) + ++NumUnscaledPairCreated; + } + + // Merge the loads into a pair. Keeping the iterator straight is a + // pain, so we let the merge routine tell us what the next instruction + // is after it's done mucking about. + MBBI = mergePairedInsns(MBBI, Paired, Flags); + return true; + } + return false; +} + +bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB, + bool enableNarrowLdOpt) { bool Modified = false; - // Two tranformations to do here: - // 1) Find loads and stores that can be merged into a single load or store + // Four transformations to do here: + // 1) Find loads that directly read from stores and promote them by + // replacing with mov instructions. If the store is wider than the load, + // the load will be replaced with a bitfield extract. + // e.g., + // str w1, [x0, #4] + // ldrh w2, [x0, #6] + // ; becomes + // str w1, [x0, #4] + // lsr w2, w1, #16 + // 2) Find narrow loads that can be converted into a single wider load + // with bitfield extract instructions. + // e.g., + // ldrh w0, [x2] + // ldrh w1, [x2, #2] + // ; becomes + // ldr w0, [x2] + // ubfx w1, w0, #16, #16 + // and w0, w0, #ffff + // 3) Find loads and stores that can be merged into a single load or store // pair instruction. // e.g., // ldr x0, [x2] // ldr x1, [x2, #8] // ; becomes // ldp x0, x1, [x2] - // 2) Find base register updates that can be merged into the load or store + // 4) Find base register updates that can be merged into the load or store // as a base-reg writeback. // e.g., // ldr x0, [x2] @@ -918,6 +1587,69 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { // Just move on to the next instruction. ++MBBI; break; + // Scaled instructions. + case AArch64::LDRBBui: + case AArch64::LDRHHui: + case AArch64::LDRWui: + case AArch64::LDRXui: + // Unscaled instructions. + case AArch64::LDURBBi: + case AArch64::LDURHHi: + case AArch64::LDURWi: + case AArch64::LDURXi: { + if (tryToPromoteLoadFromStore(MBBI)) { + Modified = true; + break; + } + ++MBBI; + break; + } + // FIXME: Do the other instructions. + } + } + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + enableNarrowLdOpt && MBBI != E;) { + MachineInstr *MI = MBBI; + switch (MI->getOpcode()) { + default: + // Just move on to the next instruction. + ++MBBI; + break; + // Scaled instructions. + case AArch64::LDRBBui: + case AArch64::LDRHHui: + case AArch64::LDRSBWui: + case AArch64::LDRSHWui: + case AArch64::STRBBui: + case AArch64::STRHHui: + // Unscaled instructions. + case AArch64::LDURBBi: + case AArch64::LDURHHi: + case AArch64::LDURSBWi: + case AArch64::LDURSHWi: + case AArch64::STURBBi: + case AArch64::STURHHi: { + if (tryToMergeLdStInst(MBBI)) { + Modified = true; + break; + } + ++MBBI; + break; + } + // FIXME: Do the other instructions. + } + } + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E;) { + MachineInstr *MI = MBBI; + switch (MI->getOpcode()) { + default: + // Just move on to the next instruction. + ++MBBI; + break; + // Scaled instructions.
case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: @@ -929,7 +1661,7 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { case AArch64::LDRXui: case AArch64::LDRWui: case AArch64::LDRSWui: - // do the unscaled versions as well + // Unscaled instructions. case AArch64::STURSi: case AArch64::STURDi: case AArch64::STURQi: @@ -941,37 +1673,8 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { case AArch64::LDURWi: case AArch64::LDURXi: case AArch64::LDURSWi: { - // If this is a volatile load/store, don't mess with it. - if (MI->hasOrderedMemoryRef()) { - ++MBBI; - break; - } - // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { - ++MBBI; - break; - } - // Check if this load/store has a hint to avoid pair formation. - // MachineMemOperands hints are set by the AArch64StorePairSuppress pass. - if (TII->isLdStPairSuppressed(MI)) { - ++MBBI; - break; - } - // Look ahead up to ScanLimit instructions for a pairable instruction. - bool MergeForward = false; - int SExtIdx = -1; - MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, MergeForward, SExtIdx, ScanLimit); - if (Paired != E) { - // Merge the loads into a pair. Keeping the iterator straight is a - // pain, so we let the merge routine tell us what the next instruction - // is after it's done mucking about. - MBBI = mergePairedInsns(MBBI, Paired, MergeForward, SExtIdx); - + if (tryToMergeLdStInst(MBBI)) { Modified = true; - ++NumPairCreated; - if (isUnscaledLdst(MI->getOpcode())) - ++NumUnscaledPairCreated; break; } ++MBBI; @@ -992,17 +1695,22 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { // Just move on to the next instruction. ++MBBI; break; + // Scaled instructions. case AArch64::STRSui: case AArch64::STRDui: case AArch64::STRQui: case AArch64::STRXui: case AArch64::STRWui: + case AArch64::STRHHui: + case AArch64::STRBBui: case AArch64::LDRSui: case AArch64::LDRDui: case AArch64::LDRQui: case AArch64::LDRXui: case AArch64::LDRWui: - // do the unscaled versions as well + case AArch64::LDRHHui: + case AArch64::LDRBBui: + // Unscaled instructions. case AArch64::STURSi: case AArch64::STURDi: case AArch64::STURQi: @@ -1012,25 +1720,41 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { case AArch64::LDURDi: case AArch64::LDURQi: case AArch64::LDURWi: - case AArch64::LDURXi: { + case AArch64::LDURXi: + // Paired instructions. + case AArch64::LDPSi: + case AArch64::LDPSWi: + case AArch64::LDPDi: + case AArch64::LDPQi: + case AArch64::LDPWi: + case AArch64::LDPXi: + case AArch64::STPSi: + case AArch64::STPDi: + case AArch64::STPQi: + case AArch64::STPWi: + case AArch64::STPXi: { // Make sure this is a reg+imm (as opposed to an address reloc). - if (!MI->getOperand(2).isImm()) { + if (!getLdStOffsetOp(MI).isImm()) { ++MBBI; break; } - // Look ahead up to ScanLimit instructions for a mergable instruction. + // Look forward to try to form a post-index instruction. For example, + // ldr x0, [x20] + // add x20, x20, #32 + // merged into: + // ldr x0, [x20], #32 MachineBasicBlock::iterator Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, 0); if (Update != E) { // Merge the update into the ld/st. - MBBI = mergePostIdxUpdateInsn(MBBI, Update); + MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false); Modified = true; ++NumPostFolded; break; } // Don't know how to handle pre/post-index versions, so move to the next // instruction. 
- if (isUnscaledLdst(Opc)) { + if (isUnscaledLdSt(Opc)) { ++MBBI; break; } @@ -1043,28 +1767,25 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { Update = findMatchingUpdateInsnBackward(MBBI, ScanLimit); if (Update != E) { // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); + MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); Modified = true; ++NumPreFolded; break; } + // The immediate in the load/store is scaled by the size of the memory + // operation. The immediate in the add we're looking for, + // however, is not, so adjust here. + int UnscaledOffset = getLdStOffsetOp(MI).getImm() * getMemScale(MI); // Look forward to try to find a pre-index instruction. For example, // ldr x1, [x0, #64] // add x0, x0, #64 // merged into: // ldr x1, [x0, #64]! - - // The immediate in the load/store is scaled by the size of the register - // being loaded. The immediate in the add we're looking for, - // however, is not, so adjust here. - int Value = MI->getOperand(2).getImm() * - TII->getRegClass(MI->getDesc(), 0, TRI, *(MBB.getParent())) - ->getSize(); - Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, Value); + Update = findMatchingUpdateInsnForward(MBBI, ScanLimit, UnscaledOffset); if (Update != E) { // Merge the update into the ld/st. - MBBI = mergePreIdxUpdateInsn(MBBI, Update); + MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true); Modified = true; ++NumPreFolded; break; @@ -1081,13 +1802,24 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { return Modified; } +bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) { + bool ProfitableArch = Subtarget->isCortexA57(); + // FIXME: The benefit from converting narrow loads into a wider load could be + // microarchitectural as it assumes that a single load with two bitfield + // extracts is cheaper than two narrow loads. Currently, this conversion is + // enabled only in cortex-a57 on which performance benefits were verified. + return ProfitableArch && !Subtarget->requiresStrictAlign(); +} + bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const AArch64InstrInfo *>(Fn.getSubtarget().getInstrInfo()); - TRI = Fn.getSubtarget().getRegisterInfo(); + Subtarget = &static_cast<const AArch64Subtarget &>(Fn.getSubtarget()); + TII = static_cast<const AArch64InstrInfo *>(Subtarget->getInstrInfo()); + TRI = Subtarget->getRegisterInfo(); bool Modified = false; + bool enableNarrowLdOpt = enableNarrowLdMerge(Fn); for (auto &MBB : Fn) - Modified |= optimizeBlock(MBB); + Modified |= optimizeBlock(MBB, enableNarrowLdOpt); return Modified; } @@ -1095,8 +1827,8 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { // FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep // loads and stores near one another? -/// createARMLoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass. +/// createAArch64LoadStoreOptimizationPass - returns an instance of the +/// load / store optimization pass.
FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() { return new AArch64LoadStoreOpt(); } diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index 580427ab3cc1..2b4cdf1083be 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -207,9 +207,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { OutMI.setOpcode(MI->getOpcode()); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + for (const MachineOperand &MO : MI->operands()) { MCOperand MCOp; - if (lowerOperand(MI->getOperand(i), MCOp)) + if (lowerOperand(MO, MCOp)) OutMI.addOperand(MCOp); } } diff --git a/lib/Target/AArch64/AArch64MachineCombinerPattern.h b/lib/Target/AArch64/AArch64MachineCombinerPattern.h deleted file mode 100644 index 4164b3364559..000000000000 --- a/lib/Target/AArch64/AArch64MachineCombinerPattern.h +++ /dev/null @@ -1,42 +0,0 @@ -//===- AArch64MachineCombinerPattern.h -===// -//===- AArch64 instruction pattern supported by combiner -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines instruction pattern supported by combiner -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H -#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINECOMBINERPATTERN_H - -namespace llvm { - -/// Enumeration of instruction pattern supported by machine combiner -/// -/// -namespace MachineCombinerPattern { -enum MC_PATTERN : int { - MC_NONE = 0, - MC_MULADDW_OP1 = 1, - MC_MULADDW_OP2 = 2, - MC_MULSUBW_OP1 = 3, - MC_MULSUBW_OP2 = 4, - MC_MULADDWI_OP1 = 5, - MC_MULSUBWI_OP1 = 6, - MC_MULADDX_OP1 = 7, - MC_MULADDX_OP2 = 8, - MC_MULSUBX_OP1 = 9, - MC_MULSUBX_OP2 = 10, - MC_MULADDXI_OP1 = 11, - MC_MULSUBXI_OP1 = 12 -}; -} // end namespace MachineCombinerPattern -} // end namespace llvm - -#endif diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 536a8d0f97a0..318f83953505 100644 --- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -1,4 +1,4 @@ -//=- AArch64MachineFuctionInfo.h - AArch64 machine function info --*- C++ -*-=// +//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -42,7 +42,7 @@ class AArch64FunctionInfo : public MachineFunctionInfo { unsigned ArgumentStackToRestore; /// HasStackFrame - True if this function has a stack frame. Set by - /// processFunctionBeforeCalleeSavedScan(). + /// determineCalleeSaves(). bool HasStackFrame; /// \brief Amount of stack frame size, not including callee-saved registers. @@ -72,16 +72,22 @@ class AArch64FunctionInfo : public MachineFunctionInfo { /// registers. unsigned VarArgsFPRSize; + /// True if this function has a subset of CSRs that is handled explicitly via + /// copies. 
+ bool IsSplitCSR; + public: AArch64FunctionInfo() : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), - VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {} + VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), + IsSplitCSR(false) {} explicit AArch64FunctionInfo(MachineFunction &MF) : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false), NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0), - VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) { + VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0), + IsSplitCSR(false) { (void)MF; } @@ -96,6 +102,9 @@ public: bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } + bool isSplitCSR() const { return IsSplitCSR; } + void setIsSplitCSR(bool s) { IsSplitCSR = s; } + void setLocalStackSize(unsigned Size) { LocalStackSize = Size; } unsigned getLocalStackSize() const { return LocalStackSize; } diff --git a/lib/Target/AArch64/AArch64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp index e1b93bf07c89..79c09d9f058d 100644 --- a/lib/Target/AArch64/AArch64PromoteConstant.cpp +++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp @@ -489,7 +489,7 @@ bool AArch64PromoteConstant::insertDefinitions( for (const auto &IPI : InsertPts) { // Create the load of the global variable. - IRBuilder<> Builder(IPI.first->getParent(), IPI.first); + IRBuilder<> Builder(IPI.first); LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV); DEBUG(dbgs() << "**********\n"); DEBUG(dbgs() << "New def: "); @@ -540,7 +540,7 @@ bool AArch64PromoteConstant::runOnFunction(Function &F) { bool LocalChange = false; SmallPtrSet AlreadyChecked; - for (Instruction &I : inst_range(&F)) { + for (Instruction &I : instructions(&F)) { // Traverse the operand, looking for constant vectors. Replace them by a // load of a global variable of constant vector type. for (Value *Op : I.operand_values()) { diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 841af55f7a65..32b4888f2f64 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -15,6 +15,7 @@ #include "AArch64RegisterInfo.h" #include "AArch64FrameLowering.h" #include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/ADT/BitVector.h" @@ -34,10 +35,6 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "AArch64GenRegisterInfo.inc" -static cl::opt -ReserveX18("aarch64-reserve-x18", cl::Hidden, - cl::desc("Reserve X18, making it unavailable as GPR")); - AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT) : AArch64GenRegisterInfo(AArch64::LR), TT(TT) {} @@ -50,10 +47,23 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AArch64_NoRegs_SaveList; if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; + if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS) + return MF->getInfo()->isSplitCSR() ? 
+ CSR_AArch64_CXX_TLS_Darwin_PE_SaveList : + CSR_AArch64_CXX_TLS_Darwin_SaveList; else return CSR_AArch64_AAPCS_SaveList; } +const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy( + const MachineFunction *MF) const { + assert(MF && "Invalid MachineFunction pointer."); + if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getInfo()->isSplitCSR()) + return CSR_AArch64_CXX_TLS_Darwin_ViaCopy_SaveList; + return nullptr; +} + const uint32_t * AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { @@ -62,6 +72,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return CSR_AArch64_AllRegs_RegMask; + if (CC == CallingConv::CXX_FAST_TLS) + return CSR_AArch64_CXX_TLS_Darwin_RegMask; else return CSR_AArch64_AAPCS_RegMask; } @@ -104,7 +116,7 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(AArch64::W29); } - if (TT.isOSDarwin() || ReserveX18) { + if (MF.getSubtarget().isX18Reserved()) { Reserved.set(AArch64::X18); // Platform register Reserved.set(AArch64::W18); } @@ -131,7 +143,7 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF, return true; case AArch64::X18: case AArch64::W18: - return TT.isOSDarwin() || ReserveX18; + return MF.getSubtarget().isX18Reserved(); case AArch64::FP: case AArch64::W29: return TFI->hasFP(MF) || TT.isOSDarwin(); @@ -186,29 +198,6 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { return false; } -bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const { - - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) - return false; - - return true; -} - -// FIXME: share this with other backends with identical implementation? -bool -AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const AArch64FrameLowering *TFI = getFrameLowering(MF); - const Function *F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = - ((MFI->getMaxAlignment() > StackAlign) || - F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackAlignment)); - - return requiresRealignment && canRealignStack(MF); -} - unsigned AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const { const AArch64FrameLowering *TFI = getFrameLowering(MF); @@ -424,10 +413,11 @@ unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case AArch64::GPR64RegClassID: case AArch64::GPR32commonRegClassID: case AArch64::GPR64commonRegClassID: - return 32 - 1 // XZR/SP - - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP - - (TT.isOSDarwin() || ReserveX18) // X18 reserved as platform register - - hasBasePointer(MF); // X19 + return 32 - 1 // XZR/SP + - (TFI->hasFP(MF) || TT.isOSDarwin()) // FP + - MF.getSubtarget() + .isX18Reserved() // X18 reserved as platform register + - hasBasePointer(MF); // X19 case AArch64::FPR8RegClassID: case AArch64::FPR16RegClassID: case AArch64::FPR32RegClassID: diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h index 8c379d926108..f33f788fd437 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/lib/Target/AArch64/AArch64RegisterInfo.h @@ -35,6 +35,8 @@ public: /// Code Generation virtual methods... 
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; + const MCPhysReg * + getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; @@ -93,9 +95,6 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; - // Base pointer (stack realignment) support. - bool canRealignStack(const MachineFunction &MF) const; - bool needsStackRealignment(const MachineFunction &MF) const override; }; } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index b2efca023372..a8c8b176efa9 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -407,7 +407,7 @@ def FPR128 : RegisterClass<"AArch64", // The lower 16 vector registers. Some instructions can only take registers // in this range. def FPR128_lo : RegisterClass<"AArch64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128, (trunc FPR128, 16)>; // Pairs, triples, and quads of 64-bit vector registers. diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 486efd6ce3a2..f6ee8cf47a6a 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -31,6 +31,11 @@ static cl::opt<bool> EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden); +// If the OS supports TBI, use this flag to enable it. +static cl::opt<bool> +UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " + "an address is ignored"), cl::init(false), cl::Hidden); + AArch64Subtarget & AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) { // Determine default and user-specified characteristics @@ -46,9 +51,11 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - HasV8_1aOps(false), HasFPARMv8(false), HasNEON(false), HasCrypto(false), - HasCRC(false), HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), - IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(), + HasV8_1aOps(false), HasV8_2aOps(false), HasFPARMv8(false), HasNEON(false), + HasCrypto(false), HasCRC(false), HasPerfMon(false), HasFullFP16(false), + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), + StrictAlign(false), ReserveX18(TT.isOSDarwin()), IsLittle(LittleEndian), + CPUString(CPU), TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), TLInfo(TM, *this) {} @@ -113,12 +120,30 @@ void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, // bi-directional scheduling. 253.perlbmk. Policy.OnlyTopDown = false; Policy.OnlyBottomUp = false; + // Enabling or disabling the latency heuristic is a close call: it seems to + // help nearly no benchmark on out-of-order architectures; on the other hand + // it regresses register pressure on a few benchmarks.
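// Illustrative note (not part of the patch): the net effect of the lines
// below is that every AArch64 core keeps the scheduler's default latency
// heuristic except Cyclone, where it is switched off.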
+ if (isCyclone()) + Policy.DisableLatencyHeuristic = true; } bool AArch64Subtarget::enableEarlyIfConversion() const { return EnableEarlyIfConvert; } +bool AArch64Subtarget::supportsAddressTopByteIgnored() const { + if (!UseAddressTopByteIgnored) + return false; + + if (TargetTriple.isiOS()) { + unsigned Major, Minor, Micro; + TargetTriple.getiOSVersion(Major, Minor, Micro); + return Major >= 8; + } + + return false; +} + std::unique_ptr AArch64Subtarget::getCustomPBQPConstraints() const { if (!isCortexA57()) diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 6bb069423060..1b8b9b27719c 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -33,17 +33,21 @@ class Triple; class AArch64Subtarget : public AArch64GenSubtargetInfo { protected: - enum ARMProcFamilyEnum {Others, CortexA53, CortexA57, Cyclone}; + enum ARMProcFamilyEnum {Others, CortexA35, CortexA53, CortexA57, Cyclone}; /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others. ARMProcFamilyEnum ARMProcFamily; bool HasV8_1aOps; + bool HasV8_2aOps; bool HasFPARMv8; bool HasNEON; bool HasCrypto; bool HasCRC; + bool HasPerfMon; + bool HasFullFP16; + bool HasSPE; // HasZeroCycleRegMove - Has zero-cycle register mov instructions. bool HasZeroCycleRegMove; @@ -51,6 +55,12 @@ protected: // HasZeroCycleZeroing - Has zero-cycle zeroing instructions. bool HasZeroCycleZeroing; + // StrictAlign - Disallow unaligned memory accesses. + bool StrictAlign; + + // ReserveX18 - X18 is not available as a general purpose register. + bool ReserveX18; + bool IsLittle; /// CPUString - String name of used CPU. @@ -92,19 +102,30 @@ public: const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { - return isCortexA53() || isCortexA57(); + return isGeneric() || isCortexA53() || isCortexA57(); } bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasV8_2aOps() const { return HasV8_2aOps; } bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; } + bool requiresStrictAlign() const { return StrictAlign; } + + bool isX18Reserved() const { return ReserveX18; } bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } + /// CPU has TBI (top byte of addresses is ignored during HW address + /// translation) and OS enables it. 
+ bool supportsAddressTopByteIgnored() const; + + bool hasPerfMon() const { return HasPerfMon; } + bool hasFullFP16() const { return HasFullFP16; } + bool hasSPE() const { return HasSPE; } bool isLittleEndian() const { return IsLittle; } @@ -112,11 +133,13 @@ public: bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetWindows() const { return TargetTriple.isOSWindows(); } + bool isTargetAndroid() const { return TargetTriple.isAndroid(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isGeneric() const { return CPUString == "generic"; } bool isCyclone() const { return CPUString == "cyclone"; } bool isCortexA57() const { return CPUString == "cortex-a57"; } bool isCortexA53() const { return CPUString == "cortex-a53"; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index db6e244337a7..c52c5544fc7e 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -203,7 +203,7 @@ public: } // namespace TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(AArch64TTIImpl(this, F)); }); } diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index e085cca35f1c..9af0e6444789 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -23,7 +23,7 @@ using namespace llvm; /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. -unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) { +int AArch64TTIImpl::getIntImmCost(int64_t Val) { // Check if the immediate can be encoded within an instruction. if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64)) return 0; @@ -37,7 +37,7 @@ unsigned AArch64TTIImpl::getIntImmCost(int64_t Val) { } /// \brief Calculate the cost of materializing the given constant. -unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -51,18 +51,18 @@ unsigned AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { // Split the constant into 64-bit chunks and calculate the cost for each // chunk. - unsigned Cost = 0; + int Cost = 0; for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); int64_t Val = Tmp.getSExtValue(); Cost += getIntImmCost(Val); } // We need at least one instruction to materialize the constant.
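// Illustrative walk-through (hypothetical value, not part of the patch): for
// an i128 constant equal to 0x1234 shifted left by 64, the loop above visits
// two 64-bit chunks; the low chunk 0 is free and the high chunk 0x1234 needs
// a single MOVZ under the surrounding move-counting heuristic, so Cost == 1.
// When every chunk is free (e.g. Imm == 0), the clamp below still charges one
// instruction: even a "free" constant takes one mov to materialize.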
- return std::max(1U, Cost); + return std::max(1, Cost); } -unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { +int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -118,17 +118,17 @@ unsigned AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, } if (Idx == ImmIdx) { - unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + int NumConstants = (BitSize + 63) / 64; + int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TTI::TCC_Basic) - ? static_cast<unsigned>(TTI::TCC_Free) + ? static_cast<int>(TTI::TCC_Free) : Cost; } return AArch64TTIImpl::getIntImmCost(Imm, Ty); } -unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { +int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -147,10 +147,10 @@ unsigned AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, case Intrinsic::smul_with_overflow: case Intrinsic::umul_with_overflow: if (Idx == 1) { - unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + int NumConstants = (BitSize + 63) / 64; + int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TTI::TCC_Basic) - ? static_cast<unsigned>(TTI::TCC_Free) + ? static_cast<int>(TTI::TCC_Free) : Cost; } break; @@ -176,8 +176,7 @@ AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { return TTI::PSK_Software; } -unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, - Type *Src) { +int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -187,7 +186,31 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (!SrcTy.isSimple() || !DstTy.isSimple()) return BaseT::getCastInstrCost(Opcode, Dst, Src); - static const TypeConversionCostTblEntry<MVT::SimpleValueType> ConversionTbl[] = { + static const TypeConversionCostTblEntry + ConversionTbl[] = { + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, + + // The number of shll instructions for the extension.
+ { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, + // LowerVectorINT_TO_FP: { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, @@ -210,6 +233,16 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + // Complex: to v8f32 + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, + { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, + + // Complex: to v16f32 + { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, + // Complex: to v2f64 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, @@ -250,22 +283,21 @@ unsigned AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 }, }; - int Idx = ConvertCostTableLookup( - ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); - if (Idx != -1) - return ConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; return BaseT::getCastInstrCost(Opcode, Dst, Src); } -unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { +int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { assert(Val->isVectorTy() && "This must be a vector type"); if (Index != -1U) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. if (!LT.second.isVector()) @@ -281,15 +313,15 @@ unsigned AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, } // All other insert/extracts cost this much. - return 2; + return 3; } -unsigned AArch64TTIImpl::getArithmeticInstrCost( +int AArch64TTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -300,10 +332,9 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost( // normally expanded to the sequence ADD + CMP + SELECT + SRA. // The OperandValue properties may not be the same as those of the previous // operation; conservatively assume OP_None.
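// Illustrative (assuming a legal scalar type; not from the patch): each of
// the four expansion steps named in the comment above typically costs 1, so
// a signed divide by a power of two is charged roughly 4 rather than the far
// larger cost of a real division.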
- unsigned Cost = - getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); + int Cost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info, + TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); @@ -331,7 +362,7 @@ unsigned AArch64TTIImpl::getArithmeticInstrCost( } } -unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting @@ -346,19 +377,20 @@ unsigned AArch64TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { return 1; } -unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) { +int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) { int ISD = TLI->InstructionOpcodeToISD(Opcode); - // We don't lower vector selects well that are wider than the register width. + // We don't lower some vector selects well that are wider than the register + // width. if (ValTy->isVectorTy() && ISD == ISD::SELECT) { // We would need this many instructions to hide the scalarization happening. - const unsigned AmortizationCost = 20; - static const TypeConversionCostTblEntry<MVT::SimpleValueType> + const int AmortizationCost = 20; + static const TypeConversionCostTblEntry VectorSelectTbl[] = { - { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost }, - { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost }, - { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost }, + { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 }, { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost }, { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost }, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost } @@ -367,20 +399,18 @@ unsigned AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, EVT SelCondTy = TLI->getValueType(DL, CondTy); EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { - int Idx = - ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(), - SelValTy.getSimpleVT()); - if (Idx != -1) - return VectorSelectTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD, + SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT())) + return Entry->Cost; } } return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) { - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); +int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, unsigned AddressSpace) { + std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isIntegerTy(64)) { // Unaligned stores are extremely inefficient. We don't split all // unaligned accesses in practice on inlined memcpy code. // We make v2i64 stores expensive so that we will only vectorize if there // are 6 other instructions getting vectorized.
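// Illustrative arithmetic (values implied by the code below, no extra data):
// a legal v2i64 store has LT.first == 1, so an unaligned one is charged
// 1 * 2 * 6 == 12, versus roughly 2 for the pair of scalar i64 stores it
// replaces; vectorizing then only looks profitable when enough surrounding
// instructions also vectorize, which is the amortization described above.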
- unsigned AmortizationCost = 6; + int AmortizationCost = 6; return LT.first * 2 * AmortizationCost; } @@ -407,16 +437,18 @@ unsigned AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } -unsigned AArch64TTIImpl::getInterleavedMemoryOpCost( - unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, - unsigned Alignment, unsigned AddressSpace) { +int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef Indices, + unsigned Alignment, + unsigned AddressSpace) { assert(Factor >= 2 && "Invalid interleave factor"); assert(isa(VecTy) && "Expect a vector type"); if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // ldN/stN only support legal vector types of size 64 or 128 in bits. if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) @@ -427,8 +459,8 @@ unsigned AArch64TTIImpl::getInterleavedMemoryOpCost( Alignment, AddressSpace); } -unsigned AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef Tys) { - unsigned Cost = 0; +int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef Tys) { + int Cost = 0; for (auto *I : Tys) { if (!I->isVectorTy()) continue; @@ -506,7 +538,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_ld4: Info.ReadMem = true; Info.WriteMem = false; - Info.Vol = false; + Info.IsSimple = true; Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(0); break; @@ -515,7 +547,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_st4: Info.ReadMem = false; Info.WriteMem = true; - Info.Vol = false; + Info.IsSimple = true; Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1); break; diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h index 444d3ccc15e1..ec58c4fe309f 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -48,7 +48,7 @@ class AArch64TTIImpl : public BasicTTIImplBase { }; public: - explicit AArch64TTIImpl(const AArch64TargetMachine *TM, Function &F) + explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} @@ -63,12 +63,11 @@ public: /// @{ using BaseT::getIntImmCost; - unsigned getIntImmCost(int64_t Val); - unsigned getIntImmCost(const APInt &Imm, Type *Ty); - unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty); - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + int getIntImmCost(int64_t Val); + int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); /// @} @@ -76,6 +75,8 @@ public: /// \name Vector TTI Implementations /// @{ + bool enableInterleavedAccessVectorization() { return true; } + unsigned getNumberOfRegisters(bool Vector) { if (Vector) { if (ST->hasNEON()) @@ -96,25 +97,25 @@ public: unsigned getMaxInterleaveFactor(unsigned VF); - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 
Type *Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - unsigned getArithmeticInstrCost( + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); - unsigned getAddressComputationCost(Type *Ty, bool IsComplex); + int getAddressComputationCost(Type *Ty, bool IsComplex); - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); - unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys); + int getCostOfKeepingLiveOverCall(ArrayRef Tys); void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); @@ -123,11 +124,9 @@ public: bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info); - unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace); + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef Indices, unsigned Alignment, + unsigned AddressSpace); /// @} }; diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 38e8b4d9a938..394c8e78581f 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -43,7 +43,6 @@ class AArch64Operand; class AArch64AsmParser : public MCTargetAsmParser { private: StringRef Mnemonic; ///< Instruction mnemonic. - MCSubtargetInfo &STI; // Map of register aliases registers via the .req directive. StringMap > RegisterReqs; @@ -101,6 +100,7 @@ private: OperandMatchResultTy tryParseSysReg(OperandVector &Operands); OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands); OperandMatchResultTy tryParsePrefetch(OperandVector &Operands); + OperandMatchResultTy tryParsePSBHint(OperandVector &Operands); OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands); OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands); OperandMatchResultTy tryParseFPImm(OperandVector &Operands); @@ -115,16 +115,16 @@ public: #define GET_OPERAND_DIAGNOSTIC_TYPES #include "AArch64GenAsmMatcher.inc" }; - AArch64AsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, + AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(STI) { + : MCTargetAsmParser(Options, STI) { MCAsmParserExtension::Initialize(Parser); MCStreamer &S = getParser().getStreamer(); if (S.getTargetStreamer() == nullptr) new AArch64TargetStreamer(S); // Initialize the set of available features. 
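// Illustrative cross-reference (not part of the patch): the feature bits
// cached here are what the parser's later predicates consult, e.g.
// getSTI().getFeatureBits()[AArch64::HasV8_2aOps] gates the v8.2-A-only
// "dc cvap" and "at s1e1rp"/"at s1e1wp" aliases parsed further down.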
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); } bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, @@ -160,7 +160,8 @@ private: k_Prefetch, k_ShiftExtend, k_FPImm, - k_Barrier + k_Barrier, + k_PSBHint, } Kind; SMLoc StartLoc, EndLoc; @@ -228,6 +229,12 @@ private: unsigned Length; }; + struct PSBHintOp { + unsigned Val; + const char *Data; + unsigned Length; + }; + struct ShiftExtendOp { AArch64_AM::ShiftExtendType Type; unsigned Amount; @@ -251,6 +258,7 @@ private: struct SysRegOp SysReg; struct SysCRImmOp SysCRImm; struct PrefetchOp Prefetch; + struct PSBHintOp PSBHint; struct ShiftExtendOp ShiftExtend; }; @@ -302,6 +310,9 @@ public: case k_Prefetch: Prefetch = o.Prefetch; break; + case k_PSBHint: + PSBHint = o.PSBHint; + break; case k_ShiftExtend: ShiftExtend = o.ShiftExtend; break; @@ -393,6 +404,16 @@ public: return Prefetch.Val; } + unsigned getPSBHint() const { + assert(Kind == k_PSBHint && "Invalid access!"); + return PSBHint.Val; + } + + StringRef getPSBHintName() const { + assert(Kind == k_PSBHint && "Invalid access!"); + return StringRef(PSBHint.Data, PSBHint.Length); + } + StringRef getPrefetchName() const { assert(Kind == k_Prefetch && "Invalid access!"); return StringRef(Prefetch.Data, Prefetch.Length); @@ -497,6 +518,15 @@ public: return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000; } + bool isImm0_1() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast(getImm()); + if (!MCE) + return false; + int64_t Val = MCE->getValue(); + return (Val >= 0 && Val < 2); + } bool isImm0_7() const { if (!isImm()) return false; @@ -876,12 +906,15 @@ public: } bool isMSRSystemRegister() const { if (!isSysReg()) return false; - return SysReg.MSRReg != -1U; } - bool isSystemPStateField() const { + bool isSystemPStateFieldWithImm0_1() const { if (!isSysReg()) return false; - + return (SysReg.PStateField == AArch64PState::PAN || + SysReg.PStateField == AArch64PState::UAO); + } + bool isSystemPStateFieldWithImm0_15() const { + if (!isSysReg() || isSystemPStateFieldWithImm0_1()) return false; return SysReg.PStateField != -1U; } bool isReg() const override { return Kind == k_Register && !Reg.isVector; } @@ -950,6 +983,7 @@ public: } bool isSysCR() const { return Kind == k_SysCR; } bool isPrefetch() const { return Kind == k_Prefetch; } + bool isPSBHint() const { return Kind == k_PSBHint; } bool isShiftExtend() const { return Kind == k_ShiftExtend; } bool isShifter() const { if (!isShiftExtend()) @@ -1175,8 +1209,10 @@ public: template void addVectorList64Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { AArch64::D0, AArch64::D0_D1, - AArch64::D0_D1_D2, AArch64::D0_D1_D2_D3 }; + static const unsigned FirstRegs[] = { AArch64::D0, + AArch64::D0_D1, + AArch64::D0_D1_D2, + AArch64::D0_D1_D2_D3 }; unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( @@ -1186,8 +1222,10 @@ public: template void addVectorList128Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); - static unsigned FirstRegs[] = { AArch64::Q0, AArch64::Q0_Q1, - AArch64::Q0_Q1_Q2, AArch64::Q0_Q1_Q2_Q3 }; + static const unsigned FirstRegs[] = { AArch64::Q0, + AArch64::Q0_Q1, + AArch64::Q0_Q1_Q2, + AArch64::Q0_Q1_Q2_Q3 }; unsigned FirstReg = FirstRegs[NumRegs - 1]; Inst.addOperand( @@ -1304,6 +1342,12 @@ public: Inst.addOperand(MCOperand::createImm(MCE->getValue() / 16)); 
} + void addImm0_1Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = cast(getImm()); + Inst.addOperand(MCOperand::createImm(MCE->getValue())); + } + void addImm0_7Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = cast(getImm()); @@ -1491,7 +1535,13 @@ public: Inst.addOperand(MCOperand::createImm(SysReg.MSRReg)); } - void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const { + void addSystemPStateFieldWithImm0_1Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + Inst.addOperand(MCOperand::createImm(SysReg.PStateField)); + } + + void addSystemPStateFieldWithImm0_15Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createImm(SysReg.PStateField)); @@ -1507,6 +1557,11 @@ public: Inst.addOperand(MCOperand::createImm(getPrefetch())); } + void addPSBHintOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getPSBHint())); + } + void addShifterOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); unsigned Imm = @@ -1703,6 +1758,19 @@ public: return Op; } + static std::unique_ptr CreatePSBHint(unsigned Val, + StringRef Str, + SMLoc S, + MCContext &Ctx) { + auto Op = make_unique(k_PSBHint, Ctx); + Op->PSBHint.Val = Val; + Op->PSBHint.Data = Str.data(); + Op->PSBHint.Length = Str.size(); + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } + static std::unique_ptr CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val, bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) { @@ -1776,6 +1844,10 @@ void AArch64Operand::print(raw_ostream &OS) const { OS << ""; break; } + case k_PSBHint: { + OS << getPSBHintName(); + break; + } case k_ShiftExtend: { OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #" << getShiftExtendAmount(); @@ -1849,6 +1921,8 @@ static bool isValidVectorKind(StringRef Name) { .Case(".h", true) .Case(".s", true) .Case(".d", true) + // Needed for fp16 scalar pairwise reductions + .Case(".2h", true) .Default(false); } @@ -2016,7 +2090,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); StringRef Name = - Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); + Mapper.toString(MCE->getValue(), getSTI().getFeatureBits(), Valid); Operands.push_back(AArch64Operand::CreatePrefetch(prfop, Name, S, getContext())); return MatchOperand_Success; @@ -2030,7 +2104,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { bool Valid; auto Mapper = AArch64PRFM::PRFMMapper(); unsigned prfop = - Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); + Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid); if (!Valid) { TokError("pre-fetch hint expected"); return MatchOperand_ParseFail; @@ -2042,6 +2116,32 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) { return MatchOperand_Success; } +/// tryParsePSBHint - Try to parse a PSB operand, mapped to Hint command +AArch64AsmParser::OperandMatchResultTy +AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + SMLoc S = getLoc(); + const AsmToken &Tok = Parser.getTok(); + if (Tok.isNot(AsmToken::Identifier)) { + TokError("invalid operand for instruction"); + return 
MatchOperand_ParseFail; + } + + bool Valid; + auto Mapper = AArch64PSBHint::PSBHintMapper(); + unsigned psbhint = + Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid); + if (!Valid) { + TokError("invalid operand for instruction"); + return MatchOperand_ParseFail; + } + + Parser.Lex(); // Eat identifier token. + Operands.push_back(AArch64Operand::CreatePSBHint(psbhint, Tok.getString(), + S, getContext())); + return MatchOperand_Success; +} + /// tryParseAdrpLabel - Parse and validate a source label for the ADRP /// instruction. AArch64AsmParser::OperandMatchResultTy @@ -2439,6 +2539,13 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, } else if (!Op.compare_lower("cisw")) { // SYS #0, C7, C14, #2 SYS_ALIAS(0, 7, 14, 2); + } else if (!Op.compare_lower("cvap")) { + if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) { + // SYS #3, C7, C12, #1 + SYS_ALIAS(3, 7, 12, 1); + } else { + return TokError("DC CVAP requires ARMv8.2a"); + } } else { return TokError("invalid operand for DC instruction"); } @@ -2479,6 +2586,20 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc, } else if (!Op.compare_lower("s12e0w")) { // SYS #4, C7, C8, #7 SYS_ALIAS(4, 7, 8, 7); + } else if (!Op.compare_lower("s1e1rp")) { + if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) { + // SYS #0, C7, C9, #0 + SYS_ALIAS(0, 7, 9, 0); + } else { + return TokError("AT S1E1RP requires ARMv8.2a"); + } + } else if (!Op.compare_lower("s1e1wp")) { + if (getSTI().getFeatureBits()[AArch64::HasV8_2aOps]) { + // SYS #0, C7, C9, #1 + SYS_ALIAS(0, 7, 9, 1); + } else { + return TokError("AT S1E1WP requires ARMv8.2a"); + } } else { return TokError("invalid operand for AT instruction"); } @@ -2644,7 +2765,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); StringRef Name = - Mapper.toString(MCE->getValue(), STI.getFeatureBits(), Valid); + Mapper.toString(MCE->getValue(), getSTI().getFeatureBits(), Valid); Operands.push_back( AArch64Operand::CreateBarrier(MCE->getValue(), Name, ExprLoc, getContext())); return MatchOperand_Success; @@ -2658,7 +2779,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) { bool Valid; auto Mapper = AArch64DB::DBarrierMapper(); unsigned Opt = - Mapper.fromString(Tok.getString(), STI.getFeatureBits(), Valid); + Mapper.fromString(Tok.getString(), getSTI().getFeatureBits(), Valid); if (!Valid) { TokError("invalid barrier option name"); return MatchOperand_ParseFail; @@ -2687,20 +2808,21 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) { bool IsKnown; auto MRSMapper = AArch64SysReg::MRSMapper(); - uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), STI.getFeatureBits(), - IsKnown); + uint32_t MRSReg = MRSMapper.fromString(Tok.getString(), + getSTI().getFeatureBits(), IsKnown); assert(IsKnown == (MRSReg != -1U) && "register should be -1 if and only if it's unknown"); auto MSRMapper = AArch64SysReg::MSRMapper(); - uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), STI.getFeatureBits(), - IsKnown); + uint32_t MSRReg = MSRMapper.fromString(Tok.getString(), + getSTI().getFeatureBits(), IsKnown); assert(IsKnown == (MSRReg != -1U) && "register should be -1 if and only if it's unknown"); auto PStateMapper = AArch64PState::PStateMapper(); uint32_t PStateField = - PStateMapper.fromString(Tok.getString(), STI.getFeatureBits(), IsKnown); + PStateMapper.fromString(Tok.getString(), + getSTI().getFeatureBits(), IsKnown); assert(IsKnown == (PStateField != -1U) && 
"register should be -1 if and only if it's unknown"); @@ -3151,7 +3273,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, if (Operands.size() < 2 || !static_cast(*Operands[1]).isReg()) - return true; + return Error(Loc, "Only valid when first operand is register"); bool IsXReg = AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( @@ -3183,7 +3305,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, } // If it is a label or an imm that cannot fit in a movz, put it into CP. const MCExpr *CPLoc = - getTargetStreamer().addConstantPoolEntry(SubExprVal, IsXReg ? 8 : 4); + getTargetStreamer().addConstantPoolEntry(SubExprVal, IsXReg ? 8 : 4, Loc); Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx)); return false; } @@ -3601,6 +3723,8 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { return Error(Loc, "index must be a multiple of 8 in range [0, 32760]."); case Match_InvalidMemoryIndexed16: return Error(Loc, "index must be a multiple of 16 in range [0, 65520]."); + case Match_InvalidImm0_1: + return Error(Loc, "immediate must be an integer in range [0, 1]."); case Match_InvalidImm0_7: return Error(Loc, "immediate must be an integer in range [0, 7]."); case Match_InvalidImm0_15: @@ -3912,7 +4036,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, AArch64Operand &ImmOp = static_cast(*Operands[2]); if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) { unsigned zreg = - AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains( + !AArch64MCRegisterClasses[AArch64::FPR64RegClassID].contains( RegOp.getReg()) ? AArch64::WZR : AArch64::XZR; @@ -3929,10 +4053,27 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // If that fails, try against the alternate table containing long-form NEON: // "fadd v0.2s, v1.2s, v2.2s" - if (MatchResult != Match_Success) + if (MatchResult != Match_Success) { + // But first, save the short-form match result: we can use it in case the + // long-form match also fails. + auto ShortFormNEONErrorInfo = ErrorInfo; + auto ShortFormNEONMatchResult = MatchResult; + MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0); + // Now, both matches failed, and the long-form match failed on the mnemonic + // suffix token operand. The short-form match failure is probably more + // relevant: use it instead. 
+ if (MatchResult == Match_InvalidOperand && ErrorInfo == 1 && + Operands.size() > 1 && ((AArch64Operand &)*Operands[1]).isToken() && + ((AArch64Operand &)*Operands[1]).isTokenSuffix()) { + MatchResult = ShortFormNEONMatchResult; + ErrorInfo = ShortFormNEONErrorInfo; + } + } + + switch (MatchResult) { case Match_Success: { // Perform range checking and other semantic validations @@ -3944,7 +4085,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return true; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; } case Match_MissingFeature: { @@ -3966,6 +4107,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return showMatchError(IDLoc, MatchResult); case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; + if (ErrorInfo != ~0ULL) { if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); @@ -4011,6 +4153,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidMemoryIndexed8SImm7: case Match_InvalidMemoryIndexed16SImm7: case Match_InvalidMemoryIndexedSImm9: + case Match_InvalidImm0_1: case Match_InvalidImm0_7: case Match_InvalidImm0_15: case Match_InvalidImm0_31: @@ -4083,7 +4226,7 @@ bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) { if (getParser().parseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size); + getParser().getStreamer().EmitValue(Value, Size, L); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -4155,7 +4298,7 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) { Inst.setOpcode(AArch64::TLSDESCCALL); Inst.addOperand(MCOperand::createExpr(Expr)); - getParser().getStreamer().EmitInstruction(Inst, STI); + getParser().getStreamer().EmitInstruction(Inst, getSTI()); return false; } diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index db9fb0e775df..f1f968e73123 100644 --- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -1516,6 +1516,10 @@ static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst, uint64_t pstate_field = (op1 << 3) | op2; + if ((pstate_field == AArch64PState::PAN || + pstate_field == AArch64PState::UAO) && crm > 1) + return Fail; + Inst.addOperand(MCOperand::createImm(pstate_field)); Inst.addOperand(MCOperand::createImm(crm)); diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index 7f56c2cf6bb8..d8a810824370 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -55,7 +56,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O, unsigned Opcode = MI->getOpcode(); if (Opcode == AArch64::SYSxt) - if (printSysAlias(MI, O)) { + if (printSysAlias(MI, STI, O)) { printAnnotation(O, Annot); return; } @@ -269,7 +270,7 @@ struct LdStNInstrDesc { int NaturalOffset; }; -static LdStNInstrDesc LdStNInstInfo[] = { +static const LdStNInstrDesc LdStNInstInfo[] = { { AArch64::LD1i8, "ld1", ".b", 1, true, 0 }, { AArch64::LD1i16, "ld1", ".h", 1, true, 0 }, { AArch64::LD1i32, 
"ld1", ".s", 1, true, 0 }, @@ -612,7 +613,7 @@ static LdStNInstrDesc LdStNInstInfo[] = { { AArch64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 }, }; -static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { +static const LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) { unsigned Idx; for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx) if (LdStNInstInfo[Idx].Opcode == Opcode) @@ -641,7 +642,7 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, return; } - if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { + if (const LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) { O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t'; // Now onto the operands: first a vector list with possible lane @@ -674,7 +675,9 @@ void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O, AArch64InstPrinter::printInst(MI, O, Annot, STI); } -bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { +bool AArch64InstPrinter::printSysAlias(const MCInst *MI, + const MCSubtargetInfo &STI, + raw_ostream &O) { #ifndef NDEBUG unsigned Opcode = MI->getOpcode(); assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!"); @@ -729,6 +732,11 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { if (Op1Val == 3 && Op2Val == 1) Asm = "dc\tcvau"; break; + case 12: + if (Op1Val == 3 && Op2Val == 1 && + (STI.getFeatureBits()[AArch64::HasV8_2aOps])) + Asm = "dc\tcvap"; + break; case 14: if (Op1Val == 3 && Op2Val == 1) Asm = "dc\tcivac"; @@ -773,6 +781,21 @@ bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) { break; } break; + case 9: + switch (Op1Val) { + default: + break; + case 0: + if (STI.getFeatureBits()[AArch64::HasV8_2aOps]) { + switch (Op2Val) { + default: + break; + case 0: Asm = "at\ts1e1rp"; break; + case 1: Asm = "at\ts1e1wp"; break; + } + } + break; + } } } else if (CnVal == 8) { // TLBI aliases @@ -1122,6 +1145,19 @@ void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum, O << '#' << prfop; } +void AArch64InstPrinter::printPSBHintOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned psbhintop = MI->getOperand(OpNum).getImm(); + bool Valid; + StringRef Name = + AArch64PSBHint::PSBHintMapper().toString(psbhintop, STI.getFeatureBits(), Valid); + if (Valid) + O << Name; + else + O << '#' << psbhintop; +} + void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 15dee978e229..ea68d9848b42 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -15,14 +15,10 @@ #define LLVM_LIB_TARGET_AARCH64_INSTPRINTER_AARCH64INSTPRINTER_H #include "MCTargetDesc/AArch64MCTargetDesc.h" -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" namespace llvm { -class MCOperand; - class AArch64InstPrinter : public MCInstPrinter { public: AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, @@ -48,7 +44,8 @@ public: unsigned AltIdx = AArch64::NoRegAltName); protected: - bool printSysAlias(const MCInst *MI, raw_ostream &O); + bool printSysAlias(const MCInst *MI, const MCSubtargetInfo &STI, + raw_ostream &O); // Operand printers void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); @@ 
-122,6 +119,9 @@ protected: void printPrefetchOp(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + void printPSBHintOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + void printFPImmOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h index ed24343a6f2a..648b1dfc8c5e 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h @@ -364,6 +364,32 @@ static inline float getFPImmFloat(unsigned Imm) { return FPUnion.F; } +/// getFP16Imm - Return an 8-bit floating-point version of the 16-bit +/// floating-point value. If the value cannot be represented as an 8-bit +/// floating-point value, then return -1. +static inline int getFP16Imm(const APInt &Imm) { + uint32_t Sign = Imm.lshr(15).getZExtValue() & 1; + int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15 + int32_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits + + // We can handle 4 bits of mantissa. + // mantissa = (16+UInt(e:f:g:h))/16. + if (Mantissa & 0x3f) + return -1; + Mantissa >>= 6; + + // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 + if (Exp < -3 || Exp > 4) + return -1; + Exp = ((Exp+3) & 0x7) ^ 4; + + return ((int)Sign << 7) | (Exp << 4) | Mantissa; +} + +static inline int getFP16Imm(const APFloat &FPImm) { + return getFP16Imm(FPImm.bitcastToAPInt()); +} + /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 16d53569b231..d26604f5765d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -128,10 +128,9 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. - void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) override { + void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override { EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size); + MCELFStreamer::EmitValueImpl(Value, Size, Loc); } private: diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 921c4b94a729..fbce26e1d9a1 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -48,10 +48,6 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { UseDataRegionDirectives = true; ExceptionsType = ExceptionHandling::DwarfCFI; - - // AArch64 Darwin doesn't have the baggage of X86/ARM, so it's fine to use - // LShr instead of AShr. 
- UseLogicalShr = true; } const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol( diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 28703419514a..a540f49866a9 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -85,13 +85,13 @@ void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const { Streamer.visitUsedExpr(*getSubExpr()); } -MCSection *AArch64MCExpr::findAssociatedSection() const { +MCFragment *AArch64MCExpr::findAssociatedFragment() const { llvm_unreachable("FIXME: what goes here?"); } bool AArch64MCExpr::evaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout, - const MCFixup *Fixup) const { + const MCAsmLayout *Layout, + const MCFixup *Fixup) const { if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup)) return false; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index 1165314e4105..db36a65564ce 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -149,11 +149,10 @@ public: void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *findAssociatedSection() const override; + MCFragment *findAssociatedFragment() const override; - bool evaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout, - const MCFixup *Fixup) const override; + bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout, + const MCFixup *Fixup) const override; void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override; @@ -162,7 +161,6 @@ public: } static bool classof(const AArch64MCExpr *) { return true; } - }; } // end namespace llvm diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 741b273073e4..61c96f1d93c1 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -90,9 +90,11 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( Log2Size = llvm::Log2_32(4); // This encompasses the relocation for the whole 21-bit value. switch (Sym->getKind()) { - default: - Asm.getContext().reportFatalError(Fixup.getLoc(), - "ADR/ADRP relocations must be GOT relative"); + default: { + Asm.getContext().reportError(Fixup.getLoc(), + "ADR/ADRP relocations must be GOT relative"); + return false; + } case MCSymbolRefExpr::VK_PAGE: RelocType = unsigned(MachO::ARM64_RELOC_PAGE21); return true; @@ -170,25 +172,25 @@ void AArch64MachObjectWriter::recordRelocation( // assembler local symbols. If we got here, that's not what we have, // so complain loudly. if (Kind == AArch64::fixup_aarch64_pcrel_branch19) { - Asm.getContext().reportFatalError(Fixup.getLoc(), - "conditional branch requires assembler-local" - " label. '" + - Target.getSymA()->getSymbol().getName() + - "' is external."); + Asm.getContext().reportError(Fixup.getLoc(), + "conditional branch requires assembler-local" + " label. '" + + Target.getSymA()->getSymbol().getName() + + "' is external."); return; } // 14-bit branch relocations should only target internal labels, and so // should never get here. 
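// Recurring pattern in this hunk: reportFatalError, which aborted the whole
// assembly, becomes reportError plus an early return, so the MachO writer
// records a diagnostic and recovers instead of crashing on the first bad
// relocation.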
if (Kind == AArch64::fixup_aarch64_pcrel_branch14) { - Asm.getContext().reportFatalError(Fixup.getLoc(), - "Invalid relocation on conditional branch!"); + Asm.getContext().reportError(Fixup.getLoc(), + "Invalid relocation on conditional branch!"); return; } if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size, - Asm)) { - Asm.getContext().reportFatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!"); + Asm)) { + Asm.getContext().reportError(Fixup.getLoc(), "unknown AArch64 fixup kind!"); return; } @@ -200,8 +202,9 @@ void AArch64MachObjectWriter::recordRelocation( Type = MachO::ARM64_RELOC_UNSIGNED; if (IsPCRel) { - Asm.getContext().reportFatalError(Fixup.getLoc(), - "PC relative absolute relocation!"); + Asm.getContext().reportError(Fixup.getLoc(), + "PC relative absolute relocation!"); + return; // FIXME: x86_64 sets the type to a branch reloc here. Should we do // something similar? @@ -229,16 +232,20 @@ void AArch64MachObjectWriter::recordRelocation( Writer->addRelocation(A_Base, Fragment->getParent(), MRE); return; } else if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) { // Otherwise, neither symbol can be modified. - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unsupported relocation of modified symbol"); + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of modified symbol"); + return; + } // We don't support PCrel relocations of differences. - if (IsPCRel) - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unsupported pc-relative relocation of " - "difference"); + if (IsPCRel) { + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported pc-relative relocation of " + "difference"); + return; + } // AArch64 always uses external relocations. If there is no symbol to use as // a base address (a local symbol with no preceding non-local symbol), @@ -246,20 +253,26 @@ void AArch64MachObjectWriter::recordRelocation( // // FIXME: We should probably just synthesize an external symbol and use // that. - if (!A_Base) - Asm.getContext().reportFatalError( + if (!A_Base) { + Asm.getContext().reportError( Fixup.getLoc(), "unsupported relocation of local symbol '" + A->getName() + "'. Must have non-local symbol earlier in section."); - if (!B_Base) - Asm.getContext().reportFatalError( + return; + } + if (!B_Base) { + Asm.getContext().reportError( Fixup.getLoc(), "unsupported relocation of local symbol '" + B->getName() + "'. Must have non-local symbol earlier in section."); + return; + } - if (A_Base == B_Base && A_Base) - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unsupported relocation with identical base"); + if (A_Base == B_Base && A_Base) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation with identical base"); + return; + } Value += (!A->getFragment() ? 0 : Writer->getSymbolAddress(*A, Layout)) - (!A_Base || !A_Base->getFragment() ? 0 : Writer->getSymbolAddress( @@ -309,10 +322,12 @@ void AArch64MachObjectWriter::recordRelocation( // we need to preserve and merge with the new Target? How about // the FixedValue? 
if (!Symbol->getVariableValue()->evaluateAsRelocatable(Target, &Layout, - &Fixup)) - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unable to resolve variable '" + - Symbol->getName() + "'"); + &Fixup)) { + Asm.getContext().reportError(Fixup.getLoc(), + "unable to resolve variable '" + + Symbol->getName() + "'"); + return; + } return recordRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); } @@ -337,11 +352,13 @@ void AArch64MachObjectWriter::recordRelocation( Value += Layout.getSymbolOffset(*Symbol) - Layout.getSymbolOffset(*Base); } else if (Symbol->isInSection()) { - if (!CanUseLocalRelocation) - Asm.getContext().reportFatalError( + if (!CanUseLocalRelocation) { + Asm.getContext().reportError( Fixup.getLoc(), "unsupported relocation of local symbol '" + Symbol->getName() + "'. Must have non-local symbol earlier in section."); + return; + } // Adjust the relocation to be section-relative. // The index is the section ordinal (1-based). const MCSection &Sec = Symbol->getSection(); @@ -361,9 +378,10 @@ void AArch64MachObjectWriter::recordRelocation( return; } } - Asm.getContext().reportFatalError(Fixup.getLoc(), + Asm.getContext().reportError(Fixup.getLoc(), "unsupported relocation of variable '" + Symbol->getName() + "'"); + return; } } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp index 52b000d15b8d..3e86a42d5be6 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -26,8 +26,9 @@ AArch64TargetStreamer::~AArch64TargetStreamer() {} // The constant pool handling is shared by all AArch64TargetStreamer // implementations. const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr, - unsigned Size) { - return ConstantPools->addEntry(Streamer, Expr, Size); + unsigned Size, + SMLoc Loc) { + return ConstantPools->addEntry(Streamer, Expr, Size, Loc); } void AArch64TargetStreamer::emitCurrentConstantPool() { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h index fcc0d053f6e2..51432830f795 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h @@ -24,7 +24,7 @@ public: /// Callback used to implement the ldr= pseudo. /// Add a new entry to the constant pool for the current section and return an /// MCExpr that can be used to refer to the constant pool location. - const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size); + const MCExpr *addConstantPoolEntry(const MCExpr *, unsigned Size, SMLoc Loc); /// Callback used to implement the .ltorg directive. /// Emit contents of constant pool for the current section.
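The SMLoc threaded through addConstantPoolEntry above exists so that diagnostics about ldr= literals can point at the operand that created the pool entry. A minimal sketch of a caller, assuming a hypothetical emitLdrEqPseudo helper (the register-class test and emission calls mirror ones used elsewhere in this patch; the helper itself is not part of the change):

static void emitLdrEqPseudo(AArch64TargetStreamer &TS, MCStreamer &Out,
                            const MCSubtargetInfo &STI, unsigned Reg,
                            const MCExpr *Value, SMLoc Loc) {
  // Pool slots are 8 bytes for X registers and 4 for W registers; Loc rides
  // along so a later failure is reported at the "ldr reg, =value" operand
  // rather than at pool-emission time.
  bool IsX =
      AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(Reg);
  const MCExpr *PoolRef = TS.addConstantPoolEntry(Value, IsX ? 8 : 4, Loc);
  MCInst Ldr;
  Ldr.setOpcode(IsX ? AArch64::LDRXl : AArch64::LDRWl); // literal loads
  Ldr.addOperand(MCOperand::createReg(Reg));
  Ldr.addOperand(MCOperand::createExpr(PoolRef));
  Out.EmitInstruction(Ldr, STI);
}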
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index ee85b65bf39a..78f5289ec26d 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -146,11 +146,22 @@ const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStateMappings // v8.1a "Privileged Access Never" extension-specific PStates {"pan", PAN, {AArch64::HasV8_1aOps}}, + + // v8.2a + {"uao", UAO, {AArch64::HasV8_2aOps}}, }; AArch64PState::PStateMapper::PStateMapper() : AArch64NamedImmMapper(PStateMappings, 0) {} +const AArch64NamedImmMapper::Mapping AArch64PSBHint::PSBHintMapper::PSBHintMappings[] = { + // v8.2a "Statistical Profiling" extension-specific PSB operand + {"csync", CSync, {AArch64::FeatureSPE}}, +}; + +AArch64PSBHint::PSBHintMapper::PSBHintMapper() + : AArch64NamedImmMapper(PSBHintMappings, 0) {} + const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { {"mdccsr_el0", MDCCSR_EL0, {}}, {"dbgdtrrx_el0", DBGDTRRX_EL0, {}}, @@ -192,6 +203,7 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSMappings[] = { {"id_aa64isar1_el1", ID_A64ISAR1_EL1, {}}, {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1, {}}, {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1, {}}, + {"id_aa64mmfr2_el1", ID_A64MMFR2_EL1, {AArch64::HasV8_2aOps}}, {"mvfr0_el1", MVFR0_EL1, {}}, {"mvfr1_el1", MVFR1_EL1, {}}, {"mvfr2_el1", MVFR2_EL1, {}}, @@ -275,9 +287,6 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRMappings[] = { {"icc_sgi1r_el1", ICC_SGI1R_EL1, {}}, {"icc_asgi1r_el1", ICC_ASGI1R_EL1, {}}, {"icc_sgi0r_el1", ICC_SGI0R_EL1, {}}, - - // v8.1a "Privileged Access Never" extension-specific system registers - {"pan", PAN, {AArch64::HasV8_1aOps}}, }; AArch64SysReg::MSRMapper::MSRMapper() { @@ -804,6 +813,24 @@ const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegMappings {"cntv_cval_el02", CNTV_CVAL_EL02, {AArch64::HasV8_1aOps}}, {"spsr_el12", SPSR_EL12, {AArch64::HasV8_1aOps}}, {"elr_el12", ELR_EL12, {AArch64::HasV8_1aOps}}, + + // v8.2a registers + {"uao", UAO, {AArch64::HasV8_2aOps}}, + + // v8.2a "Statistical Profiling extension" registers + {"pmblimitr_el1", PMBLIMITR_EL1, {AArch64::FeatureSPE}}, + {"pmbptr_el1", PMBPTR_EL1, {AArch64::FeatureSPE}}, + {"pmbsr_el1", PMBSR_EL1, {AArch64::FeatureSPE}}, + {"pmbidr_el1", PMBIDR_EL1, {AArch64::FeatureSPE}}, + {"pmscr_el2", PMSCR_EL2, {AArch64::FeatureSPE}}, + {"pmscr_el12", PMSCR_EL12, {AArch64::FeatureSPE}}, + {"pmscr_el1", PMSCR_EL1, {AArch64::FeatureSPE}}, + {"pmsicr_el1", PMSICR_EL1, {AArch64::FeatureSPE}}, + {"pmsirr_el1", PMSIRR_EL1, {AArch64::FeatureSPE}}, + {"pmsfcr_el1", PMSFCR_EL1, {AArch64::FeatureSPE}}, + {"pmsevfr_el1", PMSEVFR_EL1, {AArch64::FeatureSPE}}, + {"pmslatfr_el1", PMSLATFR_EL1, {AArch64::FeatureSPE}}, + {"pmsidr_el1", PMSIDR_EL1, {AArch64::FeatureSPE}}, }; uint32_t diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 7e42f8e3601e..f649cb9b8a8d 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -337,7 +337,9 @@ namespace AArch64AT { S12E1R = 0x63c4, // 01 100 0111 1000 100 S12E1W = 0x63c5, // 01 100 0111 1000 101 S12E0R = 0x63c6, // 01 100 0111 1000 110 - S12E0W = 0x63c7 // 01 100 0111 1000 111 + S12E0W = 0x63c7, // 01 100 0111 1000 111 + S1E1RP = 0x43c8, // 01 000 0111 1001 000 + S1E1WP = 0x43c9 // 01 000 0111 1001 001 }; struct ATMapper : AArch64NamedImmMapper { @@ -463,6 +465,9 @@ namespace 
AArch64PState { // v8.1a "Privileged Access Never" extension-specific PStates PAN = 0x04, + + // v8.2a "User Access Override" extension-specific PStates + UAO = 0x03 }; struct PStateMapper : AArch64NamedImmMapper { @@ -473,6 +478,21 @@ namespace AArch64PState { } +namespace AArch64PSBHint { + enum PSBHintValues { + Invalid = -1, + // v8.2a "Statistical Profiling" extension-specific PSB operands + CSync = 0x11, // psb csync = hint #0x11 + }; + + struct PSBHintMapper : AArch64NamedImmMapper { + const static Mapping PSBHintMappings[]; + + PSBHintMapper(); + }; + +} + namespace AArch64SE { enum ShiftExtSpecifiers { Invalid = -1, @@ -594,6 +614,7 @@ namespace AArch64SysReg { ID_A64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001 ID_A64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000 ID_A64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001 + ID_A64MMFR2_EL1 = 0xc03a, // 11 000 0000 0111 010 MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000 MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001 MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010 @@ -1190,6 +1211,24 @@ namespace AArch64SysReg { SPSR_EL12 = 0xea00, // 11 101 0100 0000 000 ELR_EL12 = 0xea01, // 11 101 0100 0000 001 + // v8.2a registers + UAO = 0xc214, // 11 000 0100 0010 100 + + // v8.2a "Statistical Profiling extension" registers + PMBLIMITR_EL1 = 0xc4d0, // 11 000 1001 1010 000 + PMBPTR_EL1 = 0xc4d1, // 11 000 1001 1010 001 + PMBSR_EL1 = 0xc4d3, // 11 000 1001 1010 011 + PMBIDR_EL1 = 0xc4d7, // 11 000 1001 1010 111 + PMSCR_EL2 = 0xe4c8, // 11 100 1001 1001 000 + PMSCR_EL12 = 0xecc8, // 11 101 1001 1001 000 + PMSCR_EL1 = 0xc4c8, // 11 000 1001 1001 000 + PMSICR_EL1 = 0xc4ca, // 11 000 1001 1001 010 + PMSIRR_EL1 = 0xc4cb, // 11 000 1001 1001 011 + PMSFCR_EL1 = 0xc4cc, // 11 000 1001 1001 100 + PMSEVFR_EL1 = 0xc4cd, // 11 000 1001 1001 101 + PMSLATFR_EL1 = 0xc4ce, // 11 000 1001 1001 110 + PMSIDR_EL1 = 0xc4cf, // 11 000 1001 1001 111 + // Cyclone specific system registers CPM_IOACC_CTL_EL3 = 0xff90, }; diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 0a05d25189b0..8c3cb567fc7e 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -44,15 +44,21 @@ FunctionPass *createSIShrinkInstructionsPass(); FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSIFixControlFlowLiveIntervalsPass(); -FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm); +FunctionPass *createSIFixSGPRCopiesPass(); FunctionPass *createSIFixSGPRLiveRangesPass(); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSIInsertWaits(TargetMachine &tm); -FunctionPass *createSIPrepareScratchRegs(); + +ModulePass *createAMDGPUAnnotateKernelFeaturesPass(); +void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); +extern char &AMDGPUAnnotateKernelFeaturesID; void initializeSIFoldOperandsPass(PassRegistry &); extern char &SIFoldOperandsID; +void initializeSIFixSGPRCopiesPass(PassRegistry &); +extern char &SIFixSGPRCopiesID; + void initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; @@ -64,6 +70,8 @@ FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST); Pass *createAMDGPUStructurizeCFGPass(); FunctionPass *createAMDGPUISelDag(TargetMachine &tm); ModulePass *createAMDGPUAlwaysInlinePass(); +ModulePass *createAMDGPUOpenCLImageTypeLoweringPass(); +FunctionPass *createAMDGPUAnnotateUniformValues(); void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&); extern char 
&SIFixControlFlowLiveIntervalsID; @@ -71,6 +79,8 @@ extern char &SIFixControlFlowLiveIntervalsID; void initializeSIFixSGPRLiveRangesPass(PassRegistry&); extern char &SIFixSGPRLiveRangesID; +void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); +extern char &AMDGPUAnnotateUniformValuesPassID; extern Target TheAMDGPUTarget; extern Target TheGCNTarget; @@ -85,8 +95,6 @@ enum TargetIndex { }; } -#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel" - } // End namespace llvm namespace ShaderType { diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 68b50504ee44..d4af8d2e48d1 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -108,6 +108,11 @@ def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature <"unsafe-ds-offset-fol "true", "Force using DS instruction immediate offsets on SI">; +def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", + "FlatForGlobal", + "true", + "Force to generate flat instruction for global">; + def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space", "FlatAddressSpace", "true", @@ -272,9 +277,14 @@ def isSICI : Predicate< "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS" >, AssemblerPredicate<"FeatureGCN1Encoding">; +def isVI : Predicate < + "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, + AssemblerPredicate<"FeatureGCN3Encoding">; + class PredicateControl { Predicate SubtargetPredicate; Predicate SIAssemblerPredicate = isSICI; + Predicate VIAssemblerPredicate = isVI; list AssemblerPredicates = []; Predicate AssemblerPredicate = TruePredicate; list OtherPredicates = []; diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp new file mode 100644 index 000000000000..378183927242 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -0,0 +1,126 @@ +//===-- AMDGPUAnnotateKernelFeaturesPass.cpp ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This pass adds target attributes to functions which use intrinsics +/// which will impact calling convention lowering. 
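Stepping back to the AArch64BaseInfo hunks above: the new PState, PSB hint, and system-register entries all follow the same table-driven operand-mapper shape, pairing a spelled name with its encoding and the subtarget features required for the name to be valid. A self-contained approximation of that lookup; the Mapping struct and fromString are my names, not the AArch64NamedImmMapper API:

#include <cstdint>
#include <iostream>
#include <string>

// Feature bits standing in for AArch64::HasV8_2aOps / FeatureSPE.
enum Feature : unsigned { None = 0, V8_2a = 1u << 0, SPE = 1u << 1 };

struct Mapping {
  const char *Name;  // assembly spelling, e.g. "csync"
  uint32_t Value;    // immediate encoding, e.g. 0x11
  unsigned Requires; // features that must all be enabled
};

// Mirrors the shape of the PSB table added above: "psb csync" = hint #0x11.
static const Mapping PSBHints[] = {
  {"csync", 0x11, SPE},
};

// Returns true and sets Value if Name is known and available on this target.
static bool fromString(const std::string &Name, unsigned Enabled,
                       uint32_t &Value) {
  for (const Mapping &M : PSBHints) {
    if (Name == M.Name && (Enabled & M.Requires) == M.Requires) {
      Value = M.Value;
      return true;
    }
  }
  return false;
}

int main() {
  uint32_t V = 0;
  bool Ok = fromString("csync", SPE, V);
  std::cout << Ok << ' ' << V << '\n';               // 1 17
  std::cout << fromString("csync", None, V) << '\n'; // 0: SPE not enabled
}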
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +#define DEBUG_TYPE "amdgpu-annotate-kernel-features" + +using namespace llvm; + +namespace { + +class AMDGPUAnnotateKernelFeatures : public ModulePass { +private: + void addAttrToCallers(Function *Intrin, StringRef AttrName); + bool addAttrsForIntrinsics(Module &M, ArrayRef<StringRef[2]>); + +public: + static char ID; + + AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { } + bool runOnModule(Module &M) override; + const char *getPassName() const override { + return "AMDGPU Annotate Kernel Features"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + ModulePass::getAnalysisUsage(AU); + } +}; + +} + +char AMDGPUAnnotateKernelFeatures::ID = 0; + +char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID; + + +INITIALIZE_PASS_BEGIN(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, + "Add AMDGPU function attributes", false, false) +INITIALIZE_PASS_END(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, + "Add AMDGPU function attributes", false, false) + + +void AMDGPUAnnotateKernelFeatures::addAttrToCallers(Function *Intrin, + StringRef AttrName) { + SmallPtrSet<Function *, 4> SeenFuncs; + + for (User *U : Intrin->users()) { + // CallInst is the only valid user for an intrinsic. + CallInst *CI = cast<CallInst>(U); + + Function *CallingFunction = CI->getParent()->getParent(); + if (SeenFuncs.insert(CallingFunction).second) + CallingFunction->addFnAttr(AttrName); + } +} + +bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( + Module &M, + ArrayRef<StringRef[2]> IntrinsicToAttr) { + bool Changed = false; + + for (const StringRef *Arr : IntrinsicToAttr) { + if (Function *Fn = M.getFunction(Arr[0])) { + addAttrToCallers(Fn, Arr[1]); + Changed = true; + } + } + + return Changed; +} + +bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { + Triple TT(M.getTargetTriple()); + + static const StringRef IntrinsicToAttr[][2] = { + // .x omitted + { "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y" }, + { "llvm.r600.read.tgid.z", "amdgpu-work-group-id-z" }, + + // .x omitted + { "llvm.r600.read.tidig.y", "amdgpu-work-item-id-y" }, + { "llvm.r600.read.tidig.z", "amdgpu-work-item-id-z" } + + }; + + static const StringRef HSAIntrinsicToAttr[][2] = { + { "llvm.r600.read.local.size.x", "amdgpu-dispatch-ptr" }, + { "llvm.r600.read.local.size.y", "amdgpu-dispatch-ptr" }, + { "llvm.r600.read.local.size.z", "amdgpu-dispatch-ptr" }, + + { "llvm.r600.read.global.size.x", "amdgpu-dispatch-ptr" }, + { "llvm.r600.read.global.size.y", "amdgpu-dispatch-ptr" }, + { "llvm.r600.read.global.size.z", "amdgpu-dispatch-ptr" }, + { "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" } + }; + + // TODO: Intrinsics that require queue ptr. + + // We do not need to note the x workitem or workgroup id because they are + // always initialized.
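The core trick of the pass taking shape here is that it never scans instructions: it looks up each intrinsic declaration by name and walks that declaration's use list, tagging each distinct calling function with the matching attribute exactly once. A toy rendering of that use-list-driven annotation, with all names invented for illustration:

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

// Toy module: intrinsic name -> functions that call it (its "use list").
using ToyModule = std::map<std::string, std::vector<std::string>>;

// Tag every distinct caller of Intrin with AttrName, like addAttrToCallers.
static void addAttrToCallers(
    const ToyModule &M, const std::string &Intrin,
    const std::string &AttrName,
    std::map<std::string, std::set<std::string>> &Attrs) {
  auto It = M.find(Intrin);
  if (It == M.end())
    return; // Intrinsic never declared: nothing to annotate.
  std::set<std::string> Seen; // Mirrors the SeenFuncs set above.
  for (const std::string &Caller : It->second)
    if (Seen.insert(Caller).second)
      Attrs[Caller].insert(AttrName);
}

int main() {
  ToyModule M = {{"llvm.r600.read.tgid.y", {"kernel_a", "kernel_a", "kernel_b"}}};
  std::map<std::string, std::set<std::string>> Attrs;
  addAttrToCallers(M, "llvm.r600.read.tgid.y", "amdgpu-work-group-id-y", Attrs);
  for (auto &KV : Attrs)
    for (auto &A : KV.second)
      std::cout << KV.first << ": " << A << '\n'; // each caller tagged once
}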
+ + bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr); + if (TT.getOS() == Triple::AMDHSA) + Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr); + + return Changed; +} + +ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() { + return new AMDGPUAnnotateKernelFeatures(); +} diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp new file mode 100644 index 000000000000..dfddc345f286 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -0,0 +1,84 @@ +//===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass adds amdgpu.uniform metadata to IR values so this information +/// can be used during instruction selection. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUIntrinsicInfo.h" +#include "llvm/Analysis/DivergenceAnalysis.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "amdgpu-annotate-uniform" + +using namespace llvm; + +namespace { + +class AMDGPUAnnotateUniformValues : public FunctionPass, + public InstVisitor { + DivergenceAnalysis *DA; + +public: + static char ID; + AMDGPUAnnotateUniformValues() : + FunctionPass(ID) { } + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + const char *getPassName() const override { return "AMDGPU Annotate Uniform Values"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesAll(); + } + + void visitLoadInst(LoadInst &I); + +}; + +} // End anonymous namespace + +INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE, + "Add AMDGPU uniform metadata", false, false) +INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis) +INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE, + "Add AMDGPU uniform metadata", false, false) + +char AMDGPUAnnotateUniformValues::ID = 0; + +void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { + Value *Ptr = I.getPointerOperand(); + if (!DA->isUniform(Ptr)) + return; + + if (Instruction *PtrI = dyn_cast(Ptr)) + PtrI->setMetadata("amdgpu.uniform", MDNode::get(I.getContext(), {})); + +} + +bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { + return false; +} + +bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) { + DA = &getAnalysis(); + visit(F); + + return true; +} + +FunctionPass * +llvm::createAMDGPUAnnotateUniformValues() { + return new AMDGPUAnnotateUniformValues(); +} diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 0a5309b16ee5..ba71dc05a8fc 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -100,14 +100,63 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { } } -void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { +void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { + const SIMachineFunctionInfo *MFI = MF->getInfo(); + const AMDGPUSubtarget &STM = MF->getSubtarget(); + if (MFI->isKernel() && STM.isAmdHsaOS()) { + AMDGPUTargetStreamer *TS = + static_cast(OutStreamer->getTargetStreamer()); + 
TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(), + ELF::STT_AMDGPU_HSA_KERNEL); + } - // This label is used to mark the end of the .text section. - const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - OutStreamer->SwitchSection(TLOF.getTextSection()); - MCSymbol *EndOfTextLabel = - OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - OutStreamer->EmitLabel(EndOfTextLabel); + AsmPrinter::EmitFunctionEntryLabel(); +} + +static bool isModuleLinkage(const GlobalValue *GV) { + switch (GV->getLinkage()) { + case GlobalValue::InternalLinkage: + case GlobalValue::CommonLinkage: + return true; + case GlobalValue::ExternalLinkage: + return false; + default: llvm_unreachable("unknown linkage type"); + } +} + +void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + + if (TM.getTargetTriple().getOS() != Triple::AMDHSA) { + AsmPrinter::EmitGlobalVariable(GV); + return; + } + + if (GV->isDeclaration() || GV->getLinkage() == GlobalValue::PrivateLinkage) { + AsmPrinter::EmitGlobalVariable(GV); + return; + } + + // Group segment variables aren't emitted in HSA. + if (AMDGPU::isGroupSegment(GV)) + return; + + AMDGPUTargetStreamer *TS = + static_cast(OutStreamer->getTargetStreamer()); + if (isModuleLinkage(GV)) { + TS->EmitAMDGPUHsaModuleScopeGlobal(GV->getName()); + } else { + TS->EmitAMDGPUHsaProgramScopeGlobal(GV->getName()); + } + + const DataLayout &DL = getDataLayout(); + OutStreamer->PushSection(); + OutStreamer->SwitchSection( + getObjFileLowering().SectionForGlobal(GV, *Mang, TM)); + MCSymbol *GVSym = getSymbol(GV); + const Constant *C = GV->getInitializer(); + OutStreamer->EmitLabel(GVSym); + EmitGlobalConstant(DL, C); + OutStreamer->PopSection(); } bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { @@ -125,8 +174,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { const AMDGPUSubtarget &STM = MF.getSubtarget(); SIProgramInfo KernelInfo; if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + getSIProgramInfo(KernelInfo, MF); if (!STM.isAmdHsaOS()) { - getSIProgramInfo(KernelInfo, MF); EmitProgramInfoSI(MF, KernelInfo); } // Emit directives @@ -165,6 +214,23 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { false); OutStreamer->emitRawComment(" ScratchSize: " + Twine(KernelInfo.ScratchSize), false); + + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " + + Twine(G_00B84C_USER_SGPR(KernelInfo.ComputePGMRSrc2)), + false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " + + Twine(G_00B84C_TGID_X_EN(KernelInfo.ComputePGMRSrc2)), + false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " + + Twine(G_00B84C_TGID_Y_EN(KernelInfo.ComputePGMRSrc2)), + false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " + + Twine(G_00B84C_TGID_Z_EN(KernelInfo.ComputePGMRSrc2)), + false); + OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " + + Twine(G_00B84C_TIDIG_COMP_CNT(KernelInfo.ComputePGMRSrc2)), + false); + } else { R600MachineFunctionInfo *MFI = MF.getInfo(); OutStreamer->emitRawComment( @@ -278,27 +344,30 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, unsigned width = 0; bool isSGPR = false; - if (!MO.isReg()) { + if (!MO.isReg()) continue; - } - unsigned reg = MO.getReg(); - if (reg == AMDGPU::VCC || reg == AMDGPU::VCC_LO || - reg == AMDGPU::VCC_HI) { - VCCUsed = true; - continue; - } else if (reg == AMDGPU::FLAT_SCR || - reg == AMDGPU::FLAT_SCR_LO || - reg == AMDGPU::FLAT_SCR_HI) { - FlatUsed = true; - continue; - } 
+ unsigned reg = MO.getReg(); switch (reg) { - default: break; - case AMDGPU::SCC: case AMDGPU::EXEC: + case AMDGPU::SCC: case AMDGPU::M0: continue; + + case AMDGPU::VCC: + case AMDGPU::VCC_LO: + case AMDGPU::VCC_HI: + VCCUsed = true; + continue; + + case AMDGPU::FLAT_SCR: + case AMDGPU::FLAT_SCR_LO: + case AMDGPU::FLAT_SCR_HI: + FlatUsed = true; + continue; + + default: + break; } if (AMDGPU::SReg_32RegClass.contains(reg)) { @@ -348,11 +417,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, } } - if (VCCUsed) + if (VCCUsed || FlatUsed) MaxSGPR += 2; - if (FlatUsed) + if (FlatUsed) { MaxSGPR += 2; + // 2 additional for VI+. + if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) + MaxSGPR += 2; + } // We found the maximum register index. They start at 0, so add one to get the // number of registers. @@ -368,6 +441,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, ProgInfo.NumSGPR = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG; } + if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) { + LLVMContext &Ctx = MF.getFunction()->getContext(); + Ctx.emitError("too many user SGPRs used"); + } + ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4; ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8; // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode @@ -419,18 +497,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, S_00B848_FLOAT_MODE(ProgInfo.FloatMode) | S_00B848_PRIV(ProgInfo.Priv) | S_00B848_DX10_CLAMP(ProgInfo.DX10Clamp) | - S_00B848_IEEE_MODE(ProgInfo.DebugMode) | + S_00B848_DEBUG_MODE(ProgInfo.DebugMode) | S_00B848_IEEE_MODE(ProgInfo.IEEEMode); + // 0 = X, 1 = XY, 2 = XYZ + unsigned TIDIGCompCnt = 0; + if (MFI->hasWorkItemIDZ()) + TIDIGCompCnt = 2; + else if (MFI->hasWorkItemIDY()) + TIDIGCompCnt = 1; + ProgInfo.ComputePGMRSrc2 = S_00B84C_SCRATCH_EN(ProgInfo.ScratchBlocks > 0) | - S_00B84C_USER_SGPR(MFI->NumUserSGPRs) | - S_00B84C_TGID_X_EN(1) | - S_00B84C_TGID_Y_EN(1) | - S_00B84C_TGID_Z_EN(1) | - S_00B84C_TG_SIZE_EN(1) | - S_00B84C_TIDIG_COMP_CNT(2) | - S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks); + S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) | + S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) | + S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) | + S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) | + S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) | + S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) | + S_00B84C_EXCP_EN_MSB(0) | + S_00B84C_LDS_SIZE(ProgInfo.LDSBlocks) | + S_00B84C_EXCP_EN(0); } static unsigned getRsrcReg(unsigned ShaderType) { @@ -491,14 +578,53 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF, header.compute_pgm_resource_registers = KernelInfo.ComputePGMRSrc1 | (KernelInfo.ComputePGMRSrc2 << 32); - header.code_properties = - AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR | - AMD_CODE_PROPERTY_IS_PTR64; + header.code_properties = AMD_CODE_PROPERTY_IS_PTR64; + + if (MFI->hasPrivateSegmentBuffer()) { + header.code_properties |= + AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER; + } + + if (MFI->hasDispatchPtr()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; + + if (MFI->hasQueuePtr()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR; + + if (MFI->hasKernargSegmentPtr()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR; + + if (MFI->hasDispatchID()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID; + + if (MFI->hasFlatScratchInit()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT; 
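The code_properties block above is a conditional bit-OR: start from an unconditional base value and set one flag per enabled SGPR input. A compact model of building such a packed descriptor word; the PROP_* flags are placeholders, not the real amd_kernel_code_t constants:

#include <cstdint>
#include <cstdio>

// Placeholder flag bits modeled on the AMD_CODE_PROPERTY_* style.
enum : uint32_t {
  PROP_IS_PTR64          = 1u << 0,
  PROP_SGPR_DISPATCH_PTR = 1u << 1,
  PROP_SGPR_QUEUE_PTR    = 1u << 2,
  PROP_SGPR_KERNARG_PTR  = 1u << 3,
};

struct KernelInputs {
  bool HasDispatchPtr, HasQueuePtr, HasKernargPtr;
};

static uint32_t buildCodeProperties(const KernelInputs &MFI) {
  uint32_t Props = PROP_IS_PTR64; // unconditional base, as in the hunk
  if (MFI.HasDispatchPtr)
    Props |= PROP_SGPR_DISPATCH_PTR;
  if (MFI.HasQueuePtr)
    Props |= PROP_SGPR_QUEUE_PTR;
  if (MFI.HasKernargPtr)
    Props |= PROP_SGPR_KERNARG_PTR;
  return Props;
}

int main() {
  KernelInputs In{true, false, true};
  std::printf("0x%x\n", buildCodeProperties(In)); // 0xb: ptr64|dispatch|kernarg
}

Since the mask is built with OR, setting a flag twice, as the dispatch-pointer check does in the hunk above, is redundant but harmless.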
+ + // TODO: Private segment size + + if (MFI->hasGridWorkgroupCountX()) { + header.code_properties |= + AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X; + } + + if (MFI->hasGridWorkgroupCountY()) { + header.code_properties |= + AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y; + } + + if (MFI->hasGridWorkgroupCountZ()) { + header.code_properties |= + AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z; + } + + if (MFI->hasDispatchPtr()) + header.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR; header.kernarg_segment_byte_size = MFI->ABIArgOffset; header.wavefront_sgpr_count = KernelInfo.NumSGPR; header.workitem_vgpr_count = KernelInfo.NumVGPR; - + header.workitem_private_segment_byte_size = KernelInfo.ScratchSize; + header.workgroup_group_segment_byte_size = KernelInfo.LDSSize; AMDGPUTargetStreamer *TS = static_cast(OutStreamer->getTargetStreamer()); diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 345af9b85e15..817cbfc0c0eb 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -99,7 +99,9 @@ public: void EmitFunctionBodyStart() override; - void EmitEndOfAsmFile(Module &M) override; + void EmitFunctionEntryLabel() override; + + void EmitGlobalVariable(const GlobalVariable *GV) override; bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, diff --git a/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp b/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp new file mode 100644 index 000000000000..2f6b3022dd6e --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.cpp @@ -0,0 +1,26 @@ +//===-- AMDGPUDiagnosticInfoUnsupported.cpp -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUDiagnosticInfoUnsupported.h" + +using namespace llvm; + +DiagnosticInfoUnsupported::DiagnosticInfoUnsupported( + const Function &Fn, + const Twine &Desc, + DiagnosticSeverity Severity) + : DiagnosticInfo(getKindID(), Severity), + Description(Desc), + Fn(Fn) { } + +int DiagnosticInfoUnsupported::KindID = 0; + +void DiagnosticInfoUnsupported::print(DiagnosticPrinter &DP) const { + DP << "unsupported " << getDescription() << " in " << Fn.getName(); +} diff --git a/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h b/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h new file mode 100644 index 000000000000..0fd37e1ede6b --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUDiagnosticInfoUnsupported.h @@ -0,0 +1,48 @@ +//===-- AMDGPUDiagnosticInfoUnsupported.h - Error reporting -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUDIAGNOSTICINFOUNSUPPORTED_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUDIAGNOSTICINFOUNSUPPORTED_H + +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" + +namespace llvm { + +/// Diagnostic information for unimplemented or unsupported feature reporting. 
+class DiagnosticInfoUnsupported : public DiagnosticInfo { +private: + const Twine &Description; + const Function &Fn; + + static int KindID; + + static int getKindID() { + if (KindID == 0) + KindID = llvm::getNextAvailablePluginDiagnosticKind(); + return KindID; + } + +public: + DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc, + DiagnosticSeverity Severity = DS_Error); + + const Function &getFunction() const { return Fn; } + const Twine &getDescription() const { return Description; } + + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == getKindID(); + } +}; + +} + +#endif diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp index 8175786fb9b1..4d84d281d998 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp @@ -71,9 +71,15 @@ unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const { } /// \returns The number of registers allocated for \p FI. -int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { +int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + + // Fill in FrameReg output argument. + FrameReg = RI->getFrameRegister(MF); + // Start the offset at 2 so we don't overwrite work group information. // XXX: We should only do this when the shader actually uses this // information. diff --git a/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/lib/Target/AMDGPU/AMDGPUFrameLowering.h index 9f31be1af794..257a3da40589 100644 --- a/lib/Target/AMDGPU/AMDGPUFrameLowering.h +++ b/lib/Target/AMDGPU/AMDGPUFrameLowering.h @@ -8,14 +8,12 @@ //===----------------------------------------------------------------------===// // /// \file -/// \brief Interface to describe a layout of a stack frame on a AMDIL target -/// machine. +/// \brief Interface to describe a layout of a stack frame on an AMDGPU target. // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H -#define LLVM_LIB_TARGET_R600_AMDGPUFRAMELOWERING_H +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -34,7 +32,8 @@ public: /// \returns The number of 32-bit sub-registers that are used when storing /// values to the stack. unsigned getStackWidth(const MachineFunction &MF) const; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override; void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 64c54ccb31ff..b33040b4d06a 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -11,6 +11,8 @@ /// \brief Defines an instruction selector for the AMDGPU target. 
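The header just shown relies on a small reusable pattern: a lazily assigned, process-unique kind ID from getNextAvailablePluginDiagnosticKind(), which lets classof recognize the custom diagnostic without a central registry. A standalone rendering of that ID scheme; DiagBase and Unsupported are illustrative stand-ins for the LLVM classes:

#include <iostream>
#include <string>
#include <utility>

// Stand-in for llvm::getNextAvailablePluginDiagnosticKind().
static int getNextAvailableKind() {
  static int Next = 1000; // IDs beyond the built-in diagnostic kinds
  return Next++;
}

struct DiagBase {
  int Kind;
  explicit DiagBase(int K) : Kind(K) {}
  virtual ~DiagBase() = default;
};

struct Unsupported : DiagBase {
  std::string Desc;
  // First use allocates the kind; every later use reuses the same ID.
  static int getKindID() {
    static int KindID = getNextAvailableKind();
    return KindID;
  }
  explicit Unsupported(std::string D)
      : DiagBase(getKindID()), Desc(std::move(D)) {}
  static bool classof(const DiagBase *DI) { return DI->Kind == getKindID(); }
};

int main() {
  Unsupported D("dynamic alloca");
  DiagBase *Base = &D;
  std::cout << Unsupported::classof(Base) << '\n'; // 1: recognized by kind
}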
// //===----------------------------------------------------------------------===// + +#include "AMDGPUDiagnosticInfoUnsupported.h" #include "AMDGPUInstrInfo.h" #include "AMDGPUISelLowering.h" // For AMDGPUISD #include "AMDGPURegisterInfo.h" @@ -20,9 +22,9 @@ #include "SIISelLowering.h" #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Function.h" @@ -40,12 +42,14 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can // make the right decision when generating code for different targets. const AMDGPUSubtarget *Subtarget; + public: AMDGPUDAGToDAGISel(TargetMachine &TM); virtual ~AMDGPUDAGToDAGISel(); bool runOnMachineFunction(MachineFunction &MF) override; SDNode *Select(SDNode *N) override; const char *getPassName() const override; + void PreprocessISelDAG() override; void PostprocessISelDAG() override; private: @@ -91,7 +95,7 @@ private: bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, SDValue &Offset1) const; - void SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, + bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, SDValue &TFE) const; @@ -108,6 +112,16 @@ private: SDValue &TFE) const; bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, SDValue &Offset, SDValue &GLC) const; + bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, + bool &Imm) const; + bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, + bool &Imm) const; + bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; + bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; + bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; + bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; + bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; + bool SelectSMRDBufferSgpr(SDValue Addr, SDValue &Offset) const; SDNode *SelectAddrSpaceCast(SDNode *N); bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3NoMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; @@ -273,6 +287,23 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { return N; } +static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { + switch (NumVectorElts) { + case 1: + return AMDGPU::SReg_32RegClassID; + case 2: + return AMDGPU::SReg_64RegClassID; + case 4: + return AMDGPU::SReg_128RegClassID; + case 8: + return AMDGPU::SReg_256RegClassID; + case 16: + return AMDGPU::SReg_512RegClassID; + } + + llvm_unreachable("invalid vector size"); +} + SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { @@ -306,38 +337,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { EVT EltVT = VT.getVectorElementType(); assert(EltVT.bitsEq(MVT::i32)); if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - bool UseVReg = true; - for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); - U != E; ++U) { - if 
(!U->isMachineOpcode()) { - continue; - } - const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); - if (!RC) { - continue; - } - if (static_cast(TRI)->isSGPRClass(RC)) { - UseVReg = false; - } - } - switch(NumVectorElts) { - case 1: RegClassID = UseVReg ? AMDGPU::VGPR_32RegClassID : - AMDGPU::SReg_32RegClassID; - break; - case 2: RegClassID = UseVReg ? AMDGPU::VReg_64RegClassID : - AMDGPU::SReg_64RegClassID; - break; - case 4: RegClassID = UseVReg ? AMDGPU::VReg_128RegClassID : - AMDGPU::SReg_128RegClassID; - break; - case 8: RegClassID = UseVReg ? AMDGPU::VReg_256RegClassID : - AMDGPU::SReg_256RegClassID; - break; - case 16: RegClassID = UseVReg ? AMDGPU::VReg_512RegClassID : - AMDGPU::SReg_512RegClassID; - break; - default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); - } + RegClassID = selectSGPRVectorRegClassID(NumVectorElts); } else { // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG // that adds a 128 bits reg copy when going through TwoAddressInstructions @@ -455,98 +455,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, N->getValueType(0), Ops); } - - case ISD::LOAD: { - LoadSDNode *LD = cast(N); - SDLoc SL(N); - EVT VT = N->getValueType(0); - - if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) { - N = glueCopyToM0(N); - break; - } - - // To simplify the TableGen patters, we replace all i64 loads with - // v2i32 loads. Alternatively, we could promote i64 loads to v2i32 - // during DAG legalization, however, so places (ExpandUnalignedLoad) - // in the DAG legalizer assume that if i64 is legal, so doing this - // promotion early can cause problems. - - SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SDLoc(N), LD->getChain(), - LD->getBasePtr(), LD->getMemOperand()); - SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL, - MVT::i64, NewLoad); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLoad.getValue(1)); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), BitCast); - SDNode *Load = glueCopyToM0(NewLoad.getNode()); - SelectCode(Load); - N = BitCast.getNode(); - break; - } - + case ISD::LOAD: case ISD::STORE: { - // Handle i64 stores here for the same reason mentioned above for loads. - StoreSDNode *ST = cast(N); - SDValue Value = ST->getValue(); - if (Value.getValueType() == MVT::i64 && !ST->isTruncatingStore()) { - - SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(N), - MVT::v2i32, Value); - SDValue NewStore = CurDAG->getStore(ST->getChain(), SDLoc(N), NewValue, - ST->getBasePtr(), ST->getMemOperand()); - - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewStore); - - if (NewValue.getOpcode() == ISD::BITCAST) { - Select(NewStore.getNode()); - return SelectCode(NewValue.getNode()); - } - - // getNode() may fold the bitcast if its input was another bitcast. If that - // happens we should only select the new store. 
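The deleted block above shows what the refactor buys: instead of scanning every use to pick between VGPR and SGPR classes, the new selectSGPRVectorRegClassID helper is a pure table lookup from element count to scalar register class. A minimal sketch of that mapping, with invented class ID values:

#include <cstdio>
#include <cstdlib>

// Invented stand-ins for AMDGPU::SReg_*RegClassID.
enum RegClassID { SReg32 = 0, SReg64, SReg128, SReg256, SReg512 };

static RegClassID selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:  return SReg32; // one 32-bit element per scalar register
  case 2:  return SReg64;
  case 4:  return SReg128;
  case 8:  return SReg256;
  case 16: return SReg512;
  }
  // Mirrors llvm_unreachable: only these element counts are legal here.
  std::abort();
}

int main() {
  std::printf("%d\n", selectSGPRVectorRegClassID(4)); // 2 (the 128-bit class)
}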
- N = NewStore.getNode(); - } - N = glueCopyToM0(N); break; } - case AMDGPUISD::REGISTER_LOAD: { - if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) - break; - SDValue Addr, Offset; - - SDLoc DL(N); - SelectADDRIndirect(N->getOperand(1), Addr, Offset); - const SDValue Ops[] = { - Addr, - Offset, - CurDAG->getTargetConstant(0, DL, MVT::i32), - N->getOperand(0), - }; - return CurDAG->getMachineNode(AMDGPU::SI_RegisterLoad, DL, - CurDAG->getVTList(MVT::i32, MVT::i64, - MVT::Other), - Ops); - } - case AMDGPUISD::REGISTER_STORE: { - if (Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) - break; - SDValue Addr, Offset; - SelectADDRIndirect(N->getOperand(2), Addr, Offset); - SDLoc DL(N); - const SDValue Ops[] = { - N->getOperand(1), - Addr, - Offset, - CurDAG->getTargetConstant(0, DL, MVT::i32), - N->getOperand(0), - }; - return CurDAG->getMachineNode(AMDGPU::SI_RegisterStorePseudo, DL, - CurDAG->getVTList(MVT::Other), - Ops); - } - case AMDGPUISD::BFE_I32: case AMDGPUISD::BFE_U32: { if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) @@ -575,7 +489,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32, SDLoc(N), N->getOperand(0), OffsetVal, WidthVal); - } case AMDGPUISD::DIV_SCALE: { return SelectDIV_SCALE(N); @@ -601,7 +514,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } - bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) { assert(AS != 0 && "Use checkPrivateAddress instead."); if (!Ptr) @@ -681,7 +593,7 @@ bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const { if (checkPrivateAddress(N->getMemOperand())) { if (MMO) { const PseudoSourceValue *PSV = MMO->getPseudoValue(); - if (PSV && PSV == PseudoSourceValue::getConstantPool()) { + if (PSV && PSV->isConstantPool()) { return true; } } @@ -847,7 +759,8 @@ SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { unsigned Opc = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32; - // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod + // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, + // omod SDValue Ops[8]; SelectVOP3Mods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); @@ -883,15 +796,39 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, Offset = N1; return true; } - } + } else if (Addr.getOpcode() == ISD::SUB) { + // sub C, x -> add (sub 0, x), C + if (const ConstantSDNode *C = dyn_cast(Addr.getOperand(0))) { + int64_t ByteOffset = C->getSExtValue(); + if (isUInt<16>(ByteOffset)) { + SDLoc DL(Addr); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); - SDLoc DL(Addr); + // XXX - This is kind of hacky. Create a dummy sub node so we can check + // the known bits in isDSOffsetLegal. We need to emit the selected node + // here, so this is thrown away. + SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, + Zero, Addr.getOperand(1)); + + if (isDSOffsetLegal(Sub, ByteOffset, 16)) { + MachineSDNode *MachineSub + = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, + Zero, Addr.getOperand(1)); + + Base = SDValue(MachineSub, 0); + Offset = Addr.getOperand(0); + return true; + } + } + } + } else if (const ConstantSDNode *CAddr = dyn_cast(Addr)) { + // If we have a constant address, prefer to put the constant into the + // offset. 
This can save moves to load the constant address since multiple + // operations can share the zero base address register, and enables merging + // into read2 / write2 instructions. + + SDLoc DL(Addr); - // If we have a constant address, prefer to put the constant into the - // offset. This can save moves to load the constant address since multiple - // operations can share the zero base address register, and enables merging - // into read2 / write2 instructions. - if (const ConstantSDNode *CAddr = dyn_cast(Addr)) { if (isUInt<16>(CAddr->getZExtValue())) { SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, @@ -904,10 +841,11 @@ bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base, // default case Base = Addr; - Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); + Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16); return true; } +// TODO: If offset is too big, put low 16-bit into offset. bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, SDValue &Offset0, SDValue &Offset1) const { @@ -926,9 +864,35 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); return true; } - } + } else if (Addr.getOpcode() == ISD::SUB) { + // sub C, x -> add (sub 0, x), C + if (const ConstantSDNode *C = dyn_cast(Addr.getOperand(0))) { + unsigned DWordOffset0 = C->getZExtValue() / 4; + unsigned DWordOffset1 = DWordOffset0 + 1; - if (const ConstantSDNode *CAddr = dyn_cast(Addr)) { + if (isUInt<8>(DWordOffset0)) { + SDLoc DL(Addr); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32); + + // XXX - This is kind of hacky. Create a dummy sub node so we can check + // the known bits in isDSOffsetLegal. We need to emit the selected node + // here, so this is thrown away. + SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32, + Zero, Addr.getOperand(1)); + + if (isDSOffsetLegal(Sub, DWordOffset1, 8)) { + MachineSDNode *MachineSub + = CurDAG->getMachineNode(AMDGPU::V_SUB_I32_e32, DL, MVT::i32, + Zero, Addr.getOperand(1)); + + Base = SDValue(MachineSub, 0); + Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8); + Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8); + return true; + } + } + } + } else if (const ConstantSDNode *CAddr = dyn_cast(Addr)) { unsigned DWordOffset0 = CAddr->getZExtValue() / 4; unsigned DWordOffset1 = DWordOffset0 + 1; assert(4 * DWordOffset0 == CAddr->getZExtValue()); @@ -956,12 +920,16 @@ static bool isLegalMUBUFImmOffset(const ConstantSDNode *Imm) { return isUInt<12>(Imm->getZExtValue()); } -void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, +bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr, SDValue &SOffset, SDValue &Offset, SDValue &Offen, SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC, SDValue &TFE) const { + // Subtarget prefers to use flat instruction + if (Subtarget->useFlatForGlobal()) + return false; + SDLoc DL(Addr); GLC = CurDAG->getTargetConstant(0, DL, MVT::i1); @@ -994,14 +962,14 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, if (isLegalMUBUFImmOffset(C1)) { Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16); - return; + return true; } else if (isUInt<32>(C1->getZExtValue())) { // Illegal offset, store it in soffset. 
Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)), 0); - return; + return true; } } @@ -1013,7 +981,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, Ptr = N0; VAddr = N1; Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); - return; + return true; } // default case -> offset @@ -1021,6 +989,7 @@ void AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, Ptr = Addr; Offset = CurDAG->getTargetConstant(0, DL, MVT::i16); + return true; } bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, @@ -1033,8 +1002,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) return false; - SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE); + if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, + GLC, SLC, TFE)) + return false; ConstantSDNode *C = cast(Addr64); if (C->getSExtValue()) { @@ -1052,8 +1022,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, SDValue &SOffset, - SDValue &Offset, - SDValue &SLC) const { + SDValue &Offset, + SDValue &SLC) const { SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1); SDValue GLC, TFE; @@ -1066,36 +1036,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc, SDLoc DL(Addr); MachineFunction &MF = CurDAG->getMachineFunction(); - const SIRegisterInfo *TRI = - static_cast(Subtarget->getRegisterInfo()); - MachineRegisterInfo &MRI = MF.getRegInfo(); - const SITargetLowering& Lowering = - *static_cast(getTargetLowering()); + const SIMachineFunctionInfo *Info = MF.getInfo(); - unsigned ScratchOffsetReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); - Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass, - ScratchOffsetReg, MVT::i32); - SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32); - SDValue ScratchRsrcDword0 = - SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0); - - SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32); - SDValue ScratchRsrcDword1 = - SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0); - - const SDValue RsrcOps[] = { - CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), - ScratchRsrcDword0, - CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), - ScratchRsrcDword1, - CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32), - }; - SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, - MVT::v2i32, RsrcOps), 0); - Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0); - SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, - MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32); + Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32); + SOffset = CurDAG->getRegister(Info->getScratchWaveOffsetReg(), MVT::i32); // (add n0, c1) if (CurDAG->isBaseWithConstantOffset(Addr)) { @@ -1126,8 +1070,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, const SIInstrInfo *TII = static_cast(Subtarget->getInstrInfo()); - SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, - GLC, SLC, TFE); + if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64, + GLC, 
SLC, TFE)) + return false; if (!cast(Offen)->getSExtValue() && !cast(Idxen)->getSExtValue() && @@ -1153,18 +1098,134 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE); } +/// +/// \param EncodedOffset This is the immediate value that will be encoded +/// directly into the instruction. On SI/CI the \p EncodedOffset +/// will be in units of dwords and on VI+ it will be units of bytes. +static bool isLegalSMRDImmOffset(const AMDGPUSubtarget *ST, + int64_t EncodedOffset) { + return ST->getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS ? + isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, + SDValue &Offset, bool &Imm) const { + + // FIXME: Handle non-constant offsets. + ConstantSDNode *C = dyn_cast(ByteOffsetNode); + if (!C) + return false; + + SDLoc SL(ByteOffsetNode); + AMDGPUSubtarget::Generation Gen = Subtarget->getGeneration(); + int64_t ByteOffset = C->getSExtValue(); + int64_t EncodedOffset = Gen < AMDGPUSubtarget::VOLCANIC_ISLANDS ? + ByteOffset >> 2 : ByteOffset; + + if (isLegalSMRDImmOffset(Subtarget, EncodedOffset)) { + Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); + Imm = true; + return true; + } + + if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset)) + return false; + + if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) { + // 32-bit Immediates are supported on Sea Islands. + Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32); + } else { + SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); + Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, + C32Bit), 0); + } + Imm = false; + return true; +} + +bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase, + SDValue &Offset, bool &Imm) const { + + SDLoc SL(Addr); + if (CurDAG->isBaseWithConstantOffset(Addr)) { + SDValue N0 = Addr.getOperand(0); + SDValue N1 = Addr.getOperand(1); + + if (SelectSMRDOffset(N1, Offset, Imm)) { + SBase = N0; + return true; + } + } + SBase = Addr; + Offset = CurDAG->getTargetConstant(0, SL, MVT::i32); + Imm = true; + return true; +} + +bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, + SDValue &Offset) const { + bool Imm; + return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; +} + +bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, + SDValue &Offset) const { + + if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) + return false; + + bool Imm; + if (!SelectSMRD(Addr, SBase, Offset, Imm)) + return false; + + return !Imm && isa(Offset); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, + SDValue &Offset) const { + bool Imm; + return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && + !isa(Offset); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, + SDValue &Offset) const { + bool Imm; + return SelectSMRDOffset(Addr, Offset, Imm) && Imm; +} + +bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, + SDValue &Offset) const { + if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS) + return false; + + bool Imm; + if (!SelectSMRDOffset(Addr, Offset, Imm)) + return false; + + return !Imm && isa(Offset); +} + +bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgpr(SDValue Addr, + SDValue &Offset) const { + bool Imm; + return SelectSMRDOffset(Addr, Offset, Imm) && !Imm && + !isa(Offset); +} + // FIXME: This is incorrect and only enough to be able 
to compile. SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N); SDLoc DL(N); + const MachineFunction &MF = CurDAG->getMachineFunction(); + DiagnosticInfoUnsupported NotImplemented(*MF.getFunction(), + "addrspacecast not implemented"); + CurDAG->getContext()->diagnose(NotImplemented); + assert(Subtarget->hasFlatAddressSpace() && "addrspacecast only supported with flat address space!"); - assert((ASC->getSrcAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && - ASC->getDestAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) && - "Cannot cast address space to / from constant address!"); - assert((ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS || ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) && "Can only cast to / from flat address space!"); @@ -1190,7 +1251,6 @@ SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) { CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32)); } - if (DestSize > SrcSize) { assert(SrcSize == 32 && DestSize == 64); @@ -1371,6 +1431,65 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } +void AMDGPUDAGToDAGISel::PreprocessISelDAG() { + bool Modified = false; + + // XXX - Other targets seem to be able to do this without a worklist. + SmallVector<LoadSDNode *, 8> LoadsToReplace; + SmallVector<StoreSDNode *, 8> StoresToReplace; + + for (SDNode &Node : CurDAG->allnodes()) { + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(&Node)) { + EVT VT = LD->getValueType(0); + if (VT != MVT::i64 || LD->getExtensionType() != ISD::NON_EXTLOAD) + continue; + + // To simplify the TableGen patterns, we replace all i64 loads with v2i32 + // loads. Alternatively, we could promote i64 loads to v2i32 during DAG + // legalization, however, some places (ExpandUnalignedLoad) in the DAG + // legalizer assume that if i64 is legal, so doing this promotion early + // can cause problems. + LoadsToReplace.push_back(LD); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(&Node)) { + // Handle i64 stores here for the same reason mentioned above for loads. + SDValue Value = ST->getValue(); + if (Value.getValueType() != MVT::i64 || ST->isTruncatingStore()) + continue; + StoresToReplace.push_back(ST); + } + } + + for (LoadSDNode *LD : LoadsToReplace) { + SDLoc SL(LD); + + SDValue NewLoad = CurDAG->getLoad(MVT::v2i32, SL, LD->getChain(), + LD->getBasePtr(), LD->getMemOperand()); + SDValue BitCast = CurDAG->getNode(ISD::BITCAST, SL, + MVT::i64, NewLoad); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1)); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(LD, 0), BitCast); + Modified = true; + } + + for (StoreSDNode *ST : StoresToReplace) { + SDValue NewValue = CurDAG->getNode(ISD::BITCAST, SDLoc(ST), + MVT::v2i32, ST->getValue()); + const SDValue StoreOps[] = { + ST->getChain(), + NewValue, + ST->getBasePtr(), + ST->getOffset() + }; + + CurDAG->UpdateNodeOperands(ST, StoreOps); + Modified = true; + } + + // XXX - Is this necessary?
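Backing up to the SMRD helpers a few hunks above, the generation split deserves a worked example: before Volcanic Islands the hardware immediate is an 8-bit dword count, while VI and later take a 20-bit byte offset, so the same byte offset encodes differently and may be legal on one target but not the other. A small model of that decision; the two function names follow the hunk, everything else is illustrative:

#include <cstdint>
#include <cstdio>

enum Generation { SEA_ISLANDS, VOLCANIC_ISLANDS };

static bool isUIntN(unsigned N, int64_t V) {
  return V >= 0 && (V >> N) == 0;
}

// Pre-VI: 8-bit dword-granular immediate. VI+: 20-bit byte immediate.
static bool isLegalSMRDImmOffset(Generation Gen, int64_t EncodedOffset) {
  return Gen < VOLCANIC_ISLANDS ? isUIntN(8, EncodedOffset)
                                : isUIntN(20, EncodedOffset);
}

static int64_t encodeSMRDOffset(Generation Gen, int64_t ByteOffset) {
  return Gen < VOLCANIC_ISLANDS ? ByteOffset >> 2 : ByteOffset;
}

int main() {
  const int64_t Byte = 1024; // a 1 KiB byte offset
  const Generation Gens[] = {SEA_ISLANDS, VOLCANIC_ISLANDS};
  for (Generation G : Gens) {
    int64_t Enc = encodeSMRDOffset(G, Byte);
    std::printf("gen=%d encoded=%lld legal=%d\n", (int)G,
                (long long)Enc, isLegalSMRDImmOffset(G, Enc));
  }
}

Running this shows the 1024-byte offset encoding to 256 dwords, which overflows the 8-bit field on SI/CI (so the hunk falls back to a 32-bit literal or an S_MOV_B32), while the same offset fits comfortably in VI's 20-bit byte immediate.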
+ if (Modified) + CurDAG->RemoveDeadNodes(); +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast(getTargetLowering()); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 3a65f3b56146..222f63161be5 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -15,6 +15,7 @@ #include "AMDGPUISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUDiagnosticInfoUnsupported.h" #include "AMDGPUFrameLowering.h" #include "AMDGPUIntrinsicInfo.h" #include "AMDGPURegisterInfo.h" @@ -27,50 +28,9 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/DiagnosticPrinter.h" using namespace llvm; -namespace { - -/// Diagnostic information for unimplemented or unsupported feature reporting. -class DiagnosticInfoUnsupported : public DiagnosticInfo { -private: - const Twine &Description; - const Function &Fn; - - static int KindID; - - static int getKindID() { - if (KindID == 0) - KindID = llvm::getNextAvailablePluginDiagnosticKind(); - return KindID; - } - -public: - DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc, - DiagnosticSeverity Severity = DS_Error) - : DiagnosticInfo(getKindID(), Severity), - Description(Desc), - Fn(Fn) { } - - const Function &getFunction() const { return Fn; } - const Twine &getDescription() const { return Description; } - - void print(DiagnosticPrinter &DP) const override { - DP << "unsupported " << getDescription() << " in " << Fn.getName(); - } - - static bool classof(const DiagnosticInfo *DI) { - return DI->getKind() == getKindID(); - } -}; - -int DiagnosticInfoUnsupported::KindID = 0; -} - - static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { @@ -113,6 +73,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::BR_JT, MVT::Other, Expand); setOperationAction(ISD::BRIND, MVT::Other, Expand); + // This is totally unsupported, just custom lower to produce an error. + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + // We need to custom lower some of the intrinsics setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -352,7 +315,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setOperationAction(ISD::SMUL_LOHI, VT, Expand); setOperationAction(ISD::UMUL_LOHI, VT, Expand); setOperationAction(ISD::SDIVREM, VT, Custom); - setOperationAction(ISD::UDIVREM, VT, Custom); + setOperationAction(ISD::UDIVREM, VT, Expand); setOperationAction(ISD::ADDC, VT, Expand); setOperationAction(ISD::SUBC, VT, Expand); setOperationAction(ISD::ADDE, VT, Expand); @@ -429,12 +392,18 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM, setSelectIsExpensive(false); PredictableSelectIsExpensive = false; - // There are no integer divide instructions, and these expand to a pretty - // large sequence of instructions. - setIntDivIsCheap(false); - setPow2SDivIsCheap(false); setFsqrtIsCheap(true); + // We want to find all load dependencies for long chains of stores to enable + // merging into very wide vectors. The problem is with vectors with > 4 + // elements. MergeConsecutiveStores will attempt to merge these because x8/x16 + // vectors are a legal type, even though we have to split the loads + // usually. 
When we can more precisely specify load legality per address + // space, we should be able to make FindBetterChain/MergeConsecutiveStores + // smarter so that they can figure out what to do in 2 iterations without all + // N > 4 stores on the same chain. + GatherAllAliasesMaxDepth = 16; + // FIXME: Need to really handle these. MaxStoresPerMemcpy = 4096; MaxStoresPerMemmove = 4096; @@ -534,6 +503,18 @@ bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT, return true; } +bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const { + // There are few operations which truly have vector input operands. Any vector + // operation is going to involve operations on each component, and a + // build_vector will be a copy per element, so it always makes sense to use a + // build_vector input in place of the extracted element to avoid a copy into a + // super register. + // + // We should probably only do this if all users are extracts only, but this + // should be the common case. + return true; +} + bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const { // Truncate is just accessing a subregister. return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0); @@ -617,6 +598,15 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, return SDValue(); } +SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + const Function &Fn = *DAG.getMachineFunction().getFunction(); + + DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "dynamic alloca"); + DAG.getContext()->diagnose(NoDynamicAlloca); + return SDValue(); +} + SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -643,6 +633,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); } return Op; } @@ -892,7 +883,9 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, FrameIndexSDNode *FIN = cast(Op); unsigned FrameIndex = FIN->getIndex(); - unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); + unsigned IgnoredFrameReg; + unsigned Offset = + TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op), Op.getValueType()); } @@ -1043,9 +1036,6 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); - case AMDGPUIntrinsic::AMDGPU_brev: - return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1)); - case Intrinsic::AMDGPU_class: return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -1057,6 +1047,8 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name. return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_brev: // Legacy name + return DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(1)); } } @@ -1077,6 +1069,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); + // TODO: Should this propagate fast-math-flags? 
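The LowerIntrinsicLRP body that follows builds linear interpolation as lrp(a, b, c) = a*b + (1 - a)*c, which is why the first node constructed is the 1 - a subtraction that the new fast-math TODO sits next to. A plain-float sketch of that expansion, assuming this decomposition is the intended semantics of the legacy lrp intrinsic:

#include <cstdio>

// Assumed decomposition: blends b toward c as a goes from 1 down to 0.
static float lrp(float A, float B, float C) {
  float OneSubA = 1.0f - A;   // the FSUB node built first in the hunk
  return A * B + OneSubA * C; // the subsequent multiply/accumulate nodes
}

int main() {
  std::printf("%g\n", lrp(0.25f, 8.0f, 4.0f)); // 0.25*8 + 0.75*4 = 5
}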
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, DAG.getConstantFP(1.0f, DL, MVT::f32), Op.getOperand(1)); @@ -1167,45 +1160,6 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL, return SDValue(); } -// FIXME: Remove this when combines added to DAGCombiner. -SDValue AMDGPUTargetLowering::CombineIMinMax(SDLoc DL, - EVT VT, - SDValue LHS, - SDValue RHS, - SDValue True, - SDValue False, - SDValue CC, - SelectionDAG &DAG) const { - if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) - return SDValue(); - - ISD::CondCode CCOpcode = cast(CC)->get(); - switch (CCOpcode) { - case ISD::SETULE: - case ISD::SETULT: { - unsigned Opc = (LHS == True) ? ISD::UMIN : ISD::UMAX; - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - case ISD::SETLE: - case ISD::SETLT: { - unsigned Opc = (LHS == True) ? ISD::SMIN : ISD::SMAX; - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - case ISD::SETGT: - case ISD::SETGE: { - unsigned Opc = (LHS == True) ? ISD::SMAX : ISD::SMIN; - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - case ISD::SETUGE: - case ISD::SETUGT: { - unsigned Opc = (LHS == True) ? ISD::UMAX : ISD::UMIN; - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - default: - return SDValue(); - } -} - SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op, SelectionDAG &DAG) const { LoadSDNode *Load = cast(Op); @@ -1260,7 +1214,8 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, EVT PtrVT = BasePtr.getValueType(); EVT MemVT = Load->getMemoryVT(); SDLoc SL(Op); - MachinePointerInfo SrcValue(Load->getMemOperand()->getValue()); + + const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); EVT LoVT, HiVT; EVT LoMemVT, HiMemVT; @@ -1269,23 +1224,27 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemVT); std::tie(Lo, Hi) = DAG.SplitVector(Op, SL, LoVT, HiVT); + + unsigned Size = LoMemVT.getStoreSize(); + unsigned BaseAlign = Load->getAlignment(); + unsigned HiAlign = MinAlign(BaseAlign, Size); + SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, Load->getChain(), BasePtr, SrcValue, LoMemVT, Load->isVolatile(), Load->isNonTemporal(), - Load->isInvariant(), Load->getAlignment()); + Load->isInvariant(), BaseAlign); SDValue HiPtr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, - DAG.getConstant(LoMemVT.getStoreSize(), SL, - PtrVT)); + DAG.getConstant(Size, SL, PtrVT)); SDValue HiLoad = DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), HiMemVT, Load->isVolatile(), Load->isNonTemporal(), - Load->isInvariant(), Load->getAlignment()); + Load->isInvariant(), HiAlign); SDValue Ops[] = { DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad), @@ -1415,7 +1374,11 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, DAG.getConstant(LoMemVT.getStoreSize(), SL, PtrVT)); - MachinePointerInfo SrcValue(Store->getMemOperand()->getValue()); + const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo(); + unsigned BaseAlign = Store->getAlignment(); + unsigned Size = LoMemVT.getStoreSize(); + unsigned HiAlign = MinAlign(BaseAlign, Size); + SDValue LoStore = DAG.getTruncStore(Chain, SL, Lo, BasePtr, @@ -1423,15 +1386,15 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, LoMemVT, Store->isNonTemporal(), Store->isVolatile(), - Store->getAlignment()); + BaseAlign); SDValue HiStore = DAG.getTruncStore(Chain, SL, Hi, HiPtr, - 
SrcValue.getWithOffset(LoMemVT.getStoreSize()), + SrcValue.getWithOffset(Size), HiMemVT, Store->isNonTemporal(), Store->isVolatile(), - Store->getAlignment()); + HiAlign); return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore); } @@ -1529,7 +1492,7 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && Store->getValue().getValueType().isVector()) { - return ScalarizeVectorStore(Op, DAG); + return SplitVectorStore(Op, DAG); } EVT MemVT = Store->getMemoryVT(); @@ -1630,6 +1593,7 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool // float fb = (float)ib; SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib); + // TODO: Should this propagate fast-math-flags? // float fq = native_divide(fa, fb); SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT, fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb)); @@ -1940,6 +1904,8 @@ SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { SDValue X = Op.getOperand(0); SDValue Y = Op.getOperand(1); + // TODO: Should this propagate fast-math-flags? + SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y); SDValue Floor = DAG.getNode(ISD::FTRUNC, SL, VT, Div); SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Floor, Y); @@ -1968,6 +1934,7 @@ SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero); + // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } @@ -2045,6 +2012,8 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64); SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); + // TODO: Should this propagate fast-math-flags? + SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); @@ -2074,6 +2043,8 @@ SDValue AMDGPUTargetLowering::LowerFROUND32(SDValue Op, SelectionDAG &DAG) const SDValue T = DAG.getNode(ISD::FTRUNC, SL, MVT::f32, X); + // TODO: Should this propagate fast-math-flags? + SDValue Diff = DAG.getNode(ISD::FSUB, SL, MVT::f32, X, T); SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, MVT::f32, Diff); @@ -2184,6 +2155,7 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); + // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); } @@ -2206,7 +2178,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi, DAG.getConstant(32, SL, MVT::i32)); - + // TODO: Should this propagate fast-math-flags? return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); } @@ -2231,6 +2203,7 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, DAG.getConstant(1, DL, MVT::i32)); SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi); + // TODO: Should this propagate fast-math-flags? 
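+  // (The recombination below computes (float)x as
+  //  (float)(uint32_t)x + (float)(x >> 32) * 4294967296.0f.)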
FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32 return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); @@ -2257,7 +2230,7 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, MVT::f64); SDValue K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(0xc1f0000000000000)), SL, MVT::f64); - + // TODO: Should this propagate fast-math-flags? SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f64, Trunc, K0); SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, MVT::f64, Mul); @@ -2511,12 +2484,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, if (VT == MVT::f32) return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI); - - // TODO: Implement min / max Evergreen instructions. - if (VT == MVT::i32 && - Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { - return CombineIMinMax(DL, VT, LHS, RHS, True, False, CC, DAG); - } } break; @@ -2652,20 +2619,14 @@ bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { if (ConstantFPSDNode * CFP = dyn_cast(Op)) { return CFP->isExactlyValue(1.0); } - if (ConstantSDNode *C = dyn_cast(Op)) { - return C->isAllOnesValue(); - } - return false; + return isAllOnesConstant(Op); } bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { if (ConstantFPSDNode * CFP = dyn_cast(Op)) { return CFP->getValueAPF().isZero(); } - if (ConstantSDNode *C = dyn_cast(Op)) { - return C->isNullValue(); - } - return false; + return isNullConstant(Op); } SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, @@ -2738,7 +2699,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) - NODE_NAME_CASE(BREV) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) @@ -2893,8 +2853,7 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( return 1; unsigned SignBits = 32 - Width->getZExtValue() + 1; - ConstantSDNode *Offset = dyn_cast(Op.getOperand(1)); - if (!Offset || !Offset->isNullValue()) + if (!isNullConstant(Op.getOperand(1))) return SignBits; // TODO: Could probably figure something out with non-0 offsets. diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 478b2035fd75..7314cc050ba5 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -138,6 +138,7 @@ public: bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AS) const override; + bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; @@ -149,6 +150,9 @@ public: SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const; + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; void ReplaceNodeResults(SDNode * N, @@ -165,14 +169,6 @@ public: SDValue False, SDValue CC, DAGCombinerInfo &DCI) const; - SDValue CombineIMinMax(SDLoc DL, - EVT VT, - SDValue LHS, - SDValue RHS, - SDValue True, - SDValue False, - SDValue CC, - SelectionDAG &DAG) const; const char* getTargetNodeName(unsigned Opcode) const override; @@ -216,7 +212,7 @@ public: /// \brief Helper function that returns the byte offset of the given /// type of implicit parameter. 
-  unsigned getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
+  uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI,
                                       const ImplicitParameter Param) const;
 };

@@ -267,7 +263,6 @@ enum NodeType : unsigned {
   BFE_I32, // Extract range of bits with sign extension to 32-bits.
   BFI, // (src0 & src1) | (~src0 & src2)
   BFM, // Insert a range of bits into a 32-bit word.
-  BREV, // Reverse bits.
   MUL_U24,
   MUL_I24,
   MAD_U24,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 15a3d543a68c..a266e711af5b 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -164,11 +164,6 @@ MachineInstr *AMDGPUInstrInfo::foldMemoryOperandImpl(
   // TODO: Implement this function
   return nullptr;
 }
-bool AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
-                                           ArrayRef<unsigned> Ops) const {
-  // TODO: Implement this function
-  return false;
-}
 bool AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF,
                                           MachineInstr *MI,
                                           unsigned Reg, bool UnfoldLoad,
@@ -312,7 +307,9 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
     return -1;
   }

-  Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexOffset(MF, -1);
+  unsigned IgnoredFrameReg;
+  Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference(
+      MF, -1, IgnoredFrameReg);

   return getIndirectIndexBegin(MF) + Offset;
 }
@@ -367,3 +364,14 @@ int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {

   return MCOp;
 }
+
+ArrayRef<std::pair<int, const char *>>
+AMDGPUInstrInfo::getSerializableTargetIndices() const {
+  static const std::pair<int, const char *> TargetIndices[] = {
+      {AMDGPU::TI_CONSTDATA_START, "amdgpu-constdata-start"},
+      {AMDGPU::TI_SCRATCH_RSRC_DWORD0, "amdgpu-scratch-rsrc-dword0"},
+      {AMDGPU::TI_SCRATCH_RSRC_DWORD1, "amdgpu-scratch-rsrc-dword1"},
+      {AMDGPU::TI_SCRATCH_RSRC_DWORD2, "amdgpu-scratch-rsrc-dword2"},
+      {AMDGPU::TI_SCRATCH_RSRC_DWORD3, "amdgpu-scratch-rsrc-dword3"}};
+  return makeArrayRef(TargetIndices);
+}
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 86d3962b3856..53e8b23b3d62 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -103,8 +103,6 @@ public:
   /// read or write or -1 if indirect addressing is not used by this program.
   int getIndirectIndexEnd(const MachineFunction &MF) const;

-  bool canFoldMemoryOperand(const MachineInstr *MI,
-                            ArrayRef<unsigned> Ops) const override;
   bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                            unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                            SmallVectorImpl<MachineInstr *> &NewMIs) const override;
@@ -147,6 +145,9 @@ public:
     return get(pseudoToMCOpcode(Opcode));
   }

+  ArrayRef<std::pair<int, const char *>>
+  getSerializableTargetIndices() const override;
+
   //===---------------------------------------------------------------------===//
   // Pure virtual funtions to be implemented by sub-classes.
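The getSerializableTargetIndices() hook added above is what lets MIR serialization print and re-parse target-index machine operands by name rather than by raw integer. A minimal consumer-side sketch, assuming a hypothetical lookup helper (the table entries are the ones registered above; only ArrayRef itself is a real API here):

#include "llvm/ADT/ArrayRef.h"
#include <utility>

// Hypothetical helper: map a target-index operand back to the name it is
// serialized under, e.g. "target-index(amdgpu-constdata-start)" in MIR.
static const char *
lookupTargetIndexName(llvm::ArrayRef<std::pair<int, const char *>> Table,
                      int Index) {
  for (const auto &Entry : Table)
    if (Entry.first == Index) // Entry.second is the serialized name
      return Entry.second;
  return nullptr;             // not a serializable index
}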
//===---------------------------------------------------------------------===// @@ -195,6 +196,7 @@ public: }; namespace AMDGPU { + LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); } // End namespace AMDGPU diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index b413897d9d23..70e589c28429 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -191,8 +191,6 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; -def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>; - // Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when // performing the mulitply. The result is a 32-bit value. def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index 72cab39277c6..11f6139deddd 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -514,7 +514,7 @@ class POW_Common class Extract_Element : Pat< - (sub_type (vector_extract vec_type:$src, sub_idx)), + (sub_type (extractelt vec_type:$src, sub_idx)), (EXTRACT_SUBREG $src, sub_reg) >; @@ -522,7 +522,7 @@ class Extract_Element : Pat < - (vector_insert vec_type:$vec, elem_type:$elem, sub_idx), + (insertelt vec_type:$vec, elem_type:$elem, sub_idx), (INSERT_SUBREG $vec, $elem, sub_reg) >; diff --git a/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/lib/Target/AMDGPU/AMDGPUIntrinsics.td index ab489cd2a4ab..1de3546485b1 100644 --- a/lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ b/lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -69,8 +69,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; - def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; + def int_AMDGPU_barrier_local : Intrinsic<[], [], [IntrConvergent]>; + def int_AMDGPU_barrier_global : Intrinsic<[], [], [IntrConvergent]>; } // Legacy names for compatibility. 
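The IntrConvergent change above surfaces in the IR as the convergent attribute on the barrier declarations, which tells generic optimizers that the set of threads executing the call must not be changed. A sketch of the kind of guard this enables, assuming only that CallInst::isConvergent() is the attribute-level query (the helper itself is illustrative):

#include "llvm/IR/Instructions.h"

// Illustrative guard: a convergent call such as a workgroup barrier must not
// be sunk into, hoisted out of, or duplicated across divergent control flow,
// so a transform checks the attribute before moving the call.
static bool safeToMoveAcrossDivergentBranch(const llvm::CallInst &CI) {
  return !CI.isConvergent();
}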
diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 20831460b933..dfc652f31da5 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -61,7 +61,7 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createImm(MO.getImm()); break; case MachineOperand::MO_Register: - MCOp = MCOperand::createReg(MO.getReg()); + MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST)); break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( @@ -73,13 +73,6 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); break; } - case MachineOperand::MO_TargetIndex: { - assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START); - MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); - MCOp = MCOperand::createExpr(Expr); - break; - } case MachineOperand::MO_ExternalSymbol: { MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName())); const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); @@ -104,10 +97,9 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { #endif if (MI->isBundle()) { const MachineBasicBlock *MBB = MI->getParent(); - MachineBasicBlock::const_instr_iterator I = MI; - ++I; - while (I != MBB->end() && I->isInsideBundle()) { - EmitInstruction(I); + MachineBasicBlock::const_instr_iterator I = ++MI->getIterator(); + while (I != MBB->instr_end() && I->isInsideBundle()) { + EmitInstruction(&*I); ++I; } } else { @@ -136,8 +128,6 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCCodeEmitter &InstEmitter = ObjStreamer.getAssembler().getEmitter(); InstEmitter.encodeInstruction(TmpInst, CodeStream, Fixups, MF->getSubtarget()); - CodeStream.flush(); - HexLines.resize(HexLines.size() + 1); std::string &HexLine = HexLines.back(); raw_string_ostream HexStream(HexLine); diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp index 21c7da663234..54137177e4c0 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp @@ -1,11 +1,10 @@ #include "AMDGPUMachineFunction.h" #include "AMDGPU.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" using namespace llvm; -static const char *const ShaderTypeAttribute = "ShaderType"; - // Pin the vtable to this file. void AMDGPUMachineFunction::anchor() {} @@ -13,13 +12,9 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) : MachineFunctionInfo(), ShaderType(ShaderType::COMPUTE), LDSSize(0), + ABIArgOffset(0), ScratchSize(0), IsKernel(true) { - Attribute A = MF.getFunction()->getFnAttribute(ShaderTypeAttribute); - if (A.isStringAttribute()) { - StringRef Str = A.getValueAsString(); - if (Str.getAsInteger(0, ShaderType)) - llvm_unreachable("Can't parse shader type!"); - } + ShaderType = AMDGPU::getShaderType(*MF.getFunction()); } diff --git a/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/lib/Target/AMDGPU/AMDGPUMachineFunction.h index f5e4694e76f6..46fcee874887 100644 --- a/lib/Target/AMDGPU/AMDGPUMachineFunction.h +++ b/lib/Target/AMDGPU/AMDGPUMachineFunction.h @@ -37,6 +37,11 @@ public: return ShaderType; } + bool isKernel() const { + // FIXME: Assume everything is a kernel until function calls are supported. 
+ return true; + } + unsigned ScratchSize; bool IsKernel; }; diff --git a/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp b/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp new file mode 100644 index 000000000000..554bf1da81f5 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp @@ -0,0 +1,373 @@ +//===-- AMDGPUOpenCLImageTypeLoweringPass.cpp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This pass resolves calls to OpenCL image attribute, image resource ID and +/// sampler resource ID getter functions. +/// +/// Image attributes (size and format) are expected to be passed to the kernel +/// as kernel arguments immediately following the image argument itself, +/// therefore this pass adds image size and format arguments to the kernel +/// functions in the module. The kernel functions with image arguments are +/// re-created using the new signature. The new arguments are added to the +/// kernel metadata with kernel_arg_type set to "image_size" or "image_format". +/// Note: this pass may invalidate pointers to functions. +/// +/// Resource IDs of read-only images, write-only images and samplers are +/// defined to be their index among the kernel arguments of the same +/// type and access qualifier. +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" + +using namespace llvm; + +namespace { + +StringRef GetImageSizeFunc = "llvm.OpenCL.image.get.size"; +StringRef GetImageFormatFunc = "llvm.OpenCL.image.get.format"; +StringRef GetImageResourceIDFunc = "llvm.OpenCL.image.get.resource.id"; +StringRef GetSamplerResourceIDFunc = "llvm.OpenCL.sampler.get.resource.id"; + +StringRef ImageSizeArgMDType = "__llvm_image_size"; +StringRef ImageFormatArgMDType = "__llvm_image_format"; + +StringRef KernelsMDNodeName = "opencl.kernels"; +StringRef KernelArgMDNodeNames[] = { + "kernel_arg_addr_space", + "kernel_arg_access_qual", + "kernel_arg_type", + "kernel_arg_base_type", + "kernel_arg_type_qual"}; +const unsigned NumKernelArgMDNodes = 5; + +typedef SmallVector MDVector; +struct KernelArgMD { + MDVector ArgVector[NumKernelArgMDNodes]; +}; + +} // end anonymous namespace + +static inline bool +IsImageType(StringRef TypeString) { + return TypeString == "image2d_t" || TypeString == "image3d_t"; +} + +static inline bool +IsSamplerType(StringRef TypeString) { + return TypeString == "sampler_t"; +} + +static Function * +GetFunctionFromMDNode(MDNode *Node) { + if (!Node) + return nullptr; + + size_t NumOps = Node->getNumOperands(); + if (NumOps != NumKernelArgMDNodes + 1) + return nullptr; + + auto F = mdconst::dyn_extract(Node->getOperand(0)); + if (!F) + return nullptr; + + // Sanity checks. 
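+  // The layout checked below: each !opencl.kernels entry is the kernel
+  // function followed by one node per kernel_arg_* key, and every such node
+  // carries its name string plus one operand per formal argument.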
+  size_t ExpectNumArgNodeOps = F->arg_size() + 1;
+  for (size_t i = 0; i < NumKernelArgMDNodes; ++i) {
+    MDNode *ArgNode = dyn_cast_or_null<MDNode>(Node->getOperand(i + 1));
+    if (ArgNode->getNumOperands() != ExpectNumArgNodeOps)
+      return nullptr;
+    if (!ArgNode->getOperand(0))
+      return nullptr;
+
+    // FIXME: It should be possible to do image lowering when some metadata
+    // args missing or not in the expected order.
+    MDString *StringNode = dyn_cast<MDString>(ArgNode->getOperand(0));
+    if (!StringNode || StringNode->getString() != KernelArgMDNodeNames[i])
+      return nullptr;
+  }
+
+  return F;
+}
+
+static StringRef
+AccessQualFromMD(MDNode *KernelMDNode, unsigned ArgIdx) {
+  MDNode *ArgAQNode = cast<MDNode>(KernelMDNode->getOperand(2));
+  return cast<MDString>(ArgAQNode->getOperand(ArgIdx + 1))->getString();
+}
+
+static StringRef
+ArgTypeFromMD(MDNode *KernelMDNode, unsigned ArgIdx) {
+  MDNode *ArgTypeNode = cast<MDNode>(KernelMDNode->getOperand(3));
+  return cast<MDString>(ArgTypeNode->getOperand(ArgIdx + 1))->getString();
+}
+
+static MDVector
+GetArgMD(MDNode *KernelMDNode, unsigned OpIdx) {
+  MDVector Res;
+  for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) {
+    MDNode *Node = cast<MDNode>(KernelMDNode->getOperand(i + 1));
+    Res.push_back(Node->getOperand(OpIdx));
+  }
+  return Res;
+}
+
+static void
+PushArgMD(KernelArgMD &MD, const MDVector &V) {
+  assert(V.size() == NumKernelArgMDNodes);
+  for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) {
+    MD.ArgVector[i].push_back(V[i]);
+  }
+}
+
+namespace {
+
+class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
+  static char ID;
+
+  LLVMContext *Context;
+  Type *Int32Type;
+  Type *ImageSizeType;
+  Type *ImageFormatType;
+  SmallVector<Instruction *, 4> InstsToErase;
+
+  bool replaceImageUses(Argument &ImageArg, uint32_t ResourceID,
+                        Argument &ImageSizeArg,
+                        Argument &ImageFormatArg) {
+    bool Modified = false;
+
+    for (auto &Use : ImageArg.uses()) {
+      auto Inst = dyn_cast<CallInst>(Use.getUser());
+      if (!Inst) {
+        continue;
+      }
+
+      Function *F = Inst->getCalledFunction();
+      if (!F)
+        continue;
+
+      Value *Replacement = nullptr;
+      StringRef Name = F->getName();
+      if (Name.startswith(GetImageResourceIDFunc)) {
+        Replacement = ConstantInt::get(Int32Type, ResourceID);
+      } else if (Name.startswith(GetImageSizeFunc)) {
+        Replacement = &ImageSizeArg;
+      } else if (Name.startswith(GetImageFormatFunc)) {
+        Replacement = &ImageFormatArg;
+      } else {
+        continue;
+      }
+
+      Inst->replaceAllUsesWith(Replacement);
+      InstsToErase.push_back(Inst);
+      Modified = true;
+    }
+
+    return Modified;
+  }
+
+  bool replaceSamplerUses(Argument &SamplerArg, uint32_t ResourceID) {
+    bool Modified = false;
+
+    for (const auto &Use : SamplerArg.uses()) {
+      auto Inst = dyn_cast<CallInst>(Use.getUser());
+      if (!Inst) {
+        continue;
+      }
+
+      Function *F = Inst->getCalledFunction();
+      if (!F)
+        continue;
+
+      Value *Replacement = nullptr;
+      StringRef Name = F->getName();
+      if (Name == GetSamplerResourceIDFunc) {
+        Replacement = ConstantInt::get(Int32Type, ResourceID);
+      } else {
+        continue;
+      }
+
+      Inst->replaceAllUsesWith(Replacement);
+      InstsToErase.push_back(Inst);
+      Modified = true;
+    }
+
+    return Modified;
+  }
+
+  bool replaceImageAndSamplerUses(Function *F, MDNode *KernelMDNode) {
+    uint32_t NumReadOnlyImageArgs = 0;
+    uint32_t NumWriteOnlyImageArgs = 0;
+    uint32_t NumSamplerArgs = 0;
+
+    bool Modified = false;
+    InstsToErase.clear();
+    for (auto ArgI = F->arg_begin(); ArgI != F->arg_end(); ++ArgI) {
+      Argument &Arg = *ArgI;
+      StringRef Type = ArgTypeFromMD(KernelMDNode, Arg.getArgNo());
+
+      // Handle image types.
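+      // (Read-only and write-only images are numbered independently, and the
+      //  size/format arguments consumed here are the implicit parameters that
+      //  addImplicitArgs() appends directly after each image argument.)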
+ if (IsImageType(Type)) { + StringRef AccessQual = AccessQualFromMD(KernelMDNode, Arg.getArgNo()); + uint32_t ResourceID; + if (AccessQual == "read_only") { + ResourceID = NumReadOnlyImageArgs++; + } else if (AccessQual == "write_only") { + ResourceID = NumWriteOnlyImageArgs++; + } else { + llvm_unreachable("Wrong image access qualifier."); + } + + Argument &SizeArg = *(++ArgI); + Argument &FormatArg = *(++ArgI); + Modified |= replaceImageUses(Arg, ResourceID, SizeArg, FormatArg); + + // Handle sampler type. + } else if (IsSamplerType(Type)) { + uint32_t ResourceID = NumSamplerArgs++; + Modified |= replaceSamplerUses(Arg, ResourceID); + } + } + for (unsigned i = 0; i < InstsToErase.size(); ++i) { + InstsToErase[i]->eraseFromParent(); + } + + return Modified; + } + + std::tuple + addImplicitArgs(Function *F, MDNode *KernelMDNode) { + bool Modified = false; + + FunctionType *FT = F->getFunctionType(); + SmallVector ArgTypes; + + // Metadata operands for new MDNode. + KernelArgMD NewArgMDs; + PushArgMD(NewArgMDs, GetArgMD(KernelMDNode, 0)); + + // Add implicit arguments to the signature. + for (unsigned i = 0; i < FT->getNumParams(); ++i) { + ArgTypes.push_back(FT->getParamType(i)); + MDVector ArgMD = GetArgMD(KernelMDNode, i + 1); + PushArgMD(NewArgMDs, ArgMD); + + if (!IsImageType(ArgTypeFromMD(KernelMDNode, i))) + continue; + + // Add size implicit argument. + ArgTypes.push_back(ImageSizeType); + ArgMD[2] = ArgMD[3] = MDString::get(*Context, ImageSizeArgMDType); + PushArgMD(NewArgMDs, ArgMD); + + // Add format implicit argument. + ArgTypes.push_back(ImageFormatType); + ArgMD[2] = ArgMD[3] = MDString::get(*Context, ImageFormatArgMDType); + PushArgMD(NewArgMDs, ArgMD); + + Modified = true; + } + if (!Modified) { + return std::make_tuple(nullptr, nullptr); + } + + // Create function with new signature and clone the old body into it. + auto NewFT = FunctionType::get(FT->getReturnType(), ArgTypes, false); + auto NewF = Function::Create(NewFT, F->getLinkage(), F->getName()); + ValueToValueMapTy VMap; + auto NewFArgIt = NewF->arg_begin(); + for (auto &Arg: F->args()) { + auto ArgName = Arg.getName(); + NewFArgIt->setName(ArgName); + VMap[&Arg] = &(*NewFArgIt++); + if (IsImageType(ArgTypeFromMD(KernelMDNode, Arg.getArgNo()))) { + (NewFArgIt++)->setName(Twine("__size_") + ArgName); + (NewFArgIt++)->setName(Twine("__format_") + ArgName); + } + } + SmallVector Returns; + CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns); + + // Build new MDNode. + SmallVector KernelMDArgs; + KernelMDArgs.push_back(ConstantAsMetadata::get(NewF)); + for (unsigned i = 0; i < NumKernelArgMDNodes; ++i) + KernelMDArgs.push_back(MDNode::get(*Context, NewArgMDs.ArgVector[i])); + MDNode *NewMDNode = MDNode::get(*Context, KernelMDArgs); + + return std::make_tuple(NewF, NewMDNode); + } + + bool transformKernels(Module &M) { + NamedMDNode *KernelsMDNode = M.getNamedMetadata(KernelsMDNodeName); + if (!KernelsMDNode) + return false; + + bool Modified = false; + for (unsigned i = 0; i < KernelsMDNode->getNumOperands(); ++i) { + MDNode *KernelMDNode = KernelsMDNode->getOperand(i); + Function *F = GetFunctionFromMDNode(KernelMDNode); + if (!F) + continue; + + Function *NewF; + MDNode *NewMDNode; + std::tie(NewF, NewMDNode) = addImplicitArgs(F, KernelMDNode); + if (NewF) { + // Replace old function and metadata with new ones. 
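+        // (F is erased before NewF is inserted so NewF can keep the original
+        //  name; the metadata operand is updated in place so the kernel entry
+        //  keeps its index.)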
+ F->eraseFromParent(); + M.getFunctionList().push_back(NewF); + M.getOrInsertFunction(NewF->getName(), NewF->getFunctionType(), + NewF->getAttributes()); + KernelsMDNode->setOperand(i, NewMDNode); + + F = NewF; + KernelMDNode = NewMDNode; + Modified = true; + } + + Modified |= replaceImageAndSamplerUses(F, KernelMDNode); + } + + return Modified; + } + + public: + AMDGPUOpenCLImageTypeLoweringPass() : ModulePass(ID) {} + + bool runOnModule(Module &M) override { + Context = &M.getContext(); + Int32Type = Type::getInt32Ty(M.getContext()); + ImageSizeType = ArrayType::get(Int32Type, 3); + ImageFormatType = ArrayType::get(Int32Type, 2); + + return transformKernels(M); + } + + const char *getPassName() const override { + return "AMDGPU OpenCL Image Type Pass"; + } +}; + +char AMDGPUOpenCLImageTypeLoweringPass::ID = 0; + +} // end anonymous namespace + +ModulePass *llvm::createAMDGPUOpenCLImageTypeLoweringPass() { + return new AMDGPUOpenCLImageTypeLoweringPass(); +} diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 57b7a73bf56c..87d50d587059 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -54,7 +54,7 @@ bool AMDGPUPromoteAlloca::doInitialization(Module &M) { bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { - const FunctionType *FTy = F.getFunctionType(); + FunctionType *FTy = F.getFunctionType(); LocalMemAvailable = ST.getLocalMemorySize(); @@ -63,7 +63,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { // possible these arguments require the entire local memory space, so // we cannot use local memory in the pass. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { - const Type *ParamTy = FTy->getParamType(i); + Type *ParamTy = FTy->getParamType(i); if (ParamTy->isPointerTy() && ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { LocalMemAvailable = 0; @@ -77,7 +77,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { // Check how much local memory is being used by global objects for (Module::global_iterator I = Mod->global_begin(), E = Mod->global_end(); I != E; ++I) { - GlobalVariable *GV = I; + GlobalVariable *GV = &*I; PointerType *GVTy = GV->getType(); if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) continue; @@ -101,7 +101,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { return false; } -static VectorType *arrayTypeToVecType(const Type *ArrayTy) { +static VectorType *arrayTypeToVecType(Type *ArrayTy) { return VectorType::get(ArrayTy->getArrayElementType(), ArrayTy->getArrayNumElements()); } @@ -276,6 +276,9 @@ static bool collectUsesWithPtrTypes(Value *Val, std::vector &WorkList) { } void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { + if (!I.isStaticAlloca()) + return; + IRBuilder<> Builder(&I); // First try to replace the alloca with a vector diff --git a/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/lib/Target/AMDGPU/AMDGPURegisterInfo.h index cfd800bdc703..0344834328f6 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ b/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -37,10 +37,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { assert(!"Unimplemented"); return BitVector(); } - virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const { - assert(!"Unimplemented"); return nullptr; - } - virtual unsigned getHWRegIndex(unsigned Reg) const { assert(!"Unimplemented"); return 0; } diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 
5f32a65c9338..44e0c47877a9 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -16,6 +16,7 @@ #include "R600ISelLowering.h" #include "R600InstrInfo.h" #include "R600MachineScheduler.h" +#include "SIFrameLowering.h" #include "SIISelLowering.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" @@ -44,6 +45,8 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, // disable it. SmallString<256> FullFS("+promote-alloca,+fp64-denormals,"); + if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. + FullFS += "+flat-for-global,"; FullFS += FS; if (GPU == "" && TT.getArch() == Triple::amdgcn) @@ -67,26 +70,36 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, DumpCode(false), R600ALUInst(false), HasVertexCache(false), TexVTXClauseSize(0), Gen(AMDGPUSubtarget::R600), FP64(false), FP64Denormals(false), FP32Denormals(false), FastFMAF32(false), - CaymanISA(false), FlatAddressSpace(false), EnableIRStructurizer(true), - EnablePromoteAlloca(false), EnableIfCvt(true), EnableLoadStoreOpt(false), - EnableUnsafeDSOffsetFolding(false), + CaymanISA(false), FlatAddressSpace(false), FlatForGlobal(false), + EnableIRStructurizer(true), EnablePromoteAlloca(false), EnableIfCvt(true), + EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false), WavefrontSize(0), CFALUBug(false), LocalMemorySize(0), EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false), GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0), IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false), - FrameLowering(TargetFrameLowering::StackGrowsUp, - 64 * 16, // Maximum stack alignment (long16) - 0), + FrameLowering(nullptr), InstrItins(getInstrItineraryForCPU(GPU)), TargetTriple(TT) { initializeSubtargetDependencies(TT, GPU, FS); + const unsigned MaxStackAlign = 64 * 16; // Maximum stack alignment (long16) + if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { InstrInfo.reset(new R600InstrInfo(*this)); TLInfo.reset(new R600TargetLowering(TM, *this)); + + // FIXME: Should have R600 specific FrameLowering + FrameLowering.reset(new AMDGPUFrameLowering( + TargetFrameLowering::StackGrowsUp, + MaxStackAlign, + 0)); } else { InstrInfo.reset(new SIInstrInfo(*this)); TLInfo.reset(new SITargetLowering(TM, *this)); + FrameLowering.reset(new SIFrameLowering( + TargetFrameLowering::StackGrowsUp, + MaxStackAlign, + 0)); } } diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 735f01dfa7c5..9c7bb88f8f4a 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1,4 +1,4 @@ -//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====// +//=====-- AMDGPUSubtarget.h - Define Subtarget for AMDGPU ------*- C++ -*-====// // // The LLVM Compiler Infrastructure // @@ -12,17 +12,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H -#define LLVM_LIB_TARGET_R600_AMDGPUSUBTARGET_H +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H + #include "AMDGPU.h" #include "AMDGPUFrameLowering.h" #include "AMDGPUInstrInfo.h" -#include "AMDGPUIntrinsicInfo.h" +#include "AMDGPUISelLowering.h" #include "AMDGPUSubtarget.h" -#include "R600ISelLowering.h" -#include "AMDKernelCodeT.h" #include "Utils/AMDGPUBaseInfo.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ 
-72,6 +70,7 @@ private: bool FastFMAF32; bool CaymanISA; bool FlatAddressSpace; + bool FlatForGlobal; bool EnableIRStructurizer; bool EnablePromoteAlloca; bool EnableIfCvt; @@ -88,10 +87,10 @@ private: bool CIInsts; bool FeatureDisable; int LDSBankCount; - unsigned IsaVersion; + unsigned IsaVersion; bool EnableHugeScratchBuffer; - AMDGPUFrameLowering FrameLowering; + std::unique_ptr FrameLowering; std::unique_ptr TLInfo; std::unique_ptr InstrInfo; InstrItineraryData InstrItins; @@ -104,7 +103,7 @@ public: StringRef GPU, StringRef FS); const AMDGPUFrameLowering *getFrameLowering() const override { - return &FrameLowering; + return FrameLowering.get(); } const AMDGPUInstrInfo *getInstrInfo() const override { return InstrInfo.get(); @@ -161,6 +160,10 @@ public: return FlatAddressSpace; } + bool useFlatForGlobal() const { + return FlatForGlobal; + } + bool hasBFE() const { return (getGeneration() >= EVERGREEN); } @@ -305,6 +308,9 @@ public: return isAmdHsaOS() ? 0 : 36; } + unsigned getMaxNumUserSGPRs() const { + return 16; + } }; } // End namespace llvm diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 2297b52b423c..22f85b3e663c 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUTargetMachine.h" +#include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" #include "AMDGPUTargetTransformInfo.h" #include "R600ISelLowering.h" @@ -41,6 +42,23 @@ extern "C" void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine X(TheAMDGPUTarget); RegisterTargetMachine Y(TheGCNTarget); + + PassRegistry *PR = PassRegistry::getPassRegistry(); + initializeSILowerI1CopiesPass(*PR); + initializeSIFixSGPRCopiesPass(*PR); + initializeSIFoldOperandsPass(*PR); + initializeSIFixSGPRLiveRangesPass(*PR); + initializeSIFixControlFlowLiveIntervalsPass(*PR); + initializeSILoadStoreOptimizerPass(*PR); + initializeAMDGPUAnnotateKernelFeaturesPass(*PR); + initializeAMDGPUAnnotateUniformValuesPass(*PR); +} + +static std::unique_ptr createTLOF(const Triple &TT) { + if (TT.getOS() == Triple::AMDHSA) + return make_unique(); + + return make_unique(); } static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { @@ -72,15 +90,13 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OptLevel) : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, OptLevel), - TLOF(new TargetLoweringObjectFileELF()), Subtarget(TT, CPU, FS, *this), + TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this), IntrinsicInfo() { setRequiresStructuredCFG(true); initAsmInfo(); } -AMDGPUTargetMachine::~AMDGPUTargetMachine() { - delete TLOF; -} +AMDGPUTargetMachine::~AMDGPUTargetMachine() { } //===----------------------------------------------------------------------===// // R600 Target Machine (R600 -> Cayman) @@ -110,7 +126,13 @@ namespace { class AMDGPUPassConfig : public TargetPassConfig { public: AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + + // Exceptions and StackMaps are not supported, so these passes will never do + // anything. 
+ disablePass(&StackMapLivenessID); + disablePass(&FuncletLayoutID); + } AMDGPUTargetMachine &getAMDGPUTargetMachine() const { return getTM(); @@ -126,8 +148,9 @@ public: void addIRPasses() override; void addCodeGenPrepare() override; - virtual bool addPreISel() override; - virtual bool addInstSelector() override; + bool addPreISel() override; + bool addInstSelector() override; + bool addGCPasses() override; }; class R600PassConfig : public AMDGPUPassConfig { @@ -147,6 +170,8 @@ public: : AMDGPUPassConfig(TM, PM) { } bool addPreISel() override; bool addInstSelector() override; + void addFastRegAlloc(FunctionPass *RegAllocPass) override; + void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; void addPreRegAlloc() override; void addPostRegAlloc() override; void addPreSched2() override; @@ -156,7 +181,7 @@ public: } // End of anonymous namespace TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo( AMDGPUTTIImpl(this, F.getParent()->getDataLayout())); }); @@ -172,6 +197,10 @@ void AMDGPUPassConfig::addIRPasses() { // functions, then we will generate code for the first function // without ever running any passes on the second. addPass(createBarrierNoopPass()); + + // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. + addPass(createAMDGPUOpenCLImageTypeLoweringPass()); + TargetPassConfig::addIRPasses(); } @@ -198,6 +227,11 @@ bool AMDGPUPassConfig::addInstSelector() { return false; } +bool AMDGPUPassConfig::addGCPasses() { + // Do nothing. GC is not supported. + return false; +} + //===----------------------------------------------------------------------===// // R600 Pass Setup //===----------------------------------------------------------------------===// @@ -238,16 +272,23 @@ TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { bool GCNPassConfig::addPreISel() { AMDGPUPassConfig::addPreISel(); + + // FIXME: We need to run a pass to propagate the attributes when calls are + // supported. + addPass(&AMDGPUAnnotateKernelFeaturesID); + addPass(createSinkingPass()); addPass(createSITypeRewriter()); addPass(createSIAnnotateControlFlowPass()); + addPass(createAMDGPUAnnotateUniformValues()); + return false; } bool GCNPassConfig::addInstSelector() { AMDGPUPassConfig::addInstSelector(); addPass(createSILowerI1CopiesPass()); - addPass(createSIFixSGPRCopiesPass(*TM)); + addPass(&SIFixSGPRCopiesID); addPass(createSIFoldOperandsPass()); return false; } @@ -259,7 +300,6 @@ void GCNPassConfig::addPreRegAlloc() { // earlier passes might recompute live intervals. // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass if (getOptLevel() > CodeGenOpt::None) { - initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry()); insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); } @@ -269,16 +309,27 @@ void GCNPassConfig::addPreRegAlloc() { // This should be run after scheduling, but before register allocation. It // also need extra copies to the address operand to be eliminated. 
- initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); insertPass(&MachineSchedulerID, &RegisterCoalescerID); } addPass(createSIShrinkInstructionsPass(), false); - addPass(createSIFixSGPRLiveRangesPass(), false); +} + +void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + addPass(&SIFixSGPRLiveRangesID); + TargetPassConfig::addFastRegAlloc(RegAllocPass); +} + +void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + // We want to run this after LiveVariables is computed to avoid computing them + // twice. + // FIXME: We shouldn't disable the verifier here. r249087 introduced a failure + // that needs to be fixed. + insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID, /*VerifyAfter=*/false); + TargetPassConfig::addOptimizedRegAlloc(RegAllocPass); } void GCNPassConfig::addPostRegAlloc() { - addPass(createSIPrepareScratchRegs(), false); addPass(createSIShrinkInstructionsPass(), false); } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 14792e347a7a..236e3f824030 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -32,7 +32,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { private: protected: - TargetLoweringObjectFile *TLOF; + std::unique_ptr TLOF; AMDGPUSubtarget Subtarget; AMDGPUIntrinsicInfo IntrinsicInfo; @@ -52,7 +52,7 @@ public: TargetIRAnalysis getTargetIRAnalysis() override; TargetLoweringObjectFile *getObjFileLowering() const override { - return TLOF; + return TLOF.get(); } }; diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp new file mode 100644 index 000000000000..e050f21091ba --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -0,0 +1,87 @@ +//===-- AMDGPUHSATargetObjectFile.cpp - AMDGPU Object Files ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUTargetObjectFile.h" +#include "AMDGPU.h" +#include "Utils/AMDGPUBaseInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Object File +//===----------------------------------------------------------------------===// + +MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const { + if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV)) + return TextSection; + + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM); +} + +//===----------------------------------------------------------------------===// +// HSA Object File +//===----------------------------------------------------------------------===// + + +void AMDGPUHSATargetObjectFile::Initialize(MCContext &Ctx, + const TargetMachine &TM){ + TargetLoweringObjectFileELF::Initialize(Ctx, TM); + InitializeELF(TM.Options.UseInitArray); + + TextSection = AMDGPU::getHSATextSection(Ctx); + + DataGlobalAgentSection = AMDGPU::getHSADataGlobalAgentSection(Ctx); + DataGlobalProgramSection = AMDGPU::getHSADataGlobalProgramSection(Ctx); + + RodataReadonlyAgentSection = AMDGPU::getHSARodataReadonlyAgentSection(Ctx); +} + +bool AMDGPUHSATargetObjectFile::isAgentAllocationSection( + const char *SectionName) const { + return cast(DataGlobalAgentSection) + ->getSectionName() + .equals(SectionName); +} + +bool AMDGPUHSATargetObjectFile::isAgentAllocation(const GlobalValue *GV) const { + // Read-only segments can only have agent allocation. + return AMDGPU::isReadOnlySegment(GV) || + (AMDGPU::isGlobalSegment(GV) && GV->hasSection() && + isAgentAllocationSection(GV->getSection())); +} + +bool AMDGPUHSATargetObjectFile::isProgramAllocation( + const GlobalValue *GV) const { + // The default for global segments is program allocation. + return AMDGPU::isGlobalSegment(GV) && !isAgentAllocation(GV); +} + +MCSection *AMDGPUHSATargetObjectFile::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const { + if (Kind.isText() && !GV->hasComdat()) + return getTextSection(); + + if (AMDGPU::isGlobalSegment(GV)) { + if (isAgentAllocation(GV)) + return DataGlobalAgentSection; + + if (isProgramAllocation(GV)) + return DataGlobalProgramSection; + } + + return AMDGPUTargetObjectFile::SelectSectionForGlobal(GV, Kind, Mang, TM); +} diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h new file mode 100644 index 000000000000..921341ebb897 --- /dev/null +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h @@ -0,0 +1,51 @@ +//===-- AMDGPUTargetObjectFile.h - AMDGPU Object Info ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares the AMDGPU-specific subclass of +/// TargetLoweringObjectFile. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + +class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF { + public: + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; +}; + +class AMDGPUHSATargetObjectFile final : public AMDGPUTargetObjectFile { +private: + MCSection *DataGlobalAgentSection; + MCSection *DataGlobalProgramSection; + MCSection *RodataReadonlyAgentSection; + + bool isAgentAllocationSection(const char *SectionName) const; + bool isAgentAllocation(const GlobalValue *GV) const; + bool isProgramAllocation(const GlobalValue *GV) const; + +public: + void Initialize(MCContext &Ctx, const TargetMachine &TM) override; + + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 6dacc742b129..54a003d6a9cf 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -74,9 +74,109 @@ unsigned AMDGPUTTIImpl::getNumberOfRegisters(bool Vec) { return 4 * 128; // XXX - 4 channels. Should these count as vector instead? } -unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool) { return 32; } +unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) { + return Vector ? 0 : 32; +} unsigned AMDGPUTTIImpl::getMaxInterleaveFactor(unsigned VF) { // Semi-arbitrary large amount. return 64; } + +unsigned AMDGPUTTIImpl::getCFInstrCost(unsigned Opcode) { + // XXX - For some reason this isn't called for switch. + switch (Opcode) { + case Instruction::Br: + case Instruction::Ret: + return 10; + default: + return BaseT::getCFInstrCost(Opcode); + } +} + +int AMDGPUTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, + unsigned Index) { + switch (Opcode) { + case Instruction::ExtractElement: + // Dynamic indexing isn't free and is best avoided. + return Index == ~0u ? 2 : 0; + default: + return BaseT::getVectorInstrCost(Opcode, ValTy, Index); + } +} + +static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII, + const IntrinsicInst *I) { + switch (I->getIntrinsicID()) { + default: + return false; + case Intrinsic::not_intrinsic: + // This means we have an intrinsic that isn't defined in + // IntrinsicsAMDGPU.td + break; + + case Intrinsic::amdgcn_interp_p1: + case Intrinsic::amdgcn_interp_p2: + case Intrinsic::amdgcn_mbcnt_hi: + case Intrinsic::amdgcn_mbcnt_lo: + case Intrinsic::r600_read_tidig_x: + case Intrinsic::r600_read_tidig_y: + case Intrinsic::r600_read_tidig_z: + return true; + } + + StringRef Name = I->getCalledFunction()->getName(); + switch (TII->lookupName((const char *)Name.bytes_begin(), Name.size())) { + default: + return false; + case AMDGPUIntrinsic::SI_tid: + case AMDGPUIntrinsic::SI_fs_interp: + return true; + } +} + +static bool isArgPassedInSGPR(const Argument *A) { + const Function *F = A->getParent(); + unsigned ShaderType = AMDGPU::getShaderType(*F); + + // Arguments to compute shaders are never a source of divergence. 
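+  // (Kernel arguments are fetched with scalar loads from the kernarg
+  //  segment, so every lane of a wavefront sees the same value.)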
+  if (ShaderType == ShaderType::COMPUTE)
+    return true;
+
+  // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
+  if (F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::InReg) ||
+      F->getAttributes().hasAttribute(A->getArgNo() + 1, Attribute::ByVal))
+    return true;
+
+  // Everything else is in VGPRs.
+  return false;
+}
+
+///
+/// \returns true if the result of the value could potentially be
+/// different across workitems in a wavefront.
+bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
+
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return !isArgPassedInSGPR(A);
+
+  // Loads from the private address space are divergent, because threads
+  // can execute the load instruction with the same inputs and get different
+  // results.
+  //
+  // All other loads are not divergent, because if threads issue loads with the
+  // same arguments, they will always get the same result.
+  if (const LoadInst *Load = dyn_cast<LoadInst>(V))
+    return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+    const TargetMachine &TM = getTLI()->getTargetMachine();
+    return isIntrinsicSourceOfDivergence(TM.getIntrinsicInfo(), Intrinsic);
+  }
+
+  // Assume all function calls are a source of divergence.
+  if (isa<CallInst>(V) || isa<InvokeInst>(V))
+    return true;
+
+  return false;
+}
diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index dee0a69d1e68..976afb03443b 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -60,6 +60,11 @@ public:
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector);
   unsigned getMaxInterleaveFactor(unsigned VF);
+
+  unsigned getCFInstrCost(unsigned Opcode);
+
+  int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
+  bool isSourceOfDivergence(const Value *V) const;
 };

 } // end namespace llvm
diff --git a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index d918ac3a5b3b..917efd149e00 100644
--- a/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -185,7 +185,7 @@ protected:
   MachinePostDominatorTree *PDT;
   MachineLoopInfo *MLI;
   const R600InstrInfo *TII;
-  const AMDGPURegisterInfo *TRI;
+  const R600RegisterInfo *TRI;

   // PRINT FUNCTIONS
   /// Print the ordered Blocks.
@@ -881,7 +881,7 @@ bool AMDGPUCFGStructurizer::run() {
   } //while, "one iteration" over the function.

   MachineBasicBlock *EntryMBB =
-      GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
+      &*GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
   if (EntryMBB->succ_size() == 0) {
     Finish = true;
     DEBUG(
@@ -904,7 +904,7 @@ bool AMDGPUCFGStructurizer::run() {
   } while (!Finish && MakeProgress);

   // Misc wrap up to maintain the consistency of the Function representation.
-  wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
+  wrapup(&*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));

   // Detach retired Block, release memory.
for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); @@ -1164,7 +1164,7 @@ int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep, for (SmallVectorImpl::iterator It = ContMBB.begin(), E = ContMBB.end(); It != E; ++It) { - (*It)->removeSuccessor(LoopHeader); + (*It)->removeSuccessor(LoopHeader, true); } numLoopcontPatternMatch += NumCont; @@ -1353,7 +1353,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, // If MigrateTrue is true, then TrueBB is the block being "branched into" // and if MigrateFalse is true, then FalseBB is the block being // "branched into" - // + // // Here is the pseudo code for how I think the optimization should work: // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. @@ -1372,7 +1372,7 @@ int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, // the late machine optimization passes, however if we implement // bool TargetRegisterInfo::requiresRegisterScavenging( // const MachineFunction &MF) - // and have it return true, liveness will be tracked correctly + // and have it return true, liveness will be tracked correctly // by generic optimization passes. We will also need to make sure that // all of our target-specific passes that run after regalloc and before // the CFGStructurizer track liveness and we will need to modify this pass @@ -1487,7 +1487,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, ); DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); - DstMBB->removeSuccessor(SrcMBB); + DstMBB->removeSuccessor(SrcMBB, true); cloneSuccessorList(DstMBB, SrcMBB); removeSuccessor(SrcMBB); @@ -1537,9 +1537,9 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, if (TrueMBB) { MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); - MBB->removeSuccessor(TrueMBB); + MBB->removeSuccessor(TrueMBB, true); if (LandMBB && TrueMBB->succ_size()!=0) - TrueMBB->removeSuccessor(LandMBB); + TrueMBB->removeSuccessor(LandMBB, true); retireBlock(TrueMBB); MLI->removeBlock(TrueMBB); } @@ -1548,9 +1548,9 @@ void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, insertInstrBefore(I, AMDGPU::ELSE); MBB->splice(I, FalseMBB, FalseMBB->begin(), FalseMBB->end()); - MBB->removeSuccessor(FalseMBB); + MBB->removeSuccessor(FalseMBB, true); if (LandMBB && FalseMBB->succ_size() != 0) - FalseMBB->removeSuccessor(LandMBB); + FalseMBB->removeSuccessor(LandMBB, true); retireBlock(FalseMBB); MLI->removeBlock(FalseMBB); } @@ -1570,8 +1570,7 @@ void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); - DstBlk->addSuccessor(LandMBB); - DstBlk->removeSuccessor(DstBlk); + DstBlk->replaceSuccessor(DstBlk, LandMBB); } @@ -1592,7 +1591,7 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, //now branchInst can be erase safely BranchMI->eraseFromParent(); //now take care of successors, retire blocks - ExitingMBB->removeSuccessor(LandMBB); + ExitingMBB->removeSuccessor(LandMBB, true); } void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, @@ -1666,8 +1665,7 @@ AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); //srcBlk, oldBlk, newBlk - PredMBB->removeSuccessor(MBB); - 
PredMBB->addSuccessor(CloneMBB);
+  PredMBB->replaceSuccessor(MBB, CloneMBB);
 
   // add all successor to cloneBlk
   cloneSuccessorList(CloneMBB, MBB);
@@ -1695,10 +1693,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
     );
     SpliceEnd = SrcMBB->end();
   } else {
-    DEBUG(
-      dbgs() << "migrateInstruction see branch instr\n" ;
-      BranchMI->dump();
-    );
+    DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
     SpliceEnd = BranchMI;
   }
   DEBUG(
@@ -1711,7 +1706,7 @@ void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
 
   DEBUG(
     dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
-      << "srcSize = " << SrcMBB->size() << "\n";
+           << "srcSize = " << SrcMBB->size() << '\n';
   );
 }
@@ -1743,7 +1738,7 @@ void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
   // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
   while ((BranchMI = getLoopendBlockBranchInstr(MBB))
           && isUncondBranch(BranchMI)) {
-    DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
+    DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
     BranchMI->eraseFromParent();
   }
 }
@@ -1759,10 +1754,10 @@ void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
   MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
   assert(BranchMI && isCondBranch(BranchMI));
 
-  DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
+  DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
   BranchMI->eraseFromParent();
   SHOWNEWBLK(MBB1, "Removing redundant successor");
-  MBB->removeSuccessor(MBB1);
+  MBB->removeSuccessor(MBB1, true);
 }
 
 void AMDGPUCFGStructurizer::addDummyExitBlock(
diff --git a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2018983bc306..d9f753f40133 100644
--- a/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -28,7 +28,9 @@
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
 #include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/raw_ostream.h"
@@ -83,6 +85,7 @@ public:
     unsigned RegNo;
     int Modifiers;
     const MCRegisterInfo *TRI;
+    const MCSubtargetInfo *STI;
     bool IsForcedVOP3;
   };
 
@@ -102,7 +105,7 @@ public:
   }
 
   void addRegOperands(MCInst &Inst, unsigned N) const {
-    Inst.addOperand(MCOperand::createReg(getReg()));
+    Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI)));
   }
 
   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
@@ -215,6 +218,10 @@ public:
            (isReg() && isRegClass(AMDGPU::SReg_64RegClassID));
   }
 
+  bool isSCSrc64() const {
+    return (isReg() && isRegClass(AMDGPU::SReg_64RegClassID)) || isInlineImm();
+  }
+
   bool isVCSrc32() const {
     return isInlineImm() || (isReg() && isRegClass(AMDGPU::VS_32RegClassID));
   }
@@ -251,7 +258,22 @@ public:
     return EndLoc;
   }
 
-  void print(raw_ostream &OS) const override { }
+  void print(raw_ostream &OS) const override {
+    switch (Kind) {
+    case Register:
+      OS << "<register " << getReg() << " mods: " << Reg.Modifiers << '>';
+      break;
+    case Immediate:
+      OS << getImm();
+      break;
+    case Token:
+      OS << '\'' << getToken() << '\'';
+      break;
+    case Expression:
+      OS << "<expr " << *Expr << '>';
+      break;
+    }
+  }
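// A note on the MCSubtargetInfo now carried by register operands (an
// illustrative aside, not part of the patch): the same symbolic register can
// resolve to different hardware registers per subtarget, so resolution is
// deferred until the operand is added to an MCInst. AMDGPU::getMCReg() in
// AMDGPUBaseInfo is the real entry point used by addRegOperands() above; the
// wrapper name below is assumed:
//
//   unsigned resolveForSubtarget(unsigned Reg,
//                                const llvm::MCSubtargetInfo &STI) {
//     // e.g. flat_scratch resolves to a CI- or VI-specific SGPR pair
//     // depending on the subtarget the operand was parsed under.
//     return llvm::AMDGPU::getMCReg(Reg, STI);
//   }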
 
   static std::unique_ptr<AMDGPUOperand> CreateImm(int64_t Val, SMLoc Loc,
                                                   enum ImmTy Type = ImmTyNone,
@@ -278,10 +300,12 @@ public:
   static std::unique_ptr<AMDGPUOperand> CreateReg(unsigned RegNo, SMLoc S,
                                                   SMLoc E,
                                                   const MCRegisterInfo *TRI,
+                                                  const MCSubtargetInfo *STI,
                                                   bool ForceVOP3) {
     auto Op = llvm::make_unique<AMDGPUOperand>(Register);
     Op->Reg.RegNo = RegNo;
     Op->Reg.TRI = TRI;
+    Op->Reg.STI = STI;
     Op->Reg.Modifiers = -1;
     Op->Reg.IsForcedVOP3 = ForceVOP3;
     Op->StartLoc = S;
@@ -301,14 +325,32 @@ public:
   bool isDSOffset01() const;
   bool isSWaitCnt() const;
   bool isMubufOffset() const;
+  bool isSMRDOffset() const;
+  bool isSMRDLiteralOffset() const;
 };
 
 class AMDGPUAsmParser : public MCTargetAsmParser {
-  MCSubtargetInfo &STI;
   const MCInstrInfo &MII;
   MCAsmParser &Parser;
 
   unsigned ForcedEncodingSize;
+
+  bool isSI() const {
+    return AMDGPU::isSI(getSTI());
+  }
+
+  bool isCI() const {
+    return AMDGPU::isCI(getSTI());
+  }
+
+  bool isVI() const {
+    return AMDGPU::isVI(getSTI());
+  }
+
+  bool hasSGPR102_SGPR103() const {
+    return !isVI();
+  }
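// A sketch of how these predicates get used further down in this file
// (subtargetHasRegister() below is the real consumer; this snippet is a
// simplified restatement of that logic, not the patch's code):
//
//   bool acceptsRegister(unsigned RegNo) const {
//     if (RegNo == AMDGPU::FLAT_SCR && isSI())
//       return false;               // SI has no flat scratch registers.
//     if (RegNo == AMDGPU::SGPR102_SGPR103 && !hasSGPR102_SGPR103())
//       return false;               // VI exposes two fewer SGPRs.
//     return true;
//   }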
+
   /// @name Auto-generated Match Functions
   /// {
 
@@ -323,20 +365,34 @@ private:
   bool ParseDirectiveHSACodeObjectISA();
   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
   bool ParseDirectiveAMDKernelCodeT();
+  bool ParseSectionDirectiveHSAText();
+  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
+  bool ParseDirectiveAMDGPUHsaKernel();
+  bool ParseDirectiveAMDGPUHsaModuleGlobal();
+  bool ParseDirectiveAMDGPUHsaProgramGlobal();
+  bool ParseSectionDirectiveHSADataGlobalAgent();
+  bool ParseSectionDirectiveHSADataGlobalProgram();
+  bool ParseSectionDirectiveHSARodataReadonlyAgent();
 
 public:
-  AMDGPUAsmParser(MCSubtargetInfo &STI, MCAsmParser &_Parser,
+public:
+  enum AMDGPUMatchResultTy {
+    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
+  };
+
+  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                   const MCInstrInfo &MII,
                   const MCTargetOptions &Options)
-      : MCTargetAsmParser(), STI(STI), MII(MII), Parser(_Parser),
-        ForcedEncodingSize(0){
+      : MCTargetAsmParser(Options, STI), MII(MII), Parser(_Parser),
+        ForcedEncodingSize(0) {
+    MCAsmParserExtension::Initialize(Parser);
 
-    if (STI.getFeatureBits().none()) {
+    if (getSTI().getFeatureBits().none()) {
       // Set default features.
-      STI.ToggleFeature("SOUTHERN_ISLANDS");
+      copySTI().ToggleFeature("SOUTHERN_ISLANDS");
     }
 
-    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
   }
 
   AMDGPUTargetStreamer &getTargetStreamer() {
@@ -420,10 +476,10 @@ struct OptionalOperand {
 
 }
 
-static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
+static int getRegClass(bool IsVgpr, unsigned RegWidth) {
   if (IsVgpr) {
     switch (RegWidth) {
-      default: llvm_unreachable("Unknown register width");
+      default: return -1;
       case 1: return AMDGPU::VGPR_32RegClassID;
       case 2: return AMDGPU::VReg_64RegClassID;
       case 3: return AMDGPU::VReg_96RegClassID;
@@ -434,7 +490,7 @@ static unsigned getRegClass(bool IsVgpr, unsigned RegWidth) {
   }
 
   switch (RegWidth) {
-    default: llvm_unreachable("Unknown register width");
+    default: return -1;
     case 1: return AMDGPU::SGPR_32RegClassID;
     case 2: return AMDGPU::SGPR_64RegClassID;
     case 4: return AMDGPU::SReg_128RegClassID;
@@ -443,16 +499,16 @@
   }
 }
 
-static unsigned getRegForName(const StringRef &RegName) {
+static unsigned getRegForName(StringRef RegName) {
 
   return StringSwitch<unsigned>(RegName)
     .Case("exec", AMDGPU::EXEC)
     .Case("vcc", AMDGPU::VCC)
-    .Case("flat_scr", AMDGPU::FLAT_SCR)
+    .Case("flat_scratch", AMDGPU::FLAT_SCR)
     .Case("m0", AMDGPU::M0)
     .Case("scc", AMDGPU::SCC)
-    .Case("flat_scr_lo", AMDGPU::FLAT_SCR_LO)
-    .Case("flat_scr_hi", AMDGPU::FLAT_SCR_HI)
+    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
+    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
@@ -464,12 +520,14 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
   const AsmToken Tok = Parser.getTok();
   StartLoc = Tok.getLoc();
   EndLoc = Tok.getEndLoc();
-  const StringRef &RegName = Tok.getString();
+  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+
+  StringRef RegName = Tok.getString();
   RegNo = getRegForName(RegName);
 
   if (RegNo) {
     Parser.Lex();
-    return false;
+    return !subtargetHasRegister(*TRI, RegNo);
   }
 
   // Match vgprs and sgprs
@@ -514,16 +572,24 @@ bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &End
       RegIndexInClass = RegLo;
     } else {
       // SGPR registers are aligned. Max alignment is 4 dwords.
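// Worked example of the alignment rule implemented just below (the register
// ranges are illustrative): a 2-dword SGPR range must start on an even
// register, and a 4-dword-or-wider range on a multiple of four.
//
//   s[4:5]  -> RegWidth = 2, Size = min(2, 4) = 2, RegLo = 4,
//              4 % 2 == 0, RegIndexInClass = 4 / 2 = 2 in SGPR_64.
//   s[3:4]  -> RegLo = 3, 3 % 2 != 0, rejected as misaligned.
//   s[8:11] -> RegWidth = 4, Size = 4, RegIndexInClass = 8 / 4 = 2.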
- RegIndexInClass = RegLo / std::min(RegWidth, 4u); + unsigned Size = std::min(RegWidth, 4u); + if (RegLo % Size != 0) + return true; + + RegIndexInClass = RegLo / Size; } } - const MCRegisterInfo *TRC = getContext().getRegisterInfo(); - unsigned RC = getRegClass(IsVgpr, RegWidth); - if (RegIndexInClass > TRC->getRegClass(RC).getNumRegs()) + int RCID = getRegClass(IsVgpr, RegWidth); + if (RCID == -1) return true; - RegNo = TRC->getRegClass(RC).getRegister(RegIndexInClass); - return false; + + const MCRegisterClass RC = TRI->getRegClass(RCID); + if (RegIndexInClass >= RC.getNumRegs()) + return true; + + RegNo = RC.getRegister(RegIndexInClass); + return !subtargetHasRegister(*TRI, RegNo); } unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { @@ -534,6 +600,11 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3))) return Match_InvalidOperand; + if ((TSFlags & SIInstrFlags::VOP3) && + (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && + getForcedEncodingSize() != 64) + return Match_PreferE32; + return Match_Success; } @@ -549,7 +620,7 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, default: break; case Match_Success: Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; case Match_MissingFeature: return Error(IDLoc, "instruction not supported on this GPU"); @@ -592,6 +663,9 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } return Error(ErrorLoc, "invalid operand for instruction"); } + case Match_PreferE32: + return Error(IDLoc, "internal error: instruction without _e64 suffix " + "should be encoded as e32"); } llvm_unreachable("Implement any new match types added!"); } @@ -640,7 +714,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { // If this directive has no arguments, then use the ISA version for the // targeted GPU. 
if (getLexer().is(AsmToken::EndOfStatement)) { - AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(STI.getFeatureBits()); + AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits()); getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor, Isa.Stepping, "AMD", "AMDGPU"); @@ -852,7 +926,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { amd_kernel_code_t Header; - AMDGPU::initDefaultAMDKernelCodeT(Header, STI.getFeatureBits()); + AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits()); while (true) { @@ -882,6 +956,64 @@ bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { return false; } +bool AMDGPUAsmParser::ParseSectionDirectiveHSAText() { + getParser().getStreamer().SwitchSection( + AMDGPU::getHSATextSection(getContext())); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected symbol name"); + + StringRef KernelName = Parser.getTok().getString(); + + getTargetStreamer().EmitAMDGPUSymbolType(KernelName, + ELF::STT_AMDGPU_HSA_KERNEL); + Lex(); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaModuleGlobal() { + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected symbol name"); + + StringRef GlobalName = Parser.getTok().getIdentifier(); + + getTargetStreamer().EmitAMDGPUHsaModuleScopeGlobal(GlobalName); + Lex(); + return false; +} + +bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaProgramGlobal() { + if (getLexer().isNot(AsmToken::Identifier)) + return TokError("expected symbol name"); + + StringRef GlobalName = Parser.getTok().getIdentifier(); + + getTargetStreamer().EmitAMDGPUHsaProgramScopeGlobal(GlobalName); + Lex(); + return false; +} + +bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalAgent() { + getParser().getStreamer().SwitchSection( + AMDGPU::getHSADataGlobalAgentSection(getContext())); + return false; +} + +bool AMDGPUAsmParser::ParseSectionDirectiveHSADataGlobalProgram() { + getParser().getStreamer().SwitchSection( + AMDGPU::getHSADataGlobalProgramSection(getContext())); + return false; +} + +bool AMDGPUAsmParser::ParseSectionDirectiveHSARodataReadonlyAgent() { + getParser().getStreamer().SwitchSection( + AMDGPU::getHSARodataReadonlyAgentSection(getContext())); + return false; +} + bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -894,6 +1026,55 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".amd_kernel_code_t") return ParseDirectiveAMDKernelCodeT(); + if (IDVal == ".hsatext" || IDVal == ".text") + return ParseSectionDirectiveHSAText(); + + if (IDVal == ".amdgpu_hsa_kernel") + return ParseDirectiveAMDGPUHsaKernel(); + + if (IDVal == ".amdgpu_hsa_module_global") + return ParseDirectiveAMDGPUHsaModuleGlobal(); + + if (IDVal == ".amdgpu_hsa_program_global") + return ParseDirectiveAMDGPUHsaProgramGlobal(); + + if (IDVal == ".hsadata_global_agent") + return ParseSectionDirectiveHSADataGlobalAgent(); + + if (IDVal == ".hsadata_global_program") + return ParseSectionDirectiveHSADataGlobalProgram(); + + if (IDVal == ".hsarodata_readonly_agent") + return ParseSectionDirectiveHSARodataReadonlyAgent(); + + return true; +} + +bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, + unsigned RegNo) const { + if (isCI()) + return true; + + if (isSI()) { + // No flat_scr + switch (RegNo) { + case AMDGPU::FLAT_SCR: + case AMDGPU::FLAT_SCR_LO: + 
case AMDGPU::FLAT_SCR_HI: + return false; + default: + return true; + } + } + + // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that + // SI/CI have. + for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); + R.isValid(); ++R) { + if (*R == RegNo) + return false; + } + return true; } @@ -943,13 +1124,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { int64_t IntVal; if (getParser().parseAbsoluteExpression(IntVal)) return MatchOperand_ParseFail; - APInt IntVal32(32, IntVal); - if (IntVal32.getSExtValue() != IntVal) { + if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) { Error(S, "invalid immediate: only 32-bit values are legal"); return MatchOperand_ParseFail; } - IntVal = IntVal32.getSExtValue(); if (Negate) IntVal *= -1; Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S)); @@ -1002,7 +1181,7 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { Operands.push_back(AMDGPUOperand::CreateReg( - RegNo, S, E, getContext().getRegisterInfo(), + RegNo, S, E, getContext().getRegisterInfo(), &getSTI(), isForcedVOP3())); if (HasModifiers || Modifiers) { @@ -1570,6 +1749,23 @@ AMDGPUAsmParser::parseR128(OperandVector &Operands) { return parseNamedBit("r128", Operands); } +//===----------------------------------------------------------------------===// +// smrd +//===----------------------------------------------------------------------===// + +bool AMDGPUOperand::isSMRDOffset() const { + + // FIXME: Support 20-bit offsets on VI. We need to to pass subtarget + // information here. + return isImm() && isUInt<8>(getImm()); +} + +bool AMDGPUOperand::isSMRDLiteralOffset() const { + // 32-bit literals are only supported on CI and we only want to use them + // when the offset is > 8-bits. + return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); +} + //===----------------------------------------------------------------------===// // vop3 //===----------------------------------------------------------------------===// @@ -1653,8 +1849,12 @@ AMDGPUAsmParser::parseVOP3OptionalOps(OperandVector &Operands) { } void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { - ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); - unsigned i = 2; + + unsigned i = 1; + const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); + if (Desc.getNumDefs() > 0) { + ((AMDGPUOperand &)*Operands[i++]).addRegOperands(Inst, 1); + } std::map OptionalIdx; diff --git a/lib/Target/AMDGPU/CIInstructions.td b/lib/Target/AMDGPU/CIInstructions.td index 2f5fdbe92078..88a090d3df35 100644 --- a/lib/Target/AMDGPU/CIInstructions.td +++ b/lib/Target/AMDGPU/CIInstructions.td @@ -8,6 +8,22 @@ //===----------------------------------------------------------------------===// // Instruction definitions for CI and newer. 
//===----------------------------------------------------------------------===// +// Remaining instructions: +// S_CBRANCH_CDBGUSER +// S_CBRANCH_CDBGSYS +// S_CBRANCH_CDBGSYS_OR_USER +// S_CBRANCH_CDBGSYS_AND_USER +// DS_NOP +// DS_GWS_SEMA_RELEASE_ALL +// DS_WRAP_RTN_B32 +// DS_CNDXCHG32_RTN_B64 +// DS_WRITE_B96 +// DS_WRITE_B128 +// DS_CONDXCHG32_RTN_B128 +// DS_READ_B96 +// DS_READ_B128 +// BUFFER_LOAD_DWORDX3 +// BUFFER_STORE_DWORDX3 def isCIVI : Predicate < @@ -23,6 +39,7 @@ def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">; let SubtargetPredicate = isCIVI in { +let SchedRW = [WriteDoubleAdd] in { defm V_TRUNC_F64 : VOP1Inst , "v_trunc_f64", VOP_F64_F64, ftrunc >; @@ -35,82 +52,218 @@ defm V_FLOOR_F64 : VOP1Inst , "v_floor_f64", defm V_RNDNE_F64 : VOP1Inst , "v_rndne_f64", VOP_F64_F64, frint >; +} // End SchedRW = [WriteDoubleAdd] + +let SchedRW = [WriteQuarterRate32] in { defm V_LOG_LEGACY_F32 : VOP1Inst , "v_log_legacy_f32", VOP_F32_F32 >; defm V_EXP_LEGACY_F32 : VOP1Inst , "v_exp_legacy_f32", VOP_F32_F32 >; +} // End SchedRW = [WriteQuarterRate32] + +//===----------------------------------------------------------------------===// +// VOP3 Instructions +//===----------------------------------------------------------------------===// + +defm V_QSAD_PK_U16_U8 : VOP3Inst , "v_qsad_pk_u16_u8", + VOP_I32_I32_I32 +>; +defm V_MQSAD_U16_U8 : VOP3Inst , "v_mqsad_u16_u8", + VOP_I32_I32_I32 +>; +defm V_MQSAD_U32_U8 : VOP3Inst , "v_mqsad_u32_u8", + VOP_I32_I32_I32 +>; + +let isCommutable = 1 in { +defm V_MAD_U64_U32 : VOP3Inst , "v_mad_u64_u32", + VOP_I64_I32_I32_I64 +>; + +// XXX - Does this set VCC? +defm V_MAD_I64_I32 : VOP3Inst , "v_mad_i64_i32", + VOP_I64_I32_I32_I64 +>; +} // End isCommutable = 1 + + +//===----------------------------------------------------------------------===// +// DS Instructions +//===----------------------------------------------------------------------===// +defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">; + +// DS_CONDXCHG32_RTN_B64 +// DS_CONDXCHG32_RTN_B128 + +//===----------------------------------------------------------------------===// +// SMRD Instructions +//===----------------------------------------------------------------------===// + +defm S_DCACHE_INV_VOL : SMRD_Inval , + "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>; + +//===----------------------------------------------------------------------===// +// MUBUF Instructions +//===----------------------------------------------------------------------===// + +defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate , + "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol +>; //===----------------------------------------------------------------------===// // Flat Instructions //===----------------------------------------------------------------------===// -def FLAT_LOAD_UBYTE : FLAT_Load_Helper <0x8, "flat_load_ubyte", VGPR_32>; -def FLAT_LOAD_SBYTE : FLAT_Load_Helper <0x9, "flat_load_sbyte", VGPR_32>; -def FLAT_LOAD_USHORT : FLAT_Load_Helper <0xa, "flat_load_ushort", VGPR_32>; -def FLAT_LOAD_SSHORT : FLAT_Load_Helper <0xb, "flat_load_sshort", VGPR_32>; -def FLAT_LOAD_DWORD : FLAT_Load_Helper <0xc, "flat_load_dword", VGPR_32>; -def FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <0xd, "flat_load_dwordx2", VReg_64>; -def FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <0xe, "flat_load_dwordx4", VReg_128>; -def FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <0xf, "flat_load_dwordx3", VReg_96>; -def FLAT_STORE_BYTE : FLAT_Store_Helper <0x18, "flat_store_byte", VGPR_32>; -def FLAT_STORE_SHORT 
: FLAT_Store_Helper <0x1a, "flat_store_short", VGPR_32>; -def FLAT_STORE_DWORD : FLAT_Store_Helper <0x1c, "flat_store_dword", VGPR_32>; -def FLAT_STORE_DWORDX2 : FLAT_Store_Helper < - 0x1d, "flat_store_dwordx2", VReg_64 +defm FLAT_LOAD_UBYTE : FLAT_Load_Helper < + flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32 >; -def FLAT_STORE_DWORDX4 : FLAT_Store_Helper < - 0x1e, "flat_store_dwordx4", VReg_128 +defm FLAT_LOAD_SBYTE : FLAT_Load_Helper < + flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32 >; -def FLAT_STORE_DWORDX3 : FLAT_Store_Helper < - 0x1f, "flat_store_dwordx3", VReg_96 +defm FLAT_LOAD_USHORT : FLAT_Load_Helper < + flat<0xa, 0x12>, "flat_load_ushort", VGPR_32 +>; +defm FLAT_LOAD_SSHORT : FLAT_Load_Helper < + flat<0xb, 0x13>, "flat_load_sshort", VGPR_32> +; +defm FLAT_LOAD_DWORD : FLAT_Load_Helper < + flat<0xc, 0x14>, "flat_load_dword", VGPR_32 +>; +defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper < + flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64 +>; +defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper < + flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128 +>; +defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper < + flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96 +>; +defm FLAT_STORE_BYTE : FLAT_Store_Helper < + flat<0x18>, "flat_store_byte", VGPR_32 +>; +defm FLAT_STORE_SHORT : FLAT_Store_Helper < + flat <0x1a>, "flat_store_short", VGPR_32 +>; +defm FLAT_STORE_DWORD : FLAT_Store_Helper < + flat<0x1c>, "flat_store_dword", VGPR_32 +>; +defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper < + flat<0x1d>, "flat_store_dwordx2", VReg_64 +>; +defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper < + flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128 +>; +defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper < + flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96 +>; +defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC < + flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32 >; -defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <0x30, "flat_atomic_swap", VGPR_32>; defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC < - 0x31, "flat_atomic_cmpswap", VGPR_32, VReg_64 + flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, VReg_64 >; -defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <0x32, "flat_atomic_add", VGPR_32>; -defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <0x33, "flat_atomic_sub", VGPR_32>; -defm FLAT_ATOMIC_RSUB : FLAT_ATOMIC <0x34, "flat_atomic_rsub", VGPR_32>; -defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <0x35, "flat_atomic_smin", VGPR_32>; -defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <0x36, "flat_atomic_umin", VGPR_32>; -defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <0x37, "flat_atomic_smax", VGPR_32>; -defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <0x38, "flat_atomic_umax", VGPR_32>; -defm FLAT_ATOMIC_AND : FLAT_ATOMIC <0x39, "flat_atomic_and", VGPR_32>; -defm FLAT_ATOMIC_OR : FLAT_ATOMIC <0x3a, "flat_atomic_or", VGPR_32>; -defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <0x3b, "flat_atomic_xor", VGPR_32>; -defm FLAT_ATOMIC_INC : FLAT_ATOMIC <0x3c, "flat_atomic_inc", VGPR_32>; -defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <0x3d, "flat_atomic_dec", VGPR_32>; -defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC < - 0x3e, "flat_atomic_fcmpswap", VGPR_32, VReg_64 +defm FLAT_ATOMIC_ADD : FLAT_ATOMIC < + flat<0x32, 0x42>, "flat_atomic_add", VGPR_32 +>; +defm FLAT_ATOMIC_SUB : FLAT_ATOMIC < + flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32 +>; +defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC < + flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32 +>; +defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC < + flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32 +>; +defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC < + flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32 +>; +defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC < + flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32 +>; +defm 
FLAT_ATOMIC_AND : FLAT_ATOMIC < + flat<0x39, 0x48>, "flat_atomic_and", VGPR_32 +>; +defm FLAT_ATOMIC_OR : FLAT_ATOMIC < + flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32 +>; +defm FLAT_ATOMIC_XOR : FLAT_ATOMIC < + flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32 +>; +defm FLAT_ATOMIC_INC : FLAT_ATOMIC < + flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32 +>; +defm FLAT_ATOMIC_DEC : FLAT_ATOMIC < + flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32 +>; +defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC < + flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64 >; -defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <0x3f, "flat_atomic_fmin", VGPR_32>; -defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <0x40, "flat_atomic_fmax", VGPR_32>; -defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <0x50, "flat_atomic_swap_x2", VReg_64>; defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC < - 0x51, "flat_atomic_cmpswap_x2", VReg_64, VReg_128 + flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, VReg_128 >; -defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <0x52, "flat_atomic_add_x2", VReg_64>; -defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <0x53, "flat_atomic_sub_x2", VReg_64>; -defm FLAT_ATOMIC_RSUB_X2 : FLAT_ATOMIC <0x54, "flat_atomic_rsub_x2", VReg_64>; -defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <0x55, "flat_atomic_smin_x2", VReg_64>; -defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <0x56, "flat_atomic_umin_x2", VReg_64>; -defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <0x57, "flat_atomic_smax_x2", VReg_64>; -defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <0x58, "flat_atomic_umax_x2", VReg_64>; -defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <0x59, "flat_atomic_and_x2", VReg_64>; -defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <0x5a, "flat_atomic_or_x2", VReg_64>; -defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <0x5b, "flat_atomic_xor_x2", VReg_64>; -defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <0x5c, "flat_atomic_inc_x2", VReg_64>; -defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <0x5d, "flat_atomic_dec_x2", VReg_64>; -defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC < - 0x5e, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128 +defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC < + flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64 +>; +defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC < + flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64 +>; +defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC < + flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64 +>; +defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC < + flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64 +>; +defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC < + flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64 +>; +defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC < + flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64 +>; +defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC < + flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64 +>; +defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC < + flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64 +>; +defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC < + flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64 +>; +defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC < + flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64 +>; +defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC < + flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64 >; -defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <0x5f, "flat_atomic_fmin_x2", VReg_64>; -defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <0x60, "flat_atomic_fmax_x2", VReg_64>; } // End SubtargetPredicate = isCIVI +// CI Only flat instructions + +let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst in { + +defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC < + flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, VReg_64 +>; +defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC < + flat<0x3f>, "flat_atomic_fmin", VGPR_32 +>; +defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC < + 
flat<0x40>, "flat_atomic_fmax", VGPR_32 +>; +defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC < + flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, VReg_128 +>; +defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC < + flat<0x5f>, "flat_atomic_fmin_x2", VReg_64 +>; +defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC < + flat<0x60>, "flat_atomic_fmax_x2", VReg_64 +>; + +} // End let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst + //===----------------------------------------------------------------------===// // Flat Patterns //===----------------------------------------------------------------------===// @@ -147,3 +300,80 @@ def : FLATStore_Pattern ; } // End HasFlatAddressSpace predicate +let Predicates = [isCI] in { + +// Convert (x - floor(x)) to fract(x) +def : Pat < + (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), + (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))), + (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +// Convert (x + (-floor(x))) to fract(x) +def : Pat < + (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), + (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), + (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) +>; + +} // End Predicates = [isCI] + + +//===----------------------------------------------------------------------===// +// Patterns to generate flat for global +//===----------------------------------------------------------------------===// + +def useFlatForGlobal : Predicate < + "Subtarget->useFlatForGlobal() || " + "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">; + +let Predicates = [useFlatForGlobal] in { + +// 1. Offset as 20bit DWORD immediate +def : Pat < + (SIload_constant v4i32:$sbase, IMM20bit:$offset), + (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) +>; + +// Patterns for global loads with no offset +class FlatLoadPat : Pat < + (vt (node i64:$addr)), + (inst $addr, 0, 0, 0) +>; + +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; +def : FlatLoadPat ; + +class FlatStorePat : Pat < + (node vt:$data, i64:$addr), + (inst $data, $addr, 0, 0, 0) +>; + +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; +def : FlatStorePat ; + +class FlatAtomicPat : Pat < + (vt (node i64:$addr, vt:$data)), + (inst $addr, $data, 0, 0) +>; + +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; +def : FlatAtomicPat ; + +} // End Predicates = [useFlatForGlobal] diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 9460bf6b9338..30bb0e0adde8 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -15,12 +15,17 @@ add_public_tablegen_target(AMDGPUCommonTableGen) add_llvm_target(AMDGPUCodeGen AMDILCFGStructurizer.cpp AMDGPUAlwaysInlinePass.cpp + AMDGPUAnnotateKernelFeatures.cpp + AMDGPUAnnotateUniformValues.cpp AMDGPUAsmPrinter.cpp + AMDGPUDiagnosticInfoUnsupported.cpp AMDGPUFrameLowering.cpp + AMDGPUTargetObjectFile.cpp AMDGPUIntrinsicInfo.cpp AMDGPUISelDAGToDAG.cpp AMDGPUMCInstLower.cpp AMDGPUMachineFunction.cpp + AMDGPUOpenCLImageTypeLoweringPass.cpp AMDGPUSubtarget.cpp AMDGPUTargetMachine.cpp AMDGPUTargetTransformInfo.cpp @@ -45,6 +50,7 @@ add_llvm_target(AMDGPUCodeGen SIFixSGPRCopies.cpp SIFixSGPRLiveRanges.cpp SIFoldOperands.cpp + SIFrameLowering.cpp SIInsertWaits.cpp SIInstrInfo.cpp SIISelLowering.cpp @@ 
-52,7 +58,6 @@ add_llvm_target(AMDGPUCodeGen SILowerControlFlow.cpp SILowerI1Copies.cpp SIMachineFunctionInfo.cpp - SIPrepareScratchRegs.cpp SIRegisterInfo.cpp SIShrinkInstructions.cpp SITypeRewriter.cpp diff --git a/lib/Target/AMDGPU/CaymanInstructions.td b/lib/Target/AMDGPU/CaymanInstructions.td index ba4df82a6d37..a6c3785c815b 100644 --- a/lib/Target/AMDGPU/CaymanInstructions.td +++ b/lib/Target/AMDGPU/CaymanInstructions.td @@ -82,6 +82,10 @@ def RAT_STORE_DWORD32 : RAT_STORE_DWORD ; def RAT_STORE_DWORD64 : RAT_STORE_DWORD ; def RAT_STORE_DWORD128 : RAT_STORE_DWORD ; +def RAT_STORE_TYPED_cm: CF_MEM_RAT_STORE_TYPED<0> { + let eop = 0; // This bit is not used on Cayman. +} + class VTX_READ_cm buffer_id, dag outs, list pattern> : VTX_WORD0_cm, VTX_READ { diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td index 7adcd46fe196..779a14e95d22 100644 --- a/lib/Target/AMDGPU/EvergreenInstructions.td +++ b/lib/Target/AMDGPU/EvergreenInstructions.td @@ -40,6 +40,15 @@ class CF_MEM_RAT rat_inst, bits<4> rat_id, dag ins, string name, : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins, "MEM_RAT "#name, pattern>; +class CF_MEM_RAT_STORE_TYPED has_eop> + : CF_MEM_RAT <0x1, ?, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr, + i32imm:$rat_id, InstFlag:$eop), + "STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr" + #!if(has_eop, ", $eop", ""), + [(int_r600_rat_store_typed R600_Reg128:$rw_gpr, + R600_Reg128:$index_gpr, + (i32 imm:$rat_id))]>; + def RAT_MSKOR : CF_MEM_RAT <0x11, 0, (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr), "MSKOR $rw_gpr.XW, $index_gpr", @@ -105,6 +114,8 @@ def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf, [(global_store v4i32:$rw_gpr, i32:$index_gpr)] >; +def RAT_STORE_TYPED_eg: CF_MEM_RAT_STORE_TYPED<1>; + } // End usesCustomInserter = 1 class VTX_READ_eg buffer_id, dag outs, list pattern> diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp index e811d5cff221..a187de88f639 100644 --- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -283,8 +284,13 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) { O << "4.0"; else if (Imm == DoubleToBits(-4.0)) O << "-4.0"; - else - llvm_unreachable("64-bit literal constants not supported"); + else { + assert(isUInt<32>(Imm)); + + // In rare situations, we will have a 32-bit literal in a 64-bit + // operand. This is technically allowed for the encoding of s_mov_b64. 
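// Illustrative case for the branch below (the instruction is an assumed
// example, not taken from the patch): s_mov_b64 may encode its 64-bit
// operand with a single 32-bit literal dword, e.g.
//
//   s_mov_b64 s[0:1], 0x12345678   // literal fits in 32 bits
//
// so instead of treating every non-inline 64-bit immediate as unreachable,
// the printer emits the hex form of the 32-bit value.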
+      O << formatHex(static_cast<uint32_t>(Imm));
+  }
 }
 
 void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -592,11 +598,11 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
       } else {
         unsigned Stream = (SImm16 >> 8) & 0x3;
         if (Op == 1)
-	    O << "cut";
+          O << "cut";
         else if (Op == 2)
-	    O << "emit";
+          O << "emit";
         else if (Op == 3)
-	    O << "emit-cut";
+          O << "emit-cut";
         O << " stream " << Stream;
       }
       O << "), [m0] ";
diff --git a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index 14fb511e9232..90541d86132d 100644
--- a/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -13,9 +13,7 @@
 #ifndef LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
 #define LLVM_LIB_TARGET_R600_INSTPRINTER_AMDGPUINSTPRINTER_H
 
-#include "llvm/ADT/StringRef.h"
 #include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 4434d9b119c6..60e8c8f3d303 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -99,14 +99,22 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
 
   case AMDGPU::fixup_si_rodata: {
     uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
-    *Dst = Value;
-    break;
-  }
-
-  case AMDGPU::fixup_si_end_of_text: {
-    uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
-    // The value points to the last instruction in the text section, so we
-    // need to add 4 bytes to get to the start of the constants.
+    // We emit constant data at the end of the text section and generate its
+    // address using the following code sequence:
+    // s_getpc_b64 s[0:1]
+    // s_add_u32 s0, s0, $symbol
+    // s_addc_u32 s1, s1, 0
+    //
+    // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+    // the fixup replaces $symbol with a literal constant, which is a
+    // pc-relative offset from the encoding of the $symbol operand to the
+    // constant data.
+    //
+    // What we want here is an offset from the start of the s_add_u32
+    // instruction to the constant data, but since the encoding of $symbol
+    // starts 4 bytes after the start of the add instruction, we end up
+    // with an offset that is 4 bytes too small. This requires us to
+    // add 4 to the fixup value before applying it.
     *Dst = Value + 4;
     break;
   }
@@ -136,8 +144,7 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
   const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
     // name                   offset bits  flags
     { "fixup_si_sopp_br",     0,     16,   MCFixupKindInfo::FKF_IsPCRel },
-    { "fixup_si_rodata",      0,     32,   0 },
-    { "fixup_si_end_of_text", 0,     32,   MCFixupKindInfo::FKF_IsPCRel }
+    { "fixup_si_rodata",      0,     32,   MCFixupKindInfo::FKF_IsPCRel }
   };
 
   if (Kind < FirstTargetFixupKind)
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
new file mode 100644
index 000000000000..9ff9fe794d2b
--- /dev/null
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.cpp
@@ -0,0 +1,26 @@
+//===-------- AMDGPUELFStreamer.cpp - ELF Object Output -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "AMDGPUELFStreamer.h" +#include "Utils/AMDGPUBaseInfo.h" + +using namespace llvm; + +void AMDGPUELFStreamer::InitSections(bool NoExecStack) { + // Start with the .hsatext section by default. + SwitchSection(AMDGPU::getHSATextSection(getContext())); +} + +MCELFStreamer *llvm::createAMDGPUELFStreamer(MCContext &Context, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll) { + return new AMDGPUELFStreamer(Context, MAB, OS, Emitter); +} diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h new file mode 100644 index 000000000000..488d7e74d741 --- /dev/null +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFStreamer.h @@ -0,0 +1,40 @@ +//===-------- AMDGPUELFStreamer.h - ELF Object Output ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is a custom MCELFStreamer which allows us to insert some hooks before +// emitting data into an actual object file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H +#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUELFSTREAMER_H + +#include "llvm/MC/MCELFStreamer.h" + +namespace llvm { +class MCAsmBackend; +class MCCodeEmitter; +class MCContext; +class MCSubtargetInfo; + +class AMDGPUELFStreamer : public MCELFStreamer { +public: + AMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, + MCCodeEmitter *Emitter) + : MCELFStreamer(Context, MAB, OS, Emitter) { } + + virtual void InitSections(bool NoExecStac) override; +}; + +MCELFStreamer *createAMDGPUELFStreamer(MCContext &Context, MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll); +} // namespace llvm. 
+ +#endif diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h index 01021d67ffd9..59a9178082f6 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h @@ -21,9 +21,6 @@ enum Fixups { /// fixup for global addresses with constant initializers fixup_si_rodata, - /// fixup for offset from instruction to end of text section - fixup_si_end_of_text, - // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index 028a86dfc7ad..68b1d1ae83cc 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -22,13 +22,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { InlineAsmEnd = ";#ASMEND"; //===--- Data Emission Directives -------------------------------------===// - ZeroDirective = ".zero"; - AsciiDirective = ".ascii\t"; - AscizDirective = ".asciz\t"; - Data8bitsDirective = ".byte\t"; - Data16bitsDirective = ".short\t"; - Data32bitsDirective = ".long\t"; - Data64bitsDirective = ".quad\t"; SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; @@ -41,3 +34,10 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() { //===--- Dwarf Emission Directives -----------------------------------===// SupportsDebugInformation = true; } + +bool AMDGPUMCAsmInfo::shouldOmitSectionDirective(StringRef SectionName) const { + return SectionName == ".hsatext" || SectionName == ".hsadata_global_agent" || + SectionName == ".hsadata_global_program" || + SectionName == ".hsarodata_readonly_agent" || + MCAsmInfo::shouldOmitSectionDirective(SectionName); +} diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h index a5bac51e356f..a546961705d7 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.h @@ -21,12 +21,13 @@ class Triple; // If you need to create another MCAsmInfo class, which inherits from MCAsmInfo, // you will need to make sure your new class sets PrivateGlobalPrefix to -// a prefix that won't appeary in a fuction name. The default value +// a prefix that won't appear in a function name. The default value // for PrivateGlobalPrefix is 'L', so it will consider any function starting // with 'L' as a local symbol. 
class AMDGPUMCAsmInfo : public MCAsmInfoELF { public: explicit AMDGPUMCAsmInfo(const Triple &TT); + bool shouldOmitSectionDirective(StringRef SectionName) const override; }; } // namespace llvm #endif diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp index c709741f3777..f70409470276 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCTargetDesc.h" +#include "AMDGPUELFStreamer.h" #include "AMDGPUMCAsmInfo.h" #include "AMDGPUTargetStreamer.h" #include "InstPrinter/AMDGPUInstPrinter.h" @@ -85,6 +86,15 @@ static MCTargetStreamer * createAMDGPUObjectTargetStreamer( return new AMDGPUTargetELFStreamer(S); } +static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context, + MCAsmBackend &MAB, raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll) { + if (T.getOS() == Triple::AMDHSA) + return createAMDGPUELFStreamer(Context, MAB, OS, Emitter, RelaxAll); + + return createELFStreamer(Context, MAB, OS, Emitter, RelaxAll); +} + extern "C" void LLVMInitializeAMDGPUTargetMC() { for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) { RegisterMCAsmInfo X(*T); @@ -95,6 +105,7 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() { TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo); TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter); TargetRegistry::RegisterMCAsmBackend(*T, createAMDGPUAsmBackend); + TargetRegistry::RegisterELFStreamer(*T, createMCStreamer); } // R600 specific registration diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index 09e6cb1f1ffc..b91134d2ee9b 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -13,6 +13,7 @@ #include "AMDGPUTargetStreamer.h" #include "SIDefines.h" +#include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELFStreamer.h" @@ -220,6 +221,26 @@ AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { } +void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName, + unsigned Type) { + switch (Type) { + default: llvm_unreachable("Invalid AMDGPU symbol type"); + case ELF::STT_AMDGPU_HSA_KERNEL: + OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ; + break; + } +} + +void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaModuleScopeGlobal( + StringRef GlobalName) { + OS << "\t.amdgpu_hsa_module_global " << GlobalName << '\n'; +} + +void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal( + StringRef GlobalName) { + OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n'; +} + //===----------------------------------------------------------------------===// // AMDGPUTargetELFStreamer //===----------------------------------------------------------------------===// @@ -291,7 +312,35 @@ AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) { MCStreamer &OS = getStreamer(); OS.PushSection(); - OS.SwitchSection(OS.getContext().getObjectFileInfo()->getTextSection()); + // The MCObjectFileInfo that is available to the assembler is a generic + // implementation and not AMDGPUHSATargetObjectFile, so we can't use + // MCObjectFileInfo::getTextSection() here for fetching the HSATextSection. 
+  OS.SwitchSection(AMDGPU::getHSATextSection(OS.getContext()));
   OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
   OS.PopSection();
 }
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
+                                                   unsigned Type) {
+  MCSymbolELF *Symbol = cast<MCSymbolELF>(
+      getStreamer().getContext().getOrCreateSymbol(SymbolName));
+  Symbol->setType(ELF::STT_AMDGPU_HSA_KERNEL);
+}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUHsaModuleScopeGlobal(
+    StringRef GlobalName) {
+
+  MCSymbolELF *Symbol = cast<MCSymbolELF>(
+      getStreamer().getContext().getOrCreateSymbol(GlobalName));
+  Symbol->setType(ELF::STT_OBJECT);
+  Symbol->setBinding(ELF::STB_LOCAL);
+}
+
+void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
+    StringRef GlobalName) {
+
+  MCSymbolELF *Symbol = cast<MCSymbolELF>(
+      getStreamer().getContext().getOrCreateSymbol(GlobalName));
+  Symbol->setType(ELF::STT_OBJECT);
+  Symbol->setBinding(ELF::STB_GLOBAL);
+}
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index d37677c6b863..83bb728f541c 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -7,6 +7,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_R600_MCTARGETDESC_AMDGPUTARGETSTREAMER_H
+
 #include "AMDKernelCodeT.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
@@ -27,6 +30,12 @@ public:
                                             StringRef ArchName) = 0;
 
   virtual void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) = 0;
+
+  virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) = 0;
+
+  virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0;
+
+  virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
 };
 
 class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@@ -41,6 +50,12 @@ public:
                                     StringRef ArchName) override;
 
   void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
+
+  void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+  void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
+
+  void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 };
 
 class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
@@ -72,6 +87,12 @@ public:
 
   void EmitAMDKernelCodeT(const amd_kernel_code_t &Header) override;
 
+  void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type) override;
+
+  void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
+
+  void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 };
 
 }
+
+#endif
diff --git a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
index 8306a051ff98..c823ee7e0080 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -2,6 +2,7 @@
 add_llvm_library(LLVMAMDGPUDesc
   AMDGPUAsmBackend.cpp
   AMDGPUELFObjectWriter.cpp
+  AMDGPUELFStreamer.cpp
   AMDGPUMCCodeEmitter.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUMCAsmInfo.cpp
diff --git a/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
index 4217bb362975..aa9a02198d04 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/AMDGPU/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@ type = Library
 name = AMDGPUDesc
 parent = AMDGPU
-required_libraries = MC AMDGPUAsmPrinter AMDGPUInfo Support
+required_libraries = MC AMDGPUAsmPrinter AMDGPUInfo AMDGPUUtils Support
 add_to_library_groups = AMDGPU
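// A short usage sketch (illustrative symbol names, not from the patch) of
// how the two global directives differ once they reach the ELF streamer
// implementations above:
//
//   .amdgpu_hsa_module_global lds_counter   // -> STT_OBJECT, STB_LOCAL
//   .amdgpu_hsa_program_global global_flag  // -> STT_OBJECT, STB_GLOBAL
//
// Module-scope globals are only visible within their module, so they get
// local binding; program-scope globals are meant to be linked across
// modules, so they get global binding.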
diff --git a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index e683498d52a5..3c1142dd664b 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -37,7 +37,6 @@ class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
   const MCRegisterInfo &MRI;
 
 public:
-
   R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
     : MCII(mcii), MRI(mri) { }
 
@@ -50,8 +49,8 @@ public:
   uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
                              SmallVectorImpl<MCFixup> &Fixups,
                              const MCSubtargetInfo &STI) const override;
-private:
 
+private:
   void EmitByte(unsigned int byte, raw_ostream &OS) const;
 
   void Emit(uint32_t value, raw_ostream &OS) const;
@@ -59,7 +58,6 @@ private:
 
   unsigned getHWRegChan(unsigned reg) const;
   unsigned getHWReg(unsigned regNo) const;
-
 };
 
 } // End anonymous namespace
@@ -83,7 +81,7 @@ enum FCInstr {
 
 MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
                                              const MCRegisterInfo &MRI,
-                                              MCContext &Ctx) {
+                                             MCContext &Ctx) {
   return new R600MCCodeEmitter(MCII, MRI);
 }
 
diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 65a0eeba2b16..9eb3dadbc5e2 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -36,7 +36,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
   void operator=(const SIMCCodeEmitter &) = delete;
   const MCInstrInfo &MCII;
   const MCRegisterInfo &MRI;
-  MCContext &Ctx;
 
   /// \brief Can this operand also contain immediate values?
   bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
@@ -47,7 +46,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
 public:
   SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
                   MCContext &ctx)
-    : MCII(mcii), MRI(mri), Ctx(ctx) { }
+    : MCII(mcii), MRI(mri) { }
 
   ~SIMCCodeEmitter() override {}
 
@@ -250,17 +249,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
 
   if (MO.isExpr()) {
     const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
-    MCFixupKind Kind;
-    const MCSymbol *Sym =
-        Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME));
-
-    if (&Expr->getSymbol() == Sym) {
-      // Add the offset to the beginning of the constant values.
-      Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text;
-    } else {
-      // This is used for constant data stored in .rodata.
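// Worked example of the pc-relative rodata fixup this path now always emits
// (the offsets are illustrative). Given the sequence described in
// AMDGPUAsmBackend.cpp above:
//
//   s_getpc_b64 s[0:1]          // pc := address of the following s_add_u32
//   s_add_u32   s0, s0, $sym    // $sym's literal begins 4 bytes in
//   s_addc_u32  s1, s1, 0
//
// if the constant data sits N bytes past the start of the s_add_u32, the
// resolved pc-relative distance measured from the literal is N - 4, so the
// backend's `*Dst = Value + 4` restores the intended offset N.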
- Kind = (MCFixupKind)AMDGPU::fixup_si_rodata; - } + MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_rodata; Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc())); } diff --git a/lib/Target/AMDGPU/Processors.td b/lib/Target/AMDGPU/Processors.td index d9a0723bedc9..a1584a224cbd 100644 --- a/lib/Target/AMDGPU/Processors.td +++ b/lib/Target/AMDGPU/Processors.td @@ -142,3 +142,7 @@ def : ProcessorModel<"carrizo", SIQuarterSpeedModel, def : ProcessorModel<"fiji", SIQuarterSpeedModel, [FeatureVolcanicIslands, FeatureISAVersion8_0_1] >; + +def : ProcessorModel<"stoney", SIQuarterSpeedModel, + [FeatureVolcanicIslands, FeatureISAVersion8_0_1] +>; diff --git a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp index c8f37f61fc16..bd80bb211b4f 100644 --- a/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ b/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -405,8 +405,8 @@ private: if (MO.isReg() && MO.isInternalRead()) MO.setIsInternalRead(false); } - getLiteral(BI, Literals); - ClauseContent.push_back(BI); + getLiteral(&*BI, Literals); + ClauseContent.push_back(&*BI); } I = BI; DeleteMI->eraseFromParent(); diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 4e4d554f0ee7..124a9c6e0f56 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -190,6 +190,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM, setSchedulingPreference(Sched::Source); } +static inline bool isEOP(MachineBasicBlock::iterator I) { + return std::next(I)->getOpcode() == AMDGPU::RETURN; +} + MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { MachineFunction * MF = BB->getParent(); @@ -276,12 +280,18 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( case AMDGPU::RAT_WRITE_CACHELESS_32_eg: case AMDGPU::RAT_WRITE_CACHELESS_64_eg: case AMDGPU::RAT_WRITE_CACHELESS_128_eg: { - unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; - BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) - .addImm(EOP); // Set End of program bit + .addImm(isEOP(I)); // Set End of program bit + break; + } + case AMDGPU::RAT_STORE_TYPED_eg: { + BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode())) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)) + .addOperand(MI->getOperand(2)) + .addImm(isEOP(I)); // Set End of program bit break; } @@ -539,7 +549,7 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter( } } } - bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0; + bool EOP = isEOP(I); if (!EOP && !isLastInstructionOfItsType) return BB; unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz)? 84 : 40; @@ -946,6 +956,8 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue Arg = Op.getOperand(0); SDLoc DL(Op); + + // TODO: Should this propagate fast-math-flags? 
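// A plain-C sketch of the fract-based range reduction the DAG below builds
// (the constant is assumed from the surrounding lowering; 0.15915494309f
// is 1/(2*pi)). The argument is converted into units of full periods,
// biased by half a period, and only the fractional part is kept for the
// hardware trig opcodes:
//
//   #include <math.h>
//   static float reduceToPeriod(float X) {
//     float Turns = X * 0.15915494309f + 0.5f; // periods, biased by 0.5
//     return Turns - floorf(Turns);            // FRACT: result in [0, 1)
//   }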
SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, DAG.getNode(ISD::FADD, DL, VT, DAG.getNode(ISD::FMUL, DL, VT, Arg, @@ -1936,6 +1948,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, Arg->getOperand(0).getOperand(Element)); } } + break; } case ISD::SELECT_CC: { diff --git a/lib/Target/AMDGPU/R600InstrInfo.cpp b/lib/Target/AMDGPU/R600InstrInfo.cpp index 855fa9fe45b2..8b6eea17130b 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -922,7 +922,7 @@ bool R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const{ + BranchProbability Probability) const{ return true; } @@ -933,14 +933,14 @@ R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return true; } bool R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, - const BranchProbability &Probability) + BranchProbability Probability) const { return true; } diff --git a/lib/Target/AMDGPU/R600InstrInfo.h b/lib/Target/AMDGPU/R600InstrInfo.h index dee4c2b9ae31..e7251c31107b 100644 --- a/lib/Target/AMDGPU/R600InstrInfo.h +++ b/lib/Target/AMDGPU/R600InstrInfo.h @@ -174,18 +174,18 @@ namespace llvm { bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const override ; + BranchProbability Probability) const override ; bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB, unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool DefinesPredicate(MachineInstr *MI, std::vector &Pred) const override; diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index 7beed092b3f7..33ef6a4e19ea 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -1655,7 +1655,7 @@ def : InsertVerticalPat ; // ISel Patterns //===----------------------------------------------------------------------===// -// CND*_INT Pattterns for f32 True / False values +// CND*_INT Patterns for f32 True / False values class CND_INT_f32 : Pat < (selectcc i32:$src0, 0, f32:$src1, f32:$src2, cc), diff --git a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp index 0c06ccc736d0..5efb3b9fc20e 100644 --- a/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp +++ b/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp @@ -318,7 +318,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { MRI = &(Fn.getRegInfo()); for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); MBB != MBBe; ++MBB) { - MachineBasicBlock *MB = MBB; + MachineBasicBlock *MB = &*MBB; PreviousRegSeq.clear(); PreviousRegSeqByReg.clear(); PreviousRegSeqByUndefCount.clear(); diff --git a/lib/Target/AMDGPU/R600Packetizer.cpp b/lib/Target/AMDGPU/R600Packetizer.cpp index deee5bc39974..21269613a305 100644 --- a/lib/Target/AMDGPU/R600Packetizer.cpp +++ b/lib/Target/AMDGPU/R600Packetizer.cpp @@ -81,11 +81,11 @@ private: int 
LastDstChan = -1; do { bool isTrans = false; - int BISlot = getSlot(BI); + int BISlot = getSlot(&*BI); if (LastDstChan >= BISlot) isTrans = true; LastDstChan = BISlot; - if (TII->isPredicated(BI)) + if (TII->isPredicated(&*BI)) continue; int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write); if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0) @@ -95,7 +95,7 @@ private: continue; } unsigned Dst = BI->getOperand(DstIdx).getReg(); - if (isTrans || TII->isTransOnly(BI)) { + if (isTrans || TII->isTransOnly(&*BI)) { Result[Dst] = AMDGPU::PS; continue; } @@ -149,7 +149,7 @@ private: public: // Ctor. R600PacketizerList(MachineFunction &MF, MachineLoopInfo &MLI) - : VLIWPacketizerList(MF, MLI, true), + : VLIWPacketizerList(MF, MLI, nullptr), TII(static_cast( MF.getSubtarget().getInstrInfo())), TRI(TII->getRegisterInfo()) { @@ -162,14 +162,14 @@ public: } // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - bool ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) override { + bool ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock *MBB) override { return false; } // isSoloInstruction - return true if instruction MI can not be packetized // with any other instruction, which means that MI itself is a packet. - bool isSoloInstruction(MachineInstr *MI) override { + bool isSoloInstruction(const MachineInstr *MI) override { if (TII->isVector(*MI)) return true; if (!TII->isALUInstr(MI->getOpcode())) @@ -375,7 +375,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { // instruction stream until we find the nearest boundary. MachineBasicBlock::iterator I = RegionEnd; for(;I != MBB->begin(); --I, --RemainingCount) { - if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn)) + if (TII->isSchedulingBoundary(&*std::prev(I), &*MBB, Fn)) break; } I = MBB->begin(); @@ -392,7 +392,7 @@ bool R600Packetizer::runOnMachineFunction(MachineFunction &Fn) { continue; } - Packetizer.PacketizeMIs(MBB, I, RegionEnd); + Packetizer.PacketizeMIs(&*MBB, &*I, RegionEnd); RegionEnd = I; } } diff --git a/lib/Target/AMDGPU/R600RegisterInfo.h b/lib/Target/AMDGPU/R600RegisterInfo.h index 9713e600a721..4f8a129ce4a6 100644 --- a/lib/Target/AMDGPU/R600RegisterInfo.h +++ b/lib/Target/AMDGPU/R600RegisterInfo.h @@ -35,7 +35,7 @@ struct R600RegisterInfo : public AMDGPURegisterInfo { /// \brief get the register class of the specified type to use in the /// CFGStructurizer - const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override; + const TargetRegisterClass *getCFGStructurizerRegClass(MVT VT) const; const RegClassWeight & getRegClassWeight(const TargetRegisterClass *RC) const override; diff --git a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp index ccfbf1bf19ed..fa4d24a2f25a 100644 --- a/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -312,11 +312,10 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end()) Preds.push_back(*PI); } - BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", nullptr, DT, - LI, false); + BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false); } - CallInst::Create(EndCf, popSaved(), "", BB->getFirstInsertionPt()); + CallInst::Create(EndCf, popSaved(), "", &*BB->getFirstInsertionPt()); } /// \brief Annotate the control flow with intrinsics so the backend can diff --git a/lib/Target/AMDGPU/SIDefines.h 
b/lib/Target/AMDGPU/SIDefines.h index 4c3263911c40..7f79dd34f3ba 100644 --- a/lib/Target/AMDGPU/SIDefines.h +++ b/lib/Target/AMDGPU/SIDefines.h @@ -37,7 +37,8 @@ enum { MIMG = 1 << 18, FLAT = 1 << 19, WQM = 1 << 20, - VGPRSpill = 1 << 21 + VGPRSpill = 1 << 21, + VOPAsmPrefer32Bit = 1 << 22 }; } diff --git a/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp b/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp index 5fe8d19426dd..636750dcfba2 100644 --- a/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp +++ b/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp @@ -16,15 +16,9 @@ #include "AMDGPU.h" #include "SIInstrInfo.h" -#include "SIRegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachinePostDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 23502b45905c..96e37c566240 100644 --- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -82,22 +82,10 @@ using namespace llvm; namespace { class SIFixSGPRCopies : public MachineFunctionPass { - -private: - static char ID; - const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI, - unsigned Reg, - unsigned SubReg) const; - const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI, - unsigned Reg, - unsigned SubReg) const; - bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI) const; - public: - SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } + static char ID; + + SIFixSGPRCopies() : MachineFunctionPass(ID) { } bool runOnMachineFunction(MachineFunction &MF) override; @@ -105,14 +93,23 @@ public: return "SI Fix SGPR copies"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } // End anonymous namespace +INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE, + "SI Fix SGPR copies", false, false) + char SIFixSGPRCopies::ID = 0; -FunctionPass *llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { - return new SIFixSGPRCopies(tm); +char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID; + +FunctionPass *llvm::createSIFixSGPRCopiesPass() { + return new SIFixSGPRCopies(); } static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { @@ -128,77 +125,115 @@ static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { return false; } -/// This functions walks the use list of Reg until it finds an Instruction -/// that isn't a COPY returns the register class of that instruction. -/// \return The register defined by the first non-COPY instruction. -const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses( - const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI, - unsigned Reg, - unsigned SubReg) const { - - const TargetRegisterClass *RC - = TargetRegisterInfo::isVirtualRegister(Reg) ? 
- MRI.getRegClass(Reg) : - TRI->getPhysRegClass(Reg); - - RC = TRI->getSubRegClass(RC, SubReg); - for (MachineRegisterInfo::use_instr_iterator - I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) { - switch (I->getOpcode()) { - case AMDGPU::COPY: - RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI, - I->getOperand(0).getReg(), - I->getOperand(0).getSubReg())); - break; - } - } - - return RC; -} - -const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef( - const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI, - unsigned Reg, - unsigned SubReg) const { - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); - return TRI->getSubRegClass(RC, SubReg); - } - MachineInstr *Def = MRI.getVRegDef(Reg); - if (Def->getOpcode() != AMDGPU::COPY) { - return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg); - } - - return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(), - Def->getOperand(1).getSubReg()); -} - -bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, - const SIRegisterInfo *TRI, - const MachineRegisterInfo &MRI) const { - +static std::pair +getCopyRegClasses(const MachineInstr &Copy, + const SIRegisterInfo &TRI, + const MachineRegisterInfo &MRI) { unsigned DstReg = Copy.getOperand(0).getReg(); unsigned SrcReg = Copy.getOperand(1).getReg(); - unsigned SrcSubReg = Copy.getOperand(1).getSubReg(); - if (!TargetRegisterInfo::isVirtualRegister(DstReg)) { - // If the destination register is a physical register there isn't really - // much we can do to fix this. + const TargetRegisterClass *SrcRC = + TargetRegisterInfo::isVirtualRegister(SrcReg) ? + MRI.getRegClass(SrcReg) : + TRI.getPhysRegClass(SrcReg); + + // We don't really care about the subregister here. + // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg()); + + const TargetRegisterClass *DstRC = + TargetRegisterInfo::isVirtualRegister(DstReg) ? + MRI.getRegClass(DstReg) : + TRI.getPhysRegClass(DstReg); + + return std::make_pair(SrcRC, DstRC); +} + +static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const SIRegisterInfo &TRI) { + return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC); +} + +static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const SIRegisterInfo &TRI) { + return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC); +} + +// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE. +// +// SGPRx = ... +// SGPRy = REG_SEQUENCE SGPRx, sub0 ... +// VGPRz = COPY SGPRy +// +// ==> +// +// VGPRx = COPY SGPRx +// VGPRz = REG_SEQUENCE VGPRx, sub0 +// +// This exposes immediate folding opportunities when materializing 64-bit +// immediates. +static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI, + const SIRegisterInfo *TRI, + const SIInstrInfo *TII, + MachineRegisterInfo &MRI) { + assert(MI.isRegSequence()); + + unsigned DstReg = MI.getOperand(0).getReg(); + if (!TRI->isSGPRClass(MRI.getRegClass(DstReg))) return false; + + if (!MRI.hasOneUse(DstReg)) + return false; + + MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg); + if (!CopyUse.isCopy()) + return false; + + const TargetRegisterClass *SrcRC, *DstRC; + std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI); + + if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI)) + return false; + + // TODO: Could have multiple extracts? 
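The fold below rewrites each input of the REG_SEQUENCE in turn. As a standalone sketch of the operand layout it walks (a plain vector of operand values stands in for the MachineInstr operand list, which is an assumption of this sketch): operand 0 is the def, and the inputs arrive as (value register, subregister index) pairs, hence the stride of two.

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Sketch: collect the (value, subreg index) input pairs of a
    // REG_SEQUENCE-like operand list. Index 0 holds the destination.
    static std::vector<std::pair<unsigned, unsigned>>
    regSequenceInputs(const std::vector<unsigned> &Ops) {
      std::vector<std::pair<unsigned, unsigned>> Pairs;
      for (size_t I = 1; I + 1 < Ops.size(); I += 2)
        Pairs.push_back({Ops[I], Ops[I + 1]});
      return Pairs;
    }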
+  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
+  if (SubReg != AMDGPU::NoSubRegister)
+    return false;
+
+  MRI.setRegClass(DstReg, DstRC);
+
+  // SGPRx = ...
+  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
+  // VGPRz = COPY SGPRy
+
+  // =>
+  // VGPRx = COPY SGPRx
+  // VGPRz = REG_SEQUENCE VGPRx, sub0
+
+  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());
+
+  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
+    unsigned SrcReg = MI.getOperand(I).getReg();
+    unsigned SrcSubReg = MI.getOperand(I).getSubReg();
+
+    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+    assert(TRI->isSGPRClass(SrcRC) &&
+           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");
+
+    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
+    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
+
+    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);
+
+    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), TmpReg)
+      .addOperand(MI.getOperand(I));
+
+    MI.getOperand(I).setReg(TmpReg);
+  }
-  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);
-
-  const TargetRegisterClass *SrcRC;
-
-  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
-      MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
-    return false;
-
-  SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
-  return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC);
+  CopyUse.eraseFromParent();
+  return true;
 }

 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
@@ -207,40 +242,38 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
     static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
   const SIInstrInfo *TII =
     static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  SmallVector<MachineInstr *, 16> Worklist;
+
   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                   BI != BE; ++BI) {

     MachineBasicBlock &MBB = *BI;
     for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-                                      I != E; ++I) {
+         I != E; ++I) {
       MachineInstr &MI = *I;
-      if (MI.getOpcode() == AMDGPU::COPY && isVGPRToSGPRCopy(MI, TRI, MRI)) {
-        DEBUG(dbgs() << "Fixing VGPR -> SGPR copy:\n");
-        DEBUG(MI.print(dbgs()));
-        TII->moveToVALU(MI);
-
-      }

       switch (MI.getOpcode()) {
-      default: continue;
+      default:
+        continue;
+      case AMDGPU::COPY: {
+        // If the destination register is a physical register there isn't really
+        // much we can do to fix this.
+ if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) + continue; + + const TargetRegisterClass *SrcRC, *DstRC; + std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI); + if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) { + DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI); + TII->moveToVALU(MI); + } + + break; + } case AMDGPU::PHI: { DEBUG(dbgs() << "Fixing PHI: " << MI); - - for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { - const MachineOperand &Op = MI.getOperand(i); - unsigned Reg = Op.getReg(); - const TargetRegisterClass *RC - = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg()); - - MRI.constrainRegClass(Op.getReg(), RC); - } unsigned Reg = MI.getOperand(0).getReg(); - const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg, - MI.getOperand(0).getSubReg()); - if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) { - MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass); - } - if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) break; @@ -310,8 +343,10 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { } case AMDGPU::REG_SEQUENCE: { if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || - !hasVGPROperands(MI, TRI)) + !hasVGPROperands(MI, TRI)) { + foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI); continue; + } DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI); diff --git a/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp b/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp index 0c54446b0fb1..8bda283f0fca 100644 --- a/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp +++ b/lib/Target/AMDGPU/SIFixSGPRLiveRanges.cpp @@ -7,9 +7,8 @@ // //===----------------------------------------------------------------------===// // -/// \file -/// SALU instructions ignore control flow, so we need to modify the live ranges -/// of the registers they define in some cases. +/// \file SALU instructions ignore the execution mask, so we need to modify the +/// live ranges of the registers they define in some cases. /// /// The main case we need to handle is when a def is used in one side of a /// branch and not another. For example: @@ -42,13 +41,15 @@ /// ENDIF /// %use /// -/// Adding this use will make the def live thoughout the IF branch, which is +/// Adding this use will make the def live throughout the IF branch, which is /// what we want. 
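The transform described above can be modeled with a toy CFG: when a register is live into exactly one of a branch's two successors, pin it with an artificial use at their nearest common post-dominator so both paths keep it live. Types and the liveness representation here are hypothetical; the real pass queries LiveVariables and MachinePostDominatorTree and emits an implicit SGPR_USE.

    #include <algorithm>
    #include <vector>

    struct Block {
      std::vector<unsigned> LiveIn;     // registers live into the block
      std::vector<unsigned> PinnedUses; // artificial uses added by the fix
    };

    static bool isLiveIn(const Block &B, unsigned Reg) {
      return std::find(B.LiveIn.begin(), B.LiveIn.end(), Reg) != B.LiveIn.end();
    }

    // NCD is assumed to be the nearest common post-dominator of SuccA/SuccB.
    static void pinExecIgnoringDef(unsigned Reg, const Block &SuccA,
                                   const Block &SuccB, Block &NCD) {
      bool InA = isLiveIn(SuccA, Reg), InB = isLiveIn(SuccB, Reg);
      if (InA == InB)
        return; // live into both or neither: no conflict to fix
      NCD.PinnedUses.push_back(Reg); // extend the def across both paths
    }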
#include "AMDGPU.h" #include "SIInstrInfo.h" #include "SIRegisterInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachinePostDominators.h" @@ -79,9 +80,13 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -90,7 +95,7 @@ public: INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE, "SI Fix SGPR Live Ranges", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE, "SI Fix SGPR Live Ranges", false, false) @@ -108,40 +113,48 @@ bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) { const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); const SIRegisterInfo *TRI = static_cast( MF.getSubtarget().getRegisterInfo()); - LiveIntervals *LIS = &getAnalysis(); - MachinePostDominatorTree *PDT = &getAnalysis(); - std::vector> SGPRLiveRanges; + bool MadeChange = false; - // First pass, collect all live intervals for SGPRs - for (const MachineBasicBlock &MBB : MF) { - for (const MachineInstr &MI : MBB) { + MachinePostDominatorTree *PDT = &getAnalysis(); + SmallVector SGPRLiveRanges; + + LiveVariables *LV = &getAnalysis(); + MachineBasicBlock *Entry = &MF.front(); + + // Use a depth first order so that in SSA, we encounter all defs before + // uses. Once the defs of the block have been found, attempt to insert + // SGPR_USE instructions in successor blocks if required. + for (MachineBasicBlock *MBB : depth_first(Entry)) { + for (const MachineInstr &MI : *MBB) { for (const MachineOperand &MO : MI.defs()) { - if (MO.isImplicit()) - continue; + // We should never see a live out def of a physical register, so we also + // do not need to worry about implicit_defs(). unsigned Def = MO.getReg(); if (TargetRegisterInfo::isVirtualRegister(Def)) { - if (TRI->isSGPRClass(MRI.getRegClass(Def))) - SGPRLiveRanges.push_back( - std::make_pair(Def, &LIS->getInterval(Def))); - } else if (TRI->isSGPRClass(TRI->getPhysRegClass(Def))) { - SGPRLiveRanges.push_back( - std::make_pair(Def, &LIS->getRegUnit(Def))); + if (TRI->isSGPRClass(MRI.getRegClass(Def))) { + // Only consider defs that are live outs. We don't care about def / + // use within the same block. + + // LiveVariables does not consider registers that are only used in a + // phi in a sucessor block as live out, unlike LiveIntervals. + // + // This is OK because SIFixSGPRCopies replaced any SGPR phis with + // VGPRs. + if (LV->isLiveOut(Def, *MBB)) + SGPRLiveRanges.push_back(Def); + } } } } - } - // Second pass fix the intervals - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - MachineBasicBlock &MBB = *BI; - if (MBB.succ_size() < 2) + if (MBB->succ_size() < 2) continue; - // We have structured control flow, so number of succesors should be two. - assert(MBB.succ_size() == 2); - MachineBasicBlock *SuccA = *MBB.succ_begin(); - MachineBasicBlock *SuccB = *(++MBB.succ_begin()); + // We have structured control flow, so the number of successors should be + // two. 
+ assert(MBB->succ_size() == 2); + MachineBasicBlock *SuccA = *MBB->succ_begin(); + MachineBasicBlock *SuccB = *(++MBB->succ_begin()); MachineBasicBlock *NCD = PDT->findNearestCommonDominator(SuccA, SuccB); if (!NCD) @@ -156,37 +169,51 @@ bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) { NCD = PDT->findNearestCommonDominator(*NCD->succ_begin(), *(++NCD->succ_begin())); } - assert(SuccA && SuccB); - for (std::pair RegLR : SGPRLiveRanges) { - unsigned Reg = RegLR.first; - LiveRange *LR = RegLR.second; - // FIXME: We could be smarter here. If the register is Live-In to - // one block, but the other doesn't have any SGPR defs, then there - // won't be a conflict. Also, if the branch decision is based on - // a value in an SGPR, then there will be no conflict. - bool LiveInToA = LIS->isLiveInToMBB(*LR, SuccA); - bool LiveInToB = LIS->isLiveInToMBB(*LR, SuccB); + for (unsigned Reg : SGPRLiveRanges) { + // FIXME: We could be smarter here. If the register is Live-In to one + // block, but the other doesn't have any SGPR defs, then there won't be a + // conflict. Also, if the branch condition is uniform then there will be + // no conflict. + bool LiveInToA = LV->isLiveIn(Reg, *SuccA); + bool LiveInToB = LV->isLiveIn(Reg, *SuccB); - if ((!LiveInToA && !LiveInToB) || - (LiveInToA && LiveInToB)) + if (!LiveInToA && !LiveInToB) { + DEBUG(dbgs() << PrintReg(Reg, TRI, 0) + << " is live into neither successor\n"); continue; + } + + if (LiveInToA && LiveInToB) { + DEBUG(dbgs() << PrintReg(Reg, TRI, 0) + << " is live into both successors\n"); + continue; + } // This interval is live in to one successor, but not the other, so // we need to update its range so it is live in to both. - DEBUG(dbgs() << "Possible SGPR conflict detected " << " in " << *LR << - " BB#" << SuccA->getNumber() << ", BB#" << - SuccB->getNumber() << - " with NCD = " << NCD->getNumber() << '\n'); + DEBUG(dbgs() << "Possible SGPR conflict detected for " + << PrintReg(Reg, TRI, 0) + << " BB#" << SuccA->getNumber() + << ", BB#" << SuccB->getNumber() + << " with NCD = BB#" << NCD->getNumber() << '\n'); + + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Not expecting to extend live range of physreg"); // FIXME: Need to figure out how to update LiveRange here so this pass // will be able to preserve LiveInterval analysis. - BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(), - TII->get(AMDGPU::SGPR_USE)) - .addReg(Reg, RegState::Implicit); - DEBUG(NCD->getFirstNonPHI()->dump()); + MachineInstr *NCDSGPRUse = + BuildMI(*NCD, NCD->getFirstNonPHI(), DebugLoc(), + TII->get(AMDGPU::SGPR_USE)) + .addReg(Reg, RegState::Implicit); + + MadeChange = true; + LV->HandleVirtRegUse(Reg, NCD, NCDSGPRUse); + + DEBUG(NCDSGPRUse->dump()); } } - return false; + return MadeChange; } diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index c2887255cc11..02a39307e74e 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -45,6 +45,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -164,8 +165,8 @@ static bool tryAddToFoldList(std::vector &FoldList, // Operand is not legal, so try to commute the instruction to // see if this makes it possible to fold. 
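For the commute attempt below, the operand indices are now seeded with TargetInstrInfo::CommuteAnyOperandIndex so the target may pick the pair itself. A condensed sketch of that calling pattern, assuming the TargetInstrInfo interface of this revision:

    // Sketch: ask the target for a commutable operand pair (either index
    // may be pre-pinned by the caller), commute that exact pair in place,
    // and only then re-test operand legality for the fold.
    static bool tryCommuteForFold(const TargetInstrInfo *TII, MachineInstr *MI) {
      unsigned Idx0 = TargetInstrInfo::CommuteAnyOperandIndex;
      unsigned Idx1 = TargetInstrInfo::CommuteAnyOperandIndex;
      if (!TII->findCommutedOpIndices(MI, Idx0, Idx1))
        return false;
      return TII->commuteInstruction(MI, /*NewMI=*/false, Idx0, Idx1) != nullptr;
    }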
- unsigned CommuteIdx0; - unsigned CommuteIdx1; + unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex; + unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex; bool CanCommute = TII->findCommutedOpIndices(MI, CommuteIdx0, CommuteIdx1); if (CanCommute) { @@ -175,7 +176,16 @@ static bool tryAddToFoldList(std::vector &FoldList, OpNo = CommuteIdx0; } - if (!CanCommute || !TII->commuteInstruction(MI)) + // One of operands might be an Imm operand, and OpNo may refer to it after + // the call of commuteInstruction() below. Such situations are avoided + // here explicitly as OpNo must be a register operand to be a candidate + // for memory folding. + if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() || + !MI->getOperand(CommuteIdx1).isReg())) + return false; + + if (!CanCommute || + !TII->commuteInstruction(MI, false, CommuteIdx0, CommuteIdx1)) return false; if (!TII->isOperandLegal(MI, OpNo, OpToFold)) @@ -186,6 +196,110 @@ static bool tryAddToFoldList(std::vector &FoldList, return true; } +static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI, + unsigned UseOpIdx, + std::vector &FoldList, + SmallVectorImpl &CopiesToReplace, + const SIInstrInfo *TII, const SIRegisterInfo &TRI, + MachineRegisterInfo &MRI) { + const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx); + + // FIXME: Fold operands with subregs. + if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) || + UseOp.isImplicit())) { + return; + } + + bool FoldingImm = OpToFold.isImm(); + APInt Imm; + + if (FoldingImm) { + unsigned UseReg = UseOp.getReg(); + const TargetRegisterClass *UseRC + = TargetRegisterInfo::isVirtualRegister(UseReg) ? + MRI.getRegClass(UseReg) : + TRI.getPhysRegClass(UseReg); + + Imm = APInt(64, OpToFold.getImm()); + + const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode()); + const TargetRegisterClass *FoldRC = + TRI.getRegClass(FoldDesc.OpInfo[0].RegClass); + + // Split 64-bit constants into 32-bits for folding. + if (FoldRC->getSize() == 8 && UseOp.getSubReg()) { + if (UseRC->getSize() != 8) + return; + + if (UseOp.getSubReg() == AMDGPU::sub0) { + Imm = Imm.getLoBits(32); + } else { + assert(UseOp.getSubReg() == AMDGPU::sub1); + Imm = Imm.getHiBits(32); + } + } + + // In order to fold immediates into copies, we need to change the + // copy to a MOV. + if (UseMI->getOpcode() == AMDGPU::COPY) { + unsigned DestReg = UseMI->getOperand(0).getReg(); + const TargetRegisterClass *DestRC + = TargetRegisterInfo::isVirtualRegister(DestReg) ? + MRI.getRegClass(DestReg) : + TRI.getPhysRegClass(DestReg); + + unsigned MovOp = TII->getMovOpcode(DestRC); + if (MovOp == AMDGPU::COPY) + return; + + UseMI->setDesc(TII->get(MovOp)); + CopiesToReplace.push_back(UseMI); + } + } + + // Special case for REG_SEQUENCE: We can't fold literals into + // REG_SEQUENCE instructions, so we have to fold them into the + // uses of REG_SEQUENCE. + if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) { + unsigned RegSeqDstReg = UseMI->getOperand(0).getReg(); + unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm(); + + for (MachineRegisterInfo::use_iterator + RSUse = MRI.use_begin(RegSeqDstReg), + RSE = MRI.use_end(); RSUse != RSE; ++RSUse) { + + MachineInstr *RSUseMI = RSUse->getParent(); + if (RSUse->getSubReg() != RegSeqDstSubReg) + continue; + + foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList, + CopiesToReplace, TII, TRI, MRI); + } + return; + } + + const MCInstrDesc &UseDesc = UseMI->getDesc(); + + // Don't fold into target independent nodes. 
Target independent opcodes + // don't have defined register classes. + if (UseDesc.isVariadic() || + UseDesc.OpInfo[UseOpIdx].RegClass == -1) + return; + + if (FoldingImm) { + MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); + tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII); + return; + } + + tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII); + + // FIXME: We could try to change the instruction from 64-bit to 32-bit + // to enable more folding opportunites. The shrink operands pass + // already does this. + return; +} + bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIInstrInfo *TII = @@ -226,88 +340,36 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { OpToFold.getSubReg())) continue; + + // We need mutate the operands of new mov instructions to add implicit + // uses of EXEC, but adding them invalidates the use_iterator, so defer + // this. + SmallVector CopiesToReplace; + std::vector FoldList; for (MachineRegisterInfo::use_iterator Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end(); Use != E; ++Use) { MachineInstr *UseMI = Use->getParent(); - const MachineOperand &UseOp = UseMI->getOperand(Use.getOperandNo()); - // FIXME: Fold operands with subregs. - if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) || - UseOp.isImplicit())) { - continue; - } - - APInt Imm; - - if (FoldingImm) { - unsigned UseReg = UseOp.getReg(); - const TargetRegisterClass *UseRC - = TargetRegisterInfo::isVirtualRegister(UseReg) ? - MRI.getRegClass(UseReg) : - TRI.getPhysRegClass(UseReg); - - Imm = APInt(64, OpToFold.getImm()); - - // Split 64-bit constants into 32-bits for folding. - if (UseOp.getSubReg()) { - if (UseRC->getSize() != 8) - continue; - - if (UseOp.getSubReg() == AMDGPU::sub0) { - Imm = Imm.getLoBits(32); - } else { - assert(UseOp.getSubReg() == AMDGPU::sub1); - Imm = Imm.getHiBits(32); - } - } - - // In order to fold immediates into copies, we need to change the - // copy to a MOV. - if (UseMI->getOpcode() == AMDGPU::COPY) { - unsigned DestReg = UseMI->getOperand(0).getReg(); - const TargetRegisterClass *DestRC - = TargetRegisterInfo::isVirtualRegister(DestReg) ? - MRI.getRegClass(DestReg) : - TRI.getPhysRegClass(DestReg); - - unsigned MovOp = TII->getMovOpcode(DestRC); - if (MovOp == AMDGPU::COPY) - continue; - - UseMI->setDesc(TII->get(MovOp)); - } - } - - const MCInstrDesc &UseDesc = UseMI->getDesc(); - - // Don't fold into target independent nodes. Target independent opcodes - // don't have defined register classes. - if (UseDesc.isVariadic() || - UseDesc.OpInfo[Use.getOperandNo()].RegClass == -1) - continue; - - if (FoldingImm) { - MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue()); - tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &ImmOp, TII); - continue; - } - - tryAddToFoldList(FoldList, UseMI, Use.getOperandNo(), &OpToFold, TII); - - // FIXME: We could try to change the instruction from 64-bit to 32-bit - // to enable more folding opportunites. The shrink operands pass - // already does this. + foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList, + CopiesToReplace, TII, TRI, MRI); } + // Make sure we add EXEC uses to any new v_mov instructions created. + for (MachineInstr *Copy : CopiesToReplace) + Copy->addImplicitDefUseOperands(MF); + for (FoldCandidate &Fold : FoldList) { if (updateOperand(Fold, TRI)) { // Clear kill flags. 
if (!Fold.isImm()) { assert(Fold.OpToFold && Fold.OpToFold->isReg()); - Fold.OpToFold->setIsKill(false); + // FIXME: Probably shouldn't bother trying to fold if not an + // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR + // copies. + MRI.clearKillFlags(Fold.OpToFold->getReg()); } DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " << Fold.UseOpNo << " of " << *Fold.UseMI << '\n'); diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp new file mode 100644 index 000000000000..6b3c81c3af74 --- /dev/null +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -0,0 +1,243 @@ +//===----------------------- SIFrameLowering.cpp --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//==-----------------------------------------------------------------------===// + +#include "SIFrameLowering.h" +#include "SIInstrInfo.h" +#include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/RegisterScavenging.h" + +using namespace llvm; + + +static bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo, + const MachineFrameInfo *FrameInfo) { + if (!FuncInfo->hasSpilledSGPRs()) + return false; + + if (FuncInfo->hasSpilledVGPRs()) + return false; + + for (int I = FrameInfo->getObjectIndexBegin(), + E = FrameInfo->getObjectIndexEnd(); I != E; ++I) { + if (!FrameInfo->isSpillSlotObjectIndex(I)) + return false; + } + + return true; +} + +static ArrayRef getAllSGPR128() { + return makeArrayRef(AMDGPU::SReg_128RegClass.begin(), + AMDGPU::SReg_128RegClass.getNumRegs()); +} + +static ArrayRef getAllSGPRs() { + return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(), + AMDGPU::SGPR_32RegClass.getNumRegs()); +} + +void SIFrameLowering::emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + if (!MF.getFrameInfo()->hasStackObjects()) + return; + + assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); + + SIMachineFunctionInfo *MFI = MF.getInfo(); + + // If we only have SGPR spills, we won't actually be using scratch memory + // since these spill to VGPRs. + // + // FIXME: We should be cleaning up these unused SGPR spill frame indices + // somewhere. + if (hasOnlySGPRSpills(MFI, MF.getFrameInfo())) + return; + + const SIInstrInfo *TII = + static_cast(MF.getSubtarget().getInstrInfo()); + const SIRegisterInfo *TRI = &TII->getRegisterInfo(); + const AMDGPUSubtarget &ST = MF.getSubtarget(); + + // We need to insert initialization of the scratch resource descriptor. + unsigned ScratchRsrcReg = MFI->getScratchRSrcReg(); + assert(ScratchRsrcReg != AMDGPU::NoRegister); + + unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); + assert(ScratchWaveOffsetReg != AMDGPU::NoRegister); + + unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); + + unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister; + if (ST.isAmdHsaOS()) { + PreloadedPrivateBufferReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + } + + // If we reserved the original input registers, we don't need to copy to the + // reserved registers. + if (ScratchRsrcReg == PreloadedPrivateBufferReg) { + // We should always reserve these 5 registers at the same time. 
+  assert(ScratchWaveOffsetReg == PreloadedScratchWaveOffsetReg &&
+         "scratch wave offset and private segment buffer inconsistent");
+    return;
+  }
+
+
+  // We added live-ins during argument lowering, but since they were not used
+  // they were deleted. We're adding the uses now, so add them back.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+  MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
+
+  if (ST.isAmdHsaOS()) {
+    MRI.addLiveIn(PreloadedPrivateBufferReg);
+    MBB.addLiveIn(PreloadedPrivateBufferReg);
+  }
+
+  // We reserved the last registers for this. Shift it down to the end of those
+  // which were actually used.
+  //
+  // FIXME: It might be safer to use a pseudoregister before replacement.
+
+  // FIXME: We should be able to eliminate unused input registers. We only
+  // cannot do this for the resources required for scratch access. For now we
+  // skip over user SGPRs and may leave unused holes.
+
+  // We find the resource first because it has an alignment requirement.
+  if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+
+    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
+    // Skip the last 2 elements because the last one is reserved for VCC, and
+    // this is the 2nd to last element already.
+    for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
+      // Pick the first unallocated one. Make sure we don't clobber the other
+      // reserved input we needed.
+      if (!MRI.isPhysRegUsed(Reg)) {
+        assert(MRI.isAllocatable(Reg));
+        MRI.replaceRegWith(ScratchRsrcReg, Reg);
+        ScratchRsrcReg = Reg;
+        MFI->setScratchRSrcReg(ScratchRsrcReg);
+        break;
+      }
+    }
+  }
+
+  if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
+    MachineRegisterInfo &MRI = MF.getRegInfo();
+    // Skip the last 2 elements because the last one is reserved for VCC, and
+    // this is the 2nd to last element already.
+    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
+    for (MCPhysReg Reg : getAllSGPRs().drop_back(6).slice(NumPreloaded)) {
+      // Pick the first unallocated SGPR. Be careful not to pick an alias of the
+      // scratch descriptor, since we haven't added its uses yet.
+      if (!MRI.isPhysRegUsed(Reg)) {
+        assert(MRI.isAllocatable(Reg) &&
+               !TRI->isSubRegisterEq(ScratchRsrcReg, Reg));
+
+        MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+        ScratchWaveOffsetReg = Reg;
+        MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
+        break;
+      }
+    }
+  }
+
+
+  assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
+
+  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
+  MachineBasicBlock::iterator I = MBB.begin();
+  DebugLoc DL;
+
+  if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
+    // Make sure we emit the copy for the offset first. We may have chosen to copy
+    // the buffer resource into a register that aliases the input offset register.
+    BuildMI(MBB, I, DL, SMovB32, ScratchWaveOffsetReg)
+      .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
+  }
+
+  if (ST.isAmdHsaOS()) {
+    // Insert copies from argument register.
+ assert( + !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) && + !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchWaveOffsetReg)); + + unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1); + unsigned Rsrc23 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2_sub3); + + unsigned Lo = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub0_sub1); + unsigned Hi = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub2_sub3); + + const MCInstrDesc &SMovB64 = TII->get(AMDGPU::S_MOV_B64); + + BuildMI(MBB, I, DL, SMovB64, Rsrc01) + .addReg(Lo, RegState::Kill); + BuildMI(MBB, I, DL, SMovB64, Rsrc23) + .addReg(Hi, RegState::Kill); + } else { + unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); + unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); + unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); + unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); + + // Use relocations to get the pointer, and setup the other bits manually. + uint64_t Rsrc23 = TII->getScratchRsrcWords23(); + BuildMI(MBB, I, DL, SMovB32, Rsrc0) + .addExternalSymbol("SCRATCH_RSRC_DWORD0") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc1) + .addExternalSymbol("SCRATCH_RSRC_DWORD1") + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc2) + .addImm(Rsrc23 & 0xffffffff) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + + BuildMI(MBB, I, DL, SMovB32, Rsrc3) + .addImm(Rsrc23 >> 32) + .addReg(ScratchRsrcReg, RegState::ImplicitDefine); + } + + // Make the register selected live throughout the function. + for (MachineBasicBlock &OtherBB : MF) { + if (&OtherBB == &MBB) + continue; + + OtherBB.addLiveIn(ScratchRsrcReg); + OtherBB.addLiveIn(ScratchWaveOffsetReg); + } +} + +void SIFrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, + RegScavenger *RS) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + if (!MFI->hasStackObjects()) + return; + + bool MayNeedScavengingEmergencySlot = MFI->hasStackObjects(); + + assert((RS || !MayNeedScavengingEmergencySlot) && + "RegScavenger required if spilling"); + + if (MayNeedScavengingEmergencySlot) { + int ScavengeFI = MFI->CreateSpillStackObject( + AMDGPU::SGPR_32RegClass.getSize(), + AMDGPU::SGPR_32RegClass.getAlignment()); + RS->addScavengingFrameIndex(ScavengeFI); + } +} diff --git a/lib/Target/AMDGPU/SIFrameLowering.h b/lib/Target/AMDGPU/SIFrameLowering.h new file mode 100644 index 000000000000..a9152fd8b2aa --- /dev/null +++ b/lib/Target/AMDGPU/SIFrameLowering.h @@ -0,0 +1,34 @@ +//===--------------------- SIFrameLowering.h --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H +#define LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H + +#include "AMDGPUFrameLowering.h" + +namespace llvm { + +class SIFrameLowering final : public AMDGPUFrameLowering { +public: + SIFrameLowering(StackDirection D, unsigned StackAl, int LAO, + unsigned TransAl = 1) : + AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {} + ~SIFrameLowering() override {} + + void emitPrologue(MachineFunction &MF, + MachineBasicBlock &MBB) const override; + + void processFunctionBeforeFrameFinalized( + MachineFunction &MF, + RegScavenger *RS = nullptr) const override; +}; + +} + +#endif diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index c2db9ff537e9..0e043cb47da7 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -20,6 +20,7 @@ #include "SIISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUDiagnosticInfoUnsupported.h" #include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" @@ -51,6 +52,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass); addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass); + addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass); + addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); @@ -103,6 +107,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::SETCC, MVT::v4i1, Expand); setOperationAction(ISD::BSWAP, MVT::i32, Legal); + setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); @@ -155,13 +160,30 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); + setTruncStoreAction(MVT::i64, MVT::i32, Expand); setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand); setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); + + setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand); + + setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand); + setOperationAction(ISD::LOAD, MVT::i1, Custom); + setOperationAction(ISD::LOAD, MVT::v2i64, Promote); + AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32); + + setOperationAction(ISD::STORE, MVT::v2i64, Promote); + AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32); + + setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand); + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); @@ -173,9 +195,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); setOperationAction(ISD::SELECT, MVT::i1, Promote); + setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand); + + + setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); + // We only support LOAD/STORE and vector manipulation ops for vectors // with > 4 elements. 
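The 64-bit vector types registered above get their operations routed through v4i32 further down in this hunk via the promote-and-retag pattern. A hedged sketch of that pattern for a hypothetical target lowering (the class and method names are invented):

    // Sketch: mark each op Promote and record the carrier type; the
    // legalizer then bitcasts v2i64/v2f64 operands to v4i32 and back.
    void HypotheticalTargetLowering::promoteVec64Ops() {
      for (MVT Vec64 : {MVT::v2i64, MVT::v2f64}) {
        for (unsigned Op : {ISD::BUILD_VECTOR, ISD::EXTRACT_VECTOR_ELT,
                            ISD::INSERT_VECTOR_ELT, ISD::SCALAR_TO_VECTOR}) {
          setOperationAction(Op, Vec64, Promote);
          AddPromotedToType(Op, Vec64, MVT::v4i32);
        }
      }
    }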
- for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) { + for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) { for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { switch(Op) { case ISD::LOAD: @@ -186,6 +213,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, case ISD::INSERT_VECTOR_ELT: case ISD::INSERT_SUBVECTOR: case ISD::EXTRACT_SUBVECTOR: + case ISD::SCALAR_TO_VECTOR: break; case ISD::CONCAT_VECTORS: setOperationAction(Op, VT, Custom); @@ -197,6 +225,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM, } } + // Most operations are naturally 32-bit vector operations. We only support + // load and store of i64 vectors, so promote v2i64 vector operations to v4i32. + for (MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) { + setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32); + + setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote); + AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32); + + setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote); + AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v4i32); + + setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote); + AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v4i32); + } + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); @@ -261,6 +305,41 @@ bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const { return AM.BaseOffs == 0 && (AM.Scale == 0 || AM.Scale == 1); } +bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const { + // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and + // additionally can do r + r + i with addr64. 32-bit has more addressing + // mode options. Depending on the resource constant, it can also do + // (i64 r0) + (i32 r1) * (i14 i). + // + // Private arrays end up using a scratch buffer most of the time, so also + // assume those use MUBUF instructions. Scratch loads / stores are currently + // implemented as mubuf instructions with offen bit set, so slightly + // different than the normal addr64. + if (!isUInt<12>(AM.BaseOffs)) + return false; + + // FIXME: Since we can split immediate into soffset and immediate offset, + // would it make sense to allow any immediate? + + switch (AM.Scale) { + case 0: // r + i or just i, depending on HasBaseReg. + return true; + case 1: + return true; // We have r + r or r + i. + case 2: + if (AM.HasBaseReg) { + // Reject 2 * r + r. + return false; + } + + // Allow 2 * r as r + r + // Or 2 * r + i is allowed as r + r + i. + return true; + default: // Don't allow n * r + return false; + } +} + bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const { @@ -269,7 +348,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return false; switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: + case AMDGPUAS::GLOBAL_ADDRESS: { if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { // Assume the we will use FLAT for all global memory accesses // on VI. @@ -282,51 +361,51 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, // because it has never been validated. return isLegalFlatAddressingMode(AM); } - // fall-through - case AMDGPUAS::PRIVATE_ADDRESS: - case AMDGPUAS::CONSTANT_ADDRESS: // XXX - Should we assume SMRD instructions? 
- case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: { - // MUBUF / MTBUF instructions have a 12-bit unsigned byte offset, and - // additionally can do r + r + i with addr64. 32-bit has more addressing - // mode options. Depending on the resource constant, it can also do - // (i64 r0) + (i32 r1) * (i14 i). - // - // SMRD instructions have an 8-bit, dword offset. - // - // Assume nonunifom access, since the address space isn't enough to know - // what instruction we will use, and since we don't know if this is a load - // or store and scalar stores are only available on VI. - // - // We also know if we are doing an extload, we can't do a scalar load. - // - // Private arrays end up using a scratch buffer most of the time, so also - // assume those use MUBUF instructions. Scratch loads / stores are currently - // implemented as mubuf instructions with offen bit set, so slightly - // different than the normal addr64. - if (!isUInt<12>(AM.BaseOffs)) - return false; - // FIXME: Since we can split immediate into soffset and immediate offset, - // would it make sense to allow any immediate? - - switch (AM.Scale) { - case 0: // r + i or just i, depending on HasBaseReg. - return true; - case 1: - return true; // We have r + r or r + i. - case 2: - if (AM.HasBaseReg) { - // Reject 2 * r + r. - return false; - } - - // Allow 2 * r as r + r - // Or 2 * r + i is allowed as r + r + i. - return true; - default: // Don't allow n * r - return false; - } + return isLegalMUBUFAddressingMode(AM); } + case AMDGPUAS::CONSTANT_ADDRESS: { + // If the offset isn't a multiple of 4, it probably isn't going to be + // correctly aligned. + if (AM.BaseOffs % 4 != 0) + return isLegalMUBUFAddressingMode(AM); + + // There are no SMRD extloads, so if we have to do a small type access we + // will use a MUBUF load. + // FIXME?: We also need to do this if unaligned, but we don't know the + // alignment here. + if (DL.getTypeStoreSize(Ty) < 4) + return isLegalMUBUFAddressingMode(AM); + + if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS) { + // SMRD instructions have an 8-bit, dword offset on SI. + if (!isUInt<8>(AM.BaseOffs / 4)) + return false; + } else if (Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS) { + // On CI+, this can also be a 32-bit literal constant offset. If it fits + // in 8-bits, it can use a smaller encoding. + if (!isUInt<32>(AM.BaseOffs / 4)) + return false; + } else if (Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // On VI, these use the SMEM format and the offset is 20-bit in bytes. + if (!isUInt<20>(AM.BaseOffs)) + return false; + } else + llvm_unreachable("unhandled generation"); + + if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg. + return true; + + if (AM.Scale == 1 && AM.HasBaseReg) + return true; + + return false; + } + + case AMDGPUAS::PRIVATE_ADDRESS: + case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: + return isLegalMUBUFAddressingMode(AM); + case AMDGPUAS::LOCAL_ADDRESS: case AMDGPUAS::REGION_ADDRESS: { // Basic, single offset DS instructions allow a 16-bit unsigned immediate @@ -374,7 +453,10 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte // aligned, 8 byte access in a single operation using ds_read2/write2_b32 // with adjacent offsets. - return Align % 4 == 0; + bool AlignedBy4 = (Align % 4 == 0); + if (IsFast) + *IsFast = AlignedBy4; + return AlignedBy4; } // Smaller than dword value must be aligned. 
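The constant-address case added above encodes three SMRD immediate ranges, one per generation. The same decision table as a standalone sketch (the generation enum and helper are illustrative, not the backend's types):

    #include <cstdint>

    enum class Gen { SI, CI, VI };

    // Sketch: SI takes an 8-bit dword offset, CI allows a 32-bit literal
    // dword offset, and VI uses a 20-bit byte offset. Negative offsets are
    // rejected by the unsigned comparisons, matching isUInt<N>.
    static bool isLegalSMRDImmOffset(Gen G, int64_t ByteOffs) {
      switch (G) {
      case Gen::SI: return (uint64_t)(ByteOffs / 4) < (1u << 8);
      case Gen::CI: return (uint64_t)(ByteOffs / 4) <= UINT32_MAX;
      case Gen::VI: return (uint64_t)ByteOffs < (1u << 20);
      }
      return false;
    }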
@@ -411,6 +493,32 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, return MVT::Other; } +static bool isFlatGlobalAddrSpace(unsigned AS) { + return AS == AMDGPUAS::GLOBAL_ADDRESS || + AS == AMDGPUAS::FLAT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS; +} + +bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); +} + + +bool SITargetLowering::isMemOpUniform(const SDNode *N) const { + const MemSDNode *MemNode = cast(N); + const Value *Ptr = MemNode->getMemOperand()->getValue(); + + // UndefValue means this is a load of a kernel input. These are uniform. + // Sometimes LDS instructions have constant pointers + if (isa(Ptr) || isa(Ptr) || isa(Ptr) || + isa(Ptr)) + return true; + + const Instruction *I = dyn_cast_or_null(Ptr); + return I && I->getMetadata("amdgpu.uniform"); +} + TargetLoweringBase::LegalizeTypeAction SITargetLowering::getPreferredVectorAction(EVT VT) const { if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16)) @@ -426,12 +534,6 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return TII->isInlineConstant(Imm); } -static EVT toIntegerVT(EVT VT) { - if (VT.isVector()) - return VT.changeVectorElementTypeToInteger(); - return MVT::getIntegerVT(VT.getSizeInBits()); -} - SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc SL, SDValue Chain, unsigned Offset, bool Signed) const { @@ -439,7 +541,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, MachineFunction &MF = DAG.getMachineFunction(); const SIRegisterInfo *TRI = static_cast(Subtarget->getRegisterInfo()); - unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR); + unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); @@ -455,30 +557,10 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, unsigned Align = DL.getABITypeAlignment(Ty); - if (VT != MemVT && VT.isFloatingPoint()) { - // Do an integer load and convert. - // FIXME: This is mostly because load legalization after type legalization - // doesn't handle FP extloads. - assert(VT.getScalarType() == MVT::f32 && - MemVT.getScalarType() == MVT::f16); - - EVT IVT = toIntegerVT(VT); - EVT MemIVT = toIntegerVT(MemVT); - SDValue Load = DAG.getLoad(ISD::UNINDEXED, ISD::ZEXTLOAD, - IVT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemIVT, - false, // isVolatile - true, // isNonTemporal - true, // isInvariant - Align); // Alignment - SDValue Ops[] = { - DAG.getNode(ISD::FP16_TO_FP, SL, VT, Load), - Load.getValue(1) - }; - - return DAG.getMergeValues(Ops, SL); - } - ISD::LoadExtType ExtTy = Signed ? 
ISD::SEXTLOAD : ISD::ZEXTLOAD; + if (MemVT.isFloatingPoint()) + ExtTy = ISD::EXTLOAD; + return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset, PtrInfo, MemVT, false, // isVolatile @@ -497,8 +579,16 @@ SDValue SITargetLowering::LowerFormalArguments( MachineFunction &MF = DAG.getMachineFunction(); FunctionType *FType = MF.getFunction()->getFunctionType(); SIMachineFunctionInfo *Info = MF.getInfo(); + const AMDGPUSubtarget &ST = MF.getSubtarget(); - assert(CallConv == CallingConv::C); + if (Subtarget->isAmdHsaOS() && Info->getShaderType() != ShaderType::COMPUTE) { + const Function *Fn = MF.getFunction(); + DiagnosticInfoUnsupported NoGraphicsHSA(*Fn, "non-compute shaders with HSA"); + DAG.getContext()->diagnose(NoGraphicsHSA); + return SDValue(); + } + + // FIXME: We currently assume all calling conventions are kernels. SmallVector Splits; BitVector Skipped(Ins.size()); @@ -513,7 +603,7 @@ SDValue SITargetLowering::LowerFormalArguments( assert((PSInputNum <= 15) && "Too many PS inputs!"); if (!Arg.Used) { - // We can savely skip PS inputs + // We can safely skip PS inputs Skipped.set(i); ++PSInputNum; continue; @@ -530,7 +620,7 @@ SDValue SITargetLowering::LowerFormalArguments( // We REALLY want the ORIGINAL number of vertex elements here, e.g. a // three or five element vertex only needs three or five registers, - // NOT four or eigth. + // NOT four or eight. Type *ParamType = FType->getParamType(Arg.getOrigArgIndex()); unsigned NumElements = ParamType->getVectorNumElements(); @@ -556,41 +646,30 @@ SDValue SITargetLowering::LowerFormalArguments( CCInfo.AllocateReg(AMDGPU::VGPR1); } - // The pointer to the list of arguments is stored in SGPR0, SGPR1 - // The pointer to the scratch buffer is stored in SGPR2, SGPR3 - if (Info->getShaderType() == ShaderType::COMPUTE) { - if (Subtarget->isAmdHsaOS()) - Info->NumUserSGPRs = 2; // FIXME: Need to support scratch buffers. - else - Info->NumUserSGPRs = 4; - - unsigned InputPtrReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::INPUT_PTR); - unsigned InputPtrRegLo = - TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 0); - unsigned InputPtrRegHi = - TRI->getPhysRegSubReg(InputPtrReg, &AMDGPU::SReg_32RegClass, 1); - - unsigned ScratchPtrReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR); - unsigned ScratchPtrRegLo = - TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 0); - unsigned ScratchPtrRegHi = - TRI->getPhysRegSubReg(ScratchPtrReg, &AMDGPU::SReg_32RegClass, 1); - - CCInfo.AllocateReg(InputPtrRegLo); - CCInfo.AllocateReg(InputPtrRegHi); - CCInfo.AllocateReg(ScratchPtrRegLo); - CCInfo.AllocateReg(ScratchPtrRegHi); - MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass); - MF.addLiveIn(ScratchPtrReg, &AMDGPU::SReg_64RegClass); - } - if (Info->getShaderType() == ShaderType::COMPUTE) { getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins, Splits); } + // FIXME: How should these inputs interact with inreg / custom SGPR inputs? 
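Each preloaded input wired up below follows the same two steps: declare the physical register as a function live-in, then mark it allocated so the calling-convention state skips it for later arguments. Condensed into one helper (the helper itself is hypothetical):

    // Sketch: register one preloaded user SGPR. addLiveIn exposes the
    // physreg to the function body; AllocateReg reserves it in CCState so
    // no later formal argument is assigned the same register.
    static void wirePreloadedInput(MachineFunction &MF, CCState &CCInfo,
                                   unsigned PhysReg,
                                   const TargetRegisterClass *RC) {
      MF.addLiveIn(PhysReg, RC);
      CCInfo.AllocateReg(PhysReg);
    }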
+  if (Info->hasPrivateSegmentBuffer()) {
+    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
+    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
+    CCInfo.AllocateReg(PrivateSegmentBufferReg);
+  }
+
+  if (Info->hasDispatchPtr()) {
+    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
+    MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass);
+    CCInfo.AllocateReg(DispatchPtrReg);
+  }
+
+  if (Info->hasKernargSegmentPtr()) {
+    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
+    MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
+    CCInfo.AllocateReg(InputPtrReg);
+  }
+
   AnalyzeFormalArguments(CCInfo, Splits);

   SmallVector<SDValue, 16> Chains;
@@ -617,7 +696,7 @@ SDValue SITargetLowering::LowerFormalArguments(
                                    Offset, Ins[i].Flags.isSExt());
     Chains.push_back(Arg.getValue(1));

-    const PointerType *ParamTy =
+    auto *ParamTy =
       dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
     if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
         ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
@@ -678,10 +757,113 @@ SDValue SITargetLowering::LowerFormalArguments(
     InVals.push_back(Val);
   }

-  if (Info->getShaderType() != ShaderType::COMPUTE) {
-    unsigned ScratchIdx = CCInfo.getFirstUnallocated(ArrayRef<MCPhysReg>(
-        AMDGPU::SGPR_32RegClass.begin(), AMDGPU::SGPR_32RegClass.getNumRegs()));
-    Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
+  // TODO: Add GridWorkGroupCount user SGPRs when used. For now with HSA we read
+  // these from the dispatch pointer.
+
+  // Start adding system SGPRs.
+  if (Info->hasWorkGroupIDX()) {
+    unsigned Reg = Info->addWorkGroupIDX();
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    CCInfo.AllocateReg(Reg);
+  } else
+    llvm_unreachable("work group id x is always enabled");
+
+  if (Info->hasWorkGroupIDY()) {
+    unsigned Reg = Info->addWorkGroupIDY();
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info->hasWorkGroupIDZ()) {
+    unsigned Reg = Info->addWorkGroupIDZ();
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info->hasWorkGroupInfo()) {
+    unsigned Reg = Info->addWorkGroupInfo();
+    MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+    CCInfo.AllocateReg(Reg);
+  }
+
+  if (Info->hasPrivateSegmentWaveByteOffset()) {
+    // Scratch wave offset passed in system SGPR.
+    unsigned PrivateSegmentWaveByteOffsetReg
+      = Info->addPrivateSegmentWaveByteOffset();
+
+    MF.addLiveIn(PrivateSegmentWaveByteOffsetReg, &AMDGPU::SGPR_32RegClass);
+    CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
+  }
+
+  // Now that we've figured out where the scratch register inputs are, see if
+  // we should reserve the arguments and use them directly.
+
+  bool HasStackObjects = MF.getFrameInfo()->hasStackObjects();
+
+  if (ST.isAmdHsaOS()) {
+    // TODO: Assume we will spill without optimizations.
+    if (HasStackObjects) {
+      // If we have stack objects, we unquestionably need the private buffer
+      // resource. For the HSA ABI, this will be the first 4 user SGPR
+      // inputs. We can reserve those and use them directly.
+ + unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER); + Info->setScratchRSrcReg(PrivateSegmentBufferReg); + + unsigned PrivateSegmentWaveByteOffsetReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); + Info->setScratchWaveOffsetReg(PrivateSegmentWaveByteOffsetReg); + } else { + unsigned ReservedBufferReg + = TRI->reservedPrivateSegmentBufferReg(MF); + unsigned ReservedOffsetReg + = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + + // We tentatively reserve the last registers (skipping the last two + // which may contain VCC). After register allocation, we'll replace + // these with the ones immediately after those which were really + // allocated. In the prologue copies will be inserted from the argument + // to these reserved registers. + Info->setScratchRSrcReg(ReservedBufferReg); + Info->setScratchWaveOffsetReg(ReservedOffsetReg); + } + } else { + unsigned ReservedBufferReg = TRI->reservedPrivateSegmentBufferReg(MF); + + // Without HSA, relocations are used for the scratch pointer and the + // buffer resource setup is always inserted in the prologue. Scratch wave + // offset is still in an input SGPR. + Info->setScratchRSrcReg(ReservedBufferReg); + + if (HasStackObjects) { + unsigned ScratchWaveOffsetReg = TRI->getPreloadedValue( + MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET); + Info->setScratchWaveOffsetReg(ScratchWaveOffsetReg); + } else { + unsigned ReservedOffsetReg + = TRI->reservedPrivateSegmentWaveByteOffsetReg(MF); + Info->setScratchWaveOffsetReg(ReservedOffsetReg); + } + } + + if (Info->hasWorkItemIDX()) { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X); + MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } else + llvm_unreachable("workitem id x should always be enabled"); + + if (Info->hasWorkItemIDY()) { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y); + MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + CCInfo.AllocateReg(Reg); + } + + if (Info->hasWorkItemIDZ()) { + unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z); + MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass); + CCInfo.AllocateReg(Reg); } if (Chains.empty()) @@ -693,27 +875,11 @@ SDValue SITargetLowering::LowerFormalArguments( MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MachineInstr * MI, MachineBasicBlock * BB) const { - MachineBasicBlock::iterator I = *MI; - const SIInstrInfo *TII = - static_cast(Subtarget->getInstrInfo()); - switch (MI->getOpcode()) { default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); case AMDGPU::BRANCH: return BB; - case AMDGPU::SI_RegisterStorePseudo: { - MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); - unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); - MachineInstrBuilder MIB = - BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore), - Reg); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) - MIB.addOperand(MI->getOperand(i)); - - MI->eraseFromParent(); - break; - } } return BB; } @@ -944,20 +1110,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, const GlobalValue *GV = GSD->getGlobal(); MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace()); - SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); - - SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr, - 
DAG.getConstant(0, DL, MVT::i32)); - SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr, - DAG.getConstant(1, DL, MVT::i32)); - - SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue), - PtrLo, GA); - SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue), - PtrHi, DAG.getConstant(0, DL, MVT::i32), - SDValue(Lo.getNode(), 1)); - return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); + return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT, GA); } SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, @@ -977,6 +1131,18 @@ SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, // a glue result. } +SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG, + SDValue Op, + MVT VT, + unsigned Offset) const { + SDLoc SL(Op); + SDValue Param = LowerParameter(DAG, MVT::i32, MVT::i32, SL, + DAG.getEntryNode(), Offset, false); + // The local size values will have the hi 16-bits as zero. + return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param, + DAG.getValueType(VT)); +} + SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -988,7 +1154,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDLoc DL(Op); unsigned IntrinsicID = cast(Op.getOperand(0))->getZExtValue(); + // TODO: Should this propagate fast-math-flags? + switch (IntrinsicID) { + case Intrinsic::amdgcn_dispatch_ptr: + return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, + TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT); + case Intrinsic::r600_read_ngroups_x: return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), SI::KernelInputOffsets::NGROUPS_X, false); @@ -1008,37 +1180,36 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), SI::KernelInputOffsets::GLOBAL_SIZE_Z, false); case Intrinsic::r600_read_local_size_x: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::LOCAL_SIZE_X, false); + return lowerImplicitZextParam(DAG, Op, MVT::i16, + SI::KernelInputOffsets::LOCAL_SIZE_X); case Intrinsic::r600_read_local_size_y: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::LOCAL_SIZE_Y, false); + return lowerImplicitZextParam(DAG, Op, MVT::i16, + SI::KernelInputOffsets::LOCAL_SIZE_Y); case Intrinsic::r600_read_local_size_z: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::LOCAL_SIZE_Z, false); - + return lowerImplicitZextParam(DAG, Op, MVT::i16, + SI::KernelInputOffsets::LOCAL_SIZE_Z); case Intrinsic::AMDGPU_read_workdim: - return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - getImplicitParameterOffset(MFI, GRID_DIM), false); - + // Really only 2 bits. 
+ return lowerImplicitZextParam(DAG, Op, MVT::i8, + getImplicitParameterOffset(MFI, GRID_DIM)); case Intrinsic::r600_read_tgid_x: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT); case Intrinsic::r600_read_tgid_y: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT); case Intrinsic::r600_read_tgid_z: return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT); case Intrinsic::r600_read_tidig_x: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_X), VT); case Intrinsic::r600_read_tidig_y: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Y), VT); case Intrinsic::r600_read_tidig_z: return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass, - TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT); + TRI->getPreloadedValue(MF, SIRegisterInfo::WORKITEM_ID_Z), VT); case AMDGPUIntrinsic::SI_load_const: { SDValue Ops[] = { Op.getOperand(1), @@ -1077,6 +1248,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, DAG.getConstant(2, DL, MVT::i32), // P0 Op.getOperand(1), Op.getOperand(2), Glue); } + case AMDGPUIntrinsic::SI_packf16: + if (Op.getOperand(1).isUndef() && Op.getOperand(2).isUndef()) + return DAG.getUNDEF(MVT::i32); + return Op; case AMDGPUIntrinsic::SI_fs_interp: { SDValue IJ = Op.getOperand(4); SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ, @@ -1092,6 +1267,19 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J, Op.getOperand(1), Op.getOperand(2), Glue); } + case Intrinsic::amdgcn_interp_p1: { + SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4)); + SDValue Glue = M0.getValue(1); + return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3), Glue); + } + case Intrinsic::amdgcn_interp_p2: { + SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5)); + SDValue Glue = SDValue(M0.getNode(), 1); + return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), + Glue); + } default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); } @@ -1152,16 +1340,29 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { "Custom lowering for non-i32 vectors hasn't been implemented."); unsigned NumElements = Op.getValueType().getVectorNumElements(); assert(NumElements != 2 && "v2 loads are supported for all address spaces."); + switch (Load->getAddressSpace()) { default: break; + case AMDGPUAS::CONSTANT_ADDRESS: + if (isMemOpUniform(Load)) + break; + // Non-uniform loads will be selected to MUBUF instructions, so they + // have the same legalization requirements as global and private + // loads. + // + // Fall-through case AMDGPUAS::GLOBAL_ADDRESS: case AMDGPUAS::PRIVATE_ADDRESS: + if (NumElements >= 8) + return SplitVectorLoad(Op, DAG); + + // v4 loads are supported for private and global memory.
if (NumElements <= 4) break; // fall-through case AMDGPUAS::LOCAL_ADDRESS: - return ScalarizeVectorLoad(Op, DAG); + // If properly aligned, if we split we might be able to use ds_read_b64. + return SplitVectorLoad(Op, DAG); } } @@ -1236,8 +1437,10 @@ SDValue SITargetLowering::LowerFastFDIV(SDValue Op, SelectionDAG &DAG) const { if (Unsafe) { // Turn into multiply by the reciprocal. // x / y -> x * (1.0 / y) + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS); - return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip); + return DAG.getNode(ISD::FMUL, SL, VT, LHS, Recip, &Flags); } return SDValue(); @@ -1274,6 +1477,8 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const { SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One); + // TODO: Should this propagate fast-math-flags? + r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3); SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1); @@ -1379,7 +1584,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return Ret; if (VT.isVector() && VT.getVectorNumElements() >= 8) - return ScalarizeVectorStore(Op, DAG); + return SplitVectorStore(Op, DAG); if (VT == MVT::i1) return DAG.getTruncStore(Store->getChain(), DL, @@ -1393,6 +1598,7 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); EVT VT = Op.getValueType(); SDValue Arg = Op.getOperand(0); + // TODO: Should this propagate fast-math-flags? SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, DAG.getNode(ISD::FMUL, DL, VT, Arg, DAG.getConstantFP(0.5/M_PI, DL, @@ -2125,9 +2331,14 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, static_cast(Subtarget->getInstrInfo()); MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - TII->legalizeOperands(MI); - if (TII->isMIMG(MI->getOpcode())) { + if (TII->isVOP3(MI->getOpcode())) { + // Make sure constant bus requirements are respected. + TII->legalizeOperandsVOP3(MRI, MI); + return; + } + + if (TII->isMIMG(*MI)) { unsigned VReg = MI->getOperand(0).getReg(); unsigned Writemask = MI->getOperand(1).getImm(); unsigned BitsSet = 0; @@ -2169,53 +2380,38 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG, SDLoc DL, SDValue Ptr) const { const SIInstrInfo *TII = - static_cast(Subtarget->getInstrInfo()); -#if 1 - // XXX - Workaround for moveToVALU not handling different register class - // inserts for REG_SEQUENCE. + static_cast(Subtarget->getInstrInfo()); - // Build the half of the subregister with the constants. - const SDValue Ops0[] = { - DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32), - buildSMovImm32(DAG, DL, 0), - DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32), - buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32), - DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32) - }; + // Build the half of the subregister with the constants before building the + // full 128-bit register. If we are building multiple resource descriptors, + // this will allow CSEing of the 2-component register. 
+ const SDValue Ops0[] = { + DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, DL, MVT::i32), + buildSMovImm32(DAG, DL, 0), + DAG.getTargetConstant(AMDGPU::sub0, DL, MVT::i32), + buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32), + DAG.getTargetConstant(AMDGPU::sub1, DL, MVT::i32) + }; - SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, - MVT::v2i32, Ops0), 0); + SDValue SubRegHi = SDValue(DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, + MVT::v2i32, Ops0), 0); - // Combine the constants and the pointer. - const SDValue Ops1[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), - Ptr, - DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32), - SubRegHi, - DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32) - }; + // Combine the constants and the pointer. + const SDValue Ops1[] = { + DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32), + Ptr, + DAG.getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32), + SubRegHi, + DAG.getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32) + }; - return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1); -#else - const SDValue Ops[] = { - DAG.getTargetConstant(AMDGPU::SReg_128RegClassID, MVT::i32), - Ptr, - DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32), - buildSMovImm32(DAG, DL, 0), - DAG.getTargetConstant(AMDGPU::sub2, MVT::i32), - buildSMovImm32(DAG, DL, TII->getDefaultRsrcFormat() >> 32), - DAG.getTargetConstant(AMDGPU::sub3, MVT::i32) - }; - - return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops); - -#endif + return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops1); } /// \brief Return a resource descriptor with the 'Add TID' bit enabled -/// The TID (Thread ID) is multipled by the stride value (bits [61:48] -/// of the resource descriptor) to create an offset, which is added to the -/// resource ponter. +/// The TID (Thread ID) is multiplied by the stride value (bits [61:48] +/// of the resource descriptor) to create an offset, which is added to +/// the resource pointer. 
MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, SDLoc DL, SDValue Ptr, @@ -2248,15 +2444,6 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG, return DAG.getMachineNode(AMDGPU::REG_SEQUENCE, DL, MVT::v4i32, Ops); } -MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG, - SDLoc DL, - SDValue Ptr) const { - const SIInstrInfo *TII = - static_cast(Subtarget->getInstrInfo()); - - return buildRSRC(DAG, DL, Ptr, 0, TII->getScratchRsrcWords23()); -} - SDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const { @@ -2274,13 +2461,41 @@ std::pair SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { - if (Constraint == "r") { - switch(VT.SimpleTy) { - default: llvm_unreachable("Unhandled type for 'r' inline asm constraint"); - case MVT::i64: - return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); - case MVT::i32: + + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 's': + case 'r': + switch (VT.getSizeInBits()) { + default: + return std::make_pair(0U, nullptr); + case 32: return std::make_pair(0U, &AMDGPU::SGPR_32RegClass); + case 64: + return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); + case 128: + return std::make_pair(0U, &AMDGPU::SReg_128RegClass); + case 256: + return std::make_pair(0U, &AMDGPU::SReg_256RegClass); + } + + case 'v': + switch (VT.getSizeInBits()) { + default: + return std::make_pair(0U, nullptr); + case 32: + return std::make_pair(0U, &AMDGPU::VGPR_32RegClass); + case 64: + return std::make_pair(0U, &AMDGPU::VReg_64RegClass); + case 96: + return std::make_pair(0U, &AMDGPU::VReg_96RegClass); + case 128: + return std::make_pair(0U, &AMDGPU::VReg_128RegClass); + case 256: + return std::make_pair(0U, &AMDGPU::VReg_256RegClass); + case 512: + return std::make_pair(0U, &AMDGPU::VReg_512RegClass); + } } } @@ -2301,3 +2516,16 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } + +SITargetLowering::ConstraintType +SITargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 's': + case 'v': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index d84c32ec0092..e2f8cb19d6be 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -28,6 +28,9 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const override; + SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, + MVT VT, unsigned Offset) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; @@ -57,6 +60,7 @@ class SITargetLowering : public AMDGPUTargetLowering { SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const; bool isLegalFlatAddressingMode(const AddrMode &AM) const; + bool isLegalMUBUFAddressingMode(const AddrMode &AM) const; public: SITargetLowering(TargetMachine &tm, const AMDGPUSubtarget &STI); @@ -76,6 +80,9 @@ public: bool MemcpyStrSrc, MachineFunction &MF) const override; + bool isMemOpUniform(const SDNode *N) const; + bool 
isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; @@ -112,13 +119,10 @@ public: SDValue Ptr, uint32_t RsrcDword1, uint64_t RsrcDword2And3) const; - MachineSDNode *buildScratchRSRC(SelectionDAG &DAG, - SDLoc DL, - SDValue Ptr) const; - std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const; }; diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp index 90a37f174682..821aada526c7 100644 --- a/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -91,7 +91,8 @@ private: bool isOpRelevant(MachineOperand &Op); /// \brief Get register interval an operand affects. - RegInterval getRegInterval(MachineOperand &Op); + RegInterval getRegInterval(const TargetRegisterClass *RC, + const MachineOperand &Reg) const; /// \brief Handle instructions async components void pushInstruction(MachineBasicBlock &MBB, @@ -121,9 +122,13 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; const char *getPassName() const override { - return "SI insert wait instructions"; + return "SI insert wait instructions"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } // End anonymous namespace @@ -138,9 +143,8 @@ FunctionPass *llvm::createSIInsertWaits(TargetMachine &tm) { } Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { - - uint64_t TSFlags = TII->get(MI.getOpcode()).TSFlags; - Counters Result; + uint64_t TSFlags = MI.getDesc().TSFlags; + Counters Result = { { 0, 0, 0 } }; Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); @@ -151,15 +155,22 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { // LGKM may use larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { - if (TII->isSMRD(MI.getOpcode())) { + if (TII->isSMRD(MI)) { - MachineOperand &Op = MI.getOperand(0); - assert(Op.isReg() && "First LGKM operand must be a register!"); - - unsigned Reg = Op.getReg(); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - Result.Named.LGKM = Size > 4 ? 2 : 1; + if (MI.getNumOperands() != 0) { + assert(MI.getOperand(0).isReg() && + "First LGKM operand must be a register!"); + // XXX - What if this is a write into a super register? + const TargetRegisterClass *RC = TII->getOpRegClass(MI, 0); + unsigned Size = RC->getSize(); + Result.Named.LGKM = Size > 4 ? 2 : 1; + } else { + // s_dcache_inv etc. do not have a destination register. Assume we + // want a wait on these. + // XXX - What is the right value? + Result.Named.LGKM = 1; + } } else { // DS Result.Named.LGKM = 1; @@ -173,9 +184,8 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { } bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { - // Constants are always irrelevant - if (!Op.isReg()) + if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) return false; // Defines are always relevant @@ -196,7 +206,7 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { // operand comes before the value operand and it may have // multiple data operands.
- if (TII->isDS(MI.getOpcode())) { + if (TII->isDS(MI)) { MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data); if (Data && Op.isIdenticalTo(*Data)) return true; @@ -224,18 +234,13 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { return false; } -RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) { - - if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) - return std::make_pair(0, 0); - - unsigned Reg = Op.getReg(); - unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); - +RegInterval SIInsertWaits::getRegInterval(const TargetRegisterClass *RC, + const MachineOperand &Reg) const { + unsigned Size = RC->getSize(); assert(Size >= 4); RegInterval Result; - Result.first = TRI->getEncodingValue(Reg); + Result.first = TRI->getEncodingValue(Reg.getReg()); Result.second = Result.first + Size / 4; return Result; @@ -246,10 +251,13 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // Get the hardware counter increments and sum them up Counters Increment = getHwCounts(*I); + Counters Limit = ZeroCounts; unsigned Sum = 0; for (unsigned i = 0; i < 3; ++i) { LastIssued.Array[i] += Increment.Array[i]; + if (Increment.Array[i]) + Limit.Array[i] = LastIssued.Array[i]; Sum += Increment.Array[i]; } @@ -261,7 +269,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, if (MBB.getParent()->getSubtarget().getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { - // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM + // Any occurrence of consecutive VMEM or SMEM instructions forms a VMEM // or SMEM clause, respectively. // // The temporary workaround is to break the clauses with S_NOP. @@ -270,7 +278,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // and destination registers don't overlap, e.g. this is illegal: // r0 = load r2 // r2 = load r0 - if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) || + if ((LastOpcodeType == SMEM && TII->isSMRD(*I)) || (LastOpcodeType == VMEM && Increment.Named.VM)) { // Insert a NOP to break the clause. BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)) @@ -278,7 +286,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, LastInstWritesM0 = false; } - if (TII->isSMRD(I->getOpcode())) + if (TII->isSMRD(*I)) LastOpcodeType = SMEM; else if (Increment.Named.VM) LastOpcodeType = VMEM; @@ -290,21 +298,21 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, } for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &Op = I->getOperand(i); if (!isOpRelevant(Op)) continue; - RegInterval Interval = getRegInterval(Op); + const TargetRegisterClass *RC = TII->getOpRegClass(*I, i); + RegInterval Interval = getRegInterval(RC, Op); for (unsigned j = Interval.first; j < Interval.second; ++j) { // Remember which registers we define if (Op.isDef()) - DefinedRegs[j] = LastIssued; + DefinedRegs[j] = Limit; // and which one we are using if (Op.isUse()) - UsedRegs[j] = LastIssued; + UsedRegs[j] = Limit; } } } @@ -390,12 +398,18 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { if (MI.getOpcode() == AMDGPU::S_SENDMSG) return LastIssued; - // For each register affected by this - // instruction increase the result sequence + // For each register affected by this instruction increase the result + // sequence. + // + // TODO: We could probably just look at explicit operands if we removed VCC / + // EXEC from SMRD dest reg classes. 
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &Op = MI.getOperand(i); - RegInterval Interval = getRegInterval(Op); + if (!Op.isReg() || !TRI->isInAllocatableClass(Op.getReg())) + continue; + + const TargetRegisterClass *RC = TII->getOpRegClass(MI, i); + RegInterval Interval = getRegInterval(RC, Op); for (unsigned j = Interval.first; j < Interval.second; ++j) { if (Op.isDef()) { diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td index 211666a9bdbc..0e883f64caa3 100644 --- a/lib/Target/AMDGPU/SIInstrFormats.td +++ b/lib/Target/AMDGPU/SIInstrFormats.td @@ -41,6 +41,10 @@ class InstSI pattern> : field bits<1> WQM = 0; field bits<1> VGPRSpill = 0; + // This bit tells the assembler to use the 32-bit encoding in case it + // is unable to infer the encoding from the operands. + field bits<1> VOPAsmPrefer32Bit = 0; + // These need to be kept in sync with the enum in SIInstrFlags. let TSFlags{0} = VM_CNT; let TSFlags{1} = EXP_CNT; @@ -68,10 +72,8 @@ class InstSI pattern> : let TSFlags{19} = FLAT; let TSFlags{20} = WQM; let TSFlags{21} = VGPRSpill; + let TSFlags{22} = VOPAsmPrefer32Bit; - // Most instructions require adjustments after selection to satisfy - // operand requirements. - let hasPostISelHook = 1; let SchedRW = [Write32Bit]; } @@ -86,7 +88,6 @@ class Enc64 { } class VOPDstOperand : RegisterOperand ; -def VOPDstVCC : VOPDstOperand ; let Uses = [EXEC] in { @@ -101,11 +102,11 @@ class VOPAnyCommon pattern> : } class VOPCCommon pattern> : - VOPAnyCommon <(outs VOPDstVCC:$dst), ins, asm, pattern> { + VOPAnyCommon <(outs), ins, asm, pattern> { - let DisableEncoding = "$dst"; let VOPC = 1; let Size = 4; + let Defs = [VCC]; } class VOP1Common pattern> : @@ -138,6 +139,11 @@ class VOP3Common pattern> : let isCodeGenOnly = 0; int Size = 8; + + // Because SGPRs may be allowed if there are multiple operands, we + // need a post-isel hook to insert copies in order to avoid + // violating constant bus requirements. + let hasPostISelHook = 1; } } // End Uses = [EXEC] @@ -222,6 +228,20 @@ class SMRDe op, bits<1> imm> : Enc32 { let Inst{31-27} = 0x18; //encoding } +class SMRD_IMMe_ci op> : Enc64 { + bits<7> sdst; + bits<7> sbase; + bits<32> offset; + + let Inst{7-0} = 0xff; + let Inst{8} = 0; + let Inst{14-9} = sbase{6-1}; + let Inst{21-15} = sdst; + let Inst{26-22} = op; + let Inst{31-27} = 0x18; //encoding + let Inst{63-32} = offset; +} + let SchedRW = [WriteSALU] in { class SOP1 pattern> : InstSI { @@ -249,13 +269,13 @@ class SOP2 pattern> : class SOPC op, dag outs, dag ins, string asm, list pattern> : InstSI, SOPCe { - let DisableEncoding = "$dst"; let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; let SALU = 1; let SOPC = 1; let isCodeGenOnly = 0; + let Defs = [SCC]; let UseNamedOperandTable = 1; } @@ -598,15 +618,13 @@ class VINTRPCommon pattern> : // Vector I/O operations //===----------------------------------------------------------------------===// -let Uses = [EXEC] in { - class DS pattern> : InstSI { let LGKM_CNT = 1; let DS = 1; let UseNamedOperandTable = 1; - let Uses = [M0]; + let Uses = [M0, EXEC]; // Most instruction load and store data, so set this as the default. 
let mayLoad = 1; @@ -623,6 +641,7 @@ class MUBUF pattern> : let VM_CNT = 1; let EXP_CNT = 1; let MUBUF = 1; + let Uses = [EXEC]; let hasSideEffects = 0; let UseNamedOperandTable = 1; @@ -636,6 +655,7 @@ class MTBUF pattern> : let VM_CNT = 1; let EXP_CNT = 1; let MTBUF = 1; + let Uses = [EXEC]; let hasSideEffects = 0; let UseNamedOperandTable = 1; @@ -665,9 +685,7 @@ class MIMG op, dag outs, dag ins, string asm, list pattern> : let VM_CNT = 1; let EXP_CNT = 1; let MIMG = 1; + let Uses = [EXEC]; let hasSideEffects = 0; // XXX ???? } - - -} // End Uses = [EXEC] diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index cfd2c42d1aef..a08a5a8fed36 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -82,6 +82,7 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, switch (MI->getOpcode()) { case AMDGPU::V_MOV_B32_e32: case AMDGPU::V_MOV_B32_e64: + case AMDGPU::V_MOV_B64_PSEUDO: return true; default: return false; @@ -204,7 +205,8 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, unsigned &Offset, const TargetRegisterInfo *TRI) const { unsigned Opc = LdSt->getOpcode(); - if (isDS(Opc)) { + + if (isDS(*LdSt)) { const MachineOperand *OffsetImm = getNamedOperand(*LdSt, AMDGPU::OpName::offset); if (OffsetImm) { @@ -254,7 +256,7 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, return false; } - if (isMUBUF(Opc) || isMTBUF(Opc)) { + if (isMUBUF(*LdSt) || isMTBUF(*LdSt)) { if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1) return false; @@ -270,7 +272,7 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, return true; } - if (isSMRD(Opc)) { + if (isSMRD(*LdSt)) { const MachineOperand *OffsetImm = getNamedOperand(*LdSt, AMDGPU::OpName::offset); if (!OffsetImm) @@ -289,20 +291,18 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg, bool SIInstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt, unsigned NumLoads) const { - unsigned Opc0 = FirstLdSt->getOpcode(); - unsigned Opc1 = SecondLdSt->getOpcode(); - // TODO: This needs finer tuning if (NumLoads > 4) return false; - if (isDS(Opc0) && isDS(Opc1)) + if (isDS(*FirstLdSt) && isDS(*SecondLdSt)) return true; - if (isSMRD(Opc0) && isSMRD(Opc1)) + if (isSMRD(*FirstLdSt) && isSMRD(*SecondLdSt)) return true; - if ((isMUBUF(Opc0) || isMTBUF(Opc0)) && (isMUBUF(Opc1) || isMTBUF(Opc1))) + if ((isMUBUF(*FirstLdSt) || isMTBUF(*FirstLdSt)) && + (isMUBUF(*SecondLdSt) || isMTBUF(*SecondLdSt))) return true; return false; @@ -323,28 +323,45 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, - AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, 0 + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, + }; + + static const int16_t Sub0_15_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, + AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, + AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, + AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, }; static const int16_t Sub0_7[] = { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, 0 + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + }; + + static const int16_t Sub0_7_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, + AMDGPU::sub4_sub5, 
AMDGPU::sub6_sub7, }; static const int16_t Sub0_3[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, 0 + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + }; + + static const int16_t Sub0_3_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, }; static const int16_t Sub0_2[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, 0 + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, }; static const int16_t Sub0_1[] = { - AMDGPU::sub0, AMDGPU::sub1, 0 + AMDGPU::sub0, AMDGPU::sub1, }; unsigned Opcode; - const int16_t *SubIndices; + ArrayRef SubIndices; + bool Forward; if (AMDGPU::SReg_32RegClass.contains(DestReg)) { assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); @@ -360,7 +377,7 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else { // FIXME: Hack until VReg_1 removed. assert(AMDGPU::VGPR_32RegClass.contains(SrcReg)); - BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32), AMDGPU::VCC) + BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32)) .addImm(0) .addReg(SrcReg, getKillRegState(KillSrc)); } @@ -375,18 +392,18 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B32; - SubIndices = Sub0_3; + Opcode = AMDGPU::S_MOV_B64; + SubIndices = Sub0_3_64; } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B32; - SubIndices = Sub0_7; + Opcode = AMDGPU::S_MOV_B64; + SubIndices = Sub0_7_64; } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B32; - SubIndices = Sub0_15; + Opcode = AMDGPU::S_MOV_B64; + SubIndices = Sub0_15_64; } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || @@ -428,13 +445,27 @@ SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, llvm_unreachable("Can't copy register!"); } - while (unsigned SubIdx = *SubIndices++) { + if (RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg)) + Forward = true; + else + Forward = false; + + for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { + unsigned SubIdx; + if (Forward) + SubIdx = SubIndices[Idx]; + else + SubIdx = SubIndices[SubIndices.size() - Idx - 1]; + MachineInstrBuilder Builder = BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx)); - Builder.addReg(RI.getSubReg(SrcReg, SubIdx), getKillRegState(KillSrc)); + Builder.addReg(RI.getSubReg(SrcReg, SubIdx)); - if (*SubIndices) + if (Idx == SubIndices.size() - 1) + Builder.addReg(SrcReg, RegState::Kill | RegState::Implicit); + + if (Idx == 0) Builder.addReg(DestReg, RegState::Define | RegState::Implicit); } } @@ -471,6 +502,40 @@ unsigned SIInstrInfo::getMovOpcode(const TargetRegisterClass *DstRC) const { return AMDGPU::COPY; } +static unsigned getSGPRSpillSaveOpcode(unsigned Size) { + switch (Size) { + case 4: + return AMDGPU::SI_SPILL_S32_SAVE; + case 8: + return AMDGPU::SI_SPILL_S64_SAVE; + case 16: + return AMDGPU::SI_SPILL_S128_SAVE; + case 32: + return AMDGPU::SI_SPILL_S256_SAVE; + case 64: + return AMDGPU::SI_SPILL_S512_SAVE; + default: + llvm_unreachable("unknown register size"); + } +} + +static unsigned getVGPRSpillSaveOpcode(unsigned Size) { + switch (Size) { + case 4: + return AMDGPU::SI_SPILL_V32_SAVE; + case 8: + return AMDGPU::SI_SPILL_V64_SAVE; + case 16: + return AMDGPU::SI_SPILL_V128_SAVE; + case 32: + return AMDGPU::SI_SPILL_V256_SAVE; + case 64: + return AMDGPU::SI_SPILL_V512_SAVE; + default: + 
llvm_unreachable("unknown register size"); + } +} + void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, @@ -481,47 +546,83 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); - int Opcode = -1; + + unsigned Size = FrameInfo->getObjectSize(FrameIndex); + unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); + MachinePointerInfo PtrInfo + = MachinePointerInfo::getFixedStack(*MF, FrameIndex); + MachineMemOperand *MMO + = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + Size, Align); if (RI.isSGPRClass(RC)) { + MFI->setHasSpilledSGPRs(); + // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for spilling // SGPRs. - switch (RC->getSize() * 8) { - case 32: Opcode = AMDGPU::SI_SPILL_S32_SAVE; break; - case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break; - case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break; - case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break; - case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break; - } - } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { - MFI->setHasSpilledVGPRs(); + unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize()); + BuildMI(MBB, MI, DL, get(Opcode)) + .addReg(SrcReg) // src + .addFrameIndex(FrameIndex) // frame_idx + .addMemOperand(MMO); - switch(RC->getSize() * 8) { - case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break; - case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break; - case 96: Opcode = AMDGPU::SI_SPILL_V96_SAVE; break; - case 128: Opcode = AMDGPU::SI_SPILL_V128_SAVE; break; - case 256: Opcode = AMDGPU::SI_SPILL_V256_SAVE; break; - case 512: Opcode = AMDGPU::SI_SPILL_V512_SAVE; break; - } + return; } - if (Opcode != -1) { - FrameInfo->setObjectAlignment(FrameIndex, 4); - BuildMI(MBB, MI, DL, get(Opcode)) - .addReg(SrcReg) - .addFrameIndex(FrameIndex) - // Place-holder registers, these will be filled in by - // SIPrepareScratchRegs. 
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - } else { + if (!ST.isVGPRSpillingEnabled(MFI)) { LLVMContext &Ctx = MF->getFunction()->getContext(); Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to" " spill register"); BuildMI(MBB, MI, DL, get(AMDGPU::KILL)) - .addReg(SrcReg); + .addReg(SrcReg); + + return; + } + + assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); + + unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize()); + MFI->setHasSpilledVGPRs(); + BuildMI(MBB, MI, DL, get(Opcode)) + .addReg(SrcReg) // src + .addFrameIndex(FrameIndex) // frame_idx + .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc + .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addMemOperand(MMO); +} + +static unsigned getSGPRSpillRestoreOpcode(unsigned Size) { + switch (Size) { + case 4: + return AMDGPU::SI_SPILL_S32_RESTORE; + case 8: + return AMDGPU::SI_SPILL_S64_RESTORE; + case 16: + return AMDGPU::SI_SPILL_S128_RESTORE; + case 32: + return AMDGPU::SI_SPILL_S256_RESTORE; + case 64: + return AMDGPU::SI_SPILL_S512_RESTORE; + default: + llvm_unreachable("unknown register size"); + } +} + +static unsigned getVGPRSpillRestoreOpcode(unsigned Size) { + switch (Size) { + case 4: + return AMDGPU::SI_SPILL_V32_RESTORE; + case 8: + return AMDGPU::SI_SPILL_V64_RESTORE; + case 16: + return AMDGPU::SI_SPILL_V128_RESTORE; + case 32: + return AMDGPU::SI_SPILL_V256_RESTORE; + case 64: + return AMDGPU::SI_SPILL_V512_RESTORE; + default: + llvm_unreachable("unknown register size"); } } @@ -534,42 +635,43 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const SIMachineFunctionInfo *MFI = MF->getInfo(); MachineFrameInfo *FrameInfo = MF->getFrameInfo(); DebugLoc DL = MBB.findDebugLoc(MI); - int Opcode = -1; + unsigned Align = FrameInfo->getObjectAlignment(FrameIndex); + unsigned Size = FrameInfo->getObjectSize(FrameIndex); - if (RI.isSGPRClass(RC)){ - switch(RC->getSize() * 8) { - case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break; - case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; - case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; - case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; - case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break; - } - } else if(RI.hasVGPRs(RC) && ST.isVGPRSpillingEnabled(MFI)) { - switch(RC->getSize() * 8) { - case 32: Opcode = AMDGPU::SI_SPILL_V32_RESTORE; break; - case 64: Opcode = AMDGPU::SI_SPILL_V64_RESTORE; break; - case 96: Opcode = AMDGPU::SI_SPILL_V96_RESTORE; break; - case 128: Opcode = AMDGPU::SI_SPILL_V128_RESTORE; break; - case 256: Opcode = AMDGPU::SI_SPILL_V256_RESTORE; break; - case 512: Opcode = AMDGPU::SI_SPILL_V512_RESTORE; break; - } + MachinePointerInfo PtrInfo + = MachinePointerInfo::getFixedStack(*MF, FrameIndex); + + MachineMemOperand *MMO = MF->getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, Size, Align); + + if (RI.isSGPRClass(RC)) { + // FIXME: Maybe this should not include a memoperand because it will be + // lowered to non-memory instructions. + unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize()); + BuildMI(MBB, MI, DL, get(Opcode), DestReg) + .addFrameIndex(FrameIndex) // frame_idx + .addMemOperand(MMO); + + return; } - if (Opcode != -1) { - FrameInfo->setObjectAlignment(FrameIndex, 4); - BuildMI(MBB, MI, DL, get(Opcode), DestReg) - .addFrameIndex(FrameIndex) - // Place-holder registers, these will be filled in by - // SIPrepareScratchRegs. 
- .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - - } else { + if (!ST.isVGPRSpillingEnabled(MFI)) { LLVMContext &Ctx = MF->getFunction()->getContext(); Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to" " restore register"); BuildMI(MBB, MI, DL, get(AMDGPU::IMPLICIT_DEF), DestReg); + + return; } + + assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected"); + + unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize()); + BuildMI(MBB, MI, DL, get(Opcode), DestReg) + .addFrameIndex(FrameIndex) // frame_idx + .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc + .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset + .addMemOperand(MMO); } /// \param @Offset Offset in bytes of the FrameIndex being spilled @@ -601,17 +703,21 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, if (MFI->getShaderType() == ShaderType::COMPUTE && WorkGroupSize > WavefrontSize) { - unsigned TIDIGXReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_X); - unsigned TIDIGYReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Y); - unsigned TIDIGZReg = TRI->getPreloadedValue(*MF, SIRegisterInfo::TIDIG_Z); + unsigned TIDIGXReg + = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_X); + unsigned TIDIGYReg + = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Y); + unsigned TIDIGZReg + = TRI->getPreloadedValue(*MF, SIRegisterInfo::WORKGROUP_ID_Z); unsigned InputPtrReg = - TRI->getPreloadedValue(*MF, SIRegisterInfo::INPUT_PTR); + TRI->getPreloadedValue(*MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); for (unsigned Reg : {TIDIGXReg, TIDIGYReg, TIDIGZReg}) { if (!Entry.isLiveIn(Reg)) Entry.addLiveIn(Reg); } RS->enterBasicBlock(&Entry); + // FIXME: Can we scavenge an SReg_64 and access the subregs? unsigned STmp0 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); unsigned STmp1 = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); BuildMI(Entry, Insert, DL, get(AMDGPU::S_LOAD_DWORD_IMM), STmp0) @@ -667,8 +773,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB, return TmpReg; } -void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI, - int Count) const { +void SIInstrInfo::insertWaitStates(MachineBasicBlock::iterator MI, + int Count) const { while (Count > 0) { int Arg; if (Count >= 8) @@ -687,26 +793,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch (MI->getOpcode()) { default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); - case AMDGPU::SI_CONSTDATA_PTR: { - unsigned Reg = MI->getOperand(0).getReg(); - unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); - unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); - - BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg); - - // Add 32-bit offset from this instruction to the start of the constant data. - BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo) - .addReg(RegLo) - .addTargetIndex(AMDGPU::TI_CONSTDATA_START) - .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit); - BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi) - .addReg(RegHi) - .addImm(0) - .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) - .addReg(AMDGPU::SCC, RegState::Implicit); - MI->eraseFromParent(); - break; - } case AMDGPU::SGPR_USE: // This is just a placeholder for register allocation. 
MI->eraseFromParent(); @@ -760,49 +846,90 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MI->eraseFromParent(); break; } + + case AMDGPU::SI_CONSTDATA_PTR: { + const SIRegisterInfo *TRI = + static_cast(ST.getRegisterInfo()); + MachineFunction &MF = *MBB.getParent(); + unsigned Reg = MI->getOperand(0).getReg(); + unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0); + unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1); + + // Create a bundle so these instructions won't be re-ordered by the + // post-RA scheduler. + MIBundleBuilder Bundler(MBB, MI); + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); + + // Add 32-bit offset from this instruction to the start of the + // constant data. + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) + .addReg(RegLo) + .addOperand(MI->getOperand(1))); + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) + .addReg(RegHi) + .addImm(0)); + + llvm::finalizeBundle(MBB, Bundler.begin()); + + MI->eraseFromParent(); + break; + } } return true; } -MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, - bool NewMI) const { - - if (MI->getNumOperands() < 3) - return nullptr; - +/// Commutes the operands in the given instruction. +/// The commutable operands are specified by their indices OpIdx0 and OpIdx1. +/// +/// Do not call this method for a non-commutable instruction or for +/// non-commutable pair of operand indices OpIdx0 and OpIdx1. +/// Even though the instruction is commutable, the method may still +/// fail to commute the operands, null pointer is returned in such cases. +MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx0, + unsigned OpIdx1) const { int CommutedOpcode = commuteOpcode(*MI); if (CommutedOpcode == -1) return nullptr; int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src0); - assert(Src0Idx != -1 && "Should always have src0 operand"); - MachineOperand &Src0 = MI->getOperand(Src0Idx); if (!Src0.isReg()) return nullptr; int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src1); - if (Src1Idx == -1) + + if ((OpIdx0 != static_cast(Src0Idx) || + OpIdx1 != static_cast(Src1Idx)) && + (OpIdx0 != static_cast(Src1Idx) || + OpIdx1 != static_cast(Src0Idx))) return nullptr; MachineOperand &Src1 = MI->getOperand(Src1Idx); - // Make sure it's legal to commute operands for VOP2. - if (isVOP2(MI->getOpcode()) && - (!isOperandLegal(MI, Src0Idx, &Src1) || - !isOperandLegal(MI, Src1Idx, &Src0))) { - return nullptr; + + if (isVOP2(*MI)) { + const MCInstrDesc &InstrDesc = MI->getDesc(); + // For VOP2 instructions, any operand type is valid to use for src0. Make + // sure we can use the src1 as src0. + // + // We could be stricter here and only allow commuting if there is a reason + // to do so. i.e. if both operands are VGPRs there is no real benefit, + // although MachineCSE attempts to find matches by commuting. + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) + return nullptr; } if (!Src1.isReg()) { // Allow commuting instructions with Imm operands. if (NewMI || !Src1.isImm() || - (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) { + (!isVOP2(*MI) && !isVOP3(*MI))) { return nullptr; } - // Be sure to copy the source modifiers to the right place. 
if (MachineOperand *Src0Mods = getNamedOperand(*MI, AMDGPU::OpName::src0_modifiers)) { @@ -832,7 +959,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, Src1.ChangeToRegister(Reg, false); Src1.setSubReg(SubReg); } else { - MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1); } if (MI) @@ -845,8 +972,8 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI, // between the true commutable operands, and the base // TargetInstrInfo::commuteInstruction uses it. bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, - unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const { + unsigned &SrcOpIdx0, + unsigned &SrcOpIdx1) const { const MCInstrDesc &MCID = MI->getDesc(); if (!MCID.isCommutable()) return false; @@ -857,7 +984,8 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, return false; // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on - // immediate. + // immediate. Also, immediate src0 operand is not handled in + // SIInstrInfo::commuteInstruction(); if (!MI->getOperand(Src0Idx).isReg()) return false; @@ -865,18 +993,22 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr *MI, if (Src1Idx == -1) return false; - if (!MI->getOperand(Src1Idx).isReg()) + MachineOperand &Src1 = MI->getOperand(Src1Idx); + if (Src1.isImm()) { + // SIInstrInfo::commuteInstruction() does support commuting the immediate + // operand src1 in 2 and 3 operand instructions. + if (!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode())) + return false; + } else if (Src1.isReg()) { + // If any source modifiers are set, the generic instruction commuting won't + // understand how to copy the source modifiers. + if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || + hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) + return false; + } else return false; - // If any source modifiers are set, the generic instruction commuting won't - // understand how to copy the source modifiers. - if (hasModifiersSet(*MI, AMDGPU::OpName::src0_modifiers) || - hasModifiersSet(*MI, AMDGPU::OpName::src1_modifiers)) - return false; - - SrcOpIdx1 = Src0Idx; - SrcOpIdx2 = Src1Idx; - return true; + return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx); } MachineInstr *SIInstrInfo::buildMovInstr(MachineBasicBlock *MBB, @@ -898,11 +1030,6 @@ bool SIInstrInfo::isMov(unsigned Opcode) const { } } -bool -SIInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { - return RC != &AMDGPU::EXECRegRegClass; -} - static void removeModOperands(MachineInstr &MI) { unsigned Opc = MI.getOpcode(); int Src0ModIdx = AMDGPU::getNamedOperandIdx(Opc, @@ -984,9 +1111,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); } - UseMI->RemoveOperand(AMDGPU::getNamedOperandIdx(Opc, - AMDGPU::OpName::src2)); - // ChangingToImmediate adds Src2 back to the instruction. 
Src2->ChangeToImmediate(Imm); removeModOperands(*UseMI); @@ -1045,18 +1169,6 @@ bool SIInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, return false; } -bool -SIInstrInfo::isTriviallyReMaterializable(const MachineInstr *MI, - AliasAnalysis *AA) const { - switch(MI->getOpcode()) { - default: return AMDGPUInstrInfo::isTriviallyReMaterializable(MI, AA); - case AMDGPU::S_MOV_B32: - case AMDGPU::S_MOV_B64: - case AMDGPU::V_MOV_B32_e32: - return MI->getOperand(1).isImm(); - } -} - static bool offsetsDoNotOverlap(int WidthA, int OffsetA, int WidthB, int OffsetB) { int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB; @@ -1088,9 +1200,6 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(MachineInstr *MIa, bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, AliasAnalysis *AA) const { - unsigned Opc0 = MIa->getOpcode(); - unsigned Opc1 = MIb->getOpcode(); - assert(MIa && (MIa->mayLoad() || MIa->mayStore()) && "MIa must load from or modify a memory location"); assert(MIb && (MIb->mayLoad() || MIb->mayStore()) && @@ -1105,32 +1214,32 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa, // TODO: Should we check the address space from the MachineMemOperand? That // would allow us to distinguish objects we know don't alias based on the - // underlying addres space, even if it was lowered to a different one, + // underlying address space, even if it was lowered to a different one, // e.g. private accesses lowered to use MUBUF instructions on a scratch // buffer. - if (isDS(Opc0)) { - if (isDS(Opc1)) + if (isDS(*MIa)) { + if (isDS(*MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(Opc1); + return !isFLAT(*MIb); } - if (isMUBUF(Opc0) || isMTBUF(Opc0)) { - if (isMUBUF(Opc1) || isMTBUF(Opc1)) + if (isMUBUF(*MIa) || isMTBUF(*MIa)) { + if (isMUBUF(*MIb) || isMTBUF(*MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(Opc1) && !isSMRD(Opc1); + return !isFLAT(*MIb) && !isSMRD(*MIb); } - if (isSMRD(Opc0)) { - if (isSMRD(Opc1)) + if (isSMRD(*MIa)) { + if (isSMRD(*MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); - return !isFLAT(Opc1) && !isMUBUF(Opc0) && !isMTBUF(Opc0); + return !isFLAT(*MIb) && !isMUBUF(*MIa) && !isMTBUF(*MIa); } - if (isFLAT(Opc0)) { - if (isFLAT(Opc1)) + if (isFLAT(*MIa)) { + if (isFLAT(*MIb)) return checkInstOffsetsDoNotOverlap(MIa, MIb); return false; @@ -1319,6 +1428,26 @@ bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI, return false; } +static unsigned findImplicitSGPRRead(const MachineInstr &MI) { + for (const MachineOperand &MO : MI.implicit_operands()) { + // We only care about reads. + if (MO.isDef()) + continue; + + switch (MO.getReg()) { + case AMDGPU::VCC: + case AMDGPU::M0: + case AMDGPU::FLAT_SCR: + return MO.getReg(); + + default: + break; + } + } + + return AMDGPU::NoRegister; +} + bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const { uint16_t Opcode = MI->getOpcode(); @@ -1335,7 +1464,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, return false; } - // Make sure the register classes are correct + // Make sure the register classes are correct. for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isFPImm()) { ErrInfo = "FPImm Machine Operands are not supported. 
ISel should bitcast " @@ -1392,14 +1521,17 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, // Verify VOP* - if (isVOP1(Opcode) || isVOP2(Opcode) || isVOP3(Opcode) || isVOPC(Opcode)) { + if (isVOP1(*MI) || isVOP2(*MI) || isVOP3(*MI) || isVOPC(*MI)) { // Only look at the true operands. Only a real operand can use the constant // bus, and we don't want to check pseudo-operands like the source modifier // flags. const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; unsigned ConstantBusCount = 0; - unsigned SGPRUsed = AMDGPU::NoRegister; + unsigned SGPRUsed = findImplicitSGPRRead(*MI); + if (SGPRUsed != AMDGPU::NoRegister) + ++ConstantBusCount; + for (int OpIdx : OpIndices) { if (OpIdx == -1) break; @@ -1435,6 +1567,16 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, } } + // Make sure we aren't losing exec uses in the td files. This mostly requires + // being careful when using let Uses to try to add other use registers. + if (!isGenericOpcode(Opcode) && !isSALU(Opcode) && !isSMRD(Opcode)) { + const MachineOperand *Exec = MI->findRegisterUseOperand(AMDGPU::EXEC); + if (!Exec || !Exec->isImplicit()) { + ErrInfo = "VALU instruction does not implicitly read exec mask"; + return false; + } + } + return true; } @@ -1483,11 +1625,17 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32; case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32; case AMDGPU::S_LOAD_DWORD_IMM: - case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; + case AMDGPU::S_LOAD_DWORD_SGPR: + case AMDGPU::S_LOAD_DWORD_IMM_ci: + return AMDGPU::BUFFER_LOAD_DWORD_ADDR64; case AMDGPU::S_LOAD_DWORDX2_IMM: - case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; + case AMDGPU::S_LOAD_DWORDX2_SGPR: + case AMDGPU::S_LOAD_DWORDX2_IMM_ci: + return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; case AMDGPU::S_LOAD_DWORDX4_IMM: - case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; + case AMDGPU::S_LOAD_DWORDX4_SGPR: + case AMDGPU::S_LOAD_DWORDX4_IMM_ci: + return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64; case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; @@ -1562,17 +1710,21 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, unsigned SubIdx, const TargetRegisterClass *SubRC) const { - assert(SuperReg.isReg()); - - unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); + MachineBasicBlock *MBB = MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); unsigned SubReg = MRI.createVirtualRegister(SubRC); + if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { + BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) + .addReg(SuperReg.getReg(), 0, SubIdx); + return SubReg; + } + // Just in case the super register is itself a sub-register, copy it to a new // value so we don't need to worry about merging its subreg index with the // SubIdx passed to this function. The register coalescer should be able to // eliminate this extra copy. 
- MachineBasicBlock *MBB = MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); + unsigned NewSuperReg = MRI.createVirtualRegister(SuperRC); BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); @@ -1605,36 +1757,6 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm( return MachineOperand::CreateReg(SubReg, false); } -unsigned SIInstrInfo::split64BitImm(SmallVectorImpl &Worklist, - MachineBasicBlock::iterator MI, - MachineRegisterInfo &MRI, - const TargetRegisterClass *RC, - const MachineOperand &Op) const { - MachineBasicBlock *MBB = MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); - unsigned LoDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned HiDst = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); - unsigned Dst = MRI.createVirtualRegister(RC); - - MachineInstr *Lo = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), - LoDst) - .addImm(Op.getImm() & 0xFFFFFFFF); - MachineInstr *Hi = BuildMI(*MBB, MI, DL, get(AMDGPU::S_MOV_B32), - HiDst) - .addImm(Op.getImm() >> 32); - - BuildMI(*MBB, MI, DL, get(TargetOpcode::REG_SEQUENCE), Dst) - .addReg(LoDst) - .addImm(AMDGPU::sub0) - .addReg(HiDst) - .addImm(AMDGPU::sub1); - - Worklist.push_back(Lo); - Worklist.push_back(Hi); - - return Dst; -} - // Change the order of operands from (0, 1, 2) to (0, 2, 1) void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { assert(Inst->getNumExplicitOperands() == 3); @@ -1643,6 +1765,41 @@ void SIInstrInfo::swapOperands(MachineBasicBlock::iterator Inst) const { Inst->addOperand(Op1); } +bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, + const MCOperandInfo &OpInfo, + const MachineOperand &MO) const { + if (!MO.isReg()) + return false; + + unsigned Reg = MO.getReg(); + const TargetRegisterClass *RC = + TargetRegisterInfo::isVirtualRegister(Reg) ? + MRI.getRegClass(Reg) : + RI.getPhysRegClass(Reg); + + // In order to be legal, the common sub-class must be equal to the + // class of the current operand. For example: + // + // v_mov_b32 s0 ; Operand defined as vsrc_32 + // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL + // + // s_sendmsg 0, s0 ; Operand defined as m0reg + // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL + + return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; +} + +bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, + const MCOperandInfo &OpInfo, + const MachineOperand &MO) const { + if (MO.isReg()) + return isLegalRegOperand(MRI, OpInfo, MO); + + // Handle non-register types that are treated like immediates. + assert(MO.isImm() || MO.isTargetIndex() || MO.isFI()); + return true; +} + bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, const MachineOperand *MO) const { const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); @@ -1653,7 +1810,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, if (!MO) MO = &MI->getOperand(OpIdx); - if (isVALU(InstDesc.Opcode) && + if (isVALU(*MI) && usesConstantBus(MRI, *MO, DefinedRC->getSize())) { unsigned SGPRUsed = MO->isReg() ? MO->getReg() : (unsigned)AMDGPU::NoRegister; @@ -1670,21 +1827,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, if (MO->isReg()) { assert(DefinedRC); - const TargetRegisterClass *RC = - TargetRegisterInfo::isVirtualRegister(MO->getReg()) ? 
- MRI.getRegClass(MO->getReg()) : - RI.getPhysRegClass(MO->getReg()); - - // In order to be legal, the common sub-class must be equal to the - // class of the current operand. For example: - // - // v_mov_b32 s0 ; Operand defined as vsrc_32 - // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL - // - // s_sendmsg 0, s0 ; Operand defined as m0reg - // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL - - return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; + return isLegalRegOperand(MRI, OpInfo, *MO); } @@ -1699,81 +1842,143 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr *MI, unsigned OpIdx, return isImmOperandLegal(MI, OpIdx, *MO); } -void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { - MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); +void SIInstrInfo::legalizeOperandsVOP2(MachineRegisterInfo &MRI, + MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + const MCInstrDesc &InstrDesc = get(Opc); - int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src0); - int Src1Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src1); - int Src2Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), - AMDGPU::OpName::src2); + int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); + MachineOperand &Src1 = MI->getOperand(Src1Idx); - // Legalize VOP2 - if (isVOP2(MI->getOpcode()) && Src1Idx != -1) { - // Legalize src0 - if (!isOperandLegal(MI, Src0Idx)) + // If there is an implicit SGPR use such as VCC use for v_addc_u32/v_subb_u32 + // we need to only have one constant bus use. + // + // Note we do not need to worry about literal constants here. They are + // disabled for the operand type for instructions because they will always + // violate the one constant bus use rule. + bool HasImplicitSGPR = findImplicitSGPRRead(*MI) != AMDGPU::NoRegister; + if (HasImplicitSGPR) { + int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); + MachineOperand &Src0 = MI->getOperand(Src0Idx); + + if (Src0.isReg() && RI.isSGPRReg(MRI, Src0.getReg())) legalizeOpWithMove(MI, Src0Idx); + } - // Legalize src1 - if (isOperandLegal(MI, Src1Idx)) - return; - - // Usually src0 of VOP2 instructions allow more types of inputs - // than src1, so try to commute the instruction to decrease our - // chances of having to insert a MOV instruction to legalize src1. - if (MI->isCommutable()) { - if (commuteInstruction(MI)) - // If we are successful in commuting, then we know MI is legal, so - // we are done. - return; - } + // VOP2 src0 instructions support all operand types, so we don't need to check + // their legality. If src1 is already legal, we don't need to do anything. + if (isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src1)) + return; + // We do not use commuteInstruction here because it is too aggressive and will + // commute if it is possible. We only want to commute here if it improves + // legality. This can be called a fairly large number of times so don't waste + // compile time pointlessly swapping and checking legality again. + if (HasImplicitSGPR || !MI->isCommutable()) { legalizeOpWithMove(MI, Src1Idx); return; } - // XXX - Do any VOP3 instructions read VCC? - // Legalize VOP3 - if (isVOP3(MI->getOpcode())) { - int VOP3Idx[3] = { Src0Idx, Src1Idx, Src2Idx }; + int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); + MachineOperand &Src0 = MI->getOperand(Src0Idx); - // Find the one SGPR operand we are allowed to use. 
- unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); + // If src0 can be used as src1, commuting will make the operands legal. + // Otherwise we have to give up and insert a move. + // + // TODO: Other immediate-like operand kinds could be commuted if there was a + // MachineOperand::ChangeTo* for them. + if ((!Src1.isImm() && !Src1.isReg()) || + !isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0)) { + legalizeOpWithMove(MI, Src1Idx); + return; + } - for (unsigned i = 0; i < 3; ++i) { - int Idx = VOP3Idx[i]; - if (Idx == -1) - break; - MachineOperand &MO = MI->getOperand(Idx); + int CommutedOpc = commuteOpcode(*MI); + if (CommutedOpc == -1) { + legalizeOpWithMove(MI, Src1Idx); + return; + } - if (MO.isReg()) { - if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) - continue; // VGPRs are legal + MI->setDesc(get(CommutedOpc)); - assert(MO.getReg() != AMDGPU::SCC && "SCC operand to VOP3 instruction"); + unsigned Src0Reg = Src0.getReg(); + unsigned Src0SubReg = Src0.getSubReg(); + bool Src0Kill = Src0.isKill(); - if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { - SGPRReg = MO.getReg(); - // We can use one SGPR in each VOP3 instruction. - continue; - } - } else if (!isLiteralConstant(MO, getOpSize(MI->getOpcode(), Idx))) { - // If it is not a register and not a literal constant, then it must be - // an inline constant which is always legal. - continue; - } - // If we make it this far, then the operand is not legal and we must - // legalize it. - legalizeOpWithMove(MI, Idx); + if (Src1.isImm()) + Src0.ChangeToImmediate(Src1.getImm()); + else if (Src1.isReg()) { + Src0.ChangeToRegister(Src1.getReg(), false, false, Src1.isKill()); + Src0.setSubReg(Src1.getSubReg()); + } else + llvm_unreachable("Should only have register or immediate operands"); + + Src1.ChangeToRegister(Src0Reg, false, false, Src0Kill); + Src1.setSubReg(Src0SubReg); +} + +// Legalize VOP3 operands. Because all operand types are supported for any +// operand, and since literal constants are not allowed and should never be +// seen, we only need to worry about inserting copies if we use multiple SGPR +// operands. +void SIInstrInfo::legalizeOperandsVOP3( + MachineRegisterInfo &MRI, + MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + + int VOP3Idx[3] = { + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0), + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1), + AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) + }; + + // Find the one SGPR operand we are allowed to use. + unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx); + + for (unsigned i = 0; i < 3; ++i) { + int Idx = VOP3Idx[i]; + if (Idx == -1) + break; + MachineOperand &MO = MI->getOperand(Idx); + + // We should never see a VOP3 instruction with an illegal immediate operand. + if (!MO.isReg()) + continue; + + if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) + continue; // VGPRs are legal + + if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) { + SGPRReg = MO.getReg(); + // We can use one SGPR in each VOP3 instruction. + continue; } + + // If we make it this far, then the operand is not legal and we must + // legalize it. 
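Both legalizeOperandsVOP2 and legalizeOperandsVOP3 above enforce the same hardware restriction: a VALU instruction may read at most one unique SGPR through the constant bus (inline constants aside), so any further SGPR operand must be copied to a VGPR. A standalone sketch of that counting rule (editor's illustration with an invented operand encoding, not LLVM API; negative values stand for VGPRs, non-negative values for SGPR ids):

    #include <vector>

    // Returns the indices of operands that must be moved to a VGPR so that
    // at most one *distinct* SGPR remains readable through the constant bus.
    std::vector<int> operandsToLegalize(const std::vector<int> &Ops) {
      std::vector<int> MustMove;
      int UsedSGPR = -1; // no SGPR seen yet
      for (int i = 0, e = (int)Ops.size(); i != e; ++i) {
        int Op = Ops[i];
        if (Op < 0)
          continue; // VGPRs are always legal
        if (UsedSGPR == -1 || UsedSGPR == Op) {
          UsedSGPR = Op; // first SGPR, or a reuse of the same one
          continue;
        }
        MustMove.push_back(i); // a second distinct SGPR: needs a copy
      }
      return MustMove;
    }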
+ legalizeOpWithMove(MI, Idx); + } +} + +void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + // Legalize VOP2 + if (isVOP2(*MI)) { + legalizeOperandsVOP2(MRI, MI); + return; + } + + // Legalize VOP3 + if (isVOP3(*MI)) { + legalizeOperandsVOP3(MRI, MI); + return; } // Legalize REG_SEQUENCE and PHI // The register class of the operands must be the same type as the register // class of the output. - if (MI->getOpcode() == AMDGPU::REG_SEQUENCE || - MI->getOpcode() == AMDGPU::PHI) { + if (MI->getOpcode() == AMDGPU::PHI) { const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr; for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { if (!MI->getOperand(i).isReg() || @@ -1802,28 +2007,55 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { } // Update all the operands so they have the same type. - for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) { - if (!MI->getOperand(i).isReg() || - !TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg())) + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) continue; unsigned DstReg = MRI.createVirtualRegister(RC); - MachineBasicBlock *InsertBB; - MachineBasicBlock::iterator Insert; - if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { - InsertBB = MI->getParent(); - Insert = MI; - } else { - // MI is a PHI instruction. - InsertBB = MI->getOperand(i + 1).getMBB(); - Insert = InsertBB->getFirstTerminator(); - } - BuildMI(*InsertBB, Insert, MI->getDebugLoc(), - get(AMDGPU::COPY), DstReg) - .addOperand(MI->getOperand(i)); - MI->getOperand(i).setReg(DstReg); + + // MI is a PHI instruction. + MachineBasicBlock *InsertBB = MI->getOperand(I + 1).getMBB(); + MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator(); + + BuildMI(*InsertBB, Insert, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg) + .addOperand(Op); + Op.setReg(DstReg); } } + // REG_SEQUENCE doesn't really require operand legalization, but if one has a + // VGPR dest type and SGPR sources, insert copies so all operands are + // VGPRs. This seems to help operand folding / the register coalescer. + if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) { + MachineBasicBlock *MBB = MI->getParent(); + const TargetRegisterClass *DstRC = getOpRegClass(*MI, 0); + if (RI.hasVGPRs(DstRC)) { + // Update all the operands so they are VGPR register classes. These may + // not be the same register class because REG_SEQUENCE supports mixing + // subregister index types e.g. sub0_sub1 + sub2 + sub3 + for (unsigned I = 1, E = MI->getNumOperands(); I != E; I += 2) { + MachineOperand &Op = MI->getOperand(I); + if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) + continue; + + const TargetRegisterClass *OpRC = MRI.getRegClass(Op.getReg()); + const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(OpRC); + if (VRC == OpRC) + continue; + + unsigned DstReg = MRI.createVirtualRegister(VRC); + + BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), DstReg) + .addOperand(Op); + + Op.setReg(DstReg); + Op.setIsKill(); + } + } + + return; + } + // Legalize INSERT_SUBREG // src0 must have the same register class as dst if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) { @@ -1858,15 +2090,10 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { } MachineBasicBlock &MBB = *MI->getParent(); + // Extract the ptr from the resource descriptor. 
- - // SRsrcPtrLo = srsrc:sub0 - unsigned SRsrcPtrLo = buildExtractSubReg(MI, MRI, *SRsrc, - &AMDGPU::VReg_128RegClass, AMDGPU::sub0, &AMDGPU::VGPR_32RegClass); - - // SRsrcPtrHi = srsrc:sub1 - unsigned SRsrcPtrHi = buildExtractSubReg(MI, MRI, *SRsrc, - &AMDGPU::VReg_128RegClass, AMDGPU::sub1, &AMDGPU::VGPR_32RegClass); + unsigned SRsrcPtr = buildExtractSubReg(MI, MRI, *SRsrc, + &AMDGPU::VReg_128RegClass, AMDGPU::sub0_sub1, &AMDGPU::VReg_64RegClass); // Create an empty resource descriptor unsigned Zero64 = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); @@ -1891,80 +2118,112 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const { .addImm(RsrcDataFormat >> 32); // NewSRsrc = {Zero64, SRsrcFormat} - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), - NewSRsrc) - .addReg(Zero64) - .addImm(AMDGPU::sub0_sub1) - .addReg(SRsrcFormatLo) - .addImm(AMDGPU::sub2) - .addReg(SRsrcFormatHi) - .addImm(AMDGPU::sub3); + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewSRsrc) + .addReg(Zero64) + .addImm(AMDGPU::sub0_sub1) + .addReg(SRsrcFormatLo) + .addImm(AMDGPU::sub2) + .addReg(SRsrcFormatHi) + .addImm(AMDGPU::sub3); MachineOperand *VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass); - unsigned NewVAddrLo; - unsigned NewVAddrHi; if (VAddr) { // This is already an ADDR64 instruction so we need to add the pointer // extracted from the resource descriptor to the current value of VAddr. - NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned NewVAddrLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - // NewVaddrLo = SRsrcPtrLo + VAddr:sub0 - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADD_I32_e32), - NewVAddrLo) - .addReg(SRsrcPtrLo) - .addReg(VAddr->getReg(), 0, AMDGPU::sub0) - .addReg(AMDGPU::VCC, RegState::ImplicitDefine); + // NewVaddrLo = SRsrcPtr:sub0 + VAddr:sub0 + DebugLoc DL = MI->getDebugLoc(); + BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo) + .addReg(SRsrcPtr, 0, AMDGPU::sub0) + .addReg(VAddr->getReg(), 0, AMDGPU::sub0); - // NewVaddrHi = SRsrcPtrHi + VAddr:sub1 - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::V_ADDC_U32_e32), - NewVAddrHi) - .addReg(SRsrcPtrHi) - .addReg(VAddr->getReg(), 0, AMDGPU::sub1) - .addReg(AMDGPU::VCC, RegState::ImplicitDefine) - .addReg(AMDGPU::VCC, RegState::Implicit); + // NewVaddrHi = SRsrcPtr:sub1 + VAddr:sub1 + BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi) + .addReg(SRsrcPtr, 0, AMDGPU::sub1) + .addReg(VAddr->getReg(), 0, AMDGPU::sub1); + // NewVaddr = {NewVaddrHi, NewVaddrLo} + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) + .addReg(NewVAddrLo) + .addImm(AMDGPU::sub0) + .addReg(NewVAddrHi) + .addImm(AMDGPU::sub1); } else { // This instruction is the _OFFSET variant, so we need to convert it to // ADDR64. + assert(MBB.getParent()->getSubtarget<AMDGPUSubtarget>().getGeneration() + < AMDGPUSubtarget::VOLCANIC_ISLANDS && + "FIXME: Need to emit flat atomics here"); + MachineOperand *VData = getNamedOperand(*MI, AMDGPU::OpName::vdata); MachineOperand *Offset = getNamedOperand(*MI, AMDGPU::OpName::offset); MachineOperand *SOffset = getNamedOperand(*MI, AMDGPU::OpName::soffset); - - // Create the new instruction. 
unsigned Addr64Opcode = AMDGPU::getAddr64Inst(MI->getOpcode()); - MachineInstr *Addr64 = - BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) - .addOperand(*VData) - .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. - // This will be replaced later - // with the new value of vaddr. - .addOperand(*SRsrc) - .addOperand(*SOffset) - .addOperand(*Offset) - .addImm(0) // glc - .addImm(0) // slc - .addImm(0); // tfe + + // Atomics with return have an additional tied operand and are + // missing some of the special bits. + MachineOperand *VDataIn = getNamedOperand(*MI, AMDGPU::OpName::vdata_in); + MachineInstr *Addr64; + + if (!VDataIn) { + // Regular buffer load / store. + MachineInstrBuilder MIB + = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) + .addOperand(*VData) + .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. + // This will be replaced later + // with the new value of vaddr. + .addOperand(*SRsrc) + .addOperand(*SOffset) + .addOperand(*Offset); + + // Atomics do not have this operand. + if (const MachineOperand *GLC + = getNamedOperand(*MI, AMDGPU::OpName::glc)) { + MIB.addImm(GLC->getImm()); + } + + MIB.addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc)); + + if (const MachineOperand *TFE + = getNamedOperand(*MI, AMDGPU::OpName::tfe)) { + MIB.addImm(TFE->getImm()); + } + + MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + Addr64 = MIB; + } else { + // Atomics with return. + Addr64 = BuildMI(MBB, MI, MI->getDebugLoc(), get(Addr64Opcode)) + .addOperand(*VData) + .addOperand(*VDataIn) + .addReg(AMDGPU::NoRegister) // Dummy value for vaddr. + // This will be replaced later + // with the new value of vaddr. + .addOperand(*SRsrc) + .addOperand(*SOffset) + .addOperand(*Offset) + .addImm(getNamedImmOperand(*MI, AMDGPU::OpName::slc)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + } MI->removeFromParent(); MI = Addr64; - NewVAddrLo = SRsrcPtrLo; - NewVAddrHi = SRsrcPtrHi; + // NewVaddr = {NewVaddrHi, NewVaddrLo} + BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr) + .addReg(SRsrcPtr, 0, AMDGPU::sub0) + .addImm(AMDGPU::sub0) + .addReg(SRsrcPtr, 0, AMDGPU::sub1) + .addImm(AMDGPU::sub1); + VAddr = getNamedOperand(*MI, AMDGPU::OpName::vaddr); SRsrc = getNamedOperand(*MI, AMDGPU::OpName::srsrc); } - // NewVaddr = {NewVaddrHi, NewVaddrLo} - BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), - NewVAddr) - .addReg(NewVAddrLo) - .addImm(AMDGPU::sub0) - .addReg(NewVAddrHi) - .addImm(AMDGPU::sub1); - - // Update the instruction to use NewVaddr VAddr->setReg(NewVAddr); // Update the instruction to use NewSRsrc @@ -2028,53 +2287,64 @@ void SIInstrInfo::splitSMRD(MachineInstr *MI, .addOperand(*SOff); unsigned OffsetSGPR = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass); BuildMI(*MBB, MI, DL, get(AMDGPU::S_ADD_I32), OffsetSGPR) - .addOperand(*SOff) - .addImm(HalfSize); - Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp)) + .addReg(SOff->getReg(), 0, SOff->getSubReg()) + .addImm(HalfSize); + Hi = BuildMI(*MBB, MI, DL, get(HalfSGPROp), RegHi) .addReg(SBase->getReg(), getKillRegState(IsKill), SBase->getSubReg()) .addReg(OffsetSGPR); } unsigned SubLo, SubHi; + const TargetRegisterClass *NewDstRC; switch (HalfSize) { case 4: SubLo = AMDGPU::sub0; SubHi = AMDGPU::sub1; + NewDstRC = &AMDGPU::VReg_64RegClass; break; case 8: SubLo = AMDGPU::sub0_sub1; SubHi = AMDGPU::sub2_sub3; + NewDstRC = &AMDGPU::VReg_128RegClass; break; case 16: SubLo = AMDGPU::sub0_sub1_sub2_sub3; SubHi = AMDGPU::sub4_sub5_sub6_sub7; + NewDstRC = 
&AMDGPU::VReg_256RegClass; break; case 32: SubLo = AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7; SubHi = AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15; + NewDstRC = &AMDGPU::VReg_512RegClass; break; default: llvm_unreachable("Unhandled HalfSize"); } - BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE)) - .addOperand(MI->getOperand(0)) - .addReg(RegLo) - .addImm(SubLo) - .addReg(RegHi) - .addImm(SubHi); + unsigned OldDst = MI->getOperand(0).getReg(); + unsigned NewDst = MRI.createVirtualRegister(NewDstRC); + + MRI.replaceRegWith(OldDst, NewDst); + + BuildMI(*MBB, MI, DL, get(AMDGPU::REG_SEQUENCE), NewDst) + .addReg(RegLo) + .addImm(SubLo) + .addReg(RegHi) + .addImm(SubHi); } -void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const { +void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, + MachineRegisterInfo &MRI, + SmallVectorImpl &Worklist) const { MachineBasicBlock *MBB = MI->getParent(); - switch (MI->getOpcode()) { - case AMDGPU::S_LOAD_DWORD_IMM: - case AMDGPU::S_LOAD_DWORD_SGPR: - case AMDGPU::S_LOAD_DWORDX2_IMM: - case AMDGPU::S_LOAD_DWORDX2_SGPR: - case AMDGPU::S_LOAD_DWORDX4_IMM: - case AMDGPU::S_LOAD_DWORDX4_SGPR: { + int DstIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst); + assert(DstIdx != -1); + unsigned DstRCID = get(MI->getOpcode()).OpInfo[DstIdx].RegClass; + switch(RI.getRegClass(DstRCID)->getSize()) { + case 4: + case 8: + case 16: { unsigned NewOpcode = getVALUOp(*MI); unsigned RegOffset; unsigned ImmOffset; @@ -2118,53 +2388,55 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3) .addImm(RsrcDataFormat >> 32); BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc) - .addReg(DWord0) - .addImm(AMDGPU::sub0) - .addReg(DWord1) - .addImm(AMDGPU::sub1) - .addReg(DWord2) - .addImm(AMDGPU::sub2) - .addReg(DWord3) - .addImm(AMDGPU::sub3); - MI->setDesc(get(NewOpcode)); - if (MI->getOperand(2).isReg()) { - MI->getOperand(2).setReg(SRsrc); - } else { - MI->getOperand(2).ChangeToRegister(SRsrc, false); - } - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset)); - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // glc - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // slc - MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(0)); // tfe + .addReg(DWord0) + .addImm(AMDGPU::sub0) + .addReg(DWord1) + .addImm(AMDGPU::sub1) + .addReg(DWord2) + .addImm(AMDGPU::sub2) + .addReg(DWord3) + .addImm(AMDGPU::sub3); - const TargetRegisterClass *NewDstRC = - RI.getRegClass(get(NewOpcode).OpInfo[0].RegClass); - - unsigned DstReg = MI->getOperand(0).getReg(); + const MCInstrDesc &NewInstDesc = get(NewOpcode); + const TargetRegisterClass *NewDstRC + = RI.getRegClass(NewInstDesc.OpInfo[0].RegClass); unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); + unsigned DstReg = MI->getOperand(0).getReg(); MRI.replaceRegWith(DstReg, NewDstReg); + + MachineInstr *NewInst = + BuildMI(*MBB, MI, MI->getDebugLoc(), NewInstDesc, NewDstReg) + .addOperand(MI->getOperand(1)) // sbase + .addReg(SRsrc) + .addImm(0) + .addImm(ImmOffset) + .addImm(0) // glc + .addImm(0) // slc + .addImm(0) // tfe + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MI->eraseFromParent(); + + legalizeOperands(NewInst); + addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); break; } - case AMDGPU::S_LOAD_DWORDX8_IMM: - case 
AMDGPU::S_LOAD_DWORDX8_SGPR: { + case 32: { MachineInstr *Lo, *Hi; splitSMRD(MI, &AMDGPU::SReg_128RegClass, AMDGPU::S_LOAD_DWORDX4_IMM, AMDGPU::S_LOAD_DWORDX4_SGPR, Lo, Hi); MI->eraseFromParent(); - moveSMRDToVALU(Lo, MRI); - moveSMRDToVALU(Hi, MRI); + moveSMRDToVALU(Lo, MRI, Worklist); + moveSMRDToVALU(Hi, MRI, Worklist); break; } - case AMDGPU::S_LOAD_DWORDX16_IMM: - case AMDGPU::S_LOAD_DWORDX16_SGPR: { + case 64: { MachineInstr *Lo, *Hi; splitSMRD(MI, &AMDGPU::SReg_256RegClass, AMDGPU::S_LOAD_DWORDX8_IMM, AMDGPU::S_LOAD_DWORDX8_SGPR, Lo, Hi); MI->eraseFromParent(); - moveSMRDToVALU(Lo, MRI); - moveSMRDToVALU(Hi, MRI); + moveSMRDToVALU(Lo, MRI, Worklist); + moveSMRDToVALU(Hi, MRI, Worklist); break; } } @@ -2185,51 +2457,28 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { // Handle some special cases switch (Opcode) { default: - if (isSMRD(Inst->getOpcode())) { - moveSMRDToVALU(Inst, MRI); + if (isSMRD(*Inst)) { + moveSMRDToVALU(Inst, MRI, Worklist); + continue; } break; - case AMDGPU::S_MOV_B64: { - DebugLoc DL = Inst->getDebugLoc(); - - // If the source operand is a register we can replace this with a - // copy. - if (Inst->getOperand(1).isReg()) { - MachineInstr *Copy = BuildMI(*MBB, Inst, DL, get(TargetOpcode::COPY)) - .addOperand(Inst->getOperand(0)) - .addOperand(Inst->getOperand(1)); - Worklist.push_back(Copy); - } else { - // Otherwise, we need to split this into two movs, because there is - // no 64-bit VALU move instruction. - unsigned Reg = Inst->getOperand(0).getReg(); - unsigned Dst = split64BitImm(Worklist, - Inst, - MRI, - MRI.getRegClass(Reg), - Inst->getOperand(1)); - MRI.replaceRegWith(Reg, Dst); - } - Inst->eraseFromParent(); - continue; - } case AMDGPU::S_AND_B64: - splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32); + splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_AND_B32_e64); Inst->eraseFromParent(); continue; case AMDGPU::S_OR_B64: - splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32); + splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_OR_B32_e64); Inst->eraseFromParent(); continue; case AMDGPU::S_XOR_B64: - splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32); + splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::V_XOR_B32_e64); Inst->eraseFromParent(); continue; case AMDGPU::S_NOT_B64: - splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); + splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::V_NOT_B32_e32); Inst->eraseFromParent(); continue; @@ -2281,6 +2530,11 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } break; + case AMDGPU::S_ABS_I32: + lowerScalarAbs(Worklist, Inst); + Inst->eraseFromParent(); + continue; + case AMDGPU::S_BFE_U64: case AMDGPU::S_BFM_B64: llvm_unreachable("Moving this op to VALU not implemented"); @@ -2319,7 +2573,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { Inst->addOperand(MachineOperand::CreateImm(0)); } - addDescImplicitUseDef(NewDesc, Inst); + Inst->addImplicitDefUseOperands(*Inst->getParent()->getParent()); if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) { const MachineOperand &OffsetWidthOp = Inst->getOperand(2); @@ -2337,27 +2591,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { } // Update the destination register class. 
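moveToVALU above is worklist-driven: once one instruction's result is moved to a VGPR, every user that cannot read a VGPR has to move as well, and then that user's users, until the set closes. The propagation, reduced to a plain graph walk (editor's sketch over an abstract def-use graph; all names here are invented, not the patch's API):

    #include <functional>
    #include <set>
    #include <vector>

    // Users[i] lists the instructions that read instruction i's result;
    // CanReadVGPR says whether a user tolerates a VGPR input unchanged.
    void moveToVALU(unsigned Root,
                    const std::vector<std::vector<unsigned>> &Users,
                    const std::function<bool(unsigned)> &CanReadVGPR,
                    std::set<unsigned> &MovedToVALU) {
      std::vector<unsigned> Worklist{Root};
      while (!Worklist.empty()) {
        unsigned Inst = Worklist.back();
        Worklist.pop_back();
        if (!MovedToVALU.insert(Inst).second)
          continue; // already moved
        for (unsigned Use : Users[Inst])
          if (!CanReadVGPR(Use))
            Worklist.push_back(Use); // this user must move too
      }
    }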
- - const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0); - - switch (Opcode) { - // For target instructions, getOpRegClass just returns the virtual - // register class associated with the operand, so we need to find an - // equivalent VGPR register class in order to move the instruction to the - // VALU. - case AMDGPU::COPY: - case AMDGPU::PHI: - case AMDGPU::REG_SEQUENCE: - case AMDGPU::INSERT_SUBREG: - if (RI.hasVGPRs(NewDstRC)) - continue; - NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); - if (!NewDstRC) - continue; - break; - default: - break; - } + const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*Inst); + if (!NewDstRC) + continue; unsigned DstReg = Inst->getOperand(0).getReg(); unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC); @@ -2366,13 +2602,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { // Legalize the operands legalizeOperands(Inst); - for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg), - E = MRI.use_end(); I != E; ++I) { - MachineInstr &UseMI = *I->getParent(); - if (!canReadVGPR(UseMI, I.getOperandNo())) { - Worklist.push_back(&UseMI); - } - } + addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist); } } @@ -2390,6 +2620,30 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { return &AMDGPU::VGPR_32RegClass; } +void SIInstrInfo::lowerScalarAbs(SmallVectorImpl &Worklist, + MachineInstr *Inst) const { + MachineBasicBlock &MBB = *Inst->getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + MachineBasicBlock::iterator MII = Inst; + DebugLoc DL = Inst->getDebugLoc(); + + MachineOperand &Dest = Inst->getOperand(0); + MachineOperand &Src = Inst->getOperand(1); + unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + BuildMI(MBB, MII, DL, get(AMDGPU::V_SUB_I32_e32), TmpReg) + .addImm(0) + .addReg(Src.getReg()); + + BuildMI(MBB, MII, DL, get(AMDGPU::V_MAX_I32_e64), ResultReg) + .addReg(Src.getReg()) + .addReg(TmpReg); + + MRI.replaceRegWith(Dest.getReg(), ResultReg); + addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); +} + void SIInstrInfo::splitScalar64BitUnaryOp( SmallVectorImpl &Worklist, MachineInstr *Inst, @@ -2414,20 +2668,21 @@ void SIInstrInfo::splitScalar64BitUnaryOp( AMDGPU::sub0, Src0SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); - const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); + const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); + const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); - unsigned DestSub0 = MRI.createVirtualRegister(DestRC); - MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) + unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); + BuildMI(MBB, MII, DL, InstDesc, DestSub0) .addOperand(SrcReg0Sub0); MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub1, Src0SubRC); - unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); - MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) + unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); + BuildMI(MBB, MII, DL, InstDesc, DestSub1) .addOperand(SrcReg0Sub1); - unsigned FullDestReg = MRI.createVirtualRegister(DestRC); + unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) @@ -2436,10 +2691,11 @@ void 
SIInstrInfo::splitScalar64BitUnaryOp( MRI.replaceRegWith(Dest.getReg(), FullDestReg); - // Try to legalize the operands in case we need to swap the order to keep it - // valid. - Worklist.push_back(LoHalf); - Worklist.push_back(HiHalf); + // We don't need to legalizeOperands here because for a single operand, src0 + // will support any kind of input. + + // Move all users of this moved value. + addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBinaryOp( @@ -2474,9 +2730,10 @@ void SIInstrInfo::splitScalar64BitBinaryOp( AMDGPU::sub0, Src1SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); - const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); + const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); + const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); - unsigned DestSub0 = MRI.createVirtualRegister(DestRC); + unsigned DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) .addOperand(SrcReg0Sub0) .addOperand(SrcReg1Sub0); @@ -2486,12 +2743,12 @@ void SIInstrInfo::splitScalar64BitBinaryOp( MachineOperand SrcReg1Sub1 = buildExtractSubRegOrImm(MII, MRI, Src1, Src1RC, AMDGPU::sub1, Src1SubRC); - unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); + unsigned DestSub1 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) .addOperand(SrcReg0Sub1) .addOperand(SrcReg1Sub1); - unsigned FullDestReg = MRI.createVirtualRegister(DestRC); + unsigned FullDestReg = MRI.createVirtualRegister(NewDestRC); BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) .addReg(DestSub0) .addImm(AMDGPU::sub0) @@ -2502,8 +2759,11 @@ void SIInstrInfo::splitScalar64BitBinaryOp( // Try to legalize the operands in case we need to swap the order to keep it // valid. - Worklist.push_back(LoHalf); - Worklist.push_back(HiHalf); + legalizeOperands(LoHalf); + legalizeOperands(HiHalf); + + // Move all users of this moved value. + addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, @@ -2532,18 +2792,19 @@ void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub1, SrcSubRC); - MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg) + BuildMI(MBB, MII, DL, InstDesc, MidReg) .addOperand(SrcRegSub0) .addImm(0); - MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg) + BuildMI(MBB, MII, DL, InstDesc, ResultReg) .addOperand(SrcRegSub1) .addReg(MidReg); MRI.replaceRegWith(Dest.getReg(), ResultReg); - Worklist.push_back(First); - Worklist.push_back(Second); + // We don't need to legalize operands here. src0 for either instruction can be + // an SGPR, and the second input is unused or determined here. 
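The splitScalar64Bit* helpers above work because the two halves are independent: bitwise operations have no cross-half carries, and a 64-bit popcount is the sum of two 32-bit popcounts, with the second V_BCNT_U32_B32 (D = bcnt(S0) + S1) taking the first result as its add-in. A quick standalone check of both identities (editor's illustration; __builtin_popcount assumes GCC or Clang):

    #include <cstdint>

    // S_AND_B64 split into two V_AND_B32 halves, reassembled as REG_SEQUENCE.
    uint64_t and64ViaHalves(uint64_t A, uint64_t B) {
      uint32_t Lo = uint32_t(A) & uint32_t(B);             // sub0 half
      uint32_t Hi = uint32_t(A >> 32) & uint32_t(B >> 32); // sub1 half
      return (uint64_t(Hi) << 32) | Lo;
    }

    // 64-bit bit count as a chain of two V_BCNT_U32_B32 operations.
    uint32_t bcnt64ViaHalves(uint64_t V) {
      uint32_t Mid = __builtin_popcount(uint32_t(V)) + 0; // add-in: 0
      return __builtin_popcount(uint32_t(V >> 32)) + Mid; // add-in: Mid
    }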
+ addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, @@ -2587,6 +2848,7 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), ResultReg); + addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); return; } @@ -2605,33 +2867,53 @@ void SIInstrInfo::splitScalar64BitBFE(SmallVectorImpl &Worklist, .addImm(AMDGPU::sub1); MRI.replaceRegWith(Dest.getReg(), ResultReg); + addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist); } -void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, - MachineInstr *Inst) const { - // Add the implict and explicit register definitions. - if (NewDesc.ImplicitUses) { - for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) { - unsigned Reg = NewDesc.ImplicitUses[i]; - Inst->addOperand(MachineOperand::CreateReg(Reg, false, true)); - } - } - - if (NewDesc.ImplicitDefs) { - for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) { - unsigned Reg = NewDesc.ImplicitDefs[i]; - Inst->addOperand(MachineOperand::CreateReg(Reg, true, true)); +void SIInstrInfo::addUsersToMoveToVALUWorklist( + unsigned DstReg, + MachineRegisterInfo &MRI, + SmallVectorImpl &Worklist) const { + for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg), + E = MRI.use_end(); I != E; ++I) { + MachineInstr &UseMI = *I->getParent(); + if (!canReadVGPR(UseMI, I.getOperandNo())) { + Worklist.push_back(&UseMI); } } } +const TargetRegisterClass *SIInstrInfo::getDestEquivalentVGPRClass( + const MachineInstr &Inst) const { + const TargetRegisterClass *NewDstRC = getOpRegClass(Inst, 0); + + switch (Inst.getOpcode()) { + // For target instructions, getOpRegClass just returns the virtual register + // class associated with the operand, so we need to find an equivalent VGPR + // register class in order to move the instruction to the VALU. + case AMDGPU::COPY: + case AMDGPU::PHI: + case AMDGPU::REG_SEQUENCE: + case AMDGPU::INSERT_SUBREG: + if (RI.hasVGPRs(NewDstRC)) + return nullptr; + + NewDstRC = RI.getEquivalentVGPRClass(NewDstRC); + if (!NewDstRC) + return nullptr; + return NewDstRC; + default: + return NewDstRC; + } +} + +// Find the one SGPR operand we are allowed to use. unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const { - const MCInstrDesc &Desc = get(MI->getOpcode()); + const MCInstrDesc &Desc = MI->getDesc(); // Find the one SGPR operand we are allowed to use. - unsigned SGPRReg = AMDGPU::NoRegister; - + // // First we need to consider the instruction's operand requirements before // legalizing. Some operands are required to be SGPRs, such as implicit uses // of VCC, but we are still bound by the constant bus requirement to only use @@ -2639,17 +2921,9 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, // // If the operand's class is an SGPR, we can never move it. - for (const MachineOperand &MO : MI->implicit_operands()) { - // We only care about reads. 
- if (MO.isDef()) - continue; - - if (MO.getReg() == AMDGPU::VCC) - return AMDGPU::VCC; - - if (MO.getReg() == AMDGPU::FLAT_SCR) - return AMDGPU::FLAT_SCR; - } + unsigned SGPRReg = findImplicitSGPRRead(*MI); + if (SGPRReg != AMDGPU::NoRegister) + return SGPRReg; unsigned UsedSGPRs[3] = { AMDGPU::NoRegister }; const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); @@ -2660,16 +2934,23 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, break; const MachineOperand &MO = MI->getOperand(Idx); - if (RI.isSGPRClassID(Desc.OpInfo[Idx].RegClass)) - SGPRReg = MO.getReg(); + if (!MO.isReg()) + continue; - if (MO.isReg() && RI.isSGPRClass(MRI.getRegClass(MO.getReg()))) - UsedSGPRs[i] = MO.getReg(); + // Is this operand statically required to be an SGPR based on the operand + // constraints? + const TargetRegisterClass *OpRC = RI.getRegClass(Desc.OpInfo[Idx].RegClass); + bool IsRequiredSGPR = RI.isSGPRClass(OpRC); + if (IsRequiredSGPR) + return MO.getReg(); + + // If this could be a VGPR or an SGPR, check the dynamic register class. + unsigned Reg = MO.getReg(); + const TargetRegisterClass *RegRC = MRI.getRegClass(Reg); + if (RI.isSGPRClass(RegRC)) + UsedSGPRs[i] = Reg; } - if (SGPRReg != AMDGPU::NoRegister) - return SGPRReg; - // We don't have a required SGPR operand, so we have a bit more freedom in // selecting operands to move. @@ -2680,6 +2961,9 @@ unsigned SIInstrInfo::findUsedSGPR(const MachineInstr *MI, // V_FMA_F32 v0, s0, s0, s0 -> No moves // V_FMA_F32 v0, s0, s1, s0 -> Move s1 + // TODO: If some of the operands are 64-bit SGPRs and some 32, we should + // prefer those. + if (UsedSGPRs[0] != AMDGPU::NoRegister) { if (UsedSGPRs[0] == UsedSGPRs[1] || UsedSGPRs[0] == UsedSGPRs[2]) SGPRReg = UsedSGPRs[0]; @@ -2720,7 +3004,7 @@ MachineInstrBuilder SIInstrInfo::buildIndirectRead( unsigned IndirectBaseReg = AMDGPU::VGPR_32RegClass.getRegister( getIndirectIndexBegin(*MBB->getParent())); - return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC)) + return BuildMI(*MBB, I, DL, get(AMDGPU::SI_INDIRECT_SRC_V1)) .addOperand(I->getOperand(0)) .addOperand(I->getOperand(1)) .addReg(IndirectBaseReg) diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h index 5053786a39f5..307ef67ed263 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.h +++ b/lib/Target/AMDGPU/SIInstrInfo.h @@ -39,14 +39,11 @@ private: unsigned SubIdx, const TargetRegisterClass *SubRC) const; - unsigned split64BitImm(SmallVectorImpl<MachineInstr *> &Worklist, - MachineBasicBlock::iterator MI, - MachineRegisterInfo &MRI, - const TargetRegisterClass *RC, - const MachineOperand &Op) const; - void swapOperands(MachineBasicBlock::iterator Inst) const; + void lowerScalarAbs(SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst) const; + void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr *Inst, unsigned Opcode) const; @@ -58,13 +55,24 @@ private: void splitScalar64BitBFE(SmallVectorImpl<MachineInstr *> &Worklist, MachineInstr *Inst) const; - void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const; + void addUsersToMoveToVALUWorklist( + unsigned Reg, MachineRegisterInfo &MRI, + SmallVectorImpl<MachineInstr *> &Worklist) const; + + const TargetRegisterClass * + getDestEquivalentVGPRClass(const MachineInstr &Inst) const; bool checkInstOffsetsDoNotOverlap(MachineInstr *MIa, MachineInstr *MIb) const; unsigned findUsedSGPR(const MachineInstr *MI, int OpIndices[3]) const; +protected: + MachineInstr *commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx0, + unsigned OpIdx1) const override; + public: 
explicit SIInstrInfo(const AMDGPUSubtarget &st); @@ -117,17 +125,14 @@ public: // register. If there is no hardware instruction that can store to \p // DstRC, then AMDGPU::COPY is returned. unsigned getMovOpcode(const TargetRegisterClass *DstRC) const; + + LLVM_READONLY int commuteOpcode(const MachineInstr &MI) const; - MachineInstr *commuteInstruction(MachineInstr *MI, - bool NewMI = false) const override; bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; - bool isTriviallyReMaterializable(const MachineInstr *MI, - AliasAnalysis *AA = nullptr) const; - bool areMemAccessesTriviallyDisjoint( MachineInstr *MIa, MachineInstr *MIb, AliasAnalysis *AA = nullptr) const override; @@ -137,8 +142,6 @@ public: unsigned DstReg, unsigned SrcReg) const override; bool isMov(unsigned Opcode) const override; - bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override; - bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, unsigned Reg, MachineRegisterInfo *MRI) const final; @@ -148,78 +151,154 @@ public: MachineBasicBlock::iterator &MI, LiveVariables *LV) const override; + static bool isSALU(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SALU; + } + bool isSALU(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SALU; } + static bool isVALU(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VALU; + } + bool isVALU(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VALU; } + static bool isSOP1(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOP1; + } + bool isSOP1(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOP1; } + static bool isSOP2(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOP2; + } + bool isSOP2(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOP2; } + static bool isSOPC(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOPC; + } + bool isSOPC(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOPC; } + static bool isSOPK(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOPK; + } + bool isSOPK(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOPK; } + static bool isSOPP(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SOPP; + } + bool isSOPP(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SOPP; } + static bool isVOP1(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VOP1; + } + bool isVOP1(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOP1; } + static bool isVOP2(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VOP2; + } + bool isVOP2(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOP2; } + static bool isVOP3(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VOP3; + } + bool isVOP3(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOP3; } + static bool isVOPC(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VOPC; + } + bool isVOPC(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VOPC; } + static bool isMUBUF(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::MUBUF; + } + bool isMUBUF(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MUBUF; } + static bool isMTBUF(const MachineInstr &MI) { + return MI.getDesc().TSFlags & 
SIInstrFlags::MTBUF; + } + bool isMTBUF(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MTBUF; } + static bool isSMRD(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::SMRD; + } + bool isSMRD(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::SMRD; } + static bool isDS(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::DS; + } + bool isDS(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::DS; } + static bool isMIMG(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::MIMG; + } + bool isMIMG(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::MIMG; } + static bool isFLAT(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::FLAT; + } + bool isFLAT(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::FLAT; } + static bool isWQM(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::WQM; + } + bool isWQM(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::WQM; } + static bool isVGPRSpill(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill; + } + bool isVGPRSpill(uint16_t Opcode) const { return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill; } @@ -302,6 +381,26 @@ public: bool isOperandLegal(const MachineInstr *MI, unsigned OpIdx, const MachineOperand *MO = nullptr) const; + /// \brief Check if \p MO would be a valid operand for the given operand + /// definition \p OpInfo. Note this does not attempt to validate constant bus + /// restrictions (e.g. literal constant usage). + bool isLegalVSrcOperand(const MachineRegisterInfo &MRI, + const MCOperandInfo &OpInfo, + const MachineOperand &MO) const; + + /// \brief Check if \p MO (a register operand) is a legal register for the + /// given operand description. + bool isLegalRegOperand(const MachineRegisterInfo &MRI, + const MCOperandInfo &OpInfo, + const MachineOperand &MO) const; + + /// \brief Legalize operands in \p MI by either commuting it or inserting a + /// copy of src1. + void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr *MI) const; + + /// \brief Fix operands in \p MI to satisfy constant bus requirements. + void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr *MI) const; + /// \brief Legalize all operands in this instruction. This function may /// create new instruction and insert them before \p MI. void legalizeOperands(MachineInstr *MI) const; @@ -312,7 +411,8 @@ public: unsigned HalfImmOp, unsigned HalfSGPROp, MachineInstr *&Lo, MachineInstr *&Hi) const; - void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const; + void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI, + SmallVectorImpl &Worklist) const; /// \brief Replace this instruction's opcode with the equivalent VALU /// opcode. This function will also move the users of \p MI to the @@ -341,29 +441,49 @@ public: void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I, unsigned SavReg, unsigned IndexReg) const; - void insertNOPs(MachineBasicBlock::iterator MI, int Count) const; + void insertWaitStates(MachineBasicBlock::iterator MI, int Count) const; /// \brief Returns the operand named \p Op. If \p MI does not have an /// operand named \c Op, this function returns nullptr. 
+ LLVM_READONLY MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const; + LLVM_READONLY const MachineOperand *getNamedOperand(const MachineInstr &MI, unsigned OpName) const { return getNamedOperand(const_cast<MachineInstr &>(MI), OpName); } + /// Get required immediate operand + int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const { + int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName); + return MI.getOperand(Idx).getImm(); + } + uint64_t getDefaultRsrcDataFormat() const; uint64_t getScratchRsrcWords23() const; }; namespace AMDGPU { - + LLVM_READONLY int getVOPe64(uint16_t Opcode); + + LLVM_READONLY int getVOPe32(uint16_t Opcode); + + LLVM_READONLY int getCommuteRev(uint16_t Opcode); + + LLVM_READONLY int getCommuteOrig(uint16_t Opcode); + + LLVM_READONLY int getAddr64Inst(uint16_t Opcode); + + LLVM_READONLY int getAtomicRetOp(uint16_t Opcode); + + LLVM_READONLY int getAtomicNoRetOp(uint16_t Opcode); const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 8d8110bca4c5..10f2adde4867 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// def isCI : Predicate<"Subtarget->getGeneration() " ">= AMDGPUSubtarget::SEA_ISLANDS">; -def isVI : Predicate < - "Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">, - AssemblerPredicate<"FeatureGCN3Encoding">; +def isCIOnly : Predicate<"Subtarget->getGeneration() ==" + "AMDGPUSubtarget::SEA_ISLANDS">, + AssemblerPredicate <"FeatureSeaIslands">; def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">; @@ -69,6 +69,15 @@ class sopk <bits<5> si, bits<5> vi = si> { field bits<5> VI = vi; } +// Specify an SMRD opcode for SI and SMEM opcode for VI + +// FIXME: This should really be bits<5> si, Tablegen crashes if +// parameter default value is other parameter with different bit size +class smrd <bits<8> si, bits<8> vi = si> { + field bits<5> SI = si{4-0}; + field bits<8> VI = vi; +} + // Except for the NONE field, this must be kept in sync with the SISubtarget enum // in AMDGPUInstrInfo.cpp def SISubtarget { @@ -121,9 +130,20 @@ def SIsampled : SDSample<"AMDGPUISD::SAMPLED">; def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">; def SIconstdata_ptr : SDNode< - "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]> + "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, i64>, + SDTCisVT<0, i64>]> >; +def mubuf_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ + return isGlobalLoad(cast<LoadSDNode>(N)) || + isConstantLoad(cast<LoadSDNode>(N), -1); +}]>; + +def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ + return isConstantLoad(cast<LoadSDNode>(N), -1) && + static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N); +}]>; + //===----------------------------------------------------------------------===// // SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1 // to be glued to the memory instructions. 
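The two PatFrags just added encode the load-selection split: smrd_load matches only constant-address loads that the target lowering proves uniform across the wave (isMemOpUniform), while global loads and divergent constant loads stay on the MUBUF path. The decision, reduced to a standalone predicate (editor's sketch; the type and field names are invented):

    enum class LoadUnit { SMRD, MUBUF };

    struct LoadInfo {
      bool IsConstantAddrSpace; // constant vs. global address space
      bool IsUniform;           // same address in every lane of the wave?
    };

    // Mirrors the predicates above: smrd_load requires a uniform constant
    // load; mubuf_load accepts global loads and any constant load.
    LoadUnit selectLoadUnit(const LoadInfo &L) {
      if (L.IsConstantAddrSpace && L.IsUniform)
        return LoadUnit::SMRD;
      return LoadUnit::MUBUF;
    }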
@@ -328,9 +348,9 @@ class SGPRImm : PatLeaf(Subtarget->getRegisterInfo()); for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); U != E; ++U) { - if (SIRI->isSGPRClass(getOperandRegClass(*U, U.getOperandNo()))) { + const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); + if (RC && SIRI->isSGPRClass(RC)) return true; - } } return false; }]>; @@ -354,6 +374,8 @@ def sopp_brtarget : Operand { let ParserMatchClass = SoppBrTarget; } +def const_ga : Operand; + include "SIInstrFormats.td" include "VIInstrFormats.td" @@ -393,7 +415,7 @@ def GDS01MatchClass : GDSBaseMatchClass <"parseDSOff01OptionalOps">; class GLCBaseMatchClass : AsmOperandClass { let Name = "GLC"#parser; let PredicateMethod = "isImm"; - let ParserMethod = parser; + let ParserMethod = parser; let RenderMethod = "addImmOperands"; } @@ -436,6 +458,17 @@ def ClampMatchClass : AsmOperandClass { let RenderMethod = "addImmOperands"; } +class SMRDOffsetBaseMatchClass : AsmOperandClass { + let Name = "SMRDOffset"#predicate; + let PredicateMethod = predicate; + let RenderMethod = "addImmOperands"; +} + +def SMRDOffsetMatchClass : SMRDOffsetBaseMatchClass <"isSMRDOffset">; +def SMRDLiteralOffsetMatchClass : SMRDOffsetBaseMatchClass < + "isSMRDLiteralOffset" +>; + let OperandType = "OPERAND_IMMEDIATE" in { def offen : Operand { @@ -510,6 +543,16 @@ def ClampMod : Operand { let ParserMatchClass = ClampMatchClass; } +def smrd_offset : Operand { + let PrintMethod = "printU32ImmOperand"; + let ParserMatchClass = SMRDOffsetMatchClass; +} + +def smrd_literal_offset : Operand { + let PrintMethod = "printU32ImmOperand"; + let ParserMatchClass = SMRDLiteralOffsetMatchClass; +} + } // End OperandType = "OPERAND_IMMEDIATE" def VOPDstS64 : VOPDstOperand ; @@ -528,6 +571,13 @@ def MUBUFScratch : ComplexPattern; def MUBUFOffset : ComplexPattern; def MUBUFOffsetAtomic : ComplexPattern; +def SMRDImm : ComplexPattern; +def SMRDImm32 : ComplexPattern; +def SMRDSgpr : ComplexPattern; +def SMRDBufferImm : ComplexPattern; +def SMRDBufferImm32 : ComplexPattern; +def SMRDBufferSgpr : ComplexPattern; + def VOP3Mods0 : ComplexPattern; def VOP3NoMods0 : ComplexPattern; def VOP3Mods0Clamp : ComplexPattern; @@ -717,19 +767,6 @@ class SOP2_Real_vi : let AssemblerPredicates = [isVI]; } -multiclass SOP2_SELECT_32 pattern> { - def "" : SOP2_Pseudo ; - - def _si : SOP2_Real_si ; - - def _vi : SOP2_Real_vi ; -} - multiclass SOP2_m pattern> { @@ -758,8 +795,10 @@ multiclass SOP2_64_32 pattern> : SOP2_m < class SOPC_Helper op, RegisterOperand rc, ValueType vt, string opName, PatLeaf cond> : SOPC < - op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1), - opName#" $src0, $src1", []>; + op, (outs), (ins rc:$src0, rc:$src1), + opName#" $src0, $src1", []> { + let Defs = [SCC]; +} class SOPC_32 op, string opName, PatLeaf cond = COND_NULL> : SOPC_Helper; @@ -812,15 +851,20 @@ multiclass SOPK_32 pattern> { } multiclass SOPK_SCC pattern> { - def "" : SOPK_Pseudo ; + def "" : SOPK_Pseudo { + let Defs = [SCC]; + } - let DisableEncoding = "$dst" in { - def _si : SOPK_Real_si ; - def _vi : SOPK_Real_vi ; + def _si : SOPK_Real_si { + let Defs = [SCC]; + } + + def _vi : SOPK_Real_vi { + let Defs = [SCC]; } } @@ -868,35 +912,68 @@ class SMRD_Real_si op, string opName, bit imm, dag outs, dag ins, } class SMRD_Real_vi op, string opName, bit imm, dag outs, dag ins, - string asm> : - SMRD , + string asm, list pattern = []> : + SMRD , SMEMe_vi , SIMCInstr { let AssemblerPredicates = [isVI]; } -multiclass SMRD_m op, string opName, bit imm, dag outs, dag ins, 
+multiclass SMRD_m pattern> { def "" : SMRD_Pseudo ; - def _si : SMRD_Real_si ; + def _si : SMRD_Real_si ; // glc is only applicable to scalar stores, which are not yet // implemented. let glc = 0 in { - def _vi : SMRD_Real_vi <{0, 0, 0, op}, opName, imm, outs, ins, asm>; + def _vi : SMRD_Real_vi ; } } -multiclass SMRD_Helper op, string opName, RegisterClass baseClass, +multiclass SMRD_Inval { + let hasSideEffects = 1, mayStore = 1 in { + def "" : SMRD_Pseudo ; + + let sbase = 0, offset = 0 in { + let sdst = 0 in { + def _si : SMRD_Real_si ; + } + + let glc = 0, sdata = 0 in { + def _vi : SMRD_Real_vi ; + } + } + } +} + +class SMEM_Inval op, string opName, SDPatternOperator node> : + SMRD_Real_vi { + let hasSideEffects = 1; + let mayStore = 1; + let sbase = 0; + let sdata = 0; + let glc = 0; + let offset = 0; +} + +multiclass SMRD_Helper { defm _IMM : SMRD_m < op, opName#"_IMM", 1, (outs dstClass:$dst), - (ins baseClass:$sbase, u32imm:$offset), + (ins baseClass:$sbase, smrd_offset:$offset), opName#" $dst, $sbase, $offset", [] >; + def _IMM_ci : SMRD < + (outs dstClass:$dst), (ins baseClass:$sbase, smrd_literal_offset:$offset), + opName#" $dst, $sbase, $offset", []>, SMRD_IMMe_ci { + let AssemblerPredicates = [isCIOnly]; + } + defm _SGPR : SMRD_m < op, opName#"_SGPR", 0, (outs dstClass:$dst), (ins baseClass:$sbase, SReg_32:$soff), @@ -922,11 +999,12 @@ def InputModsNoDefault : Operand { let ParserMatchClass = InputModsMatchClass; } -class getNumSrcArgs { +class getNumSrcArgs { int ret = - !if (!eq(Src1.Value, untyped.Value), 1, // VOP1 + !if (!eq(Src0.Value, untyped.Value), 0, + !if (!eq(Src1.Value, untyped.Value), 1, // VOP1 !if (!eq(Src2.Value, untyped.Value), 2, // VOP2 - 3)); // VOP3 + 3))); // VOP3 } // Returns the register class to use for the destination of VOP[123C] @@ -934,28 +1012,37 @@ class getNumSrcArgs { class getVALUDstForVT { RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand, !if(!eq(VT.Size, 64), VOPDstOperand, - VOPDstOperand)); // else VT == i1 + !if(!eq(VT.Size, 16), VOPDstOperand, + VOPDstOperand))); // else VT == i1 } // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT { - RegisterOperand ret = !if(!eq(VT.Size, 32), VSrc_32, VSrc_64); + RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32); } // Returns the register class to use for source 1 of VOP[12C] for the // given VT. class getVOPSrc1ForVT { - RegisterClass ret = !if(!eq(VT.Size, 32), VGPR_32, VReg_64); + RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32); } // Returns the register class to use for sources of VOP3 instructions for the // given VT. class getVOP3SrcForVT { - RegisterOperand ret = !if(!eq(VT.Size, 32), VCSrc_32, VCSrc_64); + RegisterOperand ret = + !if(!eq(VT.Size, 64), + VCSrc_64, + !if(!eq(VT.Value, i1.Value), + SCSrc_64, + VCSrc_32 + ) + ); } // Returns 1 if the source arguments have modifiers, 0 if they do not. +// XXX - do f16 instructions? 
class hasModifiers { bit ret = !if(!eq(SrcVT.Value, f32.Value), 1, !if(!eq(SrcVT.Value, f64.Value), 1, 0)); @@ -1009,17 +1096,20 @@ class getIns64 { +class getAsm32 { + string dst = "$dst"; + string src0 = ", $src0"; string src1 = ", $src1"; string src2 = ", $src2"; - string ret = "$dst, $src0"# - !if(!eq(NumSrcArgs, 1), "", src1)# - !if(!eq(NumSrcArgs, 3), src2, ""); + string ret = !if(HasDst, dst, "") # + !if(!eq(NumSrcArgs, 1), src0, "") # + !if(!eq(NumSrcArgs, 2), src0#src1, "") # + !if(!eq(NumSrcArgs, 3), src0#src1#src2, ""); } // Returns the assembly string for the inputs and outputs of a VOP3 // instruction. -class getAsm64 { +class getAsm64 { string src0 = !if(!eq(NumSrcArgs, 1), "$src0_modifiers", "$src0_modifiers,"); string src1 = !if(!eq(NumSrcArgs, 1), "", !if(!eq(NumSrcArgs, 2), " $src1_modifiers", @@ -1027,11 +1117,10 @@ class getAsm64 { string src2 = !if(!eq(NumSrcArgs, 3), " $src2_modifiers", ""); string ret = !if(!eq(HasModifiers, 0), - getAsm32.ret, + getAsm32.ret, "$dst, "#src0#src1#src2#"$clamp"#"$omod"); } - class VOPProfile _ArgVT> { field list ArgVT = _ArgVT; @@ -1047,29 +1136,38 @@ class VOPProfile _ArgVT> { field RegisterOperand Src1RC64 = getVOP3SrcForVT.ret; field RegisterOperand Src2RC64 = getVOP3SrcForVT.ret; - field int NumSrcArgs = getNumSrcArgs.ret; + field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); + field bit HasDst32 = HasDst; + field int NumSrcArgs = getNumSrcArgs.ret; field bit HasModifiers = hasModifiers.ret; - field dag Outs = (outs DstRC:$dst); + field dag Outs = !if(HasDst,(outs DstRC:$dst),(outs)); + + // VOP3b instructions are a special case with a second explicit + // output. This is manually overridden for them. + field dag Outs32 = Outs; + field dag Outs64 = Outs; field dag Ins32 = getIns32.ret; field dag Ins64 = getIns64.ret; - field string Asm32 = getAsm32.ret; - field string Asm64 = getAsm64.ret; + field string Asm32 = getAsm32.ret; + field string Asm64 = getAsm64.ret; } // FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order // for the instruction patterns to work. -def VOP_F16_F16 : VOPProfile <[f32, f32, untyped, untyped]>; -def VOP_F16_I16 : VOPProfile <[f32, i32, untyped, untyped]>; -def VOP_I16_F16 : VOPProfile <[i32, f32, untyped, untyped]>; +def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; +def VOP_F16_I16 : VOPProfile <[f16, i32, untyped, untyped]>; +def VOP_I16_F16 : VOPProfile <[i32, f16, untyped, untyped]>; -def VOP_F16_F16_F16 : VOPProfile <[f32, f32, f32, untyped]>; -def VOP_F16_F16_I16 : VOPProfile <[f32, f32, i32, untyped]>; +def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>; +def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i32, untyped]>; def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>; +def VOP_NONE : VOPProfile <[untyped, untyped, untyped, untyped]>; + def VOP_F32_F32 : VOPProfile <[f32, f32, untyped, untyped]>; def VOP_F32_F64 : VOPProfile <[f32, f64, untyped, untyped]>; def VOP_F32_I32 : VOPProfile <[f32, i32, untyped, untyped]>; @@ -1087,25 +1185,76 @@ def VOP_F64_F64_I32 : VOPProfile <[f64, f64, i32, untyped]>; def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>; def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>; def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>; -def VOP_I32_I32_I32_VCC : VOPProfile <[i32, i32, i32, untyped]> { + +// Write out to vcc or arbitrary SGPR. 
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { + let Asm32 = "$dst, vcc, $src0, $src1"; + let Asm64 = "$dst, $sdst, $src0, $src1"; + let Outs32 = (outs DstRC:$dst); + let Outs64 = (outs DstRC:$dst, SReg_64:$sdst); +} + +// Write out to vcc or arbitrary SGPR and read in from vcc or +// arbitrary SGPR. +def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { + // We use VCSrc_32 to exclude literal constants, even though the + // encoding normally allows them since the implicit VCC use means + // using one would always violate the constant bus + // restriction. SGPRs are still allowed because it should + // technically be possible to use VCC again as src0. let Src0RC32 = VCSrc_32; + let Asm32 = "$dst, vcc, $src0, $src1, vcc"; + let Asm64 = "$dst, $sdst, $src0, $src1, $src2"; + let Outs32 = (outs DstRC:$dst); + let Outs64 = (outs DstRC:$dst, SReg_64:$sdst); + + // Suppress src2 implied by type since the 32-bit encoding uses an + // implicit VCC use. + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); } -def VOP_I1_F32_I32 : VOPProfile <[i1, f32, i32, untyped]> { +class VOP3b_Profile : VOPProfile<[vt, vt, vt, vt]> { + let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); + let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod"; +} + +def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile { + // FIXME: Hack to stop printing _e64 + let DstRC = RegisterOperand; +} + +def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile { + // FIXME: Hack to stop printing _e64 + let DstRC = RegisterOperand; +} + +// VOPC instructions are a special case because for the 32-bit +// encoding, we want to display the implicit vcc write as if it were +// an explicit $dst. +class VOPC_Profile : VOPProfile <[i1, vt0, vt1, untyped]> { + let Asm32 = "vcc, $src0, $src1"; + // The destination for 32-bit encoding is implicit. 
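// Editorial sketch: the VCSrc_32 comment above refers to the SI/VI "constant
// bus": a VALU instruction may read at most one SGPR-or-literal value per
// issue. Because the VOP2b carry-in form implicitly reads VCC (an SGPR pair),
// any literal operand would be a second constant-bus read, so the operand
// class forbids literals up front. Simplified legality check (the operand
// model is hypothetical; the real rule also counts repeated reads of the
// same SGPR only once, which is why reusing VCC as src0 stays legal):
#include <cassert>

struct Operand { bool isLiteral; bool isSGPR; };

bool fitsConstantBus(const Operand *ops, int n, bool implicitVCCRead) {
  int uses = implicitVCCRead ? 1 : 0; // the VCC read occupies the bus slot
  for (int i = 0; i < n; ++i)
    if (ops[i].isLiteral || ops[i].isSGPR)
      ++uses;
  return uses <= 1;
}

int main() {
  Operand lit{true, false}, vgpr{false, false};
  Operand ops1[] = {lit, vgpr};
  assert(!fitsConstantBus(ops1, 2, /*implicitVCCRead=*/true)); // literal + VCC
  Operand ops2[] = {vgpr, vgpr};
  assert(fitsConstantBus(ops2, 2, true)); // only the implicit VCC read
}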
+ let HasDst32 = 0; +} + +class VOPC_Class_Profile : VOPC_Profile { let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); let Asm64 = "$dst, $src0_modifiers, $src1"; } -def VOP_I1_F64_I32 : VOPProfile <[i1, f64, i32, untyped]> { - let Ins64 = (ins InputModsNoDefault:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); - let Asm64 = "$dst, $src0_modifiers, $src1"; -} +def VOPC_I1_F32_F32 : VOPC_Profile; +def VOPC_I1_F64_F64 : VOPC_Profile; +def VOPC_I1_I32_I32 : VOPC_Profile; +def VOPC_I1_I64_I64 : VOPC_Profile; + +def VOPC_I1_F32_I32 : VOPC_Class_Profile; +def VOPC_I1_F64_I32 : VOPC_Class_Profile; def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>; def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>; def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>; def VOP_CNDMASK : VOPProfile <[i32, i32, i32, untyped]> { - let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VCCReg:$src2); + let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, SSrc_64:$src2); let Asm64 = "$dst, $src0, $src1, $src2"; } @@ -1119,13 +1268,60 @@ def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> { let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2); let Ins64 = getIns64, 3, HasModifiers>.ret; - let Asm32 = getAsm32<2>.ret; - let Asm64 = getAsm64<2, HasModifiers>.ret; + let Asm32 = getAsm32<1, 2>.ret; + let Asm64 = getAsm64<1, 2, HasModifiers>.ret; } def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>; def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>; def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>; +class SIInstAlias : + InstAlias , PredicateControl { + + field bit isCompare; + field bit isCommutable; + + let ResultInst = + !if (p.HasDst32, + !if (!eq(p.NumSrcArgs, 0), + // 1 dst, 0 src + (inst p.DstRC:$dst), + !if (!eq(p.NumSrcArgs, 1), + // 1 dst, 1 src + (inst p.DstRC:$dst, p.Src0RC32:$src0), + !if (!eq(p.NumSrcArgs, 2), + // 1 dst, 2 src + (inst p.DstRC:$dst, p.Src0RC32:$src0, p.Src1RC32:$src1), + // else - unreachable + (inst)))), + // else + !if (!eq(p.NumSrcArgs, 2), + // 0 dst, 2 src + (inst p.Src0RC32:$src0, p.Src1RC32:$src1), + !if (!eq(p.NumSrcArgs, 1), + // 0 dst, 1 src + (inst p.Src0RC32:$src1), + // else + // 0 dst, 0 src + (inst)))); +} + +class SIInstAliasSI : + SIInstAlias (op_name#"_e32_si"), p> { + let AssemblerPredicate = SIAssemblerPredicate; +} + +class SIInstAliasVI : + SIInstAlias (op_name#"_e32_vi"), p> { + let AssemblerPredicates = [isVI]; +} + +multiclass SIInstAliasBuilder { + + def : SIInstAliasSI ; + + def : SIInstAliasVI ; +} class VOP { string OpName = opName; @@ -1165,20 +1361,22 @@ class VOP1_Real_vi : let AssemblerPredicates = [isVI]; } -multiclass VOP1_m pattern, - string opName> { - def "" : VOP1_Pseudo ; +multiclass VOP1_m pattern, + string asm = opName#p.Asm32> { + def "" : VOP1_Pseudo ; - def _si : VOP1_Real_si ; + def _si : VOP1_Real_si ; + + def _vi : VOP1_Real_vi ; - def _vi : VOP1_Real_vi ; } -multiclass VOP1SI_m pattern, - string opName> { - def "" : VOP1_Pseudo ; +multiclass VOP1SI_m pattern, + string asm = opName#p.Asm32> { - def _si : VOP1_Real_si ; + def "" : VOP1_Pseudo ; + + def _si : VOP1_Real_si ; } class VOP2_Pseudo pattern, string opName> : @@ -1202,22 +1400,24 @@ class VOP2_Real_vi : let AssemblerPredicates = [isVI]; } -multiclass VOP2SI_m pattern, - string opName, string revOp> { - def "" : VOP2_Pseudo , +multiclass VOP2SI_m pattern, + string revOp> { + + def "" : VOP2_Pseudo , VOP2_REV; - def _si : VOP2_Real_si ; + def _si : 
VOP2_Real_si ; } -multiclass VOP2_m pattern, - string opName, string revOp> { - def "" : VOP2_Pseudo , +multiclass VOP2_m pattern, + string revOp> { + + def "" : VOP2_Pseudo , VOP2_REV; - def _si : VOP2_Real_si ; + def _si : VOP2_Real_si ; - def _vi : VOP2_Real_vi ; + def _vi : VOP2_Real_vi ; } @@ -1250,6 +1450,9 @@ class VOP3_Pseudo pattern, string opName> : MnemonicAlias { let isPseudo = 1; let isCodeGenOnly = 1; + + field bit vdst; + field bit src0; } class VOP3_Real_si op, dag outs, dag ins, string asm, string opName> : @@ -1295,22 +1498,6 @@ multiclass VOP3_m pattern, HasMods>; } -// VOP3_m without source modifiers -multiclass VOP3_m_nomods pattern, - string opName, int NumSrcArgs, bit HasMods = 1> { - - def "" : VOP3_Pseudo ; - - let src0_modifiers = 0, - src1_modifiers = 0, - src2_modifiers = 0, - clamp = 0, - omod = 0 in { - def _si : VOP3_Real_si ; - def _vi : VOP3_Real_vi ; - } -} - multiclass VOP3_1_m pattern, string opName, bit HasMods = 1> { @@ -1335,7 +1522,7 @@ multiclass VOP3SI_1_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { + bit HasMods = 1> { def "" : VOP3_Pseudo , VOP2_REV; @@ -1349,7 +1536,7 @@ multiclass VOP3_2_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { + bit HasMods = 1> { def "" : VOP3_Pseudo , VOP2_REV; @@ -1360,54 +1547,41 @@ multiclass VOP3SI_2_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { - def "" : VOP3_Pseudo , - VOP2_REV; - - // The VOP2 variant puts the carry out into VCC, the VOP3 variant - // can write it into any SGPR. We currently don't use the carry out, - // so for now hardcode it to VCC as well. - let sdst = SIOperand.VCC, Defs = [VCC] in { - def _si : VOP3b_Real_si , - VOP3DisableFields<1, 0, HasMods>; - - def _vi : VOP3b_Real_vi , - VOP3DisableFields<1, 0, HasMods>; - } // End sdst = SIOperand.VCC, Defs = [VCC] -} - -multiclass VOP3b_3_m pattern, string opName, string revOp, - bit HasMods = 1, bit UseFullOp = 0> { +// Two operand VOP3b instruction that may have a 3rd SGPR bool operand +// instead of an implicit VCC as in the VOP2b format. 
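// Editorial sketch: the removed comment above notes that the VOP2 variant
// puts the carry out into VCC while the VOP3 variant can write it into any
// SGPR. Per-lane arithmetic of a 64-bit add built from V_ADD_I32 (carry-out)
// and V_ADDC_U32 (carry-in and carry-out), scalar view of one lane:
#include <cassert>
#include <cstdint>

// v_add_i32: sum plus carry-out.
uint32_t add_co(uint32_t a, uint32_t b, bool &carryOut) {
  uint32_t s = a + b;
  carryOut = s < a; // unsigned wraparound signals the carry
  return s;
}

// v_addc_u32: sum plus carry-in, with carry-out.
uint32_t addc(uint32_t a, uint32_t b, bool carryIn, bool &carryOut) {
  uint64_t s = (uint64_t)a + b + (carryIn ? 1 : 0);
  carryOut = s > UINT32_MAX;
  return (uint32_t)s;
}

int main() {
  uint64_t x = 0x00000001ffffffffULL, y = 1;
  bool c;
  uint32_t lo = add_co((uint32_t)x, (uint32_t)y, c);
  uint32_t hi = addc((uint32_t)(x >> 32), (uint32_t)(y >> 32), c, c);
  assert((((uint64_t)hi << 32) | lo) == x + y);
}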
+multiclass VOP3b_2_3_m pattern, string opName, string revOp, + bit HasMods = 1, bit useSrc2Input = 0> { def "" : VOP3_Pseudo ; - def _si : VOP3b_Real_si , - VOP3DisableFields<1, 1, HasMods>; + VOP3DisableFields<1, useSrc2Input, HasMods>; def _vi : VOP3b_Real_vi , - VOP3DisableFields<1, 1, HasMods>; + VOP3DisableFields<1, useSrc2Input, HasMods>; } multiclass VOP3_C_m pattern, string opName, - bit HasMods, bit defExec, string revOp> { + bit HasMods, bit defExec, + string revOp, list sched> { def "" : VOP3_Pseudo , - VOP2_REV; + VOP2_REV { + let Defs = !if(defExec, [EXEC], []); + let SchedRW = sched; + } def _si : VOP3_Real_si , VOP3DisableFields<1, 0, HasMods> { let Defs = !if(defExec, [EXEC], []); + let SchedRW = sched; } def _vi : VOP3_Real_vi , VOP3DisableFields<1, 0, HasMods> { let Defs = !if(defExec, [EXEC], []); + let SchedRW = sched; } } @@ -1432,32 +1606,28 @@ multiclass VOP2SI_3VI_m pat32, - dag ins64, string asm64, list pat64, - bit HasMods> { +multiclass VOP1_Helper pat32, + list pat64> { - defm _e32 : VOP1_m ; + defm _e32 : VOP1_m ; - defm _e64 : VOP3_1_m ; + defm _e64 : VOP3_1_m ; } multiclass VOP1Inst : VOP1_Helper < - op, opName, P.Outs, - P.Ins32, P.Asm32, [], - P.Ins64, P.Asm64, + op, opName, P, [], !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod))))], - [(set P.DstVT:$dst, (node P.Src0VT:$src0))]), - P.HasModifiers + [(set P.DstVT:$dst, (node P.Src0VT:$src0))]) >; multiclass VOP1InstSI { - defm _e32 : VOP1SI_m ; + defm _e32 : VOP1SI_m ; defm _e64 : VOP3SI_1_m ; } -multiclass VOP2_Helper pat32, - dag ins64, string asm64, list pat64, - string revOp, bit HasMods> { - defm _e32 : VOP2_m ; +multiclass VOP2_Helper pat32, + list pat64, string revOp> { - defm _e64 : VOP3_2_m ; + defm _e32 : VOP2_m ; + + defm _e64 : VOP3_2_m ; } multiclass VOP2Inst : VOP2_Helper < - op, opName, P.Outs, - P.Ins32, P.Asm32, [], - P.Ins64, P.Asm64, + op, opName, P, [], !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp, P.HasModifiers + revOp >; multiclass VOP2InstSI { - defm _e32 : VOP2SI_m ; + + defm _e32 : VOP2SI_m ; defm _e64 : VOP3SI_2_m ; } -multiclass VOP2b_Helper pat32, - dag ins64, string asm64, list pat64, - string revOp, bit HasMods> { +multiclass VOP2b_Helper pat32, list pat64, + string revOp, bit useSGPRInput> { - defm _e32 : VOP2_m ; + let SchedRW = [Write32Bit, WriteSALU] in { + let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in { + defm _e32 : VOP2_m ; + } - defm _e64 : VOP3b_2_m ; + defm _e64 : VOP3b_2_3_m ; + } } multiclass VOP2bInst : VOP2b_Helper < - op, opName, P.Outs, - P.Ins32, P.Asm32, [], - P.Ins64, P.Asm64, + op, opName, P, [], !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp, P.HasModifiers + revOp, !eq(P.NumSrcArgs, 3) >; // A VOP2 instruction that is VOP3-only on VI. 
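// Editorial sketch: the VOP1/VOP2 helper patterns above wrap each source in
// VOP3Mods (per-source abs/neg modifiers) and thread $clamp/$omod through
// VOP3Mods0. The semantics below are this note's assumptions for
// illustration, not a statement of the patch: abs/neg apply before the op,
// omod scales the result (none, x2, x4, x0.5), clamp saturates to [0, 1]:
#include <cassert>
#include <cmath>

float applySrcMods(float v, bool abs, bool neg) {
  if (abs) v = std::fabs(v);
  return neg ? -v : v;
}

float applyOutMods(float v, int omod, bool clamp) {
  static const float scale[4] = {1.0f, 2.0f, 4.0f, 0.5f}; // assumed encoding
  v *= scale[omod & 3];
  if (clamp) v = std::fmin(std::fmax(v, 0.0f), 1.0f);
  return v;
}

int main() {
  // e.g. a hypothetical v_mul_f32_e64 with |src0|, -src1, omod=1, clamp:
  float r = applyOutMods(applySrcMods(-3.0f, true, false) *
                         applySrcMods(2.0f, false, true), 1, true);
  assert(r == 0.0f); // 3 * -2 = -6, x2 = -12, clamped into [0, 1]
}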
-multiclass VOP2_VI3_Helper pat32, - dag ins64, string asm64, list pat64, - string revOp, bit HasMods> { - defm _e32 : VOP2SI_m ; +multiclass VOP2_VI3_Helper pat32, list pat64, string revOp> { - defm _e64 : VOP3_2_m ; + defm _e32 : VOP2SI_m ; + + defm _e64 : VOP3_2_m ; } multiclass VOP2_VI3_Inst : VOP2_VI3_Helper < - op, opName, P.Outs, - P.Ins32, P.Asm32, [], - P.Ins64, P.Asm64, + op, opName, P, [], !if(P.HasModifiers, [(set P.DstVT:$dst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))], [(set P.DstVT:$dst, (node P.Src0VT:$src0, P.Src1VT:$src1))]), - revOp, P.HasModifiers + revOp >; multiclass VOP2MADK pattern = []> { @@ -1583,64 +1747,75 @@ let isCodeGenOnly = 0 in { } // End isCodeGenOnly = 0 } -class VOPC_Pseudo pattern, string opName> : +class VOPC_Pseudo pattern, string opName> : VOPCCommon , VOP , - SIMCInstr, - MnemonicAlias { + SIMCInstr { let isPseudo = 1; let isCodeGenOnly = 1; } -multiclass VOPC_m pattern, - string opName, bit DefExec, string revOpName = ""> { - def "" : VOPC_Pseudo ; - - def _si : VOPC, - SIMCInstr { - let Defs = !if(DefExec, [EXEC], []); - let hasSideEffects = DefExec; - let AssemblerPredicates = [isSICI]; +multiclass VOPC_m pattern, + string opName, bit DefExec, VOPProfile p, + list sched, + string revOpName = "", string asm = opName#"_e32 "#op_asm, + string alias_asm = opName#" "#op_asm> { + def "" : VOPC_Pseudo { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = sched; } - def _vi : VOPC, - SIMCInstr { - let Defs = !if(DefExec, [EXEC], []); - let hasSideEffects = DefExec; - let AssemblerPredicates = [isVI]; - } + let AssemblerPredicates = [isSICI] in { + def _si : VOPC, + SIMCInstr { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let hasSideEffects = DefExec; + let SchedRW = sched; + } + + } // End AssemblerPredicates = [isSICI] + + let AssemblerPredicates = [isVI] in { + def _vi : VOPC, + SIMCInstr { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let hasSideEffects = DefExec; + let SchedRW = sched; + } + + } // End AssemblerPredicates = [isVI] + + defm : SIInstAliasBuilder; } -multiclass VOPC_Helper pat32, - dag out64, dag ins64, string asm64, list pat64, - bit HasMods, bit DefExec, string revOp> { - defm _e32 : VOPC_m ; +multiclass VOPC_Helper pat32, + list pat64, bit DefExec, string revOp, + VOPProfile p, list sched> { + defm _e32 : VOPC_m ; - defm _e64 : VOP3_C_m ; + defm _e64 : VOP3_C_m ; } // Special case for class instructions which only have modifiers on // the 1st source operand. 
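// Editorial sketch: VOPC_m above models the compare result as an implicit
// def — always VCC, plus EXEC for the _CMPX variants (DefExec). One result
// bit is produced per lane of a 64-lane wave; inactive lanes contribute 0:
#include <cassert>
#include <cstdint>

uint64_t v_cmp_lt_f32(const float *a, const float *b, uint64_t &exec,
                      bool defExec) {
  uint64_t mask = 0;
  for (int lane = 0; lane < 64; ++lane)
    if (((exec >> lane) & 1) && a[lane] < b[lane])
      mask |= 1ULL << lane;
  if (defExec)       // v_cmpx_*: the result also becomes the new EXEC
    exec = mask;
  return mask;       // written to VCC
}

int main() {
  float a[64] = {}, b[64] = {};
  a[7] = -1.0f; b[7] = 1.0f;          // only lane 7 compares true
  uint64_t exec = ~0ULL;
  assert(v_cmp_lt_f32(a, b, exec, /*defExec=*/true) == (1ULL << 7));
  assert(exec == (1ULL << 7));
}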
-multiclass VOPC_Class_Helper pat32, - dag out64, dag ins64, string asm64, list pat64, - bit HasMods, bit DefExec, string revOp> { - defm _e32 : VOPC_m ; +multiclass VOPC_Class_Helper pat32, + list pat64, bit DefExec, string revOp, + VOPProfile p, list sched> { + defm _e32 : VOPC_m ; - defm _e64 : VOP3_C_m , + defm _e64 : VOP3_C_m , VOP3DisableModFields<1, 0, 0>; } multiclass VOPCInst : VOPC_Helper < - op, opName, - P.Ins32, P.Asm32, [], - (outs VOPDstS64:$dst), P.Ins64, P.Asm64, + bit DefExec = 0, + list sched = [Write32Bit]> : + VOPC_Helper < + op, opName, [], !if(P.HasModifiers, [(set i1:$dst, (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, @@ -1648,51 +1823,51 @@ multiclass VOPCInst ; multiclass VOPCClassInst : VOPC_Class_Helper < - op, opName, - P.Ins32, P.Asm32, [], - (outs VOPDstS64:$dst), P.Ins64, P.Asm64, + bit DefExec = 0, + list sched> : VOPC_Class_Helper < + op, opName, [], !if(P.HasModifiers, [(set i1:$dst, (AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))], [(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]), - P.HasModifiers, DefExec, opName + DefExec, opName, P, sched >; multiclass VOPC_F32 : - VOPCInst ; + VOPCInst ; multiclass VOPC_F64 : - VOPCInst ; + VOPCInst ; multiclass VOPC_I32 : - VOPCInst ; + VOPCInst ; multiclass VOPC_I64 : - VOPCInst ; + VOPCInst ; multiclass VOPCX sched, string revOp = ""> - : VOPCInst ; + : VOPCInst ; multiclass VOPCX_F32 : - VOPCX ; + VOPCX ; multiclass VOPCX_F64 : - VOPCX ; + VOPCX ; multiclass VOPCX_I32 : - VOPCX ; + VOPCX ; multiclass VOPCX_I64 : - VOPCX ; + VOPCX ; multiclass VOP3_Helper pat, int NumSrcArgs, bit HasMods> : VOP3_m < @@ -1700,16 +1875,16 @@ multiclass VOP3_Helper ; multiclass VOPC_CLASS_F32 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOPCX_CLASS_F32 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOPC_CLASS_F64 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOPCX_CLASS_F64 : - VOPCClassInst ; + VOPCClassInst ; multiclass VOP3Inst : VOP3_Helper < @@ -1761,25 +1936,13 @@ multiclass VOP3_VCC_Inst ; -multiclass VOP3b_Helper pattern> : - VOP3b_3_m < - op, (outs vrc:$vdst, SReg_64:$sdst), - (ins InputModsNoDefault:$src0_modifiers, arc:$src0, - InputModsNoDefault:$src1_modifiers, arc:$src1, - InputModsNoDefault:$src2_modifiers, arc:$src2, - ClampMod:$clamp, omod:$omod), - opName#" $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod", pattern, - opName, opName, 1, 1 +multiclass VOP3bInst pattern = []> : + VOP3b_2_3_m < + op, P.Outs64, P.Ins64, + opName#" "#P.Asm64, pattern, + opName, "", 1, 1 >; -multiclass VOP3b_64 pattern> : - VOP3b_Helper ; - -multiclass VOP3b_32 pattern> : - VOP3b_Helper ; - - class Vop3ModPat : Pat< (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)), (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)), @@ -1925,12 +2088,14 @@ multiclass DS_1A1D_RET op, string opName, RegisterClass rc, dag ins = (ins VGPR_32:$addr, rc:$data0, ds_offset:$offset, gds:$gds), string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> { - def "" : DS_Pseudo , - AtomicNoRet; + let hasPostISelHook = 1 in { + def "" : DS_Pseudo , + AtomicNoRet; - let data1 = 0 in { - def _si : DS_Off16_Real_si ; - def _vi : DS_Off16_Real_vi ; + let data1 = 0 in { + def _si : DS_Off16_Real_si ; + def _vi : DS_Off16_Real_vi ; + } } } @@ -1939,11 +2104,13 @@ multiclass DS_1A2D_RET_m op, string opName, RegisterClass rc, dag outs = (outs rc:$vdst), string asm = opName#" $vdst, $addr, $data0, 
$data1"#"$offset"#"$gds"> { - def "" : DS_Pseudo , - AtomicNoRet; + let hasPostISelHook = 1 in { + def "" : DS_Pseudo , + AtomicNoRet; - def _si : DS_Off16_Real_si ; - def _vi : DS_Off16_Real_vi ; + def _si : DS_Off16_Real_si ; + def _vi : DS_Off16_Real_vi ; + } } multiclass DS_1A2D_RET op, string asm, RegisterClass rc, @@ -2214,7 +2381,7 @@ multiclass MUBUF_Atomic ; @@ -2233,7 +2400,7 @@ multiclass MUBUF_Atomic { @@ -2368,47 +2537,121 @@ multiclass MUBUF_Store_Helper op, string asm, RegisterClass regClass> : - FLAT { - let data = 0; - let mayLoad = 1; +// For cache invalidation instructions. +multiclass MUBUF_Invalidate { + let hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" in { + def "" : MUBUF_Pseudo ; + + // Set everything to 0. + let offset = 0, offen = 0, idxen = 0, glc = 0, vaddr = 0, + vdata = 0, srsrc = 0, slc = 0, tfe = 0, soffset = 0 in { + let addr64 = 0 in { + def _si : MUBUF_Real_si ; + } + + def _vi : MUBUF_Real_vi ; + } + } // End hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" } -class FLAT_Store_Helper op, string name, RegisterClass vdataClass> : - FLAT { +//===----------------------------------------------------------------------===// +// FLAT classes +//===----------------------------------------------------------------------===// - let mayLoad = 0; - let mayStore = 1; - - // Encoding - let vdst = 0; +class flat ci, bits<7> vi = ci> { + field bits<7> CI = ci; + field bits<7> VI = vi; } -multiclass FLAT_ATOMIC op, string name, RegisterClass vdst_rc, - RegisterClass data_rc = vdst_rc> { +class FLAT_Pseudo pattern> : + FLAT <0, outs, ins, "", pattern>, + SIMCInstr { + let isPseudo = 1; + let isCodeGenOnly = 1; +} - let mayLoad = 1, mayStore = 1 in { - def "" : FLAT , - AtomicNoRet { - let glc = 0; - let vdst = 0; - } +class FLAT_Real_ci op, string opName, dag outs, dag ins, string asm> : + FLAT , + SIMCInstr { + let AssemblerPredicate = isCIOnly; +} - def _RTN : FLAT , - AtomicNoRet { - let glc = 1; - } +class FLAT_Real_vi op, string opName, dag outs, dag ins, string asm> : + FLAT , + SIMCInstr { + let AssemblerPredicate = VIAssemblerPredicate; +} + +multiclass FLAT_AtomicRet_m pattern> { + def "" : FLAT_Pseudo , + AtomicNoRet ; + + def _ci : FLAT_Real_ci ; + + def _vi : FLAT_Real_vi ; +} + +multiclass FLAT_Load_Helper { + + let data = 0, mayLoad = 1 in { + + def "" : FLAT_Pseudo ; + + def _ci : FLAT_Real_ci ; + + def _vi : FLAT_Real_vi ; + } +} + +multiclass FLAT_Store_Helper { + + let mayLoad = 0, mayStore = 1, vdst = 0 in { + + def "" : FLAT_Pseudo ; + + def _ci : FLAT_Real_ci ; + + def _vi : FLAT_Real_vi ; + } +} + +multiclass FLAT_ATOMIC { + + let mayLoad = 1, mayStore = 1, glc = 0, vdst = 0 in { + def "" : FLAT_Pseudo , + AtomicNoRet ; + + def _ci : FLAT_Real_ci ; + + def _vi : FLAT_Real_vi ; + } + + let glc = 1, hasPostISelHook = 1 in { + defm _RTN : FLAT_AtomicRet_m ; } } diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index e0eeea9034b3..6f653c70aca0 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -30,7 +30,9 @@ def isGCN : Predicate<"Subtarget->getGeneration() " ">= AMDGPUSubtarget::SOUTHERN_ISLANDS">, AssemblerPredicate<"FeatureGCN">; def isSI : Predicate<"Subtarget->getGeneration() " - "== AMDGPUSubtarget::SOUTHERN_ISLANDS">; + "== AMDGPUSubtarget::SOUTHERN_ISLANDS">, + AssemblerPredicate<"FeatureSouthernIslands">; + def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">; def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">; @@ 
-62,36 +64,38 @@ let mayLoad = 1 in { // We are using the SGPR_32 and not the SReg_32 register class for 32-bit // SMRD instructions, because the SGPR_32 register class does not include M0 // and writing to M0 from an SMRD instruction will hang the GPU. -defm S_LOAD_DWORD : SMRD_Helper <0x00, "s_load_dword", SReg_64, SGPR_32>; -defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "s_load_dwordx2", SReg_64, SReg_64>; -defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "s_load_dwordx4", SReg_64, SReg_128>; -defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "s_load_dwordx8", SReg_64, SReg_256>; -defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "s_load_dwordx16", SReg_64, SReg_512>; +defm S_LOAD_DWORD : SMRD_Helper , "s_load_dword", SReg_64, SGPR_32>; +defm S_LOAD_DWORDX2 : SMRD_Helper , "s_load_dwordx2", SReg_64, SReg_64>; +defm S_LOAD_DWORDX4 : SMRD_Helper , "s_load_dwordx4", SReg_64, SReg_128>; +defm S_LOAD_DWORDX8 : SMRD_Helper , "s_load_dwordx8", SReg_64, SReg_256>; +defm S_LOAD_DWORDX16 : SMRD_Helper , "s_load_dwordx16", SReg_64, SReg_512>; defm S_BUFFER_LOAD_DWORD : SMRD_Helper < - 0x08, "s_buffer_load_dword", SReg_128, SGPR_32 + smrd<0x08>, "s_buffer_load_dword", SReg_128, SGPR_32 >; defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper < - 0x09, "s_buffer_load_dwordx2", SReg_128, SReg_64 + smrd<0x09>, "s_buffer_load_dwordx2", SReg_128, SReg_64 >; defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper < - 0x0a, "s_buffer_load_dwordx4", SReg_128, SReg_128 + smrd<0x0a>, "s_buffer_load_dwordx4", SReg_128, SReg_128 >; defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper < - 0x0b, "s_buffer_load_dwordx8", SReg_128, SReg_256 + smrd<0x0b>, "s_buffer_load_dwordx8", SReg_128, SReg_256 >; defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper < - 0x0c, "s_buffer_load_dwordx16", SReg_128, SReg_512 + smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512 >; } // mayLoad = 1 //def S_MEMTIME : SMRD_ <0x0000001e, "s_memtime", []>; -//def S_DCACHE_INV : SMRD_ <0x0000001f, "s_dcache_inv", []>; + +defm S_DCACHE_INV : SMRD_Inval , "s_dcache_inv", + int_amdgcn_s_dcache_inv>; //===----------------------------------------------------------------------===// // SOP1 Instructions @@ -123,7 +127,7 @@ let Defs = [SCC] in { defm S_BREV_B32 : SOP1_32 , "s_brev_b32", - [(set i32:$dst, (AMDGPUbrev i32:$src0))] + [(set i32:$dst, (bitreverse i32:$src0))] >; defm S_BREV_B64 : SOP1_64 , "s_brev_b64", []>; @@ -183,10 +187,14 @@ defm S_XNOR_SAVEEXEC_B64 : SOP1_64 , "s_xnor_saveexec_b64", []> defm S_QUADMASK_B32 : SOP1_32 , "s_quadmask_b32", []>; defm S_QUADMASK_B64 : SOP1_64 , "s_quadmask_b64", []>; + +let Uses = [M0] in { defm S_MOVRELS_B32 : SOP1_32 , "s_movrels_b32", []>; defm S_MOVRELS_B64 : SOP1_64 , "s_movrels_b64", []>; defm S_MOVRELD_B32 : SOP1_32 , "s_movreld_b32", []>; defm S_MOVRELD_B64 : SOP1_64 , "s_movreld_b64", []>; +} // End Uses = [M0] + defm S_CBRANCH_JOIN : SOP1_1 , "s_cbranch_join", []>; defm S_MOV_REGRD_B32 : SOP1_32 , "s_mov_regrd_b32", []>; let Defs = [SCC] in { @@ -354,7 +362,7 @@ def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "s_cmp_le_u32">; // SOPK Instructions //===----------------------------------------------------------------------===// -let isReMaterializable = 1 in { +let isReMaterializable = 1, isMoveImm = 1 in { defm S_MOVK_I32 : SOPK_32 , "s_movk_i32", []>; } // End isReMaterializable = 1 let Uses = [SCC] in { @@ -438,36 +446,38 @@ def S_BRANCH : SOPP < let isBarrier = 1; } -let DisableEncoding = "$scc" in { +let Uses = [SCC] in { def S_CBRANCH_SCC0 : SOPP < - 0x00000004, (ins sopp_brtarget:$simm16, SCCReg:$scc), + 0x00000004, (ins sopp_brtarget:$simm16), "s_cbranch_scc0 $simm16" 
>; def S_CBRANCH_SCC1 : SOPP < - 0x00000005, (ins sopp_brtarget:$simm16, SCCReg:$scc), + 0x00000005, (ins sopp_brtarget:$simm16), "s_cbranch_scc1 $simm16" >; -} // End DisableEncoding = "$scc" +} // End Uses = [SCC] +let Uses = [VCC] in { def S_CBRANCH_VCCZ : SOPP < - 0x00000006, (ins sopp_brtarget:$simm16, VCCReg:$vcc), + 0x00000006, (ins sopp_brtarget:$simm16), "s_cbranch_vccz $simm16" >; def S_CBRANCH_VCCNZ : SOPP < - 0x00000007, (ins sopp_brtarget:$simm16, VCCReg:$vcc), + 0x00000007, (ins sopp_brtarget:$simm16), "s_cbranch_vccnz $simm16" >; +} // End Uses = [VCC] -let DisableEncoding = "$exec" in { +let Uses = [EXEC] in { def S_CBRANCH_EXECZ : SOPP < - 0x00000008, (ins sopp_brtarget:$simm16, EXECReg:$exec), + 0x00000008, (ins sopp_brtarget:$simm16), "s_cbranch_execz $simm16" >; def S_CBRANCH_EXECNZ : SOPP < - 0x00000009, (ins sopp_brtarget:$simm16, EXECReg:$exec), + 0x00000009, (ins sopp_brtarget:$simm16), "s_cbranch_execnz $simm16" >; -} // End DisableEncoding = "$exec" +} // End Uses = [EXEC] } // End isBranch = 1 @@ -477,11 +487,11 @@ let hasSideEffects = 1 in { def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier", [(int_AMDGPU_barrier_local)] > { + let SchedRW = [WriteBarrier]; let simm16 = 0; - let isBarrier = 1; - let hasCtrlDep = 1; let mayLoad = 1; let mayStore = 1; + let isConvergent = 1; } def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">; @@ -805,9 +815,6 @@ defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmps defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">; defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">; defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">; -let SubtargetPredicate = isCI in { -defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">; -} // End isCI defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>; let mayStore = 0 in { defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>; @@ -905,11 +912,6 @@ defm DS_WRITE_SRC2_B64 : DS_1A <0xcc, "ds_write_src2_b64">; defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">; defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">; -//let SubtargetPredicate = isCI in { -// DS_CONDXCHG32_RTN_B64 -// DS_CONDXCHG32_RTN_B128 -//} // End isCI - //===----------------------------------------------------------------------===// // MUBUF Instructions //===----------------------------------------------------------------------===// @@ -951,13 +953,13 @@ defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper < mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, sextloadi16_global >; defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper < - mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, global_load + mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, mubuf_load >; defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper < - mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, global_load + mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load >; defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper < - mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, global_load + mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load >; defm BUFFER_STORE_BYTE : MUBUF_Store_Helper < @@ -1034,9 +1036,12 @@ defm BUFFER_ATOMIC_XOR : MUBUF_Atomic < //def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 , "buffer_atomic_fcmpswap_x2", []>; // isn't on VI //def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 , "buffer_atomic_fmin_x2", []>; // isn't on VI //def 
BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 , "buffer_atomic_fmax_x2", []>; // isn't on VI -//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 , "buffer_wbinvl1_sc", []>; // isn't on CI & VI -//def BUFFER_WBINVL1_VOL : MUBUF_WBINVL1 , "buffer_wbinvl1_vol", []>; // isn't on SI -//def BUFFER_WBINVL1 : MUBUF_WBINVL1 , "buffer_wbinvl1", []>; + +let SubtargetPredicate = isSI in { +defm BUFFER_WBINVL1_SC : MUBUF_Invalidate , "buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; // isn't on CI & VI +} + +defm BUFFER_WBINVL1 : MUBUF_Invalidate , "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>; //===----------------------------------------------------------------------===// // MTBUF Instructions @@ -1155,8 +1160,8 @@ defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o" // VOP1 Instructions //===----------------------------------------------------------------------===// -let vdst = 0, src0 = 0 in { -defm V_NOP : VOP1_m , (outs), (ins), "v_nop", [], "v_nop">; +let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { +defm V_NOP : VOP1Inst , "v_nop", VOP_NONE>; } let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in { @@ -1292,7 +1297,9 @@ defm V_SQRT_F64 : VOP1Inst , "v_sqrt_f64", VOP_F64_F64, fsqrt >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] + +let SchedRW = [WriteQuarterRate32] in { defm V_SIN_F32 : VOP1Inst , "v_sin_f32", VOP_F32_F32, AMDGPUsin @@ -1300,6 +1307,9 @@ defm V_SIN_F32 : VOP1Inst , "v_sin_f32", defm V_COS_F32 : VOP1Inst , "v_cos_f32", VOP_F32_F32, AMDGPUcos >; + +} // End SchedRW = [WriteQuarterRate32] + defm V_NOT_B32 : VOP1Inst , "v_not_b32", VOP_I32_I32>; defm V_BFREV_B32 : VOP1Inst , "v_bfrev_b32", VOP_I32_I32>; defm V_FFBH_U32 : VOP1Inst , "v_ffbh_u32", VOP_I32_I32>; @@ -1308,24 +1318,33 @@ defm V_FFBH_I32 : VOP1Inst , "v_ffbh_i32", VOP_I32_I32>; defm V_FREXP_EXP_I32_F64 : VOP1Inst , "v_frexp_exp_i32_f64", VOP_I32_F64 >; + +let SchedRW = [WriteDoubleAdd] in { defm V_FREXP_MANT_F64 : VOP1Inst , "v_frexp_mant_f64", VOP_F64_F64 >; -defm V_FRACT_F64 : VOP1Inst , "v_fract_f64", VOP_F64_F64>; + +defm V_FRACT_F64 : VOP1Inst , "v_fract_f64", + VOP_F64_F64 +>; +} // End SchedRW = [WriteDoubleAdd] + + defm V_FREXP_EXP_I32_F32 : VOP1Inst , "v_frexp_exp_i32_f32", VOP_I32_F32 >; defm V_FREXP_MANT_F32 : VOP1Inst , "v_frexp_mant_f32", VOP_F32_F32 >; -let vdst = 0, src0 = 0 in { -defm V_CLREXCP : VOP1_m , (outs), (ins), "v_clrexcp", [], - "v_clrexcp" ->; +let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in { +defm V_CLREXCP : VOP1Inst , "v_clrexcp", VOP_NONE>; } + +let Uses = [M0, EXEC] in { defm V_MOVRELD_B32 : VOP1Inst , "v_movreld_b32", VOP_I32_I32>; defm V_MOVRELS_B32 : VOP1Inst , "v_movrels_b32", VOP_I32_I32>; defm V_MOVRELSD_B32 : VOP1Inst , "v_movrelsd_b32", VOP_I32_I32>; +} // End Uses = [M0, EXEC] // These instruction only exist on SI and CI let SubtargetPredicate = isSICI in { @@ -1343,7 +1362,7 @@ defm V_RSQ_LEGACY_F32 : VOP1InstSI , "v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy >; -} // End let SchedRW = [WriteQuarterRate32] +} // End SchedRW = [WriteQuarterRate32] let SchedRW = [WriteDouble] in { @@ -1360,7 +1379,7 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI , "v_rsq_clamp_f64", // VINTRP Instructions //===----------------------------------------------------------------------===// -let Uses = [M0] in { +let Uses = [M0, EXEC] in { // FIXME: Specify SchedRW for VINTRP insturctions. 
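// Editorial sketch: S_BREV_B32 earlier in this patch is retargeted from the
// custom AMDGPUbrev node to the generic ISD 'bitreverse', and V_BFREV_B32
// above is its VALU counterpart. What that operation computes, as plain C:
#include <cassert>
#include <cstdint>

uint32_t brev32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) {
    r = (r << 1) | (x & 1); // emit bits in reverse order
    x >>= 1;
  }
  return r;
}

int main() {
  assert(brev32(0x00000001u) == 0x80000000u);
  assert(brev32(0x80000001u) == 0x80000001u); // palindromic pattern
}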
@@ -1405,16 +1424,14 @@ defm V_INTERP_MOV_F32 : VINTRP_m < [(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan), (i32 imm:$attr)))]>; -} // End Uses = [M0] +} // End Uses = [M0, EXEC] //===----------------------------------------------------------------------===// // VOP2 Instructions //===----------------------------------------------------------------------===// multiclass V_CNDMASK { - defm _e32 : VOP2_m < - op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins32, VOP_CNDMASK.Asm32, [], - name, name>; + defm _e32 : VOP2_m ; defm _e64 : VOP3_m < op, VOP_CNDMASK.Outs, VOP_CNDMASK.Ins64, @@ -1500,34 +1517,32 @@ let isCommutable = 1 in { defm V_MADAK_F32 : VOP2MADK , "v_madak_f32">; } // End isCommutable = 1 -let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC +let isCommutable = 1 in { // No patterns so that the scalar instructions are always selected. // The scalar versions will be replaced with vector when needed later. // V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 where renamed to *_U32 in VI, // but the VI instructions behave the same as the SI versions. defm V_ADD_I32 : VOP2bInst , "v_add_i32", - VOP_I32_I32_I32, add + VOP2b_I32_I1_I32_I32 >; -defm V_SUB_I32 : VOP2bInst , "v_sub_i32", VOP_I32_I32_I32>; +defm V_SUB_I32 : VOP2bInst , "v_sub_i32", VOP2b_I32_I1_I32_I32>; defm V_SUBREV_I32 : VOP2bInst , "v_subrev_i32", - VOP_I32_I32_I32, null_frag, "v_sub_i32" + VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32" >; -let Uses = [VCC] in { // Carry-in comes from VCC defm V_ADDC_U32 : VOP2bInst , "v_addc_u32", - VOP_I32_I32_I32_VCC + VOP2b_I32_I1_I32_I32_I1 >; defm V_SUBB_U32 : VOP2bInst , "v_subb_u32", - VOP_I32_I32_I32_VCC + VOP2b_I32_I1_I32_I32_I1 >; defm V_SUBBREV_U32 : VOP2bInst , "v_subbrev_u32", - VOP_I32_I32_I32_VCC, null_frag, "v_subb_u32" + VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32" >; -} // End Uses = [VCC] -} // End isCommutable = 1, Defs = [VCC] +} // End isCommutable = 1 defm V_READLANE_B32 : VOP2SI_3VI_m < vop3 <0x001, 0x289>, @@ -1575,10 +1590,10 @@ defm V_BCNT_U32_B32 : VOP2_VI3_Inst , "v_bcnt_u32_b32", VOP_I32_I32_I32 >; defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_lo_u32_b32", - VOP_I32_I32_I32 + VOP_I32_I32_I32, int_amdgcn_mbcnt_lo >; defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst , "v_mbcnt_hi_u32_b32", - VOP_I32_I32_I32 + VOP_I32_I32_I32, int_amdgcn_mbcnt_hi >; defm V_LDEXP_F32 : VOP2_VI3_Inst , "v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp @@ -1704,15 +1719,15 @@ defm V_DIV_FIXUP_F32 : VOP3Inst < vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup >; -let SchedRW = [WriteDouble] in { +let SchedRW = [WriteDoubleAdd] in { defm V_DIV_FIXUP_F64 : VOP3Inst < vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup >; -} // let SchedRW = [WriteDouble] +} // End SchedRW = [WriteDouble] -let SchedRW = [WriteDouble] in { +let SchedRW = [WriteDoubleAdd] in { let isCommutable = 1 in { defm V_ADD_F64 : VOP3Inst , "v_add_f64", @@ -1735,7 +1750,7 @@ defm V_LDEXP_F64 : VOP3Inst , "v_ldexp_f64", VOP_F64_F64_I32, AMDGPUldexp >; -} // let SchedRW = [WriteDouble] +} // let SchedRW = [WriteDoubleAdd] let isCommutable = 1, SchedRW = [WriteQuarterRate32] in { @@ -1756,16 +1771,21 @@ defm V_MUL_HI_I32 : VOP3Inst , "v_mul_hi_i32", } // isCommutable = 1, SchedRW = [WriteQuarterRate32] let SchedRW = [WriteFloatFMA, WriteSALU] in { -defm V_DIV_SCALE_F32 : VOP3b_32 , "v_div_scale_f32", []>; +defm V_DIV_SCALE_F32 : VOP3bInst , "v_div_scale_f32", + VOP3b_F32_I1_F32_F32_F32 +>; } let SchedRW = [WriteDouble, WriteSALU] in { // Double precision 
division pre-scale. -defm V_DIV_SCALE_F64 : VOP3b_64 , "v_div_scale_f64", []>; +defm V_DIV_SCALE_F64 : VOP3bInst , "v_div_scale_f64", + VOP3b_F64_I1_F64_F64_F64 +>; } // let SchedRW = [WriteDouble] -let isCommutable = 1, Uses = [VCC] in { +let isCommutable = 1, Uses = [VCC, EXEC] in { +let SchedRW = [WriteFloatFMA] in { // v_div_fmas_f32: // result = src0 * src1 + src2 // if (vcc) @@ -1774,6 +1794,7 @@ let isCommutable = 1, Uses = [VCC] in { defm V_DIV_FMAS_F32 : VOP3_VCC_Inst , "v_div_fmas_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fmas >; +} let SchedRW = [WriteDouble] in { // v_div_fmas_f64: @@ -1786,7 +1807,7 @@ defm V_DIV_FMAS_F64 : VOP3_VCC_Inst , "v_div_fmas_f64", >; } // End SchedRW = [WriteDouble] -} // End isCommutable = 1 +} // End isCommutable = 1, Uses = [VCC, EXEC] //def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>; @@ -1835,13 +1856,13 @@ def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$dst), (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", [] >; -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in { // 64-bit vector move instruction. This is mainly used by the SIFoldOperands // pass to enable folding of inline immediates. def V_MOV_B64_PSEUDO : InstSI <(outs VReg_64:$dst), (ins VSrc_64:$src0), "", []>; } // end let hasSideEffects = 0, mayLoad = 0, mayStore = 0 -let hasSideEffects = 1 in { +let hasSideEffects = 1, SALU = 1 in { def SGPR_USE : InstSI <(outs),(ins), "", []>; } @@ -1921,39 +1942,9 @@ def SI_KILL : InstSI < let Uses = [EXEC], Defs = [EXEC,VCC,M0] in { -//defm SI_ : RegisterLoadStore ; - -let UseNamedOperandTable = 1 in { - -def SI_RegisterLoad : InstSI < +class SI_INDIRECT_SRC : InstSI < (outs VGPR_32:$dst, SReg_64:$temp), - (ins FRAMEri32:$addr, i32imm:$chan), - "", [] -> { - let isRegisterLoad = 1; - let mayLoad = 1; -} - -class SIRegStore : InstSI < - outs, - (ins VGPR_32:$val, FRAMEri32:$addr, i32imm:$chan), - "", [] -> { - let isRegisterStore = 1; - let mayStore = 1; -} - -let usesCustomInserter = 1 in { -def SI_RegisterStorePseudo : SIRegStore<(outs)>; -} // End usesCustomInserter = 1 -def SI_RegisterStore : SIRegStore<(outs SReg_64:$temp)>; - - -} // End UseNamedOperandTable = 1 - -def SI_INDIRECT_SRC : InstSI < - (outs VGPR_32:$dst, SReg_64:$temp), - (ins unknown:$src, VSrc_32:$idx, i32imm:$off), + (ins rc:$src, VSrc_32:$idx, i32imm:$off), "si_indirect_src $dst, $temp, $src, $idx, $off", [] >; @@ -1967,6 +1958,13 @@ class SI_INDIRECT_DST : InstSI < let Constraints = "$src = $dst"; } +// TODO: We can support indirect SGPR access. 
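// Editorial sketch: the v_div_fmas comment above is truncated in this copy
// after "if (vcc)". The remainder below is this note's assumption, based on
// the instruction's role in the division sequence alongside v_div_scale:
// when VCC is set, the fma result is additionally scaled by 2^32. Treat the
// scale factor as hypothetical, not as the patch's statement:
#include <cassert>
#include <cmath>

float div_fmas_f32(float a, float b, float c, bool vcc) {
  float r = std::fma(a, b, c);        // result = src0 * src1 + src2
  return vcc ? std::ldexp(r, 32) : r; // assumed: if (vcc) result *= 2^32
}

int main() {
  assert(div_fmas_f32(2.0f, 3.0f, 1.0f, false) == 7.0f);
  assert(div_fmas_f32(2.0f, 3.0f, 1.0f, true) == std::ldexp(7.0f, 32));
}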
+def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC; +def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC; +def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC; +def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC; +def SI_INDIRECT_SRC_V16 : SI_INDIRECT_SRC; + def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST; def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST; @@ -1977,19 +1975,24 @@ def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST; multiclass SI_SPILL_SGPR { - let UseNamedOperandTable = 1 in { + let UseNamedOperandTable = 1, Uses = [EXEC] in { def _SAVE : InstSI < (outs), - (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, - SReg_32:$scratch_offset), + (ins sgpr_class:$src, i32imm:$frame_idx), "", [] - >; + > { + let mayStore = 1; + let mayLoad = 0; + } def _RESTORE : InstSI < (outs sgpr_class:$dst), - (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), + (ins i32imm:$frame_idx), "", [] - >; + > { + let mayStore = 0; + let mayLoad = 1; + } } // End UseNamedOperandTable = 1 } @@ -2003,19 +2006,25 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR ; defm SI_SPILL_S512 : SI_SPILL_SGPR ; multiclass SI_SPILL_VGPR { - let UseNamedOperandTable = 1, VGPRSpill = 1 in { + let UseNamedOperandTable = 1, VGPRSpill = 1, Uses = [EXEC] in { def _SAVE : InstSI < (outs), (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] - >; + > { + let mayStore = 1; + let mayLoad = 0; + } def _RESTORE : InstSI < (outs vgpr_class:$dst), (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset), "", [] - >; + > { + let mayStore = 0; + let mayLoad = 1; + } } // End UseNamedOperandTable = 1, VGPRSpill = 1 } @@ -2030,9 +2039,11 @@ let Defs = [SCC] in { def SI_CONSTDATA_PTR : InstSI < (outs SReg_64:$dst), - (ins), - "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))] ->; + (ins const_ga:$ptr), + "", [(set SReg_64:$dst, (i64 (SIconstdata_ptr (tglobaladdr:$ptr))))] +> { + let SALU = 1; +} } // End Defs = [SCC] @@ -2072,84 +2083,63 @@ def : Pat < // SMRD Patterns //===----------------------------------------------------------------------===// -multiclass SMRD_Pattern { +multiclass SMRD_Pattern { - // 1. SI-CI: Offset as 8bit DWORD immediate + // 1. IMM offset def : Pat < - (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))), - (vt (Instr_IMM $sbase, (as_dword_i32imm $offset))) + (smrd_load (SMRDImm i64:$sbase, i32:$offset)), + (vt (!cast(Instr#"_IMM") $sbase, $offset)) >; - // 2. Offset loaded in an 32bit SGPR + // 2. SGPR offset def : Pat < - (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))), - (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset))))) + (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)), + (vt (!cast(Instr#"_SGPR") $sbase, $offset)) >; - // 3. No offset at all def : Pat < - (constant_load i64:$sbase), - (vt (Instr_IMM $sbase, 0)) - >; + (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), + (vt (!cast(Instr#"_IMM_ci") $sbase, $offset)) + > { + let Predicates = [isCIOnly]; + } } -multiclass SMRD_Pattern_vi { +// Global and constant loads can be selected to either MUBUF or SMRD +// instructions, but SMRD instructions are faster so we want the instruction +// selector to prefer those. +let AddedComplexity = 100 in { - // 1. 
VI: Offset as 20bit immediate in bytes - def : Pat < - (constant_load (add i64:$sbase, (i64 IMM20bit:$offset))), - (vt (Instr_IMM $sbase, (as_i32imm $offset))) - >; +defm : SMRD_Pattern <"S_LOAD_DWORD", i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX8", v32i8>; +defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>; +defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>; - // 2. Offset loaded in an 32bit SGPR - def : Pat < - (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))), - (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset))))) - >; - - // 3. No offset at all - def : Pat < - (constant_load i64:$sbase), - (vt (Instr_IMM $sbase, 0)) - >; -} - -let Predicates = [isSICI] in { -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -defm : SMRD_Pattern ; -} // End Predicates = [isSICI] - -let Predicates = [isVI] in { -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -defm : SMRD_Pattern_vi ; -} // End Predicates = [isVI] - -let Predicates = [isSICI] in { - -// 1. Offset as 8bit DWORD immediate +// 1. Offset as an immediate def : Pat < - (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset), - (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset)) + (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)), + (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset) >; -} // End Predicates = [isSICI] - // 2. Offset loaded in an 32bit SGPR def : Pat < - (SIload_constant v4i32:$sbase, imm:$offset), - (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) + (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)), + (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset) >; +let Predicates = [isCI] in { + +def : Pat < + (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)), + (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset) +>; + +} // End Predicates = [isCI] + +} // End let AddedComplexity = 10000 + //===----------------------------------------------------------------------===// // SOP1 Patterns //===----------------------------------------------------------------------===// @@ -2161,6 +2151,11 @@ def : Pat < (S_MOV_B32 0), sub1)) >; +def : Pat < + (i32 (smax i32:$x, (i32 (ineg i32:$x)))), + (S_ABS_I32 $x) +>; + //===----------------------------------------------------------------------===// // SOP2 Patterns //===----------------------------------------------------------------------===// @@ -2488,6 +2483,11 @@ def : Pat < /********** Extraction, Insertion, Building and Casting **********/ /********** ============================================ **********/ +//def : Extract_Element; +//def : Extract_Element; +//def : Extract_Element; +//def : Extract_Element; + foreach Index = 0-2 in { def Extract_Element_v2i32_#Index : Extract_Element < i32, v2i32, Index, !cast(sub#Index) @@ -2568,11 +2568,25 @@ def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; +def : BitConvert ; def : BitConvert ; +def : BitConvert ; def : BitConvert ; def : BitConvert ; def : BitConvert ; + +def : BitConvert ; +def : BitConvert ; + +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; +def : BitConvert ; + + + + def : BitConvert ; def : BitConvert ; def : BitConvert ; @@ -2601,10 +2615,9 @@ def : Pat < // Prevent expanding both fneg and fabs. 
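// Editorial sketch: the SMRDImm / SMRDImm32 / SMRDSgpr patterns above split
// a (base + offset) scalar-load address by what the encoding can hold, and
// AddedComplexity = 100 makes the selector prefer these SMRD forms over the
// slower MUBUF alternatives, as the comment above explains. The widths below
// follow the IMM8bitDWORD / IMM20bit predicates visible in the removed code
// (8-bit dword-scaled on SI/CI, 32-bit CI literal, 20-bit bytes on VI);
// treat them as assumptions read off this patch, not a spec quote:
#include <cassert>
#include <cstdint>

enum class Gen { SI, CI, VI };
enum class OffsetForm { Imm, Imm32, Sgpr };

OffsetForm selectSMRDOffset(Gen gen, uint64_t byteOff) {
  if (gen == Gen::VI)
    return byteOff < (1u << 20) ? OffsetForm::Imm : OffsetForm::Sgpr;
  // SI/CI: the immediate is a dword count, 8 bits wide.
  if (byteOff % 4 == 0 && (byteOff / 4) < (1u << 8))
    return OffsetForm::Imm;
  if (gen == Gen::CI && byteOff <= UINT32_MAX)
    return OffsetForm::Imm32;          // selects the _IMM_ci variant
  return OffsetForm::Sgpr;             // materialize the offset in an SGPR
}

int main() {
  assert(selectSMRDOffset(Gen::SI, 1020) == OffsetForm::Imm);
  assert(selectSMRDOffset(Gen::SI, 4096) == OffsetForm::Sgpr);
  assert(selectSMRDOffset(Gen::CI, 4096) == OffsetForm::Imm32);
}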
-// FIXME: Should use S_OR_B32 def : Pat < (fneg (fabs f32:$src)), - (V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */ + (S_OR_B32 $src, 0x80000000) /* Set sign bit */ >; // FIXME: Should use S_OR_B32 @@ -2836,10 +2849,6 @@ class DSAtomicRetPat : Pat < // -1. For the non-rtn variants, the manual says it does // DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max // will always do the increment so I'm assuming it's the same. -// -// We also load this -1 with s_mov_b32 / s_mov_b64 even though this -// needs to be a VGPR. The SGPR copy pass will fix this, and it's -// easier since there is no v_mov_b64. class DSAtomicIncRetPat : Pat < (frag (DS1Addr1Offset i32:$ptr, i32:$offset), (vt 1)), @@ -2855,9 +2864,9 @@ class DSAtomicCmpXChg : Pat < // 32-bit atomics. def : DSAtomicIncRetPat; + V_MOV_B32_e32, si_atomic_load_add_local>; def : DSAtomicIncRetPat; + V_MOV_B32_e32, si_atomic_load_sub_local>; def : DSAtomicRetPat; def : DSAtomicRetPat; @@ -2874,9 +2883,9 @@ def : DSAtomicCmpXChg; // 64-bit atomics. def : DSAtomicIncRetPat; + V_MOV_B64_PSEUDO, si_atomic_load_add_local>; def : DSAtomicIncRetPat; + V_MOV_B64_PSEUDO, si_atomic_load_sub_local>; def : DSAtomicRetPat; def : DSAtomicRetPat; @@ -3019,90 +3028,46 @@ def : MTBUF_StoreResource ; def : MTBUF_StoreResource ; def : MTBUF_StoreResource ; -let SubtargetPredicate = isCI in { - -defm V_QSAD_PK_U16_U8 : VOP3Inst , "v_qsad_pk_u16_u8", - VOP_I32_I32_I32 ->; -defm V_MQSAD_U16_U8 : VOP3Inst , "v_mqsad_u16_u8", - VOP_I32_I32_I32 ->; -defm V_MQSAD_U32_U8 : VOP3Inst , "v_mqsad_u32_u8", - VOP_I32_I32_I32 ->; - -let isCommutable = 1 in { -defm V_MAD_U64_U32 : VOP3Inst , "v_mad_u64_u32", - VOP_I64_I32_I32_I64 ->; - -// XXX - Does this set VCC? -defm V_MAD_I64_I32 : VOP3Inst , "v_mad_i64_i32", - VOP_I64_I32_I32_I64 ->; -} // End isCommutable = 1 - -// Remaining instructions: -// FLAT_* -// S_CBRANCH_CDBGUSER -// S_CBRANCH_CDBGSYS -// S_CBRANCH_CDBGSYS_OR_USER -// S_CBRANCH_CDBGSYS_AND_USER -// S_DCACHE_INV_VOL -// DS_NOP -// DS_GWS_SEMA_RELEASE_ALL -// DS_WRAP_RTN_B32 -// DS_CNDXCHG32_RTN_B64 -// DS_WRITE_B96 -// DS_WRITE_B128 -// DS_CONDXCHG32_RTN_B128 -// DS_READ_B96 -// DS_READ_B128 -// BUFFER_LOAD_DWORDX3 -// BUFFER_STORE_DWORDX3 - -} // End isCI - /********** ====================== **********/ /********** Indirect adressing **********/ /********** ====================== **********/ -multiclass SI_INDIRECT_Pattern { +multiclass SI_INDIRECT_Pattern { // 1. Extract with offset def : Pat< - (eltvt (vector_extract vt:$vec, (add i32:$idx, imm:$off))), - (SI_INDIRECT_SRC $vec, $idx, imm:$off) + (eltvt (extractelt vt:$vec, (add i32:$idx, imm:$off))), + (!cast("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, imm:$off) >; // 2. Extract without offset def : Pat< - (eltvt (vector_extract vt:$vec, i32:$idx)), - (SI_INDIRECT_SRC $vec, $idx, 0) + (eltvt (extractelt vt:$vec, i32:$idx)), + (!cast("SI_INDIRECT_SRC_"#VecSize) $vec, $idx, 0) >; // 3. Insert with offset def : Pat< - (vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)), - (IndDst $vec, $idx, imm:$off, $val) + (insertelt vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)), + (!cast("SI_INDIRECT_DST_"#VecSize) $vec, $idx, imm:$off, $val) >; // 4. 
Insert without offset def : Pat< - (vector_insert vt:$vec, eltvt:$val, i32:$idx), - (IndDst $vec, $idx, 0, $val) + (insertelt vt:$vec, eltvt:$val, i32:$idx), + (!cast("SI_INDIRECT_DST_"#VecSize) $vec, $idx, 0, $val) >; } -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; -defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; +defm : SI_INDIRECT_Pattern ; //===----------------------------------------------------------------------===// // Conversion Patterns @@ -3215,12 +3180,12 @@ def : Pat < def : Pat < (i1 (trunc i32:$a)), - (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1) + (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1) >; def : Pat < (i1 (trunc i64:$a)), - (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), + (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), (EXTRACT_SUBREG $a, sub0)), 1) >; @@ -3301,24 +3266,6 @@ def : Pat < } // End Predicates = [isSI] -let Predicates = [isCI] in { - -// Convert (x - floor(x)) to fract(x) -def : Pat < - (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)), - (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))), - (V_FRACT_F32_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) ->; - -// Convert (x + (-floor(x))) to fract(x) -def : Pat < - (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)), - (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))), - (V_FRACT_F64_e64 $mods, $x, DSTCLAMP.NONE, DSTOMOD.NONE) ->; - -} // End Predicates = [isCI] - //============================================================================// // Miscellaneous Optimization Patterns //============================================================================// diff --git a/lib/Target/AMDGPU/SILowerControlFlow.cpp b/lib/Target/AMDGPU/SILowerControlFlow.cpp index c319b32111fe..126f6245dfc0 100644 --- a/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -103,6 +103,10 @@ public: return "SI Lower control flow instructions"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } }; } // End anonymous namespace @@ -140,8 +144,7 @@ void SILowerControlFlowPass::Skip(MachineInstr &From, MachineOperand &To) { DebugLoc DL = From.getDebugLoc(); BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ)) - .addOperand(To) - .addReg(AMDGPU::EXEC); + .addOperand(To); } void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { @@ -159,8 +162,7 @@ void SILowerControlFlowPass::SkipIfDead(MachineInstr &MI) { // If the exec mask is non-zero, skip the next two instructions BuildMI(MBB, Insert, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(3) - .addReg(AMDGPU::EXEC); + .addImm(3); // Exec mask is zero: Export to NULL target... 
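// Editorial sketch: the DSAtomicIncRetPat comment above quotes the manual's
// ds_inc rule, DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and observes that
// passing D0 = UINT_MAX makes it behave exactly like an atomic add of 1,
// since the reset to 0 then coincides with unsigned wraparound:
#include <cassert>
#include <cstdint>

uint32_t ds_inc(uint32_t mem, uint32_t d0) {
  return mem >= d0 ? 0 : mem + 1; // the manual's formula
}

int main() {
  for (uint32_t v : {0u, 1u, 41u, 0xfffffffeu, 0xffffffffu})
    assert(ds_inc(v, 0xffffffffu) == v + 1); // v+1 wraps to 0 at UINT_MAX
}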
BuildMI(MBB, Insert, DL, TII->get(AMDGPU::EXP)) @@ -269,8 +271,7 @@ void SILowerControlFlowPass::Loop(MachineInstr &MI) { .addReg(Src); BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addOperand(MI.getOperand(1)) - .addReg(AMDGPU::EXEC); + .addOperand(MI.getOperand(1)); MI.eraseFromParent(); } @@ -316,7 +317,7 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { .addImm(0); } } else { - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32), AMDGPU::VCC) + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32)) .addImm(0) .addOperand(Op); } @@ -362,9 +363,9 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int .addReg(AMDGPU::VCC_LO); // Compare the just read M0 value to all possible Idx values - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) - .addReg(AMDGPU::M0) - .addReg(Idx); + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32)) + .addReg(AMDGPU::M0) + .addReg(Idx); // Update EXEC, save the original EXEC value to VCC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) @@ -385,8 +386,7 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel, int // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(-7) - .addReg(AMDGPU::EXEC); + .addImm(-7); // Restore EXEC BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) @@ -438,7 +438,6 @@ void SILowerControlFlowPass::IndirectSrc(MachineInstr &MI) { MachineInstr *MovRel = BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst) .addReg(Reg) - .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Vec, RegState::Implicit); LoadM0(MI, MovRel, Off); @@ -460,7 +459,6 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) { BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32)) .addReg(Reg, RegState::Define) .addReg(Val) - .addReg(AMDGPU::M0, RegState::Implicit) .addReg(Dst, RegState::Implicit); LoadM0(MI, MovRel, Off); @@ -486,11 +484,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { Next = std::next(I); MachineInstr &MI = *I; - if (TII->isWQM(MI.getOpcode()) || TII->isDS(MI.getOpcode())) + if (TII->isWQM(MI) || TII->isDS(MI)) NeedWQM = true; // Flat uses m0 in case it needs to access LDS. 
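// Editorial sketch of the loop LoadM0 builds above (v_readfirstlane, compare
// against every lane's index, s_and_saveexec, run the movrel, loop with
// s_cbranch_execnz, then restore EXEC): each iteration handles the subset of
// lanes sharing one index value, so a divergent index is processed in
// uniform chunks. Scalar emulation of an indirect-source read:
#include <cassert>
#include <cstdint>

void waterfall(const uint32_t *idx, const uint32_t *vec, uint32_t *out,
               uint64_t exec) {
  while (exec) {
    int first = __builtin_ctzll(exec);   // v_readfirstlane_b32 (lowest lane)
    uint32_t m0 = idx[first];            // uniform candidate index
    uint64_t vcc = 0;                    // v_cmp_eq_u32 m0, idx
    for (int l = 0; l < 64; ++l)
      if (((exec >> l) & 1) && idx[l] == m0)
        vcc |= 1ULL << l;
    for (int l = 0; l < 64; ++l)         // movrel executes for these lanes
      if ((vcc >> l) & 1)
        out[l] = vec[m0];
    exec &= ~vcc; // retire the handled lanes; the real code then loops on
  }               // s_cbranch_execnz and restores the original EXEC after.
}

int main() {
  uint32_t idx[64], out[64] = {}, vec[4] = {10, 11, 12, 13};
  for (int l = 0; l < 64; ++l) idx[l] = l % 4; // divergent index
  waterfall(idx, vec, out, ~0ULL);
  assert(out[0] == 10 && out[5] == 11);
}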
- if (TII->isFLAT(MI.getOpcode())) + if (TII->isFLAT(MI)) NeedFlat = true; switch (MI.getOpcode()) { @@ -541,7 +539,11 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { Branch(MI); break; - case AMDGPU::SI_INDIRECT_SRC: + case AMDGPU::SI_INDIRECT_SRC_V1: + case AMDGPU::SI_INDIRECT_SRC_V2: + case AMDGPU::SI_INDIRECT_SRC_V4: + case AMDGPU::SI_INDIRECT_SRC_V8: + case AMDGPU::SI_INDIRECT_SRC_V16: IndirectSrc(MI); break; diff --git a/lib/Target/AMDGPU/SILowerI1Copies.cpp b/lib/Target/AMDGPU/SILowerI1Copies.cpp index 67421e231d8d..a2fa5fd93aad 100644 --- a/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -48,6 +48,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 587ea63d6796..935aad427198 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -29,10 +29,114 @@ void SIMachineFunctionInfo::anchor() {} SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), TIDReg(AMDGPU::NoRegister), - HasSpilledVGPRs(false), + ScratchRSrcReg(AMDGPU::NoRegister), + ScratchWaveOffsetReg(AMDGPU::NoRegister), + PrivateSegmentBufferUserSGPR(AMDGPU::NoRegister), + DispatchPtrUserSGPR(AMDGPU::NoRegister), + QueuePtrUserSGPR(AMDGPU::NoRegister), + KernargSegmentPtrUserSGPR(AMDGPU::NoRegister), + DispatchIDUserSGPR(AMDGPU::NoRegister), + FlatScratchInitUserSGPR(AMDGPU::NoRegister), + PrivateSegmentSizeUserSGPR(AMDGPU::NoRegister), + GridWorkGroupCountXUserSGPR(AMDGPU::NoRegister), + GridWorkGroupCountYUserSGPR(AMDGPU::NoRegister), + GridWorkGroupCountZUserSGPR(AMDGPU::NoRegister), + WorkGroupIDXSystemSGPR(AMDGPU::NoRegister), + WorkGroupIDYSystemSGPR(AMDGPU::NoRegister), + WorkGroupIDZSystemSGPR(AMDGPU::NoRegister), + WorkGroupInfoSystemSGPR(AMDGPU::NoRegister), + PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister), + LDSWaveSpillSize(0), PSInputAddr(0), NumUserSGPRs(0), - LDSWaveSpillSize(0) { } + NumSystemSGPRs(0), + HasSpilledSGPRs(false), + HasSpilledVGPRs(false), + PrivateSegmentBuffer(false), + DispatchPtr(false), + QueuePtr(false), + DispatchID(false), + KernargSegmentPtr(false), + FlatScratchInit(false), + GridWorkgroupCountX(false), + GridWorkgroupCountY(false), + GridWorkgroupCountZ(false), + WorkGroupIDX(true), + WorkGroupIDY(false), + WorkGroupIDZ(false), + WorkGroupInfo(false), + PrivateSegmentWaveByteOffset(false), + WorkItemIDX(true), + WorkItemIDY(false), + WorkItemIDZ(false) { + const AMDGPUSubtarget &ST = MF.getSubtarget(); + const Function *F = MF.getFunction(); + + const MachineFrameInfo *FrameInfo = MF.getFrameInfo(); + + if (getShaderType() == ShaderType::COMPUTE) + KernargSegmentPtr = true; + + if (F->hasFnAttribute("amdgpu-work-group-id-y")) + WorkGroupIDY = true; + + if (F->hasFnAttribute("amdgpu-work-group-id-z")) + WorkGroupIDZ = true; + + if (F->hasFnAttribute("amdgpu-work-item-id-y")) + WorkItemIDY = true; + + if (F->hasFnAttribute("amdgpu-work-item-id-z")) + WorkItemIDZ = true; + + bool MaySpill = ST.isVGPRSpillingEnabled(this); + bool HasStackObjects = FrameInfo->hasStackObjects(); + + if (HasStackObjects || MaySpill) + PrivateSegmentWaveByteOffset = true; + + if (ST.isAmdHsaOS()) { + if (HasStackObjects || MaySpill) + PrivateSegmentBuffer = true; + + if 
(F->hasFnAttribute("amdgpu-dispatch-ptr")) + DispatchPtr = true; + } + + // X, XY, and XYZ are the only supported combinations, so make sure Y is + // enabled if Z is. + if (WorkItemIDZ) + WorkItemIDY = true; +} + +unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer( + const SIRegisterInfo &TRI) { + PrivateSegmentBufferUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_128RegClass); + NumUserSGPRs += 4; + return PrivateSegmentBufferUserSGPR; +} + +unsigned SIMachineFunctionInfo::addDispatchPtr(const SIRegisterInfo &TRI) { + DispatchPtrUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return DispatchPtrUserSGPR; +} + +unsigned SIMachineFunctionInfo::addQueuePtr(const SIRegisterInfo &TRI) { + QueuePtrUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return QueuePtrUserSGPR; +} + +unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI) { + KernargSegmentPtrUserSGPR = TRI.getMatchingSuperReg( + getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass); + NumUserSGPRs += 2; + return KernargSegmentPtrUserSGPR; +} SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( MachineFunction *MF, @@ -53,7 +157,6 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg( if (!LaneVGPRs.count(LaneVGPRIdx)) { unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass); LaneVGPRs[LaneVGPRIdx] = LaneVGPR; - MRI.setPhysRegUsed(LaneVGPR); // Add this register as live-in to all blocks to avoid machine verifer // complaining about use of an undefined physical register. diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 667da4c8af61..9c528d63bd0e 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -26,13 +26,83 @@ class MachineRegisterInfo; /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which /// tells the hardware which interpolation parameters to load. class SIMachineFunctionInfo : public AMDGPUMachineFunction { + // FIXME: This should be removed and getPreloadedValue moved here. + friend struct SIRegisterInfo; void anchor() override; unsigned TIDReg; - bool HasSpilledVGPRs; + + // Registers that may be reserved for spilling purposes. These may be the same + // as the input registers. + unsigned ScratchRSrcReg; + unsigned ScratchWaveOffsetReg; + + // Input registers setup for the HSA ABI. + // User SGPRs in allocation order. + unsigned PrivateSegmentBufferUserSGPR; + unsigned DispatchPtrUserSGPR; + unsigned QueuePtrUserSGPR; + unsigned KernargSegmentPtrUserSGPR; + unsigned DispatchIDUserSGPR; + unsigned FlatScratchInitUserSGPR; + unsigned PrivateSegmentSizeUserSGPR; + unsigned GridWorkGroupCountXUserSGPR; + unsigned GridWorkGroupCountYUserSGPR; + unsigned GridWorkGroupCountZUserSGPR; + + // System SGPRs in allocation order. + unsigned WorkGroupIDXSystemSGPR; + unsigned WorkGroupIDYSystemSGPR; + unsigned WorkGroupIDZSystemSGPR; + unsigned WorkGroupInfoSystemSGPR; + unsigned PrivateSegmentWaveByteOffsetSystemSGPR; public: + // FIXME: Make private + unsigned LDSWaveSpillSize; + unsigned PSInputAddr; + std::map LaneVGPRs; + unsigned ScratchOffsetReg; + unsigned NumUserSGPRs; + unsigned NumSystemSGPRs; +private: + bool HasSpilledSGPRs; + bool HasSpilledVGPRs; + + // Feature bits required for inputs passed in user SGPRs. 
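// Editorial sketch: the add*() functions above act as a bump allocator over
// the user-SGPR sequence: each preloaded input is assigned the next free
// SGPR and NumUserSGPRs advances by the value's width in dwords (4 for the
// 128-bit private segment buffer, 2 for 64-bit pointers). Reduced model:
#include <cassert>

struct UserSGPRAlloc {
  unsigned numUserSGPRs = 0;
  unsigned add(unsigned dwords) { // returns the first SGPR of the group
    unsigned first = numUserSGPRs;
    numUserSGPRs += dwords;
    return first;
  }
};

int main() {
  UserSGPRAlloc a;
  assert(a.add(4) == 0); // private segment buffer -> s[0:3]
  assert(a.add(2) == 4); // dispatch ptr           -> s[4:5]
  assert(a.add(2) == 6); // queue ptr              -> s[6:7]
  assert(a.numUserSGPRs == 8);
}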
+ bool PrivateSegmentBuffer : 1; + bool DispatchPtr : 1; + bool QueuePtr : 1; + bool DispatchID : 1; + bool KernargSegmentPtr : 1; + bool FlatScratchInit : 1; + bool GridWorkgroupCountX : 1; + bool GridWorkgroupCountY : 1; + bool GridWorkgroupCountZ : 1; + + // Feature bits required for inputs passed in system SGPRs. + bool WorkGroupIDX : 1; // Always initialized. + bool WorkGroupIDY : 1; + bool WorkGroupIDZ : 1; + bool WorkGroupInfo : 1; + bool PrivateSegmentWaveByteOffset : 1; + + bool WorkItemIDX : 1; // Always initialized. + bool WorkItemIDY : 1; + bool WorkItemIDZ : 1; + + + MCPhysReg getNextUserSGPR() const { + assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs"); + return AMDGPU::SGPR0 + NumUserSGPRs; + } + + MCPhysReg getNextSystemSGPR() const { + return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs; + } + +public: struct SpilledReg { unsigned VGPR; int Lane; @@ -46,16 +116,162 @@ public: SIMachineFunctionInfo(const MachineFunction &MF); SpilledReg getSpilledReg(MachineFunction *MF, unsigned FrameIndex, unsigned SubIdx); - unsigned PSInputAddr; - unsigned NumUserSGPRs; - std::map LaneVGPRs; - unsigned LDSWaveSpillSize; - unsigned ScratchOffsetReg; bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; }; unsigned getTIDReg() const { return TIDReg; }; void setTIDReg(unsigned Reg) { TIDReg = Reg; } - bool hasSpilledVGPRs() const { return HasSpilledVGPRs; } - void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; } + + // Add user SGPRs. + unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI); + unsigned addDispatchPtr(const SIRegisterInfo &TRI); + unsigned addQueuePtr(const SIRegisterInfo &TRI); + unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI); + + // Add system SGPRs. + unsigned addWorkGroupIDX() { + WorkGroupIDXSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return WorkGroupIDXSystemSGPR; + } + + unsigned addWorkGroupIDY() { + WorkGroupIDYSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return WorkGroupIDYSystemSGPR; + } + + unsigned addWorkGroupIDZ() { + WorkGroupIDZSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return WorkGroupIDZSystemSGPR; + } + + unsigned addWorkGroupInfo() { + WorkGroupInfoSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return WorkGroupInfoSystemSGPR; + } + + unsigned addPrivateSegmentWaveByteOffset() { + PrivateSegmentWaveByteOffsetSystemSGPR = getNextSystemSGPR(); + NumSystemSGPRs += 1; + return PrivateSegmentWaveByteOffsetSystemSGPR; + } + + bool hasPrivateSegmentBuffer() const { + return PrivateSegmentBuffer; + } + + bool hasDispatchPtr() const { + return DispatchPtr; + } + + bool hasQueuePtr() const { + return QueuePtr; + } + + bool hasDispatchID() const { + return DispatchID; + } + + bool hasKernargSegmentPtr() const { + return KernargSegmentPtr; + } + + bool hasFlatScratchInit() const { + return FlatScratchInit; + } + + bool hasGridWorkgroupCountX() const { + return GridWorkgroupCountX; + } + + bool hasGridWorkgroupCountY() const { + return GridWorkgroupCountY; + } + + bool hasGridWorkgroupCountZ() const { + return GridWorkgroupCountZ; + } + + bool hasWorkGroupIDX() const { + return WorkGroupIDX; + } + + bool hasWorkGroupIDY() const { + return WorkGroupIDY; + } + + bool hasWorkGroupIDZ() const { + return WorkGroupIDZ; + } + + bool hasWorkGroupInfo() const { + return WorkGroupInfo; + } + + bool hasPrivateSegmentWaveByteOffset() const { + return PrivateSegmentWaveByteOffset; + } + + bool hasWorkItemIDX() const { + return WorkItemIDX; + } + + 
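As a brief aside on the bookkeeping above: the add* helpers hand out user SGPRs first, starting at SGPR0, and getNextSystemSGPR then appends system SGPRs after all user SGPRs, which is why getNextUserSGPR asserts that no system SGPR has been placed yet. A minimal standalone sketch of that invariant (hypothetical illustration, not code from this patch):

#include <cassert>
#include <cstdio>

// Toy model of the allocation scheme; register numbers are offsets
// from a hypothetical SGPR0, mirroring getNextUserSGPR/getNextSystemSGPR.
struct SGPRAllocModel {
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  // Like addPrivateSegmentBuffer(): a 128-bit resource takes 4 user SGPRs.
  unsigned addUserQuad() {
    assert(NumSystemSGPRs == 0 && "System SGPRs must be added after user SGPRs");
    unsigned First = NumUserSGPRs;
    NumUserSGPRs += 4;
    return First;
  }

  // Like addWorkGroupIDX(): one system SGPR after all user SGPRs.
  unsigned addSystemSGPR() {
    return NumUserSGPRs + NumSystemSGPRs++;
  }
};

int main() {
  SGPRAllocModel M;
  unsigned Rsrc = M.addUserQuad();    // occupies s[0:3]
  unsigned TgidX = M.addSystemSGPR(); // lands in s4
  std::printf("rsrc at s%u..s%u, workgroup id x in s%u\n",
              Rsrc, Rsrc + 3, TgidX);
  return 0;
}

Adding a user SGPR after a system SGPR would shift every system SGPR already handed out, which is exactly the ordering bug the assert guards against.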
bool hasWorkItemIDY() const { + return WorkItemIDY; + } + + bool hasWorkItemIDZ() const { + return WorkItemIDZ; + } + + unsigned getNumUserSGPRs() const { + return NumUserSGPRs; + } + + unsigned getNumPreloadedSGPRs() const { + return NumUserSGPRs + NumSystemSGPRs; + } + + unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const { + return PrivateSegmentWaveByteOffsetSystemSGPR; + } + + /// \brief Returns the physical register reserved for use as the resource + /// descriptor for scratch accesses. + unsigned getScratchRSrcReg() const { + return ScratchRSrcReg; + } + + void setScratchRSrcReg(unsigned Reg) { + assert(Reg != AMDGPU::NoRegister && "Should never be unset"); + ScratchRSrcReg = Reg; + } + + unsigned getScratchWaveOffsetReg() const { + return ScratchWaveOffsetReg; + } + + void setScratchWaveOffsetReg(unsigned Reg) { + assert(Reg != AMDGPU::NoRegister && "Should never be unset"); + ScratchWaveOffsetReg = Reg; + } + + bool hasSpilledSGPRs() const { + return HasSpilledSGPRs; + } + + void setHasSpilledSGPRs(bool Spill = true) { + HasSpilledSGPRs = Spill; + } + + bool hasSpilledVGPRs() const { + return HasSpilledVGPRs; + } + + void setHasSpilledVGPRs(bool Spill = true) { + HasSpilledVGPRs = Spill; + } unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const; }; diff --git a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp b/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp deleted file mode 100644 index 2cd600df2268..000000000000 --- a/lib/Target/AMDGPU/SIPrepareScratchRegs.cpp +++ /dev/null @@ -1,193 +0,0 @@ -//===-- SIPrepareScratchRegs.cpp - Use predicates for control flow --------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -/// \file -/// -/// This pass loads scratch pointer and scratch offset into a register or a -/// frame index which can be used anywhere in the program. These values will -/// be used for spilling VGPRs. 
-/// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPUSubtarget.h" -#include "SIDefines.h" -#include "SIInstrInfo.h" -#include "SIMachineFunctionInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" - -using namespace llvm; - -namespace { - -class SIPrepareScratchRegs : public MachineFunctionPass { - -private: - static char ID; - -public: - SIPrepareScratchRegs() : MachineFunctionPass(ID) { } - - bool runOnMachineFunction(MachineFunction &MF) override; - - const char *getPassName() const override { - return "SI prepare scratch registers"; - } - -}; - -} // End anonymous namespace - -char SIPrepareScratchRegs::ID = 0; - -FunctionPass *llvm::createSIPrepareScratchRegs() { - return new SIPrepareScratchRegs(); -} - -bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) { - SIMachineFunctionInfo *MFI = MF.getInfo(); - const SIInstrInfo *TII = - static_cast(MF.getSubtarget().getInstrInfo()); - const SIRegisterInfo *TRI = &TII->getRegisterInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - MachineBasicBlock *Entry = MF.begin(); - MachineBasicBlock::iterator I = Entry->begin(); - DebugLoc DL = I->getDebugLoc(); - - // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to - // run this pass. - if (!MFI->hasSpilledVGPRs()) - return false; - - unsigned ScratchPtrPreloadReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR); - unsigned ScratchOffsetPreloadReg = - TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET); - - if (!Entry->isLiveIn(ScratchPtrPreloadReg)) - Entry->addLiveIn(ScratchPtrPreloadReg); - - if (!Entry->isLiveIn(ScratchOffsetPreloadReg)) - Entry->addLiveIn(ScratchOffsetPreloadReg); - - // Load the scratch offset. - unsigned ScratchOffsetReg = - TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass); - int ScratchOffsetFI = -1; - - if (ScratchOffsetReg != AMDGPU::NoRegister) { - // Found an SGPR to use - MRI.setPhysRegUsed(ScratchOffsetReg); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B32), ScratchOffsetReg) - .addReg(ScratchOffsetPreloadReg); - } else { - // No SGPR is available, we must spill. - ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4,4); - BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S32_SAVE)) - .addReg(ScratchOffsetPreloadReg) - .addFrameIndex(ScratchOffsetFI) - .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - } - - - // Now that we have the scratch pointer and offset values, we need to - // add them to all the SI_SPILL_V* instructions. - - RegScavenger RS; - unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4); - RS.addScavengingFrameIndex(ScratchRsrcFI); - - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - - MachineBasicBlock &MBB = *BI; - // Add the scratch offset reg as a live-in so that the register scavenger - // doesn't re-use it. 
- if (!MBB.isLiveIn(ScratchOffsetReg) && - ScratchOffsetReg != AMDGPU::NoRegister) - MBB.addLiveIn(ScratchOffsetReg); - RS.enterBasicBlock(&MBB); - - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - MachineInstr &MI = *I; - RS.forward(I); - DebugLoc DL = MI.getDebugLoc(); - if (!TII->isVGPRSpill(MI.getOpcode())) - continue; - - // Scratch resource - unsigned ScratchRsrcReg = - RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0); - - uint64_t Rsrc23 = TII->getScratchRsrcWords23(); - - unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0); - unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1); - unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2); - unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0) - .addExternalSymbol("SCRATCH_RSRC_DWORD0") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1) - .addExternalSymbol("SCRATCH_RSRC_DWORD1") - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2) - .addImm(Rsrc23 & 0xffffffff) - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3) - .addImm(Rsrc23 >> 32) - .addReg(ScratchRsrcReg, RegState::ImplicitDefine); - - // Scratch Offset - if (ScratchOffsetReg == AMDGPU::NoRegister) { - ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0); - BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE), - ScratchOffsetReg) - .addFrameIndex(ScratchOffsetFI) - .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef) - .addReg(AMDGPU::SGPR0, RegState::Undef); - } else if (!MBB.isLiveIn(ScratchOffsetReg)) { - MBB.addLiveIn(ScratchOffsetReg); - } - - if (ScratchRsrcReg == AMDGPU::NoRegister || - ScratchOffsetReg == AMDGPU::NoRegister) { - LLVMContext &Ctx = MF.getFunction()->getContext(); - Ctx.emitError("ran out of SGPRs for spilling VGPRs"); - ScratchRsrcReg = AMDGPU::SGPR0; - ScratchOffsetReg = AMDGPU::SGPR0; - } - MI.getOperand(2).setReg(ScratchRsrcReg); - MI.getOperand(2).setIsKill(true); - MI.getOperand(2).setIsUndef(false); - MI.getOperand(3).setReg(ScratchOffsetReg); - MI.getOperand(3).setIsUndef(false); - MI.getOperand(3).setIsKill(false); - MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true)); - MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true)); - } - } - return true; -} diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index e9e8412e263d..3cdffef05583 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// - #include "SIRegisterInfo.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" @@ -33,6 +32,40 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) co Reserved.set(*R); } +unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( + const MachineFunction &MF) const { + const AMDGPUSubtarget &ST = MF.getSubtarget(); + if (ST.hasSGPRInitBug()) { + unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4; + unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx)); + return getMatchingSuperReg(BaseReg, AMDGPU::sub0, 
&AMDGPU::SReg_128RegClass); + } + + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // 98/99 need to be reserved for flat_scr, and 100/101 for vcc. This is the + // next sgpr128 down. + return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95; + } + + return AMDGPU::SGPR96_SGPR97_SGPR98_SGPR99; +} + +unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg( + const MachineFunction &MF) const { + const AMDGPUSubtarget &ST = MF.getSubtarget(); + if (ST.hasSGPRInitBug()) { + unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5; + return AMDGPU::SGPR_32RegClass.getRegister(Idx); + } + + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // Next register before reservations for flat_scr and vcc. + return AMDGPU::SGPR97; + } + + return AMDGPU::SGPR95; +} + BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); @@ -42,13 +75,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { reserveRegisterTuples(Reserved, AMDGPU::EXEC); reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR); - // Reserve some VGPRs to use as temp registers in case we have to spill VGPRs - reserveRegisterTuples(Reserved, AMDGPU::VGPR254); - reserveRegisterTuples(Reserved, AMDGPU::VGPR255); + // Reserve the last 2 registers so we will always have at least 2 more that + // will physically contain VCC. + reserveRegisterTuples(Reserved, AMDGPU::SGPR102_SGPR103); + + const AMDGPUSubtarget &ST = MF.getSubtarget(); + + if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + // SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation + // for VCC/FLAT_SCR. + reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99); + reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101); + } // Tonga and Iceland can only allocate a fixed number of SGPRs due // to a hw bug. - if (MF.getSubtarget().hasSGPRInitBug()) { + if (ST.hasSGPRInitBug()) { unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs(); // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs). // Assume XNACK_MASK is unused. @@ -60,34 +102,57 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { } } + const SIMachineFunctionInfo *MFI = MF.getInfo(); + + unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg(); + if (ScratchWaveOffsetReg != AMDGPU::NoRegister) { + // Reserve 1 SGPR for scratch wave offset in case we need to spill. + reserveRegisterTuples(Reserved, ScratchWaveOffsetReg); + } + + unsigned ScratchRSrcReg = MFI->getScratchRSrcReg(); + if (ScratchRSrcReg != AMDGPU::NoRegister) { + // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need + // to spill. + // TODO: May need to reserve a VGPR if doing LDS spilling. + reserveRegisterTuples(Reserved, ScratchRSrcReg); + assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg)); + } + return Reserved; } unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const { - const AMDGPUSubtarget &STI = MF.getSubtarget(); // FIXME: We should adjust the max number of waves based on LDS size. 
unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(), STI.getMaxWavesPerCU()); unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU()); + unsigned VSLimit = SGPRLimit + VGPRLimit; + for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; - unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1); + unsigned NumSubRegs = std::max((int)RC->getSize() / 4, 1); unsigned Limit; - if (isSGPRClass(*I)) { + if (isPseudoRegClass(RC)) { + // FIXME: This is a hack. We should never be considering the pressure of + // these since no virtual register should ever have this class. + Limit = VSLimit; + } else if (isSGPRClass(RC)) { Limit = SGPRLimit / NumSubRegs; } else { Limit = VGPRLimit / NumSubRegs; } - const int *Sets = getRegClassPressureSets(*I); + const int *Sets = getRegClassPressureSets(RC); assert(Sets); for (unsigned i = 0; Sets[i] != -1; ++i) { - if (Sets[i] == (int)Idx) + if (Sets[i] == (int)Idx) return Limit; } } @@ -174,17 +239,17 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI, unsigned SubReg = NumSubRegs > 1 ? getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) : Value; - bool IsKill = (i == e - 1); BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp)) - .addReg(SubReg, getDefRegState(IsLoad)) - .addReg(ScratchRsrcReg, getKillRegState(IsKill)) - .addReg(SOffset) - .addImm(Offset) - .addImm(0) // glc - .addImm(0) // slc - .addImm(0) // tfe - .addReg(Value, RegState::Implicit | getDefRegState(IsLoad)); + .addReg(SubReg, getDefRegState(IsLoad)) + .addReg(ScratchRsrcReg) + .addReg(SOffset) + .addImm(Offset) + .addImm(0) // glc + .addImm(0) // slc + .addImm(0) // tfe + .addReg(Value, RegState::Implicit | getDefRegState(IsLoad)) + .setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); } } @@ -228,6 +293,9 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, .addReg(SubReg) .addImm(Spill.Lane); + // FIXME: Since this spills to another register instead of an actual + // frame index, we should delete the frame index when all references to + // it are fixed. } MI->eraseFromParent(); break; @@ -263,16 +331,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // TODO: only do this when it is needed switch (MF->getSubtarget().getGeneration()) { case AMDGPUSubtarget::SOUTHERN_ISLANDS: - // "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI - TII->insertNOPs(MI, 3); + // "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states + // ("S_NOP 3") on SI + TII->insertWaitStates(MI, 4); break; case AMDGPUSubtarget::SEA_ISLANDS: break; default: // VOLCANIC_ISLANDS and later - // "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI - // and later. This also applies to VALUs which write VCC, but we're - // unlikely to see VMEM use VCC. - TII->insertNOPs(MI, 4); + // "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states + // ("S_NOP 4") on VI and later. This also applies to VALUs which write + // VCC, but we're unlikely to see VMEM use VCC. + TII->insertWaitStates(MI, 5); } MI->eraseFromParent(); @@ -322,22 +391,16 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, } } -const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( - MVT VT) const { - switch(VT.SimpleTy) { - default: - case MVT::i32: return &AMDGPU::VGPR_32RegClass; - } -} - unsigned SIRegisterInfo::getHWRegIndex(unsigned Reg) const { return getEncodingValue(Reg) & 0xff; } +// FIXME: This is very slow. 
It might be worth creating a map from physreg to
+// register class.
 const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
   assert(!TargetRegisterInfo::isVirtualRegister(Reg));

-  static const TargetRegisterClass *BaseClasses[] = {
+  static const TargetRegisterClass *const BaseClasses[] = {
     &AMDGPU::VGPR_32RegClass,
     &AMDGPU::SReg_32RegClass,
     &AMDGPU::VReg_64RegClass,
@@ -359,33 +422,45 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
   return nullptr;
 }

+// TODO: It might be helpful to have some target specific flags in
+// TargetRegisterClass to mark which classes are VGPRs to make this trivial.
 bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const {
-  return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) ||
-         getCommonSubClass(&AMDGPU::VReg_512RegClass, RC);
+  switch (RC->getSize()) {
+  case 4:
+    return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr;
+  case 8:
+    return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr;
+  case 12:
+    return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr;
+  case 16:
+    return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr;
+  case 32:
+    return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr;
+  case 64:
+    return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr;
+  default:
+    llvm_unreachable("Invalid register class size");
+  }
 }

 const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
                                          const TargetRegisterClass *SRC) const {
-  if (hasVGPRs(SRC)) {
-    return SRC;
-  } else if (SRC == &AMDGPU::SCCRegRegClass) {
-    return &AMDGPU::VCCRegRegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_32RegClass)) {
-    return &AMDGPU::VGPR_32RegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SGPR_64RegClass)) {
-    return &AMDGPU::VReg_64RegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_128RegClass)) {
-    return &AMDGPU::VReg_128RegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_256RegClass)) {
-    return &AMDGPU::VReg_256RegClass;
-  } else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
-    return &AMDGPU::VReg_512RegClass;
-  }
-  return nullptr;
+  switch (SRC->getSize()) {
+  case 4:
+    return &AMDGPU::VGPR_32RegClass;
+  case 8:
+    return &AMDGPU::VReg_64RegClass;
+  case 12:
+    return &AMDGPU::VReg_96RegClass;
+  case 16:
+    return &AMDGPU::VReg_128RegClass;
+  case 32:
+    return &AMDGPU::VReg_256RegClass;
+  case 64:
+    return &AMDGPU::VReg_512RegClass;
+  default:
+    llvm_unreachable("Invalid register class size");
+  }
 }

 const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
@@ -402,6 +477,30 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
   }
 }

+bool SIRegisterInfo::shouldRewriteCopySrc(
+  const TargetRegisterClass *DefRC,
+  unsigned DefSubReg,
+  const TargetRegisterClass *SrcRC,
+  unsigned SrcSubReg) const {
+  // We want to prefer the smallest register class possible, so we don't want to
+  // stop and rewrite on anything that looks like a subregister
+  // extract. Operations mostly don't care about the super register class, so we
+  // only want to stop on the most basic of copies between the same register
+  // class.
+  //
+  // e.g. if we have something like
+  // vreg0 = ...
+  // vreg1 = ...
+  // vreg2 = REG_SEQUENCE vreg0, sub0, vreg1, sub1, vreg2, sub2
+  // vreg3 = COPY vreg2, sub0
+  //
+  // We want to look through the COPY to find:
+  //  => vreg3 = COPY vreg0
+
+  // Plain copy.
+  return getCommonSubClass(DefRC, SrcRC) != nullptr;
+}
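To make the predicate above concrete, here is a toy standalone model (an illustrative sketch, not the LLVM API; class names and widths are assumptions): treat a register class as a set of registers, so two classes have a common subclass exactly when the sets intersect. A plain copy stays within overlapping classes and is rewritten; a subregister extract crosses into a disjoint, wider class and is left alone.

#include <bitset>
#include <cstdio>

// A register class modeled as a set of 16 hypothetical registers.
using RegSet = std::bitset<16>;

// Stand-in for getCommonSubClass(...) != nullptr: the classes intersect.
static bool hasCommonSubClass(const RegSet &A, const RegSet &B) {
  return (A & B).any();
}

// Mirrors the shape of shouldRewriteCopySrc above: rewrite plain copies,
// skip anything that looks like a subregister extract.
static bool shouldRewriteCopySrcModel(const RegSet &DefRC, const RegSet &SrcRC) {
  return hasCommonSubClass(DefRC, SrcRC);
}

int main() {
  RegSet VGPR32("0000000000001111");  // hypothetical 32-bit class
  RegSet VReg64("1111000000000000");  // hypothetical disjoint 64-bit class
  std::printf("plain copy: %d, subreg extract: %d\n",
              shouldRewriteCopySrcModel(VGPR32, VGPR32),
              shouldRewriteCopySrcModel(VGPR32, VReg64));
  return 0;
}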
+
 unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
                                           const TargetRegisterClass *SubRC,
                                           unsigned Channel) const {
@@ -462,30 +561,47 @@ bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
   return OpType == AMDGPU::OPERAND_REG_INLINE_C;
 }

+// FIXME: Most of these are flexible with HSA and we don't need to reserve them
+// as input registers if unused. Whether the dispatch ptr is necessary should be
+// easy to detect from used intrinsics. Scratch setup is harder to know.
 unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
                                            enum PreloadedValue Value) const {

   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  (void)ST;
   switch (Value) {
-  case SIRegisterInfo::TGID_X:
-    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 0);
-  case SIRegisterInfo::TGID_Y:
-    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 1);
-  case SIRegisterInfo::TGID_Z:
-    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 2);
-  case SIRegisterInfo::SCRATCH_WAVE_OFFSET:
-    if (MFI->getShaderType() != ShaderType::COMPUTE)
-      return MFI->ScratchOffsetReg;
-    return AMDGPU::SReg_32RegClass.getRegister(MFI->NumUserSGPRs + 4);
-  case SIRegisterInfo::SCRATCH_PTR:
-    return AMDGPU::SGPR2_SGPR3;
-  case SIRegisterInfo::INPUT_PTR:
-    return AMDGPU::SGPR0_SGPR1;
-  case SIRegisterInfo::TIDIG_X:
+  case SIRegisterInfo::WORKGROUP_ID_X:
+    assert(MFI->hasWorkGroupIDX());
+    return MFI->WorkGroupIDXSystemSGPR;
+  case SIRegisterInfo::WORKGROUP_ID_Y:
+    assert(MFI->hasWorkGroupIDY());
+    return MFI->WorkGroupIDYSystemSGPR;
+  case SIRegisterInfo::WORKGROUP_ID_Z:
+    assert(MFI->hasWorkGroupIDZ());
+    return MFI->WorkGroupIDZSystemSGPR;
+  case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
+    return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
+  case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
+    assert(ST.isAmdHsaOS() && "Non-HSA ABI currently uses relocations");
+    assert(MFI->hasPrivateSegmentBuffer());
+    return MFI->PrivateSegmentBufferUserSGPR;
+  case SIRegisterInfo::KERNARG_SEGMENT_PTR:
+    assert(MFI->hasKernargSegmentPtr());
+    return MFI->KernargSegmentPtrUserSGPR;
+  case SIRegisterInfo::DISPATCH_PTR:
+    assert(MFI->hasDispatchPtr());
+    return MFI->DispatchPtrUserSGPR;
+  case SIRegisterInfo::QUEUE_PTR:
+    llvm_unreachable("not implemented");
+  case SIRegisterInfo::WORKITEM_ID_X:
+    assert(MFI->hasWorkItemIDX());
     return AMDGPU::VGPR0;
-  case SIRegisterInfo::TIDIG_Y:
+  case SIRegisterInfo::WORKITEM_ID_Y:
+    assert(MFI->hasWorkItemIDY());
    return AMDGPU::VGPR1;
-  case SIRegisterInfo::TIDIG_Z:
+  case SIRegisterInfo::WORKITEM_ID_Z:
+    assert(MFI->hasWorkItemIDZ());
    return AMDGPU::VGPR2;
   }
   llvm_unreachable("unexpected preloaded value type");
@@ -496,12 +612,9 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
 // AMDGPU::NoRegister.
unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC) const { - - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) { - if (!MRI.isPhysRegUsed(*I)) - return *I; - } + for (unsigned Reg : *RC) + if (!MRI.isPhysRegUsed(Reg)) + return Reg; return AMDGPU::NoRegister; } diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h index 7da6de282c11..1795237c2140 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/lib/Target/AMDGPU/SIRegisterInfo.h @@ -18,6 +18,7 @@ #include "AMDGPURegisterInfo.h" #include "AMDGPUSubtarget.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" namespace llvm { @@ -29,6 +30,15 @@ private: public: SIRegisterInfo(); + /// Return the end register initially reserved for the scratch buffer in case + /// spilling is needed. + unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const; + + /// Return the end register initially reserved for the scratch wave offset in + /// case spilling is needed. + unsigned reservedPrivateSegmentWaveByteOffsetReg( + const MachineFunction &MF) const; + BitVector getReservedRegs(const MachineFunction &MF) const override; unsigned getRegPressureSetLimit(const MachineFunction &MF, @@ -40,10 +50,6 @@ public: unsigned FIOperandNum, RegScavenger *RS) const override; - /// \brief get the register class of the specified type to use in the - /// CFGStructurizer - const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override; - unsigned getHWRegIndex(unsigned Reg) const override; /// \brief Return the 'base' register class for this register. @@ -52,23 +58,30 @@ public: /// \returns true if this class contains only SGPR registers bool isSGPRClass(const TargetRegisterClass *RC) const { - if (!RC) - return false; - return !hasVGPRs(RC); } /// \returns true if this class ID contains only SGPR registers bool isSGPRClassID(unsigned RCID) const { - if (static_cast(RCID) == -1) - return false; - return isSGPRClass(getRegClass(RCID)); } + bool isSGPRReg(const MachineRegisterInfo &MRI, unsigned Reg) const { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return isSGPRClass(MRI.getRegClass(Reg)); + return getPhysRegClass(Reg); + } + /// \returns true if this class contains VGPR registers. bool hasVGPRs(const TargetRegisterClass *RC) const; + /// returns true if this is a pseudoregister class combination of VGPRs and + /// SGPRs for operand modeling. FIXME: We should set isAllocatable = 0 on + /// them. + static bool isPseudoRegClass(const TargetRegisterClass *RC) { + return RC == &AMDGPU::VS_32RegClass || RC == &AMDGPU::VS_64RegClass; + } + /// \returns A VGPR reg class with the same width as \p SRC const TargetRegisterClass *getEquivalentVGPRClass( const TargetRegisterClass *SRC) const; @@ -79,6 +92,11 @@ public: const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC, unsigned SubIdx) const; + bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const override; + /// \p Channel This is the register channel (e.g. a value from 0-16), not the /// SubReg index. /// \returns The sub-register of Reg that is in Channel. @@ -91,19 +109,25 @@ public: /// \returns True if operands defined with this operand type can accept /// an inline constant. i.e. An integer value in the range (-16, 64) or - /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f. 
+  /// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
   bool opCanUseInlineConstant(unsigned OpType) const;

   enum PreloadedValue {
-    TGID_X,
-    TGID_Y,
-    TGID_Z,
-    SCRATCH_WAVE_OFFSET,
-    SCRATCH_PTR,
-    INPUT_PTR,
-    TIDIG_X,
-    TIDIG_Y,
-    TIDIG_Z
+    // SGPRS:
+    PRIVATE_SEGMENT_BUFFER = 0,
+    DISPATCH_PTR = 1,
+    QUEUE_PTR = 2,
+    KERNARG_SEGMENT_PTR = 3,
+    WORKGROUP_ID_X = 10,
+    WORKGROUP_ID_Y = 11,
+    WORKGROUP_ID_Z = 12,
+    PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
+
+    // VGPRS:
+    FIRST_VGPR_VALUE = 15,
+    WORKITEM_ID_X = FIRST_VGPR_VALUE,
+    WORKITEM_ID_Y = 16,
+    WORKITEM_ID_Z = 17
   };

   /// \brief Returns the physical register that \p Value is stored in.
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.td b/lib/Target/AMDGPU/SIRegisterInfo.td
index 2a9017fa2a98..bfaf93709d8c 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -10,10 +10,13 @@
 //===----------------------------------------------------------------------===//
 // Declarations that describe the SI registers
 //===----------------------------------------------------------------------===//
-
-class SIReg <string n, bits<16> encoding = 0> : Register<n> {
+class SIReg <string n, bits<16> regIdx = 0> : Register<n>,
+  DwarfRegNum<[!cast<int>(HWEncoding)]> {
   let Namespace = "AMDGPU";
-  let HWEncoding = encoding;
+
+  // This is not yet the complete register encoding. An additional
+  // bit is set for VGPRs.
+  let HWEncoding = regIdx;
 }

 // Special Registers
@@ -21,7 +24,8 @@ def VCC_LO : SIReg<"vcc_lo", 106>;
 def VCC_HI : SIReg<"vcc_hi", 107>;

 // VCC for 64-bit instructions
-def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
+def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]>,
+          DwarfRegAlias<VCC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
   let HWEncoding = 106;
@@ -30,7 +34,8 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
 def EXEC_LO : SIReg<"exec_lo", 126>;
 def EXEC_HI : SIReg<"exec_hi", 127>;

-def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
+def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
+           DwarfRegAlias<EXEC_LO> {
   let Namespace = "AMDGPU";
   let SubRegIndices = [sub0, sub1];
   let HWEncoding = 126;
@@ -39,18 +44,29 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]> {
 def SCC : SIReg<"scc", 253>;
 def M0 : SIReg <"m0", 124>;

-def FLAT_SCR_LO : SIReg<"flat_scr_lo", 104>; // Offset in units of 256-bytes.
-def FLAT_SCR_HI : SIReg<"flat_scr_hi", 105>; // Size is the per-thread scratch size, in bytes.
-
-// Pair to indicate location of scratch space for flat accesses.
-def FLAT_SCR : RegisterWithSubRegs <"flat_scr", [FLAT_SCR_LO, FLAT_SCR_HI]> {
-  let Namespace = "AMDGPU";
-  let SubRegIndices = [sub0, sub1];
-  let HWEncoding = 104;
+multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
+  def _ci : SIReg<n, ci_e>;
+  def _vi : SIReg<n, vi_e>;
+  def "" : SIReg<"", 0>;
 }

+class FlatReg <Register lo, Register hi, bits<16> encoding> :
+    RegisterWithSubRegs<"flat_scratch", [lo, hi]>,
+    DwarfRegAlias<lo> {
+  let Namespace = "AMDGPU";
+  let SubRegIndices = [sub0, sub1];
+  let HWEncoding = encoding;
+}
+
+defm FLAT_SCR_LO : FLAT_SCR_LOHI_m<"flat_scratch_lo", 104, 102>; // Offset in units of 256-bytes.
+defm FLAT_SCR_HI : FLAT_SCR_LOHI_m<"flat_scratch_hi", 105, 103>; // Size is the per-thread scratch size, in bytes.
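For orientation on the flat scratch change just above (a hedged sketch, not part of the patch): the multiclass stamps out three variants of each half — a CI register encoded 104/105, a VI register encoded 102/103, and an encoding-less pseudo that getMCReg in AMDGPUBaseInfo.cpp (further down in this patch) resolves once the subtarget is known; SI predates flat instructions, which is why getMCReg asserts !isSI. A minimal model of that per-generation selection, under those assumed encodings:

#include <cstdio>

enum class Gen { SeaIslands, VolcanicIslands };

// Encodings taken from the TableGen defs above: CI places
// flat_scratch_lo/hi at 104/105, VI at 102/103.
static unsigned flatScratchLoEnc(Gen G) {
  return G == Gen::SeaIslands ? 104 : 102;
}

static unsigned flatScratchHiEnc(Gen G) {
  return G == Gen::SeaIslands ? 105 : 103;
}

int main() {
  std::printf("CI: lo=%u hi=%u; VI: lo=%u hi=%u\n",
              flatScratchLoEnc(Gen::SeaIslands),
              flatScratchHiEnc(Gen::SeaIslands),
              flatScratchLoEnc(Gen::VolcanicIslands),
              flatScratchHiEnc(Gen::VolcanicIslands));
  return 0;
}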
+
+def FLAT_SCR_ci : FlatReg<FLAT_SCR_LO_ci, FLAT_SCR_HI_ci, 104>;
+def FLAT_SCR_vi : FlatReg<FLAT_SCR_LO_vi, FLAT_SCR_HI_vi, 102>;
+def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
+
 // SGPR registers
-foreach Index = 0-101 in {
+foreach Index = 0-103 in {
   def SGPR#Index : SIReg <"SGPR"#Index, Index>;
 }
@@ -65,25 +81,27 @@ foreach Index = 0-255 in {
 // Groupings using register classes and tuples
 //===----------------------------------------------------------------------===//

+// TODO: Do we need to set DwarfRegAlias on register tuples?
+
 // SGPR 32-bit registers
 def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
-                            (add (sequence "SGPR%u", 0, 101))>;
+                            (add (sequence "SGPR%u", 0, 103))>;

 // SGPR 64-bit registers
 def SGPR_64Regs : RegisterTuples<[sub0, sub1],
-                            [(add (decimate (trunc SGPR_32, 101), 2)),
+                            [(add (decimate SGPR_32, 2)),
                              (add (decimate (shl SGPR_32, 1), 2))]>;

 // SGPR 128-bit registers
 def SGPR_128 : RegisterTuples<[sub0, sub1, sub2, sub3],
-                              [(add (decimate (trunc SGPR_32, 99), 4)),
+                              [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
                                (add (decimate (shl SGPR_32, 3), 4))]>;

 // SGPR 256-bit registers
 def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
-                              [(add (decimate (trunc SGPR_32, 95), 4)),
+                              [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
                                (add (decimate (shl SGPR_32, 3), 4)),
@@ -95,7 +113,7 @@ def SGPR_256 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7],
 // SGPR 512-bit registers
 def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
                                sub8, sub9, sub10, sub11, sub12, sub13, sub14, sub15],
-                              [(add (decimate (trunc SGPR_32, 87), 4)),
+                              [(add (decimate SGPR_32, 4)),
                                (add (decimate (shl SGPR_32, 1), 4)),
                                (add (decimate (shl SGPR_32, 2), 4)),
                                (add (decimate (shl SGPR_32, 3), 4)),
@@ -174,44 +192,57 @@ class RegImmMatcher <string name> : AsmOperandClass {
   let RenderMethod = "addRegOrImmOperands";
 }

-// Special register classes for predicates and the M0 register
-def SCCReg : RegisterClass<"AMDGPU", [i32, i1], 32, (add SCC)> {
-  let CopyCost = -1; // Theoretically it is possible to read from SCC,
-                     // but it should never be necessary.
-}
-
-def VCCReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add VCC)>;
-def EXECReg : RegisterClass<"AMDGPU", [i64, i1], 64, (add EXEC)>;
-
 // Register class for all scalar registers (SGPRs + Special Registers)
 def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
   (add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
 >;

-def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 64, (add SGPR_64Regs)>;
+def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;

-def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 64,
-  (add SGPR_64, VCCReg, EXECReg, FLAT_SCR)
+def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
+  (add SGPR_64, VCC, EXEC, FLAT_SCR)
 >;

-def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>;
-
-def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
-
-def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 512, (add SGPR_512)>;
-
-// Register class for all vector registers (VGPRs + Interploation Registers)
-def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 64, (add VGPR_64)>;
-
-def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
-  let Size = 96;
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
+  // Requires 2 s_mov_b64 to copy
+  let CopyCost = 2;
 }

-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
+def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add SGPR_256)> {
+  // Requires 4 s_mov_b64 to copy
+  let CopyCost = 4;
+}

-def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
+def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
+  // Requires 8 s_mov_b64 to copy
+  let CopyCost = 8;
+}

-def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+// Register class for all vector registers (VGPRs + Interpolation Registers)
+def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
+  // Requires 2 v_mov_b32 to copy
+  let CopyCost = 2;
+}
+
+def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
+  let Size = 96;
+
+  // Requires 3 v_mov_b32 to copy
+  let CopyCost = 3;
+}
+
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
+  // Requires 4 v_mov_b32 to copy
+  let CopyCost = 4;
+}
+
+def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 32, (add VGPR_256)> {
+  let CopyCost = 8;
+}
+
+def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
+  let CopyCost = 16;
+}

 def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
   let Size = 32;
@@ -253,7 +284,9 @@ def SCSrc_32 : RegInlineOperand {
 def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;

-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
+  let CopyCost = 2;
+}

 def VSrc_32 : RegisterOperand<VS_32> {
   let OperandNamespace = "AMDGPU";
@@ -282,3 +315,13 @@ def VCSrc_64 : RegisterOperand<VS_64> {
   let OperandType =
"OPERAND_REG_INLINE_C"; + let ParserMatchClass = RegImmMatcher<"SCSrc64">; +} diff --git a/lib/Target/AMDGPU/SISchedule.td b/lib/Target/AMDGPU/SISchedule.td index 9b1f676020bf..cd77e519abb2 100644 --- a/lib/Target/AMDGPU/SISchedule.td +++ b/lib/Target/AMDGPU/SISchedule.td @@ -17,16 +17,28 @@ def WriteLDS : SchedWrite; def WriteSALU : SchedWrite; def WriteSMEM : SchedWrite; def WriteVMEM : SchedWrite; +def WriteBarrier : SchedWrite; // Vector ALU instructions def Write32Bit : SchedWrite; def WriteQuarterRate32 : SchedWrite; +def WriteFullOrQuarterRate32 : SchedWrite; def WriteFloatFMA : SchedWrite; -def WriteDouble : SchedWrite; +// Slow quarter rate f64 instruction. +def WriteDouble : SchedWrite; + +// half rate f64 instruction (same as v_add_f64) def WriteDoubleAdd : SchedWrite; +// Half rate 64-bit instructions. +def Write64Bit : SchedWrite; + +// FIXME: Should there be a class for instructions which are VALU +// instructions and have VALU rates, but write to the SALU (i.e. VOPC +// instructions) + def SIFullSpeedModel : SchedMachineModel; def SIQuarterSpeedModel : SchedMachineModel; @@ -53,7 +65,7 @@ class HWVALUWriteRes : // The latency numbers are taken from AMD Accelerated Parallel Processing -// guide. They may not be acurate. +// guide. They may not be accurate. // The latency values are 1 / (operations / cycle) / 4. multiclass SICommonWriteRes { @@ -64,8 +76,10 @@ multiclass SICommonWriteRes { def : HWWriteRes; def : HWWriteRes; // XXX: Guessed ??? def : HWWriteRes; // 300 - 600 + def : HWWriteRes; // XXX: Guessed ??? def : HWVALUWriteRes; + def : HWVALUWriteRes; def : HWVALUWriteRes; } diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 5d00bdd6a9bb..4f0913fe62f2 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -141,8 +141,7 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, if (!MRI.isSSA()) return; - assert(TII->isVOP1(MI.getOpcode()) || TII->isVOP2(MI.getOpcode()) || - TII->isVOPC(MI.getOpcode())); + assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI)); const SIRegisterInfo &TRI = TII->getRegisterInfo(); int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0); @@ -187,6 +186,21 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII, } +// Copy MachineOperand with all flags except setting it as implicit. +static MachineOperand copyRegOperandAsImplicit(const MachineOperand &Orig) { + assert(!Orig.isImplicit()); + return MachineOperand::CreateReg(Orig.getReg(), + Orig.isDef(), + true, + Orig.isKill(), + Orig.isDead(), + Orig.isUndef(), + Orig.isEarlyClobber(), + Orig.getSubReg(), + Orig.isDebug(), + Orig.isInternalRead()); +} + bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { MachineRegisterInfo &MRI = MF.getRegInfo(); const SIInstrInfo *TII = @@ -236,14 +250,10 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) { if (TII->isVOPC(Op32)) { unsigned DstReg = MI.getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - // VOPC instructions can only write to the VCC register. We can't - // force them to use VCC here, because the register allocator has - // trouble with sequences like this, which cause the allocator to run - // out of registers if vreg0 and vreg1 belong to the VCCReg register - // class: - // vreg0 = VOPC; - // vreg1 = VOPC; - // S_AND_B64 vreg0, vreg1 + // VOPC instructions can only write to the VCC register. 
We can't
+          // force them to use VCC here, because this is only one register and
+          // cannot deal with sequences which would require multiple copies of
+          // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
+          //
+          // So, instead of forcing the instruction to write to VCC, we provide
+          // a hint to the register allocator to use VCC and then we will run
@@ -272,13 +282,22 @@
       }

       // We can shrink this instruction
-      DEBUG(dbgs() << "Shrinking "; MI.dump(); dbgs() << '\n';);
+      DEBUG(dbgs() << "Shrinking " << MI);

       MachineInstrBuilder Inst32 =
           BuildMI(MBB, I, MI.getDebugLoc(), TII->get(Op32));

-      // dst
-      Inst32.addOperand(MI.getOperand(0));
+      // Add the dst operand if the 32-bit encoding also has an explicit $dst.
+      // For VOPC instructions, this is replaced by an implicit def of vcc.
+      int Op32DstIdx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::dst);
+      if (Op32DstIdx != -1) {
+        // dst
+        Inst32.addOperand(MI.getOperand(0));
+      } else {
+        assert(MI.getOperand(0).getReg() == AMDGPU::VCC &&
+               "Unexpected case");
+      }
+
       Inst32.addOperand(*TII->getNamedOperand(MI, AMDGPU::OpName::src0));
@@ -288,9 +307,19 @@
         Inst32.addOperand(*Src1);

       const MachineOperand *Src2 =
-          TII->getNamedOperand(MI, AMDGPU::OpName::src2);
-      if (Src2)
-        Inst32.addOperand(*Src2);
+          TII->getNamedOperand(MI, AMDGPU::OpName::src2);
+      if (Src2) {
+        int Op32Src2Idx = AMDGPU::getNamedOperandIdx(Op32, AMDGPU::OpName::src2);
+        if (Op32Src2Idx != -1) {
+          Inst32.addOperand(*Src2);
+        } else {
+          // In the case of V_CNDMASK_B32_e32, the explicit operand src2 is
+          // replaced with an implicit read of vcc.
+          assert(Src2->getReg() == AMDGPU::VCC &&
+                 "Unexpected missing register operand");
+          Inst32.addOperand(copyRegOperandAsImplicit(*Src2));
+        }
+      }

       ++NumInstructionsShrunk;
       MI.eraseFromParent();
diff --git a/lib/Target/AMDGPU/SITypeRewriter.cpp b/lib/Target/AMDGPU/SITypeRewriter.cpp
index 591ce857cc7d..dbdc76b917f3 100644
--- a/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ b/lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -22,6 +22,7 @@
 //===----------------------------------------------------------------------===//

 #include "AMDGPU.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
@@ -61,14 +62,7 @@ bool SITypeRewriter::doInitialization(Module &M) {
 }

 bool SITypeRewriter::runOnFunction(Function &F) {
-  Attribute A = F.getFnAttribute("ShaderType");
-
-  unsigned ShaderType = ShaderType::COMPUTE;
-  if (A.isStringAttribute()) {
-    StringRef Str = A.getValueAsString();
-    Str.getAsInteger(0, ShaderType);
-  }
-  if (ShaderType == ShaderType::COMPUTE)
+  if (AMDGPU::getShaderType(F) == ShaderType::COMPUTE)
     return false;

   visit(F);
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b76b4007003f..add415e215cf 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -7,12 +7,23 @@
 //
 //===----------------------------------------------------------------------===//

 #include "AMDGPUBaseInfo.h"
+#include "AMDGPU.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/SubtargetFeature.h"

 #define GET_SUBTARGETINFO_ENUM
 #include "AMDGPUGenSubtargetInfo.inc"
 #undef GET_SUBTARGETINFO_ENUM

+#define GET_REGINFO_ENUM
+#include "AMDGPUGenRegisterInfo.inc" +#undef GET_REGINFO_ENUM + namespace llvm { namespace AMDGPU { @@ -56,5 +67,91 @@ void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, Header.private_segment_alignment = 4; } +MCSection *getHSATextSection(MCContext &Ctx) { + return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::SHF_EXECINSTR | + ELF::SHF_AMDGPU_HSA_AGENT | + ELF::SHF_AMDGPU_HSA_CODE); +} + +MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) { + return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::SHF_AMDGPU_HSA_GLOBAL | + ELF::SHF_AMDGPU_HSA_AGENT); +} + +MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) { + return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE | + ELF::SHF_AMDGPU_HSA_GLOBAL); +} + +MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) { + return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY | + ELF::SHF_AMDGPU_HSA_AGENT); +} + +bool isGroupSegment(const GlobalValue *GV) { + return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +} + +bool isGlobalSegment(const GlobalValue *GV) { + return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +} + +bool isReadOnlySegment(const GlobalValue *GV) { + return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; +} + +static const char ShaderTypeAttribute[] = "ShaderType"; + +unsigned getShaderType(const Function &F) { + Attribute A = F.getFnAttribute(ShaderTypeAttribute); + unsigned ShaderType = ShaderType::COMPUTE; + + if (A.isStringAttribute()) { + StringRef Str = A.getValueAsString(); + if (Str.getAsInteger(0, ShaderType)) { + LLVMContext &Ctx = F.getContext(); + Ctx.emitError("can't parse shader type"); + } + } + return ShaderType; +} + +bool isSI(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands]; +} + +bool isCI(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands]; +} + +bool isVI(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; +} + +unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { + + switch(Reg) { + default: break; + case AMDGPU::FLAT_SCR: + assert(!isSI(STI)); + return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi; + + case AMDGPU::FLAT_SCR_LO: + assert(!isSI(STI)); + return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi; + + case AMDGPU::FLAT_SCR_HI: + assert(!isSI(STI)); + return isCI(STI) ? 
AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi; + } + return Reg; +} + } // End namespace AMDGPU } // End namespace llvm diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index f57028cc5bfd..19419a29f5e0 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -15,6 +15,11 @@ namespace llvm { class FeatureBitset; +class Function; +class GlobalValue; +class MCContext; +class MCSection; +class MCSubtargetInfo; namespace AMDGPU { @@ -27,6 +32,27 @@ struct IsaVersion { IsaVersion getIsaVersion(const FeatureBitset &Features); void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header, const FeatureBitset &Features); +MCSection *getHSATextSection(MCContext &Ctx); + +MCSection *getHSADataGlobalAgentSection(MCContext &Ctx); + +MCSection *getHSADataGlobalProgramSection(MCContext &Ctx); + +MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); + +bool isGroupSegment(const GlobalValue *GV); +bool isGlobalSegment(const GlobalValue *GV); +bool isReadOnlySegment(const GlobalValue *GV); + +unsigned getShaderType(const Function &F); + +bool isSI(const MCSubtargetInfo &STI); +bool isCI(const MCSubtargetInfo &STI); +bool isVI(const MCSubtargetInfo &STI); + +/// If \p Reg is a pseudo reg, return the correct hardware register given +/// \p STI otherwise return \p Reg. +unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI); } // end namespace AMDGPU } // end namespace llvm diff --git a/lib/Target/AMDGPU/Utils/LLVMBuild.txt b/lib/Target/AMDGPU/Utils/LLVMBuild.txt index dec5360e3bc7..2453bc546b99 100644 --- a/lib/Target/AMDGPU/Utils/LLVMBuild.txt +++ b/lib/Target/AMDGPU/Utils/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = AMDGPUUtils parent = AMDGPU -required_libraries = Support +required_libraries = Core MC Support add_to_library_groups = AMDGPU diff --git a/lib/Target/AMDGPU/VIInstructions.td b/lib/Target/AMDGPU/VIInstructions.td index aca46732adb9..20a026a822e2 100644 --- a/lib/Target/AMDGPU/VIInstructions.td +++ b/lib/Target/AMDGPU/VIInstructions.td @@ -73,8 +73,8 @@ defm V_MIN_I16 : VOP2Inst , "v_min_i16", VOP_I16_I16_I16>; } // End isCommutable = 1 defm V_LDEXP_F16 : VOP2Inst , "v_ldexp_f16", VOP_F16_F16_I16>; -// Aliases to simplify matching of floating-pint instructions that are VOP2 on -// SI and VOP3 on VI. +// Aliases to simplify matching of floating-point instructions that +// are VOP2 on SI and VOP3 on VI. class SI2_VI3Alias : InstAlias < name#" $dst, $src0, $src1", @@ -89,60 +89,15 @@ def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>; def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>; +//===----------------------------------------------------------------------===// +// SMEM Instructions +//===----------------------------------------------------------------------===// + +def S_DCACHE_WB : SMEM_Inval <0x21, + "s_dcache_wb", int_amdgcn_s_dcache_wb>; + +def S_DCACHE_WB_VOL : SMEM_Inval <0x23, + "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>; + } // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI -//===----------------------------------------------------------------------===// -// SMEM Patterns -//===----------------------------------------------------------------------===// - -let Predicates = [isVI] in { - -// 1. 
Offset as 20bit DWORD immediate -def : Pat < - (SIload_constant v4i32:$sbase, IMM20bit:$offset), - (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset)) ->; - -// Patterns for global loads with no offset -class FlatLoadPat : Pat < - (vt (node i64:$addr)), - (inst $addr, 0, 0, 0) ->; - -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; -def : FlatLoadPat ; - -class FlatStorePat : Pat < - (node vt:$data, i64:$addr), - (inst $data, $addr, 0, 0, 0) ->; - -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; -def : FlatStorePat ; - -class FlatAtomicPat : Pat < - (vt (node i64:$addr, vt:$data)), - (inst $addr, $data, 0, 0) ->; - -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; -def : FlatAtomicPat ; - - -} // End Predicates = [isVI] diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 9550a3a3cad1..cd7540e52410 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -35,7 +35,6 @@ FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); -FunctionPass *createARMGlobalBaseRegPass(); FunctionPass *createARMConstantIslandPass(); FunctionPass *createMLxExpansionPass(); FunctionPass *createThumb2ITBlockPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index ef609a66d032..a44dc830a673 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -16,6 +16,17 @@ include "llvm/Target/Target.td" +//===----------------------------------------------------------------------===// +// ARM Helper classes. +// + +class ProcNoItin Features> + : Processor; + +class Architecture features > + : SubtargetFeature; + //===----------------------------------------------------------------------===// // ARM Subtarget state. // @@ -51,8 +62,11 @@ def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", "Enable ARMv8 FP", [FeatureVFP4]>; +def FeatureFullFP16 : SubtargetFeature<"fullfp16", "HasFullFP16", "true", + "Enable full half-precision floating point", + [FeatureFPARMv8]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", - "Restrict VFP3 to 16 double registers">; + "Restrict FP to 16 double registers">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", "Enable divide instructions">; def FeatureHWDivARM : SubtargetFeature<"hwdiv-arm", @@ -119,9 +133,9 @@ def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop", def FeatureHasRAS : SubtargetFeature<"ras", "HasRAS", "true", "Has return address stack">; -/// Some M architectures don't have the DSP extension (v7E-M vs. v7M) -def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true", - "Supports v7 DSP instructions in Thumb2">; +/// DSP extension. +def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", + "Supports DSP instructions in ARM and/or Thumb2">; // Multiprocessing extension. 
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
@@ -150,11 +164,28 @@ def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass",
 def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true",
                                        "NaCl trap">;

+def FeatureStrictAlign : SubtargetFeature<"strict-align",
+                                          "StrictAlign", "true",
+                                          "Disallow all unaligned memory "
+                                          "access">;
+
 def FeatureLongCalls : SubtargetFeature<"long-calls", "GenLongCalls", "true",
                                         "Generate calls via indirect call "
                                         "instructions">;

-// ARM ISAs.
+def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true",
+                                        "Reserve R9, making it unavailable as "
+                                        "GPR">;
+
+def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true",
+                                     "Don't use movt/movw pairs for 32-bit "
+                                     "imms">;
+
+
+//===----------------------------------------------------------------------===//
+// ARM ISAs.
+//
+
 def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
                                  "Support ARM v4T instructions">;
 def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
@@ -180,302 +211,444 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
                                 [HasV6T2Ops, FeaturePerfMon]>;
 def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
                                 "Support ARM v8 instructions",
-                                [HasV7Ops, FeatureVirtualization,
-                                 FeatureMP]>;
+                                [HasV7Ops]>;
 def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
                                    "Support ARM v8.1a instructions",
-                                   [HasV8Ops, FeatureAClass, FeatureCRC]>;
+                                   [HasV8Ops]>;
+def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
+                                   "Support ARM v8.2a instructions",
+                                   [HasV8_1aOps]>;
+

 //===----------------------------------------------------------------------===//
-// ARM Processors supported.
+// ARM Processor subtarget features.
+//
+
+def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5",
+                              "Cortex-A5 ARM processors", []>;
+def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7",
+                              "Cortex-A7 ARM processors", []>;
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+                              "Cortex-A8 ARM processors", []>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+                              "Cortex-A9 ARM processors", []>;
+def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12",
+                               "Cortex-A12 ARM processors", []>;
+def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15",
+                               "Cortex-A15 ARM processors", []>;
+def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17",
+                               "Cortex-A17 ARM processors", []>;
+def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
+                               "Cortex-A35 ARM processors", []>;
+def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
+                               "Cortex-A53 ARM processors", []>;
+def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
+                               "Cortex-A57 ARM processors", []>;
+def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
+                               "Cortex-A72 ARM processors", []>;
+
+def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait",
+                                 "Qualcomm ARM processors", []>;
+def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift",
+                                 "Swift ARM processors", []>;
+
+
+def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4",
+                              "Cortex-R4 ARM processors", []>;
+def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
+                              "Cortex-R5 ARM processors", []>;
+def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
+                              "Cortex-R7 ARM processors", []>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM schedules.
// include "ARMSchedule.td" -// ARM processor families. -def ProcA5 : SubtargetFeature<"a5", "ARMProcFamily", "CortexA5", - "Cortex-A5 ARM processors", - [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk, - FeatureTrustZone, FeatureMP]>; -def ProcA7 : SubtargetFeature<"a7", "ARMProcFamily", "CortexA7", - "Cortex-A7 ARM processors", - [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk, - FeatureVFP4, FeatureMP, - FeatureHWDiv, FeatureHWDivARM, - FeatureTrustZone, FeatureVirtualization]>; -def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", - "Cortex-A8 ARM processors", - [FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVMLxForwarding, FeatureT2XtPk, - FeatureTrustZone]>; -def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", - "Cortex-A9 ARM processors", - [FeatureVMLxForwarding, - FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureTrustZone]>; -def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", - "Swift ARM processors", - [FeatureNEONForFP, FeatureT2XtPk, - FeatureVFP4, FeatureMP, FeatureHWDiv, - FeatureHWDivARM, FeatureAvoidPartialCPSR, - FeatureAvoidMOVsShOp, - FeatureHasSlowFPVMLx, FeatureTrustZone]>; -def ProcA12 : SubtargetFeature<"a12", "ARMProcFamily", "CortexA12", - "Cortex-A12 ARM processors", - [FeatureVMLxForwarding, - FeatureT2XtPk, FeatureVFP4, - FeatureHWDiv, FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureVirtualization, - FeatureTrustZone]>; + +//===----------------------------------------------------------------------===// +// ARM architectures +// + +def ARMv2 : Architecture<"armv2", "ARMv2", []>; + +def ARMv2a : Architecture<"armv2a", "ARMv2a", []>; + +def ARMv3 : Architecture<"armv3", "ARMv3", []>; + +def ARMv3m : Architecture<"armv3m", "ARMv3m", []>; + +def ARMv4 : Architecture<"armv4", "ARMv4", []>; + +def ARMv4t : Architecture<"armv4t", "ARMv4t", [HasV4TOps]>; + +def ARMv5t : Architecture<"armv5t", "ARMv5t", [HasV5TOps]>; + +def ARMv5te : Architecture<"armv5te", "ARMv5te", [HasV5TEOps]>; + +def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>; + +def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>; + +def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops, + FeatureDSP]>; + +def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>; + +def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps, + FeatureTrustZone]>; + +def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps, + FeatureNoARM, + FeatureDB, + FeatureMClass]>; + +def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps, + FeatureNoARM, + FeatureDB, + FeatureMClass]>; + +def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops, + FeatureNEON, + FeatureDB, + FeatureDSP, + FeatureAClass]>; + +def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, + FeatureDB, + FeatureDSP, + FeatureHWDiv, + FeatureRClass]>; + +def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, + FeatureThumb2, + FeatureNoARM, + FeatureDB, + FeatureHWDiv, + FeatureMClass]>; + +def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, + FeatureThumb2, + FeatureNoARM, + FeatureDB, + FeatureHWDiv, + FeatureMClass, + FeatureDSP, + FeatureT2XtPk]>; + +def ARMv8a : Architecture<"armv8-a", "ARMv8a", [HasV8Ops, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv81a : Architecture<"armv8.1-a", "ARMv81a", [HasV8_1aOps, + FeatureAClass, + FeatureDB, + 
FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC]>; + +// Aliases +def IWMMXT : Architecture<"iwmmxt", "ARMv5te", [ARMv5te]>; +def IWMMXT2 : Architecture<"iwmmxt2", "ARMv5te", [ARMv5te]>; +def XScale : Architecture<"xscale", "ARMv5te", [ARMv5te]>; +def ARMv6j : Architecture<"armv6j", "ARMv7a", [ARMv6]>; +def ARMv7k : Architecture<"armv7k", "ARMv7a", [ARMv7a]>; +def ARMv7s : Architecture<"armv7s", "ARMv7a", [ARMv7a]>; -// FIXME: It has not been determined if A15 has these features. -def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", - "Cortex-A15 ARM processors", - [FeatureT2XtPk, FeatureVFP4, - FeatureMP, FeatureHWDiv, FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureTrustZone, FeatureVirtualization]>; +//===----------------------------------------------------------------------===// +// ARM processors +// -def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", - "Cortex-A17 ARM processors", - [FeatureVMLxForwarding, - FeatureT2XtPk, FeatureVFP4, - FeatureHWDiv, FeatureHWDivARM, - FeatureAvoidPartialCPSR, - FeatureVirtualization, - FeatureTrustZone]>; +// Dummy CPU, used to target architectures +def : ProcNoItin<"generic", []>; -def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", - "Cortex-A53 ARM processors", - [FeatureHWDiv, FeatureHWDivARM, - FeatureTrustZone, FeatureT2XtPk, - FeatureCrypto, FeatureCRC]>; +def : ProcNoItin<"arm8", [ARMv4]>; +def : ProcNoItin<"arm810", [ARMv4]>; +def : ProcNoItin<"strongarm", [ARMv4]>; +def : ProcNoItin<"strongarm110", [ARMv4]>; +def : ProcNoItin<"strongarm1100", [ARMv4]>; +def : ProcNoItin<"strongarm1110", [ARMv4]>; -def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", - "Cortex-A57 ARM processors", - [FeatureHWDiv, FeatureHWDivARM, - FeatureTrustZone, FeatureT2XtPk, - FeatureCrypto, FeatureCRC]>; +def : ProcNoItin<"arm7tdmi", [ARMv4t]>; +def : ProcNoItin<"arm7tdmi-s", [ARMv4t]>; +def : ProcNoItin<"arm710t", [ARMv4t]>; +def : ProcNoItin<"arm720t", [ARMv4t]>; +def : ProcNoItin<"arm9", [ARMv4t]>; +def : ProcNoItin<"arm9tdmi", [ARMv4t]>; +def : ProcNoItin<"arm920", [ARMv4t]>; +def : ProcNoItin<"arm920t", [ARMv4t]>; +def : ProcNoItin<"arm922t", [ARMv4t]>; +def : ProcNoItin<"arm940t", [ARMv4t]>; +def : ProcNoItin<"ep9312", [ARMv4t]>; -def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", - "Cortex-R4 ARM processors", - [FeatureHWDiv, - FeatureAvoidPartialCPSR, - FeatureDSPThumb2, FeatureT2XtPk, - HasV7Ops, FeatureDB, FeatureHasRAS, - FeatureRClass]>; +def : ProcNoItin<"arm10tdmi", [ARMv5t]>; +def : ProcNoItin<"arm1020t", [ARMv5t]>; -def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", - "Cortex-R5 ARM processors", - [FeatureSlowFPBrcc, - FeatureHWDiv, FeatureHWDivARM, - FeatureHasSlowFPVMLx, - FeatureAvoidPartialCPSR, - FeatureT2XtPk]>; +def : ProcNoItin<"arm9e", [ARMv5te]>; +def : ProcNoItin<"arm926ej-s", [ARMv5te]>; +def : ProcNoItin<"arm946e-s", [ARMv5te]>; +def : ProcNoItin<"arm966e-s", [ARMv5te]>; +def : ProcNoItin<"arm968e-s", [ARMv5te]>; +def : ProcNoItin<"arm10e", [ARMv5te]>; +def : ProcNoItin<"arm1020e", [ARMv5te]>; +def : ProcNoItin<"arm1022e", [ARMv5te]>; +def : ProcNoItin<"xscale", [ARMv5te]>; +def : ProcNoItin<"iwmmxt", [ARMv5te]>; -// FIXME: 
krait has currently the same features as A9 -// plus VFP4 and hardware division features. -def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", - "Qualcomm ARM processors", - [FeatureVMLxForwarding, - FeatureT2XtPk, FeatureFP16, - FeatureAvoidPartialCPSR, - FeatureTrustZone, - FeatureVFP4, - FeatureHWDiv, - FeatureHWDivARM]>; +def : Processor<"arm1136j-s", ARMV6Itineraries, [ARMv6]>; +def : Processor<"arm1136jf-s", ARMV6Itineraries, [ARMv6, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; +def : Processor<"cortex-m0", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"cortex-m0plus", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"cortex-m1", ARMV6Itineraries, [ARMv6m]>; +def : Processor<"sc000", ARMV6Itineraries, [ARMv6m]>; -class ProcNoItin Features> - : Processor; +def : Processor<"arm1176jz-s", ARMV6Itineraries, [ARMv6kz]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ARMv6kz, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; -// V4 Processors. -def : ProcNoItin<"generic", []>; -def : ProcNoItin<"arm8", []>; -def : ProcNoItin<"arm810", []>; -def : ProcNoItin<"strongarm", []>; -def : ProcNoItin<"strongarm110", []>; -def : ProcNoItin<"strongarm1100", []>; -def : ProcNoItin<"strongarm1110", []>; +def : Processor<"mpcorenovfp", ARMV6Itineraries, [ARMv6k]>; +def : Processor<"mpcore", ARMV6Itineraries, [ARMv6k, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; -// V4T Processors. -def : ProcNoItin<"arm7tdmi", [HasV4TOps]>; -def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>; -def : ProcNoItin<"arm710t", [HasV4TOps]>; -def : ProcNoItin<"arm720t", [HasV4TOps]>; -def : ProcNoItin<"arm9", [HasV4TOps]>; -def : ProcNoItin<"arm9tdmi", [HasV4TOps]>; -def : ProcNoItin<"arm920", [HasV4TOps]>; -def : ProcNoItin<"arm920t", [HasV4TOps]>; -def : ProcNoItin<"arm922t", [HasV4TOps]>; -def : ProcNoItin<"arm940t", [HasV4TOps]>; -def : ProcNoItin<"ep9312", [HasV4TOps]>; +def : Processor<"arm1156t2-s", ARMV6Itineraries, [ARMv6t2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ARMv6t2, + FeatureVFP2, + FeatureHasSlowFPVMLx]>; -// V5T Processors. -def : ProcNoItin<"arm10tdmi", [HasV5TOps]>; -def : ProcNoItin<"arm1020t", [HasV5TOps]>; - -// V5TE Processors. -def : ProcNoItin<"arm9e", [HasV5TEOps]>; -def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>; -def : ProcNoItin<"arm946e-s", [HasV5TEOps]>; -def : ProcNoItin<"arm966e-s", [HasV5TEOps]>; -def : ProcNoItin<"arm968e-s", [HasV5TEOps]>; -def : ProcNoItin<"arm10e", [HasV5TEOps]>; -def : ProcNoItin<"arm1020e", [HasV5TEOps]>; -def : ProcNoItin<"arm1022e", [HasV5TEOps]>; -def : ProcNoItin<"xscale", [HasV5TEOps]>; -def : ProcNoItin<"iwmmxt", [HasV5TEOps]>; - -// V6 Processors. -def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>; -def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -// V6M Processors. -def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, - FeatureDB, FeatureMClass]>; -def : Processor<"cortex-m0plus", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, - FeatureDB, FeatureMClass]>; -def : Processor<"cortex-m1", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, - FeatureDB, FeatureMClass]>; -def : Processor<"sc000", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, - FeatureDB, FeatureMClass]>; - -// V6K Processors. 
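The shape of this ARM.td rewrite is easiest to see side by side: the removed defs above baked ISA-level features into every CPU, while the new scheme keeps those on an Architecture record and leaves only micro-architectural tuning on the CPU. A minimal TableGen sketch of the idiom, using hypothetical names (FeatureFoo, MyArch and mycpu are placeholders, not defs from this patch; a real Architecture also needs a matching ARMArch enumerator in the subtarget):

// Hypothetical illustration only; not part of the patch.
def FeatureFoo : SubtargetFeature<"foo", "HasFoo", "true",
                                  "Enable the foo extension">;

// The Architecture record carries everything the ISA revision implies...
def MyArch : Architecture<"myarch", "MyArch", [HasV7Ops, FeatureDB,
                                               FeatureFoo]>;

// ...so a CPU states only its own tuning deltas on top of it.
def : ProcessorModel<"mycpu", CortexA8Model, [MyArch,
                                              FeatureHasRAS,
                                              FeatureT2XtPk]>;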
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6KOps]>; -def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6KOps, FeatureVFP2, - FeatureHasSlowFPVMLx]>; -def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6KOps]>; -def : Processor<"mpcore", ARMV6Itineraries, [HasV6KOps, FeatureVFP2, - FeatureHasSlowFPVMLx]>; - -// V6T2 Processors. -def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops, - FeatureDSPThumb2]>; -def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, - FeatureHasSlowFPVMLx, - FeatureDSPThumb2]>; - -// V7a Processors. // FIXME: A5 has currently the same Schedule model as A8 -def : ProcessorModel<"cortex-a5", CortexA8Model, - [ProcA5, HasV7Ops, FeatureNEON, FeatureDB, - FeatureVFP4, FeatureDSPThumb2, - FeatureHasRAS, FeatureAClass]>; -def : ProcessorModel<"cortex-a7", CortexA8Model, - [ProcA7, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, - FeatureAClass]>; -def : ProcessorModel<"cortex-a8", CortexA8Model, - [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, - FeatureAClass]>; -def : ProcessorModel<"cortex-a9", CortexA9Model, - [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, FeatureMP, - FeatureAClass]>; +def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5, + FeatureHasRAS, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureMP, + FeatureVFP4]>; + +def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7, + FeatureHasRAS, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureMP, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM, + FeatureVirtualization]>; + +def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8, + FeatureHasRAS, + FeatureTrustZone, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVMLxForwarding, + FeatureT2XtPk]>; + +def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9, + FeatureHasRAS, + FeatureTrustZone, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureMP]>; // FIXME: A12 has currently the same Schedule model as A9 -def : ProcessorModel<"cortex-a12", CortexA9Model, - [ProcA12, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureMP, - FeatureHasRAS, FeatureAClass]>; +def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12, + FeatureHasRAS, + FeatureTrustZone, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureMP]>; -// FIXME: A15 has currently the same ProcessorModel as A9. -def : ProcessorModel<"cortex-a15", CortexA9Model, - [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, - FeatureAClass]>; +// FIXME: A15 has currently the same Schedule model as A9. 
+def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15, + FeatureHasRAS, + FeatureTrustZone, + FeatureT2XtPk, + FeatureVFP4, + FeatureMP, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; // FIXME: A17 has currently the same Schedule model as A9 -def : ProcessorModel<"cortex-a17", CortexA9Model, - [ProcA17, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureMP, - FeatureHasRAS, FeatureAClass]>; +def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17, + FeatureHasRAS, + FeatureTrustZone, + FeatureMP, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization]>; // FIXME: krait has currently the same Schedule model as A9 -def : ProcessorModel<"krait", CortexA9Model, - [ProcKrait, HasV7Ops, - FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, - FeatureAClass]>; +// FIXME: krait has currently the same features as A9 plus VFP4 and hardware +// division features. +def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait, + FeatureHasRAS, + FeatureVMLxForwarding, + FeatureT2XtPk, + FeatureFP16, + FeatureAvoidPartialCPSR, + FeatureVFP4, + FeatureHWDiv, + FeatureHWDivARM]>; + +def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, + FeatureHasRAS, + FeatureNEONForFP, + FeatureT2XtPk, + FeatureVFP4, + FeatureMP, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx]>; // FIXME: R4 has currently the same ProcessorModel as A8. -def : ProcessorModel<"cortex-r4", CortexA8Model, - [ProcR4]>; +def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRAS, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; // FIXME: R4F has currently the same ProcessorModel as A8. -def : ProcessorModel<"cortex-r4f", CortexA8Model, - [ProcR4, - FeatureSlowFPBrcc, FeatureHasSlowFPVMLx, - FeatureVFP3, FeatureD16]>; +def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4, + FeatureHasRAS, + FeatureSlowFPBrcc, + FeatureHasSlowFPVMLx, + FeatureVFP3, + FeatureD16, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; // FIXME: R5 has currently the same ProcessorModel as A8. -def : ProcessorModel<"cortex-r5", CortexA8Model, - [ProcR5, HasV7Ops, FeatureDB, - FeatureVFP3, FeatureDSPThumb2, - FeatureHasRAS, - FeatureD16, FeatureRClass]>; +def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5, + FeatureHasRAS, + FeatureVFP3, + FeatureD16, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; // FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5. -def : ProcessorModel<"cortex-r7", CortexA8Model, - [ProcR5, HasV7Ops, FeatureDB, - FeatureVFP3, FeatureDSPThumb2, - FeatureHasRAS, FeatureVFPOnlySP, - FeatureD16, FeatureMP, FeatureRClass]>; +def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, + FeatureHasRAS, + FeatureVFP3, + FeatureVFPOnlySP, + FeatureD16, + FeatureFP16, + FeatureMP, + FeatureSlowFPBrcc, + FeatureHWDivARM, + FeatureHasSlowFPVMLx, + FeatureAvoidPartialCPSR, + FeatureT2XtPk]>; -// V7M Processors. -def : ProcNoItin<"cortex-m3", [HasV7Ops, - FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, FeatureMClass]>; -def : ProcNoItin<"sc300", [HasV7Ops, - FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, FeatureMClass]>; +def : ProcNoItin<"cortex-m3", [ARMv7m]>; +def : ProcNoItin<"sc300", [ARMv7m]>; -// V7EM Processors. 
-def : ProcNoItin<"cortex-m4", [HasV7Ops, - FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, FeatureDSPThumb2, - FeatureT2XtPk, FeatureVFP4, - FeatureVFPOnlySP, FeatureD16, - FeatureMClass]>; -def : ProcNoItin<"cortex-m7", [HasV7Ops, - FeatureThumb2, FeatureNoARM, FeatureDB, - FeatureHWDiv, FeatureDSPThumb2, - FeatureT2XtPk, FeatureFPARMv8, - FeatureD16, FeatureMClass]>; +def : ProcNoItin<"cortex-m4", [ARMv7em, + FeatureVFP4, + FeatureVFPOnlySP, + FeatureD16]>; + +def : ProcNoItin<"cortex-m7", [ARMv7em, + FeatureFPARMv8, + FeatureD16]>; -// Swift uArch Processors. -def : ProcessorModel<"swift", SwiftModel, - [ProcSwift, HasV7Ops, FeatureNEON, - FeatureDB, FeatureDSPThumb2, - FeatureHasRAS, FeatureAClass]>; +def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; -// V8 Processors -def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass, - FeatureDB, FeatureFPARMv8, - FeatureNEON, FeatureDSPThumb2]>; -def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass, - FeatureDB, FeatureFPARMv8, - FeatureNEON, FeatureDSPThumb2]>; -// FIXME: Cortex-A72 is currently modelled as an Cortex-A57. -def : ProcNoItin<"cortex-a72", [ProcA57, HasV8Ops, FeatureAClass, - FeatureDB, FeatureFPARMv8, - FeatureNEON, FeatureDSPThumb2]>; +def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; + +def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72, + FeatureHWDiv, + FeatureHWDivARM, + FeatureT2XtPk, + FeatureCrypto, + FeatureCRC]>; // Cyclone is very similar to swift -def : ProcessorModel<"cyclone", SwiftModel, - [ProcSwift, HasV8Ops, HasV7Ops, - FeatureCrypto, FeatureFPARMv8, - FeatureDB,FeatureDSPThumb2, - FeatureHasRAS, FeatureZCZeroing]>; +def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, + FeatureHasRAS, + FeatureNEONForFP, + FeatureT2XtPk, + FeatureVFP4, + FeatureMP, + FeatureHWDiv, + FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureAvoidMOVsShOp, + FeatureHasSlowFPVMLx, + FeatureCrypto, + FeatureZCZeroing]>; + //===----------------------------------------------------------------------===// // Register File Description @@ -504,8 +677,15 @@ def ARMAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def ARMAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "ARM"; + string BreakCharacters = "."; +} + def ARM : Target { // Pull in Instruction Info: let InstructionSet = ARMInstrInfo; let AssemblyWriters = [ARMAsmWriter]; + let AssemblyParserVariants = [ARMAsmParserVariant]; } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 738ddedccdac..206db9619a2f 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -60,7 +60,7 @@ using namespace llvm; ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr), - InConstantPool(false) {} + InConstantPool(false), OptimizationGoals(-1) {} void ARMAsmPrinter::EmitFunctionBodyEnd() { // Make sure to terminate any constant pools that were at the end @@ -80,8 +80,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { OutStreamer->EmitLabel(CurrentFnSym); } -void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { - uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); +void 
ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) { + uint64_t Size = getDataLayout().getTypeAllocSize(CV->getType()); assert(Size && "C++ constructor pointer had zero size!"); const GlobalValue *GV = dyn_cast(CV->stripPointerCasts()); @@ -106,9 +106,38 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); SetupMachineFunction(MF); + const Function* F = MF.getFunction(); + const TargetMachine& TM = MF.getTarget(); + + // Calculate this function's optimization goal. + unsigned OptimizationGoal; + if (F->hasFnAttribute(Attribute::OptimizeNone)) + // For best debugging illusion, speed and small size sacrificed + OptimizationGoal = 6; + else if (F->optForMinSize()) + // Aggressively for small size, speed and debug illusion sacrificed + OptimizationGoal = 4; + else if (F->optForSize()) + // For small size, but speed and debugging illusion preserved + OptimizationGoal = 3; + else if (TM.getOptLevel() == CodeGenOpt::Aggressive) + // Aggressively for speed, small size and debug illusion sacrificed + OptimizationGoal = 2; + else if (TM.getOptLevel() > CodeGenOpt::None) + // For speed, but small size and good debug illusion preserved + OptimizationGoal = 1; + else // TM.getOptLevel() == CodeGenOpt::None + // For good debugging, but speed and small size preserved + OptimizationGoal = 5; + + // Combine a new optimization goal with existing ones. + if (OptimizationGoals == -1) // uninitialized goals + OptimizationGoals = OptimizationGoal; + else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals + OptimizationGoals = 0; if (Subtarget->isTargetCOFF()) { - bool Internal = MF.getFunction()->hasInternalLinkage(); + bool Internal = F->hasInternalLinkage(); COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL; int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; @@ -198,22 +227,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel(unsigned uid) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); SmallString<60> Name; - raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" + raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid; return OutContext.getOrCreateSymbol(Name); } - -MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { - const DataLayout *DL = TM.getDataLayout(); - SmallString<60> Name; - raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" - << getFunctionNumber(); - return OutContext.getOrCreateSymbol(Name); -} - bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { @@ -515,6 +535,17 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. 
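The optimization-goal bookkeeping added in runOnMachineFunction above reduces to a small merge rule across the module: -1 means no function has been seen yet, the first function seeds the value, and any later disagreement collapses it to 0, which makes the attribute emission below skip the tag. A standalone sketch of that rule (illustration, not the patch's code; the helper name is invented):

#include <cassert>
#include <iostream>

// Merge one function's goal (1..6) into the running module-wide value,
// where -1 = uninitialized and 0 = conflicting goals.
static int mergeOptimizationGoal(int Goals, unsigned Goal) {
  assert(Goal >= 1 && Goal <= 6 && "per-function goals are 1..6");
  if (Goals == -1)
    return static_cast<int>(Goal);   // first function seeds the value
  if (Goals != static_cast<int>(Goal))
    return 0;                        // disagreement: emit no specific goal
  return Goals;
}

int main() {
  int Goals = -1;
  Goals = mergeOptimizationGoal(Goals, 1); // an -O2 function
  Goals = mergeOptimizationGoal(Goals, 1); // another -O2 function: still 1
  Goals = mergeOptimizationGoal(Goals, 4); // a minsize function: conflict
  std::cout << Goals << '\n';              // prints 0, so the tag is skipped
}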
OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + + // The last attribute to be emitted is ABI_optimization_goals + MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast(TS); + + if (OptimizationGoals > 0 && + (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI())) + ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals); + OptimizationGoals = -1; + + ATS.finishAttributeSection(); } //===----------------------------------------------------------------------===// @@ -532,7 +563,7 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU, if (Subtarget->hasV8Ops()) return ARMBuildAttrs::v8; else if (Subtarget->hasV7Ops()) { - if (Subtarget->isMClass() && Subtarget->hasThumb2DSP()) + if (Subtarget->isMClass() && Subtarget->hasDSP()) return ARMBuildAttrs::v7E_M; return ARMBuildAttrs::v7; } else if (Subtarget->hasV6T2Ops()) @@ -587,7 +618,7 @@ void ARMAsmPrinter::emitAttributes() { // We consider krait as a "cortex-a9" + hwdiv CPU // Enable hwdiv through ".arch_extension idiv" if (STI.hasDivide() || STI.hasDivideInARMMode()) - ATS.emitArchExtension(ARM::AEK_HWDIV); + ATS.emitArchExtension(ARM::AEK_HWDIV | ARM::AEK_HWDIVARM); } else ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); } @@ -807,8 +838,6 @@ void ARMAsmPrinter::emitAttributes() { else if (STI.hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowVirtualization); - - ATS.finishAttributeSection(); } //===----------------------------------------------------------------------===// @@ -828,8 +857,7 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { case ARMCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; case ARMCP::TPOFF: return MCSymbolRefExpr::VK_TPOFF; case ARMCP::GOTTPOFF: return MCSymbolRefExpr::VK_GOTTPOFF; - case ARMCP::GOT: return MCSymbolRefExpr::VK_GOT; - case ARMCP::GOTOFF: return MCSymbolRefExpr::VK_GOTOFF; + case ARMCP::GOT_PREL: return MCSymbolRefExpr::VK_ARM_GOT_PREL; } llvm_unreachable("Invalid ARMCPModifier!"); } @@ -875,8 +903,8 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { - const DataLayout *DL = TM.getDataLayout(); - int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); + const DataLayout &DL = getDataLayout(); + int Size = DL.getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast(MCPV); @@ -909,10 +937,9 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { OutContext); if (ACPV->getPCAdjustment()) { - MCSymbol *PCLabel = getPICLabel(DL->getPrivateGlobalPrefix(), - getFunctionNumber(), - ACPV->getLabelId(), - OutContext); + MCSymbol *PCLabel = + getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), + ACPV->getLabelId(), OutContext); const MCExpr *PCRelExpr = MCSymbolRefExpr::create(PCLabel, OutContext); PCRelExpr = MCBinaryExpr::createAdd(PCRelExpr, @@ -1136,6 +1163,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { Offset = 0; break; case ARM::ADDri: + case ARM::t2ADDri: Offset = -MI->getOperand(2).getImm(); break; case ARM::SUBri: @@ -1198,7 +1226,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // If we just ended a constant pool, mark it as such. 
if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { @@ -1355,9 +1383,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); - MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), - getFunctionNumber(), - MI->getOperand(2).getImm(), OutContext); + MCSymbol *LabelSym = + getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), + MI->getOperand(2).getImm(), OutContext); const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext); unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4; const MCExpr *PCRelExpr = @@ -1388,9 +1416,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::create(GVSym, OutContext); - MCSymbol *LabelSym = getPICLabel(DL->getPrivateGlobalPrefix(), - getFunctionNumber(), - MI->getOperand(3).getImm(), OutContext); + MCSymbol *LabelSym = + getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), + MI->getOperand(3).getImm(), OutContext); const MCExpr *LabelSymExpr= MCSymbolRefExpr::create(LabelSym, OutContext); unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; const MCExpr *PCRelExpr = @@ -1414,10 +1442,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // This adds the address of LPC0 to r0. // Emit the label. - OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), + OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), - MI->getOperand(2).getImm(), - OutContext)); + MI->getOperand(2).getImm(), OutContext)); // Form and emit the add. EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr) @@ -1436,10 +1463,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // This adds the address of LPC0 to r0. // Emit the label. - OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), + OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), - MI->getOperand(2).getImm(), - OutContext)); + MI->getOperand(2).getImm(), OutContext)); // Form and emit the add. EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr) @@ -1468,10 +1494,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // a PC-relative address at the ldr instruction. // Emit the label. 
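These hunks all lower variations of one PIC idiom, worth spelling out once: the constant pool entry (or movw/movt pair) holds GV - (LPC + PCAdj), and the add of pc emitted at label LPC reconstitutes the address of GV at run time. PCAdj is 8 in ARM state and 4 in Thumb because of how far the architectural PC reads ahead, which matches the 8-versus-4 selection in the hunks above. A toy model of the arithmetic (illustration only, not patch code):

#include <cassert>
#include <cstdint>

// Value the compiler stores for a PIC reference to GV, resolved by the
// pc-relative add emitted at address LabelAddr.
static uint32_t picEntry(uint32_t GV, uint32_t LabelAddr, bool Thumb) {
  const uint32_t PCAdj = Thumb ? 4 : 8; // architectural PC read-ahead
  return GV - (LabelAddr + PCAdj);
}

int main() {
  const uint32_t GV = 0x20000, Label = 0x8000;
  // At run time: pc as read at the label, plus the stored entry, equals GV.
  assert(picEntry(GV, Label, /*Thumb=*/false) + (Label + 8) == GV);
}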
- OutStreamer->EmitLabel(getPICLabel(DL->getPrivateGlobalPrefix(), + OutStreamer->EmitLabel(getPICLabel(DL.getPrivateGlobalPrefix(), getFunctionNumber(), - MI->getOperand(2).getImm(), - OutContext)); + MI->getOperand(2).getImm(), OutContext)); // Form and emit the load unsigned Opcode; @@ -1519,7 +1544,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MCPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else - EmitGlobalConstant(MCPE.Val.ConstVal); + EmitGlobalConstant(DL, MCPE.Val.ConstVal); return; } case ARM::JUMPTABLE_ADDRS: @@ -1653,12 +1678,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // adds $val, #7 // str $val, [$src, #4] // movs r0, #0 - // b 1f + // b LSJLJEH // movs r0, #1 - // 1: + // LSJLJEH: unsigned SrcReg = MI->getOperand(0).getReg(); unsigned ValReg = MI->getOperand(1).getReg(); - MCSymbol *Label = GetARMSJLJEHLabel(); + MCSymbol *Label = OutContext.createTempSymbol("SJLJEH", false, true); OutStreamer->AddComment("eh_setjmp begin"); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ValReg) diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 3d251213f5bf..ed7be2de51ca 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -51,6 +51,11 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { /// labels used for ARMv4t thumb code to make register indirect calls. SmallVector, 4> ThumbIndirectPads; + /// OptimizationGoals - Maintain a combined optimization goal for all + /// functions in a module: one of Tag_ABI_optimization_goals values, + /// -1 if uninitialized, 0 if conflicting goals + int OptimizationGoals; + public: explicit ARMAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); @@ -84,7 +89,7 @@ public: void EmitFunctionEntryLabel() override; void EmitStartOfAsmFile(Module &M) override; void EmitEndOfAsmFile(Module &M) override; - void EmitXXStructor(const Constant *CV) override; + void EmitXXStructor(const DataLayout &DL, const Constant *CV) override; // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); @@ -119,8 +124,6 @@ private: MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol); MCSymbol *GetARMJTIPICJumpTableLabel(unsigned uid) const; - MCSymbol *GetARMSJLJEHLabel() const; - MCSymbol *GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags); public: diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9f43e732bd73..49f328852667 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -97,7 +97,7 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) Subtarget(STI) { for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) { if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second) - assert(false && "Duplicated entries?"); + llvm_unreachable("Duplicated entries?"); MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc); MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc); } @@ -440,7 +440,7 @@ ReverseBranchCondition(SmallVectorImpl &Cond) const { bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const { if (MI->isBundle()) { - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { int PIdx = I->findFirstPredOperandIdx(); @@ -518,7 +518,7 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, static bool isCPSRDefined(const MachineInstr *MI) { for (const auto &MO : MI->operands()) - if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef()) + if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead()) return true; return false; } @@ -647,7 +647,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { unsigned Size = 0; - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { assert(!I->isBundle() && "No nested bundle!"); @@ -853,11 +853,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - Align); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), Align); switch (RC->getSize()) { case 4: @@ -1043,12 +1041,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - MachineMemOperand *MMO = - MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - Align); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), Align); switch (RC->getSize()) { case 4: @@ -1224,6 +1219,60 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } +/// \brief Expands MEMCPY to either 
LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
+/// depending on whether the result is used.
+void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MBBI) const {
+  bool isThumb1 = Subtarget.isThumb1Only();
+  bool isThumb2 = Subtarget.isThumb2();
+  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();
+
+  MachineInstr *MI = MBBI;
+  DebugLoc dl = MI->getDebugLoc();
+  MachineBasicBlock *BB = MI->getParent();
+
+  MachineInstrBuilder LDM, STM;
+  if (isThumb1 || !MI->getOperand(1).isDead()) {
+    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
+                                                 : isThumb1 ? ARM::tLDMIA_UPD
+                                                            : ARM::LDMIA_UPD))
+              .addOperand(MI->getOperand(1));
+  } else {
+    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
+  }
+
+  if (isThumb1 || !MI->getOperand(0).isDead()) {
+    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
+                                                 : isThumb1 ? ARM::tSTMIA_UPD
+                                                            : ARM::STMIA_UPD))
+              .addOperand(MI->getOperand(0));
+  } else {
+    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
+  }
+
+  AddDefaultPred(LDM.addOperand(MI->getOperand(3)));
+  AddDefaultPred(STM.addOperand(MI->getOperand(2)));
+
+  // Sort the scratch registers into ascending order.
+  const TargetRegisterInfo &TRI = getRegisterInfo();
+  llvm::SmallVector<unsigned, 6> ScratchRegs;
+  for(unsigned I = 5; I < MI->getNumOperands(); ++I)
+    ScratchRegs.push_back(MI->getOperand(I).getReg());
+  std::sort(ScratchRegs.begin(), ScratchRegs.end(),
+            [&TRI](const unsigned &Reg1,
+                   const unsigned &Reg2) -> bool {
+              return TRI.getEncodingValue(Reg1) <
+                     TRI.getEncodingValue(Reg2);
+            });
+
+  for (const auto &Reg : ScratchRegs) {
+    LDM.addReg(Reg, RegState::Define);
+    STM.addReg(Reg, RegState::Kill);
+  }
+
+  BB->erase(MBBI);
+}
+
+
 bool
 ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
   MachineFunction &MF = *MI->getParent()->getParent();
@@ -1237,6 +1286,11 @@ ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
     return true;
   }
 
+  if (MI->getOpcode() == ARM::MEMCPY) {
+    expandMEMCPY(MI);
+    return true;
+  }
+
   // This hook gets to expand COPY instructions before they become
   // copyPhysReg() calls. Look for VMOVS instructions that can legally be
   // widened to VMOVD. We prefer the VMOVD when possible because it may be
@@ -1325,9 +1379,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
   // instructions, so that's probably OK, but is PIC always correct when
   // we get here?
   if (ACPV->isGlobalValue())
-    NewCPV = ARMConstantPoolConstant::
-      Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
-             ARMCP::CPValue, 4);
+    NewCPV = ARMConstantPoolConstant::Create(
+        cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
+        4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
   else if (ACPV->isExtSymbol())
     NewCPV = ARMConstantPoolSymbol::
       Create(MF.getFunction()->getContext(),
@@ -1645,16 +1699,14 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
 bool ARMBaseInstrInfo::
 isProfitableToIfCvt(MachineBasicBlock &MBB,
                     unsigned NumCycles, unsigned ExtraPredCycles,
-                    const BranchProbability &Probability) const {
+                    BranchProbability Probability) const {
   if (!NumCycles)
     return false;
 
   // If we are optimizing for size, see if the branch in the predecessor can be
   // lowered to cbn?z by the constant island lowering pass, and return false if
   // so. This results in a shorter instruction sequence.
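One non-obvious detail in the expandMEMCPY expansion above is the std::sort by encoding value: LDM/STM encode their register list as a bitmask and transfer registers in ascending encoding order, so the scratch operands are appended lowest-encoding first to keep the MachineInstr's operand order consistent with what the hardware actually does. A toy illustration (not patch code; assumes the A32 GPR encodings 0..15):

#include <cstdint>
#include <iostream>

// Build the 16-bit register-list field of an LDM/STM from GPR encodings.
static uint16_t regListMask(const unsigned *Encodings, unsigned N) {
  uint16_t Mask = 0;
  for (unsigned I = 0; I != N; ++I)
    Mask |= uint16_t(1u << Encodings[I]); // bit r set => register r in list
  return Mask;
}

int main() {
  unsigned Regs[] = {3, 1, 2}; // operand order never reaches the encoding...
  std::cout << std::hex << regListMask(Regs, 3) << '\n'; // prints e either way
  // ...which is why the expansion canonicalizes operands to ascending order.
}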
- const Function *F = MBB.getParent()->getFunction(); - if (F->hasFnAttribute(Attribute::OptimizeForSize) || - F->hasFnAttribute(Attribute::MinSize)) { + if (MBB.getParent()->getFunction()->optForSize()) { MachineBasicBlock *Pred = *MBB.pred_begin(); if (!Pred->empty()) { MachineInstr *LastMI = &*Pred->rbegin(); @@ -1677,12 +1729,14 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, } // Attempt to estimate the relative costs of predication versus branching. - unsigned UnpredCost = Probability.getNumerator() * NumCycles; - UnpredCost /= Probability.getDenominator(); - UnpredCost += 1; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() / 10; + // Here we scale up each component of UnpredCost to avoid precision issue when + // scaling NumCycles by Probability. + const unsigned ScalingUpFactor = 1024; + unsigned UnpredCost = Probability.scale(NumCycles * ScalingUpFactor); + UnpredCost += ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; - return (NumCycles + ExtraPredCycles) <= UnpredCost; + return (NumCycles + ExtraPredCycles) * ScalingUpFactor <= UnpredCost; } bool ARMBaseInstrInfo:: @@ -1690,23 +1744,22 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned TCycles, unsigned TExtra, MachineBasicBlock &FMBB, unsigned FCycles, unsigned FExtra, - const BranchProbability &Probability) const { + BranchProbability Probability) const { if (!TCycles || !FCycles) return false; // Attempt to estimate the relative costs of predication versus branching. - unsigned TUnpredCost = Probability.getNumerator() * TCycles; - TUnpredCost /= Probability.getDenominator(); - - uint32_t Comp = Probability.getDenominator() - Probability.getNumerator(); - unsigned FUnpredCost = Comp * FCycles; - FUnpredCost /= Probability.getDenominator(); - + // Here we scale up each component of UnpredCost to avoid precision issue when + // scaling TCycles/FCycles by Probability. + const unsigned ScalingUpFactor = 1024; + unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor); + unsigned FUnpredCost = + Probability.getCompl().scale(FCycles * ScalingUpFactor); unsigned UnpredCost = TUnpredCost + FUnpredCost; - UnpredCost += 1; // The branch itself - UnpredCost += Subtarget.getMispredictionPenalty() / 10; + UnpredCost += 1 * ScalingUpFactor; // The branch itself + UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10; - return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost; + return (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor <= UnpredCost; } bool @@ -1744,9 +1797,10 @@ unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) { llvm_unreachable("Unknown unconditional branch opcode!"); } -/// commuteInstruction - Handle commutable instructions. -MachineInstr * -ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { +MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { switch (MI->getOpcode()) { case ARM::MOVCCr: case ARM::t2MOVCCr: { @@ -1756,7 +1810,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // MOVCC AL can't be inverted. Shouldn't happen. if (CC == ARMCC::AL || PredReg != ARM::CPSR) return nullptr; - MI = TargetInstrInfo::commuteInstruction(MI, NewMI); + MI = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); if (!MI) return nullptr; // After swapping the MOVCC operands, also invert the condition. 
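The ScalingUpFactor rewrite above is easiest to see with numbers: with the old integer math, NumCycles * Numerator / Denominator truncates to zero for small cycle counts, so different probabilities became indistinguishable; scaling both sides of the comparison by 1024 first keeps about ten fractional bits. A toy reproduction (simplified stand-in for llvm::BranchProbability, which rounds slightly differently):

#include <cstdint>
#include <iostream>

struct Prob {
  uint32_t N, D; // numerator / denominator
  uint64_t scale(uint64_t X) const { return X * N / D; }
};

int main() {
  const Prob P{1, 3};      // branch taken one time in three
  const unsigned Cycles = 2, Factor = 1024;
  std::cout << P.scale(Cycles) << '\n';          // 0: signal truncated away
  std::cout << P.scale(Cycles * Factor) << '\n'; // 682: usable in comparisons
}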
@@ -1765,7 +1819,7 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { return MI; } } - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } /// Identify instructions that can be folded into a MOVCC instruction, and @@ -1975,21 +2029,12 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } -static bool isAnySubRegLive(unsigned Reg, const TargetRegisterInfo *TRI, - MachineInstr *MI) { - for (MCSubRegIterator Subreg(Reg, TRI, /* IncludeSelf */ true); - Subreg.isValid(); ++Subreg) - if (MI->getParent()->computeRegisterLiveness(TRI, *Subreg, MI) != - MachineBasicBlock::LQR_Dead) - return true; - return false; -} bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes) { // This optimisation potentially adds lots of load and store // micro-operations, it's only really a great benefit to code-size. - if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize)) + if (!MF.getFunction()->optForMinSize()) return false; // If only one register is pushed/popped, LLVM can use an LDR/STR @@ -2058,11 +2103,9 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, // registers live within the function we might clobber a return value // register; the other way a register can be live here is if it's // callee-saved. - // TODO: Currently, computeRegisterLiveness() does not report "live" if a - // sub reg is live. When computeRegisterLiveness() works for sub reg, it - // can replace isAnySubRegLive(). if (isCalleeSavedRegister(CurReg, CSRegs) || - isAnySubRegLive(CurReg, TRI, MI)) { + MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) != + MachineBasicBlock::LQR_Dead) { // VFP pops don't allow holes in the register list, so any skip is fatal // for our transformation. GPR pops do, so we should just keep looking. if (IsVFPPushPop) @@ -3381,7 +3424,7 @@ static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI, assert(Idx != -1 && "Cannot find bundled definition!"); DefIdx = Idx; - return II; + return &*II; } static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, @@ -3389,7 +3432,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, unsigned &UseIdx, unsigned &Dist) { Dist = 0; - MachineBasicBlock::const_instr_iterator II = MI; ++II; + MachineBasicBlock::const_instr_iterator II = ++MI->getIterator(); assert(II->isInsideBundle() && "Empty bundle?"); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); @@ -3410,7 +3453,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI, } UseIdx = Idx; - return II; + return &*II; } /// Return the number of cycles to add to (or subtract from) the static @@ -3652,6 +3695,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // instructions). if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI->getParent()->getParent(); + // FIXME: Use Function::optForSize(). if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) --Latency; } @@ -3931,11 +3975,11 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, // other passes may query the latency of a bundled instruction. 
if (MI->isBundle()) { unsigned Latency = 0; - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); while (++I != E && I->isInsideBundle()) { if (I->getOpcode() != ARM::t2IT) - Latency += getInstrLatency(ItinData, I, PredCost); + Latency += getInstrLatency(ItinData, &*I, PredCost); } return Latency; } @@ -4054,8 +4098,8 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); MIB.addReg(Reg, RegState::Kill).addImm(0); unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; - MachineMemOperand *MMO = MBB.getParent()-> - getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4); + MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( + MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4); MIB.addMemOperand(MMO); AddDefaultPred(MIB); } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b4706e348933..d80c49494c77 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -86,6 +86,18 @@ protected: RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const override; + /// Commutes the operands in the given instruction. + /// The commutable operands are specified by their indices OpIdx1 and OpIdx2. + /// + /// Do not call this method for a non-commutable instruction or for + /// non-commutable pair of operand indices OpIdx1 and OpIdx2. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands, null pointer is returned in such cases. + MachineInstr *commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const override; + public: // Return whether the target has an explicit NOP encoding. bool hasNOP() const; @@ -188,9 +200,6 @@ public: MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const override; - MachineInstr *commuteInstruction(MachineInstr*, - bool=false) const override; - const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI) const; @@ -224,15 +233,15 @@ public: bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, unsigned ExtraT, MachineBasicBlock &FMBB, unsigned NumF, unsigned ExtraF, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - const BranchProbability &Probability) const override { + BranchProbability Probability) const override { return NumCycles == 1; } @@ -343,6 +352,8 @@ private: virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI, Reloc::Model RM) const = 0; + void expandMEMCPY(MachineBasicBlock::iterator) const; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. 
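The ARMBaseInstrInfo.h hunk above is part of a wider API migration: the public commuteInstruction() entry point now dispatches to a protected commuteInstructionImpl() hook that is told exactly which operand pair to swap, so targets override the hook rather than the entry point (compare the MOVCC handling in the .cpp hunk earlier). A minimal sketch of that shape, with stand-in types rather than LLVM's real signatures:

// Simplified stand-in types; the point is the override pattern, not the
// exact LLVM interfaces.
struct MachineInstrSketch {};

struct TargetInstrInfoSketch {
  virtual ~TargetInstrInfoSketch() = default;
  // Base implementation performs the generic operand swap; returning
  // nullptr means "could not commute".
  virtual MachineInstrSketch *commuteInstructionImpl(MachineInstrSketch *MI,
                                                     bool NewMI,
                                                     unsigned OpIdx1,
                                                     unsigned OpIdx2) {
    (void)NewMI; (void)OpIdx1; (void)OpIdx2;
    return MI;
  }
};

struct ARMLikeInstrInfo : TargetInstrInfoSketch {
  MachineInstrSketch *commuteInstructionImpl(MachineInstrSketch *MI,
                                             bool NewMI, unsigned OpIdx1,
                                             unsigned OpIdx2) override {
    // Target-specific fixups (e.g. inverting a MOVCC predicate) happen
    // here, before delegating the actual swap to the base class.
    return TargetInstrInfoSketch::commuteInstructionImpl(MI, NewMI, OpIdx1,
                                                         OpIdx2);
  }
};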
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e7d5be7753e4..419717c85a79 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -225,7 +225,8 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef Order, SmallVectorImpl &Hints, const MachineFunction &MF, - const VirtRegMap *VRM) const { + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); std::pair Hint = MRI.getRegAllocationHint(VirtReg); @@ -338,7 +339,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled. - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + if (!TargetRegisterInfo::canRealignStack(MF)) return false; if (AFI->isThumb1OnlyFunction()) return false; @@ -355,18 +356,6 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return MRI->canReserveReg(BasePtr); } -bool ARMBaseRegisterInfo:: -needsStackRealignment(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMFrameLowering *TFI = getFrameLowering(MF); - const Function *F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttribute(Attribute::StackAlignment)); - - return requiresRealignment && canRealignStack(MF); -} - bool ARMBaseRegisterInfo:: cannotEliminateFrame(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index fdc1ef9432c8..cea8b80c7821 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -94,7 +94,7 @@ public: const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; - const uint32_t *getNoPreservedMask() const; + const uint32_t *getNoPreservedMask() const override; /// getThisReturnPreservedMask - Returns a call preserved mask specific to the /// case that 'returned' is on an i32 first argument if the calling convention @@ -126,15 +126,15 @@ public: ArrayRef Order, SmallVectorImpl &Hints, const MachineFunction &MF, - const VirtRegMap *VRM) const override; + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const override; void updateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const override; bool hasBasePointer(const MachineFunction &MF) const; - bool canRealignStack(const MachineFunction &MF) const; - bool needsStackRealignment(const MachineFunction &MF) const override; + bool canRealignStack(const MachineFunction &MF) const override; int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override; bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override; diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index d687568d7eb9..a731d00883a1 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -160,15 +160,15 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } -static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; +static const MCPhysReg 
RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; -static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, - ARM::S4, ARM::S5, ARM::S6, ARM::S7, - ARM::S8, ARM::S9, ARM::S10, ARM::S11, - ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; -static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; -static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; +static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; +static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; +static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA @@ -199,9 +199,11 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. - unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U); + auto &DL = State.getMachineFunction().getDataLayout(); + unsigned StackAlign = DL.getStackAlignment(); + unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign); - ArrayRef RegList; + ArrayRef RegList; switch (LocVT.SimpleTy) { case MVT::i32: { RegList = RRegList; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 27cf06b995a0..233516415149 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -125,6 +125,8 @@ def CC_ARM_AAPCS_Common : CallingConv<[ CCIfType<[i32], CCAssignToStackWithShadow<4, 4, [R0, R1, R2, R3]>>, CCIfType<[f32], CCAssignToStackWithShadow<4, 4, [Q0, Q1, Q2, Q3]>>, CCIfType<[f64], CCAssignToStackWithShadow<8, 8, [Q0, Q1, Q2, Q3]>>, + CCIfType<[v2f64], CCIfAlign<"16", + CCAssignToStackWithShadow<16, 16, [Q0, Q1, Q2, Q3]>>>, CCIfType<[v2f64], CCAssignToStackWithShadow<16, 8, [Q0, Q1, Q2, Q3]>> ]>; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index f4ec8c67c977..e89757c19ecc 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -342,7 +342,7 @@ void ARMConstantIslands::verify() { #ifndef NDEBUG for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; + MachineBasicBlock *MBB = &*MBBI; unsigned MBBId = MBB->getNumber(); assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset); } @@ -542,7 +542,7 @@ ARMConstantIslands::doInitialConstPlacement(std::vector &CPEMIs) // identity mapping of CPI's to CPE's. const std::vector &CPs = MCP->getConstants(); - const DataLayout &TD = *MF->getTarget().getDataLayout(); + const DataLayout &TD = MF->getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); @@ -589,6 +589,8 @@ void ARMConstantIslands::doInitialJumpTablePlacement( MachineBasicBlock *LastCorrectlyNumberedBB = nullptr; for (MachineBasicBlock &MBB : *MF) { auto MI = MBB.getLastNonDebugInstr(); + if (MI == MBB.end()) + continue; unsigned JTOpcode; switch (MI->getOpcode()) { @@ -639,12 +641,12 @@ void ARMConstantIslands::doInitialJumpTablePlacement( /// into the block immediately after it. 
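Stepping back to the calling-convention hunks above: CC_ARM_AAPCS_Custom_Aggregate implements the AAPCS rule that a homogeneous floating-point aggregate (HFA/HVA) is either assigned a contiguous block of argument registers or passed entirely on the stack. A toy first-fit version of that search (illustration only, not the patch's implementation):

#include <cstdio>

// First register of a free contiguous block of length NeedLen, or -1 if
// none exists, in which case the whole aggregate goes on the stack.
static int findContiguousRegs(const bool Free[], unsigned NumRegs,
                              unsigned NeedLen) {
  for (unsigned Start = 0; Start + NeedLen <= NumRegs; ++Start) {
    bool Ok = true;
    for (unsigned I = 0; I != NeedLen && Ok; ++I)
      Ok = Free[Start + I];
    if (Ok)
      return static_cast<int>(Start);
  }
  return -1;
}

int main() {
  bool Free[8] = {false, true, true, false, true, true, true, false};
  // A three-member HFA fits only at registers 4..6 here; the shorter free
  // run at 1..2 is skipped because the block must hold the aggregate whole.
  std::printf("%d\n", findContiguousRegs(Free, 8, 3)); // prints 4
}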
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. - MachineFunction::iterator MBBI = MBB; + MachineFunction::iterator MBBI = MBB->getIterator(); // Can't fall off end of function. if (std::next(MBBI) == MBB->getParent()->end()) return false; - MachineBasicBlock *NextBB = std::next(MBBI); + MachineBasicBlock *NextBB = &*std::next(MBBI); if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end()) return false; @@ -722,15 +724,15 @@ initializeFunctionInfo(const std::vector &CPEMIs) { // has any inline assembly in it. If so, we have to be conservative about // alignment assumptions, as we don't know for sure the size of any // instructions in the inline assembly. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) - computeBlockSize(I); + for (MachineBasicBlock &MBB : *MF) + computeBlockSize(&MBB); // The known bits of the entry block offset are determined by the function // alignment. BBInfo.front().KnownBits = MF->getAlignment(); // Compute block offsets and known bits. - adjustBBOffsetsAfter(MF->begin()); + adjustBBOffsetsAfter(&MF->front()); // Now go back through the instructions and build up our data structures. for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); @@ -968,7 +970,7 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) { // Create a new MBB for the code after the OrigBB. MachineBasicBlock *NewBB = MF->CreateMachineBasicBlock(OrigBB->getBasicBlock()); - MachineFunction::iterator MBBI = OrigBB; ++MBBI; + MachineFunction::iterator MBBI = ++OrigBB->getIterator(); MF->insert(MBBI, NewBB); // Splice the instructions starting with MI over to NewBB. @@ -1088,7 +1090,7 @@ bool ARMConstantIslands::isWaterInRange(unsigned UserOffset, unsigned CPELogAlign = getCPELogAlign(U.CPEMI); unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign); unsigned NextBlockOffset, NextBlockAlignment; - MachineFunction::const_iterator NextBlock = Water; + MachineFunction::const_iterator NextBlock = Water->getIterator(); if (++NextBlock == MF->end()) { NextBlockOffset = BBInfo[Water->getNumber()].postOffset(); NextBlockAlignment = 0; @@ -1350,7 +1352,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (isOffsetInRange(UserOffset, CPEOffset, U)) { DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() << format(", expected CPE offset %#x\n", CPEOffset)); - NewMBB = std::next(MachineFunction::iterator(UserMBB)); + NewMBB = &*++UserMBB->getIterator(); // Add an unconditional branch from UserMBB to fallthrough block. Record // it for branch lengthening; this new branch will not get out of range, // but if the preceding conditional branch is out of range, the targets @@ -1503,8 +1505,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { NewWaterList.insert(NewIsland); // The new CPE goes before the following block (NewMBB). - NewMBB = std::next(MachineFunction::iterator(WaterBB)); - + NewMBB = &*++WaterBB->getIterator(); } else { // No water found. DEBUG(dbgs() << "No water found\n"); @@ -1515,7 +1516,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // next iteration for constant pools, but in this context, we don't want // it. Check for this so it will be removed from the WaterList. // Also remove any entry from NewWaterList. 
-    MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB));
+    MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
     IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
     if (IP != WaterList.end())
       NewWaterList.erase(WaterBB);
@@ -1532,7 +1533,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
       WaterList.erase(IP);
 
   // Okay, we know we can put an island before NewMBB now, do it!
-  MF->insert(NewMBB, NewIsland);
+  MF->insert(NewMBB->getIterator(), NewIsland);
 
   // Update internal data structures to account for the newly inserted MBB.
   updateForInsertedWaterBlock(NewIsland);
@@ -1553,7 +1554,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
   // Increase the size of the island block to account for the new entry.
   BBInfo[NewIsland->getNumber()].Size += Size;
-  adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland)));
+  adjustBBOffsetsAfter(&*--NewIsland->getIterator());
 
   // Finally, change the CPI in the instruction operand to be ID.
   for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
@@ -1732,7 +1733,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
     MBB->back().eraseFromParent();
     // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
   }
-  MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
+  MachineBasicBlock *NextBB = &*++MBB->getIterator();
 
   DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
                << " also invert condition and change dest. to BB#"
@@ -2058,9 +2059,9 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
 /// \brief Returns whether CPEMI is the first instruction in the block
 /// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
 /// we can switch the first register to PC and usually remove the address
-/// calculation that preceeded it.
+/// calculation that preceded it.
 static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
-  MachineFunction::iterator MBB = JTMI->getParent();
+  MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
   MachineFunction *MF = MBB->getParent();
   ++MBB;
 
@@ -2235,7 +2236,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
   SmallVector<MachineOperand, 4> Cond;
   SmallVector<MachineOperand, 4> CondPrior;
-  MachineFunction::iterator BBi = BB;
+  MachineFunction::iterator BBi = BB->getIterator();
   MachineFunction::iterator OldPrior = std::prev(BBi);
 
   // If the block terminator isn't analyzable, don't try to move the block
@@ -2258,7 +2259,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
   // Create a new MBB for the code after the jump BB.
   MachineBasicBlock *NewBB =
     MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
-  MachineFunction::iterator MBBI = JTBB; ++MBBI;
+  MachineFunction::iterator MBBI = ++JTBB->getIterator();
   MF->insert(MBBI, NewBB);
 
   // Add an unconditional branch from NewBB to BB.
@@ -2273,8 +2274,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
 
   // Update the CFG.
   NewBB->addSuccessor(BB);
-  JTBB->removeSuccessor(BB);
-  JTBB->addSuccessor(NewBB);
+  JTBB->replaceSuccessor(BB, NewBB);
 
   ++NumJTInserted;
   return NewBB;
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 7d41c69f08b8..c9849b2605ea 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -52,8 +52,7 @@ const char *ARMConstantPoolValue::getModifierText() const {
     // strings if that's legal.
   case ARMCP::no_modifier: return "none";
   case ARMCP::TLSGD:       return "tlsgd";
-  case ARMCP::GOT:         return "GOT";
-  case ARMCP::GOTOFF:      return "GOTOFF";
+  case ARMCP::GOT_PREL:    return "GOT_PREL";
   case ARMCP::GOTTPOFF:    return "gottpoff";
   case ARMCP::TPOFF:       return "tpoff";
   }
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 36f63e239a9e..6b18a4e52878 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -39,8 +39,7 @@ namespace ARMCP {
   enum ARMCPModifier {
     no_modifier,
     TLSGD,
-    GOT,
-    GOTOFF,
+    GOT_PREL,
     GOTTPOFF,
     TPOFF
   };
@@ -103,8 +102,6 @@ public:
   bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
   bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
 
-  unsigned getRelocationInfo() const override { return 2; }
-
   int getExistingMachineCPValue(MachineConstantPool *CP,
                                 unsigned Alignment) override;
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 4438f50758dc..56f3498e1204 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -330,22 +330,19 @@ static const NEONLdStTableEntry NEONLdStTable[] = {
 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
 /// load or store pseudo instruction.
 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
-  const unsigned NumEntries = array_lengthof(NEONLdStTable);
-
 #ifndef NDEBUG
   // Make sure the table is sorted.
   static bool TableChecked = false;
   if (!TableChecked) {
-    for (unsigned i = 0; i != NumEntries-1; ++i)
-      assert(NEONLdStTable[i] < NEONLdStTable[i+1] &&
-             "NEONLdStTable is not sorted!");
+    assert(std::is_sorted(std::begin(NEONLdStTable), std::end(NEONLdStTable)) &&
+           "NEONLdStTable is not sorted!");
     TableChecked = true;
   }
 #endif
 
-  const NEONLdStTableEntry *I =
-    std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
-  if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
+  auto I = std::lower_bound(std::begin(NEONLdStTable),
+                            std::end(NEONLdStTable), Opcode);
+  if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
     return I;
   return nullptr;
 }
@@ -734,7 +731,7 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
   HI16.addImm(Pred).addReg(PredReg);
 
   if (RequiresBundling)
-    finalizeBundle(MBB, &*LO16, &*MBBI);
+    finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());
 
   TransferImpOps(MI, LO16, HI16);
   MI.eraseFromParent();
@@ -747,6 +744,55 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   switch (Opcode) {
   default:
     return false;
+
+  case ARM::TCRETURNdi:
+  case ARM::TCRETURNri: {
+    MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+    assert(MBBI->isReturn() &&
+           "Can only insert epilog into returning blocks");
+    unsigned RetOpcode = MBBI->getOpcode();
+    DebugLoc dl = MBBI->getDebugLoc();
+    const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
+        MBB.getParent()->getSubtarget().getInstrInfo());
+
+    // Tail call return: adjust the stack pointer and jump to callee.
+    MBBI = MBB.getLastNonDebugInstr();
+    MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+    // Jump to label or value in register.
+    if (RetOpcode == ARM::TCRETURNdi) {
+      unsigned TCOpcode =
+          STI->isThumb()
+              ? (STI->isTargetMachO() ?
+                     ARM::tTAILJMPd : ARM::tTAILJMPdND)
+              : ARM::TAILJMPd;
+      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
+      if (JumpTarget.isGlobal())
+        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+                             JumpTarget.getTargetFlags());
+      else {
+        assert(JumpTarget.isSymbol());
+        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
+                              JumpTarget.getTargetFlags());
+      }
+
+      // Add the default predicate in Thumb mode.
+      if (STI->isThumb())
+        MIB.addImm(ARMCC::AL).addReg(0);
+    } else if (RetOpcode == ARM::TCRETURNri) {
+      BuildMI(MBB, MBBI, dl,
+              TII.get(STI->isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr))
+          .addReg(JumpTarget.getReg(), RegState::Kill);
+    }
+
+    MachineInstr *NewMI = std::prev(MBBI);
+    for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+      NewMI->addOperand(MBBI->getOperand(i));
+
+    // Delete the pseudo instruction TCRETURN.
+    MBB.erase(MBBI);
+    MBBI = NewMI;
+    return true;
+  }
   case ARM::VMOVScc:
   case ARM::VMOVDcc: {
     unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD;
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index fdd0763ea608..9bdf823c85bd 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -922,12 +922,9 @@ void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
   if (Addr.BaseType == Address::FrameIndexBase) {
     int FI = Addr.Base.FI;
     int Offset = Addr.Offset;
-    MachineMemOperand *MMO =
-      FuncInfo.MF->getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(FI, Offset),
-        Flags,
-        MFI.getObjectSize(FI),
-        MFI.getObjectAlignment(FI));
+    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
+        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
 
     // Now add the rest of the operands.
     MIB.addFrameIndex(FI);
@@ -1278,8 +1275,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
       unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
       .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
-      fastEmitBranch(FBB, DbgLoc);
-      FuncInfo.MBB->addSuccessor(TBB);
+      finishCondBranch(BI->getParent(), TBB, FBB);
       return true;
     }
   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
@@ -1303,8 +1299,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
 
       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
       .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
-      fastEmitBranch(FBB, DbgLoc);
-      FuncInfo.MBB->addSuccessor(TBB);
+      finishCondBranch(BI->getParent(), TBB, FBB);
       return true;
     }
   } else if (const ConstantInt *CI =
@@ -1341,8 +1336,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) {
     unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
     .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
-    fastEmitBranch(FBB, DbgLoc);
-    FuncInfo.MBB->addSuccessor(TBB);
+    finishCondBranch(BI->getParent(), TBB, FBB);
     return true;
   }
 
@@ -1355,8 +1349,8 @@ bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
                   TII.get(Opc)).addReg(AddrReg));
 
   const IndirectBrInst *IB = cast<IndirectBrInst>(I);
-  for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
-    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+  for (const BasicBlock *SuccBB : IB->successors())
+    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);
 
   return true;
 }
@@ -1860,8 +1854,9 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
     else
       return (Return ?
 RetCC_ARM_AAPCS: CC_ARM_AAPCS);
-  } else
-    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+  } else {
+    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+  }
   case CallingConv::ARM_AAPCS_VFP:
     if (!isVarArg)
       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
@@ -2944,48 +2939,51 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
 unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
                                      unsigned Align, MVT VT) {
-  bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
-  ARMConstantPoolConstant *CPV =
-    ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
-  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+  bool UseGOT_PREL =
+      !(GV->hasHiddenVisibility() || GV->hasLocalLinkage());
 
-  unsigned Opc;
-  unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
-  // Load value.
-  if (isThumb2) {
-    DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-                            TII.get(ARM::t2LDRpci), DestReg1)
-                    .addConstantPoolIndex(Idx));
-    Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
-  } else {
-    // The extra immediate is for addrmode2.
-    DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
-    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
-                            DbgLoc, TII.get(ARM::LDRcp), DestReg1)
-                    .addConstantPoolIndex(Idx).addImm(0));
-    Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
-  }
+  LLVMContext *Context = &MF->getFunction()->getContext();
+  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
+      GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
+      UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
+      /*AddCurrentAddress=*/UseGOT_PREL);
 
-  unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
-  if (GlobalBaseReg == 0) {
-    GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
-    AFI->setGlobalBaseReg(GlobalBaseReg);
-  }
+  unsigned ConstAlign =
+      MF->getDataLayout().getPrefTypeAlignment(Type::getInt32PtrTy(*Context));
+  unsigned Idx = MF->getConstantPool()->getConstantPoolIndex(CPV, ConstAlign);
 
-  unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
-  DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
-  DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
-  GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
-  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
-                                    DbgLoc, TII.get(Opc), DestReg2)
-                            .addReg(DestReg1)
-                            .addReg(GlobalBaseReg);
-  if (!UseGOTOFF)
+  unsigned TempReg = MF->getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
+  unsigned Opc = isThumb2 ? ARM::t2LDRpci : ARM::LDRcp;
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), TempReg)
+          .addConstantPoolIndex(Idx);
+  if (Opc == ARM::LDRcp)
     MIB.addImm(0);
-  AddOptionalDefs(MIB);
+  AddDefaultPred(MIB);
 
-  return DestReg2;
+  // Fix the address by adding pc.
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+  Opc = Subtarget->isThumb() ? ARM::tPICADD : UseGOT_PREL ?
+        ARM::PICLDR
+        : ARM::PICADD;
+  DestReg = constrainOperandRegClass(TII.get(Opc), DestReg, 0);
+  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+            .addReg(TempReg)
+            .addImm(ARMPCLabelIndex);
+  if (!Subtarget->isThumb())
+    AddDefaultPred(MIB);
+
+  if (UseGOT_PREL && Subtarget->isThumb()) {
+    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+                  TII.get(ARM::t2LDRi12), NewDestReg)
+              .addReg(DestReg)
+              .addImm(0);
+    DestReg = NewDestReg;
+    AddOptionalDefs(MIB);
+  }
+  return DestReg;
 }
 
 bool ARMFastISel::fastLowerArguments() {
@@ -3038,7 +3036,7 @@ bool ARMFastISel::fastLowerArguments() {
   }
 
-  static const uint16_t GPRArgRegs[] = {
+  static const MCPhysReg GPRArgRegs[] = {
     ARM::R0, ARM::R1, ARM::R2, ARM::R3
   };
 
@@ -3055,7 +3053,7 @@ bool ARMFastISel::fastLowerArguments() {
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
             TII.get(TargetOpcode::COPY),
             ResultReg).addReg(DstReg, getKillRegState(true));
-    updateValueMap(I, ResultReg);
+    updateValueMap(&*I, ResultReg);
   }
 
   return true;
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 6744000afe2b..c5990bb7d1fb 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/Function.h"
 #include "llvm/MC/MCContext.h"
@@ -58,7 +59,7 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
 
   // iOS requires FP not to be clobbered for backtracing purpose.
-  if (STI.isTargetIOS())
+  if (STI.isTargetIOS() || STI.isTargetWatchOS())
     return true;
 
   const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -288,7 +289,6 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
 
 void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
-  assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented");
   MachineBasicBlock::iterator MBBI = MBB.begin();
   MachineFrameInfo  *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -305,7 +305,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
   unsigned NumBytes = MFI->getStackSize();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
-  DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Debug location must be unknown since the first debug location is used
+  // to determine the end of the prologue.
+  DebugLoc dl;
+
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
 
   // Determine the sizes of each callee-save spill areas and record which frame
@@ -489,7 +493,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
 
   if (NumBytes) {
     // Adjust SP after all the callee-save spills.
-    if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
+    if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
+        tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes))
       DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
     else {
       emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
@@ -689,60 +694,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     AFI->setShouldRestoreSPFromFP(true);
 }
 
-// Resolve TCReturn pseudo-instruction
-void ARMFrameLowering::fixTCReturn(MachineFunction &MF,
-                                   MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
-  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
-  unsigned RetOpcode = MBBI->getOpcode();
-  DebugLoc dl = MBBI->getDebugLoc();
-  const ARMBaseInstrInfo &TII =
-      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
-
-  if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri))
-    return;
-
-  // Tail call return: adjust the stack pointer and jump to callee.
-  MBBI = MBB.getLastNonDebugInstr();
-  MachineOperand &JumpTarget = MBBI->getOperand(0);
-
-  // Jump to label or value in register.
-  if (RetOpcode == ARM::TCRETURNdi) {
-    unsigned TCOpcode = STI.isThumb() ?
-         (STI.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) :
-         ARM::TAILJMPd;
-    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
-    if (JumpTarget.isGlobal())
-      MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
-                           JumpTarget.getTargetFlags());
-    else {
-      assert(JumpTarget.isSymbol());
-      MIB.addExternalSymbol(JumpTarget.getSymbolName(),
-                            JumpTarget.getTargetFlags());
-    }
-
-    // Add the default predicate in Thumb mode.
-    if (STI.isThumb()) MIB.addImm(ARMCC::AL).addReg(0);
-  } else if (RetOpcode == ARM::TCRETURNri) {
-    BuildMI(MBB, MBBI, dl,
-            TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)).
-      addReg(JumpTarget.getReg(), RegState::Kill);
-  }
-
-  MachineInstr *NewMI = std::prev(MBBI);
-  for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
-    NewMI->addOperand(MBBI->getOperand(i));
-
-  // Delete the pseudo instruction TCRETURN.
-  MBB.erase(MBBI);
-  MBBI = NewMI;
-}
-
 void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
                                     MachineBasicBlock &MBB) const {
-  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
-  assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
-  DebugLoc dl = MBBI->getDebugLoc();
   MachineFrameInfo *MFI = MF.getFrameInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
@@ -758,10 +711,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
 
   // All calls are tail calls in GHC calling conv, and functions have no
   // prologue/epilogue.
-  if (MF.getFunction()->getCallingConv() == CallingConv::GHC) {
-    fixTCReturn(MF, MBB);
+  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
     return;
-  }
+
+  // First put ourselves on the first (from top) terminator instructions.
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  DebugLoc dl = MBBI != MBB.end() ?
+      MBBI->getDebugLoc() : DebugLoc();
 
   if (!AFI->hasStackFrame()) {
     if (NumBytes - ArgRegsSaveSize != 0)
@@ -840,8 +795,6 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
       if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
   }
 
-  fixTCReturn(MF, MBB);
-
   if (ArgRegsSaveSize)
     emitSPUpdate(isARM, MBB, MBBI, dl, TII, ArgRegsSaveSize);
 }
@@ -932,12 +885,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
   return Offset;
 }
 
-int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
-                                          int FI) const {
-  unsigned FrameReg;
-  return getFrameIndexReference(MF, FI, FrameReg);
-}
-
 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     const std::vector<CalleeSavedInfo> &CSI,
@@ -950,7 +897,6 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
 
   DebugLoc DL;
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
 
   SmallVector<std::pair<unsigned, bool>, 4> Regs;
   unsigned i = CSI.size();
@@ -1008,7 +954,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
     // Put any subsequent vpush instructions before this one: they will refer to
     // higher register numbers so need to be pushed first in order to preserve
    // monotonicity.
-    --MI;
+    if (MI != MBB.begin())
+      --MI;
   }
 }
 
@@ -1022,12 +969,20 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  DebugLoc DL = MI->getDebugLoc();
-  unsigned RetOpcode = MI->getOpcode();
-  bool isTailCall = (RetOpcode == ARM::TCRETURNdi ||
-                     RetOpcode == ARM::TCRETURNri);
-  bool isInterrupt =
-      RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+  DebugLoc DL;
+  bool isTailCall = false;
+  bool isInterrupt = false;
+  bool isTrap = false;
+  if (MBB.end() != MI) {
+    DL = MI->getDebugLoc();
+    unsigned RetOpcode = MI->getOpcode();
+    isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri);
+    isInterrupt =
+        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
+    isTrap =
+        RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
+        RetOpcode == ARM::tTRAP;
+  }
 
   SmallVector<unsigned, 4> Regs;
   unsigned i = CSI.size();
@@ -1043,11 +998,14 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
       continue;
 
     if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
-        STI.hasV5TOps()) {
-      Reg = ARM::PC;
-      LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+        !isTrap && STI.hasV5TOps()) {
+      if (MBB.succ_empty()) {
+        Reg = ARM::PC;
+        DeleteRet = true;
+        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
+      } else
+        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
       // Fold the return instruction into the LDM.
-      DeleteRet = true;
     }
 
     // If NoGap is true, pop consecutive registers and then leave the rest
@@ -1068,7 +1026,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                        .addReg(ARM::SP));
       for (unsigned i = 0, e = Regs.size(); i < e; ++i)
         MIB.addReg(Regs[i], getDefRegState(true));
-      if (DeleteRet) {
+      if (DeleteRet && MI != MBB.end()) {
        MIB.copyImplicitOps(&*MI);
         MI->eraseFromParent();
       }
@@ -1095,7 +1053,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
     // Put any subsequent vpop instructions after this one: they will refer to
     // higher register numbers so need to be popped afterwards.
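
// [Editorial aside, not part of the imported patch] The emitPopInst change in
// this hunk only folds the return into the pop (loading PC directly) when the
// block has no successors; otherwise LR is reloaded normally. A hedged,
// runnable sketch of that predicate, with all inputs passed explicitly
// (parameter names below are illustrative, not LLVM's):
//
//   #include <iostream>
//
//   static bool canPopReturnIntoPC(bool RegIsLR, bool IsTailCall,
//                                  bool IsInterrupt, bool IsTrap,
//                                  bool HasV5TOps, bool SuccEmpty) {
//     // Mirrors the guard above: LR, not a tail-call/interrupt/trap return,
//     // ARMv5T or later, and the block must actually end the function.
//     return RegIsLR && !IsTailCall && !IsInterrupt && !IsTrap && HasV5TOps &&
//            SuccEmpty;
//   }
//
//   int main() {
//     std::cout << canPopReturnIntoPC(true, false, false, false, true, true)
//               << canPopReturnIntoPC(true, false, false, false, true, false)
//               << '\n'; // prints 10: fold only when the block is terminal
//   }
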
-    ++MI;
+    if (MI != MBB.end())
+      ++MI;
   }
 }
 
@@ -1109,7 +1068,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                     const TargetRegisterInfo *TRI) {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   MachineFrameInfo &MFI = *MF.getFrameInfo();
 
@@ -1118,7 +1077,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
   // slot offsets can be wrong. The offset for d8 will always be correct.
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned DNum = CSI[i].getReg() - ARM::D8;
-    if (DNum >= 8)
+    if (DNum > NumAlignedDPRCS2Regs - 1)
       continue;
     int FI = CSI[i].getFrameIdx();
     // The even-numbered registers will be 16-byte aligned, the odd-numbered
@@ -1269,7 +1228,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                       const TargetRegisterInfo *TRI) {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  DebugLoc DL = MI->getDebugLoc();
+  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
 
   // Find the frame index assigned to d8.
@@ -1654,13 +1613,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // FIXME: We could add logic to be more precise about negative offsets
   //        and which instructions will need a scratch register for them. Is it
   //        worth the effort and added fragility?
-  bool BigStack =
-    (RS &&
-     (MFI->estimateStackSize(MF) +
-      ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
-      estimateRSStackSizeLimit(MF, this)))
-    || MFI->hasVarSizedObjects()
-    || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+  bool BigStack = (RS && (MFI->estimateStackSize(MF) +
+                              ((hasFP(MF) && AFI->hasStackFrame()) ? 4 : 0) >=
+                          estimateRSStackSizeLimit(MF, this))) ||
+                  MFI->hasVarSizedObjects() ||
+                  (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
 
   bool ExtraCSSpill = false;
   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
@@ -1698,8 +1655,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
     if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
       for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) {
         unsigned Reg = UnspilledCS1GPRs[i];
-        // Don't spill high register if the function is thumb
+        // Don't spill high register if the function is thumb.  In the case of
+        // Windows on ARM, accept R11 (frame pointer)
         if (!AFI->isThumbFunction() ||
+            (STI.isTargetWindows() && Reg == ARM::R11) ||
             isARMLowRegister(Reg) || Reg == ARM::LR) {
           SavedRegs.set(Reg);
           if (!MRI.isReserved(Reg))
@@ -1784,8 +1743,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
       // We need to keep the stack aligned properly.  To do this, we round the
       // amount of space needed for the outgoing arguments up to the next
       // alignment boundary.
-      unsigned Align = getStackAlignment();
-      Amount = (Amount+Align-1)/Align*Align;
+      Amount = alignSPAdjust(Amount);
 
       ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
       assert(!AFI->isThumb1OnlyFunction() &&
@@ -1885,7 +1843,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(
   if (!ST->isTargetAndroid() && !ST->isTargetLinux())
     report_fatal_error("Segmented stacks not supported on this platform.");
 
-  assert(&PrologueMBB == &MF.front() && "Shrink-wrapping not yet implemented");
   MachineFrameInfo *MFI = MF.getFrameInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   MCContext &Context = MMI.getContext();
@@ -1913,21 +1870,48 @@ void ARMFrameLowering::adjustForSegmentedStacks(
   MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
   MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();
 
-  for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(),
-       e = PrologueMBB.livein_end();
-       i != e; ++i) {
-    AllocMBB->addLiveIn(*i);
-    GetMBB->addLiveIn(*i);
-    McrMBB->addLiveIn(*i);
-    PrevStackMBB->addLiveIn(*i);
-    PostStackMBB->addLiveIn(*i);
+  // Grab everything that reaches PrologueMBB to update their liveness as well.
+  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
+  SmallVector<MachineBasicBlock *, 8> WalkList;
+  WalkList.push_back(&PrologueMBB);
+
+  do {
+    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
+    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
+      if (BeforePrologueRegion.insert(PredBB).second)
+        WalkList.push_back(PredBB);
+    }
+  } while (!WalkList.empty());
+
+  // The order in that list is important.
+  // The blocks will all be inserted before PrologueMBB using that order.
+  // Therefore the block that should appear first in the CFG should appear
+  // first in the list.
+  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
+                                      PostStackMBB};
+
+  for (MachineBasicBlock *B : AddedBlocks)
+    BeforePrologueRegion.insert(B);
+
+  for (const auto &LI : PrologueMBB.liveins()) {
+    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
+      PredBB->addLiveIn(LI);
   }
 
-  MF.push_front(PostStackMBB);
-  MF.push_front(AllocMBB);
-  MF.push_front(GetMBB);
-  MF.push_front(McrMBB);
-  MF.push_front(PrevStackMBB);
+  // Remove the newly added blocks from the list, since we know
+  // we do not have to do the following updates for them.
+  for (MachineBasicBlock *B : AddedBlocks) {
+    BeforePrologueRegion.erase(B);
+    MF.insert(PrologueMBB.getIterator(), B);
+  }
+
+  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
+    // Make sure the LiveIns are still sorted and unique.
+    MBB->sortUniqueLiveIns();
+    // Replace the edges to PrologueMBB by edges to the sequences
+    // we are about to add.
+    MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
+  }
 
   // The required stack size that is aligned to ARM constant criterion.
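
// [Editorial aside, not part of the imported patch] The segmented-stacks hunk
// above collects every block that can reach PrologueMBB with a simple
// worklist walk over predecessors. A runnable sketch of the same traversal
// over a toy predecessor map (the graph shape here is invented):
//
//   #include <iostream>
//   #include <map>
//   #include <set>
//   #include <vector>
//
//   int main() {
//     // Predecessor lists: block id -> blocks that branch to it.
//     std::map<int, std::vector<int>> Preds = {{0, {}}, {1, {0}}, {2, {0, 1}}};
//     int PrologueMBB = 2;
//     std::set<int> BeforePrologueRegion;
//     std::vector<int> WalkList = {PrologueMBB};
//     do {
//       int Cur = WalkList.back();
//       WalkList.pop_back();
//       for (int Pred : Preds[Cur])
//         if (BeforePrologueRegion.insert(Pred).second) // first visit only
//           WalkList.push_back(Pred);
//     } while (!WalkList.empty());
//     for (int B : BeforePrologueRegion)
//       std::cout << B << ' ';                          // prints: 0 1
//   }
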
   AlignedStackSize = alignToARMConstant(StackSize);
@@ -1991,7 +1975,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
     ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
         MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0);
     MachineConstantPool *MCP = MF.getConstantPool();
-    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, MF.getAlignment());
+    unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
 
     // ldr SR0, [pc, offset(STACK_LIMIT)]
     AddDefaultPred(BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 6fdc5eff5e47..66f4dfb6ef52 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -31,8 +31,6 @@ public:
   void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
   void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
 
-  void fixTCReturn(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,
@@ -52,7 +50,6 @@ public:
                                 unsigned &FrameReg) const override;
   int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
                                  unsigned &FrameReg, int SPAdj) const;
-  int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
 
   void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs,
                             RegScavenger *RS) const override;
@@ -60,6 +57,11 @@ public:
   void adjustForSegmentedStacks(MachineFunction &MF,
                                 MachineBasicBlock &MBB) const override;
 
+  /// Returns true if the target will correctly handle shrink wrapping.
+  bool enableShrinkWrapping(const MachineFunction &MF) const override {
+    return true;
+  }
+
 private:
   void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index b110628a0a86..024244092a34 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -160,11 +160,6 @@ public:
 
   // Thumb Addressing Modes:
   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
-  bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset,
-                             unsigned Scale);
-  bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset);
-  bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset);
-  bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset);
   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                 SDValue &OffImm);
   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
@@ -176,8 +171,6 @@ public:
   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
 
   // Thumb 2 Addressing Modes:
-  bool SelectT2ShifterOperandReg(SDValue N,
-                                 SDValue &BaseReg, SDValue &Opc);
   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                             SDValue &OffImm);
@@ -278,6 +271,22 @@ private:
   // Get the alignment operand for a NEON VLD or VST instruction.
   SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
                         bool is64BitVector);
+
+  /// Returns the number of instructions required to materialize the given
+  /// constant in a register, or 3 if a literal pool load is needed.
+  unsigned ConstantMaterializationCost(unsigned Val) const;
+
+  /// Checks if N is a multiplication by a constant where we can extract out a
+  /// power of two from the constant so that it can be used in a shift, but only
+  /// if it simplifies the materialization of the constant. Returns true if it
+  /// is, and assigns to PowerOfTwo the power of two that should be extracted
+  /// out and to NewMulConst the new constant to be multiplied by.
+  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
+                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;
+
+  /// Replace N with M in CurDAG, in a way that also ensures that M gets
+  /// selected when N would have been selected.
+  void replaceDAGValue(const SDValue &N, SDValue M);
 };
 }
 
@@ -334,7 +343,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
   bool isThumb2 = Subtarget->isThumb();
   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
        E = CurDAG->allnodes_end(); I != E; ) {
-    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
+    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
 
     if (N->getOpcode() != ISD::ADD)
       continue;
@@ -388,7 +397,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
     SDValue CPTmp1;
     SDValue CPTmp2;
     if (isThumb2) {
-      if (SelectT2ShifterOperandReg(N0, CPTmp0, CPTmp1))
+      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
         continue;
     } else {
       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
@@ -471,6 +480,61 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
 }
 
+unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
+  if (Subtarget->isThumb()) {
+    if (Val <= 255) return 1;                               // MOV
+    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+    if (~Val <= 255) return 2;                              // MOV + MVN
+    if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
+  } else {
+    if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
+    if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
+    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+    if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
+  }
+  if (Subtarget->useMovt(*MF)) return 2;                    // MOVW + MOVT
+  return 3;                                                 // Literal pool load
+}
+
+bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
+                                             unsigned MaxShift,
+                                             unsigned &PowerOfTwo,
+                                             SDValue &NewMulConst) const {
+  assert(N.getOpcode() == ISD::MUL);
+  assert(MaxShift > 0);
+
+  // If the multiply is used in more than one place then changing the constant
+  // will make other uses incorrect, so don't.
+  if (!N.hasOneUse()) return false;
+  // Check if the multiply is by a constant
+  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
+  if (!MulConst) return false;
+  // If the constant is used in more than one place then modifying it will mean
+  // we need to materialize two constants instead of one, which is a bad idea.
+  if (!MulConst->hasOneUse()) return false;
+  unsigned MulConstVal = MulConst->getZExtValue();
+  if (MulConstVal == 0) return false;
+
+  // Find the largest power of 2 that MulConstVal is a multiple of
+  PowerOfTwo = MaxShift;
+  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
+    --PowerOfTwo;
+    if (PowerOfTwo == 0) return false;
+  }
+
+  // Only optimise if the new cost is better
+  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
+  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
+  unsigned OldCost = ConstantMaterializationCost(MulConstVal);
+  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
+  return NewCost < OldCost;
+}
+
+void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
+  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
+  CurDAG->ReplaceAllUsesWith(N, M);
+}
+
 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                               SDValue &BaseReg,
                                               SDValue &Opc,
@@ -478,6 +542,24 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
   if (DisableShifterOp)
     return false;
 
+  // If N is a multiply-by-constant and it's profitable to extract a shift and
+  // use it in a shifted operand do so.
+  if (N.getOpcode() == ISD::MUL) {
+    unsigned PowerOfTwo = 0;
+    SDValue NewMulConst;
+    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
+      BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
+                                               N.getOperand(0), NewMulConst)
+                                   .getNode()),
+                        0);
+      replaceDAGValue(N.getOperand(1), NewMulConst);
+      Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
+                                                          PowerOfTwo),
+                                      SDLoc(N), MVT::i32);
+      return true;
+    }
+  }
+
   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
 
   // Don't match base register only case. That is matched to a separate
@@ -662,6 +744,18 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
     }
   }
 
+  // If Offset is a multiply-by-constant and it's profitable to extract a shift
+  // and use it in a shifted operand do so.
+  if (Offset.getOpcode() == ISD::MUL) {
+    unsigned PowerOfTwo = 0;
+    SDValue NewMulConst;
+    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
+      replaceDAGValue(Offset.getOperand(1), NewMulConst);
+      ShAmt = PowerOfTwo;
+      ShOpcVal = ARM_AM::lsl;
+    }
+  }
+
   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                   SDLoc(N), MVT::i32);
   return true;
@@ -1085,78 +1179,14 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
   return true;
 }
 
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base,
-                                       SDValue &Offset, unsigned Scale) {
-  if (Scale == 4) {
-    SDValue TmpBase, TmpOffImm;
-    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
-      return false;  // We want to select tLDRspi / tSTRspi instead.
-
-    if (N.getOpcode() == ARMISD::Wrapper &&
-        N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
-      return false;  // We want to select tLDRpci instead.
-  }
-
-  if (!CurDAG->isBaseWithConstantOffset(N))
-    return false;
-
-  // Thumb does not have [sp, r] address mode.
-  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
-  RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
-  if ((LHSR && LHSR->getReg() == ARM::SP) ||
-      (RHSR && RHSR->getReg() == ARM::SP))
-    return false;
-
-  // FIXME: Why do we explicitly check for a match here and then return false?
-  // Presumably to allow something else to match, but shouldn't this be
-  // documented?
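
// [Editorial aside, not part of the imported patch] The new helpers above
// peel the largest power of two out of a multiply constant when the smaller
// residue is cheaper to materialize. A standalone sketch with a deliberately
// simplified Thumb-style cost model (the real one also consults subtarget
// features like MOVW availability):
//
//   #include <cstdio>
//
//   static unsigned Cost(unsigned Val) {       // crude stand-in cost model
//     if (Val <= 255) return 1;                // MOV
//     if (Val <= 0xffff) return 1;             // MOVW
//     if (~Val <= 255) return 2;               // MOV + MVN
//     return 3;                                // literal pool load
//   }
//
//   static bool extractShift(unsigned MulConst, unsigned MaxShift,
//                            unsigned &PowerOfTwo, unsigned &NewMulConst) {
//     if (MulConst == 0) return false;
//     PowerOfTwo = MaxShift;
//     while (MulConst % (1u << PowerOfTwo) != 0) // largest 2^k factor
//       if (--PowerOfTwo == 0) return false;
//     NewMulConst = MulConst >> PowerOfTwo;
//     return Cost(NewMulConst) < Cost(MulConst); // only if it actually helps
//   }
//
//   int main() {
//     unsigned Pow, Residue;
//     if (extractShift(0xff000u, 31, Pow, Residue)) // 0xff << 12
//       std::printf("mul by %#x, then lsl #%u\n", Residue, Pow);
//   }
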
-  int RHSC;
-  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC))
-    return false;
-
-  Base = N.getOperand(0);
-  Offset = N.getOperand(1);
-  return true;
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N,
-                                          SDValue &Base,
-                                          SDValue &Offset) {
-  return SelectThumbAddrModeRI(N, Base, Offset, 1);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N,
-                                          SDValue &Base,
-                                          SDValue &Offset) {
-  return SelectThumbAddrModeRI(N, Base, Offset, 2);
-}
-
-bool
-ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N,
-                                          SDValue &Base,
-                                          SDValue &Offset) {
-  return SelectThumbAddrModeRI(N, Base, Offset, 4);
-}
-
 bool
 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                           SDValue &Base, SDValue &OffImm) {
-  if (Scale == 4) {
-    SDValue TmpBase, TmpOffImm;
-    if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm))
-      return false;  // We want to select tLDRspi / tSTRspi instead.
-
-    if (N.getOpcode() == ARMISD::Wrapper &&
-        N.getOperand(0).getOpcode() == ISD::TargetConstantPool)
-      return false;  // We want to select tLDRpci instead.
-  }
-
   if (!CurDAG->isBaseWithConstantOffset(N)) {
-    if (N.getOpcode() == ARMISD::Wrapper &&
-        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+    if (N.getOpcode() == ISD::ADD) {
+      return false; // We want to select register offset instead
+    } else if (N.getOpcode() == ARMISD::Wrapper &&
+               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
       Base = N.getOperand(0);
     } else {
       Base = N;
@@ -1166,23 +1196,6 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
     return true;
   }
 
-  RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
-  RegisterSDNode *RHSR = dyn_cast<RegisterSDNode>(N.getOperand(1));
-  if ((LHSR && LHSR->getReg() == ARM::SP) ||
-      (RHSR && RHSR->getReg() == ARM::SP)) {
-    ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(N.getOperand(0));
-    ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
-    unsigned LHSC = LHS ? LHS->getZExtValue() : 0;
-    unsigned RHSC = RHS ? RHS->getZExtValue() : 0;
-
-    // Thumb does not have [sp, #imm5] address mode for non-zero imm5.
-    if (LHSC != 0 || RHSC != 0) return false;
-
-    Base = N;
-    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
-    return true;
-  }
-
   // If the RHS is + imm5 * scale, fold into addr mode.
   int RHSC;
   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
@@ -1191,9 +1204,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
     return true;
   }
 
-  Base = N.getOperand(0);
-  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
-  return true;
+  // Offset is too large, so use register offset instead.
+  return false;
 }
 
 bool
@@ -1263,28 +1275,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
 
 //===----------------------------------------------------------------------===//
 
-bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg,
-                                                SDValue &Opc) {
-  if (DisableShifterOp)
-    return false;
-
-  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
-
-  // Don't match base register only case. That is matched to a separate
-  // lower complexity pattern with explicit register operand.
-  if (ShOpcVal == ARM_AM::no_shift) return false;
-
-  BaseReg = N.getOperand(0);
-  unsigned ShImmVal = 0;
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
-    ShImmVal = RHS->getZExtValue() & 31;
-    Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), SDLoc(N));
-    return true;
-  }
-
-  return false;
-}
-
 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                             SDValue &Base, SDValue &OffImm) {
   // Match simple R + imm12 operands.
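
// [Editorial aside, not part of the imported patch] The Thumb addressing-mode
// hunks above fold a constant offset only when it is Scale-aligned and the
// scaled value fits in imm5 (0..31); anything else now falls back to a
// register offset. A runnable sketch of that range test (the exact signature
// of isScaledConstantInRange is paraphrased here):
//
//   #include <cassert>
//
//   static bool scaledConstantInRange(int Offset, int Scale, int Low,
//                                     int High, int &RHSC) {
//     if (Offset % Scale != 0) return false; // must be a multiple of Scale
//     RHSC = Offset / Scale;
//     return RHSC >= Low && RHSC < High;     // half-open range, assumed
//   }
//
//   int main() {
//     int RHSC;
//     assert(scaledConstantInRange(124, 4, 0, 32, RHSC) && RHSC == 31);
//     assert(!scaledConstantInRange(128, 4, 0, 32, RHSC)); // too large
//     assert(!scaledConstantInRange(2, 4, 0, 32, RHSC));   // misaligned
//     return 0;
//   }
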
@@ -1425,6 +1415,17 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
     }
   }
 
+  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
+  // and use it in a shifted operand do so.
+  if (OffReg.getOpcode() == ISD::MUL) {
+    unsigned PowerOfTwo = 0;
+    SDValue NewMulConst;
+    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
+      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
+      ShAmt = PowerOfTwo;
+    }
+  }
+
   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
 
   return true;
@@ -2503,25 +2504,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   }
   case ISD::Constant: {
     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
-    bool UseCP = true;
-    if (Subtarget->useMovt(*MF))
-      // Thumb2-aware targets have the MOVT instruction, so all immediates can
-      // be done with MOV + MOVT, at worst.
-      UseCP = false;
-    else {
-      if (Subtarget->isThumb()) {
-        UseCP = (Val > 255 &&                                  // MOV
-                 ~Val > 255 &&                                 // MOV + MVN
-                 !ARM_AM::isThumbImmShiftedVal(Val) &&         // MOV + LSL
-                 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
-      } else
-        UseCP = (ARM_AM::getSOImmVal(Val) == -1 &&             // MOV
-                 ARM_AM::getSOImmVal(~Val) == -1 &&            // MVN
-                 !ARM_AM::isSOImmTwoPartVal(Val) &&            // two instrs.
-                 !(Subtarget->hasV6T2Ops() && Val <= 0xffff)); // MOVW
-    }
-
-    if (UseCP) {
+    // If we can't materialize the constant we need to use a literal pool
+    if (ConstantMaterializationCost(Val) > 2) {
       SDValue CPIdx = CurDAG->getTargetConstantPool(
           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
           TLI->getPointerTy(CurDAG->getDataLayout()));
@@ -3376,7 +3360,7 @@ static void getIntOperandsFromRegisterString(StringRef RegString,
                                              SelectionDAG *CurDAG, SDLoc DL,
                                              std::vector<SDValue> &Ops) {
   SmallVector<StringRef, 5> Fields;
-  RegString.split(Fields, ":");
+  RegString.split(Fields, ':');
 
   if (Fields.size() > 1) {
     bool AllIntFields = true;
@@ -3461,9 +3445,9 @@ static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
 // The flags here are common to those allowed for apsr in the A class cores and
 // those allowed for the special registers in the M class cores. Returns a
 // value representing which flags were present, -1 if invalid.
-static inline int getMClassFlagsMask(StringRef Flags) {
+static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
   if (Flags.empty())
-    return 0x3;
+    return 0x2 | (int)hasDSP;
 
   return StringSwitch<int>(Flags)
          .Case("g", 0x1)
@@ -3492,7 +3476,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
   }
 
   // We know we are now handling a write so need to get the mask for the flags.
-  int Mask = getMClassFlagsMask(Flags);
+  int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
 
   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
   // shouldn't have flags present.
@@ -3501,7 +3485,7 @@ static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
 
   // The _g and _nzcvqg versions are only valid if the DSP extension is
   // available.
-  if (!Subtarget->hasThumb2DSP() && (Mask & 0x2))
+  if (!Subtarget->hasDSP() && (Mask & 0x1))
     return -1;
 
   // The register was valid so need to put the mask in the correct place
@@ -3523,7 +3507,7 @@ static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
   // The flags permitted for apsr are the same flags that are allowed in
   // M class registers. We get the flag value and then shift the flags into
   // the correct place to combine with the mask.
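
// [Editorial aside, not part of the imported patch] getMClassFlagsMask,
// changed above, now derives the default MSR write mask from the DSP feature
// instead of hard-coding 0x3: bit 1 is always written, bit 0 only when the
// DSP extension exists. A tiny runnable sketch of that mask computation
// (std::string stands in for StringRef; the flag spellings follow the hunk):
//
//   #include <cassert>
//   #include <string>
//
//   static int flagsMask(const std::string &Flags, bool HasDSP) {
//     if (Flags.empty())
//       return 0x2 | (int)HasDSP; // default: "nzcvq", plus "g" iff DSP
//     if (Flags == "g")      return 0x1;
//     if (Flags == "nzcvq")  return 0x2;
//     if (Flags == "nzcvqg") return 0x3;
//     return -1;                  // invalid flag suffix
//   }
//
//   int main() {
//     assert(flagsMask("", false) == 0x2);
//     assert(flagsMask("", true) == 0x3);
//     assert(flagsMask("bogus", true) == -1);
//     return 0;
//   }
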
-  Mask = getMClassFlagsMask(Flags);
+  Mask = getMClassFlagsMask(Flags, true);
   if (Mask == -1)
     return -1;
 
   return Mask << 2;
@@ -3742,7 +3726,7 @@ SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
   }
 
   SmallVector<StringRef, 2> Fields;
-  StringRef(SpecialReg).split(Fields, "_", 1, false);
+  StringRef(SpecialReg).split(Fields, '_', 1, false);
   std::string Reg = Fields[0].str();
   StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
 
@@ -3943,6 +3927,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
     // be an immediate and not a memory constraint.
     // Fallthrough.
   case InlineAsm::Constraint_m:
+  case InlineAsm::Constraint_o:
   case InlineAsm::Constraint_Q:
   case InlineAsm::Constraint_Um:
   case InlineAsm::Constraint_Un:
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 8cc06df71633..9cfb06b00c4b 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -142,6 +142,11 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
   setOperationAction(ISD::SREM, VT, Expand);
   setOperationAction(ISD::UREM, VT, Expand);
   setOperationAction(ISD::FREM, VT, Expand);
+
+  if (!VT.isFloatingPoint() &&
+      VT != MVT::v2i64 && VT != MVT::v1i64)
+    for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
+      setOperationAction(Opcode, VT, Legal);
 }
 
 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
@@ -166,77 +171,78 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
 
   // Uses VFP for Thumb libfuncs if available.
   if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
      Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
-    // Single-precision floating-point arithmetic.
-    setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
-    setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
-    setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
-    setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+    static const struct {
+      const RTLIB::Libcall Op;
+      const char * const Name;
+      const ISD::CondCode Cond;
+    } LibraryCalls[] = {
+      // Single-precision floating-point arithmetic.
+      { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
 
-    // Double-precision floating-point arithmetic.
-    setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
-    setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
-    setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
-    setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+      // Double-precision floating-point arithmetic.
+      { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
+      { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
 
-    // Single-precision comparisons.
-    setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
-    setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
-    setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
-    setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
-    setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
-    setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
-    setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
-    setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
+      // Single-precision comparisons.
+      { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
+      { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
+      { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
+      { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
+      { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
+      { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
+      { RTLIB::UO_F32,  "__unordsf2vfp", ISD::SETNE },
+      { RTLIB::O_F32,   "__unordsf2vfp", ISD::SETEQ },
 
-    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
-    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
+      // Double-precision comparisons.
+      { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
+      { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
+      { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
+      { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
+      { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
+      { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
+      { RTLIB::UO_F64,  "__unorddf2vfp", ISD::SETNE },
+      { RTLIB::O_F64,   "__unorddf2vfp", ISD::SETEQ },
 
-    // Double-precision comparisons.
-    setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
-    setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
-    setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
-    setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
-    setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
-    setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
-    setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
-    setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
+      // Floating-point to integer conversions.
+      // i64 conversions are done via library routines even when generating VFP
+      // instructions, so use the same ones.
+      { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
+      { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
+      { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
+      { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
 
-    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
-    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
-    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
+      // Conversions between floating types.
+      { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
+      { RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp", ISD::SETCC_INVALID },
 
-    // Floating-point to integer conversions.
-    // i64 conversions are done via library routines even when generating VFP
-    // instructions, so use the same ones.
-    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
-    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
-    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
-    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
+      // Integer to floating-point conversions.
+      // i64 conversions are done via library routines even when generating VFP
+      // instructions, so use the same ones.
+      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
+      // e.g., __floatunsidf vs. __floatunssidfvfp.
+      { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
+      { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
+      { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
+      { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
+    };
 
-    // Conversions between floating types.
-    setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
-    setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
+    for (const auto &LC : LibraryCalls) {
+      setLibcallName(LC.Op, LC.Name);
+      if (LC.Cond != ISD::SETCC_INVALID)
+        setCmpLibcallCC(LC.Op, LC.Cond);
+    }
+  }
 
-    // Integer to floating-point conversions.
-    // i64 conversions are done via library routines even when generating VFP
-    // instructions, so use the same ones.
-    // FIXME: There appears to be some naming inconsistency in ARM libgcc:
-    // e.g., __floatunsidf vs. __floatunssidfvfp.
-    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
-    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
-    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
-    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
+  // Set the correct calling convention for ARMv7k WatchOS. It's just
+  // AAPCS_VFP for functions as simple as libcalls.
+  if (Subtarget->isTargetWatchOS()) {
+    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
+      setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
   }
 }
 
@@ -245,8 +251,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setLibcallName(RTLIB::SRL_I128, nullptr);
   setLibcallName(RTLIB::SRA_I128, nullptr);
 
-  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
-      !Subtarget->isTargetWindows()) {
+  // RTLIB
+  if (Subtarget->isAAPCS_ABI() &&
+      (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
+       Subtarget->isTargetAndroid())) {
     static const struct {
       const RTLIB::Libcall Op;
       const char * const Name;
@@ -334,12 +342,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       { RTLIB::UDIV_I16, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
       { RTLIB::UDIV_I32, "__aeabi_uidiv",    CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
       { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-
-      // Memory operations
-      // RTABI chapter 4.3.4
-      { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-      { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
-      { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
     };
 
     for (const auto &LC : LibraryCalls) {
@@ -348,6 +350,30 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       if (LC.Cond != ISD::SETCC_INVALID)
         setCmpLibcallCC(LC.Op, LC.Cond);
     }
+
+    // EABI dependent RTLIB
+    if (TM.Options.EABIVersion == EABI::EABI4 ||
+        TM.Options.EABIVersion == EABI::EABI5) {
+      static const struct {
+        const RTLIB::Libcall Op;
+        const char *const Name;
+        const CallingConv::ID CC;
+        const ISD::CondCode Cond;
+      } MemOpsLibraryCalls[] = {
+        // Memory operations
+        // RTABI chapter 4.3.4
+        { RTLIB::MEMCPY,  "__aeabi_memcpy",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+        { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+        { RTLIB::MEMSET,  "__aeabi_memset",  CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+      };
+
+      for (const auto &LC : MemOpsLibraryCalls) {
+        setLibcallName(LC.Op, LC.Name);
+        setLibcallCallingConv(LC.Op, LC.CC);
+        if (LC.Cond != ISD::SETCC_INVALID)
+          setCmpLibcallCC(LC.Op, LC.Cond);
+      }
+    }
   }
 
   if (Subtarget->isTargetWindows())
 {
@@ -364,6 +390,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
       { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
       { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
+      { RTLIB::SDIV_I32, "__rt_sdiv",   CallingConv::ARM_AAPCS_VFP },
+      { RTLIB::UDIV_I32, "__rt_udiv",   CallingConv::ARM_AAPCS_VFP },
+      { RTLIB::SDIV_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS_VFP },
+      { RTLIB::UDIV_I64, "__rt_udiv64", CallingConv::ARM_AAPCS_VFP },
     };
 
     for (const auto &LC : LibraryCalls) {
@@ -373,8 +403,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   }
 
   // Use divmod compiler-rt calls for iOS 5.0 and later.
-  if (Subtarget->getTargetTriple().isiOS() &&
-      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
+  if (Subtarget->isTargetWatchOS() ||
+      (Subtarget->isTargetIOS() &&
+       !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
   }
@@ -392,6 +423,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
   }
 
+  // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
+  // a __gnu_ prefix (which is the default).
+  if (Subtarget->isTargetAEABI()) {
+    setLibcallName(RTLIB::FPROUND_F32_F16, "__aeabi_f2h");
+    setLibcallName(RTLIB::FPROUND_F64_F16, "__aeabi_d2h");
+    setLibcallName(RTLIB::FPEXT_F16_F32, "__aeabi_h2f");
+  }
+
   if (Subtarget->isThumb1Only())
     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
   else
@@ -579,7 +618,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::SIGN_EXTEND);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   setTargetDAGCombine(ISD::ANY_EXTEND);
-  setTargetDAGCombine(ISD::SELECT_CC);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
@@ -605,7 +643,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::ADDC);
 
   if (Subtarget->isFPOnlySP()) {
-    // When targetting a floating-point unit with only single-precision
+    // When targeting a floating-point unit with only single-precision
     // operations, f64 is legal for the few double-precision instructions which
    // are present. However, no double-precision operations other than moves,
     // loads and stores are provided by the hardware.
@@ -689,7 +727,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
   }
   if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
-      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
+      || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
     setOperationAction(ISD::MULHS, MVT::i32, Expand);
 
   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -706,8 +744,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::SUBE, MVT::i32, Custom);
   }
 
+  if (!Subtarget->isThumb1Only())
+    setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
+
   // ARM does not have ROTL.
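
// [Editorial aside, not part of the imported patch] Several hunks above
// replace long runs of setLibcallName calls with static tables iterated in a
// loop. A self-contained sketch of that table-driven registration pattern
// (the enum, names, and the map standing in for the lowering tables are all
// invented for illustration):
//
//   #include <cstdio>
//   #include <map>
//   #include <string>
//
//   enum Libcall { MEMCPY, MEMMOVE, MEMSET };
//
//   int main() {
//     static const struct {
//       Libcall Op;
//       const char *Name;
//     } LibraryCalls[] = {
//         {MEMCPY, "__aeabi_memcpy"},
//         {MEMMOVE, "__aeabi_memmove"},
//         {MEMSET, "__aeabi_memset"},
//     };
//     std::map<Libcall, std::string> Names; // stands in for lowering tables
//     for (const auto &LC : LibraryCalls)   // one loop instead of N calls
//       Names[LC.Op] = LC.Name;
//     std::printf("%s\n", Names[MEMSET].c_str()); // __aeabi_memset
//   }
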
- setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i32, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); + } setOperationAction(ISD::CTTZ, MVT::i32, Custom); setOperationAction(ISD::CTPOP, MVT::i32, Expand); if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) @@ -717,7 +762,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); - setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); + // @llvm.readcyclecounter requires the Performance Monitors extension. + // Default to the 0 expansion on unsupported platforms. + // FIXME: Technically there are older ARM CPUs that have + // implementation-specific ways of obtaining this information. + if (Subtarget->hasPerfMon()) + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); // Only ARMv6 has BSWAP. if (!Subtarget->hasV6Ops()) @@ -726,15 +776,17 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) && !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) { // These are expanded into libcalls if the cpu doesn't have HW divider. - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, LibCall); + setOperationAction(ISD::UDIV, MVT::i32, LibCall); } - // FIXME: Also set divmod for SREM on EABI setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); // Register based DivRem for AEABI (RTABI 4.2) - if (Subtarget->isTargetAEABI()) { + if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) { + setOperationAction(ISD::SREM, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod"); @@ -762,7 +814,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); - setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::BlockAddress, MVT::i32, Custom); @@ -776,13 +827,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (!Subtarget->isTargetMachO()) { - // Non-MachO platforms may return values in these registers via the - // personality function. - setExceptionPointerRegister(ARM::R0); - setExceptionSelectorRegister(ARM::R1); - } - if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); else @@ -849,11 +893,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // We want to custom lower some of our intrinsics. 
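// ----------------------------------------------------------------------
// Illustrative sketch: the "Register based DivRem" block above maps
// SREM/UREM onto __aeabi_idivmod/__aeabi_uidivmod, which under the RTABI
// return quotient and remainder together in the register pair {r0, r1}.
// A portable model of that contract, with a struct return standing in for
// the register pair; the type and function below are hypothetical, not
// the runtime's actual definitions.
struct IDivMod {
  int Quot; // comes back in r0
  int Rem;  // comes back in r1
};

IDivMod aeabi_idivmod_model(int Numerator, int Denominator) {
  // Division by zero is left undefined here, as in C.
  return {Numerator / Denominator, Numerator % Denominator};
}
// One call can thus serve a matching SDIV/SREM pair, which is why the
// lowering above prefers the divmod entry points on AEABI and Android
// targets.
// ----------------------------------------------------------------------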
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - if (Subtarget->isTargetDarwin()) { - setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); - setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); + if (Subtarget->useSjLjEH()) setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); - } setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); @@ -912,7 +956,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->getTargetTriple().isiOS()) { + if (Subtarget->isTargetWatchOS()) { + setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); + setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); + } + if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) { // For iOS, we don't want to the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); @@ -928,6 +976,13 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::f32, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal); + setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); + if (!Subtarget->isFPOnlySP()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); @@ -935,8 +990,22 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FTRUNC, MVT::f64, Legal); setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FMINNUM, MVT::f64, Legal); + setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); } } + + if (Subtarget->hasNEON()) { + // vmin and vmax aren't available in a scalar form, so we use + // a NEON instruction with an undef lane instead. + setOperationAction(ISD::FMINNAN, MVT::f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal); + setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal); + setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal); + } + // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); @@ -959,11 +1028,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, //// temporary - rewrite interface to use type MaxStoresPerMemset = 8; - MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemsetOptSize = 4; MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores - MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; + MaxStoresPerMemcpyOptSize = 2; MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores - MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 
4 : 2; + MaxStoresPerMemmoveOptSize = 2; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. @@ -1054,8 +1123,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::RBIT: return "ARMISD::RBIT"; - case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; @@ -1069,7 +1136,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; - case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH"; case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; @@ -1082,6 +1150,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK"; + case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; @@ -1133,14 +1202,11 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::UMLAL: return "ARMISD::UMLAL"; case ARMISD::SMLAL: return "ARMISD::SMLAL"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; - case ARMISD::FMAX: return "ARMISD::FMAX"; - case ARMISD::FMIN: return "ARMISD::FMIN"; - case ARMISD::VMAXNM: return "ARMISD::VMAX"; - case ARMISD::VMINNM: return "ARMISD::VMIN"; case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; case ARMISD::VBSL: return "ARMISD::VBSL"; + case ARMISD::MEMCPY: return "ARMISD::MEMCPY"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; @@ -1449,9 +1515,10 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), StackPtr, PtrOff); - return DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo::getStack(LocMemOffset), - false, false, 0); + return DAG.getStore( + Chain, dl, Arg, PtrOff, + MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), + false, false, 0); } void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, @@ -1734,9 +1801,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), false, false, - false, 0); + Callee = DAG.getLoad( + PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } else if (ExternalSymbolSDNode *S=dyn_cast(Callee)) { const char *Sym = S->getSymbol(); @@ -1748,9 +1816,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Get the address of the callee into a register SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(PtrVt, dl, 
DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), false, false, - false, 0); + Callee = DAG.getLoad( + PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); @@ -1768,7 +1837,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMISD::WrapperPIC, dl, PtrVt, DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY)); Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(), false, false, true, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, true, 0); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); @@ -1781,7 +1851,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee), - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; @@ -1804,9 +1875,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - Callee = DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), false, false, - false, 0); + Callee = DAG.getLoad( + PtrVt, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel); } else { @@ -1821,7 +1893,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently. unsigned CallOpc; - bool HasMinSizeAttr = MF.getFunction()->hasFnAttribute(Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -1831,8 +1902,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (!isDirect && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; else if (doesNotRet && isDirect && Subtarget->hasRAS() && - // Emit regular call when code size is the priority - !HasMinSizeAttr) + // Emit regular call when code size is the priority + !MF.getFunction()->optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else @@ -2014,6 +2085,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CallingConv::ID CallerCC = CallerF->getCallingConv(); bool CCMatch = CallerCC == CalleeCC; + assert(Subtarget->supportsTailCall()); + // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. @@ -2033,26 +2106,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; - // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: - // emitEpilogue is not ready for them. 
Thumb tail calls also use t2B, as - // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation - // support in the assembler and linker to be used. This would need to be - // fixed to fully support tail calls in Thumb1. - // - // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take - // LR. This means if we need to reload LR, it takes an extra instructions, - // which outweighs the value of the tail call; but here we don't know yet - // whether LR is going to be used. Probably the right approach is to - // generate the tail call here and turn it back into CALL/RET in - // emitEpilogue if LR is used. - - // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, - // but we need to make sure there are enough registers; the only valid - // registers are the 4 used for parameters. We don't currently do this - // case. - if (Subtarget->isThumb1Only()) - return false; - // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic // pre-emption of symbols, as the AAELF spec requires normal calls @@ -2400,7 +2453,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { if (!CI->isTailCall() || Attr.getValueAsString() == "true") return false; - return !Subtarget->isThumb1Only(); + return true; } // Trying to write a 64 bit value so need to split into two 32 bit values first, @@ -2467,9 +2520,10 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); } CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + SDValue Result = + DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 0); if (RelocM == Reloc::Static) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32); @@ -2491,9 +2545,10 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); - Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Argument = + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); @@ -2543,17 +2598,19 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, true); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, dl, Chain, Offset, + 
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } else { // local exec model assert(model == TLSModel::LocalExec); @@ -2561,9 +2618,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); - Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, dl, Chain, Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } // The address of the thread local variable is the add of the thread @@ -2577,6 +2635,8 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetELF() && "TLS not implemented for non-ELF targets"); GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); @@ -2597,22 +2657,31 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDLoc dl(Op); const GlobalValue *GV = cast(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); - ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(GV, - UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); + bool UseGOT_PREL = + !(GV->hasHiddenVisibility() || GV->hasLocalLinkage()); + + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDLoc dl(Op); + unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( + GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, + UseGOT_PREL ? 
ARMCP::GOT_PREL : ARMCP::no_modifier, + /*AddCurrentAddress=*/UseGOT_PREL); SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), - CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + SDValue Result = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); SDValue Chain = Result.getValue(1); - SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); - Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); - if (!UseGOTOFF) + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); + Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + if (UseGOT_PREL) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - MachinePointerInfo::getGOT(), + MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, false, 0); return Result; } @@ -2628,9 +2697,10 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, } else { SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + return DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); } } @@ -2654,7 +2724,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); return Result; } @@ -2680,32 +2751,11 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, TargetFlags)); if (GV->hasDLLImportStorageClass()) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); return Result; } -SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, - SelectionDAG &DAG) const { - assert(Subtarget->isTargetELF() && - "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo(); - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc dl(Op); - unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; - ARMConstantPoolValue *CPV = - ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", - ARMPCLabelIndex, PCAdj); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); - return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); -} - SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -2722,6 +2772,13 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32)); } +SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other, + Op.getOperand(0)); +} + SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { @@ -2732,7 +2789,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, case Intrinsic::arm_rbit: { assert(Op.getOperand(1).getValueType() == MVT::i32 && "RBIT intrinsic must have i32 type!"); - return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(1)); + return DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Op.getOperand(1)); } case Intrinsic::arm_thread_pointer: { EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2752,10 +2809,10 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = - DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(), - false, false, false, 0); + SDValue Result = DAG.getLoad( + PtrVT, dl, DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); if (RelocM == Reloc::PIC_) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); @@ -2770,6 +2827,36 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::arm_neon_vminnm: + case Intrinsic::arm_neon_vmaxnm: { + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm) + ? ISD::FMINNUM : ISD::FMAXNUM; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::arm_neon_vminu: + case Intrinsic::arm_neon_vmaxu: { + if (Op.getValueType().isFloatingPoint()) + return SDValue(); + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu) + ? ISD::UMIN : ISD::UMAX; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + case Intrinsic::arm_neon_vmins: + case Intrinsic::arm_neon_vmaxs: { + // v{min,max}s is overloaded between signed integers and floats. + if (!Op.getValueType().isFloatingPoint()) { + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) + ? ISD::SMIN : ISD::SMAX; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins) + ? 
ISD::FMINNAN : ISD::FMAXNAN; + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } } } @@ -2870,9 +2957,10 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + ArgValue2 = DAG.getLoad( + MVT::i32, dl, Root, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, + false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -3056,9 +3144,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isMemLoc()) { int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + ArgValue2 = DAG.getLoad( + MVT::f64, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, false, 0); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); @@ -3139,9 +3228,9 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, "Byval arguments cannot be implicit"); unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed(); - int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, CurOrigArg, - CurByValIndex, VA.getLocMemOffset(), - Flags.getByValSize()); + int FrameIndex = StoreByValRegs( + CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex, + VA.getLocMemOffset(), Flags.getByValSize()); InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT)); CCInfo.nextInRegsParam(); } else { @@ -3151,9 +3240,10 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0)); + InVals.push_back(DAG.getLoad( + VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, false, 0)); } lastInsIndex = index; } @@ -3188,13 +3278,9 @@ static bool isFloatingPointZero(SDValue Op) { // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) // created by LowerConstantFP(). SDValue BitcastOp = Op->getOperand(0); - if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) { - SDValue MoveOp = BitcastOp->getOperand(0); - if (MoveOp->getOpcode() == ISD::TargetConstant && - cast(MoveOp)->getZExtValue() == 0) { - return true; - } - } + if (BitcastOp->getOpcode() == ARMISD::VMOVIMM && + isNullConstant(BitcastOp->getOperand(0))) + return true; } return false; } @@ -3559,113 +3645,6 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Try to generate VMAXNM/VMINNM on ARMv8. if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || TrueVal.getValueType() == MVT::f64)) { - // We can use VMAXNM/VMINNM for a compare followed by a select with the - // same operands, as follows: - // c = fcmp [?gt, ?ge, ?lt, ?le] a, b - // select c, a, b - // In NoNaNsFPMath the CC will have been changed from, e.g., 'ogt' to 'gt'. 
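// ----------------------------------------------------------------------
// Illustrative sketch of why the combine removed below needed its NaN and
// signed-zero checks before folding "c = fcmp gt a, b; select c, a, b"
// into VMAXNM. In scalar form: IEEE 754-2008 maxNum returns the number
// when exactly one operand is NaN, whereas the naive select yields b --
// possibly the NaN -- whenever the compare is false; and (+0 > -0) is
// false, so the select cannot tell the two zeros apart either, which the
// constant-zero sign checks below guarded against. The model function is
// hypothetical, written only to show the semantics.
#include <cmath>

float maxnum_model(float A, float B) {
  if (std::isnan(A)) return B; // maxNum: a NaN loses to a number...
  if (std::isnan(B)) return A; // ...but "A > B ? A : B" would yield B here
  return A > B ? A : B;
}
// ----------------------------------------------------------------------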
- bool swapSides = false; - if (!getTargetMachine().Options.NoNaNsFPMath) { - // transformability may depend on which way around we compare - switch (CC) { - default: - break; - case ISD::SETOGT: - case ISD::SETOGE: - case ISD::SETOLT: - case ISD::SETOLE: - // the non-NaN should be RHS - swapSides = DAG.isKnownNeverNaN(LHS) && !DAG.isKnownNeverNaN(RHS); - break; - case ISD::SETUGT: - case ISD::SETUGE: - case ISD::SETULT: - case ISD::SETULE: - // the non-NaN should be LHS - swapSides = DAG.isKnownNeverNaN(RHS) && !DAG.isKnownNeverNaN(LHS); - break; - } - } - swapSides = swapSides || (LHS == FalseVal && RHS == TrueVal); - if (swapSides) { - CC = ISD::getSetCCSwappedOperands(CC); - std::swap(LHS, RHS); - } - if (LHS == TrueVal && RHS == FalseVal) { - bool canTransform = true; - // FIXME: FastMathFlags::noSignedZeros() doesn't appear reachable from here - if (!getTargetMachine().Options.UnsafeFPMath && - !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) { - const ConstantFPSDNode *Zero; - switch (CC) { - default: - break; - case ISD::SETOGT: - case ISD::SETUGT: - case ISD::SETGT: - // RHS must not be -0 - canTransform = (Zero = dyn_cast(RHS)) && - !Zero->isNegative(); - break; - case ISD::SETOGE: - case ISD::SETUGE: - case ISD::SETGE: - // LHS must not be -0 - canTransform = (Zero = dyn_cast(LHS)) && - !Zero->isNegative(); - break; - case ISD::SETOLT: - case ISD::SETULT: - case ISD::SETLT: - // RHS must not be +0 - canTransform = (Zero = dyn_cast(RHS)) && - Zero->isNegative(); - break; - case ISD::SETOLE: - case ISD::SETULE: - case ISD::SETLE: - // LHS must not be +0 - canTransform = (Zero = dyn_cast(LHS)) && - Zero->isNegative(); - break; - } - } - if (canTransform) { - // Note: If one of the elements in a pair is a number and the other - // element is NaN, the corresponding result element is the number. - // This is consistent with the IEEE 754-2008 standard. - // Therefore, a > b ? 
a : b <=> vmax(a,b), if b is constant and a is NaN - switch (CC) { - default: - break; - case ISD::SETOGT: - case ISD::SETOGE: - if (!DAG.isKnownNeverNaN(RHS)) - break; - return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS); - case ISD::SETUGT: - case ISD::SETUGE: - if (!DAG.isKnownNeverNaN(LHS)) - break; - case ISD::SETGT: - case ISD::SETGE: - return DAG.getNode(ARMISD::VMAXNM, dl, VT, LHS, RHS); - case ISD::SETOLT: - case ISD::SETOLE: - if (!DAG.isKnownNeverNaN(RHS)) - break; - return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS); - case ISD::SETULT: - case ISD::SETULE: - if (!DAG.isKnownNeverNaN(LHS)) - break; - case ISD::SETLT: - case ISD::SETLE: - return DAG.getNode(ARMISD::VMINNM, dl, VT, LHS, RHS); - } - } - } - bool swpCmpOps = false; bool swpVselOps = false; checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); @@ -3890,16 +3869,18 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { Addr, Op.getOperand(2), JTI); } if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { - Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(), - false, false, false, 0); + Addr = + DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), + false, false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } else { - Addr = DAG.getLoad(PTy, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(), - false, false, false, 0); + Addr = + DAG.getLoad(PTy, dl, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), + false, false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } @@ -3936,7 +3917,7 @@ SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { else LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType()); - return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), /*isSigned*/ false, SDLoc(Op)).first; } @@ -3988,7 +3969,7 @@ SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { else LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType()); - return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0), /*isSigned*/ false, SDLoc(Op)).first; } @@ -4153,6 +4134,56 @@ static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl &Results, Results.push_back(Read.getOperand(0)); } +/// \p BC is a bitcast that is about to be turned into a VMOVDRR. +/// When \p DstVT, the destination type of \p BC, is on the vector +/// register bank and the source of bitcast, \p Op, operates on the same bank, +/// it might be possible to combine them, such that everything stays on the +/// vector register bank. +/// \p return The node that would replace \p BT, if the combine +/// is possible. +static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, + SelectionDAG &DAG) { + SDValue Op = BC->getOperand(0); + EVT DstVT = BC->getValueType(0); + + // The only vector instruction that can produce a scalar (remember, + // since the bitcast was about to be turned into VMOVDRR, the source + // type is i64) from a vector is EXTRACT_VECTOR_ELT. + // Moreover, we can do this combine only if there is one use. 
+ // Finally, if the destination type is not a vector, there is not + // much point in forcing everything on the vector bank. + if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !Op.hasOneUse()) + return SDValue(); + + // If the index is not constant, we will introduce an additional + // multiply that will stick. + // Give up in that case. + ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!Index) + return SDValue(); + unsigned DstNumElt = DstVT.getVectorNumElements(); + + // Compute the new index. + const APInt &APIntIndex = Index->getAPIntValue(); + APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt); + NewIndex *= APIntIndex; + // Check if the new constant index fits into i32. + if (NewIndex.getBitWidth() > 32) + return SDValue(); + + // vMTy bitcast(i64 extractelt vNi64 src, i32 index) -> + // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M) + SDLoc dl(Op); + SDValue ExtractSrc = Op.getOperand(0); + EVT VecVT = EVT::getVectorVT( + *DAG.getContext(), DstVT.getScalarType(), + ExtractSrc.getValueType().getVectorNumElements() * DstNumElt); + SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast, + DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32)); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -4172,6 +4203,11 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn i64->f64 into VMOVDRR. if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { + // Do not force values to GPRs (this is what VMOVDRR does for the inputs) + // if we can combine the bitcast with its source. + if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG)) + return Val; + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, @@ -4383,7 +4419,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, if (!ST->hasV6T2Ops()) return SDValue(); - SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); + SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0)); return DAG.getNode(ISD::CTLZ, dl, VT, rbit); } @@ -4544,8 +4580,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, "Unknown shift to lower!"); // We only lower SRA, SRL of 1 here, all others use generic lowering. - if (!isa<ConstantSDNode>(N->getOperand(1)) || - cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) + if (!isOneConstant(N->getOperand(1))) return SDValue(); // If we are in thumb mode, we don't have RRX. @@ -5036,18 +5071,56 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) { return VT == MVT::v8i8 && M.size() == 8; } +// Checks whether the shuffle mask represents a vector transpose (VTRN) by +// checking that pairs of elements in the shuffle mask represent the same index +// in each vector, incrementing the expected index by 2 at each step. +// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6] +// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g} +// v2={e,f,g,h} +// WhichResult gives the offset for each element in the mask based on which +// of the two results it belongs to.
+// +// The transpose can be represented either as: +// result1 = shufflevector v1, v2, result1_shuffle_mask +// result2 = shufflevector v1, v2, result2_shuffle_mask +// where v1/v2 and the shuffle masks have the same number of elements +// (here WhichResult (see below) indicates which result is being checked) +// +// or as: +// results = shufflevector v1, v2, shuffle_mask +// where both results are returned in one vector and the shuffle mask has twice +// as many elements as v1/v2 (here WhichResult will always be 0 if true); here we +// want to check the low half and high half of the shuffle mask as if it were +// the other case. static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || - (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) - return false; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + // If the mask is twice as long as the input vector then we need to check the + // upper and lower parts of the mask with a matching value for WhichResult. + // FIXME: A mask with only even values will be rejected in case the first + // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only + // M[0] is used to determine WhichResult. + for (unsigned i = 0; i < M.size(); i += NumElts) { + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; + for (unsigned j = 0; j < NumElts; j += 2) { + if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || + (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult)) + return false; + } } + + if (M.size() == NumElts*2) + WhichResult = 0; + return true; } @@ -5060,28 +5133,55 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned i = 0; i < NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || - (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) - return false; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + for (unsigned i = 0; i < M.size(); i += NumElts) { + if (M.size() == NumElts * 2) + WhichResult = i / NumElts; + else + WhichResult = M[i] == 0 ? 0 : 1; + for (unsigned j = 0; j < NumElts; j += 2) { + if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || + (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult)) + return false; + } } + + if (M.size() == NumElts*2) + WhichResult = 0; + return true; } +// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking +// that the mask elements are either all even and in steps of size 2 or all odd +// and in steps of size 2. +// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6] +// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g} +// v2={e,f,g,h} +// Requires similar checks to that of isVTRNMask with +// respect to how results are returned. static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ?
0 : 1); - for (unsigned i = 0; i != NumElts; ++i) { - if (M[i] < 0) continue; // ignore UNDEF indices - if ((unsigned) M[i] != 2 * i + WhichResult) - return false; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + for (unsigned i = 0; i < M.size(); i += NumElts) { + WhichResult = M[i] == 0 ? 0 : 1; + for (unsigned j = 0; j < NumElts; ++j) { + if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) + return false; + } } + if (M.size() == NumElts*2) + WhichResult = 0; + // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; @@ -5097,18 +5197,27 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ if (EltSz == 64) return false; - unsigned Half = VT.getVectorNumElements() / 2; - WhichResult = (M[0] == 0 ? 0 : 1); - for (unsigned j = 0; j != 2; ++j) { - unsigned Idx = WhichResult; - for (unsigned i = 0; i != Half; ++i) { - int MIdx = M[i + j * Half]; - if (MIdx >= 0 && (unsigned) MIdx != Idx) - return false; - Idx += 2; + unsigned NumElts = VT.getVectorNumElements(); + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + unsigned Half = NumElts / 2; + for (unsigned i = 0; i < M.size(); i += NumElts) { + WhichResult = M[i] == 0 ? 0 : 1; + for (unsigned j = 0; j < NumElts; j += Half) { + unsigned Idx = WhichResult; + for (unsigned k = 0; k < Half; ++k) { + int MIdx = M[i + j + k]; + if (MIdx >= 0 && (unsigned) MIdx != Idx) + return false; + Idx += 2; + } } } + if (M.size() == NumElts*2) + WhichResult = 0; + // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; @@ -5116,21 +5225,37 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return true; } +// Checks whether the shuffle mask represents a vector zip (VZIP) by checking +// that pairs of elements of the shufflemask represent the same index in each +// vector incrementing sequentially through the vectors. +// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5] +// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f} +// v2={e,f,g,h} +// Requires similar checks to that of isVTRNMask with respect to how results +// are returned. static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); if (EltSz == 64) return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ? 0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned) M[i] != Idx) || - (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) - return false; - Idx += 1; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + for (unsigned i = 0; i < M.size(); i += NumElts) { + WhichResult = M[i] == 0 ? 0 : 1; + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned j = 0; j < NumElts; j += 2) { + if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || + (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts)) + return false; + Idx += 1; + } } + if (M.size() == NumElts*2) + WhichResult = 0; + // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; @@ -5147,15 +5272,23 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return false; unsigned NumElts = VT.getVectorNumElements(); - WhichResult = (M[0] == 0 ?
0 : 1); - unsigned Idx = WhichResult * NumElts / 2; - for (unsigned i = 0; i != NumElts; i += 2) { - if ((M[i] >= 0 && (unsigned) M[i] != Idx) || - (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) - return false; - Idx += 1; + if (M.size() != NumElts && M.size() != NumElts*2) + return false; + + for (unsigned i = 0; i < M.size(); i += NumElts) { + WhichResult = M[i] == 0 ? 0 : 1; + unsigned Idx = WhichResult * NumElts / 2; + for (unsigned j = 0; j < NumElts; j += 2) { + if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || + (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx)) + return false; + Idx += 1; + } } + if (M.size() == NumElts*2) + WhichResult = 0; + // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. if (VT.is64BitVector() && EltSz == 32) return false; @@ -5329,16 +5462,14 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // just use VDUPLANE. We can only do this if the lane being extracted // is at a constant index, as the VDUP from lane instructions only have // constant-index forms. + ConstantSDNode *constIndex; if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa(Value->getOperand(1))) { + (constIndex = dyn_cast(Value->getOperand(1)))) { // We need to create a new undef vector to use for the VDUPLANE if the // size of the vector from which we get the value is different than the // size of the vector that we need to create. We will insert the element // such that the register coalescer will remove unnecessary copies. if (VT != Value->getOperand(0).getValueType()) { - ConstantSDNode *constIndex; - constIndex = dyn_cast(Value->getOperand(1)); - assert(constIndex && "The index is not a constant!"); unsigned index = constIndex->getAPIntValue().getLimitedValue() % VT.getVectorNumElements(); N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, @@ -5437,14 +5568,35 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // shuffle in combination with VEXTs. SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); - SmallVector SourceVecs; - SmallVector MinElts; - SmallVector MaxElts; + struct ShuffleSourceInfo { + SDValue Vec; + unsigned MinElt; + unsigned MaxElt; + // We may insert some combination of BITCASTs and VEXT nodes to force Vec to + // be compatible with the shuffle we intend to construct. As a result + // ShuffleVec will be some sliding window into the original Vec. + SDValue ShuffleVec; + + // Code should guarantee that element i in Vec starts at element "WindowBase + // + i * WindowScale in ShuffleVec". + int WindowBase; + int WindowScale; + + bool operator ==(SDValue OtherVec) { return Vec == OtherVec; } + ShuffleSourceInfo(SDValue Vec) + : Vec(Vec), MinElt(UINT_MAX), MaxElt(0), ShuffleVec(Vec), WindowBase(0), + WindowScale(1) {} + }; + + // First gather all vectors used as an immediate source for this BUILD_VECTOR + // node. + SmallVector Sources; for (unsigned i = 0; i < NumElts; ++i) { SDValue V = Op.getOperand(i); if (V.getOpcode() == ISD::UNDEF) @@ -5453,127 +5605,166 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, // A shuffle can only come from building a vector from various // elements of other vectors. 
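// ----------------------------------------------------------------------
// Illustrative sketch of the rewrite this function performs; the IR below
// is hand-written, schematic pseudo-IR. A BUILD_VECTOR whose operands are
// extractelements, e.g.
//   build_vector (extractelt %v1, 0), (extractelt %v1, 1),
//                (extractelt %v2, 0), (extractelt %v2, 1)
// becomes
//   shufflevector %v1, %v2, <0, 1, 4, 5>
// where mask lanes 0..NumElts-1 address the first source and lanes
// NumElts..2*NumElts-1 the second. The lane-numbering convention in one
// line (helper name is hypothetical):
constexpr int shuffleLane(int Source, int Lane, int NumElts) {
  return Source * NumElts + Lane; // lane 1 of %v2 in a v4f32 pair: 1*4+1 == 5
}
// ----------------------------------------------------------------------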
return SDValue(); - } else if (V.getOperand(0).getValueType().getVectorElementType() != - VT.getVectorElementType()) { - // This code doesn't know how to handle shuffles where the vector - // element types do not match (this happens because type legalization - // promotes the return type of EXTRACT_VECTOR_ELT). - // FIXME: It might be appropriate to extend this code to handle - // mismatched types. + } else if (!isa(V.getOperand(1))) { + // Furthermore, shuffles require a constant mask, whereas extractelts + // accept variable indices. return SDValue(); } - // Record this extraction against the appropriate vector if possible... + // Add this element source to the list if it's not already there. SDValue SourceVec = V.getOperand(0); - // If the element number isn't a constant, we can't effectively - // analyze what's going on. - if (!isa(V.getOperand(1))) - return SDValue(); - unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); - bool FoundSource = false; - for (unsigned j = 0; j < SourceVecs.size(); ++j) { - if (SourceVecs[j] == SourceVec) { - if (MinElts[j] > EltNo) - MinElts[j] = EltNo; - if (MaxElts[j] < EltNo) - MaxElts[j] = EltNo; - FoundSource = true; - break; - } - } + auto Source = std::find(Sources.begin(), Sources.end(), SourceVec); + if (Source == Sources.end()) + Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec)); - // Or record a new source if not... - if (!FoundSource) { - SourceVecs.push_back(SourceVec); - MinElts.push_back(EltNo); - MaxElts.push_back(EltNo); - } + // Update the minimum and maximum lane number seen. + unsigned EltNo = cast(V.getOperand(1))->getZExtValue(); + Source->MinElt = std::min(Source->MinElt, EltNo); + Source->MaxElt = std::max(Source->MaxElt, EltNo); } // Currently only do something sane when at most two source vectors - // involved. - if (SourceVecs.size() > 2) + // are involved. + if (Sources.size() > 2) return SDValue(); - SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; - int VEXTOffsets[2] = {0, 0}; + // Find out the smallest element size among result and two sources, and use + // it as element size to build the shuffle_vector. + EVT SmallestEltTy = VT.getVectorElementType(); + for (auto &Source : Sources) { + EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType(); + if (SrcEltTy.bitsLT(SmallestEltTy)) + SmallestEltTy = SrcEltTy; + } + unsigned ResMultiplier = + VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits(); + NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits(); + EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts); - // This loop extracts the usage patterns of the source vectors - // and prepares appropriate SDValues for a shuffle if possible. - for (unsigned i = 0; i < SourceVecs.size(); ++i) { - if (SourceVecs[i].getValueType() == VT) { - // No VEXT necessary - ShuffleSrcs[i] = SourceVecs[i]; - VEXTOffsets[i] = 0; + // If the source vector is too wide or too narrow, we may nevertheless be able + // to construct a compatible shuffle either by concatenating it with UNDEF or + // extracting a suitable range of elements. + for (auto &Src : Sources) { + EVT SrcVT = Src.ShuffleVec.getValueType(); + + if (SrcVT.getSizeInBits() == VT.getSizeInBits()) + continue; + + // This stage of the search produces a source with the same element type as + // the original, but with a total width matching the BUILD_VECTOR output. 
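// ----------------------------------------------------------------------
// Illustrative sketch of the width-matching step below, worked for a
// v4f32 result. A too-narrow v2f32 source is padded for free,
//   v4f32 = concat_vectors(v2f32 %src, v2f32 undef)
// while a too-wide v8f32 source donates a v4f32 window: one of its halves
// when the used lanes fit there, otherwise a VEXT starting at MinElt.
// WindowBase then records how the original lane numbers shift into that
// window (helper name is hypothetical):
constexpr int windowedLane(int EltNo, int WindowBase) {
  return EltNo + WindowBase; // VEXT case: WindowBase == -MinElt, so the
                             // lowest used lane lands on window lane 0
}
// ----------------------------------------------------------------------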
+ EVT EltVT = SrcVT.getVectorElementType(); + unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits(); + EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); + + if (SrcVT.getSizeInBits() < VT.getSizeInBits()) { + if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + // We can pad out the smaller vector for free, so if it's part of a + // shuffle... + Src.ShuffleVec = + DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec, + DAG.getUNDEF(Src.ShuffleVec.getValueType())); continue; - } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { - // It probably isn't worth padding out a smaller vector just to - // break it down again in a shuffle. - return SDValue(); } - // Since only 64-bit and 128-bit vectors are legal on ARM and - // we've eliminated the other cases... - assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && - "unexpected vector sizes in ReconstructShuffle"); + if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits()) + return SDValue(); - if (MaxElts[i] - MinElts[i] >= NumElts) { + if (Src.MaxElt - Src.MinElt >= NumSrcElts) { // Span too large for a VEXT to cope return SDValue(); } - if (MinElts[i] >= NumElts) { + if (Src.MinElt >= NumSrcElts) { // The extraction can just take the second half - VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], - DAG.getIntPtrConstant(NumElts, dl)); - } else if (MaxElts[i] < NumElts) { + Src.ShuffleVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getConstant(NumSrcElts, dl, MVT::i32)); + Src.WindowBase = -NumSrcElts; + } else if (Src.MaxElt < NumSrcElts) { // The extraction can just take the first half - VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], - DAG.getIntPtrConstant(0, dl)); + Src.ShuffleVec = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getConstant(0, dl, MVT::i32)); } else { // An actual VEXT is needed - VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], - DAG.getIntPtrConstant(0, dl)); - SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], - DAG.getIntPtrConstant(NumElts, dl)); - ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, - DAG.getConstant(VEXTOffsets[i], dl, - MVT::i32)); + SDValue VEXTSrc1 = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getConstant(0, dl, MVT::i32)); + SDValue VEXTSrc2 = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec, + DAG.getConstant(NumSrcElts, dl, MVT::i32)); + + Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1, + VEXTSrc2, + DAG.getConstant(Src.MinElt, dl, MVT::i32)); + Src.WindowBase = -Src.MinElt; } } - SmallVector Mask; - - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Entry = Op.getOperand(i); - if (Entry.getOpcode() == ISD::UNDEF) { - Mask.push_back(-1); + // Another possible incompatibility occurs from the vector element types. We + // can fix this by bitcasting the source vectors to the same type we intend + // for the shuffle. 
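// ----------------------------------------------------------------------
// Illustrative sketch of the bitcast step below, in numbers: if the
// shuffle is built in i8 lanes and one source is v2i64, that source is
// bitcast to v16i8 and its WindowScale becomes 64/8 = 8, so source
// element 1 covers shuffle lanes 8..15. The mask loop further down
// computes exactly this product (helper name is hypothetical):
constexpr int extractBase(int EltNo, int WindowScale, int WindowBase) {
  return EltNo * WindowScale + WindowBase; // mirrors "ExtractBase" below
}
// ----------------------------------------------------------------------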
+ for (auto &Src : Sources) { + EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType(); + if (SrcEltTy == SmallestEltTy) continue; - } + assert(ShuffleVT.getVectorElementType() == SmallestEltTy); + Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec); + Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits(); + Src.WindowBase *= Src.WindowScale; + } - SDValue ExtractVec = Entry.getOperand(0); - int ExtractElt = cast(Op.getOperand(i) - .getOperand(1))->getSExtValue(); - if (ExtractVec == SourceVecs[0]) { - Mask.push_back(ExtractElt - VEXTOffsets[0]); - } else { - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); - } + // Final sanity check before we try to actually produce a shuffle. + DEBUG( + for (auto Src : Sources) + assert(Src.ShuffleVec.getValueType() == ShuffleVT); + ); + + // The stars all align, our next step is to produce the mask for the shuffle. + SmallVector Mask(ShuffleVT.getVectorNumElements(), -1); + int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits(); + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + SDValue Entry = Op.getOperand(i); + if (Entry.getOpcode() == ISD::UNDEF) + continue; + + auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0)); + int EltNo = cast(Entry.getOperand(1))->getSExtValue(); + + // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit + // trunc. So only std::min(SrcBits, DestBits) actually get defined in this + // segment. + EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType(); + int BitsDefined = std::min(OrigEltTy.getSizeInBits(), + VT.getVectorElementType().getSizeInBits()); + int LanesDefined = BitsDefined / BitsPerShuffleLane; + + // This source is expected to fill ResMultiplier lanes of the final shuffle, + // starting at the appropriate offset. + int *LaneMask = &Mask[i * ResMultiplier]; + + int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase; + ExtractBase += NumElts * (Src - Sources.begin()); + for (int j = 0; j < LanesDefined; ++j) + LaneMask[j] = ExtractBase + j; } // Final check before we try to produce nonsense... - if (isShuffleMaskLegal(Mask, VT)) - return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], - &Mask[0]); + if (!isShuffleMaskLegal(Mask, ShuffleVT)) + return SDValue(); - return SDValue(); + // We can't handle more than two sources. This should have already + // been checked before this point. + assert(Sources.size() <= 2 && "Too many sources!"); + + SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) }; + for (unsigned i = 0; i < Sources.size(); ++i) + ShuffleOps[i] = Sources[i].ShuffleVec; + + SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], + ShuffleOps[1], &Mask[0]); + return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle); } /// isShuffleMaskLegal - Targets can use this to indicate that they only @@ -6235,6 +6426,8 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { + // TODO: Should this propagate fast-math-flags? + // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); @@ -6265,6 +6458,8 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { + // TODO: Should this propagate fast-math-flags? + SDValue N2; // Convert to float. 
// float4 yf = vcvt_f32_s32(vmovl_s16(y)); @@ -6337,6 +6532,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { } static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { + // TODO: Should this propagate fast-math-flags? EVT VT = Op.getValueType(); assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::UDIV"); @@ -6445,45 +6641,56 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { auto PtrVT = getPointerTy(DAG.getDataLayout()); MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. - StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); - - // Create stack object for sret. + Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr); auto &DL = DAG.getDataLayout(); - const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); - const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); - int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - SDValue SRet = DAG.getFrameIndex(FrameIdx, getPointerTy(DL)); ArgListTy Args; + bool ShouldUseSRet = Subtarget->isAPCS_ABI(); + SDValue SRet; + if (ShouldUseSRet) { + // Create stack object for sret. + const uint64_t ByteSize = DL.getTypeAllocSize(RetTy); + const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL)); + + ArgListEntry Entry; + Entry.Node = SRet; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isSRet = true; + Args.push_back(Entry); + RetTy = Type::getVoidTy(*DAG.getContext()); + } + ArgListEntry Entry; - - Entry.Node = SRet; - Entry.Ty = RetTy->getPointerTo(); - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isSRet = true; - Args.push_back(Entry); - Entry.Node = Arg; Entry.Ty = ArgTy; Entry.isSExt = false; Entry.isZExt = false; Args.push_back(Entry); - const char *LibcallName = (ArgVT == MVT::f64) - ? "__sincos_stret" : "__sincosf_stret"; + const char *LibcallName = + (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; + RTLIB::Libcall LC = + (ArgVT == MVT::f64) ? 
RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32; + CallingConv::ID CC = getLibcallCallingConv(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, - std::move(Args), 0) - .setDiscardResult(); - + CLI.setDebugLoc(dl) + .setChain(DAG.getEntryNode()) + .setCallee(CC, RetTy, Callee, std::move(Args), 0) + .setDiscardResult(ShouldUseSRet); std::pair CallResult = LowerCallTo(CLI); + if (!ShouldUseSRet) + return CallResult.first; + SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo(), false, false, false, 0); @@ -6498,6 +6705,85 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { LoadSin.getValue(0), LoadCos.getValue(0)); } +SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, + bool Signed, + SDValue &Chain) const { + EVT VT = Op.getValueType(); + assert((VT == MVT::i32 || VT == MVT::i64) && + "unexpected type for custom lowering DIV"); + SDLoc dl(Op); + + const auto &DL = DAG.getDataLayout(); + const auto &TLI = DAG.getTargetLoweringInfo(); + + const char *Name = nullptr; + if (Signed) + Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64"; + else + Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64"; + + SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL)); + + ARMTargetLowering::ArgListTy Args; + + for (auto AI : {1, 0}) { + ArgListEntry Arg; + Arg.Node = Op.getOperand(AI); + Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Arg); + } + + CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()), + ES, std::move(Args), 0); + + return LowerCallTo(CLI).first; +} + +SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, + bool Signed) const { + assert(Op.getValueType() == MVT::i32 && + "unexpected type for custom lowering DIV"); + SDLoc dl(Op); + + SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, + DAG.getEntryNode(), Op.getOperand(1)); + + return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); +} + +void ARMTargetLowering::ExpandDIV_Windows( + SDValue Op, SelectionDAG &DAG, bool Signed, + SmallVectorImpl &Results) const { + const auto &DL = DAG.getDataLayout(); + const auto &TLI = DAG.getTargetLoweringInfo(); + + assert(Op.getValueType() == MVT::i64 && + "unexpected type for custom lowering DIV"); + SDLoc dl(Op); + + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1), + DAG.getConstant(0, dl, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1), + DAG.getConstant(1, dl, MVT::i32)); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi); + + SDValue DBZCHK = + DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or); + + SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK); + + SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result); + SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result, + DAG.getConstant(32, dl, TLI.getPointerTy(DL))); + Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper); + + Results.push_back(Lower); + Results.push_back(Upper); +} + static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { // Monotonic load/store is legal for all targets if (cast(Op)->getOrdering() <= Monotonic) @@ -6513,36 +6799,22 @@ static void 
ReplaceREADCYCLECOUNTER(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { SDLoc DL(N); - SDValue Cycles32, OutChain; + // Under Power Management extensions, the cycle-count is: + // mrc p15, #0, , c9, c13, #0 + SDValue Ops[] = { N->getOperand(0), // Chain + DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), + DAG.getConstant(15, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(9, DL, MVT::i32), + DAG.getConstant(13, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32) + }; - if (Subtarget->hasPerfMon()) { - // Under Power Management extensions, the cycle-count is: - // mrc p15, #0, , c9, c13, #0 - SDValue Ops[] = { N->getOperand(0), // Chain - DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32), - DAG.getConstant(15, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32), - DAG.getConstant(9, DL, MVT::i32), - DAG.getConstant(13, DL, MVT::i32), - DAG.getConstant(0, DL, MVT::i32) - }; - - Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, - DAG.getVTList(MVT::i32, MVT::Other), Ops); - OutChain = Cycles32.getValue(1); - } else { - // Intrinsic is defined to return 0 on unsupported platforms. Technically - // there are older ARM CPUs that have implementation-specific ways of - // obtaining this information (FIXME!). - Cycles32 = DAG.getConstant(0, DL, MVT::i32); - OutChain = DAG.getEntryNode(); - } - - - SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, - Cycles32, DAG.getConstant(0, DL, MVT::i32)); - Results.push_back(Cycles64); - Results.push_back(OutChain); + SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, + DAG.getVTList(MVT::i32, MVT::Other), Ops); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32, + DAG.getConstant(0, DL, MVT::i32))); + Results.push_back(Cycles32.getValue(1)); } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -6576,15 +6848,17 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); - case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); + case ISD::SREM: return LowerREM(Op.getNode(), DAG); + case ISD::UREM: return LowerREM(Op.getNode(), DAG); case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); @@ -6622,13 +6896,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { llvm_unreachable("Don't know how to custom lower this!"); case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + case ARMISD::WIN__DBZCHK: return SDValue(); } } /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. 
 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
-                                           SmallVectorImpl<SDValue>&Results,
+                                           SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
   SDValue Res;
   switch (N->getOpcode()) {
@@ -6644,9 +6919,18 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   case ISD::SRA:
     Res = Expand64BitShift(N, DAG, Subtarget);
     break;
+  case ISD::SREM:
+  case ISD::UREM:
+    Res = LowerREM(N, DAG);
+    break;
   case ISD::READCYCLECOUNTER:
     ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
     return;
+  case ISD::UDIV:
+  case ISD::SDIV:
+    assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
+    return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
+                             Results);
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -6683,12 +6967,12 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,

   // Grab constant pool and fixed stack memory operands.
   MachineMemOperand *CPMMO =
-    MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
-                             MachineMemOperand::MOLoad, 4, 4);
+      MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF),
+                               MachineMemOperand::MOLoad, 4, 4);

   MachineMemOperand *FIMMOSt =
-    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
-                             MachineMemOperand::MOStore, 4, 4);
+      MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
+                               MachineMemOperand::MOStore, 4, 4);

   // Load the address of the dispatch MBB into the jump buffer.
   if (isThumb2) {
@@ -6792,7 +7076,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
   MachineModuleInfo &MMI = MF->getMMI();
   for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
        ++BB) {
-    if (!BB->isLandingPad()) continue;
+    if (!BB->isEHPad()) continue;

     // FIXME: We should assert that the EH_LABEL is the first MI in the landing
     // pad.
@@ -6807,7 +7091,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,
         for (SmallVectorImpl<unsigned>::iterator
                CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
              CSI != CSE; ++CSI) {
-          CallSiteNumToLPad[*CSI].push_back(BB);
+          CallSiteNumToLPad[*CSI].push_back(&*BB);
           MaxCSNum = std::max(MaxCSNum, *CSI);
         }
         break;
@@ -6840,7 +7124,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI,

   // Shove the dispatch's address into the return slot in the function context.
   MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
-  DispatchBB->setIsLandingPad();
+  DispatchBB->setIsEHPad();

   MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
   unsigned trap_opcode;
@@ -6864,10 +7148,9 @@
   // context.
SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI); - MachineMemOperand *FIMMOLd = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad | - MachineMemOperand::MOVolatile, 4, 4); + MachineMemOperand *FIMMOLd = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), + MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile, 4, 4); MachineInstrBuilder MIB; MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup)); @@ -6982,9 +7265,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3)); - MachineMemOperand *JTMMOLd = - MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), - MachineMemOperand::MOLoad, 4, 4); + MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( + MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) @@ -7066,9 +7348,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) .addJumpTableIndex(MJTI)); - MachineMemOperand *JTMMOLd = - MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), - MachineMemOperand::MOLoad, 4, 4); + MachineMemOperand *JTMMOLd = MF->getMachineMemOperand( + MachinePointerInfo::getJumpTable(*MF), MachineMemOperand::MOLoad, 4, 4); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); AddDefaultPred( BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) @@ -7109,13 +7390,14 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, BB->succ_end()); while (!Successors.empty()) { MachineBasicBlock *SMBB = Successors.pop_back_val(); - if (SMBB->isLandingPad()) { + if (SMBB->isEHPad()) { BB->removeSuccessor(SMBB); MBBLPads.push_back(SMBB); } } - BB->addSuccessor(DispatchBB); + BB->addSuccessor(DispatchBB, BranchProbability::getZero()); + BB->normalizeSuccProbs(); // Find the invoke call and mark all of the callee-saved registers as // 'implicit defined' so that they're spilled. This prevents code from @@ -7157,7 +7439,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr *MI, // landing pad now. for (SmallVectorImpl::iterator I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) - (*I)->setIsLandingPad(false); + (*I)->setIsEHPad(false); // The instruction is gone now. MI->eraseFromParent(); @@ -7280,8 +7562,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Otherwise, we will generate unrolled scalar copies. 
const TargetInstrInfo *TII = Subtarget->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI->getOperand(0).getReg(); unsigned src = MI->getOperand(1).getReg(); @@ -7573,6 +7854,32 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, return MBB; } +MachineBasicBlock * +ARMTargetLowering::EmitLowered__dbzchk(MachineInstr *MI, + MachineBasicBlock *MBB) const { + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + + MachineBasicBlock *ContBB = MF->CreateMachineBasicBlock(); + MF->push_back(ContBB); + ContBB->splice(ContBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + MBB->addSuccessor(ContBB); + + MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); + MF->push_back(TrapBB); + BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249); + MBB->addSuccessor(TrapBB); + + BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ)) + .addReg(MI->getOperand(0).getReg()) + .addMBB(TrapBB); + + MI->eraseFromParent(); + return ContBB; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -7643,8 +7950,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -7741,6 +8047,9 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::tInt_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: + return BB; + + case ARM::Int_eh_sjlj_setup_dispatch: EmitSjLjDispatchBlock(MI, BB); return BB; @@ -7759,8 +8068,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) // SinkBB: V1 = PHI(V2, V3) const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator BBI = BB; - ++BBI; + MachineFunction::iterator BBI = ++BB->getIterator(); MachineFunction *Fn = BB->getParent(); MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); @@ -7824,11 +8132,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitStructByval(MI, BB); case ARM::WIN__CHKSTK: return EmitLowered__chkstk(MI, BB); + case ARM::WIN__DBZCHK: + return EmitLowered__dbzchk(MI, BB); + } +} + +/// \brief Attaches vregs to MEMCPY that it will use as scratch registers +/// when it is expanded into LDM/STM. This is done as a post-isel lowering +/// instead of as a custom inserter because we need the use list from the SDNode. +static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, + MachineInstr *MI, const SDNode *Node) { + bool isThumb1 = Subtarget->isThumb1Only(); + + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *MF = MI->getParent()->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + MachineInstrBuilder MIB(*MF, MI); + + // If the new dst/src is unused mark it as dead. + if (!Node->hasAnyUseOfValue(0)) { + MI->getOperand(0).setIsDead(true); + } + if (!Node->hasAnyUseOfValue(1)) { + MI->getOperand(1).setIsDead(true); + } + + // The MEMCPY both defines and kills the scratch registers. 
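Editor's note (not part of the patch): the loop that follows attaches one scratch register per word moved in an LDM/STM iteration. A toy model of the idea, under the assumption that MemcpyPseudo and attachScratch are purely illustrative and not LLVM API:

    #include <vector>

    // A pseudo that later expands to LDM/STM needs N scratch registers, but
    // their contents are meaningless once the pseudo is gone. Modelling each
    // as a def that is immediately dead lets the register allocator pick
    // registers without extending any live range past the instruction.
    struct MemcpyPseudo {
      std::vector<unsigned> DeadScratchDefs;
    };

    void attachScratch(MemcpyPseudo &MI, unsigned NumWords, unsigned &NextVReg) {
      for (unsigned I = 0; I != NumWords; ++I)
        MI.DeadScratchDefs.push_back(NextVReg++); // born and killed here
    }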
+  for (unsigned I = 0; I != MI->getOperand(4).getImm(); ++I) {
+    unsigned TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
+                                                         : &ARM::GPRRegClass);
+    MIB.addReg(TmpReg, RegState::Define|RegState::Dead);
+  }
+}

 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
                                                       SDNode *Node) const {
+  if (MI->getOpcode() == ARM::MEMCPY) {
+    attachMEMCPYScratchRegs(Subtarget, MI, Node);
+    return;
+  }
+
   const MCInstrDesc *MCID = &MI->getDesc();
   // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
   // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
@@ -7898,10 +8241,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,

 // Helper function that checks if N is a null or all ones constant.
 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
-  if (!C)
-    return false;
-  return AllOnes ? C->isAllOnesValue() : C->isNullValue();
+  return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
 }

 // Return true if N is conditionally 0 or all ones.
@@ -8723,12 +9063,88 @@ static SDValue PerformXORCombine(SDNode *N,
   return SDValue();
 }

-/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
-/// the bits being cleared by the AND are not demanded by the BFI.
+// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn)
+// and return it, and fill in FromMask and ToMask with (consecutive) bits
+// in "from" to be extracted and their position in "to" (Rd).
+static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
+  assert(N->getOpcode() == ARMISD::BFI);
+
+  SDValue From = N->getOperand(1);
+  ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
+  FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(),
+                                  ToMask.countPopulation());
+
+  // If the Base came from a SHR #C, we can deduce that it is really testing bit
+  // #C in the base of the SHR.
+  if (From->getOpcode() == ISD::SRL &&
+      isa<ConstantSDNode>(From->getOperand(1))) {
+    APInt Shift = cast<ConstantSDNode>(From->getOperand(1))->getAPIntValue();
+    assert(Shift.getLimitedValue() < 32 && "Shift too large!");
+    FromMask <<= Shift.getLimitedValue(31);
+    From = From->getOperand(0);
+  }
+
+  return From;
+}
+
+// If A and B contain one contiguous set of bits, does A | B == A . B?
+//
+// Neither A nor B must be zero.
+static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
+  unsigned LastActiveBitInA = A.countTrailingZeros();
+  unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
+  return LastActiveBitInA - 1 == FirstActiveBitInB;
+}
+
+static SDValue FindBFIToCombineWith(SDNode *N) {
+  // We have a BFI in N. Follow a possible chain of BFIs and find a BFI it can
+  // combine with, if one exists.
+  APInt ToMask, FromMask;
+  SDValue From = ParseBFI(N, ToMask, FromMask);
+  SDValue To = N->getOperand(0);
+
+  // Now check for a compatible BFI to merge with. We can pass through BFIs that
+  // aren't compatible, but not if they set the same bit in their destination as
+  // we do (or that of any BFI we're going to combine with).
+  SDValue V = To;
+  APInt CombinedToMask = ToMask;
+  while (V.getOpcode() == ARMISD::BFI) {
+    APInt NewToMask, NewFromMask;
+    SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
+    if (NewFrom != From) {
+      // This BFI has a different base. Keep going.
+      CombinedToMask |= NewToMask;
+      V = V.getOperand(0);
+      continue;
+    }
+
+    // Do the written bits conflict with any we've seen so far?
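Editor's note (not part of the patch): the conflict check continues below. For orientation, a plain-integer model of the mask recovery ParseBFI performs; the helper name and fixed 32-bit width are assumptions for the sketch, and the SRL adjustment is left out:

    #include <bitset>
    #include <cstdint>

    // BFI's third operand is an inverted mask: its zero bits mark the field
    // written in Rd. The bits taken from Rn are the same number of low bits.
    void parseBFIMasks(uint32_t InvMask, uint32_t &ToMask, uint32_t &FromMask) {
      ToMask = ~InvMask;
      unsigned Width = std::bitset<32>(ToMask).count();
      FromMask = Width >= 32 ? ~0u : ((1u << Width) - 1);
    }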
+ if ((NewToMask & CombinedToMask).getBoolValue()) + // Conflicting bits - bail out because going further is unsafe. + return SDValue(); + + // Are the new bits contiguous when combined with the old bits? + if (BitsProperlyConcatenate(ToMask, NewToMask) && + BitsProperlyConcatenate(FromMask, NewFromMask)) + return V; + if (BitsProperlyConcatenate(NewToMask, ToMask) && + BitsProperlyConcatenate(NewFromMask, FromMask)) + return V; + + // We've seen a write to some bits, so track it. + CombinedToMask |= NewToMask; + // Keep going... + V = V.getOperand(0); + } + + return SDValue(); +} + static SDValue PerformBFICombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDValue N1 = N->getOperand(1); if (N1.getOpcode() == ISD::AND) { + // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff + // the bits being cleared by the AND are not demanded by the BFI. ConstantSDNode *N11C = dyn_cast(N1.getOperand(1)); if (!N11C) return SDValue(); @@ -8744,6 +9160,38 @@ static SDValue PerformBFICombine(SDNode *N, return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), N->getOperand(0), N1.getOperand(0), N->getOperand(2)); + } else if (N->getOperand(0).getOpcode() == ARMISD::BFI) { + // We have a BFI of a BFI. Walk up the BFI chain to see how long it goes. + // Keep track of any consecutive bits set that all come from the same base + // value. We can combine these together into a single BFI. + SDValue CombineBFI = FindBFIToCombineWith(N); + if (CombineBFI == SDValue()) + return SDValue(); + + // We've found a BFI. + APInt ToMask1, FromMask1; + SDValue From1 = ParseBFI(N, ToMask1, FromMask1); + + APInt ToMask2, FromMask2; + SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2); + assert(From1 == From2); + (void)From2; + + // First, unlink CombineBFI. + DCI.DAG.ReplaceAllUsesWith(CombineBFI, CombineBFI.getOperand(0)); + // Then create a new BFI, combining the two together. + APInt NewFromMask = FromMask1 | FromMask2; + APInt NewToMask = ToMask1 | ToMask2; + + EVT VT = N->getValueType(0); + SDLoc dl(N); + + if (NewFromMask[0] == 0) + From1 = DCI.DAG.getNode( + ISD::SRL, dl, VT, From1, + DCI.DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT)); + return DCI.DAG.getNode(ARMISD::BFI, dl, VT, N->getOperand(0), From1, + DCI.DAG.getConstant(~NewToMask, dl, VT)); } return SDValue(); } @@ -9521,32 +9969,6 @@ static SDValue PerformSTORECombine(SDNode *N, return SDValue(); } -// isConstVecPow2 - Return true if each vector element is a power of 2, all -// elements are the same constant, C, and Log2(C) ranges from 1 to 32. -static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) -{ - integerPart cN; - integerPart c0 = 0; - for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); - I != E; I++) { - ConstantFPSDNode *C = dyn_cast(ConstVec.getOperand(I)); - if (!C) - return false; - - bool isExact; - APFloat APF = C->getValueAPF(); - if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) - != APFloat::opOK || !isExact) - return false; - - c0 = (I == 0) ? cN : c0; - if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) - return false; - } - C = c0; - return true; -} - /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) /// can replace combinations of VMUL and VCVT (floating-point to integer) /// when the VMUL has a constant operand that is a power of 2. 
@@ -9556,30 +9978,25 @@ static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
 /// vcvt.s32.f32 d16, d16
 /// becomes:
 /// vcvt.s32.f32 d16, d16, #3
-static SDValue PerformVCVTCombine(SDNode *N,
-                                  TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *Subtarget) {
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue Op = N->getOperand(0);
-
-  if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
-      Op.getOpcode() != ISD::FMUL)
+  if (!Subtarget->hasNEON())
     return SDValue();

-  uint64_t C;
-  SDValue N0 = Op->getOperand(0);
-  SDValue ConstVec = Op->getOperand(1);
-  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+  SDValue Op = N->getOperand(0);
+  if (!Op.getValueType().isVector() || Op.getOpcode() != ISD::FMUL)
+    return SDValue();

-  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
-      !isConstVecPow2(ConstVec, isSigned, C))
+  SDValue ConstVec = Op->getOperand(1);
+  if (!isa<BuildVectorSDNode>(ConstVec))
     return SDValue();

   MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+  uint32_t FloatBits = FloatTy.getSizeInBits();
   MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+  uint32_t IntBits = IntTy.getSizeInBits();
   unsigned NumLanes = Op.getValueType().getVectorNumElements();
-  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 ||
-      NumLanes > 4) {
+  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
     // These instructions only exist converting from f32 to i32. We can handle
     // smaller integers by generating an extra truncate, but larger ones would
     // be lossy. We also can't handle more than 4 lanes, since these instructions
@@ -9587,16 +10004,22 @@ static SDValue PerformVCVTCombine(SDNode *N,
     return SDValue();
   }

+  BitVector UndefElements;
+  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+  if (C == -1 || C == 0 || C > 32)
+    return SDValue();
+
   SDLoc dl(N);
+  bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
   unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
     Intrinsic::arm_neon_vcvtfp2fxu;
-  SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
-                                NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
-                                DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
-                                N0,
-                                DAG.getConstant(Log2_64(C), dl, MVT::i32));
+  SDValue FixConv = DAG.getNode(
+      ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+      DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
+      DAG.getConstant(C, dl, MVT::i32));

-  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+  if (IntBits < FloatBits)
     FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);

   return FixConv;
@@ -9611,38 +10034,44 @@ static SDValue PerformVCVTCombine(SDNode *N,
 /// vdiv.f32 d16, d17, d16
 /// becomes:
 /// vcvt.f32.s32 d16, d16, #3
-static SDValue PerformVDIVCombine(SDNode *N,
-                                  TargetLowering::DAGCombinerInfo &DCI,
+static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
                                   const ARMSubtarget *Subtarget) {
-  SelectionDAG &DAG = DCI.DAG;
+  if (!Subtarget->hasNEON())
+    return SDValue();
+
   SDValue Op = N->getOperand(0);
   unsigned OpOpcode = Op.getNode()->getOpcode();
-
-  if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+  if (!N->getValueType(0).isVector() ||
       (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
     return SDValue();

-  uint64_t C;
   SDValue ConstVec = N->getOperand(1);
-  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
-
-  if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
-      !isConstVecPow2(ConstVec, isSigned, C))
+  if (!isa<BuildVectorSDNode>(ConstVec))
     return SDValue();

   MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+  uint32_t FloatBits = FloatTy.getSizeInBits();
   MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
-  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+  uint32_t IntBits = IntTy.getSizeInBits();
+  unsigned NumLanes = Op.getValueType().getVectorNumElements();
+  if (FloatBits != 32 || IntBits > 32 || NumLanes > 4) {
     // These instructions only exist converting from i32 to f32. We can handle
     // smaller integers by generating an extra extend, but larger ones would
-    // be lossy.
+    // be lossy. We also can't handle more than 4 lanes, since these instructions
+    // only support v2i32/v4i32 types.
     return SDValue();
   }

+  BitVector UndefElements;
+  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
+  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
+  if (C == -1 || C == 0 || C > 32)
+    return SDValue();
+
   SDLoc dl(N);
+  bool isSigned = OpOpcode == ISD::SINT_TO_FP;
   SDValue ConvInput = Op.getOperand(0);
-  unsigned NumLanes = Op.getValueType().getVectorNumElements();
-  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+  if (IntBits < FloatBits)
     ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                             dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
                             ConvInput);
@@ -9652,7 +10081,7 @@ static SDValue PerformVDIVCombine(SDNode *N,

   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
                      DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
-                     ConvInput, DAG.getConstant(Log2_64(C), dl, MVT::i32));
+                     ConvInput, DAG.getConstant(C, dl, MVT::i32));
 }

 /// Getvshiftimm - Check if this is a valid build_vector for the immediate
@@ -9680,7 +10109,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
 /// 0 <= Value <= ElementBits for a long left shift.
 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
   assert(VT.isVector() && "vector shift count is not a vector type");
-  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+  int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
   if (! getVShiftImm(Op, ElementBits, Cnt))
     return false;
   return (Cnt >= 0 && (isLong ?
Cnt-1 : Cnt) < ElementBits); @@ -9695,12 +10124,16 @@ static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, int64_t &Cnt) { assert(VT.isVector() && "vector shift count is not a vector type"); - unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + int64_t ElementBits = VT.getVectorElementType().getSizeInBits(); if (! getVShiftImm(Op, ElementBits, Cnt)) return false; - if (isIntrinsic) + if (!isIntrinsic) + return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); + if (Cnt >= -(isNarrow ? ElementBits/2 : ElementBits) && Cnt <= -1) { Cnt = -Cnt; - return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); + return true; + } + return false; } /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. @@ -9939,89 +10372,123 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC -/// to match f32 max/min patterns to use NEON vmax/vmin instructions. -static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, - const ARMSubtarget *ST) { - // If the target supports NEON, try to use vmax/vmin instructions for f32 - // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, - // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is - // a NaN; only do the transformation when it matches that behavior. +static void computeKnownBits(SelectionDAG &DAG, SDValue Op, APInt &KnownZero, + APInt &KnownOne) { + if (Op.getOpcode() == ARMISD::BFI) { + // Conservatively, we can recurse down the first operand + // and just mask out all affected bits. + computeKnownBits(DAG, Op.getOperand(0), KnownZero, KnownOne); - // For now only do this when using NEON for FP operations; if using VFP, it - // is not obvious that the benefit outweighs the cost of switching to the - // NEON pipeline. - if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || - N->getValueType(0) != MVT::f32) + // The operand to BFI is already a mask suitable for removing the bits it + // sets. + ConstantSDNode *CI = cast(Op.getOperand(2)); + APInt Mask = CI->getAPIntValue(); + KnownZero &= Mask; + KnownOne &= Mask; + return; + } + if (Op.getOpcode() == ARMISD::CMOV) { + APInt KZ2(KnownZero.getBitWidth(), 0); + APInt KO2(KnownOne.getBitWidth(), 0); + computeKnownBits(DAG, Op.getOperand(1), KnownZero, KnownOne); + computeKnownBits(DAG, Op.getOperand(2), KZ2, KO2); + + KnownZero &= KZ2; + KnownOne &= KO2; + return; + } + return DAG.computeKnownBits(Op, KnownZero, KnownOne); +} + +SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { + // If we have a CMOV, OR and AND combination such as: + // if (x & CN) + // y |= CM; + // + // And: + // * CN is a single bit; + // * All bits covered by CM are known zero in y + // + // Then we can convert this into a sequence of BFI instructions. This will + // always be a win if CM is a single bit, will always be no worse than the + // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is + // three bits (due to the extra IT instruction). + + SDValue Op0 = CMOV->getOperand(0); + SDValue Op1 = CMOV->getOperand(1); + auto CCNode = cast(CMOV->getOperand(2)); + auto CC = CCNode->getAPIntValue().getLimitedValue(); + SDValue CmpZ = CMOV->getOperand(4); + + // The compare must be against zero. 
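Editor's note (not part of the patch): the zero-compare and operand checks resume immediately below. For reference, the C-level shape this combine looks for, with illustrative constants:

    // CN = 0x4 is a single bit and CM = 0x30 covers bits known to be zero in
    // y, so the DAG is CMOV(y, y | CM, NE, CMPZ(x & CN)) and the combine can
    // rewrite it as popcount(CM) BFI instructions that insert bit 2 of x into
    // bits 4 and 5 of y.
    unsigned orIfBitSet(unsigned x, unsigned y) {
      if (x & 0x4)
        y |= 0x30;
      return y;
    }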
+ if (!isNullConstant(CmpZ->getOperand(1))) return SDValue(); - SDValue CondLHS = N->getOperand(0); - SDValue CondRHS = N->getOperand(1); - SDValue LHS = N->getOperand(2); - SDValue RHS = N->getOperand(3); - ISD::CondCode CC = cast(N->getOperand(4))->get(); + assert(CmpZ->getOpcode() == ARMISD::CMPZ); + SDValue And = CmpZ->getOperand(0); + if (And->getOpcode() != ISD::AND) + return SDValue(); + ConstantSDNode *AndC = dyn_cast(And->getOperand(1)); + if (!AndC || !AndC->getAPIntValue().isPowerOf2()) + return SDValue(); + SDValue X = And->getOperand(0); - unsigned Opcode = 0; - bool IsReversed; - if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { - IsReversed = false; // x CC y ? x : y - } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { - IsReversed = true ; // x CC y ? y : x + if (CC == ARMCC::EQ) { + // We're performing an "equal to zero" compare. Swap the operands so we + // canonicalize on a "not equal to zero" compare. + std::swap(Op0, Op1); } else { + assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?"); + } + + if (Op1->getOpcode() != ISD::OR) return SDValue(); + + ConstantSDNode *OrC = dyn_cast(Op1->getOperand(1)); + if (!OrC) + return SDValue(); + SDValue Y = Op1->getOperand(0); + + if (Op0 != Y) + return SDValue(); + + // Now, is it profitable to continue? + APInt OrCI = OrC->getAPIntValue(); + unsigned Heuristic = Subtarget->isThumb() ? 3 : 2; + if (OrCI.countPopulation() > Heuristic) + return SDValue(); + + // Lastly, can we determine that the bits defined by OrCI + // are zero in Y? + APInt KnownZero, KnownOne; + computeKnownBits(DAG, Y, KnownZero, KnownOne); + if ((OrCI & KnownZero) != OrCI) + return SDValue(); + + // OK, we can do the combine. + SDValue V = Y; + SDLoc dl(X); + EVT VT = X.getValueType(); + unsigned BitInX = AndC->getAPIntValue().logBase2(); + + if (BitInX != 0) { + // We must shift X first. + X = DAG.getNode(ISD::SRL, dl, VT, X, + DAG.getConstant(BitInX, dl, VT)); } - bool IsUnordered; - switch (CC) { - default: break; - case ISD::SETOLT: - case ISD::SETOLE: - case ISD::SETLT: - case ISD::SETLE: - case ISD::SETULT: - case ISD::SETULE: - // If LHS is NaN, an ordered comparison will be false and the result will - // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS - // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. - IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); - if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) - break; - // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin - // will return -0, so vmin can only be used for unsafe math or if one of - // the operands is known to be nonzero. - if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && - !DAG.getTarget().Options.UnsafeFPMath && - !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) - break; - Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; - break; - - case ISD::SETOGT: - case ISD::SETOGE: - case ISD::SETGT: - case ISD::SETGE: - case ISD::SETUGT: - case ISD::SETUGE: - // If LHS is NaN, an ordered comparison will be false and the result will - // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS - // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. - IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); - if (!DAG.isKnownNeverNaN(IsUnordered ? 
RHS : LHS)) - break; - // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax - // will return +0, so vmax can only be used for unsafe math or if one of - // the operands is known to be nonzero. - if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && - !DAG.getTarget().Options.UnsafeFPMath && - !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) - break; - Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; - break; + for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits(); + BitInY < NumActiveBits; ++BitInY) { + if (OrCI[BitInY] == 0) + continue; + APInt Mask(VT.getSizeInBits(), 0); + Mask.setBit(BitInY); + V = DAG.getNode(ARMISD::BFI, dl, VT, V, X, + // Confusingly, the operand is an *inverted* mask. + DAG.getConstant(~Mask, dl, VT)); } - if (!Opcode) - return SDValue(); - return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); + return V; } /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. @@ -10042,6 +10509,13 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { ARMCC::CondCodes CC = (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); + // BFI is only available on V6T2+. + if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) { + SDValue R = PerformCMOVToBFICombine(N, DAG); + if (R) + return R; + } + // Simplify // mov r1, r0 // cmp r1, x @@ -10108,8 +10582,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); - case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); + case ISD::FP_TO_UINT: + return PerformVCVTCombine(N, DCI.DAG, Subtarget); + case ISD::FDIV: + return PerformVDIVCombine(N, DCI.DAG, Subtarget); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: @@ -10117,7 +10593,6 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); - case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); case ISD::LOAD: return PerformLOADCombine(N, DCI); case ARMISD::VLD2DUP: @@ -11043,8 +11518,48 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } +static RTLIB::Libcall getDivRemLibcall( + const SDNode *N, MVT::SimpleValueType SVT) { + assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || + N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && + "Unhandled Opcode in getDivRemLibcall"); + bool isSigned = N->getOpcode() == ISD::SDIVREM || + N->getOpcode() == ISD::SREM; + RTLIB::Libcall LC; + switch (SVT) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC = isSigned ? 
RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + } + return LC; +} + +static TargetLowering::ArgListTy getDivRemArgList( + const SDNode *N, LLVMContext *Context) { + assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM || + N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) && + "Unhandled Opcode in getDivRemArgList"); + bool isSigned = N->getOpcode() == ISD::SDIVREM || + N->getOpcode() == ISD::SREM; + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + EVT ArgVT = N->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*Context); + Entry.Node = N->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + return Args; +} + SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); + assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid()) && + "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && "Invalid opcode for Div/Rem lowering"); @@ -11052,28 +11567,12 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); - RTLIB::Libcall LC; - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC = isSigned ? 
RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
-  }
-
+  RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
+                                       VT.getSimpleVT().SimpleTy);
   SDValue InChain = DAG.getEntryNode();

-  TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
-  for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
-    EVT ArgVT = Op->getOperand(i).getValueType();
-    Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-    Entry.Node = Op->getOperand(i);
-    Entry.Ty = ArgTy;
-    Entry.isSExt = isSigned;
-    Entry.isZExt = !isSigned;
-    Args.push_back(Entry);
-  }
+  TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
+                                                    DAG.getContext());

   SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                          getPointerTy(DAG.getDataLayout()));
@@ -11090,6 +11589,47 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
   return CallInfo.first;
 }

+// Lowers REM using divmod helpers
+// see RTABI section 4.2/4.3
+SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
+  // Build return types (div and rem)
+  std::vector<Type*> RetTyParams;
+  Type *RetTyElement;
+
+  switch (N->getValueType(0).getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unexpected request for libcall!");
+  case MVT::i8:  RetTyElement = Type::getInt8Ty(*DAG.getContext());  break;
+  case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
+  case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
+  case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
+  }
+
+  RetTyParams.push_back(RetTyElement);
+  RetTyParams.push_back(RetTyElement);
+  ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
+  Type *RetTy = StructType::get(*DAG.getContext(), ret);
+
+  RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
+                                                                  SimpleTy);
+  SDValue InChain = DAG.getEntryNode();
+  TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext());
+  bool isSigned = N->getOpcode() == ISD::SREM;
+  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+                                         getPointerTy(DAG.getDataLayout()));
+
+  // Lower call
+  CallLoweringInfo CLI(DAG);
+  CLI.setChain(InChain)
+     .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args), 0)
+     .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+  // Return second (rem) result operand (first contains div)
+  SDNode *ResNode = CallResult.first.getNode();
+  assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
+  return ResNode->getOperand(1);
+}
+
 SDValue
 ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
   assert(Subtarget->isTargetWindows() && "unsupported target platform");
@@ -11124,8 +11664,8 @@ SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());

   SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
-                     /*isSigned*/ false, SDLoc(Op)).first;
+  return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
+                     SDLoc(Op)).first;
 }

 SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
@@ -11137,8 +11677,8 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());

   SDValue SrcVal = Op.getOperand(0);
-  return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
-                     /*isSigned*/ false, SDLoc(Op)).first;
+  return makeLibCall(DAG, LC,
Op.getValueType(), SrcVal, /*isSigned*/ false, + SDLoc(Op)).first; } bool @@ -11186,7 +11726,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); - uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; @@ -11212,7 +11752,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += DL.getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); @@ -11295,8 +11835,6 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } - Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); @@ -11392,19 +11930,26 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that // guarantee, see DDI0406C ARM architecture reference manual, // sections A8.8.72-74 LDRD) -bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { +TargetLowering::AtomicExpansionKind +ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { unsigned Size = LI->getType()->getPrimitiveSizeInBits(); - return (Size == 64) && !Subtarget->isMClass(); + return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; } // For the real atomic operations, we have ldrex/strex up to 32 bits, // and up to 64 bits on the non-M profiles -TargetLoweringBase::AtomicRMWExpansionKind +TargetLowering::AtomicExpansionKind ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { unsigned Size = AI->getType()->getPrimitiveSizeInBits(); return (Size <= (Subtarget->isMClass() ? 32U : 64U)) - ? AtomicRMWExpansionKind::LLSC - : AtomicRMWExpansionKind::None; + ? AtomicExpansionKind::LLSC + : AtomicExpansionKind::None; +} + +bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR( + AtomicCmpXchgInst *AI) const { + return true; } // This has so far only been implemented for MachO. @@ -11419,7 +11964,7 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, return false; // Floating point values and vector values map to the same register file. - // Therefore, althought we could do a store extract of a vector type, this is + // Therefore, although we could do a store extract of a vector type, this is // better to leave at float as we have more freedom in the addressing mode for // those. 
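Editor's note (not part of the patch; the float/vector check of canCombineStoreAndExtract resumes below): the atomic hunks a few lines above encode a simple size rule. A minimal restatement, where the enum and function are editor-invented stand-ins for the TargetLowering hooks:

    // ldrex/strex reach 32 bits on all ARM targets and 64 bits off M-profile;
    // loads within the limit become LL-only expansions, RMW ops become full
    // LL/SC loops, and anything larger stays a library call.
    enum class Expansion { None, LLOnly, LLSC };

    Expansion classifyAtomic(unsigned Bits, bool IsMClass, bool IsLoad) {
      if (IsLoad)
        return (Bits == 64 && !IsMClass) ? Expansion::LLOnly : Expansion::None;
      return Bits <= (IsMClass ? 32u : 64u) ? Expansion::LLSC : Expansion::None;
    }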
if (VectorTy->isFPOrFPVectorTy()) @@ -11441,6 +11986,14 @@ bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, return false; } +bool ARMTargetLowering::isCheapToSpeculateCttz() const { + return Subtarget->hasV6T2Ops(); +} + +bool ARMTargetLowering::isCheapToSpeculateCtlz() const { + return Subtarget->hasV6T2Ops(); +} + Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); @@ -11477,6 +12030,14 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, cast(Addr->getType())->getElementType()); } +void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( + IRBuilder<> &Builder) const { + if (!Subtarget->hasV7Ops()) + return; + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + Builder.CreateCall(llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); +} + Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { @@ -11534,12 +12095,12 @@ bool ARMTargetLowering::lowerInterleavedLoad( Type *EltTy = VecTy->getVectorElementType(); const DataLayout &DL = LI->getModule()->getDataLayout(); - unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); - bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; + unsigned VecSize = DL.getTypeSizeInBits(VecTy); + bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip illegal vector types and vector types of i64/f64 element (vldN doesn't - // support i64/f64 element). - if ((VecSize != 64 && VecSize != 128) || EltIs64Bits) + // Skip if we do not have NEON and skip illegal vector types and vector types + // with i64/f64 elements (vldN doesn't support i64/f64 elements). + if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128) || EltIs64Bits) return false; // A pointer vector can not be the return type of the ldN intrinsics. Need to @@ -11552,9 +12113,6 @@ bool ARMTargetLowering::lowerInterleavedLoad( Intrinsic::arm_neon_vld3, Intrinsic::arm_neon_vld4}; - Function *VldnFunc = - Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], VecTy); - IRBuilder<> Builder(LI); SmallVector Ops; @@ -11562,6 +12120,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( Ops.push_back(Builder.CreateBitCast(LI->getPointerOperand(), Int8Ptr)); Ops.push_back(Builder.getInt32(LI->getAlignment())); + Type *Tys[] = { VecTy, Int8Ptr }; + Function *VldnFunc = + Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); CallInst *VldN = Builder.CreateCall(VldnFunc, Ops, "vldN"); // Replace uses of each shufflevector with the corresponding vector loaded @@ -11624,12 +12185,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); - bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); + bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; - // Skip illegal sub vector types and vector types of i64/f64 element (vstN - // doesn't support i64/f64 element). - if ((SubVecSize != 64 && SubVecSize != 128) || EltIs64Bits) + // Skip if we do not have NEON and skip illegal vector types and vector types + // with i64/f64 elements (vstN doesn't support i64/f64 elements). 
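Editor's note (not part of the patch): the NEON guard continues below. For orientation, a scalar model of the factor-2 case that these vldN/vstN interleaved-access hooks cover; the function name is illustrative only:

    #include <cstdint>

    // vld2 performs this de-interleave in a single instruction (even lanes to
    // A, odd lanes to B); vst2 is the inverse interleave on the store side.
    void deinterleave2(const uint32_t *Src, uint32_t *A, uint32_t *B,
                       unsigned NumPairs) {
      for (unsigned I = 0; I != NumPairs; ++I) {
        A[I] = Src[2 * I];
        B[I] = Src[2 * I + 1];
      }
    }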
+ if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128) || + EltIs64Bits) return false; Value *Op0 = SVI->getOperand(0); @@ -11650,17 +12212,18 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, SubVecTy = VectorType::get(IntTy, NumSubElts); } - static Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, - Intrinsic::arm_neon_vst3, - Intrinsic::arm_neon_vst4}; - Function *VstNFunc = Intrinsic::getDeclaration( - SI->getModule(), StoreInts[Factor - 2], SubVecTy); - + static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2, + Intrinsic::arm_neon_vst3, + Intrinsic::arm_neon_vst4}; SmallVector Ops; Type *Int8Ptr = Builder.getInt8PtrTy(SI->getPointerAddressSpace()); Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), Int8Ptr)); + Type *Tys[] = { Int8Ptr, SubVecTy }; + Function *VstNFunc = Intrinsic::getDeclaration( + SI->getModule(), StoreInts[Factor - 2], Tys); + // Split the shufflevector operands into sub vectors for the new vstN call. for (unsigned i = 0; i < Factor; i++) Ops.push_back(Builder.CreateShuffleVector( @@ -11681,14 +12244,14 @@ enum HABaseType { static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members) { - if (const StructType *ST = dyn_cast(Ty)) { + if (auto *ST = dyn_cast(Ty)) { for (unsigned i = 0; i < ST->getNumElements(); ++i) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers)) return false; Members += SubMembers; } - } else if (const ArrayType *AT = dyn_cast(Ty)) { + } else if (auto *AT = dyn_cast(Ty)) { uint64_t SubMembers = 0; if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers)) return false; @@ -11703,7 +12266,7 @@ static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, return false; Members = 1; Base = HA_DOUBLE; - } else if (const VectorType *VT = dyn_cast(Ty)) { + } else if (auto *VT = dyn_cast(Ty)) { Members = 1; switch (Base) { case HA_FLOAT: @@ -11747,3 +12310,17 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy(); return IsHA || IsIntArray; } + +unsigned ARMTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + // Platforms which do not use SjLj EH may return values in these registers + // via the personality function. + return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R0; +} + +unsigned ARMTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + // Platforms which do not use SjLj EH may return values in these registers + // via the personality function. + return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1; +} diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index efc9020c193a..b764624f1492 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -63,8 +63,6 @@ namespace llvm { BCC_i64, - RBIT, // ARM bitreverse instruction - SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. @@ -79,6 +77,7 @@ namespace llvm { EH_SJLJ_SETJMP, // SjLj exception handling setjmp. EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch. TC_RETURN, // Tail call return pseudo. @@ -91,6 +90,7 @@ namespace llvm { PRELOAD, // Preload WIN__CHKSTK, // Windows' __chkstk call to do stack probing. 
+ WIN__DBZCHK, // Windows' divide by zero check VCEQ, // Vector compare equal. VCEQZ, // Vector compare equal to zero. @@ -172,12 +172,6 @@ namespace llvm { // BUILD_VECTOR for this purpose. BUILD_VECTOR, - // Floating-point max and min: - FMAX, - FMIN, - VMAXNM, - VMINNM, - // Bit-field insert BFI, @@ -189,6 +183,10 @@ namespace llvm { // Vector bitwise select VBSL, + // Pseudo-instruction representing a memory copy using ldm/stm + // instructions. + MEMCPY, + // Vector load N-element structure to all lanes: VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, VLD3DUP, @@ -260,6 +258,7 @@ namespace llvm { SDNode *Node) const override; SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const; + SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const; SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; @@ -348,6 +347,8 @@ namespace llvm { getInlineAsmMemConstraint(StringRef ConstraintCode) const override { if (ConstraintCode == "Q") return InlineAsm::Constraint_Q; + else if (ConstraintCode == "o") + return InlineAsm::Constraint_o; else if (ConstraintCode.size() == 2) { if (ConstraintCode[0] == 'U') { switch(ConstraintCode[1]) { @@ -420,13 +421,24 @@ namespace llvm { bool functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; - bool hasLoadLinkedStoreConditional() const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override; + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+    unsigned
+    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
     Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const;
     Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                           AtomicOrdering Ord) const override;
     Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
                                 AtomicOrdering Ord) const override;

+    void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
+
     Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
                                   bool IsStore, bool IsLoad) const override;
     Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord,
@@ -441,16 +453,21 @@ namespace llvm {
     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                                unsigned Factor) const override;

-    bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
+    TargetLoweringBase::AtomicExpansionKind
+    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    TargetLoweringBase::AtomicRMWExpansionKind
+    TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
+    bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

     bool useLoadStackGuardNode() const override;

     bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                    unsigned &Cost) const override;

+    bool isCheapToSpeculateCttz() const override;
+    bool isCheapToSpeculateCtlz() const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -496,6 +513,7 @@ namespace llvm {
                             ISD::ArgFlagsTy Flags) const;
     SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                     const ARMSubtarget *Subtarget) const;
     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -508,7 +526,6 @@ namespace llvm {
     SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                  TLSModel::Model model) const;
-    SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -526,6 +543,12 @@ namespace llvm {
                             const ARMSubtarget *ST) const;
     SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+    void ExpandDIV_Windows(SDValue Op, SelectionDAG &DAG, bool Signed,
+                           SmallVectorImpl<SDValue> &Results) const;
+    SDValue LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG, bool Signed,
+                                   SDValue &Chain) const;
+    SDValue LowerREM(SDNode *N, SelectionDAG &DAG) const;
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
@@ -635,6 +658,8 @@ namespace llvm {

     MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI,
                                            MachineBasicBlock *MBB) const;
+    MachineBasicBlock *EmitLowered__dbzchk(MachineInstr *MI,
+                                           MachineBasicBlock *MBB) const;
   };

   enum NEONModImmType {
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 84f95be30991..cf973d68085f 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -51,7 +51,8 @@ void ARMInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
 unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
   switch (Opc) {
-  default: break;
+  default:
+    break;
   case ARM::LDR_PRE_IMM:
   case ARM::LDR_PRE_REG:
   case ARM::LDR_POST_IMM:
@@ -124,82 +125,10 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI,
           .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
   unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
   MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
-      MachinePointerInfo::getGOT(), Flag, 4, 4);
+      MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 4, 4);
   MIB.addMemOperand(MMO);
   MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg);
   MIB.addReg(Reg, RegState::Kill).addImm(0);
   MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
   AddDefaultPred(MIB);
 }
-
-namespace {
-  /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC
-  /// global base register for ARM ELF.
-  struct ARMCGBR : public MachineFunctionPass {
-    static char ID;
-    ARMCGBR() : MachineFunctionPass(ID) {}
-
-    bool runOnMachineFunction(MachineFunction &MF) override {
-      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-      if (AFI->getGlobalBaseReg() == 0)
-        return false;
-      const ARMSubtarget &STI =
-          static_cast<const ARMSubtarget &>(MF.getSubtarget());
-      // Don't do this for Thumb1.
-      if (STI.isThumb1Only())
-        return false;
-
-      const TargetMachine &TM = MF.getTarget();
-      if (TM.getRelocationModel() != Reloc::PIC_)
-        return false;
-
-      LLVMContext *Context = &MF.getFunction()->getContext();
-      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
-      unsigned PCAdj = STI.isThumb() ? 4 : 8;
-      ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(
-          *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj);
-
-      unsigned Align = TM.getDataLayout()->getPrefTypeAlignment(
-          Type::getInt32PtrTy(*Context));
-      unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align);
-
-      MachineBasicBlock &FirstMBB = MF.front();
-      MachineBasicBlock::iterator MBBI = FirstMBB.begin();
-      DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
-      unsigned TempReg =
-          MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass);
-      unsigned Opc = STI.isThumb2() ? ARM::t2LDRpci : ARM::LDRcp;
-      const TargetInstrInfo &TII = *STI.getInstrInfo();
-      MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL,
-                                        TII.get(Opc), TempReg)
-                                    .addConstantPoolIndex(Idx);
-      if (Opc == ARM::LDRcp)
-        MIB.addImm(0);
-      AddDefaultPred(MIB);
-
-      // Fix the GOT address by adding pc.
-      unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
-      Opc = STI.isThumb2() ? ARM::tPICADD : ARM::PICADD;
-      MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg)
-                .addReg(TempReg)
-                .addImm(ARMPCLabelIndex);
-      if (Opc == ARM::PICADD)
-        AddDefaultPred(MIB);
-
-      return true;
-    }
-
-    const char *getPassName() const override {
-      return "ARM PIC Global Base Reg Initialization";
-    }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.setPreservesCFG();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-  };
-}
-
-char ARMCGBR::ID = 0;
-FunctionPass*
-llvm::createARMGlobalBaseRegPass() { return new ARMCGBR(); }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 61c45af26fe1..b9de83bfe6dc 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -59,6 +59,7 @@ def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
 def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
                                                  SDTCisInt<2>]>;
 def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
+def SDT_ARMEH_SJLJ_SetupDispatch: SDTypeProfile<0, 0, []>;

 def SDT_ARMMEMBARRIER     : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

@@ -70,8 +71,11 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
                                       SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;

-def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
-def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>;
+def SDT_WIN__DBZCHK : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+                                         SDTCisVT<2, i32>, SDTCisVT<3, i32>,
+                                         SDTCisVT<4, i32>]>;

 def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
                                             [SDTCisSameAs<0, 2>,
@@ -163,21 +167,23 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
 def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
                                SDT_ARMEH_SJLJ_Longjmp,
                                [SDNPHasChain, SDNPSideEffect]>;
+def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH",
+                                      SDT_ARMEH_SJLJ_SetupDispatch,
+                                      [SDNPHasChain, SDNPSideEffect]>;

 def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
                               [SDNPHasChain, SDNPSideEffect]>;
 def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
                         [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;

-def ARMrbit          : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
-
 def ARMtcret         : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
                         [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;

 def ARMbfi           : SDNode<"ARMISD::BFI", SDT_ARMBFI>;

-def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>;
-def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>;
+def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
+                        [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
+                         SDNPMayStore, SDNPMayLoad]>;

 //===----------------------------------------------------------------------===//
 // ARM Instruction Predicate Definitions.
@@ -209,6 +215,8 @@ def PreV8        : Predicate<"!Subtarget->hasV8Ops()">,
                                  AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">;
 def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                                  AssemblerPredicate<"HasV8_1aOps", "armv8.1a">;
+def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
+                                 AssemblerPredicate<"HasV8_2aOps", "armv8.2a">;
 def NoVFP            : Predicate<"!Subtarget->hasVFP2()">;
 def HasVFP2          : Predicate<"Subtarget->hasVFP2()">,
                                  AssemblerPredicate<"FeatureVFP2", "VFP2">;
@@ -228,7 +236,9 @@ def HasCrypto        : Predicate<"Subtarget->hasCrypto()">,
 def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                  AssemblerPredicate<"FeatureCRC", "crc">;
 def HasFP16          : Predicate<"Subtarget->hasFP16()">,
-                                 AssemblerPredicate<"FeatureFP16","half-float">;
+                                 AssemblerPredicate<"FeatureFP16","half-float conversions">;
+def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
+                                 AssemblerPredicate<"FeatureFullFP16","full half-float">;
 def HasDivide        : Predicate<"Subtarget->hasDivide()">,
                                  AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">;
 def HasDivideInARM   : Predicate<"Subtarget->hasDivideInARMMode()">,
@@ -236,9 +246,8 @@ def HasDivideInARM   : Predicate<"Subtarget->hasDivideInARMMode()">,
 def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
                                  AssemblerPredicate<"FeatureT2XtPk",
                                                     "pack/extract">;
-def HasThumb2DSP     : Predicate<"Subtarget->hasThumb2DSP()">,
-                                 AssemblerPredicate<"FeatureDSPThumb2",
-                                                    "thumb2-dsp">;
+def HasDSP           : Predicate<"Subtarget->hasDSP()">,
+                                 AssemblerPredicate<"FeatureDSP", "dsp">;
 def HasDB            : Predicate<"Subtarget->hasDataBarrier()">,
                                  AssemblerPredicate<"FeatureDB",
                                                     "data-barriers">;
@@ -2322,6 +2331,7 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
   let Inst{23-4} = 0b01100000000000000111;
   let Inst{3-0} = opt;
 }
+def : MnemonicAlias<"smi", "smc">;

 // Supervisor Call (Software Interrupt)
 let isCall = 1, Uses = [SP] in {
@@ -3671,10 +3681,10 @@ def USAT16 : AI<(outs GPRnopc:$Rd),
   let Inst{3-0} = Rn;
 }

-def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm:$pos),
-               (SSAT imm:$pos, GPRnopc:$a, 0)>;
-def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm:$pos),
-               (USAT imm:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_ssat GPRnopc:$a, imm1_32:$pos),
+               (SSAT imm1_32:$pos, GPRnopc:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos),
+               (USAT imm0_31:$pos, GPRnopc:$a, 0)>;

 //===----------------------------------------------------------------------===//
 //  Bitwise Instructions.
@@ -4186,7 +4196,7 @@ def CLZ  : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),

 def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
                     IIC_iUNAr, "rbit", "\t$Rd, $Rm",
-                    [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+                    [(set GPR:$Rd, (bitreverse GPR:$Rm))]>,
            Requires<[IsARM, HasV6T2]>,
            Sched<[WriteALU]>;
@@ -4578,6 +4588,19 @@ let usesCustomInserter = 1 in {
                              [(ARMcopystructbyval GPR:$dst, GPR:$src,
                                                   imm:$size, imm:$alignment)]>;
 }

+let hasPostISelHook = 1, Constraints = "$newdst = $dst, $newsrc = $src" in {
+    // %newsrc, %newdst = MEMCPY %dst, %src, N, ...N scratch regs...
+    // Copies N registers worth of memory from address %src to address %dst
+    // and returns the incremented addresses.  N scratch register will
+    // be attached for the copy to use.
+  def MEMCPY : PseudoInst<
+    (outs GPR:$newdst, GPR:$newsrc),
+    (ins GPR:$dst, GPR:$src, i32imm:$nreg, variable_ops),
+    NoItinerary,
+    [(set GPR:$newdst, GPR:$newsrc,
+          (ARMmemcopy GPR:$dst, GPR:$src, imm:$nreg))]>;
+}
+
 def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
 }]>;
@@ -4705,7 +4728,7 @@ def STLEXD : AIstlex<0b01, (outs GPR:$Rd),

 def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", [(int_arm_clrex)]>,
-            Requires<[IsARM, HasV7]>  {
+            Requires<[IsARM, HasV6K]>  {
   let Inst{31-0} = 0b11110101011111111111000000011111;
 }
@@ -5242,6 +5265,12 @@ def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
 let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in
   def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;

+def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
+                         [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+let usesCustomInserter = 1, Defs = [CPSR] in
+  def WIN__DBZCHK : PseudoInst<(outs), (ins GPR:$divisor), NoItinerary,
+                               [(win__dbzchk GPR:$divisor)]>;
+
 //===----------------------------------------------------------------------===//
 // TLS Instructions
 //
@@ -5301,6 +5330,10 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
                              Requires<[IsARM]>;
 }

+let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1 in
+def Int_eh_sjlj_setup_dispatch : PseudoInst<(outs), (ins), NoItinerary,
+                                            [(ARMeh_sjlj_setup_dispatch)]>;
+
 // eh.sjlj.dispatchsetup pseudo-instruction.
 // This pseudo is used for both ARM and Thumb. Any differences are handled when
 // the pseudo is expanded (which happens before any passes that need the
@@ -5622,16 +5655,16 @@ def : ARMInstAlias<"mvn${s}${p} $Rd, $imm",
            (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>;
 // Same for AND <--> BIC
 def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm",
-                   (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+                   (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
                           pred:$p, cc_out:$s)>;
 def : ARMInstAlias<"bic${s}${p} $Rdn, $imm",
-                   (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+                   (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
                           pred:$p, cc_out:$s)>;
 def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm",
-                   (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm,
+                   (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm,
                           pred:$p, cc_out:$s)>;
 def : ARMInstAlias<"and${s}${p} $Rdn, $imm",
-                   (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm,
+                   (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm,
                           pred:$p, cc_out:$s)>;

 // Likewise, "add Rd, mod_imm_neg" -> sub
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index f035d6150ec0..7020ffb41b64 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -587,11 +587,6 @@ def SDTARMVMULL   : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
 def NEONvmulls    : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
 def NEONvmullu    : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;

-def SDTARMFMAX    : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
-                                         SDTCisSameAs<0, 2>]>;
-def NEONfmax      : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
-def NEONfmin      : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
-
 def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
   ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
   unsigned EltBits = 0;
@@ -2465,17 +2460,17 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
         [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;

 // Same as above, but not predicated.
-class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VDIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
-  : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+  : N2Vnp<op19_18, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;

-class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
+class N2VQIntnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7,
               InstrItinClass itin, string OpcodeStr, string Dt,
               ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
-  : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+  : N2Vnp<op19_18, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
          itin, OpcodeStr, Dt,
          [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
@@ -3255,6 +3250,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                   [(set DPR:$Vd, (v2i32 (OpNode (v2f32 DPR:$Vm))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
+  def v4f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+                  (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+                  opc, "f16", asm, "",
+                  [(set DPR:$Vd, (v4i16 (OpNode (v4f16 DPR:$Vm))))]>,
+              Requires<[HasNEON,HasFullFP16]> {
+    let Inst{10} = 1; // overwrite F = 1
+  }

   // 128-bit vector types.
   def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
@@ -3270,6 +3272,13 @@ multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
                   [(set QPR:$Vd, (v4i32 (OpNode (v4f32 QPR:$Vm))))]> {
     let Inst{10} = 1; // overwrite F = 1
   }
+  def v8f16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+                  (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+                  opc, "f16", asm, "",
+                  [(set QPR:$Vd, (v8i16 (OpNode (v8f16 QPR:$Vm))))]>,
+              Requires<[HasNEON,HasFullFP16]> {
+    let Inst{10} = 1; // overwrite F = 1
+  }
 }
@@ -4110,6 +4119,12 @@ def  VADDfd   : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
                      v2f32, v2f32, fadd, 1>;
 def  VADDfq   : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
                      v4f32, v4f32, fadd, 1>;
+def  VADDhd   : N3VD<0, 0, 0b01, 0b1101, 0, IIC_VBIND, "vadd", "f16",
+                     v4f16, v4f16, fadd, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VADDhq   : N3VQ<0, 0, 0b01, 0b1101, 0, IIC_VBINQ, "vadd", "f16",
+                     v8f16, v8f16, fadd, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
 // VADDL    : Vector Add Long (Q = D + D)
 defm VADDLs   : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
                             "vaddl", "s", add, sext, 1>;
@@ -4165,10 +4180,21 @@ def  VMULfd   : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32",
                      v2f32, v2f32, fmul, 1>;
 def  VMULfq   : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32",
                      v4f32, v4f32, fmul, 1>;
+def  VMULhd   : N3VD<1, 0, 0b01, 0b1101, 1, IIC_VFMULD, "vmul", "f16",
+                     v4f16, v4f16, fmul, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VMULhq   : N3VQ<1, 0, 0b01, 0b1101, 1, IIC_VFMULQ, "vmul", "f16",
+                     v8f16, v8f16, fmul, 1>,
+                Requires<[HasNEON,HasFullFP16]>;
 defm VMULsl   : N3VSL_HS<0b1000, "vmul", mul>;
 def  VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
 def  VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
                        v2f32, fmul>;
+def  VMULslhd : N3VDSL16<0b01, 0b1001, "vmul", "f16", v4f16, fmul>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VMULslhq : N3VQSL16<0b01, 0b1001, "vmul", "f16", v8f16,
+                         v4f16, fmul>,
+                Requires<[HasNEON,HasFullFP16]>;

 def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
                       (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
@@ -4277,6 +4303,12 @@ def  VMLAfd   : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32",
 def  VMLAfq   : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32",
                           v4f32, fmul_su, fadd_mlx>,
                 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLAhd   : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16",
+                          v4f16, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLAhq   : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16",
+                          v8f16, fmul_su, fadd_mlx>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
 defm VMLAsl   : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>;
 def  VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
@@ -4285,6 +4317,12 @@ def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32",
                             v2f32, fmul_su, fadd_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
 def  VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
                             v4f32, v2f32, fmul_su, fadd_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
+def  VMLAslhd : N3VDMulOpSL16<0b01, 0b0001, IIC_VMACD, "vmla", "f16",
+                              v4f16, fmul, fadd>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def  VMLAslhq : N3VQMulOpSL16<0b01, 0b0001, IIC_VMACQ, "vmla", "f16",
+                              v8f16, v4f16, fmul, fadd>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

 def : Pat<(v8i16 (add (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -4495,6 +4533,12 @@ def  VMLSfd   : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32",
 def  VMLSfq   : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32",
                           v4f32, fmul_su, fsub_mlx>,
                 Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLShd   : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16",
+                          v4f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
+def  VMLShq   : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16",
+                          v8f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>;
 defm VMLSsl   : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D,
                               IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>;
 def  VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
@@ -4503,6 +4547,12 @@ def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32",
                             v2f32, fmul_su, fsub_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
 def  VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
                             v4f32, v2f32, fmul_su, fsub_mlx>,
                 Requires<[HasNEON, UseFPVMLx]>;
+def  VMLSslhd : N3VDMulOpSL16<0b01, 0b0101, IIC_VMACD, "vmls", "f16",
+                              v4f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;
+def  VMLSslhq : N3VQMulOpSL16<0b01, 0b0101, IIC_VMACQ, "vmls", "f16",
+                              v8f16, v4f16, fmul, fsub>,
+                Requires<[HasNEON, HasFullFP16, UseFPVMLx]>;

 def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
                   (mul (v8i16 QPR:$src2),
@@ -4570,6 +4620,13 @@ def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32",
 def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32",
                        v4f32, fmul_su, fadd_mlx>,
              Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMAhd : N3VDMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACD, "vfma", "f16",
+                       v4f16, fmul, fadd>,
+             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+
+def VFMAhq : N3VQMulOp<0, 0, 0b01, 0b1100, 1, IIC_VFMACQ, "vfma", "f16",
+                       v8f16, fmul, fadd>,
+             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

 //   Fused Vector Multiply Subtract (floating-point)
 def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
@@ -4578,6 +4635,12 @@ def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32",
                        v2f32, fmul_su, fsub_mlx>,
              Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
 def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32",
                        v4f32, fmul_su, fsub_mlx>,
              Requires<[HasNEON,HasVFP4,UseFusedMAC]>;
+def VFMShd : N3VDMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACD, "vfms", "f16",
+                       v4f16, fmul, fsub>,
+             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
+def VFMShq : N3VQMulOp<0, 0, 0b11, 0b1100, 1, IIC_VFMACQ, "vfms", "f16",
+                       v8f16, fmul, fsub>,
+             Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;

 // Match @llvm.fma.* intrinsics
 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
@@ -4602,6 +4665,12 @@ def  VSUBfd   : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
                      v2f32, v2f32, fsub, 0>;
 def  VSUBfq   : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
                      v4f32, v4f32, fsub, 0>;
+def  VSUBhd   : N3VD<0, 0, 0b11, 0b1101, 0, IIC_VBIND, "vsub", "f16",
+                     v4f16, v4f16, fsub, 0>,
+                Requires<[HasNEON,HasFullFP16]>;
+def  VSUBhq   : N3VQ<0, 0, 0b11, 0b1101, 0, IIC_VBINQ, "vsub", "f16",
+                     v8f16, v8f16, fsub, 0>,
+                Requires<[HasNEON,HasFullFP16]>;
 // VSUBL    : Vector Subtract Long (Q = D - D)
 defm VSUBLs   : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
                             "vsubl", "s", sub, sext, 0>;
@@ -4646,6 +4715,12 @@ def  VCEQfd   : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
                      NEONvceq, 1>;
 def  VCEQfq   : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
                      NEONvceq, 1>;
+def  VCEQhd   : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16,
+                     NEONvceq, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCEQhq   : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16,
+                     NEONvceq, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 let TwoOperandAliasConstraint = "$Vm = $Vd" in
 defm VCEQz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
@@ -4660,6 +4735,12 @@ def  VCGEfd   : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
                      NEONvcge, 0>;
 def  VCGEfq   : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
                      NEONvcge, 0>;
+def  VCGEhd   : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16,
+                     NEONvcge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCGEhq   : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16,
+                     NEONvcge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
 defm VCGEz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
@@ -4677,6 +4758,12 @@ def  VCGTfd   : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
                      NEONvcgt, 0>;
 def  VCGTfq   : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
                      NEONvcgt, 0>;
+def  VCGThd   : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16,
+                     NEONvcgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCGThq   : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16,
+                     NEONvcgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 let TwoOperandAliasConstraint = "$Vm = $Vd" in {
 defm VCGTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
@@ -4686,36 +4773,68 @@ defm VCLTz    : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
 }

 // VACGE    : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def  VACGEd   : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+def  VACGEfd  : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
                         "f32", v2i32, v2f32, int_arm_neon_vacge, 0>;
-def  VACGEq   : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+def  VACGEfq  : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
                         "f32", v4i32, v4f32, int_arm_neon_vacge, 0>;
+def  VACGEhd  : N3VDInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+                        "f16", v4i16, v4f16, int_arm_neon_vacge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VACGEhq  : N3VQInt<1, 0, 0b01, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+                        "f16", v8i16, v8f16, int_arm_neon_vacge, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VACGT    : Vector Absolute Compare Greater Than (aka VCAGT)
-def  VACGTd   : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+def  VACGTfd  : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
                         "f32", v2i32, v2f32, int_arm_neon_vacgt, 0>;
-def  VACGTq   : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+def  VACGTfq  : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
                         "f32", v4i32, v4f32, int_arm_neon_vacgt, 0>;
+def  VACGThd  : N3VDInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+                        "f16", v4i16, v4f16, int_arm_neon_vacgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VACGThq  : N3VQInt<1, 0, 0b11, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+                        "f16", v8f16, v8f16, int_arm_neon_vacgt, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VTST     : Vector Test Bits
 defm VTST     : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
                         IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;

 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vn, $Vm",
-                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vn, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vn, $Vm",
+                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vn, pred:$p)>;
+}
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
-                   (VACGTd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+                   (VACGTfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vaclt${p}.f32 $Vd, $Vm",
-                   (VACGTq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+                   (VACGTfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
-                   (VACGEd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+                   (VACGEfd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
 def: NEONInstAlias<"vacle${p}.f32 $Vd, $Vm",
-                   (VACGEq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+                   (VACGEfq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+                   (VACGThd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vaclt${p}.f16 $Vd, $Vm",
+                   (VACGThq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+                   (VACGEhd DPR:$Vd, DPR:$Vm, DPR:$Vd, pred:$p)>;
+def: NEONInstAlias<"vacle${p}.f16 $Vd, $Vm",
+                   (VACGEhq QPR:$Vd, QPR:$Vm, QPR:$Vd, pred:$p)>;
+}

 // Vector Bitwise Operations.
@@ -5007,6 +5126,12 @@ def  VABDfd   : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
                         "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
 def  VABDfq   : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
                         "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
+def  VABDhd   : N3VDInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBIND,
+                        "vabd", "f16", v4f16, v4f16, int_arm_neon_vabds, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VABDhq   : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,
+                        "vabd", "f16", v8f16, v8f16, int_arm_neon_vabds, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VABDL    : Vector Absolute Difference Long (Q = | D - D |)
 defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
@@ -5014,6 +5139,29 @@ defm VABDLs   : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
                                "vabdl", "s", int_arm_neon_vabds, zext, 1>;
 defm VABDLu   : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
                                "vabdl", "u", int_arm_neon_vabdu, zext, 1>;

+def abd_shr :
+    PatFrag<(ops node:$in1, node:$in2, node:$shift),
+            (NEONvshrs (sub (zext node:$in1),
+                            (zext node:$in2)), (i32 $shift))>;
+
+def : Pat<(xor (v4i32 (bitconvert (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15)))),
+               (v4i32 (bitconvert (v8i16 (add (sub (zext (v8i8 DPR:$opA)),
+                                                   (zext (v8i8 DPR:$opB))),
+                                          (v8i16 (abd_shr (v8i8 DPR:$opA), (v8i8 DPR:$opB), 15))))))),
+          (VABDLuv8i16 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)),
+               (v4i32 (add (sub (zext (v4i16 DPR:$opA)),
+                                (zext (v4i16 DPR:$opB))),
+                           (abd_shr (v4i16 DPR:$opA), (v4i16 DPR:$opB), 31)))),
+          (VABDLuv4i32 DPR:$opA, DPR:$opB)>;
+
+def : Pat<(xor (v4i32 (bitconvert (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
+               (v4i32 (bitconvert (v2i64 (add (sub (zext (v2i32 DPR:$opA)),
+                                                   (zext (v2i32 DPR:$opB))),
+                                          (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))))),
+          (VABDLuv2i64 DPR:$opA, DPR:$opB)>;
+
 // VABA     : Vector Absolute Difference and Accumulate
 defm VABAs    : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
                              "vaba", "s", int_arm_neon_vabds, add>;
@@ -5031,53 +5179,85 @@ defm VABALu   : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,

 // VMAX     : Vector Maximum
 defm VMAXs    : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
-                           "vmax", "s", int_arm_neon_vmaxs, 1>;
+                           "vmax", "s", smax, 1>;
 defm VMAXu    : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
-                           "vmax", "u", int_arm_neon_vmaxu, 1>;
+                           "vmax", "u", umax, 1>;
 def  VMAXfd   : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND,
                         "vmax", "f32",
-                        v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+                        v2f32, v2f32, fmaxnan, 1>;
 def  VMAXfq   : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                         "vmax", "f32",
-                        v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+                        v4f32, v4f32, fmaxnan, 1>;
+def  VMAXhd   : N3VDInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBIND,
+                        "vmax", "f16",
+                        v4f16, v4f16, fmaxnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VMAXhq   : N3VQInt<0, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+                        "vmax", "f16",
+                        v8f16, v8f16, fmaxnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VMAXNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMAXNMND  : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
+  def VMAXNMNDf : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f32",
-                            v2f32, v2f32, int_arm_neon_vmaxnm, 1>,
+                            v2f32, v2f32, fmaxnum, 1>,
                             Requires<[HasV8, HasNEON]>;
-  def VMAXNMNQ  : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
+  def VMAXNMNQf : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vmaxnm", "f32",
-                            v4f32, v4f32, int_arm_neon_vmaxnm, 1>,
+                            v4f32, v4f32, fmaxnum, 1>,
                             Requires<[HasV8, HasNEON]>;
+  def VMAXNMNDh : N3VDIntnp<0b00110, 0b01, 0b1111, 0, 1,
+                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                            v4f16, v4f16, fmaxnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def VMAXNMNQh : N3VQIntnp<0b00110, 0b01, 0b1111, 1, 1,
+                            N3RegFrm, NoItinerary, "vmaxnm", "f16",
+                            v8f16, v8f16, fmaxnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
 }

 // VMIN     : Vector Minimum
 defm VMINs    : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
-                           "vmin", "s", int_arm_neon_vmins, 1>;
+                           "vmin", "s", smin, 1>;
 defm VMINu    : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
                            IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
-                           "vmin", "u", int_arm_neon_vminu, 1>;
+                           "vmin", "u", umin, 1>;
 def  VMINfd   : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND,
                         "vmin", "f32",
-                        v2f32, v2f32, int_arm_neon_vmins, 1>;
+                        v2f32, v2f32, fminnan, 1>;
 def  VMINfq   : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ,
                         "vmin", "f32",
-                        v4f32, v4f32, int_arm_neon_vmins, 1>;
+                        v4f32, v4f32, fminnan, 1>;
+def  VMINhd   : N3VDInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBIND,
+                        "vmin", "f16",
+                        v4f16, v4f16, fminnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VMINhq   : N3VQInt<0, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VBINQ,
+                        "vmin", "f16",
+                        v8f16, v8f16, fminnan, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VMINNM
 let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-  def VMINNMND  : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
+  def VMINNMNDf : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f32",
-                            v2f32, v2f32, int_arm_neon_vminnm, 1>,
+                            v2f32, v2f32, fminnum, 1>,
                             Requires<[HasV8, HasNEON]>;
-  def VMINNMNQ  : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
+  def VMINNMNQf : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1,
                             N3RegFrm, NoItinerary, "vminnm", "f32",
-                            v4f32, v4f32, int_arm_neon_vminnm, 1>,
+                            v4f32, v4f32, fminnum, 1>,
                             Requires<[HasV8, HasNEON]>;
+  def VMINNMNDh : N3VDIntnp<0b00110, 0b11, 0b1111, 0, 1,
+                            N3RegFrm, NoItinerary, "vminnm", "f16",
+                            v4f16, v4f16, fminnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
+  def VMINNMNQh : N3VQIntnp<0b00110, 0b11, 0b1111, 1, 1,
+                            N3RegFrm, NoItinerary, "vminnm", "f16",
+                            v8f16, v8f16, fminnum, 1>,
+                            Requires<[HasV8, HasNEON, HasFullFP16]>;
 }

 // Vector Pairwise Operations.
@@ -5095,6 +5275,10 @@ def  VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD,
 def  VPADDf   : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm,
                         IIC_VPBIND, "vpadd", "f32",
                         v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def  VPADDh   : N3VDInt<1, 0, 0b01, 0b1101, 0, N3RegFrm,
+                        IIC_VPBIND, "vpadd", "f16",
+                        v4f16, v4f16, int_arm_neon_vpadd, 0>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VPADDL   : Vector Pairwise Add Long
 defm VPADDLs  : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -5123,6 +5307,9 @@ def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D,
                        "vpmax", "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
 def VPMAXf   : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
                        "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+def VPMAXh   : N3VDInt<1, 0, 0b01, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax",
+                       "f16", v4f16, v4f16, int_arm_neon_vpmaxs, 0>,
+               Requires<[HasNEON, HasFullFP16]>;

 // VPMIN    : Vector Pairwise Minimum
 def VPMINs8  : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
@@ -5139,6 +5326,9 @@ def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin",
                        "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
 def VPMINf   : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
                        "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
+def VPMINh   : N3VDInt<1, 0, 0b11, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin",
+                       "f16", v4f16, v4f16, int_arm_neon_vpmins, 0>,
+               Requires<[HasNEON, HasFullFP16]>;

 // Vector Reciprocal and Reciprocal Square Root Estimate and Step.

@@ -5155,6 +5345,14 @@ def  VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                         IIC_VUNAD, "vrecpe", "f32",
                         v2f32, v2f32, int_arm_neon_vrecpe>;
 def  VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
                         IIC_VUNAQ, "vrecpe", "f32",
                         v4f32, v4f32, int_arm_neon_vrecpe>;
+def  VRECPEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+                        IIC_VUNAD, "vrecpe", "f16",
+                        v4f16, v4f16, int_arm_neon_vrecpe>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VRECPEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01010, 0,
+                        IIC_VUNAQ, "vrecpe", "f16",
+                        v8f16, v8f16, int_arm_neon_vrecpe>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VRECPS   : Vector Reciprocal Step
 def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
@@ -5163,6 +5361,14 @@ def  VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrecps", "f32",
                         v2f32, v2f32, int_arm_neon_vrecps, 1>;
 def  VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrecps", "f32",
                         v4f32, v4f32, int_arm_neon_vrecps, 1>;
+def  VRECPShd : N3VDInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSD, "vrecps", "f16",
+                        v4f16, v4f16, int_arm_neon_vrecps, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VRECPShq : N3VQInt<0, 0, 0b01, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSQ, "vrecps", "f16",
+                        v8f16, v8f16, int_arm_neon_vrecps, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VRSQRTE  : Vector Reciprocal Square Root Estimate
 def VRSQRTEd  : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0,
@@ -5177,6 +5383,14 @@ def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAD, "vrsqrte", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrte>;
 def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
                         IIC_VUNAQ, "vrsqrte", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrte>;
+def VRSQRTEhd : N2VDInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+                        IIC_VUNAD, "vrsqrte", "f16",
+                        v4f16, v4f16, int_arm_neon_vrsqrte>,
+                Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTEhq : N2VQInt<0b11, 0b11, 0b01, 0b11, 0b01011, 0,
+                        IIC_VUNAQ, "vrsqrte", "f16",
+                        v8f16, v8f16, int_arm_neon_vrsqrte>,
+                Requires<[HasNEON, HasFullFP16]>;

 // VRSQRTS  : Vector Reciprocal Square Root Step
 def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
@@ -5185,6 +5399,14 @@ def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSD, "vrsqrts", "f32",
                         v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
 def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
                         IIC_VRECSQ, "vrsqrts", "f32",
                         v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
+def VRSQRTShd : N3VDInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSD, "vrsqrts", "f16",
+                        v4f16, v4f16, int_arm_neon_vrsqrts, 1>,
+                Requires<[HasNEON, HasFullFP16]>;
+def VRSQRTShq : N3VQInt<0, 0, 0b11, 0b1111, 1, N3RegFrm,
+                        IIC_VRECSQ, "vrsqrts", "f16",
+                        v8f16, v8f16, int_arm_neon_vrsqrts, 1>,
+                Requires<[HasNEON, HasFullFP16]>;

 // Vector Shifts.
@@ -5336,6 +5558,14 @@ def  VABSfd  : N2VD<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v2f32, v2f32, fabs>;
 def  VABSfq  : N2VQ<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
                     "vabs", "f32",
                     v4f32, v4f32, fabs>;
+def  VABShd  : N2VD<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+                    "vabs", "f16",
+                    v4f16, v4f16, fabs>,
+               Requires<[HasNEON, HasFullFP16]>;
+def  VABShq  : N2VQ<0b11, 0b11, 0b01, 0b01, 0b01110, 0,
+                    "vabs", "f16",
+                    v8f16, v8f16, fabs>,
+               Requires<[HasNEON, HasFullFP16]>;

 def : Pat<(xor (v2i32 (bitconvert (v8i8 (NEONvshrs DPR:$src, (i32 7))))),
                (v2i32 (bitconvert (v8i8 (add DPR:$src,
@@ -5398,6 +5628,16 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
                    (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
                    "vneg", "f32", "$Vd, $Vm", "",
                    [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+def VNEGhd   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 0, 0,
+                   (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+                   "vneg", "f16", "$Vd, $Vm", "",
+                   [(set DPR:$Vd, (v4f16 (fneg DPR:$Vm)))]>,
+               Requires<[HasNEON, HasFullFP16]>;
+def VNEGhq   : N2V<0b11, 0b11, 0b01, 0b01, 0b01111, 1, 0,
+                   (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+                   "vneg", "f16", "$Vd, $Vm", "",
+                   [(set QPR:$Vd, (v8f16 (fneg QPR:$Vm)))]>,
+               Requires<[HasNEON, HasFullFP16]>;

 def : Pat<(v8i8  (vnegd  DPR:$src)), (VNEGs8d DPR:$src)>;
 def : Pat<(v4i16 (vnegd  DPR:$src)), (VNEGs16d DPR:$src)>;
@@ -5868,18 +6108,56 @@ def  VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
 def  VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
                      v4f32, v4i32, uint_to_fp>;

+def  VCVTh2sd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+                     v4i16, v4f16, fp_to_sint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTh2ud : N2VD<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+                     v4i16, v4f16, fp_to_uint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTs2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+                     v4f16, v4i16, sint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTu2hd : N2VD<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+                     v4f16, v4i16, uint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+
+def  VCVTh2sq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01110, 0, "vcvt", "s16.f16",
+                     v8i16, v8f16, fp_to_sint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTh2uq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01111, 0, "vcvt", "u16.f16",
+                     v8i16, v8f16, fp_to_uint>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTs2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01100, 0, "vcvt", "f16.s16",
+                     v8f16, v8i16, sint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+def  VCVTu2hq : N2VQ<0b11, 0b11, 0b01, 0b11, 0b01101, 0, "vcvt", "f16.u16",
+                     v8f16, v8i16, uint_to_fp>,
+                Requires<[HasNEON, HasFullFP16]>;
+
 // VCVT{A, N, P, M}
 multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS,
                     SDPatternOperator IntU> {
   let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-    def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
-                       "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
-    def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
-                       "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
-    def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
-                       "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
-    def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
-                       "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+    def SDf : N2VDIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                        "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>;
+    def SQf : N2VQIntnp<0b10, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                        "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>;
+    def UDf : N2VDIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                        "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>;
+    def UQf : N2VQIntnp<0b10, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                        "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>;
+    def SDh : N2VDIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                        "s16.f16", v4i16, v4f16, IntS>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
+    def SQh : N2VQIntnp<0b01, 0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op),
+                        "s16.f16", v8i16, v8f16, IntS>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
+    def UDh : N2VDIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                        "u16.f16", v4i16, v4f16, IntU>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
+    def UQh : N2VQIntnp<0b01, 0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op),
+                        "u16.f16", v8i16, v8f16, IntU>,
+              Requires<[HasV8, HasNEON, HasFullFP16]>;
   }
 }
@@ -5898,6 +6176,16 @@ def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsd : N2VCvtD<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+                        v4i16, v4f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xud : N2VCvtD<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+                        v4i16, v4f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hd : N2VCvtD<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+                        v4f16, v4i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hd : N2VCvtD<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+                        v4f16, v4i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
 }

 let DecoderMethod = "DecodeVCVTQ" in {
@@ -5909,6 +6197,16 @@ def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
                         v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
 def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
                         v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+let Predicates = [HasNEON, HasFullFP16] in {
+def VCVTh2xsq : N2VCvtQ<0, 1, 0b1101, 0, 1, "vcvt", "s16.f16",
+                        v8i16, v8f16, int_arm_neon_vcvtfp2fxs>;
+def VCVTh2xuq : N2VCvtQ<1, 1, 0b1101, 0, 1, "vcvt", "u16.f16",
+                        v8i16, v8f16, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2hq : N2VCvtQ<0, 1, 0b1100, 0, 1, "vcvt", "f16.s16",
+                        v8f16, v8i16, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2hq : N2VCvtQ<1, 1, 0b1100, 0, 1, "vcvt", "f16.u16",
+                        v8f16, v8i16, int_arm_neon_vcvtfxu2fp>;
+} // Predicates = [HasNEON, HasFullFP16]
 }

 def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
@@ -5929,6 +6227,24 @@ def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
 def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
                     (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;

+def : NEONInstAlias<"vcvt${p}.s16.f16 $Dd, $Dm, #0",
+                    (VCVTh2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Dd, $Dm, #0",
+                    (VCVTh2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Dd, $Dm, #0",
+                    (VCVTs2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Dd, $Dm, #0",
+                    (VCVTu2hd DPR:$Dd, DPR:$Dm, pred:$p)>;
+
+def : NEONInstAlias<"vcvt${p}.s16.f16 $Qd, $Qm, #0",
+                    (VCVTh2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.u16.f16 $Qd, $Qm, #0",
+                    (VCVTh2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.s16 $Qd, $Qm, #0",
+                    (VCVTs2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+def : NEONInstAlias<"vcvt${p}.f16.u16 $Qd, $Qm, #0",
+                    (VCVTu2hq QPR:$Qd, QPR:$Qm, pred:$p)>;
+
 // VCVT     : Vector Convert Between Half-Precision and Single-Precision.
 def  VCVTf2h  : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
@@ -6182,22 +6498,40 @@ def  VTBX4Pseudo
 // VRINT    : Vector Rounding
 multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> {
   let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in {
-    def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary,
-                      !strconcat("vrint", op), "f32",
-                      v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
+    def Df : N2VDIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f32",
+                       v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> {
       let Inst{9-7} = op9_7;
     }
-    def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary,
-                      !strconcat("vrint", op), "f32",
-                      v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
+    def Qf : N2VQIntnp<0b10, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f32",
+                       v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> {
       let Inst{9-7} = op9_7;
     }
+    def Dh : N2VDIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f16",
+                       v4f16, v4f16, Int>,
+             Requires<[HasV8, HasNEON, HasFullFP16]> {
+      let Inst{9-7} = op9_7;
+    }
+    def Qh : N2VQIntnp<0b01, 0b10, 0b100, 0, NoItinerary,
+                       !strconcat("vrint", op), "f16",
+                       v8f16, v8f16, Int>,
+             Requires<[HasV8, HasNEON, HasFullFP16]> {
+      let Inst{9-7} = op9_7;
+    }
   }

   def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Dd, $Dm"),
-                      (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>;
+                      (!cast<Instruction>(NAME#"Df") DPR:$Dd, DPR:$Dm)>;
   def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32\t$Qd, $Qm"),
-                      (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>;
+                      (!cast<Instruction>(NAME#"Qf") QPR:$Qd, QPR:$Qm)>;
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Dd, $Dm"),
+                      (!cast<Instruction>(NAME#"Dh") DPR:$Dd, DPR:$Dm)>;
+  def : NEONInstAlias<!strconcat("vrint", op, ".f16.f16\t$Qd, $Qm"),
+                      (!cast<Instruction>(NAME#"Qh") QPR:$Qd, QPR:$Qm)>;
+  }
 }

 defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>;
@@ -6343,8 +6677,8 @@ def : N3VSMulOpPat<fmul, fsub, VFMSfd>,
       Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>;
 def : N2VSPat<fabs, VABSfd>;
 def : N2VSPat<fneg, VNEGfd>;
-def : N3VSPat<NEONfmax, VMAXfd>;
-def : N3VSPat<NEONfmin, VMINfd>;
+def : N3VSPat<fmaxnan, VMAXfd>, Requires<[HasNEON]>;
+def : N3VSPat<fminnan, VMINfd>, Requires<[HasNEON]>;
 def : NVCVTFIPat<fp_to_sint, VCVTf2sd>;
 def : NVCVTFIPat<fp_to_uint, VCVTf2ud>;
 def : NVCVTIFPat<sint_to_fp, VCVTs2fd>;
@@ -7704,6 +8038,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Dd, $Dn, $Dm",
                     (VCGEuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 def : NEONInstAlias<"vcle${p}.f32 $Dd, $Dn, $Dm",
                     (VCGEfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Dd, $Dn, $Dm",
+                    (VCGEhd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 // Q-register versions.
 def : NEONInstAlias<"vcle${p}.s8 $Qd, $Qn, $Qm",
                     (VCGEsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
@@ -7719,6 +8056,9 @@ def : NEONInstAlias<"vcle${p}.u32 $Qd, $Qn, $Qm",
                     (VCGEuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
 def : NEONInstAlias<"vcle${p}.f32 $Qd, $Qn, $Qm",
                     (VCGEfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vcle${p}.f16 $Qd, $Qn, $Qm",
+                    (VCGEhq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>;

 // VCLT (register) is an assembler alias for VCGT w/ the operands reversed.
 // D-register versions.
@@ -7736,6 +8076,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Dd, $Dn, $Dm",
                     (VCGTuv2i32 DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 def : NEONInstAlias<"vclt${p}.f32 $Dd, $Dn, $Dm",
                     (VCGTfd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
+let Predicates = [HasNEON, HasFullFP16] in
+def : NEONInstAlias<"vclt${p}.f16 $Dd, $Dn, $Dm",
+                    (VCGThd DPR:$Dd, DPR:$Dm, DPR:$Dn, pred:$p)>;
 // Q-register versions.
def : NEONInstAlias<"vclt${p}.s8 $Qd, $Qn, $Qm", (VCGTsv16i8 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; @@ -7751,6 +8094,9 @@ def : NEONInstAlias<"vclt${p}.u32 $Qd, $Qn, $Qm", (VCGTuv4i32 QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; def : NEONInstAlias<"vclt${p}.f32 $Qd, $Qn, $Qm", (VCGTfq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; +let Predicates = [HasNEON, HasFullFP16] in +def : NEONInstAlias<"vclt${p}.f16 $Qd, $Qn, $Qm", + (VCGThq QPR:$Qd, QPR:$Qm, QPR:$Qn, pred:$p)>; // VSWP allows, but does not require, a type suffix. defm : NEONDTAnyInstAlias<"vswp${p}", "$Vd, $Vm", diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 40414da3ca81..df6f24306354 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -591,6 +591,34 @@ def tTRAP : TI<(outs), (ins), IIC_Br, // Load Store Instructions. // +// PC-relative loads need to be matched first as constant pool accesses need to +// always be PC-relative. We do this using AddedComplexity, as the pattern is +// simpler than the patterns of the other load instructions. +let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 10 in +def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, + T1Encoding<{0,1,0,0,1,?}> { + // A6.2 & A8.6.59 + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + +// SP-relative loads should be matched before standard immediate-offset loads as +// it means we avoid having to move SP to another register. +let canFoldAsLoad = 1 in +def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>, + T1LdStSP<{1,?,?}> { + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + // Loads: reg/reg and reg/imm5 let canFoldAsLoad = 1, isReMaterializable = 1 in multiclass thumb_ld_rr_ri_enc reg_opc, bits<4> imm_opc, @@ -598,16 +626,20 @@ multiclass thumb_ld_rr_ri_enc reg_opc, bits<4> imm_opc, AddrMode am, InstrItinClass itin_r, InstrItinClass itin_i, string asm, PatFrag opnode> { - def r : // reg/reg - T1pILdStEncode; + // Immediate-offset loads should be matched before register-offset loads as + // when the offset is a constant it's simpler to first check if it fits in the + // immediate offset field then fall back to register-offset if it doesn't. def i : // reg/imm5 T1pILdStEncodeImm; + // Register-offset loads are matched last. 
+  def r : // reg/reg
+    T1pILdStEncode<reg_opc,
+                   (outs tGPR:$Rt), (ins AddrMode_r:$addr),
+                   am, itin_r, asm, "\t$Rt, $addr",
+                   [(set tGPR:$Rt, (opnode AddrMode_r:$addr))]>;
 }

 // Stores: reg/reg and reg/imm5
 multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
@@ -615,32 +647,32 @@ multiclass thumb_st_rr_ri_enc<bits<3> reg_opc, bits<4> imm_opc,
                               AddrMode am, InstrItinClass itin_r,
                               InstrItinClass itin_i, string asm,
                               PatFrag opnode> {
-  def r : // reg/reg
-    T1pILdStEncode<reg_opc,
-                   (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
-                   am, itin_r, asm, "\t$Rt, $addr",
-                   [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
   def i : // reg/imm5
     T1pILdStEncodeImm<imm_opc, 0 /* Store */,
                       (outs), (ins tGPR:$Rt, AddrMode_i:$addr),
                       am, itin_i, asm, "\t$Rt, $addr",
                       [(opnode tGPR:$Rt, AddrMode_i:$addr)]>;
+  def r : // reg/reg
+    T1pILdStEncode<reg_opc,
+                   (outs), (ins tGPR:$Rt, AddrMode_r:$addr),
+                   am, itin_r, asm, "\t$Rt, $addr",
+                   [(opnode tGPR:$Rt, AddrMode_r:$addr)]>;
 }

 // A8.6.57 & A8.6.60
-defm tLDR  : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rrs4,
+defm tLDR  : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr,
                                 t_addrmode_is4, AddrModeT1_4,
                                 IIC_iLoad_r, IIC_iLoad_i, "ldr",
                                 UnOpFrag<(load node:$Src)>>;

 // A8.6.64 & A8.6.61
-defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rrs1,
+defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr,
                                 t_addrmode_is1, AddrModeT1_1,
                                 IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb",
                                 UnOpFrag<(zextloadi8 node:$Src)>>;

 // A8.6.76 & A8.6.73
-defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rrs2,
+defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr,
                                 t_addrmode_is2, AddrModeT1_2,
                                 IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh",
                                 UnOpFrag<(zextloadi16 node:$Src)>>;
@@ -659,47 +691,6 @@ def tLDRSH :  // A8.6.84
                  "ldrsh", "\t$Rt, $addr",
                  [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>;

-let canFoldAsLoad = 1 in
-def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
-                    "ldr", "\t$Rt, $addr",
-                    [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>,
-              T1LdStSP<{1,?,?}> {
-  bits<3> Rt;
-  bits<8> addr;
-  let Inst{10-8} = Rt;
-  let Inst{7-0} = addr;
-}
-
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i,
-                    "ldr", "\t$Rt, $addr",
-                    [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>,
-              T1Encoding<{0,1,0,0,1,?}> {
-  // A6.2 & A8.6.59
-  bits<3> Rt;
-  bits<8> addr;
-  let Inst{10-8} = Rt;
-  let Inst{7-0} = addr;
-}
-
-// A8.6.194 & A8.6.192
-defm tSTR  : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4,
-                                t_addrmode_is4, AddrModeT1_4,
-                                IIC_iStore_r, IIC_iStore_i, "str",
-                                BinOpFrag<(store node:$LHS, node:$RHS)>>;
-
-// A8.6.197 & A8.6.195
-defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
-                                t_addrmode_is1, AddrModeT1_1,
-                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
-                                BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
-
-// A8.6.207 & A8.6.205
-defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
-                                t_addrmode_is2, AddrModeT1_2,
-                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
-                                BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
-
 def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
                     "str", "\t$Rt, $addr",
@@ -711,6 +702,25 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
   let Inst{7-0} = addr;
 }

+// A8.6.194 & A8.6.192
+defm tSTR  : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr,
+                                t_addrmode_is4, AddrModeT1_4,
+                                IIC_iStore_r, IIC_iStore_i, "str",
+                                BinOpFrag<(store node:$LHS, node:$RHS)>>;
+
+// A8.6.197 & A8.6.195
+defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr,
+                                t_addrmode_is1, AddrModeT1_1,
+                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strb",
+                                BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+
+// A8.6.207 & A8.6.205
+defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr,
+                                t_addrmode_is2, AddrModeT1_2,
+                                IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+                                BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+
+
 //===----------------------------------------------------------------------===//
 //  Load / store multiple Instructions.
 //

@@ -730,6 +740,7 @@ def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops),
 // Writeback version is just a pseudo, as there's no encoding difference.
 // Writeback happens iff the base register is not in the destination register
 // list.
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
 def tLDMIA_UPD :
     InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo, GenericDomain,
                  "$Rn = $wb", IIC_iLoad_mu>,
@@ -1328,16 +1339,16 @@ def : T1Pat<(subc   tGPR:$lhs, tGPR:$rhs),
             (tSUBrr tGPR:$lhs, tGPR:$rhs)>;

 // Bswap 16 with load/store
-def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)),
-            (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>;
 def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
             (tREV16 (tLDRHi t_addrmode_is2:$addr))>;
-def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
-                           t_addrmode_rrs2:$addr),
-            (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>;
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rr:$addr)), (i32 16)),
+            (tREV16 (tLDRHr t_addrmode_rr:$addr))>;
 def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
                            t_addrmode_is2:$addr),
             (tSTRHi(tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+                           t_addrmode_rr:$addr),
+            (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rr:$addr)>;

 // ConstantPool
 def : T1Pat<(ARMWrapper tconstpool  :$dst), (tLEApcrel tconstpool  :$dst)>;
@@ -1372,10 +1383,10 @@ def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>,
       Requires<[IsThumb, HasV5T]>;

 // zextload i1 -> zextload i8
-def : T1Pat<(zextloadi1 t_addrmode_rrs1:$addr),
-            (tLDRBr t_addrmode_rrs1:$addr)>;
 def : T1Pat<(zextloadi1 t_addrmode_is1:$addr),
             (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(zextloadi1 t_addrmode_rr:$addr),
+            (tLDRBr t_addrmode_rr:$addr)>;

 // extload from the stack -> word load from the stack, as it avoids having to
 // materialize the base in a separate register. This only works when a word
@@ -1389,61 +1400,61 @@ def : T1Pat<(extloadi16 t_addrmode_sp:$addr),  (tLDRspi t_addrmode_sp:$addr)>,
       Requires<[IsThumb, IsThumb1Only, IsLE]>;

 // extload -> zextload
-def : T1Pat<(extloadi1  t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi1  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi8  t_addrmode_rrs1:$addr), (tLDRBr t_addrmode_rrs1:$addr)>;
-def : T1Pat<(extloadi8  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_rrs2:$addr), (tLDRHr t_addrmode_rrs2:$addr)>;
-def : T1Pat<(extloadi16 t_addrmode_is2:$addr),  (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi1  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi1  t_addrmode_rr:$addr),   (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi8  t_addrmode_is1:$addr),  (tLDRBi t_addrmode_is1:$addr)>;
+def : T1Pat<(extloadi8  t_addrmode_rr:$addr),   (tLDRBr t_addrmode_rr:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_is2:$addr),  (tLDRHi t_addrmode_is2:$addr)>;
+def : T1Pat<(extloadi16 t_addrmode_rr:$addr),   (tLDRHr t_addrmode_rr:$addr)>;

 // If it's impossible to use [r,r] address mode for sextload, select to
 // ldr{b|h} + sxt{b|h} instead.
def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), (tSXTB (tLDRBi t_addrmode_is1:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), - (tSXTB (tLDRBr t_addrmode_rrs1:$addr))>, +def : T1Pat<(sextloadi8 t_addrmode_rr:$addr), + (tSXTB (tLDRBr t_addrmode_rr:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), (tSXTH (tLDRHi t_addrmode_is2:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), - (tSXTH (tLDRHr t_addrmode_rrs2:$addr))>, +def : T1Pat<(sextloadi16 t_addrmode_rr:$addr), + (tSXTH (tLDRHr t_addrmode_rr:$addr))>, Requires<[IsThumb, IsThumb1Only, HasV6]>; -def : T1Pat<(sextloadi8 t_addrmode_rrs1:$addr), - (tASRri (tLSLri (tLDRBr t_addrmode_rrs1:$addr), 24), 24)>; def : T1Pat<(sextloadi8 t_addrmode_is1:$addr), (tASRri (tLSLri (tLDRBi t_addrmode_is1:$addr), 24), 24)>; -def : T1Pat<(sextloadi16 t_addrmode_rrs2:$addr), - (tASRri (tLSLri (tLDRHr t_addrmode_rrs2:$addr), 16), 16)>; +def : T1Pat<(sextloadi8 t_addrmode_rr:$addr), + (tASRri (tLSLri (tLDRBr t_addrmode_rr:$addr), 24), 24)>; def : T1Pat<(sextloadi16 t_addrmode_is2:$addr), (tASRri (tLSLri (tLDRHi t_addrmode_is2:$addr), 16), 16)>; +def : T1Pat<(sextloadi16 t_addrmode_rr:$addr), + (tASRri (tLSLri (tLDRHr t_addrmode_rr:$addr), 16), 16)>; def : T1Pat<(atomic_load_8 t_addrmode_is1:$src), (tLDRBi t_addrmode_is1:$src)>; -def : T1Pat<(atomic_load_8 t_addrmode_rrs1:$src), - (tLDRBr t_addrmode_rrs1:$src)>; +def : T1Pat<(atomic_load_8 t_addrmode_rr:$src), + (tLDRBr t_addrmode_rr:$src)>; def : T1Pat<(atomic_load_16 t_addrmode_is2:$src), (tLDRHi t_addrmode_is2:$src)>; -def : T1Pat<(atomic_load_16 t_addrmode_rrs2:$src), - (tLDRHr t_addrmode_rrs2:$src)>; +def : T1Pat<(atomic_load_16 t_addrmode_rr:$src), + (tLDRHr t_addrmode_rr:$src)>; def : T1Pat<(atomic_load_32 t_addrmode_is4:$src), (tLDRi t_addrmode_is4:$src)>; -def : T1Pat<(atomic_load_32 t_addrmode_rrs4:$src), - (tLDRr t_addrmode_rrs4:$src)>; +def : T1Pat<(atomic_load_32 t_addrmode_rr:$src), + (tLDRr t_addrmode_rr:$src)>; def : T1Pat<(atomic_store_8 t_addrmode_is1:$ptr, tGPR:$val), (tSTRBi tGPR:$val, t_addrmode_is1:$ptr)>; -def : T1Pat<(atomic_store_8 t_addrmode_rrs1:$ptr, tGPR:$val), - (tSTRBr tGPR:$val, t_addrmode_rrs1:$ptr)>; +def : T1Pat<(atomic_store_8 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRBr tGPR:$val, t_addrmode_rr:$ptr)>; def : T1Pat<(atomic_store_16 t_addrmode_is2:$ptr, tGPR:$val), (tSTRHi tGPR:$val, t_addrmode_is2:$ptr)>; -def : T1Pat<(atomic_store_16 t_addrmode_rrs2:$ptr, tGPR:$val), - (tSTRHr tGPR:$val, t_addrmode_rrs2:$ptr)>; +def : T1Pat<(atomic_store_16 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRHr tGPR:$val, t_addrmode_rr:$ptr)>; def : T1Pat<(atomic_store_32 t_addrmode_is4:$ptr, tGPR:$val), (tSTRi tGPR:$val, t_addrmode_is4:$ptr)>; -def : T1Pat<(atomic_store_32 t_addrmode_rrs4:$ptr, tGPR:$val), - (tSTRr tGPR:$val, t_addrmode_rrs4:$ptr)>; +def : T1Pat<(atomic_store_32 t_addrmode_rr:$ptr, tGPR:$val), + (tSTRr tGPR:$val, t_addrmode_rr:$ptr)>; // Large immediate handling. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index aba8a7b10fd9..d460d33fa0a3 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -43,7 +43,7 @@ def t2_shift_imm : Operand { // Shifted operands. No register controlled shifts for Thumb2. // Note: We do not support rrx shifted operands yet. 
def t2_so_reg : Operand, // reg imm - ComplexPattern { let EncoderMethod = "getT2SORegOpValue"; let PrintMethod = "printT2SOOperand"; @@ -1554,19 +1554,21 @@ def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>; def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // ldrd / strd pre / post variants -// For disassembly only. +let mayLoad = 1 in def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins t2addrmode_imm8s4_pre:$addr), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr!", "$addr.base = $wb", []> { let DecoderMethod = "DecodeT2LDRDPreInstruction"; } +let mayLoad = 1 in def t2LDRD_POST : T2Ii8s4post<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2, GPR:$wb), (ins addr_offset_none:$addr, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, $addr$imm", "$addr.base = $wb", []>; +let mayStore = 1 in def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4_pre:$addr), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, $addr!", @@ -1574,6 +1576,7 @@ def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs GPR:$wb), let DecoderMethod = "DecodeT2STRDPreInstruction"; } +let mayStore = 1 in def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr, t2am_imm8s4_offset:$imm), @@ -2100,7 +2103,7 @@ def : T2Pat<(ARMadde rGPR:$src, imm0_65535_neg:$imm, CPSR), def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-24} = 0b010; let Inst{23} = 0b1; @@ -2117,7 +2120,7 @@ class T2I_pam op22_20, bits<4> op7_4, string opc, dag iops = (ins rGPR:$Rn, rGPR:$Rm), string asm = "\t$Rd, $Rn, $Rm"> : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0101; let Inst{22-20} = op22_20; @@ -2215,13 +2218,13 @@ class T2FourReg_mac op22_20, bits<4> op7_4, dag oops, def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary, "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; // Signed/Unsigned saturate. class T2SatI, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11110; let Inst{25-22} = 0b1100; let Inst{20} = 0; @@ -2278,7 +2281,7 @@ def t2USAT: T2SatI< def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn), NoItinerary, "usat16", "\t$Rd, $sat_imm, $Rn", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-22} = 0b1111001110; let Inst{20} = 0; let Inst{15} = 0; @@ -2288,8 +2291,8 @@ def t2USAT16: T2SatI<(outs rGPR:$Rd), (ins imm0_15:$sat_imm, rGPR:$Rn), let Inst{5-4} = 0b00; } -def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>; -def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; +def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), (t2USAT imm0_31:$pos, GPR:$a, 0)>; //===----------------------------------------------------------------------===// // Shift and rotate Instructions. 
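// (Note on the saturate patterns above: SSAT encodes its saturation position
//  as 1-32 and USAT as 0-31, so tightening the int_arm_ssat/int_arm_usat
//  patterns from a bare 'imm' to imm1_32/imm0_31 stops instruction selection
//  from accepting out-of-range positions.)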
@@ -2605,7 +2608,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110, (outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; } // hasSideEffects // Rounding variants of the below included for disassembly only @@ -2614,7 +2617,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110, def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2624,7 +2627,7 @@ def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2636,7 +2639,7 @@ def t2SMMLA : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2646,7 +2649,7 @@ def t2SMMLA : T2FourReg< def t2SMMLAR: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b101; @@ -2657,7 +2660,7 @@ def t2SMMLS: T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2667,7 +2670,7 @@ def t2SMMLS: T2FourReg< def t2SMMLSR:T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b110; @@ -2679,7 +2682,7 @@ multiclass T2I_smul { !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), (sext_inreg rGPR:$Rm, i16)))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2692,7 +2695,7 @@ multiclass T2I_smul { !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16), (sra rGPR:$Rm, (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2705,7 +2708,7 @@ multiclass T2I_smul { !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)), (sext_inreg rGPR:$Rm, i16)))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2718,7 +2721,7 @@ multiclass T2I_smul { !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 
16)), (sra rGPR:$Rm, (i32 16))))]>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2730,7 +2733,7 @@ multiclass T2I_smul { def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2742,7 +2745,7 @@ multiclass T2I_smul { def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2760,7 +2763,7 @@ multiclass T2I_smla { [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2773,7 +2776,7 @@ multiclass T2I_smla { !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2786,7 +2789,7 @@ multiclass T2I_smla { !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sext_inreg rGPR:$Rm, i16))))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2799,7 +2802,7 @@ multiclass T2I_smla { !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra", [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)), (sra rGPR:$Rm, (i32 16)))))]>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b001; @@ -2811,7 +2814,7 @@ multiclass T2I_smla { (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2823,7 +2826,7 @@ multiclass T2I_smla { (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { + Requires<[IsThumb2, HasDSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; let Inst{22-20} = 0b011; @@ -2839,79 +2842,79 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>; def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALTB : T2FourReg_mac<1, 0b100, 
0b1010, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm", [/* For disassembly only; pattern left blank */]>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; // Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD def t2SMUAD: T2ThreeReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2SMUADX:T2ThreeReg_mac< 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2SMUSD: T2ThreeReg_mac< 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2SMUSDX:T2ThreeReg_mac< 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]> { + Requires<[IsThumb2, HasDSP]> { let Inst{15-12} = 0b1111; } def t2SMLAD : T2FourReg_mac< 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLADX : T2FourReg_mac< 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald", "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx", "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld", "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd), (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx", "\t$Ra, $Rd, $Rn, $Rm", []>, - Requires<[IsThumb2, HasThumb2DSP]>; + Requires<[IsThumb2, HasDSP]>; //===----------------------------------------------------------------------===// // Division Instructions. 
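// (Context for the HasThumb2DSP -> HasDSP change running through this file: a
//  plausible sketch of the renamed predicate in ARM.td, which this diff does
//  not show, is
//    def HasDSP : Predicate<"Subtarget->hasDSP()">,
//                 AssemblerPredicate<"FeatureDSP", "dsp">;
//  i.e. the DSP multiplies are now gated on a generic DSP feature instead of
//  a Thumb2-specific one.)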
@@ -2961,7 +2964,7 @@ def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "rbit", "\t$Rd, $Rm", - [(set rGPR:$Rd, (ARMrbit rGPR:$Rm))]>, + [(set rGPR:$Rd, (bitreverse rGPR:$Rm))]>, Sched<[WriteALU]>; def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e83f8c850632..050cd1a445ad 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -20,7 +20,6 @@ def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutGlue]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0, [SDNPOutGlue]>; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; - //===----------------------------------------------------------------------===// // Operand Definitions. // @@ -93,7 +92,7 @@ def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), IIC_fpLoad32, "vldr", "\t$Sd, $addr", - [(set SPR:$Sd, (load addrmode5:$addr))]> { + [(set SPR:$Sd, (alignedload32 addrmode5:$addr))]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; @@ -107,7 +106,7 @@ def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), IIC_fpStore32, "vstr", "\t$Sd, $addr", - [(store SPR:$Sd, addrmode5:$addr)]> { + [(alignedstore32 SPR:$Sd, addrmode5:$addr)]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; @@ -393,8 +392,8 @@ multiclass vmaxmin_inst { } } -defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, ARMvmaxnm>; -defm VMINNM : vmaxmin_inst<"vminnm", 1, ARMvminnm>; +defm VMAXNM : vmaxmin_inst<"vmaxnm", 0, fmaxnum>; +defm VMINNM : vmaxmin_inst<"vminnm", 1, fminnum>; // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), (f64 DPR:$b)), @@ -541,19 +540,23 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // FIXME: Verify encoding after integrated assembler is working. 
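// (Note: the Requires<[HasFP16]> guards added below gate the half-precision
//  vcvtb/vcvtt conversions on the FP16 extension; targets without it have no
//  f16<->f32 conversion encodings, so these forms must not be selected or
//  assembled there.)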
 def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]>,
+                 Requires<[HasFP16]>;
 
 def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]>,
+                 Requires<[HasFP16]>;
 
 def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]>,
+                 Requires<[HasFP16]>;
 
 def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>;
+                 [/* For disassembly only; pattern left blank */]>,
+                 Requires<[HasFP16]>;
 
 def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
@@ -922,6 +925,22 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
   let isRegSequence = 1;
 }
 
+// Hoist an fabs or a fneg of a value coming from integer registers
+// and do the fabs/fneg on the integer value. This is never a loss
+// and could enable the conversion to float to be removed completely.
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+          (VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+      Requires<[IsARM]>;
+def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+          (VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
+      Requires<[IsThumb2]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+          (VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
+      Requires<[IsARM]>;
+def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
+          (VMOVDRR GPR:$Rl, (t2EORri GPR:$Rh, (i32 0x80000000)))>,
+      Requires<[IsThumb2]>;
+
 let hasSideEffects = 0 in
 def VMOVSRR : AVConv5I<0b11000100, 0b1010,
                        (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
@@ -1003,7 +1022,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(f64 (sint_to_fp GPR:$a)),
                (VSITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
-  def : VFPPat<(f64 (sint_to_fp (i32 (load addrmode5:$a)))),
+  def : VFPPat<(f64 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
                (VSITOD (VLDRS addrmode5:$a))>;
 }
 
@@ -1021,7 +1040,7 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
 def : VFPNoNEONPat<(f32 (sint_to_fp GPR:$a)),
                    (VSITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
-def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (sint_to_fp (i32 (alignedload32 addrmode5:$a)))),
                    (VSITOS (VLDRS addrmode5:$a))>;
 
 def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011,
@@ -1035,7 +1054,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(f64 (uint_to_fp GPR:$a)),
               (VUITOD (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
-  def : VFPPat<(f64 (uint_to_fp (i32 (load addrmode5:$a)))),
+  def : VFPPat<(f64 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
               (VUITOD (VLDRS addrmode5:$a))>;
 }
 
@@ -1053,7 +1072,7 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010,
 def : VFPNoNEONPat<(f32 (uint_to_fp GPR:$a)),
                    (VUITOS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
-def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (load addrmode5:$a)))),
+def : VFPNoNEONPat<(f32 (uint_to_fp (i32 (alignedload32 addrmode5:$a)))),
                    (VUITOS (VLDRS addrmode5:$a))>;
 
 // FP -> Int:
@@ -1106,7 +1125,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(i32 (fp_to_sint (f64 DPR:$a))),
               (COPY_TO_REGCLASS (VTOSIZD DPR:$a), GPR)>;
 
-  def : VFPPat<(store (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
+  def : VFPPat<(alignedstore32 (i32 (fp_to_sint (f64 DPR:$a))), addrmode5:$ptr),
               (VSTRS (VTOSIZD DPR:$a), addrmode5:$ptr)>;
 }
 
@@ -1124,7 +1143,8 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010,
 def : VFPNoNEONPat<(i32 (fp_to_sint SPR:$a)),
                    (COPY_TO_REGCLASS (VTOSIZS SPR:$a), GPR)>;
 
-def : VFPNoNEONPat<(store (i32 (fp_to_sint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_sint (f32 SPR:$a))),
+                   addrmode5:$ptr),
                    (VSTRS (VTOSIZS SPR:$a), addrmode5:$ptr)>;
 
 def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011,
@@ -1138,7 +1158,7 @@ let Predicates=[HasVFP2, HasDPVFP] in {
   def : VFPPat<(i32 (fp_to_uint (f64 DPR:$a))),
               (COPY_TO_REGCLASS (VTOUIZD DPR:$a), GPR)>;
 
-  def : VFPPat<(store (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
+  def : VFPPat<(alignedstore32 (i32 (fp_to_uint (f64 DPR:$a))), addrmode5:$ptr),
               (VSTRS (VTOUIZD DPR:$a), addrmode5:$ptr)>;
 }
 
@@ -1156,7 +1176,8 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010,
 def : VFPNoNEONPat<(i32 (fp_to_uint SPR:$a)),
                    (COPY_TO_REGCLASS (VTOUIZS SPR:$a), GPR)>;
 
-def : VFPNoNEONPat<(store (i32 (fp_to_uint (f32 SPR:$a))), addrmode5:$ptr),
+def : VFPNoNEONPat<(alignedstore32 (i32 (fp_to_uint (f32 SPR:$a))),
+                   addrmode5:$ptr),
                    (VSTRS (VTOUIZS SPR:$a), addrmode5:$ptr)>;
 
 // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 265b86f75f1d..725b8383c961 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -60,17 +60,24 @@ STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
 STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
 STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
 
+namespace llvm {
+void initializeARMLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
+
 namespace {
   /// Post- register allocation pass that combines load / store instructions to
   /// form ldm / stm instructions.
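  /// (Illustrative: two adjacent word loads such as "ldr r1, [r0]" and
  /// "ldr r2, [r0, #4]" can be rewritten as the single "ldm r0, {r1, r2}".)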
 struct ARMLoadStoreOpt : public MachineFunctionPass {
   static char ID;
-  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
+  ARMLoadStoreOpt() : MachineFunctionPass(ID) {
+    initializeARMLoadStoreOptPass(*PassRegistry::getPassRegistry());
+  }
 
   const MachineFunction *MF;
   const TargetInstrInfo *TII;
   const TargetRegisterInfo *TRI;
-  const MachineRegisterInfo *MRI;
   const ARMSubtarget *STI;
   const TargetLowering *TL;
   ARMFunctionInfo *AFI;
@@ -84,7 +91,7 @@ namespace {
   bool runOnMachineFunction(MachineFunction &Fn) override;
 
   const char *getPassName() const override {
-    return "ARM load / store optimization pass";
+    return ARM_LOAD_STORE_OPT_NAME;
   }
 
 private:
@@ -118,6 +125,7 @@ namespace {
   };
   SpecificBumpPtrAllocator Allocator;
   SmallVector Candidates;
+  SmallVector MergeBaseCandidates;
 
   void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                           MachineBasicBlock::const_iterator Before);
@@ -140,12 +148,16 @@ namespace {
                     MachineBasicBlock::iterator &MBBI);
   bool MergeBaseUpdateLoadStore(MachineInstr *MI);
   bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
+  bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
   bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
   bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
+  bool CombineMovBx(MachineBasicBlock &MBB);
 };
 char ARMLoadStoreOpt::ID = 0;
 }
 
+INITIALIZE_PASS(ARMLoadStoreOpt, "arm-load-store-opt", ARM_LOAD_STORE_OPT_NAME, false, false)
+
 static bool definesCPSR(const MachineInstr *MI) {
   for (const auto &MO : MI->operands()) {
     if (!MO.isReg())
@@ -619,9 +631,10 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
     unsigned NewBase;
     if (isi32Load(Opcode)) {
-      // If it is a load, then just use one of the destination register to
-      // use as the new base.
+      // If it is a load, then just use one of the destination registers
+      // as the new base; it will no longer be writeback in Thumb1.
       NewBase = Regs[NumRegs-1].first;
+      Writeback = false;
     } else {
       // Find a free register that we can use as scratch register.
       moveLiveRegsBefore(MBB, InsertBefore);
@@ -725,9 +738,12 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
   MachineInstrBuilder MIB;
   if (Writeback) {
-    if (Opcode == ARM::tLDMIA)
+    assert(isThumb1 && "expected Writeback only in Thumb1");
+    if (Opcode == ARM::tLDMIA) {
+      assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
       // Update tLDMIA with writeback if necessary.
       Opcode = ARM::tLDMIA_UPD;
+    }
 
     MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
 
@@ -784,6 +800,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
   SmallVector<std::pair<unsigned, bool>, 8> Regs;
   SmallVector ImpDefs;
   DenseSet<unsigned> KilledRegs;
+  DenseSet<unsigned> UsedRegs;
 
   // Determine list of registers and list of implicit super-register defs.
for (const MachineInstr *MI : Cand.Instrs) { const MachineOperand &MO = getLoadStoreRegOp(*MI); @@ -792,6 +809,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { if (IsKill) KilledRegs.insert(Reg); Regs.push_back(std::make_pair(Reg, IsKill)); + UsedRegs.insert(Reg); if (IsLoad) { // Collect any implicit defs of super-registers, after merging we can't @@ -881,7 +899,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { for (MachineOperand &MO : MI.uses()) { if (!MO.isReg() || !MO.isKill()) continue; - if (KilledRegs.count(MO.getReg())) + if (UsedRegs.count(MO.getReg())) MO.setIsKill(false); } } @@ -995,76 +1013,6 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { } while (SIndex < EIndex); } -static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tSUBi8: - case ARM::t2SUBri: - case ARM::SUBri: - CheckCPSRDef = true; - break; - case ARM::tSUBspi: - break; - } - - // Make sure the offset fits in 8 bits. - if (Bytes == 0 || (Limit && Bytes >= Limit)) - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tSUBspi || - MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? !definesCPSR(MI) : true; -} - -static bool isMatchingIncrement(MachineInstr *MI, unsigned Base, - unsigned Bytes, unsigned Limit, - ARMCC::CondCodes Pred, unsigned PredReg) { - unsigned MyPredReg = 0; - if (!MI) - return false; - - bool CheckCPSRDef = false; - switch (MI->getOpcode()) { - default: return false; - case ARM::tADDi8: - case ARM::t2ADDri: - case ARM::ADDri: - CheckCPSRDef = true; - break; - case ARM::tADDspi: - break; - } - - if (Bytes == 0 || (Limit && Bytes >= Limit)) - // Make sure the offset fits in 8 bits. - return false; - - unsigned Scale = (MI->getOpcode() == ARM::tADDspi || - MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME - if (!(MI->getOperand(0).getReg() == Base && - MI->getOperand(1).getReg() == Base && - (MI->getOperand(2).getImm() * Scale) == Bytes && - getInstrPredicate(MI, MyPredReg) == Pred && - MyPredReg == PredReg)) - return false; - - return CheckCPSRDef ? !definesCPSR(MI) : true; -} - static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, ARM_AM::AMSubMode Mode) { switch (Opc) { @@ -1132,6 +1080,75 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } } +/// Check if the given instruction increments or decrements a register and +/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags +/// generated by the instruction are possibly read as well. 
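+/// (Illustrative: for an ARM 'add r1, r1, #4' (ADDri) with Reg = r1 this
+/// returns +4, while a Thumb 'sub sp, #16' (tSUBspi, whose immediate is
+/// scaled by 4) returns -16; 0 means "no usable increment/decrement".)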
+static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
+                                  ARMCC::CondCodes Pred, unsigned PredReg) {
+  bool CheckCPSRDef;
+  int Scale;
+  switch (MI.getOpcode()) {
+  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
+  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
+  case ARM::t2SUBri:
+  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
+  case ARM::t2ADDri:
+  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
+  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
+  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
+  default: return 0;
+  }
+
+  unsigned MIPredReg;
+  if (MI.getOperand(0).getReg() != Reg ||
+      MI.getOperand(1).getReg() != Reg ||
+      getInstrPredicate(&MI, MIPredReg) != Pred ||
+      MIPredReg != PredReg)
+    return 0;
+
+  if (CheckCPSRDef && definesCPSR(&MI))
+    return 0;
+  return MI.getOperand(2).getImm() * Scale;
+}
+
+/// Searches for an increment or decrement of \p Reg before \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
+                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+  Offset = 0;
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+  MachineBasicBlock::iterator EndMBBI = MBB.end();
+  if (MBBI == BeginMBBI)
+    return EndMBBI;
+
+  // Skip debug values.
+  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
+  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
+    --PrevMBBI;
+
+  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
+  return Offset == 0 ? EndMBBI : PrevMBBI;
+}
+
+/// Searches for an increment or decrement of \p Reg after \p MBBI.
+static MachineBasicBlock::iterator
+findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
+                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
+  Offset = 0;
+  MachineBasicBlock &MBB = *MBBI->getParent();
+  MachineBasicBlock::iterator EndMBBI = MBB.end();
+  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
+  // Skip debug values.
+  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+    ++NextMBBI;
+  if (NextMBBI == EndMBBI)
+    return EndMBBI;
+
+  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
+  return Offset == 0 ? EndMBBI : NextMBBI;
+}
+
 /// Fold preceding/trailing inc/dec of base register into the
 /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
 ///
@@ -1151,7 +1168,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
   const MachineOperand &BaseOP = MI->getOperand(0);
   unsigned Base = BaseOP.getReg();
   bool BaseKill = BaseOP.isKill();
-  unsigned Bytes = getLSMultipleTransferSize(MI);
   unsigned PredReg = 0;
   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
   unsigned Opcode = MI->getOpcode();
@@ -1163,49 +1179,24 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
     if (MI->getOperand(i).getReg() == Base)
       return false;
 
-  bool DoMerge = false;
-  ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
-
-  // Try merging with the previous instruction.
+ int Bytes = getLSMultipleTransferSize(MI); MachineBasicBlock &MBB = *MI->getParent(); - MachineBasicBlock::iterator BeginMBBI = MBB.begin(); MachineBasicBlock::iterator MBBI(MI); - if (MBBI != BeginMBBI) { - MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI); - while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) - --PrevMBBI; - if (Mode == ARM_AM::ia && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::db; - DoMerge = true; - } else if (Mode == ARM_AM::ib && - isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { - Mode = ARM_AM::da; - DoMerge = true; - } - if (DoMerge) - MBB.erase(PrevMBBI); + int Offset; + MachineBasicBlock::iterator MergeInstr + = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset); + ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode); + if (Mode == ARM_AM::ia && Offset == -Bytes) { + Mode = ARM_AM::db; + } else if (Mode == ARM_AM::ib && Offset == -Bytes) { + Mode = ARM_AM::da; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) && + ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) + return false; } - - // Try merging with the next instruction. - MachineBasicBlock::iterator EndMBBI = MBB.end(); - if (!DoMerge && MBBI != EndMBBI) { - MachineBasicBlock::iterator NextMBBI = std::next(MBBI); - while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) - ++NextMBBI; - if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && - isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) && - isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { - DoMerge = true; - } - if (DoMerge) - MBB.erase(NextMBBI); - } - - if (!DoMerge) - return false; + MBB.erase(MergeInstr); unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)) @@ -1283,7 +1274,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { unsigned Base = getLoadStoreBaseOp(*MI).getReg(); bool BaseKill = getLoadStoreBaseOp(*MI).isKill(); - unsigned Bytes = getLSMultipleTransferSize(MI); unsigned Opcode = MI->getOpcode(); DebugLoc DL = MI->getDebugLoc(); bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || @@ -1295,7 +1285,6 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0) return false; - bool isLd = isLoadSingle(Opcode); // Can't do the merge if the destination register is the same as the would-be // writeback register. if (MI->getOperand(0).getReg() == Base) @@ -1303,55 +1292,31 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); - bool DoMerge = false; - ARM_AM::AddrOpc AddSub = ARM_AM::add; - unsigned NewOpc = 0; - // AM2 - 12 bits, thumb2 - 8 bits. - unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); - - // Try merging with the previous instruction. 
+  int Bytes = getLSMultipleTransferSize(MI);
   MachineBasicBlock &MBB = *MI->getParent();
-  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
   MachineBasicBlock::iterator MBBI(MI);
-  if (MBBI != BeginMBBI) {
-    MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
-    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
-      --PrevMBBI;
-    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
-      DoMerge = true;
-      AddSub = ARM_AM::sub;
-    } else if (!isAM5 &&
-               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
-      DoMerge = true;
-    }
-    if (DoMerge) {
-      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
-      MBB.erase(PrevMBBI);
-    }
+  int Offset;
+  MachineBasicBlock::iterator MergeInstr
+    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
+  unsigned NewOpc;
+  if (!isAM5 && Offset == Bytes) {
+    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+  } else if (Offset == -Bytes) {
+    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+  } else {
+    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
+    if (Offset == Bytes) {
+      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
+    } else if (!isAM5 && Offset == -Bytes) {
+      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
+    } else
+      return false;
   }
+  MBB.erase(MergeInstr);
 
-  // Try merging with the next instruction.
-  MachineBasicBlock::iterator EndMBBI = MBB.end();
-  if (!DoMerge && MBBI != EndMBBI) {
-    MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
-    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
-      ++NextMBBI;
-    if (!isAM5 &&
-        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
-      DoMerge = true;
-      AddSub = ARM_AM::sub;
-    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
-      DoMerge = true;
-    }
-    if (DoMerge) {
-      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
-      MBB.erase(NextMBBI);
-    }
-  }
-
-  if (!DoMerge)
-    return false;
+  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
+  bool isLd = isLoadSingle(Opcode);
 
   if (isAM5) {
     // VLDM[SD]_UPD, VSTM[SD]_UPD
     // (There are no base-updating versions of VLDR/VSTR instructions, but the
@@ -1368,18 +1333,16 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
     if (isAM2) {
       // LDR_PRE, LDR_POST
       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
-        int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
           .addReg(Base, RegState::Define)
           .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
       } else {
-        int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
+        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
         BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
           .addReg(Base, RegState::Define)
-          .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
+          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
       }
     } else {
-      int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
       // t2LDR_PRE, t2LDR_POST
       BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
         .addReg(Base, RegState::Define)
@@ -1391,13 +1354,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
     // the vestigial zero-reg offset register. When that's fixed, this clause
     // can be removed entirely.
if (isAM2 && NewOpc == ARM::STR_POST_IMM) { - int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); + int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); // STR_PRE, STR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) - .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); + .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg); } else { - int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; // t2STR_PRE, t2STR_POST BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base) .addReg(MO.getReg(), getKillRegState(MO.isKill())) @@ -1409,46 +1371,75 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) { return true; } +bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const { + unsigned Opcode = MI.getOpcode(); + assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) && + "Must have t2STRDi8 or t2LDRDi8"); + if (MI.getOperand(3).getImm() != 0) + return false; + + // Behaviour for writeback is undefined if base register is the same as one + // of the others. + const MachineOperand &BaseOp = MI.getOperand(2); + unsigned Base = BaseOp.getReg(); + const MachineOperand &Reg0Op = MI.getOperand(0); + const MachineOperand &Reg1Op = MI.getOperand(1); + if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base) + return false; + + unsigned PredReg; + ARMCC::CondCodes Pred = getInstrPredicate(&MI, PredReg); + MachineBasicBlock::iterator MBBI(MI); + MachineBasicBlock &MBB = *MI.getParent(); + int Offset; + MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred, + PredReg, Offset); + unsigned NewOpc; + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE; + } else { + MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset); + if (Offset == 8 || Offset == -8) { + NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST; + } else + return false; + } + MBB.erase(MergeInstr); + + DebugLoc DL = MI.getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); + if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) { + MIB.addOperand(Reg0Op).addOperand(Reg1Op) + .addReg(BaseOp.getReg(), RegState::Define); + } else { + assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST); + MIB.addReg(BaseOp.getReg(), RegState::Define) + .addOperand(Reg0Op).addOperand(Reg1Op); + } + MIB.addReg(BaseOp.getReg(), RegState::Kill) + .addImm(Offset).addImm(Pred).addReg(PredReg); + assert(TII->get(Opcode).getNumOperands() == 6 && + TII->get(NewOpc).getNumOperands() == 7 && + "Unexpected number of operands in Opcode specification."); + + // Transfer implicit operands. + for (const MachineOperand &MO : MI.implicit_operands()) + MIB.addOperand(MO); + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + + MBB.erase(MBBI); + return true; +} + /// Returns true if instruction is a memory operation that this pass is capable /// of operating on. -static bool isMemoryOp(const MachineInstr *MI) { - // When no memory operands are present, conservatively assume unaligned, - // volatile, unfoldable. - if (!MI->hasOneMemOperand()) - return false; - - const MachineMemOperand *MMO = *MI->memoperands_begin(); - - // Don't touch volatile memory accesses - we may be changing their order. - if (MMO->isVolatile()) - return false; - - // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is - // not. 
-  if (MMO->getAlignment() < 4)
-    return false;
-
-  // str could probably be eliminated entirely, but for now we just want
-  // to avoid making a mess of it.
-  // FIXME: Use str as a wildcard to enable better stm folding.
-  if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
-      MI->getOperand(0).isUndef())
-    return false;
-
-  // Likewise don't mess with references to undefined addresses.
-  if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
-      MI->getOperand(1).isUndef())
-    return false;
-
-  unsigned Opcode = MI->getOpcode();
+static bool isMemoryOp(const MachineInstr &MI) {
+  unsigned Opcode = MI.getOpcode();
   switch (Opcode) {
-  default: break;
   case ARM::VLDRS:
   case ARM::VSTRS:
-    return MI->getOperand(1).isReg();
   case ARM::VLDRD:
   case ARM::VSTRD:
-    return MI->getOperand(1).isReg();
   case ARM::LDRi12:
   case ARM::STRi12:
   case ARM::tLDRi:
@@ -1459,9 +1450,40 @@ static bool isMemoryOp(const MachineInstr &MI) {
   case ARM::t2LDRi12:
   case ARM::t2STRi8:
   case ARM::t2STRi12:
-    return MI->getOperand(1).isReg();
+    break;
+  default:
+    return false;
   }
-  return false;
+  if (!MI.getOperand(1).isReg())
+    return false;
+
+  // When no memory operands are present, conservatively assume unaligned,
+  // volatile, unfoldable.
+  if (!MI.hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand &MMO = **MI.memoperands_begin();
+
+  // Don't touch volatile memory accesses - we may be changing their order.
+  if (MMO.isVolatile())
+    return false;
+
+  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+  // not.
+  if (MMO.getAlignment() < 4)
+    return false;
+
+  // str could probably be eliminated entirely, but for now we just want
+  // to avoid making a mess of it.
+  // FIXME: Use str as a wildcard to enable better stm folding.
+  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
+    return false;
+
+  // Likewise don't mess with references to undefined addresses.
+  if (MI.getOperand(1).isUndef())
+    return false;
+
+  return true;
 }
 
 static void InsertLDR_STR(MachineBasicBlock &MBB,
@@ -1616,6 +1638,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   ARMCC::CondCodes CurrPred = ARMCC::AL;
   unsigned Position = 0;
   assert(Candidates.size() == 0);
+  assert(MergeBaseCandidates.size() == 0);
   LiveRegsValid = false;
 
   for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
@@ -1626,7 +1649,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       continue;
     ++Position;
 
-    if (isMemoryOp(MBBI)) {
+    if (isMemoryOp(*MBBI)) {
       unsigned Opcode = MBBI->getOpcode();
       const MachineOperand &MO = MBBI->getOperand(0);
       unsigned Reg = MO.getReg();
@@ -1694,8 +1717,15 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       MBBI = I;
       --Position;
       // Fallthrough to look into existing chain.
-    } else if (MBBI->isDebugValue())
+    } else if (MBBI->isDebugValue()) {
       continue;
+    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
+               MBBI->getOpcode() == ARM::t2STRDi8) {
+      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions;
+      // remember them because we may still be able to merge add/sub into them.
+      MergeBaseCandidates.push_back(MBBI);
+    }
+
     // If we are here then the chain is broken; extract candidates for a merge.
     if (MemOps.size() > 0) {
@@ -1726,7 +1756,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
       if (Merged) {
         Changed = true;
         unsigned Opcode = Merged->getOpcode();
-        if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
+        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
+          MergeBaseUpdateLSDouble(*Merged);
+        else
           MergeBaseUpdateLSMultiple(Merged);
       } else {
         for (MachineInstr *MI : Candidate->Instrs) {
@@ -1741,6 +1773,10 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
     }
   }
   Candidates.clear();
+  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
+  for (MachineInstr *MI : MergeBaseCandidates)
+    MergeBaseUpdateLSDouble(*MI);
+  MergeBaseCandidates.clear();
 
   return Changed;
 }
@@ -1765,7 +1801,11 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
       (MBBI->getOpcode() == ARM::BX_RET ||
        MBBI->getOpcode() == ARM::tBX_RET ||
        MBBI->getOpcode() == ARM::MOVPCLR)) {
-    MachineInstr *PrevMI = std::prev(MBBI);
+    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
+    // Ignore any DBG_VALUE instructions.
+    while (PrevI->isDebugValue() && PrevI != MBB.begin())
+      --PrevI;
+    MachineInstr *PrevMI = PrevI;
     unsigned Opcode = PrevMI->getOpcode();
     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
@@ -1786,6 +1826,30 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
   return false;
 }
 
+bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
+  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
+  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
+      MBBI->getOpcode() != ARM::tBX_RET)
+    return false;
+
+  MachineBasicBlock::iterator Prev = MBBI;
+  --Prev;
+  if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
+    return false;
+
+  for (auto Use : Prev->uses())
+    if (Use.isKill()) {
+      AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
+                     .addReg(Use.getReg(), RegState::Kill))
+                     .copyImplicitOps(&*MBBI);
+      MBB.erase(MBBI);
+      MBB.erase(Prev);
+      return true;
+    }
+
+  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
+}
+
 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   MF = &Fn;
   STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
@@ -1793,7 +1857,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   AFI = Fn.getInfo<ARMFunctionInfo>();
   TII = STI->getInstrInfo();
   TRI = STI->getRegisterInfo();
-  MRI = &Fn.getRegInfo();
+  RegClassInfoValid = false;
   isThumb2 = AFI->isThumb2Function();
   isThumb1 = AFI->isThumbFunction() && !isThumb2;
 
@@ -1805,18 +1869,29 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
       Modified |= LoadStoreMultipleOpti(MBB);
     if (STI->hasV5TOps())
       Modified |= MergeReturnIntoLDM(MBB);
+    if (isThumb1)
+      Modified |= CombineMovBx(MBB);
   }
 
   Allocator.DestroyAll();
   return Modified;
 }
 
+namespace llvm {
+void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
+}
+
+#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
+  "ARM pre- register allocation load / store optimization pass"
+
 namespace {
   /// Pre- register allocation pass that moves loads / stores from consecutive
   /// locations close together to make it more likely they will be combined later.
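  /// (Illustrative: hoisting "ldr r1, [r0]" next to "ldr r2, [r0, #4]" gives
  /// the post-RA pass above a chance to turn the pair into ldrd or ldm.)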
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; - ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) { + initializeARMPreAllocLoadStoreOptPass(*PassRegistry::getPassRegistry()); + } const DataLayout *TD; const TargetInstrInfo *TII; @@ -1828,7 +1903,7 @@ namespace { bool runOnMachineFunction(MachineFunction &Fn) override; const char *getPassName() const override { - return "ARM pre- register allocation load / store optimization pass"; + return ARM_PREALLOC_LOAD_STORE_OPT_NAME; } private: @@ -1847,8 +1922,11 @@ namespace { char ARMPreAllocLoadStoreOpt::ID = 0; } +INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-load-store-opt", + ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) + bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getDataLayout(); + TD = &Fn.getDataLayout(); STI = &static_cast(Fn.getSubtarget()); TII = STI->getInstrInfo(); TRI = STI->getRegisterInfo(); @@ -1856,9 +1934,8 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { MF = &Fn; bool Modified = false; - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) - Modified |= RescheduleLoadStoreInstrs(MFI); + for (MachineBasicBlock &MFI : Fn) + Modified |= RescheduleLoadStoreInstrs(&MFI); return Modified; } @@ -2187,7 +2264,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { if (!MI->isDebugValue()) MI2LocMap[MI] = ++Loc; - if (!isMemoryOp(MI)) + if (!isMemoryOp(*MI)) continue; unsigned PredReg = 0; if (getInstrPredicate(MI, PredReg) != ARMCC::AL) @@ -2275,3 +2352,4 @@ FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { return new ARMPreAllocLoadStoreOpt(); return new ARMLoadStoreOpt(); } + diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp index f5250ff83f0b..ac0330fbcb34 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARMMachineFuctionInfo.cpp - ARM machine function info -------------===// +//===-- ARMMachineFunctionInfo.cpp - ARM machine function info ------------===// // // The LLVM Compiler Infrastructure // @@ -20,5 +20,4 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), - GlobalBaseReg(0) {} + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {} diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 14dd9ef333af..d6447978ef2c 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===-- ARMMachineFuctionInfo.h - ARM machine function info -----*- C++ -*-===// +//===-- ARMMachineFunctionInfo.h - ARM machine function info ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -52,7 +52,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { unsigned ReturnRegsCount; /// HasStackFrame - True if this function has a stack frame. Set by - /// processFunctionBeforeCalleeSavedScan(). + /// determineCalleeSaves(). bool HasStackFrame; /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by @@ -110,11 +110,6 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// pass. 
DenseMap CPEClones; - /// GlobalBaseReg - keeps track of the virtual register initialized for - /// use as the global base register. This is used for PIC in some PIC - /// relocation models. - unsigned GlobalBaseReg; - /// ArgumentStackSize - amount of bytes on stack consumed by the arguments /// being passed on the stack unsigned ArgumentStackSize; @@ -133,7 +128,7 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), NumAlignedDPRCS2Regs(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} + VarArgsFrameIndex(0), HasITBlocks(false) {} explicit ARMFunctionInfo(MachineFunction &MF); @@ -204,9 +199,6 @@ public: bool hasITBlocks() const { return HasITBlocks; } void setHasITBlocks(bool h) { HasITBlocks = h; } - unsigned getGlobalBaseReg() const { return GlobalBaseReg; } - void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } - void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) llvm_unreachable("Duplicate entries!"); diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 45cc9ea91f37..02cbfb1fa9f1 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -266,12 +266,19 @@ def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> { } // Scalar single precision floating point register class.. -// FIXME: Allocation order changed to s0, s2, s4, ... as a quick hack to -// avoid partial-write dependencies on D registers (S registers are -// renamed as portions of D registers). -def SPR : RegisterClass<"ARM", [f32], 32, (add (decimate - (sequence "S%u", 0, 31), 2), - (sequence "S%u", 0, 31))>; +// FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack +// to avoid partial-write dependencies on D or Q (depending on platform) +// registers (S registers are renamed as portions of D/Q registers). +def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> { + let AltOrders = [(add (decimate SPR, 2), SPR), + (add (decimate SPR, 4), + (decimate SPR, 2), + (decimate (rotl SPR, 1), 4), + (decimate (rotl SPR, 1), 2))]; + let AltOrderSelect = [{ + return 1 + MF.getSubtarget().useStride4VFPs(MF); + }]; +} // Subset of SPR which can be used as a source of NEON scalars for 16-bit // operations @@ -281,25 +288,29 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>; // class. // ARM requires only word alignment for double. It's more performant if it // is double-word alignment though. -def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, +def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, (sequence "D%u", 0, 31)> { - // Allocate non-VFP2 registers D16-D31 first. - let AltOrders = [(rotl DPR, 16)]; - let AltOrderSelect = [{ return 1; }]; + // Allocate non-VFP2 registers D16-D31 first, and prefer even registers on + // Darwin platforms. + let AltOrders = [(rotl DPR, 16), + (add (decimate (rotl DPR, 16), 2), (rotl DPR, 16))]; + let AltOrderSelect = [{ + return 1 + MF.getSubtarget().useStride4VFPs(MF); + }]; } // Subset of DPR that are accessible with VFP2 (and so that also have // 32-bit SPR subregs). 
-def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, +def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, (trunc DPR, 16)>; // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations -def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64, +def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, (trunc DPR, 8)>; // Generic 128-bit vector register class. -def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, +def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128, (sequence "Q%u", 0, 15)> { // Allocate non-VFP2 aliases Q8-Q15 first. let AltOrders = [(rotl QPR, 8)]; diff --git a/lib/Target/ARM/ARMScheduleSwift.td b/lib/Target/ARM/ARMScheduleSwift.td index b03d5ff44c6e..3ad7730228e5 100644 --- a/lib/Target/ARM/ARMScheduleSwift.td +++ b/lib/Target/ARM/ARMScheduleSwift.td @@ -37,1050 +37,13 @@ def SW_FDIV : FuncUnit; // FIXME: Add preload instruction when it is documented. // FIXME: Model non-pipelined nature of FP div / sqrt unit. -def SwiftItineraries : ProcessorItineraries< - [SW_DIS0, SW_DIS1, SW_DIS2, SW_ALU0, SW_ALU1, SW_LS, SW_IDIV, SW_FDIV], [], [ - // - // Move instructions, unconditional - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [3]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_LS]>], - [5]>, - // - // MVN instructions - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - // - // No operand cycles - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>]>, - // - // Binary Instructions that produce a result - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1]>, - // - // Bitwise Instructions that produce a result - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1]>, - // - // Unary Instructions that produce a result - - // CLZ, RBIT, etc. 
- InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - - // BFC, BFI, UBFX, SBFX - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1]>, - - // - // Zero and sign extension instructions - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1, 1, 1]>, - // - // Compare instructions - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<2, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<2, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - // - // Test instructions - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<2, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<2, [SW_ALU0, SW_ALU1]>], - [1, 1, 1]>, - // - // Move instructions, conditional - // FIXME: Correctly model the extra input dep on the destination. - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS0, SW_DIS1, SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1]>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2]>, - - // Integer multiply pipeline - // - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [3, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_ALU0], 3>, - InstrStage<1, [SW_ALU0]>], - [5, 5, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [5, 6, 1, 1]>, - // - // Integer divide - InstrItinData, - InstrStage<1, [SW_ALU0], 0>, - InstrStage<14, [SW_IDIV]>], - [14, 1, 1]>, - - // Integer load pipeline - // FIXME: The timings are some rough approximations - // - // Immediate offset - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1]>, - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_LS]>], - [3, 4, 1]>, - // - // Register offset - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [3, 4, 1, 1]>, - // - // Scaled register offset - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS]>], - [5, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS]>], - [5, 1, 1]>, - // - // Immediate offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, 
SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [3, 1, 1]>, - // - // Register offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_LS]>], - [3, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0], 1>, - InstrStage<1, [SW_LS]>], - [3, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 0>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [3, 4, 1, 1]>, - // - // Scaled register offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [5, 3, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [5, 3, 1, 1]>, - // - // Load multiple, def is the 5th operand. - // FIXME: This assumes 3 to 4 registers. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1, 3], [], -1>, // dynamic uops - - // - // Load multiple + update, defs are the 1st and 5th operands. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 0>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1, 3], [], -1>, // dynamic uops - // - // Load multiple plus branch - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1, 3], [], -1>, // dynamic uops - // - // Pop, def is the 3rd operand. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 3], [], -1>, // dynamic uops - // - // Pop + branch, def is the 3rd operand. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 3], [], -1>, // dynamic uops - - // - // iLoadi + iALUr for t2LDRpci_pic. 
- InstrItinData, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [4, 1]>, - - // Integer store pipeline - /// - // Immediate offset - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1]>, - // - // Register offset - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - // - // Scaled register offset - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - // - // Immediate offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1]>, - // - // Register offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 1]>, - // - // Scaled register offset with update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>], - [3, 1, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 2>, - InstrStage<1, [SW_LS], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>], - [3, 1, 1, 1]>, - // - // Store multiple - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [], [], -1>, // dynamic uops - // - // Store multiple + update - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS], 1>, - InstrStage<1, [SW_ALU0, SW_ALU1], 1>, - InstrStage<1, [SW_LS]>], - [2], [], -1>, // dynamic uops - - // - // Preload - InstrItinData], [1, 1]>, - - // Branch - // - // no delay slots, so the latency of a branch is unimportant - InstrItinData]>, - - // FP Special Register to Integer Register File Move - InstrItinData, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [1]>, - // - // Single-precision FP Unary - // - // Most floating-point moves get issued on ALU0. 
- InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - // - // Double-precision FP Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - - // - // Single-precision FP Compare - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [1, 1]>, - // - // Double-precision FP Compare - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [1, 1]>, - // - // Single to Double FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Double to Single FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - - // - // Single to Half FP Convert - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU1], 4>, - InstrStage<1, [SW_ALU1]>], - [6, 1]>, - // - // Half to Single FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - - // - // Single-Precision FP to Integer Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Double-Precision FP to Integer Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Integer to Single-Precision FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Integer to Double-Precision FP Convert - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1]>, - // - // Single-precision FP ALU - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-precision FP ALU - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Single-precision FP Multiply - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Double-precision FP Multiply - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [6, 1, 1]>, - // - // Single-precision FP MAC - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-precision FP MAC - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [12, 1, 1]>, - // - // Single-precision Fused FP MAC - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-precision Fused FP MAC - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [12, 1, 1]>, - // - // Single-precision FP DIV - InstrItinData, - InstrStage<1, [SW_ALU1], 0>, - InstrStage<15, [SW_FDIV]>], - [17, 1, 1]>, - // - // Double-precision FP DIV - InstrItinData, - InstrStage<1, [SW_ALU1], 0>, - InstrStage<30, [SW_FDIV]>], - [32, 1, 1]>, - // - // Single-precision FP SQRT - InstrItinData, - InstrStage<1, [SW_ALU1], 0>, - InstrStage<15, [SW_FDIV]>], - [17, 1]>, - // - // Double-precision FP SQRT - InstrItinData, - InstrStage<1, [SW_ALU1], 0>, - InstrStage<30, [SW_FDIV]>], - [32, 1, 1]>, - - // - // Integer to Single-precision Move - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0]>], - [6, 1]>, - // - // Integer to Double-precision Move - InstrItinData, - InstrStage<1, [SW_LS]>], - [4, 1]>, - // - // Single-precision to Integer Move - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1]>, - // - // Double-precision to Integer Move - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_LS]>], - [3, 4, 1]>, - // - // Single-precision FP Load - InstrItinData, - InstrStage<1, [SW_LS]>], - [4, 1]>, - // - // Double-precision FP Load - InstrItinData, - InstrStage<1, [SW_LS]>], - [4, 1]>, - // - // FP Load Multiple - // FIXME: Assumes a single Q register. - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1, 1, 4], [], -1>, // dynamic uops - // - // FP Load Multiple + update - // FIXME: Assumes a single Q register. 
- InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1, 4], [], -1>, // dynamic uops - // - // Single-precision FP Store - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1]>, - // - // Double-precision FP Store - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1]>, - // - // FP Store Multiple - // FIXME: Assumes a single Q register. - InstrItinData, - InstrStage<1, [SW_LS]>], - [1, 1, 1], [], -1>, // dynamic uops - // - // FP Store Multiple + update - // FIXME: Assumes a single Q register. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0, SW_ALU1]>], - [2, 1, 1, 1], [], -1>, // dynamic uops - // NEON - // - // Double-register Integer Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1]>, - // - // Quad-register Integer Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1]>, - // - // Double-register Integer Q-Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1]>, - // - // Quad-register Integer CountQ-Unary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1]>, - // - // Double-register Integer Binary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Quad-register Integer Binary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-register Integer Subtract - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Quad-register Integer Subtract - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-register Integer Shift - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Quad-register Integer Shift - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-register Integer Shift (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register Integer Shift (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Double-register Integer Binary (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register Integer Binary (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Double-register Integer Subtract (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register Integer Subtract (4 cycle) - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - - // - // Double-register Integer Count - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Quad-register Integer Count - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1, 1]>, - // - // Double-register Absolute Difference and Accumulate - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1, 1]>, - // - // Quad-register Absolute Difference and Accumulate - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1, 1]>, - // - // Double-register Integer Pair Add Long - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register Integer Pair Add Long - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - - // - // Double-register Integer Multiply (.8, .16) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Quad-register Integer Multiply (.8, .16) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - - // - // Double-register Integer Multiply (.32) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Quad-register Integer Multiply (.32) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Double-register Integer Multiply-Accumulate 
(.8, .16) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1, 1]>, - // - // Double-register Integer Multiply-Accumulate (.32) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1, 1]>, - // - // Quad-register Integer Multiply-Accumulate (.8, .16) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1, 1]>, - // - // Quad-register Integer Multiply-Accumulate (.32) - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1, 1]>, - - // - // Move - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - // - // Move Immediate - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2]>, - // - // Double-register Permute Move - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1]>, - // - // Quad-register Permute Move - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1]>, - // - // Integer to Single-precision Move - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0]>], - [6, 1]>, - // - // Integer to Double-precision Move - InstrItinData, - InstrStage<1, [SW_LS]>], - [4, 1, 1]>, - // - // Single-precision to Integer Move - InstrItinData, - InstrStage<1, [SW_LS]>], - [3, 1]>, - // - // Double-precision to Integer Move - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 3>, - InstrStage<1, [SW_LS]>], - [3, 4, 1]>, - // - // Integer to Lane Move - // FIXME: I think this is correct, but it is not clear from the tuning guide. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_LS], 4>, - InstrStage<1, [SW_ALU0]>], - [6, 1]>, - - // - // Vector narrow move - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1]>, - // - // Double-register FP Unary - // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, - // and they issue on a different pipeline. - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - // - // Quad-register FP Unary - // FIXME: VRECPE / VRSQRTE has a longer latency than VABS, which is used here, - // and they issue on a different pipeline. - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [2, 1]>, - // - // Double-register FP Binary - // FIXME: We're using this itin for many instructions. - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - - // - // VPADD, etc. - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Double-register FP VMUL - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Quad-register FP Binary - InstrItinData, - InstrStage<1, [SW_ALU0]>], - [4, 1, 1]>, - // - // Quad-register FP VMUL - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [4, 1, 1]>, - // - // Double-register FP Multiple-Accumulate - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Quad-register FP Multiple-Accumulate - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-register Fused FP Multiple-Accumulate - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Quad-register FusedF P Multiple-Accumulate - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-register Reciprical Step - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Quad-register Reciprical Step - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [8, 1, 1]>, - // - // Double-register Permute - // FIXME: The latencies are unclear from the documentation. 
- InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [3, 4, 3, 4]>, - // - // Quad-register Permute - // FIXME: The latencies are unclear from the documentation. - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [3, 4, 3, 4]>, - // - // Quad-register Permute (3 cycle issue on A9) - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [3, 4, 3, 4]>, - - // - // Double-register VEXT - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1, 1]>, - // - // Quad-register VEXT - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1, 1]>, - // - // VTB - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [4, 1, 3, 3]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [6, 1, 3, 5, 5]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [8, 1, 3, 5, 7, 7]>, - // - // VTBX - InstrItinData, - InstrStage<1, [SW_ALU1]>], - [2, 1, 1]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [4, 1, 3, 3]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [6, 1, 3, 5, 5]>, - InstrItinData, - InstrStage<1, [SW_DIS1], 0>, - InstrStage<1, [SW_DIS2], 0>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1], 2>, - InstrStage<1, [SW_ALU1]>], - [8, 1, 3, 5, 7, 7]> -]>; - -// ===---------------------------------------------------------------------===// -// This following definitions describe the simple machine model which -// will replace itineraries. - // Swift machine model for scheduling and other instruction cost heuristics. def SwiftModel : SchedMachineModel { let IssueWidth = 3; // 3 micro-ops are dispatched per cycle. let MicroOpBufferSize = 45; // Based on NEON renamed registers. let LoadLatency = 3; let MispredictPenalty = 14; // A branch direction mispredict. - - let Itineraries = SwiftItineraries; + let CompleteModel = 0; // FIXME: Remove if all instructions are covered. } // Swift predicates. 
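The deleted SwiftItineraries block above is not replaced line for line: the per-operand machine model plus the InstRW overrides below take over, and until they cover every instruction the model stays marked incomplete (CompleteModel = 0). To make the remaining top-level SwiftModel numbers concrete, here is a back-of-envelope cycle estimate; the formula is purely illustrative and is not how the MachineScheduler computes anything.

    #include <cstdio>

    int main() {
      // Values from SwiftModel above.
      const int IssueWidth = 3;         // micro-ops dispatched per cycle
      const int LoadLatency = 3;        // default load-to-use latency
      const int MispredictPenalty = 14; // cycles lost to a mispredict

      // A hypothetical block: 10 micro-ops, two loads whose results are
      // needed immediately, and one mispredicted branch.
      int MicroOps = 10, ExposedLoads = 2, Mispredicts = 1;
      int Cycles = (MicroOps + IssueWidth - 1) / IssueWidth // dispatch bound
                   + ExposedLoads * (LoadLatency - 1)       // exposed stalls
                   + Mispredicts * MispredictPenalty;
      printf("estimated cycles: %d\n", Cycles); // 4 + 4 + 14 = 22
    }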
@@ -1558,6 +521,13 @@ let SchedModel = SwiftModel in { (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD", "PUSH", "tPUSH")>; + // LDRLIT pseudo instructions, they expand to LDR + PICADD + def : InstRW<[SwiftWriteP2ThreeCycle, WriteALU], + (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>; + // LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR + def : InstRW<[SwiftWriteP2ThreeCycle, SwiftWriteP2ThreeCycle], + (instregex "LDRLIT_ga_pcrel_ldr")>; + // 4.2.26 Branch def : WriteRes { let Latency = 0; } def : WriteRes { let Latency = 2; } diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 6cafbbb9f8eb..6fded9c8ab73 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -160,41 +160,39 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, unsigned VTSize = 4; unsigned i = 0; // Emit a maximum of 4 loads in Thumb1 since we have fewer registers - const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6; + const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6; SDValue TFOps[6]; SDValue Loads[6]; uint64_t SrcOff = 0, DstOff = 0; - // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the - // same number of stores. The loads and stores will get combined into - // ldm/stm later on. - while (EmittedNumMemOps < NumMemOps) { - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - Loads[i] = DAG.getLoad(VT, dl, Chain, - DAG.getNode(ISD::ADD, dl, MVT::i32, Src, - DAG.getConstant(SrcOff, dl, MVT::i32)), - SrcPtrInfo.getWithOffset(SrcOff), isVolatile, - false, false, 0); - TFOps[i] = Loads[i].getValue(1); - SrcOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(TFOps, i)); + // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to + // VLDM/VSTM and make this code emit it when appropriate. This would reduce + // pressure on the general purpose registers. However this seems harder to map + // onto the register allocator's view of the world. - for (i = 0; - i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { - TFOps[i] = DAG.getStore(Chain, dl, Loads[i], - DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, - DAG.getConstant(DstOff, dl, MVT::i32)), - DstPtrInfo.getWithOffset(DstOff), - isVolatile, false, 0); - DstOff += VTSize; - } - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(TFOps, i)); + // The number of MEMCPY pseudo-instructions to emit. We use up to + // MaxLoadsInLDM registers per mcopy, which will get lowered into ldm/stm + // later on. This is a lower bound on the number of MEMCPY operations we must + // emit. + unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM; - EmittedNumMemOps += i; + SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue); + + for (unsigned I = 0; I != NumMEMCPYs; ++I) { + // Evenly distribute registers among MEMCPY operations to reduce register + // pressure. 
+    unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
+    unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
+
+    Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
+                      DAG.getConstant(NumRegs, dl, MVT::i32));
+    Src = Dst.getValue(1);
+    Chain = Dst.getValue(2);
+
+    DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
+    SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
+
+    EmittedNumMemOps = NextEmittedNumMemOps;
   }
 
   if (BytesLeft == 0)
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 002c3e9b6291..bb6ae28065bd 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -26,6 +26,7 @@
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetOptions.h"
@@ -39,38 +40,10 @@ using namespace llvm;
 #define GET_SUBTARGETINFO_CTOR
 #include "ARMGenSubtargetInfo.inc"
 
-static cl::opt<bool>
-ReserveR9("arm-reserve-r9", cl::Hidden,
-          cl::desc("Reserve R9, making it unavailable as GPR"));
-
-static cl::opt<bool>
-ArmUseMOVT("arm-use-movt", cl::init(true), cl::Hidden);
-
 static cl::opt<bool>
 UseFusedMulOps("arm-use-mulops",
                cl::init(true), cl::Hidden);
 
-namespace {
-enum AlignMode {
-  DefaultAlign,
-  StrictAlign,
-  NoStrictAlign
-};
-}
-
-static cl::opt<AlignMode>
-Align(cl::desc("Load/store alignment support"),
-      cl::Hidden, cl::init(DefaultAlign),
-      cl::values(
-          clEnumValN(DefaultAlign, "arm-default-align",
-                     "Generate unaligned accesses only on hardware/OS "
-                     "combinations that are known to support them"),
-          clEnumValN(StrictAlign, "arm-strict-align",
-                     "Disallow all unaligned memory accesses"),
-          clEnumValN(NoStrictAlign, "arm-no-strict-align",
-                     "Allow unaligned memory accesses"),
-          clEnumValEnd));
-
 enum ITMode {
   DefaultIT,
   RestrictedIT,
@@ -88,6 +61,12 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
              "Allow IT blocks based on ARMv7"),
   clEnumValEnd));
 
+/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
+/// currently supported (for testing only).
+static cl::opt<bool>
+ForceFastISel("arm-force-fast-isel",
+              cl::init(false), cl::Hidden);
+
 /// initializeSubtargetDependencies - Initializes using a CPU and feature string
 /// so that we can use initializer lists for subtarget initialization.
 ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -110,8 +89,8 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
                            const std::string &FS,
                            const ARMBaseTargetMachine &TM, bool IsLittle)
     : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
-      ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle),
-      TargetTriple(TT), Options(TM.Options), TM(TM),
+      ARMProcClass(None), ARMArch(ARMv4t), stackAlignment(4), CPUString(CPU),
+      IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM),
      FrameLowering(initializeFrameLowering(CPU, FS)),
      // At this point initializeSubtargetDependencies has been called so
      // we can query directly.
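The arithmetic in the ARMSelectionDAGInfo hunk above deserves a worked example: the lowering now emits ceil(NumMemOps / MaxLoadsInLDM) MEMCPY pseudo-ops and recomputes the boundary as NumMemOps * (I + 1) / NumMEMCPYs so the register counts come out balanced. A standalone rerun of that calculation:

    #include <cstdio>

    int main() {
      const unsigned MaxLoadsInLDM = 6; // 4 on Thumb1, as above
      unsigned NumMemOps = 7;           // e.g. a 28-byte copy with VTSize 4

      // Same rounding-up division as the lowering code.
      unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
      unsigned EmittedNumMemOps = 0;
      for (unsigned I = 0; I != NumMEMCPYs; ++I) {
        unsigned Next = NumMemOps * (I + 1) / NumMEMCPYs;
        printf("MEMCPY %u covers %u registers\n", I, Next - EmittedNumMemOps);
        EmittedNumMemOps = Next;
      }
      // Prints 3 and 4 rather than the 6 and 1 a greedy split would give,
      // which is exactly the register-pressure point the comment makes.
    }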
@@ -133,6 +112,7 @@ void ARMSubtarget::initializeEnvironment() { HasV7Ops = false; HasV8Ops = false; HasV8_1aOps = false; + HasV8_2aOps = false; HasVFPv2 = false; HasVFPv3 = false; HasVFPv4 = false; @@ -147,10 +127,11 @@ void ARMSubtarget::initializeEnvironment() { UseSoftFloat = false; HasThumb2 = false; NoARM = false; - IsR9Reserved = ReserveR9; - UseMovt = false; + ReserveR9 = false; + NoMovt = false; SupportsTailCall = false; HasFP16 = false; + HasFullFP16 = false; HasD16 = false; HasHardwareDivide = false; HasHardwareDivideInARM = false; @@ -168,20 +149,36 @@ void ARMSubtarget::initializeEnvironment() { HasCrypto = false; HasCRC = false; HasZeroCycleZeroing = false; - AllowsUnalignedMem = false; - Thumb2DSP = false; + StrictAlign = false; + HasDSP = false; UseNaClTrap = false; GenLongCalls = false; UnsafeFPMath = false; + + // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this + // directly from it, but we can try to make sure they're consistent when both + // available. + UseSjLjEH = isTargetDarwin() && !isTargetWatchOS(); + assert((!TM.getMCAsmInfo() || + (TM.getMCAsmInfo()->getExceptionHandlingType() == + ExceptionHandling::SjLj) == UseSjLjEH) && + "inconsistent sjlj choice between CodeGen and MC"); } void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) { - if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s")) - // Default to the Swift CPU when targeting armv7s/thumbv7s. - CPUString = "swift"; - else - CPUString = "generic"; + CPUString = "generic"; + + if (isTargetDarwin()) { + StringRef ArchName = TargetTriple.getArchName(); + if (ArchName.endswith("v7s")) + // Default to the Swift CPU when targeting armv7s/thumbv7s. + CPUString = "swift"; + else if (ArchName.endswith("v7k")) + // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k. + // ARMv7k does not use SjLj exception handling. + CPUString = "cortex-a7"; + } } // Insert the architecture feature derived from the target triple into the @@ -212,44 +209,31 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isAAPCS_ABI()) stackAlignment = 8; - if (isTargetNaCl()) + if (isTargetNaCl() || isAAPCS16_ABI()) stackAlignment = 16; - UseMovt = hasV6T2Ops() && ArmUseMOVT; + // FIXME: Completely disable sibcall for Thumb1 since ThumbRegisterInfo:: + // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as + // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation + // support in the assembler and linker to be used. This would need to be + // fixed to fully support tail calls in Thumb1. + // + // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take + // LR. This means if we need to reload LR, it takes an extra instructions, + // which outweighs the value of the tail call; but here we don't know yet + // whether LR is going to be used. Probably the right approach is to + // generate the tail call here and turn it back into CALL/RET in + // emitEpilogue if LR is used. - if (isTargetMachO()) { - IsR9Reserved = ReserveR9 || !HasV6Ops; - SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0); - } else { - IsR9Reserved = ReserveR9; - SupportsTailCall = !isThumb1Only(); - } + // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, + // but we need to make sure there are enough registers; the only valid + // registers are the 4 used for parameters. We don't currently do this + // case. 
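To keep the ABI-dependent stack alignment defaults from this hunk straight, the decision in isolation looks like the sketch below; the function is an illustrative stand-in, not code from LLVM.

    // APCS keeps the 4-byte default, AAPCS raises it to 8, and NaCl and
    // AAPCS16 (the new watchOS ABI) require 16.
    unsigned defaultStackAlignment(bool IsAAPCS, bool IsNaCl, bool IsAAPCS16) {
      unsigned Alignment = 4; // APCS default
      if (IsAAPCS)
        Alignment = 8;
      if (IsNaCl || IsAAPCS16)
        Alignment = 16;
      return Alignment;
    }

The deletions that follow remove the old arm-strict-align command-line machinery; unaligned-access support becomes the StrictAlign subtarget flag, queried through allowsUnalignedMem() further down.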
- if (Align == DefaultAlign) { - // Assume pre-ARMv6 doesn't support unaligned accesses. - // - // ARMv6 may or may not support unaligned accesses depending on the - // SCTLR.U bit, which is architecture-specific. We assume ARMv6 - // Darwin and NetBSD targets support unaligned accesses, and others don't. - // - // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit - // which raises an alignment fault on unaligned accesses. Linux - // defaults this bit to 0 and handles it as a system-wide (not - // per-process) setting. It is therefore safe to assume that ARMv7+ - // Linux targets support unaligned accesses. The same goes for NaCl. - // - // The above behavior is consistent with GCC. - AllowsUnalignedMem = - (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || - isTargetNetBSD())) || - (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); - } else { - AllowsUnalignedMem = !(Align == StrictAlign); - } + SupportsTailCall = !isThumb1Only(); - // No v6M core supports unaligned memory access (v6M ARM ARM A3.2) - if (isV6M()) - AllowsUnalignedMem = false; + if (isTargetMachO() && isTargetIOS() && getTargetTriple().isOSVersionLT(5, 0)) + SupportsTailCall = false; switch (IT) { case DefaultIT: @@ -276,8 +260,14 @@ bool ARMSubtarget::isAPCS_ABI() const { } bool ARMSubtarget::isAAPCS_ABI() const { assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); - return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS; + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS || + TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; } +bool ARMSubtarget::isAAPCS16_ABI() const { + assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; +} + /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. bool @@ -321,11 +311,23 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { } bool ARMSubtarget::hasSinCos() const { - return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0); + return isTargetWatchOS() || + (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0)); +} + +bool ARMSubtarget::enableMachineScheduler() const { + // Enable the MachineScheduler before register allocation for out-of-order + // architectures where we do not use the PostRA scheduler anymore (for now + // restricted to swift). + return getSchedModel().isOutOfOrder() && isSwift(); } // This overrides the PostRAScheduler bit in the SchedModel for any CPU. bool ARMSubtarget::enablePostRAScheduler() const { + // No need for PostRA scheduling on out of order CPUs (for now restricted to + // swift). + if (getSchedModel().isOutOfOrder() && isSwift()) + return false; return (!isThumb() || hasThumb2()); } @@ -333,15 +335,30 @@ bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier() && !isThumb1Only(); } +bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { + // For general targets, the prologue can grow when VFPs are allocated with + // stride 4 (more vpush instructions). But WatchOS uses a compact unwind + // format which it's more important to get right. + return isTargetWatchOS() || (isSwift() && !MF.getFunction()->optForMinSize()); +} + bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit // immediates as it is inherently position independent, and may be out of // range otherwise. 
- return UseMovt && (isTargetWindows() || - !MF.getFunction()->hasFnAttribute(Attribute::MinSize)); + return !NoMovt && hasV6T2Ops() && + (isTargetWindows() || !MF.getFunction()->optForMinSize()); } bool ARMSubtarget::useFastISel() const { + // Enable fast-isel for any target, for testing only. + if (ForceFastISel) + return true; + + // Limit fast-isel to the targets that are or have been tested. + if (!hasV6Ops()) + return false; + // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. return TM.Options.EnableFastISel && ((isTargetMachO() && !isThumb1Only()) || diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index dd101df9b63d..a8b28018f1b2 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -43,11 +43,17 @@ class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexA17, CortexR4, CortexR4F, CortexR5, Swift, CortexA53, CortexA57, Krait, + CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexA35, CortexA53, + CortexA57, CortexA72, Krait, Swift }; enum ARMProcClassEnum { None, AClass, RClass, MClass }; + enum ARMArchEnum { + ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te, + ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r, + ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a + }; /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. ARMProcFamilyEnum ARMProcFamily; @@ -55,6 +61,9 @@ protected: /// ARMProcClass - ARM processor class: None, AClass, RClass or MClass. ARMProcClassEnum ARMProcClass; + /// ARMArch - ARM architecture + ARMArchEnum ARMArch; + /// HasV4TOps, HasV5TOps, HasV5TEOps, /// HasV6Ops, HasV6MOps, HasV6KOps, HasV6T2Ops, HasV7Ops, HasV8Ops - /// Specify whether target support specific ARM ISA variants. @@ -68,6 +77,7 @@ protected: bool HasV7Ops; bool HasV8Ops; bool HasV8_1aOps; + bool HasV8_2aOps; /// HasVFPv2, HasVFPv3, HasVFPv4, HasFPARMv8, HasNEON - Specify what /// floating point ISAs are supported. @@ -109,22 +119,24 @@ protected: /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM; - /// IsR9Reserved - True if R9 is a not available as general purpose register. - bool IsR9Reserved; + /// ReserveR9 - True if R9 is not available as a general purpose register. + bool ReserveR9; - /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit - /// imms (including global addresses). - bool UseMovt; + /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of + /// 32-bit imms (including global addresses). + bool NoMovt; /// SupportsTailCall - True if the OS supports tail call. The dynamic linker /// must be able to synthesize call stubs for interworking between ARM and /// Thumb. bool SupportsTailCall; - /// HasFP16 - True if subtarget supports half-precision FP (We support VFP+HF - /// only so far) + /// HasFP16 - True if subtarget supports half-precision FP conversions bool HasFP16; + /// HasFullFP16 - True if subtarget supports half-precision FP operations + bool HasFullFP16; + /// HasD16 - True if subtarget is limited to 16 double precision /// FP registers for VFPv3. bool HasD16; @@ -190,18 +202,18 @@ protected: /// particularly effective at zeroing a VFP register. bool HasZeroCycleZeroing; - /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory + /// StrictAlign - If true, the subtarget disallows unaligned memory /// accesses for some types. 
For details, see /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). - bool AllowsUnalignedMem; + bool StrictAlign; /// RestrictIT - If true, the subtarget disallows generation of deprecated IT /// blocks to conform to ARMv8 rule. bool RestrictIT; - /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith - /// and such) instructions in Thumb2 code. - bool Thumb2DSP; + /// HasDSP - If true, the subtarget supports the DSP (saturating arith + /// and such) instructions. + bool HasDSP; /// NaCl TRAP instruction is generated instead of the regular TRAP. bool UseNaClTrap; @@ -212,6 +224,9 @@ protected: /// Target machine allowed unsafe FP math (such as use of NEON fp) bool UnsafeFPMath; + /// UseSjLjEH - If true, the target uses SjLj exception handling (e.g. iOS). + bool UseSjLjEH; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -297,6 +312,7 @@ public: bool hasV7Ops() const { return HasV7Ops; } bool hasV8Ops() const { return HasV8Ops; } bool hasV8_1aOps() const { return HasV8_1aOps; } + bool hasV8_2aOps() const { return HasV8_2aOps; } bool isCortexA5() const { return ARMProcFamily == CortexA5; } bool isCortexA7() const { return ARMProcFamily == CortexA7; } @@ -343,17 +359,20 @@ public: bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; } bool hasRAS() const { return HasRAS; } bool hasMPExtension() const { return HasMPExtension; } - bool hasThumb2DSP() const { return Thumb2DSP; } + bool hasDSP() const { return HasDSP; } bool useNaClTrap() const { return UseNaClTrap; } + bool useSjLjEH() const { return UseSjLjEH; } bool genLongCalls() const { return GenLongCalls; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } + bool hasFullFP16() const { return HasFullFP16; } const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetIOS() const { return TargetTriple.isiOS(); } + bool isTargetWatchOS() const { return TargetTriple.isWatchOS(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } @@ -375,6 +394,11 @@ public: TargetTriple.getEnvironment() == Triple::EABIHF) && !isTargetDarwin() && !isTargetWindows(); } + bool isTargetGNUAEABI() const { + return (TargetTriple.getEnvironment() == Triple::GNUEABI || + TargetTriple.getEnvironment() == Triple::GNUEABIHF) && + !isTargetDarwin() && !isTargetWindows(); + } // ARM Targets that support EHABI exception handling standard // Darwin uses SjLj. Other targets might need more checks. 
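The new isTargetGNUAEABI() predicate above pairs with the ARMTargetMachine change further down, which defaults Options.EABIVersion to EABI::GNU on gnueabi/gnueabihf triples and to EABI::EABI5 everywhere else. In miniature, with stand-in enums for the Triple and EABI types:

    enum class Env { GNUEABI, GNUEABIHF, EABI, EABIHF, Android, Other };
    enum class EABIVersion { EABI5, GNU };

    // Mirrors the predicate above: GNU EABI environments, excluding Darwin
    // and Windows, get the GNU flavour; everything else defaults to EABI5.
    EABIVersion defaultEABIFor(Env E, bool IsDarwin, bool IsWindows) {
      bool IsGNUAEABI = (E == Env::GNUEABI || E == Env::GNUEABIHF) &&
                        !IsDarwin && !IsWindows;
      return IsGNUAEABI ? EABIVersion::GNU : EABIVersion::EABI5;
    }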
@@ -383,7 +407,7 @@ public:
            TargetTriple.getEnvironment() == Triple::GNUEABI ||
            TargetTriple.getEnvironment() == Triple::EABIHF ||
            TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
-           TargetTriple.getEnvironment() == Triple::Android) &&
+           isTargetAndroid()) &&
           !isTargetDarwin() && !isTargetWindows();
   }
 
@@ -391,14 +415,13 @@ public:
     // FIXME: this is invalid for WindowsCE
     return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
            TargetTriple.getEnvironment() == Triple::EABIHF ||
-           isTargetWindows();
-  }
-  bool isTargetAndroid() const {
-    return TargetTriple.getEnvironment() == Triple::Android;
+           isTargetWindows() || isAAPCS16_ABI();
   }
+  bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
 
   bool isAPCS_ABI() const;
   bool isAAPCS_ABI() const;
+  bool isAAPCS16_ABI() const;
 
   bool useSoftFloat() const { return UseSoftFloat; }
   bool isThumb() const { return InThumbMode; }
@@ -409,17 +432,17 @@ public:
   bool isRClass() const { return ARMProcClass == RClass; }
   bool isAClass() const { return ARMProcClass == AClass; }
 
-  bool isV6M() const {
-    return isThumb1Only() && isMClass();
+  bool isR9Reserved() const {
+    return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
   }
 
-  bool isR9Reserved() const { return IsR9Reserved; }
+  bool useStride4VFPs(const MachineFunction &MF) const;
 
   bool useMovt(const MachineFunction &MF) const;
 
   bool supportsTailCall() const { return SupportsTailCall; }
 
-  bool allowsUnalignedMem() const { return AllowsUnalignedMem; }
+  bool allowsUnalignedMem() const { return !StrictAlign; }
 
   bool restrictIT() const { return RestrictIT; }
 
@@ -433,6 +456,9 @@ public:
   /// compiler runtime or math libraries.
   bool hasSinCos() const;
 
+  /// Returns true if machine scheduler should be enabled.
+  bool enableMachineScheduler() const override;
+
   /// True for some subtargets at > -O0.
   bool enablePostRAScheduler() const override;
 
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 93495d66ae70..fca1901dc57c 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -66,7 +66,9 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
 static ARMBaseTargetMachine::ARMABI
 computeTargetABI(const Triple &TT, StringRef CPU,
                  const TargetOptions &Options) {
-  if (Options.MCOptions.getABIName().startswith("aapcs"))
+  if (Options.MCOptions.getABIName() == "aapcs16")
+    return ARMBaseTargetMachine::ARM_ABI_AAPCS16;
+  else if (Options.MCOptions.getABIName().startswith("aapcs"))
     return ARMBaseTargetMachine::ARM_ABI_AAPCS;
   else if (Options.MCOptions.getABIName().startswith("apcs"))
     return ARMBaseTargetMachine::ARM_ABI_APCS;
@@ -83,6 +85,8 @@ computeTargetABI(const Triple &TT, StringRef CPU,
         (TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
         CPU.startswith("cortex-m")) {
       TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+    } else if (TT.isWatchOS()) {
+      TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
     } else {
       TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
     }
@@ -106,7 +110,7 @@ computeTargetABI(const Triple &TT, StringRef CPU,
       if (TT.isOSNetBSD())
         TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS;
       else
-        TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
+        TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
       break;
     }
   }
@@ -145,7 +149,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU,
   // to 64. We always try to give them natural alignment.
if (ABI == ARMBaseTargetMachine::ARM_ABI_APCS) Ret += "-v64:32:64-v128:32:128"; - else + else if (ABI != ARMBaseTargetMachine::ARM_ABI_AAPCS16) Ret += "-v128:64:128"; // Try to align aggregates to 32 bits (the default is 64 bits, which has no @@ -157,7 +161,7 @@ static std::string computeDataLayout(const Triple &TT, StringRef CPU, // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit // aligned everywhere else. - if (TT.isOSNaCl()) + if (TT.isOSNaCl() || ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16) Ret += "-S128"; else if (ABI == ARMBaseTargetMachine::ARM_ABI_AAPCS) Ret += "-S64"; @@ -184,6 +188,15 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, if (Options.FloatABIType == FloatABI::Default) this->Options.FloatABIType = Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft; + + // Default to triple-appropriate EABI + if (Options.EABIVersion == EABI::Default || + Options.EABIVersion == EABI::Unknown) { + if (Subtarget.isTargetGNUAEABI()) + this->Options.EABIVersion = EABI::GNU; + else + this->Options.EABIVersion = EABI::EABI5; + } } ARMBaseTargetMachine::~ARMBaseTargetMachine() {} @@ -225,12 +238,12 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { } TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &F) { return TargetTransformInfo(ARMTTIImpl(this, F)); }); + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(ARMTTIImpl(this, F)); + }); } - -void ARMTargetMachine::anchor() { } +void ARMTargetMachine::anchor() {} ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -244,7 +257,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT, "support ARM mode execution!"); } -void ARMLETargetMachine::anchor() { } +void ARMLETargetMachine::anchor() {} ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -253,7 +266,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OL) : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} -void ARMBETargetMachine::anchor() { } +void ARMBETargetMachine::anchor() {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -262,7 +275,7 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OL) : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} -void ThumbTargetMachine::anchor() { } +void ThumbTargetMachine::anchor() {} ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -273,7 +286,7 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT, initAsmInfo(); } -void ThumbLETargetMachine::anchor() { } +void ThumbLETargetMachine::anchor() {} ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -282,7 +295,7 @@ ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT, CodeGenOpt::Level OL) : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} -void ThumbBETargetMachine::anchor() { } +void ThumbBETargetMachine::anchor() {} ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -348,7 +361,13 @@ bool ARMPassConfig::addPreISel() { // tricky when doing code gen per function. 
bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && (EnableGlobalMerge == cl::BOU_UNSET); - addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize)); + // Merging of extern globals is enabled by default on non-Mach-O as we + // expect it to be generally either beneficial or harmless. On Mach-O it + // is disabled as we emit the .subsections_via_symbols directive which + // means that merging extern globals is not safe. + bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO(); + addPass(createGlobalMergePass(TM, 127, OnlyOptimizeForSize, + MergeExternalByDefault)); } return false; @@ -356,9 +375,6 @@ bool ARMPassConfig::addPreISel() { bool ARMPassConfig::addInstSelector() { addPass(createARMISelDag(getARMTargetMachine(), getOptLevel())); - - if (TM->getTargetTriple().isOSBinFormatELF() && TM->Options.EnableFastISel) - addPass(createARMGlobalBaseRegPass()); return false; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 8c98e082ce9a..8ad1f3dc2c34 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -26,7 +26,8 @@ public: enum ARMABI { ARM_ABI_UNKNOWN, ARM_ABI_APCS, - ARM_ABI_AAPCS // ARM EABI + ARM_ABI_AAPCS, // ARM EABI + ARM_ABI_AAPCS16 } TargetABI; protected: diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 2f194cf7ae06..c1520119ef21 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -15,7 +15,7 @@ using namespace llvm; #define DEBUG_TYPE "armtti" -unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); @@ -47,12 +47,12 @@ unsigned ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { return 3; } -unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); // Single to/from double precision conversions. - static const CostTblEntry NEONFltDblTbl[] = { + static const CostTblEntry NEONFltDblTbl[] = { // Vector fptrunc/fpext conversions. { ISD::FP_ROUND, MVT::v2f64, 2 }, { ISD::FP_EXTEND, MVT::v2f32, 2 }, @@ -61,10 +61,9 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND)) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); - int Idx = CostTableLookup(NEONFltDblTbl, ISD, LT.second); - if (Idx != -1) - return LT.first * NEONFltDblTbl[Idx].Cost; + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second)) + return LT.first * Entry->Cost; } EVT SrcTy = TLI->getValueType(DL, Src); @@ -76,8 +75,7 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { // Some arithmetic, load and store operations have specific instructions // to cast up/down their types automatically at no extra cost. // TODO: Get these tables to know at least what the related operations are. 
- static const TypeConversionCostTblEntry - NEONVectorConversionTbl[] = { + static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 }, { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, @@ -153,15 +151,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { }; if (SrcTy.isVector() && ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONVectorConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return NEONVectorConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } // Scalar float to integer conversions. - static const TypeConversionCostTblEntry - NEONFloatConversionTbl[] = { + static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = { { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, @@ -184,15 +181,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } }; if (SrcTy.isFloatingPoint() && ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONFloatConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return NEONFloatConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } // Scalar integer to float conversions. - static const TypeConversionCostTblEntry - NEONIntegerConversionTbl[] = { + static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = { { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, @@ -216,15 +212,14 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { }; if (SrcTy.isInteger() && ST->hasNEON()) { - int Idx = ConvertCostTableLookup(NEONIntegerConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return NEONIntegerConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl, + ISD, DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } // Scalar integer conversion costs. - static const TypeConversionCostTblEntry - ARMIntegerConversionTbl[] = { + static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = { // i16 -> i64 requires two dependent operations. { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, @@ -236,17 +231,17 @@ unsigned ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { }; if (SrcTy.isInteger()) { - int Idx = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return ARMIntegerConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } return BaseT::getCastInstrCost(Opcode, Dst, Src); } -unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, - unsigned Index) { +int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, + unsigned Index) { // Penalize inserting into an D-subregister. We end up with a three times // lower estimated throughput on swift. 
if (ST->isSwift() && @@ -255,28 +250,30 @@ unsigned ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, ValTy->getScalarSizeInBits() <= 32) return 3; - // Cross-class copies are expensive on many microarchitectures, - // so assume they are expensive by default. if ((Opcode == Instruction::InsertElement || - Opcode == Instruction::ExtractElement) && - ValTy->getVectorElementType()->isIntegerTy()) - return 3; + Opcode == Instruction::ExtractElement)) { + // Cross-class copies are expensive on many microarchitectures, + // so assume they are expensive by default. + if (ValTy->getVectorElementType()->isIntegerTy()) + return 3; + + // Even if it's not a cross class copy, this likely leads to mixing + // of NEON and VFP code and should therefore be penalized. + if (ValTy->isVectorTy() && + ValTy->getScalarSizeInBits() <= 32) + return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U); + } return BaseT::getVectorInstrCost(Opcode, ValTy, Index); } -unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) { +int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { // Lowering of some vector selects is currently far from perfect. - static const TypeConversionCostTblEntry - NEONVectorSelectTbl[] = { - { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, - { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, - { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, + static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = { { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } @@ -285,21 +282,20 @@ unsigned ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, EVT SelCondTy = TLI->getValueType(DL, CondTy); EVT SelValTy = TLI->getValueType(DL, ValTy); if (SelCondTy.isSimple() && SelValTy.isSimple()) { - int Idx = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, - SelCondTy.getSimpleVT(), - SelValTy.getSimpleVT()); - if (Idx != -1) - return NEONVectorSelectTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, + SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT())) + return Entry->Cost; } - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); return LT.first; } return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting @@ -314,7 +310,7 @@ unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { return 1; } -unsigned ARMTTIImpl::getFPOpCost(Type *Ty) { +int ARMTTIImpl::getFPOpCost(Type *Ty) { // Use similar logic that's in ARMISelLowering: // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access // to VFP.
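The getVectorInstrCost hunk above restructures the insert/extract special case: integer lane moves keep their flat cross-class-copy cost of 3, and a new branch floors the cost of narrow floating-point lane moves at 2 because they mix NEON and VFP code. A hedged restatement of that rule with illustrative scalar parameters (not the real TTI types; BaseCost stands in for the BaseT result):

// Illustrative sketch of the new insert/extract cost rule.
static unsigned vectorLaneMoveCost(bool IsInsertOrExtract, bool EltIsInteger,
                                   unsigned EltSizeInBits, unsigned BaseCost) {
  if (IsInsertOrExtract) {
    if (EltIsInteger)
      return 3;                           // GPR<->NEON cross-class copy
    if (EltSizeInBits <= 32)
      return BaseCost > 2 ? BaseCost : 2; // NEON/VFP mixing penalty floor
  }
  return BaseCost;                        // everything else: base estimate
}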
@@ -333,14 +329,14 @@ unsigned ARMTTIImpl::getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Expensive; } -unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) { +int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) { // We only handle costs of reverse and alternate shuffles for now. if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate) return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); if (Kind == TTI::SK_Reverse) { - static const CostTblEntry NEONShuffleTbl[] = { + static const CostTblEntry NEONShuffleTbl[] = { // Reverse shuffle costs one instruction if we are shuffling within a // double word (vrev) or two if we shuffle a quad word (vrev, vext). {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, @@ -353,16 +349,16 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); + if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, + LT.second)) + return LT.first * Entry->Cost; - return LT.first * NEONShuffleTbl[Idx].Cost; + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } if (Kind == TTI::SK_Alternate) { - static const CostTblEntry NEONAltShuffleTbl[] = { + static const CostTblEntry NEONAltShuffleTbl[] = { // Alt shuffle cost table for ARM. Cost is the number of instructions // required to create the shuffled vector. @@ -379,27 +375,26 @@ unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - int Idx = - CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); - return LT.first * NEONAltShuffleTbl[Idx].Cost; + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; + return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } -unsigned ARMTTIImpl::getArithmeticInstrCost( +int ARMTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); const unsigned FunctionCallDivCost = 20; const unsigned ReciprocalDivCost = 10; - static const CostTblEntry CostTbl[] = { + static const CostTblEntry CostTbl[] = { // Division. // These costs are somewhat random. Choose a cost of 20 to indicate that // vectorizing division (added function call) is going to be very expensive. @@ -440,16 +435,12 @@ unsigned ARMTTIImpl::getArithmeticInstrCost( // Multiplication.
}; - int Idx = -1; - if (ST->hasNEON()) - Idx = CostTableLookup(CostTbl, ISDOpcode, LT.second); + if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second)) + return LT.first * Entry->Cost; - if (Idx != -1) - return LT.first * CostTbl[Idx].Cost; - - unsigned Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, - Opd1PropInfo, Opd2PropInfo); + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo); // This is somewhat of a hack. The problem that we are facing is that SROA // creates a sequence of shift, and, or instructions to construct values. @@ -465,10 +456,9 @@ unsigned ARMTTIImpl::getArithmeticInstrCost( return Cost; } -unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); +int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) { + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); if (Src->isVectorTy() && Alignment != 16 && Src->getVectorElementType()->isDoubleTy()) { @@ -479,21 +469,21 @@ unsigned ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return LT.first; } -unsigned ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace) { +int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, + unsigned Factor, + ArrayRef Indices, + unsigned Alignment, + unsigned AddressSpace) { assert(Factor >= 2 && "Invalid interleave factor"); assert(isa(VecTy) && "Expect a vector type"); // vldN/vstN doesn't support vector types of i64/f64 element. - bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; + bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64; if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeAllocSize(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // vldN/vstN only support legal vector types of size 64 or 128 in bits. 
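The comment above states the vldN/vstN legality rule that the check immediately following enforces; note the hunk deliberately switches to getTypeSizeInBits rather than alloc size, so padding cannot skew the 64/128-bit test. A standalone sketch of the whole predicate under those assumptions, with hypothetical names:

// Sketch: can an interleaved access of NumElts elements, each EltSizeInBits
// wide and de-interleaved by Factor, be lowered to vldN/vstN?
static bool isLegalNEONInterleave(unsigned Factor, unsigned MaxFactor,
                                  unsigned NumElts, unsigned EltSizeInBits) {
  if (Factor < 2 || Factor > MaxFactor)
    return false;                  // only vld2/vld3/vld4 exist
  if (EltSizeInBits == 64)
    return false;                  // no i64/f64 element forms
  if (NumElts % Factor != 0)
    return false;                  // sub-vectors must split evenly
  unsigned SubVecSizeInBits = (NumElts / Factor) * EltSizeInBits;
  return SubVecSizeInBits == 64 || SubVecSizeInBits == 128; // D or Q register
}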
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 84f256f73722..7d8d2381c983 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -41,7 +41,7 @@ class ARMTTIImpl : public BasicTTIImplBase { const ARMTargetLowering *getTLI() const { return TLI; } public: - explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, Function &F) + explicit ARMTTIImpl(const ARMBaseTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} @@ -52,11 +52,13 @@ public: : BaseT(std::move(static_cast(Arg))), ST(std::move(Arg.ST)), TLI(std::move(Arg.TLI)) {} + bool enableInterleavedAccessVectorization() { return true; } + /// \name Scalar TTI Implementations /// @{ using BaseT::getIntImmCost; - unsigned getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty); /// @} @@ -92,34 +94,31 @@ public: return 1; } - unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp); + int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - unsigned getAddressComputationCost(Type *Val, bool IsComplex); + int getAddressComputationCost(Type *Val, bool IsComplex); - unsigned getFPOpCost(Type *Ty); + int getFPOpCost(Type *Ty); - unsigned getArithmeticInstrCost( + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); - unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, - unsigned Factor, - ArrayRef Indices, - unsigned Alignment, - unsigned AddressSpace); + int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, + ArrayRef Indices, unsigned Alignment, + unsigned AddressSpace); /// @} }; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cf6b8929f311..c69a741244cf 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -129,7 +129,6 @@ public: }; class ARMAsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; const MCInstrInfo &MII; const MCRegisterInfo *MRI; UnwindContext UC; @@ -247,48 +246,49 @@ class ARMAsmParser : public MCTargetAsmParser { OperandVector &Operands); bool isThumb() const { // FIXME: Can tablegen auto-generate this? 
- return STI.getFeatureBits()[ARM::ModeThumb]; + return getSTI().getFeatureBits()[ARM::ModeThumb]; } bool isThumbOne() const { - return isThumb() && !STI.getFeatureBits()[ARM::FeatureThumb2]; + return isThumb() && !getSTI().getFeatureBits()[ARM::FeatureThumb2]; } bool isThumbTwo() const { - return isThumb() && STI.getFeatureBits()[ARM::FeatureThumb2]; + return isThumb() && getSTI().getFeatureBits()[ARM::FeatureThumb2]; } bool hasThumb() const { - return STI.getFeatureBits()[ARM::HasV4TOps]; + return getSTI().getFeatureBits()[ARM::HasV4TOps]; } bool hasV6Ops() const { - return STI.getFeatureBits()[ARM::HasV6Ops]; + return getSTI().getFeatureBits()[ARM::HasV6Ops]; } bool hasV6MOps() const { - return STI.getFeatureBits()[ARM::HasV6MOps]; + return getSTI().getFeatureBits()[ARM::HasV6MOps]; } bool hasV7Ops() const { - return STI.getFeatureBits()[ARM::HasV7Ops]; + return getSTI().getFeatureBits()[ARM::HasV7Ops]; } bool hasV8Ops() const { - return STI.getFeatureBits()[ARM::HasV8Ops]; + return getSTI().getFeatureBits()[ARM::HasV8Ops]; } bool hasARM() const { - return !STI.getFeatureBits()[ARM::FeatureNoARM]; + return !getSTI().getFeatureBits()[ARM::FeatureNoARM]; } - bool hasThumb2DSP() const { - return STI.getFeatureBits()[ARM::FeatureDSPThumb2]; + bool hasDSP() const { + return getSTI().getFeatureBits()[ARM::FeatureDSP]; } bool hasD16() const { - return STI.getFeatureBits()[ARM::FeatureD16]; + return getSTI().getFeatureBits()[ARM::FeatureD16]; } bool hasV8_1aOps() const { - return STI.getFeatureBits()[ARM::HasV8_1aOps]; + return getSTI().getFeatureBits()[ARM::HasV8_1aOps]; } void SwitchMode() { + MCSubtargetInfo &STI = copySTI(); uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); } bool isMClass() const { - return STI.getFeatureBits()[ARM::FeatureMClass]; + return getSTI().getFeatureBits()[ARM::FeatureMClass]; } /// @name Auto-generated Match Functions @@ -343,14 +343,15 @@ public: Match_RequiresNotITBlock, Match_RequiresV6, Match_RequiresThumb2, + Match_RequiresV8, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "ARMGenAsmMatcher.inc" }; - ARMAsmParser(MCSubtargetInfo &STI, MCAsmParser &Parser, + ARMAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : STI(STI), MII(MII), UC(Parser) { + : MCTargetAsmParser(Options, STI), MII(MII), UC(Parser) { MCAsmParserExtension::Initialize(Parser); // Cache the MCRegisterInfo. 
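The asm-parser hunks above and below drop the parser's own MCSubtargetInfo member: feature reads now go through getSTI(), and anything that mutates features (the ARM/Thumb mode switch here, and the .arch/.cpu/.fpu/.arch_extension directives later in this file) first clones the shared subtarget via copySTI(). A minimal sketch of that copy-on-write discipline with stand-in types (not the real MCTargetAsmParser API):

#include <memory>

struct SubtargetInfo { unsigned long long FeatureBits = 0; };

class AsmParserBase {
  const SubtargetInfo *Shared;         // owned by the caller, never written
  std::unique_ptr<SubtargetInfo> Copy; // lazily created private copy
public:
  explicit AsmParserBase(const SubtargetInfo &STI) : Shared(&STI) {}

  // All feature queries read through here.
  const SubtargetInfo &getSTI() const { return Copy ? *Copy : *Shared; }

  // Clone-on-first-write: feature-toggling directives call this first, so
  // the shared subtarget stays pristine for other users of the target.
  SubtargetInfo &copySTI() {
    if (!Copy)
      Copy = std::make_unique<SubtargetInfo>(*Shared);
    return *Copy;
  }
};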
@@ -564,87 +565,6 @@ class ARMOperand : public MCParsedAsmOperand { public: ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} - ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { - Kind = o.Kind; - StartLoc = o.StartLoc; - EndLoc = o.EndLoc; - switch (Kind) { - case k_CondCode: - CC = o.CC; - break; - case k_ITCondMask: - ITMask = o.ITMask; - break; - case k_Token: - Tok = o.Tok; - break; - case k_CCOut: - case k_Register: - Reg = o.Reg; - break; - case k_RegisterList: - case k_DPRRegisterList: - case k_SPRRegisterList: - Registers = o.Registers; - break; - case k_VectorList: - case k_VectorListAllLanes: - case k_VectorListIndexed: - VectorList = o.VectorList; - break; - case k_CoprocNum: - case k_CoprocReg: - Cop = o.Cop; - break; - case k_CoprocOption: - CoprocOption = o.CoprocOption; - break; - case k_Immediate: - Imm = o.Imm; - break; - case k_MemBarrierOpt: - MBOpt = o.MBOpt; - break; - case k_InstSyncBarrierOpt: - ISBOpt = o.ISBOpt; - case k_Memory: - Memory = o.Memory; - break; - case k_PostIndexRegister: - PostIdxReg = o.PostIdxReg; - break; - case k_MSRMask: - MMask = o.MMask; - break; - case k_BankedReg: - BankedReg = o.BankedReg; - break; - case k_ProcIFlags: - IFlags = o.IFlags; - break; - case k_ShifterImmediate: - ShifterImm = o.ShifterImm; - break; - case k_ShiftedRegister: - RegShiftedReg = o.RegShiftedReg; - break; - case k_ShiftedImmediate: - RegShiftedImm = o.RegShiftedImm; - break; - case k_RotateImmediate: - RotImm = o.RotImm; - break; - case k_ModifiedImmediate: - ModImm = o.ModImm; - break; - case k_BitfieldDescriptor: - Bitfield = o.Bitfield; - break; - case k_VectorIndex: - VectorIndex = o.VectorIndex; - break; - } - } /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const override { return StartLoc; } @@ -4054,7 +3974,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { if (FlagsVal == ~0U) return MatchOperand_NoMatch; - if (!hasThumb2DSP() && (FlagsVal & 0x400)) + if (!hasDSP() && (FlagsVal & 0x400)) // The _g and _nzcvqg versions are only valid if the DSP extension is // available. return MatchOperand_NoMatch; @@ -5202,6 +5122,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // FALLTHROUGH } case AsmToken::Colon: { + S = Parser.getTok().getLoc(); // ":lower16:" and ":upper16:" expression prefixes // FIXME: Check it's an expression prefix, // e.g. (FOO - :lower16:BAR) isn't legal. @@ -5220,8 +5141,9 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { return false; } case AsmToken::Equal: { + S = Parser.getTok().getLoc(); if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. 
ldr r0, =val) - return Error(Parser.getTok().getLoc(), "unexpected token in operand"); + return Error(S, "unexpected token in operand"); Parser.Lex(); // Eat '=' const MCExpr *SubExprVal; @@ -5229,7 +5151,8 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { return true; E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); - const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal); + const MCExpr *CPLoc = + getTargetStreamer().addConstantPoolEntry(SubExprVal, S); Operands.push_back(ARMOperand::CreateImm(CPLoc, S, E)); return false; } @@ -5682,9 +5605,11 @@ bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic, // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON unsigned RegIdx = 3; if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") && - static_cast(*Operands[2]).getToken() == ".f32") { + (static_cast(*Operands[2]).getToken() == ".f32" || + static_cast(*Operands[2]).getToken() == ".f16")) { if (static_cast(*Operands[3]).isToken() && - static_cast(*Operands[3]).getToken() == ".f32") + (static_cast(*Operands[3]).getToken() == ".f32" || + static_cast(*Operands[3]).getToken() == ".f16")) RegIdx = 4; if (static_cast(*Operands[RegIdx]).isReg() && @@ -8610,18 +8535,29 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { if (isThumbTwo() && Inst.getOperand(OpNo).getReg() == ARM::CPSR && inITBlock()) return Match_RequiresNotITBlock; + } else if (isThumbOne()) { + // Some high-register supporting Thumb1 encodings only allow both registers + // to be from r0-r7 when in Thumb2. + if (Opc == ARM::tADDhirr && !hasV6MOps() && + isARMLowRegister(Inst.getOperand(1).getReg()) && + isARMLowRegister(Inst.getOperand(2).getReg())) + return Match_RequiresThumb2; + // Others only require ARMv6 or later. + else if (Opc == ARM::tMOVr && !hasV6Ops() && + isARMLowRegister(Inst.getOperand(0).getReg()) && + isARMLowRegister(Inst.getOperand(1).getReg())) + return Match_RequiresV6; } - // Some high-register supporting Thumb1 encodings only allow both registers - // to be from r0-r7 when in Thumb2. - else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() && - isARMLowRegister(Inst.getOperand(1).getReg()) && - isARMLowRegister(Inst.getOperand(2).getReg())) - return Match_RequiresThumb2; - // Others only require ARMv6 or later. 
- else if (Opc == ARM::tMOVr && isThumbOne() && !hasV6Ops() && - isARMLowRegister(Inst.getOperand(0).getReg()) && - isARMLowRegister(Inst.getOperand(1).getReg())) - return Match_RequiresV6; + + for (unsigned I = 0; I < MCID.NumOperands; ++I) + if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) { + // rGPRRegClass excludes PC, and also excluded SP before ARMv8 + if ((Inst.getOperand(I).getReg() == ARM::SP) && !hasV8Ops()) + return Match_RequiresV8; + else if (Inst.getOperand(I).getReg() == ARM::PC) + return Match_InvalidOperand; + } + return Match_Success; } @@ -8680,7 +8616,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return false; Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; case Match_MissingFeature: { assert(ErrorInfo && "Unknown missing feature!"); @@ -8720,6 +8656,8 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "instruction variant requires ARMv6 or later"); case Match_RequiresThumb2: return Error(IDLoc, "instruction variant requires Thumb2"); + case Match_RequiresV8: + return Error(IDLoc, "instruction variant requires ARMv8 or later"); case Match_ImmRange0_15: { SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; @@ -8868,7 +8806,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { return false; } - getParser().getStreamer().EmitValue(Value, Size); + getParser().getStreamer().EmitValue(Value, Size, L); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -9098,7 +9036,7 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { bool ARMAsmParser::parseDirectiveArch(SMLoc L) { StringRef Arch = getParser().parseStringToEndOfStatement().trim(); - unsigned ID = ARMTargetParser::parseArch(Arch); + unsigned ID = ARM::parseArch(Arch); if (ID == ARM::AK_INVALID) { Error(L, "Unknown arch name"); @@ -9106,7 +9044,8 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { } Triple T; - STI.setDefaultFeatures(T.getARMCPUForArch(Arch)); + MCSubtargetInfo &STI = copySTI(); + STI.setDefaultFeatures("", ("+" + ARM::getArchName(ID)).str()); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); getTargetStreamer().emitArch(ID); @@ -9233,12 +9172,13 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { // FIXME: This is using table-gen data, but should be moved to // ARMTargetParser once that is table-gen'd. 
- if (!STI.isCPUStringValid(CPU)) { + if (!getSTI().isCPUStringValid(CPU)) { Error(L, "Unknown CPU name"); return false; } - STI.setDefaultFeatures(CPU); + MCSubtargetInfo &STI = copySTI(); + STI.setDefaultFeatures(CPU, ""); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); return false; @@ -9249,13 +9189,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { SMLoc FPUNameLoc = getTok().getLoc(); StringRef FPU = getParser().parseStringToEndOfStatement().trim(); - unsigned ID = ARMTargetParser::parseFPU(FPU); + unsigned ID = ARM::parseFPU(FPU); std::vector Features; - if (!ARMTargetParser::getFPUFeatures(ID, Features)) { + if (!ARM::getFPUFeatures(ID, Features)) { Error(FPUNameLoc, "Unknown FPU name"); return false; } + MCSubtargetInfo &STI = copySTI(); for (auto Feature : Features) STI.ApplyFeatureFlag(Feature); setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -9895,7 +9836,7 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { SMLoc ArchLoc = Parser.getTok().getLoc(); getLexer().Lex(); - unsigned ID = ARMTargetParser::parseArch(Arch); + unsigned ID = ARM::parseArch(Arch); if (ID == ARM::AK_INVALID) { Error(ArchLoc, "unknown architecture '" + Arch + "'"); @@ -9976,22 +9917,22 @@ extern "C" void LLVMInitializeARMAsmParser() { // when we start to table-generate them, and we can use the ARM // flags below, that were generated by table-gen. static const struct { - const ARM::ArchExtKind Kind; - const unsigned ArchCheck; + const unsigned Kind; + const uint64_t ArchCheck; const FeatureBitset Features; } Extensions[] = { { ARM::AEK_CRC, Feature_HasV8, {ARM::FeatureCRC} }, { ARM::AEK_CRYPTO, Feature_HasV8, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8} }, { ARM::AEK_FP, Feature_HasV8, {ARM::FeatureFPARMv8} }, - { ARM::AEK_HWDIV, Feature_HasV7 | Feature_IsNotMClass, + { (ARM::AEK_HWDIV | ARM::AEK_HWDIVARM), Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureHWDiv, ARM::FeatureHWDivARM} }, { ARM::AEK_MP, Feature_HasV7 | Feature_IsNotMClass, {ARM::FeatureMP} }, { ARM::AEK_SIMD, Feature_HasV8, {ARM::FeatureNEON, ARM::FeatureFPARMv8} }, - // FIXME: Also available in ARMv6-K - { ARM::AEK_SEC, Feature_HasV7, {ARM::FeatureTrustZone} }, + { ARM::AEK_SEC, Feature_HasV6K, {ARM::FeatureTrustZone} }, // FIXME: Only available in A-class, isel not predicated { ARM::AEK_VIRT, Feature_HasV7, {ARM::FeatureVirtualization} }, + { ARM::AEK_FP16, Feature_HasV8_2a, {ARM::FeatureFPARMv8, ARM::FeatureFullFP16} }, // FIXME: Unsupported extensions. { ARM::AEK_OS, Feature_None, {} }, { ARM::AEK_IWMMXT, Feature_None, {} }, @@ -10020,7 +9961,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { EnableFeature = false; Name = Name.substr(2); } - unsigned FeatureKind = ARMTargetParser::parseArchExt(Name); + unsigned FeatureKind = ARM::parseArchExt(Name); if (FeatureKind == ARM::AEK_INVALID) Error(ExtLoc, "unknown architectural extension: " + Name); @@ -10037,6 +9978,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { return false; } + MCSubtargetInfo &STI = copySTI(); FeatureBitset ToggleFeatures = EnableFeature ? 
(~STI.getFeatureBits() & Extension.Features) : ( STI.getFeatureBits() & Extension.Features); @@ -10078,6 +10020,10 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, "expression value must be representable in 32 bits"); } break; + case MCK_rGPR: + if (hasV8Ops() && Op.isReg() && Op.getReg() == ARM::SP) + return Match_Success; + break; case MCK_GPRPair: if (Op.isReg() && MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg())) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 097ec04e7052..e63defed2288 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -59,7 +59,7 @@ namespace { } // Called when decoding an IT instruction. Sets the IT state for the following - // instructions that for the IT block. Firstcond and Mask correspond to the + // instructions that for the IT block. Firstcond and Mask correspond to the // fields in the IT instruction encoding. void setITState(char Firstcond, char Mask) { // (3 - the number of trailing zeros) is the number of then / else. @@ -459,21 +459,18 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, // VFP and NEON instructions, similarly, are shared between ARM // and Thumb modes. - MI.clear(); Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -485,7 +482,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -497,7 +493,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -509,7 +504,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -517,7 +511,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -525,7 +518,6 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -718,7 +710,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this, STI); if (Result) { @@ -729,7 +720,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -763,7 +753,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, uint32_t Insn32 = (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | 
(Bytes[0] << 16); - MI.clear(); Result = decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -774,7 +763,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); Result = decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -784,7 +772,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } if (fieldFromInstruction(Insn32, 28, 4) == 0xE) { - MI.clear(); Result = decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -794,7 +781,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - MI.clear(); Result = decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -803,7 +789,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } if (fieldFromInstruction(Insn32, 28, 4) == 0xE) { - MI.clear(); Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -814,7 +799,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) { - MI.clear(); uint32_t NEONLdStInsn = Insn32; NEONLdStInsn &= 0xF0FFFFFF; NEONLdStInsn |= 0x04000000; @@ -828,7 +812,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } if (fieldFromInstruction(Insn32, 24, 4) == 0xF) { - MI.clear(); uint32_t NEONDataInsn = Insn32; NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 @@ -841,7 +824,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); uint32_t NEONCryptoInsn = Insn32; NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 @@ -853,7 +835,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return Result; } - MI.clear(); uint32_t NEONv8Insn = Insn32; NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, @@ -864,7 +845,6 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } - MI.clear(); Size = 0; return MCDisassembler::Fail; } @@ -902,7 +882,7 @@ static DecodeStatus DecodeGPRnopcRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - + if (RegNo == 15) S = MCDisassembler::SoftFail; @@ -986,8 +966,13 @@ static DecodeStatus DecodetcGPRRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecoderGPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; - if (RegNo == 13 || RegNo == 15) + + const FeatureBitset &featureBits = + ((const MCDisassembler*)Decoder)->getSubtargetInfo().getFeatureBits(); + + if ((RegNo == 13 && !featureBits[ARM::HasV8Ops]) || RegNo == 15) S = MCDisassembler::SoftFail; + Check(S, DecodeGPRRegisterClass(Inst, RegNo, Address, Decoder)); return S; } @@ -1147,7 +1132,7 @@ static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, unsigned imm = fieldFromInstruction(Val, 7, 5); // Register-immediate - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + if (!Check(S, 
DecoderGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; ARM_AM::ShiftOpc Shift = ARM_AM::lsl; @@ -1658,7 +1643,7 @@ DecodeAddrMode3Instruction(MCInst &Inst, unsigned Insn, case ARM::STRD_POST: if (P == 0 && W == 1) S = MCDisassembler::SoftFail; - + if (writeback && (Rn == 15 || Rn == Rt || Rn == Rt2)) S = MCDisassembler::SoftFail; if (type && Rm == 15) @@ -4131,7 +4116,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, // indicates the move for the GE{3:0} bits, the mask{0} bit can be set // only if the processor includes the DSP extension. if (Mask == 0 || (Mask != 2 && ValLow > 3) || - (!(FeatureBits[ARM::FeatureDSPThumb2]) && (Mask & 1))) + (!(FeatureBits[ARM::FeatureDSP]) && (Mask & 1))) S = MCDisassembler::SoftFail; } } @@ -5065,6 +5050,10 @@ static DecodeStatus DecodeSwap(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { + const FeatureBitset &featureBits = + ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); + bool hasFullFP16 = featureBits[ARM::FeatureFullFP16]; + unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); @@ -5075,10 +5064,35 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, DecodeStatus S = MCDisassembler::Success; - // VMOVv2f32 is ambiguous with these decodings. - if (!(imm & 0x38) && cmode == 0xF) { - if (op == 1) return MCDisassembler::Fail; - Inst.setOpcode(ARM::VMOVv2f32); + // If the top 3 bits of imm are clear, this is a VMOV (immediate) + if (!(imm & 0x38)) { + if (cmode == 0xF) { + if (op == 1) return MCDisassembler::Fail; + Inst.setOpcode(ARM::VMOVv2f32); + } + if (hasFullFP16) { + if (cmode == 0xE) { + if (op == 1) { + Inst.setOpcode(ARM::VMOVv1i64); + } else { + Inst.setOpcode(ARM::VMOVv8i8); + } + } + if (cmode == 0xD) { + if (op == 1) { + Inst.setOpcode(ARM::VMVNv2i32); + } else { + Inst.setOpcode(ARM::VMOVv2i32); + } + } + if (cmode == 0xC) { + if (op == 1) { + Inst.setOpcode(ARM::VMVNv2i32); + } else { + Inst.setOpcode(ARM::VMOVv2i32); + } + } + } return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); } @@ -5095,6 +5109,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { + const FeatureBitset &featureBits = + ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); + bool hasFullFP16 = featureBits[ARM::FeatureFullFP16]; + unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); @@ -5105,10 +5123,35 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, DecodeStatus S = MCDisassembler::Success; - // VMOVv4f32 is ambiguous with these decodings. 
- if (!(imm & 0x38) && cmode == 0xF) { - if (op == 1) return MCDisassembler::Fail; - Inst.setOpcode(ARM::VMOVv4f32); + // If the top 3 bits of imm are clear, this is a VMOV (immediate) + if (!(imm & 0x38)) { + if (cmode == 0xF) { + if (op == 1) return MCDisassembler::Fail; + Inst.setOpcode(ARM::VMOVv4f32); + } + if (hasFullFP16) { + if (cmode == 0xE) { + if (op == 1) { + Inst.setOpcode(ARM::VMOVv2i64); + } else { + Inst.setOpcode(ARM::VMOVv16i8); + } + } + if (cmode == 0xD) { + if (op == 1) { + Inst.setOpcode(ARM::VMVNv4i32); + } else { + Inst.setOpcode(ARM::VMOVv4i32); + } + } + if (cmode == 0xC) { + if (op == 1) { + Inst.setOpcode(ARM::VMVNv4i32); + } else { + Inst.setOpcode(ARM::VMOVv4i32); + } + } + } return DecodeNEONModImmInstruction(Inst, Insn, Address, Decoder); } @@ -5132,7 +5175,7 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, unsigned Rm = fieldFromInstruction(Val, 0, 4); Rm |= (fieldFromInstruction(Val, 23, 1) << 4); unsigned Cond = fieldFromInstruction(Val, 28, 4); - + if (fieldFromInstruction(Val, 8, 4) != 0 || Rn == Rt) S = MCDisassembler::SoftFail; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 0bff52141da5..33fc85af9b19 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -804,7 +805,7 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, unsigned Opcode = MI->getOpcode(); // For writes, handle extended mask bits if the DSP extension is present. - if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSPThumb2]) { + if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSP]) { switch (SYSm) { case 0x400: O << "apsr_g"; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 3927c9f8bfd3..52f7115f0558 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -15,12 +15,9 @@ #define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" namespace llvm { -class MCOperand; - class ARMInstPrinter : public MCInstPrinter { public: ARMInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 111463588565..fa52c9354c17 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -25,13 +25,17 @@ #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MachO.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -180,9 +184,8 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { return false; } -bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) 
const { +const char *ARMAsmBackend::reasonForFixupRelaxation(const MCFixup &Fixup, + uint64_t Value) const { switch ((unsigned)Fixup.getKind()) { case ARM::fixup_arm_thumb_br: { // Relaxing tB to t2B. tB has a signed 12-bit displacement with the @@ -192,7 +195,9 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, // // Relax if the value is too big for a (signed) i8. int64_t Offset = int64_t(Value) - 4; - return Offset > 2046 || Offset < -2048; + if (Offset > 2046 || Offset < -2048) + return "out of range pc-relative fixup value"; + break; } case ARM::fixup_arm_thumb_bcc: { // Relaxing tBcc to t2Bcc. tBcc has a signed 9-bit displacement with the @@ -202,23 +207,40 @@ bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, // // Relax if the value is too big for a (signed) i8. int64_t Offset = int64_t(Value) - 4; - return Offset > 254 || Offset < -256; + if (Offset > 254 || Offset < -256) + return "out of range pc-relative fixup value"; + break; } case ARM::fixup_thumb_adr_pcrel_10: case ARM::fixup_arm_thumb_cp: { // If the immediate is negative, greater than 1020, or not a multiple // of four, the wide version of the instruction must be used. int64_t Offset = int64_t(Value) - 4; - return Offset > 1020 || Offset < 0 || Offset & 3; + if (Offset & 3) + return "misaligned pc-relative fixup value"; + else if (Offset > 1020 || Offset < 0) + return "out of range pc-relative fixup value"; + break; } - case ARM::fixup_arm_thumb_cb: + case ARM::fixup_arm_thumb_cb: { // If we have a Thumb CBZ or CBNZ instruction and its target is the next // instruction, it is actually out of range for the instruction. // It will be changed to a NOP. int64_t Offset = (Value & ~1); - return Offset == 2; + if (Offset == 2) + return "will be converted to nop"; + break; } - llvm_unreachable("Unexpected fixup kind in fixupNeedsRelaxation()!"); + default: + llvm_unreachable("Unexpected fixup kind in reasonForFixupRelaxation()!"); + } + return nullptr; +} + +bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const { + return reasonForFixupRelaxation(Fixup, Value); }
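The refactor above turns the relaxation predicate into reasonForFixupRelaxation(), which returns nullptr when the narrow encoding fits and a human-readable reason otherwise; fixupNeedsRelaxation() just tests the pointer, and adjustFixupValue() further down can reuse the same string as an error message on targets without Thumb2. A stripped-down sketch of the pattern for the tB case only (illustrative, not the full switch):

#include <cstdint>

// nullptr: the narrow tB encoding can hold Value; otherwise a diagnostic
// explaining why relaxation to t2B (or an error) is required.
static const char *reasonForThumbBrRelaxation(uint64_t Value) {
  int64_t Offset = int64_t(Value) - 4;  // Thumb PC reads ahead by 4
  if (Offset > 2046 || Offset < -2048)  // signed 12-bit displacement
    return "out of range pc-relative fixup value";
  return nullptr;
}

static bool thumbBrNeedsRelaxation(uint64_t Value) {
  return reasonForThumbBrRelaxation(Value) != nullptr; // bool view of the reason
}

void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { @@ -317,9 +339,10 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, return Value; } -static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, - bool IsPCRel, MCContext *Ctx, - bool IsLittleEndian) { +unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, + bool IsPCRel, MCContext *Ctx, + bool IsLittleEndian, + bool IsResolved) const { unsigned Kind = Fixup.getKind(); switch (Kind) { default: @@ -372,8 +395,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value = -Value; isAdd = false; } - if (Ctx && Value >= 4096) - Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Ctx && Value >= 4096) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } Value |= isAdd << 23; // Same addressing mode as fixup_arm_pcrel_10, @@ -383,8 +408,6 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return Value; } - case ARM::fixup_thumb_adr_pcrel_10: - return ((Value - 4) >> 2) & 0xff; case ARM::fixup_arm_adr_pcrel_12: { // ARM PC-relative values are offset by 8.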
Value -= 8; @@ -393,8 +416,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value = -Value; opc = 2; // 0b0010 } - if (Ctx && ARM_AM::getSOImmVal(Value) == -1) - Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Ctx && ARM_AM::getSOImmVal(Value) == -1) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } // Encode the immediate and shift the opcode into place. return ARM_AM::getSOImmVal(Value) | (opc << 21); } @@ -517,21 +542,44 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, ((uint16_t)imm10LBits) << 1); return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } + case ARM::fixup_thumb_adr_pcrel_10: case ARM::fixup_arm_thumb_cp: - // Offset by 4, and don't encode the low two bits. Two bytes of that - // 'off by 4' is implicitly handled by the half-word ordering of the - // Thumb encoding, so we only need to adjust by 2 here. - return ((Value - 2) >> 2) & 0xff; + // On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we + // could have an error on our hands. + if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) { + const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); + if (FixupDiagnostic) { + Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + return 0; + } + } + // Offset by 4, and don't encode the low two bits. + return ((Value - 4) >> 2) & 0xff; case ARM::fixup_arm_thumb_cb: { // Offset by 4 and don't encode the lower bit, which is always 0. + // FIXME: diagnose if no Thumb2 uint32_t Binary = (Value - 4) >> 1; return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3); } case ARM::fixup_arm_thumb_br: // Offset by 4 and don't encode the lower bit, which is always 0. + if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) { + const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); + if (FixupDiagnostic) { + Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + return 0; + } + } return ((Value - 4) >> 1) & 0x7ff; case ARM::fixup_arm_thumb_bcc: // Offset by 4 and don't encode the lower bit, which is always 0. + if (Ctx && !STI->getFeatureBits()[ARM::FeatureThumb2]) { + const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value); + if (FixupDiagnostic) { + Ctx->reportError(Fixup.getLoc(), FixupDiagnostic); + return 0; + } + } return ((Value - 4) >> 1) & 0xff; case ARM::fixup_arm_pcrel_10_unscaled: { Value = Value - 8; // ARM fixups offset by an additional word and don't @@ -542,8 +590,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, isAdd = false; } // The value has the low 4 bits encoded in [3:0] and the high 4 in [11:8]. - if (Ctx && Value >= 256) - Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Ctx && Value >= 256) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } Value = (Value & 0xf) | ((Value & 0xf0) << 4); return Value | (isAdd << 23); } @@ -561,8 +611,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } // These values don't encode the low two bits since they're always zero. 
Value >>= 2; - if (Ctx && Value >= 256) - Ctx->reportFatalError(Fixup.getLoc(), "out of range pc-relative fixup value"); + if (Ctx && Value >= 256) { + Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value"); + return 0; + } Value |= isAdd << 23; // Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords @@ -582,6 +634,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, const MCValue &Target, uint64_t &Value, bool &IsResolved) { const MCSymbolRefExpr *A = Target.getSymA(); + const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; // Some fixups to thumb function symbols need the low bit (thumb bit) // twiddled. if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 && @@ -590,18 +643,21 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 && (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 && (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) { - if (A) { - const MCSymbol &Sym = A->getSymbol(); - if (Asm.isThumbFunc(&Sym)) + if (Sym) { + if (Asm.isThumbFunc(Sym)) Value |= 1; } } - // For Thumb1 BL instruction, it is possible to be a long jump between - // the basic blocks of the same function. Thus, we would like to resolve - // the offset when the destination has the same MCFragment. - if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { - const MCSymbol &Sym = A->getSymbol(); - IsResolved = (Sym.getFragment() == DF); + if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { + assert(Sym && "How did we resolve this?"); + + // If the symbol is external the linker will handle it. + // FIXME: Should we handle it as an optimization? + + // If the symbol is out of range, produce a relocation and hope the + // linker can handle it. GNU AS produces an error in this case. + if (Sym->isExternal() || Value >= 0x400004) + IsResolved = false; } // We must always generate a relocation for BL/BLX instructions if we have // a symbol to reference, as the linker relies on knowing the destination @@ -616,7 +672,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // the instruction. This allows adjustFixupValue() to issue a diagnostic // if the value is invalid. (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(), - IsLittleEndian); + IsLittleEndian, IsResolved); } /// getFixupKindNumBytes - The number of bytes the fixup may change. @@ -719,7 +775,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian); + Value = + adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian, true); if (!Value) return; // Doesn't change encoding. @@ -743,6 +800,249 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } +namespace CU { + +/// \brief Compact unwind encoding values.
+enum CompactUnwindEncodings { + UNWIND_ARM_MODE_MASK = 0x0F000000, + UNWIND_ARM_MODE_FRAME = 0x01000000, + UNWIND_ARM_MODE_FRAME_D = 0x02000000, + UNWIND_ARM_MODE_DWARF = 0x04000000, + + UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x00C00000, + + UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001, + UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002, + UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004, + + UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 0x00000008, + UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 0x00000010, + UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 0x00000020, + UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 0x00000040, + UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 0x00000080, + + UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x00000F00, + + UNWIND_ARM_DWARF_SECTION_OFFSET = 0x00FFFFFF +}; + +} // end CU namespace + +/// Generate compact unwind encoding for the function based on the CFI +/// instructions. If the CFI instructions describe a frame that cannot be +/// encoded in compact unwind, the method returns UNWIND_ARM_MODE_DWARF which +/// tells the runtime to fall back and unwind using DWARF. +uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding( + ArrayRef Instrs) const { + DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "generateCU()\n"); + // Only armv7k uses CFI based unwinding. + if (Subtype != MachO::CPU_SUBTYPE_ARM_V7K) + return 0; + // No .cfi directives means no frame. + if (Instrs.empty()) + return 0; + // Start off assuming CFA is at SP+0. + int CFARegister = ARM::SP; + int CFARegisterOffset = 0; + // Mark savable registers as initially unsaved. + DenseMap RegOffsets; + int FloatRegCount = 0; + // Process each .cfi directive and build up compact unwind info. + for (size_t i = 0, e = Instrs.size(); i != e; ++i) { + int Reg; + const MCCFIInstruction &Inst = Instrs[i]; + switch (Inst.getOperation()) { + case MCCFIInstruction::OpDefCfa: // DW_CFA_def_cfa + CFARegisterOffset = -Inst.getOffset(); + CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true); + break; + case MCCFIInstruction::OpDefCfaOffset: // DW_CFA_def_cfa_offset + CFARegisterOffset = -Inst.getOffset(); + break; + case MCCFIInstruction::OpDefCfaRegister: // DW_CFA_def_cfa_register + CFARegister = MRI.getLLVMRegNum(Inst.getRegister(), true); + break; + case MCCFIInstruction::OpOffset: // DW_CFA_offset + Reg = MRI.getLLVMRegNum(Inst.getRegister(), true); + if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) + RegOffsets[Reg] = Inst.getOffset(); + else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) { + RegOffsets[Reg] = Inst.getOffset(); + ++FloatRegCount; + } else { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << ".cfi_offset on unknown register=" + << Inst.getRegister() << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + break; + case MCCFIInstruction::OpRelOffset: // DW_CFA_advance_loc + // Ignore + break; + default: + // Directive not convertible to compact unwind, bail out. + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() + << "CFI directive not compatible with compact " + "unwind encoding, opcode=" << Inst.getOperation() + << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + break; + } + } + + // If no frame set up, return no unwind info. + if ((CFARegister == ARM::SP) && (CFARegisterOffset == 0)) + return 0; +
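Everything below packs into a single 32-bit compact unwind word: the mode nibble, a two-bit var-args stack adjust in bits 23:22, one flag per saved GPR, and, in frame-D mode, a saved-D-register count in bits 11:8. An illustrative packing helper under those assumptions (constants mirror the CU enum above; the real code builds the word incrementally):

#include <cstdint>

// Returns false whenever the frame cannot be encoded compactly, in which
// case the caller falls back to CU::UNWIND_ARM_MODE_DWARF.
static bool packFrameEncoding(uint32_t GPRPushBits, int StackAdjust,
                              int FloatRegCount, uint32_t &Encoding) {
  Encoding = 0x01000000;                       // UNWIND_ARM_MODE_FRAME
  if (StackAdjust < 0 || StackAdjust > 12 || (StackAdjust & 3))
    return false;                              // only 0, 4, 8, 12 fit
  Encoding |= uint32_t(StackAdjust / 4) << 22; // 0..3 into bits 23:22
  Encoding |= GPRPushBits;                     // FIRST/SECOND_PUSH_* flags
  if (FloatRegCount > 0) {
    if (FloatRegCount > 4)
      return false;                            // count field tops out at 4
    Encoding = (Encoding & ~0x0F000000u) | 0x02000000; // switch to FRAME_D
    Encoding |= uint32_t(FloatRegCount - 1) << 8;      // D-reg count field
  }
  return true;
}

+ // Verify standard frame (lr/r7) was used.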
+ if (CFARegister != ARM::R7) { + DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "frame register is " + << CFARegister + << " instead of r7\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + int StackAdjust = CFARegisterOffset - 8; + if (RegOffsets.lookup(ARM::LR) != (-4 - StackAdjust)) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() + << "LR not saved as standard frame, StackAdjust=" + << StackAdjust + << ", CFARegisterOffset=" << CFARegisterOffset + << ", lr save at offset=" << RegOffsets[14] << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + if (RegOffsets.lookup(ARM::R7) != (-8 - StackAdjust)) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << "r7 not saved as standard frame\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + uint32_t CompactUnwindEncoding = CU::UNWIND_ARM_MODE_FRAME; + + // If var-args are used, there may be a stack adjust required. + switch (StackAdjust) { + case 0: + break; + case 4: + CompactUnwindEncoding |= 0x00400000; + break; + case 8: + CompactUnwindEncoding |= 0x00800000; + break; + case 12: + CompactUnwindEncoding |= 0x00C00000; + break; + default: + DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() + << ".cfi_def_cfa stack adjust (" + << StackAdjust << ") out of range\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + + // If r6 is saved, it must be right below r7. + static struct { + unsigned Reg; + unsigned Encoding; + } GPRCSRegs[] = {{ARM::R6, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R6}, + {ARM::R5, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R5}, + {ARM::R4, CU::UNWIND_ARM_FRAME_FIRST_PUSH_R4}, + {ARM::R12, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R12}, + {ARM::R11, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R11}, + {ARM::R10, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R10}, + {ARM::R9, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R9}, + {ARM::R8, CU::UNWIND_ARM_FRAME_SECOND_PUSH_R8}}; + + int CurOffset = -8 - StackAdjust; + for (auto CSReg : GPRCSRegs) { + auto Offset = RegOffsets.find(CSReg.Reg); + if (Offset == RegOffsets.end()) + continue; + + int RegOffset = Offset->second; + if (RegOffset != CurOffset - 4) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << MRI.getName(CSReg.Reg) << " saved at " + << RegOffset << " but only supported at " + << CurOffset << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + CompactUnwindEncoding |= CSReg.Encoding; + CurOffset -= 4; + } + + // If no floats saved, we are done. + if (FloatRegCount == 0) + return CompactUnwindEncoding; + + // Switch mode to include D register saving. + CompactUnwindEncoding &= ~CU::UNWIND_ARM_MODE_MASK; + CompactUnwindEncoding |= CU::UNWIND_ARM_MODE_FRAME_D; + + // FIXME: supporting more than 4 saved D-registers compactly would be trivial, + // but needs coordination with the linker and libunwind. + if (FloatRegCount > 4) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << "unsupported number of D registers saved (" + << FloatRegCount << ")\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + + // Floating point registers must either be saved sequentially, or we defer to + // DWARF. No gaps allowed here so check that each saved d-register is + // precisely where it should be. 
+ static unsigned FPRCSRegs[] = { ARM::D8, ARM::D10, ARM::D12, ARM::D14 }; + for (int Idx = FloatRegCount - 1; Idx >= 0; --Idx) { + auto Offset = RegOffsets.find(FPRCSRegs[Idx]); + if (Offset == RegOffsets.end()) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << FloatRegCount << " D-regs saved, but " + << MRI.getName(FPRCSRegs[Idx]) + << " not saved\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } else if (Offset->second != CurOffset - 8) { + DEBUG_WITH_TYPE("compact-unwind", + llvm::dbgs() << FloatRegCount << " D-regs saved, but " + << MRI.getName(FPRCSRegs[Idx]) + << " saved at " << Offset->second + << ", expected at " << CurOffset - 8 + << "\n"); + return CU::UNWIND_ARM_MODE_DWARF; + } + CurOffset -= 8; + } + + return CompactUnwindEncoding | ((FloatRegCount - 1) << 8); +} + +static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) { + unsigned AK = ARM::parseArch(Arch); + switch (AK) { + default: + return MachO::CPU_SUBTYPE_ARM_V7; + case ARM::AK_ARMV4T: + return MachO::CPU_SUBTYPE_ARM_V4T; + case ARM::AK_ARMV5T: + case ARM::AK_ARMV5TE: + case ARM::AK_ARMV5TEJ: + return MachO::CPU_SUBTYPE_ARM_V5; + case ARM::AK_ARMV6: + case ARM::AK_ARMV6K: + return MachO::CPU_SUBTYPE_ARM_V6; + case ARM::AK_ARMV7A: + return MachO::CPU_SUBTYPE_ARM_V7; + case ARM::AK_ARMV7S: + return MachO::CPU_SUBTYPE_ARM_V7S; + case ARM::AK_ARMV7K: + return MachO::CPU_SUBTYPE_ARM_V7K; + case ARM::AK_ARMV6M: + return MachO::CPU_SUBTYPE_ARM_V6M; + case ARM::AK_ARMV7M: + return MachO::CPU_SUBTYPE_ARM_V7M; + case ARM::AK_ARMV7EM: + return MachO::CPU_SUBTYPE_ARM_V7EM; + } +} + MCAsmBackend *llvm::createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI, const Triple &TheTriple, StringRef CPU, @@ -751,19 +1051,8 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T, default: llvm_unreachable("unsupported object format"); case Triple::MachO: { - MachO::CPUSubTypeARM CS = - StringSwitch(TheTriple.getArchName()) - .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) - .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) - .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) - .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) - .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) - .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) - .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) - .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) - .Default(MachO::CPU_SUBTYPE_ARM_V7); - - return new ARMAsmBackendDarwin(T, TheTriple, CS); + MachO::CPUSubTypeARM CS = getMachOSubTypeFromArch(TheTriple.getArchName()); + return new ARMAsmBackendDarwin(T, TheTriple, MRI, CS); } case Triple::COFF: assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 6b4abd5898eb..28a62132a419 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -45,6 +45,10 @@ public: const MCValue &Target, uint64_t &Value, bool &IsResolved) override; + unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel, + MCContext *Ctx, bool IsLittleEndian, + bool IsResolved) const; + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; @@ -52,6 +56,9 @@ public: bool mayNeedRelaxation(const MCInst &Inst) const override; + const char *reasonForFixupRelaxation(const MCFixup &Fixup, + uint64_t Value) const; + bool fixupNeedsRelaxation(const MCFixup &Fixup, 
uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override; diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index a6206e3d9585..995dd0fe08ee 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -16,11 +16,12 @@ using namespace llvm; namespace { class ARMAsmBackendDarwin : public ARMAsmBackend { + const MCRegisterInfo &MRI; public: const MachO::CPUSubTypeARM Subtype; ARMAsmBackendDarwin(const Target &T, const Triple &TT, - MachO::CPUSubTypeARM st) - : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { + const MCRegisterInfo &MRI, MachO::CPUSubTypeARM st) - : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { + : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) { HasDataInCodeSupport = true; } @@ -28,6 +29,9 @@ public: return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, Subtype); } + + uint32_t generateCompactUnwindEncoding( + ArrayRef<MCCFIInstruction> Instrs) const override; }; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 804d3534096a..52eba8be288f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -95,7 +95,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_GOTTPOFF: Type = ELF::R_ARM_TLS_IE32; break; - case MCSymbolRefExpr::VK_GOTPCREL: + case MCSymbolRefExpr::VK_ARM_GOT_PREL: Type = ELF::R_ARM_GOT_PREL; break; } @@ -192,7 +192,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_GOTOFF: Type = ELF::R_ARM_GOTOFF32; break; - case MCSymbolRefExpr::VK_GOTPCREL: + case MCSymbolRefExpr::VK_ARM_GOT_PREL: Type = ELF::R_ARM_GOT_PREL; break; case MCSymbolRefExpr::VK_ARM_TARGET1: diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index d17fdb95dbdf..6084f22c8470 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -79,7 +79,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void emitAttribute(unsigned Attribute, unsigned Value) override; void emitTextAttribute(unsigned Attribute, StringRef String) override; void emitIntTextAttribute(unsigned Attribute, unsigned IntValue, - StringRef StrinValue) override; + StringRef StringValue) override; void emitArch(unsigned Arch) override; void emitArchExtension(unsigned ArchExt) override; void emitObjectArch(unsigned Arch) override; @@ -195,16 +195,16 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, OS << "\n"; } void ARMTargetAsmStreamer::emitArch(unsigned Arch) { - OS << "\t.arch\t" << ARMTargetParser::getArchName(Arch) << "\n"; + OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n"; } void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) { - OS << "\t.arch_extension\t" << ARMTargetParser::getArchExtName(ArchExt) << "\n"; + OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n"; } void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) { - OS << "\t.object_arch\t" << ARMTargetParser::getArchName(Arch) << '\n'; + OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n'; } void ARMTargetAsmStreamer::emitFPU(unsigned FPU) { - OS << "\t.fpu\t" << ARMTargetParser::getFPUName(FPU) << "\n"; + OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n"; } void ARMTargetAsmStreamer::finishAttributeSection() {
} @@ -243,7 +243,7 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset, class ARMTargetELFStreamer : public ARMTargetStreamer { private: // This structure holds all attributes, accounting for - // their string/numeric value, so we can later emmit them + // their string/numeric value, so we can later emit them // in declaration order, keeping all in the same vector struct AttributeItem { enum { @@ -254,7 +254,7 @@ private: } Type; unsigned Tag; unsigned IntValue; - StringRef StringValue; + std::string StringValue; static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) { // The conformance tag must be emitted first when serialised @@ -507,14 +507,15 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// ARM streamer overrides it to add the appropriate mapping symbol ($d) if /// necessary. - void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) override { + void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override { if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) - if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) - getContext().reportFatalError(Loc, "relocated expression must be 32-bit"); + if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) { + getContext().reportError(Loc, "relocated expression must be 32-bit"); + return; + } EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size); + MCELFStreamer::EmitValueImpl(Value, Size, Loc); } void EmitAssemblerFlag(MCAssemblerFlag Flag) override { @@ -684,16 +685,16 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { using namespace ARMBuildAttrs; setAttributeItem(CPU_name, - ARMTargetParser::getCPUAttr(Arch), + ARM::getCPUAttr(Arch), false); if (EmittedArch == ARM::AK_INVALID) setAttributeItem(CPU_arch, - ARMTargetParser::getArchAttr(Arch), + ARM::getArchAttr(Arch), false); else setAttributeItem(CPU_arch, - ARMTargetParser::getArchAttr(EmittedArch), + ARM::getArchAttr(EmittedArch), false); switch (Arch) { @@ -702,7 +703,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { case ARM::AK_ARMV3: case ARM::AK_ARMV3M: case ARM::AK_ARMV4: - case ARM::AK_ARMV5: setAttributeItem(ARM_ISA_use, Allowed, false); break; @@ -710,7 +710,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { case ARM::AK_ARMV5T: case ARM::AK_ARMV5TE: case ARM::AK_ARMV6: - case ARM::AK_ARMV6J: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, Allowed, false); break; @@ -721,8 +720,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { break; case ARM::AK_ARMV6K: - case ARM::AK_ARMV6Z: - case ARM::AK_ARMV6ZK: + case ARM::AK_ARMV6KZ: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, Allowed, false); setAttributeItem(Virtualization_use, AllowTZ, false); @@ -732,10 +730,6 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { setAttributeItem(THUMB_ISA_use, Allowed, false); break; - case ARM::AK_ARMV7: - setAttributeItem(THUMB_ISA_use, AllowThumb32, false); - break; - case ARM::AK_ARMV7A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); @@ -755,6 +749,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { case ARM::AK_ARMV8A: case ARM::AK_ARMV8_1A: + case ARM::AK_ARMV8_2A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); @@ -1084,19 +1079,14 @@ inline void
ARMELFStreamer::SwitchToEHSection(const char *Prefix, } inline void ARMELFStreamer::SwitchToExTabSection(const MCSymbol &FnStart) { - SwitchToEHSection(".ARM.extab", - ELF::SHT_PROGBITS, - ELF::SHF_ALLOC, - SectionKind::getDataRel(), - FnStart); + SwitchToEHSection(".ARM.extab", ELF::SHT_PROGBITS, ELF::SHF_ALLOC, + SectionKind::getData(), FnStart); } inline void ARMELFStreamer::SwitchToExIdxSection(const MCSymbol &FnStart) { - SwitchToEHSection(".ARM.exidx", - ELF::SHT_ARM_EXIDX, + SwitchToEHSection(".ARM.exidx", ELF::SHT_ARM_EXIDX, ELF::SHF_ALLOC | ELF::SHF_LINK_ORDER, - SectionKind::getDataRel(), - FnStart); + SectionKind::getData(), FnStart); } void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) { MCDataFragment *Frag = getOrCreateDataFragment(); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 1ac08159bd3d..bda37f6616a8 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -33,7 +33,9 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(const Triple &TheTriple) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = ExceptionHandling::SjLj; + ExceptionsType = TheTriple.isOSDarwin() && !TheTriple.isWatchOS() + ? ExceptionHandling::SjLj + : ExceptionHandling::DwarfCFI; UseIntegratedAssembler = true; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 99a5fff5ec27..5e548162bec6 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -19,34 +19,37 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); +class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); - public: - explicit ARMMCAsmInfoDarwin(const Triple &TheTriple); - }; +public: + explicit ARMMCAsmInfoDarwin(const Triple &TheTriple); +}; - class ARMELFMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit ARMELFMCAsmInfo(const Triple &TT); +class ARMELFMCAsmInfo : public MCAsmInfoELF { + void anchor() override; - void setUseIntegratedAssembler(bool Value) override; - }; +public: + explicit ARMELFMCAsmInfo(const Triple &TT); - class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { - void anchor() override; - public: - explicit ARMCOFFMCAsmInfoMicrosoft(); - }; + void setUseIntegratedAssembler(bool Value) override; +}; - class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF { - void anchor() override; - public: - explicit ARMCOFFMCAsmInfoGNU(); - }; +class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { + void anchor() override; + +public: + explicit ARMCOFFMCAsmInfoMicrosoft(); +}; + +class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF { + void anchor() override; + +public: + explicit ARMCOFFMCAsmInfoGNU(); +}; } // namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index 9146d4def75a..75dde8008fca 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -63,8 +63,8 @@ public: return false; } void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *findAssociatedSection() const override { - return getSubExpr()->findAssociatedSection(); + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); } // There are no TLS ARMMCExprs at the moment. 
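One change that recurs across the streamer and Mach-O writer hunks in this patch is the move from MCContext::reportFatalError to MCContext::reportError plus an early return, so malformed input produces a diagnostic instead of killing the assembler outright. A minimal sketch of the shape, with Ctx, Loc and Sym standing in for the MCContext, SMLoc and MCSymbol available at each call site:

    if (!Sym->getFragment()) {
      Ctx.reportError(Loc, "symbol '" + Sym->getName() +
                               "' can not be undefined in a subtraction expression");
      return; // error recorded; assembly continues and fails cleanly at the end
    }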
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 21c9fc1e58b2..8c8c249addb5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -134,101 +135,11 @@ std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) { bool isThumb = TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb; - bool NoCPU = CPU == "generic" || CPU.empty(); std::string ARMArchFeature; - switch (TT.getSubArch()) { - default: - llvm_unreachable("invalid sub-architecture for ARM"); - case Triple::ARMSubArch_v8: - if (NoCPU) - // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, - // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, - // FeatureT2XtPk, FeatureCrypto, FeatureCRC - ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," - "+trustzone,+t2xtpk,+crypto,+crc"; - else - // Use CPU to figure out the exact features - ARMArchFeature = "+v8"; - break; - case Triple::ARMSubArch_v8_1a: - if (NoCPU) - // v8.1a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, - // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, - // FeatureT2XtPk, FeatureCrypto, FeatureCRC, FeatureV8_1a - ARMArchFeature = "+v8.1a,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm," - "+trustzone,+t2xtpk,+crypto,+crc"; - else - // Use CPU to figure out the exact features - ARMArchFeature = "+v8.1a"; - break; - case Triple::ARMSubArch_v7m: - isThumb = true; - if (NoCPU) - // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - break; - case Triple::ARMSubArch_v7em: - if (NoCPU) - // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, - // FeatureT2XtPk, FeatureMClass - ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,+t2xtpk,+mclass"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - break; - case Triple::ARMSubArch_v7s: - if (NoCPU) - // v7s: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureHasRAS - // Swift - ARMArchFeature = "+v7,+swift,+neon,+db,+t2dsp,+ras"; - else - // Use CPU to figure out the exact features. - ARMArchFeature = "+v7"; - break; - case Triple::ARMSubArch_v7: - // v7 CPUs have lots of different feature sets. If no CPU is specified, - // then assume v7a (e.g. cortex-a8) feature set. Otherwise, return - // the "minimum" feature set and use CPU string to figure out the exact - // features. - if (NoCPU) - // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureT2XtPk - ARMArchFeature = "+v7,+neon,+db,+t2dsp,+t2xtpk"; - else - // Use CPU to figure out the exact features. 
- ARMArchFeature = "+v7"; - break; - case Triple::ARMSubArch_v6t2: - ARMArchFeature = "+v6t2"; - break; - case Triple::ARMSubArch_v6k: - ARMArchFeature = "+v6k"; - break; - case Triple::ARMSubArch_v6m: - isThumb = true; - if (NoCPU) - // v6m: FeatureNoARM, FeatureMClass - ARMArchFeature = "+v6m,+noarm,+mclass"; - else - ARMArchFeature = "+v6"; - break; - case Triple::ARMSubArch_v6: - ARMArchFeature = "+v6"; - break; - case Triple::ARMSubArch_v5te: - ARMArchFeature = "+v5te"; - break; - case Triple::ARMSubArch_v5: - ARMArchFeature = "+v5t"; - break; - case Triple::ARMSubArch_v4t: - ARMArchFeature = "+v4t"; - break; - case Triple::NoSubArch: - break; - } + + unsigned ArchID = ARM::parseArch(TT.getArchName()); + if (ArchID != ARM::AK_INVALID && (CPU.empty() || CPU == "generic")) + ARMArchFeature = (ARMArchFeature + "+" + ARM::getArchName(ArchID)).str(); if (isThumb) { if (ARMArchFeature.empty()) diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index fd30623d79af..c2bbc8e828c4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -86,7 +86,8 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T, // object file. MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll); + MCCodeEmitter *Emitter, bool RelaxAll, + bool IncrementalLinkerCompatible); /// Construct an ELF Mach-O object writer. MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 95d7ea7c04a3..cfd504e533af 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -150,10 +150,12 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); - if (!A->getFragment()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + if (!A->getFragment()) { + Asm.getContext().reportError(Fixup.getLoc(), "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); + return; + } uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint32_t Value2 = 0; @@ -163,10 +165,12 @@ if (const MCSymbolRefExpr *B = Target.getSymB()) { const MCSymbol *SB = &B->getSymbol(); - if (!SB->getFragment()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + if (!SB->getFragment()) { + Asm.getContext().reportError(Fixup.getLoc(), "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); + return; + } // Select the appropriate difference relocation type. Type = MachO::ARM_RELOC_HALF_SECTDIFF; @@ -251,10 +255,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, // See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol(); - if (!A->getFragment()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + if (!A->getFragment()) { + Asm.getContext().reportError(Fixup.getLoc(), "symbol '" + A->getName() + "' can not be undefined in a subtraction expression"); + return; + } uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); @@ -265,10 +271,12 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols"); const MCSymbol *SB = &B->getSymbol(); - if (!SB->getFragment()) - Asm.getContext().reportFatalError(Fixup.getLoc(), + if (!SB->getFragment()) { + Asm.getContext().reportError(Fixup.getLoc(), "symbol '" + B->getSymbol().getName() + "' can not be undefined in a subtraction expression"); + return; + } // Select the appropriate difference relocation type. Type = MachO::ARM_RELOC_SECTDIFF; @@ -346,13 +354,15 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; unsigned RelocType = MachO::ARM_RELOC_VANILLA; - if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) + if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { // If we failed to get fixup kind info, it's because there's no legal // relocation type for the fixup kind. This happens when it's a fixup that's // expected to always be resolvable at assembly time and not have any // relocations needed. - Asm.getContext().reportFatalError(Fixup.getLoc(), - "unsupported relocation on symbol"); + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation on symbol"); + return; + } // If this is a difference or a defined symbol plus an offset, then we need a // scattered relocation entry. Differences always require scattered diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index b680db5c3a78..dad50f2834ee 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -27,8 +27,8 @@ ARMTargetStreamer::~ARMTargetStreamer() {} // The constant pool handling is shared by all ARMTargetStreamer // implementations. 
-const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr) { - return ConstantPools->addEntry(Streamer, Expr, 4); +const MCExpr *ARMTargetStreamer::addConstantPoolEntry(const MCExpr *Expr, SMLoc Loc) { + return ConstantPools->addEntry(Streamer, Expr, 4, Loc); } void ARMTargetStreamer::emitCurrentConstantPool() { diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index b993b1be4847..83fa084e60c7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -37,11 +37,11 @@ void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { } } -MCStreamer *llvm::createARMWinCOFFStreamer(MCContext &Context, - MCAsmBackend &MAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, - bool RelaxAll) { - return new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS); +MCStreamer *llvm::createARMWinCOFFStreamer( + MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, bool RelaxAll, bool IncrementalLinkerCompatible) { + auto *S = new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS); + S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); + return S; } diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 090a003424a4..5acb2d46f3e7 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -711,7 +711,6 @@ this in a target-independent way: we should probably fold that (when using "undefined at zero" semantics) to set the "defined at zero" bit and have the code generator expand out the right code. - //===---------------------------------------------------------------------===// Clean up the test/MC/ARM files to have more robust register choices. diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 3b4358b5d9bf..93e0ac4aa320 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -13,6 +13,7 @@ #include "Thumb1FrameLowering.h" #include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -84,7 +85,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - assert(&MBB == &MF.front() && "Shrink-wrapping not yet implemented"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -100,7 +100,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, assert(NumBytes >= ArgRegsSaveSize && "ArgRegsSaveSize is included in NumBytes"); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + unsigned FramePtr = RegInfo->getFrameRegister(MF); unsigned BasePtr = RegInfo->getBaseRegister(); int CFAOffset = 0; @@ -168,8 +172,6 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { ++MBBI; - if (MBBI != MBB.end()) - dl = MBBI->getDebugLoc(); } // Determine starting offsets of spill areas.
@@ -232,11 +234,10 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, } } - // Adjust FP so it point to the stack slot that contains the previous FP. if (HasFP) { - FramePtrOffsetInBlock += MFI->getObjectOffset(FramePtrSpillFI) - + GPRCS1Size + ArgRegsSaveSize; + FramePtrOffsetInBlock += + MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) .addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4) .setMIFlags(MachineInstr::FrameSetup)); @@ -321,11 +322,8 @@ static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) { void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert((MBBI->getOpcode() == ARM::tBX_RET || - MBBI->getOpcode() == ARM::tPOP_RET) && - "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ThumbRegisterInfo *RegInfo = @@ -377,9 +375,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, ARM::SP) .addReg(FramePtr)); } else { - if (MBBI->getOpcode() == ARM::tBX_RET && - &MBB.front() != MBBI && - std::prev(MBBI)->getOpcode() == ARM::tPOP) { + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && + &MBB.front() != MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { MachineBasicBlock::iterator PMBBI = std::prev(MBBI); if (!tryFoldSPUpdateIntoPushPop(STI, MF, PMBBI, NumBytes)) emitSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes); @@ -388,66 +385,189 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } } - bool IsV4PopReturn = false; - for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo()) + if (needPopSpecialFixUp(MF)) { + bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); + (void)Done; + assert(Done && "Emission of the special fixup failed!?"); + } +} + +bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { + if (!needPopSpecialFixUp(*MBB.getParent())) + return true; + + MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); + return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); +} + +bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { + ARMFunctionInfo *AFI = + const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); + if (AFI->getArgRegsSaveSize()) + return true; + + // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. + for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo()) if (CSI.getReg() == ARM::LR) - IsV4PopReturn = true; - IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps(); + return true; - // Unlike T2 and ARM mode, the T1 pop instruction cannot restore - // to LR, and we can't pop the value directly to the PC since - // we need to update the SP after popping the value.
So instead - // we have to emit: - // POP {r3} - // ADD sp, #offset - // BX r3 - // If this would clobber a return value, then generate this sequence instead: - // MOV ip, r3 - // POP {r3} - // ADD sp, #offset - // MOV lr, r3 - // MOV r3, ip - // BX lr - if (ArgRegsSaveSize || IsV4PopReturn) { - // Get the last instruction, tBX_RET - MBBI = MBB.getLastNonDebugInstr(); - assert (MBBI->getOpcode() == ARM::tBX_RET); - DebugLoc dl = MBBI->getDebugLoc(); + return false; +} - if (AFI->getReturnRegsCount() <= 3) { - // Epilogue: pop saved LR to R3 and branch off it. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); +bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, + bool DoIt) const { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const ThumbRegisterInfo *RegInfo = + static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) - .addReg(ARM::R3, RegState::Kill); - AddDefaultPred(MIB); - MIB.copyImplicitOps(&*MBBI); - // erase the old tBX_RET instruction - MBB.erase(MBBI); - } else { - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::R12, RegState::Define) - .addReg(ARM::R3, RegState::Kill)); - - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); - - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) - .addReg(ARM::R3, RegState::Kill)); - - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::R3, RegState::Define) - .addReg(ARM::R12, RegState::Kill)); - // Keep the tBX_RET instruction + // If MBBI is a return instruction, or is a tPOP followed by a return + // instruction in the successor BB, we may be able to directly restore + // LR in the PC. + // This is only possible with v5T ops (v4T can't change the Thumb bit via + // a POP PC instruction), and only if we do not need to emit any SP update. + // Otherwise, we need a temporary register to pop the value + // and copy that value into LR. + auto MBBI = MBB.getFirstTerminator(); + bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; + if (CanRestoreDirectly) { + if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) + CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::tPOP_RET); + else { + auto MBBI_prev = MBBI; + MBBI_prev--; + assert(MBBI_prev->getOpcode() == ARM::tPOP); + assert(MBB.succ_size() == 1); + if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) + MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. + else + CanRestoreDirectly = false; } } + + if (CanRestoreDirectly) { + if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) + return true; + MachineInstrBuilder MIB = + AddDefaultPred( + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET))); + // Copy implicit ops and popped registers, if any. + for (auto MO: MBBI->operands()) + if (MO.isReg() && (MO.isImplicit() || MO.isDef())) + MIB.addOperand(MO); + MIB.addReg(ARM::PC, RegState::Define); + // Erase the old instruction (tBX_RET or tPOP). + MBB.erase(MBBI); + return true; + } + + // Look for a temporary register to use. + // First, compute the liveness information.
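+ // (The backward walk below computes the liveness right before MBBI, so + // registers that are still live across the return, such as R0 carrying a + // return value, are kept out of the candidate set.)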
+ LivePhysRegs UsedRegs(STI.getRegisterInfo()); + UsedRegs.addLiveOuts(&MBB, /*AddPristines*/ true); + // The semantics of pristines changed recently and now + // the callee-saved registers that are touched in the function + // are not part of the pristines set anymore. + // Add those callee-saved now. + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSRegs[i]; ++i) + UsedRegs.addReg(CSRegs[i]); + + DebugLoc dl = DebugLoc(); + if (MBBI != MBB.end()) { + dl = MBBI->getDebugLoc(); + auto InstUpToMBBI = MBB.end(); + while (InstUpToMBBI != MBBI) + // The pre-decrement is on purpose here. + // We want to have the liveness right before MBBI. + UsedRegs.stepBackward(*--InstUpToMBBI); + } + + // Look for a register that can be directly used in the POP. + unsigned PopReg = 0; + // And some temporary register, just in case. + unsigned TemporaryReg = 0; + BitVector PopFriendly = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); + assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); + // Rebuild the GPRs from the high registers because they are removed + // from the GPR reg class for thumb1. + BitVector GPRsNoLRSP = + TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); + GPRsNoLRSP |= PopFriendly; + GPRsNoLRSP.reset(ARM::LR); + GPRsNoLRSP.reset(ARM::SP); + GPRsNoLRSP.reset(ARM::PC); + for (int Register = GPRsNoLRSP.find_first(); Register != -1; + Register = GPRsNoLRSP.find_next(Register)) { + if (!UsedRegs.contains(Register)) { + // Remember the first pop-friendly register and exit. + if (PopFriendly.test(Register)) { + PopReg = Register; + TemporaryReg = 0; + break; + } + // Otherwise, remember that the register will be available to + // save a pop-friendly register. + TemporaryReg = Register; + } + } + + if (!DoIt && !PopReg && !TemporaryReg) + return false; + + assert((PopReg || TemporaryReg) && "Cannot get LR"); + + if (TemporaryReg) { + assert(!PopReg && "Unnecessary MOV is about to be inserted"); + PopReg = PopFriendly.find_first(); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(TemporaryReg, RegState::Define) + .addReg(PopReg, RegState::Kill)); + } + + if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { + // We couldn't use the direct restoration above, so + // perform the opposite conversion: tPOP_RET to tPOP. + MachineInstrBuilder MIB = + AddDefaultPred( + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); + bool Popped = false; + for (auto MO: MBBI->operands()) + if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && + MO.getReg() != ARM::PC) { + MIB.addOperand(MO); + if (!MO.isImplicit()) + Popped = true; + } + // Is there anything left to pop? + if (!Popped) + MBB.erase(MIB.getInstr()); + // Erase the old instruction.
+ MBB.erase(MBBI); + MBBI = AddDefaultPred(BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET))); + } + + assert(PopReg && "Do not know how to get LR"); + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(PopReg, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill)); + + if (TemporaryReg) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(PopReg, RegState::Define) + .addReg(TemporaryReg, RegState::Kill)); + + return true; } bool Thumb1FrameLowering:: @@ -461,8 +581,6 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL; const TargetInstrInfo &TII = *STI.getInstrInfo(); - if (MI != MBB.end()) DL = MI->getDebugLoc(); - MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)); AddDefaultPred(MIB); for (unsigned i = CSI.size(); i != 0; --i) { @@ -501,31 +619,38 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *STI.getInstrInfo(); bool isVarArg = AFI->getArgRegsSaveSize() > 0; - DebugLoc DL = MI->getDebugLoc(); + DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)); AddDefaultPred(MIB); - bool NumRegs = false; + bool NeedsPop = false; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (Reg == ARM::LR) { - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) + if (MBB.succ_empty()) { + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) + continue; + // ARMv4T requires BX, see emitEpilogue + if (!STI.hasV5TOps()) + continue; + Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(&*MI); + MI = MBB.erase(MI); + } else + // LR may only be popped into PC, as part of return sequence. + // If this isn't the return sequence, we'll need emitPopSpecialFixUp + // to restore LR the hard way. continue; - // ARMv4T requires BX, see emitEpilogue - if (STI.hasV4TOps() && !STI.hasV5TOps()) - continue; - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - MIB.copyImplicitOps(&*MI); - MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); - NumRegs = true; + NeedsPop = true; } // It's illegal to emit pop instruction without operands. - if (NumRegs) + if (NeedsPop) MBB.insert(MI, &*MIB); else MF.DeleteMachineInstr(MIB); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index 31d57325ebd6..812f9830824d 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -45,6 +45,42 @@ public: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + + /// Check whether or not the given \p MBB can be used as an epilogue + /// for the target. + /// The epilogue will be inserted before the first terminator of that block. + /// This method is used by the shrink-wrapping pass to decide if + /// \p MBB will be correctly handled by the target. + bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; + +private: + /// Check if the frame lowering of \p MF needs a special fixup + /// code sequence for the epilogue. + /// Unlike T2 and ARM mode, the T1 pop instruction cannot restore + /// to LR, and we can't pop the value directly to the PC when + /// we need to update the SP after popping the value.
So instead + /// we have to emit: + /// POP {r3} + /// ADD sp, #offset + /// BX r3 + /// If this would clobber a return value, then generate this sequence instead: + /// MOV ip, r3 + /// POP {r3} + /// ADD sp, #offset + /// MOV lr, r3 + /// MOV r3, ip + /// BX lr + bool needPopSpecialFixUp(const MachineFunction &MF) const; + + /// Emit the special fixup code sequence for the epilogue. + /// \see needPopSpecialFixUp for more details. + /// \p DoIt tells this method whether or not to actually insert + /// the code sequence in \p MBB. I.e., when \p DoIt is false, + /// \p MBB is left untouched. + /// \returns For \p DoIt == true: True when the emission succeeded, + /// false otherwise. For \p DoIt == false: True when the emission + /// would have been possible, false otherwise. + bool emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const; }; } // End llvm namespace diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 216e776932be..530e1d33839a 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -84,11 +84,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); @@ -112,11 +110,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 68736bc1decd..bf0498dfda69 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -256,8 +256,8 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); // Finalize the bundle.
- MachineBasicBlock::instr_iterator LI = LastITMI; - finalizeBundle(MBB, InsertPos.getInstrIterator(), std::next(LI)); + finalizeBundle(MBB, InsertPos.getInstrIterator(), + ++LastITMI->getIterator()); Modified = true; ++NumITs; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index dc74f4e38ff8..4da769f23280 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -131,11 +131,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass || RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass || @@ -171,11 +169,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index d9ab824995c1..bcd0e5751258 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -125,7 +125,10 @@ namespace { { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 }, { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 }, - // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent + // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent. + // tSTMIA_UPD is a change in semantics which can only be used if the base + // register is killed. This difference is correctly handled elsewhere. + { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 }, { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 } }; @@ -210,12 +213,12 @@ Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) { for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) { unsigned FromOpc = ReduceTable[i].WideOpc; if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second) - assert(false && "Duplicated entries?"); + llvm_unreachable("Duplicated entries?"); } } static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { - for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) + for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) if (*Regs == ARM::CPSR) return true; return false; @@ -435,6 +438,14 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, isLdStMul = true; break; } + case ARM::t2STMIA: { + // If the base register is killed, we don't care what its value is after the + // instruction, so we can use an updating STMIA.
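+ // For example (illustrative): "stm r0, {r1, r2}" with r0 killed can be + // narrowed to the 16-bit "stm r0!, {r1, r2}"; the write-back clobbers r0, + // but its value is dead afterwards anyway.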
+ if (!MI->getOperand(0).isKill()) + return false; + + break; + } + case ARM::t2LDMIA_RET: { unsigned BaseReg = MI->getOperand(1).getReg(); if (BaseReg != ARM::SP) @@ -492,6 +503,12 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, // Add the 16-bit load / store instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc)); + + // tSTMIA_UPD takes a defining register operand. We've already checked that + // the register is killed, so mark it as dead here. + if (Entry.WideOpc == ARM::t2STMIA) + MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead); + if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)); MIB.addOperand(MI->getOperand(1)); @@ -633,10 +650,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; - if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && - STI->avoidMOVsShifterOperand()) + if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) // Don't issue movs with shifter operand for some CPUs unless we - // are optimizing / minimizing for size. + // are optimizing for size. return false; unsigned Reg0 = MI->getOperand(0).getReg(); @@ -660,11 +676,13 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, } } else if (Reg0 != Reg1) { // Try to commute the operands to make it a 2-address instruction. - unsigned CommOpIdx1, CommOpIdx2; + unsigned CommOpIdx1 = 1; + unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex; if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) || - CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0) + MI->getOperand(CommOpIdx2).getReg() != Reg0) return false; - MachineInstr *CommutedMI = TII->commuteInstruction(MI); + MachineInstr *CommutedMI = + TII->commuteInstruction(MI, false, CommOpIdx1, CommOpIdx2); if (!CommutedMI) return false; } @@ -750,10 +768,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; - if (!MinimizeSize && !OptimizeSize && Entry.AvoidMovs && - STI->avoidMOVsShifterOperand()) + if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand()) // Don't issue movs with shifter operand for some CPUs unless we - // are optimizing / minimizing for size. + // are optimizing for size. return false; unsigned Limit = ~0U; @@ -1012,9 +1029,9 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo()); - // Optimizing / minimizing size? - OptimizeSize = MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); - MinimizeSize = MF.getFunction()->hasFnAttribute(Attribute::MinSize); + // Optimizing / minimizing size? Minimizing size implies optimizing for size. + OptimizeSize = MF.getFunction()->optForSize(); + MinimizeSize = MF.getFunction()->optForMinSize(); BlockInfo.clear(); BlockInfo.resize(MF.getNumBlockIDs()); diff --git a/lib/Target/AVR/AVR.td b/lib/Target/AVR/AVR.td new file mode 100644 index 000000000000..9e80717cd680 --- /dev/null +++ b/lib/Target/AVR/AVR.td @@ -0,0 +1,563 @@ +//===-- AVR.td - Describe the AVR Target Machine ----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===---------------------------------------------------------------------===// +// This is the top level entry point for the AVR target. +//===---------------------------------------------------------------------===// + +//===---------------------------------------------------------------------===// +// Target-independent interfaces which we are implementing +//===---------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===---------------------------------------------------------------------===// +// AVR Subtarget Features. +//===---------------------------------------------------------------------===// + +// :TODO: Implement the skip errata, see `gcc/config/avr/avr-arch.h` for details +// :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD. +// In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`. +// avr2 (with SRAM) adds the rest of the variants. +// :TODO: s/AVRTiny/Tiny + + +// A feature set aggregates features, grouping them. We don't want to create a +// new member in AVRSubtarget (to store a value) for each set because we do not +// care if the set is supported, only the subfeatures inside the set. We fix +// this by simply setting the same dummy member for all feature sets, which is +// then ignored. +class FeatureSet<string name, string desc, list<SubtargetFeature> i> + : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>; + +// A family of microcontrollers, defining a set of supported features. +class Family<string name, list<SubtargetFeature> i> + : FeatureSet<name, !strconcat("The device is a part of the ", + !strconcat(name, " family")), i>; + +// The device has SRAM, and supports the bare minimum of +// SRAM-relevant instructions. +// +// These are: +// LD - all 9 variants +// ST - all 9 variants +// LDD - two variants for Y and Z +// STD - two variants for Y and Z +// `LDS Rd, K` +// `STS k, Rr` +// `PUSH`/`POP` +def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true", + "The device has random access memory">; + +// The device supports the `JMP k` and `CALL k` instructions. +def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true", + "The device supports the `JMP` and " + "`CALL` instructions">; + + +// The device supports the indirect branches `IJMP` and `ICALL`. +def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL", + "true", + "The device supports `IJMP`/`ICALL` " + "instructions">; + +// The device supports the extended indirect branches `EIJMP` and `EICALL`. +def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL", + "true", "The device supports the " + "`EIJMP`/`EICALL` instructions">; + +// The device supports the 16-bit `ADIW Rd, K` and `SBIW Rd, K` instructions. +def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW", + "true", "Enable 16-bit register-immediate " + "addition and subtraction instructions">; + +// The device has an 8-bit stack pointer (SP) register. +def FeatureSmallStack : SubtargetFeature<"smallstack", "m_hasSmallStack", + "true", "The device has an 8-bit " + "stack pointer">; + +// The device supports the 16-bit GPR pair MOVW instruction. +def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true", + "The device supports the 16-bit MOVW " + "instruction">; + +// The device supports the `LPM` instruction, with implied destination being r0. +def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true", + "The device supports the `LPM` instruction">; + +// The device supports the `LPM Rd, Z[+]` instruction. +def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true", + "The device supports the `LPM Rd, Z[+]` " + "instruction">; + +// The device supports the `ELPM` instruction.
+def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true", + "The device supports the ELPM instruction">; + +// The device supports the `ELPM Rd, Z[+]` instructions. +def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true", + "The device supports the `ELPM Rd, Z[+]` " + "instructions">; + +// The device supports the `SPM` instruction. +def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true", + "The device supports the `SPM` instruction">; + +// The device supports the `SPM Z+` instruction. +def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true", + "The device supports the `SPM Z+` " + "instruction">; + +// The device supports the `DES k` instruction. +def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true", + "The device supports the `DES k` encryption " + "instruction">; + +// The device supports the Read-Write-Modify instructions +// XCH, LAS, LAC, and LAT. +def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true", + "The device supports the read-write-modify " + "instructions: XCH, LAS, LAC, LAT">; + +// The device supports the `[F]MUL[S][U]` family of instructions. +def FeatureMultiplication : SubtargetFeature<"mul", "m_supportsMultiplication", + "true", "The device supports the " + "multiplication instructions">; + +// The device supports the `BREAK` instruction. +def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true", + "The device supports the `BREAK` debugging " + "instruction">; + +// The device has instruction encodings specific to the Tiny core. +def FeatureTinyEncoding : SubtargetFeature<"tinyencoding", + "m_hasTinyEncoding", "true", + "The device has Tiny core specific " + "instruction encodings">; + +class ELFArch<string name> : SubtargetFeature<"", "ELFArch", + !strconcat("ELF::",name), "">; + +// ELF e_flags architecture values +def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">; +def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">; +def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">; +def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">; +def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">; +def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">; +def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">; +def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">; +def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">; +def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">; +def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">; +def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">; +def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">; +def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">; +def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">; +def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">; +def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">; +def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">; + +//===---------------------------------------------------------------------===// +// AVR Families +//===---------------------------------------------------------------------===// + +// The device has at least the bare minimum that **every** single AVR +// device should have.
+def FamilyAVR0 : Family<"avr0", []>; + +def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM]>; + +def FamilyAVR2 : Family<"avr2", + [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW, + FeatureSRAM]>; + +def FamilyAVR25 : Family<"avr25", + [FamilyAVR2, FeatureMOVW, FeatureLPMX, + FeatureSPM, FeatureBREAK]>; + +def FamilyAVR3 : Family<"avr3", + [FamilyAVR2, FeatureJMPCALL]>; + +def FamilyAVR31 : Family<"avr31", + [FamilyAVR3, FeatureELPM]>; + +def FamilyAVR35 : Family<"avr35", + [FamilyAVR3, FeatureMOVW, FeatureLPMX, + FeatureSPM, FeatureBREAK]>; + +def FamilyAVR4 : Family<"avr4", + [FamilyAVR2, FeatureMultiplication, + FeatureMOVW, FeatureLPMX, FeatureSPM, + FeatureBREAK]>; + +def FamilyAVR5 : Family<"avr5", + [FamilyAVR3, FeatureMultiplication, + FeatureMOVW, FeatureLPMX, FeatureSPM, + FeatureBREAK]>; + +def FamilyAVR51 : Family<"avr51", + [FamilyAVR5, FeatureELPM, FeatureELPMX]>; + +def FamilyAVR6 : Family<"avr6", + [FamilyAVR51]>; + +def FamilyAVRTiny : Family<"avrtiny", + [FamilyAVR0, FeatureBREAK, FeatureSRAM, + FeatureTinyEncoding]>; + +def FamilyXMEGA : Family<"xmega", + [FamilyAVR51, FeatureEIJMPCALL, FeatureSPMX, + FeatureDES]>; + +def FamilyXMEGAU : Family<"xmegau", + [FamilyXMEGA, FeatureRMW]>; + +def FeatureSetSpecial : FeatureSet<"special", + "Enable use of the entire instruction " + "set - used for debugging", + [FeatureSRAM, FeatureJMPCALL, + FeatureIJMPCALL, FeatureEIJMPCALL, + FeatureADDSUBIW, FeatureMOVW, + FeatureLPM, FeatureLPMX, FeatureELPM, + FeatureELPMX, FeatureSPM, FeatureSPMX, + FeatureDES, FeatureRMW, + FeatureMultiplication, FeatureBREAK]>; + +//===---------------------------------------------------------------------===// +// AVR microcontrollers supported. +//===---------------------------------------------------------------------===// + +class Device<string Name, Family Fam, ELFArch Arch, + list<SubtargetFeature> ExtraFeatures = []> + : Processor<Name, NoItineraries, + !listconcat([Fam, Arch], ExtraFeatures)>; + +// Generic MCUs +// Note that several versions of GCC have strange ELF architecture +// settings for backwards compatibility - see `gas/config/tc-avr.c` +// in AVR binutils. We do not replicate this.
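+// The optional ExtraFeatures argument of Device layers device-specific +// features on top of a family; for example, at86rf401 below is an avr2 core +// that additionally implements MOVW and `LPM Rd, Z[+]`.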
+def : Device<"avr1", FamilyAVR1, ELFArchAVR1>; +def : Device<"avr2", FamilyAVR2, ELFArchAVR2>; +def : Device<"avr25", FamilyAVR25, ELFArchAVR25>; +def : Device<"avr3", FamilyAVR3, ELFArchAVR3>; +def : Device<"avr31", FamilyAVR31, ELFArchAVR31>; +def : Device<"avr35", FamilyAVR35, ELFArchAVR35>; +def : Device<"avr4", FamilyAVR4, ELFArchAVR4>; +def : Device<"avr5", FamilyAVR5, ELFArchAVR5>; +def : Device<"avr51", FamilyAVR51, ELFArchAVR51>; +def : Device<"avr6", FamilyAVR6, ELFArchAVR6>; +def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>; +def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>; +def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>; +def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>; +def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>; +def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>; + +// Specific MCUs +def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>; +def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>; +def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>; +def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>; +def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>; +def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>; +def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>; +def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>; +def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25, + [FeatureMOVW, FeatureLPMX]>; +def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>; +def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>; +def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>; +def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>; +def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>; +def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>; +def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>; +def : 
Device<"at43usb320", FamilyAVR31, ELFArchAVR31>; +def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>; +def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>; +def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>; +def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>; +def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>; +def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>; +def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>; +def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>; +def : Device<"atmega8", FamilyAVR4, ELFArchAVR4>; // FIXME: family may be wrong +def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega8a", FamilyAVR4, ELFArchAVR4>; +def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>; +def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>; +def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>; +def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>; +def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>; +def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>; +def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>; +def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega161", FamilyAVR3, ELFArchAVR5, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>; +def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega163", FamilyAVR3, ELFArchAVR5, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>; +def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>; +def : 
Device<"atmega325p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>; +def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>; +def : Device<"at94k", FamilyAVR3, ELFArchAVR5, + [FeatureMultiplication, FeatureMOVW, FeatureLPMX]>; +def : Device<"m3000", FamilyAVR5, ELFArchAVR5>; +def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega128a", 
FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>; +def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>; +def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>; +def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>; +def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>; +def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>; +def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>; +def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>; +def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>; +def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega32e5", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega16e5", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega8e5", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>; +def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>; +def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>; +def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>; +def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>; +def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>; +def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>; +def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>; +def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>; +def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>; +def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>; +def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>; +def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>; +def : 
Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>; +def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>; + +//===---------------------------------------------------------------------===// +// Register File Description +//===---------------------------------------------------------------------===// + +include "AVRRegisterInfo.td" + +//===---------------------------------------------------------------------===// +// Instruction Descriptions +//===---------------------------------------------------------------------===// + +//include "AVRInstrInfo.td" + +//def AVRInstrInfo : InstrInfo; + +//===---------------------------------------------------------------------===// +// Calling Conventions +//===---------------------------------------------------------------------===// + +include "AVRCallingConv.td" + +//===---------------------------------------------------------------------===// +// Assembly Printers +//===---------------------------------------------------------------------===// + +// def AVRAsmWriter : AsmWriter { +// string AsmWriterClassName = "InstPrinter"; +// bit isMCAsmWriter = 1; +// } + +//===---------------------------------------------------------------------===// +// Assembly Parsers +//===---------------------------------------------------------------------===// + +// def AVRAsmParser : AsmParser { +// let ShouldEmitMatchRegisterName = 1; +// let ShouldEmitMatchRegisterAltName = 1; +// } + +// def AVRAsmParserVariant : AsmParserVariant { +// int Variant = 0; +// +// // Recognize hard coded registers. +// string RegisterPrefix = "$"; +// } + +//===---------------------------------------------------------------------===// +// Target Declaration +//===---------------------------------------------------------------------===// + +def AVR : Target { +// let InstructionSet = AVRInstrInfo; +// let AssemblyWriters = [AVRAsmWriter]; +// +// let AssemblyParsers = [AVRAsmParser]; +// let AssemblyParserVariants = [AVRAsmParserVariant]; +} + diff --git a/lib/Target/AVR/AVRCallingConv.td b/lib/Target/AVR/AVRCallingConv.td new file mode 100644 index 000000000000..d8cb3fe84022 --- /dev/null +++ b/lib/Target/AVR/AVRCallingConv.td @@ -0,0 +1,65 @@ +//===-- AVRCallingConv.td - Calling Conventions for AVR ----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This describes the calling conventions for AVR architecture. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// AVR Return Value Calling Convention +//===----------------------------------------------------------------------===// + +def RetCC_AVR : CallingConv +<[ + // i8 is returned in R24. + CCIfType<[i8], CCAssignToReg<[R24]>>, + + // i16 are returned in R25:R24, R23:R22, R21:R20 and R19:R18. + CCIfType<[i16], CCAssignToReg<[R25R24, R23R22, R21R20, R19R18]>> +]>; + +// Special return value calling convention for runtime functions. 
+def RetCC_AVR_RT : CallingConv
+<[
+  CCIfType<[i8], CCAssignToReg<[R24,R25]>>,
+  CCIfType<[i16], CCAssignToReg<[R23R22, R25R24]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// AVR Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+
+// The calling conventions are implemented in custom C++ code.
+
+// Calling convention for variadic functions.
+def ArgCC_AVR_Vararg : CallingConv
+<[
+  // i16 values are always passed on the stack with an alignment of 1.
+  CCAssignToStack<2, 1>
+]>;
+
+// Special argument calling convention for
+// multiplication runtime functions.
+def ArgCC_AVR_RT_MUL : CallingConv
+<[
+  CCIfType<[i16], CCAssignToReg<[R27R26,R19R18]>>
+]>;
+
+// Special argument calling convention for
+// division runtime functions.
+def ArgCC_AVR_RT_DIV : CallingConv
+<[
+  CCIfType<[i8], CCAssignToReg<[R24,R22]>>,
+  CCIfType<[i16], CCAssignToReg<[R25R24, R23R22]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// Callee-saved register lists.
+//===----------------------------------------------------------------------===//
+
+def CSR_Normal : CalleeSavedRegs<(add R29, R28, (sequence "R%u", 17, 2))>;
+def CSR_Interrupts : CalleeSavedRegs<(add (sequence "R%u", 31, 0))>;
diff --git a/lib/Target/AVR/AVRConfig.h b/lib/Target/AVR/AVRConfig.h
new file mode 100644
index 000000000000..65588bc50840
--- /dev/null
+++ b/lib/Target/AVR/AVRConfig.h
@@ -0,0 +1,15 @@
+//===-- AVRConfig.h - AVR Backend Configuration Header ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_CONFIG_H
+#define LLVM_AVR_CONFIG_H
+
+#define LLVM_AVR_GCC_COMPAT
+
+#endif // LLVM_AVR_CONFIG_H
diff --git a/lib/Target/AVR/AVRMachineFunctionInfo.h b/lib/Target/AVR/AVRMachineFunctionInfo.h
new file mode 100644
index 000000000000..6571d5d3e603
--- /dev/null
+++ b/lib/Target/AVR/AVRMachineFunctionInfo.h
@@ -0,0 +1,73 @@
+//===-- AVRMachineFunctionInfo.h - AVR machine function info ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AVR-specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_MACHINE_FUNCTION_INFO_H
+#define LLVM_AVR_MACHINE_FUNCTION_INFO_H
+
+#include "AVRConfig.h"
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/**
+ * Contains AVR-specific information for each MachineFunction.
+ */
+class AVRMachineFunctionInfo : public MachineFunctionInfo {
+  /// Indicates if a register has been spilled by the register
+  /// allocator.
+  bool HasSpills;
+
+  /// Indicates if there are any fixed size allocas present.
+  /// Note that if there are only variable sized allocas this is set to false.
+  bool HasAllocas;
+
+  /// Indicates if arguments passed using the stack are being
+  /// used inside the function.
+  bool HasStackArgs;
+
+  /// Size of the callee-saved register portion of the
+  /// stack frame in bytes.
+  unsigned CalleeSavedFrameSize;
+
+  /// FrameIndex for start of varargs area.
+  int VarArgsFrameIndex;
+
+public:
+  AVRMachineFunctionInfo()
+      : HasSpills(false), HasAllocas(false), HasStackArgs(false),
+        CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
+
+  explicit AVRMachineFunctionInfo(MachineFunction &MF)
+      : HasSpills(false), HasAllocas(false), HasStackArgs(false),
+        CalleeSavedFrameSize(0), VarArgsFrameIndex(0) {}
+
+  bool getHasSpills() const { return HasSpills; }
+  void setHasSpills(bool B) { HasSpills = B; }
+
+  bool getHasAllocas() const { return HasAllocas; }
+  void setHasAllocas(bool B) { HasAllocas = B; }
+
+  bool getHasStackArgs() const { return HasStackArgs; }
+  void setHasStackArgs(bool B) { HasStackArgs = B; }
+
+  unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
+  void setCalleeSavedFrameSize(unsigned Bytes) { CalleeSavedFrameSize = Bytes; }
+
+  int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+  void setVarArgsFrameIndex(int Idx) { VarArgsFrameIndex = Idx; }
+};
+
+} // end llvm namespace
+
+#endif // LLVM_AVR_MACHINE_FUNCTION_INFO_H
diff --git a/lib/Target/AVR/AVRRegisterInfo.td b/lib/Target/AVR/AVRRegisterInfo.td
new file mode 100644
index 000000000000..32650fc66751
--- /dev/null
+++ b/lib/Target/AVR/AVRRegisterInfo.td
@@ -0,0 +1,216 @@
+//===-- AVRRegisterInfo.td - AVR Register defs -------------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the AVR register file
+//===----------------------------------------------------------------------===//
+
+// 8-bit General purpose register definition.
+class AVRReg<bits<16> num,
+             string name,
+             list<Register> subregs = [],
+             list<string> altNames = []>
+  : RegisterWithSubRegs<name, subregs>
+{
+  field bits<16> Num = num;
+
+  let HWEncoding = num;
+  let Namespace = "AVR";
+  let SubRegs = subregs;
+  let AltNames = altNames;
+}
+
+// Subregister indices.
+let Namespace = "AVR" in +{ + def sub_lo : SubRegIndex<8>; + def sub_hi : SubRegIndex<8, 8>; +} + +let Namespace = "AVR" in { + def ptr : RegAltNameIndex; +} + + +//===----------------------------------------------------------------------===// +// 8-bit general purpose registers +//===----------------------------------------------------------------------===// + +def R0 : AVRReg<0, "r0">, DwarfRegNum<[0]>; +def R1 : AVRReg<1, "r1">, DwarfRegNum<[1]>; +def R2 : AVRReg<2, "r2">, DwarfRegNum<[2]>; +def R3 : AVRReg<3, "r3">, DwarfRegNum<[3]>; +def R4 : AVRReg<4, "r4">, DwarfRegNum<[4]>; +def R5 : AVRReg<5, "r5">, DwarfRegNum<[5]>; +def R6 : AVRReg<6, "r6">, DwarfRegNum<[6]>; +def R7 : AVRReg<7, "r7">, DwarfRegNum<[7]>; +def R8 : AVRReg<8, "r8">, DwarfRegNum<[8]>; +def R9 : AVRReg<9, "r9">, DwarfRegNum<[9]>; +def R10 : AVRReg<10, "r10">, DwarfRegNum<[10]>; +def R11 : AVRReg<11, "r11">, DwarfRegNum<[11]>; +def R12 : AVRReg<12, "r12">, DwarfRegNum<[12]>; +def R13 : AVRReg<13, "r13">, DwarfRegNum<[13]>; +def R14 : AVRReg<14, "r14">, DwarfRegNum<[14]>; +def R15 : AVRReg<15, "r15">, DwarfRegNum<[15]>; +def R16 : AVRReg<16, "r16">, DwarfRegNum<[16]>; +def R17 : AVRReg<17, "r17">, DwarfRegNum<[17]>; +def R18 : AVRReg<18, "r18">, DwarfRegNum<[18]>; +def R19 : AVRReg<19, "r19">, DwarfRegNum<[19]>; +def R20 : AVRReg<20, "r20">, DwarfRegNum<[20]>; +def R21 : AVRReg<21, "r21">, DwarfRegNum<[21]>; +def R22 : AVRReg<22, "r22">, DwarfRegNum<[22]>; +def R23 : AVRReg<23, "r23">, DwarfRegNum<[23]>; +def R24 : AVRReg<24, "r24">, DwarfRegNum<[24]>; +def R25 : AVRReg<25, "r25">, DwarfRegNum<[25]>; +def R26 : AVRReg<26, "r26">, DwarfRegNum<[26]>; +def R27 : AVRReg<27, "r27">, DwarfRegNum<[27]>; +def R28 : AVRReg<28, "r28">, DwarfRegNum<[28]>; +def R29 : AVRReg<29, "r29">, DwarfRegNum<[29]>; +def R30 : AVRReg<30, "r30">, DwarfRegNum<[30]>; +def R31 : AVRReg<31, "r31">, DwarfRegNum<[31]>; +def SPL : AVRReg<32, "SPL">, DwarfRegNum<[32]>; +def SPH : AVRReg<33, "SPH">, DwarfRegNum<[33]>; + +let SubRegIndices = [sub_lo, sub_hi], +CoveredBySubRegs = 1 in +{ + // 16 bit GPR pairs. + def SP : AVRReg<32, "SP", [SPL, SPH]>, DwarfRegNum<[32]>; + + // The pointer registers (X,Y,Z) are a special case because they + // are printed as a `high:low` pair when a DREG is expected, + // but printed using `X`, `Y`, `Z` when a pointer register is expected. 
+ let RegAltNameIndices = [ptr] in { + def R31R30 : AVRReg<30, "r31:r30", [R30, R31], ["Z"]>, DwarfRegNum<[30]>; + def R29R28 : AVRReg<28, "r29:r28", [R28, R29], ["Y"]>, DwarfRegNum<[28]>; + def R27R26 : AVRReg<26, "r27:r26", [R26, R27], ["X"]>, DwarfRegNum<[26]>; + } + def R25R24 : AVRReg<24, "r25:r24", [R24, R25]>, DwarfRegNum<[24]>; + def R23R22 : AVRReg<22, "r23:r22", [R22, R23]>, DwarfRegNum<[22]>; + def R21R20 : AVRReg<20, "r21:r20", [R20, R21]>, DwarfRegNum<[20]>; + def R19R18 : AVRReg<18, "r19:r18", [R18, R19]>, DwarfRegNum<[18]>; + def R17R16 : AVRReg<16, "r17:r16", [R16, R17]>, DwarfRegNum<[16]>; + def R15R14 : AVRReg<14, "r15:r14", [R14, R15]>, DwarfRegNum<[14]>; + def R13R12 : AVRReg<12, "r13:r12", [R12, R13]>, DwarfRegNum<[12]>; + def R11R10 : AVRReg<10, "r11:r10", [R10, R11]>, DwarfRegNum<[10]>; + def R9R8 : AVRReg<8, "r9:r8", [R8, R9]>, DwarfRegNum<[8]>; + def R7R6 : AVRReg<6, "r7:r6", [R6, R7]>, DwarfRegNum<[6]>; + def R5R4 : AVRReg<4, "r5:r4", [R4, R5]>, DwarfRegNum<[4]>; + def R3R2 : AVRReg<2, "r3:r2", [R2, R3]>, DwarfRegNum<[2]>; + def R1R0 : AVRReg<0, "r1:r0", [R0, R1]>, DwarfRegNum<[0]>; +} + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +//:TODO: use proper set instructions instead of using always "add" + +// Main 8-bit register class. +def GPR8 : RegisterClass<"AVR", [i8], 8, + ( + // Return value and argument registers. + add R24, R25, R18, R19, R20, R21, R22, R23, + // Scratch registers. + R30, R31, R26, R27, + // Callee saved registers. + R28, R29, R17, R16, R15, R14, R13, R12, R11, R10, + R9, R8, R7, R6, R5, R4, R3, R2, R0, R1 + )>; + +// Simple lower registers r0..r15 +def GPR8lo : RegisterClass<"AVR", [i8], 8, + ( + add R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R0, R1 + )>; + +// 8-bit register class for instructions which take immediates. +def LD8 : RegisterClass<"AVR", [i8], 8, + ( + // Return value and arguments. + add R24, R25, R18, R19, R20, R21, R22, R23, + // Scratch registers. + R30, R31, R26, R27, + // Callee saved registers. + R28, R29, R17, R16 + )>; + +// Simple lower registers r16..r23 +def LD8lo : RegisterClass<"AVR", [i8], 8, + ( + add R23, R22, R21, R20, R19, R18, R17, R16 + )>; + +// Main 16-bit pair register class. +def DREGS : RegisterClass<"AVR", [i16], 8, + ( + // Return value and arguments. + add R25R24, R19R18, R21R20, R23R22, + // Scratch registers. + R31R30, R27R26, + // Callee saved registers. + R29R28, R17R16, R15R14, R13R12, R11R10, + R9R8, R7R6, R5R4, R3R2, R1R0 + )>; + +// 16-bit register class for immediate instructions. +def DLDREGS : RegisterClass<"AVR", [i16], 8, + ( + // Return value and arguments. + add R25R24, R19R18, R21R20, R23R22, + // Scratch registers. + R31R30, R27R26, + // Callee saved registers. + R29R28, R17R16 + )>; + +// 16-bit register class for the adiw/sbiw instructions. +def IWREGS : RegisterClass<"AVR", [i16], 8, + ( + // Return value and arguments. + add R25R24, + // Scratch registers. + R31R30, R27R26, + // Callee saved registers. + R29R28 + )>; + +// 16-bit register class for the ld and st instructions. +// AKA X,Y, and Z +def PTRREGS : RegisterClass<"AVR", [i16], 8, + ( + add R27R26, // X + R29R28, // Y + R31R30 // Z + ), ptr>; + +// 16-bit register class for the ldd and std instructions. +// AKA Y and Z. 
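Before the displacement-capable subset is defined below, a quick illustration of the pairing scheme these classes encode: pair rN+1:rN covers the even register as sub_lo and the odd register as sub_hi, and only X (r27:r26), Y (r29:r28) and Z (r31:r30) can act as pointers for ld/st. The sketch is illustrative only and not part of the patch; pairName, isPointerPair and splitWord are made-up names.

// Illustrative only -- models the register pairing used by DREGS/PTRREGS.
#include <cassert>
#include <cstdint>
#include <string>

std::string pairName(unsigned PairBase) {
  assert(PairBase % 2 == 0 && PairBase <= 30 && "pairs start on even regs");
  return "r" + std::to_string(PairBase + 1) + ":r" + std::to_string(PairBase);
}

// Only the top three pairs are valid pointer operands.
bool isPointerPair(unsigned PairBase) {
  return PairBase == 26 || PairBase == 28 || PairBase == 30; // X, Y, Z
}

// Splitting a 16-bit value across a pair mirrors sub_lo/sub_hi above.
void splitWord(uint16_t V, uint8_t &Lo, uint8_t &Hi) {
  Lo = V & 0xff;        // even register (sub_lo)
  Hi = (V >> 8) & 0xff; // odd register (sub_hi)
}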
+def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, + ( + add R31R30, R29R28 + ), ptr>; + +// We have a bunch of instructions with an explicit Z register argument. We +// model this using a register class containing only the Z register. +// :TODO: Rename to 'ZREG'. +def ZREGS : RegisterClass<"AVR", [i16], 8, (add R31R30)>; + +// Register class used for the stack read pseudo instruction. +def GPRSP: RegisterClass<"AVR", [i16], 8, (add SP)>; + +//:TODO: if we remove this we get an error in tablegen +//:TODO: this is just a hack, remove it once add16 works! +// Status register. +def SREG : AVRReg<14, "FLAGS">, DwarfRegNum<[88]>; +def CCR : RegisterClass<"AVR", [i8], 8, (add SREG)> +{ + let CopyCost = -1; // Don't allow copying of status registers +} + diff --git a/lib/Target/AVR/AVRTargetMachine.cpp b/lib/Target/AVR/AVRTargetMachine.cpp new file mode 100644 index 000000000000..a91dce8a63f4 --- /dev/null +++ b/lib/Target/AVR/AVRTargetMachine.cpp @@ -0,0 +1,4 @@ + +extern "C" void LLVMInitializeAVRTarget() { + +} diff --git a/lib/Target/AVR/CMakeLists.txt b/lib/Target/AVR/CMakeLists.txt new file mode 100644 index 000000000000..22b30ef35855 --- /dev/null +++ b/lib/Target/AVR/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LLVM_TARGET_DEFINITIONS AVR.td) + +tablegen(LLVM AVRGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM AVRGenCallingConv.inc -gen-callingconv) +add_public_tablegen_target(AVRCommonTableGen) + +add_llvm_target(AVRCodeGen + AVRTargetMachine.cpp + ) + +add_dependencies(LLVMAVRCodeGen intrinsics_gen) + +add_subdirectory(TargetInfo) + diff --git a/lib/Target/AVR/LLVMBuild.txt b/lib/Target/AVR/LLVMBuild.txt new file mode 100644 index 000000000000..386c594b20ec --- /dev/null +++ b/lib/Target/AVR/LLVMBuild.txt @@ -0,0 +1,33 @@ +;===- ./lib/Target/AVR/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = TargetInfo + +[component_0] +type = TargetGroup +name = AVR +parent = Target +has_asmprinter = 0 +has_asmparser = 0 + +[component_1] +type = Library +name = AVRCodeGen +parent = AVR +required_libraries = AsmPrinter CodeGen Core MC AVRInfo SelectionDAG Support Target +add_to_library_groups = AVR diff --git a/lib/Target/AVR/Makefile b/lib/Target/AVR/Makefile new file mode 100644 index 000000000000..c91b6f5c0ae9 --- /dev/null +++ b/lib/Target/AVR/Makefile @@ -0,0 +1,19 @@ +##===- lib/Target/AVR/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMAVRCodeGen +TARGET = AVR + +# Make sure that tblgen is run, first thing. 
+BUILT_SOURCES = AVRGenRegisterInfo.inc
+
+DIRS = TargetInfo
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
new file mode 100644
index 000000000000..c0e0d20029c2
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -0,0 +1,25 @@
+//===-- AVRTargetInfo.cpp - AVR Target Implementation ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+
+namespace llvm {
+Target TheAVRTarget;
+}
+
+extern "C" void LLVMInitializeAVRTargetInfo() {
+  llvm::RegisterTarget<llvm::Triple::avr> X(
+      llvm::TheAVRTarget, "avr", "Atmel AVR Microcontroller");
+}
+
+// FIXME: Temporary stub - this function must be defined for linking
+// to succeed. Remove once this function is properly implemented.
+extern "C" void LLVMInitializeAVRTargetMC() {
+}
diff --git a/lib/Target/AVR/TargetInfo/CMakeLists.txt b/lib/Target/AVR/TargetInfo/CMakeLists.txt
new file mode 100644
index 000000000000..f27090037702
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/..
+                     ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMAVRInfo
+  AVRTargetInfo.cpp
+)
+
diff --git a/lib/Target/AVR/TargetInfo/LLVMBuild.txt b/lib/Target/AVR/TargetInfo/LLVMBuild.txt
new file mode 100644
index 000000000000..bc6e0ad2ee19
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===- ./lib/Target/AVR/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = AVRInfo
+parent = AVR
+required_libraries = MC Support
+add_to_library_groups = AVR
\ No newline at end of file
diff --git a/lib/Target/AVR/TargetInfo/Makefile b/lib/Target/AVR/TargetInfo/Makefile
new file mode 100644
index 000000000000..92b483dd028b
--- /dev/null
+++ b/lib/Target/AVR/TargetInfo/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/AVR/TargetInfo/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAVRInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+ +include $(LEVEL)/Makefile.common diff --git a/lib/Target/BPF/BPF.td b/lib/Target/BPF/BPF.td index a4ce90af0439..8493b0fd1e4a 100644 --- a/lib/Target/BPF/BPF.td +++ b/lib/Target/BPF/BPF.td @@ -25,7 +25,14 @@ def BPFInstPrinter : AsmWriter { bit isMCAsmWriter = 1; } +def BPFAsmParserVariant : AsmParserVariant { + int Variant = 0; + string Name = "BPF"; + string BreakCharacters = "."; +} + def BPF : Target { let InstructionSet = BPFInstrInfo; let AssemblyWriters = [BPFInstPrinter]; + let AssemblyParserVariants = [BPFAsmParserVariant]; } diff --git a/lib/Target/BPF/BPFISelLowering.cpp b/lib/Target/BPF/BPFISelLowering.cpp index 73418283d9bf..6a5b37e153d8 100644 --- a/lib/Target/BPF/BPFISelLowering.cpp +++ b/lib/Target/BPF/BPFISelLowering.cpp @@ -547,8 +547,7 @@ BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // to set, the condition code register to branch on, the true/false values to // select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator I = BB; - ++I; + MachineFunction::iterator I = ++BB->getIterator(); // ThisMBB: // ... diff --git a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h index adcaff686933..4276d0858c2e 100644 --- a/lib/Target/BPF/InstPrinter/BPFInstPrinter.h +++ b/lib/Target/BPF/InstPrinter/BPFInstPrinter.h @@ -17,8 +17,6 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { -class MCOperand; - class BPFInstPrinter : public MCInstPrinter { public: BPFInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp index 36f99262ed70..8c358cab62e8 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp @@ -68,16 +68,23 @@ void BPFAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, if (Fixup.getKind() == FK_SecRel_4 || Fixup.getKind() == FK_SecRel_8) { assert(Value == 0); - return; - } - assert(Fixup.getKind() == FK_PCRel_2); - Value = (uint16_t)((Value - 8) / 8); - if (IsLittleEndian) { - Data[Fixup.getOffset() + 2] = Value & 0xFF; - Data[Fixup.getOffset() + 3] = Value >> 8; + } else if (Fixup.getKind() == FK_Data_4 || Fixup.getKind() == FK_Data_8) { + unsigned Size = Fixup.getKind() == FK_Data_4 ? 4 : 8; + + for (unsigned i = 0; i != Size; ++i) { + unsigned Idx = IsLittleEndian ? i : Size - i; + Data[Fixup.getOffset() + Idx] = uint8_t(Value >> (i * 8)); + } } else { - Data[Fixup.getOffset() + 2] = Value >> 8; - Data[Fixup.getOffset() + 3] = Value & 0xFF; + assert(Fixup.getKind() == FK_PCRel_2); + Value = (uint16_t)((Value - 8) / 8); + if (IsLittleEndian) { + Data[Fixup.getOffset() + 2] = Value & 0xFF; + Data[Fixup.getOffset() + 3] = Value >> 8; + } else { + Data[Fixup.getOffset() + 2] = Value >> 8; + Data[Fixup.getOffset() + 3] = Value & 0xFF; + } } } diff --git a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp index 05ba6183e322..87cdd5eb9dad 100644 --- a/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp +++ b/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp @@ -44,6 +44,10 @@ unsigned BPFELFObjectWriter::GetRelocType(const MCValue &Target, return ELF::R_X86_64_64; case FK_SecRel_4: return ELF::R_X86_64_PC32; + case FK_Data_8: + return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64; + case FK_Data_4: + return IsPCRel ? 
ELF::R_X86_64_PC32 : ELF::R_X86_64_32;
   }
 }
diff --git a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index d63bbf49294e..1f440fe87871 100644
--- a/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -34,6 +34,8 @@ public:
     UsesELFSectionDirectiveForBSS = true;
     HasSingleParameterDotFile = false;
     HasDotTypeDotSizeDirective = false;
+
+    SupportsDebugInformation = true;
   }
 };
 }
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 272688edb8a1..5ea6551ebc9c 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -551,7 +551,8 @@ void CppWriter::printAttributes(const AttributeSet &PAL,
 void CppWriter::printType(Type* Ty) {
   // We don't print definitions for primitive types
   if (Ty->isFloatingPointTy() || Ty->isX86_MMXTy() || Ty->isIntegerTy() ||
-      Ty->isLabelTy() || Ty->isMetadataTy() || Ty->isVoidTy())
+      Ty->isLabelTy() || Ty->isMetadataTy() || Ty->isVoidTy() ||
+      Ty->isTokenTy())
     return;
 
   // If we already defined this type, we don't need to define it again.
@@ -1355,23 +1356,18 @@ void CppWriter::printInstruction(const Instruction *I,
   }
   case Instruction::GetElementPtr: {
     const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
-    if (gep->getNumOperands() <= 2) {
-      Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
-          << opNames[0];
-      if (gep->getNumOperands() == 2)
-        Out << ", " << opNames[1];
-    } else {
-      Out << "std::vector<Value*> " << iName << "_indices;";
-      nl(Out);
-      for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
-        Out << iName << "_indices.push_back("
-            << opNames[i] << ");";
-        nl(Out);
+    Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+        << getCppName(gep->getSourceElementType()) << ", " << opNames[0] << ", {";
+    in();
+    for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+      if (i != 1) {
+        Out << ", ";
       }
-      Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
-          << opNames[0] << ", " << iName << "_indices";
+      nl(Out);
+      Out << opNames[i];
     }
-    Out << ", \"";
+    out();
+    nl(Out) << "}, \"";
     printEscapedString(gep->getName());
     Out << "\", " << bbname << ");";
     break;
@@ -1803,13 +1799,12 @@ void CppWriter::printFunctionBody(const Function *F) {
           << "->arg_begin();";
       nl(Out);
     }
-    for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-         AI != AE; ++AI) {
-      Out << "Value* " << getCppName(AI) << " = args++;";
+    for (const Argument &AI : F->args()) {
+      Out << "Value* " << getCppName(&AI) << " = args++;";
       nl(Out);
-      if (AI->hasName()) {
-        Out << getCppName(AI) << "->setName(\"";
-        printEscapedString(AI->getName());
+      if (AI.hasName()) {
+        Out << getCppName(&AI) << "->setName(\"";
+        printEscapedString(AI.getName());
         Out << "\");";
         nl(Out);
       }
@@ -1818,29 +1813,25 @@
 
   // Create all the basic blocks
   nl(Out);
-  for (Function::const_iterator BI = F->begin(), BE = F->end();
-       BI != BE; ++BI) {
-    std::string bbname(getCppName(BI));
+  for (const BasicBlock &BI : *F) {
+    std::string bbname(getCppName(&BI));
     Out << "BasicBlock* " << bbname <<
         " = BasicBlock::Create(mod->getContext(), \"";
-    if (BI->hasName())
-      printEscapedString(BI->getName());
-    Out << "\"," << getCppName(BI->getParent()) << ",0);";
+    if (BI.hasName())
+      printEscapedString(BI.getName());
+    Out << "\"," << getCppName(BI.getParent()) << ",0);";
     nl(Out);
   }
 
   // Output all of its basic blocks... for the function
-  for (Function::const_iterator BI = F->begin(), BE = F->end();
-       BI != BE; ++BI) {
-    std::string bbname(getCppName(BI));
-    nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+  for (const BasicBlock &BI : *F) {
+    std::string bbname(getCppName(&BI));
+    nl(Out) << "// Block " << BI.getName() << " (" << bbname << ")";
     nl(Out);
 
     // Output all of the instructions in the basic block...
-    for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
-         I != E; ++I) {
-      printInstruction(I,bbname);
-    }
+    for (const Instruction &I : BI)
+      printInstruction(&I, bbname);
   }
 
   // Loop over the ForwardRefs and resolve them now that all instructions
@@ -1883,7 +1874,7 @@ void CppWriter::printInline(const std::string& fname,
   printFunctionUses(F);
   printFunctionBody(F);
   is_inline = false;
-  Out << "return " << getCppName(F->begin()) << ";";
+  Out << "return " << getCppName(&F->front()) << ";";
   nl(Out) << "}";
   nl(Out);
 }
@@ -1896,17 +1887,14 @@ void CppWriter::printModuleBody() {
   // Functions can call each other and global variables can reference them so
   // define all the functions first before emitting their function bodies.
   nl(Out) << "// Function Declarations"; nl(Out);
-  for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
-       I != E; ++I)
-    printFunctionHead(I);
+  for (const Function &I : *TheModule)
+    printFunctionHead(&I);
 
   // Process the global variable declarations. We can't initialize them until
   // after the constants are printed so just print a header for each global
   nl(Out) << "// Global Variable Declarations\n"; nl(Out);
-  for (Module::const_global_iterator I = TheModule->global_begin(),
-         E = TheModule->global_end(); I != E; ++I) {
-    printVariableHead(I);
-  }
+  for (const GlobalVariable &I : TheModule->globals())
+    printVariableHead(&I);
 
   // Print out all the constant definitions. Constants don't recurse except
   // through GlobalValues. All GlobalValues have been declared at this point
@@ -1918,21 +1906,18 @@
   // been emitted. These definitions just couple the gvars with their constant
   // initializers.
   nl(Out) << "// Global Variable Definitions"; nl(Out);
-  for (Module::const_global_iterator I = TheModule->global_begin(),
-         E = TheModule->global_end(); I != E; ++I) {
-    printVariableBody(I);
-  }
+  for (const GlobalVariable &I : TheModule->globals())
+    printVariableBody(&I);
 
   // Finally, we can safely put out all of the function bodies.
   nl(Out) << "// Function Definitions"; nl(Out);
-  for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
-       I != E; ++I) {
-    if (!I->isDeclaration()) {
-      nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+  for (const Function &I : *TheModule) {
+    if (!I.isDeclaration()) {
+      nl(Out) << "// Function: " << I.getName() << " (" << getCppName(&I)
               << ")";
       nl(Out) << "{";
       nl(Out,1);
-      printFunctionBody(I);
+      printFunctionBody(&I);
       nl(Out,-1) << "}";
       nl(Out);
     }
diff --git a/lib/Target/Hexagon/AsmParser/CMakeLists.txt b/lib/Target/Hexagon/AsmParser/CMakeLists.txt
new file mode 100644
index 000000000000..bbfa92d59628
--- /dev/null
+++ b/lib/Target/Hexagon/AsmParser/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMHexagonAsmParser
+  HexagonAsmParser.cpp
+  )
+
+add_dependencies( LLVMHexagonAsmParser HexagonCommonTableGen )
diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
new file mode 100644
index 000000000000..a8622a96527c
--- /dev/null
+++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -0,0 +1,2152 @@
+//===-- HexagonAsmParser.cpp - Parse Hexagon asm to MCInst instructions----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mcasmparser"
+
+#include "Hexagon.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonTargetStreamer.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonMCChecker.h"
+#include "MCTargetDesc/HexagonMCExpr.h"
+#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "MCTargetDesc/HexagonMCAsmInfo.h"
+#include "MCTargetDesc/HexagonShuffler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+
+using namespace llvm;
+
+static cl::opt<bool> EnableFutureRegs("mfuture-regs",
+                                      cl::desc("Enable future registers"));
+
+static cl::opt<bool> WarnMissingParenthesis("mwarn-missing-parenthesis",
+cl::desc("Warn for missing parenthesis around predicate registers"),
+cl::init(true));
+static cl::opt<bool> ErrorMissingParenthesis("merror-missing-parenthesis",
+cl::desc("Error for missing parenthesis around predicate registers"),
+cl::init(false));
+static cl::opt<bool> WarnSignedMismatch("mwarn-sign-mismatch",
+cl::desc("Warn for mismatching a signed and unsigned value"),
+cl::init(true));
+static cl::opt<bool> WarnNoncontigiousRegister("mwarn-noncontigious-register",
+cl::desc("Warn for register names that aren't contiguous"),
+cl::init(true));
+static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register",
+cl::desc("Error for register names that aren't contiguous"),
+cl::init(false));
+
+
+namespace {
+struct HexagonOperand;
+
+class HexagonAsmParser : public MCTargetAsmParser {
+
+  HexagonTargetStreamer &getTargetStreamer() {
+    MCTargetStreamer &TS = *Parser.getStreamer().getTargetStreamer();
+    return static_cast<HexagonTargetStreamer &>(TS);
+  }
+
+  MCAsmParser &Parser;
+  MCAssembler *Assembler;
+  MCInstrInfo const &MCII;
+  MCInst MCB;
+  bool InBrackets;
+
+  MCAsmParser &getParser() const { return Parser; }
+  MCAssembler *getAssembler() const { return Assembler; }
+  MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+  bool equalIsAsmAssignment() override { return false; }
+  bool isLabel(AsmToken &Token) override;
+
+  void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+  bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+  bool ParseDirectiveFalign(unsigned Size, SMLoc L);
+
+  virtual bool ParseRegister(unsigned &RegNo,
+                             SMLoc &StartLoc,
+                             SMLoc &EndLoc) override;
+  bool ParseDirectiveSubsection(SMLoc L);
+  bool ParseDirectiveValue(unsigned Size, SMLoc L);
+  bool ParseDirectiveComm(bool IsLocal, SMLoc L);
+  bool RegisterMatchesArch(unsigned MatchNum) const;
+
+  bool matchBundleOptions();
+  bool handleNoncontigiousRegister(bool Contigious, SMLoc &Loc);
+  bool finishBundle(SMLoc IDLoc, MCStreamer &Out);
+  void canonicalizeImmediates(MCInst &MCI);
+  bool matchOneInstruction(MCInst &MCB, SMLoc IDLoc,
+                           OperandVector &InstOperands, uint64_t &ErrorInfo,
+                           bool MatchingInlineAsm, bool &MustExtend);
+
+  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                               OperandVector &Operands, MCStreamer &Out,
+                               uint64_t &ErrorInfo, bool MatchingInlineAsm) override;
+
+  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override;
+  void OutOfRange(SMLoc IDLoc, long long Val, long long Max);
+  int processInstruction(MCInst &Inst, OperandVector const &Operands,
+                         SMLoc IDLoc, bool &MustExtend);
+
+  // Check if we have an assembler and, if so, set the ELF e_header flags.
+  void chksetELFHeaderEFlags(unsigned flags) {
+    if (getAssembler())
+      getAssembler()->setELFHeaderEFlags(flags);
+  }
+
+/// @name Auto-generated Match Functions
+/// {
+
+#define GET_ASSEMBLER_HEADER
+#include "HexagonGenAsmMatcher.inc"
+
+  /// }
+
+public:
+  HexagonAsmParser(const MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+                   const MCInstrInfo &MII, const MCTargetOptions &Options)
+      : MCTargetAsmParser(Options, _STI), Parser(_Parser),
+        MCII (MII), MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) {
+    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
+
+    MCAsmParserExtension::Initialize(_Parser);
+
+    Assembler = nullptr;
+    // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
+    if (!Parser.getStreamer().hasRawTextSupport()) {
+      MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
+      Assembler = &MES->getAssembler();
+    }
+  }
+
+  bool mustExtend(OperandVector &Operands);
+  bool splitIdentifier(OperandVector &Operands);
+  bool parseOperand(OperandVector &Operands);
+  bool parseInstruction(OperandVector &Operands);
+  bool implicitExpressionLocation(OperandVector &Operands);
+  bool parseExpressionOrOperand(OperandVector &Operands);
+  bool parseExpression(MCExpr const *& Expr);
+  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                                SMLoc NameLoc, OperandVector &Operands) override
+  {
+    llvm_unreachable("Unimplemented");
+  }
+  virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+                                AsmToken ID, OperandVector &Operands) override;
+
+  virtual bool ParseDirective(AsmToken DirectiveID) override;
+};
+
+/// HexagonOperand - Instances of this class represent a parsed Hexagon machine
+/// instruction.
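The struct that this comment introduces is a classic discriminated union: a Kind tag records which union member is live, and every accessor asserts on the tag before reading the data. As a minimal standalone sketch of the pattern (illustrative only, not part of the patch; all names below are made up):

// Illustrative only -- the tag-plus-union shape used by HexagonOperand below.
#include <cassert>

struct ToyOperand {
  enum KindTy { Token, Immediate, Register } Kind;

  union {
    const char *Tok; // live when Kind == Token
    long long Imm;   // live when Kind == Immediate
    unsigned RegNum; // live when Kind == Register
  };

  unsigned getReg() const {
    assert(Kind == Register && "Invalid access!");
    return RegNum;
  }

  static ToyOperand makeReg(unsigned R) {
    ToyOperand Op;
    Op.Kind = Register; // set the tag before the data is ever read
    Op.RegNum = R;
    return Op;
  }
};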
+struct HexagonOperand : public MCParsedAsmOperand { + enum KindTy { Token, Immediate, Register } Kind; + + SMLoc StartLoc, EndLoc; + + struct TokTy { + const char *Data; + unsigned Length; + }; + + struct RegTy { + unsigned RegNum; + }; + + struct ImmTy { + const MCExpr *Val; + bool MustExtend; + }; + + struct InstTy { + OperandVector *SubInsts; + }; + + union { + struct TokTy Tok; + struct RegTy Reg; + struct ImmTy Imm; + }; + + HexagonOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + +public: + HexagonOperand(const HexagonOperand &o) : MCParsedAsmOperand() { + Kind = o.Kind; + StartLoc = o.StartLoc; + EndLoc = o.EndLoc; + switch (Kind) { + case Register: + Reg = o.Reg; + break; + case Immediate: + Imm = o.Imm; + break; + case Token: + Tok = o.Tok; + break; + } + } + + /// getStartLoc - Get the location of the first token of this operand. + SMLoc getStartLoc() const { return StartLoc; } + + /// getEndLoc - Get the location of the last token of this operand. + SMLoc getEndLoc() const { return EndLoc; } + + unsigned getReg() const { + assert(Kind == Register && "Invalid access!"); + return Reg.RegNum; + } + + const MCExpr *getImm() const { + assert(Kind == Immediate && "Invalid access!"); + return Imm.Val; + } + + bool isToken() const { return Kind == Token; } + bool isImm() const { return Kind == Immediate; } + bool isMem() const { llvm_unreachable("No isMem"); } + bool isReg() const { return Kind == Register; } + + bool CheckImmRange(int immBits, int zeroBits, bool isSigned, + bool isRelocatable, bool Extendable) const { + if (Kind == Immediate) { + const MCExpr *myMCExpr = getImm(); + if (Imm.MustExtend && !Extendable) + return false; + int64_t Res; + if (myMCExpr->evaluateAsAbsolute(Res)) { + int bits = immBits + zeroBits; + // Field bit range is zerobits + bits + // zeroBits must be 0 + if (Res & ((1 << zeroBits) - 1)) + return false; + if (isSigned) { + if (Res < (1LL << (bits - 1)) && Res >= -(1LL << (bits - 1))) + return true; + } else { + if (bits == 64) + return true; + if (Res >= 0) + return ((uint64_t)Res < (uint64_t)(1ULL << bits)) ? true : false; + else { + const int64_t high_bit_set = 1ULL << 63; + const uint64_t mask = (high_bit_set >> (63 - bits)); + return (((uint64_t)Res & mask) == mask) ? 
true : false; + } + } + } else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable) + return true; + else if (myMCExpr->getKind() == MCExpr::Binary || + myMCExpr->getKind() == MCExpr::Unary) + return true; + } + return false; + } + + bool isf32Ext() const { return false; } + bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); } + bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); } + bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); } + bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); } + bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); } + bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); } + bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); } + bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); } + bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); } + bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); } + bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); } + + bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); } + bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); } + bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); } + bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); } + bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); } + bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); } + bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); } + bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); } + bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); } + bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); } + bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); } + bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); } + bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); } + bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); } + bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); } + bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); } + bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); } + bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); } + bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); } + bool isu2Imm() const { return CheckImmRange(2, 0, false, false, false); } + bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); } + + bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); } + bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); } + + bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); } + bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); } + bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); } + bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); } + bool iss8Ext() const { 
return CheckImmRange(8 + 26, 0, true, true, true); } + bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); } + bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); } + bool iss11_0Ext() const { + return CheckImmRange(11 + 26, 0, true, true, true); + } + bool iss11_1Ext() const { + return CheckImmRange(11 + 26, 1, true, true, true); + } + bool iss11_2Ext() const { + return CheckImmRange(11 + 26, 2, true, true, true); + } + bool iss11_3Ext() const { + return CheckImmRange(11 + 26, 3, true, true, true); + } + + bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } + bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); } + bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); } + bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); } + bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); } + bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); } + bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); } + bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); } + bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); } + bool isu32MustExt() const { return isImm() && Imm.MustExtend; } + + void addRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(getReg())); + } + + void addImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createExpr(getImm())); + } + + void addSignedImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + MCExpr const *Expr = getImm(); + int64_t Value; + if (!Expr->evaluateAsAbsolute(Value)) { + Inst.addOperand(MCOperand::createExpr(Expr)); + return; + } + int64_t Extended = SignExtend64 (Value, 32); + if ((Extended < 0) == (Value < 0)) { + Inst.addOperand(MCOperand::createExpr(Expr)); + return; + } + // Flip bit 33 to signal signed unsigned mismatch + Extended ^= 0x100000000; + Inst.addOperand(MCOperand::createImm(Extended)); + } + + void addf32ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds32ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8Imm64Operands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds6ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_0ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_1ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_2ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds4_3ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds3ImmOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + + void addu64ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu32ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu26_6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + 
void addu16ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_0ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu16_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu11_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu10ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu9ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu8ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu7ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_0ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu5ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu4ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu3ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu2ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu1ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void addm6ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addn8ImmOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + + void adds16ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds12ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds10ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds9ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds8ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds6ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_0ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_1ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_2ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + void adds11_3ExtOperands(MCInst &Inst, unsigned N) const { + addSignedImmOperands(Inst, N); + } + + void addu6ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu7ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu8ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu9ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu10ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_0ExtOperands(MCInst &Inst, unsigned N) const { + addImmOperands(Inst, N); + } + void addu6_1ExtOperands(MCInst &Inst, unsigned N) const { + 
addImmOperands(Inst, N);
+  }
+  void addu6_2ExtOperands(MCInst &Inst, unsigned N) const {
+    addImmOperands(Inst, N);
+  }
+  void addu6_3ExtOperands(MCInst &Inst, unsigned N) const {
+    addImmOperands(Inst, N);
+  }
+  void addu32MustExtOperands(MCInst &Inst, unsigned N) const {
+    addImmOperands(Inst, N);
+  }
+
+  void adds4_6ImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+  }
+
+  void adds3_6ImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+    Inst.addOperand(MCOperand::createImm(CE->getValue() * 64));
+  }
+
+  StringRef getToken() const {
+    assert(Kind == Token && "Invalid access!");
+    return StringRef(Tok.Data, Tok.Length);
+  }
+
+  virtual void print(raw_ostream &OS) const;
+
+  static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) {
+    HexagonOperand *Op = new HexagonOperand(Token);
+    Op->Tok.Data = Str.data();
+    Op->Tok.Length = Str.size();
+    Op->StartLoc = S;
+    Op->EndLoc = S;
+    return std::unique_ptr<HexagonOperand>(Op);
+  }
+
+  static std::unique_ptr<HexagonOperand> CreateReg(unsigned RegNum, SMLoc S,
+                                                   SMLoc E) {
+    HexagonOperand *Op = new HexagonOperand(Register);
+    Op->Reg.RegNum = RegNum;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return std::unique_ptr<HexagonOperand>(Op);
+  }
+
+  static std::unique_ptr<HexagonOperand> CreateImm(const MCExpr *Val, SMLoc S,
+                                                   SMLoc E) {
+    HexagonOperand *Op = new HexagonOperand(Immediate);
+    Op->Imm.Val = Val;
+    Op->Imm.MustExtend = false;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return std::unique_ptr<HexagonOperand>(Op);
+  }
+};
+
+} // end anonymous namespace.
+
+void HexagonOperand::print(raw_ostream &OS) const {
+  switch (Kind) {
+  case Immediate:
+    getImm()->print(OS, nullptr);
+    break;
+  case Register:
+    OS << "";
+    break;
+  case Token:
+    OS << "'" << getToken() << "'";
+    break;
+  }
+}
+
+/// @name Auto-generated Match Functions
+static unsigned MatchRegisterName(StringRef Name);
+
+bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
+  DEBUG(dbgs() << "Bundle:");
+  DEBUG(MCB.dump_pretty(dbgs()));
+  DEBUG(dbgs() << "--\n");
+
+  // Check the bundle for errors.
+  const MCRegisterInfo *RI = getContext().getRegisterInfo();
+  HexagonMCChecker Check(MCII, getSTI(), MCB, MCB, *RI);
+
+  bool CheckOk = HexagonMCInstrInfo::canonicalizePacket(MCII, getSTI(),
+                                                        getContext(), MCB,
+                                                        &Check);
+
+  while (Check.getNextErrInfo()) {
+    unsigned Reg = Check.getErrRegister();
+    Twine R(RI->getName(Reg));
+
+    uint64_t Err = Check.getError();
+    if (Err != HexagonMCErrInfo::CHECK_SUCCESS) {
+      if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err)
+        Error(IDLoc,
+              "unconditional branch cannot precede another branch in packet");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err ||
+          HexagonMCErrInfo::CHECK_ERROR_NEWV & Err)
+        Error(IDLoc, "register `" + R +
+                         "' used with `.new' "
+                         "but not validly modified in the same packet");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err)
+        Error(IDLoc, "register `" + R + "' modified more than once");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err)
+        Error(IDLoc, "cannot write to read-only register `" + R + "'");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err)
+        Error(IDLoc, "loop-setup and some branch instructions "
+                     "cannot be in the same packet");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) {
+        Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
+        Error(IDLoc, "packet marked with `:endloop" + N + "' " +
+                         "cannot contain instructions that modify register " +
+                         "`" + R + "'");
+      }
+
+      if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err)
+        Error(IDLoc,
+              "instruction cannot appear in packet with other instructions");
+
+      if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err)
+        Error(IDLoc, "too many slots used in packet");
+
+      if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) {
+        uint64_t Erm = Check.getShuffleError();
+
+        if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm)
+          Error(IDLoc, "invalid instruction packet");
+        else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm)
+          Error(IDLoc, "invalid instruction packet: too many stores");
+        else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm)
+          Error(IDLoc, "invalid instruction packet: too many loads");
+        else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm)
+          Error(IDLoc, "too many branches in packet");
+        else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm)
+          Error(IDLoc, "invalid instruction packet: out of slots");
+        else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm)
+          Error(IDLoc, "invalid instruction packet: slot error");
+        else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm)
+          Error(IDLoc, "v60 packet violation");
+        else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm)
+          Error(IDLoc, "slot 0 instruction does not allow slot 1 store");
+        else
+          Error(IDLoc, "unknown error in instruction packet");
+      }
+    }
+
+    unsigned Warn = Check.getWarning();
+    if (Warn != HexagonMCErrInfo::CHECK_SUCCESS) {
+      if (HexagonMCErrInfo::CHECK_WARN_CURRENT & Warn)
+        Warning(IDLoc, "register `" + R + "' used with `.cur' "
+                       "but not used in the same packet");
+      else if (HexagonMCErrInfo::CHECK_WARN_TEMPORARY & Warn)
+        Warning(IDLoc, "register `" + R + "' used with `.tmp' "
+                       "but not used in the same packet");
+    }
+  }
+
+  if (CheckOk) {
+    MCB.setLoc(IDLoc);
+    if (HexagonMCInstrInfo::bundleSize(MCB) == 0) {
+      assert(!HexagonMCInstrInfo::isInnerLoop(MCB));
+      assert(!HexagonMCInstrInfo::isOuterLoop(MCB));
+      // Empty packets are valid yet aren't emitted.
+      return false;
+    }
+    Out.EmitInstruction(MCB, getSTI());
+  } else {
+    // If compounding and duplexing didn't reduce the packet to four or
+    // fewer instructions, we have a packet that is too big.
+    if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE) {
+      Error(IDLoc, "invalid instruction packet: out of slots");
+      return true; // Error
+    }
+  }
+
+  return false; // No error
+}
+
+bool HexagonAsmParser::matchBundleOptions() {
+  MCAsmParser &Parser = getParser();
+  MCAsmLexer &Lexer = getLexer();
+  while (true) {
+    if (!Parser.getTok().is(AsmToken::Colon))
+      return false;
+    Lexer.Lex();
+    StringRef Option = Parser.getTok().getString();
+    if (Option.compare_lower("endloop0") == 0)
+      HexagonMCInstrInfo::setInnerLoop(MCB);
+    else if (Option.compare_lower("endloop1") == 0)
+      HexagonMCInstrInfo::setOuterLoop(MCB);
+    else if (Option.compare_lower("mem_noshuf") == 0)
+      HexagonMCInstrInfo::setMemReorderDisabled(MCB);
+    else if (Option.compare_lower("mem_shuf") == 0)
+      HexagonMCInstrInfo::setMemStoreReorderEnabled(MCB);
+    else
+      return true;
+    Lexer.Lex();
+  }
+}
+
+// For instruction aliases, immediates are generated as plain integers rather
+// than as MCConstantExpr; convert them to a uniform MCExpr representation.
+// Also check for signed/unsigned mismatches and warn.
+void HexagonAsmParser::canonicalizeImmediates(MCInst &MCI) {
+  MCInst NewInst;
+  NewInst.setOpcode(MCI.getOpcode());
+  for (MCOperand &I : MCI)
+    if (I.isImm()) {
+      int64_t Value(I.getImm());
+      if ((Value & 0x100000000) != (Value & 0x80000000)) {
+        // Detect bit 33 flipped with respect to bit 32 and signal a warning.
+        Value ^= 0x100000000;
+        if (WarnSignedMismatch)
+          Warning(MCI.getLoc(), "Signed/Unsigned mismatch");
+      }
+      NewInst.addOperand(MCOperand::createExpr(
+          MCConstantExpr::create(Value, getContext())));
+    }
+    else
+      NewInst.addOperand(I);
+  MCI = NewInst;
+}
+
+bool HexagonAsmParser::matchOneInstruction(MCInst &MCI, SMLoc IDLoc,
+                                           OperandVector &InstOperands,
+                                           uint64_t &ErrorInfo,
+                                           bool MatchingInlineAsm,
+                                           bool &MustExtend) {
+  // Perform matching with the tablegen asmmatcher-generated function.
+  int result =
+      MatchInstructionImpl(InstOperands, MCI, ErrorInfo, MatchingInlineAsm);
+  if (result == Match_Success) {
+    MCI.setLoc(IDLoc);
+    MustExtend = mustExtend(InstOperands);
+    canonicalizeImmediates(MCI);
+    result = processInstruction(MCI, InstOperands, IDLoc, MustExtend);
+
+    DEBUG(dbgs() << "Insn:");
+    DEBUG(MCI.dump_pretty(dbgs()));
+    DEBUG(dbgs() << "\n\n");
+
+    MCI.setLoc(IDLoc);
+  }
+
+  // Create an instruction operand for the bundle instruction.
+  // TODO: Break this into a separate function; the code here is less readable.
+  // Think about how to get an instruction error to report correctly;
+  // SMLoc will return the "{".
+  switch (result) {
+  default:
+    break;
+  case Match_Success:
+    return false;
+  case Match_MissingFeature:
+    return Error(IDLoc, "invalid instruction");
+  case Match_MnemonicFail:
+    return Error(IDLoc, "unrecognized instruction");
+  case Match_InvalidOperand:
+    SMLoc ErrorLoc = IDLoc;
+    if (ErrorInfo != ~0U) {
+      if (ErrorInfo >= InstOperands.size())
+        return Error(IDLoc, "too few operands for instruction");
+
+      ErrorLoc =
+          (static_cast<HexagonOperand *>(InstOperands[ErrorInfo].get()))
+              ->getStartLoc();
+      if (ErrorLoc == SMLoc())
+        ErrorLoc = IDLoc;
+    }
+    return Error(ErrorLoc, "invalid operand for instruction");
+  }
+  llvm_unreachable("Implement any new match types added!");
+}
+
+bool HexagonAsmParser::mustExtend(OperandVector &Operands) {
+  unsigned Count = 0;
+  for (std::unique_ptr<MCParsedAsmOperand> &i : Operands)
+    if (i->isImm())
+      if (static_cast<HexagonOperand *>(i.get())->Imm.MustExtend)
+        ++Count;
+  // Multiple extenders should have been filtered by iss9Ext et al.
+  assert(Count < 2 && "Multiple extenders");
+  return Count == 1;
+}
+
+bool HexagonAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+                                               OperandVector &Operands,
+                                               MCStreamer &Out,
+                                               uint64_t &ErrorInfo,
+                                               bool MatchingInlineAsm) {
+  if (!InBrackets) {
+    MCB.clear();
+    MCB.addOperand(MCOperand::createImm(0));
+  }
+  HexagonOperand &FirstOperand = static_cast<HexagonOperand &>(*Operands[0]);
+  if (FirstOperand.isToken() && FirstOperand.getToken() == "{") {
+    assert(Operands.size() == 1 && "Brackets should be by themselves");
+    if (InBrackets) {
+      getParser().Error(IDLoc, "Already in a packet");
+      return true;
+    }
+    InBrackets = true;
+    return false;
+  }
+  if (FirstOperand.isToken() && FirstOperand.getToken() == "}") {
+    assert(Operands.size() == 1 && "Brackets should be by themselves");
+    if (!InBrackets) {
+      getParser().Error(IDLoc, "Not in a packet");
+      return true;
+    }
+    InBrackets = false;
+    if (matchBundleOptions())
+      return true;
+    return finishBundle(IDLoc, Out);
+  }
+  MCInst *SubInst = new (getParser().getContext()) MCInst;
+  bool MustExtend = false;
+  if (matchOneInstruction(*SubInst, IDLoc, Operands, ErrorInfo,
+                          MatchingInlineAsm, MustExtend))
+    return true;
+  HexagonMCInstrInfo::extendIfNeeded(
+      getParser().getContext(), MCII, MCB, *SubInst,
+      HexagonMCInstrInfo::isExtended(MCII, *SubInst) || MustExtend);
+  MCB.addOperand(MCOperand::createInst(SubInst));
+  if (!InBrackets)
+    return finishBundle(IDLoc, Out);
+  return false;
+}
+
+/// ParseDirective parses the Hexagon-specific directives.
+bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) {
+  StringRef IDVal = DirectiveID.getIdentifier();
+  if ((IDVal.lower() == ".word") || (IDVal.lower() == ".4byte"))
+    return ParseDirectiveValue(4, DirectiveID.getLoc());
+  if (IDVal.lower() == ".short" || IDVal.lower() == ".hword" ||
+      IDVal.lower() == ".half")
+    return ParseDirectiveValue(2, DirectiveID.getLoc());
+  if (IDVal.lower() == ".falign")
+    return ParseDirectiveFalign(256, DirectiveID.getLoc());
+  if ((IDVal.lower() == ".lcomm") || (IDVal.lower() == ".lcommon"))
+    return ParseDirectiveComm(true, DirectiveID.getLoc());
+  if ((IDVal.lower() == ".comm") || (IDVal.lower() == ".common"))
+    return ParseDirectiveComm(false, DirectiveID.getLoc());
+  if (IDVal.lower() == ".subsection")
+    return ParseDirectiveSubsection(DirectiveID.getLoc());
+
+  return true;
+}
+bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) {
+  const MCExpr *Subsection = nullptr;
+  int64_t Res;
+
+  assert((getLexer().isNot(AsmToken::EndOfStatement)) &&
+         "Invalid subsection directive");
+  getParser().parseExpression(Subsection);
+
+  if (!Subsection->evaluateAsAbsolute(Res))
+    return Error(L, "Cannot evaluate subsection number");
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  // 0-8192 is the hard-coded range in MCObjectStreamer.cpp; this keeps the
+  // negative subsections together and in the same order, but at the opposite
+  // end of the section. Only assembly code created by legacy hexagon-gcc
+  // used negative subsections.
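+  // For example (input chosen purely for illustration): ".subsection -1"
+  // gives Res == -1, which the remapping below turns into 8192 + (-1) == 8191,
+  // the highest in-range subsection; ".subsection -8192" maps to 0.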
+  if ((Res < 0) && (Res > -8193))
+    Subsection = MCConstantExpr::create(8192 + Res, this->getContext());
+
+  getStreamer().SubSection(Subsection);
+  return false;
+}
+
+/// ::= .falign [expression]
+bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {
+
+  int64_t MaxBytesToFill = 15;
+
+  // If there is an argument...
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    const MCExpr *Value;
+    SMLoc ExprLoc = L;
+
+    // Make sure we have a number (parseExpression returns false on success).
+    if (!getParser().parseExpression(Value)) {
+      // Make sure this is a number that is in range.
+      const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
+      uint64_t IntValue = MCE->getValue();
+      if (!isUIntN(Size, IntValue) && !isIntN(Size, IntValue))
+        return Error(ExprLoc, "literal value out of range (256) for falign");
+      MaxBytesToFill = IntValue;
+      Lex();
+    } else {
+      return Error(ExprLoc, "not a valid expression for falign directive");
+    }
+  }
+
+  getTargetStreamer().emitFAlign(16, MaxBytesToFill);
+  Lex();
+
+  return false;
+}
+
+/// ::= .word [ expression (, expression)* ]
+bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+    for (;;) {
+      const MCExpr *Value;
+      SMLoc ExprLoc = L;
+      if (getParser().parseExpression(Value))
+        return true;
+
+      // Special case constant expressions to match code generator.
+      if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+        assert(Size <= 8 && "Invalid size");
+        uint64_t IntValue = MCE->getValue();
+        if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+          return Error(ExprLoc, "literal value out of range for directive");
+        getStreamer().EmitIntValue(IntValue, Size);
+      } else
+        getStreamer().EmitValue(Value, Size);
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      // FIXME: Improve diagnostic.
+      if (getLexer().isNot(AsmToken::Comma))
+        return TokError("unexpected token in directive");
+      Lex();
+    }
+  }
+
+  Lex();
+  return false;
+}
+
+// This is largely a copy of AsmParser's ParseDirectiveComm, extended to
+// accept a third argument, AccessAlignment, which indicates the smallest
+// memory access made to the symbol, expressed in bytes. If no
+// AccessAlignment is specified, it defaults to the alignment value.
+// Hexagon's .lcomm:
+//   .lcomm Symbol, Length, Alignment, AccessAlignment
+bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) {
+  // FIXME: need a better way to detect the AsmStreamer (upstream removed
+  // getKind()).
+  if (getStreamer().hasRawTextSupport())
+    return true; // Only object file output requires special treatment.
+
+  StringRef Name;
+  if (getParser().parseIdentifier(Name))
+    return TokError("expected identifier in directive");
+  // Handle the identifier as the key symbol.
+  MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  int64_t Size;
+  SMLoc SizeLoc = getLexer().getLoc();
+  if (getParser().parseAbsoluteExpression(Size))
+    return true;
+
+  int64_t ByteAlignment = 1;
+  SMLoc ByteAlignmentLoc;
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+    ByteAlignmentLoc = getLexer().getLoc();
+    if (getParser().parseAbsoluteExpression(ByteAlignment))
+      return true;
+    if (!isPowerOf2_64(ByteAlignment))
+      return Error(ByteAlignmentLoc, "alignment must be a power of 2");
+  }
+
+  int64_t AccessAlignment = 0;
+  if (getLexer().is(AsmToken::Comma)) {
+    // The optional access argument specifies the size of the smallest memory
+    // access to be made to the symbol, expressed in bytes.
+    SMLoc AccessAlignmentLoc;
+    Lex();
+    AccessAlignmentLoc = getLexer().getLoc();
+    if (getParser().parseAbsoluteExpression(AccessAlignment))
+      return true;
+
+    if (!isPowerOf2_64(AccessAlignment))
+      return Error(AccessAlignmentLoc, "access alignment must be a power of 2");
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.comm' or '.lcomm' directive");
+
+  Lex();
+
+  // NOTE: a zero size for .comm should create an undefined symbol,
+  // but a zero size for .lcomm creates a bss symbol of size zero.
+  if (Size < 0)
+    return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
+                          "be less than zero");
+
+  // NOTE: The alignment in the directive is a power-of-2 value; the assembler
+  // may internally end up wanting an alignment in bytes.
+  // FIXME: Diagnose overflow.
+  if (ByteAlignment < 0)
+    return Error(ByteAlignmentLoc, "invalid '.comm' or '.lcomm' directive "
+                                   "alignment, can't be less than zero");
+
+  if (!Sym->isUndefined())
+    return Error(Loc, "invalid symbol redefinition");
+
+  HexagonMCELFStreamer &HexagonELFStreamer =
+      static_cast<HexagonMCELFStreamer &>(getStreamer());
+  if (IsLocal) {
+    HexagonELFStreamer.HexagonMCEmitLocalCommonSymbol(Sym, Size, ByteAlignment,
+                                                      AccessAlignment);
+    return false;
+  }
+
+  HexagonELFStreamer.HexagonMCEmitCommonSymbol(Sym, Size, ByteAlignment,
+                                               AccessAlignment);
+  return false;
+}
+
+// Validate a register against the architecture.
+bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
+  return true;
+}
+
+// extern "C" void LLVMInitializeHexagonAsmLexer();
+
+/// Force static initialization.
+extern "C" void LLVMInitializeHexagonAsmParser() {
+  RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget);
+}
+
+#define GET_MATCHER_IMPLEMENTATION
+#define GET_REGISTER_MATCHER
+#include "HexagonGenAsmMatcher.inc"
+
+namespace {
+bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) {
+  if (Index >= Operands.size())
+    return false;
+  MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1];
+  if (!Operand.isToken())
+    return false;
+  return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String);
+}
+bool previousIsLoop(OperandVector &Operands, size_t Index) {
+  return previousEqual(Operands, Index, "loop0") ||
+         previousEqual(Operands, Index, "loop1") ||
+         previousEqual(Operands, Index, "sp1loop0") ||
+         previousEqual(Operands, Index, "sp2loop0") ||
+         previousEqual(Operands, Index, "sp3loop0");
+}
+}
+
+bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) {
+  AsmToken const &Token = getParser().getTok();
+  StringRef String = Token.getString();
+  SMLoc Loc = Token.getLoc();
+  getLexer().Lex();
+  do {
+    std::pair<StringRef, StringRef> HeadTail = String.split('.');
+    if (!HeadTail.first.empty())
+      Operands.push_back(HexagonOperand::CreateToken(HeadTail.first, Loc));
+    if (!HeadTail.second.empty())
+      Operands.push_back(HexagonOperand::CreateToken(
+          String.substr(HeadTail.first.size(), 1), Loc));
+    String = HeadTail.second;
+  } while (!String.empty());
+  return false;
+}
+
+bool HexagonAsmParser::parseOperand(OperandVector &Operands) {
+  unsigned Register;
+  SMLoc Begin;
+  SMLoc End;
+  MCAsmLexer &Lexer = getLexer();
+  if (!ParseRegister(Register, Begin, End)) {
+    if (!ErrorMissingParenthesis)
+      switch (Register) {
+      default:
+        break;
+      case Hexagon::P0:
+      case Hexagon::P1:
+      case Hexagon::P2:
+      case Hexagon::P3:
+        if (previousEqual(Operands, 0, "if")) {
+          if (WarnMissingParenthesis)
+            Warning(Begin, "Missing parenthesis around predicate register");
+          static char const *LParen = "(";
+          static char const *RParen = ")";
+          Operands.push_back(HexagonOperand::CreateToken(LParen, Begin));
+          Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+          AsmToken MaybeDotNew = Lexer.getTok();
+          if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+              MaybeDotNew.getString().equals_lower(".new"))
+            splitIdentifier(Operands);
+          Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+          return false;
+        }
+        if (previousEqual(Operands, 0, "!") &&
+            previousEqual(Operands, 1, "if")) {
+          if (WarnMissingParenthesis)
+            Warning(Begin, "Missing parenthesis around predicate register");
+          static char const *LParen = "(";
+          static char const *RParen = ")";
+          Operands.insert(Operands.end() - 1,
+                          HexagonOperand::CreateToken(LParen, Begin));
+          Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+          AsmToken MaybeDotNew = Lexer.getTok();
+          if (MaybeDotNew.is(AsmToken::TokenKind::Identifier) &&
+              MaybeDotNew.getString().equals_lower(".new"))
+            splitIdentifier(Operands);
+          Operands.push_back(HexagonOperand::CreateToken(RParen, Begin));
+          return false;
+        }
+        break;
+      }
+    Operands.push_back(HexagonOperand::CreateReg(Register, Begin, End));
+    return false;
+  }
+  return splitIdentifier(Operands);
+}
+
+bool HexagonAsmParser::isLabel(AsmToken &Token) {
+  MCAsmLexer &Lexer = getLexer();
+  AsmToken const &Second = Lexer.getTok();
+  AsmToken Third = Lexer.peekTok();
+  StringRef String = Token.getString();
+  if (Token.is(AsmToken::TokenKind::LCurly) ||
+      Token.is(AsmToken::TokenKind::RCurly))
+    return false;
+  if (!Token.is(AsmToken::TokenKind::Identifier))
+    return true;
+  if (!MatchRegisterName(String.lower()))
+    return true;
+  (void)Second;
+  assert(Second.is(AsmToken::Colon));
+  StringRef Raw(String.data(), Third.getString().data() - String.data() +
+                                   Third.getString().size());
+  std::string Collapsed = Raw;
+  Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+                  Collapsed.end());
+  StringRef Whole = Collapsed;
+  std::pair<StringRef, StringRef> DotSplit = Whole.split('.');
+  if (!MatchRegisterName(DotSplit.first.lower()))
+    return true;
+  return false;
+}
+
+bool HexagonAsmParser::handleNoncontigiousRegister(bool Contiguous,
+                                                   SMLoc &Loc) {
+  if (!Contiguous && ErrorNoncontigiousRegister) {
+    Error(Loc, "Register name is not contiguous");
+    return true;
+  }
+  if (!Contiguous && WarnNoncontigiousRegister)
+    Warning(Loc, "Register name is not contiguous");
+  return false;
+}
+
+bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                                     SMLoc &EndLoc) {
+  MCAsmLexer &Lexer = getLexer();
+  StartLoc = getLexer().getLoc();
+  SmallVector<AsmToken, 5> Lookahead;
+  StringRef RawString(Lexer.getTok().getString().data(), 0);
+  bool Again = Lexer.is(AsmToken::Identifier);
+  bool NeededWorkaround = false;
+  while (Again) {
+    AsmToken const &Token = Lexer.getTok();
+    RawString = StringRef(RawString.data(),
+                          Token.getString().data() - RawString.data() +
+                              Token.getString().size());
+    Lookahead.push_back(Token);
+    Lexer.Lex();
+    bool Contiguous = Lexer.getTok().getString().data() ==
+                      Lookahead.back().getString().data() +
+                          Lookahead.back().getString().size();
+    bool Type = Lexer.is(AsmToken::Identifier) || Lexer.is(AsmToken::Dot) ||
+                Lexer.is(AsmToken::Integer) || Lexer.is(AsmToken::Real) ||
+                Lexer.is(AsmToken::Colon);
+    bool Workaround = Lexer.is(AsmToken::Colon) ||
+                      Lookahead.back().is(AsmToken::Colon);
+    Again = (Contiguous && Type) || (Workaround && Type);
+    NeededWorkaround = NeededWorkaround || (Again && !(Contiguous && Type));
+  }
+  std::string Collapsed = RawString;
+  Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
+                  Collapsed.end());
+  StringRef FullString = Collapsed;
+  std::pair<StringRef, StringRef> DotSplit = FullString.split('.');
+  unsigned DotReg = MatchRegisterName(DotSplit.first.lower());
+  if (DotReg != Hexagon::NoRegister && RegisterMatchesArch(DotReg)) {
+    if (DotSplit.second.empty()) {
+      RegNo = DotReg;
+      EndLoc = Lexer.getLoc();
+      if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+        return true;
+      return false;
+    } else {
+      RegNo = DotReg;
+      size_t First = RawString.find('.');
+      StringRef DotString(RawString.data() + First, RawString.size() - First);
+      Lexer.UnLex(AsmToken(AsmToken::Identifier, DotString));
+      EndLoc = Lexer.getLoc();
+      if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+        return true;
+      return false;
+    }
+  }
+  std::pair<StringRef, StringRef> ColonSplit = StringRef(FullString).split(':');
+  unsigned ColonReg = MatchRegisterName(ColonSplit.first.lower());
+  if (ColonReg != Hexagon::NoRegister && RegisterMatchesArch(ColonReg)) {
+    Lexer.UnLex(Lookahead.back());
+    Lookahead.pop_back();
+    Lexer.UnLex(Lookahead.back());
+    Lookahead.pop_back();
+    RegNo = ColonReg;
+    EndLoc = Lexer.getLoc();
+    if (handleNoncontigiousRegister(!NeededWorkaround, StartLoc))
+      return true;
+    return false;
+  }
+  while (!Lookahead.empty()) {
+    Lexer.UnLex(Lookahead.back());
+    Lookahead.pop_back();
+  }
+  return true;
+}
+
+bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) {
+  if (previousEqual(Operands, 0, "call"))
+    return true;
+  if (previousEqual(Operands, 0, "jump"))
+    if (!getLexer().getTok().is(AsmToken::Colon))
+      return
true; + if (previousEqual(Operands, 0, "(") && previousIsLoop(Operands, 1)) + return true; + if (previousEqual(Operands, 1, ":") && previousEqual(Operands, 2, "jump") && + (previousEqual(Operands, 0, "nt") || previousEqual(Operands, 0, "t"))) + return true; + return false; +} + +bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) { + llvm::SmallVector Tokens; + MCAsmLexer &Lexer = getLexer(); + bool Done = false; + static char const * Comma = ","; + do { + Tokens.emplace_back (Lexer.getTok()); + Lexer.Lex(); + switch (Tokens.back().getKind()) + { + case AsmToken::TokenKind::Hash: + if (Tokens.size () > 1) + if ((Tokens.end () - 2)->getKind() == AsmToken::TokenKind::Plus) { + Tokens.insert(Tokens.end() - 2, + AsmToken(AsmToken::TokenKind::Comma, Comma)); + Done = true; + } + break; + case AsmToken::TokenKind::RCurly: + case AsmToken::TokenKind::EndOfStatement: + case AsmToken::TokenKind::Eof: + Done = true; + break; + default: + break; + } + } while (!Done); + while (!Tokens.empty()) { + Lexer.UnLex(Tokens.back()); + Tokens.pop_back(); + } + return getParser().parseExpression(Expr); +} + +bool HexagonAsmParser::parseExpressionOrOperand(OperandVector &Operands) { + if (implicitExpressionLocation(Operands)) { + MCAsmParser &Parser = getParser(); + SMLoc Loc = Parser.getLexer().getLoc(); + std::unique_ptr Expr = + HexagonOperand::CreateImm(nullptr, Loc, Loc); + MCExpr const *& Val = Expr->Imm.Val; + Operands.push_back(std::move(Expr)); + return parseExpression(Val); + } + return parseOperand(Operands); +} + +/// Parse an instruction. +bool HexagonAsmParser::parseInstruction(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCAsmLexer &Lexer = getLexer(); + while (true) { + AsmToken const &Token = Parser.getTok(); + switch (Token.getKind()) { + case AsmToken::EndOfStatement: { + Lexer.Lex(); + return false; + } + case AsmToken::LCurly: { + if (!Operands.empty()) + return true; + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + return false; + } + case AsmToken::RCurly: { + if (Operands.empty()) { + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + } + return false; + } + case AsmToken::Comma: { + Lexer.Lex(); + continue; + } + case AsmToken::EqualEqual: + case AsmToken::ExclaimEqual: + case AsmToken::GreaterEqual: + case AsmToken::GreaterGreater: + case AsmToken::LessEqual: + case AsmToken::LessLess: { + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(0, 1), Token.getLoc())); + Operands.push_back(HexagonOperand::CreateToken( + Token.getString().substr(1, 1), Token.getLoc())); + Lexer.Lex(); + continue; + } + case AsmToken::Hash: { + bool MustNotExtend = false; + bool ImplicitExpression = implicitExpressionLocation(Operands); + std::unique_ptr Expr = HexagonOperand::CreateImm( + nullptr, Lexer.getLoc(), Lexer.getLoc()); + if (!ImplicitExpression) + Operands.push_back( + HexagonOperand::CreateToken(Token.getString(), Token.getLoc())); + Lexer.Lex(); + bool MustExtend = false; + bool HiOnly = false; + bool LoOnly = false; + if (Lexer.is(AsmToken::Hash)) { + Lexer.Lex(); + MustExtend = true; + } else if (ImplicitExpression) + MustNotExtend = true; + AsmToken const &Token = Parser.getTok(); + if (Token.is(AsmToken::Identifier)) { + StringRef String = Token.getString(); + AsmToken IDToken = Token; + if (String.lower() == "hi") { + HiOnly = true; + } else if (String.lower() == "lo") { + LoOnly = true; + } + if (HiOnly || LoOnly) { + AsmToken 
LParen = Lexer.peekTok(); + if (!LParen.is(AsmToken::LParen)) { + HiOnly = false; + LoOnly = false; + } else { + Lexer.Lex(); + } + } + } + if (parseExpression(Expr->Imm.Val)) + return true; + int64_t Value; + MCContext &Context = Parser.getContext(); + assert(Expr->Imm.Val != nullptr); + if (Expr->Imm.Val->evaluateAsAbsolute(Value)) { + if (HiOnly) + Expr->Imm.Val = MCBinaryExpr::createLShr( + Expr->Imm.Val, MCConstantExpr::create(16, Context), Context); + if (HiOnly || LoOnly) + Expr->Imm.Val = MCBinaryExpr::createAnd( + Expr->Imm.Val, MCConstantExpr::create(0xffff, Context), Context); + } + if (MustNotExtend) + Expr->Imm.Val = HexagonNoExtendOperand::Create(Expr->Imm.Val, Context); + Expr->Imm.MustExtend = MustExtend; + Operands.push_back(std::move(Expr)); + continue; + } + default: + break; + } + if (parseExpressionOrOperand(Operands)) + return true; + } +} + +bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, + AsmToken ID, + OperandVector &Operands) { + getLexer().UnLex(ID); + return parseInstruction(Operands); +} + +namespace { +MCInst makeCombineInst(int opCode, MCOperand &Rdd, + MCOperand &MO1, MCOperand &MO2) { + MCInst TmpInst; + TmpInst.setOpcode(opCode); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(MO1); + TmpInst.addOperand(MO2); + + return TmpInst; +} +} + +// Define this matcher function after the auto-generated include so we +// have the match class enum definitions. +unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, + unsigned Kind) { + HexagonOperand *Op = static_cast(&AsmOp); + + switch (Kind) { + case MCK_0: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 0 + ? Match_Success + : Match_InvalidOperand; + } + case MCK_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == 1 + ? Match_Success + : Match_InvalidOperand; + } + case MCK__MINUS_1: { + int64_t Value; + return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1 + ? 
Match_Success + : Match_InvalidOperand; + } + } + if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) { + StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length); + if (matchTokenString(myStringRef.lower()) == (MatchClassKind)Kind) + return Match_Success; + if (matchTokenString(myStringRef.upper()) == (MatchClassKind)Kind) + return Match_Success; + } + + DEBUG(dbgs() << "Unmatched Operand:"); + DEBUG(Op->dump()); + DEBUG(dbgs() << "\n"); + + return Match_InvalidOperand; +} + +void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) { + std::string errStr; + raw_string_ostream ES(errStr); + ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: "; + if (Max >= 0) + ES << "0-" << Max; + else + ES << Max << "-" << (-Max - 1); + Error(IDLoc, ES.str().c_str()); +} + +int HexagonAsmParser::processInstruction(MCInst &Inst, + OperandVector const &Operands, + SMLoc IDLoc, bool &MustExtend) { + MCContext &Context = getParser().getContext(); + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + std::string r = "r"; + std::string v = "v"; + std::string Colon = ":"; + + bool is32bit = false; // used to distinguish between CONST32 and CONST64 + switch (Inst.getOpcode()) { + default: + break; + + case Hexagon::M4_mpyrr_addr: + case Hexagon::S4_addi_asl_ri: + case Hexagon::S4_addi_lsr_ri: + case Hexagon::S4_andi_asl_ri: + case Hexagon::S4_andi_lsr_ri: + case Hexagon::S4_ori_asl_ri: + case Hexagon::S4_ori_lsr_ri: + case Hexagon::S4_or_andix: + case Hexagon::S4_subi_asl_ri: + case Hexagon::S4_subi_lsr_ri: { + MCOperand &Ry = Inst.getOperand(0); + MCOperand &src = Inst.getOperand(2); + if (RI->getEncodingValue(Ry.getReg()) != RI->getEncodingValue(src.getReg())) + return Match_InvalidOperand; + break; + } + + case Hexagon::C2_cmpgei: { + MCOperand &MO = Inst.getOperand(2); + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgti); + break; + } + + case Hexagon::C2_cmpgeui: { + MCOperand &MO = Inst.getOperand(2); + int64_t Value; + bool Success = MO.getExpr()->evaluateAsAbsolute(Value); + (void)Success; + assert(Success && "Assured by matcher"); + if (Value == 0) { + MCInst TmpInst; + MCOperand &Pd = Inst.getOperand(0); + MCOperand &Rt = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::C2_cmpeq); + TmpInst.addOperand(Pd); + TmpInst.addOperand(Rt); + TmpInst.addOperand(Rt); + Inst = TmpInst; + } else { + MO.setExpr(MCBinaryExpr::createSub( + MO.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::C2_cmpgtui); + } + break; + } + case Hexagon::J2_loop1r: + case Hexagon::J2_loop1i: + case Hexagon::J2_loop0r: + case Hexagon::J2_loop0i: { + MCOperand &MO = Inst.getOperand(0); + // Loop has different opcodes for extended vs not extended, but we should + // not use the other opcode as it is a legacy artifact of TD files. + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + // if the operand can fit within a 7:2 field + if (Value < (1 << 8) && Value >= -(1 << 8)) { + SMLoc myLoc = Operands[2]->getStartLoc(); + // # is left in startLoc in the case of ## + // If '##' found then force extension. + if (*myLoc.getPointer() == '#') { + MustExtend = true; + break; + } + } else { + // If immediate and out of 7:2 range. 
+ MustExtend = true; + } + } + break; + } + + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = Inst.getOperand(1); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode(Hexagon::A2_combinew); + break; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + break; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = Inst.getOperand(2); + unsigned int RegPairNum = RI->getEncodingValue(MO.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + MO.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + Inst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + break; + } + + // Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) " + case Hexagon::CONST32: + case Hexagon::CONST32_Float_Real: + case Hexagon::CONST32_Int_Real: + case Hexagon::FCONST32_nsdata: + is32bit = true; + // Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) " + case Hexagon::CONST64_Float_Real: + case Hexagon::CONST64_Int_Real: + + // FIXME: need better way to detect AsmStreamer (upstream removed getKind()) + if (!Parser.getStreamer().hasRawTextSupport()) { + MCELFStreamer *MES = static_cast(&Parser.getStreamer()); + MCOperand &MO_1 = Inst.getOperand(1); + MCOperand &MO_0 = Inst.getOperand(0); + + // push section onto section stack + MES->PushSection(); + + std::string myCharStr; + MCSectionELF *mySection; + + // check if this as an immediate or a symbol + int64_t Value; + bool Absolute = MO_1.getExpr()->evaluateAsAbsolute(Value); + if (Absolute) { + // Create a new section - one for each constant + // Some or all of the zeros are replaced with the given immediate. 
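+      // For instance (immediate chosen only to illustrate the naming
+      // scheme): a 32-bit constant 0x2A gives myImmStr == "2A", so the
+      // section is named ".gnu.linkonce.l4.CONST_0000002A"; skipping the
+      // first 16 characters below yields ".CONST_0000002A" as the symbol
+      // name.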
+ if (is32bit) { + std::string myImmStr = utohexstr(static_cast(Value)); + myCharStr = StringRef(".gnu.linkonce.l4.CONST_00000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } else { + std::string myImmStr = utohexstr(Value); + myCharStr = StringRef(".gnu.linkonce.l8.CONST_0000000000000000") + .drop_back(myImmStr.size()) + .str() + + myImmStr; + } + + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else if (MO_1.isExpr()) { + // .lita - for expressions + myCharStr = ".lita"; + mySection = getContext().getELFSection(myCharStr, ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_WRITE); + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->SwitchSection(mySection); + unsigned byteSize = is32bit ? 4 : 8; + getStreamer().EmitCodeAlignment(byteSize, byteSize); + + MCSymbol *Sym; + + // for symbols, get rid of prepended ".gnu.linkonce.lx." + + // emit symbol if needed + if (Absolute) { + Sym = getContext().getOrCreateSymbol(StringRef(myCharStr.c_str() + 16)); + if (Sym->isUndefined()) { + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Global); + getStreamer().EmitIntValue(Value, byteSize); + } + } else if (MO_1.isExpr()) { + const char *StringStart = 0; + const char *StringEnd = 0; + if (*Operands[4]->getStartLoc().getPointer() == '#') { + StringStart = Operands[5]->getStartLoc().getPointer(); + StringEnd = Operands[6]->getStartLoc().getPointer(); + } else { // no pound + StringStart = Operands[4]->getStartLoc().getPointer(); + StringEnd = Operands[5]->getStartLoc().getPointer(); + } + + unsigned size = StringEnd - StringStart; + std::string DotConst = ".CONST_"; + Sym = getContext().getOrCreateSymbol(DotConst + + StringRef(StringStart, size)); + + if (Sym->isUndefined()) { + // case where symbol is not yet defined: emit symbol + getStreamer().EmitLabel(Sym); + getStreamer().EmitSymbolAttribute(Sym, MCSA_Local); + getStreamer().EmitValue(MO_1.getExpr(), 4); + } + } else + llvm_unreachable("unexpected type of machine operand!"); + + MES->PopSection(); + + if (Sym) { + MCInst TmpInst; + if (is32bit) // 32 bit + TmpInst.setOpcode(Hexagon::L2_loadrigp); + else // 64 bit + TmpInst.setOpcode(Hexagon::L2_loadrdgp); + + TmpInst.addOperand(MO_0); + TmpInst.addOperand( + MCOperand::createExpr(MCSymbolRefExpr::create(Sym, getContext()))); + Inst = TmpInst; + } + } + break; + + // Translate a "$Rdd = #-imm" to "$Rdd = combine(#[-1,0], #-imm)" + case Hexagon::A2_tfrpi: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + int sVal = (MO.getExpr()->evaluateAsAbsolute(Value) && Value < 0) ? 
-1 : 0; + MCOperand imm(MCOperand::createExpr(MCConstantExpr::create(sVal, Context))); + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, imm, MO); + break; + } + + // Translate a "$Rdd = [#]#imm" to "$Rdd = combine(#, [#]#imm)" + case Hexagon::TFRI64_V4: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO = Inst.getOperand(1); + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) { + unsigned long long u64 = Value; + signed int s8 = (u64 >> 32) & 0xFFFFFFFF; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(s8, Context))); // upper 32 + MCOperand imm2(MCOperand::createExpr( + MCConstantExpr::create(u64 & 0xFFFFFFFF, Context))); // lower 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, imm2); + } else { + MCOperand imm(MCOperand::createExpr( + MCConstantExpr::create(0, Context))); // upper 32 + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, imm, MO); + } + break; + } + + // Handle $Rdd = combine(##imm, #imm)" + case Hexagon::TFRI64_V2_ext: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + MCOperand &MO2 = Inst.getOperand(2); + int64_t Value; + if (MO2.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + Inst = makeCombineInst(Hexagon::A2_combineii, Rdd, MO1, MO2); + break; + } + + // Handle $Rdd = combine(#imm, ##imm)" + case Hexagon::A4_combineii: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &MO1 = Inst.getOperand(1); + int64_t Value; + if (MO1.getExpr()->evaluateAsAbsolute(Value)) { + int s8 = Value; + if (s8 < -128 || s8 > 127) + OutOfRange(IDLoc, s8, -128); + } + MCOperand &MO2 = Inst.getOperand(2); + Inst = makeCombineInst(Hexagon::A4_combineii, Rdd, MO1, MO2); + break; + } + + case Hexagon::S2_tableidxb_goodsyntax: { + Inst.setOpcode(Hexagon::S2_tableidxb); + break; + } + + case Hexagon::S2_tableidxh_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxh); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxw_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(2, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxw); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::S2_tableidxd_goodsyntax: { + MCInst TmpInst; + MCOperand &Rx = Inst.getOperand(0); + MCOperand &_dst_ = Inst.getOperand(1); + MCOperand &Rs = Inst.getOperand(2); + MCOperand &Imm4 = Inst.getOperand(3); + MCOperand &Imm6 = Inst.getOperand(4); + Imm6.setExpr(MCBinaryExpr::createSub( + Imm6.getExpr(), MCConstantExpr::create(3, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_tableidxd); + TmpInst.addOperand(Rx); + TmpInst.addOperand(_dst_); + 
TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm4); + TmpInst.addOperand(Imm6); + Inst = TmpInst; + break; + } + + case Hexagon::M2_mpyui: { + Inst.setOpcode(Hexagon::M2_mpyi); + break; + } + case Hexagon::M2_mpysmi: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (!MustExtend) { + if (Value < 0 && Value > -256) { + Imm.setExpr(MCConstantExpr::create(Value * -1, Context)); + TmpInst.setOpcode(Hexagon::M2_mpysin); + } else if (Value < 256 && Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } else { + if (Value >= 0) + TmpInst.setOpcode(Hexagon::M2_mpysip); + else + return Match_InvalidOperand; + } + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + MCInst TmpInst; + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rd = $Rs + TmpInst.setOpcode(Hexagon::A2_tfr); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.addOperand(Rd); + TmpInst.addOperand(Rs); + TmpInst.addOperand(Imm); + } + Inst = TmpInst; + break; + } + + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1]) + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. 
+ std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S2_asr_i_p_rnd); + } + break; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + Inst.setOpcode(Hexagon::A4_boundscheck_hi); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // raw:lo + Inst.setOpcode(Hexagon::A4_boundscheck_lo); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::A2_addsp: { + MCOperand &Rs = Inst.getOperand(1); + unsigned int RegNum = RI->getEncodingValue(Rs.getReg()); + if (RegNum & 1) { // Odd mapped to raw:hi + Inst.setOpcode(Hexagon::A2_addsph); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped raw:lo + Inst.setOpcode(Hexagon::A2_addspl); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rs.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::M2_vrcmpys_acc_s1: { + MCInst TmpInst; + MCOperand &Rxx = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(2); + MCOperand &Rt = Inst.getOperand(3); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to sat:raw:hi + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + std::string Name = + r + llvm::utostr_32(RegNum) + Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped sat:raw:lo + TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + // Registers are in different positions + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rxx); + TmpInst.addOperand(Rss); + TmpInst.addOperand(Rt); + Inst = TmpInst; + break; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + unsigned int RegNum = RI->getEncodingValue(Rt.getReg()); + if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + std::string Name = + r + llvm::utostr_32(RegNum) + 
Colon + llvm::utostr_32(RegNum - 1); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } else { // Even mapped rnd:sat:raw:lo + Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + std::string Name = + r + llvm::utostr_32(RegNum + 1) + Colon + llvm::utostr_32(RegNum); + StringRef RegPair = Name; + Rt.setReg(MatchRegisterName(RegPair)); + } + break; + } + + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) + Inst.setOpcode(Hexagon::S2_vsathub); + else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + } + break; + } + + case Hexagon::S5_vasrhrnd_goodsyntax: { + MCOperand &Rdd = Inst.getOperand(0); + MCOperand &Rss = Inst.getOperand(1); + MCOperand &Imm = Inst.getOperand(2); + int64_t Value; + bool Absolute = Imm.getExpr()->evaluateAsAbsolute(Value); + assert(Absolute); + (void)Absolute; + if (Value == 0) { + MCInst TmpInst; + unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg()); + std::string R1 = r + llvm::utostr_32(RegPairNum + 1); + StringRef Reg1(R1); + Rss.setReg(MatchRegisterName(Reg1)); + // Add a new operand for the second register in the pair. + std::string R2 = r + llvm::utostr_32(RegPairNum); + StringRef Reg2(R2); + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(Rdd); + TmpInst.addOperand(Rss); + TmpInst.addOperand(MCOperand::createReg(MatchRegisterName(Reg2))); + Inst = TmpInst; + } else { + Imm.setExpr(MCBinaryExpr::createSub( + Imm.getExpr(), MCConstantExpr::create(1, Context), Context)); + Inst.setOpcode(Hexagon::S5_vasrhrnd); + } + break; + } + + case Hexagon::A2_not: { + MCInst TmpInst; + MCOperand &Rd = Inst.getOperand(0); + MCOperand &Rs = Inst.getOperand(1); + TmpInst.setOpcode(Hexagon::A2_subri); + TmpInst.addOperand(Rd); + TmpInst.addOperand( + MCOperand::createExpr(MCConstantExpr::create(-1, Context))); + TmpInst.addOperand(Rs); + Inst = TmpInst; + break; + } + } // switch + + return Match_Success; +} diff --git a/lib/Target/Hexagon/AsmParser/LLVMBuild.txt b/lib/Target/Hexagon/AsmParser/LLVMBuild.txt new file mode 100644 index 000000000000..fdd875b61906 --- /dev/null +++ b/lib/Target/Hexagon/AsmParser/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Hexagon/AsmParser/LLVMBuild.txt --------------*- Conf -*-===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HexagonAsmParser +parent = Hexagon +required_libraries = MC MCParser Support HexagonDesc HexagonInfo +add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/AsmParser/Makefile b/lib/Target/Hexagon/AsmParser/Makefile new file mode 100644 index 000000000000..0aa0b4140c3e --- /dev/null +++ b/lib/Target/Hexagon/AsmParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/Target/Hexagon/AsmParser/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../../.. +LIBRARYNAME = LLVMHexagonAsmParser + +# Hack: we need to include 'main' Hexagon target directory to grab private headers +CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/Hexagon/BitTracker.cpp b/lib/Target/Hexagon/BitTracker.cpp index cb7e633fb82f..ea96eb0ee10a 100644 --- a/lib/Target/Hexagon/BitTracker.cpp +++ b/lib/Target/Hexagon/BitTracker.cpp @@ -868,7 +868,7 @@ void BT::visitNonBranch(const MachineInstr *MI) { continue; bool Changed = false; - if (!Eval || !ResMap.has(RD.Reg)) { + if (!Eval || ResMap.count(RD.Reg) == 0) { // Set to "ref" (aka "bottom"). uint16_t DefBW = ME.getRegBitWidth(RD); RegisterCell RefC = RegisterCell::self(RD.Reg, DefBW); @@ -951,11 +951,11 @@ void BT::visitBranchesFrom(const MachineInstr *BI) { // be processed. for (succ_iterator I = B.succ_begin(), E = B.succ_end(); I != E; ++I) { const MachineBasicBlock *SB = *I; - if (SB->isLandingPad()) + if (SB->isEHPad()) Targets.insert(SB); } if (FallsThrough) { - MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator BIt = B.getIterator(); MachineFunction::const_iterator Next = std::next(BIt); if (Next != MF.end()) Targets.insert(&*Next); @@ -1005,7 +1005,7 @@ void BT::put(RegisterRef RR, const RegisterCell &RC) { // Replace all references to bits from OldRR with the corresponding bits // in NewRR. void BT::subst(RegisterRef OldRR, RegisterRef NewRR) { - assert(Map.has(OldRR.Reg) && "OldRR not present in map"); + assert(Map.count(OldRR.Reg) > 0 && "OldRR not present in map"); BitMask OM = ME.mask(OldRR.Reg, OldRR.Sub); BitMask NM = ME.mask(NewRR.Reg, NewRR.Sub); uint16_t OMB = OM.first(), OME = OM.last(); @@ -1104,9 +1104,9 @@ void BT::run() { } // If block end has been reached, add the fall-through edge to the queue. 
if (It == End) { - MachineFunction::const_iterator BIt = &B; + MachineFunction::const_iterator BIt = B.getIterator(); MachineFunction::const_iterator Next = std::next(BIt); - if (Next != MF.end()) { + if (Next != MF.end() && B.isSuccessor(&*Next)) { int ThisN = B.getNumber(); int NextN = Next->getNumber(); FlowQ.push(CFGEdge(ThisN, NextN)); } diff --git a/lib/Target/Hexagon/BitTracker.h b/lib/Target/Hexagon/BitTracker.h index ed002a794d66..959c8318fd60 100644 --- a/lib/Target/Hexagon/BitTracker.h +++ b/lib/Target/Hexagon/BitTracker.h @@ -36,9 +36,7 @@ struct BitTracker { typedef SetVector<const MachineBasicBlock *> BranchTargetList; - struct CellMapType : public std::map<unsigned, RegisterCell> { - bool has(unsigned Reg) const; - }; + typedef std::map<unsigned, RegisterCell> CellMapType; BitTracker(const MachineEvaluator &E, MachineFunction &F); ~BitTracker(); @@ -79,7 +77,6 @@ private: // Abstraction of a reference to bit at position Pos from a register Reg. struct BitTracker::BitRef { BitRef(unsigned R = 0, uint16_t P = 0) : Reg(R), Pos(P) {} - BitRef(const BitRef &BR) : Reg(BR.Reg), Pos(BR.Pos) {} bool operator== (const BitRef &BR) const { // If Reg is 0, disregard Pos. return Reg == BR.Reg && (Reg == 0 || Pos == BR.Pos); @@ -146,7 +143,6 @@ struct BitTracker::BitValue { BitValue(ValueType T = Top) : Type(T) {} BitValue(bool B) : Type(B ? One : Zero) {} - BitValue(const BitValue &V) : Type(V.Type), RefI(V.RefI) {} BitValue(unsigned Reg, uint16_t Pos) : Type(Ref), RefI(Reg, Pos) {} bool operator== (const BitValue &V) const { @@ -279,11 +275,6 @@ struct BitTracker::RegisterCell { return !operator==(RC); } - const RegisterCell &operator=(const RegisterCell &RC) { - Bits = RC.Bits; - return *this; - } - // Generate a "ref" cell for the corresponding register. In the resulting // cell each bit will be described as being the same as the corresponding // bit in register Reg (i.e. the cell is "defined" by register Reg). @@ -344,11 +335,6 @@ BitTracker::RegisterCell::ref(const RegisterCell &C) { return RC; } - -inline bool BitTracker::CellMapType::has(unsigned Reg) const { - return find(Reg) != end(); -} - // A class to evaluate target's instructions and update the cell maps. // This is used internally by the bit tracker.
A target that wants to // utilize this should implement the evaluation functions (noted below) diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt index 7ab2f0ba01df..181e4e3aa85d 100644 --- a/lib/Target/Hexagon/CMakeLists.txt +++ b/lib/Target/Hexagon/CMakeLists.txt @@ -1,5 +1,6 @@ set(LLVM_TARGET_DEFINITIONS Hexagon.td) +tablegen(LLVM HexagonGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM HexagonGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM HexagonGenCallingConv.inc -gen-callingconv) tablegen(LLVM HexagonGenDAGISel.inc -gen-dag-isel) @@ -14,16 +15,19 @@ add_public_tablegen_target(HexagonCommonTableGen) add_llvm_target(HexagonCodeGen BitTracker.cpp HexagonAsmPrinter.cpp + HexagonBitSimplify.cpp HexagonBitTracker.cpp HexagonCFGOptimizer.cpp HexagonCommonGEP.cpp HexagonCopyToCombine.cpp + HexagonEarlyIfConv.cpp HexagonExpandCondsets.cpp HexagonExpandPredSpillCode.cpp HexagonFixupHwLoops.cpp HexagonFrameLowering.cpp HexagonGenExtract.cpp HexagonGenInsert.cpp + HexagonGenMux.cpp HexagonGenPredicate.cpp HexagonHardwareLoops.cpp HexagonInstrInfo.cpp @@ -33,17 +37,21 @@ add_llvm_target(HexagonCodeGen HexagonMachineScheduler.cpp HexagonMCInstLower.cpp HexagonNewValueJump.cpp + HexagonOptimizeSZextends.cpp HexagonPeephole.cpp HexagonRegisterInfo.cpp - HexagonRemoveSZExtArgs.cpp HexagonSelectionDAGInfo.cpp HexagonSplitConst32AndConst64.cpp + HexagonSplitDouble.cpp + HexagonStoreWidening.cpp HexagonSubtarget.cpp HexagonTargetMachine.cpp HexagonTargetObjectFile.cpp + HexagonTargetTransformInfo.cpp HexagonVLIWPacketizer.cpp ) +add_subdirectory(AsmParser) add_subdirectory(TargetInfo) add_subdirectory(MCTargetDesc) add_subdirectory(Disassembler) diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 9cc1e944d359..4a9c3413cb29 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -7,42 +7,45 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "hexagon-disassembler" + #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" -#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonMCChecker.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" - -#include "llvm/MC/MCContext.h" +#include "MCTargetDesc/HexagonMCInstrInfo.h" +#include "MCTargetDesc/HexagonInstPrinter.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixedLenDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/MemoryObject.h" #include "llvm/Support/raw_ostream.h" -#include +#include "llvm/Support/TargetRegistry.h" #include using namespace llvm; using namespace Hexagon; -#define DEBUG_TYPE "hexagon-disassembler" - -// Pull DecodeStatus and its enum values into the global namespace. -typedef llvm::MCDisassembler::DecodeStatus DecodeStatus; +typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { /// \brief Hexagon disassembler for all Hexagon platforms. 
class HexagonDisassembler : public MCDisassembler { public: + std::unique_ptr const MCII; std::unique_ptr CurrentBundle; - HexagonDisassembler(MCSubtargetInfo const &STI, MCContext &Ctx) - : MCDisassembler(STI, Ctx), CurrentBundle(new MCInst *) {} + HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, + MCInstrInfo const *MCII) + : MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {} DecodeStatus getSingleInstruction(MCInst &Instr, MCInst &MCB, ArrayRef Bytes, uint64_t Address, @@ -52,23 +55,57 @@ public: ArrayRef Bytes, uint64_t Address, raw_ostream &VStream, raw_ostream &CStream) const override; + + void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const; + void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const; }; } -static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, +// Forward declare these because the auto-generated code will reference them. +// Definitions are further down. + +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, - void const *Decoder); + const void *Decoder); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn); +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder); static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op, raw_ostream &os); -static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst); +static unsigned getRegFromSubinstEncoding(unsigned encoded_reg); + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t Address, const void *Decoder); static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, @@ -95,129 +132,19 @@ static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, const void *Decoder); - -static const uint16_t IntRegDecoderTable[] = { - Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, - Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, - Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, - Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, - Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, - Hexagon::R25, 
Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, - Hexagon::R30, Hexagon::R31}; - -static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, - Hexagon::P2, Hexagon::P3}; - -static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, - const uint16_t Table[], size_t Size) { - if (RegNo < Size) { - Inst.addOperand(MCOperand::createReg(Table[RegNo])); - return MCDisassembler::Success; - } else - return MCDisassembler::Fail; -} - -static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { - if (RegNo > 31) - return MCDisassembler::Fail; - - unsigned Register = IntRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { - static const uint16_t CtrlRegDecoderTable[] = { - Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, - Hexagon::P3_0, Hexagon::NoRegister, Hexagon::C6, Hexagon::C7, - Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, - Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPCH}; - - if (RegNo >= sizeof(CtrlRegDecoderTable) / sizeof(CtrlRegDecoderTable[0])) - return MCDisassembler::Fail; - - if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister) - return MCDisassembler::Fail; - - unsigned Register = CtrlRegDecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { - static const uint16_t CtrlReg64DecoderTable[] = { - Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2, - Hexagon::NoRegister, Hexagon::NoRegister, Hexagon::NoRegister, - Hexagon::C7_6, Hexagon::NoRegister, Hexagon::C9_8, - Hexagon::NoRegister, Hexagon::C11_10, Hexagon::NoRegister, - Hexagon::CS, Hexagon::NoRegister, Hexagon::UPC, - Hexagon::NoRegister}; - - if (RegNo >= sizeof(CtrlReg64DecoderTable) / sizeof(CtrlReg64DecoderTable[0])) - return MCDisassembler::Fail; - - if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) - return MCDisassembler::Fail; - - unsigned Register = CtrlReg64DecoderTable[RegNo]; - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { - unsigned Register = 0; - switch (RegNo) { - case 0: - Register = Hexagon::M0; - break; - case 1: - Register = Hexagon::M1; - break; - default: - return MCDisassembler::Fail; - } - Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} - -static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - const void *Decoder) { - static const uint16_t DoubleRegDecoderTable[] = { - Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, - Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, - Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, - Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; - - return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable, - sizeof(DoubleRegDecoderTable))); -} - -static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, - uint64_t /*Address*/, - void const *Decoder) { - if (RegNo > 3) - return MCDisassembler::Fail; - - unsigned Register = PredRegDecoderTable[RegNo]; - 
Inst.addOperand(MCOperand::createReg(Register)); - return MCDisassembler::Success; -} +static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder); #include "HexagonGenDisassemblerTables.inc" -static MCDisassembler *createHexagonDisassembler(Target const &T, - MCSubtargetInfo const &STI, +static MCDisassembler *createHexagonDisassembler(const Target &T, + const MCSubtargetInfo &STI, MCContext &Ctx) { - return new HexagonDisassembler(STI, Ctx); + return new HexagonDisassembler(STI, Ctx, T.createMCInstrInfo()); } extern "C" void LLVMInitializeHexagonDisassembler() { @@ -235,8 +162,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Size = 0; *CurrentBundle = &MI; - MI.setOpcode(Hexagon::BUNDLE); - MI.addOperand(MCOperand::createImm(0)); + MI = HexagonMCInstrInfo::createBundle(); while (Result == Success && Complete == false) { if (Bytes.size() < HEXAGON_INSTR_SIZE) return MCDisassembler::Fail; @@ -246,7 +172,21 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Size += HEXAGON_INSTR_SIZE; Bytes = Bytes.slice(HEXAGON_INSTR_SIZE); } - return Result; + if(Result == MCDisassembler::Fail) + return Result; + HexagonMCChecker Checker (*MCII, STI, MI, MI, *getContext().getRegisterInfo()); + if(!Checker.check()) + return MCDisassembler::Fail; + return MCDisassembler::Success; +} + +namespace { +HexagonDisassembler const &disassembler(void const *Decoder) { + return *static_cast<HexagonDisassembler const *>(Decoder); +} +MCContext &contextFromDecoder(void const *Decoder) { + return disassembler(Decoder).getContext(); +} } DecodeStatus HexagonDisassembler::getSingleInstruction( @@ -255,8 +195,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( assert(Bytes.size() >= HEXAGON_INSTR_SIZE); uint32_t Instruction = - llvm::support::endian::read(Bytes.data()); + (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); auto BundleSize = HexagonMCInstrInfo::bundleSize(MCB); if ((Instruction & HexagonII::INST_PARSE_MASK) == @@ -360,8 +299,8 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( MILow->setOpcode(opLow); MCInst *MIHigh = new (getContext()) MCInst; MIHigh->setOpcode(opHigh); - AddSubinstOperands(MILow, opLow, instLow); - AddSubinstOperands(MIHigh, opHigh, instHigh); + addSubinstOperands(MILow, opLow, instLow); + addSubinstOperands(MIHigh, opHigh, instHigh); // see ConvertToSubInst() in // lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp @@ -378,102 +317,774 @@ DecodeStatus HexagonDisassembler::getSingleInstruction( // Calling the auto-generated decoder function. Result = decodeInstruction(DecoderTable32, MI, Instruction, Address, this, STI); + + // If a "standard" insn isn't found, check special cases. + if (MCDisassembler::Success != Result || + MI.getOpcode() == Hexagon::A4_ext) { + Result = decodeImmext(MI, Instruction, this); + if (MCDisassembler::Success != Result) { + Result = decodeSpecial(MI, Instruction); + } + } else { + // If the instruction is a compound instruction, register values will + // follow the duplex model, so the register values in the MCInst are + // incorrect. If the instruction is a compound, loop through the + // operands and change registers appropriately.
+ if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) == + HexagonII::TypeCOMPOUND) { + for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) { + if (i->isReg()) { + unsigned reg = i->getReg() - Hexagon::R0; + i->setReg(getRegFromSubinstEncoding(reg)); + } + } + } + } + } + if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) { + unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI); + MCOperand &MCO = MI.getOperand(OpIndex); + assert(MCO.isReg() && "New value consumers must be registers"); + unsigned Register = + getContext().getRegisterInfo()->getEncodingValue(MCO.getReg()); + if ((Register & 0x6) == 0) + // HexagonPRM 10.11 Bit 1-2 == 0 is reserved + return MCDisassembler::Fail; + unsigned Lookback = (Register & 0x6) >> 1; + unsigned Offset = 1; + bool Vector = HexagonMCInstrInfo::isVector(*MCII, MI); + auto Instructions = HexagonMCInstrInfo::bundleInstructions(**CurrentBundle); + auto i = Instructions.end() - 1; + for (auto n = Instructions.begin() - 1;; --i, ++Offset) { + if (i == n) + // Couldn't find producer + return MCDisassembler::Fail; + if (Vector && !HexagonMCInstrInfo::isVector(*MCII, *i->getInst())) + // Skip scalars when calculating distances for vectors + ++Lookback; + if (HexagonMCInstrInfo::isImmext(*i->getInst())) + ++Lookback; + if (Offset == Lookback) + break; + } + auto const &Inst = *i->getInst(); + bool SubregBit = (Register & 0x1) != 0; + if (SubregBit && HexagonMCInstrInfo::hasNewValue2(*MCII, Inst)) { + // If subreg bit is set we're selecting the second produced newvalue + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand2(*MCII, Inst).getReg(); + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else if (HexagonMCInstrInfo::hasNewValue(*MCII, Inst)) { + unsigned Producer = + HexagonMCInstrInfo::getNewValueOperand(*MCII, Inst).getReg(); + if (Producer >= Hexagon::W0 && Producer <= Hexagon::W15) + Producer = ((Producer - Hexagon::W0) << 1) + SubregBit + Hexagon::V0; + else if (SubregBit) + // Subreg bit should not be set for non-doublevector newvalue producers + return MCDisassembler::Fail; + assert(Producer != Hexagon::NoRegister); + MCO.setReg(Producer); + } else + return MCDisassembler::Fail; + } + + adjustExtendedInstructions(MI, MCB); + MCInst const *Extender = + HexagonMCInstrInfo::extenderForIndex(MCB, + HexagonMCInstrInfo::bundleSize(MCB)); + if(Extender != nullptr) { + MCInst const & Inst = HexagonMCInstrInfo::isDuplex(*MCII, MI) ? + *MI.getOperand(1).getInst() : MI; + if (!HexagonMCInstrInfo::isExtendable(*MCII, Inst) && + !HexagonMCInstrInfo::isExtended(*MCII, Inst)) + return MCDisassembler::Fail; + } return Result; } +void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI, + MCInst const &MCB) const { + if (!HexagonMCInstrInfo::hasExtenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB))) { + unsigned opcode; + // This code is used by the disassembler to disambiguate between GP + // relative and absolute addressing instructions since they both have + // the same encoding bits. However, an absolute addressing instruction must + // follow an immediate extender. The disassembler always selects absolute + // addressing instructions first and uses this code to change them into + // GP relative instructions in the absence of the corresponding immediate + // extender.
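+    // A purely illustrative pairing (hypothetical packets, not from this
+    // patch):
+    //   { immext(#0x12340)
+    //     memw(##0x12345) = r0 }   // extender present: keep the _abs form
+    //   { memw(gp + #20) = r0 }    // no extender: use the _gp form
+    // Both stores use the same instruction-word encoding; only the immext
+    // word in front distinguishes them.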
+ switch (MCI.getOpcode()) { + case Hexagon::S2_storerbabs: + opcode = Hexagon::S2_storerbgp; + break; + case Hexagon::S2_storerhabs: + opcode = Hexagon::S2_storerhgp; + break; + case Hexagon::S2_storerfabs: + opcode = Hexagon::S2_storerfgp; + break; + case Hexagon::S2_storeriabs: + opcode = Hexagon::S2_storerigp; + break; + case Hexagon::S2_storerbnewabs: + opcode = Hexagon::S2_storerbnewgp; + break; + case Hexagon::S2_storerhnewabs: + opcode = Hexagon::S2_storerhnewgp; + break; + case Hexagon::S2_storerinewabs: + opcode = Hexagon::S2_storerinewgp; + break; + case Hexagon::S2_storerdabs: + opcode = Hexagon::S2_storerdgp; + break; + case Hexagon::L4_loadrb_abs: + opcode = Hexagon::L2_loadrbgp; + break; + case Hexagon::L4_loadrub_abs: + opcode = Hexagon::L2_loadrubgp; + break; + case Hexagon::L4_loadrh_abs: + opcode = Hexagon::L2_loadrhgp; + break; + case Hexagon::L4_loadruh_abs: + opcode = Hexagon::L2_loadruhgp; + break; + case Hexagon::L4_loadri_abs: + opcode = Hexagon::L2_loadrigp; + break; + case Hexagon::L4_loadrd_abs: + opcode = Hexagon::L2_loadrdgp; + break; + default: + opcode = MCI.getOpcode(); + } + MCI.setOpcode(opcode); + } +} + +namespace llvm { +extern const MCInstrDesc HexagonInsts[]; +} + +static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, + ArrayRef Table) { + if (RegNo < Table.size()) { + Inst.addOperand(MCOperand::createReg(Table[RegNo])); + return MCDisassembler::Success; + } + + return MCDisassembler::Fail; +} + +static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeIntRegsRegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + static const MCPhysReg IntRegDecoderTable[] = { + Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, + Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, + Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, + Hexagon::R15, Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19, + Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23, Hexagon::R24, + Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, + Hexagon::R30, Hexagon::R31}; + + return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable); +} + +static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecRegDecoderTable[] = { + Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, + Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14, + Hexagon::V15, Hexagon::V16, Hexagon::V17, Hexagon::V18, Hexagon::V19, + Hexagon::V20, Hexagon::V21, Hexagon::V22, Hexagon::V23, Hexagon::V24, + Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29, + Hexagon::V30, Hexagon::V31}; + + return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable); +} + +static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg DoubleRegDecoderTable[] = { + Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, + Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, + Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, + Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; + + return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable); 
+} + +static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecDblRegDecoderTable[] = { + Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7, + Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11, + Hexagon::W12, Hexagon::W13, Hexagon::W14, Hexagon::W15}; + + return (DecodeRegisterClass(Inst, RegNo >> 1, VecDblRegDecoderTable)); +} + +static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, + Hexagon::P2, Hexagon::P3}; + + return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable); +} + +static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, + Hexagon::Q2, Hexagon::Q3}; + + return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable); +} + +static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlRegDecoderTable[] = { + Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, + Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7, + Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, + Hexagon::CS0, Hexagon::CS1, Hexagon::UPCL, Hexagon::UPC + }; + + if (RegNo >= array_lengthof(CtrlRegDecoderTable)) + return MCDisassembler::Fail; + + if (CtrlRegDecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlRegDecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + static const MCPhysReg CtrlReg64DecoderTable[] = { + Hexagon::C1_0, Hexagon::NoRegister, + Hexagon::C3_2, Hexagon::NoRegister, + Hexagon::C7_6, Hexagon::NoRegister, + Hexagon::C9_8, Hexagon::NoRegister, + Hexagon::C11_10, Hexagon::NoRegister, + Hexagon::CS, Hexagon::NoRegister, + Hexagon::UPC, Hexagon::NoRegister + }; + + if (RegNo >= array_lengthof(CtrlReg64DecoderTable)) + return MCDisassembler::Fail; + + if (CtrlReg64DecoderTable[RegNo] == Hexagon::NoRegister) + return MCDisassembler::Fail; + + unsigned Register = CtrlReg64DecoderTable[RegNo]; + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t /*Address*/, + const void *Decoder) { + unsigned Register = 0; + switch (RegNo) { + case 0: + Register = Hexagon::M0; + break; + case 1: + Register = Hexagon::M1; + break; + default: + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::createReg(Register)); + return MCDisassembler::Success; +} + +namespace { +uint32_t fullValue(MCInstrInfo const &MCII, + MCInst &MCB, + MCInst &MI, + int64_t Value) { + MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex( + MCB, HexagonMCInstrInfo::bundleSize(MCB)); + if(!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI)) + return Value; + unsigned Alignment = HexagonMCInstrInfo::getExtentAlignment(MCII, MI); + uint32_t Lower6 = static_cast(Value >> Alignment) & 0x3f; + int64_t Bits; + bool Success = Extender->getOperand(0).getExpr()->evaluateAsAbsolute(Bits); + 
assert(Success);(void)Success; + uint32_t Upper26 = static_cast(Bits); + uint32_t Operand = Upper26 | Lower6; + return Operand; +} +template +void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64(tmp)); + int64_t Extended = SignExtend64<32>(FullValue); + HexagonMCInstrInfo::addConstant(MI, Extended, + Disassembler.getContext()); +} +} + +static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + int64_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, tmp); + assert(FullValue >= 0 && "Negative in unsigned decoder"); + HexagonMCInstrInfo::addConstant(MI, FullValue, Disassembler.getContext()); + return MCDisassembler::Success; +} + static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<16>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<16>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<12>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<12>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<11>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<11>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s11_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<12>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + HexagonMCInstrInfo::addConstant(MI, SignExtend64<12>(tmp), contextFromDecoder(Decoder)); return MCDisassembler::Success; } static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<13>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<13>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<14>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<14>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<10>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<10>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<8>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<8>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<6>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<4>(tmp); - 
MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<4>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_1ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<5>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<5>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<6>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<6>(MI, tmp, Decoder); return MCDisassembler::Success; } static DecodeStatus s4_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/, const void *Decoder) { - uint64_t imm = SignExtend64<7>(tmp); - MI.addOperand(MCOperand::createImm(imm)); + signedDecoder<7>(MI, tmp, Decoder); return MCDisassembler::Success; } +static DecodeStatus s4_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<10>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +static DecodeStatus s3_6ImmDecoder(MCInst &MI, unsigned tmp, + uint64_t /*Address*/, const void *Decoder) { + signedDecoder<19>(MI, tmp, Decoder); + return MCDisassembler::Success; +} + +// custom decoder for various jump/call immediates +static DecodeStatus brtargetDecoder(MCInst &MI, unsigned tmp, uint64_t Address, + const void *Decoder) { + HexagonDisassembler const &Disassembler = disassembler(Decoder); + unsigned Bits = HexagonMCInstrInfo::getExtentBits(*Disassembler.MCII, MI); + // r13_2 is not extendable, so if there are no extent bits, it's r13_2 + if (Bits == 0) + Bits = 15; + uint32_t FullValue = fullValue(*Disassembler.MCII, + **Disassembler.CurrentBundle, + MI, SignExtend64(tmp, Bits)); + int64_t Extended = SignExtend64<32>(FullValue) + Address; + if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, + 0, 4)) + HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext()); + return MCDisassembler::Success; +} + +// Addressing mode dependent load store opcode map. +// - If an insn is preceded by an extender the address is absolute. +// - memw(##symbol) = r0 +// - If an insn is not preceded by an extender the address is GP relative. +// - memw(gp + #symbol) = r0 +// Please note that the instructions must be ordered in the descending order +// of their opcode. 
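+// As an illustration of why the ordering matters: the scan in decodeSpecial()
+// below keeps the first row whose fixed bits are all present in the word,
+// i.e. it tests (insn & bits) == bits. Any word that matches
+// S2_storerinewabs (0x48a01000) also passes that test for S2_storeriabs
+// (0x48800000), so the more specific new-value pattern must be tried first.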
+// HexagonII::INST_ICLASS_ST +static const unsigned int StoreConditionalOpcodeData[][2] = { + {S4_pstorerdfnew_abs, 0xafc02084}, + {S4_pstorerdtnew_abs, 0xafc02080}, + {S4_pstorerdf_abs, 0xafc00084}, + {S4_pstorerdt_abs, 0xafc00080}, + {S4_pstorerinewfnew_abs, 0xafa03084}, + {S4_pstorerinewtnew_abs, 0xafa03080}, + {S4_pstorerhnewfnew_abs, 0xafa02884}, + {S4_pstorerhnewtnew_abs, 0xafa02880}, + {S4_pstorerbnewfnew_abs, 0xafa02084}, + {S4_pstorerbnewtnew_abs, 0xafa02080}, + {S4_pstorerinewf_abs, 0xafa01084}, + {S4_pstorerinewt_abs, 0xafa01080}, + {S4_pstorerhnewf_abs, 0xafa00884}, + {S4_pstorerhnewt_abs, 0xafa00880}, + {S4_pstorerbnewf_abs, 0xafa00084}, + {S4_pstorerbnewt_abs, 0xafa00080}, + {S4_pstorerifnew_abs, 0xaf802084}, + {S4_pstoreritnew_abs, 0xaf802080}, + {S4_pstorerif_abs, 0xaf800084}, + {S4_pstorerit_abs, 0xaf800080}, + {S4_pstorerhfnew_abs, 0xaf402084}, + {S4_pstorerhtnew_abs, 0xaf402080}, + {S4_pstorerhf_abs, 0xaf400084}, + {S4_pstorerht_abs, 0xaf400080}, + {S4_pstorerbfnew_abs, 0xaf002084}, + {S4_pstorerbtnew_abs, 0xaf002080}, + {S4_pstorerbf_abs, 0xaf000084}, + {S4_pstorerbt_abs, 0xaf000080}}; +// HexagonII::INST_ICLASS_LD + +// HexagonII::INST_ICLASS_LD_ST_2 +static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000}, + {L4_loadri_abs, 0x49800000}, + {L4_loadruh_abs, 0x49600000}, + {L4_loadrh_abs, 0x49400000}, + {L4_loadrub_abs, 0x49200000}, + {L4_loadrb_abs, 0x49000000}, + {S2_storerdabs, 0x48c00000}, + {S2_storerinewabs, 0x48a01000}, + {S2_storerhnewabs, 0x48a00800}, + {S2_storerbnewabs, 0x48a00000}, + {S2_storeriabs, 0x48800000}, + {S2_storerfabs, 0x48600000}, + {S2_storerhabs, 0x48400000}, + {S2_storerbabs, 0x48000000}}; +static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData); +static const size_t NumLS = array_lengthof(LoadStoreOpcodeData); + +static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) { + + unsigned MachineOpcode = 0; + unsigned LLVMOpcode = 0; + + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_ST) { + for (size_t i = 0; i < NumCondS; ++i) { + if ((insn & StoreConditionalOpcodeData[i][1]) == + StoreConditionalOpcodeData[i][1]) { + MachineOpcode = StoreConditionalOpcodeData[i][1]; + LLVMOpcode = StoreConditionalOpcodeData[i][0]; + break; + } + } + } + if ((insn & HexagonII::INST_ICLASS_MASK) == HexagonII::INST_ICLASS_LD_ST_2) { + for (size_t i = 0; i < NumLS; ++i) { + if ((insn & LoadStoreOpcodeData[i][1]) == LoadStoreOpcodeData[i][1]) { + MachineOpcode = LoadStoreOpcodeData[i][1]; + LLVMOpcode = LoadStoreOpcodeData[i][0]; + break; + } + } + } + + if (MachineOpcode) { + unsigned Value = 0; + unsigned shift = 0; + MI.setOpcode(LLVMOpcode); + // Remove the parse bits from the insn. 
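+    // (The parse bits, bits 15:14 of every instruction word, encode packet
+    // structure rather than operand fields, so they are masked off before
+    // the field extractions below.)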
+ insn &= ~HexagonII::INST_PARSE_MASK; + + switch (LLVMOpcode) { + default: + return MCDisassembler::Fail; + break; + + case Hexagon::S4_pstorerdf_abs: + case Hexagon::S4_pstorerdt_abs: + case Hexagon::S4_pstorerdfnew_abs: + case Hexagon::S4_pstorerdtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbnewf_abs: + case Hexagon::S4_pstorerbnewt_abs: + case Hexagon::S4_pstorerbnewfnew_abs: + case Hexagon::S4_pstorerbnewtnew_abs: + case Hexagon::S4_pstorerhnewf_abs: + case Hexagon::S4_pstorerhnewt_abs: + case Hexagon::S4_pstorerhnewfnew_abs: + case Hexagon::S4_pstorerhnewtnew_abs: + case Hexagon::S4_pstorerinewf_abs: + case Hexagon::S4_pstorerinewt_abs: + case Hexagon::S4_pstorerinewfnew_abs: + case Hexagon::S4_pstorerinewtnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::S4_pstorerbf_abs: + case Hexagon::S4_pstorerbt_abs: + case Hexagon::S4_pstorerbfnew_abs: + case Hexagon::S4_pstorerbtnew_abs: + case Hexagon::S4_pstorerhf_abs: + case Hexagon::S4_pstorerht_abs: + case Hexagon::S4_pstorerhfnew_abs: + case Hexagon::S4_pstorerhtnew_abs: + case Hexagon::S4_pstorerif_abs: + case Hexagon::S4_pstorerit_abs: + case Hexagon::S4_pstorerifnew_abs: + case Hexagon::S4_pstoreritnew_abs: { + // op: Pv + Value = insn & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = (insn >> 12) & UINT64_C(48); + Value |= (insn >> 3) & UINT64_C(15); + MI.addOperand(MCOperand::createImm(Value)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + case Hexagon::L4_ploadrdf_abs: + case Hexagon::L4_ploadrdt_abs: + case Hexagon::L4_ploadrdfnew_abs: + case Hexagon::L4_ploadrdtnew_abs: { + // op: Rdd + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = ((insn >> 9) & UINT64_C(3)); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 + Value = ((insn >> 15) & UINT64_C(62)); + Value |= ((insn >> 8) & UINT64_C(1)); + MI.addOperand(MCOperand::createImm(Value)); + break; + } + + case Hexagon::L4_ploadrbf_abs: + case Hexagon::L4_ploadrbt_abs: + case Hexagon::L4_ploadrbfnew_abs: + case Hexagon::L4_ploadrbtnew_abs: + case Hexagon::L4_ploadrhf_abs: + case Hexagon::L4_ploadrht_abs: + case Hexagon::L4_ploadrhfnew_abs: + case Hexagon::L4_ploadrhtnew_abs: + case Hexagon::L4_ploadrubf_abs: + case Hexagon::L4_ploadrubt_abs: + case Hexagon::L4_ploadrubfnew_abs: + case Hexagon::L4_ploadrubtnew_abs: + case Hexagon::L4_ploadruhf_abs: + case Hexagon::L4_ploadruht_abs: + case Hexagon::L4_ploadruhfnew_abs: + case Hexagon::L4_ploadruhtnew_abs: + case Hexagon::L4_ploadrif_abs: + case Hexagon::L4_ploadrit_abs: + case Hexagon::L4_ploadrifnew_abs: + case Hexagon::L4_ploadritnew_abs: + // op: Rd + Value = insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + // op: Pt + Value = (insn >> 9) & UINT64_C(3); + DecodePredRegsRegisterClass(MI, Value, 0, 0); + // op: u6 
+ Value = (insn >> 15) & UINT64_C(62); + Value |= (insn >> 8) & UINT64_C(1); + MI.addOperand(MCOperand::createImm(Value)); + break; + + // op: g16_2 + case (Hexagon::L4_loadri_abs): + ++shift; + // op: g16_1 + case Hexagon::L4_loadrh_abs: + case Hexagon::L4_loadruh_abs: + ++shift; + // op: g16_0 + case Hexagon::L4_loadrb_abs: + case Hexagon::L4_loadrub_abs: { + // op: Rd + Value |= insn & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << shift)); + break; + } + + case Hexagon::L4_loadrd_abs: { + Value = insn & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(511); + MI.addOperand(MCOperand::createImm(Value << 3)); + break; + } + + case Hexagon::S2_storerdabs: { + // op: g16_3 + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << 3)); + // op: Rtt + Value = (insn >> 8) & UINT64_C(31); + DecodeDoubleRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storerinewabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhnewabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbnewabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Nt + Value = (insn >> 8) & UINT64_C(7); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + + // op: g16_2 + case Hexagon::S2_storeriabs: + ++shift; + // op: g16_1 + case Hexagon::S2_storerhabs: + case Hexagon::S2_storerfabs: + ++shift; + // op: g16_0 + case Hexagon::S2_storerbabs: { + Value = (insn >> 11) & UINT64_C(49152); + Value |= (insn >> 7) & UINT64_C(15872); + Value |= (insn >> 5) & UINT64_C(256); + Value |= insn & UINT64_C(255); + MI.addOperand(MCOperand::createImm(Value << shift)); + // op: Rt + Value = (insn >> 8) & UINT64_C(31); + DecodeIntRegsRegisterClass(MI, Value, 0, 0); + break; + } + } + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + +static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn, + void const *Decoder) { + + // Instruction Class for a constant extender: bits 31:28 = 0x0000 + if ((~insn & 0xf0000000) == 0xf0000000) { + unsigned Value; + // 27:16 High 12 bits of 26-bit extender. + Value = (insn & 0x0fff0000) << 4; + // 13:0 Low 14 bits of 26-bit extender.
+ Value |= ((insn & 0x3fff) << 6); + MI.setOpcode(Hexagon::A4_ext); + HexagonMCInstrInfo::addConstant(MI, Value, contextFromDecoder(Decoder)); + return MCDisassembler::Success; + } + return MCDisassembler::Fail; +} + // These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td enum subInstBinaryValues { V4_SA1_addi_BITS = 0x0000, @@ -731,6 +1342,8 @@ static unsigned getRegFromSubinstEncoding(unsigned encoded_reg) { return Hexagon::R0 + encoded_reg; else if (encoded_reg < 16) return Hexagon::R0 + encoded_reg + 8; + + // patently false value return Hexagon::NoRegister; } @@ -739,10 +1352,13 @@ static unsigned getDRegFromSubinstEncoding(unsigned encoded_dreg) { return Hexagon::D0 + encoded_dreg; else if (encoded_dreg < 8) return Hexagon::D0 + encoded_dreg + 4; + + // patently false value return Hexagon::NoRegister; } -static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { +void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode, + unsigned inst) const { int64_t operand; MCOperand Op; switch (opcode) { @@ -762,8 +1378,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { case Hexagon::V4_SS2_allocframe: // u 8-4{5_3} operand = ((inst & 0x1f0) >> 4) << 3; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL1_loadri_io: // Rd 3-0, Rs 7-4, u 11-8{4_2} @@ -774,8 +1389,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf00) >> 6; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL1_loadrub_io: // Rd 3-0, Rs 7-4, u 11-8 @@ -786,8 +1400,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf00) >> 8; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadrb_io: // Rd 3-0, Rs 7-4, u 10-8 @@ -798,8 +1411,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0x700) >> 8; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadrh_io: case Hexagon::V4_SL2_loadruh_io: @@ -811,8 +1423,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x700) >> 8) << 1; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadrd_sp: // Rdd 2-0, u 7-3{5_3} @@ -820,8 +1431,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x0f8) >> 3) << 3; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SL2_loadri_sp: // Rd 3-0, u 8-4{5_2} @@ -829,8 +1439,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x1f0) >> 4) << 2; - Op = MCOperand::createImm(operand); - 
MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_addi: // Rx 3-0 (x2), s7 10-4 @@ -839,8 +1448,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { MI->addOperand(Op); MI->addOperand(Op); operand = SignExtend64<7>((inst & 0x7f0) >> 4); - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_addrx: // Rx 3-0 (x2), Rs 7-4 @@ -873,8 +1481,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x3f0) >> 4) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_seti: // Rd 3-0, u 9-4 @@ -882,8 +1489,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0x3f0) >> 4; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_clrf: case Hexagon::V4_SA1_clrfnew: @@ -901,8 +1507,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = inst & 0x3; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_combine0i: case Hexagon::V4_SA1_combine1i: @@ -913,8 +1518,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0x060) >> 5; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SA1_combinerz: case Hexagon::V4_SA1_combinezr: @@ -932,8 +1536,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf00) >> 8; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); @@ -944,8 +1547,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0xf00) >> 8) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); @@ -957,8 +1559,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = inst & 0xf; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SS2_storewi0: case Hexagon::V4_SS2_storewi1: @@ -967,25 +1568,23 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = (inst & 0xf) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); break; case Hexagon::V4_SS2_stored_sp: // s 
8-3{6_3}, Rtt 2-0 operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3); - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getDRegFromSubinstEncoding(inst & 0x7); Op = MCOperand::createReg(operand); MI->addOperand(Op); + break; case Hexagon::V4_SS2_storeh_io: // Rs 7-4, u 10-8{3_1}, Rt 3-0 operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4); Op = MCOperand::createReg(operand); MI->addOperand(Op); operand = ((inst & 0x700) >> 8) << 1; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); @@ -993,8 +1592,7 @@ static void AddSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) { case Hexagon::V4_SS2_storew_sp: // u 8-4{5_2}, Rd 3-0 operand = ((inst & 0x1f0) >> 4) << 2; - Op = MCOperand::createImm(operand); - MI->addOperand(Op); + HexagonMCInstrInfo::addConstant(*MI, operand, getContext()); operand = getRegFromSubinstEncoding(inst & 0xf); Op = MCOperand::createReg(operand); MI->addOperand(Op); diff --git a/lib/Target/Hexagon/Disassembler/LLVMBuild.txt b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt index 43bace75a852..6c251020f818 100644 --- a/lib/Target/Hexagon/Disassembler/LLVMBuild.txt +++ b/lib/Target/Hexagon/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = HexagonDisassembler parent = Hexagon -required_libraries = HexagonDesc HexagonInfo MCDisassembler Support +required_libraries = HexagonDesc HexagonInfo MC MCDisassembler Support add_to_library_groups = Hexagon diff --git a/lib/Target/Hexagon/Hexagon.h b/lib/Target/Hexagon/Hexagon.h index d360be2aa5b2..ed7d9578902e 100644 --- a/lib/Target/Hexagon/Hexagon.h +++ b/lib/Target/Hexagon/Hexagon.h @@ -47,15 +47,8 @@ #include "llvm/Target/TargetMachine.h" namespace llvm { - class MachineInstr; - class MCInst; - class MCInstrInfo; - class HexagonAsmPrinter; class HexagonTargetMachine; - void HexagonLowerToMC(const MachineInstr *MI, MCInst &MCI, - HexagonAsmPrinter &AP); - /// \brief Creates a Hexagon-specific Target Transformation Info pass. ImmutablePass *createHexagonTargetTransformInfoPass(const HexagonTargetMachine *TM); } // end namespace llvm; diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td index 53a687c337ec..1189cfd488ee 100644 --- a/lib/Target/Hexagon/Hexagon.td +++ b/lib/Target/Hexagon/Hexagon.td @@ -24,14 +24,32 @@ include "llvm/Target/Target.td" // Hexagon Architectures def ArchV4: SubtargetFeature<"v4", "HexagonArchVersion", "V4", "Hexagon V4">; def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">; +def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">; +def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">; + +// Hexagon ISA Extensions +def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps", + "true", "Hexagon HVX instructions">; +def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", + "true", "Hexagon HVX Double instructions">; //===----------------------------------------------------------------------===// // Hexagon Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// -def HasV5T : Predicate<"HST->hasV5TOps()">; -def NoV5T : Predicate<"!HST->hasV5TOps()">; -def UseMEMOP : Predicate<"HST->useMemOps()">; -def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; +def HasV5T : Predicate<"HST->hasV5TOps()">; +def NoV5T : Predicate<"!HST->hasV5TOps()">; +def HasV55T : Predicate<"HST->hasV55TOps()">, + AssemblerPredicate<"ArchV55">; +def HasV60T : Predicate<"HST->hasV60TOps()">, + AssemblerPredicate<"ArchV60">; +def UseMEMOP : Predicate<"HST->useMemOps()">; +def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">; +def UseHVXDbl : Predicate<"HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVXDbl">; +def UseHVXSgl : Predicate<"HST->useHVXSglOps()">; + +def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">, + AssemblerPredicate<"ExtensionHVX">; //===----------------------------------------------------------------------===// // Classes used for relation maps. @@ -53,6 +71,7 @@ class NewValueRel: PredNewRel; // NewValueRel - Filter class used to relate load/store instructions having // different addressing modes with each other. class AddrModeRel: NewValueRel; +class IntrinsicsRel; //===----------------------------------------------------------------------===// // Generate mapping table to relate non-predicate instructions with their @@ -62,7 +81,7 @@ class AddrModeRel: NewValueRel; def getPredOpcode : InstrMapping { let FilterClass = "PredRel"; // Instructions with the same BaseOpcode and isNVStore values form a row. - let RowFields = ["BaseOpcode", "isNVStore", "PNewValue"]; + let RowFields = ["BaseOpcode", "isNVStore", "PNewValue", "isNT"]; // Instructions with the same predicate sense form a column. let ColFields = ["PredSense"]; // The key column is the unpredicated instructions. 
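As a brief aside for readers new to InstrMapping: TableGen turns each of these relation maps into a C++ lookup function, emitted under GET_INSTRMAP_INFO in the generated HexagonGenInstrInfo.inc. A minimal usage sketch, assuming the generated signature int Hexagon::getPredOpcode(uint16_t Opcode, enum PredSense inPredSense) and the generated PredSense_true enumerator (check the .inc output for the exact names), given a MachineInstr *MI and the target's TII:

    // Swap an unpredicated instruction for its predicated-true relative,
    // if the relation table defines one; a negative result means no mapping.
    int PredOpc = Hexagon::getPredOpcode(MI->getOpcode(), Hexagon::PredSense_true);
    if (PredOpc >= 0)
      MI->setDesc(TII->get(PredOpc));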
@@ -77,7 +96,7 @@ def getPredOpcode : InstrMapping { // def getFalsePredOpcode : InstrMapping { let FilterClass = "PredRel"; - let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; let ColFields = ["PredSense"]; let KeyCol = ["true"]; let ValueCols = [["false"]]; @@ -89,7 +108,7 @@ def getFalsePredOpcode : InstrMapping { // def getTruePredOpcode : InstrMapping { let FilterClass = "PredRel"; - let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken"]; + let RowFields = ["BaseOpcode", "PNewValue", "isNVStore", "isBrTaken", "isNT"]; let ColFields = ["PredSense"]; let KeyCol = ["false"]; let ValueCols = [["true"]]; @@ -125,7 +144,7 @@ def getPredOldOpcode : InstrMapping { // def getNewValueOpcode : InstrMapping { let FilterClass = "NewValueRel"; - let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"]; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; let ColFields = ["NValueST"]; let KeyCol = ["false"]; let ValueCols = [["true"]]; @@ -137,16 +156,16 @@ def getNewValueOpcode : InstrMapping { // def getNonNVStore : InstrMapping { let FilterClass = "NewValueRel"; - let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode"]; + let RowFields = ["BaseOpcode", "PredSense", "PNewValue", "addrMode", "isNT"]; let ColFields = ["NValueST"]; let KeyCol = ["true"]; let ValueCols = [["false"]]; } -def getBasedWithImmOffset : InstrMapping { +def getBaseWithImmOffset : InstrMapping { let FilterClass = "AddrModeRel"; let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore", - "isMEMri", "isFloat"]; + "isFloat"]; let ColFields = ["addrMode"]; let KeyCol = ["Absolute"]; let ValueCols = [["BaseImmOffset"]]; @@ -168,6 +187,37 @@ def getRegForm : InstrMapping { let ValueCols = [["reg"]]; } +def getRegShlForm : InstrMapping { + let FilterClass = "ImmRegShl"; + let RowFields = ["CextOpcode", "PredSense", "PNewValue", "isNVStore"]; + let ColFields = ["InputType"]; + let KeyCol = ["imm"]; + let ValueCols = [["reg"]]; +} + +def notTakenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["true"]; + let ValueCols = [["false"]]; +} + +def takenBranchPrediction : InstrMapping { + let FilterClass = "PredRel"; + let RowFields = ["BaseOpcode", "PNewValue", "PredSense", "isBranch", "isPredicated"]; + let ColFields = ["isBrTaken"]; + let KeyCol = ["false"]; + let ValueCols = [["true"]]; +} + +def getRealHWInstr : InstrMapping { + let FilterClass = "IntrinsicsRel"; + let RowFields = ["BaseOpcode"]; + let ColFields = ["InstrType"]; + let KeyCol = ["Pseudo"]; + let ValueCols = [["Pseudo"], ["Real"]]; +} //===----------------------------------------------------------------------===// // Register File, Calling Conv, Instruction Descriptions //===----------------------------------------------------------------------===// @@ -192,12 +242,22 @@ def : Proc<"hexagonv4", HexagonModelV4, [ArchV4]>; def : Proc<"hexagonv5", HexagonModelV4, [ArchV4, ArchV5]>; +def : Proc<"hexagonv55", HexagonModelV55, + [ArchV4, ArchV5, ArchV55]>; +def : Proc<"hexagonv60", HexagonModelV60, + [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>; //===----------------------------------------------------------------------===// // Declare the target which we are implementing 
//===----------------------------------------------------------------------===//

+def HexagonAsmParserVariant : AsmParserVariant {
+  int Variant = 0;
+  string TokenizingCharacters = "#()=:.<>!+*";
+}
+
 def Hexagon : Target {
   // Pull in Instruction Info:
   let InstructionSet = HexagonInstrInfo;
+  let AssemblyParserVariants = [HexagonAsmParserVariant];
 }
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 05728d2b627e..e213089687e8 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -40,11 +40,13 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/TargetRegistry.h"
@@ -56,12 +58,27 @@
 
 using namespace llvm;
 
+namespace llvm {
+  void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
+                        MCInst &MCB, HexagonAsmPrinter &AP);
+}
+
 #define DEBUG_TYPE "asm-printer"
 
 static cl::opt<bool> AlignCalls(
          "hexagon-align-calls", cl::Hidden, cl::init(true),
          cl::desc("Insert falign after call instruction for Hexagon target"));
 
+// Given a scalar register return its pair.
+inline static unsigned getHexagonRegisterPair(unsigned Reg,
+      const MCRegisterInfo *RI) {
+  assert(Hexagon::IntRegsRegClass.contains(Reg));
+  MCSuperRegIterator SR(Reg, RI, false);
+  unsigned Pair = *SR;
+  assert(Hexagon::DoubleRegsRegClass.contains(Pair));
+  return Pair;
+}
+
 HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM,
                                      std::unique_ptr<MCStreamer> Streamer)
     : AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {}
@@ -102,9 +119,8 @@ void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
 //
 bool HexagonAsmPrinter::
 isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
-  if (MBB->hasAddressTaken()) {
+  if (MBB->hasAddressTaken())
     return false;
-  }
   return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
 }
 
@@ -117,7 +133,8 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                         raw_ostream &OS) {
   // Does this asm operand have a single letter operand modifier?
   if (ExtraCode && ExtraCode[0]) {
-    if (ExtraCode[1] != 0) return true; // Unknown modifier.
+    if (ExtraCode[1] != 0)
+      return true; // Unknown modifier.
 
     switch (ExtraCode[0]) {
     default:
@@ -173,45 +190,407 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
+MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
+                    MCStreamer &OutStreamer,
+                    const MCOperand &Imm, int AlignSize) {
+  MCSymbol *Sym;
+  int64_t Value;
+  if (Imm.getExpr()->evaluateAsAbsolute(Value)) {
+    StringRef sectionPrefix;
+    std::string ImmString;
+    StringRef Name;
+    if (AlignSize == 8) {
+      Name = ".CONST_0000000000000000";
+      sectionPrefix = ".gnu.linkonce.l8";
+      ImmString = utohexstr(Value);
+    } else {
+      Name = ".CONST_00000000";
+      sectionPrefix = ".gnu.linkonce.l4";
+      ImmString = utohexstr(static_cast<uint32_t>(Value));
+    }
+
+    std::string symbolName =   // Yes, leading zeros are kept.
+        Name.drop_back(ImmString.size()).str() + ImmString;
+    std::string sectionName = sectionPrefix.str() + symbolName;
+
+    MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+        sectionName, ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+    OutStreamer.SwitchSection(Section);
+
+    Sym = AP.OutContext.getOrCreateSymbol(Twine(symbolName));
+    if (Sym->isUndefined()) {
+      OutStreamer.EmitLabel(Sym);
+      OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+      OutStreamer.EmitIntValue(Value, AlignSize);
+      OutStreamer.EmitCodeAlignment(AlignSize);
+    }
+  } else {
+    assert(Imm.isExpr() && "Expected expression and found none");
+    const MachineOperand &MO = MI.getOperand(1);
+    assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+    MCSymbol *MOSymbol = nullptr;
+    if (MO.isGlobal())
+      MOSymbol = AP.getSymbol(MO.getGlobal());
+    else if (MO.isCPI())
+      MOSymbol = AP.GetCPISymbol(MO.getIndex());
+    else if (MO.isJTI())
+      MOSymbol = AP.GetJTISymbol(MO.getIndex());
+    else
+      llvm_unreachable("Unknown operand type!");
+
+    StringRef SymbolName = MOSymbol->getName();
+    std::string LitaName = ".CONST_" + SymbolName.str();
+
+    MCSectionELF *Section = OutStreamer.getContext().getELFSection(
+        ".lita", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+
+    OutStreamer.SwitchSection(Section);
+    Sym = AP.OutContext.getOrCreateSymbol(Twine(LitaName));
+    if (Sym->isUndefined()) {
+      OutStreamer.EmitLabel(Sym);
+      OutStreamer.EmitSymbolAttribute(Sym, MCSA_Local);
+      OutStreamer.EmitValue(Imm.getExpr(), AlignSize);
+      OutStreamer.EmitCodeAlignment(AlignSize);
+    }
+  }
+  return Sym;
+}
+
+void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
+                                                  const MachineInstr &MI) {
+  MCInst &MappedInst = static_cast<MCInst &>(Inst);
+  const MCRegisterInfo *RI = OutStreamer->getContext().getRegisterInfo();
+
+  switch (Inst.getOpcode()) {
+  default: return;
+
+  // "$dst = CONST64(#$src1)",
+  case Hexagon::CONST64_Float_Real:
+  case Hexagon::CONST64_Int_Real:
+    if (!OutStreamer->hasRawTextSupport()) {
+      const MCOperand &Imm = MappedInst.getOperand(1);
+      MCSectionSubPair Current = OutStreamer->getCurrentSection();
+
+      MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 8);
+
+      OutStreamer->SwitchSection(Current.first, Current.second);
+      MCInst TmpInst;
+      MCOperand &Reg = MappedInst.getOperand(0);
+      TmpInst.setOpcode(Hexagon::L2_loadrdgp);
+      TmpInst.addOperand(Reg);
+      TmpInst.addOperand(MCOperand::createExpr(
+          MCSymbolRefExpr::create(Sym, OutContext)));
+      MappedInst = TmpInst;
+
+    }
+    break;
+  case Hexagon::CONST32:
+  case Hexagon::CONST32_Float_Real:
+  case Hexagon::CONST32_Int_Real:
+  case Hexagon::FCONST32_nsdata:
+    if (!OutStreamer->hasRawTextSupport()) {
+      MCOperand &Imm = MappedInst.getOperand(1);
+      MCSectionSubPair Current = OutStreamer->getCurrentSection();
+      MCSymbol *Sym = smallData(*this, MI, *OutStreamer, Imm, 4);
+      OutStreamer->SwitchSection(Current.first, Current.second);
+      MCInst TmpInst;
+      MCOperand &Reg = MappedInst.getOperand(0);
+      TmpInst.setOpcode(Hexagon::L2_loadrigp);
+      TmpInst.addOperand(Reg);
+      TmpInst.addOperand(MCOperand::createExpr(
+          MCSymbolRefExpr::create(Sym, OutContext)));
+      MappedInst = TmpInst;
+    }
+    break;
+
+  // C2_pxfer_map maps to the C2_or instruction. It is possible to use C2_or
+  // during instruction selection itself, but that results in suboptimal code.
+  // A reduced sketch of this kind of remapping follows below.
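// A minimal standalone sketch of the remapping pattern HexagonProcessInstruction
// uses: the MCInst is rewritten in place, reusing its operands. For the
// predicate transfer above, "Pd = Ps" becomes "Pd = or(Ps, Ps)" by swapping
// the opcode and appending the source operand once more. The opcode argument
// is a placeholder (e.g. Hexagon::C2_or), not a real encoding.
#include "llvm/MC/MCInst.h"

static void lowerPxferToOr(llvm::MCInst &Inst, unsigned OrOpcode) {
  // Expects "Pd = Ps": operand 0 is Pd, operand 1 is Ps.
  llvm::MCOperand Ps = Inst.getOperand(1);
  Inst.setOpcode(OrOpcode);  // e.g. Hexagon::C2_or
  Inst.addOperand(Ps);       // operands become (Pd, Ps, Ps)
}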
+ case Hexagon::C2_pxfer_map: { + MCOperand &Ps = Inst.getOperand(1); + MappedInst.setOpcode(Hexagon::C2_or); + MappedInst.addOperand(Ps); + return; + } + + // Vector reduce complex multiply by scalar, Rt & 1 map to :hi else :lo + // The insn is mapped from the 4 operand to the 3 operand raw form taking + // 3 register pairs. + case Hexagon::M2_vrcmpys_acc_s1: { + MCOperand &Rt = Inst.getOperand(3); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::M2_vrcmpys_s1: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::M2_vrcmpys_s1rp: { + MCOperand &Rt = Inst.getOperand(2); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h); + else + MappedInst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + + case Hexagon::A4_boundscheck: { + MCOperand &Rs = Inst.getOperand(1); + assert (Rs.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rs.getReg()); + if (Reg & 1) // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2 + MappedInst.setOpcode(Hexagon::A4_boundscheck_hi); + else // raw:lo + MappedInst.setOpcode(Hexagon::A4_boundscheck_lo); + Rs.setReg(getHexagonRegisterPair(Rs.getReg(), RI)); + return; + } + case Hexagon::S5_asrhub_rnd_sat_goodsyntax: { + MCOperand &MO = MappedInst.getOperand(2); + int64_t Imm; + MCExpr const *Expr = MO.getExpr(); + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::S2_vsathub); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S5_asrhub_rnd_sat); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::S5_vasrhrnd_goodsyntax: + case Hexagon::S2_asr_i_p_rnd_goodsyntax: { + MCOperand &MO2 = MappedInst.getOperand(2); + MCExpr const *Expr = MO2.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_combinew); + TmpInst.addOperand(MappedInst.getOperand(0)); + MCOperand &MO1 = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg); + // Add a new operand for the second register in the pair. 
+ TmpInst.addOperand(MCOperand::createReg(High)); + TmpInst.addOperand(MCOperand::createReg(Low)); + MappedInst = TmpInst; + return; + } + + if (Inst.getOpcode() == Hexagon::S2_asr_i_p_rnd_goodsyntax) + TmpInst.setOpcode(Hexagon::S2_asr_i_p_rnd); + else + TmpInst.setOpcode(Hexagon::S5_vasrhrnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + // if ("#u5==0") Assembler mapped to: "Rd=Rs"; else Rd=asr(Rs,#u5-1):rnd + case Hexagon::S2_asr_i_r_rnd_goodsyntax: { + MCOperand &MO = Inst.getOperand(2); + MCExpr const *Expr = MO.getExpr(); + int64_t Imm; + bool Success = Expr->evaluateAsAbsolute(Imm); + assert (Success && "Expected immediate and none was found");(void)Success; + MCInst TmpInst; + if (Imm == 0) { + TmpInst.setOpcode(Hexagon::A2_tfr); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + MappedInst = TmpInst; + return; + } + TmpInst.setOpcode(Hexagon::S2_asr_i_r_rnd); + TmpInst.addOperand(MappedInst.getOperand(0)); + TmpInst.addOperand(MappedInst.getOperand(1)); + const MCExpr *One = MCConstantExpr::create(1, OutContext); + const MCExpr *Sub = MCBinaryExpr::createSub(Expr, One, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Sub)); + MappedInst = TmpInst; + return; + } + case Hexagon::TFRI_f: + MappedInst.setOpcode(Hexagon::A2_tfrsi); + return; + case Hexagon::TFRI_cPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveit); + return; + case Hexagon::TFRI_cNotPt_f: + MappedInst.setOpcode(Hexagon::C2_cmoveif); + return; + case Hexagon::MUX_ri_f: + MappedInst.setOpcode(Hexagon::C2_muxri); + return; + case Hexagon::MUX_ir_f: + MappedInst.setOpcode(Hexagon::C2_muxir); + return; + + // Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)" + case Hexagon::A2_tfrpi: { + MCInst TmpInst; + MCOperand &Rdd = MappedInst.getOperand(0); + MCOperand &MO = MappedInst.getOperand(1); + + TmpInst.setOpcode(Hexagon::A2_combineii); + TmpInst.addOperand(Rdd); + int64_t Imm; + bool Success = MO.getExpr()->evaluateAsAbsolute(Imm); + if (Success && Imm < 0) { + const MCExpr *MOne = MCConstantExpr::create(-1, OutContext); + TmpInst.addOperand(MCOperand::createExpr(MOne)); + } else { + const MCExpr *Zero = MCConstantExpr::create(0, OutContext); + TmpInst.addOperand(MCOperand::createExpr(Zero)); + } + TmpInst.addOperand(MO); + MappedInst = TmpInst; + return; + } + // Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)" + case Hexagon::A2_tfrp: { + MCOperand &MO = MappedInst.getOperand(1); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode(Hexagon::A2_combinew); + return; + } + + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt) + ? 
Hexagon::C2_ccombinewt + : Hexagon::C2_ccombinewf); + return; + } + case Hexagon::A2_tfrptnew: + case Hexagon::A2_tfrpfnew: { + MCOperand &MO = MappedInst.getOperand(2); + unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg); + unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg); + MO.setReg(High); + // Add a new operand for the second register in the pair. + MappedInst.addOperand(MCOperand::createReg(Low)); + MappedInst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew) + ? Hexagon::C2_ccombinewnewt + : Hexagon::C2_ccombinewnewf); + return; + } + + case Hexagon::M2_mpysmi: { + MCOperand &Imm = MappedInst.getOperand(2); + MCExpr const *Expr = Imm.getExpr(); + int64_t Value; + bool Success = Expr->evaluateAsAbsolute(Value); + assert(Success);(void)Success; + if (Value < 0 && Value > -256) { + MappedInst.setOpcode(Hexagon::M2_mpysin); + Imm.setExpr(MCUnaryExpr::createMinus(Expr, OutContext)); + } + else + MappedInst.setOpcode(Hexagon::M2_mpysip); + return; + } + + case Hexagon::A2_addsp: { + MCOperand &Rt = Inst.getOperand(1); + assert (Rt.isReg() && "Expected register and none was found"); + unsigned Reg = RI->getEncodingValue(Rt.getReg()); + if (Reg & 1) + MappedInst.setOpcode(Hexagon::A2_addsph); + else + MappedInst.setOpcode(Hexagon::A2_addspl); + Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI)); + return; + } + case Hexagon::HEXAGON_V6_vd0_pseudo: + case Hexagon::HEXAGON_V6_vd0_pseudo_128B: { + MCInst TmpInst; + assert (Inst.getOperand(0).isReg() && + "Expected register and none was found"); + + TmpInst.setOpcode(Hexagon::V6_vxor); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + TmpInst.addOperand(Inst.getOperand(0)); + MappedInst = TmpInst; + return; + } + + } +} + /// printMachineInstruction -- Print out a single Hexagon MI in Darwin syntax to /// the current output stream. /// void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { - MCInst MCB; - MCB.setOpcode(Hexagon::BUNDLE); - MCB.addOperand(MCOperand::createImm(0)); + MCInst MCB = HexagonMCInstrInfo::createBundle(); + const MCInstrInfo &MCII = *Subtarget->getInstrInfo(); if (MI->isBundle()) { const MachineBasicBlock* MBB = MI->getParent(); - MachineBasicBlock::const_instr_iterator MII = MI; + MachineBasicBlock::const_instr_iterator MII = MI->getIterator(); unsigned IgnoreCount = 0; - for (++MII; MII != MBB->end() && MII->isInsideBundle(); ++MII) { + for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) if (MII->getOpcode() == TargetOpcode::DBG_VALUE || MII->getOpcode() == TargetOpcode::IMPLICIT_DEF) ++IgnoreCount; - else { - HexagonLowerToMC(MII, MCB, *this); - } - } + else + HexagonLowerToMC(MCII, &*MII, MCB, *this); } - else { - HexagonLowerToMC(MI, MCB, *this); - HexagonMCInstrInfo::padEndloop(MCB); - } - // Examine the packet and try to find instructions that can be converted - // to compounds. - HexagonMCInstrInfo::tryCompound(*Subtarget->getInstrInfo(), - OutStreamer->getContext(), MCB); - // Examine the packet and convert pairs of instructions to duplex - // instructions when possible. 
-  SmallVector<DuplexCandidate, 8> possibleDuplexes;
-  possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(
-      *Subtarget->getInstrInfo(), MCB);
-  HexagonMCShuffle(*Subtarget->getInstrInfo(), *Subtarget,
-                   OutStreamer->getContext(), MCB, possibleDuplexes);
-  EmitToStreamer(*OutStreamer, MCB);
+  else
+    HexagonLowerToMC(MCII, MI, MCB, *this);
+
+  bool Ok = HexagonMCInstrInfo::canonicalizePacket(
+      MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr);
+  assert(Ok);
+  (void)Ok;
+  if (HexagonMCInstrInfo::bundleSize(MCB) == 0)
+    return;
+  OutStreamer->EmitInstruction(MCB, getSubtargetInfo());
 }
 
 extern "C" void LLVMInitializeHexagonAsmPrinter() {
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index 792fc8b7af3a..a78d97e28427 100755
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -42,6 +42,10 @@ namespace llvm {
 
     void EmitInstruction(const MachineInstr *MI) override;
 
+    void HexagonProcessInstruction(MCInst &Inst,
+                                   const MachineInstr &MBB);
+
+
     void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
     bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                          unsigned AsmVariant, const char *ExtraCode,
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
new file mode 100644
index 000000000000..77907b054d54
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -0,0 +1,2778 @@
+//===--- HexagonBitSimplify.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexbit"
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonBitTracker.h"
+
+using namespace llvm;
+
+namespace llvm {
+  void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
+  FunctionPass *createHexagonBitSimplify();
+}
+
+namespace {
+  // Set of virtual registers, based on BitVector. (A sketch of the
+  // vreg-to-index mapping it relies on follows below.)
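// A minimal sketch of the translation the RegisterSet below depends on:
// virtual register numbers are large opaque values, so they are mapped to
// dense BitVector indices and back. The assertions only document the
// round-trip property; this is illustration, not part of the pass.
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>

static void vregIndexRoundTrip(unsigned VReg) {
  using llvm::TargetRegisterInfo;
  assert(TargetRegisterInfo::isVirtualRegister(VReg));
  unsigned Idx = TargetRegisterInfo::virtReg2Index(VReg);  // dense bit index
  assert(TargetRegisterInfo::index2VirtReg(Idx) == VReg);  // and back again
  (void)Idx;
}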
+  struct RegisterSet : private BitVector {
+    RegisterSet() : BitVector() {}
+    explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
+    RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
+
+    using BitVector::clear;
+    using BitVector::count;
+
+    unsigned find_first() const {
+      int First = BitVector::find_first();
+      if (First < 0)
+        return 0;
+      return x2v(First);
+    }
+
+    unsigned find_next(unsigned Prev) const {
+      int Next = BitVector::find_next(v2x(Prev));
+      if (Next < 0)
+        return 0;
+      return x2v(Next);
+    }
+
+    RegisterSet &insert(unsigned R) {
+      unsigned Idx = v2x(R);
+      ensure(Idx);
+      return static_cast<RegisterSet&>(BitVector::set(Idx));
+    }
+    RegisterSet &remove(unsigned R) {
+      unsigned Idx = v2x(R);
+      if (Idx >= size())
+        return *this;
+      return static_cast<RegisterSet&>(BitVector::reset(Idx));
+    }
+
+    RegisterSet &insert(const RegisterSet &Rs) {
+      return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
+    }
+    RegisterSet &remove(const RegisterSet &Rs) {
+      return static_cast<RegisterSet&>(BitVector::reset(Rs));
+    }
+
+    reference operator[](unsigned R) {
+      unsigned Idx = v2x(R);
+      ensure(Idx);
+      return BitVector::operator[](Idx);
+    }
+    bool operator[](unsigned R) const {
+      unsigned Idx = v2x(R);
+      assert(Idx < size());
+      return BitVector::operator[](Idx);
+    }
+    bool has(unsigned R) const {
+      unsigned Idx = v2x(R);
+      if (Idx >= size())
+        return false;
+      return BitVector::test(Idx);
+    }
+
+    bool empty() const {
+      return !BitVector::any();
+    }
+    bool includes(const RegisterSet &Rs) const {
+      // A.BitVector::test(B)  <=>  A-B != {}
+      return !Rs.BitVector::test(*this);
+    }
+    bool intersects(const RegisterSet &Rs) const {
+      return BitVector::anyCommon(Rs);
+    }
+
+  private:
+    void ensure(unsigned Idx) {
+      if (size() <= Idx)
+        resize(std::max(Idx+1, 32U));
+    }
+    static inline unsigned v2x(unsigned v) {
+      return TargetRegisterInfo::virtReg2Index(v);
+    }
+    static inline unsigned x2v(unsigned x) {
+      return TargetRegisterInfo::index2VirtReg(x);
+    }
+  };
+
+
+  struct PrintRegSet {
+    PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
+      : RS(S), TRI(RI) {}
+    friend raw_ostream &operator<< (raw_ostream &OS,
+          const PrintRegSet &P);
+  private:
+    const RegisterSet &RS;
+    const TargetRegisterInfo *TRI;
+  };
+
+  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
+    LLVM_ATTRIBUTE_UNUSED;
+  raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
+    OS << '{';
+    for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
+      OS << ' ' << PrintReg(R, P.TRI);
+    OS << " }";
+    return OS;
+  }
+}
+
+
+namespace {
+  class Transformation;
+
+  class HexagonBitSimplify : public MachineFunctionPass {
+  public:
+    static char ID;
+    HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) {
+      initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
+    }
+    virtual const char *getPassName() const {
+      return "Hexagon bit simplification";
+    }
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
+    static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
+    static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
+        const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
+    static bool isConst(const BitTracker::RegisterCell &RC, uint16_t B,
+        uint16_t W);
+    static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
+        uint16_t W);
+    static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
+        uint16_t W, uint64_t &U);
+    static bool replaceReg(unsigned OldR, unsigned NewR,
+        MachineRegisterInfo &MRI);
+    static bool getSubregMask(const BitTracker::RegisterRef &RR,
+        unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
+    static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
+        unsigned NewSR, MachineRegisterInfo &MRI);
+    static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
+        unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
+    static bool parseRegSequence(const MachineInstr &I,
+        BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH);
+
+    static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
+        uint16_t Begin);
+    static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
+        uint16_t Begin, const HexagonInstrInfo &HII);
+
+    static const TargetRegisterClass *getFinalVRegClass(
+        const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
+    static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
+        const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);
+
+  private:
+    MachineDominatorTree *MDT;
+
+    bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
+  };
+
+  char HexagonBitSimplify::ID = 0;
+  typedef HexagonBitSimplify HBS;
+
+
+  // The purpose of this class is to provide a common facility to traverse
+  // the function top-down or bottom-up via the dominator tree, and keep
+  // track of the available registers.
+  class Transformation {
+  public:
+    bool TopDown;
+    Transformation(bool TD) : TopDown(TD) {}
+    virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
+    virtual ~Transformation() {}
+  };
+}
+
+INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
+      "Hexagon bit simplification", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
+      "Hexagon bit simplification", false, false)
+
+
+bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
+      RegisterSet &AVs) {
+  MachineDomTreeNode *N = MDT->getNode(&B);
+  typedef GraphTraits<MachineDomTreeNode*> GTN;
+  bool Changed = false;
+
+  if (T.TopDown)
+    Changed = T.processBlock(B, AVs);
+
+  RegisterSet Defs;
+  for (auto &I : B)
+    getInstrDefs(I, Defs);
+  RegisterSet NewAVs = AVs;
+  NewAVs.insert(Defs);
+
+  for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
+    MachineBasicBlock *SB = (*I)->getBlock();
+    Changed |= visitBlock(*SB, T, NewAVs);
+  }
+  if (!T.TopDown)
+    Changed |= T.processBlock(B, AVs);
+
+  return Changed;
+}
+
+//
+// Utility functions:
+//
+void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
+      RegisterSet &Defs) {
+  for (auto &Op : MI.operands()) {
+    if (!Op.isReg() || !Op.isDef())
+      continue;
+    unsigned R = Op.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(R))
+      continue;
+    Defs.insert(R);
+  }
+}
+
+void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
+      RegisterSet &Uses) {
+  for (auto &Op : MI.operands()) {
+    if (!Op.isReg() || !Op.isUse())
+      continue;
+    unsigned R = Op.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(R))
+      continue;
+    Uses.insert(R);
+  }
+}
+
+// Check if all the bits in range [B, E) in both cells are equal.
+bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
+      uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
+      uint16_t W) {
+  for (uint16_t i = 0; i < W; ++i) {
+    // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
+ if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0) + return false; + // Same for RC2[i]. + if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0) + return false; + if (RC1[B1+i] != RC2[B2+i]) + return false; + } + return true; +} + + +bool HexagonBitSimplify::isConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].num()) + return false; + return true; +} + + +bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W) { + assert(B < RC.width() && B+W <= RC.width()); + for (uint16_t i = B; i < B+W; ++i) + if (!RC[i].is(0)) + return false; + return true; +} + + +bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC, + uint16_t B, uint16_t W, uint64_t &U) { + assert(B < RC.width() && B+W <= RC.width()); + int64_t T = 0; + for (uint16_t i = B+W; i > B; --i) { + const BitTracker::BitValue &BV = RC[i-1]; + T <<= 1; + if (BV.is(1)) + T |= 1; + else if (!BV.is(0)) + return false; + } + U = T; + return true; +} + + +bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR, + MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR, + unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR, + unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(OldR) || + !TargetRegisterInfo::isVirtualRegister(NewR)) + return false; + auto Begin = MRI.use_begin(OldR), End = MRI.use_end(); + decltype(End) NextI; + for (auto I = Begin; I != End; I = NextI) { + NextI = std::next(I); + if (I->getSubReg() != OldSR) + continue; + I->setReg(NewR); + I->setSubReg(NewSR); + } + return Begin != End; +} + + +// For a register ref (pair Reg:Sub), set Begin to the position of the LSB +// of Sub in Reg, and set Width to the size of Sub in bits. Return true, +// if this succeeded, otherwise return false. +bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR, + unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) { + const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg); + if (RC == &Hexagon::IntRegsRegClass) { + assert(RR.Sub == 0); + Begin = 0; + Width = 32; + return true; + } + if (RC == &Hexagon::DoubleRegsRegClass) { + if (RR.Sub == 0) { + Begin = 0; + Width = 64; + return true; + } + assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg); + Width = 32; + Begin = (RR.Sub == Hexagon::subreg_loreg ? 0 : 32); + return true; + } + return false; +} + + +// For a REG_SEQUENCE, set SL to the low subregister and SH to the high +// subregister. 
+bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I, + BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) { + assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE); + unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm(); + assert(Sub1 != Sub2); + if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) { + SL = I.getOperand(1); + SH = I.getOperand(3); + return true; + } + if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) { + SH = I.getOperand(1); + SL = I.getOperand(3); + return true; + } + return false; +} + + +// All stores (except 64-bit stores) take a 32-bit register as the source +// of the value to be stored. If the instruction stores into a location +// that is shorter than 32 bits, some bits of the source register are not +// used. For each store instruction, calculate the set of used bits in +// the source register, and set appropriate bits in Bits. Return true if +// the bits are calculated, false otherwise. +bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits, + uint16_t Begin) { + using namespace Hexagon; + + switch (Opc) { + // Store byte + case S2_storerb_io: // memb(Rs32+#s11:0)=Rt32 + case S2_storerbnew_io: // memb(Rs32+#s11:0)=Nt8.new + case S2_pstorerbt_io: // if (Pv4) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbf_io: // if (!Pv4) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S4_pstorerbfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32 + case S2_pstorerbnewt_io: // if (Pv4) memb(Rs32+#u6:0)=Nt8.new + case S2_pstorerbnewf_io: // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewtnew_io: // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S4_pstorerbnewfnew_io: // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new + case S2_storerb_pi: // memb(Rx32++#s4:0)=Rt32 + case S2_storerbnew_pi: // memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbt_pi: // if (Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32 + case S2_pstorerbnewt_pi: // if (Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewf_pi: // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewtnew_pi: // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S2_pstorerbnewfnew_pi: // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new + case S4_storerb_ap: // memb(Re32=#U6)=Rt32 + case S4_storerbnew_ap: // memb(Re32=#U6)=Nt8.new + case S2_storerb_pr: // memb(Rx32++Mu2)=Rt32 + case S2_storerbnew_pr: // memb(Rx32++Mu2)=Nt8.new + case S4_storerb_ur: // memb(Ru32<<#u2+#U6)=Rt32 + case S4_storerbnew_ur: // memb(Ru32<<#u2+#U6)=Nt8.new + case S2_storerb_pbr: // memb(Rx32++Mu2:brev)=Rt32 + case S2_storerbnew_pbr: // memb(Rx32++Mu2:brev)=Nt8.new + case S2_storerb_pci: // memb(Rx32++#s4:0:circ(Mu2))=Rt32 + case S2_storerbnew_pci: // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new + case S2_storerb_pcr: // memb(Rx32++I:circ(Mu2))=Rt32 + case S2_storerbnew_pcr: // memb(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerb_rr: // memb(Rs32+Ru32<<#u2)=Rt32 + case S4_storerbnew_rr: // memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbf_rr: // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerbnewt_rr: // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewf_rr: // if (!Pv4) 
memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewtnew_rr: // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerbnewfnew_rr: // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerbgp: // memb(gp+#u16:0)=Rt32 + case S2_storerbnewgp: // memb(gp+#u16:0)=Nt8.new + case S4_pstorerbt_abs: // if (Pv4) memb(#u6)=Rt32 + case S4_pstorerbf_abs: // if (!Pv4) memb(#u6)=Rt32 + case S4_pstorerbtnew_abs: // if (Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbfnew_abs: // if (!Pv4.new) memb(#u6)=Rt32 + case S4_pstorerbnewt_abs: // if (Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewf_abs: // if (!Pv4) memb(#u6)=Nt8.new + case S4_pstorerbnewtnew_abs: // if (Pv4.new) memb(#u6)=Nt8.new + case S4_pstorerbnewfnew_abs: // if (!Pv4.new) memb(#u6)=Nt8.new + Bits.set(Begin, Begin+8); + return true; + + // Store low half + case S2_storerh_io: // memh(Rs32+#s11:1)=Rt32 + case S2_storerhnew_io: // memh(Rs32+#s11:1)=Nt8.new + case S2_pstorerht_io: // if (Pv4) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhf_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S4_pstorerhfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32 + case S2_pstorerhnewt_io: // if (Pv4) memh(Rs32+#u6:1)=Nt8.new + case S2_pstorerhnewf_io: // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewtnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S4_pstorerhnewfnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new + case S2_storerh_pi: // memh(Rx32++#s4:1)=Rt32 + case S2_storerhnew_pi: // memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerht_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32 + case S2_pstorerhnewt_pi: // if (Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewf_pi: // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewtnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S2_pstorerhnewfnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new + case S4_storerh_ap: // memh(Re32=#U6)=Rt32 + case S4_storerhnew_ap: // memh(Re32=#U6)=Nt8.new + case S2_storerh_pr: // memh(Rx32++Mu2)=Rt32 + case S2_storerhnew_pr: // memh(Rx32++Mu2)=Nt8.new + case S4_storerh_ur: // memh(Ru32<<#u2+#U6)=Rt32 + case S4_storerhnew_ur: // memh(Ru32<<#u2+#U6)=Nt8.new + case S2_storerh_pbr: // memh(Rx32++Mu2:brev)=Rt32 + case S2_storerhnew_pbr: // memh(Rx32++Mu2:brev)=Nt8.new + case S2_storerh_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt32 + case S2_storerhnew_pci: // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new + case S2_storerh_pcr: // memh(Rx32++I:circ(Mu2))=Rt32 + case S2_storerhnew_pcr: // memh(Rx32++I:circ(Mu2))=Nt8.new + case S4_storerh_rr: // memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerht_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_pstorerhfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32 + case S4_storerhnew_rr: // memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewt_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewf_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewtnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S4_pstorerhnewfnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new + case S2_storerhgp: // memh(gp+#u16:1)=Rt32 + case S2_storerhnewgp: // memh(gp+#u16:1)=Nt8.new + case S4_pstorerht_abs: // if (Pv4) memh(#u6)=Rt32 + case S4_pstorerhf_abs: // if 
(!Pv4) memh(#u6)=Rt32 + case S4_pstorerhtnew_abs: // if (Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhfnew_abs: // if (!Pv4.new) memh(#u6)=Rt32 + case S4_pstorerhnewt_abs: // if (Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewf_abs: // if (!Pv4) memh(#u6)=Nt8.new + case S4_pstorerhnewtnew_abs: // if (Pv4.new) memh(#u6)=Nt8.new + case S4_pstorerhnewfnew_abs: // if (!Pv4.new) memh(#u6)=Nt8.new + Bits.set(Begin, Begin+16); + return true; + + // Store high half + case S2_storerf_io: // memh(Rs32+#s11:1)=Rt.H32 + case S2_pstorerft_io: // if (Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S2_pstorerff_io: // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerftnew_io: // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S4_pstorerffnew_io: // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32 + case S2_storerf_pi: // memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerft_pi: // if (Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerff_pi: // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerftnew_pi: // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S2_pstorerffnew_pi: // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32 + case S4_storerf_ap: // memh(Re32=#U6)=Rt.H32 + case S2_storerf_pr: // memh(Rx32++Mu2)=Rt.H32 + case S4_storerf_ur: // memh(Ru32<<#u2+#U6)=Rt.H32 + case S2_storerf_pbr: // memh(Rx32++Mu2:brev)=Rt.H32 + case S2_storerf_pci: // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32 + case S2_storerf_pcr: // memh(Rx32++I:circ(Mu2))=Rt.H32 + case S4_storerf_rr: // memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerft_rr: // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerff_rr: // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerftnew_rr: // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S4_pstorerffnew_rr: // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32 + case S2_storerfgp: // memh(gp+#u16:1)=Rt.H32 + case S4_pstorerft_abs: // if (Pv4) memh(#u6)=Rt.H32 + case S4_pstorerff_abs: // if (!Pv4) memh(#u6)=Rt.H32 + case S4_pstorerftnew_abs: // if (Pv4.new) memh(#u6)=Rt.H32 + case S4_pstorerffnew_abs: // if (!Pv4.new) memh(#u6)=Rt.H32 + Bits.set(Begin+16, Begin+32); + return true; + } + + return false; +} + + +// For an instruction with opcode Opc, calculate the set of bits that it +// uses in a register in operand OpN. This only calculates the set of used +// bits for cases where it does not depend on any operands (as is the case +// in shifts, for example). For concrete instructions from a program, the +// operand may be a subregister of a larger register, while Bits would +// correspond to the larger register in its entirety. Because of that, +// the parameter Begin can be used to indicate which bit of Bits should be +// considered the LSB of of the operand. +bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN, + BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) { + using namespace Hexagon; + + const MCInstrDesc &D = HII.get(Opc); + if (D.mayStore()) { + if (OpN == D.getNumOperands()-1) + return getUsedBitsInStore(Opc, Bits, Begin); + return false; + } + + switch (Opc) { + // One register source. Used bits: R1[0-7]. + case A2_sxtb: + case A2_zxtb: + case A4_cmpbeqi: + case A4_cmpbgti: + case A4_cmpbgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // One register source. Used bits: R1[0-15]. + case A2_aslh: + case A2_sxth: + case A2_zxth: + case A4_cmpheqi: + case A4_cmphgti: + case A4_cmphgtui: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // One register source. Used bits: R1[16-31]. 
+ case A2_asrh: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources. Used bits: R1[0-7], R2[0-7]. + case A4_cmpbeq: + case A4_cmpbgt: + case A4_cmpbgtu: + if (OpN == 1) { + Bits.set(Begin, Begin+8); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[0-15]. + case A4_cmpheq: + case A4_cmphgt: + case A4_cmphgtu: + case A2_addh_h16_ll: + case A2_addh_h16_sat_ll: + case A2_addh_l16_ll: + case A2_addh_l16_sat_ll: + case A2_combine_ll: + case A2_subh_h16_ll: + case A2_subh_h16_sat_ll: + case A2_subh_l16_ll: + case A2_subh_l16_sat_ll: + case M2_mpy_acc_ll_s0: + case M2_mpy_acc_ll_s1: + case M2_mpy_acc_sat_ll_s0: + case M2_mpy_acc_sat_ll_s1: + case M2_mpy_ll_s0: + case M2_mpy_ll_s1: + case M2_mpy_nac_ll_s0: + case M2_mpy_nac_ll_s1: + case M2_mpy_nac_sat_ll_s0: + case M2_mpy_nac_sat_ll_s1: + case M2_mpy_rnd_ll_s0: + case M2_mpy_rnd_ll_s1: + case M2_mpy_sat_ll_s0: + case M2_mpy_sat_ll_s1: + case M2_mpy_sat_rnd_ll_s0: + case M2_mpy_sat_rnd_ll_s1: + case M2_mpyd_acc_ll_s0: + case M2_mpyd_acc_ll_s1: + case M2_mpyd_ll_s0: + case M2_mpyd_ll_s1: + case M2_mpyd_nac_ll_s0: + case M2_mpyd_nac_ll_s1: + case M2_mpyd_rnd_ll_s0: + case M2_mpyd_rnd_ll_s1: + case M2_mpyu_acc_ll_s0: + case M2_mpyu_acc_ll_s1: + case M2_mpyu_ll_s0: + case M2_mpyu_ll_s1: + case M2_mpyu_nac_ll_s0: + case M2_mpyu_nac_ll_s1: + case M2_mpyud_acc_ll_s0: + case M2_mpyud_acc_ll_s1: + case M2_mpyud_ll_s0: + case M2_mpyud_ll_s1: + case M2_mpyud_nac_ll_s0: + case M2_mpyud_nac_ll_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources. Used bits: R1[0-15], R2[16-31]. + case A2_addh_h16_lh: + case A2_addh_h16_sat_lh: + case A2_combine_lh: + case A2_subh_h16_lh: + case A2_subh_h16_sat_lh: + case M2_mpy_acc_lh_s0: + case M2_mpy_acc_lh_s1: + case M2_mpy_acc_sat_lh_s0: + case M2_mpy_acc_sat_lh_s1: + case M2_mpy_lh_s0: + case M2_mpy_lh_s1: + case M2_mpy_nac_lh_s0: + case M2_mpy_nac_lh_s1: + case M2_mpy_nac_sat_lh_s0: + case M2_mpy_nac_sat_lh_s1: + case M2_mpy_rnd_lh_s0: + case M2_mpy_rnd_lh_s1: + case M2_mpy_sat_lh_s0: + case M2_mpy_sat_lh_s1: + case M2_mpy_sat_rnd_lh_s0: + case M2_mpy_sat_rnd_lh_s1: + case M2_mpyd_acc_lh_s0: + case M2_mpyd_acc_lh_s1: + case M2_mpyd_lh_s0: + case M2_mpyd_lh_s1: + case M2_mpyd_nac_lh_s0: + case M2_mpyd_nac_lh_s1: + case M2_mpyd_rnd_lh_s0: + case M2_mpyd_rnd_lh_s1: + case M2_mpyu_acc_lh_s0: + case M2_mpyu_acc_lh_s1: + case M2_mpyu_lh_s0: + case M2_mpyu_lh_s1: + case M2_mpyu_nac_lh_s0: + case M2_mpyu_nac_lh_s1: + case M2_mpyud_acc_lh_s0: + case M2_mpyud_acc_lh_s1: + case M2_mpyud_lh_s0: + case M2_mpyud_lh_s1: + case M2_mpyud_nac_lh_s0: + case M2_mpyud_nac_lh_s1: + // These four are actually LH. + case A2_addh_l16_hl: + case A2_addh_l16_sat_hl: + case A2_subh_l16_hl: + case A2_subh_l16_sat_hl: + if (OpN == 1) { + Bits.set(Begin, Begin+16); + return true; + } + if (OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[0-15]. 
+ case A2_addh_h16_hl: + case A2_addh_h16_sat_hl: + case A2_combine_hl: + case A2_subh_h16_hl: + case A2_subh_h16_sat_hl: + case M2_mpy_acc_hl_s0: + case M2_mpy_acc_hl_s1: + case M2_mpy_acc_sat_hl_s0: + case M2_mpy_acc_sat_hl_s1: + case M2_mpy_hl_s0: + case M2_mpy_hl_s1: + case M2_mpy_nac_hl_s0: + case M2_mpy_nac_hl_s1: + case M2_mpy_nac_sat_hl_s0: + case M2_mpy_nac_sat_hl_s1: + case M2_mpy_rnd_hl_s0: + case M2_mpy_rnd_hl_s1: + case M2_mpy_sat_hl_s0: + case M2_mpy_sat_hl_s1: + case M2_mpy_sat_rnd_hl_s0: + case M2_mpy_sat_rnd_hl_s1: + case M2_mpyd_acc_hl_s0: + case M2_mpyd_acc_hl_s1: + case M2_mpyd_hl_s0: + case M2_mpyd_hl_s1: + case M2_mpyd_nac_hl_s0: + case M2_mpyd_nac_hl_s1: + case M2_mpyd_rnd_hl_s0: + case M2_mpyd_rnd_hl_s1: + case M2_mpyu_acc_hl_s0: + case M2_mpyu_acc_hl_s1: + case M2_mpyu_hl_s0: + case M2_mpyu_hl_s1: + case M2_mpyu_nac_hl_s0: + case M2_mpyu_nac_hl_s1: + case M2_mpyud_acc_hl_s0: + case M2_mpyud_acc_hl_s1: + case M2_mpyud_hl_s0: + case M2_mpyud_hl_s1: + case M2_mpyud_nac_hl_s0: + case M2_mpyud_nac_hl_s1: + if (OpN == 1) { + Bits.set(Begin+16, Begin+32); + return true; + } + if (OpN == 2) { + Bits.set(Begin, Begin+16); + return true; + } + break; + + // Two register sources, used bits: R1[16-31], R2[16-31]. + case A2_addh_h16_hh: + case A2_addh_h16_sat_hh: + case A2_combine_hh: + case A2_subh_h16_hh: + case A2_subh_h16_sat_hh: + case M2_mpy_acc_hh_s0: + case M2_mpy_acc_hh_s1: + case M2_mpy_acc_sat_hh_s0: + case M2_mpy_acc_sat_hh_s1: + case M2_mpy_hh_s0: + case M2_mpy_hh_s1: + case M2_mpy_nac_hh_s0: + case M2_mpy_nac_hh_s1: + case M2_mpy_nac_sat_hh_s0: + case M2_mpy_nac_sat_hh_s1: + case M2_mpy_rnd_hh_s0: + case M2_mpy_rnd_hh_s1: + case M2_mpy_sat_hh_s0: + case M2_mpy_sat_hh_s1: + case M2_mpy_sat_rnd_hh_s0: + case M2_mpy_sat_rnd_hh_s1: + case M2_mpyd_acc_hh_s0: + case M2_mpyd_acc_hh_s1: + case M2_mpyd_hh_s0: + case M2_mpyd_hh_s1: + case M2_mpyd_nac_hh_s0: + case M2_mpyd_nac_hh_s1: + case M2_mpyd_rnd_hh_s0: + case M2_mpyd_rnd_hh_s1: + case M2_mpyu_acc_hh_s0: + case M2_mpyu_acc_hh_s1: + case M2_mpyu_hh_s0: + case M2_mpyu_hh_s1: + case M2_mpyu_nac_hh_s0: + case M2_mpyu_nac_hh_s1: + case M2_mpyud_acc_hh_s0: + case M2_mpyud_acc_hh_s1: + case M2_mpyud_hh_s0: + case M2_mpyud_hh_s1: + case M2_mpyud_nac_hh_s0: + case M2_mpyud_nac_hh_s1: + if (OpN == 1 || OpN == 2) { + Bits.set(Begin+16, Begin+32); + return true; + } + break; + } + + return false; +} + + +// Calculate the register class that matches Reg:Sub. For example, if +// vreg1 is a double register, then vreg1:subreg_hireg would match "int" +// register class. +const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass( + const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) { + if (!TargetRegisterInfo::isVirtualRegister(RR.Reg)) + return nullptr; + auto *RC = MRI.getRegClass(RR.Reg); + if (RR.Sub == 0) + return RC; + + auto VerifySR = [] (unsigned Sub) -> void { + assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg); + }; + + switch (RC->getID()) { + case Hexagon::DoubleRegsRegClassID: + VerifySR(RR.Sub); + return &Hexagon::IntRegsRegClass; + } + return nullptr; +} + + +// Check if RD could be replaced with RS at any possible use of RD. +// For example a predicate register cannot be replaced with a integer +// register, but a 64-bit register with a subregister can be replaced +// with a 32-bit register. 
+bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
+      const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
+  if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) ||
+      !TargetRegisterInfo::isVirtualRegister(RS.Reg))
+    return false;
+  // Return false if one (or both) classes are nullptr.
+  auto *DRC = getFinalVRegClass(RD, MRI);
+  if (!DRC)
+    return false;
+
+  return DRC == getFinalVRegClass(RS, MRI);
+}
+
+
+//
+// Dead code elimination
+//
+namespace {
+  class DeadCodeElimination {
+  public:
+    DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
+      : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
+        MDT(mdt), MRI(mf.getRegInfo()) {}
+
+    bool run() {
+      return runOnNode(MDT.getRootNode());
+    }
+
+  private:
+    bool isDead(unsigned R) const;
+    bool runOnNode(MachineDomTreeNode *N);
+
+    MachineFunction &MF;
+    const HexagonInstrInfo &HII;
+    MachineDominatorTree &MDT;
+    MachineRegisterInfo &MRI;
+  };
+}
+
+
+bool DeadCodeElimination::isDead(unsigned R) const {
+  for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+    MachineInstr *UseI = I->getParent();
+    if (UseI->isDebugValue())
+      continue;
+    if (UseI->isPHI()) {
+      assert(!UseI->getOperand(0).getSubReg());
+      unsigned DR = UseI->getOperand(0).getReg();
+      if (DR == R)
+        continue;
+    }
+    return false;
+  }
+  return true;
+}
+
+
+bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
+  bool Changed = false;
+  typedef GraphTraits<MachineDomTreeNode*> GTN;
+  for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
+    Changed |= runOnNode(*I);
+
+  MachineBasicBlock *B = N->getBlock();
+  std::vector<MachineInstr*> Instrs;
+  for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
+    Instrs.push_back(&*I);
+
+  for (auto MI : Instrs) {
+    unsigned Opc = MI->getOpcode();
+    // Do not touch lifetime markers. This is why the target-independent DCE
+    // cannot be used.
+    if (Opc == TargetOpcode::LIFETIME_START ||
+        Opc == TargetOpcode::LIFETIME_END)
+      continue;
+    bool Store = false;
+    if (MI->isInlineAsm())
+      continue;
+    // Delete PHIs if possible.
+    if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
+      continue;
+
+    bool AllDead = true;
+    SmallVector<unsigned,2> Regs;
+    for (auto &Op : MI->operands()) {
+      if (!Op.isReg() || !Op.isDef())
+        continue;
+      unsigned R = Op.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
+        AllDead = false;
+        break;
+      }
+      Regs.push_back(R);
+    }
+    if (!AllDead)
+      continue;
+
+    B->erase(MI);
+    for (unsigned i = 0, n = Regs.size(); i != n; ++i)
+      MRI.markUsesInDebugValueAsUndef(Regs[i]);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+
+//
+// Eliminate redundant instructions
+//
+// This transformation will identify instructions where the output register
+// is the same as one of its input registers. This only works on instructions
+// that define a single register (unlike post-increment loads, for example).
+// The equality check is actually more detailed: the code calculates which
+// bits of the output are used, and only compares these bits with the input
+// registers.
+// If the output matches an input, the instruction is replaced with COPY.
+// The copies will be removed by another transformation.
+// (A standalone illustration of this used-bits check follows below.)
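// A self-contained illustration of the equality check described above,
// assuming a 32-bit value: if every *used* bit of the result equals the
// corresponding bit of an input, the defining instruction can be replaced by
// a copy. Here the consumer only reads the low 16 bits, so a definition that
// differs from S only in the high half is redundant with respect to S.
#include <cassert>
#include <cstdint>

static uint32_t setHighHalf(uint32_t S, uint32_t H) {
  return (S & 0xFFFFu) | (H << 16);          // differs from S only in bits 16-31
}

int main() {
  uint32_t S = 0x12345678, R = setHighHalf(S, 0xABCD);
  uint32_t UsedMask = 0xFFFFu;               // consumer uses only bits 0-15
  assert((R & UsedMask) == (S & UsedMask));  // so R may be replaced by a copy of S
}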
+namespace { + class RedundantInstrElimination : public Transformation { + public: + RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN, + unsigned &LostB, unsigned &LostE); + bool computeUsedBits(unsigned Reg, BitVector &Bits); + bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits, + uint16_t Begin); + bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the instruction is a lossy shift left, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN, Width; + switch (Opc) { + case S2_asl_i_p: + ImN = 2; + RegN = 1; + Width = 64; + break; + case S2_asl_i_p_acc: + case S2_asl_i_p_and: + case S2_asl_i_p_nac: + case S2_asl_i_p_or: + case S2_asl_i_p_xacc: + ImN = 3; + RegN = 2; + Width = 64; + break; + case S2_asl_i_r: + ImN = 2; + RegN = 1; + Width = 32; + break; + case S2_addasl_rrri: + case S4_andi_asl_ri: + case S4_ori_asl_ri: + case S4_addi_asl_ri: + case S4_subi_asl_ri: + case S2_asl_i_r_acc: + case S2_asl_i_r_and: + case S2_asl_i_r_nac: + case S2_asl_i_r_or: + case S2_asl_i_r_sat: + case S2_asl_i_r_xacc: + ImN = 3; + RegN = 2; + Width = 32; + break; + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + if (S == 0) + return false; + LostB = Width-S; + LostE = Width; + return true; +} + + +// Check if the instruction is a lossy shift right, where the input being +// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range +// of bit indices that are lost. +bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI, + unsigned OpN, unsigned &LostB, unsigned &LostE) { + using namespace Hexagon; + unsigned Opc = MI.getOpcode(); + unsigned ImN, RegN; + switch (Opc) { + case S2_asr_i_p: + case S2_lsr_i_p: + ImN = 2; + RegN = 1; + break; + case S2_asr_i_p_acc: + case S2_asr_i_p_and: + case S2_asr_i_p_nac: + case S2_asr_i_p_or: + case S2_lsr_i_p_acc: + case S2_lsr_i_p_and: + case S2_lsr_i_p_nac: + case S2_lsr_i_p_or: + case S2_lsr_i_p_xacc: + ImN = 3; + RegN = 2; + break; + case S2_asr_i_r: + case S2_lsr_i_r: + ImN = 2; + RegN = 1; + break; + case S4_andi_lsr_ri: + case S4_ori_lsr_ri: + case S4_addi_lsr_ri: + case S4_subi_lsr_ri: + case S2_asr_i_r_acc: + case S2_asr_i_r_and: + case S2_asr_i_r_nac: + case S2_asr_i_r_or: + case S2_lsr_i_r_acc: + case S2_lsr_i_r_and: + case S2_lsr_i_r_nac: + case S2_lsr_i_r_or: + case S2_lsr_i_r_xacc: + ImN = 3; + RegN = 2; + break; + + default: + return false; + } + + if (RegN != OpN) + return false; + + assert(MI.getOperand(ImN).isImm()); + unsigned S = MI.getOperand(ImN).getImm(); + LostB = 0; + LostE = S; + return true; +} + + +// Calculate the bit vector that corresponds to the used bits of register Reg. 
+// The vector Bits has the same size as the size of Reg in bits. If the cal-
+// culation fails (i.e. the used bits are unknown), it returns false. Other-
+// wise, it returns true and sets the corresponding bits in Bits.
+bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
+  BitVector Used(Bits.size());
+  RegisterSet Visited;
+  std::vector<unsigned> Pending;
+  Pending.push_back(Reg);
+
+  for (unsigned i = 0; i < Pending.size(); ++i) {
+    unsigned R = Pending[i];
+    if (Visited.has(R))
+      continue;
+    Visited.insert(R);
+    for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
+      BitTracker::RegisterRef UR = *I;
+      unsigned B, W;
+      if (!HBS::getSubregMask(UR, B, W, MRI))
+        return false;
+      MachineInstr &UseI = *I->getParent();
+      if (UseI.isPHI() || UseI.isCopy()) {
+        unsigned DefR = UseI.getOperand(0).getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(DefR))
+          return false;
+        Pending.push_back(DefR);
+      } else {
+        if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
+          return false;
+      }
+    }
+  }
+  Bits |= Used;
+  return true;
+}
+
+
+// Calculate the bits used by instruction MI in a register in operand OpN.
+// Return true/false if the calculation succeeds/fails. If it succeeds, set
+// used bits in Bits. This function does not reset any bits in Bits, so
+// subsequent calls over different instructions will result in the union
+// of the used bits in all these instructions.
+// The register in question may be used with a sub-register, whereas Bits
+// holds the bits for the entire register. To keep track of that, the
+// argument Begin indicates where in Bits is the lowest-significant bit
+// of the register used in operand OpN. For example, in instruction:
+//   vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
+// the operand 1 is a 32-bit register, which happens to be a subregister
+// of the 64-bit register vreg2, and that subregister starts at position 32.
+// In this case Begin=32, since Bits[32] would be the lowest-significant bit
+// of vreg2:subreg_hireg.
+bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
+      unsigned OpN, BitVector &Bits, uint16_t Begin) {
+  unsigned Opc = MI.getOpcode();
+  BitVector T(Bits.size());
+  bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
+  // Even if we don't have bits yet, we could still provide some information
+  // if the instruction is a lossy shift: the lost bits will be marked as
+  // not used.
+  unsigned LB, LE;
+  if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
+    assert(MI.getOperand(OpN).isReg());
+    BitTracker::RegisterRef RR = MI.getOperand(OpN);
+    const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
+    uint16_t Width = RC->getSize()*8;
+
+    if (!GotBits)
+      T.set(Begin, Begin+Width);
+    assert(LB <= LE && LB < Width && LE <= Width);
+    T.reset(Begin+LB, Begin+LE);
+    GotBits = true;
+  }
+  if (GotBits)
+    Bits |= T;
+  return GotBits;
+}
+
+
+// Calculates the used bits in RD ("defined register"), and checks if these
+// bits in RS ("used register") and RD are identical. (A standalone
+// illustration of the lossy-shift bookkeeping above follows.)
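// A standalone sketch of the lossy-shift reasoning used by computeUsedBits:
// for "R << S" on a Width-bit register, bits [Width-S, Width) of the input
// never reach the result, so they are recorded as unused. This mirrors the
// isLossyShiftLeft bookkeeping with plain arrays instead of BitVector.
#include <cassert>

static void markUsedForShiftLeft(bool *Bits, unsigned Width, unsigned S) {
  for (unsigned i = 0; i < Width; ++i)
    Bits[i] = i < Width - S;        // the range [Width-S, Width) is lost
}

int main() {
  bool Used[32];
  markUsedForShiftLeft(Used, 32, 10);
  assert(Used[21] && !Used[22]);    // bits 22..31 of the input are dead
}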
+bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD, + BitTracker::RegisterRef RS) { + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + + unsigned DB, DW; + if (!HBS::getSubregMask(RD, DB, DW, MRI)) + return false; + unsigned SB, SW; + if (!HBS::getSubregMask(RS, SB, SW, MRI)) + return false; + if (SW != DW) + return false; + + BitVector Used(DC.width()); + if (!computeUsedBits(RD.Reg, Used)) + return false; + + for (unsigned i = 0; i != DW; ++i) + if (Used[i+DB] && DC[DB+i] != SC[SB+i]) + return false; + return true; +} + + +bool RedundantInstrElimination::processBlock(MachineBasicBlock &B, + const RegisterSet&) { + bool Changed = false; + + for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) { + NextI = std::next(I); + MachineInstr *MI = &*I; + + if (MI->getOpcode() == TargetOpcode::COPY) + continue; + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + continue; + unsigned NumD = MI->getDesc().getNumDefs(); + if (NumD != 1) + continue; + + BitTracker::RegisterRef RD = MI->getOperand(0); + if (!BT.has(RD.Reg)) + continue; + const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg); + + // Find a source operand that is equal to the result. + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + if (!HBS::isTransparentCopy(RD, RS, MRI)) + continue; + + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW)) + continue; + + // If found, replace the instruction with a COPY. + DebugLoc DL = MI->getDebugLoc(); + const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI); + unsigned NewR = MRI.createVirtualRegister(FRC); + BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR) + .addReg(RS.Reg, 0, RS.Sub); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), SC); + Changed = true; + break; + } + } + + return Changed; +} + + +// +// Const generation +// +// Recognize instructions that produce constant values known at compile-time. +// Replace them with register definitions that load these constants directly. 
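// A sketch of the constant-splitting logic a genTfrConst-style helper can use
// for 64-bit values, under the assumption stated in the code that follows: a
// constant is emitted as a combine of two halves when either 32-bit half fits
// in a signed 8-bit immediate, and falls back to a CONST64 load otherwise.
// The printed mnemonics are illustrative only.
#include <cstdint>
#include <cstdio>

static bool fitsInt8(int32_t V) { return V >= -128 && V <= 127; }

int main() {
  int64_t C = 0x0000000500000003LL;
  int32_t Lo = int32_t(C), Hi = int32_t(C >> 32);
  if (fitsInt8(Lo) || fitsInt8(Hi))
    std::printf("combineii(#%d, #%d)\n", Hi, Lo);   // one packetizable pair
  else
    std::printf("CONST64(#%lld)\n", (long long)C);  // constant-pool load
}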
+namespace { + class ConstGeneration : public Transformation { + public: + ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii, + MachineRegisterInfo &mri) + : Transformation(true), HII(hii), MRI(mri), BT(bt) {} + bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override; + private: + bool isTfrConst(const MachineInstr *MI) const; + bool isConst(unsigned R, int64_t &V) const; + unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + +bool ConstGeneration::isConst(unsigned R, int64_t &C) const { + if (!BT.has(R)) + return false; + const BitTracker::RegisterCell &RC = BT.lookup(R); + int64_t T = 0; + for (unsigned i = RC.width(); i > 0; --i) { + const BitTracker::BitValue &V = RC[i-1]; + T <<= 1; + if (V.is(1)) + T |= 1; + else if (!V.is(0)) + return false; + } + C = T; + return true; +} + + +bool ConstGeneration::isTfrConst(const MachineInstr *MI) const { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + case Hexagon::A2_tfrsi: + case Hexagon::A2_tfrpi: + case Hexagon::TFR_PdTrue: + case Hexagon::TFR_PdFalse: + case Hexagon::CONST32_Int_Real: + case Hexagon::CONST64_Int_Real: + return true; + } + return false; +} + + +// Generate a transfer-immediate instruction that is appropriate for the +// register class and the actual value being transferred. +unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C, + MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) { + unsigned Reg = MRI.createVirtualRegister(RC); + if (RC == &Hexagon::IntRegsRegClass) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg) + .addImm(int32_t(C)); + return Reg; + } + + if (RC == &Hexagon::DoubleRegsRegClass) { + if (isInt<8>(C)) { + BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg) + .addImm(C); + return Reg; + } + + unsigned Lo = Lo_32(C), Hi = Hi_32(C); + if (isInt<8>(Lo) || isInt<8>(Hi)) { + unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii + : Hexagon::A4_combineii; + BuildMI(B, At, DL, HII.get(Opc), Reg) + .addImm(int32_t(Hi)) + .addImm(int32_t(Lo)); + return Reg; + } + + BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg) + .addImm(C); + return Reg; + } + + if (RC == &Hexagon::PredRegsRegClass) { + unsigned Opc; + if (C == 0) + Opc = Hexagon::TFR_PdFalse; + else if ((C & 0xFF) == 0xFF) + Opc = Hexagon::TFR_PdTrue; + else + return 0; + BuildMI(B, At, DL, HII.get(Opc), Reg); + return Reg; + } + + return 0; +} + + +bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) { + bool Changed = false; + RegisterSet Defs; + + for (auto I = B.begin(), E = B.end(); I != E; ++I) { + if (isTfrConst(I)) + continue; + Defs.clear(); + HBS::getInstrDefs(*I, Defs); + if (Defs.count() != 1) + continue; + unsigned DR = Defs.find_first(); + if (!TargetRegisterInfo::isVirtualRegister(DR)) + continue; + int64_t C; + if (isConst(DR, C)) { + DebugLoc DL = I->getDebugLoc(); + auto At = I->isPHI() ? B.getFirstNonPHI() : I; + unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL); + if (ImmReg) { + HBS::replaceReg(DR, ImmReg, MRI); + BT.put(ImmReg, BT.lookup(DR)); + Changed = true; + } + } + } + return Changed; +} + + +// +// Copy generation +// +// Identify pairs of available registers which hold identical values. 
+// In such cases, only one of them needs to be calculated, the other one
+// will be defined as a copy of the first.
+//
+// Copy propagation
+//
+// Eliminate register copies RD = RS, by replacing the uses of RD with
+// uses of RS.
+namespace {
+  class CopyGeneration : public Transformation {
+  public:
+    CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
+        MachineRegisterInfo &mri)
+      : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+  private:
+    bool findMatch(const BitTracker::RegisterRef &Inp,
+        BitTracker::RegisterRef &Out, const RegisterSet &AVs);
+
+    const HexagonInstrInfo &HII;
+    MachineRegisterInfo &MRI;
+    BitTracker &BT;
+  };
+
+  class CopyPropagation : public Transformation {
+  public:
+    CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
+        : Transformation(false), MRI(mri) {}
+    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+    static bool isCopyReg(unsigned Opc);
+  private:
+    bool propagateRegCopy(MachineInstr &MI);
+
+    MachineRegisterInfo &MRI;
+  };
+
+}
+
+
+/// Check if there is a register in AVs that is identical to Inp. If so,
+/// set Out to the found register. The output may be a pair Reg:Sub.
+bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
+      BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
+  if (!BT.has(Inp.Reg))
+    return false;
+  const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
+  unsigned B, W;
+  if (!HBS::getSubregMask(Inp, B, W, MRI))
+    return false;
+
+  for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
+    if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
+      continue;
+    const BitTracker::RegisterCell &RC = BT.lookup(R);
+    unsigned RW = RC.width();
+    if (W == RW) {
+      if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
+        continue;
+      if (!HBS::isEqual(InpRC, B, RC, 0, W))
+        continue;
+      Out.Reg = R;
+      Out.Sub = 0;
+      return true;
+    }
+    // Check if there is a super-register whose part (accessed via a
+    // subregister) is equal to the input.
+    // Only do double registers for now.
+    if (W*2 != RW)
+      continue;
+    if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
+      continue;
+
+    if (HBS::isEqual(InpRC, B, RC, 0, W))
+      Out.Sub = Hexagon::subreg_loreg;
+    else if (HBS::isEqual(InpRC, B, RC, W, W))
+      Out.Sub = Hexagon::subreg_hireg;
+    else
+      continue;
+    Out.Reg = R;
+    return true;
+  }
+  return false;
+}
+
+
+bool CopyGeneration::processBlock(MachineBasicBlock &B,
+      const RegisterSet &AVs) {
+  RegisterSet AVB(AVs);
+  bool Changed = false;
+  RegisterSet Defs;
+
+  for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
+       ++I, AVB.insert(Defs)) {
+    NextI = std::next(I);
+    Defs.clear();
+    HBS::getInstrDefs(*I, Defs);
+
+    unsigned Opc = I->getOpcode();
+    if (CopyPropagation::isCopyReg(Opc))
+      continue;
+
+    for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
+      BitTracker::RegisterRef MR;
+      if (!findMatch(R, MR, AVB))
+        continue;
+      DebugLoc DL = I->getDebugLoc();
+      auto *FRC = HBS::getFinalVRegClass(MR, MRI);
+      unsigned NewR = MRI.createVirtualRegister(FRC);
+      auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+      BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+        .addReg(MR.Reg, 0, MR.Sub);
+      BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
+    }
+  }
+
+  return Changed;
+}
+
+
+bool CopyPropagation::isCopyReg(unsigned Opc) {
+  switch (Opc) {
+    case TargetOpcode::COPY:
+    case TargetOpcode::REG_SEQUENCE:
+    case Hexagon::A2_tfr:
+    case Hexagon::A2_tfrp:
+    case Hexagon::A2_combinew:
+    case Hexagon::A4_combineir:
+    case Hexagon::A4_combineri:
+      return true;
+    default:
+      break;
+  }
+  return false;
+}
+
+
+bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
+  bool Changed = false;
+  unsigned Opc = MI.getOpcode();
+  BitTracker::RegisterRef RD = MI.getOperand(0);
+  assert(MI.getOperand(0).getSubReg() == 0);
+
+  switch (Opc) {
+    case TargetOpcode::COPY:
+    case Hexagon::A2_tfr:
+    case Hexagon::A2_tfrp: {
+      BitTracker::RegisterRef RS = MI.getOperand(1);
+      if (!HBS::isTransparentCopy(RD, RS, MRI))
+        break;
+      if (RS.Sub != 0)
+        Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
+      else
+        Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
+      break;
+    }
+    case TargetOpcode::REG_SEQUENCE: {
+      BitTracker::RegisterRef SL, SH;
+      if (HBS::parseRegSequence(MI, SL, SH)) {
+        Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+                                         SL.Reg, SL.Sub, MRI);
+        Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+                                          SH.Reg, SH.Sub, MRI);
+      }
+      break;
+    }
+    case Hexagon::A2_combinew: {
+      BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
+      Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
+                                       RL.Reg, RL.Sub, MRI);
+      Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
+                                        RH.Reg, RH.Sub, MRI);
+      break;
+    }
+    case Hexagon::A4_combineir:
+    case Hexagon::A4_combineri: {
+      unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
+      unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg
+                                                    : Hexagon::subreg_hireg;
+      BitTracker::RegisterRef RS = MI.getOperand(SrcX);
+      Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
+      break;
+    }
+  }
+  return Changed;
+}
+
+
+bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+  std::vector<MachineInstr*> Instrs;
+  for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
+    Instrs.push_back(&*I);
+
+  bool Changed = false;
+  for (auto I : Instrs) {
+    unsigned Opc = I->getOpcode();
+    if (!CopyPropagation::isCopyReg(Opc))
+      continue;
+    Changed |= propagateRegCopy(*I);
+  }
+
+  return Changed;
+}
+
+
+//
+// Bit simplification
+//
+// Recognize patterns that can be simplified and replace them with the
+// simpler forms. This is by no means complete.
+namespace {
+  class BitSimplification : public Transformation {
+  public:
+    BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
+        MachineRegisterInfo &mri)
+      : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+    bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+  private:
+    struct RegHalf : public BitTracker::RegisterRef {
+      bool Low;  // Low/High halfword.
+ }; + + bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC, + unsigned B, RegHalf &RH); + + bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC, + BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt); + unsigned getCombineOpcode(bool HLow, bool LLow); + + bool genStoreUpperHalf(MachineInstr *MI); + bool genStoreImmediate(MachineInstr *MI); + bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD, + const BitTracker::RegisterCell &RC); + + const HexagonInstrInfo &HII; + MachineRegisterInfo &MRI; + BitTracker &BT; + }; +} + + +// Check if the bits [B..B+16) in register cell RC form a valid halfword, +// i.e. [0..16), [16..32), etc. of some register. If so, return true and +// set the information about the found register in RH. +bool BitSimplification::matchHalf(unsigned SelfR, + const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) { + // XXX This could be searching in the set of available registers, in case + // the match is not exact. + + // Match 16-bit chunks, where the RC[B..B+15] references exactly one + // register and all the bits B..B+15 match between RC and the register. + // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... }, + // and RC = { [0]:0 [1-15]:v1[1-15]... }. + bool Low = false; + unsigned I = B; + while (I < B+16 && RC[I].num()) + I++; + if (I == B+16) + return false; + + unsigned Reg = RC[I].RefI.Reg; + unsigned P = RC[I].RefI.Pos; // The RefI.Pos will be advanced by I-B. + if (P < I-B) + return false; + unsigned Pos = P - (I-B); + + if (Reg == 0 || Reg == SelfR) // Don't match "self". + return false; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (!BT.has(Reg)) + return false; + + const BitTracker::RegisterCell &SC = BT.lookup(Reg); + if (Pos+16 > SC.width()) + return false; + + for (unsigned i = 0; i < 16; ++i) { + const BitTracker::BitValue &RV = RC[i+B]; + if (RV.Type == BitTracker::BitValue::Ref) { + if (RV.RefI.Reg != Reg) + return false; + if (RV.RefI.Pos != i+Pos) + return false; + continue; + } + if (RC[i+B] != SC[i+Pos]) + return false; + } + + unsigned Sub = 0; + switch (Pos) { + case 0: + Sub = Hexagon::subreg_loreg; + Low = true; + break; + case 16: + Sub = Hexagon::subreg_loreg; + Low = false; + break; + case 32: + Sub = Hexagon::subreg_hireg; + Low = true; + break; + case 48: + Sub = Hexagon::subreg_hireg; + Low = false; + break; + default: + return false; + } + + RH.Reg = Reg; + RH.Sub = Sub; + RH.Low = Low; + // If the subregister is not valid with the register, set it to 0. + if (!HBS::getFinalVRegClass(RH, MRI)) + RH.Sub = 0; + + return true; +} + + +// Check if RC matches the pattern of a S2_packhl. If so, return true and +// set the inputs Rs and Rt. 
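+// For reference, a sketch of the S2_packhl semantics that the matcher
+// verifies bit-by-bit, written as plain C++ (illustration only, not part
+// of the pass):
+//
+//   uint64_t packhl(uint32_t Rs, uint32_t Rt) {
+//     uint32_t Hi = (Rs & 0xFFFF0000) | (Rt >> 16);       // Rs.H : Rt.H
+//     uint32_t Lo = (Rs << 16) | (Rt & 0x0000FFFF);       // Rs.L : Rt.L
+//     return ((uint64_t)Hi << 32) | Lo;
+//   }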
+bool BitSimplification::matchPackhl(unsigned SelfR,
+      const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
+      BitTracker::RegisterRef &Rt) {
+  RegHalf L1, H1, L2, H2;
+
+  if (!matchHalf(SelfR, RC, 0, L2)  || !matchHalf(SelfR, RC, 16, L1))
+    return false;
+  if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
+    return false;
+
+  // Rs = H1.L1, Rt = H2.L2
+  if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
+    return false;
+  if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
+    return false;
+
+  Rs = H1;
+  Rt = H2;
+  return true;
+}
+
+
+unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
+  return HLow ? LLow ? Hexagon::A2_combine_ll
+                     : Hexagon::A2_combine_lh
+              : LLow ? Hexagon::A2_combine_hl
+                     : Hexagon::A2_combine_hh;
+}
+
+
+// If MI stores the upper halfword of a register (potentially obtained via
+// shifts or extracts), replace it with a storerf instruction. This could
+// cause the "extraction" code to become dead.
+bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc != Hexagon::S2_storerh_io)
+    return false;
+
+  MachineOperand &ValOp = MI->getOperand(2);
+  BitTracker::RegisterRef RS = ValOp;
+  if (!BT.has(RS.Reg))
+    return false;
+  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+  RegHalf H;
+  if (!matchHalf(0, RC, 0, H))
+    return false;
+  if (H.Low)
+    return false;
+  MI->setDesc(HII.get(Hexagon::S2_storerf_io));
+  ValOp.setReg(H.Reg);
+  ValOp.setSubReg(H.Sub);
+  return true;
+}
+
+
+// If MI stores a value known at compile-time, and the value is within a range
+// that avoids using constant-extenders, replace it with a store-immediate.
+bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
+  unsigned Opc = MI->getOpcode();
+  unsigned Align = 0;
+  switch (Opc) {
+    case Hexagon::S2_storeri_io:
+      Align++;
+    case Hexagon::S2_storerh_io:
+      Align++;
+    case Hexagon::S2_storerb_io:
+      break;
+    default:
+      return false;
+  }
+
+  // Avoid stores to frame-indices (due to an unknown offset).
+  if (!MI->getOperand(0).isReg())
+    return false;
+  MachineOperand &OffOp = MI->getOperand(1);
+  if (!OffOp.isImm())
+    return false;
+
+  int64_t Off = OffOp.getImm();
+  // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
+  if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
+    return false;
+
+  BitTracker::RegisterRef RS = MI->getOperand(2);
+  if (!BT.has(RS.Reg))
+    return false;
+  const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
+  uint64_t U;
+  if (!HBS::getConst(RC, 0, RC.width(), U))
+    return false;
+
+  // Only consider 8-bit values to avoid constant-extenders.
+  int V;
+  switch (Opc) {
+    case Hexagon::S2_storerb_io:
+      V = int8_t(U);
+      break;
+    case Hexagon::S2_storerh_io:
+      V = int16_t(U);
+      break;
+    case Hexagon::S2_storeri_io:
+      V = int32_t(U);
+      break;
+  }
+  if (!isInt<8>(V))
+    return false;
+
+  MI->RemoveOperand(2);
+  switch (Opc) {
+    case Hexagon::S2_storerb_io:
+      MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
+      break;
+    case Hexagon::S2_storerh_io:
+      MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
+      break;
+    case Hexagon::S2_storeri_io:
+      MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
+      break;
+  }
+  MI->addOperand(MachineOperand::CreateImm(V));
+  return true;
+}
+
+
+// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
+// last instruction in a sequence that results in something equivalent to
+// the pack-halfwords. The intent is to cause the entire sequence to become
+// dead.
+bool BitSimplification::genPackhl(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == Hexagon::S2_packhl)
+    return false;
+  BitTracker::RegisterRef Rs, Rt;
+  if (!matchPackhl(RD.Reg, RC, Rs, Rt))
+    return false;
+
+  MachineBasicBlock &B = *MI->getParent();
+  unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
+  DebugLoc DL = MI->getDebugLoc();
+  BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+    .addReg(Rs.Reg, 0, Rs.Sub)
+    .addReg(Rt.Reg, 0, Rt.Sub);
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI produces a halfword of the input in the low half of the output,
+// replace it with zero-extend or extractu.
+bool BitSimplification::genExtractHalf(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  RegHalf L;
+  // Check for halfword in low 16 bits, zeros elsewhere.
+  if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
+    return false;
+
+  unsigned Opc = MI->getOpcode();
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+
+  // Prefer zxth, since zxth can go in any slot, while extractu can only
+  // go in slots 2 and 3.
+  unsigned NewR = 0;
+  if (L.Low && Opc != Hexagon::A2_zxth) {
+    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+    BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+      .addReg(L.Reg, 0, L.Sub);
+  } else if (!L.Low && Opc != Hexagon::S2_extractu) {
+    NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+    BuildMI(B, MI, DL, HII.get(Hexagon::S2_extractu), NewR)
+      .addReg(L.Reg, 0, L.Sub)
+      .addImm(16)
+      .addImm(16);
+  }
+  if (NewR == 0)
+    return false;
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
+// combine.
+bool BitSimplification::genCombineHalf(MachineInstr *MI,
+      BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
+  RegHalf L, H;
+  // Check for combine h/l
+  if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
+    return false;
+  // Do nothing if this is just a reg copy.
+  if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
+    return false;
+
+  unsigned Opc = MI->getOpcode();
+  unsigned COpc = getCombineOpcode(H.Low, L.Low);
+  if (COpc == Opc)
+    return false;
+
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+  BuildMI(B, MI, DL, HII.get(COpc), NewR)
+    .addReg(H.Reg, 0, H.Sub)
+    .addReg(L.Reg, 0, L.Sub);
+  HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
+  BT.put(BitTracker::RegisterRef(NewR), RC);
+  return true;
+}
+
+
+// If MI resets the high bits of a register and keeps the lower ones,
+// replace it with zero-extend byte/half, and-immediate, or extractu,
+// as appropriate.
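+// For illustration (the width W below comes from the bit-tracker cell,
+// and vregN/vregS are made-up registers), the replacement generated is:
+//   W ==  8:   vregN = A2_zxtb vregS
+//   W == 16:   vregN = A2_zxth vregS
+//   W <  10:   vregN = A2_andir vregS, (1<<W)-1
+//   otherwise: vregN = S2_extractu vregS, W, 0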
+bool BitSimplification::genExtractLow(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + case Hexagon::S2_extractu: + return false; + } + if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) { + int32_t Imm = MI->getOperand(2).getImm(); + if (isInt<10>(Imm)) + return false; + } + + if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm()) + return false; + unsigned W = RC.width(); + while (W > 0 && RC[W-1].is(0)) + W--; + if (W == 0 || W == RC.width()) + return false; + unsigned NewOpc = (W == 8) ? Hexagon::A2_zxtb + : (W == 16) ? Hexagon::A2_zxth + : (W < 10) ? Hexagon::A2_andir + : Hexagon::S2_extractu; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->uses()) { + if (!Op.isReg()) + continue; + BitTracker::RegisterRef RS = Op; + if (!BT.has(RS.Reg)) + continue; + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + unsigned BN, BW; + if (!HBS::getSubregMask(RS, BN, BW, MRI)) + continue; + if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W)) + continue; + + unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass); + auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR) + .addReg(RS.Reg, 0, RS.Sub); + if (NewOpc == Hexagon::A2_andir) + MIB.addImm((1 << W) - 1); + else if (NewOpc == Hexagon::S2_extractu) + MIB.addImm(W).addImm(0); + HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI); + BT.put(BitTracker::RegisterRef(NewR), RC); + return true; + } + return false; +} + + +// Check for tstbit simplification opportunity, where the bit being checked +// can be tracked back to another register. For example: +// vreg2 = S2_lsr_i_r vreg1, 5 +// vreg3 = S2_tstbit_i vreg2, 0 +// => +// vreg3 = S2_tstbit_i vreg1, 5 +bool BitSimplification::simplifyTstbit(MachineInstr *MI, + BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) { + unsigned Opc = MI->getOpcode(); + if (Opc != Hexagon::S2_tstbit_i) + return false; + + unsigned BN = MI->getOperand(2).getImm(); + BitTracker::RegisterRef RS = MI->getOperand(1); + unsigned F, W; + DebugLoc DL = MI->getDebugLoc(); + if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI)) + return false; + MachineBasicBlock &B = *MI->getParent(); + + const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg); + const BitTracker::BitValue &V = SC[F+BN]; + if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) { + const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg); + // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is + // a double register, need to use a subregister and adjust bit + // number. + unsigned P = UINT_MAX; + BitTracker::RegisterRef RR(V.RefI.Reg, 0); + if (TC == &Hexagon::DoubleRegsRegClass) { + P = V.RefI.Pos; + RR.Sub = Hexagon::subreg_loreg; + if (P >= 32) { + P -= 32; + RR.Sub = Hexagon::subreg_hireg; + } + } else if (TC == &Hexagon::IntRegsRegClass) { + P = V.RefI.Pos; + } + if (P != UINT_MAX) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR) + .addReg(RR.Reg, 0, RR.Sub) + .addImm(P); + HBS::replaceReg(RD.Reg, NewR, MRI); + BT.put(NewR, RC); + return true; + } + } else if (V.is(0) || V.is(1)) { + unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass); + unsigned NewOpc = V.is(0) ? 
Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
+    BuildMI(B, MI, DL, HII.get(NewOpc), NewR);
+    HBS::replaceReg(RD.Reg, NewR, MRI);
+    return true;
+  }
+
+  return false;
+}
+
+
+bool BitSimplification::processBlock(MachineBasicBlock &B,
+      const RegisterSet &AVs) {
+  bool Changed = false;
+  RegisterSet AVB = AVs;
+  RegisterSet Defs;
+
+  for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
+    MachineInstr *MI = &*I;
+    Defs.clear();
+    HBS::getInstrDefs(*MI, Defs);
+
+    unsigned Opc = MI->getOpcode();
+    if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
+      continue;
+
+    if (MI->mayStore()) {
+      bool T = genStoreUpperHalf(MI);
+      T = T || genStoreImmediate(MI);
+      Changed |= T;
+      continue;
+    }
+
+    if (Defs.count() != 1)
+      continue;
+    const MachineOperand &Op0 = MI->getOperand(0);
+    if (!Op0.isReg() || !Op0.isDef())
+      continue;
+    BitTracker::RegisterRef RD = Op0;
+    if (!BT.has(RD.Reg))
+      continue;
+    const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
+    const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
+
+    if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
+      bool T = genPackhl(MI, RD, RC);
+      Changed |= T;
+      continue;
+    }
+
+    if (FRC->getID() == Hexagon::IntRegsRegClassID) {
+      bool T = genExtractHalf(MI, RD, RC);
+      T = T || genCombineHalf(MI, RD, RC);
+      T = T || genExtractLow(MI, RD, RC);
+      Changed |= T;
+      continue;
+    }
+
+    if (FRC->getID() == Hexagon::PredRegsRegClassID) {
+      bool T = simplifyTstbit(MI, RD, RC);
+      Changed |= T;
+      continue;
+    }
+  }
+  return Changed;
+}
+
+
+bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  auto &HRI = *HST.getRegisterInfo();
+  auto &HII = *HST.getInstrInfo();
+
+  MDT = &getAnalysis<MachineDominatorTree>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  bool Changed;
+
+  Changed = DeadCodeElimination(MF, *MDT).run();
+
+  const HexagonEvaluator HE(HRI, MRI, HII, MF);
+  BitTracker BT(HE, MF);
+  DEBUG(BT.trace(true));
+  BT.run();
+
+  MachineBasicBlock &Entry = MF.front();
+
+  RegisterSet AIG;  // Available registers for IG.
+  ConstGeneration ImmG(BT, HII, MRI);
+  Changed |= visitBlock(Entry, ImmG, AIG);
+
+  RegisterSet ARE;  // Available registers for RIE.
+  RedundantInstrElimination RIE(BT, HII, MRI);
+  Changed |= visitBlock(Entry, RIE, ARE);
+
+  RegisterSet ACG;  // Available registers for CG.
+  CopyGeneration CopyG(BT, HII, MRI);
+  Changed |= visitBlock(Entry, CopyG, ACG);
+
+  RegisterSet ACP;  // Available registers for CP.
+  CopyPropagation CopyP(HRI, MRI);
+  Changed |= visitBlock(Entry, CopyP, ACP);
+
+  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+  BT.run();
+  RegisterSet ABS;  // Available registers for BS.
+  BitSimplification BitS(BT, HII, MRI);
+  Changed |= visitBlock(Entry, BitS, ABS);
+
+  Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
+
+  if (Changed) {
+    for (auto &B : MF)
+      for (auto &I : B)
+        I.clearKillInfo();
+    DeadCodeElimination(MF, *MDT).run();
+  }
+  return Changed;
+}
+
+
+// Recognize loops where the code at the end of the loop matches the code
+// before the entry of the loop, and the matching code is such that it can
+// be simplified. This pass relies on the bit simplification above and only
+// prepares code in a way that can be handled by the bit simplification.
+//
+// This is the motivating testcase (and explanation):
+//
+// {
+//   loop0(.LBB0_2, r1)          // %for.body.preheader
+//   r5:4 = memd(r0++#8)
+// }
+// {
+//   r3 = lsr(r4, #16)
+//   r7:6 = combine(r5, r5)
+// }
+// {
+//   r3 = insert(r5, #16, #16)
+//   r7:6 = vlsrw(r7:6, #16)
+// }
+// .LBB0_2:
+// {
+//   memh(r2+#4) = r5
+//   memh(r2+#6) = r6            # R6 is really R5.H
+// }
+// {
+//   r2 = add(r2, #8)
+//   memh(r2+#0) = r4
+//   memh(r2+#2) = r3            # R3 is really R4.H
+// }
+// {
+//   r5:4 = memd(r0++#8)
+// }
+// {                             # "Shuffling" code that sets up R3 and R6
+//   r3 = lsr(r4, #16)           # so that their halves can be stored in the
+//   r7:6 = combine(r5, r5)      # next iteration. This could be folded into
+// }                             # the stores if the code was at the beginning
+// {                             # of the loop iteration. Since the same code
+//   r3 = insert(r5, #16, #16)   # precedes the loop, it can actually be moved
+//   r7:6 = vlsrw(r7:6, #16)     # there.
+// }:endloop0
+//
+//
+// The outcome:
+//
+// {
+//   loop0(.LBB0_2, r1)
+//   r5:4 = memd(r0++#8)
+// }
+// .LBB0_2:
+// {
+//   memh(r2+#4) = r5
+//   memh(r2+#6) = r5.h
+// }
+// {
+//   r2 = add(r2, #8)
+//   memh(r2+#0) = r4
+//   memh(r2+#2) = r4.h
+// }
+// {
+//   r5:4 = memd(r0++#8)
+// }:endloop0
+
+namespace llvm {
+  FunctionPass *createHexagonLoopRescheduling();
+  void initializeHexagonLoopReschedulingPass(PassRegistry&);
+}
+
+namespace {
+  class HexagonLoopRescheduling : public MachineFunctionPass {
+  public:
+    static char ID;
+    HexagonLoopRescheduling() : MachineFunctionPass(ID),
+        HII(0), HRI(0), MRI(0), BTP(0) {
+      initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+  private:
+    const HexagonInstrInfo *HII;
+    const HexagonRegisterInfo *HRI;
+    MachineRegisterInfo *MRI;
+    BitTracker *BTP;
+
+    struct LoopCand {
+      LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
+          MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
+      MachineBasicBlock *LB, *PB, *EB;
+    };
+    typedef std::vector<MachineInstr*> InstrList;
+    struct InstrGroup {
+      BitTracker::RegisterRef Inp, Out;
+      InstrList Ins;
+    };
+    struct PhiInfo {
+      PhiInfo(MachineInstr &P, MachineBasicBlock &B);
+      unsigned DefR;
+      BitTracker::RegisterRef LR, PR;
+      MachineBasicBlock *LB, *PB;
+    };
+
+    static unsigned getDefReg(const MachineInstr *MI);
+    bool isConst(unsigned Reg) const;
+    bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
+    bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
+    bool isShuffleOf(unsigned OutR, unsigned InpR) const;
+    bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
+        unsigned &InpR2) const;
+    void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
+        MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
+    bool processLoop(LoopCand &C);
+  };
+}
+
+char HexagonLoopRescheduling::ID = 0;
+
+INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
+  "Hexagon Loop Rescheduling", false, false)
+
+
+HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
+      MachineBasicBlock &B) {
+  DefR = HexagonLoopRescheduling::getDefReg(&P);
+  LB = &B;
+  PB = nullptr;
+  for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
+    const MachineOperand &OpB = P.getOperand(i+1);
+    if (OpB.getMBB() == &B) {
+      LR = P.getOperand(i);
+      continue;
+    }
+    PB = OpB.getMBB();
+    PR = P.getOperand(i);
+  }
+}
+
+
+unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
+  RegisterSet Defs;
+  HBS::getInstrDefs(*MI, Defs);
+  if (Defs.count() != 1)
+    return 0;
+  return Defs.find_first();
+}
+
+
+bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
+  if (!BTP->has(Reg))
+    return false;
+  const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
+  for (unsigned i = 0, w = RC.width(); i < w; ++i) {
+    const BitTracker::BitValue &V = RC[i];
+    if (!V.is(0) && !V.is(1))
+      return false;
+  }
+  return true;
+}
+
+
+bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
+      unsigned DefR) const {
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+    case TargetOpcode::COPY:
+    case Hexagon::S2_lsr_i_r:
+    case Hexagon::S2_asr_i_r:
+    case Hexagon::S2_asl_i_r:
+    case Hexagon::S2_lsr_i_p:
+    case Hexagon::S2_asr_i_p:
+    case Hexagon::S2_asl_i_p:
+    case Hexagon::S2_insert:
+    case Hexagon::A2_or:
+    case Hexagon::A2_orp:
+    case Hexagon::A2_and:
+    case Hexagon::A2_andp:
+    case Hexagon::A2_combinew:
+    case Hexagon::A4_combineri:
+    case Hexagon::A4_combineir:
+    case Hexagon::A2_combineii:
+    case Hexagon::A4_combineii:
+    case Hexagon::A2_combine_ll:
+    case Hexagon::A2_combine_lh:
+    case Hexagon::A2_combine_hl:
+    case Hexagon::A2_combine_hh:
+      return true;
+  }
+  return false;
+}
+
+
+bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
+      unsigned InpR) const {
+  for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
+    const MachineOperand &Op = MI->getOperand(i);
+    if (!Op.isReg())
+      continue;
+    if (Op.getReg() == InpR)
+      return i == n-1;
+  }
+  return false;
+}
+
+
+bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
+  if (!BTP->has(OutR) || !BTP->has(InpR))
+    return false;
+  const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
+  for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
+    const BitTracker::BitValue &V = OutC[i];
+    if (V.Type != BitTracker::BitValue::Ref)
+      continue;
+    if (V.RefI.Reg != InpR)
+      return false;
+  }
+  return true;
+}
+
+
+bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
+      unsigned OutR2, unsigned &InpR2) const {
+  if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
+    return false;
+  const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
+  const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
+  unsigned W = OutC1.width();
+  unsigned MatchR = 0;
+  if (W != OutC2.width())
+    return false;
+  for (unsigned i = 0; i < W; ++i) {
+    const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
+    if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
+      return false;
+    if (V1.Type != BitTracker::BitValue::Ref)
+      continue;
+    if (V1.RefI.Pos != V2.RefI.Pos)
+      return false;
+    if (V1.RefI.Reg != InpR1)
+      return false;
+    if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
+      return false;
+    if (!MatchR)
+      MatchR = V2.RefI.Reg;
+    else if (V2.RefI.Reg != MatchR)
+      return false;
+  }
+  InpR2 = MatchR;
+  return true;
+}
+
+
+void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
+      MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
+      unsigned NewPredR) {
+  DenseMap<unsigned,unsigned> RegMap;
+
+  const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
+  unsigned PhiR = MRI->createVirtualRegister(PhiRC);
+  BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
+    .addReg(NewPredR)
+    .addMBB(&PB)
+    .addReg(G.Inp.Reg)
+    .addMBB(&LB);
+  RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
+
+  for (unsigned i = G.Ins.size(); i > 0; --i) {
+    const MachineInstr *SI = G.Ins[i-1];
+    unsigned DR = getDefReg(SI);
+    const TargetRegisterClass *RC = MRI->getRegClass(DR);
+    unsigned NewDR = MRI->createVirtualRegister(RC);
+    DebugLoc DL = SI->getDebugLoc();
+
+    auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
+    for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
+      const MachineOperand &Op = SI->getOperand(j);
+      if (!Op.isReg()) {
+        MIB.addOperand(Op);
+        continue;
+      }
+      if (!Op.isUse())
+        continue;
+      unsigned UseR = RegMap[Op.getReg()];
+      MIB.addReg(UseR, 0, Op.getSubReg());
+    }
+    RegMap.insert(std::make_pair(DR, NewDR));
+  }
+
+  HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
+}
+
+
+bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
+  DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n");
+  std::vector<PhiInfo> Phis;
+  for (auto &I : *C.LB) {
+    if (!I.isPHI())
+      break;
+    unsigned PR = getDefReg(&I);
+    if (isConst(PR))
+      continue;
+    bool BadUse = false, GoodUse = false;
+    for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
+      MachineInstr *UseI = UI->getParent();
+      if (UseI->getParent() != C.LB) {
+        BadUse = true;
+        break;
+      }
+      if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
+        GoodUse = true;
+    }
+    if (BadUse || !GoodUse)
+      continue;
+
+    Phis.push_back(PhiInfo(I, *C.LB));
+  }
+
+  DEBUG({
+    dbgs() << "Phis: {";
+    for (auto &I : Phis) {
+      dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi("
+             << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
+             << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
+             << I.LB->getNumber() << ')';
+    }
+    dbgs() << " }\n";
+  });
+
+  if (Phis.empty())
+    return false;
+
+  bool Changed = false;
+  InstrList ShufIns;
+
+  // Go backwards in the block: for each bit shuffling instruction, check
+  // if that instruction could potentially be moved to the front of the loop:
+  // the output of the instruction cannot be used in a non-shuffling
+  // instruction in this loop.
+  for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
+    if (I->isTerminator())
+      continue;
+    if (I->isPHI())
+      break;
+
+    RegisterSet Defs;
+    HBS::getInstrDefs(*I, Defs);
+    if (Defs.count() != 1)
+      continue;
+    unsigned DefR = Defs.find_first();
+    if (!TargetRegisterInfo::isVirtualRegister(DefR))
+      continue;
+    if (!isBitShuffle(&*I, DefR))
+      continue;
+
+    bool BadUse = false;
+    for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
+      MachineInstr *UseI = UI->getParent();
+      if (UseI->getParent() == C.LB) {
+        if (UseI->isPHI()) {
+          // If the use is in a phi node in this loop, then it should be
+          // the value corresponding to the back edge.
+          unsigned Idx = UI.getOperandNo();
+          if (UseI->getOperand(Idx+1).getMBB() != C.LB)
+            BadUse = true;
+        } else {
+          auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI);
+          if (F == ShufIns.end())
+            BadUse = true;
+        }
+      } else {
+        // There is a use outside of the loop, but there is no epilog block
+        // suitable for a copy-out.
+        if (C.EB == nullptr)
+          BadUse = true;
+      }
+      if (BadUse)
+        break;
+    }
+
+    if (BadUse)
+      continue;
+    ShufIns.push_back(&*I);
+  }
+
+  // Partition the list of shuffling instructions into instruction groups,
+  // where each group has to be moved as a whole (i.e. a group is a chain of
+  // dependent instructions). A group produces a single live output register,
+  // which is meant to be the input of the loop phi node (although this is
+  // not checked here yet). It also uses a single register as its input,
+  // which is some value produced in the loop body. After moving the group
+  // to the beginning of the loop, that input register would need to be
+  // the loop-carried register (through a phi node) instead of the (currently
+  // loop-carried) output register.
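+  // For example, in the motivating testcase at the top of this pass, the
+  // group { r3 = lsr(r4, #16); r3 = insert(r5, #16, #16) } produces the
+  // single output r3 and, because r4 and r5 are halves of one double
+  // register, uses the single input r5:4.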
+  typedef std::vector<InstrGroup> InstrGroupList;
+  InstrGroupList Groups;
+
+  for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
+    MachineInstr *SI = ShufIns[i];
+    if (SI == nullptr)
+      continue;
+
+    InstrGroup G;
+    G.Ins.push_back(SI);
+    G.Out.Reg = getDefReg(SI);
+    RegisterSet Inputs;
+    HBS::getInstrUses(*SI, Inputs);
+
+    for (unsigned j = i+1; j < n; ++j) {
+      MachineInstr *MI = ShufIns[j];
+      if (MI == nullptr)
+        continue;
+      RegisterSet Defs;
+      HBS::getInstrDefs(*MI, Defs);
+      // If this instruction does not define any pending inputs, skip it.
+      if (!Defs.intersects(Inputs))
+        continue;
+      // Otherwise, add it to the current group and remove the inputs that
+      // are defined by MI.
+      G.Ins.push_back(MI);
+      Inputs.remove(Defs);
+      // Then add all registers used by MI.
+      HBS::getInstrUses(*MI, Inputs);
+      ShufIns[j] = nullptr;
+    }
+
+    // Only add a group if it requires at most one register.
+    if (Inputs.count() > 1)
+      continue;
+    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+      return G.Out.Reg == P.LR.Reg;
+    };
+    if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end())
+      continue;
+
+    G.Inp.Reg = Inputs.find_first();
+    Groups.push_back(G);
+  }
+
+  DEBUG({
+    for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+      InstrGroup &G = Groups[i];
+      dbgs() << "Group[" << i << "] inp: "
+             << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub)
+             << " out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
+      for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
+        dbgs() << "  " << *G.Ins[j];
+    }
+  });
+
+  for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
+    InstrGroup &G = Groups[i];
+    if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
+      continue;
+    auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
+      return G.Out.Reg == P.LR.Reg;
+    };
+    auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq);
+    if (F == Phis.end())
+      continue;
+    unsigned PredR = 0;
+    if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) {
+      const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg);
+      unsigned Opc = DefPredR->getOpcode();
+      if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
+        continue;
+      if (!DefPredR->getOperand(1).isImm())
+        continue;
+      if (DefPredR->getOperand(1).getImm() != 0)
+        continue;
+      const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
+      if (RC != MRI->getRegClass(F->PR.Reg)) {
+        PredR = MRI->createVirtualRegister(RC);
+        unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
+                                                          : Hexagon::A2_tfrpi;
+        auto T = C.PB->getFirstTerminator();
+        DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
+        BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR)
+          .addImm(0);
+      } else {
+        PredR = F->PR.Reg;
+      }
+    }
+    assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg));
+    moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+
+bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  HII = HST.getInstrInfo();
+  HRI = HST.getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
+  BitTracker BT(HE, MF);
+  DEBUG(BT.trace(true));
+  BT.run();
+  BTP = &BT;
+
+  std::vector<LoopCand> Cand;
+
+  for (auto &B : MF) {
+    if (B.pred_size() != 2 || B.succ_size() != 2)
+      continue;
+    MachineBasicBlock *PB = nullptr;
+    bool IsLoop = false;
+    for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
+      if (*PI != &B)
+        PB = *PI;
+      else
+        IsLoop = true;
+    }
+    if (!IsLoop)
+      continue;
+
+    MachineBasicBlock *EB = nullptr;
+    for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
+      if (*SI == &B)
+        continue;
+      // Set EB to the epilog block, if it has only one predecessor (i.e. the
+      // edge from B to EB is non-critical).
+      if ((*SI)->pred_size() == 1)
+        EB = *SI;
+      break;
+    }
+
+    Cand.push_back(LoopCand(&B, PB, EB));
+  }
+
+  bool Changed = false;
+  for (auto &C : Cand)
+    Changed |= processLoop(C);
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createHexagonLoopRescheduling() {
+  return new HexagonLoopRescheduling();
+}
+
+FunctionPass *llvm::createHexagonBitSimplify() {
+  return new HexagonBitSimplify();
+}
+
diff --git a/lib/Target/Hexagon/HexagonBitTracker.cpp b/lib/Target/Hexagon/HexagonBitTracker.cpp
index 021e58a1d08a..d5848dc45a3b 100644
--- a/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -84,6 +84,8 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
   uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
   switch (ID) {
     case DoubleRegsRegClassID:
+    case VecDblRegsRegClassID:
+    case VecDblRegs128BRegClassID:
       return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1)
                                    : BT::BitMask(RW, 2*RW-1);
    default:
@@ -95,30 +97,29 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
   llvm_unreachable("Unexpected register/subregister");
 }
 
-
 namespace {
-  struct RegisterRefs : public std::vector<BT::RegisterRef> {
-    typedef std::vector<BT::RegisterRef> Base;
-    RegisterRefs(const MachineInstr *MI);
-    const BT::RegisterRef &operator[](unsigned n) const {
-      // The main purpose of this operator is to assert with bad argument.
-      assert(n < size());
-      return Base::operator[](n);
-    }
-  };
+class RegisterRefs {
+  std::vector<BT::RegisterRef> Vector;
 
-  RegisterRefs::RegisterRefs(const MachineInstr *MI)
-    : Base(MI->getNumOperands()) {
-    for (unsigned i = 0, n = size(); i < n; ++i) {
+public:
+  RegisterRefs(const MachineInstr *MI) : Vector(MI->getNumOperands()) {
+    for (unsigned i = 0, n = Vector.size(); i < n; ++i) {
       const MachineOperand &MO = MI->getOperand(i);
       if (MO.isReg())
-        at(i) = BT::RegisterRef(MO);
+        Vector[i] = BT::RegisterRef(MO);
       // For indices that don't correspond to registers, the entry will
       // remain constructed via the default constructor.
     }
   }
-}
+  size_t size() const { return Vector.size(); }
+  const BT::RegisterRef &operator[](unsigned n) const {
+    // The main purpose of this operator is to assert with bad argument.
+    assert(n < Vector.size());
+    return Vector[n];
+  }
+};
+}
 
 bool HexagonEvaluator::evaluate(const MachineInstr *MI,
                                 const CellMapType &Inputs,
                                 CellMapType &Outputs) const {
@@ -189,7 +190,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr *MI,
     return true;
   };
   // Get the cell corresponding to the N-th operand.
-  auto cop = [this,Reg,MI,Inputs] (unsigned N, uint16_t W)
+  auto cop = [this,&Reg,&MI,&Inputs] (unsigned N, uint16_t W)
       -> BT::RegisterCell {
     const MachineOperand &Op = MI->getOperand(N);
     if (Op.isImm())
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 3753b745657b..efafdd007289 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -102,7 +102,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
   // Loop over all of the basic blocks.
   for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
        MBBb != MBBe; ++MBBb) {
-    MachineBasicBlock* MBB = MBBb;
+    MachineBasicBlock *MBB = &*MBBb;
 
     // Traverse the basic block.
     MachineBasicBlock::iterator MII = MBB->getFirstTerminator();
@@ -186,13 +186,11 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
 
         if (case1 || case2) {
           InvertAndChangeJumpTarget(MI, UncondTarget);
-          MBB->removeSuccessor(JumpAroundTarget);
-          MBB->addSuccessor(UncondTarget);
+          MBB->replaceSuccessor(JumpAroundTarget, UncondTarget);
 
           // Remove the unconditional branch in LayoutSucc.
           LayoutSucc->erase(LayoutSucc->begin());
-          LayoutSucc->removeSuccessor(UncondTarget);
-          LayoutSucc->addSuccessor(JumpAroundTarget);
+          LayoutSucc->replaceSuccessor(UncondTarget, JumpAroundTarget);
 
           // This code performs the conversion for case 2, which moves
           //  the block to the fall-thru case (BB3 in the code above).
@@ -210,16 +208,15 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
           // The live-in to LayoutSucc is now all values live-in to
           // JumpAroundTarget.
           //
-          std::vector<unsigned> OrigLiveIn(LayoutSucc->livein_begin(),
-                                           LayoutSucc->livein_end());
-          std::vector<unsigned> NewLiveIn(JumpAroundTarget->livein_begin(),
-                                          JumpAroundTarget->livein_end());
-          for (unsigned i = 0; i < OrigLiveIn.size(); ++i) {
-            LayoutSucc->removeLiveIn(OrigLiveIn[i]);
-          }
-          for (unsigned i = 0; i < NewLiveIn.size(); ++i) {
-            LayoutSucc->addLiveIn(NewLiveIn[i]);
-          }
+          std::vector<MachineBasicBlock::RegisterMaskPair> OrigLiveIn(
+              LayoutSucc->livein_begin(), LayoutSucc->livein_end());
+          std::vector<MachineBasicBlock::RegisterMaskPair> NewLiveIn(
+              JumpAroundTarget->livein_begin(),
+              JumpAroundTarget->livein_end());
+          for (const auto &OrigLI : OrigLiveIn)
+            LayoutSucc->removeLiveIn(OrigLI.PhysReg);
+          for (const auto &NewLI : NewLiveIn)
+            LayoutSucc->addLiveIn(NewLI);
         }
       }
     }
diff --git a/lib/Target/Hexagon/HexagonCommonGEP.cpp b/lib/Target/Hexagon/HexagonCommonGEP.cpp
index 9f5fac156527..931db6687bf8 100644
--- a/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -59,30 +59,23 @@ namespace {
 
   // Numbering map for gep nodes. Used to keep track of ordering for
   // gep nodes.
-  struct NodeNumbering : public std::map<const GepNode*,unsigned> {
-  };
-
-  struct NodeOrdering : public NodeNumbering {
+  struct NodeOrdering {
     NodeOrdering() : LastNum(0) {}
-#ifdef _MSC_VER
-    void special_insert_for_special_msvc(const GepNode *N)
-#else
-    using NodeNumbering::insert;
-    void insert(const GepNode* N)
-#endif
-    {
-      insert(std::make_pair(N, ++LastNum));
-    }
-    bool operator() (const GepNode* N1, const GepNode *N2) const {
-      const_iterator F1 = find(N1), F2 = find(N2);
-      assert(F1 != end() && F2 != end());
+
+    void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); }
+    void clear() { Map.clear(); }
+
+    bool operator()(const GepNode *N1, const GepNode *N2) const {
+      auto F1 = Map.find(N1), F2 = Map.find(N2);
+      assert(F1 != Map.end() && F2 != Map.end());
       return F1->second < F2->second;
     }
+
+  private:
+    std::map<const GepNode *, unsigned> Map;
     unsigned LastNum;
   };
 
-
   class HexagonCommonGEP : public FunctionPass {
   public:
     static char ID;
@@ -360,11 +353,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
       Us.insert(&UI.getUse());
   }
   Nodes.push_back(N);
-#ifdef _MSC_VER
-  NodeOrder.special_insert_for_special_msvc(N);
-#else
   NodeOrder.insert(N);
-#endif
 
   // Skip the first index operand, since we only handle 0. This dereferences
   // the pointer operand.
@@ -379,11 +368,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
     Nx->PTy = PtrTy;
     Nx->Idx = Op;
     Nodes.push_back(Nx);
-#ifdef _MSC_VER
-    NodeOrder.special_insert_for_special_msvc(Nx);
-#else
    NodeOrder.insert(Nx);
-#endif
     PN = Nx;
 
     PtrTy = next_type(PtrTy, Op);
@@ -404,7 +389,7 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
 void HexagonCommonGEP::collect() {
   // Establish depth-first traversal order of the dominator tree.
   ValueVect BO;
-  getBlockTraversalOrder(Fn->begin(), BO);
+  getBlockTraversalOrder(&Fn->front(), BO);
 
   // The creation of gep nodes requires DT-traversal. When processing a GEP
   // instruction that uses another GEP instruction as the base pointer, the
@@ -737,7 +722,7 @@ namespace {
       Instruction *In = cast<Instruction>(V);
       if (In->getParent() != B)
         continue;
-      BasicBlock::iterator It = In;
+      BasicBlock::iterator It = In->getIterator();
       if (std::distance(FirstUse, BEnd) < std::distance(It, BEnd))
         FirstUse = It;
@@ -1135,7 +1120,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
     ArrayRef<Value*> A(IdxList, IdxC);
     Type *InpTy = Input->getType();
     Type *ElTy = cast<PointerType>(InpTy->getScalarType())->getElementType();
-    NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", At);
+    NewInst = GetElementPtrInst::Create(ElTy, Input, A, "cgep", &*At);
     DEBUG(dbgs() << "new GEP: " << *NewInst << '\n');
     Input = NewInst;
   } while (nax <= Num);
@@ -1213,7 +1198,7 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
     Last = Child;
   } while (true);
 
-  BasicBlock::iterator InsertAt = LastB->getTerminator();
+  BasicBlock::iterator InsertAt = LastB->getTerminator()->getIterator();
   if (LastUsed || LastCN > 0) {
     ValueVect Urs;
     getAllUsersForNode(Root, Urs, NCM);
diff --git a/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
new file mode 100644
index 000000000000..ee0c318ffb5d
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -0,0 +1,1063 @@
+//===--- HexagonEarlyIfConv.cpp -------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a Hexagon-specific if-conversion pass that runs on the
+// SSA form.
+// In SSA it is not straightforward to represent instructions that
+// conditionally define registers, since a conditionally-defined register
+// may only be used under the same condition on which the definition was
+// based. To avoid complications of this nature, this patch will only
+// generate predicated stores, and speculate other instructions from the
+// "if-converted" block.
+// The code will recognize CFG patterns where a block with a conditional
+// branch "splits" into a "true block" and a "false block". Either of these
+// could be omitted (in case of a triangle, for example).
+// If after conversion of the side block(s) the CFG allows it, the
+// resulting blocks may be merged. If the "join" block contained PHI nodes,
+// they will be replaced with MUX (or MUX-like) instructions to maintain
+// the semantics of the PHI.
+//
+// Example:
+//
+//         %vreg40 = L2_loadrub_io %vreg39, 1
+//         %vreg41 = S2_tstbit_i %vreg40, 0
+//         J2_jumpt %vreg41, <BB#5>, %PC
+//         J2_jump <BB#4>, %PC
+//     Successors according to CFG: BB#4(62) BB#5(62)
+//
+// BB#4: derived from LLVM BB %if.then
+//     Predecessors according to CFG: BB#3
+//         %vreg11 = A2_addp %vreg6, %vreg10
+//         S2_storerd_io %vreg32, 16, %vreg11
+//     Successors according to CFG: BB#5
+//
+// BB#5: derived from LLVM BB %if.end
+//     Predecessors according to CFG: BB#3 BB#4
+//         %vreg12 = PHI %vreg6, <BB#3>, %vreg11, <BB#4>
+//         %vreg13 = A2_addp %vreg7, %vreg12
+//         %vreg42 = C2_cmpeqi %vreg9, 10
+//         J2_jumpf %vreg42, <BB#3>, %PC
+//         J2_jump <BB#6>, %PC
+//     Successors according to CFG: BB#6(4) BB#3(124)
+//
+// would become:
+//
+//         %vreg40 = L2_loadrub_io %vreg39, 1
+//         %vreg41 = S2_tstbit_i %vreg40, 0
+// spec->  %vreg11 = A2_addp %vreg6, %vreg10
+// pred->  S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11
+//         %vreg46 = MUX64_rr %vreg41, %vreg6, %vreg11
+//         %vreg13 = A2_addp %vreg7, %vreg46
+//         %vreg42 = C2_cmpeqi %vreg9, 10
+//         J2_jumpf %vreg42, <BB#3>, %PC
+//         J2_jump <BB#6>, %PC
+//     Successors according to CFG: BB#6 BB#3
+
+#define DEBUG_TYPE "hexagon-eif"
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "HexagonTargetMachine.h"
+
+#include
+#include
+#include
+
+using namespace llvm;
+
+namespace llvm {
+  FunctionPass *createHexagonEarlyIfConversion();
+  void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry);
+}
+
+namespace {
+  cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden,
+    cl::init(false), cl::desc("Enable branch probability info"));
+  cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
+    cl::desc("Size limit in Hexagon early if-conversion"));
+
+  struct PrintMB {
+    PrintMB(const MachineBasicBlock *B) : MB(B) {}
+    const MachineBasicBlock *MB;
+  };
+  raw_ostream &operator<< (raw_ostream &OS, const PrintMB &P) {
+    if (!P.MB)
+      return OS << "<none>";
+    return OS << '#' << P.MB->getNumber();
+  }
+
+  struct FlowPattern {
+    FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {}
+    FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB,
+        MachineBasicBlock *FB, MachineBasicBlock *JB)
+      : SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {}
+
+    MachineBasicBlock *SplitB;
+    MachineBasicBlock *TrueB, *FalseB, *JoinB;
+    unsigned PredR;
+  };
+  struct PrintFP {
+    PrintFP(const FlowPattern &P, const TargetRegisterInfo &T)
+      : FP(P), TRI(T) {}
+    const FlowPattern &FP;
+    const TargetRegisterInfo &TRI;
+    friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
+  };
+  raw_ostream &operator<<(raw_ostream &OS,
+                          const PrintFP &P) LLVM_ATTRIBUTE_UNUSED;
+  raw_ostream &operator<<(raw_ostream &OS, const PrintFP &P) {
+    OS << "{ SplitB:" << PrintMB(P.FP.SplitB)
+       << ", PredR:" << PrintReg(P.FP.PredR, &P.TRI)
+       << ", TrueB:" << PrintMB(P.FP.TrueB) << ", FalseB:"
+       << PrintMB(P.FP.FalseB)
+       << ", JoinB:" << PrintMB(P.FP.JoinB) << " }";
+    return OS;
+  }
+
+  class HexagonEarlyIfConversion : public MachineFunctionPass {
+  public:
+    static char ID;
+    HexagonEarlyIfConversion() : MachineFunctionPass(ID),
+        TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) {
+      initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry());
+    }
+    const char *getPassName() const override {
+      return "Hexagon early if conversion";
+    }
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.addRequired<MachineBranchProbabilityInfo>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+  private:
+    typedef DenseSet<MachineBasicBlock*> BlockSetType;
+
+    bool isPreheader(const MachineBasicBlock *B) const;
+    bool matchFlowPattern(MachineBasicBlock *B, MachineLoop *L,
+        FlowPattern &FP);
+    bool visitBlock(MachineBasicBlock *B, MachineLoop *L);
+    bool visitLoop(MachineLoop *L);
+
+    bool hasEHLabel(const MachineBasicBlock *B) const;
+    bool hasUncondBranch(const MachineBasicBlock *B) const;
+    bool isValidCandidate(const MachineBasicBlock *B) const;
+    bool usesUndefVReg(const MachineInstr *MI) const;
+    bool isValid(const FlowPattern &FP) const;
+    unsigned countPredicateDefs(const MachineBasicBlock *B) const;
+    unsigned computePhiCost(MachineBasicBlock *B) const;
+    bool isProfitable(const FlowPattern &FP) const;
+    bool isPredicableStore(const MachineInstr *MI) const;
+    bool isSafeToSpeculate(const MachineInstr *MI) const;
+
+    unsigned getCondStoreOpcode(unsigned Opc, bool IfTrue) const;
+    void predicateInstr(MachineBasicBlock *ToB, MachineBasicBlock::iterator At,
+        MachineInstr *MI, unsigned PredR, bool IfTrue);
+    void predicateBlockNB(MachineBasicBlock *ToB,
+        MachineBasicBlock::iterator At, MachineBasicBlock *FromB,
+        unsigned PredR, bool IfTrue);
+
+    void updatePhiNodes(MachineBasicBlock *WhereB, const FlowPattern &FP);
+    void convert(const FlowPattern &FP);
+
+    void removeBlock(MachineBasicBlock *B);
+    void eliminatePhis(MachineBasicBlock *B);
+    void replacePhiEdges(MachineBasicBlock *OldB, MachineBasicBlock *NewB);
+    void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB);
+    void simplifyFlowGraph(const FlowPattern &FP);
+
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineFunction *MFN;
+    MachineRegisterInfo *MRI;
+    MachineDominatorTree *MDT;
+    MachineLoopInfo *MLI;
+    BlockSetType Deleted;
+    const MachineBranchProbabilityInfo *MBPI;
+  };
+
+  char HexagonEarlyIfConversion::ID = 0;
+}
+
+INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif",
+  "Hexagon early if conversion", false, false)
+
+bool
HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const { + if (B->succ_size() != 1) + return false; + MachineBasicBlock *SB = *B->succ_begin(); + MachineLoop *L = MLI->getLoopFor(SB); + return L && SB == L->getHeader(); +} + + +bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B, + MachineLoop *L, FlowPattern &FP) { + DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n"); + + // Interested only in conditional branches, no .new, no new-value, etc. + // Check the terminators directly, it's easier than handling all responses + // from AnalyzeBranch. + MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock::const_iterator T1I = B->getFirstTerminator(); + if (T1I == B->end()) + return false; + unsigned Opc = T1I->getOpcode(); + if (Opc != Hexagon::J2_jumpt && Opc != Hexagon::J2_jumpf) + return false; + unsigned PredR = T1I->getOperand(0).getReg(); + + // Get the layout successor, or 0 if B does not have one. + MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B)); + MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0; + + MachineBasicBlock *T1B = T1I->getOperand(1).getMBB(); + MachineBasicBlock::const_iterator T2I = std::next(T1I); + // The second terminator should be an unconditional branch. + assert(T2I == B->end() || T2I->getOpcode() == Hexagon::J2_jump); + MachineBasicBlock *T2B = (T2I == B->end()) ? NextB + : T2I->getOperand(0).getMBB(); + if (T1B == T2B) { + // XXX merge if T1B == NextB, or convert branch to unconditional. + // mark as diamond with both sides equal? + return false; + } + // Loop could be null for both. + if (MLI->getLoopFor(T1B) != L || MLI->getLoopFor(T2B) != L) + return false; + + // Record the true/false blocks in such a way that "true" means "if (PredR)", + // and "false" means "if (!PredR)". + if (Opc == Hexagon::J2_jumpt) + TB = T1B, FB = T2B; + else + TB = T2B, FB = T1B; + + if (!MDT->properlyDominates(B, TB) || !MDT->properlyDominates(B, FB)) + return false; + + // Detect triangle first. In case of a triangle, one of the blocks TB/FB + // can fall through into the other, in other words, it will be executed + // in both cases. We only want to predicate the block that is executed + // conditionally. + unsigned TNP = TB->pred_size(), FNP = FB->pred_size(); + unsigned TNS = TB->succ_size(), FNS = FB->succ_size(); + + // A block is predicable if it has one predecessor (it must be B), and + // it has a single successor. In fact, the block has to end either with + // an unconditional branch (which can be predicated), or with a fall- + // through. + bool TOk = (TNP == 1) && (TNS == 1); + bool FOk = (FNP == 1) && (FNS == 1); + + // If neither is predicable, there is nothing interesting. + if (!TOk && !FOk) + return false; + + MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0; + MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0; + MachineBasicBlock *JB = 0; + + if (TOk) { + if (FOk) { + if (TSB == FSB) + JB = TSB; + // Diamond: "if (P) then TB; else FB;". + } else { + // TOk && !FOk + if (TSB == FB) { + JB = FB; + FB = 0; + } + } + } else { + // !TOk && FOk (at least one must be true by now). + if (FSB == TB) { + JB = TB; + TB = 0; + } + } + // Don't try to predicate loop preheaders. + if ((TB && isPreheader(TB)) || (FB && isPreheader(FB))) { + DEBUG(dbgs() << "One of blocks " << PrintMB(TB) << ", " << PrintMB(FB) + << " is a loop preheader. 
Skipping.\n"); + return false; + } + + FP = FlowPattern(B, PredR, TB, FB, JB); + DEBUG(dbgs() << "Detected " << PrintFP(FP, *TRI) << "\n"); + return true; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that +// contains EH_LABEL. +bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; +} + + +// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize +// that a block can never fall-through. +bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) + return true; + ++I; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B) + const { + if (!B) + return true; + if (B->isEHPad() || B->hasAddressTaken()) + return false; + if (B->succ_size() == 0) + return false; + + for (auto &MI : *B) { + if (MI.isDebugValue()) + continue; + if (MI.isConditionalBranch()) + return false; + unsigned Opc = MI.getOpcode(); + bool IsJMP = (Opc == Hexagon::J2_jump); + if (!isPredicableStore(&MI) && !IsJMP && !isSafeToSpeculate(&MI)) + return false; + // Look for predicate registers defined by this instruction. It's ok + // to speculate such an instruction, but the predicate register cannot + // be used outside of this block (or else it won't be possible to + // update the use of it after predication). PHI uses will be updated + // to use a result of a MUX, and a MUX cannot be created for predicate + // registers. + for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isDef()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass) + continue; + for (auto U = MRI->use_begin(R); U != MRI->use_end(); ++U) + if (U->getParent()->isPHI()) + return false; + } + } + return true; +} + + +bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const { + for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg() || !MO->isUse()) + continue; + unsigned R = MO->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + const MachineInstr *DefI = MRI->getVRegDef(R); + // "Undefined" virtual registers are actually defined via IMPLICIT_DEF. + assert(DefI && "Expecting a reaching def in MRI"); + if (DefI->isImplicitDef()) + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const { + if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition + return false; + if (FP.TrueB && !isValidCandidate(FP.TrueB)) + return false; + if (FP.FalseB && !isValidCandidate(FP.FalseB)) + return false; + // Check the PHIs in the join block. If any of them use a register + // that is defined as IMPLICIT_DEF, do not convert this. This can + // legitimately happen if one side of the split never executes, but + // the compiler is unable to prove it. That side may then seem to + // provide an "undef" value to the join block, however it will never + // execute at run-time. If we convert this case, the "undef" will + // be used in a MUX instruction, and that may seem like actually + // using an undefined value to other optimizations. This could lead + // to trouble further down the optimization stream, cause assertions + // to fail, etc. 
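+  // Illustrative sketch (register names hypothetical, not from the
+  // original source):
+  //   TrueB:  %v1 = IMPLICIT_DEF       ; TrueB never runs, but that is
+  //                                    ; not provable
+  //   JoinB:  %v  = PHI %v0, <SplitB>, %v1, <TrueB>
+  // would become, after conversion,
+  //   SplitB: %v = C2_mux %p, %v1, %v0
+  // turning the undef %v1 into an operand that appears to be used
+  // unconditionally.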
+  if (FP.JoinB) {
+    const MachineBasicBlock &B = *FP.JoinB;
+    for (auto &MI : B) {
+      if (!MI.isPHI())
+        break;
+      if (usesUndefVReg(&MI))
+        return false;
+      unsigned DefR = MI.getOperand(0).getReg();
+      const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+      if (RC == &Hexagon::PredRegsRegClass)
+        return false;
+    }
+  }
+  return true;
+}
+
+
+unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
+  assert(B->pred_size() <= 2);
+  if (B->pred_size() < 2)
+    return 0;
+
+  unsigned Cost = 0;
+  MachineBasicBlock::const_iterator I, E = B->getFirstNonPHI();
+  for (I = B->begin(); I != E; ++I) {
+    const MachineOperand &RO1 = I->getOperand(1);
+    const MachineOperand &RO3 = I->getOperand(3);
+    assert(RO1.isReg() && RO3.isReg());
+    // Must have a MUX if the phi uses a subregister.
+    if (RO1.getSubReg() != 0 || RO3.getSubReg() != 0) {
+      Cost++;
+      continue;
+    }
+    MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
+    MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
+    if (!TII->isPredicable(Def1) || !TII->isPredicable(Def3))
+      Cost++;
+  }
+  return Cost;
+}
+
+
+unsigned HexagonEarlyIfConversion::countPredicateDefs(
+      const MachineBasicBlock *B) const {
+  unsigned PredDefs = 0;
+  for (auto &MI : *B) {
+    for (ConstMIOperands MO(&MI); MO.isValid(); ++MO) {
+      if (!MO->isReg() || !MO->isDef())
+        continue;
+      unsigned R = MO->getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(R))
+        continue;
+      if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
+        PredDefs++;
+    }
+  }
+  return PredDefs;
+}
+
+
+bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
+  if (FP.TrueB && FP.FalseB) {
+
+    // Do not if-convert if the branch is one-sided.
+    if (MBPI) {
+      BranchProbability Prob(9, 10);
+      if (MBPI->getEdgeProbability(FP.SplitB, FP.TrueB) > Prob)
+        return false;
+      if (MBPI->getEdgeProbability(FP.SplitB, FP.FalseB) > Prob)
+        return false;
+    }
+
+    // If both sides are predicable, convert them if they join, and the
+    // join block has no other predecessors.
+    MachineBasicBlock *TSB = *FP.TrueB->succ_begin();
+    MachineBasicBlock *FSB = *FP.FalseB->succ_begin();
+    if (TSB != FSB)
+      return false;
+    if (TSB->pred_size() != 2)
+      return false;
+  }
+
+  // Calculate the total size of the predicated blocks.
+  // Assume instruction counts without branches to be the approximation of
+  // the code size. If the predicated blocks are smaller than a packet size,
+  // approximate the spare room in the packet that could be filled with the
+  // predicated/speculated instructions.
+  unsigned TS = 0, FS = 0, Spare = 0;
+  if (FP.TrueB) {
+    TS = std::distance(FP.TrueB->begin(), FP.TrueB->getFirstTerminator());
+    if (TS < HEXAGON_PACKET_SIZE)
+      Spare += HEXAGON_PACKET_SIZE-TS;
+  }
+  if (FP.FalseB) {
+    FS = std::distance(FP.FalseB->begin(), FP.FalseB->getFirstTerminator());
+    if (FS < HEXAGON_PACKET_SIZE)
+      Spare += HEXAGON_PACKET_SIZE-FS;
+  }
+  unsigned TotalIn = TS+FS;
+  DEBUG(dbgs() << "Total number of instructions to be predicated/speculated: "
+               << TotalIn << ", spare room: " << Spare << "\n");
+  if (TotalIn >= SizeLimit+Spare)
+    return false;
+
+  // Count the number of PHI nodes that will need to be updated (converted
+  // to MUX). Those can be later converted to predicated instructions, so
+  // they aren't always adding extra cost.
+  // KLUDGE: Also, count the number of predicate register definitions in
+  // each block. The scheduler may increase the pressure of these and cause
+  // expensive spills (e.g. bitmnp01).
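+  // For illustration (numbers hypothetical): a diamond whose join block
+  // has two PHIs, each with at least one non-predicable input, contributes
+  // TotalPh = 2 below, and the conversion is then rejected when
+  // TotalIn + TotalPh >= SizeLimit + Spare.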
+  unsigned TotalPh = 0;
+  unsigned PredDefs = countPredicateDefs(FP.SplitB);
+  if (FP.JoinB) {
+    TotalPh = computePhiCost(FP.JoinB);
+    PredDefs += countPredicateDefs(FP.JoinB);
+  } else {
+    if (FP.TrueB && FP.TrueB->succ_size() > 0) {
+      MachineBasicBlock *SB = *FP.TrueB->succ_begin();
+      TotalPh += computePhiCost(SB);
+      PredDefs += countPredicateDefs(SB);
+    }
+    if (FP.FalseB && FP.FalseB->succ_size() > 0) {
+      MachineBasicBlock *SB = *FP.FalseB->succ_begin();
+      TotalPh += computePhiCost(SB);
+      PredDefs += countPredicateDefs(SB);
+    }
+  }
+  DEBUG(dbgs() << "Total number of extra muxes from converted phis: "
+               << TotalPh << "\n");
+  if (TotalIn+TotalPh >= SizeLimit+Spare)
+    return false;
+
+  DEBUG(dbgs() << "Total number of predicate registers: " << PredDefs << "\n");
+  if (PredDefs > 4)
+    return false;
+
+  return true;
+}
+
+
+bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
+      MachineLoop *L) {
+  bool Changed = false;
+
+  // Visit all dominated blocks from the same loop first, then process B.
+  MachineDomTreeNode *N = MDT->getNode(B);
+  typedef GraphTraits<MachineDomTreeNode*> GTN;
+  // We will change CFG/DT during this traversal, so take precautions to
+  // avoid problems related to invalidated iterators. In fact, processing
+  // a child C of B cannot cause another child to be removed, but it can
+  // cause a new child to be added (which was a child of C before C itself
+  // was removed). That new child, however, would have been processed
+  // prior to processing B, so there is no need to process it again.
+  // Simply keep a list of children of B, and traverse that list.
+  typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+  DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+  for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+    MachineBasicBlock *SB = (*I)->getBlock();
+    if (!Deleted.count(SB))
+      Changed |= visitBlock(SB, L);
+  }
+  // When walking down the dominator tree, we want to traverse through
+  // blocks from nested (other) loops, because they can dominate blocks
+  // that are in L. Skip the non-L blocks only after the tree traversal.
+  if (MLI->getLoopFor(B) != L)
+    return Changed;
+
+  FlowPattern FP;
+  if (!matchFlowPattern(B, L, FP))
+    return Changed;
+
+  if (!isValid(FP)) {
+    DEBUG(dbgs() << "Conversion is not valid\n");
+    return Changed;
+  }
+  if (!isProfitable(FP)) {
+    DEBUG(dbgs() << "Conversion is not profitable\n");
+    return Changed;
+  }
+
+  convert(FP);
+  simplifyFlowGraph(FP);
+  return true;
+}
+
+
+bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
+  MachineBasicBlock *HB = L ? L->getHeader() : 0;
+  DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB)
+           : dbgs() << "Visiting function") << "\n");
+  bool Changed = false;
+  if (L) {
+    for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+      Changed |= visitLoop(*I);
+  }
+
+  MachineBasicBlock *EntryB = GraphTraits<MachineFunction*>::getEntryNode(MFN);
+  Changed |= visitBlock(L ? HB : EntryB, L);
+  return Changed;
+}
+
+
+bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI)
+      const {
+  // Exclude post-increment stores. Those return a value, so we cannot
+  // predicate them.
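+  // E.g. a post-increment form such as S2_storerb_pi (opcode named for
+  // illustration only) also defines the updated address register, so it
+  // has no direct predicated counterpart in the table below.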
+ unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + switch (Opc) { + // Store byte: + case S2_storerb_io: case S4_storerb_rr: + case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp: + // Store halfword: + case S2_storerh_io: case S4_storerh_rr: + case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp: + // Store upper halfword: + case S2_storerf_io: case S4_storerf_rr: + case S2_storerfabs: case S2_storerfgp: + // Store word: + case S2_storeri_io: case S4_storeri_rr: + case S2_storeriabs: case S4_storeiri_io: case S2_storerigp: + // Store doubleword: + case S2_storerd_io: case S4_storerd_rr: + case S2_storerdabs: case S2_storerdgp: + return true; + } + return false; +} + + +bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI) + const { + if (MI->mayLoad() || MI->mayStore()) + return false; + if (MI->isCall() || MI->isBarrier() || MI->isBranch()) + return false; + if (MI->hasUnmodeledSideEffects()) + return false; + + return true; +} + + +unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc, + bool IfTrue) const { + // Exclude post-increment stores. + using namespace Hexagon; + switch (Opc) { + case S2_storerb_io: + return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io; + case S4_storerb_rr: + return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr; + case S2_storerbabs: + case S2_storerbgp: + return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs; + case S4_storeirb_io: + return IfTrue ? S4_storeirbt_io : S4_storeirbf_io; + case S2_storerh_io: + return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io; + case S4_storerh_rr: + return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr; + case S2_storerhabs: + case S2_storerhgp: + return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs; + case S2_storerf_io: + return IfTrue ? S2_pstorerft_io : S2_pstorerff_io; + case S4_storerf_rr: + return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr; + case S2_storerfabs: + case S2_storerfgp: + return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs; + case S4_storeirh_io: + return IfTrue ? S4_storeirht_io : S4_storeirhf_io; + case S2_storeri_io: + return IfTrue ? S2_pstorerit_io : S2_pstorerif_io; + case S4_storeri_rr: + return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr; + case S2_storeriabs: + case S2_storerigp: + return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs; + case S4_storeiri_io: + return IfTrue ? S4_storeirit_io : S4_storeirif_io; + case S2_storerd_io: + return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io; + case S4_storerd_rr: + return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr; + case S2_storerdabs: + case S2_storerdgp: + return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs; + } + llvm_unreachable("Unexpected opcode"); + return 0; +} + + +void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineInstr *MI, + unsigned PredR, bool IfTrue) { + DebugLoc DL; + if (At != ToB->end()) + DL = At->getDebugLoc(); + else if (!ToB->empty()) + DL = ToB->back().getDebugLoc(); + + unsigned Opc = MI->getOpcode(); + + if (isPredicableStore(MI)) { + unsigned COpc = getCondStoreOpcode(Opc, IfTrue); + assert(COpc); + MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc)) + .addReg(PredR); + for (MIOperands MO(MI); MO.isValid(); ++MO) + MIB.addOperand(*MO); + + // Set memory references. 
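+    // (Copying the memory operands below preserves the original store's
+    // alias and volatility information on the predicated form, so later
+    // passes do not have to treat it conservatively.)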
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); + MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); + MIB.setMemRefs(MMOBegin, MMOEnd); + + MI->eraseFromParent(); + return; + } + + if (Opc == Hexagon::J2_jump) { + MachineBasicBlock *TB = MI->getOperand(0).getMBB(); + const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt + : Hexagon::J2_jumpf); + BuildMI(*ToB, At, DL, D) + .addReg(PredR) + .addMBB(TB); + MI->eraseFromParent(); + return; + } + + // Print the offending instruction unconditionally as we are about to + // abort. + dbgs() << *MI; + llvm_unreachable("Unexpected instruction"); +} + + +// Predicate/speculate non-branch instructions from FromB into block ToB. +// Leave the branches alone, they will be handled later. Btw, at this point +// FromB should have at most one branch, and it should be unconditional. +void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB, + MachineBasicBlock::iterator At, MachineBasicBlock *FromB, + unsigned PredR, bool IfTrue) { + DEBUG(dbgs() << "Predicating block " << PrintMB(FromB) << "\n"); + MachineBasicBlock::iterator End = FromB->getFirstTerminator(); + MachineBasicBlock::iterator I, NextI; + + for (I = FromB->begin(); I != End; I = NextI) { + assert(!I->isPHI()); + NextI = std::next(I); + if (isSafeToSpeculate(&*I)) + ToB->splice(At, FromB, I); + else + predicateInstr(ToB, At, &*I, PredR, IfTrue); + } +} + + +void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB, + const FlowPattern &FP) { + // Visit all PHI nodes in the WhereB block and generate MUX instructions + // in the split block. Update the PHI nodes with the values of the MUX. + auto NonPHI = WhereB->getFirstNonPHI(); + for (auto I = WhereB->begin(); I != NonPHI; ++I) { + MachineInstr *PN = &*I; + // Registers and subregisters corresponding to TrueB, FalseB and SplitB. + unsigned TR = 0, TSR = 0, FR = 0, FSR = 0, SR = 0, SSR = 0; + for (int i = PN->getNumOperands()-2; i > 0; i -= 2) { + const MachineOperand &RO = PN->getOperand(i), &BO = PN->getOperand(i+1); + if (BO.getMBB() == FP.SplitB) + SR = RO.getReg(), SSR = RO.getSubReg(); + else if (BO.getMBB() == FP.TrueB) + TR = RO.getReg(), TSR = RO.getSubReg(); + else if (BO.getMBB() == FP.FalseB) + FR = RO.getReg(), FSR = RO.getSubReg(); + else + continue; + PN->RemoveOperand(i+1); + PN->RemoveOperand(i); + } + if (TR == 0) + TR = SR, TSR = SSR; + else if (FR == 0) + FR = SR, FSR = SSR; + assert(TR && FR); + + using namespace Hexagon; + unsigned DR = PN->getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DR); + const MCInstrDesc &D = RC == &IntRegsRegClass ? 
TII->get(C2_mux) + : TII->get(MUX64_rr); + + MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator(); + DebugLoc DL; + if (MuxAt != FP.SplitB->end()) + DL = MuxAt->getDebugLoc(); + unsigned MuxR = MRI->createVirtualRegister(RC); + BuildMI(*FP.SplitB, MuxAt, DL, D, MuxR) + .addReg(FP.PredR) + .addReg(TR, 0, TSR) + .addReg(FR, 0, FSR); + + PN->addOperand(MachineOperand::CreateReg(MuxR, false)); + PN->addOperand(MachineOperand::CreateMBB(FP.SplitB)); + } +} + + +void HexagonEarlyIfConversion::convert(const FlowPattern &FP) { + MachineBasicBlock *TSB = 0, *FSB = 0; + MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator(); + assert(OldTI != FP.SplitB->end()); + DebugLoc DL = OldTI->getDebugLoc(); + + if (FP.TrueB) { + TSB = *FP.TrueB->succ_begin(); + predicateBlockNB(FP.SplitB, OldTI, FP.TrueB, FP.PredR, true); + } + if (FP.FalseB) { + FSB = *FP.FalseB->succ_begin(); + MachineBasicBlock::iterator At = FP.SplitB->getFirstTerminator(); + predicateBlockNB(FP.SplitB, At, FP.FalseB, FP.PredR, false); + } + + // Regenerate new terminators in the split block and update the successors. + // First, remember any information that may be needed later and remove the + // existing terminators/successors from the split block. + MachineBasicBlock *SSB = 0; + FP.SplitB->erase(OldTI, FP.SplitB->end()); + while (FP.SplitB->succ_size() > 0) { + MachineBasicBlock *T = *FP.SplitB->succ_begin(); + // It's possible that the split block had a successor that is not a pre- + // dicated block. This could only happen if there was only one block to + // be predicated. Example: + // split_b: + // if (p) jump true_b + // jump unrelated2_b + // unrelated1_b: + // ... + // unrelated2_b: ; can have other predecessors, so it's not "false_b" + // jump other_b + // true_b: ; only reachable from split_b, can be predicated + // ... + // + // Find this successor (SSB) if it exists. + if (T != FP.TrueB && T != FP.FalseB) { + assert(!SSB); + SSB = T; + } + FP.SplitB->removeSuccessor(FP.SplitB->succ_begin()); + } + + // Insert new branches and update the successors of the split block. This + // may create unconditional branches to the layout successor, etc., but + // that will be cleaned up later. For now, make sure that correct code is + // generated. + if (FP.JoinB) { + assert(!SSB || SSB == FP.JoinB); + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(FP.JoinB); + FP.SplitB->addSuccessor(FP.JoinB); + } else { + bool HasBranch = false; + if (TSB) { + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt)) + .addReg(FP.PredR) + .addMBB(TSB); + FP.SplitB->addSuccessor(TSB); + HasBranch = true; + } + if (FSB) { + const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump) + : TII->get(Hexagon::J2_jumpf); + MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D); + if (!HasBranch) + MIB.addReg(FP.PredR); + MIB.addMBB(FSB); + FP.SplitB->addSuccessor(FSB); + } + if (SSB) { + // This cannot happen if both TSB and FSB are set. [TF]SB are the + // successor blocks of the TrueB and FalseB (or null of the TrueB + // or FalseB block is null). SSB is the potential successor block + // of the SplitB that is neither TrueB nor FalseB. + BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump)) + .addMBB(SSB); + FP.SplitB->addSuccessor(SSB); + } + } + + // What is left to do is to update the PHI nodes that could have entries + // referring to predicated blocks. 
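+  // Illustrative sketch (register names hypothetical): a join-block PHI
+  //   %v = PHI %t, <TrueB>, %f, <FalseB>
+  // is rewritten by updatePhiNodes into
+  //   SplitB: %m = C2_mux %p, %t, %f
+  //   JoinB:  %v = PHI %m, <SplitB>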
+  if (FP.JoinB) {
+    updatePhiNodes(FP.JoinB, FP);
+  } else {
+    if (TSB)
+      updatePhiNodes(TSB, FP);
+    if (FSB)
+      updatePhiNodes(FSB, FP);
+    // Nothing to update in SSB, since SSB's predecessors haven't changed.
+  }
+}
+
+
+void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
+  DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n");
+
+  // Transfer the immediate dominator information from B to its descendants.
+  MachineDomTreeNode *N = MDT->getNode(B);
+  MachineDomTreeNode *IDN = N->getIDom();
+  if (IDN) {
+    MachineBasicBlock *IDB = IDN->getBlock();
+    typedef GraphTraits<MachineDomTreeNode*> GTN;
+    typedef SmallVector<MachineDomTreeNode*,4> DTNodeVectType;
+    DTNodeVectType Cn(GTN::child_begin(N), GTN::child_end(N));
+    for (DTNodeVectType::iterator I = Cn.begin(), E = Cn.end(); I != E; ++I) {
+      MachineBasicBlock *SB = (*I)->getBlock();
+      MDT->changeImmediateDominator(SB, IDB);
+    }
+  }
+
+  while (B->succ_size() > 0)
+    B->removeSuccessor(B->succ_begin());
+
+  for (auto I = B->pred_begin(), E = B->pred_end(); I != E; ++I)
+    (*I)->removeSuccessor(B, true);
+
+  Deleted.insert(B);
+  MDT->eraseNode(B);
+  MFN->erase(B->getIterator());
+}
+
+
+void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
+  DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n");
+  MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI();
+  for (I = B->begin(); I != NonPHI; I = NextI) {
+    NextI = std::next(I);
+    MachineInstr *PN = &*I;
+    assert(PN->getNumOperands() == 3 && "Invalid phi node");
+    MachineOperand &UO = PN->getOperand(1);
+    unsigned UseR = UO.getReg(), UseSR = UO.getSubReg();
+    unsigned DefR = PN->getOperand(0).getReg();
+    unsigned NewR = UseR;
+    if (UseSR) {
+      // MRI.replaceVregUsesWith does not allow updating the subregister,
+      // so instead of doing the use-iteration here, create a copy into a
+      // "non-subregistered" register.
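+      // Sketch of the rewrite performed below (names hypothetical):
+      //   %new = COPY %use:subreg
+      // after which replaceRegWith redirects every use of the PHI's
+      // destination register to %new.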
+      DebugLoc DL = PN->getDebugLoc();
+      const TargetRegisterClass *RC = MRI->getRegClass(DefR);
+      NewR = MRI->createVirtualRegister(RC);
+      NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR)
+                 .addReg(UseR, 0, UseSR);
+    }
+    MRI->replaceRegWith(DefR, NewR);
+    B->erase(I);
+  }
+}
+
+
+void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
+      MachineBasicBlock *NewB) {
+  for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
+    MachineBasicBlock *SB = *I;
+    MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
+    for (P = SB->begin(); P != N; ++P) {
+      MachineInstr *PN = &*P;
+      for (MIOperands MO(PN); MO.isValid(); ++MO)
+        if (MO->isMBB() && MO->getMBB() == OldB)
+          MO->setMBB(NewB);
+    }
+  }
+}
+
+
+void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
+      MachineBasicBlock *SuccB) {
+  DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and "
+               << PrintMB(SuccB) << "\n");
+  bool TermOk = hasUncondBranch(SuccB);
+  eliminatePhis(SuccB);
+  TII->RemoveBranch(*PredB);
+  PredB->removeSuccessor(SuccB);
+  PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
+  MachineBasicBlock::succ_iterator I, E = SuccB->succ_end();
+  for (I = SuccB->succ_begin(); I != E; ++I)
+    PredB->addSuccessor(*I);
+  PredB->normalizeSuccProbs();
+  replacePhiEdges(SuccB, PredB);
+  removeBlock(SuccB);
+  if (!TermOk)
+    PredB->updateTerminator();
+}
+
+
+void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
+  if (FP.TrueB)
+    removeBlock(FP.TrueB);
+  if (FP.FalseB)
+    removeBlock(FP.FalseB);
+
+  FP.SplitB->updateTerminator();
+  if (FP.SplitB->succ_size() != 1)
+    return;
+
+  MachineBasicBlock *SB = *FP.SplitB->succ_begin();
+  if (SB->pred_size() != 1)
+    return;
+
+  // By now, the split block has only one successor (SB), and SB has only
+  // one predecessor. We can try to merge them. We will need to update ter-
+  // minators in FP.Split+SB, and that requires working AnalyzeBranch, which
+  // fails on Hexagon for blocks that have EH_LABELs. However, if SB ends
+  // with an unconditional branch, we won't need to touch the terminators.
+  if (!hasEHLabel(SB) || hasUncondBranch(SB))
+    mergeBlocks(FP.SplitB, SB);
+}
+
+
+bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
+  auto &ST = MF.getSubtarget();
+  TII = ST.getInstrInfo();
+  TRI = ST.getRegisterInfo();
+  MFN = &MF;
+  MRI = &MF.getRegInfo();
+  MDT = &getAnalysis<MachineDominatorTree>();
+  MLI = &getAnalysis<MachineLoopInfo>();
+  MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() :
+    nullptr;
+
+  Deleted.clear();
+  bool Changed = false;
+
+  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
+    Changed |= visitLoop(*I);
+  Changed |= visitLoop(0);
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+FunctionPass *llvm::createHexagonEarlyIfConversion() {
+  return new HexagonEarlyIfConversion();
+}
+
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index e4c8d8f7b28c..6e2dbc06b124 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -74,7 +74,7 @@ bool HexagonExpandPredSpillCode::runOnMachineFunction(MachineFunction &Fn) {
   // Loop over all of the basic blocks.
for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block. for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); ++MII) { diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index 21a8996b1159..7a52a1c9eaec 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -147,6 +147,48 @@ static cl::opt ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX), cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame " "shrink-wraps")); +static cl::opt UseAllocframe("use-allocframe", cl::init(true), + cl::Hidden, cl::desc("Use allocframe more conservatively")); + + +namespace llvm { + void initializeHexagonCallFrameInformationPass(PassRegistry&); + FunctionPass *createHexagonCallFrameInformation(); +} + +namespace { + class HexagonCallFrameInformation : public MachineFunctionPass { + public: + static char ID; + HexagonCallFrameInformation() : MachineFunctionPass(ID) { + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeHexagonCallFrameInformationPass(PR); + } + bool runOnMachineFunction(MachineFunction &MF) override; + }; + + char HexagonCallFrameInformation::ID = 0; +} + +bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) { + auto &HFI = *MF.getSubtarget().getFrameLowering(); + bool NeedCFI = MF.getMMI().hasDebugInfo() || + MF.getFunction()->needsUnwindTableEntry(); + + if (!NeedCFI) + return false; + HFI.insertCFIInstructions(MF); + return true; +} + +INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi", + "Hexagon call frame information", false, false) + +FunctionPass *llvm::createHexagonCallFrameInformation() { + return new HexagonCallFrameInformation(); +} + + namespace { /// Map a register pair Reg to the subregister that has the greater "number", /// i.e. D3 (aka R7:6) will be mapped to R7, etc. @@ -370,11 +412,11 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, insertEpilogueInBlock(*EpilogB); } else { for (auto &B : MF) - if (!B.empty() && B.back().isReturn()) + if (B.isReturnBlock()) insertCSRRestoresInBlock(B, CSI, HRI); for (auto &B : MF) - if (!B.empty() && B.back().isReturn()) + if (B.isReturnBlock()) insertEpilogueInBlock(B); } } @@ -383,10 +425,7 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF, void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineModuleInfo &MMI = MF.getMMI(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - auto &HTM = static_cast(MF.getTarget()); - auto &HST = static_cast(MF.getSubtarget()); + auto &HST = MF.getSubtarget(); auto &HII = *HST.getInstrInfo(); auto &HRI = *HST.getRegisterInfo(); DebugLoc dl; @@ -405,10 +444,6 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { bool AlignStack = (MaxAlign > getStackAlignment()); - // Check if frame moves are needed for EH. - bool needsFrameMoves = MMI.hasDebugInfo() || - MF.getFunction()->needsUnwindTableEntry(); - // Get the number of bytes to allocate from the FrameInfo. 
unsigned NumBytes = MFI->getStackSize(); unsigned SP = HRI.getStackRegister(); @@ -424,14 +459,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { MI->eraseFromParent(); } - // - // Only insert ALLOCFRAME if we need to or at -O0 for the debugger. Think - // that this shouldn't be required, but doing so now because gcc does and - // gdb can't break at the start of the function without it. Will remove if - // this turns out to be a gdb bug. - // - bool NoOpt = (HTM.getOptLevel() == CodeGenOpt::None); - if (!NoOpt && !FuncInfo->hasClobberLR() && !hasFP(MF)) + if (!hasFP(MF)) return; // Check for overflow. @@ -469,92 +497,11 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB) const { .addReg(SP) .addImm(-int64_t(MaxAlign)); } - - if (needsFrameMoves) { - std::vector Instructions = MMI.getFrameInstructions(); - MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); - - // Advance CFA. DW_CFA_def_cfa - unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); - unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); - - // CFA = FP + 8 - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( - FrameLabel, DwFPReg, -8)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // R31 (return addr) = CFA - #4 - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - FrameLabel, DwRAReg, -4)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - // R30 (frame ptr) = CFA - #8) - CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( - FrameLabel, DwFPReg, -8)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - unsigned int regsToMove[] = { - Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, - Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, - Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, - Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, - Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, Hexagon::D10, - Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::NoRegister - }; - - const std::vector &CSI = MFI->getCalleeSavedInfo(); - - for (unsigned i = 0; regsToMove[i] != Hexagon::NoRegister; ++i) { - for (unsigned I = 0, E = CSI.size(); I < E; ++I) { - if (CSI[I].getReg() == regsToMove[i]) { - // Subtract 8 to make room for R30 and R31, which are added above. - int64_t Offset = getFrameIndexOffset(MF, CSI[I].getFrameIdx()) - 8; - - if (regsToMove[i] < Hexagon::D0 || regsToMove[i] > Hexagon::D15) { - unsigned DwarfReg = HRI.getDwarfRegNum(regsToMove[i], true); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, - DwarfReg, Offset)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } else { - // Split the double regs into subregs, and generate appropriate - // cfi_offsets. - // The only reason, we are split double regs is, llvm-mc does not - // understand paired registers for cfi_offset. 
- // Eg .cfi_offset r1:0, -64 - unsigned HiReg = getMax32BitSubRegister(regsToMove[i], HRI); - unsigned LoReg = getMax32BitSubRegister(regsToMove[i], HRI, false); - unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); - unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); - unsigned HiCFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, - HiDwarfReg, Offset+4)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(HiCFIIndex); - unsigned LoCFIIndex = MMI.addFrameInst( - MCCFIInstruction::createOffset(FrameLabel, - LoDwarfReg, Offset)); - BuildMI(MBB, MBBI, dl, HII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(LoCFIIndex); - } - break; - } - } // for CSI.size() - } // for regsToMove - } // needsFrameMoves } void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { MachineFunction &MF = *MBB.getParent(); - // - // Only insert deallocframe if we need to. Also at -O0. See comment - // in insertPrologueInBlock above. - // - if (!hasFP(MF) && MF.getTarget().getOptLevel() != CodeGenOpt::None) + if (!hasFP(MF)) return; auto &HST = static_cast(MF.getSubtarget()); @@ -630,12 +577,172 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const { } +namespace { + bool IsAllocFrame(MachineBasicBlock::const_iterator It) { + if (!It->isBundle()) + return It->getOpcode() == Hexagon::S2_allocframe; + auto End = It->getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I = It.getInstrIterator(); + while (++I != End && I->isBundled()) + if (I->getOpcode() == Hexagon::S2_allocframe) + return true; + return false; + } + + MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) { + for (auto &I : B) + if (IsAllocFrame(I)) + return I; + return B.end(); + } +} + + +void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const { + for (auto &B : MF) { + auto AF = FindAllocFrame(B); + if (AF == B.end()) + continue; + insertCFIInstructionsAt(B, ++AF); + } +} + + +void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const { + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineModuleInfo &MMI = MF.getMMI(); + auto &HST = MF.getSubtarget(); + auto &HII = *HST.getInstrInfo(); + auto &HRI = *HST.getRegisterInfo(); + + // If CFI instructions have debug information attached, something goes + // wrong with the final assembly generation: the prolog_end is placed + // in a wrong location. + DebugLoc DL; + const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION); + + MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); + + if (hasFP(MF)) { + unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true); + unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true); + + // Define CFA via an offset from the value of FP. + // + // -8 -4 0 (SP) + // --+----+----+--------------------- + // | FP | LR | increasing addresses --> + // --+----+----+--------------------- + // | +-- Old SP (before allocframe) + // +-- New FP (after allocframe) + // + // MCCFIInstruction::createDefCfa subtracts the offset from the register. + // MCCFIInstruction::createOffset takes the offset without sign change. 
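+    // The three CFI instructions built below correspond to the directives
+    // (sketch, assuming r30 is FP and r31 is LR on Hexagon):
+    //   .cfi_def_cfa r30, 8    // CFA = FP + 8
+    //   .cfi_offset r31, -4    // saved LR at CFA-4
+    //   .cfi_offset r30, -8    // saved FP at CFA-8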
+ auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(DefCfa)); + // R31 (return addr) = CFA - 4 + auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR31)); + // R30 (frame ptr) = CFA - 8 + auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffR30)); + } + + static unsigned int RegsToMove[] = { + Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2, + Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18, + Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22, + Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26, + Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9, + Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13, + Hexagon::NoRegister + }; + + const std::vector &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) { + unsigned Reg = RegsToMove[i]; + auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool { + return C.getReg() == Reg; + }; + auto F = std::find_if(CSI.begin(), CSI.end(), IfR); + if (F == CSI.end()) + continue; + + // Subtract 8 to make room for R30 and R31, which are added above. + unsigned FrameReg; + int64_t Offset = getFrameIndexReference(MF, F->getFrameIdx(), FrameReg) - 8; + + if (Reg < Hexagon::D0 || Reg > Hexagon::D15) { + unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true); + auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg, + Offset); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffReg)); + } else { + // Split the double regs into subregs, and generate appropriate + // cfi_offsets. + // The only reason, we are split double regs is, llvm-mc does not + // understand paired registers for cfi_offset. + // Eg .cfi_offset r1:0, -64 + + unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg); + unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg); + unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true); + unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true); + auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg, + Offset+4); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffHi)); + auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg, + Offset); + BuildMI(MBB, At, DL, CFID) + .addCFIIndex(MMI.addFrameInst(OffLo)); + } + } +} + + bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const HexagonMachineFunctionInfo *FuncInfo = - MF.getInfo(); - return MFI->hasCalls() || MFI->getStackSize() > 0 || - FuncInfo->hasClobberLR(); + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget().getRegisterInfo(); + + bool HasFixed = MFI.getNumFixedObjects(); + bool HasPrealloc = const_cast(MFI) + .getLocalFrameObjectCount(); + bool HasExtraAlign = HRI.needsStackRealignment(MF); + bool HasAlloca = MFI.hasVarSizedObjects(); + + // Insert ALLOCFRAME if we need to or at -O0 for the debugger. Think + // that this shouldn't be required, but doing so now because gcc does and + // gdb can't break at the start of the function without it. Will remove if + // this turns out to be a gdb bug. + // + if (MF.getTarget().getOptLevel() == CodeGenOpt::None) + return true; + + // By default we want to use SP (since it's always there). FP requires + // some setup (i.e. ALLOCFRAME). 
+ // Fixed and preallocated objects need FP if the distance from them to + // the SP is unknown (as is with alloca or aligna). + if ((HasFixed || HasPrealloc) && (HasAlloca || HasExtraAlign)) + return true; + + if (MFI.getStackSize() > 0) { + if (UseAllocframe) + return true; + } + + if (MFI.hasCalls() || + MF.getInfo()->hasClobberLR()) + return true; + + return false; } @@ -718,9 +825,89 @@ static void addCalleeSaveRegistersAsImpOperand(MachineInstr *Inst, } -int HexagonFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - return MF.getFrameInfo()->getObjectOffset(FI); +int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + auto &MFI = *MF.getFrameInfo(); + auto &HRI = *MF.getSubtarget().getRegisterInfo(); + + // Large parts of this code are shared with HRI::eliminateFrameIndex. + int Offset = MFI.getObjectOffset(FI); + bool HasAlloca = MFI.hasVarSizedObjects(); + bool HasExtraAlign = HRI.needsStackRealignment(MF); + bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOpt::None; + + unsigned SP = HRI.getStackRegister(), FP = HRI.getFrameRegister(); + unsigned AP = 0; + if (const MachineInstr *AI = getAlignaInstr(MF)) + AP = AI->getOperand(0).getReg(); + unsigned FrameSize = MFI.getStackSize(); + + bool UseFP = false, UseAP = false; // Default: use SP (except at -O0). + // Use FP at -O0, except when there are objects with extra alignment. + // That additional alignment requirement may cause a pad to be inserted, + // which will make it impossible to use FP to access objects located + // past the pad. + if (NoOpt && !HasExtraAlign) + UseFP = true; + if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) { + // Fixed and preallocated objects will be located before any padding + // so FP must be used to access them. + UseFP |= (HasAlloca || HasExtraAlign); + } else { + if (HasAlloca) { + if (HasExtraAlign) + UseAP = true; + else + UseFP = true; + } + } + + // If FP was picked, then there had better be FP. + bool HasFP = hasFP(MF); + assert((HasFP || !UseFP) && "This function must have frame pointer"); + + // Having FP implies allocframe. Allocframe will store extra 8 bytes: + // FP/LR. If the base register is used to access an object across these + // 8 bytes, then the offset will need to be adjusted by 8. + // + // After allocframe: + // HexagonISelLowering adds 8 to ---+ + // the offsets of all stack-based | + // arguments (*) | + // | + // getObjectOffset < 0 0 8 getObjectOffset >= 8 + // ------------------------+-----+------------------------> increasing + // |FP/LR| addresses + // -----------------+------+-----+------------------------> + // | | + // SP/AP point --+ +-- FP points here (**) + // somewhere on + // this side of FP/LR + // + // (*) See LowerFormalArguments. The FP/LR is assumed to be present. + // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR. + + // The lowering assumes that FP/LR is present, and so the offsets of + // the formal arguments start at 8. If FP/LR is not there we need to + // reduce the offset by 8. + if (Offset > 0 && !HasFP) + Offset -= 8; + + if (UseFP) + FrameReg = FP; + else if (UseAP) + FrameReg = AP; + else + FrameReg = SP; + + // Calculate the actual offset in the instruction. If there is no FP + // (in other words, no allocframe), then SP will not be adjusted (i.e. + // there will be no SP -= FrameSize), so the frame size should not be + // added to the calculated offset. 
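+  // Worked example (hypothetical numbers): FrameSize = 24 and
+  // getObjectOffset(FI) = -16 give RealOffset = 24 + (-16) = 8 when the
+  // frame pointer exists but the access is SP-based (!UseFP && !UseAP).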
+ int RealOffset = Offset; + if (!UseFP && !UseAP && HasFP) + RealOffset = FrameSize+Offset; + return RealOffset; } @@ -731,7 +918,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI = MBB.begin(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + auto &HII = *MF.getSubtarget().getInstrInfo(); if (useSpillFunction(MF, CSI)) { unsigned MaxReg = getMaxCalleeSavedReg(CSI, HRI); @@ -739,7 +926,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, // Call spill function. DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); MachineInstr *SaveRegsCall = - BuildMI(MBB, MI, DL, TII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) + BuildMI(MBB, MI, DL, HII.get(Hexagon::SAVE_REGISTERS_CALL_V4)) .addExternalSymbol(SpillFun); // Add callee-saved registers as use. addCalleeSaveRegistersAsImpOperand(SaveRegsCall, MaxReg, false); @@ -757,7 +944,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB, bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg); int FI = CSI[i].getFrameIdx(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); + HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI); if (IsKill) MBB.addLiveIn(Reg); } @@ -772,7 +959,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + auto &HII = *MF.getSubtarget().getInstrInfo(); if (useRestoreFunction(MF, CSI)) { bool HasTC = hasTailCall(MBB) || !hasReturn(MBB); @@ -787,14 +974,14 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, if (HasTC) { unsigned ROpc = Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4; - DeallocCall = BuildMI(MBB, MI, DL, TII.get(ROpc)) + DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc)) .addExternalSymbol(RestoreFn); } else { // The block has a return. MachineBasicBlock::iterator It = MBB.getFirstTerminator(); assert(It->isReturn() && std::next(It) == MBB.end()); unsigned ROpc = Hexagon::RESTORE_DEALLOC_RET_JMP_V4; - DeallocCall = BuildMI(MBB, It, DL, TII.get(ROpc)) + DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc)) .addExternalSymbol(RestoreFn); // Transfer the function live-out registers. DeallocCall->copyImplicitOps(MF, It); @@ -807,7 +994,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB, unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg); int FI = CSI[i].getFrameIdx(); - TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); + HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI); } return true; } @@ -832,9 +1019,9 @@ void HexagonFrameLowering::processFunctionBeforeFrameFinalized( // via AP, which may not be available at the particular place in the program. MachineFrameInfo *MFI = MF.getFrameInfo(); bool HasAlloca = MFI->hasVarSizedObjects(); - bool HasAligna = (MFI->getMaxAlignment() > getStackAlignment()); + bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment()); - if (!HasAlloca || !HasAligna) + if (!HasAlloca || !NeedsAlign) return; unsigned LFS = MFI->getLocalFrameSize(); @@ -864,13 +1051,13 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF, // Check for an unused caller-saved register. 
for ( ; *CallerSavedRegs; ++CallerSavedRegs) { MCPhysReg FreeReg = *CallerSavedRegs; - if (MRI.isPhysRegUsed(FreeReg)) + if (!MRI.reg_nodbg_empty(FreeReg)) continue; // Check aliased register usage. bool IsCurrentRegUsed = false; for (MCRegAliasIterator AI(FreeReg, &HRI, false); AI.isValid(); ++AI) - if (MRI.isPhysRegUsed(*AI)) { + if (!MRI.reg_nodbg_empty(*AI)) { IsCurrentRegUsed = true; break; } @@ -896,7 +1083,7 @@ bool HexagonFrameLowering::replacePredRegPseudoSpillCode(MachineFunction &MF) // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block. MachineBasicBlock::iterator NextII; for (MachineBasicBlock::iterator MII = MBB->begin(); MII != MBB->end(); @@ -1210,7 +1397,8 @@ bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const { } -MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const { +const MachineInstr *HexagonFrameLowering::getAlignaInstr( + const MachineFunction &MF) const { for (auto &B : MF) for (auto &I : B) if (I.getOpcode() == Hexagon::ALIGNA) @@ -1219,6 +1407,7 @@ MachineInstr *HexagonFrameLowering::getAlignaInstr(MachineFunction &MF) const { } +// FIXME: Use Function::optForSize(). inline static bool isOptSize(const MachineFunction &MF) { AttributeSet AF = MF.getFunction()->getAttributes(); return AF.hasAttribute(AttributeSet::FunctionIndex, @@ -1226,8 +1415,7 @@ inline static bool isOptSize(const MachineFunction &MF) { } inline static bool isMinSize(const MachineFunction &MF) { - AttributeSet AF = MF.getFunction()->getAttributes(); - return AF.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + return MF.getFunction()->optForMinSize(); } @@ -1289,4 +1477,3 @@ bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF, : SpillFuncThreshold; return Threshold < NumCSI; } - diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index d39ee2c77195..683b303d43ea 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -51,7 +51,8 @@ public: bool targetHandlesStackFrameRounding() const override { return true; } - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; bool hasFP(const MachineFunction &MF) const override; const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) @@ -73,7 +74,9 @@ public: const override; bool needsAligna(const MachineFunction &MF) const; - MachineInstr *getAlignaInstr(MachineFunction &MF) const; + const MachineInstr *getAlignaInstr(const MachineFunction &MF) const; + + void insertCFIInstructions(MachineFunction &MF) const; private: typedef std::vector CSIVect; @@ -86,6 +89,8 @@ private: const HexagonRegisterInfo &HRI) const; bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI, const HexagonRegisterInfo &HRI) const; + void insertCFIInstructionsAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator At) const; void adjustForCalleeSavedRegsSpillCall(MachineFunction &MF) const; bool replacePredRegPseudoSpillCode(MachineFunction &MF) const; @@ -94,7 +99,7 @@ private: void findShrunkPrologEpilog(MachineFunction &MF, MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const; - bool shouldInlineCSR(llvm::MachineFunction&, const CSIVect&) const; + bool 
shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const; bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const; bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const; }; diff --git a/lib/Target/Hexagon/HexagonGenExtract.cpp b/lib/Target/Hexagon/HexagonGenExtract.cpp index 4d32208bd5aa..f26e2ff764d7 100644 --- a/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -195,7 +195,7 @@ bool HexagonGenExtract::convert(Instruction *In) { return false; } - IRBuilder<> IRB(BB, In); + IRBuilder<> IRB(In); Intrinsic::ID IntId = (BW == 32) ? Intrinsic::hexagon_S2_extractu : Intrinsic::hexagon_S2_extractup; Module *Mod = BB->getParent()->getParent(); diff --git a/lib/Target/Hexagon/HexagonGenInsert.cpp b/lib/Target/Hexagon/HexagonGenInsert.cpp index 096da949e77b..64a2b6cec18a 100644 --- a/lib/Target/Hexagon/HexagonGenInsert.cpp +++ b/lib/Target/Hexagon/HexagonGenInsert.cpp @@ -77,9 +77,8 @@ namespace { namespace { // Set of virtual registers, based on BitVector. struct RegisterSet : private BitVector { - RegisterSet() : BitVector() {} + RegisterSet() = default; explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {} - RegisterSet(const RegisterSet &RS) : BitVector(RS) {} using BitVector::clear; @@ -1496,7 +1495,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { // version of DCE that preserves lifetime markers. Without it, merging // of stack objects can fail to recognize and merge disjoint objects // leading to unnecessary stack growth. - Changed |= removeDeadCode(MDT->getRootNode()); + Changed = removeDeadCode(MDT->getRootNode()); const HexagonEvaluator HE(*HRI, *MRI, *HII, MF); BitTracker BTLoc(HE, MF); @@ -1534,7 +1533,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { } if (IFMap.empty()) - return false; + return Changed; { NamedRegionTimer _T("pruning", "hexinsert", TimingDetail); @@ -1547,7 +1546,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { } if (IFMap.empty()) - return false; + return Changed; { NamedRegionTimer _T("selection", "hexinsert", TimingDetail); @@ -1572,13 +1571,15 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) { for (unsigned i = 0, n = Out.size(); i < n; ++i) IFMap.erase(Out[i]); } + if (IFMap.empty()) + return Changed; { NamedRegionTimer _T("generation", "hexinsert", TimingDetail); - Changed = generateInserts(); + generateInserts(); } - return Changed; + return true; } diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp new file mode 100644 index 000000000000..c059d566709e --- /dev/null +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -0,0 +1,319 @@ +//===--- HexagonGenMux.cpp ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// During instruction selection, MUX instructions are generated for +// conditional assignments. Since such assignments often present an +// opportunity to predicate instructions, HexagonExpandCondsets +// expands MUXes into pairs of conditional transfers, and then proceeds +// with predication of the producers/consumers of the registers involved. +// This happens after exiting from the SSA form, but before the machine +// instruction scheduler. 
After the scheduler and after the register +// allocation there can be cases of pairs of conditional transfers +// resulting from a MUX where neither of them was further predicated. If +// these transfers are now placed far enough from the instruction defining +// the predicate register, they cannot use the .new form. In such cases it +// is better to collapse them back to a single MUX instruction. + +#define DEBUG_TYPE "hexmux" + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "HexagonTargetMachine.h" + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonGenMux(); + void initializeHexagonGenMuxPass(PassRegistry& Registry); +} + +namespace { + class HexagonGenMux : public MachineFunctionPass { + public: + static char ID; + HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) { + initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon generate mux instructions"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; + + struct CondsetInfo { + unsigned PredR; + unsigned TrueX, FalseX; + CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {} + }; + struct DefUseInfo { + BitVector Defs, Uses; + DefUseInfo() : Defs(), Uses() {} + DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {} + }; + struct MuxInfo { + MachineBasicBlock::iterator At; + unsigned DefR, PredR; + MachineOperand *SrcT, *SrcF; + MachineInstr *Def1, *Def2; + MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR, + MachineOperand *TOp, MachineOperand *FOp, + MachineInstr *D1, MachineInstr *D2) + : At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(D1), + Def2(D2) {} + }; + typedef DenseMap InstrIndexMap; + typedef DenseMap DefUseInfoMap; + typedef SmallVector MuxInfoList; + + bool isRegPair(unsigned Reg) const { + return Hexagon::DoubleRegsRegClass.contains(Reg); + } + void getSubRegs(unsigned Reg, BitVector &SRs) const; + void expandReg(unsigned Reg, BitVector &Set) const; + void getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const; + void buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM); + bool isCondTransfer(unsigned Opc) const; + unsigned getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const; + bool genMuxInBlock(MachineBasicBlock &B); + }; + + char HexagonGenMux::ID = 0; +} + +INITIALIZE_PASS(HexagonGenMux, "hexagon-mux", + "Hexagon generate mux instructions", false, false) + + +void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const { + for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I) + SRs[*I] = true; +} + + +void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const { + if (isRegPair(Reg)) + getSubRegs(Reg, Set); + else + Set[Reg] = true; +} + + +void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, + BitVector &Uses) const { + // First, get the implicit defs and uses for this instruction. 
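+  // (For example, a call's implicit defs are recorded here so that the
+  // scan in genMuxInBlock can tell when the predicate or a source
+  // register is clobbered between the two conditional transfers.)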
+ unsigned Opc = MI->getOpcode(); + const MCInstrDesc &D = HII->get(Opc); + if (const MCPhysReg *R = D.ImplicitDefs) + while (*R) + expandReg(*R++, Defs); + if (const MCPhysReg *R = D.ImplicitUses) + while (*R) + expandReg(*R++, Uses); + + // Look over all operands, and collect explicit defs and uses. + for (ConstMIOperands Mo(MI); Mo.isValid(); ++Mo) { + if (!Mo->isReg() || Mo->isImplicit()) + continue; + unsigned R = Mo->getReg(); + BitVector &Set = Mo->isDef() ? Defs : Uses; + expandReg(R, Set); + } +} + + +void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X, + DefUseInfoMap &DUM) { + unsigned Index = 0; + unsigned NR = HRI->getNumRegs(); + BitVector Defs(NR), Uses(NR); + + for (MachineBasicBlock::iterator I = B.begin(), E = B.end(); I != E; ++I) { + MachineInstr *MI = &*I; + I2X.insert(std::make_pair(MI, Index)); + Defs.reset(); + Uses.reset(); + getDefsUses(MI, Defs, Uses); + DUM.insert(std::make_pair(Index, DefUseInfo(Defs, Uses))); + Index++; + } +} + + +bool HexagonGenMux::isCondTransfer(unsigned Opc) const { + switch (Opc) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + return true; + } + return false; +} + + +unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1, + const MachineOperand &Src2) const { + bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg(); + if (IsReg1) + return IsReg2 ? Hexagon::C2_mux : Hexagon::C2_muxir; + if (IsReg2) + return Hexagon::C2_muxri; + + // Neither is a register. The first source is extendable, but the second + // is not (s8). + if (Src2.isImm() && isInt<8>(Src2.getImm())) + return Hexagon::C2_muxii; + + return 0; +} + + +bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) { + bool Changed = false; + InstrIndexMap I2X; + DefUseInfoMap DUM; + buildMaps(B, I2X, DUM); + + typedef DenseMap CondsetMap; + CondsetMap CM; + MuxInfoList ML; + + MachineBasicBlock::iterator NextI, End = B.end(); + for (MachineBasicBlock::iterator I = B.begin(); I != End; I = NextI) { + MachineInstr *MI = &*I; + NextI = std::next(I); + unsigned Opc = MI->getOpcode(); + if (!isCondTransfer(Opc)) + continue; + unsigned DR = MI->getOperand(0).getReg(); + if (isRegPair(DR)) + continue; + + unsigned PR = MI->getOperand(1).getReg(); + unsigned Idx = I2X.lookup(MI); + CondsetMap::iterator F = CM.find(DR); + bool IfTrue = HII->isPredicatedTrue(Opc); + + // If there is no record of a conditional transfer for this register, + // or the predicate register differs, create a new record for it. + if (F != CM.end() && F->second.PredR != PR) { + CM.erase(F); + F = CM.end(); + } + if (F == CM.end()) { + auto It = CM.insert(std::make_pair(DR, CondsetInfo())); + F = It.first; + F->second.PredR = PR; + } + CondsetInfo &CI = F->second; + if (IfTrue) + CI.TrueX = Idx; + else + CI.FalseX = Idx; + if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX) + continue; + + // There is now a complete definition of DR, i.e. we have the predicate + // register, the definition if-true, and definition if-false. + + // First, check if both definitions are far enough from the definition + // of the predicate register. + unsigned MinX = std::min(CI.TrueX, CI.FalseX); + unsigned MaxX = std::max(CI.TrueX, CI.FalseX); + unsigned SearchX = (MaxX > 4) ? 
+    bool NearDef = false;
+    for (unsigned X = SearchX; X < MaxX; ++X) {
+      const DefUseInfo &DU = DUM.lookup(X);
+      if (!DU.Defs[PR])
+        continue;
+      NearDef = true;
+      break;
+    }
+    if (NearDef)
+      continue;
+
+    // The predicate register is not defined in the last few instructions.
+    // Check if the conversion to MUX is possible (either "up", i.e. at the
+    // place of the earlier partial definition, or "down", where the later
+    // definition is located). Examine all defs and uses between these two
+    // definitions.
+    // SR1, SR2 - source registers from the first and the second definition.
+    MachineBasicBlock::iterator It1 = B.begin(), It2 = B.begin();
+    std::advance(It1, MinX);
+    std::advance(It2, MaxX);
+    MachineInstr *Def1 = It1, *Def2 = It2;
+    MachineOperand *Src1 = &Def1->getOperand(2), *Src2 = &Def2->getOperand(2);
+    unsigned SR1 = Src1->isReg() ? Src1->getReg() : 0;
+    unsigned SR2 = Src2->isReg() ? Src2->getReg() : 0;
+    bool Failure = false, CanUp = true, CanDown = true;
+    for (unsigned X = MinX+1; X < MaxX; X++) {
+      const DefUseInfo &DU = DUM.lookup(X);
+      if (DU.Defs[PR] || DU.Defs[DR] || DU.Uses[DR]) {
+        Failure = true;
+        break;
+      }
+      if (CanDown && DU.Defs[SR1])
+        CanDown = false;
+      if (CanUp && DU.Defs[SR2])
+        CanUp = false;
+    }
+    if (Failure || (!CanUp && !CanDown))
+      continue;
+
+    MachineOperand *SrcT = (MinX == CI.TrueX) ? Src1 : Src2;
+    MachineOperand *SrcF = (MinX == CI.FalseX) ? Src1 : Src2;
+    // Prefer "down", since this will move the MUX farther away from the
+    // predicate definition.
+    MachineBasicBlock::iterator At = CanDown ? Def2 : Def1;
+    ML.push_back(MuxInfo(At, DR, PR, SrcT, SrcF, Def1, Def2));
+  }
+
+  for (unsigned I = 0, N = ML.size(); I < N; ++I) {
+    MuxInfo &MX = ML[I];
+    MachineBasicBlock &B = *MX.At->getParent();
+    DebugLoc DL = MX.At->getDebugLoc();
+    unsigned MxOpc = getMuxOpcode(*MX.SrcT, *MX.SrcF);
+    if (!MxOpc)
+      continue;
+    BuildMI(B, MX.At, DL, HII->get(MxOpc), MX.DefR)
+        .addReg(MX.PredR)
+        .addOperand(*MX.SrcT)
+        .addOperand(*MX.SrcF);
+    B.erase(MX.Def1);
+    B.erase(MX.Def2);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+bool HexagonGenMux::runOnMachineFunction(MachineFunction &MF) {
+  HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+  HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+  bool Changed = false;
+  for (auto &I : MF)
+    Changed |= genMuxInBlock(I);
+  return Changed;
+}
+
+FunctionPass *llvm::createHexagonGenMux() {
+  return new HexagonGenMux();
+}
+
diff --git a/lib/Target/Hexagon/HexagonGenPredicate.cpp b/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 6905c4f6d125..d9675b5173d2 100644
--- a/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -250,7 +250,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
   unsigned NewPR = MRI->createVirtualRegister(PredRC);
 
   // For convertible instructions, do not modify them, so that they can
-  // be coverted later.  Generate a copy from Reg to NewPR.
+  // be converted later.  Generate a copy from Reg to NewPR.
   if (isConvertibleToPredForm(DefI)) {
     MachineBasicBlock::iterator DefIt = DefI;
     BuildMI(B, std::next(DefIt), DL, TII->get(TargetOpcode::COPY), NewPR)
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 53b6bf617e8f..d20a809d6c09 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -727,9 +727,9 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
   // Phis that may feed into the loop.
   LoopFeederMap LoopFeederPhi;
 
-  // Check if the inital value may be zero and can be decremented in the first
+  // Check if the initial value may be zero and can be decremented in the first
   // iteration. If the value is zero, the endloop instruction will not decrement
-  // the loop counter, so we shoudn't generate a hardware loop in this case.
+  // the loop counter, so we shouldn't generate a hardware loop in this case.
   if (loopCountMayWrapOrUnderFlow(Start, End, Loop->getLoopPreheader(), Loop,
                                   LoopFeederPhi))
     return nullptr;
@@ -1288,14 +1288,14 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
   typedef MachineBasicBlock::instr_iterator instr_iterator;
 
   // Check if things are in order to begin with.
-  for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
+  for (instr_iterator I(BumpI), E = BB->instr_end(); I != E; ++I)
     if (&*I == CmpI)
       return true;
 
   // Out of order.
   unsigned PredR = CmpI->getOperand(0).getReg();
   bool FoundBump = false;
-  instr_iterator CmpIt = CmpI, NextIt = std::next(CmpIt);
+  instr_iterator CmpIt = CmpI->getIterator(), NextIt = std::next(CmpIt);
   for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
     MachineInstr *In = &*I;
     for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
@@ -1307,9 +1307,7 @@ bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
     }
 
     if (In == BumpI) {
-      instr_iterator After = BumpI;
-      instr_iterator From = CmpI;
-      BB->splice(std::next(After), BB, From);
+      BB->splice(++BumpI->getIterator(), BB, CmpI->getIterator());
       FoundBump = true;
       break;
     }
@@ -1440,7 +1438,7 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
   if (Comparison::isSigned(Cmp))
     return false;
 
-  // Check if there is a comparison of the inital value. If the initial value
+  // Check if there is a comparison of the initial value. If the initial value
   // is greater than or not equal to another value, then assume this is a
   // range check.
   if ((Cmp & Comparison::G) || Cmp == Comparison::NE)
@@ -1850,7 +1848,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
   }
 
   MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
-  MF->insert(Header, NewPH);
+  MF->insert(Header->getIterator(), NewPH);
 
   if (Header->pred_size() > 2) {
     // Ensure that the header has only two predecessors: the preheader and
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 9123057e60d1..a0da945e7572 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -50,16 +50,21 @@ namespace {
 class HexagonDAGToDAGISel : public SelectionDAGISel {
   const HexagonTargetMachine& HTM;
   const HexagonSubtarget *HST;
+  const HexagonInstrInfo *HII;
+  const HexagonRegisterInfo *HRI;
 public:
   explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
                                CodeGenOpt::Level OptLevel)
-      : SelectionDAGISel(tm, OptLevel), HTM(tm) {
+      : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+        HRI(nullptr) {
    initializeHexagonDAGToDAGISelPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override {
     // Reset the subtarget each time through.
+    HST = &MF.getSubtarget<HexagonSubtarget>();
+    HII = HST->getInstrInfo();
+    HRI = HST->getRegisterInfo();
     SelectionDAGISel::runOnMachineFunction(MF);
     return true;
   }
@@ -104,7 +109,6 @@ public:
   SDNode *SelectConstantFP(SDNode *N);
   SDNode *SelectAdd(SDNode *N);
   SDNode *SelectBitOp(SDNode *N);
-  bool isConstExtProfitable(SDNode *N) const;
 
   // XformMskToBitPosU5Imm - Returns the bit position which
   // the single bit 32 bit mask represents.
@@ -139,8 +143,8 @@ public:
   // type i32 where the negative literal is transformed into a positive literal
   // for use in -= memops.
   inline SDValue XformM5ToU5Imm(signed Imm, SDLoc DL) {
-    assert( (Imm >= -31 && Imm <= -1)  && "Constant out of range for Memops");
-    return CurDAG->getTargetConstant( - Imm, DL, MVT::i32);
+    assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
+    return CurDAG->getTargetConstant(-Imm, DL, MVT::i32);
   }
 
   // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
@@ -203,11 +207,10 @@ void llvm::initializeHexagonDAGToDAGISelPass(PassRegistry &Registry) {
 
 
 // Intrinsics that return a a predicate.
-static unsigned doesIntrinsicReturnPredicate(unsigned ID)
-{
+static bool doesIntrinsicReturnPredicate(unsigned ID) {
   switch (ID) {
     default:
-      return 0;
+      return false;
     case Intrinsic::hexagon_C2_cmpeq:
     case Intrinsic::hexagon_C2_cmpgt:
     case Intrinsic::hexagon_C2_cmpgtu:
@@ -244,7 +247,7 @@ static unsigned doesIntrinsicReturnPredicate(unsigned ID)
     case Intrinsic::hexagon_C2_tfrrp:
     case Intrinsic::hexagon_S2_tstbit_i:
     case Intrinsic::hexagon_S2_tstbit_r:
-      return 1;
+      return true;
   }
 }
 
@@ -258,8 +261,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadSignExtend64(LoadSDNode *LD,
   SDNode *OffsetNode = Offset.getNode();
   int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
 
-  const HexagonInstrInfo &TII = *HST->getInstrInfo();
-  if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+  if (HII->isValidAutoIncImm(LoadedVT, Val)) {
     SDValue TargetConst = CurDAG->getTargetConstant(Val, dl, MVT::i32);
     SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32,
                                               MVT::Other, Base, TargetConst,
@@ -312,8 +314,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoadZeroExtend64(LoadSDNode *LD,
   SDNode *OffsetNode = Offset.getNode();
   int32_t Val = cast<ConstantSDNode>(OffsetNode)->getSExtValue();
 
-  const HexagonInstrInfo &TII = *HST->getInstrInfo();
-  if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+  if (HII->isValidAutoIncImm(LoadedVT, Val)) {
     SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
     SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
     SDNode *Result_1 = CurDAG->getMachineNode(Opcode, dl, MVT::i32,
@@ -378,29 +379,46 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
   // loads.
   ISD::LoadExtType ExtType = LD->getExtensionType();
   bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
+  bool HasVecOffset = false;
 
   // Figure out the opcode.
-  const HexagonInstrInfo &TII = *HST->getInstrInfo();
   if (LoadedVT == MVT::i64) {
-    if (TII.isValidAutoIncImm(LoadedVT, Val))
+    if (HII->isValidAutoIncImm(LoadedVT, Val))
       Opcode = Hexagon::L2_loadrd_pi;
     else
       Opcode = Hexagon::L2_loadrd_io;
   } else if (LoadedVT == MVT::i32) {
-    if (TII.isValidAutoIncImm(LoadedVT, Val))
+    if (HII->isValidAutoIncImm(LoadedVT, Val))
      Opcode = Hexagon::L2_loadri_pi;
     else
       Opcode = Hexagon::L2_loadri_io;
   } else if (LoadedVT == MVT::i16) {
-    if (TII.isValidAutoIncImm(LoadedVT, Val))
+    if (HII->isValidAutoIncImm(LoadedVT, Val))
       Opcode = IsZeroExt ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadrh_pi;
     else
       Opcode = IsZeroExt ? Hexagon::L2_loadruh_io : Hexagon::L2_loadrh_io;
   } else if (LoadedVT == MVT::i8) {
-    if (TII.isValidAutoIncImm(LoadedVT, Val))
+    if (HII->isValidAutoIncImm(LoadedVT, Val))
       Opcode = IsZeroExt ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrb_pi;
     else
       Opcode = IsZeroExt ? Hexagon::L2_loadrub_io : Hexagon::L2_loadrb_io;
+  } else if (LoadedVT == MVT::v16i32 || LoadedVT == MVT::v8i64 ||
+             LoadedVT == MVT::v32i16 || LoadedVT == MVT::v64i8) {
+    HasVecOffset = true;
+    if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+      Opcode = Hexagon::V6_vL32b_pi;
+    }
+    else
+      Opcode = Hexagon::V6_vL32b_ai;
+  // 128B
+  } else if (LoadedVT == MVT::v32i32 || LoadedVT == MVT::v16i64 ||
+             LoadedVT == MVT::v64i16 || LoadedVT == MVT::v128i8) {
+    HasVecOffset = true;
+    if (HII->isValidAutoIncImm(LoadedVT, Val)) {
+      Opcode = Hexagon::V6_vL32b_pi_128B;
+    }
+    else
+      Opcode = Hexagon::V6_vL32b_ai_128B;
   } else
     llvm_unreachable("unknown memory type");
 
@@ -411,7 +429,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
   if (LD->getValueType(0) == MVT::i64 && ExtType == ISD::SEXTLOAD)
     return SelectIndexedLoadSignExtend64(LD, Opcode, dl);
 
-  if (TII.isValidAutoIncImm(LoadedVT, Val)) {
+  if (HII->isValidAutoIncImm(LoadedVT, Val)) {
     SDValue TargetConstVal = CurDAG->getTargetConstant(Val, dl, MVT::i32);
     SDNode* Result = CurDAG->getMachineNode(Opcode, dl,
                                             LD->getValueType(0),
@@ -420,15 +438,25 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, SDLoc dl) {
     MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
     MemOp[0] = LD->getMemOperand();
     cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
-    const SDValue Froms[] = { SDValue(LD, 0),
-                              SDValue(LD, 1),
-                              SDValue(LD, 2)
-                            };
-    const SDValue Tos[]   = { SDValue(Result, 0),
-                              SDValue(Result, 1),
-                              SDValue(Result, 2)
-                            };
-    ReplaceUses(Froms, Tos, 3);
+    if (HasVecOffset) {
+      const SDValue Froms[] = { SDValue(LD, 0),
+                                SDValue(LD, 2)
+                              };
+      const SDValue Tos[]   = { SDValue(Result, 0),
+                                SDValue(Result, 2)
+                              };
+      ReplaceUses(Froms, Tos, 2);
+    } else {
+      const SDValue Froms[] = { SDValue(LD, 0),
+                                SDValue(LD, 1),
+                                SDValue(LD, 2)
+                              };
+      const SDValue Tos[]   = { SDValue(Result, 0),
+                                SDValue(Result, 1),
+                                SDValue(Result, 2)
+                              };
+      ReplaceUses(Froms, Tos, 3);
+    }
     return Result;
   } else {
     SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
@@ -487,8 +515,7 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
 
   // Offset value must be within representable range
   // and must have correct alignment properties.
-  const HexagonInstrInfo &TII = *HST->getInstrInfo();
-  if (TII.isValidAutoIncImm(StoredVT, Val)) {
+  if (HII->isValidAutoIncImm(StoredVT, Val)) {
     unsigned Opcode = 0;
 
     // Figure out the post inc version of opcode.
@@ -496,7 +523,15 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
     else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_pi;
     else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_pi;
     else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_pi;
-    else llvm_unreachable("unknown memory type");
+    else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+             StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8) {
+      Opcode = Hexagon::V6_vS32b_pi;
+    }
+    // 128B
+    else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+             StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8) {
+      Opcode = Hexagon::V6_vS32b_pi_128B;
+    } else llvm_unreachable("unknown memory type");
 
     if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
       assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
@@ -530,6 +565,13 @@ SDNode *HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, SDLoc dl) {
   else if (StoredVT == MVT::i32) Opcode = Hexagon::S2_storeri_io;
   else if (StoredVT == MVT::i16) Opcode = Hexagon::S2_storerh_io;
   else if (StoredVT == MVT::i8) Opcode = Hexagon::S2_storerb_io;
+  else if (StoredVT == MVT::v16i32 || StoredVT == MVT::v8i64 ||
+           StoredVT == MVT::v32i16 || StoredVT == MVT::v64i8)
+    Opcode = Hexagon::V6_vS32b_ai;
+  // 128B
+  else if (StoredVT == MVT::v32i32 || StoredVT == MVT::v16i64 ||
+           StoredVT == MVT::v64i16 || StoredVT == MVT::v128i8)
+    Opcode = Hexagon::V6_vS32b_ai_128B;
   else llvm_unreachable("unknown memory type");
 
   // Build regular store.
@@ -1113,14 +1155,12 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
   }
 
   if (Opc == ISD::AND) {
-    if (((ValueVT == MVT::i32) &&
-         (!((Val & 0x80000000) || (Val & 0x7fffffff)))) ||
-        ((ValueVT == MVT::i64) &&
-         (!((Val & 0x8000000000000000) || (Val & 0x7fffffff)))))
-      // If it's simple AND, do the normal op.
-      return SelectCode(N);
-    else
+    // Check if this is a bit-clearing AND, if not select code the usual way.
+    if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) ||
+        (ValueVT == MVT::i64 && isPowerOf2_64(~Val)))
       Val = ~Val;
+    else
+      return SelectCode(N);
   }
 
   // If OR or AND is being fed by shl, srl and, sra don't do this change,
@@ -1128,7 +1168,8 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
   // Traverse the DAG to see if there is shl, srl and sra.
   if (Opc == ISD::OR || Opc == ISD::AND) {
     switch (N->getOperand(0)->getOpcode()) {
-      default: break;
+      default:
+        break;
      case ISD::SRA:
      case ISD::SRL:
      case ISD::SHL:
@@ -1137,23 +1178,24 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
   }
 
   // Make sure it's power of 2.
-  unsigned bitpos = 0;
+  unsigned BitPos = 0;
   if (Opc != ISD::FABS && Opc != ISD::FNEG) {
-    if (((ValueVT == MVT::i32) && !isPowerOf2_32(Val)) ||
-        ((ValueVT == MVT::i64) && !isPowerOf2_64(Val)))
+    if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) ||
+        (ValueVT == MVT::i64 && !isPowerOf2_64(Val)))
       return SelectCode(N);
 
     // Get the bit position.
-    bitpos = countTrailingZeros(uint64_t(Val));
+    BitPos = countTrailingZeros(uint64_t(Val));
   } else {
     // For fabs and fneg, it's always the 31st bit.
-    bitpos = 31;
+    BitPos = 31;
   }
 
   unsigned BitOpc = 0;
   // Set the right opcode for bitwise operations.
-  switch(Opc) {
-    default: llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
+  switch (Opc) {
+    default:
+      llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
     case ISD::AND:
     case ISD::FABS:
       BitOpc = Hexagon::S2_clrbit_i;
       break;
@@ -1169,7 +1211,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
   SDNode *Result;
   // Get the right SDVal for the opcode.
-  SDValue SDVal = CurDAG->getTargetConstant(bitpos, dl, MVT::i32);
+  SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32);
 
   if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
     Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
@@ -1198,7 +1240,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
                                       MVT::i32, SDValue(Reg, 0));
 
     // Clear/set/toggle hi or lo registers depending on the bit position.
-    if (SubValueVT != MVT::f32 && bitpos < 32) {
+    if (SubValueVT != MVT::f32 && BitPos < 32) {
       SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
                                                SubregLO, SDVal);
       const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
@@ -1207,7 +1249,7 @@ SDNode *HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
                                         dl, ValueVT, Ops);
     } else {
       if (Opc != ISD::FABS && Opc != ISD::FNEG)
-        SDVal = CurDAG->getTargetConstant(bitpos - 32, dl, MVT::i32);
+        SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32);
       SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
                                                SubregHI, SDVal);
       const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
@@ -1328,25 +1370,12 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
   return false;
 }
 
-bool HexagonDAGToDAGISel::isConstExtProfitable(SDNode *N) const {
-  unsigned UseCount = 0;
-  unsigned CallCount = 0;
-  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
-    // Ignore call instructions.
-    if (I->getOpcode() == ISD::CopyToReg)
-      ++CallCount;
-    UseCount++;
-  }
-
-  return (UseCount <= 1) || (CallCount > 1);
-
-}
 
 void HexagonDAGToDAGISel::PreprocessISelDAG() {
   SelectionDAG &DAG = *CurDAG;
   std::vector<SDNode*> Nodes;
-  for (auto I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I)
-    Nodes.push_back(I);
+  for (SDNode &Node : DAG.allnodes())
+    Nodes.push_back(&Node);
 
   // Simplify: (or (select c x 0) z)  ->  (select c (or x z) z)
   //           (or (select c 0 y) z)  ->  (select c z (or y z))
@@ -1397,11 +1426,10 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
     return;
 
   MachineFrameInfo *MFI = MF->getFrameInfo();
-  MachineBasicBlock *EntryBB = MF->begin();
+  MachineBasicBlock *EntryBB = &MF->front();
   unsigned AR = FuncInfo->CreateReg(MVT::i32);
   unsigned MaxA = MFI->getMaxAlignment();
-  auto &HII = *HST.getInstrInfo();
-  BuildMI(EntryBB, DebugLoc(), HII.get(Hexagon::ALIGNA), AR)
+  BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR)
       .addImm(MaxA);
   MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
 }
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index c739afb70c15..01670902e2b0 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
 
 #define DEBUG_TYPE "hexagon-lowering"
 
-static cl::opt<bool>
-EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
+static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
+  cl::init(true), cl::Hidden,
   cl::desc("Control jump table emission on Hexagon target"));
 
 static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
@@ -98,6 +98,9 @@ public:
 }
 
 // Implement calling convention for Hexagon.
+
+static bool IsHvxVectorType(MVT ty);
+
 static bool
 CC_Hexagon(unsigned ValNo, MVT ValVT,
            MVT LocVT, CCValAssign::LocInfo LocInfo,
@@ -113,6 +116,11 @@ CC_Hexagon64(unsigned ValNo, MVT ValVT,
              MVT LocVT, CCValAssign::LocInfo LocInfo,
              ISD::ArgFlagsTy ArgFlags, CCState &State);
 
+static bool
+CC_HexagonVector(unsigned ValNo, MVT ValVT,
+                 MVT LocVT, CCValAssign::LocInfo LocInfo,
+                 ISD::ArgFlagsTy ArgFlags, CCState &State);
+
 static bool
 RetCC_Hexagon(unsigned ValNo, MVT ValVT,
               MVT LocVT, CCValAssign::LocInfo LocInfo,
@@ -128,6 +136,11 @@ RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
                 MVT LocVT, CCValAssign::LocInfo LocInfo,
                 ISD::ArgFlagsTy ArgFlags, CCState &State);
 
+static bool
+RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+                    MVT LocVT, CCValAssign::LocInfo LocInfo,
+                    ISD::ArgFlagsTy ArgFlags, CCState &State);
+
 static bool
 CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
                    MVT LocVT, CCValAssign::LocInfo LocInfo,
@@ -169,15 +182,43 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
     State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
     return false;
   }
+  if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 ||
+      LocVT == MVT::v16i8) {
+    ofst = State.AllocateStack(16, 16);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 ||
+      LocVT == MVT::v32i8) {
+    ofst = State.AllocateStack(32, 32);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+      LocVT == MVT::v64i8 || LocVT == MVT::v512i1) {
+    ofst = State.AllocateStack(64, 64);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+      LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) {
+    ofst = State.AllocateStack(128, 128);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+      LocVT == MVT::v256i8) {
+    ofst = State.AllocateStack(256, 256);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+    return false;
+  }
+  llvm_unreachable(nullptr);
 }
 
-static bool
-CC_Hexagon (unsigned ValNo, MVT ValVT,
-            MVT LocVT, CCValAssign::LocInfo LocInfo,
-            ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
+static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
+      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) {
   if (ArgFlags.isByVal()) {
     // Passed on stack.
     unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
@@ -213,6 +254,17 @@ CC_Hexagon (unsigned ValNo, MVT ValVT,
     return false;
   }
 
+  if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) {
+    unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+
+  if (IsHvxVectorType(LocVT)) {
+    if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+      return false;
+  }
+
   return true;  // CC didn't match.
 }
@@ -260,10 +312,82 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
   return false;
 }
 
+static bool CC_HexagonVector(unsigned ValNo, MVT ValVT,
+                             MVT LocVT, CCValAssign::LocInfo LocInfo,
+                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
+
+  static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1,
+                                       Hexagon::V2, Hexagon::V3,
+                                       Hexagon::V4, Hexagon::V5,
+                                       Hexagon::V6, Hexagon::V7,
+                                       Hexagon::V8, Hexagon::V9,
+                                       Hexagon::V10, Hexagon::V11,
+                                       Hexagon::V12, Hexagon::V13,
+                                       Hexagon::V14, Hexagon::V15};
+  static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1,
+                                       Hexagon::W2, Hexagon::W3,
+                                       Hexagon::W4, Hexagon::W5,
+                                       Hexagon::W6, Hexagon::W7};
+  auto &MF = State.getMachineFunction();
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  bool UseHVX = HST.useHVXOps();
+  bool UseHVXDbl = HST.useHVXDblOps();
+
+  if ((UseHVX && !UseHVXDbl) &&
+      (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
+       LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) {
+    if (unsigned Reg = State.AllocateReg(VecLstS)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    unsigned Offset = State.AllocateStack(64, 64);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+  if ((UseHVX && !UseHVXDbl) &&
+      (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+       LocVT == MVT::v128i8)) {
+    if (unsigned Reg = State.AllocateReg(VecLstD)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    unsigned Offset = State.AllocateStack(128, 128);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+  // 128B Mode
+  if ((UseHVX && UseHVXDbl) &&
+      (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
+       LocVT == MVT::v256i8)) {
+    if (unsigned Reg = State.AllocateReg(VecLstD)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    unsigned Offset = State.AllocateStack(256, 256);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+  if ((UseHVX && UseHVXDbl) &&
+      (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
+       LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) {
+    if (unsigned Reg = State.AllocateReg(VecLstS)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    unsigned Offset = State.AllocateStack(128, 128);
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return false;
+  }
+  return true;
+}
+
 static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
                           MVT LocVT, CCValAssign::LocInfo LocInfo,
                           ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
+  auto &MF = State.getMachineFunction();
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  bool UseHVX = HST.useHVXOps();
+  bool UseHVXDbl = HST.useHVXDblOps();
 
   if (LocVT == MVT::i1 ||
       LocVT == MVT::i8 ||
@@ -282,8 +406,24 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
   } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
     LocVT = MVT::i64;
     LocInfo = CCValAssign::BCvt;
+  } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 ||
+             LocVT == MVT::v16i32 || LocVT == MVT::v8i64 ||
+             LocVT == MVT::v512i1) {
+    LocVT = MVT::v16i32;
+    ValVT = MVT::v16i32;
+    LocInfo = CCValAssign::Full;
+  } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 ||
+             LocVT == MVT::v32i32 || LocVT == MVT::v16i64 ||
+             (LocVT == MVT::v1024i1 && UseHVX && UseHVXDbl)) {
+    LocVT = MVT::v32i32;
+    ValVT = MVT::v32i32;
+    LocInfo = CCValAssign::Full;
+  } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 ||
+             LocVT == MVT::v64i32 || LocVT == MVT::v32i64) {
+    LocVT = MVT::v64i32;
+    ValVT = MVT::v64i32;
+    LocInfo = CCValAssign::Full;
   }
-
   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
     if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
       return false;
@@ -293,7 +433,10 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
     if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
       return false;
   }
-
+  if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) {
+    if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
+      return false;
+  }
   return true;  // CC didn't match.
 }
@@ -328,6 +471,52 @@ static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
   return false;
 }
 
+static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
+                                MVT LocVT, CCValAssign::LocInfo LocInfo,
+                                ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  auto &MF = State.getMachineFunction();
+  auto &HST = MF.getSubtarget<HexagonSubtarget>();
+  bool UseHVX = HST.useHVXOps();
+  bool UseHVXDbl = HST.useHVXDblOps();
+
+  unsigned OffSiz = 64;
+  if (LocVT == MVT::v16i32) {
+    if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+  } else if (LocVT == MVT::v32i32) {
+    unsigned Req = (UseHVX && UseHVXDbl) ? Hexagon::V0 : Hexagon::W0;
+    if (unsigned Reg = State.AllocateReg(Req)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    OffSiz = 128;
+  } else if (LocVT == MVT::v64i32) {
+    if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+      return false;
+    }
+    OffSiz = 256;
+  }
+
+  unsigned Offset = State.AllocateStack(OffSiz, OffSiz);
+  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+  return false;
+}
+
+void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
+  if (VT != PromotedLdStVT) {
+    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+    AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
+                      PromotedLdStVT.getSimpleVT());
+
+    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+    AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
+                      PromotedLdStVT.getSimpleVT());
+  }
+}
+
 SDValue
 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                SelectionDAG &DAG) const {
@@ -351,6 +540,15 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                        MachinePointerInfo(), MachinePointerInfo());
 }
 
+static bool IsHvxVectorType(MVT ty) {
+  return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 ||
+          ty == MVT::v64i8 ||
+          ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 ||
+          ty == MVT::v128i8 ||
+          ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 ||
+          ty == MVT::v256i8 ||
+          ty == MVT::v512i1 || ty == MVT::v1024i1);
+}
 
 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
 // passed by value, the function prototype is modified to return void and
@@ -463,19 +661,15 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 
   // Check for varargs.
   int NumNamedVarArgParams = -1;
-  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
-  {
-    const Function* CalleeFn = nullptr;
-    Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
-    if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
-    {
+  if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
+    const GlobalValue *GV = GAN->getGlobal();
+    Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
+    if (const Function* F = dyn_cast<Function>(GV)) {
       // If a function has zero args and is a vararg function, that's
       // disallowed so it must be an undeclared function.  Do not assume
       // varargs if the callee is undefined.
-      if (CalleeFn->isVarArg() &&
-          CalleeFn->getFunctionType()->getNumParams() != 0) {
-        NumNamedVarArgParams = CalleeFn->getFunctionType()->getNumParams();
-      }
+      if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0)
+        NumNamedVarArgParams = F->getFunctionType()->getNumParams();
     }
   }
@@ -519,11 +713,16 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   SDValue StackPtr =
       DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
 
+  bool NeedsArgAlign = false;
+  unsigned LargestAlignSeen = 0;
   // Walk the register/memloc assignments, inserting copies/loads.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     SDValue Arg = OutVals[i];
     ISD::ArgFlagsTy Flags = Outs[i].Flags;
+    // Record if we need > 8 byte alignment on an argument.
+    bool ArgAlign = IsHvxVectorType(VA.getValVT());
+    NeedsArgAlign |= ArgAlign;
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
@@ -549,13 +748,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
                                         StackPtr.getValueType());
       MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
+      if (ArgAlign)
+        LargestAlignSeen = std::max(LargestAlignSeen,
+                                    VA.getLocVT().getStoreSizeInBits() >> 3);
       if (Flags.isByVal()) {
         // The argument is a struct passed by value. According to LLVM, "Arg"
         // is is pointer.
         MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
                                                         Flags, DAG, dl));
       } else {
-        MachinePointerInfo LocPI = MachinePointerInfo::getStack(LocMemOffset);
+        MachinePointerInfo LocPI = MachinePointerInfo::getStack(
+            DAG.getMachineFunction(), LocMemOffset);
         SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI, false,
                                  false, 0);
         MemOpChains.push_back(S);
@@ -569,6 +772,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
   }
 
+  if (NeedsArgAlign && Subtarget.hasV60TOps()) {
+    DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+    MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
+    // V6 vectors passed by value have 64 or 128 byte alignment depending
+    // on whether we are 64 byte vector mode or 128 byte.
+    bool UseHVXDbl = Subtarget.useHVXDblOps();
+    assert(Subtarget.useHVXOps());
+    const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
+    LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+    MFI->ensureMaxAlignment(LargestAlignSeen);
+  }
   // Transform all store nodes into one single node because all store
   // nodes are independent of each other.
   if (!MemOpChains.empty())
@@ -613,12 +827,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   // node so that legalize doesn't hack it.
-  if (flag_aligned_memcpy) {
-    const char *MemcpyName =
-      "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
-    Callee = DAG.getTargetExternalSymbol(MemcpyName, PtrVT);
-    flag_aligned_memcpy = false;
-  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT);
   } else if (ExternalSymbolSDNode *S =
              dyn_cast<ExternalSymbolSDNode>(Callee)) {
@@ -668,7 +877,19 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
   if (Ptr->getOpcode() != ISD::ADD)
     return false;
 
-  if (VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
+  auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget());
+  bool UseHVX = HST.useHVXOps();
+  bool UseHVXDbl = HST.useHVXDblOps();
+
+  bool ValidHVXDblType =
+    (UseHVX && UseHVXDbl) && (VT == MVT::v32i32 || VT == MVT::v16i64 ||
+                              VT == MVT::v64i16 || VT == MVT::v128i8);
+  bool ValidHVXType =
+    UseHVX && !UseHVXDbl && (VT == MVT::v16i32 || VT == MVT::v8i64 ||
+                             VT == MVT::v32i16 || VT == MVT::v64i8);
+
+  if (ValidHVXDblType || ValidHVXType ||
+      VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
     isInc = (Ptr->getOpcode() == ISD::ADD);
     Base = Ptr->getOperand(0);
     Offset = Ptr->getOperand(1);
@@ -679,23 +900,6 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
   return false;
 }
 
-// TODO: Put this function along with the other isS* functions in
-// HexagonISelDAGToDAG.cpp into a common file. Or better still, use the
-// functions defined in HexagonOperands.td.
-static bool Is_PostInc_S4_Offset(SDNode * S, int ShiftAmount) {
-  ConstantSDNode *N = cast<ConstantSDNode>(S);
-
-  // immS4 predicate - True if the immediate fits in a 4-bit sign extended.
-  // field.
-  int64_t v = (int64_t)N->getSExtValue();
-  int64_t m = 0;
-  if (ShiftAmount > 0) {
-    m = v % ShiftAmount;
-    v = v >> ShiftAmount;
-  }
-  return (v <= 7) && (v >= -8) && (m == 0);
-}
-
 /// getPostIndexedAddressParts - returns true by value, base pointer and
 /// offset pointer and addressing mode by reference if this node can be
 /// combined with a load / store to form a post-indexed load / store.
@@ -724,18 +928,20 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
   bool isInc = false;
   bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
                                         isInc, DAG);
-  // ShiftAmount = number of left-shifted bits in the Hexagon instruction.
-  int ShiftAmount = VT.getSizeInBits() / 16;
-  if (isLegal && Is_PostInc_S4_Offset(Offset.getNode(), ShiftAmount)) {
-    AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
-    return true;
+  if (isLegal) {
+    auto &HII = *Subtarget.getInstrInfo();
+    int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+    if (HII.isValidAutoIncImm(VT, OffsetVal)) {
+      AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+      return true;
+    }
   }
 
   return false;
 }
 
-SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
-                                              SelectionDAG &DAG) const {
+SDValue
+HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
   SDNode *Node = Op.getNode();
   MachineFunction &MF = DAG.getMachineFunction();
   auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
@@ -784,47 +990,6 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op,
   return Op;
 }
 
-
-//
-// Taken from the XCore backend.
-//
-SDValue HexagonTargetLowering::
-LowerBR_JT(SDValue Op, SelectionDAG &DAG) const
-{
-  SDValue Chain = Op.getOperand(0);
-  SDValue Table = Op.getOperand(1);
-  SDValue Index = Op.getOperand(2);
-  SDLoc dl(Op);
-  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
-  unsigned JTI = JT->getIndex();
-  MachineFunction &MF = DAG.getMachineFunction();
-  const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
-  SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
-
-  // Mark all jump table targets as address taken.
-  const std::vector<MachineJumpTableEntry> &JTE = MJTI->getJumpTables();
-  const std::vector<MachineBasicBlock*> &JTBBs = JTE[JTI].MBBs;
-  for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
-    MachineBasicBlock *MBB = JTBBs[i];
-    MBB->setHasAddressTaken();
-    // This line is needed to set the hasAddressTaken flag on the BasicBlock
-    // object.
-    BlockAddress::get(const_cast<BasicBlock *>(MBB->getBasicBlock()));
-  }
-
-  SDValue JumpTableBase = DAG.getNode(
-      HexagonISD::JT, dl, getPointerTy(DAG.getDataLayout()), TargetJT);
-  SDValue ShiftIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
-                                   DAG.getConstant(2, dl, MVT::i32));
-  SDValue JTAddress = DAG.getNode(ISD::ADD, dl, MVT::i32, JumpTableBase,
-                                  ShiftIndex);
-  SDValue LoadTarget = DAG.getLoad(MVT::i32, dl, Chain, JTAddress,
-                                   MachinePointerInfo(), false, false, false,
-                                   0);
-  return DAG.getNode(HexagonISD::BR_JT, dl, MVT::Other, Chain, LoadTarget);
-}
-
-
 SDValue
 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                SelectionDAG &DAG) const {
@@ -850,7 +1015,10 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   SDValue AC = DAG.getConstant(A, dl, MVT::i32);
   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
-  return DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+  SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
+  if (Op.getNode()->getHasDebugValue())
+    DAG.TransferDbgValues(Op, AA);
+  return AA;
 }
 
 SDValue
@@ -882,7 +1050,8 @@ const {
   // equal to) 8 bytes. If not, no address will be passed into callee and
   // callee return the result direclty through R0/R1.
-  SmallVector<SDValue, 8> MemOps;
+  SmallVector<SDValue, 8> MemOps;
+  bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
@@ -908,6 +1077,42 @@ const {
           RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
         RegInfo.addLiveIn(VA.getLocReg(), VReg);
         InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+      // Single Vector
+      } else if ((RegVT == MVT::v8i64 || RegVT == MVT::v16i32 ||
+                  RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VectorRegsRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else if (UseHVX && UseHVXDbl &&
+                 ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+                   RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VectorRegs128BRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+
+      // Double Vector
+      } else if ((RegVT == MVT::v16i64 || RegVT == MVT::v32i32 ||
+                  RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VecDblRegsRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else if (UseHVX && UseHVXDbl &&
+                 ((RegVT == MVT::v32i64 || RegVT == MVT::v64i32 ||
+                   RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VecDblRegs128BRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+      } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
+        assert(0 && "need to support VecPred regs");
+        unsigned VReg =
+          RegInfo.createVirtualRegister(&Hexagon::VecPredRegsRegClass);
+        RegInfo.addLiveIn(VA.getLocReg(), VReg);
+        InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
       } else {
         assert (0);
       }
@@ -1056,8 +1261,8 @@ SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
-SDValue HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG)
-      const {
+SDValue
+HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
   SDValue PredOp = Op.getOperand(0);
   SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
   EVT OpVT = Op1.getValueType();
@@ -1163,16 +1368,33 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue
 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
   EVT ValTy = Op.getValueType();
-  SDLoc dl(Op);
-  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-  SDValue Res;
-  if (CP->isMachineConstantPoolEntry())
-    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), ValTy,
-                                    CP->getAlignment());
+  ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
+  unsigned Align = CPN->getAlignment();
+  Reloc::Model RM = HTM.getRelocationModel();
+  unsigned char TF = (RM == Reloc::PIC_) ? HexagonII::MO_PCREL : 0;
+
+  SDValue T;
+  if (CPN->isMachineConstantPoolEntry())
+    T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF);
   else
-    Res = DAG.getTargetConstantPool(CP->getConstVal(), ValTy,
-                                    CP->getAlignment());
-  return DAG.getNode(HexagonISD::CP, dl, ValTy, Res);
+    T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF);
+  if (RM == Reloc::PIC_)
+    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
+  return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
+}
+
+SDValue
+HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  int Idx = cast<JumpTableSDNode>(Op)->getIndex();
+  Reloc::Model RM = HTM.getRelocationModel();
+  if (RM == Reloc::PIC_) {
+    SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
+    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
+  }
+
+  SDValue T = DAG.getTargetJumpTable(Idx, VT);
+  return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
 }
 
 SDValue
@@ -1219,52 +1441,70 @@ HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   return FrameAddr;
 }
 
-SDValue HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op,
-                                                 SelectionDAG& DAG) const {
+SDValue
+HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
   SDLoc dl(Op);
   return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
 }
 
-SDValue HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op,
-                                                  SelectionDAG &DAG) const {
-  SDValue Result;
-  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+SDValue
+HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
+  auto *GAN = cast<GlobalAddressSDNode>(Op);
   auto PtrVT = getPointerTy(DAG.getDataLayout());
-  Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
+  auto *GV = GAN->getGlobal();
+  int64_t Offset = GAN->getOffset();
 
-  const HexagonTargetObjectFile *TLOF =
-      static_cast<const HexagonTargetObjectFile *>(
-          getTargetMachine().getObjFileLowering());
-  if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine())) {
-    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, Result);
+  auto &HLOF = *HTM.getObjFileLowering();
+  Reloc::Model RM = HTM.getRelocationModel();
+
+  if (RM == Reloc::Static) {
+    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
+    if (HLOF.IsGlobalInSmallSection(GV, HTM))
+      return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
+    return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
   }
 
-  return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, Result);
+  bool UsePCRel = GV->hasInternalLinkage() || GV->hasHiddenVisibility() ||
+                  (GV->hasLocalLinkage() && !isa<Function>(GV));
+  if (UsePCRel) {
+    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
+                                            HexagonII::MO_PCREL);
+    return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
+  }
+
+  // Use GOT index.
+  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
+  SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
+  return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
 }
 
 // Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
-void HexagonTargetLowering::promoteLdStType(EVT VT, EVT PromotedLdStVT) {
-  if (VT != PromotedLdStVT) {
-    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
-    AddPromotedToType(ISD::LOAD, VT.getSimpleVT(),
-                      PromotedLdStVT.getSimpleVT());
-
-    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
-    AddPromotedToType(ISD::STORE, VT.getSimpleVT(),
-                      PromotedLdStVT.getSimpleVT());
-  }
-}
-
 SDValue
 HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
-  SDValue BA_SD =  DAG.getTargetBlockAddress(BA, MVT::i32);
   SDLoc dl(Op);
-  return DAG.getNode(HexagonISD::CONST32_GP, dl,
-                     getPointerTy(DAG.getDataLayout()), BA_SD);
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+  Reloc::Model RM = HTM.getRelocationModel();
+  if (RM == Reloc::Static) {
+    SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
+    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
+  }
+
+  SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
+  return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
+}
+
+SDValue
+HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
+      const {
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
+                                               HexagonII::MO_PCREL);
+  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
 }
 
 //===----------------------------------------------------------------------===//
@@ -1272,18 +1512,19 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
 //===----------------------------------------------------------------------===//
 
 HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
-                                             const HexagonSubtarget &STI)
+                                             const HexagonSubtarget &ST)
     : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
-      Subtarget(STI) {
+      Subtarget(ST) {
   bool IsV4 = !Subtarget.hasV5TOps();
   auto &HRI = *Subtarget.getRegisterInfo();
+  bool UseHVX = Subtarget.useHVXOps();
+  bool UseHVXSgl = Subtarget.useHVXSglOps();
+  bool UseHVXDbl = Subtarget.useHVXDblOps();
 
   setPrefLoopAlignment(4);
   setPrefFunctionAlignment(4);
   setMinFunctionAlignment(2);
   setInsertFencesForAtomic(false);
-  setExceptionPointerRegister(Hexagon::R0);
-  setExceptionSelectorRegister(Hexagon::R1);
   setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
 
   if (EnableHexSDNodeSched)
@@ -1320,6 +1561,31 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
   }
 
+  if (Subtarget.hasV60TOps()) {
+    if (Subtarget.useHVXSglOps()) {
+      addRegisterClass(MVT::v64i8,  &Hexagon::VectorRegsRegClass);
+      addRegisterClass(MVT::v32i16, &Hexagon::VectorRegsRegClass);
+      addRegisterClass(MVT::v16i32, &Hexagon::VectorRegsRegClass);
+      addRegisterClass(MVT::v8i64,  &Hexagon::VectorRegsRegClass);
+      addRegisterClass(MVT::v128i8, &Hexagon::VecDblRegsRegClass);
+      addRegisterClass(MVT::v64i16, &Hexagon::VecDblRegsRegClass);
+      addRegisterClass(MVT::v32i32, &Hexagon::VecDblRegsRegClass);
+      addRegisterClass(MVT::v16i64, &Hexagon::VecDblRegsRegClass);
+      addRegisterClass(MVT::v512i1, &Hexagon::VecPredRegsRegClass);
+    } else if (Subtarget.useHVXDblOps()) {
+      addRegisterClass(MVT::v128i8,  &Hexagon::VectorRegs128BRegClass);
+      addRegisterClass(MVT::v64i16,  &Hexagon::VectorRegs128BRegClass);
+      addRegisterClass(MVT::v32i32,  &Hexagon::VectorRegs128BRegClass);
+      addRegisterClass(MVT::v16i64,  &Hexagon::VectorRegs128BRegClass);
+      addRegisterClass(MVT::v256i8,  &Hexagon::VecDblRegs128BRegClass);
+      addRegisterClass(MVT::v128i16, &Hexagon::VecDblRegs128BRegClass);
+      addRegisterClass(MVT::v64i32,  &Hexagon::VecDblRegs128BRegClass);
+      addRegisterClass(MVT::v32i64,  &Hexagon::VecDblRegs128BRegClass);
+      addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass);
+    }
+
+  }
+
   //
   // Handling of scalar operations.
   //
@@ -1336,10 +1602,12 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::ConstantFP, MVT::f64, Legal); // Default: expand
 
   setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
   setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
   setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
 
   // Custom legalize GlobalAddress nodes into CONST32.
@@ -1361,11 +1629,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
 
   if (EmitJumpTables)
-    setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+    setMinimumJumpTableEntries(2);
   else
-    setOperationAction(ISD::BR_JT, MVT::Other, Expand);
-  // Increase jump tables cutover to 5, was 4.
-  setMinimumJumpTableEntries(MinimumJumpTables);
+    setMinimumJumpTableEntries(MinimumJumpTables);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 
   // Hexagon has instructions for add/sub with carry. The problem with
   // modeling these instructions is that they produce 2 results: Rdd and Px.
@@ -1420,9 +1687,10 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::MULHS, MVT::i64, Expand);
 
   for (unsigned IntExpOp :
-      {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM,
-       ISD::ROTL, ISD::ROTR, ISD::BSWAP, ISD::SHL_PARTS, ISD::SRA_PARTS,
-       ISD::SRL_PARTS, ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
+       { ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
+         ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
+         ISD::BSWAP,     ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
+         ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
     setOperationAction(IntExpOp, MVT::i32, Expand);
     setOperationAction(IntExpOp, MVT::i64, Expand);
   }
@@ -1475,7 +1743,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
 
   // Set the action for vector operations to "expand", then override it with
   // either "custom" or "legal" for specific cases.
-  static unsigned VectExpOps[] = {
+  static const unsigned VectExpOps[] = {
     // Integer arithmetic:
     ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,    ISD::UDIV,
     ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
@@ -1539,7 +1807,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
-
+  if (UseHVX) {
+    if (UseHVXSgl) {
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom);
+    } else if (UseHVXDbl) {
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom);
+      setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom);
+    } else {
+      llvm_unreachable("Unrecognized HVX mode");
+    }
+  }
   // Subtarget-specific operation actions.
   //
   if (Subtarget.hasV5TOps()) {
@@ -1586,7 +1868,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
 
   for (ISD::CondCode FPExpCCV4 :
        {ISD::SETOEQ, ISD::SETOGT, ISD::SETOLT, ISD::SETOGE, ISD::SETOLE,
-        ISD::SETUO, ISD::SETO}) {
+        ISD::SETUO,  ISD::SETO}) {
     setCondCodeAction(FPExpCCV4, MVT::f32, Expand);
     setCondCodeAction(FPExpCCV4, MVT::f64, Expand);
   }
@@ -1599,6 +1881,13 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
     setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal);
   }
 
+  if (UseHVXDbl) {
+    for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) {
+      setIndexedLoadAction(ISD::POST_INC, VT, Legal);
+      setIndexedStoreAction(ISD::POST_INC, VT, Legal);
+    }
+  }
+
   computeRegisterProperties(&HRI);
 
   //
@@ -1720,7 +2009,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
   case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
   case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
-  case HexagonISD::BR_JT:         return "HexagonISD::BR_JT";
   case HexagonISD::CALLR:         return "HexagonISD::CALLR";
   case HexagonISD::CALLv3nr:      return "HexagonISD::CALLv3nr";
   case HexagonISD::CALLv3:        return "HexagonISD::CALLv3";
@@ -1737,7 +2025,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case HexagonISD::INSERTRP:      return "HexagonISD::INSERTRP";
   case HexagonISD::JT:            return "HexagonISD::JT";
   case HexagonISD::PACKHL:        return "HexagonISD::PACKHL";
-  case HexagonISD::PIC_ADD:       return "HexagonISD::PIC_ADD";
   case HexagonISD::POPCOUNT:      return "HexagonISD::POPCOUNT";
   case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
   case HexagonISD::SHUFFEB:       return "HexagonISD::SHUFFEB";
@@ -1754,6 +2041,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case HexagonISD::VCMPWEQ:       return "HexagonISD::VCMPWEQ";
   case HexagonISD::VCMPWGT:       return "HexagonISD::VCMPWGT";
   case HexagonISD::VCMPWGTU:      return "HexagonISD::VCMPWGTU";
+  case HexagonISD::VCOMBINE:      return "HexagonISD::VCOMBINE";
   case HexagonISD::VSHLH:         return "HexagonISD::VSHLH";
   case HexagonISD::VSHLW:         return "HexagonISD::VSHLW";
   case HexagonISD::VSPLATB:       return "HexagonISD::VSPLTB";
@@ -1923,8 +2211,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
 
   unsigned Size = VT.getSizeInBits();
 
-  // A vector larger than 64 bits cannot be represented in Hexagon.
-  // Expand will split the vector.
+  // Only handle vectors of 64 bits or shorter.
   if (Size > 64)
     return SDValue();
 
@@ -2058,58 +2345,61 @@ SDValue
 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
                                            SelectionDAG &DAG) const {
   SDLoc dl(Op);
+  bool UseHVX = Subtarget.useHVXOps();
   EVT VT = Op.getValueType();
   unsigned NElts = Op.getNumOperands();
-  SDValue Vec = Op.getOperand(0);
-  EVT VecVT = Vec.getValueType();
-  SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64);
-  SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width,
-                                DAG.getConstant(32, dl, MVT::i64));
-  SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64);
+  SDValue Vec0 = Op.getOperand(0);
+  EVT VecVT = Vec0.getValueType();
+  unsigned Width = VecVT.getSizeInBits();
 
-  ConstantSDNode *W = dyn_cast<ConstantSDNode>(Width);
-  ConstantSDNode *S = dyn_cast<ConstantSDNode>(Shifted);
+  if (NElts == 2) {
+    MVT ST = VecVT.getSimpleVT();
+    // We are trying to concat two v2i16 to a single v4i16, or two v4i8
+    // into a single v8i8.
+    if (ST == MVT::v2i16 || ST == MVT::v4i8)
+      return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0);
 
-  if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) {
-    if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
-      // We are trying to concat two v2i16 to a single v4i16.
-      SDValue Vec0 = Op.getOperand(1);
-      SDValue Combined  = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
-      return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
+    if (UseHVX) {
+      assert((Width == 64*8 && Subtarget.useHVXSglOps()) ||
+             (Width == 128*8 && Subtarget.useHVXDblOps()));
+      SDValue Vec1 = Op.getOperand(1);
+      MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32;
+      MVT ReTy = Subtarget.useHVXSglOps() ? MVT::v32i32 : MVT::v64i32;
+      SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0);
+      SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1);
+      SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0);
+      return DAG.getNode(ISD::BITCAST, dl, VT, VC);
     }
   }
 
-  if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) {
-    if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) {
-      // We are trying to concat two v4i8 to a single v8i8.
-      SDValue Vec0 = Op.getOperand(1);
-      SDValue Combined  = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec);
-      return DAG.getNode(ISD::BITCAST, dl, VT, Combined);
-    }
-  }
+  if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64)
+    return SDValue();
+
+  SDValue C0 = DAG.getConstant(0, dl, MVT::i64);
+  SDValue C32 = DAG.getConstant(32, dl, MVT::i64);
+  SDValue W = DAG.getConstant(Width, dl, MVT::i64);
+  // Create the "width" part of the argument to insert_rp/insertp_rp.
+  SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32);
+  SDValue V = C0;
 
   for (unsigned i = 0, e = NElts; i != e; ++i) {
-    unsigned OpIdx = NElts - i - 1;
-    SDValue Operand = Op.getOperand(OpIdx);
+    unsigned N = NElts-i-1;
+    SDValue OpN = Op.getOperand(N);
 
-    if (VT.getSizeInBits() == 64 &&
-        Operand.getValueType().getSizeInBits() == 32) {
+    if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) {
       SDValue C = DAG.getConstant(0, dl, MVT::i32);
-      Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
+      OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN);
     }
-
-    SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64);
-    SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width);
-    SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
-    const SDValue Ops[] = {ConstVal, Operand, Combined};
-
+    SDValue Idx = DAG.getConstant(N, dl, MVT::i64);
+    SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W);
+    SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset);
     if (VT.getSizeInBits() == 32)
-      ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
+      V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or});
     else
-      ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+      V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or});
   }
 
-  return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal);
+  return DAG.getNode(ISD::BITCAST, dl, VT, V);
 }
 
 SDValue
@@ -2301,6 +2591,7 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     case ISD::SHL:
     case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
     case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
+    case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
     case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
       // Frame & Return address. Currently unimplemented.
     case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
@@ -2308,8 +2599,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
+    case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
     case ISD::VASTART:              return LowerVASTART(Op, DAG);
-    case ISD::BR_JT:                return LowerBR_JT(Op, DAG);
       // Custom lower some vector loads.
     case ISD::LOAD:                 return LowerLOAD(Op, DAG);
     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
@@ -2321,6 +2612,16 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   }
 }
 
+/// Returns relocation base for the given PIC jumptable.
+SDValue +HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + int Idx = cast(Table)->getIndex(); + EVT VT = Table.getValueType(); + SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL); + return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T); +} + MachineBasicBlock * HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) @@ -2343,6 +2644,8 @@ HexagonTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, std::pair HexagonTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { + bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); + if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': // R0-R31 @@ -2358,6 +2661,42 @@ HexagonTargetLowering::getRegForInlineAsmConstraint( case MVT::f64: return std::make_pair(0U, &Hexagon::DoubleRegsRegClass); } + case 'q': // q0-q3 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v1024i1: + case MVT::v512i1: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VecPredRegsRegClass); + } + case 'v': // V0-V31 + switch (VT.SimpleTy) { + default: + llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type"); + case MVT::v16i32: + case MVT::v32i16: + case MVT::v64i8: + case MVT::v8i64: + return std::make_pair(0U, &Hexagon::VectorRegsRegClass); + case MVT::v32i32: + case MVT::v64i16: + case MVT::v16i64: + case MVT::v128i8: + if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl) + return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass); + else + return std::make_pair(0U, &Hexagon::VecDblRegsRegClass); + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass); + } + default: llvm_unreachable("Unknown asm register class"); } @@ -2397,6 +2736,14 @@ bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL, return true; } +/// Return true if folding a constant offset with the given GlobalAddress is +/// legal. It is frequently not legal in PIC relocation models. +bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) + const { + return HTM.getRelocationModel() == Reloc::Static; +} + + /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can compare /// a register against the immediate without having to materialize the @@ -2428,8 +2775,8 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization( // *************************************************************************** // If this is a tail call via a function pointer, then don't do it! 
- if (!(dyn_cast(Callee)) - && !(dyn_cast(Callee))) { + if (!(isa(Callee)) && + !(isa(Callee))) { return false; } @@ -2467,6 +2814,41 @@ bool llvm::isPositiveHalfWord(SDNode *N) { } } +std::pair +HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { + const TargetRegisterClass *RRC = nullptr; + + uint8_t Cost = 1; + switch (VT.SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(TRI, VT); + case MVT::v64i8: + case MVT::v32i16: + case MVT::v16i32: + case MVT::v8i64: + RRC = &Hexagon::VectorRegsRegClass; + break; + case MVT::v128i8: + case MVT::v64i16: + case MVT::v32i32: + case MVT::v16i64: + if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() && + Subtarget.useHVXDblOps()) + RRC = &Hexagon::VectorRegs128BRegClass; + else + RRC = &Hexagon::VecDblRegsRegClass; + break; + case MVT::v256i8: + case MVT::v128i16: + case MVT::v64i32: + case MVT::v32i64: + RRC = &Hexagon::VecDblRegs128BRegClass; + break; + } + return std::make_pair(RRC, Cost); +} + Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { BasicBlock *BB = Builder.GetInsertBlock(); @@ -2498,13 +2880,15 @@ Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder, return Ext; } -bool HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { +TargetLowering::AtomicExpansionKind +HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { // Do not expand loads and stores that don't exceed 64 bits. - return LI->getType()->getPrimitiveSizeInBits() > 64; + return LI->getType()->getPrimitiveSizeInBits() > 64 + ? AtomicExpansionKind::LLOnly + : AtomicExpansionKind::None; } bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { // Do not expand loads and stores that don't exceed 64 bits. return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64; } - diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 2642abffaddd..bf378b922220 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -35,16 +35,14 @@ bool isPositiveHalfWord(SDNode *N); ALLOCA, ARGEXTEND, - PIC_ADD, - AT_GOT, - AT_PCREL, + AT_GOT, // Index in GOT. + AT_PCREL, // Offset relative to PC. CALLv3, // A V3+ call instruction. CALLv3nr, // A V3+ call instruction that doesn't return. CALLR, RET_FLAG, // Return with a flag operand. - BR_JT, // Branch through jump table. BARRIER, // Memory barrier. JT, // Jump table. CP, // Constant pool. 
@@ -80,6 +78,7 @@ bool isPositiveHalfWord(SDNode *N); INSERTRP, EXTRACTU, EXTRACTURP, + VCOMBINE, TC_RETURN, EH_RETURN, DCFETCH, @@ -127,7 +126,6 @@ bool isPositiveHalfWord(SDNode *N); SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEH_LABEL(SDValue Op, SelectionDAG &DAG) const; @@ -137,6 +135,7 @@ bool isPositiveHalfWord(SDNode *N); SelectionDAG &DAG, SmallVectorImpl &InVals) const override; SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const override; @@ -163,8 +162,23 @@ bool isPositiveHalfWord(SDNode *N); MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return Hexagon::R0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return Hexagon::R1; + } + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override { if (!VT.isVector()) @@ -200,6 +214,10 @@ bool isPositiveHalfWord(SDNode *N); /// TODO: Handle pre/postinc as well. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + /// Return true if folding a constant offset with the given GlobalAddress + /// is legal. It is frequently not legal in PIC relocation models. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; /// isLegalICmpImmediate - Return true if the specified immediate is legal @@ -208,20 +226,26 @@ bool isPositiveHalfWord(SDNode *N); /// the immediate into a register. bool isLegalICmpImmediate(int64_t Imm) const override; + /// Returns relocation base for the given PIC jumptable. + SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) + const override; + // Handling of atomic RMW instructions. 
- bool hasLoadLinkedStoreConditional() const override { - return true; - } Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - AtomicRMWExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) - const override { - return AtomicRMWExpansionKind::LLSC; + AtomicExpansionKind + shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override { + return AtomicExpansionKind::LLSC; } + + protected: + std::pair + findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) + const override; }; } // end namespace llvm diff --git a/lib/Target/Hexagon/HexagonInstrAlias.td b/lib/Target/Hexagon/HexagonInstrAlias.td new file mode 100644 index 000000000000..5a1a69b40d4d --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrAlias.td @@ -0,0 +1,462 @@ +//==- HexagonInstrAlias.td - Hexagon Instruction Aliases ---*- tablegen -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Hexagon Instruction Mappings +//===----------------------------------------------------------------------===// + + +def : InstAlias<"memb({GP}+#$addr) = $Nt.new", + (S2_storerbnewgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.new", + (S2_storerhnewgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt.new", + (S2_storerinewgp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memb({GP}+#$addr) = $Nt", + (S2_storerbgp u16_0Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt", + (S2_storerhgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memh({GP}+#$addr) = $Nt.h", + (S2_storerfgp u16_1Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memw({GP}+#$addr) = $Nt", + (S2_storerigp u16_2Imm:$addr, IntRegs:$Nt)>; +def : InstAlias<"memd({GP}+#$addr) = $Nt", + (S2_storerdgp u16_3Imm:$addr, DoubleRegs:$Nt)>; + +def : InstAlias<"$Nt = memb({GP}+#$addr)", + (L2_loadrbgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memub({GP}+#$addr)", + (L2_loadrubgp IntRegs:$Nt, u16_0Imm:$addr)>; +def : InstAlias<"$Nt = memh({GP}+#$addr)", + (L2_loadrhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memuh({GP}+#$addr)", + (L2_loadruhgp IntRegs:$Nt, u16_1Imm:$addr)>; +def : InstAlias<"$Nt = memw({GP}+#$addr)", + (L2_loadrigp IntRegs:$Nt, u16_2Imm:$addr)>; +def : InstAlias<"$Nt = memd({GP}+#$addr)", + (L2_loadrdgp DoubleRegs:$Nt, u16_3Imm:$addr)>; + +// Alias of: memXX($Rs+#XX) = $Rt to memXX($Rs) = $Rt +def : InstAlias<"memb($Rs) = $Rt", + (S2_storerb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt", + (S2_storerh_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.h", + (S2_storerf_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memw($Rs) = $Rt", + (S2_storeri_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memb($Rs) = $Rt.new", + (S2_storerbnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memh($Rs) = $Rt.new", + (S2_storerhnew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"memw($Rs) = $Rt.new", + (S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>; 
+ +def : InstAlias<"memb($Rs) = #$S8", + (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memh($Rs) = #$S8", + (S4_storeirh_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memw($Rs) = #$S8", + (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>; + +def : InstAlias<"memd($Rs) = $Rtt", + (S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"memb($Rs) = setbit(#$U5)", + (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = setbit(#$U5)", + (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = setbit(#$U5)", + (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memb($Rs) = clrbit(#$U5)", + (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memh($Rs) = clrbit(#$U5)", + (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +def : InstAlias<"memw($Rs) = clrbit(#$U5)", + (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>; + +// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs) +def : InstAlias<"$Rd = memb($Rs)", + (L2_loadrb_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memub($Rs)", + (L2_loadrub_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memh($Rs)", + (L2_loadrh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memuh($Rs)", + (L2_loadruh_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memw($Rs)", + (L2_loadri_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memd($Rs)", + (L2_loadrd_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = memubh($Rs)", + (L2_loadbzw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memubh($Rs)", + (L2_loadbzw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rd = membh($Rs)", + (L2_loadbsw2_io IntRegs:$Rd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = membh($Rs)", + (L2_loadbsw4_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memb_fifo($Rs)", + (L2_loadalignb_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"$Rdd = memh_fifo($Rs)", + (L2_loadalignh_io DoubleRegs:$Rdd, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) $Rd = memXX($Rs + #$u6_X) +// to: if ($Pt) $Rd = memXX($Rs) +def : InstAlias<"if ($Pt) $Rd = memb($Rs)", + (L2_ploadrbt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memub($Rs)", + (L2_ploadrubt_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memh($Rs)", + (L2_ploadrht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memuh($Rs)", + (L2_ploadruht_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rd = memw($Rs)", + (L2_ploadrit_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt) $Rdd = memd($Rs)", + (L2_ploadrdt_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if ($Pt) memXX($Rs + #$u6_X) = $Rt +// to: if ($Pt) memXX($Rs) = $Rt +def : InstAlias<"if ($Pt) memb($Rs) = $Rt", + (S2_pstorerbt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt", + (S2_pstorerht_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt.h", + (S2_pstorerft_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt", + (S2_pstorerit_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memd($Rs) = $Rtt", + (S2_pstorerdt_io PredRegs:$Pt, IntRegs:$Rs, 0, 
DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewt_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + + +// Alias of: if (!$Pt) $Rd = memXX($Rs + #$u6_X) +// to: if (!$Pt) $Rd = memXX($Rs) +def : InstAlias<"if (!$Pt) $Rd = memb($Rs)", + (L2_ploadrbf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memub($Rs)", + (L2_ploadrubf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memh($Rs)", + (L2_ploadrhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memuh($Rs)", + (L2_ploadruhf_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rd = memw($Rs)", + (L2_ploadrif_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt) $Rdd = memd($Rs)", + (L2_ploadrdf_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +// Alias of: if (!$Pt) memXX($Rs + #$u6_X) = $Rt +// to: if (!$Pt) memXX($Rs) = $Rt +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt", + (S2_pstorerbf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt", + (S2_pstorerhf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.h", + (S2_pstorerff_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt", + (S2_pstorerif_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memd($Rs) = $Rtt", + (S2_pstorerdf_io PredRegs:$Pt, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pt) memb($Rs) = $Rt.new", + (S2_pstorerbnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = $Rt.new", + (S2_pstorerhnewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = $Rt.new", + (S2_pstorerinewf_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = $Rt.new", + (S4_pstorerbnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = $Rt.new", + (S4_pstorerhnewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new", + (S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pt) memb($Rs) = #$S6", + (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memh($Rs) = #$S6", + (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt) memw($Rs) = #$S6", + (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6", + (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6", + (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def 
: InstAlias<"if ($Pt.new) memw($Rs) = #$S6", + (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memb($Rs) = #$S6", + (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memh($Rs) = #$S6", + (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt) memw($Rs) = #$S6", + (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6", + (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6", + (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6", + (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>; + +// Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -= +// to: memXX($Rs) |= $Rt +def : InstAlias<"memb($Rs) &= $Rt", + (L4_and_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) |= $Rt", + (L4_or_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += $Rt", + (L4_add_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= $Rt", + (L4_sub_memopb_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) += #$U5", + (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memb($Rs) -= #$U5", + (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) &= $Rt", + (L4_and_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) |= $Rt", + (L4_or_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += $Rt", + (L4_add_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= $Rt", + (L4_sub_memoph_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) += #$U5", + (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memh($Rs) -= #$U5", + (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) &= $Rt", + (L4_and_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) |= $Rt", + (L4_or_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += $Rt", + (L4_add_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= $Rt", + (L4_sub_memopw_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) += #$U5", + (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +def : InstAlias<"memw($Rs) -= #$U5", + (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>, + Requires<[UseMEMOP]>; + +// +// Alias of: if ($Pv.new) memX($Rs) = $Rt +// to: if (p3.new) memX(r17 + #0) = $Rt +def : InstAlias<"if ($Pv.new) memb($Rs) = $Rt", + (S4_pstorerbtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt", + (S4_pstorerhtnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerftnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memw($Rs) = $Rt", + (S4_pstoreritnew_io PredRegs:$Pv, 
IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if ($Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdtnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"if (!$Pv.new) memb($Rs) = $Rt", + (S4_pstorerbfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt", + (S4_pstorerhfnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memh($Rs) = $Rt.h", + (S4_pstorerffnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memw($Rs) = $Rt", + (S4_pstorerifnew_io PredRegs:$Pv, IntRegs:$Rs, 0, IntRegs:$Rt), 0>; + +def : InstAlias<"if (!$Pv.new) memd($Rs) = $Rtt", + (S4_pstorerdfnew_io + PredRegs:$Pv, IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>; + +// +// Alias of: if ($Pt.new) $Rd = memub($Rs) -- And if (!$Pt.new) ... +// to: if ($Pt.new) $Rd = memub($Rs + #$u6_0) +def : InstAlias<"if ($Pt.new) $Rd = memub($Rs)", + (L2_ploadrubtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memb($Rs)", + (L2_ploadrbtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memh($Rs)", + (L2_ploadrhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhtnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rd = memw($Rs)", + (L2_ploadritnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if ($Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdtnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memub($Rs)", + (L2_ploadrubfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memb($Rs)", + (L2_ploadrbfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memh($Rs)", + (L2_ploadrhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memuh($Rs)", + (L2_ploadruhfnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rd = memw($Rs)", + (L2_ploadrifnew_io IntRegs:$Rd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"if (!$Pt.new) $Rdd = memd($Rs)", + (L2_ploadrdfnew_io DoubleRegs:$Rdd, PredRegs:$Pt, IntRegs:$Rs, 0), 0>; + +def : InstAlias<"dcfetch($Rs)", + (Y2_dcfetchbo IntRegs:$Rs, 0), 0>; + +// Alias of some insn mappings, others must be handled by the parser +def : InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + +// Rd=neg(Rs) is aliased to Rd=sub(#0,Rs) +def : InstAlias<"$Rd = neg($Rs)", + (A2_subri IntRegs:$Rd, 0, IntRegs:$Rs), 0>; + +def : InstAlias<"m0 = $Rs", (A2_tfrrcr C6, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m0", (A2_tfrcrr IntRegs:$Rd, C6)>; +def : InstAlias<"m1 = $Rs", (A2_tfrrcr C7, IntRegs:$Rs)>; +def : InstAlias<"$Rd = m1", (A2_tfrcrr IntRegs:$Rd, C7)>; + +def : InstAlias<"$Pd = $Ps", + (C2_or PredRegs:$Pd, PredRegs:$Ps, PredRegs:$Ps), 0>; + +def : InstAlias<"$Rdd = vaddb($Rss, $Rtt)", + (A2_vaddub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 1>; + +def : InstAlias<"$Rdd = vsubb($Rss,$Rtt)", + (A2_vsubub DoubleRegs:$Rdd, DoubleRegs:$Rss, DoubleRegs:$Rtt), 0>; + +def : InstAlias<"$Rd = mpyui($Rs,$Rt)", + (M2_mpyi IntRegs:$Rd, IntRegs:$Rs, IntRegs:$Rt), 0>; + +// Assembler mapped insns: cmp.lt(a,b) -> cmp.gt(b,a) +def : 
InstAlias<"$Pd=cmp.lt($Rs, $Rt)", + (C2_cmpgt PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; +def : InstAlias<"$Pd=cmp.ltu($Rs, $Rt)", + (C2_cmpgtu PredRegs:$Pd, IntRegs:$Rt, IntRegs:$Rs), 0>; + diff --git a/lib/Target/Hexagon/HexagonInstrEnc.td b/lib/Target/Hexagon/HexagonInstrEnc.td new file mode 100644 index 000000000000..280832fd167f --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrEnc.td @@ -0,0 +1,1019 @@ +class Enc_COPROC_VX_3op_v opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { opc{14-4}, src2}; + let Inst{13-0} = { opc{3}, src1, opc{2-0}, dst}; +} + +class V6_vtmpyb_enc : Enc_COPROC_VX_3op_v<0b000110010000000>; +class V6_vtmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000001>; +class V6_vdmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110010000010>; +class V6_vrmpyub_enc : Enc_COPROC_VX_3op_v<0b000110010000011>; +class V6_vrmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000100>; +class V6_vdsaduh_enc : Enc_COPROC_VX_3op_v<0b000110010000101>; +class V6_vdmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010000110>; +class V6_vdmpybus_dv_enc : Enc_COPROC_VX_3op_v<0b000110010000111>; +class V6_vtmpyb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001000>; +class V6_vtmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001001>; +class V6_vtmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001010>; +class V6_vdmpyhb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001011>; +class V6_vrmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001100>; +class V6_vrmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001101>; +class V6_vdmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001110>; +class V6_vdmpybus_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010001111>; +class V6_vdmpyhsusat_enc : Enc_COPROC_VX_3op_v<0b000110010010000>; +class V6_vdmpyhsuisat_enc : Enc_COPROC_VX_3op_v<0b000110010010001>; +class V6_vdmpyhsat_enc : Enc_COPROC_VX_3op_v<0b000110010010010>; +class V6_vdmpyhisat_enc : Enc_COPROC_VX_3op_v<0b000110010010011>; +class V6_vdmpyhb_dv_enc : Enc_COPROC_VX_3op_v<0b000110010010100>; +class V6_vmpybus_enc : Enc_COPROC_VX_3op_v<0b000110010010101>; +class V6_vmpabus_enc : Enc_COPROC_VX_3op_v<0b000110010010110>; +class V6_vmpahb_enc : Enc_COPROC_VX_3op_v<0b000110010010111>; +class V6_vdmpyhsusat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011000>; +class V6_vdmpyhsuisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011001>; +class V6_vdmpyhisat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011010>; +class V6_vdmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011011>; +class V6_vdmpyhb_dv_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011100>; +class V6_vmpybus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011101>; +class V6_vmpabus_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011110>; +class V6_vmpahb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010011111>; +class V6_vmpyh_enc : Enc_COPROC_VX_3op_v<0b000110010100000>; +class V6_vmpyhss_enc : Enc_COPROC_VX_3op_v<0b000110010100001>; +class V6_vmpyhsrs_enc : Enc_COPROC_VX_3op_v<0b000110010100010>; +class V6_vmpyuh_enc : Enc_COPROC_VX_3op_v<0b000110010100011>; +class V6_vmpyhsat_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101000>; +class V6_vmpyuh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101001>; +class V6_vmpyiwb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101010>; +class V6_vmpyiwh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010101011>; +class V6_vmpyihb_enc : Enc_COPROC_VX_3op_v<0b000110010110000>; +class V6_vror_enc : Enc_COPROC_VX_3op_v<0b000110010110001>; +class V6_vasrw_enc : Enc_COPROC_VX_3op_v<0b000110010110101>; +class V6_vasrh_enc : Enc_COPROC_VX_3op_v<0b000110010110110>; +class 
V6_vaslw_enc : Enc_COPROC_VX_3op_v<0b000110010110111>; +class V6_vdsaduh_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111000>; +class V6_vmpyihb_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111001>; +class V6_vaslw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111010>; +class V6_vasrw_acc_enc : Enc_COPROC_VX_3op_v<0b000110010111101>; +class V6_vaslh_enc : Enc_COPROC_VX_3op_v<0b000110011000000>; +class V6_vlsrw_enc : Enc_COPROC_VX_3op_v<0b000110011000001>; +class V6_vlsrh_enc : Enc_COPROC_VX_3op_v<0b000110011000010>; +class V6_vmpyiwh_enc : Enc_COPROC_VX_3op_v<0b000110011000111>; +class V6_vmpyub_acc_enc : Enc_COPROC_VX_3op_v<0b000110011001000>; +class V6_vmpyiwb_enc : Enc_COPROC_VX_3op_v<0b000110011010000>; +class V6_vtmpyhb_enc : Enc_COPROC_VX_3op_v<0b000110011010100>; +class V6_vmpyub_enc : Enc_COPROC_VX_3op_v<0b000110011100000>; +class V6_vrmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000000>; +class V6_vrmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000001>; +class V6_vrmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000010>; +class V6_vdmpyhvsat_enc : Enc_COPROC_VX_3op_v<0b000111000000011>; +class V6_vmpybv_enc : Enc_COPROC_VX_3op_v<0b000111000000100>; +class V6_vmpyubv_enc : Enc_COPROC_VX_3op_v<0b000111000000101>; +class V6_vmpybusv_enc : Enc_COPROC_VX_3op_v<0b000111000000110>; +class V6_vmpyhv_enc : Enc_COPROC_VX_3op_v<0b000111000000111>; +class V6_vrmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001000>; +class V6_vrmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001001>; +class V6_vrmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001010>; +class V6_vdmpyhvsat_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001011>; +class V6_vmpybv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001100>; +class V6_vmpyubv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001101>; +class V6_vmpybusv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001110>; +class V6_vmpyhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000001111>; +class V6_vmpyuhv_enc : Enc_COPROC_VX_3op_v<0b000111000010000>; +class V6_vmpyhvsrs_enc : Enc_COPROC_VX_3op_v<0b000111000010001>; +class V6_vmpyhus_enc : Enc_COPROC_VX_3op_v<0b000111000010010>; +class V6_vmpabusv_enc : Enc_COPROC_VX_3op_v<0b000111000010011>; +class V6_vmpyih_enc : Enc_COPROC_VX_3op_v<0b000111000010100>; +class V6_vand_enc : Enc_COPROC_VX_3op_v<0b000111000010101>; +class V6_vor_enc : Enc_COPROC_VX_3op_v<0b000111000010110>; +class V6_vxor_enc : Enc_COPROC_VX_3op_v<0b000111000010111>; +class V6_vmpyuhv_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011000>; +class V6_vmpyhus_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011001>; +class V6_vmpyih_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011100>; +class V6_vmpyiewuh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000011101>; +class V6_vmpyowh_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011110>; +class V6_vmpyowh_rnd_sacc_enc : Enc_COPROC_VX_3op_v<0b000111000011111>; +class V6_vaddw_enc : Enc_COPROC_VX_3op_v<0b000111000100000>; +class V6_vaddubsat_enc : Enc_COPROC_VX_3op_v<0b000111000100001>; +class V6_vadduhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100010>; +class V6_vaddhsat_enc : Enc_COPROC_VX_3op_v<0b000111000100011>; +class V6_vaddwsat_enc : Enc_COPROC_VX_3op_v<0b000111000100100>; +class V6_vsubb_enc : Enc_COPROC_VX_3op_v<0b000111000100101>; +class V6_vsubh_enc : Enc_COPROC_VX_3op_v<0b000111000100110>; +class V6_vsubw_enc : Enc_COPROC_VX_3op_v<0b000111000100111>; +class V6_vmpyiewh_acc_enc : Enc_COPROC_VX_3op_v<0b000111000101000>; +class V6_vsububsat_enc : Enc_COPROC_VX_3op_v<0b000111000110000>; +class V6_vsubuhsat_enc : Enc_COPROC_VX_3op_v<0b000111000110001>; +class V6_vsubhsat_enc : 
Enc_COPROC_VX_3op_v<0b000111000110010>; +class V6_vsubwsat_enc : Enc_COPROC_VX_3op_v<0b000111000110011>; +class V6_vaddb_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110100>; +class V6_vaddh_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110101>; +class V6_vaddw_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110110>; +class V6_vaddubsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111000110111>; +class V6_vadduhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000000>; +class V6_vaddhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000001>; +class V6_vaddwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000010>; +class V6_vsubb_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000011>; +class V6_vsubh_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000100>; +class V6_vsubw_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000101>; +class V6_vsububsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000110>; +class V6_vsubuhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001000111>; +class V6_vsubhsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010000>; +class V6_vsubwsat_dv_enc : Enc_COPROC_VX_3op_v<0b000111001010001>; +class V6_vaddubh_enc : Enc_COPROC_VX_3op_v<0b000111001010010>; +class V6_vadduhw_enc : Enc_COPROC_VX_3op_v<0b000111001010011>; +class V6_vaddhw_enc : Enc_COPROC_VX_3op_v<0b000111001010100>; +class V6_vsububh_enc : Enc_COPROC_VX_3op_v<0b000111001010101>; +class V6_vsubuhw_enc : Enc_COPROC_VX_3op_v<0b000111001010110>; +class V6_vsubhw_enc : Enc_COPROC_VX_3op_v<0b000111001010111>; +class V6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b000111001100000>; +class V6_vabsdiffh_enc : Enc_COPROC_VX_3op_v<0b000111001100001>; +class V6_vabsdiffuh_enc : Enc_COPROC_VX_3op_v<0b000111001100010>; +class V6_vabsdiffw_enc : Enc_COPROC_VX_3op_v<0b000111001100011>; +class V6_vavgub_enc : Enc_COPROC_VX_3op_v<0b000111001100100>; +class V6_vavguh_enc : Enc_COPROC_VX_3op_v<0b000111001100101>; +class V6_vavgh_enc : Enc_COPROC_VX_3op_v<0b000111001100110>; +class V6_vavgw_enc : Enc_COPROC_VX_3op_v<0b000111001100111>; +class V6_vnavgub_enc : Enc_COPROC_VX_3op_v<0b000111001110000>; +class V6_vnavgh_enc : Enc_COPROC_VX_3op_v<0b000111001110001>; +class V6_vnavgw_enc : Enc_COPROC_VX_3op_v<0b000111001110010>; +class V6_vavgubrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110011>; +class V6_vavguhrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110100>; +class V6_vavghrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110101>; +class V6_vavgwrnd_enc : Enc_COPROC_VX_3op_v<0b000111001110110>; +class V6_vmpabuuv_enc : Enc_COPROC_VX_3op_v<0b000111001110111>; +class V6_vminub_enc : Enc_COPROC_VX_3op_v<0b000111110000001>; +class V6_vminuh_enc : Enc_COPROC_VX_3op_v<0b000111110000010>; +class V6_vminh_enc : Enc_COPROC_VX_3op_v<0b000111110000011>; +class V6_vminw_enc : Enc_COPROC_VX_3op_v<0b000111110000100>; +class V6_vmaxub_enc : Enc_COPROC_VX_3op_v<0b000111110000101>; +class V6_vmaxuh_enc : Enc_COPROC_VX_3op_v<0b000111110000110>; +class V6_vmaxh_enc : Enc_COPROC_VX_3op_v<0b000111110000111>; +class V6_vmaxw_enc : Enc_COPROC_VX_3op_v<0b000111110010000>; +class V6_vdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010001>; +class V6_vrdelta_enc : Enc_COPROC_VX_3op_v<0b000111110010011>; +class V6_vdealb4w_enc : Enc_COPROC_VX_3op_v<0b000111110010111>; +class V6_vmpyowh_rnd_enc : Enc_COPROC_VX_3op_v<0b000111110100000>; +class V6_vshuffeb_enc : Enc_COPROC_VX_3op_v<0b000111110100001>; +class V6_vshuffob_enc : Enc_COPROC_VX_3op_v<0b000111110100010>; +class V6_vshufeh_enc : Enc_COPROC_VX_3op_v<0b000111110100011>; +class V6_vshufoh_enc : Enc_COPROC_VX_3op_v<0b000111110100100>; +class V6_vshufoeh_enc : Enc_COPROC_VX_3op_v<0b000111110100101>; +class 
V6_vshufoeb_enc : Enc_COPROC_VX_3op_v<0b000111110100110>; +class V6_vcombine_enc : Enc_COPROC_VX_3op_v<0b000111110100111>; +class V6_vmpyieoh_enc : Enc_COPROC_VX_3op_v<0b000111110110000>; +class V6_vsathub_enc : Enc_COPROC_VX_3op_v<0b000111110110010>; +class V6_vsatwh_enc : Enc_COPROC_VX_3op_v<0b000111110110011>; +class V6_vroundwh_enc : Enc_COPROC_VX_3op_v<0b000111110110100>; +class V6_vroundwuh_enc : Enc_COPROC_VX_3op_v<0b000111110110101>; +class V6_vroundhb_enc : Enc_COPROC_VX_3op_v<0b000111110110110>; +class V6_vroundhub_enc : Enc_COPROC_VX_3op_v<0b000111110110111>; +class V6_vasrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010000>; +class V6_vlsrwv_enc : Enc_COPROC_VX_3op_v<0b000111111010001>; +class V6_vlsrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010010>; +class V6_vasrhv_enc : Enc_COPROC_VX_3op_v<0b000111111010011>; +class V6_vaslwv_enc : Enc_COPROC_VX_3op_v<0b000111111010100>; +class V6_vaslhv_enc : Enc_COPROC_VX_3op_v<0b000111111010101>; +class V6_vaddb_enc : Enc_COPROC_VX_3op_v<0b000111111010110>; +class V6_vaddh_enc : Enc_COPROC_VX_3op_v<0b000111111010111>; +class V6_vmpyiewuh_enc : Enc_COPROC_VX_3op_v<0b000111111100000>; +class V6_vmpyiowh_enc : Enc_COPROC_VX_3op_v<0b000111111100001>; +class V6_vpackeb_enc : Enc_COPROC_VX_3op_v<0b000111111100010>; +class V6_vpackeh_enc : Enc_COPROC_VX_3op_v<0b000111111100011>; +class V6_vpackhub_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100101>; +class V6_vpackhb_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100110>; +class V6_vpackwuh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111100111>; +class V6_vpackwh_sat_enc : Enc_COPROC_VX_3op_v<0b000111111110000>; +class V6_vpackob_enc : Enc_COPROC_VX_3op_v<0b000111111110001>; +class V6_vpackoh_enc : Enc_COPROC_VX_3op_v<0b000111111110010>; +class V6_vmpyewuh_enc : Enc_COPROC_VX_3op_v<0b000111111110101>; +class V6_vmpyowh_enc : Enc_COPROC_VX_3op_v<0b000111111110111>; +class V6_extractw_enc : Enc_COPROC_VX_3op_v<0b100100100000001>; +class M6_vabsdiffub_enc : Enc_COPROC_VX_3op_v<0b111010001010000>; +class M6_vabsdiffb_enc : Enc_COPROC_VX_3op_v<0b111010001110000>; + +class Enc_COPROC_VX_cmp opc> : OpcodeHexagon { + bits<2> dst; + bits<5> src1; + bits<5> src2; + + let Inst{31-16} = { 0b00011, opc{12-7}, src2{4-0} }; + let Inst{13-0} = { opc{6}, src1{4-0}, opc{5-0}, dst{1-0} }; +} + +class V6_vandvrt_acc_enc : Enc_COPROC_VX_cmp<0b0010111100000>; +class V6_vandvrt_enc : Enc_COPROC_VX_cmp<0b0011010010010>; +class V6_veqb_and_enc : Enc_COPROC_VX_cmp<0b1001001000000>; +class V6_veqh_and_enc : Enc_COPROC_VX_cmp<0b1001001000001>; +class V6_veqw_and_enc : Enc_COPROC_VX_cmp<0b1001001000010>; +class V6_vgtb_and_enc : Enc_COPROC_VX_cmp<0b1001001000100>; +class V6_vgth_and_enc : Enc_COPROC_VX_cmp<0b1001001000101>; +class V6_vgtw_and_enc : Enc_COPROC_VX_cmp<0b1001001000110>; +class V6_vgtub_and_enc : Enc_COPROC_VX_cmp<0b1001001001000>; +class V6_vgtuh_and_enc : Enc_COPROC_VX_cmp<0b1001001001001>; +class V6_vgtuw_and_enc : Enc_COPROC_VX_cmp<0b1001001001010>; +class V6_veqb_or_enc : Enc_COPROC_VX_cmp<0b1001001010000>; +class V6_veqh_or_enc : Enc_COPROC_VX_cmp<0b1001001010001>; +class V6_veqw_or_enc : Enc_COPROC_VX_cmp<0b1001001010010>; +class V6_vgtb_or_enc : Enc_COPROC_VX_cmp<0b1001001010100>; +class V6_vgth_or_enc : Enc_COPROC_VX_cmp<0b1001001010101>; +class V6_vgtw_or_enc : Enc_COPROC_VX_cmp<0b1001001010110>; +class V6_vgtub_or_enc : Enc_COPROC_VX_cmp<0b1001001011000>; +class V6_vgtuh_or_enc : Enc_COPROC_VX_cmp<0b1001001011001>; +class V6_vgtuw_or_enc : Enc_COPROC_VX_cmp<0b1001001011010>; +class V6_veqb_xor_enc : 
Enc_COPROC_VX_cmp<0b1001001100000>; +class V6_veqh_xor_enc : Enc_COPROC_VX_cmp<0b1001001100001>; +class V6_veqw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100010>; +class V6_vgtb_xor_enc : Enc_COPROC_VX_cmp<0b1001001100100>; +class V6_vgth_xor_enc : Enc_COPROC_VX_cmp<0b1001001100101>; +class V6_vgtw_xor_enc : Enc_COPROC_VX_cmp<0b1001001100110>; +class V6_vgtub_xor_enc : Enc_COPROC_VX_cmp<0b1001001101000>; +class V6_vgtuh_xor_enc : Enc_COPROC_VX_cmp<0b1001001101001>; +class V6_vgtuw_xor_enc : Enc_COPROC_VX_cmp<0b1001001101010>; +class V6_veqb_enc : Enc_COPROC_VX_cmp<0b1111000000000>; +class V6_veqh_enc : Enc_COPROC_VX_cmp<0b1111000000001>; +class V6_veqw_enc : Enc_COPROC_VX_cmp<0b1111000000010>; +class V6_vgtb_enc : Enc_COPROC_VX_cmp<0b1111000000100>; +class V6_vgth_enc : Enc_COPROC_VX_cmp<0b1111000000101>; +class V6_vgtw_enc : Enc_COPROC_VX_cmp<0b1111000000110>; +class V6_vgtub_enc : Enc_COPROC_VX_cmp<0b1111000001000>; +class V6_vgtuh_enc : Enc_COPROC_VX_cmp<0b1111000001001>; +class V6_vgtuw_enc : Enc_COPROC_VX_cmp<0b1111000001010>; + +class Enc_COPROC_VX_p2op opc> : OpcodeHexagon { + bits<2> src1; + bits<5> dst; + bits<5> src2; + + let Inst{31-16} = { 0b00011110, src1{1-0}, 0b0000, opc{4-3} }; + let Inst{13-0} = { 1, src2{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vaddbq_enc : Enc_COPROC_VX_p2op<0b01000>; +class V6_vaddhq_enc : Enc_COPROC_VX_p2op<0b01001>; +class V6_vaddwq_enc : Enc_COPROC_VX_p2op<0b01010>; +class V6_vaddbnq_enc : Enc_COPROC_VX_p2op<0b01011>; +class V6_vaddhnq_enc : Enc_COPROC_VX_p2op<0b01100>; +class V6_vaddwnq_enc : Enc_COPROC_VX_p2op<0b01101>; +class V6_vsubbq_enc : Enc_COPROC_VX_p2op<0b01110>; +class V6_vsubhq_enc : Enc_COPROC_VX_p2op<0b01111>; +class V6_vsubwq_enc : Enc_COPROC_VX_p2op<0b10000>; +class V6_vsubbnq_enc : Enc_COPROC_VX_p2op<0b10001>; +class V6_vsubhnq_enc : Enc_COPROC_VX_p2op<0b10010>; +class V6_vsubwnq_enc : Enc_COPROC_VX_p2op<0b10011>; + +class Enc_COPROC_VX_2op opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + + let Inst{31-16} = { 0b00011110000000, opc{5-4} }; + let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vabsh_enc : Enc_COPROC_VX_2op<0b000000>; +class V6_vabsh_sat_enc : Enc_COPROC_VX_2op<0b000001>; +class V6_vabsw_enc : Enc_COPROC_VX_2op<0b000010>; +class V6_vabsw_sat_enc : Enc_COPROC_VX_2op<0b000011>; +class V6_vnot_enc : Enc_COPROC_VX_2op<0b000100>; +class V6_vdealh_enc : Enc_COPROC_VX_2op<0b000110>; +class V6_vdealb_enc : Enc_COPROC_VX_2op<0b000111>; +class V6_vunpackob_enc : Enc_COPROC_VX_2op<0b001000>; +class V6_vunpackoh_enc : Enc_COPROC_VX_2op<0b001001>; +class V6_vunpackub_enc : Enc_COPROC_VX_2op<0b010000>; +class V6_vunpackuh_enc : Enc_COPROC_VX_2op<0b010001>; +class V6_vunpackb_enc : Enc_COPROC_VX_2op<0b010010>; +class V6_vunpackh_enc : Enc_COPROC_VX_2op<0b010011>; +class V6_vshuffh_enc : Enc_COPROC_VX_2op<0b010111>; +class V6_vshuffb_enc : Enc_COPROC_VX_2op<0b100000>; +class V6_vzb_enc : Enc_COPROC_VX_2op<0b100001>; +class V6_vzh_enc : Enc_COPROC_VX_2op<0b100010>; +class V6_vsb_enc : Enc_COPROC_VX_2op<0b100011>; +class V6_vsh_enc : Enc_COPROC_VX_2op<0b100100>; +class V6_vcl0w_enc : Enc_COPROC_VX_2op<0b100101>; +class V6_vpopcounth_enc : Enc_COPROC_VX_2op<0b100110>; +class V6_vcl0h_enc : Enc_COPROC_VX_2op<0b100111>; +class V6_vnormamtw_enc : Enc_COPROC_VX_2op<0b110100>; +class V6_vnormamth_enc : Enc_COPROC_VX_2op<0b110101>; +class V6_vassign_enc : Enc_COPROC_VX_2op<0b111111>; + +class Enc_COPROC_VMEM_vL32_b_ai opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + + 
let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0000>; +class V6_vL32b_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0001>; +class V6_vL32b_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0010>; +class V6_vL32Ub_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b0111>; +class V6_vL32b_nt_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1000>; +class V6_vL32b_nt_cur_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1001>; +class V6_vL32b_nt_tmp_ai_enc : Enc_COPROC_VMEM_vL32_b_ai<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_ai_128B opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0000>; +class V6_vL32b_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0001>; +class V6_vL32b_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0010>; +class V6_vL32Ub_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b0111>; +class V6_vL32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1000>; +class V6_vL32b_nt_cur_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1001>; +class V6_vL32b_nt_tmp_ai_128B_enc : Enc_COPROC_VMEM_vL32_b_ai_128B<0b1010>; + +class Enc_COPROC_VMEM_vS32_b_ai_64B opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_ai_128B opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<5> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0000>; +class V6_vS32Ub_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b0111>; +class V6_vS32b_nt_ai_enc : Enc_COPROC_VMEM_vS32_b_ai_64B<0b1000>; + +class V6_vS32b_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0000>; +class V6_vS32Ub_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b0111>; +class V6_vS32b_nt_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_ai_128B<0b1000>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_64B opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-6}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<0>; +class V6_vS32b_nt_new_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_64B<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_ai_128B opc> : OpcodeHexagon { + bits<5> src1; + bits<11> src2; + bits<4> src2_vector; + bits<3> src3; + + let src2_vector = src2{10-7}; + let Inst{31-16} = { 0b001010000, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { src2_vector{3}, 0b00, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<0>; +class V6_vS32b_nt_new_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_ai_128B<1>; + +class Enc_COPROC_VMEM_vS32_b_pred_ai opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> src3_vector; 
+ bits<5> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class Enc_COPROC_VMEM_vS32_b_pred_ai_128B opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<5> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00000>; +class V6_vS32b_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b00001>; +class V6_vS32b_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01000>; +class V6_vS32b_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01001>; +class V6_vS32Ub_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01110>; +class V6_vS32Ub_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b01111>; +class V6_vS32b_nt_qpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10000>; +class V6_vS32b_nt_nqpred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b10001>; +class V6_vS32b_nt_pred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11000>; +class V6_vS32b_nt_npred_ai_enc : Enc_COPROC_VMEM_vS32_b_pred_ai<0b11001>; + +class V6_vS32b_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00000>; +class V6_vS32b_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b00001>; +class V6_vS32b_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01000>; +class V6_vS32b_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01001>; +class V6_vS32Ub_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01110>; +class V6_vS32Ub_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b01111>; +class V6_vS32b_nt_qpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10000>; +class V6_vS32b_nt_nqpred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b10001>; +class V6_vS32b_nt_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11000>; +class V6_vS32b_nt_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_ai_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-6}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0000>; +class V6_vS32b_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b0101>; +class V6_vS32b_nt_new_pred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1010>; +class V6_vS32b_nt_new_npred_ai_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<11> src3; + bits<4> src3_vector; + bits<3> src4; + + let src3_vector = src3{10-7}; + let Inst{31-16} = { 0b001010001, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { src3_vector{3}, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0000>; +class V6_vS32b_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b0101>; +class V6_vS32b_nt_new_pred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1010>; +class V6_vS32b_nt_new_npred_ai_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ai_128B<0b1111>; + +// TODO: Change script to generate dst, src1, src2 instead of 
+// dst, dst2, src1. +class Enc_COPROC_VMEM_vL32_b_pi opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0000>; +class V6_vL32b_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0001>; +class V6_vL32b_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0010>; +class V6_vL32Ub_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b0111>; +class V6_vL32b_nt_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1000>; +class V6_vL32b_nt_cur_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1001>; +class V6_vL32b_nt_tmp_pi_enc : Enc_COPROC_VMEM_vL32_b_pi<0b1010>; + +class Enc_COPROC_VMEM_vL32_b_pi_128B opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 0, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0000>; +class V6_vL32b_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0001>; +class V6_vL32b_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0010>; +class V6_vL32Ub_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b0111>; +class V6_vL32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1000>; +class V6_vL32b_nt_cur_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1001>; +class V6_vL32b_nt_tmp_pi_128B_enc : Enc_COPROC_VMEM_vL32_b_pi_128B<0b1010>; + + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. +class Enc_COPROC_VMEM_vS32_b_pi opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0000>; +class V6_vS32Ub_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b0111>; +class V6_vS32b_nt_pi_enc : Enc_COPROC_VMEM_vS32_b_pi<0b1000>; + +class Enc_COPROC_VMEM_vS32_b_pi_128B opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<5> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{3}, 1, src1{4-0} }; + let Inst{10-0} = {src2_vector{2-0}, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0000>; +class V6_vS32Ub_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b0111>; +class V6_vS32b_nt_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pi_128B<0b1000>; + +// TODO: Change script to generate src1, src2 and src3 instead of +// dst, src1, src2. 
+class Enc_COPROC_VMEM_vS32b_n_ew_pi opc> : OpcodeHexagon { + bits<5> src1; + bits<9> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{8-6}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<0>; +class V6_vS32b_nt_new_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi<1>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pi_128B opc> : OpcodeHexagon { + bits<5> src1; + bits<10> src2; + bits<3> src2_vector; + bits<3> src3; + + let src2_vector = src2{9-7}; + let Inst{31-16} = { 0b001010010, opc{0}, 1, src1{4-0} }; + let Inst{13-0} = { 0b000, src2_vector{2-0}, 0b00100, src3{2-0} }; +} + +class V6_vS32b_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<0>; +class V6_vS32b_nt_new_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pi_128B<1>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. +class Enc_COPROC_VMEM_vS32_b_pred_pi opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00000>; +class V6_vS32b_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b00001>; +class V6_vS32b_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01000>; +class V6_vS32b_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01001>; +class V6_vS32Ub_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01110>; +class V6_vS32Ub_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b01111>; +class V6_vS32b_nt_qpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10000>; +class V6_vS32b_nt_nqpred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b10001>; +class V6_vS32b_nt_pred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11000>; +class V6_vS32b_nt_npred_pi_enc : Enc_COPROC_VMEM_vS32_b_pred_pi<0b11001>; + +// TODO: Change script to generate src1, src2,src3 and src4 instead of +// dst, src1, src2, src3. 
+class Enc_COPROC_VMEM_vS32_b_pred_pi_128B opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<5> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{4-3}, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, opc{2-0}, src4{4-0} }; +} + +class V6_vS32b_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00000>; +class V6_vS32b_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b00001>; +class V6_vS32b_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01000>; +class V6_vS32b_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01001>; +class V6_vS32Ub_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01110>; +class V6_vS32Ub_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b01111>; +class V6_vS32b_nt_qpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10000>; +class V6_vS32b_nt_nqpred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b10001>; +class V6_vS32b_nt_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11000>; +class V6_vS32b_nt_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32_b_pred_pi_128B<0b11001>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<9> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{8-6}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0000>; +class V6_vS32b_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b0101>; +class V6_vS32b_nt_new_pred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1010>; +class V6_vS32b_nt_new_npred_pi_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi<0b1111>; + +class Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B opc> : OpcodeHexagon { + bits<2> src1; + bits<5> src2; + bits<10> src3; + bits<3> src3_vector; + bits<3> src4; + + let src3_vector = src3{9-7}; + let Inst{31-16} = { 0b001010011, opc{3}, 1, src2{4-0} }; + let Inst{13-0} = { 0, src1{1-0}, src3_vector{2-0}, 0b01, opc{2-0}, src4{2-0} }; +} + +class V6_vS32b_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0000>; +class V6_vS32b_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b0101>; +class V6_vS32b_nt_new_pred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1010>; +class V6_vS32b_nt_new_npred_pi_128B_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_pi_128B<0b1111>; + +class Enc_LD_load_m opc> : OpcodeHexagon { + bits<5> dst; + bits<5> src1; + bits<1> src2; + + let Inst{31-16} = { opc{12}, 0, opc{11-10}, 1, opc{9-4}, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b000, opc{3}, 0, opc{2-0}, dst{4-0} }; +} + +class V6_vL32b_ppu_enc : Enc_LD_load_m<0b0100110000000>; +class V6_vL32b_cur_ppu_enc : Enc_LD_load_m<0b0100110000001>; +class V6_vL32b_tmp_ppu_enc : Enc_LD_load_m<0b0100110000010>; +class V6_vL32Ub_ppu_enc : Enc_LD_load_m<0b0100110000111>; +class V6_vL32b_nt_ppu_enc : Enc_LD_load_m<0b0100110100000>; +class V6_vL32b_nt_cur_ppu_enc : Enc_LD_load_m<0b0100110100001>; +class V6_vL32b_nt_tmp_ppu_enc : Enc_LD_load_m<0b0100110100010>; + +class Enc_COPROC_VMEM_vS32_b_ppu opc> : OpcodeHexagon { + bits<5> src1; + bits<1> src2; + bits<5> src3; + + let Inst{31-16} = { 0b001010110, opc{3}, 1, src1{4-0} }; + let Inst{13-0} = { src2{0}, 0b00000, opc{2-0}, src3{4-0} }; +} + +class V6_vS32b_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0000>; +class V6_vS32Ub_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b0111>; +class 
+class V6_vS32b_nt_ppu_enc : Enc_COPROC_VMEM_vS32_b_ppu<0b1000>;
+
+class Enc_COPROC_VMEM_vS32b_new_ppu<bits<1> opc> : OpcodeHexagon {
+  bits<5> src1;
+  bits<1> src2;
+  bits<3> src3;
+
+  let Inst{31-16} = { 0b001010110, opc{0}, 1, src1{4-0} };
+  let Inst{13-0} = { src2{0}, 0b0000000100, src3{2-0} };
+}
+
+class V6_vS32b_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<0>;
+class V6_vS32b_nt_new_ppu_enc : Enc_COPROC_VMEM_vS32b_new_ppu<1>;
+
+class Enc_COPROC_VMEM_vS32_b_pred_ppu<bits<5> opc> : OpcodeHexagon {
+  bits<2> src1;
+  bits<5> src2;
+  bits<1> src3;
+  bits<5> src4;
+
+  let Inst{31-16} = { 0b001010111, opc{4-3}, src2{4-0} };
+  let Inst{13-0} = { src3{0}, src1{1-0}, 0b000, opc{2-0}, src4{4-0} };
+}
+
+class V6_vS32b_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00000>;
+class V6_vS32b_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b00001>;
+class V6_vS32b_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01000>;
+class V6_vS32b_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01001>;
+class V6_vS32Ub_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01110>;
+class V6_vS32Ub_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b01111>;
+class V6_vS32b_nt_qpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10000>;
+class V6_vS32b_nt_nqpred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b10001>;
+class V6_vS32b_nt_pred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11000>;
+class V6_vS32b_nt_npred_ppu_enc : Enc_COPROC_VMEM_vS32_b_pred_ppu<0b11001>;
+
+class Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<bits<4> opc> : OpcodeHexagon {
+  bits<2> src1;
+  bits<5> src2;
+  bits<1> src3;
+  bits<3> src4;
+
+  let Inst{31-16} = { 0b001010111, opc{3}, 1, src2{4-0} };
+  let Inst{13-0} = { src3{0}, src1{1-0}, 0b00001, opc{2-0}, src4{2-0} };
+}
+
+class V6_vS32b_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0000>;
+class V6_vS32b_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b0101>;
+class V6_vS32b_nt_new_pred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1010>;
+class V6_vS32b_nt_new_npred_ppu_enc : Enc_COPROC_VMEM_vS32b_n_ew_pred_ppu<0b1111>;
+
+
+class Enc_COPROC_VX_4op_i<bits<5> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+  bits<1> src3;
+
+  let Inst{31-16} = { 0b00011001, opc{4-2}, src2{4-0} };
+  let Inst{13-0} = { opc{1}, src1{4-0}, 1, opc{0}, src3{0}, dst{4-0} };
+}
+
+class V6_vrmpybusi_enc : Enc_COPROC_VX_4op_i<0b01000>;
+class V6_vrsadubi_enc : Enc_COPROC_VX_4op_i<0b01001>;
+class V6_vrmpybusi_acc_enc : Enc_COPROC_VX_4op_i<0b01010>;
+class V6_vrsadubi_acc_enc : Enc_COPROC_VX_4op_i<0b01011>;
+class V6_vrmpyubi_acc_enc : Enc_COPROC_VX_4op_i<0b01111>;
+class V6_vrmpyubi_enc : Enc_COPROC_VX_4op_i<0b10101>;
+
+class Enc_COPROC_VX_vandqrt<bits<5> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<2> src1;
+  bits<5> src2;
+
+  let Inst{31-16} = { 0b00011001, opc{4-3}, 1, src2{4-0} };
+  let Inst{13-0} = { opc{2}, 0b000, src1{1-0}, opc{1-0}, 1, dst{4-0} };
+}
+
+class V6_vandqrt_acc_enc : Enc_COPROC_VX_vandqrt<0b01101>;
+class V6_vandqrt_enc : Enc_COPROC_VX_vandqrt<0b10010>;
+
+class Enc_COPROC_VX_cards<bits<2> opc> : OpcodeHexagon {
+  bits<5> src1;
+  bits<5> src2;
+  bits<5> src3;
+
+  let Inst{31-16} = { 0b00011001111, src3{4-0} };
+  let Inst{13-0} = { 1, src1{4-0}, 0, opc{1-0}, src2{4-0} };
+}
+
+class V6_vshuff_enc : Enc_COPROC_VX_cards<0b01>;
+class V6_vdeal_enc : Enc_COPROC_VX_cards<0b10>;
+
+
+class Enc_COPROC_VX_v_cmov<bits<1> opc> : OpcodeHexagon {
+  bits<2> src1;
+  bits<5> dst;
+  bits<5> src2;
+
+  let Inst{31-16} = { 0b0001101000, opc{0}, 0b00000 };
+  let Inst{13-0} = { 0, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vcmov_enc : Enc_COPROC_VX_v_cmov<0>;
+class V6_vncmov_enc : Enc_COPROC_VX_v_cmov<1>;
+
+class Enc_X_p3op<bits<8> opc> : OpcodeHexagon {
+  bits<2> src1;
+  bits<5> dst;
+  bits<5> src2;
+  bits<5> src3;
+
+  let Inst{31-16} = { opc{7-5}, 0b1101, opc{4}, 0, opc{3-2}, src3{4-0} };
+  let Inst{13-0} = { opc{1}, src2{4-0}, opc{0}, src1{1-0}, dst{4-0} };
+}
+
+class V6_vnccombine_enc : Enc_X_p3op<0b00001000>;
+class V6_vccombine_enc : Enc_X_p3op<0b00001100>;
+
+class Enc_COPROC_VX_4op_r<bits<4> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+  bits<3> src3;
+
+  let Inst{31-16} = { 0b00011011, src2{4-0}, src3{2-0} };
+  let Inst{13-0} = { opc{3}, src1{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class V6_valignb_enc : Enc_COPROC_VX_4op_r<0b0000>;
+class V6_vlalignb_enc : Enc_COPROC_VX_4op_r<0b0001>;
+class V6_vasrwh_enc : Enc_COPROC_VX_4op_r<0b0010>;
+class V6_vasrwhsat_enc : Enc_COPROC_VX_4op_r<0b0011>;
+class V6_vasrwhrndsat_enc : Enc_COPROC_VX_4op_r<0b0100>;
+class V6_vasrwuhsat_enc : Enc_COPROC_VX_4op_r<0b0101>;
+class V6_vasrhubsat_enc : Enc_COPROC_VX_4op_r<0b0110>;
+class V6_vasrhubrndsat_enc : Enc_COPROC_VX_4op_r<0b0111>;
+class V6_vasrhbrndsat_enc : Enc_COPROC_VX_4op_r<0b1000>;
+class V6_vlutvvb_enc : Enc_COPROC_VX_4op_r<0b1001>;
+class V6_vshuffvdd_enc : Enc_COPROC_VX_4op_r<0b1011>;
+class V6_vdealvdd_enc : Enc_COPROC_VX_4op_r<0b1100>;
+class V6_vlutvvb_oracc_enc : Enc_COPROC_VX_4op_r<0b1101>;
+class V6_vlutvwh_enc : Enc_COPROC_VX_4op_r<0b1110>;
+class V6_vlutvwh_oracc_enc : Enc_COPROC_VX_4op_r<0b1111>;
+
+class Enc_S_3op_valign_i<bits<9> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+  bits<3> src3;
+
+  let Inst{31-16} = { opc{8-7}, 0, opc{6-3}, 0b00, opc{2-1}, src2{4-0} };
+  let Inst{13-0} = { opc{0}, src1{4-0}, src3{2-0}, dst{4-0} };
+}
+
+class V6_vlutb_enc : Enc_S_3op_valign_i<0b001100000>;
+class V6_vlutb_dv_enc : Enc_S_3op_valign_i<0b001100010>;
+class V6_vlutb_acc_enc : Enc_S_3op_valign_i<0b001100100>;
+class V6_vlutb_dv_acc_enc : Enc_S_3op_valign_i<0b001100110>;
+class V6_valignbi_enc : Enc_S_3op_valign_i<0b001111011>;
+class V6_vlalignbi_enc : Enc_S_3op_valign_i<0b001111111>;
+class S2_valignib_enc : Enc_S_3op_valign_i<0b110000000>;
+class S2_addasl_rrri_enc : Enc_S_3op_valign_i<0b110010000>;
+
+class Enc_COPROC_VX_3op_q<bits<3> opc> : OpcodeHexagon {
+  bits<2> dst;
+  bits<2> src1;
+  bits<2> src2;
+
+  let Inst{31-16} = { 0b00011110, src2{1-0}, 0b000011 };
+  let Inst{13-0} = { 0b0000, src1{1-0}, 0b000, opc{2-0}, dst{1-0} };
+}
+
+class V6_pred_and_enc : Enc_COPROC_VX_3op_q<0b000>;
+class V6_pred_or_enc : Enc_COPROC_VX_3op_q<0b001>;
+class V6_pred_xor_enc : Enc_COPROC_VX_3op_q<0b011>;
+class V6_pred_or_n_enc : Enc_COPROC_VX_3op_q<0b100>;
+class V6_pred_and_n_enc : Enc_COPROC_VX_3op_q<0b101>;
+
+class V6_pred_not_enc : OpcodeHexagon {
+  bits<2> dst;
+  bits<2> src1;
+
+  let Inst{31-16} = { 0b0001111000000011 };
+  let Inst{13-0} = { 0b0000, src1{1-0}, 0b000010, dst{1-0} };
+}
+
+class Enc_COPROC_VX_4op_q<bits<1> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<2> src1;
+  bits<5> src2;
+  bits<5> src3;
+
+  let Inst{31-16} = { 0b000111101, opc{0}, 1, src3{4-0} };
+  let Inst{13-0} = { 1, src2{4-0}, 0, src1{1-0}, dst{4-0} };
+}
+
+class V6_vswap_enc : Enc_COPROC_VX_4op_q<0>;
+class V6_vmux_enc : Enc_COPROC_VX_4op_q<1>;
+
+class Enc_X_2op<bits<16> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+
+  let Inst{31-16} = { opc{15-5}, src1{4-0} };
+  let Inst{13-0} = { opc{4-3}, 0b0000, opc{2-0}, dst{4-0} };
+}
+
+class V6_lvsplatw_enc : Enc_X_2op<0b0001100110100001>;
+class V6_vinsertwr_enc : Enc_X_2op<0b0001100110110001>;
+class S6_vsplatrbp_enc : Enc_X_2op<0b1000010001000100>;
+
+
+class Enc_CR_2op_r<bits<12> opc> : OpcodeHexagon {
+  bits<2> dst;
+  bits<5> src1;
+
+  let Inst{31-16} = { opc{11}, 0, opc{10-7}, 0, opc{6-3}, src1{4-0} };
+  let Inst{13-0} = { opc{2}, 0b000000, opc{1}, 0b000, opc{0}, dst{1-0} };
+}
+
+class V6_pred_scalar2_enc : Enc_CR_2op_r<0b001101101011>;
+class Y5_l2locka_enc : Enc_CR_2op_r<0b110000111100>;
+
+class Enc_S_3op_i6<bits<9> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<6> src2;
+
+  let Inst{31-16} = { 0b1000, opc{8-6}, 0, opc{5-3}, src1{4-0} };
+  let Inst{13-0} = { src2{5-0}, opc{2-0}, dst{4-0} };
+}
+
+class S6_rol_i_p_enc : Enc_S_3op_i6<0b000000011>;
+class S6_rol_i_p_nac_enc : Enc_S_3op_i6<0b001000011>;
+class S6_rol_i_p_acc_enc : Enc_S_3op_i6<0b001000111>;
+class S6_rol_i_p_and_enc : Enc_S_3op_i6<0b001010011>;
+class S6_rol_i_p_or_enc : Enc_S_3op_i6<0b001010111>;
+class S6_rol_i_p_xacc_enc : Enc_S_3op_i6<0b001100011>;
+
+class Enc_X_3op_r<bits<15> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+
+  let Inst{31-16} = { opc{14-4}, src1{4-0} };
+  let Inst{13-0} = { opc{3}, src2{4-0}, opc{2-0}, dst{4-0} };
+}
+
+class S6_rol_i_r_enc : Enc_X_3op_r<0b100011000000011>;
+class S6_rol_i_r_nac_enc : Enc_X_3op_r<0b100011100000011>;
+class S6_rol_i_r_acc_enc : Enc_X_3op_r<0b100011100000111>;
+class S6_rol_i_r_and_enc : Enc_X_3op_r<0b100011100100011>;
+class S6_rol_i_r_or_enc : Enc_X_3op_r<0b100011100100111>;
+class S6_rol_i_r_xacc_enc : Enc_X_3op_r<0b100011101000011>;
+class S6_vtrunehb_ppp_enc : Enc_X_3op_r<0b110000011000011>;
+class S6_vtrunohb_ppp_enc : Enc_X_3op_r<0b110000011000101>;
+
+class Enc_no_operands<bits<25> opc> : OpcodeHexagon {
+
+  let Inst{31-16} = { opc{24-10}, 0 };
+  let Inst{13-0} = { opc{9-7}, 0b000, opc{6-0}, 0 };
+}
+
+class Y5_l2gunlock_enc : Enc_no_operands<0b1010100000100000010000000>;
+class Y5_l2gclean_enc : Enc_no_operands<0b1010100000100000100000000>;
+class Y5_l2gcleaninv_enc : Enc_no_operands<0b1010100000100000110000000>;
+class V6_vhist_enc : Enc_no_operands<0b0001111000000001001000000>;
+
+class Enc_J_jumpr<bits<13> opc> : OpcodeHexagon {
+  bits<5> src1;
+
+  let Inst{31-16} = { opc{12-6}, 0, opc{5-3}, src1{4-0} };
+  let Inst{13-0} = { 0b00, opc{2}, 0b0000, opc{1-0}, 0b00000 };
+}
+
+class Y5_l2unlocka_enc : Enc_J_jumpr<0b1010011011000>;
+class Y2_l2cleaninvidx_enc : Enc_J_jumpr<0b1010100011000>;
+
+class Enc_ST_l2gclean_pa<bits<2> opc> : OpcodeHexagon {
+  bits<5> src1;
+
+  let Inst{31-16} = { 0b101001101, opc{1-0}, 0b00000 };
+  let Inst{13-0} = { 0, src1{4-0}, 0b00000000 };
+}
+
+class Y6_l2gcleanpa_enc : Enc_ST_l2gclean_pa<0b01>;
+class Y6_l2gcleaninvpa_enc : Enc_ST_l2gclean_pa<0b10>;
+
+class A5_ACS_enc : OpcodeHexagon {
+  bits<5> dst1;
+  bits<2> dst2;
+  bits<5> src1;
+  bits<5> src2;
+
+  let Inst{31-16} = { 0b11101010101, src1{4-0} };
+  let Inst{13-0} = { 0, src2{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
+
+class Enc_X_4op_r<bits<8> opc> : OpcodeHexagon {
+  bits<5> dst;
+  bits<5> src1;
+  bits<5> src2;
+  bits<2> src3;
+
+  let Inst{31-16} = { 0b11, opc{7}, 0, opc{6-5}, 1, opc{4-1}, src1{4-0} };
+  let Inst{13-0} = { 0, src2{4-0}, opc{0}, src3{1-0}, dst{4-0} };
+}
+
+class S2_vsplicerb_enc : Enc_X_4op_r<0b00001000>;
+class S2_cabacencbin_enc : Enc_X_4op_r<0b00001010>;
+class F2_sffma_sc_enc : Enc_X_4op_r<0b11110111>;
+
+class V6_vhistq_enc : OpcodeHexagon {
+  bits<2> src1;
+
+  let Inst{31-16} = { 0b00011110, src1{1-0}, 0b000010 };
+  let Inst{13-0} = { 0b10000010000000 };
+}
+
+// TODO: Change script to generate dst1 instead of dst.
+class A6_vminub_RdP_enc : OpcodeHexagon {
+  bits<5> dst1;
+  bits<2> dst2;
+  bits<5> src1;
+  bits<5> src2;
+
+  let Inst{31-16} = { 0b11101010111, src2{4-0} };
+  let Inst{13-0} = { 0, src1{4-0}, 0, dst2{1-0}, dst1{4-0} };
+}
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index 44bab292f32c..3c5ec1701dc2 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -34,6 +34,8 @@ class SubTarget<bits<6> value> {
 
 def HasAnySubT : SubTarget<0x3f>;  // 111111
 def HasV5SubT : SubTarget<0x3e>;   // 111110
+def HasV55SubT : SubTarget<0x3c>;  // 111100
+def HasV60SubT : SubTarget<0x38>;  // 111000
 
 // Addressing modes for load/store instructions
 class AddrModeType<bits<3> value> {
@@ -57,6 +59,8 @@ def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
 def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
 def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
 def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
+def Vector64Access : MemAccessSize<7>;// Vector access instruction (memv)
+def Vector128Access : MemAccessSize<8>;// Vector access instruction (memv)
 
 
 //===----------------------------------------------------------------------===//
@@ -167,14 +171,23 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
   bits<1> isFP = 0;
   let TSFlags {48} = isFP; // Floating-point.
 
+  bits<1> hasNewValue2 = 0;
+  let TSFlags{50} = hasNewValue2; // Second New-value producer insn.
+  bits<3> opNewValue2 = 0;
+  let TSFlags{53-51} = opNewValue2; // Second New-value produced operand.
+
+  bits<1> isAccumulator = 0;
+  let TSFlags{54} = isAccumulator;
+
   // Fields used for relation models.
+  bit isNonTemporal = 0;
+  string isNT = ""; // set to "true" for non-temporal vector stores.
   string BaseOpcode = "";
   string CextOpcode = "";
   string PredSense = "";
   string PNewValue = "";
   string NValueST = "";     // Set to "true" for new-value stores.
   string InputType = "";    // Input is "imm" or "reg" type.
-  string isMEMri = "false"; // Set to "true" for load/store with MEMri operand.
   string isFloat = "false"; // Set to "true" for the floating-point load/store.
   string isBrTaken = !if(isTaken, "true", "false"); // Set to "true"/"false" for jump instructions
 
@@ -182,6 +195,7 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
                              "");
   let PNewValue = !if(isPredicatedNew, "new", "");
   let NValueST = !if(isNVStore, "true", "false");
+  let isNT = !if(isNonTemporal, "true", "false");
 
   // *** Must match MCTargetDesc/HexagonBaseInfo.h ***
 }
@@ -217,6 +231,11 @@ class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
               string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>, OpcodeHexagon;
 
+let mayLoad = 1 in
+class LD1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
+
 // ST Instruction Class in V2/V3 can take SLOT0 only.
 // ST Instruction Class in V4 can take SLOT0 & SLOT1.
 // Definition of the instruction class CHANGED from V2/V3 to V4.
@@ -234,6 +253,12 @@ class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
               string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>, OpcodeHexagon;
 
+// Same as ST0Inst but doesn't derive from OpcodeHexagon.
+let mayStore = 1 in
+class ST1Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+              string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
+
 // ST Instruction Class in V2/V3 can take SLOT0 only.
 // ST Instruction Class in V4 can take SLOT0 & SLOT1.
 // Definition of the instruction class CHANGED from V2/V3 to V4.
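As the comment in the hunk above says, these TSFlags bit positions must stay in sync with MCTargetDesc/HexagonBaseInfo.h. A hedged C++ sketch of the consumer side, using the bit positions from the patch; the Pos/Mask constant names follow the HexagonII convention but are written out here only for illustration:

#include <cstdint>

// Illustrative mirror of the new TSFlags fields added above; the positions
// come from the patch, the names are assumptions in the Pos/Mask style.
namespace HexagonII {
  enum {
    hasNewValue2Pos = 50,  hasNewValue2Mask = 0x1,
    opNewValue2Pos  = 51,  opNewValue2Mask  = 0x7,  // bits 53-51
    isAccumulatorPos = 54, isAccumulatorMask = 0x1,
  };
}

static bool hasNewValue2(uint64_t TSFlags) {
  return (TSFlags >> HexagonII::hasNewValue2Pos) & HexagonII::hasNewValue2Mask;
}

static unsigned opNewValue2(uint64_t TSFlags) {
  return (TSFlags >> HexagonII::opNewValue2Pos) & HexagonII::opNewValue2Mask;
}

static bool isAccumulator(uint64_t TSFlags) {
  return (TSFlags >> HexagonII::isAccumulatorPos) & HexagonII::isAccumulatorMask;
}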
@@ -277,6 +302,11 @@ class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
     OpcodeHexagon;
 
+// Same as above but doesn't derive from OpcodeHexagon
+class MInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+             string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
 // M Instruction Class in V2/V3.
 // XTYPE Instruction Class in V4.
 // Definition of the instruction class NOT CHANGED.
@@ -294,6 +324,10 @@ class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
   : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>,
     OpcodeHexagon;
 
+class SInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+             string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
+
 // S Instruction Class in V2/V3.
 // XTYPE Instruction Class in V4.
 // Definition of the instruction class NOT CHANGED.
@@ -402,3 +436,13 @@ include "HexagonInstrFormatsV4.td"
 //===----------------------------------------------------------------------===//
 // V4 Instruction Format Definitions +
 //===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
+
+include "HexagonInstrFormatsV60.td"
+
+//===----------------------------------------------------------------------===//
+// V60 Instruction Format Definitions +
+//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index db83ef6bc474..2d1dea526eed 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -21,8 +21,6 @@ def TypeMEMOP : IType<9>;
 def TypeNV : IType<10>;
 def TypeDUPLEX : IType<11>;
 def TypeCOMPOUND : IType<12>;
-def TypeAG_VX : IType<28>;
-def TypeAG_VM : IType<29>;
 def TypePREFIX : IType<30>;
 
 // Duplex Instruction Class Declaration
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
new file mode 100644
index 000000000000..f3d43dec733e
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -0,0 +1,238 @@
+//==- HexagonInstrFormatsV60.td - Hexagon Instruction Formats -*- tablegen -==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Hexagon V60 instruction classes in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+//----------------------------------------------------------------------------//
+// Hexagon Instruction Flags +
+//
+//    *** Must match BaseInfo.h ***
+//----------------------------------------------------------------------------//
+
+def TypeCVI_VA : IType<13>;
+def TypeCVI_VA_DV : IType<14>;
+def TypeCVI_VX : IType<15>;
+def TypeCVI_VX_DV : IType<16>;
+def TypeCVI_VP : IType<17>;
+def TypeCVI_VP_VS : IType<18>;
+def TypeCVI_VS : IType<19>;
+def TypeCVI_VINLANESAT : IType<20>;
+def TypeCVI_VM_LD : IType<21>;
+def TypeCVI_VM_TMP_LD : IType<22>;
+def TypeCVI_VM_CUR_LD : IType<23>;
+def TypeCVI_VM_VP_LDU : IType<24>;
+def TypeCVI_VM_ST : IType<25>;
+def TypeCVI_VM_NEW_ST : IType<26>;
+def TypeCVI_VM_STU : IType<27>;
+def TypeCVI_HIST : IType<28>;
+
+//----------------------------------------------------------------------------//
+// Instruction Classes Definitions +
+//----------------------------------------------------------------------------//
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource<dag outs, dag ins, string asmstr,
+                      list<dag> pattern = [], string cstr = "",
+                      InstrItinClass itin = CVI_VA>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VA_DV_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VA_DV>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_long<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VX_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource_late<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VX_LATE>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+    Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_Resource<dag outs, dag ins, string asmstr,
+                      list<dag> pattern = [], string cstr = "",
+                      InstrItinClass itin = CVI_VX>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VX_DV>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Slot2_Resource<dag outs, dag ins, string asmstr,
+                               list<dag> pattern = [], string cstr = "",
+                               InstrItinClass itin = CVI_VX_DV_SLOT2>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource_long<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VX_DV_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_Resource_long<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VP_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_early<dag outs, dag ins, string asmstr,
+                               list<dag> pattern = [], string cstr = "",
+                               InstrItinClass itin = CVI_VP_VS_EARLY>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VP_VS_LONG>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VP_VS_Resource_long_early<dag outs, dag ins, string asmstr,
+                                    list<dag> pattern = [], string cstr = "",
+                                    InstrItinClass itin = CVI_VP_VS_LONG_EARLY>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VP_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource<dag outs, dag ins, string asmstr,
+                      list<dag> pattern = [], string cstr = "",
+                      InstrItinClass itin = CVI_VS>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VINLANESAT_Resource<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VINLANESAT>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VINLANESAT>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VS_Resource_long<dag outs, dag ins, string asmstr,
+                           list<dag> pattern = [], string cstr = "",
+                           InstrItinClass itin = CVI_VS>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VS>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
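The TypeCVI_* IType values above partition the new HVX instructions by the hardware resource they occupy, and per the comment they must match BaseInfo.h. A minimal C++ mirror, illustrating the kind of range check a packetizer-style consumer might perform; the enum values are from the patch, the helper name is an assumption:

// Illustrative only: IType values mirrored as a C++ enum so a consumer can
// ask whether an instruction occupies an HVX memory (VM_*) resource.
enum IType {
  TypeCVI_VA = 13, TypeCVI_VA_DV = 14, TypeCVI_VX = 15, TypeCVI_VX_DV = 16,
  TypeCVI_VP = 17, TypeCVI_VP_VS = 18, TypeCVI_VS = 19,
  TypeCVI_VINLANESAT = 20, TypeCVI_VM_LD = 21, TypeCVI_VM_TMP_LD = 22,
  TypeCVI_VM_CUR_LD = 23, TypeCVI_VM_VP_LDU = 24, TypeCVI_VM_ST = 25,
  TypeCVI_VM_NEW_ST = 26, TypeCVI_VM_STU = 27, TypeCVI_HIST = 28
};

static bool isCVIMemType(IType T) {
  return T >= TypeCVI_VM_LD && T <= TypeCVI_VM_STU; // the contiguous VM_* range
}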
+class CVI_VM_LD_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VM_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_LD_Resource_long<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VM_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource<dag outs, dag ins, string asmstr,
+                             list<dag> pattern = [], string cstr = "",
+                             InstrItinClass itin = CVI_VM_TMP_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_TMP_LD_Resource_long<dag outs, dag ins, string asmstr,
+                                  list<dag> pattern = [], string cstr = "",
+                                  InstrItinClass itin = CVI_VM_TMP_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_TMP_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_CUR_LD_Resource<dag outs, dag ins, string asmstr,
+                             list<dag> pattern = [], string cstr = "",
+                             InstrItinClass itin = CVI_VM_CUR_LD>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_CUR_LD>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource<dag outs, dag ins, string asmstr,
+                             list<dag> pattern = [], string cstr = "",
+                             InstrItinClass itin = CVI_VM_VP_LDU>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_VP_LDU_Resource_long<dag outs, dag ins, string asmstr,
+                                  list<dag> pattern = [], string cstr = "",
+                                  InstrItinClass itin = CVI_VM_VP_LDU>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_VP_LDU>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_VM_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_ST_Resource_long<dag outs, dag ins, string asmstr,
+                              list<dag> pattern = [], string cstr = "",
+                              InstrItinClass itin = CVI_VM_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_ST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource<dag outs, dag ins, string asmstr,
+                             list<dag> pattern = [], string cstr = "",
+                             InstrItinClass itin = CVI_VM_NEW_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_NEW_ST_Resource_long<dag outs, dag ins, string asmstr,
+                                  list<dag> pattern = [], string cstr = "",
+                                  InstrItinClass itin = CVI_VM_NEW_ST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_NEW_ST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource<dag outs, dag ins, string asmstr,
+                          list<dag> pattern = [], string cstr = "",
+                          InstrItinClass itin = CVI_VM_STU>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_VM_STU_Resource_long<dag outs, dag ins, string asmstr,
+                               list<dag> pattern = [], string cstr = "",
+                               InstrItinClass itin = CVI_VM_STU>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VM_STU>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource<dag outs, dag ins, string asmstr,
+                        list<dag> pattern = [], string cstr = "",
+                        InstrItinClass itin = CVI_HIST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+    OpcodeHexagon, Requires<[HasV60T, UseHVX]>;
+}
+
+let validSubTargets = HasV60SubT in
+{
+class CVI_VA_Resource1<dag outs, dag ins, string asmstr,
+                       list<dag> pattern = [], string cstr = "",
+                       InstrItinClass itin = CVI_VA>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VA>,
+    Requires<[HasV60T, UseHVX]>;
+
+class CVI_VX_DV_Resource1<dag outs, dag ins, string asmstr,
+                          list<dag> pattern = [], string cstr = "",
+                          InstrItinClass itin = CVI_VX_DV>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_VX_DV>,
+    Requires<[HasV60T, UseHVX]>;
+
+class CVI_HIST_Resource1<dag outs, dag ins, string asmstr,
+                         list<dag> pattern = [], string cstr = "",
+                         InstrItinClass itin = CVI_HIST>
+  : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCVI_HIST>,
+    Requires<[HasV60T, UseHVX]>;
+}
+
+
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 3cb082349b41..eb3590cb1076 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -23,9 +23,11 @@
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include <cctype>
 
 using namespace llvm;
 
@@ -36,9 +38,41 @@ using namespace llvm;
 #include "HexagonGenInstrInfo.inc"
 #include "HexagonGenDFAPacketizer.inc"
 
+using namespace llvm;
+
+cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden,
+  cl::init(false), cl::desc("Do not consider inline-asm a scheduling/"
+                            "packetization boundary."));
+
+static cl::opt<bool> EnableBranchPrediction("hexagon-enable-branch-prediction",
+  cl::Hidden, cl::init(true), cl::desc("Enable branch prediction"));
+
+static cl::opt<bool> DisableNVSchedule("disable-hexagon-nv-schedule",
+  cl::Hidden, cl::ZeroOrMore, cl::init(false),
+  cl::desc("Disable schedule adjustment for new value stores."));
+
+static cl::opt<bool> EnableTimingClassLatency(
+  "enable-timing-class-latency", cl::Hidden, cl::init(false),
+  cl::desc("Enable timing class latency"));
+
+static cl::opt<bool> EnableALUForwarding(
+  "enable-alu-forwarding", cl::Hidden, cl::init(true),
+  cl::desc("Enable vec alu forwarding"));
+
+static cl::opt<bool> EnableACCForwarding(
+  "enable-acc-forwarding", cl::Hidden, cl::init(true),
+  cl::desc("Enable vec acc forwarding"));
+
+static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large",
+  cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm"));
+
 ///
 /// Constants for Hexagon instructions.
 ///
+const int Hexagon_MEMV_OFFSET_MAX_128B = 2047;  // #s7
+const int Hexagon_MEMV_OFFSET_MIN_128B = -2048; // #s7
+const int Hexagon_MEMV_OFFSET_MAX = 1023;       // #s6
+const int Hexagon_MEMV_OFFSET_MIN = -1024;      // #s6
 const int Hexagon_MEMW_OFFSET_MAX = 4095;
 const int Hexagon_MEMW_OFFSET_MIN = -4096;
 const int Hexagon_MEMD_OFFSET_MAX = 8191;
@@ -57,71 +91,49 @@ const int Hexagon_MEMH_AUTOINC_MAX = 14;
 const int Hexagon_MEMH_AUTOINC_MIN = -16;
 const int Hexagon_MEMB_AUTOINC_MAX = 7;
 const int Hexagon_MEMB_AUTOINC_MIN = -8;
+const int Hexagon_MEMV_AUTOINC_MAX = 192;
+const int Hexagon_MEMV_AUTOINC_MIN = -256;
+const int Hexagon_MEMV_AUTOINC_MAX_128B = 384;
+const int Hexagon_MEMV_AUTOINC_MIN_128B = -512;
 
 // Pin the vtable to this file.
 void HexagonInstrInfo::anchor() {}
 
 HexagonInstrInfo::HexagonInstrInfo(HexagonSubtarget &ST)
     : HexagonGenInstrInfo(Hexagon::ADJCALLSTACKDOWN, Hexagon::ADJCALLSTACKUP),
-      RI(), Subtarget(ST) {}
-
-/// isLoadFromStackSlot - If the specified machine instruction is a direct
-/// load from a stack slot, return the virtual or physical register number of
-/// the destination along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than loading from the stack slot.
-unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-                                               int &FrameIndex) const {
+      RI() {}
 
-  switch (MI->getOpcode()) {
-  default: break;
-  case Hexagon::L2_loadri_io:
-  case Hexagon::L2_loadrd_io:
-  case Hexagon::L2_loadrh_io:
-  case Hexagon::L2_loadrb_io:
-  case Hexagon::L2_loadrub_io:
-    if (MI->getOperand(2).isFI() &&
-        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
-      FrameIndex = MI->getOperand(2).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
-  }
-  return 0;
+static bool isIntRegForSubInst(unsigned Reg) {
+  return (Reg >= Hexagon::R0 && Reg <= Hexagon::R7) ||
+         (Reg >= Hexagon::R16 && Reg <= Hexagon::R23);
 }
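The new Hexagon_MEMV_* constants above bound the byte offsets and post-increments legal for HVX vector loads and stores (64-byte vectors, doubled for 128B mode). A hedged C++ sketch of the kind of range check they support; the helper name is an assumption, and the requirement that the value be a multiple of the vector size is inferred from the encodings being expressed in vector units:

// Illustrative only: legal byte post-increment window for an HVX vector
// memory access, per the constants above.
static bool isValidVecAutoIncImm(int Off, bool Is128B) {
  const int Min = Is128B ? -512 : -256;  // Hexagon_MEMV_AUTOINC_MIN[_128B]
  const int Max = Is128B ?  384 :  192;  // Hexagon_MEMV_AUTOINC_MAX[_128B]
  const int VecSize = Is128B ? 128 : 64;
  return Off >= Min && Off <= Max && (Off % VecSize) == 0;
}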
-/// isStoreToStackSlot - If the specified machine instruction is a direct
-/// store to a stack slot, return the virtual or physical register number of
-/// the source reg along with the FrameIndex of the loaded stack slot. If
-/// not, return 0. This predicate must return 0 if the instruction has
-/// any side effects other than storing to the stack slot.
-unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
-                                              int &FrameIndex) const {
-  switch (MI->getOpcode()) {
-  default: break;
-  case Hexagon::S2_storeri_io:
-  case Hexagon::S2_storerd_io:
-  case Hexagon::S2_storerh_io:
-  case Hexagon::S2_storerb_io:
-    if (MI->getOperand(2).isFI() &&
-        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
-      FrameIndex = MI->getOperand(0).getIndex();
-      return MI->getOperand(2).getReg();
-    }
-    break;
-  }
-  return 0;
+static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) {
+  return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) &&
+         isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg));
 }
 
-// Find the hardware loop instruction used to set-up the specified loop.
-// On Hexagon, we have two instructions used to set-up the hardware loop
-// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
-// to indicate the end of a loop.
-static MachineInstr *
-findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
-              SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
+
+/// Calculate number of instructions excluding the debug instructions.
+static unsigned nonDbgMICount(MachineBasicBlock::const_instr_iterator MIB,
+                              MachineBasicBlock::const_instr_iterator MIE) {
+  unsigned Count = 0;
+  for (; MIB != MIE; ++MIB) {
+    if (!MIB->isDebugValue())
+      ++Count;
+  }
+  return Count;
+}
+
+
+/// Find the hardware loop instruction used to set-up the specified loop.
+/// On Hexagon, we have two instructions used to set-up the hardware loop
+/// (LOOP0, LOOP1) with corresponding endloop (ENDLOOP0, ENDLOOP1) instructions
+/// to indicate the end of a loop.
+static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
+      SmallPtrSet<MachineBasicBlock *, 8> &Visited) {
   int LOOPi;
   int LOOPr;
   if (EndLoopOp == Hexagon::ENDLOOP0) {
@@ -157,100 +169,108 @@ findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
   return 0;
 }
 
-unsigned HexagonInstrInfo::InsertBranch(
-    MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB,
-    ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
-  Opcode_t BOpc = Hexagon::J2_jump;
-  Opcode_t BccOpc = Hexagon::J2_jumpt;
+/// Gather register def/uses from MI.
+/// This treats possible (predicated) defs as actually happening ones
+/// (conservatively).
+static inline void parseOperands(const MachineInstr *MI,
+      SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) {
+  Defs.clear();
+  Uses.clear();
 
-  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
 
-  // Check if ReverseBranchCondition has asked to reverse this branch
-  // If we want to reverse the branch an odd number of times, we want
-  // J2_jumpf.
-  if (!Cond.empty() && Cond[0].isImm())
-    BccOpc = Cond[0].getImm();
+    if (!MO.isReg())
+      continue;
 
-  if (!FBB) {
-    if (Cond.empty()) {
-      // Due to a bug in TailMerging/CFG Optimization, we need to add a
-      // special case handling of a predicated jump followed by an
-      // unconditional jump. If not, Tail Merging and CFG Optimization go
-      // into an infinite loop.
-      MachineBasicBlock *NewTBB, *NewFBB;
-      SmallVector<MachineOperand, 4> Cond;
-      MachineInstr *Term = MBB.getFirstTerminator();
-      if (Term != MBB.end() && isPredicated(Term) &&
-          !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
-        MachineBasicBlock *NextBB =
-            std::next(MachineFunction::iterator(&MBB));
-        if (NewTBB == NextBB) {
-          ReverseBranchCondition(Cond);
-          RemoveBranch(MBB);
-          return InsertBranch(MBB, TBB, nullptr, Cond, DL);
-        }
-      }
-      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
-    } else if (isEndLoopN(Cond[0].getImm())) {
-      int EndLoopOp = Cond[0].getImm();
-      assert(Cond[1].isMBB());
-      // Since we're adding an ENDLOOP, there better be a LOOP instruction.
-      // Check for it, and change the BB target if needed.
-      SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
-      MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
-      assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
-      Loop->getOperand(0).setMBB(TBB);
-      // Add the ENDLOOP after the finding the LOOP0.
-      BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
-    } else if (isNewValueJump(Cond[0].getImm())) {
-      assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
-      // New value jump
-      // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
-      // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
-      unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
-      DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
-      if (Cond[2].isReg()) {
-        unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
-        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
-          addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
-      } else if(Cond[2].isImm()) {
-        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
-          addImm(Cond[2].getImm()).addMBB(TBB);
-      } else
-        llvm_unreachable("Invalid condition for branching");
-    } else {
-      assert((Cond.size() == 2) && "Malformed cond vector");
-      const MachineOperand &RO = Cond[1];
-      unsigned Flags = getUndefRegState(RO.isUndef());
-      BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+
+    if (MO.isUse())
+      Uses.push_back(MO.getReg());
+
+    if (MO.isDef())
+      Defs.push_back(MO.getReg());
+  }
+}
+
+
+// Position dependent, so check twice for swap.
+static bool isDuplexPairMatch(unsigned Ga, unsigned Gb) {
+  switch (Ga) {
+  case HexagonII::HSIG_None:
+  default:
+    return false;
+  case HexagonII::HSIG_L1:
+    return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_L2:
+    return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+            Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_S1:
+    return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+            Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_S2:
+    return (Gb == HexagonII::HSIG_L1 || Gb == HexagonII::HSIG_L2 ||
+            Gb == HexagonII::HSIG_S1 || Gb == HexagonII::HSIG_S2 ||
+            Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_A:
+    return (Gb == HexagonII::HSIG_A);
+  case HexagonII::HSIG_Compound:
+    return (Gb == HexagonII::HSIG_Compound);
+  }
+  return false;
+}
+
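The duplex grouping table above is deliberately asymmetric, which is what the "check twice for swap" comment means: an L2 sub-instruction may pair with an L1 in this slot order, but not the reverse. A hypothetical usage sketch, assuming the function and the HexagonII::HSIG_* enum from this patch are in scope:

// Illustrative only: why callers probe isDuplexPairMatch in both orders.
//   isDuplexPairMatch(HexagonII::HSIG_L2, HexagonII::HSIG_L1) -> true
//   isDuplexPairMatch(HexagonII::HSIG_L1, HexagonII::HSIG_L2) -> false
static bool canFormDuplex(unsigned Ga, unsigned Gb) {
  return isDuplexPairMatch(Ga, Gb) || isDuplexPairMatch(Gb, Ga);
}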
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                               int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case Hexagon::L2_loadri_io:
+  case Hexagon::L2_loadrd_io:
+  case Hexagon::L2_loadrh_io:
+  case Hexagon::L2_loadrb_io:
+  case Hexagon::L2_loadrub_io:
+    if (MI->getOperand(2).isFI() &&
+        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+      FrameIndex = MI->getOperand(2).getIndex();
+      return MI->getOperand(0).getReg();
     }
-    return 1;
+    break;
   }
-  assert((!Cond.empty()) &&
-         "Cond. cannot be empty when multiple branchings are required");
-  assert((!isNewValueJump(Cond[0].getImm())) &&
-         "NV-jump cannot be inserted with another branch");
-  // Special case for hardware loops. The condition is a basic block.
-  if (isEndLoopN(Cond[0].getImm())) {
-    int EndLoopOp = Cond[0].getImm();
-    assert(Cond[1].isMBB());
-    // Since we're adding an ENDLOOP, there better be a LOOP instruction.
-    // Check for it, and change the BB target if needed.
-    SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
-    MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
-    assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
-    Loop->getOperand(0).setMBB(TBB);
-    // Add the ENDLOOP after the finding the LOOP0.
-    BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
-  } else {
-    const MachineOperand &RO = Cond[1];
-    unsigned Flags = getUndefRegState(RO.isUndef());
-    BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
-  }
-  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+  return 0;
+}
 
-  return 2;
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                              int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  case Hexagon::S2_storeri_io:
+  case Hexagon::S2_storerd_io:
+  case Hexagon::S2_storerh_io:
+  case Hexagon::S2_storerb_io:
+    if (MI->getOperand(2).isFI() &&
+        MI->getOperand(1).isImm() && (MI->getOperand(1).getImm() == 0)) {
+      FrameIndex = MI->getOperand(0).getIndex();
+      return MI->getOperand(2).getReg();
+    }
+    break;
+  }
+  return 0;
 }
 
 
@@ -269,9 +289,6 @@ unsigned HexagonInstrInfo::InsertBranch(
 /// Cond[0] = Hexagon::CMPEQri_f_Jumpnv_t_V4 -- specific opcode
 /// Cond[1] = R
 /// Cond[2] = Imm
-/// @note Related function is \fn findInstrPredicate which fills in
-/// Cond. vector when a predicated instruction is passed to it.
-/// We follow same protocol in that case too.
 ///
 bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                      MachineBasicBlock *&TBB,
@@ -314,7 +331,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       return false;
     --I;
   }
-  
+
   bool JumpToBlock = I->getOpcode() == Hexagon::J2_jump &&
                      I->getOperand(0).isMBB();
   // Delete the J2_jump if it's equivalent to a fall-through.
@@ -327,17 +344,17 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       return false;
     --I;
   }
-  if (!isUnpredicatedTerminator(I))
+  if (!isUnpredicatedTerminator(&*I))
     return false;
 
   // Get the last instruction in the block.
-  MachineInstr *LastInst = I;
+  MachineInstr *LastInst = &*I;
   MachineInstr *SecondLastInst = nullptr;
   // Find one more terminator if present.
-  do {
-    if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) {
+  for (;;) {
+    if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
       if (!SecondLastInst)
-        SecondLastInst = I;
+        SecondLastInst = &*I;
       else
         // This is a third branch.
         return true;
@@ -345,7 +362,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
     if (I == MBB.instr_begin())
       break;
     --I;
-  } while(I);
+  }
 
   int LastOpcode = LastInst->getOpcode();
   int SecLastOpcode = SecondLastInst ? SecondLastInst->getOpcode() : 0;
@@ -418,7 +435,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   // executed, so remove it.
   if (SecLastOpcode == Hexagon::J2_jump && LastOpcode == Hexagon::J2_jump) {
     TBB = SecondLastInst->getOperand(0).getMBB();
-    I = LastInst;
+    I = LastInst->getIterator();
     if (AllowModify)
       I->eraseFromParent();
     return false;
@@ -438,6 +455,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   return true;
 }
 
+
 unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
   MachineBasicBlock::iterator I = MBB.end();
@@ -458,13 +476,818 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   return Count;
 }
 
+
+unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+      MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+      ArrayRef<MachineOperand> Cond, DebugLoc DL) const {
+  unsigned BOpc = Hexagon::J2_jump;
+  unsigned BccOpc = Hexagon::J2_jumpt;
+  assert(validateBranchCond(Cond) && "Invalid branching condition");
+  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+  // Check if ReverseBranchCondition has asked to reverse this branch
+  // If we want to reverse the branch an odd number of times, we want
+  // J2_jumpf.
+  if (!Cond.empty() && Cond[0].isImm())
+    BccOpc = Cond[0].getImm();
+
+  if (!FBB) {
+    if (Cond.empty()) {
+      // Due to a bug in TailMerging/CFG Optimization, we need to add a
+      // special case handling of a predicated jump followed by an
+      // unconditional jump. If not, Tail Merging and CFG Optimization go
+      // into an infinite loop.
+      MachineBasicBlock *NewTBB, *NewFBB;
+      SmallVector<MachineOperand, 4> Cond;
+      MachineInstr *Term = MBB.getFirstTerminator();
+      if (Term != MBB.end() && isPredicated(Term) &&
+          !AnalyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
+        MachineBasicBlock *NextBB = &*++MBB.getIterator();
+        if (NewTBB == NextBB) {
+          ReverseBranchCondition(Cond);
+          RemoveBranch(MBB);
+          return InsertBranch(MBB, TBB, nullptr, Cond, DL);
+        }
+      }
+      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+    } else if (isEndLoopN(Cond[0].getImm())) {
+      int EndLoopOp = Cond[0].getImm();
+      assert(Cond[1].isMBB());
+      // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+      // Check for it, and change the BB target if needed.
+      SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+      MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+      assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+      Loop->getOperand(0).setMBB(TBB);
+      // Add the ENDLOOP after finding the LOOP0.
+      BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+    } else if (isNewValueJump(Cond[0].getImm())) {
+      assert((Cond.size() == 3) && "Only supporting rr/ri version of nvjump");
+      // New value jump
+      // (ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset)
+      // (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset)
+      unsigned Flags1 = getUndefRegState(Cond[1].isUndef());
+      DEBUG(dbgs() << "\nInserting NVJump for BB#" << MBB.getNumber(););
+      if (Cond[2].isReg()) {
+        unsigned Flags2 = getUndefRegState(Cond[2].isUndef());
+        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+          addReg(Cond[2].getReg(), Flags2).addMBB(TBB);
+      } else if(Cond[2].isImm()) {
+        BuildMI(&MBB, DL, get(BccOpc)).addReg(Cond[1].getReg(), Flags1).
+          addImm(Cond[2].getImm()).addMBB(TBB);
+      } else
+        llvm_unreachable("Invalid condition for branching");
+    } else {
+      assert((Cond.size() == 2) && "Malformed cond vector");
+      const MachineOperand &RO = Cond[1];
+      unsigned Flags = getUndefRegState(RO.isUndef());
+      BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+    }
+    return 1;
+  }
+  assert((!Cond.empty()) &&
+         "Cond. cannot be empty when multiple branchings are required");
+  assert((!isNewValueJump(Cond[0].getImm())) &&
+         "NV-jump cannot be inserted with another branch");
+  // Special case for hardware loops. The condition is a basic block.
+  if (isEndLoopN(Cond[0].getImm())) {
+    int EndLoopOp = Cond[0].getImm();
+    assert(Cond[1].isMBB());
+    // Since we're adding an ENDLOOP, there better be a LOOP instruction.
+    // Check for it, and change the BB target if needed.
+    SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+    MachineInstr *Loop = findLoopInstr(TBB, EndLoopOp, VisitedBBs);
+    assert(Loop != 0 && "Inserting an ENDLOOP without a LOOP");
+    Loop->getOperand(0).setMBB(TBB);
+    // Add the ENDLOOP after finding the LOOP0.
+    BuildMI(&MBB, DL, get(EndLoopOp)).addMBB(TBB);
+  } else {
+    const MachineOperand &RO = Cond[1];
+    unsigned Flags = getUndefRegState(RO.isUndef());
+    BuildMI(&MBB, DL, get(BccOpc)).addReg(RO.getReg(), Flags).addMBB(TBB);
+  }
+  BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+
+  return 2;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+      unsigned NumCycles, unsigned ExtraPredCycles,
+      BranchProbability Probability) const {
+  return nonDbgBBSize(&MBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+      unsigned NumTCycles, unsigned ExtraTCycles, MachineBasicBlock &FMBB,
+      unsigned NumFCycles, unsigned ExtraFCycles, BranchProbability Probability)
+      const {
+  return nonDbgBBSize(&TMBB) <= 3 && nonDbgBBSize(&FMBB) <= 3;
+}
+
+
+bool HexagonInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+      unsigned NumInstrs, BranchProbability Probability) const {
+  return NumInstrs <= 4;
+}
+
+
+void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+      MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg,
+      unsigned SrcReg, bool KillSrc) const {
+  auto &HRI = getRegisterInfo();
+  if (Hexagon::IntRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), DestReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::DoubleRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::A2_tfrp), DestReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::PredRegsRegClass.contains(SrcReg, DestReg)) {
+    // Map Pd = Ps to Pd = or(Ps, Ps).
+    BuildMI(MBB, I, DL, get(Hexagon::C2_or),
+            DestReg).addReg(SrcReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::DoubleRegsRegClass.contains(DestReg) &&
+      Hexagon::IntRegsRegClass.contains(SrcReg)) {
+    // We can have an overlap between single and double reg: r1:0 = r0.
+    if(SrcReg == RI.getSubReg(DestReg, Hexagon::subreg_loreg)) {
+      // r1:0 = r0
+      BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg,
+              Hexagon::subreg_hireg))).addImm(0);
+    } else {
+      // r1:0 = r1 or no overlap.
+      BuildMI(MBB, I, DL, get(Hexagon::A2_tfr), (RI.getSubReg(DestReg,
+              Hexagon::subreg_loreg))).addReg(SrcReg);
+      BuildMI(MBB, I, DL, get(Hexagon::A2_tfrsi), (RI.getSubReg(DestReg,
+              Hexagon::subreg_hireg))).addImm(0);
+    }
+    return;
+  }
+  if (Hexagon::CtrRegsRegClass.contains(DestReg) &&
+      Hexagon::IntRegsRegClass.contains(SrcReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::A2_tfrrcr), DestReg).addReg(SrcReg);
+    return;
+  }
+  if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+      Hexagon::IntRegsRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
+      Hexagon::PredRegsRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::PredRegsRegClass.contains(SrcReg) &&
+      Hexagon::IntRegsRegClass.contains(DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::C2_tfrpr), DestReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::VectorRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::V6_vassign), DestReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg).
+      addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+             getKillRegState(KillSrc)).
+      addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+             getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DestReg).
+      addReg(SrcReg).
+      addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
+  if (Hexagon::VecPredRegsRegClass.contains(SrcReg) &&
+      Hexagon::VectorRegsRegClass.contains(DestReg)) {
+    llvm_unreachable("Unimplemented pred to vec");
+    return;
+  }
+  if (Hexagon::VecPredRegsRegClass.contains(DestReg) &&
+      Hexagon::VectorRegsRegClass.contains(SrcReg)) {
+    llvm_unreachable("Unimplemented vec to pred");
+    return;
+  }
+  if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) {
+    BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+            HRI.getSubReg(DestReg, Hexagon::subreg_hireg)).
+      addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg),
+             getKillRegState(KillSrc));
+    BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and),
+            HRI.getSubReg(DestReg, Hexagon::subreg_loreg)).
+      addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg),
+             getKillRegState(KillSrc));
+    return;
+  }
+
+#ifndef NDEBUG
+  // Show the invalid registers to ease debugging.
+  dbgs() << "Invalid registers for copy in BB#" << MBB.getNumber()
+         << ": " << PrintReg(DestReg, &HRI)
+         << " = " << PrintReg(SrcReg, &HRI) << '\n';
+#endif
+  llvm_unreachable("Unimplemented");
+}
+
+
+void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+      MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI,
+      const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(I);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+      MFI.getObjectSize(FI), Align);
+
+  if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io))
+      .addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io))
+      .addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) {
+    BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
+      .addFrameIndex(FI).addImm(0)
+      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
+  } else {
+    llvm_unreachable("Unimplemented");
+  }
+}
+
+
+void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+      MachineBasicBlock::iterator I, unsigned DestReg, int FI,
+      const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
+  DebugLoc DL = MBB.findDebugLoc(I);
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = *MF.getFrameInfo();
+  unsigned Align = MFI.getObjectAlignment(FI);
+
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+      MFI.getObjectSize(FI), Align);
+  if (RC == &Hexagon::IntRegsRegClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg)
+      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (RC == &Hexagon::DoubleRegsRegClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg)
+      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else if (RC == &Hexagon::PredRegsRegClass) {
+    BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
+      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+  } else {
+    llvm_unreachable("Can't store this register to stack slot");
+  }
+}
+
+
+/// expandPostRAPseudo - This function is called for all pseudo instructions
+/// that remain after register allocation. Many pseudo instructions are
+/// created to help register allocation. This is the place to convert them
+/// into real instructions. The target can edit MI in place, or it can insert
+/// new instructions and erase MI. The function should return true if
+/// anything was changed.
+bool HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI)
+      const {
+  const HexagonRegisterInfo &HRI = getRegisterInfo();
+  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  MachineBasicBlock &MBB = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned Opc = MI->getOpcode();
+  const unsigned VecOffset = 1;
+  bool Is128B = false;
+
+  switch (Opc) {
+  case Hexagon::ALIGNA:
+    BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
+        .addReg(HRI.getFrameRegister())
+        .addImm(-MI->getOperand(1).getImm());
+    MBB.erase(MI);
+    return true;
+  case Hexagon::HEXAGON_V6_vassignp_128B:
+  case Hexagon::HEXAGON_V6_vassignp: {
+    unsigned SrcReg = MI->getOperand(1).getReg();
+    unsigned DstReg = MI->getOperand(0).getReg();
+    if (SrcReg != DstReg)
+      copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI->getOperand(1).isKill());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::HEXAGON_V6_lo_128B:
+  case Hexagon::HEXAGON_V6_lo: {
+    unsigned SrcReg = MI->getOperand(1).getReg();
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+    copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI->getOperand(1).isKill());
+    MBB.erase(MI);
+    MRI.clearKillFlags(SrcSubLo);
+    return true;
+  }
+  case Hexagon::HEXAGON_V6_hi_128B:
+  case Hexagon::HEXAGON_V6_hi: {
+    unsigned SrcReg = MI->getOperand(1).getReg();
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+    copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI->getOperand(1).isKill());
+    MBB.erase(MI);
+    MRI.clearKillFlags(SrcSubHi);
+    return true;
+  }
+  case Hexagon::STrivv_indexed_128B:
+    Is128B = true;
+  case Hexagon::STrivv_indexed: {
+    unsigned SrcReg = MI->getOperand(2).getReg();
+    unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+    unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+    unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B
+                              : Hexagon::V6_vS32b_ai;
+    unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+    MachineInstr *MI1New = BuildMI(MBB, MI, DL, get(NewOpcd))
+        .addOperand(MI->getOperand(0))
+        .addImm(MI->getOperand(1).getImm())
+        .addReg(SrcSubLo)
+        .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MI1New->getOperand(0).setIsKill(false);
+    BuildMI(MBB, MI, DL, get(NewOpcd))
+        .addOperand(MI->getOperand(0))
+        // The Vectors are indexed in multiples of vector size.
+        .addImm(MI->getOperand(1).getImm()+Offset)
+        .addReg(SrcSubHi)
+        .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::LDrivv_pseudo_V6_128B:
+  case Hexagon::LDrivv_indexed_128B:
+    Is128B = true;
+  case Hexagon::LDrivv_pseudo_V6:
+  case Hexagon::LDrivv_indexed: {
+    unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B
+                              : Hexagon::V6_vL32b_ai;
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
+    MachineInstr *MI1New =
+        BuildMI(MBB, MI, DL, get(NewOpcd),
+                HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
+            .addOperand(MI->getOperand(1))
+            .addImm(MI->getOperand(2).getImm());
+    MI1New->getOperand(1).setIsKill(false);
+    BuildMI(MBB, MI, DL, get(NewOpcd),
+            HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
+        .addOperand(MI->getOperand(1))
+        // The Vectors are indexed in multiples of vector size.
+        .addImm(MI->getOperand(2).getImm() + Offset)
+        .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::LDriv_pseudo_V6_128B:
+    Is128B = true;
+  case Hexagon::LDriv_pseudo_V6: {
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B
+                             : Hexagon::V6_vL32b_ai;
+    int32_t Off = MI->getOperand(2).getImm();
+    int32_t Idx = Off;
+    BuildMI(MBB, MI, DL, get(NewOpc), DstReg)
+      .addOperand(MI->getOperand(1))
+      .addImm(Idx)
+      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::STriv_pseudo_V6_128B:
+    Is128B = true;
+  case Hexagon::STriv_pseudo_V6: {
+    unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B
+                             : Hexagon::V6_vS32b_ai;
+    int32_t Off = MI->getOperand(1).getImm();
+    int32_t Idx = Is128B ? (Off >> 7) : (Off >> 6);
+    BuildMI(MBB, MI, DL, get(NewOpc))
+      .addOperand(MI->getOperand(0))
+      .addImm(Idx)
+      .addOperand(MI->getOperand(2))
+      .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::TFR_PdTrue: {
+    unsigned Reg = MI->getOperand(0).getReg();
+    BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
+      .addReg(Reg, RegState::Undef)
+      .addReg(Reg, RegState::Undef);
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::TFR_PdFalse: {
+    unsigned Reg = MI->getOperand(0).getReg();
+    BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
+      .addReg(Reg, RegState::Undef)
+      .addReg(Reg, RegState::Undef);
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::VMULW: {
+    // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned Src1Reg = MI->getOperand(1).getReg();
+    unsigned Src2Reg = MI->getOperand(2).getReg();
+    unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+    unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+    unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+    unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+            HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+      .addReg(Src2SubHi);
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
+            HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+      .addReg(Src2SubLo);
+    MBB.erase(MI);
+    MRI.clearKillFlags(Src1SubHi);
+    MRI.clearKillFlags(Src1SubLo);
+    MRI.clearKillFlags(Src2SubHi);
+    MRI.clearKillFlags(Src2SubLo);
+    return true;
+  }
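The VMULW expansion above lowers a 64-bit "vector of two i32" multiply into two independent M2_mpyi scalar multiplies on the halves of the register pairs. Its element-wise semantics, as a hedged C++ sketch (the function name is illustrative):

#include <cstdint>

// Semantics sketch of the VMULW expansion: index 1 models the high half
// (subreg_hireg), index 0 the low half (subreg_loreg); each 32-bit lane is
// multiplied independently.
static void vmulw(int32_t dst[2], const int32_t s1[2], const int32_t s2[2]) {
  dst[1] = s1[1] * s2[1]; // M2_mpyi on the high halves
  dst[0] = s1[0] * s2[0]; // M2_mpyi on the low halves
}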
+  case Hexagon::VMULW_ACC: {
+    // Expand 64-bit vector multiply with addition into 2 scalar multiplies.
+    unsigned DstReg = MI->getOperand(0).getReg();
+    unsigned Src1Reg = MI->getOperand(1).getReg();
+    unsigned Src2Reg = MI->getOperand(2).getReg();
+    unsigned Src3Reg = MI->getOperand(3).getReg();
+    unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
+    unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
+    unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
+    unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+    unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
+    unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+            HRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
+      .addReg(Src2SubHi).addReg(Src3SubHi);
+    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci),
+            HRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
+      .addReg(Src2SubLo).addReg(Src3SubLo);
+    MBB.erase(MI);
+    MRI.clearKillFlags(Src1SubHi);
+    MRI.clearKillFlags(Src1SubLo);
+    MRI.clearKillFlags(Src2SubHi);
+    MRI.clearKillFlags(Src2SubLo);
+    MRI.clearKillFlags(Src3SubHi);
+    MRI.clearKillFlags(Src3SubLo);
+    return true;
+  }
+  case Hexagon::MUX64_rr: {
+    const MachineOperand &Op0 = MI->getOperand(0);
+    const MachineOperand &Op1 = MI->getOperand(1);
+    const MachineOperand &Op2 = MI->getOperand(2);
+    const MachineOperand &Op3 = MI->getOperand(3);
+    unsigned Rd = Op0.getReg();
+    unsigned Pu = Op1.getReg();
+    unsigned Rs = Op2.getReg();
+    unsigned Rt = Op3.getReg();
+    DebugLoc DL = MI->getDebugLoc();
+    unsigned K1 = getKillRegState(Op1.isKill());
+    unsigned K2 = getKillRegState(Op2.isKill());
+    unsigned K3 = getKillRegState(Op3.isKill());
+    if (Rd != Rs)
+      BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpt), Rd)
+        .addReg(Pu, (Rd == Rt) ? K1 : 0)
+        .addReg(Rs, K2);
+    if (Rd != Rt)
+      BuildMI(MBB, MI, DL, get(Hexagon::A2_tfrpf), Rd)
+        .addReg(Pu, K1)
+        .addReg(Rt, K3);
+    MBB.erase(MI);
+    return true;
+  }
+  case Hexagon::TCRETURNi:
+    MI->setDesc(get(Hexagon::J2_jump));
+    return true;
+  case Hexagon::TCRETURNr:
+    MI->setDesc(get(Hexagon::J2_jumpr));
+    return true;
+  case Hexagon::TFRI_f:
+  case Hexagon::TFRI_cPt_f:
+  case Hexagon::TFRI_cNotPt_f: {
+    unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2;
+    APFloat FVal = MI->getOperand(Opx).getFPImm()->getValueAPF();
+    APInt IVal = FVal.bitcastToAPInt();
+    MI->RemoveOperand(Opx);
+    unsigned NewOpc = (Opc == Hexagon::TFRI_f)     ? Hexagon::A2_tfrsi :
+                      (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit :
+                                                     Hexagon::C2_cmoveif;
+    MI->setDesc(get(NewOpc));
+    MI->addOperand(MachineOperand::CreateImm(IVal.getZExtValue()));
+    return true;
+  }
+  }
+
+  return false;
+}
+
+
+// We indicate that we want to reverse the branch by
+// inserting the reversed branching opcode.
+bool HexagonInstrInfo::ReverseBranchCondition(
+      SmallVectorImpl<MachineOperand> &Cond) const {
+  if (Cond.empty())
+    return true;
+  assert(Cond[0].isImm() && "First entry in the cond vector not imm-val");
+  unsigned opcode = Cond[0].getImm();
+  //unsigned temp;
+  assert(get(opcode).isBranch() && "Should be a branching condition.");
+  if (isEndLoopN(opcode))
+    return true;
+  unsigned NewOpcode = getInvertedPredicatedOpcode(opcode);
+  Cond[0].setImm(NewOpcode);
+  return false;
+}
+
+
+void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB,
+      MachineBasicBlock::iterator MI) const {
+  DebugLoc DL;
+  BuildMI(MBB, MI, DL, get(Hexagon::A2_nop));
+}
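ReverseBranchCondition above flips a branch by storing the inverted opcode back into Cond[0] via getInvertedPredicatedOpcode. A hypothetical illustration of the kind of opcode pairing that function relies on; only the J2_jumpt/J2_jumpf pair is grounded in this patch (see the InsertBranch comment), so the sketch refuses anything else:

// Illustrative only: inverted-opcode pairing in the style the code above uses.
static unsigned invertJumpOpcode(unsigned Opc) {
  switch (Opc) {
  case Hexagon::J2_jumpt: return Hexagon::J2_jumpf; // if (p) jump -> if (!p)
  case Hexagon::J2_jumpf: return Hexagon::J2_jumpt; // if (!p) jump -> if (p)
  default: llvm_unreachable("opcode not handled in this sketch");
  }
}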
+// if (p0) R1 = add(R2, R3)
+// if (!p0) R1 = add(R2, R3)
+// if (p0.new) R1 = add(R2, R3)
+// if (!p0.new) R1 = add(R2, R3)
+// Note: New-value stores are not included here as in the current
+// implementation, we don't need to check their predicate sense.
+bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask;
+}
+
+
+bool HexagonInstrInfo::PredicateInstruction(MachineInstr *MI,
+      ArrayRef<MachineOperand> Cond) const {
+  if (Cond.empty() || isNewValueJump(Cond[0].getImm()) ||
+      isEndLoopN(Cond[0].getImm())) {
+    DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
+    return false;
+  }
+  int Opc = MI->getOpcode();
+  assert(isPredicable(MI) && "Expected predicable instruction");
+  bool invertJump = predOpcodeHasNot(Cond);
+
+  // We have to predicate MI "in place", i.e. after this function returns,
+  // MI will need to be transformed into a predicated form. To avoid com-
+  // plicated manipulations with the operands (handling tied operands,
+  // etc.), build a new temporary instruction, then overwrite MI with it.
+
+  MachineBasicBlock &B = *MI->getParent();
+  DebugLoc DL = MI->getDebugLoc();
+  unsigned PredOpc = getCondOpcode(Opc, invertJump);
+  MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc));
+  unsigned NOp = 0, NumOps = MI->getNumOperands();
+  while (NOp < NumOps) {
+    MachineOperand &Op = MI->getOperand(NOp);
+    if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+      break;
+    T.addOperand(Op);
+    NOp++;
+  }
+
+  unsigned PredReg, PredRegPos, PredRegFlags;
+  bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags);
+  (void)GotPredReg;
+  assert(GotPredReg);
+  T.addReg(PredReg, PredRegFlags);
+  while (NOp < NumOps)
+    T.addOperand(MI->getOperand(NOp++));
+
+  MI->setDesc(get(PredOpc));
+  while (unsigned n = MI->getNumOperands())
+    MI->RemoveOperand(n-1);
+  for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i)
+    MI->addOperand(T->getOperand(i));
+
+  MachineBasicBlock::instr_iterator TI = T->getIterator();
+  B.erase(TI);
+
+  MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
+  MRI.clearKillFlags(PredReg);
+  return true;
+}
+
+
+bool HexagonInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
+      ArrayRef<MachineOperand> Pred2) const {
+  // TODO: Fix this
+  return false;
+}
+
+
+bool HexagonInstrInfo::DefinesPredicate(MachineInstr *MI,
+      std::vector<MachineOperand> &Pred) const {
+  auto &HRI = getRegisterInfo();
+  for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) {
+    MachineOperand MO = MI->getOperand(oper);
+    if (MO.isReg() && MO.isDef()) {
+      const TargetRegisterClass* RC = HRI.getMinimalPhysRegClass(MO.getReg());
+      if (RC == &Hexagon::PredRegsRegClass) {
+        Pred.push_back(MO);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const {
+  bool isPred = MI->getDesc().isPredicable();
+
+  if (!isPred)
+    return false;
+
+  const int Opc = MI->getOpcode();
+  int NumOperands = MI->getNumOperands();
+
+  // Keep a flag for up to 4 operands in the instructions, to indicate if
+  // that operand has been constant extended.
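+  // Extension state is only tracked for the first four operands; of these,
+  // only the flags for operands 1 and 2 are consulted below (in the
+  // S4_storeir*_io cases).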
+ bool OpCExtended[4]; + if (NumOperands > 4) + NumOperands = 4; + + for (int i = 0; i < NumOperands; i++) + OpCExtended[i] = (isOperandExtended(MI, i) && isConstExtended(MI)); + + switch(Opc) { + case Hexagon::A2_tfrsi: + return (isOperandExtended(MI, 1) && isConstExtended(MI)) || + isInt<12>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerd_io: + return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerinew_io: + return isShiftedUInt<6,2>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerhnew_io: + return isShiftedUInt<6,1>(MI->getOperand(1).getImm()); + + case Hexagon::S2_storerb_io: + case Hexagon::S2_storerbnew_io: + return isUInt<6>(MI->getOperand(1).getImm()); + + case Hexagon::L2_loadrd_io: + return isShiftedUInt<6,3>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadri_io: + return isShiftedUInt<6,2>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + return isShiftedUInt<6,1>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + return isUInt<6>(MI->getOperand(2).getImm()); + + case Hexagon::L2_loadrd_pi: + return isShiftedInt<4,3>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadri_pi: + return isShiftedInt<4,2>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadrh_pi: + case Hexagon::L2_loadruh_pi: + return isShiftedInt<4,1>(MI->getOperand(3).getImm()); + + case Hexagon::L2_loadrb_pi: + case Hexagon::L2_loadrub_pi: + return isInt<4>(MI->getOperand(3).getImm()); + + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: + return (OpCExtended[1] || isUInt<6>(MI->getOperand(1).getImm())) && + (OpCExtended[2] || isInt<6>(MI->getOperand(2).getImm())); + + case Hexagon::A2_addi: + return isInt<8>(MI->getOperand(2).getImm()); + + case Hexagon::A2_aslh: + case Hexagon::A2_asrh: + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + return true; + } + + return true; +} + + +bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, const MachineFunction &MF) const { + // Debug info is never a scheduling boundary. It's necessary to be explicit + // due to the special treatment of IT instructions below, otherwise a + // dbg_value followed by an IT will result in the IT instruction being + // considered a scheduling hazard, which is wrong. It should be the actual + // instruction preceding the dbg_value instruction(s), just like it is + // when debug info is not present. + if (MI->isDebugValue()) + return false; + + // Throwing call is a boundary. + if (MI->isCall()) { + // If any of the block's successors is a landing pad, this could be a + // throwing call. + for (auto I : MBB->successors()) + if (I->isEHPad()) + return true; + } + + // Don't mess around with no return calls. + if (MI->getOpcode() == Hexagon::CALLv3nr) + return true; + + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isPosition()) + return true; + + if (MI->isInlineAsm() && !ScheduleInlineAsm) + return true; + + return false; +} + + +/// Measure the specified inline asm to determine an approximation of its +/// length. +/// Comments (which run till the next SeparatorString or newline) do not +/// count as an instruction. +/// Any other non-whitespace text is considered an instruction, with +/// multiple instructions separated by SeparatorString or newlines. 
+/// Variable-length instructions are not handled here; this function
+/// may be overloaded in the target code to do that.
+/// Hexagon counts the number of ##'s and adjusts for that many
+/// constant extenders.
+unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
+      const MCAsmInfo &MAI) const {
+  StringRef AStr(Str);
+  // Count the number of instructions in the asm.
+  bool atInsnStart = true;
+  unsigned Length = 0;
+  for (; *Str; ++Str) {
+    if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
+                                strlen(MAI.getSeparatorString())) == 0)
+      atInsnStart = true;
+    if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
+      Length += MAI.getMaxInstLength();
+      atInsnStart = false;
+    }
+    if (atInsnStart && strncmp(Str, MAI.getCommentString(),
+                               strlen(MAI.getCommentString())) == 0)
+      atInsnStart = false;
+  }
+
+  // Add to size number of constant extenders seen * 4.
+  StringRef Occ("##");
+  Length += AStr.count(Occ)*4;
+  return Length;
+}
+
+
+ScheduleHazardRecognizer*
+HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
+      const InstrItineraryData *II, const ScheduleDAG *DAG) const {
+  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+
 /// \brief For a comparison instruction, return the source registers in
 /// \p SrcReg and \p SrcReg2 if having two register operands, and the value it
 /// compares against in CmpValue. Return true if the comparison instruction
 /// can be analyzed.
 bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
-                                      unsigned &SrcReg, unsigned &SrcReg2,
-                                      int &Mask, int &Value) const {
+      unsigned &SrcReg, unsigned &SrcReg2, int &Mask, int &Value) const {
   unsigned Opc = MI->getOpcode();
   // Set mask and the first source register.
@@ -548,247 +1371,66 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr *MI,
 }
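(Editorial aside: the sketch below is a minimal, standalone rendition of the counting scheme that getInlineAsmLength above implements. The helper name is hypothetical; it assumes 4-byte instruction words and omits the separator-string and comment handling of the real code.)

    #include <cctype>
    #include <cstring>

    // Every instruction is charged the maximum length (4 bytes on Hexagon);
    // each "##" marker then adds 4 more bytes for a constant extender.
    unsigned estimateInlineAsmLength(const char *Str) {
      const unsigned MaxInstLength = 4;
      bool AtInsnStart = true;
      unsigned Length = 0;
      for (const char *P = Str; *P; ++P) {
        if (*P == '\n')
          AtInsnStart = true;               // Newlines separate instructions.
        if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*P))) {
          Length += MaxInstLength;          // First solid character of an insn.
          AtInsnStart = false;
        }
      }
      // Count non-overlapping "##" occurrences, one extender word apiece.
      for (const char *P = std::strstr(Str, "##"); P;
           P = std::strstr(P + 2, "##"))
        Length += 4;
      return Length;
    }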
-    addReg(SrcReg, getKillRegState(KillSrc));
-    return;
-  }
-  if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
-      Hexagon::PredRegsRegClass.contains(DestReg)) {
-    BuildMI(MBB, I, DL, get(Hexagon::C2_tfrrp), DestReg).
-      addReg(SrcReg, getKillRegState(KillSrc));
-    return;
-  }
-
-  llvm_unreachable("Unimplemented");
+unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
+      const MachineInstr *MI, unsigned *PredCost) const {
+  return getInstrTimingClassLatency(ItinData, MI);
 }
-void HexagonInstrInfo::
-storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                    unsigned SrcReg, bool isKill, int FI,
-                    const TargetRegisterClass *RC,
-                    const TargetRegisterInfo *TRI) const {
-
-  DebugLoc DL = MBB.findDebugLoc(I);
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
-  unsigned Align = MFI.getObjectAlignment(FI);
-
-  MachineMemOperand *MMO =
-      MF.getMachineMemOperand(
-          MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
-          MachineMemOperand::MOStore,
-          MFI.getObjectSize(FI),
-          Align);
-
-  if (Hexagon::IntRegsRegClass.hasSubClassEq(RC)) {
-    BuildMI(MBB, I, DL, get(Hexagon::S2_storeri_io))
-      .addFrameIndex(FI).addImm(0)
-      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
-  } else if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC)) {
-    BuildMI(MBB, I, DL, get(Hexagon::S2_storerd_io))
-      .addFrameIndex(FI).addImm(0)
-      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
-  } else if (Hexagon::PredRegsRegClass.hasSubClassEq(RC)) {
-    BuildMI(MBB, I, DL, get(Hexagon::STriw_pred))
-      .addFrameIndex(FI).addImm(0)
-      .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
-  } else {
-    llvm_unreachable("Unimplemented");
-  }
+DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
+      const TargetSubtargetInfo &STI) const {
+  const InstrItineraryData *II = STI.getInstrItineraryData();
+  return static_cast<const HexagonSubtarget &>(STI).createDFAPacketizer(II);
 }
-void HexagonInstrInfo::storeRegToAddr(
-                                 MachineFunction &MF, unsigned SrcReg,
-                                 bool isKill,
-                                 SmallVectorImpl<MachineOperand> &Addr,
-                                 const TargetRegisterClass *RC,
-                                 SmallVectorImpl<MachineInstr*> &NewMIs) const
-{
-  llvm_unreachable("Unimplemented");
-}
+// Inspired by this pair:
+//   %R13<def> = L2_loadri_io %R29, 136; mem:LD4[FixedStack0]
+//   S2_storeri_io %R29, 132, %R1<kill>; flags: mem:ST4[FixedStack1]
+// Currently AA considers the addresses in these instructions to be aliasing.
+bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr *MIa,
+      MachineInstr *MIb, AliasAnalysis *AA) const {
+  int OffsetA = 0, OffsetB = 0;
+  unsigned SizeA = 0, SizeB = 0;
+  if (MIa->hasUnmodeledSideEffects() || MIb->hasUnmodeledSideEffects() ||
+      MIa->hasOrderedMemoryRef() || MIb->hasOrderedMemoryRef())
+    return false;
-void HexagonInstrInfo::
-loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                     unsigned DestReg, int FI,
-                     const TargetRegisterClass *RC,
-                     const TargetRegisterInfo *TRI) const {
-  DebugLoc DL = MBB.findDebugLoc(I);
-  MachineFunction &MF = *MBB.getParent();
-  MachineFrameInfo &MFI = *MF.getFrameInfo();
-  unsigned Align = MFI.getObjectAlignment(FI);
+  // Instructions that are pure loads, not loads and stores like memops, are
+  // not dependent.
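+  // (For the motivating pair above, the reasoning below proves disjointness:
+  // the store covers [132, 136) and the load covers [136, 140) off the same
+  // base register, and 132 + 4 <= 136.)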
+  if (MIa->mayLoad() && !isMemOp(MIa) && MIb->mayLoad() && !isMemOp(MIb))
+    return true;
-  MachineMemOperand *MMO =
-      MF.getMachineMemOperand(
-          MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
-          MachineMemOperand::MOLoad,
-          MFI.getObjectSize(FI),
-          Align);
-  if (RC == &Hexagon::IntRegsRegClass) {
-    BuildMI(MBB, I, DL, get(Hexagon::L2_loadri_io), DestReg)
-      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
-  } else if (RC == &Hexagon::DoubleRegsRegClass) {
-    BuildMI(MBB, I, DL, get(Hexagon::L2_loadrd_io), DestReg)
-      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
-  } else if (RC == &Hexagon::PredRegsRegClass) {
-    BuildMI(MBB, I, DL, get(Hexagon::LDriw_pred), DestReg)
-      .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
-  } else {
-    llvm_unreachable("Can't store this register to stack slot");
-  }
-}
+  // Get base, offset, and access size in MIa.
+  unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA);
+  if (!BaseRegA || !SizeA)
+    return false;
+  // Get base, offset, and access size in MIb.
+  unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB);
+  if (!BaseRegB || !SizeB)
+    return false;
-void HexagonInstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
-                                       SmallVectorImpl<MachineOperand> &Addr,
-                                       const TargetRegisterClass *RC,
-                                       SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  llvm_unreachable("Unimplemented");
-}
-bool
-HexagonInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
-  const HexagonRegisterInfo &TRI = getRegisterInfo();
-  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
-  MachineBasicBlock &MBB = *MI->getParent();
-  DebugLoc DL = MI->getDebugLoc();
-  unsigned Opc = MI->getOpcode();
+  if (BaseRegA != BaseRegB)
+    return false;
-  switch (Opc) {
-  case Hexagon::ALIGNA:
-    BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI->getOperand(0).getReg())
-        .addReg(TRI.getFrameRegister())
-        .addImm(-MI->getOperand(1).getImm());
-    MBB.erase(MI);
-    return true;
-  case Hexagon::TFR_PdTrue: {
-    unsigned Reg = MI->getOperand(0).getReg();
-    BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
-      .addReg(Reg, RegState::Undef)
-      .addReg(Reg, RegState::Undef);
-    MBB.erase(MI);
-    return true;
-  }
-  case Hexagon::TFR_PdFalse: {
-    unsigned Reg = MI->getOperand(0).getReg();
-    BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
-      .addReg(Reg, RegState::Undef)
-      .addReg(Reg, RegState::Undef);
-    MBB.erase(MI);
-    return true;
-  }
-  case Hexagon::VMULW: {
-    // Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
-    unsigned DstReg = MI->getOperand(0).getReg();
-    unsigned Src1Reg = MI->getOperand(1).getReg();
-    unsigned Src2Reg = MI->getOperand(2).getReg();
-    unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
-    unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
-    unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
-    unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
-    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
-            TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi)
-        .addReg(Src2SubHi);
-    BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_mpyi),
-            TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo)
-        .addReg(Src2SubLo);
-    MBB.erase(MI);
-    MRI.clearKillFlags(Src1SubHi);
-    MRI.clearKillFlags(Src1SubLo);
-    MRI.clearKillFlags(Src2SubHi);
-    MRI.clearKillFlags(Src2SubLo);
-    return true;
-  }
-  case Hexagon::VMULW_ACC: {
-    // Expand 64-bit vector multiply with addition into 2 scalar multiplies.
- unsigned DstReg = MI->getOperand(0).getReg(); - unsigned Src1Reg = MI->getOperand(1).getReg(); - unsigned Src2Reg = MI->getOperand(2).getReg(); - unsigned Src3Reg = MI->getOperand(3).getReg(); - unsigned Src1SubHi = TRI.getSubReg(Src1Reg, Hexagon::subreg_hireg); - unsigned Src1SubLo = TRI.getSubReg(Src1Reg, Hexagon::subreg_loreg); - unsigned Src2SubHi = TRI.getSubReg(Src2Reg, Hexagon::subreg_hireg); - unsigned Src2SubLo = TRI.getSubReg(Src2Reg, Hexagon::subreg_loreg); - unsigned Src3SubHi = TRI.getSubReg(Src3Reg, Hexagon::subreg_hireg); - unsigned Src3SubLo = TRI.getSubReg(Src3Reg, Hexagon::subreg_loreg); - BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), - TRI.getSubReg(DstReg, Hexagon::subreg_hireg)).addReg(Src1SubHi) - .addReg(Src2SubHi).addReg(Src3SubHi); - BuildMI(MBB, MI, MI->getDebugLoc(), get(Hexagon::M2_maci), - TRI.getSubReg(DstReg, Hexagon::subreg_loreg)).addReg(Src1SubLo) - .addReg(Src2SubLo).addReg(Src3SubLo); - MBB.erase(MI); - MRI.clearKillFlags(Src1SubHi); - MRI.clearKillFlags(Src1SubLo); - MRI.clearKillFlags(Src2SubHi); - MRI.clearKillFlags(Src2SubLo); - MRI.clearKillFlags(Src3SubHi); - MRI.clearKillFlags(Src3SubLo); - return true; - } - case Hexagon::TCRETURNi: - MI->setDesc(get(Hexagon::J2_jump)); - return true; - case Hexagon::TCRETURNr: - MI->setDesc(get(Hexagon::J2_jumpr)); - return true; + // This is a mem access with the same base register and known offsets from it. + // Reason about it. + if (OffsetA > OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetA - (int64_t)OffsetB); + return (SizeB <= offDiff); + } else if (OffsetA < OffsetB) { + uint64_t offDiff = (uint64_t)((int64_t)OffsetB - (int64_t)OffsetA); + return (SizeA <= offDiff); } return false; } -MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl( - MachineFunction &MF, MachineInstr *MI, ArrayRef Ops, - MachineBasicBlock::iterator InsertPt, int FI) const { - // Hexagon_TODO: Implement. - return nullptr; -} unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { - - MachineRegisterInfo &RegInfo = MF->getRegInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); const TargetRegisterClass *TRC; if (VT == MVT::i1) { TRC = &Hexagon::PredRegsRegClass; @@ -800,629 +1442,74 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const { llvm_unreachable("Cannot handle this register class"); } - unsigned NewReg = RegInfo.createVirtualRegister(TRC); + unsigned NewReg = MRI.createVirtualRegister(TRC); return NewReg; } -bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const { - const MCInstrDesc &MID = MI->getDesc(); - const uint64_t F = MID.TSFlags; - if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask) - return true; - // TODO: This is largely obsolete now. Will need to be removed - // in consecutive patches. - switch(MI->getOpcode()) { - // TFR_FI Remains a special case. - case Hexagon::TFR_FI: - return true; - default: - return false; - } - return false; +bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const { + return (getAddrMode(MI) == HexagonII::AbsoluteSet); } -// This returns true in two cases: -// - The OP code itself indicates that this is an extended instruction. -// - One of MOs has been marked with HMOTF_ConstExtended flag. -bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const { - // First check if this is permanently extended op code. 
+ +bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; - if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask) - return true; - // Use MO operand flags to determine if one of MI's operands - // has HMOTF_ConstExtended flag set. - for (MachineInstr::const_mop_iterator I = MI->operands_begin(), - E = MI->operands_end(); I != E; ++I) { - if (I->getTargetFlags() && HexagonII::HMOTF_ConstExtended) - return true; - } - return false; + return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask); } -bool HexagonInstrInfo::isBranch (const MachineInstr *MI) const { - return MI->getDesc().isBranch(); -} -bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { - if (isNewValueJump(MI)) - return true; +bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const { + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - if (isNewValueStore(MI)) + if (!(isTC1(MI)) + && !(QII->isTC2Early(MI)) + && !(MI->getDesc().mayLoad()) + && !(MI->getDesc().mayStore()) + && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe) + && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe) + && !(QII->isMemOp(MI)) + && !(MI->isBranch()) + && !(MI->isReturn()) + && !MI->isCall()) return true; return false; } -bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); -} -bool HexagonInstrInfo::isNewValue(Opcode_t Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - return ((F >> HexagonII::NewValuePos) & HexagonII::NewValueMask); -} - -bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { - return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4; -} - -bool HexagonInstrInfo::isPredicable(MachineInstr *MI) const { - bool isPred = MI->getDesc().isPredicable(); - - if (!isPred) - return false; - - const int Opc = MI->getOpcode(); - - switch(Opc) { - case Hexagon::A2_tfrsi: - return (isOperandExtended(MI, 1) && isConstExtended(MI)) || isInt<12>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storerd_io: - return isShiftedUInt<6,3>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storeri_io: - case Hexagon::S2_storerinew_io: - return isShiftedUInt<6,2>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storerh_io: - case Hexagon::S2_storerhnew_io: - return isShiftedUInt<6,1>(MI->getOperand(1).getImm()); - - case Hexagon::S2_storerb_io: - case Hexagon::S2_storerbnew_io: - return isUInt<6>(MI->getOperand(1).getImm()); - - case Hexagon::L2_loadrd_io: - return isShiftedUInt<6,3>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadri_io: - return isShiftedUInt<6,2>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadrh_io: - case Hexagon::L2_loadruh_io: - return isShiftedUInt<6,1>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadrb_io: - case Hexagon::L2_loadrub_io: - return isUInt<6>(MI->getOperand(2).getImm()); - - case Hexagon::L2_loadrd_pi: - return isShiftedInt<4,3>(MI->getOperand(3).getImm()); - - case Hexagon::L2_loadri_pi: - return isShiftedInt<4,2>(MI->getOperand(3).getImm()); - - case Hexagon::L2_loadrh_pi: - case Hexagon::L2_loadruh_pi: - return isShiftedInt<4,1>(MI->getOperand(3).getImm()); - - case Hexagon::L2_loadrb_pi: - case Hexagon::L2_loadrub_pi: - return isInt<4>(MI->getOperand(3).getImm()); - - case Hexagon::S4_storeirb_io: - 
case Hexagon::S4_storeirh_io:
-  case Hexagon::S4_storeiri_io:
-    return (isUInt<6>(MI->getOperand(1).getImm()) &&
-            isInt<6>(MI->getOperand(2).getImm()));
-
-  case Hexagon::A2_addi:
-    return isInt<8>(MI->getOperand(2).getImm());
-
-  case Hexagon::A2_aslh:
-  case Hexagon::A2_asrh:
-  case Hexagon::A2_sxtb:
-  case Hexagon::A2_sxth:
-  case Hexagon::A2_zxtb:
-  case Hexagon::A2_zxth:
-    return true;
-  }
-
-  return true;
-}
-
-// This function performs the following inversions:
-//
-//   cPt    --->  cNotPt
-//   cNotPt --->  cPt
-//
-unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
-  int InvPredOpcode;
-  InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc)
-                                        : Hexagon::getTruePredOpcode(Opc);
-  if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate.
-    return InvPredOpcode;
-
-  switch(Opc) {
-  default: llvm_unreachable("Unexpected predicated instruction");
-  case Hexagon::C2_ccombinewt:
-    return Hexagon::C2_ccombinewf;
-  case Hexagon::C2_ccombinewf:
-    return Hexagon::C2_ccombinewt;
-
-  // Dealloc_return.
-  case Hexagon::L4_return_t:
-    return Hexagon::L4_return_f;
-  case Hexagon::L4_return_f:
-    return Hexagon::L4_return_t;
-  }
-}
-
-// New Value Store instructions.
-bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
-  const uint64_t F = MI->getDesc().TSFlags;
-
-  return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
-}
-
-bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
-  const uint64_t F = get(Opcode).TSFlags;
-
-  return ((F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask);
-}
-
-int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
-  enum Hexagon::PredSense inPredSense;
-  inPredSense = invertPredicate ? Hexagon::PredSense_false :
-                                  Hexagon::PredSense_true;
-  int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense);
-  if (CondOpcode >= 0) // Valid Conditional opcode/instruction
-    return CondOpcode;
-
-  // This switch case will be removed once all the instructions have been
-  // modified to use relation maps.
-  switch(Opc) {
-  case Hexagon::TFRI_f:
-    return !invertPredicate ? Hexagon::TFRI_cPt_f :
-                              Hexagon::TFRI_cNotPt_f;
-  case Hexagon::A2_combinew:
-    return !invertPredicate ? Hexagon::C2_ccombinewt :
-                              Hexagon::C2_ccombinewf;
-
-  // DEALLOC_RETURN.
-  case Hexagon::L4_return:
-    return !invertPredicate ? Hexagon::L4_return_t:
-                              Hexagon::L4_return_f;
-  }
-  llvm_unreachable("Unexpected predicable instruction");
+// Return true if the instruction is a compound branch instruction.
+bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const {
+  return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch());
 }
-bool HexagonInstrInfo::
-PredicateInstruction(MachineInstr *MI,
-                     ArrayRef<MachineOperand> Cond) const {
-  if (Cond.empty() || isEndLoopN(Cond[0].getImm())) {
-    DEBUG(dbgs() << "\nCannot predicate:"; MI->dump(););
-    return false;
-  }
-  int Opc = MI->getOpcode();
-  assert(isPredicable(MI) && "Expected predicable instruction");
-  bool invertJump = predOpcodeHasNot(Cond);
-
-  // We have to predicate MI "in place", i.e. after this function returns,
-  // MI will need to be transformed into a predicated form. To avoid com-
-  // plicated manipulations with the operands (handling tied operands,
-  // etc.), build a new temporary instruction, then overwrite MI with it.
- - MachineBasicBlock &B = *MI->getParent(); - DebugLoc DL = MI->getDebugLoc(); - unsigned PredOpc = getCondOpcode(Opc, invertJump); - MachineInstrBuilder T = BuildMI(B, MI, DL, get(PredOpc)); - unsigned NOp = 0, NumOps = MI->getNumOperands(); - while (NOp < NumOps) { - MachineOperand &Op = MI->getOperand(NOp); - if (!Op.isReg() || !Op.isDef() || Op.isImplicit()) - break; - T.addOperand(Op); - NOp++; - } - - unsigned PredReg, PredRegPos, PredRegFlags; - bool GotPredReg = getPredReg(Cond, PredReg, PredRegPos, PredRegFlags); - (void)GotPredReg; - assert(GotPredReg); - T.addReg(PredReg, PredRegFlags); - while (NOp < NumOps) - T.addOperand(MI->getOperand(NOp++)); - - MI->setDesc(get(PredOpc)); - while (unsigned n = MI->getNumOperands()) - MI->RemoveOperand(n-1); - for (unsigned i = 0, n = T->getNumOperands(); i < n; ++i) - MI->addOperand(T->getOperand(i)); - - MachineBasicBlock::instr_iterator TI = &*T; - B.erase(TI); - - MachineRegisterInfo &MRI = B.getParent()->getRegInfo(); - MRI.clearKillFlags(PredReg); - - return true; +bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const { + return (MI->isBranch() && isPredicated(MI)) || + isConditionalTransfer(MI) || + isConditionalALU32(MI) || + isConditionalLoad(MI) || + // Predicated stores which don't have a .new on any operands. + (MI->mayStore() && isPredicated(MI) && !isNewValueStore(MI) && + !isPredicatedNew(MI)); } -bool -HexagonInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, - unsigned ExtraPredCycles, - const BranchProbability &Probability) const { - return true; -} - - -bool -HexagonInstrInfo:: -isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, - unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, - unsigned ExtraFCycles, - const BranchProbability &Probability) const { - return true; -} - -// Returns true if an instruction is predicated irrespective of the predicate -// sense. For example, all of the following will return true. -// if (p0) R1 = add(R2, R3) -// if (!p0) R1 = add(R2, R3) -// if (p0.new) R1 = add(R2, R3) -// if (!p0.new) R1 = add(R2, R3) -bool HexagonInstrInfo::isPredicated(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); -} - -bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - - return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); -} - -bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - assert(isPredicated(MI)); - return (!((F >> HexagonII::PredicatedFalsePos) & - HexagonII::PredicatedFalseMask)); -} - -bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - - // Make sure that the instruction is predicated. 
- assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); - return (!((F >> HexagonII::PredicatedFalsePos) & - HexagonII::PredicatedFalseMask)); -} - -bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - assert(isPredicated(MI)); - return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); -} - -bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { - const uint64_t F = get(Opcode).TSFlags; - - assert(isPredicated(Opcode)); - return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask); -} - -// Returns true, if a ST insn can be promoted to a new-value store. -bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - return ((F >> HexagonII::mayNVStorePos) & - HexagonII::mayNVStoreMask); -} - -bool -HexagonInstrInfo::DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const { - for (unsigned oper = 0; oper < MI->getNumOperands(); ++oper) { - MachineOperand MO = MI->getOperand(oper); - if (MO.isReg() && MO.isDef()) { - const TargetRegisterClass* RC = RI.getMinimalPhysRegClass(MO.getReg()); - if (RC == &Hexagon::PredRegsRegClass) { - Pred.push_back(MO); - return true; - } - } - } - return false; -} - - -bool -HexagonInstrInfo:: -SubsumesPredicate(ArrayRef Pred1, - ArrayRef Pred2) const { - // TODO: Fix this - return false; -} - - -// -// We indicate that we want to reverse the branch by -// inserting the reversed branching opcode. -// -bool HexagonInstrInfo::ReverseBranchCondition( - SmallVectorImpl &Cond) const { - if (Cond.empty()) - return true; - assert(Cond[0].isImm() && "First entry in the cond vector not imm-val"); - Opcode_t opcode = Cond[0].getImm(); - //unsigned temp; - assert(get(opcode).isBranch() && "Should be a branching condition."); - if (isEndLoopN(opcode)) - return true; - Opcode_t NewOpcode = getInvertedPredicatedOpcode(opcode); - Cond[0].setImm(NewOpcode); - return false; -} - - -bool HexagonInstrInfo:: -isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs, - const BranchProbability &Probability) const { - return (NumInstrs <= 4); -} - -bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const { +bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const { switch (MI->getOpcode()) { - default: return false; - case Hexagon::L4_return: - case Hexagon::L4_return_t: - case Hexagon::L4_return_f: - case Hexagon::L4_return_tnew_pnt: - case Hexagon::L4_return_fnew_pnt: - case Hexagon::L4_return_tnew_pt: - case Hexagon::L4_return_fnew_pt: - return true; - } -} - - -bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, - bool Extend) const { - // This function is to check whether the "Offset" is in the correct range of - // the given "Opcode". If "Offset" is not in the correct range, "A2_addi" is - // inserted to calculate the final address. Due to this reason, the function - // assumes that the "Offset" has correct alignment. - // We used to assert if the offset was not properly aligned, however, - // there are cases where a misaligned pointer recast can cause this - // problem, and we need to allow for it. The front end warns of such - // misaligns with respect to load size. 
- - switch (Opcode) { - case Hexagon::J2_loop0i: - case Hexagon::J2_loop1i: - return isUInt<10>(Offset); - } - - if (Extend) - return true; - - switch (Opcode) { - case Hexagon::L2_loadri_io: - case Hexagon::S2_storeri_io: - return (Offset >= Hexagon_MEMW_OFFSET_MIN) && - (Offset <= Hexagon_MEMW_OFFSET_MAX); - - case Hexagon::L2_loadrd_io: - case Hexagon::S2_storerd_io: - return (Offset >= Hexagon_MEMD_OFFSET_MIN) && - (Offset <= Hexagon_MEMD_OFFSET_MAX); - - case Hexagon::L2_loadrh_io: - case Hexagon::L2_loadruh_io: - case Hexagon::S2_storerh_io: - return (Offset >= Hexagon_MEMH_OFFSET_MIN) && - (Offset <= Hexagon_MEMH_OFFSET_MAX); - - case Hexagon::L2_loadrb_io: - case Hexagon::S2_storerb_io: - case Hexagon::L2_loadrub_io: - return (Offset >= Hexagon_MEMB_OFFSET_MIN) && - (Offset <= Hexagon_MEMB_OFFSET_MAX); - - case Hexagon::A2_addi: - return (Offset >= Hexagon_ADDI_OFFSET_MIN) && - (Offset <= Hexagon_ADDI_OFFSET_MAX); - - case Hexagon::L4_iadd_memopw_io: - case Hexagon::L4_isub_memopw_io: - case Hexagon::L4_add_memopw_io: - case Hexagon::L4_sub_memopw_io: - case Hexagon::L4_and_memopw_io: - case Hexagon::L4_or_memopw_io: - return (0 <= Offset && Offset <= 255); - - case Hexagon::L4_iadd_memoph_io: - case Hexagon::L4_isub_memoph_io: - case Hexagon::L4_add_memoph_io: - case Hexagon::L4_sub_memoph_io: - case Hexagon::L4_and_memoph_io: - case Hexagon::L4_or_memoph_io: - return (0 <= Offset && Offset <= 127); - - case Hexagon::L4_iadd_memopb_io: - case Hexagon::L4_isub_memopb_io: - case Hexagon::L4_add_memopb_io: - case Hexagon::L4_sub_memopb_io: - case Hexagon::L4_and_memopb_io: - case Hexagon::L4_or_memopb_io: - return (0 <= Offset && Offset <= 63); - - // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of - // any size. Later pass knows how to handle it. - case Hexagon::STriw_pred: - case Hexagon::LDriw_pred: - return true; - - case Hexagon::TFR_FI: - case Hexagon::TFR_FIA: - case Hexagon::INLINEASM: - return true; - } - - llvm_unreachable("No offset range is defined for this opcode. " - "Please define it in the above switch statement!"); -} - - -// -// Check if the Offset is a valid auto-inc imm by Load/Store Type. 
-// -bool HexagonInstrInfo:: -isValidAutoIncImm(const EVT VT, const int Offset) const { - - if (VT == MVT::i64) { - return (Offset >= Hexagon_MEMD_AUTOINC_MIN && - Offset <= Hexagon_MEMD_AUTOINC_MAX && - (Offset & 0x7) == 0); - } - if (VT == MVT::i32) { - return (Offset >= Hexagon_MEMW_AUTOINC_MIN && - Offset <= Hexagon_MEMW_AUTOINC_MAX && - (Offset & 0x3) == 0); - } - if (VT == MVT::i16) { - return (Offset >= Hexagon_MEMH_AUTOINC_MIN && - Offset <= Hexagon_MEMH_AUTOINC_MAX && - (Offset & 0x1) == 0); - } - if (VT == MVT::i8) { - return (Offset >= Hexagon_MEMB_AUTOINC_MIN && - Offset <= Hexagon_MEMB_AUTOINC_MAX); - } - llvm_unreachable("Not an auto-inc opc!"); -} - - -bool HexagonInstrInfo:: -isMemOp(const MachineInstr *MI) const { -// return MI->getDesc().mayLoad() && MI->getDesc().mayStore(); - - switch (MI->getOpcode()) - { - default: return false; - case Hexagon::L4_iadd_memopw_io: - case Hexagon::L4_isub_memopw_io: - case Hexagon::L4_add_memopw_io: - case Hexagon::L4_sub_memopw_io: - case Hexagon::L4_and_memopw_io: - case Hexagon::L4_or_memopw_io: - case Hexagon::L4_iadd_memoph_io: - case Hexagon::L4_isub_memoph_io: - case Hexagon::L4_add_memoph_io: - case Hexagon::L4_sub_memoph_io: - case Hexagon::L4_and_memoph_io: - case Hexagon::L4_or_memoph_io: - case Hexagon::L4_iadd_memopb_io: - case Hexagon::L4_isub_memopb_io: - case Hexagon::L4_add_memopb_io: - case Hexagon::L4_sub_memopb_io: - case Hexagon::L4_and_memopb_io: - case Hexagon::L4_or_memopb_io: - case Hexagon::L4_ior_memopb_io: - case Hexagon::L4_ior_memoph_io: - case Hexagon::L4_ior_memopw_io: - case Hexagon::L4_iand_memopb_io: - case Hexagon::L4_iand_memoph_io: - case Hexagon::L4_iand_memopw_io: - return true; - } - return false; -} - - -bool HexagonInstrInfo:: -isSpillPredRegOp(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - case Hexagon::STriw_pred : - case Hexagon::LDriw_pred : - return true; - } -} - -bool HexagonInstrInfo::isNewValueJumpCandidate(const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - case Hexagon::C2_cmpeq: - case Hexagon::C2_cmpeqi: - case Hexagon::C2_cmpgt: - case Hexagon::C2_cmpgti: - case Hexagon::C2_cmpgtu: - case Hexagon::C2_cmpgtui: - return true; - } -} - -bool HexagonInstrInfo:: -isConditionalTransfer (const MachineInstr *MI) const { - switch (MI->getOpcode()) { - default: return false; - case Hexagon::A2_tfrt: - case Hexagon::A2_tfrf: - case Hexagon::C2_cmoveit: - case Hexagon::C2_cmoveif: - case Hexagon::A2_tfrtnew: - case Hexagon::A2_tfrfnew: - case Hexagon::C2_cmovenewit: - case Hexagon::C2_cmovenewif: - return true; - } -} - -bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { - switch (MI->getOpcode()) - { - default: return false; case Hexagon::A2_paddf: case Hexagon::A2_paddfnew: + case Hexagon::A2_paddif: + case Hexagon::A2_paddifnew: + case Hexagon::A2_paddit: + case Hexagon::A2_padditnew: case Hexagon::A2_paddt: case Hexagon::A2_paddtnew: case Hexagon::A2_pandf: case Hexagon::A2_pandfnew: case Hexagon::A2_pandt: case Hexagon::A2_pandtnew: - case Hexagon::A4_paslhf: - case Hexagon::A4_paslhfnew: - case Hexagon::A4_paslht: - case Hexagon::A4_paslhtnew: - case Hexagon::A4_pasrhf: - case Hexagon::A4_pasrhfnew: - case Hexagon::A4_pasrht: - case Hexagon::A4_pasrhtnew: case Hexagon::A2_porf: case Hexagon::A2_porfnew: case Hexagon::A2_port: @@ -1435,14 +1522,22 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const { case Hexagon::A2_pxorfnew: case Hexagon::A2_pxort: case 
Hexagon::A2_pxortnew:
-  case Hexagon::A4_psxthf:
-  case Hexagon::A4_psxthfnew:
-  case Hexagon::A4_psxtht:
-  case Hexagon::A4_psxthtnew:
+  case Hexagon::A4_paslhf:
+  case Hexagon::A4_paslhfnew:
+  case Hexagon::A4_paslht:
+  case Hexagon::A4_paslhtnew:
+  case Hexagon::A4_pasrhf:
+  case Hexagon::A4_pasrhfnew:
+  case Hexagon::A4_pasrht:
+  case Hexagon::A4_pasrhtnew:
   case Hexagon::A4_psxtbf:
   case Hexagon::A4_psxtbfnew:
   case Hexagon::A4_psxtbt:
   case Hexagon::A4_psxtbtnew:
+  case Hexagon::A4_psxthf:
+  case Hexagon::A4_psxthfnew:
+  case Hexagon::A4_psxtht:
+  case Hexagon::A4_psxthtnew:
   case Hexagon::A4_pzxtbf:
   case Hexagon::A4_pzxtbfnew:
   case Hexagon::A4_pzxtbt:
@@ -1451,97 +1546,32 @@ bool HexagonInstrInfo::isConditionalALU32 (const MachineInstr* MI) const {
   case Hexagon::A4_pzxthfnew:
   case Hexagon::A4_pzxtht:
   case Hexagon::A4_pzxthtnew:
-  case Hexagon::A2_paddit:
-  case Hexagon::A2_paddif:
-  case Hexagon::C2_ccombinewt:
   case Hexagon::C2_ccombinewf:
+  case Hexagon::C2_ccombinewt:
     return true;
   }
+  return false;
 }
-bool HexagonInstrInfo::
-isConditionalLoad (const MachineInstr* MI) const {
-  switch (MI->getOpcode())
-  {
-    default: return false;
-    case Hexagon::L2_ploadrdt_io :
-    case Hexagon::L2_ploadrdf_io:
-    case Hexagon::L2_ploadrit_io:
-    case Hexagon::L2_ploadrif_io:
-    case Hexagon::L2_ploadrht_io:
-    case Hexagon::L2_ploadrhf_io:
-    case Hexagon::L2_ploadrbt_io:
-    case Hexagon::L2_ploadrbf_io:
-    case Hexagon::L2_ploadruht_io:
-    case Hexagon::L2_ploadruhf_io:
-    case Hexagon::L2_ploadrubt_io:
-    case Hexagon::L2_ploadrubf_io:
-    case Hexagon::L2_ploadrdt_pi:
-    case Hexagon::L2_ploadrdf_pi:
-    case Hexagon::L2_ploadrit_pi:
-    case Hexagon::L2_ploadrif_pi:
-    case Hexagon::L2_ploadrht_pi:
-    case Hexagon::L2_ploadrhf_pi:
-    case Hexagon::L2_ploadrbt_pi:
-    case Hexagon::L2_ploadrbf_pi:
-    case Hexagon::L2_ploadruht_pi:
-    case Hexagon::L2_ploadruhf_pi:
-    case Hexagon::L2_ploadrubt_pi:
-    case Hexagon::L2_ploadrubf_pi:
-    case Hexagon::L4_ploadrdt_rr:
-    case Hexagon::L4_ploadrdf_rr:
-    case Hexagon::L4_ploadrbt_rr:
-    case Hexagon::L4_ploadrbf_rr:
-    case Hexagon::L4_ploadrubt_rr:
-    case Hexagon::L4_ploadrubf_rr:
-    case Hexagon::L4_ploadrht_rr:
-    case Hexagon::L4_ploadrhf_rr:
-    case Hexagon::L4_ploadruht_rr:
-    case Hexagon::L4_ploadruhf_rr:
-    case Hexagon::L4_ploadrit_rr:
-    case Hexagon::L4_ploadrif_rr:
-      return true;
-  }
+
+// FIXME - Function name and its functionality don't match.
+// It should be renamed to hasPredNewOpcode()
+bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const {
+  if (!MI->getDesc().mayLoad() || !isPredicated(MI))
+    return false;
+
+  int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
+  // Instruction with valid predicated-new opcode can be promoted to .new.
+  return PNewOpcode >= 0;
 }
+
 // Returns true if an instruction is a conditional store.
 //
 // Note: It doesn't include conditional new-value stores as they can't be
 // converted to .new predicate.
-//
-//          p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ]
-//                ^           ^
-//               /             \ (not OK. it will cause new-value store to be
-//              /               X conditional on p0.new while R2 producer is
-//             /                 \ on p0)
-//            /                   \
-//   p.new store                   p.old NV store
-// [if(p0.new)memw(R0+#0)=R2]  [if(p0)memw(R0+#0)=R2.new]
-//            ^                  ^
-//             \                /
-//              \              /
-//               \            /
-//                p.old store
-//            [if (p0)memw(R0+#0)=R2]
-//
-// The above diagram shows the steps involved in the conversion of a predicated
-// store instruction to its .new predicated new-value form.
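+// For example, "if (p0) memw(R0+#0) = R1" can be promoted to
+// "if (p0.new) memw(R0+#0) = R1", while the new-value store
+// "if (p0) memw(R0+#0) = R1.new" has to keep its .old predicate.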
-// -// The following set of instructions further explains the scenario where -// conditional new-value store becomes invalid when promoted to .new predicate -// form. -// -// { 1) if (p0) r0 = add(r1, r2) -// 2) p0 = cmp.eq(r3, #0) } -// -// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with -// the first two instructions because in instr 1, r0 is conditional on old value -// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which -// is not valid for new-value stores. -bool HexagonInstrInfo:: -isConditionalStore (const MachineInstr* MI) const { - switch (MI->getOpcode()) - { +bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const { + switch (MI->getOpcode()) { default: return false; case Hexagon::S4_storeirbt_io: case Hexagon::S4_storeirbf_io: @@ -1587,199 +1617,36 @@ isConditionalStore (const MachineInstr* MI) const { // Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded // from the "Conditional Store" list. Because a predicated new value store - // would NOT be promoted to a double dot new store. See diagram below: + // would NOT be promoted to a double dot new store. // This function returns yes for those stores that are predicated but not // yet promoted to predicate dot new instructions. - // - // +---------------------+ - // /-----| if (p0) memw(..)=r0 |---------\~ - // || +---------------------+ || - // promote || /\ /\ || promote - // || /||\ /||\ || - // \||/ demote || \||/ - // \/ || || \/ - // +-------------------------+ || +-------------------------+ - // | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | - // +-------------------------+ || +-------------------------+ - // || || || - // || demote \||/ - // promote || \/ NOT possible - // || || /\~ - // \||/ || /||\~ - // \/ || || - // +-----------------------------+ - // | if (p0.new) memw(..)=r0.new | - // +-----------------------------+ - // Double Dot New Store - // } } -bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { - if (isNewValue(MI) && isBranch(MI)) - return true; +bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::A2_tfrt: + case Hexagon::A2_tfrf: + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmoveif: + case Hexagon::A2_tfrtnew: + case Hexagon::A2_tfrfnew: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmovenewif: + case Hexagon::A2_tfrpt: + case Hexagon::A2_tfrpf: + return true; + + default: + return false; + } return false; } -bool HexagonInstrInfo::isNewValueJump(Opcode_t Opcode) const { - return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); -} - -bool HexagonInstrInfo::isPostIncrement (const MachineInstr* MI) const { - return (getAddrMode(MI) == HexagonII::PostInc); -} - -// Returns true, if any one of the operands is a dot new -// insn, whether it is predicated dot new or register dot new. -bool HexagonInstrInfo::isDotNewInst (const MachineInstr* MI) const { - return (isNewValueInst(MI) || - (isPredicated(MI) && isPredicatedNew(MI))); -} - -// Returns the most basic instruction for the .new predicated instructions and -// new-value stores. 
-// For example, all of the following instructions will be converted back to the -// same instruction: -// 1) if (p0.new) memw(R0+#0) = R1.new ---> -// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1 -// 3) if (p0.new) memw(R0+#0) = R1 ---> -// - -int HexagonInstrInfo::GetDotOldOp(const int opc) const { - int NewOp = opc; - if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form - NewOp = Hexagon::getPredOldOpcode(NewOp); - assert(NewOp >= 0 && - "Couldn't change predicate new instruction to its old form."); - } - - if (isNewValueStore(NewOp)) { // Convert into non-new-value format - NewOp = Hexagon::getNonNVStore(NewOp); - assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); - } - return NewOp; -} - -// Return the new value instruction for a given store. -int HexagonInstrInfo::GetDotNewOp(const MachineInstr* MI) const { - int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode()); - if (NVOpcode >= 0) // Valid new-value store instruction. - return NVOpcode; - - switch (MI->getOpcode()) { - default: llvm_unreachable("Unknown .new type"); - case Hexagon::S4_storerb_ur: - return Hexagon::S4_storerbnew_ur; - - case Hexagon::S4_storerh_ur: - return Hexagon::S4_storerhnew_ur; - - case Hexagon::S4_storeri_ur: - return Hexagon::S4_storerinew_ur; - - case Hexagon::S2_storerb_pci: - return Hexagon::S2_storerb_pci; - - case Hexagon::S2_storeri_pci: - return Hexagon::S2_storeri_pci; - - case Hexagon::S2_storerh_pci: - return Hexagon::S2_storerh_pci; - - case Hexagon::S2_storerd_pci: - return Hexagon::S2_storerd_pci; - - case Hexagon::S2_storerf_pci: - return Hexagon::S2_storerf_pci; - } - return 0; -} - -// Return .new predicate version for an instruction. -int HexagonInstrInfo::GetDotNewPredOp(MachineInstr *MI, - const MachineBranchProbabilityInfo - *MBPI) const { - - int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); - if (NewOpcode >= 0) // Valid predicate new instruction - return NewOpcode; - - switch (MI->getOpcode()) { - default: llvm_unreachable("Unknown .new type"); - // Condtional Jumps - case Hexagon::J2_jumpt: - case Hexagon::J2_jumpf: - return getDotNewPredJumpOp(MI, MBPI); - - case Hexagon::J2_jumprt: - return Hexagon::J2_jumptnewpt; - - case Hexagon::J2_jumprf: - return Hexagon::J2_jumprfnewpt; - - case Hexagon::JMPrett: - return Hexagon::J2_jumprtnewpt; - - case Hexagon::JMPretf: - return Hexagon::J2_jumprfnewpt; - - - // Conditional combine - case Hexagon::C2_ccombinewt: - return Hexagon::C2_ccombinewnewt; - case Hexagon::C2_ccombinewf: - return Hexagon::C2_ccombinewnewf; - } -} - - -unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { - const uint64_t F = MI->getDesc().TSFlags; - - return((F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask); -} - -/// immediateExtend - Changes the instruction in place to one using an immediate -/// extender. -void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { - assert((isExtendable(MI)||isConstExtended(MI)) && - "Instruction must be extendable"); - // Find which operand is extendable. - short ExtOpNum = getCExtOpNum(MI); - MachineOperand &MO = MI->getOperand(ExtOpNum); - // This needs to be something we understand. - assert((MO.isMBB() || MO.isImm()) && - "Branch with unknown extendable field type"); - // Mark given operand as extended. 
-  MO.addTargetFlag(HexagonII::HMOTF_ConstExtended);
-}
-DFAPacketizer *HexagonInstrInfo::CreateTargetScheduleState(
-    const TargetSubtargetInfo &STI) const {
-  const InstrItineraryData *II = STI.getInstrItineraryData();
-  return static_cast<const HexagonSubtarget &>(STI).createDFAPacketizer(II);
-}
-bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
-                                            const MachineBasicBlock *MBB,
-                                            const MachineFunction &MF) const {
-  // Debug info is never a scheduling boundary. It's necessary to be explicit
-  // due to the special treatment of IT instructions below, otherwise a
-  // dbg_value followed by an IT will result in the IT instruction being
-  // considered a scheduling hazard, which is wrong. It should be the actual
-  // instruction preceding the dbg_value instruction(s), just like it is
-  // when debug info is not present.
-  if (MI->isDebugValue())
-    return false;
-
-  // Terminators and labels can't be scheduled around.
-  if (MI->getDesc().isTerminator() || MI->isPosition() || MI->isInlineAsm())
-    return true;
-
-  return false;
-}
+// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle
+// isFPImm and later getFPImm as well.
 bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
   const uint64_t F = MI->getDesc().TSFlags;
   unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
@@ -1791,6 +1658,9 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
   if (!isExtendable)
     return false;
+  if (MI->isCall())
+    return false;
+
   short ExtOpNum = getCExtOpNum(MI);
   const MachineOperand &MO = MI->getOperand(ExtOpNum);
   // Use MO operand flags to determine if MO
@@ -1823,40 +1693,1425 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
   return (ImmValue < MinValue || ImmValue > MaxValue);
 }
-// Return the number of bytes required to encode the instruction.
-// Hexagon instructions are fixed length, 4 bytes, unless they
-// use a constant extender, which requires another 4 bytes.
-// For debug instructions and prolog labels, return 0.
-unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
-  if (MI->isDebugValue() || MI->isPosition())
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::L4_return :
+  case Hexagon::L4_return_t :
+  case Hexagon::L4_return_f :
+  case Hexagon::L4_return_tnew_pnt :
+  case Hexagon::L4_return_fnew_pnt :
+  case Hexagon::L4_return_tnew_pt :
+  case Hexagon::L4_return_fnew_pt :
+    return true;
+  }
+  return false;
+}
+
+
+// Return true when ConsMI uses a register defined by ProdMI.
+bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI,
+      const MachineInstr *ConsMI) const {
+  const MCInstrDesc &ProdMCID = ProdMI->getDesc();
+  if (!ProdMCID.getNumDefs())
+    return false;
+
+  auto &HRI = getRegisterInfo();
+
+  SmallVector<unsigned, 4> DefsA;
+  SmallVector<unsigned, 4> DefsB;
+  SmallVector<unsigned, 8> UsesA;
+  SmallVector<unsigned, 8> UsesB;
+
+  parseOperands(ProdMI, DefsA, UsesA);
+  parseOperands(ConsMI, DefsB, UsesB);
+
+  for (auto &RegA : DefsA)
+    for (auto &RegB : UsesB) {
+      // True data dependency.
+      if (RegA == RegB)
+        return true;
+
+      if (Hexagon::DoubleRegsRegClass.contains(RegA))
+        for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs)
+          if (RegB == *SubRegs)
+            return true;
+
+      if (Hexagon::DoubleRegsRegClass.contains(RegB))
+        for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs)
+          if (RegA == *SubRegs)
+            return true;
+    }
+
+  return false;
+}
+
+
+// Returns true if the instruction is already a .cur.
+bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::V6_vL32b_cur_pi:
+  case Hexagon::V6_vL32b_cur_ai:
+  case Hexagon::V6_vL32b_cur_pi_128B:
+  case Hexagon::V6_vL32b_cur_ai_128B:
+    return true;
+  }
+  return false;
+}
+
+
+// Returns true, if any one of the operands is a dot new
+// insn, whether it is predicated dot new or register dot new.
+bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const {
+  if (isNewValueInst(MI) ||
+      (isPredicated(MI) && isPredicatedNew(MI)))
+    return true;
+
+  return false;
+}
+
+
+/// Symmetrical. See if these two instructions are fit for duplex pair.
+bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa,
+      const MachineInstr *MIb) const {
+  HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa);
+  HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb);
+  return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
+}
+
+
+bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const {
+  if (!MI)
+    return false;
+
+  if (MI->mayLoad() || MI->mayStore() || MI->isCompare())
+    return true;
+
+  // Multiply
+  unsigned SchedClass = MI->getDesc().getSchedClass();
+  if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23)
+    return true;
+  return false;
+}
+
+
+bool HexagonInstrInfo::isEndLoopN(unsigned Opcode) const {
+  return (Opcode == Hexagon::ENDLOOP0 ||
+          Opcode == Hexagon::ENDLOOP1);
+}
+
+
+bool HexagonInstrInfo::isExpr(unsigned OpType) const {
+  switch(OpType) {
+  case MachineOperand::MO_MachineBasicBlock:
+  case MachineOperand::MO_GlobalAddress:
+  case MachineOperand::MO_ExternalSymbol:
+  case MachineOperand::MO_JumpTableIndex:
+  case MachineOperand::MO_ConstantPoolIndex:
+  case MachineOperand::MO_BlockAddress:
+    return true;
+  default:
+    return false;
+  }
+}
+
+
+bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
+  const MCInstrDesc &MID = MI->getDesc();
+  const uint64_t F = MID.TSFlags;
+  if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
+    return true;
+
+  // TODO: This is largely obsolete now. Will need to be removed
+  // in consecutive patches.
+  switch(MI->getOpcode()) {
+  // TFR_FI remains a special case.
+  case Hexagon::TFR_FI:
+    return true;
+  default:
+    return false;
+  }
+  return false;
+}
+
+
+// This returns true in two cases:
+// - The OP code itself indicates that this is an extended instruction.
+// - One of MOs has been marked with HMOTF_ConstExtended flag.
+bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+  // First check if this is permanently extended op code.
+  const uint64_t F = MI->getDesc().TSFlags;
+  if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
+    return true;
+  // Use MO operand flags to determine if one of MI's operands
+  // has HMOTF_ConstExtended flag set.
+  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
+       E = MI->operands_end(); I != E; ++I) {
+    if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
+      return true;
+  }
+  return false;
+}
+
+
+bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const {
+  unsigned Opcode = MI->getOpcode();
+  const uint64_t F = get(Opcode).TSFlags;
+  return (F >> HexagonII::FPPos) & HexagonII::FPMask;
+}
+
+
+// No V60 HVX VMEM with A_INDIRECT.
+bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I,
+      const MachineInstr *J) const {
+  if (!isV60VectorInstruction(I))
+    return false;
+  if (!I->mayLoad() && !I->mayStore())
+    return false;
+  return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
+}
+
+
+bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::J2_callr :
+  case Hexagon::J2_callrf :
+  case Hexagon::J2_callrt :
+    return true;
+  }
+  return false;
+}
+
+
+bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::L4_return :
+  case Hexagon::L4_return_t :
+  case Hexagon::L4_return_f :
+  case Hexagon::L4_return_fnew_pnt :
+  case Hexagon::L4_return_fnew_pt :
+  case Hexagon::L4_return_tnew_pnt :
+  case Hexagon::L4_return_tnew_pt :
+    return true;
+  }
+  return false;
+}
+
+
+bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const {
+  switch (MI->getOpcode()) {
+  case Hexagon::J2_jumpr :
+  case Hexagon::J2_jumprt :
+  case Hexagon::J2_jumprf :
+  case Hexagon::J2_jumprtnewpt :
+  case Hexagon::J2_jumprfnewpt :
+  case Hexagon::J2_jumprtnew :
+  case Hexagon::J2_jumprfnew :
+    return true;
+  }
+  return false;
+}
+
+
+// Return true if a given MI can accommodate the given offset.
+// Use an absolute estimate as opposed to the exact number.
+// TODO: This will need to be changed to use MC level
+// definition of instruction extendable field size.
+bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI,
+      unsigned offset) const {
+  // This selection of jump instructions matches what AnalyzeBranch can
+  // parse, plus NVJ.
+  if (isNewValueJump(MI)) // r9:2
+    return isInt<11>(offset);
+
+  switch (MI->getOpcode()) {
+  // Still missing Jump to address condition on register value.
+  default:
+    return false;
+  case Hexagon::J2_jump: // bits<24> dst; // r22:2
+  case Hexagon::J2_call:
+  case Hexagon::CALLv3nr:
+    return isInt<24>(offset);
+  case Hexagon::J2_jumpt: //bits<17> dst; // r15:2
+  case Hexagon::J2_jumpf:
+  case Hexagon::J2_jumptnew:
+  case Hexagon::J2_jumptnewpt:
+  case Hexagon::J2_jumpfnew:
+  case Hexagon::J2_jumpfnewpt:
+  case Hexagon::J2_callt:
+  case Hexagon::J2_callf:
+    return isInt<17>(offset);
+  case Hexagon::J2_loop0i:
+  case Hexagon::J2_loop0iext:
+  case Hexagon::J2_loop0r:
+  case Hexagon::J2_loop0rext:
+  case Hexagon::J2_loop1i:
+  case Hexagon::J2_loop1iext:
+  case Hexagon::J2_loop1r:
+  case Hexagon::J2_loop1rext:
+    return isInt<9>(offset);
+  // TODO: Add all the compound branches here. Can we do this in Relation model?
+  case Hexagon::J4_cmpeqi_tp0_jump_nt:
+  case Hexagon::J4_cmpeqi_tp1_jump_nt:
+    return isInt<11>(offset);
+  }
+}
+
+
+bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
+      const MachineInstr *ESMI) const {
+  if (!LRMI || !ESMI)
+    return false;
+
+  bool isLate = isLateResultInstr(LRMI);
+  bool isEarly = isEarlySourceInstr(ESMI);
+
+  DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
+  DEBUG(LRMI->dump());
+  DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
"-ES " : " -- ")); + DEBUG(ESMI->dump()); + + if (isLate && isEarly) { + DEBUG(dbgs() << "++Is Late Result feeding Early Source\n"); + return true; + } + + return false; +} + + +bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + switch (MI->getOpcode()) { + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + case TargetOpcode::COPY: + case TargetOpcode::INLINEASM: + case TargetOpcode::PHI: + return false; + default: + break; + } + + unsigned SchedClass = MI->getDesc().getSchedClass(); + + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + case Hexagon::Sched::V2LDST_tc_ld_SLOT01: + case Hexagon::Sched::V2LDST_tc_st_SLOT0: + case Hexagon::Sched::V2LDST_tc_st_SLOT01: + case Hexagon::Sched::V4LDST_tc_ld_SLOT01: + case Hexagon::Sched::V4LDST_tc_st_SLOT0: + case Hexagon::Sched::V4LDST_tc_st_SLOT01: + return false; + } + return true; +} + + +bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const { + if (!MI) + return false; + + // Instructions with iclass A_CVI_VX and attribute A_CVI_LATE uses a multiply + // resource, but all operands can be received late like an ALU instruction. + return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE; +} + + +bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const { + unsigned Opcode = MI->getOpcode(); + return Opcode == Hexagon::J2_loop0i || + Opcode == Hexagon::J2_loop0r || + Opcode == Hexagon::J2_loop0iext || + Opcode == Hexagon::J2_loop0rext || + Opcode == Hexagon::J2_loop1i || + Opcode == Hexagon::J2_loop1r || + Opcode == Hexagon::J2_loop1iext || + Opcode == Hexagon::J2_loop1rext; +} + + +bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + default: return false; + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : + case Hexagon::L4_ior_memopb_io: + case Hexagon::L4_ior_memoph_io: + case Hexagon::L4_ior_memopw_io: + case Hexagon::L4_iand_memopb_io: + case Hexagon::L4_iand_memoph_io: + case Hexagon::L4_iand_memopw_io: + return true; + } + return false; +} + + +bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; +} + + +bool HexagonInstrInfo::isNewValue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask; +} + + +bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const { + return isNewValueJump(MI) || isNewValueStore(MI); +} + + +bool 
HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const { + return isNewValue(MI) && MI->isBranch(); +} + + +bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const { + return isNewValue(Opcode) && get(Opcode).isBranch() && isPredicated(Opcode); +} + + +bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + + +bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask; +} + + +// Returns true if a particular operand is extendable for an instruction. +bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, + unsigned OperandNum) const { + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) + == OperandNum; +} + + +bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const { + return getAddrMode(MI) == HexagonII::PostInc; +} + + +bool HexagonInstrInfo::isPredicatedNew(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + assert(isPredicated(MI)); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; +} + + +bool HexagonInstrInfo::isPredicatedNew(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + assert(isPredicated(Opcode)); + return (F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask; +} + + +bool HexagonInstrInfo::isPredicatedTrue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); +} + + +bool HexagonInstrInfo::isPredicatedTrue(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + // Make sure that the instruction is predicated. + assert((F>> HexagonII::PredicatedPos) & HexagonII::PredicatedMask); + return !((F >> HexagonII::PredicatedFalsePos) & + HexagonII::PredicatedFalseMask); +} + + +bool HexagonInstrInfo::isPredicated(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask; +} + + +bool HexagonInstrInfo::isPredicateLate(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return ~(F >> HexagonII::PredicateLatePos) & HexagonII::PredicateLateMask; +} + + +bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + assert(get(Opcode).isBranch() && + (isPredicatedNew(Opcode) || isNewValue(Opcode))); + return (F >> HexagonII::TakenPos) & HexagonII::TakenMask; +} + + +bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const { + return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 || + MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT; +} + + +bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::SoloPos) & HexagonII::SoloMask; +} + + +bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const { + switch (MI->getOpcode()) { + case Hexagon::STriw_pred : + case Hexagon::LDriw_pred : + return true; + default: + return false; + } +} + + +// Returns true when SU has a timing class TC1. 
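+// TC1/TC2/TC2early/TC4x below bucket instructions by their scheduling class.
+// A hedged, illustrative sketch of a client that maps these buckets to an
+// approximate latency; bucketLatency is a hypothetical helper and the cycle
+// counts only reflect the rough intent of the tc_* names:
+//
+//   unsigned bucketLatency(const HexagonInstrInfo &HII,
+//                          const MachineInstr *MI) {
+//     if (HII.isTC1(MI))
+//       return 1;                      // tc_1: single-cycle classes
+//     if (HII.isTC2(MI) || HII.isTC2Early(MI))
+//       return 2;                      // tc_2/tc_2early classes
+//     if (HII.isTC4x(MI))
+//       return 4;                      // tc_3or4x multiply classes
+//     return 1;                        // default assumption
+//   }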
+bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123: + case Hexagon::Sched::ALU32_ADDI_tc_1_SLOT0123: + case Hexagon::Sched::ALU64_tc_1_SLOT23: + case Hexagon::Sched::EXTENDER_tc_1_SLOT0123: + //case Hexagon::Sched::M_tc_1_SLOT23: + case Hexagon::Sched::S_2op_tc_1_SLOT23: + case Hexagon::Sched::S_3op_tc_1_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123: + case Hexagon::Sched::ALU64_tc_2_SLOT23: + case Hexagon::Sched::CR_tc_2_SLOT3: + case Hexagon::Sched::M_tc_2_SLOT23: + case Hexagon::Sched::S_2op_tc_2_SLOT23: + case Hexagon::Sched::S_3op_tc_2_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const { + unsigned SchedClass = MI->getDesc().getSchedClass(); + switch (SchedClass) { + case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123: + case Hexagon::Sched::ALU64_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT23: + case Hexagon::Sched::CR_tc_2early_SLOT3: + case Hexagon::Sched::J_tc_2early_SLOT0123: + case Hexagon::Sched::J_tc_2early_SLOT2: + case Hexagon::Sched::J_tc_2early_SLOT23: + case Hexagon::Sched::S_2op_tc_2early_SLOT23: + case Hexagon::Sched::S_3op_tc_2early_SLOT23: + return true; + + default: + return false; + } +} + + +bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const { + if (!MI) + return false; + + unsigned SchedClass = MI->getDesc().getSchedClass(); + return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23; +} + + +bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const { + if (!MI) + return false; + + const uint64_t V = getType(MI); + return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST; +} + + +// Check if the Offset is a valid auto-inc imm by Load/Store Type. +// +bool HexagonInstrInfo::isValidAutoIncImm(const EVT VT, const int Offset) const { + if (VT == MVT::v16i32 || VT == MVT::v8i64 || + VT == MVT::v32i16 || VT == MVT::v64i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN && + Offset <= Hexagon_MEMV_AUTOINC_MAX && + (Offset & 0x3f) == 0); + } + // 128B + if (VT == MVT::v32i32 || VT == MVT::v16i64 || + VT == MVT::v64i16 || VT == MVT::v128i8) { + return (Offset >= Hexagon_MEMV_AUTOINC_MIN_128B && + Offset <= Hexagon_MEMV_AUTOINC_MAX_128B && + (Offset & 0x7f) == 0); + } + if (VT == MVT::i64) { + return (Offset >= Hexagon_MEMD_AUTOINC_MIN && + Offset <= Hexagon_MEMD_AUTOINC_MAX && + (Offset & 0x7) == 0); + } + if (VT == MVT::i32) { + return (Offset >= Hexagon_MEMW_AUTOINC_MIN && + Offset <= Hexagon_MEMW_AUTOINC_MAX && + (Offset & 0x3) == 0); + } + if (VT == MVT::i16) { + return (Offset >= Hexagon_MEMH_AUTOINC_MIN && + Offset <= Hexagon_MEMH_AUTOINC_MAX && + (Offset & 0x1) == 0); + } + if (VT == MVT::i8) { + return (Offset >= Hexagon_MEMB_AUTOINC_MIN && + Offset <= Hexagon_MEMB_AUTOINC_MAX); + } + llvm_unreachable("Not an auto-inc opc!"); +} + + +bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset, + bool Extend) const { + // This function is to check whether the "Offset" is in the correct range of + // the given "Opcode". 
If "Offset" is not in the correct range, "A2_addi" is + // inserted to calculate the final address. Due to this reason, the function + // assumes that the "Offset" has correct alignment. + // We used to assert if the offset was not properly aligned, however, + // there are cases where a misaligned pointer recast can cause this + // problem, and we need to allow for it. The front end warns of such + // misaligns with respect to load size. + + switch (Opcode) { + case Hexagon::STriq_pred_V6: + case Hexagon::STriq_pred_vec_V6: + case Hexagon::STriv_pseudo_V6: + case Hexagon::STrivv_pseudo_V6: + case Hexagon::LDriq_pred_V6: + case Hexagon::LDriq_pred_vec_V6: + case Hexagon::LDriv_pseudo_V6: + case Hexagon::LDrivv_pseudo_V6: + case Hexagon::LDrivv_indexed: + case Hexagon::STrivv_indexed: + case Hexagon::V6_vL32b_ai: + case Hexagon::V6_vS32b_ai: + case Hexagon::V6_vL32Ub_ai: + case Hexagon::V6_vS32Ub_ai: + return (Offset >= Hexagon_MEMV_OFFSET_MIN) && + (Offset <= Hexagon_MEMV_OFFSET_MAX); + + case Hexagon::STriq_pred_V6_128B: + case Hexagon::STriq_pred_vec_V6_128B: + case Hexagon::STriv_pseudo_V6_128B: + case Hexagon::STrivv_pseudo_V6_128B: + case Hexagon::LDriq_pred_V6_128B: + case Hexagon::LDriq_pred_vec_V6_128B: + case Hexagon::LDriv_pseudo_V6_128B: + case Hexagon::LDrivv_pseudo_V6_128B: + case Hexagon::LDrivv_indexed_128B: + case Hexagon::STrivv_indexed_128B: + case Hexagon::V6_vL32b_ai_128B: + case Hexagon::V6_vS32b_ai_128B: + case Hexagon::V6_vL32Ub_ai_128B: + case Hexagon::V6_vS32Ub_ai_128B: + return (Offset >= Hexagon_MEMV_OFFSET_MIN_128B) && + (Offset <= Hexagon_MEMV_OFFSET_MAX_128B); + + case Hexagon::J2_loop0i: + case Hexagon::J2_loop1i: + return isUInt<10>(Offset); + } + + if (Extend) + return true; + + switch (Opcode) { + case Hexagon::L2_loadri_io: + case Hexagon::S2_storeri_io: + return (Offset >= Hexagon_MEMW_OFFSET_MIN) && + (Offset <= Hexagon_MEMW_OFFSET_MAX); + + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: + return (Offset >= Hexagon_MEMD_OFFSET_MIN) && + (Offset <= Hexagon_MEMD_OFFSET_MAX); + + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + case Hexagon::S2_storerh_io: + return (Offset >= Hexagon_MEMH_OFFSET_MIN) && + (Offset <= Hexagon_MEMH_OFFSET_MAX); + + case Hexagon::L2_loadrb_io: + case Hexagon::L2_loadrub_io: + case Hexagon::S2_storerb_io: + return (Offset >= Hexagon_MEMB_OFFSET_MIN) && + (Offset <= Hexagon_MEMB_OFFSET_MAX); + + case Hexagon::A2_addi: + return (Offset >= Hexagon_ADDI_OFFSET_MIN) && + (Offset <= Hexagon_ADDI_OFFSET_MAX); + + case Hexagon::L4_iadd_memopw_io : + case Hexagon::L4_isub_memopw_io : + case Hexagon::L4_add_memopw_io : + case Hexagon::L4_sub_memopw_io : + case Hexagon::L4_and_memopw_io : + case Hexagon::L4_or_memopw_io : + return (0 <= Offset && Offset <= 255); + + case Hexagon::L4_iadd_memoph_io : + case Hexagon::L4_isub_memoph_io : + case Hexagon::L4_add_memoph_io : + case Hexagon::L4_sub_memoph_io : + case Hexagon::L4_and_memoph_io : + case Hexagon::L4_or_memoph_io : + return (0 <= Offset && Offset <= 127); + + case Hexagon::L4_iadd_memopb_io : + case Hexagon::L4_isub_memopb_io : + case Hexagon::L4_add_memopb_io : + case Hexagon::L4_sub_memopb_io : + case Hexagon::L4_and_memopb_io : + case Hexagon::L4_or_memopb_io : + return (0 <= Offset && Offset <= 63); + + // LDri_pred and STriw_pred are pseudo operations, so it has to take offset of + // any size. Later pass knows how to handle it. 
+ case Hexagon::STriw_pred: + case Hexagon::LDriw_pred: + return true; + + case Hexagon::TFR_FI: + case Hexagon::TFR_FIA: + case Hexagon::INLINEASM: + return true; + + case Hexagon::L2_ploadrbt_io: + case Hexagon::L2_ploadrbf_io: + case Hexagon::L2_ploadrubt_io: + case Hexagon::L2_ploadrubf_io: + case Hexagon::S2_pstorerbt_io: + case Hexagon::S2_pstorerbf_io: + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirbt_io: + case Hexagon::S4_storeirbf_io: + return isUInt<6>(Offset); + + case Hexagon::L2_ploadrht_io: + case Hexagon::L2_ploadrhf_io: + case Hexagon::L2_ploadruht_io: + case Hexagon::L2_ploadruhf_io: + case Hexagon::S2_pstorerht_io: + case Hexagon::S2_pstorerhf_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeirht_io: + case Hexagon::S4_storeirhf_io: + return isShiftedUInt<6,1>(Offset); + + case Hexagon::L2_ploadrit_io: + case Hexagon::L2_ploadrif_io: + case Hexagon::S2_pstorerit_io: + case Hexagon::S2_pstorerif_io: + case Hexagon::S4_storeiri_io: + case Hexagon::S4_storeirit_io: + case Hexagon::S4_storeirif_io: + return isShiftedUInt<6,2>(Offset); + + case Hexagon::L2_ploadrdt_io: + case Hexagon::L2_ploadrdf_io: + case Hexagon::S2_pstorerdt_io: + case Hexagon::S2_pstorerdf_io: + return isShiftedUInt<6,3>(Offset); + } // switch + + llvm_unreachable("No offset range is defined for this opcode. " + "Please define it in the above switch statement!"); +} + + +bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const { + return MI && isV60VectorInstruction(MI) && isAccumulator(MI); +} + + +bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const { + if (!MI) + return false; + const uint64_t F = get(MI->getOpcode()).TSFlags; + const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask); + return + V == HexagonII::TypeCVI_VA || + V == HexagonII::TypeCVI_VA_DV; +} + + +bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI)) + return true; + + if (EnableALUForwarding && (isVecALU(ConsMI) || isLateSourceInstr(ConsMI))) + return true; + + if (mayBeNewStore(ConsMI)) + return true; + + return false; +} + + +/// \brief Can these instructions execute at the same time in a bundle. +bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const { + if (DisableNVSchedule) + return false; + if (mayBeNewStore(Second)) { + // Make sure the definition of the first instruction is the value being + // stored. + const MachineOperand &Stored = + Second->getOperand(Second->getNumOperands() - 1); + if (!Stored.isReg()) + return false; + for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) { + const MachineOperand &Op = First->getOperand(i); + if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg()) + return true; + } + } + return false; +} + + +bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const { + for (auto &I : *B) + if (I.isEHLabel()) + return true; + return false; +} + + +// Returns true if an instruction can be converted into a non-extended +// equivalent instruction. +bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const { + short NonExtOpcode; + // Check if the instruction has a register form that uses register in place + // of the extended operand, if so return that as the non-extended form. 
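+  // If there is no direct register form, the switch below walks the
+  // addressing mode through TableGen relation maps, in this order:
+  //   Absolute       -> base + immediate offset (getBaseWithImmOffset)
+  //   BaseImmOffset  -> base + register offset  (getBaseWithRegOffset)
+  //   BaseLongOffset -> register shl form       (getRegShlForm)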
+ if (Hexagon::getRegForm(MI->getOpcode()) >= 0) + return true; + + if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { + // Check addressing mode and retrieve non-ext equivalent instruction. + + switch (getAddrMode(MI)) { + case HexagonII::Absolute : + // Load/store with absolute addressing mode can be converted into + // base+offset mode. + NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode()); + break; + case HexagonII::BaseImmOffset : + // Load/store with base+offset addressing mode can be converted into + // base+register offset addressing mode. However left shift operand should + // be set to 0. + NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); + break; + case HexagonII::BaseLongOffset: + NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode()); + break; + default: + return false; + } + if (NonExtOpcode < 0) + return false; + return true; + } + return false; +} + + +bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), + Hexagon::InstrType_Pseudo) >= 0; +} + + +bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B) + const { + MachineBasicBlock::const_iterator I = B->getFirstTerminator(), E = B->end(); + while (I != E) { + if (I->isBarrier()) + return true; + ++I; + } + return false; +} + + +// Returns true, if a LD insn can be promoted to a cur load. +bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const { + auto &HST = MI->getParent()->getParent()->getSubtarget(); + const uint64_t F = MI->getDesc().TSFlags; + return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) && + HST.hasV60TOps(); +} + + +// Returns true, if a ST insn can be promoted to a new-value store. +bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const { + // There is no stall when ProdMI is not a V60 vector. + if (!isV60VectorInstruction(ProdMI)) + return false; + + // There is no stall when ProdMI and ConsMI are not dependent. + if (!isDependent(ProdMI, ConsMI)) + return false; + + // When Forward Scheduling is enabled, there is no stall if ProdMI and ConsMI + // are scheduled in consecutive packets. + if (isVecUsableNextPacket(ProdMI, ConsMI)) + return false; + + return true; +} + + +bool HexagonInstrInfo::producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator BII) const { + // There is no stall when I is not a V60 vector. + if (!isV60VectorInstruction(MI)) + return false; + + MachineBasicBlock::const_instr_iterator MII = BII; + MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end(); + + if (!(*MII).isBundle()) { + const MachineInstr *J = &*MII; + if (!isV60VectorInstruction(J)) + return false; + else if (isVecUsableNextPacket(J, MI)) + return false; + return true; + } + + for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) { + const MachineInstr *J = &*MII; + if (producesStall(J, MI)) + return true; + } + return false; +} + + +bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI, + unsigned PredReg) const { + for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { + const MachineOperand &MO = MI->getOperand(opNum); + if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg)) + return false; // Predicate register must be explicitly defined. 
+ } + + // Hexagon Programmer's Reference says that decbin, memw_locked, and + // memd_locked cannot be used as .new as well, + // but we don't seem to have these instructions defined. + return MI->getOpcode() != Hexagon::A4_tlbmatch; +} + + +bool HexagonInstrInfo::PredOpcodeHasJMP_c(unsigned Opcode) const { + return (Opcode == Hexagon::J2_jumpt) || + (Opcode == Hexagon::J2_jumpf) || + (Opcode == Hexagon::J2_jumptnew) || + (Opcode == Hexagon::J2_jumpfnew) || + (Opcode == Hexagon::J2_jumptnewpt) || + (Opcode == Hexagon::J2_jumpfnewpt); +} + + +bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { + if (Cond.empty() || !isPredicated(Cond[0].getImm())) + return false; + return !isPredicatedTrue(Cond[0].getImm()); +} + + +unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask; +} + + +// Returns the base register in a memory access (load/store). The offset is +// returned in Offset and the access size is returned in AccessSize. +unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI, + int &Offset, unsigned &AccessSize) const { + // Return if it is not a base+offset type instruction or a MemOp. + if (getAddrMode(MI) != HexagonII::BaseImmOffset && + getAddrMode(MI) != HexagonII::BaseLongOffset && + !isMemOp(MI) && !isPostIncrement(MI)) return 0; - unsigned Size = MI->getDesc().getSize(); - if (!Size) - // Assume the default insn size in case it cannot be determined - // for whatever reason. - Size = HEXAGON_INSTR_SIZE; + // Since it is a memory access instruction, getMemAccessSize() should never + // return 0. + assert (getMemAccessSize(MI) && + "BaseImmOffset or BaseLongOffset or MemOp without accessSize"); - if (isConstExtended(MI) || isExtended(MI)) - Size += HEXAGON_INSTR_SIZE; + // Return Values of getMemAccessSize() are + // 0 - Checked in the assert above. + // 1, 2, 3, 4 & 7, 8 - The statement below is correct for all these. + // MemAccessSize is represented as 1+log2(N) where N is size in bits. + AccessSize = (1U << (getMemAccessSize(MI) - 1)); - return Size; + unsigned basePos = 0, offsetPos = 0; + if (!getBaseAndOffsetPosition(MI, basePos, offsetPos)) + return 0; + + // Post increment updates its EA after the mem access, + // so we need to treat its offset as zero. + if (isPostIncrement(MI)) + Offset = 0; + else { + Offset = MI->getOperand(offsetPos).getImm(); + } + + return MI->getOperand(basePos).getReg(); +} + + +/// Return the position of the base and offset operands for this instruction. +bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI, + unsigned &BasePos, unsigned &OffsetPos) const { + // Deal with memops first. + if (isMemOp(MI)) { + assert (MI->getOperand(0).isReg() && MI->getOperand(1).isImm() && + "Bad Memop."); + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayStore()) { + BasePos = 0; + OffsetPos = 1; + } else if (MI->mayLoad()) { + BasePos = 1; + OffsetPos = 2; + } else + return false; + + if (isPredicated(MI)) { + BasePos++; + OffsetPos++; + } + if (isPostIncrement(MI)) { + BasePos++; + OffsetPos++; + } + + if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm()) + return false; + + return true; +} + + +// Inserts branching instructions in reverse order of their occurence. +// e.g. 
jump_t t1 (i1)
+//        jump t2   (i2)
+// Jumpers = {i2, i1}
+SmallVector<MachineInstr*, 2> HexagonInstrInfo::getBranchingInstrs(
+      MachineBasicBlock& MBB) const {
+  SmallVector<MachineInstr*, 2> Jumpers;
+  // If the block has no terminators, it just falls into the block after it.
+  MachineBasicBlock::instr_iterator I = MBB.instr_end();
+  if (I == MBB.instr_begin())
+    return Jumpers;
+
+  // A basic block may look like this:
+  //
+  //  [   insn
+  //     EH_LABEL
+  //      insn
+  //      insn
+  //      insn
+  //     EH_LABEL
+  //      insn  ]
+  //
+  // It has two successors but does not have a terminator, and we don't
+  // know how to handle such a block.
+  do {
+    --I;
+    if (I->isEHLabel())
+      return Jumpers;
+  } while (I != MBB.instr_begin());
+
+  I = MBB.instr_end();
+  --I;
+
+  while (I->isDebugValue()) {
+    if (I == MBB.instr_begin())
+      return Jumpers;
+    --I;
+  }
+  if (!isUnpredicatedTerminator(&*I))
+    return Jumpers;
+
+  // Get the last instruction in the block.
+  MachineInstr *LastInst = &*I;
+  Jumpers.push_back(LastInst);
+  MachineInstr *SecondLastInst = nullptr;
+  // Find one more terminator if present.
+  do {
+    if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(&*I)) {
+      if (!SecondLastInst) {
+        SecondLastInst = &*I;
+        Jumpers.push_back(SecondLastInst);
+      } else // This is a third branch.
+        return Jumpers;
+    }
+    if (I == MBB.instr_begin())
+      break;
+    --I;
+  } while (true);
+  return Jumpers;
+}
+
+
+// Returns the operand index for the constant-extended instruction.
+unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
+  const uint64_t F = MI->getDesc().TSFlags;
+  return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask;
+}
+
+// See if the instruction could potentially be a compound candidate.
+// If so, return its group. Zero otherwise.
+HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
+      const MachineInstr *MI) const {
+  unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
+
+  switch (MI->getOpcode()) {
+  default:
+    return HexagonII::HCG_None;
+  //
+  // Compound pairs.
+  // "p0=cmp.eq(Rs16,Rt16); if (p0.new) jump:nt #r9:2"
+  // "Rd16=#U6 ; jump #r9:2"
+  // "Rd16=Rs16 ; jump #r9:2"
+  //
+  case Hexagon::C2_cmpeq:
+  case Hexagon::C2_cmpgt:
+  case Hexagon::C2_cmpgtu:
+    DstReg = MI->getOperand(0).getReg();
+    Src1Reg = MI->getOperand(1).getReg();
+    Src2Reg = MI->getOperand(2).getReg();
+    if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+        (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+        isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::C2_cmpeqi:
+  case Hexagon::C2_cmpgti:
+  case Hexagon::C2_cmpgtui:
+    // P0 = cmp.eq(Rs,#u2)
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (Hexagon::PredRegsRegClass.contains(DstReg) &&
+        (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
+        isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
+        ((isUInt<5>(MI->getOperand(2).getImm())) ||
+         (MI->getOperand(2).getImm() == -1)))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::A2_tfr:
+    // Rd = Rs
+    DstReg = MI->getOperand(0).getReg();
+    SrcReg = MI->getOperand(1).getReg();
+    if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
+      return HexagonII::HCG_A;
+    break;
+  case Hexagon::A2_tfrsi:
+    // Rd = #u6
+    // Do not test for the #u6 size since the constant is getting extended
+    // regardless, and a compound could still be formed.
+ DstReg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(DstReg)) + return HexagonII::HCG_A; + break; + case Hexagon::S2_tstbit_i: + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + (Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) && + MI->getOperand(2).isImm() && + isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0)) + return HexagonII::HCG_A; + break; + // The fact that .new form is used pretty much guarantees + // that predicate register will match. Nevertheless, + // there could be some false positives without additional + // checking. + case Hexagon::J2_jumptnew: + case Hexagon::J2_jumpfnew: + case Hexagon::J2_jumptnewpt: + case Hexagon::J2_jumpfnewpt: + Src1Reg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(Src1Reg) && + (Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg)) + return HexagonII::HCG_B; + break; + // Transfer and jump: + // Rd=#U6 ; jump #r9:2 + // Rd=Rs ; jump #r9:2 + // Do not test for jump range here. + case Hexagon::J2_jump: + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + return HexagonII::HCG_C; + break; + } + + return HexagonII::HCG_None; +} + + +// Returns -1 when there is no opcode found. +unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA, + const MachineInstr *GB) const { + assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A); + assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B); + if ((GA->getOpcode() != Hexagon::C2_cmpeqi) || + (GB->getOpcode() != Hexagon::J2_jumptnew)) + return -1; + unsigned DestReg = GA->getOperand(0).getReg(); + if (!GB->readsRegister(DestReg)) + return -1; + if (DestReg == Hexagon::P0) + return Hexagon::J4_cmpeqi_tp0_jump_nt; + if (DestReg == Hexagon::P1) + return Hexagon::J4_cmpeqi_tp1_jump_nt; + return -1; +} + + +int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const { + enum Hexagon::PredSense inPredSense; + inPredSense = invertPredicate ? Hexagon::PredSense_false : + Hexagon::PredSense_true; + int CondOpcode = Hexagon::getPredOpcode(Opc, inPredSense); + if (CondOpcode >= 0) // Valid Conditional opcode/instruction + return CondOpcode; + + // This switch case will be removed once all the instructions have been + // modified to use relation maps. + switch(Opc) { + case Hexagon::TFRI_f: + return !invertPredicate ? Hexagon::TFRI_cPt_f : + Hexagon::TFRI_cNotPt_f; + } + + llvm_unreachable("Unexpected predicable instruction"); +} + + +// Return the cur value instruction for a given store. +int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const { + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown .cur type"); + case Hexagon::V6_vL32b_pi: + return Hexagon::V6_vL32b_cur_pi; + case Hexagon::V6_vL32b_ai: + return Hexagon::V6_vL32b_cur_ai; + //128B + case Hexagon::V6_vL32b_pi_128B: + return Hexagon::V6_vL32b_cur_pi_128B; + case Hexagon::V6_vL32b_ai_128B: + return Hexagon::V6_vL32b_cur_ai_128B; + } + return 0; +} + + + +// The diagram below shows the steps involved in the conversion of a predicated +// store instruction to its .new predicated new-value form. +// +// p.new NV store [ if(p0.new)memw(R0+#0)=R2.new ] +// ^ ^ +// / \ (not OK. it will cause new-value store to be +// / X conditional on p0.new while R2 producer is +// / \ on p0) +// / \. 
+// p.new store p.old NV store +// [if(p0.new)memw(R0+#0)=R2] [if(p0)memw(R0+#0)=R2.new] +// ^ ^ +// \ / +// \ / +// \ / +// p.old store +// [if (p0)memw(R0+#0)=R2] +// +// +// The following set of instructions further explains the scenario where +// conditional new-value store becomes invalid when promoted to .new predicate +// form. +// +// { 1) if (p0) r0 = add(r1, r2) +// 2) p0 = cmp.eq(r3, #0) } +// +// 3) if (p0) memb(r1+#0) = r0 --> this instruction can't be grouped with +// the first two instructions because in instr 1, r0 is conditional on old value +// of p0 but its use in instr 3 is conditional on p0 modified by instr 2 which +// is not valid for new-value stores. +// Predicated new value stores (i.e. if (p0) memw(..)=r0.new) are excluded +// from the "Conditional Store" list. Because a predicated new value store +// would NOT be promoted to a double dot new store. See diagram below: +// This function returns yes for those stores that are predicated but not +// yet promoted to predicate dot new instructions. +// +// +---------------------+ +// /-----| if (p0) memw(..)=r0 |---------\~ +// || +---------------------+ || +// promote || /\ /\ || promote +// || /||\ /||\ || +// \||/ demote || \||/ +// \/ || || \/ +// +-------------------------+ || +-------------------------+ +// | if (p0.new) memw(..)=r0 | || | if (p0) memw(..)=r0.new | +// +-------------------------+ || +-------------------------+ +// || || || +// || demote \||/ +// promote || \/ NOT possible +// || || /\~ +// \||/ || /||\~ +// \/ || || +// +-----------------------------+ +// | if (p0.new) memw(..)=r0.new | +// +-----------------------------+ +// Double Dot New Store +// +// Returns the most basic instruction for the .new predicated instructions and +// new-value stores. +// For example, all of the following instructions will be converted back to the +// same instruction: +// 1) if (p0.new) memw(R0+#0) = R1.new ---> +// 2) if (p0) memw(R0+#0)= R1.new -------> if (p0) memw(R0+#0) = R1 +// 3) if (p0.new) memw(R0+#0) = R1 ---> +// +// To understand the translation of instruction 1 to its original form, consider +// a packet with 3 instructions. +// { p0 = cmp.eq(R0,R1) +// if (p0.new) R2 = add(R3, R4) +// R5 = add (R3, R1) +// } +// if (p0) memw(R5+#0) = R2 <--- trying to include it in the previous packet +// +// This instruction can be part of the previous packet only if both p0 and R2 +// are promoted to .new values. This promotion happens in steps, first +// predicate register is promoted to .new and in the next iteration R2 is +// promoted. Therefore, in case of dependence check failure (due to R5) during +// next iteration, it should be converted back to its most basic form. + + +// Return the new value instruction for a given store. +int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const { + int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode()); + if (NVOpcode >= 0) // Valid new-value store instruction. 
+ return NVOpcode; + + switch (MI->getOpcode()) { + default: llvm_unreachable("Unknown .new type"); + case Hexagon::S4_storerb_ur: + return Hexagon::S4_storerbnew_ur; + + case Hexagon::S2_storerb_pci: + return Hexagon::S2_storerb_pci; + + case Hexagon::S2_storeri_pci: + return Hexagon::S2_storeri_pci; + + case Hexagon::S2_storerh_pci: + return Hexagon::S2_storerh_pci; + + case Hexagon::S2_storerd_pci: + return Hexagon::S2_storerd_pci; + + case Hexagon::S2_storerf_pci: + return Hexagon::S2_storerf_pci; + + case Hexagon::V6_vS32b_ai: + return Hexagon::V6_vS32b_new_ai; + + case Hexagon::V6_vS32b_pi: + return Hexagon::V6_vS32b_new_pi; + + // 128B + case Hexagon::V6_vS32b_ai_128B: + return Hexagon::V6_vS32b_new_ai_128B; + + case Hexagon::V6_vS32b_pi_128B: + return Hexagon::V6_vS32b_new_pi_128B; + } + return 0; } // Returns the opcode to use when converting MI, which is a conditional jump, // into a conditional instruction which uses the .new value of the predicate. // We also use branch probabilities to add a hint to the jump. -int -HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, - const - MachineBranchProbabilityInfo *MBPI) const { - +int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { // We assume that block can have at most two successors. bool taken = false; - MachineBasicBlock *Src = MI->getParent(); - MachineOperand *BrTarget = &MI->getOperand(1); - MachineBasicBlock *Dst = BrTarget->getMBB(); + const MachineBasicBlock *Src = MI->getParent(); + const MachineOperand *BrTarget = &MI->getOperand(1); + const MachineBasicBlock *Dst = BrTarget->getMBB(); const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst); if (Prediction >= BranchProbability(1,2)) @@ -1872,35 +3127,452 @@ HexagonInstrInfo::getDotNewPredJumpOp(MachineInstr *MI, llvm_unreachable("Unexpected jump instruction."); } } -// Returns true if a particular operand is extendable for an instruction. -bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI, - unsigned short OperandNum) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask) - == OperandNum; + +// Return .new predicate version for an instruction. +int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const { + int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode()); + if (NewOpcode >= 0) // Valid predicate new instruction + return NewOpcode; + + switch (MI->getOpcode()) { + // Condtional Jumps + case Hexagon::J2_jumpt: + case Hexagon::J2_jumpf: + return getDotNewPredJumpOp(MI, MBPI); + + default: + assert(0 && "Unknown .new type"); + } + return 0; } -// Returns Operand Index for the constant extended instruction. 
-unsigned short HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask); + +int HexagonInstrInfo::getDotOldOp(const int opc) const { + int NewOp = opc; + if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form + NewOp = Hexagon::getPredOldOpcode(NewOp); + assert(NewOp >= 0 && + "Couldn't change predicate new instruction to its old form."); + } + + if (isNewValueStore(NewOp)) { // Convert into non-new-value format + NewOp = Hexagon::getNonNVStore(NewOp); + assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); + } + return NewOp; } -// Returns the min value that doesn't need to be extended. -int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { - const uint64_t F = MI->getDesc().TSFlags; - unsigned isSigned = (F >> HexagonII::ExtentSignedPos) - & HexagonII::ExtentSignedMask; - unsigned bits = (F >> HexagonII::ExtentBitsPos) - & HexagonII::ExtentBitsMask; - if (isSigned) // if value is signed - return -1U << (bits - 1); - else +// See if instruction could potentially be a duplex candidate. +// If so, return its group. Zero otherwise. +HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup( + const MachineInstr *MI) const { + unsigned DstReg, SrcReg, Src1Reg, Src2Reg; + auto &HRI = getRegisterInfo(); + + switch (MI->getOpcode()) { + default: + return HexagonII::HSIG_None; + // + // Group L1: + // + // Rd = memw(Rs+#u4:2) + // Rd = memub(Rs+#u4:0) + case Hexagon::L2_loadri_io: + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + // Special case this one from Group L2. + // Rd = memw(r29+#u5:2) + if (isIntRegForSubInst(DstReg)) { + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + // Rd = memw(Rs+#u4:2) + if (isIntRegForSubInst(SrcReg) && + (MI->getOperand(2).isImm() && + isShiftedUInt<4,2>(MI->getOperand(2).getImm()))) + return HexagonII::HSIG_L1; + } + break; + case Hexagon::L2_loadrub_io: + // Rd = memub(Rs+#u4:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L1; + break; + // + // Group L2: + // + // Rd = memh/memuh(Rs+#u3:1) + // Rd = memb(Rs+#u3:0) + // Rd = memw(r29+#u5:2) - Handled above. 
+ // Rdd = memd(r29+#u5:3) + // deallocframe + // [if ([!]p0[.new])] dealloc_return + // [if ([!]p0[.new])] jumpr r31 + case Hexagon::L2_loadrh_io: + case Hexagon::L2_loadruh_io: + // Rd = memh/memuh(Rs+#u3:1) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isShiftedUInt<3,1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrb_io: + // Rd = memb(Rs+#u3:0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + isUInt<3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + case Hexagon::L2_loadrd_io: + // Rdd = memd(r29+#u5:3) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && + Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && + MI->getOperand(2).isImm() && + isShiftedUInt<5,3>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_L2; + break; + // dealloc_return is not documented in Hexagon Manual, but marked + // with A_SUBINSN attribute in iset_v4classic.py. + case Hexagon::RESTORE_DEALLOC_RET_JMP_V4: + case Hexagon::L4_return: + case Hexagon::L2_deallocframe: + return HexagonII::HSIG_L2; + case Hexagon::EH_RETURN_JMPR: + case Hexagon::JMPret : + // jumpr r31 + // Actual form JMPR %PC, %R31, %R0. + DstReg = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)) + return HexagonII::HSIG_L2; + break; + case Hexagon::JMPrett: + case Hexagon::JMPretf: + case Hexagon::JMPrettnewpt: + case Hexagon::JMPretfnewpt : + case Hexagon::JMPrettnew : + case Hexagon::JMPretfnew : + DstReg = MI->getOperand(1).getReg(); + SrcReg = MI->getOperand(0).getReg(); + // [if ([!]p0[.new])] jumpr r31 + if ((Hexagon::PredRegsRegClass.contains(SrcReg) && + (Hexagon::P0 == SrcReg)) && + (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))) + return HexagonII::HSIG_L2; + break; + case Hexagon::L4_return_t : + case Hexagon::L4_return_f : + case Hexagon::L4_return_tnew_pnt : + case Hexagon::L4_return_fnew_pnt : + case Hexagon::L4_return_tnew_pt : + case Hexagon::L4_return_fnew_pt : + // [if ([!]p0[.new])] dealloc_return + SrcReg = MI->getOperand(0).getReg(); + if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg)) + return HexagonII::HSIG_L2; + break; + // + // Group S1: + // + // memw(Rs+#u4:2) = Rt + // memb(Rs+#u4:0) = Rt + case Hexagon::S2_storeri_io: + // Special case this one from Group S2. 
+ // memw(r29+#u5:2) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (Hexagon::IntRegsRegClass.contains(Src1Reg) && + isIntRegForSubInst(Src2Reg) && + HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && + isShiftedUInt<5,2>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S2; + // memw(Rs+#u4:2) = Rt + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && + isShiftedUInt<4,2>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + case Hexagon::S2_storerb_io: + // memb(Rs+#u4:0) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + // + // Group S2: + // + // memh(Rs+#u3:1) = Rt + // memw(r29+#u5:2) = Rt + // memd(r29+#s6:3) = Rtt + // memw(Rs+#u4:2) = #U1 + // memb(Rs+#u4) = #U1 + // allocframe(#u5:3) + case Hexagon::S2_storerh_io: + // memh(Rs+#u3:1) = Rt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) && + MI->getOperand(1).isImm() && + isShiftedUInt<3,1>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S1; + break; + case Hexagon::S2_storerd_io: + // memd(r29+#s6:3) = Rtt + Src1Reg = MI->getOperand(0).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isDblRegForSubInst(Src2Reg, HRI) && + Hexagon::IntRegsRegClass.contains(Src1Reg) && + HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() && + isShiftedInt<6,3>(MI->getOperand(1).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S4_storeiri_io: + // memw(Rs+#u4:2) = #U1 + Src1Reg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && + isShiftedUInt<4,2>(MI->getOperand(1).getImm()) && + MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S4_storeirb_io: + // memb(Rs+#u4) = #U1 + Src1Reg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() && + isUInt<4>(MI->getOperand(1).getImm()) && MI->getOperand(2).isImm() && + MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_S2; + break; + case Hexagon::S2_allocframe: + if (MI->getOperand(0).isImm() && + isShiftedUInt<5,3>(MI->getOperand(0).getImm())) + return HexagonII::HSIG_S1; + break; + // + // Group A: + // + // Rx = add(Rx,#s7) + // Rd = Rs + // Rd = #u6 + // Rd = #-1 + // if ([!]P0[.new]) Rd = #0 + // Rd = add(r29,#u6:2) + // Rx = add(Rx,Rs) + // P0 = cmp.eq(Rs,#u2) + // Rdd = combine(#0,Rs) + // Rdd = combine(Rs,#0) + // Rdd = combine(#u2,#U2) + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + // Rd = sxth/sxtb/zxtb/zxth(Rs) + // Rd = and(Rs,#1) + case Hexagon::A2_addi: + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg)) { + // Rd = add(r29,#u6:2) + if (Hexagon::IntRegsRegClass.contains(SrcReg) && + HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() && + isShiftedUInt<6,2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + // Rx = add(Rx,#s7) + if ((DstReg == SrcReg) && MI->getOperand(2).isImm() && + isInt<7>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + // Rd = add(Rs,#1) + // Rd = add(Rs,#-1) + if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() && + 
((MI->getOperand(2).getImm() == 1) || + (MI->getOperand(2).getImm() == -1))) + return HexagonII::HSIG_A; + } + break; + case Hexagon::A2_add: + // Rx = add(Rx,Rs) + DstReg = MI->getOperand(0).getReg(); + Src1Reg = MI->getOperand(1).getReg(); + Src2Reg = MI->getOperand(2).getReg(); + if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) && + isIntRegForSubInst(Src2Reg)) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_andir: + // Same as zxtb. + // Rd16=and(Rs16,#255) + // Rd16=and(Rs16,#1) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && + ((MI->getOperand(2).getImm() == 1) || + (MI->getOperand(2).getImm() == 255))) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_tfr: + // Rd = Rs + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_tfrsi: + // Rd = #u6 + // Do not test for #u6 size since the const is getting extended + // regardless and compound could be formed. + // Rd = #-1 + DstReg = MI->getOperand(0).getReg(); + if (isIntRegForSubInst(DstReg)) + return HexagonII::HSIG_A; + break; + case Hexagon::C2_cmoveit: + case Hexagon::C2_cmovenewit: + case Hexagon::C2_cmoveif: + case Hexagon::C2_cmovenewif: + // if ([!]P0[.new]) Rd = #0 + // Actual form: + // %R16 = C2_cmovenewit %P0, 0, %R16; + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && + Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg && + MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) + return HexagonII::HSIG_A; + break; + case Hexagon::C2_cmpeqi: + // P0 = cmp.eq(Rs,#u2) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (Hexagon::PredRegsRegClass.contains(DstReg) && + Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) && + MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + // Rdd = combine(#u2,#U2) + DstReg = MI->getOperand(0).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && + ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) || + (MI->getOperand(1).isGlobal() && + isUInt<2>(MI->getOperand(1).getOffset()))) && + ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) || + (MI->getOperand(2).isGlobal() && + isUInt<2>(MI->getOperand(2).getOffset())))) + return HexagonII::HSIG_A; + break; + case Hexagon::A4_combineri: + // Rdd = combine(Rs,#0) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && + ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) || + (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0))) + return HexagonII::HSIG_A; + break; + case Hexagon::A4_combineir: + // Rdd = combine(#0,Rs) + DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(2).getReg(); + if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) && + ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) || + (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0))) + return HexagonII::HSIG_A; + break; + case Hexagon::A2_sxtb: + case Hexagon::A2_sxth: + case Hexagon::A2_zxtb: + case Hexagon::A2_zxth: + // Rd = sxth/sxtb/zxtb/zxth(Rs) + 
DstReg = MI->getOperand(0).getReg(); + SrcReg = MI->getOperand(1).getReg(); + if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg)) + return HexagonII::HSIG_A; + break; + } + + return HexagonII::HSIG_None; +} + + +short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real); +} + + +// Return first non-debug instruction in the basic block. +MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB) + const { + for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) { + MachineInstr *MI = &*MII; + if (MI->isDebugValue()) + continue; + return MI; + } + return nullptr; +} + + +unsigned HexagonInstrInfo::getInstrTimingClassLatency( + const InstrItineraryData *ItinData, const MachineInstr *MI) const { + // Default to one cycle for no itinerary. However, an "empty" itinerary may + // still have a MinLatency property, which getStageLatency checks. + if (!ItinData) + return getInstrLatency(ItinData, MI); + + // Get the latency embedded in the itinerary. If we're not using timing class + // latencies or if we using BSB scheduling, then restrict the maximum latency + // to 1 (that is, either 0 or 1). + if (MI->isTransient()) return 0; + unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass()); + if (!EnableTimingClassLatency || + MI->getParent()->getParent()->getSubtarget(). + useBSBScheduling()) + if (Latency > 1) + Latency = 1; + return Latency; } + +// inverts the predication logic. +// p -> NotP +// NotP -> P +bool HexagonInstrInfo::getInvertedPredSense( + SmallVectorImpl &Cond) const { + if (Cond.empty()) + return false; + unsigned Opc = getInvertedPredicatedOpcode(Cond[0].getImm()); + Cond[0].setImm(Opc); + return true; +} + + +unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const { + int InvPredOpcode; + InvPredOpcode = isPredicatedTrue(Opc) ? Hexagon::getFalsePredOpcode(Opc) + : Hexagon::getTruePredOpcode(Opc); + if (InvPredOpcode >= 0) // Valid instruction with the inverted predicate. + return InvPredOpcode; + + llvm_unreachable("Unexpected predicated instruction"); +} + + // Returns the max value that doesn't need to be extended. int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { const uint64_t F = MI->getDesc().TSFlags; @@ -1915,44 +3587,30 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const { return ~(-1U << bits); } -// Returns true if an instruction can be converted into a non-extended -// equivalent instruction. -bool HexagonInstrInfo::NonExtEquivalentExists (const MachineInstr *MI) const { - short NonExtOpcode; - // Check if the instruction has a register form that uses register in place - // of the extended operand, if so return that as the non-extended form. - if (Hexagon::getRegForm(MI->getOpcode()) >= 0) - return true; - - if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) { - // Check addressing mode and retrieve non-ext equivalent instruction. - - switch (getAddrMode(MI)) { - case HexagonII::Absolute : - // Load/store with absolute addressing mode can be converted into - // base+offset mode. - NonExtOpcode = Hexagon::getBasedWithImmOffset(MI->getOpcode()); - break; - case HexagonII::BaseImmOffset : - // Load/store with base+offset addressing mode can be converted into - // base+register offset addressing mode. However left shift operand should - // be set to 0. 
- NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode()); - break; - default: - return false; - } - if (NonExtOpcode < 0) - return false; - return true; - } - return false; +unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask; } -// Returns opcode of the non-extended equivalent instruction. -short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { +// Returns the min value that doesn't need to be extended. +int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const { + const uint64_t F = MI->getDesc().TSFlags; + unsigned isSigned = (F >> HexagonII::ExtentSignedPos) + & HexagonII::ExtentSignedMask; + unsigned bits = (F >> HexagonII::ExtentBitsPos) + & HexagonII::ExtentBitsMask; + + if (isSigned) // if value is signed + return -1U << (bits - 1); + else + return 0; +} + + +// Returns opcode of the non-extended equivalent instruction. +short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const { // Check if the instruction has a register form that uses register in place // of the extended operand, if so return that as the non-extended form. short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode()); @@ -1963,9 +3621,12 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { // Check addressing mode and retrieve non-ext equivalent instruction. switch (getAddrMode(MI)) { case HexagonII::Absolute : - return Hexagon::getBasedWithImmOffset(MI->getOpcode()); + return Hexagon::getBaseWithImmOffset(MI->getOpcode()); case HexagonII::BaseImmOffset : return Hexagon::getBaseWithRegOffset(MI->getOpcode()); + case HexagonII::BaseLongOffset: + return Hexagon::getRegShlForm(MI->getOpcode()); + default: return -1; } @@ -1973,29 +3634,9 @@ short HexagonInstrInfo::getNonExtOpcode (const MachineInstr *MI) const { return -1; } -bool HexagonInstrInfo::PredOpcodeHasJMP_c(Opcode_t Opcode) const { - return (Opcode == Hexagon::J2_jumpt) || - (Opcode == Hexagon::J2_jumpf) || - (Opcode == Hexagon::J2_jumptnewpt) || - (Opcode == Hexagon::J2_jumpfnewpt) || - (Opcode == Hexagon::J2_jumpt) || - (Opcode == Hexagon::J2_jumpf); -} - -bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef Cond) const { - if (Cond.empty() || !isPredicated(Cond[0].getImm())) - return false; - return !isPredicatedTrue(Cond[0].getImm()); -} - -bool HexagonInstrInfo::isEndLoopN(Opcode_t Opcode) const { - return (Opcode == Hexagon::ENDLOOP0 || - Opcode == Hexagon::ENDLOOP1); -} bool HexagonInstrInfo::getPredReg(ArrayRef Cond, - unsigned &PredReg, unsigned &PredRegPos, - unsigned &PredRegFlags) const { + unsigned &PredReg, unsigned &PredRegPos, unsigned &PredRegFlags) const { if (Cond.empty()) return false; assert(Cond.size() == 2); @@ -2014,3 +3655,174 @@ bool HexagonInstrInfo::getPredReg(ArrayRef Cond, return true; } + +short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const { + return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo); +} + + +short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const { + return Hexagon::getRegForm(MI->getOpcode()); +} + + +// Return the number of bytes required to encode the instruction. +// Hexagon instructions are fixed length, 4 bytes, unless they +// use a constant extender, which requires another 4 bytes. +// For debug instructions and prolog labels, return 0. 
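+// For example (illustrative values): a plain "r0 = add(r1, #4)" encodes in
+// one 4-byte word, while a constant-extended "r0 = add(r1, #0x12345678)"
+// needs an extender word as well, for 4 + 4 = 8 bytes.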
+unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const { + if (MI->isDebugValue() || MI->isPosition()) + return 0; + + unsigned Size = MI->getDesc().getSize(); + if (!Size) + // Assume the default insn size in case it cannot be determined + // for whatever reason. + Size = HEXAGON_INSTR_SIZE; + + if (isConstExtended(MI) || isExtended(MI)) + Size += HEXAGON_INSTR_SIZE; + + // Try and compute number of instructions in asm. + if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) { + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + // Disassemble the AsmStr and approximate number of instructions. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + Size = getInlineAsmLength(AsmStr, *MAI); + } + + return Size; +} + + +uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const { + const uint64_t F = MI->getDesc().TSFlags; + return (F >> HexagonII::TypePos) & HexagonII::TypeMask; +} + + +unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const { + const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget(); + const InstrItineraryData &II = *ST.getInstrItineraryData(); + const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass()); + + return IS.getUnits(); +} + + +unsigned HexagonInstrInfo::getValidSubTargets(const unsigned Opcode) const { + const uint64_t F = get(Opcode).TSFlags; + return (F >> HexagonII::validSubTargetPos) & HexagonII::validSubTargetMask; +} + + +// Calculate size of the basic block without debug instructions. +unsigned HexagonInstrInfo::nonDbgBBSize(const MachineBasicBlock *BB) const { + return nonDbgMICount(BB->instr_begin(), BB->instr_end()); +} + + +unsigned HexagonInstrInfo::nonDbgBundleSize( + MachineBasicBlock::const_iterator BundleHead) const { + assert(BundleHead->isBundle() && "Not a bundle header"); + auto MII = BundleHead.getInstrIterator(); + // Skip the bundle header. + return nonDbgMICount(++MII, getBundleEnd(BundleHead)); +} + + +/// immediateExtend - Changes the instruction in place to one using an immediate +/// extender. +void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const { + assert((isExtendable(MI)||isConstExtended(MI)) && + "Instruction must be extendable"); + // Find which operand is extendable. + short ExtOpNum = getCExtOpNum(MI); + MachineOperand &MO = MI->getOperand(ExtOpNum); + // This needs to be something we understand. + assert((MO.isMBB() || MO.isImm()) && + "Branch with unknown extendable field type"); + // Mark given operand as extended. + MO.addTargetFlag(HexagonII::HMOTF_ConstExtended); +} + + +bool HexagonInstrInfo::invertAndChangeJumpTarget( + MachineInstr* MI, MachineBasicBlock* NewTarget) const { + DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#" + << NewTarget->getNumber(); MI->dump();); + assert(MI->isBranch()); + unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode()); + int TargetPos = MI->getNumOperands() - 1; + // In general branch target is the last operand, + // but some implicit defs added at the end might change it. 
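+  // Scan backwards until the basic-block operand is found; for a conditional
+  // jump the operand order is typically (predicate, target MBB), but implicit
+  // operands appended later can push the MBB away from the last slot.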
+ while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB()) + --TargetPos; + assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB()); + MI->getOperand(TargetPos).setMBB(NewTarget); + if (EnableBranchPrediction && isPredicatedNew(MI)) { + NewOpcode = reversePrediction(NewOpcode); + } + MI->setDesc(get(NewOpcode)); + return true; +} + + +void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const { + /* +++ The code below is used to generate a complete set of Hexagon instructions +++ */ + MachineFunction::iterator A = MF.begin(); + MachineBasicBlock &B = *A; + MachineBasicBlock::iterator I = B.begin(); + MachineInstr *MI = &*I; + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewMI; + + for (unsigned insn = TargetOpcode::GENERIC_OP_END+1; + insn < Hexagon::INSTRUCTION_LIST_END; ++insn) { + NewMI = BuildMI(B, MI, DL, get(insn)); + DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) << + " Class: " << NewMI->getDesc().getSchedClass()); + NewMI->eraseFromParent(); + } + /* --- The code above is used to generate a complete set of Hexagon instructions --- */ +} + + +// Inverts the predication logic. +// p -> NotP +// NotP -> P +bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const { + DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump()); + MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode()))); + return true; +} + + +// Reverse the branch prediction. +unsigned HexagonInstrInfo::reversePrediction(unsigned Opcode) const { + int PredRevOpcode = -1; + if (isPredictedTaken(Opcode)) + PredRevOpcode = Hexagon::notTakenBranchPrediction(Opcode); + else + PredRevOpcode = Hexagon::takenBranchPrediction(Opcode); + assert(PredRevOpcode > 0); + return PredRevOpcode; +} + + +// TODO: Add more rigorous validation. +bool HexagonInstrInfo::validateBranchCond(const ArrayRef &Cond) + const { + return Cond.empty() || (Cond[0].isImm() && (Cond.size() != 1)); +} + diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h index d0b8a4631c1d..9530d9f2aa0d 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.h +++ b/lib/Target/Hexagon/HexagonInstrInfo.h @@ -1,4 +1,3 @@ - //===- HexagonInstrInfo.h - Hexagon Instruction Information -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -28,23 +27,18 @@ namespace llvm { struct EVT; class HexagonSubtarget; + class HexagonInstrInfo : public HexagonGenInstrInfo { virtual void anchor(); const HexagonRegisterInfo RI; - const HexagonSubtarget &Subtarget; public: - typedef unsigned Opcode_t; - explicit HexagonInstrInfo(HexagonSubtarget &ST); - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). + /// TargetInstrInfo overrides. /// - const HexagonRegisterInfo &getRegisterInfo() const { return RI; } - /// isLoadFromStackSlot - If the specified machine instruction is a direct + /// If the specified machine instruction is a direct /// load from a stack slot, return the virtual or physical register number of /// the destination along with the FrameIndex of the loaded stack slot. If /// not, return 0.
This predicate must return 0 if the instruction has @@ -52,7 +46,7 @@ public: unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const override; - /// isStoreToStackSlot - If the specified machine instruction is a direct + /// If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the stored stack slot. If /// not, return 0. This predicate must return 0 if the instruction has @@ -60,50 +54,118 @@ public: unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const override; - + /// Analyze the branching code at the end of MBB, returning + /// true if it cannot be understood (e.g. it's a switch dispatch or isn't + /// implemented for a target). Upon success, this returns false and returns + /// with the following information in various cases: + /// + /// 1. If this block ends with no branches (it just falls through to its succ), + /// just return false, leaving TBB/FBB null. + /// 2. If this block ends with only an unconditional branch, it sets TBB to be + /// the destination block. + /// 3. If this block ends with a conditional branch and it falls through to a + /// successor block, it sets TBB to be the branch destination block and a + /// list of operands that evaluate the condition. These operands can be + /// passed to other TargetInstrInfo methods to create new branches. + /// 4. If this block ends with a conditional branch followed by an + /// unconditional branch, it returns the 'true' destination in TBB, the + /// 'false' destination in FBB, and a list of operands that evaluate the + /// condition. These operands can be passed to other TargetInstrInfo + /// methods to create new branches. + /// + /// Note that RemoveBranch and InsertBranch must be implemented to support + /// cases where this method returns success. + /// + /// If AllowModify is true, then this routine is allowed to modify the basic + /// block (e.g. delete instructions after the unconditional branch). + /// bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const override; + /// Remove the branching code at the end of the specified MBB. + /// This is only invoked in cases where AnalyzeBranch returns success. It + /// returns the number of instructions that were removed. unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + /// Insert branch code into the end of the specified MachineBasicBlock. + /// The operands to this method are the same as those + /// returned by AnalyzeBranch. This is only invoked in cases where + /// AnalyzeBranch returns success. It returns the number of instructions + /// inserted. + /// + /// It is also invoked by tail merging to add unconditional branches in + /// cases where AnalyzeBranch doesn't apply because there was no original + /// branch to analyze. At least this much must be implemented, else tail + /// merging needs to be disabled.
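+  ///
+  /// A typical (illustrative) round trip through these three hooks, assuming
+  /// a TargetInstrInfo pointer TII and a DebugLoc DL are in scope:
+  ///   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  ///   SmallVector<MachineOperand, 4> Cond;
+  ///   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
+  ///     TII->RemoveBranch(MBB);
+  ///     TII->InsertBranch(MBB, TBB, FBB, Cond, DL); // re-emit, possibly edited
+  ///   }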
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, DebugLoc DL) const override; - bool analyzeCompare(const MachineInstr *MI, - unsigned &SrcReg, unsigned &SrcReg2, - int &Mask, int &Value) const override; + /// Return true if it's profitable to predicate + /// instructions with accumulated instruction latency of "NumCycles" + /// of the specified basic block, where the probability of the instructions + /// being executed is given by Probability. + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + unsigned ExtraPredCycles, + BranchProbability Probability) const override; + /// Second variant of isProfitableToIfCvt. This one + /// checks for the case where two basic blocks from the true and false paths + /// of an if-then-else (diamond) are predicated on mutually exclusive + /// predicates, where the probability of the true path being taken is given + /// by Probability. + bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumTCycles, unsigned ExtraTCycles, + MachineBasicBlock &FMBB, + unsigned NumFCycles, unsigned ExtraFCycles, + BranchProbability Probability) const override; + + /// Return true if it's profitable for the if-converter to duplicate + /// instructions of specified accumulated instruction latencies in the + /// specified MBB to enable if-conversion. + /// The probability of the instructions being executed is given by + /// Probability. + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const override; + + /// Emit instructions to copy a pair of physical registers. + /// + /// This function should support copies within any legal register class as + /// well as any cross-class copies created during instruction selection. + /// + /// The source and destination registers may overlap, which may require a + /// careful implementation when multiple copy instructions are required for + /// large registers. See for example the ARM target. void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; + /// Store the specified register of the given register class to the specified + /// stack frame index. The store instruction is to be added to the given + /// machine basic block before the specified machine instruction. If isKill + /// is true, the register operand is the last use and must be marked kill. void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - + /// Load the specified register of the given register class from the specified + /// stack frame index. The load instruction is to be added to the given + /// machine basic block before the specified machine instruction.
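+  ///
+  /// An illustrative spill/reload pair through this hook and
+  /// storeRegToStackSlot above (Reg, FI, RC, TRI assumed in scope):
+  ///   TII->storeRegToStackSlot(MBB, MI, Reg, /*isKill=*/true, FI, RC, TRI);
+  ///   TII->loadRegFromStackSlot(MBB, MI, Reg, FI, RC, TRI);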
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; - void loadRegFromAddr(MachineFunction &MF, unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - - /// expandPostRAPseudo - This function is called for all pseudo instructions + /// This function is called for all pseudo instructions /// that remain after register allocation. Many pseudo instructions are /// created to help register allocation. This is the place to convert them /// into real instructions. The target can edit MI in place, or it can insert @@ -111,122 +173,228 @@ public: /// anything was changed. bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef Ops, - MachineBasicBlock::iterator InsertPt, - int FrameIndex) const override; + /// Reverses the branch condition of the specified condition list, + /// returning false on success and true if it cannot be reversed. + bool ReverseBranchCondition(SmallVectorImpl &Cond) + const override; - MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - ArrayRef Ops, - MachineBasicBlock::iterator InsertPt, - MachineInstr *LoadMI) const override { - return nullptr; - } + /// Insert a noop into the instruction stream at the specified point. + void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override; - unsigned createVR(MachineFunction* MF, MVT VT) const; + /// Returns true if the instruction is already predicated. + bool isPredicated(const MachineInstr *MI) const override; - bool isBranch(const MachineInstr *MI) const; - bool isPredicable(MachineInstr *MI) const override; + /// Convert the instruction into a predicated instruction. + /// It returns true if the operation was successful. bool PredicateInstruction(MachineInstr *MI, ArrayRef Cond) const override; - bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - unsigned ExtraPredCycles, - const BranchProbability &Probability) const override; - - bool isProfitableToIfCvt(MachineBasicBlock &TMBB, - unsigned NumTCycles, unsigned ExtraTCycles, - MachineBasicBlock &FMBB, - unsigned NumFCycles, unsigned ExtraFCycles, - const BranchProbability &Probability) const override; - - bool isPredicated(const MachineInstr *MI) const override; - bool isPredicated(unsigned Opcode) const; - bool isPredicatedTrue(const MachineInstr *MI) const; - bool isPredicatedTrue(unsigned Opcode) const; - bool isPredicatedNew(const MachineInstr *MI) const; - bool isPredicatedNew(unsigned Opcode) const; - bool DefinesPredicate(MachineInstr *MI, - std::vector &Pred) const override; + /// Returns true if the first specified predicate + /// subsumes the second, e.g. GE subsumes GT. bool SubsumesPredicate(ArrayRef Pred1, ArrayRef Pred2) const override; - bool - ReverseBranchCondition(SmallVectorImpl &Cond) const override; + /// If the specified instruction defines any predicate + /// or condition code register(s) used for predication, returns true as well + /// as the definition predicate(s) by reference. 
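+  /// On Hexagon, for example, a compare such as "p0 = cmp.gt(r0, r1)" defines
+  /// the predicate register p0 (an illustrative case, not an exhaustive list).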
+ bool DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const override; - bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, - const BranchProbability &Probability) const override; - - DFAPacketizer * - CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override; + /// Return true if the specified instruction can be predicated. + /// By default, this returns true for every instruction with a + /// PredicateOperand. + bool isPredicable(MachineInstr *MI) const override; + /// Test if the given instruction should be considered a scheduling boundary. + /// This primarily includes labels and terminators. bool isSchedulingBoundary(const MachineInstr *MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override; - bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; - bool isValidAutoIncImm(const EVT VT, const int Offset) const; - bool isMemOp(const MachineInstr *MI) const; - bool isSpillPredRegOp(const MachineInstr *MI) const; - bool isU6_3Immediate(const int value) const; - bool isU6_2Immediate(const int value) const; - bool isU6_1Immediate(const int value) const; - bool isU6_0Immediate(const int value) const; - bool isS4_3Immediate(const int value) const; - bool isS4_2Immediate(const int value) const; - bool isS4_1Immediate(const int value) const; - bool isS4_0Immediate(const int value) const; - bool isS12_Immediate(const int value) const; - bool isU6_Immediate(const int value) const; - bool isS8_Immediate(const int value) const; - bool isS6_Immediate(const int value) const; - bool isSaveCalleeSavedRegsCall(const MachineInstr* MI) const; - bool isConditionalTransfer(const MachineInstr* MI) const; + /// Measure the specified inline asm to determine an approximation of its + /// length. + unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const override; + + /// Allocate and return a hazard recognizer to use for this target when + /// scheduling the machine instructions after register allocation. + ScheduleHazardRecognizer* + CreateTargetPostRAHazardRecognizer(const InstrItineraryData*, + const ScheduleDAG *DAG) const override; + + /// For a comparison instruction, return the source registers + /// in SrcReg and SrcReg2 if it has two register operands, and the value it + /// compares against in CmpValue. Return true if the comparison instruction + /// can be analyzed. + bool analyzeCompare(const MachineInstr *MI, + unsigned &SrcReg, unsigned &SrcReg2, + int &Mask, int &Value) const override; + + /// Compute the instruction latency of a given instruction. + /// If the instruction has higher cost when predicated, it's returned via + /// PredCost. + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = 0) const override; + + /// Create a machine-specific model for scheduling. + DFAPacketizer * + CreateTargetScheduleState(const TargetSubtargetInfo &STI) const override; + + // Sometimes, it is possible for the target + // to tell, even without aliasing information, that two MIs access different + // memory addresses. This function returns true if that is the case + // and false otherwise. + bool areMemAccessesTriviallyDisjoint(MachineInstr *MIa, MachineInstr *MIb, + AliasAnalysis *AA = nullptr) + const override; + + + /// HexagonInstrInfo specifics.
+ /// + + const HexagonRegisterInfo &getRegisterInfo() const { return RI; } + + unsigned createVR(MachineFunction* MF, MVT VT) const; + + bool isAbsoluteSet(const MachineInstr* MI) const; + bool isAccumulator(const MachineInstr *MI) const; + bool isComplex(const MachineInstr *MI) const; + bool isCompoundBranchInstr(const MachineInstr *MI) const; + bool isCondInst(const MachineInstr *MI) const; bool isConditionalALU32 (const MachineInstr* MI) const; - bool isConditionalLoad (const MachineInstr* MI) const; + bool isConditionalLoad(const MachineInstr* MI) const; bool isConditionalStore(const MachineInstr* MI) const; - bool isNewValueInst(const MachineInstr* MI) const; - bool isNewValue(const MachineInstr* MI) const; - bool isNewValue(Opcode_t Opcode) const; - bool isDotNewInst(const MachineInstr* MI) const; - int GetDotOldOp(const int opc) const; - int GetDotNewOp(const MachineInstr* MI) const; - int GetDotNewPredOp(MachineInstr *MI, - const MachineBranchProbabilityInfo - *MBPI) const; - bool mayBeNewStore(const MachineInstr* MI) const; + bool isConditionalTransfer(const MachineInstr* MI) const; + bool isConstExtended(const MachineInstr *MI) const; bool isDeallocRet(const MachineInstr *MI) const; - unsigned getInvertedPredicatedOpcode(const int Opc) const; + bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool isDotCurInst(const MachineInstr* MI) const; + bool isDotNewInst(const MachineInstr* MI) const; + bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const; + bool isEarlySourceInstr(const MachineInstr *MI) const; + bool isEndLoopN(unsigned Opcode) const; + bool isExpr(unsigned OpType) const; bool isExtendable(const MachineInstr* MI) const; bool isExtended(const MachineInstr* MI) const; - bool isPostIncrement(const MachineInstr* MI) const; + bool isFloat(const MachineInstr *MI) const; + bool isHVXMemWithAIndirect(const MachineInstr *I, + const MachineInstr *J) const; + bool isIndirectCall(const MachineInstr *MI) const; + bool isIndirectL4Return(const MachineInstr *MI) const; + bool isJumpR(const MachineInstr *MI) const; + bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const; + bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI, + const MachineInstr *ESMI) const; + bool isLateResultInstr(const MachineInstr *MI) const; + bool isLateSourceInstr(const MachineInstr *MI) const; + bool isLoopN(const MachineInstr *MI) const; + bool isMemOp(const MachineInstr *MI) const; + bool isNewValue(const MachineInstr* MI) const; + bool isNewValue(unsigned Opcode) const; + bool isNewValueInst(const MachineInstr* MI) const; + bool isNewValueJump(const MachineInstr* MI) const; + bool isNewValueJump(unsigned Opcode) const; bool isNewValueStore(const MachineInstr* MI) const; bool isNewValueStore(unsigned Opcode) const; - bool isNewValueJump(const MachineInstr* MI) const; - bool isNewValueJump(Opcode_t Opcode) const; - bool isNewValueJumpCandidate(const MachineInstr *MI) const; + bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const; + bool isPostIncrement(const MachineInstr* MI) const; + bool isPredicatedNew(const MachineInstr *MI) const; + bool isPredicatedNew(unsigned Opcode) const; + bool isPredicatedTrue(const MachineInstr *MI) const; + bool isPredicatedTrue(unsigned Opcode) const; + bool isPredicated(unsigned Opcode) const; + bool isPredicateLate(unsigned Opcode) const; + bool isPredictedTaken(unsigned Opcode) const; + bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const; + bool isSolo(const 
MachineInstr* MI) const; + bool isSpillPredRegOp(const MachineInstr *MI) const; + bool isTC1(const MachineInstr *MI) const; + bool isTC2(const MachineInstr *MI) const; + bool isTC2Early(const MachineInstr *MI) const; + bool isTC4x(const MachineInstr *MI) const; + bool isV60VectorInstruction(const MachineInstr *MI) const; + bool isValidAutoIncImm(const EVT VT, const int Offset) const; + bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const; + bool isVecAcc(const MachineInstr *MI) const; + bool isVecALU(const MachineInstr *MI) const; + bool isVecUsableNextPacket(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + + + bool canExecuteInBundle(const MachineInstr *First, + const MachineInstr *Second) const; + bool hasEHLabel(const MachineBasicBlock *B) const; + bool hasNonExtEquivalent(const MachineInstr *MI) const; + bool hasPseudoInstrPair(const MachineInstr *MI) const; + bool hasUncondBranch(const MachineBasicBlock *B) const; + bool mayBeCurLoad(const MachineInstr* MI) const; + bool mayBeNewStore(const MachineInstr* MI) const; + bool producesStall(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) const; + bool producesStall(const MachineInstr *MI, + MachineBasicBlock::const_instr_iterator MII) const; + bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const; + bool PredOpcodeHasJMP_c(unsigned Opcode) const; + bool predOpcodeHasNot(ArrayRef Cond) const; + + + unsigned getAddrMode(const MachineInstr* MI) const; + unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset, + unsigned &AccessSize) const; + bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos, + unsigned &OffsetPos) const; + SmallVector getBranchingInstrs(MachineBasicBlock& MBB) const; + unsigned getCExtOpNum(const MachineInstr *MI) const; + HexagonII::CompoundGroup + getCompoundCandidateGroup(const MachineInstr *MI) const; + unsigned getCompoundOpcode(const MachineInstr *GA, + const MachineInstr *GB) const; + int getCondOpcode(int Opc, bool sense) const; + int getDotCurOp(const MachineInstr* MI) const; + int getDotNewOp(const MachineInstr* MI) const; + int getDotNewPredJumpOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; + int getDotNewPredOp(const MachineInstr *MI, + const MachineBranchProbabilityInfo *MBPI) const; + int getDotOldOp(const int opc) const; + HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI) + const; + short getEquivalentHWInstr(const MachineInstr *MI) const; + MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const; + unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI) const; + bool getInvertedPredSense(SmallVectorImpl &Cond) const; + unsigned getInvertedPredicatedOpcode(const int Opc) const; + int getMaxValue(const MachineInstr *MI) const; + unsigned getMemAccessSize(const MachineInstr* MI) const; + int getMinValue(const MachineInstr *MI) const; + short getNonExtOpcode(const MachineInstr *MI) const; + bool getPredReg(ArrayRef Cond, unsigned &PredReg, + unsigned &PredRegPos, unsigned &PredRegFlags) const; + short getPseudoInstrPair(const MachineInstr *MI) const; + short getRegForm(const MachineInstr *MI) const; + unsigned getSize(const MachineInstr *MI) const; + uint64_t getType(const MachineInstr* MI) const; + unsigned getUnits(const MachineInstr* MI) const; + unsigned getValidSubTargets(const unsigned Opcode) const; + + + /// getInstrTimingClassLatency - Compute the instruction latency of a given + /// instruction using 
Timing Class information, if available. + unsigned nonDbgBBSize(const MachineBasicBlock *BB) const; + unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const; void immediateExtend(MachineInstr *MI) const; - bool isConstExtended(const MachineInstr *MI) const; - unsigned getSize(const MachineInstr *MI) const; - int getDotNewPredJumpOp(MachineInstr *MI, - const MachineBranchProbabilityInfo *MBPI) const; - unsigned getAddrMode(const MachineInstr* MI) const; - bool isOperandExtended(const MachineInstr *MI, - unsigned short OperandNum) const; - unsigned short getCExtOpNum(const MachineInstr *MI) const; - int getMinValue(const MachineInstr *MI) const; - int getMaxValue(const MachineInstr *MI) const; - bool NonExtEquivalentExists (const MachineInstr *MI) const; - short getNonExtOpcode(const MachineInstr *MI) const; - bool PredOpcodeHasJMP_c(Opcode_t Opcode) const; - bool predOpcodeHasNot(ArrayRef Cond) const; - bool isEndLoopN(Opcode_t Opcode) const; - bool getPredReg(ArrayRef Cond, unsigned &PredReg, - unsigned &PredRegPos, unsigned &PredRegFlags) const; - int getCondOpcode(int Opc, bool sense) const; - + bool invertAndChangeJumpTarget(MachineInstr* MI, + MachineBasicBlock* NewTarget) const; + void genAllInsnTimingClasses(MachineFunction &MF) const; + bool reversePredSense(MachineInstr* MI) const; + unsigned reversePrediction(unsigned Opcode) const; + bool validateBranchCond(const ArrayRef &Cond) const; }; } diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 3b32c10ed5b0..5cfeba720d90 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -13,7 +13,7 @@ include "HexagonInstrFormats.td" include "HexagonOperands.td" - +include "HexagonInstrEnc.td" // Pattern fragment that combines the value type and the register class // into a single parameter. 
// The pat frags in the definitions below need to have a named register, @@ -1426,9 +1426,6 @@ def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; -def SDHexagonBR_JT: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; -def HexagonBR_JT: SDNode<"HexagonISD::BR_JT", SDHexagonBR_JT, [SDNPHasChain]>; - class CondStr { string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") "; } @@ -1606,8 +1603,6 @@ def EH_RETURN_JMPR : T_JMPr; def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>; -def: Pat<(HexagonBR_JT (i32 IntRegs:$dst)), - (J2_jumpr IntRegs:$dst)>; def: Pat<(brind (i32 IntRegs:$dst)), (J2_jumpr IntRegs:$dst)>; @@ -2825,7 +2820,7 @@ let CextOpcode = "ADD_acc" in { let isExtentSigned = 1 in def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext, [(set (i32 IntRegs:$dst), - (add (add (i32 IntRegs:$src2), s16_16ImmPred:$src3), + (add (add (i32 IntRegs:$src2), s32ImmPred:$src3), (i32 IntRegs:$src1)))]>, ImmRegRel; def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, @@ -2859,7 +2854,7 @@ class T_MType_acc_pat2 def : T_MType_acc_pat2 ; def : T_MType_acc_pat1 ; -def : T_MType_acc_pat1 ; +def : T_MType_acc_pat1 ; def : T_MType_acc_pat2 ; //===----------------------------------------------------------------------===// @@ -3303,7 +3298,8 @@ class T_store_pi MajOp, bit isHalf, bit isPredNot, bit isPredNew > + bits<4> MajOp, bit isHalf, bit isPredNot, bit isPredNew> : STInst <(outs IntRegs:$_dst_), (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$offset, RC:$src3), !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", @@ -3341,7 +3337,8 @@ class T_pstore_pi ; //===----------------------------------------------------------------------===// // Template class for post increment stores with register offset. //===----------------------------------------------------------------------===// -let isNVStorable = 1 in class T_store_pr MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst <(outs IntRegs:$_dst_), @@ -3416,6 +3412,9 @@ class T_store_pr MajOp, bits<5> src3; let accessSize = AccessSz; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, !if(isHalf,0,1)); + let IClass = 0b1010; let Inst{27-24} = 0b1101; @@ -3430,12 +3429,11 @@ def S2_storerb_pr : T_store_pr<"memb", IntRegs, 0b000, ByteAccess>; def S2_storerh_pr : T_store_pr<"memh", IntRegs, 0b010, HalfWordAccess>; def S2_storeri_pr : T_store_pr<"memw", IntRegs, 0b100, WordAccess>; def S2_storerd_pr : T_store_pr<"memd", DoubleRegs, 0b110, DoubleWordAccess>; - def S2_storerf_pr : T_store_pr<"memh", IntRegs, 0b011, HalfWordAccess, 1>; let opExtendable = 1, isExtentSigned = 1, isPredicable = 1 in class T_store_io MajOp, bit isH = 0> + bits<3> MajOp, bit isH = 0> : STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), mnemonic#"($src1+#$src2) = $src3"#!if(isH,".h","")>, @@ -3455,6 +3453,8 @@ class T_store_io MajOp, bit isH = 0> { let CextOpcode = CextOp, BaseOpcode = CextOp#_indexed in { @@ -3665,7 +3668,7 @@ def S2_allocframe: ST0Inst < // S2_storer[bhwdf]_pci: Store byte/half/word/double. 
// S2_storer[bhwdf]_pci -> S2_storerbnew_pci -let Uses = [CS], isNVStorable = 1 in +let Uses = [CS] in class T_store_pci MajOp, MemAccessSize AlignSize, string RegSrc = "Rt"> @@ -3679,6 +3682,8 @@ class T_store_pci Mu; bits<5> Rt; let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); let IClass = 0b1010; let Inst{27-25} = 0b100; @@ -3696,15 +3701,15 @@ class T_store_pci ; + ByteAccess>; def S2_storerh_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1010, - HalfWordAccess>; + HalfWordAccess>; def S2_storerf_pci : T_store_pci<"memh", IntRegs, s4_1Imm, 0b1011, - HalfWordAccess, "Rt.h">; + HalfWordAccess, "Rt.h">; def S2_storeri_pci : T_store_pci<"memw", IntRegs, s4_2Imm, 0b1100, - WordAccess>; + WordAccess>; def S2_storerd_pci : T_store_pci<"memd", DoubleRegs, s4_3Imm, 0b1110, - DoubleWordAccess>; + DoubleWordAccess>; let Uses = [CS], isNewValue = 1, mayStore = 1, isNVStore = 1, opNewValue = 4 in class T_storenew_pci ; //===----------------------------------------------------------------------===// // Circular stores with auto-increment register //===----------------------------------------------------------------------===// -let Uses = [CS], isNVStorable = 1 in +let Uses = [CS] in class T_store_pcr MajOp, MemAccessSize AlignSize, string RegSrc = "Rt"> : STInst <(outs IntRegs:$_dst_), @@ -3775,6 +3780,8 @@ class T_store_pcr MajOp, bits<5> Rt; let accessSize = AlignSize; + let isNVStorable = !if(!eq(mnemonic,"memd"), 0, + !if(!eq(RegSrc,"Rt.h"), 0, 1)); let IClass = 0b1010; let Inst{27-25} = 0b100; @@ -5783,8 +5790,20 @@ include "HexagonInstrInfoV5.td" // V5 Instructions - //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// V60 Instructions + +//===----------------------------------------------------------------------===// + +include "HexagonInstrInfoV60.td" + +//===----------------------------------------------------------------------===// +// V60 Instructions - +//===----------------------------------------------------------------------===// + //===----------------------------------------------------------------------===// // ALU32/64/Vector + //===----------------------------------------------------------------------===/// include "HexagonInstrInfoVector.td" + +include "HexagonInstrAlias.td" diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 65b0f4974367..87d6b359f5fb 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -684,7 +684,7 @@ def: Pat<(i64 (zext (i32 IntRegs:$src1))), // Template class for store instructions with Absolute set addressing mode. //===----------------------------------------------------------------------===// let isExtended = 1, opExtendable = 1, opExtentBits = 6, - addrMode = AbsoluteSet, isNVStorable = 1 in + addrMode = AbsoluteSet in class T_ST_absset MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst<(outs IntRegs:$dst), @@ -696,6 +696,9 @@ class T_ST_absset MajOp, MemAccessSize AccessSz, bit isHalf = 0> : STInst<(outs), @@ -766,6 +769,10 @@ class T_StoreAbsReg MajOp, bit isH> bits<2> u2; bits<5> Rt; + // Store upper-half and store doubleword cannot be NV. 
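+  // (Illustrative rationale:) a new-value store forwards a single 32-bit
+  // register produced in the same packet, so the 64-bit "memd" store and the
+  // upper-half ".h" store have no new-value form.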
+ let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); + let IClass = 0b0011; let Inst{27-24} = 0b1011; @@ -888,6 +898,8 @@ class T_pstore_rr MajOp, let isPredicatedFalse = isNot; let isPredicatedNew = isPredNew; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isH,0,1)); let IClass = 0b0011; @@ -1826,43 +1838,22 @@ def: LogLogNot_pat; // below are needed to support code generation for PIC //===----------------------------------------------------------------------===// -def SDT_HexagonPICAdd +def SDT_HexagonAtGot + : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; +def SDT_HexagonAtPcrel : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDT_HexagonGOTAdd - : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDT_HexagonGOTAddInternal : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; -def SDT_HexagonGOTAddInternalJT : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; -def SDT_HexagonGOTAddInternalBA : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>; +// AT_GOT address-of-GOT, address-of-global, offset-in-global +def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; +// AT_PCREL address-of-global +def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; -def Hexagonpic_add : SDNode<"HexagonISD::PIC_ADD", SDT_HexagonPICAdd>; -def Hexagonat_got : SDNode<"HexagonISD::AT_GOT", SDT_HexagonGOTAdd>; -def Hexagongat_pcrel : SDNode<"HexagonISD::AT_PCREL", - SDT_HexagonGOTAddInternal>; -def Hexagongat_pcrel_jt : SDNode<"HexagonISD::AT_PCREL", - SDT_HexagonGOTAddInternalJT>; -def Hexagongat_pcrel_ba : SDNode<"HexagonISD::AT_PCREL", - SDT_HexagonGOTAddInternalBA>; - -// PIC: Map from a block address computation to a PC-relative add -def: Pat<(Hexagongat_pcrel_ba tblockaddress:$src1), - (C4_addipc u32ImmPred:$src1)>; - -// PIC: Map from the computation to generate a GOT pointer to a PC-relative add -def: Pat<(Hexagonpic_add texternalsym:$src1), - (C4_addipc u32ImmPred:$src1)>; - -// PIC: Map from a jump table address computation to a PC-relative add -def: Pat<(Hexagongat_pcrel_jt tjumptable:$src1), - (C4_addipc u32ImmPred:$src1)>; - -// PIC: Map from a GOT-relative symbol reference to a load -def: Pat<(Hexagonat_got (i32 IntRegs:$src1), tglobaladdr:$src2), - (L2_loadri_io IntRegs:$src1, s30_2ImmPred:$src2)>; - -// PIC: Map from a static symbol reference to a PC-relative add -def: Pat<(Hexagongat_pcrel tglobaladdr:$src1), - (C4_addipc u32ImmPred:$src1)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), + (L2_loadri_io I32:$got, imm:$addr)>; +def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), + (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; +def: Pat<(HexagonAtPcrel I32:$addr), + (C4_addipc imm:$addr)>; //===----------------------------------------------------------------------===// // CR - @@ -1903,7 +1894,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6), "$Rd = add($Rs, add($Ru, #$s6))" , [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs), - (add (i32 IntRegs:$Ru), s16_16ImmPred:$s6)))], + (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))], "", ALU64_tc_2_SLOT23> { bits<5> Rd; bits<5> Rs; @@ -3290,27 +3281,33 @@ defm L4_return: LD_MISC_L4_RETURN <"dealloc_return">, PredNewRel; let isCall = 1, isBarrier = 1, isReturn = 1, isTerminator = 1, Defs = [R29, R30, R31, PC], isPredicable = 0, isAsmParserOnly = 1 in { def RESTORE_DEALLOC_RET_JMP_V4 : T_JMP<"">; + let isExtended = 1, opExtendable = 0 in + def 
RESTORE_DEALLOC_RET_JMP_V4_EXT : T_JMP<"">; } // Restore registers and dealloc frame before a tail call. let isCall = 1, Defs = [R29, R30, R31, PC], isAsmParserOnly = 1 in { def RESTORE_DEALLOC_BEFORE_TAILCALL_V4 : T_Call<"">, PredRel; + let isExtended = 1, opExtendable = 0 in + def RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT : T_Call<"">, PredRel; } // Function call that saves registers. let isCall = 1, Uses = [R29, R31], isAsmParserOnly = 1 in { def SAVE_REGISTERS_CALL_V4 : T_Call<"">, PredRel; + let isExtended = 1, opExtendable = 0 in + def SAVE_REGISTERS_CALL_V4_EXT : T_Call<"">, PredRel; } //===----------------------------------------------------------------------===// // Template class for non-predicated store instructions with // GP-Relative or absolute addressing. //===----------------------------------------------------------------------===// -let hasSideEffects = 0, isPredicable = 1, isNVStorable = 1 in +let hasSideEffects = 0, isPredicable = 1 in class T_StoreAbsGP MajOp, Operand AddrOp, bit isAbs, bit isHalf> - : STInst<(outs), (ins AddrOp:$addr, RC:$src), - mnemonic # !if(isAbs, "(##", "(#")#"$addr) = $src"#!if(isHalf, ".h",""), + bits<2>MajOp, bit isAbs, bit isHalf> + : STInst<(outs), (ins ImmOp:$addr, RC:$src), + mnemonic # "(#$addr) = $src"#!if(isHalf, ".h",""), [], "", V2LDST_tc_st_SLOT01> { bits<19> addr; bits<5> src; @@ -3321,6 +3318,9 @@ class T_StoreAbsGP MajOp, bit isHalf, bit isNot, bit isNew> - : STInst<(outs), (ins PredRegs:$src1, u6Ext:$absaddr, RC: $src2), + : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2), !if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ", ") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""), [], "", ST_tc_st_SLOT01>, AddrModeRel { @@ -3351,6 +3350,8 @@ class T_StoreAbs_Pred MajOp, let isPredicatedNew = isNew; let isPredicatedFalse = isNot; + // Store upper-half and store doubleword cannot be NV. + let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1)); let IClass = 0b1010; @@ -3371,7 +3372,7 @@ class T_StoreAbs MajOp, bit isHalf> - : T_StoreAbsGP , + : T_StoreAbsGP , AddrModeRel { string ImmOpStr = !cast(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3538,7 +3539,7 @@ defm storerf : ST_Abs <"memh", "STrif", IntRegs, u16_1Imm, 0b01, 1>; let isAsmParserOnly = 1 in class T_StoreGP MajOp, bit isHalf = 0> - : T_StoreAbsGP { + : T_StoreAbsGP { // Set BaseOpcode same as absolute addressing instructions so that // non-predicated GP-Rel instructions can be related to the predicated // Absolute instructions.
@@ -3553,7 +3554,7 @@ multiclass ST_GP ; + 0, isHalf>; // New-value store def NAME#newgp : T_StoreAbsGP_NV ; } @@ -3615,9 +3616,9 @@ let AddedComplexity = 100 in { //===----------------------------------------------------------------------===// let isPredicable = 1, hasSideEffects = 0 in class T_LoadAbsGP MajOp, Operand AddrOp, bit isAbs> - : LDInst <(outs RC:$dst), (ins AddrOp:$addr), - "$dst = "#mnemonic# !if(isAbs, "(##", "(#")#"$addr)", + bits<3> MajOp> + : LDInst <(outs RC:$dst), (ins ImmOp:$addr), + "$dst = "#mnemonic# "(#$addr)", [], "", V2LDST_tc_ld_SLOT01> { bits<5> dst; bits<19> addr; @@ -3642,7 +3643,7 @@ class T_LoadAbsGP MajOp> - : T_LoadAbsGP , AddrModeRel { + : T_LoadAbsGP , AddrModeRel { string ImmOpStr = !cast(ImmOp); let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19, @@ -3660,10 +3661,11 @@ class T_LoadAbs MajOp, bit isPredNot, bit isPredNew> - : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u6Ext:$absaddr), + : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr), !if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ", ") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel { bits<5> dst; @@ -3737,7 +3739,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>; let isAsmParserOnly = 1 in class T_LoadGP MajOp> - : T_LoadAbsGP , PredNewRel { + : T_LoadAbsGP , PredNewRel { let BaseOpcode = BaseOp#_abs; } @@ -3841,26 +3843,6 @@ let AddedComplexity = 100 in { def: Stoream_pat; } -// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd -let AddedComplexity = 100 in -def : Pat <(i1 (load (HexagonCONST32_GP tglobaladdr:$global))), - (i1 (C2_tfrrp (i32 (L2_loadrbgp tglobaladdr:$global))))>; - -// Transfer global address into a register -let isExtended = 1, opExtendable = 1, AddedComplexity=50, isMoveImm = 1, -isAsCheapAsAMove = 1, isReMaterializable = 1, isCodeGenOnly = 1 in -def TFRI_V4 : ALU32_ri<(outs IntRegs:$dst), (ins s16Ext:$src1), - "$dst = #$src1", - [(set IntRegs:$dst, (HexagonCONST32 tglobaladdr:$src1))]>; - -// Transfer a block address into a register -def : Pat<(HexagonCONST32_GP tblockaddress:$src1), - (TFRI_V4 tblockaddress:$src1)>; - -let AddedComplexity = 50 in -def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), - (TFRI_V4 tglobaladdr:$src1)>; - // i8/i16/i32 -> i64 loads // We need a complexity of 120 here to override preceding handling of // zextload. diff --git a/lib/Target/Hexagon/HexagonInstrInfoV5.td b/lib/Target/Hexagon/HexagonInstrInfoV5.td index 337f4ea2184a..823961fb6e6f 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV5.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV5.td @@ -98,21 +98,21 @@ def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1), // HexagonInstrInfo.td patterns. 
let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1, isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT, - isCodeGenOnly = 1 in + isCodeGenOnly = 1, isPseudo = 1 in def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1), "$dst = #$src1", [(set F32:$dst, fpimm:$src1)]>, Requires<[HasV5T]>; -let isExtended = 1, opExtendable = 2, isPredicated = 1, - hasSideEffects = 0, validSubTargets = HasV5SubT, isCodeGenOnly = 1 in +let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0, + validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), "if ($src1) $dst = #$src2", []>, Requires<[HasV5T]>; -let isPseudo = 1, isExtended = 1, opExtendable = 2, isPredicated = 1, - isPredicatedFalse = 1, hasSideEffects = 0, validSubTargets = HasV5SubT in +let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1, + hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst), (ins PredRegs:$src1, f32Ext:$src2), "if (!$src1) $dst = #$src2", []>, diff --git a/lib/Target/Hexagon/HexagonInstrInfoV60.td b/lib/Target/Hexagon/HexagonInstrInfoV60.td new file mode 100644 index 000000000000..897ada081534 --- /dev/null +++ b/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -0,0 +1,2241 @@ +//=- HexagonInstrInfoV60.td - Target Desc. for Hexagon Target -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + + +// Vector store +let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +{ + class VSTInst pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_ST, + IType type = TypeCVI_VM_ST> + : InstHexagon, OpcodeHexagon; + +} + +// Vector load +let Predicates = [HasV60T, UseHVX] in +let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in + class V6_LDInst pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_LD, + IType type = TypeCVI_VM_LD> + : InstHexagon; + +let Predicates = [HasV60T, UseHVX] in +let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in +class V6_STInst pattern = [], + string cstr = "", InstrItinClass itin = CVI_VM_ST, + IType type = TypeCVI_VM_ST> +: InstHexagon; + +//===----------------------------------------------------------------------===// +// Vector loads with base + immediate offset +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, accessSize = Vector64Access in +class T_vload_ai + : V6_LDInst <(outs VectorRegs:$dst), (ins IntRegs:$src1, s4_6Imm:$src2), + asmStr>; + +let isCodeGenOnly = 1, addrMode = BaseImmOffset, accessSize = Vector128Access in +class T_vload_ai_128B + : V6_LDInst <(outs VectorRegs128B:$dst), (ins IntRegs:$src1, s4_7Imm:$src2), + asmStr>; + +let isCVLoadable = 1, hasNewValue = 1 in { + def V6_vL32b_ai : T_vload_ai <"$dst = vmem($src1+#$src2)">, + V6_vL32b_ai_enc; + def V6_vL32b_nt_ai : T_vload_ai <"$dst = vmem($src1+#$src2):nt">, + V6_vL32b_nt_ai_enc; + // 128B + def V6_vL32b_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2)">, + V6_vL32b_ai_128B_enc; + def V6_vL32b_nt_ai_128B : T_vload_ai_128B <"$dst = vmem($src1+#$src2):nt">, + V6_vL32b_nt_ai_128B_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU, hasNewValue = 1 in { + def V6_vL32Ub_ai : T_vload_ai <"$dst = vmemu($src1+#$src2)">, + V6_vL32Ub_ai_enc; + def V6_vL32Ub_ai_128B : T_vload_ai_128B <"$dst = vmemu($src1+#$src2)">, + V6_vL32Ub_ai_128B_enc; +} + +let Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD, isCVLoad = 1, + hasNewValue = 1 in { + def V6_vL32b_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2)">, + V6_vL32b_cur_ai_enc; + def V6_vL32b_nt_cur_ai : T_vload_ai <"$dst.cur = vmem($src1+#$src2):nt">, + V6_vL32b_nt_cur_ai_enc; + // 128B + def V6_vL32b_cur_ai_128B : T_vload_ai_128B + <"$dst.cur = vmem($src1+#$src2)">, + V6_vL32b_cur_ai_128B_enc; + def V6_vL32b_nt_cur_ai_128B : T_vload_ai_128B + <"$dst.cur = vmem($src1+#$src2):nt">, + V6_vL32b_nt_cur_ai_128B_enc; +} + + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD, hasNewValue = 1 in { + def V6_vL32b_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_tmp_ai_enc; + def V6_vL32b_nt_tmp_ai : T_vload_ai <"$dst.tmp = vmem($src1+#$src2):nt">, + V6_vL32b_nt_tmp_ai_enc; + // 128B + def V6_vL32b_tmp_ai_128B : T_vload_ai_128B + <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_tmp_ai_128B_enc; + def V6_vL32b_nt_tmp_ai_128B : T_vload_ai_128B + <"$dst.tmp = vmem($src1+#$src2)">, + V6_vL32b_nt_tmp_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - unconditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, accessSize = Vector64Access in +class T_vstore_ai + : V6_STInst <(outs), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + 
mnemonic#"($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_ai_64B + : T_vstore_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_ai_128B + : T_vstore_ai ; + +let isNVStorable = 1 in { + def V6_vS32b_ai : T_vstore_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_ai_enc; + def V6_vS32b_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_ai_128B_enc; +} + +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_ai : T_vstore_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_enc; + def V6_vS32b_nt_ai_128B : T_vstore_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_nt_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_ai : T_vstore_ai_64B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_enc; + def V6_vS32Ub_ai_128B : T_vstore_ai_128B <"vmemu", "vs32Ub_ai">, + V6_vS32Ub_ai_128B_enc; +} +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - unconditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isNewValue = 1, opNewValue = 2, isNVStore = 1, + Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST in +class T_vstore_new_ai + : V6_STInst <(outs ), (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1+#$src2)"#!if(isNT, ":nt", "")#" = $src3.new">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_ai_64B + : T_vstore_new_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_ai_128B + : T_vstore_new_ai ; + +def V6_vS32b_new_ai : T_vstore_new_ai_64B <"vS32b_ai">, V6_vS32b_new_ai_enc; +def V6_vS32b_new_ai_128B : T_vstore_new_ai_128B <"vS32b_ai">, + V6_vS32b_new_ai_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_ai : T_vstore_new_ai_64B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_enc; + def V6_vS32b_nt_new_ai_128B : T_vstore_new_ai_128B<"vS32b_ai", 1>, + V6_vS32b_nt_new_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1 in +class T_vstore_pred_ai + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) " + #mnemonic#"($src2+#$src3)"#!if(isNT, ":nt", "")#" = $src4">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_pred_ai_64B + : T_vstore_pred_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_ai_128B + : T_vstore_pred_ai ; + +let isNVStorable = 1 in { + def V6_vS32b_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_enc; + def V6_vS32b_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_enc; + // 128B + def V6_vS32b_pred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai">, + V6_vS32b_pred_ai_128B_enc; + def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>, + V6_vS32b_npred_ai_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_enc; + def V6_vS32b_nt_npred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 1, 1>, + 
V6_vS32b_nt_npred_ai_enc; + // 128B + def V6_vS32b_nt_pred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 0, 1>, + V6_vS32b_nt_pred_ai_128B_enc; + def V6_vS32b_nt_npred_ai_128B : T_vstore_pred_ai_128B + <"vmem", "vS32b_ai", 1, 1>, + V6_vS32b_nt_npred_ai_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_enc; + def V6_vS32Ub_npred_ai : T_vstore_pred_ai_64B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_enc; + // 128B + def V6_vS32Ub_pred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai">, + V6_vS32Ub_pred_ai_128B_enc; + def V6_vS32Ub_npred_ai_128B :T_vstore_pred_ai_128B <"vmemu", "vS32Ub_ai", 1>, + V6_vS32Ub_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset in +class T_vstore_qpred_ai + : V6_STInst <(outs), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4"> { + let isPredicatedFalse = isPredNot; +} + +let accessSize = Vector64Access in +class T_vstore_qpred_ai_64B + : T_vstore_qpred_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_ai_128B + : T_vstore_qpred_ai ; + +def V6_vS32b_qpred_ai : T_vstore_qpred_ai_64B, V6_vS32b_qpred_ai_enc; +def V6_vS32b_nqpred_ai : T_vstore_qpred_ai_64B <1>, + V6_vS32b_nqpred_ai_enc; +def V6_vS32b_nt_qpred_ai : T_vstore_qpred_ai_64B <0, 1>, + V6_vS32b_nt_qpred_ai_enc; +def V6_vS32b_nt_nqpred_ai : T_vstore_qpred_ai_64B <1, 1>, + V6_vS32b_nt_nqpred_ai_enc; +// 128B +def V6_vS32b_qpred_ai_128B : T_vstore_qpred_ai_128B, V6_vS32b_qpred_ai_128B_enc; +def V6_vS32b_nqpred_ai_128B : T_vstore_qpred_ai_128B<1>, + V6_vS32b_nqpred_ai_128B_enc; +def V6_vS32b_nt_qpred_ai_128B : T_vstore_qpred_ai_128B<0, 1>, + V6_vS32b_nt_qpred_ai_128B_enc; +def V6_vS32b_nt_nqpred_ai_128B : T_vstore_qpred_ai_128B<1, 1>, + V6_vS32b_nt_nqpred_ai_128B_enc; + + +//===----------------------------------------------------------------------===// +// Vector stores with base + immediate offset - conditional new +//===----------------------------------------------------------------------===// +let addrMode = BaseImmOffset, isPredicated = 1, isNewValue = 1, opNewValue = 3, + isNVStore = 1, Type = TypeCVI_VM_NEW_ST, Itinerary = CVI_VM_NEW_ST in +class T_vstore_new_pred_ai + : V6_STInst <(outs), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2+#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_ai_64B + : T_vstore_new_pred_ai ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_ai_128B + : T_vstore_new_pred_ai ; + + +def V6_vS32b_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai">, + V6_vS32b_new_pred_ai_enc; +def V6_vS32b_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_enc; +// 128B +def V6_vS32b_new_pred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai">, + V6_vS32b_new_pred_ai_128B_enc; +def V6_vS32b_new_npred_ai_128B : T_vstore_new_pred_ai_128B <"vS32b_ai", 1>, + V6_vS32b_new_npred_ai_128B_enc; +let isNonTemporal = 1 in { + def 
V6_vS32b_nt_new_pred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_enc; + def V6_vS32b_nt_new_npred_ai : T_vstore_new_pred_ai_64B <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_enc; + // 128B + def V6_vS32b_nt_new_pred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 0, 1>, + V6_vS32b_nt_new_pred_ai_128B_enc; + def V6_vS32b_nt_new_npred_ai_128B : T_vstore_new_pred_ai_128B + <"vS32b_ai", 1, 1>, + V6_vS32b_nt_new_npred_ai_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector loads with immediate offset. +//===----------------------------------------------------------------------===// +let addrMode = PostInc, hasNewValue = 1 in +class T_vload_pi + : V6_LDInst <(outs RC:$dst, IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2), asmStr, [], + "$src1 = $_dst_">; + +let accessSize = Vector64Access in +class T_vload_pi_64B + : T_vload_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vload_pi_128B + : T_vload_pi ; + +let isCVLoadable = 1 in { + def V6_vL32b_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_enc; + def V6_vL32b_nt_pi : T_vload_pi_64B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_enc; + // 128B + def V6_vL32b_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2)">, + V6_vL32b_pi_128B_enc; + def V6_vL32b_nt_pi_128B : T_vload_pi_128B <"$dst = vmem($src1++#$src2):nt">, + V6_vL32b_nt_pi_128B_enc; +} + +let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in { + def V6_vL32Ub_pi : T_vload_pi_64B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_enc; + // 128B + def V6_vL32Ub_pi_128B : T_vload_pi_128B <"$dst = vmemu($src1++#$src2)">, + V6_vL32Ub_pi_128B_enc; +} + +let isCVLoad = 1, Itinerary = CVI_VM_LD, Type = TypeCVI_VM_LD in { + def V6_vL32b_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_enc; + def V6_vL32b_nt_cur_pi : T_vload_pi_64B <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_enc; + // 128B + def V6_vL32b_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2)">, + V6_vL32b_cur_pi_128B_enc; + def V6_vL32b_nt_cur_pi_128B : T_vload_pi_128B + <"$dst.cur = vmem($src1++#$src2):nt">, + V6_vL32b_nt_cur_pi_128B_enc; +} + +let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in { + def V6_vL32b_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_enc; + def V6_vL32b_nt_tmp_pi : T_vload_pi_64B <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_enc; + //128B + def V6_vL32b_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2)">, + V6_vL32b_tmp_pi_128B_enc; + def V6_vL32b_nt_tmp_pi_128B : T_vload_pi_128B + <"$dst.tmp = vmem($src1++#$src2):nt">, + V6_vL32b_nt_tmp_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset. 
+//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_pi + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + mnemonic#"($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3", [], + "$src1 = $_dst_">, NewValueRel; + +let accessSize = Vector64Access in +class T_vstore_pi_64B + : T_vstore_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pi_128B + : T_vstore_pi ; + +let isNVStorable = 1 in { + def V6_vS32b_pi : T_vstore_pi_64B <"vmem", "vS32b_pi">, V6_vS32b_pi_enc; + def V6_vS32b_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pi_128B_enc; +} + +let isNVStorable = 1 , isNonTemporal = 1 in { + def V6_vS32b_nt_pi : T_vstore_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_enc; + def V6_vS32b_nt_pi_128B : T_vstore_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_nt_pi_128B_enc; +} + + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pi : T_vstore_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_enc; + def V6_vS32Ub_pi_128B : T_vstore_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment unconditional .new vector stores with immediate offset. +//===----------------------------------------------------------------------===// +let addrMode = PostInc, isNVStore = 1 in +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isNewValue = 1, + opNewValue = 3, isNVStore = 1 in +class T_vstore_new_pi + : V6_STInst <(outs IntRegs:$_dst_), + (ins IntRegs:$src1, ImmOp:$src2, RC:$src3), + "vmem($src1++#$src2)"#!if(isNT, ":nt", "")#" = $src3.new", [], + "$src1 = $_dst_">, NewValueRel { + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pi_64B + : T_vstore_new_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pi_128B + : T_vstore_new_pi ; + + +def V6_vS32b_new_pi : T_vstore_new_pi_64B <"vS32b_pi">, + V6_vS32b_new_pi_enc; +def V6_vS32b_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi">, + V6_vS32b_new_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_new_pi : T_vstore_new_pi_64B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_enc; + def V6_vS32b_nt_new_pi_128B : T_vstore_new_pi_128B <"vS32b_pi", 1>, + V6_vS32b_nt_new_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional vector stores with immediate offset +//===----------------------------------------------------------------------===// +let isPredicated = 1, addrMode = PostInc in +class T_vstore_pred_pi + : V6_STInst<(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_pred_pi_64B + : T_vstore_pred_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_pred_pi_128B + : T_vstore_pred_pi ; + +let isNVStorable = 1 in { + def V6_vS32b_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_enc; + def V6_vS32b_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_enc; + // 128B + def V6_vS32b_pred_pi_128B : T_vstore_pred_pi_128B <"vmem", "vS32b_pi">, + V6_vS32b_pred_pi_128B_enc; + def V6_vS32b_npred_pi_128B : 
T_vstore_pred_pi_128B <"vmem", "vS32b_pi", 1>, + V6_vS32b_npred_pi_128B_enc; +} +let isNVStorable = 1, isNonTemporal = 1 in { + def V6_vS32b_nt_pred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_enc; + def V6_vS32b_nt_npred_pi : T_vstore_pred_pi_64B <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_enc; + // 128B + def V6_vS32b_nt_pred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 0, 1>, + V6_vS32b_nt_pred_pi_128B_enc; + def V6_vS32b_nt_npred_pi_128B : T_vstore_pred_pi_128B + <"vmem", "vS32b_pi", 1, 1>, + V6_vS32b_nt_npred_pi_128B_enc; +} + +let Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_enc; + def V6_vS32Ub_npred_pi : T_vstore_pred_pi_64B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_enc; + // 128B + def V6_vS32Ub_pred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi">, + V6_vS32Ub_pred_pi_128B_enc; + def V6_vS32Ub_npred_pi_128B : T_vstore_pred_pi_128B <"vmemu", "vS32Ub_pi", 1>, + V6_vS32Ub_npred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with immediate offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +let addrMode = PostInc in +class T_vstore_qpred_pi + : V6_STInst <(outs IntRegs:$_dst_), + (ins VecPredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">; + +let accessSize = Vector64Access in +class T_vstore_qpred_pi_64B + : T_vstore_qpred_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_qpred_pi_128B + : T_vstore_qpred_pi ; + +def V6_vS32b_qpred_pi : T_vstore_qpred_pi_64B, V6_vS32b_qpred_pi_enc; +def V6_vS32b_nqpred_pi : T_vstore_qpred_pi_64B <1>, V6_vS32b_nqpred_pi_enc; +// 128B +def V6_vS32b_qpred_pi_128B : T_vstore_qpred_pi_128B, + V6_vS32b_qpred_pi_128B_enc; +def V6_vS32b_nqpred_pi_128B : T_vstore_qpred_pi_128B<1>, + V6_vS32b_nqpred_pi_128B_enc; + +let isNonTemporal = 1 in { + def V6_vS32b_nt_qpred_pi : T_vstore_qpred_pi_64B <0, 1>, + V6_vS32b_nt_qpred_pi_enc; + def V6_vS32b_nt_nqpred_pi : T_vstore_qpred_pi_64B <1, 1>, + V6_vS32b_nt_nqpred_pi_enc; + // 128B + def V6_vS32b_nt_qpred_pi_128B : T_vstore_qpred_pi_128B<0, 1>, + V6_vS32b_nt_qpred_pi_128B_enc; + def V6_vS32b_nt_nqpred_pi_128B : T_vstore_qpred_pi_128B<1, 1>, + V6_vS32b_nt_nqpred_pi_128B_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with immediate offset +//===----------------------------------------------------------------------===// +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1, + isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in +class T_vstore_new_pred_pi + : V6_STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)" + #!if(isNT, ":nt", "")#" = $src4.new", [], + "$src2 = $_dst_"> , NewValueRel { + let isPredicatedFalse = isPredNot; + let BaseOpcode = baseOp; +} + +let accessSize = Vector64Access in +class T_vstore_new_pred_pi_64B + : T_vstore_new_pred_pi ; + +let isCodeGenOnly = 1, accessSize = Vector128Access in +class T_vstore_new_pred_pi_128B + : T_vstore_new_pred_pi ; + +def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">, + 
+
+//===----------------------------------------------------------------------===//
+// Post increment conditional .new vector stores with immediate offset
+//===----------------------------------------------------------------------===//
+let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1,
+    isNewValue = 1, opNewValue = 4, addrMode = PostInc, isNVStore = 1 in
+class T_vstore_new_pred_pi <string baseOp, Operand ImmOp, RegisterClass RC,
+                            bit isPredNot, bit isNT>
+  : V6_STInst <(outs IntRegs:$_dst_),
+               (ins PredRegs:$src1, IntRegs:$src2, ImmOp:$src3, RC:$src4),
+               "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++#$src3)"
+                 #!if(isNT, ":nt", "")#" = $src4.new", [],
+               "$src2 = $_dst_">, NewValueRel {
+  let isPredicatedFalse = isPredNot;
+  let BaseOpcode = baseOp;
+}
+
+let accessSize = Vector64Access in
+class T_vstore_new_pred_pi_64B <string baseOp, bit isPredNot = 0, bit isNT = 0>
+  : T_vstore_new_pred_pi <baseOp, s3_6Imm, VectorRegs, isPredNot, isNT>;
+
+let isCodeGenOnly = 1, accessSize = Vector128Access in
+class T_vstore_new_pred_pi_128B <string baseOp, bit isPredNot = 0,
+                                 bit isNT = 0>
+  : T_vstore_new_pred_pi <baseOp#"_128B", s3_7Imm, VectorRegs128B,
+                          isPredNot, isNT>;
+
+def V6_vS32b_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi">,
+                           V6_vS32b_new_pred_pi_enc;
+def V6_vS32b_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1>,
+                            V6_vS32b_new_npred_pi_enc;
+// 128B
+def V6_vS32b_new_pred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi">,
+                                V6_vS32b_new_pred_pi_128B_enc;
+def V6_vS32b_new_npred_pi_128B : T_vstore_new_pred_pi_128B <"vS32b_pi", 1>,
+                                 V6_vS32b_new_npred_pi_128B_enc;
+let isNonTemporal = 1 in {
+  def V6_vS32b_nt_new_pred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 0, 1>,
+                                V6_vS32b_nt_new_pred_pi_enc;
+  def V6_vS32b_nt_new_npred_pi : T_vstore_new_pred_pi_64B <"vS32b_pi", 1, 1>,
+                                 V6_vS32b_nt_new_npred_pi_enc;
+  // 128B
+  def V6_vS32b_nt_new_pred_pi_128B : T_vstore_new_pred_pi_128B
+                                     <"vS32b_pi", 0, 1>,
+                                     V6_vS32b_nt_new_pred_pi_128B_enc;
+  def V6_vS32b_nt_new_npred_pi_128B : T_vstore_new_pred_pi_128B
+                                      <"vS32b_pi", 1, 1>,
+                                      V6_vS32b_nt_new_npred_pi_128B_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector loads with register offset
+//===----------------------------------------------------------------------===//
+let hasNewValue = 1 in
+class T_vload_ppu <string asmStr>
+  : V6_LDInst <(outs VectorRegs:$dst, IntRegs:$_dst_),
+               (ins IntRegs:$src1, ModRegs:$src2), asmStr, [],
+               "$src1 = $_dst_">, NewValueRel;
+
+let isCVLoadable = 1 in {
+  def V6_vL32b_ppu : T_vload_ppu <"$dst = vmem($src1++$src2)">,
+                     V6_vL32b_ppu_enc;
+  def V6_vL32b_nt_ppu : T_vload_ppu <"$dst = vmem($src1++$src2):nt">,
+                        V6_vL32b_nt_ppu_enc;
+}
+
+let Itinerary = CVI_VM_VP_LDU, Type = TypeCVI_VM_VP_LDU in
+def V6_vL32Ub_ppu : T_vload_ppu <"$dst = vmemu($src1++$src2)">,
+                    V6_vL32Ub_ppu_enc;
+
+let isCVLoad = 1, Itinerary = CVI_VM_CUR_LD, Type = TypeCVI_VM_CUR_LD in {
+  def V6_vL32b_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2)">,
+                         V6_vL32b_cur_ppu_enc;
+  def V6_vL32b_nt_cur_ppu : T_vload_ppu <"$dst.cur = vmem($src1++$src2):nt">,
+                            V6_vL32b_nt_cur_ppu_enc;
+}
+
+let Itinerary = CVI_VM_TMP_LD, Type = TypeCVI_VM_TMP_LD in {
+  def V6_vL32b_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2)">,
+                         V6_vL32b_tmp_ppu_enc;
+  def V6_vL32b_nt_tmp_ppu : T_vload_ppu <"$dst.tmp = vmem($src1++$src2):nt">,
+                            V6_vL32b_nt_tmp_ppu_enc;
+}
+
+//===----------------------------------------------------------------------===//
+// Post increment vector stores with register offset
+//===----------------------------------------------------------------------===//
+class T_vstore_ppu <string mnemonic, bit isNT = 0>
+  : V6_STInst <(outs IntRegs:$_dst_),
+               (ins IntRegs:$src1, ModRegs:$src2, VectorRegs:$src3),
+               mnemonic#"($src1++$src2)"#!if(isNT, ":nt", "")#" = $src3", [],
+               "$src1 = $_dst_">, NewValueRel;
+
+let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in {
+  def V6_vS32b_ppu : T_vstore_ppu <"vmem">,
+                     V6_vS32b_ppu_enc;
+  let isNonTemporal = 1, BaseOpcode = "vS32b_ppu" in
+  def V6_vS32b_nt_ppu : T_vstore_ppu <"vmem", 1>,
+                        V6_vS32b_nt_ppu_enc;
+}
+
+let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, Type = TypeCVI_VM_STU in
+def V6_vS32Ub_ppu : T_vstore_ppu <"vmemu">, V6_vS32Ub_ppu_enc;
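+
+// In the "ppu" forms the base register is post-incremented by a modifier
+// register (ModRegs, i.e. M0/M1) rather than by an immediate.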
BaseOpcode = "vS32b_ppu" in +def V6_vS32b_new_ppu : T_vstore_new_ppu, V6_vS32b_new_ppu_enc; + +let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in +def V6_vS32b_nt_new_ppu : T_vstore_new_ppu<1>, V6_vS32b_nt_new_ppu_enc; + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with register offset +//===----------------------------------------------------------------------===// +let isPredicated = 1 in +class T_vstore_pred_ppu + : V6_STInst<(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) "#mnemonic#"($src2++$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; +} + +let isNVStorable = 1, BaseOpcode = "vS32b_ppu" in { + def V6_vS32b_pred_ppu : T_vstore_pred_ppu<"vmem">, V6_vS32b_pred_ppu_enc; + def V6_vS32b_npred_ppu: T_vstore_pred_ppu<"vmem", 1>, V6_vS32b_npred_ppu_enc; +} + +let isNVStorable = 1, BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in { + def V6_vS32b_nt_pred_ppu : T_vstore_pred_ppu <"vmem", 0, 1>, + V6_vS32b_nt_pred_ppu_enc; + def V6_vS32b_nt_npred_ppu : T_vstore_pred_ppu <"vmem", 1, 1>, + V6_vS32b_nt_npred_ppu_enc; +} + +let BaseOpcode = "vS32Ub_ppu", Itinerary = CVI_VM_STU, + Type = TypeCVI_VM_STU in { + def V6_vS32Ub_pred_ppu : T_vstore_pred_ppu <"vmemu">, + V6_vS32Ub_pred_ppu_enc; + def V6_vS32Ub_npred_ppu : T_vstore_pred_ppu <"vmemu", 1>, + V6_vS32Ub_npred_ppu_enc; +} + +//===----------------------------------------------------------------------===// +// Post increment vector stores with register offset - byte-enabled aligned +//===----------------------------------------------------------------------===// +class T_vstore_qpred_ppu + : V6_STInst <(outs IntRegs:$_dst_), + (ins VecPredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4), + "if ("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)" + #!if(isNT, ":nt", "")#" = $src4", [], + "$src2 = $_dst_">, NewValueRel; + +def V6_vS32b_qpred_ppu : T_vstore_qpred_ppu, V6_vS32b_qpred_ppu_enc; +def V6_vS32b_nqpred_ppu : T_vstore_qpred_ppu<1>, V6_vS32b_nqpred_ppu_enc; +def V6_vS32b_nt_qpred_ppu : T_vstore_qpred_ppu<0, 1>, + V6_vS32b_nt_qpred_ppu_enc; +def V6_vS32b_nt_nqpred_ppu : T_vstore_qpred_ppu<1, 1>, + V6_vS32b_nt_nqpred_ppu_enc; + +//===----------------------------------------------------------------------===// +// Post increment conditional .new vector stores with register offset +//===----------------------------------------------------------------------===// +let Itinerary = CVI_VM_NEW_ST, Type = TypeCVI_VM_NEW_ST, isPredicated = 1, + isNewValue = 1, opNewValue = 4, isNVStore = 1 in +class T_vstore_new_pred_ppu + : V6_STInst <(outs IntRegs:$_dst_), + (ins PredRegs:$src1, IntRegs:$src2, ModRegs:$src3, VectorRegs:$src4), + "if("#!if(isPredNot, "!", "")#"$src1) vmem($src2++$src3)" + #!if(isNT, ":nt", "")#" = $src4.new", [], + "$src2 = $_dst_">, NewValueRel { + let isPredicatedFalse = isPredNot; +} + +let BaseOpcode = "vS32b_ppu" in { + def V6_vS32b_new_pred_ppu : T_vstore_new_pred_ppu, + V6_vS32b_new_pred_ppu_enc; + def V6_vS32b_new_npred_ppu : T_vstore_new_pred_ppu<1>, + V6_vS32b_new_npred_ppu_enc; +} + +let BaseOpcode = "vS32b_ppu", isNonTemporal = 1 in { +def V6_vS32b_nt_new_pred_ppu : T_vstore_new_pred_ppu<0, 1>, + V6_vS32b_nt_new_pred_ppu_enc; +def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>, + V6_vS32b_nt_new_npred_ppu_enc; +} + +let isPseudo = 1, validSubTargets = HasV60SubT 
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class STrivv_template <string mnemonic, Operand ImmOp, RegisterClass RC>:
+  VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src),
+          mnemonic#"($addr+#$off) = $src", []>;
+
+def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>,
+      Requires<[HasV60T, UseHVXSgl]>;
+def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>,
+      Requires<[HasV60T, UseHVXDbl]>;
+
+multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+  def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+            (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+
+  def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+            (STrivv_indexed_128B IntRegs:$addr, #0,
+                                 (VTDbl VecDblRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+}
+
+defm : STrivv_pats <v16i64, v32i64>;
+defm : STrivv_pats <v32i32, v64i32>;
+defm : STrivv_pats <v64i16, v128i16>;
+defm : STrivv_pats <v128i8, v256i8>;
+
+
+multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+  // Aligned stores
+  def : Pat<(store (VTSgl VectorRegs:$src1), IntRegs:$addr),
+            (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+
+  // 128B Aligned stores
+  def : Pat<(store (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+            (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+
+  // Fold Add R+IFF into vector store.
+  let AddedComplexity = 10 in
+  def : Pat<(store (VTSgl VectorRegs:$src1),
+                   (add IntRegs:$src2, s4_6ImmPred:$offset)),
+            (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+                         (VTSgl VectorRegs:$src1))>,
+        Requires<[UseHVXSgl]>;
+
+  // Fold Add R+IFF into vector store 128B.
+  let AddedComplexity = 10 in
+  def : Pat<(store (VTDbl VectorRegs128B:$src1),
+                   (add IntRegs:$src2, s4_7ImmPred:$offset)),
+            (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+                              (VTDbl VectorRegs128B:$src1))>,
+        Requires<[UseHVXDbl]>;
+}
+
+defm : vS32b_ai_pats <v64i8,  v128i8>;
+defm : vS32b_ai_pats <v32i16, v64i16>;
+defm : vS32b_ai_pats <v16i32, v32i32>;
+defm : vS32b_ai_pats <v8i64,  v16i64>;
+
+let isPseudo = 1, validSubTargets = HasV60SubT in
+class LDrivv_template <string mnemonic, Operand ImmOp, RegisterClass RC>
+  : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off),
+               "$dst="#mnemonic#"($addr+#$off)",
+               []>,
+    Requires<[HasV60T,UseHVXSgl]>;
+
+def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>;
+def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>;
+
+multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+  def : Pat < (VTSgl (load IntRegs:$addr)),
+              (LDrivv_indexed IntRegs:$addr, #0) >,
+        Requires<[UseHVXSgl]>;
+
+  def : Pat < (VTDbl (load IntRegs:$addr)),
+              (LDrivv_indexed_128B IntRegs:$addr, #0) >,
+        Requires<[UseHVXDbl]>;
+}
+
+defm : LDrivv_pats <v16i64, v32i64>;
+defm : LDrivv_pats <v32i32, v64i32>;
+defm : LDrivv_pats <v64i16, v128i16>;
+defm : LDrivv_pats <v128i8, v256i8>;
+
+multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+  // Aligned loads
+  def : Pat < (VTSgl (load IntRegs:$addr)),
+              (V6_vL32b_ai IntRegs:$addr, #0) >,
+        Requires<[UseHVXSgl]>;
+
+  // 128B Load
+  def : Pat < (VTDbl (load IntRegs:$addr)),
+              (V6_vL32b_ai_128B IntRegs:$addr, #0) >,
+        Requires<[UseHVXDbl]>;
+
+  // Fold Add R+IFF into vector load.
+  let AddedComplexity = 10 in
+  def : Pat<(VTDbl (load (add IntRegs:$src2, s4_7ImmPred:$offset))),
+            (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+        Requires<[UseHVXDbl]>;
+
+  let AddedComplexity = 10 in
+  def : Pat<(VTSgl (load (add IntRegs:$src2, s4_6ImmPred:$offset))),
+            (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+        Requires<[UseHVXSgl]>;
+}
+
+defm : vL32b_ai_pats <v64i8,  v128i8>;
+defm : vL32b_ai_pats <v32i16, v64i16>;
+defm : vL32b_ai_pats <v16i32, v32i32>;
+defm : vL32b_ai_pats <v8i64,  v16i64>;
+
+// Store vector predicate pseudo.
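+// These never reach the assembler: the ".error \"should not emit\"" asm
+// string below triggers an assembler error if one ever survives to emission.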
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+    isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriq_pred_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_vec_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+
+def STriq_pred_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+
+def STriq_pred_vec_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector predicate pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
+    opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
+def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst),
+            (ins IntRegs:$base, s32Imm:$offset),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst),
+            (ins IntRegs:$base, s32Imm:$offset),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst),
+            (ins IntRegs:$base, s32Imm:$offset),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst),
+            (ins IntRegs:$base, s32Imm:$offset),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Store vector pseudo.
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+    isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STriv_pseudo_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+def STriv_pseudo_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+}
+
+let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
+    isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def STrivv_pseudo_V6 : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXSgl]>;
+def STrivv_pseudo_V6_128B : STInst<(outs),
+            (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1),
+            ".error \"should not emit\" ",
+            []>,
+            Requires<[HasV60T,UseHVXDbl]>;
+}
+
+// Load vector pseudo.
+let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13, + opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst), + (ins IntRegs:$base, s32Imm:$offset), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXDbl]>; +} + +class VSELInst pattern = [], + string cstr = "", InstrItinClass itin = CVI_VA_DV, + IType type = TypeCVI_VA_DV> + : InstHexagon; + +let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in { +def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst), + (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst), + (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), + ".error \"should not emit\" ", + []>, + Requires<[HasV60T,UseHVXSgl]>; +} + +def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval), SETEQ)), + (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs), + (i32 IntRegs:$rhs))), + (v16i32 VectorRegs:$tval), + (v16i32 VectorRegs:$fval)))>; + + +let hasNewValue = 1 in +class T_vmpy + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + asmString >; + +multiclass T_vmpy { + def NAME : T_vmpy ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_vmpy (RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_vmpy_VV : + T_vmpy ; + +multiclass T_vmpy_WW : + T_vmpy ; + +multiclass T_vmpy_VW : + T_vmpy ; + +multiclass T_vmpy_WV : + T_vmpy ; + +defm V6_vtmpyb :T_vmpy_WW<"$dst.h = vtmpy($src1.b,$src2.b)">, V6_vtmpyb_enc; +defm V6_vtmpybus :T_vmpy_WW<"$dst.h = vtmpy($src1.ub,$src2.b)">, V6_vtmpybus_enc; +defm V6_vdsaduh :T_vmpy_WW<"$dst.uw = vdsad($src1.uh,$src2.uh)">, V6_vdsaduh_enc; +defm V6_vmpybus :T_vmpy_WV<"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybus_enc; +defm V6_vmpabus :T_vmpy_WW<"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabus_enc; +defm V6_vmpahb :T_vmpy_WW<"$dst.w = vmpa($src1.h,$src2.b)">, V6_vmpahb_enc; +defm V6_vmpyh :T_vmpy_WV<"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyh_enc; +defm V6_vmpyuh :T_vmpy_WV<"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuh_enc; +defm V6_vmpyiwh :T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_enc; +defm V6_vtmpyhb :T_vmpy_WW<"$dst.w = vtmpy($src1.h,$src2.b)">, V6_vtmpyhb_enc; +defm V6_vmpyub :T_vmpy_WV<"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyub_enc; + +let Itinerary = CVI_VX_LONG, Type = TypeCVI_VX in +defm V6_vmpyihb :T_vmpy_VV<"$dst.h = vmpyi($src1.h,$src2.b)">, V6_vmpyihb_enc; + +defm V6_vdmpybus_dv : + T_vmpy_WW <"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_dv_enc; +defm V6_vdmpyhsusat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.uh):sat">, V6_vdmpyhsusat_enc; +defm V6_vdmpyhsuisat : + T_vmpy_VW <"$dst.w = 
vdmpy($src1.h,$src2.uh,#1):sat">, V6_vdmpyhsuisat_enc; +defm V6_vdmpyhsat : + T_vmpy_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhsat_enc; +defm V6_vdmpyhisat : + T_vmpy_VW <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhisat_enc; +defm V6_vdmpyhb_dv : + T_vmpy_WW <"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_dv_enc; +defm V6_vmpyhss : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:sat">, V6_vmpyhss_enc; +defm V6_vmpyhsrs : + T_vmpy_VV <"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhsrs_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in +defm V6_vror : T_vmpy_VV <"$dst = vror($src1,$src2)">, V6_vror_enc; + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vdmpyhb : T_vmpy_VV<"$dst.w = vdmpy($src1.h,$src2.b)">, V6_vdmpyhb_enc; +defm V6_vrmpybus : T_vmpy_VV<"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybus_enc; +defm V6_vdmpybus : T_vmpy_VV<"$dst.h = vdmpy($src1.ub,$src2.b)">, V6_vdmpybus_enc; +defm V6_vmpyiwb : T_vmpy_VV<"$dst.w = vmpyi($src1.w,$src2.b)">, V6_vmpyiwb_enc; +defm V6_vrmpyub : T_vmpy_VV<"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyub_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrw : T_vmpy_VV <"$dst.w = vasr($src1.w,$src2)">, V6_vasrw_enc; +defm V6_vasrh : T_vmpy_VV <"$dst.h = vasr($src1.h,$src2)">, V6_vasrh_enc; +defm V6_vaslw : T_vmpy_VV <"$dst.w = vasl($src1.w,$src2)">, V6_vaslw_enc; +defm V6_vaslh : T_vmpy_VV <"$dst.h = vasl($src1.h,$src2)">, V6_vaslh_enc; +defm V6_vlsrw : T_vmpy_VV <"$dst.uw = vlsr($src1.uw,$src2)">, V6_vlsrw_enc; +defm V6_vlsrh : T_vmpy_VV <"$dst.uh = vlsr($src1.uh,$src2)">, V6_vlsrh_enc; +} + +let hasNewValue = 1 in +class T_HVX_alu + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >{ + let Itinerary = itin; + let Type = !cast("Type"#itin); +} + +multiclass T_HVX_alu { + def NAME : T_HVX_alu ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu (RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_HVX_alu_VV : + T_HVX_alu ; + +multiclass T_HVX_alu_WW : + T_HVX_alu ; + +multiclass T_HVX_alu_WV : + T_HVX_alu ; + + +let Itinerary = CVI_VX, Type = TypeCVI_VX in { +defm V6_vrmpyubv : + T_HVX_alu_VV <"$dst.uw = vrmpy($src1.ub,$src2.ub)">, V6_vrmpyubv_enc; +defm V6_vrmpybv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.b,$src2.b)">, V6_vrmpybv_enc; +defm V6_vrmpybusv : + T_HVX_alu_VV <"$dst.w = vrmpy($src1.ub,$src2.b)">, V6_vrmpybusv_enc; +defm V6_vabsdiffub : + T_HVX_alu_VV <"$dst.ub = vabsdiff($src1.ub,$src2.ub)">, V6_vabsdiffub_enc; +defm V6_vabsdiffh : + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.h,$src2.h)">, V6_vabsdiffh_enc; +defm V6_vabsdiffuh : + T_HVX_alu_VV <"$dst.uh = vabsdiff($src1.uh,$src2.uh)">, V6_vabsdiffuh_enc; +defm V6_vabsdiffw : + T_HVX_alu_VV <"$dst.uw = vabsdiff($src1.w,$src2.w)">, V6_vabsdiffw_enc; +} + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhvsat : + T_HVX_alu_VV <"$dst.w = vdmpy($src1.h,$src2.h):sat">, V6_vdmpyhvsat_enc; +defm V6_vmpyhvsrs : + T_HVX_alu_VV<"$dst.h = vmpy($src1.h,$src2.h):<<1:rnd:sat">, V6_vmpyhvsrs_enc; +defm V6_vmpyih : + T_HVX_alu_VV <"$dst.h = vmpyi($src1.h,$src2.h)">, V6_vmpyih_enc; +} + +defm V6_vand : + T_HVX_alu_VV <"$dst = vand($src1,$src2)">, V6_vand_enc; +defm V6_vor : + T_HVX_alu_VV <"$dst = vor($src1,$src2)">, V6_vor_enc; +defm V6_vxor : + T_HVX_alu_VV <"$dst = vxor($src1,$src2)">, V6_vxor_enc; +defm V6_vaddw : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_enc; +defm V6_vaddubsat : + T_HVX_alu_VV <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_enc; +defm V6_vadduhsat : 
+ T_HVX_alu_VV <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_enc; +defm V6_vaddhsat : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_enc; +defm V6_vaddwsat : + T_HVX_alu_VV <"$dst.w = vadd($src1.w,$src2.w):sat">, V6_vaddwsat_enc; +defm V6_vsubb : + T_HVX_alu_VV <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_enc; +defm V6_vsubh : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_enc; +defm V6_vsubw : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_enc; +defm V6_vsububsat : + T_HVX_alu_VV <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_enc; +defm V6_vsubuhsat : + T_HVX_alu_VV <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_enc; +defm V6_vsubhsat : + T_HVX_alu_VV <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_enc; +defm V6_vsubwsat : + T_HVX_alu_VV <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_enc; +defm V6_vavgub : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub)">, V6_vavgub_enc; +defm V6_vavguh : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh)">, V6_vavguh_enc; +defm V6_vavgh : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h)">, V6_vavgh_enc; +defm V6_vavgw : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w)">, V6_vavgw_enc; +defm V6_vnavgub : + T_HVX_alu_VV <"$dst.b = vnavg($src1.ub,$src2.ub)">, V6_vnavgub_enc; +defm V6_vnavgh : + T_HVX_alu_VV <"$dst.h = vnavg($src1.h,$src2.h)">, V6_vnavgh_enc; +defm V6_vnavgw : + T_HVX_alu_VV <"$dst.w = vnavg($src1.w,$src2.w)">, V6_vnavgw_enc; +defm V6_vavgubrnd : + T_HVX_alu_VV <"$dst.ub = vavg($src1.ub,$src2.ub):rnd">, V6_vavgubrnd_enc; +defm V6_vavguhrnd : + T_HVX_alu_VV <"$dst.uh = vavg($src1.uh,$src2.uh):rnd">, V6_vavguhrnd_enc; +defm V6_vavghrnd : + T_HVX_alu_VV <"$dst.h = vavg($src1.h,$src2.h):rnd">, V6_vavghrnd_enc; +defm V6_vavgwrnd : + T_HVX_alu_VV <"$dst.w = vavg($src1.w,$src2.w):rnd">, V6_vavgwrnd_enc; + +defm V6_vmpybv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.b,$src2.b)">, V6_vmpybv_enc; +defm V6_vmpyubv : + T_HVX_alu_WV <"$dst.uh = vmpy($src1.ub,$src2.ub)">, V6_vmpyubv_enc; +defm V6_vmpybusv : + T_HVX_alu_WV <"$dst.h = vmpy($src1.ub,$src2.b)">, V6_vmpybusv_enc; +defm V6_vmpyhv : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.h)">, V6_vmpyhv_enc; +defm V6_vmpyuhv : + T_HVX_alu_WV <"$dst.uw = vmpy($src1.uh,$src2.uh)">, V6_vmpyuhv_enc; +defm V6_vmpyhus : + T_HVX_alu_WV <"$dst.w = vmpy($src1.h,$src2.uh)">, V6_vmpyhus_enc; +defm V6_vaddubh : + T_HVX_alu_WV <"$dst.h = vadd($src1.ub,$src2.ub)">, V6_vaddubh_enc; +defm V6_vadduhw : + T_HVX_alu_WV <"$dst.w = vadd($src1.uh,$src2.uh)">, V6_vadduhw_enc; +defm V6_vaddhw : + T_HVX_alu_WV <"$dst.w = vadd($src1.h,$src2.h)">, V6_vaddhw_enc; +defm V6_vsububh : + T_HVX_alu_WV <"$dst.h = vsub($src1.ub,$src2.ub)">, V6_vsububh_enc; +defm V6_vsubuhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.uh,$src2.uh)">, V6_vsubuhw_enc; +defm V6_vsubhw : + T_HVX_alu_WV <"$dst.w = vsub($src1.h,$src2.h)">, V6_vsubhw_enc; + +defm V6_vaddb_dv : + T_HVX_alu_WW <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_dv_enc; +defm V6_vaddh_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_dv_enc; +defm V6_vaddw_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w)">, V6_vaddw_dv_enc; +defm V6_vaddubsat_dv : + T_HVX_alu_WW <"$dst.ub = vadd($src1.ub,$src2.ub):sat">, V6_vaddubsat_dv_enc; +defm V6_vadduhsat_dv : + T_HVX_alu_WW <"$dst.uh = vadd($src1.uh,$src2.uh):sat">, V6_vadduhsat_dv_enc; +defm V6_vaddhsat_dv : + T_HVX_alu_WW <"$dst.h = vadd($src1.h,$src2.h):sat">, V6_vaddhsat_dv_enc; +defm V6_vaddwsat_dv : + T_HVX_alu_WW <"$dst.w = vadd($src1.w,$src2.w):sat">, 
V6_vaddwsat_dv_enc; +defm V6_vsubb_dv : + T_HVX_alu_WW <"$dst.b = vsub($src1.b,$src2.b)">, V6_vsubb_dv_enc; +defm V6_vsubh_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h)">, V6_vsubh_dv_enc; +defm V6_vsubw_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w)">, V6_vsubw_dv_enc; +defm V6_vsububsat_dv : + T_HVX_alu_WW <"$dst.ub = vsub($src1.ub,$src2.ub):sat">, V6_vsububsat_dv_enc; +defm V6_vsubuhsat_dv : + T_HVX_alu_WW <"$dst.uh = vsub($src1.uh,$src2.uh):sat">, V6_vsubuhsat_dv_enc; +defm V6_vsubhsat_dv : + T_HVX_alu_WW <"$dst.h = vsub($src1.h,$src2.h):sat">, V6_vsubhsat_dv_enc; +defm V6_vsubwsat_dv : + T_HVX_alu_WW <"$dst.w = vsub($src1.w,$src2.w):sat">, V6_vsubwsat_dv_enc; + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV in { +defm V6_vmpabusv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.b)">, V6_vmpabusv_enc; +defm V6_vmpabuuv : + T_HVX_alu_WW <"$dst.h = vmpa($src1.ub,$src2.ub)">, V6_vmpabuuv_enc; +} + +let isAccumulator = 1, hasNewValue = 1 in +class T_HVX_vmpyacc + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin1:$src1, RCin2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = itin; + let Type = !cast("Type"#itin); +} + +multiclass T_HVX_vmpyacc_both { + def NAME : T_HVX_vmpyacc ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc (RCout#"128B"), + !cast(RCin1#"128B"), + !cast(RCin2# + !if(!eq (!cast(RCin2), "IntRegs"), "", "128B"))>; +} + +multiclass T_HVX_vmpyacc_VVR : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_VWR : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_WVR : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_WWR : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_VVV : + T_HVX_vmpyacc_both ; + +multiclass T_HVX_vmpyacc_WVV : + T_HVX_vmpyacc_both ; + + +defm V6_vtmpyb_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.b,$src2.b)">, + V6_vtmpyb_acc_enc; +defm V6_vtmpybus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vtmpy($src1.ub,$src2.b)">, + V6_vtmpybus_acc_enc; +defm V6_vtmpyhb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vtmpy($src1.h,$src2.b)">, + V6_vtmpyhb_acc_enc; +defm V6_vdmpyhb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_acc_enc; +defm V6_vrmpyub_acc : + T_HVX_vmpyacc_VVR <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyub_acc_enc; +defm V6_vrmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybus_acc_enc; +defm V6_vdmpybus_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_acc_enc; +defm V6_vdmpybus_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vdmpy($src1.ub,$src2.b)">, + V6_vdmpybus_dv_acc_enc; +defm V6_vdmpyhsuisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.uh,#1):sat">, + V6_vdmpyhsuisat_acc_enc; +defm V6_vdmpyhisat_acc : + T_HVX_vmpyacc_VWR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhisat_acc_enc; +defm V6_vdmpyhb_dv_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vdmpy($src1.h,$src2.b)">, + V6_vdmpyhb_dv_acc_enc; +defm V6_vmpybus_acc : + T_HVX_vmpyacc_WVR <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybus_acc_enc; +defm V6_vmpabus_acc : + T_HVX_vmpyacc_WWR <"$dst.h += vmpa($src1.ub,$src2.b)">, + V6_vmpabus_acc_enc; +defm V6_vmpahb_acc : + T_HVX_vmpyacc_WWR <"$dst.w += vmpa($src1.h,$src2.b)">, + V6_vmpahb_acc_enc; +defm V6_vmpyhsat_acc : + T_HVX_vmpyacc_WVR <"$dst.w += vmpy($src1.h,$src2.h):sat">, + V6_vmpyhsat_acc_enc; +defm V6_vmpyuh_acc : + T_HVX_vmpyacc_WVR <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuh_acc_enc; +defm V6_vmpyiwb_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vmpyi($src1.w,$src2.b)">, + 
V6_vmpyiwb_acc_enc; +defm V6_vdsaduh_acc : + T_HVX_vmpyacc_WWR <"$dst.uw += vdsad($src1.uh,$src2.uh)">, + V6_vdsaduh_acc_enc; +defm V6_vmpyihb_acc : + T_HVX_vmpyacc_VVR <"$dst.h += vmpyi($src1.h,$src2.b)">, + V6_vmpyihb_acc_enc; +defm V6_vmpyub_acc : + T_HVX_vmpyacc_WVR <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyub_acc_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vdmpyhsusat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.uh):sat">, + V6_vdmpyhsusat_acc_enc; +defm V6_vdmpyhsat_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhsat_acc_enc; +defm V6_vmpyiwh_acc : T_HVX_vmpyacc_VVR + <"$dst.w += vmpyi($src1.w,$src2.h)">, V6_vmpyiwh_acc_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vaslw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasl($src1.w,$src2)">, V6_vaslw_acc_enc; +defm V6_vasrw_acc : + T_HVX_vmpyacc_VVR <"$dst.w += vasr($src1.w,$src2)">, V6_vasrw_acc_enc; +} + +defm V6_vdmpyhvsat_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vdmpy($src1.h,$src2.h):sat">, + V6_vdmpyhvsat_acc_enc; +defm V6_vmpybusv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.ub,$src2.b)">, + V6_vmpybusv_acc_enc; +defm V6_vmpybv_acc : + T_HVX_vmpyacc_WVV <"$dst.h += vmpy($src1.b,$src2.b)">, V6_vmpybv_acc_enc; +defm V6_vmpyhus_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.uh)">, V6_vmpyhus_acc_enc; +defm V6_vmpyhv_acc : + T_HVX_vmpyacc_WVV <"$dst.w += vmpy($src1.h,$src2.h)">, V6_vmpyhv_acc_enc; +defm V6_vmpyiewh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.h)">, + V6_vmpyiewh_acc_enc; +defm V6_vmpyiewuh_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyie($src1.w,$src2.uh)">, + V6_vmpyiewuh_acc_enc; +defm V6_vmpyih_acc : + T_HVX_vmpyacc_VVV <"$dst.h += vmpyi($src1.h,$src2.h)">, V6_vmpyih_acc_enc; +defm V6_vmpyowh_rnd_sacc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:rnd:sat:shift">, + V6_vmpyowh_rnd_sacc_enc; +defm V6_vmpyowh_sacc : + T_HVX_vmpyacc_VVV <"$dst.w += vmpyo($src1.w,$src2.h):<<1:sat:shift">, + V6_vmpyowh_sacc_enc; +defm V6_vmpyubv_acc : + T_HVX_vmpyacc_WVV <"$dst.uh += vmpy($src1.ub,$src2.ub)">, + V6_vmpyubv_acc_enc; +defm V6_vmpyuhv_acc : + T_HVX_vmpyacc_WVV <"$dst.uw += vmpy($src1.uh,$src2.uh)">, + V6_vmpyuhv_acc_enc; +defm V6_vrmpybusv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.ub,$src2.b)">, + V6_vrmpybusv_acc_enc; +defm V6_vrmpybv_acc : + T_HVX_vmpyacc_VVV <"$dst.w += vrmpy($src1.b,$src2.b)">, V6_vrmpybv_acc_enc; +defm V6_vrmpyubv_acc : + T_HVX_vmpyacc_VVV <"$dst.uw += vrmpy($src1.ub,$src2.ub)">, + V6_vrmpyubv_acc_enc; + + +class T_HVX_vcmp + : CVI_VA_Resource1 <(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_vcmp { + def NAME : T_HVX_vcmp ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp ; +} + +defm V6_veqb_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.b,$src2.b)">, V6_veqb_and_enc; +defm V6_veqh_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.h,$src2.h)">, V6_veqh_and_enc; +defm V6_veqw_and : + T_HVX_vcmp <"$dst &= vcmp.eq($src1.w,$src2.w)">, V6_veqw_and_enc; +defm V6_vgtb_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_and_enc; +defm V6_vgth_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.h,$src2.h)">, V6_vgth_and_enc; +defm V6_vgtw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_and_enc; +defm V6_vgtub_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_and_enc; +defm V6_vgtuh_and : + T_HVX_vcmp <"$dst &= 
vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_and_enc; +defm V6_vgtuw_and : + T_HVX_vcmp <"$dst &= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_and_enc; +defm V6_veqb_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.b,$src2.b)">, V6_veqb_or_enc; +defm V6_veqh_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.h,$src2.h)">, V6_veqh_or_enc; +defm V6_veqw_or : + T_HVX_vcmp <"$dst |= vcmp.eq($src1.w,$src2.w)">, V6_veqw_or_enc; +defm V6_vgtb_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_or_enc; +defm V6_vgth_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.h,$src2.h)">, V6_vgth_or_enc; +defm V6_vgtw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_or_enc; +defm V6_vgtub_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_or_enc; +defm V6_vgtuh_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_or_enc; +defm V6_vgtuw_or : + T_HVX_vcmp <"$dst |= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_or_enc; +defm V6_veqb_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.b,$src2.b)">, V6_veqb_xor_enc; +defm V6_veqh_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.h,$src2.h)">, V6_veqh_xor_enc; +defm V6_veqw_xor : + T_HVX_vcmp <"$dst ^= vcmp.eq($src1.w,$src2.w)">, V6_veqw_xor_enc; +defm V6_vgtb_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.b,$src2.b)">, V6_vgtb_xor_enc; +defm V6_vgth_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.h,$src2.h)">, V6_vgth_xor_enc; +defm V6_vgtw_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.w,$src2.w)">, V6_vgtw_xor_enc; +defm V6_vgtub_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_xor_enc; +defm V6_vgtuh_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_xor_enc; +defm V6_vgtuw_xor : + T_HVX_vcmp <"$dst ^= vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_xor_enc; + +defm V6_vminub : + T_HVX_alu_VV <"$dst.ub = vmin($src1.ub,$src2.ub)">, V6_vminub_enc; +defm V6_vminuh : + T_HVX_alu_VV <"$dst.uh = vmin($src1.uh,$src2.uh)">, V6_vminuh_enc; +defm V6_vminh : + T_HVX_alu_VV <"$dst.h = vmin($src1.h,$src2.h)">, V6_vminh_enc; +defm V6_vminw : + T_HVX_alu_VV <"$dst.w = vmin($src1.w,$src2.w)">, V6_vminw_enc; +defm V6_vmaxub : + T_HVX_alu_VV <"$dst.ub = vmax($src1.ub,$src2.ub)">, V6_vmaxub_enc; +defm V6_vmaxuh : + T_HVX_alu_VV <"$dst.uh = vmax($src1.uh,$src2.uh)">, V6_vmaxuh_enc; +defm V6_vmaxh : + T_HVX_alu_VV <"$dst.h = vmax($src1.h,$src2.h)">, V6_vmaxh_enc; +defm V6_vmaxw : + T_HVX_alu_VV <"$dst.w = vmax($src1.w,$src2.w)">, V6_vmaxw_enc; +defm V6_vshuffeb : + T_HVX_alu_VV <"$dst.b = vshuffe($src1.b,$src2.b)">, V6_vshuffeb_enc; +defm V6_vshuffob : + T_HVX_alu_VV <"$dst.b = vshuffo($src1.b,$src2.b)">, V6_vshuffob_enc; +defm V6_vshufeh : + T_HVX_alu_VV <"$dst.h = vshuffe($src1.h,$src2.h)">, V6_vshufeh_enc; +defm V6_vshufoh : + T_HVX_alu_VV <"$dst.h = vshuffo($src1.h,$src2.h)">, V6_vshufoh_enc; + +let Itinerary = CVI_VX_DV, Type = TypeCVI_VX_DV in { +defm V6_vmpyowh_rnd : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:rnd:sat">, + V6_vmpyowh_rnd_enc; +defm V6_vmpyiewuh : + T_HVX_alu_VV <"$dst.w = vmpyie($src1.w,$src2.uh)">, V6_vmpyiewuh_enc; +defm V6_vmpyewuh : + T_HVX_alu_VV <"$dst.w = vmpye($src1.w,$src2.uh)">, V6_vmpyewuh_enc; +defm V6_vmpyowh : + T_HVX_alu_VV <"$dst.w = vmpyo($src1.w,$src2.h):<<1:sat">, V6_vmpyowh_enc; +defm V6_vmpyiowh : + T_HVX_alu_VV <"$dst.w = vmpyio($src1.w,$src2.h)">, V6_vmpyiowh_enc; +} +let Itinerary = CVI_VX, Type = TypeCVI_VX in +defm V6_vmpyieoh : + T_HVX_alu_VV <"$dst.w = vmpyieo($src1.h,$src2.h)">, V6_vmpyieoh_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in { +defm V6_vshufoeh : + T_HVX_alu_WV <"$dst.h = 
vshuffoe($src1.h,$src2.h)">, V6_vshufoeh_enc; +defm V6_vshufoeb : + T_HVX_alu_WV <"$dst.b = vshuffoe($src1.b,$src2.b)">, V6_vshufoeb_enc; +} + +let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +defm V6_vcombine : + T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc; + +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; + +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; + +def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), + (v16i32 VectorRegs:$Vt))), + (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), + (v32i32 VecDblRegs:$Vt))), + (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + +let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in { +defm V6_vsathub : + T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc; +defm V6_vsatwh : + T_HVX_alu_VV <"$dst.h = vsat($src1.w,$src2.w)">, V6_vsatwh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vroundwh : + T_HVX_alu_VV <"$dst.h = vround($src1.w,$src2.w):sat">, V6_vroundwh_enc; +defm V6_vroundwuh : + T_HVX_alu_VV <"$dst.uh = vround($src1.w,$src2.w):sat">, V6_vroundwuh_enc; +defm V6_vroundhb : + T_HVX_alu_VV <"$dst.b = vround($src1.h,$src2.h):sat">, V6_vroundhb_enc; +defm V6_vroundhub : + T_HVX_alu_VV <"$dst.ub = vround($src1.h,$src2.h):sat">, V6_vroundhub_enc; +defm V6_vasrwv : + T_HVX_alu_VV <"$dst.w = vasr($src1.w,$src2.w)">, V6_vasrwv_enc; +defm V6_vlsrwv : + T_HVX_alu_VV <"$dst.w = vlsr($src1.w,$src2.w)">, V6_vlsrwv_enc; +defm V6_vlsrhv : + T_HVX_alu_VV <"$dst.h = vlsr($src1.h,$src2.h)">, V6_vlsrhv_enc; +defm V6_vasrhv : + T_HVX_alu_VV <"$dst.h = vasr($src1.h,$src2.h)">, V6_vasrhv_enc; +defm V6_vaslwv : + T_HVX_alu_VV <"$dst.w = vasl($src1.w,$src2.w)">, V6_vaslwv_enc; +defm V6_vaslhv : + T_HVX_alu_VV <"$dst.h = vasl($src1.h,$src2.h)">, V6_vaslhv_enc; +} + +defm V6_vaddb : + T_HVX_alu_VV <"$dst.b = vadd($src1.b,$src2.b)">, V6_vaddb_enc; +defm V6_vaddh : + T_HVX_alu_VV <"$dst.h = vadd($src1.h,$src2.h)">, V6_vaddh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdelta : + T_HVX_alu_VV <"$dst = vdelta($src1,$src2)">, V6_vdelta_enc; +defm V6_vrdelta : + T_HVX_alu_VV <"$dst = vrdelta($src1,$src2)">, V6_vrdelta_enc; +defm V6_vdealb4w : + T_HVX_alu_VV <"$dst.b = vdeale($src1.b,$src2.b)">, V6_vdealb4w_enc; +defm V6_vpackeb : + T_HVX_alu_VV <"$dst.b = vpacke($src1.h,$src2.h)">, V6_vpackeb_enc; +defm V6_vpackeh : + T_HVX_alu_VV <"$dst.h = vpacke($src1.w,$src2.w)">, V6_vpackeh_enc; +defm V6_vpackhub_sat : + T_HVX_alu_VV <"$dst.ub = vpack($src1.h,$src2.h):sat">, V6_vpackhub_sat_enc; +defm V6_vpackhb_sat : + T_HVX_alu_VV <"$dst.b = vpack($src1.h,$src2.h):sat">, V6_vpackhb_sat_enc; +defm V6_vpackwuh_sat : + T_HVX_alu_VV <"$dst.uh = vpack($src1.w,$src2.w):sat">, V6_vpackwuh_sat_enc; +defm V6_vpackwh_sat : + T_HVX_alu_VV <"$dst.h = vpack($src1.w,$src2.w):sat">, V6_vpackwh_sat_enc; +defm V6_vpackob : + T_HVX_alu_VV <"$dst.b = vpacko($src1.h,$src2.h)">, V6_vpackob_enc; +defm V6_vpackoh : + T_HVX_alu_VV <"$dst.h = vpacko($src1.w,$src2.w)">, V6_vpackoh_enc; +} + +let hasNewValue = 1, hasSideEffects = 0 in +class T_HVX_condALU + : CVI_VA_Resource1 <(outs RC2:$dst), + (ins RC1:$src1, RC2:$_src_, RC2:$src2), asmString, + [], "$dst = $_src_" > { + let Itinerary = CVI_VA; + let Type = TypeCVI_VA; +} + +multiclass T_HVX_condALU { + def NAME : T_HVX_condALU ; + let isCodeGenOnly = 1 in + def NAME#_128B : 
T_HVX_condALU ; +} + +defm V6_vaddbq : T_HVX_condALU <"if ($src1) $dst.b += $src2.b">, + V6_vaddbq_enc; +defm V6_vaddhq : T_HVX_condALU <"if ($src1) $dst.h += $src2.h">, + V6_vaddhq_enc; +defm V6_vaddwq : T_HVX_condALU <"if ($src1) $dst.w += $src2.w">, + V6_vaddwq_enc; +defm V6_vsubbq : T_HVX_condALU <"if ($src1) $dst.b -= $src2.b">, + V6_vsubbq_enc; +defm V6_vsubhq : T_HVX_condALU <"if ($src1) $dst.h -= $src2.h">, + V6_vsubhq_enc; +defm V6_vsubwq : T_HVX_condALU <"if ($src1) $dst.w -= $src2.w">, + V6_vsubwq_enc; +defm V6_vaddbnq : T_HVX_condALU <"if (!$src1) $dst.b += $src2.b">, + V6_vaddbnq_enc; +defm V6_vaddhnq : T_HVX_condALU <"if (!$src1) $dst.h += $src2.h">, + V6_vaddhnq_enc; +defm V6_vaddwnq : T_HVX_condALU <"if (!$src1) $dst.w += $src2.w">, + V6_vaddwnq_enc; +defm V6_vsubbnq : T_HVX_condALU <"if (!$src1) $dst.b -= $src2.b">, + V6_vsubbnq_enc; +defm V6_vsubhnq : T_HVX_condALU <"if (!$src1) $dst.h -= $src2.h">, + V6_vsubhnq_enc; +defm V6_vsubwnq : T_HVX_condALU <"if (!$src1) $dst.w -= $src2.w">, + V6_vsubwnq_enc; + +let hasNewValue = 1 in +class T_HVX_alu_2op + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1), + asmString >{ + let Itinerary = itin; + let Type = !cast("Type"#itin); +} + +multiclass T_HVX_alu_2op { + def NAME : T_HVX_alu_2op ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu_2op (RCout#"128B"), + !cast(RCin#"128B")>; +} + +let hasNewValue = 1 in +multiclass T_HVX_alu_2op_VV : + T_HVX_alu_2op ; + +multiclass T_HVX_alu_2op_WV : + T_HVX_alu_2op ; + + +defm V6_vabsh : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h)">, + V6_vabsh_enc; +defm V6_vabsw : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w)">, + V6_vabsw_enc; +defm V6_vabsh_sat : T_HVX_alu_2op_VV <"$dst.h = vabs($src1.h):sat">, + V6_vabsh_sat_enc; +defm V6_vabsw_sat : T_HVX_alu_2op_VV <"$dst.w = vabs($src1.w):sat">, + V6_vabsw_sat_enc; +defm V6_vnot : T_HVX_alu_2op_VV <"$dst = vnot($src1)">, + V6_vnot_enc; +defm V6_vassign : T_HVX_alu_2op_VV <"$dst = $src1">, + V6_vassign_enc; + +defm V6_vzb : T_HVX_alu_2op_WV <"$dst.uh = vzxt($src1.ub)">, + V6_vzb_enc; +defm V6_vzh : T_HVX_alu_2op_WV <"$dst.uw = vzxt($src1.uh)">, + V6_vzh_enc; +defm V6_vsb : T_HVX_alu_2op_WV <"$dst.h = vsxt($src1.b)">, + V6_vsb_enc; +defm V6_vsh : T_HVX_alu_2op_WV <"$dst.w = vsxt($src1.h)">, + V6_vsh_enc; + +let Itinerary = CVI_VP, Type = TypeCVI_VP in { +defm V6_vdealh : T_HVX_alu_2op_VV <"$dst.h = vdeal($src1.h)">, + V6_vdealh_enc; +defm V6_vdealb : T_HVX_alu_2op_VV <"$dst.b = vdeal($src1.b)">, + V6_vdealb_enc; +defm V6_vshuffh : T_HVX_alu_2op_VV <"$dst.h = vshuff($src1.h)">, + V6_vshuffh_enc; +defm V6_vshuffb : T_HVX_alu_2op_VV <"$dst.b = vshuff($src1.b)">, + V6_vshuffb_enc; +} + +let Itinerary = CVI_VP_VS, Type = TypeCVI_VP_VS in { +defm V6_vunpackub : T_HVX_alu_2op_WV <"$dst.uh = vunpack($src1.ub)">, + V6_vunpackub_enc; +defm V6_vunpackuh : T_HVX_alu_2op_WV <"$dst.uw = vunpack($src1.uh)">, + V6_vunpackuh_enc; +defm V6_vunpackb : T_HVX_alu_2op_WV <"$dst.h = vunpack($src1.b)">, + V6_vunpackb_enc; +defm V6_vunpackh : T_HVX_alu_2op_WV <"$dst.w = vunpack($src1.h)">, + V6_vunpackh_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vcl0w : T_HVX_alu_2op_VV <"$dst.uw = vcl0($src1.uw)">, + V6_vcl0w_enc; +defm V6_vcl0h : T_HVX_alu_2op_VV <"$dst.uh = vcl0($src1.uh)">, + V6_vcl0h_enc; +defm V6_vnormamtw : T_HVX_alu_2op_VV <"$dst.w = vnormamt($src1.w)">, + V6_vnormamtw_enc; +defm V6_vnormamth : T_HVX_alu_2op_VV <"$dst.h = vnormamt($src1.h)">, + V6_vnormamth_enc; +defm V6_vpopcounth : T_HVX_alu_2op_VV <"$dst.h = vpopcount($src1.h)">, + 
V6_vpopcounth_enc; +} + +let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG, + Type = TypeCVI_VX_DV in +class T_HVX_vmpyacc2 + : CVI_VA_Resource1 <(outs RC:$dst), + (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString, [], "$dst = $_src_" > ; + + +multiclass T_HVX_vmpyacc2 { + def NAME : T_HVX_vmpyacc2 ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpyacc2 ; +} + +defm V6_vrmpybusi_acc : + T_HVX_vmpyacc2<"$dst.w += vrmpy($src1.ub,$src2.b,#$src3)">, + V6_vrmpybusi_acc_enc; +defm V6_vrsadubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrsad($src1.ub,$src2.ub,#$src3)">, + V6_vrsadubi_acc_enc; +defm V6_vrmpyubi_acc : + T_HVX_vmpyacc2<"$dst.uw += vrmpy($src1.ub,$src2.ub,#$src3)">, + V6_vrmpyubi_acc_enc; + + +let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in +class T_HVX_vmpy2 + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3), + asmString>; + + +multiclass T_HVX_vmpy2 { + def NAME : T_HVX_vmpy2 ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vmpy2 ; +} + +defm V6_vrmpybusi : + T_HVX_vmpy2 <"$dst.w = vrmpy($src1.ub,$src2.b,#$src3)">, V6_vrmpybusi_enc; +defm V6_vrsadubi : + T_HVX_vmpy2 <"$dst.uw = vrsad($src1.ub,$src2.ub,#$src3)">, V6_vrsadubi_enc; +defm V6_vrmpyubi : + T_HVX_vmpy2 <"$dst.uw = vrmpy($src1.ub,$src2.ub,#$src3)">, V6_vrmpyubi_enc; + + +let Itinerary = CVI_VP_VS_LONG_EARLY, Type = TypeCVI_VP_VS, + hasSideEffects = 0, hasNewValue2 = 1, opNewValue2 = 1 in +class T_HVX_perm + : CVI_VA_Resource1 <(outs RC:$_dst1_, RC:$_dst2_), + (ins RC:$src1, RC:$src2, IntRegs:$src3), + asmString, [], "$_dst1_ = $src1, $_dst2_ = $src2" >; + +multiclass T_HVX_perm { + def NAME : T_HVX_perm ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_perm ; +} + +let hasNewValue = 1, opNewValue = 0, hasNewValue2 = 1, opNewValue2 = 1 in { + defm V6_vshuff : T_HVX_perm <"vshuff($src1,$src2,$src3)">, V6_vshuff_enc; + defm V6_vdeal : T_HVX_perm <"vdeal($src1,$src2,$src3)">, V6_vdeal_enc; +} + +// Conditional vector move. +let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_cmov + : CVI_VA_Resource1 <(outs RC:$dst), (ins PredRegs:$src1, RC:$src2), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = $src2"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_cmov { + def NAME : T_HVX_cmov ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_cmov ; +} + +defm V6_vcmov : T_HVX_cmov, V6_vcmov_enc; +defm V6_vncmov : T_HVX_cmov<1>, V6_vncmov_enc; + +// Conditional vector combine. 
+let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, isPredicated = 1, + hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in +class T_HVX_ccombine + : CVI_VA_Resource1 < (outs RCout:$dst), + (ins PredRegs:$src1, RCin:$src2, RCin:$src3), + "if ("#!if(isPredNot, "!", "")#"$src1) $dst = vcombine($src2,$src3)"> { + let isPredicatedFalse = isPredNot; +} + +multiclass T_HVX_ccombine { + def NAME : T_HVX_ccombine ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_ccombine ; +} + +defm V6_vccombine : T_HVX_ccombine, V6_vccombine_enc; +defm V6_vnccombine : T_HVX_ccombine<1>, V6_vnccombine_enc; + +let hasNewValue = 1 in +class T_HVX_shift + : CVI_VX_DV_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString >; + +multiclass T_HVX_shift { + def NAME : T_HVX_shift ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_shift (RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_HVX_shift_VV : + T_HVX_shift ; + +multiclass T_HVX_shift_WV : + T_HVX_shift ; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in { +defm V6_valignb : + T_HVX_shift_VV <"$dst = valign($src1,$src2,$src3)">, V6_valignb_enc; +defm V6_vlalignb : + T_HVX_shift_VV <"$dst = vlalign($src1,$src2,$src3)">, V6_vlalignb_enc; +} + +let Itinerary = CVI_VS, Type = TypeCVI_VS in { +defm V6_vasrwh : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3)">, V6_vasrwh_enc; +defm V6_vasrwhsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwhsat_enc; +defm V6_vasrwhrndsat : + T_HVX_shift_VV <"$dst.h = vasr($src1.w,$src2.w,$src3):rnd:sat">, + V6_vasrwhrndsat_enc; +defm V6_vasrwuhsat : + T_HVX_shift_VV <"$dst.uh = vasr($src1.w,$src2.w,$src3):sat">, + V6_vasrwuhsat_enc; +defm V6_vasrhubsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):sat">, + V6_vasrhubsat_enc; +defm V6_vasrhubrndsat : + T_HVX_shift_VV <"$dst.ub = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhubrndsat_enc; +defm V6_vasrhbrndsat : + T_HVX_shift_VV <"$dst.b = vasr($src1.h,$src2.h,$src3):rnd:sat">, + V6_vasrhbrndsat_enc; +} + +// Assembler mapped -- alias? 
+//defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc; +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in { +defm V6_vshuffvdd : + T_HVX_shift_WV <"$dst = vshuff($src1,$src2,$src3)">, V6_vshuffvdd_enc; +defm V6_vdealvdd : + T_HVX_shift_WV <"$dst = vdeal($src1,$src2,$src3)">, V6_vdealvdd_enc; +} + +let hasNewValue = 1, Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in +class T_HVX_unpack + : CVI_VX_DV_Resource1<(outs RCout:$dst), (ins RCout:$_src_, RCin:$src1), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_unpack { + def NAME : T_HVX_unpack ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_unpack ; +} + +defm V6_vunpackob : T_HVX_unpack <"$dst.h |= vunpacko($src1.b)">, V6_vunpackob_enc; +defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_enc; + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_valign + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3), + asmString>; + +multiclass T_HVX_valign { + def NAME : T_HVX_valign ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_valign ; +} + +defm V6_valignbi : + T_HVX_valign <"$dst = valign($src1,$src2,#$src3)">, V6_valignbi_enc; +defm V6_vlalignbi : + T_HVX_valign <"$dst = vlalign($src1,$src2,#$src3)">, V6_vlalignbi_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in +class T_HVX_predAlu + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2), + asmString>; + +multiclass T_HVX_predAlu { + def NAME : T_HVX_predAlu ; + + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_predAlu ; +} + +defm V6_pred_and : T_HVX_predAlu <"$dst = and($src1,$src2)">, V6_pred_and_enc; +defm V6_pred_or : T_HVX_predAlu <"$dst = or($src1,$src2)">, V6_pred_or_enc; +defm V6_pred_xor : T_HVX_predAlu <"$dst = xor($src1,$src2)">, V6_pred_xor_enc; +defm V6_pred_or_n : T_HVX_predAlu <"$dst = or($src1,!$src2)">, V6_pred_or_n_enc; +defm V6_pred_and_n : + T_HVX_predAlu <"$dst = and($src1,!$src2)">, V6_pred_and_n_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_prednot + : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1), + "$dst = not($src1)">, V6_pred_not_enc; + +def V6_pred_not : T_HVX_prednot ; +let isCodeGenOnly = 1 in +def V6_pred_not_128B : T_HVX_prednot ; + +let Itinerary = CVI_VA, Type = TypeCVI_VA in +class T_HVX_vcmp2 + : CVI_VA_Resource1 <(outs RCout:$dst), (ins RCin:$src1, RCin:$src2), + asmString >; + +multiclass T_HVX_vcmp2 { + def NAME : T_HVX_vcmp2 ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vcmp2 ; +} + +defm V6_veqb : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.b,$src2.b)">, V6_veqb_enc; +defm V6_veqh : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.h,$src2.h)">, V6_veqh_enc; +defm V6_veqw : T_HVX_vcmp2 <"$dst = vcmp.eq($src1.w,$src2.w)">, V6_veqw_enc; +defm V6_vgtb : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.b,$src2.b)">, V6_vgtb_enc; +defm V6_vgth : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.h,$src2.h)">, V6_vgth_enc; +defm V6_vgtw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.w,$src2.w)">, V6_vgtw_enc; +defm V6_vgtub : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.ub,$src2.ub)">, V6_vgtub_enc; +defm V6_vgtuh : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uh,$src2.uh)">, V6_vgtuh_enc; +defm V6_vgtuw : T_HVX_vcmp2 <"$dst = vcmp.gt($src1.uw,$src2.uw)">, V6_vgtuw_enc; + +let isAccumulator = 1, hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt_acc + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandqrt_acc_enc; + +def V6_vandqrt_acc : 
T_V6_vandqrt_acc ; +let isCodeGenOnly = 1 in +def V6_vandqrt_acc_128B : T_V6_vandqrt_acc ; + +let isAccumulator = 1 in +class T_V6_vandvrt_acc + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, IntRegs:$src2), + "$dst |= vand($src1,$src2)", [], "$dst = $_src_">, V6_vandvrt_acc_enc; + +def V6_vandvrt_acc : T_V6_vandvrt_acc ; +let isCodeGenOnly = 1 in +def V6_vandvrt_acc_128B : T_V6_vandvrt_acc ; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_vandqrt + : CVI_VX_Resource_late<(outs RCout:$dst), + (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)" >, V6_vandqrt_enc; + +def V6_vandqrt : T_V6_vandqrt ; +let isCodeGenOnly = 1 in +def V6_vandqrt_128B : T_V6_vandqrt ; + +let hasNewValue = 1, hasSideEffects = 0 in +class T_V6_lvsplatw + : CVI_VX_Resource_late<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsplat($src1)" >, V6_lvsplatw_enc; + +def V6_lvsplatw : T_V6_lvsplatw ; +let isCodeGenOnly = 1 in +def V6_lvsplatw_128B : T_V6_lvsplatw ; + + +let hasNewValue = 1 in +class T_V6_vinsertwr + : CVI_VX_Resource_late<(outs RC:$dst), (ins RC:$_src_, IntRegs:$src1), + "$dst.w = vinsert($src1)", [], "$dst = $_src_">, + V6_vinsertwr_enc; + +def V6_vinsertwr : T_V6_vinsertwr ; +let isCodeGenOnly = 1 in +def V6_vinsertwr_128B : T_V6_vinsertwr ; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP in +class T_V6_pred_scalar2 + : CVI_VA_Resource1<(outs RC:$dst), (ins IntRegs:$src1), + "$dst = vsetq($src1)">, V6_pred_scalar2_enc; + +def V6_pred_scalar2 : T_V6_pred_scalar2 ; +let isCodeGenOnly = 1 in +def V6_pred_scalar2_128B : T_V6_pred_scalar2 ; + +class T_V6_vandvrt + : CVI_VX_Resource_late<(outs RCout:$dst), (ins RCin:$src1, IntRegs:$src2), + "$dst = vand($src1,$src2)">, V6_vandvrt_enc; + +def V6_vandvrt : T_V6_vandvrt ; +let isCodeGenOnly = 1 in +def V6_vandvrt_128B : T_V6_vandvrt ; + +let validSubTargets = HasV60SubT in +class T_HVX_rol + : SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>; + +class T_HVX_rol_R + : T_HVX_rol ; +class T_HVX_rol_P + : T_HVX_rol ; + +def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc; +let hasNewValue = 1, opNewValue = 0 in +def S6_rol_i_r : T_HVX_rol_R <"$dst = rol($src1,#$src2)">, S6_rol_i_r_enc; + +let validSubTargets = HasV60SubT in +class T_HVX_rol_acc + : SInst2 <(outs RC:$dst), (ins RC:$_src_, RC:$src1, ImmOp:$src2), + asmString, [], "$dst = $_src_" >; + +class T_HVX_rol_acc_P + : T_HVX_rol_acc ; + +class T_HVX_rol_acc_R + : T_HVX_rol_acc ; + +def S6_rol_i_p_nac : + T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc; +def S6_rol_i_p_acc : + T_HVX_rol_acc_P <"$dst += rol($src1,#$src2)">, S6_rol_i_p_acc_enc; +def S6_rol_i_p_and : + T_HVX_rol_acc_P <"$dst &= rol($src1,#$src2)">, S6_rol_i_p_and_enc; +def S6_rol_i_p_or : + T_HVX_rol_acc_P <"$dst |= rol($src1,#$src2)">, S6_rol_i_p_or_enc; +def S6_rol_i_p_xacc : + T_HVX_rol_acc_P<"$dst ^= rol($src1,#$src2)">, S6_rol_i_p_xacc_enc; + +let hasNewValue = 1, opNewValue = 0 in { +def S6_rol_i_r_nac : + T_HVX_rol_acc_R <"$dst -= rol($src1,#$src2)">, S6_rol_i_r_nac_enc; +def S6_rol_i_r_acc : + T_HVX_rol_acc_R <"$dst += rol($src1,#$src2)">, S6_rol_i_r_acc_enc; +def S6_rol_i_r_and : + T_HVX_rol_acc_R <"$dst &= rol($src1,#$src2)">, S6_rol_i_r_and_enc; +def S6_rol_i_r_or : + T_HVX_rol_acc_R <"$dst |= rol($src1,#$src2)">, S6_rol_i_r_or_enc; +def S6_rol_i_r_xacc : + T_HVX_rol_acc_R <"$dst ^= rol($src1,#$src2)">, S6_rol_i_r_xacc_enc; +} + +let isSolo = 1, Itinerary = LD_tc_ld_SLOT0, Type = TypeLD in +class T_V6_extractw + : LD1Inst <(outs 
IntRegs:$dst), (ins RC:$src1, IntRegs:$src2), + "$dst = vextract($src1,$src2)">, V6_extractw_enc; + +def V6_extractw : T_V6_extractw ; +let isCodeGenOnly = 1 in +def V6_extractw_128B : T_V6_extractw ; + +let Itinerary = ST_tc_st_SLOT0, validSubTargets = HasV55SubT in +class T_sys0op + : ST1Inst <(outs), (ins), asmString>; + +let isSolo = 1, validSubTargets = HasV55SubT in { +def Y5_l2gunlock : T_sys0op <"l2gunlock">, Y5_l2gunlock_enc; +def Y5_l2gclean : T_sys0op <"l2gclean">, Y5_l2gclean_enc; +def Y5_l2gcleaninv : T_sys0op <"l2gcleaninv">, Y5_l2gcleaninv_enc; +} + +class T_sys1op + : ST1Inst <(outs), (ins RC:$src1), asmString>; + +class T_sys1op_R : T_sys1op ; +class T_sys1op_P : T_sys1op ; + +let isSoloAX = 1, validSubTargets = HasV55SubT in +def Y5_l2unlocka : T_sys1op_R <"l2unlocka($src1)">, Y5_l2unlocka_enc; + +let isSolo = 1, validSubTargets = HasV60SubT in { +def Y6_l2gcleanpa : T_sys1op_P <"l2gclean($src1)">, Y6_l2gcleanpa_enc; +def Y6_l2gcleaninvpa : T_sys1op_P <"l2gcleaninv($src1)">, Y6_l2gcleaninvpa_enc; +} + +let Itinerary = ST_tc_3stall_SLOT0, isPredicateLate = 1, isSoloAX = 1, + validSubTargets = HasV55SubT in +def Y5_l2locka : ST1Inst <(outs PredRegs:$dst), (ins IntRegs:$src1), + "$dst = l2locka($src1)">, Y5_l2locka_enc; + +// not defined on etc side. why? +// defm S2_cabacencbin : _VV <"Rdd=encbin(Rss,$src2,Pu)">, S2_cabacencbin_enc; + +let Defs = [USR_OVF], Itinerary = M_tc_3stall_SLOT23, isPredicateLate = 1, + hasSideEffects = 0, +validSubTargets = HasV55SubT in +def A5_ACS : MInst2 <(outs DoubleRegs:$dst1, PredRegs:$dst2), + (ins DoubleRegs:$_src_, DoubleRegs:$src1, DoubleRegs:$src2), + "$dst1,$dst2 = vacsh($src1,$src2)", [], + "$dst1 = $_src_" >, Requires<[HasV55T]>, A5_ACS_enc; + +let Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV, hasNewValue = 1, + hasSideEffects = 0 in +class T_HVX_alu2 + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin1:$src1, RCin2:$src2, RCin2:$src3), asmString>; + +multiclass T_HVX_alu2 { + def NAME : T_HVX_alu2 ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_alu2 (RC#"128B"), + VecPredRegs128B, VectorRegs128B>; +} + +multiclass T_HVX_alu2_V : + T_HVX_alu2 ; + +multiclass T_HVX_alu2_W : + T_HVX_alu2 ; + +defm V6_vswap : T_HVX_alu2_W <"$dst = vswap($src1,$src2,$src3)">, V6_vswap_enc; + +let Itinerary = CVI_VA, Type = TypeCVI_VA, hasNewValue = 1, + hasSideEffects = 0 in +defm V6_vmux : T_HVX_alu2_V <"$dst = vmux($src1,$src2,$src3)">, V6_vmux_enc; + +class T_HVX_vlutb + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCin:$src1, RCin:$src2, IntRegsLow8:$src3), asmString>; + +multiclass T_HVX_vlutb { + def NAME : T_HVX_vlutb ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb (RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_V : + T_HVX_vlutb ; + +multiclass T_HVX_vlutb_W : + T_HVX_vlutb ; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, isAccumulator = 1 in +class T_HVX_vlutb_acc + : CVI_VA_Resource1<(outs RCout:$dst), + (ins RCout:$_src_, RCin:$src1, RCin:$src2, IntRegsLow8:$src3), + asmString, [], "$dst = $_src_">; + +multiclass T_HVX_vlutb_acc { + def NAME : T_HVX_vlutb_acc ; + let isCodeGenOnly = 1 in + def NAME#_128B : T_HVX_vlutb_acc(RCout#"128B"), + !cast(RCin#"128B")>; +} + +multiclass T_HVX_vlutb_acc_V : + T_HVX_vlutb_acc ; + +multiclass T_HVX_vlutb_acc_W : + T_HVX_vlutb_acc ; + + +let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1 in +defm V6_vlutvvb: + T_HVX_vlutb_V <"$dst.b = vlut32($src1.b,$src2.b,$src3)">, V6_vlutvvb_enc; + +let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS, 
hasNewValue = 1 in +defm V6_vlutvwh: + T_HVX_vlutb_W <"$dst.h = vlut16($src1.b,$src2.h,$src3)">, V6_vlutvwh_enc; + +let hasNewValue = 1 in { + defm V6_vlutvvb_oracc: + T_HVX_vlutb_acc_V <"$dst.b |= vlut32($src1.b,$src2.b,$src3)">, + V6_vlutvvb_oracc_enc; + defm V6_vlutvwh_oracc: + T_HVX_vlutb_acc_W <"$dst.h |= vlut16($src1.b,$src2.h,$src3)">, + V6_vlutvwh_oracc_enc; +} + +// It's a fake instruction and should not be defined? +def S2_cabacencbin + : SInst2<(outs DoubleRegs:$dst), + (ins DoubleRegs:$src1, DoubleRegs:$src2, PredRegs:$src3), + "$dst = encbin($src1,$src2,$src3)">, S2_cabacencbin_enc; + +// Vhist instructions +def V6_vhistq + : CVI_HIST_Resource1 <(outs), (ins VecPredRegs:$src1), + "vhist($src1)">, V6_vhistq_enc; + +def V6_vhist + : CVI_HIST_Resource1 <(outs), (ins), + "vhist" >, V6_vhist_enc; diff --git a/lib/Target/Hexagon/HexagonInstrInfoVector.td b/lib/Target/Hexagon/HexagonInstrInfoVector.td index f4fb946d5bad..96dd5315b87f 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoVector.td +++ b/lib/Target/Hexagon/HexagonInstrInfoVector.td @@ -35,6 +35,34 @@ multiclass bitconvert_64 { (a DoubleRegs:$src)>; } +multiclass bitconvert_vec { + def : Pat <(b (bitconvert (a VectorRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VectorRegs:$src)>; +} + +multiclass bitconvert_dblvec { + def : Pat <(b (bitconvert (a VecDblRegs:$src))), + (b VecDblRegs:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs:$src))), + (a VecDblRegs:$src)>; +} + +multiclass bitconvert_predvec { + def : Pat <(b (bitconvert (a VecPredRegs:$src))), + (b VectorRegs:$src)>; + def : Pat <(a (bitconvert (b VectorRegs:$src))), + (a VecPredRegs:$src)>; +} + +multiclass bitconvert_dblvec128B { + def : Pat <(b (bitconvert (a VecDblRegs128B:$src))), + (b VecDblRegs128B:$src)>; + def : Pat <(a (bitconvert (b VecDblRegs128B:$src))), + (a VecDblRegs128B:$src)>; +} + // Bit convert vector types. defm : bitconvert_32; defm : bitconvert_32; @@ -47,6 +75,21 @@ defm : bitconvert_64; defm : bitconvert_64; defm : bitconvert_64; +defm : bitconvert_vec; +defm : bitconvert_vec; +defm : bitconvert_vec; + +defm : bitconvert_dblvec; +defm : bitconvert_dblvec; +defm : bitconvert_dblvec; + +defm : bitconvert_dblvec128B; +defm : bitconvert_dblvec128B; +defm : bitconvert_dblvec128B; + +defm : bitconvert_dblvec128B; +defm : bitconvert_dblvec128B; +defm : bitconvert_dblvec128B; // Vector shift support. Vector shifting in Hexagon is rather different // from internal representation of LLVM. 
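The bitconvert multiclasses above record that same-width HVX types live in the same register file, so a bitcast between, say, v16i32 and v32i16 selects to nothing: the source register is reused as-is. As a host-side sketch of the semantics those Pat records encode (plain C++, no LLVM dependency; the 64-byte width matching a single-mode HVX register is an assumption of this sketch):

#include <array>
#include <cstdint>
#include <cstring>
#include <iostream>

int main() {
  // One 64-byte HVX register viewed as v16i32 and as v32i16: the bitconvert
  // patterns above say this reinterpretation needs no instruction at all.
  std::array<int32_t, 16> AsWords{};
  AsWords[0] = 0x00020001;               // two halfwords packed in one word

  std::array<int16_t, 32> AsHalves;
  static_assert(sizeof(AsWords) == sizeof(AsHalves), "same register width");
  std::memcpy(AsHalves.data(), AsWords.data(), sizeof(AsWords)); // the "cast"

  // On a little-endian host the low halfword comes first.
  std::cout << AsHalves[0] << " " << AsHalves[1] << "\n"; // prints: 1 2
  return 0;
}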
diff --git a/lib/Target/Hexagon/HexagonIntrinsics.td b/lib/Target/Hexagon/HexagonIntrinsics.td index 1d0d015f798b..b207aaf392f4 100644 --- a/lib/Target/Hexagon/HexagonIntrinsics.td +++ b/lib/Target/Hexagon/HexagonIntrinsics.td @@ -691,15 +691,15 @@ def: T_RR_pat; def: T_RR_pat; def: T_RR_pat; -def: T_II_pat; +def: T_II_pat; def: Pat<(i32 (int_hexagon_C2_mux (I32:$Rp), (I32:$Rs), (I32:$Rt))), (i32 (C2_mux (C2_tfrrp IntRegs:$Rp), IntRegs:$Rs, IntRegs:$Rt))>; // Mux -def : T_QRI_pat; -def : T_QIR_pat; -def : T_QII_pat; +def : T_QRI_pat; +def : T_QIR_pat; +def : T_QII_pat; // Shift halfword def : T_R_pat; @@ -720,17 +720,17 @@ def : T_RR_pat; def : T_RR_pat; def : T_RR_pat; -def : T_RI_pat; -def : T_RI_pat; -def : T_RI_pat; +def : T_RI_pat; +def : T_RI_pat; +def : T_RI_pat; -def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgei (I32:$src1), s32ImmPred:$src2)), (i32 (C2_cmpgti (I32:$src1), - (DEC_CONST_SIGNED s8ExtPred:$src2)))>; + (DEC_CONST_SIGNED s32ImmPred:$src2)))>; -def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u8ExtPred:$src2)), +def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), u32ImmPred:$src2)), (i32 (C2_cmpgtui (I32:$src1), - (DEC_CONST_UNSIGNED u8ExtPred:$src2)))>; + (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>; // The instruction, Pd=cmp.geu(Rs, #u8) -> Pd=cmp.eq(Rs,Rs) when #u8 == 0. def : Pat <(i32 (int_hexagon_C2_cmpgeui (I32:$src1), 0)), @@ -1289,3 +1289,5 @@ def: T_stc_pat; include "HexagonIntrinsicsV3.td" include "HexagonIntrinsicsV4.td" include "HexagonIntrinsicsV5.td" +include "HexagonIntrinsicsV60.td" + diff --git a/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/lib/Target/Hexagon/HexagonIntrinsicsV60.td new file mode 100644 index 000000000000..24a3e4d36de9 --- /dev/null +++ b/lib/Target/Hexagon/HexagonIntrinsicsV60.td @@ -0,0 +1,836 @@ +//=- HexagonIntrinsicsV60.td - Target Description for Hexagon -*- tablegen *-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the Hexagon V60 Compiler Intrinsics in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +let isCodeGenOnly = 1 in { +def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>; + +def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins ), + "$dst=#0", + [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>; +} +let isPseudo = 1 in +def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=vassignp_W($src1)", + [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=vassignp_W_128B($src1)", + [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B + VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst), + (ins VecDblRegs:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=lo_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>; + +let isPseudo = 1 in +def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst), + (ins VecDblRegs128B:$src1), + "$dst=hi_W($src1)", + [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>; + +let AddedComplexity = 100 in { +def : Pat < (v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))), + (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >, + Requires<[UseHVXSgl]>; + +def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_loreg)) >, + Requires<[UseHVXDbl]>; + +def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))), + (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), + subreg_hireg)) >, + Requires<[UseHVXDbl]>; +} + +def : Pat <(v512i1 (bitconvert (v16i32 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v16i32 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v32i16 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v32i16 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v64i8 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v64i8 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (bitconvert (v8i64 VectorRegs:$src1))), + (v512i1 (V6_vandvrt(v8i64 VectorRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v16i32 (bitconvert (v512i1 VecPredRegs:$src1))), + (v16i32 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v32i16 (bitconvert (v512i1 VecPredRegs:$src1))), + (v32i16 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v64i8 (bitconvert (v512i1 VecPredRegs:$src1))), + (v64i8 (V6_vandqrt(v512i1 
VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v8i64 (bitconvert (v512i1 VecPredRegs:$src1))), + (v8i64 (V6_vandqrt(v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v1024i1 (bitconvert (v32i32 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v32i32 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v64i16 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v64i16 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v128i8 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v128i8 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (bitconvert (v16i64 VectorRegs128B:$src1))), + (v1024i1 (V6_vandvrt_128B(v16i64 VectorRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v32i32 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v32i32 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v64i16 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v64i16 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v128i8 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v128i8 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v16i64 (bitconvert (v1024i1 VecPredRegs128B:$src1))), + (v16i64 (V6_vandqrt_128B(v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; + +let AddedComplexity = 140 in { +def : Pat <(store (v512i1 VecPredRegs:$src1), (i32 IntRegs:$addr)), + (V6_vS32b_ai IntRegs:$addr, 0, + (v16i32 (V6_vandqrt (v512i1 VecPredRegs:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXSgl]>; + +def : Pat <(v512i1 (load (i32 IntRegs:$addr))), + (v512i1 (V6_vandvrt + (v16i32 (V6_vL32b_ai IntRegs:$addr, 0)), (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXSgl]>; + +def : Pat <(store (v1024i1 VecPredRegs128B:$src1), (i32 IntRegs:$addr)), + (V6_vS32b_ai_128B IntRegs:$addr, 0, + (v32i32 (V6_vandqrt_128B (v1024i1 VecPredRegs128B:$src1), + (A2_tfrsi 0x01010101))))>, + Requires<[UseHVXDbl]>; + +def : Pat <(v1024i1 (load (i32 IntRegs:$addr))), + (v1024i1 (V6_vandvrt_128B + (v32i32 (V6_vL32b_ai_128B IntRegs:$addr, 0)), + (A2_tfrsi 0x01010101)))>, + Requires<[UseHVXDbl]>; +} + +multiclass T_R_pat { + def: Pat<(IntID IntRegs:$src1), (MI IntRegs:$src1)>, + Requires<[UseHVXSgl]>; + def: Pat<(!cast(IntID#"_128B") IntRegs:$src1), + (!cast(MI#"_128B") IntRegs:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_V_pat { + def: Pat<(IntID VectorRegs:$src1), + (MI VectorRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1), + (!cast(MI#"_128B") VectorRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_Q_pat { + def: Pat<(IntID VecPredRegs:$src1), + (MI VecPredRegs:$src1)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1), + (!cast(MI#"_128B") VecPredRegs128B:$src1)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WR_pat { + def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2), + (MI VecDblRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B")VecDblRegs128B:$src1, IntRegs:$src2), + (!cast(MI#"_128B")VecDblRegs128B:$src1, IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VR_pat { + def: Pat<(IntID 
VectorRegs:$src1, IntRegs:$src2), + (MI VectorRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B")VectorRegs128B:$src1, IntRegs:$src2), + (!cast(MI#"_128B")VectorRegs128B:$src1, IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WV_pat { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2), + (MI VecDblRegs:$src1, VectorRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WW_pat { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2), + (MI VecDblRegs:$src1, VecDblRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VV_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2), + (MI VectorRegs:$src1, VectorRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QR_pat { + def: Pat<(IntID VecPredRegs:$src1, IntRegs:$src2), + (MI VecPredRegs:$src1, IntRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1, + IntRegs:$src2), + (!cast(MI#"_128B") VecPredRegs128B:$src1, + IntRegs:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QQ_pat { + def: Pat<(IntID VecPredRegs:$src1, VecPredRegs:$src2), + (MI VecPredRegs:$src1, VecPredRegs:$src2)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1, + VecPredRegs128B:$src2), + (!cast(MI#"_128B") VecPredRegs128B:$src1, + VecPredRegs128B:$src2)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WWR_pat { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3), + (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVR_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVR_pat { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VecDblRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VWR_pat { + def: Pat<(IntID VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VecDblRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVV_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, 
VectorRegs:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVV_pat { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_QVV_pat { + def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), + (MI VecPredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3), + (!cast(MI#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VQR_pat { + def: Pat<(IntID VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3), + (MI VectorRegs:$src1, VecPredRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VecPredRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VecPredRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + + +multiclass T_QVR_pat { + def: Pat<(IntID VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3), + (MI VecPredRegs:$src1, VectorRegs:$src2, IntRegs:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3), + (!cast(MI#"_128B") VecPredRegs128B:$src1, + VectorRegs128B:$src2, + IntRegs:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVI_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, imm:$src3), + (MI VectorRegs:$src1, VectorRegs:$src2, imm:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, imm:$src3), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, imm:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WRI_pat { + def: Pat<(IntID VecDblRegs:$src1, IntRegs:$src2, imm:$src3), + (MI VecDblRegs:$src1, IntRegs:$src2, imm:$src3)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + IntRegs:$src2, imm:$src3), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + IntRegs:$src2, imm:$src3)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WWRI_pat { + def: Pat<(IntID VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4), + (MI VecDblRegs:$src1, VecDblRegs:$src2, IntRegs:$src3, imm:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3, imm:$src4), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VecDblRegs128B:$src2, + IntRegs:$src3, imm:$src4)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_VVVR_pat { + def: Pat<(IntID VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4), + (MI VectorRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4), + (!cast(MI#"_128B") VectorRegs128B:$src1, + VectorRegs128B:$src2, + 
VectorRegs128B:$src3, + IntRegs:$src4)>, + Requires<[UseHVXDbl]>; +} + +multiclass T_WVVR_pat { + def: Pat<(IntID VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4), + (MI VecDblRegs:$src1, VectorRegs:$src2, VectorRegs:$src3, + IntRegs:$src4)>, + Requires<[UseHVXSgl]>; + + def: Pat<(!cast(IntID#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4), + (!cast(MI#"_128B") VecDblRegs128B:$src1, + VectorRegs128B:$src2, + VectorRegs128B:$src3, + IntRegs:$src4)>, + Requires<[UseHVXDbl]>; +} + +defm : T_WR_pat; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_VR_pat ; +defm : T_WR_pat ; +defm : T_VR_pat ; + +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_WW_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_WW_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_WW_pat ; + +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; + +defm : T_VWR_pat ; +defm : T_VWR_pat ; + +defm : T_WVR_pat ; +defm : T_WVR_pat ; +defm : T_WVR_pat ; +defm : T_WVR_pat ; + +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; +defm : T_WWR_pat ; + +defm : T_VVV_pat ; +defm : T_WVV_pat ; +defm : T_WVV_pat ; +defm : T_WVV_pat ; +defm : T_WVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_WVV_pat ; +defm : T_WVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; +defm : T_VVV_pat ; + +// Compare instructions +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; 
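Each T_*_pat multiclass above turns one defm into two selection patterns: the base intrinsic/instruction pair, guarded by UseHVXSgl, and a 128-byte pair obtained by gluing the "_128B" suffix onto both names with !cast, guarded by UseHVXDbl. A minimal C++ sketch of that suffix-keyed pairing (the registry and the example names are illustrative, not actual TableGen output):

#include <iostream>
#include <map>
#include <string>

// Toy registry: intrinsic name -> instruction chosen to implement it.
static std::map<std::string, std::string> Patterns;

// Mirrors a T_*_pat multiclass: one call registers both HVX modes.
static void defmPat(const std::string &IntID, const std::string &MI) {
  Patterns[IntID] = MI;                     // 64-byte mode (UseHVXSgl)
  Patterns[IntID + "_128B"] = MI + "_128B"; // 128-byte mode (UseHVXDbl)
}

int main() {
  defmPat("int_hexagon_V6_vaddw", "V6_vaddw"); // illustrative pair
  for (const auto &P : Patterns)
    std::cout << P.first << " -> " << P.second << "\n";
  return 0;
}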
+defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; + +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; + +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QVV_pat ; + +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; +defm : T_V_pat ; + +defm : T_WRI_pat ; +defm : T_WRI_pat ; +defm : T_WRI_pat ; + +defm : T_WWRI_pat ; +defm : T_WWRI_pat ; +defm : T_WWRI_pat ; + +// assembler mapped. +//defm : T_V_pat ; +// not present earlier.. 
need to add intrinsic +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVR_pat ; + +defm : T_VVR_pat ; +defm : T_VVR_pat ; + +defm : T_WV_pat ; +defm : T_WV_pat ; +defm : T_VVI_pat ; +defm : T_VVI_pat ; + +defm : T_QVV_pat ; +defm : T_QVV_pat ; +defm : T_QQ_pat ; +defm : T_QQ_pat ; +defm : T_Q_pat ; +defm : T_QQ_pat ; +defm : T_QQ_pat ; +defm : T_QQ_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; +defm : T_VV_pat ; + +defm : T_VQR_pat ; +defm : T_QVR_pat ; +defm : T_QR_pat ; +defm : T_R_pat ; +defm : T_R_pat ; +defm : T_VR_pat ; + +defm : T_VVR_pat ; +defm : T_VVR_pat ; +defm : T_VVVR_pat ; +defm : T_WVVR_pat ; + +defm : T_QVR_pat ; +def : T_PI_pat ; +def : T_RI_pat ; +def : T_PPI_pat ; +def : T_PPI_pat ; +def : T_PPI_pat ; +def : T_PPI_pat ; +def : T_PPI_pat ; +def : T_RRI_pat ; +def : T_RRI_pat ; +def : T_RRI_pat ; +def : T_RRI_pat ; +def : T_RRI_pat ; + +defm : T_VR_pat ; +defm : T_VR_pat ; + +def : T_PPQ_pat ; + +def: Pat<(v64i16 (trunc v64i32:$Vdd)), + (v64i16 (V6_vpackwh_sat_128B + (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)), + (v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>, + Requires<[UseHVXDbl]>; + + diff --git a/lib/Target/Hexagon/HexagonMCInstLower.cpp b/lib/Target/Hexagon/HexagonMCInstLower.cpp index 75189b696ea2..624c0f6cf49d 100644 --- a/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -26,39 +26,71 @@ using namespace llvm; -static MCOperand GetSymbolRef(const MachineOperand& MO, const MCSymbol* Symbol, - HexagonAsmPrinter& Printer) { +namespace llvm { + void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP); +} + +static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + HexagonAsmPrinter &Printer) { MCContext &MC = Printer.OutContext; const MCExpr *ME; - ME = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, MC); + // Populate the relocation type based on Hexagon target flags + // set on an operand + MCSymbolRefExpr::VariantKind RelocationType; + switch (MO.getTargetFlags()) { + default: + RelocationType = MCSymbolRefExpr::VK_None; + break; + case HexagonII::MO_PCREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_PCREL; + break; + case HexagonII::MO_GOT: + RelocationType = MCSymbolRefExpr::VK_GOT; + break; + case HexagonII::MO_LO16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_LO16; + break; + case HexagonII::MO_HI16: + RelocationType = MCSymbolRefExpr::VK_Hexagon_HI16; + break; + case HexagonII::MO_GPREL: + RelocationType = MCSymbolRefExpr::VK_Hexagon_GPREL; + break; + } + + ME = MCSymbolRefExpr::create(Symbol, RelocationType, MC); if (!MO.isJTI() && MO.getOffset()) ME = MCBinaryExpr::createAdd(ME, MCConstantExpr::create(MO.getOffset(), MC), MC); - return (MCOperand::createExpr(ME)); + return MCOperand::createExpr(ME); } // Create an MCInst from a MachineInstr -void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, - HexagonAsmPrinter& AP) { - if(MI->getOpcode() == Hexagon::ENDLOOP0){ +void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, + MCInst &MCB, HexagonAsmPrinter &AP) { + if (MI->getOpcode() == Hexagon::ENDLOOP0) { HexagonMCInstrInfo::setInnerLoop(MCB); return; } - if(MI->getOpcode() == Hexagon::ENDLOOP1){ + if (MI->getOpcode() == Hexagon::ENDLOOP1) { 
HexagonMCInstrInfo::setOuterLoop(MCB); return; } - MCInst* MCI = new (AP.OutContext) MCInst; + MCInst *MCI = new (AP.OutContext) MCInst; MCI->setOpcode(MI->getOpcode()); assert(MCI->getOpcode() == static_cast(MI->getOpcode()) && "MCI opcode should have been set on construction"); + bool MustExtend = false; for (unsigned i = 0, e = MI->getNumOperands(); i < e; i++) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCO; + if (MO.getTargetFlags() & HexagonII::HMOTF_ConstExtended) + MustExtend = true; switch (MO.getType()) { default: @@ -73,11 +105,14 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, APFloat Val = MO.getFPImm()->getValueAPF(); // FP immediates are used only when setting GPRs, so they may be dealt // with like regular immediates from this point on. - MCO = MCOperand::createImm(*Val.bitcastToAPInt().getRawData()); + MCO = MCOperand::createExpr( + MCConstantExpr::create(*Val.bitcastToAPInt().getRawData(), + AP.OutContext)); break; } case MachineOperand::MO_Immediate: - MCO = MCOperand::createImm(MO.getImm()); + MCO = MCOperand::createExpr( + MCConstantExpr::create(MO.getImm(), AP.OutContext)); break; case MachineOperand::MO_MachineBasicBlock: MCO = MCOperand::createExpr @@ -104,5 +139,8 @@ void llvm::HexagonLowerToMC(MachineInstr const* MI, MCInst& MCB, MCI->addOperand(MCO); } + AP.HexagonProcessInstruction(*MCI, *MI); + HexagonMCInstrInfo::extendIfNeeded(AP.OutContext, MCII, MCB, *MCI, + MustExtend); MCB.addOperand(MCOperand::createInst(MCI)); } diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 35f732cd6207..7a52d6874c33 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -179,7 +179,11 @@ void VLIWMachineScheduler::schedule() { initQueues(TopRoots, BotRoots); bool IsTopNode = false; - while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { + while (true) { + DEBUG(dbgs() << "** VLIWMachineScheduler::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + if (!checkSchedLimit()) break; diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 707bfdbb6ab6..20c4ab112b5f 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -92,6 +92,7 @@ namespace { /// \brief A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; + bool isNewValueJumpCandidate(const MachineInstr *MI) const; }; } // end of anonymous namespace @@ -280,9 +281,9 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII, return true; } -// Given a compare operator, return a matching New Value Jump -// compare operator. Make sure that MI here is included in -// HexagonInstrInfo.cpp::isNewValueJumpCandidate + +// Given a compare operator, return a matching New Value Jump compare operator. +// Make sure that MI here is included in isNewValueJumpCandidate. static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, bool secondRegNewified, MachineBasicBlock *jmpTarget, @@ -341,6 +342,24 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t : Hexagon::J4_cmpgtui_t_jumpnv_nt; + case Hexagon::C4_cmpneq: + return taken ? Hexagon::J4_cmpeq_f_jumpnv_t + : Hexagon::J4_cmpeq_f_jumpnv_nt; + + case Hexagon::C4_cmplte: + if (secondRegNewified) + return taken ? Hexagon::J4_cmplt_f_jumpnv_t + : Hexagon::J4_cmplt_f_jumpnv_nt; + return taken ? 
Hexagon::J4_cmpgt_f_jumpnv_t + : Hexagon::J4_cmpgt_f_jumpnv_nt; + + case Hexagon::C4_cmplteu: + if (secondRegNewified) + return taken ? Hexagon::J4_cmpltu_f_jumpnv_t + : Hexagon::J4_cmpltu_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgtu_f_jumpnv_t + : Hexagon::J4_cmpgtu_f_jumpnv_nt; + default: llvm_unreachable("Could not find matching New Value Jump instruction."); } @@ -348,6 +367,26 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, return 0; } +bool HexagonNewValueJump::isNewValueJumpCandidate(const MachineInstr *MI) + const { + switch (MI->getOpcode()) { + case Hexagon::C2_cmpeq: + case Hexagon::C2_cmpeqi: + case Hexagon::C2_cmpgt: + case Hexagon::C2_cmpgti: + case Hexagon::C2_cmpgtu: + case Hexagon::C2_cmpgtui: + case Hexagon::C4_cmpneq: + case Hexagon::C4_cmplte: + case Hexagon::C4_cmplteu: + return true; + + default: + return false; + } +} + + bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Hexagon New Value Jump **********\n" @@ -372,7 +411,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { // Loop through all the bb's of the function for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; DEBUG(dbgs() << "** dumping bb ** " << MBB->getNumber() << "\n"); @@ -468,7 +507,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { MI->getOperand(0).getReg() == predReg) { // Not all compares can be new value compare. Arch Spec: 7.6.1.1 - if (QII->isNewValueJumpCandidate(MI)) { + if (isNewValueJumpCandidate(MI)) { assert((MI->getDesc().isCompare()) && "Only compare instruction can be collapsed into New Value Jump"); @@ -591,8 +630,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) { DebugLoc dl = MI->getDebugLoc(); MachineInstr *NewMI; - assert((QII->isNewValueJumpCandidate(cmpInstr)) && - "This compare is not a New Value Jump candidate."); + assert((isNewValueJumpCandidate(cmpInstr)) && + "This compare is not a New Value Jump candidate."); unsigned opc = getNewValueJumpOpcode(cmpInstr, cmpOp2, isSecondOpNewified, jmpTarget, MBPI); diff --git a/lib/Target/Hexagon/HexagonOperands.td b/lib/Target/Hexagon/HexagonOperands.td index 2bece8f42f53..fbd29cd4d6d1 100644 --- a/lib/Target/Hexagon/HexagonOperands.td +++ b/lib/Target/Hexagon/HexagonOperands.td @@ -1,4 +1,4 @@ -//===- HexagonOperands.td - Hexagon immediate processing -*- tablegen -*-===// +//===- HexagonImmediates.td - Hexagon immediate processing -*- tablegen -*-===// // // The LLVM Compiler Infrastructure // @@ -7,59 +7,114 @@ // //===----------------------------------------------------------------------===// +def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; } +def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; } +def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; } +def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; } +def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; } def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; } def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; } def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; } def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; } - +def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; } +def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; } +def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; } +def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; } 
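These AsmOperandClass definitions give every Hexagon immediate flavor a named class that the generated asm matcher can test a parsed operand against; the checks themselves reduce to signed/unsigned range and alignment predicates of the isShiftedUInt<N,S> shape used throughout this file. A hedged C++ sketch of what such a matcher check boils down to (the class names mirror the defs above, but the dispatch table is invented for illustration):

#include <cstdint>
#include <iostream>
#include <string>

// True if v is 2^S-aligned and its shifted value fits in an N-bit unsigned
// field -- the shape of check behind classes like u6Imm, u6_2Imm, u16_3Imm.
static bool isShiftedUIntNS(uint64_t v, unsigned N, unsigned S) {
  return (v & ((1ULL << S) - 1)) == 0 && (v >> S) < (1ULL << N);
}

static bool matchImmClass(const std::string &Cls, uint64_t v) {
  if (Cls == "u6Imm")    return isShiftedUIntNS(v, 6, 0);
  if (Cls == "u6_2Imm")  return isShiftedUIntNS(v, 6, 2);
  if (Cls == "u16_3Imm") return isShiftedUIntNS(v, 16, 3);
  return false; // unknown class in this toy matcher
}

int main() {
  std::cout << matchImmClass("u6_2Imm", 60) << "\n"; // 1: 4-aligned, 15 < 64
  std::cout << matchImmClass("u6_2Imm", 62) << "\n"; // 0: not 4-byte aligned
  return 0;
}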
+def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; } +def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; } +def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; } +def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; } +def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; } +def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; } +def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; } +def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; } +def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; } +def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; } +def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; } +def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; } +def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; } +def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; } +def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; } +def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; } +def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; } +def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; } +def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; } +def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; } +def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; } +def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; } // Immediate operands. -let PrintMethod = "printImmOperand" in { - def s32Imm : Operand; - def s8Imm : Operand; - def s8Imm64 : Operand; - def s6Imm : Operand; +let OperandType = "OPERAND_IMMEDIATE", + DecoderMethod = "unsignedImmDecoder" in { + def s32Imm : Operand { let ParserMatchClass = s32ImmOperand; + let DecoderMethod = "s32ImmDecoder"; } + def s8Imm : Operand { let ParserMatchClass = s8ImmOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s8Imm64 : Operand { let ParserMatchClass = s8Imm64Operand; + let DecoderMethod = "s8ImmDecoder"; } + def s6Imm : Operand { let ParserMatchClass = s6ImmOperand; + let DecoderMethod = "s6_0ImmDecoder"; } def s6_3Imm : Operand; - def s4Imm : Operand; - def s4_0Imm : Operand { let DecoderMethod = "s4_0ImmDecoder"; } - def s4_1Imm : Operand { let DecoderMethod = "s4_1ImmDecoder"; } - def s4_2Imm : Operand { let DecoderMethod = "s4_2ImmDecoder"; } - def s4_3Imm : Operand { let DecoderMethod = "s4_3ImmDecoder"; } - def u64Imm : Operand; - def u32Imm : Operand; - def u26_6Imm : Operand; - def u16Imm : Operand; - def u16_0Imm : Operand; - def u16_1Imm : Operand; - def u16_2Imm : Operand; - def u16_3Imm : Operand; - def u11_3Imm : Operand; - def u10Imm : Operand; - def u9Imm : Operand; - def u8Imm : Operand; - def u7Imm : Operand; - def u6Imm : Operand; - def u6_0Imm : Operand; - def u6_1Imm : Operand; - def u6_2Imm : Operand; - def u6_3Imm : Operand; - def u5Imm : Operand; + def s4Imm : Operand { let ParserMatchClass = s4ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_0Imm : Operand { let ParserMatchClass = s4_0ImmOperand; + let DecoderMethod = "s4_0ImmDecoder"; } + def s4_1Imm : Operand { let ParserMatchClass = s4_1ImmOperand; + let DecoderMethod = "s4_1ImmDecoder"; } + def s4_2Imm : Operand { let ParserMatchClass = s4_2ImmOperand; + let DecoderMethod = "s4_2ImmDecoder"; } + def s4_3Imm : Operand { let ParserMatchClass = s4_3ImmOperand; + let DecoderMethod = "s4_3ImmDecoder"; } + def u64Imm : Operand { let ParserMatchClass = u64ImmOperand; } + def u32Imm : Operand { let ParserMatchClass = u32ImmOperand; } + def u26_6Imm : Operand { let ParserMatchClass = u26_6ImmOperand; } + def u16Imm : 
Operand { let ParserMatchClass = u16ImmOperand; } + def u16_0Imm : Operand { let ParserMatchClass = u16_0ImmOperand; } + def u16_1Imm : Operand { let ParserMatchClass = u16_1ImmOperand; } + def u16_2Imm : Operand { let ParserMatchClass = u16_2ImmOperand; } + def u16_3Imm : Operand { let ParserMatchClass = u16_3ImmOperand; } + def u11_3Imm : Operand { let ParserMatchClass = u11_3ImmOperand; } + def u10Imm : Operand { let ParserMatchClass = u10ImmOperand; } + def u9Imm : Operand { let ParserMatchClass = u9ImmOperand; } + def u8Imm : Operand { let ParserMatchClass = u8ImmOperand; } + def u7Imm : Operand { let ParserMatchClass = u7ImmOperand; } + def u6Imm : Operand { let ParserMatchClass = u6ImmOperand; } + def u6_0Imm : Operand { let ParserMatchClass = u6_0ImmOperand; } + def u6_1Imm : Operand { let ParserMatchClass = u6_1ImmOperand; } + def u6_2Imm : Operand { let ParserMatchClass = u6_2ImmOperand; } + def u6_3Imm : Operand { let ParserMatchClass = u6_3ImmOperand; } + def u5Imm : Operand { let ParserMatchClass = u5ImmOperand; } + def u5_0Imm : Operand; + def u5_1Imm : Operand; def u5_2Imm : Operand; def u5_3Imm : Operand; - def u4Imm : Operand; + def u4Imm : Operand { let ParserMatchClass = u4ImmOperand; } def u4_0Imm : Operand; + def u4_1Imm : Operand; def u4_2Imm : Operand; - def u3Imm : Operand; + def u4_3Imm : Operand; + def u3Imm : Operand { let ParserMatchClass = u3ImmOperand; } def u3_0Imm : Operand; def u3_1Imm : Operand; - def u2Imm : Operand; - def u1Imm : Operand; - def n8Imm : Operand; - def m6Imm : Operand; + def u3_2Imm : Operand; + def u3_3Imm : Operand; + def u2Imm : Operand { let ParserMatchClass = u2ImmOperand; } + def u1Imm : Operand { let ParserMatchClass = u1ImmOperand; } + def n8Imm : Operand { let ParserMatchClass = n8ImmOperand; } } -let PrintMethod = "printNOneImmOperand" in -def nOneImm : Operand; +let OperandType = "OPERAND_IMMEDIATE" in { + def s4_6Imm : Operand { let ParserMatchClass = s4_6ImmOperand; + let PrintMethod = "prints4_6ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s4_7Imm : Operand { let PrintMethod = "prints4_7ImmOperand"; + let DecoderMethod = "s4_6ImmDecoder";} + def s3_6Imm : Operand { let ParserMatchClass = s3_6ImmOperand; + let PrintMethod = "prints3_6ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} + def s3_7Imm : Operand { let PrintMethod = "prints3_7ImmOperand"; + let DecoderMethod = "s3_6ImmDecoder";} +} // // Immediate predicates @@ -81,32 +136,12 @@ def s31_1ImmPred : PatLeaf<(i32 imm), [{ def s30_2ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<31,1>(v); + return isShiftedInt<30,2>(v); }]>; def s29_3ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<31,1>(v); -}]>; - -def s22_10ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<22,10>(v); -}]>; - -def s8_24ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<8,24>(v); -}]>; - -def s16_16ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<16,16>(v); -}]>; - -def s26_6ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedInt<26,6>(v); + return isShiftedInt<29,3>(v); }]>; def s16ImmPred : PatLeaf<(i32 imm), [{ @@ -114,16 +149,6 @@ def s16ImmPred : PatLeaf<(i32 imm), [{ return isInt<16>(v); }]>; -def s13ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<13>(v); -}]>; - -def 
s12ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<12>(v); -}]>; - def s11_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<11>(v); @@ -149,16 +174,6 @@ def s10ImmPred : PatLeaf<(i32 imm), [{ return isInt<10>(v); }]>; -def s9ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<9>(v); -}]>; - -def m9ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isInt<9>(v) && (v != -256); -}]>; - def s8ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isInt<8>(v); @@ -194,7 +209,6 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{ return isShiftedInt<4,3>(v); }]>; - def u64ImmPred : PatLeaf<(i64 imm), [{ // Adding "N ||" to suppress gcc unused warning. return (N || true); @@ -230,26 +244,31 @@ def u26_6ImmPred : PatLeaf<(i32 imm), [{ return isShiftedUInt<26,6>(v); }]>; -def u16ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isUInt<16>(v); -}]>; - -def u16_s8ImmPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - return isShiftedUInt<16,8>(v); -}]>; - def u16_0ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<16>(v); }]>; +def u16_1ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<16,1>(v); +}]>; + +def u16_2ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isShiftedUInt<16,2>(v); +}]>; + def u11_3ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isShiftedUInt<11,3>(v); }]>; +def u10ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<10>(v); +}]>; + def u9ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); return isUInt<9>(v); @@ -321,6 +340,11 @@ def u1ImmPred : PatLeaf<(i1 imm), [{ return isUInt<1>(v); }]>; +def u1ImmPred32 : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + return isUInt<1>(v); +}]>; + def m5BImmPred : PatLeaf<(i32 imm), [{ // m5BImmPred predicate - True if the (char) number is in range -1 .. -31 // and will fit in a 5 bit field when made positive, for use in memops. @@ -379,7 +403,7 @@ def Clr5ImmPred : PatLeaf<(i32 imm), [{ }]>; def SetClr5ImmPred : PatLeaf<(i32 imm), [{ - // SetClr5ImmPred predicate - True if the immediate is in range 0..31. + // True if the immediate is in range 0..31. int32_t v = (int32_t)N->getSExtValue(); return (v >= 0 && v <= 31); }]>; @@ -404,14 +428,13 @@ def Clr4ImmPred : PatLeaf<(i32 imm), [{ }]>; def SetClr4ImmPred : PatLeaf<(i32 imm), [{ - // SetClr4ImmPred predicate - True if the immediate is in the range 0..15. + // True if the immediate is in the range 0..15. int16_t v = (int16_t)N->getSExtValue(); return (v >= 0 && v <= 15); }]>; def Set3ImmPred : PatLeaf<(i32 imm), [{ - // Set3ImmPred predicate - True if the number is in the series of values: - // [ 2^0, 2^1, ... 2^7 ]. + // True if the number is in the series of values: [ 2^0, 2^1, ... 2^7 ]. // For use in setbit immediate. uint8_t v = (int8_t)N->getSExtValue(); // Constrain to 8 bits, and then check for single bit. @@ -419,9 +442,7 @@ def Set3ImmPred : PatLeaf<(i32 imm), [{ }]>; def Clr3ImmPred : PatLeaf<(i32 imm), [{ - // Clr3ImmPred predicate - True if the number is in the series of - // bit negated values: - // [ 2^0, 2^1, ... 2^7 ]. + // True if the number is in the series of bit negated values: [ 2^0, 2^1, ... 2^7 ]. 
// For use in setbit and clrbit immediate. uint8_t v = ~ (int8_t)N->getSExtValue(); // Constrain to 8 bits, and then check for single bit. @@ -429,76 +450,109 @@ def Clr3ImmPred : PatLeaf<(i32 imm), [{ }]>; def SetClr3ImmPred : PatLeaf<(i32 imm), [{ - // SetClr3ImmPred predicate - True if the immediate is in the range 0..7. + // True if the immediate is in the range 0..7. int8_t v = (int8_t)N->getSExtValue(); return (v >= 0 && v <= 7); }]>; // Extendable immediate operands. +def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; } +def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; } +def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; } +def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; } +def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; } +def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; } +def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; } +def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; } +def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; } +def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; } +def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; } +def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; } +def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; } +def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; } +def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; } +def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; } +def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; } +def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; } +def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; } +def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; } +def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; } +def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; } -let PrintMethod = "printExtOperand" in { - def f32Ext : Operand; - def s16Ext : Operand { let DecoderMethod = "s16ImmDecoder"; } - def s12Ext : Operand { let DecoderMethod = "s12ImmDecoder"; } - def s11_0Ext : Operand { let DecoderMethod = "s11_0ImmDecoder"; } - def s11_1Ext : Operand { let DecoderMethod = "s11_1ImmDecoder"; } - def s11_2Ext : Operand { let DecoderMethod = "s11_2ImmDecoder"; } - def s11_3Ext : Operand { let DecoderMethod = "s11_3ImmDecoder"; } - def s10Ext : Operand { let DecoderMethod = "s10ImmDecoder"; } - def s9Ext : Operand { let DecoderMethod = "s90ImmDecoder"; } - def s8Ext : Operand { let DecoderMethod = "s8ImmDecoder"; } - def s7Ext : Operand; - def s6Ext : Operand { let DecoderMethod = "s6_0ImmDecoder"; } - def u6Ext : Operand; - def u7Ext : Operand; - def u8Ext : Operand; - def u9Ext : Operand; - def u10Ext : Operand; - def u6_0Ext : Operand; - def u6_1Ext : Operand; - def u6_2Ext : Operand; - def u6_3Ext : Operand; + + +let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand", + DecoderMethod = "unsignedImmDecoder" in { + def f32Ext : Operand { let ParserMatchClass = f32ExtOperand; } + def s16Ext : Operand { let ParserMatchClass = s16ExtOperand; + let DecoderMethod = "s16ImmDecoder"; } + def s12Ext : Operand { let ParserMatchClass = s12ExtOperand; + let DecoderMethod = "s12ImmDecoder"; } + def s11_0Ext : Operand { let ParserMatchClass = s11_0ExtOperand; + let DecoderMethod = "s11_0ImmDecoder"; } + def s11_1Ext : Operand { let ParserMatchClass = s11_1ExtOperand; + let DecoderMethod = "s11_1ImmDecoder"; } + def s11_2Ext : Operand { let ParserMatchClass = s11_2ExtOperand; + let DecoderMethod = "s11_2ImmDecoder"; } + def 
s11_3Ext : Operand { let ParserMatchClass = s11_3ExtOperand; + let DecoderMethod = "s11_3ImmDecoder"; } + def s10Ext : Operand { let ParserMatchClass = s10ExtOperand; + let DecoderMethod = "s10ImmDecoder"; } + def s9Ext : Operand { let ParserMatchClass = s9ExtOperand; + let DecoderMethod = "s90ImmDecoder"; } + def s8Ext : Operand { let ParserMatchClass = s8ExtOperand; + let DecoderMethod = "s8ImmDecoder"; } + def s7Ext : Operand { let ParserMatchClass = s7ExtOperand; } + def s6Ext : Operand { let ParserMatchClass = s6ExtOperand; + let DecoderMethod = "s6_0ImmDecoder"; } + def u6Ext : Operand { let ParserMatchClass = u6ExtOperand; } + def u7Ext : Operand { let ParserMatchClass = u7ExtOperand; } + def u8Ext : Operand { let ParserMatchClass = u8ExtOperand; } + def u9Ext : Operand { let ParserMatchClass = u9ExtOperand; } + def u10Ext : Operand { let ParserMatchClass = u10ExtOperand; } + def u6_0Ext : Operand { let ParserMatchClass = u6_0ExtOperand; } + def u6_1Ext : Operand { let ParserMatchClass = u6_1ExtOperand; } + def u6_2Ext : Operand { let ParserMatchClass = u6_2ExtOperand; } + def u6_3Ext : Operand { let ParserMatchClass = u6_3ExtOperand; } + def u32MustExt : Operand { let ParserMatchClass = u32MustExtOperand; } } -def s10ExtPred : PatLeaf<(i32 imm), [{ - int64_t v = (int64_t)N->getSExtValue(); - if (isInt<10>(v)) - return true; - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); +def s4_7ImmPred : PatLeaf<(i32 imm), [{ + int64_t v = (int64_t)N->getSExtValue(); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<4,7>(v); + return false; }]>; -def s8ExtPred : PatLeaf<(i32 imm), [{ +def s3_7ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (isInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit signed field. - return isConstExtProfitable(Node) && isInt<32>(v); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 128-byte aligned. + return isShiftedInt<3,7>(v); + return false; }]>; -def u8ExtPred : PatLeaf<(i32 imm), [{ +def s4_6ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<8>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 10-bit sign extended field and + // is 64-byte aligned. + return isShiftedInt<4,6>(v); + return false; }]>; -def u9ExtPred : PatLeaf<(i32 imm), [{ +def s3_6ImmPred : PatLeaf<(i32 imm), [{ int64_t v = (int64_t)N->getSExtValue(); - if (isUInt<9>(v)) - return true; - - // Return true if extending this immediate is profitable and the value - // can fit in a 32-bit unsigned field. - return isConstExtProfitable(Node) && isUInt<32>(v); + if (HST->hasV60TOps()) + // Return true if the immediate can fit in a 9-bit sign extended field and + // is 64-byte aligned. 
+ return isShiftedInt<3,6>(v); + return false; }]>; @@ -523,21 +577,21 @@ let PrintMethod = "printGlobalOperand" in { let PrintMethod = "printJumpTable" in def jumptablebase : Operand; -def brtarget : Operand; -def brtargetExt : Operand { - let PrintMethod = "printExtBrtarget"; +def brtarget : Operand { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} +def brtargetExt : Operand { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; +} +def calltarget : Operand { + let DecoderMethod = "brtargetDecoder"; + let PrintMethod = "printBrtarget"; } -def calltarget : Operand; def bblabel : Operand; -def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf , [], "BasicBlockSDNode">; - -def symbolHi32 : Operand { - let PrintMethod = "printSymbolHi"; -} -def symbolLo32 : Operand { - let PrintMethod = "printSymbolLo"; -} +def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">; // Return true if for a 32 to 64-bit sign-extended load. def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{ diff --git a/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp new file mode 100644 index 000000000000..1723771550c9 --- /dev/null +++ b/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp @@ -0,0 +1,150 @@ +//===- HexagonOptimizeSZextends.cpp - Remove unnecessary argument extends -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass that removes sign extends for function parameters. These parameters +// are already sign extended by the caller per Hexagon's ABI +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" + +#include "Hexagon.h" + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonOptimizeSZextends(); + void initializeHexagonOptimizeSZextendsPass(PassRegistry&); +} + +namespace { + struct HexagonOptimizeSZextends : public FunctionPass { + public: + static char ID; + HexagonOptimizeSZextends() : FunctionPass(ID) { + initializeHexagonOptimizeSZextendsPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + const char *getPassName() const override { + return "Remove sign extends"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + FunctionPass::getAnalysisUsage(AU); + } + + bool intrinsicAlreadySextended(Intrinsic::ID IntID); + }; +} + +char HexagonOptimizeSZextends::ID = 0; + +INITIALIZE_PASS(HexagonOptimizeSZextends, "reargs", + "Remove Sign and Zero Extends for Args", false, false) + +bool HexagonOptimizeSZextends::intrinsicAlreadySextended(Intrinsic::ID IntID) { + switch(IntID) { + case llvm::Intrinsic::hexagon_A2_addh_l16_sat_ll: + return true; + default: + break; + } + return false; +} + +bool HexagonOptimizeSZextends::runOnFunction(Function &F) { + unsigned Idx = 1; + // Try to optimize sign extends in formal parameters. It's relying on + // callee already sign extending the values. I'm not sure if our ABI + // requires callee to sign extend though. 
+ for (auto &Arg : F.args()) { + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) { + if (!isa(Arg.getType())) { + for (auto UI = Arg.use_begin(); UI != Arg.use_end();) { + if (isa(*UI)) { + Instruction* Use = cast(*UI); + SExtInst* SI = new SExtInst(&Arg, Use->getType()); + assert (EVT::getEVT(SI->getType()) == + (EVT::getEVT(Use->getType()))); + ++UI; + Use->replaceAllUsesWith(SI); + Instruction* First = &F.getEntryBlock().front(); + SI->insertBefore(First); + Use->eraseFromParent(); + } else { + ++UI; + } + } + } + } + ++Idx; + } + + // Try to remove redundant sext operations on Hexagon. The hardware + // already sign extends many 16 bit intrinsic operations to 32 bits. + // For example: + // %34 = tail call i32 @llvm.hexagon.A2.addh.l16.sat.ll(i32 %x, i32 %y) + // %sext233 = shl i32 %34, 16 + // %conv52 = ashr exact i32 %sext233, 16 + for (auto &B : F) { + for (auto &I : B) { + // Look for arithmetic shift right by 16. + BinaryOperator *Ashr = dyn_cast(&I); + if (!(Ashr && Ashr->getOpcode() == Instruction::AShr)) + continue; + Value *AshrOp1 = Ashr->getOperand(1); + ConstantInt *C = dyn_cast(AshrOp1); + // Right shifted by 16. + if (!(C && C->getSExtValue() == 16)) + continue; + + // The first operand of Ashr comes from logical shift left. + Instruction *Shl = dyn_cast(Ashr->getOperand(0)); + if (!(Shl && Shl->getOpcode() == Instruction::Shl)) + continue; + Value *Intr = Shl->getOperand(0); + Value *ShlOp1 = Shl->getOperand(1); + C = dyn_cast(ShlOp1); + // Left shifted by 16. + if (!(C && C->getSExtValue() == 16)) + continue; + + // The first operand of Shl comes from an intrinsic. + if (IntrinsicInst *I = dyn_cast(Intr)) { + if (!intrinsicAlreadySextended(I->getIntrinsicID())) + continue; + // All is well. Replace all uses of AShr with I. + for (auto UI = Ashr->user_begin(), UE = Ashr->user_end(); + UI != UE; ++UI) { + const Use &TheUse = UI.getUse(); + if (Instruction *J = dyn_cast(TheUse.getUser())) { + J->replaceUsesOfWith(Ashr, I); + } + } + } + } + } + + return true; +} + + +FunctionPass *llvm::createHexagonOptimizeSZextends() { + return new HexagonOptimizeSZextends(); +} diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp index 93dcbe233b25..e68ff85b1da6 100644 --- a/lib/Target/Hexagon/HexagonPeephole.cpp +++ b/lib/Target/Hexagon/HexagonPeephole.cpp @@ -124,7 +124,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { // Loop over all of the basic blocks. for (MachineFunction::iterator MBBb = MF.begin(), MBBe = MF.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; PeepholeMap.clear(); PeepholeDoubleRegsMap.clear(); @@ -180,7 +180,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) { unsigned DstReg = Dst.getReg(); unsigned SrcReg = Src1.getReg(); PeepholeDoubleRegsMap[DstReg] = - std::make_pair(*&SrcReg, 1/*Hexagon::subreg_hireg*/); + std::make_pair(*&SrcReg, Hexagon::subreg_hireg); } // Look for P=NOT(P). 
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp index f6bb4a045438..61c0589fb5bf 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -66,6 +66,8 @@ HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF) const { switch (HST.getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: return CallerSavedRegsV4; } llvm_unreachable( @@ -84,6 +86,8 @@ HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { switch (MF->getSubtarget().getHexagonArchVersion()) { case HexagonSubtarget::V4: case HexagonSubtarget::V5: + case HexagonSubtarget::V55: + case HexagonSubtarget::V60: return CalleeSavedRegsV3; } llvm_unreachable("Callee saved registers requested for unknown architecture " @@ -98,7 +102,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF) Reserved.set(Hexagon::R29); Reserved.set(Hexagon::R30); Reserved.set(Hexagon::R31); - Reserved.set(Hexagon::D14); + Reserved.set(Hexagon::PC); Reserved.set(Hexagon::D15); Reserved.set(Hexagon::LC0); Reserved.set(Hexagon::LC1); @@ -116,62 +120,21 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(SPAdj == 0 && "Unexpected"); MachineInstr &MI = *II; - MachineBasicBlock &MB = *MI.getParent(); MachineFunction &MF = *MB.getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - auto &HST = static_cast(MF.getSubtarget()); + auto &HST = MF.getSubtarget(); auto &HII = *HST.getInstrInfo(); auto &HFI = *HST.getFrameLowering(); + unsigned BP = 0; int FI = MI.getOperand(FIOp).getIndex(); - int Offset = MFI.getObjectOffset(FI) + MI.getOperand(FIOp+1).getImm(); - bool HasAlloca = MFI.hasVarSizedObjects(); - bool HasAlign = needsStackRealignment(MF); - - // XXX: Fixed objects cannot be accessed through SP if there are aligned - // objects in the local frame, or if there are dynamically allocated objects. - // In such cases, there has to be FP available. - if (!HFI.hasFP(MF)) { - assert(!HasAlloca && !HasAlign && "This function must have frame pointer"); - // We will not reserve space on the stack for the lr and fp registers. - Offset -= 8; - } - - unsigned SP = getStackRegister(), FP = getFrameRegister(); - unsigned AP = 0; - if (MachineInstr *AI = HFI.getAlignaInstr(MF)) - AP = AI->getOperand(0).getReg(); - unsigned FrameSize = MFI.getStackSize(); - - // Special handling of dbg_value instructions and INLINEASM. - if (MI.isDebugValue() || MI.isInlineAsm()) { - MI.getOperand(FIOp).ChangeToRegister(SP, false /*isDef*/); - MI.getOperand(FIOp+1).ChangeToImmediate(Offset+FrameSize); - return; - } - - bool UseFP = false, UseAP = false; // Default: use SP. - if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) { - UseFP = HasAlloca || HasAlign; - } else { - if (HasAlloca) { - if (HasAlign) - UseAP = true; - else - UseFP = true; - } - } + // Select the base pointer (BP) and calculate the actual offset from BP + // to the beginning of the object at index FI. + int Offset = HFI.getFrameIndexReference(MF, FI, BP); + // Add the offset from the instruction. + int RealOffset = Offset + MI.getOperand(FIOp+1).getImm(); unsigned Opc = MI.getOpcode(); - bool ValidSP = HII.isValidOffset(Opc, FrameSize+Offset); - bool ValidFP = HII.isValidOffset(Opc, Offset); - - // Calculate the actual offset in the instruction. 
- int64_t RealOffset = Offset; - if (!UseFP && !UseAP) - RealOffset = FrameSize+Offset; - switch (Opc) { case Hexagon::TFR_FIA: MI.setDesc(HII.get(Hexagon::A2_addi)); @@ -184,20 +147,7 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, break; } - unsigned BP = 0; - bool Valid = false; - if (UseFP) { - BP = FP; - Valid = ValidFP; - } else if (UseAP) { - BP = AP; - Valid = ValidFP; - } else { - BP = SP; - Valid = ValidSP; - } - - if (Valid) { + if (HII.isValidOffset(Opc, RealOffset)) { MI.getOperand(FIOp).ChangeToRegister(BP, false); MI.getOperand(FIOp+1).ChangeToImmediate(RealOffset); return; @@ -223,8 +173,8 @@ unsigned HexagonRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const HexagonFrameLowering *TFI = getFrameLowering(MF); if (TFI->hasFP(MF)) - return Hexagon::R30; - return Hexagon::R29; + return getFrameRegister(); + return getStackRegister(); } @@ -238,17 +188,9 @@ unsigned HexagonRegisterInfo::getStackRegister() const { } -bool -HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { - const HexagonFrameLowering *TFI = getFrameLowering(MF); - return TFI->hasFP(MF); -} - - -bool -HexagonRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->getMaxAlignment() > 8; +bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) + const { + return MF.getSubtarget().getFrameLowering()->hasFP(MF); } diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h index 7edefee93993..db7e0f27815d 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -63,8 +63,6 @@ public: return true; } - bool needsStackRealignment(const MachineFunction &MF) const override; - /// Returns true if the frame pointer is valid. 
bool useFPForScavengingIndex(const MachineFunction &MF) const override; diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.td b/lib/Target/Hexagon/HexagonRegisterInfo.td index edf1c251ac77..81629dc6d47f 100644 --- a/lib/Target/Hexagon/HexagonRegisterInfo.td +++ b/lib/Target/Hexagon/HexagonRegisterInfo.td @@ -53,6 +53,12 @@ let Namespace = "Hexagon" in { let Num = num; } + + // Rq - vector predicate registers + class Rq num, string n> : Register { + let HWEncoding{2-0} = num; + } + // Rc - control registers class Rc num, string n, list alt = [], list alias = []> : @@ -131,20 +137,21 @@ let Namespace = "Hexagon" in { def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>; def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>, DwarfRegNum<[71]>; - def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[72]>; - def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[73]>; + def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>; // future use + def C6 : Rc<6, "c6", [], [M0]>, DwarfRegNum<[73]>; + def C7 : Rc<7, "c7", [], [M1]>, DwarfRegNum<[74]>; - def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[74]> { + def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> { let SubRegIndices = [subreg_overflow]; let SubRegs = [USR_OVF]; } - def PC : Rc<9, "pc">, DwarfRegNum<[75]>; - def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[76]>; - def GP : Rc<11, "gp">, DwarfRegNum<[77]>; - def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[78]>; - def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[79]>; - def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[80]>; - def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[81]>; + def PC : Rc<9, "pc">, DwarfRegNum<[76]>; + def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>; + def GP : Rc<11, "gp">, DwarfRegNum<[78]>; + def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>; + def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>; + def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>; + def UPCH : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>; } // Control registers pairs. @@ -158,6 +165,36 @@ let Namespace = "Hexagon" in { def UPC : Rcc<14, "c15:14", [UPCL, UPCH]>, DwarfRegNum<[80]>; } + foreach i = 0-31 in { + def V#i : Ri, DwarfRegNum<[!add(i, 99)]>; + } + + // Aliases of the V* registers used to hold double vec values. + let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in { + def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>; + def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>; + def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>; + def W3 : Rd< 6, "v7:6", [V6, V7]>, DwarfRegNum<[105]>; + def W4 : Rd< 8, "v9:8", [V8, V9]>, DwarfRegNum<[107]>; + def W5 : Rd<10, "v11:10", [V10, V11]>, DwarfRegNum<[109]>; + def W6 : Rd<12, "v13:12", [V12, V13]>, DwarfRegNum<[111]>; + def W7 : Rd<14, "v15:14", [V14, V15]>, DwarfRegNum<[113]>; + def W8 : Rd<16, "v17:16", [V16, V17]>, DwarfRegNum<[115]>; + def W9 : Rd<18, "v19:18", [V18, V19]>, DwarfRegNum<[117]>; + def W10 : Rd<20, "v21:20", [V20, V21]>, DwarfRegNum<[119]>; + def W11 : Rd<22, "v23:22", [V22, V23]>, DwarfRegNum<[121]>; + def W12 : Rd<24, "v25:24", [V24, V25]>, DwarfRegNum<[123]>; + def W13 : Rd<26, "v27:26", [V26, V27]>, DwarfRegNum<[125]>; + def W14 : Rd<28, "v29:28", [V28, V29]>, DwarfRegNum<[127]>; + def W15 : Rd<30, "v31:30", [V30, V31]>, DwarfRegNum<[129]>; + } + + // Vector Predicate registers. + def Q0 : Rq<0, "q0">, DwarfRegNum<[131]>; + def Q1 : Rq<1, "q1">, DwarfRegNum<[132]>; + def Q2 : Rq<2, "q2">, DwarfRegNum<[133]>; + def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>; + // Register classes. 
// // FIXME: the register order should be defined in terms of the preferred @@ -169,10 +206,34 @@ def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32, R10, R11, R29, R30, R31)> { } +// Registers are listed in reverse order for allocation preference reasons. +def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32, + (add R7, R6, R5, R4, R3, R2, R1, R0)> ; + def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64, (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>; +def VectorRegs : RegisterClass<"Hexagon", [v64i8, v32i16, v16i32, v8i64], 512, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "W%u", 0, 15))>; + +def VectorRegs128B : RegisterClass<"Hexagon", + [v128i8, v64i16, v32i32, v16i64], 1024, + (add (sequence "V%u", 0, 31))>; + +def VecDblRegs128B : RegisterClass<"Hexagon", + [v256i8,v128i16,v64i32,v32i64], 2048, + (add (sequence "W%u", 0, 15))>; + +def VecPredRegs : RegisterClass<"Hexagon", [v512i1], 512, + (add (sequence "Q%u", 0, 3))>; + +def VecPredRegs128B : RegisterClass<"Hexagon", [v1024i1], 1024, + (add (sequence "Q%u", 0, 3))>; def PredRegs : RegisterClass<"Hexagon", [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp deleted file mode 100644 index 7069ad36e21a..000000000000 --- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp +++ /dev/null @@ -1,91 +0,0 @@ -//===- HexagonRemoveExtendArgs.cpp - Remove unnecessary argument sign extends // -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Pass that removes sign extends for function parameters. 
These parameters -// are already sign extended by the caller per Hexagon's ABI -// -//===----------------------------------------------------------------------===// - -#include "Hexagon.h" -#include "HexagonTargetMachine.h" -#include "llvm/CodeGen/MachineFunctionAnalysis.h" -#include "llvm/CodeGen/StackProtector.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" - -using namespace llvm; - -namespace llvm { - FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); - void initializeHexagonRemoveExtendArgsPass(PassRegistry&); -} - -namespace { - struct HexagonRemoveExtendArgs : public FunctionPass { - public: - static char ID; - HexagonRemoveExtendArgs() : FunctionPass(ID) { - initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry()); - } - bool runOnFunction(Function &F) override; - - const char *getPassName() const override { - return "Remove sign extends"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - FunctionPass::getAnalysisUsage(AU); - } - }; -} - -char HexagonRemoveExtendArgs::ID = 0; - -INITIALIZE_PASS(HexagonRemoveExtendArgs, "reargs", - "Remove Sign and Zero Extends for Args", false, false) - -bool HexagonRemoveExtendArgs::runOnFunction(Function &F) { - unsigned Idx = 1; - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; - ++AI, ++Idx) { - if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) { - Argument* Arg = AI; - if (!isa(Arg->getType())) { - for (auto UI = Arg->user_begin(); UI != Arg->user_end();) { - if (isa(*UI)) { - Instruction* I = cast(*UI); - SExtInst* SI = new SExtInst(Arg, I->getType()); - assert (EVT::getEVT(SI->getType()) == - (EVT::getEVT(I->getType()))); - ++UI; - I->replaceAllUsesWith(SI); - Instruction* First = F.getEntryBlock().begin(); - SI->insertBefore(First); - I->eraseFromParent(); - } else { - ++UI; - } - } - } - } - } - return true; -} - - - -FunctionPass* -llvm::createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM) { - return new HexagonRemoveExtendArgs(); -} diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td index 528cafc2bfea..6e4987b7e4e3 100644 --- a/lib/Target/Hexagon/HexagonSchedule.td +++ b/lib/Target/Hexagon/HexagonSchedule.td @@ -13,6 +13,12 @@ include "HexagonScheduleV4.td" +// V55 Machine Info + +include "HexagonScheduleV55.td" + //===----------------------------------------------------------------------===// -// V4 Machine Info - +// V60 Machine Info - //===----------------------------------------------------------------------===// + +include "HexagonScheduleV60.td" + diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td index a7d2d4724d0b..67af147b25b3 100644 --- a/lib/Target/Hexagon/HexagonScheduleV4.td +++ b/lib/Target/Hexagon/HexagonScheduleV4.td @@ -35,10 +35,11 @@ def SLOT_ENDLOOP: FuncUnit; // Itinerary classes. def PSEUDO : InstrItinClass; -def PSEUDOM : InstrItinClass; +def PSEUDOM : InstrItinClass; // ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
def DUPLEX : InstrItinClass; def PREFIX : InstrItinClass; +def COMPOUND_CJ_ARCHDEPSLOT : InstrItinClass; def COMPOUND : InstrItinClass; def ALU32_2op_tc_1_SLOT0123 : InstrItinClass; @@ -58,6 +59,7 @@ def CR_tc_2early_SLOT3 : InstrItinClass; def CR_tc_3x_SLOT23 : InstrItinClass; def CR_tc_3x_SLOT3 : InstrItinClass; def J_tc_2early_SLOT23 : InstrItinClass; +def J_tc_2early_CJUMP_UCJUMP_ARCHDEPSLOT : InstrItinClass; def J_tc_2early_SLOT2 : InstrItinClass; def LD_tc_ld_SLOT01 : InstrItinClass; def LD_tc_ld_SLOT0 : InstrItinClass; @@ -91,6 +93,7 @@ def V4LDST_tc_st_SLOT0 : InstrItinClass; def V4LDST_tc_st_SLOT01 : InstrItinClass; def J_tc_2early_SLOT0123 : InstrItinClass; def EXTENDER_tc_1_SLOT0123 : InstrItinClass; +def S_3op_tc_3stall_SLOT23 : InstrItinClass; def HexagonItinerariesV4 : diff --git a/lib/Target/Hexagon/HexagonScheduleV55.td b/lib/Target/Hexagon/HexagonScheduleV55.td new file mode 100644 index 000000000000..d9ad25d4cd5a --- /dev/null +++ b/lib/Target/Hexagon/HexagonScheduleV55.td @@ -0,0 +1,170 @@ +//=-HexagonScheduleV55.td - HexagonV55 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// There are four SLOTS (four parallel pipelines) in the Hexagon V55 machine. +// This file describes that machine information. + +// +// |===========|==================================================| +// | PIPELINE | Instruction Classes | +// |===========|==================================================| +// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | +// |-----------|--------------------------------------------------| +// | SLOT1 | LD ST ALU32 | +// |-----------|--------------------------------------------------| +// | SLOT2 | XTYPE ALU32 J JR | +// |-----------|--------------------------------------------------| +// | SLOT3 | XTYPE ALU32 J CR | +// |===========|==================================================| + +def CJ_tc_1_SLOT23 : InstrItinClass; +def CJ_tc_2early_SLOT23 : InstrItinClass; +def COPROC_VMEM_vtc_long_SLOT01 : InstrItinClass; +def COPROC_VX_vtc_long_SLOT23 : InstrItinClass; +def COPROC_VX_vtc_SLOT23 : InstrItinClass; +def J_tc_3stall_SLOT2 : InstrItinClass; +def MAPPING_tc_1_SLOT0123 : InstrItinClass; +def M_tc_3stall_SLOT23 : InstrItinClass; +def SUBINSN_tc_1_SLOT01 : InstrItinClass; +def SUBINSN_tc_2early_SLOT0 : InstrItinClass; +def SUBINSN_tc_2early_SLOT01 : InstrItinClass; +def SUBINSN_tc_3stall_SLOT0 : InstrItinClass; +def SUBINSN_tc_ld_SLOT0 : InstrItinClass; +def SUBINSN_tc_ld_SLOT01 : InstrItinClass; +def SUBINSN_tc_st_SLOT01 : InstrItinClass; + +def HexagonItinerariesV55 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [ + // ALU32 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // ALU64 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // CR -> System + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // JR + InstrItinData]>, + InstrItinData]>, + + // Extender + InstrItinData]>, + + // Load + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // M + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, +
InstrItinData]>, + InstrItinData]>, + + // Store + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Subinsn + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // S + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // New Value Compare Jump + InstrItinData]>, + + // Mem ops + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Endloop + InstrItinData]>, + + // Vector + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Misc + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData, + InstrStage<1, [SLOT2, SLOT3]>]> + + ]>; + +def HexagonModelV55 : SchedMachineModel { + // Max issue per cycle == bundle width. + let IssueWidth = 4; + let Itineraries = HexagonItinerariesV55; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// Hexagon V55 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonScheduleV60.td b/lib/Target/Hexagon/HexagonScheduleV60.td new file mode 100644 index 000000000000..2ccff8242a47 --- /dev/null +++ b/lib/Target/Hexagon/HexagonScheduleV60.td @@ -0,0 +1,310 @@ +//=-HexagonScheduleV60.td - HexagonV60 Scheduling Definitions *- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// CVI pipes from the "Hexagon Multimedia Co-Processor Extensions Arch Spec". +def CVI_ST : FuncUnit; +def CVI_XLANE : FuncUnit; +def CVI_SHIFT : FuncUnit; +def CVI_MPY0 : FuncUnit; +def CVI_MPY1 : FuncUnit; +def CVI_LD : FuncUnit; + +// Combined functional units. +def CVI_XLSHF : FuncUnit; +def CVI_MPY01 : FuncUnit; +def CVI_ALL : FuncUnit; + +// Combined functional unit data. +def HexagonComboFuncsV60 : + ComboFuncUnits<[ + ComboFuncData, + ComboFuncData, + ComboFuncData + ]>; + +// Note: When adding additional vector scheduling classes, add the +// corresponding methods to the class HexagonInstrInfo. +def CVI_VA : InstrItinClass; +def CVI_VA_DV : InstrItinClass; +def CVI_VX_LONG : InstrItinClass; +def CVI_VX_LATE : InstrItinClass; +def CVI_VX : InstrItinClass; +def CVI_VX_DV_LONG : InstrItinClass; +def CVI_VX_DV : InstrItinClass; +def CVI_VX_DV_SLOT2 : InstrItinClass; +def CVI_VP : InstrItinClass; +def CVI_VP_LONG : InstrItinClass; +def CVI_VP_VS_EARLY : InstrItinClass; +def CVI_VP_VS_LONG_EARLY : InstrItinClass; +def CVI_VP_VS_LONG : InstrItinClass; +def CVI_VP_VS : InstrItinClass; +def CVI_VP_DV : InstrItinClass; +def CVI_VS : InstrItinClass; +def CVI_VINLANESAT : InstrItinClass; +def CVI_VM_LD : InstrItinClass; +def CVI_VM_TMP_LD : InstrItinClass; +def CVI_VM_CUR_LD : InstrItinClass; +def CVI_VM_VP_LDU : InstrItinClass; +def CVI_VM_ST : InstrItinClass; +def CVI_VM_NEW_ST : InstrItinClass; +def CVI_VM_STU : InstrItinClass; +def CVI_HIST : InstrItinClass; +def CVI_VA_EXT : InstrItinClass; + +// There are four SLOTS (four parallel pipelines) in the Hexagon V60 machine. +// This file describes that machine information.
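As context for how the itinerary classes listed above are consumed: schedulers query them through the generated InstrItineraryData tables. A minimal sketch against the stock llvm/MC/MCInstrItineraries.h interface (illustrative only; ItinClassID stands for the TableGen-generated enum value of a class such as CVI_VA):

#include "llvm/MC/MCInstrItineraries.h"

// Sum the cycles of every stage recorded for one itinerary class.
static unsigned totalStageCycles(const llvm::InstrItineraryData &IID,
                                 unsigned ItinClassID) {
  unsigned Cycles = 0;
  for (const llvm::InstrStage *S = IID.beginStage(ItinClassID),
                              *E = IID.endStage(ItinClassID);
       S != E; ++S)
    Cycles += S->getCycles();
  return Cycles;
}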
+// +// |===========|==================================================| +// | PIPELINE | Instruction Classes | +// |===========|==================================================| +// | SLOT0 | LD ST ALU32 MEMOP NV SYSTEM | +// |-----------|--------------------------------------------------| +// | SLOT1 | LD ST ALU32 | +// |-----------|--------------------------------------------------| +// | SLOT2 | XTYPE ALU32 J JR | +// |-----------|--------------------------------------------------| +// | SLOT3 | XTYPE ALU32 J CR | +// |===========|==================================================| +// +// +// In addition to using the above SLOTS, there are also six vector pipelines +// in the CVI co-processor in the Hexagon V60 machine. +// +// |=========| |=========| |=========| |=========| |=========| |=========| +// SLOT | CVI_LD | |CVI_MPY3 | |CVI_MPY2 | |CVI_SHIFT| |CVI_XLANE| | CVI_ST | +// ==== |=========| |=========| |=========| |=========| |=========| |=========| +// S0-3 | | | CVI_VA | | CVI_VA | | CVI_VA | | CVI_VA | | | +// S2-3 | | | CVI_VX | | CVI_VX | | | | | | | +// S0-3 | | | | | | | | | CVI_VP | | | +// S0-3 | | | | | | | CVI_VS | | | | | +// S0-1 |(CVI_LD) | | CVI_LD | | CVI_LD | | CVI_LD | | CVI_LD | | | +// S0-1 |(C*TMP_LD) | | | | | | | | | | +// S01 |(C*_LDU) | | | | | | | | C*_LDU | | | +// S0 | | | CVI_ST | | CVI_ST | | CVI_ST | | CVI_ST | |(CVI_ST) | +// S0 | | | | | | | | | | |(C*TMP_ST) +// S01 | | | | | | | | | VSTU | |(C*_STU) | +// |=========| |=========| |=========| |=========| |=========| |=========| +// |=====================| |=====================| +// | CVI_MPY2 & CVI_MPY3 | |CVI_XLANE & CVI_SHIFT| +// |=====================| |=====================| +// S0-3 | CVI_VA_DV | | CVI_VA_DV | +// S0-3 | | | CVI_VP_DV | +// S2-3 | CVI_VX_DV | | | +// |=====================| |=====================| +// |=====================================================================| +// S0-3 | CVI_HIST Histogram | +// S0123| CVI_VA_EXT Extract | +// |=====================================================================| + +def HexagonItinerariesV60 : + ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP, + CVI_ST, CVI_XLANE, CVI_SHIFT, CVI_MPY0, CVI_MPY1, + CVI_LD, CVI_XLSHF, CVI_MPY01, CVI_ALL], [], [ + // ALU32 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // ALU64 + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // CR -> System + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Jump (conditional/unconditional/return etc) + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // JR + InstrItinData]>, + InstrItinData]>, + + // Extender + InstrItinData]>, + + // Load + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // M + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Store + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Subinsn + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // S + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + // The S_2op_tc_3x_SLOT23 slots are 4 cycles on v60. 
+ InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // New Value Compare Jump + InstrItinData]>, + + // Mem ops + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Endloop + InstrItinData]>, + + // Vector + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + + // Duplex and Compound + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + // Misc + InstrItinData]>, + InstrItinData]>, + InstrItinData, + InstrStage<1, [SLOT2, SLOT3]>]>, + + // Latest CVI spec definitions. + InstrItinData, + InstrStage<1, [CVI_XLANE,CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF, CVI_MPY01]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData, + InstrStage<1, [CVI_MPY01]>]>, + InstrItinData, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_XLSHF]>]>, + InstrItinData, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData, + InstrStage<1, [CVI_SHIFT]>]>, + InstrItinData, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_LD]>]>, + InstrItinData, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_LD], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE, CVI_SHIFT, + CVI_MPY0, CVI_MPY1]>]>, + InstrItinData, + InstrStage<1, [CVI_ST]>]>, + InstrItinData, + InstrStage<1, [SLOT1], 0>, + InstrStage<1, [CVI_ST], 0>, + InstrStage<1, [CVI_XLANE]>]>, + InstrItinData, + InstrStage<1, [CVI_ALL]>]> + ]>; + +def HexagonModelV60 : SchedMachineModel { + // Max issue per cycle == bundle width. 
+ let IssueWidth = 4; + let Itineraries = HexagonItinerariesV60; + let LoadLatency = 1; +} + +//===----------------------------------------------------------------------===// +// Hexagon V60 Resource Definitions - +//===----------------------------------------------------------------------===// diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index 276cc69eed0f..239dbda8f27b 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -12,12 +12,11 @@ //===----------------------------------------------------------------------===// #include "HexagonTargetMachine.h" +#include "llvm/CodeGen/SelectionDAG.h" using namespace llvm; #define DEBUG_TYPE "hexagon-selectiondag-info" -bool llvm::flag_aligned_memcpy; - SDValue HexagonSelectionDAGInfo:: EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, @@ -25,15 +24,40 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - flag_aligned_memcpy = false; - if ((Align & 0x3) == 0) { - ConstantSDNode *ConstantSize = dyn_cast(Size); - if (ConstantSize) { - uint64_t SizeVal = ConstantSize->getZExtValue(); - if ((SizeVal > 32) && ((SizeVal % 8) == 0)) - flag_aligned_memcpy = true; - } - } + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (AlwaysInline || (Align & 0x3) != 0 || !ConstantSize) + return SDValue(); - return SDValue(); + uint64_t SizeVal = ConstantSize->getZExtValue(); + if (SizeVal < 32 || (SizeVal % 8) != 0) + return SDValue(); + + // Special case aligned memcpys with size >= 32 bytes and a multiple of 8. + // + const TargetLowering &TLI = *DAG.getSubtarget().getTargetLowering(); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + Entry.Node = Src; + Args.push_back(Entry); + Entry.Node = Size; + Args.push_back(Entry); + + const char *SpecialMemcpyName = + "__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes"; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*DAG.getContext()), + DAG.getTargetExternalSymbol( + SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0) + .setDiscardResult(); + + std::pair CallResult = TLI.LowerCallTo(CLI); + return CallResult.second; } diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp index d3eb56f4ba0f..10fe606985dd 100644 --- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp +++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp @@ -81,7 +81,7 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) { // Loop over all of the basic blocks for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end(); MBBb != MBBe; ++MBBb) { - MachineBasicBlock* MBB = MBBb; + MachineBasicBlock *MBB = &*MBBb; // Traverse the basic block MachineBasicBlock::iterator MII = MBB->begin(); MachineBasicBlock::iterator MIE = MBB->end (); diff --git a/lib/Target/Hexagon/HexagonSplitDouble.cpp b/lib/Target/Hexagon/HexagonSplitDouble.cpp new file mode 100644 index 000000000000..d4e95b0d0210 --- /dev/null +++ b/lib/Target/Hexagon/HexagonSplitDouble.cpp @@ -0,0 +1,1209 @@ +//===--- HexagonSplitDouble.cpp 
-------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hsdr" + +#include "HexagonRegisterInfo.h" +#include "HexagonTargetMachine.h" + +#include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include +#include +#include + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonSplitDoubleRegs(); + void initializeHexagonSplitDoubleRegsPass(PassRegistry&); +} + +namespace { + static cl::opt MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1), + cl::desc("Maximum number of split partitions")); + static cl::opt MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true), + cl::desc("Do not split loads or stores")); + + class HexagonSplitDoubleRegs : public MachineFunctionPass { + public: + static char ID; + HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr), + TII(nullptr) { + initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry()); + } + const char *getPassName() const override { + return "Hexagon Split Double Registers"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + static const TargetRegisterClass *const DoubleRC; + + const HexagonRegisterInfo *TRI; + const HexagonInstrInfo *TII; + const MachineLoopInfo *MLI; + MachineRegisterInfo *MRI; + + typedef std::set USet; + typedef std::map UUSetMap; + typedef std::pair UUPair; + typedef std::map UUPairMap; + typedef std::map LoopRegMap; + + bool isInduction(unsigned Reg, LoopRegMap &IRM) const; + bool isVolatileInstr(const MachineInstr *MI) const; + bool isFixedInstr(const MachineInstr *MI) const; + void partitionRegisters(UUSetMap &P2Rs); + int32_t profit(const MachineInstr *MI) const; + bool isProfitable(const USet &Part, LoopRegMap &IRM) const; + + void collectIndRegsForLoop(const MachineLoop *L, USet &Rs); + void collectIndRegs(LoopRegMap &IRM); + + void createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR); + void splitMemRef(MachineInstr *MI, const UUPairMap &PairMap); + void splitImmediate(MachineInstr *MI, const UUPairMap &PairMap); + void splitCombine(MachineInstr *MI, const UUPairMap &PairMap); + void splitExt(MachineInstr *MI, const UUPairMap &PairMap); + void splitShift(MachineInstr *MI, const UUPairMap &PairMap); + void splitAslOr(MachineInstr *MI, const UUPairMap &PairMap); + bool splitInstr(MachineInstr *MI, const UUPairMap &PairMap); + void replaceSubregUses(MachineInstr *MI, const UUPairMap &PairMap); + void collapseRegPairs(MachineInstr *MI, const UUPairMap &PairMap); + bool splitPartition(const USet &Part); + + static int Counter; + static void dump_partition(raw_ostream&, const USet&, + const TargetRegisterInfo&); + }; + char HexagonSplitDoubleRegs::ID; + int HexagonSplitDoubleRegs::Counter = 0; + const TargetRegisterClass *const 
HexagonSplitDoubleRegs::DoubleRC + = &Hexagon::DoubleRegsRegClass; +} + +INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double", + "Hexagon Split Double Registers", false, false) + + +static inline uint32_t getRegState(const MachineOperand &R) { + assert(R.isReg()); + return getDefRegState(R.isDef()) | + getImplRegState(R.isImplicit()) | + getKillRegState(R.isKill()) | + getDeadRegState(R.isDead()) | + getUndefRegState(R.isUndef()) | + getInternalReadRegState(R.isInternalRead()) | + (R.isDebug() ? RegState::Debug : 0); +} + + +void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os, + const USet &Part, const TargetRegisterInfo &TRI) { + dbgs() << '{'; + for (auto I : Part) + dbgs() << ' ' << PrintReg(I, &TRI); + dbgs() << " }"; +} + + +bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const { + for (auto I : IRM) { + const USet &Rs = I.second; + if (Rs.find(Reg) != Rs.end()) + return true; + } + return false; +} + + +bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const { + for (auto &I : MI->memoperands()) + if (I->isVolatile()) + return true; + return false; +} + + +bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const { + if (MI->mayLoad() || MI->mayStore()) + if (MemRefsFixed || isVolatileInstr(MI)) + return true; + if (MI->isDebugValue()) + return false; + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + return true; + + case TargetOpcode::PHI: + case TargetOpcode::COPY: + break; + + case Hexagon::L2_loadrd_io: + // Not handling stack stores (only reg-based addresses). + if (MI->getOperand(1).isReg()) + break; + return true; + case Hexagon::S2_storerd_io: + // Not handling stack stores (only reg-based addresses). + if (MI->getOperand(0).isReg()) + break; + return true; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + + case Hexagon::A2_tfrpi: + case Hexagon::A2_combineii: + case Hexagon::A4_combineir: + case Hexagon::A4_combineii: + case Hexagon::A4_combineri: + case Hexagon::A2_combinew: + case Hexagon::CONST64_Int_Real: + + case Hexagon::A2_sxtw: + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + case Hexagon::S2_asl_i_p_or: + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + break; + } + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + return true; + } + return false; +} + + +void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) { + typedef std::map UUMap; + typedef std::vector UVect; + + unsigned NumRegs = MRI->getNumVirtRegs(); + BitVector DoubleRegs(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + unsigned R = TargetRegisterInfo::index2VirtReg(i); + if (MRI->getRegClass(R) == DoubleRC) + DoubleRegs.set(i); + } + + BitVector FixedRegs(NumRegs); + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + MachineInstr *DefI = MRI->getVRegDef(R); + // In some cases a register may exist, but never be defined or used. + // It should never appear anywhere, but mark it as "fixed", just to be + // safe. 
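The partitioning below is, at heart, a connected-components walk over the association map built from shared uses. The same worklist technique in miniature, on plain STL containers (a hypothetical stand-alone helper, not part of the pass; the map is assumed symmetric, as AssocMap is made symmetric below):

#include <map>
#include <set>
#include <vector>

// Flood-fill partition numbers over an undirected association graph,
// growing a worklist in place the same way partitionRegisters does.
static std::map<unsigned, unsigned>
numberPartitions(const std::map<unsigned, std::set<unsigned>> &Assoc) {
  std::map<unsigned, unsigned> Part;
  unsigned NextP = 1;
  for (const auto &Seed : Assoc) {
    if (Part.count(Seed.first))
      continue;
    std::vector<unsigned> WorkQ{Seed.first};
    for (unsigned i = 0; i < WorkQ.size(); ++i) { // WorkQ grows as we scan
      unsigned R = WorkQ[i];
      if (!Part.emplace(R, NextP).second)
        continue; // already numbered
      auto It = Assoc.find(R);
      if (It != Assoc.end())
        WorkQ.insert(WorkQ.end(), It->second.begin(), It->second.end());
    }
    ++NextP;
  }
  return Part;
}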
+ if (!DefI || isFixedInstr(DefI)) + FixedRegs.set(x); + } + + UUSetMap AssocMap; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + if (FixedRegs[x]) + continue; + unsigned R = TargetRegisterInfo::index2VirtReg(x); + DEBUG(dbgs() << PrintReg(R, TRI) << " ~~"); + USet &Asc = AssocMap[R]; + for (auto U = MRI->use_nodbg_begin(R), Z = MRI->use_nodbg_end(); + U != Z; ++U) { + MachineOperand &Op = *U; + MachineInstr *UseI = Op.getParent(); + if (isFixedInstr(UseI)) + continue; + for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) { + MachineOperand &MO = UseI->getOperand(i); + // Skip non-registers or registers with subregisters. + if (&MO == &Op || !MO.isReg() || MO.getSubReg()) + continue; + unsigned T = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(T)) { + FixedRegs.set(x); + continue; + } + if (MRI->getRegClass(T) != DoubleRC) + continue; + unsigned u = TargetRegisterInfo::virtReg2Index(T); + if (FixedRegs[u]) + continue; + DEBUG(dbgs() << ' ' << PrintReg(T, TRI)); + Asc.insert(T); + // Make it symmetric. + AssocMap[T].insert(R); + } + } + DEBUG(dbgs() << '\n'); + } + + UUMap R2P; + unsigned NextP = 1; + USet Visited; + for (int x = DoubleRegs.find_first(); x >= 0; x = DoubleRegs.find_next(x)) { + unsigned R = TargetRegisterInfo::index2VirtReg(x); + if (Visited.count(R)) + continue; + // Create a new partition for R. + unsigned ThisP = FixedRegs[x] ? 0 : NextP++; + UVect WorkQ; + WorkQ.push_back(R); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + unsigned T = WorkQ[i]; + if (Visited.count(T)) + continue; + R2P[T] = ThisP; + Visited.insert(T); + // Add all registers associated with T. + USet &Asc = AssocMap[T]; + for (USet::iterator J = Asc.begin(), F = Asc.end(); J != F; ++J) + WorkQ.push_back(*J); + } + } + + for (auto I : R2P) + P2Rs[I.second].insert(I.first); +} + + +static inline int32_t profitImm(unsigned Lo, unsigned Hi) { + int32_t P = 0; + bool LoZ1 = false, HiZ1 = false; + if (Lo == 0 || Lo == 0xFFFFFFFF) + P += 10, LoZ1 = true; + if (Hi == 0 || Hi == 0xFFFFFFFF) + P += 10, HiZ1 = true; + if (!LoZ1 && !HiZ1 && Lo == Hi) + P += 3; + return P; +} + + +int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const { + unsigned ImmX = 0; + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case TargetOpcode::PHI: + for (const auto &Op : MI->operands()) + if (!Op.getSubReg()) + return 0; + return 10; + case TargetOpcode::COPY: + if (MI->getOperand(1).getSubReg() != 0) + return 10; + return 0; + + case Hexagon::L2_loadrd_io: + case Hexagon::S2_storerd_io: + return -1; + case Hexagon::L2_loadrd_pi: + case Hexagon::S2_storerd_pi: + return 2; + + case Hexagon::A2_tfrpi: + case Hexagon::CONST64_Int_Real: { + uint64_t D = MI->getOperand(1).getImm(); + unsigned Lo = D & 0xFFFFFFFFULL; + unsigned Hi = D >> 32; + return profitImm(Lo, Hi); + } + case Hexagon::A2_combineii: + case Hexagon::A4_combineii: + return profitImm(MI->getOperand(1).getImm(), + MI->getOperand(2).getImm()); + case Hexagon::A4_combineri: + ImmX++; + case Hexagon::A4_combineir: { + ImmX++; + int64_t V = MI->getOperand(ImmX).getImm(); + if (V == 0 || V == -1) + return 10; + // Fall through into A2_combinew. 
+ } + case Hexagon::A2_combinew: + return 2; + + case Hexagon::A2_sxtw: + return 3; + + case Hexagon::A2_andp: + case Hexagon::A2_orp: + case Hexagon::A2_xorp: + return 1; + + case Hexagon::S2_asl_i_p_or: { + unsigned S = MI->getOperand(3).getImm(); + if (S == 0 || S == 32) + return 10; + return -1; + } + case Hexagon::S2_asl_i_p: + case Hexagon::S2_asr_i_p: + case Hexagon::S2_lsr_i_p: + unsigned S = MI->getOperand(2).getImm(); + if (S == 0 || S == 32) + return 10; + if (S == 16) + return 5; + if (S == 48) + return 7; + return -10; + } + + return 0; +} + + +bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM) + const { + unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0; + int32_t TotalP = 0; + + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + int32_t P = profit(DefI); + if (P == INT_MIN) + return false; + TotalP += P; + // Reduce the profitability of splitting induction registers. + if (isInduction(DR, IRM)) + TotalP -= 30; + + for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) { + MachineInstr *UseI = U->getParent(); + if (isFixedInstr(UseI)) { + FixedNum++; + // Calculate the cost of generating REG_SEQUENCE instructions. + for (auto &Op : UseI->operands()) { + if (Op.isReg() && Part.count(Op.getReg())) + if (Op.getSubReg()) + TotalP -= 2; + } + continue; + } + // If a register from this partition is used in a fixed instruction, + // and there is also a register in this partition that is used in + // a loop phi node, then decrease the splitting profit as this can + // confuse the modulo scheduler. + if (UseI->isPHI()) { + const MachineBasicBlock *PB = UseI->getParent(); + const MachineLoop *L = MLI->getLoopFor(PB); + if (L && L->getHeader() == PB) + LoopPhiNum++; + } + // Splittable instruction. + SplitNum++; + int32_t P = profit(UseI); + if (P == INT_MIN) + return false; + TotalP += P; + } + } + + if (FixedNum > 0 && LoopPhiNum > 0) + TotalP -= 20*LoopPhiNum; + + DEBUG(dbgs() << "Partition profit: " << TotalP << '\n'); + return TotalP > 0; +} + + +void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L, + USet &Rs) { + const MachineBasicBlock *HB = L->getHeader(); + const MachineBasicBlock *LB = L->getLoopLatch(); + if (!HB || !LB) + return; + + // Examine the latch branch. Expect it to be a conditional branch to + // the header (either "br-cond header" or "br-cond exit; br header"). + MachineBasicBlock *TB = 0, *FB = 0; + MachineBasicBlock *TmpLB = const_cast(LB); + SmallVector Cond; + bool BadLB = TII->AnalyzeBranch(*TmpLB, TB, FB, Cond, false); + // Only analyzable conditional branches. HII::AnalyzeBranch will put + // the branch opcode as the first element of Cond, and the predicate + // operand as the second. + if (BadLB || Cond.size() != 2) + return; + // Only simple jump-conditional (with or without negation). + if (!TII->PredOpcodeHasJMP_c(Cond[0].getImm())) + return; + // Must go to the header. + if (TB != HB && FB != HB) + return; + assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch"); + // Expect a predicate register. + unsigned PR = Cond[1].getReg(); + assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass); + + // Get the registers on which the loop controlling compare instruction + // depends. 
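To make the profitImm accounting used above concrete, here is a stand-alone mirror with a few spot checks; the weights are the ones from the pass, but the harness itself is purely illustrative:

#include <cassert>
#include <cstdint>

// Halves that are all-zero or all-one are cheap to rematerialize (+10
// each); equal non-trivial halves earn a small bonus (+3).
static int profitImmMirror(uint32_t Lo, uint32_t Hi) {
  int P = 0;
  bool LoZ1 = (Lo == 0 || Lo == 0xFFFFFFFF);
  bool HiZ1 = (Hi == 0 || Hi == 0xFFFFFFFF);
  if (LoZ1) P += 10;
  if (HiZ1) P += 10;
  if (!LoZ1 && !HiZ1 && Lo == Hi) P += 3;
  return P;
}

int main() {
  assert(profitImmMirror(0, 0) == 20);                   // i64 0
  assert(profitImmMirror(0xFFFFFFFF, 0xFFFFFFFF) == 20); // i64 -1
  assert(profitImmMirror(0x12345678, 0x12345678) == 3);  // repeated halves
  assert(profitImmMirror(0x12345678, 0) == 10);          // one trivial half
  return 0;
}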
+ unsigned CmpR1 = 0, CmpR2 = 0; + const MachineInstr *CmpI = MRI->getVRegDef(PR); + while (CmpI->getOpcode() == Hexagon::C2_not) + CmpI = MRI->getVRegDef(CmpI->getOperand(1).getReg()); + + int Mask = 0, Val = 0; + bool OkCI = TII->analyzeCompare(CmpI, CmpR1, CmpR2, Mask, Val); + if (!OkCI) + return; + // Eliminate non-double input registers. + if (CmpR1 && MRI->getRegClass(CmpR1) != DoubleRC) + CmpR1 = 0; + if (CmpR2 && MRI->getRegClass(CmpR2) != DoubleRC) + CmpR2 = 0; + if (!CmpR1 && !CmpR2) + return; + + // Now examine the top of the loop: the phi nodes that could poten- + // tially define loop induction registers. The registers defined by + // such a phi node would be used in a 64-bit add, which then would + // be used in the loop compare instruction. + + // Get the set of all double registers defined by phi nodes in the + // loop header. + typedef std::vector UVect; + UVect DP; + for (auto &MI : *HB) { + if (!MI.isPHI()) + break; + const MachineOperand &MD = MI.getOperand(0); + unsigned R = MD.getReg(); + if (MRI->getRegClass(R) == DoubleRC) + DP.push_back(R); + } + if (DP.empty()) + return; + + auto NoIndOp = [this, CmpR1, CmpR2] (unsigned R) -> bool { + for (auto I = MRI->use_nodbg_begin(R), E = MRI->use_nodbg_end(); + I != E; ++I) { + const MachineInstr *UseI = I->getParent(); + if (UseI->getOpcode() != Hexagon::A2_addp) + continue; + // Get the output from the add. If it is one of the inputs to the + // loop-controlling compare instruction, then R is likely an induc- + // tion register. + unsigned T = UseI->getOperand(0).getReg(); + if (T == CmpR1 || T == CmpR2) + return false; + } + return true; + }; + UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp); + Rs.insert(DP.begin(), End); + Rs.insert(CmpR1); + Rs.insert(CmpR2); + + DEBUG({ + dbgs() << "For loop at BB#" << HB->getNumber() << " ind regs: "; + dump_partition(dbgs(), Rs, *TRI); + dbgs() << '\n'; + }); +} + + +void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) { + typedef std::vector LoopVector; + LoopVector WorkQ; + + for (auto I : *MLI) + WorkQ.push_back(I); + for (unsigned i = 0; i < WorkQ.size(); ++i) { + for (auto I : *WorkQ[i]) + WorkQ.push_back(I); + } + + USet Rs; + for (unsigned i = 0, n = WorkQ.size(); i < n; ++i) { + MachineLoop *L = WorkQ[i]; + Rs.clear(); + collectIndRegsForLoop(L, Rs); + if (!Rs.empty()) + IRM.insert(std::make_pair(L, Rs)); + } +} + + +void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI, + const UUPairMap &PairMap, unsigned SubR) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + MachineInstr *NewI = BuildMI(B, MI, DL, TII->get(Opc)); + + for (auto &Op : MI->operands()) { + if (!Op.isReg()) { + NewI->addOperand(Op); + continue; + } + // For register operands, set the subregister. + unsigned R = Op.getReg(); + unsigned SR = Op.getSubReg(); + bool isVirtReg = TargetRegisterInfo::isVirtualRegister(R); + bool isKill = Op.isKill(); + if (isVirtReg && MRI->getRegClass(R) == DoubleRC) { + isKill = false; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) { + SR = SubR; + } else { + const UUPair &P = F->second; + R = (SubR == Hexagon::subreg_loreg) ? 
P.first : P.second; + SR = 0; + } + } + auto CO = MachineOperand::CreateReg(R, Op.isDef(), Op.isImplicit(), isKill, + Op.isDead(), Op.isUndef(), Op.isEarlyClobber(), SR, Op.isDebug(), + Op.isInternalRead()); + NewI->addOperand(CO); + } +} + + +void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI, + const UUPairMap &PairMap) { + bool Load = MI->mayLoad(); + unsigned OrigOpc = MI->getOpcode(); + bool PostInc = (OrigOpc == Hexagon::L2_loadrd_pi || + OrigOpc == Hexagon::S2_storerd_pi); + MachineInstr *LowI, *HighI; + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + // Index of the base-address-register operand. + unsigned AdrX = PostInc ? (Load ? 2 : 1) + : (Load ? 1 : 0); + MachineOperand &AdrOp = MI->getOperand(AdrX); + unsigned RSA = getRegState(AdrOp); + MachineOperand &ValOp = Load ? MI->getOperand(0) + : (PostInc ? MI->getOperand(3) + : MI->getOperand(2)); + UUPairMap::const_iterator F = PairMap.find(ValOp.getReg()); + assert(F != PairMap.end()); + + if (Load) { + const UUPair &P = F->second; + int64_t Off = PostInc ? 0 : MI->getOperand(2).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.first) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::L2_loadri_io), P.second) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4); + } else { + const UUPair &P = F->second; + int64_t Off = PostInc ? 0 : MI->getOperand(1).getImm(); + LowI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off) + .addReg(P.first); + HighI = BuildMI(B, MI, DL, TII->get(Hexagon::S2_storeri_io)) + .addReg(AdrOp.getReg(), RSA & ~RegState::Kill, AdrOp.getSubReg()) + .addImm(Off+4) + .addReg(P.second); + } + + if (PostInc) { + // Create the increment of the address register. + int64_t Inc = Load ? MI->getOperand(3).getImm() + : MI->getOperand(2).getImm(); + MachineOperand &UpdOp = Load ? MI->getOperand(1) : MI->getOperand(0); + const TargetRegisterClass *RC = MRI->getRegClass(UpdOp.getReg()); + unsigned NewR = MRI->createVirtualRegister(RC); + assert(!UpdOp.getSubReg() && "Def operand with subreg"); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_addi), NewR) + .addReg(AdrOp.getReg(), RSA) + .addImm(Inc); + MRI->replaceRegWith(UpdOp.getReg(), NewR); + // The original instruction will be deleted later. + } + + // Generate a new pair of memory-operands. + MachineFunction &MF = *B.getParent(); + for (auto &MO : MI->memoperands()) { + const MachinePointerInfo &Ptr = MO->getPointerInfo(); + unsigned F = MO->getFlags(); + int A = MO->getAlignment(); + + auto *Tmp1 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, A); + LowI->addMemOperand(MF, Tmp1); + auto *Tmp2 = MF.getMachineMemOperand(Ptr, F, 4/*size*/, std::min(A, 4)); + HighI->addMemOperand(MF, Tmp2); + } +} + + +void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isImm()); + uint64_t V = Op1.getImm(); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + // The operand to A2_tfrsi can only have 32 significant bits. 
Immediate + // values in MachineOperand are stored as 64-bit integers, and so the + // value -1 may be represented either as 64-bit -1, or 4294967295. Both + // will have the 32 higher bits truncated in the end, but -1 will remain + // as -1, while the latter may appear to be a large unsigned value + // requiring a constant extender. The casting to int32_t will select the + // former representation. (The same reasoning applies to all 32-bit + // values.) + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(int32_t(V & 0xFFFFFFFFULL)); + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(int32_t(V >> 32)); +} + + +void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + + if (Op1.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.second) + .addImm(Op1.getImm()); + } else if (Op1.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.second) + .addReg(Op1.getReg(), getRegState(Op1), Op1.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); + + if (Op2.isImm()) { + BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), P.first) + .addImm(Op2.getImm()); + } else if (Op2.isReg()) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op2.getReg(), getRegState(Op2), Op2.getSubReg()); + } else + llvm_unreachable("Unexpected operand"); +} + + +void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + assert(Op0.isReg() && Op1.isReg()); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned RS = getRegState(Op1); + + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), P.first) + .addReg(Op1.getReg(), RS & ~RegState::Kill, Op1.getSubReg()); + BuildMI(B, MI, DL, TII->get(Hexagon::S2_asr_i_r), P.second) + .addReg(Op1.getReg(), RS, Op1.getSubReg()) + .addImm(31); +} + + +void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + assert(Op0.isReg() && Op1.isReg() && Op2.isImm()); + int64_t Sh64 = Op2.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + unsigned Opc = MI->getOpcode(); + bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p); + bool Left = !Right; + bool Signed = (Opc == S2_asr_i_p); + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS = getRegState(Op1); + unsigned ShiftOpc = Left ? S2_asl_i_r + : (Signed ? S2_asr_i_r : S2_lsr_i_r); + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + if (S == 0) { + // No shift, subregister copy. 
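The int32_t narrowing that splitImmediate depends on (described above) can be spot-checked in isolation; this assumes the two's-complement conversion behavior of the hosts LLVM builds on:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t V = 0xFFFFFFFFFFFFFFFFULL;      // an i64 -1 stored as a u64 imm
  int32_t Lo = int32_t(V & 0xFFFFFFFFULL); // low half
  int32_t Hi = int32_t(V >> 32);           // high half
  assert(Lo == -1 && Hi == -1); // both halves read back as -1, not 2^32-1
  return 0;
}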
+ BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), HiR) + .addReg(Op1.getReg(), RS, HiSR); + } else if (S < 32) { + const TargetRegisterClass *IntRC = &IntRegsRegClass; + unsigned TmpR = MRI->createVirtualRegister(IntRC); + // Expansion: + // Shift left: DR = shl R, #s + // LoR = shl R.lo, #s + // TmpR = extractu R.lo, #s, #32-s + // HiR = or (TmpR, asl(R.hi, #s)) + // Shift right: DR = shr R, #s + // HiR = shr R.hi, #s + // TmpR = shr R.lo, #s + // LoR = insert TmpR, R.hi, #s, #32-s + + // Shift left: + // LoR = shl R.lo, #s + // Shift right: + // TmpR = shr R.lo, #s + + // Make a special case for A2_aslh and A2_asrh (they are predicable as + // opposed to S2_asl_i_r/S2_asr_i_r). + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? LoR : TmpR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S); + + if (Left) { + // TmpR = extractu R.lo, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + // HiR = or (TmpR, asl(R.hi, #s)) + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S); + } else { + // HiR = shr R.hi, #s + BuildMI(B, MI, DL, TII->get(ShiftOpc), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR) + .addImm(S); + // LoR = insert TmpR, R.hi, #s, #32-s + BuildMI(B, MI, DL, TII->get(S2_insert), LoR) + .addReg(TmpR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(S) + .addImm(32-S); + } + } else if (S == 32) { + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)); + if (!Signed) + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? LoR : HiR)) + .addImm(0); + else // Must be right shift. + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + } else if (S < 64) { + S -= 32; + if (S == 16 && Left) + BuildMI(B, MI, DL, TII->get(A2_aslh), HiR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, LoSR); + else if (S == 16 && Signed) + BuildMI(B, MI, DL, TII->get(A2_asrh), LoR) + .addReg(Op1.getReg(), RS & ~RegState::Kill, HiSR); + else + BuildMI(B, MI, DL, TII->get(ShiftOpc), (Left ? HiR : LoR)) + .addReg(Op1.getReg(), RS & ~RegState::Kill, (Left ? LoSR : HiSR)) + .addImm(S); + + if (Signed) + BuildMI(B, MI, DL, TII->get(S2_asr_i_r), HiR) + .addReg(Op1.getReg(), RS, HiSR) + .addImm(31); + else + BuildMI(B, MI, DL, TII->get(A2_tfrsi), (Left ? 
LoR : HiR)) + .addImm(0); + } +} + + +void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineOperand &Op0 = MI->getOperand(0); + MachineOperand &Op1 = MI->getOperand(1); + MachineOperand &Op2 = MI->getOperand(2); + MachineOperand &Op3 = MI->getOperand(3); + assert(Op0.isReg() && Op1.isReg() && Op2.isReg() && Op3.isImm()); + int64_t Sh64 = Op3.getImm(); + assert(Sh64 >= 0 && Sh64 < 64); + unsigned S = Sh64; + + UUPairMap::const_iterator F = PairMap.find(Op0.getReg()); + assert(F != PairMap.end()); + const UUPair &P = F->second; + unsigned LoR = P.first; + unsigned HiR = P.second; + using namespace Hexagon; + + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RS1 = getRegState(Op1); + unsigned RS2 = getRegState(Op2); + const TargetRegisterClass *IntRC = &IntRegsRegClass; + + unsigned LoSR = subreg_loreg; + unsigned HiSR = subreg_hireg; + + // Op0 = S2_asl_i_p_or Op1, Op2, Op3 + // means: Op0 = or (Op1, asl(Op2, Op3)) + + // Expansion of + // DR = or (R1, asl(R2, #s)) + // + // LoR = or (R1.lo, asl(R2.lo, #s)) + // Tmp1 = extractu R2.lo, #s, #32-s + // Tmp2 = or R1.hi, Tmp1 + // HiR = or (Tmp2, asl(R2.hi, #s)) + + if (S == 0) { + // DR = or (R1, asl(R2, #0)) + // -> or (R1, R2) + // i.e. LoR = or R1.lo, R2.lo + // HiR = or R1.hi, R2.hi + BuildMI(B, MI, DL, TII->get(A2_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, HiSR); + } else if (S < 32) { + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S); + unsigned TmpR1 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(S2_extractu), TmpR1) + .addReg(Op2.getReg(), RS2 & ~RegState::Kill, LoSR) + .addImm(S) + .addImm(32-S); + unsigned TmpR2 = MRI->createVirtualRegister(IntRC); + BuildMI(B, MI, DL, TII->get(A2_or), TmpR2) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(TmpR1); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(TmpR2) + .addReg(Op2.getReg(), RS2, HiSR) + .addImm(S); + } else if (S == 32) { + // DR = or (R1, asl(R2, #32)) + // -> or R1, R2.lo + // LoR = R1.lo + // HiR = or R1.hi, R2.lo + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(A2_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR); + } else if (S < 64) { + // DR = or (R1, asl(R2, #s)) + // + // LoR = R1:lo + // HiR = or (R1:hi, asl(R2:lo, #s-32)) + S -= 32; + BuildMI(B, MI, DL, TII->get(TargetOpcode::COPY), LoR) + .addReg(Op1.getReg(), RS1 & ~RegState::Kill, LoSR); + BuildMI(B, MI, DL, TII->get(S2_asl_i_r_or), HiR) + .addReg(Op1.getReg(), RS1, HiSR) + .addReg(Op2.getReg(), RS2, LoSR) + .addImm(S); + } +} + + +bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI, + const UUPairMap &PairMap) { + DEBUG(dbgs() << "Splitting: " << *MI); + bool Split = false; + unsigned Opc = MI->getOpcode(); + using namespace Hexagon; + + switch (Opc) { + case TargetOpcode::PHI: + case TargetOpcode::COPY: { + unsigned DstR = MI->getOperand(0).getReg(); + if (MRI->getRegClass(DstR) == DoubleRC) { + createHalfInstr(Opc, MI, PairMap, subreg_loreg); + createHalfInstr(Opc, MI, PairMap, subreg_hireg); + Split = true; + } + break; + } + case A2_andp: + createHalfInstr(A2_and, MI, PairMap, 
subreg_loreg); + createHalfInstr(A2_and, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_orp: + createHalfInstr(A2_or, MI, PairMap, subreg_loreg); + createHalfInstr(A2_or, MI, PairMap, subreg_hireg); + Split = true; + break; + case A2_xorp: + createHalfInstr(A2_xor, MI, PairMap, subreg_loreg); + createHalfInstr(A2_xor, MI, PairMap, subreg_hireg); + Split = true; + break; + + case L2_loadrd_io: + case L2_loadrd_pi: + case S2_storerd_io: + case S2_storerd_pi: + splitMemRef(MI, PairMap); + Split = true; + break; + + case A2_tfrpi: + case CONST64_Int_Real: + splitImmediate(MI, PairMap); + Split = true; + break; + + case A2_combineii: + case A4_combineir: + case A4_combineii: + case A4_combineri: + case A2_combinew: + splitCombine(MI, PairMap); + Split = true; + break; + + case A2_sxtw: + splitExt(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p: + case S2_asr_i_p: + case S2_lsr_i_p: + splitShift(MI, PairMap); + Split = true; + break; + + case S2_asl_i_p_or: + splitAslOr(MI, PairMap); + Split = true; + break; + + default: + llvm_unreachable("Instruction not splitable"); + return false; + } + + return Split; +} + + +void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI, + const UUPairMap &PairMap) { + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse() || !Op.getSubReg()) + continue; + unsigned R = Op.getReg(); + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &P = F->second; + switch (Op.getSubReg()) { + case Hexagon::subreg_loreg: + Op.setReg(P.first); + break; + case Hexagon::subreg_hireg: + Op.setReg(P.second); + break; + } + Op.setSubReg(0); + } +} + + +void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI, + const UUPairMap &PairMap) { + MachineBasicBlock &B = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + + for (auto &Op : MI->operands()) { + if (!Op.isReg() || !Op.isUse()) + continue; + unsigned R = Op.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(R)) + continue; + if (MRI->getRegClass(R) != DoubleRC || Op.getSubReg()) + continue; + UUPairMap::const_iterator F = PairMap.find(R); + if (F == PairMap.end()) + continue; + const UUPair &Pr = F->second; + unsigned NewDR = MRI->createVirtualRegister(DoubleRC); + BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR) + .addReg(Pr.first) + .addImm(Hexagon::subreg_loreg) + .addReg(Pr.second) + .addImm(Hexagon::subreg_hireg); + Op.setReg(NewDR); + } +} + + +bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) { + const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass; + typedef std::set MISet; + bool Changed = false; + + DEBUG(dbgs() << "Splitting partition: "; dump_partition(dbgs(), Part, *TRI); + dbgs() << '\n'); + + UUPairMap PairMap; + + MISet SplitIns; + for (unsigned DR : Part) { + MachineInstr *DefI = MRI->getVRegDef(DR); + SplitIns.insert(DefI); + + // Collect all instructions, including fixed ones. We won't split them, + // but we need to visit them again to insert the REG_SEQUENCE instructions. 
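// Sketch, not pass code: the bookkeeping splitPartition sets up just below.
// Every 64-bit register DR gets a (LoR, HiR) pair of 32-bit registers;
// Reg/RegPairMap/recordPair are illustrative names.
#include <map>
#include <utility>
typedef unsigned Reg;
typedef std::map<Reg, std::pair<Reg, Reg> > RegPairMap;
inline void recordPair(RegPairMap &PM, Reg DR, Reg LoR, Reg HiR) {
  PM.insert(std::make_pair(DR, std::make_pair(LoR, HiR)));  // read by splitInstr
}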
+ for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) + SplitIns.insert(U->getParent()); + + unsigned LoR = MRI->createVirtualRegister(IntRC); + unsigned HiR = MRI->createVirtualRegister(IntRC); + DEBUG(dbgs() << "Created mapping: " << PrintReg(DR, TRI) << " -> " + << PrintReg(HiR, TRI) << ':' << PrintReg(LoR, TRI) << '\n'); + PairMap.insert(std::make_pair(DR, UUPair(LoR, HiR))); + } + + MISet Erase; + for (auto MI : SplitIns) { + if (isFixedInstr(MI)) { + collapseRegPairs(MI, PairMap); + } else { + bool Done = splitInstr(MI, PairMap); + if (Done) + Erase.insert(MI); + Changed |= Done; + } + } + + for (unsigned DR : Part) { + // Before erasing "double" instructions, revisit all uses of the double + // registers in this partition, and replace all subregister uses with + // the corresponding single registers. + MISet Uses; + for (auto U = MRI->use_nodbg_begin(DR), W = MRI->use_nodbg_end(); + U != W; ++U) + Uses.insert(U->getParent()); + for (auto M : Uses) + replaceSubregUses(M, PairMap); + } + + for (auto MI : Erase) { + MachineBasicBlock *B = MI->getParent(); + B->erase(MI); + } + + return Changed; +} + + +bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Splitting double registers in function: " + << MF.getName() << '\n'); + + auto &ST = MF.getSubtarget<HexagonSubtarget>(); + TRI = ST.getRegisterInfo(); + TII = ST.getInstrInfo(); + MRI = &MF.getRegInfo(); + MLI = &getAnalysis<MachineLoopInfo>(); + + UUSetMap P2Rs; + LoopRegMap IRM; + + collectIndRegs(IRM); + partitionRegisters(P2Rs); + + DEBUG({ + dbgs() << "Register partitioning: (partition #0 is fixed)\n"; + for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { + dbgs() << '#' << I->first << " -> "; + dump_partition(dbgs(), I->second, *TRI); + dbgs() << '\n'; + } + }); + + bool Changed = false; + int Limit = MaxHSDR; + + for (UUSetMap::iterator I = P2Rs.begin(), E = P2Rs.end(); I != E; ++I) { + if (I->first == 0) + continue; + if (Limit >= 0 && Counter >= Limit) + break; + USet &Part = I->second; + DEBUG(dbgs() << "Calculating profit for partition #" << I->first << '\n'); + if (!isProfitable(Part, IRM)) + continue; + Counter++; + Changed |= splitPartition(Part); + } + + return Changed; +} + +FunctionPass *llvm::createHexagonSplitDoubleRegs() { + return new HexagonSplitDoubleRegs(); +} diff --git a/lib/Target/Hexagon/HexagonStoreWidening.cpp b/lib/Target/Hexagon/HexagonStoreWidening.cpp new file mode 100644 index 000000000000..b5339ff4c0dc --- /dev/null +++ b/lib/Target/Hexagon/HexagonStoreWidening.cpp @@ -0,0 +1,616 @@ +//===--- HexagonStoreWidening.cpp------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Replace sequences of "narrow" stores to adjacent memory locations with +// fewer "wide" stores that have the same effect. +// For example, replace: +// S4_storeirb_io %vreg100, 0, 0 ; store-immediate-byte +// S4_storeirb_io %vreg100, 1, 0 ; store-immediate-byte +// with +// S4_storeirh_io %vreg100, 0, 0 ; store-immediate-halfword +// The above is the general idea. The actual cases handled by the code +// may be a bit more complex. +// The purpose of this pass is to reduce the number of outstanding stores, +// or as one could say, "reduce store queue pressure".
Also, wide stores +// mean fewer stores, and since there are only two memory instructions allowed +// per packet, it also means fewer packets, and ultimately fewer cycles. +//===---------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-widen-stores" + +#include "HexagonTargetMachine.h" + +#include "llvm/PassSupport.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#include <algorithm> + + +using namespace llvm; + +namespace llvm { + FunctionPass *createHexagonStoreWidening(); + void initializeHexagonStoreWideningPass(PassRegistry&); +} + +namespace { + struct HexagonStoreWidening : public MachineFunctionPass { + const HexagonInstrInfo *TII; + const HexagonRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + AliasAnalysis *AA; + MachineFunction *MF; + + public: + static char ID; + HexagonStoreWidening() : MachineFunctionPass(ID) { + initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Hexagon Store Widening"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + static bool handledStoreType(const MachineInstr *MI); + + private: + static const int MaxWideSize = 4; + + typedef std::vector<MachineInstr*> InstrGroup; + typedef std::vector<InstrGroup> InstrGroupList; + + bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO); + bool instrAliased(InstrGroup &Stores, const MachineInstr *MI); + void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin, + InstrGroup::iterator End, InstrGroup &Group); + void createStoreGroups(MachineBasicBlock &MBB, + InstrGroupList &StoreGroups); + bool processBasicBlock(MachineBasicBlock &MBB); + bool processStoreGroup(InstrGroup &Group); + bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End, + InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize); + bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize); + bool replaceStores(InstrGroup &OG, InstrGroup &NG); + bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2); + }; + +} // namespace + + +namespace { + +// Some local helper functions...
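// Sketch, not pass code: the little-endian identity the widening relies on.
// Two adjacent byte stores of V0 (offset 0) and V1 (offset 1) have the same
// effect as one halfword store of (V0 | V1 << 8); packBytesLE is an
// illustrative name.
#include <cstdint>
inline uint16_t packBytesLE(uint8_t V0, uint8_t V1) {
  return uint16_t(V0) | uint16_t(V1) << 8;  // e.g. 0x12, 0x34 -> 0x3412
}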
+unsigned getBaseAddressRegister(const MachineInstr *MI) { + const MachineOperand &MO = MI->getOperand(0); + assert(MO.isReg() && "Expecting register operand"); + return MO.getReg(); +} + +int64_t getStoreOffset(const MachineInstr *MI) { + unsigned OpC = MI->getOpcode(); + assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode"); + + switch (OpC) { + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: { + const MachineOperand &MO = MI->getOperand(1); + assert(MO.isImm() && "Expecting immediate offset"); + return MO.getImm(); + } + } + dbgs() << *MI; + llvm_unreachable("Store offset calculation missing for a handled opcode"); + return 0; +} + +const MachineMemOperand &getStoreTarget(const MachineInstr *MI) { + assert(!MI->memoperands_empty() && "Expecting memory operands"); + return **MI->memoperands_begin(); +} + +} // namespace + + +char HexagonStoreWidening::ID = 0; + +INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores", + "Hexagon Store Widening", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores", + "Hexagon Store Widening", false, false) + + +// Filtering function: any stores whose opcodes are not "approved" by +// this function will not be subjected to widening. +inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) { + // For now, only handle stores of immediate values. + // Also, reject stores to stack slots. + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::S4_storeirb_io: + case Hexagon::S4_storeirh_io: + case Hexagon::S4_storeiri_io: + // Base address must be a register. (Implement FI later.) + return MI->getOperand(0).isReg(); + default: + return false; + } +} + + +// Check if the machine memory operand MMO is aliased with any of the +// stores in the store group Stores. +bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, + const MachineMemOperand &MMO) { + if (!MMO.getValue()) + return true; + + MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo()); + + for (auto SI : Stores) { + const MachineMemOperand &SMO = getStoreTarget(SI); + if (!SMO.getValue()) + return true; + + MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo()); + if (AA->alias(L, SL)) + return true; + } + + return false; +} + + +// Check if the machine instruction MI accesses any storage aliased with +// any store in the group Stores. +bool HexagonStoreWidening::instrAliased(InstrGroup &Stores, + const MachineInstr *MI) { + for (auto &I : MI->memoperands()) + if (instrAliased(Stores, *I)) + return true; + return false; +} + + +// Inspect a machine basic block, and generate store groups out of stores +// encountered in the block. +// +// A store group is a group of stores that use the same base register, +// and which can be reordered within that group without altering the +// semantics of the program. A single store group could be widened as +// a whole, if there existed a single store instruction with the same +// semantics as the entire group. In many cases, a single store group +// may need more than one wide store. +void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB, + InstrGroupList &StoreGroups) { + InstrGroup AllInsns; + + // Copy all instruction pointers from the basic block to a temporary + // list. This will allow operating on the list, and modifying its + // elements without affecting the basic block.
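// Sketch, not pass code: the claim-and-null idiom used below. Instruction
// pointers are copied out of the block, and entries are nulled as a group
// claims them, so later scans skip them without touching the block itself.
// claimAll/T are illustrative names (T stands in for MachineInstr).
#include <vector>
template <typename T> void claimAll(std::vector<T *> &Work) {
  for (T *&P : Work)
    if (P /* && claimed by the current group */)
      P = nullptr;  // marks the entry as consumed
}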
+ for (auto &I : MBB) + AllInsns.push_back(&I); + + // Traverse all instructions in the AllInsns list, and if we encounter + // a store, then try to create a store group starting at that instruction + // i.e. a sequence of independent stores that can be widened. + for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) { + MachineInstr *MI = *I; + // Skip null pointers (processed instructions). + if (!MI || !handledStoreType(MI)) + continue; + + // Found a store. Try to create a store group. + InstrGroup G; + createStoreGroup(MI, I+1, E, G); + if (G.size() > 1) + StoreGroups.push_back(G); + } +} + + +// Create a single store group. The stores need to be independent between +// themselves, and also there cannot be other instructions between them +// that could read or modify storage being stored into. +void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore, + InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) { + assert(handledStoreType(BaseStore) && "Unexpected instruction"); + unsigned BaseReg = getBaseAddressRegister(BaseStore); + InstrGroup Other; + + Group.push_back(BaseStore); + + for (auto I = Begin; I != End; ++I) { + MachineInstr *MI = *I; + if (!MI) + continue; + + if (handledStoreType(MI)) { + // If this store instruction is aliased with anything already in the + // group, terminate the group now. + if (instrAliased(Group, getStoreTarget(MI))) + return; + // If this store is aliased to any of the memory instructions we have + // seen so far (that are not a part of this group), terminate the group. + if (instrAliased(Other, getStoreTarget(MI))) + return; + + unsigned BR = getBaseAddressRegister(MI); + if (BR == BaseReg) { + Group.push_back(MI); + *I = 0; + continue; + } + } + + // Assume calls are aliased to everything. + if (MI->isCall() || MI->hasUnmodeledSideEffects()) + return; + + if (MI->mayLoad() || MI->mayStore()) { + if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI)) + return; + Other.push_back(MI); + } + } // for +} + + +// Check if store instructions S1 and S2 are adjacent. More precisely, +// S2 has to access memory immediately following that accessed by S1. +bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1, + const MachineInstr *S2) { + if (!handledStoreType(S1) || !handledStoreType(S2)) + return false; + + const MachineMemOperand &S1MO = getStoreTarget(S1); + + // Currently only handling immediate stores. + int Off1 = S1->getOperand(1).getImm(); + int Off2 = S2->getOperand(1).getImm(); + + return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2) + : int(Off1+S1MO.getSize()) == Off2; +} + + +/// Given a sequence of adjacent stores, and a maximum size of a single wide +/// store, pick a group of stores that can be replaced by a single store +/// of size not exceeding MaxSize. The selected sequence will be recorded +/// in OG ("old group" of instructions). +/// OG should be empty on entry, and should be left empty if the function +/// fails. 
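// Sketch, not pass code: the offset test selectStores applies below. A
// power-of-two-sized store can begin a wider (2*Size) store only if the low
// bits of its offset are clear; (2*Size - 1) is exactly that bit mask.
// canStartWiderStore is an illustrative name.
#include <cstdint>
inline bool canStartWiderStore(uint32_t Size, int64_t Offset) {
  return ((2 * Size - 1) & Offset) == 0;
  // canStartWiderStore(1, 0) -> true  (byte at 0 may become a halfword)
  // canStartWiderStore(1, 1) -> false (byte at 1 cannot start a halfword)
}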
+bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin, + InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize, + unsigned MaxSize) { + assert(Begin != End && "No instructions to analyze"); + assert(OG.empty() && "Old group not empty on entry"); + + if (std::distance(Begin, End) <= 1) + return false; + + MachineInstr *FirstMI = *Begin; + assert(!FirstMI->memoperands_empty() && "Expecting some memory operands"); + const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI); + unsigned Alignment = FirstMMO.getAlignment(); + unsigned SizeAccum = FirstMMO.getSize(); + unsigned FirstOffset = getStoreOffset(FirstMI); + + // The initial value of SizeAccum should always be a power of 2. + assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2"); + + // If the size of the first store equals or exceeds the limit, do nothing. + if (SizeAccum >= MaxSize) + return false; + + // If the size of the first store is greater than or equal to the alignment + // of the address stored to, then the store cannot be made any wider. + if (SizeAccum >= Alignment) + return false; + + // The offset of a store will put restrictions on how wide the store can be. + // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0. + // If the first store already exhausts the offset limits, quit. Test this + // by checking if the next wider size would exceed the limit. + if ((2*SizeAccum-1) & FirstOffset) + return false; + + OG.push_back(FirstMI); + MachineInstr *S1 = FirstMI, *S2 = *(Begin+1); + InstrGroup::iterator I = Begin+1; + + // Pow2Num will be the largest number of elements in OG such that the sum + // of sizes of stores 0...Pow2Num-1 will be a power of 2. + unsigned Pow2Num = 1; + unsigned Pow2Size = SizeAccum; + + // Be greedy: keep accumulating stores as long as they are to adjacent + // memory locations, and as long as the total number of bytes stored + // does not exceed the limit (MaxSize). + // Keep track of when the total size covered is a power of 2, since + // this is a size a single store can cover. + while (I != End) { + S2 = *I; + // Stores are sorted, so if S1 and S2 are not adjacent, there won't be + // any other store to fill the "hole". + if (!storesAreAdjacent(S1, S2)) + break; + + unsigned S2Size = getStoreTarget(S2).getSize(); + if (SizeAccum + S2Size > std::min(MaxSize, Alignment)) + break; + + OG.push_back(S2); + SizeAccum += S2Size; + if (isPowerOf2_32(SizeAccum)) { + Pow2Num = OG.size(); + Pow2Size = SizeAccum; + } + if ((2*Pow2Size-1) & FirstOffset) + break; + + S1 = S2; + ++I; + } + + // The stores don't add up to anything that can be widened. Clean up. + if (Pow2Num <= 1) { + OG.clear(); + return false; + } + + // Only leave the stores being widened. + OG.resize(Pow2Num); + TotalSize = Pow2Size; + return true; +} + + +/// Given an "old group" OG of stores, create a "new group" NG of instructions +/// to replace them. Ideally, NG would only have a single instruction in it, +/// but that may only be possible for store-immediate. +bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG, + unsigned TotalSize) { + // XXX Current limitations: + // - only expect stores of immediate values in OG, + // - only handle a TotalSize of up to 4. + + if (TotalSize > 4) + return false; + + unsigned Acc = 0; // Value accumulator. + unsigned Shift = 0; + + for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) { + MachineInstr *MI = *I; + const MachineMemOperand &MMO = getStoreTarget(MI); + MachineOperand &SO = MI->getOperand(2); // Source.
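// Sketch, not pass code: the accumulation loop here, standalone. Packs a
// list of (immediate, size-in-bytes) stores, lowest address first, into one
// wider value; packImms is an illustrative name.
#include <cstdint>
#include <utility>
#include <vector>
inline uint32_t packImms(const std::vector<std::pair<int64_t, unsigned> > &St) {
  uint32_t Acc = 0, Shift = 0;
  for (unsigned I = 0; I != St.size(); ++I) {
    unsigned NBits = St[I].second * 8;
    uint32_t Mask = 0xFFFFFFFFU >> (32 - NBits);
    Acc |= (uint32_t(St[I].first) & Mask) << Shift;  // {0x12,1},{0x34,1} -> 0x3412
    Shift += NBits;
  }
  return Acc;
}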
+ assert(SO.isImm() && "Expecting an immediate operand"); + + unsigned NBits = MMO.getSize()*8; + unsigned Mask = (0xFFFFFFFFU >> (32-NBits)); + unsigned Val = (SO.getImm() & Mask) << Shift; + Acc |= Val; + Shift += NBits; + } + + + MachineInstr *FirstSt = OG.front(); + DebugLoc DL = OG.back()->getDebugLoc(); + const MachineMemOperand &OldM = getStoreTarget(FirstSt); + MachineMemOperand *NewM = + MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(), + TotalSize, OldM.getAlignment(), + OldM.getAAInfo()); + + if (Acc < 0x10000) { + // Create mem[hw] = #Acc + unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io : + (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0; + assert(WOpc && "Unexpected size"); + + int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc); + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addImm(Val); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } else { + // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg + const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi); + const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF); + unsigned VReg = MF->getRegInfo().createVirtualRegister(RC); + MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg) + .addImm(int(Acc)); + NG.push_back(TfrI); + + unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io : + (TotalSize == 4) ? Hexagon::S2_storeri_io : 0; + assert(WOpc && "Unexpected size"); + + const MCInstrDesc &StD = TII->get(WOpc); + MachineOperand &MR = FirstSt->getOperand(0); + int64_t Off = FirstSt->getOperand(1).getImm(); + MachineInstr *StI = BuildMI(*MF, DL, StD) + .addReg(MR.getReg(), getKillRegState(MR.isKill())) + .addImm(Off) + .addReg(VReg, RegState::Kill); + StI->addMemOperand(*MF, NewM); + NG.push_back(StI); + } + + return true; +} + + +// Replace instructions from the old group OG with instructions from the +// new group NG. Conceptually, remove all instructions in OG, and then +// insert all instructions in NG, starting at where the first instruction +// from OG was (in the order in which they appeared in the basic block). +// (The ordering in OG does not have to match the order in the basic block.) +bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) { + DEBUG({ + dbgs() << "Replacing:\n"; + for (auto I : OG) + dbgs() << " " << *I; + dbgs() << "with\n"; + for (auto I : NG) + dbgs() << " " << *I; + }); + + MachineBasicBlock *MBB = OG.back()->getParent(); + MachineBasicBlock::iterator InsertAt = MBB->end(); + + // Need to establish the insertion point. The best one is right before + // the first store in the OG, but in the order in which the stores occur + // in the program list. Since the ordering in OG does not correspond + // to the order in the program list, we need to do some work to find + // the insertion point. + + // Create a set of all instructions in OG (for quick lookup). + SmallPtrSet InstrSet; + for (auto I : OG) + InstrSet.insert(I); + + // Traverse the block, until we hit an instruction from OG. + for (auto &I : *MBB) { + if (InstrSet.count(&I)) { + InsertAt = I; + break; + } + } + + assert((InsertAt != MBB->end()) && "Cannot locate any store from the group"); + + bool AtBBStart = false; + + // InsertAt points at the first instruction that will be removed. 
We need + // to move it out of the way, so it remains valid after removing all the + // old stores, and so we are able to recover it back to the proper insertion + // position. + if (InsertAt != MBB->begin()) + --InsertAt; + else + AtBBStart = true; + + for (auto I : OG) + I->eraseFromParent(); + + if (!AtBBStart) + ++InsertAt; + else + InsertAt = MBB->begin(); + + for (auto I : NG) + MBB->insert(InsertAt, I); + + return true; +} + + +// Break up the group into smaller groups, each of which can be replaced by +// a single wide store. Widen each such smaller group and replace the old +// instructions with the widened ones. +bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) { + bool Changed = false; + InstrGroup::iterator I = Group.begin(), E = Group.end(); + InstrGroup OG, NG; // Old and new groups. + unsigned CollectedSize; + + while (I != E) { + OG.clear(); + NG.clear(); + + bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) && + createWideStores(OG, NG, CollectedSize) && + replaceStores(OG, NG); + if (!Succ) + continue; + + assert(OG.size() > 1 && "Created invalid group"); + assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements"); + I += OG.size()-1; + + Changed = true; + } + + return Changed; +} + + +// Process a single basic block: create the store groups, and replace them +// with the widened stores, if possible. Processing of each basic block +// is independent from processing of any other basic block. This +// transformation could be stopped after having processed any basic block +// without any ill effects (other than not having performed widening in the +// unprocessed blocks). Also, the basic blocks can be processed in any order. +bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) { + InstrGroupList SGs; + bool Changed = false; + + createStoreGroups(MBB, SGs); + + auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool { + return getStoreOffset(A) < getStoreOffset(B); + }; + for (auto &G : SGs) { + assert(G.size() > 1 && "Store group with fewer than 2 elements"); + std::sort(G.begin(), G.end(), Less); + + Changed |= processStoreGroup(G); + } + + return Changed; +} + + +bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) { + MF = &MFn; + auto &ST = MFn.getSubtarget<HexagonSubtarget>(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &MFn.getRegInfo(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + + bool Changed = false; + + for (auto &B : MFn) + Changed |= processBasicBlock(B); + + return Changed; +} + + +FunctionPass *llvm::createHexagonStoreWidening() { + return new HexagonStoreWidening(); +} + diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index cd482b3e3af1..aa0efd4f65e0 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -16,6 +16,8 @@ #include "HexagonRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include <map> + using namespace llvm; #define DEBUG_TYPE "hexagon-subtarget" @@ -24,49 +26,65 @@ using namespace llvm; #define GET_SUBTARGETINFO_TARGET_DESC #include "HexagonGenSubtargetInfo.inc" -static cl::opt<bool> -EnableV3("enable-hexagon-v3", cl::Hidden, - cl::desc("Enable Hexagon V3 instructions.")); +static cl::opt<bool> EnableMemOps("enable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), + cl::desc("Generate V4 MEMOP in code generation for Hexagon target")); -static cl::opt<bool> -EnableMemOps( - "enable-hexagon-memops", -
cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(true), - cl::desc( - "Generate V4 MEMOP in code generation for Hexagon target")); +static cl::opt DisableMemOps("disable-hexagon-memops", + cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), + cl::desc("Do not generate V4 MEMOP in code generation for Hexagon target")); -static cl::opt -DisableMemOps( - "disable-hexagon-memops", - cl::Hidden, cl::ZeroOrMore, cl::ValueDisallowed, cl::init(false), - cl::desc( - "Do not generate V4 MEMOP in code generation for Hexagon target")); +static cl::opt EnableIEEERndNear("enable-hexagon-ieee-rnd-near", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Generate non-chopped conversion from fp to int.")); -static cl::opt -EnableIEEERndNear( - "enable-hexagon-ieee-rnd-near", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Generate non-chopped conversion from fp to int.")); +static cl::opt EnableBSBSched("enable-bsb-sched", + cl::Hidden, cl::ZeroOrMore, cl::init(true)); + +static cl::opt EnableHexagonHVXDouble("enable-hexagon-hvx-double", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Double Vector eXtensions")); + +static cl::opt EnableHexagonHVX("enable-hexagon-hvx", + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Enable Hexagon Vector eXtensions")); static cl::opt DisableHexagonMISched("disable-hexagon-misched", - cl::Hidden, cl::ZeroOrMore, cl::init(false), - cl::desc("Disable Hexagon MI Scheduling")); + cl::Hidden, cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon MI Scheduling")); + +void HexagonSubtarget::initializeEnvironment() { + UseMemOps = false; + ModeIEEERndNear = false; + UseBSBScheduling = false; +} HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - // If the programmer has not specified a Hexagon version, default to -mv4. - if (CPUString.empty()) - CPUString = "hexagonv4"; + CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU); - if (CPUString == "hexagonv4") { - HexagonArchVersion = V4; - } else if (CPUString == "hexagonv5") { - HexagonArchVersion = V5; - } else { + static std::map CpuTable { + { "hexagonv4", V4 }, + { "hexagonv5", V5 }, + { "hexagonv55", V55 }, + { "hexagonv60", V60 }, + }; + + auto foundIt = CpuTable.find(CPUString); + if (foundIt != CpuTable.end()) + HexagonArchVersion = foundIt->second; + else llvm_unreachable("Unrecognized Hexagon processor version"); - } + UseHVXOps = false; + UseHVXDblOps = false; ParseSubtargetFeatures(CPUString, FS); + + if (EnableHexagonHVX.getPosition()) + UseHVXOps = EnableHexagonHVX; + if (EnableHexagonHVXDouble.getPosition()) + UseHVXDblOps = EnableHexagonHVXDouble; + return *this; } @@ -76,6 +94,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), FrameLowering() { + initializeEnvironment(); + // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); @@ -91,6 +111,8 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU, ModeIEEERndNear = true; else ModeIEEERndNear = false; + + UseBSBScheduling = hasV60TOps() && EnableBSBSched; } // Pin the vtable to this file. 
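// Sketch, not pass code: the CPU-string lookup above, standalone. ArchEnum
// and lookupArch are illustrative names; the subtarget code itself hits
// llvm_unreachable where this sketch returns false.
#include <map>
#include <string>
enum ArchEnum { V4, V5, V55, V60 };
inline bool lookupArch(const std::string &CPU, ArchEnum &Out) {
  static const std::map<std::string, ArchEnum> Table = {
      {"hexagonv4", V4}, {"hexagonv5", V5},
      {"hexagonv55", V55}, {"hexagonv60", V60}};
  std::map<std::string, ArchEnum>::const_iterator It = Table.find(CPU);
  if (It == Table.end())
    return false;  // unrecognized Hexagon processor version
  Out = It->second;
  return true;
}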
diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 34cdad786f82..c7ae139c4346 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -34,15 +34,19 @@ namespace llvm { class HexagonSubtarget : public HexagonGenSubtargetInfo { virtual void anchor(); - bool UseMemOps; + bool UseMemOps, UseHVXOps, UseHVXDblOps; bool ModeIEEERndNear; public: enum HexagonArchEnum { - V4, V5 + V4, V5, V55, V60 }; HexagonArchEnum HexagonArchVersion; + /// True if the target should use Back-Skip-Back scheduling. This is the + /// default for V60. + bool UseBSBScheduling; + private: std::string CPUString; HexagonInstrInfo InstrInfo; @@ -50,6 +54,7 @@ private: HexagonSelectionDAGInfo TSInfo; HexagonFrameLowering FrameLowering; InstrItineraryData InstrItins; + void initializeEnvironment(); public: HexagonSubtarget(const Triple &TT, StringRef CPU, StringRef FS, @@ -84,7 +89,16 @@ public: bool useMemOps() const { return UseMemOps; } bool hasV5TOps() const { return getHexagonArchVersion() >= V5; } bool hasV5TOpsOnly() const { return getHexagonArchVersion() == V5; } + bool hasV55TOps() const { return getHexagonArchVersion() >= V55; } + bool hasV55TOpsOnly() const { return getHexagonArchVersion() == V55; } + bool hasV60TOps() const { return getHexagonArchVersion() >= V60; } + bool hasV60TOpsOnly() const { return getHexagonArchVersion() == V60; } bool modeIEEERndNear() const { return ModeIEEERndNear; } + bool useHVXOps() const { return UseHVXOps; } + bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; } + bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; } + + bool useBSBScheduling() const { return UseBSBScheduling; } bool enableMachineScheduler() const override; // Always use the TargetLowering default scheduler. 
// FIXME: This will use the vliw scheduler which is probably just hurting @@ -98,7 +112,7 @@ public: return Hexagon_SMALL_DATA_THRESHOLD; } const HexagonArchEnum &getHexagonArchVersion() const { - return HexagonArchVersion; + return HexagonArchVersion; } }; diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp index b50442969a29..9dccd696c989 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -16,12 +16,12 @@ #include "HexagonISelLowering.h" #include "HexagonMachineScheduler.h" #include "HexagonTargetObjectFile.h" +#include "HexagonTargetTransformInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -33,10 +33,16 @@ static cl::opt DisableHexagonCFGOpt("disable-hexagon-cfgopt", cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Disable Hexagon CFG Optimization")); +static cl::opt DisableStoreWidening("disable-store-widen", + cl::Hidden, cl::init(false), cl::desc("Disable store widening")); + static cl::opt EnableExpandCondsets("hexagon-expand-condsets", cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("Early expansion of MUX")); +static cl::opt EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable early if-conversion")); + static cl::opt EnableGenInsert("hexagon-insert", cl::init(true), cl::Hidden, cl::desc("Generate \"insert\" instructions")); @@ -46,10 +52,22 @@ static cl::opt EnableCommGEP("hexagon-commgep", cl::init(true), static cl::opt EnableGenExtract("hexagon-extract", cl::init(true), cl::Hidden, cl::desc("Generate \"extract\" instructions")); +static cl::opt EnableGenMux("hexagon-mux", cl::init(true), cl::Hidden, + cl::desc("Enable converting conditional transfers into MUX instructions")); + static cl::opt EnableGenPred("hexagon-gen-pred", cl::init(true), cl::Hidden, cl::desc("Enable conversion of arithmetic operations to " "predicate instructions")); +static cl::opt DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden, + cl::desc("Disable splitting double registers")); + +static cl::opt EnableBitSimplify("hexagon-bit", cl::init(true), + cl::Hidden, cl::desc("Bit simplification")); + +static cl::opt EnableLoopResched("hexagon-loop-resched", cl::init(true), + cl::Hidden, cl::desc("Loop rescheduling")); + /// HexagonTargetMachineModule - Note that this is used on hosts that /// cannot link in a library unless there are references into the /// library. 
In particular, it seems that it is not possible to get @@ -72,23 +90,30 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler", createVLIWMachineSched); namespace llvm { + FunctionPass *createHexagonBitSimplify(); + FunctionPass *createHexagonCallFrameInformation(); FunctionPass *createHexagonCFGOptimizer(); FunctionPass *createHexagonCommonGEP(); FunctionPass *createHexagonCopyToCombine(); + FunctionPass *createHexagonEarlyIfConversion(); FunctionPass *createHexagonExpandCondsets(); FunctionPass *createHexagonExpandPredSpillCode(); FunctionPass *createHexagonFixupHwLoops(); FunctionPass *createHexagonGenExtract(); FunctionPass *createHexagonGenInsert(); + FunctionPass *createHexagonGenMux(); FunctionPass *createHexagonGenPredicate(); FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel); + FunctionPass *createHexagonLoopRescheduling(); FunctionPass *createHexagonNewValueJump(); + FunctionPass *createHexagonOptimizeSZextends(); FunctionPass *createHexagonPacketizer(); FunctionPass *createHexagonPeephole(); - FunctionPass *createHexagonRemoveExtendArgs(const HexagonTargetMachine &TM); FunctionPass *createHexagonSplitConst32AndConst64(); + FunctionPass *createHexagonSplitDoubleRegs(); + FunctionPass *createHexagonStoreWidening(); } // end namespace llvm; /// HexagonTargetMachine ctor - Create an ILP32 architecture model. @@ -101,13 +126,46 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, "e-m:e-p:32:32-i1:32-i64:64-a:0-n32", TT, CPU, FS, - Options, RM, CM, OL), - TLOF(make_unique()), - Subtarget(TT, CPU, FS, *this) { - initAsmInfo(); + : LLVMTargetMachine(T, "e-m:e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-" + "i1:8:8-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-" + "n16:32", TT, CPU, FS, Options, RM, CM, OL), + TLOF(make_unique()) { + initAsmInfo(); } +const HexagonSubtarget * +HexagonTargetMachine::getSubtargetImpl(const Function &F) const { + AttributeSet FnAttrs = F.getAttributes(); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + auto &I = SubtargetMap[CPU + FS]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = llvm::make_unique(TargetTriple, CPU, FS, *this); + } + return I.get(); +} + +TargetIRAnalysis HexagonTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(HexagonTTIImpl(this, F)); + }); +} + + HexagonTargetMachine::~HexagonTargetMachine() {} namespace { @@ -166,7 +224,7 @@ bool HexagonPassConfig::addInstSelector() { bool NoOpt = (getOptLevel() == CodeGenOpt::None); if (!NoOpt) - addPass(createHexagonRemoveExtendArgs(TM)); + addPass(createHexagonOptimizeSZextends()); addPass(createHexagonISelDag(TM, getOptLevel())); @@ -174,19 +232,33 @@ bool HexagonPassConfig::addInstSelector() { // Create logical operations on predicate registers. 
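// Sketch, not pass code: the per-function subtarget caching pattern used by
// getSubtargetImpl above. One subtarget is built per distinct CPU+feature
// string and then reused; Subtarget/getOrCreate are illustrative names.
#include <map>
#include <memory>
#include <string>
struct Subtarget { std::string CPU, FS; };
inline Subtarget *getOrCreate(
    std::map<std::string, std::unique_ptr<Subtarget> > &Cache,
    const std::string &CPU, const std::string &FS) {
  std::unique_ptr<Subtarget> &Slot = Cache[CPU + FS];
  if (!Slot)
    Slot.reset(new Subtarget{CPU, FS});  // built once, then reused
  return Slot.get();
}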
if (EnableGenPred) addPass(createHexagonGenPredicate(), false); + // Rotate loops to expose bit-simplification opportunities. + if (EnableLoopResched) + addPass(createHexagonLoopRescheduling(), false); + // Split double registers. + if (!DisableHSDR) + addPass(createHexagonSplitDoubleRegs()); + // Bit simplification. + if (EnableBitSimplify) + addPass(createHexagonBitSimplify(), false); addPass(createHexagonPeephole()); printAndVerify("After hexagon peephole pass"); if (EnableGenInsert) addPass(createHexagonGenInsert(), false); + if (EnableEarlyIf) + addPass(createHexagonEarlyIfConversion(), false); } return false; } void HexagonPassConfig::addPreRegAlloc() { - if (getOptLevel() != CodeGenOpt::None) + if (getOptLevel() != CodeGenOpt::None) { + if (!DisableStoreWidening) + addPass(createHexagonStoreWidening(), false); if (!DisableHardwareLoops) addPass(createHexagonHardwareLoops(), false); + } } void HexagonPassConfig::addPostRegAlloc() { @@ -215,6 +287,13 @@ void HexagonPassConfig::addPreEmitPass() { if (!NoOpt) { if (!DisableHardwareLoops) addPass(createHexagonFixupHwLoops(), false); + // Generate MUX from pairs of conditional transfers. + if (EnableGenMux) + addPass(createHexagonGenMux(), false); + addPass(createHexagonPacketizer(), false); } + + // Add CFI instructions if necessary. + addPass(createHexagonCallFrameInformation(), false); } diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h index 115eadb98c33..968814b3ea32 100644 --- a/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/lib/Target/Hexagon/HexagonTargetMachine.h @@ -16,6 +16,7 @@ #include "HexagonInstrInfo.h" #include "HexagonSubtarget.h" +#include "HexagonTargetObjectFile.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -24,7 +25,7 @@ class Module; class HexagonTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; - HexagonSubtarget Subtarget; + mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap; public: HexagonTargetMachine(const Target &T, const Triple &TT, StringRef CPU, @@ -32,20 +33,18 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~HexagonTargetMachine() override; - const HexagonSubtarget *getSubtargetImpl(const Function &) const override { - return &Subtarget; - } + const HexagonSubtarget *getSubtargetImpl(const Function &F) const override; + static unsigned getModuleMatchQuality(const Module &M); TargetPassConfig *createPassConfig(PassManagerBase &PM) override; + TargetIRAnalysis getTargetIRAnalysis() override; - TargetLoweringObjectFile *getObjFileLowering() const override { - return TLOF.get(); + HexagonTargetObjectFile *getObjFileLowering() const override { + return static_cast<HexagonTargetObjectFile*>(TLOF.get()); } }; -extern bool flag_aligned_memcpy; - } // end namespace llvm #endif diff --git a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp index 4ea0e0d11998..ccca62021f5b 100644 --- a/lib/Target/Hexagon/HexagonTargetObjectFile.cpp +++ b/lib/Target/Hexagon/HexagonTargetObjectFile.cpp @@ -73,9 +73,10 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, if (!GVA) return false; - if (Kind.isBSS() || Kind.isDataNoRel() || Kind.isCommon()) { + if (Kind.isBSS() || Kind.isData() || Kind.isCommon()) { Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); + return IsInSmallSection( + GV->getParent()->getDataLayout().getTypeAllocSize(Ty)); } return false; @@ -89,7 +90,7 @@ HexagonTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
// Handle Small Section classification here. if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallBSSSection; - if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind)) + if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallDataSection; // Otherwise, we work the same as ELF. diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp new file mode 100644 index 000000000000..a05443eb83b8 --- /dev/null +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -0,0 +1,38 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. +/// +//===----------------------------------------------------------------------===// + +#include "HexagonTargetTransformInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagontti" + +TargetTransformInfo::PopcntSupportKind +HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const { + // Return Fast Hardware support as every input < 64 bits will be promoted + // to 64 bits. + return TargetTransformInfo::PSK_FastHardware; +} + +// The Hexagon target can unroll loops with run-time trip counts. +void HexagonTTIImpl::getUnrollingPreferences(Loop *L, + TTI::UnrollingPreferences &UP) { + UP.Runtime = UP.Partial = true; +} + +unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const { + return vector ? 0 : 32; +} diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h new file mode 100644 index 000000000000..71ae17a19e5f --- /dev/null +++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -0,0 +1,70 @@ +//===-- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +/// \file +/// This file implements a TargetTransformInfo analysis pass specific to the +/// Hexagon target machine. It uses the target's detailed information to provide +/// more precise answers to certain TTI queries, while letting the target +/// independent and default TTI implementations handle the rest. 
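// Sketch, not pass code: what the three TTI hooks above report, mocked
// standalone (MockHexagonTTI is an illustrative name). Popcount is fast
// because sub-64-bit inputs are promoted to 64 bits; unrolling permits
// runtime trip counts; 32 scalar and no vector registers are advertised.
struct MockHexagonTTI {
  bool FastPopcnt() const { return true; }
  bool RuntimeUnroll() const { return true; }
  int NumRegisters(bool Vector) const { return Vector ? 0 : 32; }
};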
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H +#define LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H + +#include "Hexagon.h" +#include "HexagonTargetMachine.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +class HexagonTTIImpl : public BasicTTIImplBase { + typedef BasicTTIImplBase BaseT; + typedef TargetTransformInfo TTI; + friend BaseT; + + const HexagonSubtarget *ST; + const HexagonTargetLowering *TLI; + + const HexagonSubtarget *getST() const { return ST; } + const HexagonTargetLowering *getTLI() const { return TLI; } + +public: + explicit HexagonTTIImpl(const HexagonTargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} + + // Provide value semantics. MSVC requires that we spell all of these out. + HexagonTTIImpl(const HexagonTTIImpl &Arg) + : BaseT(static_cast(Arg)), ST(Arg.ST), TLI(Arg.TLI) {} + HexagonTTIImpl(HexagonTTIImpl &&Arg) + : BaseT(std::move(static_cast(Arg))), ST(std::move(Arg.ST)), + TLI(std::move(Arg.TLI)) {} + + /// \name Scalar TTI Implementations + /// @{ + + TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; + + // The Hexagon target can unroll loops with run-time trip counts. + void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP); + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + unsigned getNumberOfRegisters(bool vector) const; + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index b91a3f6f8c6c..81850548bb6e 100644 --- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -16,35 +16,19 @@ // prune the dependence. 
// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/DFAPacketizer.h" -#include "Hexagon.h" -#include "HexagonMachineFunctionInfo.h" #include "HexagonRegisterInfo.h" #include "HexagonSubtarget.h" #include "HexagonTargetMachine.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "HexagonVLIWPacketizer.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include #include @@ -52,9 +36,22 @@ using namespace llvm; #define DEBUG_TYPE "packets" +static cl::opt DisablePacketizer("disable-packetizer", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable Hexagon packetizer pass")); + static cl::opt PacketizeVolatiles("hexagon-packetize-volatiles", - cl::ZeroOrMore, cl::Hidden, cl::init(true), - cl::desc("Allow non-solo packetization of volatile memory references")); + cl::ZeroOrMore, cl::Hidden, cl::init(true), + cl::desc("Allow non-solo packetization of volatile memory references")); + +static cl::opt EnableGenAllInsnClass("enable-gen-insn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC")); + +static cl::opt DisableVecDblNVStores("disable-vecdbl-nv-stores", + cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Disable vector double new-value-stores")); + +extern cl::opt ScheduleInlineAsm; namespace llvm { FunctionPass *createHexagonPacketizer(); @@ -64,7 +61,6 @@ namespace llvm { namespace { class HexagonPacketizer : public MachineFunctionPass { - public: static char ID; HexagonPacketizer() : MachineFunctionPass(ID) { @@ -73,103 +69,25 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } - const char *getPassName() const override { return "Hexagon Packetizer"; } - bool runOnMachineFunction(MachineFunction &Fn) override; + + private: + const HexagonInstrInfo *HII; + const HexagonRegisterInfo *HRI; }; + char HexagonPacketizer::ID = 0; - - class HexagonPacketizerList : public VLIWPacketizerList { - - private: - - // Has the instruction been promoted to a dot-new instruction. - bool PromotedToDotNew; - - // Has the instruction been glued to allocframe. - bool GlueAllocframeStore; - - // Has the feeder instruction been glued to new value jump. - bool GlueToNewValueJump; - - // Check if there is a dependence between some instruction already in this - // packet and this instruction. 
- bool Dependence; - - // Only check for dependence if there are resources available to - // schedule this instruction. - bool FoundSequentialDependence; - - /// \brief A handle to the branch probability pass. - const MachineBranchProbabilityInfo *MBPI; - - // Track MIs with ignored dependece. - std::vector IgnoreDepMIs; - - public: - // Ctor. - HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI, - const MachineBranchProbabilityInfo *MBPI); - - // initPacketizerState - initialize some internal flags. - void initPacketizerState() override; - - // ignorePseudoInstruction - Ignore bundling of pseudo instructions. - bool ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) override; - - // isSoloInstruction - return true if instruction MI can not be packetized - // with any other instruction, which means that MI itself is a packet. - bool isSoloInstruction(MachineInstr *MI) override; - - // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ - // together. - bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override; - - // isLegalToPruneDependencies - Is it legal to prune dependece between SUI - // and SUJ. - bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; - - MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; - private: - bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg); - bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass* RC); - bool CanPromoteToDotNew(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map &MIToSUnit, - MachineBasicBlock::iterator &MII, - const TargetRegisterClass *RC); - bool - CanPromoteToNewValue(MachineInstr *MI, SUnit *PacketSU, unsigned DepReg, - const std::map &MIToSUnit, - MachineBasicBlock::iterator &MII); - bool CanPromoteToNewValueStore( - MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg, - const std::map &MIToSUnit); - bool DemoteToDotOld(MachineInstr *MI); - bool ArePredicatesComplements( - MachineInstr *MI1, MachineInstr *MI2, - const std::map &MIToSUnit); - bool RestrictingDepExistInPacket(MachineInstr *, unsigned, - const std::map &); - bool isNewifiable(MachineInstr* MI); - bool isCondInst(MachineInstr* MI); - bool tryAllocateResourcesForConstExt(MachineInstr* MI); - bool canReserveResourcesForConstExt(MachineInstr *MI); - void reserveResourcesForConstExt(MachineInstr* MI); - bool isNewValueInst(MachineInstr* MI); - }; } INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", @@ -177,26 +95,93 @@ INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", false, false) -// HexagonPacketizerList Ctor. 
-HexagonPacketizerList::HexagonPacketizerList( - MachineFunction &MF, MachineLoopInfo &MLI, - const MachineBranchProbabilityInfo *MBPI) - : VLIWPacketizerList(MF, MLI, true) { - this->MBPI = MBPI; +HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, + MachineLoopInfo &MLI, AliasAnalysis *AA, + const MachineBranchProbabilityInfo *MBPI) + : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) { + HII = MF.getSubtarget().getInstrInfo(); + HRI = MF.getSubtarget().getRegisterInfo(); } -bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { - const TargetInstrInfo *TII = Fn.getSubtarget().getInstrInfo(); - MachineLoopInfo &MLI = getAnalysis(); - const MachineBranchProbabilityInfo *MBPI = - &getAnalysis(); +// Check if FirstI modifies a register that SecondI reads. +static bool hasWriteToReadDep(const MachineInstr *FirstI, + const MachineInstr *SecondI, const TargetRegisterInfo *TRI) { + for (auto &MO : FirstI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned R = MO.getReg(); + if (SecondI->readsRegister(R, TRI)) + return true; + } + return false; +} + + +static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, + MachineBasicBlock::iterator BundleIt, bool Before) { + MachineBasicBlock::instr_iterator InsertPt; + if (Before) + InsertPt = BundleIt.getInstrIterator(); + else + InsertPt = std::next(BundleIt).getInstrIterator(); + + MachineBasicBlock &B = *MI->getParent(); + // The instruction should at least be bundled with the preceding instruction + // (there will always be one, i.e. BUNDLE, if nothing else). + assert(MI->isBundledWithPred()); + if (MI->isBundledWithSucc()) { + MI->clearFlag(MachineInstr::BundledSucc); + MI->clearFlag(MachineInstr::BundledPred); + } else { + // If it's not bundled with the successor (i.e. it is the last one + // in the bundle), then we can simply unbundle it from the predecessor, + // which will take care of updating the predecessor's flag. + MI->unbundleFromPred(); + } + B.splice(InsertPt, &B, MI); + + // Get the size of the bundle without asserting. + MachineBasicBlock::const_instr_iterator I(BundleIt); + MachineBasicBlock::const_instr_iterator E = B.instr_end(); + unsigned Size = 0; + for (++I; I != E && I->isBundledWithPred(); ++I) + ++Size; + + // If there are still two or more instructions, then there is nothing + // else to be done. + if (Size > 1) + return BundleIt; + + // Otherwise, extract the single instruction out and delete the bundle. + MachineBasicBlock::iterator NextIt = std::next(BundleIt); + MachineInstr *SingleI = BundleIt->getNextNode(); + SingleI->unbundleFromPred(); + assert(!SingleI->isBundledWithSucc()); + BundleIt->eraseFromParent(); + return NextIt; +} + + +bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { + if (DisablePacketizer) + return false; + + HII = MF.getSubtarget().getInstrInfo(); + HRI = MF.getSubtarget().getRegisterInfo(); + auto &MLI = getAnalysis(); + auto *AA = &getAnalysis().getAAResults(); + auto *MBPI = &getAnalysis(); + + if (EnableGenAllInsnClass) + HII->genAllInsnTimingClasses(MF); + // Instantiate the packetizer. - HexagonPacketizerList Packetizer(Fn, MLI, MBPI); + HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI); // DFA state table should not be empty. assert(Packetizer.getResourceTracker() && "Empty DFA table!"); @@ -211,162 +196,107 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &Fn) { // dependence between Insn 0 and Insn 2. 
   // packetization
   //
-  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
-       MBB != MBBe; ++MBB) {
-    MachineBasicBlock::iterator End = MBB->end();
-    MachineBasicBlock::iterator MI = MBB->begin();
+  for (auto &MB : MF) {
+    auto End = MB.end();
+    auto MI = MB.begin();
     while (MI != End) {
+      auto NextI = std::next(MI);
       if (MI->isKill()) {
-        MachineBasicBlock::iterator DeleteMI = MI;
-        ++MI;
-        MBB->erase(DeleteMI);
-        End = MBB->end();
-        continue;
+        MB.erase(MI);
+        End = MB.end();
       }
-      ++MI;
+      MI = NextI;
     }
   }

   // Loop over all of the basic blocks.
-  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
-       MBB != MBBe; ++MBB) {
-    // Find scheduling regions and schedule / packetize each region.
-    unsigned RemainingCount = MBB->size();
-    for(MachineBasicBlock::iterator RegionEnd = MBB->end();
-        RegionEnd != MBB->begin();) {
-      // The next region starts above the previous region. Look backward in the
-      // instruction stream until we find the nearest boundary.
-      MachineBasicBlock::iterator I = RegionEnd;
-      for(;I != MBB->begin(); --I, --RemainingCount) {
-        if (TII->isSchedulingBoundary(std::prev(I), MBB, Fn))
-          break;
-      }
-      I = MBB->begin();
+  for (auto &MB : MF) {
+    auto Begin = MB.begin(), End = MB.end();
+    while (Begin != End) {
+      // Find the first non-boundary starting from the end of the last
+      // scheduling region.
+      MachineBasicBlock::iterator RB = Begin;
+      while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF))
+        ++RB;
+      // Find the first boundary starting from the beginning of the new
+      // region.
+      MachineBasicBlock::iterator RE = RB;
+      while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF))
+        ++RE;
+      // Add the scheduling boundary if it's not block end.
+      if (RE != End)
+        ++RE;
+      // If RB == End, then RE == End.
+      if (RB != End)
+        Packetizer.PacketizeMIs(&MB, RB, RE);

-      // Skip empty scheduling regions.
-      if (I == RegionEnd) {
-        RegionEnd = std::prev(RegionEnd);
-        --RemainingCount;
-        continue;
-      }
-      // Skip regions with one instruction.
-      if (I == std::prev(RegionEnd)) {
-        RegionEnd = std::prev(RegionEnd);
-        continue;
-      }
-
-      Packetizer.PacketizeMIs(MBB, I, RegionEnd);
-      RegionEnd = I;
+      Begin = RE;
     }
   }

+  Packetizer.unpacketizeSoloInstrs(MF);
   return true;
 }

-static bool IsIndirectCall(MachineInstr* MI) {
-  return MI->getOpcode() == Hexagon::J2_callr;
+// Reserve resources for a constant extender. Trigger an assertion if the
+// reservation fails.
+void HexagonPacketizerList::reserveResourcesForConstExt() {
+  if (!tryAllocateResourcesForConstExt(true))
+    llvm_unreachable("Resources not available");
 }

-// Reserve resources for constant extender. Trigure an assertion if
-// reservation fail.
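The rewritten driver loop above scans forward in two steps: skip past leading scheduling boundaries, then extend the region up to and including the next boundary, and hand [RB, RE) to PacketizeMIs. A minimal standalone model of that two-cursor scan, with plain integers standing in for MachineInstrs and a hypothetical forEachRegion driver (illustrative names, not LLVM API):

    #include <cstdio>
    #include <vector>

    // Model of the forward region scan: skip boundaries, then take
    // instructions up to and including the next boundary. A value of 1
    // marks a scheduling boundary.
    template <typename Pred, typename Fn>
    void forEachRegion(const std::vector<int> &Block, Pred IsBoundary,
                       Fn Packetize) {
      std::size_t Begin = 0, End = Block.size();
      while (Begin != End) {
        std::size_t RB = Begin;                 // first non-boundary
        while (RB != End && IsBoundary(Block[RB]))
          ++RB;
        std::size_t RE = RB;                    // next boundary after RB
        while (RE != End && !IsBoundary(Block[RE]))
          ++RE;
        if (RE != End)
          ++RE;                                 // boundary joins the region
        if (RB != End)
          Packetize(RB, RE);                    // "packetize" [RB, RE)
        Begin = RE;
      }
    }

    int main() {
      std::vector<int> B = {1, 0, 0, 1, 0, 0};
      forEachRegion(B, [](int I) { return I == 1; },
                    [](std::size_t L, std::size_t R) {
                      std::printf("[%zu,%zu)\n", L, R);
                    });
    }

On the sample block this prints [1,4) and [4,6): each region absorbs its trailing boundary, which mirrors the `if (RE != End) ++RE;` step in the patch.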
-void HexagonPacketizerList::reserveResourcesForConstExt(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - - if (ResourceTracker->canReserveResources(PseudoMI)) { - ResourceTracker->reserveResources(PseudoMI); - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - } else { - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - llvm_unreachable("can not reserve resources for constant extender."); - } - return; +bool HexagonPacketizerList::canReserveResourcesForConstExt() { + return tryAllocateResourcesForConstExt(false); } -bool HexagonPacketizerList::canReserveResourcesForConstExt(MachineInstr *MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - assert((QII->isExtended(MI) || QII->isConstExtended(MI)) && - "Should only be called for constant extended instructions"); - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - bool CanReserve = ResourceTracker->canReserveResources(PseudoMI); - MF.DeleteMachineInstr(PseudoMI); - return CanReserve; +// Allocate resources (i.e. 4 bytes) for constant extender. If succeeded, +// return true, otherwise, return false. +bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { + auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); + bool Avail = ResourceTracker->canReserveResources(ExtMI); + if (Reserve && Avail) + ResourceTracker->reserveResources(ExtMI); + MF.DeleteMachineInstr(ExtMI); + return Avail; } -// Allocate resources (i.e. 4 bytes) for constant extender. If succeed, return -// true, otherwise, return false. -bool HexagonPacketizerList::tryAllocateResourcesForConstExt(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - MachineInstr *PseudoMI = MF.CreateMachineInstr(QII->get(Hexagon::A4_ext), - MI->getDebugLoc()); - if (ResourceTracker->canReserveResources(PseudoMI)) { - ResourceTracker->reserveResources(PseudoMI); - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); +bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, + SDep::Kind DepType, unsigned DepReg) { + // Check for LR dependence. + if (DepReg == HRI->getRARegister()) return true; - } else { - MI->getParent()->getParent()->DeleteMachineInstr(PseudoMI); - return false; - } -} - -bool HexagonPacketizerList::IsCallDependent(MachineInstr* MI, - SDep::Kind DepType, - unsigned DepReg) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); - - // Check for lr dependence - if (DepReg == QRI->getRARegister()) { - return true; - } - - if (QII->isDeallocRet(MI)) { - if (DepReg == QRI->getFrameRegister() || - DepReg == QRI->getStackRegister()) + if (HII->isDeallocRet(MI)) + if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) return true; - } - // Check if this is a predicate dependence - const TargetRegisterClass* RC = QRI->getMinimalPhysRegClass(DepReg); - if (RC == &Hexagon::PredRegsRegClass) { + // Check if this is a predicate dependence. 
+ const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); + if (RC == &Hexagon::PredRegsRegClass) return true; - } - // - // Lastly check for an operand used in an indirect call - // If we had an attribute for checking if an instruction is an indirect call, - // then we could have avoided this relatively brittle implementation of - // IsIndirectCall() - // - // Assumes that the first operand of the CALLr is the function address - // - if (IsIndirectCall(MI) && (DepType == SDep::Data)) { + // Assumes that the first operand of the CALLr is the function address. + if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { MachineOperand MO = MI->getOperand(0); - if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) { + if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) return true; - } } return false; } -static bool IsRegDependence(const SDep::Kind DepType) { - return (DepType == SDep::Data || DepType == SDep::Anti || - DepType == SDep::Output); +static bool isRegDependence(const SDep::Kind DepType) { + return DepType == SDep::Data || DepType == SDep::Anti || + DepType == SDep::Output; } -static bool IsDirectJump(MachineInstr* MI) { - return (MI->getOpcode() == Hexagon::J2_jump); +static bool isDirectJump(const MachineInstr* MI) { + return MI->getOpcode() == Hexagon::J2_jump; } -static bool IsSchedBarrier(MachineInstr* MI) { +static bool isSchedBarrier(const MachineInstr* MI) { switch (MI->getOpcode()) { case Hexagon::Y2_barrier: return true; @@ -374,76 +304,127 @@ static bool IsSchedBarrier(MachineInstr* MI) { return false; } -static bool IsControlFlow(MachineInstr* MI) { +static bool isControlFlow(const MachineInstr* MI) { return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); } -static bool IsLoopN(MachineInstr *MI) { - return (MI->getOpcode() == Hexagon::J2_loop0i || - MI->getOpcode() == Hexagon::J2_loop0r); -} -/// DoesModifyCalleeSavedReg - Returns true if the instruction modifies a -/// callee-saved register. -static bool DoesModifyCalleeSavedReg(MachineInstr *MI, +/// Returns true if the instruction modifies a callee-saved register. +static bool doesModifyCalleeSavedReg(const MachineInstr *MI, const TargetRegisterInfo *TRI) { - for (const MCPhysReg *CSR = - TRI->getCalleeSavedRegs(MI->getParent()->getParent()); - *CSR; ++CSR) { - unsigned CalleeSavedReg = *CSR; - if (MI->modifiesRegister(CalleeSavedReg, TRI)) + const MachineFunction &MF = *MI->getParent()->getParent(); + for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + if (MI->modifiesRegister(*CSR, TRI)) return true; - } return false; } -// Returns true if an instruction can be promoted to .new predicate -// or new-value store. -bool HexagonPacketizerList::isNewifiable(MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - return isCondInst(MI) || QII->mayBeNewStore(MI); +// TODO: MI->isIndirectBranch() and IsRegisterJump(MI) +// Returns true if an instruction can be promoted to .new predicate or +// new-value store. +bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) { + return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI); } -bool HexagonPacketizerList::isCondInst (MachineInstr* MI) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - const MCInstrDesc& TID = MI->getDesc(); - // bug 5670: until that is fixed, - // this portion is disabled. 
-  if ( TID.isConditionalBranch() // && !IsRegisterJump(MI)) ||
-      || QII->isConditionalTransfer(MI)
-      || QII->isConditionalALU32(MI)
-      || QII->isConditionalLoad(MI)
-      || QII->isConditionalStore(MI)) {
-    return true;
-  }
-  return false;
-}
-
-
-// Promote an instructiont to its .new form.
-// At this time, we have already made a call to CanPromoteToDotNew
-// and made sure that it can *indeed* be promoted.
-bool HexagonPacketizerList::PromoteToDotNew(MachineInstr* MI,
-    SDep::Kind DepType, MachineBasicBlock::iterator &MII,
-    const TargetRegisterClass* RC) {
-
-  assert (DepType == SDep::Data);
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-
-  int NewOpcode;
-  if (RC == &Hexagon::PredRegsRegClass)
-    NewOpcode = QII->GetDotNewPredOp(MI, MBPI);
-  else
-    NewOpcode = QII->GetDotNewOp(MI);
-  MI->setDesc(QII->get(NewOpcode));
-
+// Promote an instruction to its .cur form.
+// At this time, we have already made a call to canPromoteToDotCur and made
+// sure that it can *indeed* be promoted.
+bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
+      SDep::Kind DepType, MachineBasicBlock::iterator &MII,
+      const TargetRegisterClass* RC) {
+  assert(DepType == SDep::Data);
+  int CurOpcode = HII->getDotCurOp(MI);
+  MI->setDesc(HII->get(CurOpcode));
   return true;
 }

-bool HexagonPacketizerList::DemoteToDotOld(MachineInstr* MI) {
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  int NewOpcode = QII->GetDotOldOp(MI->getOpcode());
-  MI->setDesc(QII->get(NewOpcode));
+void HexagonPacketizerList::cleanUpDotCur() {
+  MachineInstr *MI = NULL;
+  for (auto BI : CurrentPacketMIs) {
+    DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
+    if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
+      MI = BI;
+      continue;
+    }
+    if (MI) {
+      for (auto &MO : BI->operands())
+        if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
+          return;
+    }
+  }
+  if (!MI)
+    return;
+  // We did not find a use of the CUR, so de-cur it.
+  MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
+  DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
+}
+
+// Check to see if an instruction can be dot cur.
+bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
+      const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+      const TargetRegisterClass *RC) {
+  if (!HII->isV60VectorInstruction(MI))
+    return false;
+  if (!HII->isV60VectorInstruction(MII))
+    return false;
+
+  // Already a dot new instruction.
+  if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
+    return false;
+
+  if (!HII->mayBeCurLoad(MI))
+    return false;
+
+  // The "cur value" cannot come from inline asm.
+  if (PacketSU->getInstr()->isInlineAsm())
+    return false;
+
+  // Make sure candidate instruction uses cur.
+  DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
+        MI->dump();
+        dbgs() << "in packet\n";);
+  MachineInstr *MJ = MII;
+  DEBUG(dbgs() << "Checking CUR against "; MJ->dump(););
+  unsigned DestReg = MI->getOperand(0).getReg();
+  bool FoundMatch = false;
+  for (auto &MO : MJ->operands())
+    if (MO.isReg() && MO.getReg() == DestReg)
+      FoundMatch = true;
+  if (!FoundMatch)
+    return false;
+
+  // Check for existing uses of a vector register within the packet which
+  // would be affected by converting a vector load into .cur format.
+  for (auto BI : CurrentPacketMIs) {
+    DEBUG(dbgs() << "packet has "; BI->dump(););
+    if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
+      return false;
+  }
+
+  DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
+  // We can convert the opcode into a .cur.
+ return true; +} + +// Promote an instruction to its .new form. At this time, we have already +// made a call to canPromoteToDotNew and made sure that it can *indeed* be +// promoted. +bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI, + SDep::Kind DepType, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC) { + assert (DepType == SDep::Data); + int NewOpcode; + if (RC == &Hexagon::PredRegsRegClass) + NewOpcode = HII->getDotNewPredOp(MI, MBPI); + else + NewOpcode = HII->getDotNewOp(MI); + MI->setDesc(HII->get(NewOpcode)); + return true; +} + +bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) { + int NewOpcode = HII->getDotOldOp(MI->getOpcode()); + MI->setDesc(HII->get(NewOpcode)); return true; } @@ -455,175 +436,173 @@ enum PredicateKind { /// Returns true if an instruction is predicated on p0 and false if it's /// predicated on !p0. -static PredicateKind getPredicateSense(MachineInstr* MI, - const HexagonInstrInfo *QII) { - if (!QII->isPredicated(MI)) +static PredicateKind getPredicateSense(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + if (!HII->isPredicated(MI)) return PK_Unknown; - - if (QII->isPredicatedTrue(MI)) + if (HII->isPredicatedTrue(MI)) return PK_True; - return PK_False; } -static MachineOperand& GetPostIncrementOperand(MachineInstr *MI, - const HexagonInstrInfo *QII) { - assert(QII->isPostIncrement(MI) && "Not a post increment operation."); +static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, + const HexagonInstrInfo *HII) { + assert(HII->isPostIncrement(MI) && "Not a post increment operation."); #ifndef NDEBUG // Post Increment means duplicates. Use dense map to find duplicates in the // list. Caution: Densemap initializes with the minimum of 64 buckets, // whereas there are at most 5 operands in the post increment. - DenseMap DefRegsSet; - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) - if (MI->getOperand(opNum).isReg() && - MI->getOperand(opNum).isDef()) { - DefRegsSet[MI->getOperand(opNum).getReg()] = 1; - } + DenseSet DefRegsSet; + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isDef()) + DefRegsSet.insert(MO.getReg()); - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) - if (MI->getOperand(opNum).isReg() && - MI->getOperand(opNum).isUse()) { - if (DefRegsSet[MI->getOperand(opNum).getReg()]) { - return MI->getOperand(opNum); - } - } + for (auto &MO : MI->operands()) + if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) + return MO; #else - if (MI->getDesc().mayLoad()) { + if (MI->mayLoad()) { + const MachineOperand &Op1 = MI->getOperand(1); // The 2nd operand is always the post increment operand in load. - assert(MI->getOperand(1).isReg() && - "Post increment operand has be to a register."); - return (MI->getOperand(1)); + assert(Op1.isReg() && "Post increment operand has be to a register."); + return Op1; } if (MI->getDesc().mayStore()) { + const MachineOperand &Op0 = MI->getOperand(0); // The 1st operand is always the post increment operand in store. - assert(MI->getOperand(0).isReg() && - "Post increment operand has be to a register."); - return (MI->getOperand(0)); + assert(Op0.isReg() && "Post increment operand has be to a register."); + return Op0; } #endif // we should never come here. llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); } -// get the value being stored -static MachineOperand& GetStoreValueOperand(MachineInstr *MI) { +// Get the value being stored. 
+static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) { // value being stored is always the last operand. - return (MI->getOperand(MI->getNumOperands()-1)); + return MI->getOperand(MI->getNumOperands()-1); } -// can be new value store? +static bool isLoadAbsSet(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::L4_loadrd_ap: + case Hexagon::L4_loadrb_ap: + case Hexagon::L4_loadrh_ap: + case Hexagon::L4_loadrub_ap: + case Hexagon::L4_loadruh_ap: + case Hexagon::L4_loadri_ap: + return true; + } + return false; +} + +static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { + assert(isLoadAbsSet(MI)); + return MI->getOperand(1); +} + + +// Can be new value store? // Following restrictions are to be respected in convert a store into // a new value store. // 1. If an instruction uses auto-increment, its address register cannot // be a new-value register. Arch Spec 5.4.2.1 -// 2. If an instruction uses absolute-set addressing mode, -// its address register cannot be a new-value register. -// Arch Spec 5.4.2.1.TODO: This is not enabled as -// as absolute-set address mode patters are not implemented. +// 2. If an instruction uses absolute-set addressing mode, its address +// register cannot be a new-value register. Arch Spec 5.4.2.1. // 3. If an instruction produces a 64-bit result, its registers cannot be used // as new-value registers. Arch Spec 5.4.2.2. -// 4. If the instruction that sets a new-value register is conditional, then +// 4. If the instruction that sets the new-value register is conditional, then // the instruction that uses the new-value register must also be conditional, // and both must always have their predicates evaluate identically. // Arch Spec 5.4.2.3. -// 5. There is an implied restriction of a packet can not have another store, -// if there is a new value store in the packet. Corollary, if there is +// 5. There is an implied restriction that a packet cannot have another store, +// if there is a new value store in the packet. Corollary: if there is // already a store in a packet, there can not be a new value store. // Arch Spec: 3.4.4.2 -bool HexagonPacketizerList::CanPromoteToNewValueStore( - MachineInstr *MI, MachineInstr *PacketMI, unsigned DepReg, - const std::map &MIToSUnit) { - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, + const MachineInstr *PacketMI, unsigned DepReg) { // Make sure we are looking at the store, that can be promoted. - if (!QII->mayBeNewStore(MI)) + if (!HII->mayBeNewStore(MI)) return false; - // Make sure there is dependency and can be new value'ed - if (GetStoreValueOperand(MI).isReg() && - GetStoreValueOperand(MI).getReg() != DepReg) + // Make sure there is dependency and can be new value'd. + const MachineOperand &Val = getStoreValueOperand(MI); + if (Val.isReg() && Val.getReg() != DepReg) return false; - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); const MCInstrDesc& MCID = PacketMI->getDesc(); - // first operand is always the result - const TargetRegisterClass* PacketRC = QII->getRegClass(MCID, 0, QRI, MF); - - // if there is already an store in the packet, no can do new value store - // Arch Spec 3.4.4.2. 
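The comment block above enumerates the architectural constraints (Arch Spec 5.4.2.x and 3.4.4.2) that gate promotion of a store to new-value form. A condensed standalone restatement of that checklist follows; every field of InstrFacts is a hypothetical stand-in for a HexagonInstrInfo query, so treat this as a sketch of the rule set, not the patch's implementation:

    #include <vector>

    // Hypothetical per-instruction facts; stand-ins for HII queries.
    struct InstrFacts {
      bool UsesAutoIncrement;    // address uses post-increment
      bool UsesAbsoluteSet;      // address uses absolute-set mode
      bool Produces64Bit;        // producer writes a 64-bit register pair
      bool IsConditional;        // instruction is predicated
      bool SameSenseAsProducer;  // predicate sense matches the producer's
      bool IsStore;
    };

    // Mirrors constraints 1-5 listed in the comment above.
    bool mayFormNewValueStore(const InstrFacts &Producer,
                              const InstrFacts &Store,
                              const std::vector<InstrFacts> &Packet) {
      if (Store.UsesAutoIncrement || Store.UsesAbsoluteSet)  // rules 1, 2
        return false;
      if (Producer.Produces64Bit)                            // rule 3
        return false;
      if (Producer.IsConditional &&                          // rule 4
          (!Store.IsConditional || !Store.SameSenseAsProducer))
        return false;
      for (const InstrFacts &I : Packet)                     // rule 5
        if (I.IsStore)
          return false;
      return true;
    }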
- for (std::vector::iterator VI = CurrentPacketMIs.begin(), - VE = CurrentPacketMIs.end(); - (VI != VE); ++VI) { - SUnit *PacketSU = MIToSUnit.find(*VI)->second; - if (PacketSU->getInstr()->getDesc().mayStore() || - // if we have mayStore = 1 set on ALLOCFRAME and DEALLOCFRAME, - // then we don't need this - PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe || - PacketSU->getInstr()->getOpcode() == Hexagon::L2_deallocframe) - return false; - } - - if (PacketRC == &Hexagon::DoubleRegsRegClass) { - // new value store constraint: double regs can not feed into new value store - // arch spec section: 5.4.2.2 + // First operand is always the result. + const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); + // Double regs can not feed into new value store: PRM section: 5.4.2.2. + if (PacketRC == &Hexagon::DoubleRegsRegClass) return false; + + // New-value stores are of class NV (slot 0), dual stores require class ST + // in slot 0 (PRM 5.5). + for (auto I : CurrentPacketMIs) { + SUnit *PacketSU = MIToSUnit.find(I)->second; + if (PacketSU->getInstr()->mayStore()) + return false; } // Make sure it's NOT the post increment register that we are going to // new value. - if (QII->isPostIncrement(MI) && - MI->getDesc().mayStore() && - GetPostIncrementOperand(MI, QII).getReg() == DepReg) { + if (HII->isPostIncrement(MI) && + getPostIncrementOperand(MI, HII).getReg() == DepReg) { return false; } - if (QII->isPostIncrement(PacketMI) && - PacketMI->getDesc().mayLoad() && - GetPostIncrementOperand(PacketMI, QII).getReg() == DepReg) { - // if source is post_inc, or absolute-set addressing, - // it can not feed into new value store - // r3 = memw(r2++#4) - // memw(r30 + #-1404) = r2.new -> can not be new value store - // arch spec section: 5.4.2.1 + if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() && + getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { + // If source is post_inc, or absolute-set addressing, it can not feed + // into new value store + // r3 = memw(r2++#4) + // memw(r30 + #-1404) = r2.new -> can not be new value store + // arch spec section: 5.4.2.1. return false; } + if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg) + return false; + // If the source that feeds the store is predicated, new value store must // also be predicated. - if (QII->isPredicated(PacketMI)) { - if (!QII->isPredicated(MI)) + if (HII->isPredicated(PacketMI)) { + if (!HII->isPredicated(MI)) return false; // Check to make sure that they both will have their predicates - // evaluate identically + // evaluate identically. unsigned predRegNumSrc = 0; unsigned predRegNumDst = 0; const TargetRegisterClass* predRegClass = nullptr; - // Get predicate register used in the source instruction - for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) { - if ( PacketMI->getOperand(opNum).isReg()) - predRegNumSrc = PacketMI->getOperand(opNum).getReg(); - predRegClass = QRI->getMinimalPhysRegClass(predRegNumSrc); - if (predRegClass == &Hexagon::PredRegsRegClass) { + // Get predicate register used in the source instruction. 
+ for (auto &MO : PacketMI->operands()) { + if (!MO.isReg()) + continue; + predRegNumSrc = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc); + if (predRegClass == &Hexagon::PredRegsRegClass) break; - } } - assert ((predRegClass == &Hexagon::PredRegsRegClass ) && - ("predicate register not found in a predicated PacketMI instruction")); + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated PacketMI instruction"); - // Get predicate register used in new-value store instruction - for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) { - if ( MI->getOperand(opNum).isReg()) - predRegNumDst = MI->getOperand(opNum).getReg(); - predRegClass = QRI->getMinimalPhysRegClass(predRegNumDst); - if (predRegClass == &Hexagon::PredRegsRegClass) { + // Get predicate register used in new-value store instruction. + for (auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + predRegNumDst = MO.getReg(); + predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst); + if (predRegClass == &Hexagon::PredRegsRegClass) break; - } } - assert ((predRegClass == &Hexagon::PredRegsRegClass ) && - ("predicate register not found in a predicated MI instruction")); + assert((predRegClass == &Hexagon::PredRegsRegClass) && + "predicate register not found in a predicated MI instruction"); // New-value register producer and user (store) need to satisfy these // constraints: @@ -632,13 +611,11 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( // should also be .new predicated and if producer is not .new predicated // then store should not be .new predicated. // 3) Both new-value register producer and user should have same predicate - // sense, i.e, either both should be negated or both should be none negated. - - if (( predRegNumDst != predRegNumSrc) || - QII->isDotNewInst(PacketMI) != QII->isDotNewInst(MI) || - getPredicateSense(MI, QII) != getPredicateSense(PacketMI, QII)) { + // sense, i.e, either both should be negated or both should be non-negated. + if (predRegNumDst != predRegNumSrc || + HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || + getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) return false; - } } // Make sure that other than the new-value register no other store instruction @@ -649,81 +626,77 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( // including PacketMI. Howerver, we need to perform the check for the // remaining instructions in the packet. - std::vector::iterator VI; - std::vector::iterator VE; unsigned StartCheck = 0; - for (VI=CurrentPacketMIs.begin(), VE = CurrentPacketMIs.end(); - (VI != VE); ++VI) { - SUnit *TempSU = MIToSUnit.find(*VI)->second; + for (auto I : CurrentPacketMIs) { + SUnit *TempSU = MIToSUnit.find(I)->second; MachineInstr* TempMI = TempSU->getInstr(); // Following condition is true for all the instructions until PacketMI is // reached (StartCheck is set to 0 before the for loop). // StartCheck flag is 1 for all the instructions after PacketMI. - if (TempMI != PacketMI && !StartCheck) // start processing only after - continue; // encountering PacketMI + if (TempMI != PacketMI && !StartCheck) // Start processing only after + continue; // encountering PacketMI. StartCheck = 1; - if (TempMI == PacketMI) // We don't want to check PacketMI for dependence + if (TempMI == PacketMI) // We don't want to check PacketMI for dependence. 
       continue;
-    for(unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
-      if (MI->getOperand(opNum).isReg() &&
-          TempSU->getInstr()->modifiesRegister(MI->getOperand(opNum).getReg(),
-                                               QRI))
+    for (auto &MO : MI->operands())
+      if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
         return false;
-    }
   }

   // Make sure that for non-POST_INC stores:
   // 1. The only use of reg is DepReg and no other registers.
   //    This handles V4 base+index registers.
   //    The following store can not be dot new.
-  //    Eg. r0 = add(r0, #3)a
+  //    Eg. r0 = add(r0, #3)
   //    memw(r1+r0<<#2) = r0
-  if (!QII->isPostIncrement(MI) &&
-      GetStoreValueOperand(MI).isReg() &&
-      GetStoreValueOperand(MI).getReg() == DepReg) {
-    for(unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
-      if (MI->getOperand(opNum).isReg() &&
-          MI->getOperand(opNum).getReg() == DepReg) {
+  if (!HII->isPostIncrement(MI)) {
+    for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
+      const MachineOperand &MO = MI->getOperand(opNum);
+      if (MO.isReg() && MO.getReg() == DepReg)
         return false;
-      }
-    }
-    // 2. If data definition is because of implicit definition of the register,
-    //    do not newify the store. Eg.
-    //    %R9 = ZXTH %R12, %D6, %R12
-    //    STrih_indexed %R8, 2, %R12; mem:ST2[%scevgep343]
-    for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
-      if (PacketMI->getOperand(opNum).isReg() &&
-          PacketMI->getOperand(opNum).getReg() == DepReg &&
-          PacketMI->getOperand(opNum).isDef() &&
-          PacketMI->getOperand(opNum).isImplicit()) {
-        return false;
-      }
     }
   }

+  // If data definition is because of implicit definition of the register,
+  // do not newify the store. Eg.
+  // %R9 = ZXTH %R12, %D6, %R12
+  // S2_storerh_io %R8, 2, %R12; mem:ST2[%scevgep343]
+  for (auto &MO : PacketMI->operands()) {
+    if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
+      continue;
+    unsigned R = MO.getReg();
+    if (R == DepReg || HRI->isSuperRegister(DepReg, R))
+      return false;
+  }
+
+  // Handle imp-use of super reg case. There is a target independent side
+  // change that should prevent this situation but I am handling it for
+  // just-in-case. For example, we cannot newify R2 in the following case:
+  // %R3 = A2_tfrsi 0;
+  // S2_storeri_io %R0, 0, %R2, %D1;
+  for (auto &MO : MI->operands()) {
+    if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
+      return false;
+  }
+
   // Can be dot new store.
   return true;
 }

-// can this MI to promoted to either
-// new value store or new value jump
-bool HexagonPacketizerList::CanPromoteToNewValue(
-    MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
-    const std::map<MachineInstr*, SUnit*> &MIToSUnit,
-    MachineBasicBlock::iterator &MII) {
-
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
-  if (!QII->mayBeNewStore(MI))
+// Can this MI be promoted to either a new value store or a new value jump.
+bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
+      const SUnit *PacketSU, unsigned DepReg,
+      MachineBasicBlock::iterator &MII) {
+  if (!HII->mayBeNewStore(MI))
     return false;

-  MachineInstr *PacketMI = PacketSU->getInstr();
-  // Check to see the store can be new value'ed.
-  if (CanPromoteToNewValueStore(MI, PacketMI, DepReg, MIToSUnit))
+  MachineInstr *PacketMI = PacketSU->getInstr();
+  if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
     return true;

   // Check to see the compare/jump can be new value'ed.
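Both tryAllocateResourcesForConstExt earlier in the patch and canPromoteToDotNew in the next hunk rely on the same trial-reservation idiom: build a throwaway instruction, ask the resource tracker whether it would fit, and delete it again. A toy standalone model of that idiom, with a hypothetical ToyTracker in place of the DFA-based tracker (assumed names, not LLVM API):

    #include <cstdio>

    // Toy resource tracker; 4 slots per packet, standing in for the DFA.
    struct ToyTracker {
      int FreeSlots = 4;
      bool canReserve(int Slots) const { return Slots <= FreeSlots; }
      void reserve(int Slots) { FreeSlots -= Slots; }
    };

    // Mirrors the Reserve flag of tryAllocateResourcesForConstExt: probe
    // first, commit only on request, so "can we?" and "do it" share a path.
    bool tryReserve(ToyTracker &RT, int Slots, bool Commit) {
      bool Avail = RT.canReserve(Slots);
      if (Commit && Avail)
        RT.reserve(Slots);
      return Avail;
    }

    int main() {
      ToyTracker RT;
      std::printf("probe: %d\n", tryReserve(RT, 1, false)); // no state change
      std::printf("commit: %d\n", tryReserve(RT, 1, true)); // consumes a slot
    }

The single boolean parameter is the design point: the patch folds the old canReserveResourcesForConstExt/reserveResourcesForConstExt pair into one helper so the probe and the commit cannot drift apart.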
@@ -731,93 +704,110 @@ bool HexagonPacketizerList::CanPromoteToNewValue(
   return false;
 }

+static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
+  for (auto &MO : I->operands())
+    if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
+      return true;
+  return false;
+}
+
 // Check to see if an instruction can be dot new
 // There are three kinds.
 // 1. dot new on predicate - V2/V3/V4
 // 2. dot new on stores NV/ST - V4
 // 3. dot new on jump NV/J - V4 -- This is generated in a pass.
-bool HexagonPacketizerList::CanPromoteToDotNew(
-    MachineInstr *MI, SUnit *PacketSU, unsigned DepReg,
-    const std::map<MachineInstr*, SUnit*> &MIToSUnit,
-    MachineBasicBlock::iterator &MII, const TargetRegisterClass *RC) {
-  const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
+bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
+      const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
+      const TargetRegisterClass* RC) {
   // Already a dot new instruction.
-  if (QII->isDotNewInst(MI) && !QII->mayBeNewStore(MI))
+  if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
     return false;

   if (!isNewifiable(MI))
     return false;

-  // predicate .new
-  if (RC == &Hexagon::PredRegsRegClass && isCondInst(MI))
-      return true;
-  else if (RC != &Hexagon::PredRegsRegClass &&
-           !QII->mayBeNewStore(MI)) // MI is not a new-value store
+  const MachineInstr *PI = PacketSU->getInstr();
+
+  // The "new value" cannot come from inline asm.
+  if (PI->isInlineAsm())
     return false;
-  else {
-    // Create a dot new machine instruction to see if resources can be
-    // allocated. If not, bail out now.
-    int NewOpcode = QII->GetDotNewOp(MI);
-    const MCInstrDesc &desc = QII->get(NewOpcode);
-    DebugLoc dl;
-    MachineInstr *NewMI =
-                    MI->getParent()->getParent()->CreateMachineInstr(desc, dl);
-    bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
-    MI->getParent()->getParent()->DeleteMachineInstr(NewMI);
-    if (!ResourcesAvailable)
-      return false;
+  // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
+  // sense.
+  if (PI->isImplicitDef())
+    return false;
+
+  // If dependency is through an implicitly defined register, we should not
+  // newify the use.
+  if (isImplicitDependency(PI, DepReg))
+    return false;
+
+  const MCInstrDesc& MCID = PI->getDesc();
+  const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
+  if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
+    return false;
+
+  // predicate .new
+  // bug 5670: until that is fixed
+  // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
+  if (RC == &Hexagon::PredRegsRegClass)
+    if (HII->isCondInst(MI) || MI->isReturn())
+      return HII->predCanBeUsedAsDotNew(PI, DepReg);
+
+  if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
+    return false;
+
+  // Create a dot new machine instruction to see if resources can be
+  // allocated. If not, bail out now.
+  int NewOpcode = HII->getDotNewOp(MI);
+  const MCInstrDesc &D = HII->get(NewOpcode);
+  MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
+  bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
+  MF.DeleteMachineInstr(NewMI);
+  if (!ResourcesAvailable)
+    return false;

-    // new value store only
-    // new new value jump generated as a passes
-    if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) {
-      return false;
-    }
-  }
+  // New Value Store only. New Value Jump generated as a separate pass.
+ if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII)) + return false; - // new value store only - // new new value jump generated as a passes - if (!CanPromoteToNewValue(MI, PacketSU, DepReg, MIToSUnit, MII)) { - return false; - } - } return true; } -// Go through the packet instructions and search for anti dependency -// between them and DepReg from MI -// Consider this case: +// Go through the packet instructions and search for an anti dependency between +// them and DepReg from MI. Consider this case: // Trying to add // a) %R1 = TFRI_cdNotPt %P3, 2 // to this packet: // { -// b) %P0 = OR_pp %P3, %P0 -// c) %P3 = TFR_PdRs %R23 -// d) %R1 = TFRI_cdnPt %P3, 4 +// b) %P0 = C2_or %P3, %P0 +// c) %P3 = C2_tfrrp %R23 +// d) %R1 = C2_cmovenewit %P3, 4 // } // The P3 from a) and d) will be complements after // a)'s P3 is converted to .new form -// Anti Dep between c) and b) is irrelevant for this case -bool HexagonPacketizerList::RestrictingDepExistInPacket( - MachineInstr *MI, unsigned DepReg, - const std::map &MIToSUnit) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; +// Anti-dep between c) and b) is irrelevant for this case +bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, + unsigned DepReg) { SUnit *PacketSUDep = MIToSUnit.find(MI)->second; - for (std::vector::iterator VIN = CurrentPacketMIs.begin(), - VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) { - + for (auto I : CurrentPacketMIs) { // We only care for dependencies to predicated instructions - if(!QII->isPredicated(*VIN)) continue; + if (!HII->isPredicated(I)) + continue; // Scheduling Unit for current insn in the packet - SUnit *PacketSU = MIToSUnit.find(*VIN)->second; + SUnit *PacketSU = MIToSUnit.find(I)->second; - // Look at dependencies between current members of the packet - // and predicate defining instruction MI. - // Make sure that dependency is on the exact register - // we care about. + // Look at dependencies between current members of the packet and + // predicate defining instruction MI. Make sure that dependency is + // on the exact register we care about. if (PacketSU->isSucc(PacketSUDep)) { for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { - if ((PacketSU->Succs[i].getSUnit() == PacketSUDep) && - (PacketSU->Succs[i].getKind() == SDep::Anti) && - (PacketSU->Succs[i].getReg() == DepReg)) { + auto &Dep = PacketSU->Succs[i]; + if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti && + Dep.getReg() == DepReg) return true; - } } } } @@ -831,159 +821,308 @@ static unsigned getPredicatedRegister(MachineInstr *MI, const HexagonInstrInfo *QII) { /// We use the following rule: The first predicate register that is a use is /// the predicate register of a predicated instruction. - assert(QII->isPredicated(MI) && "Must be predicated instruction"); - for (MachineInstr::mop_iterator OI = MI->operands_begin(), - OE = MI->operands_end(); OI != OE; ++OI) { - MachineOperand &Op = *OI; + for (auto &Op : MI->operands()) { if (Op.isReg() && Op.getReg() && Op.isUse() && Hexagon::PredRegsRegClass.contains(Op.getReg())) return Op.getReg(); } llvm_unreachable("Unknown instruction operand layout"); - return 0; } // Given two predicated instructions, this function detects whether -// the predicates are complements -bool HexagonPacketizerList::ArePredicatesComplements( - MachineInstr *MI1, MachineInstr *MI2, - const std::map &MIToSUnit) { - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - +// the predicates are complements. 
+bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1, + MachineInstr *MI2) { // If we don't know the predicate sense of the instructions bail out early, we // need it later. - if (getPredicateSense(MI1, QII) == PK_Unknown || - getPredicateSense(MI2, QII) == PK_Unknown) + if (getPredicateSense(MI1, HII) == PK_Unknown || + getPredicateSense(MI2, HII) == PK_Unknown) return false; - // Scheduling unit for candidate - SUnit *SU = MIToSUnit.find(MI1)->second; + // Scheduling unit for candidate. + SUnit *SU = MIToSUnit[MI1]; // One corner case deals with the following scenario: // Trying to add - // a) %R24 = TFR_cPt %P0, %R25 + // a) %R24 = A2_tfrt %P0, %R25 // to this packet: - // // { - // b) %R25 = TFR_cNotPt %P0, %R24 - // c) %P0 = CMPEQri %R26, 1 + // b) %R25 = A2_tfrf %P0, %R24 + // c) %P0 = C2_cmpeqi %R26, 1 // } // - // On general check a) and b) are complements, but - // presence of c) will convert a) to .new form, and - // then it is not a complement - // We attempt to detect it by analyzing existing - // dependencies in the packet + // On general check a) and b) are complements, but presence of c) will + // convert a) to .new form, and then it is not a complement. + // We attempt to detect it by analyzing existing dependencies in the packet. // Analyze relationships between all existing members of the packet. - // Look for Anti dependecy on the same predicate reg - // as used in the candidate - for (std::vector::iterator VIN = CurrentPacketMIs.begin(), - VEN = CurrentPacketMIs.end(); (VIN != VEN); ++VIN) { - - // Scheduling Unit for current insn in the packet - SUnit *PacketSU = MIToSUnit.find(*VIN)->second; + // Look for Anti dependecy on the same predicate reg as used in the + // candidate. + for (auto I : CurrentPacketMIs) { + // Scheduling Unit for current insn in the packet. + SUnit *PacketSU = MIToSUnit.find(I)->second; // If this instruction in the packet is succeeded by the candidate... if (PacketSU->isSucc(SU)) { for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { - // The corner case exist when there is true data - // dependency between candidate and one of current - // packet members, this dep is on predicate reg, and - // there already exist anti dep on the same pred in + auto Dep = PacketSU->Succs[i]; + // The corner case exist when there is true data dependency between + // candidate and one of current packet members, this dep is on + // predicate reg, and there already exist anti dep on the same pred in // the packet. - if (PacketSU->Succs[i].getSUnit() == SU && - PacketSU->Succs[i].getKind() == SDep::Data && - Hexagon::PredRegsRegClass.contains( - PacketSU->Succs[i].getReg()) && - // Here I know that *VIN is predicate setting instruction - // with true data dep to candidate on the register - // we care about - c) in the above example. - // Now I need to see if there is an anti dependency - // from c) to any other instruction in the - // same packet on the pred reg of interest - RestrictingDepExistInPacket(*VIN,PacketSU->Succs[i].getReg(), - MIToSUnit)) { - return false; + if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data && + Hexagon::PredRegsRegClass.contains(Dep.getReg())) { + // Here I know that I is predicate setting instruction with true + // data dep to candidate on the register we care about - c) in the + // above example. Now I need to see if there is an anti dependency + // from c) to any other instruction in the same packet on the pred + // reg of interest. 
+ if (restrictingDepExistInPacket(I, Dep.getReg())) + return false; } } } } - // If the above case does not apply, check regular - // complement condition. - // Check that the predicate register is the same and - // that the predicate sense is different - // We also need to differentiate .old vs. .new: - // !p0 is not complimentary to p0.new - unsigned PReg1 = getPredicatedRegister(MI1, QII); - unsigned PReg2 = getPredicatedRegister(MI2, QII); - return ((PReg1 == PReg2) && - Hexagon::PredRegsRegClass.contains(PReg1) && - Hexagon::PredRegsRegClass.contains(PReg2) && - (getPredicateSense(MI1, QII) != getPredicateSense(MI2, QII)) && - (QII->isDotNewInst(MI1) == QII->isDotNewInst(MI2))); + // If the above case does not apply, check regular complement condition. + // Check that the predicate register is the same and that the predicate + // sense is different We also need to differentiate .old vs. .new: !p0 + // is not complementary to p0.new. + unsigned PReg1 = getPredicatedRegister(MI1, HII); + unsigned PReg2 = getPredicatedRegister(MI2, HII); + return PReg1 == PReg2 && + Hexagon::PredRegsRegClass.contains(PReg1) && + Hexagon::PredRegsRegClass.contains(PReg2) && + getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && + HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); } -// initPacketizerState - Initialize packetizer flags +// Initialize packetizer flags. void HexagonPacketizerList::initPacketizerState() { - Dependence = false; PromotedToDotNew = false; GlueToNewValueJump = false; GlueAllocframeStore = false; FoundSequentialDependence = false; - - return; } -// ignorePseudoInstruction - Ignore bundling of pseudo instructions. -bool HexagonPacketizerList::ignorePseudoInstruction(MachineInstr *MI, - MachineBasicBlock *MBB) { +// Ignore bundling of pseudo instructions. +bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI, + const MachineBasicBlock*) { if (MI->isDebugValue()) return true; if (MI->isCFIInstruction()) return false; - // We must print out inline assembly + // We must print out inline assembly. if (MI->isInlineAsm()) return false; - // We check if MI has any functional units mapped to it. - // If it doesn't, we ignore the instruction. + if (MI->isImplicitDef()) + return false; + + // We check if MI has any functional units mapped to it. If it doesn't, + // we ignore the instruction. const MCInstrDesc& TID = MI->getDesc(); - unsigned SchedClass = TID.getSchedClass(); - const InstrStage* IS = - ResourceTracker->getInstrItins()->beginStage(SchedClass); + auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); unsigned FuncUnits = IS->getUnits(); return !FuncUnits; } -// isSoloInstruction: - Returns true for instructions that must be -// scheduled in their own packet. -bool HexagonPacketizerList::isSoloInstruction(MachineInstr *MI) { +bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) { if (MI->isEHLabel() || MI->isCFIInstruction()) return true; - if (MI->isInlineAsm()) + // Consider inline asm to not be a solo instruction by default. + // Inline asm will be put in a packet temporarily, but then it will be + // removed, and placed outside of the packet (before or after, depending + // on dependencies). This is to reduce the impact of inline asm as a + // "packet splitting" instruction. + if (MI->isInlineAsm() && !ScheduleInlineAsm) return true; // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: // trap, pause, barrier, icinva, isync, and syncht are solo instructions. 
// They must not be grouped with other instructions in a packet. - if (IsSchedBarrier(MI)) + if (isSchedBarrier(MI)) + return true; + + if (HII->isSolo(MI)) + return true; + + if (MI->getOpcode() == Hexagon::A2_nop) return true; return false; } -// isLegalToPacketizeTogether: + +// Quick check if instructions MI and MJ cannot coexist in the same packet. +// Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", +// but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". +// For full test call this function twice: +// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) +// Doing the test only one way saves the amount of code in this function, +// since every test would need to be repeated with the MI and MJ reversed. +static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, + const HexagonInstrInfo &HII) { + const MachineFunction *MF = MI->getParent()->getParent(); + if (MF->getSubtarget().hasV60TOpsOnly() && + HII.isHVXMemWithAIndirect(MI, MJ)) + return true; + + // An inline asm cannot be together with a branch, because we may not be + // able to remove the asm out after packetizing (i.e. if the asm must be + // moved past the bundle). Similarly, two asms cannot be together to avoid + // complications when determining their relative order outside of a bundle. + if (MI->isInlineAsm()) + return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() || + MJ->isCall() || MJ->isTerminator(); + + // "False" really means that the quick check failed to determine if + // I and J cannot coexist. + return false; +} + + +// Full, symmetric check. +bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI, + const MachineInstr *MJ) { + return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII); +} + +void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { + for (auto &B : MF) { + MachineBasicBlock::iterator BundleIt; + MachineBasicBlock::instr_iterator NextI; + for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { + NextI = std::next(I); + MachineInstr *MI = &*I; + if (MI->isBundle()) + BundleIt = I; + if (!MI->isInsideBundle()) + continue; + + // Decide on where to insert the instruction that we are pulling out. + // Debug instructions always go before the bundle, but the placement of + // INLINE_ASM depends on potential dependencies. By default, try to + // put it before the bundle, but if the asm writes to a register that + // other instructions in the bundle read, then we need to place it + // after the bundle (to preserve the bundle semantics). + bool InsertBeforeBundle; + if (MI->isInlineAsm()) + InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI); + else if (MI->isDebugValue()) + InsertBeforeBundle = true; + else + continue; + + BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle); + } + } +} + +// Check if a given instruction is of class "system". +static bool isSystemInstr(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case Hexagon::Y2_barrier: + case Hexagon::Y2_dcfetchbo: + return true; + } + return false; +} + +bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, + const MachineInstr *J) { + // The dependence graph may not include edges between dead definitions, + // so without extra checks, we could end up packetizing two instruction + // defining the same (dead) register. 
+ if (I->isCall() || J->isCall()) + return false; + if (HII->isPredicated(I) || HII->isPredicated(J)) + return false; + + BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); + for (auto &MO : I->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + DeadDefs[MO.getReg()] = true; + } + + for (auto &MO : J->operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.isDead()) + continue; + unsigned R = MO.getReg(); + if (R != Hexagon::USR_OVF && DeadDefs[R]) + return true; + } + return false; +} + +bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, + const MachineInstr *J) { + // A save callee-save register function call can only be in a packet + // with instructions that don't write to the callee-save registers. + if ((HII->isSaveCalleeSavedRegsCall(I) && + doesModifyCalleeSavedReg(J, HRI)) || + (HII->isSaveCalleeSavedRegsCall(J) && + doesModifyCalleeSavedReg(I, HRI))) + return true; + + // Two control flow instructions cannot go in the same packet. + if (isControlFlow(I) && isControlFlow(J)) + return true; + + // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot + // contain a speculative indirect jump, + // a new-value compare jump or a dealloc_return. + auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { + if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) + return true; + if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) + return true; + return false; + }; + + if (HII->isLoopN(I) && isBadForLoopN(J)) + return true; + if (HII->isLoopN(J) && isBadForLoopN(I)) + return true; + + // dealloc_return cannot appear in the same packet as a conditional or + // unconditional jump. + return HII->isDeallocRet(I) && + (J->isBranch() || J->isCall() || J->isBarrier()); +} + +bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, + const MachineInstr *J) { + bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); + bool StoreI = I->mayStore(), StoreJ = J->mayStore(); + if ((SysI && StoreJ) || (SysJ && StoreI)) + return true; + + if (StoreI && StoreJ) { + if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I)) + return true; + } else { + // A memop cannot be in the same packet with another memop or a store. + // Two stores can be together, but here I and J cannot both be stores. + bool MopStI = HII->isMemOp(I) || StoreI; + bool MopStJ = HII->isMemOp(J) || StoreJ; + if (MopStI && MopStJ) + return true; + } + + return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J)); +} + // SUI is the current instruction that is out side of the current packet. // SUJ is the current instruction inside the current packet against which that // SUI will be packetized. @@ -992,115 +1131,52 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { MachineInstr *J = SUJ->getInstr(); assert(I && J && "Unable to packetize null instruction!"); - const MCInstrDesc &MCIDI = I->getDesc(); - const MCInstrDesc &MCIDJ = J->getDesc(); + // Clear IgnoreDepMIs when Packet starts. + if (CurrentPacketMIs.size() == 1) + IgnoreDepMIs.clear(); MachineBasicBlock::iterator II = I; - const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); - const HexagonRegisterInfo *QRI = - (const HexagonRegisterInfo *)MF.getSubtarget().getRegisterInfo(); - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - // Inline asm cannot go in the packet. 
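hasDeadDependence above guards against pairing two instructions whose dead definitions of the same register would silently conflict, since the dependence graph may omit edges between dead defs. The core of that test is a per-register bit mask intersection; a self-contained sketch of just that shape (names and the register count are illustrative; the real code also exempts calls, predicated instructions, and USR_OVF):

    #include <bitset>

    // Illustrative register universe; stands in for Hexagon::NUM_TARGET_REGS.
    constexpr std::size_t NumRegs = 64;

    // One bit per register that an instruction dead-defines.
    using DeadDefMask = std::bitset<NumRegs>;

    // A register dead-defined by both instructions is an output dependence
    // that the scheduler's dependence graph may not record.
    bool haveDeadDefOverlap(const DeadDefMask &I, const DeadDefMask &J) {
      return (I & J).any();
    }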
- if (I->getOpcode() == Hexagon::INLINEASM) - llvm_unreachable("Should not meet inline asm here!"); + // Solo instructions cannot go in the packet. + assert(!isSoloInstruction(I) && "Unexpected solo instr!"); - if (isSoloInstruction(I)) - llvm_unreachable("Should not meet solo instr here!"); - - // A save callee-save register function call can only be in a packet - // with instructions that don't write to the callee-save registers. - if ((QII->isSaveCalleeSavedRegsCall(I) && - DoesModifyCalleeSavedReg(J, QRI)) || - (QII->isSaveCalleeSavedRegsCall(J) && - DoesModifyCalleeSavedReg(I, QRI))) { - Dependence = true; + if (cannotCoexist(I, J)) return false; - } - // Two control flow instructions cannot go in the same packet. - if (IsControlFlow(I) && IsControlFlow(J)) { - Dependence = true; + Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J); + if (Dependence) return false; - } - // A LoopN instruction cannot appear in the same packet as a jump or call. - if (IsLoopN(I) && - (IsDirectJump(J) || MCIDJ.isCall() || QII->isDeallocRet(J))) { - Dependence = true; + // V4 allows dual stores. It does not allow second store, if the first + // store is not in SLOT0. New value store, new value jump, dealloc_return + // and memop always take SLOT0. Arch spec 3.4.4.2. + Dependence = hasV4SpecificDependence(I, J); + if (Dependence) return false; - } - if (IsLoopN(J) && - (IsDirectJump(I) || MCIDI.isCall() || QII->isDeallocRet(I))) { - Dependence = true; - return false; - } - - // dealloc_return cannot appear in the same packet as a conditional or - // unconditional jump. - if (QII->isDeallocRet(I) && - (MCIDJ.isBranch() || MCIDJ.isCall() || MCIDJ.isBarrier())) { - Dependence = true; - return false; - } - - - // V4 allows dual store. But does not allow second store, if the - // first store is not in SLOT0. New value store, new value jump, - // dealloc_return and memop always take SLOT0. - // Arch spec 3.4.4.2 - if (MCIDI.mayStore() && MCIDJ.mayStore() && - (QII->isNewValueInst(J) || QII->isMemOp(J) || QII->isMemOp(I))) { - Dependence = true; - return false; - } - - if ((QII->isMemOp(J) && MCIDI.mayStore()) - || (MCIDJ.mayStore() && QII->isMemOp(I)) - || (QII->isMemOp(J) && QII->isMemOp(I))) { - Dependence = true; - return false; - } - - //if dealloc_return - if (MCIDJ.mayStore() && QII->isDeallocRet(I)) { - Dependence = true; - return false; - } // If an instruction feeds new value jump, glue it. 
   MachineBasicBlock::iterator NextMII = I;
   ++NextMII;
-  if (NextMII != I->getParent()->end() && QII->isNewValueJump(NextMII)) {
+  if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) {
     MachineInstr *NextMI = NextMII;
     bool secondRegMatch = false;
-    bool maintainNewValueJump = false;
+    const MachineOperand &NOp0 = NextMI->getOperand(0);
+    const MachineOperand &NOp1 = NextMI->getOperand(1);

-    if (NextMI->getOperand(1).isReg() &&
-        I->getOperand(0).getReg() == NextMI->getOperand(1).getReg()) {
+    if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
       secondRegMatch = true;
-      maintainNewValueJump = true;
-    }
-
-    if (!secondRegMatch &&
-        I->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) {
-      maintainNewValueJump = true;
-    }
-
-    for (std::vector<MachineInstr*>::iterator
-         VI = CurrentPacketMIs.begin(),
-         VE = CurrentPacketMIs.end();
-         (VI != VE && maintainNewValueJump); ++VI) {
-      SUnit *PacketSU = MIToSUnit.find(*VI)->second;
-
-      // NVJ can not be part of the dual jump - Arch Spec: section 7.8
-      if (PacketSU->getInstr()->getDesc().isCall()) {
+    for (auto I : CurrentPacketMIs) {
+      SUnit *PacketSU = MIToSUnit.find(I)->second;
+      MachineInstr *PI = PacketSU->getInstr();
+      // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
+      if (PI->isCall()) {
         Dependence = true;
         break;
       }
-      // Validate
+      // Validate:
       // 1. Packet does not have a store in it.
       // 2. If the first operand of the nvj is newified, and the second
       //    operand is also a reg, it (second reg) is not defined in
@@ -1108,302 +1184,413 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
       // 3. If the second operand of the nvj is newified, (which means
       //    first operand is also a reg), first reg is not defined in
       //    the same packet.
-      if (PacketSU->getInstr()->getDesc().mayStore() ||
-          PacketSU->getInstr()->getOpcode() == Hexagon::S2_allocframe ||
-          // Check #2.
-          (!secondRegMatch && NextMI->getOperand(1).isReg() &&
-           PacketSU->getInstr()->modifiesRegister(
-               NextMI->getOperand(1).getReg(), QRI)) ||
-          // Check #3.
-          (secondRegMatch &&
-           PacketSU->getInstr()->modifiesRegister(
-               NextMI->getOperand(0).getReg(), QRI))) {
+      if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
+          HII->isLoopN(PI)) {
+        Dependence = true;
+        break;
+      }
+      // Check #2/#3.
+      const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
+      if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
         Dependence = true;
         break;
       }
     }
-    if (!Dependence)
-      GlueToNewValueJump = true;
-    else
+
+    if (Dependence)
       return false;
+    GlueToNewValueJump = true;
   }

-  if (SUJ->isSucc(SUI)) {
-    for (unsigned i = 0;
-         (i < SUJ->Succs.size()) && !FoundSequentialDependence;
-         ++i) {
+  // There is no dependency between a prolog instruction and its successor.
+  if (!SUJ->isSucc(SUI))
+    return true;

-      if (SUJ->Succs[i].getSUnit() != SUI) {
+  for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
+    if (FoundSequentialDependence)
+      break;
+
+    if (SUJ->Succs[i].getSUnit() != SUI)
+      continue;
+
+    SDep::Kind DepType = SUJ->Succs[i].getKind();
+    // For direct calls:
+    // Ignore register dependences for call instructions for packetization
+    // purposes except for those due to r31 and predicate registers.
+    //
+    // For indirect calls:
+    // Same as direct calls + check for true dependences to the register
+    // used in the indirect call.
+    //
+    // We completely ignore Order dependences for call instructions.
+    //
+    // For returns:
+    // Ignore register dependences for return instructions like jumpr,
+    // dealloc return unless we have dependencies on the explicit uses
+    // of the registers used by jumpr (like r31) or dealloc return
+    // (like r29 or r30).
+    //
+    // TODO: Currently, jumpr is handling only return of r31. So, the
+    // following logic (specifically isCallDependent) is working fine.
+    // We need to enable jumpr for register other than r31 and then,
+    // we need to rework the last part, where it handles indirect call
+    // of that (isCallDependent) function. Bug 6216 is opened for this.
+    unsigned DepReg = 0;
+    const TargetRegisterClass *RC = nullptr;
+    if (DepType == SDep::Data) {
+      DepReg = SUJ->Succs[i].getReg();
+      RC = HRI->getMinimalPhysRegClass(DepReg);
+    }
+
+    if (I->isCall() || I->isReturn()) {
+      if (!isRegDependence(DepType))
+        continue;
+      if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
+        continue;
+    }
+
+    if (DepType == SDep::Data) {
+      if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
+        if (promoteToDotCur(J, DepType, II, RC))
+          continue;
+    }
+
+    // Data dependence ok if we have load.cur.
+    if (DepType == SDep::Data && HII->isDotCurInst(J)) {
+      if (HII->isV60VectorInstruction(I))
+        continue;
+    }
+
+    // For instructions that can be promoted to dot-new, try to promote.
+    if (DepType == SDep::Data) {
+      if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
+        if (promoteToDotNew(I, DepType, II, RC)) {
+          PromotedToDotNew = true;
+          continue;
+        }
+      }
+      if (HII->isNewValueJump(I))
+        continue;
+    }
-      SDep::Kind DepType = SUJ->Succs[i].getKind();
-
-      // For direct calls:
-      // Ignore register dependences for call instructions for
-      // packetization purposes except for those due to r31 and
-      // predicate registers.
+    // For predicated instructions, if the predicates are complements then
+    // there can be no dependence.
+    if (HII->isPredicated(I) && HII->isPredicated(J) &&
+        arePredicatesComplements(I, J)) {
+      // Not always safe to do this translation.
+      // DAG Builder attempts to reduce dependence edges using transitive
+      // nature of dependencies. Here is an example:
       //
-      // For indirect calls:
-      // Same as direct calls + check for true dependences to the register
-      // used in the indirect call.
+      // r0 = tfr_pt ... (1)
+      // r0 = tfr_pf ... (2)
+      // r0 = tfr_pt ... (3)
       //
-      // We completely ignore Order dependences for call instructions
-      //
-      // For returns:
-      // Ignore register dependences for return instructions like jumpr,
-      // dealloc return unless we have dependencies on the explicit uses
-      // of the registers used by jumpr (like r31) or dealloc return
-      // (like r29 or r30).
-      //
-      // TODO: Currently, jumpr is handling only return of r31. So, the
-      // following logic (specificaly IsCallDependent) is working fine.
-      // We need to enable jumpr for register other than r31 and then,
-      // we need to rework the last part, where it handles indirect call
-      // of that (IsCallDependent) function. Bug 6216 is opened for this.
-      //
-      unsigned DepReg = 0;
-      const TargetRegisterClass* RC = nullptr;
-      if (DepType == SDep::Data) {
-        DepReg = SUJ->Succs[i].getReg();
-        RC = QRI->getMinimalPhysRegClass(DepReg);
-      }
-      if ((MCIDI.isCall() || MCIDI.isReturn()) &&
-          (!IsRegDependence(DepType) ||
-           !IsCallDependent(I, DepType, SUJ->Succs[i].getReg()))) {
-        /* do nothing */
+      // There will be an output dependence between (1)->(2) and (2)->(3).
+      // However, there is no dependence edge between (1)->(3). This results
+      // in all 3 instructions going in the same packet.
We ignore dependce + // only once to avoid this situation. + auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J); + if (Itr != IgnoreDepMIs.end()) { + Dependence = true; + return false; } + IgnoreDepMIs.push_back(I); + continue; + } - // For instructions that can be promoted to dot-new, try to promote. - else if ((DepType == SDep::Data) && - CanPromoteToDotNew(I, SUJ, DepReg, MIToSUnit, II, RC) && - PromoteToDotNew(I, DepType, II, RC)) { - PromotedToDotNew = true; - /* do nothing */ - } + // Ignore Order dependences between unconditional direct branches + // and non-control-flow instructions. + if (isDirectJump(I) && !J->isBranch() && !J->isCall() && + DepType == SDep::Order) + continue; - else if ((DepType == SDep::Data) && - (QII->isNewValueJump(I))) { - /* do nothing */ - } + // Ignore all dependences for jumps except for true and output + // dependences. + if (I->isConditionalBranch() && DepType != SDep::Data && + DepType != SDep::Output) + continue; - // For predicated instructions, if the predicates are complements - // then there can be no dependence. - else if (QII->isPredicated(I) && - QII->isPredicated(J) && - ArePredicatesComplements(I, J, MIToSUnit)) { - /* do nothing */ + // Ignore output dependences due to superregs. We can write to two + // different subregisters of R1:0 for instance in the same cycle. - } - else if (IsDirectJump(I) && - !MCIDJ.isBranch() && - !MCIDJ.isCall() && - (DepType == SDep::Order)) { - // Ignore Order dependences between unconditional direct branches - // and non-control-flow instructions - /* do nothing */ - } - else if (MCIDI.isConditionalBranch() && (DepType != SDep::Data) && - (DepType != SDep::Output)) { - // Ignore all dependences for jumps except for true and output - // dependences - /* do nothing */ - } + // If neither I nor J defines DepReg, then this is a superfluous output + // dependence. The dependence must be of the form: + // R0 = ... + // R1 = ... + // and there is an output dependence between the two instructions with + // DepReg = D0. + // We want to ignore these dependences. Ideally, the dependence + // constructor should annotate such dependences. We can then avoid this + // relatively expensive check. + // + if (DepType == SDep::Output) { + // DepReg is the register that's responsible for the dependence. + unsigned DepReg = SUJ->Succs[i].getReg(); - // Ignore output dependences due to superregs. We can - // write to two different subregisters of R1:0 for instance - // in the same cycle - // + // Check if I and J really defines DepReg. + if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg)) + continue; + FoundSequentialDependence = true; + break; + } - // - // Let the - // If neither I nor J defines DepReg, then this is a - // superfluous output dependence. The dependence must be of the - // form: - // R0 = ... - // R1 = ... - // and there is an output dependence between the two instructions - // with - // DepReg = D0 - // We want to ignore these dependences. - // Ideally, the dependence constructor should annotate such - // dependences. We can then avoid this relatively expensive check. - // - else if (DepType == SDep::Output) { - // DepReg is the register that's responsible for the dependence. - unsigned DepReg = SUJ->Succs[i].getReg(); - - // Check if I and J really defines DepReg. - if (I->definesRegister(DepReg) || - J->definesRegister(DepReg)) { + // For Order dependences: + // 1. On V4 or later, volatile loads/stores can be packetized together, + // unless other rules prevent is. + // 2. 
Store followed by a load is not allowed. + // 3. Store followed by a store is only valid on V4 or later. + // 4. Load followed by any memory operation is allowed. + if (DepType == SDep::Order) { + if (!PacketizeVolatiles) { + bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef(); + if (OrdRefs) { FoundSequentialDependence = true; break; } } - - // We ignore Order dependences for - // 1. Two loads unless they are volatile. - // 2. Two stores in V4 unless they are volatile. - else if ((DepType == SDep::Order) && - !I->hasOrderedMemoryRef() && - !J->hasOrderedMemoryRef()) { - if (MCIDI.mayStore() && MCIDJ.mayStore()) { - /* do nothing */ - } - // store followed by store-- not OK on V2 - // store followed by load -- not OK on all (OK if addresses - // are not aliased) - // load followed by store -- OK on all - // load followed by load -- OK on all - else if ( !MCIDJ.mayStore()) { - /* do nothing */ - } - else { + // J is first, I is second. + bool LoadJ = J->mayLoad(), StoreJ = J->mayStore(); + bool LoadI = I->mayLoad(), StoreI = I->mayStore(); + if (StoreJ) { + // Two stores are only allowed on V4+. Load following store is never + // allowed. + if (LoadI) { FoundSequentialDependence = true; break; } - } - - // For V4, special case ALLOCFRAME. Even though there is dependency - // between ALLOCFRAME and subsequent store, allow it to be - // packetized in a same packet. This implies that the store is using - // caller's SP. Hence, offset needs to be updated accordingly. - else if (DepType == SDep::Data - && J->getOpcode() == Hexagon::S2_allocframe - && (I->getOpcode() == Hexagon::S2_storerd_io - || I->getOpcode() == Hexagon::S2_storeri_io - || I->getOpcode() == Hexagon::S2_storerb_io) - && I->getOperand(0).getReg() == QRI->getStackRegister() - && QII->isValidOffset(I->getOpcode(), - I->getOperand(1).getImm() - - (FrameSize + HEXAGON_LRFP_SIZE))) - { - GlueAllocframeStore = true; - // Since this store is to be glued with allocframe in the same - // packet, it will use SP of the previous stack frame, i.e - // caller's SP. Therefore, we need to recalculate offset according - // to this change. - I->getOperand(1).setImm(I->getOperand(1).getImm() - - (FrameSize + HEXAGON_LRFP_SIZE)); - } - - // - // Skip over anti-dependences. Two instructions that are - // anti-dependent can share a packet - // - else if (DepType != SDep::Anti) { + } else if (!LoadJ || (!LoadI && !StoreI)) { + // If J is neither load nor store, assume a dependency. + // If J is a load, but I is neither, also assume a dependency. FoundSequentialDependence = true; break; } + // Store followed by store: not OK on V2. + // Store followed by load: not OK on all. + // Load followed by store: OK on all. + // Load followed by load: OK on all. + continue; } - if (FoundSequentialDependence) { - Dependence = true; - return false; + // For V4, special case ALLOCFRAME. Even though there is dependency + // between ALLOCFRAME and subsequent store, allow it to be packetized + // in a same packet. This implies that the store is using the caller's + // SP. Hence, offset needs to be updated accordingly. 
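To make the comment above concrete: the glued store's immediate is rebased by subtracting the new frame size plus the LR/FP save area, since the store still executes against the caller's SP. A minimal sketch (the 8-byte LR/FP constant is an assumption of this demo, standing in for HEXAGON_LRFP_SIZE):

#include <cstdint>
#include <iostream>

constexpr int64_t LRFP_SIZE = 8; // LR/FP pair saved by allocframe

// Offset the store must use when packetized with allocframe.
int64_t rebaseOffset(int64_t Imm, int64_t FrameSize) {
  return Imm - (FrameSize + LRFP_SIZE);
}

int main() {
  // memw(r29 + #16) with a 24-byte frame becomes an offset of -16
  // relative to the caller's SP: 16 - (24 + 8).
  std::cout << rebaseOffset(16, 24) << '\n'; // prints -16
}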
+ if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) { + unsigned Opc = I->getOpcode(); + switch (Opc) { + case Hexagon::S2_storerd_io: + case Hexagon::S2_storeri_io: + case Hexagon::S2_storerh_io: + case Hexagon::S2_storerb_io: + if (I->getOperand(0).getReg() == HRI->getStackRegister()) { + int64_t Imm = I->getOperand(1).getImm(); + int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE); + if (HII->isValidOffset(Opc, NewOff)) { + GlueAllocframeStore = true; + // Since this store is to be glued with allocframe in the same + // packet, it will use SP of the previous stack frame, i.e. + // caller's SP. Therefore, we need to recalculate offset + // according to this change. + I->getOperand(1).setImm(NewOff); + continue; + } + } + default: + break; + } + } + + // Skip over anti-dependences. Two instructions that are anti-dependent + // can share a packet. + if (DepType != SDep::Anti) { + FoundSequentialDependence = true; + break; } } - return true; -} - -// isLegalToPruneDependencies -bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { - MachineInstr *I = SUI->getInstr(); - assert(I && SUJ->getInstr() && "Unable to packetize null instruction!"); - - const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); - - if (Dependence) { - - // Check if the instruction was promoted to a dot-new. If so, demote it - // back into a dot-old. - if (PromotedToDotNew) { - DemoteToDotOld(I); - } - - // Check if the instruction (must be a store) was glued with an Allocframe - // instruction. If so, restore its offset to its original value, i.e. use - // curent SP instead of caller's SP. - if (GlueAllocframeStore) { - I->getOperand(1).setImm(I->getOperand(1).getImm() + - FrameSize + HEXAGON_LRFP_SIZE); - } - + if (FoundSequentialDependence) { + Dependence = true; return false; } + return true; } +bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { + MachineInstr *I = SUI->getInstr(); + MachineInstr *J = SUJ->getInstr(); + assert(I && J && "Unable to packetize null instruction!"); + + if (cannotCoexist(I, J)) + return false; + + if (!Dependence) + return true; + + // Check if the instruction was promoted to a dot-new. If so, demote it + // back into a dot-old. + if (PromotedToDotNew) + demoteToDotOld(I); + + cleanUpDotCur(); + // Check if the instruction (must be a store) was glued with an allocframe + // instruction. If so, restore its offset to its original value, i.e. use + // current SP instead of caller's SP. + if (GlueAllocframeStore) { + unsigned FrameSize = MF.getFrameInfo()->getStackSize(); + MachineOperand &MOff = I->getOperand(1); + MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE); + } + return false; +} + + MachineBasicBlock::iterator HexagonPacketizerList::addToPacket(MachineInstr *MI) { - - MachineBasicBlock::iterator MII = MI; - MachineBasicBlock *MBB = MI->getParent(); - - const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII; - - if (GlueToNewValueJump) { - - ++MII; - MachineInstr *nvjMI = MII; - assert(ResourceTracker->canReserveResources(MI)); - ResourceTracker->reserveResources(MI); - if ((QII->isExtended(MI) || QII->isConstExtended(MI)) && - !tryAllocateResourcesForConstExt(MI)) { - endPacket(MBB, MI); - ResourceTracker->reserveResources(MI); - assert(canReserveResourcesForConstExt(MI) && - "Ensure that there is a slot"); - reserveResourcesForConstExt(MI); - // Reserve resources for new value jump constant extender. 
- assert(canReserveResourcesForConstExt(MI) && - "Ensure that there is a slot"); - reserveResourcesForConstExt(nvjMI); - assert(ResourceTracker->canReserveResources(nvjMI) && - "Ensure that there is a slot"); - - } else if ( // Extended instruction takes two slots in the packet. - // Try reserve and allocate 4-byte in the current packet first. - (QII->isExtended(nvjMI) - && (!tryAllocateResourcesForConstExt(nvjMI) - || !ResourceTracker->canReserveResources(nvjMI))) - || // For non-extended instruction, no need to allocate extra 4 bytes. - (!QII->isExtended(nvjMI) && - !ResourceTracker->canReserveResources(nvjMI))) - { - endPacket(MBB, MI); - // A new and empty packet starts. - // We are sure that the resources requirements can be satisfied. - // Therefore, do not need to call "canReserveResources" anymore. - ResourceTracker->reserveResources(MI); - if (QII->isExtended(nvjMI)) - reserveResourcesForConstExt(nvjMI); - } - // Here, we are sure that "reserveResources" would succeed. - ResourceTracker->reserveResources(nvjMI); - CurrentPacketMIs.push_back(MI); - CurrentPacketMIs.push_back(nvjMI); - } else { - if ( (QII->isExtended(MI) || QII->isConstExtended(MI)) - && ( !tryAllocateResourcesForConstExt(MI) - || !ResourceTracker->canReserveResources(MI))) - { - endPacket(MBB, MI); - // Check if the instruction was promoted to a dot-new. If so, demote it - // back into a dot-old - if (PromotedToDotNew) { - DemoteToDotOld(MI); - } - reserveResourcesForConstExt(MI); - } - // In case that "MI" is not an extended insn, - // the resource availability has already been checked. - ResourceTracker->reserveResources(MI); - CurrentPacketMIs.push_back(MI); + MachineBasicBlock::iterator MII = MI; + MachineBasicBlock *MBB = MI->getParent(); + if (MI->isImplicitDef()) { + unsigned R = MI->getOperand(0).getReg(); + if (Hexagon::IntRegsRegClass.contains(R)) { + MCSuperRegIterator S(R, HRI, false); + MI->addOperand(MachineOperand::CreateReg(*S, true, true)); } return MII; + } + assert(ResourceTracker->canReserveResources(MI)); + + bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); + bool Good = true; + + if (GlueToNewValueJump) { + MachineInstr *NvjMI = ++MII; + // We need to put both instructions in the same packet: MI and NvjMI. + // Either of them can require a constant extender. Try to add both to + // the current packet, and if that fails, end the packet and start a + // new one. 
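A minimal model of the reserve-or-restart policy described above (hypothetical slot counts, not the real DFA resource tracker): try to fit both glued instructions, and if the current packet cannot hold them, end it and place them in a fresh one, where reservation is guaranteed to succeed:

#include <cassert>

struct SlotTracker {
  int Free = 4; // 4 slots per packet
  bool canReserve(int N) const { return Free >= N; }
  void reserve(int N) { assert(canReserve(N)); Free -= N; }
  void endPacket() { Free = 4; } // start an empty packet
};

void placeGluedPair(SlotTracker &T, int SlotsMI, int SlotsNvjMI) {
  if (!T.canReserve(SlotsMI + SlotsNvjMI))
    T.endPacket();
  T.reserve(SlotsMI); // cannot fail in an empty packet
  T.reserve(SlotsNvjMI);
}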
+ ResourceTracker->reserveResources(MI); + if (ExtMI) + Good = tryAllocateResourcesForConstExt(true); + + bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); + if (Good) { + if (ResourceTracker->canReserveResources(NvjMI)) + ResourceTracker->reserveResources(NvjMI); + else + Good = false; + } + if (Good && ExtNvjMI) + Good = tryAllocateResourcesForConstExt(true); + + if (!Good) { + endPacket(MBB, MI); + assert(ResourceTracker->canReserveResources(MI)); + ResourceTracker->reserveResources(MI); + if (ExtMI) { + assert(canReserveResourcesForConstExt()); + tryAllocateResourcesForConstExt(true); + } + assert(ResourceTracker->canReserveResources(NvjMI)); + ResourceTracker->reserveResources(NvjMI); + if (ExtNvjMI) { + assert(canReserveResourcesForConstExt()); + reserveResourcesForConstExt(); + } + } + CurrentPacketMIs.push_back(MI); + CurrentPacketMIs.push_back(NvjMI); + return MII; + } + + ResourceTracker->reserveResources(MI); + if (ExtMI && !tryAllocateResourcesForConstExt(true)) { + endPacket(MBB, MI); + if (PromotedToDotNew) + demoteToDotOld(MI); + ResourceTracker->reserveResources(MI); + reserveResourcesForConstExt(); + } + + CurrentPacketMIs.push_back(MI); + return MII; } +void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, + MachineInstr *MI) { + OldPacketMIs = CurrentPacketMIs; + VLIWPacketizerList::endPacket(MBB, MI); +} + +bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) { + return !producesStall(MI); +} + + +// Return true when ConsMI uses a register defined by ProdMI. +static bool isDependent(const MachineInstr *ProdMI, + const MachineInstr *ConsMI) { + if (!ProdMI->getOperand(0).isReg()) + return false; + unsigned DstReg = ProdMI->getOperand(0).getReg(); + + for (auto &Op : ConsMI->operands()) + if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) + // The MIs depend on each other. + return true; + + return false; +} + +// V60 forward scheduling. +bool HexagonPacketizerList::producesStall(const MachineInstr *I) { + // Check whether the previous packet is in a different loop. If this is the + // case, there is little point in trying to avoid a stall because that would + // favor the rare case (loop entry) over the common case (loop iteration). + // + // TODO: We should really be able to check all the incoming edges if this is + // the first packet in a basic block, so we can avoid stalls from the loop + // backedge. + if (!OldPacketMIs.empty()) { + auto *OldBB = OldPacketMIs.front()->getParent(); + auto *ThisBB = I->getParent(); + if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) + return false; + } + + // Check for stall between two vector instructions. + if (HII->isV60VectorInstruction(I)) { + for (auto J : OldPacketMIs) { + if (!HII->isV60VectorInstruction(J)) + continue; + if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I)) + return true; + } + return false; + } + + // Check for stall between two scalar instructions. First, check that + // there is no definition of a use in the current packet, because it + // may be a candidate for .new. + for (auto J : CurrentPacketMIs) + if (!HII->isV60VectorInstruction(J) && isDependent(J, I)) + return false; + + // Check for stall between I and instructions in the previous packet. 
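The stall test that follows boils down to: does the candidate instruction read a value produced in the immediately preceding packet? A toy version over plain integers (the real code additionally consults latency queries such as isVecUsableNextPacket and canExecuteInBundle):

#include <vector>

struct ToyMI {
  int Def;               // register defined (-1 if none)
  std::vector<int> Uses; // registers read
};

bool readsFromPacket(const ToyMI &I, const std::vector<ToyMI> &PrevPacket) {
  for (const ToyMI &P : PrevPacket)
    for (int U : I.Uses)
      if (P.Def >= 0 && U == P.Def)
        return true; // back-to-back producer/consumer: potential stall
  return false;
}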
+  if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
+    for (auto J : OldPacketMIs) {
+      if (HII->isV60VectorInstruction(J))
+        continue;
+      if (!HII->isLateInstrFeedsEarlyInstr(J, I))
+        continue;
+      if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
+        return true;
+    }
+  }
+
+  return false;
+}
+
+
 //===----------------------------------------------------------------------===//
 //                         Public Constructor Functions
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
new file mode 100644
index 000000000000..960cf6ca5bbc
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -0,0 +1,114 @@
+#ifndef HEXAGONVLIWPACKETIZER_H
+#define HEXAGONVLIWPACKETIZER_H
+
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+
+namespace llvm {
+class HexagonPacketizerList : public VLIWPacketizerList {
+  // Vector of instructions assigned to the packet that has just been created.
+  std::vector<MachineInstr*> OldPacketMIs;
+
+  // Has the instruction been promoted to a dot-new instruction.
+  bool PromotedToDotNew;
+
+  // Has the instruction been glued to allocframe.
+  bool GlueAllocframeStore;
+
+  // Has the feeder instruction been glued to new value jump.
+  bool GlueToNewValueJump;
+
+  // Check if there is a dependence between some instruction already in this
+  // packet and this instruction.
+  bool Dependence;
+
+  // Only check for dependence if there are resources available to
+  // schedule this instruction.
+  bool FoundSequentialDependence;
+
+  // Track MIs with ignored dependence.
+  std::vector<MachineInstr*> IgnoreDepMIs;
+
+protected:
+  /// \brief A handle to the branch probability pass.
+  const MachineBranchProbabilityInfo *MBPI;
+  const MachineLoopInfo *MLI;
+
+private:
+  const HexagonInstrInfo *HII;
+  const HexagonRegisterInfo *HRI;
+
+public:
+  // Ctor.
+  HexagonPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
+                        AliasAnalysis *AA,
+                        const MachineBranchProbabilityInfo *MBPI);
+
+  // initPacketizerState - Initialize some internal flags.
+  void initPacketizerState() override;
+
+  // ignorePseudoInstruction - Ignore bundling of pseudo instructions.
+  bool ignorePseudoInstruction(const MachineInstr *MI,
+                               const MachineBasicBlock *MBB) override;
+
+  // isSoloInstruction - Return true if instruction MI cannot be packetized
+  // with any other instruction, which means that MI itself is a packet.
+  bool isSoloInstruction(const MachineInstr *MI) override;
+
+  // isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
+  // together.
+  bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
+
+  // isLegalToPruneDependencies - Is it legal to prune dependence between SUI
+  // and SUJ.
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override; + + MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override; + void endPacket(MachineBasicBlock *MBB, MachineInstr *MI) override; + bool shouldAddToPacket(const MachineInstr *MI) override; + + void unpacketizeSoloInstrs(MachineFunction &MF); + +protected: + bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType, + unsigned DepReg); + bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + void cleanUpDotCur(); + + bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType, + MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII, + const TargetRegisterClass* RC); + bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU, + unsigned DepReg, MachineBasicBlock::iterator &MII); + bool canPromoteToNewValueStore(const MachineInstr* MI, + const MachineInstr* PacketMI, unsigned DepReg); + bool demoteToDotOld(MachineInstr* MI); + bool arePredicatesComplements(MachineInstr* MI1, MachineInstr* MI2); + bool restrictingDepExistInPacket(MachineInstr*, unsigned); + bool isNewifiable(const MachineInstr *MI); + bool isCurifiable(MachineInstr* MI); + bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ); + inline bool isPromotedToDotNew() const { + return PromotedToDotNew; + } + bool tryAllocateResourcesForConstExt(bool Reserve); + bool canReserveResourcesForConstExt(); + void reserveResourcesForConstExt(); + bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J); + bool hasControlDependence(const MachineInstr *I, const MachineInstr *J); + bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J); + bool producesStall(const MachineInstr *MI); +}; +} // namespace llvm +#endif // HEXAGONVLIWPACKETIZER_H + diff --git a/lib/Target/Hexagon/LLVMBuild.txt b/lib/Target/Hexagon/LLVMBuild.txt index 9d288af0214a..4088cedafd84 100644 --- a/lib/Target/Hexagon/LLVMBuild.txt +++ b/lib/Target/Hexagon/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Disassembler MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler MCTargetDesc TargetInfo [component_0] type = TargetGroup @@ -33,6 +33,7 @@ required_libraries = AsmPrinter CodeGen Core + HexagonAsmParser HexagonDesc HexagonInfo MC diff --git a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt index 5403b106cbbe..2c5d0dab2848 100644 --- a/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt @@ -3,10 +3,12 @@ add_llvm_library(LLVMHexagonDesc HexagonELFObjectWriter.cpp HexagonInstPrinter.cpp HexagonMCAsmInfo.cpp + HexagonMCChecker.cpp HexagonMCCodeEmitter.cpp HexagonMCCompound.cpp HexagonMCDuplexInfo.cpp HexagonMCELFStreamer.cpp + HexagonMCExpr.cpp HexagonMCInstrInfo.cpp HexagonMCShuffler.cpp HexagonMCTargetDesc.cpp diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index 99ea2fabf867..b73af8249cb5 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ 
b/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -13,7 +13,9 @@
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "MCTargetDesc/HexagonMCInstrInfo.h"
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 #include "llvm/MC/MCInstrInfo.h"
@@ -33,14 +35,28 @@ class HexagonAsmBackend : public MCAsmBackend {
   mutable uint64_t relaxedCnt;
   std::unique_ptr<MCInstrInfo> MCII;
   std::unique_ptr<MCInst *> RelaxTarget;
+  MCInst * Extender;

 public:
   HexagonAsmBackend(Target const &T, uint8_t OSABI, StringRef CPU) :
-    OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *){}
+    OSABI(OSABI), MCII (T.createMCInstrInfo()), RelaxTarget(new MCInst *),
+    Extender(nullptr) {}

   MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
     return createHexagonELFObjectWriter(OS, OSABI, CPU);
   }

+  void setExtender(MCContext &Context) const {
+    if (Extender == nullptr)
+      const_cast<HexagonAsmBackend *>(this)->Extender = new (Context) MCInst;
+  }
+
+  MCInst *takeExtender() const {
+    assert(Extender != nullptr);
+    MCInst * Result = Extender;
+    const_cast<HexagonAsmBackend *>(this)->Extender = nullptr;
+    return Result;
+  }
+
   unsigned getNumFixupKinds() const override {
     return Hexagon::NumTargetFixupKinds;
   }
@@ -222,6 +238,7 @@ public:
         if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
           ++relaxedCnt;
           *RelaxTarget = &MCI;
+          setExtender(Layout.getAssembler().getContext());
           return true;
         } else {
           return false;
@@ -262,6 +279,7 @@ public:
       if (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_SIZE) {
         ++relaxedCnt;
         *RelaxTarget = &MCI;
+        setExtender(Layout.getAssembler().getContext());
         return true;
       }
     }
@@ -276,9 +294,35 @@ public:
     llvm_unreachable("Handled by fixupNeedsRelaxationAdvanced");
   }

-  void relaxInstruction(MCInst const & /*Inst*/,
-                        MCInst & /*Res*/) const override {
-    llvm_unreachable("relaxInstruction() unimplemented");
+  void relaxInstruction(MCInst const & Inst,
+                        MCInst & Res) const override {
+    assert(HexagonMCInstrInfo::isBundle(Inst) &&
+           "Hexagon relaxInstruction only works on bundles");
+
+    Res = HexagonMCInstrInfo::createBundle();
+    // Copy the results into the bundle.
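Relaxation is triggered when a fixup value no longer fits the instruction's extendable-operand range: signed fields span [-2^(B-1), 2^(B-1)-1] and unsigned fields [0, 2^B-1], with B the extent bits. A sketch of that fit test (the function name is illustrative, not from this patch):

#include <cstdint>

bool fitsWithoutExtender(int64_t V, unsigned Bits, bool Signed) {
  if (Signed)
    return V >= -(INT64_C(1) << (Bits - 1)) && V < (INT64_C(1) << (Bits - 1));
  return V >= 0 && V < (INT64_C(1) << Bits);
}
// fitsWithoutExtender(100, 8, true) -> true; fitsWithoutExtender(200, 8, true)
// -> false, so an immediate extender word must be inserted and the bundle
// grows, which is exactly what relaxInstruction does below.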
+    bool Update = false;
+    for (auto &I : HexagonMCInstrInfo::bundleInstructions(Inst)) {
+      MCInst &CrntHMI = const_cast<MCInst &>(*I.getInst());
+
+      // If an immediate extender is needed, add it in.
+      if (*RelaxTarget == &CrntHMI) {
+        Update = true;
+        assert((HexagonMCInstrInfo::bundleSize(Res) < HEXAGON_PACKET_SIZE) &&
+               "No room to insert extender for relaxation");
+
+        MCInst *HMIx = takeExtender();
+        *HMIx = HexagonMCInstrInfo::deriveExtender(
+            *MCII, CrntHMI,
+            HexagonMCInstrInfo::getExtendableOperand(*MCII, CrntHMI));
+        Res.addOperand(MCOperand::createInst(HMIx));
+        *RelaxTarget = nullptr;
+      }
+      // Now copy over the original instruction (the one we may have
+      // extended).
+      Res.addOperand(MCOperand::createInst(I.getInst()));
+    }
+    (void)Update;
+    assert(Update && "Didn't find relaxation target");
   }

   bool writeNopData(uint64_t Count,
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index f4d162ccf6a8..47a6f8636276 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -44,6 +44,25 @@ namespace HexagonII {
     TypeMEMOP = 9,
     TypeNV = 10,
     TypeDUPLEX = 11,
+    TypeCOMPOUND = 12,
+    TypeCVI_FIRST = 13,
+    TypeCVI_VA = TypeCVI_FIRST,
+    TypeCVI_VA_DV = 14,
+    TypeCVI_VX = 15,
+    TypeCVI_VX_DV = 16,
+    TypeCVI_VP = 17,
+    TypeCVI_VP_VS = 18,
+    TypeCVI_VS = 19,
+    TypeCVI_VINLANESAT = 20,
+    TypeCVI_VM_LD = 21,
+    TypeCVI_VM_TMP_LD = 22,
+    TypeCVI_VM_CUR_LD = 23,
+    TypeCVI_VM_VP_LDU = 24,
+    TypeCVI_VM_ST = 25,
+    TypeCVI_VM_NEW_ST = 26,
+    TypeCVI_VM_STU = 27,
+    TypeCVI_HIST = 28,
+    TypeCVI_LAST = TypeCVI_HIST,
     TypePREFIX = 30, // Such as extenders.
     TypeENDLOOP = 31 // Such as end of a HW loop.
   };
@@ -71,12 +90,16 @@ namespace HexagonII {
     PostInc = 6 // Post increment addressing mode
   };

+  // MemAccessSize is represented as 1+log2(N) where N is size in bytes.
   enum class MemAccessSize {
     NoMemAccess = 0, // Not a memory access instruction.
     ByteAccess = 1, // Byte access instruction (memb).
     HalfWordAccess = 2, // Half word access instruction (memh).
     WordAccess = 3, // Word access instruction (memw).
-    DoubleWordAccess = 4 // Double word access instruction (memd)
+    DoubleWordAccess = 4, // Double word access instruction (memd)
+    // 5, // We do not have a 16 byte vector access.
+    Vector64Access = 7, // 64 Byte vector access instruction (vmem).
+    Vector128Access = 8 // 128 Byte vector access instruction (vmem).
   };

   // MCInstrDesc TSFlags
@@ -156,7 +179,7 @@
     AddrModeMask = 0x7,
     // Access size for load/store instructions.
     MemAccessSizePos = 43,
-    MemAccesSizeMask = 0x7,
+    MemAccesSizeMask = 0xf,

     // Branch predicted taken.
     TakenPos = 47,
@@ -164,7 +187,23 @@
     // Floating-point instructions.
     FPPos = 48,
-    FPMask = 0x1
+    FPMask = 0x1,
+
+    // New-Value producer-2 instructions.
+    hasNewValuePos2 = 50,
+    hasNewValueMask2 = 0x1,
+
+    // Which operand consumes or produces a new value.
+    NewValueOpPos2 = 51,
+    NewValueOpMask2 = 0x7,
+
+    // Accumulator instructions.
+ AccumulatorPos = 54, + AccumulatorMask = 0x1, + + // Complex XU, prevent xu competition by prefering slot3 + PrefersSlot3Pos = 55, + PrefersSlot3Mask = 0x1, }; // *** The code above must match HexagonInstrFormat*.td *** // @@ -219,6 +258,26 @@ namespace HexagonII { INST_PARSE_EXTENDER = 0x00000000 }; + enum InstIClassBits : unsigned { + INST_ICLASS_MASK = 0xf0000000, + INST_ICLASS_EXTENDER = 0x00000000, + INST_ICLASS_J_1 = 0x10000000, + INST_ICLASS_J_2 = 0x20000000, + INST_ICLASS_LD_ST_1 = 0x30000000, + INST_ICLASS_LD_ST_2 = 0x40000000, + INST_ICLASS_J_3 = 0x50000000, + INST_ICLASS_CR = 0x60000000, + INST_ICLASS_ALU32_1 = 0x70000000, + INST_ICLASS_XTYPE_1 = 0x80000000, + INST_ICLASS_LD = 0x90000000, + INST_ICLASS_ST = 0xa0000000, + INST_ICLASS_ALU32_2 = 0xb0000000, + INST_ICLASS_XTYPE_2 = 0xc0000000, + INST_ICLASS_XTYPE_3 = 0xd0000000, + INST_ICLASS_XTYPE_4 = 0xe0000000, + INST_ICLASS_ALU32_3 = 0xf0000000 + }; + } // End namespace HexagonII. } // End namespace llvm. diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp index 36f81465eef6..06ccec532211 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "HexagonAsmPrinter.h" -#include "Hexagon.h" #include "HexagonInstPrinter.h" +#include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -28,104 +28,33 @@ using namespace llvm; #define GET_INSTRUCTION_NAME #include "HexagonGenAsmWriter.inc" -HexagonAsmInstPrinter::HexagonAsmInstPrinter(MCInstPrinter *RawPrinter) - : MCInstPrinter(*RawPrinter), RawPrinter(RawPrinter) {} - -void HexagonAsmInstPrinter::printInst(MCInst const *MI, raw_ostream &O, - StringRef Annot, - MCSubtargetInfo const &STI) { - assert(HexagonMCInstrInfo::isBundle(*MI)); - assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); - std::string Buffer; - { - raw_string_ostream TempStream(Buffer); - RawPrinter->printInst(MI, TempStream, "", STI); - } - StringRef Contents(Buffer); - auto PacketBundle = Contents.rsplit('\n'); - auto HeadTail = PacketBundle.first.split('\n'); - auto Preamble = "\t{\n\t\t"; - auto Separator = ""; - while(!HeadTail.first.empty()) { - O << Separator; - StringRef Inst; - auto Duplex = HeadTail.first.split('\v'); - if(!Duplex.second.empty()){ - O << Duplex.first << "\n"; - Inst = Duplex.second; - } - else - Inst = Duplex.first; - O << Preamble; - O << Inst; - HeadTail = HeadTail.second.split('\n'); - Preamble = ""; - Separator = "\n\t\t"; - } - O << "\n\t}" << PacketBundle.second; -} - -void HexagonAsmInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { - RawPrinter->printRegName(O, RegNo); -} - -// Return the minimum value that a constant extendable operand can have -// without being extended. -static int getMinValue(uint64_t TSFlags) { - unsigned isSigned = - (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - unsigned bits = - (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; - - if (isSigned) - return -1U << (bits - 1); - - return 0; -} - -// Return the maximum value that a constant extendable operand can have -// without being extended. 
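Two small computations implied by the fields above, written out as a sketch: TSFlags properties are read as (TSFlags >> Pos) & Mask (the same pattern the removed range helpers below rely on), and a MemAccessSize code decodes back to a byte count of 1 << (code - 1):

#include <cstdint>

uint64_t getField(uint64_t TSFlags, unsigned Pos, uint64_t Mask) {
  return (TSFlags >> Pos) & Mask;
}

// 1 -> byte, 2 -> half word, 3 -> word, 4 -> double word,
// 7 -> 64-byte vector, 8 -> 128-byte vector; 0 means no memory access.
unsigned accessSizeInBytes(unsigned Code) {
  return Code == 0 ? 0 : 1u << (Code - 1);
}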
-static int getMaxValue(uint64_t TSFlags) { - unsigned isSigned = - (TSFlags >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask; - unsigned bits = - (TSFlags >> HexagonII::ExtentBitsPos) & HexagonII::ExtentBitsMask; - - if (isSigned) - return ~(-1U << (bits - 1)); - - return ~(-1U << bits); -} - -// Return true if the instruction must be extended. -static bool isExtended(uint64_t TSFlags) { - return (TSFlags >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask; -} - -// Currently just used in an assert statement -static bool isExtendable(uint64_t TSFlags) LLVM_ATTRIBUTE_UNUSED; -// Return true if the instruction may be extended based on the operand value. -static bool isExtendable(uint64_t TSFlags) { - return (TSFlags >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask; +HexagonInstPrinter::HexagonInstPrinter(MCAsmInfo const &MAI, + MCInstrInfo const &MII, + MCRegisterInfo const &MRI) + : MCInstPrinter(MAI, MII, MRI), MII(MII), HasExtender(false) { } StringRef HexagonInstPrinter::getOpcodeName(unsigned Opcode) const { return MII.getName(Opcode); } -void HexagonInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << getRegisterName(RegNo); +void HexagonInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { + O << getRegName(RegNo); +} + +StringRef HexagonInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); } void HexagonInstPrinter::setExtender(MCInst const &MCI) { HasExtender = HexagonMCInstrInfo::isImmext(MCI); } -void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS, - StringRef Annot, - MCSubtargetInfo const &STI) { +void HexagonInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, + StringRef Annot, const MCSubtargetInfo &STI) { assert(HexagonMCInstrInfo::isBundle(*MI)); assert(HexagonMCInstrInfo::bundleSize(*MI) <= HEXAGON_PACKET_SIZE); + assert(HexagonMCInstrInfo::bundleSize(*MI) > 0); HasExtender = false; for (auto const &I : HexagonMCInstrInfo::bundleInstructions(*MI)) { MCInst const &MCI = *I.getInst(); @@ -157,145 +86,148 @@ void HexagonInstPrinter::printInst(MCInst const *MI, raw_ostream &OS, } } -void HexagonInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { - const MCOperand& MO = MI->getOperand(OpNo); - - if (MO.isReg()) { - printRegName(O, MO.getReg()); - } else if(MO.isExpr()) { - MO.getExpr()->print(O, &MAI); - } else if(MO.isImm()) { - printImmOperand(MI, OpNo, O); - } else { - llvm_unreachable("Unknown operand"); - } -} - -void HexagonInstPrinter::printImmOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand& MO = MI->getOperand(OpNo); - - if(MO.isExpr()) { - MO.getExpr()->print(O, &MAI); - } else if(MO.isImm()) { - O << MI->getOperand(OpNo).getImm(); - } else { - llvm_unreachable("Unknown operand"); - } -} - -void HexagonInstPrinter::printExtOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand &MO = MI->getOperand(OpNo); - const MCInstrDesc &MII = getMII().get(MI->getOpcode()); - - assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) && - "Expecting an extendable operand"); - - if (MO.isExpr() || isExtended(MII.TSFlags)) { + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo && + (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI))) O << "#"; - } else if (MO.isImm()) { - int ImmValue = MO.getImm(); - if (ImmValue < getMinValue(MII.TSFlags) || - ImmValue > getMaxValue(MII.TSFlags)) - O << 
"#"; + MCOperand const &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + O << getRegisterName(MO.getReg()); + } else if (MO.isExpr()) { + int64_t Value; + if (MO.getExpr()->evaluateAsAbsolute(Value)) + O << formatImm(Value); + else + O << *MO.getExpr(); + } else { + llvm_unreachable("Unknown operand"); } +} + +void HexagonInstPrinter::printExtOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printUnsignedImmOperand(const MCInst *MI, - unsigned OpNo, raw_ostream &O) const { +void HexagonInstPrinter::printUnsignedImmOperand(MCInst const *MI, + unsigned OpNo, + raw_ostream &O) const { O << MI->getOperand(OpNo).getImm(); } -void HexagonInstPrinter::printNegImmOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printNegImmOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { O << -MI->getOperand(OpNo).getImm(); } -void HexagonInstPrinter::printNOneImmOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printNOneImmOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { O << -1; } -void HexagonInstPrinter::printMEMriOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand& MO0 = MI->getOperand(OpNo); - const MCOperand& MO1 = MI->getOperand(OpNo + 1); - - printRegName(O, MO0.getReg()); - O << " + #" << MO1.getImm(); +void HexagonInstPrinter::prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<9>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); } -void HexagonInstPrinter::printFrameIndexOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand& MO0 = MI->getOperand(OpNo); - const MCOperand& MO1 = MI->getOperand(OpNo + 1); - - printRegName(O, MO0.getReg()); - O << ", #" << MO1.getImm(); +void HexagonInstPrinter::prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); } -void HexagonInstPrinter::printGlobalOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<10>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x3f) == 0) && "Lower 6 bits must be ZERO."); + O << formatImm(Imm/64); +} + +void HexagonInstPrinter::prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + int64_t Imm; + bool Success = MI->getOperand(OpNo).getExpr()->evaluateAsAbsolute(Imm); + Imm = SignExtend64<11>(Imm); + assert(Success); (void)Success; + assert(((Imm & 0x7f) == 0) && "Lower 7 bits must be ZERO."); + O << formatImm(Imm/128); +} + +void HexagonInstPrinter::printGlobalOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { - assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); - printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printJumpTable(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { assert(MI->getOperand(OpNo).isExpr() && 
"Expecting expression"); printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printConstantPool(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { assert(MI->getOperand(OpNo).isExpr() && "Expecting expression"); printOperand(MI, OpNo, O); } -void HexagonInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printBranchOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { // Branches can take an immediate operand. This is used by the branch // selection pass to print $+8, an eight byte displacement from the PC. llvm_unreachable("Unknown branch operand."); } -void HexagonInstPrinter::printCallOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { -} +void HexagonInstPrinter::printCallOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} -void HexagonInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { -} +void HexagonInstPrinter::printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} -void HexagonInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { -} +void HexagonInstPrinter::printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const {} -void HexagonInstPrinter::printSymbol(const MCInst *MI, unsigned OpNo, +void HexagonInstPrinter::printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O, bool hi) const { - assert(MI->getOperand(OpNo).isImm() && "Unknown symbol operand"); + MCOperand const &MO = MI->getOperand(OpNo); - O << '#' << (hi ? "HI" : "LO") << "(#"; - printOperand(MI, OpNo, O); + O << '#' << (hi ? "HI" : "LO") << '('; + if (MO.isImm()) { + O << '#'; + printOperand(MI, OpNo, O); + } else { + printOperand(MI, OpNo, O); + assert("Unknown symbol operand"); + } O << ')'; } -void HexagonInstPrinter::printExtBrtarget(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const { - const MCOperand &MO = MI->getOperand(OpNo); - const MCInstrDesc &MII = getMII().get(MI->getOpcode()); - - assert((isExtendable(MII.TSFlags) || isExtended(MII.TSFlags)) && - "Expecting an extendable operand"); - - if (MO.isExpr() || isExtended(MII.TSFlags)) { - O << "##"; +void HexagonInstPrinter::printBrtarget(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const { + MCOperand const &MO = MI->getOperand(OpNo); + assert (MO.isExpr()); + MCExpr const &Expr = *MO.getExpr(); + int64_t Value; + if (Expr.evaluateAsAbsolute(Value)) + O << format("0x%" PRIx64, Value); + else { + if (HasExtender || HexagonMCInstrInfo::isConstExtended(MII, *MI)) + if (HexagonMCInstrInfo::getExtendableOp(MII, *MI) == OpNo) + O << "##"; + O << Expr; } - printOperand(MI, OpNo, O); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h index 534ac237d635..5f421184b20a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonInstPrinter.h @@ -7,7 +7,6 @@ // //===----------------------------------------------------------------------===// // -// This class prints an Hexagon MCInst to a .s file. 
// //===----------------------------------------------------------------------===// @@ -15,17 +14,8 @@ #define LLVM_LIB_TARGET_HEXAGON_INSTPRINTER_HEXAGONINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCInstrInfo.h" namespace llvm { -class HexagonAsmInstPrinter : public MCInstPrinter { -public: - HexagonAsmInstPrinter(MCInstPrinter *RawPrinter); - void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, - MCSubtargetInfo const &STI) override; - void printRegName(raw_ostream &O, unsigned RegNo) const override; - std::unique_ptr RawPrinter; -}; /// Prints bundles as a newline separated list of individual instructions /// Duplexes are separated by a vertical tab \v character /// A trailing line includes bundle properties such as endloop0/1 @@ -33,68 +23,69 @@ public: /// r0 = add(r1, r2) /// r0 = #0 \v jump 0x0 /// :endloop0 :endloop1 - class HexagonInstPrinter : public MCInstPrinter { - public: - explicit HexagonInstPrinter(MCAsmInfo const &MAI, - MCInstrInfo const &MII, - MCRegisterInfo const &MRI) - : MCInstPrinter(MAI, MII, MRI), MII(MII) {} +class HexagonInstPrinter : public MCInstPrinter { +public: + explicit HexagonInstPrinter(MCAsmInfo const &MAI, MCInstrInfo const &MII, + MCRegisterInfo const &MRI); + void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, + const MCSubtargetInfo &STI) override; + virtual StringRef getOpcodeName(unsigned Opcode) const; + void printInstruction(MCInst const *MI, raw_ostream &O); - void printInst(MCInst const *MI, raw_ostream &O, StringRef Annot, - const MCSubtargetInfo &STI) override; - virtual StringRef getOpcodeName(unsigned Opcode) const; - void printInstruction(const MCInst *MI, raw_ostream &O); - void printRegName(raw_ostream &OS, unsigned RegNo) const override; - static const char *getRegisterName(unsigned RegNo); + StringRef getRegName(unsigned RegNo) const; + static char const *getRegisterName(unsigned RegNo); + void printRegName(raw_ostream &O, unsigned RegNo) const override; - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printExtOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printUnsignedImmOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const; - void printNegImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printNOneImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printMEMriOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printFrameIndexOperand(const MCInst *MI, unsigned OpNo, - raw_ostream &O) const; - void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printCallOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printPredicateOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printGlobalOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) - const; - void printJumpTable(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - void printExtBrtarget(const MCInst *MI, unsigned OpNo, raw_ostream &O) const; - - void printConstantPool(const MCInst *MI, unsigned OpNo, + void printOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printExtOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printUnsignedImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void 
printNegImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printNOneImmOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void prints3_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints3_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_6ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void prints4_7ImmOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printBranchOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printCallOperand(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printAbsAddrOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printPredicateOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printGlobalOperand(MCInst const *MI, unsigned OpNo, + raw_ostream &O) const; + void printJumpTable(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; + void printBrtarget(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; - void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O) const - { printSymbol(MI, OpNo, O, true); } - void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O) const - { printSymbol(MI, OpNo, O, false); } + void printConstantPool(MCInst const *MI, unsigned OpNo, raw_ostream &O) const; - const MCInstrInfo &getMII() const { - return MII; - } + void printSymbolHi(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, true); + } + void printSymbolLo(MCInst const *MI, unsigned OpNo, raw_ostream &O) const { + printSymbol(MI, OpNo, O, false); + } - protected: - void printSymbol(const MCInst *MI, unsigned OpNo, raw_ostream &O, bool hi) - const; + MCAsmInfo const &getMAI() const { return MAI; } + MCInstrInfo const &getMII() const { return MII; } - private: - const MCInstrInfo &MII; +protected: + void printSymbol(MCInst const *MI, unsigned OpNo, raw_ostream &O, + bool hi) const; - bool HasExtender; - void setExtender(MCInst const &MCI); - }; +private: + MCInstrInfo const &MII; + + bool HasExtender; + void setExtender(MCInst const &MCI); +}; } // end namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h index dc0706994786..a8456b4ead9c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h @@ -18,13 +18,14 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class HexagonMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit HexagonMCAsmInfo(const Triple &TT); - }; +class HexagonMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit HexagonMCAsmInfo(const Triple &TT); +}; } // namespace llvm diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp new file mode 100644 index 000000000000..46b7b41fec3b --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -0,0 +1,581 @@ +//===----- HexagonMCChecker.cpp - Instruction bundle checking -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonMCChecker.h"
+
+#include "HexagonBaseInfo.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> RelaxNVChecks("relax-nv-checks", cl::init(false),
+  cl::ZeroOrMore, cl::Hidden, cl::desc("Relax checks of new-value validity"));
+
+const HexagonMCChecker::PredSense
+  HexagonMCChecker::Unconditional(Hexagon::NoRegister, false);
+
+void HexagonMCChecker::init() {
+  // Initialize read-only registers set.
+  ReadOnly.insert(Hexagon::PC);
+
+  // Figure out the loop-registers definitions.
+  if (HexagonMCInstrInfo::isInnerLoop(MCB)) {
+    Defs[Hexagon::SA0].insert(Unconditional); // FIXME: define or change SA0?
+    Defs[Hexagon::LC0].insert(Unconditional);
+  }
+  if (HexagonMCInstrInfo::isOuterLoop(MCB)) {
+    Defs[Hexagon::SA1].insert(Unconditional); // FIXME: define or change SA1?
+    Defs[Hexagon::LC1].insert(Unconditional);
+  }
+
+  if (HexagonMCInstrInfo::isBundle(MCB))
+    // Unfurl a bundle.
+    for (auto const &I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+      init(*I.getInst());
+    }
+  else
+    init(MCB);
+}
+
+void HexagonMCChecker::init(MCInst const &MCI) {
+  const MCInstrDesc &MCID = HexagonMCInstrInfo::getDesc(MCII, MCI);
+  unsigned PredReg = Hexagon::NoRegister;
+  bool isTrue = false;
+
+  // Get used registers.
+  for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i)
+    if (MCI.getOperand(i).isReg()) {
+      unsigned R = MCI.getOperand(i).getReg();
+
+      if (HexagonMCInstrInfo::isPredicated(MCII, MCI) && isPredicateRegister(R)) {
+        // Note a used predicate register.
+        PredReg = R;
+        isTrue = HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI);
+
+        // Note use of new predicate register.
+        if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI))
+          NewPreds.insert(PredReg);
+      }
+      else
+        // Note register use. Super-registers are not tracked directly,
+        // but their components are.
+        for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
+             SRI.isValid();
+             ++SRI)
+          if (!MCSubRegIterator(*SRI, &RI).isValid())
+            // Skip super-registers used indirectly.
+            Uses.insert(*SRI);
+    }
+
+  // Get implicit register definitions.
+  if (const MCPhysReg *ImpDef = MCID.getImplicitDefs())
+    for (; *ImpDef; ++ImpDef) {
+      unsigned R = *ImpDef;
+
+      if (Hexagon::R31 != R && MCID.isCall())
+        // Any register other than the LR and the PC is actually a volatile one
+        // as defined by the ABI, not modified implicitly by the call insn.
+        continue;
+      if (Hexagon::PC == R)
+        // Branches are the only insns that can change the PC;
+        // otherwise it is a read-only register.
+        continue;
+
+      if (Hexagon::USR_OVF == R)
+        // Many insns change the USR implicitly, but only one or another flag.
+        // The instruction table models the USR.OVF flag, which can be
+        // implicitly modified more than once, but cannot be modified in the
+        // same packet with an instruction that modifies it explicitly. Deal
+        // with such situations individually.
+        SoftDefs.insert(R);
+      else if (isPredicateRegister(R) &&
+               HexagonMCInstrInfo::isPredicateLate(MCII, MCI))
+        // Include implicit late predicates.
+ LatePreds.insert(R); + else + Defs[R].insert(PredSense(PredReg, isTrue)); + } + + // Figure out explicit register definitions. + for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { + unsigned R = MCI.getOperand(i).getReg(), + S = Hexagon::NoRegister; + + // Note register definitions, direct ones as well as indirect side-effects. + // Super-registers are not tracked directly, but their components. + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) { + if (MCSubRegIterator(*SRI, &RI).isValid()) + // Skip super-registers defined indirectly. + continue; + + if (R == *SRI) { + if (S == R) + // Avoid scoring the defined register multiple times. + continue; + else + // Note that the defined register has already been scored. + S = R; + } + + if (Hexagon::P3_0 != R && Hexagon::P3_0 == *SRI) + // P3:0 is a special case, since multiple predicate register definitions + // in a packet is allowed as the equivalent of their logical "and". + // Only an explicit definition of P3:0 is noted as such; if a + // side-effect, then note as a soft definition. + SoftDefs.insert(*SRI); + else if (HexagonMCInstrInfo::isPredicateLate(MCII, MCI) && isPredicateRegister(*SRI)) + // Some insns produce predicates too late to be used in the same packet. + LatePreds.insert(*SRI); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_CUR_LD) + // Current loads should be used in the same packet. + // TODO: relies on the impossibility of a current and a temporary loads + // in the same packet. + CurDefs.insert(*SRI), Defs[*SRI].insert(PredSense(PredReg, isTrue)); + else if (i == 0 && llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCVI_VM_TMP_LD) + // Temporary loads should be used in the same packet, but don't commit + // results, so it should be disregarded if another insn changes the same + // register. + // TODO: relies on the impossibility of a current and a temporary loads + // in the same packet. + TmpDefs.insert(*SRI); + else if (i <= 1 && llvm::HexagonMCInstrInfo::hasNewValue2(MCII, MCI) ) + // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and + // destination registers with this instruction. same for vdeal(Vx,Vy,Rx) + Uses.insert(*SRI); + else + Defs[*SRI].insert(PredSense(PredReg, isTrue)); + } + } + + // Figure out register definitions that produce new values. + if (HexagonMCInstrInfo::hasNewValue(MCII, MCI)) { + unsigned R = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (HexagonMCInstrInfo::isCompound(MCII, MCI)) + compoundRegisterMap(R); // Compound insns have a limited register range. + + for(MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + // No super-registers defined indirectly. + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + + // For fairly unique 2-dot-new producers, example: + // vdeal(V1, V9, R0) V1.new and V9.new can be used by consumers. 
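The NewDefs/NewUses bookkeeping built here reduces to a packet-local membership test: a .new consumer is valid only if some instruction in the same packet publishes that register as a new value, and a vdeal/vshuff-style producer publishes two. A toy model, not the checker's real data structures:

#include <cassert>
#include <set>

struct PacketModel {
  std::set<unsigned> NewDefs; // registers produced as .new in this packet
  bool validNewValueUse(unsigned Reg) const { return NewDefs.count(Reg) != 0; }
};

int main() {
  PacketModel P;
  P.NewDefs.insert(1); // V1.new
  P.NewDefs.insert(9); // V9.new (second result of a vdeal-style producer)
  assert(P.validNewValueUse(9));
  assert(!P.validNewValueUse(2));
}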
+ if (HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) { + unsigned R2 = HexagonMCInstrInfo::getNewValueOperand2(MCII, MCI).getReg(); + + for(MCRegAliasIterator SRI(R2, &RI, !MCSubRegIterator(R2, &RI).isValid()); + SRI.isValid(); + ++SRI) + if (!MCSubRegIterator(*SRI, &RI).isValid()) + NewDefs[*SRI].push_back(NewSense::Def(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI), + HexagonMCInstrInfo::isFloat(MCII, MCI))); + } + } + + // Figure out definitions of new predicate registers. + if (HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + for (unsigned i = MCID.getNumDefs(); i < MCID.getNumOperands(); ++i) + if (MCI.getOperand(i).isReg()) { + unsigned P = MCI.getOperand(i).getReg(); + + if (isPredicateRegister(P)) + NewPreds.insert(P); + } + + // Figure out uses of new values. + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) { + unsigned N = HexagonMCInstrInfo::getNewValueOperand(MCII, MCI).getReg(); + + if (!MCSubRegIterator(N, &RI).isValid()) { + // Super-registers cannot use new values. + if (MCID.isBranch()) + NewUses[N] = NewSense::Jmp(llvm::HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV); + else + NewUses[N] = NewSense::Use(PredReg, HexagonMCInstrInfo::isPredicatedTrue(MCII, MCI)); + } + } +} + +HexagonMCChecker::HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst &mcb, MCInst &mcbdx, + MCRegisterInfo const &ri) + : MCB(mcb), MCBDX(mcbdx), RI(ri), MCII(MCII), STI(STI), + bLoadErrInfo(false) { + init(); +} + +bool HexagonMCChecker::check() { + bool chkB = checkBranches(); + bool chkP = checkPredicates(); + bool chkNV = checkNewValues(); + bool chkR = checkRegisters(); + bool chkS = checkSolo(); + bool chkSh = checkShuffle(); + bool chkSl = checkSlots(); + bool chk = chkB && chkP && chkNV && chkR && chkS && chkSh && chkSl; + + return chk; +} + +bool HexagonMCChecker::checkSlots() + +{ + unsigned slotsUsed = 0; + for (auto HMI: HexagonMCInstrInfo::bundleInstructions(MCBDX)) { + MCInst const& MCI = *HMI.getInst(); + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::isDuplex(MCII, MCI)) + slotsUsed += 2; + else + ++slotsUsed; + } + + if (slotsUsed > HEXAGON_PACKET_SIZE) { + HexagonMCErrInfo errInfo; + errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NOSLOTS); + addErrInfo(errInfo); + return false; + } + return true; +} + +// Check legal use of branches. +bool HexagonMCChecker::checkBranches() { + HexagonMCErrInfo errInfo; + if (HexagonMCInstrInfo::isBundle(MCB)) { + bool hasConditional = false; + unsigned Branches = 0, Returns = 0, NewIndirectBranches = 0, + NewValueBranches = 0, Conditional = HEXAGON_PRESHUFFLE_PACKET_SIZE, + Unconditional = HEXAGON_PRESHUFFLE_PACKET_SIZE; + + for (unsigned i = HexagonMCInstrInfo::bundleInstructionsOffset; + i < MCB.size(); ++i) { + MCInst const &MCI = *MCB.begin()[i].getInst(); + + if (HexagonMCInstrInfo::isImmext(MCI)) + continue; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch() || + HexagonMCInstrInfo::getDesc(MCII, MCI).isCall()) { + ++Branches; + if (HexagonMCInstrInfo::getDesc(MCII, MCI).isIndirectBranch() && + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) + ++NewIndirectBranches; + if (HexagonMCInstrInfo::isNewValue(MCII, MCI)) + ++NewValueBranches; + + if (HexagonMCInstrInfo::isPredicated(MCII, MCI) || + HexagonMCInstrInfo::isPredicatedNew(MCII, MCI)) { + hasConditional = true; + Conditional = i; // Record the position of the conditional branch. + } else { + Unconditional = i; // Record the position of the unconditional branch. 
+      }
+    }
+    if (HexagonMCInstrInfo::getDesc(MCII, MCI).isReturn() &&
+        HexagonMCInstrInfo::getDesc(MCII, MCI).mayLoad())
+      ++Returns;
+  }
+
+  if (Branches) // FIXME: should "Defs.count(Hexagon::PC)" be here too?
+    if (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+        HexagonMCInstrInfo::isOuterLoop(MCB)) {
+      // Error out if there's any branch in a loop-end packet.
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_ENDLOOP, Hexagon::PC);
+      addErrInfo(errInfo);
+      return false;
+    }
+  if (Branches > 1)
+    if (!hasConditional || Conditional > Unconditional) {
+      // Error out if there is more than one unconditional branch or
+      // the conditional branch appears after the unconditional one.
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_BRANCHES);
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Check legal use of predicate registers.
+bool HexagonMCChecker::checkPredicates() {
+  HexagonMCErrInfo errInfo;
+  // Check for proper use of new predicate registers.
+  for (const auto& I : NewPreds) {
+    unsigned P = I;
+
+    if (!Defs.count(P) || LatePreds.count(P)) {
+      // Error out if the new predicate register is not defined,
+      // or defined "late"
+      // (e.g., "{ if (p3.new)... ; p3 = sp1loop0(#r7:2, Rs) }").
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWP, P);
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+
+  // Check for proper use of auto-anded predicate registers.
+  for (const auto& I : LatePreds) {
+    unsigned P = I;
+
+    if (LatePreds.count(P) > 1 || Defs.count(P)) {
+      // Error out if a predicate register is defined "late" multiple times or
+      // is both defined late and regularly defined
+      // (e.g., "{ p3 = sp1loop0(...); p3 = cmp.eq(...) }").
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, P);
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Check legal use of new values.
+bool HexagonMCChecker::checkNewValues() {
+  HexagonMCErrInfo errInfo;
+  memset(&errInfo, 0, sizeof(errInfo));
+  for (auto& I : NewUses) {
+    unsigned R = I.first;
+    NewSense &US = I.second;
+
+    if (!hasValidNewValueDef(US, NewDefs[R])) {
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_NEWV, R);
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+
+  return true;
+}
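Both predicate rules just shown reduce to set-membership tests. A standalone sketch with plain std::multiset standing in for the checker's state (illustrative names, not the patch's API):

    #include <set>

    struct PredState {
      std::multiset<unsigned> Defs;      // predicates written normally
      std::multiset<unsigned> LatePreds; // produced too late to forward
      std::multiset<unsigned> NewPreds;  // consumed as .new in this packet

      // A .new use needs an in-packet producer that is not late.
      bool newUseOk(unsigned P) const {
        return Defs.count(P) != 0 && LatePreds.count(P) == 0;
      }
      // A late producer must be the only writer of its predicate.
      bool lateDefOk(unsigned P) const {
        return LatePreds.count(P) <= 1 && Defs.count(P) == 0;
      }
    };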
+
+// Check for legal register uses and definitions.
+bool HexagonMCChecker::checkRegisters() {
+  HexagonMCErrInfo errInfo;
+  // Check for proper register definitions.
+  for (const auto& I : Defs) {
+    unsigned R = I.first;
+
+    if (ReadOnly.count(R)) {
+      // Error out for definitions of read-only registers.
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_READONLY, R);
+      addErrInfo(errInfo);
+      return false;
+    }
+    if (isLoopRegister(R) && Defs.count(R) > 1 &&
+        (HexagonMCInstrInfo::isInnerLoop(MCB) ||
+         HexagonMCInstrInfo::isOuterLoop(MCB))) {
+      // Error out for definitions of loop registers at the end of a loop.
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_LOOP, R);
+      addErrInfo(errInfo);
+      return false;
+    }
+    if (SoftDefs.count(R)) {
+      // Error out for explicit changes to registers also weakly defined
+      // (e.g., "{ usr = r0; r0 = sfadd(...) }").
+      unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:.
+      unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+      addErrInfo(errInfo);
+      return false;
+    }
+    if (!isPredicateRegister(R) && Defs[R].size() > 1) {
+      // Check for multiple register definitions.
+      PredSet &PM = Defs[R];
+
+      // Check for multiple unconditional register definitions.
+      if (PM.count(Unconditional)) {
+        // Error out on an unconditional change when there are any other
+        // changes, conditional or not.
+        unsigned UsrR = Hexagon::USR;
+        unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+        errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, BadR);
+        addErrInfo(errInfo);
+        return false;
+      }
+      // Check for multiple conditional register definitions.
+      for (const auto& J : PM) {
+        PredSense P = J;
+
+        // Check for multiple uses of the same condition.
+        if (PM.count(P) > 1) {
+          // Error out on conditional changes based on the same predicate
+          // (e.g., "{ if (!p0) r0 =...; if (!p0) r0 =... }").
+          errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+          addErrInfo(errInfo);
+          return false;
+        }
+        // Check for the use of the complementary condition.
+        P.second = !P.second;
+        if (PM.count(P) && PM.size() > 2) {
+          // Error out on more than two conditional changes based on the same
+          // predicate
+          // (e.g., "{ if (p0) r0 =...; if (!p0) r0 =...; if (!p0) r0 =... }").
+          errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_REGISTERS, R);
+          addErrInfo(errInfo);
+          return false;
+        }
+      }
+    }
+  }
+
+  // Check for use of current definitions.
+  for (const auto& I : CurDefs) {
+    unsigned R = I;
+
+    if (!Uses.count(R)) {
+      // Warn on an unused current definition.
+      errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_CURRENT, R);
+      addErrInfo(errInfo);
+      return true;
+    }
+  }
+
+  // Check for use of temporary definitions.
+  for (const auto& I : TmpDefs) {
+    unsigned R = I;
+
+    if (!Uses.count(R)) {
+      // Special case for vhist.
+      bool vHistFound = false;
+      for (auto const&HMI : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+        if (llvm::HexagonMCInstrInfo::getType(MCII, *HMI.getInst()) == HexagonII::TypeCVI_HIST) {
+          vHistFound = true; // vhist() implicitly uses ALL REGxx.tmp
+          break;
+        }
+      }
+      // Warn on an unused temporary definition.
+      if (vHistFound == false) {
+        errInfo.setWarning(HexagonMCErrInfo::CHECK_WARN_TEMPORARY, R);
+        addErrInfo(errInfo);
+        return true;
+      }
+    }
+  }
+
+  return true;
+}
+
+// Check for legal use of solo insns.
+bool HexagonMCChecker::checkSolo() {
+  HexagonMCErrInfo errInfo;
+  if (HexagonMCInstrInfo::isBundle(MCB) &&
+      HexagonMCInstrInfo::bundleSize(MCB) > 1) {
+    for (auto const&I : HexagonMCInstrInfo::bundleInstructions(MCB)) {
+      if (llvm::HexagonMCInstrInfo::isSolo(MCII, *I.getInst())) {
+        errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SOLO);
+        addErrInfo(errInfo);
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
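The register-clash rules in checkRegisters() above condense to this: one write is always fine, and several writes are legal only as a single if(p)/if(!p) pair on the same predicate. A plain-C++ sketch of that decision (an assumed simplification of the logic above, not the patch's code):

    #include <set>
    #include <utility>

    using PredSense = std::pair<unsigned, bool>; // (predicate register, sense)
    static const PredSense Unconditional{0, false};

    // Is the multiset of senses under which one register is written legal?
    static bool defsOk(const std::multiset<PredSense> &PM) {
      if (PM.size() < 2)
        return true;
      if (PM.count(Unconditional))
        return false;           // an unconditional write tolerates no company
      for (PredSense P : PM) {
        if (PM.count(P) > 1)
          return false;         // same condition used twice
        P.second = !P.second;
        if (PM.count(P) && PM.size() > 2)
          return false;         // complementary pair plus an extra write
      }
      return true;              // exactly one if(p)/if(!p) pair
    }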
+
+bool HexagonMCChecker::checkShuffle() {
+  HexagonMCErrInfo errInfo;
+  // Branch info is lost when duplexing. The unduplexed insns must be
+  // checked and only branch errors matter for this case.
+  HexagonMCShuffler MCS(MCII, STI, MCB);
+  if (!MCS.check()) {
+    if (MCS.getError() == HexagonShuffler::SHUFFLE_ERROR_BRANCHES) {
+      errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+      errInfo.setShuffleError(MCS.getError());
+      addErrInfo(errInfo);
+      return false;
+    }
+  }
+  HexagonMCShuffler MCSDX(MCII, STI, MCBDX);
+  if (!MCSDX.check()) {
+    errInfo.setError(HexagonMCErrInfo::CHECK_ERROR_SHUFFLE);
+    errInfo.setShuffleError(MCSDX.getError());
+    addErrInfo(errInfo);
+    return false;
+  }
+  return true;
+}
+
+void HexagonMCChecker::compoundRegisterMap(unsigned& Register) {
+  switch (Register) {
+  default:
+    break;
+  case Hexagon::R15:
+    Register = Hexagon::R23;
+    break;
+  case Hexagon::R14:
+    Register = Hexagon::R22;
+    break;
+  case Hexagon::R13:
+    Register = Hexagon::R21;
+    break;
+  case Hexagon::R12:
+    Register = Hexagon::R20;
+    break;
+  case Hexagon::R11:
+    Register = Hexagon::R19;
+    break;
+  case Hexagon::R10:
+    Register = Hexagon::R18;
+    break;
+  case Hexagon::R9:
+    Register = Hexagon::R17;
+    break;
+  case Hexagon::R8:
+    Register = Hexagon::R16;
+    break;
+  }
+}
+
+bool HexagonMCChecker::hasValidNewValueDef(const NewSense &Use,
+                                           const NewSenseList &Defs) const {
+  bool Strict = !RelaxNVChecks;
+
+  for (unsigned i = 0, n = Defs.size(); i < n; ++i) {
+    const NewSense &Def = Defs[i];
+    // NVJ cannot use a new FP value [7.6.1]
+    if (Use.IsNVJ && (Def.IsFloat || Def.PredReg != 0))
+      continue;
+    // If the definition was not predicated, then it does not matter if
+    // the use is.
+    if (Def.PredReg == 0)
+      return true;
+    // With the strict checks, both the definition and the use must be
+    // predicated on the same register and condition.
+    if (Strict) {
+      if (Def.PredReg == Use.PredReg && Def.Cond == Use.Cond)
+        return true;
+    } else {
+      // With the relaxed checks, if the definition was predicated, the only
+      // detectable violation is a use predicated on the opposing condition;
+      // otherwise, it's fine.
+      if (Def.PredReg != Use.PredReg || Def.Cond == Use.Cond)
+        return true;
+    }
+  }
+  return false;
+}
+
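hasValidNewValueDef() boils down to one predicate-compatibility test per producer. A standalone mirror of that decision in plain C++ (the Sense type and function names are invented for illustration):

    #include <vector>

    struct Sense { unsigned PredReg; bool Cond; };

    // May a .new use with sense U forward from any producer in Defs?
    // Strict mode demands the same predicate and the same sense; relaxed
    // mode only rejects the provably opposite sense of the same predicate.
    static bool mayForward(const Sense &U, const std::vector<Sense> &Defs,
                           bool Strict) {
      for (const Sense &D : Defs) {
        if (D.PredReg == 0)
          return true;                 // unpredicated producer: always ok
        if (Strict ? (D.PredReg == U.PredReg && D.Cond == U.Cond)
                   : (D.PredReg != U.PredReg || D.Cond == U.Cond))
          return true;
      }
      return false;                    // no compatible producer in the packet
    }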
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
new file mode 100644
index 000000000000..5fc0bdeaccbb
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -0,0 +1,218 @@
+//===----- HexagonMCChecker.h - Instruction bundle checking ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the checking of insns inside a bundle according to the
+// packet constraint rules of the Hexagon ISA.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGONMCCHECKER_H
+#define HEXAGONMCCHECKER_H
+
+#include <map>
+#include <set>
+#include <queue>
+#include "MCTargetDesc/HexagonMCShuffler.h"
+
+using namespace llvm;
+
+namespace llvm {
+class MCOperandInfo;
+
+typedef struct {
+  unsigned Error, Warning, ShuffleError;
+  unsigned Register;
+} ErrInfo_T;
+
+class HexagonMCErrInfo {
+public:
+  enum {
+    CHECK_SUCCESS = 0,
+    // Errors.
+    CHECK_ERROR_BRANCHES = 0x00001,
+    CHECK_ERROR_NEWP = 0x00002,
+    CHECK_ERROR_NEWV = 0x00004,
+    CHECK_ERROR_REGISTERS = 0x00008,
+    CHECK_ERROR_READONLY = 0x00010,
+    CHECK_ERROR_LOOP = 0x00020,
+    CHECK_ERROR_ENDLOOP = 0x00040,
+    CHECK_ERROR_SOLO = 0x00080,
+    CHECK_ERROR_SHUFFLE = 0x00100,
+    CHECK_ERROR_NOSLOTS = 0x00200,
+    CHECK_ERROR_UNKNOWN = 0x00400,
+    // Warnings.
+    CHECK_WARN_CURRENT = 0x10000,
+    CHECK_WARN_TEMPORARY = 0x20000
+  };
+  ErrInfo_T s;
+
+  void reset() {
+    s.Error = CHECK_SUCCESS;
+    s.Warning = CHECK_SUCCESS;
+    s.ShuffleError = HexagonShuffler::SHUFFLE_SUCCESS;
+    s.Register = Hexagon::NoRegister;
+  };
+  HexagonMCErrInfo() {
+    reset();
+  };
+
+  void setError(unsigned e, unsigned r = Hexagon::NoRegister)
+    { s.Error = e; s.Register = r; };
+  void setWarning(unsigned w, unsigned r = Hexagon::NoRegister)
+    { s.Warning = w; s.Register = r; };
+  void setShuffleError(unsigned e) { s.ShuffleError = e; };
+};
+
+/// Check for a valid bundle.
+class HexagonMCChecker {
+  /// Insn bundle.
+  MCInst& MCB;
+  MCInst& MCBDX;
+  const MCRegisterInfo& RI;
+  MCInstrInfo const &MCII;
+  MCSubtargetInfo const &STI;
+  bool bLoadErrInfo;
+
+  /// Set of definitions: register #, if predicated, if predicated true.
+  typedef std::pair<unsigned, bool> PredSense;
+  static const PredSense Unconditional;
+  typedef std::multiset<PredSense> PredSet;
+  typedef std::multiset<PredSense>::iterator PredSetIterator;
+
+  typedef llvm::DenseMap<unsigned, PredSet>::iterator DefsIterator;
+  llvm::DenseMap<unsigned, PredSet> Defs;
+
+  /// Information about how a new-value register is defined or used:
+  ///   PredReg = predicate register, 0 if use/def not predicated,
+  ///   Cond = true/false for if(PredReg)/if(!PredReg) respectively,
+  ///   IsFloat = true if definition produces a floating point value
+  ///     (not valid for uses),
+  ///   IsNVJ = true if the use is a new-value branch (not valid for
+  ///     definitions).
+  struct NewSense {
+    unsigned PredReg;
+    bool IsFloat, IsNVJ, Cond;
+    // The special-case "constructors":
+    static NewSense Jmp(bool isNVJ) {
+      NewSense NS = { /*PredReg=*/ 0, /*IsFloat=*/ false, /*IsNVJ=*/ isNVJ,
+                      /*Cond=*/ false };
+      return NS;
+    }
+    static NewSense Use(unsigned PR, bool True) {
+      NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ false, /*IsNVJ=*/ false,
+                      /*Cond=*/ True };
+      return NS;
+    }
+    static NewSense Def(unsigned PR, bool True, bool Float) {
+      NewSense NS = { /*PredReg=*/ PR, /*IsFloat=*/ Float, /*IsNVJ=*/ false,
+                      /*Cond=*/ True };
+      return NS;
+    }
+  };
+  /// Set of definitions that produce new register:
+  typedef llvm::SmallVector<NewSense, 2> NewSenseList;
+  typedef llvm::DenseMap<unsigned, NewSenseList>::iterator NewDefsIterator;
+  llvm::DenseMap<unsigned, NewSenseList> NewDefs;
+
+  /// Set of weak definitions whose clashes should be enforced selectively.
+  typedef std::set<unsigned>::iterator SoftDefsIterator;
+  std::set<unsigned> SoftDefs;
+
+  /// Set of current definitions committed to the register file.
+  typedef std::set<unsigned>::iterator CurDefsIterator;
+  std::set<unsigned> CurDefs;
+
+  /// Set of temporary definitions not committed to the register file.
+  typedef std::set<unsigned>::iterator TmpDefsIterator;
+  std::set<unsigned> TmpDefs;
+
+  /// Set of new predicates used.
+  typedef std::set<unsigned>::iterator NewPredsIterator;
+  std::set<unsigned> NewPreds;
+
+  /// Set of predicates defined late.
+  typedef std::multiset<unsigned>::iterator LatePredsIterator;
+  std::multiset<unsigned> LatePreds;
+
+  /// Set of uses.
+  typedef std::set<unsigned>::iterator UsesIterator;
+  std::set<unsigned> Uses;
+
+  /// Set of new values used: new register, if new-value jump.
+  typedef llvm::DenseMap<unsigned, NewSense>::iterator NewUsesIterator;
+  llvm::DenseMap<unsigned, NewSense> NewUses;
+
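Note that the CHECK_* codes are disjoint single bits, with errors in the low 16 bits and warnings above them. The checker stores Error and Warning in separate fields, but the disjoint positions would let a caller accumulate or mask several at once. A small sketch (the mask values are copied from the enum above; the surrounding code is invented):

    #include <cstdio>

    enum : unsigned {
      ERROR_NOSLOTS = 0x00200, // an error bit, below bit 16
      WARN_CURRENT  = 0x10000, // a warning bit, at or above bit 16
    };

    int main() {
      unsigned Status = 0;
      Status |= ERROR_NOSLOTS; // record an error
      Status |= WARN_CURRENT;  // warnings never collide with error bits
      std::printf("errors=%#x warnings=%#x\n",
                  Status & 0xffffu, Status & ~0xffffu);
      return 0;
    }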
+  /// Pre-defined set of read-only registers.
+  typedef std::set<unsigned>::iterator ReadOnlyIterator;
+  std::set<unsigned> ReadOnly;
+
+  std::queue<ErrInfo_T> ErrInfoQ;
+  HexagonMCErrInfo CrntErrInfo;
+
+  void getErrInfo() {
+    if (bLoadErrInfo == true) {
+      if (ErrInfoQ.empty()) {
+        CrntErrInfo.reset();
+      } else {
+        CrntErrInfo.s = ErrInfoQ.front();
+        ErrInfoQ.pop();
+      }
+    }
+    bLoadErrInfo = false;
+  }
+
+  void init();
+  void init(MCInst const&);
+
+  // Checks performed.
+  bool checkBranches();
+  bool checkPredicates();
+  bool checkNewValues();
+  bool checkRegisters();
+  bool checkSolo();
+  bool checkShuffle();
+  bool checkSlots();
+
+  static void compoundRegisterMap(unsigned&);
+
+  bool isPredicateRegister(unsigned R) const {
+    return (Hexagon::P0 == R || Hexagon::P1 == R ||
+            Hexagon::P2 == R || Hexagon::P3 == R);
+  };
+  bool isLoopRegister(unsigned R) const {
+    return (Hexagon::SA0 == R || Hexagon::LC0 == R ||
+            Hexagon::SA1 == R || Hexagon::LC1 == R);
+  };
+
+  bool hasValidNewValueDef(const NewSense &Use,
+                           const NewSenseList &Defs) const;
+
+  public:
+  explicit HexagonMCChecker(MCInstrInfo const &MCII, MCSubtargetInfo const &STI, MCInst& mcb, MCInst &mcbdx,
+                            const MCRegisterInfo& ri);
+
+  bool check();
+
+  /// Add a new error/warning.
+  void addErrInfo(HexagonMCErrInfo &err) { ErrInfoQ.push(err.s); };
+
+  /// Return the error code for the last operation in the insn bundle.
+  unsigned getError() { getErrInfo(); return CrntErrInfo.s.Error; };
+  unsigned getWarning() { getErrInfo(); return CrntErrInfo.s.Warning; };
+  unsigned getShuffleError() { getErrInfo(); return CrntErrInfo.s.ShuffleError; };
+  unsigned getErrRegister() { getErrInfo(); return CrntErrInfo.s.Register; };
+  bool getNextErrInfo() {
+    bLoadErrInfo = true;
+    return (ErrInfoQ.empty()) ? false : (getErrInfo(), true);
+  }
+};
+
+}
+
+#endif // HEXAGONMCCHECKER_H
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 9fc4e2aeaba6..c2c6275e7e8d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -96,6 +96,12 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(
   assert(!HexagonMCInstrInfo::isBundle(HMB));
   uint64_t Binary;
 
+  // Compound instructions are limited to using registers 0-7 and 16-23,
+  // and here we map 16-23 to 8-15 so they can be correctly encoded.
+  static unsigned RegMap[8] = {Hexagon::R8, Hexagon::R9, Hexagon::R10,
+                               Hexagon::R11, Hexagon::R12, Hexagon::R13,
+                               Hexagon::R14, Hexagon::R15};
+
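The error-reporting API that just ended is a lazily drained FIFO: getNextErrInfo() arms a pending load, and the getters then observe the freshly popped entry. A standalone sketch of that consumption pattern (plain C++, invented names):

    #include <cstdio>
    #include <queue>

    struct ErrInfo { unsigned Error, Register; };

    struct ErrQueue {
      std::queue<ErrInfo> Q;
      ErrInfo Current{};
      bool LoadPending = false;

      bool next() {              // analogous to getNextErrInfo()
        LoadPending = true;
        if (Q.empty())
          return false;
        load();
        return true;
      }
      unsigned error() { load(); return Current.Error; }

    private:
      void load() {              // analogous to the lazy getErrInfo()
        if (!LoadPending)
          return;
        if (!Q.empty()) { Current = Q.front(); Q.pop(); }
        LoadPending = false;
      }
    };

    int main() {
      ErrQueue EQ;
      EQ.Q.push({0x200, 0});
      while (EQ.next())
        std::printf("error %#x\n", EQ.error());
      return 0;
    }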
   // Pseudo instructions don't get encoded and shouldn't be here
   // in the first place!
   assert(!HexagonMCInstrInfo::getDesc(MCII, HMB).isPseudo() &&
@@ -104,6 +110,16 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(
          " `" << HexagonMCInstrInfo::getName(MCII, HMB) << "'"
          "\n");
 
+  if (llvm::HexagonMCInstrInfo::getType(MCII, HMB) == HexagonII::TypeCOMPOUND) {
+    for (unsigned i = 0; i < HMB.getNumOperands(); ++i)
+      if (HMB.getOperand(i).isReg()) {
+        unsigned Reg =
+            MCT.getRegisterInfo()->getEncodingValue(HMB.getOperand(i).getReg());
+        if ((Reg <= 23) && (Reg >= 16))
+          HMB.getOperand(i).setReg(RegMap[Reg - 16]);
+      }
+  }
+
   if (HexagonMCInstrInfo::isNewValue(MCII, HMB)) {
     // Calculate the new value distance to the associated producer
     MCOperand &MCO =
@@ -318,7 +334,7 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
   // The only relocs left should be GP relative:
   default:
     if (MCID.mayStore() || MCID.mayLoad()) {
-      for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses;
+      for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
            ++ImpUses) {
         if (*ImpUses == Hexagon::GP) {
           switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
@@ -389,10 +405,8 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
     return cast<MCConstantExpr>(ME)->getValue();
   }
   if (MK == MCExpr::Binary) {
-    unsigned Res;
-    Res = getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
-    Res +=
-        getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
+    getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getLHS(), Fixups, STI);
+    getExprOpValue(MI, MO, cast<MCBinaryExpr>(ME)->getRHS(), Fixups, STI);
     return 0;
   }
 
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index 886f8db3bc63..d194bea3d8dc 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -115,8 +115,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
     SrcReg = MI.getOperand(1).getReg();
     if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
         HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) &&
-        MI.getOperand(2).isImm() && ((isUInt<5>(MI.getOperand(2).getImm())) ||
-                                     (MI.getOperand(2).getImm() == -1)))
+        (HexagonMCInstrInfo::inRange<5>(MI, 2) ||
+         HexagonMCInstrInfo::minConstant(MI, 2) == -1))
       return HexagonII::HCG_A;
     break;
   case Hexagon::A2_tfr:
@@ -134,8 +134,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
       return false;
     // Rd = #u6
     DstReg = MI.getOperand(0).getReg();
-    if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() <= 63 &&
-        MI.getOperand(1).getImm() >= 0 &&
+    if (HexagonMCInstrInfo::minConstant(MI, 1) <= 63 &&
+        HexagonMCInstrInfo::minConstant(MI, 1) >= 0 &&
         HexagonMCInstrInfo::isIntRegForSubInst(DstReg))
       return HexagonII::HCG_A;
     break;
@@ -145,9 +145,8 @@ unsigned getCompoundCandidateGroup(MCInst const &MI, bool IsExtended) {
     DstReg = MI.getOperand(0).getReg();
     Src1Reg = MI.getOperand(1).getReg();
     if ((Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
-        MI.getOperand(2).isImm() &&
         HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) &&
-        (MI.getOperand(2).getImm() == 0))
+        HexagonMCInstrInfo::minConstant(MI, 2) == 0)
       return HexagonII::HCG_A;
     break;
   // The fact that .new form is used pretty much guarantees
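The encoder above squeezes the legal compound registers r0-r7 and r16-r23 into a 4-bit field by re-encoding the upper group as 8-15, which is what the RegMap table implements via register handles. A minimal sketch of the same remapping with plain integers:

    #include <cassert>

    // Map an architectural register number legal in a compound (0-7 or 16-23)
    // to its 4-bit encoding slot: 0-7 stay put, 16-23 become 8-15.
    static unsigned compoundEncoding(unsigned Reg) {
      assert((Reg <= 7) || (Reg >= 16 && Reg <= 23));
      return Reg <= 7 ? Reg : Reg - 8;
    }
    // compoundEncoding(16) == 8, compoundEncoding(23) == 15.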
@@ -206,6 +205,8 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
   MCInst *CompoundInsn = 0;
   unsigned compoundOpcode;
   MCOperand Rs, Rt;
+  int64_t Value;
+  bool Success;
 
   switch (L.getOpcode()) {
   default:
@@ -277,7 +278,10 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
   case Hexagon::C2_cmpeqi:
     DEBUG(dbgs() << "CX: C2_cmpeqi\n");
-    if (L.getOperand(2).getImm() == -1)
+    Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+    (void)Success;
+    assert(Success);
+    if (Value == -1)
       compoundOpcode = cmpeqn1BitOpcode[getCompoundOp(R)];
     else
       compoundOpcode = cmpeqiBitOpcode[getCompoundOp(R)];
@@ -286,14 +290,17 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
     CompoundInsn = new (Context) MCInst;
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
-    if (L.getOperand(2).getImm() != -1)
+    if (Value != -1)
       CompoundInsn->addOperand(L.getOperand(2));
     CompoundInsn->addOperand(R.getOperand(1));
     break;
 
   case Hexagon::C2_cmpgti:
     DEBUG(dbgs() << "CX: C2_cmpgti\n");
-    if (L.getOperand(2).getImm() == -1)
+    Success = L.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
+    (void)Success;
+    assert(Success);
+    if (Value == -1)
       compoundOpcode = cmpgtn1BitOpcode[getCompoundOp(R)];
     else
       compoundOpcode = cmpgtiBitOpcode[getCompoundOp(R)];
@@ -302,7 +309,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
     CompoundInsn = new (Context) MCInst;
     CompoundInsn->setOpcode(compoundOpcode);
     CompoundInsn->addOperand(Rs);
-    if (L.getOperand(2).getImm() != -1)
+    if (Value != -1)
       CompoundInsn->addOperand(L.getOperand(2));
     CompoundInsn->addOperand(R.getOperand(1));
     break;
@@ -404,7 +411,7 @@ bool lookForCompound(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI) {
 /// additional slot.
 void HexagonMCInstrInfo::tryCompound(MCInstrInfo const &MCII,
                                      MCContext &Context, MCInst &MCI) {
-  assert(MCI.getOpcode() == Hexagon::BUNDLE &&
+  assert(HexagonMCInstrInfo::isBundle(MCI) &&
          "Non-Bundle where Bundle expected");
 
   // By definition a compound must have 2 insn.
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index 7e9247cef6ad..e6194f61a6ba 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -26,7 +26,7 @@ using namespace Hexagon;
 #define DEBUG_TYPE "hexagon-mcduplex-info"
 
 // pair table of subInstructions with opcodes
-static std::pair<unsigned, unsigned> opcodeData[] = {
+static const std::pair<unsigned, unsigned> opcodeData[] = {
     std::make_pair((unsigned)V4_SA1_addi, 0),
     std::make_pair((unsigned)V4_SA1_addrx, 6144),
     std::make_pair((unsigned)V4_SA1_addsp, 3072),
@@ -81,8 +81,7 @@ static std::pair<unsigned, unsigned> opcodeData[] = {
     std::make_pair((unsigned)V4_SS2_storewi1, 4352)};
 
 static std::map<unsigned, unsigned>
-    subinstOpcodeMap(opcodeData,
-                     opcodeData + sizeof(opcodeData) / sizeof(opcodeData[0]));
+    subinstOpcodeMap(std::begin(opcodeData), std::end(opcodeData));
 
 bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) {
   switch (Ga) {
@@ -195,15 +194,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
     // Special case this one from Group L2.
// Rd = memw(r29+#u5:2) if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { - if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && - MCI.getOperand(2).isImm() && - isShiftedUInt<5, 2>(MCI.getOperand(2).getImm())) { + if (HexagonMCInstrInfo::isIntReg(SrcReg) && + Hexagon::R29 == SrcReg && inRange<5, 2>(MCI, 2)) { return HexagonII::HSIG_L2; } // Rd = memw(Rs+#u4:2) if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - (MCI.getOperand(2).isImm() && - isShiftedUInt<4, 2>(MCI.getOperand(2).getImm()))) { + inRange<4, 2>(MCI, 2)) { return HexagonII::HSIG_L1; } } @@ -214,7 +211,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && isUInt<4>(MCI.getOperand(2).getImm())) { + inRange<4>(MCI, 2)) { return HexagonII::HSIG_L1; } break; @@ -235,8 +232,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && - isShiftedUInt<3, 1>(MCI.getOperand(2).getImm())) { + inRange<3, 1>(MCI, 2)) { return HexagonII::HSIG_L2; } break; @@ -246,7 +242,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && isUInt<3>(MCI.getOperand(2).getImm())) { + inRange<3>(MCI, 2)) { return HexagonII::HSIG_L2; } break; @@ -256,8 +252,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && - MCI.getOperand(2).isImm() && - isShiftedUInt<5, 3>(MCI.getOperand(2).getImm())) { + inRange<5, 3>(MCI, 2)) { return HexagonII::HSIG_L2; } break; @@ -326,15 +321,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isIntReg(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - Hexagon::R29 == Src1Reg && MCI.getOperand(1).isImm() && - isShiftedUInt<5, 2>(MCI.getOperand(1).getImm())) { + Hexagon::R29 == Src1Reg && inRange<5, 2>(MCI, 1)) { return HexagonII::HSIG_S2; } // memw(Rs+#u4:2) = Rt if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - MCI.getOperand(1).isImm() && - isShiftedUInt<4, 2>(MCI.getOperand(1).getImm())) { + inRange<4, 2>(MCI, 1)) { return HexagonII::HSIG_S1; } break; @@ -344,7 +337,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm())) { + inRange<4>(MCI, 1)) { return HexagonII::HSIG_S1; } break; @@ -363,8 +356,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && HexagonMCInstrInfo::isIntRegForSubInst(Src2Reg) && - MCI.getOperand(1).isImm() && - isShiftedUInt<3, 1>(MCI.getOperand(1).getImm())) { + inRange<3, 1>(MCI, 1)) { return HexagonII::HSIG_S2; } break; @@ 
-374,8 +366,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { Src2Reg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(Src2Reg) && HexagonMCInstrInfo::isIntReg(Src1Reg) && Hexagon::R29 == Src1Reg && - MCI.getOperand(1).isImm() && - isShiftedInt<6, 3>(MCI.getOperand(1).getImm())) { + inSRange<6, 3>(MCI, 1)) { return HexagonII::HSIG_S2; } break; @@ -383,9 +374,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // memw(Rs+#u4:2) = #U1 Src1Reg = MCI.getOperand(0).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && - MCI.getOperand(1).isImm() && - isShiftedUInt<4, 2>(MCI.getOperand(1).getImm()) && - MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) { + inRange<4, 2>(MCI, 1) && inRange<1>(MCI, 2)) { return HexagonII::HSIG_S2; } break; @@ -393,16 +382,13 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // memb(Rs+#u4) = #U1 Src1Reg = MCI.getOperand(0).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(Src1Reg) && - MCI.getOperand(1).isImm() && isUInt<4>(MCI.getOperand(1).getImm()) && - MCI.getOperand(2).isImm() && isUInt<1>(MCI.getOperand(2).getImm())) { + inRange<4>(MCI, 1) && inRange<1>(MCI, 2)) { return HexagonII::HSIG_S2; } break; case Hexagon::S2_allocframe: - if (MCI.getOperand(0).isImm() && - isShiftedUInt<5, 3>(MCI.getOperand(0).getImm())) { + if (inRange<5, 3>(MCI, 0)) return HexagonII::HSIG_S2; - } break; // // Group A: @@ -428,8 +414,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { // Rd = add(r29,#u6:2) if (HexagonMCInstrInfo::isIntReg(SrcReg) && Hexagon::R29 == SrcReg && - MCI.getOperand(2).isImm() && - isShiftedUInt<6, 2>(MCI.getOperand(2).getImm())) { + inRange<6, 2>(MCI, 2)) { return HexagonII::HSIG_A; } // Rx = add(Rx,#s7) @@ -439,8 +424,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // Rd = add(Rs,#1) // Rd = add(Rs,#-1) if (HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) || - (MCI.getOperand(2).getImm() == -1))) { + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == -1)) { return HexagonII::HSIG_A; } } @@ -460,8 +444,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && ((MCI.getOperand(2).getImm() == 1) || - (MCI.getOperand(2).getImm() == 255))) { + (minConstant(MCI, 2) == 1 || minConstant(MCI, 2) == 255)) { return HexagonII::HSIG_A; } break; @@ -491,8 +474,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { DstReg = MCI.getOperand(0).getReg(); // Rd PredReg = MCI.getOperand(1).getReg(); // P0 if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg) && - Hexagon::P0 == PredReg && MCI.getOperand(2).isImm() && - MCI.getOperand(2).getImm() == 0) { + Hexagon::P0 == PredReg && minConstant(MCI, 2) == 0) { return HexagonII::HSIG_A; } break; @@ -502,7 +484,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (Hexagon::P0 == DstReg && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - MCI.getOperand(2).isImm() && isUInt<2>(MCI.getOperand(2).getImm())) { + inRange<2>(MCI, 2)) { return HexagonII::HSIG_A; } break; @@ -511,10 +493,7 @@ unsigned 
HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { // Rdd = combine(#u2,#U2) DstReg = MCI.getOperand(0).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && - // TODO: Handle Globals/Symbols - (MCI.getOperand(1).isImm() && isUInt<2>(MCI.getOperand(1).getImm())) && - ((MCI.getOperand(2).isImm() && - isUInt<2>(MCI.getOperand(2).getImm())))) { + inRange<2>(MCI, 1) && inRange<2>(MCI, 2)) { return HexagonII::HSIG_A; } break; @@ -524,7 +503,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(1).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - (MCI.getOperand(2).isImm() && MCI.getOperand(2).getImm() == 0)) { + minConstant(MCI, 2) == 0) { return HexagonII::HSIG_A; } break; @@ -534,7 +513,7 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { SrcReg = MCI.getOperand(2).getReg(); if (HexagonMCInstrInfo::isDblRegForSubInst(DstReg) && HexagonMCInstrInfo::isIntRegForSubInst(SrcReg) && - (MCI.getOperand(1).isImm() && MCI.getOperand(1).getImm() == 0)) { + minConstant(MCI, 1) == 0) { return HexagonII::HSIG_A; } break; @@ -556,19 +535,17 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) { } bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { - unsigned DstReg, SrcReg; - switch (potentialDuplex.getOpcode()) { case Hexagon::A2_addi: // testing for case of: Rx = add(Rx,#s7) DstReg = potentialDuplex.getOperand(0).getReg(); SrcReg = potentialDuplex.getOperand(1).getReg(); if (DstReg == SrcReg && HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { - if (potentialDuplex.getOperand(2).isExpr()) + int64_t Value; + if (!potentialDuplex.getOperand(2).getExpr()->evaluateAsAbsolute(Value)) return true; - if (potentialDuplex.getOperand(2).isImm() && - !(isShiftedInt<7, 0>(potentialDuplex.getOperand(2).getImm()))) + if (!isShiftedInt<7, 0>(Value)) return true; } break; @@ -576,15 +553,14 @@ bool HexagonMCInstrInfo::subInstWouldBeExtended(MCInst const &potentialDuplex) { DstReg = potentialDuplex.getOperand(0).getReg(); if (HexagonMCInstrInfo::isIntRegForSubInst(DstReg)) { - if (potentialDuplex.getOperand(1).isExpr()) + int64_t Value; + if (!potentialDuplex.getOperand(1).getExpr()->evaluateAsAbsolute(Value)) return true; // Check for case of Rd = #-1. - if (potentialDuplex.getOperand(1).isImm() && - (potentialDuplex.getOperand(1).getImm() == -1)) + if (Value == -1) return false; // Check for case of Rd = #u6. 
- if (potentialDuplex.getOperand(1).isImm() && - !isShiftedUInt<6, 0>(potentialDuplex.getOperand(1).getImm())) + if (!isShiftedUInt<6, 0>(Value)) return true; } break; @@ -712,19 +688,23 @@ inline static void addOps(MCInst &subInstPtr, MCInst const &Inst, MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { MCInst Result; + bool Absolute; + int64_t Value; switch (Inst.getOpcode()) { default: // dbgs() << "opcode: "<< Inst->getOpcode() << "\n"; llvm_unreachable("Unimplemented subinstruction \n"); break; case Hexagon::A2_addi: - if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == 1) { + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { Result.setOpcode(Hexagon::V4_SA1_inc); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; } // 1,2 SUBInst $Rd = add($Rs, #1) - else if (Inst.getOperand(2).isImm() && Inst.getOperand(2).getImm() == -1) { + else if (Value == -1) { Result.setOpcode(Hexagon::V4_SA1_dec); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -754,7 +734,7 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 0); break; // 1 SUBInst allocframe(#$u5_3) case Hexagon::A2_andir: - if (Inst.getOperand(2).getImm() == 255) { + if (minConstant(Inst, 2) == 255) { Result.setOpcode(Hexagon::V4_SA1_zxtb); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -772,26 +752,27 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2) case Hexagon::A4_combineii: case Hexagon::A2_combineii: - if (Inst.getOperand(1).getImm() == 1) { + Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 1) { Result.setOpcode(Hexagon::V4_SA1_combine1i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#1, #$u2) } - - if (Inst.getOperand(1).getImm() == 3) { + if (Value == 3) { Result.setOpcode(Hexagon::V4_SA1_combine3i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#3, #$u2) } - if (Inst.getOperand(1).getImm() == 0) { + if (Value == 0) { Result.setOpcode(Hexagon::V4_SA1_combine0i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); break; // 1,3 SUBInst $Rdd = combine(#0, #$u2) } - if (Inst.getOperand(1).getImm() == 2) { + if (Value == 2) { Result.setOpcode(Hexagon::V4_SA1_combine2i); addOps(Result, Inst, 0); addOps(Result, Inst, 2); @@ -894,12 +875,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { break; // 1,2,3 SUBInst $Rd = memw($Rs + #$u4_2) } case Hexagon::S4_storeirb_io: - if (Inst.getOperand(2).getImm() == 0) { + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { Result.setOpcode(Hexagon::V4_SS2_storebi0); addOps(Result, Inst, 0); addOps(Result, Inst, 1); break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0 - } else if (Inst.getOperand(2).getImm() == 1) { + } else if (Value == 1) { Result.setOpcode(Hexagon::V4_SS2_storebi1); addOps(Result, Inst, 0); addOps(Result, Inst, 1); @@ -923,12 +906,14 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) { addOps(Result, Inst, 2); break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt case Hexagon::S4_storeiri_io: - if (Inst.getOperand(2).getImm() == 0) { + Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value); + assert(Absolute);(void)Absolute; + if (Value == 0) { Result.setOpcode(Hexagon::V4_SS2_storewi0); addOps(Result, Inst, 0); addOps(Result, 
Inst, 1);
       break; //  3 1,2 SUBInst memw($Rs + #$u4_2)=#0
-    } else if (Inst.getOperand(2).getImm() == 1) {
+    } else if (Value == 1) {
       Result.setOpcode(Hexagon::V4_SS2_storewi1);
       addOps(Result, Inst, 0);
       addOps(Result, Inst, 1);
@@ -983,7 +968,8 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
     addOps(Result, Inst, 0);
     break; //  2 SUBInst if (p0) $Rd = #0
   case Hexagon::A2_tfrsi:
-    if (Inst.getOperand(1).isImm() && Inst.getOperand(1).getImm() == -1) {
+    Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
+    if (Absolute && Value == -1) {
       Result.setOpcode(Hexagon::V4_SA1_setin1);
       addOps(Result, Inst, 0);
       break; //  2 1 SUBInst $Rd = #-1
@@ -1044,6 +1030,8 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
                  << "\n");
     bisReversable = false;
   }
+  if (HexagonMCInstrInfo::isMemReorderDisabled(MCB)) // }:mem_noshuf
+    bisReversable = false;
 
   // Try in order.
   if (isOrderedDuplexPair(
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index bf51c3515e95..eaa3550d07f6 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -37,9 +37,7 @@ static cl::opt<unsigned>
 
 void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
                                            const MCSubtargetInfo &STI) {
-  MCInst HMI;
-  HMI.setOpcode(Hexagon::BUNDLE);
-  HMI.addOperand(MCOperand::createImm(0));
+  MCInst HMI = HexagonMCInstrInfo::createBundle();
   MCInst *MCB;
 
   if (MCK.getOpcode() != Hexagon::BUNDLE) {
@@ -50,7 +48,7 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
 
   // Examines packet and pad the packet, if needed, when an
   // end-loop is in the bundle.
-  HexagonMCInstrInfo::padEndloop(*MCB);
+  HexagonMCInstrInfo::padEndloop(getContext(), *MCB);
   HexagonMCShuffle(*MCII, STI, *MCB);
 
   assert(HexagonMCInstrInfo::bundleSize(*MCB) <= HEXAGON_PACKET_SIZE);
@@ -60,9 +58,9 @@ void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCK,
     if (Extended) {
       if (HexagonMCInstrInfo::isDuplex(*MCII, *MCI)) {
         MCInst *SubInst = const_cast<MCInst *>(MCI->getOperand(1).getInst());
-        HexagonMCInstrInfo::clampExtended(*MCII, *SubInst);
+        HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *SubInst);
       } else {
-        HexagonMCInstrInfo::clampExtended(*MCII, *MCI);
+        HexagonMCInstrInfo::clampExtended(*MCII, getContext(), *MCI);
       }
       Extended = false;
     } else {
@@ -114,7 +112,7 @@ void HexagonMCELFStreamer::HexagonMCEmitCommonSymbol(MCSymbol *Symbol,
   MCSection *Section = getAssembler().getContext().getELFSection(
       SectionName, ELF::SHT_NOBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
   SwitchSection(Section);
-  AssignSection(Symbol, Section);
+  AssignFragment(Symbol, getCurrentFragment());
 
   MCELFStreamer::EmitCommonSymbol(Symbol, Size, ByteAlignment);
   SwitchSection(CrntSection);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
new file mode 100644
index 000000000000..fc6262657514
--- /dev/null
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -0,0 +1,49 @@
+//===-- HexagonMCExpr.cpp - Hexagon specific MC expression classes -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "HexagonMCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-mcexpr" + +HexagonNoExtendOperand *HexagonNoExtendOperand::Create(MCExpr const *Expr, + MCContext &Ctx) { + return new (Ctx) HexagonNoExtendOperand(Expr); +} + +bool HexagonNoExtendOperand::evaluateAsRelocatableImpl( + MCValue &Res, MCAsmLayout const *Layout, MCFixup const *Fixup) const { + return Expr->evaluateAsRelocatable(Res, Layout, Fixup); +} + +void HexagonNoExtendOperand::visitUsedExpr(MCStreamer &Streamer) const {} + +MCFragment *llvm::HexagonNoExtendOperand::findAssociatedFragment() const { + return Expr->findAssociatedFragment(); +} + +void HexagonNoExtendOperand::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {} + +MCExpr const *HexagonNoExtendOperand::getExpr() const { return Expr; } + +bool HexagonNoExtendOperand::classof(MCExpr const *E) { + return E->getKind() == MCExpr::Target; +} + +HexagonNoExtendOperand::HexagonNoExtendOperand(MCExpr const *Expr) + : Expr(Expr) {} + +void HexagonNoExtendOperand::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const { + Expr->print(OS, MAI); +} diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h new file mode 100644 index 000000000000..60f180fb2bc4 --- /dev/null +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h @@ -0,0 +1,35 @@ +//==- HexagonMCExpr.h - Hexagon specific MC expression classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+class MCInst;
+class HexagonNoExtendOperand : public MCTargetExpr {
+public:
+  static HexagonNoExtendOperand *Create(MCExpr const *Expr, MCContext &Ctx);
+  void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+  bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+                                 const MCFixup *Fixup) const override;
+  void visitUsedExpr(MCStreamer &Streamer) const override;
+  MCFragment *findAssociatedFragment() const override;
+  void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
+  static bool classof(MCExpr const *E);
+  MCExpr const *getExpr() const;
+
+private:
+  HexagonNoExtendOperand(MCExpr const *Expr);
+  MCExpr const *Expr;
+};
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONMCEXPR_H
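HexagonNoExtendOperand carries no state beyond the wrapped expression; it is a marker type that forwards every query, so later passes can recognize it via classof and otherwise treat the operand normally. A standalone sketch of the same marker-wrapper pattern (plain C++, no MC types):

    #include <memory>

    struct Expr {
      virtual ~Expr() = default;
      virtual long evaluate() const = 0;
    };

    struct Const : Expr {
      long V;
      explicit Const(long V) : V(V) {}
      long evaluate() const override { return V; }
    };

    // Marker wrapper: no data of its own, forwards evaluation; its only
    // purpose is to be recognizable (here via dynamic_cast) by later passes.
    struct NoExtend : Expr {
      std::unique_ptr<Expr> Inner;
      explicit NoExtend(std::unique_ptr<Expr> E) : Inner(std::move(E)) {}
      long evaluate() const override { return Inner->evaluate(); }
    };

    static bool mustNotExtend(const Expr &E) {
      return dynamic_cast<const NoExtend *>(&E) != nullptr;
    }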
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 48b15f85a783..e6842076db2a 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -15,17 +15,37 @@
 #include "Hexagon.h"
 #include "HexagonBaseInfo.h"
+#include "HexagonMCChecker.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 
 namespace llvm {
+void HexagonMCInstrInfo::addConstant(MCInst &MI, uint64_t Value,
+                                     MCContext &Context) {
+  MI.addOperand(MCOperand::createExpr(MCConstantExpr::create(Value, Context)));
+}
+
+void HexagonMCInstrInfo::addConstExtender(MCContext &Context,
+                                          MCInstrInfo const &MCII, MCInst &MCB,
+                                          MCInst const &MCI) {
+  assert(HexagonMCInstrInfo::isBundle(MCB));
+  MCOperand const &exOp =
+      MCI.getOperand(HexagonMCInstrInfo::getExtendableOp(MCII, MCI));
+
+  // Create the extender.
+  MCInst *XMCI =
+      new (Context) MCInst(HexagonMCInstrInfo::deriveExtender(MCII, MCI, exOp));
+
+  MCB.addOperand(MCOperand::createInst(XMCI));
+}
+
 iterator_range<MCInst::const_iterator>
 HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) {
   assert(isBundle(MCI));
-  return iterator_range<MCInst::const_iterator>(
-      MCI.begin() + bundleInstructionsOffset, MCI.end());
+  return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end());
 }
 
 size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
@@ -35,7 +55,40 @@ size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) {
     return (1);
 }
 
-void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) {
+bool HexagonMCInstrInfo::canonicalizePacket(MCInstrInfo const &MCII,
+                                            MCSubtargetInfo const &STI,
+                                            MCContext &Context, MCInst &MCB,
+                                            HexagonMCChecker *Check) {
+  // Examine the packet and convert pairs of instructions to compound
+  // instructions when possible.
+  if (!HexagonDisableCompound)
+    HexagonMCInstrInfo::tryCompound(MCII, Context, MCB);
+  // Check the bundle for errors.
+  bool CheckOk = Check ? Check->check() : true;
+  if (!CheckOk)
+    return false;
+  HexagonMCShuffle(MCII, STI, MCB);
+  // Examine the packet and convert pairs of instructions to duplex
+  // instructions when possible.
+  MCInst InstBundlePreDuplex = MCInst(MCB);
+  if (!HexagonDisableDuplex) {
+    SmallVector<DuplexCandidate, 8> possibleDuplexes;
+    possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB);
+    HexagonMCShuffle(MCII, STI, Context, MCB, possibleDuplexes);
+  }
+  // Examine the packet and pad it, if needed, when an
+  // end-loop is in the bundle.
+  HexagonMCInstrInfo::padEndloop(Context, MCB);
+  // If compounding and duplexing didn't reduce the size to
+  // 4 or fewer, we have a packet that is too big.
+  if (HexagonMCInstrInfo::bundleSize(MCB) > HEXAGON_PACKET_SIZE)
+    return false;
+  HexagonMCShuffle(MCII, STI, MCB);
+  return true;
+}
+
+void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII,
+                                       MCContext &Context, MCInst &MCI) {
   assert(HexagonMCInstrInfo::isExtendable(MCII, MCI) ||
          HexagonMCInstrInfo::isExtended(MCII, MCI));
   MCOperand &exOp =
@@ -43,13 +96,20 @@ void HexagonMCInstrInfo::clampExtended(MCInstrInfo const &MCII, MCInst &MCI) {
   // If the extended value is a constant, then use it for the extended and
   // for the extender instructions, masking off the lower 6 bits and
   // including the assumed bits.
-  if (exOp.isImm()) {
+  int64_t Value;
+  if (exOp.getExpr()->evaluateAsAbsolute(Value)) {
     unsigned Shift = HexagonMCInstrInfo::getExtentAlignment(MCII, MCI);
-    int64_t Bits = exOp.getImm();
-    exOp.setImm((Bits & 0x3f) << Shift);
+    exOp.setExpr(MCConstantExpr::create((Value & 0x3f) << Shift, Context));
   }
 }
 
+MCInst HexagonMCInstrInfo::createBundle() {
+  MCInst Result;
+  Result.setOpcode(Hexagon::BUNDLE);
+  Result.addOperand(MCOperand::createImm(0));
+  return Result;
+}
+
 MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
                                          MCInst const &inst0,
                                          MCInst const &inst1) {
@@ -64,6 +124,27 @@ MCInst *HexagonMCInstrInfo::deriveDuplex(MCContext &Context, unsigned iClass,
   return duplexInst;
 }
 
+MCInst HexagonMCInstrInfo::deriveExtender(MCInstrInfo const &MCII,
+                                          MCInst const &Inst,
+                                          MCOperand const &MO) {
+  assert(HexagonMCInstrInfo::isExtendable(MCII, Inst) ||
+         HexagonMCInstrInfo::isExtended(MCII, Inst));
+
+  MCInstrDesc const &Desc = HexagonMCInstrInfo::getDesc(MCII, Inst);
+  MCInst XMI;
+  XMI.setOpcode((Desc.isBranch() || Desc.isCall() ||
+                 HexagonMCInstrInfo::getType(MCII, Inst) == HexagonII::TypeCR)
+                    ? Hexagon::A4_ext_b
+                    : Hexagon::A4_ext);
+  if (MO.isImm())
+    XMI.addOperand(MCOperand::createImm(MO.getImm() & (~0x3f)));
+  else if (MO.isExpr())
+    XMI.addOperand(MCOperand::createExpr(MO.getExpr()));
+  else
+    llvm_unreachable("invalid extendable operand");
+  return XMI;
+}
+
 MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
                                                    size_t Index) {
   assert(Index <= bundleSize(MCB));
@@ -76,6 +157,13 @@ MCInst const *HexagonMCInstrInfo::extenderForIndex(MCInst const &MCB,
   return nullptr;
 }
 
+void HexagonMCInstrInfo::extendIfNeeded(MCContext &Context,
+                                        MCInstrInfo const &MCII, MCInst &MCB,
+                                        MCInst const &MCI, bool MustExtend) {
+  if (isConstExtended(MCII, MCI) || MustExtend)
+    addConstExtender(Context, MCII, MCB, MCI);
+}
+
 HexagonII::MemAccessSize
 HexagonMCInstrInfo::getAccessSize(MCInstrInfo const &MCII, MCInst const &MCI) {
   const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -186,6 +274,25 @@ MCOperand const &HexagonMCInstrInfo::getNewValueOperand(MCInstrInfo const &MCII,
   return (MCO);
 }
 
+/// Return the new value or the newly produced value.
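canonicalizePacket() fixes the order of the packet transforms: compound, check, shuffle, duplex, pad, size check, shuffle again, with any failure aborting the whole pass. A sketch of that staged-pipeline shape (the types and stage names are invented; only the ordering mirrors the code above):

    #include <functional>
    #include <vector>

    struct Packet { int Size = 0; bool Ok = true; };

    // Each stage may shrink or reorder the packet; a false return aborts,
    // mirroring how a failed check or an oversized packet stops the pass.
    static bool canonicalize(Packet &P,
        const std::vector<std::function<bool(Packet &)>> &Stages) {
      for (const auto &Stage : Stages)
        if (!Stage(P))
          return false;
      return true;
    }

    // Usage, in the fixed order used above (hypothetical stage functions):
    // canonicalize(P, {tryCompound, check, shuffle, tryDuplex, padEndloop,
    //                  fitsInFourSlots, shuffle});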
+unsigned short HexagonMCInstrInfo::getNewValueOp2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::NewValueOpPos2) & HexagonII::NewValueOpMask2); +} + +MCOperand const & +HexagonMCInstrInfo::getNewValueOperand2(MCInstrInfo const &MCII, + MCInst const &MCI) { + unsigned O = HexagonMCInstrInfo::getNewValueOp2(MCII, MCI); + MCOperand const &MCO = MCI.getOperand(O); + + assert((HexagonMCInstrInfo::isNewValue(MCII, MCI) || + HexagonMCInstrInfo::hasNewValue2(MCII, MCI)) && + MCO.isReg()); + return (MCO); +} + int HexagonMCInstrInfo::getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI) { const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; @@ -242,6 +349,13 @@ bool HexagonMCInstrInfo::hasNewValue(MCInstrInfo const &MCII, return ((F >> HexagonII::hasNewValuePos) & HexagonII::hasNewValueMask); } +/// Return whether the insn produces a second value. +bool HexagonMCInstrInfo::hasNewValue2(MCInstrInfo const &MCII, + MCInst const &MCI) { + const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags; + return ((F >> HexagonII::hasNewValuePos2) & HexagonII::hasNewValueMask2); +} + MCInst const &HexagonMCInstrInfo::instruction(MCInst const &MCB, size_t Index) { assert(isBundle(MCB)); assert(Index < HEXAGON_PACKET_SIZE); @@ -261,6 +375,11 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) { HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP); } +bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII, + MCInst const &MCI) { + return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND); +} + bool HexagonMCInstrInfo::isDblRegForSubInst(unsigned Reg) { return ((Reg >= Hexagon::D0 && Reg <= Hexagon::D3) || (Reg >= Hexagon::D8 && Reg <= Hexagon::D11)); @@ -282,14 +401,21 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI) { if (HexagonMCInstrInfo::isExtended(MCII, MCI)) return true; - - if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) + // Branch insns are handled as necessary by relaxation. + if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeJ) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCOMPOUND && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch()) || + (HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeNV && + HexagonMCInstrInfo::getDesc(MCII, MCI).isBranch())) + return false; + // Otherwise loop instructions and other CR insts are handled by relaxation + else if ((HexagonMCInstrInfo::getType(MCII, MCI) == HexagonII::TypeCR) && + (MCI.getOpcode() != Hexagon::C4_addipc)) + return false; + else if (!HexagonMCInstrInfo::isExtendable(MCII, MCI)) return false; - short ExtOpNum = HexagonMCInstrInfo::getCExtOpNum(MCII, MCI); - int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI); - int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI); - MCOperand const &MO = MCI.getOperand(ExtOpNum); + MCOperand const &MO = HexagonMCInstrInfo::getExtendableOperand(MCII, MCI); // We could be using an instruction with an extendable immediate and shoehorn // a global address into it. If it is a global address it will be constant @@ -297,15 +423,13 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII, // We currently only handle isGlobal() because it is the only kind of // object we are going to end up with here for now. // In the future we probably should add isSymbol(), etc. 
-  if (MO.isExpr())
+  assert(!MO.isImm());
+  int64_t Value;
+  if (!MO.getExpr()->evaluateAsAbsolute(Value))
     return true;
-
-  // If the extendable operand is not 'Immediate' type, the instruction should
-  // have 'isExtended' flag set.
-  assert(MO.isImm() && "Extendable operand must be Immediate type");
-
-  int ImmValue = MO.getImm();
-  return (ImmValue < MinValue || ImmValue > MaxValue);
+  int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+  int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+  return (MinValue > Value || Value > MaxValue);
 }
 
 bool HexagonMCInstrInfo::isExtendable(MCInstrInfo const &MCII,
@@ -374,6 +498,19 @@ bool HexagonMCInstrInfo::isPredicated(MCInstrInfo const &MCII,
   return ((F >> HexagonII::PredicatedPos) & HexagonII::PredicatedMask);
 }
 
+bool HexagonMCInstrInfo::isPredicateLate(MCInstrInfo const &MCII,
+                                         MCInst const &MCI) {
+  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return (F >> HexagonII::PredicateLatePos & HexagonII::PredicateLateMask);
+}
+
+/// Return whether the insn is newly predicated.
+bool HexagonMCInstrInfo::isPredicatedNew(MCInstrInfo const &MCII,
+                                         MCInst const &MCI) {
+  const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+  return ((F >> HexagonII::PredicatedNewPos) & HexagonII::PredicatedNewMask);
+}
+
 bool HexagonMCInstrInfo::isPredicatedTrue(MCInstrInfo const &MCII,
                                           MCInst const &MCI) {
   const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
@@ -394,6 +531,18 @@ bool HexagonMCInstrInfo::isSolo(MCInstrInfo const &MCII, MCInst const &MCI) {
   return ((F >> HexagonII::SoloPos) & HexagonII::SoloMask);
 }
 
+bool HexagonMCInstrInfo::isMemReorderDisabled(MCInst const &MCI) {
+  assert(isBundle(MCI));
+  auto Flags = MCI.getOperand(0).getImm();
+  return (Flags & memReorderDisabledMask) != 0;
+}
+
+bool HexagonMCInstrInfo::isMemStoreReorderEnabled(MCInst const &MCI) {
+  assert(isBundle(MCI));
+  auto Flags = MCI.getOperand(0).getImm();
+  return (Flags & memStoreReorderEnabledMask) != 0;
+}
+
 bool HexagonMCInstrInfo::isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI) {
   const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
   return ((F >> HexagonII::SoloAXPos) & HexagonII::SoloAXMask);
@@ -405,7 +554,28 @@ bool HexagonMCInstrInfo::isSoloAin1(MCInstrInfo const &MCII,
   return ((F >> HexagonII::SoloAin1Pos) & HexagonII::SoloAin1Mask);
 }
 
-void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
+bool HexagonMCInstrInfo::isVector(MCInstrInfo const &MCII, MCInst const &MCI) {
+  if ((getType(MCII, MCI) <= HexagonII::TypeCVI_LAST) &&
+      (getType(MCII, MCI) >= HexagonII::TypeCVI_FIRST))
+    return true;
+  return false;
+}
+
+int64_t HexagonMCInstrInfo::minConstant(MCInst const &MCI, size_t Index) {
+  auto Sentinal = static_cast<int64_t>(std::numeric_limits<uint32_t>::max())
+                  << 8;
+  if (MCI.size() <= Index)
+    return Sentinal;
+  MCOperand const &MCO = MCI.getOperand(Index);
+  if (!MCO.isExpr())
+    return Sentinal;
+  int64_t Value;
+  if (!MCO.getExpr()->evaluateAsAbsolute(Value))
+    return Sentinal;
+  return Value;
+}
+
+void HexagonMCInstrInfo::padEndloop(MCContext &Context, MCInst &MCB) {
   MCInst Nop;
   Nop.setOpcode(Hexagon::A2_nop);
   assert(isBundle(MCB));
@@ -413,7 +583,7 @@ void HexagonMCInstrInfo::padEndloop(MCInst &MCB) {
       (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_INNER_SIZE)) ||
       ((HexagonMCInstrInfo::isOuterLoop(MCB) &&
       (HexagonMCInstrInfo::bundleSize(MCB) < HEXAGON_PACKET_OUTER_SIZE))))
-    MCB.addOperand(MCOperand::createInst(new MCInst(Nop)));
+    MCB.addOperand(MCOperand::createInst(new (Context) MCInst(Nop)));
 }
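minConstant() signals "no usable constant" with a sentinel rather than a separate error path: the sentinel is about 2^40, so it can never pass any of the narrow isUInt/isShiftedUInt range checks applied to subinstruction immediates. A small sketch of why that works (plain C++, assuming only widths below 40 bits are ever queried):

    #include <cstdint>
    #include <limits>

    static int64_t sentinel() {
      return static_cast<int64_t>(std::numeric_limits<uint32_t>::max()) << 8;
    }
    template <unsigned N> static bool fitsUInt(int64_t V) {
      return V >= 0 && (static_cast<uint64_t>(V) >> N) == 0;
    }
    // fitsUInt<6>(sentinel()) == false, and likewise for every width up to
    // 39 bits, so a missing or non-constant operand simply never matches a
    // subinstruction form; no extra "absent" flag is needed.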
 bool HexagonMCInstrInfo::prefersSlot3(MCInstrInfo const &MCII,
@@ -456,6 +626,20 @@ void HexagonMCInstrInfo::setInnerLoop(MCInst &MCI) {
   Operand.setImm(Operand.getImm() | innerLoopMask);
 }
 
+void HexagonMCInstrInfo::setMemReorderDisabled(MCInst &MCI) {
+  assert(isBundle(MCI));
+  MCOperand &Operand = MCI.getOperand(0);
+  Operand.setImm(Operand.getImm() | memReorderDisabledMask);
+  assert(isMemReorderDisabled(MCI));
+}
+
+void HexagonMCInstrInfo::setMemStoreReorderEnabled(MCInst &MCI) {
+  assert(isBundle(MCI));
+  MCOperand &Operand = MCI.getOperand(0);
+  Operand.setImm(Operand.getImm() | memStoreReorderEnabledMask);
+  assert(isMemStoreReorderEnabled(MCI));
+}
+
 void HexagonMCInstrInfo::setOuterLoop(MCInst &MCI) {
   assert(isBundle(MCI));
   MCOperand &Operand = MCI.getOperand(0);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 32d61a4a7be5..0237b2884a3b 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -14,9 +14,11 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
 #define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCINSTRINFO_H
 
+#include "HexagonMCExpr.h"
 #include "llvm/MC/MCInst.h"
 
 namespace llvm {
+class HexagonMCChecker;
 class MCContext;
 class MCInstrDesc;
 class MCInstrInfo;
@@ -39,20 +41,47 @@ int64_t const innerLoopMask = 1 << innerLoopOffset;
 size_t const outerLoopOffset = 1;
 int64_t const outerLoopMask = 1 << outerLoopOffset;
 
+// Disable reordering of memory loads/stores in this packet;
+// by default, loads may be re-ordered.
+size_t const memReorderDisabledOffset = 2;
+int64_t const memReorderDisabledMask = 1 << memReorderDisabledOffset;
+
+// Allow re-ordering of memory stores; by default, stores cannot be re-ordered.
+size_t const memStoreReorderEnabledOffset = 3;
+int64_t const memStoreReorderEnabledMask = 1 << memStoreReorderEnabledOffset;
+
 size_t const bundleInstructionsOffset = 1;
 
+void addConstant(MCInst &MI, uint64_t Value, MCContext &Context);
+void addConstExtender(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+                      MCInst const &MCI);
+
 // Returns an iterator range of instructions in this bundle
 iterator_range<MCInst::const_iterator> bundleInstructions(MCInst const &MCI);
 
 // Returns the number of instructions in the bundle
 size_t bundleSize(MCInst const &MCI);
 
+// Put the packet into canonical form: compound, duplex, pad, and shuffle
+bool canonicalizePacket(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
+                        MCContext &Context, MCInst &MCB,
+                        HexagonMCChecker *Checker);
+
 // Clamp off upper 26 bits of extendable operand for emission
-void clampExtended(MCInstrInfo const &MCII, MCInst &MCI);
+void clampExtended(MCInstrInfo const &MCII, MCContext &Context, MCInst &MCI);
+
+MCInst createBundle();
+
+// Return the extender for instruction at Index or nullptr if none
+MCInst const *extenderForIndex(MCInst const &MCB, size_t Index);
+void extendIfNeeded(MCContext &Context, MCInstrInfo const &MCII, MCInst &MCB,
+                    MCInst const &MCI, bool MustExtend);
 
 // Create a duplex instruction given the two subinsts
 MCInst *deriveDuplex(MCContext &Context, unsigned iClass, MCInst const &inst0,
                      MCInst const &inst1);
+MCInst deriveExtender(MCInstrInfo const &MCII, MCInst const &Inst,
+                      MCOperand const &MO);
 
 // Convert this instruction in to a duplex subinst
 MCInst deriveSubInst(MCInst const &Inst);
@@ -108,6 +137,9 @@ unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI);
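The first operand of a BUNDLE is a plain flag word, with the inner-loop, outer-loop, and two memory-reorder bits above occupying bits 0-3. A standalone sketch of reading and setting such a word (the Bundle type is invented; the mask values are copied from the declarations above):

    #include <cstdint>

    enum : int64_t {
      InnerLoopMask              = 1 << 0,
      OuterLoopMask              = 1 << 1,
      MemReorderDisabledMask     = 1 << 2,
      MemStoreReorderEnabledMask = 1 << 3,
    };

    struct Bundle { int64_t Flags = 0; };

    static void setMemReorderDisabled(Bundle &B) {
      B.Flags |= MemReorderDisabledMask;
    }
    static bool isMemReorderDisabled(const Bundle &B) {
      return (B.Flags & MemReorderDisabledMask) != 0;
    }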
consumes or produces a new value.
 MCOperand const &getNewValueOperand(MCInstrInfo const &MCII, MCInst const &MCI);
+unsigned short getNewValueOp2(MCInstrInfo const &MCII, MCInst const &MCI);
+MCOperand const &getNewValueOperand2(MCInstrInfo const &MCII,
+                                     MCInst const &MCI);
 
 int getSubTarget(MCInstrInfo const &MCII, MCInst const &MCI);
 
@@ -125,6 +157,7 @@ bool hasImmExt(MCInst const &MCI);
 
 // Return whether the instruction is a legal new-value producer.
 bool hasNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
+bool hasNewValue2(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return the instruction at Index
 MCInst const &instruction(MCInst const &MCB, size_t Index);
@@ -134,10 +167,24 @@ bool isBundle(MCInst const &MCI);
 
 // Return whether the insn is an actual insn.
 bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return the duplex iclass given the two duplex classes
 unsigned iClassOfDuplexPair(unsigned Ga, unsigned Gb);
 
+int64_t minConstant(MCInst const &MCI, size_t Index);
+template <unsigned N, unsigned S>
+bool inRange(MCInst const &MCI, size_t Index) {
+  return isShiftedUInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N, unsigned S>
+bool inSRange(MCInst const &MCI, size_t Index) {
+  return isShiftedInt<N, S>(minConstant(MCI, Index));
+}
+template <unsigned N> bool inRange(MCInst const &MCI, size_t Index) {
+  return isUInt<N>(minConstant(MCI, Index));
+}
+
 // Return whether the instruction needs to be constant extended.
 bool isConstExtended(MCInstrInfo const &MCII, MCInst const &MCI);
 
@@ -173,6 +220,8 @@ bool isIntReg(unsigned Reg);
 
 // Is this register suitable for use in a duplex subinst
 bool isIntRegForSubInst(unsigned Reg);
+bool isMemReorderDisabled(MCInst const &MCI);
+bool isMemStoreReorderEnabled(MCInst const &MCI);
 
 // Return whether the insn is a new-value consumer.
 bool isNewValue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -191,6 +240,8 @@ bool isOuterLoop(MCInst const &MCI);
 
 // Return whether this instruction is predicated
 bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicateLate(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isPredicatedNew(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Return whether the predicate sense is true
 bool isPredicatedTrue(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -209,9 +260,10 @@ bool isSoloAX(MCInstrInfo const &MCII, MCInst const &MCI);
 
 /// Return whether the insn can be packaged only with an A-type insn in slot #1.
 bool isSoloAin1(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isVector(MCInstrInfo const &MCII, MCInst const &MCI);
 
 // Pad the bundle with nops to satisfy endloop requirements
-void padEndloop(MCInst &MCI);
+void padEndloop(MCContext &Context, MCInst &MCI);
 
 bool prefersSlot3(MCInstrInfo const &MCII, MCInst const &MCI);
 
@@ -220,6 +272,8 @@ void replaceDuplex(MCContext &Context, MCInst &MCB, DuplexCandidate Candidate);
 
 // Marks a bundle as endloop0
 void setInnerLoop(MCInst &MCI);
+void setMemReorderDisabled(MCInst &MCI);
+void setMemStoreReorderEnabled(MCInst &MCI);
 
 // Marks a bundle as endloop1
 void setOuterLoop(MCInst &MCI);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 53305d85fd80..9a292577a8f3 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -40,6 +40,20 @@ using namespace llvm;
 #define GET_REGINFO_MC_DESC
 #include "HexagonGenRegisterInfo.inc"
 
+cl::opt<bool> llvm::HexagonDisableCompound
+  ("mno-compound",
+   cl::desc("Disable looking for compound instructions for Hexagon"));
+
+cl::opt<bool> llvm::HexagonDisableDuplex
+  ("mno-pairing",
+   cl::desc("Disable looking for duplex instructions for Hexagon"));
+
+StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
+  if (CPU.empty())
+    CPU = "hexagonv60";
+  return CPU;
+}
+
 MCInstrInfo *llvm::createHexagonMCInstrInfo() {
   MCInstrInfo *X = new MCInstrInfo();
   InitHexagonMCInstrInfo(X);
@@ -54,6 +68,7 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
 
 static MCSubtargetInfo *
 createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+  CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU);
   return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
 }
 
@@ -76,28 +91,23 @@ public:
     StringRef Contents(Buffer);
     auto PacketBundle = Contents.rsplit('\n');
     auto HeadTail = PacketBundle.first.split('\n');
-    auto Preamble = "\t{\n\t\t";
-    auto Separator = "";
-    while(!HeadTail.first.empty()) {
-      OS << Separator;
-      StringRef Inst;
+    StringRef Separator = "\n";
+    StringRef Indent = "\t\t";
+    OS << "\t{\n";
+    while (!HeadTail.first.empty()) {
+      StringRef InstTxt;
       auto Duplex = HeadTail.first.split('\v');
-      if(!Duplex.second.empty()){
-        OS << Duplex.first << "\n";
-        Inst = Duplex.second;
+      if (!Duplex.second.empty()) {
+        OS << Indent << Duplex.first << Separator;
+        InstTxt = Duplex.second;
+      } else if (!HeadTail.first.trim().startswith("immext")) {
+        InstTxt = Duplex.first;
       }
-      else {
-        if(!HeadTail.first.startswith("immext"))
-          Inst = Duplex.first;
-      }
-      OS << Preamble;
-      OS << Inst;
+      if (!InstTxt.empty())
+        OS << Indent << InstTxt << Separator;
       HeadTail = HeadTail.second.split('\n');
-      Preamble = "";
-      Separator = "\n\t\t";
     }
-    if(HexagonMCInstrInfo::bundleSize(Inst) != 0)
-      OS << "\n\t}" << PacketBundle.second;
+    OS << "\t}" << PacketBundle.second;
   }
 };
 }
@@ -154,9 +164,9 @@ static MCCodeGenInfo *createHexagonMCCodeGenInfo(const Triple &TT,
                                                  CodeModel::Model CM,
                                                  CodeGenOpt::Level OL) {
   MCCodeGenInfo *X = new MCCodeGenInfo();
-  // For the time being, use static relocations, since there's really no
-  // support for PIC yet.
- X->initMCCodeGenInfo(Reloc::Static, CM, OL); + if (RM == Reloc::Default) + RM = Reloc::Static; + X->initMCCodeGenInfo(RM, CM, OL); return X; } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h index cb626503313f..a005a014416b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h @@ -16,6 +16,8 @@ #include +#include "llvm/Support/CommandLine.h" + namespace llvm { struct InstrItinerary; struct InstrStage; @@ -33,22 +35,27 @@ class raw_ostream; class raw_pwrite_stream; extern Target TheHexagonTarget; - +extern cl::opt HexagonDisableCompound; +extern cl::opt HexagonDisableDuplex; extern const InstrStage HexagonStages[]; MCInstrInfo *createHexagonMCInstrInfo(); -MCCodeEmitter *createHexagonMCCodeEmitter(MCInstrInfo const &MCII, - MCRegisterInfo const &MRI, +MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, MCContext &MCT); -MCAsmBackend *createHexagonAsmBackend(Target const &T, - MCRegisterInfo const &MRI, +MCAsmBackend *createHexagonAsmBackend(const Target &T, + const MCRegisterInfo &MRI, const Triple &TT, StringRef CPU); MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, StringRef CPU); +namespace HEXAGON_MC { + StringRef selectHexagonCPU(const Triple &TT, StringRef CPU); +} + } // End llvm namespace // Define symbolic names for Hexagon registers. This defines a mapping from diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 41112ac0b46e..6ceb848ba20c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -27,6 +27,7 @@ using namespace llvm; +namespace { // Insn shuffling priority. 
class HexagonBid { // The priority is directly proportional to how restricted the insn is based @@ -75,6 +76,7 @@ public: return false; }; }; +} // end anonymous namespace unsigned HexagonResource::setWeight(unsigned s) { const unsigned SlotWeight = 8; @@ -93,6 +95,60 @@ unsigned HexagonResource::setWeight(unsigned s) { return (Weight); } +HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL; + +bool HexagonCVIResource::SetUp = HexagonCVIResource::setup(); + +bool HexagonCVIResource::setup() { + assert(!TUL); + TUL = new (TypeUnitsAndLanes); + + (*TUL)[HexagonII::TypeCVI_VA] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); + (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VM_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VM_ST] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); + + return true; +} + +HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, + MCInst const *id) + : HexagonResource(s) { + unsigned T = HexagonMCInstrInfo::getType(MCII, *id); + + if (TUL->count(T)) { + // For an HVX insn. + Valid = true; + setUnits((*TUL)[T].first); + setLanes((*TUL)[T].second); + setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad()); + setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore()); + } else { + // For core insns. + Valid = false; + setUnits(0); + setLanes(0); + setLoad(false); + setStore(false); + } +} + HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI) : MCII(MCII), STI(STI) { @@ -107,7 +163,7 @@ void HexagonShuffler::reset() { void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, unsigned S, bool X) { - HexagonInstr PI(ID, Extender, S, X); + HexagonInstr PI(MCII, ID, Extender, S, X); Packet.push_back(PI); } @@ -126,6 +182,8 @@ bool HexagonShuffler::check() { // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; + // Number of HVX loads, HVX stores. + unsigned CVIloads = 0, CVIstores = 0; // Number of duplex insns, solo insns. 
unsigned duplex = 0, solo = 0;
   // Number of insns restricting other insns in the packet to A and X types,
@@ -168,6 +226,12 @@ bool HexagonShuffler::check() {
     case HexagonII::TypeJ:
       ++jumps;
       break;
+    case HexagonII::TypeCVI_VM_VP_LDU:
+      ++onlyNo1;
+    case HexagonII::TypeCVI_VM_LD:
+    case HexagonII::TypeCVI_VM_TMP_LD:
+    case HexagonII::TypeCVI_VM_CUR_LD:
+      ++CVIloads;
     case HexagonII::TypeLD:
       ++loads;
       ++memory;
@@ -176,6 +240,11 @@ bool HexagonShuffler::check() {
       if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn())
        ++jumps, ++jump1; // DEALLOC_RETURN is of type LD.
       break;
+    case HexagonII::TypeCVI_VM_STU:
+      ++onlyNo1;
+    case HexagonII::TypeCVI_VM_ST:
+    case HexagonII::TypeCVI_VM_NEW_ST:
+      ++CVIstores;
     case HexagonII::TypeST:
       ++stores;
       ++memory;
@@ -203,9 +272,9 @@ bool HexagonShuffler::check() {
   }
 
   // Check if the packet is legal.
-  if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) ||
-      (solo && size() > 1) || (onlyAX && neitherAnorX > 1) ||
-      (onlyAX && xtypeFloat)) {
+  if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) ||
+      (duplex > 1 || (duplex && memory)) || (solo && size() > 1) ||
+      (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) {
     Error = SHUFFLE_ERROR_INVALID;
     return false;
   }
@@ -336,6 +405,19 @@ bool HexagonShuffler::check() {
       return false;
     }
   }
+  // Verify the CVI slot subscriptions.
+  {
+    HexagonUnitAuction AuctionCVI;
+
+    std::sort(begin(), end(), HexagonInstr::lessCVI);
+
+    for (iterator I = begin(); I != end(); ++I)
+      for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid?
+        if (!AuctionCVI.bid(I->CVI.getUnits() << i)) {
+          Error = SHUFFLE_ERROR_SLOTS;
+          return false;
+        }
+  }
 
   Error = SHUFFLE_SUCCESS;
   return true;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 8b6c72ee25e6..174f10fb2580 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -51,6 +51,44 @@ public:
   };
 };
 
+// HVX insn resources.
+class HexagonCVIResource : public HexagonResource {
+  typedef std::pair<unsigned, unsigned> UnitsAndLanes;
+  typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes;
+
+  // Available HVX slots.
+  enum {
+    CVI_NONE = 0,
+    CVI_XLANE = 1 << 0,
+    CVI_SHIFT = 1 << 1,
+    CVI_MPY0 = 1 << 2,
+    CVI_MPY1 = 1 << 3
+  };
+
+  static bool SetUp;
+  static bool setup();
+  static TypeUnitsAndLanes *TUL;
+
+  // Count of adjacent slots that the insn requires to be executed.
+  unsigned Lanes;
+  // Flag whether the insn is a load or a store.
+  bool Load, Store;
+  // Flag whether the HVX resources are valid.
+  bool Valid;
+
+  void setLanes(unsigned l) { Lanes = l; };
+  void setLoad(bool f = true) { Load = f; };
+  void setStore(bool f = true) { Store = f; };
+
+public:
+  HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id);
+
+  bool isValid() const { return (Valid); };
+  unsigned getLanes() const { return (Lanes); };
+  bool mayLoad() const { return (Load); };
+  bool mayStore() const { return (Store); };
+};
+
 // Handle to an insn used by the shuffling algorithm.
class HexagonInstr { friend class HexagonShuffler; @@ -58,12 +96,14 @@ class HexagonInstr { MCInst const *ID; MCInst const *Extender; HexagonResource Core; + HexagonCVIResource CVI; bool SoloException; public: - HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s, - bool x = false) - : ID(id), Extender(Extender), Core(s), SoloException(x){}; + HexagonInstr(MCInstrInfo const &MCII, MCInst const *id, + MCInst const *Extender, unsigned s, bool x = false) + : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id), + SoloException(x){}; MCInst const *getDesc() const { return (ID); }; @@ -79,6 +119,10 @@ public: static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) { return (HexagonResource::lessUnits(A.Core, B.Core)); }; + // Check if the handles are in ascending order by HVX slots. + static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) { + return (HexagonResource::lessUnits(A.CVI, B.CVI)); + }; }; // Bundle shuffler. @@ -108,6 +152,8 @@ public: SHUFFLE_ERROR_BRANCHES, ///< No free slots for branch insns. SHUFFLE_ERROR_NOSLOTS, ///< No free slots for other insns. SHUFFLE_ERROR_SLOTS, ///< Over-subscribed slots. + SHUFFLE_ERROR_ERRATA2, ///< Errata violation (v60). + SHUFFLE_ERROR_STORE_LOAD_CONFLICT, ///< store/load conflict SHUFFLE_ERROR_UNKNOWN ///< Unknown error. }; diff --git a/lib/Target/Hexagon/Makefile b/lib/Target/Hexagon/Makefile index 04b41e5986ac..c53b8e56aafc 100644 --- a/lib/Target/Hexagon/Makefile +++ b/lib/Target/Hexagon/Makefile @@ -13,6 +13,7 @@ TARGET = Hexagon # Make sure that tblgen is run, first thing. BUILT_SOURCES = HexagonGenRegisterInfo.inc \ HexagonGenInstrInfo.inc \ + HexagonGenAsmMatcher.inc \ HexagonGenAsmWriter.inc \ HexagonGenDAGISel.inc HexagonGenSubtargetInfo.inc \ HexagonGenCallingConv.inc \ @@ -20,6 +21,6 @@ BUILT_SOURCES = HexagonGenRegisterInfo.inc \ HexagonGenMCCodeEmitter.inc \ HexagonGenDisassemblerTables.inc -DIRS = TargetInfo MCTargetDesc Disassembler +DIRS = TargetInfo MCTargetDesc Disassembler AsmParser include $(LEVEL)/Makefile.common diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index f05d7a465252..eb794ebc7216 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -22,6 +22,7 @@ subdirectories = AMDGPU ARM AArch64 + AVR BPF CppBackend Hexagon diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index 70141a998e4a..72afec18becb 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -17,8 +17,6 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { - class MCOperand; - class MSP430InstPrinter : public MCInstPrinter { public: MSP430InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h index ff5b0b6d858c..183dee36a047 100644 --- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h +++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h @@ -17,13 +17,14 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class MSP430MCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit MSP430MCAsmInfo(const Triple &TT); - }; +class MSP430MCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit MSP430MCAsmInfo(const Triple &TT); +}; } // namespace llvm diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp index 
ffcf22216d4f..606abc250d98 100644 --- a/lib/Target/MSP430/MSP430BranchSelector.cpp +++ b/lib/Target/MSP430/MSP430BranchSelector.cpp @@ -64,7 +64,7 @@ bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) { unsigned FuncSize = 0; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { - MachineBasicBlock *MBB = MFI; + MachineBasicBlock *MBB = &*MFI; unsigned BlockSize = 0; for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 29bc8b33988a..18f38b7e90da 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -69,10 +69,6 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, computeRegisterProperties(STI.getRegisterInfo()); // Provide all sorts of operation actions - - // Division is expensive - setIntDivIsCheap(false); - setStackPointerRegisterToSaveRestore(MSP430::SP); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? @@ -508,9 +504,10 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); - InVal = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + InVal = DAG.getLoad( + VA.getLocVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, false, 0); } InVals.push_back(InVal); @@ -1231,8 +1228,7 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, } const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator I = BB; - ++I; + MachineFunction::iterator I = ++BB->getIterator(); // Create loop block MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB); @@ -1320,8 +1316,7 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // to set, the condition code register to branch on, the true/false values to // select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator I = BB; - ++I; + MachineFunction::iterator I = ++BB->getIterator(); // thisMBB: // ... 
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 72b1780fd1ce..d4f82bda1ec9 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -44,11 +44,10 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, - MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIdx), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); if (RC == &MSP430::GR16RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV16mr)) @@ -72,11 +71,10 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIdx), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); if (RC == &MSP430::GR16RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV16rm)) diff --git a/lib/Target/MSP430/MSP430MCInstLower.cpp b/lib/Target/MSP430/MSP430MCInstLower.cpp index 54154a8afac1..47b0e270c5b3 100644 --- a/lib/Target/MSP430/MSP430MCInstLower.cpp +++ b/lib/Target/MSP430/MSP430MCInstLower.cpp @@ -50,9 +50,9 @@ GetExternalSymbolSymbol(const MachineOperand &MO) const { MCSymbol *MSP430MCInstLower:: GetJumpTableSymbol(const MachineOperand &MO) const { - const DataLayout *DL = Printer.TM.getDataLayout(); + const DataLayout &DL = Printer.getDataLayout(); SmallString<256> Name; - raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" + raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "JTI" << Printer.getFunctionNumber() << '_' << MO.getIndex(); @@ -67,9 +67,9 @@ GetJumpTableSymbol(const MachineOperand &MO) const { MCSymbol *MSP430MCInstLower:: GetConstantPoolIndexSymbol(const MachineOperand &MO) const { - const DataLayout *DL = Printer.TM.getDataLayout(); + const DataLayout &DL = Printer.getDataLayout(); SmallString<256> Name; - raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "CPI" + raw_svector_ostream(Name) << DL.getPrivateGlobalPrefix() << "CPI" << Printer.getFunctionNumber() << '_' << MO.getIndex(); diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp index 0f7539908458..b442fc03b257 100644 --- a/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- MSP430MachineFuctionInfo.cpp - MSP430 machine function info -------===// +//===-- MSP430MachineFunctionInfo.cpp - MSP430 machine function info ------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h index fcc5f5b88600..2d937318c7e5 100644 --- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===- MSP430MachineFuctionInfo.h - MSP430 machine function info -*- C++ -*-==// +//=== MSP430MachineFunctionInfo.h - 
MSP430 machine function info -*- C++ -*-==// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/MSP430/README.txt b/lib/Target/MSP430/README.txt index e9899247fb5e..458f3f8b4c7e 100644 --- a/lib/Target/MSP430/README.txt +++ b/lib/Target/MSP430/README.txt @@ -2,7 +2,7 @@ // MSP430 backend. //===---------------------------------------------------------------------===// -DISCLAIMER: Thid backend should be considered as highly experimental. I never +DISCLAIMER: This backend should be considered as highly experimental. I never seen nor worked with this MCU, all information was gathered from datasheet only. The original intention of making this backend was to write documentation of form "How to write backend for dummies" :) Thes notes hopefully will be diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 5107d2ae58c3..d4e061f00d3a 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -11,6 +11,7 @@ #include "MCTargetDesc/MipsMCExpr.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "MipsRegisterInfo.h" +#include "MipsTargetObjectFile.h" #include "MipsTargetStreamer.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallVector.h" @@ -106,7 +107,6 @@ class MipsAsmParser : public MCTargetAsmParser { return static_cast(TS); } - MCSubtargetInfo &STI; MipsABIInfo ABI; SmallVector, 2> AssemblerOptions; MCSymbol *CurrentFn; // Pointer to the function being parsed. It may be a @@ -114,6 +114,12 @@ class MipsAsmParser : public MCTargetAsmParser { // selected. This usually happens after an '.end func' // directive. bool IsLittleEndian; + bool IsPicEnabled; + bool IsCpRestoreSet; + int CpRestoreOffset; + unsigned CpSaveLocation; + /// If true, then CpSaveLocation is a register, otherwise it's an offset. + bool CpSaveLocationIsRegister; // Print a warning along with its fix-it message at the given range. 
void printWarningWithFixIt(const Twine &Msg, const Twine &FixMsg, @@ -141,50 +147,41 @@ class MipsAsmParser : public MCTargetAsmParser { bool ParseDirective(AsmToken DirectiveID) override; - MipsAsmParser::OperandMatchResultTy parseMemOperand(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy + OperandMatchResultTy parseMemOperand(OperandVector &Operands); + OperandMatchResultTy matchAnyRegisterNameWithoutDollar(OperandVector &Operands, StringRef Identifier, SMLoc S); - - MipsAsmParser::OperandMatchResultTy - matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S); - - MipsAsmParser::OperandMatchResultTy parseAnyRegister(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy parseImm(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy parseJumpTarget(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy parseInvNum(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy parseLSAImm(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy - parseRegisterPair (OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy - parseMovePRegPair(OperandVector &Operands); - - MipsAsmParser::OperandMatchResultTy - parseRegisterList (OperandVector &Operands); + OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands, + SMLoc S); + OperandMatchResultTy parseAnyRegister(OperandVector &Operands); + OperandMatchResultTy parseImm(OperandVector &Operands); + OperandMatchResultTy parseJumpTarget(OperandVector &Operands); + OperandMatchResultTy parseInvNum(OperandVector &Operands); + OperandMatchResultTy parseLSAImm(OperandVector &Operands); + OperandMatchResultTy parseRegisterPair(OperandVector &Operands); + OperandMatchResultTy parseMovePRegPair(OperandVector &Operands); + OperandMatchResultTy parseRegisterList(OperandVector &Operands); bool searchSymbolAlias(OperandVector &Operands); bool parseOperand(OperandVector &, StringRef Mnemonic); - bool needsExpansion(MCInst &Inst); + enum MacroExpanderResultTy { + MER_NotAMacro, + MER_Success, + MER_Fail, + }; // Expands assembly pseudo instructions. - // Returns false on success, true otherwise. 
- bool expandInstruction(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl &Instructions); + MacroExpanderResultTy + tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); bool expandJalWithRegs(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); bool loadImmediate(int64_t ImmValue, unsigned DstReg, unsigned SrcReg, - bool Is32BitImm, SMLoc IDLoc, + bool Is32BitImm, bool IsAddress, SMLoc IDLoc, SmallVectorImpl &Instructions); bool loadAndAddSymbolAddress(const MCExpr *SymExpr, unsigned DstReg, @@ -194,11 +191,10 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, SmallVectorImpl &Instructions); - bool expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, - SmallVectorImpl &Instructions); + bool expandLoadAddress(unsigned DstReg, unsigned BaseReg, + const MCOperand &Offset, bool Is32BitAddress, + SMLoc IDLoc, SmallVectorImpl &Instructions); - bool expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, - SmallVectorImpl &Instructions); bool expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); @@ -209,24 +205,43 @@ class MipsAsmParser : public MCTargetAsmParser { bool expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); + bool expandAliasImmediate(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool expandBranchImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); bool expandCondBranches(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); - bool expandUlhu(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl &Instructions); + bool expandDiv(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions, const bool IsMips64, + const bool Signed); + + bool expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc, + SmallVectorImpl &Instructions); bool expandUlw(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions); + bool expandRotation(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool expandRotationImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool expandDRotation(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool expandDRotationImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions); + void createNop(bool hasShortDelaySlot, SMLoc IDLoc, SmallVectorImpl &Instructions); void createAddu(unsigned DstReg, unsigned SrcReg, unsigned TrgReg, bool Is64Bit, SmallVectorImpl &Instructions); + void createCpRestoreMemOp(bool IsLoad, int StackOffset, SMLoc IDLoc, + SmallVectorImpl &Instructions); + bool reportParseError(Twine ErrorMsg); bool reportParseError(SMLoc Loc, Twine ErrorMsg); @@ -239,8 +254,11 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetMips0Directive(); bool parseSetArchDirective(); bool parseSetFeature(uint64_t Feature); + bool isPicAndNotNxxAbi(); // Used by .cpload, .cprestore, and .cpsetup. bool parseDirectiveCpLoad(SMLoc Loc); + bool parseDirectiveCpRestore(SMLoc Loc); bool parseDirectiveCPSetup(); + bool parseDirectiveCPReturn(); bool parseDirectiveNaN(); bool parseDirectiveSet(); bool parseDirectiveOption(); @@ -337,6 +355,7 @@ class MipsAsmParser : public MCTargetAsmParser { // FeatureMipsGP64 | FeatureMips1) // Clearing Mips3 is equivalent to clear (FeatureMips3 | FeatureMips4). 
void selectArch(StringRef ArchFeature) { + MCSubtargetInfo &STI = copySTI(); FeatureBitset FeatureBits = STI.getFeatureBits(); FeatureBits &= ~MipsAssemblerOptions::AllArchRelatedMask; STI.setFeatureBits(FeatureBits); @@ -346,7 +365,8 @@ class MipsAsmParser : public MCTargetAsmParser { } void setFeatureBits(uint64_t Feature, StringRef FeatureString) { - if (!(STI.getFeatureBits()[Feature])) { + if (!(getSTI().getFeatureBits()[Feature])) { + MCSubtargetInfo &STI = copySTI(); setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); @@ -354,7 +374,8 @@ class MipsAsmParser : public MCTargetAsmParser { } void clearFeatureBits(uint64_t Feature, StringRef FeatureString) { - if (STI.getFeatureBits()[Feature]) { + if (getSTI().getFeatureBits()[Feature]) { + MCSubtargetInfo &STI = copySTI(); setAvailableFeatures( ComputeAvailableFeatures(STI.ToggleFeature(FeatureString))); AssemblerOptions.back()->setFeatures(STI.getFeatureBits()); @@ -363,26 +384,25 @@ class MipsAsmParser : public MCTargetAsmParser { void setModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { setFeatureBits(Feature, FeatureString); - AssemblerOptions.front()->setFeatures(STI.getFeatureBits()); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); } void clearModuleFeatureBits(uint64_t Feature, StringRef FeatureString) { clearFeatureBits(Feature, FeatureString); - AssemblerOptions.front()->setFeatures(STI.getFeatureBits()); + AssemblerOptions.front()->setFeatures(getSTI().getFeatureBits()); } public: enum MipsMatchResultTy { - Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY + Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY, #define GET_OPERAND_DIAGNOSTIC_TYPES #include "MipsGenAsmMatcher.inc" #undef GET_OPERAND_DIAGNOSTIC_TYPES - }; - MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, + MipsAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(sti), + : MCTargetAsmParser(Options, sti), ABI(MipsABIInfo::computeTargetABI(Triple(sti.getTargetTriple()), sti.getCPU(), Options)) { MCAsmParserExtension::Initialize(parser); @@ -390,15 +410,15 @@ public: parser.addAliasForDirective(".asciiz", ".asciz"); // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); - + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); + // Remember the initial assembler options. The user can not modify these. AssemblerOptions.push_back( - llvm::make_unique(STI.getFeatureBits())); - + llvm::make_unique(getSTI().getFeatureBits())); + // Create an assembler options environment for the user to modify. AssemblerOptions.push_back( - llvm::make_unique(STI.getFeatureBits())); + llvm::make_unique(getSTI().getFeatureBits())); getTargetStreamer().updateABIInfo(*this); @@ -407,6 +427,12 @@ public: CurrentFn = nullptr; + IsPicEnabled = + (getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_); + + IsCpRestoreSet = false; + CpRestoreOffset = -1; + Triple TheTriple(sti.getTargetTriple()); if ((TheTriple.getArch() == Triple::mips) || (TheTriple.getArch() == Triple::mips64)) @@ -418,70 +444,103 @@ public: /// True if all of $fcc0 - $fcc7 exist for the current ISA. 
bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); } - bool isGP64bit() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; } - bool isFP64bit() const { return STI.getFeatureBits()[Mips::FeatureFP64Bit]; } + bool isGP64bit() const { + return getSTI().getFeatureBits()[Mips::FeatureGP64Bit]; + } + bool isFP64bit() const { + return getSTI().getFeatureBits()[Mips::FeatureFP64Bit]; + } const MipsABIInfo &getABI() const { return ABI; } bool isABI_N32() const { return ABI.IsN32(); } bool isABI_N64() const { return ABI.IsN64(); } bool isABI_O32() const { return ABI.IsO32(); } - bool isABI_FPXX() const { return STI.getFeatureBits()[Mips::FeatureFPXX]; } + bool isABI_FPXX() const { + return getSTI().getFeatureBits()[Mips::FeatureFPXX]; + } bool useOddSPReg() const { - return !(STI.getFeatureBits()[Mips::FeatureNoOddSPReg]); + return !(getSTI().getFeatureBits()[Mips::FeatureNoOddSPReg]); } bool inMicroMipsMode() const { - return STI.getFeatureBits()[Mips::FeatureMicroMips]; + return getSTI().getFeatureBits()[Mips::FeatureMicroMips]; + } + bool hasMips1() const { + return getSTI().getFeatureBits()[Mips::FeatureMips1]; + } + bool hasMips2() const { + return getSTI().getFeatureBits()[Mips::FeatureMips2]; + } + bool hasMips3() const { + return getSTI().getFeatureBits()[Mips::FeatureMips3]; + } + bool hasMips4() const { + return getSTI().getFeatureBits()[Mips::FeatureMips4]; + } + bool hasMips5() const { + return getSTI().getFeatureBits()[Mips::FeatureMips5]; } - bool hasMips1() const { return STI.getFeatureBits()[Mips::FeatureMips1]; } - bool hasMips2() const { return STI.getFeatureBits()[Mips::FeatureMips2]; } - bool hasMips3() const { return STI.getFeatureBits()[Mips::FeatureMips3]; } - bool hasMips4() const { return STI.getFeatureBits()[Mips::FeatureMips4]; } - bool hasMips5() const { return STI.getFeatureBits()[Mips::FeatureMips5]; } bool hasMips32() const { - return STI.getFeatureBits()[Mips::FeatureMips32]; + return getSTI().getFeatureBits()[Mips::FeatureMips32]; } bool hasMips64() const { - return STI.getFeatureBits()[Mips::FeatureMips64]; + return getSTI().getFeatureBits()[Mips::FeatureMips64]; } bool hasMips32r2() const { - return STI.getFeatureBits()[Mips::FeatureMips32r2]; + return getSTI().getFeatureBits()[Mips::FeatureMips32r2]; } bool hasMips64r2() const { - return STI.getFeatureBits()[Mips::FeatureMips64r2]; + return getSTI().getFeatureBits()[Mips::FeatureMips64r2]; } bool hasMips32r3() const { - return (STI.getFeatureBits()[Mips::FeatureMips32r3]); + return (getSTI().getFeatureBits()[Mips::FeatureMips32r3]); } bool hasMips64r3() const { - return (STI.getFeatureBits()[Mips::FeatureMips64r3]); + return (getSTI().getFeatureBits()[Mips::FeatureMips64r3]); } bool hasMips32r5() const { - return (STI.getFeatureBits()[Mips::FeatureMips32r5]); + return (getSTI().getFeatureBits()[Mips::FeatureMips32r5]); } bool hasMips64r5() const { - return (STI.getFeatureBits()[Mips::FeatureMips64r5]); + return (getSTI().getFeatureBits()[Mips::FeatureMips64r5]); } bool hasMips32r6() const { - return STI.getFeatureBits()[Mips::FeatureMips32r6]; + return getSTI().getFeatureBits()[Mips::FeatureMips32r6]; } bool hasMips64r6() const { - return STI.getFeatureBits()[Mips::FeatureMips64r6]; + return getSTI().getFeatureBits()[Mips::FeatureMips64r6]; } - bool hasDSP() const { return STI.getFeatureBits()[Mips::FeatureDSP]; } - bool hasDSPR2() const { return STI.getFeatureBits()[Mips::FeatureDSPR2]; } - bool hasMSA() const { return STI.getFeatureBits()[Mips::FeatureMSA]; } + bool hasDSP() const { + 
    return getSTI().getFeatureBits()[Mips::FeatureDSP];
+  }
+  bool hasDSPR2() const {
+    return getSTI().getFeatureBits()[Mips::FeatureDSPR2];
+  }
+  bool hasDSPR3() const {
+    return getSTI().getFeatureBits()[Mips::FeatureDSPR3];
+  }
+  bool hasMSA() const {
+    return getSTI().getFeatureBits()[Mips::FeatureMSA];
+  }
   bool hasCnMips() const {
-    return (STI.getFeatureBits()[Mips::FeatureCnMips]);
+    return (getSTI().getFeatureBits()[Mips::FeatureCnMips]);
+  }
+
+  bool inPicMode() {
+    return IsPicEnabled;
   }
 
   bool inMips16Mode() const {
-    return STI.getFeatureBits()[Mips::FeatureMips16];
+    return getSTI().getFeatureBits()[Mips::FeatureMips16];
+  }
+
+  bool useTraps() const {
+    return getSTI().getFeatureBits()[Mips::FeatureUseTCCInDIV];
   }
 
   bool useSoftFloat() const {
-    return STI.getFeatureBits()[Mips::FeatureSoftFloat];
+    return getSTI().getFeatureBits()[Mips::FeatureSoftFloat];
   }
 
   /// Warn if RegIndex is the same as the current AT.
@@ -869,6 +928,16 @@ public:
     Inst.addOperand(MCOperand::createReg(getHWRegsReg()));
   }
 
+  template <unsigned Bits, int Offset = 0, int AdjustOffset = 0>
+  void addConstantUImmOperands(MCInst &Inst, unsigned N) const {
+    assert(N == 1 && "Invalid number of operands!");
+    uint64_t Imm = getConstantImm() - Offset;
+    Imm &= (1 << Bits) - 1;
+    Imm += Offset;
+    Imm += AdjustOffset;
+    Inst.addOperand(MCOperand::createImm(Imm));
+  }
+
   void addImmOperands(MCInst &Inst, unsigned N) const {
     assert(N == 1 && "Invalid number of operands!");
     const MCExpr *Expr = getImm();
@@ -878,7 +947,9 @@ public:
 
   void addMemOperands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && "Invalid number of operands!");
-    Inst.addOperand(MCOperand::createReg(getMemBase()->getGPR32Reg()));
+    Inst.addOperand(MCOperand::createReg(AsmParser.getABI().ArePtrs64bit()
+                                             ? getMemBase()->getGPR64Reg()
+                                             : getMemBase()->getGPR32Reg()));
 
     const MCExpr *Expr = getMemOff();
     addExpr(Inst, Expr);
@@ -924,10 +995,16 @@ public:
   bool isRegIdx() const { return Kind == k_RegisterIndex; }
   bool isImm() const override { return Kind == k_Immediate; }
   bool isConstantImm() const {
-    return isImm() && dyn_cast<MCConstantExpr>(getImm());
+    return isImm() && isa<MCConstantExpr>(getImm());
   }
-  template <unsigned Bits> bool isUImm() const {
-    return isImm() && isConstantImm() && isUInt<Bits>(getConstantImm());
+  bool isConstantImmz() const {
+    return isConstantImm() && getConstantImm() == 0;
+  }
+  template <unsigned Bits, int Offset = 0> bool isConstantUImm() const {
+    return isConstantImm() && isUInt<Bits>(getConstantImm() - Offset);
+  }
+  template <unsigned Bits> bool isConstantSImm() const {
+    return isConstantImm() && isInt<Bits>(getConstantImm());
   }
   bool isToken() const override {
     // Note: It's not possible to pretend that other operand kinds are tokens.
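The templated operand predicates added here all reduce to one rule: subtract
an optional Offset, then test the remainder against a bit width. A
self-contained sketch of that matching rule (fitsUInt/matchesUImm are my
names, not the parser's):

    #include <cassert>
    #include <cstdint>

    // Rough equivalent of llvm::isUInt<N>: true if V fits in N unsigned bits.
    template <unsigned Bits> constexpr bool fitsUInt(uint64_t V) {
      return Bits >= 64 || V < (UINT64_C(1) << Bits);
    }

    // Matching rule in the style of isConstantUImm<Bits, Offset>: accept Imm
    // when (Imm - Offset) fits in Bits bits, i.e. the legal range is
    // [Offset, Offset + 2^Bits - 1].
    template <unsigned Bits, int Offset = 0> bool matchesUImm(int64_t Imm) {
      return fitsUInt<Bits>(static_cast<uint64_t>(Imm - Offset));
    }

    int main() {
      assert(matchesUImm<5>(31));    // plain uimm5: [0, 31]
      assert(!matchesUImm<5>(32));
      assert(matchesUImm<5, 1>(32)); // shifted range: [1, 32]
      assert(!matchesUImm<5, 1>(0));
      return 0;
    }

addConstantUImmOperands() re-applies the same Offset (plus AdjustOffset) when
it rebuilds the operand, which keeps the matcher and the encoder in agreement
about the operand's legal range.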
@@ -936,10 +1013,15 @@ public: } bool isMem() const override { return Kind == k_Memory; } bool isConstantMemOff() const { - return isMem() && dyn_cast(getMemOff()); + return isMem() && isa(getMemOff()); } template bool isMemWithSimmOffset() const { - return isMem() && isConstantMemOff() && isInt(getConstantMemOff()); + return isMem() && isConstantMemOff() && isInt(getConstantMemOff()) + && getMemBase()->isGPRAsmReg(); + } + template bool isMemWithSimmOffsetGPR() const { + return isMem() && isConstantMemOff() && isInt(getConstantMemOff()) && + getMemBase()->isGPRAsmReg(); } bool isMemWithGRPMM16Base() const { return isMem() && getMemBase()->isMM16AsmReg(); @@ -953,13 +1035,23 @@ public: && (getConstantMemOff() % 4 == 0) && getMemBase()->isRegIdx() && (getMemBase()->getGPR32Reg() == Mips::SP); } + template + bool isScaledUImm() const { + return isConstantImm() && + isShiftedUInt(getConstantImm()); + } bool isRegList16() const { if (!isRegList()) return false; int Size = RegList.List->size(); - if (Size < 2 || Size > 5 || *RegList.List->begin() != Mips::S0 || - RegList.List->back() != Mips::RA) + if (Size < 2 || Size > 5) + return false; + + unsigned R0 = RegList.List->front(); + unsigned R1 = RegList.List->back(); + if (!((R0 == Mips::S0 && R1 == Mips::RA) || + (R0 == Mips::S0_64 && R1 == Mips::RA_64))) return false; int PrevReg = *RegList.List->begin(); @@ -1304,9 +1396,123 @@ static bool hasShortDelaySlot(unsigned Opcode) { } } +static const MCSymbol *getSingleMCSymbol(const MCExpr *Expr) { + if (const MCSymbolRefExpr *SRExpr = dyn_cast(Expr)) { + return &SRExpr->getSymbol(); + } + + if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) { + const MCSymbol *LHSSym = getSingleMCSymbol(BExpr->getLHS()); + const MCSymbol *RHSSym = getSingleMCSymbol(BExpr->getRHS()); + + if (LHSSym) + return LHSSym; + + if (RHSSym) + return RHSSym; + + return nullptr; + } + + if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) + return getSingleMCSymbol(UExpr->getSubExpr()); + + return nullptr; +} + +static unsigned countMCSymbolRefExpr(const MCExpr *Expr) { + if (isa(Expr)) + return 1; + + if (const MCBinaryExpr *BExpr = dyn_cast(Expr)) + return countMCSymbolRefExpr(BExpr->getLHS()) + + countMCSymbolRefExpr(BExpr->getRHS()); + + if (const MCUnaryExpr *UExpr = dyn_cast(Expr)) + return countMCSymbolRefExpr(UExpr->getSubExpr()); + + return 0; +} + +namespace { +void emitRX(unsigned Opcode, unsigned Reg0, MCOperand Op1, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(Reg0)); + tmpInst.addOperand(Op1); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRI(unsigned Opcode, unsigned Reg0, int32_t Imm, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + emitRX(Opcode, Reg0, MCOperand::createImm(Imm), IDLoc, Instructions); +} + +void emitRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + emitRX(Opcode, Reg0, MCOperand::createReg(Reg1), IDLoc, Instructions); +} + +void emitII(unsigned Opcode, int16_t Imm1, int16_t Imm2, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createImm(Imm1)); + tmpInst.addOperand(MCOperand::createImm(Imm2)); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitR(unsigned Opcode, unsigned Reg0, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(Reg0)); + 
tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRRX(unsigned Opcode, unsigned Reg0, unsigned Reg1, MCOperand Op2, + SMLoc IDLoc, SmallVectorImpl &Instructions) { + MCInst tmpInst; + tmpInst.setOpcode(Opcode); + tmpInst.addOperand(MCOperand::createReg(Reg0)); + tmpInst.addOperand(MCOperand::createReg(Reg1)); + tmpInst.addOperand(Op2); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} + +void emitRRR(unsigned Opcode, unsigned Reg0, unsigned Reg1, unsigned Reg2, + SMLoc IDLoc, SmallVectorImpl &Instructions) { + emitRRX(Opcode, Reg0, Reg1, MCOperand::createReg(Reg2), IDLoc, + Instructions); +} + +void emitRRI(unsigned Opcode, unsigned Reg0, unsigned Reg1, int16_t Imm, + SMLoc IDLoc, SmallVectorImpl &Instructions) { + emitRRX(Opcode, Reg0, Reg1, MCOperand::createImm(Imm), IDLoc, + Instructions); +} + +void emitAppropriateDSLL(unsigned DstReg, unsigned SrcReg, int16_t ShiftAmount, + SMLoc IDLoc, SmallVectorImpl &Instructions) { + if (ShiftAmount >= 32) { + emitRRI(Mips::DSLL32, DstReg, SrcReg, ShiftAmount - 32, IDLoc, + Instructions); + return; + } + + emitRRI(Mips::DSLL, DstReg, SrcReg, ShiftAmount, IDLoc, Instructions); +} +} // end anonymous namespace. + bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + bool ExpandedJalSym = false; Inst.setLoc(IDLoc); @@ -1365,12 +1571,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, return Error(IDLoc, "branch to misaligned address"); break; case Mips::BEQZ16_MM: + case Mips::BEQZC16_MMR6: case Mips::BNEZ16_MM: + case Mips::BNEZC16_MMR6: assert(MCID.getNumOperands() == 2 && "unexpected number of operands"); Offset = Inst.getOperand(1); if (!Offset.isImm()) break; // We'll deal with this situation later on when applying fixups. - if (!isIntN(8, Offset.getImm())) + if (!isInt<8>(Offset.getImm())) return Error(IDLoc, "branch target out of range"); if (OffsetToAlignment(Offset.getImm(), 2LL)) return Error(IDLoc, "branch to misaligned address"); @@ -1415,32 +1623,6 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, } break; - case Mips::CINS: - case Mips::CINS32: - case Mips::EXTS: - case Mips::EXTS32: - assert(MCID.getNumOperands() == 4 && "unexpected number of operands"); - // Check length - Opnd = Inst.getOperand(3); - if (!Opnd.isImm()) - return Error(IDLoc, "expected immediate operand kind"); - Imm = Opnd.getImm(); - if (Imm < 0 || Imm > 31) - return Error(IDLoc, "immediate operand value out of range"); - // Check position - Opnd = Inst.getOperand(2); - if (!Opnd.isImm()) - return Error(IDLoc, "expected immediate operand kind"); - Imm = Opnd.getImm(); - if (Imm < 0 || Imm > (Opcode == Mips::CINS || - Opcode == Mips::EXTS ? 63 : 31)) - return Error(IDLoc, "immediate operand value out of range"); - if (Imm > 31) { - Inst.setOpcode(Opcode == Mips::CINS ? Mips::CINS32 : Mips::EXTS32); - Inst.getOperand(2).setImm(Imm - 32); - } - break; - case Mips::SEQi: case Mips::SNEi: assert(MCID.getNumOperands() == 3 && "unexpected number of operands"); @@ -1454,6 +1636,81 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, } } + // This expansion is not in a function called by tryExpandInstruction() + // because the pseudo-instruction doesn't have a distinct opcode. 
+ if ((Inst.getOpcode() == Mips::JAL || Inst.getOpcode() == Mips::JAL_MM) && + inPicMode()) { + warnIfNoMacro(IDLoc); + + const MCExpr *JalExpr = Inst.getOperand(0).getExpr(); + + // We can do this expansion if there's only 1 symbol in the argument + // expression. + if (countMCSymbolRefExpr(JalExpr) > 1) + return Error(IDLoc, "jal doesn't support multiple symbols in PIC mode"); + + // FIXME: This is checking the expression can be handled by the later stages + // of the assembler. We ought to leave it to those later stages but + // we can't do that until we stop evaluateRelocExpr() rewriting the + // expressions into non-equivalent forms. + const MCSymbol *JalSym = getSingleMCSymbol(JalExpr); + + // FIXME: Add support for label+offset operands (currently causes an error). + // FIXME: Add support for forward-declared local symbols. + // FIXME: Add expansion for when the LargeGOT option is enabled. + if (JalSym->isInSection() || JalSym->isTemporary()) { + if (isABI_O32()) { + // If it's a local symbol and the O32 ABI is being used, we expand to: + // lw $25, 0($gp) + // R_(MICRO)MIPS_GOT16 label + // addiu $25, $25, 0 + // R_(MICRO)MIPS_LO16 label + // jalr $25 + const MCExpr *Got16RelocExpr = evaluateRelocExpr(JalExpr, "got"); + const MCExpr *Lo16RelocExpr = evaluateRelocExpr(JalExpr, "lo"); + + emitRRX(Mips::LW, Mips::T9, Mips::GP, + MCOperand::createExpr(Got16RelocExpr), IDLoc, Instructions); + emitRRX(Mips::ADDiu, Mips::T9, Mips::T9, + MCOperand::createExpr(Lo16RelocExpr), IDLoc, Instructions); + } else if (isABI_N32() || isABI_N64()) { + // If it's a local symbol and the N32/N64 ABIs are being used, + // we expand to: + // lw/ld $25, 0($gp) + // R_(MICRO)MIPS_GOT_DISP label + // jalr $25 + const MCExpr *GotDispRelocExpr = evaluateRelocExpr(JalExpr, "got_disp"); + + emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP, + MCOperand::createExpr(GotDispRelocExpr), IDLoc, Instructions); + } + } else { + // If it's an external/weak symbol, we expand to: + // lw/ld $25, 0($gp) + // R_(MICRO)MIPS_CALL16 label + // jalr $25 + const MCExpr *Call16RelocExpr = evaluateRelocExpr(JalExpr, "call16"); + + emitRRX(ABI.ArePtrs64bit() ? Mips::LD : Mips::LW, Mips::T9, Mips::GP, + MCOperand::createExpr(Call16RelocExpr), IDLoc, Instructions); + } + + MCInst JalrInst; + if (IsCpRestoreSet && inMicroMipsMode()) + JalrInst.setOpcode(Mips::JALRS_MM); + else + JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR); + JalrInst.addOperand(MCOperand::createReg(Mips::RA)); + JalrInst.addOperand(MCOperand::createReg(Mips::T9)); + + // FIXME: Add an R_(MICRO)MIPS_JALR relocation after the JALR. + // This relocation is supposed to be an optimization hint for the linker + // and is not necessary for correctness. + + Inst = JalrInst; + ExpandedJalSym = true; + } + if (MCID.mayLoad() || MCID.mayStore()) { // Check the offset of memory operand, if it is a symbol // reference or immediate we may have to expand instructions. 
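To summarize the expansion above: in PIC mode, jal sym becomes a GOT load into
$25 followed by jalr $25, and only the relocations on the load differ by ABI
and by whether the callee is local. A compact model of that decision
(simplified, using bare ELF relocation names; the real code builds MCInst
operands from %got/%lo/%got_disp/%call16 expressions instead):

    #include <string>
    #include <vector>

    // Which relocations the PIC 'jal' expansion requests for the $25 load.
    std::vector<std::string> picCallRelocs(bool IsLocalSym, bool IsO32) {
      if (!IsLocalSym)
        return {"R_MIPS_CALL16"};   // lw/ld $25, %call16(sym)($gp)
      if (IsO32)
        return {"R_MIPS_GOT16",     // lw $25, %got(sym)($gp)
                "R_MIPS_LO16"};     // addiu $25, $25, %lo(sym)
      return {"R_MIPS_GOT_DISP"};   // lw/ld $25, %got_disp(sym)($gp)
    }

(The microMIPS variants of these relocations follow the same shape.) The call
itself is always the same jalr through $25, which is why the code above builds
JalrInst only once, after the GOT-load sequence has been chosen.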
@@ -1500,17 +1757,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, int MemOffset = Op.getImm(); MCOperand &DstReg = Inst.getOperand(0); MCOperand &BaseReg = Inst.getOperand(1); - if (isIntN(9, MemOffset) && (MemOffset % 4 == 0) && + if (isInt<9>(MemOffset) && (MemOffset % 4 == 0) && getContext().getRegisterInfo()->getRegClass( Mips::GPRMM16RegClassID).contains(DstReg.getReg()) && - BaseReg.getReg() == Mips::GP) { - MCInst TmpInst; - TmpInst.setLoc(IDLoc); - TmpInst.setOpcode(Mips::LWGP_MM); - TmpInst.addOperand(MCOperand::createReg(DstReg.getReg())); - TmpInst.addOperand(MCOperand::createReg(Mips::GP)); - TmpInst.addOperand(MCOperand::createImm(MemOffset)); - Instructions.push_back(TmpInst); + (BaseReg.getReg() == Mips::GP || + BaseReg.getReg() == Mips::GP_64)) { + + emitRRI(Mips::LWGP_MM, DstReg.getReg(), Mips::GP, MemOffset, + IDLoc, Instructions); return false; } } @@ -1597,7 +1851,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, if (Imm < -1 || Imm > 14) return Error(IDLoc, "immediate operand value out of range"); break; + case Mips::TEQ_MM: + case Mips::TGE_MM: + case Mips::TGEU_MM: + case Mips::TLT_MM: + case Mips::TLTU_MM: + case Mips::TNE_MM: case Mips::SB16_MM: + case Mips::SB16_MMR6: Opnd = Inst.getOperand(2); if (!Opnd.isImm()) return Error(IDLoc, "expected immediate operand kind"); @@ -1607,6 +1868,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; case Mips::LHU16_MM: case Mips::SH16_MM: + case Mips::SH16_MMR6: Opnd = Inst.getOperand(2); if (!Opnd.isImm()) return Error(IDLoc, "expected immediate operand kind"); @@ -1616,6 +1878,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, break; case Mips::LW16_MM: case Mips::SW16_MM: + case Mips::SW16_MMR6: Opnd = Inst.getOperand(2); if (!Opnd.isImm()) return Error(IDLoc, "expected immediate operand kind"); @@ -1623,93 +1886,111 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, if (Imm < 0 || Imm > 60 || (Imm % 4 != 0)) return Error(IDLoc, "immediate operand value out of range"); break; - case Mips::CACHE: - case Mips::PREF: - Opnd = Inst.getOperand(2); - if (!Opnd.isImm()) - return Error(IDLoc, "expected immediate operand kind"); - Imm = Opnd.getImm(); - if (!isUInt<5>(Imm)) - return Error(IDLoc, "immediate operand value out of range"); - break; case Mips::ADDIUPC_MM: MCOperand Opnd = Inst.getOperand(1); if (!Opnd.isImm()) return Error(IDLoc, "expected immediate operand kind"); int Imm = Opnd.getImm(); - if ((Imm % 4 != 0) || !isIntN(25, Imm)) + if ((Imm % 4 != 0) || !isInt<25>(Imm)) return Error(IDLoc, "immediate operand value out of range"); break; } } - if (needsExpansion(Inst)) { - if (expandInstruction(Inst, IDLoc, Instructions)) - return true; - } else + MacroExpanderResultTy ExpandResult = + tryExpandInstruction(Inst, IDLoc, Instructions); + switch (ExpandResult) { + case MER_NotAMacro: Instructions.push_back(Inst); + break; + case MER_Success: + break; + case MER_Fail: + return true; + } // If this instruction has a delay slot and .set reorder is active, // emit a NOP after it. if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions); + if ((Inst.getOpcode() == Mips::JalOneReg || + Inst.getOpcode() == Mips::JalTwoReg || ExpandedJalSym) && + isPicAndNotNxxAbi()) { + if (IsCpRestoreSet) { + // We need a NOP between the JALR and the LW: + // If .set reorder has been used, we've already emitted a NOP. 
+ // If .set noreorder has been used, we need to emit a NOP at this point. + if (!AssemblerOptions.back()->isReorder()) + createNop(hasShortDelaySlot(Inst.getOpcode()), IDLoc, Instructions); + + // Load the $gp from the stack. + SmallVector LoadInsts; + createCpRestoreMemOp(true /*IsLoad*/, CpRestoreOffset /*StackOffset*/, + IDLoc, LoadInsts); + + for (const MCInst &Inst : LoadInsts) + Instructions.push_back(Inst); + + } else + Warning(IDLoc, "no .cprestore used in PIC mode"); + } + return false; } -bool MipsAsmParser::needsExpansion(MCInst &Inst) { - +MipsAsmParser::MacroExpanderResultTy +MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { switch (Inst.getOpcode()) { - case Mips::LoadImm32: - case Mips::LoadImm64: - case Mips::LoadAddrImm32: - case Mips::LoadAddrReg32: - case Mips::B_MM_Pseudo: - case Mips::LWM_MM: - case Mips::SWM_MM: - case Mips::JalOneReg: - case Mips::JalTwoReg: - case Mips::BneImm: - case Mips::BeqImm: - case Mips::BLT: - case Mips::BLE: - case Mips::BGE: - case Mips::BGT: - case Mips::BLTU: - case Mips::BLEU: - case Mips::BGEU: - case Mips::BGTU: - case Mips::Ulhu: - case Mips::Ulw: - return true; default: - return false; - } -} - -bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - switch (Inst.getOpcode()) { - default: llvm_unreachable("unimplemented expansion"); + return MER_NotAMacro; case Mips::LoadImm32: - return expandLoadImm(Inst, true, IDLoc, Instructions); + return expandLoadImm(Inst, true, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::LoadImm64: - return expandLoadImm(Inst, false, IDLoc, Instructions); + return expandLoadImm(Inst, false, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::LoadAddrImm32: - return expandLoadAddressImm(Inst, true, IDLoc, Instructions); + case Mips::LoadAddrImm64: + assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + assert((Inst.getOperand(1).isImm() || Inst.getOperand(1).isExpr()) && + "expected immediate operand kind"); + + return expandLoadAddress(Inst.getOperand(0).getReg(), Mips::NoRegister, + Inst.getOperand(1), + Inst.getOpcode() == Mips::LoadAddrImm32, IDLoc, + Instructions) + ? MER_Fail + : MER_Success; case Mips::LoadAddrReg32: - return expandLoadAddressReg(Inst, true, IDLoc, Instructions); + case Mips::LoadAddrReg64: + assert(Inst.getOperand(0).isReg() && "expected register operand kind"); + assert(Inst.getOperand(1).isReg() && "expected register operand kind"); + assert((Inst.getOperand(2).isImm() || Inst.getOperand(2).isExpr()) && + "expected immediate operand kind"); + + return expandLoadAddress(Inst.getOperand(0).getReg(), + Inst.getOperand(1).getReg(), Inst.getOperand(2), + Inst.getOpcode() == Mips::LoadAddrReg32, IDLoc, + Instructions) + ? MER_Fail + : MER_Success; case Mips::B_MM_Pseudo: - return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions); + case Mips::B_MMR6_Pseudo: + return expandUncondBranchMMPseudo(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::SWM_MM: case Mips::LWM_MM: - return expandLoadStoreMultiple(Inst, IDLoc, Instructions); + return expandLoadStoreMultiple(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::JalOneReg: case Mips::JalTwoReg: - return expandJalWithRegs(Inst, IDLoc, Instructions); + return expandJalWithRegs(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; case Mips::BneImm: case Mips::BeqImm: - return expandBranchImm(Inst, IDLoc, Instructions); + return expandBranchImm(Inst, IDLoc, Instructions) ? 
MER_Fail : MER_Success; case Mips::BLT: case Mips::BLE: case Mips::BGE: @@ -1718,78 +1999,97 @@ bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, case Mips::BLEU: case Mips::BGEU: case Mips::BGTU: - return expandCondBranches(Inst, IDLoc, Instructions); + case Mips::BLTL: + case Mips::BLEL: + case Mips::BGEL: + case Mips::BGTL: + case Mips::BLTUL: + case Mips::BLEUL: + case Mips::BGEUL: + case Mips::BGTUL: + case Mips::BLTImmMacro: + case Mips::BLEImmMacro: + case Mips::BGEImmMacro: + case Mips::BGTImmMacro: + case Mips::BLTUImmMacro: + case Mips::BLEUImmMacro: + case Mips::BGEUImmMacro: + case Mips::BGTUImmMacro: + case Mips::BLTLImmMacro: + case Mips::BLELImmMacro: + case Mips::BGELImmMacro: + case Mips::BGTLImmMacro: + case Mips::BLTULImmMacro: + case Mips::BLEULImmMacro: + case Mips::BGEULImmMacro: + case Mips::BGTULImmMacro: + return expandCondBranches(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::SDivMacro: + return expandDiv(Inst, IDLoc, Instructions, false, true) ? MER_Fail + : MER_Success; + case Mips::DSDivMacro: + return expandDiv(Inst, IDLoc, Instructions, true, true) ? MER_Fail + : MER_Success; + case Mips::UDivMacro: + return expandDiv(Inst, IDLoc, Instructions, false, false) ? MER_Fail + : MER_Success; + case Mips::DUDivMacro: + return expandDiv(Inst, IDLoc, Instructions, true, false) ? MER_Fail + : MER_Success; + case Mips::Ulh: + return expandUlh(Inst, true, IDLoc, Instructions) ? MER_Fail : MER_Success; case Mips::Ulhu: - return expandUlhu(Inst, IDLoc, Instructions); + return expandUlh(Inst, false, IDLoc, Instructions) ? MER_Fail : MER_Success; case Mips::Ulw: - return expandUlw(Inst, IDLoc, Instructions); + return expandUlw(Inst, IDLoc, Instructions) ? MER_Fail : MER_Success; + case Mips::NORImm: + return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::ADDi: + case Mips::ADDiu: + case Mips::SLTi: + case Mips::SLTiu: + if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() && + Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) { + int64_t ImmValue = Inst.getOperand(2).getImm(); + if (isInt<16>(ImmValue)) + return MER_NotAMacro; + return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + } + return MER_NotAMacro; + case Mips::ANDi: + case Mips::ORi: + case Mips::XORi: + if ((Inst.getNumOperands() == 3) && Inst.getOperand(0).isReg() && + Inst.getOperand(1).isReg() && Inst.getOperand(2).isImm()) { + int64_t ImmValue = Inst.getOperand(2).getImm(); + if (isUInt<16>(ImmValue)) + return MER_NotAMacro; + return expandAliasImmediate(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + } + return MER_NotAMacro; + case Mips::ROL: + case Mips::ROR: + return expandRotation(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::ROLImm: + case Mips::RORImm: + return expandRotationImm(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::DROL: + case Mips::DROR: + return expandDRotation(Inst, IDLoc, Instructions) ? MER_Fail + : MER_Success; + case Mips::DROLImm: + case Mips::DRORImm: + return expandDRotationImm(Inst, IDLoc, Instructions) ? 
MER_Fail + : MER_Success; } } -namespace { -void emitRX(unsigned Opcode, unsigned DstReg, MCOperand Imm, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - MCInst tmpInst; - tmpInst.setOpcode(Opcode); - tmpInst.addOperand(MCOperand::createReg(DstReg)); - tmpInst.addOperand(Imm); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); -} - -void emitRI(unsigned Opcode, unsigned DstReg, int16_t Imm, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - emitRX(Opcode, DstReg, MCOperand::createImm(Imm), IDLoc, Instructions); -} - - -void emitRRX(unsigned Opcode, unsigned DstReg, unsigned SrcReg, MCOperand Imm, - SMLoc IDLoc, SmallVectorImpl &Instructions) { - MCInst tmpInst; - tmpInst.setOpcode(Opcode); - tmpInst.addOperand(MCOperand::createReg(DstReg)); - tmpInst.addOperand(MCOperand::createReg(SrcReg)); - tmpInst.addOperand(Imm); - tmpInst.setLoc(IDLoc); - Instructions.push_back(tmpInst); -} - -void emitRRR(unsigned Opcode, unsigned DstReg, unsigned SrcReg, - unsigned SrcReg2, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - emitRRX(Opcode, DstReg, SrcReg, MCOperand::createReg(SrcReg2), IDLoc, - Instructions); -} - -void emitRRI(unsigned Opcode, unsigned DstReg, unsigned SrcReg, int16_t Imm, - SMLoc IDLoc, SmallVectorImpl &Instructions) { - emitRRX(Opcode, DstReg, SrcReg, MCOperand::createImm(Imm), IDLoc, - Instructions); -} - -template -void createLShiftOri(MCOperand Operand, unsigned RegNo, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - if (ShiftAmount >= 32) - emitRRI(Mips::DSLL32, RegNo, RegNo, ShiftAmount - 32, IDLoc, Instructions); - else if (ShiftAmount > 0) - emitRRI(Mips::DSLL, RegNo, RegNo, ShiftAmount, IDLoc, Instructions); - - // There's no need for an ORi if the immediate is 0. - if (Operand.isImm() && Operand.getImm() == 0) - return; - - emitRRX(Mips::ORi, RegNo, RegNo, Operand, IDLoc, Instructions); -} - -template -void createLShiftOri(int64_t Value, unsigned RegNo, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - createLShiftOri(MCOperand::createImm(Value), RegNo, IDLoc, - Instructions); -} -} - bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { // Create a JALR instruction which is going to replace the pseudo-JAL. @@ -1800,8 +2100,11 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc, if (Opcode == Mips::JalOneReg) { // jal $rs => jalr $rs - if (inMicroMipsMode()) { - JalrInst.setOpcode(Mips::JALR16_MM); + if (IsCpRestoreSet && inMicroMipsMode()) { + JalrInst.setOpcode(Mips::JALRS16_MM); + JalrInst.addOperand(FirstRegOp); + } else if (inMicroMipsMode()) { + JalrInst.setOpcode(hasMips32r6() ? Mips::JALRC16_MMR6 : Mips::JALR16_MM); JalrInst.addOperand(FirstRegOp); } else { JalrInst.setOpcode(Mips::JALR); @@ -1810,30 +2113,47 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc, } } else if (Opcode == Mips::JalTwoReg) { // jal $rd, $rs => jalr $rd, $rs - JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR); + if (IsCpRestoreSet && inMicroMipsMode()) + JalrInst.setOpcode(Mips::JALRS_MM); + else + JalrInst.setOpcode(inMicroMipsMode() ? Mips::JALR_MM : Mips::JALR); JalrInst.addOperand(FirstRegOp); const MCOperand SecondRegOp = Inst.getOperand(1); JalrInst.addOperand(SecondRegOp); } Instructions.push_back(JalrInst); - // If .set reorder is active, emit a NOP after it. - if (AssemblerOptions.back()->isReorder()) { - // This is a 32-bit NOP because these 2 pseudo-instructions - // do not have a short delay slot. 
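// Illustrative sketch (not part of the patch): the opcode choice the new
// expandJalWithRegs code in this hunk makes for the one-register "jal $rs"
// form. The enum below stands in for the real Mips::* opcode values.
#include <cstdio>

enum JalrOpcode { JALR, JALR16_MM, JALRC16_MMR6, JALRS16_MM };

static JalrOpcode pickJalrOpcode(bool IsCpRestoreSet, bool InMicroMips,
                                 bool HasMips32r6) {
  if (IsCpRestoreSet && InMicroMips)
    return JALRS16_MM; // pairs with the .cprestore $gp reload sequence
  if (InMicroMips)
    return HasMips32r6 ? JALRC16_MMR6 : JALR16_MM; // R6 form: no delay slot
  return JALR; // classic MIPS: jalr $ra, $rs
}

int main() {
  // microMIPS R6 selects the compact JALRC form:
  std::printf("%d\n", pickJalrOpcode(false, true, true));
  return 0;
}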
-    MCInst NopInst;
-    NopInst.setOpcode(Mips::SLL);
-    NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
-    NopInst.addOperand(MCOperand::createReg(Mips::ZERO));
-    NopInst.addOperand(MCOperand::createImm(0));
-    Instructions.push_back(NopInst);
+  // If .set reorder is active and branch instruction has a delay slot,
+  // emit a NOP after it.
+  const MCInstrDesc &MCID = getInstDesc(JalrInst.getOpcode());
+  if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) {
+    createNop(hasShortDelaySlot(JalrInst.getOpcode()), IDLoc, Instructions);
   }
 
   return false;
 }
 
+/// Can the value be represented by an unsigned N-bit value and a shift left?
+template <unsigned N> static bool isShiftedUIntAtAnyPosition(uint64_t x) {
+  unsigned BitNum = findFirstSet(x);
+
+  return (x == x >> BitNum << BitNum) && isUInt<N>(x >> BitNum);
+}
+
+/// Load (or add) an immediate into a register.
+///
+/// @param ImmValue     The immediate to load.
+/// @param DstReg       The register that will hold the immediate.
+/// @param SrcReg       A register to add to the immediate or Mips::NoRegister
+///                     for a simple initialization.
+/// @param Is32BitImm   Is ImmValue 32-bit or 64-bit?
+/// @param IsAddress    True if the immediate represents an address. False if it
+///                     is an integer.
+/// @param IDLoc        Location of the immediate in the source file.
+/// @param Instructions The instructions emitted by this expansion.
 bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
-                                  unsigned SrcReg, bool Is32BitImm, SMLoc IDLoc,
+                                  unsigned SrcReg, bool Is32BitImm,
+                                  bool IsAddress, SMLoc IDLoc,
                                   SmallVectorImpl<MCInst> &Instructions) {
   if (!Is32BitImm && !isGP64bit()) {
     Error(IDLoc, "instruction requires a 64-bit architecture");
@@ -1852,6 +2172,9 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
     }
   }
 
+  unsigned ZeroReg = IsAddress ? ABI.GetNullPtr() : ABI.GetZeroReg();
+  unsigned AdduOp = !Is32BitImm ? Mips::DADDu : Mips::ADDu;
+
   bool UseSrcReg = false;
   if (SrcReg != Mips::NoRegister)
     UseSrcReg = true;
@@ -1866,111 +2189,129 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
     TmpReg = ATReg;
   }
 
-  // FIXME: gas has a special case for values that are 000...1111, which
-  // becomes a li -1 and then a dsrl
   if (isInt<16>(ImmValue)) {
-    // li d,j => addiu d,$zero,j
     if (!UseSrcReg)
-      SrcReg = Mips::ZERO;
-    emitRRI(Mips::ADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions);
-  } else if (isUInt<16>(ImmValue)) {
-    // li d,j => ori d,$zero,j
-    unsigned TmpReg = DstReg;
-    if (SrcReg == DstReg) {
-      unsigned ATReg = getATReg(IDLoc);
-      if (!ATReg)
-        return true;
-      TmpReg = ATReg;
+      SrcReg = ZeroReg;
+
+    // This doesn't quite follow the usual ABI expectations for N32 but matches
+    // traditional assembler behaviour. N32 would normally use addiu for both
+    // integers and addresses.
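// Illustrative sketch (not part of the patch): a self-contained equivalent
// of isShiftedUIntAtAnyPosition<N> above, with LLVM's findFirstSet/isUInt
// replaced by __builtin_ctzll and a shift test. The real helper is only
// reached with nonzero values; zero is handled explicitly here because ctz
// of zero is undefined.
#include <cassert>
#include <cstdint>

template <unsigned N> static bool isShiftedUIntAtAnyPos(uint64_t X) {
  if (X == 0)
    return true;
  unsigned BitNum = __builtin_ctzll(X); // index of the lowest set bit
  return (X >> BitNum) >> N == 0;       // stripped value fits in N bits
}

int main() {
  assert(isShiftedUIntAtAnyPos<16>(0x00ffff0000000000ULL));  // 0xffff << 40
  assert(!isShiftedUIntAtAnyPos<16>(0x0001ffff00000000ULL)); // needs 17 bits
  return 0;
}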
+ if (IsAddress && !Is32BitImm) { + emitRRI(Mips::DADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions); + return false; } - emitRRI(Mips::ORi, TmpReg, Mips::ZERO, ImmValue, IDLoc, Instructions); + emitRRI(Mips::ADDiu, DstReg, SrcReg, ImmValue, IDLoc, Instructions); + return false; + } + + if (isUInt<16>(ImmValue)) { + unsigned TmpReg = DstReg; + if (SrcReg == DstReg) { + TmpReg = getATReg(IDLoc); + if (!TmpReg) + return true; + } + + emitRRI(Mips::ORi, TmpReg, ZeroReg, ImmValue, IDLoc, Instructions); if (UseSrcReg) - emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions); - } else if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) { + emitRRR(ABI.GetPtrAdduOp(), DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; + } + + if (isInt<32>(ImmValue) || isUInt<32>(ImmValue)) { warnIfNoMacro(IDLoc); - // For all other values which are representable as a 32-bit integer: - // li d,j => lui d,hi16(j) - // ori d,d,lo16(j) uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff; uint16_t Bits15To0 = ImmValue & 0xffff; if (!Is32BitImm && !isInt<32>(ImmValue)) { - // For DLI, expand to an ORi instead of a LUi to avoid sign-extending the + // Traditional behaviour seems to special case this particular value. It's + // not clear why other masks are handled differently. + if (ImmValue == 0xffffffff) { + emitRI(Mips::LUi, TmpReg, 0xffff, IDLoc, Instructions); + emitRRI(Mips::DSRL32, TmpReg, TmpReg, 0, IDLoc, Instructions); + if (UseSrcReg) + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; + } + + // Expand to an ORi instead of a LUi to avoid sign-extending into the // upper 32 bits. - emitRRI(Mips::ORi, TmpReg, Mips::ZERO, Bits31To16, IDLoc, Instructions); + emitRRI(Mips::ORi, TmpReg, ZeroReg, Bits31To16, IDLoc, Instructions); emitRRI(Mips::DSLL, TmpReg, TmpReg, 16, IDLoc, Instructions); - } else - emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions); - createLShiftOri<0>(Bits15To0, TmpReg, IDLoc, Instructions); - - if (UseSrcReg) - createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); - - } else if ((ImmValue & (0xffffLL << 48)) == 0) { - warnIfNoMacro(IDLoc); - - // <------- lo32 ------> - // <------- hi32 ------> - // <- hi16 -> <- lo16 -> - // _________________________________ - // | | | | - // | 16-bits | 16-bits | 16-bits | - // |__________|__________|__________| - // - // For any 64-bit value that is representable as a 48-bit integer: - // li d,j => lui d,hi16(j) - // ori d,d,hi16(lo32(j)) - // dsll d,d,16 - // ori d,d,lo16(lo32(j)) - uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff; - uint16_t Bits31To16 = (ImmValue >> 16) & 0xffff; - uint16_t Bits15To0 = ImmValue & 0xffff; - - emitRI(Mips::LUi, TmpReg, Bits47To32, IDLoc, Instructions); - createLShiftOri<0>(Bits31To16, TmpReg, IDLoc, Instructions); - createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions); - - if (UseSrcReg) - createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); - - } else { - warnIfNoMacro(IDLoc); - - // <------- hi32 ------> <------- lo32 ------> - // <- hi16 -> <- lo16 -> - // ___________________________________________ - // | | | | | - // | 16-bits | 16-bits | 16-bits | 16-bits | - // |__________|__________|__________|__________| - // - // For all other values which are representable as a 64-bit integer: - // li d,j => lui d,hi16(j) - // ori d,d,lo16(hi32(j)) - // dsll d,d,16 - // ori d,d,hi16(lo32(j)) - // dsll d,d,16 - // ori d,d,lo16(lo32(j)) - uint16_t Bits63To48 = (ImmValue >> 48) & 0xffff; - uint16_t Bits47To32 = (ImmValue >> 32) & 0xffff; - uint16_t 
Bits31To16 = (ImmValue >> 16) & 0xffff; - uint16_t Bits15To0 = ImmValue & 0xffff; - - emitRI(Mips::LUi, TmpReg, Bits63To48, IDLoc, Instructions); - createLShiftOri<0>(Bits47To32, TmpReg, IDLoc, Instructions); - - // When Bits31To16 is 0, do a left shift of 32 bits instead of doing - // two left shifts of 16 bits. - if (Bits31To16 == 0) { - createLShiftOri<32>(Bits15To0, TmpReg, IDLoc, Instructions); - } else { - createLShiftOri<16>(Bits31To16, TmpReg, IDLoc, Instructions); - createLShiftOri<16>(Bits15To0, TmpReg, IDLoc, Instructions); + if (Bits15To0) + emitRRI(Mips::ORi, TmpReg, TmpReg, Bits15To0, IDLoc, Instructions); + if (UseSrcReg) + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; } + emitRI(Mips::LUi, TmpReg, Bits31To16, IDLoc, Instructions); + if (Bits15To0) + emitRRI(Mips::ORi, TmpReg, TmpReg, Bits15To0, IDLoc, Instructions); if (UseSrcReg) - createAddu(DstReg, TmpReg, SrcReg, !Is32BitImm, Instructions); + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; } + + if (isShiftedUIntAtAnyPosition<16>(ImmValue)) { + if (Is32BitImm) { + Error(IDLoc, "instruction requires a 32-bit immediate"); + return true; + } + + // Traditionally, these immediates are shifted as little as possible and as + // such we align the most significant bit to bit 15 of our temporary. + unsigned FirstSet = findFirstSet((uint64_t)ImmValue); + unsigned LastSet = findLastSet((uint64_t)ImmValue); + unsigned ShiftAmount = FirstSet - (15 - (LastSet - FirstSet)); + uint16_t Bits = (ImmValue >> ShiftAmount) & 0xffff; + emitRRI(Mips::ORi, TmpReg, ZeroReg, Bits, IDLoc, Instructions); + emitRRI(Mips::DSLL, TmpReg, TmpReg, ShiftAmount, IDLoc, Instructions); + + if (UseSrcReg) + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + + return false; + } + + warnIfNoMacro(IDLoc); + + // The remaining case is packed with a sequence of dsll and ori with zeros + // being omitted and any neighbouring dsll's being coalesced. + // The highest 32-bit's are equivalent to a 32-bit immediate load. + + // Load bits 32-63 of ImmValue into bits 0-31 of the temporary register. + if (loadImmediate(ImmValue >> 32, TmpReg, Mips::NoRegister, true, false, + IDLoc, Instructions)) + return false; + + // Shift and accumulate into the register. If a 16-bit chunk is zero, then + // skip it and defer the shift to the next chunk. + unsigned ShiftCarriedForwards = 16; + for (int BitNum = 16; BitNum >= 0; BitNum -= 16) { + uint16_t ImmChunk = (ImmValue >> BitNum) & 0xffff; + + if (ImmChunk != 0) { + emitAppropriateDSLL(TmpReg, TmpReg, ShiftCarriedForwards, IDLoc, + Instructions); + emitRRI(Mips::ORi, TmpReg, TmpReg, ImmChunk, IDLoc, Instructions); + ShiftCarriedForwards = 0; + } + + ShiftCarriedForwards += 16; + } + ShiftCarriedForwards -= 16; + + // Finish any remaining shifts left by trailing zeros. 
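// Illustrative sketch (not part of the patch): the deferred-shift chunking
// loop above, emitting text instead of MCInsts. Zero 16-bit chunks cost no
// "ori", and their shifts coalesce into the next "dsll".
#include <cstdint>
#include <cstdio>

static void materializeLow32(uint64_t Imm) {
  // Assume bits 32-63 were already loaded into $at by the recursive
  // loadImmediate call; finish bits 31..0.
  unsigned PendingShift = 16;
  for (int BitNum = 16; BitNum >= 0; BitNum -= 16) {
    uint16_t Chunk = (Imm >> BitNum) & 0xffff;
    if (Chunk != 0) {
      std::printf("  dsll $at, $at, %u\n", PendingShift);
      std::printf("  ori  $at, $at, 0x%x\n", Chunk);
      PendingShift = 0;
    }
    PendingShift += 16;
  }
  PendingShift -= 16;
  if (PendingShift) // trailing zero chunks still need one final shift
    std::printf("  dsll $at, $at, %u\n", PendingShift);
}

int main() {
  materializeLow32(0x0123000000450000ULL); // dsll 16; ori 0x45; dsll 16
  return 0;
}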
+ if (ShiftCarriedForwards) + emitAppropriateDSLL(TmpReg, TmpReg, ShiftCarriedForwards, IDLoc, + Instructions); + + if (UseSrcReg) + emitRRR(AdduOp, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + return false; } @@ -1982,63 +2323,38 @@ bool MipsAsmParser::expandLoadImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, assert(DstRegOp.isReg() && "expected register operand kind"); if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister, - Is32BitImm, IDLoc, Instructions)) + Is32BitImm, false, IDLoc, Instructions)) return true; return false; } -bool -MipsAsmParser::expandLoadAddressReg(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc, - SmallVectorImpl &Instructions) { - const MCOperand &DstRegOp = Inst.getOperand(0); - assert(DstRegOp.isReg() && "expected register operand kind"); - - const MCOperand &SrcRegOp = Inst.getOperand(1); - assert(SrcRegOp.isReg() && "expected register operand kind"); - - const MCOperand &ImmOp = Inst.getOperand(2); - assert((ImmOp.isImm() || ImmOp.isExpr()) && - "expected immediate operand kind"); - if (!ImmOp.isImm()) { - if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(), - SrcRegOp.getReg(), Is32BitImm, IDLoc, - Instructions)) - return true; - - return false; +bool MipsAsmParser::expandLoadAddress(unsigned DstReg, unsigned BaseReg, + const MCOperand &Offset, + bool Is32BitAddress, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + // la can't produce a usable address when addresses are 64-bit. + if (Is32BitAddress && ABI.ArePtrs64bit()) { + // FIXME: Demote this to a warning and continue as if we had 'dla' instead. + // We currently can't do this because we depend on the equality + // operator and N64 can end up with a GPR32/GPR64 mismatch. + Error(IDLoc, "la used to load 64-bit address"); + // Continue as if we had 'dla' instead. + Is32BitAddress = false; } - if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), SrcRegOp.getReg(), - Is32BitImm, IDLoc, Instructions)) + // dla requires 64-bit addresses. 
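// Illustrative sketch (not part of the patch): the la/dla legality matrix
// that the expandLoadAddress code around this point implements. "la" with
// 64-bit pointers is diagnosed and then treated as "dla"; "dla" with 32-bit
// pointers is a hard error.
#include <cstdio>

enum LaDiag { LaOK, LaUsedFor64BitAddress, DlaNeeds64BitArch };

static LaDiag classifyLoadAddress(bool Is32BitAddress, bool PtrsAre64Bit) {
  if (Is32BitAddress && PtrsAre64Bit)
    return LaUsedFor64BitAddress; // diagnosed; expansion continues as dla
  if (!Is32BitAddress && !PtrsAre64Bit)
    return DlaNeeds64BitArch;     // expansion aborts with an error
  return LaOK;
}

int main() {
  std::printf("%d %d\n", classifyLoadAddress(true, true), // la on N64
              classifyLoadAddress(false, false));         // dla on O32
  return 0;
}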
+  if (!Is32BitAddress && !ABI.ArePtrs64bit()) {
+    Error(IDLoc, "instruction requires a 64-bit architecture");
     return true;
-
-  return false;
-}
-
-bool
-MipsAsmParser::expandLoadAddressImm(MCInst &Inst, bool Is32BitImm, SMLoc IDLoc,
-                                    SmallVectorImpl<MCInst> &Instructions) {
-  const MCOperand &DstRegOp = Inst.getOperand(0);
-  assert(DstRegOp.isReg() && "expected register operand kind");
-
-  const MCOperand &ImmOp = Inst.getOperand(1);
-  assert((ImmOp.isImm() || ImmOp.isExpr()) &&
-         "expected immediate operand kind");
-  if (!ImmOp.isImm()) {
-    if (loadAndAddSymbolAddress(ImmOp.getExpr(), DstRegOp.getReg(),
-                                Mips::NoRegister, Is32BitImm, IDLoc,
-                                Instructions))
-      return true;
-
-    return false;
   }
 
-  if (loadImmediate(ImmOp.getImm(), DstRegOp.getReg(), Mips::NoRegister,
-                    Is32BitImm, IDLoc, Instructions))
-    return true;
+  if (!Offset.isImm())
+    return loadAndAddSymbolAddress(Offset.getExpr(), DstReg, BaseReg,
+                                   Is32BitAddress, IDLoc, Instructions);
 
-  return false;
+  return loadImmediate(Offset.getImm(), DstReg, BaseReg, Is32BitAddress, true,
+                       IDLoc, Instructions);
 }
 
 bool MipsAsmParser::loadAndAddSymbolAddress(
@@ -2046,67 +2362,102 @@ bool MipsAsmParser::loadAndAddSymbolAddress(
     SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) {
   warnIfNoMacro(IDLoc);
 
-  if (Is32BitSym && isABI_N64())
-    Warning(IDLoc, "instruction loads the 32-bit address of a 64-bit symbol");
-
-  MCInst tmpInst;
-  const MCSymbolRefExpr *Symbol = cast<MCSymbolRefExpr>(SymExpr);
-  const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::create(
-      &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_HI, getContext());
-  const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::create(
-      &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_ABS_LO, getContext());
+  const MCExpr *Symbol = cast<MCSymbolRefExpr>(SymExpr);
+  const MipsMCExpr *HiExpr = MipsMCExpr::create(
+      MCSymbolRefExpr::VK_Mips_ABS_HI, Symbol, getContext());
+  const MipsMCExpr *LoExpr = MipsMCExpr::create(
+      MCSymbolRefExpr::VK_Mips_ABS_LO, Symbol, getContext());
 
   bool UseSrcReg = SrcReg != Mips::NoRegister;
 
+  // This is the 64-bit symbol address expansion.
+  if (ABI.ArePtrs64bit() && isGP64bit()) {
+    // We always need AT for the 64-bit expansion.
+    // If it is not available we exit.
+ unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + + const MipsMCExpr *HighestExpr = MipsMCExpr::create( + MCSymbolRefExpr::VK_Mips_HIGHEST, Symbol, getContext()); + const MipsMCExpr *HigherExpr = MipsMCExpr::create( + MCSymbolRefExpr::VK_Mips_HIGHER, Symbol, getContext()); + + if (UseSrcReg && (DstReg == SrcReg)) { + // If $rs is the same as $rd: + // (d)la $rd, sym($rd) => lui $at, %highest(sym) + // daddiu $at, $at, %higher(sym) + // dsll $at, $at, 16 + // daddiu $at, $at, %hi(sym) + // dsll $at, $at, 16 + // daddiu $at, $at, %lo(sym) + // daddu $rd, $at, $rd + emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HighestExpr), IDLoc, + Instructions); + emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HigherExpr), + IDLoc, Instructions); + emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, Instructions); + emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(HiExpr), IDLoc, + Instructions); + emitRRI(Mips::DSLL, ATReg, ATReg, 16, IDLoc, Instructions); + emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr), IDLoc, + Instructions); + emitRRR(Mips::DADDu, DstReg, ATReg, SrcReg, IDLoc, Instructions); + + return false; + } + + // Otherwise, if the $rs is different from $rd or if $rs isn't specified: + // (d)la $rd, sym/sym($rs) => lui $rd, %highest(sym) + // lui $at, %hi(sym) + // daddiu $rd, $rd, %higher(sym) + // daddiu $at, $at, %lo(sym) + // dsll32 $rd, $rd, 0 + // daddu $rd, $rd, $at + // (daddu $rd, $rd, $rs) + emitRX(Mips::LUi, DstReg, MCOperand::createExpr(HighestExpr), IDLoc, + Instructions); + emitRX(Mips::LUi, ATReg, MCOperand::createExpr(HiExpr), IDLoc, + Instructions); + emitRRX(Mips::DADDiu, DstReg, DstReg, MCOperand::createExpr(HigherExpr), + IDLoc, Instructions); + emitRRX(Mips::DADDiu, ATReg, ATReg, MCOperand::createExpr(LoExpr), IDLoc, + Instructions); + emitRRI(Mips::DSLL32, DstReg, DstReg, 0, IDLoc, Instructions); + emitRRR(Mips::DADDu, DstReg, DstReg, ATReg, IDLoc, Instructions); + if (UseSrcReg) + emitRRR(Mips::DADDu, DstReg, DstReg, SrcReg, IDLoc, Instructions); + + return false; + } + + // And now, the 32-bit symbol address expansion: + // If $rs is the same as $rd: + // (d)la $rd, sym($rd) => lui $at, %hi(sym) + // ori $at, $at, %lo(sym) + // addu $rd, $at, $rd + // Otherwise, if the $rs is different from $rd or if $rs isn't specified: + // (d)la $rd, sym/sym($rs) => lui $rd, %hi(sym) + // ori $rd, $rd, %lo(sym) + // (addu $rd, $rd, $rs) unsigned TmpReg = DstReg; if (UseSrcReg && (DstReg == SrcReg)) { - // At this point we need AT to perform the expansions and we exit if it is - // not available. + // If $rs is the same as $rd, we need to use AT. + // If it is not available we exit. 
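// Illustrative sketch (not part of the patch): the %highest/%higher/%hi/%lo
// decomposition used by the 64-bit expansion above. The +0x8000-style
// rounding in each piece pre-compensates for the sign extension of the
// 16-bit daddiu that consumes the piece below it (these are the conventional
// MIPS relocation formulas; treat the exact constants as an assumption).
#include <cassert>
#include <cstdint>

static uint16_t lo16(uint64_t A)    { return A & 0xffff; }
static uint16_t hi16(uint64_t A)    { return (A + 0x8000) >> 16; }
static uint16_t higher(uint64_t A)  { return (A + 0x80008000ULL) >> 32; }
static uint16_t highest(uint64_t A) { return (A + 0x800080008000ULL) >> 48; }

int main() {
  uint64_t A = 0x123456789abcdef0ULL;
  // Mirror lui %highest / daddiu %higher / dsll 16 / daddiu %hi / dsll 16 /
  // daddiu %lo, where every daddiu adds a sign-extended 16-bit value.
  int64_t R = (int32_t)((uint32_t)highest(A) << 16); // lui sign-extends
  R += (int16_t)higher(A);
  R <<= 16;
  R += (int16_t)hi16(A);
  R <<= 16;
  R += (int16_t)lo16(A);
  assert((uint64_t)R == A);
  return 0;
}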
unsigned ATReg = getATReg(IDLoc); if (!ATReg) return true; TmpReg = ATReg; } - if (!Is32BitSym) { - // If it's a 64-bit architecture, expand to: - // la d,sym => lui d,highest(sym) - // ori d,d,higher(sym) - // dsll d,d,16 - // ori d,d,hi16(sym) - // dsll d,d,16 - // ori d,d,lo16(sym) - const MCSymbolRefExpr *HighestExpr = MCSymbolRefExpr::create( - &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHEST, getContext()); - const MCSymbolRefExpr *HigherExpr = MCSymbolRefExpr::create( - &Symbol->getSymbol(), MCSymbolRefExpr::VK_Mips_HIGHER, getContext()); - - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createExpr(HighestExpr)); - Instructions.push_back(tmpInst); - - createLShiftOri<0>(MCOperand::createExpr(HigherExpr), TmpReg, SMLoc(), - Instructions); - createLShiftOri<16>(MCOperand::createExpr(HiExpr), TmpReg, SMLoc(), - Instructions); - createLShiftOri<16>(MCOperand::createExpr(LoExpr), TmpReg, SMLoc(), - Instructions); - } else { - // Otherwise, expand to: - // la d,sym => lui d,hi16(sym) - // ori d,d,lo16(sym) - tmpInst.setOpcode(Mips::LUi); - tmpInst.addOperand(MCOperand::createReg(TmpReg)); - tmpInst.addOperand(MCOperand::createExpr(HiExpr)); - Instructions.push_back(tmpInst); - - emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), SMLoc(), - Instructions); - } + emitRX(Mips::LUi, TmpReg, MCOperand::createExpr(HiExpr), IDLoc, Instructions); + emitRRX(Mips::ADDiu, TmpReg, TmpReg, MCOperand::createExpr(LoExpr), IDLoc, + Instructions); if (UseSrcReg) - createAddu(DstReg, TmpReg, SrcReg, !Is32BitSym, Instructions); + emitRRR(Mips::ADDu, DstReg, TmpReg, SrcReg, IDLoc, Instructions); + else + assert(DstReg == TmpReg); return false; } @@ -2125,12 +2476,13 @@ bool MipsAsmParser::expandUncondBranchMMPseudo( Inst.addOperand(MCOperand::createExpr(Offset.getExpr())); } else { assert(Offset.isImm() && "expected immediate operand kind"); - if (isIntN(11, Offset.getImm())) { + if (isInt<11>(Offset.getImm())) { // If offset fits into 11 bits then this instruction becomes microMIPS // 16-bit unconditional branch instruction. - Inst.setOpcode(Mips::B16_MM); + if (inMicroMipsMode()) + Inst.setOpcode(hasMips32r6() ? Mips::BC16_MMR6 : Mips::B16_MM); } else { - if (!isIntN(17, Offset.getImm())) + if (!isInt<17>(Offset.getImm())) Error(IDLoc, "branch target out of range"); if (OffsetToAlignment(Offset.getImm(), 1LL << 1)) Error(IDLoc, "branch to misaligned address"); @@ -2143,8 +2495,10 @@ bool MipsAsmParser::expandUncondBranchMMPseudo( } Instructions.push_back(Inst); - // If .set reorder is active, emit a NOP after the branch instruction. - if (AssemblerOptions.back()->isReorder()) + // If .set reorder is active and branch instruction has a delay slot, + // emit a NOP after it. 
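// Illustrative sketch (not part of the patch): the size/alignment decision
// that expandUncondBranchMMPseudo makes above; offsets fitting in 11 signed
// bits can use the compact 16-bit microMIPS branch, up to 17 signed bits the
// 32-bit form, and all branch offsets must be even.
#include <cstdint>
#include <cstdio>

template <unsigned N> static bool fitsInt(int64_t V) {
  return V >= -(INT64_C(1) << (N - 1)) && V < (INT64_C(1) << (N - 1));
}

static const char *classifyBranchOffset(int64_t Off) {
  if (Off & 1)
    return "error: branch to misaligned address";
  if (fitsInt<11>(Off))
    return "16-bit branch (B16/BC16)";
  if (fitsInt<17>(Off))
    return "32-bit branch";
  return "error: branch target out of range";
}

int main() {
  std::printf("%s\n", classifyBranchOffset(1022));  // fits in 11 bits
  std::printf("%s\n", classifyBranchOffset(40000)); // needs the 32-bit form
  std::printf("%s\n", classifyBranchOffset(3));     // odd: misaligned
  return 0;
}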
+ const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + if (MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder()) createNop(true, IDLoc, Instructions); return false; @@ -2175,30 +2529,21 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, } int64_t ImmValue = ImmOp.getImm(); - if (ImmValue == 0) { - MCInst BranchInst; - BranchInst.setOpcode(OpCode); - BranchInst.addOperand(DstRegOp); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MemOffsetOp); - Instructions.push_back(BranchInst); - } else { + if (ImmValue == 0) + emitRRX(OpCode, DstRegOp.getReg(), Mips::ZERO, MemOffsetOp, IDLoc, + Instructions); + else { warnIfNoMacro(IDLoc); unsigned ATReg = getATReg(IDLoc); if (!ATReg) return true; - if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, !isGP64bit(), IDLoc, - Instructions)) + if (loadImmediate(ImmValue, ATReg, Mips::NoRegister, !isGP64bit(), true, + IDLoc, Instructions)) return true; - MCInst BranchInst; - BranchInst.setOpcode(OpCode); - BranchInst.addOperand(DstRegOp); - BranchInst.addOperand(MCOperand::createReg(ATReg)); - BranchInst.addOperand(MemOffsetOp); - Instructions.push_back(BranchInst); + emitRRX(OpCode, DstRegOp.getReg(), ATReg, MemOffsetOp, IDLoc, Instructions); } return false; } @@ -2206,7 +2551,6 @@ bool MipsAsmParser::expandBranchImm(MCInst &Inst, SMLoc IDLoc, void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions, bool isLoad, bool isImmOpnd) { - MCInst TempInst; unsigned ImmOffset, HiOffset, LoOffset; const MCExpr *ExprOffset; unsigned TmpRegNum; @@ -2227,8 +2571,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, HiOffset++; } else ExprOffset = Inst.getOperand(2).getExpr(); - // All instructions will have the same location. - TempInst.setLoc(IDLoc); // These are some of the types of expansions we perform here: // 1) lw $8, sym => lui $8, %hi(sym) // lw $8, %lo(sym)($8) @@ -2267,40 +2609,20 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, return; } - TempInst.setOpcode(Mips::LUi); - TempInst.addOperand(MCOperand::createReg(TmpRegNum)); - if (isImmOpnd) - TempInst.addOperand(MCOperand::createImm(HiOffset)); - else { - const MCExpr *HiExpr = evaluateRelocExpr(ExprOffset, "hi"); - TempInst.addOperand(MCOperand::createExpr(HiExpr)); - } - // Add the instruction to the list. - Instructions.push_back(TempInst); - // Prepare TempInst for next instruction. - TempInst.clear(); + emitRX(Mips::LUi, TmpRegNum, + isImmOpnd ? MCOperand::createImm(HiOffset) + : MCOperand::createExpr(evaluateRelocExpr(ExprOffset, "hi")), + IDLoc, Instructions); // Add temp register to base. - if (BaseRegNum != Mips::ZERO) { - TempInst.setOpcode(Mips::ADDu); - TempInst.addOperand(MCOperand::createReg(TmpRegNum)); - TempInst.addOperand(MCOperand::createReg(TmpRegNum)); - TempInst.addOperand(MCOperand::createReg(BaseRegNum)); - Instructions.push_back(TempInst); - TempInst.clear(); - } + if (BaseRegNum != Mips::ZERO) + emitRRR(Mips::ADDu, TmpRegNum, TmpRegNum, BaseRegNum, IDLoc, Instructions); // And finally, create original instruction with low part // of offset and new base. 
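// Illustrative sketch (not part of the patch): the %hi/%lo offset split used
// by expandMemInst above. The low half is consumed as a signed 16-bit
// displacement, so when bit 15 is set the high half is incremented to
// compensate (the "HiOffset++" in the code).
#include <cassert>
#include <cstdint>

static void splitOffset(uint32_t Imm, uint16_t &Hi, uint16_t &Lo) {
  Lo = Imm & 0xffff;
  Hi = Imm >> 16;
  if (Lo & 0x8000) // low part will sign-extend negative: pre-add the carry
    ++Hi;
}

int main() {
  uint16_t Hi, Lo;
  splitOffset(0x12348000, Hi, Lo);
  // "lui $at, 0x1235" then "lw $rt, -32768($at)" reconstructs the address:
  assert((uint32_t)(((uint32_t)Hi << 16) + (int16_t)Lo) == 0x12348000u);
  return 0;
}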
- TempInst.setOpcode(Inst.getOpcode()); - TempInst.addOperand(MCOperand::createReg(RegOpNum)); - TempInst.addOperand(MCOperand::createReg(TmpRegNum)); - if (isImmOpnd) - TempInst.addOperand(MCOperand::createImm(LoOffset)); - else { - const MCExpr *LoExpr = evaluateRelocExpr(ExprOffset, "lo"); - TempInst.addOperand(MCOperand::createExpr(LoExpr)); - } - Instructions.push_back(TempInst); - TempInst.clear(); + emitRRX(Inst.getOpcode(), RegOpNum, TmpRegNum, + isImmOpnd + ? MCOperand::createImm(LoOffset) + : MCOperand::createExpr(evaluateRelocExpr(ExprOffset, "lo")), + IDLoc, Instructions); } bool @@ -2316,10 +2638,16 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, if (OpNum < 8 && Inst.getOperand(OpNum - 1).getImm() <= 60 && Inst.getOperand(OpNum - 1).getImm() >= 0 && - Inst.getOperand(OpNum - 2).getReg() == Mips::SP && - Inst.getOperand(OpNum - 3).getReg() == Mips::RA) + (Inst.getOperand(OpNum - 2).getReg() == Mips::SP || + Inst.getOperand(OpNum - 2).getReg() == Mips::SP_64) && + (Inst.getOperand(OpNum - 3).getReg() == Mips::RA || + Inst.getOperand(OpNum - 3).getReg() == Mips::RA_64)) { // It can be implemented as SWM16 or LWM16 instruction. - NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MM : Mips::LWM16_MM; + if (inMicroMipsMode() && hasMips32r6()) + NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MMR6 : Mips::LWM16_MMR6; + else + NewOpcode = Opcode == Mips::SWM_MM ? Mips::SWM16_MM : Mips::LWM16_MM; + } Inst.setOpcode(NewOpcode); Instructions.push_back(Inst); @@ -2328,44 +2656,126 @@ MipsAsmParser::expandLoadStoreMultiple(MCInst &Inst, SMLoc IDLoc, bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl &Instructions) { + bool EmittedNoMacroWarning = false; unsigned PseudoOpcode = Inst.getOpcode(); unsigned SrcReg = Inst.getOperand(0).getReg(); - unsigned TrgReg = Inst.getOperand(1).getReg(); + const MCOperand &TrgOp = Inst.getOperand(1); const MCExpr *OffsetExpr = Inst.getOperand(2).getExpr(); unsigned ZeroSrcOpcode, ZeroTrgOpcode; - bool ReverseOrderSLT, IsUnsigned, AcceptsEquality; + bool ReverseOrderSLT, IsUnsigned, IsLikely, AcceptsEquality; + + unsigned TrgReg; + if (TrgOp.isReg()) + TrgReg = TrgOp.getReg(); + else if (TrgOp.isImm()) { + warnIfNoMacro(IDLoc); + EmittedNoMacroWarning = true; + + TrgReg = getATReg(IDLoc); + if (!TrgReg) + return true; + + switch(PseudoOpcode) { + default: + llvm_unreachable("unknown opcode for branch pseudo-instruction"); + case Mips::BLTImmMacro: + PseudoOpcode = Mips::BLT; + break; + case Mips::BLEImmMacro: + PseudoOpcode = Mips::BLE; + break; + case Mips::BGEImmMacro: + PseudoOpcode = Mips::BGE; + break; + case Mips::BGTImmMacro: + PseudoOpcode = Mips::BGT; + break; + case Mips::BLTUImmMacro: + PseudoOpcode = Mips::BLTU; + break; + case Mips::BLEUImmMacro: + PseudoOpcode = Mips::BLEU; + break; + case Mips::BGEUImmMacro: + PseudoOpcode = Mips::BGEU; + break; + case Mips::BGTUImmMacro: + PseudoOpcode = Mips::BGTU; + break; + case Mips::BLTLImmMacro: + PseudoOpcode = Mips::BLTL; + break; + case Mips::BLELImmMacro: + PseudoOpcode = Mips::BLEL; + break; + case Mips::BGELImmMacro: + PseudoOpcode = Mips::BGEL; + break; + case Mips::BGTLImmMacro: + PseudoOpcode = Mips::BGTL; + break; + case Mips::BLTULImmMacro: + PseudoOpcode = Mips::BLTUL; + break; + case Mips::BLEULImmMacro: + PseudoOpcode = Mips::BLEUL; + break; + case Mips::BGEULImmMacro: + PseudoOpcode = Mips::BGEUL; + break; + case Mips::BGTULImmMacro: + PseudoOpcode = Mips::BGTUL; + break; + } + + if (loadImmediate(TrgOp.getImm(), TrgReg, 
Mips::NoRegister, !isGP64bit(), + false, IDLoc, Instructions)) + return true; + } switch (PseudoOpcode) { case Mips::BLT: case Mips::BLTU: + case Mips::BLTL: + case Mips::BLTUL: AcceptsEquality = false; ReverseOrderSLT = false; - IsUnsigned = (PseudoOpcode == Mips::BLTU); + IsUnsigned = ((PseudoOpcode == Mips::BLTU) || (PseudoOpcode == Mips::BLTUL)); + IsLikely = ((PseudoOpcode == Mips::BLTL) || (PseudoOpcode == Mips::BLTUL)); ZeroSrcOpcode = Mips::BGTZ; ZeroTrgOpcode = Mips::BLTZ; break; case Mips::BLE: case Mips::BLEU: + case Mips::BLEL: + case Mips::BLEUL: AcceptsEquality = true; ReverseOrderSLT = true; - IsUnsigned = (PseudoOpcode == Mips::BLEU); + IsUnsigned = ((PseudoOpcode == Mips::BLEU) || (PseudoOpcode == Mips::BLEUL)); + IsLikely = ((PseudoOpcode == Mips::BLEL) || (PseudoOpcode == Mips::BLEUL)); ZeroSrcOpcode = Mips::BGEZ; ZeroTrgOpcode = Mips::BLEZ; break; case Mips::BGE: case Mips::BGEU: + case Mips::BGEL: + case Mips::BGEUL: AcceptsEquality = true; ReverseOrderSLT = false; - IsUnsigned = (PseudoOpcode == Mips::BGEU); + IsUnsigned = ((PseudoOpcode == Mips::BGEU) || (PseudoOpcode == Mips::BGEUL)); + IsLikely = ((PseudoOpcode == Mips::BGEL) || (PseudoOpcode == Mips::BGEUL)); ZeroSrcOpcode = Mips::BLEZ; ZeroTrgOpcode = Mips::BGEZ; break; case Mips::BGT: case Mips::BGTU: + case Mips::BGTL: + case Mips::BGTUL: AcceptsEquality = false; ReverseOrderSLT = true; - IsUnsigned = (PseudoOpcode == Mips::BGTU); + IsUnsigned = ((PseudoOpcode == Mips::BGTU) || (PseudoOpcode == Mips::BGTUL)); + IsLikely = ((PseudoOpcode == Mips::BGTL) || (PseudoOpcode == Mips::BGTUL)); ZeroSrcOpcode = Mips::BLTZ; ZeroTrgOpcode = Mips::BGTZ; break; @@ -2373,7 +2783,6 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, llvm_unreachable("unknown opcode for branch pseudo-instruction"); } - MCInst BranchInst; bool IsTrgRegZero = (TrgReg == Mips::ZERO); bool IsSrcRegZero = (SrcReg == Mips::ZERO); if (IsSrcRegZero && IsTrgRegZero) { @@ -2381,51 +2790,37 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, // with GAS' behaviour. However, they may not generate the most efficient // code in some circumstances. 
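// Illustrative sketch (not part of the patch): how the general two-register
// case below combines the flag table above. E.g. "blt $4,$5,L" becomes
// "slt $at,$4,$5 ; bne $at,$zero,L", while "ble $4,$5,L" becomes
// "slt $at,$5,$4 ; beq $at,$zero,L" (operands swapped, equality accepted).
#include <cstdio>

static void lowerPseudoBranch(const char *Src, const char *Trg,
                              bool ReverseOrderSLT, bool AcceptsEquality,
                              bool IsUnsigned, bool IsLikely) {
  std::printf("  %s $at, %s, %s\n", IsUnsigned ? "sltu" : "slt",
              ReverseOrderSLT ? Trg : Src, ReverseOrderSLT ? Src : Trg);
  std::printf("  %s%s $at, $zero, L\n", AcceptsEquality ? "beq" : "bne",
              IsLikely ? "l" : ""); // "l" selects the BEQL/BNEL likely forms
}

int main() {
  lowerPseudoBranch("$4", "$5", false, false, false, false); // blt
  lowerPseudoBranch("$4", "$5", true, true, false, false);   // ble
  return 0;
}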
if (PseudoOpcode == Mips::BLT) { - BranchInst.setOpcode(Mips::BLTZ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(Mips::BLTZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); return false; } if (PseudoOpcode == Mips::BLE) { - BranchInst.setOpcode(Mips::BLEZ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(Mips::BLEZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); Warning(IDLoc, "branch is always taken"); return false; } if (PseudoOpcode == Mips::BGE) { - BranchInst.setOpcode(Mips::BGEZ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(Mips::BGEZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); Warning(IDLoc, "branch is always taken"); return false; } if (PseudoOpcode == Mips::BGT) { - BranchInst.setOpcode(Mips::BGTZ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(Mips::BGTZ, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); return false; } if (PseudoOpcode == Mips::BGTU) { - BranchInst.setOpcode(Mips::BNE); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(Mips::BNE, Mips::ZERO, Mips::ZERO, + MCOperand::createExpr(OffsetExpr), IDLoc, Instructions); return false; } if (AcceptsEquality) { // If both registers are $0 and the pseudo-branch accepts equality, it // will always be taken, so we emit an unconditional branch. - BranchInst.setOpcode(Mips::BEQ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(Mips::BEQ, Mips::ZERO, Mips::ZERO, + MCOperand::createExpr(OffsetExpr), IDLoc, Instructions); Warning(IDLoc, "branch is always taken"); return false; } @@ -2449,11 +2844,8 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, // the pseudo-branch will always be taken, so we emit an unconditional // branch. // This only applies to unsigned pseudo-branches. - BranchInst.setOpcode(Mips::BEQ); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(Mips::BEQ, Mips::ZERO, Mips::ZERO, + MCOperand::createExpr(OffsetExpr), IDLoc, Instructions); Warning(IDLoc, "branch is always taken"); return false; } @@ -2470,21 +2862,17 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, // // Because only BLEU and BGEU branch on equality, we can use the // AcceptsEquality variable to decide when to emit the BEQZ. - BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE); - BranchInst.addOperand( - MCOperand::createReg(IsSrcRegZero ? 
TrgReg : SrcReg)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(AcceptsEquality ? Mips::BEQ : Mips::BNE, + IsSrcRegZero ? TrgReg : SrcReg, Mips::ZERO, + MCOperand::createExpr(OffsetExpr), IDLoc, Instructions); return false; } // If we have a signed pseudo-branch and one of the registers is $0, // we can use an appropriate compare-to-zero branch. We select which one // to use in the switch statement above. - BranchInst.setOpcode(IsSrcRegZero ? ZeroSrcOpcode : ZeroTrgOpcode); - BranchInst.addOperand(MCOperand::createReg(IsSrcRegZero ? TrgReg : SrcReg)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRX(IsSrcRegZero ? ZeroSrcOpcode : ZeroTrgOpcode, + IsSrcRegZero ? TrgReg : SrcReg, MCOperand::createExpr(OffsetExpr), + IDLoc, Instructions); return false; } @@ -2494,7 +2882,8 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, if (!ATRegNum) return true; - warnIfNoMacro(IDLoc); + if (!EmittedNoMacroWarning) + warnIfNoMacro(IDLoc); // SLT fits well with 2 of our 4 pseudo-branches: // BLT, where $rs < $rt, translates into "slt $at, $rs, $rt" and @@ -2511,23 +2900,135 @@ bool MipsAsmParser::expandCondBranches(MCInst &Inst, SMLoc IDLoc, // // The same applies to the unsigned variants, except that SLTu is used // instead of SLT. - MCInst SetInst; - SetInst.setOpcode(IsUnsigned ? Mips::SLTu : Mips::SLT); - SetInst.addOperand(MCOperand::createReg(ATRegNum)); - SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? TrgReg : SrcReg)); - SetInst.addOperand(MCOperand::createReg(ReverseOrderSLT ? SrcReg : TrgReg)); - Instructions.push_back(SetInst); + emitRRR(IsUnsigned ? Mips::SLTu : Mips::SLT, ATRegNum, + ReverseOrderSLT ? TrgReg : SrcReg, ReverseOrderSLT ? SrcReg : TrgReg, + IDLoc, Instructions); - BranchInst.setOpcode(AcceptsEquality ? Mips::BEQ : Mips::BNE); - BranchInst.addOperand(MCOperand::createReg(ATRegNum)); - BranchInst.addOperand(MCOperand::createReg(Mips::ZERO)); - BranchInst.addOperand(MCOperand::createExpr(OffsetExpr)); - Instructions.push_back(BranchInst); + emitRRX(IsLikely ? (AcceptsEquality ? Mips::BEQL : Mips::BNEL) + : (AcceptsEquality ? Mips::BEQ : Mips::BNE), + ATRegNum, Mips::ZERO, MCOperand::createExpr(OffsetExpr), IDLoc, + Instructions); return false; } -bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc, - SmallVectorImpl &Instructions) { +bool MipsAsmParser::expandDiv(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions, + const bool IsMips64, const bool Signed) { + if (hasMips32r6()) { + Error(IDLoc, "instruction not supported on mips32r6 or mips64r6"); + return false; + } + + warnIfNoMacro(IDLoc); + + const MCOperand &RsRegOp = Inst.getOperand(0); + assert(RsRegOp.isReg() && "expected register operand kind"); + unsigned RsReg = RsRegOp.getReg(); + + const MCOperand &RtRegOp = Inst.getOperand(1); + assert(RtRegOp.isReg() && "expected register operand kind"); + unsigned RtReg = RtRegOp.getReg(); + unsigned DivOp; + unsigned ZeroReg; + + if (IsMips64) { + DivOp = Signed ? Mips::DSDIV : Mips::DUDIV; + ZeroReg = Mips::ZERO_64; + } else { + DivOp = Signed ? 
Mips::SDIV : Mips::UDIV; + ZeroReg = Mips::ZERO; + } + + bool UseTraps = useTraps(); + + if (RsReg == Mips::ZERO || RsReg == Mips::ZERO_64) { + if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64) + Warning(IDLoc, "dividing zero by zero"); + if (IsMips64) { + if (Signed && (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64)) { + if (UseTraps) { + emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions); + return false; + } + + emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions); + return false; + } + } else { + emitRR(DivOp, RsReg, RtReg, IDLoc, Instructions); + return false; + } + } + + if (RtReg == Mips::ZERO || RtReg == Mips::ZERO_64) { + Warning(IDLoc, "division by zero"); + if (Signed) { + if (UseTraps) { + emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions); + return false; + } + + emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions); + return false; + } + } + + // FIXME: The values for these two BranchTarget variables may be different in + // micromips. These magic numbers need to be removed. + unsigned BranchTargetNoTraps; + unsigned BranchTarget; + + if (UseTraps) { + BranchTarget = IsMips64 ? 12 : 8; + emitRRI(Mips::TEQ, RtReg, ZeroReg, 0x7, IDLoc, Instructions); + } else { + BranchTarget = IsMips64 ? 20 : 16; + BranchTargetNoTraps = 8; + // Branch to the li instruction. + emitRRI(Mips::BNE, RtReg, ZeroReg, BranchTargetNoTraps, IDLoc, + Instructions); + } + + emitRR(DivOp, RsReg, RtReg, IDLoc, Instructions); + + if (!UseTraps) + emitII(Mips::BREAK, 0x7, 0, IDLoc, Instructions); + + if (!Signed) { + emitR(Mips::MFLO, RsReg, IDLoc, Instructions); + return false; + } + + unsigned ATReg = getATReg(IDLoc); + if (!ATReg) + return true; + + emitRRI(Mips::ADDiu, ATReg, ZeroReg, -1, IDLoc, Instructions); + if (IsMips64) { + // Branch to the mflo instruction. + emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, Instructions); + emitRRI(Mips::ADDiu, ATReg, ZeroReg, 1, IDLoc, Instructions); + emitRRI(Mips::DSLL32, ATReg, ATReg, 0x1f, IDLoc, Instructions); + } else { + // Branch to the mflo instruction. + emitRRI(Mips::BNE, RtReg, ATReg, BranchTarget, IDLoc, Instructions); + emitRI(Mips::LUi, ATReg, (uint16_t)0x8000, IDLoc, Instructions); + } + + if (UseTraps) + emitRRI(Mips::TEQ, RsReg, ATReg, 0x6, IDLoc, Instructions); + else { + // Branch to the mflo instruction. + emitRRI(Mips::BNE, RsReg, ATReg, BranchTargetNoTraps, IDLoc, Instructions); + emitRRI(Mips::SLL, ZeroReg, ZeroReg, 0, IDLoc, Instructions); + emitII(Mips::BREAK, 0x6, 0, IDLoc, Instructions); + } + emitR(Mips::MFLO, RsReg, IDLoc, Instructions); + return false; +} + +bool MipsAsmParser::expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc, + SmallVectorImpl &Instructions) { if (hasMips32r6() || hasMips64r6()) { Error(IDLoc, "instruction not supported on mips32r6 or mips64r6"); return false; @@ -2562,7 +3063,7 @@ bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc, LoadedOffsetInAT = true; if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(), - IDLoc, Instructions)) + true, IDLoc, Instructions)) return true; // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate() @@ -2590,33 +3091,15 @@ bool MipsAsmParser::expandUlhu(MCInst &Inst, SMLoc IDLoc, unsigned SllReg = LoadedOffsetInAT ? DstReg : ATReg; - MCInst TmpInst; - TmpInst.setOpcode(Mips::LBu); - TmpInst.addOperand(MCOperand::createReg(FirstLbuDstReg)); - TmpInst.addOperand(MCOperand::createReg(LbuSrcReg)); - TmpInst.addOperand(MCOperand::createImm(FirstLbuOffset)); - Instructions.push_back(TmpInst); + emitRRI(Signed ? 
Mips::LB : Mips::LBu, FirstLbuDstReg, LbuSrcReg, + FirstLbuOffset, IDLoc, Instructions); - TmpInst.clear(); - TmpInst.setOpcode(Mips::LBu); - TmpInst.addOperand(MCOperand::createReg(SecondLbuDstReg)); - TmpInst.addOperand(MCOperand::createReg(LbuSrcReg)); - TmpInst.addOperand(MCOperand::createImm(SecondLbuOffset)); - Instructions.push_back(TmpInst); + emitRRI(Mips::LBu, SecondLbuDstReg, LbuSrcReg, SecondLbuOffset, IDLoc, + Instructions); - TmpInst.clear(); - TmpInst.setOpcode(Mips::SLL); - TmpInst.addOperand(MCOperand::createReg(SllReg)); - TmpInst.addOperand(MCOperand::createReg(SllReg)); - TmpInst.addOperand(MCOperand::createImm(8)); - Instructions.push_back(TmpInst); + emitRRI(Mips::SLL, SllReg, SllReg, 8, IDLoc, Instructions); - TmpInst.clear(); - TmpInst.setOpcode(Mips::OR); - TmpInst.addOperand(MCOperand::createReg(DstReg)); - TmpInst.addOperand(MCOperand::createReg(DstReg)); - TmpInst.addOperand(MCOperand::createReg(ATReg)); - Instructions.push_back(TmpInst); + emitRRR(Mips::OR, DstReg, DstReg, ATReg, IDLoc, Instructions); return false; } @@ -2654,7 +3137,7 @@ bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc, warnIfNoMacro(IDLoc); if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(), - IDLoc, Instructions)) + true, IDLoc, Instructions)) return true; // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate() @@ -2677,37 +3160,373 @@ bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc, RightLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3); } - MCInst LeftLoadInst; - LeftLoadInst.setOpcode(Mips::LWL); - LeftLoadInst.addOperand(DstRegOp); - LeftLoadInst.addOperand(MCOperand::createReg(FinalSrcReg)); - LeftLoadInst.addOperand(MCOperand::createImm(LeftLoadOffset)); - Instructions.push_back(LeftLoadInst); + emitRRI(Mips::LWL, DstRegOp.getReg(), FinalSrcReg, LeftLoadOffset, IDLoc, + Instructions); - MCInst RightLoadInst; - RightLoadInst.setOpcode(Mips::LWR); - RightLoadInst.addOperand(DstRegOp); - RightLoadInst.addOperand(MCOperand::createReg(FinalSrcReg)); - RightLoadInst.addOperand(MCOperand::createImm(RightLoadOffset )); - Instructions.push_back(RightLoadInst); + emitRRI(Mips::LWR, DstRegOp.getReg(), FinalSrcReg, RightLoadOffset, IDLoc, + Instructions); return false; } +bool MipsAsmParser::expandAliasImmediate(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + + assert (Inst.getNumOperands() == 3 && "Invalid operand count"); + assert (Inst.getOperand(0).isReg() && + Inst.getOperand(1).isReg() && + Inst.getOperand(2).isImm() && "Invalid instruction operand."); + + unsigned ATReg = Mips::NoRegister; + unsigned FinalDstReg = Mips::NoRegister; + unsigned DstReg = Inst.getOperand(0).getReg(); + unsigned SrcReg = Inst.getOperand(1).getReg(); + int64_t ImmValue = Inst.getOperand(2).getImm(); + + bool Is32Bit = isInt<32>(ImmValue) || isUInt<32>(ImmValue); + + unsigned FinalOpcode = Inst.getOpcode(); + + if (DstReg == SrcReg) { + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + FinalDstReg = DstReg; + DstReg = ATReg; + } + + if (!loadImmediate(ImmValue, DstReg, Mips::NoRegister, Is32Bit, false, Inst.getLoc(), Instructions)) { + switch (FinalOpcode) { + default: + llvm_unreachable("unimplemented expansion"); + case (Mips::ADDi): + FinalOpcode = Mips::ADD; + break; + case (Mips::ADDiu): + FinalOpcode = Mips::ADDu; + break; + case (Mips::ANDi): + FinalOpcode = Mips::AND; + break; + case (Mips::NORImm): + FinalOpcode = Mips::NOR; + break; + case (Mips::ORi): + FinalOpcode = Mips::OR; + break; + case (Mips::SLTi): + 
FinalOpcode = Mips::SLT; + break; + case (Mips::SLTiu): + FinalOpcode = Mips::SLTu; + break; + case (Mips::XORi): + FinalOpcode = Mips::XOR; + break; + } + + if (FinalDstReg == Mips::NoRegister) + emitRRR(FinalOpcode, DstReg, DstReg, SrcReg, IDLoc, Instructions); + else + emitRRR(FinalOpcode, FinalDstReg, FinalDstReg, DstReg, IDLoc, + Instructions); + return false; + } + return true; +} + +bool MipsAsmParser::expandRotation(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + unsigned ATReg = Mips::NoRegister; + unsigned DReg = Inst.getOperand(0).getReg(); + unsigned SReg = Inst.getOperand(1).getReg(); + unsigned TReg = Inst.getOperand(2).getReg(); + unsigned TmpReg = DReg; + + unsigned FirstShift = Mips::NOP; + unsigned SecondShift = Mips::NOP; + + if (hasMips32r2()) { + + if (DReg == SReg) { + TmpReg = getATReg(Inst.getLoc()); + if (!TmpReg) + return true; + } + + if (Inst.getOpcode() == Mips::ROL) { + emitRRR(Mips::SUBu, TmpReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions); + emitRRR(Mips::ROTRV, DReg, SReg, TmpReg, Inst.getLoc(), Instructions); + return false; + } + + if (Inst.getOpcode() == Mips::ROR) { + emitRRR(Mips::ROTRV, DReg, SReg, TReg, Inst.getLoc(), Instructions); + return false; + } + + return true; + } + + if (hasMips32()) { + + switch (Inst.getOpcode()) { + default: + llvm_unreachable("unexpected instruction opcode"); + case Mips::ROL: + FirstShift = Mips::SRLV; + SecondShift = Mips::SLLV; + break; + case Mips::ROR: + FirstShift = Mips::SLLV; + SecondShift = Mips::SRLV; + break; + } + + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + + emitRRR(Mips::SUBu, ATReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions); + emitRRR(FirstShift, ATReg, SReg, ATReg, Inst.getLoc(), Instructions); + emitRRR(SecondShift, DReg, SReg, TReg, Inst.getLoc(), Instructions); + emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions); + + return false; + } + + return true; +} + +bool MipsAsmParser::expandRotationImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + + unsigned ATReg = Mips::NoRegister; + unsigned DReg = Inst.getOperand(0).getReg(); + unsigned SReg = Inst.getOperand(1).getReg(); + int64_t ImmValue = Inst.getOperand(2).getImm(); + + unsigned FirstShift = Mips::NOP; + unsigned SecondShift = Mips::NOP; + + if (hasMips32r2()) { + + if (Inst.getOpcode() == Mips::ROLImm) { + uint64_t MaxShift = 32; + uint64_t ShiftValue = ImmValue; + if (ImmValue != 0) + ShiftValue = MaxShift - ImmValue; + emitRRI(Mips::ROTR, DReg, SReg, ShiftValue, Inst.getLoc(), Instructions); + return false; + } + + if (Inst.getOpcode() == Mips::RORImm) { + emitRRI(Mips::ROTR, DReg, SReg, ImmValue, Inst.getLoc(), Instructions); + return false; + } + + return true; + } + + if (hasMips32()) { + + if (ImmValue == 0) { + emitRRI(Mips::SRL, DReg, SReg, 0, Inst.getLoc(), Instructions); + return false; + } + + switch (Inst.getOpcode()) { + default: + llvm_unreachable("unexpected instruction opcode"); + case Mips::ROLImm: + FirstShift = Mips::SLL; + SecondShift = Mips::SRL; + break; + case Mips::RORImm: + FirstShift = Mips::SRL; + SecondShift = Mips::SLL; + break; + } + + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + + emitRRI(FirstShift, ATReg, SReg, ImmValue, Inst.getLoc(), Instructions); + emitRRI(SecondShift, DReg, SReg, 32 - ImmValue, Inst.getLoc(), Instructions); + emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions); + + return false; + } + + return true; +} + +bool MipsAsmParser::expandDRotation(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl 
&Instructions) { + + unsigned ATReg = Mips::NoRegister; + unsigned DReg = Inst.getOperand(0).getReg(); + unsigned SReg = Inst.getOperand(1).getReg(); + unsigned TReg = Inst.getOperand(2).getReg(); + unsigned TmpReg = DReg; + + unsigned FirstShift = Mips::NOP; + unsigned SecondShift = Mips::NOP; + + if (hasMips64r2()) { + + if (TmpReg == SReg) { + TmpReg = getATReg(Inst.getLoc()); + if (!TmpReg) + return true; + } + + if (Inst.getOpcode() == Mips::DROL) { + emitRRR(Mips::DSUBu, TmpReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions); + emitRRR(Mips::DROTRV, DReg, SReg, TmpReg, Inst.getLoc(), Instructions); + return false; + } + + if (Inst.getOpcode() == Mips::DROR) { + emitRRR(Mips::DROTRV, DReg, SReg, TReg, Inst.getLoc(), Instructions); + return false; + } + + return true; + } + + if (hasMips64()) { + + switch (Inst.getOpcode()) { + default: + llvm_unreachable("unexpected instruction opcode"); + case Mips::DROL: + FirstShift = Mips::DSRLV; + SecondShift = Mips::DSLLV; + break; + case Mips::DROR: + FirstShift = Mips::DSLLV; + SecondShift = Mips::DSRLV; + break; + } + + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + + emitRRR(Mips::DSUBu, ATReg, Mips::ZERO, TReg, Inst.getLoc(), Instructions); + emitRRR(FirstShift, ATReg, SReg, ATReg, Inst.getLoc(), Instructions); + emitRRR(SecondShift, DReg, SReg, TReg, Inst.getLoc(), Instructions); + emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions); + + return false; + } + + return true; +} + +bool MipsAsmParser::expandDRotationImm(MCInst &Inst, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + + unsigned ATReg = Mips::NoRegister; + unsigned DReg = Inst.getOperand(0).getReg(); + unsigned SReg = Inst.getOperand(1).getReg(); + int64_t ImmValue = Inst.getOperand(2).getImm() % 64; + + unsigned FirstShift = Mips::NOP; + unsigned SecondShift = Mips::NOP; + + MCInst TmpInst; + + if (hasMips64r2()) { + + unsigned FinalOpcode = Mips::NOP; + if (ImmValue == 0) + FinalOpcode = Mips::DROTR; + else if (ImmValue % 32 == 0) + FinalOpcode = Mips::DROTR32; + else if ((ImmValue >= 1) && (ImmValue <= 32)) { + if (Inst.getOpcode() == Mips::DROLImm) + FinalOpcode = Mips::DROTR32; + else + FinalOpcode = Mips::DROTR; + } else if (ImmValue >= 33) { + if (Inst.getOpcode() == Mips::DROLImm) + FinalOpcode = Mips::DROTR; + else + FinalOpcode = Mips::DROTR32; + } + + uint64_t ShiftValue = ImmValue % 32; + if (Inst.getOpcode() == Mips::DROLImm) + ShiftValue = (32 - ImmValue % 32) % 32; + + emitRRI(FinalOpcode, DReg, SReg, ShiftValue, Inst.getLoc(), Instructions); + + return false; + } + + if (hasMips64()) { + + if (ImmValue == 0) { + emitRRI(Mips::DSRL, DReg, SReg, 0, Inst.getLoc(), Instructions); + return false; + } + + switch (Inst.getOpcode()) { + default: + llvm_unreachable("unexpected instruction opcode"); + case Mips::DROLImm: + if ((ImmValue >= 1) && (ImmValue <= 31)) { + FirstShift = Mips::DSLL; + SecondShift = Mips::DSRL32; + } + if (ImmValue == 32) { + FirstShift = Mips::DSLL32; + SecondShift = Mips::DSRL32; + } + if ((ImmValue >= 33) && (ImmValue <= 63)) { + FirstShift = Mips::DSLL32; + SecondShift = Mips::DSRL; + } + break; + case Mips::DRORImm: + if ((ImmValue >= 1) && (ImmValue <= 31)) { + FirstShift = Mips::DSRL; + SecondShift = Mips::DSLL32; + } + if (ImmValue == 32) { + FirstShift = Mips::DSRL32; + SecondShift = Mips::DSLL32; + } + if ((ImmValue >= 33) && (ImmValue <= 63)) { + FirstShift = Mips::DSRL32; + SecondShift = Mips::DSLL; + } + break; + } + + ATReg = getATReg(Inst.getLoc()); + if (!ATReg) + return true; + + emitRRI(FirstShift, 
ATReg, SReg, ImmValue % 32, Inst.getLoc(), Instructions); + emitRRI(SecondShift, DReg, SReg, (32 - ImmValue % 32) % 32, Inst.getLoc(), Instructions); + emitRRR(Mips::OR, DReg, DReg, ATReg, Inst.getLoc(), Instructions); + + return false; + } + + return true; +} + void MipsAsmParser::createNop(bool hasShortDelaySlot, SMLoc IDLoc, SmallVectorImpl &Instructions) { - MCInst NopInst; - if (hasShortDelaySlot) { - NopInst.setOpcode(Mips::MOVE16_MM); - NopInst.addOperand(MCOperand::createReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::createReg(Mips::ZERO)); - } else { - NopInst.setOpcode(Mips::SLL); - NopInst.addOperand(MCOperand::createReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::createReg(Mips::ZERO)); - NopInst.addOperand(MCOperand::createImm(0)); - } - Instructions.push_back(NopInst); + if (hasShortDelaySlot) + emitRR(Mips::MOVE16_MM, Mips::ZERO, Mips::ZERO, IDLoc, Instructions); + else + emitRRI(Mips::SLL, Mips::ZERO, Mips::ZERO, 0, IDLoc, Instructions); } void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg, @@ -2717,6 +3536,24 @@ void MipsAsmParser::createAddu(unsigned DstReg, unsigned SrcReg, Instructions); } +void MipsAsmParser::createCpRestoreMemOp( + bool IsLoad, int StackOffset, SMLoc IDLoc, + SmallVectorImpl &Instructions) { + // If the offset can not fit into 16 bits, we need to expand. + if (!isInt<16>(StackOffset)) { + MCInst MemInst; + MemInst.setOpcode(IsLoad ? Mips::LW : Mips::SW); + MemInst.addOperand(MCOperand::createReg(Mips::GP)); + MemInst.addOperand(MCOperand::createReg(Mips::SP)); + MemInst.addOperand(MCOperand::createImm(StackOffset)); + expandMemInst(MemInst, IDLoc, Instructions, IsLoad, true /*HasImmOpnd*/); + return; + } + + emitRRI(IsLoad ? Mips::LW : Mips::SW, Mips::GP, Mips::SP, StackOffset, IDLoc, + Instructions); +} + unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // As described by the Mips32r2 spec, the registers Rd and Rs for // jalr.hb must be different. 
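// Illustrative sketch (not part of the patch): the identity behind the
// pre-R2 rotation expansions above, where "ror $d,$s,n" becomes srl+sll+or
// (and "rol" negates the amount, via subu in the register forms).
#include <cassert>
#include <cstdint>

static uint32_t rotrImm(uint32_t X, unsigned N) {
  if (N == 0)
    return X; // the code emits "srl $d, $s, 0", i.e. a plain move
  return (X >> N) | (X << (32 - N)); // FirstShift = SRL, SecondShift = SLL
}

static uint32_t rolAsRotr(uint32_t X, unsigned N) {
  return rotrImm(X, (32 - N % 32) % 32); // rol n == ror (32 - n)
}

int main() {
  assert(rotrImm(0x80000001u, 1) == 0xC0000000u);
  assert(rolAsRotr(0x80000001u, 1) == 0x00000003u);
  return 0;
}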
@@ -2729,6 +3566,17 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_Success; } +static SMLoc RefineErrorLoc(const SMLoc Loc, const OperandVector &Operands, + uint64_t ErrorInfo) { + if (ErrorInfo != ~0ULL && ErrorInfo < Operands.size()) { + SMLoc ErrorLoc = Operands[ErrorInfo]->getStartLoc(); + if (ErrorLoc == SMLoc()) + return Loc; + return ErrorLoc; + } + return Loc; +} + bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, @@ -2745,7 +3593,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (processInstruction(Inst, IDLoc, Instructions)) return true; for (unsigned i = 0; i < Instructions.size(); i++) - Out.EmitInstruction(Instructions[i], STI); + Out.EmitInstruction(Instructions[i], getSTI()); return false; } case Match_MissingFeature: @@ -2757,7 +3605,7 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((MipsOperand &)*Operands[ErrorInfo]).getStartLoc(); + ErrorLoc = Operands[ErrorInfo]->getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -2768,6 +3616,58 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "invalid instruction"); case Match_RequiresDifferentSrcAndDst: return Error(IDLoc, "source and destination must be different"); + case Match_Immz: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'"); + case Match_UImm1_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 1-bit unsigned immediate"); + case Match_UImm2_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 2-bit unsigned immediate"); + case Match_UImm2_1: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 1 .. 4"); + case Match_UImm3_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 3-bit unsigned immediate"); + case Match_UImm4_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 4-bit unsigned immediate"); + case Match_UImm5_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 5-bit unsigned immediate"); + case Match_UImm5_1: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 1 .. 32"); + case Match_UImm5_32: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 32 .. 63"); + case Match_UImm5_33: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected immediate in range 33 .. 64"); + case Match_UImm5_0_Report_UImm6: + // This is used on UImm5 operands that have a corresponding UImm5_32 + // operand to avoid confusing the user. 
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 6-bit unsigned immediate"); + case Match_UImm5_Lsl2: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected both 7-bit unsigned immediate and multiple of 4"); + case Match_UImm6_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 6-bit unsigned immediate"); + case Match_SImm6: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 6-bit signed immediate"); + case Match_UImm7_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 7-bit unsigned immediate"); + case Match_UImm8_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 8-bit unsigned immediate"); + case Match_UImm10_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 10-bit unsigned immediate"); } llvm_unreachable("Implement any new match types added!"); @@ -3264,7 +4164,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); // Get the next token. if (Tok.isNot(AsmToken::LParen)) { MipsOperand &Mnemonic = static_cast(*Operands[0]); - if (Mnemonic.getToken() == "la") { + if (Mnemonic.getToken() == "la" || Mnemonic.getToken() == "dla") { SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this)); @@ -3598,12 +4498,15 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) { if (RegRange) { // Remove last register operand because registers from register range // should be inserted first. - if (RegNo == Mips::RA) { + if ((isGP64bit() && RegNo == Mips::RA_64) || + (!isGP64bit() && RegNo == Mips::RA)) { Regs.push_back(RegNo); } else { unsigned TmpReg = PrevReg + 1; while (TmpReg <= RegNo) { - if ((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) { + if ((((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) && !isGP64bit()) || + (((TmpReg < Mips::S0_64) || (TmpReg > Mips::S7_64)) && + isGP64bit())) { Error(E, "invalid register operand"); return MatchOperand_ParseFail; } @@ -3615,16 +4518,23 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) { RegRange = false; } else { - if ((PrevReg == Mips::NoRegister) && (RegNo != Mips::S0) && - (RegNo != Mips::RA)) { + if ((PrevReg == Mips::NoRegister) && + ((isGP64bit() && (RegNo != Mips::S0_64) && (RegNo != Mips::RA_64)) || + (!isGP64bit() && (RegNo != Mips::S0) && (RegNo != Mips::RA)))) { Error(E, "$16 or $31 expected"); return MatchOperand_ParseFail; - } else if (((RegNo < Mips::S0) || (RegNo > Mips::S7)) && - (RegNo != Mips::FP) && (RegNo != Mips::RA)) { + } else if (!(((RegNo == Mips::FP || RegNo == Mips::RA || + (RegNo >= Mips::S0 && RegNo <= Mips::S7)) && + !isGP64bit()) || + ((RegNo == Mips::FP_64 || RegNo == Mips::RA_64 || + (RegNo >= Mips::S0_64 && RegNo <= Mips::S7_64)) && + isGP64bit()))) { Error(E, "invalid register operand"); return MatchOperand_ParseFail; } else if ((PrevReg != Mips::NoRegister) && (RegNo != PrevReg + 1) && - (RegNo != Mips::FP) && (RegNo != Mips::RA)) { + ((RegNo != Mips::FP && RegNo != Mips::RA && !isGP64bit()) || + (RegNo != Mips::FP_64 && RegNo != Mips::RA_64 && + isGP64bit()))) { Error(E, "consecutive register numbers expected"); return MatchOperand_ParseFail; } @@ -4152,6 +5062,7 @@ bool MipsAsmParser::parseSetPopDirective() { if (AssemblerOptions.size() == 2) return reportParseError(Loc, ".set pop with no .set push"); + MCSubtargetInfo &STI = copySTI(); AssemblerOptions.pop_back(); setAvailableFeatures( 
ComputeAvailableFeatures(AssemblerOptions.back()->getFeatures())); @@ -4225,6 +5136,7 @@ bool MipsAsmParser::parseSetMips0Directive() { return reportParseError("unexpected token, expected end of statement"); // Reset assembler options to their initial values. + MCSubtargetInfo &STI = copySTI(); setAvailableFeatures( ComputeAvailableFeatures(AssemblerOptions.front()->getFeatures())); STI.setFeatureBits(AssemblerOptions.front()->getFeatures()); @@ -4366,6 +5278,14 @@ bool MipsAsmParser::eatComma(StringRef ErrorStr) { return true; } +// Used to determine if .cpload, .cprestore, and .cpsetup have any effect. +// In this class, it is only used for .cprestore. +// FIXME: Only keep track of IsPicEnabled in one place, instead of in both +// MipsTargetELFStreamer and MipsAsmParser. +bool MipsAsmParser::isPicAndNotNxxAbi() { + return inPicMode() && !(isABI_N32() || isABI_N64()); +} + bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) { if (AssemblerOptions.back()->isReorder()) Warning(Loc, ".cpload should be inside a noreorder section"); @@ -4398,6 +5318,54 @@ bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) { return false; } +bool MipsAsmParser::parseDirectiveCpRestore(SMLoc Loc) { + MCAsmParser &Parser = getParser(); + + // Note that .cprestore is ignored if used with the N32 and N64 ABIs or if it + // is used in non-PIC mode. + + if (inMips16Mode()) { + reportParseError(".cprestore is not supported in Mips16 mode"); + return false; + } + + // Get the stack offset value. + const MCExpr *StackOffset; + int64_t StackOffsetVal; + if (Parser.parseExpression(StackOffset)) { + reportParseError("expected stack offset value"); + return false; + } + + if (!StackOffset->evaluateAsAbsolute(StackOffsetVal)) { + reportParseError("stack offset is not an absolute expression"); + return false; + } + + if (StackOffsetVal < 0) { + Warning(Loc, ".cprestore with negative stack offset has no effect"); + IsCpRestoreSet = false; + } else { + IsCpRestoreSet = true; + CpRestoreOffset = StackOffsetVal; + } + + // If this is not the end of the statement, report an error. + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token, expected end of statement"); + return false; + } + + // Store the $gp on the stack. + SmallVector StoreInsts; + createCpRestoreMemOp(false /*IsLoad*/, CpRestoreOffset /*StackOffset*/, Loc, + StoreInsts); + + getTargetStreamer().emitDirectiveCpRestore(StoreInsts, CpRestoreOffset); + Parser.Lex(); // Consume the EndOfStatement. 
+ return false; +} + bool MipsAsmParser::parseDirectiveCPSetup() { MCAsmParser &Parser = getParser(); unsigned FuncReg; @@ -4427,16 +5395,19 @@ bool MipsAsmParser::parseDirectiveCPSetup() { ResTy = parseAnyRegister(TmpReg); if (ResTy == MatchOperand_NoMatch) { - const AsmToken &Tok = Parser.getTok(); - if (Tok.is(AsmToken::Integer)) { - Save = Tok.getIntVal(); - SaveIsReg = false; - Parser.Lex(); - } else { - reportParseError("expected save register or stack offset"); + const MCExpr *OffsetExpr; + int64_t OffsetVal; + SMLoc ExprLoc = getLexer().getLoc(); + + if (Parser.parseExpression(OffsetExpr) || + !OffsetExpr->evaluateAsAbsolute(OffsetVal)) { + reportParseError(ExprLoc, "expected save register or stack offset"); Parser.eatToEndOfStatement(); return false; } + + Save = OffsetVal; + SaveIsReg = false; } else { MipsOperand &SaveOpnd = static_cast(*TmpReg[0]); if (!SaveOpnd.isGPRAsmReg()) { @@ -4462,11 +5433,20 @@ bool MipsAsmParser::parseDirectiveCPSetup() { } const MCSymbolRefExpr *Ref = static_cast(Expr); + CpSaveLocation = Save; + CpSaveLocationIsRegister = SaveIsReg; + getTargetStreamer().emitDirectiveCpsetup(FuncReg, Save, Ref->getSymbol(), SaveIsReg); return false; } +bool MipsAsmParser::parseDirectiveCPReturn() { + getTargetStreamer().emitDirectiveCpreturn(CpSaveLocation, + CpSaveLocationIsRegister); + return false; +} + bool MipsAsmParser::parseDirectiveNaN() { MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { @@ -4655,6 +5635,9 @@ bool MipsAsmParser::parseDirectiveOption() { StringRef Option = Tok.getIdentifier(); if (Option == "pic0") { + // MipsAsmParser needs to know if the current PIC mode changes. + IsPicEnabled = false; + getTargetStreamer().emitDirectiveOptionPic0(); Parser.Lex(); if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { @@ -4666,6 +5649,9 @@ bool MipsAsmParser::parseDirectiveOption() { } if (Option == "pic2") { + // MipsAsmParser needs to know if the current PIC mode changes. 
+ IsPicEnabled = true; + getTargetStreamer().emitDirectiveOptionPic2(); Parser.Lex(); if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { @@ -4924,6 +5910,8 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".cpload") return parseDirectiveCpLoad(DirectiveID.getLoc()); + if (IDVal == ".cprestore") + return parseDirectiveCpRestore(DirectiveID.getLoc()); if (IDVal == ".dword") { parseDataDirective(8, DirectiveID.getLoc()); return false; @@ -4974,6 +5962,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { getTargetStreamer().emitDirectiveEnt(*Sym); CurrentFn = Sym; + IsCpRestoreSet = false; return false; } @@ -5002,6 +5991,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { getTargetStreamer().emitDirectiveEnd(SymbolName); CurrentFn = nullptr; + IsCpRestoreSet = false; return false; } @@ -5073,6 +6063,7 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { getTargetStreamer().emitFrame(StackReg, FrameSizeVal, ReturnRegOpnd.getGPR32Reg()); + IsCpRestoreSet = false; return false; } @@ -5173,6 +6164,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".cpsetup") return parseDirectiveCPSetup(); + if (IDVal == ".cpreturn") + return parseDirectiveCPReturn(); + if (IDVal == ".module") return parseDirectiveModule(); diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index a34ba3bd0ee8..3c1a771f97e9 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -229,6 +229,13 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, uint64_t Address, const void *Decoder); +// DecodeBranchTarget26MM - Decode microMIPS branch offset, which is +// shifted left by 1 bit. +static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder); + // DecodeJumpTargetMM - Decode microMIPS jump target, which is // shifted left by 1 bit. 
static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, @@ -241,17 +248,42 @@ static DecodeStatus DecodeMem(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMemEVA(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLoadByte9(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeLoadByte15(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeCacheOp(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCacheOpR6(MCInst &Inst, +static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeCacheOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeCacheOpMM(MCInst &Inst, +static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -261,6 +293,11 @@ static DecodeStatus DecodeSyncI(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSynciR6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -284,6 +321,11 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeMemMMImm9(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeMemMMImm12(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -330,6 +372,11 @@ static DecodeStatus DecodeLiSimm7(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, + unsigned Value, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeSimm4(MCInst &Inst, unsigned Value, uint64_t Address, @@ -340,23 +387,15 @@ static DecodeStatus DecodeSimm16(MCInst &Inst, uint64_t Address, const void *Decoder); -// Decode the immediate field of an LSA instruction which -// is off by one. -static DecodeStatus DecodeLSAImm(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); +template +static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeInsSize(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeExtSize(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder); - static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); @@ -830,9 +869,24 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, if (IsMicroMips) { Result = readInstruction16(Bytes, Address, Size, Insn, IsBigEndian); + if (Result == MCDisassembler::Fail) + return MCDisassembler::Fail; + + if (hasMips32r6()) { + DEBUG(dbgs() << "Trying MicroMipsR616 table (16-bit instructions):\n"); + // Calling the auto-generated decoder function for microMIPS32R6 + // (and microMIPS64R6) 16-bit instructions. 
+ Result = decodeInstruction(DecoderTableMicroMipsR616, Instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 2; + return Result; + } + } DEBUG(dbgs() << "Trying MicroMips16 table (16-bit instructions):\n"); - // Calling the auto-generated decoder function. + // Calling the auto-generated decoder function for microMIPS 16-bit + // instructions. Result = decodeInstruction(DecoderTableMicroMips16, Instr, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -847,24 +901,33 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, if (hasMips32r6()) { DEBUG(dbgs() << "Trying MicroMips32r632 table (32-bit instructions):\n"); // Calling the auto-generated decoder function. - Result = decodeInstruction(DecoderTableMicroMips32r632, Instr, Insn, Address, - this, STI); - } else { - DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n"); - // Calling the auto-generated decoder function. - Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address, + Result = decodeInstruction(DecoderTableMicroMipsR632, Instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } } + + DEBUG(dbgs() << "Trying MicroMips32 table (32-bit instructions):\n"); + // Calling the auto-generated decoder function. + Result = decodeInstruction(DecoderTableMicroMips32, Instr, Insn, Address, + this, STI); if (Result != MCDisassembler::Fail) { Size = 4; return Result; } + // This is an invalid instruction. Let the disassembler move forward by the + // minimum instruction size. + Size = 2; return MCDisassembler::Fail; } Result = readInstruction32(Bytes, Address, Size, Insn, IsBigEndian, false); - if (Result == MCDisassembler::Fail) + if (Result == MCDisassembler::Fail) { + Size = 4; return MCDisassembler::Fail; + } if (hasCOP3()) { DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n"); @@ -925,6 +988,7 @@ DecodeStatus MipsDisassembler::getInstruction(MCInst &Instr, uint64_t &Size, return Result; } + Size = 4; return MCDisassembler::Fail; } @@ -1079,10 +1143,66 @@ static DecodeStatus DecodeMem(MCInst &Inst, Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); Base = getReg(Decoder, Mips::GPR32RegClassID, Base); - if(Inst.getOpcode() == Mips::SC || - Inst.getOpcode() == Mips::SCD){ + if (Inst.getOpcode() == Mips::SC || + Inst.getOpcode() == Mips::SCD) Inst.addOperand(MCOperand::createReg(Reg)); - } + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeMemEVA(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn >> 7); + unsigned Reg = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); + + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + if (Inst.getOpcode() == Mips::SCE) + Inst.addOperand(MCOperand::createReg(Reg)); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLoadByte9(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn & 0x1ff); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + + Base = getReg(Decoder, 
Mips::GPR32RegClassID, Base); + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeLoadByte15(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<16>(Insn & 0xffff); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); Inst.addOperand(MCOperand::createReg(Reg)); Inst.addOperand(MCOperand::createReg(Base)); @@ -1125,11 +1245,28 @@ static DecodeStatus DecodeCacheOpMM(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeCacheOpR6(MCInst &Inst, +static DecodeStatus DecodePrefeOpMM(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - int Offset = fieldFromInstruction(Insn, 7, 9); + int Offset = SignExtend32<9>(Insn & 0x1ff); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + unsigned Hint = fieldFromInstruction(Insn, 21, 5); + + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + Inst.addOperand(MCOperand::createImm(Hint)); + + return MCDisassembler::Success; +} + +static DecodeStatus DecodeCacheeOp_CacheOpR6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn >> 7); unsigned Hint = fieldFromInstruction(Insn, 16, 5); unsigned Base = fieldFromInstruction(Insn, 21, 5); @@ -1142,6 +1279,24 @@ static DecodeStatus DecodeCacheOpR6(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeStoreEvaOpMM(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn & 0x1ff); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + static DecodeStatus DecodeSyncI(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1157,6 +1312,21 @@ static DecodeStatus DecodeSyncI(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeSynciR6(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Immediate = SignExtend32<16>(Insn & 0xffff); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Immediate)); + + return MCDisassembler::Success; +} + static DecodeStatus DecodeMSA128Mem(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { int Offset = SignExtend32<10>(fieldFromInstruction(Insn, 16, 10)); @@ -1220,8 +1390,11 @@ static DecodeStatus DecodeMemMMImm4(MCInst &Inst, return MCDisassembler::Fail; break; case Mips::SB16_MM: + case Mips::SB16_MMR6: case Mips::SH16_MM: + case Mips::SH16_MMR6: case Mips::SW16_MM: + case Mips::SW16_MMR6: if (DecodeGPRMM16ZeroRegisterClass(Inst, Reg, Address, Decoder) == MCDisassembler::Fail) return MCDisassembler::Fail; @@ -1240,14 +1413,17 @@ static DecodeStatus 
DecodeMemMMImm4(MCInst &Inst, Inst.addOperand(MCOperand::createImm(Offset)); break; case Mips::SB16_MM: + case Mips::SB16_MMR6: Inst.addOperand(MCOperand::createImm(Offset)); break; case Mips::LHU16_MM: case Mips::SH16_MM: + case Mips::SH16_MMR6: Inst.addOperand(MCOperand::createImm(Offset << 1)); break; case Mips::LW16_MM: case Mips::SW16_MM: + case Mips::SW16_MMR6: Inst.addOperand(MCOperand::createImm(Offset << 2)); break; } @@ -1291,7 +1467,16 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { - int Offset = SignExtend32<4>(Insn & 0xf); + int Offset; + switch (Inst.getOpcode()) { + case Mips::LWM16_MMR6: + case Mips::SWM16_MMR6: + Offset = fieldFromInstruction(Insn, 4, 4); + break; + default: + Offset = SignExtend32<4>(Insn & 0xf); + break; + } if (DecodeRegListOperand16(Inst, Insn, Address, Decoder) == MCDisassembler::Fail) @@ -1303,6 +1488,27 @@ static DecodeStatus DecodeMemMMReglistImm4Lsl2(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeMemMMImm9(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int Offset = SignExtend32<9>(Insn & 0x1ff); + unsigned Reg = fieldFromInstruction(Insn, 21, 5); + unsigned Base = fieldFromInstruction(Insn, 16, 5); + + Reg = getReg(Decoder, Mips::GPR32RegClassID, Reg); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + if (Inst.getOpcode() == Mips::SCE_MM) + Inst.addOperand(MCOperand::createReg(Reg)); + + Inst.addOperand(MCOperand::createReg(Reg)); + Inst.addOperand(MCOperand::createReg(Base)); + Inst.addOperand(MCOperand::createImm(Offset)); + + return MCDisassembler::Success; +} + static DecodeStatus DecodeMemMMImm12(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1659,6 +1865,16 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeBranchTarget26MM(MCInst &Inst, + unsigned Offset, + uint64_t Address, + const void *Decoder) { + int32_t BranchOffset = SignExtend32<26>(Offset) << 1; + + Inst.addOperand(MCOperand::createImm(BranchOffset)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeJumpTargetMM(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -1700,6 +1916,14 @@ static DecodeStatus DecodeLiSimm7(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodePOOL16BEncodedField(MCInst &Inst, + unsigned Value, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::createImm(Value == 0x0 ? 8 : Value)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeSimm4(MCInst &Inst, unsigned Value, uint64_t Address, @@ -1716,12 +1940,12 @@ static DecodeStatus DecodeSimm16(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeLSAImm(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - // We add one to the immediate field as it was encoded as 'imm - 1'. 
- Inst.addOperand(MCOperand::createImm(Insn + 1)); +template +static DecodeStatus DecodeUImmWithOffset(MCInst &Inst, unsigned Value, + uint64_t Address, + const void *Decoder) { + Value &= ((1 << Bits) - 1); + Inst.addOperand(MCOperand::createImm(Value + Offset)); return MCDisassembler::Success; } @@ -1736,15 +1960,6 @@ static DecodeStatus DecodeInsSize(MCInst &Inst, return MCDisassembler::Success; } -static DecodeStatus DecodeExtSize(MCInst &Inst, - unsigned Insn, - uint64_t Address, - const void *Decoder) { - int Size = (int) Insn + 1; - Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Size))); - return MCDisassembler::Success; -} - static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { Inst.addOperand(MCOperand::createImm(SignExtend32<19>(Insn) * 4)); @@ -1792,15 +2007,21 @@ static DecodeStatus DecodeRegListOperand(MCInst &Inst, uint64_t Address, const void *Decoder) { unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5, - Mips::S6, Mips::FP}; + Mips::S6, Mips::S7, Mips::FP}; unsigned RegNum; unsigned RegLst = fieldFromInstruction(Insn, 21, 5); + // Empty register lists are not allowed. if (RegLst == 0) return MCDisassembler::Fail; RegNum = RegLst & 0xf; + + // RegLst values 10-15, and 26-31 are reserved. + if (RegNum > 9) + return MCDisassembler::Fail; + for (unsigned i = 0; i < RegNum; i++) Inst.addOperand(MCOperand::createReg(Regs[i])); @@ -1814,7 +2035,16 @@ static DecodeStatus DecodeRegListOperand16(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder) { unsigned Regs[] = {Mips::S0, Mips::S1, Mips::S2, Mips::S3}; - unsigned RegLst = fieldFromInstruction(Insn, 4, 2); + unsigned RegLst; + switch(Inst.getOpcode()) { + default: + RegLst = fieldFromInstruction(Insn, 4, 2); + break; + case Mips::LWM16_MMR6: + case Mips::SWM16_MMR6: + RegLst = fieldFromInstruction(Insn, 8, 2); + break; + } unsigned RegNum = RegLst & 0x3; for (unsigned i = 0; i <= RegNum; i++) diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index a5637b16b636..a7b7d2e080ee 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -235,7 +235,9 @@ printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) { case Mips::SWM32_MM: case Mips::LWM32_MM: case Mips::SWM16_MM: + case Mips::SWM16_MMR6: case Mips::LWM16_MM: + case Mips::LWM16_MMR6: opNum = MI->getNumOperands() - 2; break; } diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h index 713f35c70830..0e61ea61899a 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h @@ -73,8 +73,6 @@ enum CondCode { const char *MipsFCCToString(Mips::CondCode CC); } // end namespace Mips -class TargetMachine; - class MipsInstPrinter : public MCInstPrinter { public: MipsInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp index 8e6c9e69b223..cdcc3923b81e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp @@ -23,7 +23,7 @@ static const MCPhysReg Mips64IntRegs[8] = { Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64}; } -const ArrayRef MipsABIInfo::GetByValArgRegs() const { +ArrayRef MipsABIInfo::GetByValArgRegs() const { if (IsO32()) return makeArrayRef(O32IntRegs); if (IsN32() || IsN64()) @@ -31,7 
+31,7 @@ const ArrayRef MipsABIInfo::GetByValArgRegs() const { llvm_unreachable("Unhandled ABI"); } -const ArrayRef MipsABIInfo::GetVarArgRegs() const { +ArrayRef MipsABIInfo::GetVarArgRegs() const { if (IsO32()) return makeArrayRef(O32IntRegs); if (IsN32() || IsN64()) @@ -78,7 +78,6 @@ MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU, .Case("mips32r3", MipsABIInfo::O32()) .Case("mips32r5", MipsABIInfo::O32()) .Case("mips32r6", MipsABIInfo::O32()) - .Case("mips16", MipsABIInfo::O32()) .Case("mips3", MipsABIInfo::N64()) .Case("mips4", MipsABIInfo::N64()) .Case("mips5", MipsABIInfo::N64()) @@ -107,6 +106,10 @@ unsigned MipsABIInfo::GetNullPtr() const { return ArePtrs64bit() ? Mips::ZERO_64 : Mips::ZERO; } +unsigned MipsABIInfo::GetZeroReg() const { + return AreGprs64bit() ? Mips::ZERO_64 : Mips::ZERO; +} + unsigned MipsABIInfo::GetPtrAdduOp() const { return ArePtrs64bit() ? Mips::DADDu : Mips::ADDu; } @@ -115,6 +118,10 @@ unsigned MipsABIInfo::GetPtrAddiuOp() const { return ArePtrs64bit() ? Mips::DADDiu : Mips::ADDiu; } +unsigned MipsABIInfo::GetGPRMoveOp() const { + return ArePtrs64bit() ? Mips::OR64 : Mips::OR; +} + unsigned MipsABIInfo::GetEhDataReg(unsigned I) const { static const unsigned EhDataReg[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h index 40c5681acc17..ffa2c765e79b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h @@ -47,10 +47,10 @@ public: ABI GetEnumValue() const { return ThisABI; } /// The registers to use for byval arguments. - const ArrayRef GetByValArgRegs() const; + ArrayRef GetByValArgRegs() const; /// The registers to use for the variable argument list. - const ArrayRef GetVarArgRegs() const; + ArrayRef GetVarArgRegs() const; /// Obtain the size of the area allocated by the callee for arguments. /// CallingConv::FastCall affects the value for O32. @@ -67,9 +67,12 @@ public: unsigned GetFramePtr() const; unsigned GetBasePtr() const; unsigned GetNullPtr() const; + unsigned GetZeroReg() const; unsigned GetPtrAdduOp() const; unsigned GetPtrAddiuOp() const; + unsigned GetGPRMoveOp() const; inline bool ArePtrs64bit() const { return IsN64(); } + inline bool AreGprs64bit() const { return IsN32() || IsN64(); } unsigned GetEhDataReg(unsigned I) const; }; diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 328e71720cac..e4865e2455ee 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -63,15 +63,19 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // address range. Forcing a signed division because Value can be negative. Value = (int64_t)Value / 4; // We now check if Value can be encoded as a 16-bit signed immediate. - if (!isIntN(16, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup"); + if (!isInt<16>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup"); + return 0; + } break; case Mips::fixup_MIPS_PC19_S2: // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 4; // We now check if Value can be encoded as a 19-bit signed immediate. 
- if (!isIntN(19, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC19 fixup"); + if (!isInt<19>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC19 fixup"); + return 0; + } break; case Mips::fixup_Mips_26: // So far we are only using this type for jumps. @@ -104,45 +108,57 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 2; // We now check if Value can be encoded as a 7-bit signed immediate. - if (!isIntN(7, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC7 fixup"); + if (!isInt<7>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC7 fixup"); + return 0; + } break; case Mips::fixup_MICROMIPS_PC10_S1: Value -= 2; // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 2; // We now check if Value can be encoded as a 10-bit signed immediate. - if (!isIntN(10, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC10 fixup"); + if (!isInt<10>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC10 fixup"); + return 0; + } break; case Mips::fixup_MICROMIPS_PC16_S1: Value -= 4; // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 2; // We now check if Value can be encoded as a 16-bit signed immediate. - if (!isIntN(16, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC16 fixup"); + if (!isInt<16>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC16 fixup"); + return 0; + } break; case Mips::fixup_MIPS_PC18_S3: // Forcing a signed division because Value can be negative. Value = (int64_t)Value / 8; // We now check if Value can be encoded as a 18-bit signed immediate. - if (!isIntN(18, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC18 fixup"); + if (!isInt<18>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC18 fixup"); + return 0; + } break; case Mips::fixup_MIPS_PC21_S2: // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 4; // We now check if Value can be encoded as a 21-bit signed immediate. - if (!isIntN(21, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC21 fixup"); + if (!isInt<21>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC21 fixup"); + return 0; + } break; case Mips::fixup_MIPS_PC26_S2: // Forcing a signed division because Value can be negative. Value = (int64_t) Value / 4; // We now check if Value can be encoded as a 26-bit signed immediate. - if (!isIntN(26, Value) && Ctx) - Ctx->reportFatalError(Fixup.getLoc(), "out of range PC26 fixup"); + if (!isInt<26>(Value) && Ctx) { + Ctx->reportError(Fixup.getLoc(), "out of range PC26 fixup"); + return 0; + } break; } @@ -232,6 +248,18 @@ void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } +bool MipsAsmBackend::getFixupKind(StringRef Name, MCFixupKind &MappedKind) const { + if (Name == "R_MIPS_NONE") { + MappedKind = (MCFixupKind)Mips::fixup_Mips_NONE; + return true; + } + if (Name == "R_MIPS_32") { + MappedKind = FK_Data_4; + return true; + } + return MCAsmBackend::getFixupKind(Name, MappedKind); +} + const MCFixupKindInfo &MipsAsmBackend:: getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo LittleEndianInfos[Mips::NumTargetFixupKinds] = { @@ -239,6 +267,7 @@ getFixupKindInfo(MCFixupKind Kind) const { // MipsFixupKinds.h. 
// // name offset bits flags + { "fixup_Mips_NONE", 0, 0, 0 }, { "fixup_Mips_16", 0, 16, 0 }, { "fixup_Mips_32", 0, 32, 0 }, { "fixup_Mips_REL32", 0, 32, 0 }, @@ -304,6 +333,7 @@ getFixupKindInfo(MCFixupKind Kind) const { // MipsFixupKinds.h. // // name offset bits flags + { "fixup_Mips_NONE", 0, 0, 0 }, { "fixup_Mips_16", 16, 16, 0 }, { "fixup_Mips_32", 0, 32, 0 }, { "fixup_Mips_REL32", 0, 32, 0 }, diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h index b3d5a4964f86..1c9af9227ffe 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h @@ -41,6 +41,7 @@ public: void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const override; + bool getFixupKind(StringRef Name, MCFixupKind &MappedKind) const override; const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; unsigned getNumFixupKinds() const override { diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp index 9b2952720edd..5b9f02b89be5 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp @@ -68,6 +68,8 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, unsigned Kind = (unsigned)Fixup.getKind(); switch (Kind) { + case Mips::fixup_Mips_NONE: + return ELF::R_MIPS_NONE; case Mips::fixup_Mips_16: case FK_Data_2: return IsPCRel ? ELF::R_MIPS_PC16 : ELF::R_MIPS_16; @@ -325,13 +327,24 @@ static void setMatch(MipsRelocationEntry &Hi, MipsRelocationEntry &Lo) { // matching LO; // - prefer LOs without a pair; // - prefer LOs with higher offset; + +static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) { + const ELFRelocationEntry &A = *AP; + const ELFRelocationEntry &B = *BP; + if (A.Offset != B.Offset) + return B.Offset - A.Offset; + if (B.Type != A.Type) + return A.Type - B.Type; + return 0; +} + void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm, std::vector &Relocs) { if (Relocs.size() < 2) return; - // The default function sorts entries by Offset in descending order. - MCELFObjectTargetWriter::sortRelocs(Asm, Relocs); + // Sorts entries by Offset in descending order. + array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); // Init MipsRelocs from Relocs. std::vector MipsRelocs; diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp index b45d9cf621d7..e7d687e89a8a 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp @@ -63,7 +63,7 @@ void MipsELFStreamer::SwitchSection(MCSection *Section, } void MipsELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) { + SMLoc Loc) { MCELFStreamer::EmitValueImpl(Value, Size, Loc); Labels.clear(); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h index af9311fa4288..a241cdebdcc8 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h +++ b/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h @@ -60,8 +60,7 @@ public: /// Overriding this function allows us to dismiss all labels that are /// candidates for marking as microMIPS when .word directive is emitted. 
- void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) override; + void EmitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override; /// Emits all the option records stored up until the point it's called. void EmitMipsOptionRecords(); diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h index e601963264de..3652f4bab0d4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h +++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h @@ -23,8 +23,11 @@ namespace Mips { // in MipsAsmBackend.cpp. // enum Fixups { + // Branch fixups resulting in R_MIPS_NONE. + fixup_Mips_NONE = FirstTargetFixupKind, + // Branch fixups resulting in R_MIPS_16. - fixup_Mips_16 = FirstTargetFixupKind, + fixup_Mips_16, // Pure 32 bit data fixup resulting in - R_MIPS_32. fixup_Mips_32, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h index 5d23fcbd7a44..d4ccf0349c16 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h @@ -17,13 +17,14 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class MipsMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit MipsMCAsmInfo(const Triple &TheTriple); - }; +class MipsMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit MipsMCAsmInfo(const Triple &TheTriple); +}; } // namespace llvm diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index e36263d54ca4..4b030ebfce8c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -190,6 +190,10 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, else NewOpcode = Mips::Std2MicroMips(Opcode, Mips::Arch_micromips); + // Check whether it is Dsp instruction. + if (NewOpcode == -1) + NewOpcode = Mips::Dsp2MicroMips(Opcode, Mips::Arch_mmdsp); + if (NewOpcode != -1) { if (Fixups.size() > N) Fixups.pop_back(); @@ -346,6 +350,23 @@ getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo, return 0; } +/// getBranchTarget26OpValueMM - Return binary encoding of the branch +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned MipsMCCodeEmitter::getBranchTarget26OpValueMM( + const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + + const MCOperand &MO = MI.getOperand(OpNo); + + // If the destination is an immediate, divide by 2. + if (MO.isImm()) + return MO.getImm() >> 1; + + // TODO: Push 26 PC fixup. + return 0; +} + /// getJumpOffset16OpValue - Return binary encoding of the jump /// target operand. If the machine operand requires relocation, /// record the relocation and return zero. @@ -745,7 +766,8 @@ getMemEncodingMMSPImm5Lsl2(const MCInst &MI, unsigned OpNo, const MCSubtargetInfo &STI) const { // Register is encoded in bits 9-5, offset is encoded in bits 4-0. 
assert(MI.getOperand(OpNo).isReg() && - MI.getOperand(OpNo).getReg() == Mips::SP && + (MI.getOperand(OpNo).getReg() == Mips::SP || + MI.getOperand(OpNo).getReg() == Mips::SP_64) && "Unexpected base register!"); unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) >> 2; @@ -768,6 +790,19 @@ getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo, return OffBits & 0x7F; } +unsigned MipsMCCodeEmitter:: +getMemEncodingMMImm9(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + // Base register is encoded in bits 20-16, offset is encoded in bits 8-0. + assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, + STI) << 16; + unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI); + + return (OffBits & 0x1FF) | RegBits; +} + unsigned MipsMCCodeEmitter:: getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, @@ -791,6 +826,19 @@ getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo, return (OffBits & 0x0FFF) | RegBits; } +unsigned MipsMCCodeEmitter:: +getMemEncodingMMImm16(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + // Base register is encoded in bits 20-16, offset is encoded in bits 15-0. + assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, + STI) << 16; + unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI); + + return (OffBits & 0xFFFF) | RegBits; +} + unsigned MipsMCCodeEmitter:: getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, @@ -801,7 +849,9 @@ getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo, default: break; case Mips::SWM16_MM: + case Mips::SWM16_MMR6: case Mips::LWM16_MM: + case Mips::LWM16_MMR6: OpNo = MI.getNumOperands() - 2; break; } @@ -815,15 +865,6 @@ getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo, return ((OffBits >> 2) & 0x0F); } -unsigned -MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { - assert(MI.getOperand(OpNo).isImm()); - unsigned SizeEncoding = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); - return SizeEncoding - 1; -} - // FIXME: should be called getMSBEncoding // unsigned @@ -838,13 +879,15 @@ MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, return Position + Size - 1; } +template unsigned -MipsMCCodeEmitter::getLSAImmEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { +MipsMCCodeEmitter::getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { assert(MI.getOperand(OpNo).isImm()); - // The immediate is encoded as 'immediate - 1'. - return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) - 1; + unsigned Value = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI); + Value -= Offset; + return Value; } unsigned diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h index 911cc2f77a45..fdacd172e3a2 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h @@ -137,6 +137,13 @@ public: SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + // getBranchTarget26OpValueMM - Return binary encoding of the branch + // offset operand. 
If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getBranchTarget26OpValueMM(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + // getJumpOffset16OpValue - Return binary encoding of the jump // offset operand. If the machine operand requires relocation, // record the relocation and return zero. @@ -172,23 +179,27 @@ public: unsigned getMemEncodingMMGPImm7Lsl2(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + unsigned getMemEncodingMMImm9(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getMemEncodingMMImm12(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + unsigned getMemEncodingMMImm16(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getMemEncodingMMImm4sp(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - // getLSAImmEncoding - Return binary encoding of LSA immediate. - unsigned getLSAImmEncoding(const MCInst &MI, unsigned OpNo, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; + /// Subtract Offset then encode as a N-bit unsigned integer. + template + unsigned getUImmWithOffsetEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; unsigned getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h index fd2ed17ee785..e889972c5c0e 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h @@ -51,8 +51,8 @@ public: const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *findAssociatedSection() const override { - return getSubExpr()->findAssociatedSection(); + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); } // There are no TLS MipsMCExprs at the moment. 
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index e4da2df75d47..e5fa7556053f 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -89,9 +89,15 @@ void MipsTargetStreamer::emitDirectiveSetHardFloat() { void MipsTargetStreamer::emitDirectiveSetDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveSetNoDsp() { forbidModuleDirective(); } void MipsTargetStreamer::emitDirectiveCpLoad(unsigned RegNo) {} +void MipsTargetStreamer::emitDirectiveCpRestore( + SmallVector &StoreInsts, int Offset) { + forbidModuleDirective(); +} void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) { } +void MipsTargetStreamer::emitDirectiveCpreturn(unsigned SaveLocation, + bool SaveLocationIsRegister) {} void MipsTargetStreamer::emitDirectiveModuleFP() {} @@ -358,6 +364,12 @@ void MipsTargetAsmStreamer::emitDirectiveCpLoad(unsigned RegNo) { forbidModuleDirective(); } +void MipsTargetAsmStreamer::emitDirectiveCpRestore( + SmallVector &StoreInsts, int Offset) { + MipsTargetStreamer::emitDirectiveCpRestore(StoreInsts, Offset); + OS << "\t.cprestore\t" << Offset << "\n"; +} + void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, @@ -373,7 +385,13 @@ void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo, OS << ", "; - OS << Sym.getName() << "\n"; + OS << Sym.getName(); + forbidModuleDirective(); +} + +void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation, + bool SaveLocationIsRegister) { + OS << "\t.cpreturn"; forbidModuleDirective(); } @@ -595,8 +613,9 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { MCSectionELF *Sec = Context.getELFSection(".pdr", ELF::SHT_PROGBITS, ELF::SHF_ALLOC | ELF::SHT_REL); + MCSymbol *Sym = Context.getOrCreateSymbol(Name); const MCSymbolRefExpr *ExprRef = - MCSymbolRefExpr::create(Name, MCSymbolRefExpr::VK_None, Context); + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Context); MCA.registerSection(*Sec); Sec->setAlignment(4); @@ -622,10 +641,25 @@ void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; OS.PopSection(); + + // .end also implicitly sets the size. 
+ MCSymbol *CurPCSym = Context.createTempSymbol(); + OS.EmitLabel(CurPCSym); + const MCExpr *Size = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(CurPCSym, MCSymbolRefExpr::VK_None, Context), + ExprRef, Context); + int64_t AbsSize; + if (!Size->evaluateAsAbsolute(AbsSize, MCA)) + llvm_unreachable("Function size must be evaluatable as absolute"); + Size = MCConstantExpr::create(AbsSize, Context); + static_cast(Sym)->setSize(Size); } void MipsTargetELFStreamer::emitDirectiveEnt(const MCSymbol &Symbol) { GPRInfoSet = FPRInfoSet = FrameInfoSet = false; + + // .ent also acts like an implicit '.type symbol, STT_FUNC' + static_cast(Symbol).setType(ELF::STT_FUNC); } void MipsTargetELFStreamer::emitDirectiveAbiCalls() { @@ -752,6 +786,24 @@ void MipsTargetELFStreamer::emitDirectiveCpLoad(unsigned RegNo) { forbidModuleDirective(); } +void MipsTargetELFStreamer::emitDirectiveCpRestore( + SmallVector &StoreInsts, int Offset) { + MipsTargetStreamer::emitDirectiveCpRestore(StoreInsts, Offset); + // .cprestore offset + // When PIC mode is enabled and the O32 ABI is used, this directive expands + // to: + // sw $gp, offset($sp) + // and adds a corresponding LW after every JAL. + + // Note that .cprestore is ignored if used with the N32 and N64 ABIs or if it + // is used in non-PIC mode. + if (!Pic || (getABI().IsN32() || getABI().IsN64())) + return; + + for (const MCInst &Inst : StoreInsts) + getStreamer().EmitInstruction(Inst, STI); +} + void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, @@ -766,7 +818,7 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, // Either store the old $gp in a register or on the stack if (IsReg) { // move $save, $gpreg - Inst.setOpcode(Mips::DADDu); + Inst.setOpcode(Mips::OR64); Inst.addOperand(MCOperand::createReg(RegOrOffset)); Inst.addOperand(MCOperand::createReg(Mips::GP)); Inst.addOperand(MCOperand::createReg(Mips::ZERO)); @@ -810,6 +862,30 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, forbidModuleDirective(); } +void MipsTargetELFStreamer::emitDirectiveCpreturn(unsigned SaveLocation, + bool SaveLocationIsRegister) { + // Only N32 and N64 emit anything for .cpreturn iff PIC is set. + if (!Pic || !(getABI().IsN32() || getABI().IsN64())) + return; + + MCInst Inst; + // Either restore the old $gp from a register or on the stack + if (SaveLocationIsRegister) { + Inst.setOpcode(Mips::OR); + Inst.addOperand(MCOperand::createReg(Mips::GP)); + Inst.addOperand(MCOperand::createReg(SaveLocation)); + Inst.addOperand(MCOperand::createReg(Mips::ZERO)); + } else { + Inst.setOpcode(Mips::LD); + Inst.addOperand(MCOperand::createReg(Mips::GP)); + Inst.addOperand(MCOperand::createReg(Mips::SP)); + Inst.addOperand(MCOperand::createImm(SaveLocation)); + } + getStreamer().EmitInstruction(Inst, STI); + + forbidModuleDirective(); +} + void MipsTargetELFStreamer::emitMipsAbiFlags() { MCAssembler &MCA = getStreamer().getAssembler(); MCContext &Context = MCA.getContext(); diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td index 187a022b2563..400f6eef3fb0 100644 --- a/lib/Target/Mips/MicroMips32r6InstrFormats.td +++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td @@ -16,6 +16,64 @@ class MMR6Arch { string BaseOpcode = opstr; } +// Class used for microMIPS32r6 and microMIPS64r6 instructions. 
diff --git a/lib/Target/Mips/MicroMips32r6InstrFormats.td b/lib/Target/Mips/MicroMips32r6InstrFormats.td
index 187a022b2563..400f6eef3fb0 100644
--- a/lib/Target/Mips/MicroMips32r6InstrFormats.td
+++ b/lib/Target/Mips/MicroMips32r6InstrFormats.td
@@ -16,6 +16,64 @@ class MMR6Arch<string opstr> {
   string BaseOpcode = opstr;
 }
 
+// Class used for microMIPS32r6 and microMIPS64r6 instructions.
+class MicroMipsR6Inst16 : PredicateControl {
+  string DecoderNamespace = "MicroMipsR6";
+  let InsnPredicates = [HasMicroMips32r6];
+}
+
+class BC16_FM_MM16R6 {
+  bits<10> offset;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x33;
+  let Inst{9-0} = offset;
+}
+
+class BEQZC_BNEZC_FM_MM16R6<bits<6> op> : MicroMipsR6Inst16 {
+  bits<3> rs;
+  bits<7> offset;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = op;
+  let Inst{9-7} = rs;
+  let Inst{6-0} = offset;
+}
+
+class POOL16C_JALRC_FM_MM16R6<bits<5> op> {
+  bits<5> rs;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x11;
+  let Inst{9-5} = rs;
+  let Inst{4-0} = op;
+}
+
+class POOL16C_JRCADDIUSP_FM_MM16R6<bits<5> op> {
+  bits<5> imm;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x11;
+  let Inst{9-5} = imm;
+  let Inst{4-0} = op;
+}
+
+class POOL16C_LWM_SWM_FM_MM16R6<bits<4> funct> {
+  bits<2> rt;
+  bits<4> addr;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x11;
+  let Inst{9-8} = rt;
+  let Inst{7-4} = addr;
+  let Inst{3-0} = funct;
+}
+
 class POOL32A_BITSWAP_FM_MMR6<bits<6> funct> : MipsR6Inst {
   bits<5> rd;
   bits<5> rt;
@@ -71,6 +129,64 @@ class ADDI_FM_MMR6<string instr_asm, bits<6> op> : MMR6Arch<instr_asm> {
   let Inst{15-0} = imm16;
 }
 
+class POOL32C_ST_EVA_FM_MMR6<bits<6> op, bits<3> funct> : MipsR6Inst {
+  bits<21> addr;
+  bits<5> hint;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = hint;
+  let Inst{20-16} = base;
+  let Inst{15-12} = 0b1010;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = offset;
+}
+
+class LB32_FM_MMR6 : MipsR6Inst {
+  bits<21> addr;
+  bits<5> rt;
+  bits<5> base = addr{20-16};
+  bits<16> offset = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b000111;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-0} = offset;
+}
+
+class LBU32_FM_MMR6 : MipsR6Inst {
+  bits<21> addr;
+  bits<5> rt;
+  bits<5> base = addr{20-16};
+  bits<16> offset = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b000101;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-0} = offset;
+}
+
+class POOL32C_LB_LBU_FM_MMR6<bits<3> funct> : MipsR6Inst {
+  bits<21> addr;
+  bits<5> rt;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b011000;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = addr{20-16};
+  let Inst{15-12} = 0b0110;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = addr{8-0};
+}
+
 class SIGN_EXTEND_FM_MMR6<string instr_asm, bits<10> funct>
     : MMR6Arch<instr_asm> {
   bits<5> rd;
@@ -124,6 +240,69 @@ class POOL32A_FM_MMR6<bits<10> funct> : MipsR6Inst {
   let Inst{9-0} = funct;
 }
 
+class POOL32A_PAUSE_FM_MMR6<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
+  bits<32> Inst;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = 0;
+  let Inst{20-16} = 0;
+  let Inst{15-11} = op;
+  let Inst{10-6} = 0;
+  let Inst{5-0} = 0;
+}
+
+class POOL32A_RDPGPR_FM_MMR6<bits<10> funct> {
+  bits<5> rt;
+  bits<5> rd;
+  bits<32> Inst;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rd;
+  let Inst{15-6} = funct;
+  let Inst{5-0} = 0b111100;
+}
+
+class POOL32A_RDHWR_FM_MMR6 {
+  bits<5> rt;
+  bits<5> rs;
+  bits<3> sel;
+  bits<32> Inst;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-14} = 0;
+  let Inst{13-11} = sel;
+  let Inst{10} = 0;
+  let Inst{9-0} = 0b0111000000;
+}
+
+class POOL32A_SYNC_FM_MMR6 {
+  bits<5> stype;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0;
+  let Inst{25-21} = 0;
+  let Inst{20-16} = stype;
+  let Inst{15-6} = 0b0110101101;
+  let Inst{5-0} = 0b111100;
+}
+
+class POOL32I_SYNCI_FM_MMR6 {
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<16> immediate = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010000;
+  let Inst{25-21} = 0b01100;
+  let Inst{20-16} = base;
+  let Inst{15-0} = immediate;
+}
+
 class POOL32A_2R_FM_MMR6<bits<10> funct> : MipsR6Inst {
   bits<5> rs;
   bits<5> rt;
@@ -198,6 +377,78 @@ class POOL32A_LSA_FM<bits<6> funct> : MipsR6Inst {
   let Inst{5-0} = funct;
 }
 
+class SB32_SH32_STORE_FM_MMR6<bits<6> op> {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<16> offset = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-0} = offset;
+}
+
+class POOL32C_STORE_EVA_FM_MMR6<bits<3> funct> {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b011000;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-12} = 0b1010;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = offset;
+}
+
+class LOAD_WORD_EVA_FM_MMR6<bits<3> funct> {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b011000;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-12} = 0b0110;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = offset;
+}
+
+class LOAD_WORD_FM_MMR6 {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<16> offset = addr{15-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b111111;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-0} = offset;
+}
+
+class LOAD_UPPER_IMM_FM_MMR6 {
+  bits<5> rt;
+  bits<16> imm16;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b000100;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = 0;
+  let Inst{15-0} = imm16;
+}
+
 class CMP_BRANCH_1R_RT_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
   bits<5> rt;
   bits<16> offset;
@@ -222,12 +473,13 @@ class CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<bits<6> funct> : MipsR6Inst {
   let Inst{15-0} = offset;
 }
 
-class ERET_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm> {
+class POOL32A_ERET_FM_MMR6<string instr_asm, bits<10> funct>
+    : MMR6Arch<instr_asm> {
   bits<32> Inst;
 
   let Inst{31-26} = 0x00;
   let Inst{25-16} = 0x00;
-  let Inst{15-6} = 0x3cd;
+  let Inst{15-6} = funct;
   let Inst{5-0} = 0x3c;
 }
 
@@ -262,7 +514,8 @@ class BARRIER_MMR6_ENC<string instr_asm, bits<5> op> : MMR6Arch<instr_asm> {
   let Inst{5-0} = 0x0;
 }
 
-class EIDI_MMR6_ENC<string instr_asm, bits<10> funct> : MMR6Arch<instr_asm> {
+class POOL32A_EIDI_MMR6_ENC<string instr_asm, bits<10> funct>
+    : MMR6Arch<instr_asm> {
   bits<32> Inst;
   bits<5> rt; // Actually rs but we're sharing code with the standard encodings which call it rt
@@ -287,3 +540,323 @@ class SHIFT_MMR6_ENC<string instr_asm, bits<6> funct, bit rotate> : MMR6Arch<instr_asm> {
   let Inst{10} = rotate;
   let Inst{9-0} = funct;
 }
+
+class SW32_FM_MMR6<string instr_asm, bits<6> op> : MMR6Arch<instr_asm> {
+  bits<5> rt;
+  bits<21> addr;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = addr{20-16};
+  let Inst{15-0} = addr{15-0};
+}
+
+class POOL32C_SWE_FM_MMR6<string instr_asm, bits<6> op, bits<4> fmt,
+                          bits<3> funct> : MMR6Arch<instr_asm> {
+  bits<5> rt;
+  bits<21> addr;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = op;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = base;
+  let Inst{15-12} = fmt;
+  let Inst{11-9} = funct;
+  let Inst{8-0} = offset;
+}
+
+class POOL32F_ARITH_FM_MMR6<string instr_asm, bits<2> fmt, bits<8> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10} = 0;
+  let Inst{9-8} = fmt;
+  let Inst{7-0} = funct;
+}
+
+class POOL32F_ARITHF_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = funct;
+}
+
+class POOL32F_MOV_NEG_FM_MMR6<string instr_asm, bits<2> fmt, bits<7> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14-13} = fmt;
+  let Inst{12-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_MINMAX_FM<string instr_asm, bits<2> fmt, bits<9> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = funct;
+}
+
+class POOL32F_CMP_FM<string instr_asm, bits<6> format, FIELD_CMP_COND Cond>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10-6} = Cond.Value;
+  let Inst{5-0} = format;
+}
+
+class POOL32F_CVT_LW_FM<string instr_asm, bit fmt, bits<8> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14} = fmt;
+  let Inst{13-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_CVT_DS_FM<string instr_asm, bits<2> fmt, bits<7> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14-13} = fmt;
+  let Inst{12-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_ABS_FM_MMR6<string instr_asm, bits<2> fmt, bits<7> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14-13} = fmt;
+  let Inst{12-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_MATH_FM_MMR6<string instr_asm, bits<1> fmt, bits<8> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14} = fmt;
+  let Inst{13-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL16A_ADDU16_FM_MMR6 : MicroMipsR6Inst16 {
+  bits<3> rs;
+  bits<3> rt;
+  bits<3> rd;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b000001;
+  let Inst{9-7} = rs;
+  let Inst{6-4} = rt;
+  let Inst{3-1} = rd;
+  let Inst{0} = 0;
+}
+
+class POOL16C_AND16_FM_MMR6 : MicroMipsR6Inst16 {
+  bits<3> rt;
+  bits<3> rs;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b010001;
+  let Inst{9-7} = rt;
+  let Inst{6-4} = rs;
+  let Inst{3-0} = 0b0001;
+}
+
+class POOL16C_NOT16_FM_MMR6 : MicroMipsR6Inst16 {
+  bits<3> rt;
+  bits<3> rs;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0x11;
+  let Inst{9-7} = rt;
+  let Inst{6-4} = rs;
+  let Inst{3-0} = 0b0000;
+}
+
+class POOL16C_OR16_XOR16_FM_MMR6<bits<4> op> {
+  bits<3> rt;
+  bits<3> rs;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b010001;
+  let Inst{9-7} = rt;
+  let Inst{6-4} = rs;
+  let Inst{3-0} = op;
+}
+
+class POOL16C_BREAKPOINT_FM_MMR6<bits<6> op> {
+  bits<4> code_;
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b010001;
+  let Inst{9-6} = code_;
+  let Inst{5-0} = op;
+}
+
+class POOL16A_SUBU16_FM_MMR6 {
+  bits<3> rs;
+  bits<3> rt;
+  bits<3> rd;
+
+  bits<16> Inst;
+
+  let Inst{15-10} = 0b000001;
+  let Inst{9-7} = rs;
+  let Inst{6-4} = rt;
+  let Inst{3-1} = rd;
+  let Inst{0} = 0b1;
+}
+
+class POOL32A_WRPGPR_WSBH_FM_MMR6<bits<10> funct> : MipsR6Inst {
+  bits<5> rt;
+  bits<5> rs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0x00;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-6} = funct;
+  let Inst{5-0} = 0x3c;
+}
+
+class POOL32F_RECIP_ROUND_FM_MMR6<string instr_asm, bits<1> fmt, bits<8> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15} = 0;
+  let Inst{14} = fmt;
+  let Inst{13-6} = funct;
+  let Inst{5-0} = 0b111011;
+}
+
+class POOL32F_RINT_FM_MMR6<string instr_asm, bits<2> fmt>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = fs;
+  let Inst{20-16} = fd;
+  let Inst{15-11} = 0;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = 0b000100000;
+}
+
+class POOL32F_SEL_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> ft;
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = ft;
+  let Inst{20-16} = fs;
+  let Inst{15-11} = fd;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = funct;
+}
+
+class POOL32F_CLASS_FM_MMR6<string instr_asm, bits<2> fmt, bits<9> funct>
+    : MMR6Arch<instr_asm>, MipsR6Inst {
+  bits<5> fs;
+  bits<5> fd;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010101;
+  let Inst{25-21} = fs;
+  let Inst{20-16} = fd;
+  let Inst{15-11} = 0b00000;
+  let Inst{10-9} = fmt;
+  let Inst{8-0} = funct;
+}
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 53bde1379e29..31b5db036daa 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -11,6 +11,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+def brtarget26_mm : Operand<OtherVT> {
+  let EncoderMethod = "getBranchTarget26OpValueMM";
+  let OperandType = "OPERAND_PCREL";
+  let DecoderMethod = "DecodeBranchTarget26MM";
+  let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
 //===----------------------------------------------------------------------===//
 //
 // Instruction Encodings
 //
@@ -28,6 +35,9 @@ class ALIGN_MMR6_ENC : POOL32A_ALIGN_FM_MMR6<0b011111>;
 class AUI_MMR6_ENC : AUI_FM_MMR6;
 class BALC_MMR6_ENC : BRANCH_OFF26_FM<0b101101>;
 class BC_MMR6_ENC : BRANCH_OFF26_FM<0b100101>;
+class BC16_MMR6_ENC : BC16_FM_MM16R6;
+class BEQZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x23>;
+class BNEZC16_MMR6_ENC : BEQZC_BNEZC_FM_MM16R6<0x2b>;
 class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
 class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">;
 class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<0b011101>;
@@ -42,13 +52,19 @@ class CLZ_MMR6_ENC : SPECIAL_2R_FM_MMR6<0b010000>;
 class DIV_MMR6_ENC : ARITH_FM_MMR6<"div", 0x118>;
 class DIVU_MMR6_ENC : ARITH_FM_MMR6<"divu", 0x198>;
 class EHB_MMR6_ENC : BARRIER_MMR6_ENC<"ehb", 0x3>;
-class EI_MMR6_ENC : EIDI_MMR6_ENC<"ei", 0x15d>;
-class ERET_MMR6_ENC : ERET_FM_MMR6<"eret">;
+class EI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"ei", 0x15d>;
+class DI_MMR6_ENC : POOL32A_EIDI_MMR6_ENC<"di", 0b0100011101>;
+class ERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"eret", 0x3cd>;
+class DERET_MMR6_ENC : POOL32A_ERET_FM_MMR6<"deret", 0b1110001101>;
 class ERETNC_MMR6_ENC : ERETNC_FM_MMR6<"eretnc">;
+class JALRC16_MMR6_ENC : POOL16C_JALRC_FM_MM16R6<0xb>;
 class JIALC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b100000>;
 class JIC_MMR6_ENC : JMP_IDX_COMPACT_FM<0b101000>;
+class JRC16_MMR6_ENC : POOL16C_JALRC_FM_MM16R6<0x3>;
+class JRCADDIUSP_MMR6_ENC : POOL16C_JRCADDIUSP_FM_MM16R6<0x13>;
 class LSA_MMR6_ENC : POOL32A_LSA_FM<0b001111>;
 class LWPC_MMR6_ENC : PCREL19_FM_MMR6<0b01>;
+class LWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0x2>;
 class MOD_MMR6_ENC : ARITH_FM_MMR6<"mod", 0x158>;
 class MODU_MMR6_ENC : ARITH_FM_MMR6<"modu", 0x1d8>;
 class MUL_MMR6_ENC : ARITH_FM_MMR6<"mul", 0x18>;
@@ -59,15 +75,99 @@ class NOR_MMR6_ENC : ARITH_FM_MMR6<"nor", 0x2d0>;
 class OR_MMR6_ENC : ARITH_FM_MMR6<"or", 0x290>;
 class ORI_MMR6_ENC : ADDI_FM_MMR6<"ori", 0x14>;
class PREF_MMR6_ENC : CACHE_PREF_FM_MMR6<0b011000, 0b0010>; +class SB16_MMR6_ENC : LOAD_STORE_FM_MM16<0x22>; class SEB_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seb", 0b0010101100>; class SEH_MMR6_ENC : SIGN_EXTEND_FM_MMR6<"seh", 0b0011101100>; class SELEQZ_MMR6_ENC : POOL32A_FM_MMR6<0b0101000000>; class SELNEZ_MMR6_ENC : POOL32A_FM_MMR6<0b0110000000>; +class SH16_MMR6_ENC : LOAD_STORE_FM_MM16<0x2a>; class SLL_MMR6_ENC : SHIFT_MMR6_ENC<"sll", 0x00, 0b0>; class SUB_MMR6_ENC : ARITH_FM_MMR6<"sub", 0x190>; class SUBU_MMR6_ENC : ARITH_FM_MMR6<"subu", 0x1d0>; +class SW_MMR6_ENC : SW32_FM_MMR6<"sw", 0x3e>; +class SWE_MMR6_ENC : POOL32C_SWE_FM_MMR6<"swe", 0x18, 0xa, 0x7>; +class SW16_MMR6_ENC : LOAD_STORE_FM_MM16<0x3a>; +class SWM16_MMR6_ENC : POOL16C_LWM_SWM_FM_MM16R6<0xa>; +class SWSP_MMR6_ENC : LOAD_STORE_SP_FM_MM16<0x32>; +class PREFE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b010>; +class CACHEE_MMR6_ENC : POOL32C_ST_EVA_FM_MMR6<0b011000, 0b011>; +class WRPGPR_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x3c5>; +class WSBH_MMR6_ENC : POOL32A_WRPGPR_WSBH_FM_MMR6<0x1ec>; +class LB_MMR6_ENC : LB32_FM_MMR6; +class LBU_MMR6_ENC : LBU32_FM_MMR6; +class LBE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b100>; +class LBUE_MMR6_ENC : POOL32C_LB_LBU_FM_MMR6<0b000>; +class PAUSE_MMR6_ENC : POOL32A_PAUSE_FM_MMR6<"pause", 0b00101>; +class RDHWR_MMR6_ENC : POOL32A_RDHWR_FM_MMR6; +class WAIT_MMR6_ENC : WAIT_FM_MM, MMR6Arch<"wait">; +class SSNOP_MMR6_ENC : BARRIER_FM_MM<0x1>, MMR6Arch<"ssnop">; +class SYNC_MMR6_ENC : POOL32A_SYNC_FM_MMR6; +class SYNCI_MMR6_ENC : POOL32I_SYNCI_FM_MMR6, MMR6Arch<"synci">; +class RDPGPR_MMR6_ENC : POOL32A_RDPGPR_FM_MMR6<0b1110000101>; +class SDBBP_MMR6_ENC : SDBBP_FM_MM, MMR6Arch<"sdbbp">; class XOR_MMR6_ENC : ARITH_FM_MMR6<"xor", 0x310>; class XORI_MMR6_ENC : ADDI_FM_MMR6<"xori", 0x1c>; +class ABS_S_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.s", 0, 0b0001101>; +class ABS_D_MMR6_ENC : POOL32F_ABS_FM_MMR6<"abs.d", 1, 0b0001101>; +class FLOOR_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.s", 0, 0b00001100>; +class FLOOR_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.l.d", 1, 0b00001100>; +class FLOOR_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.s", 0, 0b00101100>; +class FLOOR_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"floor.w.d", 1, 0b00101100>; +class CEIL_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.s", 0, 0b01001100>; +class CEIL_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.l.d", 1, 0b01001100>; +class CEIL_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.s", 0, 0b01101100>; +class CEIL_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"ceil.w.d", 1, 0b01101100>; +class TRUNC_L_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.s", 0, 0b10001100>; +class TRUNC_L_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.l.d", 1, 0b10001100>; +class TRUNC_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.s", 0, 0b10101100>; +class TRUNC_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.d", 1, 0b10101100>; +class SQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.s", 0, 0b00101000>; +class SQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.d", 1, 0b00101000>; +class RSQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.s", 0, 0b00001000>; +class RSQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.d", 1, 0b00001000>; +class SB_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b000110>; +class SBE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b100>; +class SCE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b110>; +class SH_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b001110>; +class SHE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b101>; +class LLE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b110>; +class LWE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b111>; 
+class LW_MMR6_ENC : LOAD_WORD_FM_MMR6; +class LUI_MMR6_ENC : LOAD_UPPER_IMM_FM_MMR6; +class RECIP_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.s", 0, 0b01001000>; +class RECIP_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.d", 1, 0b01001000>; +class RINT_S_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.s", 0>; +class RINT_D_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.d", 1>; +class ROUND_L_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.s", 0, + 0b11001100>; +class ROUND_L_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.d", 1, + 0b11001100>; +class ROUND_W_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.s", 0, + 0b11101100>; +class ROUND_W_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.w.d", 1, + 0b11101100>; +class SEL_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.s", 0, 0b010111000>; +class SEL_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"sel.d", 1, 0b010111000>; +class SELEQZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.s", 0, 0b000111000>; +class SELEQZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"seleqz.d", 1, 0b000111000>; +class SELENZ_S_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selenz.s", 0, 0b001111000>; +class SELENZ_D_MMR6_ENC : POOL32F_SEL_FM_MMR6<"selenz.d", 1, 0b001111000>; +class CLASS_S_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.s", 0, 0b001100000>; +class CLASS_D_MMR6_ENC : POOL32F_CLASS_FM_MMR6<"class.d", 1, 0b001100000>; + +class ADDU16_MMR6_ENC : POOL16A_ADDU16_FM_MMR6; +class AND16_MMR6_ENC : POOL16C_AND16_FM_MMR6; +class ANDI16_MMR6_ENC : ANDI_FM_MM16<0b001011>, MicroMipsR6Inst16; +class NOT16_MMR6_ENC : POOL16C_NOT16_FM_MMR6; +class OR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1001>; +class SLL16_MMR6_ENC : SHIFT_FM_MM16<0>, MicroMipsR6Inst16; +class SRL16_MMR6_ENC : SHIFT_FM_MM16<1>, MicroMipsR6Inst16; +class BREAK16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b011011>; +class LI16_MMR6_ENC : LI_FM_MM16; +class MOVE16_MMR6_ENC : MOVE_FM_MM16<0b000011>; +class SDBBP16_MMR6_ENC : POOL16C_BREAKPOINT_FM_MMR6<0b111011>; +class SUBU16_MMR6_ENC : POOL16A_SUBU16_FM_MMR6; +class XOR16_MMR6_ENC : POOL16C_OR16_XOR16_FM_MMR6<0b1000>; class CMP_CBR_RT_Z_MMR6_DESC_BASE @@ -108,6 +208,43 @@ class BNEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bnezalc", brtarget_mm, list Defs = [RA]; } +/// Floating Point Instructions +class FADD_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.s", 0, 0b00110000>; +class FADD_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.d", 1, 0b00110000>; +class FSUB_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.s", 0, 0b01110000>; +class FSUB_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.d", 1, 0b01110000>; +class FMUL_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.s", 0, 0b10110000>; +class FMUL_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.d", 1, 0b10110000>; +class FDIV_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.s", 0, 0b11110000>; +class FDIV_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.d", 1, 0b11110000>; +class MADDF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.s", 0, 0b110111000>; +class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>; +class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>; +class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>; +class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>; +class FMOV_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.d", 1, 0b0000001>; +class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>; +class FNEG_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.d", 1, 0b0101101>; +class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>; +class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>; +class MAXA_S_MMR6_ENC : 
POOL32F_MINMAX_FM<"maxa.s", 0, 0b000101011>; +class MAXA_D_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.d", 1, 0b000101011>; +class MIN_S_MMR6_ENC : POOL32F_MINMAX_FM<"min.s", 0, 0b000000011>; +class MIN_D_MMR6_ENC : POOL32F_MINMAX_FM<"min.d", 1, 0b000000011>; +class MINA_S_MMR6_ENC : POOL32F_MINMAX_FM<"mina.s", 0, 0b000100011>; +class MINA_D_MMR6_ENC : POOL32F_MINMAX_FM<"mina.d", 1, 0b000100011>; + +class CVT_L_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.s", 0, 0b00000100>; +class CVT_L_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.d", 1, 0b00000100>; +class CVT_W_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.s", 0, 0b00100100>; +class CVT_W_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.d", 1, 0b00100100>; +class CVT_D_S_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.s", 0, 0b1001101>; +class CVT_D_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.w", 1, 0b1001101>; +class CVT_D_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.l", 2, 0b1001101>; +class CVT_S_D_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.d", 0, 0b1101101>; +class CVT_S_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.w", 1, 0b1101101>; +class CVT_S_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.l", 2, 0b1101101>; + //===----------------------------------------------------------------------===// // // Instruction Descriptions @@ -130,11 +267,34 @@ class BC_MMR6_DESC_BASE bit isBarrier = 1; } -class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26> { +class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26_mm> { bit isCall = 1; list Defs = [RA]; } -class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26>; +class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26_mm>; + +class BC16_MMR6_DESC : MicroMipsInst16<(outs), (ins brtarget10_mm:$offset), + !strconcat("bc16", "\t$offset"), [], + II_BC, FrmI>, + MMR6Arch<"bc16">, MicroMipsR6Inst16 { + let isBranch = 1; + let isTerminator = 1; + let isBarrier = 1; + let hasDelaySlot = 0; + let AdditionalPredicates = [RelocPIC]; + let Defs = [AT]; +} + +class BEQZC_BNEZC_MM16R6_DESC_BASE + : CBranchZeroMM, MMR6Arch { + let isBranch = 1; + let isTerminator = 1; + let hasDelaySlot = 0; + let Defs = [AT]; +} +class BEQZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"beqzc16">; +class BNEZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"bnezc16">; + class SUB_MMR6_DESC : ArithLogicR<"sub", GPR32Opnd>; class SUBU_MMR6_DESC : ArithLogicR<"subu", GPR32Opnd>; @@ -162,6 +322,35 @@ class CACHE_HINT_MMR6_DESC; class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd>; +class PREFE_CACHEE_MMR6_DESC_BASE : + CACHE_HINT_MMR6_DESC { + string DecoderMethod = "DecodePrefeOpMM"; +} + +class PREFE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"prefe", mem_mm_9, GPR32Opnd>; +class CACHEE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"cachee", mem_mm_9, GPR32Opnd>; + +class LB_LBU_MMR6_DESC_BASE : MMR6Arch { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins MemOpnd:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + string DecoderMethod = "DecodeLoadByte15"; + bit mayLoad = 1; +} +class LB_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lb", mem_mm_16, GPR32Opnd>; +class LBU_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lbu", mem_mm_16, GPR32Opnd>; + +class LBE_LBUE_MMR6_DESC_BASE + : LB_LBU_MMR6_DESC_BASE { + let DecoderMethod = "DecodeLoadByte9"; +} +class LBE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbe", mem_mm_9, GPR32Opnd>; +class LBUE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbue", mem_mm_9, GPR32Opnd>; + class CLO_CLZ_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rt); @@ -174,10 +363,22 @@ class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>; class 
EHB_MMR6_DESC : Barrier<"ehb">; class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd>; +class DI_MMR6_DESC : DEI_FT<"di", GPR32Opnd>; class ERET_MMR6_DESC : ER_FT<"eret">; +class DERET_MMR6_DESC : ER_FT<"deret">; class ERETNC_MMR6_DESC : ER_FT<"eretnc">; +class JALRC16_MMR6_DESC_BASE + : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), + [(MipsJmpLink RO:$rs)], II_JALR, FrmR>, + MMR6Arch, MicroMipsR6Inst16 { + let isCall = 1; + let hasDelaySlot = 0; + let Defs = [RA]; +} +class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>; + class JMP_MMR6_IDX_COMPACT_DESC_BASE : MMR6Arch { @@ -200,6 +401,27 @@ class JIC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, list Defs = [AT]; } +class JRC16_MMR6_DESC_BASE + : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), + [], II_JR, FrmR>, + MMR6Arch, MicroMipsR6Inst16 { + let hasDelaySlot = 0; + let isBranch = 1; + let isIndirectBranch = 1; +} +class JRC16_MMR6_DESC : JRC16_MMR6_DESC_BASE<"jrc16", GPR32Opnd>; + +class JRCADDIUSP_MMR6_DESC + : MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jrcaddiusp\t$imm", + [], II_JRADDIUSP, FrmR>, + MMR6Arch<"jrcaddiusp">, MicroMipsR6Inst16 { + let hasDelaySlot = 0; + let isTerminator = 1; + let isBarrier = 1; + let isBranch = 1; + let isIndirectBranch = 1; +} + class ALIGN_MMR6_DESC_BASE : MMR6Arch { dag OutOperandList = (outs GPROpnd:$rd); @@ -241,7 +463,7 @@ class LSA_MMR6_DESC_BASE Pattern = []; } -class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2>; +class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1>; class PCREL_MMR6_DESC_BASE : MMR6Arch { @@ -264,6 +486,18 @@ class SELEQNE_Z_MMR6_DESC_BASE class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd>; class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd>; +class PAUSE_MMR6_DESC : Barrier<"pause">; +class RDHWR_MMR6_DESC : MMR6Arch<"rdhwr">, MipsR6Inst { + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins HWRegsOpnd:$rs, uimm3:$sel); + string AsmString = !strconcat("rdhwr", "\t$rt, $rs, $sel"); + list Pattern = []; + InstrItinClass Itinerary = II_RDHWR; + Format Form = FrmR; +} + +class WAIT_MMR6_DESC : WaitMM<"wait">; +class SSNOP_MMR6_DESC : Barrier<"ssnop">; class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>; class DIV_MMR6_DESC : ArithLogicR<"div", GPR32Opnd>; class DIVU_MMR6_DESC : ArithLogicR<"divu", GPR32Opnd>; @@ -277,13 +511,426 @@ class ORI_MMR6_DESC : ArithLogicI<"ori", simm16, GPR32Opnd>; class XOR_MMR6_DESC : ArithLogicR<"xor", GPR32Opnd>; class XORI_MMR6_DESC : ArithLogicI<"xori", simm16, GPR32Opnd>; +class SWE_MMR6_DESC_BASE : + InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"), + [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> { + let DecoderMethod = "DecodeMem"; + let mayStore = 1; +} +class SW_MMR6_DESC : Store<"sw", GPR32Opnd>; +class SWE_MMR6_DESC : SWE_MMR6_DESC_BASE<"swe", GPR32Opnd, mem_simm9>; + +class WRPGPR_WSBH_MMR6_DESC_BASE + : MMR6Arch { + dag InOperandList = (ins RO:$rs); + dag OutOperandList = (outs RO:$rt); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs"); + list Pattern = []; + Format f = FrmR; + string BaseOpcode = instr_asm; + bit hasSideEffects = 0; +} +class WRPGPR_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wrpgpr", GPR32Opnd>; +class WSBH_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wsbh", GPR32Opnd>; + +/// Floating Point Instructions +class FARITH_MMR6_DESC_BASE : HARDFLOAT { + dag OutOperandList = (outs RC:$fd); + dag InOperandList = (ins 
RC:$ft, RC:$fs); + string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft"); + list Pattern = [(set RC:$fd, (OpNode RC:$fs, RC:$ft))]; + InstrItinClass Itinerary = Itin; + bit isCommutable = isComm; +} +class FADD_S_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"add.s", FGR32Opnd, II_ADD_S, 1, fadd>; +class FADD_D_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"add.d", AFGR64Opnd, II_ADD_D, 1, fadd>; +class FSUB_S_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>; +class FSUB_D_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"sub.d", AFGR64Opnd, II_SUB_D, 0, fsub>; +class FMUL_S_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"mul.s", FGR32Opnd, II_MUL_S, 1, fmul>; +class FMUL_D_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"mul.d", AFGR64Opnd, II_MUL_D, 1, fmul>; +class FDIV_S_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>; +class FDIV_D_MMR6_DESC + : FARITH_MMR6_DESC_BASE<"div.d", AFGR64Opnd, II_DIV_D, 0, fdiv>; +class MADDF_S_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>, HARDFLOAT; +class MADDF_D_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>, HARDFLOAT; +class MSUBF_S_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>, HARDFLOAT; +class MSUBF_D_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd>, HARDFLOAT; + +class FMOV_FNEG_MMR6_DESC_BASE + : HARDFLOAT, NeverHasSideEffects { + dag OutOperandList = (outs DstRC:$ft); + dag InOperandList = (ins SrcRC:$fs); + string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); + list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; + InstrItinClass Itinerary = Itin; + Format Form = FrmFR; +} +class FMOV_S_MMR6_DESC + : FMOV_FNEG_MMR6_DESC_BASE<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>; +class FMOV_D_MMR6_DESC + : FMOV_FNEG_MMR6_DESC_BASE<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>; +class FNEG_S_MMR6_DESC + : FMOV_FNEG_MMR6_DESC_BASE<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>; +class FNEG_D_MMR6_DESC + : FMOV_FNEG_MMR6_DESC_BASE<"neg.d", AFGR64Opnd, AFGR64Opnd, II_NEG, fneg>; + +class MAX_S_MMR6_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd>, HARDFLOAT; +class MAX_D_MMR6_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd>, HARDFLOAT; +class MIN_S_MMR6_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd>, HARDFLOAT; +class MIN_D_MMR6_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd>, HARDFLOAT; + +class MAXA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd>, HARDFLOAT; +class MAXA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd>, HARDFLOAT; +class MINA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd>, HARDFLOAT; +class MINA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd>, HARDFLOAT; + +class CVT_MMR6_DESC_BASE< + string instr_asm, RegisterOperand DstRC, RegisterOperand SrcRC, + InstrItinClass Itin, SDPatternOperator OpNode = null_frag> + : HARDFLOAT, NeverHasSideEffects { + dag OutOperandList = (outs DstRC:$ft); + dag InOperandList = (ins SrcRC:$fs); + string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); + list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; + InstrItinClass Itinerary = Itin; + Format Form = FrmFR; +} + +class CVT_L_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.s", FGR64Opnd, FGR32Opnd, + II_CVT>; +class CVT_L_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.l.d", FGR64Opnd, FGR64Opnd, + II_CVT>; +class CVT_W_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.s", FGR32Opnd, FGR32Opnd, + II_CVT>; +class CVT_W_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.w.d", FGR32Opnd, AFGR64Opnd, + II_CVT>; +class CVT_D_S_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.s", FGR32Opnd, AFGR64Opnd, + II_CVT>; +class CVT_D_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.w", FGR32Opnd, 
AFGR64Opnd, + II_CVT>; +class CVT_D_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.d.l", FGR64Opnd, FGR64Opnd, + II_CVT>, FGR_64; +class CVT_S_D_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.d", AFGR64Opnd, FGR32Opnd, + II_CVT>; +class CVT_S_W_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.w", FGR32Opnd, FGR32Opnd, + II_CVT>; +class CVT_S_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.l", FGR64Opnd, FGR32Opnd, + II_CVT>, FGR_64; + +multiclass CMP_CC_MMR6 format, string Typestr, + RegisterOperand FGROpnd> { + def CMP_AF_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.af.", Typestr), format, FIELD_CMP_COND_AF>, + CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_UN_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.un.", Typestr), format, FIELD_CMP_COND_UN>, + CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_EQ_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.eq.", Typestr), format, FIELD_CMP_COND_EQ>, + CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_UEQ_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.ueq.", Typestr), format, FIELD_CMP_COND_UEQ>, + CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_LT_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.lt.", Typestr), format, FIELD_CMP_COND_LT>, + CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_ULT_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.ult.", Typestr), format, FIELD_CMP_COND_ULT>, + CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_LE_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.le.", Typestr), format, FIELD_CMP_COND_LE>, + CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_ULE_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.ule.", Typestr), format, FIELD_CMP_COND_ULE>, + CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SAF_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.saf.", Typestr), format, FIELD_CMP_COND_SAF>, + CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SUN_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sun.", Typestr), format, FIELD_CMP_COND_SUN>, + CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SEQ_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.seq.", Typestr), format, FIELD_CMP_COND_SEQ>, + CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SUEQ_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sueq.", Typestr), format, FIELD_CMP_COND_SUEQ>, + CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SLT_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.slt.", Typestr), format, FIELD_CMP_COND_SLT>, + CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SULT_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sult.", Typestr), format, FIELD_CMP_COND_SULT>, + CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SLE_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sle.", Typestr), format, FIELD_CMP_COND_SLE>, + CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + ISA_MICROMIPS32R6; + def CMP_SULE_#NAME : POOL32F_CMP_FM< + !strconcat("cmp.sule.", Typestr), format, FIELD_CMP_COND_SULE>, + CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, HARDFLOAT, R6MMR6Rel, + 
ISA_MICROMIPS32R6; +} + +class ABSS_FT_MMR6_DESC_BASE + : HARDFLOAT, NeverHasSideEffects { + dag OutOperandList = (outs DstRC:$ft); + dag InOperandList = (ins SrcRC:$fs); + string AsmString = !strconcat(instr_asm, "\t$ft, $fs"); + list Pattern = [(set DstRC:$ft, (OpNode SrcRC:$fs))]; + InstrItinClass Itinerary = Itin; + Format Form = FrmFR; + list EncodingPredicates = [HasStdEnc]; +} + +class ABS_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"abs.s", FGR32Opnd, FGR32Opnd, + II_ABS, fabs>; +class ABS_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"abs.d", AFGR64Opnd, AFGR64Opnd, + II_ABS, fabs>; +class FLOOR_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.s", FGR64Opnd, + FGR32Opnd, II_FLOOR>; +class FLOOR_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.l.d", FGR64Opnd, + FGR64Opnd, II_FLOOR>; +class FLOOR_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.s", FGR32Opnd, + FGR32Opnd, II_FLOOR>; +class FLOOR_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"floor.w.d", FGR32Opnd, + AFGR64Opnd, II_FLOOR>; +class CEIL_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.s", FGR64Opnd, + FGR32Opnd, II_CEIL>; +class CEIL_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.l.d", FGR64Opnd, + FGR64Opnd, II_CEIL>; +class CEIL_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.s", FGR32Opnd, + FGR32Opnd, II_CEIL>; +class CEIL_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"ceil.w.d", FGR32Opnd, + AFGR64Opnd, II_CEIL>; +class TRUNC_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.s", FGR64Opnd, + FGR32Opnd, II_TRUNC>; +class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd, + FGR64Opnd, II_TRUNC>; +class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd, + FGR32Opnd, II_TRUNC>; +class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd, + AFGR64Opnd, II_TRUNC>; +class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd, + II_SQRT_S, fsqrt>; +class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd, + II_SQRT_D, fsqrt>; +class RSQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.s", FGR32Opnd, + FGR32Opnd, II_TRUNC>; +class RSQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.d", FGR32Opnd, + AFGR64Opnd, II_TRUNC>; +class RECIP_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.s", FGR32Opnd, + FGR32Opnd, II_ROUND>; +class RECIP_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.d", FGR32Opnd, FGR32Opnd, + II_ROUND>; +class ROUND_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.s", FGR64Opnd, + FGR32Opnd, II_ROUND>; +class ROUND_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.d", FGR64Opnd, + FGR64Opnd, II_ROUND>; +class ROUND_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.s", FGR32Opnd, + FGR32Opnd, II_ROUND>; +class ROUND_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.d", FGR64Opnd, + FGR64Opnd, II_ROUND>; + +class SEL_S_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>; +class SEL_D_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd> { + // We must insert a SUBREG_TO_REG around $fd_in + bit usesCustomInserter = 1; +} + +class SELEQZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd>; +class SELEQZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd>; +class SELENZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd>; +class SELENZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd>; +class RINT_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd>; +class RINT_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>; +class CLASS_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>; +class CLASS_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>; + 
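Note the two-axis structure running through this whole file: each *_ENC class fixes only the bit layout (through the format classes from MicroMips32r6InstrFormats.td), each *_DESC class fixes only operands, assembly text and selection patterns, and a later def multi-inherits one of each plus the ISA predicate. A minimal self-contained sketch of that composition, using hypothetical Demo* names rather than anything from this import (parses with llvm-tblgen):

    // Encoding axis: bit layout only.
    class DemoEnc<bits<6> op> {
      bits<32> Inst;
      let Inst{31-26} = op; // major opcode; the rest stays open here
    }

    // Description axis: stands in for the operand lists, AsmString and
    // patterns carried by the real *_DESC classes.
    class DemoDesc<string asm> {
      string AsmString = asm;
    }

    // The definition mixes one class from each axis. The real defs also mix
    // in relation records (StdMMR6Rel/R6MMR6Rel) and predicates such as
    // ISA_MICROMIPS32R6, omitted here for brevity.
    def DEMO : DemoEnc<0b000000>, DemoDesc<"demo">;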
+class STORE_MMR6_DESC_BASE + : Store, MMR6Arch { + let DecoderMethod = "DecodeMemMMImm16"; +} +class SB_MMR6_DESC : STORE_MMR6_DESC_BASE<"sb", GPR32Opnd>; + +class STORE_EVA_MMR6_DESC_BASE + : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs); + dag InOperandList = (ins RO:$rt, mem_mm_9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + string DecoderMethod = "DecodeStoreEvaOpMM"; + bit mayStore = 1; +} +class SBE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sbe", GPR32Opnd>; +class SCE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sce", GPR32Opnd>; +class SH_MMR6_DESC : STORE_MMR6_DESC_BASE<"sh", GPR32Opnd>; +class SHE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"she", GPR32Opnd>; +class LOAD_WORD_EVA_MMR6_DESC_BASE : + MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs RO:$rt); + dag InOperandList = (ins mem_mm_12:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + string DecoderMethod = "DecodeMemMMImm9"; + bit mayLoad = 1; +} +class LLE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lle", GPR32Opnd>; +class LWE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lwe", GPR32Opnd>; +class ADDU16_MMR6_DESC : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>, + MMR6Arch<"addu16">; +class AND16_MMR6_DESC : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>, + MMR6Arch<"and16">; +class ANDI16_MMR6_DESC : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, + MMR6Arch<"andi16">; +class NOT16_MMR6_DESC : NotMM16<"not16", GPRMM16Opnd>, MMR6Arch<"not16">; +class OR16_MMR6_DESC : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>, + MMR6Arch<"or16">; +class SLL16_MMR6_DESC : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>, + MMR6Arch<"sll16">; +class SRL16_MMR6_DESC : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>, + MMR6Arch<"srl16">; +class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16">, MMR6Arch<"srl16">, + MicroMipsR6Inst16; +class LI16_MMR6_DESC : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>, + MMR6Arch<"srl16">, MicroMipsR6Inst16, IsAsCheapAsAMove; +class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"srl16">, + MicroMipsR6Inst16; +class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16">, MMR6Arch<"sdbbp16">, + MicroMipsR6Inst16; +class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, + MMR6Arch<"sdbbp16">, MicroMipsR6Inst16; +class XOR16_MMR6_DESC : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>, + MMR6Arch<"sdbbp16">, MicroMipsR6Inst16; + +class LW_MMR6_DESC : MMR6Arch<"lw">, MipsR6Inst { + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins mem:$addr); + string AsmString = "lw\t$rt, $addr"; + let DecoderMethod = "DecodeMemMMImm16"; + let canFoldAsLoad = 1; + let mayLoad = 1; + list Pattern = [(set GPR32Opnd:$rt, (load addrDefault:$addr))]; + InstrItinClass Itinerary = II_LW; +} + +class LUI_MMR6_DESC : IsAsCheapAsAMove, MMR6Arch<"lui">, MipsR6Inst{ + dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins uimm16:$imm16); + string AsmString = "lui\t$rt, $imm16"; + list Pattern = []; + bit hasSideEffects = 0; + bit isReMaterializable = 1; + InstrItinClass Itinerary = II_LUI; + Format Form = FrmI; +} + +class SYNC_MMR6_DESC : MMR6Arch<"sync">, MipsR6Inst { + dag OutOperandList = (outs); + dag InOperandList = (ins i32imm:$stype); + string AsmString = !strconcat("sync", "\t$stype"); + list Pattern = [(MipsSync imm:$stype)]; + InstrItinClass Itinerary = NoItinerary; + bit HasSideEffects = 1; +} + +class SYNCI_MMR6_DESC : SYNCI_FT<"synci"> { + let DecoderMethod = "DecodeSynciR6"; +} + +class RDPGPR_MMR6_DESC : MMR6Arch<"rdpgpr">, MipsR6Inst { + 
dag OutOperandList = (outs GPR32Opnd:$rt); + dag InOperandList = (ins GPR32Opnd:$rd); + string AsmString = !strconcat("rdpgpr", "\t$rt, $rd"); +} + +class SDBBP_MMR6_DESC : MipsR6Inst { + dag OutOperandList = (outs); + dag InOperandList = (ins uimm20:$code_); + string AsmString = !strconcat("sdbbp", "\t$code_"); + list Pattern = []; +} + +class LWM16_MMR6_DESC + : MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr), + !strconcat("lwm16", "\t$rt, $addr"), [], + NoItinerary, FrmI>, + MMR6Arch<"lwm16">, MicroMipsR6Inst16 { + let DecoderMethod = "DecodeMemMMReglistImm4Lsl2"; + let mayLoad = 1; + InstrItinClass Itin = NoItinerary; + ComplexPattern Addr = addr; +} + +class SWM16_MMR6_DESC + : MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr), + !strconcat("swm16", "\t$rt, $addr"), [], + NoItinerary, FrmI>, + MMR6Arch<"swm16">, MicroMipsR6Inst16 { + let DecoderMethod = "DecodeMemMMReglistImm4Lsl2"; + let mayStore = 1; + InstrItinClass Itin = NoItinerary; + ComplexPattern Addr = addr; +} + +class SB16_MMR6_DESC_BASE + : MicroMipsInst16<(outs), (ins RTOpnd:$rt, MemOpnd:$addr), + !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI>, + MMR6Arch, MicroMipsR6Inst16 { + let DecoderMethod = "DecodeMemMMImm4"; + let mayStore = 1; +} +class SB16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sb16", GPRMM16OpndZero, GPRMM16Opnd, + truncstorei8, II_SB, mem_mm_4>; +class SH16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sh16", GPRMM16OpndZero, GPRMM16Opnd, + truncstorei16, II_SH, mem_mm_4_lsl1>; +class SW16_MMR6_DESC : SB16_MMR6_DESC_BASE<"sw16", GPRMM16OpndZero, GPRMM16Opnd, + store, II_SW, mem_mm_4_lsl2>; + +class SWSP_MMR6_DESC + : MicroMipsInst16<(outs), (ins GPR32Opnd:$rt, mem_mm_sp_imm5_lsl2:$offset), + !strconcat("sw", "\t$rt, $offset"), [], II_SW, FrmI>, + MMR6Arch<"sw">, MicroMipsR6Inst16 { + let DecoderMethod = "DecodeMemMMSPImm5Lsl2"; + let mayStore = 1; +} + //===----------------------------------------------------------------------===// // // Instruction Definitions // //===----------------------------------------------------------------------===// -let DecoderNamespace = "MicroMips32r6" in { +let DecoderNamespace = "MicroMipsR6" in { def ADD_MMR6 : StdMMR6Rel, ADD_MMR6_DESC, ADD_MMR6_ENC, ISA_MICROMIPS32R6; def ADDIU_MMR6 : StdMMR6Rel, ADDIU_MMR6_DESC, ADDIU_MMR6_ENC, ISA_MICROMIPS32R6; def ADDU_MMR6 : StdMMR6Rel, ADDU_MMR6_DESC, ADDU_MMR6_ENC, ISA_MICROMIPS32R6; @@ -298,6 +945,11 @@ def ALIGN_MMR6 : R6MMR6Rel, ALIGN_MMR6_ENC, ALIGN_MMR6_DESC, ISA_MICROMIPS32R6; def AUI_MMR6 : R6MMR6Rel, AUI_MMR6_ENC, AUI_MMR6_DESC, ISA_MICROMIPS32R6; def BALC_MMR6 : R6MMR6Rel, BALC_MMR6_ENC, BALC_MMR6_DESC, ISA_MICROMIPS32R6; def BC_MMR6 : R6MMR6Rel, BC_MMR6_ENC, BC_MMR6_DESC, ISA_MICROMIPS32R6; +def BC16_MMR6 : StdMMR6Rel, BC16_MMR6_DESC, BC16_MMR6_ENC, ISA_MICROMIPS32R6; +def BEQZC16_MMR6 : StdMMR6Rel, BEQZC16_MMR6_DESC, BEQZC16_MMR6_ENC, + ISA_MICROMIPS32R6; +def BNEZC16_MMR6 : StdMMR6Rel, BNEZC16_MMR6_DESC, BNEZC16_MMR6_ENC, + ISA_MICROMIPS32R6; def BITSWAP_MMR6 : R6MMR6Rel, BITSWAP_MMR6_ENC, BITSWAP_MMR6_DESC, ISA_MICROMIPS32R6; def BEQZALC_MMR6 : R6MMR6Rel, BEQZALC_MMR6_ENC, BEQZALC_MMR6_DESC, @@ -320,13 +972,21 @@ def DIV_MMR6 : R6MMR6Rel, DIV_MMR6_DESC, DIV_MMR6_ENC, ISA_MICROMIPS32R6; def DIVU_MMR6 : R6MMR6Rel, DIVU_MMR6_DESC, DIVU_MMR6_ENC, ISA_MICROMIPS32R6; def EHB_MMR6 : StdMMR6Rel, EHB_MMR6_DESC, EHB_MMR6_ENC, ISA_MICROMIPS32R6; def EI_MMR6 : StdMMR6Rel, EI_MMR6_DESC, EI_MMR6_ENC, ISA_MICROMIPS32R6; -def ERET_MMR6 : R6MMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6; +def DI_MMR6 : StdMMR6Rel, 
DI_MMR6_DESC, DI_MMR6_ENC, ISA_MICROMIPS32R6; +def ERET_MMR6 : StdMMR6Rel, ERET_MMR6_DESC, ERET_MMR6_ENC, ISA_MICROMIPS32R6; +def DERET_MMR6 : StdMMR6Rel, DERET_MMR6_DESC, DERET_MMR6_ENC, ISA_MICROMIPS32R6; def ERETNC_MMR6 : R6MMR6Rel, ERETNC_MMR6_DESC, ERETNC_MMR6_ENC, ISA_MICROMIPS32R6; +def JALRC16_MMR6 : R6MMR6Rel, JALRC16_MMR6_DESC, JALRC16_MMR6_ENC, + ISA_MICROMIPS32R6; def JIALC_MMR6 : R6MMR6Rel, JIALC_MMR6_ENC, JIALC_MMR6_DESC, ISA_MICROMIPS32R6; def JIC_MMR6 : R6MMR6Rel, JIC_MMR6_ENC, JIC_MMR6_DESC, ISA_MICROMIPS32R6; +def JRC16_MMR6 : R6MMR6Rel, JRC16_MMR6_DESC, JRC16_MMR6_ENC, ISA_MICROMIPS32R6; +def JRCADDIUSP_MMR6 : R6MMR6Rel, JRCADDIUSP_MMR6_DESC, JRCADDIUSP_MMR6_ENC, + ISA_MICROMIPS32R6; def LSA_MMR6 : R6MMR6Rel, LSA_MMR6_ENC, LSA_MMR6_DESC, ISA_MICROMIPS32R6; def LWPC_MMR6 : R6MMR6Rel, LWPC_MMR6_ENC, LWPC_MMR6_DESC, ISA_MICROMIPS32R6; +def LWM16_MMR6 : StdMMR6Rel, LWM16_MMR6_DESC, LWM16_MMR6_ENC, ISA_MICROMIPS32R6; def MOD_MMR6 : R6MMR6Rel, MOD_MMR6_DESC, MOD_MMR6_ENC, ISA_MICROMIPS32R6; def MODU_MMR6 : R6MMR6Rel, MODU_MMR6_DESC, MODU_MMR6_ENC, ISA_MICROMIPS32R6; def MUL_MMR6 : R6MMR6Rel, MUL_MMR6_DESC, MUL_MMR6_ENC, ISA_MICROMIPS32R6; @@ -337,17 +997,211 @@ def NOR_MMR6 : StdMMR6Rel, NOR_MMR6_DESC, NOR_MMR6_ENC, ISA_MICROMIPS32R6; def OR_MMR6 : StdMMR6Rel, OR_MMR6_DESC, OR_MMR6_ENC, ISA_MICROMIPS32R6; def ORI_MMR6 : StdMMR6Rel, ORI_MMR6_DESC, ORI_MMR6_ENC, ISA_MICROMIPS32R6; def PREF_MMR6 : R6MMR6Rel, PREF_MMR6_ENC, PREF_MMR6_DESC, ISA_MICROMIPS32R6; +def SB16_MMR6 : StdMMR6Rel, SB16_MMR6_DESC, SB16_MMR6_ENC, ISA_MICROMIPS32R6; def SEB_MMR6 : StdMMR6Rel, SEB_MMR6_DESC, SEB_MMR6_ENC, ISA_MICROMIPS32R6; def SEH_MMR6 : StdMMR6Rel, SEH_MMR6_DESC, SEH_MMR6_ENC, ISA_MICROMIPS32R6; def SELEQZ_MMR6 : R6MMR6Rel, SELEQZ_MMR6_ENC, SELEQZ_MMR6_DESC, ISA_MICROMIPS32R6; def SELNEZ_MMR6 : R6MMR6Rel, SELNEZ_MMR6_ENC, SELNEZ_MMR6_DESC, ISA_MICROMIPS32R6; +def SH16_MMR6 : StdMMR6Rel, SH16_MMR6_DESC, SH16_MMR6_ENC, ISA_MICROMIPS32R6; def SLL_MMR6 : StdMMR6Rel, SLL_MMR6_DESC, SLL_MMR6_ENC, ISA_MICROMIPS32R6; def SUB_MMR6 : StdMMR6Rel, SUB_MMR6_DESC, SUB_MMR6_ENC, ISA_MICROMIPS32R6; def SUBU_MMR6 : StdMMR6Rel, SUBU_MMR6_DESC, SUBU_MMR6_ENC, ISA_MICROMIPS32R6; +def SW16_MMR6 : StdMMR6Rel, SW16_MMR6_DESC, SW16_MMR6_ENC, ISA_MICROMIPS32R6; +def SWM16_MMR6 : StdMMR6Rel, SWM16_MMR6_DESC, SWM16_MMR6_ENC, ISA_MICROMIPS32R6; +def SWSP_MMR6 : StdMMR6Rel, SWSP_MMR6_DESC, SWSP_MMR6_ENC, ISA_MICROMIPS32R6; +def PREFE_MMR6 : StdMMR6Rel, PREFE_MMR6_ENC, PREFE_MMR6_DESC, ISA_MICROMIPS32R6; +def CACHEE_MMR6 : StdMMR6Rel, CACHEE_MMR6_ENC, CACHEE_MMR6_DESC, + ISA_MICROMIPS32R6; +def WRPGPR_MMR6 : StdMMR6Rel, WRPGPR_MMR6_ENC, WRPGPR_MMR6_DESC, + ISA_MICROMIPS32R6; +def WSBH_MMR6 : StdMMR6Rel, WSBH_MMR6_ENC, WSBH_MMR6_DESC, ISA_MICROMIPS32R6; +def LB_MMR6 : R6MMR6Rel, LB_MMR6_ENC, LB_MMR6_DESC, ISA_MICROMIPS32R6; +def LBU_MMR6 : R6MMR6Rel, LBU_MMR6_ENC, LBU_MMR6_DESC, ISA_MICROMIPS32R6; +def LBE_MMR6 : R6MMR6Rel, LBE_MMR6_ENC, LBE_MMR6_DESC, ISA_MICROMIPS32R6; +def LBUE_MMR6 : R6MMR6Rel, LBUE_MMR6_ENC, LBUE_MMR6_DESC, ISA_MICROMIPS32R6; +def PAUSE_MMR6 : StdMMR6Rel, PAUSE_MMR6_DESC, PAUSE_MMR6_ENC, ISA_MICROMIPS32R6; +def RDHWR_MMR6 : R6MMR6Rel, RDHWR_MMR6_DESC, RDHWR_MMR6_ENC, ISA_MICROMIPS32R6; +def WAIT_MMR6 : StdMMR6Rel, WAIT_MMR6_DESC, WAIT_MMR6_ENC, ISA_MICROMIPS32R6; +def SSNOP_MMR6 : StdMMR6Rel, SSNOP_MMR6_DESC, SSNOP_MMR6_ENC, ISA_MICROMIPS32R6; +def SYNC_MMR6 : StdMMR6Rel, SYNC_MMR6_DESC, SYNC_MMR6_ENC, ISA_MICROMIPS32R6; +def SYNCI_MMR6 : StdMMR6Rel, SYNCI_MMR6_DESC, SYNCI_MMR6_ENC, ISA_MICROMIPS32R6; 
+def RDPGPR_MMR6 : R6MMR6Rel, RDPGPR_MMR6_DESC, RDPGPR_MMR6_ENC, + ISA_MICROMIPS32R6; +def SDBBP_MMR6 : R6MMR6Rel, SDBBP_MMR6_DESC, SDBBP_MMR6_ENC, ISA_MICROMIPS32R6; def XOR_MMR6 : StdMMR6Rel, XOR_MMR6_DESC, XOR_MMR6_ENC, ISA_MICROMIPS32R6; def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6; +let DecoderMethod = "DecodeMemMMImm16" in { + def SW_MMR6 : StdMMR6Rel, SW_MMR6_DESC, SW_MMR6_ENC, ISA_MICROMIPS32R6; +} +let DecoderMethod = "DecodeMemMMImm9" in { + def SWE_MMR6 : StdMMR6Rel, SWE_MMR6_DESC, SWE_MMR6_ENC, ISA_MICROMIPS32R6; +} +/// Floating Point Instructions +def FADD_S_MMR6 : StdMMR6Rel, FADD_S_MMR6_ENC, FADD_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FADD_D_MMR6 : StdMMR6Rel, FADD_D_MMR6_ENC, FADD_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FSUB_S_MMR6 : StdMMR6Rel, FSUB_S_MMR6_ENC, FSUB_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FSUB_D_MMR6 : StdMMR6Rel, FSUB_D_MMR6_ENC, FSUB_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FMUL_S_MMR6 : StdMMR6Rel, FMUL_S_MMR6_ENC, FMUL_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FMUL_D_MMR6 : StdMMR6Rel, FMUL_D_MMR6_ENC, FMUL_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FDIV_S_MMR6 : StdMMR6Rel, FDIV_S_MMR6_ENC, FDIV_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FDIV_D_MMR6 : StdMMR6Rel, FDIV_D_MMR6_ENC, FDIV_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def MADDF_S_MMR6 : R6MMR6Rel, MADDF_S_MMR6_ENC, MADDF_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def MADDF_D_MMR6 : R6MMR6Rel, MADDF_D_MMR6_ENC, MADDF_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def MSUBF_S_MMR6 : R6MMR6Rel, MSUBF_S_MMR6_ENC, MSUBF_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def MSUBF_D_MMR6 : R6MMR6Rel, MSUBF_D_MMR6_ENC, MSUBF_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FMOV_S_MMR6 : StdMMR6Rel, FMOV_S_MMR6_ENC, FMOV_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FMOV_D_MMR6 : StdMMR6Rel, FMOV_D_MMR6_ENC, FMOV_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FNEG_S_MMR6 : StdMMR6Rel, FNEG_S_MMR6_ENC, FNEG_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FNEG_D_MMR6 : StdMMR6Rel, FNEG_D_MMR6_ENC, FNEG_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def MAX_S_MMR6 : R6MMR6Rel, MAX_S_MMR6_ENC, MAX_S_MMR6_DESC, ISA_MICROMIPS32R6; +def MAX_D_MMR6 : R6MMR6Rel, MAX_D_MMR6_ENC, MAX_D_MMR6_DESC, ISA_MICROMIPS32R6; +def MIN_S_MMR6 : R6MMR6Rel, MIN_S_MMR6_ENC, MIN_S_MMR6_DESC, ISA_MICROMIPS32R6; +def MIN_D_MMR6 : R6MMR6Rel, MIN_D_MMR6_ENC, MIN_D_MMR6_DESC, ISA_MICROMIPS32R6; +def MAXA_S_MMR6 : R6MMR6Rel, MAXA_S_MMR6_ENC, MAXA_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def MAXA_D_MMR6 : R6MMR6Rel, MAXA_D_MMR6_ENC, MAXA_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def MINA_S_MMR6 : R6MMR6Rel, MINA_S_MMR6_ENC, MINA_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def MINA_D_MMR6 : R6MMR6Rel, MINA_D_MMR6_ENC, MINA_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_L_S_MMR6 : StdMMR6Rel, CVT_L_S_MMR6_ENC, CVT_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_L_D_MMR6 : StdMMR6Rel, CVT_L_D_MMR6_ENC, CVT_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_W_S_MMR6 : StdMMR6Rel, CVT_W_S_MMR6_ENC, CVT_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_W_D_MMR6 : StdMMR6Rel, CVT_W_D_MMR6_ENC, CVT_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_D_S_MMR6 : StdMMR6Rel, CVT_D_S_MMR6_ENC, CVT_D_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_D_W_MMR6 : StdMMR6Rel, CVT_D_W_MMR6_ENC, CVT_D_W_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_D_L_MMR6 : StdMMR6Rel, CVT_D_L_MMR6_ENC, CVT_D_L_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_S_D_MMR6 : StdMMR6Rel, CVT_S_D_MMR6_ENC, CVT_S_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_S_W_MMR6 : StdMMR6Rel, CVT_S_W_MMR6_ENC, CVT_S_W_MMR6_DESC, + ISA_MICROMIPS32R6; +def CVT_S_L_MMR6 : StdMMR6Rel, CVT_S_L_MMR6_ENC, CVT_S_L_MMR6_DESC, + 
ISA_MICROMIPS32R6; +defm S_MMR6 : CMP_CC_MMR6<0b000101, "s", FGR32Opnd>; +defm D_MMR6 : CMP_CC_MMR6<0b010101, "d", FGR64Opnd>; +def ABS_S_MMR6 : StdMMR6Rel, ABS_S_MMR6_ENC, ABS_S_MMR6_DESC, ISA_MICROMIPS32R6; +def ABS_D_MMR6 : StdMMR6Rel, ABS_D_MMR6_ENC, ABS_D_MMR6_DESC, ISA_MICROMIPS32R6; +def FLOOR_L_S_MMR6 : StdMMR6Rel, FLOOR_L_S_MMR6_ENC, FLOOR_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FLOOR_L_D_MMR6 : StdMMR6Rel, FLOOR_L_D_MMR6_ENC, FLOOR_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def FLOOR_W_S_MMR6 : StdMMR6Rel, FLOOR_W_S_MMR6_ENC, FLOOR_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def FLOOR_W_D_MMR6 : StdMMR6Rel, FLOOR_W_D_MMR6_ENC, FLOOR_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CEIL_L_S_MMR6 : StdMMR6Rel, CEIL_L_S_MMR6_ENC, CEIL_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CEIL_L_D_MMR6 : StdMMR6Rel, CEIL_L_D_MMR6_ENC, CEIL_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CEIL_W_S_MMR6 : StdMMR6Rel, CEIL_W_S_MMR6_ENC, CEIL_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CEIL_W_D_MMR6 : StdMMR6Rel, CEIL_W_D_MMR6_ENC, CEIL_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def TRUNC_L_S_MMR6 : StdMMR6Rel, TRUNC_L_S_MMR6_ENC, TRUNC_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def TRUNC_L_D_MMR6 : StdMMR6Rel, TRUNC_L_D_MMR6_ENC, TRUNC_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def TRUNC_W_S_MMR6 : StdMMR6Rel, TRUNC_W_S_MMR6_ENC, TRUNC_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def TRUNC_W_D_MMR6 : StdMMR6Rel, TRUNC_W_D_MMR6_ENC, TRUNC_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def SQRT_S_MMR6 : StdMMR6Rel, SQRT_S_MMR6_ENC, SQRT_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def SQRT_D_MMR6 : StdMMR6Rel, SQRT_D_MMR6_ENC, SQRT_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def RSQRT_S_MMR6 : StdMMR6Rel, RSQRT_S_MMR6_ENC, RSQRT_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def RSQRT_D_MMR6 : StdMMR6Rel, RSQRT_D_MMR6_ENC, RSQRT_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def SB_MMR6 : StdMMR6Rel, SB_MMR6_DESC, SB_MMR6_ENC, ISA_MICROMIPS32R6; +def SBE_MMR6 : StdMMR6Rel, SBE_MMR6_DESC, SBE_MMR6_ENC, ISA_MICROMIPS32R6; +def SCE_MMR6 : StdMMR6Rel, SCE_MMR6_DESC, SCE_MMR6_ENC, ISA_MICROMIPS32R6; +def SH_MMR6 : StdMMR6Rel, SH_MMR6_DESC, SH_MMR6_ENC, ISA_MICROMIPS32R6; +def SHE_MMR6 : StdMMR6Rel, SHE_MMR6_DESC, SHE_MMR6_ENC, ISA_MICROMIPS32R6; +def LLE_MMR6 : StdMMR6Rel, LLE_MMR6_DESC, LLE_MMR6_ENC, ISA_MICROMIPS32R6; +def LWE_MMR6 : StdMMR6Rel, LWE_MMR6_DESC, LWE_MMR6_ENC, ISA_MICROMIPS32R6; +def LW_MMR6 : StdMMR6Rel, LW_MMR6_DESC, LW_MMR6_ENC, ISA_MICROMIPS32R6; +def LUI_MMR6 : R6MMR6Rel, LUI_MMR6_DESC, LUI_MMR6_ENC, ISA_MICROMIPS32R6; +def ADDU16_MMR6 : StdMMR6Rel, ADDU16_MMR6_DESC, ADDU16_MMR6_ENC, + ISA_MICROMIPS32R6; +def AND16_MMR6 : StdMMR6Rel, AND16_MMR6_DESC, AND16_MMR6_ENC, + ISA_MICROMIPS32R6; +def ANDI16_MMR6 : StdMMR6Rel, ANDI16_MMR6_DESC, ANDI16_MMR6_ENC, + ISA_MICROMIPS32R6; +def NOT16_MMR6 : StdMMR6Rel, NOT16_MMR6_DESC, NOT16_MMR6_ENC, + ISA_MICROMIPS32R6; +def OR16_MMR6 : StdMMR6Rel, OR16_MMR6_DESC, OR16_MMR6_ENC, + ISA_MICROMIPS32R6; +def SLL16_MMR6 : StdMMR6Rel, SLL16_MMR6_DESC, SLL16_MMR6_ENC, + ISA_MICROMIPS32R6; +def SRL16_MMR6 : StdMMR6Rel, SRL16_MMR6_DESC, SRL16_MMR6_ENC, + ISA_MICROMIPS32R6; +def BREAK16_MMR6 : StdMMR6Rel, BREAK16_MMR6_DESC, BREAK16_MMR6_ENC, + ISA_MICROMIPS32R6; +def LI16_MMR6 : StdMMR6Rel, LI16_MMR6_DESC, LI16_MMR6_ENC, + ISA_MICROMIPS32R6; +def MOVE16_MMR6 : StdMMR6Rel, MOVE16_MMR6_DESC, MOVE16_MMR6_ENC, + ISA_MICROMIPS32R6; +def SDBBP16_MMR6 : StdMMR6Rel, SDBBP16_MMR6_DESC, SDBBP16_MMR6_ENC, + ISA_MICROMIPS32R6; +def SUBU16_MMR6 : StdMMR6Rel, SUBU16_MMR6_DESC, SUBU16_MMR6_ENC, + ISA_MICROMIPS32R6; +def XOR16_MMR6 : StdMMR6Rel, XOR16_MMR6_DESC, XOR16_MMR6_ENC, + 
ISA_MICROMIPS32R6; +def RECIP_S_MMR6 : StdMMR6Rel, RECIP_S_MMR6_ENC, RECIP_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def RECIP_D_MMR6 : StdMMR6Rel, RECIP_D_MMR6_ENC, RECIP_D_MMR6_DESC, ISA_MICROMIPS32R6; +def RINT_S_MMR6 : StdMMR6Rel, RINT_S_MMR6_ENC, RINT_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def RINT_D_MMR6 : StdMMR6Rel, RINT_D_MMR6_ENC, RINT_D_MMR6_DESC, ISA_MICROMIPS32R6; +def ROUND_L_S_MMR6 : StdMMR6Rel, ROUND_L_S_MMR6_ENC, ROUND_L_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def ROUND_L_D_MMR6 : StdMMR6Rel, ROUND_L_D_MMR6_ENC, ROUND_L_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def ROUND_W_S_MMR6 : StdMMR6Rel, ROUND_W_S_MMR6_ENC, ROUND_W_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def ROUND_W_D_MMR6 : StdMMR6Rel, ROUND_W_D_MMR6_ENC, ROUND_W_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def SEL_S_MMR6 : StdMMR6Rel, SEL_S_MMR6_ENC, SEL_S_MMR6_DESC, ISA_MICROMIPS32R6; +def SEL_D_MMR6 : StdMMR6Rel, SEL_D_MMR6_ENC, SEL_D_MMR6_DESC, ISA_MICROMIPS32R6; +def SELEQZ_S_MMR6 : StdMMR6Rel, SELEQZ_S_MMR6_ENC, SELEQZ_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def SELEQZ_D_MMR6 : StdMMR6Rel, SELEQZ_D_MMR6_ENC, SELEQZ_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def SELENZ_S_MMR6 : StdMMR6Rel, SELENZ_S_MMR6_ENC, SELENZ_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def SELENZ_D_MMR6 : StdMMR6Rel, SELENZ_D_MMR6_ENC, SELENZ_D_MMR6_DESC, + ISA_MICROMIPS32R6; +def CLASS_S_MMR6 : StdMMR6Rel, CLASS_S_MMR6_ENC, CLASS_S_MMR6_DESC, + ISA_MICROMIPS32R6; +def CLASS_D_MMR6 : StdMMR6Rel, CLASS_D_MMR6_ENC, CLASS_D_MMR6_DESC, + ISA_MICROMIPS32R6; } //===----------------------------------------------------------------------===// @@ -357,4 +1211,23 @@ def XORI_MMR6 : StdMMR6Rel, XORI_MMR6_DESC, XORI_MMR6_ENC, ISA_MICROMIPS32R6; //===----------------------------------------------------------------------===// def : MipsInstAlias<"ei", (EI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6; +def : MipsInstAlias<"di", (DI_MMR6 ZERO), 1>, ISA_MICROMIPS32R6; def : MipsInstAlias<"nop", (SLL_MMR6 ZERO, ZERO, 0), 1>, ISA_MICROMIPS32R6; +def B_MMR6_Pseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset), + !strconcat("b", "\t$offset")> { + string DecoderNamespace = "MicroMipsR6"; +} +def : MipsInstAlias<"sync", (SYNC_MMR6 0), 1>, ISA_MICROMIPS32R6; +def : MipsInstAlias<"sdbbp", (SDBBP_MMR6 0), 1>, ISA_MICROMIPS32R6; +def : MipsInstAlias<"rdhwr $rt, $rs", + (RDHWR_MMR6 GPR32Opnd:$rt, HWRegsOpnd:$rs, 0), 1>, + ISA_MICROMIPS32R6; + +//===----------------------------------------------------------------------===// +// +// MicroMips arbitrary patterns that map to one or more instructions +// +//===----------------------------------------------------------------------===// + +def : MipsPat<(store GPRMM16:$src, addrimm4lsl2:$addr), + (SW16_MMR6 GPRMM16:$src, addrimm4lsl2:$addr)>, ISA_MICROMIPS32R6; diff --git a/lib/Target/Mips/MicroMips64r6InstrFormats.td b/lib/Target/Mips/MicroMips64r6InstrFormats.td new file mode 100644 index 000000000000..da305a2d508a --- /dev/null +++ b/lib/Target/Mips/MicroMips64r6InstrFormats.td @@ -0,0 +1,86 @@ +//=- MicroMips64r6InstrFormats.td - Instruction Formats -*- tablegen -* -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes microMIPS64r6 instruction formats. 
+//
+//===----------------------------------------------------------------------===//
+
+class DAUI_FM_MMR6 {
+  bits<5> rt;
+  bits<5> rs;
+  bits<16> imm;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b111100;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-0} = imm;
+}
+
+class POOL32I_ADD_IMM_FM_MMR6<bits<5> funct> {
+  bits<5> rs;
+  bits<16> imm;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010000;
+  let Inst{25-21} = funct;
+  let Inst{20-16} = rs;
+  let Inst{15-0} = imm;
+}
+
+class POOL32S_EXTBITS_FM_MMR6<bits<6> funct> {
+  bits<5> rt;
+  bits<5> rs;
+  bits<5> size;
+  bits<5> pos;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010110;
+  let Inst{25-21} = rt;
+  let Inst{20-16} = rs;
+  let Inst{15-11} = size;
+  let Inst{10-6} = pos;
+  let Inst{5-0} = funct;
+}
+
+class POOL32S_DALIGN_FM_MMR6 {
+  bits<5> rs;
+  bits<5> rt;
+  bits<5> rd;
+  bits<3> bp;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010110;
+  let Inst{25-21} = rs;
+  let Inst{20-16} = rt;
+  let Inst{15-11} = rd;
+  let Inst{10-8} = bp;
+  let Inst{7-6} = 0b00;
+  let Inst{5-0} = 0b011100;
+}
+
+class POOL32A_DIVMOD_FM_MMR6<string instr_asm, bits<9> funct>
+    : MMR6Arch<instr_asm> {
+  bits<5> rd;
+  bits<5> rs;
+  bits<5> rt;
+
+  bits<32> Inst;
+
+  let Inst{31-26} = 0b010110;
+  let Inst{25-21} = rd;
+  let Inst{20-16} = rs;
+  let Inst{15-11} = rt;
+  let Inst{10-9} = 0b00;
+  let Inst{8-0} = funct;
+}
diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td
new file mode 100644
index 000000000000..ec1aef86a942
--- /dev/null
+++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -0,0 +1,119 @@
+//=- MicroMips64r6InstrInfo.td - Instruction Information -*- tablegen -*- -=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes MicroMips64r6 instructions.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// Instruction Encodings +// +//===----------------------------------------------------------------------===// + +class DAUI_MMR6_ENC : DAUI_FM_MMR6; +class DAHI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10001>; +class DATI_MMR6_ENC : POOL32I_ADD_IMM_FM_MMR6<0b10000>; +class DEXT_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b101100>; +class DEXTM_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b100100>; +class DEXTU_MMR6_ENC : POOL32S_EXTBITS_FM_MMR6<0b010100>; +class DALIGN_MMR6_ENC : POOL32S_DALIGN_FM_MMR6; +class DDIV_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddiv", 0b100011000>; +class DMOD_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmod", 0b101011000>; +class DDIVU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"ddivu", 0b110011000>; +class DMODU_MM64R6_ENC : POOL32A_DIVMOD_FM_MMR6<"dmodu", 0b111011000>; + +//===----------------------------------------------------------------------===// +// +// Instruction Descriptions +// +//===----------------------------------------------------------------------===// + +class DAUI_MMR6_DESC_BASE + : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins GPROpnd:$rs, simm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm"); + list Pattern = []; +} +class DAUI_MMR6_DESC : DAUI_MMR6_DESC_BASE<"daui", GPR64Opnd>; + +class DAHI_DATI_DESC_BASE + : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs GPROpnd:$rs); + dag InOperandList = (ins GPROpnd:$rt, simm16:$imm); + string AsmString = !strconcat(instr_asm, "\t$rt, $imm"); + string Constraints = "$rs = $rt"; +} +class DAHI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dahi", GPR64Opnd>; +class DATI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dati", GPR64Opnd>; + +class EXTBITS_DESC_BASE + : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs RO:$rt); + dag InOperandList = (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size); + string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $pos, $size"); + list Pattern = [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))]; + InstrItinClass Itinerary = II_EXT; + Format Form = FrmR; + string BaseOpcode = instr_asm; +} +// TODO: Add 'pos + size' constraint check to dext* instructions +// DEXT: 0 < pos + size <= 63 +// DEXTM, DEXTU: 32 < pos + size <= 64 +class DEXT_MMR6_DESC : EXTBITS_DESC_BASE<"dext", GPR64Opnd, uimm5, + uimm5_plus1, MipsExt>; +class DEXTM_MMR6_DESC : EXTBITS_DESC_BASE<"dextm", GPR64Opnd, uimm5, + uimm5_plus33, MipsExt>; +class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm5_plus32, + uimm5_plus1, MipsExt>; + +class DALIGN_DESC_BASE : MMR6Arch, MipsR6Inst { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp"); + list Pattern = []; +} + +class DALIGN_MMR6_DESC : DALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3>; + +class DDIV_MM64R6_DESC : ArithLogicR<"ddiv", GPR32Opnd>; +class DMOD_MM64R6_DESC : ArithLogicR<"dmod", GPR32Opnd>; +class DDIVU_MM64R6_DESC : ArithLogicR<"ddivu", GPR32Opnd>; +class DMODU_MM64R6_DESC : ArithLogicR<"dmodu", GPR32Opnd>; + +//===----------------------------------------------------------------------===// +// +// Instruction Definitions +// +//===----------------------------------------------------------------------===// + +let DecoderNamespace = "MicroMipsR6" in { + def DAUI_MM64R6 : StdMMR6Rel, DAUI_MMR6_DESC, DAUI_MMR6_ENC, ISA_MICROMIPS64R6; + def 
DAHI_MM64R6 : StdMMR6Rel, DAHI_MMR6_DESC, DAHI_MMR6_ENC, ISA_MICROMIPS64R6; + def DATI_MM64R6 : StdMMR6Rel, DATI_MMR6_DESC, DATI_MMR6_ENC, ISA_MICROMIPS64R6; + def DEXT_MM64R6 : StdMMR6Rel, DEXT_MMR6_DESC, DEXT_MMR6_ENC, + ISA_MICROMIPS64R6; + def DEXTM_MM64R6 : StdMMR6Rel, DEXTM_MMR6_DESC, DEXTM_MMR6_ENC, + ISA_MICROMIPS64R6; + def DEXTU_MM64R6 : StdMMR6Rel, DEXTU_MMR6_DESC, DEXTU_MMR6_ENC, + ISA_MICROMIPS64R6; + def DALIGN_MM64R6 : StdMMR6Rel, DALIGN_MMR6_DESC, DALIGN_MMR6_ENC, + ISA_MICROMIPS64R6; + def DDIV_MM64R6 : R6MMR6Rel, DDIV_MM64R6_DESC, DDIV_MM64R6_ENC, + ISA_MICROMIPS64R6; + def DMOD_MM64R6 : R6MMR6Rel, DMOD_MM64R6_DESC, DMOD_MM64R6_ENC, + ISA_MICROMIPS64R6; + def DDIVU_MM64R6 : R6MMR6Rel, DDIVU_MM64R6_DESC, DDIVU_MM64R6_ENC, + ISA_MICROMIPS64R6; + def DMODU_MM64R6 : R6MMR6Rel, DMODU_MM64R6_DESC, DMODU_MM64R6_ENC, + ISA_MICROMIPS64R6; +} diff --git a/lib/Target/Mips/MicroMipsDSPInstrFormats.td b/lib/Target/Mips/MicroMipsDSPInstrFormats.td new file mode 100644 index 000000000000..f11c09abfc36 --- /dev/null +++ b/lib/Target/Mips/MicroMipsDSPInstrFormats.td @@ -0,0 +1,244 @@ +//===-- MicroMipsDSPInstrFormats.td - Instruction Formats --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +class MMDSPInst + : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, PredicateControl { + let InsnPredicates = [HasDSP]; + let AdditionalPredicates = [InMicroMips]; + string BaseOpcode = opstr; + string Arch = "mmdsp"; + let DecoderNamespace = "MicroMips"; +} + +class MMDSPInstAlias + : InstAlias, PredicateControl { + let InsnPredicates = [HasDSP]; + let AdditionalPredicates = [InMicroMips]; +} + +class POOL32A_3R_FMT op> : MMDSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = rd; + let Inst{10-0} = op; +} + +class POOL32A_2R_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_2RAC_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<2> ac; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-14} = ac; + let Inst{13-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_3RB0_FMT op> : MMDSPInst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = rd; + let Inst{10} = 0b0; + let Inst{9-0} = op; +} + +class POOL32A_2RSA4_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<4> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-12} = sa; + let Inst{11-0} = op; +} + +class POOL32A_2RSA3_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<3> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-13} = sa; + let Inst{12-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_2RSA5B0_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<5> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = sa; + let Inst{10} = 0b0; + let Inst{9-0} = op; +} + +class POOL32A_2RSA4B0_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<4> sa; + + let Inst{31-26} = 
0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-12} = sa; + let Inst{11} = 0b0; + let Inst{10-0} = op; +} + +class POOL32A_2RSA4OP6_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<4> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-12} = sa; + let Inst{11-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_1RIMM5AC_FMT funct> : MMDSPInst { + bits<5> rt; + bits<5> imm; + bits<2> ac; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = imm; + let Inst{15-14} = ac; + let Inst{13-6} = funct; + let Inst{5-0} = 0b111100; +} + +class POOL32A_2RSA5_FMT op> : MMDSPInst { + bits<5> rt; + bits<5> rs; + bits<5> sa; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-16} = rs; + let Inst{15-11} = sa; + let Inst{10-0} = op; +} + +class POOL32A_1RMEMB0_FMT funct> : MMDSPInst { + bits<5> index; + bits<5> base; + bits<5> rd; + + let Inst{31-26} = 0; + let Inst{25-21} = index; + let Inst{20-16} = base; + let Inst{15-11} = rd; + let Inst{10} = 0b0; + let Inst{9-0} = funct; +} + +class POOL32A_1RAC_FMT funct> : MMDSPInst { + bits<5> rs; + bits<2> ac; + + let Inst{31-26} = 0; + let Inst{25-21} = 0; + let Inst{20-16} = rs; + let Inst{15-14} = ac; + let Inst{13-6} = funct; + let Inst{5-0} = 0b111100; +} + +class POOL32A_1RMASK7_FMT op> : MMDSPInst { + bits<5> rt; + bits<7> mask; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = rt; + let Inst{20-14} = mask; + let Inst{13-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_1RIMM10_FMT op> : MMDSPInst { + bits<5> rd; + bits<10> imm; + + let Inst{31-26} = 0; + let Inst{25-16} = imm; + let Inst{15-11} = rd; + let Inst{10} = 0; + let Inst{9-0} = op; +} + +class POOL32A_1RIMM8_FMT op> : MMDSPInst { + bits<5> rt; + bits<8> imm; + + let Inst{31-26} = 0; + let Inst{25-21} = rt; + let Inst{20-13} = imm; + let Inst{12} = 0; + let Inst{11-6} = op; + let Inst{5-0} = 0b111100; +} + +class POOL32A_4B0SHIFT6AC4B0_FMT op> : MMDSPInst { + bits<6> shift; + bits<2> ac; + + let Inst{31-26} = 0b000000; + let Inst{25-22} = 0b0000; + let Inst{21-16} = shift; + let Inst{15-14} = ac; + let Inst{13-10} = 0b0000; + let Inst{9-0} = op; +} + +class POOL32A_5B01RAC_FMT op> : MMDSPInst { + bits<5> rs; + bits<2> ac; + + let Inst{31-26} = 0b000000; + let Inst{25-21} = 0b00000; + let Inst{20-16} = rs; + let Inst{15-14} = ac; + let Inst{13-6} = op; + let Inst{5-0} = 0b111100; +} diff --git a/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/lib/Target/Mips/MicroMipsDSPInstrInfo.td new file mode 100644 index 000000000000..b342e2371df4 --- /dev/null +++ b/lib/Target/Mips/MicroMipsDSPInstrInfo.td @@ -0,0 +1,528 @@ +//===- MicroMipsDSPInstrInfo.td - Micromips DSP instructions -*- tablegen *-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes MicroMips DSP instructions. +// +//===----------------------------------------------------------------------===// + +// Instruction encoding. 
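+// How to read the encoding classes that follow: each one instantiates a
+// format class from MicroMipsDSPInstrFormats.td with the mnemonic and the
+// minor opcode, while the format class fixes the field layout. A minimal
+// sketch of the pattern (FOO_QB_MM_ENC and its opcode bits are hypothetical,
+// shown only to illustrate the shape):
+//
+//   class FOO_QB_MM_ENC : POOL32A_3R_FMT<"foo.qb", 0b01110001101>;
+//
+// With POOL32A_3R_FMT above, the 11-bit minor opcode fills Inst{10-0} and
+// the rt/rs/rd register numbers fill Inst{25-21}, Inst{20-16}, Inst{15-11}.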
+class ADDQ_PH_MM_ENC : POOL32A_3R_FMT<"addq.ph", 0b00000001101>; +class ADDQ_S_PH_MM_ENC : POOL32A_3R_FMT<"addq_s.ph", 0b10000001101>; +class ADDQ_S_W_MM_ENC : POOL32A_3RB0_FMT<"addq_s.w", 0b1100000101>; +class ADDQH_PH_MMR2_ENC : POOL32A_3R_FMT<"addqh.ph", 0b00001001101>; +class ADDQH_R_PH_MMR2_ENC : POOL32A_3R_FMT<"addqh_r.ph", 0b10001001101>; +class ADDQH_W_MMR2_ENC: POOL32A_3R_FMT<"addqh.w", 0b00010001101>; +class ADDQH_R_W_MMR2_ENC : POOL32A_3R_FMT<"addqh_r.w", 0b10010001101>; +class ADDU_PH_MMR2_ENC : POOL32A_3R_FMT<"addu.ph", 0b00100001101>; +class ADDU_S_PH_MMR2_ENC : POOL32A_3R_FMT<"addu_s.ph", 0b10100001101>; +class ADDU_QB_MM_ENC : POOL32A_3R_FMT<"addu.qb", 0b00011001101>; +class ADDU_S_QB_MM_ENC : POOL32A_3R_FMT<"addu_s.qb", 0b10011001101>; +class ADDUH_QB_MMR2_ENC : POOL32A_3R_FMT<"adduh.qb", 0b00101001101>; +class ADDUH_R_QB_MMR2_ENC : POOL32A_3R_FMT<"adduh_r.qb", 0b10101001101>; +class ADDSC_MM_ENC : POOL32A_3RB0_FMT<"addsc", 0b1110000101>; +class ADDWC_MM_ENC : POOL32A_3RB0_FMT<"addwc", 0b1111000101>; +class DPA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpa.w.ph", 0b00000010>; +class DPAQ_S_W_PH_MM_ENC : POOL32A_2RAC_FMT<"dpaq_s.w.ph", 0b00001010>; +class DPAQ_SA_L_W_MM_ENC : POOL32A_2RAC_FMT<"dpaq_sa.l.w", 0b01001010>; +class DPAQX_S_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpaqx_s.w.ph", 0b10001010>; +class DPAQX_SA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpaqx_sa.w.ph", 0b11001010>; +class DPAU_H_QBL_MM_ENC : POOL32A_2RAC_FMT<"dpau.h.qbl", 0b10000010>; +class DPAU_H_QBR_MM_ENC : POOL32A_2RAC_FMT<"dpau.h.qbr", 0b11000010>; +class DPAX_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpax.w.ph", 0b01000010>; +class ABSQ_S_PH_MM_ENC : POOL32A_2R_FMT<"absq_s.ph", 0b0001000100>; +class ABSQ_S_W_MM_ENC : POOL32A_2R_FMT<"absq_s.w", 0b0010000100>; +class ABSQ_S_QB_MMR2_ENC : POOL32A_2R_FMT<"absq_s.qb", 0b0000000100>; +class INSV_MM_ENC : POOL32A_2R_FMT<"insv", 0b0100000100>; +class MADD_DSP_MM_ENC : POOL32A_2RAC_FMT<"madd", 0b00101010>; +class MADDU_DSP_MM_ENC : POOL32A_2RAC_FMT<"maddu", 0b01101010>; +class MSUB_DSP_MM_ENC : POOL32A_2RAC_FMT<"msub", 0b10101010>; +class MSUBU_DSP_MM_ENC : POOL32A_2RAC_FMT<"msubu", 0b11101010>; +class MULT_DSP_MM_ENC : POOL32A_2RAC_FMT<"mult", 0b00110010>; +class MULTU_DSP_MM_ENC : POOL32A_2RAC_FMT<"multu", 0b01110010>; +class SHLL_PH_MM_ENC : POOL32A_2RSA4_FMT<"shll.ph", 0b001110110101>; +class SHLL_S_PH_MM_ENC : POOL32A_2RSA4_FMT<"shll_s.ph", 0b101110110101>; +class SHLL_QB_MM_ENC : POOL32A_2RSA3_FMT<"shll.qb", 0b0100001>; +class SHLLV_PH_MM_ENC : POOL32A_3R_FMT<"shllv.ph", 0b00000001110>; +class SHLLV_S_PH_MM_ENC : POOL32A_3R_FMT<"shllv_s.ph", 0b10000001110>; +class SHLLV_QB_MM_ENC : POOL32A_3RB0_FMT<"shllv.qb", 0b1110010101>; +class SHLLV_S_W_MM_ENC : POOL32A_3RB0_FMT<"shllv_s.w", 0b1111010101>; +class SHLL_S_W_MM_ENC : POOL32A_2RSA5B0_FMT<"shll_s.w", 0b1111110101>; +class SHRA_QB_MMR2_ENC : POOL32A_2RSA3_FMT<"shra.qb", 0b0000111>; +class SHRA_R_QB_MMR2_ENC : POOL32A_2RSA3_FMT<"shra_r.qb", 0b1000111>; +class SHRA_PH_MM_ENC : POOL32A_2RSA4B0_FMT<"shra.ph", 0b01100110101>; +class SHRA_R_PH_MM_ENC : POOL32A_2RSA4B0_FMT<"shra_r.ph", 0b11100110101>; +class SHRAV_PH_MM_ENC : POOL32A_3R_FMT<"shrav.ph", 0b00110001101>; +class SHRAV_R_PH_MM_ENC : POOL32A_3R_FMT<"shrav_r.ph", 0b10110001101>; +class SHRAV_QB_MMR2_ENC : POOL32A_3R_FMT<"shrav.qb", 0b00111001101>; +class SHRAV_R_QB_MMR2_ENC : POOL32A_3R_FMT<"shrav_r.qb", 0b10111001101>; +class SHRAV_R_W_MM_ENC : POOL32A_3RB0_FMT<"shrav_r.w", 0b1011010101>; +class SHRA_R_W_MM_ENC : POOL32A_2RSA5B0_FMT<"shra_r.w", 0b1011110101>; +class 
SHRL_PH_MMR2_ENC : POOL32A_2RSA4OP6_FMT<"shrl.ph", 0b001111>; +class SHRL_QB_MM_ENC : POOL32A_2RSA3_FMT<"shrl.qb", 0b1100001>; +class SHRLV_PH_MMR2_ENC : POOL32A_3RB0_FMT<"shrlv.ph", 0b1100010101>; +class SHRLV_QB_MM_ENC : POOL32A_3RB0_FMT<"shrlv.qb", 0b1101010101>; +class PRECEQ_W_PHL_MM_ENC : POOL32A_2R_FMT<"preceq.w.phl", 0b0101000100>; +class PRECEQ_W_PHR_MM_ENC : POOL32A_2R_FMT<"preceq.w.phr", 0b0110000100>; +class PRECEQU_PH_QBL_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbl", 0b0111000100>; +class PRECEQU_PH_QBLA_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbla", 0b0111001100>; +class PRECEQU_PH_QBR_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbr", 0b1001000100>; +class PRECEQU_PH_QBRA_MM_ENC : POOL32A_2R_FMT<"precequ.ph.qbra", 0b1001001100>; +class PRECEU_PH_QBL_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbl", 0b1011000100>; +class PRECEU_PH_QBLA_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbla", 0b1011001100>; +class PRECEU_PH_QBR_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbr", 0b1101000100>; +class PRECEU_PH_QBRA_MM_ENC : POOL32A_2R_FMT<"preceu.ph.qbra", 0b1101001100>; +class SUBQ_PH_MM_ENC : POOL32A_3R_FMT<"subq.ph", 0b01000001101>; +class SUBQ_S_PH_MM_ENC : POOL32A_3R_FMT<"subq_s.ph", 0b11000001101>; +class SUBQ_S_W_MM_ENC : POOL32A_3RB0_FMT<"subq_s.w", 0b1101000101>; +class SUBQH_PH_MMR2_ENC : POOL32A_3R_FMT<"subqh.ph", 0b01001001101>; +class SUBQH_R_PH_MMR2_ENC : POOL32A_3R_FMT<"subqh_r.ph", 0b11001001101>; +class SUBQH_W_MMR2_ENC : POOL32A_3R_FMT<"subqh.w", 0b01010001101>; +class SUBQH_R_W_MMR2_ENC : POOL32A_3R_FMT<"subqh_r.w", 0b11010001101>; +class SUBU_PH_MMR2_ENC : POOL32A_3R_FMT<"subu.ph", 0b01100001101>; +class SUBU_S_PH_MMR2_ENC : POOL32A_3R_FMT<"subu_s.ph", 0b11100001101>; +class SUBU_QB_MM_ENC : POOL32A_3R_FMT<"subu.qb", 0b01011001101>; +class SUBU_S_QB_MM_ENC : POOL32A_3R_FMT<"subu_s.qb", 0b11011001101>; +class SUBUH_QB_MMR2_ENC : POOL32A_3R_FMT<"subuh.qb", 0b01101001101>; +class SUBUH_R_QB_MMR2_ENC : POOL32A_3R_FMT<"subuh_r.qb", 0b11101001101>; +class EXTP_MM_ENC : POOL32A_1RIMM5AC_FMT<"extp", 0b10011001>; +class EXTPDP_MM_ENC : POOL32A_1RIMM5AC_FMT<"extpdp", 0b11011001>; +class EXTPDPV_MM_ENC : POOL32A_2RAC_FMT<"extpdpv", 0b11100010>; +class EXTPV_MM_ENC : POOL32A_2RAC_FMT<"extpv", 0b10100010>; +class EXTR_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr.w", 0b00111001>; +class EXTR_R_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_r.w", 0b01111001>; +class EXTR_RS_W_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_rs.w", 0b10111001>; +class EXTR_S_H_MM_ENC : POOL32A_1RIMM5AC_FMT<"extr_s.h", 0b11111001>; +class EXTRV_W_MM_ENC : POOL32A_2RAC_FMT<"extrv.w", 0b00111010>; +class EXTRV_R_W_MM_ENC : POOL32A_2RAC_FMT<"extrv_r.w", 0b01111010>; +class EXTRV_RS_W_MM_ENC : POOL32A_2RAC_FMT<"extrv_rs.w", 0b10111010>; +class EXTRV_S_H_MM_ENC : POOL32A_2RAC_FMT<"extrv_s.h", 0b11111010>; +class DPS_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dps.w.ph", 0b00010010>; +class DPSQ_S_W_PH_MM_ENC : POOL32A_2RAC_FMT<"dpsq_s.w.ph", 0b00011010>; +class DPSQ_SA_L_W_MM_ENC : POOL32A_2RAC_FMT<"dpsq_sa.l.w", 0b01011010>; +class DPSQX_S_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsqx_s.w.ph", 0b10011010>; +class DPSQX_SA_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsqx_sa.w.ph", 0b11011010>; +class DPSU_H_QBL_MM_ENC : POOL32A_2RAC_FMT<"dpsu.h.qbl", 0b10010010>; +class DPSU_H_QBR_MM_ENC : POOL32A_2RAC_FMT<"dpsu.h.qbr", 0b11010010>; +class DPSX_W_PH_MMR2_ENC : POOL32A_2RAC_FMT<"dpsx.w.ph", 0b01010010>; +class MUL_PH_MMR2_ENC : POOL32A_3R_FMT<"mul.ph", 0b00000101101>; +class MUL_S_PH_MMR2_ENC : POOL32A_3R_FMT<"mul_s.ph", 0b10000101101>; +class MULEQ_S_W_PHL_MM_ENC : POOL32A_3RB0_FMT<"muleq_s.w.phl", 
0b0000100101>; +class MULEQ_S_W_PHR_MM_ENC : POOL32A_3RB0_FMT<"muleq_s.w.phr", 0b0001100101>; +class MULEU_S_PH_QBL_MM_ENC : POOL32A_3RB0_FMT<"muleu_s.ph.qbl", 0b0010010101>; +class MULEU_S_PH_QBR_MM_ENC : POOL32A_3RB0_FMT<"muleu_s.ph.qbr", 0b0011010101>; +class MULQ_RS_PH_MM_ENC : POOL32A_3RB0_FMT<"mulq_rs.ph", 0b0100010101>; +class MULQ_RS_W_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_rs.w", 0b0110010101>; +class MULQ_S_PH_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_s.ph", 0b0101010101>; +class MULQ_S_W_MMR2_ENC : POOL32A_3RB0_FMT<"mulq_s.w", 0b0111010101>; +class PRECR_QB_PH_MMR2_ENC : POOL32A_3RB0_FMT<"precr.qb.ph", 0b0001101101>; +class PRECR_SRA_PH_W_MMR2_ENC + : POOL32A_2RSA5_FMT<"precr_sra.ph.w", 0b01111001101>; +class PRECR_SRA_R_PH_W_MMR2_ENC + : POOL32A_2RSA5_FMT<"precr_sra_r.ph.w", 0b11111001101>; +class PRECRQ_PH_W_MM_ENC : POOL32A_3RB0_FMT<"precrq.ph.w", 0b0011101101>; +class PRECRQ_QB_PH_MM_ENC : POOL32A_3RB0_FMT<"precrq.qb.ph", 0b0010101101>; +class PRECRQU_S_QB_PH_MM_ENC + : POOL32A_3RB0_FMT<"precrqu_s.qb.ph", 0b0101101101>; +class PRECRQ_RS_PH_W_MM_ENC : POOL32A_3RB0_FMT<"precrq_rs.ph.w", 0b0100101101>; +class LBUX_MM_ENC : POOL32A_1RMEMB0_FMT<"lbux", 0b1000100101>; +class LHX_MM_ENC : POOL32A_1RMEMB0_FMT<"lhx", 0b0101100101>; +class LWX_MM_ENC : POOL32A_1RMEMB0_FMT<"lwx", 0b0110100101>; +class MAQ_S_W_PHL_MM_ENC : POOL32A_2RAC_FMT<"maq_s.w.phl", 0b01101001>; +class MAQ_SA_W_PHL_MM_ENC : POOL32A_2RAC_FMT<"maq_sa.w.phl", 0b11101001>; +class MAQ_S_W_PHR_MM_ENC : POOL32A_2RAC_FMT<"maq_s.w.phr", 0b00101001>; +class MAQ_SA_W_PHR_MM_ENC : POOL32A_2RAC_FMT<"maq_sa.w.phr", 0b10101001>; +class MFHI_MM_ENC : POOL32A_1RAC_FMT<"mfhi", 0b00000001>; +class MFLO_MM_ENC : POOL32A_1RAC_FMT<"mflo", 0b01000001>; +class MTHI_MM_ENC : POOL32A_1RAC_FMT<"mthi", 0b10000001>; +class MTLO_MM_ENC : POOL32A_1RAC_FMT<"mthi", 0b11000001>; +class PREPEND_MMR2_ENC : POOL32A_2RSA5B0_FMT<"prepend", 0b1001010101>; +class RADDU_W_QB_MM_ENC : POOL32A_2R_FMT<"raddu.w.qb", 0b1111000100>; +class RDDSP_MM_ENC : POOL32A_1RMASK7_FMT<"rddsp", 0b00011001>; +class REPL_PH_MM_ENC : POOL32A_1RIMM10_FMT<"repl.ph", 0b0000111101>; +class REPL_QB_MM_ENC : POOL32A_1RIMM8_FMT<"repl.qb", 0b010111>; +class REPLV_PH_MM_ENC : POOL32A_2R_FMT<"replv.ph", 0b0000001100>; +class REPLV_QB_MM_ENC : POOL32A_2R_FMT<"replv.qb", 0b0001001100>; +class MTHLIP_MM_ENC : POOL32A_1RAC_FMT<"mthlip", 0b00001001>; +class PACKRL_PH_MM_ENC : POOL32A_3RB0_FMT<"packrl.ph", 0b0110101101>; +class PICK_PH_MM_ENC : POOL32A_3RB0_FMT<"pick.ph", 0b1000101101>; +class PICK_QB_MM_ENC : POOL32A_3RB0_FMT<"pick.qb", 0b0111101101>; +class SHILO_MM_ENC : POOL32A_4B0SHIFT6AC4B0_FMT<"shilo", 0b0000011101>; +class SHILOV_MM_ENC : POOL32A_5B01RAC_FMT<"shilov", 0b01001001>; +class WRDSP_MM_ENC : POOL32A_1RMASK7_FMT<"wrdsp", 0b01011001>; + +// Instruction desc. 
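+// How to read the description classes that follow: where the *_ENC classes
+// above carry only bit layout, the *_DESC classes carry the operand lists,
+// the assembly string, and the selection pattern; a final 'def' inherits
+// from one of each. A minimal sketch of the pattern (FOO_QB_MM_DESC is a
+// hypothetical name, shown only to illustrate how the pieces compose):
+//
+//   class FOO_QB_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+//       "foo.qb", int_mips_absq_s_qb, NoItinerary, DSPROpnd>;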
+class ABSQ_S_PH_MM_R2_DESC_BASE<string opstr, SDPatternOperator OpNode,
+                                InstrItinClass itin, RegisterOperand ROD,
+                                RegisterOperand ROS = ROD> {
+  dag OutOperandList = (outs ROD:$rt);
+  dag InOperandList = (ins ROS:$rs);
+  string AsmString = !strconcat(opstr, "\t$rt, $rs");
+  list<dag> Pattern = [(set ROD:$rt, (OpNode ROS:$rs))];
+  InstrItinClass Itinerary = itin;
+}
+class ABSQ_S_PH_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "absq_s.ph", int_mips_absq_s_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>;
+class ABSQ_S_W_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "absq_s.w", int_mips_absq_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag20]>;
+class ABSQ_S_QB_MMR2_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "absq_s.qb", int_mips_absq_s_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag20]>;
+class PRECEQ_W_PHL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceq.w.phl", int_mips_preceq_w_phl, NoItinerary, GPR32Opnd, DSPROpnd>;
+class PRECEQ_W_PHR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceq.w.phr", int_mips_preceq_w_phr, NoItinerary, GPR32Opnd, DSPROpnd>;
+class PRECEQU_PH_QBL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "precequ.ph.qbl", int_mips_precequ_ph_qbl, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBLA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "precequ.ph.qbla", int_mips_precequ_ph_qbla, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "precequ.ph.qbr", int_mips_precequ_ph_qbr, NoItinerary, DSPROpnd>;
+class PRECEQU_PH_QBRA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "precequ.ph.qbra", int_mips_precequ_ph_qbra, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBL_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceu.ph.qbl", int_mips_preceu_ph_qbl, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBLA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceu.ph.qbla", int_mips_preceu_ph_qbla, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBR_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceu.ph.qbr", int_mips_preceu_ph_qbr, NoItinerary, DSPROpnd>;
+class PRECEU_PH_QBRA_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<
+    "preceu.ph.qbra", int_mips_preceu_ph_qbra, NoItinerary, DSPROpnd>;
+
+class SHLL_R2_MM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                           SDPatternOperator ImmPat, InstrItinClass itin,
+                           RegisterOperand RO, Operand ImmOpnd> {
+  dag OutOperandList = (outs RO:$rt);
+  dag InOperandList = (ins RO:$rs, ImmOpnd:$sa);
+  string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa");
+  list<dag> Pattern = [(set RO:$rt, (OpNode RO:$rs, ImmPat:$sa))];
+  InstrItinClass Itinerary = itin;
+  bit hasSideEffects = 1;
+}
+class SHLL_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shll.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>,
+    Defs<[DSPOutFlag22]>;
+class SHLL_S_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shll_s.ph", int_mips_shll_s_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>,
+    Defs<[DSPOutFlag22]>;
+class SHLL_QB_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shll.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>,
+    Defs<[DSPOutFlag22]>;
+class SHLL_S_W_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shll_s.w", int_mips_shll_s_w, immZExt5, NoItinerary, GPR32Opnd, uimm5>,
+    Defs<[DSPOutFlag22]>;
+class SHRA_QB_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRA_R_QB_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra_r.qb", int_mips_shra_r_qb, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRA_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+class SHRA_R_PH_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra_r.ph", int_mips_shra_r_ph, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+class SHRA_R_W_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shra_r.w", int_mips_shra_r_w, immZExt5, NoItinerary, GPR32Opnd, uimm5>;
+class SHRL_QB_MM_DESC : SHLL_R2_MM_DESC_BASE<
+    "shrl.qb", null_frag, immZExt3, NoItinerary, DSPROpnd, uimm3>;
+class SHRL_PH_MMR2_DESC : SHLL_R2_MM_DESC_BASE<
+    "shrl.ph", null_frag, immZExt4, NoItinerary, DSPROpnd, uimm4>;
+
+class SHLLV_R3_MM_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                            InstrItinClass itin, RegisterOperand RO> {
+  dag OutOperandList = (outs RO:$rd);
+  dag InOperandList = (ins RO:$rt, GPR32Opnd:$rs);
+  string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs");
+  list<dag> Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs))];
+  InstrItinClass Itinerary = itin;
+}
+class SHLLV_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shllv.ph", int_mips_shll_ph, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>;
+class SHLLV_S_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shllv_s.ph", int_mips_shll_s_ph, NoItinerary, DSPROpnd>,
+    Defs<[DSPOutFlag22]>;
+class SHLLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shllv.qb", int_mips_shll_qb, NoItinerary, DSPROpnd>, Defs<[DSPOutFlag22]>;
+class SHLLV_S_W_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shllv_s.w", int_mips_shll_s_w, NoItinerary, GPR32Opnd>, Defs<[DSPOutFlag22]>;
+class SHRAV_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav.ph", int_mips_shra_ph, NoItinerary, DSPROpnd>;
+class SHRAV_R_PH_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav_r.ph", int_mips_shra_r_ph, NoItinerary, DSPROpnd>;
+class SHRAV_QB_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav.qb", int_mips_shra_qb, NoItinerary, DSPROpnd>;
+class SHRAV_R_QB_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav_r.qb", int_mips_shra_r_qb, NoItinerary, DSPROpnd>;
+class SHRAV_R_W_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrav_r.w", int_mips_shra_r_w, NoItinerary, GPR32Opnd>;
+class SHRLV_PH_MMR2_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrlv.ph", int_mips_shrl_ph, NoItinerary, DSPROpnd>;
+class SHRLV_QB_MM_DESC : SHLLV_R3_MM_DESC_BASE<
+    "shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPROpnd>;
+
+class EXT_MM_2R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          InstrItinClass itin> {
+  dag OutOperandList = (outs GPR32Opnd:$rt);
+  dag InOperandList = (ins ACC64DSPOpnd:$ac, GPR32Opnd:$rs);
+  string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $rs");
+  InstrItinClass Itinerary = itin;
+}
+class EXT_MM_1R_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
+                          InstrItinClass itin> {
+  dag OutOperandList = (outs GPR32Opnd:$rt);
+  dag InOperandList = (ins ACC64DSPOpnd:$ac, uimm5:$imm);
+  string AsmString = !strconcat(instr_asm, "\t$rt, $ac, $imm");
+  InstrItinClass Itinerary = itin;
+}
+
+class EXTP_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extp", MipsEXTP, NoItinerary>,
+    Uses<[DSPPos]>, Defs<[DSPEFI]>;
+class EXTPDP_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extpdp", MipsEXTPDP, NoItinerary>,
+    Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
+class EXTPDPV_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extpdpv", MipsEXTPDP, NoItinerary>,
+    Uses<[DSPPos]>, Defs<[DSPPos, DSPEFI]>;
+class EXTPV_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extpv", MipsEXTP, NoItinerary>,
+    Uses<[DSPPos]>, Defs<[DSPEFI]>;
+class EXTR_W_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extr.w", MipsEXTR_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTR_R_W_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extr_r.w", MipsEXTR_R_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTR_RS_W_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extr_rs.w", MipsEXTR_RS_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTR_S_H_MM_DESC
+  : EXT_MM_1R_DESC_BASE<"extr_s.h", MipsEXTR_S_H, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTRV_W_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extrv.w", MipsEXTR_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTRV_R_W_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extrv_r.w", MipsEXTR_R_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTRV_RS_W_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extrv_rs.w", MipsEXTR_RS_W, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+class EXTRV_S_H_MM_DESC
+  : EXT_MM_2R_DESC_BASE<"extrv_s.h", MipsEXTR_S_H, NoItinerary>,
+    Defs<[DSPOutFlag23]>;
+
+class MFHI_MM_DESC_BASE<string instr_asm, RegisterOperand RO,
+                        SDPatternOperator OpNode, InstrItinClass itin> {
+  dag OutOperandList = (outs GPR32Opnd:$rs);
+  dag InOperandList = (ins RO:$ac);
+  string AsmString = !strconcat(instr_asm, "\t$rs, $ac");
+  list<dag> Pattern = [(set GPR32Opnd:$rs, (OpNode RO:$ac))];
+  InstrItinClass Itinerary = itin;
+}
+
+class MFHI_MM_DESC : MFHI_MM_DESC_BASE<"mfhi", ACC64DSPOpnd, MipsMFHI,
+                                       NoItinerary>;
+class MFLO_MM_DESC : MFHI_MM_DESC_BASE<"mflo", ACC64DSPOpnd, MipsMFLO,
+                                       NoItinerary>;
+
+class RADDU_W_QB_MM_DESC {
+  dag OutOperandList = (outs GPR32Opnd:$rt);
+  dag InOperandList = (ins DSPROpnd:$rs);
+  string AsmString = !strconcat("raddu.w.qb", "\t$rt, $rs");
+  list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_raddu_w_qb DSPROpnd:$rs))];
+  InstrItinClass Itinerary = NoItinerary;
+  string BaseOpcode = "raddu.w.qb";
+}
+
+class RDDSP_MM_DESC {
+  dag OutOperandList = (outs GPR32Opnd:$rt);
+  dag InOperandList = (ins uimm16:$mask);
+  string AsmString = !strconcat("rddsp", "\t$rt, $mask");
+  list<dag> Pattern = [(set GPR32Opnd:$rt, (int_mips_rddsp immZExt10:$mask))];
+  InstrItinClass Itinerary = NoItinerary;
+}
+
+class REPL_QB_MM_DESC {
+  dag OutOperandList = (outs DSPROpnd:$rt);
+  dag InOperandList = (ins uimm16:$imm);
+  string AsmString = !strconcat("repl.qb", "\t$rt, $imm");
+  list<dag> Pattern = [(set DSPROpnd:$rt, (int_mips_repl_qb immZExt8:$imm))];
+  InstrItinClass Itinerary = NoItinerary;
+}
+
+class REPLV_PH_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"replv.ph", int_mips_repl_ph,
+                                                   NoItinerary, DSPROpnd,
+                                                   GPR32Opnd>;
+class REPLV_QB_MM_DESC : ABSQ_S_PH_MM_R2_DESC_BASE<"replv.qb", int_mips_repl_qb,
+                                                   NoItinerary, DSPROpnd,
+                                                   GPR32Opnd>;
+
+class WRDSP_MM_DESC {
+  dag OutOperandList = (outs);
+  dag InOperandList = (ins GPR32Opnd:$rt, uimm7:$mask);
+  string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
+  list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, immZExt7:$mask)];
+  InstrItinClass Itinerary = NoItinerary;
+}
+
+// Instruction defs.
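+// How to read the definitions that follow: each 'def' stitches one *_ENC
+// class to one *_DESC class, and DspMMRel additionally registers the result
+// in the standard-to-microMIPS instruction mapping tables so the two
+// encodings of the same DSP instruction stay related. A sketch of the shape
+// (FOO_QB_MM and its ENC/DESC classes are hypothetical, for illustration):
+//
+//   def FOO_QB_MM : DspMMRel, FOO_QB_MM_ENC, FOO_QB_MM_DESC, ISA_DSPR2;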
+// microMIPS DSP Rev 1 +def ADDQ_PH_MM : DspMMRel, ADDQ_PH_MM_ENC, ADDQ_PH_DESC; +def ADDQ_S_PH_MM : DspMMRel, ADDQ_S_PH_MM_ENC, ADDQ_S_PH_DESC; +def ADDQ_S_W_MM : DspMMRel, ADDQ_S_W_MM_ENC, ADDQ_S_W_DESC; +def ADDU_QB_MM : DspMMRel, ADDU_QB_MM_ENC, ADDU_QB_DESC; +def ADDU_S_QB_MM : DspMMRel, ADDU_S_QB_MM_ENC, ADDU_S_QB_DESC; +def ADDSC_MM : DspMMRel, ADDSC_MM_ENC, ADDSC_DESC; +def ADDWC_MM : DspMMRel, ADDWC_MM_ENC, ADDWC_DESC; +def DPAQ_S_W_PH_MM : DspMMRel, DPAQ_S_W_PH_MM_ENC, DPAQ_S_W_PH_DESC; +def DPAQ_SA_L_W_MM : DspMMRel, DPAQ_SA_L_W_MM_ENC, DPAQ_SA_L_W_DESC; +def DPAU_H_QBL_MM : DspMMRel, DPAU_H_QBL_MM_ENC, DPAU_H_QBL_DESC; +def DPAU_H_QBR_MM : DspMMRel, DPAU_H_QBR_MM_ENC, DPAU_H_QBR_DESC; +def ABSQ_S_PH_MM : DspMMRel, ABSQ_S_PH_MM_ENC, ABSQ_S_PH_MM_DESC; +def ABSQ_S_W_MM : DspMMRel, ABSQ_S_W_MM_ENC, ABSQ_S_W_MM_DESC; +def INSV_MM : DspMMRel, INSV_MM_ENC, INSV_DESC; +def MADD_DSP_MM : DspMMRel, MADD_DSP_MM_ENC, MADD_DSP_DESC; +def MADDU_DSP_MM : DspMMRel, MADDU_DSP_MM_ENC, MADDU_DSP_DESC; +def MSUB_DSP_MM : DspMMRel, MSUB_DSP_MM_ENC, MSUB_DSP_DESC; +def MSUBU_DSP_MM : DspMMRel, MSUBU_DSP_MM_ENC, MSUBU_DSP_DESC; +def MULT_DSP_MM : DspMMRel, MULT_DSP_MM_ENC, MULT_DSP_DESC; +def MULTU_DSP_MM : DspMMRel, MULTU_DSP_MM_ENC, MULTU_DSP_DESC; +def SHLL_PH_MM : DspMMRel, SHLL_PH_MM_ENC, SHLL_PH_MM_DESC; +def SHLL_S_PH_MM : DspMMRel, SHLL_S_PH_MM_ENC, SHLL_S_PH_MM_DESC; +def SHLL_QB_MM : DspMMRel, SHLL_QB_MM_ENC, SHLL_QB_MM_DESC; +def SHLLV_PH_MM : DspMMRel, SHLLV_PH_MM_ENC, SHLLV_PH_MM_DESC; +def SHLLV_S_PH_MM : DspMMRel, SHLLV_S_PH_MM_ENC, SHLLV_S_PH_MM_DESC; +def SHLLV_QB_MM : DspMMRel, SHLLV_QB_MM_ENC, SHLLV_QB_MM_DESC; +def SHLLV_S_W_MM : DspMMRel, SHLLV_S_W_MM_ENC, SHLLV_S_W_MM_DESC; +def SHLL_S_W_MM : DspMMRel, SHLL_S_W_MM_ENC, SHLL_S_W_MM_DESC; +def SHRA_PH_MM : DspMMRel, SHRA_PH_MM_ENC, SHRA_PH_MM_DESC; +def SHRA_R_PH_MM : DspMMRel, SHRA_R_PH_MM_ENC, SHRA_R_PH_MM_DESC; +def SHRAV_PH_MM : DspMMRel, SHRAV_PH_MM_ENC, SHRAV_PH_MM_DESC; +def SHRAV_R_PH_MM : DspMMRel, SHRAV_R_PH_MM_ENC, SHRAV_R_PH_MM_DESC; +def SHRAV_R_W_MM : DspMMRel, SHRAV_R_W_MM_ENC, SHRAV_R_W_MM_DESC; +def SHRA_R_W_MM : DspMMRel, SHRA_R_W_MM_ENC, SHRA_R_W_MM_DESC; +def SHRL_QB_MM : DspMMRel, SHRL_QB_MM_ENC, SHRL_QB_MM_DESC; +def SHRLV_QB_MM : DspMMRel, SHRLV_QB_MM_ENC, SHRLV_QB_MM_DESC; +def PRECEQ_W_PHL_MM : DspMMRel, PRECEQ_W_PHL_MM_ENC, PRECEQ_W_PHL_MM_DESC; +def PRECEQ_W_PHR_MM : DspMMRel, PRECEQ_W_PHR_MM_ENC, PRECEQ_W_PHR_MM_DESC; +def PRECEQU_PH_QBL_MM : DspMMRel, PRECEQU_PH_QBL_MM_ENC, PRECEQU_PH_QBL_MM_DESC; +def PRECEQU_PH_QBLA_MM : DspMMRel, PRECEQU_PH_QBLA_MM_ENC, + PRECEQU_PH_QBLA_MM_DESC; +def PRECEQU_PH_QBR_MM : DspMMRel, PRECEQU_PH_QBR_MM_ENC, PRECEQU_PH_QBR_MM_DESC; +def PRECEQU_PH_QBRA_MM : DspMMRel, PRECEQU_PH_QBRA_MM_ENC, + PRECEQU_PH_QBRA_MM_DESC; +def PRECEU_PH_QBL_MM : DspMMRel, PRECEU_PH_QBL_MM_ENC, PRECEU_PH_QBL_MM_DESC; +def PRECEU_PH_QBLA_MM : DspMMRel, PRECEU_PH_QBLA_MM_ENC, PRECEU_PH_QBLA_MM_DESC; +def PRECEU_PH_QBR_MM : DspMMRel, PRECEU_PH_QBR_MM_ENC, PRECEU_PH_QBR_MM_DESC; +def PRECEU_PH_QBRA_MM : DspMMRel, PRECEU_PH_QBRA_MM_ENC, PRECEU_PH_QBRA_MM_DESC; +def SUBQ_PH_MM : DspMMRel, SUBQ_PH_MM_ENC, SUBQ_PH_DESC; +def SUBQ_S_PH_MM : DspMMRel, SUBQ_S_PH_MM_ENC, SUBQ_S_PH_DESC; +def SUBQ_S_W_MM : DspMMRel, SUBQ_S_W_MM_ENC, SUBQ_S_W_DESC; +def SUBU_QB_MM : DspMMRel, SUBU_QB_MM_ENC, SUBU_QB_DESC; +def SUBU_S_QB_MM : DspMMRel, SUBU_S_QB_MM_ENC, SUBU_S_QB_DESC; +def EXTP_MM : DspMMRel, EXTP_MM_ENC, EXTP_MM_DESC; +def EXTPDP_MM : DspMMRel, EXTPDP_MM_ENC, EXTPDP_MM_DESC; +def EXTPDPV_MM : DspMMRel, 
EXTPDPV_MM_ENC, EXTPDPV_MM_DESC; +def EXTPV_MM : DspMMRel, EXTPV_MM_ENC, EXTPV_MM_DESC; +def EXTR_W_MM : DspMMRel, EXTR_W_MM_ENC, EXTR_W_MM_DESC; +def EXTR_R_W_MM : DspMMRel, EXTR_R_W_MM_ENC, EXTR_R_W_MM_DESC; +def EXTR_RS_W_MM : DspMMRel, EXTR_RS_W_MM_ENC, EXTR_RS_W_MM_DESC; +def EXTR_S_H_MM : DspMMRel, EXTR_S_H_MM_ENC, EXTR_S_H_MM_DESC; +def EXTRV_W_MM : DspMMRel, EXTRV_W_MM_ENC, EXTRV_W_MM_DESC; +def EXTRV_R_W_MM : DspMMRel, EXTRV_R_W_MM_ENC, EXTRV_R_W_MM_DESC; +def EXTRV_RS_W_MM : DspMMRel, EXTRV_RS_W_MM_ENC, EXTRV_RS_W_MM_DESC; +def EXTRV_S_H_MM : DspMMRel, EXTRV_S_H_MM_ENC, EXTRV_S_H_MM_DESC; +def DPSQ_S_W_PH_MM : DspMMRel, DPSQ_S_W_PH_MM_ENC, DPSQ_S_W_PH_DESC; +def DPSQ_SA_L_W_MM : DspMMRel, DPSQ_SA_L_W_MM_ENC, DPSQ_SA_L_W_DESC; +def DPSU_H_QBL_MM : DspMMRel, DPSU_H_QBL_MM_ENC, DPSU_H_QBL_DESC; +def DPSU_H_QBR_MM : DspMMRel, DPSU_H_QBR_MM_ENC, DPSU_H_QBR_DESC; +def MULEQ_S_W_PHL_MM : DspMMRel, MULEQ_S_W_PHL_MM_ENC, MULEQ_S_W_PHL_DESC; +def MULEQ_S_W_PHR_MM : DspMMRel, MULEQ_S_W_PHR_MM_ENC, MULEQ_S_W_PHR_DESC; +def MULEU_S_PH_QBL_MM : DspMMRel, MULEU_S_PH_QBL_MM_ENC, MULEU_S_PH_QBL_DESC; +def MULEU_S_PH_QBR_MM : DspMMRel, MULEU_S_PH_QBR_MM_ENC, MULEU_S_PH_QBR_DESC; +def MULQ_RS_PH_MM : DspMMRel, MULQ_RS_PH_MM_ENC, MULQ_RS_PH_DESC; +def PRECRQ_PH_W_MM : DspMMRel, PRECRQ_PH_W_MM_ENC, PRECRQ_PH_W_DESC; +def PRECRQ_QB_PH_MM : DspMMRel, PRECRQ_QB_PH_MM_ENC, PRECRQ_QB_PH_DESC; +def PRECRQU_S_QB_PH_MM : DspMMRel, PRECRQU_S_QB_PH_MM_ENC, PRECRQU_S_QB_PH_DESC; +def PRECRQ_RS_PH_W_MM : DspMMRel, PRECRQ_RS_PH_W_MM_ENC, PRECRQ_RS_PH_W_DESC; +def LBUX_MM : DspMMRel, LBUX_MM_ENC, LBUX_DESC; +def LHX_MM : DspMMRel, LHX_MM_ENC, LHX_DESC; +def LWX_MM : DspMMRel, LWX_MM_ENC, LWX_DESC; +def MAQ_S_W_PHL_MM : DspMMRel, MAQ_S_W_PHL_MM_ENC, MAQ_S_W_PHL_DESC; +def MAQ_SA_W_PHL_MM : DspMMRel, MAQ_SA_W_PHL_MM_ENC, MAQ_SA_W_PHL_DESC; +def MAQ_S_W_PHR_MM : DspMMRel, MAQ_S_W_PHR_MM_ENC, MAQ_S_W_PHR_DESC; +def MAQ_SA_W_PHR_MM : DspMMRel, MAQ_SA_W_PHR_MM_ENC, MAQ_SA_W_PHR_DESC; +def MFHI_DSP_MM : DspMMRel, MFHI_MM_ENC, MFHI_MM_DESC; +def MFLO_DSP_MM : DspMMRel, MFLO_MM_ENC, MFLO_MM_DESC; +def MTHI_DSP_MM : DspMMRel, MTHI_MM_ENC, MTHI_DESC; +def MTLO_DSP_MM : DspMMRel, MTLO_MM_ENC, MTLO_DESC; +def RADDU_W_QB_MM : DspMMRel, RADDU_W_QB_MM_ENC, RADDU_W_QB_MM_DESC; +def RDDSP_MM : DspMMRel, RDDSP_MM_ENC, RDDSP_MM_DESC; +def REPL_PH_MM : DspMMRel, REPL_PH_MM_ENC, REPL_PH_DESC; +def REPL_QB_MM : DspMMRel, REPL_QB_MM_ENC, REPL_QB_MM_DESC; +def REPLV_PH_MM : DspMMRel, REPLV_PH_MM_ENC, REPLV_PH_MM_DESC; +def REPLV_QB_MM : DspMMRel, REPLV_QB_MM_ENC, REPLV_QB_MM_DESC; +def MTHLIP_MM : DspMMRel, MTHLIP_MM_ENC, MTHLIP_DESC; +def PACKRL_PH_MM : DspMMRel, PACKRL_PH_MM_ENC, PACKRL_PH_DESC; +def PICK_PH_MM : DspMMRel, PICK_PH_MM_ENC, PICK_PH_DESC; +def PICK_QB_MM : DspMMRel, PICK_QB_MM_ENC, PICK_QB_DESC; +def SHILO_MM : DspMMRel, SHILO_MM_ENC, SHILO_DESC; +def SHILOV_MM : DspMMRel, SHILOV_MM_ENC, SHILOV_DESC; +def WRDSP_MM : DspMMRel, WRDSP_MM_ENC, WRDSP_MM_DESC; +// microMIPS DSP Rev 2 +def ABSQ_S_QB_MMR2 : DspMMRel, ABSQ_S_QB_MMR2_ENC, ABSQ_S_QB_MMR2_DESC, + ISA_DSPR2; +def ADDQH_PH_MMR2 : DspMMRel, ADDQH_PH_MMR2_ENC, ADDQH_PH_DESC, ISA_DSPR2; +def ADDQH_R_PH_MMR2 : DspMMRel, ADDQH_R_PH_MMR2_ENC, ADDQH_R_PH_DESC, ISA_DSPR2; +def ADDQH_W_MMR2 : DspMMRel, ADDQH_W_MMR2_ENC, ADDQH_W_DESC, ISA_DSPR2; +def ADDQH_R_W_MMR2 : DspMMRel, ADDQH_R_W_MMR2_ENC, ADDQH_R_W_DESC, ISA_DSPR2; +def ADDU_PH_MMR2 : DspMMRel, ADDU_PH_MMR2_ENC, ADDU_PH_DESC, ISA_DSPR2; +def ADDU_S_PH_MMR2 : DspMMRel, ADDU_S_PH_MMR2_ENC, ADDU_S_PH_DESC, ISA_DSPR2; +def 
ADDUH_QB_MMR2 : DspMMRel, ADDUH_QB_MMR2_ENC, ADDUH_QB_DESC, ISA_DSPR2; +def ADDUH_R_QB_MMR2 : DspMMRel, ADDUH_R_QB_MMR2_ENC, ADDUH_R_QB_DESC, ISA_DSPR2; +def DPA_W_PH_MMR2 : DspMMRel, DPA_W_PH_MMR2_ENC, DPA_W_PH_DESC, ISA_DSPR2; +def DPAQX_S_W_PH_MMR2 : DspMMRel, DPAQX_S_W_PH_MMR2_ENC, DPAQX_S_W_PH_DESC, + ISA_DSPR2; +def DPAQX_SA_W_PH_MMR2 : DspMMRel, DPAQX_SA_W_PH_MMR2_ENC, DPAQX_SA_W_PH_DESC, + ISA_DSPR2; +def DPAX_W_PH_MMR2 : DspMMRel, DPAX_W_PH_MMR2_ENC, DPAX_W_PH_DESC, ISA_DSPR2; +def SHRA_QB_MMR2 : DspMMRel, SHRA_QB_MMR2_ENC, SHRA_QB_MMR2_DESC, ISA_DSPR2; +def SHRA_R_QB_MMR2 : DspMMRel, SHRA_R_QB_MMR2_ENC, SHRA_R_QB_MMR2_DESC, + ISA_DSPR2; +def SHRAV_QB_MMR2 : DspMMRel, SHRAV_QB_MMR2_ENC, SHRAV_QB_MMR2_DESC, ISA_DSPR2; +def SHRAV_R_QB_MMR2 : DspMMRel, SHRAV_R_QB_MMR2_ENC, SHRAV_R_QB_MMR2_DESC, + ISA_DSPR2; +def SHRL_PH_MMR2 : DspMMRel, SHRL_PH_MMR2_ENC, SHRL_PH_MMR2_DESC, ISA_DSPR2; +def SHRLV_PH_MMR2 : DspMMRel, SHRLV_PH_MMR2_ENC, SHRLV_PH_MMR2_DESC, ISA_DSPR2; +def SUBQH_PH_MMR2 : DspMMRel, SUBQH_PH_MMR2_ENC, SUBQH_PH_DESC, ISA_DSPR2; +def SUBQH_R_PH_MMR2 : DspMMRel, SUBQH_R_PH_MMR2_ENC, SUBQH_R_PH_DESC, ISA_DSPR2; +def SUBQH_W_MMR2 : DspMMRel, SUBQH_W_MMR2_ENC, SUBQH_W_DESC, ISA_DSPR2; +def SUBQH_R_W_MMR2 : DspMMRel, SUBQH_R_W_MMR2_ENC, SUBQH_R_W_DESC, ISA_DSPR2; +def SUBU_PH_MMR2 : DspMMRel, SUBU_PH_MMR2_ENC, SUBU_PH_DESC, ISA_DSPR2; +def SUBU_S_PH_MMR2 : DspMMRel, SUBU_S_PH_MMR2_ENC, SUBU_S_PH_DESC, ISA_DSPR2; +def SUBUH_QB_MMR2 : DspMMRel, SUBUH_QB_MMR2_ENC, SUBUH_QB_DESC, ISA_DSPR2; +def SUBUH_R_QB_MMR2 : DspMMRel, SUBUH_R_QB_MMR2_ENC, SUBUH_R_QB_DESC, ISA_DSPR2; +def DPS_W_PH_MMR2 : DspMMRel, DPS_W_PH_MMR2_ENC, DPS_W_PH_DESC, ISA_DSPR2; +def DPSQX_S_W_PH_MMR2 : DspMMRel, DPSQX_S_W_PH_MMR2_ENC, DPSQX_S_W_PH_DESC, + ISA_DSPR2; +def DPSQX_SA_W_PH_MMR2 : DspMMRel, DPSQX_SA_W_PH_MMR2_ENC, DPSQX_SA_W_PH_DESC, + ISA_DSPR2; +def DPSX_W_PH_MMR2 : DspMMRel, DPSX_W_PH_MMR2_ENC, DPSX_W_PH_DESC, ISA_DSPR2; +def MUL_PH_MMR2 : DspMMRel, MUL_PH_MMR2_ENC, MUL_PH_DESC, ISA_DSPR2; +def MUL_S_PH_MMR2 : DspMMRel, MUL_S_PH_MMR2_ENC, MUL_S_PH_DESC, ISA_DSPR2; +def MULQ_RS_W_MMR2 : DspMMRel, MULQ_RS_W_MMR2_ENC, MULQ_RS_W_DESC, ISA_DSPR2; +def MULQ_S_PH_MMR2 : DspMMRel, MULQ_S_PH_MMR2_ENC, MULQ_S_PH_DESC, ISA_DSPR2; +def MULQ_S_W_MMR2 : DspMMRel, MULQ_S_W_MMR2_ENC, MULQ_S_W_DESC, ISA_DSPR2; +def PRECR_QB_PH_MMR2 : DspMMRel, PRECR_QB_PH_MMR2_ENC, PRECR_QB_PH_DESC, + ISA_DSPR2; +def PRECR_SRA_PH_W_MMR2 : DspMMRel, PRECR_SRA_PH_W_MMR2_ENC, + PRECR_SRA_PH_W_DESC, ISA_DSPR2; +def PRECR_SRA_R_PH_W_MMR2 : DspMMRel, PRECR_SRA_R_PH_W_MMR2_ENC, + PRECR_SRA_R_PH_W_DESC, ISA_DSPR2; +def PREPEND_MMR2 : DspMMRel, PREPEND_MMR2_ENC, PREPEND_DESC, ISA_DSPR2; + +// Instruction alias. 
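+// The alias below lets plain "wrdsp $rt" assemble as WRDSP_MM with the full
+// control-field mask (0x1F). Other single-operand shorthands could be spelled
+// the same way with MMDSPInstAlias; for instance (hypothetical alias, mask
+// value assumed rather than taken from this patch):
+//
+//   def : MMDSPInstAlias<"rddsp $rt", (RDDSP_MM GPR32Opnd:$rt, 0x1F), 1>;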
+def : MMDSPInstAlias<"wrdsp $rt", (WRDSP_MM GPR32Opnd:$rt, 0x1F), 1>; diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td index 004b0d51f4b4..756e6c92c1d1 100644 --- a/lib/Target/Mips/MicroMipsInstrFPU.td +++ b/lib/Target/Mips/MicroMipsInstrFPU.td @@ -37,23 +37,14 @@ def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM_MM<1>; -def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, IIBranch, MIPS_BRANCH_F>, +def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, II_BC1F, MIPS_BRANCH_F>, BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6; -def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, IIBranch, MIPS_BRANCH_T>, +def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, II_BC1T, MIPS_BRANCH_T>, BC1F_FM_MM<0x1d>, ISA_MIPS1_NOT_32R6_64R6; - -def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, - ROUND_W_FM_MM<0, 0x6c>; def CVT_W_S_MM : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>, ROUND_W_FM_MM<0, 0x24>; -def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>, - ROUND_W_FM_MM<0, 0x2c>; -def ROUND_W_S_MM : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, +def ROUND_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, ROUND_W_FM_MM<0, 0xec>; -def TRUNC_W_S_MM : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>, - ROUND_W_FM_MM<0, 0xac>; -def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, - fsqrt>, ROUND_W_FM_MM<0, 0x28>; def CEIL_W_MM : MMRel, ABSS_FT<"ceil.w.d", FGR32Opnd, AFGR64Opnd, II_CEIL>, ROUND_W_FM_MM<1, 0x6c>; @@ -61,7 +52,7 @@ def CVT_W_MM : MMRel, ABSS_FT<"cvt.w.d", FGR32Opnd, AFGR64Opnd, II_CVT>, ROUND_W_FM_MM<1, 0x24>; def FLOOR_W_MM : MMRel, ABSS_FT<"floor.w.d", FGR32Opnd, AFGR64Opnd, II_FLOOR>, ROUND_W_FM_MM<1, 0x2c>; -def ROUND_W_MM : MMRel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>, +def ROUND_W_MM : MMRel, StdMMR6Rel, ABSS_FT<"round.w.d", FGR32Opnd, AFGR64Opnd, II_ROUND>, ROUND_W_FM_MM<1, 0xec>; def TRUNC_W_MM : MMRel, ABSS_FT<"trunc.w.d", FGR32Opnd, AFGR64Opnd, II_TRUNC>, ROUND_W_FM_MM<1, 0xac>; @@ -146,3 +137,14 @@ def NMADD_D32_MM : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, def NMSUB_D32_MM : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, MADDS_FM_MM<0x2a>; } + +let AdditionalPredicates = [InMicroMips] in { + def FLOOR_W_S_MM : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, + II_FLOOR>, ROUND_W_FM_MM<0, 0x2c>; + def TRUNC_W_S_MM : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, + FGR32Opnd, II_TRUNC>, ROUND_W_FM_MM<0, 0xac>; + def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, + ROUND_W_FM_MM<0, 0x6c>; + def FSQRT_S_MM : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, + fsqrt>, ROUND_W_FM_MM<0, 0x28>; +} diff --git a/lib/Target/Mips/MicroMipsInstrFormats.td b/lib/Target/Mips/MicroMipsInstrFormats.td index 560afa48908c..b736367ee5fa 100644 --- a/lib/Target/Mips/MicroMipsInstrFormats.td +++ b/lib/Target/Mips/MicroMipsInstrFormats.td @@ -389,6 +389,22 @@ class LW_FM_MM op> : MMArch { let Inst{15-0} = addr{15-0}; } +class POOL32C_LHUE_FM_MM op, bits<4> fmt, bits<3> funct> : MMArch { + bits<5> rt; + bits<21> addr; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = rt; + let Inst{20-16} = base; + let Inst{15-12} = fmt; + let Inst{11-9} = funct; + let Inst{8-0} = offset; +} + class LWL_FM_MM 
funct> { bits<5> rt; bits<21> addr; @@ -402,6 +418,22 @@ class LWL_FM_MM funct> { let Inst{11-0} = addr{11-0}; } +class POOL32C_STEVA_LDEVA_FM_MM type, bits<3> funct> { + bits<5> rt; + bits<21> addr; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = 0x18; + let Inst{25-21} = rt; + let Inst{20-16} = base; + let Inst{15-12} = type; + let Inst{11-9} = funct; + let Inst{8-0} = offset; +} + class CMov_F_I_FM_MM func> : MMArch { bits<5> rd; bits<5> rs; @@ -655,6 +687,22 @@ class LL_FM_MM funct> { let Inst{11-0} = addr{11-0}; } +class LLE_FM_MM funct> { + bits<5> rt; + bits<21> addr; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = 0x18; + let Inst{25-21} = rt; + let Inst{20-16} = base; + let Inst{15-12} = funct; + let Inst{11-9} = 0x6; + let Inst{8-0} = offset; +} + class ADDS_FM_MM fmt, bits<8> funct> : MMArch { bits<5> ft; bits<5> fs; @@ -895,7 +943,7 @@ class LWM_FM_MM funct> : MMArch { let Inst{11-0} = addr{11-0}; } -class LWM_FM_MM16 funct> : MMArch { +class LWM_FM_MM16 funct> : MMArch, PredicateControl { bits<2> rt; bits<4> addr; @@ -922,6 +970,37 @@ class CACHE_PREF_FM_MM op, bits<4> funct> : MMArch { let Inst{11-0} = offset; } +class CACHE_PREFE_FM_MM op, bits<3> funct> : MMArch { + bits<21> addr; + bits<5> hint; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = hint; + let Inst{20-16} = base; + let Inst{15-12} = 0xA; + let Inst{11-9} = funct; + let Inst{8-0} = offset; +} + +class POOL32F_PREFX_FM_MM op, bits<9> funct> : MMArch { + bits<5> index; + bits<5> base; + bits<5> hint; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = index; + let Inst{20-16} = base; + let Inst{15-11} = hint; + let Inst{10-9} = 0x0; + let Inst{8-0} = funct; +} + class BARRIER_FM_MM op> : MMArch { bits<32> Inst; diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 39393840c6f2..99f0f446deab 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -13,11 +13,6 @@ def simm12 : Operand { let DecoderMethod = "DecodeSimm12"; } -def uimm5_lsl2 : Operand { - let EncoderMethod = "getUImm5Lsl2Encoding"; - let DecoderMethod = "DecodeUImm5lsl2"; -} - def uimm6_lsl2 : Operand { let EncoderMethod = "getUImm6Lsl2Encoding"; let DecoderMethod = "DecodeUImm6Lsl2"; @@ -30,6 +25,7 @@ def simm9_addiusp : Operand { def uimm3_shift : Operand { let EncoderMethod = "getUImm3Mod8Encoding"; + let DecoderMethod = "DecodePOOL16BEncodedField"; } def simm3_lsa2 : Operand { @@ -105,6 +101,14 @@ def mem_mm_gp_imm7_lsl2 : Operand { let EncoderMethod = "getMemEncodingMMGPImm7Lsl2"; } +def mem_mm_9 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops GPR32, simm9); + let EncoderMethod = "getMemEncodingMMImm9"; + let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; +} + def mem_mm_12 : Operand { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops GPR32, simm12); @@ -113,6 +117,14 @@ def mem_mm_12 : Operand { let OperandType = "OPERAND_MEMORY"; } +def mem_mm_16 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops GPR32, simm16); + let EncoderMethod = "getMemEncodingMMImm16"; + let ParserMatchClass = MipsMemAsmOperand; + let OperandType = "OPERAND_MEMORY"; +} + def MipsMemUimm4AsmOperand : AsmOperandClass { let Name = "MemOffsetUimm4"; let SuperClasses = [MipsMemAsmOperand]; @@ -166,7 +178,7 
@@ def simm23_lsl2 : Operand { class CompactBranchMM : InstSE<(outs), (ins RO:$rs, opnd:$offset), - !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> { + !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZC, FrmI> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 0; @@ -251,6 +263,13 @@ class LLBaseMM : let mayLoad = 1; } +class LLEBaseMM : + InstSE<(outs RO:$rt), (ins mem_mm_12:$addr), + !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMemMMImm9"; + let mayLoad = 1; +} + class SCBaseMM : InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr), !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { @@ -259,6 +278,14 @@ class SCBaseMM : let Constraints = "$rt = $dst"; } +class SCEBaseMM : + InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr), + !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> { + let DecoderMethod = "DecodeMemMMImm9"; + let mayStore = 1; + let Constraints = "$rt = $dst"; +} + class LoadMM : InstSE<(outs RO:$rt), (ins mem_mm_12:$addr), @@ -392,7 +419,7 @@ class LoadImmMM16 : // 16-bit Jump and Link (Call) class JumpLinkRegMM16 : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), - [(MipsJmpLink RO:$rs)], IIBranch, FrmR> { + [(MipsJmpLink RO:$rs)], II_JALR, FrmR>, PredicateControl { let isCall = 1; let hasDelaySlot = 1; let Defs = [RA]; @@ -401,7 +428,7 @@ class JumpLinkRegMM16 : // 16-bit Jump Reg class JumpRegMM16 : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), - [], IIBranch, FrmR> { + [], II_JR, FrmR> { let hasDelaySlot = 1; let isBranch = 1; let isIndirectBranch = 1; @@ -410,7 +437,7 @@ class JumpRegMM16 : // Base class for JRADDIUSP instruction. class JumpRAddiuStackMM16 : MicroMipsInst16<(outs), (ins uimm5_lsl2:$imm), "jraddiusp\t$imm", - [], IIBranch, FrmR> { + [], II_JRADDIUSP, FrmR> { let isTerminator = 1; let isBarrier = 1; let isBranch = 1; @@ -420,7 +447,7 @@ class JumpRAddiuStackMM16 : // 16-bit Jump and Link (Call) - Short Delay Slot class JumpLinkRegSMM16 : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), - [], IIBranch, FrmR> { + [], II_JALRS, FrmR> { let isCall = 1; let hasDelaySlot = 1; let Defs = [RA]; @@ -429,7 +456,7 @@ class JumpLinkRegSMM16 : // 16-bit Jump Register Compact - No delay slot class JumpRegCMM16 : MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"), - [], IIBranch, FrmR> { + [], II_JRC, FrmR> { let isTerminator = 1; let isBarrier = 1; let isBranch = 1; @@ -444,7 +471,7 @@ class BrkSdbbp16MM : class CBranchZeroMM : MicroMipsInst16<(outs), (ins RO:$rs, opnd:$offset), - !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI> { + !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZ, FrmI> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; @@ -455,18 +482,18 @@ class CBranchZeroMM : let isCall = 1, hasDelaySlot = 1, Defs = [RA] in { class JumpLinkMM : InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), - [], IIBranch, FrmJ, opstr> { + [], II_JALS, FrmJ, opstr> { let DecoderMethod = "DecodeJumpTargetMM"; } class JumpLinkRegMM: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), - [], IIBranch, FrmR>; + [], II_JALRS, FrmR>; class BranchCompareToZeroLinkMM : InstSE<(outs), (ins RO:$rs, opnd:$offset), - !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr>; + !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZALS, FrmI, opstr>; } class LoadWordIndexedScaledMM; +class PrefetchIndexed : + InstSE<(outs), (ins PtrRC:$base, PtrRC:$index, uimm5:$hint), + !strconcat(opstr, 
"\t$hint, ${index}(${base})"), [], NoItinerary, FrmOther>; + class AddImmUPC : InstSE<(outs RO:$rs), (ins simm23_lsl2:$imm), !strconcat(opstr, "\t$rs, $imm"), [], NoItinerary, FrmR>; @@ -543,7 +574,7 @@ class LoadMultMM16 : MicroMipsInst16<(outs), (ins brtarget10_mm:$offset), !strconcat(opstr, "\t$offset"), - [], IIBranch, FrmI> { + [], II_B, FrmI> { let isBranch = 1; let isTerminator = 1; let isBarrier = 1; @@ -553,21 +584,24 @@ class UncondBranchMM16 : } def ADDU16_MM : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>, - ARITH_FM_MM16<0>; -def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, - ARITH_FM_MM16<1>; -def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>; + ARITH_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6; def AND16_MM : LogicRMM16<"and16", GPRMM16Opnd, II_AND, and>, - LOGIC_FM_MM16<0x2>; -def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>, - LOGIC_FM_MM16<0x3>; -def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>, - LOGIC_FM_MM16<0x1>; -def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>; + LOGIC_FM_MM16<0x2>, ISA_MICROMIPS_NOT_32R6_64R6; +def ANDI16_MM : AndImmMM16<"andi16", GPRMM16Opnd, II_AND>, ANDI_FM_MM16<0x0b>, + ISA_MICROMIPS_NOT_32R6_64R6; +def NOT16_MM : NotMM16<"not16", GPRMM16Opnd>, LOGIC_FM_MM16<0x0>, + ISA_MICROMIPS_NOT_32R6_64R6; +def OR16_MM : LogicRMM16<"or16", GPRMM16Opnd, II_OR, or>, LOGIC_FM_MM16<0x3>, + ISA_MICROMIPS_NOT_32R6_64R6; def SLL16_MM : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>, - SHIFT_FM_MM16<0>; + SHIFT_FM_MM16<0>, ISA_MICROMIPS_NOT_32R6_64R6; def SRL16_MM : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>, - SHIFT_FM_MM16<1>; + SHIFT_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6; + +def SUBU16_MM : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>, + ARITH_FM_MM16<1>, ISA_MICROMIPS_NOT_32R6_64R6; +def XOR16_MM : LogicRMM16<"xor16", GPRMM16Opnd, II_XOR, xor>, + LOGIC_FM_MM16<0x1>, ISA_MICROMIPS_NOT_32R6_64R6; def LBU16_MM : LoadMM16<"lbu16", GPRMM16Opnd, zextloadi8, II_LBU, mem_mm_4>, LOAD_STORE_FM_MM16<0x02>; def LHU16_MM : LoadMM16<"lhu16", GPRMM16Opnd, zextloadi16, II_LHU, @@ -597,7 +631,8 @@ def MOVE16_MM : MoveMM16<"move", GPR32Opnd>, MOVE_FM_MM16<0x03>; def MOVEP_MM : MovePMM16<"movep", GPRMM16OpndMoveP>, MOVEP_FM_MM16; def LI16_MM : LoadImmMM16<"li16", li_simm7, GPRMM16Opnd>, LI_FM_MM16, IsAsCheapAsAMove; -def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>; +def JALR16_MM : JumpLinkRegMM16<"jalr", GPR32Opnd>, JALR_FM_MM16<0x0e>, + ISA_MICROMIPS32_NOT_MIPS32R6; def JALRS16_MM : JumpLinkRegSMM16<"jalrs16", GPR32Opnd>, JALR_FM_MM16<0x0f>; def JRC16_MM : JumpRegCMM16<"jrc", GPR32Opnd>, JALR_FM_MM16<0x0d>; def JRADDIUSP : JumpRAddiuStackMM16, JRADDIUSP_FM_MM16<0x18>; @@ -607,8 +642,18 @@ def BEQZ16_MM : CBranchZeroMM<"beqz16", brtarget7_mm, GPRMM16Opnd>, def BNEZ16_MM : CBranchZeroMM<"bnez16", brtarget7_mm, GPRMM16Opnd>, BEQNEZ_FM_MM16<0x2b>; def B16_MM : UncondBranchMM16<"b16">, B16_FM; -def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>; -def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>; +def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>, + ISA_MICROMIPS_NOT_32R6_64R6; +def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>, + ISA_MICROMIPS_NOT_32R6_64R6; + +let DecoderNamespace = "MicroMips" in { + /// Load and Store Instructions - multiple + def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>, + ISA_MICROMIPS32_NOT_MIPS32R6; + def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>, + 
ISA_MICROMIPS32_NOT_MIPS32R6; +} class WaitMM : InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [], @@ -701,6 +746,18 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>; } + let DecoderMethod = "DecodeMemMMImm9" in { + def LBE_MM : Load<"lbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x4>; + def LBuE_MM : Load<"lbue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x0>; + def LHE_MM : Load<"lhe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x5>; + def LHuE_MM : Load<"lhue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x1>; + def LWE_MM : Load<"lwe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x7>; + def SBE_MM : Store<"sbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x4>; + def SHE_MM : Store<"she", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0xa, 0x5>; + def SWE_MM : StoreMemory<"swe", GPR32Opnd, mem_simm9gpr>, + POOL32C_LHUE_FM_MM<0x18, 0xa, 0x7>; + } + def LWXS_MM : LoadWordIndexedScaledMM<"lwxs", GPR32Opnd>, LWXS_FM_MM<0x118>; def LWU_MM : LoadMM<"lwu", GPR32Opnd, zextloadi32, II_LWU>, LL_FM_MM<0xe>; @@ -714,12 +771,20 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { LWL_FM_MM<0x8>; def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>, LWL_FM_MM<0x9>; + let DecoderMethod = "DecodeMemMMImm9" in { + def LWLE_MM : LoadLeftRightMM<"lwle", MipsLWL, GPR32Opnd, mem_mm_12>, + POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x2>; + def LWRE_MM : LoadLeftRightMM<"lwre", MipsLWR, GPR32Opnd, mem_mm_12>, + POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x3>; + def SWLE_MM : StoreLeftRightMM<"swle", MipsSWL, GPR32Opnd, mem_mm_12>, + POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x0>; + def SWRE_MM : StoreLeftRightMM<"swre", MipsSWR, GPR32Opnd, mem_mm_12>, + POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x1>, ISA_MIPS1_NOT_32R6_64R6; + } /// Load and Store Instructions - multiple def SWM32_MM : StoreMultMM<"swm32">, LWM_FM_MM<0xd>; def LWM32_MM : LoadMultMM<"lwm32">, LWM_FM_MM<0x5>; - def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>; - def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>; /// Load and Store Pair Instructions def SWP_MM : StorePairMM<"swp">, LWM_FM_MM<0x9>; @@ -777,11 +842,11 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { SEB_FM_MM<0x0ec>, ISA_MIPS32R2; /// Word Swap Bytes Within Halfwords - def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>, - ISA_MIPS32R2; - - def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, - EXT_FM_MM<0x2c>; + def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd, II_WSBH>, + SEB_FM_MM<0x1ec>, ISA_MIPS32R2; + // TODO: Add '0 < pos+size <= 32' constraint check to ext instruction + def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1, + MipsExt>, EXT_FM_MM<0x2c>; def INS_MM : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM_MM<0x0c>; @@ -854,12 +919,22 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def LL_MM : LLBaseMM<"ll", GPR32Opnd>, LL_FM_MM<0x3>; def SC_MM : SCBaseMM<"sc", GPR32Opnd>, LL_FM_MM<0xb>; + def LLE_MM : LLEBaseMM<"lle", GPR32Opnd>, LLE_FM_MM<0x6>; + def SCE_MM : SCEBaseMM<"sce", GPR32Opnd>, LLE_FM_MM<0xA>; + let DecoderMethod = "DecodeCacheOpMM" in { def CACHE_MM : MMRel, CacheOp<"cache", mem_mm_12>, CACHE_PREF_FM_MM<0x08, 0x6>; def PREF_MM : MMRel, CacheOp<"pref", mem_mm_12>, CACHE_PREF_FM_MM<0x18, 0x2>; } + + let DecoderMethod = "DecodePrefeOpMM" in { + def PREFE_MM : MMRel, CacheOp<"prefe", mem_mm_9>, + CACHE_PREFE_FM_MM<0x18, 0x2>; + def CACHEE_MM : MMRel, CacheOp<"cachee", mem_mm_9>, + 
CACHE_PREFE_FM_MM<0x18, 0x3>; + } def SSNOP_MM : MMRel, Barrier<"ssnop">, BARRIER_FM_MM<0x1>; def EHB_MM : MMRel, Barrier<"ehb">, BARRIER_FM_MM<0x3>; def PAUSE_MM : MMRel, Barrier<"pause">, BARRIER_FM_MM<0x5>; @@ -870,7 +945,13 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { def TLBWR_MM : MMRel, TLB<"tlbwr">, COP0_TLB_FM_MM<0xcd>; def SDBBP_MM : MMRel, SYS_FT<"sdbbp">, SDBBP_FM_MM; - def RDHWR_MM : MMRel, ReadHardware, RDHWR_FM_MM; + + def PREFX_MM : PrefetchIndexed<"prefx">, POOL32F_PREFX_FM_MM<0x15, 0x1A0>; +} + +let DecoderNamespace = "MicroMips" in { + def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware, + RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6; } let Predicates = [InMicroMips] in { @@ -928,7 +1009,7 @@ class UncondBranchMMPseudo : MipsAsmPseudoInst<(outs), (ins brtarget_mm:$offset), !strconcat(opstr, "\t$offset")>; - def B_MM_Pseudo : UncondBranchMMPseudo<"b">; +def B_MM_Pseudo : UncondBranchMMPseudo<"b">, ISA_MICROMIPS; def : MipsInstAlias<"wait", (WAIT_MM 0x0), 1>; def : MipsInstAlias<"nop", (SLL_MM ZERO, ZERO, 0), 1>; @@ -937,4 +1018,17 @@ class UncondBranchMMPseudo : let Predicates = [InMicroMips] in { def : MipsInstAlias<"ei", (EI_MM ZERO), 1>, ISA_MIPS32R2; +def : MipsInstAlias<"di", (DI_MM ZERO), 1>, ISA_MIPS32R2; +def : MipsInstAlias<"teq $rs, $rt", + (TEQ_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tge $rs, $rt", + (TGE_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tgeu $rs, $rt", + (TGEU_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tlt $rs, $rt", + (TLT_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tltu $rs, $rt", + (TLTU_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; +def : MipsInstAlias<"tne $rs, $rt", + (TNE_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>; } diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index dbb5f7b71d82..35352b6115c5 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -154,9 +154,14 @@ def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true", def FeatureDSP : SubtargetFeature<"dsp", "HasDSP", "true", "Mips DSP ASE">; def FeatureDSPR2 : SubtargetFeature<"dspr2", "HasDSPR2", "true", "Mips DSP-R2 ASE", [FeatureDSP]>; +def FeatureDSPR3 + : SubtargetFeature<"dspr3", "HasDSPR3", "true", "Mips DSP-R3 ASE", + [ FeatureDSP, FeatureDSPR2 ]>; def FeatureMSA : SubtargetFeature<"msa", "HasMSA", "true", "Mips MSA ASE">; +def FeatureEVA : SubtargetFeature<"eva", "HasEVA", "true", "Mips EVA ASE">; + def FeatureMicroMips : SubtargetFeature<"micromips", "InMicroMipsMode", "true", "microMips mode">; @@ -164,10 +169,19 @@ def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips", "true", "Octeon cnMIPS Support", [FeatureMips64r2]>; +def FeatureUseTCCInDIV : SubtargetFeature< + "use-tcc-in-div", + "UseTCCInDIV", "false", + "Force the assembler to use trapping">; + //===----------------------------------------------------------------------===// // Mips processors supported. 
//===----------------------------------------------------------------------===// +def ImplP5600 : SubtargetFeature<"p5600", "ProcImpl", + "MipsSubtarget::CPU::P5600", + "The P5600 Processor", [FeatureMips32r5]>; + class Proc Features> : Processor; @@ -187,12 +201,11 @@ def : Proc<"mips64r2", [FeatureMips64r2]>; def : Proc<"mips64r3", [FeatureMips64r3]>; def : Proc<"mips64r5", [FeatureMips64r5]>; def : Proc<"mips64r6", [FeatureMips64r6]>; -def : Proc<"mips16", [FeatureMips16]>; def : Proc<"octeon", [FeatureMips64r2, FeatureCnMips]>; +def : ProcessorModel<"p5600", MipsP5600Model, [ImplP5600]>; def MipsAsmParser : AsmParser { let ShouldEmitMatchRegisterName = 0; - let MnemonicContainsDot = 1; } def MipsAsmParserVariant : AsmParserVariant { diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index 46cc99c62393..26426c087164 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -39,7 +39,11 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF, const Mips16InstrInfo &TII = *static_cast(STI.getInstrInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + uint64_t StackSize = MFI->getStackSize(); // No need to allocate space on the stack. @@ -107,7 +111,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, const std::vector &CSI, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); - MachineBasicBlock *EntryBlock = MF->begin(); + MachineBasicBlock *EntryBlock = &MF->front(); // // Registers RA, S0,S1 are the callee saved registers and they diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp index 893fc7cdf473..b2bc7e74c706 100644 --- a/lib/Target/Mips/Mips16HardFloat.cpp +++ b/lib/Target/Mips/Mips16HardFloat.cpp @@ -40,26 +40,17 @@ namespace { const MipsTargetMachine &TM; }; - class InlineAsmHelper { - LLVMContext &C; - BasicBlock *BB; - public: - InlineAsmHelper(LLVMContext &C_, BasicBlock *BB_) : - C(C_), BB(BB_) { - } + static void EmitInlineAsm(LLVMContext &C, BasicBlock *BB, StringRef AsmText) { + std::vector AsmArgTypes; + std::vector AsmArgs; - void Out(StringRef AsmString) { - std::vector AsmArgTypes; - std::vector AsmArgs; - - llvm::FunctionType *AsmFTy = llvm::FunctionType::get(Type::getVoidTy(C), - AsmArgTypes, false); - llvm::InlineAsm *IA = llvm::InlineAsm::get(AsmFTy, AsmString, "", true, - /* IsAlignStack */ false, - llvm::InlineAsm::AD_ATT); - CallInst::Create(IA, AsmArgs, "", BB); - } - }; + llvm::FunctionType *AsmFTy = + llvm::FunctionType::get(Type::getVoidTy(C), AsmArgTypes, false); + llvm::InlineAsm *IA = + llvm::InlineAsm::get(AsmFTy, AsmText, "", true, + /* IsAlignStack */ false, llvm::InlineAsm::AD_ATT); + CallInst::Create(IA, AsmArgs, "", BB); + } char Mips16HardFloat::ID = 0; } @@ -182,7 +173,7 @@ static bool needsFPReturnHelper(Function &F) { return whichFPReturnVariant(RetType) != NoFPRet; } -static bool needsFPReturnHelper(const FunctionType &FT) { +static bool needsFPReturnHelper(FunctionType &FT) { Type* RetType = FT.getReturnType(); return whichFPReturnVariant(RetType) != NoFPRet; } @@ -195,63 +186,72 @@ static bool needsFPHelperFromSig(Function &F) { // We swap between FP and Integer registers to allow Mips16 and Mips32 to // interoperate // -static void swapFPIntParams(FPParamVariant PV, Module *M, 
InlineAsmHelper &IAH, - bool LE, bool ToFP) { - //LLVMContext &Context = M->getContext(); - std::string MI = ToFP? "mtc1 ": "mfc1 "; +static std::string swapFPIntParams(FPParamVariant PV, Module *M, bool LE, + bool ToFP) { + std::string MI = ToFP ? "mtc1 ": "mfc1 "; + std::string AsmText; + switch (PV) { case FSig: - IAH.Out(MI + "$$4,$$f12"); + AsmText += MI + "$$4, $$f12\n"; break; + case FFSig: - IAH.Out(MI +"$$4,$$f12"); - IAH.Out(MI + "$$5,$$f14"); + AsmText += MI + "$$4, $$f12\n"; + AsmText += MI + "$$5, $$f14\n"; break; + case FDSig: - IAH.Out(MI + "$$4,$$f12"); + AsmText += MI + "$$4, $$f12\n"; if (LE) { - IAH.Out(MI + "$$6,$$f14"); - IAH.Out(MI + "$$7,$$f15"); + AsmText += MI + "$$6, $$f14\n"; + AsmText += MI + "$$7, $$f15\n"; } else { - IAH.Out(MI + "$$7,$$f14"); - IAH.Out(MI + "$$6,$$f15"); + AsmText += MI + "$$7, $$f14\n"; + AsmText += MI + "$$6, $$f15\n"; } break; + case DSig: if (LE) { - IAH.Out(MI + "$$4,$$f12"); - IAH.Out(MI + "$$5,$$f13"); + AsmText += MI + "$$4, $$f12\n"; + AsmText += MI + "$$5, $$f13\n"; } else { - IAH.Out(MI + "$$5,$$f12"); - IAH.Out(MI + "$$4,$$f13"); + AsmText += MI + "$$5, $$f12\n"; + AsmText += MI + "$$4, $$f13\n"; } break; + case DDSig: if (LE) { - IAH.Out(MI + "$$4,$$f12"); - IAH.Out(MI + "$$5,$$f13"); - IAH.Out(MI + "$$6,$$f14"); - IAH.Out(MI + "$$7,$$f15"); + AsmText += MI + "$$4, $$f12\n"; + AsmText += MI + "$$5, $$f13\n"; + AsmText += MI + "$$6, $$f14\n"; + AsmText += MI + "$$7, $$f15\n"; } else { - IAH.Out(MI + "$$5,$$f12"); - IAH.Out(MI + "$$4,$$f13"); - IAH.Out(MI + "$$7,$$f14"); - IAH.Out(MI + "$$6,$$f15"); + AsmText += MI + "$$5, $$f12\n"; + AsmText += MI + "$$4, $$f13\n"; + AsmText += MI + "$$7, $$f14\n"; + AsmText += MI + "$$6, $$f15\n"; } break; + case DFSig: if (LE) { - IAH.Out(MI + "$$4,$$f12"); - IAH.Out(MI + "$$5,$$f13"); + AsmText += MI + "$$4, $$f12\n"; + AsmText += MI + "$$5, $$f13\n"; } else { - IAH.Out(MI + "$$5,$$f12"); - IAH.Out(MI + "$$4,$$f13"); + AsmText += MI + "$$5, $$f12\n"; + AsmText += MI + "$$4, $$f13\n"; } - IAH.Out(MI + "$$6,$$f14"); + AsmText += MI + "$$6, $$f14\n"; break; + case NoSig: - return; + break; } + + return AsmText; } // @@ -282,68 +282,77 @@ static void assureFPCallStub(Function &F, Module *M, FStub->addFnAttr("nomips16"); FStub->setSection(SectionName); BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub); - InlineAsmHelper IAH(Context, BB); - IAH.Out(".set reorder"); FPReturnVariant RV = whichFPReturnVariant(FStub->getReturnType()); FPParamVariant PV = whichFPParamVariantNeeded(F); - swapFPIntParams(PV, M, IAH, LE, true); + + std::string AsmText; + AsmText += ".set reorder\n"; + AsmText += swapFPIntParams(PV, M, LE, true); if (RV != NoFPRet) { - IAH.Out("move $$18, $$31"); - IAH.Out("jal " + Name); + AsmText += "move $$18, $$31\n"; + AsmText += "jal " + Name + "\n"; } else { - IAH.Out("lui $$25,%hi(" + Name + ")"); - IAH.Out("addiu $$25,$$25,%lo(" + Name + ")" ); + AsmText += "lui $$25, %hi(" + Name + ")\n"; + AsmText += "addiu $$25, $$25, %lo(" + Name + ")\n"; } + switch (RV) { case FRet: - IAH.Out("mfc1 $$2,$$f0"); + AsmText += "mfc1 $$2, $$f0\n"; break; + case DRet: if (LE) { - IAH.Out("mfc1 $$2,$$f0"); - IAH.Out("mfc1 $$3,$$f1"); + AsmText += "mfc1 $$2, $$f0\n"; + AsmText += "mfc1 $$3, $$f1\n"; } else { - IAH.Out("mfc1 $$3,$$f0"); - IAH.Out("mfc1 $$2,$$f1"); + AsmText += "mfc1 $$3, $$f0\n"; + AsmText += "mfc1 $$2, $$f1\n"; } break; + case CFRet: if (LE) { - IAH.Out("mfc1 $$2,$$f0"); - IAH.Out("mfc1 $$3,$$f2"); + AsmText += "mfc1 $$2, $$f0\n"; + AsmText += "mfc1 $$3, $$f2\n"; } else { - 
IAH.Out("mfc1 $$3,$$f0"); - IAH.Out("mfc1 $$3,$$f2"); + AsmText += "mfc1 $$3, $$f0\n"; + AsmText += "mfc1 $$3, $$f2\n"; } break; + case CDRet: if (LE) { - IAH.Out("mfc1 $$4,$$f2"); - IAH.Out("mfc1 $$5,$$f3"); - IAH.Out("mfc1 $$2,$$f0"); - IAH.Out("mfc1 $$3,$$f1"); + AsmText += "mfc1 $$4, $$f2\n"; + AsmText += "mfc1 $$5, $$f3\n"; + AsmText += "mfc1 $$2, $$f0\n"; + AsmText += "mfc1 $$3, $$f1\n"; } else { - IAH.Out("mfc1 $$5,$$f2"); - IAH.Out("mfc1 $$4,$$f3"); - IAH.Out("mfc1 $$3,$$f0"); - IAH.Out("mfc1 $$2,$$f1"); + AsmText += "mfc1 $$5, $$f2\n"; + AsmText += "mfc1 $$4, $$f3\n"; + AsmText += "mfc1 $$3, $$f0\n"; + AsmText += "mfc1 $$2, $$f1\n"; } break; + case NoFPRet: break; } + if (RV != NoFPRet) - IAH.Out("jr $$18"); + AsmText += "jr $$18\n"; else - IAH.Out("jr $$25"); + AsmText += "jr $$25\n"; + EmitInlineAsm(Context, BB, AsmText); + new UnreachableInst(Context, BB); } // // Functions that are llvm intrinsics and don't need helpers. // -static const char *IntrinsicInline[] = { +static const char *const IntrinsicInline[] = { "fabs", "fabsf", "llvm.ceil.f32", "llvm.ceil.f64", "llvm.copysign.f32", "llvm.copysign.f64", @@ -395,7 +404,7 @@ static bool fixupFPReturnAndCall(Function &F, Module *M, Type *T = RVal->getType(); FPReturnVariant RV = whichFPReturnVariant(T); if (RV == NoFPRet) continue; - static const char* Helper[NoFPRet] = { + static const char *const Helper[NoFPRet] = { "__mips16_ret_sf", "__mips16_ret_df", "__mips16_ret_sc", "__mips16_ret_dc" }; @@ -419,11 +428,11 @@ static bool fixupFPReturnAndCall(Function &F, Module *M, CallInst::Create(F, Params, "", &Inst ); } else if (const CallInst *CI = dyn_cast(I)) { const Value* V = CI->getCalledValue(); - const Type* T = nullptr; + Type* T = nullptr; if (V) T = V->getType(); - const PointerType *PFT=nullptr; + PointerType *PFT = nullptr; if (T) PFT = dyn_cast(T); - const FunctionType *FT=nullptr; + FunctionType *FT = nullptr; if (PFT) FT = dyn_cast(PFT->getElementType()); Function *F_ = CI->getCalledFunction(); if (FT && needsFPReturnHelper(*FT) && @@ -469,20 +478,21 @@ static void createFPFnStub(Function *F, Module *M, FPParamVariant PV, FStub->addFnAttr("nomips16"); FStub->setSection(SectionName); BasicBlock *BB = BasicBlock::Create(Context, "entry", FStub); - InlineAsmHelper IAH(Context, BB); + + std::string AsmText; if (PicMode) { - IAH.Out(".set noreorder"); - IAH.Out(".cpload $$25"); - IAH.Out(".set reorder"); - IAH.Out(".reloc 0,R_MIPS_NONE," + Name); - IAH.Out("la $$25," + LocalName); - } - else { - IAH.Out("la $$25," + Name); - } - swapFPIntParams(PV, M, IAH, LE, false); - IAH.Out("jr $$25"); - IAH.Out(LocalName + " = " + Name); + AsmText += ".set noreorder\n"; + AsmText += ".cpload $$25\n"; + AsmText += ".set reorder\n"; + AsmText += ".reloc 0, R_MIPS_NONE, " + Name + "\n"; + AsmText += "la $$25, " + LocalName + "\n"; + } else + AsmText += "la $$25, " + Name + "\n"; + AsmText += swapFPIntParams(PV, M, LE, false); + AsmText += "jr $$25\n"; + AsmText += LocalName + " = " + Name + "\n"; + EmitInlineAsm(Context, BB, AsmText); + new UnreachableInst(FStub->getContext(), BB); } @@ -535,7 +545,7 @@ bool Mips16HardFloat::runOnModule(Module &M) { FPParamVariant V = whichFPParamVariantNeeded(*F); if (V != NoSig) { Modified = true; - createFPFnStub(F, &M, V, TM); + createFPFnStub(&*F, &M, V, TM); } } return Modified; diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index bce2c1eb4485..5a1c2c67cc70 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ 
-73,7 +73,7 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; unsigned V0, V1, V2, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC = &Mips::CPU16RegsRegClass; diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 3522cbb1f36a..e7483253e61d 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -530,8 +530,7 @@ emitSel16(unsigned Opc, MachineInstr *MI, MachineBasicBlock *BB) const { // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -592,8 +591,7 @@ Mips16TargetLowering::emitSelT16(unsigned Opc1, unsigned Opc2, MachineInstr *MI, // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -657,8 +655,7 @@ Mips16TargetLowering::emitSeliT16(unsigned Opc1, unsigned Opc2, // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp index a49572efdbf9..da8ada4e5391 100644 --- a/lib/Target/Mips/Mips16InstrInfo.cpp +++ b/lib/Target/Mips/Mips16InstrInfo.cpp @@ -196,7 +196,7 @@ static void addSaveRestoreRegs(MachineInstrBuilder &MIB, void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); const BitVector Reserved = RI.getReservedRegs(MF); @@ -263,7 +263,7 @@ void Mips16InstrInfo::adjustStackPtrBig(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Reg1, unsigned Reg2) const { - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; // // li reg1, constant // move reg2, sp @@ -446,7 +446,7 @@ const MCInstrDesc &Mips16InstrInfo::AddiuSpImm(int64_t Imm) const { void Mips16InstrInfo::BuildAddiuSpImm (MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const { - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; BuildMI(MBB, I, DL, AddiuSpImm(Imm)).addImm(Imm); } diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 10fff03b7240..dad6ea4c9e98 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -530,19 +530,19 @@ class MayStore { // Purpose: Add Immediate Unsigned Word (2-Operand, Extended) // To add a constant to a 32-bit integer. 
// -def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIAlu>; +def AddiuRxImmX16: FEXT_RI16_ins<0b01001, "addiu", IIM16Alu>; -def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIAlu>, +def AddiuRxRxImm16: F2RI16_ins<0b01001, "addiu", IIM16Alu>, ArithLogic16Defs<0> { let AddedComplexity = 5; } -def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIAlu>, +def AddiuRxRxImmX16: FEXT_2RI16_ins<0b01001, "addiu", IIM16Alu>, ArithLogic16Defs<0> { let isCodeGenOnly = 1; } def AddiuRxRyOffMemX16: - FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIAlu>; + FEXT_RRI_A16_mem_ins<0, "addiu", mem16_ea, IIM16Alu>; // @@ -550,7 +550,7 @@ def AddiuRxRyOffMemX16: // Purpose: Add Immediate Unsigned Word (3-Operand, PC-Relative, Extended) // To add a constant to the program counter. // -def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; +def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIM16Alu>; // // Format: ADDIU sp, immediate MIPS16e @@ -558,14 +558,14 @@ def AddiuRxPcImmX16: FEXT_RI16_PC_ins<0b00001, "addiu", IIAlu>; // To add a constant to the stack pointer. // def AddiuSpImm16 - : FI816_SP_ins<0b011, "addiu", IIAlu> { + : FI816_SP_ins<0b011, "addiu", IIM16Alu> { let Defs = [SP]; let Uses = [SP]; let AddedComplexity = 5; } def AddiuSpImmX16 - : FEXT_I816_SP_ins<0b011, "addiu", IIAlu> { + : FEXT_I816_SP_ins<0b011, "addiu", IIM16Alu> { let Defs = [SP]; let Uses = [SP]; } @@ -576,14 +576,14 @@ def AddiuSpImmX16 // To add 32-bit integers. // -def AdduRxRyRz16: FRRR16_ins<01, "addu", IIAlu>, ArithLogic16Defs<1>; +def AdduRxRyRz16: FRRR16_ins<01, "addu", IIM16Alu>, ArithLogic16Defs<1>; // // Format: AND rx, ry MIPS16e // Purpose: AND // To do a bitwise logical AND. -def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; +def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIM16Alu>, ArithLogic16Defs<1>; // @@ -591,7 +591,7 @@ def AndRxRxRy16: FRxRxRy16_ins<0b01100, "and", IIAlu>, ArithLogic16Defs<1>; // Purpose: Branch on Equal to Zero // To test a GPR then do a PC-relative conditional branch. // -def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; +def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIM16Alu>, cbranch16; // @@ -599,7 +599,7 @@ def BeqzRxImm16: FRI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; // Purpose: Branch on Equal to Zero (Extended) // To test a GPR then do a PC-relative conditional branch. // -def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; +def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIM16Alu>, cbranch16; // // Format: B offset MIPS16e @@ -607,27 +607,27 @@ def BeqzRxImmX16: FEXT_RI16_B_ins<0b00100, "beqz", IIAlu>, cbranch16; // To do an unconditional PC-relative branch. // -def Bimm16: FI16_ins<0b00010, "b", IIAlu>, branch16; +def Bimm16: FI16_ins<0b00010, "b", IIM16Alu>, branch16; // Format: B offset MIPS16e // Purpose: Unconditional Branch // To do an unconditional PC-relative branch. // -def BimmX16: FEXT_I16_ins<0b00010, "b", IIAlu>, branch16; +def BimmX16: FEXT_I16_ins<0b00010, "b", IIM16Alu>, branch16; // // Format: BNEZ rx, offset MIPS16e // Purpose: Branch on Not Equal to Zero // To test a GPR then do a PC-relative conditional branch. // -def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16; +def BnezRxImm16: FRI16_B_ins<0b00101, "bnez", IIM16Alu>, cbranch16; // // Format: BNEZ rx, offset MIPS16e // Purpose: Branch on Not Equal to Zero (Extended) // To test a GPR then do a PC-relative conditional branch. 
// -def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIAlu>, cbranch16; +def BnezRxImmX16: FEXT_RI16_B_ins<0b00101, "bnez", IIM16Alu>, cbranch16; // @@ -641,11 +641,11 @@ def Break16: FRRBreakNull16_ins<"break 0", NoItinerary>; // Purpose: Branch on T Equal to Zero (Extended) // To test special register T then do a PC-relative conditional branch. // -def Bteqz16: FI816_ins<0b000, "bteqz", IIAlu>, cbranch16 { +def Bteqz16: FI816_ins<0b000, "bteqz", IIM16Alu>, cbranch16 { let Uses = [T8]; } -def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIAlu>, cbranch16 { +def BteqzX16: FEXT_I816_ins<0b000, "bteqz", IIM16Alu>, cbranch16 { let Uses = [T8]; } @@ -669,11 +669,11 @@ def BteqzT8SltiuX16: FEXT_T8I8I16_ins<"bteqz", "sltiu">, // To test special register T then do a PC-relative conditional branch. // -def Btnez16: FI816_ins<0b001, "btnez", IIAlu>, cbranch16 { +def Btnez16: FI816_ins<0b001, "btnez", IIM16Alu>, cbranch16 { let Uses = [T8]; } -def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIAlu> ,cbranch16 { +def BtnezX16: FEXT_I816_ins<0b001, "btnez", IIM16Alu> ,cbranch16 { let Uses = [T8]; } @@ -695,7 +695,7 @@ def BtnezT8SltiuX16: FEXT_T8I8I16_ins<"btnez", "sltiu">, // Purpose: Compare // To compare the contents of two GPRs. // -def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> { +def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIM16Alu> { let Defs = [T8]; } @@ -704,7 +704,7 @@ def CmpRxRy16: FRR16R_ins<0b01010, "cmp", IIAlu> { // Purpose: Compare Immediate // To compare a constant with the contents of a GPR. // -def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> { +def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIM16Alu> { let Defs = [T8]; } @@ -713,7 +713,7 @@ def CmpiRxImm16: FRI16R_ins<0b01110, "cmpi", IIAlu> { // Purpose: Compare Immediate (Extended) // To compare a constant with the contents of a GPR. // -def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> { +def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIM16Alu> { let Defs = [T8]; } @@ -723,7 +723,7 @@ def CmpiRxImmX16: FEXT_RI16R_ins<0b01110, "cmpi", IIAlu> { // Purpose: Divide Word // To divide 32-bit signed integers. // -def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { +def DivRxRy16: FRR16_div_ins<0b11010, "div", IIM16Alu> { let Defs = [HI0, LO0]; } @@ -732,7 +732,7 @@ def DivRxRy16: FRR16_div_ins<0b11010, "div", IIAlu> { // Purpose: Divide Unsigned Word // To divide 32-bit unsigned integers. // -def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { +def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIM16Alu> { let Defs = [HI0, LO0]; } // @@ -742,13 +742,13 @@ def DivuRxRy16: FRR16_div_ins<0b11011, "divu", IIAlu> { // region and preserve the current ISA. // -def Jal16 : FJAL16_ins<0b0, "jal", IIAlu> { +def Jal16 : FJAL16_ins<0b0, "jal", IIM16Alu> { let hasDelaySlot = 0; // not true, but we add the nop for now let isCall=1; let Defs = [RA]; } -def JalB16 : FJALB16_ins<0b0, "jal", IIAlu>, branch16 { +def JalB16 : FJALB16_ins<0b0, "jal", IIM16Alu>, branch16 { let hasDelaySlot = 0; // not true, but we add the nop for now let isBranch=1; let Defs = [RA]; @@ -761,7 +761,7 @@ def JalB16 : FJALB16_ins<0b0, "jal", IIAlu>, branch16 { // address register. 
// -def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> { +def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIM16Alu> { let isBranch = 1; let isIndirectBranch = 1; let hasDelaySlot = 1; @@ -769,14 +769,14 @@ def JrRa16: FRR16_JALRC_RA_only_ins<0, 0, "jr", IIAlu> { let isBarrier=1; } -def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIAlu> { +def JrcRa16: FRR16_JALRC_RA_only_ins<1, 1, "jrc", IIM16Alu> { let isBranch = 1; let isIndirectBranch = 1; let isTerminator=1; let isBarrier=1; } -def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIAlu> { +def JrcRx16: FRR16_JALRC_ins<1, 1, 0, "jrc", IIM16Alu> { let isBranch = 1; let isIndirectBranch = 1; let isTerminator=1; @@ -825,16 +825,16 @@ def LhuRxRyOffMemX16: // Purpose: Load Immediate // To load a constant into a GPR. // -def LiRxImm16: FRI16_ins<0b01101, "li", IIAlu>; +def LiRxImm16: FRI16_ins<0b01101, "li", IIM16Alu>; // // Format: LI rx, immediate MIPS16e // Purpose: Load Immediate (Extended) // To load a constant into a GPR. // -def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIAlu>; +def LiRxImmX16: FEXT_RI16_ins<0b01101, "li", IIM16Alu>; -def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIAlu> { +def LiRxImmAlignX16: FEXT_RI16_ins<0b01101, ".align 2\n\tli", IIM16Alu> { let isCodeGenOnly = 1; } @@ -863,21 +863,21 @@ def LwRxPcTcpX16: FEXT_RI16_TCP_ins<0b10110, "lw", II_LW>, MayLoad; // Purpose: Move // To move the contents of a GPR to a GPR. // -def Move32R16: FI8_MOV32R16_ins<"move", IIAlu>; +def Move32R16: FI8_MOV32R16_ins<"move", IIM16Alu>; // // Format: MOVE ry, r32 MIPS16e //Purpose: Move // To move the contents of a GPR to a GPR. // -def MoveR3216: FI8_MOVR3216_ins<"move", IIAlu>; +def MoveR3216: FI8_MOVR3216_ins<"move", IIM16Alu>; // // Format: MFHI rx MIPS16e // Purpose: Move From HI Register // To copy the special purpose HI register to a GPR. // -def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> { +def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIM16Alu> { let Uses = [HI0]; let hasSideEffects = 0; } @@ -887,7 +887,7 @@ def Mfhi16: FRR16_M_ins<0b10000, "mfhi", IIAlu> { // Purpose: Move From LO Register // To copy the special purpose LO register to a GPR. // -def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> { +def Mflo16: FRR16_M_ins<0b10010, "mflo", IIM16Alu> { let Uses = [LO0]; let hasSideEffects = 0; } @@ -895,13 +895,13 @@ def Mflo16: FRR16_M_ins<0b10010, "mflo", IIAlu> { // // Pseudo Instruction for mult // -def MultRxRy16: FMULT16_ins<"mult", IIAlu> { +def MultRxRy16: FMULT16_ins<"mult", IIM16Alu> { let isCommutable = 1; let hasSideEffects = 0; let Defs = [HI0, LO0]; } -def MultuRxRy16: FMULT16_ins<"multu", IIAlu> { +def MultuRxRy16: FMULT16_ins<"multu", IIM16Alu> { let isCommutable = 1; let hasSideEffects = 0; let Defs = [HI0, LO0]; @@ -912,7 +912,7 @@ def MultuRxRy16: FMULT16_ins<"multu", IIAlu> { // Purpose: Multiply Word // To multiply 32-bit signed integers. // -def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> { +def MultRxRyRz16: FMULT16_LO_ins<"mult", IIM16Alu> { let isCommutable = 1; let hasSideEffects = 0; let Defs = [HI0, LO0]; @@ -923,7 +923,7 @@ def MultRxRyRz16: FMULT16_LO_ins<"mult", IIAlu> { // Purpose: Multiply Unsigned Word // To multiply 32-bit unsigned integers. // -def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> { +def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIM16Alu> { let isCommutable = 1; let hasSideEffects = 0; let Defs = [HI0, LO0]; @@ -934,21 +934,21 @@ def MultuRxRyRz16: FMULT16_LO_ins<"multu", IIAlu> { // Purpose: Negate // To negate an integer value. 
// -def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIAlu>; +def NegRxRy16: FUnaryRR16_ins<0b11101, "neg", IIM16Alu>; // // Format: NOT rx, ry MIPS16e // Purpose: Not // To complement an integer value // -def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIAlu>; +def NotRxRy16: FUnaryRR16_ins<0b01111, "not", IIM16Alu>; // // Format: OR rx, ry MIPS16e // Purpose: Or // To do a bitwise logical OR. // -def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIAlu>, ArithLogic16Defs<1>; +def OrRxRxRy16: FRxRxRy16_ins<0b01101, "or", IIM16Alu>, ArithLogic16Defs<1>; // // Format: RESTORE {ra,}{s0/s1/s0-1,}{framesize} @@ -1012,7 +1012,7 @@ def SbRxRyOffMemX16: // Sign-extend least significant byte in register rx. // def SebRx16 - : FRR_SF16_ins<0b10001, 0b100, "seb", IIAlu>; + : FRR_SF16_ins<0b10001, 0b100, "seb", IIM16Alu>; // // Format: SEH rx MIPS16e @@ -1020,7 +1020,7 @@ def SebRx16 // Sign-extend least significant word in register rx. // def SehRx16 - : FRR_SF16_ins<0b10001, 0b101, "seh", IIAlu>; + : FRR_SF16_ins<0b10001, 0b101, "seh", IIM16Alu>; // // The Sel(T) instructions are pseudos @@ -1149,21 +1149,21 @@ def ShRxRyOffMemX16: // Purpose: Shift Word Left Logical (Extended) // To execute a left-shift of a word by a fixed number of bits-0 to 31 bits. // -def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIAlu>; +def SllX16: FEXT_SHIFT16_ins<0b00, "sll", IIM16Alu>; // // Format: SLLV ry, rx MIPS16e // Purpose: Shift Word Left Logical Variable // To execute a left-shift of a word by a variable number of bits. // -def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIAlu>; +def SllvRxRy16 : FRxRxRy16_ins<0b00100, "sllv", IIM16Alu>; // Format: SLTI rx, immediate MIPS16e // Purpose: Set on Less Than Immediate // To record the result of a less-than comparison with a constant. // // -def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> { +def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIM16Alu> { let Defs = [T8]; } @@ -1173,7 +1173,7 @@ def SltiRxImm16: FRI16R_ins<0b01010, "slti", IIAlu> { // To record the result of a less-than comparison with a constant. // // -def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIAlu> { +def SltiRxImmX16: FEXT_RI16R_ins<0b01010, "slti", IIM16Alu> { let Defs = [T8]; } @@ -1184,7 +1184,7 @@ def SltiCCRxImmX16: FEXT_CCRXI16_ins<"slti">; // To record the result of a less-than comparison with a constant. // // -def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> { +def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIM16Alu> { let Defs = [T8]; } @@ -1194,7 +1194,7 @@ def SltiuRxImm16: FRI16R_ins<0b01011, "sltiu", IIAlu> { // To record the result of a less-than comparison with a constant. // // -def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIAlu> { +def SltiuRxImmX16: FEXT_RI16R_ins<0b01011, "sltiu", IIM16Alu> { let Defs = [T8]; } // @@ -1209,7 +1209,7 @@ def SltiuCCRxImmX16: FEXT_CCRXI16_ins<"sltiu">; // Purpose: Set on Less Than // To record the result of a less-than comparison. // -def SltRxRy16: FRR16R_ins<0b00010, "slt", IIAlu>{ +def SltRxRy16: FRR16R_ins<0b00010, "slt", IIM16Alu>{ let Defs = [T8]; } @@ -1219,7 +1219,7 @@ def SltCCRxRy16: FCCRR16_ins<"slt">; // Purpose: Set on Less Than Unsigned // To record the result of an unsigned less-than comparison. // -def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIAlu>{ +def SltuRxRy16: FRR16R_ins<0b00011, "sltu", IIM16Alu>{ let Defs = [T8]; } @@ -1236,7 +1236,7 @@ def SltuCCRxRy16: FCCRR16_ins<"sltu">; // To execute an arithmetic right-shift of a word by a variable // number of bits. 
// -def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>; +def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIM16Alu>; // @@ -1245,7 +1245,7 @@ def SravRxRy16: FRxRxRy16_ins<0b00111, "srav", IIAlu>; // To execute an arithmetic right-shift of a word by a fixed // number of bits-1 to 8 bits. // -def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>; +def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIM16Alu>; // @@ -1254,7 +1254,7 @@ def SraX16: FEXT_SHIFT16_ins<0b11, "sra", IIAlu>; // To execute a logical right-shift of a word by a variable // number of bits. // -def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>; +def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIM16Alu>; // @@ -1263,14 +1263,14 @@ def SrlvRxRy16: FRxRxRy16_ins<0b00110, "srlv", IIAlu>; // To execute a logical right-shift of a word by a fixed // number of bits-1 to 31 bits. // -def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIAlu>; +def SrlX16: FEXT_SHIFT16_ins<0b10, "srl", IIM16Alu>; // // Format: SUBU rz, rx, ry MIPS16e // Purpose: Subtract Unsigned Word // To subtract 32-bit integers // -def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIAlu>, ArithLogic16Defs<0>; +def SubuRxRyRz16: FRRR16_ins<0b11, "subu", IIM16Alu>, ArithLogic16Defs<0>; // // Format: SW ry, offset(rx) MIPS16e @@ -1294,7 +1294,7 @@ def SwRxSpImmX16: FEXT_RI16_SP_Store_explicit_ins // Purpose: Xor // To do a bitwise logical XOR. // -def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIAlu>, ArithLogic16Defs<1>; +def XorRxRxRy16: FRxRxRy16_ins<0b01110, "xor", IIM16Alu>, ArithLogic16Defs<1>; class Mips16Pat : Pat { let Predicates = [InMips16Mode]; @@ -1380,7 +1380,7 @@ def: Mips16Pat<(brind CPU16Regs:$rs), (JrcRx16 CPU16Regs:$rs)> { let isCall=1, hasDelaySlot=0 in def JumpLinkReg16: FRR16_JALRC<0, 0, 0, (outs), (ins CPU16Regs:$rs), - "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], IIBranch> { + "jalrc \t$rs", [(MipsJmpLink CPU16Regs:$rs)], II_JALRC> { let Defs = [RA]; } diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index d6ab8a6e5411..82d2c8ee9905 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -186,54 +186,56 @@ class CMP_CONDN_DESC_BASE{ - def CMP_F_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_UN_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_EQ_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>, + let AdditionalPredicates = [NotInMicroMips] in { + def CMP_F_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, ISA_MIPS32R6, HARDFLOAT; - def CMP_LT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_ULT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_LE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_ULE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SAF_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SUN_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM, - 
CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, + def CMP_UN_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>, ISA_MIPS32R6, HARDFLOAT; - def CMP_SLT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SULT_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, + def CMP_EQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>, ISA_MIPS32R6, HARDFLOAT; - def CMP_SLE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, - ISA_MIPS32R6, HARDFLOAT; - def CMP_SULE_#NAME : COP1_CMP_CONDN_FM, - CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, + def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_LT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>, ISA_MIPS32R6, HARDFLOAT; + def CMP_ULT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_LE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_ULE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SAF_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SUN_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SLT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SULT_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SLE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + def CMP_SULE_#NAME : COP1_CMP_CONDN_FM, + CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, + ISA_MIPS32R6, HARDFLOAT; + } } //===----------------------------------------------------------------------===// @@ -557,7 +559,7 @@ class CACHE_HINT_DESC Pattern = []; - string DecoderMethod = "DecodeCacheOpR6"; + string DecoderMethod = "DecodeCacheeOp_CacheOpR6"; } class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>; @@ -595,7 +597,7 @@ class LSA_R6_DESC_BASE Pattern = []; } -class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>; +class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1>; class LL_R6_DESC_BASE { dag OutOperandList = (outs GPROpnd:$rt); @@ -685,8 +687,10 @@ def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6; def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6; def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6; def CACHE_R6 : R6MMR6Rel, CACHE_ENC, CACHE_DESC, ISA_MIPS32R6; -def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def CLO_R6 : R6MMR6Rel, CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6; def CLZ_R6 
: R6MMR6Rel, CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6; defm S : CMP_CC_M; @@ -702,39 +706,51 @@ def LSA_R6 : R6MMR6Rel, LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6; def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6; def LWPC : R6MMR6Rel, LWPC_ENC, LWPC_DESC, ISA_MIPS32R6; def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6; -def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def MOD : R6MMR6Rel, MOD_ENC, MOD_DESC, ISA_MIPS32R6; def MODU : R6MMR6Rel, MODU_ENC, MODU_DESC, ISA_MIPS32R6; -def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6, HARDFLOAT; +} def MUH : R6MMR6Rel, MUH_ENC, MUH_DESC, ISA_MIPS32R6; def MUHU : R6MMR6Rel, MUHU_ENC, MUHU_DESC, ISA_MIPS32R6; def MUL_R6 : R6MMR6Rel, MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6; def MULU : R6MMR6Rel, MULU_ENC, MULU_DESC, ISA_MIPS32R6; def NAL; // BAL with rd=0 def PREF_R6 : R6MMR6Rel, PREF_ENC, PREF_DESC, ISA_MIPS32R6; -def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def SC_R6 : SC_R6_ENC, SC_R6_DESC, ISA_MIPS32R6; +let AdditionalPredicates = [NotInMicroMips] in { def SDBBP_R6 : SDBBP_R6_ENC, SDBBP_R6_DESC, ISA_MIPS32R6; +} def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6; def SELEQZ : R6MMR6Rel, SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6, GPR_32; -def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def SELNEZ : R6MMR6Rel, SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6, GPR_32; -def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6, HARDFLOAT; -def SEL_D : 
SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6, HARDFLOAT; -def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT; +let AdditionalPredicates = [NotInMicroMips] in { + def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6, HARDFLOAT; + def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6, HARDFLOAT; + def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6, HARDFLOAT; +} def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6; //===----------------------------------------------------------------------===// @@ -743,7 +759,9 @@ def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6; // //===----------------------------------------------------------------------===// +let AdditionalPredicates = [NotInMicroMips] in { def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6; +} def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6; //===----------------------------------------------------------------------===// @@ -752,84 +770,78 @@ def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6; // //===----------------------------------------------------------------------===// -// f32 comparisons supported via another comparison -def : MipsPat<(setone f32:$lhs, f32:$rhs), - (NOR (CMP_UEQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seto f32:$lhs, f32:$rhs), - (NOR (CMP_UN_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(setune f32:$lhs, f32:$rhs), - (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seteq f32:$lhs, f32:$rhs), (CMP_EQ_S f32:$lhs, f32:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setgt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$rhs, f32:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setne f32:$lhs, f32:$rhs), - (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; +// comparisons supported via another comparison +multiclass Cmp_Pats { +def : MipsPat<(setone VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_UEQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +def : MipsPat<(seto VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_UN_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +def : MipsPat<(setune VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +def : MipsPat<(seteq VT:$lhs, VT:$rhs), + (!cast("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs)>; +def : MipsPat<(setgt VT:$lhs, VT:$rhs), + (!cast("CMP_LE_"#NAME) VT:$rhs, VT:$lhs)>; +def : MipsPat<(setge VT:$lhs, VT:$rhs), + (!cast("CMP_LT_"#NAME) VT:$rhs, VT:$lhs)>; +def : MipsPat<(setlt VT:$lhs, VT:$rhs), + (!cast("CMP_LT_"#NAME) VT:$lhs, VT:$rhs)>; +def : MipsPat<(setle VT:$lhs, VT:$rhs), + (!cast("CMP_LE_"#NAME) VT:$lhs, VT:$rhs)>; +def : MipsPat<(setne VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +} -// f64 comparisons supported via another comparison -def : MipsPat<(setone f64:$lhs, f64:$rhs), - (NOR (CMP_UEQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seto f64:$lhs, f64:$rhs), - (NOR (CMP_UN_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(setune f64:$lhs, f64:$rhs), - (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seteq f64:$lhs, f64:$rhs), (CMP_EQ_D f64:$lhs, f64:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setgt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$rhs, f64:$lhs)>, - 
ISA_MIPS32R6; -def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setne f64:$lhs, f64:$rhs), - (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; +defm S : Cmp_Pats, ISA_MIPS32R6; +defm D : Cmp_Pats, ISA_MIPS32R6; // i32 selects -def : MipsPat<(select i32:$cond, i32:$t, i32:$f), - (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, i32:$f), - (OR (SELEQZ i32:$t, i32:$cond), (SELNEZ i32:$f, i32:$cond))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), - (OR (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), - (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), - (SELEQZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t, - i32:$f), - (OR (SELEQZ i32:$t, (SLTi i32:$cond, (Plus1 imm:$imm))), - (SELNEZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setugt i32:$cond, immSExt16Plus1:$imm)), - i32:$t, i32:$f), - (OR (SELEQZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))), - (SELNEZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>, - ISA_MIPS32R6; +multiclass SelectInt_Pats { +// reg, immz +def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, RC:$cond), (SELNEZOp RC:$f, RC:$cond))>; +def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f), + (OROp (SELNEZOp RC:$t, RC:$cond), (SELEQZOp RC:$f, RC:$cond))>; +// reg, immZExt16[_64] +def : MipsPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), + (SELNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; +def : MipsPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f), + (OROp (SELNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), + (SELEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; + +// reg, immSExt16Plus1 +def : MipsPat<(select (Opg (setgt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))), + (SELNEZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>; +def : MipsPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))), + (SELNEZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>; + +def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz), + (SELEQZOp RC:$t, RC:$cond)>; +def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz), + (SELNEZOp RC:$t, RC:$cond)>; +def : MipsPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f), + (SELNEZOp RC:$f, RC:$cond)>; +def : MipsPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f), + (SELEQZOp RC:$f, RC:$cond)>; +} + +defm : SelectInt_Pats, ISA_MIPS32R6; + +def : MipsPat<(select i32:$cond, i32:$t, i32:$f), + (OR (SELNEZ i32:$t, i32:$cond), + (SELEQZ i32:$f, i32:$cond))>, + ISA_MIPS32R6; def : MipsPat<(select i32:$cond, i32:$t, immz), - 
(SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, immz), - (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, immz), - (SELEQZ i32:$t, i32:$cond)>, ISA_MIPS32R6; + (SELNEZ i32:$t, i32:$cond)>, + ISA_MIPS32R6; def : MipsPat<(select i32:$cond, immz, i32:$f), - (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i32:$f), - (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i32:$f), - (SELNEZ i32:$f, i32:$cond)>, ISA_MIPS32R6; + (SELEQZ i32:$f, i32:$cond)>, + ISA_MIPS32R6; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index f917ecad4a53..cbdcdd788bec 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -16,10 +16,6 @@ //===----------------------------------------------------------------------===// // Unsigned Operand -def uimm5_64 : Operand { - let PrintMethod = "printUnsignedImm"; -} - def uimm16_64 : Operand { let PrintMethod = "printUnsignedImm"; } @@ -276,12 +272,20 @@ def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>; let isCodeGenOnly = 1 in def RDHWR64 : ReadHardware, RDHWR_FM; -def DEXT : ExtBase<"dext", GPR64Opnd, uimm6, MipsExt>, EXT_FM<3>; -def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm6>, EXT_FM<2>; -def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5>, EXT_FM<1>; +let AdditionalPredicates = [NotInMicroMips] in { + // TODO: Add 'pos + size' constraint check to dext* instructions + // DEXT: 0 < pos + size <= 63 + // DEXTM, DEXTU: 32 < pos + size <= 64 + def DEXT : ExtBase<"dext", GPR64Opnd, uimm5, uimm5_plus1, MipsExt>, + EXT_FM<3>; + def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5, uimm5_plus33, MipsExt>, + EXT_FM<1>; + def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm5_plus32, uimm5_plus1, + MipsExt>, EXT_FM<2>; +} def DINS : InsBase<"dins", GPR64Opnd, uimm6, MipsIns>, EXT_FM<7>; -def DINSU : InsBase<"dinsu", GPR64Opnd, uimm6>, EXT_FM<6>; +def DINSU : InsBase<"dinsu", GPR64Opnd, uimm5_plus32>, EXT_FM<6>; def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5>, EXT_FM<5>; let isCodeGenOnly = 1, rs = 0, shamt = 0 in { @@ -341,11 +345,11 @@ class SetCC64_I: } class CBranchBitNum shift = 1> : - InstSE<(outs), (ins RO:$rs, uimm5_64:$p, opnd:$offset), + RegisterOperand RO, Operand ImmOp, bits<64> shift = 1> : + InstSE<(outs), (ins RO:$rs, ImmOp:$p, opnd:$offset), !strconcat(opstr, "\t$rs, $p, $offset"), [(brcond (i32 (cond_op (and RO:$rs, (shl shift, immZExt5_64:$p)), 0)), - bb:$offset)], IIBranch, FrmI, opstr> { + bb:$offset)], II_BBIT, FrmI, opstr> { let isBranch = 1; let isTerminator = 1; let hasDelaySlot = 1; @@ -363,14 +367,17 @@ def BADDu : ArithLogicR<"baddu", GPR64Opnd, 1, II_BADDU>, ADD_FM<0x1c, 0x28>; // Branch on Bit Clear /+32 -def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd>, BBIT_FM<0x32>; -def BBIT032: CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, 0x100000000>, +def BBIT0 : CBranchBitNum<"bbit0", brtarget, seteq, GPR64Opnd, + uimm5_64_report_uimm6>, BBIT_FM<0x32>; +def BBIT032: CBranchBitNum<"bbit032", brtarget, seteq, GPR64Opnd, uimm5_64, + 0x100000000>, BBIT_FM<0x36>; // Branch on Bit Set /+32 -def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd>, BBIT_FM<0x3a>; -def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, 0x100000000>, - BBIT_FM<0x3e>; +def BBIT1 : CBranchBitNum<"bbit1", brtarget, setne, GPR64Opnd, + uimm5_64_report_uimm6>, 
BBIT_FM<0x3a>; +def BBIT132: CBranchBitNum<"bbit132", brtarget, setne, GPR64Opnd, uimm5_64, + 0x100000000>, BBIT_FM<0x3e>; // Multiply Doubleword to GPR let Defs = [HI0, LO0, P0, P1, P2] in @@ -544,9 +551,24 @@ def : MipsPat<(brcond (i32 (setne (and i64:$lhs, PowerOf2HI:$mask), 0)), bb:$dst (BBIT132 i64:$lhs, (Log2HI PowerOf2HI:$mask), bb:$dst)>; } +// Atomic load patterns. +def : MipsPat<(atomic_load_8 addr:$a), (LB64 addr:$a)>; +def : MipsPat<(atomic_load_16 addr:$a), (LH64 addr:$a)>; +def : MipsPat<(atomic_load_32 addr:$a), (LW64 addr:$a)>; +def : MipsPat<(atomic_load_64 addr:$a), (LD addr:$a)>; + +// Atomic store patterns. +def : MipsPat<(atomic_store_8 addr:$a, GPR64:$v), (SB64 GPR64:$v, addr:$a)>; +def : MipsPat<(atomic_store_16 addr:$a, GPR64:$v), (SH64 GPR64:$v, addr:$a)>; +def : MipsPat<(atomic_store_32 addr:$a, GPR64:$v), (SW64 GPR64:$v, addr:$a)>; +def : MipsPat<(atomic_store_64 addr:$a, GPR64:$v), (SD GPR64:$v, addr:$a)>; + //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// +def : MipsInstAlias<"move $dst, $src", + (OR64 GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, + GPR_64; def : MipsInstAlias<"move $dst, $src", (DADDu GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>, GPR_64; @@ -617,6 +639,38 @@ def : MipsInstAlias<"syncw", (SYNC 0x4), 0>; def : MipsInstAlias<"syncws", (SYNC 0x5), 0>; } +// cnMIPS Aliases. + +// bbit* with $p 32-63 converted to bbit*32 with $p 0-31 +def : MipsInstAlias<"bbit0 $rs, $p, $offset", + (BBIT032 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p, + brtarget:$offset), 0>, + ASE_CNMIPS; +def : MipsInstAlias<"bbit1 $rs, $p, $offset", + (BBIT132 GPR64Opnd:$rs, uimm5_plus32_normalize_64:$p, + brtarget:$offset), 0>, + ASE_CNMIPS; + +// exts with $pos 32-63 in converted to exts32 with $pos 0-31 +def : MipsInstAlias<"exts $rt, $rs, $pos, $lenm1", + (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rs, + uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, + ASE_CNMIPS; +def : MipsInstAlias<"exts $rt, $pos, $lenm1", + (EXTS32 GPR64Opnd:$rt, GPR64Opnd:$rt, + uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, + ASE_CNMIPS; + +// cins with $pos 32-63 in converted to cins32 with $pos 0-31 +def : MipsInstAlias<"cins $rt, $rs, $pos, $lenm1", + (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rs, + uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, + ASE_CNMIPS; +def : MipsInstAlias<"cins $rt, $pos, $lenm1", + (CINS32 GPR64Opnd:$rt, GPR64Opnd:$rt, + uimm5_plus32_normalize:$pos, uimm5:$lenm1), 0>, + ASE_CNMIPS; + //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// @@ -625,3 +679,8 @@ class LoadImmediate64 : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64), !strconcat(instr_asm, "\t$rt, $imm64")> ; def LoadImm64 : LoadImmediate64<"dli", imm64, GPR64Opnd>; + +def LoadAddrReg64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins mem:$addr), + "dla\t$rt, $addr">; +def LoadAddrImm64 : MipsAsmPseudoInst<(outs GPR64Opnd:$rt), (ins imm64:$imm64), + "dla\t$rt, $imm64">; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index 6b546e864bd3..6f34dbe28d30 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -62,7 +62,7 @@ class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>; class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>; class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", 
GPR64Opnd, sdiv>; class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>; -class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>; +class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2_plus1>; class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>; class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>; class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>; @@ -81,10 +81,12 @@ class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>; // //===----------------------------------------------------------------------===// -def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6; -def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6; -def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6; -def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6; +let AdditionalPredicates = [NotInMicroMips] in { + def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6; + def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6; + def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6; + def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6; +} def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6; def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6; def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index fdba064b5c5e..957529376b37 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -169,12 +169,12 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (MCPE.isMachineConstantPoolEntry()) EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); else - EmitGlobalConstant(MCPE.Val.ConstVal); + EmitGlobalConstant(MF->getDataLayout(), MCPE.Val.ConstVal); return; } - MachineBasicBlock::const_instr_iterator I = MI; + MachineBasicBlock::const_instr_iterator I = MI->getIterator(); MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end(); do { @@ -202,7 +202,7 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { llvm_unreachable("Pseudo opcode found in EmitInstruction()"); MCInst TmpInst0; - MCInstLowering.Lower(I, TmpInst0); + MCInstLowering.Lower(&*I, TmpInst0); EmitToStreamer(*OutStreamer, TmpInst0); } while ((++I != E) && I->isInsideBundle()); // Delay slot check } @@ -405,7 +405,7 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. - if (MBB->isLandingPad() || MBB->pred_empty()) + if (MBB->isEHPad() || MBB->pred_empty()) return false; // If there isn't exactly one predecessor, it can't be a fall through. 
@@ -559,7 +559,6 @@ bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { - const DataLayout *DL = TM.getDataLayout(); const MachineOperand &MO = MI->getOperand(opNum); bool closeP = false; @@ -608,7 +607,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, } case MachineOperand::MO_ConstantPoolIndex: - O << DL->getPrivateGlobalPrefix() << "CPI" + O << getDataLayout().getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); if (MO.getOffset()) O << "+" << MO.getOffset(); @@ -1009,7 +1008,7 @@ void MipsAsmPrinter::EmitFPCallStub( // // Mov $18, $31 - EmitInstrRegRegReg(*STI, Mips::ADDu, Mips::S2, Mips::RA, Mips::ZERO); + EmitInstrRegRegReg(*STI, Mips::OR, Mips::S2, Mips::RA, Mips::ZERO); EmitSwapFPIntParams(*STI, Signature->ParamSig, LE, true); diff --git a/lib/Target/Mips/MipsCCState.cpp b/lib/Target/Mips/MipsCCState.cpp index b8081295ca64..d82063e3d2a9 100644 --- a/lib/Target/Mips/MipsCCState.cpp +++ b/lib/Target/Mips/MipsCCState.cpp @@ -29,22 +29,16 @@ static bool isF128SoftLibCall(const char *CallSym) { "powl", "rintl", "sinl", "sqrtl", "truncl"}; - const char *const *End = LibCalls + array_lengthof(LibCalls); - // Check that LibCalls is sorted alphabetically. - MipsTargetLowering::LTStr Comp; - -#ifndef NDEBUG - for (const char *const *I = LibCalls; I < End - 1; ++I) - assert(Comp(*I, *(I + 1))); -#endif - - return std::binary_search(LibCalls, End, CallSym, Comp); + auto Comp = [](const char *S1, const char *S2) { return strcmp(S1, S2) < 0; }; + assert(std::is_sorted(std::begin(LibCalls), std::end(LibCalls), Comp)); + return std::binary_search(std::begin(LibCalls), std::end(LibCalls), + CallSym, Comp); } /// This function returns true if Ty is fp128, {f128} or i128 which was /// originally a fp128. -static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) { +static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) { if (Ty->isFP128Ty()) return true; diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 93e1908083cb..0b4b7785af67 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -427,3 +427,28 @@ def CSR_Mips16RetHelper : CalleeSavedRegs<(add V0, V1, FP, (sequence "A%u", 3, 0), (sequence "S%u", 7, 0), (sequence "D%u", 15, 10))>; + +def CSR_Interrupt_32R6 : CalleeSavedRegs<(add (sequence "A%u", 3, 0), + (sequence "S%u", 7, 0), + (sequence "V%u", 1, 0), + (sequence "T%u", 9, 0), + RA, FP, GP, AT)>; + +def CSR_Interrupt_32 : CalleeSavedRegs<(add (sequence "A%u", 3, 0), + (sequence "S%u", 7, 0), + (sequence "V%u", 1, 0), + (sequence "T%u", 9, 0), + RA, FP, GP, AT, LO0, HI0)>; + +def CSR_Interrupt_64R6 : CalleeSavedRegs<(add (sequence "A%u_64", 3, 0), + (sequence "V%u_64", 1, 0), + (sequence "S%u_64", 7, 0), + (sequence "T%u_64", 9, 0), + RA_64, FP_64, GP_64, AT_64)>; + +def CSR_Interrupt_64 : CalleeSavedRegs<(add (sequence "A%u_64", 3, 0), + (sequence "S%u_64", 7, 0), + (sequence "T%u_64", 9, 0), + (sequence "V%u_64", 1, 0), + RA_64, FP_64, GP_64, AT_64, + LO0_64, HI0_64)>; diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp index 96553d28fc57..ea8c5871fa0e 100644 --- a/lib/Target/Mips/MipsConstantIslandPass.cpp +++ b/lib/Target/Mips/MipsConstantIslandPass.cpp @@ -560,7 +560,7 @@ MipsConstantIslands::doInitialPlacement(std::vector &CPEMIs) { // identity mapping of CPI's to CPE's. 
   const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();

-  const DataLayout &TD = *MF->getTarget().getDataLayout();
+  const DataLayout &TD = MF->getDataLayout();
   for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
     unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
     assert(Size >= 4 && "Too small constant pool entry");
@@ -598,12 +598,12 @@ MipsConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) {
 /// into the block immediately after it.
 static bool BBHasFallthrough(MachineBasicBlock *MBB) {
   // Get the next machine basic block in the function.
-  MachineFunction::iterator MBBI = MBB;
+  MachineFunction::iterator MBBI = MBB->getIterator();
   // Can't fall off end of function.
   if (std::next(MBBI) == MBB->getParent()->end())
     return false;

-  MachineBasicBlock *NextBB = std::next(MBBI);
+  MachineBasicBlock *NextBB = &*std::next(MBBI);
   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
        E = MBB->succ_end(); I != E; ++I)
     if (*I == NextBB)
@@ -656,11 +656,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
   // alignment assumptions, as we don't know for sure the size of any
   // instructions in the inline assembly.
   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
-    computeBlockSize(I);
+    computeBlockSize(&*I);

   // Compute block offsets.
-  adjustBBOffsetsAfter(MF->begin());
+  adjustBBOffsetsAfter(&MF->front());

   // Now go back through the instructions and build up our data structures.
   for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
@@ -879,7 +879,7 @@ MachineBasicBlock *MipsConstantIslands::splitBlockBeforeInstr
   // Create a new MBB for the code after the OrigBB.
   MachineBasicBlock *NewBB =
     MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
-  MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+  MachineFunction::iterator MBBI = ++OrigBB->getIterator();
   MF->insert(MBBI, NewBB);

   // Splice the instructions starting with MI over to NewBB.
@@ -967,8 +967,8 @@ bool MipsConstantIslands::isWaterInRange(unsigned UserOffset,
   unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
   unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
   unsigned NextBlockOffset, NextBlockAlignment;
-  MachineFunction::const_iterator NextBlock = Water;
-  if (++NextBlock == MF->end()) {
+  MachineFunction::const_iterator NextBlock = ++Water->getIterator();
+  if (NextBlock == MF->end()) {
     NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
     NextBlockAlignment = 0;
   } else {
@@ -1261,7 +1261,7 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
     if (isOffsetInRange(UserOffset, CPEOffset, U)) {
       DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
             << format(", expected CPE offset %#x\n", CPEOffset));
-      NewMBB = std::next(MachineFunction::iterator(UserMBB));
+      NewMBB = &*++UserMBB->getIterator();
       // Add an unconditional branch from UserMBB to fallthrough block.  Record
       // it for branch lengthening; this new branch will not get out of range,
       // but if the preceding conditional branch is out of range, the targets
@@ -1371,8 +1371,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
       NewWaterList.insert(NewIsland);

     // The new CPE goes before the following block (NewMBB).
-    NewMBB = std::next(MachineFunction::iterator(WaterBB));
-
+    NewMBB = &*++WaterBB->getIterator();
   } else {
     // No water found.
     // we first see if a longer form of the instruction could have reached
@@ -1389,7 +1388,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
     // next iteration for constant pools, but in this context, we don't want
     // it.
Check for this so it will be removed from the WaterList. // Also remove any entry from NewWaterList. - MachineBasicBlock *WaterBB = std::prev(MachineFunction::iterator(NewMBB)); + MachineBasicBlock *WaterBB = &*--NewMBB->getIterator(); IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); if (IP != WaterList.end()) NewWaterList.erase(WaterBB); @@ -1406,7 +1405,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { WaterList.erase(IP); // Okay, we know we can put an island before NewMBB now, do it! - MF->insert(NewMBB, NewIsland); + MF->insert(NewMBB->getIterator(), NewIsland); // Update internal data structures to account for the newly inserted MBB. updateForInsertedWaterBlock(NewIsland); @@ -1431,9 +1430,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) { // Increase the size of the island block to account for the new entry. BBInfo[NewIsland->getNumber()].Size += Size; - adjustBBOffsetsAfter(std::prev(MachineFunction::iterator(NewIsland))); - - + adjustBBOffsetsAfter(&*--NewIsland->getIterator()); // Finally, change the CPI in the instruction operand to be ID. for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i) @@ -1645,7 +1642,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) { MBB->back().eraseFromParent(); // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } - MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB)); + MachineBasicBlock *NextBB = &*++MBB->getIterator(); DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() << " also invert condition and change dest. to BB#" diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td index b5d52ced9d3d..f959bd4d8db3 100644 --- a/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/lib/Target/Mips/MipsDSPInstrFormats.td @@ -7,10 +7,30 @@ // //===----------------------------------------------------------------------===// +class DspMMRel; + +def Dsp2MicroMips : InstrMapping { + let FilterClass = "DspMMRel"; + // Instructions with the same BaseOpcode and isNVStore values form a row. + let RowFields = ["BaseOpcode"]; + // Instructions with the same predicate sense form a column. + let ColFields = ["Arch"]; + // The key column is the unpredicated instructions. + let KeyCol = ["dsp"]; + // Value columns are PredSense=true and PredSense=false + let ValueCols = [["dsp"], ["mmdsp"]]; +} + def HasDSP : Predicate<"Subtarget->hasDSP()">, AssemblerPredicate<"FeatureDSP">; def HasDSPR2 : Predicate<"Subtarget->hasDSPR2()">, AssemblerPredicate<"FeatureDSPR2">; +def HasDSPR3 : Predicate<"Subtarget->hasDSPR3()">, + AssemblerPredicate<"FeatureDSPR3">; + +class ISA_DSPR2 { + list InsnPredicates = [HasDSPR2]; +} // Fields. class Field6 val> { @@ -20,14 +40,22 @@ class Field6 val> { def SPECIAL3_OPCODE : Field6<0b011111>; def REGIMM_OPCODE : Field6<0b000001>; -class DSPInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { - let Predicates = [HasDSP]; +class DSPInst + : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, PredicateControl { + let InsnPredicates = [HasDSP]; + string BaseOpcode = opstr; + string Arch = "dsp"; } class PseudoDSP pattern, - InstrItinClass itin = IIPseudo>: - MipsPseudo { - let Predicates = [HasDSP]; + InstrItinClass itin = IIPseudo> + : MipsPseudo, PredicateControl { + let InsnPredicates = [HasDSP]; +} + +class DSPInstAlias + : InstAlias, PredicateControl { + let InsnPredicates = [HasDSP]; } // ADDU.QB sub-class format. 
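// Illustrative sketch, not part of this patch: the Dsp2MicroMips mapping
// above pairs a DSP instruction with its microMIPS counterpart through two
// records that derive from DspMMRel and share a BaseOpcode while differing
// in Arch ("dsp" is the key column, "mmdsp" the value column). Assuming
// hypothetical ENC/DESC classes, where the DSP-side DESC derives from
// DSPInst (and so carries BaseOpcode plus Arch = "dsp") and the microMIPS
// DESC overrides Arch to "mmdsp", a mapped pair would look like:
//
//   def EXAMPLE_PH    : EXAMPLE_PH_ENC, EXAMPLE_PH_DESC, DspMMRel;
//   def EXAMPLE_PH_MM : EXAMPLE_PH_ENC_MM, EXAMPLE_PH_DESC_MM, DspMMRel;
//
// From such a pair, TableGen's map-table emitter generates a dsp -> mmdsp
// opcode conversion table keyed on BaseOpcode.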
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index d26838404451..da6f174e2a19 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -12,9 +12,11 @@ //===----------------------------------------------------------------------===// // ImmLeaf +def immZExt1 : ImmLeaf(Imm);}]>; def immZExt2 : ImmLeaf(Imm);}]>; def immZExt3 : ImmLeaf(Imm);}]>; def immZExt4 : ImmLeaf(Imm);}]>; +def immZExt7 : ImmLeaf(Imm);}]>; def immZExt8 : ImmLeaf(Imm);}]>; def immZExt10 : ImmLeaf(Imm);}]>; def immSExt6 : ImmLeaf(Imm);}]>; @@ -263,6 +265,7 @@ class ADDU_QB_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class RADDU_W_QB_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROS:$rs))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class CMP_EQ_QB_R2_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class PRECR_SRA_PH_W_DESC_BASE Pattern = [(set ROT:$rt, (OpNode ROS:$src, ROS:$rs, immZExt5:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; + string BaseOpcode = instr_asm; } class ABSQ_S_PH_R2_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROT:$rt))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class REPL_DESC_BASE Pattern = [(set RO:$rd, (OpNode immPat:$imm))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class SHLL_QB_R3_DESC_BASE Pattern = [(set RO:$rd, (OpNode RO:$rt, GPR32Opnd:$rs_sa))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class SHLL_QB_R2_DESC_BASE { + RegisterOperand RO, Operand ImmOpnd> { dag OutOperandList = (outs RO:$rd); - dag InOperandList = (ins RO:$rt, uimm16:$rs_sa); + dag InOperandList = (ins RO:$rt, ImmOpnd:$rs_sa); string AsmString = !strconcat(instr_asm, "\t$rd, $rt, $rs_sa"); list Pattern = [(set RO:$rd, (OpNode RO:$rt, ImmPat:$rs_sa))]; InstrItinClass Itinerary = itin; bit hasSideEffects = 1; + string BaseOpcode = instr_asm; } class LX_DESC_BASE Pattern = [(set GPR32Opnd:$rd, (OpNode iPTR:$base, iPTR:$index))]; InstrItinClass Itinerary = itin; bit mayLoad = 1; + string BaseOpcode = instr_asm; } class ADDUH_QB_DESC_BASE Pattern = [(set ROD:$rd, (OpNode ROS:$rs, ROT:$rt))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class APPEND_DESC_BASE { + Operand ImmOp, SDPatternOperator Imm, InstrItinClass itin> { dag OutOperandList = (outs GPR32Opnd:$rt); - dag InOperandList = (ins GPR32Opnd:$rs, uimm5:$sa, GPR32Opnd:$src); + dag InOperandList = (ins GPR32Opnd:$rs, ImmOp:$sa, GPR32Opnd:$src); string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $sa"); list Pattern = [(set GPR32Opnd:$rt, - (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, ImmOp:$sa))]; + (OpNode GPR32Opnd:$src, GPR32Opnd:$rs, Imm:$sa))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; + string BaseOpcode = instr_asm; } class EXTR_W_TY1_R2_DESC_BASE { dag OutOperandList = (outs ACC64DSPOpnd:$ac); - dag InOperandList = (ins simm16:$shift, ACC64DSPOpnd:$acin); + dag InOperandList = (ins simm6:$shift, ACC64DSPOpnd:$acin); string AsmString = !strconcat(instr_asm, "\t$ac, $shift"); list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode immSExt6:$shift, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; + string BaseOpcode = instr_asm; } class SHILO_R2_DESC_BASE { @@ -408,6 +424,7 @@ class SHILO_R2_DESC_BASE { list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode 
GPR32Opnd:$rs, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; + string BaseOpcode = instr_asm; } class MTHLIP_DESC_BASE { @@ -417,6 +434,7 @@ class MTHLIP_DESC_BASE { list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; + string BaseOpcode = instr_asm; } class RDDSP_DESC_BASE Pattern = [(set GPR32Opnd:$rd, (OpNode immZExt10:$mask))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class WRDSP_DESC_BASE { dag OutOperandList = (outs); - dag InOperandList = (ins GPR32Opnd:$rs, uimm16:$mask); + dag InOperandList = (ins GPR32Opnd:$rs, uimm10:$mask); string AsmString = !strconcat(instr_asm, "\t$rs, $mask"); list Pattern = [(OpNode GPR32Opnd:$rs, immZExt10:$mask)]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class DPA_W_PH_DESC_BASE { @@ -444,6 +464,7 @@ class DPA_W_PH_DESC_BASE { list Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt, ACC64DSPOpnd:$acin))]; string Constraints = "$acin = $ac"; + string BaseOpcode = instr_asm; } class MULT_DESC_BASE Pattern = [(set ACC64DSPOpnd:$ac, (OpNode GPR32Opnd:$rs, GPR32Opnd:$rt))]; InstrItinClass Itinerary = itin; bit isCommutable = 1; + string BaseOpcode = instr_asm; } class MADD_DESC_BASE Pattern = [(set GPR32Opnd:$rd, (OpNode RO:$ac))]; InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class MTHI_DESC_BASE { @@ -481,6 +505,7 @@ class MTHI_DESC_BASE dag InOperandList = (ins GPR32Opnd:$rs); string AsmString = !strconcat(instr_asm, "\t$rs, $ac"); InstrItinClass Itinerary = itin; + string BaseOpcode = instr_asm; } class BPOSGE32_PSEUDO_DESC_BASE : @@ -506,6 +531,7 @@ class INSV_DESC_BASE Pattern = [(set GPR32Opnd:$rt, (OpNode GPR32Opnd:$src, GPR32Opnd:$rs))]; InstrItinClass Itinerary = itin; string Constraints = "$src = $rt"; + string BaseOpcode = instr_asm; } //===----------------------------------------------------------------------===// @@ -639,7 +665,7 @@ class PRECEU_PH_QBRA_DESC : ABSQ_S_PH_R2_DESC_BASE<"preceu.ph.qbra", // Shift class SHLL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shll.qb", null_frag, immZExt3, - NoItinerary, DSPROpnd>, + NoItinerary, DSPROpnd, uimm3>, Defs<[DSPOutFlag22]>; class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, @@ -647,13 +673,13 @@ class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, Defs<[DSPOutFlag22]>; class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3, - NoItinerary, DSPROpnd>; + NoItinerary, DSPROpnd, uimm3>; class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPROpnd>; class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4, - NoItinerary, DSPROpnd>, + NoItinerary, DSPROpnd, uimm4>, Defs<[DSPOutFlag22]>; class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, @@ -661,7 +687,8 @@ class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, Defs<[DSPOutFlag22]>; class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, - immZExt4, NoItinerary, DSPROpnd>, + immZExt4, NoItinerary, DSPROpnd, + uimm4>, Defs<[DSPOutFlag22]>; class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, @@ -669,19 +696,21 @@ class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, Defs<[DSPOutFlag22]>; class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4, - NoItinerary, DSPROpnd>; + NoItinerary, DSPROpnd, uimm4>; class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", 
int_mips_shra_ph, NoItinerary, DSPROpnd>; class SHRA_R_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.ph", int_mips_shra_r_ph, - immZExt4, NoItinerary, DSPROpnd>; + immZExt4, NoItinerary, DSPROpnd, + uimm4>; class SHRAV_R_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.ph", int_mips_shra_r_ph, NoItinerary, DSPROpnd>; class SHLL_S_W_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.w", int_mips_shll_s_w, - immZExt5, NoItinerary, GPR32Opnd>, + immZExt5, NoItinerary, GPR32Opnd, + uimm5>, Defs<[DSPOutFlag22]>; class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, @@ -689,7 +718,8 @@ class SHLLV_S_W_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.w", int_mips_shll_s_w, Defs<[DSPOutFlag22]>; class SHRA_R_W_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.w", int_mips_shra_r_w, - immZExt5, NoItinerary, GPR32Opnd>; + immZExt5, NoItinerary, GPR32Opnd, + uimm5>; class SHRAV_R_W_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.w", int_mips_shra_r_w, NoItinerary, GPR32Opnd>; @@ -1039,32 +1069,33 @@ class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", // Shift class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3, - NoItinerary, DSPROpnd>; + NoItinerary, DSPROpnd, uimm3>; class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, NoItinerary, DSPROpnd>; class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, - immZExt3, NoItinerary, DSPROpnd>; + immZExt3, NoItinerary, DSPROpnd, + uimm3>; class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, NoItinerary, DSPROpnd>; class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4, - NoItinerary, DSPROpnd>; + NoItinerary, DSPROpnd, uimm4>; class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, NoItinerary, DSPROpnd>; // Misc -class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, immZExt5, +class APPEND_DESC : APPEND_DESC_BASE<"append", int_mips_append, uimm5, immZExt5, NoItinerary>; -class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, immZExt2, +class BALIGN_DESC : APPEND_DESC_BASE<"balign", int_mips_balign, uimm2, immZExt2, NoItinerary>; -class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, immZExt5, - NoItinerary>; +class PREPEND_DESC : APPEND_DESC_BASE<"prepend", int_mips_prepend, uimm5, + immZExt5, NoItinerary>; // Pseudos. def BPOSGE32_PSEUDO : BPOSGE32_PSEUDO_DESC_BASE; def : IndexedLoadPat; } + +// Instruction alias. 
+let AdditionalPredicates = [NotInMicroMips] in {
+  def : DSPInstAlias<"wrdsp $rt", (WRDSP GPR32Opnd:$rt, 0x1F), 1>;
+}
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 4faeb3321621..8313d909df2a 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -355,9 +355,8 @@ void RegDefsUses::addLiveOut(const MachineBasicBlock &MBB,
   for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
        SE = MBB.succ_end(); SI != SE; ++SI)
     if (*SI != &SuccBB)
-      for (MachineBasicBlock::livein_iterator LI = (*SI)->livein_begin(),
-           LE = (*SI)->livein_end(); LI != LE; ++LI)
-        Uses.set(*LI);
+      for (const auto &LI : (*SI)->liveins())
+        Uses.set(LI.PhysReg);
 }

 bool RegDefsUses::update(const MachineInstr &MI, unsigned Begin, unsigned End) {
@@ -431,7 +430,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
       (*MI.memoperands_begin())->getPseudoValue()) {
     if (isa<FixedStackPseudoSourceValue>(PSV))
       return false;
-    return !PSV->isConstant(nullptr) && PSV != PseudoSourceValue::getStack();
+    return !PSV->isConstant(nullptr) && !PSV->isStack();
   }

   return true;
@@ -598,7 +597,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
       // Get instruction with delay slot.
       MachineBasicBlock::instr_iterator DSI(I);

-      if (InMicroMipsMode && TII->GetInstSizeInBytes(std::next(DSI)) == 2 &&
+      if (InMicroMipsMode && TII->GetInstSizeInBytes(&*std::next(DSI)) == 2 &&
          DSI->isCall()) {
        // If instruction in delay slot is 16b change opcode to
        // corresponding instruction with short delay slot.
@@ -713,8 +712,9 @@ bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
   if (DisableBackwardSearch)
     return false;

-  RegDefsUses RegDU(*MBB.getParent()->getSubtarget().getRegisterInfo());
-  MemDefsUses MemDU(*TM.getDataLayout(), MBB.getParent()->getFrameInfo());
+  auto *Fn = MBB.getParent();
+  RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo());
+  MemDefsUses MemDU(Fn->getDataLayout(), Fn->getFrameInfo());
   ReverseIter Filler;

   RegDU.init(*Slot);
@@ -763,6 +763,7 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
   BB2BrMap BrMap;
   std::unique_ptr<InspectMemInstr> IM;
   Iter Filler;
+  auto *Fn = MBB.getParent();

   // Iterate over SuccBB's predecessor list.
   for (MachineBasicBlock::pred_iterator PI = SuccBB->pred_begin(),
@@ -772,15 +773,15 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {

   // Do not allow moving instructions which have unallocatable register operands
   // across basic block boundaries.
-  RegDU.setUnallocatableRegs(*MBB.getParent());
+  RegDU.setUnallocatableRegs(*Fn);

   // Only allow moving loads from stack or constants if any of the SuccBB's
   // predecessors have multiple successors.
   if (HasMultipleSuccs) {
     IM.reset(new LoadFromStackOrConst());
   } else {
-    const MachineFrameInfo *MFI = MBB.getParent()->getFrameInfo();
-    IM.reset(new MemDefsUses(*TM.getDataLayout(), MFI));
+    const MachineFrameInfo *MFI = Fn->getFrameInfo();
+    IM.reset(new MemDefsUses(Fn->getDataLayout(), MFI));
   }

   if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
@@ -800,12 +801,13 @@ MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {

   // Select the successor with the largest edge weight.
   auto &Prob = getAnalysis<MachineBranchProbabilityInfo>();
-  MachineBasicBlock *S = *std::max_element(B.succ_begin(), B.succ_end(),
-                 [&](const MachineBasicBlock *Dst0,
-                     const MachineBasicBlock *Dst1) {
-    return Prob.getEdgeWeight(&B, Dst0) < Prob.getEdgeWeight(&B, Dst1);
-  });
-  return S->isLandingPad() ? nullptr : S;
+  MachineBasicBlock *S = *std::max_element(
+      B.succ_begin(), B.succ_end(),
+      [&](const MachineBasicBlock *Dst0, const MachineBasicBlock *Dst1) {
+        return Prob.getEdgeProbability(&B, Dst0) <
+               Prob.getEdgeProbability(&B, Dst1);
+      });
+  return S->isEHPad() ? nullptr : S;
 }

 std::pair<MipsInstrInfo::BranchType, MachineInstr *>
diff --git a/lib/Target/Mips/MipsEVAInstrFormats.td b/lib/Target/Mips/MipsEVAInstrFormats.td
new file mode 100644
index 000000000000..11e191ad6d82
--- /dev/null
+++ b/lib/Target/Mips/MipsEVAInstrFormats.td
@@ -0,0 +1,84 @@
+//===- MipsEVAInstrFormats.td - Mips Instruction Formats ---*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips EVA instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+class MipsEVAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
+                    PredicateControl, StdArch {
+  let DecoderNamespace = "Mips";
+  let EncodingPredicates = [HasStdEnc];
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Field Values
+//
+//===----------------------------------------------------------------------===//
+
+// Memory Load/Store EVA
+def OPCODE6_LBE : OPCODE6<0b101100>;
+def OPCODE6_LBuE : OPCODE6<0b101000>;
+def OPCODE6_LHE : OPCODE6<0b101101>;
+def OPCODE6_LHuE : OPCODE6<0b101001>;
+def OPCODE6_LWE : OPCODE6<0b101111>;
+
+def OPCODE6_SBE : OPCODE6<0b011100>;
+def OPCODE6_SHE : OPCODE6<0b011101>;
+def OPCODE6_SWE : OPCODE6<0b011111>;
+
+// load/store left/right EVA
+def OPCODE6_LWLE : OPCODE6<0b011001>;
+def OPCODE6_LWRE : OPCODE6<0b011010>;
+def OPCODE6_SWLE : OPCODE6<0b100001>;
+def OPCODE6_SWRE : OPCODE6<0b100010>;
+
+// Load-linked EVA, Store-conditional EVA
+def OPCODE6_LLE : OPCODE6<0b101110>;
+def OPCODE6_SCE : OPCODE6<0b011110>;
+
+def OPCODE6_TLBINV : OPCODE6<0b000011>;
+def OPCODE6_TLBINVF : OPCODE6<0b000100>;
+
+def OPCODE6_CACHEE : OPCODE6<0b011011>;
+def OPCODE6_PREFE : OPCODE6<0b100011>;
+
+def OPGROUP_COP0 : OPGROUP<0b010000>;
+
+//===----------------------------------------------------------------------===//
+//
+// Encoding Formats
+//
+//===----------------------------------------------------------------------===//
+
+class SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6 Operation> : MipsEVAInst {
+  bits<21> addr;
+  bits<5> hint;
+  bits<5> base = addr{20-16};
+  bits<9> offset = addr{8-0};
+
+  bits<32> Inst;
+
+  let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+  let Inst{25-21} = base;
+  let Inst{20-16} = hint;
+  let Inst{15-7} = offset;
+  let Inst{6} = 0;
+  let Inst{5-0} = Operation.Value;
+}
+
+class TLB_FM<OPCODE6 Operation> : MipsEVAInst {
+  bits<32> Inst;
+
+  let Inst{31-26} = OPGROUP_COP0.Value;
+  let Inst{25} = 1;    // CO
+  let Inst{24-6} = 0;
+  let Inst{5-0} = Operation.Value;
+}
diff --git a/lib/Target/Mips/MipsEVAInstrInfo.td b/lib/Target/Mips/MipsEVAInstrInfo.td
new file mode 100644
index 000000000000..36c9694cbadd
--- /dev/null
+++ b/lib/Target/Mips/MipsEVAInstrInfo.td
@@ -0,0 +1,192 @@
+//===- MipsEVAInstrInfo.td - EVA ASE instructions -*- tablegen ------------*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips EVA ASE instructions.
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// Instruction encodings +// +//===----------------------------------------------------------------------===// + +// Memory Load/Store EVA encodings +class LBE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LBuE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LHE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LHuE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LWE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +class SBE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SHE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SWE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +// load/store left/right EVA encodings +class LWLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class LWRE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SWLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SWRE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +// Load-linked EVA, Store-conditional EVA encodings +class LLE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class SCE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +class TLBINV_ENC : TLB_FM; +class TLBINVF_ENC : TLB_FM; + +class CACHEE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; +class PREFE_ENC : SPECIAL3_EVA_LOAD_STORE_FM; + +//===----------------------------------------------------------------------===// +// +// Instruction descriptions +// +//===----------------------------------------------------------------------===// + +// Memory Load/Store EVA descriptions +class LOAD_EVA_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeMemEVA"; + bit canFoldAsLoad = 1; + bit mayLoad = 1; +} + +class LBE_DESC : LOAD_EVA_DESC_BASE<"lbe", GPR32Opnd>; +class LBuE_DESC : LOAD_EVA_DESC_BASE<"lbue", GPR32Opnd>; +class LHE_DESC : LOAD_EVA_DESC_BASE<"lhe", GPR32Opnd>; +class LHuE_DESC : LOAD_EVA_DESC_BASE<"lhue", GPR32Opnd>; +class LWE_DESC : LOAD_EVA_DESC_BASE<"lwe", GPR32Opnd>; + +class STORE_EVA_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeMemEVA"; + bit mayStore = 1; +} + +class SBE_DESC : STORE_EVA_DESC_BASE<"sbe", GPR32Opnd>; +class SHE_DESC : STORE_EVA_DESC_BASE<"she", GPR32Opnd>; +class SWE_DESC : STORE_EVA_DESC_BASE<"swe", GPR32Opnd>; + +// Load/Store Left/Right EVA descriptions +class LOAD_LEFT_RIGHT_EVA_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins mem_simm9:$addr, GPROpnd:$src); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeMemEVA"; + string Constraints = "$src = $rt"; + bit canFoldAsLoad = 1; +} + +class LWLE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"lwle", GPR32Opnd>; +class LWRE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"lwre", GPR32Opnd>; + +class STORE_LEFT_RIGHT_EVA_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeMemEVA"; +} + +class SWLE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"swle", GPR32Opnd>; +class SWRE_DESC : LOAD_LEFT_RIGHT_EVA_DESC_BASE<"swre", GPR32Opnd>; + +// Load-linked EVA, Store-conditional EVA descriptions +class LLE_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins mem_simm9:$addr); + string 
AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + bit mayLoad = 1; + string DecoderMethod = "DecodeMemEVA"; +} + +class LLE_DESC : LLE_DESC_BASE<"lle", GPR32Opnd>; + +class SCE_DESC_BASE { + dag OutOperandList = (outs GPROpnd:$dst); + dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list Pattern = []; + bit mayStore = 1; + string Constraints = "$rt = $dst"; + string DecoderMethod = "DecodeMemEVA"; +} + +class SCE_DESC : SCE_DESC_BASE<"sce", GPR32Opnd>; + +class TLB_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins); + string AsmString = instr_asm; + list Pattern = []; +} + +class TLBINV_DESC : TLB_DESC_BASE<"tlbinv">; +class TLBINVF_DESC : TLB_DESC_BASE<"tlbinvf">; + +class CACHEE_DESC_BASE { + dag OutOperandList = (outs); + dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint); + string AsmString = !strconcat(instr_asm, "\t$hint, $addr"); + list Pattern = []; + string DecoderMethod = "DecodeCacheeOp_CacheOpR6"; +} + +class CACHEE_DESC : CACHEE_DESC_BASE<"cachee", mem>; +class PREFE_DESC : CACHEE_DESC_BASE<"prefe", mem>; + +//===----------------------------------------------------------------------===// +// +// Instruction definitions +// +//===----------------------------------------------------------------------===// + +/// Load and Store EVA Instructions +def LBE : LBE_ENC, LBE_DESC, INSN_EVA; +def LBuE : LBuE_ENC, LBuE_DESC, INSN_EVA; +def LHE : LHE_ENC, LHE_DESC, INSN_EVA; +def LHuE : LHuE_ENC, LHuE_DESC, INSN_EVA; +let AdditionalPredicates = [NotInMicroMips] in { +def LWE : LWE_ENC, LWE_DESC, INSN_EVA; +} +def SBE : SBE_ENC, SBE_DESC, INSN_EVA; +def SHE : SHE_ENC, SHE_DESC, INSN_EVA; +let AdditionalPredicates = [NotInMicroMips] in { +def SWE : SWE_ENC, SWE_DESC, INSN_EVA; +} + +/// load/store left/right EVA +let AdditionalPredicates = [NotInMicroMips] in { +def LWLE : LWLE_ENC, LWLE_DESC, INSN_EVA_NOT_32R6_64R6; +def LWRE : LWRE_ENC, LWRE_DESC, INSN_EVA_NOT_32R6_64R6; +def SWLE : SWLE_ENC, SWLE_DESC, INSN_EVA_NOT_32R6_64R6; +def SWRE : SWRE_ENC, SWRE_DESC, INSN_EVA_NOT_32R6_64R6; +} + +/// Load-linked EVA, Store-conditional EVA +let AdditionalPredicates = [NotInMicroMips] in { +def LLE : LLE_ENC, LLE_DESC, INSN_EVA; +def SCE : SCE_ENC, SCE_DESC, INSN_EVA; +} + +def TLBINV : TLBINV_ENC, TLBINV_DESC, INSN_EVA; +def TLBINVF : TLBINVF_ENC, TLBINVF_DESC, INSN_EVA; + +def CACHEE : CACHEE_ENC, CACHEE_DESC, INSN_EVA; +def PREFE : PREFE_ENC, PREFE_DESC, INSN_EVA; diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 5152a072b3a2..e9eaf810637a 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -192,10 +192,10 @@ public: TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) { MFI = funcInfo.MF->getInfo(); Context = &funcInfo.Fn->getContext(); + bool ISASupported = !Subtarget->hasMips32r6() && Subtarget->hasMips32(); TargetSupported = - ((TM.getRelocationModel() == Reloc::PIC_) && - ((Subtarget->hasMips32r2() || Subtarget->hasMips32()) && - (static_cast(TM).getABI().IsO32()))); + ISASupported && (TM.getRelocationModel() == Reloc::PIC_) && + (static_cast(TM).getABI().IsO32()); UnsupportedFPMode = Subtarget->isFP64bit(); } @@ -236,32 +236,36 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, std::swap(LHS, RHS); unsigned Opc; - if (ISDOpc == ISD::AND) { + switch (ISDOpc) { + case ISD::AND: Opc = Mips::AND; - } else if (ISDOpc == ISD::OR) { + break; + case ISD::OR: Opc = Mips::OR; - } 
else if (ISDOpc == ISD::XOR) { + break; + case ISD::XOR: Opc = Mips::XOR; - } else + break; + default: llvm_unreachable("unexpected opcode"); + } unsigned LHSReg = getRegForValue(LHS); - unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); - if (!ResultReg) - return 0; - - unsigned RHSReg; if (!LHSReg) return 0; + unsigned RHSReg; if (const auto *C = dyn_cast(RHS)) RHSReg = materializeInt(C, MVT::i32); else RHSReg = getRegForValue(RHS); - if (!RHSReg) return 0; + unsigned ResultReg = createResultReg(&Mips::GPR32RegClass); + if (!ResultReg) + return 0; + emitInst(Opc, ResultReg).addReg(LHSReg).addReg(RHSReg); return ResultReg; } @@ -747,7 +751,7 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr, unsigned Offset = Addr.getOffset(); MachineFrameInfo &MFI = *MF->getFrameInfo(); MachineMemOperand *MMO = MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addFrameIndex(FI) @@ -798,7 +802,7 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Offset = Addr.getOffset(); MachineFrameInfo &MFI = *MF->getFrameInfo(); MachineMemOperand *MMO = MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(FI), MachineMemOperand::MOLoad, + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) .addReg(SrcReg) @@ -912,8 +916,7 @@ bool MipsFastISel::selectBranch(const Instruction *I) { BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ)) .addReg(CondReg) .addMBB(TBB); - fastEmitBranch(FBB, DbgLoc); - FuncInfo.MBB->addSuccessor(TBB); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } return false; @@ -1057,22 +1060,16 @@ bool MipsFastISel::selectFPToInt(const Instruction *I, bool IsSigned) { // entirely within FPRs. unsigned DestReg = createResultReg(&Mips::GPR32RegClass); unsigned TempReg = createResultReg(&Mips::FGR32RegClass); - unsigned Opc; - - if (SrcVT == MVT::f32) - Opc = Mips::TRUNC_W_S; - else - Opc = Mips::TRUNC_W_D32; + unsigned Opc = (SrcVT == MVT::f32) ? Mips::TRUNC_W_S : Mips::TRUNC_W_D32; // Generate the convert. 
emitInst(Opc, TempReg).addReg(SrcReg); - emitInst(Mips::MFC1, DestReg).addReg(TempReg); updateValueMap(I, DestReg); return true; } -// + bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl &OutVTs, unsigned &NumBytes) { @@ -1196,7 +1193,7 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI, unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(Addr.getOffset()), + MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); (void)(MMO); // if (!emitStore(ArgVT, ArgReg, Addr, MMO)) @@ -1607,19 +1604,23 @@ bool MipsFastISel::emitIntSExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool MipsFastISel::emitIntZExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, unsigned DestReg) { + int64_t Imm; + switch (SrcVT.SimpleTy) { default: return false; case MVT::i1: - emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(1); + Imm = 1; break; case MVT::i8: - emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xff); + Imm = 0xff; break; case MVT::i16: - emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(0xffff); + Imm = 0xffff; break; } + + emitInst(Mips::ANDi, DestReg).addReg(SrcReg).addImm(Imm); return true; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index fab2fdfef8cf..6756c1702f76 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -117,6 +117,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::GPRel: return "MipsISD::GPRel"; case MipsISD::ThreadPointer: return "MipsISD::ThreadPointer"; case MipsISD::Ret: return "MipsISD::Ret"; + case MipsISD::ERet: return "MipsISD::ERet"; case MipsISD::EH_RETURN: return "MipsISD::EH_RETURN"; case MipsISD::FPBrcond: return "MipsISD::FPBrcond"; case MipsISD::FPCmp: return "MipsISD::FPCmp"; @@ -390,10 +391,10 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); + if (!Subtarget.isGP64bit()) { + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); + } setInsertFencesForAtomic(true); @@ -437,9 +438,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM, setStackPointerRegisterToSaveRestore(ABI.IsN64() ? Mips::SP_64 : Mips::SP); - setExceptionPointerRegister(ABI.IsN64() ? Mips::A0_64 : Mips::A0); - setExceptionSelectorRegister(ABI.IsN64() ? 
Mips::A1_64 : Mips::A1); - MaxStoresPerMemcpy = 16; isMicroMips = Subtarget.inMicroMipsMode(); @@ -836,6 +834,14 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return SDValue(); } +bool MipsTargetLowering::isCheapToSpeculateCttz() const { + return Subtarget.hasMips32(); +} + +bool MipsTargetLowering::isCheapToSpeculateCtlz() const { + return Subtarget.hasMips32(); +} + void MipsTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, @@ -1092,8 +1098,7 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loopMBB); MF->insert(It, exitMBB); @@ -1204,8 +1209,7 @@ MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loopMBB); MF->insert(It, sinkMBB); MF->insert(It, exitMBB); @@ -1330,15 +1334,20 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI, DebugLoc DL = MI->getDebugLoc(); unsigned LL, SC, ZERO, BNE, BEQ; - if (Size == 4) { - LL = isMicroMips ? Mips::LL_MM : Mips::LL; - SC = isMicroMips ? Mips::SC_MM : Mips::SC; + if (Size == 4) { + if (isMicroMips) { + LL = Mips::LL_MM; + SC = Mips::SC_MM; + } else { + LL = Subtarget.hasMips32r6() ? Mips::LL_R6 : Mips::LL; + SC = Subtarget.hasMips32r6() ? Mips::SC_R6 : Mips::SC; + } ZERO = Mips::ZERO; BNE = Mips::BNE; BEQ = Mips::BEQ; } else { - LL = Mips::LLD; - SC = Mips::SCD; + LL = Subtarget.hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; + SC = Subtarget.hasMips64r6() ? 
Mips::SCD_R6 : Mips::SCD; ZERO = Mips::ZERO_64; BNE = Mips::BNE64; BEQ = Mips::BEQ64; @@ -1356,8 +1365,7 @@ MachineBasicBlock * MipsTargetLowering::emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, exitMBB); @@ -1440,8 +1448,7 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, sinkMBB); @@ -1586,9 +1593,10 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - Addr = DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr, - MachinePointerInfo::getJumpTable(), MemVT, false, false, - false, 0); + Addr = + DAG.getExtLoad(ISD::SEXTLOAD, DL, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), + MemVT, false, false, false, 0); Chain = Addr.getValue(1); if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || ABI.IsN64()) { @@ -1690,14 +1698,15 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, return getAddrLocal(N, SDLoc(N), Ty, DAG, ABI.IsN32() || ABI.IsN64()); if (LargeGOT) - return getAddrGlobalLargeGOT(N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, - MipsII::MO_GOT_LO16, DAG.getEntryNode(), - MachinePointerInfo::getGOT()); + return getAddrGlobalLargeGOT( + N, SDLoc(N), Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16, + DAG.getEntryNode(), + MachinePointerInfo::getGOT(DAG.getMachineFunction())); - return getAddrGlobal(N, SDLoc(N), Ty, DAG, - (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP - : MipsII::MO_GOT16, - DAG.getEntryNode(), MachinePointerInfo::getGOT()); + return getAddrGlobal( + N, SDLoc(N), Ty, DAG, + (ABI.IsN32() || ABI.IsN64()) ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16, + DAG.getEntryNode(), MachinePointerInfo::getGOT(DAG.getMachineFunction())); } SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, @@ -1719,6 +1728,9 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const // Local Exec TLS Model. 
GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -1813,7 +1825,8 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const static_cast( getTargetMachine().getObjFileLowering()); - if (TLOF->IsConstantInSmallSection(N->getConstVal(), getTargetMachine())) + if (TLOF->IsConstantInSmallSection(DAG.getDataLayout(), N->getConstVal(), + getTargetMachine())) // %gp_rel relocation return getAddrGPRel(N, SDLoc(N), Ty, DAG); @@ -2946,8 +2959,12 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, MipsCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); CCInfo.AllocateStack(ABI.GetCalleeAllocdArgSizeInBytes(CallConv), 1); - Function::const_arg_iterator FuncArg = - DAG.getMachineFunction().getFunction()->arg_begin(); + const Function *Func = DAG.getMachineFunction().getFunction(); + Function::const_arg_iterator FuncArg = Func->arg_begin(); + + if (Func->hasFnAttribute("interrupt") && !Func->arg_empty()) + report_fatal_error( + "Functions with the interrupt attribute cannot have arguments!"); CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FixedArg); MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(), @@ -3019,7 +3036,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // We ought to be able to use LocVT directly but O32 sets it to i32 // when allocating floating point values to integer registers. // This shouldn't influence how we load the value into registers unless - // we are targetting softfloat. + // we are targeting softfloat. if (VA.getValVT().isFloatingPoint() && !Subtarget.useSoftFloat()) LocVT = VA.getValVT(); } @@ -3033,9 +3050,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + SDValue ArgValue = DAG.getLoad( + LocVT, DL, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, false, 0); OutChains.push_back(ArgValue.getValue(1)); ArgValue = UnpackFromArgumentSlot(ArgValue, VA, Ins[i].ArgVT, DL, DAG); @@ -3098,8 +3116,20 @@ MipsTargetLowering::shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const } SDValue -MipsTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool IsVarArg, +MipsTargetLowering::LowerInterruptReturn(SmallVectorImpl &RetOps, + SDLoc DL, SelectionDAG &DAG) const { + + MachineFunction &MF = DAG.getMachineFunction(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + + MipsFI->setISR(); + + return DAG.getNode(MipsISD::ERet, DL, MVT::Other, RetOps); +} + +SDValue +MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, + bool IsVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, SDLoc DL, SelectionDAG &DAG) const { @@ -3192,7 +3222,11 @@ MipsTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - // Return on Mips is always a "jr $ra" + // ISRs must use "eret". 
+ if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt")) + return LowerInterruptReturn(RetOps, DL, DAG); + + // Standard return on Mips is a "jr $ra" return DAG.getNode(MipsISD::Ret, DL, MVT::Other, RetOps); } @@ -3300,7 +3334,7 @@ static std::pair parsePhysicalReg(StringRef C, StringRef &Prefix, // Search for the first numeric character. StringRef::const_iterator I, B = C.begin() + 1, E = C.end() - 1; - I = std::find_if(B, E, std::ptr_fun(isdigit)); + I = std::find_if(B, E, isdigit); Prefix = StringRef(B, I - B); @@ -3669,7 +3703,7 @@ void MipsTargetLowering::passByValArg( unsigned NumRegs = LastReg - FirstReg; if (NumRegs) { - const ArrayRef ArgRegs = ABI.GetByValArgRegs(); + ArrayRef ArgRegs = ABI.GetByValArgRegs(); bool LeftoverBytes = (NumRegs * RegSizeInBytes > ByValSizeInBytes); unsigned I = 0; @@ -3755,7 +3789,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector &OutChains, SDValue Chain, SDLoc DL, SelectionDAG &DAG, CCState &State) const { - const ArrayRef ArgRegs = ABI.GetVarArgRegs(); + ArrayRef ArgRegs = ABI.GetVarArgRegs(); unsigned Idx = State.getFirstUnallocated(ArgRegs); unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8); @@ -3812,7 +3846,7 @@ void MipsTargetLowering::HandleByVal(CCState *State, unsigned &Size, if (State->getCallingConv() != CallingConv::Fast) { unsigned RegSizeInBytes = Subtarget.getGPRSizeInBytes(); - const ArrayRef IntArgRegs = ABI.GetByValArgRegs(); + ArrayRef IntArgRegs = ABI.GetByValArgRegs(); // FIXME: The O32 case actually describes no shadow registers. const MCPhysReg *ShadowRegs = ABI.IsO32() ? IntArgRegs.data() : Mips64DPRegs; @@ -3860,8 +3894,7 @@ MipsTargetLowering::emitPseudoSELECT(MachineInstr *MI, MachineBasicBlock *BB, // destination vreg to set, the condition code register to branch on, the // true/false values to select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index b3d861d34da7..b33e125b81b7 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -67,6 +67,10 @@ namespace llvm { // Return Ret, + // Interrupt, exception, error trap Return + ERet, + + // Software Exception Return. EH_RETURN, // Node used to extract integer from accumulator. @@ -231,6 +235,9 @@ namespace llvm { return MVT::i32; } + bool isCheapToSpeculateCttz() const override; + bool isCheapToSpeculateCtlz() const override; + void LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG) const override; @@ -258,17 +265,25 @@ namespace llvm { EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; - struct LTStr { - bool operator()(const char *S1, const char *S2) const { - return strcmp(S1, S2) < 0; - } - }; - void HandleByVal(CCState *, unsigned &, unsigned) const override; unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return ABI.IsN64() ? Mips::A0_64 : Mips::A0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return ABI.IsN64() ? Mips::A1_64 : Mips::A1; + } + /// Returns true if a cast between SrcAS and DestAS is a noop. bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { // Mips doesn't have any special address spaces so we just reserve @@ -290,9 +305,10 @@ namespace llvm { unsigned GOTFlag = IsN32OrN64 ? MipsII::MO_GOT_PAGE : MipsII::MO_GOT; SDValue GOT = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty), getTargetNode(N, Ty, DAG, GOTFlag)); - SDValue Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT, - MachinePointerInfo::getGOT(), false, false, - false, 0); + SDValue Load = + DAG.getLoad(Ty, DL, DAG.getEntryNode(), GOT, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); unsigned LoFlag = IsN32OrN64 ? MipsII::MO_GOT_OFST : MipsII::MO_ABS_LO; SDValue Lo = DAG.getNode(MipsISD::Lo, DL, Ty, getTargetNode(N, Ty, DAG, LoFlag)); @@ -487,6 +503,9 @@ namespace llvm { const SmallVectorImpl &OutVals, SDLoc dl, SelectionDAG &DAG) const override; + SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, SDLoc DL, + SelectionDAG &DAG) const; + bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override; // Inline asm support diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index cb912253b28c..377260f89d10 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -136,7 +136,7 @@ multiclass ABSS_M { def _D32 : MMRel, ABSS_FT, FGR_32; - def _D64 : ABSS_FT, FGR_64 { + def _D64 : StdMMR6Rel, ABSS_FT, FGR_64 { let DecoderNamespace = "Mips64"; } } @@ -267,24 +267,25 @@ defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, //===----------------------------------------------------------------------===// // Floating Point Instructions //===----------------------------------------------------------------------===// -def ROUND_W_S : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, +def ROUND_W_S : MMRel, StdMMR6Rel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>, ABSS_FM<0xc, 16>, ISA_MIPS2; -def TRUNC_W_S : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>, +defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2; +def TRUNC_W_S : MMRel, StdMMR6Rel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>, ABSS_FM<0xd, 16>, ISA_MIPS2; -def CEIL_W_S : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, +def CEIL_W_S : MMRel, StdMMR6Rel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, ABSS_FM<0xe, 16>, ISA_MIPS2; -def FLOOR_W_S : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>, +def FLOOR_W_S : MMRel, StdMMR6Rel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>, ABSS_FM<0xf, 16>, ISA_MIPS2; def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x24, 16>; -defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2; defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>, ISA_MIPS2; defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>, ISA_MIPS2; defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>, ISA_MIPS2; defm CVT_W : ROUND_M<"cvt.w.d", II_CVT>, ABSS_FM<0x24, 17>; let DecoderNamespace = "Mips64" in { + let AdditionalPredicates = [NotInMicroMips] in { def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>, ABSS_FM<0x8, 16>, FGR_64; def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, II_ROUND>, @@ -301,14 
+302,17 @@ let DecoderNamespace = "Mips64" in { ABSS_FM<0xb, 16>, FGR_64; def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, II_FLOOR>, ABSS_FM<0xb, 17>, FGR_64; + } } def CVT_S_W : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x20, 20>; -def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>, - ABSS_FM<0x25, 16>, INSN_MIPS3_32R2; -def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>, - ABSS_FM<0x25, 17>, INSN_MIPS3_32R2; +let AdditionalPredicates = [NotInMicroMips] in{ + def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>, + ABSS_FM<0x25, 16>, INSN_MIPS3_32R2; + def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>, + ABSS_FM<0x25, 17>, INSN_MIPS3_32R2; +} def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>, ABSS_FM<0x20, 17>, FGR_32; @@ -320,8 +324,10 @@ def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>, let DecoderNamespace = "Mips64" in { def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, II_CVT>, ABSS_FM<0x20, 17>, FGR_64; - def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>, - ABSS_FM<0x20, 21>, FGR_64; + let AdditionalPredicates = [NotInMicroMips] in{ + def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>, + ABSS_FM<0x20, 21>, FGR_64; + } def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, II_CVT>, ABSS_FM<0x21, 20>, FGR_64; def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, II_CVT>, @@ -345,8 +351,8 @@ def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>, defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>; defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>; -def FSQRT_S : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>, - ABSS_FM<0x4, 16>, ISA_MIPS2; +def FSQRT_S : MMRel, StdMMR6Rel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, + II_SQRT_S, fsqrt>, ABSS_FM<0x4, 16>, ISA_MIPS2; defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>, ISA_MIPS2; // The odd-numbered registers are only referenced when doing loads, @@ -503,13 +509,13 @@ let AdditionalPredicates = [NoNaNsFPMath], def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; -def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>, +def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, II_BC1F, MIPS_BRANCH_F>, BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6; -def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, IIBranch, MIPS_BRANCH_F, 0>, +def BC1FL : MMRel, BC1F_FT<"bc1fl", brtarget, II_BC1FL, MIPS_BRANCH_F, 0>, BC1F_FM<1, 0>, ISA_MIPS2_NOT_32R6_64R6; -def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>, +def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, II_BC1T, MIPS_BRANCH_T>, BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6; -def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, IIBranch, MIPS_BRANCH_T, 0>, +def BC1TL : MMRel, BC1F_FT<"bc1tl", brtarget, II_BC1TL, MIPS_BRANCH_T, 0>, BC1F_FM<1, 1>, ISA_MIPS2_NOT_32R6_64R6; /// Floating Point Compare diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 5f4fcc354616..45baf27be518 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -132,7 +132,7 @@ class PseudoSE pattern, // These are aliases that require C++ handling to convert to the target // instruction, while InstAliases can be handled directly by tblgen. 
class MipsAsmPseudoInst: - MipsInst { + MipsInst, PredicateControl { let isPseudo = 1; let Pattern = []; } @@ -644,16 +644,16 @@ class BRK_FM funct> : StdArch // Exception return format //===----------------------------------------------------------------------===// -class ER_FM funct> : StdArch +class ER_FM funct, bit LLBit> : StdArch { bits<32> Inst; let Inst{31-26} = 0x10; let Inst{25} = 1; - let Inst{24-6} = 0; + let Inst{24-7} = 0; + let Inst{6} = LLBit; let Inst{5-0} = funct; } - //===----------------------------------------------------------------------===// // Enable/disable interrupt instruction format //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index bb23cc04e696..b1d69506c16f 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -60,8 +60,8 @@ MachineMemOperand *MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI, MachineFrameInfo &MFI = *MF.getFrameInfo(); unsigned Align = MFI.getObjectAlignment(FI); - return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), Flag, - MFI.getObjectSize(FI), Align); + return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI), + Flag, MFI.getObjectSize(FI), Align); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index ab98c9054e74..d9fb8c890739 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -77,6 +77,9 @@ def MipsThreadPointer: SDNode<"MipsISD::ThreadPointer", SDT_MipsThreadPointer>; def MipsRet : SDNode<"MipsISD::Ret", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def MipsERet : SDNode<"MipsISD::ERet", SDTNone, + [SDNPHasChain, SDNPOptInGlue, SDNPSideEffect]>; + // These are target-independent nodes, but have target-specific formats. 
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>; @@ -157,7 +160,7 @@ def HasMips3 : Predicate<"Subtarget->hasMips3()">, def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">, AssemblerPredicate<"FeatureMips4_32">; def NotMips4_32 : Predicate<"!Subtarget->hasMips4_32()">, - AssemblerPredicate<"FeatureMips4_32">; + AssemblerPredicate<"!FeatureMips4_32">; def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">, AssemblerPredicate<"FeatureMips4_32r2">; def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">, @@ -166,6 +169,8 @@ def HasMips32 : Predicate<"Subtarget->hasMips32()">, AssemblerPredicate<"FeatureMips32">; def HasMips32r2 : Predicate<"Subtarget->hasMips32r2()">, AssemblerPredicate<"FeatureMips32r2">; +def HasMips32r5 : Predicate<"Subtarget->hasMips32r5()">, + AssemblerPredicate<"FeatureMips32r5">; def HasMips32r6 : Predicate<"Subtarget->hasMips32r6()">, AssemblerPredicate<"FeatureMips32r6">; def NotMips32r6 : Predicate<"!Subtarget->hasMips32r6()">, @@ -176,6 +181,8 @@ def IsGP32bit : Predicate<"!Subtarget->isGP64bit()">, AssemblerPredicate<"!FeatureGP64Bit">; def HasMips64 : Predicate<"Subtarget->hasMips64()">, AssemblerPredicate<"FeatureMips64">; +def NotMips64 : Predicate<"!Subtarget->hasMips64()">, + AssemblerPredicate<"!FeatureMips64">; def HasMips64r2 : Predicate<"Subtarget->hasMips64r2()">, AssemblerPredicate<"FeatureMips64r2">; def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">, @@ -184,6 +191,8 @@ def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">, AssemblerPredicate<"!FeatureMips64r6">; def HasMicroMips32r6 : Predicate<"Subtarget->inMicroMips32r6Mode()">, AssemblerPredicate<"FeatureMicroMips,FeatureMips32r6">; +def HasMicroMips64r6 : Predicate<"Subtarget->inMicroMips64r6Mode()">, + AssemblerPredicate<"FeatureMicroMips,FeatureMips64r6">; def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">, AssemblerPredicate<"FeatureMips16">; def HasCnMips : Predicate<"Subtarget->hasCnMips()">, @@ -201,6 +210,12 @@ def NotInMicroMips : Predicate<"!Subtarget->inMicroMipsMode()">, def IsLE : Predicate<"Subtarget->isLittle()">; def IsBE : Predicate<"!Subtarget->isLittle()">; def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; +def UseTCCInDIV : AssemblerPredicate<"FeatureUseTCCInDIV">; +def HasEVA : Predicate<"Subtarget->hasEVA()">, + AssemblerPredicate<"FeatureEVA,FeatureMips32r2">; +def HasMSA : Predicate<"Subtarget->hasMSA()">, + AssemblerPredicate<"FeatureMSA">; + //===----------------------------------------------------------------------===// // Mips GPR size adjectives. 
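// Illustrative note, not part of this patch: predicates such as HasEVA,
// HasMSA and HasMips32r5 above are rarely attached to instructions
// directly; they are wrapped in adjective classes (defined in the next
// hunk) and mixed into each definition, with orthogonal constraints
// layered on through AdditionalPredicates. For example, the EVA
// definitions later in this patch follow the pattern:
//
//   let AdditionalPredicates = [NotInMicroMips] in {
//     def LWE : LWE_ENC, LWE_DESC, INSN_EVA;
//   }
//
// where INSN_EVA contributes InsnPredicates = [HasEVA].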
@@ -242,6 +257,7 @@ class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; }
 class ISA_MIPS32R2_NOT_32R6_64R6 {
   list<Predicate> InsnPredicates = [HasMips32r2, NotMips32r6, NotMips64r6];
 }
+class ISA_MIPS32R5 { list<Predicate> InsnPredicates = [HasMips32r5]; }
 class ISA_MIPS64 { list<Predicate> InsnPredicates = [HasMips64]; }
 class ISA_MIPS64_NOT_64R6 {
   list<Predicate> InsnPredicates = [HasMips64, NotMips64r6];
@@ -249,9 +265,21 @@ class ISA_MIPS64_NOT_64R6 {
 class ISA_MIPS64R2 { list<Predicate> InsnPredicates = [HasMips64r2]; }
 class ISA_MIPS32R6 { list<Predicate> InsnPredicates = [HasMips32r6]; }
 class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
+class ISA_MICROMIPS { list<Predicate> InsnPredicates = [InMicroMips]; }
 class ISA_MICROMIPS32R6 {
   list<Predicate> InsnPredicates = [HasMicroMips32r6];
 }
+class ISA_MICROMIPS64R6 {
+  list<Predicate> InsnPredicates = [HasMicroMips64r6];
+}
+class ISA_MICROMIPS32_NOT_MIPS32R6 {
+  list<Predicate> InsnPredicates = [InMicroMips, NotMips32r6];
+}
+
+class INSN_EVA { list<Predicate> InsnPredicates = [HasEVA]; }
+class INSN_EVA_NOT_32R6_64R6 {
+  list<Predicate> InsnPredicates = [NotMips32r6, NotMips64r6, HasEVA];
+}
 
 // The portions of MIPS-III that were also added to MIPS32
 class INSN_MIPS3_32 { list<Predicate> InsnPredicates = [HasMips3_32]; }
@@ -283,6 +311,28 @@ class INSN_MIPS5_32R2_NOT_32R6_64R6 {
   list<Predicate> InsnPredicates = [HasMips5_32r2, NotMips32r6, NotMips64r6];
 }
 
+class ASE_CNMIPS {
+  list<Predicate> InsnPredicates = [HasCnMips];
+}
+
+class ASE_MSA {
+  list<Predicate> InsnPredicates = [HasMSA];
+}
+
+class ASE_MSA_NOT_MSA64 {
+  list<Predicate> InsnPredicates = [HasMSA, NotMips64];
+}
+
+class ASE_MSA64 {
+  list<Predicate> InsnPredicates = [HasMSA, HasMips64];
+}
+
+// Class used for separating microMIPSr6 and microMIPS (r3) instructions.
+// It can only be used on instructions that don't inherit PredicateControl.
+class ISA_MICROMIPS_NOT_32R6_64R6 : PredicateControl {
+  let InsnPredicates = [InMicroMips, NotMips32r6, NotMips64r6];
+}
+
 //===----------------------------------------------------------------------===//
 class MipsPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl {
@@ -335,6 +385,81 @@ include "MipsInstrFormats.td"
 // Mips Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===// +class ConstantSImmAsmOperandClass Supers = []> + : AsmOperandClass { + let Name = "ConstantSImm" # Bits; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isConstantSImm<" # Bits # ">"; + let SuperClasses = Supers; + let DiagnosticType = "SImm" # Bits; +} + +class ConstantUImmAsmOperandClass Supers = [], + int Offset = 0> : AsmOperandClass { + let Name = "ConstantUImm" # Bits # "_" # Offset; + let RenderMethod = "addConstantUImmOperands<" # Bits # ", " # Offset # ">"; + let PredicateMethod = "isConstantUImm<" # Bits # ", " # Offset # ">"; + let SuperClasses = Supers; + let DiagnosticType = "UImm" # Bits # "_" # Offset; +} + +def ConstantUImm10AsmOperandClass + : ConstantUImmAsmOperandClass<10, []>; +def ConstantUImm8AsmOperandClass + : ConstantUImmAsmOperandClass<8, [ConstantUImm10AsmOperandClass]>; +def ConstantUImm7AsmOperandClass + : ConstantUImmAsmOperandClass<7, [ConstantUImm8AsmOperandClass]>; +def ConstantUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<6, [ConstantUImm7AsmOperandClass]>; +def ConstantSImm6AsmOperandClass + : ConstantSImmAsmOperandClass<6, [ConstantUImm7AsmOperandClass]>; +def ConstantUImm5Plus1AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 1>; +def ConstantUImm5Plus32AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32>; +def ConstantUImm5Plus33AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 33>; +def ConstantUImm5Plus32NormalizeAsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32> { + let Name = "ConstantUImm5_32_Norm"; + // We must also subtract 32 when we render the operand. + let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; +} +def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { + let Name = "UImm5Lsl2"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isScaledUImm<5, 2>"; + let SuperClasses = [ConstantUImm6AsmOperandClass]; + let DiagnosticType = "UImm5_Lsl2"; +} +def ConstantUImm5ReportUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]> { + let Name = "ConstantUImm5_0_Report_UImm6"; + let DiagnosticType = "UImm5_0_Report_UImm6"; +} +def ConstantUImm5AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]>; +def ConstantUImm4AsmOperandClass + : ConstantUImmAsmOperandClass< + 4, [ConstantUImm5AsmOperandClass, + ConstantUImm5Plus32AsmOperandClass, + ConstantUImm5Plus32NormalizeAsmOperandClass]>; +def ConstantUImm3AsmOperandClass + : ConstantUImmAsmOperandClass<3, [ConstantUImm4AsmOperandClass]>; +def ConstantUImm2Plus1AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass], 1>; +def ConstantUImm2AsmOperandClass + : ConstantUImmAsmOperandClass<2, [ConstantUImm3AsmOperandClass]>; +def ConstantUImm1AsmOperandClass + : ConstantUImmAsmOperandClass<1, [ConstantUImm2AsmOperandClass]>; +def ConstantImmzAsmOperandClass : AsmOperandClass { + let Name = "ConstantImmz"; + let RenderMethod = "addConstantUImmOperands<1>"; + let PredicateMethod = "isConstantImmz"; + let SuperClasses = [ConstantUImm1AsmOperandClass]; + let DiagnosticType = "Immz"; +} + def MipsJumpTargetAsmOperand : AsmOperandClass { let Name = "JumpTarget"; let ParserMethod = "parseJumpTarget"; @@ -360,6 +485,10 @@ def calltarget : Operand { def imm64: Operand; +def simm6 : Operand { + let ParserMatchClass = ConstantSImm6AsmOperandClass; + let OperandType = 
"OPERAND_IMMEDIATE"; +} def simm9 : Operand; def simm10 : Operand; def simm11 : Operand; @@ -380,23 +509,12 @@ def simm18_lsl3 : Operand { let ParserMatchClass = MipsJumpTargetAsmOperand; } -def simm20 : Operand { -} +def simm20 : Operand; +def simm32 : Operand; def uimm20 : Operand { } -def MipsUImm10AsmOperand : AsmOperandClass { - let Name = "UImm10"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "parseImm"; - let PredicateMethod = "isUImm<10>"; -} - -def uimm10 : Operand { - let ParserMatchClass = MipsUImm10AsmOperand; -} - def simm16_64 : Operand { let DecoderMethod = "DecodeSimm16"; } @@ -404,23 +522,71 @@ def simm16_64 : Operand { // Zero def uimmz : Operand { let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantImmzAsmOperandClass; } -// Unsigned Operand -def uimm2 : Operand { +// Unsigned Operands +foreach I = {1, 2, 3, 4, 5, 6, 7, 8, 10} in + def uimm # I : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } + +def uimm2_plus1 : Operand { let PrintMethod = "printUnsignedImm"; + let EncoderMethod = "getUImmWithOffsetEncoding<2, 1>"; + let DecoderMethod = "DecodeUImmWithOffset<2, 1>"; + let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass; } -def uimm3 : Operand { +def uimm5_plus1 : Operand { let PrintMethod = "printUnsignedImm"; + let EncoderMethod = "getUImmWithOffsetEncoding<5, 1>"; + let DecoderMethod = "DecodeUImmWithOffset<5, 1>"; + let ParserMatchClass = ConstantUImm5Plus1AsmOperandClass; } -def uimm5 : Operand { +def uimm5_plus32 : Operand { let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5Plus32AsmOperandClass; } -def uimm6 : Operand { +def uimm5_plus33 : Operand { let PrintMethod = "printUnsignedImm"; + let EncoderMethod = "getUImmWithOffsetEncoding<5, 1>"; + let DecoderMethod = "DecodeUImmWithOffset<5, 1>"; + let ParserMatchClass = ConstantUImm5Plus33AsmOperandClass; +} + +def uimm5_plus32_normalize : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass; +} + +def uimm5_lsl2 : Operand { + let EncoderMethod = "getUImm5Lsl2Encoding"; + let DecoderMethod = "DecodeUImm5lsl2"; + let ParserMatchClass = ConstantUImm5Lsl2AsmOperandClass; +} + +def uimm5_plus32_normalize_64 : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass; +} + +foreach I = {5} in + def uimm # I # _64 : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = + !cast("ConstantUImm" # I # "AsmOperandClass"); + } + +// Like uimm5_64 but reports a less confusing error for 32-63 when +// an instruction alias permits that. 
+def uimm5_64_report_uimm6 : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5ReportUImm6AsmOperandClass; } def uimm16 : Operand { @@ -435,6 +601,22 @@ def MipsMemAsmOperand : AsmOperandClass { let ParserMethod = "parseMemOperand"; } +def MipsMemSimm9AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm9"; + let SuperClasses = [MipsMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<9>"; +} + +def MipsMemSimm9GPRAsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm9GPR"; + let SuperClasses = [MipsMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffsetGPR<9>"; +} + def MipsMemSimm11AsmOperand : AsmOperandClass { let Name = "MemOffsetSimm11"; let SuperClasses = [MipsMemAsmOperand]; @@ -485,6 +667,13 @@ def mem_msa : mem_generic { def mem_simm9 : mem_generic { let MIOperandInfo = (ops ptr_rc, simm9); let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemSimm9AsmOperand; +} + +def mem_simm9gpr : mem_generic { + let MIOperandInfo = (ops ptr_rc, simm9); + let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemSimm9GPRAsmOperand; } def mem_simm11 : mem_generic { @@ -512,12 +701,6 @@ def PtrRC : Operand { let ParserMatchClass = GPR32AsmOperand; } -// size operand of ext instruction -def size_ext : Operand { - let EncoderMethod = "getSizeExtEncoding"; - let DecoderMethod = "DecodeExtSize"; -} - // size operand of ins instruction def size_ins : Operand { let EncoderMethod = "getSizeInsEncoding"; @@ -657,7 +840,7 @@ class shift_rotate_reg; -// Load Upper Imediate +// Load Upper Immediate class LoadUpper: InstSE<(outs RO:$rt), (ins Imm:$imm16), !strconcat(opstr, "\t$rt, $imm16"), [], II_LUI, FrmI, opstr>, IsAsCheapAsAMove { @@ -675,14 +858,19 @@ class Load : - InstSE<(outs), (ins RO:$rt, mem:$addr), !strconcat(opstr, "\t$rt, $addr"), + InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"), [(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> { let DecoderMethod = "DecodeMem"; let mayStore = 1; } +class Store : + StoreMemory; + // Load/Store Left/Right let canFoldAsLoad = 1 in class LoadLeftRight : InstSE<(outs), (ins RO:$rs, RO:$rt, opnd:$offset), !strconcat(opstr, "\t$rs, $rt, $offset"), - [(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], IIBranch, + [(brcond (i32 (cond_op RO:$rs, RO:$rt)), bb:$offset)], II_BCC, FrmI, opstr> { let isBranch = 1; let isTerminator = 1; @@ -752,7 +940,7 @@ class CBranchZero : InstSE<(outs), (ins RO:$rs, opnd:$offset), !strconcat(opstr, "\t$rs, $offset"), - [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], IIBranch, + [(brcond (i32 (cond_op RO:$rs, 0)), bb:$offset)], II_BCCZ, FrmI, opstr> { let isBranch = 1; let isTerminator = 1; @@ -778,7 +966,7 @@ class SetCC_I : InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), - [(operator targetoperator:$target)], IIBranch, FrmJ, bopstr> { + [(operator targetoperator:$target)], II_J, FrmJ, bopstr> { let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; @@ -788,7 +976,7 @@ class JumpFJ : - PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], IIBranch>, + PseudoSE<(outs), (ins brtarget:$offset), [(br bb:$offset)], II_B>, PseudoInstExpansion<(BEQInst ZERO, ZERO, brtarget:$offset)> { let isBranch = 1; let isTerminator = 1; @@ -802,7 +990,7 @@ class UncondBranch : let isTerminator=1, isBarrier=1, hasDelaySlot = 1 in class JumpFR: - 
InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], IIBranch, + InstSE<(outs), (ins RO:$rs), "jr\t$rs", [(operator RO:$rs)], II_JR, FrmR, opstr>; // Indirect branch @@ -815,23 +1003,23 @@ class IndirectBranch : JumpFR { let isCall=1, hasDelaySlot=1, Defs = [RA] in { class JumpLink : InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"), - [(MipsJmpLink imm:$target)], IIBranch, FrmJ, opstr> { + [(MipsJmpLink imm:$target)], II_JAL, FrmJ, opstr> { let DecoderMethod = "DecodeJumpTarget"; } class JumpLinkRegPseudo: - PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], IIBranch>, + PseudoSE<(outs), (ins RO:$rs), [(MipsJmpLink RO:$rs)], II_JALR>, PseudoInstExpansion<(JALRInst RetReg, ResRO:$rs)>; class JumpLinkReg: InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"), - [], IIBranch, FrmR>; + [], II_JALR, FrmR, opstr>; class BGEZAL_FT : InstSE<(outs), (ins RO:$rs, opnd:$offset), - !strconcat(opstr, "\t$rs, $offset"), [], IIBranch, FrmI, opstr> { + !strconcat(opstr, "\t$rs, $offset"), [], II_BCCZAL, FrmI, opstr> { let hasDelaySlot = DelaySlot; } @@ -840,17 +1028,17 @@ let isCall=1, hasDelaySlot=1, Defs = [RA] in { let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1, hasExtraSrcRegAllocReq = 1, Defs = [AT] in { class TailCall : - PseudoSE<(outs), (ins calltarget:$target), [], IIBranch>, + PseudoSE<(outs), (ins calltarget:$target), [], II_J>, PseudoInstExpansion<(JumpInst jmptarget:$target)>; class TailCallReg : - PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], IIBranch>, + PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>, PseudoInstExpansion<(JRInst ResRO:$rs)>; } class BAL_BR_Pseudo : - PseudoSE<(outs), (ins brtarget:$offset), [], IIBranch>, + PseudoSE<(outs), (ins brtarget:$offset), [], II_BCCZAL>, PseudoInstExpansion<(RealInst ZERO, brtarget:$offset)> { let isBranch = 1; let isTerminator = 1; @@ -997,9 +1185,10 @@ class SignExtInReg; // Subword Swap -class SubwordSwap: - InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], - NoItinerary, FrmR, opstr> { +class SubwordSwap: + InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [], itin, + FrmR, opstr> { let hasSideEffects = 0; } @@ -1010,8 +1199,8 @@ class ReadHardware : // Ext and Ins class ExtBase: - InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size), + Operand SizeOpnd, SDPatternOperator Op = null_frag> : + InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, SizeOpnd:$size), !strconcat(opstr, " $rt, $rs, $pos, $size"), [(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], II_EXT, FrmR, opstr>, ISA_MIPS32R2; @@ -1074,6 +1263,9 @@ class TrapBase let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, hasCtrlDep=1 in def RetRA : PseudoSE<(outs), (ins), [(MipsRet)]>; +let isReturn=1, isTerminator=1, isBarrier=1, hasCtrlDep=1, hasSideEffects=1 in +def ERet : PseudoSE<(outs), (ins), [(MipsERet)]>; + let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { def ADJCALLSTACKDOWN : MipsPseudo<(outs), (ins i32imm:$amt), [(callseq_start timm:$amt)]>; @@ -1215,10 +1407,11 @@ def LH : Load<"lh", GPR32Opnd, sextloadi16, II_LH, addrDefault>, MMRel, LW_FM<0x21>; def LHu : Load<"lhu", GPR32Opnd, zextloadi16, II_LHU>, MMRel, LW_FM<0x25>; let AdditionalPredicates = [NotInMicroMips] in { -def LW : Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel, +def LW : StdMMR6Rel, Load<"lw", GPR32Opnd, load, II_LW, addrDefault>, MMRel, LW_FM<0x23>; } -def SB : Store<"sb", GPR32Opnd, truncstorei8, II_SB>, MMRel, LW_FM<0x28>; +def SB : 
StdMMR6Rel, Store<"sb", GPR32Opnd, truncstorei8, II_SB>, MMRel, + LW_FM<0x28>; def SH : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>; let AdditionalPredicates = [NotInMicroMips] in { def SW : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>; @@ -1259,15 +1452,17 @@ let DecoderNamespace = "COP3_" in { } } -def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32; -def SYNCI : MMRel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2; +def SYNC : MMRel, StdMMR6Rel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32; +def SYNCI : MMRel, StdMMR6Rel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2; -def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2; -def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2; -def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2; -def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2; -def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2; -def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2; +let AdditionalPredicates = [NotInMicroMips] in { + def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>, ISA_MIPS2; + def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>, ISA_MIPS2; + def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>, ISA_MIPS2; + def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>, ISA_MIPS2; + def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>, ISA_MIPS2; + def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>, ISA_MIPS2; +} def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>, ISA_MIPS2_NOT_32R6_64R6; @@ -1290,14 +1485,15 @@ def TRAP : TrapBase; def SDBBP : MMRel, SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6; let AdditionalPredicates = [NotInMicroMips] in { -def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32; + def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18, 0x0>, INSN_MIPS3_32; + def ERETNC : MMRel, ER_FT<"eretnc">, ER_FM<0x18, 0x1>, ISA_MIPS32R5; + def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f, 0x0>, ISA_MIPS32; } -def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32; let AdditionalPredicates = [NotInMicroMips] in { -def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2; + def EI : MMRel, StdMMR6Rel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2; + def DI : MMRel, StdMMR6Rel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2; } -def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2; let EncodingPredicates = [], // FIXME: Lack of HasStdEnc is probably a bug AdditionalPredicates = [NotInMicroMips] in { @@ -1359,7 +1555,8 @@ def TAILCALL_R : TailCallReg; // Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64 // then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA. class PseudoIndirectBranchBase : - MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], IIBranch> { + MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], + II_IndirectBranchPseudo> { let isTerminator=1; let isBarrier=1; let hasDelaySlot = 1; @@ -1369,12 +1566,12 @@ class PseudoIndirectBranchBase : def PseudoIndirectBranch : PseudoIndirectBranchBase; -// Return instructions are matched as a RetRA instruction, then ar expanded +// Return instructions are matched as a RetRA instruction, then are expanded // into PseudoReturn/PseudoReturn64 after register allocation. Finally, // MipsAsmPrinter expands this into JR, JR64, JALR, or JALR64 depending on the // ISA. 
class PseudoReturnBase : MipsPseudo<(outs), (ins RO:$rs), - [], IIBranch> { + [], II_ReturnPseudo> { let isTerminator = 1; let isBarrier = 1; let hasDelaySlot = 1; @@ -1441,8 +1638,11 @@ def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>, def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>, ISA_MIPS32_NOT_32R6_64R6; -/// Word Swap Bytes Within Halfwords -def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>, ISA_MIPS32R2; +let AdditionalPredicates = [NotInMicroMips] in { + /// Word Swap Bytes Within Halfwords + def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd, II_WSBH>, SEB_FM<2, 0x20>, + ISA_MIPS32R2; +} /// No operation. def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; @@ -1485,10 +1685,12 @@ def PseudoSDIV : MultDivPseudo, ISA_MIPS1_NOT_32R6_64R6; def PseudoUDIV : MultDivPseudo, ISA_MIPS1_NOT_32R6_64R6; - +let AdditionalPredicates = [NotInMicroMips] in { def RDHWR : MMRel, ReadHardware, RDHWR_FM; - -def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>; +} +// TODO: Add '0 < pos+size <= 32' constraint check to ext instruction +def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, uimm5_plus1, MipsExt>, + EXT_FM<0>; def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>; /// Move Control Registers From/To CPU Registers @@ -1499,9 +1701,9 @@ def MTC2 : MTC3OP<"mtc2", COP2Opnd, GPR32Opnd>, MFC3OP_FM<0x12, 4>; class Barrier : InstSE<(outs), (ins), asmstr, [], NoItinerary, FrmOther, asmstr>; -def SSNOP : MMRel, Barrier<"ssnop">, BARRIER_FM<1>; +def SSNOP : MMRel, StdMMR6Rel, Barrier<"ssnop">, BARRIER_FM<1>; def EHB : MMRel, Barrier<"ehb">, BARRIER_FM<3>; -def PAUSE : MMRel, Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2; +def PAUSE : MMRel, StdMMR6Rel, Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2; // JR_HB and JALR_HB are defined here using the new style naming // scheme because some of this code is shared with Mips32r6InstrInfo.td @@ -1562,11 +1764,60 @@ def CACHE : MMRel, CacheOp<"cache", mem>, CACHEOP_FM<0b101111>, def PREF : MMRel, CacheOp<"pref", mem>, CACHEOP_FM<0b110011>, INSN_MIPS3_32_NOT_32R6_64R6; +def ROL : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd), + "rol\t$rs, $rt, $rd">; +def ROLImm : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), + "rol\t$rs, $rt, $imm">; +def : MipsInstAlias<"rol $rd, $rs", + (ROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"rol $rd, $imm", + (ROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>; + +def ROR : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd), + "ror\t$rs, $rt, $rd">; +def RORImm : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), + "ror\t$rs, $rt, $imm">; +def : MipsInstAlias<"ror $rd, $rs", + (ROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"ror $rd, $imm", + (RORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>; + +def DROL : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd), + "drol\t$rs, $rt, $rd">, ISA_MIPS64; +def DROLImm : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), + "drol\t$rs, $rt, $imm">, ISA_MIPS64; +def : MipsInstAlias<"drol $rd, $rs", + (DROL GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64; +def : MipsInstAlias<"drol $rd, $imm", + (DROLImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64; + +def DROR : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rd), + 
"dror\t$rs, $rt, $rd">, ISA_MIPS64; +def DRORImm : MipsAsmPseudoInst<(outs), + (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), + "dror\t$rs, $rt, $imm">, ISA_MIPS64; +def : MipsInstAlias<"dror $rd, $rs", + (DROR GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>, ISA_MIPS64; +def : MipsInstAlias<"dror $rd, $imm", + (DRORImm GPR32Opnd:$rd, GPR32Opnd:$rd, simm16:$imm), 0>, ISA_MIPS64; + //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// def : MipsInstAlias<"move $dst, $src", - (ADDu GPR32Opnd:$dst, GPR32Opnd:$src,ZERO), 1>, + (OR GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>, + GPR_32 { + let AdditionalPredicates = [NotInMicroMips]; +} +def : MipsInstAlias<"move $dst, $src", + (ADDu GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>, GPR_32 { let AdditionalPredicates = [NotInMicroMips]; } @@ -1630,27 +1881,27 @@ def : MipsInstAlias<"beqz $rs,$offset", def : MipsInstAlias<"beqzl $rs,$offset", (BEQL GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>; def : MipsInstAlias<"syscall", (SYSCALL 0), 1>; - + def : MipsInstAlias<"break", (BREAK 0, 0), 1>; def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>; let AdditionalPredicates = [NotInMicroMips] in { -def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2; + def : MipsInstAlias<"ei", (EI ZERO), 1>, ISA_MIPS32R2; + def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2; +} +let AdditionalPredicates = [NotInMicroMips] in { + def : MipsInstAlias<"teq $rs, $rt", + (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tge $rs, $rt", + (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tgeu $rs, $rt", + (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tlt $rs, $rt", + (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tltu $rs, $rt", + (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; + def : MipsInstAlias<"tne $rs, $rt", + (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; } -def : MipsInstAlias<"di", (DI ZERO), 1>, ISA_MIPS32R2; - -def : MipsInstAlias<"teq $rs, $rt", - (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tge $rs, $rt", - (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tgeu $rs, $rt", - (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tlt $rs, $rt", - (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tltu $rs, $rt", - (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; -def : MipsInstAlias<"tne $rs, $rt", - (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>, ISA_MIPS2; - def : MipsInstAlias<"sll $rd, $rt, $rs", (SLLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; def : MipsInstAlias<"sub, $rd, $rs, $imm", @@ -1678,7 +1929,7 @@ def : MipsInstAlias<"sync", class LoadImmediate32 : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadImm32 : LoadImmediate32<"li", uimm5, GPR32Opnd>; +def LoadImm32 : LoadImmediate32<"li", simm32, GPR32Opnd>; class LoadAddressFromReg32 : @@ -1689,13 +1940,16 @@ def LoadAddrReg32 : LoadAddressFromReg32<"la", mem, GPR32Opnd>; class LoadAddressFromImm32 : MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32), !strconcat(instr_asm, "\t$rt, $imm32")> ; -def LoadAddrImm32 : LoadAddressFromImm32<"la", uimm5, GPR32Opnd>; +def LoadAddrImm32 : LoadAddressFromImm32<"la", simm32, GPR32Opnd>; def JalTwoReg : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins 
GPR32Opnd:$rs),
                                  "jal\t$rd, $rs"> ;
 
 def JalOneReg : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs),
                                   "jal\t$rs"> ;
 
+def NORImm : MipsAsmPseudoInst<(outs),
+                               (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm),
+                               "nor\t$rs, $rt, $imm"> ;
+
 let hasDelaySlot = 1 in {
 def BneImm : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
                                (ins imm64:$imm64, brtarget:$offset),
@@ -1718,12 +1972,62 @@ def BLTU : CondBranchPseudo<"bltu">;
 def BLEU : CondBranchPseudo<"bleu">;
 def BGEU : CondBranchPseudo<"bgeu">;
 def BGTU : CondBranchPseudo<"bgtu">;
+def BLTL : CondBranchPseudo<"bltl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEL : CondBranchPseudo<"blel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEL : CondBranchPseudo<"bgel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTL : CondBranchPseudo<"bgtl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLTUL: CondBranchPseudo<"bltul">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEUL: CondBranchPseudo<"bleul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEUL: CondBranchPseudo<"bgeul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTUL: CondBranchPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
+
+class CondBranchImmPseudo<string instr_asm> :
+  MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, imm64:$imm, brtarget:$offset),
+                    !strconcat(instr_asm, "\t$rs, $imm, $offset")>;
+
+def BLTImmMacro : CondBranchImmPseudo<"blt">;
+def BLEImmMacro : CondBranchImmPseudo<"ble">;
+def BGEImmMacro : CondBranchImmPseudo<"bge">;
+def BGTImmMacro : CondBranchImmPseudo<"bgt">;
+def BLTUImmMacro : CondBranchImmPseudo<"bltu">;
+def BLEUImmMacro : CondBranchImmPseudo<"bleu">;
+def BGEUImmMacro : CondBranchImmPseudo<"bgeu">;
+def BGTUImmMacro : CondBranchImmPseudo<"bgtu">;
+def BLTLImmMacro : CondBranchImmPseudo<"bltl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLELImmMacro : CondBranchImmPseudo<"blel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGELImmMacro : CondBranchImmPseudo<"bgel">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTLImmMacro : CondBranchImmPseudo<"bgtl">, ISA_MIPS2_NOT_32R6_64R6;
+def BLTULImmMacro : CondBranchImmPseudo<"bltul">, ISA_MIPS2_NOT_32R6_64R6;
+def BLEULImmMacro : CondBranchImmPseudo<"bleul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGEULImmMacro : CondBranchImmPseudo<"bgeul">, ISA_MIPS2_NOT_32R6_64R6;
+def BGTULImmMacro : CondBranchImmPseudo<"bgtul">, ISA_MIPS2_NOT_32R6_64R6;
+
+// FIXME: Predicates are removed because instructions are matched regardless of
+// predicates, because PredicateControl was not in the hierarchy. This was
+// done to emit a more precise error message from the expansion function.
+// Once the tablegen-erated errors are made better, this needs to be fixed and
+// the predicates need to be restored.
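// As a concrete example of the compare-and-branch macros above (a sketch;
// the real expansion lives in the target's assembly parser), an assembler
// conventionally expands "blt $rs, $rt, L" through $at:
//
//   slt  $at, $rs, $rt    # $at = ($rs < $rt) ? 1 : 0
//   bne  $at, $zero, L    # branch when the comparison held
//
// The *ImmMacro forms first materialize the immediate into a register
// (li/addiu) and then perform the same compare and branch.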
+ +def SDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), + "div\t$rs, $rt">; //, ISA_MIPS1_NOT_32R6_64R6; + +def UDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), + "divu\t$rs, $rt">; //, ISA_MIPS1_NOT_32R6_64R6; + +def DSDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), + "ddiv\t$rs, $rt">; //, ISA_MIPS64_NOT_64R6; + +def DUDivMacro : MipsAsmPseudoInst<(outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt), + "ddivu\t$rs, $rt">; //, ISA_MIPS64_NOT_64R6; + +def Ulh : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr), + "ulh\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6; def Ulhu : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr), - "ulhu\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6; + "ulhu\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6; def Ulw : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr), - "ulw\t$rt, $addr">, ISA_MIPS1_NOT_32R6_64R6; + "ulw\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6; //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions @@ -1939,6 +2243,16 @@ let AddedComplexity = 40 in { } } +// Atomic load patterns. +def : MipsPat<(atomic_load_8 addr:$a), (LB addr:$a)>; +def : MipsPat<(atomic_load_16 addr:$a), (LH addr:$a)>; +def : MipsPat<(atomic_load_32 addr:$a), (LW addr:$a)>; + +// Atomic store patterns. +def : MipsPat<(atomic_store_8 addr:$a, GPR32:$v), (SB GPR32:$v, addr:$a)>; +def : MipsPat<(atomic_store_16 addr:$a, GPR32:$v), (SH GPR32:$v, addr:$a)>; +def : MipsPat<(atomic_store_32 addr:$a, GPR32:$v), (SW GPR32:$v, addr:$a)>; + //===----------------------------------------------------------------------===// // Floating Point Support //===----------------------------------------------------------------------===// @@ -1964,6 +2278,10 @@ include "MipsDSPInstrInfo.td" include "MipsMSAInstrFormats.td" include "MipsMSAInstrInfo.td" +// EVA +include "MipsEVAInstrFormats.td" +include "MipsEVAInstrInfo.td" + // Micromips include "MicroMipsInstrFormats.td" include "MicroMipsInstrInfo.td" @@ -1972,3 +2290,11 @@ include "MicroMipsInstrFPU.td" // Micromips r6 include "MicroMips32r6InstrFormats.td" include "MicroMips32r6InstrInfo.td" + +// Micromips64 r6 +include "MicroMips64r6InstrFormats.td" +include "MicroMips64r6InstrInfo.td" + +// Micromips DSP +include "MicroMipsDSPInstrFormats.td" +include "MicroMipsDSPInstrInfo.td" diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index 90f8cc0cacfd..49fb99a8ec43 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -148,7 +148,7 @@ void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) { // Insert NewMBB and fix control flow. MachineBasicBlock *Tgt = getTargetMBB(*FirstBr); NewMBB->transferSuccessors(MBB); - NewMBB->removeSuccessor(Tgt); + NewMBB->removeSuccessor(Tgt, true); MBB->addSuccessor(NewMBB); MBB->addSuccessor(Tgt); MF->insert(std::next(MachineFunction::iterator(MBB)), NewMBB); @@ -161,7 +161,7 @@ void MipsLongBranch::initMBBInfo() { // Split the MBBs if they have two branches. Each basic block should have at // most one branch after this loop is executed. 
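  // (MachineFunction::iterator no longer converts implicitly to
  // MachineBasicBlock *, which is why the call below spells its argument
  // as '&*I++': dereference the iterator, take the block's address, then
  // advance.)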
for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E;) - splitMBB(I++); + splitMBB(&*I++); MF->RenumberBlocks(); MBBInfos.clear(); @@ -262,8 +262,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { static_cast(Subtarget.getInstrInfo()); MF->insert(FallThroughMBB, LongBrMBB); - MBB->removeSuccessor(TgtMBB); - MBB->addSuccessor(LongBrMBB); + MBB->replaceSuccessor(TgtMBB, LongBrMBB); if (IsPIC) { MachineBasicBlock *BalTgtMBB = MF->CreateMachineBasicBlock(BB); @@ -434,7 +433,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { I.Br->addOperand(MachineOperand::CreateMBB(LongBrMBB)); } else // Change branch destination and reverse condition. - replaceBranch(*MBB, I.Br, DL, FallThroughMBB); + replaceBranch(*MBB, I.Br, DL, &*FallThroughMBB); } static void emitGPDisp(MachineFunction &F, const MipsInstrInfo *TII) { diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td index bff2d0fab1ec..7d25ea56e3d5 100644 --- a/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -7,18 +7,12 @@ // //===----------------------------------------------------------------------===// -def HasMSA : Predicate<"Subtarget->hasMSA()">, - AssemblerPredicate<"FeatureMSA">; - -class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { - let Predicates = [HasMSA]; +class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>, + PredicateControl, ASE_MSA { + let EncodingPredicates = [HasStdEnc]; let Inst{31-26} = 0b011110; } -class MSA64Inst : MSAInst { - let Predicates = [HasMSA, HasMips64]; -} - class MSACBranch : MSAInst { let Inst{31-26} = 0b010001; } @@ -27,10 +21,6 @@ class MSASpecial : MSAInst { let Inst{31-26} = 0b000000; } -class MSA64Special : MSA64Inst { - let Inst{31-26} = 0b000000; -} - class MSAPseudo pattern, InstrItinClass itin = IIPseudo>: MipsPseudo { @@ -100,7 +90,7 @@ class MSA_2R_FILL_FMT major, bits<2> df, bits<6> minor>: MSAInst { let Inst{5-0} = minor; } -class MSA_2R_FILL_D_FMT major, bits<2> df, bits<6> minor>: MSA64Inst { +class MSA_2R_FILL_D_FMT major, bits<2> df, bits<6> minor>: MSAInst { bits<5> rs; bits<5> wd; @@ -293,7 +283,7 @@ class MSA_ELM_COPY_W_FMT major, bits<6> minor>: MSAInst { let Inst{5-0} = minor; } -class MSA_ELM_COPY_D_FMT major, bits<6> minor>: MSA64Inst { +class MSA_ELM_COPY_D_FMT major, bits<6> minor>: MSAInst { bits<4> n; bits<5> ws; bits<5> rd; @@ -345,7 +335,7 @@ class MSA_ELM_INSERT_W_FMT major, bits<6> minor>: MSAInst { let Inst{5-0} = minor; } -class MSA_ELM_INSERT_D_FMT major, bits<6> minor>: MSA64Inst { +class MSA_ELM_INSERT_D_FMT major, bits<6> minor>: MSAInst { bits<6> n; bits<5> rs; bits<5> wd; @@ -450,7 +440,7 @@ class SPECIAL_LSA_FMT minor>: MSASpecial { let Inst{5-0} = minor; } -class SPECIAL_DLSA_FMT minor>: MSA64Special { +class SPECIAL_DLSA_FMT minor>: MSASpecial { bits<5> rs; bits<5> rt; bits<5> rd; diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 970e98ea9e1e..eacfcec78bc7 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -63,30 +63,13 @@ def MipsVExtractSExt : SDNode<"MipsISD::VEXTRACT_SEXT_ELT", def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT", SDTypeProfile<1, 3, [SDTCisPtrTy<2>]>, []>; +def immZExt1Ptr : ImmLeaf(Imm);}]>; +def immZExt2Ptr : ImmLeaf(Imm);}]>; def immZExt4Ptr : ImmLeaf(Imm);}]>; def immZExt6Ptr : ImmLeaf(Imm);}]>; // Operands -// The immediate of an LSA instruction needs special handling -// as the encoded value should be 
subtracted by one. -def uimm2LSAAsmOperand : AsmOperandClass { - let Name = "LSAImm"; - let ParserMethod = "parseLSAImm"; - let RenderMethod = "addImmOperands"; -} - -def LSAImm : Operand { - let PrintMethod = "printUnsignedImm"; - let EncoderMethod = "getLSAImmEncoding"; - let DecoderMethod = "DecodeLSAImm"; - let ParserMatchClass = uimm2LSAAsmOperand; -} - -def uimm4 : Operand { - let PrintMethod = "printUnsignedImm8"; -} - def uimm4_ptr : Operand { let PrintMethod = "printUnsignedImm8"; } @@ -95,10 +78,6 @@ def uimm6_ptr : Operand { let PrintMethod = "printUnsignedImm8"; } -def uimm8 : Operand { - let PrintMethod = "printUnsignedImm8"; -} - def simm5 : Operand; def vsplat_uimm1 : Operand { @@ -639,7 +618,6 @@ class COPY_S_D_ENC : MSA_ELM_COPY_D_FMT<0b0010, 0b011001>; class COPY_U_B_ENC : MSA_ELM_COPY_B_FMT<0b0011, 0b011001>; class COPY_U_H_ENC : MSA_ELM_COPY_H_FMT<0b0011, 0b011001>; class COPY_U_W_ENC : MSA_ELM_COPY_W_FMT<0b0011, 0b011001>; -class COPY_U_D_ENC : MSA_ELM_COPY_D_FMT<0b0011, 0b011001>; class CTCMSA_ENC : MSA_ELM_CTCMSA_FMT<0b0000111110, 0b011001>; @@ -1195,47 +1173,14 @@ class MSA_BIT_D_DESC_BASE { +class MSA_BIT_X_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm3:$m); + dag InOperandList = (ins ROWS:$ws, ImmOp:$m); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. -class MSA_BIT_H_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm4:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. -class MSA_BIT_W_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm5:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. 
-class MSA_BIT_D_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm6:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt6:$m))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))]; InstrItinClass Itinerary = itin; } @@ -1291,13 +1236,14 @@ class MSA_COPY_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, uimm4:$n); + dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, ImmOp:$n); string AsmString = !strconcat(instr_asm, "\t$wd, $ws[$n]"); list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, ROWS:$ws, - immZExt4:$n))]; + Imm:$n))]; string Constraints = "$wd = $wd_in"; InstrItinClass Itinerary = itin; } @@ -1479,7 +1425,7 @@ class MSA_CBRANCH_DESC_BASE { dag InOperandList = (ins ROWD:$wt, brtarget:$offset); string AsmString = !strconcat(instr_asm, "\t$wt, $offset"); list Pattern = []; - InstrItinClass Itinerary = IIBranch; + InstrItinClass Itinerary = NoItinerary; bit isBranch = 1; bit isTerminator = 1; bit hasDelaySlot = 1; @@ -1519,13 +1465,14 @@ class MSA_INSERT_VIDX_PSEUDO_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWD:$wd_in, uimm6:$n, ROWS:$ws, uimmz:$n2); + dag InOperandList = (ins ROWD:$wd_in, ImmOp:$n, ROWS:$ws, uimmz:$n2); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[$n2]"); list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, - immZExt6:$n, + Imm:$n, ROWS:$ws, immz:$n2))]; InstrItinClass Itinerary = itin; @@ -1934,8 +1881,6 @@ class COPY_U_H_DESC : MSA_COPY_DESC_BASE<"copy_u.h", vextract_zext_i16, v8i16, GPR32Opnd, MSA128HOpnd>; class COPY_U_W_DESC : MSA_COPY_DESC_BASE<"copy_u.w", vextract_zext_i32, v4i32, GPR32Opnd, MSA128WOpnd>; -class COPY_U_D_DESC : MSA_COPY_DESC_BASE<"copy_u.d", vextract_zext_i64, v2i64, - GPR64Opnd, MSA128DOpnd>; class COPY_FW_PSEUDO_DESC : MSA_COPY_PSEUDO_BASE; @@ -2346,13 +2291,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC : class INSERT_FD_VIDX64_PSEUDO_DESC : MSA_INSERT_VIDX_PSEUDO_BASE; -class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, +class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4, MSA128BOpnd>; -class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, +class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3, MSA128HOpnd>; -class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, +class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2, MSA128WOpnd>; -class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, +class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1, MSA128DOpnd>; class LD_DESC_BASE { dag OutOperandList = (outs RORD:$rd); - dag InOperandList = (ins RORS:$rs, RORT:$rt, LSAImm:$sa); + dag InOperandList = (ins RORS:$rs, RORT:$rt, uimm2_plus1:$sa); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $sa"); list Pattern = [(set RORD:$rd, (add RORT:$rt, (shl RORS:$rs, @@ -2561,23 +2506,23 @@ class PCNT_H_DESC : MSA_2R_DESC_BASE<"pcnt.h", ctpop, MSA128HOpnd>; class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>; -class SAT_S_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, - MSA128BOpnd>; -class SAT_S_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, - MSA128HOpnd>; -class SAT_S_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, - MSA128WOpnd>; -class SAT_S_D_DESC : 
MSA_BIT_D_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, - MSA128DOpnd>; +class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3, + immZExt3, MSA128BOpnd>; +class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4, + immZExt4, MSA128HOpnd>; +class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5, + immZExt5, MSA128WOpnd>; +class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6, + immZExt6, MSA128DOpnd>; -class SAT_U_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, - MSA128BOpnd>; -class SAT_U_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, - MSA128HOpnd>; -class SAT_U_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, - MSA128WOpnd>; -class SAT_U_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, - MSA128DOpnd>; +class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3, + immZExt3, MSA128BOpnd>; +class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4, + immZExt4, MSA128HOpnd>; +class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5, + immZExt5, MSA128WOpnd>; +class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6, + immZExt6, MSA128DOpnd>; class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>; class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>; @@ -2589,13 +2534,17 @@ class SLD_W_DESC : MSA_3R_SLD_DESC_BASE<"sld.w", int_mips_sld_w, MSA128WOpnd>; class SLD_D_DESC : MSA_3R_SLD_DESC_BASE<"sld.d", int_mips_sld_d, MSA128DOpnd>; class SLDI_B_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.b", int_mips_sldi_b, - MSA128BOpnd>; + MSA128BOpnd, MSA128BOpnd, uimm4, + immZExt4>; class SLDI_H_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.h", int_mips_sldi_h, - MSA128HOpnd>; + MSA128HOpnd, MSA128HOpnd, uimm3, + immZExt3>; class SLDI_W_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.w", int_mips_sldi_w, - MSA128WOpnd>; + MSA128WOpnd, MSA128WOpnd, uimm2, + immZExt2>; class SLDI_D_DESC : MSA_ELM_SLD_DESC_BASE<"sldi.d", int_mips_sldi_d, - MSA128DOpnd>; + MSA128DOpnd, MSA128DOpnd, uimm1, + immZExt1>; class SLL_B_DESC : MSA_3R_DESC_BASE<"sll.b", shl, MSA128BOpnd>; class SLL_H_DESC : MSA_3R_DESC_BASE<"sll.h", shl, MSA128HOpnd>; @@ -2648,14 +2597,14 @@ class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>; class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; -class SRARI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srari.b", int_mips_srari_b, - MSA128BOpnd>; -class SRARI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srari.h", int_mips_srari_h, - MSA128HOpnd>; -class SRARI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srari.w", int_mips_srari_w, - MSA128WOpnd>; -class SRARI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srari.d", int_mips_srari_d, - MSA128DOpnd>; +class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3, + immZExt3, MSA128BOpnd>; +class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4, + immZExt4, MSA128HOpnd>; +class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5, + immZExt5, MSA128WOpnd>; +class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6, + immZExt6, MSA128DOpnd>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; @@ -2676,14 +2625,14 @@ class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>; class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, 
MSA128WOpnd>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; -class SRLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srlri.b", int_mips_srlri_b, - MSA128BOpnd>; -class SRLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srlri.h", int_mips_srlri_h, - MSA128HOpnd>; -class SRLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srlri.w", int_mips_srlri_w, - MSA128WOpnd>; -class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d, - MSA128DOpnd>; +class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3, + immZExt3, MSA128BOpnd>; +class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4, + immZExt4, MSA128HOpnd>; +class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5, + immZExt5, MSA128WOpnd>; +class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6, + immZExt6, MSA128DOpnd>; class ST_DESC_BASE; +// Vector extraction with fixed index. +// +// Extracting 32-bit values on MSA32 should always use COPY_S_W rather than +// COPY_U_W, even for the zero-extended case. This is because our forward +// compatibility strategy is to consider registers to be infinitely +// sign-extended so that a MIPS64 can execute MIPS32 code without getting +// different register values. +def : MSAPat<(vextract_zext_i32 (v4i32 MSA128W:$ws), immZExt2Ptr:$idx), + (COPY_S_W MSA128W:$ws, immZExt2:$idx)>, ASE_MSA_NOT_MSA64; +def : MSAPat<(vextract_zext_i32 (v4f32 MSA128W:$ws), immZExt2Ptr:$idx), + (COPY_S_W MSA128W:$ws, immZExt2:$idx)>, ASE_MSA_NOT_MSA64; + +// Extracting 64-bit values on MSA64 should always use COPY_S_D rather than +// COPY_U_D, even for the zero-extended case. This is because our forward +// compatibility strategy is to consider registers to be infinitely +// sign-extended so that a hypothetical MIPS128 would be able to execute MIPS64 +// code without getting different register values. +def : MSAPat<(vextract_zext_i64 (v2i64 MSA128D:$ws), immZExt1Ptr:$idx), + (COPY_S_D MSA128D:$ws, immZExt1:$idx)>, ASE_MSA64; +def : MSAPat<(vextract_zext_i64 (v2f64 MSA128D:$ws), immZExt1Ptr:$idx), + (COPY_S_D MSA128D:$ws, immZExt1:$idx)>, ASE_MSA64; + // Vector extraction with variable index def : MSAPat<(i32 (vextract_sext_i8 v16i8:$ws, i32:$idx)), (SRA (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_B v16i8:$ws, diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp index 0d1ee046f0dc..c7d2738af1d4 100644 --- a/lib/Target/Mips/MipsMachineFunction.cpp +++ b/lib/Target/Mips/MipsMachineFunction.cpp @@ -24,42 +24,6 @@ static cl::opt FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true), cl::desc("Always use $gp as the global base register.")); -// class MipsCallEntry. 
-MipsCallEntry::MipsCallEntry(StringRef N) {
-#ifndef NDEBUG
-  Name = N;
-  Val = nullptr;
-#endif
-}
-
-MipsCallEntry::MipsCallEntry(const GlobalValue *V) {
-#ifndef NDEBUG
-  Val = V;
-#endif
-}
-
-bool MipsCallEntry::isConstant(const MachineFrameInfo *) const {
-  return false;
-}
-
-bool MipsCallEntry::isAliased(const MachineFrameInfo *) const {
-  return false;
-}
-
-bool MipsCallEntry::mayAlias(const MachineFrameInfo *) const {
-  return false;
-}
-
-void MipsCallEntry::printCustom(raw_ostream &O) const {
-  O << "MipsCallEntry: ";
-#ifndef NDEBUG
-  if (Val)
-    O << Val->getName();
-  else
-    O << Name;
-#endif
-}
-
 MipsFunctionInfo::~MipsFunctionInfo() {}
 
 bool MipsFunctionInfo::globalBaseRegSet() const {
@@ -111,27 +75,32 @@ void MipsFunctionInfo::createEhDataRegsFI() {
   }
 }
 
+void MipsFunctionInfo::createISRRegFI() {
+  // ISRs require spill slots for Status & ErrorPC Coprocessor 0 registers.
+  // The current implementation only supports Mips32r2+, not Mips64rX. Status
+  // is always 32 bits; ErrorPC is 32 or 64 bits dependent on the
+  // architecture, though here Mips32r2+ is the supported architecture.
+  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+
+  for (int I = 0; I < 2; ++I)
+    ISRDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(
+        RC->getSize(), RC->getAlignment(), false);
+}
+
 bool MipsFunctionInfo::isEhDataRegFI(int FI) const {
   return CallsEhReturn && (FI == EhDataRegFI[0] || FI == EhDataRegFI[1]
                         || FI == EhDataRegFI[2] || FI == EhDataRegFI[3]);
 }
 
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(StringRef Name) {
-  std::unique_ptr<const MipsCallEntry> &E = ExternalCallEntries[Name];
-
-  if (!E)
-    E = llvm::make_unique<MipsCallEntry>(Name);
-
-  return MachinePointerInfo(E.get());
+bool MipsFunctionInfo::isISRRegFI(int FI) const {
+  return IsISR && (FI == ISRDataRegFI[0] || FI == ISRDataRegFI[1]);
+}
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const char *ES) {
+  return MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES));
 }
 
-MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *Val) {
-  std::unique_ptr<const MipsCallEntry> &E = GlobalCallEntries[Val];
-
-  if (!E)
-    E = llvm::make_unique<MipsCallEntry>(Val);
-
-  return MachinePointerInfo(E.get());
+MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) {
+  return MachinePointerInfo(MF.getPSVManager().getGlobalValueCallEntry(GV));
 }
 
 int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 32436efa2eda..a2f6ee03604f 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -15,12 +15,10 @@
 #define LLVM_LIB_TARGET_MIPS_MIPSMACHINEFUNCTION_H
 
 #include "Mips16HardFloatInfo.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/ValueMap.h"
 #include "llvm/Target/TargetFrameLowering.h"
 #include "llvm/Target/TargetMachine.h"
@@ -30,31 +28,13 @@
 
 namespace llvm {
 
-/// \brief A class derived from PseudoSourceValue that represents a GOT entry
-/// resolved by lazy-binding.
-class MipsCallEntry : public PseudoSourceValue { -public: - explicit MipsCallEntry(StringRef N); - explicit MipsCallEntry(const GlobalValue *V); - bool isConstant(const MachineFrameInfo *) const override; - bool isAliased(const MachineFrameInfo *) const override; - bool mayAlias(const MachineFrameInfo *) const override; - -private: - void printCustom(raw_ostream &O) const override; -#ifndef NDEBUG - std::string Name; - const GlobalValue *Val; -#endif -}; - /// MipsFunctionInfo - This class is derived from MachineFunction private /// Mips target-specific information for each MachineFunction. class MipsFunctionInfo : public MachineFunctionInfo { public: MipsFunctionInfo(MachineFunction &MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), Mips16SPAliasReg(0), - VarArgsFrameIndex(0), CallsEhReturn(false), SaveS2(false), + VarArgsFrameIndex(0), CallsEhReturn(false), IsISR(false), SaveS2(false), MoveF64ViaSpillFI(-1) {} ~MipsFunctionInfo(); @@ -86,13 +66,21 @@ public: int getEhDataRegFI(unsigned Reg) const { return EhDataRegFI[Reg]; } bool isEhDataRegFI(int FI) const; - /// \brief Create a MachinePointerInfo that has a MipsCallEntr object - /// representing a GOT entry for an external function. - MachinePointerInfo callPtrInfo(StringRef Name); + /// Create a MachinePointerInfo that has an ExternalSymbolPseudoSourceValue + /// object representing a GOT entry for an external function. + MachinePointerInfo callPtrInfo(const char *ES); - /// \brief Create a MachinePointerInfo that has a MipsCallEntr object + // Functions with the "interrupt" attribute require special prologues, + // epilogues and additional spill slots. + bool isISR() const { return IsISR; } + void setISR() { IsISR = true; } + void createISRRegFI(); + int getISRRegFI(unsigned Reg) const { return ISRDataRegFI[Reg]; } + bool isISRRegFI(int FI) const; + + /// Create a MachinePointerInfo that has a GlobalValuePseudoSourceValue object /// representing a GOT entry for a global function. - MachinePointerInfo callPtrInfo(const GlobalValue *Val); + MachinePointerInfo callPtrInfo(const GlobalValue *GV); void setSaveS2() { SaveS2 = true; } bool hasSaveS2() const { return SaveS2; } @@ -136,17 +124,18 @@ private: /// Frame objects for spilling eh data registers. int EhDataRegFI[4]; + /// ISR - Whether the function is an Interrupt Service Routine. + bool IsISR; + + /// Frame objects for spilling C0_STATUS, C0_EPC + int ISRDataRegFI[2]; + // saveS2 bool SaveS2; /// FrameIndex for expanding BuildPairF64 nodes to spill and reload when the /// O32 FPXX ABI is enabled. -1 is used to denote invalid index. int MoveF64ViaSpillFI; - - /// MipsCallEntry maps. - StringMap> ExternalCallEntries; - ValueMap> - GlobalCallEntries; }; } // end of namespace llvm diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f6647e6a8468..28e5a425849f 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -84,6 +84,16 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const MCPhysReg * MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const MipsSubtarget &Subtarget = MF->getSubtarget(); + const Function *F = MF->getFunction(); + if (F->hasFnAttribute("interrupt")) { + if (Subtarget.hasMips64()) + return Subtarget.hasMips64r6() ? CSR_Interrupt_64R6_SaveList + : CSR_Interrupt_64_SaveList; + else + return Subtarget.hasMips32r6() ? 
CSR_Interrupt_32R6_SaveList + : CSR_Interrupt_32_SaveList; + } + if (Subtarget.isSingleFloat()) return CSR_SingleFloatOnly_SaveList; @@ -284,6 +294,16 @@ getFrameRegister(const MachineFunction &MF) const { } bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const { + // Avoid realigning functions that explicitly do not want to be realigned. + // Normally, we should report an error when a function should be dynamically + // realigned but also has the attribute no-realign-stack. Unfortunately, + // with this attribute, MachineFrameInfo clamps each new object's alignment + // to that of the stack's alignment as specified by the ABI. As a result, + // the information of whether we have objects with larger alignment + // requirement than the stack's alignment is already lost at this point. + if (!TargetRegisterInfo::canRealignStack(MF)) + return false; + const MipsSubtarget &Subtarget = MF.getSubtarget(); unsigned FP = Subtarget.isGP32bit() ? Mips::FP : Mips::FP_64; unsigned BP = Subtarget.isGP32bit() ? Mips::S7 : Mips::S7_64; @@ -306,42 +326,3 @@ bool MipsRegisterInfo::canRealignStack(const MachineFunction &MF) const { // sized objects. return MF.getRegInfo().canReserveReg(BP); } - -bool MipsRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const MipsSubtarget &Subtarget = MF.getSubtarget(); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - - bool CanRealign = canRealignStack(MF); - - // Avoid realigning functions that explicitly do not want to be realigned. - // Normally, we should report an error when a function should be dynamically - // realigned but also has the attribute no-realign-stack. Unfortunately, - // with this attribute, MachineFrameInfo clamps each new object's alignment - // to that of the stack's alignment as specified by the ABI. As a result, - // the information of whether we have objects with larger alignment - // requirement than the stack's alignment is already lost at this point. - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) - return false; - - const Function *F = MF.getFunction(); - if (F->hasFnAttribute(Attribute::StackAlignment)) { -#ifdef DEBUG - if (!CanRealign) - DEBUG(dbgs() << "It's not possible to realign the stack of the function: " - << F->getName() << "\n"); -#endif - return CanRealign; - } - - unsigned StackAlignment = Subtarget.getFrameLowering()->getStackAlignment(); - if (MFI->getMaxAlignment() > StackAlignment) { -#ifdef DEBUG - if (!CanRealign) - DEBUG(dbgs() << "It's not possible to realign the stack of the function: " - << F->getName() << "\n"); -#endif - return CanRealign; - } - - return false; -} diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index ee1f6bcd7390..5de68a21b73e 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -61,9 +61,7 @@ public: RegScavenger *RS = nullptr) const; // Stack realignment queries. - bool canRealignStack(const MachineFunction &MF) const; - - bool needsStackRealignment(const MachineFunction &MF) const override; + bool canRealignStack(const MachineFunction &MF) const override; /// Debug information queries. 
unsigned getFrameRegister(const MachineFunction &MF) const override; diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 096b3bee5d07..a4abd62ee607 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -17,6 +17,7 @@ #include "MipsMachineFunction.h" #include "MipsSEInstrInfo.h" #include "MipsSubtarget.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -319,6 +320,15 @@ bool ExpandPseudo::expandBuildPairF64(MachineBasicBlock &MBB, bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, bool FP64) const { + const MachineOperand &Op1 = I->getOperand(1); + const MachineOperand &Op2 = I->getOperand(2); + + if ((Op1.isReg() && Op1.isUndef()) || (Op2.isReg() && Op2.isUndef())) { + unsigned DstReg = I->getOperand(0).getReg(); + BuildMI(MBB, I, I->getDebugLoc(), TII.get(Mips::IMPLICIT_DEF), DstReg); + return true; + } + // For fpxx and when mfhc1 is not available, use: // spill + reload via ldc1 // @@ -335,8 +345,8 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, if ((Subtarget.isABI_FPXX() && !Subtarget.hasMTHC1()) || (FP64 && !Subtarget.useOddSPReg())) { unsigned DstReg = I->getOperand(0).getReg(); - unsigned SrcReg = I->getOperand(1).getReg(); - unsigned N = I->getOperand(2).getImm(); + unsigned SrcReg = Op1.getReg(); + unsigned N = Op2.getImm(); int64_t Offset = 4 * (Subtarget.isLittle() ? N : (1 - N)); // It should be impossible to have FGR64 on MIPS-II or MIPS32r1 (which are @@ -352,8 +362,7 @@ bool ExpandPseudo::expandExtractElementF64(MachineBasicBlock &MBB, // We re-use the same spill slot each time so that the stack frame doesn't // grow too much in functions with a large number of moves. int FI = MF.getInfo()->getMoveF64ViaSpillFI(RC); - TII.storeRegToStack(MBB, I, SrcReg, I->getOperand(1).isKill(), FI, RC, - &RegInfo, 0); + TII.storeRegToStack(MBB, I, SrcReg, Op1.isKill(), FI, RC, &RegInfo, 0); TII.loadRegFromStack(MBB, I, DstReg, FI, RC2, &RegInfo, Offset); return true; } @@ -376,12 +385,12 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, *static_cast(STI.getRegisterInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + DebugLoc dl; MipsABIInfo ABI = STI.getABI(); unsigned SP = ABI.GetStackPtr(); unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); - unsigned ADDu = ABI.GetPtrAdduOp(); + unsigned MOVE = ABI.GetGPRMoveOp(); unsigned ADDiu = ABI.GetPtrAddiuOp(); unsigned AND = ABI.IsN64() ? Mips::AND64 : Mips::AND; @@ -407,6 +416,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + if (MF.getFunction()->hasFnAttribute("interrupt")) + emitInterruptPrologueStub(MF, MBB); + const std::vector &CSI = MFI->getCalleeSavedInfo(); if (CSI.size()) { @@ -491,7 +503,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF, // if framepointer enabled, set it to point to the stack pointer. if (hasFP(MF)) { // Insert instruction "move $fp, $sp" at this location. 
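A note before the prologue hunk that follows: MIPS has no architected register-to-register move, so "move $fp, $sp" is materialized as a three-operand ALU op against $zero, and this patch switches the opcode from ADDu to whatever ABI.GetGPRMoveOp() returns. A minimal sketch of the emission, assuming a hypothetical helper name (BuildMI and TII.get are the real LLVM APIs used below):

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Sketch only: "rd = rs <op> $zero" copies rs into rd for any move opcode
// (addu, or, daddu, or64, ...).
static void emitMove(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     const DebugLoc &DL, const TargetInstrInfo &TII,
                     unsigned MoveOpc, unsigned Dst, unsigned Src,
                     unsigned Zero) {
  BuildMI(MBB, MBBI, DL, TII.get(MoveOpc), Dst)
      .addReg(Src)
      .addReg(Zero)
      .setMIFlag(MachineInstr::FrameSetup);
}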
-    BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO)
+    BuildMI(MBB, MBBI, dl, TII.get(MOVE), FP).addReg(SP).addReg(ZERO)
       .setMIFlag(MachineInstr::FrameSetup);

     // emit ".cfi_def_cfa_register $fp"
@@ -514,7 +526,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
     if (hasBP(MF)) {
       // move $s7, $sp
       unsigned BP = STI.isABI_N64() ? Mips::S7_64 : Mips::S7;
-      BuildMI(MBB, MBBI, dl, TII.get(ADDu), BP)
+      BuildMI(MBB, MBBI, dl, TII.get(MOVE), BP)
         .addReg(SP)
         .addReg(ZERO);
     }
@@ -522,6 +534,135 @@
   }
 }

+void MipsSEFrameLowering::emitInterruptPrologueStub(
+    MachineFunction &MF, MachineBasicBlock &MBB) const {
+
+  MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+  // Report an error if the target doesn't support Mips32r2 or later.
+  // The epilogue relies on the use of the "ehb" to clear execution
+  // hazards. Pre R2 Mips relies on an implementation defined number
+  // of "ssnop"s to clear the execution hazard. Support for ssnop hazard
+  // clearing is not provided so reject that configuration.
+  if (!STI.hasMips32r2())
+    report_fatal_error(
+        "\"interrupt\" attribute is not supported on pre-MIPS32R2 or "
+        "MIPS16 targets.");
+
+  // The GP register contains the "user" value, so we cannot perform
+  // any gp relative loads until we restore the "kernel" or "system" gp
+  // value. Until support is written we shall only accept the static
+  // relocation model.
+  if (STI.getRelocationModel() != Reloc::Static)
+    report_fatal_error("\"interrupt\" attribute is only supported for the "
+                       "static relocation model on MIPS at the present time.");
+
+  if (!STI.isABI_O32() || STI.hasMips64())
+    report_fatal_error("\"interrupt\" attribute is only supported for the "
+                       "O32 ABI on MIPS32R2+ at the present time.");
+
+  // Perform ISR handling like GCC does.
+  StringRef IntKind =
+      MF.getFunction()->getFnAttribute("interrupt").getValueAsString();
+  const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass;
+
+  // EIC interrupt handling needs to read the Cause register to disable
+  // interrupts.
+  if (IntKind == "eic") {
+    // Coprocessor registers are always live per se.
+    MBB.addLiveIn(Mips::COP013);
+    BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K0)
+        .addReg(Mips::COP013)
+        .addImm(0)
+        .setMIFlag(MachineInstr::FrameSetup);
+
+    BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EXT), Mips::K0)
+        .addReg(Mips::K0)
+        .addImm(10)
+        .addImm(6)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
+  // Fetch and spill EPC.
+  MBB.addLiveIn(Mips::COP014);
+  BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1)
+      .addReg(Mips::COP014)
+      .addImm(0)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+  STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false,
+                                      MipsFI->getISRRegFI(0), PtrRC,
+                                      STI.getRegisterInfo(), 0);
+
+  // Fetch and spill Status.
+  MBB.addLiveIn(Mips::COP012);
+  BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MFC0), Mips::K1)
+      .addReg(Mips::COP012)
+      .addImm(0)
+      .setMIFlag(MachineInstr::FrameSetup);
+
+  STI.getInstrInfo()->storeRegToStack(MBB, MBBI, Mips::K1, false,
+                                      MipsFI->getISRRegFI(1), PtrRC,
+                                      STI.getRegisterInfo(), 0);
+
+  // Build the configuration for disabling lower priority interrupts. Non-EIC
+  // interrupts need to be masked off with zero, EIC from the Cause register.
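Before the INS sequence that follows, it may help to see the bit fields written out. The positions (Cause.RIPL at bits 15:10 in EIC mode, Status KSU/ERL/EXL at bits 4:1, Status.CU1 at bit 29) come from the MIPS32 privileged architecture; the snippet below is a plain-C++ illustration of what the EXT/INS operand pairs compute, not part of the patch, and the example register values are made up:

#include <cstdint>
#include <cstdio>

int main() {
  // ext $k0, $k0, 10, 6: extract Cause.RIPL (bits 15:10) in EIC mode.
  const uint32_t Cause = 0x0000ac00;           // example value only
  const uint32_t RIPL = (Cause >> 10) & 0x3f;  // pos = 10, size = 6

  // ins $k1, <src>, pos, size: replace Status bits [pos+size-1:pos] with
  // the low bits of <src> ($zero for fixed IRQ kinds, $k0/RIPL for EIC).
  uint32_t Status = 0xffffffffu;               // example value only
  const unsigned Pos = 10, Size = 6;           // the EIC case above
  const uint32_t Mask = ((1u << Size) - 1u) << Pos;
  Status = (Status & ~Mask) | ((RIPL << Pos) & Mask);

  std::printf("RIPL=%u Status=0x%08x\n", (unsigned)RIPL, (unsigned)Status);
  return 0;
}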
+ unsigned InsPosition = 8; + unsigned InsSize = 0; + unsigned SrcReg = Mips::ZERO; + + // If the interrupt we're tied to is the EIC, switch the source for the + // masking off interrupts to the cause register. + if (IntKind == "eic") { + SrcReg = Mips::K0; + InsPosition = 10; + InsSize = 6; + } else + InsSize = StringSwitch(IntKind) + .Case("sw0", 1) + .Case("sw1", 2) + .Case("hw0", 3) + .Case("hw1", 4) + .Case("hw2", 5) + .Case("hw3", 6) + .Case("hw4", 7) + .Case("hw5", 8) + .Default(0); + assert(InsSize != 0 && "Unknown interrupt type!"); + + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) + .addReg(SrcReg) + .addImm(InsPosition) + .addImm(InsSize) + .addReg(Mips::K1) + .setMIFlag(MachineInstr::FrameSetup); + + // Mask off KSU, ERL, EXL + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) + .addReg(Mips::ZERO) + .addImm(1) + .addImm(4) + .addReg(Mips::K1) + .setMIFlag(MachineInstr::FrameSetup); + + // Disable the FPU as we are not spilling those register sets. + if (!STI.useSoftFloat()) + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::INS), Mips::K1) + .addReg(Mips::ZERO) + .addImm(29) + .addImm(1) + .addReg(Mips::K1) + .setMIFlag(MachineInstr::FrameSetup); + + // Set the new status + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012) + .addReg(Mips::K1) + .addImm(0) + .setMIFlag(MachineInstr::FrameSetup); +} + void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -533,12 +674,12 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, const MipsRegisterInfo &RegInfo = *static_cast(STI.getRegisterInfo()); - DebugLoc dl = MBBI->getDebugLoc(); + DebugLoc DL = MBBI->getDebugLoc(); MipsABIInfo ABI = STI.getABI(); unsigned SP = ABI.GetStackPtr(); unsigned FP = ABI.GetFramePtr(); unsigned ZERO = ABI.GetNullPtr(); - unsigned ADDu = ABI.GetPtrAdduOp(); + unsigned MOVE = ABI.GetGPRMoveOp(); // if framepointer enabled, restore the stack pointer. if (hasFP(MF)) { @@ -549,7 +690,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, --I; // Insert instruction "move $sp, $fp" at this location. - BuildMI(MBB, I, dl, TII.get(ADDu), SP).addReg(FP).addReg(ZERO); + BuildMI(MBB, I, DL, TII.get(MOVE), SP).addReg(FP).addReg(ZERO); } if (MipsFI->callsEhReturn()) { @@ -568,6 +709,9 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, } } + if (MF.getFunction()->hasFnAttribute("interrupt")) + emitInterruptEpilogueStub(MF, MBB); + // Get the number of bytes from FrameInfo uint64_t StackSize = MFI->getStackSize(); @@ -578,13 +722,59 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF, TII.adjustStackPtr(SP, StackSize, MBB, MBBI); } +void MipsSEFrameLowering::emitInterruptEpilogueStub( + MachineFunction &MF, MachineBasicBlock &MBB) const { + + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + MipsFunctionInfo *MipsFI = MF.getInfo(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Perform ISR handling like GCC + const TargetRegisterClass *PtrRC = &Mips::GPR32RegClass; + + // Disable Interrupts. 
+ BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::DI), Mips::ZERO); + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::EHB)); + + // Restore EPC + STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, + MipsFI->getISRRegFI(0), PtrRC, + STI.getRegisterInfo()); + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP014) + .addReg(Mips::K1) + .addImm(0); + + // Restore Status + STI.getInstrInfo()->loadRegFromStackSlot(MBB, MBBI, Mips::K1, + MipsFI->getISRRegFI(1), PtrRC, + STI.getRegisterInfo()); + BuildMI(MBB, MBBI, DL, STI.getInstrInfo()->get(Mips::MTC0), Mips::COP012) + .addReg(Mips::K1) + .addImm(0); +} + +int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + MipsABIInfo ABI = STI.getABI(); + + if (MFI->isFixedObjectIndex(FI)) + FrameReg = hasFP(MF) ? ABI.GetFramePtr() : ABI.GetStackPtr(); + else + FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr(); + + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); +} + bool MipsSEFrameLowering:: spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); - MachineBasicBlock *EntryBlock = MF->begin(); + MachineBasicBlock *EntryBlock = &MF->front(); const TargetInstrInfo &TII = *STI.getInstrInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { @@ -599,6 +789,26 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, if (!IsRAAndRetAddrIsTaken) EntryBlock->addLiveIn(Reg); + // ISRs require HI/LO to be spilled into kernel registers to be then + // spilled to the stack frame. + bool IsLOHI = (Reg == Mips::LO0 || Reg == Mips::LO0_64 || + Reg == Mips::HI0 || Reg == Mips::HI0_64); + const Function *Func = MBB.getParent()->getFunction(); + if (IsLOHI && Func->hasFnAttribute("interrupt")) { + DebugLoc DL = MI->getDebugLoc(); + + unsigned Op = 0; + if (!STI.getABI().ArePtrs64bit()) { + Op = (Reg == Mips::HI0) ? Mips::MFHI : Mips::MFLO; + Reg = Mips::K0; + } else { + Op = (Reg == Mips::HI0) ? Mips::MFHI64 : Mips::MFLO64; + Reg = Mips::K0_64; + } + BuildMI(MBB, MI, DL, TII.get(Op), Mips::K0) + .setMIFlag(MachineInstr::FrameSetup); + } + // Insert the spill to the stack frame. bool IsKill = !IsRAAndRetAddrIsTaken; const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); @@ -622,7 +832,8 @@ MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { } /// Mark \p Reg and all registers aliasing it in the bitset. -void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, unsigned Reg) { +static void setAliasRegs(MachineFunction &MF, BitVector &SavedRegs, + unsigned Reg) { const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) SavedRegs.set(*AI); @@ -648,6 +859,10 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF, if (MipsFI->callsEhReturn()) MipsFI->createEhDataRegsFI(); + // Create spill slots for Coprocessor 0 registers if function is an ISR. + if (MipsFI->isISR()) + MipsFI->createISRRegFI(); + // Expand pseudo instructions which load, store or copy accumulators. // Add an emergency spill slot if a pseudo was expanded. 
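createISRRegFI() itself is outside this hunk (it lives in MipsMachineFunction.cpp). Judging from the EhDataRegFI handling above, it is expected to reserve two GPR-sized stack objects for the COP0 EPC and Status values; treat this body as an assumption, not the patch's code:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

// Assumed implementation sketch, mirroring createEhDataRegsFI().
void MipsFunctionInfo::createISRRegFI() {
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  // Two word-sized slots: ISRDataRegFI[0] for EPC, ISRDataRegFI[1] for
  // Status, matching the getISRRegFI(0)/(1) uses in the prologue stub.
  for (int I = 0; I < 2; ++I)
    ISRDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(
        RC->getSize(), RC->getAlignment(), /*isSS=*/false);
}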
if (ExpandPseudo(MF).expand()) { diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 9cb32e6c7829..63cd3cebc56a 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -27,6 +27,9 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, @@ -37,8 +40,13 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; unsigned ehDataReg(unsigned I) const; -}; +private: + void emitInterruptEpilogueStub(MachineFunction &MF, + MachineBasicBlock &MBB) const; + void emitInterruptPrologueStub(MachineFunction &MF, + MachineBasicBlock &MBB) const; +}; } // End llvm namespace #endif diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 2ebfbd17d7d0..6f001ea74b30 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -136,7 +136,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MachineBasicBlock::iterator I = MBB.begin(); MachineRegisterInfo &RegInfo = MF.getRegInfo(); const TargetInstrInfo &TII = *Subtarget->getInstrInfo(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC; const MipsABIInfo &ABI = static_cast(TM).getABI(); diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index b319fd07884b..efe22fba98ce 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1181,6 +1181,10 @@ bool MipsSETargetLowering::isEligibleForTailCallOptimization( if (!EnableMipsTailCalls) return false; + // Exception has to be cleared with eret. + if (FI.isISR()) + return false; + // Return false if either the callee or caller has a byval argument. if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg()) return false; @@ -1786,9 +1790,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); case Intrinsic::mips_fadd_w: - case Intrinsic::mips_fadd_d: + case Intrinsic::mips_fadd_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away case Intrinsic::mips_fceq_w: case Intrinsic::mips_fceq_d: @@ -1831,9 +1837,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), ISD::SETUNE); case Intrinsic::mips_fdiv_w: - case Intrinsic::mips_fdiv_d: + case Intrinsic::mips_fdiv_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. 
return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } case Intrinsic::mips_ffint_u_w: case Intrinsic::mips_ffint_u_d: return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), @@ -1856,6 +1864,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, } case Intrinsic::mips_fexp2_w: case Intrinsic::mips_fexp2_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. EVT ResTy = Op->getValueType(0); return DAG.getNode( ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), @@ -1869,11 +1878,14 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); case Intrinsic::mips_fmul_w: - case Intrinsic::mips_fmul_d: + case Intrinsic::mips_fmul_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } case Intrinsic::mips_fmsub_w: case Intrinsic::mips_fmsub_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. EVT ResTy = Op->getValueType(0); return DAG.getNode(ISD::FSUB, SDLoc(Op), ResTy, Op->getOperand(1), DAG.getNode(ISD::FMUL, SDLoc(Op), ResTy, @@ -1886,9 +1898,11 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_fsqrt_d: return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); case Intrinsic::mips_fsub_w: - case Intrinsic::mips_fsub_d: + case Intrinsic::mips_fsub_d: { + // TODO: If intrinsics have fast-math-flags, propagate them. return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), Op->getOperand(2)); + } case Intrinsic::mips_ftrunc_u_w: case Intrinsic::mips_ftrunc_u_d: return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index 786307b95f88..e6f7fe9aae1d 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -88,7 +88,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (isMicroMips) Opc = Mips::MOVE16_MM; else - Opc = Mips::ADDu, ZeroReg = Mips::ZERO; + Opc = Mips::OR, ZeroReg = Mips::ZERO; } else if (Mips::CCRRegClass.contains(SrcReg)) Opc = Mips::CFC1; else if (Mips::FGR32RegClass.contains(SrcReg)) @@ -141,7 +141,7 @@ void MipsSEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::FMOV_D64; else if (Mips::GPR64RegClass.contains(DestReg)) { // Copy to CPU64 Reg. 
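This copyPhysReg hunk changes the canonical GPR-to-GPR copy from "addu $rd, $rs, $zero" to "or $rd, $rs, $zero" (and, just below, DADDu to OR64 for the 64-bit case). Both forms are exact copies for every input value, which a few lines of plain C++ can sanity-check:

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const uint32_t Zero = 0; // $zero always reads as 0
  for (uint32_t Rs : {0u, 1u, 0x80000000u, 0xdeadbeefu, 0xffffffffu}) {
    assert((Rs | Zero) == Rs); // or   $rd, $rs, $zero
    assert((Rs + Zero) == Rs); // addu $rd, $rs, $zero (wrapping, no trap)
  }
  return 0;
}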
if (Mips::GPR64RegClass.contains(SrcReg)) - Opc = Mips::DADDu, ZeroReg = Mips::ZERO_64; + Opc = Mips::OR64, ZeroReg = Mips::ZERO_64; else if (Mips::HI64RegClass.contains(SrcReg)) Opc = Mips::MFHI64, SrcReg = 0; else if (Mips::LO64RegClass.contains(SrcReg)) @@ -182,7 +182,6 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, int64_t Offset) const { DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOStore); unsigned Opc = 0; @@ -213,6 +212,33 @@ storeRegToStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::ST_W; else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) Opc = Mips::ST_D; + else if (Mips::LO32RegClass.hasSubClassEq(RC)) + Opc = Mips::SW; + else if (Mips::LO64RegClass.hasSubClassEq(RC)) + Opc = Mips::SD; + else if (Mips::HI32RegClass.hasSubClassEq(RC)) + Opc = Mips::SW; + else if (Mips::HI64RegClass.hasSubClassEq(RC)) + Opc = Mips::SD; + + // Hi, Lo are normally caller save but they are callee save + // for interrupt handling. + const Function *Func = MBB.getParent()->getFunction(); + if (Func->hasFnAttribute("interrupt")) { + if (Mips::HI32RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Mips::MFHI), Mips::K0); + SrcReg = Mips::K0; + } else if (Mips::HI64RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Mips::MFHI64), Mips::K0_64); + SrcReg = Mips::K0_64; + } else if (Mips::LO32RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Mips::MFLO), Mips::K0); + SrcReg = Mips::K0; + } else if (Mips::LO64RegClass.hasSubClassEq(RC)) { + BuildMI(MBB, I, DL, get(Mips::MFLO64), Mips::K0_64); + SrcReg = Mips::K0_64; + } + } assert(Opc && "Register class not handled!"); BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) @@ -228,6 +254,11 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineMemOperand *MMO = GetMemOperand(MBB, FI, MachineMemOperand::MOLoad); unsigned Opc = 0; + const Function *Func = MBB.getParent()->getFunction(); + bool ReqIndirectLoad = Func->hasFnAttribute("interrupt") && + (DestReg == Mips::LO0 || DestReg == Mips::LO0_64 || + DestReg == Mips::HI0 || DestReg == Mips::HI0_64); + if (Mips::GPR32RegClass.hasSubClassEq(RC)) Opc = Mips::LW; else if (Mips::GPR64RegClass.hasSubClassEq(RC)) @@ -254,10 +285,44 @@ loadRegFromStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Opc = Mips::LD_W; else if (RC->hasType(MVT::v2i64) || RC->hasType(MVT::v2f64)) Opc = Mips::LD_D; + else if (Mips::HI32RegClass.hasSubClassEq(RC)) + Opc = Mips::LW; + else if (Mips::HI64RegClass.hasSubClassEq(RC)) + Opc = Mips::LD; + else if (Mips::LO32RegClass.hasSubClassEq(RC)) + Opc = Mips::LW; + else if (Mips::LO64RegClass.hasSubClassEq(RC)) + Opc = Mips::LD; assert(Opc && "Register class not handled!"); - BuildMI(MBB, I, DL, get(Opc), DestReg).addFrameIndex(FI).addImm(Offset) - .addMemOperand(MMO); + + if (!ReqIndirectLoad) + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addFrameIndex(FI) + .addImm(Offset) + .addMemOperand(MMO); + else { + // Load HI/LO through K0. Notably the DestReg is encoded into the + // instruction itself. 
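As the comment above says, HI/LO have no load/store encodings and MTHI/MTLO name their destination implicitly, so a reload must bounce through a kernel scratch GPR. A condensed sketch of the two-instruction sequence the code below builds, for the 32-bit case; the helper name is hypothetical and the memory operand is omitted for brevity:

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

static void reloadAccViaK0(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator I, const DebugLoc &DL,
                           const TargetInstrInfo &TII, int FI, bool IsHi) {
  // lw $k0, <FI>  -- the frame index becomes an $sp offset later, in
  // eliminateFrameIndex.
  BuildMI(MBB, I, DL, TII.get(Mips::LW), Mips::K0)
      .addFrameIndex(FI)
      .addImm(0);
  // mthi/mtlo $k0 -- the destination register is implied by the opcode.
  BuildMI(MBB, I, DL, TII.get(IsHi ? Mips::MTHI : Mips::MTLO))
      .addReg(Mips::K0);
}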
+ unsigned Reg = Mips::K0; + unsigned LdOp = Mips::MTLO; + if (DestReg == Mips::HI0) + LdOp = Mips::MTHI; + + if (Subtarget.getABI().ArePtrs64bit()) { + Reg = Mips::K0_64; + if (DestReg == Mips::HI0_64) + LdOp = Mips::MTHI64; + else + LdOp = Mips::MTLO64; + } + + BuildMI(MBB, I, DL, get(Opc), Reg) + .addFrameIndex(FI) + .addImm(Offset) + .addMemOperand(MMO); + BuildMI(MBB, I, DL, get(LdOp)).addReg(Reg); + } } bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { @@ -271,6 +336,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case Mips::RetRA: expandRetRA(MBB, MI); break; + case Mips::ERet: + expandERet(MBB, MI); + break; case Mips::PseudoMFHI: Opc = isMicroMips ? Mips::MFHI16_MM : Mips::MFHI; expandPseudoMFHiLo(MBB, MI, Opc); @@ -360,7 +428,7 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { MipsABIInfo ABI = Subtarget.getABI(); - DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc(); + DebugLoc DL; unsigned ADDu = ABI.GetPtrAdduOp(); unsigned ADDiu = ABI.GetPtrAddiuOp(); @@ -438,6 +506,11 @@ void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA); } +void MipsSEInstrInfo::expandERet(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const { + BuildMI(MBB, I, I->getDebugLoc(), get(Mips::ERET)); +} + std::pair MipsSEInstrInfo::compareOpndSize(unsigned Opc, const MachineFunction &MF) const { diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index bebbabf7b838..5d73545ef6b9 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -82,6 +82,8 @@ private: void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + void expandERet(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; + std::pair compareOpndSize(unsigned Opc, const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp index 132c3a1001ad..b1e2885f5ba3 100644 --- a/lib/Target/Mips/MipsSERegisterInfo.cpp +++ b/lib/Target/Mips/MipsSERegisterInfo.cpp @@ -126,17 +126,19 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II, } bool EhDataRegFI = MipsFI->isEhDataRegFI(FrameIndex); - + bool IsISRRegFI = MipsFI->isISRRegFI(FrameIndex); // The following stack frame objects are always referenced relative to $sp: // 1. Outgoing arguments. // 2. Pointer to dynamically allocated stack space. // 3. Locations for callee-saved registers. // 4. Locations for eh data registers. + // 5. Locations for ISR saved Coprocessor 0 registers 12 & 14. // Everything else is referenced relative to whatever register // getFrameRegister() returns. 
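The base-register rule spelled out in the numbered comment above can be restated as a small pure function. This is a clarifying condensation, not the patch's code; the real logic below additionally handles the stack-realignment and base-pointer cases:

// SP-relative: objects at fixed offsets from the incoming stack pointer
// (outgoing args, callee saves, EH data, ISR COP0 slots). Everything
// else: the frame register, i.e. $fp when one exists, otherwise $sp.
static unsigned chooseFrameReg(bool IsCalleeSaveFI, bool IsEhDataRegFI,
                               bool IsISRRegFI, bool HasFP, unsigned SP,
                               unsigned FP) {
  if (IsCalleeSaveFI || IsEhDataRegFI || IsISRRegFI)
    return SP;
  return HasFP ? FP : SP;
}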
unsigned FrameReg; - if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI) + if ((FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) || EhDataRegFI || + IsISRRegFI) FrameReg = ABI.GetStackPtr(); else if (RegInfo->needsStackRealignment(MF)) { if (MFI->hasVarSizedObjects() && !MFI->isFixedObjectIndex(FrameIndex)) diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 54b5d2811701..37f9e491d546 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -16,8 +16,8 @@ def IMULDIV : FuncUnit; //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for Mips //===----------------------------------------------------------------------===// -def IIAlu : InstrItinClass; -def IIBranch : InstrItinClass; +// IIM16Alu is a placeholder class for most MIPS16 instructions. +def IIM16Alu : InstrItinClass; def IIPseudo : InstrItinClass; def II_ABS : InstrItinClass; @@ -28,7 +28,19 @@ def II_ADD_D : InstrItinClass; def II_ADD_S : InstrItinClass; def II_AND : InstrItinClass; def II_ANDI : InstrItinClass; +def II_B : InstrItinClass; def II_BADDU : InstrItinClass; +def II_BBIT : InstrItinClass; // bbit[01], bbit[01]32 +def II_BC : InstrItinClass; +def II_BC1F : InstrItinClass; +def II_BC1FL : InstrItinClass; +def II_BC1T : InstrItinClass; +def II_BC1TL : InstrItinClass; +def II_BCC : InstrItinClass; // beq and bne +def II_BCCZ : InstrItinClass; // b[gl][et]z +def II_BCCZAL : InstrItinClass; // bgezal and bltzal +def II_BCCZALS : InstrItinClass; // bgezals and bltzals +def II_BCCZC : InstrItinClass; // beqzc, bnezc def II_CEIL : InstrItinClass; def II_CFC1 : InstrItinClass; def II_CLO : InstrItinClass; @@ -68,21 +80,39 @@ def II_DSUB : InstrItinClass; def II_EXT : InstrItinClass; // Any EXT instruction def II_FLOOR : InstrItinClass; def II_INS : InstrItinClass; // Any INS instruction +def II_IndirectBranchPseudo : InstrItinClass; // Indirect branch pseudo. +def II_J : InstrItinClass; +def II_JAL : InstrItinClass; +def II_JALR : InstrItinClass; +def II_JALRC : InstrItinClass; +def II_JALRS : InstrItinClass; +def II_JALS : InstrItinClass; +def II_JR : InstrItinClass; +def II_JRADDIUSP : InstrItinClass; +def II_JRC : InstrItinClass; +def II_ReturnPseudo : InstrItinClass; // Return pseudo. 
def II_LB : InstrItinClass; +def II_LBE : InstrItinClass; def II_LBU : InstrItinClass; +def II_LBUE : InstrItinClass; def II_LD : InstrItinClass; def II_LDC1 : InstrItinClass; def II_LDL : InstrItinClass; def II_LDR : InstrItinClass; def II_LDXC1 : InstrItinClass; def II_LH : InstrItinClass; +def II_LHE : InstrItinClass; def II_LHU : InstrItinClass; +def II_LHUE : InstrItinClass; def II_LUI : InstrItinClass; def II_LUXC1 : InstrItinClass; def II_LW : InstrItinClass; +def II_LWE : InstrItinClass; def II_LWC1 : InstrItinClass; def II_LWL : InstrItinClass; +def II_LWLE : InstrItinClass; def II_LWR : InstrItinClass; +def II_LWRE : InstrItinClass; def II_LWU : InstrItinClass; def II_LWXC1 : InstrItinClass; def II_MADD : InstrItinClass; @@ -134,6 +164,7 @@ def II_ROTRV : InstrItinClass; def II_ROUND : InstrItinClass; def II_SAVE : InstrItinClass; def II_SB : InstrItinClass; +def II_SBE : InstrItinClass; def II_SD : InstrItinClass; def II_SDC1 : InstrItinClass; def II_SDL : InstrItinClass; @@ -144,6 +175,7 @@ def II_SEH : InstrItinClass; def II_SEQ_SNE : InstrItinClass; // seq and sne def II_SEQI_SNEI : InstrItinClass; // seqi and snei def II_SH : InstrItinClass; +def II_SHE : InstrItinClass; def II_SLL : InstrItinClass; def II_SLLV : InstrItinClass; def II_SLTI_SLTIU : InstrItinClass; // slti and sltiu @@ -159,11 +191,15 @@ def II_SUB_D : InstrItinClass; def II_SUB_S : InstrItinClass; def II_SUXC1 : InstrItinClass; def II_SW : InstrItinClass; +def II_SWE : InstrItinClass; def II_SWC1 : InstrItinClass; def II_SWL : InstrItinClass; +def II_SWLE : InstrItinClass; def II_SWR : InstrItinClass; +def II_SWRE : InstrItinClass; def II_SWXC1 : InstrItinClass; def II_TRUNC : InstrItinClass; +def II_WSBH : InstrItinClass; def II_XOR : InstrItinClass; def II_XORI : InstrItinClass; @@ -171,7 +207,7 @@ def II_XORI : InstrItinClass; // Mips Generic instruction itineraries. //===----------------------------------------------------------------------===// def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ - InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -240,7 +276,29 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]>, InstrItinData]>, - InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, + InstrItinData]>, InstrItinData]>, InstrItinData]>, InstrItinData]>, @@ -313,3 +371,5 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [ InstrItinData]>, InstrItinData]> ]>; + +include "MipsScheduleP5600.td" diff --git a/lib/Target/Mips/MipsScheduleP5600.td b/lib/Target/Mips/MipsScheduleP5600.td new file mode 100644 index 000000000000..d32ae4f55eaf --- /dev/null +++ b/lib/Target/Mips/MipsScheduleP5600.td @@ -0,0 +1,392 @@ +//==- MipsScheduleP5600.td - P5600 Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +def MipsP5600Model : SchedMachineModel { + int IssueWidth = 2; // 2x dispatched per cycle + int MicroOpBufferSize = 48; // min(48, 48, 64) + int LoadLatency = 4; + int MispredictPenalty = 8; // TODO: Estimated + + let CompleteModel = 1; +} + +let SchedModel = MipsP5600Model in { + +// ALQ Pipelines +// ============= + +def P5600ALQ : ProcResource<1> { let BufferSize = 16; } +def P5600IssueALU : ProcResource<1> { let Super = P5600ALQ; } + +// ALU Pipeline +// ------------ + +def P5600WriteALU : SchedWriteRes<[P5600IssueALU]>; + +// and, lui, nor, or, slti, sltiu, sub, subu, xor +def : ItinRW<[P5600WriteALU], + [II_AND, II_LUI, II_NOR, II_OR, II_SLTI_SLTIU, II_SUBU, II_XOR]>; + +// AGQ Pipelines +// ============= + +def P5600AGQ : ProcResource<3> { let BufferSize = 16; } +def P5600IssueAL2 : ProcResource<1> { let Super = P5600AGQ; } +def P5600IssueCTISTD : ProcResource<1> { let Super = P5600AGQ; } +def P5600IssueLDST : ProcResource<1> { let Super = P5600AGQ; } + +def P5600AL2Div : ProcResource<1>; +// Pseudo-resource used to block CTISTD when handling multi-pipeline splits. +def P5600CTISTD : ProcResource<1>; + +// CTISTD Pipeline +// --------------- + +def P5600WriteJump : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]>; +def P5600WriteJumpAndLink : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]> { + let Latency = 2; +} + +// b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal, jalx, +// jalr, jr.hb, jr +def : ItinRW<[P5600WriteJump], [II_B, II_BCC, II_BCCZ, II_BCCZAL, II_J, II_JR]>; +def : ItinRW<[P5600WriteJumpAndLink], [II_JAL, II_JALR]>; + +// LDST Pipeline +// ------------- + +def P5600WriteLoad : SchedWriteRes<[P5600IssueLDST]> { + let Latency = 4; +} + +def P5600WriteLoadShifted : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> { + let Latency = 4; +} + +def P5600WritePref : SchedWriteRes<[P5600IssueLDST]>; + +def P5600WriteStore : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> { + // FIXME: This is a bit pessimistic. P5600CTISTD is only used during cycle 2 + // not during 0, 1, and 2. + let ResourceCycles = [ 1, 3 ]; +} + +def P5600WriteGPRFromBypass : SchedWriteRes<[P5600IssueLDST]> { + let Latency = 2; +} + +def P5600WriteStoreFromOtherUnits : SchedWriteRes<[P5600IssueLDST]>; +def P5600WriteLoadToOtherUnits : SchedWriteRes<[P5600IssueLDST]> { + let Latency = 0; +} + +// l[bhw], l[bh]u, ll +def : ItinRW<[P5600WriteLoad], [II_LB, II_LBU, II_LH, II_LHU, II_LW, II_LWU]>; + +// lw[lr] +def : ItinRW<[P5600WriteLoadShifted], [II_LWL, II_LWR]>; + +// s[bhw], sw[lr] +def : ItinRW<[P5600WriteStore], [II_SB, II_SH, II_SW, II_SWL, II_SWR]>; + +// pref +// (this instruction does not exist in the backend yet) +def : ItinRW<[P5600WritePref], []>; + +// sc +// (this instruction does not exist in the backend yet) +def : ItinRW<[P5600WriteStore], []>; + +// LDST is also used in moves from general purpose registers to floating point +// and MSA. 
+def P5600WriteMoveGPRToOtherUnits : SchedWriteRes<[P5600IssueLDST]> { + let Latency = 0; +} + +// AL2 Pipeline +// ------------ + +def P5600WriteAL2 : SchedWriteRes<[P5600IssueAL2]>; +def P5600WriteAL2BitExt : SchedWriteRes<[P5600IssueAL2]> { let Latency = 2; } +def P5600WriteAL2ShadowMov : SchedWriteRes<[P5600IssueAL2]> { let Latency = 2; } +def P5600WriteAL2CondMov : SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> { + let Latency = 2; +} +def P5600WriteAL2Div : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> { + // Estimated worst case + let Latency = 34; + let ResourceCycles = [1, 34]; +} +def P5600WriteAL2DivU : SchedWriteRes<[P5600IssueAL2, P5600AL2Div]> { + // Estimated worst case + let Latency = 34; + let ResourceCycles = [1, 34]; +} +def P5600WriteAL2Mul : SchedWriteRes<[P5600IssueAL2]> { let Latency = 3; } +def P5600WriteAL2Mult: SchedWriteRes<[P5600IssueAL2]> { let Latency = 5; } +def P5600WriteAL2MAdd: SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> { + let Latency = 5; +} + +// clo, clz, di, mfhi, mflo +def : ItinRW<[P5600WriteAL2], [II_CLO, II_CLZ, II_MFHI_MFLO]>; + +// ehb, rdhwr, rdpgpr, wrpgpr, wsbh +def : ItinRW<[P5600WriteAL2ShadowMov], [II_RDHWR]>; + +// mov[nz] +def : ItinRW<[P5600WriteAL2CondMov], [II_MOVN, II_MOVZ]>; + +// divu? +def : ItinRW<[P5600WriteAL2Div], [II_DIV]>; +def : ItinRW<[P5600WriteAL2DivU], [II_DIVU]>; + +// mul +def : ItinRW<[P5600WriteAL2Mul], [II_MUL]>; +// multu?, multu? +def : ItinRW<[P5600WriteAL2Mult], [II_MULT, II_MULTU]>; +// maddu?, msubu?, mthi, mtlo +def : ItinRW<[P5600WriteAL2MAdd], + [II_MADD, II_MADDU, II_MSUB, II_MSUBU, II_MTHI_MTLO]>; + +// ext, ins +def : ItinRW<[P5600WriteAL2BitExt], + [II_EXT, II_INS]>; + +// Either ALU or AL2 Pipelines +// --------------------------- +// +// Some instructions can choose between ALU and AL2, but once dispatched to +// ALQ or AGQ respectively they are committed to that path. +// The decision is based on the outcome of the most recent selection when the +// choice was last available. For now, we assume ALU is always chosen. 
+ +def P5600WriteEitherALU : SchedWriteVariant< + // FIXME: Implement selection predicate + [SchedVar, [P5600WriteALU]>, + SchedVar, [P5600WriteAL2]> + ]>; + +// add, addi, addiu, addu, andi, ori, rotr, se[bh], sllv?, sr[al]v?, slt, sltu, +// xori +def : ItinRW<[P5600WriteEitherALU], + [II_ADDI, II_ADDIU, II_ANDI, II_ORI, II_ROTR, II_SEB, II_SEH, + II_SLT_SLTU, II_SLL, II_SRA, II_SRL, II_XORI, II_ADDU, II_SLLV, + II_SRAV, II_SRLV]>; + +// FPU Pipelines +// ============= + +def P5600FPQ : ProcResource<3> { let BufferSize = 16; } +def P5600IssueFPUS : ProcResource<1> { let Super = P5600FPQ; } +def P5600IssueFPUL : ProcResource<1> { let Super = P5600FPQ; } +def P5600IssueFPULoad : ProcResource<1> { let Super = P5600FPQ; } + +def P5600FPUDivSqrt : ProcResource<2>; + +def P5600WriteFPUS : SchedWriteRes<[P5600IssueFPUS]>; +def P5600WriteFPUL : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 4; } +def P5600WriteFPUL_MADDSUB : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 6; } +def P5600WriteFPUDivS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 23 / 27 + let Latency = 23; // Using common case + let ResourceCycles = [ 1, 23 ]; +} +def P5600WriteFPUDivD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 31 / 35 + let Latency = 31; // Using common case + let ResourceCycles = [ 1, 31 ]; +} +def P5600WriteFPURcpS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 19 / 23 + let Latency = 19; // Using common case + let ResourceCycles = [ 1, 19 ]; +} +def P5600WriteFPURcpD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 27 / 31 + let Latency = 27; // Using common case + let ResourceCycles = [ 1, 27 ]; +} +def P5600WriteFPURsqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 27 / 27 + let Latency = 27; // Using common case + let ResourceCycles = [ 1, 27 ]; +} +def P5600WriteFPURsqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 27 / 31 + let Latency = 27; // Using common case + let ResourceCycles = [ 1, 27 ]; +} +def P5600WriteFPUSqrtS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 27 / 31 + let Latency = 27; // Using common case + let ResourceCycles = [ 1, 27 ]; +} +def P5600WriteFPUSqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> { + // Best/Common/Worst case = 7 / 35 / 39 + let Latency = 35; // Using common case + let ResourceCycles = [ 1, 35 ]; +} +def P5600WriteMSAShortLogic : SchedWriteRes<[P5600IssueFPUS]>; +def P5600WriteMSAShortInt : SchedWriteRes<[P5600IssueFPUS]> { let Latency = 2; } +def P5600WriteMoveOtherUnitsToFPU : SchedWriteRes<[P5600IssueFPUS]>; + +// FPUS is also used in moves from floating point and MSA registers to general +// purpose registers. +def P5600WriteMoveFPUSToOtherUnits : SchedWriteRes<[P5600IssueFPUS]> { + let Latency = 0; +} + +// FPUL is also used in moves from floating point and MSA registers to general +// purpose registers. 
+def P5600WriteMoveFPULToOtherUnits : SchedWriteRes<[P5600IssueFPUL]>; + +// Short Pipe +// ---------- +// +// abs.[ds], abs.ps, bc1[tf]l?, mov[tf].[ds], mov[tf], mov.[ds], [cm][ft]c1, +// m[ft]hc1, neg.[ds], neg.ps, nor.v, nori.b, or.v, ori.b, xor.v, xori.b, +// sdxc1, sdc1, st.[bhwd], swc1, swxc1 +def : ItinRW<[P5600WriteFPUS], [II_ABS, II_MOVF_D, II_MOVF_S, II_MOVT_D, + II_MOVT_S, II_MOV_D, II_MOV_S, II_NEG]>; + +// adds_a.[bhwd], adds_[asu].[bhwd], addvi?.[bhwd], asub_[us].[bhwd], +// aver?_[us].[bhwd] +def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADD_A_[BHWD]$")>; +def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDS_[ASU]_[BHWD]$")>; +// TODO: ADDVI_[BHW] might be 1 cycle latency rather than 2. Need to confirm it. +def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDVI?_[BHWD]$")>; +def : InstRW<[P5600WriteMSAShortInt], (instregex "^ASUB_[US].[BHWD]$")>; +def : InstRW<[P5600WriteMSAShortInt], (instregex "^AVER?_[US].[BHWD]$")>; + +// and.v, andi.b, move.v, ldi.[bhwd] +def : InstRW<[P5600WriteMSAShortLogic], (instregex "^MOVE_V$")>; +def : InstRW<[P5600WriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>; +def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>; +def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>; + +// Long Pipe +// ---------- +// +// add.[ds], add.ps, cvt.d.[sw], cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps, +// cvt.ps.[sw], c..[ds], c..ps, mul.[ds], mul.ps, sub.[ds], sub.ps, +// trunc.w.[ds], trunc.w.ps +def : ItinRW<[P5600WriteFPUL], + [II_ADD_D, II_ADD_S, II_CVT, II_C_CC_D, II_C_CC_S, II_MUL_D, + II_MUL_S, II_SUB_D, II_SUB_S, II_TRUNC]>; + +// div.[ds], div.ps +def : ItinRW<[P5600WriteFPUDivS], [II_DIV_S]>; +def : ItinRW<[P5600WriteFPUDivD], [II_DIV_D]>; + +// sqrt.[ds], sqrt.ps +def : ItinRW<[P5600WriteFPUSqrtS], [II_SQRT_S]>; +def : ItinRW<[P5600WriteFPUSqrtD], [II_SQRT_D]>; + +// madd.[ds], msub.[ds], nmadd.[ds], nmsub.[ds], +// Operand 0 is read on cycle 5. All other operands are read on operand 0. +def : ItinRW<[SchedReadAdvance<5>, P5600WriteFPUL_MADDSUB], + [II_MADD_D, II_MADD_S, II_MSUB_D, II_MSUB_S, II_NMADD_D, + II_NMADD_S, II_NMSUB_D, II_NMSUB_S]>; + +// madd.ps, msub.ps, nmadd.ps, nmsub.ps +// Operand 0 and 1 are read on cycle 5. All others are read on operand 0. +// (none of these instructions exist in the backend yet) + +// Load Pipe +// --------- +// +// This is typically used in conjunction with the load pipeline under the AGQ +// All the instructions are in the 'Tricky Instructions' section. + +def P5600WriteLoadOtherUnitsToFPU : SchedWriteRes<[P5600IssueFPULoad]> { + let Latency = 4; +} + +// Tricky Instructions +// =================== +// +// These instructions are split across multiple uops (in different pipelines) +// that must cooperate to complete the operation + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. +def P5600WriteMoveGPRToFPU : WriteSequence<[P5600WriteMoveGPRToOtherUnits, + P5600WriteMoveOtherUnitsToFPU]>; + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. +def P5600WriteMoveFPUToGPR : WriteSequence<[P5600WriteMoveFPUSToOtherUnits, + P5600WriteGPRFromBypass]>; + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. 
+def P5600WriteStoreFPUS : WriteSequence<[P5600WriteMoveFPUSToOtherUnits, + P5600WriteStoreFromOtherUnits]>; + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. +def P5600WriteStoreFPUL : WriteSequence<[P5600WriteMoveFPULToOtherUnits, + P5600WriteStoreFromOtherUnits]>; + +// FIXME: This isn't quite right since the implementation of WriteSequence +// current aggregates the resources and ignores the exact cycle they are +// used. +def P5600WriteLoadFPU : WriteSequence<[P5600WriteLoadToOtherUnits, + P5600WriteLoadOtherUnitsToFPU]>; + +// ctc1, mtc1, mthc1 +def : ItinRW<[P5600WriteMoveGPRToFPU], [II_CTC1, II_MTC1, II_MTHC1]>; + +// bc1[ft], cfc1, mfc1, mfhc1, movf, movt +def : ItinRW<[P5600WriteMoveFPUToGPR], + [II_BC1F, II_BC1T, II_CFC1, II_MFC1, II_MFHC1, II_MOVF, II_MOVT]>; + +// swc1, swxc1, st.[bhwd] +def : ItinRW<[P5600WriteStoreFPUS], [II_SWC1, II_SWXC1]>; +def : InstRW<[P5600WriteStoreFPUS], (instregex "^ST_[BHWD]$")>; + +// movn.[ds], movz.[ds] +def : ItinRW<[P5600WriteStoreFPUL], [II_MOVN_D, II_MOVN_S, II_MOVZ_D, II_MOVZ_S]>; + +// l[dw]x?c1, ld.[bhwd] +def : ItinRW<[P5600WriteLoadFPU], [II_LDC1, II_LDXC1, II_LWC1, II_LWXC1]>; +def : InstRW<[P5600WriteLoadFPU], (instregex "LD_[BHWD]")>; + +// Unsupported Instructions +// ======================== +// +// The following instruction classes are never valid on P5600. +// II_DADDIU, II_DADDU, II_DMFC1, II_DMTC1, II_DMULT, II_DMULTU, II_DROTR, +// II_DROTR32, II_DROTRV, II_DDIV, II_DSLL, II_DSLL32, II_DSLLV, II_DSRA, +// II_DSRA32, II_DSRAV, II_DSRL, II_DSRL32, II_DSRLV, II_DSUBU, II_DDIVU, +// II_JALRC, II_LD, II_LD[LR], II_LUXC1, II_RESTORE, II_SAVE, II_SD, II_SDC1, +// II_SDL, II_SDR, II_SDXC1 +// +// The following instructions are never valid on P5600. +// addq.ph, rdhwr, repl.ph, repl.qb, subq.ph, subu_s.qb +// +// Guesswork +// ========= +// +// This section is largely temporary guesswork. 
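A note on reading the ResourceCycles entries in this file: for P5600WriteFPUDivS earlier (Latency = 23, ResourceCycles = [1, 23]), the second number keeps the P5600FPUDivSqrt unit occupied and therefore bounds divide throughput, independently of result latency. A toy single-unit model in plain C++; the real model declares two divider units and these latencies are the file's own "common case" guesses, so treat this purely as an illustration of the semantics:

#include <cstdio>

int main() {
  const int Latency = 23;   // cycles until a div.s result is ready
  const int Occupancy = 23; // cycles the divider stays busy per div.s
  int UnitFree = 0;
  for (int N = 0; N < 3; ++N) { // three independent div.s, one unit
    const int Issue = UnitFree; // wait for the divider to drain
    std::printf("div.s #%d: issue %d, result %d\n", N, Issue,
                Issue + Latency);
    UnitFree = Issue + Occupancy;
  }
  return 0;
}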
+ +// ceil.[lw].[ds], floor.[lw].[ds] +// Reason behind guess: trunc.[lw].ds and the various cvt's are in FPUL +def : ItinRW<[P5600WriteFPUL], [II_CEIL, II_FLOOR, II_ROUND]>; + +// rotrv +// Reason behind guess: rotr is in the same category and the two register forms +// generally follow the immediate forms in this category +def : ItinRW<[P5600WriteEitherALU], [II_ROTRV]>; +} diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 471b6e19a8bb..8a18b517d16b 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -69,8 +69,9 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), - HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), - HasMSA(false), TM(TM), TargetTriple(TT), TSInfo(), + HasDSPR2(false), HasDSPR3(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), + Os16(Mips_Os16), HasMSA(false), UseTCCInDIV(false), HasEVA(false), TM(TM), + TargetTriple(TT), TSInfo(), InstrInfo( MipsInstrInfo::create(initializeSubtargetDependencies(CPU, FS, TM))), FrameLowering(MipsFrameLowering::create(*this)), diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 1db8881404c9..fbb01fe77029 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -42,9 +42,15 @@ class MipsSubtarget : public MipsGenSubtargetInfo { Mips3, Mips4, Mips5, Mips64, Mips64r2, Mips64r3, Mips64r5, Mips64r6 }; + enum class CPU { P5600 }; + // Mips architecture version MipsArchEnum MipsArchVersion; + // Processor implementation (unused but required to exist by + // tablegen-erated code). + CPU ProcImpl; + // IsLittle - The target is Little Endian bool IsLittle; @@ -116,8 +122,8 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // InMicroMips -- can process MicroMips instructions bool InMicroMipsMode; - // HasDSP, HasDSPR2 -- supports DSP ASE. - bool HasDSP, HasDSPR2; + // HasDSP, HasDSPR2, HasDSPR3 -- supports DSP ASE. + bool HasDSP, HasDSPR2, HasDSPR3; // Allow mixed Mips16 and Mips32 in one source file bool AllowMixed16_32; @@ -130,6 +136,12 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // HasMSA -- supports MSA ASE. bool HasMSA; + // UseTCCInDIV -- Enables the use of trapping in the assembler. + bool UseTCCInDIV; + + // HasEVA -- supports EVA ASE. 
+ bool HasEVA; + InstrItineraryData InstrItins; // We can override the determination of whether we are in mips16 mode @@ -189,7 +201,7 @@ public: } bool hasMips32r5() const { return (MipsArchVersion >= Mips32r5 && MipsArchVersion < Mips32Max) || - hasMips64r2(); + hasMips64r5(); } bool hasMips32r6() const { return (MipsArchVersion >= Mips32r6 && MipsArchVersion < Mips32Max) || @@ -228,9 +240,12 @@ public: } bool inMicroMipsMode() const { return InMicroMipsMode; } bool inMicroMips32r6Mode() const { return InMicroMipsMode && hasMips32r6(); } + bool inMicroMips64r6Mode() const { return InMicroMipsMode && hasMips64r6(); } bool hasDSP() const { return HasDSP; } bool hasDSPR2() const { return HasDSPR2; } + bool hasDSPR3() const { return HasDSPR3; } bool hasMSA() const { return HasMSA; } + bool hasEVA() const { return HasEVA; } bool useSmallSection() const { return UseSmallSection; } bool hasStandardEncoding() const { return !inMips16Mode(); } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 1c77745d130b..3e638720e839 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -233,7 +233,7 @@ void MipsPassConfig::addPreRegAlloc() { } TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { if (Subtarget->allowMixed16_32()) { DEBUG(errs() << "No Target Transform Info Pass Added\n"); // FIXME: This is no longer necessary as the TTI returned is per-function. diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 0f2db6039b6a..146f33bda249 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -76,7 +76,7 @@ bool MipsTargetObjectFile:: IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, SectionKind Kind) const { return (IsGlobalInSmallSectionImpl(GV, TM) && - (Kind.isDataRel() || Kind.isBSS() || Kind.isCommon())); + (Kind.isData() || Kind.isBSS() || Kind.isCommon())); } /// Return true if this global address should be placed into small data/bss @@ -107,7 +107,8 @@ IsGlobalInSmallSectionImpl(const GlobalValue *GV, return false; Type *Ty = GV->getType()->getElementType(); - return IsInSmallSection(TM.getDataLayout()->getTypeAllocSize(Ty)); + return IsInSmallSection( + GV->getParent()->getDataLayout().getTypeAllocSize(Ty)); } MCSection * @@ -120,7 +121,7 @@ MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, // Handle Small Section classification here. if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallBSSSection; - if (Kind.isDataRel() && IsGlobalInSmallSection(GV, TM, Kind)) + if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind)) return SmallDataSection; // Otherwise, we work the same as ELF. @@ -128,21 +129,20 @@ MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, } /// Return true if this constant should be placed into small data section. 
-bool MipsTargetObjectFile:: -IsConstantInSmallSection(const Constant *CN, const TargetMachine &TM) const { +bool MipsTargetObjectFile::IsConstantInSmallSection( + const DataLayout &DL, const Constant *CN, const TargetMachine &TM) const { return (static_cast(TM) .getSubtargetImpl() ->useSmallSection() && - LocalSData && IsInSmallSection(TM.getDataLayout()->getTypeAllocSize( - CN->getType()))); + LocalSData && IsInSmallSection(DL.getTypeAllocSize(CN->getType()))); } -MCSection * -MipsTargetObjectFile::getSectionForConstant(SectionKind Kind, - const Constant *C) const { - if (IsConstantInSmallSection(C, *TM)) +/// Return true if this constant should be placed into small data section. +MCSection *MipsTargetObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { + if (IsConstantInSmallSection(DL, C, *TM)) return SmallDataSection; // Otherwise, we work the same as ELF. - return TargetLoweringObjectFileELF::getSectionForConstant(Kind, C); + return TargetLoweringObjectFileELF::getSectionForConstant(DL, Kind, C); } diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 725f2ffd93dd..ba04343bad87 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -36,10 +36,10 @@ class MipsTargetMachine; const TargetMachine &TM) const override; /// Return true if this constant should be placed into small data section. - bool IsConstantInSmallSection(const Constant *CN, + bool IsConstantInSmallSection(const DataLayout &DL, const Constant *CN, const TargetMachine &TM) const; - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; }; } // end namespace llvm diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 6ce1be707d04..b3222f5d89ef 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -12,6 +12,7 @@ #include "MCTargetDesc/MipsABIFlagsSection.h" #include "MCTargetDesc/MipsABIInfo.h" +#include "llvm/ADT/Optional.h" #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" @@ -77,8 +78,12 @@ public: // PIC support virtual void emitDirectiveCpLoad(unsigned RegNo); + virtual void emitDirectiveCpRestore(SmallVector &StoreInsts, + int Offset); virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg); + virtual void emitDirectiveCpreturn(unsigned SaveLocation, + bool SaveLocationIsRegister); // FP abiflags directives virtual void emitDirectiveModuleFP(); @@ -97,18 +102,18 @@ public: // structure values. 
   template <class PredicateLibrary>
   void updateABIInfo(const PredicateLibrary &P) {
-    ABI = &P.getABI();
+    ABI = P.getABI();
     ABIFlagsSection.setAllFromPredicates(P);
   }

   MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; }
   const MipsABIInfo &getABI() const {
-    assert(ABI && "ABI hasn't been set!");
+    assert(ABI.hasValue() && "ABI hasn't been set!");
     return *ABI;
   }

 protected:
-  const MipsABIInfo *ABI;
+  llvm::Optional<MipsABIInfo> ABI;
   MipsABIFlagsSection ABIFlagsSection;

   bool GPRInfoSet;
@@ -188,8 +193,12 @@ public:

   // PIC support
   void emitDirectiveCpLoad(unsigned RegNo) override;
+  void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+                              int Offset) override;
   void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
                             const MCSymbol &Sym, bool IsReg) override;
+  void emitDirectiveCpreturn(unsigned SaveLocation,
+                             bool SaveLocationIsRegister) override;

   // FP abiflags directives
   void emitDirectiveModuleFP() override;
@@ -237,8 +246,12 @@ public:

   // PIC support
   void emitDirectiveCpLoad(unsigned RegNo) override;
+  void emitDirectiveCpRestore(SmallVector<MCInst, 3> &StoreInsts,
+                              int Offset) override;
   void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
                             const MCSymbol &Sym, bool IsReg) override;
+  void emitDirectiveCpreturn(unsigned SaveLocation,
+                             bool SaveLocationIsRegister) override;

   void emitMipsAbiFlags();
 };
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 02c5a210d099..f0f223aa057b 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -15,11 +15,9 @@
 #define LLVM_LIB_TARGET_NVPTX_INSTPRINTER_NVPTXINSTPRINTER_H

 #include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/raw_ostream.h"

 namespace llvm {

-class MCOperand;
 class MCSubtargetInfo;

 class NVPTXInstPrinter : public MCInstPrinter {
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index b432e065c2f4..9ac3c8850f75 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -22,6 +22,7 @@ class Triple;

 class NVPTXMCAsmInfo : public MCAsmInfo {
   virtual void anchor();
+
 public:
   explicit NVPTXMCAsmInfo(const Triple &TheTriple);
 };
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index fe28214e9588..e5fae85bacf2 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -41,24 +41,6 @@ enum CondCodes {
 };
 }

-inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
-  switch (CC) {
-  case NVPTXCC::NE:
-    return "ne";
-  case NVPTXCC::EQ:
-    return "eq";
-  case NVPTXCC::LT:
-    return "lt";
-  case NVPTXCC::LE:
-    return "le";
-  case NVPTXCC::GT:
-    return "gt";
-  case NVPTXCC::GE:
-    return "ge";
-  }
-  llvm_unreachable("Unknown condition code");
-}
-
 FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
                                  llvm::CodeGenOpt::Level OptLevel);
 ModulePass *createNVPTXAssignValidGlobalNamesPass();
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index ecb0f0a1d0a1..e8c36089a779 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -355,7 +355,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
   if (isABI) {
     if (Ty->isFloatingPointTy() || Ty->isIntegerTy()) {
       unsigned size = 0;
-      if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
+      if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
         size = ITy->getBitWidth();
         if (size < 32)
           size = 32;
@@ -635,9 +635,7 @@ static bool usedInGlobalVarDef(const Constant *C) {
   return
false; if (const GlobalVariable *GV = dyn_cast(C)) { - if (GV->getName() == "llvm.used") - return false; - return true; + return GV->getName() != "llvm.used"; } for (const User *U : C->users()) @@ -682,7 +680,7 @@ static bool usedInOneFunc(const User *U, Function const *&oneFunc) { static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { if (!gv->hasInternalLinkage()) return false; - const PointerType *Pty = gv->getType(); + PointerType *Pty = gv->getType(); if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED) return false; @@ -720,7 +718,7 @@ static bool useFuncSeen(const Constant *C, void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { llvm::DenseMap seenMap; for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { - const Function *F = FI; + const Function *F = &*FI; if (F->isDeclaration()) { if (F->use_empty()) @@ -870,9 +868,8 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) { DenseSet GVVisiting; // Visit each global variable, in order - for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); + for (const GlobalVariable &I : M.globals()) + VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting); assert(GVVisited.size() == M.getGlobalList().size() && "Missed a global variable"); @@ -1029,10 +1026,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, GVar->getName().startswith("nvvm.")) return; - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // GlobalVariables are always constant pointers themselves. - const PointerType *PTy = GVar->getType(); + PointerType *PTy = GVar->getType(); Type *ETy = PTy->getElementType(); if (GVar->hasExternalLinkage()) { @@ -1159,7 +1156,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, } if (GVar->getAlignment() == 0) - O << " .align " << (int) TD->getPrefTypeAlignment(ETy); + O << " .align " << (int)DL.getPrefTypeAlignment(ETy); else O << " .align " << GVar->getAlignment(); @@ -1185,9 +1182,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, printScalarConstant(Initializer, O); } } else { - // The frontend adds zero-initializer to variables that don't have an - // initial value, so skip warning for this case. - if (!GVar->getInitializer()->isNullValue()) { + // The frontend adds zero-initializer to device and constant variables + // that don't have an initial value, and UndefValue to shared + // variables, so skip warning for this case. + if (!GVar->getInitializer()->isNullValue() && + !isa(GVar->getInitializer())) { report_fatal_error("initial value of '" + GVar->getName() + "' is not allowed in addrspace(" + Twine(PTy->getAddressSpace()) + ")"); @@ -1205,7 +1204,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, case Type::StructTyID: case Type::ArrayTyID: case Type::VectorTyID: - ElementSize = TD->getTypeStoreSize(ETy); + ElementSize = DL.getTypeStoreSize(ETy); // Ptx allows variable initilization only for constant and // global state spaces. 
if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || @@ -1299,7 +1298,7 @@ void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, } std::string -NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { +NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { switch (Ty->getTypeID()) { default: llvm_unreachable("unexpected type"); @@ -1339,16 +1338,16 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // GlobalVariables are always constant pointers themselves. - const PointerType *PTy = GVar->getType(); + PointerType *PTy = GVar->getType(); Type *ETy = PTy->getElementType(); O << "."; emitPTXAddressSpace(PTy->getAddressSpace(), O); if (GVar->getAlignment() == 0) - O << " .align " << (int) TD->getPrefTypeAlignment(ETy); + O << " .align " << (int)DL.getPrefTypeAlignment(ETy); else O << " .align " << GVar->getAlignment(); @@ -1370,7 +1369,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, case Type::StructTyID: case Type::ArrayTyID: case Type::VectorTyID: - ElementSize = TD->getTypeStoreSize(ETy); + ElementSize = DL.getTypeStoreSize(ETy); O << " .b8 "; getSymbol(GVar)->print(O, MAI); O << "["; @@ -1385,32 +1384,32 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, return; } -static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { +static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { if (Ty->isSingleValueType()) - return TD->getPrefTypeAlignment(Ty); + return DL.getPrefTypeAlignment(Ty); - const ArrayType *ATy = dyn_cast(Ty); + auto *ATy = dyn_cast(Ty); if (ATy) - return getOpenCLAlignment(TD, ATy->getElementType()); + return getOpenCLAlignment(DL, ATy->getElementType()); - const StructType *STy = dyn_cast(Ty); + auto *STy = dyn_cast(Ty); if (STy) { unsigned int alignStruct = 1; // Go through each element of the struct and find the // largest alignment. 
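// Worked example for the loop below (hypothetical type, for illustration):
// for struct S { char c; double d; }, the element alignments are 1 and 8,
// so alignStruct ends up as 8, i.e. a struct is as aligned as its
// most-aligned member.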
for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { Type *ETy = STy->getElementType(i); - unsigned int align = getOpenCLAlignment(TD, ETy); + unsigned int align = getOpenCLAlignment(DL, ETy); if (align > alignStruct) alignStruct = align; } return alignStruct; } - const FunctionType *FTy = dyn_cast(Ty); + auto *FTy = dyn_cast(Ty); if (FTy) - return TD->getPointerPrefAlignment(); - return TD->getPrefTypeAlignment(Ty); + return DL.getPointerPrefAlignment(); + return DL.getPrefTypeAlignment(Ty); } void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, @@ -1419,13 +1418,8 @@ void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, O << "_param_" << paramIndex; } -void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { - CurrentFnSym->print(O, MAI); - O << "_param_" << paramIndex; -} - void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const AttributeSet &PAL = F->getAttributes(); const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); Function::const_arg_iterator I, E; @@ -1433,7 +1427,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { bool first = true; bool isKernelFunc = llvm::isKernelFunction(*F); bool isABI = (nvptxSubtarget->getSmVersion() >= 20); - MVT thePointerTy = TLI->getPointerTy(*TD); + MVT thePointerTy = TLI->getPointerTy(DL); O << "(\n"; @@ -1485,9 +1479,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) - align = TD->getABITypeAlignment(Ty); + align = DL.getABITypeAlignment(Ty); - unsigned sz = TD->getTypeAllocSize(Ty); + unsigned sz = DL.getTypeAllocSize(Ty); O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; @@ -1495,7 +1489,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { continue; } // Just a scalar - const PointerType *PTy = dyn_cast(Ty); + auto *PTy = dyn_cast(Ty); if (isKernelFunc) { if (PTy) { // Special handling for pointer arguments to kernel @@ -1519,7 +1513,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { O << ".ptr .global "; break; } - O << ".align " << (int) getOpenCLAlignment(TD, ETy) << " "; + O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " "; } printParamName(I, paramIndex, O); continue; @@ -1556,7 +1550,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { } // param has byVal attribute. So should be a pointer - const PointerType *PTy = dyn_cast(Ty); + auto *PTy = dyn_cast(Ty); assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); @@ -1566,9 +1560,9 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // size = typeallocsize of element type unsigned align = PAL.getParamAlignment(paramIndex + 1); if (align == 0) - align = TD->getABITypeAlignment(ETy); + align = DL.getABITypeAlignment(ETy); - unsigned sz = TD->getTypeAllocSize(ETy); + unsigned sz = DL.getTypeAllocSize(ETy); O << "\t.param .align " << align << " .b8 "; printParamName(I, paramIndex, O); O << "[" << sz << "]"; @@ -1579,7 +1573,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Further, if a part is vector, print the above for // each vector element. 
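// Example of the decomposition (assumed types, for illustration): for a
// byval struct { float x; <2 x float> v; }, ComputeValueVTs yields
// vtparts = { f32, v2f32 }, and the loop below then prints the v2f32 part
// once per vector element, i.e. as two f32 entries.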
SmallVector vtparts; - ComputeValueVTs(*TLI, getDataLayout(), ETy, vtparts); + ComputeValueVTs(*TLI, DL, ETy, vtparts); for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { unsigned elems = 1; EVT elemtype = vtparts[i]; @@ -1786,10 +1780,10 @@ static void ConvertDoubleToBytes(unsigned char *p, double val) { void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); if (isa(CPV) || CPV->isNullValue()) { - int s = TD->getTypeAllocSize(CPV->getType()); + int s = DL.getTypeAllocSize(CPV->getType()); if (s < Bytes) s = Bytes; aggBuffer->addZeros(s); @@ -1800,7 +1794,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, switch (CPV->getType()->getTypeID()) { case Type::IntegerTyID: { - const Type *ETy = CPV->getType(); + Type *ETy = CPV->getType(); if (ETy == Type::getInt8Ty(CPV->getContext())) { unsigned char c = (unsigned char)cast(CPV)->getZExtValue(); ConvertIntToBytes<>(ptr, c); @@ -1817,7 +1811,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast(CPV)) { if (const ConstantInt *constInt = dyn_cast( - ConstantFoldConstantExpression(Cexpr, *TD))) { + ConstantFoldConstantExpression(Cexpr, DL))) { int int32 = (int)(constInt->getZExtValue()); ConvertIntToBytes<>(ptr, int32); aggBuffer->addBytes(ptr, 4, Bytes); @@ -1839,7 +1833,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, break; } else if (const ConstantExpr *Cexpr = dyn_cast(CPV)) { if (const ConstantInt *constInt = dyn_cast( - ConstantFoldConstantExpression(Cexpr, *TD))) { + ConstantFoldConstantExpression(Cexpr, DL))) { long long int64 = (long long)(constInt->getZExtValue()); ConvertIntToBytes<>(ptr, int64); aggBuffer->addBytes(ptr, 8, Bytes); @@ -1860,7 +1854,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::FloatTyID: case Type::DoubleTyID: { const ConstantFP *CFP = dyn_cast(CPV); - const Type *Ty = CFP->getType(); + Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { float float32 = (float) CFP->getValueAPF().convertToFloat(); ConvertFloatToBytes(ptr, float32); @@ -1881,7 +1875,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, const Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v, Cexpr); } - unsigned int s = TD->getTypeAllocSize(CPV->getType()); + unsigned int s = DL.getTypeAllocSize(CPV->getType()); aggBuffer->addZeros(s); break; } @@ -1891,7 +1885,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, case Type::StructTyID: { if (isa(CPV) || isa(CPV) || isa(CPV) || isa(CPV)) { - int ElementSize = TD->getTypeAllocSize(CPV->getType()); + int ElementSize = DL.getTypeAllocSize(CPV->getType()); bufferAggregateConstant(CPV, aggBuffer); if (Bytes > ElementSize) aggBuffer->addZeros(Bytes - ElementSize); @@ -1909,7 +1903,7 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, AggBuffer *aggBuffer) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); int Bytes; // Old constants @@ -1934,12 +1928,12 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, StructType *ST = cast(CPV->getType()); for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { if (i == (e - 1)) - Bytes = TD->getStructLayout(ST)->getElementOffset(0) + - TD->getTypeAllocSize(ST) - - 
TD->getStructLayout(ST)->getElementOffset(i); + Bytes = DL.getStructLayout(ST)->getElementOffset(0) + + DL.getTypeAllocSize(ST) - + DL.getStructLayout(ST)->getElementOffset(i); else - Bytes = TD->getStructLayout(ST)->getElementOffset(i + 1) - - TD->getStructLayout(ST)->getElementOffset(i); + Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) - + DL.getStructLayout(ST)->getElementOffset(i); bufferLEByte(cast(CPV->getOperand(i)), Bytes, aggBuffer); } } @@ -1951,18 +1945,6 @@ void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, // buildTypeNameMap - Run through symbol table looking for type names. // -bool NVPTXAsmPrinter::isImageType(const Type *Ty) { - - std::map::iterator PI = TypeNameMap.find(Ty); - - if (PI != TypeNameMap.end() && (!PI->second.compare("struct._image1d_t") || - !PI->second.compare("struct._image2d_t") || - !PI->second.compare("struct._image3d_t"))) - return true; - - return false; -} - bool NVPTXAsmPrinter::ignoreLoc(const MachineInstr &MI) { switch (MI.getOpcode()) { @@ -2054,7 +2036,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a // last resort before giving up. - if (Constant *C = ConstantFoldConstantExpression(CE, *TM.getDataLayout())) + if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) if (C != CE) return lowerConstantForGV(C, ProcessingGeneric); @@ -2083,7 +2065,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) } case Instruction::GetElementPtr: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Generate a symbolic expression for the byte address APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); @@ -2109,7 +2091,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); case Instruction::IntToPtr: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. @@ -2120,7 +2102,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) } case Instruction::PtrToInt: { - const DataLayout &DL = *TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. 
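// Sketch of the fold (assuming 64-bit pointers, for illustration):
//   ptrtoint (i8* @g to i64)  ; same width as the pointer, so it lowers to
//                             ; the symbolic address of @g
//   ptrtoint (i8* @g to i16)  ; needs a real truncation; not a foldable
//                             ; cast, so it is rejected by this path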
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index f6f7685e76f9..76bf179896a8 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -212,28 +212,21 @@ private: MCOperand GetSymbolRef(const MCSymbol *Symbol); unsigned encodeVirtualRegister(unsigned Reg); - void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {} - void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier, raw_ostream &O); void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = nullptr); - void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, bool = false); - void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); void emitGlobals(const Module &M); void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI); void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, raw_ostream &); - void emitFunctionExternParamList(const MachineFunction &MF); void emitFunctionParamList(const Function *, raw_ostream &O); void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O); void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); - void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize); - bool isImageType(const Type *Ty); void printReturnValStr(const Function *, raw_ostream &O); void printReturnValStr(const MachineFunction &MF, raw_ostream &O); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -271,7 +264,7 @@ private: // Build the map between type name and ID based on module's type // symbol table. - std::map TypeNameMap; + std::map TypeNameMap; // List of variables demoted to a function scope. std::map > localDecls; @@ -282,19 +275,15 @@ private: void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; - std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; + std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const; void printScalarConstant(const Constant *CPV, raw_ostream &O); void printFPConstant(const ConstantFP *Fp, raw_ostream &O); void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer); void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer); - void printOperandProper(const MachineOperand &MO); - void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); void emitDeclarations(const Module &, raw_ostream &O); void emitDeclaration(const Function *, raw_ostream &O); - - static const char *getRegisterName(unsigned RegNo); void emitDemotedVars(const Function *, raw_ostream &); bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp index 69a229e32f43..95813c8430d1 100644 --- a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp +++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp @@ -98,7 +98,7 @@ private: /// This reordering exposes to optimizeMemoryInstruction more /// optimization opportunities on loads and stores. 
/// - /// If this function succesfully hoists an eliminable addrspacecast or V is + /// If this function successfully hoists an eliminable addrspacecast or V is /// already such an addrspacecast, it returns the transformed value (which is /// guaranteed to be an addrspacecast); otherwise, it returns nullptr. Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0); @@ -267,14 +267,14 @@ bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) { return false; bool Changed = false; - for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { - for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) { + for (BasicBlock &B : F) { + for (Instruction &I : B) { if (isa(I)) { // V = load P - Changed |= optimizeMemoryInstruction(I, 0); + Changed |= optimizeMemoryInstruction(&I, 0); } else if (isa(I)) { // store V, P - Changed |= optimizeMemoryInstruction(I, 1); + Changed |= optimizeMemoryInstruction(&I, 1); } } } diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 6fd09c405260..62ca5e9f9f62 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -81,7 +81,7 @@ bool GenericToNVVM::runOnModule(Module &M) { for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E;) { - GlobalVariable *GV = I++; + GlobalVariable *GV = &*I++; if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC && !llvm::isTexture(*GV) && !llvm::isSurface(*GV) && !llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) { @@ -117,7 +117,7 @@ bool GenericToNVVM::runOnModule(Module &M) { Value *Operand = II->getOperand(i); if (isa(Operand)) { II->setOperand( - i, remapConstant(&M, I, cast(Operand), Builder)); + i, remapConstant(&M, &*I, cast(Operand), Builder)); } } } @@ -132,10 +132,8 @@ bool GenericToNVVM::runOnModule(Module &M) { // Walk through the metadata section and update the debug information // associated with the global variables in the default address space. 
- for (Module::named_metadata_iterator I = M.named_metadata_begin(),
- E = M.named_metadata_end();
- I != E; I++) {
- remapNamedMDNode(VM, I);
+ for (NamedMDNode &I : M.named_metadata()) {
+ remapNamedMDNode(VM, &I);
}

// Walk through the global variable initializers, and replace any use of
@@ -318,9 +316,8 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
NewOperands[0], NewOperands[1]);
case Instruction::FCmp:
// CompareConstantExpr (fcmp)
- assert(false && "Address space conversion should have no effect "
- "on floating point CompareConstantExpr (fcmp)!");
- return C;
+ llvm_unreachable("Address space conversion should have no effect "
+ "on floating point CompareConstantExpr (fcmp)!");
case Instruction::ExtractElement:
// ExtractElementConstantExpr
return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]);
@@ -364,8 +361,7 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
return Builder.CreateCast(Instruction::CastOps(C->getOpcode()),
NewOperands[0], C->getType());
}
- assert(false && "GenericToNVVM encountered an unsupported ConstantExpr");
- return C;
+ llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr");
}
}
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 232a611d1760..2d0098b392f4 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//

#include "NVPTXISelDAGToDAG.h"
+#include "NVPTXUtilities.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/CommandLine.h"
@@ -530,7 +532,7 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
if (!Src)
return NVPTX::PTXLdStInstCode::GENERIC;

- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
+ if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
switch (PT->getAddressSpace()) {
case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
@@ -544,6 +546,39 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
return NVPTX::PTXLdStInstCode::GENERIC;
}

+static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
+ unsigned CodeAddrSpace, MachineFunction *F) {
+ // To use non-coherent caching, the load has to be from global
+ // memory and we have to prove that the memory area is not written
+ // to anywhere for the duration of the kernel call, not even after
+ // the load.
+ //
+ // To ensure that there are no writes to the memory, we require the
+ // underlying pointer to be a noalias (__restrict) kernel parameter
+ // that is never used for a write. We can only do this for kernel
+ // functions since from within a device function, we cannot know if
+ // there were or will be writes to the memory from the caller - or we
+ // could, but then we would have to do inter-procedural analysis.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
+ !isKernelFunction(*F->getFunction())) {
+ return false;
+ }
+
+ // We use GetUnderlyingObjects() here instead of
+ // GetUnderlyingObject() mainly because the former looks through phi
+ // nodes while the latter does not. We need to look through phi
+ // nodes to handle pointer induction variables.
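+ // Illustrative CUDA source (an assumed example, not part of this change)
+ // in which the loaded pointer is a phi-based induction variable over a
+ // __restrict__ read-only kernel parameter; GetUnderlyingObjects() walks
+ // through the phi and still traces each load back to the noalias
+ // argument, so the loads qualify for ld.global.nc:
+ //
+ //   __global__ void sum(const int *__restrict__ in, int *out, int n) {
+ //     int s = 0;
+ //     for (const int *p = in; p != in + n; ++p) // p: pointer induction var
+ //       s += *p;                                // eligible for LDG
+ //     *out = s;
+ //   }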
+ SmallVector Objs; + GetUnderlyingObjects(const_cast(N->getMemOperand()->getValue()), + Objs, F->getDataLayout()); + for (Value *Obj : Objs) { + auto *A = dyn_cast(Obj); + if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false; + } + + return true; +} + SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) { unsigned IID = cast(N->getOperand(0))->getZExtValue(); switch (IID) { @@ -638,6 +673,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) { // Address Space Setting unsigned int codeAddrSpace = getCodeAddrSpace(LD); + if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) { + return SelectLDGLDU(N); + } + // Volatile Setting // - .volatile is only availalble for .global and .shared bool isVolatile = LD->isVolatile(); @@ -872,6 +911,10 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { // Address Space Setting unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD); + if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) { + return SelectLDGLDU(N); + } + // Volatile Setting // - .volatile is only availalble for .global and .shared bool IsVolatile = MemSD->isVolatile(); @@ -1425,6 +1468,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1474,6 +1518,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1522,6 +1567,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1563,6 +1609,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1612,6 +1659,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1660,6 +1708,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1707,6 +1756,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1756,6 +1806,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1804,6 +1855,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1845,6 +1897,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::LOAD: case ISD::INTRINSIC_W_CHAIN: if (IsLDG) { switch (EltVT.getSimpleVT().SimpleTy) { @@ -1894,6 +1947,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { } } break; + case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1942,6 +1996,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { break; } break; + case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -5039,7 +5094,7 @@ bool 
NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N, } if (!Src) return false; - if (const PointerType *PT = dyn_cast(Src->getType())) + if (auto *PT = dyn_cast(Src->getType())) return (PT->getAddressSpace() == spN); return false; } diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index b75cf4040312..766369631e14 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -124,6 +124,10 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, // condition branches. setJumpIsExpensive(true); + // Wide divides are _very_ slow. Try to reduce the width of the divide if + // possible. + addBypassSlowDiv(64, 32); + // By default, use the Source scheduling if (sched4reg) setSchedulingPreference(Sched::RegPressure); @@ -275,6 +279,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM, setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SELECT); // Now deduce the information based on the above mentioned // actions @@ -910,7 +915,7 @@ std::string NVPTXTargetLowering::getPrototype( O << "("; if (retTy->isFloatingPointTy() || retTy->isIntegerTy()) { unsigned size = 0; - if (const IntegerType *ITy = dyn_cast(retTy)) { + if (auto *ITy = dyn_cast(retTy)) { size = ITy->getBitWidth(); if (size < 32) size = 32; @@ -981,7 +986,7 @@ std::string NVPTXTargetLowering::getPrototype( O << "_"; continue; } - const PointerType *PTy = dyn_cast(Ty); + auto *PTy = dyn_cast(Ty); assert(PTy && "Param with byval attribute should be a pointer type"); Type *ETy = PTy->getElementType(); @@ -1318,7 +1323,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // struct or vector SmallVector vtparts; SmallVector Offsets; - const PointerType *PTy = dyn_cast(Args[i].Ty); + auto *PTy = dyn_cast(Args[i].Ty); assert(PTy && "Type of a byval parameter should be pointer"); ComputePTXValueVTs(*this, DAG.getDataLayout(), PTy->getElementType(), vtparts, &Offsets, 0); @@ -2007,15 +2012,6 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const { return Result; } -SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, - int idx, EVT v) const { - std::string *name = nvTM->getManagedStrPool()->getManagedString(inname); - std::stringstream suffix; - suffix << idx; - *name += suffix.str(); - return DAG.getTargetExternalSymbol(name->c_str(), v); -} - SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { std::string ParamSym; @@ -2029,10 +2025,6 @@ NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const { return DAG.getTargetExternalSymbol(SavedStr->c_str(), v); } -SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) { - return getExtSymb(DAG, ".HLPPARAM", idx); -} - // Check to see if the kernel argument is image*_t or sampler_t bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { @@ -2040,8 +2032,8 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { "struct._image3d_t", "struct._sampler_t" }; - const Type *Ty = arg->getType(); - const PointerType *PTy = dyn_cast(Ty); + Type *Ty = arg->getType(); + auto *PTy = dyn_cast(Ty); if (!PTy) return false; @@ -2049,14 +2041,11 @@ bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) { if (!context) return false; - const StructType *STy = dyn_cast(PTy->getElementType()); + auto *STy = 
dyn_cast(PTy->getElementType()); const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : ""; - for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i) - if (TypeName == specialTypes[i]) - return true; - - return false; + return std::find(std::begin(specialTypes), std::end(specialTypes), + TypeName) != std::end(specialTypes); } SDValue NVPTXTargetLowering::LowerFormalArguments( @@ -2082,10 +2071,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( std::vector argTypes; std::vector theArgs; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) { - theArgs.push_back(I); - argTypes.push_back(I->getType()); + for (const Argument &I : F->args()) { + theArgs.push_back(&I); + argTypes.push_back(I.getType()); } // argTypes.size() (or theArgs.size()) and Ins.size() need not match. // Ins.size() will be larger @@ -2545,20 +2533,6 @@ void NVPTXTargetLowering::LowerAsmOperandForConstraint( TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } -// NVPTX suuport vector of legal types of any length in Intrinsics because the -// NVPTX specific type legalizer -// will legalize them to the PTX supported length. -bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const { - if (isTypeLegal(VT)) - return true; - if (VT.isVector()) { - MVT eVT = VT.getVectorElementType(); - if (isTypeLegal(eVT)) - return true; - } - return false; -} - static unsigned getOpcForTextureInstr(unsigned Intrinsic) { switch (Intrinsic) { default: @@ -3747,9 +3721,7 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL, // - [immAddr] if (AM.BaseGV) { - if (AM.BaseOffs || AM.HasBaseReg || AM.Scale) - return false; - return true; + return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale; } switch (AM.Scale) { @@ -3820,11 +3792,6 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } -/// getFunctionAlignment - Return the Log2 alignment of this function. -unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { - return 4; -} - //===----------------------------------------------------------------------===// // NVPTX DAG Combining //===----------------------------------------------------------------------===// @@ -4057,6 +4024,67 @@ static SDValue PerformANDCombine(SDNode *N, return SDValue(); } +static SDValue PerformSELECTCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // Currently this detects patterns for integer min and max and + // lowers them to PTX-specific intrinsics that enable hardware + // support. + + const SDValue Cond = N->getOperand(0); + if (Cond.getOpcode() != ISD::SETCC) return SDValue(); + + const SDValue LHS = Cond.getOperand(0); + const SDValue RHS = Cond.getOperand(1); + const SDValue True = N->getOperand(1); + const SDValue False = N->getOperand(2); + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) + return SDValue(); + + const EVT VT = N->getValueType(0); + if (VT != MVT::i32 && VT != MVT::i64) return SDValue(); + + const ISD::CondCode CC = cast(Cond.getOperand(2))->get(); + SDValue Larger; // The larger of LHS and RHS when condition is true. 
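+ // Mapping implemented by the switch below (illustration, signed i32 case):
+ //   (a < b) ? a : b   // Larger = RHS, True == LHS -> IsMax = false -> min
+ //   (a < b) ? b : a   // Larger = RHS, True == RHS -> IsMax = true  -> max
+ // The unsigned and 64-bit variants pick the ui/ll/ull intrinsics instead.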
+ switch (CC) { + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETLT: + case ISD::SETLE: + Larger = RHS; + break; + + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: + Larger = LHS; + break; + + default: + return SDValue(); + } + const bool IsMax = (Larger == True); + const bool IsSigned = ISD::isSignedIntSetCC(CC); + + unsigned IntrinsicId; + if (VT == MVT::i32) { + if (IsSigned) + IntrinsicId = IsMax ? Intrinsic::nvvm_max_i : Intrinsic::nvvm_min_i; + else + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ui : Intrinsic::nvvm_min_ui; + } else { + assert(VT == MVT::i64); + if (IsSigned) + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ll : Intrinsic::nvvm_min_ll; + else + IntrinsicId = IsMax ? Intrinsic::nvvm_max_ull : Intrinsic::nvvm_min_ull; + } + + SDLoc DL(N); + return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS); +} + enum OperandSignedness { Signed = 0, Unsigned, @@ -4113,25 +4141,16 @@ static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, if (ConstantSDNode *CI = dyn_cast(RHS)) { APInt Val = CI->getAPIntValue(); if (LHSSign == Unsigned) { - if (Val.isIntN(OptSize)) { - return true; - } - return false; + return Val.isIntN(OptSize); } else { - if (Val.isSignedIntN(OptSize)) { - return true; - } - return false; + return Val.isSignedIntN(OptSize); } } else { OperandSignedness RHSSign; if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) return false; - if (LHSSign != RHSSign) - return false; - - return true; + return LHSSign == RHSSign; } } @@ -4247,6 +4266,8 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, return PerformSHLCombine(N, DCI, OptLevel); case ISD::AND: return PerformANDCombine(N, DCI); + case ISD::SELECT: + return PerformSELECTCombine(N, DCI); } return SDValue(); } @@ -4509,25 +4530,25 @@ void NVPTXTargetLowering::ReplaceNodeResults( void NVPTXSection::anchor() {} NVPTXTargetObjectFile::~NVPTXTargetObjectFile() { - delete TextSection; - delete DataSection; - delete BSSSection; - delete ReadOnlySection; + delete static_cast(TextSection); + delete static_cast(DataSection); + delete static_cast(BSSSection); + delete static_cast(ReadOnlySection); - delete StaticCtorSection; - delete StaticDtorSection; - delete LSDASection; - delete EHFrameSection; - delete DwarfAbbrevSection; - delete DwarfInfoSection; - delete DwarfLineSection; - delete DwarfFrameSection; - delete DwarfPubTypesSection; - delete DwarfDebugInlineSection; - delete DwarfStrSection; - delete DwarfLocSection; - delete DwarfARangesSection; - delete DwarfRangesSection; + delete static_cast(StaticCtorSection); + delete static_cast(StaticDtorSection); + delete static_cast(LSDASection); + delete static_cast(EHFrameSection); + delete static_cast(DwarfAbbrevSection); + delete static_cast(DwarfInfoSection); + delete static_cast(DwarfLineSection); + delete static_cast(DwarfFrameSection); + delete static_cast(DwarfPubTypesSection); + delete static_cast(DwarfDebugInlineSection); + delete static_cast(DwarfStrSection); + delete static_cast(DwarfLocSection); + delete static_cast(DwarfARangesSection); + delete static_cast(DwarfRangesSection); } MCSection * diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index e5c37321a33b..60914c1d09b4 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -441,13 +441,9 @@ public: SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) 
const; - SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset, - SelectionDAG &DAG) const; const char *getTargetNodeName(unsigned Opcode) const override; - bool isTypeSupportedInIntrinsic(MVT VT) const; - bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const override; @@ -459,8 +455,13 @@ public: bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; - /// getFunctionAlignment - Return the Log2 alignment of this function. - unsigned getFunctionAlignment(const Function *F) const; + bool isTruncateFree(Type *SrcTy, Type *DstTy) const override { + // Truncating 64-bit to 32-bit is free in SASS. + if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) + return false; + return SrcTy->getPrimitiveSizeInBits() == 64 && + DstTy->getPrimitiveSizeInBits() == 32; + } EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx, EVT VT) const override { @@ -515,11 +516,7 @@ public: private: const NVPTXSubtarget &STI; // cache the subtarget here - - SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, - EVT = MVT::i32) const; SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const; - SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx); SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index 76d6597c6e20..9f3cf4551955 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -37,30 +37,31 @@ void NVPTXInstrInfo::copyPhysReg( const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg); const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); - if (DestRC != SrcRC) - report_fatal_error("Attempted to created cross-class register copy"); + if (DestRC->getSize() != SrcRC->getSize()) + report_fatal_error("Copy one register into another with a different width"); - if (DestRC == &NVPTX::Int32RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int1RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Float32RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int16RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Int64RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else if (DestRC == &NVPTX::Float64RegsRegClass) - BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - else { + unsigned Op; + if (DestRC == &NVPTX::Int1RegsRegClass) { + Op = NVPTX::IMOV1rr; + } else if (DestRC == &NVPTX::Int16RegsRegClass) { + Op = NVPTX::IMOV16rr; + } else if (DestRC == &NVPTX::Int32RegsRegClass) { + Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32rr + : NVPTX::BITCONVERT_32_F2I); + } else if (DestRC == &NVPTX::Int64RegsRegClass) { + Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr + : NVPTX::BITCONVERT_64_F2I); + } else if (DestRC == &NVPTX::Float32RegsRegClass) { + Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr + : NVPTX::BITCONVERT_32_I2F); + } else if (DestRC == &NVPTX::Float64RegsRegClass) { + Op = (SrcRC == &NVPTX::Float64RegsRegClass ? 
NVPTX::FMOV64rr + : NVPTX::BITCONVERT_64_I2F); + } else { llvm_unreachable("Bad register copy"); } + BuildMI(MBB, I, DL, get(Op), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, @@ -86,27 +87,6 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, return false; } -bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const { - switch (MI.getOpcode()) { - default: - return false; - case NVPTX::INT_PTX_SREG_NTID_X: - case NVPTX::INT_PTX_SREG_NTID_Y: - case NVPTX::INT_PTX_SREG_NTID_Z: - case NVPTX::INT_PTX_SREG_TID_X: - case NVPTX::INT_PTX_SREG_TID_Y: - case NVPTX::INT_PTX_SREG_TID_Z: - case NVPTX::INT_PTX_SREG_CTAID_X: - case NVPTX::INT_PTX_SREG_CTAID_Y: - case NVPTX::INT_PTX_SREG_CTAID_Z: - case NVPTX::INT_PTX_SREG_NCTAID_X: - case NVPTX::INT_PTX_SREG_NCTAID_Y: - case NVPTX::INT_PTX_SREG_NCTAID_Z: - case NVPTX::INT_PTX_SREG_WARPSIZE: - return true; - } -} - bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const { bool isLoad = false; diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 179c06887198..3e407223f010 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -56,7 +56,6 @@ public: unsigned &DestReg) const; bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const; bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const; - bool isReadSpecialReg(MachineInstr &MI) const; virtual bool CanTailMerge(const MachineInstr *MI) const; // Branch analysis. diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp index 0bf72febc4a0..f770c2acaab5 100644 --- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp @@ -6,6 +6,8 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// \file // Lower aggregate copies, memset, memcpy, memmov intrinsics into loops when // the size is large or is not a compile-time constant. // @@ -18,19 +20,20 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #define DEBUG_TYPE "nvptx" using namespace llvm; namespace { + // actual analysis class, which is a functionpass struct NVPTXLowerAggrCopies : public FunctionPass { static char ID; @@ -50,179 +53,299 @@ struct NVPTXLowerAggrCopies : public FunctionPass { return "Lower aggregate copies/intrinsics into loops"; } }; -} // namespace char NVPTXLowerAggrCopies::ID = 0; -// Lower MemTransferInst or load-store pair to loop -static void convertTransferToLoop( - Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len, - bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) { - Type *indType = len->getType(); +// Lower memcpy to loop. 
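+// Shape of the IR this helper emits (a sketch; block names match the code
+// below, and an i64 CopyLen is assumed for illustration):
+//
+//   loadstoreloop:
+//     %i = phi i64 [ 0, %OrigBB ], [ %i.next, %loadstoreloop ]
+//     %s = getelementptr inbounds i8, i8* %SrcAddr, i64 %i
+//     %v = load i8, i8* %s
+//     %d = getelementptr inbounds i8, i8* %DstAddr, i64 %i
+//     store i8 %v, i8* %d
+//     %i.next = add i64 %i, 1
+//     %cont = icmp ult i64 %i.next, %CopyLen
+//     br i1 %cont, label %loadstoreloop, label %split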
+void convertMemCpyToLoop(Instruction *ConvertedInst, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, + bool DstIsVolatile, LLVMContext &Context, + Function &F) { + Type *TypeOfCopyLen = CopyLen->getType(); - BasicBlock *origBB = splitAt->getParent(); - BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); - BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); + BasicBlock *OrigBB = ConvertedInst->getParent(); + BasicBlock *NewBB = + ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); + BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); - origBB->getTerminator()->setSuccessor(0, loopBB); - IRBuilder<> builder(origBB, origBB->getTerminator()); + OrigBB->getTerminator()->setSuccessor(0, LoopBB); + IRBuilder<> Builder(OrigBB->getTerminator()); - // srcAddr and dstAddr are expected to be pointer types, + // SrcAddr and DstAddr are expected to be pointer types, // so no check is made here. - unsigned srcAS = cast(srcAddr->getType())->getAddressSpace(); - unsigned dstAS = cast(dstAddr->getType())->getAddressSpace(); + unsigned SrcAS = cast(SrcAddr->getType())->getAddressSpace(); + unsigned DstAS = cast(DstAddr->getType())->getAddressSpace(); // Cast pointers to (char *) - srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS)); - dstAddr = builder.CreateBitCast(dstAddr, Type::getInt8PtrTy(Context, dstAS)); + SrcAddr = Builder.CreateBitCast(SrcAddr, Builder.getInt8PtrTy(SrcAS)); + DstAddr = Builder.CreateBitCast(DstAddr, Builder.getInt8PtrTy(DstAS)); - IRBuilder<> loop(loopBB); - // The loop index (ind) is a phi node. - PHINode *ind = loop.CreatePHI(indType, 0); - // Incoming value for ind is 0 - ind->addIncoming(ConstantInt::get(indType, 0), origBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + LoopIndex->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), OrigBB); - // load from srcAddr+ind + // load from SrcAddr+LoopIndex // TODO: we can leverage the align parameter of llvm.memcpy for more efficient // word-sized loads and stores. 
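// What that TODO points at (an assumed illustration): with align >= 4 and a
// length that is a multiple of 4, the same loop could move i32 words instead
// of bytes, cutting the trip count by 4x; as written, the loop always copies
// one byte per iteration.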
- Value *val = loop.CreateLoad(loop.CreateGEP(loop.getInt8Ty(), srcAddr, ind), - srcVolatile); - // store at dstAddr+ind - loop.CreateStore(val, loop.CreateGEP(loop.getInt8Ty(), dstAddr, ind), - dstVolatile); + Value *Element = + LoopBuilder.CreateLoad(LoopBuilder.CreateInBoundsGEP( + LoopBuilder.getInt8Ty(), SrcAddr, LoopIndex), + SrcIsVolatile); + // store at DstAddr+LoopIndex + LoopBuilder.CreateStore(Element, + LoopBuilder.CreateInBoundsGEP(LoopBuilder.getInt8Ty(), + DstAddr, LoopIndex), + DstIsVolatile); - // The value for ind coming from backedge is (ind + 1) - Value *newind = loop.CreateAdd(ind, ConstantInt::get(indType, 1)); - ind->addIncoming(newind, loopBB); + // The value for LoopIndex coming from backedge is (LoopIndex + 1) + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(TypeOfCopyLen, 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); - loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); } -// Lower MemSetInst to loop -static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr, - Value *len, Value *val, LLVMContext &Context, - Function &F) { - BasicBlock *origBB = splitAt->getParent(); - BasicBlock *newBB = splitAt->getParent()->splitBasicBlock(splitAt, "split"); - BasicBlock *loopBB = BasicBlock::Create(Context, "loadstoreloop", &F, newBB); +// Lower memmove to IR. memmove is required to correctly copy overlapping memory +// regions; therefore, it has to check the relative positions of the source and +// destination pointers and choose the copy direction accordingly. +// +// The code below is an IR rendition of this C function: +// +// void* memmove(void* dst, const void* src, size_t n) { +// unsigned char* d = dst; +// const unsigned char* s = src; +// if (s < d) { +// // copy backwards +// while (n--) { +// d[n] = s[n]; +// } +// } else { +// // copy forward +// for (size_t i = 0; i < n; ++i) { +// d[i] = s[i]; +// } +// } +// return dst; +// } +void convertMemMoveToLoop(Instruction *ConvertedInst, Value *SrcAddr, + Value *DstAddr, Value *CopyLen, bool SrcIsVolatile, + bool DstIsVolatile, LLVMContext &Context, + Function &F) { + Type *TypeOfCopyLen = CopyLen->getType(); + BasicBlock *OrigBB = ConvertedInst->getParent(); - origBB->getTerminator()->setSuccessor(0, loopBB); - IRBuilder<> builder(origBB, origBB->getTerminator()); + // Create the a comparison of src and dst, based on which we jump to either + // the forward-copy part of the function (if src >= dst) or the backwards-copy + // part (if src < dst). + // SplitBlockAndInsertIfThenElse conveniently creates the basic if-then-else + // structure. Its block terminators (unconditional branches) are replaced by + // the appropriate conditional branches when the loop is built. 
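+ // Resulting CFG (sketch): the pointer compare picks the direction, and
+ // compare_n_to_0 lets either side skip its loop when n == 0:
+ //
+ //            OrigBB: %compare_src_dst = icmp ult %src, %dst
+ //             /                                  \
+ //      copy_backwards                       copy_forward
+ //            |                                    |
+ //   copy_backwards_loop                  copy_forward_loop
+ //             \                                  /
+ //              +---------- memmove_done --------+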
+ ICmpInst *PtrCompare = new ICmpInst(ConvertedInst, ICmpInst::ICMP_ULT, + SrcAddr, DstAddr, "compare_src_dst"); + TerminatorInst *ThenTerm, *ElseTerm; + SplitBlockAndInsertIfThenElse(PtrCompare, ConvertedInst, &ThenTerm, + &ElseTerm); - unsigned dstAS = cast(dstAddr->getType())->getAddressSpace(); + // Each part of the function consists of two blocks: + // copy_backwards: used to skip the loop when n == 0 + // copy_backwards_loop: the actual backwards loop BB + // copy_forward: used to skip the loop when n == 0 + // copy_forward_loop: the actual forward loop BB + BasicBlock *CopyBackwardsBB = ThenTerm->getParent(); + CopyBackwardsBB->setName("copy_backwards"); + BasicBlock *CopyForwardBB = ElseTerm->getParent(); + CopyForwardBB->setName("copy_forward"); + BasicBlock *ExitBB = ConvertedInst->getParent(); + ExitBB->setName("memmove_done"); + + // Initial comparison of n == 0 that lets us skip the loops altogether. Shared + // between both backwards and forward copy clauses. + ICmpInst *CompareN = + new ICmpInst(OrigBB->getTerminator(), ICmpInst::ICMP_EQ, CopyLen, + ConstantInt::get(TypeOfCopyLen, 0), "compare_n_to_0"); + + // Copying backwards. + BasicBlock *LoopBB = + BasicBlock::Create(Context, "copy_backwards_loop", &F, CopyForwardBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0); + Value *IndexPtr = LoopBuilder.CreateSub( + LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr"); + Value *Element = LoopBuilder.CreateLoad( + LoopBuilder.CreateInBoundsGEP(SrcAddr, IndexPtr), "element"); + LoopBuilder.CreateStore(Element, + LoopBuilder.CreateInBoundsGEP(DstAddr, IndexPtr)); + LoopBuilder.CreateCondBr( + LoopBuilder.CreateICmpEQ(IndexPtr, ConstantInt::get(TypeOfCopyLen, 0)), + ExitBB, LoopBB); + LoopPhi->addIncoming(IndexPtr, LoopBB); + LoopPhi->addIncoming(CopyLen, CopyBackwardsBB); + BranchInst::Create(ExitBB, LoopBB, CompareN, ThenTerm); + ThenTerm->eraseFromParent(); + + // Copying forward. + BasicBlock *FwdLoopBB = + BasicBlock::Create(Context, "copy_forward_loop", &F, ExitBB); + IRBuilder<> FwdLoopBuilder(FwdLoopBB); + PHINode *FwdCopyPhi = FwdLoopBuilder.CreatePHI(TypeOfCopyLen, 0, "index_ptr"); + Value *FwdElement = FwdLoopBuilder.CreateLoad( + FwdLoopBuilder.CreateInBoundsGEP(SrcAddr, FwdCopyPhi), "element"); + FwdLoopBuilder.CreateStore( + FwdElement, FwdLoopBuilder.CreateInBoundsGEP(DstAddr, FwdCopyPhi)); + Value *FwdIndexPtr = FwdLoopBuilder.CreateAdd( + FwdCopyPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_increment"); + FwdLoopBuilder.CreateCondBr(FwdLoopBuilder.CreateICmpEQ(FwdIndexPtr, CopyLen), + ExitBB, FwdLoopBB); + FwdCopyPhi->addIncoming(FwdIndexPtr, FwdLoopBB); + FwdCopyPhi->addIncoming(ConstantInt::get(TypeOfCopyLen, 0), CopyForwardBB); + + BranchInst::Create(ExitBB, FwdLoopBB, CompareN, ElseTerm); + ElseTerm->eraseFromParent(); +} + +// Lower memset to loop. 
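+// Equivalent C for the loop built below (illustration; SetValue is an i8
+// for a standard llvm.memset call):
+//
+//   unsigned char *d = (unsigned char *)DstAddr;
+//   for (size_t i = 0; i < CopyLen; ++i)
+//     d[i] = SetValue;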
+void convertMemSetToLoop(Instruction *ConvertedInst, Value *DstAddr, + Value *CopyLen, Value *SetValue, LLVMContext &Context, + Function &F) { + BasicBlock *OrigBB = ConvertedInst->getParent(); + BasicBlock *NewBB = + ConvertedInst->getParent()->splitBasicBlock(ConvertedInst, "split"); + BasicBlock *LoopBB = BasicBlock::Create(Context, "loadstoreloop", &F, NewBB); + + OrigBB->getTerminator()->setSuccessor(0, LoopBB); + IRBuilder<> Builder(OrigBB->getTerminator()); // Cast pointer to the type of value getting stored - dstAddr = - builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS)); + unsigned dstAS = cast(DstAddr->getType())->getAddressSpace(); + DstAddr = Builder.CreateBitCast(DstAddr, + PointerType::get(SetValue->getType(), dstAS)); - IRBuilder<> loop(loopBB); - PHINode *ind = loop.CreatePHI(len->getType(), 0); - ind->addIncoming(ConstantInt::get(len->getType(), 0), origBB); + IRBuilder<> LoopBuilder(LoopBB); + PHINode *LoopIndex = LoopBuilder.CreatePHI(CopyLen->getType(), 0); + LoopIndex->addIncoming(ConstantInt::get(CopyLen->getType(), 0), OrigBB); - loop.CreateStore(val, loop.CreateGEP(val->getType(), dstAddr, ind), false); + LoopBuilder.CreateStore( + SetValue, + LoopBuilder.CreateInBoundsGEP(SetValue->getType(), DstAddr, LoopIndex), + false); - Value *newind = loop.CreateAdd(ind, ConstantInt::get(len->getType(), 1)); - ind->addIncoming(newind, loopBB); + Value *NewIndex = + LoopBuilder.CreateAdd(LoopIndex, ConstantInt::get(CopyLen->getType(), 1)); + LoopIndex->addIncoming(NewIndex, LoopBB); - loop.CreateCondBr(loop.CreateICmpULT(newind, len), loopBB, newBB); + LoopBuilder.CreateCondBr(LoopBuilder.CreateICmpULT(NewIndex, CopyLen), LoopBB, + NewBB); } bool NVPTXLowerAggrCopies::runOnFunction(Function &F) { - SmallVector aggrLoads; - SmallVector aggrMemcpys; - SmallVector aggrMemsets; + SmallVector AggrLoads; + SmallVector MemCalls; const DataLayout &DL = F.getParent()->getDataLayout(); LLVMContext &Context = F.getParent()->getContext(); - // - // Collect all the aggrLoads, aggrMemcpys and addrMemsets. - // + // Collect all aggregate loads and mem* calls. for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; ++II) { - if (LoadInst *load = dyn_cast(II)) { - if (!load->hasOneUse()) + if (LoadInst *LI = dyn_cast(II)) { + if (!LI->hasOneUse()) continue; - if (DL.getTypeStoreSize(load->getType()) < MaxAggrCopySize) + if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize) continue; - User *use = load->user_back(); - if (StoreInst *store = dyn_cast(use)) { - if (store->getOperand(0) != load) + if (StoreInst *SI = dyn_cast(LI->user_back())) { + if (SI->getOperand(0) != LI) continue; - aggrLoads.push_back(load); + AggrLoads.push_back(LI); } - } else if (MemTransferInst *intr = dyn_cast(II)) { - Value *len = intr->getLength(); - // If the number of elements being copied is greater - // than MaxAggrCopySize, lower it to a loop - if (ConstantInt *len_int = dyn_cast(len)) { - if (len_int->getZExtValue() >= MaxAggrCopySize) { - aggrMemcpys.push_back(intr); + } else if (MemIntrinsic *IntrCall = dyn_cast(II)) { + // Convert intrinsic calls with variable size or with constant size + // larger than the MaxAggrCopySize threshold. 
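+ // E.g. (illustrative sizes): a constant llvm.memcpy of 4096 bytes is
+ // queued for loop expansion here, a small constant copy is left to the
+ // normal lowering, and a variable-length call is always expanded because
+ // its size cannot be bounded at compile time.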
+ if (ConstantInt *LenCI = dyn_cast(IntrCall->getLength())) { + if (LenCI->getZExtValue() >= MaxAggrCopySize) { + MemCalls.push_back(IntrCall); } } else { - // turn variable length memcpy/memmov into loop - aggrMemcpys.push_back(intr); - } - } else if (MemSetInst *memsetintr = dyn_cast(II)) { - Value *len = memsetintr->getLength(); - if (ConstantInt *len_int = dyn_cast(len)) { - if (len_int->getZExtValue() >= MaxAggrCopySize) { - aggrMemsets.push_back(memsetintr); - } - } else { - // turn variable length memset into loop - aggrMemsets.push_back(memsetintr); + MemCalls.push_back(IntrCall); } } } } - if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) && - (aggrMemsets.size() == 0)) + + if (AggrLoads.size() == 0 && MemCalls.size() == 0) { return false; + } // // Do the transformation of an aggr load/copy/set to a loop // - for (LoadInst *load : aggrLoads) { - StoreInst *store = dyn_cast(*load->user_begin()); - Value *srcAddr = load->getOperand(0); - Value *dstAddr = store->getOperand(1); - unsigned numLoads = DL.getTypeStoreSize(load->getType()); - Value *len = ConstantInt::get(Type::getInt32Ty(Context), numLoads); + for (LoadInst *LI : AggrLoads) { + StoreInst *SI = dyn_cast(*LI->user_begin()); + Value *SrcAddr = LI->getOperand(0); + Value *DstAddr = SI->getOperand(1); + unsigned NumLoads = DL.getTypeStoreSize(LI->getType()); + Value *CopyLen = ConstantInt::get(Type::getInt32Ty(Context), NumLoads); - convertTransferToLoop(store, srcAddr, dstAddr, len, load->isVolatile(), - store->isVolatile(), Context, F); + convertMemCpyToLoop(/* ConvertedInst */ SI, + /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr, + /* CopyLen */ CopyLen, + /* SrcIsVolatile */ LI->isVolatile(), + /* DstIsVolatile */ SI->isVolatile(), + /* Context */ Context, + /* Function F */ F); - store->eraseFromParent(); - load->eraseFromParent(); + SI->eraseFromParent(); + LI->eraseFromParent(); } - for (MemTransferInst *cpy : aggrMemcpys) { - convertTransferToLoop(/* splitAt */ cpy, - /* srcAddr */ cpy->getSource(), - /* dstAddr */ cpy->getDest(), - /* len */ cpy->getLength(), - /* srcVolatile */ cpy->isVolatile(), - /* dstVolatile */ cpy->isVolatile(), + // Transform mem* intrinsic calls. 
+ for (MemIntrinsic *MemCall : MemCalls) { + if (MemCpyInst *Memcpy = dyn_cast(MemCall)) { + convertMemCpyToLoop(/* ConvertedInst */ Memcpy, + /* SrcAddr */ Memcpy->getRawSource(), + /* DstAddr */ Memcpy->getRawDest(), + /* CopyLen */ Memcpy->getLength(), + /* SrcIsVolatile */ Memcpy->isVolatile(), + /* DstIsVolatile */ Memcpy->isVolatile(), /* Context */ Context, /* Function F */ F); - cpy->eraseFromParent(); - } + } else if (MemMoveInst *Memmove = dyn_cast(MemCall)) { + convertMemMoveToLoop(/* ConvertedInst */ Memmove, + /* SrcAddr */ Memmove->getRawSource(), + /* DstAddr */ Memmove->getRawDest(), + /* CopyLen */ Memmove->getLength(), + /* SrcIsVolatile */ Memmove->isVolatile(), + /* DstIsVolatile */ Memmove->isVolatile(), + /* Context */ Context, + /* Function F */ F); - for (MemSetInst *memsetinst : aggrMemsets) { - Value *len = memsetinst->getLength(); - Value *val = memsetinst->getValue(); - convertMemSetToLoop(memsetinst, memsetinst->getDest(), len, val, Context, - F); - memsetinst->eraseFromParent(); + } else if (MemSetInst *Memset = dyn_cast(MemCall)) { + convertMemSetToLoop(/* ConvertedInst */ Memset, + /* DstAddr */ Memset->getRawDest(), + /* CopyLen */ Memset->getLength(), + /* SetValue */ Memset->getValue(), + /* Context */ Context, + /* Function F */ F); + } + MemCall->eraseFromParent(); } return true; } +} // namespace + +namespace llvm { +void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); +} + +INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies", + "Lower aggregate copies, and llvm.mem* intrinsics into loops", + false, false) + FunctionPass *llvm::createLowerAggrCopies() { return new NVPTXLowerAggrCopies(); } diff --git a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp index 93d0025d8f53..624052e9b981 100644 --- a/lib/Target/NVPTX/NVPTXLowerAlloca.cpp +++ b/lib/Target/NVPTX/NVPTXLowerAlloca.cpp @@ -81,7 +81,7 @@ bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) { // Check Load, Store, GEP, and BitCast Uses on alloca and make them // use the converted generic address, in order to expose non-generic // addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types - // of instructions this is unecessary and may introduce redudant + // of instructions this is unnecessary and may introduce redundant // address cast. const auto &AllocaUse = *UI++; auto LI = dyn_cast(AllocaUse.getUser()); diff --git a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp index b533f316d8a9..6656077348a1 100644 --- a/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp +++ b/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp @@ -47,6 +47,36 @@ // ... // } // +// 3. Convert pointers in a byval kernel parameter to pointers in the global +// address space. As #2, it allows NVPTX to emit more ld/st.global. E.g., +// +// struct S { +// int *x; +// int *y; +// }; +// __global__ void foo(S s) { +// int *b = s.y; +// // use b +// } +// +// "b" points to the global address space. 
At the IR level, +// +// define void @foo({i32*, i32*}* byval %input) { +// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1 +// %b = load i32*, i32** %b_ptr +// ; use %b +// } +// +// becomes +// +// define void @foo({i32*, i32*}* byval %input) { +// %b_ptr = getelementptr {i32*, i32*}, {i32*, i32*}* %input, i64 0, i32 1 +// %b = load i32*, i32** %b_ptr +// %b_global = addrspacecast i32* %b to i32 addrspace(1)* +// %b_generic = addrspacecast i32 addrspace(1)* %b_global to i32* +// ; use %b_generic +// } +// // TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes // don't cancel the addrspacecast pair this pass emits. //===----------------------------------------------------------------------===// @@ -54,6 +84,7 @@ #include "NVPTX.h" #include "NVPTXUtilities.h" #include "NVPTXTargetMachine.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" @@ -71,9 +102,12 @@ class NVPTXLowerKernelArgs : public FunctionPass { bool runOnFunction(Function &F) override; // handle byval parameters - void handleByValParam(Argument *); - // handle non-byval pointer parameters - void handlePointerParam(Argument *); + void handleByValParam(Argument *Arg); + // Knowing Ptr must point to the global address space, this function + // addrspacecasts Ptr to global and then back to generic. This allows + // NVPTXFavorNonGenericAddrSpace to fold the global-to-generic cast into + // loads/stores that appear later. + void markPointerAsGlobal(Value *Ptr); public: static char ID; // Pass identification, replacement for typeid @@ -104,7 +138,7 @@ INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args", // // The above code allocates some space in the stack and copies the incoming // struct from param space to local space. -// Then replace all occurences of %d by %temp. +// Then replace all occurrences of %d by %temp. // ============================================================================= void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) { Function *Func = Arg->getParent(); @@ -128,26 +162,32 @@ void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) { new StoreInst(LI, AllocA, FirstInst); } -void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) { - assert(!Arg->hasByValAttr() && - "byval params should be handled by handleByValParam"); - - // Do nothing if the argument already points to the global address space. - if (Arg->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL) +void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) { + if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL) return; - Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin(); - Instruction *ArgInGlobal = new AddrSpaceCastInst( - Arg, PointerType::get(Arg->getType()->getPointerElementType(), - ADDRESS_SPACE_GLOBAL), - Arg->getName(), FirstInst); - Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(), - Arg->getName(), FirstInst); - // Replace with ArgInGeneric all uses of Args except ArgInGlobal. - Arg->replaceAllUsesWith(ArgInGeneric); - ArgInGlobal->setOperand(0, Arg); -} + // Decide where to emit the addrspacecast pair. + BasicBlock::iterator InsertPt; + if (Argument *Arg = dyn_cast<Argument>(Ptr)) { + // Insert at the function entry if Ptr is an argument. + InsertPt = Arg->getParent()->getEntryBlock().begin(); + } else { + // Insert right after Ptr if Ptr is an instruction.
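// (Editor's aside, illustration only; it restates the header comment above.)
// For a hypothetical CUDA kernel
//
//   struct S { int *x; int *y; };
//   __global__ void foo(S s) { int v = *s.y; }
//
// the load of s.y is traced back via GetUnderlyingObject to a byval kernel
// argument, so the global/generic addrspacecast pair is emitted immediately
// after that load - the insertion point the next statement computes.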
+ InsertPt = ++cast<Instruction>(Ptr)->getIterator(); + assert(InsertPt != InsertPt->getParent()->end() && + "We don't call this function with Ptr being a terminator."); + } + Instruction *PtrInGlobal = new AddrSpaceCastInst( + Ptr, PointerType::get(Ptr->getType()->getPointerElementType(), + ADDRESS_SPACE_GLOBAL), + Ptr->getName(), &*InsertPt); + Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, Ptr->getType(), + Ptr->getName(), &*InsertPt); + // Replace with PtrInGeneric all uses of Ptr except PtrInGlobal. + Ptr->replaceAllUsesWith(PtrInGeneric); + PtrInGlobal->setOperand(0, Ptr); +} // ============================================================================= // Main function for this pass. @@ -157,12 +197,32 @@ bool NVPTXLowerKernelArgs::runOnFunction(Function &F) { if (!isKernelFunction(F)) return false; + if (TM && TM->getDrvInterface() == NVPTX::CUDA) { + // Mark pointers in byval structs as global. + for (auto &B : F) { + for (auto &I : B) { + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { + if (LI->getType()->isPointerTy()) { + Value *UO = GetUnderlyingObject(LI->getPointerOperand(), + F.getParent()->getDataLayout()); + if (Argument *Arg = dyn_cast<Argument>(UO)) { + if (Arg->hasByValAttr()) { + // LI is a load from a pointer within a byval kernel parameter. + markPointerAsGlobal(LI); + } + } + } + } + } + } + } + for (Argument &Arg : F.args()) { if (Arg.getType()->isPointerTy()) { if (Arg.hasByValAttr()) handleByValParam(&Arg); else if (TM && TM->getDrvInterface() == NVPTX::CUDA) - handlePointerParam(&Arg); + markPointerAsGlobal(&Arg); } } return true; diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 46b4b33e7e40..81a606d7535c 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -68,7 +68,7 @@ public: return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *findAssociatedSection() const override { return nullptr; } + MCFragment *findAssociatedFragment() const override { return nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} @@ -110,7 +110,7 @@ public: return false; } void visitUsedExpr(MCStreamer &Streamer) const override {}; - MCSection *findAssociatedSection() const override { return nullptr; } + MCFragment *findAssociatedFragment() const override { return nullptr; } // There are no TLS NVPTXMCExprs at the moment. void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {} diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp index 5fd69a6815a8..17019d7b364d 100644 --- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp +++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp @@ -72,7 +72,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().isReturn()) + if (I->isReturnBlock()) TFI.emitEpilogue(MF, *I); } diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h index 0d2627d62ebd..45a7309479ee 100644 --- a/lib/Target/NVPTX/NVPTXSection.h +++ b/lib/Target/NVPTX/NVPTXSection.h @@ -19,15 +19,14 @@ #include <string> namespace llvm { -/// NVPTXSection - Represents a section in PTX -/// PTX does not have sections. We create this class in order to use -/// the ASMPrint interface. +/// Represents a section in PTX. PTX does not have sections.
We create this class +/// in order to use the ASMPrint interface. /// -class NVPTXSection : public MCSection { +class NVPTXSection final : public MCSection { virtual void anchor(); public: NVPTXSection(SectionVariant V, SectionKind K) : MCSection(V, K, nullptr) {} - virtual ~NVPTXSection() {} + ~NVPTXSection() {} /// Override this as NVPTX has its own way of printing switching /// to a section. diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 248f9e117d83..aa931b134da9 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -53,6 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&); void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); +void initializeNVPTXLowerAggrCopiesPass(PassRegistry &); void initializeNVPTXLowerKernelArgsPass(PassRegistry &); void initializeNVPTXLowerAllocaPass(PassRegistry &); } @@ -64,14 +65,15 @@ extern "C" void LLVMInitializeNVPTXTarget() { // FIXME: This pass is really intended to be invoked during IR optimization, // but it's very NVPTX-specific. - initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); - initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); - initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry()); - initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); - initializeNVPTXFavorNonGenericAddrSpacesPass( - *PassRegistry::getPassRegistry()); - initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry()); - initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry()); + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeNVVMReflectPass(PR); + initializeGenericToNVVMPass(PR); + initializeNVPTXAllocaHoistingPass(PR); + initializeNVPTXAssignValidGlobalNamesPass(PR); + initializeNVPTXFavorNonGenericAddrSpacesPass(PR); + initializeNVPTXLowerKernelArgsPass(PR); + initializeNVPTXLowerAllocaPass(PR); + initializeNVPTXLowerAggrCopiesPass(PR); } static std::string computeDataLayout(bool is64Bit) { @@ -139,6 +141,10 @@ public: FunctionPass *createTargetRegisterAllocator(bool) override; void addFastRegAlloc(FunctionPass *RegAllocPass) override; void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; + +private: + // if the opt level is aggressive, add GVN; otherwise, add EarlyCSE. + void addEarlyCSEOrGVNPass(); }; } // end anonymous namespace @@ -148,11 +154,18 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(NVPTXTTIImpl(this, F)); }); } +void NVPTXPassConfig::addEarlyCSEOrGVNPass() { + if (getOptLevel() == CodeGenOpt::Aggressive) + addPass(createGVNPass()); + else + addPass(createEarlyCSEPass()); +} + void NVPTXPassConfig::addIRPasses() { // The following passes are known to not play well with virtual regs hanging // around after register allocation (which in our case, is *all* registers). @@ -161,13 +174,14 @@ void NVPTXPassConfig::addIRPasses() { // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 
disablePass(&PrologEpilogCodeInserterID); disablePass(&MachineCopyPropagationID); - disablePass(&BranchFolderPassID); disablePass(&TailDuplicateID); + addPass(createNVVMReflectPass()); addPass(createNVPTXImageOptimizerPass()); - TargetPassConfig::addIRPasses(); addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); + + // === Propagate special address spaces === addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); // NVPTXLowerKernelArgs emits alloca for byval parameters which can often // be eliminated by SROA. @@ -178,22 +192,38 @@ void NVPTXPassConfig::addIRPasses() { // them unused. We could remove dead code in an ad-hoc manner, but that // requires manual work and might be error-prone. addPass(createDeadCodeEliminationPass()); + + // === Straight-line scalar optimizations === addPass(createSeparateConstOffsetFromGEPPass()); + addPass(createSpeculativeExecutionPass()); // ReassociateGEPs exposes more opportunites for SLSR. See // the example in reassociate-geps-and-slsr.ll. addPass(createStraightLineStrengthReducePass()); // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE // for some of our benchmarks. - if (getOptLevel() == CodeGenOpt::Aggressive) - addPass(createGVNPass()); - else - addPass(createEarlyCSEPass()); + addEarlyCSEOrGVNPass(); // Run NaryReassociate after EarlyCSE/GVN to be more effective. addPass(createNaryReassociatePass()); // NaryReassociate on GEPs creates redundant common expressions, so run // EarlyCSE after it. addPass(createEarlyCSEPass()); + + // === LSR and other generic IR passes === + TargetPassConfig::addIRPasses(); + // EarlyCSE is not always strong enough to clean up what LSR produces. For + // example, GVN can combine + // + // %0 = add %a, %b + // %1 = add %b, %a + // + // and + // + // %0 = shl nsw %a, 2 + // %1 = shl %a, 2 + // + // but EarlyCSE can do neither of them. 
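// (Editor's aside, illustration only.) GVN's value numbering canonicalizes
// the operands of commutative operations, so in a function like
//
//   int f(int a, int b) { return (a + b) ^ (b + a); }
//
// the two adds receive the same value number and collapse into one, while
// EarlyCSE's order-sensitive hashing keeps both, per the comment above. The
// call emitted next therefore picks GVN at CodeGenOpt::Aggressive, trading
// compile time for the better cleanup, and the cheaper EarlyCSE otherwise.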
+ addEarlyCSEOrGVNPass(); } bool NVPTXPassConfig::addInstSelector() { diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h index 5ecdc8748830..0f88ddfaa934 100644 --- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h +++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h @@ -48,8 +48,7 @@ public: void Initialize(MCContext &ctx, const TargetMachine &TM) override { TargetLoweringObjectFile::Initialize(ctx, TM); TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText()); - DataSection = - new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel()); + DataSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getData()); BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS()); ReadOnlySection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly()); @@ -84,7 +83,7 @@ public: new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata()); } - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override { return ReadOnlySection; } diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp index e7250cdba5ac..6e679dd0257c 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -89,12 +89,12 @@ bool NVPTXTTIImpl::isSourceOfDivergence(const Value *V) { return false; } -unsigned NVPTXTTIImpl::getArithmeticInstrCost( +int NVPTXTTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); diff --git a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h index 5bcd1e27a558..0946a3293eec 100644 --- a/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -52,7 +52,7 @@ public: bool isSourceOfDivergence(const Value *V); - unsigned getArithmeticInstrCost( + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp index 1f178af41670..578b466568ae 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -335,106 +335,7 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) { return false; } -bool llvm::isBarrierIntrinsic(Intrinsic::ID id) { - if ((id == Intrinsic::nvvm_barrier0) || - (id == Intrinsic::nvvm_barrier0_popc) || - (id == Intrinsic::nvvm_barrier0_and) || - (id == Intrinsic::nvvm_barrier0_or) || - (id == Intrinsic::cuda_syncthreads)) - return true; - return false; -} - -// Interface for checking all memory space transfer related intrinsics -bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) { - if (id == Intrinsic::nvvm_ptr_local_to_gen || - id == Intrinsic::nvvm_ptr_shared_to_gen || - id == Intrinsic::nvvm_ptr_global_to_gen || - id == Intrinsic::nvvm_ptr_constant_to_gen || - id == Intrinsic::nvvm_ptr_gen_to_global || - id == Intrinsic::nvvm_ptr_gen_to_shared || - id == Intrinsic::nvvm_ptr_gen_to_local || - id == Intrinsic::nvvm_ptr_gen_to_constant || - id == 
Intrinsic::nvvm_ptr_gen_to_param) { - return true; - } - - return false; -} - -// consider several special intrinsics in striping pointer casts, and -// provide an option to ignore GEP indicies for find out the base address only -// which could be used in simple alias disambigurate. -const Value * -llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) { - V = V->stripPointerCasts(); - while (true) { - if (const IntrinsicInst *IS = dyn_cast(V)) { - if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) { - V = IS->getArgOperand(0)->stripPointerCasts(); - continue; - } - } else if (ignore_GEP_indices) - if (const GEPOperator *GEP = dyn_cast(V)) { - V = GEP->getPointerOperand()->stripPointerCasts(); - continue; - } - break; - } - return V; -} - -// consider several special intrinsics in striping pointer casts, and -// - ignore GEP indicies for find out the base address only, and -// - tracking PHINode -// which could be used in simple alias disambigurate. -const Value * -llvm::skipPointerTransfer(const Value *V, std::set &processed) { - if (processed.find(V) != processed.end()) - return nullptr; - processed.insert(V); - - const Value *V2 = V->stripPointerCasts(); - if (V2 != V && processed.find(V2) != processed.end()) - return nullptr; - processed.insert(V2); - - V = V2; - - while (true) { - if (const IntrinsicInst *IS = dyn_cast(V)) { - if (isMemorySpaceTransferIntrinsic(IS->getIntrinsicID())) { - V = IS->getArgOperand(0)->stripPointerCasts(); - continue; - } - } else if (const GEPOperator *GEP = dyn_cast(V)) { - V = GEP->getPointerOperand()->stripPointerCasts(); - continue; - } else if (const PHINode *PN = dyn_cast(V)) { - if (V != V2 && processed.find(V) != processed.end()) - return nullptr; - processed.insert(PN); - const Value *common = nullptr; - for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { - const Value *pv = PN->getIncomingValue(i); - const Value *base = skipPointerTransfer(pv, processed); - if (base) { - if (!common) - common = base; - else if (common != base) - return PN; - } - } - if (!common) - return PN; - V = common; - } - break; - } - return V; -} - -// The following are some useful utilities for debuggung +// The following are some useful utilities for debugging BasicBlock *llvm::getParentBlock(Value *v) { if (BasicBlock *B = dyn_cast(v)) @@ -466,7 +367,7 @@ void llvm::dumpBlock(Value *v, char *blockName) { return; for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) { - BasicBlock *B = it; + BasicBlock *B = &*it; if (strcmp(B->getName().data(), blockName) == 0) { B->dump(); return; @@ -490,7 +391,7 @@ Instruction *llvm::getInst(Value *base, char *instName) { return nullptr; } -// Dump an instruction by nane +// Dump an instruction by name void llvm::dumpInst(Value *base, char *instName) { Instruction *I = getInst(base, instName); if (I) diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h index 7e2ce73daaa3..a5262cb7412f 100644 --- a/lib/Target/NVPTX/NVPTXUtilities.h +++ b/lib/Target/NVPTX/NVPTXUtilities.h @@ -61,27 +61,6 @@ bool isKernelFunction(const llvm::Function &); bool getAlign(const llvm::Function &, unsigned index, unsigned &); bool getAlign(const llvm::CallInst &, unsigned index, unsigned &); -bool isBarrierIntrinsic(llvm::Intrinsic::ID); - -/// make_vector - Helper function which is useful for building temporary vectors -/// to pass into type construction of CallInst ctors. This turns a null -/// terminated list of pointers (or other value types) into a real live vector. 
-/// -template inline std::vector make_vector(T A, ...) { - va_list Args; - va_start(Args, A); - std::vector Result; - Result.push_back(A); - while (T Val = va_arg(Args, T)) - Result.push_back(Val); - va_end(Args); - return Result; -} - -bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id); -const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices); -const Value * -skipPointerTransfer(const Value *V, std::set &processed); BasicBlock *getParentBlock(Value *v); Function *getParentFunction(Value *v); void dumpBlock(Value *v, char *blockName); diff --git a/lib/Target/NVPTX/NVPTXVector.td b/lib/Target/NVPTX/NVPTXVector.td index a237247e4833..e69bbba9f193 100644 --- a/lib/Target/NVPTX/NVPTXVector.td +++ b/lib/Target/NVPTX/NVPTXVector.td @@ -26,7 +26,7 @@ let isAsCheapAsAMove=1, VecInstType=isVecExtract.Value in { def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), (ins V2I16Regs:$src, i8imm:$c), "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int16Regs:$dst, (vector_extract + [(set Int16Regs:$dst, (extractelt (v2i16 V2I16Regs:$src), imm:$c))], IMOV16rr>; @@ -34,7 +34,7 @@ def V2i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), (ins V4I16Regs:$src, i8imm:$c), "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int16Regs:$dst, (vector_extract + [(set Int16Regs:$dst, (extractelt (v4i16 V4I16Regs:$src), imm:$c))], IMOV16rr>; @@ -42,7 +42,7 @@ def V4i16Extract : NVPTXVecInst<(outs Int16Regs:$dst), def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), (ins V2I8Regs:$src, i8imm:$c), "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int8Regs:$dst, (vector_extract + [(set Int8Regs:$dst, (extractelt (v2i8 V2I8Regs:$src), imm:$c))], IMOV8rr>; @@ -50,7 +50,7 @@ def V2i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), (ins V4I8Regs:$src, i8imm:$c), "mov.u16 \t$dst, $src${c:vecelem};", - [(set Int8Regs:$dst, (vector_extract + [(set Int8Regs:$dst, (extractelt (v4i8 V4I8Regs:$src), imm:$c))], IMOV8rr>; @@ -58,7 +58,7 @@ def V4i8Extract : NVPTXVecInst<(outs Int8Regs:$dst), def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), (ins V2I32Regs:$src, i8imm:$c), "mov.u32 \t$dst, $src${c:vecelem};", - [(set Int32Regs:$dst, (vector_extract + [(set Int32Regs:$dst, (extractelt (v2i32 V2I32Regs:$src), imm:$c))], IMOV32rr>; @@ -66,7 +66,7 @@ def V2i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), (ins V2F32Regs:$src, i8imm:$c), "mov.f32 \t$dst, $src${c:vecelem};", - [(set Float32Regs:$dst, (vector_extract + [(set Float32Regs:$dst, (extractelt (v2f32 V2F32Regs:$src), imm:$c))], FMOV32rr>; @@ -74,7 +74,7 @@ def V2f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst), (ins V2I64Regs:$src, i8imm:$c), "mov.u64 \t$dst, $src${c:vecelem};", - [(set Int64Regs:$dst, (vector_extract + [(set Int64Regs:$dst, (extractelt (v2i64 V2I64Regs:$src), imm:$c))], IMOV64rr>; @@ -82,7 +82,7 @@ def V2i64Extract : NVPTXVecInst<(outs Int64Regs:$dst), def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst), (ins V2F64Regs:$src, i8imm:$c), "mov.f64 \t$dst, $src${c:vecelem};", - [(set Float64Regs:$dst, (vector_extract + [(set Float64Regs:$dst, (extractelt (v2f64 V2F64Regs:$src), imm:$c))], FMOV64rr>; @@ -90,7 +90,7 @@ def V2f64Extract : NVPTXVecInst<(outs Float64Regs:$dst), def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), (ins V4I32Regs:$src, i8imm:$c), "mov.u32 \t$dst, $src${c:vecelem};", - [(set Int32Regs:$dst, (vector_extract + 
[(set Int32Regs:$dst, (extractelt (v4i32 V4I32Regs:$src), imm:$c))], IMOV32rr>; @@ -98,7 +98,7 @@ def V4i32Extract : NVPTXVecInst<(outs Int32Regs:$dst), def V4f32Extract : NVPTXVecInst<(outs Float32Regs:$dst), (ins V4F32Regs:$src, i8imm:$c), "mov.f32 \t$dst, $src${c:vecelem};", - [(set Float32Regs:$dst, (vector_extract + [(set Float32Regs:$dst, (extractelt (v4f32 V4F32Regs:$src), imm:$c))], FMOV32rr>; } @@ -110,8 +110,7 @@ def V2i8Insert : NVPTXVecInst<(outs V2I8Regs:$dst), "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u16 \t$dst${c:vecelem}, $val;", [(set V2I8Regs:$dst, - (vector_insert V2I8Regs:$src, Int8Regs:$val, imm:$c))], - IMOV8rr>; + (insertelt V2I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>; // Insert v4i8 def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst), @@ -119,8 +118,7 @@ def V4i8Insert : NVPTXVecInst<(outs V4I8Regs:$dst), "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u16 \t$dst${c:vecelem}, $val;", [(set V4I8Regs:$dst, - (vector_insert V4I8Regs:$src, Int8Regs:$val, imm:$c))], - IMOV8rr>; + (insertelt V4I8Regs:$src, Int8Regs:$val, imm:$c))], IMOV8rr>; // Insert v2i16 def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst), @@ -128,8 +126,8 @@ def V2i16Insert : NVPTXVecInst<(outs V2I16Regs:$dst), "mov.v2.u16 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u16 \t$dst${c:vecelem}, $val;", [(set V2I16Regs:$dst, - (vector_insert V2I16Regs:$src, Int16Regs:$val, imm:$c))], - IMOV16rr>; + (insertelt V2I16Regs:$src, Int16Regs:$val, imm:$c))], + IMOV16rr>; // Insert v4i16 def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst), @@ -137,8 +135,8 @@ def V4i16Insert : NVPTXVecInst<(outs V4I16Regs:$dst), "mov.v4.u16 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u16 \t$dst${c:vecelem}, $val;", [(set V4I16Regs:$dst, - (vector_insert V4I16Regs:$src, Int16Regs:$val, imm:$c))], - IMOV16rr>; + (insertelt V4I16Regs:$src, Int16Regs:$val, imm:$c))], + IMOV16rr>; // Insert v2i32 def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst), @@ -146,8 +144,8 @@ def V2i32Insert : NVPTXVecInst<(outs V2I32Regs:$dst), "mov.v2.u32 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u32 \t$dst${c:vecelem}, $val;", [(set V2I32Regs:$dst, - (vector_insert V2I32Regs:$src, Int32Regs:$val, imm:$c))], - IMOV32rr>; + (insertelt V2I32Regs:$src, Int32Regs:$val, imm:$c))], + IMOV32rr>; // Insert v2f32 def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst), @@ -155,8 +153,8 @@ def V2f32Insert : NVPTXVecInst<(outs V2F32Regs:$dst), "mov.v2.f32 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.f32 \t$dst${c:vecelem}, $val;", [(set V2F32Regs:$dst, - (vector_insert V2F32Regs:$src, Float32Regs:$val, imm:$c))], - FMOV32rr>; + (insertelt V2F32Regs:$src, Float32Regs:$val, imm:$c))], + FMOV32rr>; // Insert v2i64 def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst), @@ -164,8 +162,8 @@ def V2i64Insert : NVPTXVecInst<(outs V2I64Regs:$dst), "mov.v2.u64 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u64 \t$dst${c:vecelem}, $val;", [(set V2I64Regs:$dst, - (vector_insert V2I64Regs:$src, Int64Regs:$val, imm:$c))], - IMOV64rr>; + (insertelt V2I64Regs:$src, Int64Regs:$val, imm:$c))], + IMOV64rr>; // Insert v2f64 def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst), @@ -173,8 +171,8 @@ def V2f64Insert : NVPTXVecInst<(outs V2F64Regs:$dst), "mov.v2.f64 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.f64 \t$dst${c:vecelem}, $val;", [(set V2F64Regs:$dst, - (vector_insert V2F64Regs:$src, Float64Regs:$val, imm:$c))], - FMOV64rr>; + (insertelt V2F64Regs:$src, Float64Regs:$val, imm:$c))], + FMOV64rr>; // Insert v4i32 def V4i32Insert : NVPTXVecInst<(outs 
V4I32Regs:$dst), @@ -182,8 +180,8 @@ def V4i32Insert : NVPTXVecInst<(outs V4I32Regs:$dst), "mov.v4.u32 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.u32 \t$dst${c:vecelem}, $val;", [(set V4I32Regs:$dst, - (vector_insert V4I32Regs:$src, Int32Regs:$val, imm:$c))], - IMOV32rr>; + (insertelt V4I32Regs:$src, Int32Regs:$val, imm:$c))], + IMOV32rr>; // Insert v4f32 def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst), @@ -191,8 +189,8 @@ def V4f32Insert : NVPTXVecInst<(outs V4F32Regs:$dst), "mov.v4.f32 \t${dst:vecfull}, ${src:vecfull};" "\n\tmov.f32 \t$dst${c:vecelem}, $val;", [(set V4F32Regs:$dst, - (vector_insert V4F32Regs:$src, Float32Regs:$val, imm:$c))], - FMOV32rr>; + (insertelt V4F32Regs:$src, Float32Regs:$val, imm:$c))], + FMOV32rr>; } class BinOpAsmString { diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp index 5e375b7852e4..20ab5db584d2 100644 --- a/lib/Target/NVPTX/NVVMReflect.cpp +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -109,10 +109,10 @@ void NVVMReflect::setVarMap() { for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) { DEBUG(dbgs() << "Option : " << ReflectList[i] << "\n"); SmallVector NameValList; - StringRef(ReflectList[i]).split(NameValList, ","); + StringRef(ReflectList[i]).split(NameValList, ','); for (unsigned j = 0, ej = NameValList.size(); j != ej; ++j) { SmallVector NameValPair; - NameValList[j].split(NameValPair, "="); + NameValList[j].split(NameValPair, '='); assert(NameValPair.size() == 2 && "name=val expected"); std::stringstream ValStream(NameValPair[1]); int Val; diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index a699a55d3cbf..220c70a48542 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -243,7 +243,6 @@ namespace { struct PPCOperand; class PPCAsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; const MCInstrInfo &MII; bool IsPPC64; bool IsDarwin; @@ -291,9 +290,9 @@ class PPCAsmParser : public MCTargetAsmParser { public: - PPCAsmParser(MCSubtargetInfo &STI, MCAsmParser &, const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(STI), MII(MII) { + PPCAsmParser(const MCSubtargetInfo &STI, MCAsmParser &, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(Options, STI), MII(MII) { // Check for 64-bit vs. 32-bit pointer mode. Triple TheTriple(STI.getTargetTriple()); IsPPC64 = (TheTriple.getArch() == Triple::ppc64 || @@ -1185,7 +1184,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst, break; } case PPC::MFTB: { - if (STI.getFeatureBits()[PPC::FeatureMFTB]) { + if (getSTI().getFeatureBits()[PPC::FeatureMFTB]) { assert(Inst.getNumOperands() == 2 && "Expecting two operands"); Inst.setOpcode(PPC::MFSPR); } @@ -1205,7 +1204,7 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Post-process instructions (typically extended mnemonics) ProcessInstruction(Inst, Operands); Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; case Match_MissingFeature: return Error(IDLoc, "instruction use requires an option to be enabled"); @@ -1690,7 +1689,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // where th can be omitted when it is 0. dcbtst is the same. We take the // server form to be the default, so swap the operands if we're parsing for // an embedded core (they'll be swapped again upon printing). 
- if (STI.getFeatureBits()[PPC::FeatureBookE] && + if (getSTI().getFeatureBits()[PPC::FeatureBookE] && Operands.size() == 4 && (Name == "dcbt" || Name == "dcbtst")) { std::swap(Operands[1], Operands[3]); @@ -1730,10 +1729,19 @@ bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; + SMLoc ExprLoc = getLexer().getLoc(); if (getParser().parseExpression(Value)) return false; - getParser().getStreamer().EmitValue(Value, Size); + if (const auto *MCE = dyn_cast(Value)) { + assert(Size <= 8 && "Invalid size"); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) + return Error(ExprLoc, "literal value out of range for directive"); + getStreamer().EmitIntValue(IntValue, Size); + } else { + getStreamer().EmitValue(Value, Size, ExprLoc); + } if (getLexer().is(AsmToken::EndOfStatement)) break; diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index c0c83cc258b8..c31ababafbe7 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(PowerPCCommonTableGen) add_llvm_target(PowerPCCodeGen + PPCBoolRetToInt.cpp PPCAsmPrinter.cpp PPCBranchSelector.cpp PPCCTRLoops.cpp @@ -27,6 +28,7 @@ add_llvm_target(PowerPCCodeGen PPCLoopPreIncPrep.cpp PPCMCInstLower.cpp PPCMachineFunctionInfo.cpp + PPCMIPeephole.cpp PPCRegisterInfo.cpp PPCSubtarget.cpp PPCTargetMachine.cpp diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 93a503c3758d..1fc84fb76551 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -401,8 +401,6 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size, decodeInstruction(DecoderTableQPX32, MI, Inst, Address, this, STI); if (result != MCDisassembler::Fail) return result; - - MI.clear(); } return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI); diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index 8e1878344302..53eb727d0b07 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -18,8 +18,6 @@ namespace llvm { -class MCOperand; - class PPCInstPrinter : public MCInstPrinter { bool IsDarwin; public: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 992be5b966c1..dd994956870f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -113,6 +113,10 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, break; } break; + case PPC::fixup_ppc_half16ds: + Target.print(errs()); + errs() << '\n'; + report_fatal_error("Invalid PC-relative half16ds relocation"); case FK_Data_4: case FK_PCRel_4: Type = ELF::R_PPC_REL32; @@ -305,13 +309,13 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target, break; case MCSymbolRefExpr::VK_GOT: Type = ELF::R_PPC64_GOT16_DS; - break; + break; case MCSymbolRefExpr::VK_PPC_GOT_LO: Type = ELF::R_PPC64_GOT16_LO_DS; break; case MCSymbolRefExpr::VK_PPC_TOC: Type = ELF::R_PPC64_TOC16_DS; - break; + break; case MCSymbolRefExpr::VK_PPC_TOC_LO: Type = ELF::R_PPC64_TOC16_LO_DS; break; @@ -372,16 +376,16 @@ unsigned 
PPCELFObjectWriter::GetRelocType(const MCValue &Target, break; case MCSymbolRefExpr::VK_None: Type = ELF::R_PPC64_ADDR64; - break; + break; case MCSymbolRefExpr::VK_PPC_DTPMOD: Type = ELF::R_PPC64_DTPMOD64; - break; + break; case MCSymbolRefExpr::VK_PPC_TPREL: Type = ELF::R_PPC64_TPREL64; - break; + break; case MCSymbolRefExpr::VK_PPC_DTPREL: Type = ELF::R_PPC64_DTPREL64; - break; + break; } break; case FK_Data_4: diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h index 86ad3859b72c..e252ac944d40 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h @@ -20,18 +20,19 @@ namespace llvm { class Triple; - class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); +class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); - public: - explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&); - }; +public: + explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple &); +}; - class PPCELFMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit PPCELFMCAsmInfo(bool is64Bit, const Triple&); - }; +class PPCELFMCAsmInfo : public MCAsmInfoELF { + void anchor() override; + +public: + explicit PPCELFMCAsmInfo(bool is64Bit, const Triple &); +}; } // namespace llvm diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index a641780516b3..d42a111cc43e 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -82,8 +82,8 @@ public: const MCAsmLayout *Layout, const MCFixup *Fixup) const override; void visitUsedExpr(MCStreamer &Streamer) const override; - MCSection *findAssociatedSection() const override { - return getSubExpr()->findAssociatedSection(); + MCFragment *findAssociatedFragment() const override { + return getSubExpr()->findAssociatedFragment(); } // There are no TLS PPCMCExprs at the moment. diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp index 9d7289658f0f..b54a0e1b86b1 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp @@ -241,12 +241,12 @@ bool PPCMachObjectWriter::recordScatteredRelocation( if (FixupOffset > 0xffffff) { char Buffer[32]; format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); - Asm.getContext().reportFatalError(Fixup.getLoc(), + Asm.getContext().reportError(Fixup.getLoc(), Twine("Section too large, can't encode " "r_address (") + Buffer + ") into 24 bits of scattered " "relocation entry."); - llvm_unreachable("fatal error returned?!"); + return false; } // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()? diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h index 6075631a541f..acea600fbb0d 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h @@ -56,6 +56,14 @@ namespace PPC { PRED_BIT_UNSET = 1025 }; + // Bit for branch taken (plus) or not-taken (minus) hint + enum BranchHintBit { + BR_NO_HINT = 0x0, + BR_NONTAKEN_HINT = 0x2, + BR_TAKEN_HINT = 0x3, + BR_HINT_MASK = 0X3 + }; + /// Invert the specified predicate. != -> ==, < -> >=. 
Predicate InvertPredicate(Predicate Opcode); diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h index ae8d8b4f5dfe..a259ed3fd327 100644 --- a/lib/Target/PowerPC/PPC.h +++ b/lib/Target/PowerPC/PPC.h @@ -41,13 +41,16 @@ namespace llvm { FunctionPass *createPPCVSXCopyPass(); FunctionPass *createPPCVSXFMAMutatePass(); FunctionPass *createPPCVSXSwapRemovalPass(); + FunctionPass *createPPCMIPeepholePass(); FunctionPass *createPPCBranchSelectionPass(); FunctionPass *createPPCISelDag(PPCTargetMachine &TM); FunctionPass *createPPCTLSDynamicCallPass(); + FunctionPass *createPPCBoolRetToIntPass(); void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, AsmPrinter &AP, bool isDarwin); void initializePPCVSXFMAMutatePass(PassRegistry&); + void initializePPCBoolRetToIntPass(PassRegistry&); extern char &PPCVSXFMAMutateID; namespace PPCII { diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index 641b2377de40..b03be12cfd97 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -50,6 +50,8 @@ def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; +def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true", + "Use software emulation for floating point">; def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true", "Enable 64-bit registers usage for ppc32 [beta]">; def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true", @@ -137,6 +139,12 @@ def FeatureHTM : SubtargetFeature<"htm", "HasHTM", "true", "Enable Hardware Transactional Memory instructions">; def FeatureMFTB : SubtargetFeature<"", "FeatureMFTB", "true", "Implement mftb using the mfspr instruction">; +def FeatureFusion : SubtargetFeature<"fusion", "HasFusion", "true", + "Target supports add/load integer fusion.">; +def FeatureFloat128 : + SubtargetFeature<"float128", "HasFloat128", "true", + "Enable the __float128 data type for IEEE-754R Binary128.", + [FeatureVSX]>; def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true", "Treat vector data stream cache control instructions as deprecated">; @@ -168,7 +176,8 @@ def ProcessorFeatures { FeatureMFTB, DeprecatedDST]; list Power8SpecificFeatures = [DirectivePwr8, FeatureP8Altivec, FeatureP8Vector, FeatureP8Crypto, - FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic]; + FeatureHTM, FeatureDirectMove, FeatureICBT, FeaturePartwordAtomic, + FeatureFusion]; list Power8FeatureList = !listconcat(Power7FeatureList, Power8SpecificFeatures); } @@ -309,7 +318,7 @@ def : ProcessorModel<"g5", G5Model, Feature64Bit /*, Feature64BitRegs */, FeatureMFTB, DeprecatedDST]>; def : ProcessorModel<"e500mc", PPCE500mcModel, - [DirectiveE500mc, FeatureMFOCRF, + [DirectiveE500mc, FeatureSTFIWX, FeatureICBT, FeatureBookE, FeatureISEL, FeatureMFTB]>; def : ProcessorModel<"e5500", PPCE5500Model, @@ -403,6 +412,7 @@ def PPCAsmParserVariant : AsmParserVariant { // InstAlias definitions use immediate literals. Set RegisterPrefix // so that those are not misinterpreted as registers. 
string RegisterPrefix = "%"; + string BreakCharacters = "."; } def PPC : Target { diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 8e118ec27e67..9a63c14b5053 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -65,19 +65,20 @@ using namespace llvm; #define DEBUG_TYPE "asmprinter" namespace { - class PPCAsmPrinter : public AsmPrinter { - protected: - MapVector TOC; - const PPCSubtarget *Subtarget; - StackMaps SM; - public: - explicit PPCAsmPrinter(TargetMachine &TM, - std::unique_ptr Streamer) - : AsmPrinter(TM, std::move(Streamer)), SM(*this) {} +class PPCAsmPrinter : public AsmPrinter { +protected: + MapVector TOC; + const PPCSubtarget *Subtarget; + StackMaps SM; - const char *getPassName() const override { - return "PowerPC Assembly Printer"; - } +public: + explicit PPCAsmPrinter(TargetMachine &TM, + std::unique_ptr Streamer) + : AsmPrinter(TM, std::move(Streamer)), SM(*this) {} + + const char *getPassName() const override { + return "PowerPC Assembly Printer"; + } MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym); @@ -94,10 +95,8 @@ namespace { void EmitEndOfAsmFile(Module &M) override; - void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); - void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI); + void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI); + void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI); void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK); bool runOnMachineFunction(MachineFunction &MF) override { Subtarget = &MF.getSubtarget(); @@ -157,15 +156,15 @@ static const char *stripRegisterPrefix(const char *RegName) { return RegName + 1; case 'c': if (RegName[1] == 'r') return RegName + 2; } - + return RegName; } void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const MachineOperand &MO = MI->getOperand(OpNo); - + switch (MO.getType()) { case MachineOperand::MO_Register: { const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg()); @@ -184,8 +183,8 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, MO.getMBB()->getSymbol()->print(O, MAI); return; case MachineOperand::MO_ConstantPoolIndex: - O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); + O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << MO.getIndex(); return; case MachineOperand::MO_BlockAddress: GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); @@ -200,19 +199,19 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, !GV->isStrongDefinitionForLinker()) { if (!GV->hasHiddenVisibility()) { SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo() - .getGVStubEntry(SymToPrint); + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo().getGVStubEntry( + SymToPrint); if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); } else if (GV->isDeclaration() || GV->hasCommonLinkage() || GV->hasAvailableExternallyLinkage()) { SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - - MachineModuleInfoImpl::StubValueTy &StubSym = - MMI->getObjFileInfo(). 
- getHiddenGVStubEntry(SymToPrint); + + MachineModuleInfoImpl::StubValueTy &StubSym = + MMI->getObjFileInfo().getHiddenGVStubEntry( + SymToPrint); if (!StubSym.getPointer()) StubSym = MachineModuleInfoImpl:: StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); @@ -295,16 +294,16 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, } case 'U': // Print 'u' for update form. case 'X': // Print 'x' for indexed form. - { - // FIXME: Currently for PowerPC memory operands are always loaded - // into a register, so we never get an update or indexed form. - // This is bad even for offset forms, since even if we know we - // have a value in -16(r1), we will generate a load into r - // and then load from 0(r). Until that issue is fixed, - // tolerate 'U' and 'X' but don't output anything. - assert(MI->getOperand(OpNo).isReg()); - return false; - } + { + // FIXME: Currently for PowerPC memory operands are always loaded + // into a register, so we never get an update or indexed form. + // This is bad even for offset forms, since even if we know we + // have a value in -16(r1), we will generate a load into r + // and then load from 0(r). Until that issue is fixed, + // tolerate 'U' and 'X' but don't output anything. + assert(MI->getOperand(OpNo).isReg()); + return false; + } } } @@ -315,7 +314,6 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return false; } - /// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry /// exists for it. If not, create one. Then return a symbol that references /// the TOC entry. @@ -330,8 +328,7 @@ void PPCAsmPrinter::EmitEndOfAsmFile(Module &M) { SM.serializeToStackMapSection(); } -void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { +void PPCAsmPrinter::LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI) { unsigned NumNOPBytes = MI.getOperand(1).getImm(); SM.recordStackMap(MI); @@ -353,13 +350,12 @@ void PPCAsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM, // Emit nops. 
for (unsigned i = 0; i < NumNOPBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP)); } // Lower a patchpoint of the form: // [], , , , -void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, - const MachineInstr &MI) { +void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) { SM.recordPatchPoint(MI); PatchPointOpers Opers(&MI); @@ -375,60 +371,59 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg(); EncodedBytes = 0; // Materialize the jump address: - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI8) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI8) .addReg(ScratchReg) .addImm((CallTarget >> 32) & 0xFFFF)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::RLDIC) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::RLDIC) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(32).addImm(16)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORIS8) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORIS8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm((CallTarget >> 16) & 0xFFFF)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ORI8) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ORI8) .addReg(ScratchReg) .addReg(ScratchReg) .addImm(CallTarget & 0xFFFF)); // Save the current TOC pointer before the remote call. int TOCSaveOffset = Subtarget->isELFv2ABI() ? 24 : 40; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::STD) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::STD) .addReg(PPC::X2) .addImm(TOCSaveOffset) .addReg(PPC::X1)); ++EncodedBytes; - // If we're on ELFv1, then we need to load the actual function pointer // from the function descriptor. if (!Subtarget->isELFv2ABI()) { - // Load the new TOC pointer and the function address, but not r11 - // (needing this is rare, and loading it here would prevent passing it - // via a 'nest' parameter. - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + // Load the new TOC pointer and the function address, but not r11 + // (needing this is rare, and loading it here would prevent passing it + // via a 'nest' parameter. + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD) .addReg(PPC::X2) .addImm(8) .addReg(ScratchReg)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD) .addReg(ScratchReg) .addImm(0) .addReg(ScratchReg)); ++EncodedBytes; } - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR8) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::MTCTR8) .addReg(ScratchReg)); ++EncodedBytes; - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTRL8)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BCTRL8)); ++EncodedBytes; // Restore the TOC pointer after the call. 
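// (Editor's aside, illustration only.) The patchpoint call goes through CTR
// to a callee that may use a different TOC, so X2 was spilled to the
// ABI-defined save slot computed above (offset 24 on ELFv2, 40 on ELFv1)
// and is reloaded here; in assembly terms, roughly:
//
//   std r2, 24(r1)   ; save the TOC pointer before mtctr/bctrl
//   bctrl
//   ld  r2, 24(r1)   ; restore it after the call returns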
- EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LD) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD) .addReg(PPC::X2) .addImm(TOCSaveOffset) .addReg(PPC::X1)); @@ -439,7 +434,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, MCSymbol *MOSymbol = getSymbol(GValue); const MCExpr *SymVar = MCSymbolRefExpr::create(MOSymbol, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL8_NOP) + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL8_NOP) .addExpr(SymVar)); EncodedBytes += 2; } @@ -454,7 +449,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM, assert((NumBytes - EncodedBytes) % 4 == 0 && "Invalid number of NOP bytes requested!"); for (unsigned i = EncodedBytes; i < NumBytes; i += 4) - EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP)); + EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::NOP)); } /// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a @@ -499,16 +494,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { bool isDarwin = TM.getTargetTriple().isOSDarwin(); const Module *M = MF->getFunction()->getParent(); PICLevel::Level PL = M->getPICLevel(); - + // Lower multi-instruction pseudo operations. switch (MI->getOpcode()) { default: break; case TargetOpcode::DBG_VALUE: llvm_unreachable("Should be handled target independently"); case TargetOpcode::STACKMAP: - return LowerSTACKMAP(*OutStreamer, SM, *MI); + return LowerSTACKMAP(SM, *MI); case TargetOpcode::PATCHPOINT: - return LowerPATCHPOINT(*OutStreamer, SM, *MI); + return LowerPATCHPOINT(SM, *MI); case PPC::MoveGOTtoLR: { // Transform %LR = MoveGOTtoLR @@ -533,17 +528,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { case PPC::MovePCtoLR: case PPC::MovePCtoLR8: { // Transform %LR = MovePCtoLR - // Into this, where the label is the PIC base: + // Into this, where the label is the PIC base: // bl L1$pb // L1$pb: MCSymbol *PICBase = MF->getPICBaseSymbol(); - + // Emit the 'bl'. - EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BL) - // FIXME: We would like an efficient form for this, so we don't have to do - // a lot of extra uniquing. - .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); - + EmitToStreamer(*OutStreamer, + MCInstBuilder(PPC::BL) + // FIXME: We would like an efficient form for this, so we + // don't have to do a lot of extra uniquing. + .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); + // Emit the label. 
OutStreamer->EmitLabel(PICBase); return; @@ -654,7 +650,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); return; } - + case PPC::ADDIStocHA: { // Transform %Xd = ADDIStocHA %X2, LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, isDarwin); @@ -669,28 +665,22 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MO.isBlockAddress()) && "Invalid operand for ADDIStocHA!"); MCSymbol *MOSymbol = nullptr; - bool IsExternal = false; - bool IsNonLocalFunction = false; - bool IsCommon = false; - bool IsAvailExt = false; + bool GlobalToc = false; if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); MOSymbol = getSymbol(GV); - IsExternal = GV->isDeclaration(); - IsCommon = GV->hasCommonLinkage(); - IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - !GV->isStrongDefinitionForLinker(); - IsAvailExt = GV->hasAvailableExternallyLinkage(); - } else if (MO.isCPI()) + unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); + GlobalToc = (GVFlags & PPCII::MO_NLP_FLAG); + } else if (MO.isCPI()) { MOSymbol = GetCPISymbol(MO.getIndex()); - else if (MO.isJTI()) + } else if (MO.isJTI()) { MOSymbol = GetJTISymbol(MO.getIndex()); - else if (MO.isBlockAddress()) + } else if (MO.isBlockAddress()) { MOSymbol = GetBlockAddressSymbol(MO.getBlockAddress()); + } - if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt || - MO.isJTI() || MO.isBlockAddress() || + if (GlobalToc || MO.isJTI() || MO.isBlockAddress() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); @@ -727,13 +717,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } else if (MO.isGlobal()) { - const GlobalValue *GValue = MO.getGlobal(); - MOSymbol = getSymbol(GValue); - if (GValue->getType()->getElementType()->isFunctionTy() || - GValue->isDeclaration() || GValue->hasCommonLinkage() || - GValue->hasAvailableExternallyLinkage() || - TM.getCodeModel() == CodeModel::Large) - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + const GlobalValue *GV = MO.getGlobal(); + MOSymbol = getSymbol(GV); + DEBUG( + unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); + assert((GVFlags & PPCII::MO_NLP_FLAG) && + "LDtocL used on symbol that could be accessed directly is " + "invalid. 
Must match ADDIStocHA.")); + MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); } const MCExpr *Exp = @@ -754,21 +745,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &MO = MI->getOperand(2); assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); MCSymbol *MOSymbol = nullptr; - bool IsExternal = false; - bool IsNonLocalFunction = false; if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); + DEBUG( + unsigned char GVFlags = Subtarget->classifyGlobalReference(GV); + assert ( + !(GVFlags & PPCII::MO_NLP_FLAG) && + "Interposable definitions must use indirect access.")); MOSymbol = getSymbol(GV); - IsExternal = GV->isDeclaration(); - IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && - !GV->isStrongDefinitionForLinker(); - } else if (MO.isCPI()) + } else if (MO.isCPI()) { MOSymbol = GetCPISymbol(MO.getIndex()); - - if (IsNonLocalFunction || IsExternal || - TM.getCodeModel() == CodeModel::Large) - MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); + } const MCExpr *Exp = MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO, @@ -840,13 +828,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case PPC::PPC32GOT: { - MCSymbol *GOTSymbol = OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); - const MCExpr *SymGotTlsL = - MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, - OutContext); - const MCExpr *SymGotTlsHA = - MCSymbolRefExpr::create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, - OutContext); + MCSymbol *GOTSymbol = + OutContext.getOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_")); + const MCExpr *SymGotTlsL = MCSymbolRefExpr::create( + GOTSymbol, MCSymbolRefExpr::VK_PPC_LO, OutContext); + const MCExpr *SymGotTlsHA = MCSymbolRefExpr::create( + GOTSymbol, MCSymbolRefExpr::VK_PPC_HA, OutContext); EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LI) .addReg(MI->getOperand(0).getReg()) .addExpr(SymGotTlsL)); @@ -1079,14 +1066,14 @@ void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) { void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { // linux/ppc32 - Normal entry label. - if (!Subtarget->isPPC64() && - (TM.getRelocationModel() != Reloc::PIC_ || + if (!Subtarget->isPPC64() && + (TM.getRelocationModel() != Reloc::PIC_ || MF->getFunction()->getParent()->getPICLevel() == PICLevel::Small)) return AsmPrinter::EmitFunctionEntryLabel(); if (!Subtarget->isPPC64()) { const PPCFunctionInfo *PPCFI = MF->getInfo(); - if (PPCFI->usesPICBase()) { + if (PPCFI->usesPICBase()) { MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol(); MCSymbol *PICBase = MF->getPICBaseSymbol(); OutStreamer->EmitLabel(RelocSymbol); @@ -1130,11 +1117,10 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() { OutStreamer->SwitchSection(Current.first, Current.second); } - bool PPCLinuxAsmPrinter::doFinalization(Module &M) { - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); - bool isPPC64 = TD->getPointerSizeInBits() == 64; + bool isPPC64 = DL.getPointerSizeInBits() == 64; PPCTargetStreamer &TS = static_cast(*OutStreamer->getTargetStreamer()); @@ -1293,8 +1279,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) { // Prime text sections so they are adjacent. This reduces the likelihood a // large data or debug section causes a branch to exceed 16M limit. 
- const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); OutStreamer->SwitchSection(TLOFMacho.getTextCoalSection()); if (TM.getRelocationModel() == Reloc::PIC_) { OutStreamer->SwitchSection( @@ -1325,7 +1311,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) { void PPCDarwinAsmPrinter:: EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; + bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64; // Construct a local MCSubtargetInfo and shadow EmitToStreamer here. // This is because the MachineFunction won't exist (but have not yet been @@ -1338,8 +1324,8 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { S.EmitInstruction(Inst, *STI); }; - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); // .lazy_symbol_pointer MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection(); @@ -1353,12 +1339,12 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer->SwitchSection(StubSection); EmitAlignment(4); - + MCSymbol *Stub = Stubs[i].first; MCSymbol *RawSym = Stubs[i].second.getPointer(); MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext); MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext); - + OutStreamer->EmitLabel(Stub); OutStreamer->EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol); @@ -1463,20 +1449,19 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) { OutStreamer->EmitSymbolValue(DyldStubBindingHelper, 4); } } - + OutStreamer->AddBlankLine(); } - bool PPCDarwinAsmPrinter::doFinalization(Module &M) { - bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64; + bool isPPC64 = getDataLayout().getPointerSizeInBits() == 64; // Darwin/PPC always uses mach-o. - const TargetLoweringObjectFileMachO &TLOFMacho = - static_cast(getObjFileLowering()); + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); MachineModuleInfoMachO &MMIMacho = - MMI->getObjFileInfo(); - + MMI->getObjFileInfo(); + MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList(); if (!Stubs.empty()) EmitFunctionStubs(Stubs); @@ -1484,27 +1469,27 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) { if (MAI->doesSupportExceptionHandling() && MMI) { // Add the (possibly multiple) personalities to the set of global values. // Only referenced functions get into the Personalities list. - const std::vector &Personalities = MMI->getPersonalities(); - for (std::vector::const_iterator I = Personalities.begin(), - E = Personalities.end(); I != E; ++I) { - if (*I) { - MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr"); + for (const Function *Personality : MMI->getPersonalities()) { + if (Personality) { + MCSymbol *NLPSym = + getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr"); MachineModuleInfoImpl::StubValueTy &StubSym = - MMIMacho.getGVStubEntry(NLPSym); - StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true); + MMIMacho.getGVStubEntry(NLPSym); + StubSym = + MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true); } } } // Output stubs for dynamically-linked functions. Stubs = MMIMacho.GetGVStubList(); - + // Output macho stubs for external and common global variables. 
   if (!Stubs.empty()) {
     // Switch with ".non_lazy_symbol_pointer" directive.
     OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
     EmitAlignment(isPPC64 ? 3 : 2);
-    
+
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
       // L_foo$stub:
       OutStreamer->EmitLabel(Stubs[i].first);
@@ -1535,7 +1520,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
   if (!Stubs.empty()) {
     OutStreamer->SwitchSection(getObjFileLowering().getDataSection());
     EmitAlignment(isPPC64 ? 3 : 2);
-    
+
     for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
       // L_foo$stub:
       OutStreamer->EmitLabel(Stubs[i].first);
@@ -1573,7 +1558,7 @@ createPPCAsmPrinterPass(TargetMachine &tm,
 }
 
 // Force static initialization.
-extern "C" void LLVMInitializePowerPCAsmPrinter() { 
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
   TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
   TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
   TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);
diff --git a/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
new file mode 100644
index 000000000000..7920240bc2b9
--- /dev/null
+++ b/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -0,0 +1,253 @@
+//===- PPCBoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements converting i1 values to i32 if they could be more
+// profitably allocated as GPRs rather than CRs. This pass will become totally
+// unnecessary if Register Bank Allocation and Global Instruction Selection ever
+// go upstream.
+//
+// Presently, the pass converts i1 Constants, and Arguments to i32 if the
+// transitive closure of their uses includes only PHINodes, CallInsts, and
+// ReturnInsts. The rationale is that arguments are generally passed and
+// returned in GPRs rather than CRs, so casting them to i32 at the LLVM IR
+// level will actually save casts at the Machine Instruction level.
+//
+// It might be useful to expand this pass to add bit-wise operations to the list
+// of safe transitive closure types. Also, we miss some opportunities when LLVM
+// represents logical AND and OR operations with control flow rather than data
+// flow. For example, by lowering the expression: return (A && B && C)
+//
+// as: return A ? true : B && C.
+//
+// There's code in SimplifyCFG that could be used to turn control flow into
+// data flow using SelectInsts. Selects are slow on some architectures (P7/P8),
+// so this probably isn't good in general, but for the special case of i1, the
+// Selects could be further lowered to bit operations that are fast everywhere.
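+//
+// As an illustration (hypothetical IR, not taken from this patch), a
+// promotable i1 phi feeding a return:
+//
+//   %r = phi i1 [ %b, %entry ], [ true, %t ]
+//   ret i1 %r
+//
+// is rewritten by this pass along the lines of:
+//
+//   %r.i32 = phi i32 [ %b.i32, %entry ], [ 1, %t ]
+//   %backToBool = trunc i32 %r.i32 to i1
+//   ret i1 %backToBool
+//
+// where %b.i32 is a zext of %b inserted after its definition, so that the
+// phi lives in a GPR rather than a CR field.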
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+
+#define DEBUG_TYPE "bool-ret-to-int"
+
+STATISTIC(NumBoolRetPromotion,
+          "Number of times a bool feeding a RetInst was promoted to an int");
+STATISTIC(NumBoolCallPromotion,
+          "Number of times a bool feeding a CallInst was promoted to an int");
+STATISTIC(NumBoolToIntPromotion,
+          "Total number of times a bool was promoted to an int");
+
+class PPCBoolRetToInt : public FunctionPass {
+
+  static SmallPtrSet<Value *, 8> findAllDefs(Value *V) {
+    SmallPtrSet<Value *, 8> Defs;
+    SmallVector<Value *, 8> WorkList;
+    WorkList.push_back(V);
+    Defs.insert(V);
+    while (!WorkList.empty()) {
+      Value *Curr = WorkList.back();
+      WorkList.pop_back();
+      if (User *CurrUser = dyn_cast<User>(Curr))
+        for (auto &Op : CurrUser->operands())
+          if (Defs.insert(Op).second)
+            WorkList.push_back(Op);
+    }
+    return Defs;
+  }
+
+  // Translate an i1 value to an equivalent i32 value:
+  static Value *translate(Value *V) {
+    Type *Int32Ty = Type::getInt32Ty(V->getContext());
+    if (Constant *C = dyn_cast<Constant>(V))
+      return ConstantExpr::getZExt(C, Int32Ty);
+    if (PHINode *P = dyn_cast<PHINode>(V)) {
+      // Temporarily set the operands to 0. We'll fix this later in
+      // runOnUse.
+      Value *Zero = Constant::getNullValue(Int32Ty);
+      PHINode *Q =
+        PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+      for (unsigned i = 0; i < P->getNumOperands(); ++i)
+        Q->addIncoming(Zero, P->getIncomingBlock(i));
+      return Q;
+    }
+
+    Argument *A = dyn_cast<Argument>(V);
+    Instruction *I = dyn_cast<Instruction>(V);
+    assert((A || I) && "Unknown value type");
+
+    auto InstPt =
+      A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
+    return new ZExtInst(V, Int32Ty, "", InstPt);
+  }
+
+  typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
+
+  // A PHINode is Promotable if:
+  // 1. Its type is i1 AND
+  // 2. All of its uses are ReturnInst, CallInst, PHINode, or DbgInfoIntrinsic
+  // AND
+  // 3. All of its operands are Constant or Argument or
+  //    CallInst or PHINode AND
+  // 4. All of its PHINode uses are Promotable AND
+  // 5. All of its PHINode operands are Promotable
+  static PHINodeSet getPromotablePHINodes(const Function &F) {
+    PHINodeSet Promotable;
+    // Condition 1
+    for (auto &BB : F)
+      for (auto &I : BB)
+        if (const PHINode *P = dyn_cast<const PHINode>(&I))
+          if (P->getType()->isIntegerTy(1))
+            Promotable.insert(P);
+
+    SmallVector<const PHINode *, 8> ToRemove;
+    for (const auto &P : Promotable) {
+      // Condition 2 and 3
+      auto IsValidUser = [] (const Value *V) -> bool {
+        return isa<ReturnInst>(V) || isa<CallInst>(V) || isa<PHINode>(V) ||
+        isa<DbgInfoIntrinsic>(V);
+      };
+      auto IsValidOperand = [] (const Value *V) -> bool {
+        return isa<Constant>(V) || isa<Argument>(V) || isa<CallInst>(V) ||
+        isa<PHINode>(V);
+      };
+      const auto &Users = P->users();
+      const auto &Operands = P->operands();
+      if (!std::all_of(Users.begin(), Users.end(), IsValidUser) ||
+          !std::all_of(Operands.begin(), Operands.end(), IsValidOperand))
+        ToRemove.push_back(P);
+    }
+
+    // Iterate to convergence
+    auto IsPromotable = [&Promotable] (const Value *V) -> bool {
+      const PHINode *Phi = dyn_cast<const PHINode>(V);
+      return !Phi || Promotable.count(Phi);
+    };
+    while (!ToRemove.empty()) {
+      for (auto &User : ToRemove)
+        Promotable.erase(User);
+      ToRemove.clear();
+
+      for (const auto &P : Promotable) {
+        // Condition 4 and 5
+        const auto &Users = P->users();
+        const auto &Operands = P->operands();
+        if (!std::all_of(Users.begin(), Users.end(), IsPromotable) ||
+            !std::all_of(Operands.begin(), Operands.end(), IsPromotable))
+          ToRemove.push_back(P);
+      }
+    }
+
+    return Promotable;
+  }
+
+  typedef DenseMap<Value *, Value *> B2IMap;
+
+ public:
+  static char ID;
+  PPCBoolRetToInt() : FunctionPass(ID) {
+    initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) {
+    PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
+    B2IMap Bool2IntMap;
+    bool Changed = false;
+    for (auto &BB : F) {
+      for (auto &I : BB) {
+        if (ReturnInst *R = dyn_cast<ReturnInst>(&I))
+          if (F.getReturnType()->isIntegerTy(1))
+            Changed |=
+              runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap);
+
+        if (CallInst *CI = dyn_cast<CallInst>(&I))
+          for (auto &U : CI->operands())
+            if (U->getType()->isIntegerTy(1))
+              Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap);
+      }
+    }
+
+    return Changed;
+  }
+
+  static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+                       B2IMap &BoolToIntMap) {
+    auto Defs = findAllDefs(U);
+
+    // If the values are all Constants or Arguments, don't bother
+    if (!std::any_of(Defs.begin(), Defs.end(), isa<Instruction, Value *>))
+      return false;
+
+    // Presently, we only know how to handle PHINode, Constant, and Arguments.
+    // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension
+    // could also be handled in the future.
+    for (const auto &V : Defs)
+      if (!isa<PHINode>(V) && !isa<Constant>(V) && !isa<Argument>(V))
+        return false;
+
+    for (const auto &V : Defs)
+      if (const PHINode *P = dyn_cast<PHINode>(V))
+        if (!PromotablePHINodes.count(P))
+          return false;
+
+    if (isa<ReturnInst>(U.getUser()))
+      ++NumBoolRetPromotion;
+    if (isa<CallInst>(U.getUser()))
+      ++NumBoolCallPromotion;
+    ++NumBoolToIntPromotion;
+
+    for (const auto &V : Defs)
+      if (!BoolToIntMap.count(V))
+        BoolToIntMap[V] = translate(V);
+
+    // Replace the operands of the translated instructions. They were set to
+    // zero in the translate function.
+ for (auto &Pair : BoolToIntMap) { + User *First = dyn_cast(Pair.first); + User *Second = dyn_cast(Pair.second); + assert((!First || Second) && "translated from user to non-user!?"); + if (First) + for (unsigned i = 0; i < First->getNumOperands(); ++i) + Second->setOperand(i, BoolToIntMap[First->getOperand(i)]); + } + + Value *IntRetVal = BoolToIntMap[U]; + Type *Int1Ty = Type::getInt1Ty(U->getContext()); + Instruction *I = cast(U.getUser()); + Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I); + U.set(BackToBool); + + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + FunctionPass::getAnalysisUsage(AU); + } +}; +} + +char PPCBoolRetToInt::ID = 0; +INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int", + "Convert i1 constants to i32 if they are returned", + false, false) + +FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp index 940d55ac1f36..73a5305197ad 100644 --- a/lib/Target/PowerPC/PPCBranchSelector.cpp +++ b/lib/Target/PowerPC/PPCBranchSelector.cpp @@ -91,7 +91,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) { unsigned FuncSize = 0; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { - MachineBasicBlock *MBB = MFI; + MachineBasicBlock *MBB = &*MFI; // The end of the previous block may have extra nops if this block has an // alignment requirement. diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp index fd150beeb5a9..b6ac4d54d4c7 100644 --- a/lib/Target/PowerPC/PPCCTRLoops.cpp +++ b/lib/Target/PowerPC/PPCCTRLoops.cpp @@ -98,7 +98,7 @@ namespace { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); + AU.addRequired(); } private: @@ -112,6 +112,7 @@ namespace { const DataLayout *DL; DominatorTree *DT; const TargetLibraryInfo *LibInfo; + bool PreserveLCSSA; }; char PPCCTRLoops::ID = 0; @@ -147,7 +148,7 @@ INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops", false, false) @@ -169,11 +170,12 @@ FunctionPass *llvm::createPPCCTRLoopsVerify() { bool PPCCTRLoops::runOnFunction(Function &F) { LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); DL = &F.getParent()->getDataLayout(); auto *TLIP = getAnalysisIfAvailable(); LibInfo = TLIP ? &TLIP->getTLI() : nullptr; + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); bool MadeChange = false; @@ -250,8 +252,8 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { // If we have a call to ppc_is_decremented_ctr_nonzero, or ppc_mtctr // we're definitely using CTR. 
case Intrinsic::ppc_is_decremented_ctr_nonzero: - case Intrinsic::ppc_mtctr: - return true; + case Intrinsic::ppc_mtctr: + return true; // VisualStudio defines setjmp as _setjmp #if defined(_MSC_VER) && defined(setjmp) && \ @@ -369,7 +371,7 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) { true); if (VTy == MVT::Other) return true; - + if (TLI->isOperationLegalOrCustom(Opcode, VTy)) continue; else if (VTy.isVector() && @@ -537,7 +539,7 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { // the CTR register because some such uses might be reordered by the // selection DAG after the mtctr instruction). if (!Preheader || mightUseCTR(TT, Preheader)) - Preheader = InsertPreheaderForLoop(L, this); + Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (!Preheader) return MadeChange; @@ -554,10 +556,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) { if (!ExitCount->getType()->isPointerTy() && ExitCount->getType() != CountType) ExitCount = SE->getZeroExtendExpr(ExitCount, CountType); - ExitCount = SE->getAddExpr(ExitCount, - SE->getConstant(CountType, 1)); - Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType, - Preheader->getTerminator()); + ExitCount = SE->getAddExpr(ExitCount, SE->getOne(CountType)); + Value *ECValue = + SCEVE.expandCodeFor(ExitCount, CountType, Preheader->getTerminator()); IRBuilder<> CountBuilder(Preheader->getTerminator()); Module *M = Preheader->getParent()->getParent(); @@ -677,7 +678,7 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { // any other instructions that might clobber the ctr register. for (MachineFunction::iterator I = MF.begin(), IE = MF.end(); I != IE; ++I) { - MachineBasicBlock *MBB = I; + MachineBasicBlock *MBB = &*I; if (!MDT->isReachableFromEntry(MBB)) continue; @@ -694,4 +695,3 @@ bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) { return false; } #endif // NDEBUG - diff --git a/lib/Target/PowerPC/PPCEarlyReturn.cpp b/lib/Target/PowerPC/PPCEarlyReturn.cpp index fc89753ed94e..7cb1bb54c725 100644 --- a/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -71,15 +71,20 @@ protected: for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(), PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) { bool OtherReference = false, BlockChanged = false; + + if ((*PI)->empty()) + continue; + for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) { - MachineInstrBuilder MIB; + if (J == (*PI)->end()) + break; + if (J->getOpcode() == PPC::B) { if (J->getOperand(0).getMBB() == &ReturnMBB) { // This is an unconditional branch to the return. Replace the // branch with a blr. - MIB = - BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode())); - MIB.copyImplicitOps(I); + BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode())) + .copyImplicitOps(I); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -90,10 +95,10 @@ protected: if (J->getOperand(2).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. 
- MIB = BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) - .addImm(J->getOperand(0).getImm()) - .addReg(J->getOperand(1).getReg()); - MIB.copyImplicitOps(I); + BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) + .addImm(J->getOperand(0).getImm()) + .addReg(J->getOperand(1).getReg()) + .copyImplicitOps(I); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -104,11 +109,11 @@ protected: if (J->getOperand(1).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. - MIB = BuildMI(**PI, J, J->getDebugLoc(), - TII->get(J->getOpcode() == PPC::BC ? - PPC::BCLR : PPC::BCLRn)) - .addReg(J->getOperand(0).getReg()); - MIB.copyImplicitOps(I); + BuildMI( + **PI, J, J->getDebugLoc(), + TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn)) + .addReg(J->getOperand(0).getReg()) + .copyImplicitOps(I); MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -146,7 +151,7 @@ protected: } for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i) - PredToRemove[i]->removeSuccessor(&ReturnMBB); + PredToRemove[i]->removeSuccessor(&ReturnMBB, true); if (Changed && !ReturnMBB.hasAddressTaken()) { // We now might be able to merge this blr-only block into its @@ -156,7 +161,7 @@ protected: if (PrevMBB.isLayoutSuccessor(&ReturnMBB) && PrevMBB.canFallThrough()) { // Move the blr into the preceding block. PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I); - PrevMBB.removeSuccessor(&ReturnMBB); + PrevMBB.removeSuccessor(&ReturnMBB, true); } } diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index 5f236f744fc4..b451ebf7f27a 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -164,7 +164,8 @@ class PPCFastISel final : public FastISel { unsigned DestReg, bool IsZExt); unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT); unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT); - unsigned PPCMaterializeInt(const Constant *C, MVT VT, bool UseSExt = true); + unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT, + bool UseSExt = true); unsigned PPCMaterialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); unsigned PPCMaterialize64BitInt(int64_t Imm, @@ -292,10 +293,7 @@ bool PPCFastISel::isValueAvailable(const Value *V) const { return true; const auto *I = cast(V); - if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) - return true; - - return false; + return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; } // Given a value Obj, create an Address object Addr that represents its @@ -527,9 +525,9 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, // VSX only provides an indexed load. if (Is32VSXLoad || Is64VSXLoad) return false; - MachineMemOperand *MMO = - FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset), + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, + Addr.Offset), MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); @@ -660,9 +658,9 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) { // VSX only provides an indexed store. 
if (Is32VSXStore || Is64VSXStore) return false; - MachineMemOperand *MMO = - FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset), + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI, + Addr.Offset), MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI), MFI.getObjectAlignment(Addr.Base.FI)); @@ -774,8 +772,7 @@ bool PPCFastISel::SelectBranch(const Instruction *I) { BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC)) .addImm(PPCPred).addReg(CondReg).addMBB(TBB); - fastEmitBranch(FBB, DbgLoc); - FuncInfo.MBB->addSuccessor(TBB); + finishCondBranch(BI->getParent(), TBB, FBB); return true; } } else if (const ConstantInt *CI = @@ -1607,21 +1604,18 @@ bool PPCFastISel::SelectRet(const Instruction *I) { if (ValLocs.size() > 1) return false; - // Special case for returning a constant integer of any size. - // Materialize the constant as an i64 and copy it to the return - // register. We still need to worry about properly extending the sign. E.g: - // If the constant has only one bit, it means it is a boolean. Therefore - // we can't use PPCMaterializeInt because it extends the sign which will - // cause negations of the returned value to be incorrect as they are - // implemented as the flip of the least significant bit. - if (isa(*RV)) { - const Constant *C = cast(RV); - + // Special case for returning a constant integer of any size - materialize + // the constant as an i64 and copy it to the return register. + if (const ConstantInt *CI = dyn_cast(RV)) { CCValAssign &VA = ValLocs[0]; unsigned RetReg = VA.getLocReg(); - unsigned SrcReg = PPCMaterializeInt(C, MVT::i64, - VA.getLocInfo() == CCValAssign::SExt); + // We still need to worry about properly extending the sign. For example, + // we could have only a single bit or a constant that needs zero + // extension rather than sign extension. Make sure we pass the return + // value extension property to integer materialization. + unsigned SrcReg = + PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() == CCValAssign::SExt); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg); @@ -1761,8 +1755,8 @@ bool PPCFastISel::SelectIndirectBr(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8)); const IndirectBrInst *IB = cast(I); - for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i) - FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]); + for (const BasicBlock *SuccBB : IB->successors()) + FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]); return true; } @@ -1898,10 +1892,9 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) { unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); CodeModel::Model CModel = TM.getCodeModel(); - MachineMemOperand *MMO = - FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, - (VT == MVT::f32) ? 4 : 8, Align); + MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( + MachinePointerInfo::getConstantPool(*FuncInfo.MF), + MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Align); unsigned Opc = (VT == MVT::f32) ? 
PPC::LFS : PPC::LFD; unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass); @@ -1976,19 +1969,15 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); - // If/when switches are implemented, jump tables should be handled - // on the "if" path here. - if (CModel == CodeModel::Large || - (GV->getType()->getElementType()->isFunctionTy() && - !GV->isStrongDefinitionForLinker()) || - GV->isDeclaration() || GV->hasCommonLinkage() || - GV->hasAvailableExternallyLinkage()) + unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); + if (GVFlags & PPCII::MO_NLP_FLAG) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); - else + } else { // Otherwise generate the ADDItocL. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL), DestReg).addReg(HighPartReg).addGlobalAddress(GV); + } } return DestReg; @@ -2085,12 +2074,11 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm, // Materialize an integer constant into a register, and return // the register number (or zero if we failed to handle it). -unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT, - bool UseSExt) { +unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT, + bool UseSExt) { // If we're using CR bit registers for i1 values, handle that as a special // case first. if (VT == MVT::i1 && PPCSubTarget->useCRBits()) { - const ConstantInt *CI = cast(C); unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg); @@ -2105,12 +2093,17 @@ unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT, &PPC::GPRCRegClass); // If the constant is in range, use a load-immediate. - const ConstantInt *CI = cast(C); - if (isInt<16>(CI->getSExtValue())) { + if (UseSExt && isInt<16>(CI->getSExtValue())) { unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; unsigned ImmReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) - .addImm( (UseSExt) ? CI->getSExtValue() : CI->getZExtValue() ); + .addImm(CI->getSExtValue()); + return ImmReg; + } else if (!UseSExt && isUInt<16>(CI->getZExtValue())) { + unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI; + unsigned ImmReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) + .addImm(CI->getZExtValue()); return ImmReg; } @@ -2138,8 +2131,8 @@ unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) { return PPCMaterializeFP(CFP, VT); else if (const GlobalValue *GV = dyn_cast(C)) return PPCMaterializeGV(GV, VT); - else if (isa(C)) - return PPCMaterializeInt(C, VT, VT != MVT::i1); + else if (const ConstantInt *CI = dyn_cast(C)) + return PPCMaterializeInt(CI, VT, VT != MVT::i1); return 0; } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 08ae7174244a..beab844c6025 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -30,7 +30,7 @@ using namespace llvm; /// VRRegNo - Map from a numbered VR register to its enum value. 
/// -static const uint16_t VRRegNo[] = { +static const MCPhysReg VRRegNo[] = { PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, @@ -270,7 +270,7 @@ static void RemoveVRSaveCode(MachineInstr *MI) { // epilog blocks. for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { // If last instruction is a return instruction, add an epilogue - if (!I->empty() && I->back().isReturn()) { + if (I->isReturnBlock()) { bool FoundIt = false; for (MBBI = I->end(); MBBI != I->begin(); ) { --MBBI; @@ -306,9 +306,10 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); DebugLoc dl = MI->getDebugLoc(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned UsedRegMask = 0; for (unsigned i = 0; i != 32; ++i) - if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i])) + if (MRI.isPhysRegModified(VRRegNo[i])) UsedRegMask |= 1 << (31-i); // Live in and live out values already must be in the mask, so don't bother @@ -325,7 +326,7 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) { for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); UsedRegMask != 0 && BI != BE; ++BI) { const MachineBasicBlock &MBB = *BI; - if (MBB.empty() || !MBB.back().isReturn()) + if (!MBB.isReturnBlock()) continue; const MachineInstr &Ret = MBB.back(); for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { @@ -555,9 +556,67 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { } } +bool PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, + bool UseAtEnd, + unsigned *ScratchRegister) const { + RegScavenger RS; + unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; + + if (ScratchRegister) + *ScratchRegister = R0; + + // If MBB is an entry or exit block, use R0 as the scratch register + if ((UseAtEnd && MBB->isReturnBlock()) || + (!UseAtEnd && (&MBB->getParent()->front() == MBB))) + return true; + + RS.enterBasicBlock(MBB); + + if (UseAtEnd && !MBB->empty()) { + // The scratch register will be used at the end of the block, so must consider + // all registers used within the block + + MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); + // If no terminator, back iterator up to previous instruction. + if (MBBI == MBB->end()) + MBBI = std::prev(MBBI); + + if (MBBI != MBB->begin()) + RS.forward(MBBI); + } + + if (!RS.isRegUsed(R0)) + return true; + + unsigned Reg = RS.FindUnusedReg(Subtarget.isPPC64() ? 
&PPC::G8RCRegClass + : &PPC::GPRCRegClass); + + // Make sure the register scavenger was able to find an available register + // If not, use R0 but return false to indicate no register was available and + // R0 must be used (as recommended by the ABI) + if (Reg == 0) + return false; + + if (ScratchRegister) + *ScratchRegister = Reg; + + return true; +} + +bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { + MachineBasicBlock *TmpMBB = const_cast(&MBB); + + return findScratchRegister(TmpMBB, false, nullptr); +} + +bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { + MachineBasicBlock *TmpMBB = const_cast(&MBB); + + return findScratchRegister(TmpMBB, true, nullptr); +} + void PPCFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported"); MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const PPCInstrInfo &TII = @@ -589,7 +648,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } } - // Move MBBI back to the beginning of the function. + // Move MBBI back to the beginning of the prologue block. MBBI = MBB.begin(); // Work out frame sizes. @@ -613,7 +672,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; - unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0; + unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 @@ -642,6 +701,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); + findScratchRegister(&MBB, false, &ScratchReg); + assert(ScratchReg && "No scratch register!"); + int LROffset = getReturnSaveOffset(); int FPOffset = 0; @@ -916,27 +978,18 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF, } void PPCFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI != MBB.end() && "Returning block has no terminator"); + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc dl; + + if (MBBI != MBB.end()) + dl = MBBI->getDebugLoc(); + const PPCInstrInfo &TII = *static_cast(Subtarget.getInstrInfo()); const PPCRegisterInfo *RegInfo = static_cast(Subtarget.getRegisterInfo()); - unsigned RetOpcode = MBBI->getOpcode(); - DebugLoc dl; - - assert((RetOpcode == PPC::BLR || - RetOpcode == PPC::BLR8 || - RetOpcode == PPC::TCRETURNri || - RetOpcode == PPC::TCRETURNdi || - RetOpcode == PPC::TCRETURNai || - RetOpcode == PPC::TCRETURNri8 || - RetOpcode == PPC::TCRETURNdi8 || - RetOpcode == PPC::TCRETURNai8) && - "Can only insert epilog into returning blocks"); - // Get alignment info so we know how to restore the SP. const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -959,7 +1012,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; unsigned BPReg = RegInfo->getBaseRegister(MF); unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; - unsigned ScratchReg = isPPC64 ? 
PPC::X0 : PPC::R0; + unsigned ScratchReg = 0; unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 : PPC::MTLR ); @@ -973,10 +1026,14 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, : PPC::ADDI ); const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 : PPC::ADD4 ); - + int LROffset = getReturnSaveOffset(); int FPOffset = 0; + + findScratchRegister(&MBB, true, &ScratchReg); + assert(ScratchReg && "No scratch register!"); + if (HasFP) { if (isSVR4ABI) { MachineFrameInfo *FFI = MF.getFrameInfo(); @@ -1008,25 +1065,30 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, PBPOffset = FFI->getObjectOffset(PBPIndex); } - bool UsesTCRet = RetOpcode == PPC::TCRETURNri || - RetOpcode == PPC::TCRETURNdi || - RetOpcode == PPC::TCRETURNai || - RetOpcode == PPC::TCRETURNri8 || - RetOpcode == PPC::TCRETURNdi8 || - RetOpcode == PPC::TCRETURNai8; + bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); + + if (IsReturnBlock) { + unsigned RetOpcode = MBBI->getOpcode(); + bool UsesTCRet = RetOpcode == PPC::TCRETURNri || + RetOpcode == PPC::TCRETURNdi || + RetOpcode == PPC::TCRETURNai || + RetOpcode == PPC::TCRETURNri8 || + RetOpcode == PPC::TCRETURNdi8 || + RetOpcode == PPC::TCRETURNai8; - if (UsesTCRet) { - int MaxTCRetDelta = FI->getTailCallSPDelta(); - MachineOperand &StackAdjust = MBBI->getOperand(1); - assert(StackAdjust.isImm() && "Expecting immediate value."); - // Adjust stack pointer. - int StackAdj = StackAdjust.getImm(); - int Delta = StackAdj - MaxTCRetDelta; - assert((Delta >= 0) && "Delta must be positive"); - if (MaxTCRetDelta>0) - FrameSize += (StackAdj +Delta); - else - FrameSize += StackAdj; + if (UsesTCRet) { + int MaxTCRetDelta = FI->getTailCallSPDelta(); + MachineOperand &StackAdjust = MBBI->getOperand(1); + assert(StackAdjust.isImm() && "Expecting immediate value."); + // Adjust stack pointer. + int StackAdj = StackAdjust.getImm(); + int Delta = StackAdj - MaxTCRetDelta; + assert((Delta >= 0) && "Delta must be positive"); + if (MaxTCRetDelta>0) + FrameSize += (StackAdj +Delta); + else + FrameSize += StackAdj; + } } // Frames of 32KB & larger require special handling because they cannot be @@ -1066,7 +1128,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, .addImm(0) .addReg(SPReg); } - } if (MustSaveLR) @@ -1109,52 +1170,55 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF, // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail // call optimization - if (MF.getTarget().Options.GuaranteedTailCallOpt && - (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && - MF.getFunction()->getCallingConv() == CallingConv::Fast) { - PPCFunctionInfo *FI = MF.getInfo(); - unsigned CallerAllocatedAmt = FI->getMinReservedArea(); + if (IsReturnBlock) { + unsigned RetOpcode = MBBI->getOpcode(); + if (MF.getTarget().Options.GuaranteedTailCallOpt && + (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && + MF.getFunction()->getCallingConv() == CallingConv::Fast) { + PPCFunctionInfo *FI = MF.getInfo(); + unsigned CallerAllocatedAmt = FI->getMinReservedArea(); - if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { - BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) - .addReg(SPReg).addImm(CallerAllocatedAmt); - } else { - BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) + if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { + BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) + .addReg(SPReg).addImm(CallerAllocatedAmt); + } else { + BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) .addImm(CallerAllocatedAmt >> 16); - BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) + BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) .addReg(ScratchReg, RegState::Kill) .addImm(CallerAllocatedAmt & 0xFFFF); - BuildMI(MBB, MBBI, dl, AddInst) + BuildMI(MBB, MBBI, dl, AddInst) .addReg(SPReg) .addReg(FPReg) .addReg(ScratchReg); - } - } else if (RetOpcode == PPC::TCRETURNdi) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - } else if (RetOpcode == PPC::TCRETURNri) { - MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); - } else if (RetOpcode == PPC::TCRETURNai) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); - } else if (RetOpcode == PPC::TCRETURNdi8) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). - addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); - } else if (RetOpcode == PPC::TCRETURNri8) { - MBBI = MBB.getLastNonDebugInstr(); - assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); - } else if (RetOpcode == PPC::TCRETURNai8) { - MBBI = MBB.getLastNonDebugInstr(); - MachineOperand &JumpTarget = MBBI->getOperand(0); - BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); + } + } else if (RetOpcode == PPC::TCRETURNdi) { + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 
+        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    } else if (RetOpcode == PPC::TCRETURNri) {
+      MBBI = MBB.getLastNonDebugInstr();
+      assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+    } else if (RetOpcode == PPC::TCRETURNai) {
+      MBBI = MBB.getLastNonDebugInstr();
+      MachineOperand &JumpTarget = MBBI->getOperand(0);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+    } else if (RetOpcode == PPC::TCRETURNdi8) {
+      MBBI = MBB.getLastNonDebugInstr();
+      MachineOperand &JumpTarget = MBBI->getOperand(0);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+    } else if (RetOpcode == PPC::TCRETURNri8) {
+      MBBI = MBB.getLastNonDebugInstr();
+      assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+    } else if (RetOpcode == PPC::TCRETURNai8) {
+      MBBI = MBB.getLastNonDebugInstr();
+      MachineOperand &JumpTarget = MBBI->getOperand(0);
+      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+    }
+  }
 }
 
@@ -1200,8 +1264,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
   // Reserve stack space for the PIC Base register (R30).
   // Only used in SVR4 32-bit.
   if (FI->usesPICBase()) {
-    int PBPSI = FI->getPICBasePointerSaveIndex();
-    PBPSI = MFI->CreateFixedObject(4, -8, true);
+    int PBPSI = MFI->CreateFixedObject(4, -8, true);
     FI->setPICBasePointerSaveIndex(PBPSI);
   }
 
@@ -1710,3 +1773,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 
   return true;
 }
+
+bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+  return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
+          MF.getSubtarget<PPCSubtarget>().isPPC64());
+}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index d6a389bfbf0d..bbe1329a5352 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -29,6 +29,30 @@ class PPCFrameLowering: public TargetFrameLowering {
   const unsigned LinkageSize;
   const unsigned BasePointerSaveOffset;
 
+  /**
+   * \brief Find a register that can be used in function prologue and epilogue
+   *
+   * Find a register that can be used as the scratch register in the function
+   * prologue and epilogue to save various registers (Link Register, Base
+   * Pointer, etc.). Prefer R0, if it is available. If it is not available,
+   * then choose a different register.
+   *
+   * This method will return true if an available register was found (including
+   * R0). If no available registers are found, the method returns false and sets
+   * ScratchRegister to R0, as per the recommendation in the ABI.
+   *
+   * \param[in] MBB The machine basic block to find an available register for
+   * \param[in] UseAtEnd Specify whether the scratch register will be used at
+   *                     the end of the basic block (i.e., will the scratch
+   *                     register kill a register defined in the basic block)
+   * \param[out] ScratchRegister The scratch register to use
+   * \return true if a scratch register was found; false if a scratch register
+   *         was not found and R0 is being used as the default.
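+   *
+   * A sketch of the intended calling pattern (illustrative only; the real
+   * callers are emitPrologue and emitEpilogue in PPCFrameLowering.cpp):
+   * \code
+   *   unsigned ScratchReg = 0;
+   *   // UseAtEnd = false: the register is needed at the start of the block.
+   *   if (!findScratchRegister(&MBB, false, &ScratchReg))
+   *     ; // Nothing was free: ScratchReg is R0, as the ABI recommends.
+   *   // ScratchReg may now be clobbered by the prologue sequence.
+   * \endcode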
+   */
+  bool findScratchRegister(MachineBasicBlock *MBB,
+                           bool UseAtEnd,
+                           unsigned *ScratchRegister) const;
+
 public:
   PPCFrameLowering(const PPCSubtarget &STI);
 
@@ -92,6 +116,13 @@ public:
 
   const SpillSlot *
   getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+
+  bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
+  /// Methods used by shrink wrapping to determine if MBB can be used for the
+  /// function prologue/epilogue.
+  bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
+  bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
 };
 } // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 932226842bb7..1eaa8118ba0a 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -16,6 +16,8 @@
 #include "MCTargetDesc/PPCPredicates.h"
 #include "PPCMachineFunctionInfo.h"
 #include "PPCTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -52,6 +54,11 @@ static cl::opt<bool> BPermRewriterNoMasking(
              "bit permutations"),
     cl::Hidden);
 
+static cl::opt<bool> EnableBranchHint(
+  "ppc-use-branch-hint", cl::init(true),
+    cl::desc("Enable static hinting of branches on ppc"),
+    cl::Hidden);
+
 namespace llvm {
   void initializePPCDAGToDAGISelPass(PassRegistry&);
 }
@@ -286,7 +293,7 @@ void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
   // Find all return blocks, outputting a restore in each epilog.
   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
-    if (!BB->empty() && BB->back().isReturn()) {
+    if (BB->isReturnBlock()) {
       IP = BB->end(); --IP;
 
       // Skip over all terminator instructions, which are part of the return
@@ -393,6 +400,55 @@ static bool isInt32Immediate(SDValue N, unsigned &Imm) {
   return isInt32Immediate(N.getNode(), Imm);
 }
 
+static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
+                              const SDValue &DestMBB) {
+  assert(isa<BasicBlockSDNode>(DestMBB));
+
+  if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
+
+  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
+  const TerminatorInst *BBTerm = BB->getTerminator();
+
+  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
+
+  const BasicBlock *TBB = BBTerm->getSuccessor(0);
+  const BasicBlock *FBB = BBTerm->getSuccessor(1);
+
+  auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
+  auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
+
+  // We only want to handle cases that are easy to predict statically, e.g. a
+  // C++ throw statement, which is very likely not taken, or a call to a
+  // function that never returns, e.g. stdlib exit(). So we set Threshold to
+  // filter out the unwanted cases.
+  //
+  // Below is the LLVM branch weight table; we only want to handle cases 1
+  // and 2.
+  //
+  // Case                  Taken:Nontaken  Example
+  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
+  // 2. Invoke-terminating 1:1048575
+  // 3. Coldblock          4:64            __builtin_expect
+  // 4. Loop Branch        124:4           For loop
+  // 5. PH/ZH/FPH          20:12
+  const uint32_t Threshold = 10000;
+
+  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
+    return PPC::BR_NO_HINT;
+
+  DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
+               << BB->getName() << "'\n"
+               << " -> " << TBB->getName() << ": " << TProb << "\n"
+               << " -> " << FBB->getName() << ": " << FProb << "\n");
+
+  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
+
+  // If the destination basic block is the false successor (FBB), swap the
+  // branch probabilities, because we want 'TProb' to stand for the
+  // probability of branching to the destination basic block.
+  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
+    std::swap(TProb, FProb);
+
+  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
+}
 
 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 // opcode and that it has an immediate integer right operand.
@@ -564,7 +620,6 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
   // Handle first 32 bits.
   unsigned Lo = Imm & 0xFFFF;
-  unsigned Hi = (Imm >> 16) & 0xFFFF;
 
   // Simple value.
   if (isInt<16>(Imm)) {
@@ -586,9 +641,9 @@ static unsigned SelectInt64CountDirect(int64_t Imm) {
     ++Result;
 
   // Add in the last bits as required.
-  if ((Hi = (Remainder >> 16) & 0xFFFF))
+  if ((Remainder >> 16) & 0xFFFF)
     ++Result;
-  if ((Lo = Remainder & 0xFFFF))
+  if (Remainder & 0xFFFF)
     ++Result;
 
   return Result;
@@ -1028,7 +1083,7 @@ class BitPermutationSelector {
         BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
         BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
         BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
-      DEBUG(dbgs() << "\tcombining final bit group with inital one\n");
+      DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
       BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
       BitGroups.erase(BitGroups.begin());
     }
@@ -1557,10 +1612,7 @@ class BitPermutationSelector {
           return false;
       }
 
-      if (VRI.RLAmt != EffRLAmt)
-        return false;
-
-      return true;
+      return VRI.RLAmt == EffRLAmt;
     };
 
     for (auto &BG : BitGroups) {
@@ -2781,7 +2833,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     if (PPCSubTarget->hasVSX() &&
         (N->getValueType(0) == MVT::v2f64 || N->getValueType(0) == MVT::v2i64)) {
       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
-      
+
       SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
               Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
       unsigned DM[2];
@@ -2798,7 +2850,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
         LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
         SDValue Base, Offset;
 
-        if (LD->isUnindexed() && 
+        if (LD->isUnindexed() &&
            LD->hasOneUse() && Op1.hasOneUse() &&
            (LD->getMemoryVT() == MVT::f64 || LD->getMemoryVT() == MVT::i64) &&
            SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
@@ -2841,8 +2893,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
     // Op #3 is the Dest MBB
     // Op #4 is the Flag.
     // Prevent PPC::PRED_* from being selected into LI.
- SDValue Pred = - getI32Imm(cast(N->getOperand(1))->getZExtValue(), dl); + unsigned PCC = cast(N->getOperand(1))->getZExtValue(); + if (EnableBranchHint) + PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3)); + + SDValue Pred = getI32Imm(PCC, dl); SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), N->getOperand(0), N->getOperand(4) }; return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); @@ -2871,6 +2926,9 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { BitComp, N->getOperand(4), N->getOperand(0)); } + if (EnableBranchHint) + PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4)); + SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, N->getOperand(4), N->getOperand(0) }; @@ -2903,9 +2961,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { break; // The first source operand is a TargetGlobalAddress or a TargetJumpTable. - // If it is an externally defined symbol, a symbol with common linkage, - // a non-local function address, or a jump table address, or if we are - // generating code for large code model, we generate: + // If it must be toc-referenced according to PPCSubTarget, we generate: // LDtocL(, ADDIStocHA(%X2, )) // Otherwise we generate: // ADDItocL(ADDIStocHA(%X2, ), ) @@ -2920,13 +2976,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { MVT::i64, GA, SDValue(Tmp, 0))); if (GlobalAddressSDNode *G = dyn_cast(GA)) { - const GlobalValue *GValue = G->getGlobal(); - if ((GValue->getType()->getElementType()->isFunctionTy() && - !GValue->isStrongDefinitionForLinker()) || - GValue->isDeclaration() || GValue->hasCommonLinkage() || - GValue->hasAvailableExternallyLinkage()) + const GlobalValue *GV = G->getGlobal(); + unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); + if (GVFlags & PPCII::MO_NLP_FLAG) { return transferMemOperands(N, CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0))); + } } return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, @@ -3110,7 +3165,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { if (!CurDAG->MaskedValueIsZero(Op0, APInt::getHighBitsSet(Bits, Bits - (b+1)*8))) return false; - + LHS = Op0.getOperand(0); RHS = Op0.getOperand(1); return true; @@ -3305,7 +3360,7 @@ void PPCDAGToDAGISel::PreprocessISelDAG() { bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { - SDNode *N = --Position; + SDNode *N = &*--Position; if (N->use_empty()) continue; @@ -3989,7 +4044,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() { bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { - SDNode *N = --Position; + SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; @@ -4145,7 +4200,7 @@ void PPCDAGToDAGISel::PeepholePPC64() { ++Position; while (Position != CurDAG->allnodes_begin()) { - SDNode *N = --Position; + SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; @@ -4184,16 +4239,24 @@ void PPCDAGToDAGISel::PeepholePPC64() { break; } - // If this is a load or store with a zero offset, we may be able to - // fold an add-immediate into the memory operation. - if (!isa(N->getOperand(FirstOp)) || - N->getConstantOperandVal(FirstOp) != 0) + // If this is a load or store with a zero offset, or within the alignment, + // we may be able to fold an add-immediate into the memory operation. 
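+  // For instance (illustrative, not from this patch):
+  //   addis 3, 2, foo@toc@ha
+  //   addi  3, 3, foo@toc@l
+  //   ld    4, 0(3)
+  // can become
+  //   addis 3, 2, foo@toc@ha
+  //   ld    4, foo@toc@l(3)
+  // i.e. the low-part add-immediate is folded into the load's displacement,
+  // provided the displacement still satisfies the alignment constraints.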
+ // The check against alignment is below, as it can't occur until we check + // the arguments to N + if (!isa(N->getOperand(FirstOp))) continue; SDValue Base = N->getOperand(FirstOp + 1); if (!Base.isMachineOpcode()) continue; + // On targets with fusion, we don't want this to fire and remove a fusion + // opportunity, unless a) it results in another fusion opportunity or + // b) optimizing for size. + if (PPCSubTarget->hasFusion() && + (!MF->getFunction()->optForSize() && !Base.hasOneUse())) + continue; + unsigned Flags = 0; bool ReplaceFlags = true; @@ -4237,6 +4300,17 @@ void PPCDAGToDAGISel::PeepholePPC64() { break; } + SDValue ImmOpnd = Base.getOperand(1); + int MaxDisplacement = 0; + if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { + const GlobalValue *GV = GA->getGlobal(); + MaxDisplacement = GV->getAlignment() - 1; + } + + int Offset = N->getConstantOperandVal(FirstOp); + if (Offset < 0 || Offset > MaxDisplacement) + continue; + // We found an opportunity. Reverse the operands from the add // immediate and substitute them into the load or store. If // needed, update the target flags for the immediate operand to @@ -4247,8 +4321,6 @@ void PPCDAGToDAGISel::PeepholePPC64() { DEBUG(N->dump(CurDAG)); DEBUG(dbgs() << "\n"); - SDValue ImmOpnd = Base.getOperand(1); - // If the relocation information isn't already present on the // immediate operand, add it now. if (ReplaceFlags) { @@ -4259,17 +4331,17 @@ void PPCDAGToDAGISel::PeepholePPC64() { // is insufficient for the instruction encoding. if (GV->getAlignment() < 4 && (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || - StorageOpcode == PPC::LWA)) { + StorageOpcode == PPC::LWA || (Offset % 4) != 0)) { DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } - ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); } else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { const Constant *C = CP->getConstVal(); ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlignment(), - 0, Flags); + Offset, Flags); } } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 1b8f8fb2f45b..af9ad077a7ce 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -42,10 +42,6 @@ using namespace llvm; -// FIXME: Remove this once soft-float is supported. -static cl::opt DisablePPCFloatInVariadic("disable-ppc-float-in-variadic", -cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden); - static cl::opt DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden); @@ -72,8 +68,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // Set up the register classes. addRegisterClass(MVT::i32, &PPC::GPRCRegClass); - addRegisterClass(MVT::f32, &PPC::F4RCRegClass); - addRegisterClass(MVT::f64, &PPC::F8RCRegClass); + if (!Subtarget.useSoftFloat()) { + addRegisterClass(MVT::f32, &PPC::F4RCRegClass); + addRegisterClass(MVT::f64, &PPC::F8RCRegClass); + } // PowerPC has an i16 but no i8 (or i1) SEXTLOAD for (MVT VT : MVT::integer_valuetypes()) { @@ -107,8 +105,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); - AddPromotedToType (ISD::UINT_TO_FP, MVT::i1, - isPPC64 ? 
MVT::i64 : MVT::i32); + AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, + isPPC64 ? MVT::i64 : MVT::i32); } else { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom); @@ -257,10 +255,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::BITCAST, MVT::f32, Expand); - setOperationAction(ISD::BITCAST, MVT::i32, Expand); - setOperationAction(ISD::BITCAST, MVT::i64, Expand); - setOperationAction(ISD::BITCAST, MVT::f64, Expand); + if (Subtarget.hasDirectMove()) { + setOperationAction(ISD::BITCAST, MVT::f32, Legal); + setOperationAction(ISD::BITCAST, MVT::i32, Legal); + setOperationAction(ISD::BITCAST, MVT::i64, Legal); + setOperationAction(ISD::BITCAST, MVT::f64, Legal); + } else { + setOperationAction(ISD::BITCAST, MVT::f32, Expand); + setOperationAction(ISD::BITCAST, MVT::i32, Expand); + setOperationAction(ISD::BITCAST, MVT::i64, Expand); + setOperationAction(ISD::BITCAST, MVT::f64, Expand); + } // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); @@ -329,6 +334,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom); + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom); // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -403,9 +410,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // will selectively turn on ones that can be effectively codegen'd. for (MVT VT : MVT::vector_valuetypes()) { // add/sub are legal for all supported vector VT's. 
- setOperationAction(ISD::ADD , VT, Legal); - setOperationAction(ISD::SUB , VT, Legal); - + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); + // Vector instructions introduced in P8 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) { setOperationAction(ISD::CTPOP, VT, Legal); @@ -477,6 +484,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand); + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); for (MVT InnerVT : MVT::vector_valuetypes()) { setTruncStoreAction(VT, InnerVT, Expand); @@ -519,12 +528,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } - - if (Subtarget.hasP8Altivec()) + if (Subtarget.hasP8Altivec()) setOperationAction(ISD::MUL, MVT::v4i32, Legal); else setOperationAction(ISD::MUL, MVT::v4i32, Custom); - + setOperationAction(ISD::MUL, MVT::v8i16, Custom); setOperationAction(ISD::MUL, MVT::v16i8, Custom); @@ -545,6 +553,21 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); + if (Subtarget.hasP8Vector()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal); + } + if (Subtarget.hasDirectMove()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); + } + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); @@ -813,15 +836,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLibcallName(RTLIB::SRA_I128, nullptr); } - if (isPPC64) { - setStackPointerRegisterToSaveRestore(PPC::X1); - setExceptionPointerRegister(PPC::X3); - setExceptionSelectorRegister(PPC::X4); - } else { - setStackPointerRegisterToSaveRestore(PPC::R1); - setExceptionPointerRegister(PPC::R3); - setExceptionSelectorRegister(PPC::R4); - } + setStackPointerRegisterToSaveRestore(isPPC64 ? 
PPC::X1 : PPC::R1); // We have target-specific dag combine patterns for the following nodes: setTargetDAGCombine(ISD::SINT_TO_FP); @@ -942,9 +957,9 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, if (EltAlign > MaxAlign) MaxAlign = EltAlign; } else if (StructType *STy = dyn_cast(Ty)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + for (auto *EltTy : STy->elements()) { unsigned EltAlign = 0; - getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign); + getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; if (MaxAlign == MaxMaxAlign) @@ -969,6 +984,10 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty, return Align; } +bool PPCTargetLowering::useSoftFloat() const { + return Subtarget.useSoftFloat(); +} + const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; @@ -992,6 +1011,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; + case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; case PPCISD::SRL: return "PPCISD::SRL"; case PPCISD::SRA: return "PPCISD::SRA"; @@ -1236,7 +1256,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). -/// The ShuffleKind distinguishes between big-endian merges with two +/// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). @@ -1261,7 +1281,7 @@ bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). -/// The ShuffleKind distinguishes between big-endian merges with two +/// The ShuffleKind distinguishes between big-endian merges with two /// different inputs (0), either-endian merges with two identical inputs (1), /// and little-endian merges with two different inputs (2). For the latter, /// the input operands are swapped (see PPCInstrAltivec.td). @@ -1353,7 +1373,7 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset, * - 2 = little-endian merge with two different inputs (inputs are swapped for * little-endian merges). * \param[in] DAG The current SelectionDAG - * \return true iff this shuffle mask + * \return true iff this shuffle mask */ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG) { @@ -1380,7 +1400,7 @@ bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. -/// The ShuffleKind distinguishes between big-endian operations with two +/// The ShuffleKind distinguishes between big-endian operations with two /// different inputs (0), either-endian operations with two identical inputs /// (1), and little-endian operations with two different inputs (2). 
For the /// latter, the input operands are swapped (see PPCInstrAltivec.td). @@ -1513,8 +1533,8 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { for (unsigned i = 0; i != Multiple-1; ++i) { if (!UniquedVals[i].getNode()) continue; // Must have been undefs. - LeadingZero &= cast(UniquedVals[i])->isNullValue(); - LeadingOnes &= cast(UniquedVals[i])->isAllOnesValue(); + LeadingZero &= isNullConstant(UniquedVals[i]); + LeadingOnes &= isAllOnesConstant(UniquedVals[i]); } // Finally, check the least significant entry. if (LeadingZero) { @@ -1629,7 +1649,6 @@ static bool isIntS16Immediate(SDValue Op, short &Imm) { return isIntS16Immediate(Op.getNode(), Imm); } - /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. @@ -1998,10 +2017,10 @@ static SDValue getTOCEntry(SelectionDAG &DAG, SDLoc dl, bool Is64Bit, DAG.getNode(PPCISD::GlobalBaseReg, dl, VT); SDValue Ops[] = { GA, Reg }; - return DAG.getMemIntrinsicNode(PPCISD::TOC_ENTRY, dl, - DAG.getVTList(VT, MVT::Other), Ops, VT, - MachinePointerInfo::getGOT(), 0, false, true, - false, 0); + return DAG.getMemIntrinsicNode( + PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0, false, true, + false, 0); } SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, @@ -2092,6 +2111,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, // large models could be added if users need it, at the cost of // additional complexity. GlobalAddressSDNode *GA = cast(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2480,7 +2502,6 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, // */ // } va_list[1]; - SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32); SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32); @@ -2536,7 +2557,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, #include "PPCGenCallingConv.inc" -// Function whose sole purpose is to kill compiler warnings +// Function whose sole purpose is to kill compiler warnings // stemming from unused functions included from PPCGenCallingConv.inc. CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; @@ -2933,8 +2954,9 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( PPC::F8 }; unsigned NumFPArgRegs = array_lengthof(FPArgRegs); - if (DisablePPCFloatInVariadic) - NumFPArgRegs = 0; + + if (Subtarget.useSoftFloat()) + NumFPArgRegs = 0; FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs)); FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs)); @@ -3177,15 +3199,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( EVT ObjType = (ObjSize == 1 ? MVT::i8 : (ObjSize == 2 ? MVT::i16 : MVT::i32)); Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg, - MachinePointerInfo(FuncArg), - ObjType, false, false, 0); + MachinePointerInfo(&*FuncArg), ObjType, + false, false, 0); } else { // For sizes that don't fit a truncating store (3, 5, 6, 7), // store the whole register as-is to the parameter save area // slot. 
- Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg), - false, false, 0); + Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(&*FuncArg), false, false, 0); } MemOps.push_back(Store); @@ -3212,9 +3234,9 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Off = DAG.getConstant(j, dl, PtrVT); Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off); } - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr, - MachinePointerInfo(FuncArg, j), - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, Addr, + MachinePointerInfo(&*FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; } @@ -3592,7 +3614,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg), + MachinePointerInfo(&*FuncArg), ObjType, false, false, 0); MemOps.push_back(Store); ++GPR_idx; @@ -3615,9 +3637,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); - SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo(FuncArg, j), - false, false, 0); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo(&*FuncArg, j), false, false, 0); MemOps.push_back(Store); ++GPR_idx; ArgOffset += PtrByteSize; @@ -3880,7 +3902,6 @@ struct TailCallArgumentInfo { TailCallArgumentInfo() : FrameIdx(0) {} }; - } /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. @@ -3895,9 +3916,10 @@ StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue FIN = TailCallArgs[i].FrameIdxOp; int FI = TailCallArgs[i].FrameIdx; // Store relative to framepointer. - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + MemOpChains.push_back(DAG.getStore( + Chain, dl, Arg, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, + false, 0)); } } @@ -3922,9 +3944,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, NewRetAddrLoc, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); - Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, - MachinePointerInfo::getFixedStack(NewRetAddr), - false, false, 0); + Chain = DAG.getStore( + Chain, dl, OldRetAddr, NewRetAddrFrIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewRetAddr), + false, false, 0); // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack // slot as the FP is never overwritten. 
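The hunks above and below all make the same API transition: the MachinePointerInfo factories (getFixedStack, getStack, getGOT, getConstantPool) now take the owning MachineFunction as an explicit first argument instead of looking it up behind the scenes. A minimal sketch of the new calling convention, assuming a SelectionDAG DAG, a chain Chain, a value Val, a pointer FIN, a debug location dl, and a frame index FI are already in scope:

    // The pseudo source values backing these MachinePointerInfos are now
    // retrieved through the MachineFunction, so it must be passed explicitly.
    MachineFunction &MF = DAG.getMachineFunction();
    MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
    SDValue Store = DAG.getStore(Chain, dl, Val, FIN, MPI,
                                 /*isVolatile=*/false,
                                 /*isNonTemporal=*/false,
                                 /*Alignment=*/0);

The older single-argument form, MachinePointerInfo::getFixedStack(FI), is exactly what each minus line in these hunks removes.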
@@ -3933,9 +3956,10 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); - Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, - MachinePointerInfo::getFixedStack(NewFPIdx), - false, false, 0); + Chain = DAG.getStore( + Chain, dl, OldFP, NewFramePtrIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), NewFPIdx), + false, false, 0); } } return Chain; } @@ -4812,8 +4836,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, continue; break; case MVT::v4f32: - // When using QPX, this is handled like a FP register, otherwise, it - // is an Altivec register. + // When using QPX, this is handled like a FP register, otherwise, it + // is an Altivec register. if (Subtarget.hasQPX()) { if (++NumFPRsUsed <= NumFPRs) continue; @@ -5318,9 +5342,10 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset(); SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); - Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, - MachinePointerInfo::getStack(TOCSaveOffset), - false, false, 0); + Chain = DAG.getStore( + Val.getValue(1), dl, Val, AddPtr, + MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset), + false, false, 0); // In the ELFv2 ABI, R12 must contain the address of an indirect callee. // This does not mean the MTCTR instruction must use R12; it's easier // to model this as an extra parameter, so do that. @@ -5341,9 +5366,9 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, FPOp, true, TailCallArguments); - return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, - hasNest, DAG, RegsToPass, InFlag, Chain, CallSeqStart, - Callee, SPDiff, NumBytes, Ins, InVals, CS); + return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, hasNest, + DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee, + SPDiff, NumBytes, Ins, InVals, CS); } SDValue @@ -5798,6 +5823,22 @@ PPCTargetLowering::LowerReturn(SDValue Chain, return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps); } +SDValue PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET( + SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { + SDLoc dl(Op); + + // Get the correct type for integers. + EVT IntVT = Op.getValueType(); + + // Get the inputs. + SDValue Chain = Op.getOperand(0); + SDValue FPSIdx = getFramePointerFrameIndex(DAG); + // Build a DYNAREAOFFSET node. + SDValue Ops[2] = {Chain, FPSIdx}; + SDVTList VTs = DAG.getVTList(IntVT); + return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops); +} + SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const { // When we pop the dynamic allocation we need to restore the SP link.
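The LowerGET_DYNAMIC_AREA_OFFSET hook added above is reached from IR through the llvm.get.dynamic.area.offset intrinsic. A hedged sketch of how a front end might emit it; the enum name Intrinsic::get_dynamic_area_offset and the helper function are assumptions for illustration, not code from this patch:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Emits a dynamic alloca followed by the intrinsic that selects down to
    // PPCISD::DYNAREAOFFSET; PPCRegisterInfo::eliminateFrameIndex later
    // rewrites the frame index into the offset from the native SP to the
    // most recent dynamic allocation.
    static Value *emitDynAreaOffset(IRBuilder<> &B, Value *NBytes /* i64 */) {
      Module *M = B.GetInsertBlock()->getModule();
      B.CreateAlloca(B.getInt8Ty(), NBytes);           // dynamic alloca
      Function *DynOff = Intrinsic::getDeclaration(
          M, Intrinsic::get_dynamic_area_offset, {B.getInt64Ty()});
      return B.CreateCall(DynOff);                     // i64 on PPC64
    }

Because GET_DYNAMIC_AREA_OFFSET is custom-lowered above for both MVT::i32 and MVT::i64, the same intrinsic works on 32- and 64-bit subtargets.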
@@ -5828,10 +5869,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, false, false, 0); } - - -SDValue -PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { +SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool isPPC64 = Subtarget.isPPC64(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(MF.getDataLayout()); @@ -5983,6 +6021,10 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { if (!DAG.getTarget().Options.NoInfsFPMath || !DAG.getTarget().Options.NoNaNsFPMath) return Op; + // TODO: Propagate flags from the select rather than global settings. + SDNodeFlags Flags; + Flags.setNoInfs(true); + Flags.setNoNaNs(true); ISD::CondCode CC = cast(Op.getOperand(4))->get(); @@ -6033,7 +6075,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { case ISD::SETNE: std::swap(TV, FV); case ISD::SETEQ: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -6043,25 +6085,25 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); case ISD::SETULT: case ISD::SETLT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOGE: case ISD::SETGE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); case ISD::SETUGT: case ISD::SETGT: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); case ISD::SETOLE: case ISD::SETLE: - Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); + Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, &Flags); if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); @@ -6101,7 +6143,8 @@ void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast(FIPtr)->getIndex(); - MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); + MachinePointerInfo MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); // Emit a store to the stack slot. SDValue Chain; @@ -6291,11 +6334,11 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5). 
// This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5 Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value); - + SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::f64); - FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, - FPHalfs, FPHalfs, FPHalfs, FPHalfs); - + FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs, + FPHalfs, FPHalfs); + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); if (Op.getValueType() != MVT::v4f64) @@ -6421,17 +6464,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore( + DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, 0); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; - RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); + RLI.MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; MachineMemOperand *MMO = @@ -6472,16 +6516,18 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore( + DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, 0); assert(cast(Store)->getMemoryVT() == MVT::i32 && "Expected an i32 store"); RLI.Ptr = FIdx; RLI.Chain = Store; - RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); + RLI.MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); RLI.Alignment = 4; } @@ -6506,14 +6552,16 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, Op.getOperand(0)); // STD the extended value into the stack slot. - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, 0); + SDValue Store = DAG.getStore( + DAG.getEntryNode(), dl, Ext64, FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, 0); // Load the value as a double. - Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, - MachinePointerInfo::getFixedStack(FrameIdx), - false, false, false, 0); + Ld = DAG.getLoad( + MVT::f64, dl, Store, FIdx, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx), + false, false, false, 0); } // FCFID it and return it. @@ -6735,7 +6783,6 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2); } - /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified /// amount. The result has the specified value type. static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, @@ -6768,7 +6815,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, // to a zero vector to get the boolean result. 
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -6794,8 +6842,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, for (unsigned i = 0; i < 4; ++i) { if (BVN->getOperand(i).getOpcode() == ISD::UNDEF) CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext())); - else if (cast(BVN->getOperand(i))-> - getConstantIntValue()->isZero()) + else if (isNullConstant(BVN->getOperand(i))) continue; else CV[i] = One; @@ -6814,9 +6861,9 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, ValueVTs.push_back(MVT::Other); // chain SDVTList VTs = DAG.getVTList(ValueVTs); - return DAG.getMemIntrinsicNode(PPCISD::QVLFSb, - dl, VTs, Ops, MVT::v4f32, - MachinePointerInfo::getConstantPool()); + return DAG.getMemIntrinsicNode( + PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); } SmallVector Stores; @@ -6915,7 +6962,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (SextVal >= -16 && SextVal <= 15) return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); - // Two instruction sequences. // If this value is in the range [-32,30] and is even, use: @@ -7304,11 +7350,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, V1, V2, VPermMask); } -/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an -/// altivec comparison. If it is, return true and fill in Opc/isDot with +/// getVectorCompareInfo - Given an intrinsic, return false if it is not a +/// vector comparison. If it is, return true and fill in Opc/isDot with /// information about the intrinsic. 
-static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, - bool &isDot, const PPCSubtarget &Subtarget) { +static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, + bool &isDot, const PPCSubtarget &Subtarget) { unsigned IntrinsicID = cast(Intrin.getOperand(0))->getZExtValue(); CompareOpc = -1; @@ -7321,12 +7367,11 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpequd_p: + case Intrinsic::ppc_altivec_vcmpequd_p: if (Subtarget.hasP8Altivec()) { - CompareOpc = 199; - isDot = 1; - } - else + CompareOpc = 199; + isDot = 1; + } else return false; break; @@ -7335,28 +7380,48 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtsd_p: + case Intrinsic::ppc_altivec_vcmpgtsd_p: if (Subtarget.hasP8Altivec()) { - CompareOpc = 967; - isDot = 1; - } - else + CompareOpc = 967; + isDot = 1; + } else return false; break; case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; - case Intrinsic::ppc_altivec_vcmpgtud_p: + case Intrinsic::ppc_altivec_vcmpgtud_p: if (Subtarget.hasP8Altivec()) { - CompareOpc = 711; - isDot = 1; - } - else + CompareOpc = 711; + isDot = 1; + } else return false; break; - + // VSX predicate comparisons use the same infrastructure + case Intrinsic::ppc_vsx_xvcmpeqdp_p: + case Intrinsic::ppc_vsx_xvcmpgedp_p: + case Intrinsic::ppc_vsx_xvcmpgtdp_p: + case Intrinsic::ppc_vsx_xvcmpeqsp_p: + case Intrinsic::ppc_vsx_xvcmpgesp_p: + case Intrinsic::ppc_vsx_xvcmpgtsp_p: + if (Subtarget.hasVSX()) { + switch (IntrinsicID) { + case Intrinsic::ppc_vsx_xvcmpeqdp_p: CompareOpc = 99; break; + case Intrinsic::ppc_vsx_xvcmpgedp_p: CompareOpc = 115; break; + case Intrinsic::ppc_vsx_xvcmpgtdp_p: CompareOpc = 107; break; + case Intrinsic::ppc_vsx_xvcmpeqsp_p: CompareOpc = 67; break; + case Intrinsic::ppc_vsx_xvcmpgesp_p: CompareOpc = 83; break; + case Intrinsic::ppc_vsx_xvcmpgtsp_p: CompareOpc = 75; break; + } + isDot = 1; + } + else + return false; + + break; + // Normal Comparisons. 
case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; @@ -7365,10 +7430,9 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpequd: if (Subtarget.hasP8Altivec()) { - CompareOpc = 199; - isDot = 0; - } - else + CompareOpc = 199; + isDot = 0; + } else return false; break; @@ -7377,24 +7441,22 @@ static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtsd: + case Intrinsic::ppc_altivec_vcmpgtsd: if (Subtarget.hasP8Altivec()) { - CompareOpc = 967; - isDot = 0; - } - else + CompareOpc = 967; + isDot = 0; + } else return false; break; case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; - case Intrinsic::ppc_altivec_vcmpgtud: + case Intrinsic::ppc_altivec_vcmpgtud: if (Subtarget.hasP8Altivec()) { - CompareOpc = 711; - isDot = 0; - } - else + CompareOpc = 711; + isDot = 0; + } else return false; break; @@ -7411,7 +7473,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDLoc dl(Op); int CompareOpc; bool isDot; - if (!getAltivecCompareInfo(Op, CompareOpc, isDot, Subtarget)) + if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget)) return SDValue(); // Don't custom lower most intrinsics. // If this is a non-dot comparison, make the VCMP node and we are done. @@ -7536,7 +7598,7 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs, FPHalfs, FPHalfs); - Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); // Now convert to an integer and store. Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, @@ -7545,7 +7607,8 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -7752,7 +7815,7 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, FPHalfs = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f64, FPHalfs, FPHalfs, FPHalfs, FPHalfs); - Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); + Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs); // Now convert to an integer and store. 
Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64, @@ -7761,7 +7824,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); - MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FrameIdx); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); EVT PtrVT = getPointerTy(DAG.getDataLayout()); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -7798,11 +7862,10 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op, SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx); - Stores.push_back(DAG.getTruncStore(StoreChain, dl, Loads[i], Idx, - SN->getPointerInfo().getWithOffset(i), - MVT::i8 /* memory type */, - SN->isNonTemporal(), SN->isVolatile(), - 1 /* alignment */, SN->getAAInfo())); + Stores.push_back(DAG.getTruncStore( + StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i), + MVT::i8 /* memory type */, SN->isNonTemporal(), SN->isVolatile(), + 1 /* alignment */, SN->getAAInfo())); } StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); @@ -7906,6 +7969,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); + case ISD::GET_DYNAMIC_AREA_OFFSET: return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG, Subtarget); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); @@ -7971,7 +8035,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, N->getValueType(0)); SDVTList VTs = DAG.getVTList(SVT, MVT::Other); SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), - N->getOperand(1)); + N->getOperand(1)); Results.push_back(NewInt); Results.push_back(NewInt.getValue(1)); @@ -8020,7 +8084,6 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, } } - //===----------------------------------------------------------------------===// // Other Lowering Code //===----------------------------------------------------------------------===// @@ -8089,8 +8152,7 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); @@ -8160,8 +8222,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); @@ -8283,8 +8344,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); - MachineFunction::iterator I = MBB; - ++I; + MachineFunction::iterator I = ++MBB->getIterator(); // Memory Reference MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); @@ -8384,8 +8444,8 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, .addMBB(mainMBB); MIB = BuildMI(*thisMBB, MI, DL, 
TII->get(PPC::B)).addMBB(sinkMBB); - thisMBB->addSuccessor(mainMBB, /* weight */ 0); - thisMBB->addSuccessor(sinkMBB, /* weight */ 1); + thisMBB->addSuccessor(mainMBB, BranchProbability::getZero()); + thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne()); // mainMBB: // mainDstReg = 0 @@ -8562,8 +8622,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // To "insert" these instructions we actually have to insert their // control-flow patterns. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); MachineFunction *F = BB->getParent(); @@ -8675,7 +8734,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // mfspr Rx,TBU # load from TBU // mfspr Ry,TB # load from TB // mfspr Rz,TBU # load from TBU - // cmpw crX,Rx,Rz # check if ‘old’=’new’ + // cmpw crX,Rx,Rz # check if 'old'='new' // bne readLoop # branch if they're not equal // ... @@ -9137,7 +9196,7 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, return SDValue(); } -bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { +unsigned PPCTargetLowering::combineRepeatedFPDivisors() const { // Note: This functionality is used only when unsafe-fp-math is enabled, and // on cores with reciprocal estimates (which are used when unsafe-fp-math is // enabled for division), this functionality is redundant with the default @@ -9150,12 +9209,26 @@ bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { // one FP pipeline) for three or more FDIVs (for generic OOO cores). switch (Subtarget.getDarwinDirective()) { default: - return NumUsers > 2; + return 3; case PPC::DIR_440: case PPC::DIR_A2: case PPC::DIR_E500mc: case PPC::DIR_E5500: - return NumUsers > 1; + return 2; + } +} + +// isConsecutiveLSLoc needs to work even if all adds have not yet been +// collapsed, and so we need to look through chains of them. +static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, + int64_t& Offset, SelectionDAG &DAG) { + if (DAG.isBaseWithConstantOffset(Loc)) { + Base = Loc.getOperand(0); + Offset += cast(Loc.getOperand(1))->getSExtValue(); + + // The base might itself be a base plus an offset, and if so, accumulate + // that as well. 
+ getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG); } } @@ -9178,16 +9251,18 @@ static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } - // Handle X+C - if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && - cast(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + SDValue Base1 = Loc, Base2 = BaseLoc; + int64_t Offset1 = 0, Offset2 = 0; + getBaseWithConstantOffset(Loc, Base1, Offset1, DAG); + getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG); + if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes)) return true; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; - int64_t Offset1 = 0; - int64_t Offset2 = 0; + Offset1 = 0; + Offset2 = 0; bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) @@ -9343,7 +9418,7 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { for (SmallSet::iterator I = LoadRoots.begin(), IE = LoadRoots.end(); I != IE; ++I) { Queue.push_back(*I); - + while (!Queue.empty()) { SDNode *LoadRoot = Queue.pop_back_val(); if (!Visited.insert(LoadRoot).second) @@ -9470,7 +9545,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, } // Visit all inputs, collect all binary operations (and, or, xor and - // select) that are all fed by extensions. + // select) that are all fed by extensions. while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); @@ -9492,7 +9567,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || isa(BinOp.getOperand(i))) { - Inputs.push_back(BinOp.getOperand(i)); + Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || @@ -9572,7 +9647,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, if (isa(Inputs[i])) continue; else - DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); + DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); } // Replace all operations (these are all the same, but have a different @@ -9682,7 +9757,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, SmallPtrSet Visited; // Visit all inputs, collect all binary operations (and, or, xor and - // select) that are all fed by truncations. + // select) that are all fed by truncations. 
while (!BinOps.empty()) { SDValue BinOp = BinOps.back(); BinOps.pop_back(); @@ -9701,7 +9776,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || isa(BinOp.getOperand(i))) { - Inputs.push_back(BinOp.getOperand(i)); + Inputs.push_back(BinOp.getOperand(i)); } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || BinOp.getOperand(i).getOpcode() == ISD::OR || BinOp.getOperand(i).getOpcode() == ISD::XOR || @@ -9915,10 +9990,11 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, "Invalid extension type"); EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout()); SDValue ShiftCst = - DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); - return DAG.getNode(ISD::SRA, dl, N->getValueType(0), - DAG.getNode(ISD::SHL, dl, N->getValueType(0), - N->getOperand(0), ShiftCst), ShiftCst); + DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy); + return DAG.getNode( + ISD::SRA, dl, N->getValueType(0), + DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst), + ShiftCst); } SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N, @@ -10102,16 +10178,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, switch (N->getOpcode()) { default: break; case PPCISD::SHL: - if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { - if (C->isNullValue()) // 0 << V -> 0. + if (isNullConstant(N->getOperand(0))) // 0 << V -> 0. return N->getOperand(0); - } break; case PPCISD::SRL: - if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { - if (C->isNullValue()) // 0 >>u V -> 0. + if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0. return N->getOperand(0); - } break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast(N->getOperand(0))) { @@ -10122,7 +10194,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: + case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: case ISD::SETCC: @@ -10277,7 +10349,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // original unaligned load. MachineFunction &MF = DAG.getMachineFunction(); MachineMemOperand *BaseMMO = - MF.getMachineMemOperand(LD->getMemOperand(), -MemVT.getStoreSize()+1, + MF.getMachineMemOperand(LD->getMemOperand(), + -(long)MemVT.getStoreSize()+1, 2*MemVT.getStoreSize()-1); // Create the new base load. 
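One subtlety in the hunk above deserves a note: the offset passed to getMachineMemOperand is written as -(long)MemVT.getStoreSize()+1 because getStoreSize() returns an unsigned quantity. Negating it without first converting to a signed type wraps modulo 2^32, producing an enormous positive offset instead of a small negative one. A self-contained illustration of the trap:

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned StoreSize = 16;                  // e.g. a 16-byte vector access
      int64_t Wrapped = -StoreSize + 1;         // negation happens in unsigned
      int64_t Intended = -(long)StoreSize + 1;  // convert to signed, then negate
      std::printf("%lld vs %lld\n", (long long)Wrapped, (long long)Intended);
      // Prints "4294967281 vs -15": without the cast the memory operand
      // would describe a range roughly 4 GiB past the base pointer rather
      // than 15 bytes before it.
      return 0;
    }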
@@ -10527,7 +10600,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::BRCOND: { SDValue Cond = N->getOperand(1); SDValue Target = N->getOperand(2); - + if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN && cast(Cond.getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero) { @@ -10558,8 +10631,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, cast(LHS.getOperand(0).getOperand(1))->getZExtValue() == Intrinsic::ppc_is_decremented_ctr_nonzero && isa(LHS.getOperand(1)) && - !cast(LHS.getOperand(1))->getConstantIntValue()-> - isZero()) + !isNullConstant(LHS.getOperand(1))) LHS = LHS.getOperand(0); if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && @@ -10588,7 +10660,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && isa(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && - getAltivecCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { + getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) { assert(isDot && "Can't compare against a vector result!"); // If this is a comparison against something other than 0/1, then we know @@ -10739,8 +10811,11 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { // boundary so that the entire loop fits in one instruction-cache line. uint64_t LoopSize = 0; for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I) - for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) + for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) { LoopSize += TII->GetInstSizeInBytes(J); + if (LoopSize > 32) + break; + } if (LoopSize > 16 && LoopSize <= 32) return 5; @@ -10868,17 +10943,19 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &PPC::QFRCRegClass); if (VT == MVT::v4f32 && Subtarget.hasQPX()) return std::make_pair(0U, &PPC::QSRCRegClass); - return std::make_pair(0U, &PPC::VRRCRegClass); + if (Subtarget.hasAltivec()) + return std::make_pair(0U, &PPC::VRRCRegClass); case 'y': // crrc return std::make_pair(0U, &PPC::CRRCRegClass); } - } else if (Constraint == "wc") { // an individual CR bit. + } else if (Constraint == "wc" && Subtarget.useCRBits()) { + // An individual CR bit. return std::make_pair(0U, &PPC::CRBITRCRegClass); - } else if (Constraint == "wa" || Constraint == "wd" || - Constraint == "wf") { + } else if ((Constraint == "wa" || Constraint == "wd" || + Constraint == "wf") && Subtarget.hasVSX()) { return std::make_pair(0U, &PPC::VSRCRegClass); - } else if (Constraint == "ws") { - if (VT == MVT::f32) + } else if (Constraint == "ws" && Subtarget.hasVSX()) { + if (VT == MVT::f32 && Subtarget.hasP8Vector()) return std::make_pair(0U, &PPC::VSSRCRegClass); else return std::make_pair(0U, &PPC::VSFRCRegClass); @@ -10908,7 +10985,6 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return R; } - /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. 
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, @@ -11358,9 +11434,7 @@ bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); - if (BitSize == 0 || BitSize > 64) - return false; - return true; + return !(BitSize == 0 || BitSize > 64); } bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -11477,11 +11551,21 @@ PPCTargetLowering::getScratchRegisters(CallingConv::ID) const { return ScratchRegs; } +unsigned PPCTargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + return Subtarget.isPPC64() ? PPC::X3 : PPC::R3; +} + +unsigned PPCTargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; +} + bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { if (VT == MVT::v2i64) - return false; + return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves if (Subtarget.hasQPX()) { if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1) diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 6e13533cfdb3..44bcb8942cfc 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -79,6 +79,11 @@ namespace llvm { /// compute an allocation on the stack. DYNALLOC, + /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to + /// compute an offset from native SP to the address of the most recent + /// dynamic alloca. + DYNAREAOFFSET, + /// GlobalBaseReg - On Darwin, this node represents the result of the mflr /// at function entry, used for PIC code. GlobalBaseReg, @@ -423,6 +428,8 @@ namespace llvm { /// DAG node. const char *getTargetNodeName(unsigned Opcode) const override; + bool useSoftFloat() const override; + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; } @@ -655,8 +662,17 @@ namespace llvm { return Ty->isArrayTy(); } - private: + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override; + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + + private: struct ReuseLoadInfo { SDValue Ptr; SDValue Chain; @@ -719,6 +735,8 @@ namespace llvm { const PPCSubtarget &Subtarget) const; SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; + SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; @@ -853,7 +871,7 @@ namespace llvm { bool &UseOneConstNR) const override; SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const override; - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; }; diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index d62833037db5..075e093e41a1 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -369,6 +369,8 @@ let Defs = [X1], Uses = [X1] in def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8", [(set i64:$result, (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>; +def DYNAREAOFFSET8 : Pseudo<(outs i64imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET8", + [(set i64:$result, (PPCdynareaoffset iaddr:$fpsi))]>; let Defs = [LR8] in { def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS), diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index d4e666cc1f3e..c17603a7718a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -144,6 +144,9 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + if (!DefMI->getParent()) + return Latency; + const MachineOperand &DefMO = DefMI->getOperand(DefIdx); unsigned Reg = DefMO.getReg(); @@ -186,6 +189,60 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +// This function does not list all associative and commutative operations, but +// only those worth feeding through the machine combiner in an attempt to +// reduce the critical path. Mostly, this means floating-point operations, +// because they have high latencies (compared to other operations, such as +// and/or, which are also associative and commutative, but have low latencies). +bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { + // FP Add: + case PPC::FADD: + case PPC::FADDS: + // FP Multiply: + case PPC::FMUL: + case PPC::FMULS: + // Altivec Add: + case PPC::VADDFP: + // VSX Add: + case PPC::XSADDDP: + case PPC::XVADDDP: + case PPC::XVADDSP: + case PPC::XSADDSP: + // VSX Multiply: + case PPC::XSMULDP: + case PPC::XVMULDP: + case PPC::XVMULSP: + case PPC::XSMULSP: + // QPX Add: + case PPC::QVFADD: + case PPC::QVFADDS: + case PPC::QVFADDSs: + // QPX Multiply: + case PPC::QVFMUL: + case PPC::QVFMULS: + case PPC::QVFMULSs: + return true; + default: + return false; + } +} + +bool PPCInstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + // Using the machine combiner in this way is potentially expensive, so + // restrict to when aggressive optimizations are desired.
+ if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) + return false; + + // FP reassociation is only legal when we don't need strict IEEE semantics. + if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath) + return false; + + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); +} + // Detect 32 -> 64-bit extensions where we may reuse the low sub-register. bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, @@ -259,16 +316,16 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -// commuteInstruction - We can commute rlwimi instructions, but only if the -// rotate amt is zero. We also have to munge the immediates a bit. -MachineInstr * -PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { +MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { MachineFunction &MF = *MI->getParent()->getParent(); // Normal instructions can be commuted the obvious way. if (MI->getOpcode() != PPC::RLWIMI && MI->getOpcode() != PPC::RLWIMIo) - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); // Note that RLWIMI can be commuted as a 32-bit instruction, but not as a // 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because // changing the relative order of the mask operands might change what happens @@ -286,6 +343,8 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Op0 = (Op2 & ~M) | (Op1 & M) // Swap op1/op2 + assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) && + "Only the operands 1 and 2 can be swapped in RLWIMI/RLWIMIo."); unsigned Reg0 = MI->getOperand(0).getReg(); unsigned Reg1 = MI->getOperand(1).getReg(); unsigned Reg2 = MI->getOperand(2).getReg(); @@ -353,9 +412,9 @@ bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, if (AltOpc == -1) return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); - SrcOpIdx1 = 2; - SrcOpIdx2 = 3; - return true; + // The commutable operand indices are 2 and 3. Return them in SrcOpIdx1 + // and SrcOpIdx2.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3); } void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, @@ -996,11 +1055,10 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MBB.insert(MI, NewMIs[i]); const MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOStore, - MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIdx), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); } @@ -1109,11 +1167,10 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MBB.insert(MI, NewMIs[i]); const MachineFrameInfo &MFI = *MF.getFrameInfo(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FrameIdx), - MFI.getObjectAlignment(FrameIdx)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FrameIdx), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); NewMIs.back()->addMemOperand(MF, MMO); } @@ -1214,7 +1271,7 @@ bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, unsigned ExtraT, MachineBasicBlock &FMBB, unsigned NumF, unsigned ExtraF, - const BranchProbability &Probability) const { + BranchProbability Probability) const { return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB)); } @@ -1691,13 +1748,13 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, MI->setDesc(NewDesc); if (NewDesc.ImplicitDefs) - for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs(); + for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs(); *ImpDefs; ++ImpDefs) if (!MI->definesRegister(*ImpDefs)) MI->addOperand(*MI->getParent()->getParent(), MachineOperand::CreateReg(*ImpDefs, true, true)); if (NewDesc.ImplicitUses) - for (const uint16_t *ImpUses = NewDesc.getImplicitUses(); + for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses(); *ImpUses; ++ImpUses) if (!MI->readsRegister(*ImpUses)) MI->addOperand(*MI->getParent()->getParent(), @@ -1737,3 +1794,35 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { } } +std::pair<unsigned, unsigned> +PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const { + const unsigned Mask = PPCII::MO_ACCESS_MASK; + return std::make_pair(TF & Mask, TF & ~Mask); +} + +ArrayRef<std::pair<unsigned, const char *>> +PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { + using namespace PPCII; + static const std::pair<unsigned, const char *> TargetFlags[] = { + {MO_LO, "ppc-lo"}, + {MO_HA, "ppc-ha"}, + {MO_TPREL_LO, "ppc-tprel-lo"}, + {MO_TPREL_HA, "ppc-tprel-ha"}, + {MO_DTPREL_LO, "ppc-dtprel-lo"}, + {MO_TLSLD_LO, "ppc-tlsld-lo"}, + {MO_TOC_LO, "ppc-toc-lo"}, + {MO_TLS, "ppc-tls"}}; + return makeArrayRef(TargetFlags); +} + +ArrayRef<std::pair<unsigned, const char *>> +PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { + using namespace PPCII; + static const std::pair<unsigned, const char *> TargetFlags[] = { + {MO_PLT_OR_STUB, "ppc-plt-or-stub"}, + {MO_PIC_FLAG, "ppc-pic"}, + {MO_NLP_FLAG, "ppc-nlp"}, + {MO_NLP_HIDDEN_FLAG, "ppc-nlp-hidden"}}; + return makeArrayRef(TargetFlags); +} + diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 40badae644d6..c3c3a480a6aa 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -79,6 +79,23 @@ class
PPCInstrInfo : public PPCGenInstrInfo { SmallVectorImpl<MachineInstr *> &NewMIs, bool &NonRI, bool &SpillsVRS) const; virtual void anchor(); + +protected: + /// Commutes the operands in the given instruction. + /// The commutable operands are specified by their indices OpIdx1 and OpIdx2. + /// + /// Do not call this method for a non-commutable instruction or for + /// non-commutable pair of operand indices OpIdx1 and OpIdx2. + /// Even though the instruction is commutable, the method may still + /// fail to commute the operands; a null pointer is returned in such cases. + /// + /// For example, we can commute rlwimi instructions, but only if the + /// rotate amt is zero. We also have to munge the immediates a bit. + MachineInstr *commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const override; + public: explicit PPCInstrInfo(PPCSubtarget &STI); @@ -119,6 +136,19 @@ public: return false; } + bool useMachineCombiner() const override { + return true; + } + + /// Return true when there is potentially a faster code sequence + /// for an instruction chain ending in <Root>. All potential patterns are + /// output in the <P> array. + bool getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &P) const override; + + bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; + bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const override; @@ -127,10 +157,6 @@ public: unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const override; - // commuteInstruction - We can commute rlwimi instructions, but only if the - // rotate amt is zero. We also have to munge the immediates a bit. - MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override; - bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const override; @@ -183,7 +209,7 @@ public: // profitable to use the predicated branches. bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const override { + BranchProbability Probability) const override { return true; } @@ -191,12 +217,10 @@ public: unsigned NumT, unsigned ExtraT, MachineBasicBlock &FMBB, unsigned NumF, unsigned ExtraF, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; - bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumCycles, - const BranchProbability - &Probability) const override { + bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, + BranchProbability Probability) const override { return true; } @@ -239,6 +263,15 @@ public: unsigned GetInstSizeInBytes(const MachineInstr *MI) const; void getNoopForMachoTarget(MCInst &NopInst) const override; + + std::pair<unsigned, unsigned> + decomposeMachineOperandsTargetFlags(unsigned TF) const override; + + ArrayRef<std::pair<unsigned, const char *>> + getSerializableDirectMachineOperandTargetFlags() const override; + + ArrayRef<std::pair<unsigned, const char *>> + getSerializableBitmaskMachineOperandTargetFlags() const override; }; } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 24fd9bd5c1f7..6c4364aad331 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -226,7 +226,9 @@ def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, // Instructions to support dynamic alloca.
def SDTDynOp : SDTypeProfile<1, 2, []>; +def SDTDynAreaOp : SDTypeProfile<1, 1, []>; def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; +def PPCdynareaoffset : SDNode<"PPCISD::DYNAREAOFFSET", SDTDynAreaOp, [SDNPHasChain]>; //===----------------------------------------------------------------------===// // PowerPC specific transformation functions and pattern fragments. @@ -1029,6 +1031,8 @@ let Defs = [R1], Uses = [R1] in def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC", [(set i32:$result, (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>; +def DYNAREAOFFSET : Pseudo<(outs i32imm:$result), (ins memri:$fpsi), "#DYNAREAOFFSET", + [(set i32:$result, (PPCdynareaoffset iaddr:$fpsi))]>; // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after // instruction selection into a branch sequence. @@ -3883,8 +3887,11 @@ def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>; -def : InstAlias<"cntlz $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>; -def : InstAlias<"cntlz. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>; +def : InstAlias<"cntlzw $rA, $rS", (CNTLZW gprc:$rA, gprc:$rS)>; +def : InstAlias<"cntlzw. $rA, $rS", (CNTLZWo gprc:$rA, gprc:$rS)>; +// The POWER variant +def : MnemonicAlias<"cntlz", "cntlzw">; +def : MnemonicAlias<"cntlz.", "cntlzw.">; def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b", (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>; diff --git a/lib/Target/PowerPC/PPCInstrQPX.td b/lib/Target/PowerPC/PPCInstrQPX.td index 0a044c5c6ea4..43120070d799 100644 --- a/lib/Target/PowerPC/PPCInstrQPX.td +++ b/lib/Target/PowerPC/PPCInstrQPX.td @@ -839,31 +839,31 @@ def : Pat<(v4f64 (scalar_to_vector f64:$A)), def : Pat<(v4f32 (scalar_to_vector f32:$A)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), $A, sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, 0)), +def : Pat<(f64 (extractelt v4f64:$S, 0)), (EXTRACT_SUBREG $S, sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, 0)), +def : Pat<(f32 (extractelt v4f32:$S, 0)), (EXTRACT_SUBREG $S, sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, 1)), +def : Pat<(f64 (extractelt v4f64:$S, 1)), (EXTRACT_SUBREG (QVESPLATI $S, 1), sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, 2)), +def : Pat<(f64 (extractelt v4f64:$S, 2)), (EXTRACT_SUBREG (QVESPLATI $S, 2), sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, 3)), +def : Pat<(f64 (extractelt v4f64:$S, 3)), (EXTRACT_SUBREG (QVESPLATI $S, 3), sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, 1)), +def : Pat<(f32 (extractelt v4f32:$S, 1)), (EXTRACT_SUBREG (QVESPLATIs $S, 1), sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, 2)), +def : Pat<(f32 (extractelt v4f32:$S, 2)), (EXTRACT_SUBREG (QVESPLATIs $S, 2), sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, 3)), +def : Pat<(f32 (extractelt v4f32:$S, 3)), (EXTRACT_SUBREG (QVESPLATIs $S, 3), sub_64)>; -def : Pat<(f64 (vector_extract v4f64:$S, i64:$F)), +def : Pat<(f64 (extractelt v4f64:$S, i64:$F)), (EXTRACT_SUBREG (QVFPERM $S, $S, (QVLPCLSXint (RLDICR $F, 2, /* 63-2 = */ 61))), sub_64)>; -def : Pat<(f32 (vector_extract v4f32:$S, i64:$F)), +def : Pat<(f32 (extractelt v4f32:$S, i64:$F)), (EXTRACT_SUBREG (QVFPERMs $S, $S, (QVLPCLSXint (RLDICR $F, 2, /* 63-2 = */ 61))), diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index ce63c22992e8..df1142cb42f3 100644 --- 
a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -67,17 +67,19 @@ def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; -multiclass XX3Form_Rcr opcode, bits<7> xo, dag OOL, dag IOL, - string asmbase, string asmstr, InstrItinClass itin, - list pattern> { +multiclass XX3Form_Rcr opcode, bits<7> xo, string asmbase, + string asmstr, InstrItinClass itin, Intrinsic Int, + ValueType OutTy, ValueType InTy> { let BaseName = asmbase in { - def NAME : XX3Form_Rc; + [(set OutTy:$XT, (Int InTy:$XA, InTy:$XB))]>; let Defs = [CR6] in - def o : XX3Form_Rc, isDOT; + [(set InTy:$XT, + (InTy (PPCvcmp_o InTy:$XA, InTy:$XB, xo)))]>, + isDOT; } } @@ -456,35 +458,23 @@ let Uses = [RM] in { "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>; defm XVCMPEQDP : XX3Form_Rcr<60, 99, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v2i64:$XT, - (int_ppc_vsx_xvcmpeqdp v2f64:$XA, v2f64:$XB))]>; + int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>; defm XVCMPEQSP : XX3Form_Rcr<60, 67, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v4i32:$XT, - (int_ppc_vsx_xvcmpeqsp v4f32:$XA, v4f32:$XB))]>; + int_ppc_vsx_xvcmpeqsp, v4i32, v4f32>; defm XVCMPGEDP : XX3Form_Rcr<60, 115, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v2i64:$XT, - (int_ppc_vsx_xvcmpgedp v2f64:$XA, v2f64:$XB))]>; + int_ppc_vsx_xvcmpgedp, v2i64, v2f64>; defm XVCMPGESP : XX3Form_Rcr<60, 83, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v4i32:$XT, - (int_ppc_vsx_xvcmpgesp v4f32:$XA, v4f32:$XB))]>; + int_ppc_vsx_xvcmpgesp, v4i32, v4f32>; defm XVCMPGTDP : XX3Form_Rcr<60, 107, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v2i64:$XT, - (int_ppc_vsx_xvcmpgtdp v2f64:$XA, v2f64:$XB))]>; + int_ppc_vsx_xvcmpgtdp, v2i64, v2f64>; defm XVCMPGTSP : XX3Form_Rcr<60, 75, - (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB), "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, - [(set v4i32:$XT, - (int_ppc_vsx_xvcmpgtsp v4f32:$XA, v4f32:$XB))]>; + int_ppc_vsx_xvcmpgtsp, v4i32, v4f32>; // Move Instructions def XSABSDP : XX2Form<60, 345, @@ -845,9 +835,9 @@ let Predicates = [IsBigEndian] in { def : Pat<(v2f64 (scalar_to_vector f64:$A)), (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>; -def : Pat<(f64 (vector_extract v2f64:$S, 0)), +def : Pat<(f64 (extractelt v2f64:$S, 0)), (f64 (EXTRACT_SUBREG $S, sub_64))>; -def : Pat<(f64 (vector_extract v2f64:$S, 1)), +def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; } @@ -856,9 +846,9 @@ def : Pat<(v2f64 (scalar_to_vector f64:$A)), (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $A, sub_64), (SUBREG_TO_REG (i64 1), $A, sub_64), 0))>; -def : Pat<(f64 (vector_extract v2f64:$S, 0)), +def : Pat<(f64 (extractelt v2f64:$S, 0)), (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>; -def : Pat<(f64 (vector_extract v2f64:$S, 1)), +def : Pat<(f64 (extractelt v2f64:$S, 1)), (f64 (EXTRACT_SUBREG $S, sub_64))>; } @@ -1206,6 +1196,23 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns. 
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } + + // Single Precision Conversions (FP <-> INT) + def XSCVSXDSP : XX2Form<60, 312, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvsxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfids f64:$XB))]>; + def XSCVUXDSP : XX2Form<60, 296, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvuxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfidus f64:$XB))]>; + + // Conversions between vector and scalar single precision + def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), + "xscvdpspn $XT, $XB", IIC_VecFP, []>; + def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), + "xscvspdpn $XT, $XB", IIC_VecFP, []>; + } // AddedComplexity = 400 } // HasP8Vector @@ -1229,3 +1236,550 @@ let Predicates = [HasDirectMove, HasVSX] in { "mtvsrwz $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; } // HasDirectMove, HasVSX + +/* Direct moves of various widths from GPR's into VSR's. Each move lines + the value up into element 0 (both BE and LE). Namely, entities smaller than + a doubleword are shifted left and moved for BE. For LE, they're moved, then + swapped to go into the least significant element of the VSR. +*/ +def MovesToVSR { + dag BE_BYTE_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7)); + dag BE_HALF_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15)); + dag BE_WORD_0 = + (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31)); + dag BE_DWORD_0 = (MTVSRD $A); + + dag LE_MTVSRW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32)); + dag LE_WORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + LE_MTVSRW, sub_64)); + dag LE_WORD_0 = (XXPERMDI LE_WORD_1, LE_WORD_1, 2); + dag LE_DWORD_1 = (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), + BE_DWORD_0, sub_64)); + dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); +} + +/* Patterns for extracting elements out of vectors. Integer elements are + extracted using direct move operations. Patterns for extracting elements + whose indices are not available at compile time are also provided with + various _VARIABLE_ patterns. + The numbering for the DAG's is for LE, but when used on BE, the correct + LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13). 
+*/ +def VectorExtractions { + // Doubleword extraction + dag LE_DWORD_0 = + (MFVSRD + (EXTRACT_SUBREG + (XXPERMDI (COPY_TO_REGCLASS $S, VSRC), + (COPY_TO_REGCLASS $S, VSRC), 2), sub_64)); + dag LE_DWORD_1 = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); + + // Word extraction + dag LE_WORD_0 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 2), sub_64)); + dag LE_WORD_1 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 1), sub_64)); + dag LE_WORD_2 = (MFVSRWZ (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS $S, VSRC)), sub_64)); + dag LE_WORD_3 = (MFVSRWZ (EXTRACT_SUBREG (XXSLDWI $S, $S, 3), sub_64)); + + // Halfword extraction + dag LE_HALF_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 48), sub_32)); + dag LE_HALF_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 48), sub_32)); + dag LE_HALF_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 48), sub_32)); + dag LE_HALF_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 48), sub_32)); + dag LE_HALF_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 48), sub_32)); + dag LE_HALF_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 48), sub_32)); + dag LE_HALF_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 48), sub_32)); + dag LE_HALF_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 48), sub_32)); + + // Byte extraction + dag LE_BYTE_0 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 0, 56), sub_32)); + dag LE_BYTE_1 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 56, 56), sub_32)); + dag LE_BYTE_2 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 48, 56), sub_32)); + dag LE_BYTE_3 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 40, 56), sub_32)); + dag LE_BYTE_4 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 32, 56), sub_32)); + dag LE_BYTE_5 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 24, 56), sub_32)); + dag LE_BYTE_6 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 16, 56), sub_32)); + dag LE_BYTE_7 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_0, 8, 56), sub_32)); + dag LE_BYTE_8 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 0, 56), sub_32)); + dag LE_BYTE_9 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 56, 56), sub_32)); + dag LE_BYTE_10 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 48, 56), sub_32)); + dag LE_BYTE_11 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 40, 56), sub_32)); + dag LE_BYTE_12 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 32, 56), sub_32)); + dag LE_BYTE_13 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 24, 56), sub_32)); + dag LE_BYTE_14 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 16, 56), sub_32)); + dag LE_BYTE_15 = (i32 (EXTRACT_SUBREG (RLDICL LE_DWORD_1, 8, 56), sub_32)); + + /* Variable element number (BE and LE patterns must be specified separately) + This is a rather involved process. + + Conceptually, this is how the move is accomplished: + 1. Identify which doubleword contains the element + 2. Shift in the VMX register so that the correct doubleword is correctly + lined up for the MFVSRD + 3. Perform the move so that the element (along with some extra stuff) + is in the GPR + 4. Right shift within the GPR so that the element is right-justified + + Of course, the index is an element number which has a different meaning + on LE/BE so the patterns have to be specified separately. + + Note: The final result will be the element right-justified with high + order bits being arbitrarily defined (namely, whatever was in the + vector register to the left of the value originally). + */ + + /* LE variable byte + Number 1. 
above: + - For elements 0-7, we shift left by 8 bytes since they're on the right + - For elements 8-15, we need not shift (shift left by zero bytes) + This is accomplished by inverting the bits of the index and AND-ing + with 0x8 (i.e. clearing all bits of the index and inverting bit 60). + */ + dag LE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDC8 (LI8 8), $Idx)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VBYTE_PERMUTE = (VPERM $S, $S, LE_VBYTE_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VBYTE_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-7 (8-15 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 8 as we need to shift right by the number of bits, not bytes + - Shift right in the GPR by the calculated value + */ + dag LE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 7), $Idx), 3, 60), + sub_32); + dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), + sub_32); + + /* LE variable halfword + Number 1. above: + - For elements 0-3, we shift left by 8 since they're on the right + - For elements 4-7, we need not shift (shift left by zero bytes) + Similarly to the byte pattern, we invert the bits of the index, but we + AND with 0x4 (i.e. clear all bits of the index and invert bit 61). + Of course, the shift is still by 8 bytes, so we must multiply by 2. + */ + dag LE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 4), $Idx), 1, 62)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VHALF_PERMUTE = (VPERM $S, $S, LE_VHALF_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VHALF_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-3 (4-7 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 16 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 3), $Idx), 4, 59), + sub_32); + dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), + sub_32); + + /* LE variable word + Number 1. above: + - For elements 0-1, we shift left by 8 since they're on the right + - For elements 2-3, we need not shift + */ + dag LE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VWORD_PERMUTE = (VPERM $S, $S, LE_VWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-1 (2-3 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 32 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), + sub_32); + dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), + sub_32); + + /* LE variable doubleword + Number 1. 
above: + - For element 0, we shift left by 8 since it's on the right + - For element 1, we need not shift + */ + dag LE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VDWORD_PERMUTE = (VPERM $S, $S, LE_VDWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + // - Number 4. is not needed for the doubleword as the value is 64-bits + dag LE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* LE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag LE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61)); + dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC); + dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE); + + /* LE variable double + Same as the LE doubleword except there is no move. + */ + dag LE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + LE_VDWORD_PERM_VEC); + dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC); + + /* BE variable byte + The algorithm here is the same as the LE variable byte except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x8 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-7 + */ + dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8)); + dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC); + dag BE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), + sub_64)); + dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), + sub_32); + dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), + sub_32); + + /* BE variable halfword + The algorithm here is the same as the LE variable halfword except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x4 and multiply by 2 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-3 + */ + dag BE_VHALF_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 4), 1, 62)); + dag BE_VHALF_PERMUTE = (VPERM $S, $S, BE_VHALF_PERM_VEC); + dag BE_MV_VHALF = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), + sub_64)); + dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), + sub_32); + dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), + sub_32); + + /* BE variable word + The algorithm is the same as the LE variable word except: + - The shift in the VMX register happens for opposite element numbers + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-1 + */ + dag BE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61)); + dag BE_VWORD_PERMUTE = (VPERM $S, $S, BE_VWORD_PERM_VEC); + dag BE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)), + sub_64)); + dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), + sub_32); + dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), + sub_32); + + /* BE variable doubleword + 
Same as the LE doubleword except we shift in the VMX register for opposite + element indices. + */ + dag BE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60)); + dag BE_VDWORD_PERMUTE = (VPERM $S, $S, BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* BE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag BE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR $Idx, 2, 61)); + dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC); + dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE); + + /* BE variable double + Same as the BE doubleword except there is no move. + */ + dag BE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); +} + +// v4f32 scalar <-> vector conversions (BE) +let Predicates = [IsBigEndian, HasP8Vector] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XSCVDPSPN $A))>; + def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.BE_VARIABLE_FLOAT)>; +} // IsBigEndian, HasP8Vector + +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsBigEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>; + +let Predicates = [IsBigEndian, HasDirectMove] in { + // v16i8 scalar <-> vector conversions (BE) + def : Pat<(v16i8 (scalar_to_vector i32:$A)), + (v16i8 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_BYTE_0, sub_64))>; + def : Pat<(v8i16 (scalar_to_vector i32:$A)), + (v8i16 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_HALF_0, sub_64))>; + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_WORD_0, sub_64))>; + def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 VectorExtractions.LE_BYTE_15)>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 VectorExtractions.LE_BYTE_14)>; + def : Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 VectorExtractions.LE_BYTE_13)>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 VectorExtractions.LE_BYTE_12)>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 VectorExtractions.LE_BYTE_11)>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 VectorExtractions.LE_BYTE_10)>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 VectorExtractions.LE_BYTE_9)>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 VectorExtractions.LE_BYTE_8)>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 VectorExtractions.LE_BYTE_7)>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 VectorExtractions.LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 VectorExtractions.LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 VectorExtractions.LE_BYTE_4)>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 VectorExtractions.LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 
13)), + (i32 VectorExtractions.LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 VectorExtractions.LE_BYTE_1)>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 VectorExtractions.LE_BYTE_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_BYTE)>; + + // v8i16 scalar <-> vector conversions (BE) + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 VectorExtractions.LE_HALF_7)>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 VectorExtractions.LE_HALF_6)>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 VectorExtractions.LE_HALF_5)>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 VectorExtractions.LE_HALF_4)>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 VectorExtractions.LE_HALF_3)>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 VectorExtractions.LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 VectorExtractions.LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 VectorExtractions.LE_HALF_0)>; + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_HALF)>; + + // v4i32 scalar <-> vector conversions (BE) + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 VectorExtractions.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 VectorExtractions.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 VectorExtractions.LE_WORD_1)>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 VectorExtractions.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_WORD)>; + + // v2i64 scalar <-> vector conversions (BE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.BE_VARIABLE_DWORD)>; +} // IsBigEndian, HasDirectMove + +// v4f32 scalar <-> vector conversions (LE) +let Predicates = [IsLittleEndian, HasP8Vector] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; + def : Pat<(f32 (vector_extract v4f32:$S, 0)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 1)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 2)), + (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; + def : Pat<(f32 (vector_extract v4f32:$S, 3)), + (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.LE_VARIABLE_FLOAT)>; +} // IsLittleEndian, HasP8Vector + +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsLittleEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; + +let Predicates = [IsLittleEndian, HasDirectMove] in { + // v16i8 scalar <-> vector conversions (LE) + def : Pat<(v16i8 (scalar_to_vector i32:$A)), + (v16i8 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; + def : Pat<(v8i16 (scalar_to_vector i32:$A)), + (v8i16 (COPY_TO_REGCLASS MovesToVSR.LE_WORD_0, VSRC))>; + def : Pat<(v4i32 (scalar_to_vector i32:$A)), + (v4i32 MovesToVSR.LE_WORD_0)>; + def : Pat<(v2i64 (scalar_to_vector i64:$A)), + (v2i64 MovesToVSR.LE_DWORD_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, 0)), + (i32 VectorExtractions.LE_BYTE_0)>; + def : Pat<(i32 (vector_extract v16i8:$S, 1)), + (i32 VectorExtractions.LE_BYTE_1)>; + def 
: Pat<(i32 (vector_extract v16i8:$S, 2)), + (i32 VectorExtractions.LE_BYTE_2)>; + def : Pat<(i32 (vector_extract v16i8:$S, 3)), + (i32 VectorExtractions.LE_BYTE_3)>; + def : Pat<(i32 (vector_extract v16i8:$S, 4)), + (i32 VectorExtractions.LE_BYTE_4)>; + def : Pat<(i32 (vector_extract v16i8:$S, 5)), + (i32 VectorExtractions.LE_BYTE_5)>; + def : Pat<(i32 (vector_extract v16i8:$S, 6)), + (i32 VectorExtractions.LE_BYTE_6)>; + def : Pat<(i32 (vector_extract v16i8:$S, 7)), + (i32 VectorExtractions.LE_BYTE_7)>; + def : Pat<(i32 (vector_extract v16i8:$S, 8)), + (i32 VectorExtractions.LE_BYTE_8)>; + def : Pat<(i32 (vector_extract v16i8:$S, 9)), + (i32 VectorExtractions.LE_BYTE_9)>; + def : Pat<(i32 (vector_extract v16i8:$S, 10)), + (i32 VectorExtractions.LE_BYTE_10)>; + def : Pat<(i32 (vector_extract v16i8:$S, 11)), + (i32 VectorExtractions.LE_BYTE_11)>; + def : Pat<(i32 (vector_extract v16i8:$S, 12)), + (i32 VectorExtractions.LE_BYTE_12)>; + def : Pat<(i32 (vector_extract v16i8:$S, 13)), + (i32 VectorExtractions.LE_BYTE_13)>; + def : Pat<(i32 (vector_extract v16i8:$S, 14)), + (i32 VectorExtractions.LE_BYTE_14)>; + def : Pat<(i32 (vector_extract v16i8:$S, 15)), + (i32 VectorExtractions.LE_BYTE_15)>; + def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_BYTE)>; + + // v8i16 scalar <-> vector conversions (LE) + def : Pat<(i32 (vector_extract v8i16:$S, 0)), + (i32 VectorExtractions.LE_HALF_0)>; + def : Pat<(i32 (vector_extract v8i16:$S, 1)), + (i32 VectorExtractions.LE_HALF_1)>; + def : Pat<(i32 (vector_extract v8i16:$S, 2)), + (i32 VectorExtractions.LE_HALF_2)>; + def : Pat<(i32 (vector_extract v8i16:$S, 3)), + (i32 VectorExtractions.LE_HALF_3)>; + def : Pat<(i32 (vector_extract v8i16:$S, 4)), + (i32 VectorExtractions.LE_HALF_4)>; + def : Pat<(i32 (vector_extract v8i16:$S, 5)), + (i32 VectorExtractions.LE_HALF_5)>; + def : Pat<(i32 (vector_extract v8i16:$S, 6)), + (i32 VectorExtractions.LE_HALF_6)>; + def : Pat<(i32 (vector_extract v8i16:$S, 7)), + (i32 VectorExtractions.LE_HALF_7)>; + def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_HALF)>; + + // v4i32 scalar <-> vector conversions (LE) + def : Pat<(i32 (vector_extract v4i32:$S, 0)), + (i32 VectorExtractions.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, 1)), + (i32 VectorExtractions.LE_WORD_1)>; + def : Pat<(i32 (vector_extract v4i32:$S, 2)), + (i32 VectorExtractions.LE_WORD_2)>; + def : Pat<(i32 (vector_extract v4i32:$S, 3)), + (i32 VectorExtractions.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_WORD)>; + + // v2i64 scalar <-> vector conversions (LE) + def : Pat<(i64 (vector_extract v2i64:$S, 0)), + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, 1)), + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.LE_VARIABLE_DWORD)>; +} // IsLittleEndian, HasDirectMove + +let Predicates = [HasDirectMove, HasVSX] in { +// bitconvert f32 -> i32 +// (convert to 32-bit fp single, shift right 1 word, move to GPR) +def : Pat<(i32 (bitconvert f32:$S)), + (i32 (MFVSRWZ (EXTRACT_SUBREG + (XXSLDWI (XSCVDPSPN $S),(XSCVDPSPN $S), 3), + sub_64)))>; +// bitconvert i32 -> f32 +// (move to FPR, shift left 1 word, convert to 64-bit fp single) +def : Pat<(f32 (bitconvert i32:$A)), + (f32 (XSCVSPDPN + (XXSLDWI MovesToVSR.LE_WORD_1, MovesToVSR.LE_WORD_1, 1)))>; + +// bitconvert f64 -> i64 +// (move to GPR, nothing else needed) +def : 
Pat<(i64 (bitconvert f64:$S)), + (i64 (MFVSRD $S))>; + +// bitconvert i64 -> f64 +// (move to FPR, nothing else needed) +def : Pat<(f64 (bitconvert i64:$S)), + (f64 (MTVSRD $S))>; +} diff --git a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp index b4e1c099f190..e3a35d5df358 100644 --- a/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp +++ b/lib/Target/PowerPC/PPCLoopDataPrefetch.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -71,10 +72,10 @@ namespace { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); + AU.addRequired(); // FIXME: For some reason, preserving SE here breaks LSR (even if // this pass changes nothing). - // AU.addPreserved(); + // AU.addPreserved(); AU.addRequired(); } @@ -96,7 +97,7 @@ INITIALIZE_PASS_BEGIN(PPCLoopDataPrefetch, "ppc-loop-data-prefetch", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCLoopDataPrefetch, "ppc-loop-data-prefetch", "PPC Loop Data Prefetch", false, false) @@ -104,7 +105,7 @@ FunctionPass *llvm::createPPCLoopDataPrefetchPass() { return new PPCLoopDataPref bool PPCLoopDataPrefetch::runOnFunction(Function &F) { LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); DL = &F.getParent()->getDataLayout(); AC = &getAnalysis().getAssumptionCache(F); TTI = &getAnalysis().getTTI(F); diff --git a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp index b6e7799402e1..5e188268fee9 100644 --- a/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp +++ b/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp @@ -73,7 +73,7 @@ namespace { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); + AU.addRequired(); } bool runOnFunction(Function &F) override; @@ -84,8 +84,10 @@ namespace { private: PPCTargetMachine *TM; + DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; + bool PreserveLCSSA; }; } @@ -93,7 +95,7 @@ char PPCLoopPreIncPrep::ID = 0; static const char *name = "Prepare loop for pre-inc. 
addressing modes"; INITIALIZE_PASS_BEGIN(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(PPCLoopPreIncPrep, DEBUG_TYPE, name, false, false) FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { @@ -101,17 +103,20 @@ FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) { } namespace { - struct SCEVLess : std::binary_function - { - SCEVLess(ScalarEvolution *SE) : SE(SE) {} + struct BucketElement { + BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {} + BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {} - bool operator() (const SCEV *X, const SCEV *Y) const { - const SCEV *Diff = SE->getMinusSCEV(X, Y); - return cast(Diff)->getValue()->getSExtValue() < 0; - } + const SCEVConstant *Offset; + Instruction *Instr; + }; - protected: - ScalarEvolution *SE; + struct Bucket { + Bucket(const SCEV *B, Instruction *I) : BaseSCEV(B), + Elements(1, BucketElement(I)) {} + + const SCEV *BaseSCEV; + SmallVector Elements; }; } @@ -140,7 +145,10 @@ static Value *GetPointerOperand(Value *MemI) { bool PPCLoopPreIncPrep::runOnFunction(Function &F) { LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); + auto *DTWP = getAnalysisIfAvailable(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); bool MadeChange = false; @@ -169,7 +177,6 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { std::distance(pred_begin(Header), pred_end(Header)); // Collect buckets of comparable addresses used by loads and stores. - typedef std::multimap Bucket; SmallVector Buckets; for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); I != IE; ++I) { @@ -212,25 +219,24 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { } bool FoundBucket = false; - for (unsigned i = 0, e = Buckets.size(); i != e; ++i) - for (Bucket::iterator K = Buckets[i].begin(), KE = Buckets[i].end(); - K != KE; ++K) { - const SCEV *Diff = SE->getMinusSCEV(K->first, LSCEV); - if (isa(Diff)) { - Buckets[i].insert(std::make_pair(LSCEV, MemI)); - FoundBucket = true; - break; - } + for (auto &B : Buckets) { + const SCEV *Diff = SE->getMinusSCEV(LSCEV, B.BaseSCEV); + if (const auto *CDiff = dyn_cast(Diff)) { + B.Elements.push_back(BucketElement(CDiff, MemI)); + FoundBucket = true; + break; } + } if (!FoundBucket) { - Buckets.push_back(Bucket(SCEVLess(SE))); - Buckets[Buckets.size()-1].insert(std::make_pair(LSCEV, MemI)); + if (Buckets.size() == MaxVars) + return MadeChange; + Buckets.push_back(Bucket(LSCEV, MemI)); } } } - if (Buckets.empty() || Buckets.size() > MaxVars) + if (Buckets.empty()) return MadeChange; BasicBlock *LoopPredecessor = L->getLoopPredecessor(); @@ -239,7 +245,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // iteration space), insert a new preheader for the loop. if (!LoopPredecessor || !LoopPredecessor->getTerminator()->getType()->isVoidTy()) { - LoopPredecessor = InsertPreheaderForLoop(L, this); + LoopPredecessor = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (LoopPredecessor) MadeChange = true; } @@ -253,8 +259,45 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { // The base address of each bucket is transformed into a phi and the others // are rewritten as offsets of that variable. + // We have a choice now of which instruction's memory operand we use as the + // base for the generated PHI. 
Always picking the first instruction in each + // bucket does not work well, specifically because that instruction might + // be a prefetch (and there are no pre-increment dcbt variants). Otherwise, + // the choice is somewhat arbitrary, because the backend will happily + // generate direct offsets from both the pre-incremented and + // post-incremented pointer values. Thus, we'll pick the first non-prefetch + // instruction in each bucket, and adjust the recurrence and other offsets + // accordingly. + for (int j = 0, je = Buckets[i].Elements.size(); j != je; ++j) { + if (auto *II = dyn_cast(Buckets[i].Elements[j].Instr)) + if (II->getIntrinsicID() == Intrinsic::prefetch) + continue; + + // If we'd otherwise pick the first element anyway, there's nothing to do. + if (j == 0) + break; + + // If our chosen element has no offset from the base pointer, there's + // nothing to do. + if (!Buckets[i].Elements[j].Offset || + Buckets[i].Elements[j].Offset->isZero()) + break; + + const SCEV *Offset = Buckets[i].Elements[j].Offset; + Buckets[i].BaseSCEV = SE->getAddExpr(Buckets[i].BaseSCEV, Offset); + for (auto &E : Buckets[i].Elements) { + if (E.Offset) + E.Offset = cast(SE->getMinusSCEV(E.Offset, Offset)); + else + E.Offset = cast(SE->getNegativeSCEV(Offset)); + } + + std::swap(Buckets[i].Elements[j], Buckets[i].Elements[0]); + break; + } + const SCEVAddRecExpr *BasePtrSCEV = - cast(Buckets[i].begin()->first); + cast(Buckets[i].BaseSCEV); if (!BasePtrSCEV->isAffine()) continue; @@ -262,7 +305,9 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { assert(BasePtrSCEV->getLoop() == L && "AddRec for the wrong loop?"); - Instruction *MemI = Buckets[i].begin()->second; + // The instruction corresponding to the Bucket's BaseSCEV must be the first + // in the vector of elements. + Instruction *MemI = Buckets[i].Elements.begin()->Instr; Value *BasePtr = GetPointerOperand(MemI); assert(BasePtr && "No pointer operand"); @@ -302,7 +347,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { NewPHI->addIncoming(BasePtrStart, LoopPredecessor); } - Instruction *InsPoint = Header->getFirstInsertionPt(); + Instruction *InsPoint = &*Header->getFirstInsertionPt(); GetElementPtrInst *PtrInc = GetElementPtrInst::Create( I8Ty, NewPHI, BasePtrIncSCEV->getValue(), MemI->hasName() ? MemI->getName() + ".inc" : "", InsPoint); @@ -327,18 +372,20 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { BasePtr->replaceAllUsesWith(NewBasePtr); RecursivelyDeleteTriviallyDeadInstructions(BasePtr); - Value *LastNewPtr = NewBasePtr; - for (Bucket::iterator I = std::next(Buckets[i].begin()), - IE = Buckets[i].end(); I != IE; ++I) { - Value *Ptr = GetPointerOperand(I->second); + // Keep track of the replacement pointer values we've inserted so that we + // don't generate more pointer values than necessary. 
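+  // (A hedged illustration, not part of the original patch: a set is
+  // needed here because elements sharing a pointer value need not be
+  // adjacent in the bucket; with element pointers P0, P1, P0, the third
+  // element must still be recognized as already rewritten.)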
+ SmallPtrSet NewPtrs; + NewPtrs.insert( NewBasePtr); + + for (auto I = std::next(Buckets[i].Elements.begin()), + IE = Buckets[i].Elements.end(); I != IE; ++I) { + Value *Ptr = GetPointerOperand(I->Instr); assert(Ptr && "No pointer operand"); - if (Ptr == LastNewPtr) + if (NewPtrs.count(Ptr)) continue; Instruction *RealNewPtr; - const SCEVConstant *Diff = - cast(SE->getMinusSCEV(I->first, BasePtrSCEV)); - if (Diff->isZero()) { + if (!I->Offset || I->Offset->getValue()->isZero()) { RealNewPtr = NewBasePtr; } else { Instruction *PtrIP = dyn_cast(Ptr); @@ -346,13 +393,13 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { cast(NewBasePtr)->getParent() == PtrIP->getParent()) PtrIP = 0; else if (isa(PtrIP)) - PtrIP = PtrIP->getParent()->getFirstInsertionPt(); + PtrIP = &*PtrIP->getParent()->getFirstInsertionPt(); else if (!PtrIP) - PtrIP = I->second; + PtrIP = I->Instr; GetElementPtrInst *NewPtr = GetElementPtrInst::Create( - I8Ty, PtrInc, Diff->getValue(), - I->second->hasName() ? I->second->getName() + ".off" : "", PtrIP); + I8Ty, PtrInc, I->Offset->getValue(), + I->Instr->hasName() ? I->Instr->getName() + ".off" : "", PtrIP); if (!PtrIP) NewPtr->insertAfter(cast(PtrInc)); NewPtr->setIsInBounds(IsPtrInBounds(Ptr)); @@ -373,7 +420,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) { Ptr->replaceAllUsesWith(ReplNewPtr); RecursivelyDeleteTriviallyDeadInstructions(Ptr); - LastNewPtr = RealNewPtr; + NewPtrs.insert(RealNewPtr); } MadeChange = true; diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp index 76837ecb32de..44a692d4bb42 100644 --- a/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -38,7 +38,7 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) { static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ const TargetMachine &TM = AP.TM; Mangler *Mang = AP.Mang; - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = AP.getDataLayout(); MCContext &Ctx = AP.OutContext; bool isDarwin = TM.getTargetTriple().isOSDarwin(); @@ -51,13 +51,13 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){ Suffix = "$non_lazy_ptr"; if (!Suffix.empty()) - Name += DL->getPrivateGlobalPrefix(); + Name += DL.getPrivateGlobalPrefix(); unsigned PrefixLen = Name.size(); if (!MO.isGlobal()) { assert(MO.isSymbol() && "Isn't a symbol reference"); - Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL); + Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); } else { const GlobalValue *GV = MO.getGlobal(); TM.getNameWithPrefix(Name, GV, *Mang); diff --git a/lib/Target/PowerPC/PPCMIPeephole.cpp b/lib/Target/PowerPC/PPCMIPeephole.cpp new file mode 100644 index 000000000000..fe339d70d7de --- /dev/null +++ b/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -0,0 +1,230 @@ +//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---------------------------------------------------------------------===// +// +// This pass performs peephole optimizations to clean up ugly code +// sequences at the MachineInstruction layer. It runs at the end of +// the SSA phases, following VSX swap removal. A pass of dead code +// elimination follows this one for quick clean-up of any dead +// instructions introduced here. 
Although we could do this as callbacks +// from the generic peephole pass, this would have a couple of bad +// effects: it might remove optimization opportunities for VSX swap +// removal, and it would miss cleanups made possible following VSX +// swap removal. +// +//===---------------------------------------------------------------------===// + +#include "PPCInstrInfo.h" +#include "PPC.h" +#include "PPCInstrBuilder.h" +#include "PPCTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "ppc-mi-peepholes" + +namespace llvm { + void initializePPCMIPeepholePass(PassRegistry&); +} + +namespace { + +struct PPCMIPeephole : public MachineFunctionPass { + + static char ID; + const PPCInstrInfo *TII; + MachineFunction *MF; + MachineRegisterInfo *MRI; + + PPCMIPeephole() : MachineFunctionPass(ID) { + initializePPCMIPeepholePass(*PassRegistry::getPassRegistry()); + } + +private: + // Initialize class variables. + void initialize(MachineFunction &MFParm); + + // Perform peepholes. + bool simplifyCode(void); + + // Find the "true" register represented by SrcReg (following chains + // of copies and subreg_to_reg operations). + unsigned lookThruCopyLike(unsigned SrcReg); + +public: + // Main entry point for this pass. + bool runOnMachineFunction(MachineFunction &MF) override { + initialize(MF); + return simplifyCode(); + } +}; + +// Initialize class variables. +void PPCMIPeephole::initialize(MachineFunction &MFParm) { + MF = &MFParm; + MRI = &MF->getRegInfo(); + TII = MF->getSubtarget().getInstrInfo(); + DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n"); + DEBUG(MF->dump()); +} + +// Perform peephole optimizations. +bool PPCMIPeephole::simplifyCode(void) { + bool Simplified = false; + MachineInstr* ToErase = nullptr; + + for (MachineBasicBlock &MBB : *MF) { + for (MachineInstr &MI : MBB) { + + // If the previous instruction was marked for elimination, + // remove it now. + if (ToErase) { + ToErase->eraseFromParent(); + ToErase = nullptr; + } + + // Ignore debug instructions. + if (MI.isDebugValue()) + continue; + + // Per-opcode peepholes. + switch (MI.getOpcode()) { + + default: + break; + + case PPC::XXPERMDI: { + // Perform simplifications of 2x64 vector swaps and splats. + // A swap is identified by an immediate value of 2, and a splat + // is identified by an immediate value of 0 or 3. + int Immed = MI.getOperand(3).getImm(); + + if (Immed != 1) { + + // For each of these simplifications, we need the two source + // regs to match. Unfortunately, MachineCSE ignores COPY and + // SUBREG_TO_REG, so for example we can see + // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed. + // We have to look through chains of COPY and SUBREG_TO_REG + // to find the real source values for comparison. + unsigned TrueReg1 = lookThruCopyLike(MI.getOperand(1).getReg()); + unsigned TrueReg2 = lookThruCopyLike(MI.getOperand(2).getReg()); + + if (TrueReg1 == TrueReg2 + && TargetRegisterInfo::isVirtualRegister(TrueReg1)) { + MachineInstr *DefMI = MRI->getVRegDef(TrueReg1); + + // If this is a splat or a swap fed by another splat, we + // can replace it with a copy. 
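+          // (A hedged illustration, not part of the original patch:
+          // XXPERMDI's two-bit immediate selects result doublewords as
+          // imm = 2*DM0 + DM1, with dword 0 taken from A[DM0] and
+          // dword 1 from B[DM1]. With identical sources, imm 0 is a
+          // <0,0> splat, imm 3 a <1,1> splat, imm 2 a swap, and imm 1
+          // the identity, which is why Immed == 1 is excluded above.)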
+ if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) { + unsigned FeedImmed = DefMI->getOperand(3).getImm(); + unsigned FeedReg1 + = lookThruCopyLike(DefMI->getOperand(1).getReg()); + unsigned FeedReg2 + = lookThruCopyLike(DefMI->getOperand(2).getReg()); + + if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) { + DEBUG(dbgs() + << "Optimizing splat/swap or splat/splat " + "to splat/copy: "); + DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), + TII->get(PPC::COPY), MI.getOperand(0).getReg()) + .addOperand(MI.getOperand(1)); + ToErase = &MI; + Simplified = true; + } + + // If this is a splat fed by a swap, we can simply modify + // the splat to splat the other value from the swap's input + // parameter. + else if ((Immed == 0 || Immed == 3) + && FeedImmed == 2 && FeedReg1 == FeedReg2) { + DEBUG(dbgs() << "Optimizing swap/splat => splat: "); + DEBUG(MI.dump()); + MI.getOperand(1).setReg(DefMI->getOperand(1).getReg()); + MI.getOperand(2).setReg(DefMI->getOperand(2).getReg()); + MI.getOperand(3).setImm(3 - Immed); + Simplified = true; + } + + // If this is a swap fed by a swap, we can replace it + // with a copy from the first swap's input. + else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) { + DEBUG(dbgs() << "Optimizing swap/swap => copy: "); + DEBUG(MI.dump()); + BuildMI(MBB, &MI, MI.getDebugLoc(), + TII->get(PPC::COPY), MI.getOperand(0).getReg()) + .addOperand(DefMI->getOperand(1)); + ToErase = &MI; + Simplified = true; + } + } + } + } + break; + } + } + } + + // If the last instruction was marked for elimination, + // remove it now. + if (ToErase) { + ToErase->eraseFromParent(); + ToErase = nullptr; + } + } + + return Simplified; +} + +// This is used to find the "true" source register for an +// XXPERMDI instruction, since MachineCSE does not handle the +// "copy-like" operations (Copy and SubregToReg). Returns +// the original SrcReg unless it is the target of a copy-like +// operation, in which case we chain backwards through all +// such operations to the ultimate source register. If a +// physical register is encountered, we stop the search.
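+// For example (a hypothetical SSA fragment, not taken from the patch):
+//   %v1 = COPY %v0
+//   %v2 = SUBREG_TO_REG 0, %v1, sub_64
+// lookThruCopyLike(%v2) walks back through both copy-like defs and
+// returns %v0; if %v0 were a physical register, the walk would stop
+// there and return it.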
+unsigned PPCMIPeephole::lookThruCopyLike(unsigned SrcReg) { + + while (true) { + + MachineInstr *MI = MRI->getVRegDef(SrcReg); + if (!MI->isCopyLike()) + return SrcReg; + + unsigned CopySrcReg; + if (MI->isCopy()) + CopySrcReg = MI->getOperand(1).getReg(); + else { + assert(MI->isSubregToReg() && "bad opcode for lookThruCopyLike"); + CopySrcReg = MI->getOperand(2).getReg(); + } + + if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) + return CopySrcReg; + + SrcReg = CopySrcReg; + } +} + +} // end default namespace + +INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE, + "PowerPC MI Peephole Optimization", false, false) +INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE, + "PowerPC MI Peephole Optimization", false, false) + +char PPCMIPeephole::ID = 0; +FunctionPass* +llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); } + diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp index ec4e0a5fa81b..95f163153c74 100644 --- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp +++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp @@ -18,8 +18,8 @@ using namespace llvm; void PPCFunctionInfo::anchor() { } MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const { - const DataLayout *DL = MF.getTarget().getDataLayout(); - return MF.getContext().getOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix()) + + const DataLayout &DL = MF.getDataLayout(); + return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + Twine(MF.getFunctionNumber()) + "$poff"); } diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 2b09b2f625de..934bdf622418 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -200,7 +200,7 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R2); // System-reserved register Reserved.set(PPC::R13); // Small Data Area pointer register } - + // On PPC64, r13 is the thread pointer. Never allocate this register. if (TM.isPPC64()) { Reserved.set(PPC::R13); @@ -262,7 +262,7 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, default: return 0; case PPC::G8RC_NOX0RegClassID: - case PPC::GPRC_NOR0RegClassID: + case PPC::GPRC_NOR0RegClassID: case PPC::G8RCRegClassID: case PPC::GPRCRegClassID: { unsigned FP = TFI->hasFP(MF) ? 1 : 0; @@ -311,7 +311,7 @@ PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC, //===----------------------------------------------------------------------===// /// lowerDynamicAlloc - Generate the code for allocating an object in the -/// current frame. The sequence of code with be in the general form +/// current frame. The sequence of code will be in the general form /// /// addi R0, SP, \#frameSize ; get the address of the previous frame /// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size @@ -337,7 +337,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); // Get the total frame size. unsigned FrameSize = MFI->getStackSize(); - + // Get stack alignments. const PPCFrameLowering *TFI = getFrameLowering(MF); unsigned TargetAlign = TFI->getStackAlignment(); @@ -347,14 +347,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { // Determine the previous frame's address. 
If FrameSize can't be // represented as 16 bits or we need special alignment, then we load the - // previous frame's address from 0(SP). Why not do an addis of the hi? - // Because R0 is our only safe tmp register and addi/addis treat R0 as zero. - // Constructing the constant and adding would take 3 instructions. + // previous frame's address from 0(SP). Why not do an addis of the hi? + // Because R0 is our only safe tmp register and addi/addis treat R0 as zero. + // Constructing the constant and adding would take 3 instructions. // Fortunately, a frame greater than 32K is rare. const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC); - + if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) { BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg) .addReg(PPC::R31) @@ -425,11 +425,32 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const { .addReg(PPC::R1) .addImm(maxCallFrameSize); } - + // Discard the DYNALLOC instruction. MBB.erase(II); } +void PPCRegisterInfo::lowerDynamicAreaOffset( + MachineBasicBlock::iterator II) const { + // Get the instruction. + MachineInstr &MI = *II; + // Get the instruction's basic block. + MachineBasicBlock &MBB = *MI.getParent(); + // Get the basic block's function. + MachineFunction &MF = *MBB.getParent(); + // Get the frame info. + MachineFrameInfo *MFI = MF.getFrameInfo(); + const PPCSubtarget &Subtarget = MF.getSubtarget(); + // Get the instruction info. + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + DebugLoc dl = MI.getDebugLoc(); + BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg()) + .addImm(maxCallFrameSize); + MBB.erase(II); +} + /// lowerCRSpilling - Generate the code for spilling a CR register. Instead of /// reserving a whole register (R0), we scrounge for one here. This generates /// code like this: @@ -459,8 +480,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II, // We need to store the CR in the low 4-bits of the saved value. First, issue // an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg. BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) - .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); - + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + // If the saved register wasn't CR0, shift the bits left so that they are in // CR0's slot. if (SrcReg != PPC::CR0) { @@ -549,8 +570,8 @@ void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II, .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg) - .addReg(getCRFromCRBit(SrcReg)); - + .addReg(getCRFromCRBit(SrcReg)); + // If the saved register wasn't CR0LT, shift the bits left so that the bit to // store is the first one. Mask all but that bit. unsigned Reg1 = Reg; @@ -602,17 +623,19 @@ void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II, unsigned ShiftBits = getEncodingValue(DestReg); // rlwimi r11, r10, 32-ShiftBits, ..., ... BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO) - .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill) - .addImm(ShiftBits ? 32-ShiftBits : 0) - .addImm(ShiftBits).addImm(ShiftBits); - + .addReg(RegO, RegState::Kill) + .addReg(Reg, RegState::Kill) + .addImm(ShiftBits ? 
32 - ShiftBits : 0) + .addImm(ShiftBits) + .addImm(ShiftBits); + BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF), getCRFromCRBit(DestReg)) - .addReg(RegO, RegState::Kill) - // Make sure we have a use dependency all the way through this - // sequence of instructions. We can't have the other bits in the CR - // modified in between the mfocrf and the mtocrf. - .addReg(getCRFromCRBit(DestReg), RegState::Implicit); + .addReg(RegO, RegState::Kill) + // Make sure we have a use dependency all the way through this + // sequence of instructions. We can't have the other bits in the CR + // modified in between the mfocrf and the mtocrf. + .addReg(getCRFromCRBit(DestReg), RegState::Implicit); // Discard the pseudo instruction. MBB.erase(II); @@ -634,11 +657,11 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II, unsigned SrcReg = MI.getOperand(0).getReg(); BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg) - .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); - - addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW)) - .addReg(Reg, RegState::Kill), - FrameIndex); + .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill())); + + addFrameReference( + BuildMI(MBB, II, dl, TII.get(PPC::STW)).addReg(Reg, RegState::Kill), + FrameIndex); // Discard the pseudo instruction. MBB.erase(II); @@ -671,9 +694,8 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II, MBB.erase(II); } -bool -PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, - unsigned Reg, int &FrameIdx) const { +bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, + unsigned Reg, int &FrameIdx) const { const PPCSubtarget &Subtarget = MF.getSubtarget(); // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4 // ABI, return true to prevent allocating an additional frame slot. @@ -752,7 +774,12 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int FPSI = FI->getFramePointerSaveIndex(); // Get the instruction opcode. unsigned OpC = MI.getOpcode(); - + + if ((OpC == PPC::DYNAREAOFFSET || OpC == PPC::DYNAREAOFFSET8)) { + lowerDynamicAreaOffset(II); + return; + } + // Special case for dynamic alloca. if (FPSI && FrameIndex == FPSI && (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) { @@ -800,8 +827,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // If we're not using a Frame Pointer that has been set to the value of the // SP before having the stack size subtracted from it, then add the stack size // to Offset to get the correct offset. - // Naked functions have stack size 0, although getStackSize may not reflect that - // because we didn't call all the pieces that compute it for naked functions. + // Naked functions have stack size 0, although getStackSize may not reflect + // that because we didn't call all the pieces that compute it for naked + // functions. 
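+  // (A hedged numeric illustration, not part of the original patch: for a
+  // non-naked function with stack size 64, an object at offset -8 from the
+  // incoming SP becomes offset -8 + 64 = 56 from the post-prologue SP.)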
if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) { if (!(hasBasePointer(MF) && FrameIndex < 0)) Offset += MFI->getStackSize(); @@ -840,7 +868,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, .addImm(Offset); // Convert into indexed form of the instruction: - // + // // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0 // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0 unsigned OperandBase; @@ -898,24 +926,6 @@ bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const { return needsStackRealignment(MF); } -bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const { - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) - return false; - - return true; -} - -bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const PPCFrameLowering *TFI = getFrameLowering(MF); - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const Function *F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttribute(Attribute::StackAlignment)); - - return requiresRealignment && canRealignStack(MF); -} - /// Returns true if the instruction's frame index /// reference would be better served by a base register other than FP /// or SP. Used by LocalStackFrameAllocation to determine which frame index diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index d304e1d8b5ec..b15fde83c9f3 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -54,13 +54,13 @@ inline static unsigned getCRFromCRBit(unsigned SrcReg) { return Reg; } - class PPCRegisterInfo : public PPCGenRegisterInfo { DenseMap ImmToIdxMap; const PPCTargetMachine &TM; + public: PPCRegisterInfo(const PPCTargetMachine &TM); - + /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * @@ -77,7 +77,7 @@ public: const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const override; - const uint32_t *getNoPreservedMask() const; + const uint32_t *getNoPreservedMask() const override; void adjustStackMapLiveOutMask(uint32_t *Mask) const override; @@ -101,6 +101,7 @@ public: } void lowerDynamicAlloc(MachineBasicBlock::iterator II) const; + void lowerDynamicAreaOffset(MachineBasicBlock::iterator II) const; void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex) const; void lowerCRRestore(MachineBasicBlock::iterator II, @@ -115,9 +116,9 @@ public: unsigned FrameIndex) const; bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, - int &FrameIdx) const override; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, unsigned FIOperandNum, + int &FrameIdx) const override; + void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; // Support for virtual base registers. @@ -136,8 +137,6 @@ public: // Base pointer (stack realignment) support. 
   unsigned getBaseRegister(const MachineFunction &MF) const;
   bool hasBasePointer(const MachineFunction &MF) const;

-  bool canRealignStack(const MachineFunction &MF) const;
-  bool needsStackRealignment(const MachineFunction &MF) const override;
 };

 } // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 58daccae90f2..c0fcb6cbb9dc 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -62,6 +62,7 @@ void PPCSubtarget::initializeEnvironment() {
   Has64BitSupport = false;
   Use64BitRegs = false;
   UseCRBits = false;
+  UseSoftFloat = false;
   HasAltivec = false;
   HasSPE = false;
   HasQPX = false;
@@ -100,6 +101,8 @@ void PPCSubtarget::initializeEnvironment() {
   HasDirectMove = false;
   IsQPXStackUnaligned = false;
   HasHTM = false;
+  HasFusion = false;
+  HasFloat128 = false;
 }

 void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -210,5 +213,33 @@ bool PPCSubtarget::enableSubRegLiveness() const {
   return UseSubRegLiveness;
 }

+unsigned char PPCSubtarget::classifyGlobalReference(
+    const GlobalValue *GV) const {
+  // Note that currently we don't generate non-pic references.
+  // If a caller wants that, this will have to be updated.
+
+  // Large code model always uses the TOC even for local symbols.
+  if (TM.getCodeModel() == CodeModel::Large)
+    return PPCII::MO_PIC_FLAG | PPCII::MO_NLP_FLAG;
+
+  unsigned char flags = PPCII::MO_PIC_FLAG;
+
+  // Only if the relocation mode is PIC do we have to worry about
+  // interposition. In all other cases we can use a slightly looser standard to
+  // decide how to access the symbol.
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    // If it's local, or it's non-default, it can't be interposed.
+    if (!GV->hasLocalLinkage() &&
+        GV->hasDefaultVisibility()) {
+      flags |= PPCII::MO_NLP_FLAG;
+    }
+    return flags;
+  }
+
+  if (GV->isStrongDefinitionForLinker())
+    return flags;
+  return flags | PPCII::MO_NLP_FLAG;
+}
+
 bool PPCSubtarget::isELFv2ABI() const { return TM.isELFv2ABI(); }
 bool PPCSubtarget::isPPC64() const { return TM.isPPC64(); }
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 0616c1f65604..4f5c95c1483f 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -83,6 +83,7 @@ protected:
   bool Has64BitSupport;
   bool Use64BitRegs;
   bool UseCRBits;
+  bool UseSoftFloat;
   bool IsPPC64;
   bool HasAltivec;
   bool HasSPE;
@@ -119,6 +120,8 @@ protected:
   bool HasPartwordAtomics;
   bool HasDirectMove;
   bool HasHTM;
+  bool HasFusion;
+  bool HasFloat128;

   /// When targeting QPX running a stock PPC64 Linux kernel where the stack
   /// alignment has not been changed, we need to keep the 16-byte alignment
@@ -188,6 +191,8 @@ public:
   /// has64BitSupport - Return true if the selected CPU supports 64-bit
   /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
   bool has64BitSupport() const { return Has64BitSupport; }
+  // useSoftFloat - Return true if soft-float option is turned on.
+  bool useSoftFloat() const { return UseSoftFloat; }

   /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
   /// registers in 32-bit mode when possible.  This can only be true if
@@ -254,6 +259,8 @@ public:
     return 16;
   }
   bool hasHTM() const { return HasHTM; }
+  bool hasFusion() const { return HasFusion; }
+  bool hasFloat128() const { return HasFloat128; }

   const Triple &getTargetTriple() const { return TargetTriple; }

@@ -285,6 +292,10 @@ public:
   bool useAA() const override;

   bool enableSubRegLiveness() const override;
+
+  /// classifyGlobalReference - Classify a global variable reference for the
+  /// current subtarget according to how we should reference it.
+  unsigned char classifyGlobalReference(const GlobalValue *GV) const;
 };
 } // End llvm namespace
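The flag algebra in classifyGlobalReference above can be checked in isolation. The following is a minimal standalone sketch, not part of the imported sources: PIC and NLP are stand-ins for PPCII::MO_PIC_FLAG and PPCII::MO_NLP_FLAG, and the boolean parameters stand in for the TargetMachine and GlobalValue queries.

  // flag_sketch.cpp -- hypothetical model of the reference classification.
  #include <cstdio>

  enum : unsigned char { PIC = 1, NLP = 2 }; // stand-ins for PPCII::MO_*_FLAG

  unsigned char classify(bool LargeCodeModel, bool PICReloc, bool LocalLinkage,
                         bool DefaultVisibility, bool StrongDef) {
    if (LargeCodeModel)          // large code model: always indirect via TOC
      return PIC | NLP;
    unsigned char Flags = PIC;
    if (PICReloc)                // interposition only matters under PIC
      return (!LocalLinkage && DefaultVisibility) ? Flags | NLP : Flags;
    return StrongDef ? Flags : Flags | NLP; // looser standard otherwise
  }

  int main() {
    // A preemptible symbol under PIC needs the indirection: prints 3 (PIC|NLP).
    std::printf("%u\n", classify(false, true, false, true, false));
  }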
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 1daf244fed44..d24b590317f5 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -42,6 +42,10 @@ static cl::
 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
                                 cl::desc("Disable VSX Swap Removal for PPC"));

+static cl::
+opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
+                            cl::desc("Disable machine peepholes for PPC"));
+
 static cl::opt<bool>
 EnableGEPOpt("ppc-gep-opt", cl::Hidden,
              cl::desc("Enable optimizations on complex GEPs"),
@@ -57,11 +61,19 @@ EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
                       cl::desc("Add extra TOC register dependencies"),
                       cl::init(true), cl::Hidden);

+static cl::opt<bool>
+EnableMachineCombinerPass("ppc-machine-combiner",
+                          cl::desc("Enable the machine combiner pass"),
+                          cl::init(true), cl::Hidden);
+
 extern "C" void LLVMInitializePowerPCTarget() {
   // Register the targets
   RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
   RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
   RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
+
+  PassRegistry &PR = *PassRegistry::getPassRegistry();
+  initializePPCBoolRetToIntPass(PR);
 }

 /// Return the datalayout string of a subtarget.
@@ -118,7 +130,7 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
   }

   if (OL != CodeGenOpt::None) {
-    if (!FullFS.empty()) 
+    if (!FullFS.empty())
       FullFS = "+invariant-function-descriptors," + FullFS;
     else
       FullFS = "+invariant-function-descriptors";
@@ -144,7 +156,7 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
     return PPCTargetMachine::PPC_ABI_ELFv2;

   assert(Options.MCOptions.getABIName().empty() &&
-	 "Unknown target-abi option!");
+         "Unknown target-abi option!");

   if (!TT.isMacOSX()) {
     switch (TT.getArch()) {
@@ -160,9 +172,9 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
   return PPCTargetMachine::PPC_ABI_UNKNOWN;
 }

-// The FeatureString here is a little subtle. We are modifying the feature string
-// with what are (currently) non-function specific overrides as it goes into the
-// LLVMTargetMachine constructor and then using the stored value in the
+// The FeatureString here is a little subtle. We are modifying the feature
+// string with what are (currently) non-function specific overrides as it goes
+// into the LLVMTargetMachine constructor and then using the stored value in the
 // Subtarget constructor below it.
 PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
                                    StringRef CPU, StringRef FS,
@@ -227,6 +239,19 @@ PPCTargetMachine::getSubtargetImpl(const Function &F) const {
                             ? FSAttr.getValueAsString().str()
                             : TargetFS;

+  // FIXME: This is related to the code below to reset the target options,
+  // we need to know whether or not the soft float flag is set on the
+  // function before we can generate a subtarget. We also need to use
+  // it as a key for the subtarget since that can be the only difference
+  // between two functions.
+  bool SoftFloat =
+      F.hasFnAttribute("use-soft-float") &&
+      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
+  // If the soft float attribute is set on the function turn on the soft float
+  // subtarget feature.
+  if (SoftFloat)
+    FS += FS.empty() ? "+soft-float" : ",+soft-float";
+
   auto &I = SubtargetMap[CPU + FS];
   if (!I) {
     // This needs to be done before we create a new subtarget since any
@@ -277,6 +302,8 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
 }

 void PPCPassConfig::addIRPasses() {
+  if (TM->getOptLevel() != CodeGenOpt::None)
+    addPass(createPPCBoolRetToIntPass());
   addPass(createAtomicExpandPass(&getPPCTargetMachine()));

   // For the BG/Q (or if explicitly requested), add explicit data prefetch
@@ -316,6 +343,10 @@ bool PPCPassConfig::addPreISel() {

 bool PPCPassConfig::addILPOpts() {
   addPass(&EarlyIfConverterID);
+
+  if (EnableMachineCombinerPass)
+    addPass(&MachineCombinerID);
+
   return true;
 }

@@ -339,6 +370,12 @@ void PPCPassConfig::addMachineSSAOptimization() {
   if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
      !DisableVSXSwapRemoval)
     addPass(createPPCVSXSwapRemovalPass());
+  // Target-specific peephole cleanups performed after instruction
+  // selection.
+  if (!DisableMIPeephole) {
+    addPass(createPPCMIPeepholePass());
+    addPass(&DeadMachineInstructionElimID);
+  }
 }

 void PPCPassConfig::addPreRegAlloc() {
@@ -364,6 +401,7 @@ void PPCPassConfig::addPreEmitPass() {
 }

 TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
-  return TargetIRAnalysis(
-      [this](Function &F) { return TargetTransformInfo(PPCTTIImpl(this, F)); });
+  return TargetIRAnalysis([this](const Function &F) {
+    return TargetTransformInfo(PPCTTIImpl(this, F));
+  });
 }
diff --git a/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index 9ee5db938b67..798bb9d6b892 100644
--- a/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -42,9 +42,7 @@ MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
   if (Kind.isReadOnly()) {
     const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);

-    if (GVar && GVar->isConstant() &&
-        (GVar->getInitializer()->getRelocationInfo() ==
-         Constant::GlobalRelocations))
+    if (GVar && GVar->isConstant() && GVar->getInitializer()->needsRelocation())
       Kind = SectionKind::getReadOnlyWithRel();
   }

diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index e21c2b77f4d7..cd86dabd5abe 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -35,7 +35,7 @@ PPCTTIImpl::getPopcntSupport(unsigned TyWidth) {
   return TTI::PSK_Software;
 }

-unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
   if (DisablePPCConstHoist)
     return BaseT::getIntImmCost(Imm, Ty);
@@ -64,8 +64,8 @@ unsigned PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
   return 4 * TTI::TCC_Basic;
 }

-unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
-                                   const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+                              Type *Ty) {
   if (DisablePPCConstHoist)
     return BaseT::getIntImmCost(IID, Idx, Imm, Ty);
@@ -98,8 +98,8 @@ unsigned PPCTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
   return PPCTTIImpl::getIntImmCost(Imm, Ty);
 }

-unsigned PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
-                                   const APInt &Imm, Type *Ty) {
+int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+                              Type *Ty) {
   if (DisablePPCConstHoist)
     return BaseT::getIntImmCost(Opcode, Idx, Imm, Ty);
@@ -197,9 +197,20 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
 }

 bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
+  // On the A2, always unroll aggressively. For QPX unaligned loads, we depend
+  // on combining the loads generated for consecutive accesses, and failure to
+  // do so is particularly expensive. This makes it much more likely (compared
+  // to only using concatenation unrolling).
+  if (ST->getDarwinDirective() == PPC::DIR_A2)
+    return true;
+
   return LoopHasReductions;
 }

+bool PPCTTIImpl::enableInterleavedAccessVectorization() {
+  return true;
+}
+
 unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
   if (Vector && !ST->hasAltivec() && !ST->hasQPX())
     return 0;
@@ -246,7 +257,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
   return 2;
 }

-unsigned PPCTTIImpl::getArithmeticInstrCost(
+int PPCTTIImpl::getArithmeticInstrCost(
     unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
     TTI::OperandValueProperties Opd2PropInfo) {
@@ -257,24 +268,30 @@ unsigned PPCTTIImpl::getArithmeticInstrCost(
                                        Opd1PropInfo, Opd2PropInfo);
 }

-unsigned PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                                    Type *SubTp) {
-  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+int PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+                               Type *SubTp) {
+  // Legalize the type.
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+
+  // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+  // (at least in the sense that there need only be one non-loop-invariant
+  // instruction). We need one such shuffle instruction for each actual
+  // register (this is not true for arbitrary shuffles, but is true for the
+  // structured types of shuffles covered by TTI::ShuffleKind).
+  return LT.first;
 }

-unsigned PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
+int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
   assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");

   return BaseT::getCastInstrCost(Opcode, Dst, Src);
 }

-unsigned PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
-                                        Type *CondTy) {
+int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
 }

-unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
-                                        unsigned Index) {
+int PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
   assert(Val->isVectorTy() && "This must be a vector type");

   int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -313,41 +330,83 @@ unsigned PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
   return BaseT::getVectorInstrCost(Opcode, Val, Index);
 }

-unsigned PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
-                                     unsigned Alignment,
-                                     unsigned AddressSpace) {
+int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                                unsigned AddressSpace) {
   // Legalize the type.
-  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
   assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
          "Invalid Opcode");

-  unsigned Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+  int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);

-  // VSX loads/stores support unaligned access.
-  if (ST->hasVSX()) {
-    if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
-      return Cost;
-  }
+  // Aligned loads and stores are easy.
+  unsigned SrcBytes = LT.second.getStoreSize();
+  if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
+    return Cost;

-  bool UnalignedAltivec =
-    Src->isVectorTy() &&
-    Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
-    LT.second.getSizeInBits() == 128 &&
-    Opcode == Instruction::Load;
+  bool IsAltivecType = ST->hasAltivec() &&
+                       (LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
+                        LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
+  bool IsVSXType = ST->hasVSX() &&
+                   (LT.second == MVT::v2f64 || LT.second == MVT::v2i64);
+  bool IsQPXType = ST->hasQPX() &&
+                   (LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
+
+  // If we can use the permutation-based load sequence, then this is also
+  // relatively cheap (not counting loop-invariant instructions): one load plus
+  // one permute (the last load in a series has extra cost, but we're
+  // neglecting that here). Note that on the P7, we should do unaligned loads
+  // for Altivec types using the VSX instructions, but that's more expensive
+  // than using the permutation-based load sequence. On the P8, that's no
+  // longer true.
+  if (Opcode == Instruction::Load &&
+      ((!ST->hasP8Vector() && IsAltivecType) || IsQPXType) &&
+      Alignment >= LT.second.getScalarType().getStoreSize())
+    return Cost + LT.first; // Add the cost of the permutations.
+
+  // For VSX, we can do unaligned loads and stores on Altivec/VSX types. On the
+  // P7, unaligned vector loads are more expensive than the permutation-based
+  // load sequence, so that might be used instead, but regardless, the net cost
+  // is about the same (not counting loop-invariant instructions).
+  if (IsVSXType || (ST->hasVSX() && IsAltivecType))
+    return Cost;

   // PPC in general does not support unaligned loads and stores. They'll need
   // to be decomposed based on the alignment factor.
-  unsigned SrcBytes = LT.second.getStoreSize();
-  if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
-    Cost += LT.first*(SrcBytes/Alignment-1);
-    // For a vector type, there is also scalarization overhead (only for
-    // stores, loads are expanded using the vector-load + permutation sequence,
-    // which is much less expensive).
-    if (Src->isVectorTy() && Opcode == Instruction::Store)
-      for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
-        Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
-  }
+  // Add the cost of each scalar load or store.
+  Cost += LT.first*(SrcBytes/Alignment-1);
+
+  // For a vector type, there is also scalarization overhead (only for
+  // stores, loads are expanded using the vector-load + permutation sequence,
+  // which is much less expensive).
+  if (Src->isVectorTy() && Opcode == Instruction::Store)
+    for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+      Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+
+  return Cost;
+}
+
+int PPCTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                           unsigned Factor,
+                                           ArrayRef<unsigned> Indices,
+                                           unsigned Alignment,
+                                           unsigned AddressSpace) {
+  assert(isa<VectorType>(VecTy) &&
+         "Expect a vector type for interleaved memory op");
+
+  // Legalize the type.
+  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, VecTy);
+
+  // Firstly, the cost of load/store operation.
+  int Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);
+
+  // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations
+  // (at least in the sense that there need only be one non-loop-invariant
+  // instruction). For each result vector, we need one shuffle per incoming
+  // vector (except that the first shuffle can take two incoming vectors
+  // because it does not need to take itself).
+  Cost += Factor*(LT.first-1);

   return Cost;
 }
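To see the arithmetic of the unaligned fallback path concretely: a worked example, with illustrative numbers that are editor assumptions rather than values taken from the cost tables, for a hypothetical 16-byte vector store with 4-byte alignment on a subtarget where none of the cheap paths apply.

  // cost_sketch.cpp -- illustrative recomputation of the decomposition penalty.
  #include <cstdio>

  int main() {
    int Cost = 1;                     // assumed BaseT::getMemoryOpCost result
    int LTFirst = 1;                  // assumed: v4i32 legalizes to 1 register
    unsigned SrcBytes = 16, Alignment = 4;
    Cost += LTFirst * (SrcBytes / Alignment - 1); // 3 extra pieces -> Cost = 4

    // A vector *store* additionally scalarizes: one extract per element.
    int NumElts = 4, ExtractCost = 1; // assumed per-element extract cost
    Cost += NumElts * ExtractCost;    // -> Cost = 8
    std::printf("estimated cost: %d\n", Cost);
  }

The interleaved-access formula above follows the same pattern: one memory-op cost plus Factor*(LT.first-1) shuffles.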
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.h b/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 368bef93f0dd..04c1b02235f0 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -37,7 +37,7 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
   const PPCTargetLowering *getTLI() const { return TLI; }

 public:
-  explicit PPCTTIImpl(const PPCTargetMachine *TM, Function &F)
+  explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}

@@ -52,12 +52,11 @@ public:
   /// @{

   using BaseT::getIntImmCost;
-  unsigned getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty);

-  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                         Type *Ty);
-  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                         Type *Ty);
+  int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty);
+  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+                    Type *Ty);

   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
   void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
@@ -68,22 +67,27 @@ public:
   /// @{

   bool enableAggressiveInterleaving(bool LoopHasReductions);
+  bool enableInterleavedAccessVectorization();
   unsigned getNumberOfRegisters(bool Vector);
   unsigned getRegisterBitWidth(bool Vector);
   unsigned getMaxInterleaveFactor(unsigned VF);
-  unsigned getArithmeticInstrCost(
+  int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
-  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                          Type *SubTp);
-  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
-  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
-  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
-  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                           unsigned AddressSpace);
+  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src);
+  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy);
+  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
+  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+                      unsigned AddressSpace);
+  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+                                 unsigned Factor,
+                                 ArrayRef<unsigned> Indices,
+                                 unsigned Alignment,
+                                 unsigned AddressSpace);

   /// @}
 };
diff --git a/lib/Target/PowerPC/PPCVSXCopy.cpp b/lib/Target/PowerPC/PPCVSXCopy.cpp
index 5e3ae2a4471b..782583ce3423 100644
--- a/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -77,6 +77,14 @@ namespace {
       return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
     }

+    bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
+    }
+
+    bool IsVSSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+      return IsRegInClass(Reg, &PPC::VSSRCRegClass, MRI);
+    }
+
 protected:
     bool processBlock(MachineBasicBlock &MBB) {
       bool Changed = false;
@@ -100,7 +108,9 @@ protected:
             IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
                                            &PPC::VSLRCRegClass;
           assert((IsF8Reg(SrcMO.getReg(), MRI) ||
-                  IsVRReg(SrcMO.getReg(), MRI)) &&
+                  IsVRReg(SrcMO.getReg(), MRI) ||
+                  IsVSSReg(SrcMO.getReg(), MRI) ||
+                  IsVSFReg(SrcMO.getReg(), MRI)) &&
                  "Unknown source for a VSX copy");

           unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
@@ -123,6 +133,8 @@ protected:
             IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
                                            &PPC::VSLRCRegClass;
           assert((IsF8Reg(DstMO.getReg(), MRI) ||
+                  IsVSFReg(DstMO.getReg(), MRI) ||
+                  IsVSSReg(DstMO.getReg(), MRI) ||
                   IsVRReg(DstMO.getReg(), MRI)) &&
                  "Unknown destination for a VSX copy");
diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 46b8d13e47b9..6b19a2f7118b 100644
--- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -103,10 +103,10 @@ protected:
         VNInfo *AddendValNo =
           LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
-        if (!AddendValNo) {
-          // This can be null if the register is undef.
+
+        // This can be null if the register is undef.
+        if (!AddendValNo)
           continue;
-        }

         MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
@@ -186,18 +186,17 @@ protected:
         if (!KilledProdOp)
           continue;

-	// If the addend copy is used only by this MI, then the addend source
-	// register is likely not live here. This could be fixed (based on the
-	// legality checks above, the live range for the addend source register
-	// could be extended), but it seems likely that such a trivial copy can
-	// be coalesced away later, and thus is not worth the effort.
-	if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
+        // If the addend copy is used only by this MI, then the addend source
+        // register is likely not live here. This could be fixed (based on the
+        // legality checks above, the live range for the addend source register
+        // could be extended), but it seems likely that such a trivial copy can
+        // be coalesced away later, and thus is not worth the effort.
+        if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg) &&
             !LIS->getInterval(AddendSrcReg).liveAt(FMAIdx))
           continue;

         // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.

-        unsigned AddReg = AddendMI->getOperand(1).getReg();
         unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
         unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
@@ -221,6 +220,14 @@ protected:
         if (OldFMAReg == KilledProdReg)
           continue;

+        // If there isn't a class that fits, we can't perform the transform.
+        // This is needed for correctness with a mixture of VSX and Altivec
+        // instructions to make sure that a low VSX register is not assigned to
+        // the Altivec instruction.
+        if (!MRI.constrainRegClass(KilledProdReg,
+                                   MRI.getRegClass(OldFMAReg)))
+          continue;
+
         assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
                "Addend copy not tied to old FMA output!");

@@ -228,7 +235,7 @@ protected:
         MI->getOperand(0).setReg(KilledProdReg);
         MI->getOperand(1).setReg(KilledProdReg);
-        MI->getOperand(3).setReg(AddReg);
+        MI->getOperand(3).setReg(AddendSrcReg);
         MI->getOperand(2).setReg(OtherProdReg);

         MI->getOperand(0).setSubReg(KilledProdSubReg);
@@ -263,8 +270,7 @@ protected:
           if (UseMI == AddendMI)
             continue;

-          UseMO.setReg(KilledProdReg);
-          UseMO.setSubReg(KilledProdSubReg);
+          UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI);
         }

         // Extend the live intervals of the killed product operand to hold the
@@ -286,6 +292,20 @@ protected:
         }
         DEBUG(dbgs() << "  extended: " << NewFMAInt << '\n');

+        // Extend the live interval of the addend source (it might end at the
+        // copy to be removed, or somewhere in between there and here). This
+        // is necessary only if it is a physical register.
+        if (!TargetRegisterInfo::isVirtualRegister(AddendSrcReg))
+          for (MCRegUnitIterator Units(AddendSrcReg, TRI); Units.isValid();
+               ++Units) {
+            unsigned Unit = *Units;
+
+            LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
+            AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
+                                         FMAIdx.getRegSlot());
+            DEBUG(dbgs() << "  extended: " << AddendSrcRange << '\n');
+          }
+
         FMAInt.removeValNo(FMAValNo);
         DEBUG(dbgs() << "  trimmed:  " << FMAInt << '\n');
@@ -347,7 +367,6 @@ INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
 char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;

 char PPCVSXFMAMutate::ID = 0;
-FunctionPass*
-llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
-
-
+FunctionPass *llvm::createPPCVSXFMAMutatePass() {
+  return new PPCVSXFMAMutate();
+}
diff --git a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index d7132d5272d8..27c540fcf211 100644
--- a/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -94,7 +94,7 @@ enum SHValues {
   SH_NOSWAP_ST,
   SH_SPLAT,
   SH_XXPERMDI,
-  SH_COPYSCALAR
+  SH_COPYWIDEN
 };

 struct PPCVSXSwapRemoval : public MachineFunctionPass {
@@ -149,6 +149,11 @@ private:
   // handling.  Return true iff any changes are made.
   bool removeSwaps();

+  // Insert a swap instruction from SrcReg to DstReg at the given
+  // InsertPoint.
+  void insertSwap(MachineInstr *MI, MachineBasicBlock::iterator InsertPoint,
+                  unsigned DstReg, unsigned SrcReg);
+
   // Update instructions requiring special handling.
   void handleSpecialSwappables(int EntryIdx);

@@ -159,9 +164,7 @@ private:
   bool isRegInClass(unsigned Reg, const TargetRegisterClass *RC) {
     if (TargetRegisterInfo::isVirtualRegister(Reg))
       return RC->hasSubClassEq(MRI->getRegClass(Reg));
-    if (RC->contains(Reg))
-      return true;
-    return false;
+    return RC->contains(Reg);
   }

   // Return true iff the given register is a full vector register.
@@ -215,7 +218,7 @@ public:
 void PPCVSXSwapRemoval::initialize(MachineFunction &MFParm) {
   MF = &MFParm;
   MRI = &MF->getRegInfo();
-  TII = static_cast<const PPCInstrInfo *>(MF->getSubtarget().getInstrInfo());
+  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();

   // An initial vector size of 256 appears to work well in practice.
   // Small/medium functions with vector content tend not to incur a
@@ -343,6 +346,15 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
         SwapVector[VecIdx].IsLoad = 1;
         SwapVector[VecIdx].IsSwap = 1;
         break;
+      case PPC::LXSDX:
+      case PPC::LXSSPX:
+        // A load of a floating-point value into the high-order half of
+        // a vector register is safe, provided that we introduce a swap
+        // following the load, which will be done by the SUBREG_TO_REG
+        // support.  So just mark these as safe.
+        SwapVector[VecIdx].IsLoad = 1;
+        SwapVector[VecIdx].IsSwappable = 1;
+        break;
       case PPC::STVX:
         // Non-permuting stores are currently unsafe.  We can use special
         // handling for this in the future.  By not marking these as
@@ -385,7 +397,7 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
         else if (isVecReg(MI.getOperand(0).getReg()) &&
                  isScalarVecReg(MI.getOperand(2).getReg())) {
           SwapVector[VecIdx].IsSwappable = 1;
-          SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYSCALAR;
+          SwapVector[VecIdx].SpecialHandling = SHValues::SH_COPYWIDEN;
         }
         break;
       }
@@ -420,7 +432,14 @@ bool PPCVSXSwapRemoval::gatherVectorInstructions() {
       case PPC::STVEHX:
       case PPC::STVEWX:
      case PPC::STVXL:
+        // We can handle STXSDX and STXSSPX similarly to LXSDX and LXSSPX,
+        // by adding special handling for narrowing copies as well as
+        // widening ones.  However, I've experimented with this, and in
+        // practice we currently do not appear to use STXSDX fed by
+        // a narrowing copy from a full vector register.  Since I can't
+        // generate any useful test cases, I've left this alone for now.
       case PPC::STXSDX:
+      case PPC::STXSSPX:
       case PPC::VCIPHER:
       case PPC::VCIPHERLAST:
       case PPC::VMRGHB:
@@ -543,7 +562,8 @@ unsigned PPCVSXSwapRemoval::lookThruCopyLike(unsigned SrcReg,
   }

   if (!TargetRegisterInfo::isVirtualRegister(CopySrcReg)) {
-    SwapVector[VecIdx].MentionsPhysVR = 1;
+    if (!isScalarVecReg(CopySrcReg))
+      SwapVector[VecIdx].MentionsPhysVR = 1;
     return CopySrcReg;
   }

@@ -629,8 +649,8 @@ void PPCVSXSwapRemoval::recordUnoptimizableWebs() {
       SwapVector[Repr].WebRejected = 1;

       DEBUG(dbgs() <<
-            format("Web %d rejected for physreg, partial reg, or not swap[pable]\n",
-                   Repr));
+            format("Web %d rejected for physreg, partial reg, or not "
+                   "swap[pable]\n", Repr));
       DEBUG(dbgs() << "  in " << EntryIdx << ": ");
       DEBUG(SwapVector[EntryIdx].VSEMI->dump());
       DEBUG(dbgs() << "\n");
@@ -743,6 +763,21 @@ void PPCVSXSwapRemoval::markSwapsForRemoval() {
   }
 }

+// Create an xxswapd instruction and insert it prior to the given point.
+// MI is used to determine basic block and debug loc information.
+// FIXME: When inserting a swap, we should check whether SrcReg is
+// defined by another swap: SrcReg = XXPERMDI Reg, Reg, 2;  If so,
+// then instead we should generate a copy from Reg to DstReg.
+void PPCVSXSwapRemoval::insertSwap(MachineInstr *MI,
+                                   MachineBasicBlock::iterator InsertPoint,
+                                   unsigned DstReg, unsigned SrcReg) {
+  BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
+          TII->get(PPC::XXPERMDI), DstReg)
+      .addReg(SrcReg)
+      .addReg(SrcReg)
+      .addImm(2);
+}
+
 // The identified swap entry requires special handling to allow its
 // containing computation to be optimized.  Perform that handling
 // here.
@@ -752,8 +787,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
   switch (SwapVector[EntryIdx].SpecialHandling) {

   default:
-    assert(false && "Unexpected special handling type");
-    break;
+    llvm_unreachable("Unexpected special handling type");

   // For splats based on an index into a vector, add N/2 modulo N
   // to the index, where N is the number of vector elements.
@@ -766,7 +800,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {

     switch (MI->getOpcode()) {
     default:
-      assert(false && "Unexpected splat opcode");
+      llvm_unreachable("Unexpected splat opcode");
     case PPC::VSPLTB: NElts = 16; break;
     case PPC::VSPLTH: NElts = 8;  break;
     case PPC::VSPLTW: NElts = 4;  break;
@@ -811,7 +845,7 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
   // For a copy from a scalar floating-point register to a vector
   // register, removing swaps will leave the copied value in the
   // wrong lane.  Insert a swap following the copy to fix this.
-  case SHValues::SH_COPYSCALAR: {
+  case SHValues::SH_COPYWIDEN: {
     MachineInstr *MI = SwapVector[EntryIdx].VSEMI;

     DEBUG(dbgs() << "Changing SUBREG_TO_REG: ");
@@ -825,14 +859,13 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
     DEBUG(dbgs() << "  Into: ");
     DEBUG(MI->dump());

-    MachineBasicBlock::iterator InsertPoint = MI->getNextNode();
+    auto InsertPoint = ++MachineBasicBlock::iterator(MI);

     // Note that an XXPERMDI requires a VSRC, so if the SUBREG_TO_REG
     // is copying to a VRRC, we need to be careful to avoid a register
     // assignment problem.  In this case we must copy from VRRC to VSRC
     // prior to the swap, and from VSRC to VRRC following the swap.
     // Coalescing will usually remove all this mess.
-
     if (DstRC == &PPC::VRRCRegClass) {
       unsigned VSRCTmp1 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
       unsigned VSRCTmp2 = MRI->createVirtualRegister(&PPC::VSRCRegClass);
@@ -840,29 +873,19 @@ void PPCVSXSwapRemoval::handleSpecialSwappables(int EntryIdx) {
       BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
               TII->get(PPC::COPY), VSRCTmp1)
         .addReg(NewVReg);
-      DEBUG(MI->getNextNode()->dump());
+      DEBUG(std::prev(InsertPoint)->dump());

-      BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
-              TII->get(PPC::XXPERMDI), VSRCTmp2)
-        .addReg(VSRCTmp1)
-        .addReg(VSRCTmp1)
-        .addImm(2);
-      DEBUG(MI->getNextNode()->getNextNode()->dump());
+      insertSwap(MI, InsertPoint, VSRCTmp2, VSRCTmp1);
+      DEBUG(std::prev(InsertPoint)->dump());

       BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
               TII->get(PPC::COPY), DstReg)
         .addReg(VSRCTmp2);
-      DEBUG(MI->getNextNode()->getNextNode()->getNextNode()->dump());
+      DEBUG(std::prev(InsertPoint)->dump());

     } else {
-
-      BuildMI(*MI->getParent(), InsertPoint, MI->getDebugLoc(),
-              TII->get(PPC::XXPERMDI), DstReg)
-        .addReg(NewVReg)
-        .addReg(NewVReg)
-        .addImm(2);
-
-      DEBUG(MI->getNextNode()->dump());
+      insertSwap(MI, InsertPoint, DstReg, NewVReg);
+      DEBUG(std::prev(InsertPoint)->dump());
     }
     break;
   }
@@ -947,8 +970,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
       case SH_XXPERMDI:
         DEBUG(dbgs() << "special:xxpermdi ");
         break;
-      case SH_COPYSCALAR:
-        DEBUG(dbgs() << "special:copyscalar ");
+      case SH_COPYWIDEN:
+        DEBUG(dbgs() << "special:copywiden ");
         break;
       }
     }
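The immediate 2 passed by insertSwap encodes the doubleword-swap form of xxpermdi. A minimal host-side model of the permute's selection rule, an editor illustration rather than LLVM code, shows why both source operands are the same register:

  // xxpermdi_sketch.cpp -- models the doubleword select done by PPC::XXPERMDI.
  #include <array>
  #include <cstdint>
  #include <cstdio>

  using V2x64 = std::array<uint64_t, 2>;

  // xxpermdi DST, A, B, DM: DST[0] comes from A, DST[1] from B;
  // bit 1 of DM picks A's doubleword, bit 0 picks B's.
  V2x64 xxpermdi(V2x64 A, V2x64 B, unsigned DM) {
    return {(DM & 2) ? A[1] : A[0], (DM & 1) ? B[1] : B[0]};
  }

  int main() {
    V2x64 R = {0x1111, 0x2222};
    V2x64 S = xxpermdi(R, R, 2); // both sources equal, DM = 2 -> swap
    std::printf("%llx %llx\n", (unsigned long long)S[0],
                (unsigned long long)S[1]); // prints: 2222 1111
  }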
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 1c4e486da418..a55274744fd1 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -14,6 +14,7 @@
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/MCSymbol.h"
@@ -34,7 +35,6 @@ namespace {
 class SparcOperand;
 class SparcAsmParser : public MCTargetAsmParser {

-  MCSubtargetInfo &STI;
   MCAsmParser &Parser;

   /// @name Auto-generated Match Functions
@@ -69,6 +69,10 @@ class SparcAsmParser : public MCTargetAsmParser {

   OperandMatchResultTy parseBranchModifiers(OperandVector &Operands);

+  // Helper function for dealing with %lo / %hi in PIC mode.
+  const SparcMCExpr *adjustPICRelocation(SparcMCExpr::VariantKind VK,
+                                         const MCExpr *subExpr);
+
   // returns true if Tok is matched to a register and returns register in RegNo.
   bool matchRegisterName(const AsmToken &Tok, unsigned &RegNo,
                          unsigned &RegKind);
@@ -77,24 +81,24 @@ class SparcAsmParser : public MCTargetAsmParser {
   bool parseDirectiveWord(unsigned Size, SMLoc L);

   bool is64Bit() const {
-    return STI.getTargetTriple().getArch() == Triple::sparcv9;
+    return getSTI().getTargetTriple().getArch() == Triple::sparcv9;
   }

   void expandSET(MCInst &Inst, SMLoc IDLoc,
                  SmallVectorImpl<MCInst> &Instructions);

 public:
-  SparcAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
+  SparcAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
-      : MCTargetAsmParser(), STI(sti), Parser(parser) {
+      : MCTargetAsmParser(Options, sti), Parser(parser) {
     // Initialize the set of available features.
-    setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
   }
 };

-  static unsigned IntRegs[32] = {
+  static const MCPhysReg IntRegs[32] = {
     Sparc::G0, Sparc::G1, Sparc::G2, Sparc::G3,
     Sparc::G4, Sparc::G5, Sparc::G6, Sparc::G7,
     Sparc::O0, Sparc::O1, Sparc::O2, Sparc::O3,
@@ -104,7 +108,7 @@ public:
     Sparc::I0, Sparc::I1, Sparc::I2, Sparc::I3,
     Sparc::I4, Sparc::I5, Sparc::I6, Sparc::I7 };

-  static unsigned FloatRegs[32] = {
+  static const MCPhysReg FloatRegs[32] = {
     Sparc::F0,  Sparc::F1,  Sparc::F2,  Sparc::F3,
     Sparc::F4,  Sparc::F5,  Sparc::F6,  Sparc::F7,
     Sparc::F8,  Sparc::F9,  Sparc::F10, Sparc::F11,
@@ -114,7 +118,7 @@ public:
     Sparc::F24, Sparc::F25, Sparc::F26, Sparc::F27,
     Sparc::F28, Sparc::F29, Sparc::F30, Sparc::F31 };

-  static unsigned DoubleRegs[32] = {
+  static const MCPhysReg DoubleRegs[32] = {
     Sparc::D0,  Sparc::D1,  Sparc::D2,  Sparc::D3,
     Sparc::D4,  Sparc::D5,  Sparc::D6,  Sparc::D7,
     Sparc::D8,  Sparc::D7,  Sparc::D8,  Sparc::D9,
@@ -124,13 +128,13 @@ public:
     Sparc::D24, Sparc::D25, Sparc::D26, Sparc::D27,
     Sparc::D28, Sparc::D29, Sparc::D30, Sparc::D31 };

-  static unsigned QuadFPRegs[32] = {
+  static const MCPhysReg QuadFPRegs[32] = {
     Sparc::Q0,  Sparc::Q1,  Sparc::Q2,  Sparc::Q3,
     Sparc::Q4,  Sparc::Q5,  Sparc::Q6,  Sparc::Q7,
     Sparc::Q8,  Sparc::Q9,  Sparc::Q10, Sparc::Q11,
     Sparc::Q12, Sparc::Q13, Sparc::Q14, Sparc::Q15 };

-  static unsigned ASRRegs[32] = {
+  static const MCPhysReg ASRRegs[32] = {
     SP::Y,     SP::ASR1,  SP::ASR2,  SP::ASR3,
     SP::ASR4,  SP::ASR5,  SP::ASR6,  SP::ASR7,
     SP::ASR8,  SP::ASR9,  SP::ASR10, SP::ASR11,
@@ -140,6 +144,12 @@ public:
     SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
     SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};

+  static const MCPhysReg IntPairRegs[] = {
+    Sparc::G0_G1, Sparc::G2_G3, Sparc::G4_G5, Sparc::G6_G7,
+    Sparc::O0_O1, Sparc::O2_O3, Sparc::O4_O5, Sparc::O6_O7,
+    Sparc::L0_L1, Sparc::L2_L3, Sparc::L4_L5, Sparc::L6_L7,
+    Sparc::I0_I1, Sparc::I2_I3, Sparc::I4_I5, Sparc::I6_I7};
+
 /// SparcOperand - Instances of this class represent a parsed Sparc machine
 /// instruction.
 class SparcOperand : public MCParsedAsmOperand {
@@ -147,6 +157,7 @@ public:
   enum RegisterKind {
     rk_None,
     rk_IntReg,
+    rk_IntPairReg,
     rk_FloatReg,
     rk_DoubleReg,
     rk_QuadReg,
@@ -200,6 +211,10 @@ public:
   bool isMEMrr() const { return Kind == k_MemoryReg; }
   bool isMEMri() const { return Kind == k_MemoryImm; }

+  bool isIntReg() const {
+    return (Kind == k_Register && Reg.Kind == rk_IntReg);
+  }
+
   bool isFloatReg() const {
     return (Kind == k_Register && Reg.Kind == rk_FloatReg);
   }
@@ -330,6 +345,25 @@ public:
     return Op;
   }

+  static bool MorphToIntPairReg(SparcOperand &Op) {
+    unsigned Reg = Op.getReg();
+    assert(Op.Reg.Kind == rk_IntReg);
+    unsigned regIdx = 32;
+    if (Reg >= Sparc::G0 && Reg <= Sparc::G7)
+      regIdx = Reg - Sparc::G0;
+    else if (Reg >= Sparc::O0 && Reg <= Sparc::O7)
+      regIdx = Reg - Sparc::O0 + 8;
+    else if (Reg >= Sparc::L0 && Reg <= Sparc::L7)
+      regIdx = Reg - Sparc::L0 + 16;
+    else if (Reg >= Sparc::I0 && Reg <= Sparc::I7)
+      regIdx = Reg - Sparc::I0 + 24;
+    if (regIdx % 2 || regIdx > 31)
+      return false;
+    Op.Reg.RegNum = IntPairRegs[regIdx / 2];
+    Op.Reg.Kind = rk_IntPairReg;
+    return true;
+  }
+
   static bool MorphToDoubleReg(SparcOperand &Op) {
     unsigned Reg = Op.getReg();
     assert(Op.Reg.Kind == rk_FloatReg);
@@ -407,7 +441,22 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,

   // the imm operand can be either an expression or an immediate.
   bool IsImm = Inst.getOperand(1).isImm();
-  uint64_t ImmValue = IsImm ? MCValOp.getImm() : 0;
+  int64_t RawImmValue = IsImm ? MCValOp.getImm() : 0;
+
+  // Allow either a signed or unsigned 32-bit immediate.
+  if (RawImmValue < -2147483648LL || RawImmValue > 4294967295LL) {
+    Error(IDLoc, "set: argument must be between -2147483648 and 4294967295");
+    return;
+  }
+
+  // If the value was expressed as a large unsigned number, that's ok.
+  // We want to see if it "looks like" a small signed number.
+  int32_t ImmValue = RawImmValue;
+  // For 'set' you can't use 'or' with a negative operand on V9 because
+  // that would splat the sign bit across the upper half of the destination
+  // register, whereas 'set' is defined to zero the high 32 bits.
+  bool IsEffectivelyImm13 =
+      IsImm && ((is64Bit() ? 0 : -4096) <= ImmValue && ImmValue < 4096);
   const MCExpr *ValExpr;
   if (IsImm)
     ValExpr = MCConstantExpr::create(ImmValue, getContext());
@@ -416,10 +465,12 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,

   MCOperand PrevReg = MCOperand::createReg(Sparc::G0);

-  if (!IsImm || (ImmValue & ~0x1fff)) {
+  // If not just a signed imm13 value, then either we use a 'sethi' with a
+  // following 'or', or a 'sethi' by itself if there are no more 1 bits.
+  // In either case, start with the 'sethi'.
+  if (!IsEffectivelyImm13) {
     MCInst TmpInst;
-    const MCExpr *Expr =
-        SparcMCExpr::create(SparcMCExpr::VK_Sparc_HI, ValExpr, getContext());
+    const MCExpr *Expr = adjustPICRelocation(SparcMCExpr::VK_Sparc_HI, ValExpr);
     TmpInst.setLoc(IDLoc);
     TmpInst.setOpcode(SP::SETHIi);
     TmpInst.addOperand(MCRegOp);
@@ -428,10 +479,23 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
     PrevReg = MCRegOp;
   }

-  if (!IsImm || ((ImmValue & 0x1fff) != 0 || ImmValue == 0)) {
+  // The low bits require touching in 3 cases:
+  // * A non-immediate value will always require both instructions.
+  // * An effectively imm13 value needs only an 'or' instruction.
+  // * Otherwise, an immediate that is not effectively imm13 requires the
+  //   'or' only if bits remain after clearing the 22 bits that 'sethi' set.
+  //   If the low bits are known zeros, there's nothing to do.
+  // In the second case, and only in that case, must we NOT clear
+  // bits of the immediate value via the %lo() assembler function.
+  // Note also, the 'or' instruction doesn't mind a large value in the case
+  // where the operand to 'set' was 0xFFFFFzzz - it does exactly what you mean.
+  if (!IsImm || IsEffectivelyImm13 || (ImmValue & 0x3ff)) {
     MCInst TmpInst;
-    const MCExpr *Expr =
-        SparcMCExpr::create(SparcMCExpr::VK_Sparc_LO, ValExpr, getContext());
+    const MCExpr *Expr;
+    if (IsEffectivelyImm13)
+      Expr = ValExpr;
+    else
+      Expr = adjustPICRelocation(SparcMCExpr::VK_Sparc_LO, ValExpr);
     TmpInst.setLoc(IDLoc);
     TmpInst.setOpcode(SP::ORri);
     TmpInst.addOperand(MCRegOp);
@@ -463,7 +527,7 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   }

   for (const MCInst &I : Instructions) {
-    Out.EmitInstruction(I, STI);
+    Out.EmitInstruction(I, getSTI());
   }
   return false;
 }
@@ -742,6 +806,9 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
       case Sparc::PSR:
         Op = SparcOperand::CreateToken("%psr", S);
         break;
+      case Sparc::FSR:
+        Op = SparcOperand::CreateToken("%fsr", S);
+        break;
       case Sparc::WIM:
         Op = SparcOperand::CreateToken("%wim", S);
         break;
@@ -766,6 +833,7 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
   case AsmToken::Minus:
   case AsmToken::Integer:
   case AsmToken::LParen:
+  case AsmToken::Dot:
     if (!getParser().parseExpression(EVal, E))
       Op = SparcOperand::CreateImm(EVal, S, E);
     break;
@@ -848,6 +916,13 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
       return true;
     }

+    // %fprs is an alias of %asr6.
+    if (name.equals("fprs")) {
+      RegNo = ASRRegs[6];
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+
     if (name.equals("icc")) {
       RegNo = Sparc::ICC;
       RegKind = SparcOperand::rk_Special;
@@ -860,6 +935,12 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
       return true;
     }

+    if (name.equals("fsr")) {
+      RegNo = Sparc::FSR;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+
     if (name.equals("wim")) {
       RegNo = Sparc::WIM;
       RegKind = SparcOperand::rk_Special;
       return true;
     }
@@ -943,6 +1024,82 @@ bool SparcAsmParser::matchRegisterName(const AsmToken &Tok,
       RegKind = SparcOperand::rk_IntReg;
       return true;
     }
+
+    if (name.equals("tpc")) {
+      RegNo = Sparc::TPC;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tnpc")) {
+      RegNo = Sparc::TNPC;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tstate")) {
+      RegNo = Sparc::TSTATE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tt")) {
+      RegNo = Sparc::TT;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tick")) {
+      RegNo = Sparc::TICK;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tba")) {
+      RegNo = Sparc::TBA;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("pstate")) {
+      RegNo = Sparc::PSTATE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("tl")) {
+      RegNo = Sparc::TL;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("pil")) {
+      RegNo = Sparc::PIL;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("cwp")) {
+      RegNo = Sparc::CWP;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("cansave")) {
+      RegNo = Sparc::CANSAVE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("canrestore")) {
+      RegNo = Sparc::CANRESTORE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("cleanwin")) {
+      RegNo = Sparc::CLEANWIN;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("otherwin")) {
+      RegNo = Sparc::OTHERWIN;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
+    if (name.equals("wstate")) {
+      RegNo = Sparc::WSTATE;
+      RegKind = SparcOperand::rk_Special;
+      return true;
+    }
   }
   return false;
 }
@@ -975,6 +1132,32 @@ static bool hasGOTReference(const MCExpr *Expr) {
   return false;
 }

+const SparcMCExpr *
+SparcAsmParser::adjustPICRelocation(SparcMCExpr::VariantKind VK,
+                                    const MCExpr *subExpr)
+{
+  // When in PIC mode, "%lo(...)" and "%hi(...)" behave differently.
+  // If the expression contains _GLOBAL_OFFSET_TABLE_, it is
+  // actually a %pc10 or %pc22 relocation. Otherwise, they are interpreted
+  // as %got10 or %got22 relocation.
+
+  if (getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_) {
+    switch(VK) {
+    default: break;
+    case SparcMCExpr::VK_Sparc_LO:
+      VK = (hasGOTReference(subExpr) ? SparcMCExpr::VK_Sparc_PC10
+                                     : SparcMCExpr::VK_Sparc_GOT10);
+      break;
+    case SparcMCExpr::VK_Sparc_HI:
+      VK = (hasGOTReference(subExpr) ? SparcMCExpr::VK_Sparc_PC22
+                                     : SparcMCExpr::VK_Sparc_GOT22);
+      break;
+    }
+  }
+
+  return SparcMCExpr::create(VK, subExpr, getContext());
+}
+
 bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
                                             SMLoc &EndLoc) {
@@ -998,30 +1181,7 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
   if (Parser.parseParenExpression(subExpr, EndLoc))
     return false;

-  bool isPIC = getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_;
-
-  // Ugly: if a sparc assembly expression says "%hi(...)" but the
-  // expression within contains _GLOBAL_OFFSET_TABLE_, it REALLY means
-  // %pc22. Same with %lo -> %pc10. Worse, if it doesn't contain that,
-  // the meaning depends on whether the assembler was invoked with
-  // -KPIC or not: if so, it really means %got22/%got10; if not, it
-  // actually means what it said! Sigh, historical mistakes...
-
-  switch(VK) {
-  default: break;
-  case SparcMCExpr::VK_Sparc_LO:
-    VK = (hasGOTReference(subExpr)
-          ? SparcMCExpr::VK_Sparc_PC10
-          : (isPIC ? SparcMCExpr::VK_Sparc_GOT10 : VK));
-    break;
-  case SparcMCExpr::VK_Sparc_HI:
-    VK = (hasGOTReference(subExpr)
-          ? SparcMCExpr::VK_Sparc_PC22
-          : (isPIC ? SparcMCExpr::VK_Sparc_GOT22 : VK));
-    break;
-  }
-
-  EVal = SparcMCExpr::create(VK, subExpr, getContext());
+  EVal = adjustPICRelocation(VK, subExpr);
   return true;
 }
@@ -1051,5 +1211,9 @@ unsigned SparcAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp,
       break;
     }
   }
+  if (Op.isIntReg() && Kind == MCK_IntPair) {
+    if (SparcOperand::MorphToIntPairReg(Op))
+      return MCTargetAsmParser::Match_Success;
+  }
   return Match_InvalidOperand;
 }
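The cases in expandSET above can be checked against a small standalone model. This sketch is an editor illustration, not LLVM code; the printed register and assembly names are stand-ins, and the %hi/%lo split (upper 22 bits, lower 10 bits) follows the SPARC 'set' macro definition:

  // set_sketch.cpp -- models the sethi/or expansion of the SPARC "set" macro.
  #include <cstdint>
  #include <cstdio>

  void expandSet(int64_t Raw, bool Is64Bit) {
    if (Raw < -2147483648LL || Raw > 4294967295LL) {
      std::puts("error: argument must fit in 32 bits");
      return;
    }
    int32_t Imm = (int32_t)Raw;
    // Effectively imm13: a single "or" suffices (on V9 a negative operand
    // must be avoided, since "or" would sign-extend into the upper 32 bits).
    bool Imm13 = (Is64Bit ? 0 : -4096) <= Imm && Imm < 4096;
    if (Imm13) {
      std::printf("or    %%g0, %d, rd\n", Imm);
      return;
    }
    std::printf("sethi %%hi(0x%x), rd\n", (uint32_t)Imm); // upper 22 bits
    if ((uint32_t)Imm & 0x3ff)                            // low 10 bits remain
      std::printf("or    rd, %%lo(0x%x), rd\n", (uint32_t)Imm);
  }

  int main() { expandSet(0x12345678, false); } // prints a sethi then an or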
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 38bff44e7542..c689b7f7201e 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -122,6 +122,8 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
       continue;
     }

+    // TODO: If we ever want to support v7, this needs to be extended
+    // to cover all floating point operations.
    if (!Subtarget->isV9() &&
        (MI->getOpcode() == SP::FCMPS || MI->getOpcode() == SP::FCMPD
         || MI->getOpcode() == SP::FCMPQ)) {
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 3e56b9e9b883..51751ec511c9 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -117,6 +117,19 @@ static const unsigned ASRRegDecoderTable[] = {
   SP::ASR24, SP::ASR25, SP::ASR26, SP::ASR27,
   SP::ASR28, SP::ASR29, SP::ASR30, SP::ASR31};

+static const unsigned PRRegDecoderTable[] = {
+  SP::TPC, SP::TNPC, SP::TSTATE, SP::TT, SP::TICK, SP::TBA, SP::PSTATE,
+  SP::TL, SP::PIL, SP::CWP, SP::CANSAVE, SP::CANRESTORE, SP::CLEANWIN,
+  SP::OTHERWIN, SP::WSTATE
+};
+
+static const uint16_t IntPairDecoderTable[] = {
+  SP::G0_G1, SP::G2_G3, SP::G4_G5, SP::G6_G7,
+  SP::O0_O1, SP::O2_O3, SP::O4_O5, SP::O6_O7,
+  SP::L0_L1, SP::L2_L3, SP::L4_L5, SP::L6_L7,
+  SP::I0_I1, SP::I2_I3, SP::I4_I5, SP::I6_I7,
+};
+
 static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst,
                                                unsigned RegNo,
                                                uint64_t Address,
@@ -196,9 +209,34 @@ static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
   return MCDisassembler::Success;
 }

+static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
+                                              uint64_t Address,
+                                              const void *Decoder) {
+  if (RegNo >= array_lengthof(PRRegDecoderTable))
+    return MCDisassembler::Fail;
+  Inst.addOperand(MCOperand::createReg(PRRegDecoderTable[RegNo]));
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeIntPairRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder) {
+  DecodeStatus S = MCDisassembler::Success;
+
+  if (RegNo > 31)
+    return MCDisassembler::Fail;
+
+  if ((RegNo & 1))
+    S = MCDisassembler::SoftFail;
+
+  unsigned RegisterPair = IntPairDecoderTable[RegNo/2];
+  Inst.addOperand(MCOperand::createReg(RegisterPair));
+  return S;
+}

 static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
                                   const void *Decoder);
+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+                                  const void *Decoder);
 static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
                                  const void *Decoder);
 static DecodeStatus DecodeLoadDFP(MCInst &Inst, unsigned insn, uint64_t Address,
@@ -207,6 +245,8 @@ static DecodeStatus DecodeLoadQFP(MCInst &Inst, unsigned insn, uint64_t Address,
                                   const void *Decoder);
 static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn,
                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeStoreDFP(MCInst &Inst, unsigned insn,
@@ -326,6 +366,12 @@ static DecodeStatus DecodeLoadInt(MCInst &Inst, unsigned insn, uint64_t Address,
                    DecodeIntRegsRegisterClass);
 }

+static DecodeStatus DecodeLoadIntPair(MCInst &Inst, unsigned insn, uint64_t Address,
+                                  const void *Decoder) {
+  return DecodeMem(Inst, insn, Address, Decoder, true,
+                   DecodeIntPairRegisterClass);
+}
+
 static DecodeStatus DecodeLoadFP(MCInst &Inst, unsigned insn, uint64_t Address,
                                  const void *Decoder) {
   return DecodeMem(Inst, insn, Address, Decoder, true,
@@ -350,6 +396,12 @@ static DecodeStatus DecodeStoreInt(MCInst &Inst, unsigned insn,
                    DecodeIntRegsRegisterClass);
 }

+static DecodeStatus DecodeStoreIntPair(MCInst &Inst, unsigned insn,
+                                       uint64_t Address, const void *Decoder) {
+  return DecodeMem(Inst, insn, Address, Decoder, false,
+                   DecodeIntPairRegisterClass);
+}
+
 static DecodeStatus DecodeStoreFP(MCInst &Inst, unsigned insn, uint64_t Address,
                                   const void *Decoder) {
   return DecodeMem(Inst, insn, Address, Decoder, false,
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 0b01b88e5250..6f06d1ddae32 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -15,12 +15,9 @@
 #define LLVM_LIB_TARGET_SPARC_INSTPRINTER_SPARCINSTPRINTER_H

 #include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"

 namespace llvm {

-class MCOperand;
-
 class SparcInstPrinter : public MCInstPrinter {
 public:
   SparcInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 12386f14443e..ad441227600e 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -21,6 +21,7 @@ class Triple;

 class SparcELFMCAsmInfo : public MCAsmInfoELF {
   void anchor() override;
+
 public:
   explicit SparcELFMCAsmInfo(const Triple &TheTriple);

   const MCExpr*
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index d08ad86dbe04..13f08195c764 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -90,8 +90,8 @@ public:
                                  const MCAsmLayout *Layout,
                                  const MCFixup *Fixup) const override;
   void visitUsedExpr(MCStreamer &Streamer) const override;
-  MCSection *findAssociatedSection() const override {
-    return getSubExpr()->findAssociatedSection();
+  MCFragment *findAssociatedFragment() const override {
+    return getSubExpr()->findAssociatedFragment();
   }

   void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index c5f046bfc5bb..e3b0f5266747 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -267,11 +267,11 @@ void SparcAsmPrinter::EmitInstruction(const MachineInstr *MI)
     LowerGETPCXAndEmitMCInsts(MI, getSubtargetInfo());
     return;
   }
-  MachineBasicBlock::const_instr_iterator I = MI;
+  MachineBasicBlock::const_instr_iterator I = MI->getIterator();
   MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
   do {
     MCInst TmpInst;
-    LowerSparcMachineInstrToMCInst(I, TmpInst, *this);
+    LowerSparcMachineInstrToMCInst(&*I, TmpInst, *this);
     EmitToStreamer(*OutStreamer, TmpInst);
   } while ((++I != E) && I->isInsideBundle()); // Delay slot check.
} @@ -296,7 +296,7 @@ void SparcAsmPrinter::EmitFunctionBodyStart() { void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const MachineOperand &MO = MI->getOperand (opNum); SparcMCExpr::VariantKind TF = (SparcMCExpr::VariantKind) MO.getTargetFlags(); @@ -373,7 +373,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, O << MO.getSymbolName(); break; case MachineOperand::MO_ConstantPoolIndex: - O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" + O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); break; default: diff --git a/lib/Target/Sparc/SparcCallingConv.td b/lib/Target/Sparc/SparcCallingConv.td index dfaaabf344a3..0aa29d186dc1 100644 --- a/lib/Target/Sparc/SparcCallingConv.td +++ b/lib/Target/Sparc/SparcCallingConv.td @@ -21,7 +21,11 @@ def CC_Sparc32 : CallingConv<[ // i32 f32 arguments get passed in integer registers if there is space. CCIfType<[i32, f32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, // f64 arguments are split and passed through registers or through stack. - CCIfType<[f64], CCCustom<"CC_Sparc_Assign_f64">>, + CCIfType<[f64], CCCustom<"CC_Sparc_Assign_Split_64">>, + // As are v2i32 arguments (this would be the default behavior for + // v2i32 if it wasn't allocated to the IntPair register-class) + CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Split_64">>, + // Alternatively, they are assigned to the stack in 4-byte aligned units. CCAssignToStack<4, 4> @@ -30,7 +34,8 @@ def CC_Sparc32 : CallingConv<[ def RetCC_Sparc32 : CallingConv<[ CCIfType<[i32], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, CCIfType<[f32], CCAssignToReg<[F0, F1, F2, F3]>>, - CCIfType<[f64], CCAssignToReg<[D0, D1]>> + CCIfType<[f64], CCAssignToReg<[D0, D1]>>, + CCIfType<[v2i32], CCCustom<"CC_Sparc_Assign_Ret_Split_64">> ]>; diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index c0279daa63d9..39b5e809c9be 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -44,7 +44,7 @@ void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF, unsigned ADDrr, unsigned ADDri) const { - DebugLoc dl = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc(); + DebugLoc dl; const SparcInstrInfo &TII = *static_cast(MF.getSubtarget().getInstrInfo()); @@ -90,8 +90,23 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF, MachineFrameInfo *MFI = MF.getFrameInfo(); const SparcInstrInfo &TII = *static_cast(MF.getSubtarget().getInstrInfo()); + const SparcRegisterInfo &RegInfo = + *static_cast(MF.getSubtarget().getRegisterInfo()); MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; + bool NeedsStackRealignment = RegInfo.needsStackRealignment(MF); + + // FIXME: unfortunately, returning false from canRealignStack + // actually just causes needsStackRealignment to return false, + // rather than reporting an error, as would be sensible. This is + // poor, but fixing that bogosity is going to be a large project. + // For now, just see if it's lied, and report an error here. 
+ if (!NeedsStackRealignment && MFI->getMaxAlignment() > getStackAlignment()) + report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required " + "stack re-alignment, but LLVM couldn't handle it " + "(probably because it has a dynamic alloca)."); // Get the number of bytes to allocate from the FrameInfo int NumBytes = (int) MFI->getStackSize(); @@ -104,12 +119,43 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF, SAVEri = SP::ADDri; SAVErr = SP::ADDrr; } - NumBytes = -MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes); - emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri); + + // The SPARC ABI is a bit odd in that it requires a reserved 92-byte + // (128 in v9) area in the user's stack, starting at %sp. Thus, the + // first part of the stack that can actually be used is located at + // %sp + 92. + // + // We therefore need to add that offset to the total stack size + // after all the stack objects are placed by + // PrologEpilogInserter calculateFrameObjectOffsets. However, since the stack needs to be + // aligned *after* the extra size is added, we need to disable + // calculateFrameObjectOffsets's built-in stack alignment, by having + // targetHandlesStackFrameRounding return true. + + + // Add the extra call frame stack size, if needed. (This is the same + // code as in PrologEpilogInserter, but also gets disabled by + // targetHandlesStackFrameRounding) + if (MFI->adjustsStack() && hasReservedCallFrame(MF)) + NumBytes += MFI->getMaxCallFrameSize(); + + // Adds the SPARC subtarget-specific spill area to the stack + // size. Also ensures target-required alignment. + NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes); + + // Finally, ensure that the size is sufficiently aligned for the + // data on the stack. + if (MFI->getMaxAlignment() > 0) { + NumBytes = RoundUpToAlignment(NumBytes, MFI->getMaxAlignment()); + } + + // Update stack size with corrected value. + MFI->setStackSize(NumBytes); + + emitSPAdjustment(MF, MBB, MBBI, -NumBytes, SAVErr, SAVEri); MachineModuleInfo &MMI = MF.getMMI(); - const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - unsigned regFP = MRI->getDwarfRegNum(SP::I6, true); + unsigned regFP = RegInfo.getDwarfRegNum(SP::I6, true); // Emit ".cfi_def_cfa_register 30". unsigned CFIIndex = @@ -122,13 +168,19 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); - unsigned regInRA = MRI->getDwarfRegNum(SP::I7, true); - unsigned regOutRA = MRI->getDwarfRegNum(SP::O7, true); + unsigned regInRA = RegInfo.getDwarfRegNum(SP::I7, true); + unsigned regOutRA = RegInfo.getDwarfRegNum(SP::O7, true); // Emit ".cfi_register 15, 31". CFIIndex = MMI.addFrameInst( MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + + if (NeedsStackRealignment) { + // andn %o6, MaxAlign-1, %o6 + int MaxAlign = MFI->getMaxAlignment(); + BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), SP::O6).addReg(SP::O6).addImm(MaxAlign - 1); + } } void SparcFrameLowering:: @@ -167,7 +219,6 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF, if (NumBytes == 0) return; - NumBytes = MF.getSubtarget<SparcSubtarget>().getAdjustedFrameSize(NumBytes); emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri); } @@ -180,21 +231,69 @@ bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { // pointer register.
This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. bool SparcFrameLowering::hasFP(const MachineFunction &MF) const { + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); return MF.getTarget().Options.DisableFramePointerElim(MF) || - MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); + RegInfo->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken(); } +int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { + const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>(); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const SparcRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); + const SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); + bool isFixed = MFI->isFixedObjectIndex(FI); + + // Addressable stack objects are accessed using neg. offsets from + // %fp, or positive offsets from %sp. + bool UseFP; + + // Sparc uses FP-based references in general, even when "hasFP" is + // false. That function is rather a misnomer, because %fp is + // actually always available, unless isLeafProc. + if (FuncInfo->isLeafProc()) { + // If there's a leaf proc, all offsets need to be %sp-based, + // because we haven't caused %fp to actually point to our frame. + UseFP = false; + } else if (isFixed) { + // Otherwise, argument access should always use %fp. + UseFP = true; + } else if (RegInfo->needsStackRealignment(MF)) { + // If there is dynamic stack realignment, all local object + // references need to be via %sp, to take account of the + // re-alignment. + UseFP = false; + } else { + // Finally, default to using %fp. + UseFP = true; + } + + int64_t FrameOffset = MF.getFrameInfo()->getObjectOffset(FI) + + Subtarget.getStackPointerBias(); + + if (UseFP) { + FrameReg = RegInfo->getFrameRegister(MF); + return FrameOffset; + } else { + FrameReg = SP::O6; // %sp + return FrameOffset + MF.getFrameInfo()->getStackSize(); + } +} + static bool LLVM_ATTRIBUTE_UNUSED verifyLeafProcRegUse(MachineRegisterInfo *MRI) { for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) - if (MRI->isPhysRegUsed(reg)) + if (!MRI->reg_nodbg_empty(reg)) return false; for (unsigned reg = SP::L0; reg <= SP::L7; ++reg) - if (MRI->isPhysRegUsed(reg)) + if (!MRI->reg_nodbg_empty(reg)) return false; return true; @@ -206,33 +305,42 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); - return !(MFI->hasCalls() // has calls - || MRI.isPhysRegUsed(SP::L0) // Too many registers needed - || MRI.isPhysRegUsed(SP::O6) // %SP is used - || hasFP(MF)); // need %FP + return !(MFI->hasCalls() // has calls + || !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed + || !MRI.reg_nodbg_empty(SP::O6) // %SP is used + || hasFP(MF)); // need %FP } void SparcFrameLowering::remapRegsForLeafProc(MachineFunction &MF) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); - // Remap %i[0-7] to %o[0-7]. for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { - if (!MRI.isPhysRegUsed(reg)) + if (MRI.reg_nodbg_empty(reg)) continue; - unsigned mapped_reg = (reg - SP::I0 + SP::O0); - assert(!MRI.isPhysRegUsed(mapped_reg)); + + unsigned mapped_reg = reg - SP::I0 + SP::O0; + assert(MRI.reg_nodbg_empty(mapped_reg)); // Replace I register with O register. MRI.replaceRegWith(reg, mapped_reg); - // Mark the reg unused.
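As a reader's aid for getFrameIndexReference above, the base-register choice reduces to a small decision table. This is a self-contained C++ model of the same priority order; the boolean inputs are assumptions of the sketch mirroring the conditions the real code queries, not LLVM interfaces.

    #include <cstdint>

    enum class BaseReg { FP, SP };

    // Model of the %fp/%sp selection: leaf procedures and realigned
    // frames address locals off %sp (adding the frame size), everything
    // else uses offsets off %fp.
    int64_t frameIndexReference(bool isLeafProc, bool isFixedObject,
                                bool needsRealignment, int64_t objectOffset,
                                int64_t stackBias, int64_t stackSize,
                                BaseReg &base) {
      int64_t offset = objectOffset + stackBias;
      if (isLeafProc)
        base = BaseReg::SP;       // %fp was never set up
      else if (isFixedObject)
        base = BaseReg::FP;       // incoming arguments
      else if (needsRealignment)
        base = BaseReg::SP;       // locals after %sp realignment
      else
        base = BaseReg::FP;       // default
      return base == BaseReg::SP ? offset + stackSize : offset;
    }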
- MRI.setPhysRegUnused(reg); + // Also replace register pair super-registers. + if ((reg - SP::I0) % 2 == 0) { + unsigned preg = (reg - SP::I0) / 2 + SP::I0_I1; + unsigned mapped_preg = preg - SP::I0_I1 + SP::O0_O1; + MRI.replaceRegWith(preg, mapped_preg); + } } // Rewrite MBB's Live-ins. for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E; ++MBB) { + for (unsigned reg = SP::I0_I1; reg <= SP::I6_I7; ++reg) { + if (!MBB->isLiveIn(reg)) + continue; + MBB->removeLiveIn(reg); + MBB->addLiveIn(reg - SP::I0_I1 + SP::O0_O1); + } for (unsigned reg = SP::I0; reg <= SP::I7; ++reg) { if (!MBB->isLiveIn(reg)) continue; diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index 29fc7b7ba036..cbb4dc04fc23 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -39,6 +39,14 @@ public: void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS = nullptr) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; + + /// targetHandlesStackFrameRounding - Returns true if the target is + /// responsible for rounding up the stack frame (probably at emitPrologue + /// time). + bool targetHandlesStackFrameRounding() const override { return true; } + private: // Remap input registers to output registers for leaf procedure. void remapRegsForLeafProc(MachineFunction &MF) const; diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 340b72e7940f..c4c641659df3 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SparcTargetMachine.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Compiler.h" @@ -62,6 +63,7 @@ public: private: SDNode* getGlobalBaseReg(); + SDNode *SelectInlineAsm(SDNode *N); }; } // end anonymous namespace @@ -141,6 +143,181 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { return true; } + +// Re-assemble i64 arguments split up in SelectionDAGBuilder's +// visitInlineAsm / GetRegistersForValue functions. +// +// Note: This function was copied from, and is essentially identical +// to ARMISelDAGToDAG::SelectInlineAsm. It is very unfortunate that +// such hacking-up is necessary; a rethink of how inline asm operands +// are handled may be in order to make doing this more sane. +// +// TODO: fix inline asm support so I can simply tell it that 'i64' +// inputs to asm need to be allocated to the IntPair register type, +// and have that work. Then, delete this function. +SDNode *SparcDAGToDAGISel::SelectInlineAsm(SDNode *N){ + std::vector<SDValue> AsmNodeOperands; + unsigned Flag, Kind; + bool Changed = false; + unsigned NumOps = N->getNumOperands(); + + // Normally, i64 data is bounded to two arbitrary GPRs for "%r" + // constraint. However, some instructions (e.g. ldd/std) require + // (even/even+1) GPRs. + + // So, here, we check for this case, and mutate the inlineasm to use + // a single IntPair register instead, which guarantees such even/odd + // placement. + + SDLoc dl(N); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) + : SDValue(nullptr,0); + + SmallVector<bool, 8> OpChanged; + // Glue node will be appended late. + for(unsigned i = 0, e = N->getGluedNode() ?
NumOps - 1 : NumOps; i < e; ++i) { + SDValue op = N->getOperand(i); + AsmNodeOperands.push_back(op); + + if (i < InlineAsm::Op_FirstOperand) + continue; + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { + Flag = C->getZExtValue(); + Kind = InlineAsm::getKind(Flag); + } + else + continue; + + // Immediate operands to inline asm in the SelectionDAG are modeled with + // two operands. The first is a constant of value InlineAsm::Kind_Imm, and + // the second is a constant with the value of the immediate. If we get here + // and we have a Kind_Imm, skip the next operand, and continue. + if (Kind == InlineAsm::Kind_Imm) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); + if (NumRegs) + OpChanged.push_back(false); + + unsigned DefIdx = 0; + bool IsTiedToChangedOp = false; + // If it's a use that is tied with a previous def, it has no + // reg class constraint. + if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) + IsTiedToChangedOp = OpChanged[DefIdx]; + + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef + && Kind != InlineAsm::Kind_RegDefEarlyClobber) + continue; + + unsigned RC; + bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); + if ((!IsTiedToChangedOp && (!HasRC || RC != SP::IntRegsRegClassID)) + || NumRegs != 2) + continue; + + assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); + SDValue V0 = N->getOperand(i+1); + SDValue V1 = N->getOperand(i+2); + unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); + unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); + SDValue PairedReg; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + if (Kind == InlineAsm::Kind_RegDef || + Kind == InlineAsm::Kind_RegDefEarlyClobber) { + // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to + // the original GPRs. + + unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); + SDValue Chain = SDValue(N,0); + + SDNode *GU = N->getGluedUser(); + SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::v2i32, + Chain.getValue(1)); + + // Extract values from a GPRPair reg and copy to the original GPR reg. + SDValue Sub0 = CurDAG->getTargetExtractSubreg(SP::sub_even, dl, MVT::i32, + RegCopy); + SDValue Sub1 = CurDAG->getTargetExtractSubreg(SP::sub_odd, dl, MVT::i32, + RegCopy); + SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, + RegCopy.getValue(1)); + SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); + + // Update the original glue user. + std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); + Ops.push_back(T1.getValue(1)); + CurDAG->UpdateNodeOperands(GU, Ops); + } + else { + // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a + // GPRPair and then pass the GPRPair to the inline asm. + SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; + + // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
+ SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, + Chain.getValue(1)); + SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, + T0.getValue(1)); + SDValue Pair = SDValue( + CurDAG->getMachineNode( + TargetOpcode::REG_SEQUENCE, dl, MVT::v2i32, + { + CurDAG->getTargetConstant(SP::IntPairRegClassID, dl, + MVT::i32), + T0, + CurDAG->getTargetConstant(SP::sub_even, dl, MVT::i32), + T1, + CurDAG->getTargetConstant(SP::sub_odd, dl, MVT::i32), + }), + 0); + + // Copy REG_SEQ into a GPRPair-typed VR and replace the original two + // i32 VRs of inline asm with it. + unsigned GPVR = MRI.createVirtualRegister(&SP::IntPairRegClass); + PairedReg = CurDAG->getRegister(GPVR, MVT::v2i32); + Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); + + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + Glue = Chain.getValue(1); + } + + Changed = true; + + if(PairedReg.getNode()) { + OpChanged[OpChanged.size() -1 ] = true; + Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); + if (IsTiedToChangedOp) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); + else + Flag = InlineAsm::getFlagWordForRegClass(Flag, SP::IntPairRegClassID); + // Replace the current flag. + AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( + Flag, dl, MVT::i32); + // Add the new register node and skip the original two GPRs. + AsmNodeOperands.push_back(PairedReg); + // Skip the next two GPRs. + i += 2; + } + } + + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); + if (!Changed) + return nullptr; + + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), + CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); + New->setNodeId(-1); + return New.getNode(); +} + SDNode *SparcDAGToDAGISel::Select(SDNode *N) { SDLoc dl(N); if (N->isMachineOpcode()) { @@ -150,6 +327,12 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) { switch (N->getOpcode()) { default: break; + case ISD::INLINEASM: { + SDNode *ResNode = SelectInlineAsm(N); + if (ResNode) + return ResNode; + break; + } case SPISD::GLOBAL_BASE_REG: return getGlobalBaseReg(); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 4879d4ee79e5..5e70ffe2223c 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -49,9 +49,9 @@ static bool CC_Sparc_Assign_SRet(unsigned &ValNo, MVT &ValVT, return true; } -static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, - MVT &LocVT, CCValAssign::LocInfo &LocInfo, - ISD::ArgFlagsTy &ArgFlags, CCState &State) +static bool CC_Sparc_Assign_Split_64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { static const MCPhysReg RegList[] = { SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 @@ -77,6 +77,29 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT, return true; } +static bool CC_Sparc_Assign_Ret_Split_64(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) +{ + static const MCPhysReg RegList[] = { + SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5 + }; + + // Try to get first reg. + if (unsigned Reg = State.AllocateReg(RegList)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + return false; + + // Try to get second reg. 
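A brief aside before the allocation helper continues: the reason SelectInlineAsm above rewrites operands at all is the SPARC rule that ldd/std only accept an (even, even+1) register pair. A toy standalone C++ model of the constraint, and of how an IntPair covers its two halves (sub_even/sub_odd); all names here are illustrative only.

    #include <cassert>

    // IntPair register k covers scalar registers 2k (sub_even) and 2k+1
    // (sub_odd), so allocating the pair guarantees ldd/std-compatible
    // placement; two independently chosen GPRs do not.
    struct IntPairModel {
      unsigned pairIndex;
      unsigned subEven() const { return 2 * pairIndex; }
      unsigned subOdd() const { return 2 * pairIndex + 1; }
    };

    bool isValidLddPair(unsigned evenReg, unsigned oddReg) {
      return (evenReg % 2 == 0) && (oddReg == evenReg + 1);
    }

    int main() {
      IntPairModel p{3};
      assert(isValidLddPair(p.subEven(), p.subOdd()));
      assert(!isValidLddPair(1, 2)); // odd/even order is not a valid pair
      return 0;
    }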
+ if (unsigned Reg = State.AllocateReg(RegList)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + return false; + + return true; +} + // Allocate a full-sized argument for the 64-bit ABI. static bool CC_Sparc64_Full(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, @@ -202,12 +225,34 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, RetOps.push_back(SDValue()); // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), - OutVals[i], Flag); + SDValue Arg = OutVals[realRVLocIdx]; + + if (VA.needsCustom()) { + assert(VA.getLocVT() == MVT::v2i32); + // Legalize ret v2i32 -> ret 2 x i32 (Basically: do what would + // happen by default if this wasn't a legal type) + + SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, + Arg, + DAG.getConstant(0, DL, getVectorIdxTy(DAG.getDataLayout()))); + SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, + Arg, + DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout()))); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag); + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1, + Flag); + } else + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); // Guarantee that all emitted copies are stuck together with flags. Flag = Chain.getValue(1); @@ -355,6 +400,7 @@ LowerFormalArguments_32(SDValue Chain, CCInfo.AnalyzeFormalArguments(Ins, CC_Sparc32); const unsigned StackOffset = 92; + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); unsigned InIdx = 0; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++InIdx) { @@ -375,7 +421,8 @@ LowerFormalArguments_32(SDValue Chain, if (VA.isRegLoc()) { if (VA.needsCustom()) { - assert(VA.getLocVT() == MVT::f64); + assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32); + unsigned VRegHi = RegInfo.createVirtualRegister(&SP::IntRegsRegClass); MF.getRegInfo().addLiveIn(VA.getLocReg(), VRegHi); SDValue HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); @@ -396,9 +443,13 @@ LowerFormalArguments_32(SDValue Chain, &SP::IntRegsRegClass); LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32); } + + if (IsLittleEndian) + std::swap(LoVal, HiVal); + SDValue WholeValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); - WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue); + WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), WholeValue); InVals.push_back(WholeValue); continue; } @@ -422,7 +473,7 @@ LowerFormalArguments_32(SDValue Chain, auto PtrVT = getPointerTy(DAG.getDataLayout()); if (VA.needsCustom()) { - assert(VA.getValVT() == MVT::f64); + assert(VA.getValVT() == MVT::f64 || MVT::v2i32); // If it is double-word aligned, just load. 
if (Offset % 8 == 0) { int FI = MF.getFrameInfo()->CreateFixedObject(8, @@ -452,9 +503,12 @@ LowerFormalArguments_32(SDValue Chain, MachinePointerInfo(), false, false, false, 0); + if (IsLittleEndian) + std::swap(LoVal, HiVal); + SDValue WholeValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); - WholeValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, WholeValue); + WholeValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), WholeValue); InVals.push_back(WholeValue); continue; } @@ -468,16 +522,12 @@ LowerFormalArguments_32(SDValue Chain, Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo(), false, false, false, 0); + } else if (VA.getValVT() == MVT::f128) { + report_fatal_error("SPARCv8 does not handle f128 in calls; " + "pass indirectly"); } else { - ISD::LoadExtType LoadOp = ISD::SEXTLOAD; - // Sparc is big endian, so add an offset based on the ObjectVT. - unsigned Offset = 4-std::max(1U, VA.getValVT().getSizeInBits()/8); - FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, - DAG.getConstant(Offset, dl, MVT::i32)); - Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, - MachinePointerInfo(), - VA.getValVT(), false, false, false,0); - Load = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Load); + // We shouldn't see any other value types here. + llvm_unreachable("Unexpected ValVT encountered in frame lowering."); } InVals.push_back(Load); } @@ -612,7 +662,7 @@ LowerFormalArguments_64(SDValue Chain, InVals.push_back(DAG.getLoad( VA.getValVT(), DL, Chain, DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())), - MachinePointerInfo::getFixedStack(FI), false, false, false, 0)); + MachinePointerInfo::getFixedStack(MF, FI), false, false, false, 0)); } if (!IsVarArg) @@ -640,9 +690,9 @@ LowerFormalArguments_64(SDValue Chain, SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true); auto PtrVT = getPointerTy(MF.getDataLayout()); - OutChains.push_back( - DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT), - MachinePointerInfo::getFixedStack(FI), false, false, 0)); + OutChains.push_back(DAG.getStore( + Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT), + MachinePointerInfo::getFixedStack(MF, FI), false, false, 0)); } if (!OutChains.empty()) @@ -788,7 +838,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, } if (VA.needsCustom()) { - assert(VA.getLocVT() == MVT::f64); + assert(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2i32); if (VA.isMemLoc()) { unsigned Offset = VA.getLocMemOffset() + StackOffset; @@ -804,49 +854,53 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI, } } - SDValue StackPtr = DAG.CreateStackTemporary(MVT::f64, MVT::i32); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, - Arg, StackPtr, MachinePointerInfo(), - false, false, 0); - // Sparc is big-endian, so the high part comes first. - SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, - MachinePointerInfo(), false, false, false, 0); - // Increment the pointer to the other half. - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(4, dl)); - // Load the low part. - SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, - MachinePointerInfo(), false, false, false, 0); + if (VA.getLocVT() == MVT::f64) { + // Move from the float value from float registers into the + // integer registers. 
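The IsLittleEndian swap seen in the LowerFormalArguments_32 hunks above is pure word-order bookkeeping. A standalone C++ sketch of the BUILD_PAIR step, assuming two 32-bit halves arriving in register order:

    #include <cstdint>

    // On big-endian SPARC the first register (or lower address) holds
    // the high word of an i64/f64 pair; the code swaps LoVal/HiVal first
    // when the DataLayout reports a little-endian target.
    uint64_t buildPairFromRegs(uint32_t firstReg, uint32_t secondReg,
                               bool isLittleEndian) {
      uint32_t hi = isLittleEndian ? secondReg : firstReg;
      uint32_t lo = isLittleEndian ? firstReg : secondReg;
      return (uint64_t(hi) << 32) | lo;
    }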
+ + // TODO: The f64 -> v2i32 conversion is super-inefficient for + // constants: it sticks them in the constant pool, then loads + // to a fp register, then stores to temp memory, then loads to + // integer registers. + Arg = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, Arg); + } + + SDValue Part0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + Arg, + DAG.getConstant(0, dl, getVectorIdxTy(DAG.getDataLayout()))); + SDValue Part1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, + Arg, + DAG.getConstant(1, dl, getVectorIdxTy(DAG.getDataLayout()))); if (VA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi)); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Part0)); assert(i+1 != e); CCValAssign &NextVA = ArgLocs[++i]; if (NextVA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Lo)); + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Part1)); } else { - // Store the low part in stack. + // Store the second part in stack. unsigned Offset = NextVA.getLocMemOffset() + StackOffset; SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, + MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo(), false, false, 0)); } } else { unsigned Offset = VA.getLocMemOffset() + StackOffset; - // Store the high part. + // Store the first part. SDValue StackPtr = DAG.getRegister(SP::O6, MVT::i32); SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Hi, PtrOff, + MemOpChains.push_back(DAG.getStore(Chain, dl, Part0, PtrOff, MachinePointerInfo(), false, false, 0)); - // Store the low part. + // Store the second part. PtrOff = DAG.getIntPtrConstant(Offset + 4, dl); PtrOff = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, PtrOff); - MemOpChains.push_back(DAG.getStore(Chain, dl, Lo, PtrOff, + MemOpChains.push_back(DAG.getStore(Chain, dl, Part1, PtrOff, MachinePointerInfo(), false, false, 0)); } @@ -990,8 +1044,8 @@ SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const if (!CalleeFn) return 0; - assert(CalleeFn->hasStructRetAttr() && - "Callee does not have the StructRet attribute."); + // It would be nice to check for the sret attribute on CalleeFn here, + // but since it is not part of the function type, any check will misfire. PointerType *Ty = cast<PointerType>(CalleeFn->arg_begin()->getType()); Type *ElementTy = Ty->getElementType(); @@ -1370,15 +1424,60 @@ static SPCC::CondCodes FPCondCCodeToFCC(ISD::CondCode CC) { SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, const SparcSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { - auto &DL = *TM.getDataLayout(); + MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); + + // Instructions which use registers as conditionals examine all the + // bits (as does the pseudo SELECT_CC expansion). I don't think it + // matters much whether it's ZeroOrOneBooleanContent, or + // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the + // former. + setBooleanContents(ZeroOrOneBooleanContent); + setBooleanVectorContents(ZeroOrOneBooleanContent); // Set up the register classes.
addRegisterClass(MVT::i32, &SP::IntRegsRegClass); addRegisterClass(MVT::f32, &SP::FPRegsRegClass); addRegisterClass(MVT::f64, &SP::DFPRegsRegClass); addRegisterClass(MVT::f128, &SP::QFPRegsRegClass); - if (Subtarget->is64Bit()) + if (Subtarget->is64Bit()) { addRegisterClass(MVT::i64, &SP::I64RegsRegClass); + } else { + // On 32bit sparc, we define a double-register 32bit register + // class, as well. This is modeled in LLVM as a 2-vector of i32. + addRegisterClass(MVT::v2i32, &SP::IntPairRegClass); + + // ...but almost all operations must be expanded, so set that as + // the default. + for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { + setOperationAction(Op, MVT::v2i32, Expand); + } + // Truncating/extending stores/loads are also not supported. + for (MVT VT : MVT::integer_vector_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::v2i32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Expand); + + setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, VT, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, VT, Expand); + + setTruncStoreAction(VT, MVT::v2i32, Expand); + setTruncStoreAction(MVT::v2i32, VT, Expand); + } + // However, load and store *are* legal. + setOperationAction(ISD::LOAD, MVT::v2i32, Legal); + setOperationAction(ISD::STORE, MVT::v2i32, Legal); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Legal); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Legal); + + // And we need to promote i64 loads/stores into vector load/store + setOperationAction(ISD::LOAD, MVT::i64, Custom); + setOperationAction(ISD::STORE, MVT::i64, Custom); + + // Sadly, this doesn't work: + // AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); + // AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); + } // Turn FP extload into load/fextend for (MVT VT : MVT::fp_valuetypes()) { @@ -1396,10 +1495,10 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, setTruncStoreAction(MVT::f128, MVT::f64, Expand); // Custom legalize GlobalAddress nodes into LO/HI parts. - setOperationAction(ISD::GlobalAddress, getPointerTy(DL), Custom); - setOperationAction(ISD::GlobalTLSAddress, getPointerTy(DL), Custom); - setOperationAction(ISD::ConstantPool, getPointerTy(DL), Custom); - setOperationAction(ISD::BlockAddress, getPointerTy(DL), Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + setOperationAction(ISD::ConstantPool, PtrVT, Custom); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); // Sparc doesn't have sext_inreg, replace them with shl/sra setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); @@ -1579,9 +1678,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM, setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); - setExceptionPointerRegister(SP::I0); - setExceptionSelectorRegister(SP::I1); - setStackPointerRegisterToSaveRestore(SP::O6); setOperationAction(ISD::CTPOP, MVT::i32, @@ -1744,18 +1840,15 @@ void SparcTargetLowering::computeKnownBitsForTargetNode // set LHS/RHS and SPCC to the LHS/RHS of the setcc and SPCC to the condition. 
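One way to read the blanket Expand loop in the constructor above: the IntPair class exists only to move data, so any v2i32 arithmetic is decomposed back into scalar i32 operations. Conceptually (a hedged C++ sketch, not target code):

    #include <array>
    #include <cstdint>

    // "Expand" for v2i32 arithmetic: there is no pair ALU operation, so
    // each lane becomes an ordinary scalar i32 op; only loads, stores,
    // extracts and build_vector stay in pair form.
    std::array<uint32_t, 2> addV2i32(const std::array<uint32_t, 2> &a,
                                     const std::array<uint32_t, 2> &b) {
      return {a[0] + b[0], a[1] + b[1]};
    }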
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, unsigned &SPCC) { - if (isa<ConstantSDNode>(RHS) && - cast<ConstantSDNode>(RHS)->isNullValue() && + if (isNullConstant(RHS) && CC == ISD::SETNE && (((LHS.getOpcode() == SPISD::SELECT_ICC || LHS.getOpcode() == SPISD::SELECT_XCC) && LHS.getOperand(3).getOpcode() == SPISD::CMPICC) || (LHS.getOpcode() == SPISD::SELECT_FCC && LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) && - isa<ConstantSDNode>(LHS.getOperand(0)) && - isa<ConstantSDNode>(LHS.getOperand(1)) && - cast<ConstantSDNode>(LHS.getOperand(0))->isOne() && - cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) { + isOneConstant(LHS.getOperand(0)) && + isNullConstant(LHS.getOperand(1))) { SDValue CMPCC = LHS.getOperand(3); SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue(); LHS = CMPCC.getOperand(0); @@ -1821,7 +1914,8 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setHasCalls(true); return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); } // This is one of the absolute code models. @@ -1872,6 +1966,9 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + SDLoc DL(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2601,6 +2698,17 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG) return DAG.getMergeValues(Ops, dl); } +static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) +{ + LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode()); + + EVT MemVT = LdNode->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Load(Op, DAG); + + return Op; +} + // Lower a f128 store into two f64 stores. static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); @@ -2645,6 +2753,29 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } +static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) +{ + SDLoc dl(Op); + StoreSDNode *St = cast<StoreSDNode>(Op.getNode()); + + EVT MemVT = St->getMemoryVT(); + if (MemVT == MVT::f128) + return LowerF128Store(Op, DAG); + + if (MemVT == MVT::i64) { + // Custom handling for i64 stores: turn it into a bitcast and a + // v2i32 store.
+ SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue()); + SDValue Chain = DAG.getStore( + St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(), + St->isVolatile(), St->isNonTemporal(), St->getAlignment(), + St->getAAInfo()); + return Chain; + } + + return SDValue(); +} + static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) { assert((Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::FABS) && "invalid opcode"); @@ -2752,7 +2883,7 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG, SDValue MulResult = TLI.makeLibCall(DAG, RTLIB::MUL_I128, WideVT, - Args, 4, isSigned, dl).first; + Args, isSigned, dl).first; SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, MulResult, DAG.getIntPtrConstant(0, dl)); SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, @@ -2783,7 +2914,6 @@ static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) { return SDValue(); } - SDValue SparcTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { @@ -2818,8 +2948,8 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); - case ISD::LOAD: return LowerF128Load(Op, DAG); - case ISD::STORE: return LowerF128Store(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FADD: return LowerF128Op(Op, DAG, getLibcallName(RTLIB::ADD_F128), 2); case ISD::FSUB: return LowerF128Op(Op, DAG, @@ -2921,8 +3051,7 @@ SparcTargetLowering::expandSelectCC(MachineInstr *MI, // to set, the condition code register to branch on, the true/false values to // select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -3007,7 +3136,7 @@ SparcTargetLowering::expandAtomicRMW(MachineInstr *MI, .addReg(AddrReg).addImm(0); // Split the basic block MBB before MI and insert the loop block in the hole. - MachineFunction::iterator MFI = MBB; + MachineFunction::iterator MFI = MBB->getIterator(); const BasicBlock *LLVM_BB = MBB->getBasicBlock(); MachineFunction *MF = MBB->getParent(); MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -3149,9 +3278,12 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - return std::make_pair(0U, &SP::IntRegsRegClass); + if (VT == MVT::v2i32) + return std::make_pair(0U, &SP::IntPairRegClass); + else + return std::make_pair(0U, &SP::IntRegsRegClass); } - } else if (!Constraint.empty() && Constraint.size() <= 5 + } else if (!Constraint.empty() && Constraint.size() <= 5 && Constraint[0] == '{' && *(Constraint.end()-1) == '}') { // constraint = '{r}' // Remove the braces from around the name. @@ -3227,5 +3359,24 @@ void SparcTargetLowering::ReplaceNodeResults(SDNode *N, getLibcallName(libCall), 1)); return; + case ISD::LOAD: { + LoadSDNode *Ld = cast<LoadSDNode>(N); + // Custom handling only for i64: turn i64 load into a v2i32 load, + // and a bitcast.
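The i64 case of LowerSTORE above amounts to placing the two words of the bitcast pair at adjacent addresses. A host-side C++ model, assuming SPARC's big-endian word order:

    #include <cstdint>
    #include <cstring>

    // Model of the i64 -> v2i32 store lowering: one 64-bit store becomes
    // two adjacent 32-bit word stores, high word first on big-endian.
    void storeI64AsWordPair(unsigned char *addr, uint64_t value) {
      uint32_t hi = uint32_t(value >> 32);
      uint32_t lo = uint32_t(value);
      std::memcpy(addr, &hi, 4);     // [addr]     <- high word
      std::memcpy(addr + 4, &lo, 4); // [addr + 4] <- low word
    }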
+ if (Ld->getValueType(0) != MVT::i64 || Ld->getMemoryVT() != MVT::i64) + return; + + SDLoc dl(N); + SDValue LoadRes = DAG.getExtLoad( + Ld->getExtensionType(), dl, MVT::v2i32, + Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), + MVT::v2i32, Ld->isVolatile(), Ld->isNonTemporal(), + Ld->isInvariant(), Ld->getAlignment(), Ld->getAAInfo()); + + SDValue Res = DAG.getNode(ISD::BITCAST, dl, MVT::i64, LoadRes); + Results.push_back(Res); + Results.push_back(LoadRes.getValue(1)); + return; + } } } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index bbc91a493c9d..4e46709cfc09 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -89,6 +89,20 @@ namespace llvm { return MVT::i32; } + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return SP::I0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return SP::I1; + } + /// getSetCCResultType - Return the ISD::SETCC ValueType EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; @@ -167,8 +181,8 @@ namespace llvm { } void ReplaceNodeResults(SDNode *N, - SmallVectorImpl<SDValue>& Results, - SelectionDAG &DAG) const override; + SmallVectorImpl<SDValue>& Results, + SelectionDAG &DAG) const override; MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB, unsigned BROpcode) const; diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td index 25cc652dbd9e..d51e2ccc8a35 100644 --- a/lib/Target/Sparc/SparcInstrAliases.td +++ b/lib/Target/Sparc/SparcInstrAliases.td @@ -250,6 +250,7 @@ defm : int_cond_alias<"n", 0b0000>; defm : int_cond_alias<"ne", 0b1001>; defm : int_cond_alias<"nz", 0b1001>; // same as ne defm : int_cond_alias<"e", 0b0001>; +defm : int_cond_alias<"eq", 0b0001>; // same as e defm : int_cond_alias<"z", 0b0001>; // same as e defm : int_cond_alias<"g", 0b1010>; defm : int_cond_alias<"le", 0b0010>; @@ -429,6 +430,9 @@ def : InstAlias<"wr $simm13, %tbr", (WRTBRri G0, i32imm:$simm13), 0>; def : InstAlias<"flush", (FLUSH), 0>; +def : MnemonicAlias<"lduw", "ld">, Requires<[HasV9]>; +def : MnemonicAlias<"lduwa", "lda">, Requires<[HasV9]>; + def : MnemonicAlias<"return", "rett">, Requires<[HasV9]>; def : MnemonicAlias<"addc", "addx">, Requires<[HasV9]>; @@ -450,3 +454,8 @@ def : InstAlias<"fcmpeq $rs1, $rs2", (V9FCMPEQ FCC0, QFPRegs:$rs1, QFPRegs:$rs2)>, Requires<[HasHardQuad]>; +// signx rd -> sra rd, %g0, rd +def : InstAlias<"signx $rd", (SRArr IntRegs:$rd, IntRegs:$rd, G0), 0>, Requires<[HasV9]>; + +// signx reg, rd -> sra reg, %g0, rd +def : InstAlias<"signx $rs1, $rd", (SRArr IntRegs:$rd, IntRegs:$rs1, G0), 0>, Requires<[HasV9]>; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 6167c532db80..733027a5d2be 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -284,7 +284,9 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned numSubRegs = 0; unsigned movOpc = 0; const unsigned *subRegIdx = nullptr; + bool ExtraG0 = false; + const unsigned DW_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd }; const unsigned
QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 }; const unsigned QFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd, @@ -294,7 +296,12 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (SP::IntRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0) .addReg(SrcReg, getKillRegState(KillSrc)); - else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) + else if (SP::IntPairRegClass.contains(DestReg, SrcReg)) { + subRegIdx = DW_SubRegsIdx; + numSubRegs = 2; + movOpc = SP::ORrr; + ExtraG0 = true; + } else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) { @@ -347,7 +354,11 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned Src = TRI->getSubReg(SrcReg, subRegIdx[i]); assert(Dst && Src && "Bad sub-register"); - MovMI = BuildMI(MBB, I, DL, get(movOpc), Dst).addReg(Src); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(movOpc), Dst); + if (ExtraG0) + MIB.addReg(SP::G0); + MIB.addReg(Src); + MovMI = MIB.getInstr(); } // Add implicit super-register defs and kills to the last MovMI. MovMI->addRegisterDefined(DestReg, TRI); @@ -365,19 +376,20 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOStore, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); // On the order of operands here: think "[FrameIdx + 0] = SrcReg". 
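The ExtraG0 flag introduced above reflects that SPARC has no dedicated register-to-register move: a copy is spelled `or %g0, src, dst`, so the sub-register copy loop must splice an extra %g0 operand into each ORrr it builds. The identity it relies on is trivial to check:

    #include <cassert>
    #include <cstdint>

    // SPARC synthesizes "mov src, dst" as "or %g0, src, dst"; %g0 always
    // reads as zero, so OR with it copies src unchanged.
    uint32_t movViaOr(uint32_t src) {
      const uint32_t g0 = 0; // hardwired zero register
      return g0 | src;
    }

    int main() {
      assert(movViaOr(0xdeadbeefu) == 0xdeadbeefu);
      return 0;
    }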
- if (RC == &SP::I64RegsRegClass) + if (RC == &SP::I64RegsRegClass) BuildMI(MBB, I, DL, get(SP::STXri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::STri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); + else if (RC == &SP::IntPairRegClass) + BuildMI(MBB, I, DL, get(SP::STDri)).addFrameIndex(FI).addImm(0) + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::STFri)).addFrameIndex(FI).addImm(0) .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); @@ -403,11 +415,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); if (RC == &SP::I64RegsRegClass) BuildMI(MBB, I, DL, get(SP::LDXri), DestReg).addFrameIndex(FI).addImm(0) @@ -415,6 +425,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, else if (RC == &SP::IntRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); + else if (RC == &SP::IntPairRegClass) + BuildMI(MBB, I, DL, get(SP::LDDri), DestReg).addFrameIndex(FI).addImm(0) + .addMemOperand(MMO); else if (RC == &SP::FPRegsRegClass) BuildMI(MBB, I, DL, get(SP::LDFri), DestReg).addFrameIndex(FI).addImm(0) .addMemOperand(MMO); diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 3b9e048ea8b3..ec37c22a5b33 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -283,17 +283,32 @@ multiclass Load<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode, [(set Ty:$dst, (OpNode ADDRri:$addr))]>; } +// TODO: Instructions of the LoadASI class are currently asm only; hooking up +// CodeGen's address spaces to use these is a future task. +class LoadASI<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode, + RegisterClass RC, ValueType Ty> : + F3_1_asi<3, Op3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi), + !strconcat(OpcStr, "a [$addr] $asi, $dst"), + []>; + // LoadA multiclass - As above, but also define alternate address space variant multiclass LoadA<string OpcStr, bits<6> Op3Val, bits<6> LoadAOp3Val, SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> : Load<OpcStr, Op3Val, OpNode, RC, Ty> { - // TODO: The LD*Arr instructions are currently asm only; hooking up - // CodeGen's address spaces to use these is a future task. - def Arr : F3_1_asi<3, LoadAOp3Val, (outs RC:$dst), (ins MEMrr:$addr, i8imm:$asi), - !strconcat(OpcStr, "a [$addr] $asi, $dst"), - []>; + def Arr : LoadASI<OpcStr, LoadAOp3Val, OpNode, RC, Ty>; } +// The LDSTUB instruction is supported for asm only. +// It is unlikely that general-purpose code could make use of it. +// CAS is preferred for sparc v9. +def LDSTUBrr : F3_1<3, 0b001101, (outs IntRegs:$dst), (ins MEMrr:$addr), + "ldstub [$addr], $dst", []>; +def LDSTUBri : F3_2<3, 0b001101, (outs IntRegs:$dst), (ins MEMri:$addr), + "ldstub [$addr], $dst", []>; +def LDSTUBArr : F3_1_asi<3, 0b011101, (outs IntRegs:$dst), + (ins MEMrr:$addr, i8imm:$asi), + "ldstuba [$addr] $asi, $dst", []>; + // Store multiclass - Define both Reg+Reg/Reg+Imm patterns in one shot.
multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> { @@ -307,14 +322,18 @@ multiclass Store<string OpcStr, bits<6> Op3Val, SDPatternOperator OpNode, [(OpNode Ty:$rd, ADDRri:$addr)]>; } +// TODO: Instructions of the StoreASI class are currently asm only; hooking up +// CodeGen's address spaces to use these is a future task. +class StoreASI<string OpcStr, bits<6> Op3Val, + SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> : + F3_1_asi<3, Op3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi), + !strconcat(OpcStr, "a $rd, [$addr] $asi"), + []>; + multiclass StoreA<string OpcStr, bits<6> Op3Val, bits<6> StoreAOp3Val, SDPatternOperator OpNode, RegisterClass RC, ValueType Ty> : Store<OpcStr, Op3Val, OpNode, RC, Ty> { - // TODO: The ST*Arr instructions are currently asm only; hooking up - // CodeGen's address spaces to use these is a future task. - def Arr : F3_1_asi<3, StoreAOp3Val, (outs), (ins MEMrr:$addr, RC:$rd, i8imm:$asi), - !strconcat(OpcStr, "a $rd, [$addr] $asi"), - []>; + def Arr : StoreASI<OpcStr, StoreAOp3Val, OpNode, RC, Ty>; } //===----------------------------------------------------------------------===// @@ -408,15 +427,40 @@ let DecoderMethod = "DecodeLoadInt" in { defm LD : LoadA<"ld", 0b000000, 0b010000, load, IntRegs, i32>; } +let DecoderMethod = "DecodeLoadIntPair" in + defm LDD : LoadA<"ldd", 0b000011, 0b010011, load, IntPair, v2i32>; + // Section B.2 - Load Floating-point Instructions, p. 92 -let DecoderMethod = "DecodeLoadFP" in - defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>; -let DecoderMethod = "DecodeLoadDFP" in - defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64>; +let DecoderMethod = "DecodeLoadFP" in { + defm LDF : Load<"ld", 0b100000, load, FPRegs, f32>; + def LDFArr : LoadASI<"ld", 0b110000, load, FPRegs, f32>, + Requires<[HasV9]>; +} +let DecoderMethod = "DecodeLoadDFP" in { + defm LDDF : Load<"ldd", 0b100011, load, DFPRegs, f64>; + def LDDFArr : LoadASI<"ldd", 0b110011, load, DFPRegs, f64>, + Requires<[HasV9]>; +} let DecoderMethod = "DecodeLoadQFP" in - defm LDQF : Load<"ldq", 0b100010, load, QFPRegs, f128>, + defm LDQF : LoadA<"ldq", 0b100010, 0b110010, load, QFPRegs, f128>, Requires<[HasV9, HasHardQuad]>; +let DecoderMethod = "DecodeLoadFP" in + let Defs = [FSR] in { + let rd = 0 in { + def LDFSRrr : F3_1<3, 0b100001, (outs), (ins MEMrr:$addr), + "ld [$addr], %fsr", []>; + def LDFSRri : F3_2<3, 0b100001, (outs), (ins MEMri:$addr), + "ld [$addr], %fsr", []>; + } + let rd = 1 in { + def LDXFSRrr : F3_1<3, 0b100001, (outs), (ins MEMrr:$addr), + "ldx [$addr], %fsr", []>, Requires<[HasV9]>; + def LDXFSRri : F3_2<3, 0b100001, (outs), (ins MEMri:$addr), + "ldx [$addr], %fsr", []>, Requires<[HasV9]>; + } + } + // Section B.4 - Store Integer Instructions, p. 95 let DecoderMethod = "DecodeStoreInt" in { defm STB : StoreA<"stb", 0b000101, 0b010101, truncstorei8, IntRegs, i32>; @@ -424,15 +468,40 @@ let DecoderMethod = "DecodeStoreInt" in { defm ST : StoreA<"st", 0b000100, 0b010100, store, IntRegs, i32>; } +let DecoderMethod = "DecodeStoreIntPair" in + defm STD : StoreA<"std", 0b000111, 0b010111, store, IntPair, v2i32>; + // Section B.5 - Store Floating-point Instructions, p.
97 -let DecoderMethod = "DecodeStoreFP" in +let DecoderMethod = "DecodeStoreFP" in { defm STF : Store<"st", 0b100100, store, FPRegs, f32>; -let DecoderMethod = "DecodeStoreDFP" in - defm STDF : Store<"std", 0b100111, store, DFPRegs, f64>; + def STFArr : StoreASI<"st", 0b110100, store, FPRegs, f32>, + Requires<[HasV9]>; +} +let DecoderMethod = "DecodeStoreDFP" in { + defm STDF : Store<"std", 0b100111, store, DFPRegs, f64>; + def STDFArr : StoreASI<"std", 0b110111, store, DFPRegs, f64>, + Requires<[HasV9]>; +} let DecoderMethod = "DecodeStoreQFP" in - defm STQF : Store<"stq", 0b100110, store, QFPRegs, f128>, + defm STQF : StoreA<"stq", 0b100110, 0b110110, store, QFPRegs, f128>, Requires<[HasV9, HasHardQuad]>; +let DecoderMethod = "DecodeStoreFP" in + let Defs = [FSR] in { + let rd = 0 in { + def STFSRrr : F3_1<3, 0b100101, (outs MEMrr:$addr), (ins), + "st %fsr, [$addr]", []>; + def STFSRri : F3_2<3, 0b100101, (outs MEMri:$addr), (ins), + "st %fsr, [$addr]", []>; + } + let rd = 1 in { + def STXFSRrr : F3_1<3, 0b100101, (outs MEMrr:$addr), (ins), + "stx %fsr, [$addr]", []>, Requires<[HasV9]>; + def STXFSRri : F3_2<3, 0b100101, (outs MEMri:$addr), (ins), + "stx %fsr, [$addr]", []>, Requires<[HasV9]>; + } + } + // Section B.8 - SWAP Register with Memory Instruction // (Atomic swap) let Constraints = "$val = $dst", DecoderMethod = "DecodeSWAP" in { @@ -559,6 +628,10 @@ let Defs = [Y, ICC] in { defm SMULCC : F3_12np<"smulcc", 0b011011>; } +let Defs = [Y, ICC], Uses = [Y, ICC] in { + defm MULSCC : F3_12np<"mulscc", 0b100100>; +} + // Section B.19 - Divide Instructions, p. 115 let Uses = [Y], Defs = [Y] in { defm UDIV : F3_12np<"udiv", 0b001110>; @@ -1221,8 +1294,8 @@ let Predicates = [HasV9] in { // the top 32-bits before using it. To do this clearing, we use a SRLri X,0. 
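To make the clearing comment above concrete: v9 POPC counts all 64 bits of its source register, so an i32 ctpop is only correct after zero-extension, which `srl $src, 0` performs on a 64-bit register. A standalone C++ model of the pattern that follows:

    #include <cstdint>

    // srl $src, 0 in 64-bit mode zero-extends the 32-bit value, so the
    // 64-bit population count then equals the i32 ctpop.
    unsigned ctpop32ViaPopc64(uint64_t reg) {
      uint64_t v = reg & 0xffffffffull; // effect of "srl $src, 0"
      unsigned n = 0;
      for (; v; v &= v - 1) // Kernighan: clear lowest set bit each step
        ++n;
      return n;
    }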
let rs1 = 0 in def POPCrr : F3_1<2, 0b101110, - (outs IntRegs:$dst), (ins IntRegs:$src), - "popc $src, $dst", []>, Requires<[HasV9]>; + (outs IntRegs:$rd), (ins IntRegs:$rs2), + "popc $rs2, $rd", []>, Requires<[HasV9]>; def : Pat<(ctpop i32:$src), (POPCrr (SRLri $src, 0))>; @@ -1254,6 +1327,25 @@ let hasSideEffects = 1 in { } } + +// Section A.43 - Read Privileged Register Instructions +let Predicates = [HasV9] in { +let rs2 = 0 in + def RDPR : F3_1<2, 0b101010, + (outs IntRegs:$rd), (ins PRRegs:$rs1), + "rdpr $rs1, $rd", []>; +} + +// Section A.62 - Write Privileged Register Instructions +let Predicates = [HasV9] in { + def WRPRrr : F3_1<2, 0b110010, + (outs PRRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2), + "wrpr $rs1, $rs2, $rd", []>; + def WRPRri : F3_2<2, 0b110010, + (outs PRRegs:$rd), (ins IntRegs:$rs1, simm13Op:$simm13), + "wrpr $rs1, $simm13, $rd", []>; +} + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// @@ -1327,6 +1419,18 @@ def : Pat<(i32 (atomic_load ADDRri:$src)), (LDri ADDRri:$src)>; def : Pat<(atomic_store ADDRrr:$dst, i32:$val), (STrr ADDRrr:$dst, $val)>; def : Pat<(atomic_store ADDRri:$dst, i32:$val), (STri ADDRri:$dst, $val)>; +// extract_vector +def : Pat<(extractelt (v2i32 IntPair:$Rn), 0), + (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_even))>; +def : Pat<(extractelt (v2i32 IntPair:$Rn), 1), + (i32 (EXTRACT_SUBREG IntPair:$Rn, sub_odd))>; + +// build_vector +def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)), + (INSERT_SUBREG + (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even), + (i32 IntRegs:$a2), sub_odd)>; + include "SparcInstr64Bit.td" include "SparcInstrVIS.td" diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 9667bc059f18..da31783ba248 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -75,6 +75,18 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(SP::G6); Reserved.set(SP::G7); + // Also reserve the register pair aliases covering the above + // registers, with the same conditions. + Reserved.set(SP::G0_G1); + if (ReserveAppRegisters) + Reserved.set(SP::G2_G3); + if (ReserveAppRegisters || !Subtarget.is64Bit()) + Reserved.set(SP::G4_G5); + + Reserved.set(SP::O6_O7); + Reserved.set(SP::I6_I7); + Reserved.set(SP::G6_G7); + // Unaliased double registers are not available in non-V9 targets. if (!Subtarget.isV9()) { for (unsigned n = 0; n != 16; ++n) { @@ -158,21 +170,15 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; DebugLoc dl = MI.getDebugLoc(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - - // Addressable stack objects are accessed using neg. offsets from %fp MachineFunction &MF = *MI.getParent()->getParent(); const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>(); - int64_t Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) + - MI.getOperand(FIOperandNum + 1).getImm() + - Subtarget.getStackPointerBias(); - SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>(); - unsigned FramePtr = SP::I6; - if (FuncInfo->isLeafProc()) { - // Use %sp and adjust offset if needed. - FramePtr = SP::O6; - int stackSize = MF.getFrameInfo()->getStackSize(); - Offset += (stackSize) ?
Subtarget.getAdjustedFrameSize(stackSize) : 0 ; - } + const SparcFrameLowering *TFI = getFrameLowering(MF); + + unsigned FrameReg; + int Offset; + Offset = TFI->getFrameIndexReference(MF, FrameIndex, FrameReg); + + Offset += MI.getOperand(FIOperandNum + 1).getImm(); if (!Subtarget.isV9() || !Subtarget.hasHardQuad()) { if (MI.getOpcode() == SP::STQFri) { @@ -182,8 +188,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned SrcOddReg = getSubReg(SrcReg, SP::sub_odd64); MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri)) - .addReg(FramePtr).addImm(0).addReg(SrcEvenReg); - replaceFI(MF, II, *StMI, dl, 0, Offset, FramePtr); + .addReg(FrameReg).addImm(0).addReg(SrcEvenReg); + replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); MI.setDesc(TII.get(SP::STDFri)); MI.getOperand(2).setReg(SrcOddReg); Offset += 8; @@ -194,8 +200,8 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned DestOddReg = getSubReg(DestReg, SP::sub_odd64); MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg) - .addReg(FramePtr).addImm(0); - replaceFI(MF, II, *StMI, dl, 1, Offset, FramePtr); + .addReg(FrameReg).addImm(0); + replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); MI.setDesc(TII.get(SP::LDDFri)); MI.getOperand(0).setReg(DestOddReg); @@ -203,7 +209,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } } - replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FramePtr); + replaceFI(MF, II, MI, dl, FIOperandNum, Offset, FrameReg); } @@ -211,3 +217,25 @@ unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } +// Sparc has no architectural need for stack realignment support, +// except that LLVM unfortunately currently implements overaligned +// stack objects by depending upon stack realignment support. +// If that ever changes, this can probably be deleted. +bool SparcRegisterInfo::canRealignStack(const MachineFunction &MF) const { + if (!TargetRegisterInfo::canRealignStack(MF)) + return false; + + // Sparc always has a fixed frame pointer register, so don't need to + // worry about needing to reserve it. [even if we don't have a frame + // pointer for our frame, it still cannot be used for other things, + // or register window traps will be SADNESS.] + + // If there's a reserved call frame, we can use SP to access locals. + if (getFrameLowering(MF)->hasReservedCallFrame(MF)) + return true; + + // Otherwise, we'd need a base pointer, but those aren't implemented + // for SPARC at the moment. + + return false; +} diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 764a894fe9a3..32075b1df410 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -42,8 +42,10 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS = nullptr) const; - // Debug information queries. 
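For the realignment that canRealignStack gates above, the `andn %o6, MaxAlign-1, %o6` emitted in the emitPrologue hunk earlier is plain mask arithmetic: ANDN computes a & ~b. A one-line C++ model, assuming MaxAlign is a power of two:

    #include <cstdint>

    // andn %o6, MaxAlign-1, %o6 == sp & ~(MaxAlign - 1): rounds %sp down
    // to a MaxAlign boundary (MaxAlign must be a power of two).
    uint64_t realignSP(uint64_t sp, uint64_t maxAlign) {
      return sp & ~(maxAlign - 1);
    }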
unsigned getFrameRegister(const MachineFunction &MF) const override; + + bool canRealignStack(const MachineFunction &MF) const override; + }; } // end namespace llvm diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td index db8a7e86962d..cca9463562a4 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.td +++ b/lib/Target/Sparc/SparcRegisterInfo.td @@ -32,6 +32,12 @@ def sub_odd64 : SubRegIndex<64, 64>; // Ri - 32-bit integer registers class Ri<bits<16> Enc, string n> : SparcReg<Enc, n>; +// Rdi - pairs of 32-bit integer registers +class Rdi<bits<16> Enc, string n, list<Register> subregs> : SparcReg<Enc, n> { + let SubRegs = subregs; + let SubRegIndices = [sub_even, sub_odd]; + let CoveredBySubRegs = 1; +} // Rf - 32-bit floating-point registers class Rf<bits<16> Enc, string n> : SparcReg<Enc, n>; @@ -54,6 +60,8 @@ def ICC : SparcCtrlReg<0, "ICC">; // This represents icc and xcc in 64-bit code. foreach I = 0-3 in def FCC#I : SparcCtrlReg<I, "FCC"#I>; +def FSR : SparcCtrlReg<0, "FSR">; // Floating-point state register. + // Y register def Y : SparcCtrlReg<0, "Y">, DwarfRegNum<[64]>; // Ancillary state registers (implementation defined) @@ -94,6 +102,22 @@ def PSR : SparcCtrlReg<0, "PSR">; def WIM : SparcCtrlReg<0, "WIM">; def TBR : SparcCtrlReg<0, "TBR">; +def TPC : SparcCtrlReg<0, "TPC">; +def TNPC : SparcCtrlReg<1, "TNPC">; +def TSTATE : SparcCtrlReg<2, "TSTATE">; +def TT : SparcCtrlReg<3, "TT">; +def TICK : SparcCtrlReg<4, "TICK">; +def TBA : SparcCtrlReg<5, "TBA">; +def PSTATE : SparcCtrlReg<6, "PSTATE">; +def TL : SparcCtrlReg<7, "TL">; +def PIL : SparcCtrlReg<8, "PIL">; +def CWP : SparcCtrlReg<9, "CWP">; +def CANSAVE : SparcCtrlReg<10, "CANSAVE">; +def CANRESTORE : SparcCtrlReg<11, "CANRESTORE">; +def CLEANWIN : SparcCtrlReg<12, "CLEANWIN">; +def OTHERWIN : SparcCtrlReg<13, "OTHERWIN">; +def WSTATE : SparcCtrlReg<14, "WSTATE">; + // Integer registers def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; @@ -217,6 +241,24 @@ def Q13 : Rq<21, "F52", [D26, D27]>; def Q14 : Rq<25, "F56", [D28, D29]>; def Q15 : Rq<29, "F60", [D30, D31]>; +// Aliases of the integer registers used for LDD/STD double-word operations +def G0_G1 : Rdi<0, "G0", [G0, G1]>; +def G2_G3 : Rdi<2, "G2", [G2, G3]>; +def G4_G5 : Rdi<4, "G4", [G4, G5]>; +def G6_G7 : Rdi<6, "G6", [G6, G7]>; +def O0_O1 : Rdi<8, "O0", [O0, O1]>; +def O2_O3 : Rdi<10, "O2", [O2, O3]>; +def O4_O5 : Rdi<12, "O4", [O4, O5]>; +def O6_O7 : Rdi<14, "O6", [O6, O7]>; +def L0_L1 : Rdi<16, "L0", [L0, L1]>; +def L2_L3 : Rdi<18, "L2", [L2, L3]>; +def L4_L5 : Rdi<20, "L4", [L4, L5]>; +def L6_L7 : Rdi<22, "L6", [L6, L7]>; +def I0_I1 : Rdi<24, "I0", [I0, I1]>; +def I2_I3 : Rdi<26, "I2", [I2, I3]>; +def I4_I5 : Rdi<28, "I4", [I4, I5]>; +def I6_I7 : Rdi<30, "I6", [I6, I7]>; + // Register classes. // // FIXME: the register order should be defined in terms of the preferred @@ -231,6 +273,13 @@ def IntRegs : RegisterClass<"SP", [i32, i64], 32, (sequence "L%u", 0, 7), (sequence "O%u", 0, 7))>; +// Should be in the same order as IntRegs. +def IntPair : RegisterClass<"SP", [v2i32], 64, + (add I0_I1, I2_I3, I4_I5, I6_I7, + G0_G1, G2_G3, G4_G5, G6_G7, + L0_L1, L2_L3, L4_L5, L6_L7, + O0_O1, O2_O3, O4_O5, O6_O7)>; + // Register class for 64-bit mode, with a 64-bit spill slot size. // These are the same as the 32-bit registers, so TableGen will consider this // to be a sub-class of IntRegs.
That works out because requiring a 64-bit @@ -252,3 +301,8 @@ def ASRRegs : RegisterClass<"SP", [i32], 32, (add Y, (sequence "ASR%u", 1, 31))> { let isAllocatable = 0; } + +// Privileged Registers +def PRRegs : RegisterClass<"SP", [i64], 64, + (add TPC, TNPC, TSTATE, TT, TICK, TBA, PSTATE, TL, PIL, CWP, + CANSAVE, CANRESTORE, CLEANWIN, OTHERWIN, WSTATE)>; diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index d69da409e428..d701594d27af 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -64,7 +64,7 @@ int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { frameSize += 128; // Frames with calls must also reserve space for 6 outgoing arguments // whether they are used or not. LowerCall_64 takes care of that. - assert(frameSize % 16 == 0 && "Stack size not 16-byte aligned"); + frameSize = RoundUpToAlignment(frameSize, 16); } else { // Emit the correct save instruction based on the number of bytes in // the frame. Minimum stack frame size according to V8 ABI is: @@ -81,3 +81,7 @@ int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { } return frameSize; } + +bool SparcSubtarget::enableMachineScheduler() const { + return true; +} diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 9d21911d88f0..e2fd2f04528a 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -60,6 +60,8 @@ public: return &TSInfo; } + bool enableMachineScheduler() const override; + bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } bool isVIS2() const { return IsVIS2; } @@ -85,7 +87,6 @@ public: /// returns adjusted framesize which includes space for register window /// spills and arguments. int getAdjustedFrameSize(int stackSize) const; - }; } // end namespace llvm diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 3aa4c6bd32d6..9c995bf42b0b 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -349,7 +349,6 @@ class SystemZAsmParser : public MCTargetAsmParser { #include "SystemZGenAsmMatcher.inc" private: - MCSubtargetInfo &STI; MCAsmParser &Parser; enum RegisterGroup { RegGR, @@ -386,14 +385,14 @@ private: bool parseOperand(OperandVector &Operands, StringRef Mnemonic); public: - SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, + SystemZAsmParser(const MCSubtargetInfo &sti, MCAsmParser &parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(sti), Parser(parser) { + : MCTargetAsmParser(Options, sti), Parser(parser) { MCAsmParserExtension::Initialize(Parser); // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); } // Override MCTargetAsmParser. @@ -533,14 +532,16 @@ bool SystemZAsmParser::parseRegister(Register &Reg) { } // Parse a register of group Group. If Regs is nonnull, use it to map -// the raw register number to LLVM numbering, with zero entries indicating -// an invalid register. IsAddress says whether the register appears in an -// address context. +// the raw register number to LLVM numbering, with zero entries +// indicating an invalid register. IsAddress says whether the +// register appears in an address context. 
Allow FP Group if expecting +// RegV Group, since the f-prefix yields the FP group even while used +// with vector instructions. bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs, bool IsAddress) { if (parseRegister(Reg)) return true; - if (Reg.Group != Group) + if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV)) return Error(Reg.StartLoc, "invalid operand for instruction"); if (Regs && Regs[Reg.Num] == 0) return Error(Reg.StartLoc, "invalid register pair"); @@ -791,7 +792,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, switch (MatchResult) { case Match_Success: Inst.setLoc(IDLoc); - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, getSTI()); return false; case Match_MissingFeature: { diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp index 059ae3f7fb09..6444cf8e464d 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp @@ -60,15 +60,15 @@ void SystemZInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const { O << '%' << getRegisterName(RegNo); } -template -void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { +template +static void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { int64_t Value = MI->getOperand(OpNum).getImm(); assert(isUInt(Value) && "Invalid uimm argument"); O << Value; } -template -void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { +template +static void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { int64_t Value = MI->getOperand(OpNum).getImm(); assert(isInt(Value) && "Invalid simm argument"); O << Value; diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h index ba55e686f3ef..7ca386fc4cb9 100644 --- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h @@ -15,7 +15,6 @@ #define LLVM_LIB_TARGET_SYSTEMZ_INSTPRINTER_SYSTEMZINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/Compiler.h" namespace llvm { class MCOperand; diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp index 5fefa315a4cf..2115d4480eef 100644 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp @@ -226,7 +226,7 @@ extern "C" void LLVMInitializeSystemZTargetMC() { // Register the MCCodeEmitter. TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget, - createSystemZMCCodeEmitter); + createSystemZMCCodeEmitter); // Register the MCInstrInfo. TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index e089047d013e..cd367d60bab7 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -52,12 +52,6 @@ We don't use the TEST DATA CLASS instructions. -- -We could use the generic floating-point forms of LOAD COMPLEMENT, -LOAD NEGATIVE and LOAD POSITIVE in cases where we don't need the -condition codes. For example, we could use LCDFR instead of LCDBR. - --- - We only use MVC, XC and CLC for constant-length block operations. We could extend them to variable-length operations too, using EXECUTE RELATIVE LONG. 
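A note on the asm-parser change above: on z13 the vector registers %v0-%v15 overlay the floating-point registers %f0-%f15, so an f-prefixed name is a legitimate way to write the low half of a vector operand. The acceptance test added to parseRegister condenses to the following (names as in the patch; shown only for illustration):

    // Accept the expected group, or an FP register where a vector
    // register is wanted; the f-prefix names the overlapping register.
    if (Reg.Group != Group && !(Reg.Group == RegFP && Group == RegV))
      return Error(Reg.StartLoc, "invalid operand for instruction");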
diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp index 3dca7bd89f05..75273114d62f 100644 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/SystemZAsmPrinter.cpp @@ -288,7 +288,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { MCSymbolRefExpr::create(getSymbol(ZCPV->getGlobalValue()), getModifierVariantKind(ZCPV->getModifier()), OutContext); - uint64_t Size = TM.getDataLayout()->getTypeAllocSize(ZCPV->getType()); + uint64_t Size = getDataLayout().getTypeAllocSize(ZCPV->getType()); OutStreamer->EmitValue(Expr, Size); } diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp index 44ea1d25f08e..4a6beb67f182 100644 --- a/lib/Target/SystemZ/SystemZConstantPoolValue.cpp +++ b/lib/Target/SystemZ/SystemZConstantPoolValue.cpp @@ -26,21 +26,6 @@ SystemZConstantPoolValue::Create(const GlobalValue *GV, return new SystemZConstantPoolValue(GV, Modifier); } -unsigned SystemZConstantPoolValue::getRelocationInfo() const { - switch (Modifier) { - case SystemZCP::TLSGD: - case SystemZCP::TLSLDM: - case SystemZCP::DTPOFF: - // May require a dynamic relocation. - return 2; - case SystemZCP::NTPOFF: - // May require a relocation, but the relocations are always resolved - // by the static linker. - return 1; - } - llvm_unreachable("Unknown modifier"); -} - int SystemZConstantPoolValue:: getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { unsigned AlignMask = Alignment - 1; diff --git a/lib/Target/SystemZ/SystemZConstantPoolValue.h b/lib/Target/SystemZ/SystemZConstantPoolValue.h index e5f1bb18581b..a71b595560d2 100644 --- a/lib/Target/SystemZ/SystemZConstantPoolValue.h +++ b/lib/Target/SystemZ/SystemZConstantPoolValue.h @@ -43,7 +43,6 @@ public: Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier); // Override MachineConstantPoolValue. - unsigned getRelocationInfo() const override; int getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) override; void addSelectionDAGCSEId(FoldingSetNodeID &ID) override; diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp index 16f9adc79f17..4818ed015522 100644 --- a/lib/Target/SystemZ/SystemZElimCompare.cpp +++ b/lib/Target/SystemZ/SystemZElimCompare.cpp @@ -37,13 +37,11 @@ namespace { // instructions. struct Reference { Reference() - : Def(false), Use(false), IndirectDef(false), IndirectUse(false) {} + : Def(false), Use(false) {} Reference &operator|=(const Reference &Other) { Def |= Other.Def; - IndirectDef |= Other.IndirectDef; Use |= Other.Use; - IndirectUse |= Other.IndirectUse; return *this; } @@ -53,11 +51,6 @@ struct Reference { // via a sub- or super-register. bool Def; bool Use; - - // True if the register is defined or used indirectly, by a sub- or - // super-register. - bool IndirectDef; - bool IndirectUse; }; class SystemZElimCompare : public MachineFunctionPass { @@ -104,14 +97,12 @@ static bool isCCLiveOut(MachineBasicBlock &MBB) { return false; } -// Return true if any CC result of MI would reflect the value of subreg -// SubReg of Reg. -static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) { +// Return true if any CC result of MI would reflect the value of Reg. 
+static bool resultTests(MachineInstr *MI, unsigned Reg) { if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - MI->getOperand(0).getReg() == Reg && - MI->getOperand(0).getSubReg() == SubReg) + MI->getOperand(0).getReg() == Reg) return true; switch (MI->getOpcode()) { @@ -127,30 +118,25 @@ static bool resultTests(MachineInstr *MI, unsigned Reg, unsigned SubReg) { case SystemZ::LTEBR: case SystemZ::LTDBR: case SystemZ::LTXBR: - if (MI->getOperand(1).getReg() == Reg && - MI->getOperand(1).getSubReg() == SubReg) + if (MI->getOperand(1).getReg() == Reg) return true; } return false; } -// Describe the references to Reg in MI, including sub- and super-registers. +// Describe the references to Reg or any of its aliases in MI. Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) { Reference Ref; for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { const MachineOperand &MO = MI->getOperand(I); if (MO.isReg()) { if (unsigned MOReg = MO.getReg()) { - if (MOReg == Reg || TRI->regsOverlap(MOReg, Reg)) { - if (MO.isUse()) { + if (TRI->regsOverlap(MOReg, Reg)) { + if (MO.isUse()) Ref.Use = true; - Ref.IndirectUse |= (MOReg != Reg); - } - if (MO.isDef()) { + else if (MO.isDef()) Ref.Def = true; - Ref.IndirectDef |= (MOReg != Reg); - } } } } @@ -158,6 +144,30 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) { return Ref; } +// Return true if this is a load-and-test which can be optimized in the +// same way as a compare instruction. +static bool isLoadAndTestAsCmp(MachineInstr *MI) { + // If during isel we used a load-and-test as a compare with 0, the + // def operand is dead. + return ((MI->getOpcode() == SystemZ::LTEBR || + MI->getOpcode() == SystemZ::LTDBR || + MI->getOpcode() == SystemZ::LTXBR) && + MI->getOperand(0).isDead()); +} + +// Return the source register of Compare, which is the unknown value +// being tested. +static unsigned getCompareSourceReg(MachineInstr *Compare) { + unsigned reg = 0; + if (Compare->isCompare()) + reg = Compare->getOperand(0).getReg(); + else if (isLoadAndTestAsCmp(Compare)) + reg = Compare->getOperand(1).getReg(); + assert (reg); + + return reg; +} + // Compare compares the result of MI against zero. If MI is an addition // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition // and convert the branch to a BRCT(G). Return true on success. @@ -188,7 +198,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, // We already know that there are no references to the register between // MI and Compare. Make sure that there are also no references between // Compare and Branch. - unsigned SrcReg = Compare->getOperand(0).getReg(); + unsigned SrcReg = getCompareSourceReg(Compare); MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; for (++MBBI; MBBI != MBBE; ++MBBI) if (getRegReferences(MBBI, SrcReg)) @@ -196,16 +206,15 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare, // The transformation is OK. Rebuild Branch as a BRCT(G).
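// NOTE (illustrative aside, not from the patch): the two helpers above rest
// on one property of load-and-test. LTEBR/LTDBR/LTXBR copy a value and set
// CC from it; when isel emitted one purely as a compare against +0.0,
// nothing reads the copied value, so operand 0 carries a dead flag:
//
//   %F1<def,dead> = LTDBR %F0   ; behaves as "compare %F0 with +0.0"
//
// Hence getCompareSourceReg returns operand 0 for a real compare but
// operand 1 (the source) for a load-and-test acting as one.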
MachineOperand Target(Branch->getOperand(2)); - Branch->RemoveOperand(2); - Branch->RemoveOperand(1); - Branch->RemoveOperand(0); + while (Branch->getNumOperands()) + Branch->RemoveOperand(0); Branch->setDesc(TII->get(BRCT)); MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) .addOperand(MI->getOperand(0)) .addOperand(MI->getOperand(1)) .addOperand(Target) .addReg(SystemZ::CC, RegState::ImplicitDefine); - MI->removeFromParent(); + MI->eraseFromParent(); return true; } @@ -308,6 +317,10 @@ static bool isCompareZero(MachineInstr *Compare) { return true; default: + + if (isLoadAndTestAsCmp(Compare)) + return true; + return (Compare->getNumExplicitOperands() == 2 && Compare->getOperand(1).isImm() && Compare->getOperand(1).getImm() == 0); @@ -325,8 +338,7 @@ optimizeCompareZero(MachineInstr *Compare, return false; // Search back for CC results that are based on the first operand. - unsigned SrcReg = Compare->getOperand(0).getReg(); - unsigned SrcSubReg = Compare->getOperand(0).getSubReg(); + unsigned SrcReg = getCompareSourceReg(Compare); MachineBasicBlock &MBB = *Compare->getParent(); MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin(); Reference CCRefs; @@ -334,7 +346,7 @@ optimizeCompareZero(MachineInstr *Compare, while (MBBI != MBBE) { --MBBI; MachineInstr *MI = MBBI; - if (resultTests(MI, SrcReg, SrcSubReg)) { + if (resultTests(MI, SrcReg)) { // Try to remove both MI and Compare by converting a branch to BRCT(G). // We don't care in this case whether CC is modified between MI and // Compare. @@ -435,23 +447,21 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { while (MBBI != MBB.begin()) { MachineInstr *MI = --MBBI; if (CompleteCCUsers && - MI->isCompare() && + (MI->isCompare() || isLoadAndTestAsCmp(MI)) && (optimizeCompareZero(MI, CCUsers) || fuseCompareAndBranch(MI, CCUsers))) { ++MBBI; - MI->removeFromParent(); + MI->eraseFromParent(); Changed = true; CCUsers.clear(); - CompleteCCUsers = true; continue; } - Reference CCRefs(getRegReferences(MI, SystemZ::CC)); - if (CCRefs.Def) { + if (MI->definesRegister(SystemZ::CC)) { CCUsers.clear(); - CompleteCCUsers = !CCRefs.IndirectDef; + CompleteCCUsers = true; } - if (CompleteCCUsers && CCRefs.Use) + if (MI->readsRegister(SystemZ::CC) && CompleteCCUsers) CCUsers.push_back(MI); } return Changed; diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index 397de472a6ee..e1b20d0536d1 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -48,7 +48,8 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { SystemZFrameLowering::SystemZFrameLowering() : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, - -SystemZMC::CallFrameSize, 8) { + -SystemZMC::CallFrameSize, 8, + false /* StackRealignable */) { // Create a mapping from register number to save slot offset. RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) @@ -133,7 +134,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); SystemZMachineFunctionInfo *ZFI = MF.getInfo(); bool IsVarArg = MF.getFunction()->isVarArg(); - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + DebugLoc DL; // Scan the call-saved GPRs and find the bounds of the register spill area. 
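// NOTE (general MachineInstr API, not specific to this patch): the switch
// from removeFromParent() to eraseFromParent() above is not cosmetic.
// removeFromParent() only unlinks the instruction and returns it, leaving
// the caller responsible for re-inserting or deleting it, while
// eraseFromParent() unlinks and deallocates it. These instructions are
// dead for good, so erasing is the correct choice:
//
//   MI->eraseFromParent();   // MI must not be dereferenced after this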
unsigned LowGPR = 0; @@ -322,7 +323,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); const std::vector &CSI = MFFrame->getCalleeSavedInfo(); bool HasFP = hasFP(MF); - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; // The current offset of the stack pointer from the CFA. int64_t SPOffsetFromCFA = -SystemZMC::CFAOffsetFromInitialSP; @@ -394,7 +398,10 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF, // Add CFI for the this save. unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); - int64_t Offset = getFrameIndexOffset(MF, Save.getFrameIdx()); + unsigned IgnoredFrameReg; + int64_t Offset = + getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg); + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, DwarfReg, SPOffsetFromCFA + Offset)); CFIIndexes.push_back(CFIIndex); @@ -455,9 +462,14 @@ bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { MF.getInfo()->getManipulatesSP()); } -int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { +int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { const MachineFrameInfo *MFFrame = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + + // Fill in FrameReg output argument. + FrameReg = RI->getFrameRegister(MF); // Start with the offset of FI from the top of the caller-allocated frame // (i.e. the top of the 160 bytes allocated by the caller). This initial diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index 5ade757f17f7..46bb6b7a7573 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -43,7 +43,8 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 75fd37f01a19..a9093094d884 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -585,7 +585,7 @@ bool SystemZDAGToDAGISel::selectAddress(SDValue Addr, static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) { if (N.getNode()->getNodeId() == -1 || N.getNode()->getNodeId() > Pos->getNodeId()) { - DAG->RepositionNode(Pos, N.getNode()); + DAG->RepositionNode(Pos->getIterator(), N.getNode()); N.getNode()->setNodeId(Pos->getNodeId()); } } @@ -801,7 +801,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { RxSBG.Input = N.getOperand(0); return true; } - + case ISD::ANY_EXTEND: // Bits above the extended operand are don't-care. RxSBG.Input = N.getOperand(0); @@ -818,7 +818,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const { return true; } // Fall through. 
- + case ISD::SIGN_EXTEND: { // Check that the extension bits are don't-care (i.e. are masked out // by the final mask). @@ -938,7 +938,23 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) { } return nullptr; } - } + } + + // If the RISBG operands require no rotation and just masks the bottom + // 8/16 bits, attempt to convert this to a LLC zero extension. + if (RISBG.Rotate == 0 && (RISBG.Mask == 0xff || RISBG.Mask == 0xffff)) { + unsigned OpCode = (RISBG.Mask == 0xff ? SystemZ::LLGCR : SystemZ::LLGHR); + if (VT == MVT::i32) { + if (Subtarget->hasHighWord()) + OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCRMux : SystemZ::LLHRMux); + else + OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCR : SystemZ::LLHR); + } + + SDValue In = convertTo(DL, VT, RISBG.Input); + N = CurDAG->getMachineNode(OpCode, DL, VT, In); + return convertTo(DL, VT, SDValue(N, 0)).getNode(); + } unsigned Opcode = SystemZ::RISBG; // Prefer RISBGN if available, since it does not clobber CC. diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 9a753c897519..ee732675fb39 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -84,8 +84,7 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - auto &DL = *TM.getDataLayout(); - MVT PtrVT = getPointerTy(DL); + MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); // Set up the register classes. if (Subtarget.hasHighWord()) @@ -115,8 +114,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, computeRegisterProperties(Subtarget.getRegisterInfo()); // Set up special registers. - setExceptionPointerRegister(SystemZ::R6D); - setExceptionSelectorRegister(SystemZ::R7D); setStackPointerRegisterToSaveRestore(SystemZ::R15D); // TODO: It may be better to default to latency-oriented scheduling, however @@ -370,7 +367,9 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, // No special instructions for these. setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FSINCOS, VT, Expand); setOperationAction(ISD::FREM, VT, Expand); + setOperationAction(ISD::FPOW, VT, Expand); } } @@ -776,9 +775,7 @@ bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, } bool SystemZTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { - if (!CI->isTailCall()) - return false; - return true; + return CI->isTailCall(); } // We do not yet support 128-bit single-element vector types. If the user @@ -939,8 +936,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL)); ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + MachinePointerInfo::getFixedStack(MF, FI), false, + false, false, 0); } // Convert the value of the argument register into the value that's @@ -976,9 +973,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, &SystemZ::FP64BitRegClass); SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, - MachinePointerInfo::getFixedStack(FI), + MachinePointerInfo::getFixedStack(MF, FI), false, false, 0); - } // Join the stores, which are independent of one another. 
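// NOTE (illustrative aside): the new tryRISBGZero case above relies on a
// simple identity. RISBG with Rotate == 0 and a mask covering only the low
// 8 or 16 bits computes
//
//   Dst = Src & 0xff        (or Src & 0xffff)
//
// which is exactly a zero-extending move, so the cheaper dedicated forms
// can be used instead: LLGCR/LLGHR for 64-bit results, LLCR/LLHR for
// 32-bit ones, and LLCRMux/LLHRMux when high-word registers are available.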
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, @@ -1060,9 +1056,9 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Store the argument in a stack slot and pass its address. SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast(SpillSlot)->getIndex(); - MemOpChains.push_back(DAG.getStore(Chain, DL, ArgValue, SpillSlot, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + MemOpChains.push_back(DAG.getStore( + Chain, DL, ArgValue, SpillSlot, + MachinePointerInfo::getFixedStack(MF, FI), false, false, 0)); ArgValue = SpillSlot; } else ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); @@ -1607,8 +1603,8 @@ static void adjustSubwordCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { if (Value > Mask) return; - assert(C.ICmpType == SystemZICMP::Any && - "Signedness shouldn't matter here."); + // If the constant is in range, we can use any comparison. + C.ICmpType = SystemZICMP::Any; } else return; @@ -2439,7 +2435,8 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); } // If there was a non-zero offset that we didn't fold, create an explicit @@ -2499,7 +2496,9 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, } SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(Node, DAG); SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -2529,9 +2528,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); Offset = DAG.getConstantPool(CPV, PtrVT, 8); - Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - Offset, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); // Call __tls_get_offset to retrieve the offset. Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); @@ -2544,9 +2544,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); Offset = DAG.getConstantPool(CPV, PtrVT, 8); - Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - Offset, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); // Call __tls_get_offset to retrieve the module base offset. 
Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); @@ -2562,9 +2563,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); - DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - DTPOffset, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + DTPOffset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), DTPOffset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); break; @@ -2575,8 +2577,8 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_INDNTPOFF); Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); - Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - Offset, MachinePointerInfo::getGOT(), + Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, false, 0); break; } @@ -2587,9 +2589,10 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); Offset = DAG.getConstantPool(CPV, PtrVT, 8); - Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), - Offset, MachinePointerInfo::getConstantPool(), - false, false, false, 0); + Offset = DAG.getLoad( + PtrVT, DL, DAG.getEntryNode(), Offset, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, 0); break; } } @@ -2628,10 +2631,10 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, SDValue Result; if (CP->isMachineConstantPoolEntry()) Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, - CP->getAlignment()); + CP->getAlignment()); else Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, - CP->getAlignment(), CP->getOffset()); + CP->getAlignment(), CP->getOffset()); // Use LARL to load the address of the constant pool entry. return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); @@ -2736,17 +2739,37 @@ SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, SDValue SystemZTargetLowering:: lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); + bool RealignOpt = !DAG.getMachineFunction().getFunction()-> + hasFnAttribute("no-realign-stack"); + SDValue Chain = Op.getOperand(0); SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); SDLoc DL(Op); + // If user has set the no alignment function attribute, ignore + // alloca alignments. + uint64_t AlignVal = (RealignOpt ? + dyn_cast(Align)->getZExtValue() : 0); + + uint64_t StackAlign = TFI->getStackAlignment(); + uint64_t RequiredAlign = std::max(AlignVal, StackAlign); + uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; + unsigned SPReg = getStackPointerRegisterToSaveRestore(); + SDValue NeededSpace = Size; // Get a reference to the stack pointer. SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); + // Add extra space for alignment if needed. + if (ExtraAlignSpace) + NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + // Get the new stack pointer value. 
- SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, Size); + SDValue NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); // Copy the new stack pointer back. Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); @@ -2757,6 +2780,16 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); + // Dynamically realign if needed. + if (RequiredAlign > StackAlign) { + Result = + DAG.getNode(ISD::ADD, DL, MVT::i64, Result, + DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); + Result = + DAG.getNode(ISD::AND, DL, MVT::i64, Result, + DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); + } + SDValue Ops[2] = { Result, Chain }; return DAG.getMergeValues(Ops, DL); } @@ -2837,7 +2870,7 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, } else if (DAG.ComputeNumSignBits(Op1) > 32) { Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); Opcode = SystemZISD::SDIVREM32; - } else + } else Opcode = SystemZISD::SDIVREM64; // DSG(F) takes a 64-bit dividend, so the even register in the GR128 @@ -3247,8 +3280,8 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, if (Op->getNumValues() == 1) return CC; assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); - return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), - Glued, CC); + return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), Glued, + CC); } unsigned Id = cast(Op.getOperand(0))->getZExtValue(); @@ -3890,7 +3923,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, GS.addUndef(); } else { GS.add(SDValue(), ResidueOps.size()); - ResidueOps.push_back(Op); + ResidueOps.push_back(BVN->getOperand(I)); } } @@ -3901,7 +3934,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, // Create the BUILD_VECTOR for the remaining elements, if any. if (!ResidueOps.empty()) { while (ResidueOps.size() < NumElements) - ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType())); + ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); for (auto &Op : GS.Ops) { if (!Op.getNode()) { Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps); @@ -4204,7 +4237,7 @@ SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, SDValue SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, - unsigned UnpackHigh) const { + unsigned UnpackHigh) const { SDValue PackedOp = Op.getOperand(0); EVT OutVT = Op.getValueType(); EVT InVT = PackedOp.getValueType(); @@ -4566,9 +4599,9 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT, } return Op; } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || - Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || - Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && - canTreatAsByteVector(Op.getValueType()) && + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || + Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && + canTreatAsByteVector(Op.getValueType()) && canTreatAsByteVector(Op.getOperand(0).getValueType())) { // Make sure that only the unextended bits are significant. 
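// NOTE (illustrative arithmetic, assuming power-of-two alignments): the
// dynamic realignment added to lowerDYNAMIC_STACKALLOC uses the classic
// over-allocate-then-mask trick. Reserving
//
//   NeededSpace = Size + (RequiredAlign - StackAlign)
//
// guarantees that a RequiredAlign-aligned address exists within the
// allocation, and
//
//   Result = (Result + ExtraAlignSpace) & ~(RequiredAlign - 1)
//
// rounds the returned pointer up to that address without stepping past
// the space just reserved.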
EVT ExtVT = Op.getValueType(); @@ -4579,14 +4612,14 @@ SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT, unsigned SubByte = Byte % ExtBytesPerElement; unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; if (SubByte < MinSubByte || - SubByte + BytesPerElement > ExtBytesPerElement) - break; + SubByte + BytesPerElement > ExtBytesPerElement) + break; // Get the byte offset of the unextended element Byte = Byte / ExtBytesPerElement * OpBytesPerElement; // ...then add the byte offset relative to that element. Byte += SubByte - MinSubByte; if (Byte % BytesPerElement != 0) - break; + break; Op = Op.getOperand(0); Index = Byte / BytesPerElement; Force = true; @@ -5611,6 +5644,31 @@ SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, return MBB; } +MachineBasicBlock * +SystemZTargetLowering::emitLoadAndTestCmp0(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const { + MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo *MRI = &MF.getRegInfo(); + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); + DebugLoc DL = MI->getDebugLoc(); + + unsigned SrcReg = MI->getOperand(0).getReg(); + + // Create new virtual register of the same class as source. + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + unsigned DstReg = MRI->createVirtualRegister(RC); + + // Replace pseudo with a normal load-and-test that models the def as + // well. + BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) + .addReg(SrcReg); + MI->eraseFromParent(); + + return MBB; +} + MachineBasicBlock *SystemZTargetLowering:: EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { switch (MI->getOpcode()) { @@ -5858,6 +5916,13 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); case SystemZ::TBEGINC: return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); + case SystemZ::LTEBRCompare_VecPseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR); + case SystemZ::LTDBRCompare_VecPseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR); + case SystemZ::LTXBRCompare_VecPseudo: + return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); + default: llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 07ff25144581..391636e5467f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -409,6 +409,20 @@ public: return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); } + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return SystemZ::R6D; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return SystemZ::R7D; + } + MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const override; @@ -481,7 +495,7 @@ private: SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, - unsigned UnpackHigh) const; + unsigned UnpackHigh) const; SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, @@ -530,6 +544,10 @@ private: MachineBasicBlock *MBB, unsigned Opcode, bool NoFloat) const; + MachineBasicBlock *emitLoadAndTestCmp0(MachineInstr *MI, + MachineBasicBlock *MBB, + unsigned Opcode) const; + }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h index 464f79a3bac9..5a1c874dfa36 100644 --- a/lib/Target/SystemZ/SystemZInstrBuilder.h +++ b/lib/Target/SystemZ/SystemZInstrBuilder.h @@ -35,11 +35,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) { if (MCID.mayStore()) Flags |= MachineMemOperand::MOStore; int64_t Offset = 0; - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo( - PseudoSourceValue::getFixedStack(FI), Offset), - Flags, MFFrame->getObjectSize(FI), - MFFrame->getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags, + MFFrame->getObjectSize(FI), MFFrame->getObjectAlignment(FI)); return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO); } diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 27fbd7df2882..0cb267290cc1 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -46,15 +46,28 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>; defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>; } -// Note that the comparison against zero operation is not available if we -// have vector support, since load-and-test instructions will partially -// clobber the target (vector) register. +// Note that LTxBRCompare is not available if we have vector support, +// since load-and-test instructions will partially clobber the target +// (vector) register. let Predicates = [FeatureNoVector] in { defm : CompareZeroFP; defm : CompareZeroFP; defm : CompareZeroFP; } +// Use a normal load-and-test for compare against zero in case of +// vector support (via a pseudo to simplify instruction selection). +let Defs = [CC], usesCustomInserter = 1 in { + def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>; + def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>; + def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>; +} +let Predicates = [FeatureVector] in { + defm : CompareZeroFP; + defm : CompareZeroFP; + defm : CompareZeroFP; +} + // Moves between 64-bit integer and floating-point registers. 
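// NOTE (illustrative aside): the *_VecPseudo definitions above and
// emitLoadAndTestCmp0 earlier are two halves of one mechanism. With vector
// support, LTEBR/LTDBR/LTXBR would partially clobber the (vector) register
// chosen for their target, so isel cannot use them directly. It selects a
// pseudo that models only the CC def; the custom inserter then expands it
// into a real load-and-test whose unused result lands in a fresh virtual
// register:
//
//   unsigned DstReg = MRI->createVirtualRegister(MRI->getRegClass(SrcReg));
//   BuildMI(*MBB, MI, DL, TII->get(SystemZ::LTDBR), DstReg).addReg(SrcReg);
//   MI->eraseFromParent();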
def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>; def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>; @@ -238,26 +251,46 @@ let Predicates = [FeatureFPExtension] in { // Unary arithmetic //===----------------------------------------------------------------------===// +// We prefer generic instructions during isel, because they do not +// clobber CC and therefore give the scheduler more freedom. In cases +// the CC is actually useful, the SystemZElimCompare pass will try to +// convert generic instructions into opcodes that also set CC. Note +// that lcdf / lpdf / lndf only affect the sign bit, and can therefore +// be used with fp32 as well. This could be done for fp128, in which +// case the operands would have to be tied. + // Negation (Load Complement). let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { - def LCEBR : UnaryRRE<"lceb", 0xB303, fneg, FP32, FP32>; - def LCDBR : UnaryRRE<"lcdb", 0xB313, fneg, FP64, FP64>; + def LCEBR : UnaryRRE<"lceb", 0xB303, null_frag, FP32, FP32>; + def LCDBR : UnaryRRE<"lcdb", 0xB313, null_frag, FP64, FP64>; def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>; } +// Generic form, which does not set CC. +def LCDFR : UnaryRRE<"lcdf", 0xB373, fneg, FP64, FP64>; +let isCodeGenOnly = 1 in + def LCDFR_32 : UnaryRRE<"lcdf", 0xB373, fneg, FP32, FP32>; // Absolute value (Load Positive). let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { - def LPEBR : UnaryRRE<"lpeb", 0xB300, fabs, FP32, FP32>; - def LPDBR : UnaryRRE<"lpdb", 0xB310, fabs, FP64, FP64>; + def LPEBR : UnaryRRE<"lpeb", 0xB300, null_frag, FP32, FP32>; + def LPDBR : UnaryRRE<"lpdb", 0xB310, null_frag, FP64, FP64>; def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>; } +// Generic form, which does not set CC. +def LPDFR : UnaryRRE<"lpdf", 0xB370, fabs, FP64, FP64>; +let isCodeGenOnly = 1 in + def LPDFR_32 : UnaryRRE<"lpdf", 0xB370, fabs, FP32, FP32>; // Negative absolute value (Load Negative). let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in { - def LNEBR : UnaryRRE<"lneb", 0xB301, fnabs, FP32, FP32>; - def LNDBR : UnaryRRE<"lndb", 0xB311, fnabs, FP64, FP64>; + def LNEBR : UnaryRRE<"lneb", 0xB301, null_frag, FP32, FP32>; + def LNDBR : UnaryRRE<"lndb", 0xB311, null_frag, FP64, FP64>; def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>; } +// Generic form, which does not set CC. +def LNDFR : UnaryRRE<"lndf", 0xB371, fnabs, FP64, FP64>; +let isCodeGenOnly = 1 in + def LNDFR_32 : UnaryRRE<"lndf", 0xB371, fnabs, FP32, FP32>; // Square root. 
def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>; @@ -414,6 +447,6 @@ let Defs = [CC], CCValues = 0xF in { // Peepholes //===----------------------------------------------------------------------===// -def : Pat<(f32 fpimmneg0), (LCEBR (LZER))>; -def : Pat<(f64 fpimmneg0), (LCDBR (LZDR))>; +def : Pat<(f32 fpimmneg0), (LCDFR_32 (LZER))>; +def : Pat<(f64 fpimmneg0), (LCDFR (LZDR))>; def : Pat<(f128 fpimmneg0), (LCXBR (LZXR))>; diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index 71eb9986499b..01f4cdec05cb 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -2381,6 +2381,7 @@ multiclass StringRRE opcode, def "" : InstRRE { + let Uses = [R0L]; let Constraints = "$R1 = $R1src, $R2 = $R2src"; let DisableEncoding = "$R1src, $R2src"; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 5d4a34f7131c..e6b5fc8e6235 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -69,6 +69,11 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI, MachineOperand &LowOffsetOp = MI->getOperand(2); LowOffsetOp.setImm(LowOffsetOp.getImm() + 8); + // Clear the kill flags for the base and index registers in the first + // instruction. + EarlierMI->getOperand(1).setIsKill(false); + EarlierMI->getOperand(3).setIsKill(false); + // Set the opcodes. unsigned HighOpcode = getOpcodeForOffset(NewOpcode, HighOffsetOp.getImm()); unsigned LowOpcode = getOpcodeForOffset(NewOpcode, LowOffsetOp.getImm()); @@ -111,7 +116,7 @@ void SystemZInstrInfo::expandRIPseudo(MachineInstr *MI, unsigned LowOpcode, } // MI is a three-operand RIE-style pseudo instruction. Replace it with -// LowOpcode3 if the registers are both low GR32s, otherwise use a move +// LowOpcodeK if the registers are both low GR32s, otherwise use a move // followed by HighOpcode or LowOpcode, depending on whether the target // is a high or low GR32. void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, @@ -129,6 +134,7 @@ void SystemZInstrInfo::expandRIEPseudo(MachineInstr *MI, unsigned LowOpcode, MI->getOperand(1).isKill()); MI->setDesc(get(DestIsHigh ? HighOpcode : LowOpcode)); MI->getOperand(1).setReg(DestReg); + MI->tieOperands(0, 1); } } @@ -486,11 +492,8 @@ SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, const MachineRegisterInfo *MRI) const { assert(!SrcReg2 && "Only optimizing constant comparisons so far"); bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0; - if (Value == 0 && - !IsLogical && - removeIPMBasedCompare(Compare, SrcReg, MRI, &RI)) - return true; - return false; + return Value == 0 && !IsLogical && + removeIPMBasedCompare(Compare, SrcReg, MRI, &RI); } // If Opcode is a move that has a conditional variant, return that variant, @@ -505,16 +508,13 @@ static unsigned getConditionalMove(unsigned Opcode) { bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const { unsigned Opcode = MI->getOpcode(); - if (STI.hasLoadStoreOnCond() && - getConditionalMove(Opcode)) - return true; - return false; + return STI.hasLoadStoreOnCond() && getConditionalMove(Opcode); } bool SystemZInstrInfo:: isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const { + BranchProbability Probability) const { // For now only convert single instructions. 
return NumCycles == 1; } @@ -524,7 +524,7 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumCyclesT, unsigned ExtraPredCyclesT, MachineBasicBlock &FMBB, unsigned NumCyclesF, unsigned ExtraPredCyclesF, - const BranchProbability &Probability) const { + BranchProbability Probability) const { // For now avoid converting mutually-exclusive cases. return false; } @@ -548,11 +548,10 @@ PredicateInstruction(MachineInstr *MI, ArrayRef Pred) const { return false; } -void -SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { +void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { // Split 128-bit GPR moves into two 64-bit moves. This handles ADDR128 too. if (SystemZ::GR128BitRegClass.contains(DestReg, SrcReg)) { copyPhysReg(MBB, MBBI, DL, RI.getSubReg(DestReg, SystemZ::subreg_h64), @@ -590,13 +589,10 @@ SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); } -void -SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, - int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void SystemZInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, + bool isKill, int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves @@ -604,15 +600,14 @@ SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, unsigned LoadOpcode, StoreOpcode; getLoadStoreOpcodes(RC, LoadOpcode, StoreOpcode); addFrameReference(BuildMI(MBB, MBBI, DL, get(StoreOpcode)) - .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); + .addReg(SrcReg, getKillRegState(isKill)), + FrameIdx); } -void -SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { +void SystemZInstrInfo::loadRegFromStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg, + int FrameIdx, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); // Callers may expect a single instruction, so keep 128-bit moves @@ -681,7 +676,8 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LiveVariables *LV) const { MachineInstr *MI = MBBI; MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned Opcode = MI->getOpcode(); unsigned NumOps = MI->getNumOperands(); @@ -708,14 +704,19 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); if (ThreeOperandOpcode >= 0) { - MachineInstrBuilder MIB = - BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode)) - .addOperand(Dest); + // Create three address instruction without adding the implicit + // operands. Those will instead be copied over from the original + // instruction by the loop below. 
+ MachineInstrBuilder MIB(*MF, + MF->CreateMachineInstr(get(ThreeOperandOpcode), + MI->getDebugLoc(), /*NoImplicit=*/true)); + MIB.addOperand(Dest); // Keep the kill state, but drop the tied flag. MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); // Keep the remaining operands as-is. for (unsigned I = 2; I < NumOps; ++I) MIB.addOperand(MI->getOperand(I)); + MBB->insert(MI, MIB); return finishConvertToThreeAddress(MI, MIB, LV); } } @@ -1191,6 +1192,12 @@ unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { case SystemZ::LER: return SystemZ::LTEBR; case SystemZ::LDR: return SystemZ::LTDBR; case SystemZ::LXR: return SystemZ::LTXBR; + case SystemZ::LCDFR: return SystemZ::LCDBR; + case SystemZ::LPDFR: return SystemZ::LPDBR; + case SystemZ::LNDFR: return SystemZ::LNDBR; + case SystemZ::LCDFR_32: return SystemZ::LCEBR; + case SystemZ::LPDFR_32: return SystemZ::LPEBR; + case SystemZ::LNDFR_32: return SystemZ::LNEBR; // On zEC12 we prefer to use RISBGN. But if there is a chance to // actually use the condition code, we may turn it back into RISGB. // Note that RISBG is not really a "load-and-test" instruction, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 31c9db209585..d9094ba93658 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -159,12 +159,12 @@ public: bool isPredicable(MachineInstr *MI) const override; bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles, unsigned ExtraPredCycles, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumCyclesT, unsigned ExtraPredCyclesT, MachineBasicBlock &FMBB, unsigned NumCyclesF, unsigned ExtraPredCyclesF, - const BranchProbability &Probability) const override; + BranchProbability Probability) const override; bool PredicateInstruction(MachineInstr *MI, ArrayRef Pred) const override; void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 820f30bc173d..b9f2eb5514a5 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -397,7 +397,7 @@ let mayLoad = 1, mayStore = 1 in defm MVC : MemorySS<"mvc", 0xD2, z_mvc, z_mvc_loop>; // String moves. -let mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L] in +let mayLoad = 1, mayStore = 1, Defs = [CC] in defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>; //===----------------------------------------------------------------------===// @@ -424,7 +424,7 @@ let hasSideEffects = 0 in { def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>; } let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in - def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR64>; + def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR32>; // Match 32-to-64-bit sign extensions in which the source is already // in a 64-bit register. @@ -490,7 +490,7 @@ def : Pat<(and GR64:$src, 0xffffffff), def LLCMux : UnaryRXYPseudo<"llc", azextloadi8, GRX32, 1>, Requires<[FeatureHighWord]>; def LLC : UnaryRXY<"llc", 0xE394, azextloadi8, GR32, 1>; -def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>, +def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GRH32, 1>, Requires<[FeatureHighWord]>; // 32-bit extensions from 16-bit memory. 
LLHMux expands to LLH or LLHH, @@ -498,7 +498,7 @@ def LLCH : UnaryRXY<"llch", 0xE3C2, azextloadi8, GR32, 1>, def LLHMux : UnaryRXYPseudo<"llh", azextloadi16, GRX32, 2>, Requires<[FeatureHighWord]>; def LLH : UnaryRXY<"llh", 0xE395, azextloadi16, GR32, 2>; -def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GR32, 2>, +def LLHH : UnaryRXY<"llhh", 0xE3C6, azextloadi16, GRH32, 2>, Requires<[FeatureHighWord]>; def LLHRL : UnaryRILPC<"llhrl", 0xC42, aligned_azextloadi16, GR32>; @@ -1147,7 +1147,7 @@ let Defs = [CC], CCValues = 0xE, IsLogical = 1 in { def CLFIMux : CompareRIPseudo, Requires<[FeatureHighWord]>; def CLFI : CompareRIL<"clfi", 0xC2F, z_ucmp, GR32, uimm32>; - def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GR32, uimm32>, + def CLIH : CompareRIL<"clih", 0xCCF, z_ucmp, GRH32, uimm32>, Requires<[FeatureHighWord]>; def CLGFI : CompareRIL<"clgfi", 0xC2E, z_ucmp, GR64, imm64zx32>; @@ -1185,7 +1185,7 @@ let mayLoad = 1, Defs = [CC] in defm CLC : MemorySS<"clc", 0xD5, z_clc, z_clc_loop>; // String comparison. -let mayLoad = 1, Defs = [CC], Uses = [R0L] in +let mayLoad = 1, Defs = [CC] in defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>; // Test under mask. @@ -1459,9 +1459,29 @@ let usesCustomInserter = 1 in { } // Search a block of memory for a character. -let mayLoad = 1, Defs = [CC], Uses = [R0L] in +let mayLoad = 1, Defs = [CC] in defm SRST : StringRRE<"srst", 0xb25e, z_search_string>; +// Other instructions for inline assembly +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STCK : InstS<0xB205, (outs), (ins bdaddr12only:$BD2), + "stck\t$BD2", + []>; +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STCKF : InstS<0xB27C, (outs), (ins bdaddr12only:$BD2), + "stckf\t$BD2", + []>; +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STCKE : InstS<0xB278, (outs), (ins bdaddr12only:$BD2), + "stcke\t$BD2", + []>; +let hasSideEffects = 1, Defs = [CC], mayStore = 1 in + def STFLE : InstS<0xB2B0, (outs), (ins bdaddr12only:$BD2), + "stfle\t$BD2", + []>; + + + //===----------------------------------------------------------------------===// // Peepholes. //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp index 00572d0b9d79..1a7c0d7f687a 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//== SystemZMachineFuctionInfo.cpp - SystemZ machine function info-*- C++ -*-=// +//=== SystemZMachineFunctionInfo.cpp - SystemZ machine function info ------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h index 34fc36d6bf6c..f4a517bd54df 100644 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//==- SystemZMachineFuctionInfo.h - SystemZ machine function info -*- C++ -*-=// +//=== SystemZMachineFunctionInfo.h - SystemZ machine function info -*- C++ -*-// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index dc7bd25d7ed5..6fd24e3df625 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -69,8 +69,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, // Decompose the frame index into a base and offset. 
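// NOTE (usage sketch, hedged): STCK/STCKF/STCKE/STFLE above are modelled
// for the benefit of inline assembly, so the flags are the interesting
// part: mayStore = 1 because each stores to its BD2 memory operand,
// Defs = [CC] because each sets the condition code, and hasSideEffects = 1
// because they read clock or facility state the MI-level model cannot
// express. The C-level idiom they enable looks roughly like this (s390
// asm-constraint syntax; illustrative only):
//
//   unsigned long long Tod;
//   asm volatile("stck %0" : "=Q"(Tod) : : "cc");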
int FrameIndex = MI->getOperand(FIOperandNum).getIndex(); - unsigned BasePtr = getFrameRegister(MF); - int64_t Offset = (TFI->getFrameIndexOffset(MF, FrameIndex) + + unsigned BasePtr; + int64_t Offset = (TFI->getFrameIndexReference(MF, FrameIndex, BasePtr) + MI->getOperand(FIOperandNum + 1).getImm()); // Special handling of dbg_value instructions. diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 85aa0a62cc76..0d8b08b9cbdd 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -282,4 +282,5 @@ def v128any : TypedReg; // The 2-bit condition code field of the PSW. Every register named in an // inline asm needs a class associated with it. def CC : SystemZReg<"cc">; -def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>; +let isAllocatable = 0 in + def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>; diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp index d1a17c5500d6..846edd51341a 100644 --- a/lib/Target/SystemZ/SystemZShortenInst.cpp +++ b/lib/Target/SystemZ/SystemZShortenInst.cpp @@ -16,6 +16,8 @@ #include "SystemZTargetMachine.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -35,19 +37,16 @@ public: bool runOnMachineFunction(MachineFunction &F) override; private: - bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther, - unsigned LLIxL, unsigned LLIxH); + bool shortenIIF(MachineInstr &MI, unsigned LLIxL, unsigned LLIxH); bool shortenOn0(MachineInstr &MI, unsigned Opcode); bool shortenOn01(MachineInstr &MI, unsigned Opcode); bool shortenOn001(MachineInstr &MI, unsigned Opcode); + bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode); bool shortenFPConv(MachineInstr &MI, unsigned Opcode); const SystemZInstrInfo *TII; - - // LowGPRs[I] has bit N set if LLVM register I includes the low - // word of GPR N. HighGPRs is the same for the high word. - unsigned LowGPRs[SystemZ::NUM_TARGET_REGS]; - unsigned HighGPRs[SystemZ::NUM_TARGET_REGS]; + const TargetRegisterInfo *TRI; + LivePhysRegs LiveRegs; }; char SystemZShortenInst::ID = 0; @@ -58,33 +57,31 @@ FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) { } SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm) - : MachineFunctionPass(ID), TII(nullptr), LowGPRs(), HighGPRs() { - // Set up LowGPRs and HighGPRs. - for (unsigned I = 0; I < 16; ++I) { - LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I; - LowGPRs[SystemZMC::GR64Regs[I]] |= 1 << I; - HighGPRs[SystemZMC::GRH32Regs[I]] |= 1 << I; - HighGPRs[SystemZMC::GR64Regs[I]] |= 1 << I; - if (unsigned GR128 = SystemZMC::GR128Regs[I]) { - LowGPRs[GR128] |= 3 << I; - HighGPRs[GR128] |= 3 << I; - } - } + : MachineFunctionPass(ID), TII(nullptr) {} + +// Tie operands if MI has become a two-address instruction. +static void tieOpsIfNeeded(MachineInstr &MI) { + if (MI.getDesc().getOperandConstraint(0, MCOI::TIED_TO) && + !MI.getOperand(0).isTied()) + MI.tieOperands(0, 1); } // MI loads one word of a GPR using an IIxF instruction and LLIxL and LLIxH // are the halfword immediate loads for the same word. Try to use one of them -// instead of IIxF. If MI loads the high word, GPRMap[X] is the set of high -// words referenced by LLVM register X while LiveOther is the mask of low -// words that are currently live, and vice versa. 
-bool SystemZShortenInst::shortenIIF(MachineInstr &MI, unsigned *GPRMap, - unsigned LiveOther, unsigned LLIxL, - unsigned LLIxH) { +// instead of IIxF. +bool SystemZShortenInst::shortenIIF(MachineInstr &MI, + unsigned LLIxL, unsigned LLIxH) { unsigned Reg = MI.getOperand(0).getReg(); - assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); - unsigned GPRs = GPRMap[Reg]; - assert(GPRs != 0 && "Register must be a GPR"); - if (GPRs & LiveOther) + // The new opcode will clear the other half of the GR64 reg, so + // cancel if that is live. + unsigned thisSubRegIdx = (SystemZ::GRH32BitRegClass.contains(Reg) ? + SystemZ::subreg_h32 : SystemZ::subreg_l32); + unsigned otherSubRegIdx = (thisSubRegIdx == SystemZ::subreg_l32 ? + SystemZ::subreg_h32 : SystemZ::subreg_l32); + unsigned GR64BitReg = TRI->getMatchingSuperReg(Reg, thisSubRegIdx, + &SystemZ::GR64BitRegClass); + unsigned OtherReg = TRI->getSubReg(GR64BitReg, otherSubRegIdx); + if (LiveRegs.contains(OtherReg)) return false; uint64_t Imm = MI.getOperand(1).getImm(); @@ -123,12 +120,26 @@ bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) { } // Change MI's opcode to Opcode if register operands 0, 1 and 2 have a -// 4-bit encoding and if operands 0 and 1 are tied. +// 4-bit encoding and if operands 0 and 1 are tied. Also ties op 0 +// with op 1, if MI becomes 2-address. bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) { if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && MI.getOperand(1).getReg() == MI.getOperand(0).getReg() && SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) { MI.setDesc(TII->get(Opcode)); + tieOpsIfNeeded(MI); + return true; + } + return false; +} + +// Calls shortenOn001 if CCLive is false. CC def operand is added in +// case of success. +bool SystemZShortenInst::shortenOn001AddCC(MachineInstr &MI, + unsigned Opcode) { + if (!LiveRegs.contains(SystemZ::CC) && shortenOn001(MI, Opcode)) { + MachineInstrBuilder(*MI.getParent()->getParent(), &MI) + .addReg(SystemZ::CC, RegState::ImplicitDefine); return true; } return false; @@ -164,35 +175,24 @@ bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { bool Changed = false; - // Work out which words are live on exit from the block. - unsigned LiveLow = 0; - unsigned LiveHigh = 0; - for (auto SI = MBB.succ_begin(), SE = MBB.succ_end(); SI != SE; ++SI) { - for (auto LI = (*SI)->livein_begin(), LE = (*SI)->livein_end(); - LI != LE; ++LI) { - unsigned Reg = *LI; - assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); - LiveLow |= LowGPRs[Reg]; - LiveHigh |= HighGPRs[Reg]; - } - } + // Set up the set of live registers at the end of MBB (live out) + LiveRegs.clear(); + LiveRegs.addLiveOuts(&MBB); // Iterate backwards through the block looking for instructions to change. 
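For context on the liveness gate in the new shortenIIF: IIxF inserts a 32-bit immediate into one half of a 64-bit GPR and preserves the other half, whereas the shorter LLIxL/LLIxH forms zero every bit they do not write. In SystemZ assembly terms (illustrative, not from the patch):

    iilf  %r2, 0xabcd    # writes the low 32 bits of %r2, keeps the high 32
    llill %r2, 0xabcd    # writes 16 bits and clears the rest of %r2

So the replacement is only legal when the half that IIxF would have preserved is dead, which is what the getMatchingSuperReg/getSubReg walk plus the LiveRegs.contains(OtherReg) check above establishes.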
for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) { MachineInstr &MI = *MBBI; switch (MI.getOpcode()) { case SystemZ::IILF: - Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL, - SystemZ::LLILH); + Changed |= shortenIIF(MI, SystemZ::LLILL, SystemZ::LLILH); break; case SystemZ::IIHF: - Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL, - SystemZ::LLIHH); + Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH); break; case SystemZ::WFADB: - Changed |= shortenOn001(MI, SystemZ::ADBR); + Changed |= shortenOn001AddCC(MI, SystemZ::ADBR); break; case SystemZ::WFDDB: @@ -216,15 +216,15 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { break; case SystemZ::WFLCDB: - Changed |= shortenOn01(MI, SystemZ::LCDBR); + Changed |= shortenOn01(MI, SystemZ::LCDFR); break; case SystemZ::WFLNDB: - Changed |= shortenOn01(MI, SystemZ::LNDBR); + Changed |= shortenOn01(MI, SystemZ::LNDFR); break; case SystemZ::WFLPDB: - Changed |= shortenOn01(MI, SystemZ::LPDBR); + Changed |= shortenOn01(MI, SystemZ::LPDFR); break; case SystemZ::WFSQDB: @@ -232,7 +232,7 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { break; case SystemZ::WFSDB: - Changed |= shortenOn001(MI, SystemZ::SDBR); + Changed |= shortenOn001AddCC(MI, SystemZ::SDBR); break; case SystemZ::WFCDB: @@ -257,33 +257,17 @@ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { break; } - unsigned UsedLow = 0; - unsigned UsedHigh = 0; - for (auto MOI = MI.operands_begin(), MOE = MI.operands_end(); - MOI != MOE; ++MOI) { - MachineOperand &MO = *MOI; - if (MO.isReg()) { - if (unsigned Reg = MO.getReg()) { - assert(Reg < SystemZ::NUM_TARGET_REGS && "Invalid register number"); - if (MO.isDef()) { - LiveLow &= ~LowGPRs[Reg]; - LiveHigh &= ~HighGPRs[Reg]; - } else if (!MO.isUndef()) { - UsedLow |= LowGPRs[Reg]; - UsedHigh |= HighGPRs[Reg]; - } - } - } - } - LiveLow |= UsedLow; - LiveHigh |= UsedHigh; + LiveRegs.stepBackward(MI); } return Changed; } bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) { - TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo()); + const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + LiveRegs.init(TRI); bool Changed = false; for (auto &MBB : F) diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 00cbbd10a819..f305e85f6cfe 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -16,6 +16,7 @@ using namespace llvm; +extern cl::opt<bool> MISchedPostRA; extern "C" void LLVMInitializeSystemZTarget() { // Register the target. RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget); } @@ -32,7 +33,7 @@ static bool UsesVectorABI(StringRef CPU, StringRef FS) { VectorABI = false; SmallVector<StringRef, 8> Features; - FS.split(Features, ",", -1, false /* KeepEmpty */); + FS.split(Features, ',', -1, false /* KeepEmpty */); for (auto &Feature : Features) { if (Feature == "vector" || Feature == "+vector") VectorABI = true; @@ -130,6 +131,13 @@ void SystemZPassConfig::addPreSched2() { } void SystemZPassConfig::addPreEmitPass() { + + // Do instruction shortening before compare elimination because some + // vector instructions will be shortened into opcodes that compare + // elimination recognizes.
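Combined with the scheduling hunks that follow, the late SystemZ pipeline at -O2 becomes: SystemZShortenInst, then SystemZElimCompare, then SystemZLongBranch, then a post-RA scheduler. A condensed sketch of the resulting addPreEmitPass body, assembled from the hunks below (not a verbatim quote):

    void SystemZPassConfig::addPreEmitPass() {
      if (getOptLevel() != CodeGenOpt::None)
        addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
      if (getOptLevel() != CodeGenOpt::None)
        addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
      addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
      // Final scheduling: MachineScheduler post-RA when MISchedPostRA is
      // set, the legacy post-RA list scheduler otherwise.
      if (getOptLevel() != CodeGenOpt::None)
        addPass(MISchedPostRA ? &PostMachineSchedulerID : &PostRASchedulerID);
    }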
+ if (getOptLevel() != CodeGenOpt::None) + addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false); + // We eliminate comparisons here rather than earlier because some // transformations can change the set of available CC values and we // generally want those transformations to have priority. This is @@ -155,9 +163,17 @@ void SystemZPassConfig::addPreEmitPass() { // preventing that would be a win or not. if (getOptLevel() != CodeGenOpt::None) addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false); - if (getOptLevel() != CodeGenOpt::None) - addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false); addPass(createSystemZLongBranchPass(getSystemZTargetMachine())); + + // Do final scheduling after all other optimizations, to get an + // optimal input for the decoder (branch relaxation must happen + // after block placement). + if (getOptLevel() != CodeGenOpt::None) { + if (MISchedPostRA) + addPass(&PostMachineSchedulerID); + else + addPass(&PostRASchedulerID); + } } TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { @@ -165,7 +181,7 @@ TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { } TargetIRAnalysis SystemZTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(SystemZTTIImpl(this, F)); }); } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 0a81e1f9fdf9..1a8f1f7f3aaa 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -43,6 +43,9 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool targetSchedulesPostRAScheduling() const override { return true; }; + }; } // end namespace llvm diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp index 5a87df1976c3..5ff5b21f49b8 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -31,7 +31,7 @@ using namespace llvm; // //===----------------------------------------------------------------------===// -unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -63,8 +63,8 @@ unsigned SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { return 4 * TTI::TCC_Basic; } -unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -181,8 +181,8 @@ unsigned SystemZTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, return SystemZTTIImpl::getIntImmCost(Imm, Ty); } -unsigned SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 4b80973ed879..9ae736d8413a 100644 --- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h 
@@ -28,7 +28,7 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> { const SystemZTargetLowering *getTLI() const { return TLI; } public: - explicit SystemZTTIImpl(const SystemZTargetMachine *TM, Function &F) + explicit SystemZTTIImpl(const SystemZTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} @@ -42,12 +42,11 @@ public: /// \name Scalar TTI Implementations /// @{ - unsigned getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty); - unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty); - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 19b5e2a0f978..a0b0d8f24046 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -43,7 +43,6 @@ using namespace llvm; void TargetLoweringObjectFile::Initialize(MCContext &ctx, const TargetMachine &TM) { Ctx = &ctx; - DL = TM.getDataLayout(); InitMCObjectFileInfo(TM.getTargetTriple(), TM.getRelocationModel(), TM.getCodeModel(), *Ctx); } @@ -107,7 +106,7 @@ MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase( assert(!Suffix.empty()); SmallString<60> NameStr; - NameStr += DL->getPrivateGlobalPrefix(); + NameStr += GV->getParent()->getDataLayout().getPrivateGlobalPrefix(); TM.getNameWithPrefix(NameStr, GV, Mang); NameStr.append(Suffix.begin(), Suffix.end()); return Ctx->getOrCreateSymbol(NameStr); @@ -120,7 +119,7 @@ MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol( } void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer, - const TargetMachine &TM, + const DataLayout &, const MCSymbol *Sym) const { } @@ -170,14 +169,13 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // If the initializer for the global contains something that requires a // relocation, then we may have to drop this into a writable data section // even though it is marked const. - switch (C->getRelocationInfo()) { - case Constant::NoRelocation: + if (!C->needsRelocation()) { // If the global is required to have a unique address, it can't be put // into a mergable section: just drop it into the general read-only // section instead. if (!GVar->hasUnnamedAddr()) return SectionKind::getReadOnly(); - + // If initializer is a null-terminated string, put it in a "cstring" // section of the right width. if (ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) { @@ -200,7 +198,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // Otherwise, just drop it into a mergable constant section. If we have // a section for this size, use it, otherwise use the arbitrary sized // mergable section.
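With Constant::getRelocationInfo() gone, the three-way classification collapses into the boolean needsRelocation(); the surrounding hunks amount to this decision table (a paraphrase of getKindForGlobal's constant-data handling, not new behavior):

    // !C->needsRelocation():
    //   unique address required   -> ReadOnly
    //   null-terminated string    -> MergeableCString of matching width
    //   4/8/16-byte constant      -> MergeableConst4/8/16
    //   anything else             -> ReadOnly
    // C->needsRelocation():
    //   Reloc::Static             -> ReadOnly (the linker resolves it)
    //   otherwise                 -> writable data, fixed up at load time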
- switch (TM.getDataLayout()->getTypeAllocSize(C->getType())) { + switch (GV->getParent()->getDataLayout().getTypeAllocSize(C->getType())) { case 4: return SectionKind::getMergeableConst4(); case 8: return SectionKind::getMergeableConst8(); case 16: return SectionKind::getMergeableConst16(); @@ -208,20 +206,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, return SectionKind::getReadOnly(); } - case Constant::LocalRelocation: - // In static relocation model, the linker will resolve all addresses, so - // the relocation entries will actually be constants by the time the app - // starts up. However, we can't put this into a mergable section, because - // the linker doesn't take relocations into consideration when it tries to - // merge entries in the section. - if (ReloModel == Reloc::Static) - return SectionKind::getReadOnly(); - - // Otherwise, the dynamic linker needs to fix it up, put it in the - // writable data.rel.local section. - return SectionKind::getReadOnlyWithRelLocal(); - - case Constant::GlobalRelocations: + } else { // In static relocation model, the linker will resolve all addresses, so // the relocation entries will actually be constants by the time the app // starts up. However, we can't put this into a mergable section, because @@ -242,17 +227,11 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // globals together onto fewer pages, improving the locality of the dynamic // linker. if (ReloModel == Reloc::Static) - return SectionKind::getDataNoRel(); + return SectionKind::getData(); - switch (C->getRelocationInfo()) { - case Constant::NoRelocation: - return SectionKind::getDataNoRel(); - case Constant::LocalRelocation: - return SectionKind::getDataRelLocal(); - case Constant::GlobalRelocations: - return SectionKind::getDataRel(); - } - llvm_unreachable("Invalid relocation"); + if (C->needsRelocation()) + return SectionKind::getData(); + return SectionKind::getData(); } /// This method computes the appropriate section to emit the specified global @@ -273,7 +252,8 @@ TargetLoweringObjectFile::SectionForGlobal(const GlobalValue *GV, MCSection *TargetLoweringObjectFile::getSectionForJumpTable( const Function &F, Mangler &Mang, const TargetMachine &TM) const { - return getSectionForConstant(SectionKind::getReadOnly(), /*C=*/nullptr); + return getSectionForConstant(F.getParent()->getDataLayout(), + SectionKind::getReadOnly(), /*C=*/nullptr); } bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection( @@ -296,9 +276,8 @@ bool TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection( /// Given a mergable constant with the specified size and relocation /// information, return a section that it should be placed in. 
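A theme running through these TargetLoweringObjectFile changes: the DataLayout is no longer cached off the TargetMachine in Initialize(); it is fetched from the IR module at each use, and getSectionForConstant() below now receives it as an explicit parameter. The access pattern (sketch):

    // DataLayout flows from the IR, not from TargetMachine state:
    const DataLayout &DL = GV->getParent()->getDataLayout();
    auto Size = DL.getTypeAllocSize(C->getType());
    auto Prefix = DL.getPrivateGlobalPrefix();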
-MCSection * -TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *TargetLoweringObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { if (Kind.isReadOnly() && ReadOnlySection != nullptr) return ReadOnlySection; @@ -345,7 +324,7 @@ const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol } void TargetLoweringObjectFile::getNameWithPrefix( - SmallVectorImpl<char> &OutName, const GlobalValue *GV, - bool CannotUsePrivateLabel, Mangler &Mang, const TargetMachine &TM) const { - Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + Mang.getNameWithPrefix(OutName, GV, /*CannotUsePrivateLabel=*/false); } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 83174c20c8e9..850c93cb21b8 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -150,24 +150,11 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const { } TargetIRAnalysis TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(F.getParent()->getDataLayout()); }); } -static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, - const MCSection &Section) { - if (!AsmInfo.isSectionAtomizableBySymbols(Section)) - return true; - - // If it is not dead stripped, it is safe to use private labels. - const MCSectionMachO &SMO = cast<MCSectionMachO>(Section); - if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) - return true; - - return false; -} - void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV, Mangler &Mang, bool MayAlwaysUsePrivate) const { @@ -177,11 +164,8 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name, Mang.getNameWithPrefix(Name, GV, false); return; } - SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, *this); const TargetLoweringObjectFile *TLOF = getObjFileLowering(); - const MCSection *TheSection = TLOF->SectionForGlobal(GV, GVKind, Mang, *this); - bool CannotUsePrivateLabel = !canUsePrivateLabel(*AsmInfo, *TheSection); - TLOF->getNameWithPrefix(Name, GV, CannotUsePrivateLabel, Mang, *this); + TLOF->getNameWithPrefix(Name, GV, Mang, *this); } MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const { diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp index 719923558de4..f82566c37baa 100644 --- a/lib/Target/TargetMachineC.cpp +++ b/lib/Target/TargetMachineC.cpp @@ -32,17 +32,25 @@ using namespace llvm; -inline TargetMachine *unwrap(LLVMTargetMachineRef P) { - return reinterpret_cast<TargetMachine *>(P); +namespace llvm { +// Friend to the TargetMachine, access legacy API that are made private in C++ +struct C_API_PRIVATE_ACCESS { + static const DataLayout &getDataLayout(const TargetMachine &T) { + return T.getDataLayout(); + } +}; } -inline Target *unwrap(LLVMTargetRef P) { + +static TargetMachine *unwrap(LLVMTargetMachineRef P) { + return reinterpret_cast<TargetMachine *>(P); +} +static Target *unwrap(LLVMTargetRef P) { return reinterpret_cast<Target *>(P); } -inline LLVMTargetMachineRef wrap(const TargetMachine *P) { - return - reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine *>(P)); +static LLVMTargetMachineRef wrap(const TargetMachine *P) { + return reinterpret_cast<LLVMTargetMachineRef>(const_cast<TargetMachine *>(P)); } -inline LLVMTargetRef wrap(const Target * P) { +static LLVMTargetRef wrap(const Target * P) { return
reinterpret_cast<LLVMTargetRef>(const_cast<Target *>(P)); } @@ -69,16 +77,16 @@ LLVMTargetRef LLVMGetTargetFromName(const char *Name) { LLVMBool LLVMGetTargetFromTriple(const char* TripleStr, LLVMTargetRef *T, char **ErrorMessage) { std::string Error; - + *T = wrap(TargetRegistry::lookupTarget(TripleStr, Error)); - + if (!*T) { if (ErrorMessage) *ErrorMessage = strdup(Error.c_str()); return 1; } - + return 0; } @@ -145,10 +153,7 @@ LLVMTargetMachineRef LLVMCreateTargetMachine(LLVMTargetRef T, CM, OL)); } - -void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { - delete unwrap(T); -} +void LLVMDisposeTargetMachine(LLVMTargetMachineRef T) { delete unwrap(T); } LLVMTargetRef LLVMGetTargetMachineTarget(LLVMTargetMachineRef T) { const Target* target = &(unwrap(T)->getTarget()); @@ -170,8 +175,9 @@ char* LLVMGetTargetMachineFeatureString(LLVMTargetMachineRef T) { return strdup(StringRep.c_str()); } +/** Deprecated: use LLVMGetDataLayout(LLVMModuleRef M) instead. */ LLVMTargetDataRef LLVMGetTargetMachineData(LLVMTargetMachineRef T) { - return wrap(unwrap(T)->getDataLayout()); + return wrap(&C_API_PRIVATE_ACCESS::getDataLayout(*unwrap(T))); } void LLVMSetTargetMachineAsmVerbosity(LLVMTargetMachineRef T, @@ -190,14 +196,7 @@ static LLVMBool LLVMTargetMachineEmit(LLVMTargetMachineRef T, LLVMModuleRef M, std::string error; - const DataLayout *td = TM->getDataLayout(); - - if (!td) { - error = "No DataLayout in TargetMachine"; - *ErrorMessage = strdup(error.c_str()); - return true; - } - Mod->setDataLayout(*td); + Mod->setDataLayout(TM->createDataLayout()); TargetMachine::CodeGenFileType ft; switch (codegen) { @@ -239,7 +238,6 @@ LLVMBool LLVMTargetMachineEmitToMemoryBuffer(LLVMTargetMachineRef T, SmallString<0> CodeString; raw_svector_ostream OStream(CodeString); bool Result = LLVMTargetMachineEmit(T, M, OStream, codegen, ErrorMessage); - OStream.flush(); StringRef Data = OStream.str(); *OutMemBuf = diff --git a/lib/Target/TargetRecip.cpp b/lib/Target/TargetRecip.cpp index 42bc487fe6d8..d41b6436928b 100644 --- a/lib/Target/TargetRecip.cpp +++ b/lib/Target/TargetRecip.cpp @@ -26,7 +26,7 @@ using namespace llvm; // the key strings for queries and command-line inputs. // In addition, the command-line interface recognizes the global parameters // "all", "none", and "default".
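Concretely, the strings this file accepts are a reciprocal-operation key, optionally suffixed with ':' and a refinement-step count, plus the three global words named above. Illustrative inputs and how parseRefinementStep() splits them (examples inferred from the syntax in this file):

    "divf"        -> Key = "divf",     default refinement steps
    "vec-divd:2"  -> Key = "vec-divd", RefinementSteps = 2
    "all" / "none" / "default"  -> global enable/disable toggles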
-static const char *RecipOps[] = { +static const char *const RecipOps[] = { "divd", "divf", "vec-divd", @@ -46,7 +46,7 @@ TargetRecip::TargetRecip() { RecipMap.insert(std::make_pair(RecipOps[i], RecipParams())); } -static bool parseRefinementStep(const StringRef &In, size_t &Position, +static bool parseRefinementStep(StringRef In, size_t &Position, uint8_t &Value) { const char RefStepToken = ':'; Position = In.find(RefStepToken); @@ -175,7 +175,7 @@ TargetRecip::TargetRecip(const std::vector &Args) : parseIndividualParams(Args); } -bool TargetRecip::isEnabled(const StringRef &Key) const { +bool TargetRecip::isEnabled(StringRef Key) const { ConstRecipIter Iter = RecipMap.find(Key); assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); assert(Iter->second.Enabled != Uninitialized && @@ -183,7 +183,7 @@ bool TargetRecip::isEnabled(const StringRef &Key) const { return Iter->second.Enabled; } -unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const { +unsigned TargetRecip::getRefinementSteps(StringRef Key) const { ConstRecipIter Iter = RecipMap.find(Key); assert(Iter != RecipMap.end() && "Unknown name for reciprocal map"); assert(Iter->second.RefinementSteps != Uninitialized && @@ -192,7 +192,7 @@ unsigned TargetRecip::getRefinementSteps(const StringRef &Key) const { } /// Custom settings (previously initialized values) override target defaults. -void TargetRecip::setDefaults(const StringRef &Key, bool Enable, +void TargetRecip::setDefaults(StringRef Key, bool Enable, unsigned RefSteps) { if (Key == "all") { for (auto &KV : RecipMap) { @@ -213,7 +213,7 @@ void TargetRecip::setDefaults(const StringRef &Key, bool Enable, bool TargetRecip::operator==(const TargetRecip &Other) const { for (const auto &KV : RecipMap) { - const StringRef &Op = KV.first; + StringRef Op = KV.first; const RecipParams &RP = KV.second; const RecipParams &OtherRP = Other.RecipMap.find(Op)->second; if (RP.RefinementSteps != OtherRP.RefinementSteps) diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index 25de9eee0831..284a7d91bc62 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -1,20 +1,39 @@ set(LLVM_TARGET_DEFINITIONS WebAssembly.td) +tablegen(LLVM WebAssemblyGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM WebAssemblyGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM WebAssemblyGenFastISel.inc -gen-fast-isel) +tablegen(LLVM WebAssemblyGenInstrInfo.inc -gen-instr-info) tablegen(LLVM WebAssemblyGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM WebAssemblyGenRegisterInfo.inc -gen-register-info) tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(WebAssemblyCommonTableGen) add_llvm_target(WebAssemblyCodeGen + Relooper.cpp + WebAssemblyArgumentMove.cpp + WebAssemblyAsmPrinter.cpp + WebAssemblyCFGStackify.cpp + WebAssemblyFastISel.cpp WebAssemblyFrameLowering.cpp - WebAssemblyInstrInfo.cpp WebAssemblyISelDAGToDAG.cpp WebAssemblyISelLowering.cpp + WebAssemblyInstrInfo.cpp + WebAssemblyLowerBrUnless.cpp WebAssemblyMachineFunctionInfo.cpp + WebAssemblyMCInstLower.cpp + WebAssemblyOptimizeReturned.cpp + WebAssemblyPeephole.cpp + WebAssemblyPEI.cpp WebAssemblyRegisterInfo.cpp + WebAssemblyRegColoring.cpp + WebAssemblyRegNumbering.cpp + WebAssemblyRegStackify.cpp WebAssemblySelectionDAGInfo.cpp + WebAssemblyStoreResults.cpp WebAssemblySubtarget.cpp WebAssemblyTargetMachine.cpp + WebAssemblyTargetObjectFile.cpp WebAssemblyTargetTransformInfo.cpp ) diff --git 
a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index fbb985aaafbb..7ce3a00ae360 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #include "InstPrinter/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" @@ -21,11 +23,13 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" -#include +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "asm-printer" +#include "WebAssemblyGenAsmWriter.inc" + WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) @@ -33,11 +37,93 @@ WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI, void WebAssemblyInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - llvm_unreachable("TODO: implement printRegName"); + assert(RegNo != WebAssemblyFunctionInfo::UnusedReg); + // Note that there's an implicit get_local/set_local here! + OS << "$" << RegNo; } void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, - const MCSubtargetInfo &STI) { - llvm_unreachable("TODO: implement printInst"); + const MCSubtargetInfo & /*STI*/) { + // Print the instruction (this uses the AsmStrings from the .td files). + printInstruction(MI, OS); + + // Print any additional variadic operands. + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + if (Desc.isVariadic()) + for (auto i = Desc.getNumOperands(), e = MI->getNumOperands(); i < e; ++i) { + if (i != 0) + OS << ", "; + printOperand(MI, i, OS); + } + + // Print any added annotation. + printAnnotation(OS, Annot); +} + +static std::string toString(const APFloat &FP) { + static const size_t BufBytes = 128; + char buf[BufBytes]; + if (FP.isNaN()) + assert((FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) || + FP.bitwiseIsEqual( + APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) && + "convertToHexString handles neither SNaN nor NaN payloads"); + // Use C99's hexadecimal floating-point representation. + auto Written = FP.convertToHexString( + buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven); + (void)Written; + assert(Written != 0); + assert(Written < BufBytes); + return buf; +} + +void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned WAReg = Op.getReg(); + if (int(WAReg) >= 0) + printRegName(O, WAReg); + else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs()) + O << "$pop" << (WAReg & INT32_MAX); + else if (WAReg != WebAssemblyFunctionInfo::UnusedReg) + O << "$push" << (WAReg & INT32_MAX); + else + O << "$discard"; + // Add a '=' suffix if this is a def. 
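toString() above leans on APFloat::convertToHexString, i.e. C99 %a-style hexadecimal floats, so immediates round-trip exactly instead of being decimal-rounded. The expected shape of its output (illustrative values; exact digit count depends on APFloat's formatting):

    toString(APFloat(1.5))  -> "0x1.8p0"
    toString(APFloat(-2.0)) -> "-0x1p1"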
+ if (OpNo < MII.get(MI->getOpcode()).getNumDefs()) + O << '='; + } else if (Op.isImm()) { + switch (MI->getOpcode()) { + case WebAssembly::PARAM: + case WebAssembly::RESULT: + case WebAssembly::LOCAL: + O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm())); + break; + default: + O << Op.getImm(); + break; + } + } else if (Op.isFPImm()) + O << toString(APFloat(Op.getFPImm())); + else { + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI); + } +} + +const char *llvm::WebAssembly::TypeToString(MVT Ty) { + switch (Ty.SimpleTy) { + case MVT::i32: + return "i32"; + case MVT::i64: + return "i64"; + case MVT::f32: + return "f32"; + case MVT::f64: + return "f64"; + default: + llvm_unreachable("unsupported type"); + } } diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index 70fcef214ce2..39a16f59fd78 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -16,14 +16,13 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineValueType.h" namespace llvm { -class MCOperand; class MCSubtargetInfo; -class WebAssemblyInstPrinter : public MCInstPrinter { +class WebAssemblyInstPrinter final : public MCInstPrinter { public: WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI); @@ -31,8 +30,21 @@ public: void printRegName(raw_ostream &OS, unsigned RegNo) const override; void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot, const MCSubtargetInfo &STI) override; + + // Used by tblgen code.
+ void printInstruction(const MCInst *MI, raw_ostream &O); + static const char *getRegisterName(unsigned RegNo); }; +namespace WebAssembly { + +const char *TypeToString(MVT Ty); + +} // end namespace WebAssembly + } // end namespace llvm #endif diff --git a/lib/Target/WebAssembly/LLVMBuild.txt b/lib/Target/WebAssembly/LLVMBuild.txt index 04ef9c4e4bcf..9c4d6dcb35a3 100644 --- a/lib/Target/WebAssembly/LLVMBuild.txt +++ b/lib/Target/WebAssembly/LLVMBuild.txt @@ -28,5 +28,5 @@ has_asmprinter = 1 type = Library name = WebAssemblyCodeGen parent = WebAssembly -required_libraries = Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target WebAssemblyDesc WebAssemblyInfo +required_libraries = Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target WebAssemblyAsmPrinter WebAssemblyDesc WebAssemblyInfo add_to_library_groups = WebAssembly diff --git a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt index ccc0f0d7ccbc..c8d1d821861a 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt @@ -1,4 +1,7 @@ add_llvm_library(LLVMWebAssemblyDesc + WebAssemblyAsmBackend.cpp + WebAssemblyELFObjectWriter.cpp WebAssemblyMCAsmInfo.cpp + WebAssemblyMCCodeEmitter.cpp WebAssemblyMCTargetDesc.cpp ) diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp new file mode 100644 index 000000000000..b158ccb46f99 --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -0,0 +1,103 @@ +//===-- WebAssemblyAsmBackend.cpp - WebAssembly Assembler Backend ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the WebAssemblyAsmBackend class. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { +class WebAssemblyAsmBackend final : public MCAsmBackend { + bool Is64Bit; + +public: + explicit WebAssemblyAsmBackend(bool Is64Bit) + : MCAsmBackend(), Is64Bit(Is64Bit) {} + ~WebAssemblyAsmBackend() override {} + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + + MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override; + + // No instruction requires relaxation + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override { + return false; + } + + unsigned getNumFixupKinds() const override { + // We currently just use the generic fixups in MCFixup.h and don't have any + // target-specific fixups. 
+ return 0; + } + + bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {} + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; +}; + +bool WebAssemblyAsmBackend::writeNopData(uint64_t Count, + MCObjectWriter *OW) const { + if (Count == 0) + return true; + + // FIXME: Do something. + return false; +} + +void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value, + bool IsPCRel) const { + const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind()); + unsigned NumBytes = RoundUpToAlignment(Info.TargetSize, 8); + if (!Value) + return; // Doesn't change encoding. + + // Shift the value into position. + Value <<= Info.TargetOffset; + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); +} + +MCObjectWriter * +WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const { + return createWebAssemblyELFObjectWriter(OS, Is64Bit, 0); +} +} // end anonymous namespace + +MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Target &T, + const MCRegisterInfo &MRI, + const Triple &TT, + StringRef CPU) { + return new WebAssemblyAsmBackend(TT.isArch64Bit()); +} diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp new file mode 100644 index 000000000000..c47a3d9094e5 --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp @@ -0,0 +1,54 @@ +//===-- WebAssemblyELFObjectWriter.cpp - WebAssembly ELF Writer -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file handles ELF-specific object emission, converting LLVM's +/// internal fixups into the appropriate relocations. +/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; + +namespace { +class WebAssemblyELFObjectWriter final : public MCELFObjectTargetWriter { +public: + WebAssemblyELFObjectWriter(bool Is64Bit, uint8_t OSABI); + +protected: + unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel) const override; +}; +} // end anonymous namespace + +// FIXME: Use EM_NONE as a temporary hack. Should we decide to pursue ELF +// writing seriously, we should email generic-abi@googlegroups.com and ask +// for our own ELF code. +WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit, + uint8_t OSABI) + : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_NONE, + /*HasRelocationAddend=*/true) {} + +unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel) const { + // FIXME: Do we need our own relocs? 
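Because getNumFixupKinds() above returns 0, every fixup reaching this writer carries one of MC's generic kinds, and GetRelocType() simply forwards that number as the relocation type under EM_NONE. A sketch of the generic-kind plumbing involved (standard MC API, not target code):

    // A 4-byte absolute data fixup at Offset against Expr:
    MCFixup F = MCFixup::create(Offset, Expr, FK_Data_4);
    // ...GetRelocType(Target, F, /*IsPCRel=*/false) then yields
    // unsigned(FK_Data_4).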
+ return Fixup.getKind(); +} + +MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, + uint8_t OSABI) { + MCELFObjectTargetWriter *MOTW = + new WebAssemblyELFObjectWriter(Is64Bit, OSABI); + return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true); +} diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index 55346f71c6fc..d2617796ca99 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -23,7 +23,7 @@ using namespace llvm; WebAssemblyMCAsmInfo::~WebAssemblyMCAsmInfo() {} WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { - PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit(); + PointerSize = CalleeSaveStackSlotSize = T.isArch64Bit() ? 8 : 4; // TODO: What should MaxInstLength be? @@ -41,9 +41,6 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { COMMDirectiveAlignmentIsInBytes = false; LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment; - HasDotTypeDotSizeDirective = false; - HasSingleParameterDotFile = false; - SupportsDebugInformation = true; // For now, WebAssembly does not support exceptions. diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h index d2b8fb7748fc..2dcf2cd3c892 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.h @@ -15,13 +15,13 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCASMINFO_H -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoELF.h" namespace llvm { class Triple; -class WebAssemblyMCAsmInfo final : public MCAsmInfo { +class WebAssemblyMCAsmInfo final : public MCAsmInfoELF { public: explicit WebAssemblyMCAsmInfo(const Triple &T); ~WebAssemblyMCAsmInfo() override; diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp new file mode 100644 index 000000000000..7c6c79eb5db2 --- /dev/null +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -0,0 +1,100 @@ +//=- WebAssemblyMCCodeEmitter.cpp - Convert WebAssembly code to machine code -// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements the WebAssemblyMCCodeEmitter class. 
+/// +//===----------------------------------------------------------------------===// + +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "mccodeemitter" + +namespace { +class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { + const MCRegisterInfo &MRI; + +public: + WebAssemblyMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri, + MCContext &) + : MRI(mri) {} + + ~WebAssemblyMCCodeEmitter() override {} + + /// TableGen'erated function for getting the binary encoding for an + /// instruction. + uint64_t getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + /// Return binary encoding of operand. If the machine operand requires + /// relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override; +}; +} // end anonymous namespace + +MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, MCContext &Ctx) { + return new WebAssemblyMCCodeEmitter(MCII, MRI, Ctx); +} + +unsigned WebAssemblyMCCodeEmitter::getMachineOpValue( + const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + if (MO.isReg()) + return MRI.getEncodingValue(MO.getReg()); + if (MO.isImm()) + return static_cast<unsigned>(MO.getImm()); + + assert(MO.isExpr()); + + assert(MO.getExpr()->getKind() == MCExpr::SymbolRef); + + assert(false && "FIXME: not implemented yet"); + + return 0; +} + +void WebAssemblyMCCodeEmitter::encodeInstruction( + const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + assert(false && "FIXME: not implemented yet"); +} + +// Encode WebAssembly Memory Operand +uint64_t +WebAssemblyMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { + assert(false && "FIXME: not implemented yet"); + return 0; +} + +#include "WebAssemblyGenMCCodeEmitter.inc" diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 224aa773a80e..14cd295353d5 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -26,25 +26,40 @@ using namespace llvm; #define DEBUG_TYPE "wasm-mc-target-desc" +#define GET_INSTRINFO_MC_DESC +#include "WebAssemblyGenInstrInfo.inc" + #define GET_SUBTARGETINFO_MC_DESC #include "WebAssemblyGenSubtargetInfo.inc" #define GET_REGINFO_MC_DESC #include "WebAssemblyGenRegisterInfo.inc" -static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo &MRI, +static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo & /*MRI*/, const Triple &TT) { - MCAsmInfo *MAI = new WebAssemblyMCAsmInfo(TT); - return MAI; + return new
WebAssemblyMCAsmInfo(TT); +} + +static MCInstrInfo *createWebAssemblyMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitWebAssemblyMCInstrInfo(X); + return X; +} + +static MCStreamer *createWebAssemblyMCStreamer(const Triple &T, MCContext &Ctx, + MCAsmBackend &MAB, + raw_pwrite_stream &OS, + MCCodeEmitter *Emitter, + bool RelaxAll) { + return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll); } static MCInstPrinter * -createWebAssemblyMCInstPrinter(const Triple &T, unsigned SyntaxVariant, +createWebAssemblyMCInstPrinter(const Triple & /*T*/, unsigned SyntaxVariant, const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) { - if (SyntaxVariant == 0 || SyntaxVariant == 1) - return new WebAssemblyInstPrinter(MAI, MII, MRI); - return nullptr; + assert(SyntaxVariant == 0); + return new WebAssemblyInstPrinter(MAI, MII, MRI); } // Force static initialization. @@ -53,7 +68,19 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() { // Register the MC asm info. RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo); + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(*T, createWebAssemblyMCInstrInfo); + + // Register the object streamer + TargetRegistry::RegisterELFStreamer(*T, createWebAssemblyMCStreamer); + // Register the MCInstPrinter. TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter); + + // Register the MC code emitter + TargetRegistry::RegisterMCCodeEmitter(*T, createWebAssemblyMCCodeEmitter); + + // Register the ASM Backend + TargetRegistry::RegisterMCAsmBackend(*T, createWebAssemblyAsmBackend); } } diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index eebf5b72f62b..e78f73e3da95 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -16,7 +16,6 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #include "llvm/Support/DataTypes.h" -#include namespace llvm { @@ -34,13 +33,21 @@ class StringRef; class Target; class Triple; class raw_ostream; +class raw_pwrite_stream; extern Target TheWebAssemblyTarget32; extern Target TheWebAssemblyTarget64; +MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, + const MCRegisterInfo &MRI, + MCContext &Ctx); + MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU); + const Triple &TT, StringRef CPU); + +MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, uint8_t OSABI); } // end namespace llvm @@ -50,6 +57,11 @@ MCAsmBackend *createWebAssemblyAsmBackend(const Target &T, #define GET_REGINFO_ENUM #include "WebAssemblyGenRegisterInfo.inc" +// Defines symbolic names for the WebAssembly instructions. +// +#define GET_INSTRINFO_ENUM +#include "WebAssemblyGenInstrInfo.inc" + #define GET_SUBTARGETINFO_ENUM #include "WebAssemblyGenSubtargetInfo.inc" diff --git a/lib/Target/WebAssembly/Makefile b/lib/Target/WebAssembly/Makefile index f102d73f6e86..ccf63f0be554 100644 --- a/lib/Target/WebAssembly/Makefile +++ b/lib/Target/WebAssembly/Makefile @@ -12,8 +12,14 @@ LIBRARYNAME = LLVMWebAssemblyCodeGen TARGET = WebAssembly # Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = WebAssemblyGenRegisterInfo.inc WebAssemblyGenSubtargetInfo.inc \ - WebAssemblyGenMCCodeEmitter.inc +BUILT_SOURCES = \ + WebAssemblyGenAsmWriter.inc \ + WebAssemblyGenDAGISel.inc \ + WebAssemblyGenFastISel.inc \ + WebAssemblyGenInstrInfo.inc \ + WebAssemblyGenMCCodeEmitter.inc \ + WebAssemblyGenRegisterInfo.inc \ + WebAssemblyGenSubtargetInfo.inc DIRS = InstPrinter TargetInfo MCTargetDesc diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index 63e02c455895..b97ea454165c 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -12,6 +12,16 @@ binary encoding of WebAssembly itself: * https://github.com/WebAssembly/design/blob/master/AstSemantics.md * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +The backend is built, tested and archived on the following waterfall: + https://build.chromium.org/p/client.wasm.llvm/console + +The backend's bringup is done using the GCC torture test suite first since it +doesn't require C library support. Current known failures are in +known_gcc_test_failures.txt, all other tests should pass. The waterfall will +turn red if not. Once most of these pass, further testing will use LLVM's own +test suite. The tests can be run locally using: + github.com/WebAssembly/experimental/blob/master/buildbot/torture_test.py + Interesting work that remains to be done: * Write a pass to restructurize irreducible control flow. This needs to be done before register allocation to be efficient, because it may duplicate basic @@ -19,8 +29,60 @@ Interesting work that remains to be done: level. Note that LLVM's GPU code has such a pass, but it linearizes control flow (e.g. both sides of branches execute and are masked) which is undesirable for WebAssembly. -* Basic relooper to expose control flow as an AST. -* Figure out how to properly use MC for virtual ISAs. This may require some - refactoring of MC. + +//===---------------------------------------------------------------------===// + +set_local instructions have a return value. We should (a) model this, +and (b) write optimizations which take advantage of it. Keep in mind that +many set_local instructions are implicit! + +//===---------------------------------------------------------------------===// + +Br, br_if, and tableswitch instructions can support having a value on the +expression stack across the jump (sometimes). We should (a) model this, and +(b) extend the stackifier to utilize it. + +//===---------------------------------------------------------------------===// + +The min/max operators aren't exactly a +#include +#include +#include +#include +#include + +#define DEBUG_TYPE "relooper" + +using namespace llvm; +using namespace Relooper; + +static cl::opt RelooperSplittingFactor( + "relooper-splitting-factor", + cl::desc( + "How much to discount code size when deciding whether to split a node"), + cl::init(5)); + +static cl::opt RelooperMultipleSwitchThreshold( + "relooper-multiple-switch-threshold", + cl::desc( + "How many entries to allow in a multiple before we use a switch"), + cl::init(10)); + +static cl::opt RelooperNestingLimit( + "relooper-nesting-limit", + cl::desc( + "How much nesting is acceptable"), + cl::init(20)); + + +namespace { +/// +/// Implements the relooper algorithm for a function's blocks. +/// +/// Implementation details: The Relooper instance has +/// ownership of the blocks and shapes, and frees them when done. 
+/// +struct RelooperAlgorithm { + std::deque<Block *> Blocks; + std::deque<Shape *> Shapes; + Shape *Root; + bool MinSize; + int BlockIdCounter; + int ShapeIdCounter; + + RelooperAlgorithm(); + ~RelooperAlgorithm(); + + void AddBlock(Block *New, int Id = -1); + + // Calculates the shapes + void Calculate(Block *Entry); + + // Sets us to try to minimize size + void SetMinSize(bool MinSize_) { MinSize = MinSize_; } +}; + +struct RelooperAnalysis final : public FunctionPass { + static char ID; + RelooperAnalysis() : FunctionPass(ID) {} + const char *getPassName() const override { return "relooper"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + bool runOnFunction(Function &F) override; +}; +} + +// RelooperAnalysis + +char RelooperAnalysis::ID = 0; +FunctionPass *llvm::createWebAssemblyRelooper() { + return new RelooperAnalysis(); +} + +bool RelooperAnalysis::runOnFunction(Function &F) { + DEBUG(dbgs() << "Relooping function '" << F.getName() << "'\n"); + RelooperAlgorithm R; + // FIXME: remove duplication between relooper's and LLVM's BBs. + std::map<const BasicBlock *, Block *> BB2B; + std::map<Block *, const BasicBlock *> B2BB; + for (const BasicBlock &BB : F) { + // FIXME: getName is wrong here, Code is meant to represent amount of code. + // FIXME: use BranchVarInit for switch. + Block *B = new Block(BB.getName().str().data(), /*BranchVarInit=*/nullptr); + R.AddBlock(B); + assert(BB2B.find(&BB) == BB2B.end() && "Inserting the same block twice"); + assert(B2BB.find(B) == B2BB.end() && "Inserting the same block twice"); + BB2B[&BB] = B; + B2BB[B] = &BB; + } + for (Block *B : R.Blocks) { + const BasicBlock *BB = B2BB[B]; + for (const BasicBlock *Successor : successors(BB)) + // FIXME: add branch's Condition and Code below. + B->AddBranchTo(BB2B[Successor], /*Condition=*/nullptr, /*Code=*/nullptr); + } + R.Calculate(BB2B[&F.getEntryBlock()]); + return false; // Analysis passes don't modify anything. +} + +// Helpers + +typedef MapVector<Block *, BlockSet> BlockBlockSetMap; +typedef std::list<Block *> BlockList; + +template <typename T, typename U> +static bool contains(const T &container, const U &contained) { + return container.count(contained); +} + + +// Branch + +Branch::Branch(const char *ConditionInit, const char *CodeInit) + : Ancestor(nullptr), Labeled(true) { + // FIXME: move from char* to LLVM data structures + Condition = ConditionInit ? strdup(ConditionInit) : nullptr; + Code = CodeInit ? strdup(CodeInit) : nullptr; +} + +Branch::~Branch() { + // FIXME: move from char* to LLVM data structures + free(static_cast<void *>(const_cast<char *>(Condition))); + free(static_cast<void *>(const_cast<char *>(Code))); +} + +// Block + +Block::Block(const char *CodeInit, const char *BranchVarInit) + : Parent(nullptr), Id(-1), IsCheckedMultipleEntry(false) { + // FIXME: move from char* to LLVM data structures + Code = strdup(CodeInit); + BranchVar = BranchVarInit ?
strdup(BranchVarInit) : nullptr; +} + +Block::~Block() { + // FIXME: move from char* to LLVM data structures + free(static_cast<void *>(const_cast<char *>(Code))); + free(static_cast<void *>(const_cast<char *>(BranchVar))); +} + +void Block::AddBranchTo(Block *Target, const char *Condition, + const char *Code) { + assert(!contains(BranchesOut, Target) && + "cannot add more than one branch to the same target"); + BranchesOut[Target] = make_unique<Branch>(Condition, Code); +} + +// Relooper + +RelooperAlgorithm::RelooperAlgorithm() + : Root(nullptr), MinSize(false), BlockIdCounter(1), + ShapeIdCounter(0) { // block ID 0 is reserved for clearings +} + +RelooperAlgorithm::~RelooperAlgorithm() { + for (auto Curr : Blocks) + delete Curr; + for (auto Curr : Shapes) + delete Curr; +} + +void RelooperAlgorithm::AddBlock(Block *New, int Id) { + New->Id = Id == -1 ? BlockIdCounter++ : Id; + Blocks.push_back(New); +} + +struct RelooperRecursor { + RelooperAlgorithm *Parent; + RelooperRecursor(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} +}; + +void RelooperAlgorithm::Calculate(Block *Entry) { + // Scan and optimize the input + struct PreOptimizer : public RelooperRecursor { + PreOptimizer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} + BlockSet Live; + + void FindLive(Block *Root) { + BlockList ToInvestigate; + ToInvestigate.push_back(Root); + while (!ToInvestigate.empty()) { + Block *Curr = ToInvestigate.front(); + ToInvestigate.pop_front(); + if (contains(Live, Curr)) + continue; + Live.insert(Curr); + for (const auto &iter : Curr->BranchesOut) + ToInvestigate.push_back(iter.first); + } + } + + // If a block has multiple entries but no exits, and it is small enough, it + // is useful to split it. A common example is a C++ function where + // everything ends up at a final exit block and does some RAII cleanup.
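The cost test inside SplitDeadEnds below bounds the duplication: a dead-end block is split only when the copied code stays within TotalCodeSize / RelooperSplittingFactor. Worked through with the default factor of 5 (numbers invented for illustration):

    // TotalCodeSize = 1000 bytes across all live blocks
    // candidate: 40-byte dead-end block with 3 incoming branches
    // duplication cost: 40 * (3 - 1) = 80 <= 1000 / 5 = 200  -> split it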
+ // Without splitting, we will be forced to introduce labelled loops to + // allow reaching the final block + void SplitDeadEnds() { + unsigned TotalCodeSize = 0; + for (const auto &Curr : Live) { + TotalCodeSize += strlen(Curr->Code); + } + BlockSet Splits; + BlockSet Removed; + for (const auto &Original : Live) { + if (Original->BranchesIn.size() <= 1 || + !Original->BranchesOut.empty()) + continue; // only dead ends, for now + if (contains(Original->BranchesOut, Original)) + continue; // cannot split a looping node + if (strlen(Original->Code) * (Original->BranchesIn.size() - 1) > + TotalCodeSize / RelooperSplittingFactor) + continue; // if splitting increases raw code size by a significant + // amount, abort + // Split the node (for simplicity, we replace all the blocks, even + // though we could have reused the original) + DEBUG(dbgs() << " Splitting '" << Original->Code << "'\n"); + for (const auto &Prior : Original->BranchesIn) { + Block *Split = new Block(Original->Code, Original->BranchVar); + Parent->AddBlock(Split, Original->Id); + Split->BranchesIn.insert(Prior); + std::unique_ptr<Branch> Details; + Details.swap(Prior->BranchesOut[Original]); + Prior->BranchesOut[Split] = make_unique<Branch>(Details->Condition, + Details->Code); + for (const auto &iter : Original->BranchesOut) { + Block *Post = iter.first; + Branch *Details = iter.second.get(); + Split->BranchesOut[Post] = make_unique<Branch>(Details->Condition, + Details->Code); + Post->BranchesIn.insert(Split); + } + Splits.insert(Split); + Removed.insert(Original); + } + for (const auto &iter : Original->BranchesOut) { + Block *Post = iter.first; + Post->BranchesIn.remove(Original); + } + } + for (const auto &iter : Splits) + Live.insert(iter); + for (const auto &iter : Removed) + Live.remove(iter); + } + }; + PreOptimizer Pre(this); + Pre.FindLive(Entry); + + // Add incoming branches from live blocks, ignoring dead code + for (unsigned i = 0; i < Blocks.size(); i++) { + Block *Curr = Blocks[i]; + if (!contains(Pre.Live, Curr)) + continue; + for (const auto &iter : Curr->BranchesOut) + iter.first->BranchesIn.insert(Curr); + } + + if (!MinSize) + Pre.SplitDeadEnds(); + + // Recursively process the graph + + struct Analyzer : public RelooperRecursor { + Analyzer(RelooperAlgorithm *Parent) : RelooperRecursor(Parent) {} + + // Add a shape to the list of shapes in this Relooper calculation + void Notice(Shape *New) { + New->Id = Parent->ShapeIdCounter++; + Parent->Shapes.push_back(New); + } + + // Create a list of entries from a block.
If LimitTo is provided, only + // results in that set will appear + void GetBlocksOut(Block *Source, BlockSet &Entries, + BlockSet *LimitTo = nullptr) { + for (const auto &iter : Source->BranchesOut) + if (!LimitTo || contains(*LimitTo, iter.first)) + Entries.insert(iter.first); + } + + // Converts/processes all branchings to a specific target + void Solipsize(Block *Target, Branch::FlowType Type, Shape *Ancestor, + BlockSet &From) { + DEBUG(dbgs() << "  Solipsize '" << Target->Code << "' type " << Type + << "\n"); + for (auto iter = Target->BranchesIn.begin(); + iter != Target->BranchesIn.end();) { + Block *Prior = *iter; + if (!contains(From, Prior)) { + iter++; + continue; + } + std::unique_ptr<Branch> PriorOut; + PriorOut.swap(Prior->BranchesOut[Target]); + PriorOut->Ancestor = Ancestor; + PriorOut->Type = Type; + if (MultipleShape *Multiple = dyn_cast<MultipleShape>(Ancestor)) + Multiple->Breaks++; // We are breaking out of this Multiple, so need a + // loop + iter++; // carefully increment iter before erasing + Target->BranchesIn.remove(Prior); + Target->ProcessedBranchesIn.insert(Prior); + Prior->ProcessedBranchesOut[Target].swap(PriorOut); + } + } + + Shape *MakeSimple(BlockSet &Blocks, Block *Inner, BlockSet &NextEntries) { + DEBUG(dbgs() << "  MakeSimple inner block '" << Inner->Code << "'\n"); + SimpleShape *Simple = new SimpleShape; + Notice(Simple); + Simple->Inner = Inner; + Inner->Parent = Simple; + if (Blocks.size() > 1) { + Blocks.remove(Inner); + GetBlocksOut(Inner, NextEntries, &Blocks); + BlockSet JustInner; + JustInner.insert(Inner); + for (const auto &iter : NextEntries) + Solipsize(iter, Branch::Direct, Simple, JustInner); + } + return Simple; + } + + Shape *MakeLoop(BlockSet &Blocks, BlockSet &Entries, + BlockSet &NextEntries) { + // Find the inner blocks in this loop. Proceed backwards from the entries + // until you reach a seen block, collecting as you go. + BlockSet InnerBlocks; + BlockSet Queue = Entries; + while (!Queue.empty()) { + Block *Curr = *(Queue.begin()); + Queue.remove(*Queue.begin()); + if (!contains(InnerBlocks, Curr)) { + // This element is new, mark it as inner and remove from outer + InnerBlocks.insert(Curr); + Blocks.remove(Curr); + // Add the elements prior to it + for (const auto &iter : Curr->BranchesIn) + Queue.insert(iter); + } + } + assert(!InnerBlocks.empty()); + + for (const auto &Curr : InnerBlocks) { + for (const auto &iter : Curr->BranchesOut) { + Block *Possible = iter.first; + if (!contains(InnerBlocks, Possible)) + NextEntries.insert(Possible); + } + } + + LoopShape *Loop = new LoopShape(); + Notice(Loop); + + // Solipsize the loop, replacing with break/continue and marking branches + // as Processed (will not affect later calculations) + // A. Branches to the loop entries become a continue to this shape + for (const auto &iter : Entries) + Solipsize(iter, Branch::Continue, Loop, InnerBlocks); + // B. Branches to outside the loop (a next entry) become breaks on this + // shape + for (const auto &iter : NextEntries) + Solipsize(iter, Branch::Break, Loop, InnerBlocks); + // Finish up + Shape *Inner = Process(InnerBlocks, Entries, nullptr); + Loop->Inner = Inner; + return Loop; + }
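As a rough source-level intuition for what MakeLoop and Solipsize produce together (illustrative only; the block bodies are invented): branches back to a loop entry become continues, and branches out to a next entry become breaks.

    //   A: a(); if (cond) goto A; else goto B;
    // becomes, once A's self-branch is solipsized to Continue and its
    // branch to B to Break:
    //   while (1) { a(); if (cond) continue; break; }
    //   b(); // B is then processed as the loop's Next shape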
+ + // For each entry, find the independent group reachable by it. The + // independent group is the entry itself, plus all the blocks it can + // reach that cannot be directly reached by another entry. Note that we + // ignore directly reaching the entry itself by another entry. + + // @param Ignore - previous blocks that are irrelevant + void FindIndependentGroups(BlockSet &Entries, + BlockBlockSetMap &IndependentGroups, + BlockSet *Ignore = nullptr) { + typedef std::map<Block *, Block *> BlockBlockMap; + + struct HelperClass { + BlockBlockSetMap &IndependentGroups; + BlockBlockMap Ownership; // For each block, which entry it belongs to. + // We have reached it from there. + + HelperClass(BlockBlockSetMap &IndependentGroupsInit) + : IndependentGroups(IndependentGroupsInit) {} + void InvalidateWithChildren(Block *New) { + // Being in the list means you need to be invalidated + BlockList ToInvalidate; + ToInvalidate.push_back(New); + while (!ToInvalidate.empty()) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Block *Owner = Ownership[Invalidatee]; + // Owner may have been invalidated, do not add to + // IndependentGroups! + if (contains(IndependentGroups, Owner)) + IndependentGroups[Owner].remove(Invalidatee); + if (Ownership[Invalidatee]) { // may have been seen before and + // invalidated already + Ownership[Invalidatee] = nullptr; + for (const auto &iter : Invalidatee->BranchesOut) { + Block *Target = iter.first; + BlockBlockMap::iterator Known = Ownership.find(Target); + if (Known != Ownership.end()) { + Block *TargetOwner = Known->second; + if (TargetOwner) + ToInvalidate.push_back(Target); + } + } + } + } + } + }; + HelperClass Helper(IndependentGroups); + + // We flow out from each of the entries, simultaneously. + // When we reach a new block, we add it as belonging to the one we got to + // it from. + // If we reach a new block that is already marked as belonging to someone, + // it is reachable by two entries and is not valid for any of them. + // Remove it and all it can reach that have been visited. + + // Being in the queue means we just added this item, and + // we need to add its children + BlockList Queue; + for (const auto &Entry : Entries) { + Helper.Ownership[Entry] = Entry; + IndependentGroups[Entry].insert(Entry); + Queue.push_back(Entry); + } + while (!Queue.empty()) { + Block *Curr = Queue.front(); + Queue.pop_front(); + Block *Owner = Helper.Ownership[Curr]; // Curr must be in the ownership + // map if we are in the queue + if (!Owner) + continue; // we have been invalidated meanwhile after being reached + // from two entries + // Add all children + for (const auto &iter : Curr->BranchesOut) { + Block *New = iter.first; + BlockBlockMap::iterator Known = Helper.Ownership.find(New); + if (Known == Helper.Ownership.end()) { + // New node. Add it, and put it in the queue + Helper.Ownership[New] = Owner; + IndependentGroups[Owner].insert(New); + Queue.push_back(New); + continue; + } + Block *NewOwner = Known->second; + if (!NewOwner) + continue; // We reached an invalidated node + if (NewOwner != Owner) + // Invalidate this and all reachable that we have seen - we reached + // this from two locations + Helper.InvalidateWithChildren(New); + // otherwise, we have the same owner, so do nothing + } + } + + // Having processed all the interesting blocks, we remain with just one + // potential issue: + // If a->b, and a was invalidated, but then b was later reached by + // someone else, we must invalidate b. To check for this, we go over all + // elements in the independent groups, if an element has a parent which + // does *not* have the same owner, we must remove it and all its + // children.
+ + for (const auto &iter : Entries) { + BlockSet &CurrGroup = IndependentGroups[iter]; + BlockList ToInvalidate; + for (const auto &iter : CurrGroup) { + Block *Child = iter; + for (const auto &iter : Child->BranchesIn) { + Block *Parent = iter; + if (Ignore && contains(*Ignore, Parent)) + continue; + if (Helper.Ownership[Parent] != Helper.Ownership[Child]) + ToInvalidate.push_back(Child); + } + } + while (!ToInvalidate.empty()) { + Block *Invalidatee = ToInvalidate.front(); + ToInvalidate.pop_front(); + Helper.InvalidateWithChildren(Invalidatee); + } + } + + // Remove empty groups + for (const auto &iter : Entries) + if (IndependentGroups[iter].empty()) + IndependentGroups.erase(iter); + } + + Shape *MakeMultiple(BlockSet &Blocks, BlockSet &Entries, + BlockBlockSetMap &IndependentGroups, Shape *Prev, + BlockSet &NextEntries) { + bool Fused = isa<SimpleShape>(Prev); + MultipleShape *Multiple = new MultipleShape(); + Notice(Multiple); + BlockSet CurrEntries; + for (auto &iter : IndependentGroups) { + Block *CurrEntry = iter.first; + BlockSet &CurrBlocks = iter.second; + // Create inner block + CurrEntries.clear(); + CurrEntries.insert(CurrEntry); + for (const auto &CurrInner : CurrBlocks) { + // Remove the block from the remaining blocks + Blocks.remove(CurrInner); + // Find new next entries and fix branches to them + for (auto iter = CurrInner->BranchesOut.begin(); + iter != CurrInner->BranchesOut.end();) { + Block *CurrTarget = iter->first; + auto Next = iter; + Next++; + if (!contains(CurrBlocks, CurrTarget)) { + NextEntries.insert(CurrTarget); + Solipsize(CurrTarget, Branch::Break, Multiple, CurrBlocks); + } + iter = Next; // increment carefully because Solipsize can remove us + } + } + Multiple->InnerMap[CurrEntry->Id] = + Process(CurrBlocks, CurrEntries, nullptr); + // If we are not fused, then our entries will actually be checked + if (!Fused) + CurrEntry->IsCheckedMultipleEntry = true; + } + // Add entries not handled as next entries, they are deferred + for (const auto &Entry : Entries) + if (!contains(IndependentGroups, Entry)) + NextEntries.insert(Entry); + // The multiple has been created, we can decide how to implement it + if (Multiple->InnerMap.size() >= RelooperMultipleSwitchThreshold) { + Multiple->UseSwitch = true; + Multiple->Breaks++; // switch captures breaks + } + return Multiple; + } + + // Main function. + // Process a set of blocks with specified entries, returns a shape + // The Make* functions receive a NextEntries. If they fill it with data, + // those are the entries for the ->Next block on them, and the blocks + // are what remains in Blocks (which Make* modify). In this way + // we avoid recursing on Next (imagine a long chain of Simples, if we + // recursed we could blow the stack).
+ Shape *Process(BlockSet &Blocks, BlockSet &InitialEntries, Shape *Prev) { + BlockSet *Entries = &InitialEntries; + BlockSet TempEntries[2]; + int CurrTempIndex = 0; + BlockSet *NextEntries; + Shape *Ret = nullptr; + + auto Make = [&](Shape *Temp) { + if (Prev) + Prev->Next = Temp; + if (!Ret) + Ret = Temp; + Prev = Temp; + Entries = NextEntries; + }; + + while (1) { + CurrTempIndex = 1 - CurrTempIndex; + NextEntries = &TempEntries[CurrTempIndex]; + NextEntries->clear(); + + if (Entries->empty()) + return Ret; + if (Entries->size() == 1) { + Block *Curr = *(Entries->begin()); + if (Curr->BranchesIn.empty()) { + // One entry, no looping ==> Simple + Make(MakeSimple(Blocks, Curr, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + // One entry, looping ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + + // More than one entry, try to eliminate through a Multiple groups of + // independent blocks from an entry/ies. It is important to remove + // through multiples as opposed to looping since the former is more + // performant. + BlockBlockSetMap IndependentGroups; + FindIndependentGroups(*Entries, IndependentGroups); + + if (!IndependentGroups.empty()) { + // We can handle a group in a multiple if its entry cannot be reached + // by another group. + // Note that it might be reachable by itself - a loop. But that is + // fine, we will create a loop inside the multiple block (which + // is the performant order to do it). + for (auto iter = IndependentGroups.begin(); + iter != IndependentGroups.end();) { + Block *Entry = iter->first; + BlockSet &Group = iter->second; + auto curr = iter++; // iterate carefully, we may delete + for (BlockSet::iterator iterBranch = Entry->BranchesIn.begin(); + iterBranch != Entry->BranchesIn.end(); iterBranch++) { + Block *Origin = *iterBranch; + if (!contains(Group, Origin)) { + // Reached from outside the group, so we cannot handle this + IndependentGroups.erase(curr); + break; + } + } + } + + // As an optimization, if we have 2 independent groups, and one is a + // small dead end, we can handle only that dead end. + // The other then becomes a Next - without nesting in the code and + // recursion in the analysis. + // TODO: if the larger is the only dead end, handle that too + // TODO: handle >2 groups + // TODO: handle not just dead ends, but also that do not branch to the + // NextEntries. However, must be careful there since we create a + // Next, and that Next can prevent eliminating a break (since we no + // longer naturally reach the same place), which may necessitate a + // one-time loop, which makes the unnesting pointless. + if (IndependentGroups.size() == 2) { + // Find the smaller one + auto iter = IndependentGroups.begin(); + Block *SmallEntry = iter->first; + auto SmallSize = iter->second.size(); + iter++; + Block *LargeEntry = iter->first; + auto LargeSize = iter->second.size(); + if (SmallSize != LargeSize) { // ignore the case where they are + // identical - keep things symmetrical + // there + if (SmallSize > LargeSize) { + Block *Temp = SmallEntry; + SmallEntry = LargeEntry; + LargeEntry = Temp; // Note: we did not flip the Sizes too, they + // are now invalid. TODO: use the smaller + // size as a limit? 
+ } + // Check if dead end + bool DeadEnd = true; + BlockSet &SmallGroup = IndependentGroups[SmallEntry]; + for (const auto &Curr : SmallGroup) { + for (const auto &iter : Curr->BranchesOut) { + Block *Target = iter.first; + if (!contains(SmallGroup, Target)) { + DeadEnd = false; + break; + } + } + if (!DeadEnd) + break; + } + if (DeadEnd) + IndependentGroups.erase(LargeEntry); + } + } + + if (!IndependentGroups.empty()) + // Some groups removable ==> Multiple + Make(MakeMultiple(Blocks, *Entries, IndependentGroups, Prev, + *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + // No independent groups, must be loopable ==> Loop + Make(MakeLoop(Blocks, *Entries, *NextEntries)); + if (NextEntries->empty()) + return Ret; + continue; + } + } + }; + + // Main + + BlockSet AllBlocks; + for (const auto &Curr : Pre.Live) { + AllBlocks.insert(Curr); + } + + BlockSet Entries; + Entries.insert(Entry); + Root = Analyzer(this).Process(AllBlocks, Entries, nullptr); + assert(Root); + + /// + /// Relooper post-optimizer + /// + struct PostOptimizer { + RelooperAlgorithm *Parent; + std::stack<Shape *> LoopStack; + + PostOptimizer(RelooperAlgorithm *ParentInit) : Parent(ParentInit) {} + + void ShapeSwitch(Shape* var, + std::function<void (SimpleShape*)> simple, + std::function<void (MultipleShape*)> multiple, + std::function<void (LoopShape*)> loop) { + switch (var->getKind()) { + case Shape::SK_Simple: { + simple(cast<SimpleShape>(var)); + break; + } + case Shape::SK_Multiple: { + multiple(cast<MultipleShape>(var)); + break; + } + case Shape::SK_Loop: { + loop(cast<LoopShape>(var)); + break; + } + } + }
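ShapeSwitch dispatches on Shape::getKind(), the LLVM-style RTTI discriminator declared in Relooper.h below. A minimal standalone sketch of that pattern, with hypothetical Node/Leaf types that are not from this patch:

    #include "llvm/Support/Casting.h"
    struct Node {
      enum NodeKind { NK_Leaf, NK_Pair };
      NodeKind getKind() const { return Kind; }
      Node(NodeKind K) : Kind(K) {}
    private:
      const NodeKind Kind; // the tag that stands in for vtable-based RTTI
    };
    struct Leaf : Node {
      Leaf() : Node(NK_Leaf) {}
      static bool classof(const Node *N) { return N->getKind() == NK_Leaf; }
    };
    // With getKind() and classof() in place, llvm::isa<Leaf>, llvm::cast<Leaf>
    // and llvm::dyn_cast<Leaf> all work without enabling C++ RTTI, which is
    // exactly how the cast<SimpleShape>(var) calls above resolve.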
+ + // Find the blocks that natural control flow can get us directly to, or + // through a multiple that we ignore + void FollowNaturalFlow(Shape *S, BlockSet &Out) { + ShapeSwitch(S, [&](SimpleShape* Simple) { + Out.insert(Simple->Inner); + }, [&](MultipleShape* Multiple) { + for (const auto &iter : Multiple->InnerMap) { + FollowNaturalFlow(iter.second, Out); + } + FollowNaturalFlow(Multiple->Next, Out); + }, [&](LoopShape* Loop) { + FollowNaturalFlow(Loop->Inner, Out); + }); + } + + void FindNaturals(Shape *Root, Shape *Otherwise = nullptr) { + if (Root->Next) { + Root->Natural = Root->Next; + FindNaturals(Root->Next, Otherwise); + } else { + Root->Natural = Otherwise; + } + + ShapeSwitch(Root, [](SimpleShape* Simple) { + }, [&](MultipleShape* Multiple) { + for (const auto &iter : Multiple->InnerMap) { + FindNaturals(iter.second, Root->Natural); + } + }, [&](LoopShape* Loop){ + FindNaturals(Loop->Inner, Loop->Inner); + }); + } + + // Remove unneeded breaks and continues. + // A flow operation is trivially unneeded if the shape we naturally get to + // by normal code execution is the same as the flow forces us to. + void RemoveUnneededFlows(Shape *Root, Shape *Natural = nullptr, + LoopShape *LastLoop = nullptr, + unsigned Depth = 0) { + BlockSet NaturalBlocks; + FollowNaturalFlow(Natural, NaturalBlocks); + Shape *Next = Root; + while (Next) { + Root = Next; + Next = nullptr; + ShapeSwitch( + Root, + [&](SimpleShape* Simple) { + if (Simple->Inner->BranchVar) + LastLoop = + nullptr; // a switch clears out the loop (TODO: only for + // breaks, not continue) + + if (Simple->Next) { + if (!Simple->Inner->BranchVar && + Simple->Inner->ProcessedBranchesOut.size() == 2 && + Depth < RelooperNestingLimit) { + // If there is a next block, we already know at Simple + // creation time to make direct branches, and we can do + // nothing more in general. But, we try to optimize the + // case of a break and a direct: This would normally be + //   if (break?) { break; } .. + // but if we make sure to nest the else, we can save the + // break, + //   if (!break?) { .. } + // This is also better because the more canonical nested + // form is easier to further optimize later. The + // downside is more nesting, which adds to size in builds with + // whitespace. + // Note that we avoid switches, as it complicates control flow + // and is not relevant for the common case we optimize here. + bool Found = false; + bool Abort = false; + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Block *Target = iter.first; + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break) { + Found = true; + if (!contains(NaturalBlocks, Target)) + Abort = true; + } else if (Details->Type != Branch::Direct) + Abort = true; + } + if (Found && !Abort) { + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break) { + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } else { + assert(Details->Type == Branch::Direct); + Details->Type = Branch::Nested; + } + } + } + Depth++; // this optimization increases depth, for us and all + // our next chain (i.e., until this call returns) + } + Next = Simple->Next; + } else { + // If there is no next then Natural is where we will + // go to by doing nothing, so we can potentially optimize some + // branches to direct. + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Block *Target = iter.first; + Branch *Details = iter.second.get(); + if (Details->Type != Branch::Direct && + contains(NaturalBlocks, + Target)) { // note: cannot handle split blocks + Details->Type = Branch::Direct; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } else if (Details->Type == Branch::Break && LastLoop && + LastLoop->Natural == Details->Ancestor->Natural) { + // it is important to simplify breaks, as simpler breaks + // enable other optimizations + Details->Labeled = false; + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) + Multiple->Breaks--; + } + } + } + }, [&](MultipleShape* Multiple) + { + for (const auto &iter : Multiple->InnerMap) { + RemoveUnneededFlows(iter.second, Multiple->Next, + Multiple->Breaks ?
nullptr : LastLoop, + Depth + 1); + } + Next = Multiple->Next; + }, [&](LoopShape* Loop) + { + RemoveUnneededFlows(Loop->Inner, Loop->Inner, Loop, Depth + 1); + Next = Loop->Next; + }); + } + } + + // After we know which loops exist, we can calculate which need to be + // labeled + void FindLabeledLoops(Shape *Root) { + Shape *Next = Root; + while (Next) { + Root = Next; + Next = nullptr; + + ShapeSwitch( + Root, + [&](SimpleShape *Simple) { + MultipleShape *Fused = dyn_cast<MultipleShape>(Root->Next); + // If we are fusing a Multiple with a loop into this Simple, then + // visit it now + if (Fused && Fused->Breaks) + LoopStack.push(Fused); + if (Simple->Inner->BranchVar) + LoopStack.push(nullptr); // a switch means breaks are now useless, + // push a dummy + if (Fused) { + if (Fused->UseSwitch) + LoopStack.push(nullptr); // a switch means breaks are now + // useless, push a dummy + for (const auto &iter : Fused->InnerMap) { + FindLabeledLoops(iter.second); + } + } + for (const auto &iter : Simple->Inner->ProcessedBranchesOut) { + Branch *Details = iter.second.get(); + if (Details->Type == Branch::Break || + Details->Type == Branch::Continue) { + assert(!LoopStack.empty()); + if (Details->Ancestor != LoopStack.top() && Details->Labeled) { + if (MultipleShape *Multiple = + dyn_cast<MultipleShape>(Details->Ancestor)) { + Multiple->Labeled = true; + } else { + LoopShape *Loop = cast<LoopShape>(Details->Ancestor); + Loop->Labeled = true; + } + } else { + Details->Labeled = false; + } + } + if (Fused && Fused->UseSwitch) + LoopStack.pop(); + if (Simple->Inner->BranchVar) + LoopStack.pop(); + if (Fused && Fused->Breaks) + LoopStack.pop(); + if (Fused) + Next = Fused->Next; + else + Next = Root->Next; + } + } + , [&](MultipleShape* Multiple) { + if (Multiple->Breaks) + LoopStack.push(Multiple); + for (const auto &iter : Multiple->InnerMap) + FindLabeledLoops(iter.second); + if (Multiple->Breaks) + LoopStack.pop(); + Next = Root->Next; + } + , [&](LoopShape* Loop) { + LoopStack.push(Loop); + FindLabeledLoops(Loop->Inner); + LoopStack.pop(); + Next = Root->Next; + }); + } + } + + void Process(Shape * Root) { + FindNaturals(Root); + RemoveUnneededFlows(Root); + FindLabeledLoops(Root); + } + }; + + PostOptimizer(this).Process(Root); +} diff --git a/lib/Target/WebAssembly/Relooper.h b/lib/Target/WebAssembly/Relooper.h new file mode 100644 index 000000000000..7c564de82f34 --- /dev/null +++ b/lib/Target/WebAssembly/Relooper.h @@ -0,0 +1,186 @@ +//===-- Relooper.h - Top-level interface for WebAssembly  ----*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===-------------------------------------------------------------------===// +/// +/// \file +/// \brief This defines an optimized C++ implementation of the Relooper +/// algorithm, originally developed as part of Emscripten, which +/// generates a structured AST from arbitrary control flow. +/// +//===-------------------------------------------------------------------===//
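To make the interface below concrete, here is a hypothetical driver (not part of the patch); it assumes the RelooperAlgorithm class whose methods are defined in the implementation above, and the code strings are invented placeholders:

    RelooperAlgorithm R;
    Block *A = new Block("a();", nullptr); // nullptr: no switch variable
    Block *B = new Block("b();", nullptr);
    R.AddBlock(A, -1); // -1 requests a freshly assigned block id
    R.AddBlock(B, -1);
    A->AddBranchTo(B, nullptr); // nullptr condition: the default branch
    R.Calculate(A); // R.Root now holds the structured Shape tree
    // R owns all blocks and shapes; its destructor deletes them.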
+ +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Support/Casting.h" + +#include <cassert> +#include <cstdarg> +#include <cstdio> +#include <deque> +#include <list> +#include <map> +#include <memory> +#include <stack> + +namespace llvm { + +namespace Relooper { + +struct Block; +struct Shape; + +/// +/// Info about a branching from one block to another +/// +struct Branch { + enum FlowType { + Direct = 0, // We will directly reach the right location through other + // means, no need for continue or break + Break = 1, + Continue = 2, + Nested = 3 // This code is directly reached, but we must be careful to + // ensure it is nested in an if - it is not reached + // unconditionally, other code paths exist alongside it that we need to make + // sure do not intertwine + }; + Shape + *Ancestor; // If not nullptr, this shape is the relevant one for purposes + // of getting to the target block. We break or continue on it + Branch::FlowType + Type; // If Ancestor is not nullptr, this says whether to break or + // continue + bool Labeled; // If a break or continue, whether we need to use a label + const char *Condition; // The condition for which we branch. For example, + // "my_var == 1". Conditions are checked one by one. + // One of the conditions should have nullptr as the + // condition, in which case it is the default + // FIXME: move from char* to LLVM data structures + const char *Code; // If provided, code that is run right before the branch is + // taken. This is useful for phis + // FIXME: move from char* to LLVM data structures + + Branch(const char *ConditionInit, const char *CodeInit = nullptr); + ~Branch(); +}; + +typedef SetVector<Block *> BlockSet; +typedef MapVector<Block *, Branch *> BlockBranchMap; +typedef MapVector<Block *, std::unique_ptr<Branch>> OwningBlockBranchMap; + +/// +/// Represents a basic block of code - some instructions that end with a +/// control flow modifier (a branch, return or throw). +/// +struct Block { + // Branches become processed after we finish the shape relevant to them. For + // example, when we recreate a loop, branches to the loop start become + // continues and are now processed. When we calculate what shape to generate + // from a set of blocks, we ignore processed branches. Blocks own the Branch + // objects they use, and destroy them when done. + OwningBlockBranchMap BranchesOut; + BlockSet BranchesIn; + OwningBlockBranchMap ProcessedBranchesOut; + BlockSet ProcessedBranchesIn; + Shape *Parent; // The shape we are directly inside + int Id; // A unique identifier, defined when added to relooper. Note that this + // uniquely identifies a *logical* block - if we split it, the two + // instances have the same content *and* the same Id + const char *Code; // The string representation of the code in this block. + // Owning pointer (we copy the input) + // FIXME: move from char* to LLVM data structures + const char *BranchVar; // A variable whose value determines where we go; if + // this is not nullptr, emit a switch on that variable + // FIXME: move from char* to LLVM data structures + bool IsCheckedMultipleEntry; // If true, we are a multiple entry, so reaching + // us requires setting the label variable + + Block(const char *CodeInit, const char *BranchVarInit); + ~Block(); + + void AddBranchTo(Block *Target, const char *Condition, + const char *Code = nullptr); +}; + +/// +/// Represents a structured control flow shape +/// +struct Shape { + int Id; // A unique identifier. Used to identify loops, labels are Lx where x + // is the Id.
Defined when added to relooper + Shape *Next; // The shape that will appear in the code right after this one + Shape *Natural; // The shape that control flow gets to naturally (if there is + // Next, then this is Next) + + /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.) + enum ShapeKind { SK_Simple, SK_Multiple, SK_Loop }; + +private: + ShapeKind Kind; + +public: + ShapeKind getKind() const { return Kind; } + + Shape(ShapeKind KindInit) : Id(-1), Next(nullptr), Kind(KindInit) {} +}; + +/// +/// Simple: No control flow at all, just instructions. +/// +struct SimpleShape : public Shape { + Block *Inner; + + SimpleShape() : Shape(SK_Simple), Inner(nullptr) {} + + static bool classof(const Shape *S) { return S->getKind() == SK_Simple; } +}; + +/// +/// A shape that may be implemented with a labeled loop. +/// +struct LabeledShape : public Shape { + bool Labeled; // If we have a loop, whether it needs to be labeled + + LabeledShape(ShapeKind KindInit) : Shape(KindInit), Labeled(false) {} +}; + +// Blocks with the same id were split and are identical, so we just care about +// ids in Multiple entries +typedef std::map<int, Shape *> IdShapeMap; + +/// +/// Multiple: A shape with more than one entry. If the next block to +/// be entered is among them, we run it and continue to +/// the next shape, otherwise we continue immediately to the +/// next shape. +/// +struct MultipleShape : public LabeledShape { + IdShapeMap InnerMap; // entry block ID -> shape + int Breaks; // If we have branches on us, we need a loop (or a switch). This + // is a counter of requirements, + // if we optimize it to 0, the loop is unneeded + bool UseSwitch; // Whether to switch on label as opposed to an if-else chain + + MultipleShape() : LabeledShape(SK_Multiple), Breaks(0), UseSwitch(false) {} + + static bool classof(const Shape *S) { return S->getKind() == SK_Multiple; } +}; + +/// +/// Loop: An infinite loop. +/// +struct LoopShape : public LabeledShape { + Shape *Inner; + + LoopShape() : LabeledShape(SK_Loop), Inner(nullptr) {} + + static bool classof(const Shape *S) { return S->getKind() == SK_Loop; } +}; + +} // namespace Relooper + +} // namespace llvm diff --git a/lib/Target/WebAssembly/WebAssembly.h b/lib/Target/WebAssembly/WebAssembly.h index 3ff19d46f437..e972da5af74f 100644 --- a/lib/Target/WebAssembly/WebAssembly.h +++ b/lib/Target/WebAssembly/WebAssembly.h @@ -23,8 +23,22 @@ namespace llvm { class WebAssemblyTargetMachine; class FunctionPass; +FunctionPass *createWebAssemblyOptimizeReturned(); + FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, CodeGenOpt::Level OptLevel); +FunctionPass *createWebAssemblyArgumentMove(); + +FunctionPass *createWebAssemblyStoreResults(); +FunctionPass *createWebAssemblyRegStackify(); +FunctionPass *createWebAssemblyRegColoring(); +FunctionPass *createWebAssemblyPEI(); +FunctionPass *createWebAssemblyCFGStackify(); +FunctionPass *createWebAssemblyLowerBrUnless(); +FunctionPass *createWebAssemblyRegNumbering(); +FunctionPass *createWebAssemblyPeephole(); + +FunctionPass *createWebAssemblyRelooper(); } // end namespace llvm diff --git a/lib/Target/WebAssembly/WebAssembly.td b/lib/Target/WebAssembly/WebAssembly.td index a123bf6f66b6..551ad9345154 100644 --- a/lib/Target/WebAssembly/WebAssembly.td +++ b/lib/Target/WebAssembly/WebAssembly.td @@ -6,10 +6,11 @@ // License. See LICENSE.TXT for details.
// //===----------------------------------------------------------------------===// -// -// This is a target description file for the WebAssembly architecture, which is -// also known as "wasm". -// +/// +/// \file +/// \brief This is a target description file for the WebAssembly architecture, +/// which is also known as "wasm". +/// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -50,6 +51,9 @@ def WebAssemblyInstrInfo : InstrInfo; // Minimal Viable Product. def : ProcessorModel<"mvp", NoSchedModel, []>; +// Generic processor: latest stable version. +def : ProcessorModel<"generic", NoSchedModel, []>; + // Latest and greatest experimental version of WebAssembly. Bugs included! def : ProcessorModel<"bleeding-edge", NoSchedModel, [FeatureSIMD128]>; diff --git a/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp new file mode 100644 index 000000000000..3893c408cf63 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp @@ -0,0 +1,110 @@ +//===-- WebAssemblyArgumentMove.cpp - Argument instruction moving ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file moves ARGUMENT instructions after ScheduleDAG scheduling. +/// +/// Arguments are really live-in registers, however, since we use virtual +/// registers and LLVM doesn't support live-in virtual registers, we're +/// currently making do with ARGUMENT instructions which are placed at the top +/// of the entry block. The trick is to get them to *stay* at the top of the +/// entry block. +/// +/// The ARGUMENTS physical register keeps these instructions pinned in place +/// during liveness-aware CodeGen passes, however one thing which does not +/// respect this is the ScheduleDAG scheduler. This pass is therefore run +/// immediately after that. +/// +/// This is all hopefully a temporary solution until we find a better solution +/// for describing the live-in nature of arguments. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-argument-move" + +namespace { +class WebAssemblyArgumentMove final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyArgumentMove() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "WebAssembly Argument Move"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyArgumentMove::ID = 0; +FunctionPass *llvm::createWebAssemblyArgumentMove() { + return new WebAssemblyArgumentMove(); +} + +/// Test whether the given instruction is an ARGUMENT. +static bool IsArgument(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + return true; + default: + return false; + } +} + +bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Argument Move **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + MachineBasicBlock &EntryMBB = MF.front(); + MachineBasicBlock::iterator InsertPt = EntryMBB.end(); + + // Look for the first NonArg instruction. + for (auto MII = EntryMBB.begin(), MIE = EntryMBB.end(); MII != MIE; ++MII) { + MachineInstr *MI = MII; + if (!IsArgument(MI)) { + InsertPt = MII; + break; + } + } + + // Now move any argument instructions later in the block + // to before our first NonArg instruction. + for (auto I = InsertPt, E = EntryMBB.end(); I != E; ++I) { + MachineInstr *MI = I; + if (IsArgument(MI)) { + EntryMBB.insert(InsertPt, MI->removeFromParent()); + Changed = true; + } + } + + return Changed; +} diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp new file mode 100644 index 000000000000..0d2b4d9debb9 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -0,0 +1,285 @@ +//===-- WebAssemblyAsmPrinter.cpp - WebAssembly LLVM assembly writer ------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains a printer that converts from our internal +/// representation of machine-dependent LLVM code to the WebAssembly assembly +/// language.
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "InstPrinter/WebAssemblyInstPrinter.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMCInstLower.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblyRegisterInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +namespace { + +class WebAssemblyAsmPrinter final : public AsmPrinter { + const MachineRegisterInfo *MRI; + const WebAssemblyFunctionInfo *MFI; + +public: + WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) + : AsmPrinter(TM, std::move(Streamer)), MRI(nullptr), MFI(nullptr) {} + +private: + const char *getPassName() const override { + return "WebAssembly Assembly Printer"; + } + + //===------------------------------------------------------------------===// + // MachineFunctionPass Implementation. + //===------------------------------------------------------------------===// + + bool runOnMachineFunction(MachineFunction &MF) override { + MRI = &MF.getRegInfo(); + MFI = MF.getInfo<WebAssemblyFunctionInfo>(); + return AsmPrinter::runOnMachineFunction(MF); + } + + //===------------------------------------------------------------------===// + // AsmPrinter Implementation. + //===------------------------------------------------------------------===// + + void EmitJumpTableInfo() override; + void EmitConstantPool() override; + void EmitFunctionBodyStart() override; + void EmitInstruction(const MachineInstr *MI) override; + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &OS) override; + + MVT getRegType(unsigned RegNo) const; + const char *toString(MVT VT) const; + std::string regToString(const MachineOperand &MO); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Helpers. +//===----------------------------------------------------------------------===// + +MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { + const TargetRegisterClass *TRC = + TargetRegisterInfo::isVirtualRegister(RegNo) ? + MRI->getRegClass(RegNo) : + MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); + for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) + if (TRC->hasType(T)) + return T; + DEBUG(errs() << "Unknown type for register number: " << RegNo); + llvm_unreachable("Unknown register type"); + return MVT::Other; +}
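For orientation, the PARAM, RESULT, and LOCAL pseudo-instructions emitted by EmitFunctionBodyStart below print as assembly directives of roughly this shape (a hand-written illustration of the expected .s output, not actual compiler output):

    .param  i32, i32
    .result i32
    .local  i64, f64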
+ +std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { + unsigned RegNo = MO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(RegNo) && + "Unlowered physical register encountered during assembly printing"); + assert(!MFI->isVRegStackified(RegNo)); + unsigned WAReg = MFI->getWAReg(RegNo); + assert(WAReg != WebAssemblyFunctionInfo::UnusedReg); + return '$' + utostr(WAReg); +} + +const char *WebAssemblyAsmPrinter::toString(MVT VT) const { + return WebAssembly::TypeToString(VT); +} + +//===----------------------------------------------------------------------===// +// WebAssemblyAsmPrinter Implementation. +//===----------------------------------------------------------------------===// + +void WebAssemblyAsmPrinter::EmitConstantPool() { + assert(MF->getConstantPool()->getConstants().empty() && + "WebAssembly disables constant pools"); +} + +void WebAssemblyAsmPrinter::EmitJumpTableInfo() { + // Nothing to do; jump tables are incorporated into the instruction stream. +} + +static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, + Type *Ty, SmallVectorImpl<MVT> &ValueVTs) { + const DataLayout &DL(F.getParent()->getDataLayout()); + const WebAssemblyTargetLowering &TLI = + *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering(); + SmallVector<EVT, 4> VTs; + ComputeValueVTs(TLI, DL, Ty, VTs); + + for (EVT VT : VTs) { + unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT); + MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) + ValueVTs.push_back(RegisterVT); + } +} + +void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { + if (!MFI->getParams().empty()) { + MCInst Param; + Param.setOpcode(WebAssembly::PARAM); + for (MVT VT : MFI->getParams()) + Param.addOperand(MCOperand::createImm(VT.SimpleTy)); + EmitToStreamer(*OutStreamer, Param); + } + + SmallVector<MVT, 4> ResultVTs; + const Function &F(*MF->getFunction()); + ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs); + // If the return type needs to be legalized it will get converted into + // passing a pointer. + if (ResultVTs.size() == 1) { + MCInst Result; + Result.setOpcode(WebAssembly::RESULT); + Result.addOperand(MCOperand::createImm(ResultVTs.front().SimpleTy)); + EmitToStreamer(*OutStreamer, Result); + } + + bool AnyWARegs = false; + MCInst Local; + Local.setOpcode(WebAssembly::LOCAL); + for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx); + unsigned WAReg = MFI->getWAReg(VReg); + // Don't declare unused registers. + if (WAReg == WebAssemblyFunctionInfo::UnusedReg) + continue; + // Don't redeclare parameters. + if (WAReg < MFI->getParams().size()) + continue; + // Don't declare stackified registers.
+ if (int(WAReg) < 0) + continue; + Local.addOperand(MCOperand::createImm(getRegType(VReg).SimpleTy)); + AnyWARegs = true; + } + auto &PhysRegs = MFI->getPhysRegs(); + for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) { + if (PhysRegs[PReg] == -1U) + continue; + Local.addOperand(MCOperand::createImm(getRegType(PReg).SimpleTy)); + AnyWARegs = true; + } + if (AnyWARegs) + EmitToStreamer(*OutStreamer, Local); + + AsmPrinter::EmitFunctionBodyStart(); +} + +void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { + DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n'); + + switch (MI->getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + // These represent values which are live into the function entry, so there's + // no instruction to emit. + break; + case WebAssembly::LOOP_END: + // This is a no-op which just exists to tell AsmPrinter.cpp that there's a + // fallthrough which nevertheless requires a label for the destination here. + break; + default: { + WebAssemblyMCInstLower MCInstLowering(OutContext, *this); + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + EmitToStreamer(*OutStreamer, TmpInst); + break; + } + } +} + +bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (AsmVariant != 0) + report_fatal_error("There are no defined alternate asm variants"); + + // First try the generic code, which knows about modifiers like 'c' and 'n'. + if (!AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, OS)) + return false; + + if (!ExtraCode) { + const MachineOperand &MO = MI->getOperand(OpNo); + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + OS << MO.getImm(); + return false; + case MachineOperand::MO_Register: + OS << regToString(MO); + return false; + case MachineOperand::MO_GlobalAddress: + getSymbol(MO.getGlobal())->print(OS, MAI); + printOffset(MO.getOffset(), OS); + return false; + case MachineOperand::MO_ExternalSymbol: + GetExternalSymbolSymbol(MO.getSymbolName())->print(OS, MAI); + printOffset(MO.getOffset(), OS); + return false; + case MachineOperand::MO_MachineBasicBlock: + MO.getMBB()->getSymbol()->print(OS, MAI); + return false; + default: + break; + } + } + + return true; +} + +bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &OS) { + if (AsmVariant != 0) + report_fatal_error("There are no defined alternate asm variants"); + + if (!ExtraCode) { + // TODO: For now, we just hard-code 0 as the constant offset; teach + // SelectInlineAsmMemoryOperand how to do address mode matching. + OS << "0(" + regToString(MI->getOperand(OpNo)) + ')'; + return false; + } + + return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, AsmVariant, ExtraCode, OS); +} + +// Force static initialization. 
+extern "C" void LLVMInitializeWebAssemblyAsmPrinter() { + RegisterAsmPrinter<WebAssemblyAsmPrinter> X(TheWebAssemblyTarget32); + RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(TheWebAssemblyTarget64); +} diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp new file mode 100644 index 000000000000..e9671ee07e69 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -0,0 +1,468 @@ +//===-- WebAssemblyCFGStackify.cpp - CFG Stackification -------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a CFG stacking pass. +/// +/// This pass reorders the blocks in a function to put them into a reverse +/// post-order [0], with special care to keep the order as similar as possible +/// to the original order, and to keep loops contiguous even in the case of +/// split backedges. +/// +/// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since +/// scope boundaries serve as the labels for WebAssembly's control transfers. +/// +/// This is sufficient to convert arbitrary CFGs into a form that works on +/// WebAssembly, provided that all loops are single-entry. +/// +/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblySubtarget.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-cfg-stackify" + +namespace { +class WebAssemblyCFGStackify final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly CFG Stackify"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyCFGStackify() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyCFGStackify::ID = 0; +FunctionPass *llvm::createWebAssemblyCFGStackify() { + return new WebAssemblyCFGStackify(); +} + +static void EliminateMultipleEntryLoops(MachineFunction &MF, + const MachineLoopInfo &MLI) { + SmallPtrSet<MachineBasicBlock *, 8> InSet; + for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF); + I != E; ++I) { + const std::vector<MachineBasicBlock *> &CurrentSCC = *I; + + // Skip trivial SCCs. + if (CurrentSCC.size() == 1) + continue; + + InSet.insert(CurrentSCC.begin(), CurrentSCC.end()); + MachineBasicBlock *Header = nullptr; + for (MachineBasicBlock *MBB : CurrentSCC) { + for (MachineBasicBlock *Pred : MBB->predecessors()) { + if (InSet.count(Pred)) + continue; + if (!Header) { + Header = MBB; + break; + } + // TODO: Implement multiple-entry loops. + report_fatal_error("multiple-entry loops are not supported yet"); + } + } + assert(MLI.isLoopHeader(Header)); + + InSet.clear(); + } +}
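EliminateMultipleEntryLoops establishes the single-entry property that the RPO sort below relies on. As a reference point, a minimal reverse post-order over a toy adjacency list looks like this (an illustrative sketch only, not from the patch):

    #include <algorithm>
    #include <vector>
    static void PostOrder(unsigned N,
                          const std::vector<std::vector<unsigned>> &Succ,
                          std::vector<bool> &Seen, std::vector<unsigned> &Out) {
      Seen[N] = true;
      for (unsigned S : Succ[N])
        if (!Seen[S])
          PostOrder(S, Succ, Seen, Out); // visit successors first
      Out.push_back(N); // emit N after everything reachable from it
    }
    static std::vector<unsigned> RPO(const std::vector<std::vector<unsigned>> &Succ) {
      std::vector<bool> Seen(Succ.size(), false);
      std::vector<unsigned> Out;
      PostOrder(0, Succ, Seen, Out); // assume node 0 is the entry
      std::reverse(Out.begin(), Out.end());
      return Out; // entry first; non-backedge preds precede their successors
    }
    // SortBlocks below implements the same traversal iteratively, and biases
    // the successor visit order so that loop bodies stay contiguous.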
+ +namespace { +/// Post-order traversal stack entry. +struct POStackEntry { + MachineBasicBlock *MBB; + SmallVector<MachineBasicBlock *, 0> Succs; + + POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, + const MachineLoopInfo &MLI); +}; +} // end anonymous namespace + +static bool LoopContains(const MachineLoop *Loop, + const MachineBasicBlock *MBB) { + return Loop ? Loop->contains(MBB) : true; +} + +POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, + const MachineLoopInfo &MLI) + : MBB(MBB), Succs(MBB->successors()) { + // RPO is not a unique form, since at every basic block with multiple + // successors, the DFS has to pick which order to visit the successors in. + // Sort them strategically (see below). + MachineLoop *Loop = MLI.getLoopFor(MBB); + MachineFunction::iterator Next = next(MachineFunction::iterator(MBB)); + MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next; + std::stable_sort( + Succs.begin(), Succs.end(), + [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) { + if (A == B) + return false; + + // Keep loops contiguous by preferring the block that's in the same + // loop. + bool LoopContainsA = LoopContains(Loop, A); + bool LoopContainsB = LoopContains(Loop, B); + if (LoopContainsA && !LoopContainsB) + return true; + if (!LoopContainsA && LoopContainsB) + return false; + + // Minimize perturbation by preferring the block which is the immediate + // layout successor. + if (A == LayoutSucc) + return true; + if (B == LayoutSucc) + return false; + + // TODO: More sophisticated orderings may be profitable here. + + return false; + }); +} + +/// Return the "bottom" block of a loop. This differs from +/// MachineLoop::getBottomBlock in that it works even if the loop is +/// discontiguous. +static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) { + MachineBasicBlock *Bottom = Loop->getHeader(); + for (MachineBasicBlock *MBB : Loop->blocks()) + if (MBB->getNumber() > Bottom->getNumber()) + Bottom = MBB; + return Bottom; +} + +/// Sort the blocks in RPO, taking special care to make sure that loops are +/// contiguous even in the case of split backedges. +/// +/// TODO: Determine whether RPO is actually worthwhile, or whether we should +/// move to just a stable-topological-sort-based approach that would preserve +/// more of the original order. +static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { + // Note that we do our own RPO rather than using + // "llvm/ADT/PostOrderIterator.h" because we want control over the order that + // successors are visited in (see above). Also, we can sort the blocks in the + // MachineFunction as we go. + SmallPtrSet<MachineBasicBlock *, 16> Visited; + SmallVector<POStackEntry, 16> Stack; + + MachineBasicBlock *EntryBlock = &*MF.begin(); + Visited.insert(EntryBlock); + Stack.push_back(POStackEntry(EntryBlock, MF, MLI)); + + for (;;) { + POStackEntry &Entry = Stack.back(); + SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs; + if (!Succs.empty()) { + MachineBasicBlock *Succ = Succs.pop_back_val(); + if (Visited.insert(Succ).second) + Stack.push_back(POStackEntry(Succ, MF, MLI)); + continue; + } + + // Put the block in its position in the MachineFunction. + MachineBasicBlock &MBB = *Entry.MBB; + MBB.moveBefore(&*MF.begin()); + + // Branch instructions may utilize a fallthrough, so update them if a + // fallthrough has been added or removed.
+ if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() && + !MBB.back().isBarrier()) + report_fatal_error( + "Non-branch terminator with fallthrough cannot yet be rewritten"); + if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch()) + MBB.updateTerminator(); + + Stack.pop_back(); + if (Stack.empty()) + break; + } + + // Now that we've sorted the blocks in RPO, renumber them. + MF.RenumberBlocks(); + +#ifndef NDEBUG + SmallSetVector<MachineLoop *, 8> OnStack; + + // Insert a sentinel representing the degenerate loop that starts at the + // function entry block and includes the entire function as a "loop" that + // executes once. + OnStack.insert(nullptr); + + for (auto &MBB : MF) { + assert(MBB.getNumber() >= 0 && "Renumbered blocks should be non-negative."); + + MachineLoop *Loop = MLI.getLoopFor(&MBB); + if (Loop && &MBB == Loop->getHeader()) { + // Loop header. The loop predecessor should be sorted above, and the other + // predecessors should be backedges below. + for (auto Pred : MBB.predecessors()) + assert( + (Pred->getNumber() < MBB.getNumber() || Loop->contains(Pred)) && + "Loop header predecessors must be loop predecessors or backedges"); + assert(OnStack.insert(Loop) && "Loops should be declared at most once."); + } else { + // Not a loop header. All predecessors should be sorted above. + for (auto Pred : MBB.predecessors()) + assert(Pred->getNumber() < MBB.getNumber() && + "Non-loop-header predecessors should be topologically sorted"); + assert(OnStack.count(MLI.getLoopFor(&MBB)) && + "Blocks must be nested in their loops"); + } + while (OnStack.size() > 1 && &MBB == LoopBottom(OnStack.back())) + OnStack.pop_back(); + } + assert(OnStack.pop_back_val() == nullptr && + "The function entry block shouldn't actually be a loop header"); + assert(OnStack.empty() && + "Control flow stack pushes and pops should be balanced."); +#endif +} + +/// Test whether Pred has any terminators explicitly branching to MBB, as +/// opposed to falling through. Note that it's possible (e.g. in unoptimized +/// code) for a branch instruction to both branch to a block and fallthrough +/// to it, so we check the actual branch operands to see if there are any +/// explicit mentions. +static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, MachineBasicBlock *MBB) { + for (MachineInstr &MI : Pred->terminators()) + for (MachineOperand &MO : MI.explicit_operands()) + if (MO.isMBB() && MO.getMBB() == MBB) + return true; + return false; +} + +/// Insert a BLOCK marker for branches to MBB (if needed). +static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, + SmallVectorImpl<MachineBasicBlock *> &ScopeTops, + const WebAssemblyInstrInfo &TII, + const MachineLoopInfo &MLI, + MachineDominatorTree &MDT) { + // First compute the nearest common dominator of all forward non-fallthrough + // predecessors so that we minimize the time that the BLOCK is on the stack, + // which reduces overall stack height. + MachineBasicBlock *Header = nullptr; + bool IsBranchedTo = false; + int MBBNumber = MBB.getNumber(); + for (MachineBasicBlock *Pred : MBB.predecessors()) + if (Pred->getNumber() < MBBNumber) { + Header = Header ?
MDT.findNearestCommonDominator(Header, Pred) : Pred; + if (ExplicitlyBranchesTo(Pred, &MBB)) + IsBranchedTo = true; + } + if (!Header) + return; + if (!IsBranchedTo) + return; + + assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors"); + MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB)); + + // If the nearest common dominator is inside a more deeply nested context, + // walk out to the nearest scope which isn't more deeply nested. + for (MachineFunction::iterator I(LayoutPred), E(Header); I != E; --I) { + if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) { + if (ScopeTop->getNumber() > Header->getNumber()) { + // Skip over an intervening scope. + I = next(MachineFunction::iterator(ScopeTop)); + } else { + // We found a scope level at an appropriate depth. + Header = ScopeTop; + break; + } + } + } + + // If there's a loop which ends just before MBB which contains Header, we can + // reuse its label instead of inserting a new BLOCK. + for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred); + Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop()) + if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header)) + return; + + // Decide where in Header to put the BLOCK. + MachineBasicBlock::iterator InsertPos; + MachineLoop *HeaderLoop = MLI.getLoopFor(Header); + if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) { + // Header is the header of a loop that does not lexically contain MBB, so + // the BLOCK needs to be above the LOOP. + InsertPos = Header->begin(); + } else { + // Otherwise, insert the BLOCK as late in Header as we can, but before the + // beginning of the local expression tree and any nested BLOCKs. + InsertPos = Header->getFirstTerminator(); + while (InsertPos != Header->begin() && + prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) && + prev(InsertPos)->getOpcode() != WebAssembly::LOOP) + --InsertPos; + } + + // Add the BLOCK. + BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK)) + .addMBB(&MBB); + + // Track the farthest-spanning scope that ends at this point. + int Number = MBB.getNumber(); + if (!ScopeTops[Number] || + ScopeTops[Number]->getNumber() > Header->getNumber()) + ScopeTops[Number] = Header; +} + +/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header). +static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF, + SmallVectorImpl &ScopeTops, + const WebAssemblyInstrInfo &TII, + const MachineLoopInfo &MLI) { + MachineLoop *Loop = MLI.getLoopFor(&MBB); + if (!Loop || Loop->getHeader() != &MBB) + return; + + // The operand of a LOOP is the first block after the loop. If the loop is the + // bottom of the function, insert a dummy block at the end. + MachineBasicBlock *Bottom = LoopBottom(Loop); + auto Iter = next(MachineFunction::iterator(Bottom)); + if (Iter == MF.end()) { + MachineBasicBlock *Label = MF.CreateMachineBasicBlock(); + // Give it a fake predecessor so that AsmPrinter prints its label. + Label->addSuccessor(Label); + MF.push_back(Label); + Iter = next(MachineFunction::iterator(Bottom)); + } + MachineBasicBlock *AfterLoop = &*Iter; + BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP)) + .addMBB(AfterLoop); + + // Emit a special no-op telling the asm printer that we need a label to close + // the loop scope, even though the destination is only reachable by + // fallthrough. 
+ if (!Bottom->back().isBarrier()) + BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END)); + + assert((!ScopeTops[AfterLoop->getNumber()] || + ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && + "With RPO we should visit the outer-most loop for a block first."); + if (!ScopeTops[AfterLoop->getNumber()]) + ScopeTops[AfterLoop->getNumber()] = &MBB; +} + +/// Insert LOOP and BLOCK markers at appropriate places. +static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, + const WebAssemblyInstrInfo &TII, + MachineDominatorTree &MDT) { + // For each block whose label represents the end of a scope, record the block + // which holds the beginning of the scope. This will allow us to quickly skip + // over scoped regions when walking blocks. We allocate one more than the + // number of blocks in the function to accommodate for the possible fake block + // we may insert at the end. + SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1); + + for (auto &MBB : MF) { + // Place the LOOP for MBB if MBB is the header of a loop. + PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI); + + // Place the BLOCK for MBB if MBB is branched to from above. + PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT); + } +} + +#ifndef NDEBUG +static bool +IsOnStack(const SmallVectorImpl<std::pair<const MachineBasicBlock *, bool>> &Stack, + const MachineBasicBlock *MBB) { + for (const auto &Pair : Stack) + if (Pair.first == MBB) + return true; + return false; +} +#endif + +bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** CFG Stackifying **********\n" + "********** Function: " + << MF.getName() << '\n'); + + const auto &MLI = getAnalysis<MachineLoopInfo>(); + auto &MDT = getAnalysis<MachineDominatorTree>(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + // RPO sorting needs all loops to be single-entry. + EliminateMultipleEntryLoops(MF, MLI); + + // Sort the blocks in RPO, with contiguous loops. + SortBlocks(MF, MLI); + + // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. + PlaceMarkers(MF, MLI, TII, MDT); + +#ifndef NDEBUG + // Verify that block and loop beginnings and endings are in LIFO order, and + // that all references to blocks are to blocks on the stack at the point of + // the reference. + SmallVector<std::pair<const MachineBasicBlock *, bool>, 0> Stack; + for (auto &MBB : MF) { + while (!Stack.empty() && Stack.back().first == &MBB) + if (Stack.back().second) { + assert(Stack.size() >= 2); + Stack.pop_back(); + Stack.pop_back(); + } else { + assert(Stack.size() >= 1); + Stack.pop_back(); + } + for (auto &MI : MBB) + switch (MI.getOpcode()) { + case WebAssembly::LOOP: + Stack.push_back(std::make_pair(&MBB, false)); + Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true)); + break; + case WebAssembly::BLOCK: + Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false)); + break; + default: + // Verify that all referenced blocks are in scope. A reference to a + // block with a negative number is invalid, but can happen with inline + // asm, so we shouldn't assert on it, but instead let CodeGen properly + // fail on it.
+        for (const MachineOperand &MO : MI.explicit_operands())
+          if (MO.isMBB() && MO.getMBB()->getNumber() >= 0)
+            assert(IsOnStack(Stack, MO.getMBB()));
+        break;
+      }
+  }
+  assert(Stack.empty());
+#endif
+
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
new file mode 100644
index 000000000000..1b761b1a9d73
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -0,0 +1,81 @@
+//===-- WebAssemblyFastISel.cpp - WebAssembly FastISel implementation ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the WebAssembly-specific support for the FastISel
+/// class. Some of the target-specific code is generated by tablegen in the
+/// file WebAssemblyGenFastISel.inc, which is #included here.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyTargetMachine.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-fastisel"
+
+namespace {
+
+class WebAssemblyFastISel final : public FastISel {
+  /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
+  /// right decision when generating code for different targets.
+  const WebAssemblySubtarget *Subtarget;
+  LLVMContext *Context;
+
+  // Call handling routines.
+private:
+public:
+  // Backend specific FastISel code.
+  WebAssemblyFastISel(FunctionLoweringInfo &FuncInfo,
+                      const TargetLibraryInfo *LibInfo)
+      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
+    Subtarget = &FuncInfo.MF->getSubtarget<WebAssemblySubtarget>();
+    Context = &FuncInfo.Fn->getContext();
+  }
+
+  bool fastSelectInstruction(const Instruction *I) override;
+
+#include "WebAssemblyGenFastISel.inc"
+};
+
+} // end anonymous namespace
+
+bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) {
+  switch (I->getOpcode()) {
+  default:
+    break;
+    // TODO: add fast-isel selection cases here...
+  }
+
+  // Fall back to target-independent instruction selection.
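+  // Editor's note (illustrative sketch, not in the original patch): as the
+  // backend matures, cases would be added to the switch above and routed to
+  // target-specific helpers, e.g.:
+  //   case Instruction::Ret:
+  //     return selectRet(I); // hypothetical helper, not defined in this patch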
+  return selectOperator(I, I->getOpcode());
+}
+
+FastISel *WebAssembly::createFastISel(FunctionLoweringInfo &FuncInfo,
+                                      const TargetLibraryInfo *LibInfo) {
+  return new WebAssemblyFastISel(FuncInfo, LibInfo);
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index e4ca82e963c2..0eefd57f1f2c 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -35,11 +35,20 @@ using namespace llvm;
 #define DEBUG_TYPE "wasm-frame-info"
 
 // TODO: Implement a red zone?
+// TODO: wasm64
+// TODO: Prolog/epilog should be stackified too. This pass runs after register
+// stackification, so we'll have to do it manually.
+// TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions
 
 /// Return true if the specified function should have a dedicated frame pointer
 /// register.
 bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
-  llvm_unreachable("TODO: implement hasFP");
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const auto *RegInfo =
+      MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+  return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
+         MFI->hasStackMap() || MFI->hasPatchPoint() ||
+         RegInfo->needsStackRealignment(MF);
 }
 
 /// Under normal circumstances, when a frame pointer is not required, we reserve
@@ -52,23 +61,115 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame(
   return !MF.getFrameInfo()->hasVarSizedObjects();
 }
 
+
+/// Adjust the stack pointer by a constant amount.
+static void adjustStackPointer(unsigned StackSize,
+                               bool AdjustUp,
+                               MachineFunction& MF,
+                               MachineBasicBlock& MBB,
+                               const TargetInstrInfo* TII,
+                               MachineBasicBlock::iterator InsertPt,
+                               const DebugLoc& DL) {
+  auto &MRI = MF.getRegInfo();
+  unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPReg)
+      .addExternalSymbol(SPSymbol);
+  // This MachinePointerInfo should reference __stack_pointer as well but
+  // doesn't because MachinePointerInfo() takes a GV which we don't have for
+  // __stack_pointer. TODO: check if PseudoSourceValue::ExternalSymbolCallEntry
+  // is appropriate instead. (likewise for EmitEpilogue below)
+  auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(),
+                                        MachineMemOperand::MOLoad, 4, 4);
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
+      .addImm(0)
+      .addReg(SPReg)
+      .addMemOperand(LoadMMO);
+  // Add/Subtract the frame size
+  unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+      .addImm(StackSize);
+  BuildMI(MBB, InsertPt, DL,
+          TII->get(AdjustUp ? WebAssembly::ADD_I32 : WebAssembly::SUB_I32),
+          WebAssembly::SP32)
+      .addReg(SPReg)
+      .addReg(OffsetReg);
+  // The SP32 register now has the new stacktop. Also write it back to memory.
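+  // Editor's sketch (not part of the original patch): for
+  // adjustStackPointer(16, /*AdjustUp=*/false), the code above and the
+  // write-back below amount to:
+  //   SPReg     = i32.const __stack_pointer
+  //   SPReg     = i32.load 0(SPReg)        ; current stack top
+  //   OffsetReg = i32.const 16
+  //   SP32      = i32.sub SPReg, OffsetReg ; new stack top
+  //   i32.store 0(&__stack_pointer), SP32  ; write it back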
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+      .addExternalSymbol(SPSymbol);
+  auto *MMO = new MachineMemOperand(MachinePointerInfo(),
+                                    MachineMemOperand::MOStore, 4, 4);
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
+      .addImm(0)
+      .addReg(OffsetReg)
+      .addReg(WebAssembly::SP32)
+      .addMemOperand(MMO);
+}
+
 void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
     MachineFunction &MF, MachineBasicBlock &MBB,
     MachineBasicBlock::iterator I) const {
-  llvm_unreachable("TODO: implement eliminateCallFramePseudoInstr");
+  const auto *TII =
+      static_cast<const WebAssemblyInstrInfo *>(
+          MF.getSubtarget().getInstrInfo());
+  DebugLoc DL = I->getDebugLoc();
+  unsigned Opc = I->getOpcode();
+  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
+  unsigned Amount = I->getOperand(0).getImm();
+  if (Amount)
+    adjustStackPointer(Amount, IsDestroy, MF, MBB,
+                       TII, I, DL);
+  MBB.erase(I);
 }
 
 void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
                                             MachineBasicBlock &MBB) const {
-  llvm_unreachable("TODO: implement emitPrologue");
+  // TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions
+  auto *MFI = MF.getFrameInfo();
+  assert(MFI->getCalleeSavedInfo().empty() &&
+         "WebAssembly should not have callee-saved registers");
+  assert(!hasFP(MF) && "Functions needing frame pointers not yet supported");
+  uint64_t StackSize = MFI->getStackSize();
+  if (!StackSize && (!MFI->adjustsStack() || MFI->getMaxCallFrameSize() == 0))
+    return;
+
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+
+  auto InsertPt = MBB.begin();
+  DebugLoc DL;
+
+  adjustStackPointer(StackSize, false, MF, MBB, TII, InsertPt, DL);
 }
 
 void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
                                             MachineBasicBlock &MBB) const {
-  llvm_unreachable("TODO: implement emitEpilogue");
-}
+  uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+  if (!StackSize)
+    return;
+  const auto *TII = MF.getSubtarget().getInstrInfo();
+  auto &MRI = MF.getRegInfo();
+  unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  auto InsertPt = MBB.getFirstTerminator();
+  DebugLoc DL;
 
-void WebAssemblyFrameLowering::processFunctionBeforeCalleeSavedScan(
-    MachineFunction &MF, RegScavenger *RS) const {
-  llvm_unreachable("TODO: implement processFunctionBeforeCalleeSavedScan");
+  if (InsertPt != MBB.end()) {
+    DL = InsertPt->getDebugLoc();
+  }
+
+  // Restore the stack pointer. Without FP its value is just SP32 - stacksize.
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+      .addImm(StackSize);
+  auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer");
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), WebAssembly::SP32)
+      .addReg(WebAssembly::SP32)
+      .addReg(OffsetReg);
+  // Re-use OffsetReg to hold the address of the stacktop
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg)
+      .addExternalSymbol(SPSymbol);
+  auto *MMO = new MachineMemOperand(MachinePointerInfo(),
+                                    MachineMemOperand::MOStore, 4, 4);
+  BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32)
+      .addImm(0)
+      .addReg(OffsetReg)
+      .addReg(WebAssembly::SP32)
+      .addMemOperand(MMO);
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index 0b112d02c0bf..5f4708fe77ed 100644
--- a/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -38,9 +38,6 @@ public:
   bool hasFP(const MachineFunction &MF) const override;
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
-
-  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-                                            RegScavenger *RS) const override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyISD.def b/lib/Target/WebAssembly/WebAssemblyISD.def
new file mode 100644
index 000000000000..3a03fa55b220
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -0,0 +1,25 @@
+//- WebAssemblyISD.def - WebAssembly ISD ---------------------------*- C++ -*-//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file describes the various WebAssembly ISD node types.
+///
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+HANDLE_NODETYPE(CALL1)
+HANDLE_NODETYPE(CALL0)
+HANDLE_NODETYPE(RETURN)
+HANDLE_NODETYPE(ARGUMENT)
+HANDLE_NODETYPE(Wrapper)
+HANDLE_NODETYPE(BR_IF)
+HANDLE_NODETYPE(TABLESWITCH)
+
+// add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
diff --git a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 518ef332a6c7..8390f797c43e 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -56,13 +56,68 @@ public:
 
   SDNode *Select(SDNode *Node) override;
 
+  bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+                                    std::vector<SDValue> &OutOps) override;
+
+// Include the pieces autogenerated from the target description.
+#include "WebAssemblyGenDAGISel.inc"
+
 private:
   // add select functions here...
 };
 } // end anonymous namespace
 
 SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
-  llvm_unreachable("TODO: implement Select");
+  // Dump information about the Node being selected.
+  DEBUG(errs() << "Selecting: ");
+  DEBUG(Node->dump(CurDAG));
+  DEBUG(errs() << "\n");
+
+  // If we have a custom node, we already have selected!
+  if (Node->isMachineOpcode()) {
+    DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+    Node->setNodeId(-1);
+    return nullptr;
+  }
+
+  // Handle the few cases that need custom selection.
+  SDNode *ResNode = nullptr;
+  EVT VT = Node->getValueType(0);
+
+  switch (Node->getOpcode()) {
+  default:
+    break;
+    // If we need WebAssembly-specific selection, it would go here.
+    (void)VT;
+  }
+
+  // Select the default instruction.
+  ResNode = SelectCode(Node);
+
+  DEBUG(errs() << "=> ");
+  if (ResNode == nullptr || ResNode == Node)
+    DEBUG(Node->dump(CurDAG));
+  else
+    DEBUG(ResNode->dump(CurDAG));
+  DEBUG(errs() << "\n");
+
+  return ResNode;
+}
+
+bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand(
+    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+  switch (ConstraintID) {
+  case InlineAsm::Constraint_i:
+  case InlineAsm::Constraint_m:
+    // We support only simple memory operands: a single address operand that
+    // needs no special handling.
+    OutOps.push_back(Op);
+    return false;
+  default:
+    break;
+  }
+
+  return true;
+}
 
 /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4184eb6dc5a6..7a89f788c1ad 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -17,10 +17,13 @@
 #include "WebAssemblyMachineFunctionInfo.h"
 #include "WebAssemblySubtarget.h"
 #include "WebAssemblyTargetMachine.h"
-#include "WebAssemblyTargetObjectFile.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/Support/CommandLine.h"
@@ -32,14 +35,254 @@ using namespace llvm;
 
 #define DEBUG_TYPE "wasm-lower"
 
+namespace {
+// Diagnostic information for unimplemented or unsupported feature reporting.
+// TODO: This code is copied from BPF and AMDGPU; consider factoring it out
+// and sharing code.
+class DiagnosticInfoUnsupported final : public DiagnosticInfo {
+private:
+  // Debug location where this diagnostic is triggered.
+  DebugLoc DLoc;
+  const Twine &Description;
+  const Function &Fn;
+  SDValue Value;
+
+  static int KindID;
+
+  static int getKindID() {
+    if (KindID == 0)
+      KindID = llvm::getNextAvailablePluginDiagnosticKind();
+    return KindID;
+  }
+
+public:
+  DiagnosticInfoUnsupported(SDLoc DLoc, const Function &Fn, const Twine &Desc,
+                            SDValue Value)
+      : DiagnosticInfo(getKindID(), DS_Error), DLoc(DLoc.getDebugLoc()),
+        Description(Desc), Fn(Fn), Value(Value) {}
+
+  void print(DiagnosticPrinter &DP) const override {
+    std::string Str;
+    raw_string_ostream OS(Str);
+
+    if (DLoc) {
+      auto DIL = DLoc.get();
+      StringRef Filename = DIL->getFilename();
+      unsigned Line = DIL->getLine();
+      unsigned Column = DIL->getColumn();
+      OS << Filename << ':' << Line << ':' << Column << ' ';
+    }
+
+    OS << "in function " << Fn.getName() << ' ' << *Fn.getFunctionType() << '\n'
+       << Description;
+    if (Value)
+      Value->print(OS);
+    OS << '\n';
+    OS.flush();
+    DP << Str;
+  }
+
+  static bool classof(const DiagnosticInfo *DI) {
+    return DI->getKind() == getKindID();
+  }
+};
+
+int DiagnosticInfoUnsupported::KindID = 0;
+} // end anonymous namespace
+
 WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     const TargetMachine &TM, const WebAssemblySubtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
+  auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
+
+  // Booleans always contain 0 or 1.
+  setBooleanContents(ZeroOrOneBooleanContent);
   // WebAssembly does not produce floating-point exceptions on normal floating
   // point operations.
   setHasFloatingPointExceptions(false);
   // We don't know the microarchitecture here, so just reduce register pressure.
   setSchedulingPreference(Sched::RegPressure);
+  // Tell ISel that we have a stack pointer.
+  setStackPointerRegisterToSaveRestore(
+      Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
+  // Set up the register classes.
+  addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
+  addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
+  addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
+  addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
+  // Compute derived properties from the register classes.
+  computeRegisterProperties(Subtarget->getRegisterInfo());
+
+  setOperationAction(ISD::GlobalAddress, MVTPtr, Custom);
+  setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom);
+  setOperationAction(ISD::JumpTable, MVTPtr, Custom);
+
+  // Take the default expansion for va_arg, va_copy, and va_end. There is no
+  // default action for va_start, so we handle that with custom lowering.
+  setOperationAction(ISD::VASTART, MVT::Other, Custom);
+  setOperationAction(ISD::VAARG, MVT::Other, Expand);
+  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+
+  for (auto T : {MVT::f32, MVT::f64}) {
+    // Don't expand the floating-point types to constant pools.
+    setOperationAction(ISD::ConstantFP, T, Legal);
+    // Expand floating-point comparisons.
+    for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
+                    ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
+      setCondCodeAction(CC, T, Expand);
+    // Expand floating-point library function operators.
+    for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW,
+                    ISD::FREM, ISD::FMA})
+      setOperationAction(Op, T, Expand);
+    // Note supported floating-point library function operators that otherwise
+    // default to expand.
+    for (auto Op :
+         {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
+      setOperationAction(Op, T, Legal);
+    // Support minnan and maxnan, which otherwise default to expand.
+    setOperationAction(ISD::FMINNAN, T, Legal);
+    setOperationAction(ISD::FMAXNAN, T, Legal);
+  }
+
+  for (auto T : {MVT::i32, MVT::i64}) {
+    // Expand unavailable integer operations.
+    for (auto Op :
+         {ISD::BSWAP, ISD::ROTL, ISD::ROTR, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+          ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS,
+          ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC,
+          ISD::SUBE}) {
+      setOperationAction(Op, T, Expand);
+    }
+  }
+
+  // As a special case, these operators use the type to mean the type to
+  // sign-extend from.
+  for (auto T : {MVT::i1, MVT::i8, MVT::i16, MVT::i32})
+    setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
+
+  // Dynamic stack allocation: use the default expansion.
+  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
+
+  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+
+  // Expand these forms; we pattern-match the forms that we can handle in isel.
+  for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
+    for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
+      setOperationAction(Op, T, Expand);
+
+  // We have custom switch handling.
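+  // (Editor's note, not in the original patch: "custom" here means BR_JT is
+  // rewritten into a WebAssemblyISD::TABLESWITCH node in LowerBR_JT below,
+  // so a three-case switch becomes roughly
+  //   tableswitch $index, default, bb0, bb1, bb2
+  // rather than a chain of compares and branches.)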
+  setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+
+  // WebAssembly doesn't have:
+  //  - Floating-point extending loads.
+  //  - Floating-point truncating stores.
+  //  - i1 extending loads.
+  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+  for (auto T : MVT::integer_valuetypes())
+    for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
+      setLoadExtAction(Ext, T, MVT::i1, Promote);
+
+  // Trap lowers to wasm unreachable.
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
+}
+
+FastISel *WebAssemblyTargetLowering::createFastISel(
+    FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
+  return WebAssembly::createFastISel(FuncInfo, LibInfo);
+}
+
+bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
+    const GlobalAddressSDNode * /*GA*/) const {
+  // All offsets can be folded.
+  return true;
+}
+
+MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
+                                                      EVT VT) const {
+  unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
+  if (BitWidth > 1 && BitWidth < 8)
+    BitWidth = 8;
+
+  if (BitWidth > 64) {
+    BitWidth = 64;
+    assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
+           "64-bit shift counts ought to be enough for anyone");
+  }
+
+  MVT Result = MVT::getIntegerVT(BitWidth);
+  assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
+         "Unable to represent scalar shift amount type");
+  return Result;
+}
+
+const char *
+WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
+  switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
+  case WebAssemblyISD::FIRST_NUMBER:
+    break;
+#define HANDLE_NODETYPE(NODE)                                                  \
+  case WebAssemblyISD::NODE:                                                   \
+    return "WebAssemblyISD::" #NODE;
+#include "WebAssemblyISD.def"
+#undef HANDLE_NODETYPE
+  }
+  return nullptr;
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
+    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
+  // First, see if this is a constraint that directly corresponds to a
+  // WebAssembly register class.
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    case 'r':
+      assert(VT != MVT::iPTR && "Pointer MVT not expected here");
+      if (VT.isInteger() && !VT.isVector()) {
+        if (VT.getSizeInBits() <= 32)
+          return std::make_pair(0U, &WebAssembly::I32RegClass);
+        if (VT.getSizeInBits() <= 64)
+          return std::make_pair(0U, &WebAssembly::I64RegClass);
+      }
+      break;
+    default:
+      break;
+    }
+  }
+
+  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+}
+
+bool WebAssemblyTargetLowering::isCheapToSpeculateCttz() const {
+  // Assume ctz is a relatively cheap operation.
+  return true;
+}
+
+bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
+  // Assume clz is a relatively cheap operation.
+  return true;
+}
+
+bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+                                                      const AddrMode &AM,
+                                                      Type *Ty,
+                                                      unsigned AS) const {
+  // WebAssembly offsets are added as unsigned without wrapping. The
+  // isLegalAddressingMode hook gives us no way to determine if wrapping could
+  // be happening, so we approximate this by accepting only non-negative
+  // offsets.
+  if (AM.BaseOffs < 0)
+    return false;
+
+  // WebAssembly has no scale register operands.
+  if (AM.Scale != 0)
+    return false;
+
+  // Everything else is legal.
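+  // Editor's examples (not in the original patch): under these rules,
+  //   base + 16    is legal and folds into a load/store offset,
+  //   base - 16    is rejected (a negative offset could wrap), and
+  //   base + 4*idx is rejected (wasm has no scaled-index addressing).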
+  return true;
+}
 
 //===----------------------------------------------------------------------===//
@@ -50,16 +293,359 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
 // Lowering Code
 //===----------------------------------------------------------------------===//
 
+static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  DAG.getContext()->diagnose(
+      DiagnosticInfoUnsupported(DL, *MF.getFunction(), msg, SDValue()));
+}
+
+// Test whether the given calling convention is supported.
+static bool CallingConvSupported(CallingConv::ID CallConv) {
+  // We currently support the language-independent target-independent
+  // conventions. We don't yet have a way to annotate calls with properties
+  // like "cold", and we don't have any call-clobbered registers, so these are
+  // mostly all handled the same.
+  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
+         CallConv == CallingConv::Cold ||
+         CallConv == CallingConv::PreserveMost ||
+         CallConv == CallingConv::PreserveAll ||
+         CallConv == CallingConv::CXX_FAST_TLS;
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
+                                     SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc DL = CLI.DL;
+  SDValue Chain = CLI.Chain;
+  SDValue Callee = CLI.Callee;
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  CallingConv::ID CallConv = CLI.CallConv;
+  if (!CallingConvSupported(CallConv))
+    fail(DL, DAG,
+         "WebAssembly doesn't support language-specific or target-specific "
+         "calling conventions yet");
+  if (CLI.IsPatchPoint)
+    fail(DL, DAG, "WebAssembly doesn't support patch point yet");
+
+  // WebAssembly doesn't currently support explicit tail calls. If they are
+  // required, fail. Otherwise, just disable them.
+  if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
+       MF.getTarget().Options.GuaranteedTailCallOpt) ||
+      (CLI.CS && CLI.CS->isMustTailCall()))
+    fail(DL, DAG, "WebAssembly doesn't support tail call yet");
+  CLI.IsTailCall = false;
+
+  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+
+  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+  if (Ins.size() > 1)
+    fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet");
+
+  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+  for (const ISD::OutputArg &Out : Outs) {
+    if (Out.Flags.isByVal())
+      fail(DL, DAG, "WebAssembly hasn't implemented byval arguments");
+    if (Out.Flags.isNest())
+      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
+    if (Out.Flags.isInAlloca())
+      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
+    if (Out.Flags.isInConsecutiveRegs())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
+    if (Out.Flags.isInConsecutiveRegsLast())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
+  }
+
+  bool IsVarArg = CLI.IsVarArg;
+  unsigned NumFixedArgs = CLI.NumFixedArgs;
+  auto PtrVT = getPointerTy(MF.getDataLayout());
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
+
+  if (IsVarArg) {
+    // Outgoing non-fixed arguments are placed at the top of the stack. First
+    // compute their offsets and the total amount of argument stack space
+    // needed.
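+    // Editor's worked example (not in the original patch): for a call like
+    // printf(fmt, 1, 2.0), the two non-fixed arguments get
+    //   i32 1   -> offset 0 (alloc size 4, align 4)
+    //   f64 2.0 -> offset 8 (alloc size 8, align 8)
+    // so getAlignedCallFrameSize() below reports 16 bytes.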
+    for (SDValue Arg :
+         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+      EVT VT = Arg.getValueType();
+      assert(VT != MVT::iPTR && "Legalized args should be concrete");
+      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+      unsigned Offset =
+          CCInfo.AllocateStack(MF.getDataLayout().getTypeAllocSize(Ty),
+                               MF.getDataLayout().getABITypeAlignment(Ty));
+      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
+                                        Offset, VT.getSimpleVT(),
+                                        CCValAssign::Full));
+    }
+  }
+
+  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
+
+  SDValue NB;
+  if (NumBytes) {
+    NB = DAG.getConstant(NumBytes, DL, PtrVT, true);
+    Chain = DAG.getCALLSEQ_START(Chain, NB, DL);
+  }
+
+  if (IsVarArg) {
+    // For non-fixed arguments, next emit stores to store the argument values
+    // to the stack at the offsets computed above.
+    SDValue SP = DAG.getCopyFromReg(
+        Chain, DL, getStackPointerRegisterToSaveRestore(), PtrVT);
+    unsigned ValNo = 0;
+    SmallVector<SDValue, 8> Chains;
+    for (SDValue Arg :
+         make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+      assert(ArgLocs[ValNo].getValNo() == ValNo &&
+             "ArgLocs should remain in order and only hold varargs args");
+      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
+      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, SP,
+                                DAG.getConstant(Offset, DL, PtrVT));
+      Chains.push_back(DAG.getStore(Chain, DL, Arg, Add,
+                                    MachinePointerInfo::getStack(MF, Offset),
+                                    false, false, 0));
+    }
+    if (!Chains.empty())
+      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+  }
+
+  // Compute the operands for the CALLn node.
+  SmallVector<SDValue, 16> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Callee);
+
+  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
+  // isn't reliable.
+  Ops.append(OutVals.begin(),
+             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
+
+  SmallVector<EVT, 8> Tys;
+  for (const auto &In : Ins) {
+    assert(!In.Flags.isByVal() && "byval is not valid for return values");
+    assert(!In.Flags.isNest() && "nest is not valid for return values");
+    if (In.Flags.isInAlloca())
+      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
+    if (In.Flags.isInConsecutiveRegs())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
+    if (In.Flags.isInConsecutiveRegsLast())
+      fail(DL, DAG,
+           "WebAssembly hasn't implemented cons regs last return values");
+    // Ignore In.getOrigAlign() because all our arguments are passed in
+    // registers.
+    Tys.push_back(In.VT);
+  }
+  Tys.push_back(MVT::Other);
+  SDVTList TyList = DAG.getVTList(Tys);
+  SDValue Res =
+      DAG.getNode(Ins.empty() ? WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1,
+                  DL, TyList, Ops);
+  if (Ins.empty()) {
+    Chain = Res;
+  } else {
+    InVals.push_back(Res);
+    Chain = Res.getValue(1);
+  }
+
+  if (NumBytes) {
+    SDValue Unused = DAG.getTargetConstant(0, DL, PtrVT);
+    Chain = DAG.getCALLSEQ_END(Chain, NB, Unused, SDValue(), DL);
+  }
+
+  return Chain;
+}
+
+bool WebAssemblyTargetLowering::CanLowerReturn(
+    CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    LLVMContext & /*Context*/) const {
+  // WebAssembly can't currently handle returning tuples.
+  return Outs.size() <= 1;
+}
+
+SDValue WebAssemblyTargetLowering::LowerReturn(
+    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
+    SelectionDAG &DAG) const {
+  assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
+  if (!CallingConvSupported(CallConv))
+    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
+
+  SmallVector<SDValue, 4> RetOps(1, Chain);
+  RetOps.append(OutVals.begin(), OutVals.end());
+  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
+
+  // Record the number and types of the return values.
+  for (const ISD::OutputArg &Out : Outs) {
+    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
+    assert(!Out.Flags.isNest() && "nest is not valid for return values");
+    assert(Out.IsFixed && "non-fixed return value is not valid");
+    if (Out.Flags.isInAlloca())
+      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
+    if (Out.Flags.isInConsecutiveRegs())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
+    if (Out.Flags.isInConsecutiveRegsLast())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
+  }
+
+  return Chain;
+}
+
+SDValue WebAssemblyTargetLowering::LowerFormalArguments(
+    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
+    const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  if (!CallingConvSupported(CallConv))
+    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
+
+  // Set up the incoming ARGUMENTS value, which serves to represent the
+  // liveness of the incoming values before they're represented by virtual
+  // registers.
+  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
+
+  for (const ISD::InputArg &In : Ins) {
+    if (In.Flags.isByVal())
+      fail(DL, DAG, "WebAssembly hasn't implemented byval arguments");
+    if (In.Flags.isInAlloca())
+      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
+    if (In.Flags.isNest())
+      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
+    if (In.Flags.isInConsecutiveRegs())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
+    if (In.Flags.isInConsecutiveRegsLast())
+      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
+    // Ignore In.getOrigAlign() because all our arguments are passed in
+    // registers.
+    InVals.push_back(
+        In.Used
+            ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
+                          DAG.getTargetConstant(InVals.size(), DL, MVT::i32))
+            : DAG.getUNDEF(In.VT));
+
+    // Record the number and types of arguments.
+    MF.getInfo<WebAssemblyFunctionInfo>()->addParam(In.VT);
+  }
+
+  // Incoming varargs arguments are on the stack and will be accessed through
+  // va_arg, so we don't need to do anything for them here.
+
+  return Chain;
+}
+
 //===----------------------------------------------------------------------===//
-// Other Lowering Code
+// Custom lowering hooks.
 //===----------------------------------------------------------------------===//
 
+SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+  default:
+    llvm_unreachable("unimplemented operation lowering");
+    return SDValue();
+  case ISD::FrameIndex:
+    return LowerFrameIndex(Op, DAG);
+  case ISD::GlobalAddress:
+    return LowerGlobalAddress(Op, DAG);
+  case ISD::ExternalSymbol:
+    return LowerExternalSymbol(Op, DAG);
+  case ISD::JumpTable:
+    return LowerJumpTable(Op, DAG);
+  case ISD::BR_JT:
+    return LowerBR_JT(Op, DAG);
+  case ISD::VASTART:
+    return LowerVASTART(Op, DAG);
+  }
+}
+
+SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  int FI = cast<FrameIndexSDNode>(Op)->getIndex();
+  return DAG.getTargetFrameIndex(FI, Op.getValueType());
+}
+
+SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  const auto *GA = cast<GlobalAddressSDNode>(Op);
+  EVT VT = Op.getValueType();
+  assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+  if (GA->getAddressSpace() != 0)
+    fail(DL, DAG, "WebAssembly only expects the 0 address space");
+  return DAG.getNode(
+      WebAssemblyISD::Wrapper, DL, VT,
+      DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset()));
+}
+
+SDValue
+WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  const auto *ES = cast<ExternalSymbolSDNode>(Op);
+  EVT VT = Op.getValueType();
+  assert(ES->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+  return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
+                     DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
+}
+
+SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  // There's no need for a Wrapper node because we always incorporate a jump
+  // table operand into a TABLESWITCH instruction, rather than ever
+  // materializing it in a register.
+  const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
+                                JT->getTargetFlags());
+}
+
+SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op.getOperand(0);
+  const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
+  SDValue Index = Op.getOperand(2);
+  assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  Ops.push_back(Index);
+
+  MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
+  const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
+
+  // TODO: For now, we just pick something arbitrary for the default case.
+  // We really want to sniff out the guard and put in the real default case
+  // (and delete the guard).
+  Ops.push_back(DAG.getBasicBlock(MBBs[0]));
+
+  // Add an operand for each case.
+  for (auto MBB : MBBs)
+    Ops.push_back(DAG.getBasicBlock(MBB));
+
+  return DAG.getNode(WebAssemblyISD::TABLESWITCH, DL, MVT::Other, Ops);
+}
+
+SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
+
+  // The incoming non-fixed arguments are placed on the top of the stack, with
+  // natural alignment, at the point of the call, so the base pointer is just
+  // the current frame pointer.
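+  // Editor's sketch (not in the original patch): the store built below is
+  // morally
+  //   *(char **)ap = frame_pointer;
+  // after which va_arg walks that pointer upward with natural alignment.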
+  DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true);
+  unsigned FP =
+      Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
+  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FP, PtrVT);
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  return DAG.getStore(Op.getOperand(0), DL, FrameAddr, Op.getOperand(1),
+                      MachinePointerInfo(SV), false, false, 0);
+}
+
 //===----------------------------------------------------------------------===//
 // WebAssembly Optimization Hooks
 //===----------------------------------------------------------------------===//
-
-MCSection *WebAssemblyTargetObjectFile::SelectSectionForGlobal(
-    const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
-    const TargetMachine &TM) const {
-  return getDataSection();
-}
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index efd60a7bacd6..e7232a042e12 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -22,10 +22,11 @@ namespace llvm {
 
 namespace WebAssemblyISD {
 
-enum {
+enum NodeType : unsigned {
   FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
-  // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here...
+#define HANDLE_NODETYPE(NODE) NODE,
+#include "WebAssemblyISD.def"
+#undef HANDLE_NODETYPE
 };
 
 } // end namespace WebAssemblyISD
@@ -42,8 +43,51 @@ private:
   /// Keep a pointer to the WebAssemblySubtarget around so that we can make the
   /// right decision when generating code for different targets.
   const WebAssemblySubtarget *Subtarget;
+
+  FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+                           const TargetLibraryInfo *LibInfo) const override;
+  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;
+  const char *getTargetNodeName(unsigned Opcode) const override;
+  std::pair<unsigned, const TargetRegisterClass *>
+  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+                               StringRef Constraint, MVT VT) const override;
+  bool isCheapToSpeculateCttz() const override;
+  bool isCheapToSpeculateCtlz() const override;
+  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+                             unsigned AS) const override;
+
+  SDValue LowerCall(CallLoweringInfo &CLI,
+                    SmallVectorImpl<SDValue> &InVals) const override;
+  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+                      bool isVarArg,
+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      LLVMContext &Context) const override;
+  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
+                      SelectionDAG &DAG) const override;
+  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
+                               bool IsVarArg,
+                               const SmallVectorImpl<ISD::InputArg> &Ins,
+                               SDLoc DL, SelectionDAG &DAG,
+                               SmallVectorImpl<SDValue> &InVals) const override;
+
+  // Custom lowering hooks.
+  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+  SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
 };
 
+namespace WebAssembly {
+FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+                         const TargetLibraryInfo *libInfo);
+} // end namespace WebAssembly
+
 } // end namespace llvm
 
 #endif
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 6b5b6cd54173..cfa1519e6d99 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -12,10 +12,63 @@
 ///
 //===----------------------------------------------------------------------===//
 
-/*
- * TODO(jfb): Add the following.
- *
- * call_direct: call function directly
- * call_indirect: call function indirectly
- * addressof: obtain a function pointer value for a given function
- */
+// TODO: addr64: These currently assume the callee address is 32-bit.
+
+let Defs = [ARGUMENTS] in {
+
+// Call sequence markers. These have an immediate which represents the amount
+// of stack space to allocate or free, which is used for varargs lowering.
+let Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 in {
+def ADJCALLSTACKDOWN : I<(outs), (ins i32imm:$amt),
+                         [(WebAssemblycallseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt, i32imm:$amt2),
+                       [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>;
+} // isCodeGenOnly = 1
+
+multiclass CALL<WebAssemblyRegClass vt, string prefix> {
+  def CALL_#vt : I<(outs vt:$dst), (ins i32imm:$callee, variable_ops),
+                   [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))],
+                   !strconcat(prefix, "call\t$dst, $callee")>;
+  def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
+                            [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
+                            !strconcat(prefix, "call_indirect\t$dst, $callee")>;
+}
+let Uses = [SP32, SP64], isCall = 1 in {
+  defm : CALL<I32, "i32.">;
+  defm : CALL<I64, "i64.">;
+  defm : CALL<F32, "f32.">;
+  defm : CALL<F64, "f64.">;
+
+  def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops),
+                    [(WebAssemblycall0 (i32 imm:$callee))],
+                    "call \t$callee">;
+  def CALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
+                             [(WebAssemblycall0 I32:$callee)],
+                             "call_indirect\t$callee">;
+} // Uses = [SP32,SP64], isCall = 1
+
+} // Defs = [ARGUMENTS]
+
+// Patterns for matching a direct call to a global address.
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_I32 tglobaladdr:$callee)>;
+def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_I64 tglobaladdr:$callee)>;
+def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_F32 tglobaladdr:$callee)>;
+def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+          (CALL_F64 tglobaladdr:$callee)>;
+def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
+          (CALL_VOID tglobaladdr:$callee)>;
+
+// Patterns for matching a direct call to an external symbol.
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_I32 texternalsym:$callee)>;
+def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_I64 texternalsym:$callee)>;
+def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_F32 texternalsym:$callee)>;
+def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+          (CALL_F64 texternalsym:$callee)>;
+def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
+          (CALL_VOID texternalsym:$callee)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
new file mode 100644
index 000000000000..05efe8903413
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -0,0 +1,82 @@
+//===- WebAssemblyInstrControl.td-WebAssembly control-flow ------*- tablegen -*-
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief WebAssembly control-flow code-gen constructs.
+///
+//===----------------------------------------------------------------------===//
+
+let Defs = [ARGUMENTS] in {
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+// The condition operand is a boolean value which WebAssembly represents as
+// i32.
+def BR_IF : I<(outs), (ins I32:$cond, bb_op:$dst),
+              [(brcond I32:$cond, bb:$dst)],
+              "br_if \t$cond, $dst">;
+let isCodeGenOnly = 1 in
+def BR_UNLESS : I<(outs), (ins I32:$cond, bb_op:$dst), [],
+                  "br_unless\t$cond, $dst">;
+let isBarrier = 1 in {
+def BR : I<(outs), (ins bb_op:$dst),
+           [(br bb:$dst)],
+           "br \t$dst">;
+} // isBarrier = 1
+} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
+
+} // Defs = [ARGUMENTS]
+
+def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
+          (BR_IF I32:$cond, bb_op:$dst)>;
+def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
+          (BR_UNLESS I32:$cond, bb_op:$dst)>;
+
+let Defs = [ARGUMENTS] in {
+
+// TODO: SelectionDAG's lowering insists on using a pointer as the index for
+// jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode
+// currently.
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops),
+                        [(WebAssemblytableswitch I32:$index, bb:$default)],
+                        "tableswitch\t$index, $default">;
+def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops),
+                        [(WebAssemblytableswitch I64:$index, bb:$default)],
+                        "tableswitch\t$index, $default">;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+// Placemarkers to indicate the start of a block or loop scope. These
+// use/clobber EXPR_STACK to prevent them from being moved into the middle of
+// an expression tree.
+let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in {
+def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">;
+def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">;
+} // Uses = [EXPR_STACK], Defs = [EXPR_STACK]
+
+// No-op to indicate to the AsmPrinter that a loop ends here, so a
+// basic block label is needed even if it wouldn't otherwise appear so.
+let isTerminator = 1, hasCtrlDep = 1 in
+def LOOP_END : I<(outs), (ins), []>;
+
+multiclass RETURN<WebAssemblyRegClass vt> {
+  def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)],
+                     "return \t$val">;
+}
+
+let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+let isReturn = 1 in {
+  defm : RETURN<I32>;
+  defm : RETURN<I64>;
+  defm : RETURN<F32>;
+  defm : RETURN<F64>;
+  def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">;
+} // isReturn = 1
+  def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable">;
+} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
+
+} // Defs = [ARGUMENTS]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index 3fa29061b1de..931f4a913d0f 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -13,32 +13,99 @@
 ///
 //===----------------------------------------------------------------------===//
 
-/*
- * TODO(jfb): Add the following.
- *
- * int32.wrap[int64]: wrap a 64-bit integer to a 32-bit integer
- * int32.trunc_signed[float32]: truncate a 32-bit float to a signed 32-bit integer
- * int32.trunc_signed[float64]: truncate a 64-bit float to a signed 32-bit integer
- * int32.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 32-bit integer
- * int32.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 32-bit integer
- * int32.reinterpret[float32]: reinterpret the bits of a 32-bit float as a 32-bit integer
- * int64.extend_signed[int32]: extend a signed 32-bit integer to a 64-bit integer
- * int64.extend_unsigned[int32]: extend an unsigned 32-bit integer to a 64-bit integer
- * int64.trunc_signed[float32]: truncate a 32-bit float to a signed 64-bit integer
- * int64.trunc_signed[float64]: truncate a 64-bit float to a signed 64-bit integer
- * int64.trunc_unsigned[float32]: truncate a 32-bit float to an unsigned 64-bit integer
- * int64.trunc_unsigned[float64]: truncate a 64-bit float to an unsigned 64-bit integer
- * int64.reinterpret[float64]: reinterpret the bits of a 64-bit float as a 64-bit integer
- * float32.demote[float64]: demote a 64-bit float to a 32-bit float
- * float32.cvt_signed[int32]: convert a signed 32-bit integer to a 32-bit float
- * float32.cvt_signed[int64]: convert a signed 64-bit integer to a 32-bit float
- * float32.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 32-bit float
- * float32.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 32-bit float
- * float32.reinterpret[int32]: reinterpret the bits of a 32-bit integer as a 32-bit float
- * float64.promote[float32]: promote a 32-bit float to a 64-bit float
- * float64.cvt_signed[int32]: convert a signed 32-bit integer to a 64-bit float
- * float64.cvt_signed[int64]: convert a signed 64-bit integer to a 64-bit float
- * float64.cvt_unsigned[int32]: convert an unsigned 32-bit integer to a 64-bit float
- * float64.cvt_unsigned[int64]: convert an unsigned 64-bit integer to a 64-bit float
- * float64.reinterpret[int64]: reinterpret the bits of a 64-bit integer as a 64-bit float
- */
+let Defs = [ARGUMENTS] in {
+
+def I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src),
+                     [(set I32:$dst, (trunc I64:$src))],
+                     "i32.wrap/i64\t$dst, $src">;
+
+def I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src),
+                         [(set I64:$dst, (sext I32:$src))],
+                         "i64.extend_s/i32\t$dst, $src">;
+def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src),
+                         [(set I64:$dst, (zext I32:$src))],
+                         "i64.extend_u/i32\t$dst, $src">;
+
+} // defs = [ARGUMENTS]
+
+// Expand a "don't care" extend into zero-extend (chosen over sign-extend
+// somewhat arbitrarily, although it favors popular hardware architectures
+// and is conceptually a simpler operation).
+def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Conversion from floating point to integer traps on overflow and invalid.
+let hasSideEffects = 1 in {
+def I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src),
+                        [(set I32:$dst, (fp_to_sint F32:$src))],
+                        "i32.trunc_s/f32\t$dst, $src">;
+def I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src),
+                        [(set I32:$dst, (fp_to_uint F32:$src))],
+                        "i32.trunc_u/f32\t$dst, $src">;
+def I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src),
+                        [(set I64:$dst, (fp_to_sint F32:$src))],
+                        "i64.trunc_s/f32\t$dst, $src">;
+def I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src),
+                        [(set I64:$dst, (fp_to_uint F32:$src))],
+                        "i64.trunc_u/f32\t$dst, $src">;
+def I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src),
+                        [(set I32:$dst, (fp_to_sint F64:$src))],
+                        "i32.trunc_s/f64\t$dst, $src">;
+def I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src),
+                        [(set I32:$dst, (fp_to_uint F64:$src))],
+                        "i32.trunc_u/f64\t$dst, $src">;
+def I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src),
+                        [(set I64:$dst, (fp_to_sint F64:$src))],
+                        "i64.trunc_s/f64\t$dst, $src">;
+def I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src),
+                        [(set I64:$dst, (fp_to_uint F64:$src))],
+                        "i64.trunc_u/f64\t$dst, $src">;
+} // hasSideEffects = 1
+
+def F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src),
+                          [(set F32:$dst, (sint_to_fp I32:$src))],
+                          "f32.convert_s/i32\t$dst, $src">;
+def F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src),
+                          [(set F32:$dst, (uint_to_fp I32:$src))],
+                          "f32.convert_u/i32\t$dst, $src">;
+def F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src),
+                          [(set F64:$dst, (sint_to_fp I32:$src))],
+                          "f64.convert_s/i32\t$dst, $src">;
+def F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src),
+                          [(set F64:$dst, (uint_to_fp I32:$src))],
+                          "f64.convert_u/i32\t$dst, $src">;
+def F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src),
+                          [(set F32:$dst, (sint_to_fp I64:$src))],
+                          "f32.convert_s/i64\t$dst, $src">;
+def F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src),
+                          [(set F32:$dst, (uint_to_fp I64:$src))],
+                          "f32.convert_u/i64\t$dst, $src">;
+def F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src),
+                          [(set F64:$dst, (sint_to_fp I64:$src))],
+                          "f64.convert_s/i64\t$dst, $src">;
+def F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src),
+                          [(set F64:$dst, (uint_to_fp I64:$src))],
+                          "f64.convert_u/i64\t$dst, $src">;
+
+def F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src),
+                        [(set F64:$dst, (fextend F32:$src))],
+                        "f64.promote/f32\t$dst, $src">;
+def F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src),
+                       [(set F32:$dst, (fround F64:$src))],
+                       "f32.demote/f64\t$dst, $src">;
+
+def I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src),
+                            [(set I32:$dst, (bitconvert F32:$src))],
+                            "i32.reinterpret/f32\t$dst, $src">;
+def F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src),
+                            [(set F32:$dst, (bitconvert I32:$src))],
+                            "f32.reinterpret/i32\t$dst, $src">;
+def I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src),
+                            [(set I64:$dst, (bitconvert F64:$src))],
+                            "i64.reinterpret/f64\t$dst, $src">;
+def F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src),
+                            [(set F64:$dst, (bitconvert I64:$src))],
+                            "f64.reinterpret/i64\t$dst, $src">;
+
+} // Defs = [ARGUMENTS]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 30ef6339d65a..5520c6de6732 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -12,33 +12,90 @@
 ///
 //===----------------------------------------------------------------------===//
 
-defm FADD : BinaryFP<fadd>;
-defm FSUB : BinaryFP<fsub>;
-defm FMUL : BinaryFP<fmul>;
-defm FDIV : BinaryFP<fdiv>;
-defm FABS : UnaryFP<fabs>;
-defm FNEG : UnaryFP<fneg>;
-defm COPYSIGN : BinaryFP<fcopysign>;
-defm CEIL : UnaryFP<fceil>;
-defm FLOOR : UnaryFP<ffloor>;
-defm TRUNC : UnaryFP<ftrunc>;
-defm NEARESTINT : UnaryFP<fnearbyint>;
+let Defs = [ARGUMENTS] in {
 
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * float32.eq: compare equal
- * float32.lt: less than
- * float32.le: less than or equal
- * float32.gt: greater than
- * float32.ge: greater than or equal
- */
+let isCommutable = 1 in
+defm ADD : BinaryFP<fadd, "add">;
+defm SUB : BinaryFP<fsub, "sub">;
+let isCommutable = 1 in
+defm MUL : BinaryFP<fmul, "mul">;
+defm DIV : BinaryFP<fdiv, "div">;
+defm SQRT : UnaryFP<fsqrt, "sqrt">;
 
-defm SQRT : UnaryFP<fsqrt>;
+defm ABS : UnaryFP<fabs, "abs">;
+defm NEG : UnaryFP<fneg, "neg">;
+defm COPYSIGN : BinaryFP<fcopysign, "copysign">;
 
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * float32.min: minimum (binary operator); if either operand is NaN, returns NaN
- * float32.max: maximum (binary operator); if either operand is NaN, returns NaN
- */
+let isCommutable = 1 in {
+defm MIN : BinaryFP<fminnan, "min">;
+defm MAX : BinaryFP<fmaxnan, "max">;
+} // isCommutable = 1
+
+defm CEIL : UnaryFP<fceil, "ceil">;
+defm FLOOR : UnaryFP<ffloor, "floor">;
+defm TRUNC : UnaryFP<ftrunc, "trunc">;
+defm NEAREST : UnaryFP<fnearbyint, "nearest">;
+
+} // Defs = [ARGUMENTS]
+
+// DAGCombine oddly folds casts into the rhs of copysign. Unfold them.
+def : Pat<(fcopysign F64:$lhs, F32:$rhs),
+          (COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>;
+def : Pat<(fcopysign F32:$lhs, F64:$rhs),
+          (COPYSIGN_F32 F32:$lhs, (F32_DEMOTE_F64 F64:$rhs))>;
+
+// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint.
+def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
+def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+let isCommutable = 1 in {
+defm EQ : ComparisonFP<SETOEQ, "eq">;
+defm NE : ComparisonFP<SETUNE, "ne">;
+} // isCommutable = 1
+defm LT : ComparisonFP<SETOLT, "lt">;
+defm LE : ComparisonFP<SETOLE, "le">;
+defm GT : ComparisonFP<SETOGT, "gt">;
+defm GE : ComparisonFP<SETOGE, "ge">;
+
+} // Defs = [ARGUMENTS]
+
+// Don't care floating-point comparisons, supported via other comparisons.
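+// (Editor's note, not in the original patch: a "don't care" node such as
+// seteq carries no NaN-ordering requirement, so mapping it to an ordered
+// wasm comparison below, e.g. seteq -> f32.eq, is one legal choice among
+// several.)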
+def : Pat<(seteq f32:$lhs, f32:$rhs), (EQ_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setne f32:$lhs, f32:$rhs), (NE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setlt f32:$lhs, f32:$rhs), (LT_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setle f32:$lhs, f32:$rhs), (LE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setgt f32:$lhs, f32:$rhs), (GT_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(setge f32:$lhs, f32:$rhs), (GE_F32 f32:$lhs, f32:$rhs)>;
+def : Pat<(seteq f64:$lhs, f64:$rhs), (EQ_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setne f64:$lhs, f64:$rhs), (NE_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setlt f64:$lhs, f64:$rhs), (LT_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setle f64:$lhs, f64:$rhs), (LE_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setgt f64:$lhs, f64:$rhs), (GT_F64 f64:$lhs, f64:$rhs)>;
+def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>;
+
+let Defs = [ARGUMENTS] in {
+
+def SELECT_F32 : I<(outs F32:$dst), (ins I32:$cond, F32:$lhs, F32:$rhs),
+                   [(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))],
+                   "f32.select\t$dst, $cond, $lhs, $rhs">;
+def SELECT_F64 : I<(outs F64:$dst), (ins I32:$cond, F64:$lhs, F64:$rhs),
+                   [(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))],
+                   "f64.select\t$dst, $cond, $lhs, $rhs">;
+
+} // Defs = [ARGUMENTS]
+
+// ISD::SELECT requires its operand to conform to getBooleanContents, but
+// WebAssembly's select interprets any non-zero value as true, so we can fold
+// a setne with 0 into a select.
+def : Pat<(select (i32 (setne I32:$cond, 0)), F32:$lhs, F32:$rhs),
+          (SELECT_F32 I32:$cond, F32:$lhs, F32:$rhs)>;
+def : Pat<(select (i32 (setne I32:$cond, 0)), F64:$lhs, F64:$rhs),
+          (SELECT_F64 I32:$cond, F64:$lhs, F64:$rhs)>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs),
+          (SELECT_F32 I32:$cond, F32:$rhs, F32:$lhs)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs),
+          (SELECT_F64 I32:$cond, F64:$rhs, F64:$lhs)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 513c36fa2ec2..8008dd32353a 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -1,4 +1,4 @@
-// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-//
+//=- WebAssemblyInstrFormats.td - WebAssembly Instr. Formats -*- tablegen -*-=//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -12,44 +12,68 @@
 ///
 //===----------------------------------------------------------------------===//
 
-// WebAssembly Instruction Format
-class WebAssemblyInst<string cstr> : Instruction {
+// WebAssembly Instruction Format.
+class WebAssemblyInst<string asmstr> : Instruction {
   field bits<0> Inst; // Instruction encoding.
   let Namespace = "WebAssembly";
   let Pattern = [];
-  let Constraints = cstr;
+  let AsmString = asmstr;
 }
 
-// Normal instructions
-class I<dag oops, dag iops, list<dag> pattern, string cstr = "">
-    : WebAssemblyInst<cstr> {
+// Normal instructions.
+class I<dag oops, dag iops, list<dag> pattern, string asmstr = "">
+    : WebAssemblyInst<asmstr> {
   dag OutOperandList = oops;
   dag InOperandList = iops;
   let Pattern = pattern;
 }
 
 // Unary and binary instructions, for the local types that WebAssembly supports.
-multiclass UnaryInt<SDNode node> {
- def _I32 : I<(outs Int32:$dst), (ins Int32:$src),
- [(set Int32:$dst, (node Int32:$src))]>;
- def _I64 : I<(outs Int64:$dst), (ins Int64:$src),
- [(set Int64:$dst, (node Int64:$src))]>;
+multiclass UnaryInt<SDNode node, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$src),
+ [(set I32:$dst, (node I32:$src))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $src"))>;
+ def _I64 : I<(outs I64:$dst), (ins I64:$src),
+ [(set I64:$dst, (node I64:$src))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $src"))>;
}
-multiclass BinaryInt<SDNode node> {
- def _I32 : I<(outs Int32:$dst), (ins Int32:$lhs, Int32:$rhs),
- [(set Int32:$dst, (node Int32:$lhs, Int32:$rhs))]>;
- def _I64 : I<(outs Int64:$dst), (ins Int64:$lhs, Int64:$rhs),
- [(set Int64:$dst, (node Int64:$lhs, Int64:$rhs))]>;
+multiclass BinaryInt<SDNode node, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (node I32:$lhs, I32:$rhs))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs),
+ [(set I64:$dst, (node I64:$lhs, I64:$rhs))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
}
-multiclass UnaryFP<SDNode node> {
- def _F32 : I<(outs Float32:$dst), (ins Float32:$src),
- [(set Float32:$dst, (node Float32:$src))]>;
- def _F64 : I<(outs Float64:$dst), (ins Float64:$src),
- [(set Float64:$dst, (node Float64:$src))]>;
+multiclass UnaryFP<SDNode node, string name> {
+ def _F32 : I<(outs F32:$dst), (ins F32:$src),
+ [(set F32:$dst, (node F32:$src))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $src"))>;
+ def _F64 : I<(outs F64:$dst), (ins F64:$src),
+ [(set F64:$dst, (node F64:$src))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $src"))>;
}
-multiclass BinaryFP<SDNode node> {
- def _F32 : I<(outs Float32:$dst), (ins Float32:$lhs, Float32:$rhs),
- [(set Float32:$dst, (node Float32:$lhs, Float32:$rhs))]>;
- def _F64 : I<(outs Float64:$dst), (ins Float64:$lhs, Float64:$rhs),
- [(set Float64:$dst, (node Float64:$lhs, Float64:$rhs))]>;
+multiclass BinaryFP<SDNode node, string name> {
+ def _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs),
+ [(set F32:$dst, (node F32:$lhs, F32:$rhs))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs),
+ [(set F64:$dst, (node F64:$lhs, F64:$rhs))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
+multiclass ComparisonInt<CondCode cond, string name> {
+ def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
+ [(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs),
+ [(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+}
+multiclass ComparisonFP<CondCode cond, string name> {
+ def _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs),
+ [(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs),
+ [(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
}
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index ea8937c8f9f2..5e7663cdb506 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -24,5 +24,136 @@ using namespace llvm;
 #define DEBUG_TYPE "wasm-instr-info"
+#define GET_INSTRINFO_CTOR_DTOR
+#include "WebAssemblyGenInstrInfo.inc"
+
 WebAssemblyInstrInfo::WebAssemblyInstrInfo(const
WebAssemblySubtarget &STI) - : RI(STI.getTargetTriple()) {} + : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN, + WebAssembly::ADJCALLSTACKUP), + RI(STI.getTargetTriple()) {} + +void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + DebugLoc DL, unsigned DestReg, + unsigned SrcReg, bool KillSrc) const { + // This method is called by post-RA expansion, which expects only pregs to + // exist. However we need to handle both here. + auto &MRI = MBB.getParent()->getRegInfo(); + const TargetRegisterClass *RC = TargetRegisterInfo::isVirtualRegister(DestReg) ? + MRI.getRegClass(DestReg) : + MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(SrcReg); + + unsigned CopyLocalOpcode; + if (RC == &WebAssembly::I32RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_I32; + else if (RC == &WebAssembly::I64RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_I64; + else if (RC == &WebAssembly::F32RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_F32; + else if (RC == &WebAssembly::F64RegClass) + CopyLocalOpcode = WebAssembly::COPY_LOCAL_F64; + else + llvm_unreachable("Unexpected register class"); + + BuildMI(MBB, I, DL, get(CopyLocalOpcode), DestReg) + .addReg(SrcReg, KillSrc ? RegState::Kill : 0); +} + +// Branch analysis. +bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool /*AllowModify*/) const { + bool HaveCond = false; + for (MachineInstr &MI : MBB.terminators()) { + switch (MI.getOpcode()) { + default: + // Unhandled instruction; bail out. + return true; + case WebAssembly::BR_IF: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(true)); + Cond.push_back(MI.getOperand(0)); + TBB = MI.getOperand(1).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR_UNLESS: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(false)); + Cond.push_back(MI.getOperand(0)); + TBB = MI.getOperand(1).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR: + if (!HaveCond) + TBB = MI.getOperand(0).getMBB(); + else + FBB = MI.getOperand(0).getMBB(); + break; + } + if (MI.isBarrier()) + break; + } + + return false; +} + +unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::instr_iterator I = MBB.instr_end(); + unsigned Count = 0; + + while (I != MBB.instr_begin()) { + --I; + if (I->isDebugValue()) + continue; + if (!I->isTerminator()) + break; + // Remove the branch. 
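+ // (eraseFromParent() invalidates I, so reset it to instr_end() and
+ // rescan the terminators from the end of the block.)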
+ I->eraseFromParent(); + I = MBB.instr_end(); + ++Count; + } + + return Count; +} + +unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + ArrayRef Cond, + DebugLoc DL) const { + if (Cond.empty()) { + if (!TBB) + return 0; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(TBB); + return 1; + } + + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + + if (Cond[0].getImm()) { + BuildMI(&MBB, DL, get(WebAssembly::BR_IF)) + .addOperand(Cond[1]) + .addMBB(TBB); + } else { + BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)) + .addOperand(Cond[1]) + .addMBB(TBB); + } + if (!FBB) + return 1; + + BuildMI(&MBB, DL, get(WebAssembly::BR)).addMBB(FBB); + return 2; +} + +bool WebAssemblyInstrInfo::ReverseBranchCondition( + SmallVectorImpl &Cond) const { + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm()); + return false; +} diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index 1c4ae22f16d6..5ddd9b36f243 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -19,17 +19,35 @@ #include "WebAssemblyRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#define GET_INSTRINFO_HEADER +#include "WebAssemblyGenInstrInfo.inc" + namespace llvm { class WebAssemblySubtarget; -class WebAssemblyInstrInfo final { +class WebAssemblyInstrInfo final : public WebAssemblyGenInstrInfo { const WebAssemblyRegisterInfo RI; public: explicit WebAssemblyInstrInfo(const WebAssemblySubtarget &STI); const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } + + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc DL, unsigned DestReg, unsigned SrcReg, + bool KillSrc) const override; + + bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify = false) const override; + unsigned RemoveBranch(MachineBasicBlock &MBB) const override; + unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, ArrayRef Cond, + DebugLoc DL) const override; + bool + ReverseBranchCondition(SmallVectorImpl &Cond) const override; }; } // end namespace llvm diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index fe3ca76dc08a..f0b4ce7caf51 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -25,20 +25,48 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">, // WebAssembly-specific DAG Node Types. //===----------------------------------------------------------------------===// +def SDT_WebAssemblyCallSeqStart : SDCallSeqStart<[SDTCisVT<0, iPTR>]>; +def SDT_WebAssemblyCallSeqEnd : + SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; +def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>; +def SDT_WebAssemblyTableswitch : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; +def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>; +def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; + //===----------------------------------------------------------------------===// // WebAssembly-specific DAG Nodes. 
//===----------------------------------------------------------------------===//
+def WebAssemblycallseq_start :
+    SDNode<"ISD::CALLSEQ_START", SDT_WebAssemblyCallSeqStart,
+           [SDNPHasChain, SDNPOutGlue]>;
+def WebAssemblycallseq_end :
+    SDNode<"ISD::CALLSEQ_END", SDT_WebAssemblyCallSeqEnd,
+           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0",
+                              SDT_WebAssemblyCall0,
+                              [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1",
+                              SDT_WebAssemblyCall1,
+                              [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblytableswitch : SDNode<"WebAssemblyISD::TABLESWITCH",
+                                    SDT_WebAssemblyTableswitch,
+                                    [SDNPHasChain, SDNPVariadic]>;
+def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT",
+                                 SDT_WebAssemblyArgument>;
+def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN",
+                               SDT_WebAssemblyReturn, [SDNPHasChain]>;
+def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
+                                SDT_WebAssemblyWrapper>;
+
//===----------------------------------------------------------------------===//
// WebAssembly-specific Operands.
//===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- *
- * get_local: read the current value of a local variable
- * set_local: set the current value of a local variable
-*/
+def bb_op : Operand<OtherVT>;
//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
@@ -46,14 +74,87 @@ def HasSIMD128 : Predicate<"Subtarget->hasSIMD128()">,
 include "WebAssemblyInstrFormats.td"
+//===----------------------------------------------------------------------===//
+// Additional instructions.
+//===----------------------------------------------------------------------===//
+
+multiclass ARGUMENT<WebAssemblyRegClass vt> {
+ let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
+ def ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno),
+ [(set vt:$res, (WebAssemblyargument timm:$argno))]>;
+}
+defm : ARGUMENT<I32>;
+defm : ARGUMENT<I64>;
+defm : ARGUMENT<F32>;
+defm : ARGUMENT<F64>;
+
+let Defs = [ARGUMENTS] in {
+
+// get_local and set_local are not generated by instruction selection; they
+// are implied by virtual register uses and defs in most contexts. However,
+// they are explicitly emitted for special purposes.
+multiclass LOCAL<WebAssemblyRegClass vt> {
+ def GET_LOCAL_#vt : I<(outs vt:$res), (ins i32imm:$regno), [],
+ "get_local\t$res, $regno">;
+ // TODO: set_local returns its operand value
+ def SET_LOCAL_#vt : I<(outs), (ins i32imm:$regno, vt:$src), [],
+ "set_local\t$regno, $src">;
+
+ // COPY_LOCAL is not an actual instruction in wasm, but since we allow
+ // get_local and set_local to be implicit, we can have a COPY_LOCAL which
+ // is actually a no-op because all the work is done in the implied
+ // get_local and set_local.
+ let isAsCheapAsAMove = 1 in
+ def COPY_LOCAL_#vt : I<(outs vt:$res), (ins vt:$src), [],
+ "copy_local\t$res, $src">;
+}
+defm : LOCAL<I32>;
+defm : LOCAL<I64>;
+defm : LOCAL<F32>;
+defm : LOCAL<F64>;
+
+let isMoveImm = 1 in {
+def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm),
+ [(set I32:$res, imm:$imm)],
+ "i32.const\t$res, $imm">;
+def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm),
+ [(set I64:$res, imm:$imm)],
+ "i64.const\t$res, $imm">;
+def CONST_F32 : I<(outs F32:$res), (ins f32imm:$imm),
+ [(set F32:$res, fpimm:$imm)],
+ "f32.const\t$res, $imm">;
+def CONST_F64 : I<(outs F64:$res), (ins f64imm:$imm),
+ [(set F64:$res, fpimm:$imm)],
+ "f64.const\t$res, $imm">;
+} // isMoveImm = 1
+
+} // Defs = [ARGUMENTS]
+
+def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)),
+ (CONST_I32 tglobaladdr:$dst)>;
+def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)),
+ (CONST_I32 texternalsym:$dst)>;
+def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)),
+ (CONST_I32 tjumptable:$dst)>;
+
+let Defs = [ARGUMENTS] in {
+
+// Function signature and local variable declaration "instructions".
+def PARAM : I<(outs), (ins variable_ops), [], ".param \t">;
+def RESULT : I<(outs), (ins variable_ops), [], ".result \t">;
+def LOCAL : I<(outs), (ins variable_ops), [], ".local \t">;
+
+} // Defs = [ARGUMENTS]
+
//===----------------------------------------------------------------------===//
// Additional sets of instructions.
//===----------------------------------------------------------------------===//
 include "WebAssemblyInstrMemory.td"
 include "WebAssemblyInstrCall.td"
+include "WebAssemblyInstrControl.td"
 include "WebAssemblyInstrInteger.td"
-include "WebAssemblyInstrFloat.td"
 include "WebAssemblyInstrConv.td"
+include "WebAssemblyInstrFloat.td"
 include "WebAssemblyInstrAtomics.td"
 include "WebAssemblyInstrSIMD.td"
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index 5f60fe81b1a2..09e5eafb85e9 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -12,34 +12,77 @@
 ///
 //===----------------------------------------------------------------------===//
-defm ADD : BinaryInt<add>;
-defm SUB : BinaryInt<sub>;
-defm MUL : BinaryInt<mul>;
-defm SDIV : BinaryInt<sdiv>;
-defm UDIV : BinaryInt<udiv>;
-defm SREM : BinaryInt<srem>;
-defm UREM : BinaryInt<urem>;
-defm AND : BinaryInt<and>;
-defm IOR : BinaryInt<or>;
-defm XOR : BinaryInt<xor>;
-defm SHL : BinaryInt<shl>;
-defm SHR : BinaryInt<srl>;
-defm SAR : BinaryInt<sra>;
+let Defs = [ARGUMENTS] in {
-/*
- * TODO(jfb): Add the following for 32-bit and 64-bit.
- *
- * int32.eq: signed-less compare equal
- * int32.slt: signed less than
- * int32.sle: signed less than or equal
- * int32.ult: unsigned less than
- * int32.ule: unsigned less than or equal
- * int32.sgt: signed greater than
- * int32.sge: signed greater than or equal
- * int32.ugt: unsigned greater than
- * int32.uge: unsigned greater than or equal
- */
+// The spaces after the names are for aesthetic purposes only, to make
+// operands line up vertically after tab expansion.
+let isCommutable = 1 in
+defm ADD : BinaryInt<add, "add ">;
+defm SUB : BinaryInt<sub, "sub ">;
+let isCommutable = 1 in
+defm MUL : BinaryInt<mul, "mul ">;
+// Divide and remainder trap on a zero denominator.
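+// (hasSideEffects is set below so the optimizer will not speculate, sink, or
+// dead-code-eliminate a division that might trap; signed division can also
+// trap on INT32_MIN / -1 overflow.)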
+let hasSideEffects = 1 in {
+defm DIV_S : BinaryInt<sdiv, "div_s">;
+defm DIV_U : BinaryInt<udiv, "div_u">;
+defm REM_S : BinaryInt<srem, "rem_s">;
+defm REM_U : BinaryInt<urem, "rem_u">;
+} // hasSideEffects = 1
+let isCommutable = 1 in {
+defm AND : BinaryInt<and, "and ">;
+defm OR : BinaryInt<or, "or  ">;
+defm XOR : BinaryInt<xor, "xor ">;
+} // isCommutable = 1
+defm SHL : BinaryInt<shl, "shl ">;
+defm SHR_U : BinaryInt<srl, "shr_u">;
+defm SHR_S : BinaryInt<sra, "shr_s">;
-defm CLZ : UnaryInt<ctlz>;
-defm CTZ : UnaryInt<cttz>;
-defm POPCNT : UnaryInt<ctpop>;
+let isCommutable = 1 in {
+defm EQ : ComparisonInt<SETEQ, "eq ">;
+defm NE : ComparisonInt<SETNE, "ne ">;
+} // isCommutable = 1
+defm LT_S : ComparisonInt<SETLT, "lt_s">;
+defm LE_S : ComparisonInt<SETLE, "le_s">;
+defm LT_U : ComparisonInt<SETULT, "lt_u">;
+defm LE_U : ComparisonInt<SETULE, "le_u">;
+defm GT_S : ComparisonInt<SETGT, "gt_s">;
+defm GE_S : ComparisonInt<SETGE, "ge_s">;
+defm GT_U : ComparisonInt<SETUGT, "gt_u">;
+defm GE_U : ComparisonInt<SETUGE, "ge_u">;
+
+defm CLZ : UnaryInt<ctlz, "clz ">;
+defm CTZ : UnaryInt<cttz, "ctz ">;
+defm POPCNT : UnaryInt<ctpop, "popcnt">;
+
+} // Defs = [ARGUMENTS]
+
+// Expand the "don't care" operations to supported operations.
+def : Pat<(ctlz_zero_undef I32:$src), (CLZ_I32 I32:$src)>;
+def : Pat<(ctlz_zero_undef I64:$src), (CLZ_I64 I64:$src)>;
+def : Pat<(cttz_zero_undef I32:$src), (CTZ_I32 I32:$src)>;
+def : Pat<(cttz_zero_undef I64:$src), (CTZ_I64 I64:$src)>;
+
+let Defs = [ARGUMENTS] in {
+
+def SELECT_I32 : I<(outs I32:$dst), (ins I32:$cond, I32:$lhs, I32:$rhs),
+                   [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
+                   "i32.select\t$dst, $cond, $lhs, $rhs">;
+def SELECT_I64 : I<(outs I64:$dst), (ins I32:$cond, I64:$lhs, I64:$rhs),
+                   [(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))],
+                   "i64.select\t$dst, $cond, $lhs, $rhs">;
+
+} // Defs = [ARGUMENTS]
+
+// ISD::SELECT requires its operand to conform to getBooleanContents, but
+// WebAssembly's select interprets any non-zero value as true, so we can fold
+// a setne with 0 into a select.
+def : Pat<(select (i32 (setne I32:$cond, 0)), I32:$lhs, I32:$rhs),
+          (SELECT_I32 I32:$cond, I32:$lhs, I32:$rhs)>;
+def : Pat<(select (i32 (setne I32:$cond, 0)), I64:$lhs, I64:$rhs),
+          (SELECT_I64 I32:$cond, I64:$lhs, I64:$rhs)>;
+
+// And again, this time with seteq instead of setne and the arms reversed.
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs),
+          (SELECT_I32 I32:$cond, I32:$rhs, I32:$lhs)>;
+def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs),
+          (SELECT_I64 I32:$cond, I64:$rhs, I64:$lhs)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 5ab40e826caa..74ec45d58644 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -12,35 +12,500 @@
 ///
 //===----------------------------------------------------------------------===//
-/*
- * TODO(jfb): Add the following.
- * Each has optional alignment and immediate byte offset.
- * - * int32.load_sx[int8]: sign-extend to int32 - * int32.load_sx[int16]: sign-extend to int32 - * int32.load_zx[int8]: zero-extend to int32 - * int32.load_zx[int16]: zero-extend to int32 - * int32.load[int32]: (no conversion) - * int64.load_sx[int8]: sign-extend to int64 - * int64.load_sx[int16]: sign-extend to int64 - * int64.load_sx[int32]: sign-extend to int64 - * int64.load_zx[int8]: zero-extend to int64 - * int64.load_zx[int16]: zero-extend to int64 - * int64.load_zx[int32]: zero-extend to int64 - * int64.load[int64]: (no conversion) - * float32.load[float32]: (no conversion) - * float64.load[float64]: (no conversion) - * - * int32.store[int8]: wrap int32 to int8 - * int32.store[int16]: wrap int32 to int16 - * int32.store[int32]: (no conversion) - * int64.store[int8]: wrap int64 to int8 - * int64.store[int16]: wrap int64 to int16 - * int64.store[int32]: wrap int64 to int32 - * int64.store[int64]: (no conversion) - * float32.store[float32]: (no conversion) - * float64.store[float64]: (no conversion) - * - * load_global: load the value of a given global variable - * store_global: store a given value to a given global variable - */ +// TODO: +// - HasAddr64 +// - WebAssemblyTargetLowering having to do with atomics +// - Each has optional alignment. + +// WebAssembly has i8/i16/i32/i64/f32/f64 memory types, but doesn't have i8/i16 +// local types. These memory-only types instead zero- or sign-extend into local +// types when loading, and truncate when storing. + +// WebAssembly constant offsets are performed as unsigned with infinite +// precision, so we need to check for NoUnsignedWrap so that we don't fold an +// offset for an add that needs wrapping. +def regPlusImm : PatFrag<(ops node:$off, node:$addr), + (add node:$addr, node:$off), + [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; + +let Defs = [ARGUMENTS] in { + +// Basic load. +def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load\t$dst, ${off}(${addr})">; +def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load\t$dst, ${off}(${addr})">; +def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr), [], + "f32.load\t$dst, ${off}(${addr})">; +def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [], + "f64.load\t$dst, ${off}(${addr})">; + +} // Defs = [ARGUMENTS] + +// Select loads with no constant offset. +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; + +// Select loads with a constant offset. 
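+// For example, without the NoUnsignedWrap check in regPlusImm above, an
+// address such as (add $x, -8) would fold to the constant offset 0xFFFFFFF8,
+// and the access would be to $x + 0xFFFFFFF8 rather than $x - 8, because the
+// effective address is computed without wrapping.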
+def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I32 imm:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_I64 imm:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F32 imm:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))), + (LOAD_F64 imm:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_I64 tglobaladdr:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F32 tglobaladdr:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD_F64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_I64 texternalsym:$off, $addr)>; +def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F32 texternalsym:$off, $addr)>; +def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD_F64 texternalsym:$off, $addr)>; + +// Select loads with just a constant offset. +def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>; +def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))), + (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))), + (LOAD_F64 texternalsym:$off, (CONST_I32 0))>; + +let Defs = [ARGUMENTS] in { + +// Extending load. 
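+// i8 and i16 are memory-only types; the _s/_u variants below extend the
+// loaded value to a full local type, e.g. i32.load8_s loads one byte and
+// sign-extends it to 32 bits.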
+def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load8_s\t$dst, ${off}(${addr})">; +def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load8_u\t$dst, ${off}(${addr})">; +def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load16_s\t$dst, ${off}(${addr})">; +def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], + "i32.load16_u\t$dst, ${off}(${addr})">; +def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load8_s\t$dst, ${off}(${addr})">; +def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load8_u\t$dst, ${off}(${addr})">; +def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load16_s\t$dst, ${off}(${addr})">; +def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load16_u\t$dst, ${off}(${addr})">; +def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load32_s\t$dst, ${off}(${addr})">; +def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], + "i64.load32_u\t$dst, ${off}(${addr})">; + +} // Defs = [ARGUMENTS] + +// Select extending loads with no constant offset. +def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; + +// Select extending loads with a constant offset. 
+def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I32 imm:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_S_I64 imm:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_S_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_S_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select extending loads with just a constant offset. 
+def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))), + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>; +def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))), + (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>; + +// Resolve "don't care" extending loads to zero-extending loads. This is +// somewhat arbitrary, but zero-extending is conceptually simpler. + +// Select "don't care" extending loads with no constant offset. 
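+// (An extloadi8/extloadi16/extloadi32 leaves the high bits unspecified, so
+// selecting the _u forms is one valid choice among several.)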
+def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; + +// Select "don't care" extending loads with a constant offset. +def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I32 imm:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I32 imm:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))), + (LOAD8_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))), + (LOAD16_U_I64 imm:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))), + (LOAD32_U_I64 imm:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I32 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD8_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD16_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))), + (LOAD32_U_I64 tglobaladdr:$off, $addr)>; +def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I32 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD8_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD16_U_I64 texternalsym:$off, $addr)>; +def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))), + (LOAD32_U_I64 texternalsym:$off, $addr)>; + +// Select "don't care" extending loads with just a constant offset. 
+def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))),
+          (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>;
+def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))),
+          (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>;
+
+let Defs = [ARGUMENTS] in {
+
+// Basic store.
+// Note that we split the patterns out of the instruction definitions because
+// WebAssembly's stores return their operand value, and tablegen doesn't like
+// instruction definition patterns that don't reference all of the output
+// operands.
+// Note: WebAssembly inverts SelectionDAG's usual operand order.
+def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [],
+                  "i32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [],
+                  "i64.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, F32:$val), [],
+                  "f32.store\t$dst, ${off}(${addr}), $val">;
+def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, F64:$val), [],
+                  "f64.store\t$dst, ${off}(${addr}), $val">;
+
+} // Defs = [ARGUMENTS]
+
+// Select stores with no constant offset.
+def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>;
+
+// Select stores with a constant offset.
+def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F32 imm:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE_F64 imm:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>; + +// Select stores with just a constant offset. +def : Pat<(store I32:$val, imm:$off), + (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, imm:$off), + (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, imm:$off), + (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, imm:$off), + (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>; +def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>; +def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>; + +let Defs = [ARGUMENTS] in { + +// Truncating store. 
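+// These wrap a wider local value to the narrower memory type; for example,
+// i32.store8 stores only the low 8 bits of $val.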
+def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], + "i32.store8\t$dst, ${off}(${addr}), $val">; +def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], + "i32.store16\t$dst, ${off}(${addr}), $val">; +def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], + "i64.store8\t$dst, ${off}(${addr}), $val">; +def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], + "i64.store16\t$dst, ${off}(${addr}), $val">; +def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], + "i64.store32\t$dst, ${off}(${addr}), $val">; + +} // Defs = [ARGUMENTS] + +// Select truncating stores with no constant offset. +def : Pat<(truncstorei8 I32:$val, I32:$addr), + (STORE8_I32 0, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, I32:$addr), + (STORE16_I32 0, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, I32:$addr), + (STORE8_I64 0, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, I32:$addr), + (STORE16_I64 0, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, I32:$addr), + (STORE32_I64 0, I32:$addr, I64:$val)>; + +// Select truncating stores with a constant offset. +def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I32 imm:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE8_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE16_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)), + (STORE32_I64 imm:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)), + (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)), + (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>; + +// Select truncating stores with just a constant offset. 
+def : Pat<(truncstorei8 I32:$val, imm:$off), + (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, imm:$off), + (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, imm:$off), + (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, imm:$off), + (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, imm:$off), + (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), + (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)), + (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + +let Defs = [ARGUMENTS] in { + +// Memory size. +def MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins), + [(set I32:$dst, (int_wasm_memory_size))], + "memory_size\t$dst">, + Requires<[HasAddr32]>; +def MEMORY_SIZE_I64 : I<(outs I64:$dst), (ins), + [(set I64:$dst, (int_wasm_memory_size))], + "memory_size\t$dst">, + Requires<[HasAddr64]>; + +// Grow memory. +def GROW_MEMORY_I32 : I<(outs), (ins I32:$delta), + [(int_wasm_grow_memory I32:$delta)], + "grow_memory\t$delta">, + Requires<[HasAddr32]>; +def GROW_MEMORY_I64 : I<(outs), (ins I64:$delta), + [(int_wasm_grow_memory I64:$delta)], + "grow_memory\t$delta">, + Requires<[HasAddr64]>; + +} // Defs = [ARGUMENTS] diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp new file mode 100644 index 000000000000..b009a4e054cc --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -0,0 +1,133 @@ +//===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file lowers br_unless into br_if with an inverted condition. +/// +/// br_unless is not currently in the spec, but it's very convenient for LLVM +/// to use. This pass allows LLVM to use it, for now. 
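+///
+/// For example, "BR_UNLESS $cond, $dst" branches to $dst only when $cond is
+/// zero. The pass below rewrites each br_unless as a br_if on the inverted
+/// condition, inverting the comparison that defines the condition in place
+/// when possible, and otherwise materializing an explicit "$cond == 0" test.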
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-lower-br_unless" + +namespace { +class WebAssemblyLowerBrUnless final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Lower br_unless"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyLowerBrUnless() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyLowerBrUnless::ID = 0; +FunctionPass *llvm::createWebAssemblyLowerBrUnless() { + return new WebAssemblyLowerBrUnless(); +} + +bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Lowering br_unless **********\n" + "********** Function: " + << MF.getName() << '\n'); + + auto &MFI = *MF.getInfo(); + const auto &TII = *MF.getSubtarget().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + + for (auto &MBB : MF) { + for (auto MII = MBB.begin(); MII != MBB.end(); ) { + MachineInstr *MI = &*MII++; + if (MI->getOpcode() != WebAssembly::BR_UNLESS) + continue; + + unsigned Cond = MI->getOperand(0).getReg(); + bool Inverted = false; + + // Attempt to invert the condition in place. + if (MFI.isVRegStackified(Cond)) { + assert(MRI.hasOneDef(Cond)); + MachineInstr *Def = MRI.getVRegDef(Cond); + switch (Def->getOpcode()) { + using namespace WebAssembly; + case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break; + case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break; + case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break; + case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break; + case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break; + case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break; + case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break; + case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break; + case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break; + case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break; + case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break; + case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break; + case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break; + case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break; + case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break; + case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break; + case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break; + case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break; + case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break; + case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break; + case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break; + case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break; + case EQ_F64: 
Def->setDesc(TII.get(NE_F64)); Inverted = true; break; + case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break; + default: break; + } + } + + // If we weren't able to invert the condition in place. Insert an + // expression to invert it. + if (!Inverted) { + unsigned ZeroReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(ZeroReg); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::CONST_I32), ZeroReg) + .addImm(0); + unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(Tmp); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQ_I32), Tmp) + .addReg(Cond) + .addReg(ZeroReg); + Cond = Tmp; + Inverted = true; + } + + // The br_unless condition has now been inverted. Insert a br_if and + // delete the br_unless. + assert(Inverted); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF)) + .addReg(Cond) + .addOperand(MI->getOperand(1)); + MBB.erase(MI); + } + } + + return true; +} diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp new file mode 100644 index 000000000000..a953f8247006 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -0,0 +1,106 @@ +// WebAssemblyMCInstLower.cpp - Convert WebAssembly MachineInstr to an MCInst // +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file contains code to lower WebAssembly MachineInstrs to their +/// corresponding MCInst records. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssemblyMCInstLower.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Constants.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +MCSymbol * +WebAssemblyMCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const { + return Printer.getSymbol(MO.getGlobal()); +} + +MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( + const MachineOperand &MO) const { + return Printer.GetExternalSymbolSymbol(MO.getSymbolName()); +} + +MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO, + MCSymbol *Sym) const { + assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags"); + + const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); + + int64_t Offset = MO.getOffset(); + if (Offset != 0) { + assert(!MO.isJTI() && "Unexpected offset with jump table index"); + Expr = + MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); + } + + return MCOperand::createExpr(Expr); +} + +void WebAssemblyMCInstLower::Lower(const MachineInstr *MI, + MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + llvm_unreachable("unknown operand type"); + case MachineOperand::MO_Register: { + // Ignore all implicit register operands. 
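+ // (Implicit operands, such as the ARGUMENTS register used to model
+ // argument liveness, have no counterpart in the WebAssembly encoding.)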
+ if (MO.isImplicit()) + continue; + const WebAssemblyFunctionInfo &MFI = + *MI->getParent()->getParent()->getInfo(); + unsigned WAReg = MFI.getWAReg(MO.getReg()); + MCOp = MCOperand::createReg(WAReg); + break; + } + case MachineOperand::MO_Immediate: + MCOp = MCOperand::createImm(MO.getImm()); + break; + case MachineOperand::MO_FPImmediate: { + // TODO: MC converts all floating point immediate operands to double. + // This is fine for numeric values, but may cause NaNs to change bits. + const ConstantFP *Imm = MO.getFPImm(); + if (Imm->getType()->isFloatTy()) + MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToFloat()); + else if (Imm->getType()->isDoubleTy()) + MCOp = MCOperand::createFPImm(Imm->getValueAPF().convertToDouble()); + else + llvm_unreachable("unknown floating point immediate type"); + break; + } + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::createExpr( + MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } +} diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h new file mode 100644 index 000000000000..6d704704f576 --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h @@ -0,0 +1,45 @@ +//===-- WebAssemblyMCInstLower.h - Lower MachineInstr to MCInst -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file declares the class to lower WebAssembly MachineInstrs to +/// their corresponding MCInst records. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H +#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMCINSTLOWER_H + +#include "llvm/MC/MCInst.h" +#include "llvm/Support/Compiler.h" + +namespace llvm { +class AsmPrinter; +class MCContext; +class MCSymbol; +class MachineInstr; +class MachineOperand; + +/// This class is used to lower an MachineInstr into an MCInst. 
+class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower { + MCContext &Ctx; + AsmPrinter &Printer; + + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + +public: + WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer) + : Ctx(ctx), Printer(printer) {} + void Lower(const MachineInstr *MI, MCInst &OutMI) const; +}; +} // end namespace llvm + +#endif diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp index 542d984b9006..225c5d32cb5d 100644 --- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp @@ -17,3 +17,9 @@ using namespace llvm; WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {} + +void WebAssemblyFunctionInfo::initWARegs() { + assert(WARegs.empty()); + unsigned Reg = UnusedReg; + WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg); +} diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index fc5e910b09ef..6a60280900a9 100644 --- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -1,4 +1,4 @@ -// WebAssemblyMachineFuctionInfo.h-WebAssembly machine function info -*- C++ -*- +// WebAssemblyMachineFunctionInfo.h-WebAssembly machine function info-*- C++ -*- // // The LLVM Compiler Infrastructure // @@ -16,8 +16,7 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H -#include "WebAssemblyRegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { @@ -27,9 +26,70 @@ namespace llvm { class WebAssemblyFunctionInfo final : public MachineFunctionInfo { MachineFunction &MF; + std::vector Params; + + /// A mapping from CodeGen vreg index to WebAssembly register number. + std::vector WARegs; + + /// A mapping from CodeGen vreg index to a boolean value indicating whether + /// the given register is considered to be "stackified", meaning it has been + /// determined or made to meet the stack requirements: + /// - single use (per path) + /// - single def (per path) + /// - defined and used in LIFO order with other stack registers + BitVector VRegStackified; + + // One entry for each possible target reg. we expect it to be small. 
+  std::vector<unsigned> PhysRegs;
+
 public:
-  explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
+  explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {
+    PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U);
+  }
   ~WebAssemblyFunctionInfo() override;
+
+  void addParam(MVT VT) { Params.push_back(VT); }
+  const std::vector<MVT> &getParams() const { return Params; }
+
+  static const unsigned UnusedReg = -1u;
+
+  void stackifyVReg(unsigned VReg) {
+    if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+      VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1);
+    VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg));
+  }
+  bool isVRegStackified(unsigned VReg) const {
+    if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
+      return false;
+    return VRegStackified.test(TargetRegisterInfo::virtReg2Index(VReg));
+  }
+
+  void initWARegs();
+  void setWAReg(unsigned VReg, unsigned WAReg) {
+    assert(WAReg != UnusedReg);
+    assert(TargetRegisterInfo::virtReg2Index(VReg) < WARegs.size());
+    WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg;
+  }
+  unsigned getWAReg(unsigned Reg) const {
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size());
+      return WARegs[TargetRegisterInfo::virtReg2Index(Reg)];
+    }
+    return PhysRegs[Reg];
+  }
+  // If new virtual registers are created after initWARegs has been called,
+  // this function can be used to add WebAssembly register mappings for them.
+  void addWAReg(unsigned VReg, unsigned WAReg) {
+    assert(TargetRegisterInfo::virtReg2Index(VReg) == WARegs.size());
+    WARegs.push_back(WAReg);
+  }
+
+  void addPReg(unsigned PReg, unsigned WAReg) {
+    assert(PReg < WebAssembly::NUM_TARGET_REGS);
+    assert(WAReg < -1U);
+    PhysRegs[PReg] = WAReg;
+  }
+  const std::vector<unsigned> &getPhysRegs() const { return PhysRegs; }
 };
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
new file mode 100644
index 000000000000..4dc401a2c7cc
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -0,0 +1,76 @@
+//===-- WebAssemblyOptimizeReturned.cpp - Optimize "returned" attributes --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Optimize calls with "returned" attributes for WebAssembly.
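+///
+/// For illustration (a hypothetical IR snippet, not taken from this patch):
+/// given
+/// \code
+///   %q = call i8* @f(i8* returned %p)
+/// \endcode
+/// every use of %p that is dominated by the call can be rewritten to use %q,
+/// so the call's result becomes the live value and %p's live range shrinks.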
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-optimize-returned"
+
+namespace {
+class OptimizeReturned final : public FunctionPass,
+                               public InstVisitor<OptimizeReturned> {
+  const char *getPassName() const override {
+    return "WebAssembly Optimize Returned";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+    FunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  DominatorTree *DT;
+
+public:
+  static char ID;
+  OptimizeReturned() : FunctionPass(ID), DT(nullptr) {}
+
+  void visitCallSite(CallSite CS);
+};
+} // End anonymous namespace
+
+char OptimizeReturned::ID = 0;
+FunctionPass *llvm::createWebAssemblyOptimizeReturned() {
+  return new OptimizeReturned();
+}
+
+void OptimizeReturned::visitCallSite(CallSite CS) {
+  for (unsigned i = 0, e = CS.getNumArgOperands(); i < e; ++i)
+    if (CS.paramHasAttr(1 + i, Attribute::Returned)) {
+      Instruction *Inst = CS.getInstruction();
+      Value *Arg = CS.getArgOperand(i);
+      // Ignore constants, globals, undef, etc.
+      if (isa<Constant>(Arg))
+        continue;
+      // Like replaceDominatedUsesWith but using Instruction/Use dominance.
+      for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) {
+        Use &U = *UI++;
+        if (DT->dominates(Inst, U))
+          U.set(Inst);
+      }
+    }
+}
+
+bool OptimizeReturned::runOnFunction(Function &F) {
+  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  visit(F);
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyPEI.cpp b/lib/Target/WebAssembly/WebAssemblyPEI.cpp
new file mode 100644
index 000000000000..d570d4266110
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyPEI.cpp
@@ -0,0 +1,1066 @@
+//===-- WebAssemblyPEI.cpp - Insert Prolog/Epilog code in function --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee-saved registers, and emitting prolog and epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
+//
+// This is a copy of lib/CodeGen/PrologEpilogInserter.cpp except that it does
+// not assert that all virtual registers are gone (because WebAssembly
+// currently uses virtual rather than physical registers), and only runs
+// MRI.clearVirtRegs() if scavenging happened (which it never does). It also
+// uses a different class name so it can be registered via INITIALIZE_PASS.
+// It is otherwise unmodified, so any changes to the target-independent PEI
+// can be easily applied.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <climits>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pei"
+
+namespace llvm {
+void initializeWasmPEIPass(PassRegistry &);
+}
+
+namespace {
+class WasmPEI : public MachineFunctionPass {
+public:
+  static char ID;
+  WasmPEI() : MachineFunctionPass(ID) {
+    initializeWasmPEIPass(*PassRegistry::getPassRegistry());
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  /// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+  /// frame indexes with appropriate references.
+  ///
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+private:
+  RegScavenger *RS;
+
+  // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
+  // stack frame indexes.
+  unsigned MinCSFrameIndex, MaxCSFrameIndex;
+
+  // Save and Restore blocks of the current function. Typically there is a
+  // single save block, unless Windows EH funclets are involved.
+  SmallVector<MachineBasicBlock *, 1> SaveBlocks;
+  SmallVector<MachineBasicBlock *, 1> RestoreBlocks;
+
+  // Flag to control whether to use the register scavenger to resolve
+  // frame index materialization registers. Set according to
+  // TRI->requiresFrameIndexScavenging() for the current function.
+  bool FrameIndexVirtualScavenging;
+
+  void calculateSets(MachineFunction &Fn);
+  void calculateCallsInformation(MachineFunction &Fn);
+  void assignCalleeSavedSpillSlots(MachineFunction &Fn,
+                                   const BitVector &SavedRegs);
+  void insertCSRSpillsAndRestores(MachineFunction &Fn);
+  void calculateFrameObjectOffsets(MachineFunction &Fn);
+  void replaceFrameIndices(MachineFunction &Fn);
+  void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+                           int &SPAdj);
+  void scavengeFrameVirtualRegs(MachineFunction &Fn);
+  void insertPrologEpilogCode(MachineFunction &Fn);
+};
+} // namespace
+
+char WasmPEI::ID = 0;
+
+namespace llvm {
+FunctionPass *createWebAssemblyPEI() {
+  return new WasmPEI();
+}
+}
+
+static cl::opt<unsigned>
+WarnStackSize("wasm-warn-stack-size", cl::Hidden, cl::init((unsigned)-1),
+              cl::desc("Warn for stack size bigger than the given"
+                       " number"));
+
+INITIALIZE_PASS_BEGIN(WasmPEI, "wasmprologepilog",
+                      "Wasm Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(StackProtector)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(WasmPEI, "wasmprologepilog",
+                    "Wasm Prologue/Epilogue Insertion & Frame Finalization",
+                    false, false)
+
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+STATISTIC(NumBytesStackSpace,
+          "Number of bytes used for stack in all functions");
+
+void WasmPEI::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<StackProtector>();
+  AU.addRequired<TargetPassConfig>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Compute the set of return blocks
+void WasmPEI::calculateSets(MachineFunction &Fn) {
+  const MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+  // Even when we do not change any CSR, we still want to insert the
+  // prologue and epilogue of the function.
+  // So set the save points for those.
+
+  // Use the points found by shrink-wrapping, if any.
+  if (MFI->getSavePoint()) {
+    SaveBlocks.push_back(MFI->getSavePoint());
+    assert(MFI->getRestorePoint() && "Both restore and save must be set");
+    MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
+    // If RestoreBlock does not have any successor and is not a return block
+    // then the end point is unreachable and we do not need to insert any
+    // epilogue.
+    if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+      RestoreBlocks.push_back(RestoreBlock);
+    return;
+  }
+
+  // Save refs to entry and return blocks.
+  SaveBlocks.push_back(&Fn.front());
+  for (MachineBasicBlock &MBB : Fn) {
+    if (MBB.isEHFuncletEntry())
+      SaveBlocks.push_back(&MBB);
+    if (MBB.isReturnBlock())
+      RestoreBlocks.push_back(&MBB);
+  }
+}
+
+/// StackObjSet - A set of stack object indexes
+typedef SmallSetVector<int, 8> StackObjSet;
+
+/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
+/// frame indexes with appropriate references.
+///
+bool WasmPEI::runOnMachineFunction(MachineFunction &Fn) {
+  const Function *F = Fn.getFunction();
+  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+
+  // LOCALMOD: assert removed from target-independent PEI
+  //assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
+
+  RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
+  FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+
+  // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+  // function's frame information. Also eliminates call frame pseudo
+  // instructions.
+  calculateCallsInformation(Fn);
+
+  // Determine which of the registers in the callee save list should be saved.
+  BitVector SavedRegs;
+  TFI->determineCalleeSaves(Fn, SavedRegs, RS);
+
+  // Insert spill code for any callee saved registers that are modified.
+  assignCalleeSavedSpillSlots(Fn, SavedRegs);
+
+  // Determine placement of CSR spill/restore code:
+  // place all spills in the entry block, all restores in return blocks.
+  calculateSets(Fn);
+
+  // Add the code to save and restore the callee saved registers.
+  if (!F->hasFnAttribute(Attribute::Naked))
+    insertCSRSpillsAndRestores(Fn);
+
+  // Allow the target machine to make final modifications to the function
+  // before the frame layout is finalized.
+  TFI->processFunctionBeforeFrameFinalized(Fn, RS);
+
+  // Calculate actual frame offsets for all abstract stack objects...
+  calculateFrameObjectOffsets(Fn);
+
+  // Add prolog and epilog code to the function. This function is required
+  // to align the stack frame as necessary for any stack variables or
+  // called functions. Because of this, calculateCalleeSavedRegisters()
+  // must be called before this function in order to set the AdjustsStack
+  // and MaxCallFrameSize variables.
+  if (!F->hasFnAttribute(Attribute::Naked))
+    insertPrologEpilogCode(Fn);
+
+  // Replace all MO_FrameIndex operands with physical register references
+  // and actual offsets.
+  //
+  replaceFrameIndices(Fn);
+
+  // If register scavenging is needed, as we've enabled doing it as a
+  // post-pass, scavenge the virtual registers that frame index elimination
+  // inserted.
+  if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
+    scavengeFrameVirtualRegs(Fn);
+    // Clear any vregs created by virtual scavenging.
+    // LOCALMOD: made this call conditional with scavengeFrameVirtualRegs()
+    Fn.getRegInfo().clearVirtRegs();
+  }
+
+  // Warn on stack size when it exceeds the given limit.
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  uint64_t StackSize = MFI->getStackSize();
+  if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
+    DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
+    F->getContext().diagnose(DiagStackSize);
+  }
+
+  delete RS;
+  SaveBlocks.clear();
+  RestoreBlocks.clear();
+  return true;
+}
+
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
+/// variables for the function's frame information and eliminate call frame
+/// pseudo instructions.
+void WasmPEI::calculateCallsInformation(MachineFunction &Fn) {
+  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+  unsigned MaxCallFrameSize = 0;
+  bool AdjustsStack = MFI->adjustsStack();
+
+  // Get the function call frame set-up and tear-down instruction opcode
+  unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+  unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
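+  // For illustration only: the pseudos scanned for below typically bracket a
+  // call like this (opcode names vary by target; the generic ADJCALLSTACK
+  // pair shown here is an assumption, not part of this patch):
+  //   ADJCALLSTACKDOWN 16    ; FrameSetupOpcode, operand 0 = frame size
+  //   CALL @callee
+  //   ADJCALLSTACKUP 16, 0   ; FrameDestroyOpcode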
+  // Early exit for targets which have no call frame setup/destroy pseudo
+  // instructions.
+  if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u)
+    return;
+
+  std::vector<MachineBasicBlock::iterator> FrameSDOps;
+  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB)
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+      if (I->getOpcode() == FrameSetupOpcode ||
+          I->getOpcode() == FrameDestroyOpcode) {
+        assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo"
+               " instructions should have a single immediate argument!");
+        unsigned Size = I->getOperand(0).getImm();
+        if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
+        AdjustsStack = true;
+        FrameSDOps.push_back(I);
+      } else if (I->isInlineAsm()) {
+        // Some inline asm's need a stack frame, as indicated by operand 1.
+        unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+        if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+          AdjustsStack = true;
+      }
+
+  MFI->setAdjustsStack(AdjustsStack);
+  MFI->setMaxCallFrameSize(MaxCallFrameSize);
+
+  for (std::vector<MachineBasicBlock::iterator>::iterator
+       i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
+    MachineBasicBlock::iterator I = *i;
+
+    // If call frames are not being included as part of the stack frame, and
+    // the target doesn't indicate otherwise, remove the call frame pseudos
+    // here. The sub/add sp instruction pairs are still inserted, but we don't
+    // need to track the SP adjustment for frame index elimination.
+    if (TFI->canSimplifyCallFramePseudos(Fn))
+      TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
+  }
+}
+
+void WasmPEI::assignCalleeSavedSpillSlots(MachineFunction &F,
+                                          const BitVector &SavedRegs) {
+  // These are used to keep track of the callee-save area. Initialize them.
+  MinCSFrameIndex = INT_MAX;
+  MaxCSFrameIndex = 0;
+
+  if (SavedRegs.empty())
+    return;
+
+  const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo();
+  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+
+  std::vector<CalleeSavedInfo> CSI;
+  for (unsigned i = 0; CSRegs[i]; ++i) {
+    unsigned Reg = CSRegs[i];
+    if (SavedRegs.test(Reg))
+      CSI.push_back(CalleeSavedInfo(Reg));
+  }
+
+  const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
+  MachineFrameInfo *MFI = F.getFrameInfo();
+  if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
+    // If target doesn't implement this, use generic code.
+
+    if (CSI.empty())
+      return; // Early exit if no callee saved registers are modified!
+
+    unsigned NumFixedSpillSlots;
+    const TargetFrameLowering::SpillSlot *FixedSpillSlots =
+        TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
+
+    // Now that we know which registers need to be saved and restored, allocate
+    // stack slots for them.
+    for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end();
+         I != E; ++I) {
+      unsigned Reg = I->getReg();
+      const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
+
+      int FrameIdx;
+      if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) {
+        I->setFrameIdx(FrameIdx);
+        continue;
+      }
+
+      // Check to see if this physreg must be spilled to a particular stack slot
+      // on this target.
+      const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
+      while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots &&
+             FixedSlot->Reg != Reg)
+        ++FixedSlot;
+
+      if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) {
+        // Nope, just spill it anywhere convenient.
+        unsigned Align = RC->getAlignment();
+        unsigned StackAlign = TFI->getStackAlignment();
+
+        // We may not be able to satisfy the desired alignment specification of
+        // the TargetRegisterClass if the stack alignment is smaller. Use the
+        // min.
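+        // (For example, a register class asking for 16-byte alignment on a
+        // target with an 8-byte stack alignment gets an 8-byte-aligned slot.)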
+        Align = std::min(Align, StackAlign);
+        FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+        if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
+        if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
+      } else {
+        // Spill it to the stack where we must.
+        FrameIdx =
+            MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+      }
+
+      I->setFrameIdx(FrameIdx);
+    }
+  }
+
+  MFI->setCalleeSavedInfo(CSI);
+}
+
+/// Helper function to update the liveness information for the callee-saved
+/// registers.
+static void updateLiveness(MachineFunction &MF) {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Visited will contain all the basic blocks that are in the region
+  // where the callee saved registers are alive:
+  // - Anything that is not Save or Restore -> LiveThrough.
+  // - Save -> LiveIn.
+  // - Restore -> LiveOut.
+  // The live-out is not attached to the block, so no need to keep
+  // Restore in this set.
+  SmallPtrSet<MachineBasicBlock *, 8> Visited;
+  SmallVector<MachineBasicBlock *, 8> WorkList;
+  MachineBasicBlock *Entry = &MF.front();
+  MachineBasicBlock *Save = MFI->getSavePoint();
+
+  if (!Save)
+    Save = Entry;
+
+  if (Entry != Save) {
+    WorkList.push_back(Entry);
+    Visited.insert(Entry);
+  }
+  Visited.insert(Save);
+
+  MachineBasicBlock *Restore = MFI->getRestorePoint();
+  if (Restore)
+    // By construction Restore cannot be visited, otherwise it
+    // means there exists a path to Restore that does not go
+    // through Save.
+    WorkList.push_back(Restore);
+
+  while (!WorkList.empty()) {
+    const MachineBasicBlock *CurBB = WorkList.pop_back_val();
+    // By construction, the region that is after the save point is
+    // dominated by the Save and post-dominated by the Restore.
+    if (CurBB == Save && Save != Restore)
+      continue;
+    // Enqueue all the successors not already visited.
+    // Those are by construction either before Save or after Restore.
+    for (MachineBasicBlock *SuccBB : CurBB->successors())
+      if (Visited.insert(SuccBB).second)
+        WorkList.push_back(SuccBB);
+  }
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+    for (MachineBasicBlock *MBB : Visited) {
+      MCPhysReg Reg = CSI[i].getReg();
+      // Add the callee-saved register as live-in.
+      // It's killed at the spill.
+      if (!MBB->isLiveIn(Reg))
+        MBB->addLiveIn(Reg);
+    }
+  }
+}
+
+/// insertCSRSpillsAndRestores - Insert spill and restore code for
+/// callee saved registers used in the function.
+///
+void WasmPEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
+  // Get callee saved register information.
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+
+  MFI->setCalleeSavedInfoValid(true);
+
+  // Early exit if no callee saved registers are modified!
+  if (CSI.empty())
+    return;
+
+  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
+  MachineBasicBlock::iterator I;
+
+  // Spill using target interface.
+  for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+    I = SaveBlock->begin();
+    if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) {
+      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+        // Insert the spill to the stack frame.
+        unsigned Reg = CSI[i].getReg();
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(),
+                                RC, TRI);
+      }
+    }
+    // Update the live-in information of all the blocks up to the save point.
+    updateLiveness(Fn);
+  }
+
+  // Restore using target interface.
+  for (MachineBasicBlock *MBB : RestoreBlocks) {
+    I = MBB->end();
+
+    // Skip over all terminator instructions, which are part of the return
+    // sequence.
+    MachineBasicBlock::iterator I2 = I;
+    while (I2 != MBB->begin() && (--I2)->isTerminator())
+      I = I2;
+
+    bool AtStart = I == MBB->begin();
+    MachineBasicBlock::iterator BeforeI = I;
+    if (!AtStart)
+      --BeforeI;
+
+    // Restore all registers immediately before the return and any
+    // terminators that precede it.
+    if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+        unsigned Reg = CSI[i].getReg();
+        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+        TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
+        assert(I != MBB->begin() &&
+               "loadRegFromStackSlot didn't insert any code!");
+        // Insert in reverse order. loadRegFromStackSlot can insert
+        // multiple instructions.
+        if (AtStart)
+          I = MBB->begin();
+        else {
+          I = BeforeI;
+          ++I;
+        }
+      }
+    }
+  }
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+static inline void
+AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+                  bool StackGrowsDown, int64_t &Offset,
+                  unsigned &MaxAlign, unsigned Skew) {
+  // If the stack grows down, add the object size to find the lowest address.
+  if (StackGrowsDown)
+    Offset += MFI->getObjectSize(FrameIdx);
+
+  unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+  // If the alignment of this object is greater than that of the stack, then
+  // increase the stack alignment to match.
+  MaxAlign = std::max(MaxAlign, Align);
+
+  // Adjust to alignment boundary.
+  Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+  if (StackGrowsDown) {
+    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
+    MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+  } else {
+    DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
+    MFI->setObjectOffset(FrameIdx, Offset);
+    Offset += MFI->getObjectSize(FrameIdx);
+  }
+}
+
+/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
+/// those required to be close to the Stack Protector) to stack offsets.
+static void
+AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
+                      SmallSet<int, 16> &ProtectedObjs,
+                      MachineFrameInfo *MFI, bool StackGrowsDown,
+                      int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
+
+  for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
+       E = UnassignedObjs.end(); I != E; ++I) {
+    int i = *I;
+    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+    ProtectedObjs.insert(i);
+  }
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void WasmPEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
+  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+  StackProtector *SP = &getAnalysis<StackProtector>();
+
+  bool StackGrowsDown =
+      TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+
+  // Loop over all of the stack objects, assigning sequential addresses...
+  MachineFrameInfo *MFI = Fn.getFrameInfo();
+
+  // Start at the beginning of the local area.
+  // The Offset is the distance from the stack top in the direction
+  // of stack growth -- so it's always nonnegative.
+  int LocalAreaOffset = TFI.getOffsetOfLocalArea();
+  if (StackGrowsDown)
+    LocalAreaOffset = -LocalAreaOffset;
+  assert(LocalAreaOffset >= 0 &&
+         "Local area offset should be in direction of stack growth");
+  int64_t Offset = LocalAreaOffset;
+
+  // Skew to be applied to alignment.
+  unsigned Skew = TFI.getStackAlignmentSkew(Fn);
+
+  // If there are fixed sized objects that are preallocated in the local area,
+  // non-fixed objects can't be allocated right at the start of local area.
+  // We currently don't support filling in holes in between fixed sized
+  // objects, so we adjust 'Offset' to point to the end of last fixed sized
+  // preallocated object.
+  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+    int64_t FixedOff;
+    if (StackGrowsDown) {
+      // The maximum distance from the stack pointer is at lower address of
+      // the object -- which is given by offset. For down growing stack
+      // the offset is negative, so we negate the offset to get the distance.
+      FixedOff = -MFI->getObjectOffset(i);
+    } else {
+      // The maximum distance from the start pointer is at the upper
+      // address of the object.
+      FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+    }
+    if (FixedOff > Offset) Offset = FixedOff;
+  }
+
+  // First assign frame offsets to stack objects that are used to spill
+  // callee saved registers.
+  if (StackGrowsDown) {
+    for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      // If the stack grows down, we need to add the size to find the lowest
+      // address of the object.
+      Offset += MFI->getObjectSize(i);
+
+      unsigned Align = MFI->getObjectAlignment(i);
+      // Adjust to alignment boundary
+      Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+      MFI->setObjectOffset(i, -Offset); // Set the computed offset
+    }
+  } else {
+    int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
+    for (int i = MaxCSFI; i >= MinCSFI; --i) {
+      unsigned Align = MFI->getObjectAlignment(i);
+      // Adjust to alignment boundary
+      Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+      MFI->setObjectOffset(i, Offset);
+      Offset += MFI->getObjectSize(i);
+    }
+  }
+
+  unsigned MaxAlign = MFI->getMaxAlignment();
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // incoming stack pointer if a frame pointer is required and is closer
+  // to the incoming rather than the final stack pointer.
+  const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo();
+  bool EarlyScavengingSlots = (TFI.hasFP(Fn) &&
+                               TFI.isFPCloseToIncomingSP() &&
+                               RegInfo->useFPForScavengingIndex(Fn) &&
+                               !RegInfo->needsStackRealignment(Fn));
+  if (RS && EarlyScavengingSlots) {
+    SmallVector<int, 8> SFIs;
+    RS->getScavengingFrameIndices(SFIs);
+    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+         IE = SFIs.end(); I != IE; ++I)
+      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+  }
+
+  // FIXME: Once this is working, then enable flag will change to a target
+  // check for whether the frame is large enough to want to use virtual
+  // frame index registers. Functions which don't want/need this optimization
+  // will continue to use the existing code path.
+  if (MFI->getUseLocalStackAllocationBlock()) {
+    unsigned Align = MFI->getLocalFrameMaxAlign();
+
+    // Adjust to alignment boundary.
+    Offset = RoundUpToAlignment(Offset, Align, Skew);
+
+    DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+    // Resolve offsets for objects in the local block.
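+    // For example (illustrative numbers): with StackGrowsDown and the local
+    // block based at Offset 32, an entry with map offset 8 resolves to
+    // FIOffset = -32 + 8 = -24.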
+    for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+      std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+      int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+      DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+            FIOffset << "]\n");
+      MFI->setObjectOffset(Entry.first, FIOffset);
+    }
+    // Allocate the local block
+    Offset += MFI->getLocalFrameSize();
+
+    MaxAlign = std::max(Align, MaxAlign);
+  }
+
+  // Make sure that the stack protector comes before the local variables on the
+  // stack.
+  SmallSet<int, 16> ProtectedObjs;
+  if (MFI->getStackProtectorIndex() >= 0) {
+    StackObjSet LargeArrayObjs;
+    StackObjSet SmallArrayObjs;
+    StackObjSet AddrOfObjs;
+
+    AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+                      Offset, MaxAlign, Skew);
+
+    // Assign large stack objects first.
+    for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+      if (MFI->isObjectPreAllocated(i) &&
+          MFI->getUseLocalStackAllocationBlock())
+        continue;
+      if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+        continue;
+      if (RS && RS->isScavengingFrameIndex((int)i))
+        continue;
+      if (MFI->isDeadObjectIndex(i))
+        continue;
+      if (MFI->getStackProtectorIndex() == (int)i)
+        continue;
+
+      switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+      case StackProtector::SSPLK_None:
+        continue;
+      case StackProtector::SSPLK_SmallArray:
+        SmallArrayObjs.insert(i);
+        continue;
+      case StackProtector::SSPLK_AddrOf:
+        AddrOfObjs.insert(i);
+        continue;
+      case StackProtector::SSPLK_LargeArray:
+        LargeArrayObjs.insert(i);
+        continue;
+      }
+      llvm_unreachable("Unexpected SSPLayoutKind.");
+    }
+
+    AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign, Skew);
+    AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign, Skew);
+    AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
+                          Offset, MaxAlign, Skew);
+  }
+
+  // Then assign frame offsets to stack objects that are not used to spill
+  // callee saved registers.
+  for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+    if (MFI->isObjectPreAllocated(i) &&
+        MFI->getUseLocalStackAllocationBlock())
+      continue;
+    if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+      continue;
+    if (RS && RS->isScavengingFrameIndex((int)i))
+      continue;
+    if (MFI->isDeadObjectIndex(i))
+      continue;
+    if (MFI->getStackProtectorIndex() == (int)i)
+      continue;
+    if (ProtectedObjs.count(i))
+      continue;
+
+    AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+  }
+
+  // Make sure the special register scavenging spill slot is closest to the
+  // stack pointer.
+  if (RS && !EarlyScavengingSlots) {
+    SmallVector<int, 8> SFIs;
+    RS->getScavengingFrameIndices(SFIs);
+    for (SmallVectorImpl<int>::iterator I = SFIs.begin(),
+         IE = SFIs.end(); I != IE; ++I)
+      AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew);
+  }
+
+  if (!TFI.targetHandlesStackFrameRounding()) {
+    // If we have reserved argument space for call sites in the function
+    // immediately on entry to the current function, count it as part of the
+    // overall stack size.
+    if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
+      Offset += MFI->getMaxCallFrameSize();
+
+    // Round up the size to a multiple of the alignment. If the function has
+    // any calls or alloca's, align to the target's StackAlignment value to
+    // ensure that the callee's frame or the alloca data is suitably aligned;
+    // otherwise, for leaf functions, align to the TransientStackAlignment
+    // value.
+    unsigned StackAlign;
+    if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+        (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+      StackAlign = TFI.getStackAlignment();
+    else
+      StackAlign = TFI.getTransientStackAlignment();
+
+    // If the frame pointer is eliminated, all frame offsets will be relative to
+    // SP not FP. Align to MaxAlign so this works.
+    StackAlign = std::max(StackAlign, MaxAlign);
+    Offset = RoundUpToAlignment(Offset, StackAlign, Skew);
+  }
+
+  // Update frame info to pretend that this is part of the stack...
+  int64_t StackSize = Offset - LocalAreaOffset;
+  MFI->setStackSize(StackSize);
+  NumBytesStackSpace += StackSize;
+}
+
+/// insertPrologEpilogCode - Scan the function for modified callee saved
+/// registers, insert spill code for these callee saved registers, then add
+/// prolog and epilog code to the function.
+///
+void WasmPEI::insertPrologEpilogCode(MachineFunction &Fn) {
+  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+
+  // Add prologue to the function...
+  for (MachineBasicBlock *SaveBlock : SaveBlocks)
+    TFI.emitPrologue(Fn, *SaveBlock);
+
+  // Add epilogue to restore the callee-save registers in each exiting block.
+  for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
+    TFI.emitEpilogue(Fn, *RestoreBlock);
+
+  for (MachineBasicBlock *SaveBlock : SaveBlocks)
+    TFI.inlineStackProbe(Fn, *SaveBlock);
+
+  // Emit additional code that is required to support segmented stacks, if
+  // we've been asked for it. This, when linked with a runtime with support
+  // for segmented stacks (libgcc is one), will result in allocating stack
+  // space in small chunks instead of one large contiguous block.
+  if (Fn.shouldSplitStack()) {
+    for (MachineBasicBlock *SaveBlock : SaveBlocks)
+      TFI.adjustForSegmentedStacks(Fn, *SaveBlock);
+  }
+
+  // Emit additional code that is required to explicitly handle the stack in
+  // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
+  // approach is rather similar to that of Segmented Stacks, but it uses a
+  // different conditional check and another BIF for allocating more stack
+  // space.
+  if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
+    for (MachineBasicBlock *SaveBlock : SaveBlocks)
+      TFI.adjustForHiPEPrologue(Fn, *SaveBlock);
+}
+
+/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
+/// register references and actual offsets.
+///
+void WasmPEI::replaceFrameIndices(MachineFunction &Fn) {
+  const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
+  if (!TFI.needsFrameIndexResolution(Fn)) return;
+
+  // Store SPAdj at exit of a basic block.
+  SmallVector<int, 8> SPState;
+  SPState.resize(Fn.getNumBlockIDs());
+  SmallPtrSet<MachineBasicBlock *, 8> Reachable;
+
+  // Iterate over the reachable blocks in DFS order.
+  for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
+       DFI != DFE; ++DFI) {
+    int SPAdj = 0;
+    // Check the exit state of the DFS stack predecessor.
+    if (DFI.getPathLength() >= 2) {
+      MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2);
+      assert(Reachable.count(StackPred) &&
+             "DFS stack predecessor is already visited.\n");
+      SPAdj = SPState[StackPred->getNumber()];
+    }
+    MachineBasicBlock *BB = *DFI;
+    replaceFrameIndices(BB, Fn, SPAdj);
+    SPState[BB->getNumber()] = SPAdj;
+  }
+
+  // Handle the unreachable blocks.
+  for (auto &BB : Fn) {
+    if (Reachable.count(&BB))
+      // Already handled in DFS traversal.
+      continue;
+    int SPAdj = 0;
+    replaceFrameIndices(&BB, Fn, SPAdj);
+  }
+}
+
+void WasmPEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
+                                  int &SPAdj) {
+  assert(Fn.getSubtarget().getRegisterInfo() &&
+         "getRegisterInfo() must be implemented!");
+  const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
+  const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo();
+  const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
+  unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+  unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
+
+  if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
+
+  bool InsideCallSequence = false;
+
+  for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+
+    if (I->getOpcode() == FrameSetupOpcode ||
+        I->getOpcode() == FrameDestroyOpcode) {
+      InsideCallSequence = (I->getOpcode() == FrameSetupOpcode);
+      SPAdj += TII.getSPAdjust(I);
+
+      MachineBasicBlock::iterator PrevI = BB->end();
+      if (I != BB->begin()) PrevI = std::prev(I);
+      TFI->eliminateCallFramePseudoInstr(Fn, *BB, I);
+
+      // Visit the instructions created by eliminateCallFramePseudoInstr().
+      if (PrevI == BB->end())
+        I = BB->begin(); // The replaced instr was the first in the block.
+      else
+        I = std::next(PrevI);
+      continue;
+    }
+
+    MachineInstr *MI = I;
+    bool DoIncr = true;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      if (!MI->getOperand(i).isFI())
+        continue;
+
+      // Frame indices in debug values are encoded in a target independent
+      // way with simply the frame index and offset rather than any
+      // target-specific addressing mode.
+      if (MI->isDebugValue()) {
+        assert(i == 0 && "Frame indices can only appear as the first "
+                         "operand of a DBG_VALUE machine instruction");
+        unsigned Reg;
+        MachineOperand &Offset = MI->getOperand(1);
+        Offset.setImm(Offset.getImm() +
+                      TFI->getFrameIndexReference(
+                          Fn, MI->getOperand(0).getIndex(), Reg));
+        MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
+        continue;
+      }
+
+      // TODO: This code should be commoned with the code for
+      // PATCHPOINT. There's no good reason for the difference in
+      // implementation other than historical accident. The only
+      // remaining difference is the unconditional use of the stack
+      // pointer as the base register.
+      if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
+        assert((!MI->isDebugValue() || i == 0) &&
+               "Frame indices can only appear as the first operand of a "
+               "DBG_VALUE machine instruction");
+        unsigned Reg;
+        MachineOperand &Offset = MI->getOperand(i + 1);
+        const unsigned refOffset =
+            TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(),
+                                              Reg);
+
+        Offset.setImm(Offset.getImm() + refOffset);
+        MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/);
+        continue;
+      }
+
+      // Some instructions (e.g. inline asm instructions) can have
+      // multiple frame indices and/or cause eliminateFrameIndex
+      // to insert more than one instruction. We need the register
+      // scavenger to go through all of these instructions so that
+      // it can update its register information. We keep the
+      // iterator at the point before insertion so that we can
+      // revisit them in full.
+      bool AtBeginning = (I == BB->begin());
+      if (!AtBeginning) --I;
+
+      // If this instruction has a FrameIndex operand, we need to
+      // use that target machine register info object to eliminate
+      // it.
+      TRI.eliminateFrameIndex(MI, SPAdj, i,
+                              FrameIndexVirtualScavenging ? nullptr : RS);
+
+      // Reset the iterator if we were at the beginning of the BB.
+      if (AtBeginning) {
+        I = BB->begin();
+        DoIncr = false;
+      }
+
+      MI = nullptr;
+      break;
+    }
+
+    // If we are looking at a call sequence, we need to keep track of
+    // the SP adjustment made by each instruction in the sequence.
+    // This includes both the frame setup/destroy pseudos (handled above),
+    // as well as other instructions that have side effects w.r.t the SP.
+    // Note that this must come after eliminateFrameIndex, because
+    // if I itself referred to a frame index, we shouldn't count its own
+    // adjustment.
+    if (MI && InsideCallSequence)
+      SPAdj += TII.getSPAdjust(MI);
+
+    if (DoIncr && I != BB->end()) ++I;
+
+    // Update register states.
+    if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
+  }
+}
+
+/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
+/// with physical registers. Use the register scavenger to find an
+/// appropriate register to use.
+///
+/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
+/// iterate over the vreg use list, which at this point only contains machine
+/// operands for which eliminateFrameIndex needs a new scratch reg.
+void
+WasmPEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
+  // Run through the instructions and find any virtual registers.
+  for (MachineFunction::iterator BB = Fn.begin(),
+       E = Fn.end(); BB != E; ++BB) {
+    RS->enterBasicBlock(&*BB);
+
+    int SPAdj = 0;
+
+    // The instruction stream may change in the loop, so check BB->end()
+    // directly.
+    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
+      // We might end up here again with a NULL iterator if we scavenged a
+      // register for which we inserted spill code for definition by what was
+      // originally the first instruction in BB.
+      if (I == MachineBasicBlock::iterator(nullptr))
+        I = BB->begin();
+
+      MachineInstr *MI = I;
+      MachineBasicBlock::iterator J = std::next(I);
+      MachineBasicBlock::iterator P =
+          I == BB->begin() ? MachineBasicBlock::iterator(nullptr)
+                           : std::prev(I);
+
+      // RS should process this instruction before we might scavenge at this
+      // location. This is because we might be replacing a virtual register
+      // defined by this instruction, and if so, registers killed by this
+      // instruction are available, and defined registers are not.
+      RS->forward(I);
+
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        if (MI->getOperand(i).isReg()) {
+          MachineOperand &MO = MI->getOperand(i);
+          unsigned Reg = MO.getReg();
+          if (Reg == 0)
+            continue;
+          if (!TargetRegisterInfo::isVirtualRegister(Reg))
+            continue;
+
+          // When we first encounter a new virtual register, it
+          // must be a definition.
+          assert(MI->getOperand(i).isDef() &&
+                 "frame index virtual missing def!");
+          // Scavenge a new scratch register
+          const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
+          unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
+
+          ++NumScavengedRegs;
+
+          // Replace this reference to the virtual register with the
+          // scratch register.
+          assert(ScratchReg && "Missing scratch register!");
+          Fn.getRegInfo().replaceRegWith(Reg, ScratchReg);
+
+          // Because this instruction was processed by the RS before this
+          // register was allocated, make sure that the RS now records the
+          // register as being used.
+          RS->setRegUsed(ScratchReg);
+        }
+      }
+
+      // If the scavenger needed to use one of its spill slots, the
+      // spill code will have been inserted in between I and J. This is a
+      // problem because we need the spill code before I: Move I to just
+      // prior to J.
+      if (I != std::prev(J)) {
+        BB->splice(J, &*BB, I);
+
+        // Before we move I, we need to prepare the RS to visit I again.
+        // Specifically, RS will assert if it sees uses of registers that
+        // it believes are undefined. Because we have already processed
+        // register kills in I, when it visits I again, it will believe that
+        // those registers are undefined. To avoid this situation, unprocess
+        // the instruction I.
+        assert(RS->getCurrentPosition() == I &&
+               "The register scavenger has an unexpected position");
+        I = P;
+        RS->unprocess(P);
+      } else
+        ++I;
+    }
+  }
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
new file mode 100644
index 000000000000..4ad6eed7385b
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -0,0 +1,86 @@
+//===-- WebAssemblyPeephole.cpp - WebAssembly Peephole Optimizations ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Late peephole optimizations for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-peephole"
+
+namespace {
+class WebAssemblyPeephole final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly late peephole optimizer";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID;
+  WebAssemblyPeephole() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyPeephole::ID = 0;
+FunctionPass *llvm::createWebAssemblyPeephole() {
+  return new WebAssemblyPeephole();
+}
+
+bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
+  bool Changed = false;
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+  for (auto &MBB : MF)
+    for (auto &MI : MBB)
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case WebAssembly::STORE8_I32:
+      case WebAssembly::STORE16_I32:
+      case WebAssembly::STORE8_I64:
+      case WebAssembly::STORE16_I64:
+      case WebAssembly::STORE32_I64:
+      case WebAssembly::STORE_F32:
+      case WebAssembly::STORE_F64:
+      case WebAssembly::STORE_I32:
+      case WebAssembly::STORE_I64: {
+        // Store instructions return their value operand. If we ended up using
+        // the same register for both, replace it with a dead def so that it
+        // can use $discard instead.
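+        // For illustration (made-up virtual registers, simplified operands):
+        //   %v = STORE_I32 ..., %v    ; result register == value operand
+        // becomes
+        //   %dead = STORE_I32 ..., %v ; %dead is a fresh, dead def
+        // so the printer can emit $discard rather than naming a local.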
+        MachineOperand &MO = MI.getOperand(0);
+        unsigned OldReg = MO.getReg();
+        // TODO: Handle SP/physregs
+        if (OldReg == MI.getOperand(3).getReg() &&
+            TargetRegisterInfo::isVirtualRegister(MI.getOperand(3).getReg())) {
+          Changed = true;
+          unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+          MO.setReg(NewReg);
+          MO.setIsDead();
+          MFI.stackifyVReg(NewReg);
+          MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg);
+        }
+      }
+      }
+
+  return Changed;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
new file mode 100644
index 000000000000..9ec66595d8da
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -0,0 +1,175 @@
+//===-- WebAssemblyRegColoring.cpp - Register coloring --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a virtual register coloring pass.
+///
+/// WebAssembly doesn't have a fixed number of registers, but it is still
+/// desirable to minimize the total number of registers used in each function.
+///
+/// This code is modeled after lib/CodeGen/StackSlotColoring.cpp.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-coloring"
+
+namespace {
+class WebAssemblyRegColoring final : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyRegColoring() : MachineFunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "WebAssembly Register Coloring";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<LiveIntervals>();
+    AU.addRequired<MachineBlockFrequencyInfo>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addPreservedID(MachineDominatorsID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+char WebAssemblyRegColoring::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegColoring() {
+  return new WebAssemblyRegColoring();
+}
+
+// Compute the total spill weight for VReg.
+static float computeWeight(const MachineRegisterInfo *MRI,
+                           const MachineBlockFrequencyInfo *MBFI,
+                           unsigned VReg) {
+  float weight = 0.0f;
+  for (MachineOperand &MO : MRI->reg_nodbg_operands(VReg))
+    weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI,
+                                            MO.getParent());
+  return weight;
+}
+
+bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Register Coloring **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
+
+  // If there are calls to setjmp or sigsetjmp, don't perform coloring. Virtual
+  // registers could be modified before the longjmp is executed, resulting in
+  // the wrong value being used afterwards. (See .)
+  // TODO: Does WebAssembly need to care about setjmp for register coloring?
+  if (MF.exposesReturnsTwice())
+    return false;
+
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  LiveIntervals *Liveness = &getAnalysis<LiveIntervals>();
+  const MachineBlockFrequencyInfo *MBFI =
+      &getAnalysis<MachineBlockFrequencyInfo>();
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+
+  // Gather all register intervals into a list and sort them.
+  unsigned NumVRegs = MRI->getNumVirtRegs();
+  SmallVector<LiveInterval *, 0> SortedIntervals;
+  SortedIntervals.reserve(NumVRegs);
+
+  DEBUG(dbgs() << "Interesting register intervals:\n");
+  for (unsigned i = 0; i < NumVRegs; ++i) {
+    unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
+    if (MFI.isVRegStackified(VReg))
+      continue;
+    // Skip unused registers, which can use $discard.
+    if (MRI->use_empty(VReg))
+      continue;
+
+    LiveInterval *LI = &Liveness->getInterval(VReg);
+    assert(LI->weight == 0.0f);
+    LI->weight = computeWeight(MRI, MBFI, VReg);
+    DEBUG(LI->dump());
+    SortedIntervals.push_back(LI);
+  }
+  DEBUG(dbgs() << '\n');
+
+  // Sort them to put arguments first (since we don't want to rename live-in
+  // registers), by weight next, and then by position.
+  // TODO: Investigate more intelligent sorting heuristics. For starters, we
+  // should try to coalesce adjacent live intervals before non-adjacent ones.
+  std::sort(SortedIntervals.begin(), SortedIntervals.end(),
+            [MRI](LiveInterval *LHS, LiveInterval *RHS) {
+              if (MRI->isLiveIn(LHS->reg) != MRI->isLiveIn(RHS->reg))
+                return MRI->isLiveIn(LHS->reg);
+              if (LHS->weight != RHS->weight)
+                return LHS->weight > RHS->weight;
+              if (LHS->empty() || RHS->empty())
+                return !LHS->empty() && RHS->empty();
+              return *LHS < *RHS;
+            });
+
+  DEBUG(dbgs() << "Coloring register intervals:\n");
+  SmallVector<unsigned, 16> SlotMapping(SortedIntervals.size(), -1u);
+  SmallVector<SmallVector<LiveInterval *, 4>, 16> Assignments(
+      SortedIntervals.size());
+  BitVector UsedColors(SortedIntervals.size());
+  bool Changed = false;
+  for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
+    LiveInterval *LI = SortedIntervals[i];
+    unsigned Old = LI->reg;
+    size_t Color = i;
+    const TargetRegisterClass *RC = MRI->getRegClass(Old);
+
+    // Check if it's possible to reuse any of the used colors.
+    if (!MRI->isLiveIn(Old))
+      for (int C(UsedColors.find_first()); C != -1;
+           C = UsedColors.find_next(C)) {
+        if (MRI->getRegClass(SortedIntervals[C]->reg) != RC)
+          continue;
+        for (LiveInterval *OtherLI : Assignments[C])
+          if (!OtherLI->empty() && OtherLI->overlaps(*LI))
+            goto continue_outer;
+        Color = C;
+        break;
+      continue_outer:;
+      }
+
+    unsigned New = SortedIntervals[Color]->reg;
+    SlotMapping[i] = New;
+    Changed |= Old != New;
+    UsedColors.set(Color);
+    Assignments[Color].push_back(LI);
+    DEBUG(dbgs() << "Assigning vreg"
+                 << TargetRegisterInfo::virtReg2Index(LI->reg) << " to vreg"
+                 << TargetRegisterInfo::virtReg2Index(New) << "\n");
+  }
+  if (!Changed)
+    return false;
+
+  // Rewrite register operands.
+  for (size_t i = 0, e = SortedIntervals.size(); i < e; ++i) {
+    unsigned Old = SortedIntervals[i]->reg;
+    unsigned New = SlotMapping[i];
+    if (Old != New)
+      MRI->replaceRegWith(Old, New);
+  }
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
new file mode 100644
index 000000000000..f621db070b5b
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -0,0 +1,109 @@
+//===-- WebAssemblyRegNumbering.cpp - Register Numbering ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a pass which assigns WebAssembly register
+/// numbers for CodeGen virtual registers.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-numbering"
+
+namespace {
+class WebAssemblyRegNumbering final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly Register Numbering";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyRegNumbering() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegNumbering::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegNumbering() {
+  return new WebAssemblyRegNumbering();
+}
+
+bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** Register Numbering **********\n"
+                  "********** Function: "
+               << MF.getName() << '\n');
+
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const MachineFrameInfo &FrameInfo = *MF.getFrameInfo();
+
+  MFI.initWARegs();
+
+  // WebAssembly argument registers are in the same index space as local
+  // variables. Assign the numbers for them first.
+  MachineBasicBlock &EntryMBB = MF.front();
+  for (MachineInstr &MI : EntryMBB) {
+    switch (MI.getOpcode()) {
+    case WebAssembly::ARGUMENT_I32:
+    case WebAssembly::ARGUMENT_I64:
+    case WebAssembly::ARGUMENT_F32:
+    case WebAssembly::ARGUMENT_F64:
+      MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm());
+      break;
+    default:
+      break;
+    }
+  }
+
+  // Then assign regular WebAssembly registers for all remaining used
+  // virtual registers. TODO: Consider sorting the registers by frequency of
+  // use, to maximize usage of small immediate fields.
+  unsigned NumArgRegs = MFI.getParams().size();
+  unsigned NumVRegs = MF.getRegInfo().getNumVirtRegs();
+  unsigned NumStackRegs = 0;
+  unsigned CurReg = 0;
+  for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) {
+    unsigned VReg = TargetRegisterInfo::index2VirtReg(VRegIdx);
+    // Handle stackified registers.
+    if (MFI.isVRegStackified(VReg)) {
+      MFI.setWAReg(VReg, INT32_MIN | NumStackRegs++);
+      continue;
+    }
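+    // For illustration: the INT32_MIN tag above means bit 31 distinguishes
+    // stackified registers from locals, e.g. the first stack register is
+    // numbered 0x80000000 while the first non-argument local below will be
+    // NumArgRegs + 0.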
+    // Skip unused registers.
+    if (MRI.use_empty(VReg))
+      continue;
+    if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg)
+      MFI.setWAReg(VReg, NumArgRegs + CurReg++);
+  }
+  // Allocate locals for used physical registers.
+  if (FrameInfo.getStackSize() > 0)
+    MFI.addPReg(WebAssembly::SP32, CurReg++);
+
+  return true;
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
new file mode 100644
index 000000000000..89ef5cdb2bef
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -0,0 +1,265 @@
+//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a register stacking pass.
+///
+/// This pass reorders instructions to put register uses and defs in an order
+/// such that they form single-use expression trees. Registers fitting this
+/// form are then marked as "stackified", meaning references to them are
+/// replaced by "push" and "pop" from the stack.
+///
+/// This is primarily a code size optimization, since temporary values on the
+/// expression stack don't need to be named.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-stackify"
+
+namespace {
+class WebAssemblyRegStackify final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly Register Stackify";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addPreservedID(MachineDominatorsID);
+    AU.addPreservedID(LiveVariablesID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegStackify::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegStackify() {
+  return new WebAssemblyRegStackify();
+}
+
+// Decorate the given instruction with implicit operands that enforce the
+// expression stack ordering constraints for an instruction which is on
+// the expression stack.
+static void ImposeStackOrdering(MachineInstr *MI) {
+  // Write the opaque EXPR_STACK register.
+  if (!MI->definesRegister(WebAssembly::EXPR_STACK))
+    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+                                             /*isDef=*/true,
+                                             /*isImp=*/true));
+
+  // Also read the opaque EXPR_STACK register.
+  if (!MI->readsRegister(WebAssembly::EXPR_STACK))
+    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+                                             /*isDef=*/false,
+                                             /*isImp=*/true));
+}
+
+// Test whether it's safe to move Def to just before Insert.
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
new file mode 100644
index 000000000000..89ef5cdb2bef
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -0,0 +1,265 @@
+//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a register stacking pass.
+///
+/// This pass reorders instructions to put register uses and defs in an order
+/// such that they form single-use expression trees. Registers fitting this form
+/// are then marked as "stackified", meaning references to them are replaced by
+/// "push" and "pop" from the stack.
+///
+/// This is primarily a code size optimization, since temporary values on the
+/// expression stack don't need to be named.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-reg-stackify"
+
+namespace {
+class WebAssemblyRegStackify final : public MachineFunctionPass {
+  const char *getPassName() const override {
+    return "WebAssembly Register Stackify";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<LiveIntervals>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addPreserved<SlotIndexes>();
+    AU.addPreserved<LiveIntervals>();
+    AU.addPreservedID(MachineDominatorsID);
+    AU.addPreservedID(LiveVariablesID);
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyRegStackify::ID = 0;
+FunctionPass *llvm::createWebAssemblyRegStackify() {
+  return new WebAssemblyRegStackify();
+}
+
+// Decorate the given instruction with implicit operands that enforce the
+// expression stack ordering constraints for an instruction which is on
+// the expression stack.
+static void ImposeStackOrdering(MachineInstr *MI) {
+  // Write the opaque EXPR_STACK register.
+  if (!MI->definesRegister(WebAssembly::EXPR_STACK))
+    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+                                             /*isDef=*/true,
+                                             /*isImp=*/true));
+
+  // Also read the opaque EXPR_STACK register.
+  if (!MI->readsRegister(WebAssembly::EXPR_STACK))
+    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+                                             /*isDef=*/false,
+                                             /*isImp=*/true));
+}
+
+// Test whether it's safe to move Def to just before Insert.
+// TODO: Compute memory dependencies in a way that doesn't require always
+// walking the block.
+// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
+// more precise.
+static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
+                         AliasAnalysis &AA, LiveIntervals &LIS,
+                         MachineRegisterInfo &MRI) {
+  assert(Def->getParent() == Insert->getParent());
+  bool SawStore = false, SawSideEffects = false;
+  MachineBasicBlock::const_iterator D(Def), I(Insert);
+
+  // Check for register dependencies.
+  for (const MachineOperand &MO : Def->operands()) {
+    if (!MO.isReg() || MO.isUndef())
+      continue;
+    unsigned Reg = MO.getReg();
+
+    // If the register is dead here and at Insert, ignore it.
+    if (MO.isDead() && Insert->definesRegister(Reg) &&
+        !Insert->readsRegister(Reg))
+      continue;
+
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      // If the physical register is never modified, ignore it.
+      if (!MRI.isPhysRegModified(Reg))
+        continue;
+      // Otherwise, it's a physical register with unknown liveness.
+      return false;
+    }
+
+    // Ask LiveIntervals whether moving this virtual register use or def to
+    // Insert will change which value numbers are seen.
+    const LiveInterval &LI = LIS.getInterval(Reg);
+    VNInfo *DefVNI = MO.isDef() ?
+        LI.getVNInfoAt(LIS.getInstructionIndex(Def).getRegSlot()) :
+        LI.getVNInfoBefore(LIS.getInstructionIndex(Def));
+    assert(DefVNI && "Instruction input missing value number");
+    VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(Insert));
+    if (InsVNI && DefVNI != InsVNI)
+      return false;
+  }
+
+  // Check for memory dependencies and side effects.
+  for (--I; I != D; --I)
+    SawSideEffects |= I->isSafeToMove(&AA, SawStore);
+  return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) &&
+         !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore));
+}
+
+bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "********** Register Stackifying **********\n"
+                  "********** Function: "
+               << MF.getName() << '\n');
+
+  bool Changed = false;
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+  LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+
+  // Walk the instructions from the bottom up. Currently we don't look past
+  // block boundaries, and the blocks aren't ordered so the block visitation
+  // order isn't significant, but we may want to change this in the future.
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : reverse(MBB)) {
+      MachineInstr *Insert = &MI;
+      // Don't nest anything inside a phi.
+      if (Insert->getOpcode() == TargetOpcode::PHI)
+        break;
+
+      // Don't nest anything inside an inline asm, because we don't have
+      // constraints for $push inputs.
+      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
+        break;
+
+      // Iterate through the inputs in reverse order, since we'll be pulling
+      // operands off the stack in LIFO order.
+      bool AnyStackified = false;
+      for (MachineOperand &Op : reverse(Insert->uses())) {
+        // We're only interested in explicit virtual register operands.
+        if (!Op.isReg() || Op.isImplicit() || !Op.isUse())
+          continue;
+
+        unsigned Reg = Op.getReg();
+
+        // Only consider registers with a single definition.
+        // TODO: Eventually we may relax this, to stackify phi transfers.
+        MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
+        if (!Def)
+          continue;
+
+        // There's no use in nesting implicit defs inside anything.
+        if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+          continue;
+
+        // Don't nest an INLINE_ASM def into anything, because we don't have
+        // constraints for $pop outputs.
+        if (Def->getOpcode() == TargetOpcode::INLINEASM)
+          continue;
+
+        // Don't nest PHIs inside of anything.
+        if (Def->getOpcode() == TargetOpcode::PHI)
+          continue;
+
+        // Argument instructions represent live-in registers and not real
+        // instructions.
+        if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
+            Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
+            Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
+            Def->getOpcode() == WebAssembly::ARGUMENT_F64)
+          continue;
+
+        // Single-use expression trees require defs that have one use.
+        // TODO: Eventually we'll relax this, to take advantage of set_local
+        // returning its result.
+        if (!MRI.hasOneUse(Reg))
+          continue;
+
+        // For now, be conservative and don't look across block boundaries.
+        // TODO: Be more aggressive?
+        if (Def->getParent() != &MBB)
+          continue;
+
+        // Don't move instructions that have side effects or memory dependencies
+        // or other complications.
+        if (!IsSafeToMove(Def, Insert, AA, LIS, MRI))
+          continue;
+
+        Changed = true;
+        AnyStackified = true;
+        // Move the def down and nest it in the current instruction.
+        MBB.splice(Insert, &MBB, Def);
+        LIS.handleMove(Def);
+        MFI.stackifyVReg(Reg);
+        ImposeStackOrdering(Def);
+        Insert = Def;
+      }
+      if (AnyStackified)
+        ImposeStackOrdering(&MI);
+    }
+  }
+
+  // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere
+  // so that it never looks like a use-before-def.
+  if (Changed) {
+    MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
+    for (MachineBasicBlock &MBB : MF)
+      MBB.addLiveIn(WebAssembly::EXPR_STACK);
+  }
+
+#ifndef NDEBUG
+  // Verify that pushes and pops are performed in LIFO order.
+  SmallVector<unsigned, 0> Stack;
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
+        if (!MO.isReg())
+          continue;
+        unsigned VReg = MO.getReg();
+
+        // Don't stackify physregs like SP or FP.
+        if (!TargetRegisterInfo::isVirtualRegister(VReg))
+          continue;
+
+        if (MFI.isVRegStackified(VReg)) {
+          if (MO.isDef())
+            Stack.push_back(VReg);
+          else
+            assert(Stack.pop_back_val() == VReg);
+        }
+      }
+    }
+    // TODO: Generalize this code to support keeping values on the stack across
+    // basic block boundaries.
+    assert(Stack.empty());
+  }
+#endif
+
+  return Changed;
+}
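The NDEBUG block at the end of the pass asserts stack discipline: every stackified def is a push, its single use is a pop, and pops must mirror pushes in LIFO order. The same invariant in a self-contained toy (a hypothetical encoding where positive values push a register and negative values pop it):

#include <cassert>
#include <vector>

int main() {
  // Operand stream for "(a + b) * c" after stackification: each def
  // immediately precedes its single use, so pops match the most recent
  // unmatched push.
  std::vector<int> Ops = {1, 2, -2, -1, 3, -3};
  std::vector<int> Stack;
  for (int Op : Ops) {
    if (Op > 0) {
      Stack.push_back(Op); // a stackified def pushes its register
    } else {
      assert(!Stack.empty() && Stack.back() == -Op && "LIFO order violated");
      Stack.pop_back();    // its single use pops it
    }
  }
  assert(Stack.empty() && "every push was popped");
}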
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 385c40bf6693..dcada45f96d1 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -43,7 +43,7 @@ WebAssemblyRegisterInfo::getCalleeSavedRegs(const MachineFunction *) const {
 }
 
 BitVector
-WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const {
   BitVector Reserved(getNumRegs());
   for (auto Reg : {WebAssembly::SP32, WebAssembly::SP64, WebAssembly::FP32,
                    WebAssembly::FP64})
@@ -52,9 +52,37 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 }
 
 void WebAssemblyRegisterInfo::eliminateFrameIndex(
-    MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
-    RegScavenger *RS) const {
-  llvm_unreachable("WebAssemblyRegisterInfo::eliminateFrameIndex"); // FIXME
+    MachineBasicBlock::iterator II, int SPAdj,
+    unsigned FIOperandNum, RegScavenger * /*RS*/) const {
+  assert(SPAdj == 0);
+  MachineInstr &MI = *II;
+
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+  const MachineFrameInfo &MFI = *MF.getFrameInfo();
+  int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
+
+  if (MI.mayLoadOrStore()) {
+    // If this is a load or store, make it relative to SP and fold the frame
+    // offset directly in.
+    assert(MI.getOperand(1).getImm() == 0 &&
+           "Can't eliminate FI yet if offset is already set");
+    MI.getOperand(1).setImm(FrameOffset);
+    MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+  } else {
+    // Otherwise create an i32.add SP, offset and make it the operand.
+    auto &MRI = MF.getRegInfo();
+    const auto *TII = MF.getSubtarget().getInstrInfo();
+
+    unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32), OffsetReg)
+        .addImm(FrameOffset);
+    BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32), OffsetReg)
+        .addReg(WebAssembly::SP32)
+        .addReg(OffsetReg);
+    MI.getOperand(FIOperandNum).ChangeToRegister(OffsetReg, /*IsDef=*/false);
+  }
 }
 
 unsigned
@@ -67,21 +95,11 @@ WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   return Regs[TFI->hasFP(MF)][TT.isArch64Bit()];
 }
 
-bool WebAssemblyRegisterInfo::canRealignStack(const MachineFunction &MF) const {
-  return !MF.getFunction()->hasFnAttribute("no-realign-stack");
-}
-
-// FIXME: share this with other backends with identical implementation?
-bool WebAssemblyRegisterInfo::needsStackRealignment(
-    const MachineFunction &MF) const {
-  const MachineFrameInfo *MFI = MF.getFrameInfo();
-  const WebAssemblyFrameLowering *TFI = getFrameLowering(MF);
-  const Function *F = MF.getFunction();
-  unsigned StackAlign = TFI->getStackAlignment();
-  bool requiresRealignment =
-      ((MFI->getMaxAlignment() > StackAlign) ||
-       F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
-                                       Attribute::StackAlignment));
-
-  return requiresRealignment && canRealignStack(MF);
+const TargetRegisterClass *
+WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF,
+                                            unsigned Kind) const {
+  assert(Kind == 0 && "Only one kind of pointer on WebAssembly");
+  if (MF.getSubtarget<WebAssemblySubtarget>().hasAddr64())
+    return &WebAssembly::I64RegClass;
+  return &WebAssembly::I32RegClass;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
index dbdb9d0457af..ad1d71eebf22 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h
@@ -42,9 +42,9 @@ public:
   // Debug information queries.
   unsigned getFrameRegister(const MachineFunction &MF) const override;
 
-  // Base pointer (stack realignment) support.
-  bool canRealignStack(const MachineFunction &MF) const;
-  bool needsStackRealignment(const MachineFunction &MF) const override;
+  const TargetRegisterClass *
+  getPointerRegClass(const MachineFunction &MF,
+                     unsigned Kind = 0) const override;
 };
 
 } // end namespace llvm
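In eliminateFrameIndex above, a frame index resolves to SP plus `StackSize + ObjectOffset`: loads and stores fold that sum into their offset immediate, while other users get an explicit CONST_I32/ADD_I32 pair. The arithmetic as a standalone sketch (toy types, illustrative values only):

#include <cassert>

// Toy model: objects live below the incoming SP; the prologue subtracts
// StackSize, so an object's address is SP + (StackSize + ObjectOffset).
struct ToyFrame {
  int StackSize;        // bytes reserved by the prologue
  int ObjectOffsets[2]; // per-frame-index offsets (negative)
};

static int frameOffset(const ToyFrame &F, int FrameIndex) {
  return F.StackSize + F.ObjectOffsets[FrameIndex];
}

int main() {
  ToyFrame F = {32, {-8, -24}};
  // A load of slot 0 can encode this directly: i32.load offset=24 ($sp).
  assert(frameOffset(F, 0) == 24);
  // A non-memory user of slot 1 needs: $r = i32.add $sp, 8.
  assert(frameOffset(F, 1) == 8);
}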
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 2ba42eb94a40..80a83fa76b57 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -33,22 +33,26 @@ def FP64 : WebAssemblyReg<"%FP64">;
 def SP32 : WebAssemblyReg<"%SP32">;
 def SP64 : WebAssemblyReg<"%SP64">;
 
-// TODO(jfb) The following comes from NVPTX. Is it really needed, or can we do
-//           away with it? Try deleting once the backend works.
-// WebAssembly uses virtual registers, but the backend defines a few physical
-// registers here to keep SDAG and the MachineInstr layers happy.
-foreach i = 0-4 in {
-  def I#i : WebAssemblyReg<"%i."#i>; // i32
-  def L#i : WebAssemblyReg<"%l."#i>; // i64
-  def F#i : WebAssemblyReg<"%f."#i>; // f32
-  def D#i : WebAssemblyReg<"%d."#i>; // f64
-}
+// The register allocation framework requires register classes have at least
+// one register, so we define a few for the floating point register classes
+// since we otherwise don't need a physical register in those classes.
+def F32_0 : WebAssemblyReg<"%f32.0">;
+def F64_0 : WebAssemblyReg<"%f64.0">;
+
+// The expression stack "register". This is an opaque entity which serves to
+// order uses and defs that must remain in LIFO order.
+def EXPR_STACK : WebAssemblyReg<"STACK">;
+
+// The incoming arguments "register". This is an opaque entity which serves to
+// order the ARGUMENT instructions that are emulating live-in registers and
+// must not be scheduled below other instructions.
+def ARGUMENTS : WebAssemblyReg<"ARGUMENTS">;
 
 //===----------------------------------------------------------------------===//
 // Register classes
 //===----------------------------------------------------------------------===//
 
-def Int32 : WebAssemblyRegClass<[i32], 32, (add (sequence "I%u", 0, 4), SP32)>;
-def Int64 : WebAssemblyRegClass<[i64], 64, (add (sequence "L%u", 0, 4), SP64)>;
-def Float32 : WebAssemblyRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
-def Float64 : WebAssemblyRegClass<[f64], 64, (add (sequence "D%u", 0, 4))>;
+def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32)>;
+def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>;
+def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
+def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
new file mode 100644
index 000000000000..4e08b2b079eb
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -0,0 +1,124 @@
+//===-- WebAssemblyStoreResults.cpp - Optimize using store result values --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements an optimization pass using store result values.
+///
+/// WebAssembly's store instructions return the stored value. This is to enable
+/// an optimization wherein uses of the stored value can be replaced by uses of
+/// the store's result value, making the stored value register more likely to
+/// be single-use, thus more likely to be useful to register stackifying, and
+/// potentially also exposing the store to register stackifying. These both can
+/// reduce get_local/set_local traffic.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-store-results"
+
+namespace {
+class WebAssemblyStoreResults final : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
+
+  const char *getPassName() const override {
+    return "WebAssembly Store Results";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineBlockFrequencyInfo>();
+    AU.addPreserved<MachineBlockFrequencyInfo>();
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+};
+} // end anonymous namespace
+
+char WebAssemblyStoreResults::ID = 0;
+FunctionPass *llvm::createWebAssemblyStoreResults() {
+  return new WebAssemblyStoreResults();
+}
+
+bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+    dbgs() << "********** Store Results **********\n"
+           << "********** Function: " << MF.getName() << '\n';
+  });
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  bool Changed = false;
+
+  assert(MRI.isSSA() && "StoreResults depends on SSA form");
+
+  for (auto &MBB : MF) {
+    DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n');
+    for (auto &MI : MBB)
+      switch (MI.getOpcode()) {
+      default:
+        break;
+      case WebAssembly::STORE8_I32:
+      case WebAssembly::STORE16_I32:
+      case WebAssembly::STORE8_I64:
+      case WebAssembly::STORE16_I64:
+      case WebAssembly::STORE32_I64:
+      case WebAssembly::STORE_F32:
+      case WebAssembly::STORE_F64:
+      case WebAssembly::STORE_I32:
+      case WebAssembly::STORE_I64:
+        unsigned ToReg = MI.getOperand(0).getReg();
+        unsigned FromReg = MI.getOperand(3).getReg();
+        for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
+          MachineOperand &O = *I++;
+          MachineInstr *Where = O.getParent();
+          if (Where->getOpcode() == TargetOpcode::PHI) {
+            // PHIs use their operands on their incoming CFG edges rather than
+            // in their parent blocks. Get the basic block paired with this use
+            // of FromReg and check that MI's block dominates it.
+            MachineBasicBlock *Pred =
+                Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB();
+            if (!MDT.dominates(&MBB, Pred))
+              continue;
+          } else {
+            // For a non-PHI, check that MI dominates the instruction in the
+            // normal way.
+            if (&MI == Where || !MDT.dominates(&MI, Where))
+              continue;
+          }
+          Changed = true;
+          DEBUG(dbgs() << "Setting operand " << O << " in " << *Where
+                       << " from " << MI << "\n");
+          O.setReg(ToReg);
+          // If the store's def was previously dead, it is no longer. But the
+          // dead flag shouldn't be set yet.
+          assert(!MI.getOperand(0).isDead() && "Dead flag set on store result");
+        }
+      }
+  }
+
+  return Changed;
+}
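The rewrite loop above is a dominance query plus a register replacement. In a single straight-line block, "the store dominates the use" degenerates to "the store comes earlier", which this toy captures (made-up registers and instruction indices, not the LLVM API):

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

int main() {
  const int FromReg = 1, ToReg = 2;  // store reads FromReg, defines ToReg
  const std::size_t StoreIdx = 3;    // position of the store in the block

  // (instruction index, register read) pairs that currently read FromReg.
  std::vector<std::pair<std::size_t, int>> Uses = {
      {1, FromReg}, // above the store: must keep reading FromReg
      {5, FromReg}, // below the store: may read the store's result instead
  };

  for (auto &U : Uses)
    if (U.second == FromReg && U.first > StoreIdx)
      U.second = ToReg; // the O.setReg(ToReg) step, in miniature

  assert(Uses[0].second == FromReg && Uses[1].second == ToReg);
}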
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
index 3d9e7aacbfbf..cb2d5a63a19f 100644
--- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp
@@ -46,3 +46,4 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT,
       TLInfo(TM, *this) {}
 
 bool WebAssemblySubtarget::enableMachineScheduler() const { return true; }
+bool WebAssemblySubtarget::useAA() const { return true; }
diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h
index 6f1761940930..f530a290fa0e 100644
--- a/lib/Target/WebAssembly/WebAssemblySubtarget.h
+++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h
@@ -61,9 +61,15 @@ public:
   const WebAssemblyTargetLowering *getTargetLowering() const override {
     return &TLInfo;
   }
+  const WebAssemblyInstrInfo *getInstrInfo() const override {
+    return &InstrInfo;
+  }
+  const WebAssemblyRegisterInfo *getRegisterInfo() const override {
+    return &getInstrInfo()->getRegisterInfo();
+  }
   const Triple &getTargetTriple() const { return TargetTriple; }
   bool enableMachineScheduler() const override;
-  bool useAA() const override { return true; }
+  bool useAA() const override;
 
   // Predicates used by WebAssemblyInstrInfo.td.
   bool hasAddr64() const { return TargetTriple.isArch64Bit(); }
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 6f93248bd13c..e31ea46de9f5 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -45,11 +45,16 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
     const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
     const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
     CodeGenOpt::Level OL)
-    : LLVMTargetMachine(T, TT.isArch64Bit()
-                               ? "e-p:64:64-i64:64-v128:8:128-n32:64-S128"
-                               : "e-p:32:32-i64:64-v128:8:128-n32:64-S128",
+    : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-p:64:64-i64:64-n32:64-S128"
+                                            : "e-p:32:32-i64:64-n32:64-S128",
                         TT, CPU, FS, Options, RM, CM, OL),
       TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
+  // WebAssembly type-checks expressions, but a noreturn function with a return
+  // type that doesn't match the context will cause a check failure. So we lower
+  // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
+  // 'unreachable' expression which is meant for that case.
+  this->Options.TrapUnreachable = true;
+
   initAsmInfo();
 
   // We need a reducible CFG, so disable some optimizations which tend to
@@ -77,7 +82,7 @@ WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const {
     // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
-    I = make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
+    I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this);
   }
   return I.get();
 }
@@ -94,23 +99,18 @@ public:
   }
 
   FunctionPass *createTargetRegisterAllocator(bool) override;
-  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
-  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
 
   void addIRPasses() override;
-  bool addPreISel() override;
   bool addInstSelector() override;
   bool addILPOpts() override;
   void addPreRegAlloc() override;
-  void addRegAllocPasses(bool Optimized);
   void addPostRegAlloc() override;
-  void addPreSched2() override;
   void addPreEmitPass() override;
 };
 } // end anonymous namespace
 
 TargetIRAnalysis WebAssemblyTargetMachine::getTargetIRAnalysis() {
-  return TargetIRAnalysis([this](Function &F) {
+  return TargetIRAnalysis([this](const Function &F) {
     return TargetTransformInfo(WebAssemblyTTIImpl(this, F));
   });
 }
@@ -124,50 +124,86 @@ FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) {
   return nullptr; // No reg alloc
 }
 
-void WebAssemblyPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
-  assert(!RegAllocPass && "WebAssembly uses no regalloc!");
-  addRegAllocPasses(false);
-}
-
-void WebAssemblyPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
-  assert(!RegAllocPass && "WebAssembly uses no regalloc!");
-  addRegAllocPasses(true);
-}
-
 //===----------------------------------------------------------------------===//
 // The following functions are called from lib/CodeGen/Passes.cpp to modify
 // the CodeGen pass sequence.
 //===----------------------------------------------------------------------===//
 
 void WebAssemblyPassConfig::addIRPasses() {
-  // FIXME: the default for this option is currently POSIX, whereas
-  // WebAssembly's MVP should default to Single.
   if (TM->Options.ThreadModel == ThreadModel::Single)
+    // In "single" mode, atomics get lowered to non-atomics.
     addPass(createLowerAtomicPass());
   else
     // Expand some atomic operations. WebAssemblyTargetLowering has hooks which
     // control specifically what gets lowered.
     addPass(createAtomicExpandPass(TM));
 
+  // Optimize "returned" function attributes.
+  addPass(createWebAssemblyOptimizeReturned());
+
   TargetPassConfig::addIRPasses();
 }
 
-bool WebAssemblyPassConfig::addPreISel() { return false; }
-
 bool WebAssemblyPassConfig::addInstSelector() {
+  (void)TargetPassConfig::addInstSelector();
   addPass(
       createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel()));
+  // Run the argument-move pass immediately after the ScheduleDAG scheduler
+  // so that we can fix up the ARGUMENT instructions before anything else
+  // sees them in the wrong place.
+  addPass(createWebAssemblyArgumentMove());
   return false;
 }
 
-bool WebAssemblyPassConfig::addILPOpts() { return true; }
+bool WebAssemblyPassConfig::addILPOpts() {
+  (void)TargetPassConfig::addILPOpts();
+  return true;
+}
 
-void WebAssemblyPassConfig::addPreRegAlloc() {}
+void WebAssemblyPassConfig::addPreRegAlloc() {
+  TargetPassConfig::addPreRegAlloc();
 
-void WebAssemblyPassConfig::addRegAllocPasses(bool Optimized) {}
+  // Prepare store instructions for register stackifying.
+  addPass(createWebAssemblyStoreResults());
+}
 
-void WebAssemblyPassConfig::addPostRegAlloc() {}
+void WebAssemblyPassConfig::addPostRegAlloc() {
+  // TODO: The following CodeGen passes don't currently support code containing
+  // virtual registers. Consider removing their restrictions and re-enabling
+  // them.
+  //
+  // We use our own PrologEpilogInserter which is very slightly modified to
+  // tolerate virtual registers.
+  disablePass(&PrologEpilogCodeInserterID);
+  // Fails with: should be run after register allocation.
+  disablePass(&MachineCopyPropagationID);
 
-void WebAssemblyPassConfig::addPreSched2() {}
+  // Mark registers as representing wasm's expression stack.
+  addPass(createWebAssemblyRegStackify());
 
-void WebAssemblyPassConfig::addPreEmitPass() {}
+  // Run the register coloring pass to reduce the total number of registers.
+  addPass(createWebAssemblyRegColoring());
+
+  TargetPassConfig::addPostRegAlloc();
+
+  // Run WebAssembly's version of the PrologEpilogInserter. Target-independent
+  // PEI runs after PostRegAlloc and after ShrinkWrap. Putting it here will run
+  // PEI before ShrinkWrap but otherwise in the same position in the order.
+  addPass(createWebAssemblyPEI());
+}
+
+void WebAssemblyPassConfig::addPreEmitPass() {
+  TargetPassConfig::addPreEmitPass();
+
+  // Put the CFG in structured form; insert BLOCK and LOOP markers.
+  addPass(createWebAssemblyCFGStackify());
+
+  // Lower br_unless into br_if.
+  addPass(createWebAssemblyLowerBrUnless());
+
+  // Create a mapping from LLVM CodeGen virtual registers to wasm registers.
+  addPass(createWebAssemblyRegNumbering());
+
+  // Perform the very last peephole optimizations on the code.
+  addPass(createWebAssemblyPeephole());
+}
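Most of the overridden hooks above exist to pin an exact late-pipeline order. Rendered as a plain ordered list, this is a sketch of the order the code above establishes, not generated output:

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Order matters: stackify before coloring (coloring must respect
  // stackified registers), numbering after CFG stackification, and the
  // custom PEI in place of the disabled target-independent inserter.
  const std::vector<std::string> LatePasses = {
      "WebAssemblyRegStackify", "WebAssemblyRegColoring", "WebAssemblyPEI",
      "WebAssemblyCFGStackify", "WebAssemblyLowerBrUnless",
      "WebAssemblyRegNumbering", "WebAssemblyPeephole"};
  for (const std::string &P : LatePasses)
    std::cout << P << '\n';
}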
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
new file mode 100644
index 000000000000..74e33b93e00d
--- /dev/null
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.cpp
@@ -0,0 +1,24 @@
+//===-- WebAssemblyTargetObjectFile.cpp - WebAssembly Object Info ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines the functions of the WebAssembly-specific subclass
+/// of TargetLoweringObjectFile.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetObjectFile.h"
+#include "WebAssemblyTargetMachine.h"
+using namespace llvm;
+
+void WebAssemblyTargetObjectFile::Initialize(MCContext &Ctx,
+                                             const TargetMachine &TM) {
+  TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+  InitializeELF(TM.Options.UseInitArray);
+}
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
index ee78b945ada2..39e50c9c575d 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetObjectFile.h
@@ -16,50 +16,13 @@
 #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
 #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYTARGETOBJECTFILE_H
 
-#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 
 namespace llvm {
 
-class GlobalVariable;
-
-class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFile {
+class WebAssemblyTargetObjectFile final : public TargetLoweringObjectFileELF {
 public:
-  WebAssemblyTargetObjectFile() {
-    TextSection = nullptr;
-    DataSection = nullptr;
-    BSSSection = nullptr;
-    ReadOnlySection = nullptr;
-
-    StaticCtorSection = nullptr;
-    StaticDtorSection = nullptr;
-    LSDASection = nullptr;
-    EHFrameSection = nullptr;
-    DwarfAbbrevSection = nullptr;
-    DwarfInfoSection = nullptr;
-    DwarfLineSection = nullptr;
-    DwarfFrameSection = nullptr;
-    DwarfPubTypesSection = nullptr;
-    DwarfDebugInlineSection = nullptr;
-    DwarfStrSection = nullptr;
-    DwarfLocSection = nullptr;
-    DwarfARangesSection = nullptr;
-    DwarfRangesSection = nullptr;
-  }
-
-  MCSection *getSectionForConstant(SectionKind Kind,
-                                   const Constant *C) const override {
-    return ReadOnlySection;
-  }
-
-  MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
-                                      Mangler &Mang,
-                                      const TargetMachine &TM) const override {
-    return DataSection;
-  }
-
-  MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
-                                    Mangler &Mang,
-                                    const TargetMachine &TM) const override;
+  void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index fa88ed526df2..356631711921 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -21,8 +21,7 @@ using namespace llvm;
 #define DEBUG_TYPE "wasmtti"
 
 TargetTransformInfo::PopcntSupportKind
-WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) {
+WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
-  // TODO: Make Math.popcount32 happen in WebAssembly.
-  return TTI::PSK_Software;
+  return TargetTransformInfo::PSK_FastHardware;
 }
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 7ffb6047b963..26dc388cc922 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -38,7 +38,7 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase<WebAssemblyTTIImpl> {
   const WebAssemblyTargetLowering *getTLI() const { return TLI; }
 
 public:
-  WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, Function &F)
+  WebAssemblyTTIImpl(const WebAssemblyTargetMachine *TM, const Function &F)
       : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
         TLI(ST->getTargetLowering()) {}
 
@@ -54,7 +54,7 @@ public:
 
   // TODO: Implement more Scalar TTI for WebAssembly
 
-  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+  TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
 
   /// @}
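The PSK_Software to PSK_FastHardware flip above tells the optimizer it may freely form ctpop, since WebAssembly has a native popcnt instruction; PSK_Software implies expansions like the bit-twiddling routine below. A self-contained comparison (uses the GCC/Clang __builtin_popcount builtin):

#include <cassert>
#include <cstdint>

// The classic SWAR expansion a PSK_Software target would be left with.
static uint32_t popcountSoftware(uint32_t X) {
  X = X - ((X >> 1) & 0x55555555u);
  X = (X & 0x33333333u) + ((X >> 2) & 0x33333333u);
  X = (X + (X >> 4)) & 0x0F0F0F0Fu;
  return (X * 0x01010101u) >> 24;
}

int main() {
  for (uint32_t V : {0u, 1u, 0xFFu, 0xDEADBEEFu})
    assert(popcountSoftware(V) ==
           static_cast<uint32_t>(__builtin_popcount(V)));
}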
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
new file mode 100644
index 000000000000..ee9d060f339e
--- /dev/null
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -0,0 +1,311 @@
+# Tests which are known to fail from the GCC torture test suite.
+
+# Core dump.
+920908-1.c
+pr38151.c
+va-arg-22.c
+
+# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed.
+struct-ret-1.c
+va-arg-11.c
+va-arg-21.c
+va-arg-24.c
+va-arg-trap-1.c
+
+# WebAssemblyCFGStackify.cpp:211: void SortBlocks(llvm::MachineFunction&, const llvm::MachineLoopInfo&): Assertion `L->contains( MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB)))) && "Loop isn't contiguous"' failed.
+20000815-1.c
+20010129-1.c
+930628-1.c
+980707-1.c
+
+# WebAssemblyISelLowering.cpp:316: virtual llvm::SDValue llvm::WebAssemblyTargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl<llvm::SDValue>&) const: Assertion `!Out.Flags.isByVal() && "byval is not valid for return values"' failed.
+20030914-2.c
+20040703-1.c
+20081117-1.c
+920625-1.c
+931004-11.c
+931004-13.c
+980223.c
+bitfld-5.c
+complex-7.c
+pr38969.c
+pr51323.c
+pr52129.c
+pr57130.c
+
+# These were previously "Cannot select FrameIndex." Now most of them fail
+# because they contain call frame pseudos (e.g. call a vararg func),
+# frame pointers, or similar. This list will be updated again soon.
+20000519-1.c
+20000706-4.c
+20000706-5.c
+20000801-2.c
+20000801-4.c
+20011126-2.c
+
+20020529-1.c
+20021024-1.c
+
+20030828-1.c
+20030914-1.c
+
+20040302-1.c
+20040625-1.c
+20040823-1.c
+
+20041113-1.c
+
+20041214-1.c
+
+20050826-2.c
+
+20071213-1.c
+
+20080506-2.c
+20080519-1.c
+
+20081103-1.c
+20090113-1.c
+20090113-2.c
+20090113-3.c
+
+20090623-1.c
+
+920501-6.c
+920501-8.c
+920726-1.c
+930518-1.c
+
+931004-10.c
+931004-12.c
+931004-14.c
+931004-2.c
+931004-4.c
+931004-6.c
+931004-8.c
+
+980205.c
+980608-1.c
+980709-1.c
+980716-1.c
+990127-1.c
+
+991216-2.c
+
+#cbrt.c
+complex-5.c
+complex-6.c
+
+enum-3.c
+fprintf-chk-1.c
+frame-address.c
+loop-15.c
+loop-ivopts-2.c
+mayalias-3.c
+
+multi-ix.c
+
+pr20466-1.c
+
+
+pr28778.c
+pr28982b.c
+
+pr30778.c
+pr31448-2.c
+pr31448.c
+
+pr33870-1.c
+pr33870.c
+
+pr38051.c
+
+pr39100.c
+
+pr39339.c
+pr40022.c
+pr40657.c
+
+pr43987.c
+
+pr44575.c
+
+pr44942.c
+pr46309.c
+pr47538.c
+pr47925.c
+
+pr49390.c
+pr49419.c
+
+#pr51877.c
+
+#pr52979-1.c
+#pr52979-2.c
+pr53645-2.c
+pr53645.c
+
+pr56205.c
+
+pr56866.c
+
+pr57876.c
+pr58277-1.c
+
+pr59643.c
+
+printf-chk-1.c
+pta-field-1.c
+pta-field-2.c
+
+stdarg-1.c
+stdarg-2.c
+stdarg-3.c
+stdarg-4.c
+strct-stdarg-1.c
+strct-varg-1.c
+
+va-arg-1.c
+va-arg-10.c
+va-arg-12.c
+va-arg-13.c
+va-arg-14.c
+va-arg-15.c
+va-arg-16.c
+va-arg-17.c
+va-arg-18.c
+va-arg-19.c
+va-arg-2.c
+va-arg-20.c
+va-arg-23.c
+va-arg-26.c
+va-arg-4.c
+va-arg-5.c
+va-arg-6.c
+va-arg-7.c
+va-arg-8.c
+va-arg-9.c
+va-arg-pack-1.c
+vfprintf-1.c
+vfprintf-chk-1.c
+vprintf-1.c
+vprintf-chk-1.c
+
+# Cannot select callseq_end.
+20040811-1.c
+pr43220.c
+vla-dealloc-1.c
+
+# Cannot select brind.
+20071210-1.c
+920501-4.c
+920501-5.c
+
+# Cannot select BlockAddress.
+comp-goto-1.c
+980526-1.c
+990208-1.c
+
+# WebAssembly hasn't implemented byval arguments.
+20000412-3.c
+20000419-1.c
+20000706-1.c
+20000706-2.c
+20000707-1.c
+20000717-1.c
+20000717-5.c
+20000808-1.c
+20010605-2.c
+20011113-1.c
+20020215-1.c
+20020810-1.c
+20021118-1.c
+20040707-1.c
+20040709-1.c
+20040709-2.c
+20041201-1.c
+20050713-1.c
+20070614-1.c
+920908-2.c
+921112-1.c
+921117-1.c
+921123-2.c
+921204-1.c
+930126-1.c
+930208-1.c
+931004-5.c
+931004-9.c
+931031-1.c
+950607-2.c
+960416-1.c
+990525-1.c
+991118-1.c
+bf64-1.c
+complex-1.c
+complex-2.c
+pr15262-2.c
+pr20621-1.c
+pr23135.c
+pr30185.c
+pr42248.c
+
+# unimplemented operation lowering.
+20010122-1.c
+20030323-1.c
+20030811-1.c
+pr17377.c
+
+# Error: invalid output constraint '=t' in asm.
+990413-2.c
+990826-0.c
+
+# Error: __builtin_setjmp / __builtin_longjmp is not supported for the current target.
+built-in-setjmp.c
+pr60003.c
+
+# Error in the program / unsupported by Clang.
+scal-to-vec1.c
+scal-to-vec2.c
+scal-to-vec3.c
+20000822-1.c
+20010209-1.c
+20010605-1.c
+20030501-1.c
+20040520-1.c
+20061220-1.c
+20090219-1.c
+920415-1.c
+920428-2.c
+920501-7.c
+920612-2.c
+920721-4.c
+921017-1.c
+921215-1.c
+931002-1.c
+comp-goto-2.c
+nest-align-1.c
+nest-stdar-1.c
+nestfunc-1.c
+nestfunc-2.c
+nestfunc-3.c
+nestfunc-5.c
+nestfunc-6.c
+nestfunc-7.c
+pr22061-3.c
+pr22061-4.c
+pr24135.c
+pr51447.c
+20020412-1.c
+20040308-1.c
+20040423-1.c
+20041218-2.c
+20070919-1.c
+align-nest.c
+pr41935.c
+20050107-1.c
+20050119-1.c
+20050119-2.c
+920302-1.c
+920501-3.c
+920728-1.c
+pr28865.c
diff --git a/lib/Target/X86/AsmParser/CMakeLists.txt b/lib/Target/X86/AsmParser/CMakeLists.txt
index 2c1926edc26b..b022a41b192f 100644
--- a/lib/Target/X86/AsmParser/CMakeLists.txt
+++ b/lib/Target/X86/AsmParser/CMakeLists.txt
@@ -1,7 +1,4 @@
 add_llvm_library(LLVMX86AsmParser
   X86AsmInstrumentation.cpp
   X86AsmParser.cpp
-
-  LINK_LIBS
-  LLVMX86CodeGen
   )
diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt
index 284bfd052ccf..9f94d5d38864 100644
--- a/lib/Target/X86/AsmParser/LLVMBuild.txt
+++ b/lib/Target/X86/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@ type = Library
 name = X86AsmParser
 parent = X86
-required_libraries = MC MCParser Support X86CodeGen X86Desc X86Info
+required_libraries = MC MCParser Support X86Desc X86Info
 add_to_library_groups = X86
diff --git a/lib/Target/X86/AsmParser/Makefile b/lib/Target/X86/AsmParser/Makefile
index fb9760796622..f834dfc300a1 100644
--- a/lib/Target/X86/AsmParser/Makefile
+++ b/lib/Target/X86/AsmParser/Makefile
@@ -9,7 +9,7 @@ LEVEL = ../../../..
 LIBRARYNAME = LLVMX86AsmParser
 
-# Hack: we need to include 'main' x86 target directory to grab private headers
+# Hack: we need to include 'main' X86 target directory to grab private headers
 CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index 9eee4a0f3d82..09cc53a8e6d3 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -10,10 +10,8 @@
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "X86AsmInstrumentation.h"
 #include "X86Operand.h"
-#include "X86RegisterInfo.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
@@ -118,11 +116,6 @@ bool IsStackReg(unsigned Reg) { return Reg == X86::RSP || Reg == X86::ESP; }
 
 bool IsSmallMemAccess(unsigned AccessSize) { return AccessSize < 8; }
 
-std::string FuncName(unsigned AccessSize, bool IsWrite) {
-  return std::string("__asan_report_") + (IsWrite ? "store" : "load") +
-         utostr(AccessSize);
-}
-
 class X86AddressSanitizer : public X86AsmInstrumentation {
 public:
   struct RegisterContext {
@@ -136,26 +129,26 @@ public:
   public:
     RegisterContext(unsigned AddressReg, unsigned ShadowReg,
                     unsigned ScratchReg) {
-      BusyRegs.push_back(convReg(AddressReg, MVT::i64));
-      BusyRegs.push_back(convReg(ShadowReg, MVT::i64));
-      BusyRegs.push_back(convReg(ScratchReg, MVT::i64));
+      BusyRegs.push_back(convReg(AddressReg, 64));
+      BusyRegs.push_back(convReg(ShadowReg, 64));
+      BusyRegs.push_back(convReg(ScratchReg, 64));
     }
 
-    unsigned AddressReg(MVT::SimpleValueType VT) const {
-      return convReg(BusyRegs[REG_OFFSET_ADDRESS], VT);
+    unsigned AddressReg(unsigned Size) const {
+      return convReg(BusyRegs[REG_OFFSET_ADDRESS], Size);
     }
 
-    unsigned ShadowReg(MVT::SimpleValueType VT) const {
-      return convReg(BusyRegs[REG_OFFSET_SHADOW], VT);
+    unsigned ShadowReg(unsigned Size) const {
+      return convReg(BusyRegs[REG_OFFSET_SHADOW], Size);
    }
 
-    unsigned ScratchReg(MVT::SimpleValueType VT) const {
-      return convReg(BusyRegs[REG_OFFSET_SCRATCH], VT);
+    unsigned ScratchReg(unsigned Size) const {
+      return convReg(BusyRegs[REG_OFFSET_SCRATCH], Size);
     }
 
     void AddBusyReg(unsigned Reg) {
       if (Reg != X86::NoRegister)
-        BusyRegs.push_back(convReg(Reg, MVT::i64));
+        BusyRegs.push_back(convReg(Reg, 64));
     }
 
     void AddBusyRegs(const X86Operand &Op) {
@@ -163,36 +156,36 @@ public:
       AddBusyReg(Op.getMemIndexReg());
     }
 
-    unsigned ChooseFrameReg(MVT::SimpleValueType VT) const {
+    unsigned ChooseFrameReg(unsigned Size) const {
       static const MCPhysReg Candidates[] = { X86::RBP, X86::RAX, X86::RBX,
                                               X86::RCX, X86::RDX, X86::RDI,
                                               X86::RSI };
       for (unsigned Reg : Candidates) {
         if (!std::count(BusyRegs.begin(), BusyRegs.end(), Reg))
-          return convReg(Reg, VT);
+          return convReg(Reg, Size);
       }
       return X86::NoRegister;
     }
 
  private:
-    unsigned convReg(unsigned Reg, MVT::SimpleValueType VT) const {
-      return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, VT);
+    unsigned convReg(unsigned Reg, unsigned Size) const {
+      return Reg == X86::NoRegister ? Reg : getX86SubSuperRegister(Reg, Size);
    }
 
    std::vector<unsigned> BusyRegs;
  };
 
-  X86AddressSanitizer(const MCSubtargetInfo &STI)
+  X86AddressSanitizer(const MCSubtargetInfo *&STI)
       : X86AsmInstrumentation(STI), RepPrefix(false), OrigSPOffset(0) {}
 
-  virtual ~X86AddressSanitizer() {}
+  ~X86AddressSanitizer() override {}
 
   // X86AsmInstrumentation implementation:
-  virtual void InstrumentAndEmitInstruction(const MCInst &Inst,
-                                            OperandVector &Operands,
-                                            MCContext &Ctx,
-                                            const MCInstrInfo &MII,
-                                            MCStreamer &Out) override {
+  void InstrumentAndEmitInstruction(const MCInst &Inst,
+                                    OperandVector &Operands,
+                                    MCContext &Ctx,
+                                    const MCInstrInfo &MII,
+                                    MCStreamer &Out) override {
     InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
     if (RepPrefix)
       EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
@@ -240,17 +233,16 @@ public:
 protected:
   void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); }
 
-  void EmitLEA(X86Operand &Op, MVT::SimpleValueType VT, unsigned Reg,
-               MCStreamer &Out) {
-    assert(VT == MVT::i32 || VT == MVT::i64);
+  void EmitLEA(X86Operand &Op, unsigned Size, unsigned Reg, MCStreamer &Out) {
+    assert(Size == 32 || Size == 64);
     MCInst Inst;
-    Inst.setOpcode(VT == MVT::i32 ? X86::LEA32r : X86::LEA64r);
-    Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, VT)));
+    Inst.setOpcode(Size == 32 ? X86::LEA32r : X86::LEA64r);
+    Inst.addOperand(MCOperand::createReg(getX86SubSuperRegister(Reg, Size)));
     Op.addMemOperands(Inst, 5);
     EmitInstruction(Out, Inst);
   }
 
-  void ComputeMemOperandAddress(X86Operand &Op, MVT::SimpleValueType VT,
+  void ComputeMemOperandAddress(X86Operand &Op, unsigned Size,
                                 unsigned Reg, MCContext &Ctx, MCStreamer &Out);
 
   // Creates new memory operand with Displacement added to an original
@@ -261,13 +253,13 @@ protected:
                                            MCContext &Ctx, int64_t *Residue);
 
   bool is64BitMode() const {
-    return STI.getFeatureBits()[X86::Mode64Bit];
+    return STI->getFeatureBits()[X86::Mode64Bit];
   }
   bool is32BitMode() const {
-    return STI.getFeatureBits()[X86::Mode32Bit];
+    return STI->getFeatureBits()[X86::Mode32Bit];
   }
   bool is16BitMode() const {
-    return STI.getFeatureBits()[X86::Mode16Bit];
+    return STI->getFeatureBits()[X86::Mode16Bit];
   }
 
   unsigned getPointerWidth() {
@@ -437,7 +429,7 @@ void X86AddressSanitizer::InstrumentMOV(const MCInst &Inst,
 }
 
 void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
-                                                   MVT::SimpleValueType VT,
+                                                   unsigned Size,
                                                    unsigned Reg, MCContext &Ctx,
                                                    MCStreamer &Out) {
   int64_t Displacement = 0;
@@ -450,14 +442,14 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
 
   // Emit Op as is.
   if (Displacement == 0) {
-    EmitLEA(Op, VT, Reg, Out);
+    EmitLEA(Op, Size, Reg, Out);
     return;
   }
 
   int64_t Residue;
   std::unique_ptr<X86Operand> NewOp =
       AddDisplacement(Op, Displacement, Ctx, &Residue);
-  EmitLEA(*NewOp, VT, Reg, Out);
+  EmitLEA(*NewOp, Size, Reg, Out);
 
   while (Residue != 0) {
     const MCConstantExpr *Disp =
@@ -465,7 +457,7 @@ void X86AddressSanitizer::ComputeMemOperandAddress(X86Operand &Op,
     std::unique_ptr<X86Operand> DispOp =
         X86Operand::CreateMem(getPointerWidth(), 0, Disp, Reg, 0, 1, SMLoc(),
                               SMLoc());
-    EmitLEA(*DispOp, VT, Reg, Out);
+    EmitLEA(*DispOp, Size, Reg, Out);
     Residue -= Disp->getValue();
   }
 }
@@ -503,16 +495,16 @@ class X86AddressSanitizer32 : public X86AddressSanitizer {
 public:
   static const long kShadowOffset = 0x20000000;
 
-  X86AddressSanitizer32(const MCSubtargetInfo &STI)
+  X86AddressSanitizer32(const MCSubtargetInfo *&STI)
       : X86AddressSanitizer(STI) {}
 
-  virtual ~X86AddressSanitizer32() {}
+  ~X86AddressSanitizer32() override {}
 
   unsigned GetFrameReg(const MCContext &Ctx, MCStreamer &Out) {
     unsigned FrameReg = GetFrameRegGeneric(Ctx, Out);
     if (FrameReg == X86::NoRegister)
       return FrameReg;
-    return getX86SubSuperRegister(FrameReg, MVT::i32);
+    return getX86SubSuperRegister(FrameReg, 32);
   }
 
   void SpillReg(MCStreamer &Out, unsigned Reg) {
@@ -535,10 +527,10 @@ public:
     OrigSPOffset += 4;
   }
 
-  virtual void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
-                                            MCContext &Ctx,
-                                            MCStreamer &Out) override {
-    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+  void InstrumentMemOperandPrologue(const RegisterContext &RegCtx,
+                                    MCContext &Ctx,
+                                    MCStreamer &Out) override {
+    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
     assert(LocalFrameReg != X86::NoRegister);
 
     const MCRegisterInfo *MRI = Ctx.getRegisterInfo();
@@ -558,24 +550,24 @@ public:
           MRI->getDwarfRegNum(LocalFrameReg, true /* IsEH */));
     }
 
-    SpillReg(Out, RegCtx.AddressReg(MVT::i32));
-    SpillReg(Out, RegCtx.ShadowReg(MVT::i32));
-    if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
-      SpillReg(Out, RegCtx.ScratchReg(MVT::i32));
+    SpillReg(Out, RegCtx.AddressReg(32));
+    SpillReg(Out, RegCtx.ShadowReg(32));
+    if (RegCtx.ScratchReg(32) != X86::NoRegister)
+      SpillReg(Out, RegCtx.ScratchReg(32));
     StoreFlags(Out);
   }
 
-  virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
-                                            MCContext &Ctx,
-                                            MCStreamer &Out) override {
-    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i32);
+  void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx,
+                                    MCContext &Ctx,
+                                    MCStreamer &Out) override {
+    unsigned LocalFrameReg = RegCtx.ChooseFrameReg(32);
     assert(LocalFrameReg != X86::NoRegister);
 
     RestoreFlags(Out);
-    if (RegCtx.ScratchReg(MVT::i32) != X86::NoRegister)
-      RestoreReg(Out, RegCtx.ScratchReg(MVT::i32));
-    RestoreReg(Out, RegCtx.ShadowReg(MVT::i32));
-    RestoreReg(Out, RegCtx.AddressReg(MVT::i32));
+    if (RegCtx.ScratchReg(32) != X86::NoRegister)
+      RestoreReg(Out, RegCtx.ScratchReg(32));
+    RestoreReg(Out, RegCtx.ShadowReg(32));
+    RestoreReg(Out, RegCtx.AddressReg(32));
 
     unsigned FrameReg = GetFrameReg(Ctx, Out);
     if (Ctx.getRegisterInfo() && FrameReg != X86::NoRegister) {
@@ -586,18 +578,18 @@ public:
     }
   }
 
-  virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
-                                         bool IsWrite,
-                                         const RegisterContext &RegCtx,
-                                         MCContext &Ctx,
-                                         MCStreamer &Out) override;
-  virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
-                                         bool IsWrite,
-                                         const RegisterContext &RegCtx,
-                                         MCContext &Ctx,
-                                         MCStreamer &Out) override;
-  virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
-                                  MCStreamer &Out) override;
+  void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize,
+                                 bool IsWrite,
+                                 const RegisterContext &RegCtx,
+                                 MCContext &Ctx,
+                                 MCStreamer &Out) override;
+  void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize,
+                                 bool IsWrite,
+                                 const RegisterContext &RegCtx,
+                                 MCContext &Ctx,
+                                 MCStreamer &Out) override;
+  void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx,
+                          MCStreamer &Out) override;
 
 private:
   void EmitCallAsanReport(unsigned AccessSize, bool IsWrite, MCContext &Ctx,
@@ -610,10 +602,11 @@ private:
                             .addReg(X86::ESP)
                             .addImm(-16));
     EmitInstruction(
-        Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(MVT::i32)));
+        Out, MCInstBuilder(X86::PUSH32r).addReg(RegCtx.AddressReg(32)));
 
-    const std::string &Fn = FuncName(AccessSize, IsWrite);
-    MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn));
+    MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") +
                                            (IsWrite ? "store" : "load") +
+                                            llvm::Twine(AccessSize));
     const MCSymbolRefExpr *FnExpr =
         MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx);
     EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr));
@@ -623,14 +616,14 @@ private:
 void X86AddressSanitizer32::InstrumentMemOperandSmall(
     X86Operand &Op, unsigned AccessSize, bool IsWrite,
     const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
-  unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
-  unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
-  unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8);
+  unsigned AddressRegI32 = RegCtx.AddressReg(32);
+  unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
+  unsigned ShadowRegI8 = RegCtx.ShadowReg(8);
 
-  assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister);
-  unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32);
+  assert(RegCtx.ScratchReg(32) != X86::NoRegister);
+  unsigned ScratchRegI32 = RegCtx.ScratchReg(32);
 
-  ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+  ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
 
   EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
                            AddressRegI32));
@@ -673,7 +666,7 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
     std::unique_ptr<X86Operand> Op(
        X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1,
                              SMLoc(), SMLoc()));
-    EmitLEA(*Op, MVT::i32, ScratchRegI32, Out);
+    EmitLEA(*Op, 32, ScratchRegI32, Out);
     break;
   }
   case 4:
@@ -698,10 +691,10 @@ void X86AddressSanitizer32::InstrumentMemOperandSmall(
 void X86AddressSanitizer32::InstrumentMemOperandLarge(
     X86Operand &Op, unsigned AccessSize, bool IsWrite,
     const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) {
-  unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32);
-  unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32);
+  unsigned AddressRegI32 = RegCtx.AddressReg(32);
+  unsigned ShadowRegI32 = RegCtx.ShadowReg(32);
 
-  ComputeMemOperandAddress(Op, MVT::i32, AddressRegI32, Ctx, Out);
+  ComputeMemOperandAddress(Op, 32, AddressRegI32, Ctx, Out);
 
   EmitInstruction(Out, MCInstBuilder(X86::MOV32rr).addReg(ShadowRegI32).addReg(
                            AddressRegI32));
RegCtx.ShadowReg(64)); + SpillReg(Out, RegCtx.AddressReg(64)); + if (RegCtx.ScratchReg(64) != X86::NoRegister) + SpillReg(Out, RegCtx.ScratchReg(64)); StoreFlags(Out); } - virtual void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx, - MCContext &Ctx, - MCStreamer &Out) override { - unsigned LocalFrameReg = RegCtx.ChooseFrameReg(MVT::i64); + void InstrumentMemOperandEpilogue(const RegisterContext &RegCtx, + MCContext &Ctx, + MCStreamer &Out) override { + unsigned LocalFrameReg = RegCtx.ChooseFrameReg(64); assert(LocalFrameReg != X86::NoRegister); RestoreFlags(Out); - if (RegCtx.ScratchReg(MVT::i64) != X86::NoRegister) - RestoreReg(Out, RegCtx.ScratchReg(MVT::i64)); - RestoreReg(Out, RegCtx.AddressReg(MVT::i64)); - RestoreReg(Out, RegCtx.ShadowReg(MVT::i64)); + if (RegCtx.ScratchReg(64) != X86::NoRegister) + RestoreReg(Out, RegCtx.ScratchReg(64)); + RestoreReg(Out, RegCtx.AddressReg(64)); + RestoreReg(Out, RegCtx.ShadowReg(64)); EmitAdjustRSP(Ctx, Out, 128); unsigned FrameReg = GetFrameReg(Ctx, Out); @@ -845,18 +838,18 @@ public: } } - virtual void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize, - bool IsWrite, - const RegisterContext &RegCtx, - MCContext &Ctx, - MCStreamer &Out) override; - virtual void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize, - bool IsWrite, - const RegisterContext &RegCtx, - MCContext &Ctx, - MCStreamer &Out) override; - virtual void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx, - MCStreamer &Out) override; + void InstrumentMemOperandSmall(X86Operand &Op, unsigned AccessSize, + bool IsWrite, + const RegisterContext &RegCtx, + MCContext &Ctx, + MCStreamer &Out) override; + void InstrumentMemOperandLarge(X86Operand &Op, unsigned AccessSize, + bool IsWrite, + const RegisterContext &RegCtx, + MCContext &Ctx, + MCStreamer &Out) override; + void InstrumentMOVSImpl(unsigned AccessSize, MCContext &Ctx, + MCStreamer &Out) override; private: void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) { @@ -864,7 +857,7 @@ private: std::unique_ptr Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc())); - EmitLEA(*Op, MVT::i64, X86::RSP, Out); + EmitLEA(*Op, 64, X86::RSP, Out); OrigSPOffset += Offset; } @@ -878,12 +871,13 @@ private: .addReg(X86::RSP) .addImm(-16)); - if (RegCtx.AddressReg(MVT::i64) != X86::RDI) { + if (RegCtx.AddressReg(64) != X86::RDI) { EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RDI).addReg( - RegCtx.AddressReg(MVT::i64))); + RegCtx.AddressReg(64))); } - const std::string &Fn = FuncName(AccessSize, IsWrite); - MCSymbol *FnSym = Ctx.getOrCreateSymbol(StringRef(Fn)); + MCSymbol *FnSym = Ctx.getOrCreateSymbol(llvm::Twine("__asan_report_") + + (IsWrite ? 
"store" : "load") + + llvm::Twine(AccessSize)); const MCSymbolRefExpr *FnExpr = MCSymbolRefExpr::create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr)); @@ -893,16 +887,16 @@ private: void X86AddressSanitizer64::InstrumentMemOperandSmall( X86Operand &Op, unsigned AccessSize, bool IsWrite, const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) { - unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64); - unsigned AddressRegI32 = RegCtx.AddressReg(MVT::i32); - unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64); - unsigned ShadowRegI32 = RegCtx.ShadowReg(MVT::i32); - unsigned ShadowRegI8 = RegCtx.ShadowReg(MVT::i8); + unsigned AddressRegI64 = RegCtx.AddressReg(64); + unsigned AddressRegI32 = RegCtx.AddressReg(32); + unsigned ShadowRegI64 = RegCtx.ShadowReg(64); + unsigned ShadowRegI32 = RegCtx.ShadowReg(32); + unsigned ShadowRegI8 = RegCtx.ShadowReg(8); - assert(RegCtx.ScratchReg(MVT::i32) != X86::NoRegister); - unsigned ScratchRegI32 = RegCtx.ScratchReg(MVT::i32); + assert(RegCtx.ScratchReg(32) != X86::NoRegister); + unsigned ScratchRegI32 = RegCtx.ScratchReg(32); - ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out); + ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out); EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg( AddressRegI64)); @@ -944,7 +938,7 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( std::unique_ptr Op( X86Operand::CreateMem(getPointerWidth(), 0, Disp, ScratchRegI32, 0, 1, SMLoc(), SMLoc())); - EmitLEA(*Op, MVT::i32, ScratchRegI32, Out); + EmitLEA(*Op, 32, ScratchRegI32, Out); break; } case 4: @@ -969,10 +963,10 @@ void X86AddressSanitizer64::InstrumentMemOperandSmall( void X86AddressSanitizer64::InstrumentMemOperandLarge( X86Operand &Op, unsigned AccessSize, bool IsWrite, const RegisterContext &RegCtx, MCContext &Ctx, MCStreamer &Out) { - unsigned AddressRegI64 = RegCtx.AddressReg(MVT::i64); - unsigned ShadowRegI64 = RegCtx.ShadowReg(MVT::i64); + unsigned AddressRegI64 = RegCtx.AddressReg(64); + unsigned ShadowRegI64 = RegCtx.ShadowReg(64); - ComputeMemOperandAddress(Op, MVT::i64, AddressRegI64, Ctx, Out); + ComputeMemOperandAddress(Op, 64, AddressRegI64, Ctx, Out); EmitInstruction(Out, MCInstBuilder(X86::MOV64rr).addReg(ShadowRegI64).addReg( AddressRegI64)); @@ -1030,7 +1024,7 @@ void X86AddressSanitizer64::InstrumentMOVSImpl(unsigned AccessSize, } // End anonymous namespace -X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo &STI) +X86AsmInstrumentation::X86AsmInstrumentation(const MCSubtargetInfo *&STI) : STI(STI), InitialFrameReg(0) {} X86AsmInstrumentation::~X86AsmInstrumentation() {} @@ -1043,7 +1037,7 @@ void X86AsmInstrumentation::InstrumentAndEmitInstruction( void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out, const MCInst &Inst) { - Out.EmitInstruction(Inst, STI); + Out.EmitInstruction(Inst, *STI); } unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx, @@ -1067,17 +1061,17 @@ unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx, X86AsmInstrumentation * CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, - const MCContext &Ctx, const MCSubtargetInfo &STI) { - Triple T(STI.getTargetTriple()); + const MCContext &Ctx, const MCSubtargetInfo *&STI) { + Triple T(STI->getTargetTriple()); const bool hasCompilerRTSupport = T.isOSLinux(); if (ClAsanInstrumentAssembly && hasCompilerRTSupport && MCOptions.SanitizeAddress) { - if (STI.getFeatureBits()[X86::Mode32Bit] != 0) + if 
(STI->getFeatureBits()[X86::Mode32Bit] != 0) return new X86AddressSanitizer32(STI); - if (STI.getFeatureBits()[X86::Mode64Bit] != 0) + if (STI->getFeatureBits()[X86::Mode64Bit] != 0) return new X86AddressSanitizer64(STI); } return new X86AsmInstrumentation(STI); } -} // End llvm namespace +} // end llvm namespace diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h index 19ebcc44f61e..470ceadb0aa6 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h @@ -28,7 +28,8 @@ class X86AsmInstrumentation; X86AsmInstrumentation * CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, - const MCContext &Ctx, const MCSubtargetInfo &STI); + const MCContext &Ctx, + const MCSubtargetInfo *&STI); class X86AsmInstrumentation { public: @@ -48,15 +49,16 @@ public: protected: friend X86AsmInstrumentation * CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, - const MCContext &Ctx, const MCSubtargetInfo &STI); + const MCContext &Ctx, + const MCSubtargetInfo *&STI); - X86AsmInstrumentation(const MCSubtargetInfo &STI); + X86AsmInstrumentation(const MCSubtargetInfo *&STI); unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out); void EmitInstruction(MCStreamer &Out, const MCInst &Inst); - const MCSubtargetInfo &STI; + const MCSubtargetInfo *&STI; unsigned InitialFrameReg; }; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index bca059d8c383..4d8ffac1a82b 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -11,7 +11,6 @@ #include "X86AsmInstrumentation.h" #include "X86AsmParserCommon.h" #include "X86Operand.h" -#include "X86ISelLowering.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -26,6 +25,7 @@ #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" @@ -57,10 +57,10 @@ static const char OpPrecedence[] = { }; class X86AsmParser : public MCTargetAsmParser { - MCSubtargetInfo &STI; const MCInstrInfo &MII; ParseInstructionInfo *InstInfo; std::unique_ptr Instrumentation; + private: SMLoc consumeToken() { MCAsmParser &Parser = getParser(); @@ -154,6 +154,7 @@ private: // Push the new operator. InfixOperatorStack.push_back(Op); } + int64_t execute() { // Push any remaining operators onto the postfix stack. while (!InfixOperatorStack.empty()) { @@ -268,6 +269,7 @@ private: bool StopOnLBrac, AddImmPrefix; InfixCalculator IC; InlineAsmIdentifierInfo Info; + public: IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) : State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0), @@ -712,10 +714,10 @@ private: SMLoc End, unsigned Size, StringRef Identifier, InlineAsmIdentifierInfo &Info); + bool parseDirectiveEven(SMLoc L); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); - bool validateInstruction(MCInst &Inst, const OperandVector &Ops); bool processInstruction(MCInst &Inst, const OperandVector &Ops); /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds @@ -758,23 +760,24 @@ private: bool is64BitMode() const { // FIXME: Can tablegen auto-generate this? 
- return STI.getFeatureBits()[X86::Mode64Bit]; + return getSTI().getFeatureBits()[X86::Mode64Bit]; } bool is32BitMode() const { // FIXME: Can tablegen auto-generate this? - return STI.getFeatureBits()[X86::Mode32Bit]; + return getSTI().getFeatureBits()[X86::Mode32Bit]; } bool is16BitMode() const { // FIXME: Can tablegen auto-generate this? - return STI.getFeatureBits()[X86::Mode16Bit]; + return getSTI().getFeatureBits()[X86::Mode16Bit]; } void SwitchMode(unsigned mode) { + MCSubtargetInfo &STI = copySTI(); FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit}); FeatureBitset OldMode = STI.getFeatureBits() & AllModes; unsigned FB = ComputeAvailableFeatures( STI.ToggleFeature(OldMode.flip(mode))); setAvailableFeatures(FB); - + assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes)); } @@ -798,12 +801,12 @@ private: /// } public: - X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser, + X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser, const MCInstrInfo &mii, const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) { + : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr) { // Initialize the set of available features. - setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits())); Instrumentation.reset( CreateX86AsmInstrumentation(Options, Parser.getContext(), STI)); } @@ -912,6 +915,11 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, if (RegNo == 0) RegNo = MatchRegisterName(Tok.getString().lower()); + // The "flags" register cannot be referenced directly. + // Treat it as an identifier instead. + if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS) + RegNo = 0; + if (!is64BitMode()) { // FIXME: This should be done using Requires and // Requires so "eiz" usage in 64-bit instructions can be also @@ -1042,8 +1050,11 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { .Cases("BYTE", "byte", 8) .Cases("WORD", "word", 16) .Cases("DWORD", "dword", 32) + .Cases("FWORD", "fword", 48) .Cases("QWORD", "qword", 64) + .Cases("MMWORD","mmword", 64) .Cases("XWORD", "xword", 80) + .Cases("TBYTE", "tbyte", 80) .Cases("XMMWORD", "xmmword", 128) .Cases("YMMWORD", "ymmword", 256) .Cases("ZMMWORD", "zmmword", 512) @@ -1062,8 +1073,8 @@ std::unique_ptr X86AsmParser::CreateMemForInlineAsm( // Insert an explicit size if the user didn't have one. if (!Size) { Size = getPointerWidth(); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, - /*Len=*/0, Size)); + InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, + /*Len=*/0, Size); } // Create an absolute memory reference in order to match against @@ -1082,8 +1093,8 @@ std::unique_ptr X86AsmParser::CreateMemForInlineAsm( if (!Size) { Size = Info.Type * 8; // Size is in terms of bits in this context. if (Size) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start, - /*Len=*/0, Size)); + InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start, + /*Len=*/0, Size); } } @@ -1097,13 +1108,13 @@ std::unique_ptr X86AsmParser::CreateMemForInlineAsm( } static void -RewriteIntelBracExpression(SmallVectorImpl *AsmRewrites, +RewriteIntelBracExpression(SmallVectorImpl &AsmRewrites, StringRef SymName, int64_t ImmDisp, int64_t FinalImmDisp, SMLoc &BracLoc, SMLoc &StartInBrac, SMLoc &End) { // Remove the '[' and ']' from the IR string. 
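// Editor's sketch, not part of the patch: SwitchMode() above relies on a
// small bitset trick -- take the currently-set mode bit, flip the *target*
// bit into that mask, then XOR the mask into the feature bits. That clears
// the old mode and sets the new one in a single ToggleFeature call, and is a
// no-op when the mode is unchanged. A hypothetical stand-alone rendering of
// the same arithmetic with std::bitset:
#include <bitset>
#include <cassert>
#include <cstddef>

enum : std::size_t { Mode16 = 0, Mode32 = 1, Mode64 = 2, NumFeatures = 8 };

int main() {
  std::bitset<NumFeatures> Features;
  Features.set(Mode32);                        // currently 32-bit

  auto SwitchMode = [&](std::size_t Mode) {
    std::bitset<NumFeatures> AllModes;
    AllModes.set(Mode16); AllModes.set(Mode32); AllModes.set(Mode64);
    std::bitset<NumFeatures> OldMode = Features & AllModes;
    Features ^= OldMode.flip(Mode);            // clear old bit, set new bit
  };

  SwitchMode(Mode64);
  assert(Features.test(Mode64) && !Features.test(Mode32));
  SwitchMode(Mode64);                          // switching to self: no-op
  assert(Features.test(Mode64));
}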
- AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1)); - AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1)); + AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1); + AsmRewrites.emplace_back(AOK_Skip, End, 1); // If ImmDisp is non-zero, then we parsed a displacement before the // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp]) @@ -1114,15 +1125,14 @@ RewriteIntelBracExpression(SmallVectorImpl *AsmRewrites, // We have an immediate displacement before the bracketed expression. // Adjust this to match the final immediate displacement. bool Found = false; - for (SmallVectorImpl::iterator I = AsmRewrites->begin(), - E = AsmRewrites->end(); I != E; ++I) { - if ((*I).Loc.getPointer() > BracLoc.getPointer()) + for (AsmRewrite &AR : AsmRewrites) { + if (AR.Loc.getPointer() > BracLoc.getPointer()) continue; - if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) { + if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) { assert (!Found && "ImmDisp already rewritten."); - (*I).Kind = AOK_Imm; - (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer(); - (*I).Val = FinalImmDisp; + AR.Kind = AOK_Imm; + AR.Len = BracLoc.getPointer() - AR.Loc.getPointer(); + AR.Val = FinalImmDisp; Found = true; break; } @@ -1133,28 +1143,27 @@ RewriteIntelBracExpression(SmallVectorImpl *AsmRewrites, // We have a symbolic and an immediate displacement, but no displacement // before the bracketed expression. Put the immediate displacement // before the bracketed expression. - AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp)); + AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp); } } // Remove all the ImmPrefix rewrites within the brackets. - for (SmallVectorImpl::iterator I = AsmRewrites->begin(), - E = AsmRewrites->end(); I != E; ++I) { - if ((*I).Loc.getPointer() < StartInBrac.getPointer()) + for (AsmRewrite &AR : AsmRewrites) { + if (AR.Loc.getPointer() < StartInBrac.getPointer()) continue; - if ((*I).Kind == AOK_ImmPrefix) - (*I).Kind = AOK_Delete; + if (AR.Kind == AOK_ImmPrefix) + AR.Kind = AOK_Delete; } const char *SymLocPtr = SymName.data(); // Skip everything before the symbol. if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) { assert(Len > 0 && "Expected a non-negative length."); - AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len)); + AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len); } // Skip everything after the symbol. if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) { SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size()); assert(Len > 0 && "Expected a non-negative length."); - AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len)); + AsmRewrites.emplace_back(AOK_Skip, Loc, Len); } } @@ -1162,6 +1171,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); + AsmToken::TokenKind PrevTK = AsmToken::Error; bool Done = false; while (!Done) { bool UpdateLocLex = true; @@ -1205,7 +1215,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { return Error(Tok.getLoc(), "Unexpected identifier!"); } else { // This is a dot operator, not an adjacent identifier. 
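// Editor's sketch, not part of the patch: the AsmRewrite entries recorded in
// RewriteIntelBracExpression() above are instructions for a later pass that
// re-renders the inline-asm string. A toy version of that replay loop, with a
// hypothetical two-kind rewrite record (skip N chars / splice in an
// immediate), just to show the mechanics; assumes entries sorted by position:
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

enum class Kind { Skip, Imm };
struct Rewrite { Kind K; std::size_t Pos, Len; long Val; };

int main() {
  std::string Src = "mov eax, [8]";
  // "Remove the '[' and ']'" and rewrite the displacement, as the code above
  // does for `ImmDisp [ BaseReg + Scale*IndexReg + Disp ]` operands.
  std::vector<Rewrite> Rewrites = {
      {Kind::Skip, 9, 1, 0}, {Kind::Imm, 10, 1, 16}, {Kind::Skip, 11, 1, 0}};
  std::string Out;
  std::size_t I = 0;
  for (const Rewrite &R : Rewrites) {
    Out += Src.substr(I, R.Pos - I);   // copy untouched text
    if (R.K == Kind::Imm)
      Out += std::to_string(R.Val);    // splice in the final immediate
    I = R.Pos + R.Len;                 // consume the rewritten span
  }
  Out += Src.substr(I);
  std::cout << Out << "\n";            // "mov eax, 16"
}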
- if (Identifier.find('.') != StringRef::npos) { + if (Identifier.find('.') != StringRef::npos && + PrevTK == AsmToken::RBrac) { return false; } else { InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo(); @@ -1223,8 +1234,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { case AsmToken::Integer: { StringRef ErrMsg; if (isParsingInlineAsm() && SM.getAddImmPrefix()) - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, - Tok.getLoc())); + InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc()); // Look for 'b' or 'f' following an Integer as a directional label SMLoc Loc = getTok().getLoc(); int64_t IntVal = getTok().getIntVal(); @@ -1237,7 +1247,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; const MCExpr *Val = - MCSymbolRefExpr::create(Sym, Variant, getContext()); + MCSymbolRefExpr::create(Sym, Variant, getContext()); if (IDVal == "b" && Sym->isUndefined()) return Error(Loc, "invalid reference to undefined symbol"); StringRef Identifier = Sym->getName(); @@ -1275,6 +1285,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (!Done && UpdateLocLex) End = consumeToken(); + + PrevTK = TK; } return false; } @@ -1302,7 +1314,7 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, // A symbolic displacement. Disp = Sym; if (isParsingInlineAsm()) - RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(), + RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(), ImmDisp, SM.getImm(), BracLoc, StartInBrac, End); } @@ -1359,7 +1371,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, InlineAsmIdentifierInfo &Info, bool IsUnevaluatedOperand, SMLoc &End) { MCAsmParser &Parser = getParser(); - assert (isParsingInlineAsm() && "Expected to be parsing inline assembly."); + assert(isParsingInlineAsm() && "Expected to be parsing inline assembly."); Val = nullptr; StringRef LineBuf(Identifier.data()); @@ -1372,15 +1384,17 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, // Advance the token stream until the end of the current token is // after the end of what the frontend claimed. const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); - while (true) { + do { End = Tok.getEndLoc(); getLexer().Lex(); - - assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?"); - if (End.getPointer() == EndPtr) break; - } + } while (End.getPointer() < EndPtr); Identifier = LineBuf; + // The frontend should end parsing on an assembler token boundary, unless it + // failed parsing. + assert((End.getPointer() == EndPtr || !Result) && + "frontend claimed part of a token?"); + // If the identifier lookup was unsuccessful, assume that we are dealing with // a label. if (!Result) { @@ -1389,9 +1403,8 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, Loc, false); assert(InternalName.size() && "We should have an internal name here."); // Push a rewrite for replacing the identifier name with the internal name. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc, - Identifier.size(), - InternalName)); + InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), + InternalName); } // Create the symbol reference. @@ -1418,8 +1431,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, AsmToken ImmDispToken = Parser.Lex(); // Eat the integer. 
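// Editor's sketch, not part of the patch: the PrevTK check added above means
// an identifier containing '.' is only treated as a MASM dot operator when it
// immediately follows ']' (e.g. `[ebx].field`); elsewhere, `a.b` stays an
// ordinary (possibly C++) identifier. A toy classifier under that rule, with
// hypothetical names:
#include <cassert>
#include <string>

enum class Tok { Error, RBrac, Identifier };

static bool isDotOperator(const std::string &Ident, Tok PrevTK) {
  return Ident.find('.') != std::string::npos && PrevTK == Tok::RBrac;
}

int main() {
  assert(isDotOperator(".field", Tok::RBrac));       // [ebx].field
  assert(!isDotOperator("ns.sym", Tok::Identifier)); // plain identifier
}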
if (isParsingInlineAsm()) - InstInfo->AsmRewrites->push_back( - AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc())); + InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc()); if (getLexer().isNot(AsmToken::LBrac)) { // An immediate following a 'segment register', 'colon' token sequence can @@ -1588,8 +1600,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data()); unsigned Len = DotDispStr.size(); unsigned Val = OrigDispVal + DotDispVal; - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len, - Val)); + InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val); } NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext()); @@ -1613,7 +1624,7 @@ std::unique_ptr X86AsmParser::ParseIntelOffsetOfOperator() { return nullptr; // Don't emit the offset operator. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7)); + InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7); // The offset operator will have an 'r' constraint, thus we need to create // register operand to ensure proper matching. Just pick a GPR based on @@ -1664,7 +1675,7 @@ std::unique_ptr X86AsmParser::ParseIntelOperator(unsigned OpKind) { // Rewrite the type operator and the C or C++ type or variable in terms of an // immediate. E.g. TYPE foo -> $$4 unsigned Len = End.getPointer() - TypeLoc.getPointer(); - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal)); + InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal); const MCExpr *Imm = MCConstantExpr::create(CVal, getContext()); return X86Operand::CreateImm(Imm, Start, End); @@ -1688,12 +1699,14 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { return ParseIntelOperator(IOK_TYPE); } + bool PtrInOperand = false; unsigned Size = getIntelMemOperandSize(Tok.getString()); if (Size) { Parser.Lex(); // Eat operand size (e.g., byte, word). if (Tok.getString() != "PTR" && Tok.getString() != "ptr") return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); Parser.Lex(); // Eat ptr. + PtrInOperand = true; } Start = Tok.getLoc(); @@ -1711,10 +1724,10 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { unsigned Len = Tok.getLoc().getPointer() - Start.getPointer(); if (StartTok.getString().size() == Len) // Just add a prefix if this wasn't a complex immediate expression. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start)); + InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start); else // Otherwise, rewrite the complex expression as a single immediate. - InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm)); + InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm); } if (getLexer().isNot(AsmToken::LBrac)) { @@ -1740,7 +1753,7 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { } // rounding mode token - if (STI.getFeatureBits()[X86::FeatureAVX512] && + if (getSTI().getFeatureBits()[X86::FeatureAVX512] && getLexer().is(AsmToken::LCurly)) return ParseRoundingModeOp(Start, End); @@ -1749,9 +1762,16 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { if (!ParseRegister(RegNo, Start, End)) { // If this is a segment register followed by a ':', then this is the start // of a segment override, otherwise this is a normal register reference. 
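// Editor's sketch, not part of the patch: ParseIntelOperator() above folds a
// MASM operator like `TYPE foo` into a plain immediate and records an AOK_Imm
// rewrite covering the whole span, so the re-rendered string carries `$$4`
// instead. A rough string-level analogue (the size value is an assumption,
// standing in for whatever the frontend reports for `foo`):
#include <cstddef>
#include <iostream>
#include <string>

int main() {
  std::string Asm = "imul eax, ebx, TYPE foo";
  std::size_t Loc = Asm.find("TYPE");
  std::size_t Len = std::string("TYPE foo").size(); // operator + operand
  unsigned CVal = 4;                                // assumed sizeof(foo)
  Asm.replace(Loc, Len, "$$" + std::to_string(CVal));
  std::cout << Asm << "\n";  // imul eax, ebx, $$4
}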
- if (getLexer().isNot(AsmToken::Colon)) + // In case it is a normal register and there is ptr in the operand this + // is an error + if (getLexer().isNot(AsmToken::Colon)){ + if (PtrInOperand){ + return ErrorOperand(Start, "expected memory operand after " + "'ptr', found register operand instead"); + } return X86Operand::CreateReg(RegNo, Start, End); - + } + return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); } @@ -1798,7 +1818,7 @@ std::unique_ptr X86AsmParser::ParseATTOperand() { } case AsmToken::LCurly:{ SMLoc Start = Parser.getTok().getLoc(), End; - if (STI.getFeatureBits()[X86::FeatureAVX512]) + if (getSTI().getFeatureBits()[X86::FeatureAVX512]) return ParseRoundingModeOp(Start, End); return ErrorOperand(Start, "unknown token in expression"); } @@ -1808,7 +1828,7 @@ std::unique_ptr X86AsmParser::ParseATTOperand() { bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, const MCParsedAsmOperand &Op) { MCAsmParser &Parser = getParser(); - if(STI.getFeatureBits()[X86::FeatureAVX512]) { + if(getSTI().getFeatureBits()[X86::FeatureAVX512]) { if (getLexer().is(AsmToken::LCurly)) { // Eat "{" and mark the current place. const SMLoc consumedToken = consumeToken(); @@ -1983,12 +2003,13 @@ std::unique_ptr X86AsmParser::ParseMemOperand(unsigned SegReg, } // Validate the scale amount. - if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && + if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && ScaleVal != 1) { Error(Loc, "scale factor in 16-bit address must be 1"); return nullptr; - } - if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){ + } + if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && + ScaleVal != 8) { Error(Loc, "scale factor in address must be 1, 2, 4 or 8"); return nullptr; } @@ -2175,7 +2196,6 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Name == "repne" || Name == "repnz" || Name == "rex64" || Name == "data16"; - // This does the actual operand parsing. Don't parse any more if we have a // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we // just want to parse the "lock" as the first instruction and the "incl" as @@ -2213,6 +2233,20 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, (isPrefix && getLexer().is(AsmToken::Slash))) Parser.Lex(); + // This is for gas compatibility and cannot be done in td. + // Adding "p" for some floating point with no argument. + // For example: fsub --> fsubp + bool IsFp = + Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr"; + if (IsFp && Operands.size() == 1) { + const char *Repl = StringSwitch(Name) + .Case("fsub", "fsubp") + .Case("fdiv", "fdivp") + .Case("fsubr", "fsubrp") + .Case("fdivr", "fdivrp"); + static_cast(*Operands[0]).setTokenValue(Repl); + } + // This is a terrible hack to handle "out[bwl]? %al, (%dx)" -> // "outb %al, %dx". Out doesn't take a memory form, but this is a widely // documented form in various unofficial manuals, so a lot of code uses it. 
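// Editor's sketch, not part of the patch: the no-operand x87 forms above are
// remapped for gas compatibility. Note the StringSwitch in the original
// source is StringSwitch<const char *>; the template argument was lost in
// this copy of the patch. A self-contained equivalent (the .Default is my
// addition, for safety outside the IsFp guard):
#include "llvm/ADT/StringSwitch.h"
#include <cassert>
#include <cstring>

static const char *gasCompatFpName(const char *Name) {
  return llvm::StringSwitch<const char *>(Name)
      .Case("fsub", "fsubp")
      .Case("fdiv", "fdivp")
      .Case("fsubr", "fsubrp")
      .Case("fdivr", "fdivrp")
      .Default(Name);
}

int main() {
  assert(std::strcmp(gasCompatFpName("fsub"), "fsubp") == 0);
  assert(std::strcmp(gasCompatFpName("fadd"), "fadd") == 0);
}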
@@ -2242,9 +2276,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Append default arguments to "ins[bwld]" if (Name.startswith("ins") && Operands.size() == 1 && - (Name == "insb" || Name == "insw" || Name == "insl" || - Name == "insd" )) { - AddDefaultSrcDestOperands(Operands, + (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd")) { + AddDefaultSrcDestOperands(Operands, X86Operand::CreateReg(X86::DX, NameLoc, NameLoc), DefaultMemDIOperand(NameLoc)); } @@ -2346,98 +2379,21 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // instalias with an immediate operand yet. if (Name == "int" && Operands.size() == 2) { X86Operand &Op1 = static_cast(*Operands[1]); - if (Op1.isImm() && isa(Op1.getImm()) && - cast(Op1.getImm())->getValue() == 3) { - Operands.erase(Operands.begin() + 1); - static_cast(*Operands[0]).setTokenValue("int3"); - } + if (Op1.isImm()) + if (auto *CE = dyn_cast(Op1.getImm())) + if (CE->getValue() == 3) { + Operands.erase(Operands.begin() + 1); + static_cast(*Operands[0]).setTokenValue("int3"); + } } return false; } -static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg, - bool isCmp) { - MCInst TmpInst; - TmpInst.setOpcode(Opcode); - if (!isCmp) - TmpInst.addOperand(MCOperand::createReg(Reg)); - TmpInst.addOperand(MCOperand::createReg(Reg)); - TmpInst.addOperand(Inst.getOperand(0)); - Inst = TmpInst; - return true; -} - -static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode, - bool isCmp = false) { - if (!Inst.getOperand(0).isImm() || - !isImmSExti16i8Value(Inst.getOperand(0).getImm())) - return false; - - return convertToSExti8(Inst, Opcode, X86::AX, isCmp); -} - -static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode, - bool isCmp = false) { - if (!Inst.getOperand(0).isImm() || - !isImmSExti32i8Value(Inst.getOperand(0).getImm())) - return false; - - return convertToSExti8(Inst, Opcode, X86::EAX, isCmp); -} - -static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, - bool isCmp = false) { - if (!Inst.getOperand(0).isImm() || - !isImmSExti64i8Value(Inst.getOperand(0).getImm())) - return false; - - return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); -} - -bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) { - switch (Inst.getOpcode()) { - default: return true; - case X86::INT: - X86Operand &Op = static_cast(*Ops[1]); - assert(Op.isImm() && "expected immediate"); - int64_t Res; - if (!Op.getImm()->evaluateAsAbsolute(Res) || Res > 255) { - Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]"); - return false; - } - return true; - } - llvm_unreachable("handle the instruction appropriately"); -} - bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { switch (Inst.getOpcode()) { default: return false; - case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); - case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8); - case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8); - case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8); - case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8); - case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8); - case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8); - case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8); - case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8); - case X86::CMP16i16: return convert16i16to16ri8(Inst, 
X86::CMP16ri8, true); - case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true); - case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true); - case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8); - case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8); - case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8); - case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8); - case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8); - case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8); - case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8); - case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8); - case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8); - case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8); - case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8); - case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8); + case X86::VMOVZPQILo2PQIrr: case X86::VMOVAPDrr: case X86::VMOVAPDYrr: case X86::VMOVAPSrr: @@ -2457,18 +2413,19 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("Invalid opcode"); - case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; - case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; - case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; - case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; - case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; - case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; - case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; - case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; - case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; - case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; - case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; - case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break; + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; } Inst.setOpcode(NewOpc); return true; @@ -2573,9 +2530,6 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, isParsingIntelSyntax())) { default: llvm_unreachable("Unexpected match result!"); case Match_Success: - if (!validateInstruction(Inst, Operands)) - return true; - // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the // individual transformations can chain off each other. 
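// Editor's sketch, not part of the patch: the "int $3" -> "int3" fold above
// reads as nested dyn_cast/early-exit instead of the old isa<> && cast<>
// condition; note that isa<MCConstantExpr>, cast<MCConstantExpr> and
// dyn_cast<MCConstantExpr> lost their template arguments in this copy of the
// patch. A hedged restatement of the same shape over a toy expression type:
#include <cassert>

struct Expr { virtual ~Expr() = default; };
struct ConstExpr : Expr { long Value; explicit ConstExpr(long V) : Value(V) {} };

// True when the operand is a literal 3, i.e. the mnemonic becomes "int3".
static bool foldsToInt3(const Expr *E) {
  if (auto *CE = dynamic_cast<const ConstExpr *>(E))
    if (CE->Value == 3)
      return true;
  return false;
}

int main() {
  ConstExpr Three(3), Five(5);
  assert(foldsToInt3(&Three) && !foldsToInt3(&Five));
}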
@@ -2819,9 +2773,6 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, unsigned NumSuccessfulMatches = std::count(std::begin(Match), std::end(Match), Match_Success); if (NumSuccessfulMatches == 1) { - if (!validateInstruction(Inst, Operands)) - return true; - // Some instructions need post-processing to, for example, tweak which // encoding is selected. Loop on it while changes happen so the individual // transformations can chain off each other. @@ -2898,10 +2849,29 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) { "a '%' prefix in .intel_syntax"); } return false; - } + } else if (IDVal == ".even") + return parseDirectiveEven(DirectiveID.getLoc()); return true; } +/// parseDirectiveEven +/// ::= .even +bool X86AsmParser::parseDirectiveEven(SMLoc L) { + const MCSection *Section = getStreamer().getCurrentSection().first; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + TokError("unexpected token in directive"); + return false; + } + if (!Section) { + getStreamer().InitSections(false); + Section = getStreamer().getCurrentSection().first; + } + if (Section->UseCodeAlign()) + getStreamer().EmitCodeAlignment(2, 0); + else + getStreamer().EmitValueToAlignment(2, 0, 1, 0); + return false; +} /// ParseDirectiveWord /// ::= .word [ expression (, expression)* ] bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { @@ -2909,10 +2879,19 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; + SMLoc ExprLoc = getLexer().getLoc(); if (getParser().parseExpression(Value)) return false; - getParser().getStreamer().EmitValue(Value, Size); + if (const auto *MCE = dyn_cast(Value)) { + assert(Size <= 8 && "Invalid size"); + uint64_t IntValue = MCE->getValue(); + if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) + return Error(ExprLoc, "literal value out of range for directive"); + getStreamer().EmitIntValue(IntValue, Size); + } else { + getStreamer().EmitValue(Value, Size, ExprLoc); + } if (getLexer().is(AsmToken::EndOfStatement)) break; diff --git a/lib/Target/X86/AsmParser/X86AsmParserCommon.h b/lib/Target/X86/AsmParser/X86AsmParserCommon.h index 7610806c4578..54538c804a03 100644 --- a/lib/Target/X86/AsmParser/X86AsmParserCommon.h +++ b/lib/Target/X86/AsmParser/X86AsmParserCommon.h @@ -13,30 +13,25 @@ namespace llvm { inline bool isImmSExti16i8Value(uint64_t Value) { - return (( Value <= 0x000000000000007FULL)|| - (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isInt<8>(Value) || + (isUInt<16>(Value) && isInt<8>(static_cast(Value))); } inline bool isImmSExti32i8Value(uint64_t Value) { - return (( Value <= 0x000000000000007FULL)|| - (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isInt<8>(Value) || + (isUInt<32>(Value) && isInt<8>(static_cast(Value))); } inline bool isImmSExti64i8Value(uint64_t Value) { - return (( Value <= 0x000000000000007FULL)|| - (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isInt<8>(Value); } inline bool isImmSExti64i32Value(uint64_t Value) { - return (( Value <= 0x000000007FFFFFFFULL)|| - (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isInt<32>(Value); } inline bool isImmUnsignedi8Value(uint64_t Value) { - return (( Value <= 0x00000000000000FFULL)|| - 
(0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL)); + return isUInt<8>(Value) || isInt<8>(Value); } } // End of namespace llvm diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index fba2c280d200..b23f5c353013 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -34,18 +34,9 @@ set(sources X86VZeroUpper.cpp X86FixupLEAs.cpp X86WinEHState.cpp + X86OptimizeLEAs.cpp ) -if( CMAKE_CL_64 ) - enable_language(ASM_MASM) - ADD_CUSTOM_COMMAND( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj - MAIN_DEPENDENCY X86CompilationCallback_Win64.asm - COMMAND ${CMAKE_ASM_MASM_COMPILER} /nologo /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm - ) - set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj) -endif() - add_llvm_target(X86CodeGen ${sources}) add_subdirectory(AsmParser) diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index cfc3ee2fb08f..ce8fcf164668 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -95,11 +95,13 @@ X86GenericDisassembler::X86GenericDisassembler( llvm_unreachable("Invalid CPU mode"); } +namespace { struct Region { ArrayRef Bytes; uint64_t Base; Region(ArrayRef Bytes, uint64_t Base) : Bytes(Bytes), Base(Base) {} }; +} // end anonymous namespace /// A callback function that wraps the readByte method from Region. /// @@ -831,8 +833,12 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_XMM256: case TYPE_XMM512: case TYPE_VK1: + case TYPE_VK2: + case TYPE_VK4: case TYPE_VK8: case TYPE_VK16: + case TYPE_VK32: + case TYPE_VK64: case TYPE_DEBUGREG: case TYPE_CONTROLREG: case TYPE_BNDR: @@ -962,6 +968,7 @@ static bool translateInstruction(MCInst &mcInst, return true; } + mcInst.clear(); mcInst.setOpcode(insn.instructionID); // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3 // prefix bytes should be disassembled as xrelease and xacquire then set the diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index f73fa75f888e..040143b15587 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -361,7 +361,7 @@ static int readPrefixes(struct InternalInstruction* insn) { * then it should be disassembled as a xacquire/xrelease not repne/rep. */ if ((byte == 0xf2 || byte == 0xf3) && - ((nextByte == 0xf0) | + ((nextByte == 0xf0) || ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) insn->xAcquireRelease = true; /* @@ -980,6 +980,47 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) { insn->opcode == 0xE3) attrMask ^= ATTR_ADSIZE; + /* + * In 64-bit mode all f64 superscripted opcodes ignore opcode size prefix + * CALL/JMP/JCC instructions need to ignore 0x66 and consume 4 bytes + */ + + if (insn->mode == MODE_64BIT && + isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) { + switch (insn->opcode) { + case 0xE8: + case 0xE9: + // Take care of psubsb and other mmx instructions. 
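// Editor's sketch, not part of the patch: the rewritten isImmSExt*i8Value
// helpers above replace hand-written hex ranges with MathExtras predicates.
// Note the casts in this copy of the patch lost their template arguments: the
// source reads static_cast<int16_t>(Value) and static_cast<int32_t>(Value)
// respectively. An equivalence spot-check for the 16i8 case:
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

static bool oldIsImmSExti16i8(uint64_t V) {
  return (V <= 0x000000000000007FULL) ||
         (0x000000000000FF80ULL <= V && V <= 0x000000000000FFFFULL) ||
         (0xFFFFFFFFFFFFFF80ULL <= V && V <= 0xFFFFFFFFFFFFFFFFULL);
}

static bool newIsImmSExti16i8(uint64_t V) {
  return llvm::isInt<8>(V) ||
         (llvm::isUInt<16>(V) && llvm::isInt<8>(static_cast<int16_t>(V)));
}

int main() {
  for (uint64_t V : {0x7FULL, 0x80ULL, 0xFF80ULL, 0xFFFFULL,
                     0xFFFFFFFFFFFFFF80ULL, 0x1234ULL})
    assert(oldIsImmSExti16i8(V) == newIsImmSExti16i8(V));
}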
+ if (insn->opcodeType == ONEBYTE) { + attrMask ^= ATTR_OPSIZE; + insn->immediateSize = 4; + insn->displacementSize = 4; + } + break; + case 0x82: + case 0x83: + case 0x84: + case 0x85: + case 0x86: + case 0x87: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + // Take care of lea and three byte ops. + if (insn->opcodeType == TWOBYTE) { + attrMask ^= ATTR_OPSIZE; + insn->immediateSize = 4; + insn->displacementSize = 4; + } + break; + } + } + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; @@ -1447,8 +1488,12 @@ static int readModRM(struct InternalInstruction* insn) { case TYPE_XMM: \ return prefix##_XMM0 + index; \ case TYPE_VK1: \ + case TYPE_VK2: \ + case TYPE_VK4: \ case TYPE_VK8: \ case TYPE_VK16: \ + case TYPE_VK32: \ + case TYPE_VK64: \ if (index > 7) \ *valid = 0; \ return prefix##_K0 + index; \ diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index a79a923ac525..28a628e5066b 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -572,8 +572,6 @@ struct InternalInstruction { // The last byte of the opcode, not counting any ModR/M extension uint8_t opcode; - // The ModR/M byte of the instruction, if it is an opcode extension - uint8_t modRMExtension; // decode state diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index ea727e6e82fb..b4c0bc4cd4d9 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index 62b6b73e7864..bbb309076610 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -15,12 +15,9 @@ #define LLVM_LIB_TARGET_X86_INSTPRINTER_X86ATTINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include "llvm/MC/MCSubtargetInfo.h" namespace llvm { -class MCOperand; - class X86ATTInstPrinter final : public MCInstPrinter { public: X86ATTInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 91b144a44824..82f0ee5a5ebc 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -21,6 +21,27 @@ using namespace llvm; +static unsigned getVectorRegSize(unsigned RegNo) { + if (X86::ZMM0 <= RegNo && RegNo <= X86::ZMM31) + return 512; + if (X86::YMM0 <= RegNo && RegNo <= X86::YMM31) + return 256; + if (X86::XMM0 <= RegNo && RegNo <= X86::XMM31) + return 128; + if (X86::MM0 <= RegNo && RegNo <= X86::MM7) + return 64; + + llvm_unreachable("Unknown vector reg!"); + return 0; +} + +static MVT getRegOperandVectorVT(const MCInst *MI, const MVT &ScalarVT, + unsigned OperandIndex) { + unsigned OpReg = MI->getOperand(OperandIndex).getReg(); + return MVT::getVectorVT(ScalarVT, + getVectorRegSize(OpReg)/ScalarVT.getSizeInBits()); +} + /// \brief Extracts the src/dst types for a given zero extension instruction. 
/// \note While the number of elements in DstVT type correct, the /// number in the SrcVT type is expanded to fill the src xmm register and the @@ -107,6 +128,75 @@ static void getZeroExtensionTypes(const MCInst *MI, MVT &SrcVT, MVT &DstVT) { } } +#define CASE_MASK_INS_COMMON(Inst, Suffix, src) \ + case X86::V##Inst##Suffix##src: \ + case X86::V##Inst##Suffix##src##k: \ + case X86::V##Inst##Suffix##src##kz: + +#define CASE_SSE_INS_COMMON(Inst, src) \ + case X86::Inst##src: + +#define CASE_AVX_INS_COMMON(Inst, Suffix, src) \ + case X86::V##Inst##Suffix##src: + +#define CASE_MOVDUP(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z256, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z128, r##src) \ + CASE_AVX_INS_COMMON(Inst, , r##src) \ + CASE_AVX_INS_COMMON(Inst, Y, r##src) \ + CASE_SSE_INS_COMMON(Inst, r##src) \ + +#define CASE_UNPCK(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z256, r##src) \ + CASE_MASK_INS_COMMON(Inst, Z128, r##src) \ + CASE_AVX_INS_COMMON(Inst, , r##src) \ + CASE_AVX_INS_COMMON(Inst, Y, r##src) \ + CASE_SSE_INS_COMMON(Inst, r##src) \ + +#define CASE_SHUF(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, r##src##i) \ + CASE_MASK_INS_COMMON(Inst, Z256, r##src##i) \ + CASE_MASK_INS_COMMON(Inst, Z128, r##src##i) \ + CASE_AVX_INS_COMMON(Inst, , r##src##i) \ + CASE_AVX_INS_COMMON(Inst, Y, r##src##i) \ + CASE_SSE_INS_COMMON(Inst, r##src##i) \ + +#define CASE_VPERM(Inst, src) \ + CASE_MASK_INS_COMMON(Inst, Z, src##i) \ + CASE_MASK_INS_COMMON(Inst, Z256, src##i) \ + CASE_MASK_INS_COMMON(Inst, Z128, src##i) \ + CASE_AVX_INS_COMMON(Inst, , src##i) \ + CASE_AVX_INS_COMMON(Inst, Y, src##i) \ + +#define CASE_VSHUF(Inst, src) \ + CASE_MASK_INS_COMMON(SHUFF##Inst, Z, r##src##i) \ + CASE_MASK_INS_COMMON(SHUFI##Inst, Z, r##src##i) \ + CASE_MASK_INS_COMMON(SHUFF##Inst, Z256, r##src##i) \ + CASE_MASK_INS_COMMON(SHUFI##Inst, Z256, r##src##i) \ + +/// \brief Extracts the types and if it has memory operand for a given +/// (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2) instruction. +static void getVSHUF64x2FamilyInfo(const MCInst *MI, MVT &VT, bool &HasMemOp) { + HasMemOp = false; + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unknown VSHUF64x2 family instructions."); + break; + CASE_VSHUF(64X2, m) + HasMemOp = true; // FALL THROUGH. + CASE_VSHUF(64X2, r) + VT = getRegOperandVectorVT(MI, MVT::i64, 0); + break; + CASE_VSHUF(32X4, m) + HasMemOp = true; // FALL THROUGH. + CASE_VSHUF(32X4, r) + VT = getRegOperandVectorVT(MI, MVT::i32, 0); + break; + } +} + //===----------------------------------------------------------------------===// // Top Level Entrypoint //===----------------------------------------------------------------------===// @@ -127,23 +217,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::BLENDPDrri: case X86::VBLENDPDrri: + case X86::VBLENDPDYrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::BLENDPDrmi: case X86::VBLENDPDrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v2f64, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VBLENDPDYrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
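// Editor's note, not part of the patch: the CASE_* macros above exist purely
// to collapse the old hand-written case ladders. From the definitions in this
// hunk, CASE_MOVDUP(MOVSLDUP, r) expands to twelve case labels:
//
//   case X86::VMOVSLDUPZrr:    case X86::VMOVSLDUPZrrk:    case X86::VMOVSLDUPZrrkz:
//   case X86::VMOVSLDUPZ256rr: case X86::VMOVSLDUPZ256rrk: case X86::VMOVSLDUPZ256rrkz:
//   case X86::VMOVSLDUPZ128rr: case X86::VMOVSLDUPZ128rrk: case X86::VMOVSLDUPZ128rrkz:
//   case X86::VMOVSLDUPrr:     case X86::VMOVSLDUPYrr:     case X86::MOVSLDUPrr:
//
// i.e. the AVX-512 masked/zero-masked variants plus the AVX and SSE forms,
// which is why the rewritten cases below can share one
// getRegOperandVectorVT()-driven decode instead of one block per width.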
case X86::VBLENDPDYrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v4f64, + DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f64, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -152,23 +233,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::BLENDPSrri: case X86::VBLENDPSrri: + case X86::VBLENDPSYrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::BLENDPSrmi: case X86::VBLENDPSrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v4f32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VBLENDPSYrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. case X86::VBLENDPSYrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v8f32, + DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::f32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -177,23 +249,14 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::PBLENDWrri: case X86::VPBLENDWrri: + case X86::VPBLENDWYrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PBLENDWrmi: case X86::VPBLENDWrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v8i16, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPBLENDWYrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. case X86::VPBLENDWYrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v16i16, + DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i16, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -201,23 +264,13 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, break; case X86::VPBLENDDrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPBLENDDrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v4i32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPBLENDDYrri: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
+ case X86::VPBLENDDrmi: case X86::VPBLENDDYrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeBLENDMask(MVT::v8i32, + DecodeBLENDMask(getRegOperandVectorVT(MI, MVT::i32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -239,6 +292,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::MOVLHPSrr: case X86::VMOVLHPSrr: + case X86::VMOVLHPSZrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); @@ -247,569 +301,327 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::MOVHLPSrr: case X86::VMOVHLPSrr: + case X86::VMOVHLPSZrr: Src2Name = getRegName(MI->getOperand(2).getReg()); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); DecodeMOVHLPSMask(2, ShuffleMask); break; - case X86::MOVSLDUPrr: - case X86::VMOVSLDUPrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); + CASE_MOVDUP(MOVSLDUP, r) + Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg()); // FALL THROUGH. - case X86::MOVSLDUPrm: - case X86::VMOVSLDUPrm: + CASE_MOVDUP(MOVSLDUP, m) DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSLDUPMask(MVT::v4f32, ShuffleMask); + DecodeMOVSLDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); break; - case X86::VMOVSHDUPYrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); + CASE_MOVDUP(MOVSHDUP, r) + Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg()); // FALL THROUGH. - case X86::VMOVSHDUPYrm: + CASE_MOVDUP(MOVSHDUP, m) DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSHDUPMask(MVT::v8f32, ShuffleMask); + DecodeMOVSHDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); break; - case X86::VMOVSLDUPYrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); + CASE_MOVDUP(MOVDDUP, r) + Src1Name = getRegName(MI->getOperand(MI->getNumOperands() - 1).getReg()); // FALL THROUGH. - case X86::VMOVSLDUPYrm: + CASE_MOVDUP(MOVDDUP, m) DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSLDUPMask(MVT::v8f32, ShuffleMask); - break; - - case X86::MOVSHDUPrr: - case X86::VMOVSHDUPrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::MOVSHDUPrm: - case X86::VMOVSHDUPrm: - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVSHDUPMask(MVT::v4f32, ShuffleMask); - break; - - case X86::VMOVDDUPYrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::VMOVDDUPYrm: - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVDDUPMask(MVT::v4f64, ShuffleMask); - break; - - case X86::MOVDDUPrr: - case X86::VMOVDDUPrr: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. 
- case X86::MOVDDUPrm: - case X86::VMOVDDUPrm: - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeMOVDDUPMask(MVT::v2f64, ShuffleMask); + DecodeMOVDDUPMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); break; case X86::PSLLDQri: case X86::VPSLLDQri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSLLDQMask(MVT::v16i8, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSLLDQYri: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSLLDQMask(MVT::v32i8, + DecodePSLLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; case X86::PSRLDQri: case X86::VPSRLDQri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSRLDQMask(MVT::v16i8, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSRLDQYri: Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSRLDQMask(MVT::v32i8, + DecodePSRLDQMask(getRegOperandVectorVT(MI, MVT::i8, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; case X86::PALIGNR128rr: case X86::VPALIGNR128rr: + case X86::VPALIGNR256rr: Src1Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PALIGNR128rm: case X86::VPALIGNR128rm: - Src2Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePALIGNRMask(MVT::v16i8, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPALIGNR256rr: - Src1Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. case X86::VPALIGNR256rm: Src2Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePALIGNRMask(MVT::v32i8, + DecodePALIGNRMask(getRegOperandVectorVT(MI, MVT::i8, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; case X86::PSHUFDri: case X86::VPSHUFDri: + case X86::VPSHUFDYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFDmi: case X86::VPSHUFDmi: - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v4i32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSHUFDYri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. case X86::VPSHUFDYmi: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v8i32, + DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::i32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; case X86::PSHUFHWri: case X86::VPSHUFHWri: + case X86::VPSHUFHWYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. 
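// Editor's sketch, not part of the patch: the PSHUF* decoders invoked above
// all unpack a 2-bit-per-element immediate. A minimal stand-in for what
// DecodePSHUFMask produces for a 4 x i32 shuffle (the real decoder lives in
// X86ShuffleDecode.cpp and also handles wider types by repeating the pattern
// per 128-bit lane):
#include <array>
#include <cassert>
#include <cstdint>

static std::array<int, 4> decodePSHUFD(std::uint8_t Imm) {
  std::array<int, 4> Mask;
  for (int i = 0; i != 4; ++i)
    Mask[i] = (Imm >> (2 * i)) & 0x3;  // element i selected by bits 2i..2i+1
  return Mask;
}

int main() {
  // pshufd $0x1b, %xmm0, %xmm1 reverses the four dwords: 0x1b == 0b00011011.
  assert((decodePSHUFD(0x1b) == std::array<int, 4>{3, 2, 1, 0}));
}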
case X86::PSHUFHWmi: case X86::VPSHUFHWmi: - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFHWMask(MVT::v8i16, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSHUFHWYri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. case X86::VPSHUFHWYmi: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFHWMask(MVT::v16i16, + DecodePSHUFHWMask(getRegOperandVectorVT(MI, MVT::i16, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; + case X86::PSHUFLWri: case X86::VPSHUFLWri: + case X86::VPSHUFLWYri: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. case X86::PSHUFLWmi: case X86::VPSHUFLWmi: - DestName = getRegName(MI->getOperand(0).getReg()); - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFLWMask(MVT::v8i16, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - break; - case X86::VPSHUFLWYri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. case X86::VPSHUFLWYmi: DestName = getRegName(MI->getOperand(0).getReg()); if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFLWMask(MVT::v16i16, + DecodePSHUFLWMask(getRegOperandVectorVT(MI, MVT::i16, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); break; - case X86::PUNPCKHBWrr: - case X86::VPUNPCKHBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHBWrm: - case X86::VPUNPCKHBWrm: + case X86::MMX_PSHUFWri: Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v16i8, ShuffleMask); - break; - case X86::VPUNPCKHBWYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::VPUNPCKHBWYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); + case X86::MMX_PSHUFWmi: DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v32i8, ShuffleMask); - break; - case X86::PUNPCKHWDrr: - case X86::VPUNPCKHWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHWDrm: - case X86::VPUNPCKHWDrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v8i16, ShuffleMask); - break; - case X86::VPUNPCKHWDYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKHWDYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v16i16, ShuffleMask); - break; - case X86::PUNPCKHDQrr: - case X86::VPUNPCKHDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHDQrm: - case X86::VPUNPCKHDQrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v4i32, ShuffleMask); - break; - case X86::VPUNPCKHDQYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKHDQYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v8i32, ShuffleMask); - break; - case X86::VPUNPCKHDQZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
- case X86::VPUNPCKHDQZrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v16i32, ShuffleMask); - break; - case X86::PUNPCKHQDQrr: - case X86::VPUNPCKHQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKHQDQrm: - case X86::VPUNPCKHQDQrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v2i64, ShuffleMask); - break; - case X86::VPUNPCKHQDQYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKHQDQYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v4i64, ShuffleMask); - break; - case X86::VPUNPCKHQDQZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKHQDQZrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKHMask(MVT::v8i64, ShuffleMask); - break; - - case X86::PUNPCKLBWrr: - case X86::VPUNPCKLBWrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLBWrm: - case X86::VPUNPCKLBWrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v16i8, ShuffleMask); - break; - case X86::VPUNPCKLBWYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLBWYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v32i8, ShuffleMask); - break; - case X86::PUNPCKLWDrr: - case X86::VPUNPCKLWDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLWDrm: - case X86::VPUNPCKLWDrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v8i16, ShuffleMask); - break; - case X86::VPUNPCKLWDYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLWDYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v16i16, ShuffleMask); - break; - case X86::PUNPCKLDQrr: - case X86::VPUNPCKLDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::PUNPCKLDQrm: - case X86::VPUNPCKLDQrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v4i32, ShuffleMask); - break; - case X86::VPUNPCKLDQYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLDQYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v8i32, ShuffleMask); - break; - case X86::VPUNPCKLDQZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLDQZrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v16i32, ShuffleMask); - break; - case X86::PUNPCKLQDQrr: - case X86::VPUNPCKLQDQrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
- case X86::PUNPCKLQDQrm: - case X86::VPUNPCKLQDQrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v2i64, ShuffleMask); - break; - case X86::VPUNPCKLQDQYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLQDQYrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v4i64, ShuffleMask); - break; - case X86::VPUNPCKLQDQZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VPUNPCKLQDQZrm: - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - DecodeUNPCKLMask(MVT::v8i64, ShuffleMask); - break; - - case X86::SHUFPDrri: - case X86::VSHUFPDrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::SHUFPDrmi: - case X86::VSHUFPDrmi: if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeSHUFPMask(MVT::v2f64, + DecodePSHUFMask(MVT::v4i16, MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VSHUFPDYrri: + + case X86::PSWAPDrr: + Src1Name = getRegName(MI->getOperand(1).getReg()); + // FALL THROUGH. + case X86::PSWAPDrm: + DestName = getRegName(MI->getOperand(0).getReg()); + DecodePSWAPMask(MVT::v2i32, ShuffleMask); + break; + + CASE_UNPCK(PUNPCKHBW, r) + case X86::MMX_PUNPCKHBWirr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::VSHUFPDYrmi: + CASE_UNPCK(PUNPCKHBW, m) + case X86::MMX_PUNPCKHBWirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKHWD, r) + case X86::MMX_PUNPCKHWDirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKHWD, m) + case X86::MMX_PUNPCKHWDirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKHDQ, r) + case X86::MMX_PUNPCKHDQirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKHDQ, m) + case X86::MMX_PUNPCKHDQirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKHQDQ, r) + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKHQDQ, m) + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKLBW, r) + case X86::MMX_PUNPCKLBWirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKLBW, m) + case X86::MMX_PUNPCKLBWirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i8, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKLWD, r) + case X86::MMX_PUNPCKLWDirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. 
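// Editor's sketch, not part of the patch: every DecodeUNPCKLMask call above
// produces the same interleave pattern, parameterized only by element count;
// that is what lets one CASE_UNPCK block serve the MMX, SSE, AVX and AVX-512
// forms at once. A stand-in for the low-half interleave (src2 elements are
// offset by NumElts, the usual two-input shuffle-mask convention; wider
// vectors repeat this per 128-bit lane):
#include <cassert>
#include <vector>

static std::vector<int> decodeUnpackLow(int NumElts) {
  std::vector<int> Mask;
  for (int i = 0; i != NumElts / 2; ++i) {
    Mask.push_back(i);            // element i of src1
    Mask.push_back(i + NumElts);  // element i of src2
  }
  return Mask;
}

int main() {
  // punpckldq: low dwords of the two sources, interleaved.
  assert((decodeUnpackLow(4) == std::vector<int>{0, 4, 1, 5}));
}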
+ CASE_UNPCK(PUNPCKLWD, m) + case X86::MMX_PUNPCKLWDirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i16, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKLDQ, r) + case X86::MMX_PUNPCKLDQirr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKLDQ, m) + case X86::MMX_PUNPCKLDQirm: + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i32, 0), ShuffleMask); + break; + + CASE_UNPCK(PUNPCKLQDQ, r) + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_UNPCK(PUNPCKLQDQ, m) + Src1Name = getRegName(MI->getOperand(1).getReg()); + DestName = getRegName(MI->getOperand(0).getReg()); + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::i64, 0), ShuffleMask); + break; + + CASE_SHUF(SHUFPD, r) + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + CASE_SHUF(SHUFPD, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeSHUFPMask(MVT::v4f64, + DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f64, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::SHUFPSrri: - case X86::VSHUFPSrri: + CASE_SHUF(SHUFPS, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::SHUFPSrmi: - case X86::VSHUFPSrmi: + CASE_SHUF(SHUFPS, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeSHUFPMask(MVT::v4f32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VSHUFPSYrri: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VSHUFPSYrmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodeSHUFPMask(MVT::v8f32, + DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::UNPCKLPDrr: - case X86::VUNPCKLPDrr: + CASE_VSHUF(64X2, r) + CASE_VSHUF(64X2, m) + CASE_VSHUF(32X4, r) + CASE_VSHUF(32X4, m) { + MVT VT; + bool HasMemOp; + unsigned NumOp = MI->getNumOperands(); + getVSHUF64x2FamilyInfo(MI, VT, HasMemOp); + decodeVSHUF64x2FamilyMask(VT, MI->getOperand(NumOp - 1).getImm(), + ShuffleMask); + DestName = getRegName(MI->getOperand(0).getReg()); + if (HasMemOp) { + assert((NumOp >= 8) && "Expected at least 8 operands!"); + Src1Name = getRegName(MI->getOperand(NumOp - 7).getReg()); + } else { + assert((NumOp >= 4) && "Expected at least 4 operands!"); + Src2Name = getRegName(MI->getOperand(NumOp - 2).getReg()); + Src1Name = getRegName(MI->getOperand(NumOp - 3).getReg()); + } + break; + } + + CASE_UNPCK(UNPCKLPD, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::UNPCKLPDrm: - case X86::VUNPCKLPDrm: - DecodeUNPCKLMask(MVT::v2f64, ShuffleMask); + CASE_UNPCK(UNPCKLPD, m) + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VUNPCKLPDYrr: + + CASE_UNPCK(UNPCKLPS, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
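// Editor's note, not part of the patch: the NumOp - 7 / NumOp - 3 / NumOp - 2
// indexing in the VSHUF64x2 block above encodes the operand layouts
//
//   reg form: dst, [mask,] src1, src2, imm              -> src2 = NumOp - 2
//   mem form: dst, [mask,] src1, base, scale, index,
//             disp, segment, imm                        -> src1 = NumOp - 7
//
// counting back from the trailing immediate, so the same arithmetic covers
// the masked (k/kz) and unmasked variants; the NumOp asserts guard exactly
// that. (The layout is reconstructed from the operand counts asserted in this
// hunk, so treat the bracketed mask slot as an editor's assumption.)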
- case X86::VUNPCKLPDYrm: - DecodeUNPCKLMask(MVT::v4f64, ShuffleMask); + CASE_UNPCK(UNPCKLPS, m) + DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VUNPCKLPDZrr: + + CASE_UNPCK(UNPCKHPD, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::VUNPCKLPDZrm: - DecodeUNPCKLMask(MVT::v8f64, ShuffleMask); + CASE_UNPCK(UNPCKHPD, m) + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::UNPCKLPSrr: - case X86::VUNPCKLPSrr: + + CASE_UNPCK(UNPCKHPS, r) Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. - case X86::UNPCKLPSrm: - case X86::VUNPCKLPSrm: - DecodeUNPCKLMask(MVT::v4f32, ShuffleMask); + CASE_UNPCK(UNPCKHPS, m) + DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask); Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VUNPCKLPSYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKLPSYrm: - DecodeUNPCKLMask(MVT::v8f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKLPSZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKLPSZrm: - DecodeUNPCKLMask(MVT::v16f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::UNPCKHPDrr: - case X86::VUNPCKHPDrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPDrm: - case X86::VUNPCKHPDrm: - DecodeUNPCKHMask(MVT::v2f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKHPDYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKHPDYrm: - DecodeUNPCKHMask(MVT::v4f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKHPDZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKHPDZrm: - DecodeUNPCKHMask(MVT::v8f64, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::UNPCKHPSrr: - case X86::VUNPCKHPSrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::UNPCKHPSrm: - case X86::VUNPCKHPSrm: - DecodeUNPCKHMask(MVT::v4f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKHPSYrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. - case X86::VUNPCKHPSYrm: - DecodeUNPCKHMask(MVT::v8f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VUNPCKHPSZrr: - Src2Name = getRegName(MI->getOperand(2).getReg()); - // FALL THROUGH. 
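The CASE_UNPCK folding in the hunks above collapses what used to be one hand-written case per vector width (SSE, AVX, AVX2, AVX-512) into a single label, with getRegOperandVectorVT recovering the vector type from the instruction's register class. As a cross-check, here is a minimal standalone sketch of the lane-wise unpack-low decoding those cases invoke; decodeUnpackLo is an illustrative stand-in for DecodeUNPCKLMask, with the element and lane counts passed in directly rather than derived from an MVT.

#include <cstdio>
#include <vector>

// Within each 128-bit lane, interleave the low-half elements of two sources.
static std::vector<int> decodeUnpackLo(unsigned NumElts, unsigned NumLanes) {
  if (NumLanes == 0)
    NumLanes = 1; // MMX: a 64-bit vector has no full 128-bit lane.
  unsigned NumLaneElts = NumElts / NumLanes;
  std::vector<int> Mask;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts)
    for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
      Mask.push_back(i);           // element i of the first source
      Mask.push_back(i + NumElts); // element i of the second source
    }
  return Mask;
}

int main() {
  // punpckldq xmm (v4i32, one lane): prints 0 4 1 5.
  for (int M : decodeUnpackLo(4, 1))
    std::printf("%d ", M);
  std::printf("\n");
  // vunpcklps ymm (v8f32, two lanes): prints 0 8 1 9 4 12 5 13.
  for (int M : decodeUnpackLo(8, 2))
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}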
- case X86::VUNPCKHPSZrm: - DecodeUNPCKHMask(MVT::v16f32, ShuffleMask); - Src1Name = getRegName(MI->getOperand(1).getReg()); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPERMILPSri: + + CASE_VPERM(PERMILPS, r) Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. - case X86::VPERMILPSmi: + CASE_VPERM(PERMILPS, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v4f32, + DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f32, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; - case X86::VPERMILPSYri: + + CASE_VPERM(PERMILPD, r) Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. - case X86::VPERMILPSYmi: + CASE_VPERM(PERMILPD, m) if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v8f32, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPERMILPDri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::VPERMILPDmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v2f64, - MI->getOperand(MI->getNumOperands() - 1).getImm(), - ShuffleMask); - DestName = getRegName(MI->getOperand(0).getReg()); - break; - case X86::VPERMILPDYri: - Src1Name = getRegName(MI->getOperand(1).getReg()); - // FALL THROUGH. - case X86::VPERMILPDYmi: - if (MI->getOperand(MI->getNumOperands() - 1).isImm()) - DecodePSHUFMask(MVT::v4f64, + DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f64, 0), MI->getOperand(MI->getNumOperands() - 1).getImm(), ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; + case X86::VPERM2F128rr: case X86::VPERM2I128rr: Src2Name = getRegName(MI->getOperand(2).getReg()); @@ -824,6 +636,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src1Name = getRegName(MI->getOperand(1).getReg()); DestName = getRegName(MI->getOperand(0).getReg()); break; + case X86::VPERMQYri: case X86::VPERMPDYri: Src1Name = getRegName(MI->getOperand(1).getReg()); @@ -846,6 +659,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, DecodeScalarMoveMask(MVT::v2f64, nullptr == Src2Name, ShuffleMask); DestName = getRegName(MI->getOperand(0).getReg()); break; + case X86::MOVSSrr: case X86::VMOVSSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); @@ -861,6 +675,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, case X86::MOVZPQILo2PQIrr: case X86::VMOVPQI2QIrr: case X86::VMOVZPQILo2PQIrr: + case X86::VMOVZPQILo2PQIZrr: Src1Name = getRegName(MI->getOperand(1).getReg()); // FALL THROUGH. 
 case X86::MOVQI2PQIrm:
@@ -869,9 +684,11 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
   case X86::VMOVQI2PQIrm:
   case X86::VMOVZQI2PQIrm:
   case X86::VMOVZPQILo2PQIrm:
+  case X86::VMOVZPQILo2PQIZrm:
     DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
     DestName = getRegName(MI->getOperand(0).getReg());
     break;
+
   case X86::MOVDI2PDIrm:
   case X86::VMOVDI2PDIrm:
     DecodeZeroMoveLowMask(MVT::v4i32, ShuffleMask);
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6e371da37290..20cd7ffb2e63 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -19,8 +19,6 @@
 namespace llvm {
 
-class MCOperand;
-
 class X86IntelInstPrinter final : public MCInstPrinter {
 public:
   X86IntelInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 629802f5dc5e..133bd0e1772a 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -69,15 +69,19 @@ public:
 class X86AsmBackend : public MCAsmBackend {
   const StringRef CPU;
   bool HasNopl;
-  const uint64_t MaxNopLength;
+  uint64_t MaxNopLength;
 public:
-  X86AsmBackend(const Target &T, StringRef CPU)
-      : MCAsmBackend(), CPU(CPU), MaxNopLength(CPU == "slm" ? 7 : 15) {
+  X86AsmBackend(const Target &T, StringRef CPU) : MCAsmBackend(), CPU(CPU) {
     HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
               CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
               CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
               CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
               CPU != "c3" && CPU != "c3-2";
+    // Max length of a true long nop instruction is 15 bytes.
+    // Max length of a long nop replacement instruction is 7 bytes.
+    // Taking SilverMont architecture features into account, the max nop
+    // length is reduced for it to achieve better performance.
+    MaxNopLength = (!HasNopl || CPU == "slm") ? 7 : 15;
   }
 
   unsigned getNumFixupKinds() const override {
@@ -200,6 +204,14 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
   case X86::ADD64ri8: return X86::ADD64ri32;
   case X86::ADD64mi8: return X86::ADD64mi32;
 
+  // ADC
+  case X86::ADC16ri8: return X86::ADC16ri;
+  case X86::ADC16mi8: return X86::ADC16mi;
+  case X86::ADC32ri8: return X86::ADC32ri;
+  case X86::ADC32mi8: return X86::ADC32mi;
+  case X86::ADC64ri8: return X86::ADC64ri32;
+  case X86::ADC64mi8: return X86::ADC64mi32;
+
   // SUB
   case X86::SUB16ri8: return X86::SUB16ri;
   case X86::SUB16mi8: return X86::SUB16mi;
@@ -208,6 +220,14 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
   case X86::SUB64ri8: return X86::SUB64ri32;
   case X86::SUB64mi8: return X86::SUB64mi32;
 
+  // SBB
+  case X86::SBB16ri8: return X86::SBB16ri;
+  case X86::SBB16mi8: return X86::SBB16mi;
+  case X86::SBB32ri8: return X86::SBB32ri;
+  case X86::SBB32mi8: return X86::SBB32mi;
+  case X86::SBB64ri8: return X86::SBB64ri32;
+  case X86::SBB64mi8: return X86::SBB64mi32;
+
   // CMP
   case X86::CMP16ri8: return X86::CMP16ri;
   case X86::CMP16mi8: return X86::CMP16mi;
@@ -279,7 +299,7 @@ void X86AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
 /// bytes.
 /// \return - true on success, false on failure
 bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
-  static const uint8_t Nops[10][10] = {
+  static const uint8_t TrueNops[10][10] = {
     // nop
     {0x90},
     // xchg %ax,%ax
@@ -302,17 +322,31 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
     {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
   };
 
-  // This CPU doesn't support long nops. If needed add more.
-  // FIXME: Can we get this from the subtarget somehow?
-  // FIXME: We could generated something better than plain 0x90.
-  if (!HasNopl) {
-    for (uint64_t i = 0; i < Count; ++i)
-      OW->write8(0x90);
-    return true;
-  }
+  // Alternative nop instructions for CPUs which don't support long nops.
+  static const uint8_t AltNops[7][10] = {
+    // nop
+    {0x90},
+    // xchg %ax,%ax
+    {0x66, 0x90},
+    // lea 0x0(%esi),%esi
+    {0x8d, 0x76, 0x00},
+    // lea 0x0(%esi),%esi
+    {0x8d, 0x74, 0x26, 0x00},
+    // nop + lea 0x0(%esi),%esi
+    {0x90, 0x8d, 0x74, 0x26, 0x00},
+    // lea 0x0(%esi),%esi
+    {0x8d, 0xb6, 0x00, 0x00, 0x00, 0x00},
+    // lea 0x0(%esi),%esi
+    {0x8d, 0xb4, 0x26, 0x00, 0x00, 0x00, 0x00},
+  };
 
-  // 15 is the longest single nop instruction. Emit as many 15-byte nops as
-  // needed, then emit a nop of the remaining length.
+  // Select the right NOP table.
+  // FIXME: Can we determine whether the CPU supports long nops from the subtarget somehow?
+  const uint8_t (*Nops)[10] = HasNopl ? TrueNops : AltNops;
+  assert(HasNopl || MaxNopLength <= 7);
+
+  // Emit as many maximum-length nops as needed, then emit a nop of the
+  // remaining length.
   do {
     const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
     const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
@@ -359,6 +393,17 @@ public:
   }
 };
 
+class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
+public:
+  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
+    : ELFX86AsmBackend(T, OSABI, CPU) {}
+
+  MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
+    return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI,
+                                    ELF::EM_IAMCU);
+  }
+};
+
 class ELFX86_64AsmBackend : public ELFX86AsmBackend {
 public:
   ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU)
@@ -610,13 +655,13 @@ private:
   /// \brief Get the compact unwind number for a given register. The number
   /// corresponds to the enum lists in compact_unwind_encoding.h.
   int getCompactUnwindRegNum(unsigned Reg) const {
-    static const uint16_t CU32BitRegs[7] = {
+    static const MCPhysReg CU32BitRegs[7] = {
       X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
     };
-    static const uint16_t CU64BitRegs[] = {
+    static const MCPhysReg CU64BitRegs[] = {
       X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
     };
-    const uint16_t *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
+    const MCPhysReg *CURegs = Is64Bit ?
CU64BitRegs : CU32BitRegs; for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; @@ -780,6 +825,10 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, return new WindowsX86AsmBackend(T, false, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); + + if (TheTriple.isOSIAMCU()) + return new ELFX86_IAMCUAsmBackend(T, OSABI, CPU); + return new ELFX86_32AsmBackend(T, OSABI, CPU); } diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index f0d00b0c1bc3..9ff85b9154f8 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -41,6 +41,16 @@ namespace X86 { /// AddrNumOperands - Total number of operands in a memory reference. AddrNumOperands = 5 }; + + /// AVX512 static rounding constants. These need to match the values in + /// avx512fintrin.h. + enum STATIC_ROUNDING { + TO_NEAREST_INT = 0, + TO_NEG_INF = 1, + TO_POS_INF = 2, + TO_ZERO = 3, + CUR_DIRECTION = 4 + }; } // end namespace X86; /// X86II - This namespace holds all of the target specific flags that @@ -675,7 +685,7 @@ namespace X86II { case X86II::RawFrmSrc: case X86II::RawFrmDst: case X86II::RawFrmDstSrc: - return -1; + return -1; case X86II::MRMDestMem: return 0; case X86II::MRMSrcMem: @@ -696,23 +706,27 @@ namespace X86II { // Start from 0, skip registers encoded in VEX_VVVV or a mask register. return 0 + HasVEX_4V + HasEVEX_K; case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2: - case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C8: + case X86II::MRM_C3: case X86II::MRM_C4: case X86II::MRM_C5: + case X86II::MRM_C6: case X86II::MRM_C7: case X86II::MRM_C8: case X86II::MRM_C9: case X86II::MRM_CA: case X86II::MRM_CB: + case X86II::MRM_CC: case X86II::MRM_CD: case X86II::MRM_CE: case X86II::MRM_CF: case X86II::MRM_D0: case X86II::MRM_D1: - case X86II::MRM_D4: case X86II::MRM_D5: case X86II::MRM_D6: - case X86II::MRM_D7: case X86II::MRM_D8: case X86II::MRM_D9: - case X86II::MRM_DA: case X86II::MRM_DB: case X86II::MRM_DC: - case X86II::MRM_DD: case X86II::MRM_DE: case X86II::MRM_DF: - case X86II::MRM_E0: case X86II::MRM_E1: case X86II::MRM_E2: - case X86II::MRM_E3: case X86II::MRM_E4: case X86II::MRM_E5: - case X86II::MRM_E8: case X86II::MRM_E9: case X86II::MRM_EA: - case X86II::MRM_EB: case X86II::MRM_EC: case X86II::MRM_ED: - case X86II::MRM_EE: case X86II::MRM_F0: case X86II::MRM_F1: - case X86II::MRM_F2: case X86II::MRM_F3: case X86II::MRM_F4: - case X86II::MRM_F5: case X86II::MRM_F6: case X86II::MRM_F7: - case X86II::MRM_F8: case X86II::MRM_F9: case X86II::MRM_FA: - case X86II::MRM_FB: case X86II::MRM_FC: case X86II::MRM_FD: - case X86II::MRM_FE: case X86II::MRM_FF: + case X86II::MRM_D2: case X86II::MRM_D3: case X86II::MRM_D4: + case X86II::MRM_D5: case X86II::MRM_D6: case X86II::MRM_D7: + case X86II::MRM_D8: case X86II::MRM_D9: case X86II::MRM_DA: + case X86II::MRM_DB: case X86II::MRM_DC: case X86II::MRM_DD: + case X86II::MRM_DE: case X86II::MRM_DF: case X86II::MRM_E0: + case X86II::MRM_E1: case X86II::MRM_E2: case X86II::MRM_E3: + case X86II::MRM_E4: case X86II::MRM_E5: case X86II::MRM_E6: + case X86II::MRM_E7: case X86II::MRM_E8: case X86II::MRM_E9: + case X86II::MRM_EA: case X86II::MRM_EB: case X86II::MRM_EC: + case X86II::MRM_ED: case X86II::MRM_EE: case X86II::MRM_EF: + case X86II::MRM_F0: case X86II::MRM_F1: case X86II::MRM_F2: + case X86II::MRM_F3: case X86II::MRM_F4: case X86II::MRM_F5: + case X86II::MRM_F6: case X86II::MRM_F7: case X86II::MRM_F8: + case 
X86II::MRM_F9: case X86II::MRM_FA: case X86II::MRM_FB: + case X86II::MRM_FC: case X86II::MRM_FD: case X86II::MRM_FE: + case X86II::MRM_FF: return -1; } } @@ -740,7 +754,7 @@ namespace X86II { case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11: case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15: - return true; + return true; } return false; } diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp index a33468dc4769..736c39dfb6f1 100644 --- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp @@ -32,9 +32,11 @@ namespace { X86ELFObjectWriter::X86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine) - : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine, - // Only i386 uses Rel instead of RelA. - /*HasRelocationAddend*/ EMachine != ELF::EM_386) {} + : MCELFObjectTargetWriter(IsELF64, OSABI, EMachine, + // Only i386 and IAMCU use Rel instead of RelA. + /*HasRelocationAddend*/ + (EMachine != ELF::EM_386) && + (EMachine != ELF::EM_IAMCU)) {} X86ELFObjectWriter::~X86ELFObjectWriter() {} @@ -246,7 +248,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, if (getEMachine() == ELF::EM_X86_64) return getRelocType64(Modifier, Type, IsPCRel); - assert(getEMachine() == ELF::EM_386 && "Unsupported ELF machine type."); + assert((getEMachine() == ELF::EM_386 || getEMachine() == ELF::EM_IAMCU) && + "Unsupported ELF machine type."); return getRelocType32(Modifier, getType32(Type), IsPCRel); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h index deaad2a5b8e8..30d5c802d1ed 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h @@ -20,39 +20,42 @@ #include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - class Triple; +class Triple; - class X86MCAsmInfoDarwin : public MCAsmInfoDarwin { - virtual void anchor(); +class X86MCAsmInfoDarwin : public MCAsmInfoDarwin { + virtual void anchor(); - public: - explicit X86MCAsmInfoDarwin(const Triple &Triple); - }; +public: + explicit X86MCAsmInfoDarwin(const Triple &Triple); +}; - struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin { - explicit X86_64MCAsmInfoDarwin(const Triple &Triple); - const MCExpr * - getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, - MCStreamer &Streamer) const override; - }; +struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin { + explicit X86_64MCAsmInfoDarwin(const Triple &Triple); + const MCExpr * + getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, + MCStreamer &Streamer) const override; +}; - class X86ELFMCAsmInfo : public MCAsmInfoELF { - void anchor() override; - public: - explicit X86ELFMCAsmInfo(const Triple &Triple); - }; +class X86ELFMCAsmInfo : public MCAsmInfoELF { + void anchor() override; - class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { - void anchor() override; - public: - explicit X86MCAsmInfoMicrosoft(const Triple &Triple); - }; +public: + explicit X86ELFMCAsmInfo(const Triple &Triple); +}; - class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF { - void anchor() override; - public: - explicit X86MCAsmInfoGNUCOFF(const Triple &Triple); - }; +class X86MCAsmInfoMicrosoft : public MCAsmInfoMicrosoft { + void anchor() override; + +public: + explicit X86MCAsmInfoMicrosoft(const Triple &Triple); +}; + +class X86MCAsmInfoGNUCOFF : public MCAsmInfoGNUCOFF { + 
void anchor() override; + +public: + explicit X86MCAsmInfoGNUCOFF(const Triple &Triple); +}; } // namespace llvm #endif diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 10c434c8b1b4..dfab6ec10775 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -510,8 +510,8 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); - EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), CurByte, OS, - Fixups); + EmitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte), + CurByte, OS, Fixups); return; } @@ -988,6 +988,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, const MCInstrDesc &Desc) { unsigned REX = 0; + bool UsesHighByteReg = false; + if (TSFlags & X86II::REX_W) REX |= 1 << 3; // set REX.W @@ -1004,6 +1006,8 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, const MCOperand &MO = MI.getOperand(i); if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); + if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH) + UsesHighByteReg = true; if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue; // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything // that returns non-zero. @@ -1073,6 +1077,9 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, } break; } + if (REX && UsesHighByteReg) + report_fatal_error("Cannot encode high byte register in REX-prefixed instruction"); + return REX; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index 83b4091d7665..53a6550acdd5 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -122,7 +122,8 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, } else if (TheTriple.isOSBinFormatELF()) { // Force the use of an ELF container. 
MAI = new X86ELFMCAsmInfo(TheTriple); - } else if (TheTriple.isWindowsMSVCEnvironment()) { + } else if (TheTriple.isWindowsMSVCEnvironment() || + TheTriple.isWindowsCoreCLREnvironment()) { MAI = new X86MCAsmInfoMicrosoft(TheTriple); } else if (TheTriple.isOSCygMing() || TheTriple.isWindowsItaniumEnvironment()) { @@ -267,3 +268,184 @@ extern "C" void LLVMInitializeX86TargetMC() { TargetRegistry::RegisterMCAsmBackend(TheX86_64Target, createX86_64AsmBackend); } + +unsigned llvm::getX86SubSuperRegisterOrZero(unsigned Reg, unsigned Size, + bool High) { + switch (Size) { + default: return 0; + case 8: + if (High) { + switch (Reg) { + default: return getX86SubSuperRegisterOrZero(Reg, 64); + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AH; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DH; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CH; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BH; + } + } else { + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AL; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DL; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CL; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BL; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SIL; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DIL; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BPL; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SPL; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8B; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9B; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10B; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11B; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12B; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13B; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14B; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15B; + } + } + case 16: + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::AX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::DX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::CX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::BX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::SI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::DI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::BP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::SP; + case X86::R8B: case 
X86::R8W: case X86::R8D: case X86::R8: + return X86::R8W; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9W; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10W; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11W; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12W; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13W; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14W; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15W; + } + case 32: + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::EAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::EDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::ECX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::EBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::ESI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::EDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::EBP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::ESP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8D; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9D; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10D; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11D; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12D; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13D; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14D; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15D; + } + case 64: + switch (Reg) { + default: return 0; + case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: + return X86::RAX; + case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: + return X86::RDX; + case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: + return X86::RCX; + case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: + return X86::RBX; + case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: + return X86::RSI; + case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: + return X86::RDI; + case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: + return X86::RBP; + case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: + return X86::RSP; + case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: + return X86::R8; + case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: + return X86::R9; + case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: + return X86::R10; + case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: + return X86::R11; + case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: + return X86::R12; + case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: + return X86::R13; + case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: + return X86::R14; + case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: + return X86::R15; + } + } +} + +unsigned llvm::getX86SubSuperRegister(unsigned Reg, unsigned Size, 
bool High) { + unsigned Res = getX86SubSuperRegisterOrZero(Reg, Size, High); + assert(Res != 0 && "Unexpected register or VT"); + return Res; +} + + diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 6221baba1793..2d2836ff07c5 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -79,7 +79,7 @@ MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, /// Takes ownership of \p AB and \p CE. MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, raw_pwrite_stream &OS, MCCodeEmitter *CE, - bool RelaxAll); + bool RelaxAll, bool IncrementalLinkerCompatible); /// Construct an X86 Mach-O object writer. MCObjectWriter *createX86MachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, @@ -98,6 +98,17 @@ MCRelocationInfo *createX86_64MachORelocationInfo(MCContext &Ctx); /// Construct X86-64 ELF relocation info. MCRelocationInfo *createX86_64ELFRelocationInfo(MCContext &Ctx); + +/// Returns the sub or super register of a specific X86 register. +/// e.g. getX86SubSuperRegister(X86::EAX, 16) returns X86::AX. +/// Aborts on error. +unsigned getX86SubSuperRegister(unsigned, unsigned, bool High=false); + +/// Returns the sub or super register of a specific X86 register. +/// Like getX86SubSuperRegister() but returns 0 on error. +unsigned getX86SubSuperRegisterOrZero(unsigned, unsigned, + bool High = false); + } // End llvm namespace diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 9e801fc8f191..191ebeac7265 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -149,14 +149,19 @@ void X86MachObjectWriter::RecordX86_64Relocation( // Neither symbol can be modified. if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported relocation of modified symbol", false); + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of modified symbol"); + return; + } // We don't support PCrel relocations of differences. Darwin 'as' doesn't // implement most of these correctly. - if (IsPCRel) - report_fatal_error("unsupported pc-relative relocation of difference", - false); + if (IsPCRel) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported pc-relative relocation of difference"); + return; + } // The support for the situation where one or both of the symbols would // require a local relocation is handled just like if the symbols were @@ -168,16 +173,20 @@ void X86MachObjectWriter::RecordX86_64Relocation( // Darwin 'as' doesn't emit correct relocations for this (it ends up with a // single SIGNED relocation); reject it for now. Except the case where both // symbols don't have a base, equal but both NULL. - if (A_Base == B_Base && A_Base) - report_fatal_error("unsupported relocation with identical base", false); + if (A_Base == B_Base && A_Base) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation with identical base"); + return; + } // A subtraction expression where either symbol is undefined is a // non-relocatable expression. if (A->isUndefined() || B->isUndefined()) { StringRef Name = A->isUndefined() ? 
A->getName() : B->getName(); - Asm.getContext().reportFatalError(Fixup.getLoc(), + Asm.getContext().reportError(Fixup.getLoc(), "unsupported relocation with subtraction expression, symbol '" + Name + "' can not be undefined in a subtraction expression"); + return; } Value += Writer->getSymbolAddress(*A, Layout) - @@ -244,12 +253,16 @@ void X86MachObjectWriter::RecordX86_64Relocation( FixedValue = Res; return; } else { - report_fatal_error("unsupported relocation of variable '" + - Symbol->getName() + "'", false); + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of variable '" + + Symbol->getName() + "'"); + return; } } else { - report_fatal_error("unsupported relocation of undefined symbol '" + - Symbol->getName() + "'", false); + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + return; } MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); @@ -266,8 +279,9 @@ void X86MachObjectWriter::RecordX86_64Relocation( } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { Type = MachO::X86_64_RELOC_TLV; } else if (Modifier != MCSymbolRefExpr::VK_None) { - report_fatal_error("unsupported symbol modifier in relocation", - false); + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported symbol modifier in relocation"); + return; } else { Type = MachO::X86_64_RELOC_SIGNED; @@ -292,9 +306,12 @@ void X86MachObjectWriter::RecordX86_64Relocation( } } } else { - if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in branch " - "relocation", false); + if (Modifier != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError( + Fixup.getLoc(), + "unsupported symbol modifier in branch relocation"); + return; + } Type = MachO::X86_64_RELOC_BRANCH; } @@ -309,16 +326,22 @@ void X86MachObjectWriter::RecordX86_64Relocation( Type = MachO::X86_64_RELOC_GOT; IsPCRel = 1; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - report_fatal_error("TLVP symbol modifier should have been rip-rel", - false); - } else if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in relocation", false); - else { + Asm.getContext().reportError( + Fixup.getLoc(), "TLVP symbol modifier should have been rip-rel"); + return; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported symbol modifier in relocation"); + return; + } else { Type = MachO::X86_64_RELOC_UNSIGNED; unsigned Kind = Fixup.getKind(); - if (Kind == X86::reloc_signed_4byte) - report_fatal_error("32-bit absolute addressing is not supported in " - "64-bit mode", false); + if (Kind == X86::reloc_signed_4byte) { + Asm.getContext().reportError( + Fixup.getLoc(), + "32-bit absolute addressing is not supported in 64-bit mode"); + return; + } } } } @@ -350,10 +373,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, // See . 
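The surrounding hunks all make the same transformation: report_fatal_error, which kills the whole process, is replaced by a diagnostic attached to the fixup's source location followed by an early return. A compact sketch of the pattern, assuming this era's MC API (recordFoo is a placeholder name, not a function from the patch):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"

// Only the error-handling shape matters here.
static bool recordFoo(llvm::MCContext &Ctx, const llvm::MCFixup &Fixup,
                      bool Unsupported) {
  if (Unsupported) {
    // reportError attaches the diagnostic to the fixup's location and lets
    // assembly continue; the assembler fails at the end instead of aborting
    // mid-stream the way report_fatal_error does.
    Ctx.reportError(Fixup.getLoc(), "unsupported relocation");
    return false; // tell the caller nothing was emitted
  }
  // ... emit the relocation ...
  return true;
}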
const MCSymbol *A = &Target.getSymA()->getSymbol(); - if (!A->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression", - false); + if (!A->getFragment()) { + Asm.getContext().reportError( + Fixup.getLoc(), + "symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + return false; + } uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); @@ -363,10 +389,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, if (const MCSymbolRefExpr *B = Target.getSymB()) { const MCSymbol *SB = &B->getSymbol(); - if (!SB->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression", - false); + if (!SB->getFragment()) { + Asm.getContext().reportError( + Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + return false; + } // Select the appropriate difference relocation type. // @@ -387,12 +416,12 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, if (FixupOffset > 0xffffff) { char Buffer[32]; format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer)); - Asm.getContext().reportFatalError(Fixup.getLoc(), + Asm.getContext().reportError(Fixup.getLoc(), Twine("Section too large, can't encode " "r_address (") + Buffer + ") into 24 bits of scattered " "relocation entry."); - llvm_unreachable("fatal error returned?!"); + return false; } MachO::any_relocation_info MRE; diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index 92f42b68ae51..d04511873b46 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -50,9 +50,11 @@ void X86WinCOFFStreamer::FinishImpl() { MCStreamer *llvm::createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, raw_pwrite_stream &OS, - MCCodeEmitter *CE, bool RelaxAll) { + MCCodeEmitter *CE, bool RelaxAll, + bool IncrementalLinkerCompatible) { X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS); S->getAssembler().setRelaxAll(RelaxAll); + S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); return S; } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index cae865a40819..4fdd527d87c8 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -140,13 +140,14 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm, } } -/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. +/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*. /// VT indicates the type of the vector allowing it to handle different /// datatypes and vector widths. 
 void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   unsigned NumElts = VT.getVectorNumElements();
   unsigned NumLanes = VT.getSizeInBits() / 128;
+  if (NumLanes == 0) NumLanes = 1;  // Handle MMX
   unsigned NumLaneElts = NumElts / NumLanes;
 
   unsigned NewImm = Imm;
@@ -191,6 +192,16 @@ void DecodePSHUFLWMask(MVT VT, unsigned Imm,
   }
 }
 
+void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumHalfElts = NumElts / 2;
+
+  for (unsigned l = 0; l != NumHalfElts; ++l)
+    ShuffleMask.push_back(l + NumHalfElts);
+  for (unsigned h = 0; h != NumHalfElts; ++h)
+    ShuffleMask.push_back(h);
+}
+
 /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
 /// the type of the vector allowing it to handle different datatypes and vector
 /// widths.
@@ -222,7 +233,7 @@ void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
   // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
   // independently on 128-bit lanes.
   unsigned NumLanes = VT.getSizeInBits() / 128;
-  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
+  if (NumLanes == 0) NumLanes = 1;  // Handle MMX
   unsigned NumLaneElts = NumElts / NumLanes;
 
   for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
@@ -253,6 +264,26 @@ void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
   }
 }
 
+/// \brief Decode the immediate mask of a shuffle of packed values at 128-bit
+/// granularity (SHUFF32x4/SHUFF64x2/SHUFI32x4/SHUFI64x2)
+/// into a shuffle mask.
+void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
+                               SmallVectorImpl<int> &ShuffleMask) {
+  unsigned NumLanes = VT.getSizeInBits() / 128;
+  unsigned NumElementsInLane = 128 / VT.getScalarSizeInBits();
+  unsigned ControlBitsMask = NumLanes - 1;
+  unsigned NumControlBits = NumLanes / 2;
+
+  for (unsigned l = 0; l != NumLanes; ++l) {
+    unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
+    // We actually need the other source.
+    if (l >= NumLanes / 2)
+      LaneMask += NumLanes;
+    for (unsigned i = 0; i != NumElementsInLane; ++i)
+      ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
+  }
+}
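decodeVSHUF64x2FamilyMask above reads NumControlBits of the immediate per 128-bit result lane, and biases the upper half of the result toward the second source. A standalone restatement so the immediate encoding is easy to check by hand; widths are passed as plain integers instead of an MVT, and the function name is illustrative:

#include <cstdio>
#include <vector>

static std::vector<int> decodeVSHUF64x2(unsigned VecBits, unsigned EltBits,
                                        unsigned Imm) {
  unsigned NumLanes = VecBits / 128;
  unsigned EltsPerLane = 128 / EltBits;
  unsigned ControlBitsMask = NumLanes - 1;
  unsigned NumControlBits = NumLanes / 2;
  std::vector<int> Mask;
  for (unsigned l = 0; l != NumLanes; ++l) {
    unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
    if (l >= NumLanes / 2)
      LaneMask += NumLanes; // upper result lanes read the second source
    for (unsigned i = 0; i != EltsPerLane; ++i)
      Mask.push_back(LaneMask * EltsPerLane + i);
  }
  return Mask;
}

int main() {
  // vshuff64x2 with imm 0xe4 on v8f64: lanes 0,1 from src1 and lanes 2,3
  // from src2, i.e. elements 0 1 2 3 12 13 14 15.
  for (int M : decodeVSHUF64x2(512, 64, 0xe4))
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}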
+
 void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
                           SmallVectorImpl<int> &ShuffleMask) {
   unsigned HalfSize = VT.getVectorNumElements() / 2;
@@ -277,10 +308,10 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
   //   <4 x i32>
 
+#ifndef NDEBUG
   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
-
-  if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
-    return;
+  assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
+#endif
 
   // This is a straightforward byte vector.
   if (MaskTy->isVectorTy() && MaskTy->getVectorElementType()->isIntegerTy(8)) {
@@ -290,7 +321,7 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
     for (int i = 0; i < NumElements; ++i) {
       // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
      // lane of the vector we're inside.
-      int Base = i < 16 ? 0 : 16;
+      int Base = i & ~0xf;
       Constant *COp = C->getAggregateElement(i);
       if (!COp) {
         ShuffleMask.clear();
@@ -357,44 +388,66 @@ void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
   }
 }
 
-void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+                        SmallVectorImpl<int> &ShuffleMask) {
   Type *MaskTy = C->getType();
-  assert(MaskTy->isVectorTy() && "Expected a vector constant mask!");
-  assert(MaskTy->getVectorElementType()->isIntegerTy() &&
-         "Expected integer constant mask elements!");
-  int ElementBits = MaskTy->getScalarSizeInBits();
-  int NumElements = MaskTy->getVectorNumElements();
+  // It is not an error for the VPERMILP mask to not be a vector of the
+  // expected element type, because the constant pool uniques constants by
+  // their bit representation.
+  // e.g. the following take up the same space in the constant pool:
+  //   i128 -170141183420855150465331762880109871104
+  //
+  //   <2 x i64>
+  //
+  //   <4 x i32>
+
+  unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+
+  if (MaskTySize != 128 && MaskTySize != 256) // FIXME: Add support for AVX-512.
+    return;
+
+  // Only support vector types.
+  if (!MaskTy->isVectorTy())
+    return;
+
+  // Make sure it's an integer type.
+  Type *VecEltTy = MaskTy->getVectorElementType();
+  if (!VecEltTy->isIntegerTy())
+    return;
+
+  // Support any element type from byte up to element size.
+  // This is necessary primarily because 64-bit elements get split to 32-bit
+  // in the constant pool on 32-bit target.
+  unsigned EltTySize = VecEltTy->getIntegerBitWidth();
+  if (EltTySize < 8 || EltTySize > ElSize)
+    return;
+
+  unsigned NumElements = MaskTySize / ElSize;
   assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
          "Unexpected number of vector elements.");
   ShuffleMask.reserve(NumElements);
+  unsigned NumElementsPerLane = 128 / ElSize;
+  unsigned Factor = ElSize / EltTySize;
 
-  if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
-    assert((unsigned)NumElements == CDS->getNumElements() &&
-           "Constant mask has a different number of elements!");
+  for (unsigned i = 0; i < NumElements; ++i) {
+    Constant *COp = C->getAggregateElement(i * Factor);
+    if (!COp) {
+      ShuffleMask.clear();
+      return;
+    } else if (isa<UndefValue>(COp)) {
+      ShuffleMask.push_back(SM_SentinelUndef);
+      continue;
     }
-
-    for (int i = 0; i < NumElements; ++i) {
-      int Base = (i * ElementBits / 128) * (128 / ElementBits);
-      uint64_t Element = CDS->getElementAsInteger(i);
-      // Only the least significant 2 bits of the integer are used.
-      int Index = Base + (Element & 0x3);
-      ShuffleMask.push_back(Index);
-    }
-  } else if (auto *CV = dyn_cast<ConstantVector>(C)) {
-    assert((unsigned)NumElements == C->getNumOperands() &&
-           "Constant mask has a different number of elements!");
-
-    for (int i = 0; i < NumElements; ++i) {
-      int Base = (i * ElementBits / 128) * (128 / ElementBits);
-      Constant *COp = CV->getOperand(i);
-      if (isa<UndefValue>(COp)) {
-        ShuffleMask.push_back(SM_SentinelUndef);
-        continue;
-      }
-      uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
-      // Only the least significant 2 bits of the integer are used.
-      int Index = Base + (Element & 0x3);
-      ShuffleMask.push_back(Index);
+    int Index = i & ~(NumElementsPerLane - 1);
+    uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+    if (ElSize == 64)
+      Index += (Element >> 1) & 0x1;
+    else
+      Index += Element & 0x3;
+    ShuffleMask.push_back(Index);
   }
+
+  // TODO: Handle funny-looking vectors too.
 }
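The rewritten DecodeVPERMILPMask keeps every index inside its own 128-bit lane and, for 64-bit elements, uses only bit 1 of each control word. A pencil-and-paper version of just that index math; controls arrive as plain integers here rather than IR-level Constants, and decodeVPermIlp is a stand-in name:

#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<int> decodeVPermIlp(const std::vector<uint64_t> &Ctl,
                                       unsigned ElSize) {
  unsigned PerLane = 128 / ElSize;
  std::vector<int> Mask;
  for (unsigned i = 0; i != Ctl.size(); ++i) {
    int Index = i & ~(PerLane - 1); // start of this element's 128-bit lane
    Index += (ElSize == 64) ? ((Ctl[i] >> 1) & 0x1) : (Ctl[i] & 0x3);
    Mask.push_back(Index);
  }
  return Mask;
}

int main() {
  // v4f64 (256-bit, two lanes): controls {2,0,2,0} swap each lane's pair.
  for (int M : decodeVPermIlp({2, 0, 2, 0}, 64))
    std::printf("%d ", M); // prints: 1 0 3 2
  std::printf("\n");
  return 0;
}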
 
 void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT, SmallVectorImpl<int> &Mask) {
@@ -503,4 +556,74 @@ void DecodeINSERTQIMask(int Len, int Idx,
     ShuffleMask.push_back(SM_SentinelUndef);
 }
 
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+                      SmallVectorImpl<int> &ShuffleMask) {
+  for (int i = 0, e = RawMask.size(); i < e; ++i) {
+    uint64_t M = RawMask[i];
+    ShuffleMask.push_back((int)M);
+  }
+}
+
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+                       SmallVectorImpl<int> &ShuffleMask) {
+  for (int i = 0, e = RawMask.size(); i < e; ++i) {
+    uint64_t M = RawMask[i];
+    ShuffleMask.push_back((int)M);
+  }
+}
+
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+                      SmallVectorImpl<int> &ShuffleMask) {
+  Type *MaskTy = C->getType();
+  if (MaskTy->isVectorTy()) {
+    unsigned NumElements = MaskTy->getVectorNumElements();
+    if (NumElements == VT.getVectorNumElements()) {
+      for (unsigned i = 0; i < NumElements; ++i) {
+        Constant *COp = C->getAggregateElement(i);
+        if (!COp || (!isa<ConstantInt>(COp) && !isa<UndefValue>(COp))) {
+          ShuffleMask.clear();
+          return;
+        }
+        if (isa<UndefValue>(COp))
+          ShuffleMask.push_back(SM_SentinelUndef);
+        else {
+          uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+          Element &= (1 << NumElements) - 1;
+          ShuffleMask.push_back(Element);
+        }
+      }
+    }
+    return;
+  }
+  // Scalar value; just broadcast it
+  if (!isa<ConstantInt>(C))
+    return;
+  uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
+  int NumElements = VT.getVectorNumElements();
+  Element &= (1 << NumElements) - 1;
+  for (int i = 0; i < NumElements; ++i)
+    ShuffleMask.push_back(Element);
+}
+
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+                       SmallVectorImpl<int> &ShuffleMask) {
+  Type *MaskTy = C->getType();
+  unsigned NumElements = MaskTy->getVectorNumElements();
+  if (NumElements == VT.getVectorNumElements()) {
+    for (unsigned i = 0; i < NumElements; ++i) {
+      Constant *COp = C->getAggregateElement(i);
+      if (!COp) {
+        ShuffleMask.clear();
+        return;
+      }
+      if (isa<UndefValue>(COp))
+        ShuffleMask.push_back(SM_SentinelUndef);
+      else {
+        uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+        Element &= (1 << NumElements*2) - 1;
+        ShuffleMask.push_back(Element);
+      }
+    }
+  }
+}
 } // llvm namespace
diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h
index 3d10d18e860e..ab18e6438ec9 100644
--- a/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -54,6 +54,9 @@ void DecodePSHUFHWMask(MVT VT, unsigned Imm,
 void DecodePSHUFLWMask(MVT, unsigned Imm,
                        SmallVectorImpl<int> &ShuffleMask);
 
+/// \brief Decodes a PSWAPD 3DNow! instruction.
+void DecodePSWAPMask(MVT VT, SmallVectorImpl<int> &ShuffleMask);
+
 /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
 /// the type of the vector allowing it to handle different datatypes and vector
 /// widths.
@@ -83,12 +86,18 @@ void DecodeBLENDMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
 void DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
                           SmallVectorImpl<int> &ShuffleMask);
 
+/// \brief Decode the immediate mask of a shuffle of packed values at 128-bit
+/// granularity into a shuffle mask.
+void decodeVSHUF64x2FamilyMask(MVT VT, unsigned Imm,
+                               SmallVectorImpl<int> &ShuffleMask);
+
 /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
 /// No VT provided since it only works on 256-bit, 4 element vectors.
 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
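Of the DecodeVPERMVMask overloads just added, the constant-pool path has one subtlety: a broadcast mask reaches the decoder as a lone scalar, which must be splatted and wrapped to the vector length. A small sketch of that scalar path, with plain integers standing in for IR-level Constants:

#include <cstdint>
#include <vector>

// Scalar-broadcast path of the VPERMV decoding: a splatted control value
// yields the same (wrapped) source index in every result slot.
static std::vector<int> decodeVPermVBroadcast(uint64_t Element,
                                              unsigned NumElements) {
  Element &= (1ULL << NumElements) - 1; // indices wrap at the vector length
  return std::vector<int>(NumElements, static_cast<int>(Element));
}

// e.g. decodeVPermVBroadcast(13, 8) yields {5,5,5,5,5,5,5,5}: 13 wraps to 5
// for an 8-element vector, so every result element reads source element 5.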
 /// \brief Decode a VPERMILP variable mask from an IR-level vector constant.
-void DecodeVPERMILPMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
+void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
+                        SmallVectorImpl<int> &ShuffleMask);
 
 /// \brief Decode a zero extension instruction as a shuffle mask.
 void DecodeZeroExtendMask(MVT SrcVT, MVT DstVT,
@@ -108,6 +117,22 @@ void DecodeEXTRQIMask(int Len, int Idx,
 /// \brief Decode a SSE4A INSERTQ instruction as a v16i8 shuffle mask.
 void DecodeINSERTQIMask(int Len, int Idx,
                         SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMVMask(const Constant *C, MVT VT,
+                      SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERM W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask,
+                      SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
+void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+                       SmallVectorImpl<int> &ShuffleMask);
+
+/// \brief Decode a VPERMT2 W/D/Q/PS/PD mask from a raw array of constants.
+void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask,
+                       SmallVectorImpl<int> &ShuffleMask);
 } // llvm namespace
 
 #endif
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 8403ae6101df..fbec6626d99d 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -23,56 +23,47 @@ class FunctionPass;
 class ImmutablePass;
 class X86TargetMachine;
 
-/// createX86ISelDag - This pass converts a legalized DAG into a
-/// X86-specific DAG, ready for instruction scheduling.
-///
+/// This pass converts a legalized DAG into a X86-specific DAG, ready for
+/// instruction scheduling.
 FunctionPass *createX86ISelDag(X86TargetMachine &TM,
                                CodeGenOpt::Level OptLevel);
 
-/// createX86GlobalBaseRegPass - This pass initializes a global base
-/// register for PIC on x86-32.
+/// This pass initializes a global base register for PIC on x86-32.
 FunctionPass* createX86GlobalBaseRegPass();
 
-/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
-/// to local-dynamic TLS variables so that the TLS base address for the module
-/// is only fetched once per execution path through the function.
+/// This pass combines multiple accesses to local-dynamic TLS variables so that
+/// the TLS base address for the module is only fetched once per execution path
+/// through the function.
 FunctionPass *createCleanupLocalDynamicTLSPass();
 
-/// createX86FloatingPointStackifierPass - This function returns a pass which
-/// converts floating point register references and pseudo instructions into
-/// floating point stack references and physical instructions.
-///
+/// This function returns a pass which converts floating-point register
+/// references and pseudo instructions into floating-point stack references and
+/// physical instructions.
 FunctionPass *createX86FloatingPointStackifierPass();
 
-/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions
-/// before each call to avoid transition penalty between functions encoded with
-/// AVX and SSE.
+/// This pass inserts AVX vzeroupper instructions before each call to avoid
+/// transition penalty between functions encoded with AVX and SSE.
 FunctionPass *createX86IssueVZeroUpperPass();
 
-/// createX86EmitCodeToMemory - Returns a pass that converts a register
-/// allocated function into raw machine code in a dynamically
-/// allocated chunk of memory.
-///
-FunctionPass *createEmitX86CodeToMemory();
-
-/// createX86PadShortFunctions - Return a pass that pads short functions
-/// with NOOPs. This will prevent a stall when returning on the Atom.
+/// Return a pass that pads short functions with NOOPs.
+/// This will prevent a stall when returning on the Atom.
 FunctionPass *createX86PadShortFunctions();
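The factories declared above are consumed by the target's pass configuration rather than called directly. The wiring below is a hypothetical, simplified sketch of that pattern (the real hook lives in X86TargetMachine.cpp's X86PassConfig and also consults the subtarget); a stand-in addPass is declared so the sketch stays self-contained:

namespace llvm { class FunctionPass; }
using llvm::FunctionPass;

// Declared in X86.h above.
FunctionPass *createX86PadShortFunctions();
FunctionPass *createX86FixupLEAs();
FunctionPass *createX86IssueVZeroUpperPass();

// Stand-in for TargetPassConfig; only the shape of the wiring is shown.
struct PipelineSketch {
  void addPass(FunctionPass *P); // declaration only, for illustration
  void addPreEmitPasses(bool OptimizeForSpeed) {
    if (OptimizeForSpeed) {
      addPass(createX86PadShortFunctions()); // Atom return-stall padding
      addPass(createX86FixupLEAs());         // slow ALU ops -> LEA
    }
    addPass(createX86IssueVZeroUpperPass()); // AVX/SSE transition penalty
  }
};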
-
-/// createX86FixupLEAs - Return a a pass that selectively replaces
-/// certain instructions (like add, sub, inc, dec, some shifts,
-/// and some multiplies) by equivalent LEA instructions, in order
-/// to eliminate execution delays in some Atom processors.
+
+/// Return a pass that selectively replaces certain instructions (like add,
+/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA
+/// instructions, in order to eliminate execution delays in some processors.
 FunctionPass *createX86FixupLEAs();
 
-/// createX86CallFrameOptimization - Return a pass that optimizes
-/// the code-size of x86 call sequences. This is done by replacing
-/// esp-relative movs with pushes.
+/// Return a pass that removes redundant address recalculations.
+FunctionPass *createX86OptimizeLEAs();
+
+/// Return a pass that optimizes the code-size of x86 call sequences. This is
+/// done by replacing esp-relative movs with pushes.
 FunctionPass *createX86CallFrameOptimization();
 
-/// createX86WinEHStatePass - Return an IR pass that inserts EH registration
-/// stack objects and explicit EH state updates. This pass must run after EH
-/// preparation, which does Windows-specific but architecture-neutral
-/// preparation.
+/// Return an IR pass that inserts EH registration stack objects and explicit
+/// EH state updates. This pass must run after EH preparation, which does
+/// Windows-specific but architecture-neutral preparation.
 FunctionPass *createX86WinEHStatePass();
 
 /// Return a Machine IR pass that expands X86-specific pseudo
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 852267400bba..8902a8534256 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -37,14 +37,26 @@ def FeatureCMOV : SubtargetFeature<"cmov","HasCMov", "true",
 def FeaturePOPCNT : SubtargetFeature<"popcnt", "HasPOPCNT", "true",
                                      "Support POPCNT instruction">;
 
+def FeatureFXSR : SubtargetFeature<"fxsr", "HasFXSR", "true",
+                                   "Support fxsave/fxrestore instructions">;
+
+def FeatureXSAVE : SubtargetFeature<"xsave", "HasXSAVE", "true",
+                                    "Support xsave instructions">;
+
+def FeatureXSAVEOPT: SubtargetFeature<"xsaveopt", "HasXSAVEOPT", "true",
+                                      "Support xsaveopt instructions">;
+
+def FeatureXSAVEC : SubtargetFeature<"xsavec", "HasXSAVEC", "true",
+                                     "Support xsavec instructions">;
+
+def FeatureXSAVES : SubtargetFeature<"xsaves", "HasXSAVES", "true",
+                                     "Support xsaves instructions">;
 
-def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
-                                  "Enable MMX instructions">;
 def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
                                    "Enable SSE instructions",
                                    // SSE codegen depends on cmovs, and all
                                    // SSE1+ processors support them.
-                                   [FeatureMMX, FeatureCMOV]>;
+                                   [FeatureCMOV]>;
 def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
                                    "Enable SSE2 instructions",
                                    [FeatureSSE1]>;
@@ -60,6 +72,11 @@ def FeatureSSE41 : SubtargetFeature<"sse4.1", "X86SSELevel", "SSE41",
 def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42",
                                     "Enable SSE 4.2 instructions",
                                     [FeatureSSE41]>;
+// The MMX subtarget feature is separate from the rest of the SSE features
+// because it's important (for odd compatibility reasons) to be able to
+// turn it off explicitly while allowing SSE+ to be on.
+def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX", + "Enable MMX instructions">; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", "Enable 3DNow! instructions", [FeatureMMX]>; @@ -79,16 +96,13 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true", "Bit testing of memory is slow">; def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", "SHLD instruction is slow">; -// FIXME: This is a 16-byte (SSE/AVX) feature; we should rename it to make that -// explicit. Also, it seems this would be the default state for most chips -// going forward, so it would probably be better to negate the logic and -// match the 32-byte "slow mem" feature below. -def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem", - "IsUAMemFast", "true", - "Fast unaligned memory access">; +// FIXME: This should not apply to CPUs that do not have SSE. +def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16", + "IsUAMem16Slow", "true", + "Slow unaligned 16-byte memory access">; def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", - "IsUAMem32Slow", "true", - "Slow unaligned 32-byte memory access">; + "IsUAMem32Slow", "true", + "Slow unaligned 32-byte memory access">; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", [FeatureSSE3]>; @@ -120,6 +134,8 @@ def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", "Enable AVX-512 Vector Length eXtensions", [FeatureAVX512]>; +def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", + "Enable protection keys">; def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", "Enable packed carry-less multiplication instructions", [FeatureSSE2]>; @@ -168,9 +184,11 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; +def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true", + "Support LAHF and SAHF instructions">; def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", "Support MPX instructions">; -def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", +def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", "Use LEA for adjusting the stack pointer">; def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true", @@ -181,6 +199,11 @@ def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw", def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", "Pad short functions">; +// TODO: This feature ought to be renamed. +// What it really refers to are CPUs for which certain instructions +// (which ones besides the example below?) are microcoded. +// The best examples of this are the memory forms of CALL and PUSH +// instructions, which should be avoided in favor of a MOV + register CALL/PUSH. 
def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect", "CallRegIndirect", "true", "Call register indirect">; @@ -208,278 +231,473 @@ def ProcIntelSLM : SubtargetFeature<"slm", "X86ProcFamily", "IntelSLM", class Proc Features> : ProcessorModel; -def : Proc<"generic", []>; -def : Proc<"i386", []>; -def : Proc<"i486", []>; -def : Proc<"i586", []>; -def : Proc<"pentium", []>; -def : Proc<"pentium-mmx", [FeatureMMX]>; -def : Proc<"i686", []>; -def : Proc<"pentiumpro", [FeatureCMOV]>; -def : Proc<"pentium2", [FeatureMMX, FeatureCMOV]>; -def : Proc<"pentium3", [FeatureSSE1]>; -def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>; -def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>; -def : Proc<"pentium4", [FeatureSSE2]>; -def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>; +def : Proc<"generic", [FeatureSlowUAMem16]>; +def : Proc<"i386", [FeatureSlowUAMem16]>; +def : Proc<"i486", [FeatureSlowUAMem16]>; +def : Proc<"i586", [FeatureSlowUAMem16]>; +def : Proc<"pentium", [FeatureSlowUAMem16]>; +def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>; +def : Proc<"i686", [FeatureSlowUAMem16]>; +def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>; +def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV, + FeatureFXSR]>; +def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, + FeatureFXSR]>; +def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, + FeatureFXSR, FeatureSlowBTMem]>; +def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, + FeatureFXSR, FeatureSlowBTMem]>; +def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, + FeatureFXSR]>; +def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2, + FeatureFXSR, FeatureSlowBTMem]>; // Intel Core Duo. def : ProcessorModel<"yonah", SandyBridgeModel, - [FeatureSSE3, FeatureSlowBTMem]>; + [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, + FeatureSlowBTMem]>; // NetBurst. -def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>; -def : Proc<"nocona", [FeatureSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; +def : Proc<"prescott", + [FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureFXSR, + FeatureSlowBTMem]>; +def : Proc<"nocona", [ + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSE3, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem +]>; // Intel Core 2 Solo/Duo. -def : ProcessorModel<"core2", SandyBridgeModel, - [FeatureSSSE3, FeatureCMPXCHG16B, FeatureSlowBTMem]>; -def : ProcessorModel<"penryn", SandyBridgeModel, - [FeatureSSE41, FeatureCMPXCHG16B, FeatureSlowBTMem]>; +def : ProcessorModel<"core2", SandyBridgeModel, [ + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSSE3, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureLAHFSAHF +]>; +def : ProcessorModel<"penryn", SandyBridgeModel, [ + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSE41, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureLAHFSAHF +]>; // Atom CPUs. class BonnellProc : ProcessorModel; + ProcIntelAtom, + FeatureSlowUAMem16, + FeatureMMX, + FeatureSSSE3, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureMOVBE, + FeatureSlowBTMem, + FeatureLEAForSP, + FeatureSlowDivide32, + FeatureSlowDivide64, + FeatureCallRegIndirect, + FeatureLEAUsesAG, + FeaturePadShortFunctions, + FeatureLAHFSAHF +]>; def : BonnellProc<"bonnell">; def : BonnellProc<"atom">; // Pin the generic name to the baseline. 
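As a rough illustration of how the feature bits defined above surface on the C++ side (a minimal sketch under assumed details, not code from this patch): the second SubtargetFeature argument names a member of X86Subtarget that TableGen-generated code sets when a CPU definition or -mattr flag enables the feature, and backend passes query it through accessors.

    // Hypothetical excerpt in the style of X86Subtarget.h; the member names
    // mirror the second SubtargetFeature template argument in the defs above.
    class X86Subtarget {
      bool IsUAMem16Slow = false; // FeatureSlowUAMem16 ("slow-unaligned-mem-16")
      bool HasFXSR = false;       // FeatureFXSR ("fxsr")
      bool HasLAHFSAHF = false;   // FeatureLAHFSAHF ("sahf")

    public:
      bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
      bool hasFXSR() const { return HasFXSR; }
      bool hasLAHFSAHF() const { return HasLAHFSAHF; }
    };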
class SilvermontProc : ProcessorModel; + ProcIntelSLM, + FeatureMMX, + FeatureSSE42, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureMOVBE, + FeaturePOPCNT, + FeaturePCLMUL, + FeatureAES, + FeatureSlowDivide64, + FeatureCallRegIndirect, + FeaturePRFCHW, + FeatureSlowLEA, + FeatureSlowIncDec, + FeatureSlowBTMem, + FeatureLAHFSAHF +]>; def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. // "Arrandale" along with corei3 and corei5 class NehalemProc : ProcessorModel; + FeatureMMX, + FeatureSSE42, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureLAHFSAHF +]>; def : NehalemProc<"nehalem">; def : NehalemProc<"corei7">; // Westmere is a similar machine to nehalem with some additional features. // Westmere is the corei3/i5/i7 path from nehalem to sandybridge class WestmereProc : ProcessorModel; + FeatureMMX, + FeatureSSE42, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureLAHFSAHF +]>; def : WestmereProc<"westmere">; // SSE is not listed here since llvm treats AVX as a reimplementation of SSE, // rather than a superset. class SandyBridgeProc : ProcessorModel; + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureSlowUAMem32, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureLAHFSAHF +]>; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. class IvyBridgeProc : ProcessorModel; + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeatureSlowUAMem32, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureRDRAND, + FeatureF16C, + FeatureFSGSBase, + FeatureLAHFSAHF +]>; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. class HaswellProc : ProcessorModel; + FeatureMMX, + FeatureAVX2, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureRDRAND, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureF16C, + FeatureFSGSBase, + FeatureMOVBE, + FeatureLZCNT, + FeatureBMI, + FeatureBMI2, + FeatureFMA, + FeatureRTM, + FeatureHLE, + FeatureSlowIncDec, + FeatureLAHFSAHF +]>; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. class BroadwellProc : ProcessorModel; + FeatureMMX, + FeatureAVX2, + FeatureFXSR, + FeatureCMPXCHG16B, + FeatureSlowBTMem, + FeaturePOPCNT, + FeatureAES, + FeaturePCLMUL, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureRDRAND, + FeatureF16C, + FeatureFSGSBase, + FeatureMOVBE, + FeatureLZCNT, + FeatureBMI, + FeatureBMI2, + FeatureFMA, + FeatureRTM, + FeatureHLE, + FeatureADX, + FeatureRDSEED, + FeatureSlowIncDec, + FeatureLAHFSAHF +]>; def : BroadwellProc<"broadwell">; // FIXME: define KNL model -class KnightsLandingProc : ProcessorModel; +class KnightsLandingProc : ProcessorModel; def : KnightsLandingProc<"knl">; // FIXME: define SKX model -class SkylakeProc : ProcessorModel; +class SkylakeProc : ProcessorModel; def : SkylakeProc<"skylake">; def : SkylakeProc<"skx">; // Legacy alias. // AMD CPUs. 
-def : Proc<"k6", [FeatureMMX]>; -def : Proc<"k6-2", [Feature3DNow]>; -def : Proc<"k6-3", [Feature3DNow]>; -def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem, - FeatureSlowSHLD]>; -def : Proc<"k8", [FeatureSSE2, Feature3DNowA, Feature64Bit, +def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>; +def : Proc<"k6-2", [FeatureSlowUAMem16, Feature3DNow]>; +def : Proc<"k6-3", [FeatureSlowUAMem16, Feature3DNow]>; +def : Proc<"athlon", [FeatureSlowUAMem16, Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"opteron", [FeatureSSE2, Feature3DNowA, Feature64Bit, +def : Proc<"athlon-tbird", [FeatureSlowUAMem16, Feature3DNowA, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon64", [FeatureSSE2, Feature3DNowA, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon-fx", [FeatureSSE2, Feature3DNowA, Feature64Bit, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"k8-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"opteron-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"athlon64-sse3", [FeatureSSE3, Feature3DNowA, FeatureCMPXCHG16B, - FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"amdfam10", [FeatureSSE4A, - Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, +def : Proc<"athlon-4", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA, + FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"athlon-xp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA, + FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"athlon-mp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA, + FeatureFXSR, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"k8", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, FeatureSlowSHLD]>; -def : Proc<"barcelona", [FeatureSSE4A, - Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowBTMem, +def : Proc<"opteron", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, FeatureSlowSHLD]>; +def : Proc<"athlon64", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"athlon-fx", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA, + FeatureFXSR, Feature64Bit, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"k8-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"opteron-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, + FeatureFXSR, FeatureCMPXCHG16B, FeatureSlowBTMem, + FeatureSlowSHLD]>; +def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, + FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, + FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>; +def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, + FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, + FeatureSlowBTMem, FeatureSlowSHLD, 
FeatureLAHFSAHF]>; + // Bobcat -def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B, - FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT, - FeatureSlowSHLD]>; +def : Proc<"btver1", [ + FeatureMMX, + FeatureSSSE3, + FeatureSSE4A, + FeatureFXSR, + FeatureCMPXCHG16B, + FeaturePRFCHW, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureSlowSHLD, + FeatureLAHFSAHF +]>; // Jaguar -def : ProcessorModel<"btver2", BtVer2Model, - [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B, - FeaturePRFCHW, FeatureAES, FeaturePCLMUL, - FeatureBMI, FeatureF16C, FeatureMOVBE, - FeatureLZCNT, FeaturePOPCNT, FeatureFastUAMem, - FeatureSlowSHLD]>; - -// TODO: We should probably add 'FeatureFastUAMem' to all of the AMD chips. +def : ProcessorModel<"btver2", BtVer2Model, [ + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureSSE4A, + FeatureCMPXCHG16B, + FeaturePRFCHW, + FeatureAES, + FeaturePCLMUL, + FeatureBMI, + FeatureF16C, + FeatureMOVBE, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureXSAVEOPT, + FeatureSlowSHLD, + FeatureLAHFSAHF +]>; // Bulldozer -def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureAVX, FeatureSSE4A, FeatureLZCNT, - FeaturePOPCNT, FeatureSlowSHLD]>; +def : Proc<"bdver1", [ + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureSSE4A, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureSlowSHLD, + FeatureLAHFSAHF +]>; // Piledriver -def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureAVX, FeatureSSE4A, FeatureF16C, - FeatureLZCNT, FeaturePOPCNT, FeatureBMI, - FeatureTBM, FeatureFMA, FeatureSlowSHLD]>; +def : Proc<"bdver2", [ + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureSSE4A, + FeatureF16C, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureBMI, + FeatureTBM, + FeatureFMA, + FeatureSlowSHLD, + FeatureLAHFSAHF +]>; // Steamroller -def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B, - FeatureAES, FeaturePRFCHW, FeaturePCLMUL, - FeatureAVX, FeatureSSE4A, FeatureF16C, - FeatureLZCNT, FeaturePOPCNT, FeatureBMI, - FeatureTBM, FeatureFMA, FeatureSlowSHLD, - FeatureFSGSBase]>; +def : Proc<"bdver3", [ + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureMMX, + FeatureAVX, + FeatureFXSR, + FeatureSSE4A, + FeatureF16C, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureBMI, + FeatureTBM, + FeatureFMA, + FeatureXSAVEOPT, + FeatureSlowSHLD, + FeatureFSGSBase, + FeatureLAHFSAHF +]>; // Excavator -def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4, - FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW, - FeaturePCLMUL, FeatureF16C, FeatureLZCNT, - FeaturePOPCNT, FeatureBMI, FeatureBMI2, - FeatureTBM, FeatureFMA, FeatureSSE4A, - FeatureFSGSBase]>; +def : Proc<"bdver4", [ + FeatureMMX, + FeatureAVX2, + FeatureFXSR, + FeatureXOP, + FeatureFMA4, + FeatureCMPXCHG16B, + FeatureAES, + FeaturePRFCHW, + FeaturePCLMUL, + FeatureF16C, + FeatureLZCNT, + FeaturePOPCNT, + FeatureXSAVE, + FeatureBMI, + FeatureBMI2, + FeatureTBM, + FeatureFMA, + FeatureXSAVEOPT, + FeatureFSGSBase, + FeatureLAHFSAHF +]>; -def : Proc<"geode", [Feature3DNowA]>; +def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>; -def : Proc<"winchip-c6", [FeatureMMX]>; -def : Proc<"winchip2", [Feature3DNow]>; -def : 
Proc<"c3", [Feature3DNow]>; -def : Proc<"c3-2", [FeatureSSE1]>; +def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>; +def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>; +def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>; +def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1, FeatureFXSR]>; // We also provide a generic 64-bit specific x86 processor model which tries to // be good for modern chips without enabling instruction set encodings past the @@ -492,8 +710,8 @@ def : Proc<"c3-2", [FeatureSSE1]>; // knobs which need to be tuned differently for AMD chips, we might consider // forming a common base for them. def : ProcessorModel<"x86-64", SandyBridgeModel, - [FeatureSSE2, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem]>; + [FeatureMMX, FeatureSSE2, FeatureFXSR, Feature64Bit, + FeatureSlowBTMem ]>; //===----------------------------------------------------------------------===// // Register File Description @@ -520,10 +738,6 @@ include "X86CallingConv.td" // Assembly Parser //===----------------------------------------------------------------------===// -def ATTAsmParser : AsmParser { - string AsmParserClassName = "AsmParser"; -} - def ATTAsmParserVariant : AsmParserVariant { int Variant = 0; @@ -568,7 +782,6 @@ def IntelAsmWriter : AsmWriter { def X86 : Target { // Information about the instructions... let InstructionSet = X86InstrInfo; - let AssemblyParsers = [ATTAsmParser]; let AssemblyParserVariants = [ATTAsmParserVariant, IntelAsmParserVariant]; let AssemblyWriters = [ATTAsmWriter, IntelAsmWriter]; } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index ba33248d2039..2170e62e30fd 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -217,10 +217,10 @@ static void printOperand(X86AsmPrinter &P, const MachineInstr *MI, if (AsmVariant == 0) O << '%'; unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) { - MVT::SimpleValueType VT = (strcmp(Modifier+6,"64") == 0) ? - MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 : - ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8)); - Reg = getX86SubSuperRegister(Reg, VT); + unsigned Size = (strcmp(Modifier+6,"64") == 0) ? 64 : + (strcmp(Modifier+6,"32") == 0) ? 32 : + (strcmp(Modifier+6,"16") == 0) ? 16 : 8; + Reg = getX86SubSuperRegister(Reg, Size); } O << X86ATTInstPrinter::getRegisterName(Reg); return; @@ -361,22 +361,21 @@ static bool printAsmMRegister(X86AsmPrinter &P, const MachineOperand &MO, switch (Mode) { default: return true; // Unknown mode. case 'b': // Print QImode register - Reg = getX86SubSuperRegister(Reg, MVT::i8); + Reg = getX86SubSuperRegister(Reg, 8); break; case 'h': // Print QImode high register - Reg = getX86SubSuperRegister(Reg, MVT::i8, true); + Reg = getX86SubSuperRegister(Reg, 8, true); break; case 'w': // Print HImode register - Reg = getX86SubSuperRegister(Reg, MVT::i16); + Reg = getX86SubSuperRegister(Reg, 16); break; case 'k': // Print SImode register - Reg = getX86SubSuperRegister(Reg, MVT::i32); + Reg = getX86SubSuperRegister(Reg, 32); break; case 'q': // Print 64-bit register names if 64-bit integer registers are available. // Otherwise, print 32-bit register names. - MVT::SimpleValueType Ty = P.getSubtarget().is64Bit() ? MVT::i64 : MVT::i32; - Reg = getX86SubSuperRegister(Reg, Ty); + Reg = getX86SubSuperRegister(Reg, P.getSubtarget().is64Bit() ? 
64 : 32); break; } @@ -535,6 +534,7 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) { S, MCConstantExpr::create(int64_t(1), MMI->getContext())); } } + OutStreamer->EmitSyntaxDirective(); } static void @@ -565,10 +565,11 @@ MCSymbol *X86AsmPrinter::GetCPISymbol(unsigned CPID) const { const MachineConstantPoolEntry &CPE = MF->getConstantPool()->getConstants()[CPID]; if (!CPE.isMachineConstantPoolEntry()) { - SectionKind Kind = CPE.getSectionKind(TM.getDataLayout()); + const DataLayout &DL = MF->getDataLayout(); + SectionKind Kind = CPE.getSectionKind(&DL); const Constant *C = CPE.Val.ConstVal; if (const MCSectionCOFF *S = dyn_cast( - getObjFileLowering().getSectionForConstant(Kind, C))) { + getObjFileLowering().getSectionForConstant(DL, Kind, C))) { if (MCSymbol *Sym = S->getCOMDATSymbol()) { if (Sym->isUndefined()) OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 7f5d127c68d5..9c8bd98dbade 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -78,8 +78,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { // outputting it to the OutStream. This allows the shadow tracker to minimise // the number of NOPs used for stackmap padding. void EmitAndCountInstruction(MCInst &Inst); - - void InsertStackMapShadows(MachineFunction &MF); void LowerSTACKMAP(const MachineInstr &MI); void LowerPATCHPOINT(const MachineInstr &MI, X86MCInstLower &MCIL); void LowerSTATEPOINT(const MachineInstr &MI, X86MCInstLower &MCIL); diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 031ba4ba9e66..fc6ee1752f1f 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/Function.h" @@ -53,10 +54,13 @@ private: // Information we know about a particular call site struct CallContext { CallContext() - : Call(nullptr), SPCopy(nullptr), ExpectedDist(0), - MovVector(4, nullptr), NoStackParams(false), UsePush(false){}; + : FrameSetup(nullptr), Call(nullptr), SPCopy(nullptr), ExpectedDist(0), + MovVector(4, nullptr), NoStackParams(false), UsePush(false){} - // Actuall call instruction + // Iterator referring to the frame setup instruction + MachineBasicBlock::iterator FrameSetup; + + // Actual call instruction MachineInstr *Call; // A copy of the stack pointer @@ -75,17 +79,16 @@ private: bool UsePush; }; - typedef DenseMap ContextMap; + typedef SmallVector ContextVector; bool isLegal(MachineFunction &MF); - bool isProfitable(MachineFunction &MF, ContextMap &CallSeqMap); + bool isProfitable(MachineFunction &MF, ContextVector &CallSeqMap); void collectCallInfo(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, CallContext &Context); - bool adjustCallSequence(MachineFunction &MF, MachineBasicBlock::iterator I, - const CallContext &Context); + bool adjustCallSequence(MachineFunction &MF, const CallContext &Context); MachineInstr *canFoldIntoRegPush(MachineBasicBlock::iterator FrameSetup, unsigned Reg); @@ -100,7 +103,8 @@ private: const char *getPassName() const override { return "X86 Optimize Call Frame"; } const TargetInstrInfo *TII; - const TargetFrameLowering *TFL; + const X86FrameLowering 
*TFL; + const X86Subtarget *STI; const MachineRegisterInfo *MRI; static char ID; }; @@ -124,8 +128,15 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { // No point in running this in 64-bit mode, since some arguments are // passed in-register in all common calling conventions, so the pattern // we're looking for will never match. - const X86Subtarget &STI = MF.getSubtarget(); - if (STI.is64Bit()) + if (STI->is64Bit()) + return false; + + // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset + // in the compact unwind encoding that Darwin uses. So, bail if there + // is a danger of that being generated. + if (STI->isTargetDarwin() && + (!MF.getMMI().getLandingPads().empty() || + (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF)))) return false; // You would expect straight-line code between call-frame setup and @@ -161,7 +172,7 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { // Check whether this trasnformation is profitable for a particular // function - in terms of code size. bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, - ContextMap &CallSeqMap) { + ContextVector &CallSeqVector) { // This transformation is always a win when we do not expect to have // a reserved call frame. Under other circumstances, it may be either // a win or a loss, and requires a heuristic. @@ -170,24 +181,20 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, return true; // Don't do this when not optimizing for size. - bool OptForSize = - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || - MF.getFunction()->hasFnAttribute(Attribute::MinSize); - - if (!OptForSize) + if (!MF.getFunction()->optForSize()) return false; unsigned StackAlign = TFL->getStackAlignment(); int64_t Advantage = 0; - for (auto CC : CallSeqMap) { + for (auto CC : CallSeqVector) { // Call sites where no parameters are passed on the stack // do not affect the cost, since there needs to be no // stack adjustment. - if (CC.second.NoStackParams) + if (CC.NoStackParams) continue; - if (!CC.second.UsePush) { + if (!CC.UsePush) { // If we don't use pushes for a particular call site, // we pay for not having a reserved call frame with an // additional sub/add esp pair. The cost is ~3 bytes per instruction, @@ -200,11 +207,11 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, // We'll need a add after the call. Advantage -= 3; // If we have to realign the stack, we'll also need and sub before - if (CC.second.ExpectedDist % StackAlign) + if (CC.ExpectedDist % StackAlign) Advantage -= 3; // Now, for each push, we save ~3 bytes. For small constants, we actually, // save more (up to 5 bytes), but 3 should be a good approximation. 
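To make the byte accounting in isProfitable concrete, here is an illustrative, self-contained sketch of the per-call-site arithmetic under the ~3-byte costs stated in the comments above (the pass itself sums this advantage over all call sites, with the per-push credit applied on the lines just below):

    #include <cstdint>

    // Net code-size advantage, in bytes, of turning one call site's
    // esp-relative movs into pushes.
    int64_t pushAdvantage(int64_t ExpectedDist, unsigned StackAlign) {
      int64_t Advantage = -3;                      // add needed after the call
      if (ExpectedDist % StackAlign)
        Advantage -= 3;                            // sub needed to realign
      return Advantage + (ExpectedDist / 4) * 3;   // ~3 bytes saved per push
    }
    // e.g. pushAdvantage(16, 16) == 9, while pushAdvantage(4, 16) == -3.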
- Advantage += (CC.second.ExpectedDist / 4) * 3; + Advantage += (CC.ExpectedDist / 4) * 3; } } @@ -212,8 +219,9 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF, } bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getSubtarget().getInstrInfo(); - TFL = MF.getSubtarget().getFrameLowering(); + STI = &MF.getSubtarget(); + TII = STI->getInstrInfo(); + TFL = STI->getFrameLowering(); MRI = &MF.getRegInfo(); if (!isLegal(MF)) @@ -223,21 +231,22 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; - ContextMap CallSeqMap; + ContextVector CallSeqVector; for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) if (I->getOpcode() == FrameSetupOpcode) { - CallContext &Context = CallSeqMap[I]; + CallContext Context; collectCallInfo(MF, *BB, I, Context); + CallSeqVector.push_back(Context); } - if (!isProfitable(MF, CallSeqMap)) + if (!isProfitable(MF, CallSeqVector)) return false; - for (auto CC : CallSeqMap) - if (CC.second.UsePush) - Changed |= adjustCallSequence(MF, CC.first, CC.second); + for (auto CC : CallSeqVector) + if (CC.UsePush) + Changed |= adjustCallSequence(MF, CC); return Changed; } @@ -307,13 +316,13 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, // Check that this particular call sequence is amenable to the // transformation. const X86RegisterInfo &RegInfo = *static_cast( - MF.getSubtarget().getRegisterInfo()); - unsigned StackPtr = RegInfo.getStackRegister(); + STI->getRegisterInfo()); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); // We expect to enter this at the beginning of a call sequence assert(I->getOpcode() == TII->getCallFrameSetupOpcode()); MachineBasicBlock::iterator FrameSetup = I++; + Context.FrameSetup = FrameSetup; // How much do we adjust the stack? This puts an upper bound on // the number of parameters actually passed on it. @@ -338,7 +347,8 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, if (!I->isCopy() || !I->getOperand(0).isReg()) return; Context.SPCopy = I++; - StackPtr = Context.SPCopy->getOperand(0).getReg(); + + unsigned StackPtr = Context.SPCopy->getOperand(0).getReg(); // Scan the call setup sequence for the pattern we're looking for. // We only handle a simple case - a sequence of MOV32mi or MOV32mr @@ -434,22 +444,22 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF, } bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, - MachineBasicBlock::iterator I, const CallContext &Context) { // Ok, we can in fact do the transformation for this call. // Do not remove the FrameSetup instruction, but adjust the parameters. // PEI will end up finalizing the handling of this. - MachineBasicBlock::iterator FrameSetup = I; - MachineBasicBlock &MBB = *(I->getParent()); + MachineBasicBlock::iterator FrameSetup = Context.FrameSetup; + MachineBasicBlock &MBB = *(FrameSetup->getParent()); FrameSetup->getOperand(1).setImm(Context.ExpectedDist); - DebugLoc DL = I->getDebugLoc(); + DebugLoc DL = FrameSetup->getDebugLoc(); // Now, iterate through the vector in reverse order, and replace the movs // with pushes. MOVmi/MOVmr doesn't have any defs, so no need to // replace uses. 
for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) { MachineBasicBlock::iterator MOV = *Context.MovVector[Idx]; MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); + MachineBasicBlock::iterator Push = nullptr; if (MOV->getOpcode() == X86::MOV32mi) { unsigned PushOpcode = X86::PUSHi32; // If the operand is a small (8-bit) immediate, we can use a @@ -461,21 +471,20 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, if (isInt<8>(Val)) PushOpcode = X86::PUSH32i8; } - BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp); + Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)) + .addOperand(PushOp); } else { unsigned int Reg = PushOp.getReg(); // If PUSHrmm is not slow on this target, try to fold the source of the // push into the instruction. - const X86Subtarget &ST = MF.getSubtarget(); - bool SlowPUSHrmm = ST.isAtom() || ST.isSLM(); + bool SlowPUSHrmm = STI->isAtom() || STI->isSLM(); // Check that this is legal to fold. Right now, we're extremely // conservative about that. MachineInstr *DefMov = nullptr; if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { - MachineInstr *Push = - BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm)); + Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm)); unsigned NumOps = DefMov->getDesc().getNumOperands(); for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) @@ -483,12 +492,19 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, DefMov->eraseFromParent(); } else { - BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r)) + Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r)) .addReg(Reg) .getInstr(); } } + // For debugging, when using SP-based CFA, we need to adjust the CFA + // offset after each push. + // TODO: This is needed only if we require precise CFA. + if (!TFL->hasFP(MF)) + TFL->BuildCFI(MBB, std::next(Push), DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, 4)); + MBB.erase(MOV); } @@ -532,13 +548,10 @@ MachineInstr *X86CallFrameOptimization::canFoldIntoRegPush( DefMI->getParent() != FrameSetup->getParent()) return nullptr; - // Now, make sure everything else up until the ADJCALLSTACK is a sequence - // of MOVs. To be less conservative would require duplicating a lot of the - // logic from PeepholeOptimizer. - // FIXME: A possibly better approach would be to teach the PeepholeOptimizer - // to be smarter about folding into pushes. + // Make sure we don't have any instructions between DefMI and the + // push that make folding the load illegal. 
for (auto I = DefMI; I != FrameSetup; ++I) - if (I->getOpcode() != X86::MOV32rm) + if (I->isLoadFoldBarrier()) return nullptr; return DefMI; diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h index 0eb2494f1d63..a08160f9feba 100644 --- a/lib/Target/X86/X86CallingConv.h +++ b/lib/Target/X86/X86CallingConv.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_X86_X86CALLINGCONV_H #define LLVM_LIB_TARGET_X86_X86CALLINGCONV_H +#include "MCTargetDesc/X86MCTargetDesc.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/IR/CallingConv.h" @@ -42,6 +43,64 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &, return false; } +inline bool CC_X86_32_MCUInReg(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + // This is similar to CCAssignToReg<[EAX, EDX, ECX]>, but makes sure + // not to split i64 and double between a register and stack + static const MCPhysReg RegList[] = {X86::EAX, X86::EDX, X86::ECX}; + static const unsigned NumRegs = sizeof(RegList)/sizeof(RegList[0]); + + SmallVectorImpl &PendingMembers = State.getPendingLocs(); + + // If this is the first part of a double/i64/i128, or if we're already + // in the middle of a split, add to the pending list. If this is not + // the end of the split, return, otherwise go on to process the pending + // list + if (ArgFlags.isSplit() || !PendingMembers.empty()) { + PendingMembers.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + if (!ArgFlags.isSplitEnd()) + return true; + } + + // If there are no pending members, we are not in the middle of a split, + // so do the usual inreg stuff. + if (PendingMembers.empty()) { + if (unsigned Reg = State.AllocateReg(RegList)) { + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + return true; + } + return false; + } + + assert(ArgFlags.isSplitEnd()); + + // We now have the entire original argument in PendingMembers, so decide + // whether to use registers or the stack. + // Per the MCU ABI: + // a) To use registers, we need to have enough of them free to contain + // the entire argument. + // b) We never want to use more than 2 registers for a single argument. + + unsigned FirstFree = State.getFirstUnallocated(RegList); + bool UseRegs = PendingMembers.size() <= std::min(2U, NumRegs - FirstFree); + + for (auto &It : PendingMembers) { + if (UseRegs) + It.convertToReg(State.AllocateReg(RegList[FirstFree++])); + else + It.convertToMem(State.AllocateStack(4, 4)); + State.addLoc(It); + } + + PendingMembers.clear(); + + return true; +} + } // End llvm namespace #endif diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 8f88888f5ce3..54d88cbb244e 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -158,6 +158,7 @@ def RetCC_X86_64_C : CallingConv<[ // The X86-64 calling convention always returns FP values in XMM0. CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>, CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[f128], CCAssignToReg<[XMM0, XMM1]>>, // MMX vector types are always returned in XMM0. CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>, @@ -202,6 +203,16 @@ def RetCC_X86_64_AnyReg : CallingConv<[ CCCustom<"CC_X86_AnyReg_Error"> ]>; +// X86-64 HHVM return-value convention. +def RetCC_X86_64_HHVM: CallingConv<[ + // Promote all types to i64 + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // Return: could return in any GP register save RSP and R12.
+ CCIfType<[i64], CCAssignToReg<[RBX, RBP, RDI, RSI, RDX, RCX, R8, R9, + RAX, R10, R11, R13, R14, R15]>> +]>; + // This is the root return-value convention for the X86-32 backend. def RetCC_X86_32 : CallingConv<[ // If FastCC, use RetCC_X86_32_Fast. @@ -227,6 +238,9 @@ def RetCC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, + // Handle HHVM calls. + CCIfCC<"CallingConv::HHVM", CCDelegateTo>, + // Mingw64 and native Win64 use Win64 CC CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, @@ -280,7 +294,7 @@ def CC_X86_64_C : CallingConv<[ CCIfType<[v64i1], CCPromoteToType>, // The first 8 FP/Vector arguments are passed in XMM registers. - CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCIfType<[f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCIfSubtarget<"hasSSE1()", CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, @@ -305,7 +319,7 @@ def CC_X86_64_C : CallingConv<[ // Long doubles get stack slots whose size and alignment depends on the // subtarget. - CCIfType<[f80], CCAssignToStack<0, 0>>, + CCIfType<[f80, f128], CCAssignToStack<0, 0>>, // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, @@ -319,6 +333,23 @@ def CC_X86_64_C : CallingConv<[ CCAssignToStack<64, 64>> ]>; +// Calling convention for X86-64 HHVM. +def CC_X86_64_HHVM : CallingConv<[ + // Use all/any GP registers for args, except RSP. + CCIfType<[i64], CCAssignToReg<[RBX, R12, RBP, R15, + RDI, RSI, RDX, RCX, R8, R9, + RAX, R10, R11, R13, R14]>> +]>; + +// Calling convention for helper functions in HHVM. +def CC_X86_64_HHVM_C : CallingConv<[ + // Pass the first argument in RBP. + CCIfType<[i64], CCAssignToReg<[RBP]>>, + + // Otherwise it's the same as the regular C calling convention. + CCDelegateTo +]>; + // Calling convention used on Win64 def CC_X86_Win64_C : CallingConv<[ // FIXME: Handle byval stuff. @@ -561,6 +592,23 @@ def CC_X86_32_C : CallingConv<[ CCDelegateTo ]>; +def CC_X86_32_MCU : CallingConv<[ + // Handles byval parameters. Note that, like FastCC, we can't rely on + // the delegation to CC_X86_32_Common because that happens after code that + // puts arguments in registers. + CCIfByVal>, + + // Promote i1/i8/i16 arguments to i32. + CCIfType<[i1, i8, i16], CCPromoteToType>, + + // If the call is not a vararg call, some arguments may be passed + // in integer registers. + CCIfNotVarArg>>, + + // Otherwise, same as everything else. + CCDelegateTo +]>; + def CC_X86_32_FastCall : CallingConv<[ // Promote i1/i8/i16 arguments to i32. CCIfType<[i1, i8, i16], CCPromoteToType>, @@ -708,18 +756,28 @@ def CC_Intel_OCL_BI : CallingConv<[ CCDelegateTo ]>; +def CC_X86_32_Intr : CallingConv<[ + CCAssignToStack<4, 4> +]>; + +def CC_X86_64_Intr : CallingConv<[ + CCAssignToStack<8, 8> +]>; + //===----------------------------------------------------------------------===// // X86 Root Argument Calling Conventions //===----------------------------------------------------------------------===// // This is the root argument convention for the X86-32 backend. 
def CC_X86_32 : CallingConv<[ + CCIfSubtarget<"isTargetMCU()", CCDelegateTo>, CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo>, CCIfCC<"CallingConv::Fast", CCDelegateTo>, CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, + CCIfCC<"CallingConv::X86_INTR", CCDelegateTo>, // Otherwise, drop to normal X86-32 CC CCDelegateTo @@ -734,6 +792,9 @@ def CC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, + CCIfCC<"CallingConv::HHVM", CCDelegateTo>, + CCIfCC<"CallingConv::HHVM_C", CCDelegateTo>, + CCIfCC<"CallingConv::X86_INTR", CCDelegateTo>, // Mingw64 and native Win64 use Win64 CC CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, @@ -764,6 +825,12 @@ def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>; def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15, (sequence "XMM%u", 6, 15))>; +// The function used by Darwin to obtain the address of a thread-local variable +// uses rdi to pass a single parameter and rax for the return value. All other +// GPRs are preserved. +def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI, + R8, R9, R10, R11)>; + // All GPRs - except r11 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, R8, R9, R10, RSP)>; @@ -778,6 +845,11 @@ def CSR_64_MostRegs : CalleeSavedRegs<(add RBX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, R12, R13, R14, R15, RBP, (sequence "XMM%u", 0, 15))>; +def CSR_32_AllRegs : CalleeSavedRegs<(add EAX, EBX, ECX, EDX, EBP, ESI, + EDI, ESP)>; +def CSR_32_AllRegs_SSE : CalleeSavedRegs<(add CSR_32_AllRegs, + (sequence "XMM%u", 0, 7))>; + def CSR_64_AllRegs : CalleeSavedRegs<(add CSR_64_MostRegs, RAX, RSP, (sequence "XMM%u", 16, 31))>; def CSR_64_AllRegs_AVX : CalleeSavedRegs<(sub (add CSR_64_MostRegs, RAX, RSP, @@ -804,3 +876,6 @@ def CSR_64_Intel_OCL_BI_AVX : CalleeSavedRegs<(add CSR_64, def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RDI, RSI, R14, R15, (sequence "ZMM%u", 16, 31), K4, K5, K6, K7)>; + +// Only R12 is preserved for PHP calls in HHVM. +def CSR_64_HHVM : CalleeSavedRegs<(add R12)>; diff --git a/lib/Target/X86/X86CompilationCallback_Win64.asm b/lib/Target/X86/X86CompilationCallback_Win64.asm deleted file mode 100644 index 69b4c71651d7..000000000000 --- a/lib/Target/X86/X86CompilationCallback_Win64.asm +++ /dev/null @@ -1,68 +0,0 @@ -;;===-- X86CompilationCallback_Win64.asm - Implement Win64 JIT callback ---=== -;; -;; The LLVM Compiler Infrastructure -;; -;; This file is distributed under the University of Illinois Open Source -;; License. See LICENSE.TXT for details. -;; -;;===----------------------------------------------------------------------=== -;; -;; This file implements the JIT interfaces for the X86 target. -;; -;;===----------------------------------------------------------------------=== - -extrn LLVMX86CompilationCallback2: PROC - -.code -X86CompilationCallback proc - push rbp - - ; Save RSP. - mov rbp, rsp - - ; Save all int arg registers - ; WARNING: We cannot use register spill area - we're generating stubs by hands! - push rcx - push rdx - push r8 - push r9 - - ; Align stack on 16-byte boundary. - and rsp, -16 - - ; Save all XMM arg registers. Also allocate reg spill area. 
- sub rsp, 96 - movaps [rsp +32], xmm0 - movaps [rsp+16+32], xmm1 - movaps [rsp+32+32], xmm2 - movaps [rsp+48+32], xmm3 - - ; JIT callee - - ; Pass prev frame and return address. - mov rcx, rbp - mov rdx, qword ptr [rbp+8] - call LLVMX86CompilationCallback2 - - ; Restore all XMM arg registers. - movaps xmm3, [rsp+48+32] - movaps xmm2, [rsp+32+32] - movaps xmm1, [rsp+16+32] - movaps xmm0, [rsp +32] - - ; Restore RSP. - mov rsp, rbp - - ; Restore all int arg registers - sub rsp, 32 - pop r9 - pop r8 - pop rdx - pop rcx - - ; Restore RBP. - pop rbp - ret -X86CompilationCallback endp - -End diff --git a/lib/Target/X86/X86ExpandPseudo.cpp b/lib/Target/X86/X86ExpandPseudo.cpp index 6a5a28e546f2..a09d06519376 100644 --- a/lib/Target/X86/X86ExpandPseudo.cpp +++ b/lib/Target/X86/X86ExpandPseudo.cpp @@ -19,9 +19,10 @@ #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" -#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved. +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved. #include "llvm/IR/GlobalValue.h" using namespace llvm; @@ -141,6 +142,24 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // The EH_RETURN pseudo is really removed during the MC Lowering. return true; } + case X86::IRET: { + // Adjust stack to erase error code + int64_t StackAdj = MBBI->getOperand(0).getImm(); + X86FL->emitSPUpdate(MBB, MBBI, StackAdj, true); + // Replace pseudo with machine iret + BuildMI(MBB, MBBI, DL, + TII->get(STI->is64Bit() ? X86::IRET64 : X86::IRET32)); + MBB.erase(MBBI); + return true; + } + case X86::EH_RESTORE: { + // Restore ESP and EBP, and optionally ESI if required. + bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality( + MBB.getParent()->getFunction()->getPersonalityFn())); + X86FL->restoreWin32EHStackPointers(MBB, MBBI, DL, /*RestoreSP=*/IsSEH); + MBBI->eraseFromParent(); + return true; + } } llvm_unreachable("Previous switch has a fallthrough?"); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index b4319c8bb04f..de94a138d865 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -298,8 +298,8 @@ bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, return false; // Make sure nothing is in the way - BasicBlock::const_iterator Start = I; - BasicBlock::const_iterator End = II; + BasicBlock::const_iterator Start(I); + BasicBlock::const_iterator End(II); for (auto Itr = std::prev(Start); Itr != End; --Itr) { // We only expect extractvalue instructions between the intrinsic and the // instruction to be selected. @@ -433,6 +433,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, X86AddressMode &AM, MachineMemOperand *MMO, bool Aligned) { + bool HasSSE2 = Subtarget->hasSSE2(); + bool HasSSE4A = Subtarget->hasSSE4A(); + bool HasAVX = Subtarget->hasAVX(); + bool IsNonTemporal = MMO && MMO->isNonTemporal(); + // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { @@ -449,35 +454,59 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, // FALLTHROUGH, handling i1 as i8. 
case MVT::i8: Opc = X86::MOV8mr; break; case MVT::i16: Opc = X86::MOV16mr; break; - case MVT::i32: Opc = X86::MOV32mr; break; - case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode. + case MVT::i32: + Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTImr : X86::MOV32mr; + break; + case MVT::i64: + // Must be in x86-64 mode. + Opc = (IsNonTemporal && HasSSE2) ? X86::MOVNTI_64mr : X86::MOV64mr; + break; case MVT::f32: - Opc = X86ScalarSSEf32 ? - (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m; + if (X86ScalarSSEf32) { + if (IsNonTemporal && HasSSE4A) + Opc = X86::MOVNTSS; + else + Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr; + } else + Opc = X86::ST_Fp32m; break; case MVT::f64: - Opc = X86ScalarSSEf64 ? - (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m; + if (X86ScalarSSEf32) { + if (IsNonTemporal && HasSSE4A) + Opc = X86::MOVNTSD; + else + Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr; + } else + Opc = X86::ST_Fp64m; break; case MVT::v4f32: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr; - else - Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr; + if (Aligned) { + if (IsNonTemporal) + Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr; + else + Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr; + } else + Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr; break; case MVT::v2f64: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr; - else - Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr; + if (Aligned) { + if (IsNonTemporal) + Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr; + else + Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr; + } else + Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr; break; case MVT::v4i32: case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - if (Aligned) - Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr; - else + if (Aligned) { + if (IsNonTemporal) + Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr; + else + Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr; + } else Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr; break; } @@ -1069,12 +1098,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { RetRegs.push_back(VA.getLocReg()); } - // The x86-64 ABI for returning structs by value requires that we copy - // the sret argument into %rax for the return. We saved the argument into - // a virtual register in the entry block, so now we copy the value out - // and into %rax. We also do the same with %eax for Win32. - if (F.hasStructRetAttr() && - (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) { + // All x86 ABIs require that for returning structs by value we copy + // the sret argument into %rax/%eax (depending on ABI) for the return. + // We saved the argument into a virtual register in the entry block, + // so now we copy the value out and into %rax/%eax. + if (F.hasStructRetAttr()) { unsigned Reg = X86MFInfo->getSRetReturnReg(); assert(Reg && "SRetReturnReg should have been set in LowerFormalArguments()!"); @@ -1431,17 +1459,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { .addMBB(TrueMBB); } - // Obtain the branch weight and add the TrueBB to the successor list. - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); - - // Emits an unconditional branch to the FalseBB, obtains the branch - // weight, and adds it to the successor list. 
- fastEmitBranch(FalseMBB, DbgLoc); - + finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { @@ -1472,12 +1490,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc)) .addMBB(TrueMBB); - fastEmitBranch(FalseMBB, DbgLoc); - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); + + finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } } @@ -1492,12 +1506,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) .addMBB(TrueMBB); - fastEmitBranch(FalseMBB, DbgLoc); - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); + finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } @@ -1511,12 +1520,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { .addReg(OpReg).addImm(1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1)) .addMBB(TrueMBB); - fastEmitBranch(FalseMBB, DbgLoc); - uint32_t BranchWeight = 0; - if (FuncInfo.BPI) - BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), - TrueMBB->getBasicBlock()); - FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); + finishCondBranch(BI->getParent(), TrueMBB, FalseMBB); return true; } @@ -1945,6 +1949,9 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { unsigned ResultReg; if (Subtarget->hasAVX()) { + const TargetRegisterClass *FR32 = &X86::FR32RegClass; + const TargetRegisterClass *VR128 = &X86::VR128RegClass; + // If we have AVX, create 1 blendv instead of 3 logic instructions. // Blendv was introduced with SSE 4.1, but the 2 register form implicitly // uses XMM0 as the selection register. That may need just as many @@ -1955,10 +1962,13 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { unsigned BlendOpcode = (RetVT.SimpleTy == MVT::f32) ? 
X86::VBLENDVPSrr : X86::VBLENDVPDrr; - unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill, + unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill, CmpRHSReg, CmpRHSIsKill, CC); - ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill, - LHSReg, LHSIsKill, CmpReg, true); + unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill, + LHSReg, LHSIsKill, CmpReg, true); + ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg); } else { unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, CmpRHSReg, CmpRHSIsKill, CC); @@ -2806,10 +2816,12 @@ static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget, if (CC == CallingConv::Fast || CC == CallingConv::GHC || CC == CallingConv::HiPE) return 0; - if (CS && !CS->paramHasAttr(1, Attribute::StructRet)) - return 0; - if (CS && CS->paramHasAttr(1, Attribute::InReg)) - return 0; + + if (CS) + if (CS->arg_empty() || !CS->paramHasAttr(1, Attribute::StructRet) || + CS->paramHasAttr(1, Attribute::InReg) || Subtarget->isTargetMCU()) + return 0; + return 4; } @@ -2924,7 +2936,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86); // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = CCInfo.getNextStackOffset(); + unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); @@ -3020,8 +3032,8 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()]; unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore, - ArgVT.getStoreSize(), Alignment); + MachinePointerInfo::getStack(*FuncInfo.MF, LocMemOffset), + MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); if (Flags.isByVal()) { X86AddressMode SrcAM; SrcAM.Base.Reg = ArgReg; @@ -3252,6 +3264,30 @@ X86FastISel::fastSelectInstruction(const Instruction *I) { updateValueMap(I, Reg); return true; } + case Instruction::BitCast: { + // Select SSE2/AVX bitcasts between 128/256 bit vector types. + if (!Subtarget->hasSSE2()) + return false; + + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); + + if (!SrcVT.isSimple() || !DstVT.isSimple()) + return false; + + if (!SrcVT.is128BitVector() && + !(Subtarget->hasAVX() && SrcVT.is256BitVector())) + return false; + + unsigned Reg = getRegForValue(I->getOperand(0)); + if (Reg == 0) + return false; + + // No instruction is needed for conversion. Reuse the register used by + // the first operand.
+ updateValueMap(I, Reg); + return true; + } } return false; @@ -3384,8 +3420,8 @@ unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) { TII.get(Opc), ResultReg); addDirectMem(MIB, AddrReg); MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, - DL.getPointerSize(), Align); + MachinePointerInfo::getConstantPool(*FuncInfo.MF), + MachineMemOperand::MOLoad, DL.getPointerSize(), Align); MIB->addMemOperand(*FuncInfo.MF, MMO); return ResultReg; } diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 5eb4faeedff4..1dd69e8a6a5f 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -9,6 +9,7 @@ // // This file defines the pass that finds instructions that can be // re-written as LEA instructions in order to reduce pipeline delays. +// When optimizing for size it replaces suitable LEAs with INC or DEC. // //===----------------------------------------------------------------------=== @@ -61,6 +62,11 @@ class FixupLEAPass : public MachineFunctionPass { void processInstructionForSLM(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI); + /// \brief Look for LEAs that add 1 to reg or subtract 1 from reg + /// and convert them to INC or DEC respectively. + bool fixupIncDec(MachineBasicBlock::iterator &I, + MachineFunction::iterator MFI) const; + /// \brief Determine if an instruction references a machine register /// and, if so, whether it reads or writes the register. RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I); @@ -89,6 +95,8 @@ public: private: MachineFunction *MF; const X86InstrInfo *TII; // Machine instruction info. + bool OptIncDec; + bool OptLEA; }; char FixupLEAPass::ID = 0; } @@ -150,7 +158,10 @@ FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); } bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { MF = &Func; const X86Subtarget &ST = Func.getSubtarget(); - if (!ST.LEAusesAG() && !ST.slowLEA()) + OptIncDec = !ST.slowIncDec() || Func.getFunction()->optForMinSize(); + OptLEA = ST.LEAusesAG() || ST.slowLEA(); + + if (!OptLEA && !OptIncDec) return false; TII = ST.getInstrInfo(); @@ -187,7 +198,7 @@ FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) { static inline bool getPreviousInstr(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { if (I == MFI->begin()) { - if (MFI->isPredecessor(MFI)) { + if (MFI->isPredecessor(&*MFI)) { I = --MFI->end(); return true; } else @@ -222,6 +233,60 @@ FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I, return nullptr; } +static inline bool isLEA(const int opcode) { + return opcode == X86::LEA16r || opcode == X86::LEA32r || + opcode == X86::LEA64r || opcode == X86::LEA64_32r; +} + +/// isLEASimpleIncOrDec - Does this LEA have one of these forms: +/// lea %reg, 1(%reg) +/// lea %reg, -1(%reg) +static inline bool isLEASimpleIncOrDec(MachineInstr *LEA) { + unsigned SrcReg = LEA->getOperand(1 + X86::AddrBaseReg).getReg(); + unsigned DstReg = LEA->getOperand(0).getReg(); + unsigned AddrDispOp = 1 + X86::AddrDisp; + return SrcReg == DstReg && + LEA->getOperand(1 + X86::AddrIndexReg).getReg() == 0 && + LEA->getOperand(1 + X86::AddrSegmentReg).getReg() == 0 && + LEA->getOperand(AddrDispOp).isImm() && + (LEA->getOperand(AddrDispOp).getImm() == 1 || + LEA->getOperand(AddrDispOp).getImm() == -1); +} + +bool FixupLEAPass::fixupIncDec(MachineBasicBlock::iterator &I, +
MachineFunction::iterator MFI) const { + MachineInstr *MI = I; + int Opcode = MI->getOpcode(); + if (!isLEA(Opcode)) + return false; + + if (isLEASimpleIncOrDec(MI) && TII->isSafeToClobberEFLAGS(*MFI, I)) { + int NewOpcode; + bool isINC = MI->getOperand(4).getImm() == 1; + switch (Opcode) { + case X86::LEA16r: + NewOpcode = isINC ? X86::INC16r : X86::DEC16r; + break; + case X86::LEA32r: + case X86::LEA64_32r: + NewOpcode = isINC ? X86::INC32r : X86::DEC32r; + break; + case X86::LEA64r: + NewOpcode = isINC ? X86::INC64r : X86::DEC64r; + break; + } + + MachineInstr *NewMI = + BuildMI(*MFI, I, MI->getDebugLoc(), TII->get(NewOpcode)) + .addOperand(MI->getOperand(0)) + .addOperand(MI->getOperand(1)); + MFI->erase(I); + I = static_cast(NewMI); + return true; + } + return false; +} + void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { // Process a load, store, or LEA instruction. @@ -265,8 +330,7 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { MachineInstr *MI = I; const int opcode = MI->getOpcode(); - if (opcode != X86::LEA16r && opcode != X86::LEA32r && opcode != X86::LEA64r && - opcode != X86::LEA64_32r) + if (!isLEA(opcode)) return; if (MI->getOperand(5).getReg() != 0 || !MI->getOperand(4).isImm() || !TII->isSafeToClobberEFLAGS(*MFI, I)) @@ -280,7 +344,8 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, return; int addrr_opcode, addri_opcode; switch (opcode) { - default: llvm_unreachable("Unexpected LEA instruction"); + default: + llvm_unreachable("Unexpected LEA instruction"); case X86::LEA16r: addrr_opcode = X86::ADD16rr; addri_opcode = X86::ADD16ri; @@ -330,10 +395,16 @@ bool FixupLEAPass::processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI) { for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) { - if (MF.getSubtarget().isSLM()) - processInstructionForSLM(I, MFI); - else - processInstruction(I, MFI); + if (OptIncDec) + if (fixupIncDec(I, MFI)) + continue; + + if (OptLEA) { + if (MF.getSubtarget().isSLM()) + processInstructionForSLM(I, MFI); + else + processInstruction(I, MFI); + } } return false; } diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 40b9c8a863a3..97bb8ab653a6 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -120,12 +120,10 @@ namespace { // Return a bitmask of FP registers in block's live-in list. 
static unsigned calcLiveInMask(MachineBasicBlock *MBB) { unsigned Mask = 0; - for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), - E = MBB->livein_end(); I != E; ++I) { - unsigned Reg = *I; - if (Reg < X86::FP0 || Reg > X86::FP6) + for (const auto &LI : MBB->liveins()) { + if (LI.PhysReg < X86::FP0 || LI.PhysReg > X86::FP6) continue; - Mask |= 1 << (Reg - X86::FP0); + Mask |= 1 << (LI.PhysReg - X86::FP0); } return Mask; } @@ -301,8 +299,9 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { bool FPIsUsed = false; static_assert(X86::FP6 == X86::FP0+6, "Register enums aren't sorted right!"); + const MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0; i <= 6; ++i) - if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { + if (!MRI.reg_nodbg_empty(X86::FP0 + i)) { FPIsUsed = true; break; } @@ -321,7 +320,7 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) { // Process the function in depth first order so that we process at least one // of the predecessors for every reachable block in the function. SmallPtrSet<MachineBasicBlock*, 8> Processed; - MachineBasicBlock *Entry = MF.begin(); + MachineBasicBlock *Entry = &MF.front(); bool Changed = false; for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed)) @@ -329,9 +328,9 @@ // Process any unreachable blocks in arbitrary order now. if (MF.size() != Processed.size()) - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) - if (Processed.insert(BB).second) - Changed |= processBasicBlock(MF, *BB); + for (MachineBasicBlock &BB : MF) + if (Processed.insert(&BB).second) + Changed |= processBasicBlock(MF, BB); LiveBundles.clear(); @@ -348,13 +347,12 @@ void FPS::bundleCFG(MachineFunction &MF) { LiveBundles.resize(Bundles->getNumBundles()); // Gather the actual live-in masks for all MBBs. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { - MachineBasicBlock *MBB = I; - const unsigned Mask = calcLiveInMask(MBB); + for (MachineBasicBlock &MBB : MF) { + const unsigned Mask = calcLiveInMask(&MBB); if (!Mask) continue; // Update MBB ingoing bundle mask. 
- LiveBundles[Bundles->getBundle(MBB->getNumber(), false)].Mask |= Mask; + LiveBundles[Bundles->getBundle(MBB.getNumber(), false)].Mask |= Mask; } } @@ -546,17 +544,9 @@ namespace { }; } -#ifndef NDEBUG -static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { - for (unsigned i = 0; i != NumEntries-1; ++i) - if (!(Table[i] < Table[i+1])) return false; - return true; -} -#endif - -static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { - const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); - if (I != Table+N && I->from == Opcode) +static int Lookup(ArrayRef<TableEntry> Table, unsigned Opcode) { + const TableEntry *I = std::lower_bound(Table.begin(), Table.end(), Opcode); + if (I != Table.end() && I->from == Opcode) return I->to; return -1; } @@ -567,7 +557,7 @@ static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { #define ASSERT_SORTED(TABLE) \ { static bool TABLE##Checked = false; \ if (!TABLE##Checked) { \ - assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \ + assert(std::is_sorted(std::begin(TABLE), std::end(TABLE)) && \ "All lookup tables must be sorted for efficient access!"); \ TABLE##Checked = true; \ } \ @@ -746,7 +736,7 @@ static const TableEntry OpcodeTable[] = { static unsigned getConcreteOpcode(unsigned Opcode) { ASSERT_SORTED(OpcodeTable); - int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode); + int Opc = Lookup(OpcodeTable, Opcode); assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); return Opc; } @@ -797,7 +787,7 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) { RegMap[Stack[--StackTop]] = ~0; // Update state // Check to see if there is a popping version of this instruction... - int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); + int Opcode = Lookup(PopTable, I->getOpcode()); if (Opcode != -1) { I->setDesc(TII->get(Opcode)); if (Opcode == X86::UCOM_FPPr) @@ -1193,7 +1183,7 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { // We decide which form to use based on what is on the top of the stack, and // which operand is killed by this instruction. - const TableEntry *InstTable; + ArrayRef<TableEntry> InstTable; bool isForward = TOS == Op0; bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); if (updateST0) { @@ -1208,8 +1198,7 @@ InstTable = ReverseSTiTable; } - int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), - MI->getOpcode()); + int Opcode = Lookup(InstTable, MI->getOpcode()); assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); // NotTOS - The register which is not on the top of stack... @@ -1520,31 +1509,6 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { return; } - case X86::WIN_FTOL_32: - case X86::WIN_FTOL_64: { - // Push the operand into ST0. - MachineOperand &Op = MI->getOperand(0); - assert(Op.isUse() && Op.isReg() && - Op.getReg() >= X86::FP0 && Op.getReg() <= X86::FP6); - unsigned FPReg = getFPReg(Op); - if (Op.isKill()) - moveToTop(FPReg, Inst); - else - duplicateToTop(FPReg, ScratchFPReg, Inst); - - // Emit the call. This will pop the operand. 
- BuildMI(*MBB, Inst, MI->getDebugLoc(), TII->get(X86::CALLpcrel32)) - .addExternalSymbol("_ftol2") - .addReg(X86::ST0, RegState::ImplicitKill) - .addReg(X86::ECX, RegState::ImplicitDefine) - .addReg(X86::EAX, RegState::Define | RegState::Implicit) - .addReg(X86::EDX, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - --StackTop; - - break; - } - case X86::RETQ: case X86::RETL: case X86::RETIL: diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 3a21b57f0157..242d0333ef9a 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -18,25 +18,23 @@ #include "X86Subtarget.h" #include "X86TargetMachine.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include <cstdlib> using namespace llvm; -// FIXME: completely move here. -extern cl::opt<bool> ForceStackAlign; - X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, unsigned StackAlignOverride) : TargetFrameLowering(StackGrowsDown, StackAlignOverride, @@ -80,6 +78,27 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); } +/// usesTheStack - This function checks if any of the users of EFLAGS +/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has +/// to use the stack, and if we don't adjust the stack we clobber the first +/// frame index. +/// See X86InstrInfo::copyPhysReg. +static bool usesTheStack(const MachineFunction &MF) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Conservatively assume that inline assembly might use the stack. + if (MF.hasInlineAsm()) + return true; + + return any_of(MRI.reg_instructions(X86::EFLAGS), + [](const MachineInstr &RI) { return RI.isCopy(); }); +} + +static bool doesStackUseImplyFP(const MachineFunction &MF) { + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + return IsWin64Prologue && usesTheStack(MF); +} + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. @@ -92,8 +111,9 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const { MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() || MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || - MMI.callsUnwindInit() || MMI.callsEHReturn() || - MFI->hasStackMap() || MFI->hasPatchPoint()); + MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() || + MFI->hasStackMap() || MFI->hasPatchPoint() || + doesStackUseImplyFP(MF)); } static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) { @@ -148,21 +168,14 @@ static unsigned getLEArOpcode(unsigned IsLP64) { /// to this register without worry about clobbering it. 
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - const TargetRegisterInfo *TRI, + const X86RegisterInfo *TRI, bool Is64Bit) { const MachineFunction *MF = MBB.getParent(); const Function *F = MF->getFunction(); if (!F || MF->getMMI().callsEHReturn()) return 0; - static const uint16_t CallerSavedRegs32Bit[] = { - X86::EAX, X86::EDX, X86::ECX, 0 - }; - - static const uint16_t CallerSavedRegs64Bit[] = { - X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI, - X86::R8, X86::R9, X86::R10, X86::R11, 0 - }; + const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF); unsigned Opc = MBBI->getOpcode(); switch (Opc) { @@ -191,10 +204,9 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB, Uses.insert(*AI); } - const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit; - for (; *CS; ++CS) - if (!Uses.count(*CS)) - return *CS; + for (auto CS : AvailableRegs) + if (!Uses.count(CS) && CS != X86::RIP) + return CS; } } @@ -214,8 +226,12 @@ static bool isEAXLiveIn(MachineFunction &MF) { return false; } -/// Check whether or not the terminators of \p MBB needs to read EFLAGS. -static bool terminatorsNeedFlagsAsInput(const MachineBasicBlock &MBB) { +/// Check if the flags need to be preserved before the terminators. +/// This is the case if EFLAGS is live-in to the region formed by the +/// terminators, or is live-out of that region without being defined +/// by a terminator. +static bool +flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) { for (const MachineInstr &MI : MBB.terminators()) { bool BreakNext = false; for (const MachineOperand &MO : MI.operands()) { @@ -225,15 +241,27 @@ if (Reg != X86::EFLAGS) continue; - // This terminator needs an eflag that is not defined - // by a previous terminator. + // This terminator needs an eflags that is not defined + // by an earlier terminator: + // EFLAGS is live-in of the region composed by the terminators. if (!MO.isDef()) return true; + // This terminator defines the eflags, i.e., we don't need to preserve it. + // However, we still need to check that this specific terminator does not + // read a live-in value. BreakNext = true; } + // We found a definition of the eflags, no need to preserve them. if (BreakNext) - break; + return false; } + + // None of the terminators use or define the eflags. + // Check if they are live-out, that would imply we need to preserve them. + for (const MachineBasicBlock *Succ : MBB.successors()) + if (Succ->isLiveIn(X86::EFLAGS)) + return true; + return false; }
+ // We need to use LEA operations if EFLAGS is live in, because + // it means an instruction will read it before it gets defined. + UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS); } else { // If we can use LEA for SP but we shouldn't, check that none // of the terminators uses the eflags. Otherwise we will insert @@ -321,10 +357,10 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( // and is an optimization anyway. UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent()); if (UseLEA && !STI.useLeaForSP()) - UseLEA = terminatorsNeedFlagsAsInput(MBB); + UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB); // If that assert breaks, that means we do not do the right thing // in canUseAsEpilogue. - assert((UseLEA || !terminatorsNeedFlagsAsInput(MBB)) && + assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) && "We shouldn't have allowed this insertion point"); } @@ -347,30 +383,6 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( return MI; } -/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator. -static -void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - unsigned StackPtr, uint64_t *NumBytes = nullptr) { - if (MBBI == MBB.begin()) return; - - MachineBasicBlock::iterator PI = std::prev(MBBI); - unsigned Opc = PI->getOpcode(); - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || - Opc == X86::ADD32ri || Opc == X86::ADD32ri8 || - Opc == X86::LEA32r || Opc == X86::LEA64_32r) && - PI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes += PI->getOperand(2).getImm(); - MBB.erase(PI); - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || - Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && - PI->getOperand(0).getReg() == StackPtr) { - if (NumBytes) - *NumBytes -= PI->getOperand(2).getImm(); - MBB.erase(PI); - } -} - int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, bool doMergeWithPrevious) const { @@ -436,27 +448,265 @@ X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, } } -/// usesTheStack - This function checks if any of the users of EFLAGS -/// copies the EFLAGS. We know that the code that lowers COPY of EFLAGS has -/// to use the stack, and if we don't adjust the stack we clobber the first -/// frame index. -/// See X86InstrInfo::copyPhysReg. 
-static bool usesTheStack(const MachineFunction &MF) { - const MachineRegisterInfo &MRI = MF.getRegInfo(); - - for (MachineRegisterInfo::reg_instr_iterator - ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end(); - ri != re; ++ri) - if (ri->isCopy()) - return true; - - return false; +MachineInstr *X86FrameLowering::emitStackProbe(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, + bool InProlog) const { + const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); + if (STI.isTargetWindowsCoreCLR()) { + if (InProlog) { + return emitStackProbeInlineStub(MF, MBB, MBBI, DL, true); + } else { + return emitStackProbeInline(MF, MBB, MBBI, DL, false); + } + } else { + return emitStackProbeCall(MF, MBB, MBBI, DL, InProlog); + } } -void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - DebugLoc DL) const { +void X86FrameLowering::inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const { + const StringRef ChkStkStubSymbol = "__chkstk_stub"; + MachineInstr *ChkStkStub = nullptr; + + for (MachineInstr &MI : PrologMBB) { + if (MI.isCall() && MI.getOperand(0).isSymbol() && + ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) { + ChkStkStub = &MI; + break; + } + } + + if (ChkStkStub != nullptr) { + MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator()); + assert(std::prev(MBBI).operator==(ChkStkStub) && + "MBBI expected after __chkstk_stub."); + DebugLoc DL = PrologMBB.findDebugLoc(MBBI); + emitStackProbeInline(MF, PrologMBB, MBBI, DL, true); + ChkStkStub->eraseFromParent(); + } +} + +MachineInstr *X86FrameLowering::emitStackProbeInline( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const { + const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); + assert(STI.is64Bit() && "different expansion needed for 32 bit"); + assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR"); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + const BasicBlock *LLVM_BB = MBB.getBasicBlock(); + + // RAX contains the number of bytes of desired stack adjustment. + // The handling here assumes this value has already been updated so as to + // maintain stack alignment. + // + // We need to exit with RSP modified by this amount and execute suitable + // page touches to notify the OS that we're growing the stack responsibly. + // All stack probing must be done without modifying RSP. + // + // MBB: + // SizeReg = RAX; + // ZeroReg = 0 + // CopyReg = RSP + // Flags, TestReg = CopyReg - SizeReg + // FinalReg = !Flags.Ovf ? TestReg : ZeroReg + // LimitReg = gs magic thread env access + // if FinalReg >= LimitReg goto ContinueMBB + // RoundBB: + // RoundReg = page address of FinalReg + // LoopMBB: + // LoopReg = PHI(LimitReg,ProbeReg) + // ProbeReg = LoopReg - PageSize + // [ProbeReg] = 0 + // if (ProbeReg > RoundReg) goto LoopMBB + // ContinueMBB: + // RSP = RSP - RAX + // [rest of original MBB] + + // Set up the new basic blocks + MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB); + + MachineFunction::iterator MBBIter = std::next(MBB.getIterator()); + MF.insert(MBBIter, RoundMBB); + MF.insert(MBBIter, LoopMBB); + MF.insert(MBBIter, ContinueMBB); + + // Split MBB and move the tail portion down to ContinueMBB. 
+ MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI); + ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end()); + ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB); + + // Some useful constants + const int64_t ThreadEnvironmentStackLimit = 0x10; + const int64_t PageSize = 0x1000; + const int64_t PageMask = ~(PageSize - 1); + + // Registers we need. For the normal case we use virtual + // registers. For the prolog expansion we use RAX, RCX and RDX. + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterClass *RegClass = &X86::GR64RegClass; + const unsigned SizeReg = InProlog ? (unsigned)X86::RAX + : MRI.createVirtualRegister(RegClass), + ZeroReg = InProlog ? (unsigned)X86::RCX + : MRI.createVirtualRegister(RegClass), + CopyReg = InProlog ? (unsigned)X86::RDX + : MRI.createVirtualRegister(RegClass), + TestReg = InProlog ? (unsigned)X86::RDX + : MRI.createVirtualRegister(RegClass), + FinalReg = InProlog ? (unsigned)X86::RDX + : MRI.createVirtualRegister(RegClass), + RoundedReg = InProlog ? (unsigned)X86::RDX + : MRI.createVirtualRegister(RegClass), + LimitReg = InProlog ? (unsigned)X86::RCX + : MRI.createVirtualRegister(RegClass), + JoinReg = InProlog ? (unsigned)X86::RCX + : MRI.createVirtualRegister(RegClass), + ProbeReg = InProlog ? (unsigned)X86::RCX + : MRI.createVirtualRegister(RegClass); + + // SP-relative offsets where we can save RCX and RDX. + int64_t RCXShadowSlot = 0; + int64_t RDXShadowSlot = 0; + + // If inlining in the prolog, save RCX and RDX. + // Future optimization: don't save or restore if not live in. + if (InProlog) { + // Compute the offsets. We need to account for things already + // pushed onto the stack at this point: return address, frame + // pointer (if used), and callee saves. + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); + const bool HasFP = hasFP(MF); + RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); + RDXShadowSlot = RCXShadowSlot + 8; + // Emit the saves. + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RCXShadowSlot) + .addReg(X86::RCX); + addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, + RDXShadowSlot) + .addReg(X86::RDX); + } else { + // Not in the prolog. Copy RAX to a virtual reg. + BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX); + } + + // Add code to MBB to check for overflow and set the new target stack pointer + // to zero if so. + BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg) + .addReg(ZeroReg, RegState::Undef) + .addReg(ZeroReg, RegState::Undef); + BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP); + BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg) + .addReg(CopyReg) + .addReg(SizeReg); + BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg) + .addReg(TestReg) + .addReg(ZeroReg); + + // FinalReg now holds final stack pointer value, or zero if + // allocation would overflow. Compare against the current stack + // limit from the thread environment block. Note this limit is the + // lowest touched page on the stack, not the point at which the OS + // will cause an overflow exception, so this is just an optimization + // to avoid unnecessarily touching pages that are below the current + // SP but already committed to the stack by the OS. 
+ BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg) + .addReg(0) + .addImm(1) + .addReg(0) + .addImm(ThreadEnvironmentStackLimit) + .addReg(X86::GS); + BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg); + // Jump if the desired stack pointer is at or above the stack limit. + BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB); + + // Add code to roundMBB to round the final stack pointer to a page boundary. + BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg) + .addReg(FinalReg) + .addImm(PageMask); + BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB); + + // LimitReg now holds the current stack limit, RoundedReg page-rounded + // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page + // and probe until we reach RoundedReg. + if (!InProlog) { + BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg) + .addReg(LimitReg) + .addMBB(RoundMBB) + .addReg(ProbeReg) + .addMBB(LoopMBB); + } + + addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg, + false, -PageSize); + + // Probe by storing a byte onto the stack. + BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi)) + .addReg(ProbeReg) + .addImm(1) + .addReg(0) + .addImm(0) + .addReg(0) + .addImm(0); + BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr)) + .addReg(RoundedReg) + .addReg(ProbeReg); + BuildMI(LoopMBB, DL, TII.get(X86::JNE_1)).addMBB(LoopMBB); + + MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI(); + + // If in prolog, restore RDX and RCX. + if (InProlog) { + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), + X86::RCX), + X86::RSP, false, RCXShadowSlot); + addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm), + X86::RDX), + X86::RSP, false, RDXShadowSlot); + } + + // Now that the probing is done, add code to continueMBB to update + // the stack pointer for real. + BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP) + .addReg(X86::RSP) + .addReg(SizeReg); + + // Add the control flow edges we need. + MBB.addSuccessor(ContinueMBB); + MBB.addSuccessor(RoundMBB); + RoundMBB->addSuccessor(LoopMBB); + LoopMBB->addSuccessor(ContinueMBB); + LoopMBB->addSuccessor(LoopMBB); + + // Mark all the instructions added to the prolog as frame setup. + if (InProlog) { + for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) { + BeforeMBBI->setFlag(MachineInstr::FrameSetup); + } + for (MachineInstr &MI : *RoundMBB) { + MI.setFlag(MachineInstr::FrameSetup); + } + for (MachineInstr &MI : *LoopMBB) { + MI.setFlag(MachineInstr::FrameSetup); + } + for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin(); + CMBBI != ContinueMBBI; ++CMBBI) { + CMBBI->setFlag(MachineInstr::FrameSetup); + } + } + + // Possible TODO: physreg liveness for InProlog case. + + return ContinueMBBI; +} + +MachineInstr *X86FrameLowering::emitStackProbeCall( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const { bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; unsigned CallOp; @@ -478,6 +728,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, Symbol = "_chkstk"; MachineInstrBuilder CI; + MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI); // All current stack probes take AX and SP as input, clobber flags, and // preserve all registers. x86_64 probes leave RSP unmodified. 
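The inline CoreCLR expansion above (emitStackProbeInline, following the MBB/RoundBB/LoopMBB/ContinueMBB pseudo-code in its block comment) reduces to simple page arithmetic. Here is an arithmetic-only C++ model of the probe addresses it generates; the helper name is invented, the constants mirror the patch, and the real code only enters this path when the desired final RSP is below the committed limit (the JAE above skips probing otherwise):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static const uint64_t PageSize = 0x1000;
    static const uint64_t PageMask = ~(PageSize - 1);

    // Addresses the probe loop stores to, assuming FinalSP < Limit and
    // Limit page-aligned, mirroring RoundBB (rounding) plus LoopMBB.
    static std::vector<uint64_t> probeAddresses(uint64_t Limit,
                                                uint64_t FinalSP) {
      std::vector<uint64_t> Probes;
      const uint64_t Rounded = FinalSP & PageMask; // RoundBB
      uint64_t Probe = Limit;                      // LoopReg starts at LimitReg
      do {
        Probe -= PageSize;       // ProbeReg = LoopReg - PageSize
        Probes.push_back(Probe); // [ProbeReg] = 0 commits the page
      } while (Probe > Rounded); // conditional branch back to LoopMBB
      return Probes;
    }

    int main() {
      // Committed limit at 0x7000, desired final RSP two pages lower:
      std::vector<uint64_t> P = probeAddresses(0x7000, 0x5000);
      assert(P.size() == 2 && P[0] == 0x6000 && P[1] == 0x5000);
      return 0;
    }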
@@ -507,6 +758,26 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, .addReg(X86::RSP) .addReg(X86::RAX); } + + if (InProlog) { + // Apply the frame setup flag to all inserted instrs. + for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI) + ExpansionMBBI->setFlag(MachineInstr::FrameSetup); + } + + return MBBI; +} + +MachineInstr *X86FrameLowering::emitStackProbeInlineStub( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const { + + assert(InProlog && "ChkStkStub called outside prolog!"); + + BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("__chkstk_stub"); + + return MBBI; } static unsigned calculateSetFPREG(uint64_t SPAdjust) { @@ -526,7 +797,7 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con const MachineFrameInfo *MFI = MF.getFrameInfo(); uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. unsigned StackAlign = getStackAlignment(); - if (ForceStackAlign) { + if (MF.getFunction()->hasFnAttribute("stackrealign")) { if (MFI->hasCalls()) MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; else if (MaxAlign < SlotSize) @@ -537,15 +808,14 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, + DebugLoc DL, unsigned Reg, uint64_t MaxAlign) const { uint64_t Val = -MaxAlign; - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), - StackPtr) - .addReg(StackPtr) - .addImm(Val) - .setMIFlag(MachineInstr::FrameSetup); + unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val); + MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) + .addReg(Reg) + .addImm(Val) + .setMIFlag(MachineInstr::FrameSetup); // The EFLAGS implicit def is dead. MI->getOperand(3).setIsDead(); @@ -646,6 +916,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate. + bool IsFunclet = MBB.isEHFuncletEntry(); + EHPersonality Personality = EHPersonality::Unknown; + if (Fn->hasPersonalityFn()) + Personality = classifyEHPersonality(Fn->getPersonalityFn()); + bool FnHasClrFunclet = + MMI.hasEHFunclets() && Personality == EHPersonality::CoreCLR; + bool IsClrFunclet = IsFunclet && FnHasClrFunclet; bool HasFP = hasFP(MF); bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv()); bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); @@ -655,9 +932,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, unsigned FramePtr = TRI->getFrameRegister(MF); const unsigned MachineFramePtr = STI.isTarget64BitILP32() - ? getX86SubSuperRegister(FramePtr, MVT::i64, false) - : FramePtr; + ? getX86SubSuperRegister(FramePtr, 64) : FramePtr; unsigned BasePtr = TRI->getBaseRegister(); + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. DebugLoc DL; // Add RETADDR move area to callee saved frame size. @@ -723,6 +1002,24 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, uint64_t NumBytes = 0; int stackGrowth = -SlotSize; + // Find the funclet establisher parameter + unsigned Establisher = X86::NoRegister; + if (IsClrFunclet) + Establisher = Uses64BitFramePtr ? 
X86::RCX : X86::ECX; + else if (IsFunclet) + Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; + + if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { + // Immediately spill establisher into the home slot. + // The runtime cares about this. + // MOV64mr %rdx, 16(%rsp) + unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) + .addReg(Establisher) + .setMIFlag(MachineInstr::FrameSetup); + MBB.addLiveIn(Establisher); + } + if (HasFP) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; @@ -739,7 +1036,11 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Get the offset of the stack slot for the EBP register, which is // guaranteed to be the last slot by processFunctionBeforeFrameFinalized. // Update the frame offset adjustment. - MFI->setOffsetAdjustment(-NumBytes); + if (!IsFunclet) + MFI->setOffsetAdjustment(-NumBytes); + else + assert(MFI->getOffsetAdjustment() == -(int)NumBytes && + "should calculate same local variable offset for funclets"); // Save EBP/RBP into the appropriate stack slot. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) @@ -765,35 +1066,46 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } - if (!IsWin64Prologue) { + if (!IsWin64Prologue && !IsFunclet) { // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); + + if (NeedsDwarfCFI) { + // Mark effective beginning of when frame pointer becomes valid. + // Define the current CFA to use the EBP/RBP register. + unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); + BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfaRegister( + nullptr, DwarfFramePtr)); + } } - if (NeedsDwarfCFI) { - // Mark effective beginning of when frame pointer becomes valid. - // Define the current CFA to use the EBP/RBP register. - unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); + // Mark the FramePtr as live-in in every block. Don't do this again for + // funclet prologues. + if (!IsFunclet) { + for (MachineBasicBlock &EveryMBB : MF) + EveryMBB.addLiveIn(MachineFramePtr); } - - // Mark the FramePtr as live-in in every block. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - I->addLiveIn(MachineFramePtr); } else { + assert(!IsFunclet && "funclets without FPs not yet implemented"); NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } + // For EH funclets, only allocate enough space for outgoing calls. Save the + // NumBytes value that we would've used for the parent frame. + unsigned ParentFrameNumBytes = NumBytes; + if (IsFunclet) + NumBytes = getWinEHFuncletFrameSize(MF); + // Skip the callee-saved push instructions. bool PushedRegs = false; int StackOffset = 2 * stackGrowth; while (MBBI != MBB.end() && + MBBI->getFlag(MachineInstr::FrameSetup) && (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; @@ -818,9 +1130,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). // Don't do this for Win64, it needs to realign the stack after the prologue. 
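The realignment referred to in the comment just above is done by BuildStackAlignAND (changed earlier in this file to take the register to align), which ANDs the stack pointer with -MaxAlign. That is the usual two's-complement trick: for a power-of-two alignment the immediate is a mask with the low bits clear, so the AND rounds the pointer down to an aligned address. A small self-contained illustration of the arithmetic, with an invented helper name:

    #include <cassert>
    #include <cstdint>

    // For a power-of-two A, -A in two's complement equals ~(A - 1): all
    // ones except the low log2(A) bits, so the AND rounds down. Since x86
    // stacks grow down, rounding down only adds padding, never
    // underallocates.
    static uint64_t alignSPDown(uint64_t SP, uint64_t A) {
      assert(A && (A & (A - 1)) == 0 && "alignment must be a power of two");
      return SP & -A; // same as SP & ~(A - 1)
    }

    int main() {
      assert(alignSPDown(0x7fff1238, 16) == 0x7fff1230);
      assert(alignSPDown(0x7fff1238, 64) == 0x7fff1200);
      return 0;
    }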
- if (!IsWin64Prologue && TRI->needsStackRealignment(MF)) { + if (!IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); + BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); } // If there is an SUB32ri of ESP immediately before this instruction, merge @@ -839,7 +1151,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. uint64_t AlignedNumBytes = NumBytes; - if (IsWin64Prologue && TRI->needsStackRealignment(MF)) + if (IsWin64Prologue && !IsFunclet && TRI->needsStackRealignment(MF)) AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign); if (AlignedNumBytes >= StackProbeSize && UseStackProbe) { // Check whether EAX is livein for this function. @@ -876,26 +1188,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. // We'll also use 4 already allocated bytes for EAX. BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) - .setMIFlag(MachineInstr::FrameSetup); + .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) + .setMIFlag(MachineInstr::FrameSetup); } - // Save a pointer to the MI where we set AX. - MachineBasicBlock::iterator SetRAX = MBBI; - --SetRAX; - // Call __chkstk, __chkstk_ms, or __alloca. - emitStackProbeCall(MF, MBB, MBBI, DL); - - // Apply the frame setup flag to all inserted instrs. - for (; SetRAX != MBBI; ++SetRAX) - SetRAX->setFlag(MachineInstr::FrameSetup); + emitStackProbe(MF, MBB, MBBI, DL, true); if (isEAXAlive) { // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes - 4); + MachineInstr *MI = + addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), + StackPtr, false, NumBytes - 4); MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } @@ -909,19 +1213,72 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); int SEHFrameOffset = 0; + unsigned SPOrEstablisher; + if (IsFunclet) { + if (IsClrFunclet) { + // The establisher parameter passed to a CLR funclet is actually a pointer + // to the (mostly empty) frame of its nearest enclosing funclet; we have + // to find the root function establisher frame by loading the PSPSym from + // the intermediate frame. + unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); + MachinePointerInfo NoInfo; + MBB.addLiveIn(Establisher); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher), + Establisher, false, PSPSlotOffset) + .addMemOperand(MF.getMachineMemOperand( + NoInfo, MachineMemOperand::MOLoad, SlotSize, SlotSize)); + ; + // Save the root establisher back into the current funclet's (mostly + // empty) frame, in case a sub-funclet or the GC needs it. + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, + false, PSPSlotOffset) + .addReg(Establisher) + .addMemOperand( + MF.getMachineMemOperand(NoInfo, MachineMemOperand::MOStore | + MachineMemOperand::MOVolatile, + SlotSize, SlotSize)); + } + SPOrEstablisher = Establisher; + } else { + SPOrEstablisher = StackPtr; + } + if (IsWin64Prologue && HasFP) { - SEHFrameOffset = calculateSetFPREG(NumBytes); + // Set RBP to a small fixed offset from RSP. 
In the funclet case, we base + // this calculation on the incoming establisher, which holds the value of + // RSP from the parent frame at the end of the prologue. + SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); if (SEHFrameOffset) addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), - StackPtr, false, SEHFrameOffset); + SPOrEstablisher, false, SEHFrameOffset); else - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr); + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) + .addReg(SPOrEstablisher); - if (NeedsWinCFI) + // If this is not a funclet, emit the CFI describing our frame pointer. + if (NeedsWinCFI && !IsFunclet) { BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) .addImm(FramePtr) .addImm(SEHFrameOffset) .setMIFlag(MachineInstr::FrameSetup); + if (isAsynchronousEHPersonality(Personality)) + MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; + } + } else if (IsFunclet && STI.is32Bit()) { + // Reset EBP / ESI to something good for funclets. + MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); + // If we're a catch funclet, we can be returned to via catchret. Save ESP + // into the registration node so that the runtime will restore it for us. + if (!MBB.isCleanupFuncletEntry()) { + assert(Personality == EHPersonality::MSVC_CXX); + unsigned FrameReg; + int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; + int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg); + // ESP is the first field, so no extra displacement is needed. + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg, + false, EHRegOffset) + .addReg(X86::ESP); + } } while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { @@ -932,7 +1289,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, int FI; if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { if (X86::FR64RegClass.contains(Reg)) { - int Offset = getFrameIndexOffset(MF, FI); + unsigned IgnoredFrameReg; + int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg); Offset += SEHFrameOffset; BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) @@ -948,14 +1306,33 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) .setMIFlag(MachineInstr::FrameSetup); + if (FnHasClrFunclet && !IsFunclet) { + // Save the so-called Initial-SP (i.e. the value of the stack pointer + // immediately after the prolog) into the PSPSlot so that funclets + // and the GC can recover it. + unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); + auto PSPInfo = MachinePointerInfo::getFixedStack( + MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false, + PSPSlotOffset) + .addReg(StackPtr) + .addMemOperand(MF.getMachineMemOperand( + PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, + SlotSize, SlotSize)); + } + // Realign stack after we spilled callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). // Win64 requires aligning the stack after the prologue. if (IsWin64Prologue && TRI->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); - BuildStackAlignAND(MBB, MBBI, DL, MaxAlign); + BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign); } + // We already dealt with stack realignment and funclets above. + if (IsFunclet && STI.is32Bit()) + return; + // If we need a base pointer, set it up here. 
It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer @@ -964,7 +1341,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Update the base pointer with the current stack pointer. unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) - .addReg(StackPtr) + .addReg(SPOrEstablisher) .setMIFlag(MachineInstr::FrameSetup); if (X86FI->getRestoreBasePointer()) { // Stash value of base pointer. Saving RSP instead of EBP shortens @@ -972,18 +1349,21 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true, X86FI->getRestoreBasePointerOffset()) - .addReg(StackPtr) + .addReg(SPOrEstablisher) .setMIFlag(MachineInstr::FrameSetup); } - if (X86FI->getHasSEHFramePtrSave()) { + if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { // Stash the value of the frame pointer relative to the base pointer for // Win32 EH. This supports Win32 EH, which does the inverse of the above: // it recovers the frame pointer from the base pointer rather than the // other way around. unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; - addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), BasePtr, true, - getFrameIndexOffset(MF, X86FI->getSEHFramePtrSaveIndex())) + unsigned UsedReg; + int Offset = + getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg); + assert(UsedReg == BasePtr); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset) .addReg(FramePtr) .setMIFlag(MachineInstr::FrameSetup); } @@ -1015,6 +1395,69 @@ bool X86FrameLowering::canUseLEAForSPInEpilogue( return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); } +static bool isFuncletReturnInstr(MachineInstr *MI) { + switch (MI->getOpcode()) { + case X86::CATCHRET: + case X86::CLEANUPRET: + return true; + default: + return false; + } + llvm_unreachable("impossible"); +} + +// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the +// stack. It holds a pointer to the bottom of the root function frame. The +// establisher frame pointer passed to a nested funclet may point to the +// (mostly empty) frame of its parent funclet, but it will need to find +// the frame of the root function to access locals. To facilitate this, +// every funclet copies the pointer to the bottom of the root function +// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the +// same offset for the PSPSym in the root function frame that's used in the +// funclets' frames allows each funclet to dynamically accept any ancestor +// frame as its establisher argument (the runtime doesn't guarantee the +// immediate parent for some reason lost to history), and also allows the GC, +// which uses the PSPSym for some bookkeeping, to find it in any funclet's +// frame with only a single offset reported for the entire method. 
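As a toy model of the PSPSym chaining described in the comment above (types invented for illustration): every frame publishes the root function's frame address at one agreed slot offset, so a funclet handed any ancestor frame as its establisher recovers the root with a single load at that common offset.

    #include <cassert>
    #include <cstdint>

    // Invented stand-in for a frame that reserves a PSP slot at a fixed,
    // method-wide offset. The root stores its own frame address; each
    // funclet copies forward whatever it read, so the chain always
    // resolves to the root.
    struct Frame {
      uint64_t PSPSlot;
    };

    static uint64_t rootFrameOf(const Frame &Establisher) {
      return Establisher.PSPSlot; // one load at the common PSPSym offset
    }

    int main() {
      Frame Root{0x7fffe000};          // root records its own frame bottom
      Frame Outer{rootFrameOf(Root)};  // funclet entered with Root
      Frame Inner{rootFrameOf(Outer)}; // nested funclet entered with Outer
      assert(rootFrameOf(Inner) == 0x7fffe000);
      return 0;
    }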
+unsigned +X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { + const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo(); + // getFrameIndexReferenceFromSP has an out ref parameter for the stack + // pointer register; pass a dummy that we ignore + unsigned SPReg; + int Offset = getFrameIndexReferenceFromSP(MF, Info.PSPSymFrameIdx, SPReg); + assert(Offset >= 0); + return static_cast<unsigned>(Offset); +} + +unsigned +X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { + // This is the size of the pushed CSRs. + unsigned CSSize = + MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize(); + // This is the amount of stack a funclet needs to allocate. + unsigned UsedSize; + EHPersonality Personality = + classifyEHPersonality(MF.getFunction()->getPersonalityFn()); + if (Personality == EHPersonality::CoreCLR) { + // CLR funclets need to hold enough space to include the PSPSym, at the + // same offset from the stack pointer (immediately after the prolog) as it + // resides at in the main function. + UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize; + } else { + // Other funclets just need enough stack for outgoing call arguments. + UsedSize = MF.getFrameInfo()->getMaxCallFrameSize(); + } + // RBP is not included in the callee saved register block. After pushing RBP, + // everything is 16 byte aligned. Everything we allocate before an outgoing + // call must also be 16 byte aligned. + unsigned FrameSizeMinusRBP = + RoundUpToAlignment(CSSize + UsedSize, getStackAlignment()); + // Subtract out the size of the callee saved registers. This is how much stack + // each funclet will allocate. + return FrameSizeMinusRBP - CSSize; +} + void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -1027,12 +1470,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, const bool Is64BitILP32 = STI.isTarget64BitILP32(); unsigned FramePtr = TRI->getFrameRegister(MF); unsigned MachineFramePtr = - Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false) - : FramePtr; + Is64BitILP32 ? getX86SubSuperRegister(FramePtr, 64) : FramePtr; bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); bool NeedsWinCFI = IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry(); + bool IsFunclet = isFuncletReturnInstr(MBBI); + MachineBasicBlock *TargetMBB = nullptr; // Get the number of bytes to allocate from the FrameInfo. uint64_t StackSize = MFI->getStackSize(); @@ -1040,7 +1484,27 @@ unsigned CSSize = X86FI->getCalleeSavedFrameSize(); uint64_t NumBytes = 0; - if (hasFP(MF)) { + if (MBBI->getOpcode() == X86::CATCHRET) { + // SEH shouldn't use catchret. + assert(!isAsynchronousEHPersonality( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())) && + "SEH should not use CATCHRET"); + + NumBytes = getWinEHFuncletFrameSize(MF); + assert(hasFP(MF) && "EH funclets without FP not yet implemented"); + TargetMBB = MBBI->getOperand(0).getMBB(); + + // Pop EBP. + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), + MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); + } else if (MBBI->getOpcode() == X86::CLEANUPRET) { + NumBytes = getWinEHFuncletFrameSize(MF); + assert(hasFP(MF) && "EH funclets without FP not yet implemented"); + BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? 
X86::POP64r : X86::POP32r), + MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); + } else if (hasFP(MF)) { // Calculate required stack adjustment. uint64_t FrameSize = StackSize - SlotSize; NumBytes = FrameSize - CSSize; @@ -1052,7 +1516,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // Pop EBP. BuildMI(MBB, MBBI, DL, - TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr); + TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr) + .setMIFlag(MachineInstr::FrameDestroy); } else { NumBytes = StackSize - CSSize; } @@ -1063,26 +1528,50 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock::iterator PI = std::prev(MBBI); unsigned Opc = PI->getOpcode(); - if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE && - !PI->isTerminator()) + if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && + (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && + Opc != X86::DBG_VALUE && !PI->isTerminator()) break; --MBBI; } MachineBasicBlock::iterator FirstCSPop = MBBI; + if (TargetMBB) { + // Fill EAX/RAX with the address of the target block. + unsigned ReturnReg = STI.is64Bit() ? X86::RAX : X86::EAX; + if (STI.is64Bit()) { + // LEA64r TargetMBB(%rip), %rax + BuildMI(MBB, FirstCSPop, DL, TII.get(X86::LEA64r), ReturnReg) + .addReg(X86::RIP) + .addImm(0) + .addReg(0) + .addMBB(TargetMBB) + .addReg(0); + } else { + // MOV32ri $TargetMBB, %eax + BuildMI(MBB, FirstCSPop, DL, TII.get(X86::MOV32ri), ReturnReg) + .addMBB(TargetMBB); + } + // Record that we've taken the address of TargetMBB and no longer just + // reference it in a terminator. + TargetMBB->setHasAddressTaken(); + } + if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); // If there is an ADD32ri or SUB32ri of ESP immediately before this // instruction, merge the two instructions. if (NumBytes || MFI->hasVarSizedObjects()) - mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes); + NumBytes += mergeSPUpdates(MBB, MBBI, true); // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! Same applies for the case, when stack was - // realigned. - if (TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) { + // realigned. Don't do this if this was a funclet epilogue, since the funclets + // will not do realignment or dynamic stack allocation. + if ((TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) && + !IsFunclet) { if (TRI->needsStackRealignment(MF)) MBBI = FirstCSPop; unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt); @@ -1134,9 +1623,24 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } } -int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { +// NOTE: this only has a subset of the full frame index logic. In +// particular, the FI < 0 and AfterFPPop logic is handled in +// X86RegisterInfo::eliminateFrameIndex, but not here. Possibly +// (probably?) it should be moved into here. +int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + + // We can't calculate offset from frame pointer if the stack is realigned, + // so enforce usage of stack/base pointer. The base pointer is used when we + // have dynamic allocas in addition to dynamic realignment. 
+ if (TRI->hasBasePointer(MF)) + FrameReg = TRI->getBaseRegister(); + else if (TRI->needsStackRealignment(MF)) + FrameReg = TRI->getStackRegister(); + else + FrameReg = TRI->getFrameRegister(MF); + // Offset will hold the offset from the stack pointer at function entry to the // object. // We need to factor in additional offsets applied during the prologue to the @@ -1207,48 +1711,62 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, return Offset + FPDelta; } -int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, - unsigned &FrameReg) const { - // We can't calculate offset from frame pointer if the stack is realigned, - // so enforce usage of stack/base pointer. The base pointer is used when we - // have dynamic allocas in addition to dynamic realignment. - if (TRI->hasBasePointer(MF)) - FrameReg = TRI->getBaseRegister(); - else if (TRI->needsStackRealignment(MF)) - FrameReg = TRI->getStackRegister(); - else - FrameReg = TRI->getFrameRegister(MF); - return getFrameIndexOffset(MF, FI); -} - -// Simplified from getFrameIndexOffset keeping only StackPointer cases -int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const { +// Simplified from getFrameIndexReference keeping only StackPointer cases +int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); // Does not include any dynamic realign. const uint64_t StackSize = MFI->getStackSize(); { #ifndef NDEBUG - // Note: LLVM arranges the stack as: - // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP) - // > "Stack Slots" (<--SP) - // We can always address StackSlots from RSP. We can usually (unless - // needsStackRealignment) address CSRs from RSP, but sometimes need to - // address them from RBP. FixedObjects can be placed anywhere in the stack - // frame depending on their specific requirements (i.e. we can actually - // refer to arguments to the function which are stored in the *callers* - // frame). As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs - // AND FixedObjects IFF needsStackRealignment or hasVarSizedObject. + // LLVM arranges the stack as follows: + // ... + // ARG2 + // ARG1 + // RETADDR + // PUSH RBP <-- RBP points here + // PUSH CSRs + // ~~~~~~~ <-- possible stack realignment (non-win64) + // ... + // STACK OBJECTS + // ... <-- RSP after prologue points here + // ~~~~~~~ <-- possible stack realignment (win64) + // + // if (hasVarSizedObjects()): + // ... <-- "base pointer" (ESI/RBX) points here + // DYNAMIC ALLOCAS + // ... <-- RSP points here + // + // Case 1: In the simple case of no stack realignment and no dynamic + // allocas, both "fixed" stack objects (arguments and CSRs) are addressable + // with fixed offsets from RSP. + // + // Case 2: In the case of stack realignment with no dynamic allocas, fixed + // stack objects are addressed with RBP and regular stack objects with RSP. + // + // Case 3: In the case of dynamic allocas and stack realignment, RSP is used + // to address stack arguments for outgoing calls and nothing else. The "base + // pointer" points to local variables, and RBP points to fixed objects. + // + // In cases 2 and 3, we can only answer for non-fixed stack objects, and the + // answer we give is relative to the SP after the prologue, and not the + // SP in the middle of the function. 
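To ground case 1 of the layout comment above in numbers, here is the arithmetic getFrameIndexReferenceFromSP performs (the return Offset + StackSize further down): frame-index offsets are recorded relative to the stack pointer at function entry, so adding the fixed StackSize yields the offset from RSP after the prologue's allocation. The figures are invented:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t StackSize = 64;    // prologue: sub $64, %rsp
      const int64_t ObjectOffset = -8; // slot 8 bytes below the entry SP
      // Entry SP = S; object lives at S - 8; RSP after prologue = S - 64.
      const int64_t FromSP = ObjectOffset + StackSize;
      assert(FromSP == 56); // addressable as 56(%rsp)
      return 0;
    }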
- assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); + assert((!MFI->isFixedObjectIndex(FI) || !TRI->needsStackRealignment(MF) || + STI.isTargetWin64()) && + "offset from fixed object to SP is not static"); - // We don't handle tail calls, and shouldn't be seeing them - // either. + // We don't handle tail calls, and shouldn't be seeing them either. int TailCallReturnAddrDelta = MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta(); assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!"); #endif } + // Fill in FrameReg output argument. + FrameReg = TRI->getStackRegister(); + // This is how the math works out: // // %rsp grows (i.e. gets lower) left to right. Each box below is // one word (eight bytes). Obj0 is the stack slot we're trying to // get to. @@ -1280,15 +1798,6 @@ int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int F return Offset + StackSize; } -// Simplified from getFrameIndexReference keeping only StackPointer cases -int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF, - int FI, - unsigned &FrameReg) const { - assert(!TRI->hasBasePointer(MF) && "we don't handle this case"); - - FrameReg = TRI->getStackRegister(); - return getFrameIndexOffsetFromSP(MF, FI); -} bool X86FrameLowering::assignCalleeSavedSpillSlots( MachineFunction &MF, const TargetRegisterInfo *TRI, @@ -1358,6 +1867,11 @@ bool X86FrameLowering::spillCalleeSavedRegisters( const TargetRegisterInfo *TRI) const { DebugLoc DL = MBB.findDebugLoc(MI); + // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI + // for us, and there are no XMM CSRs on Win32. + if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows()) + return true; + // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { @@ -1399,6 +1913,22 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (CSI.empty()) return false; + if (isFuncletReturnInstr(MI) && STI.isOSWindows()) { + // Don't restore CSRs in 32-bit EH funclets. Matches + // spillCalleeSavedRegisters. + if (STI.is32Bit()) + return true; + // Don't restore CSRs before an SEH catchret. SEH except blocks do not form + // funclets. emitEpilogue transforms these to normal jumps. + if (MI->getOpcode() == X86::CATCHRET) { + const Function *Func = MBB.getParent()->getFunction(); + bool IsSEH = isAsynchronousEHPersonality( + classifyEHPersonality(Func->getPersonalityFn())); + if (IsSEH) + return true; + } + } + DebugLoc DL = MBB.findDebugLoc(MI); // Reload XMMs from stack frame. @@ -1420,7 +1950,8 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, !X86::GR32RegClass.contains(Reg)) continue; - BuildMI(MBB, MI, DL, TII.get(Opc), Reg); + BuildMI(MBB, MI, DL, TII.get(Opc), Reg) + .setMIFlag(MachineInstr::FrameDestroy); } return true; } @@ -1450,8 +1981,16 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, } // Spill the BasePtr if it's used. - if (TRI->hasBasePointer(MF)) + if (TRI->hasBasePointer(MF)) { SavedRegs.set(TRI->getBaseRegister()); + + // Allocate a spill slot for EBP if we have a base pointer and EH funclets. + if (MF.getMMI().hasEHFunclets()) { + int FI = MFI->CreateSpillStackObject(SlotSize, SlotSize); + X86FI->setHasSEHFramePtrSave(true); + X86FI->setSEHFramePtrSaveIndex(FI); + } + } } static bool @@ -1545,11 +2084,9 @@ void X86FrameLowering::adjustForSegmentedStacks( // The MOV R10, RAX needs to be in a different block, since the RET we emit in // allocMBB needs to be last (terminating) instruction. 
- for (MachineBasicBlock::livein_iterator i = PrologueMBB.livein_begin(), - e = PrologueMBB.livein_end(); - i != e; i++) { - allocMBB->addLiveIn(*i); - checkMBB->addLiveIn(*i); + for (const auto &LI : PrologueMBB.liveins()) { + allocMBB->addLiveIn(LI); + checkMBB->addLiveIn(LI); } if (IsNested) @@ -1682,8 +2219,6 @@ void X86FrameLowering::adjustForSegmentedStacks( .addImm(StackSize); BuildMI(allocMBB, DL, TII.get(MOVri), Reg11) .addImm(X86FI->getArgumentStackSize()); - MF.getRegInfo().setPhysRegUsed(Reg10); - MF.getRegInfo().setPhysRegUsed(Reg11); } else { BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) .addImm(X86FI->getArgumentStackSize()); @@ -1821,11 +2356,9 @@ void X86FrameLowering::adjustForHiPEPrologue( MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); - for (MachineBasicBlock::livein_iterator I = PrologueMBB.livein_begin(), - E = PrologueMBB.livein_end(); - I != E; I++) { - stackCheckMBB->addLiveIn(*I); - incStackMBB->addLiveIn(*I); + for (const auto &LI : PrologueMBB.liveins()) { + stackCheckMBB->addLiveIn(LI); + incStackMBB->addLiveIn(LI); } MF.push_front(incStackMBB); @@ -1870,16 +2403,84 @@ void X86FrameLowering::adjustForHiPEPrologue( .addReg(ScratchReg), PReg, false, SPLimitOffset); BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB); - stackCheckMBB->addSuccessor(&PrologueMBB, 99); - stackCheckMBB->addSuccessor(incStackMBB, 1); - incStackMBB->addSuccessor(&PrologueMBB, 99); - incStackMBB->addSuccessor(incStackMBB, 1); + stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100}); + stackCheckMBB->addSuccessor(incStackMBB, {1, 100}); + incStackMBB->addSuccessor(&PrologueMBB, {99, 100}); + incStackMBB->addSuccessor(incStackMBB, {1, 100}); } #ifdef XDEBUG MF.verify(); #endif } +bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, int Offset) const { + + if (Offset <= 0) + return false; + + if (Offset % SlotSize) + return false; + + int NumPops = Offset / SlotSize; + // This is only worth it if we have at most 2 pops. + if (NumPops != 1 && NumPops != 2) + return false; + + // Handle only the trivial case where the adjustment directly follows + // a call. This is the most common one, anyway. + if (MBBI == MBB.begin()) + return false; + MachineBasicBlock::iterator Prev = std::prev(MBBI); + if (!Prev->isCall() || !Prev->getOperand(1).isRegMask()) + return false; + + unsigned Regs[2]; + unsigned FoundRegs = 0; + + auto RegMask = Prev->getOperand(1); + + auto &RegClass = + Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; + // Try to find up to NumPops free registers. + for (auto Candidate : RegClass) { + + // Poor man's liveness: + // Since we're immediately after a call, any register that is clobbered + // by the call and not defined by it can be considered dead. + if (!RegMask.clobbersPhysReg(Candidate)) + continue; + + bool IsDef = false; + for (const MachineOperand &MO : Prev->implicit_operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == Candidate) { + IsDef = true; + break; + } + } + + if (IsDef) + continue; + + Regs[FoundRegs++] = Candidate; + if (FoundRegs == (unsigned)NumPops) + break; + } + + if (FoundRegs == 0) + return false; + + // If we found only one free register, but need two, reuse the same one twice. + while (FoundRegs < (unsigned)NumPops) + Regs[FoundRegs++] = Regs[0]; + + for (int i = 0; i < NumPops; ++i) + BuildMI(MBB, MBBI, DL, + TII.get(STI.is64Bit() ? 
X86::POP64r : X86::POP32r), Regs[i]);
+
+  return true;
+}
+
 void X86FrameLowering::
 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I) const {
@@ -1895,8 +2496,6 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     // If the stack pointer can be changed after prologue, turn the
     // adjcallstackup instruction into a 'sub ESP, <amt>' and the
     // adjcallstackdown instruction into 'add ESP, <amt>'
-    if (Amount == 0)
-      return;
 
     // We need to keep the stack aligned properly.  To do this, we round the
     // amount of space needed for the outgoing arguments up to the next
@@ -1904,15 +2503,68 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
     unsigned StackAlign = getStackAlignment();
     Amount = RoundUpToAlignment(Amount, StackAlign);
 
+    MachineModuleInfo &MMI = MF.getMMI();
+    const Function *Fn = MF.getFunction();
+    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+    bool DwarfCFI = !WindowsCFI &&
+                    (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
+
+    // If we have any exception handlers in this function, and we adjust
+    // the SP before calls, we may need to indicate this to the unwinder
+    // using GNU_ARGS_SIZE. Note that this may be necessary even when
+    // Amount == 0, because the preceding function may have set a non-0
+    // GNU_ARGS_SIZE.
+    // TODO: We don't need to reset this between subsequent functions,
+    // if it didn't change.
+    bool HasDwarfEHHandlers = !WindowsCFI &&
+                              !MF.getMMI().getLandingPads().empty();
+
+    if (HasDwarfEHHandlers && !isDestroy &&
+        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
+      BuildCFI(MBB, I, DL,
+               MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
+
+    if (Amount == 0)
+      return;
+
     // Factor out the amount that gets handled inside the sequence
     // (Pushes of argument for frame setup, callee pops for frame destroy)
     Amount -= InternalAmt;
 
+    // TODO: This is needed only if we require precise CFA.
+    // If this is a callee-pop calling convention, emit a CFA adjust for
+    // the amount the callee popped.
+    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
+      BuildCFI(MBB, I, DL,
+               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
+
     if (Amount) {
       // Add Amount to SP to destroy a frame, and subtract to setup.
       int Offset = isDestroy ? Amount : -Amount;
-      BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+
+      if (!(Fn->optForMinSize() &&
+            adjustStackWithPops(MBB, I, DL, Offset)))
+        BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
     }
+
+    if (DwarfCFI && !hasFP(MF)) {
+      // If we don't have FP, but need to generate unwind information,
+      // we need to set the correct CFA offset after the stack adjustment.
+      // How much we adjust the CFA offset depends on whether we're emitting
+      // CFI only for EH purposes or for debugging. EH only requires the CFA
+      // offset to be correct at each call site, while for debugging we want
+      // it to be more precise.
+      int CFAOffset = Amount;
+      // TODO: When not using precise CFA, we also need to adjust for the
+      // InternalAmt here.
+
+      if (CFAOffset) {
+        CFAOffset = isDestroy ?
-CFAOffset : CFAOffset; + BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset)); + } + } + return; } @@ -1933,12 +2585,136 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { assert(MBB.getParent() && "Block is not attached to a function!"); + // Win64 has strict requirements in terms of epilogue and we are + // not taking a chance at messing with them. + // I.e., unless this block is already an exit block, we can't use + // it as an epilogue. + if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock()) + return false; + if (canUseLEAForSPInEpilogue(*MBB.getParent())) return true; // If we cannot use LEA to adjust SP, we may need to use ADD, which - // clobbers the EFLAGS. Check that none of the terminators reads the - // EFLAGS, and if one uses it, conservatively assume this is not + // clobbers the EFLAGS. Check that we do not need to preserve it, + // otherwise, conservatively assume this is not // safe to insert the epilogue here. - return !terminatorsNeedFlagsAsInput(MBB); + return !flagsNeedToBePreservedBeforeTheTerminators(MBB); +} + +bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { + // If we may need to emit frameless compact unwind information, give + // up as this is currently broken: PR25614. + return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF); +} + +MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL, bool RestoreSP) const { + assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env"); + assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32"); + assert(STI.is32Bit() && !Uses64BitFramePtr && + "restoring EBP/ESI on non-32-bit target"); + + MachineFunction &MF = *MBB.getParent(); + unsigned FramePtr = TRI->getFrameRegister(MF); + unsigned BasePtr = TRI->getBaseRegister(); + WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo(); + X86MachineFunctionInfo *X86FI = MF.getInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // FIXME: Don't set FrameSetup flag in catchret case. 
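// ---------------------------------------------------------------------------
// Aside: a standalone sketch (not part of the patch) of the eligibility test
// behind the adjustStackWithPops hunk above, which, when optimizing for
// minimum size, rewrites a small post-call `add esp, N` into one or two POPs
// into registers that are dead after the call. The slot size is a parameter
// here purely for illustration.
constexpr bool canUsePopsForAdjustment(int offset, int slotSize) {
  if (offset <= 0 || offset % slotSize != 0)
    return false;                        // must release whole stack slots
  const int numPops = offset / slotSize;
  return numPops == 1 || numPops == 2;   // only profitable for 1 or 2 pops
}
static_assert(canUsePopsForAdjustment(8, 4), "add esp, 8 -> pop; pop");
static_assert(!canUsePopsForAdjustment(12, 4), "three pops are not worth it");
static_assert(!canUsePopsForAdjustment(6, 4), "not slot-aligned");
// A register qualifies as a pop destination only if the preceding call's
// register mask clobbers it and the call itself does not define it, i.e. it
// is dead right after the call (the "poor man's liveness" check above).
// ---------------------------------------------------------------------------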
+ + int FI = FuncInfo.EHRegNodeFrameIndex; + int EHRegSize = MFI->getObjectSize(FI); + + if (RestoreSP) { + // MOV32rm -EHRegSize(%ebp), %esp + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP), + X86::EBP, true, -EHRegSize) + .setMIFlag(MachineInstr::FrameSetup); + } + + unsigned UsedReg; + int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg); + int EndOffset = -EHRegOffset - EHRegSize; + FuncInfo.EHRegNodeEndOffset = EndOffset; + + if (UsedReg == FramePtr) { + // ADD $offset, %ebp + unsigned ADDri = getADDriOpcode(false, EndOffset); + BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr) + .addReg(FramePtr) + .addImm(EndOffset) + .setMIFlag(MachineInstr::FrameSetup) + ->getOperand(3) + .setIsDead(); + assert(EndOffset >= 0 && + "end of registration object above normal EBP position!"); + } else if (UsedReg == BasePtr) { + // LEA offset(%ebp), %esi + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr), + FramePtr, false, EndOffset) + .setMIFlag(MachineInstr::FrameSetup); + // MOV32rm SavedEBPOffset(%esi), %ebp + assert(X86FI->getHasSEHFramePtrSave()); + int Offset = + getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg); + assert(UsedReg == BasePtr); + addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr), + UsedReg, true, Offset) + .setMIFlag(MachineInstr::FrameSetup); + } else { + llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr"); + } + return MBBI; +} + +unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const { + // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue. + unsigned Offset = 16; + // RBP is immediately pushed. + Offset += SlotSize; + // All callee-saved registers are then pushed. + Offset += MF.getInfo()->getCalleeSavedFrameSize(); + // Every funclet allocates enough stack space for the largest outgoing call. + Offset += getWinEHFuncletFrameSize(MF); + return Offset; +} + +void X86FrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + // If this function isn't doing Win64-style C++ EH, we don't need to do + // anything. + const Function *Fn = MF.getFunction(); + if (!STI.is64Bit() || !MF.getMMI().hasEHFunclets() || + classifyEHPersonality(Fn->getPersonalityFn()) != EHPersonality::MSVC_CXX) + return; + + // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset + // relative to RSP after the prologue. Find the offset of the last fixed + // object, so that we can allocate a slot immediately following it. If there + // were no fixed objects, use offset -SlotSize, which is immediately after the + // return address. Fixed objects have negative frame indices. + MachineFrameInfo *MFI = MF.getFrameInfo(); + int64_t MinFixedObjOffset = -SlotSize; + for (int I = MFI->getObjectIndexBegin(); I < 0; ++I) + MinFixedObjOffset = std::min(MinFixedObjOffset, MFI->getObjectOffset(I)); + + int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize; + int UnwindHelpFI = + MFI->CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false); + MF.getWinEHFuncInfo()->UnwindHelpFrameIdx = UnwindHelpFI; + + // Store -2 into UnwindHelp on function entry. We have to scan forwards past + // other frame setup instructions. 
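// ---------------------------------------------------------------------------
// Aside: the two additions above are largely fixed-layout arithmetic. A
// standalone sketch (not part of the patch) with invented example sizes:
constexpr unsigned parentFrameOffset(unsigned slotSize, unsigned csrBytes,
                                     unsigned funcletFrameBytes) {
  // 16(%rsp) home slot for RDX, one RBP push, the CSR pushes, then the
  // funclet's own fixed-size frame.
  return 16 + slotSize + csrBytes + funcletFrameBytes;
}
static_assert(parentFrameOffset(8, 16, 32) == 72, "Win64 parent frame offset");

// UnwindHelp is allocated one slot below the lowest fixed object, or one
// slot below the return address (offset -SlotSize) when there is none:
constexpr long long unwindHelpOffset(long long minFixedObjOffset,
                                     long long slotSize) {
  return minFixedObjOffset - slotSize;
}
static_assert(unwindHelpOffset(-8, 8) == -16, "one slot below fixed objects");
// The patch code below performs the described forward scan past the
// FrameSetup instructions before storing -2 into that slot.
// ---------------------------------------------------------------------------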
+ MachineBasicBlock &MBB = MF.front(); + auto MBBI = MBB.begin(); + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + DebugLoc DL = MBB.findDebugLoc(MBBI); + addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)), + UnwindHelpFI) + .addImm(-2); } diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 495cfcd1c3f7..3ab41b4a5789 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -47,11 +47,17 @@ public: unsigned StackPtr; - /// Emit a call to the target's stack probe function. This is required for all + /// Emit target stack probe code. This is required for all /// large stack allocations on Windows. The caller is required to materialize - /// the number of bytes to probe in RAX/EAX. - void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL) const; + /// the number of bytes to probe in RAX/EAX. Returns instruction just + /// after the expansion. + MachineInstr *emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + bool InProlog) const; + + /// Replace a StackProbe inline-stub with the actual probe code inline. + void inlineStackProbe(MachineFunction &MF, + MachineBasicBlock &PrologMBB) const override; void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -91,11 +97,9 @@ public: bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override; bool needsFrameIndexResolution(const MachineFunction &MF) const override; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const override; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; - int getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const; int getFrameIndexReferenceFromSP(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; @@ -103,6 +107,11 @@ public: MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; + unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override; + + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + /// Check the instruction before/after the passed instruction. If /// it is an ADD/SUB/LEA instruction it is deleted argument and the /// stack adjustment is returned as a positive value for ADD/LEA and @@ -125,7 +134,9 @@ public: /// \p MBB will be correctly handled by the target. bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; -private: + /// Returns true if the target will correctly handle shrink wrapping. + bool enableShrinkWrapping(const MachineFunction &MF) const override; + /// convertArgMovsToPushes - This method tries to convert a call sequence /// that uses sub and mov instructions to put the argument onto the stack /// into a series of pushes. @@ -135,22 +146,56 @@ private: MachineBasicBlock::iterator I, uint64_t Amount) const; - uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; - /// Wraps up getting a CFI index and building a MachineInstr for it. void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, MCCFIInstruction CFIInst) const; + /// Sets up EBP and optionally ESI based on the incoming EBP value. Only + /// needed for 32-bit. Used in funclet prologues and at catchret destinations. 
+ MachineBasicBlock::iterator + restoreWin32EHStackPointers(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + bool RestoreSP = false) const; + +private: + uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; + + /// Emit target stack probe as a call to a helper function + MachineInstr *emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, bool InProlog) const; + + /// Emit target stack probe as an inline sequence. + MachineInstr *emitStackProbeInline(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, bool InProlog) const; + + /// Emit a stub to later inline the target stack probe. + MachineInstr *emitStackProbeInlineStub(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, bool InProlog) const; + /// Aligns the stack pointer by ANDing it with -MaxAlign. void BuildStackAlignAND(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, - uint64_t MaxAlign) const; + unsigned Reg, uint64_t MaxAlign) const; + + /// Make small positive stack adjustments using POPs. + bool adjustStackWithPops(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, DebugLoc DL, + int Offset) const; /// Adjusts the stack pointer using LEA, SUB, or ADD. MachineInstrBuilder BuildStackAdjustment(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, int64_t Offset, bool InEpilogue) const; + + unsigned getPSPSlotOffsetFromSP(const MachineFunction &MF) const; + + unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index d5351d25d6ed..4414e478b99b 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -46,9 +46,8 @@ STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); //===----------------------------------------------------------------------===// namespace { - /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses - /// SDValue's instead of register numbers for the leaves of the matched - /// tree. + /// This corresponds to X86AddressMode, but uses SDValue's instead of register + /// numbers for the leaves of the matched tree. struct X86ISelAddressMode { enum { RegBase, @@ -87,8 +86,7 @@ namespace { IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; } - /// isRIPRelative - Return true if this addressing mode is already RIP - /// relative. + /// Return true if this addressing mode is already RIP-relative. bool isRIPRelative() const { if (BaseType != RegBase) return false; if (RegisterSDNode *RegNode = @@ -147,21 +145,25 @@ namespace { namespace { //===--------------------------------------------------------------------===// - /// ISel - X86 specific code to select X86 machine instructions for + /// ISel - X86-specific code to select X86 machine instructions for /// SelectionDAG operations. /// class X86DAGToDAGISel final : public SelectionDAGISel { - /// Subtarget - Keep a pointer to the X86Subtarget around so that we can + /// Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - /// OptForSize - If true, selector should try to optimize for code size - /// instead of performance. + /// If true, selector should try to optimize for code size instead of + /// performance. 
     bool OptForSize;
 
+    /// If true, selector should try to optimize for minimum code size.
+    bool OptForMinSize;
+
   public:
     explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
-        : SelectionDAGISel(tm, OptLevel), OptForSize(false) {}
+        : SelectionDAGISel(tm, OptLevel), OptForSize(false),
+          OptForMinSize(false) {}
 
     const char *getPassName() const override {
       return "X86 DAG->DAG Instruction Selection";
@@ -184,8 +186,7 @@ namespace {
       return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
     }
 
-    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
-    // sign extended field.
+    // True if the 64-bit immediate fits in a 32-bit sign-extended field.
     inline bool i64immSExt32(SDNode *N) const {
       uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
       return (int64_t)v == (int32_t)v;
     }
@@ -196,50 +197,50 @@ namespace {
 
   private:
     SDNode *Select(SDNode *N) override;
-    SDNode *SelectGather(SDNode *N, unsigned Opc);
-    SDNode *SelectAtomicLoadArith(SDNode *Node, MVT NVT);
+    SDNode *selectGather(SDNode *N, unsigned Opc);
+    SDNode *selectAtomicLoadArith(SDNode *Node, MVT NVT);
 
-    bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
-    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
-    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
-    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
-    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
+    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
+    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
+    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
+    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
+    bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
+    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                  unsigned Depth);
-    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
-    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
+    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
+    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                     SDValue &Scale, SDValue &Index, SDValue &Disp,
                     SDValue &Segment);
-    bool SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
+    bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
-    bool SelectMOV64Imm32(SDValue N, SDValue &Imm);
-    bool SelectLEAAddr(SDValue N, SDValue &Base,
+    bool selectMOV64Imm32(SDValue N, SDValue &Imm);
+    bool selectLEAAddr(SDValue N, SDValue &Base,
                        SDValue &Scale, SDValue &Index, SDValue &Disp,
                        SDValue &Segment);
-    bool SelectLEA64_32Addr(SDValue N, SDValue &Base,
+    bool selectLEA64_32Addr(SDValue N, SDValue &Base,
                             SDValue &Scale, SDValue &Index, SDValue &Disp,
                             SDValue &Segment);
-    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
+    bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
-    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+    bool selectScalarSSELoad(SDNode *Root, SDValue N,
                              SDValue &Base, SDValue &Scale,
                              SDValue &Index, SDValue &Disp,
                              SDValue &Segment,
                              SDValue &NodeWithChain);
-    bool TryFoldLoad(SDNode *P, SDValue N,
+    bool tryFoldLoad(SDNode *P, SDValue N,
                      SDValue &Base, SDValue &Scale,
                      SDValue &Index, SDValue &Disp,
                      SDValue &Segment);
 
-    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
-    /// inline asm expressions.
+    /// Implement addressing mode selection for inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector &OutOps) override; - void EmitSpecialCodeForMain(); + void emitSpecialCodeForMain(); inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL, SDValue &Base, SDValue &Scale, @@ -252,7 +253,7 @@ namespace { : AM.Base_Reg; Scale = getI8Imm(AM.Scale, DL); Index = AM.IndexReg; - // These are 32-bit even in 64-bit mode since RIP relative offset + // These are 32-bit even in 64-bit mode since RIP-relative offset // is 32-bit. if (AM.GV) Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), @@ -283,32 +284,105 @@ namespace { Segment = CurDAG->getRegister(0, MVT::i32); } - /// getI8Imm - Return a target constant with the specified value, of type - /// i8. + // Utility function to determine whether we should avoid selecting + // immediate forms of instructions for better code size or not. + // At a high level, we'd like to avoid such instructions when + // we have similar constants used within the same basic block + // that can be kept in a register. + // + bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { + uint32_t UseCount = 0; + + // Do not want to hoist if we're not optimizing for size. + // TODO: We'd like to remove this restriction. + // See the comment in X86InstrInfo.td for more info. + if (!OptForSize) + return false; + + // Walk all the users of the immediate. + for (SDNode::use_iterator UI = N->use_begin(), + UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) { + + SDNode *User = *UI; + + // This user is already selected. Count it as a legitimate use and + // move on. + if (User->isMachineOpcode()) { + UseCount++; + continue; + } + + // We want to count stores of immediates as real uses. + if (User->getOpcode() == ISD::STORE && + User->getOperand(1).getNode() == N) { + UseCount++; + continue; + } + + // We don't currently match users that have > 2 operands (except + // for stores, which are handled above) + // Those instruction won't match in ISEL, for now, and would + // be counted incorrectly. + // This may change in the future as we add additional instruction + // types. + if (User->getNumOperands() != 2) + continue; + + // Immediates that are used for offsets as part of stack + // manipulation should be left alone. These are typically + // used to indicate SP offsets for argument passing and + // will get pulled into stores/pushes (implicitly). + if (User->getOpcode() == X86ISD::ADD || + User->getOpcode() == ISD::ADD || + User->getOpcode() == X86ISD::SUB || + User->getOpcode() == ISD::SUB) { + + // Find the other operand of the add/sub. + SDValue OtherOp = User->getOperand(0); + if (OtherOp.getNode() == N) + OtherOp = User->getOperand(1); + + // Don't count if the other operand is SP. + RegisterSDNode *RegNode; + if (OtherOp->getOpcode() == ISD::CopyFromReg && + (RegNode = dyn_cast_or_null( + OtherOp->getOperand(1).getNode()))) + if ((RegNode->getReg() == X86::ESP) || + (RegNode->getReg() == X86::RSP)) + continue; + } + + // ... otherwise, count this and move on. + UseCount++; + } + + // If we have more than 1 use, then recommend for hoisting. + return (UseCount > 1); + } + + /// Return a target constant with the specified value of type i8. inline SDValue getI8Imm(unsigned Imm, SDLoc DL) { return CurDAG->getTargetConstant(Imm, DL, MVT::i8); } - /// getI32Imm - Return a target constant with the specified value, of type - /// i32. + /// Return a target constant with the specified value, of type i32. 
inline SDValue getI32Imm(unsigned Imm, SDLoc DL) { return CurDAG->getTargetConstant(Imm, DL, MVT::i32); } - /// getGlobalBaseReg - Return an SDNode that returns the value of - /// the global base register. Output instructions required to - /// initialize the global base register, if necessary. - /// + /// Return an SDNode that returns the value of the global base register. + /// Output instructions required to initialize the global base register, + /// if necessary. SDNode *getGlobalBaseReg(); - /// getTargetMachine - Return a reference to the TargetMachine, casted - /// to the target-specific type. + /// Return a reference to the TargetMachine, casted to the target-specific + /// type. const X86TargetMachine &getTargetMachine() const { return static_cast(TM); } - /// getInstrInfo - Return a reference to the TargetInstrInfo, casted - /// to the target-specific type. + /// Return a reference to the TargetInstrInfo, casted to the target-specific + /// type. const X86InstrInfo *getInstrInfo() const { return Subtarget->getInstrInfo(); } @@ -386,9 +460,9 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { return true; } -/// MoveBelowCallOrigChain - Replace the original chain operand of the call with +/// Replace the original chain operand of the call with /// load's chain operand and move load below the call's chain operand. -static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, +static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, SDValue Call, SDValue OrigChain) { SmallVector Ops; SDValue Chain = OrigChain.getOperand(0); @@ -418,7 +492,7 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, CurDAG->UpdateNodeOperands(Call.getNode(), Ops); } -/// isCalleeLoad - Return true if call address is a load and it can be +/// Return true if call address is a load and it can be /// moved below CALLSEQ_START and the chains leading up to the call. /// Return the CALLSEQ_START by reference as a second output. /// In the case of a tail call, there isn't a callseq node between the call @@ -461,12 +535,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { } void X86DAGToDAGISel::PreprocessISelDAG() { - // OptForSize is used in pattern predicates that isel is matching. - OptForSize = MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + // OptFor[Min]Size are used in pattern predicates that isel is matching. + OptForSize = MF->getFunction()->optForSize(); + OptForMinSize = MF->getFunction()->optForMinSize(); + assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize"); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ) { - SDNode *N = I++; // Preincrement iterator to avoid invalidation issues. + SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. if (OptLevel != CodeGenOpt::None && // Only does this when target favors doesn't favor register indirect @@ -500,7 +576,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { SDValue Load = N->getOperand(1); if (!isCalleeLoad(Load, Chain, HasCallSeq)) continue; - MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); + moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); ++NumLoadMoved; continue; } @@ -577,9 +653,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { } -/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in -/// the main function. 
-void X86DAGToDAGISel::EmitSpecialCodeForMain() { +/// Emit any code that needs to be executed only in the main function. +void X86DAGToDAGISel::emitSpecialCodeForMain() { if (Subtarget->isTargetCygMing()) { TargetLowering::ArgListTy Args; auto &DL = CurDAG->getDataLayout(); @@ -599,7 +674,7 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() { // If this is main, emit special code for main. if (const Function *Fn = MF->getFunction()) if (Fn->hasExternalLinkage() && Fn->getName() == "main") - EmitSpecialCodeForMain(); + emitSpecialCodeForMain(); } static bool isDispSafeForFrameIndex(int64_t Val) { @@ -612,7 +687,7 @@ static bool isDispSafeForFrameIndex(int64_t Val) { return isInt<31>(Val); } -bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, +bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM) { // Cannot combine ExternalSymbol displacements with integer offsets. if (Offset != 0 && (AM.ES || AM.MCSym)) @@ -634,7 +709,7 @@ bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset, } -bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ +bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ SDValue Address = N->getOperand(1); // load gs:0 -> GS segment register. @@ -658,11 +733,10 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){ return true; } -/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes -/// into an addressing mode. These wrap things that will resolve down into a -/// symbol reference. If no match is possible, this returns true, otherwise it -/// returns false. -bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { +/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing +/// mode. These wrap things that will resolve down into a symbol reference. +/// If no match is possible, this returns true, otherwise it returns false. +bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { // If the addressing mode already has a symbol as the displacement, we can // never match another symbol. if (AM.hasSymbolicDisplacement()) @@ -685,7 +759,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { X86ISelAddressMode Backup = AM; AM.GV = G->getGlobal(); AM.SymbolFlags = G->getTargetFlags(); - if (FoldOffsetIntoAddress(G->getOffset(), AM)) { + if (foldOffsetIntoAddress(G->getOffset(), AM)) { AM = Backup; return true; } @@ -694,7 +768,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { AM.CP = CP->getConstVal(); AM.Align = CP->getAlignment(); AM.SymbolFlags = CP->getTargetFlags(); - if (FoldOffsetIntoAddress(CP->getOffset(), AM)) { + if (foldOffsetIntoAddress(CP->getOffset(), AM)) { AM = Backup; return true; } @@ -710,7 +784,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { X86ISelAddressMode Backup = AM; AM.BlockAddr = BA->getBlockAddress(); AM.SymbolFlags = BA->getTargetFlags(); - if (FoldOffsetIntoAddress(BA->getOffset(), AM)) { + if (foldOffsetIntoAddress(BA->getOffset(), AM)) { AM = Backup; return true; } @@ -758,11 +832,10 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { return true; } -/// MatchAddress - Add the specified node to the specified addressing mode, -/// returning true if it cannot be done. This just pattern matches for the -/// addressing mode. 
-bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { - if (MatchAddressRecursively(N, AM, 0)) +/// Add the specified node to the specified addressing mode, returning true if +/// it cannot be done. This just pattern matches for the addressing mode. +bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { + if (matchAddressRecursively(N, AM, 0)) return true; // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has @@ -790,15 +863,49 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { return false; } +bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM, + unsigned Depth) { + // Add an artificial use to this node so that we can keep track of + // it if it gets CSE'd with a different node. + HandleSDNode Handle(N); + + X86ISelAddressMode Backup = AM; + if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) && + !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) + return false; + AM = Backup; + + // Try again after commuting the operands. + if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) && + !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1)) + return false; + AM = Backup; + + // If we couldn't fold both operands into the address at the same time, + // see if we can just put each operand into a register and fold at least + // the add. + if (AM.BaseType == X86ISelAddressMode::RegBase && + !AM.Base_Reg.getNode() && + !AM.IndexReg.getNode()) { + N = Handle.getValue(); + AM.Base_Reg = N.getOperand(0); + AM.IndexReg = N.getOperand(1); + AM.Scale = 1; + return false; + } + N = Handle.getValue(); + return true; +} + // Insert a node into the DAG at least before the Pos node's position. This // will reposition the node as needed, and will assign it a node ID that is <= // the Pos node's ID. Note that this does *not* preserve the uniqueness of node // IDs! The selection DAG must no longer depend on their uniqueness when this // is used. -static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { +static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { if (N.getNode()->getNodeId() == -1 || N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) { - DAG.RepositionNode(Pos.getNode(), N.getNode()); + DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode()); N.getNode()->setNodeId(Pos.getNode()->getNodeId()); } } @@ -807,7 +914,7 @@ static void InsertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { // safe. This allows us to convert the shift and and into an h-register // extract and a scaled index. Returns false if the simplification is // performed. -static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, +static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM) { @@ -835,12 +942,12 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, // these nodes. We continually insert before 'N' in sequence as this is // essentially a pre-flattened and pre-sorted sequence of nodes. There is no // hierarchy left to express. 
- InsertDAGNode(DAG, N, Eight); - InsertDAGNode(DAG, N, Srl); - InsertDAGNode(DAG, N, NewMask); - InsertDAGNode(DAG, N, And); - InsertDAGNode(DAG, N, ShlCount); - InsertDAGNode(DAG, N, Shl); + insertDAGNode(DAG, N, Eight); + insertDAGNode(DAG, N, Srl); + insertDAGNode(DAG, N, NewMask); + insertDAGNode(DAG, N, And); + insertDAGNode(DAG, N, ShlCount); + insertDAGNode(DAG, N, Shl); DAG.ReplaceAllUsesWith(N, Shl); AM.IndexReg = And; AM.Scale = (1 << ScaleLog); @@ -850,7 +957,7 @@ static bool FoldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, // Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this // allows us to fold the shift into this addressing mode. Returns false if the // transform succeeded. -static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, +static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM) { @@ -880,9 +987,9 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, // these nodes. We continually insert before 'N' in sequence as this is // essentially a pre-flattened and pre-sorted sequence of nodes. There is no // hierarchy left to express. - InsertDAGNode(DAG, N, NewMask); - InsertDAGNode(DAG, N, NewAnd); - InsertDAGNode(DAG, N, NewShift); + insertDAGNode(DAG, N, NewMask); + insertDAGNode(DAG, N, NewAnd); + insertDAGNode(DAG, N, NewShift); DAG.ReplaceAllUsesWith(N, NewShift); AM.Scale = 1 << ShiftAmt; @@ -917,7 +1024,7 @@ static bool FoldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, // Note that this function assumes the mask is provided as a mask *after* the // value is shifted. The input chain may or may not match that, but computing // such a mask is trivial. -static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, +static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, uint64_t Mask, SDValue Shift, SDValue X, X86ISelAddressMode &AM) { @@ -973,7 +1080,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, assert(X.getValueType() != VT); // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X); - InsertDAGNode(DAG, N, NewX); + insertDAGNode(DAG, N, NewX); X = NewX; } SDLoc DL(N); @@ -987,10 +1094,10 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, // these nodes. We continually insert before 'N' in sequence as this is // essentially a pre-flattened and pre-sorted sequence of nodes. There is no // hierarchy left to express. - InsertDAGNode(DAG, N, NewSRLAmt); - InsertDAGNode(DAG, N, NewSRL); - InsertDAGNode(DAG, N, NewSHLAmt); - InsertDAGNode(DAG, N, NewSHL); + insertDAGNode(DAG, N, NewSRLAmt); + insertDAGNode(DAG, N, NewSRL); + insertDAGNode(DAG, N, NewSHLAmt); + insertDAGNode(DAG, N, NewSHL); DAG.ReplaceAllUsesWith(N, NewSHL); AM.Scale = 1 << AMShiftAmt; @@ -998,7 +1105,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, return false; } -bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, +bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, unsigned Depth) { SDLoc dl(N); DEBUG({ @@ -1007,7 +1114,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, }); // Limit recursion. if (Depth > 5) - return MatchAddressBase(N, AM); + return matchAddressBase(N, AM); // If this is already a %rip relative address, we can only merge immediates // into it. Instead of handling this in every case, we handle it here. 
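// ---------------------------------------------------------------------------
// Aside: the folds being renamed here rest on plain bit identities. For
// foldMaskedShiftToScaledMask, (X << C1) & C2 is rewritten as
// (X & (C2 >> C1)) << C1 so the shift moves outward and can become an
// address scale (the selector additionally requires C1 to be 1, 2, or 3).
// A standalone check of the identity, not part of the patch:
constexpr unsigned long long shiftThenMask(unsigned long long x, unsigned c1,
                                           unsigned long long c2) {
  return (x << c1) & c2;
}
constexpr unsigned long long maskThenShift(unsigned long long x, unsigned c1,
                                           unsigned long long c2) {
  // Bits of c2 below bit c1 are unreachable after the shift, so dropping
  // them via (c2 >> c1) loses nothing.
  return (x & (c2 >> c1)) << c1;
}
static_assert(shiftThenMask(0x1234, 3, 0xFF8) ==
              maskThenShift(0x1234, 3, 0xFF8), "shift and mask commute");
static_assert(shiftThenMask(0xBEEF, 2, 0x3FC) ==
              maskThenShift(0xBEEF, 2, 0x3FC), "shift and mask commute");
// ---------------------------------------------------------------------------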
@@ -1020,7 +1127,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
       return true;
 
     if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
-      if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
+      if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
         return false;
     return true;
   }
@@ -1038,19 +1145,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
   }
   case ISD::Constant: {
     uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
-    if (!FoldOffsetIntoAddress(Val, AM))
+    if (!foldOffsetIntoAddress(Val, AM))
       return false;
     break;
   }
 
   case X86ISD::Wrapper:
   case X86ISD::WrapperRIP:
-    if (!MatchWrapper(N, AM))
+    if (!matchWrapper(N, AM))
       return false;
     break;
 
   case ISD::LOAD:
-    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
+    if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
     break;
 
@@ -1087,7 +1194,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
         ConstantSDNode *AddVal =
             cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
         uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
-        if (!FoldOffsetIntoAddress(Disp, AM))
+        if (!foldOffsetIntoAddress(Disp, AM))
           return false;
       }
 
@@ -1119,7 +1226,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 
     // Try to fold the mask and shift into the scale, and return false if we
     // succeed.
-    if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
+    if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
       return false;
     break;
   }
@@ -1153,7 +1260,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
           ConstantSDNode *AddVal =
               cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
           uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
-          if (FoldOffsetIntoAddress(Disp, AM))
+          if (foldOffsetIntoAddress(Disp, AM))
             Reg = N.getNode()->getOperand(0);
         } else {
           Reg = N.getNode()->getOperand(0);
@@ -1179,7 +1286,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
 
     // Test if the LHS of the sub can be folded.
     X86ISelAddressMode Backup = AM;
-    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
+    if (matchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
       AM = Backup;
       break;
     }
@@ -1227,56 +1334,26 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
     AM.Scale = 1;
 
     // Insert the new nodes into the topological ordering.
-    InsertDAGNode(*CurDAG, N, Zero);
-    InsertDAGNode(*CurDAG, N, Neg);
+    insertDAGNode(*CurDAG, N, Zero);
+    insertDAGNode(*CurDAG, N, Neg);
     return false;
   }
 
-  case ISD::ADD: {
-    // Add an artificial use to this node so that we can keep track of
-    // it if it gets CSE'd with a different node.
-    HandleSDNode Handle(N);
-
-    X86ISelAddressMode Backup = AM;
-    if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
-        !MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
+  case ISD::ADD:
+    if (!matchAdd(N, AM, Depth))
       return false;
-    AM = Backup;
-
-    // Try again after commuting the operands.
-    if (!MatchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)&&
-        !MatchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
-      return false;
-    AM = Backup;
-
-    // If we couldn't fold both operands into the address at the same time,
-    // see if we can just put each operand into a register and fold at least
-    // the add.
- if (AM.BaseType == X86ISelAddressMode::RegBase && - !AM.Base_Reg.getNode() && - !AM.IndexReg.getNode()) { - N = Handle.getValue(); - AM.Base_Reg = N.getOperand(0); - AM.IndexReg = N.getOperand(1); - AM.Scale = 1; - return false; - } - N = Handle.getValue(); break; - } case ISD::OR: - // Handle "X | C" as "X + C" iff X is known to have C bits clear. - if (CurDAG->isBaseWithConstantOffset(N)) { - X86ISelAddressMode Backup = AM; - ConstantSDNode *CN = cast(N.getOperand(1)); - - // Start with the LHS as an addr mode. - if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && - !FoldOffsetIntoAddress(CN->getSExtValue(), AM)) - return false; - AM = Backup; - } + // We want to look through a transform in InstCombine and DAGCombiner that + // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'. + // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3)) + // An 'lea' can then be used to match the shift (multiply) and add: + // and $1, %esi + // lea (%rsi, %rdi, 8), %rax + if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) && + !matchAdd(N, AM, Depth)) + return false; break; case ISD::AND: { @@ -1299,27 +1376,27 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, uint64_t Mask = N.getConstantOperandVal(1); // Try to fold the mask and shift into an extract and scale. - if (!FoldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM)) + if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM)) return false; // Try to fold the mask and shift directly into the scale. - if (!FoldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM)) + if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM)) return false; // Try to swap the mask and shift to place shifts which can be done as // a scale on the outside of the mask. - if (!FoldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM)) + if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM)) return false; break; } } - return MatchAddressBase(N, AM); + return matchAddressBase(N, AM); } -/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the +/// Helper for MatchAddress. Add the specified node to the /// specified addressing mode without any further recursion. -bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { +bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { // Is the base register already occupied? if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { // If so, check to see if the scale index register is set. @@ -1339,7 +1416,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) { return false; } -bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { @@ -1362,7 +1439,7 @@ bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, // If Base is 0, the whole address is in index and the Scale is 1 if (isa(Base)) { - assert(dyn_cast(Base)->isNullValue() && + assert(cast(Base)->isNullValue() && "Unexpected base in gather/scatter"); Scale = getI8Imm(1, DL); Base = CurDAG->getRegister(0, MVT::i32); @@ -1375,14 +1452,14 @@ bool X86DAGToDAGISel::SelectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base, return true; } -/// SelectAddr - returns true if it is able pattern match an addressing mode. 
+/// Returns true if it is able to pattern match an addressing mode. /// It returns the operands which make up the maximal addressing mode it can /// match by reference. /// /// Parent is the parent node of the addr operand that is being matched. It /// is always a load, store, atomic node, or null. It is only null when /// checking memory operands for inline asm nodes. -bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, +bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { X86ISelAddressMode AM; @@ -1404,7 +1481,7 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); } - if (MatchAddress(N, AM)) + if (matchAddress(N, AM)) return false; MVT VT = N.getSimpleValueType(); @@ -1420,14 +1497,14 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base, return true; } -/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to -/// match a load whose top elements are either undef or zeros. The load flavor -/// is derived from the type of N, which is either v4f32 or v2f64. +/// Match a scalar SSE load. In particular, we want to match a load whose top +/// elements are either undef or zeros. The load flavor is derived from the +/// type of N, which is either v4f32 or v2f64. /// /// We also return: /// PatternChainNode: this is the matched node that has a chain input and /// output. -bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, +bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment, @@ -1439,7 +1516,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, IsProfitableToFold(N.getOperand(0), N.getNode(), Root) && IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { LoadSDNode *LD = cast(PatternNodeWithChain); - if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; return true; } @@ -1457,7 +1534,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) { // Okay, this is a zero extending load. Fold it. 
LoadSDNode *LD = cast(N.getOperand(0).getOperand(0)); - if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) + if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment)) return false; PatternNodeWithChain = SDValue(LD, 0); return true; @@ -1466,7 +1543,7 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, } -bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) { +bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { if (const ConstantSDNode *CN = dyn_cast(N)) { uint64_t ImmVal = CN->getZExtValue(); if ((uint32_t)ImmVal != (uint64_t)ImmVal) @@ -1495,10 +1572,10 @@ bool X86DAGToDAGISel::SelectMOV64Imm32(SDValue N, SDValue &Imm) { return TM.getCodeModel() == CodeModel::Small; } -bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base, +bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { - if (!SelectLEAAddr(N, Base, Scale, Index, Disp, Segment)) + if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) return false; SDLoc DL(N); @@ -1533,9 +1610,9 @@ bool X86DAGToDAGISel::SelectLEA64_32Addr(SDValue N, SDValue &Base, return true; } -/// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing +/// Calls SelectAddr and determines if the maximal addressing /// mode it matches can be cost effectively emitted as an LEA instruction. -bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, +bool X86DAGToDAGISel::selectLEAAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { @@ -1546,7 +1623,7 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, SDValue Copy = AM.Segment; SDValue T = CurDAG->getRegister(0, MVT::i32); AM.Segment = T; - if (MatchAddress(N, AM)) + if (matchAddress(N, AM)) return false; assert (T == AM.Segment); AM.Segment = Copy; @@ -1572,13 +1649,12 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, Complexity++; // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA - // to a LEA. This is determined with some expermentation but is by no means + // to a LEA. This is determined with some experimentation but is by no means // optimal (especially for code size consideration). LEA is nice because of // its three-address nature. Tweak the cost function again when we can run // convertToThreeAddress() at register allocation time. if (AM.hasSymbolicDisplacement()) { - // For X86-64, we should always use lea to materialize RIP relative - // addresses. + // For X86-64, always use LEA to materialize RIP-relative addresses. if (Subtarget->is64Bit()) Complexity = 4; else @@ -1596,8 +1672,8 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue N, return true; } -/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. -bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, +/// This is only run on TargetGlobalTLSAddress nodes. 
+bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); @@ -1621,7 +1697,7 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base, } -bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, +bool X86DAGToDAGISel::tryFoldLoad(SDNode *P, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment) { @@ -1630,14 +1706,13 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, !IsLegalToFold(N, P, P, OptLevel)) return false; - return SelectAddr(N.getNode(), + return selectAddr(N.getNode(), N.getOperand(1), Base, Scale, Index, Disp, Segment); } -/// getGlobalBaseReg - Return an SDNode that returns the value of -/// the global base register. Output instructions required to -/// initialize the global base register, if necessary. -/// +/// Return an SDNode that returns the value of the global base register. +/// Output instructions required to initialize the global base register, +/// if necessary. SDNode *X86DAGToDAGISel::getGlobalBaseReg() { unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); auto &DL = MF->getDataLayout(); @@ -1828,7 +1903,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG, return Val; } -SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { +SDNode *X86DAGToDAGISel::selectAtomicLoadArith(SDNode *Node, MVT NVT) { if (Node->hasAnyUseOfValue(0)) return nullptr; @@ -1841,7 +1916,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { SDValue Ptr = Node->getOperand(1); SDValue Val = Node->getOperand(2); SDValue Base, Scale, Index, Disp, Segment; - if (!SelectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment)) + if (!selectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment)) return nullptr; // Which index into the table. @@ -1933,9 +2008,9 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) { return CurDAG->getMergeValues(RetVals, dl).getNode(); } -/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has -/// any uses which require the SF or OF bits to be accurate. -static bool HasNoSignedComparisonUses(SDNode *N) { +/// Test whether the given X86ISD::CMP node has any uses which require the SF +/// or OF bits to be accurate. +static bool hasNoSignedComparisonUses(SDNode *N) { // Examine each user of the node. for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { @@ -1995,9 +2070,8 @@ static bool HasNoSignedComparisonUses(SDNode *N) { return true; } -/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode -/// is suitable for doing the {load; increment or decrement; store} to modify -/// transformation. +/// Check whether or not the chain ending in StoreNode is suitable for doing +/// the {load; increment or decrement; store} to modify transformation. static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, SDValue StoredVal, SelectionDAG *CurDAG, LoadSDNode* &LoadNode, SDValue &InputChain) { @@ -2081,8 +2155,8 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc, return true; } -/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory -/// increment or decrement. Opc should be X86ISD::DEC or X86ISD::INC. +/// Get the appropriate X86 opcode for an in-memory increment or decrement. +/// Opc should be X86ISD::DEC or X86ISD::INC. 
static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { if (Opc == X86ISD::DEC) { if (LdVT == MVT::i64) return X86::DEC64m; @@ -2099,9 +2173,8 @@ static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { llvm_unreachable("unrecognized size for LdVT"); } -/// SelectGather - Customized ISel for GATHER operations. -/// -SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) { +/// Customized ISel for GATHER operations. +SDNode *X86DAGToDAGISel::selectGather(SDNode *Node, unsigned Opc) { // Operands of Gather: VSrc, Base, VIdx, VMask, Scale SDValue Chain = Node->getOperand(0); SDValue VSrc = Node->getOperand(2); @@ -2148,6 +2221,27 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { switch (Opcode) { default: break; + case ISD::BRIND: { + if (Subtarget->isTargetNaCl()) + // NaCl has its own pass where jmp %r32 are converted to jmp %r64. We + // leave the instruction alone. + break; + if (Subtarget->isTarget64BitILP32()) { + // Converts a 32-bit register to a 64-bit, zero-extended version of + // it. This is needed because x86-64 can do many things, but jmp %r32 + // ain't one of them. + const SDValue &Target = Node->getOperand(1); + assert(Target.getSimpleValueType() == llvm::MVT::i32); + SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64)); + SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other, + Node->getOperand(0), ZextTarget); + ReplaceUses(SDValue(Node, 0), Brind); + SelectCode(ZextTarget.getNode()); + SelectCode(Brind.getNode()); + return nullptr; + } + break; + } case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); switch (IntNo) { @@ -2190,7 +2284,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case Intrinsic::x86_avx2_gather_q_d: Opc = X86::VPGATHERQDrm; break; case Intrinsic::x86_avx2_gather_q_d_256: Opc = X86::VPGATHERQDYrm; break; } - SDNode *RetVal = SelectGather(Node, Opc); + SDNode *RetVal = selectGather(Node, Opc); if (RetVal) // We already called ReplaceUses inside SelectGather. return nullptr; @@ -2217,7 +2311,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { case ISD::ATOMIC_LOAD_AND: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_ADD: { - SDNode *RetVal = SelectAtomicLoadArith(Node, NVT); + SDNode *RetVal = selectAtomicLoadArith(Node, NVT); if (RetVal) return RetVal; break; @@ -2404,10 +2498,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); // Multiply is commmutative. if (!foldedLoad) { - foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); if (foldedLoad) std::swap(N0, N1); } @@ -2549,7 +2643,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; - bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); + bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); bool signBitIsZero = CurDAG->SignBitIsZero(N0); SDValue InFlag; @@ -2557,7 +2651,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // Special case for div8, just use a move with zero extension to AX to // clear the upper 8 bits (AH). 
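// ---------------------------------------------------------------------------
// Aside: why the zero-extension matters for 8-bit division. DIV r/m8 divides
// the full 16-bit AX, leaving the quotient in AL and the remainder in AH, so
// the dividend must be widened into AX first. A standalone model of the
// unsigned case (not the selector code; the real instruction faults if the
// quotient overflows AL, which this model ignores):
constexpr unsigned div8Quotient(unsigned char dividend, unsigned char divisor) {
  return static_cast<unsigned>(dividend) / divisor;  // MOVZX leaves AH == 0.
}
constexpr unsigned div8Remainder(unsigned char dividend, unsigned char divisor) {
  return static_cast<unsigned>(dividend) % divisor;  // AH = AX % divisor.
}
static_assert(div8Quotient(250, 7) == 35 && div8Remainder(250, 7) == 5,
              "250 == 7 * 35 + 5");
// The selector code below materializes exactly that widening move, folding
// the load into the MOVZX when it can.
// ---------------------------------------------------------------------------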
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain; - if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { + if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; Move = SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32, @@ -2692,7 +2786,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue N1 = Node->getOperand(1); if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && - HasNoSignedComparisonUses(Node)) + hasNoSignedComparisonUses(Node)) N0 = N0.getOperand(0); // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to @@ -2709,7 +2803,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // For example, convert "testl %eax, $8" to "testb %al, $8" if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 && (!(C->getZExtValue() & 0x80) || - HasNoSignedComparisonUses(Node))) { + hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Reg = N0.getNode()->getOperand(0); @@ -2743,7 +2837,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { // For example, "testl %eax, $2048" to "testb %ah, $8". if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 && (!(C->getZExtValue() & 0x8000) || - HasNoSignedComparisonUses(Node))) { + hasNoSignedComparisonUses(Node))) { // Shift the immediate right by 8 bits. SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8, dl, MVT::i8); @@ -2781,7 +2875,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 && N0.getValueType() != MVT::i16 && (!(C->getZExtValue() & 0x8000) || - HasNoSignedComparisonUses(Node))) { + hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i16); SDValue Reg = N0.getNode()->getOperand(0); @@ -2804,7 +2898,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 && N0.getValueType() == MVT::i64 && (!(C->getZExtValue() & 0x80000000) || - HasNoSignedComparisonUses(Node))) { + hasNoSignedComparisonUses(Node))) { SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i32); SDValue Reg = N0.getNode()->getOperand(0); @@ -2854,7 +2948,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { break; SDValue Base, Scale, Index, Disp, Segment; - if (!SelectAddr(LoadNode, LoadNode->getBasePtr(), + if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, Segment)) break; @@ -2903,7 +2997,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, case InlineAsm::Constraint_v: // not offsetable ?? case InlineAsm::Constraint_m: // memory case InlineAsm::Constraint_X: - if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) + if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) return true; break; } @@ -2916,9 +3010,8 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, return false; } -/// createX86ISelDag - This pass converts a legalized DAG into a -/// X86-specific DAG, ready for instruction scheduling. -/// +/// This pass converts a legalized DAG into a X86-specific DAG, +/// ready for instruction scheduling. 
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel) { return new X86DAGToDAGISel(TM, OptLevel); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0f29b514146c..0927c2f4fa50 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/IntrinsicLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -67,19 +68,14 @@ static cl::opt ExperimentalVectorWideningLegalization( "rather than promotion."), cl::Hidden); -// Forward declarations. -static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, - SDValue V2); - X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, const X86Subtarget &STI) : TargetLowering(TM), Subtarget(&STI) { X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); - TD = TM.getDataLayout(); + MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize()); // Set up the TargetLowering object. - static const MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird. It always uses i8 for shift amounts and setcc results. setBooleanContents(ZeroOrOneBooleanContent); @@ -118,13 +114,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall); setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall); - - // The _ftol2 runtime function has an unusual calling conv, which - // is modeled by a special pseudo-instruction. - setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr); - setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr); - setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr); - setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr); } if (Subtarget->isTargetDarwin()) { @@ -175,14 +164,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote); if (Subtarget->is64Bit()) { - setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); + if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) + // f32/f64 are legal, f80 is custom. + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); + else + setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote); setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); } else if (!Subtarget->useSoftFloat()) { // We have an algorithm for SSE2->double, and we turn this into a // 64-bit FILD followed by conditional FADD for other targets. setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom); // We have an algorithm for SSE2, and we turn this into a 64-bit - // FILD for other targets. + // FILD or VCVTUSI2SS/SD for other targets. setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom); } @@ -206,23 +199,29 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote); } - // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 - // are Legal, f80 is custom lowered. - setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); - setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); - // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have // this operation. 
setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote); setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote); - if (X86ScalarSSEf32) { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); - // f32 and f64 cases are Legal, f80 case is not - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + if (!Subtarget->useSoftFloat()) { + // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64 + // are Legal, f80 is custom lowered. + setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom); + setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom); + + if (X86ScalarSSEf32) { + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); + // f32 and f64 cases are Legal, f80 case is not + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + } else { + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + } } else { - setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom); - setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote); + setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand); + setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand); } // Handle FP_TO_UINT by promoting the destination to a larger signed @@ -232,8 +231,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote); if (Subtarget->is64Bit()) { - setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); - setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); + if (!Subtarget->useSoftFloat() && Subtarget->hasAVX512()) { + // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80. + setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); + setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); + } else { + setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote); + setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand); + } } else if (!Subtarget->useSoftFloat()) { // Since AVX is a superset of SSE3, only check for SSE here. if (Subtarget->hasSSE1() && !Subtarget->hasSSE3()) @@ -242,14 +247,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // the optimal thing for SSE vs. the default expansion in the legalizer. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand); else + // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom. // With SSE3 we can use fisttpll to convert to a signed i64; without // SSE, we're stuck with a fistpll. setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom); - } - if (isTargetFTOL()) { - // Use the _ftol2 runtime function, which has a pseudo-instruction - // to handle its weird calling convention. setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom); } @@ -274,8 +276,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // (low) operations are left as Legal, as there are single-result // instructions for this in x86. Using the two-result multiply instructions // when both high and low results are needed must be arranged by dagcombine. 
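The loop that follows marks MULHS, MULHU and the division nodes Expand. As a scalar illustration of what expanding a signed high-half multiply means when only a plain multiply is legal (a sketch assuming a widening multiply is available; the real expansion is arranged by the legalizer and dagcombine, as the comment above says):

```cpp
#include <cstdint>

// High 32 bits of a signed 32x32 multiply, recovered by widening.
int32_t mulhs32(int32_t A, int32_t B) {
  int64_t Wide = static_cast<int64_t>(A) * static_cast<int64_t>(B);
  return static_cast<int32_t>(Wide >> 32);   // keep the high half
}
```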
- for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { - MVT VT = IntVTs[i]; + for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); setOperationAction(ISD::SDIV, VT, Expand); @@ -295,6 +296,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BR_CC , MVT::f32, Expand); setOperationAction(ISD::BR_CC , MVT::f64, Expand); setOperationAction(ISD::BR_CC , MVT::f80, Expand); + setOperationAction(ISD::BR_CC , MVT::f128, Expand); setOperationAction(ISD::BR_CC , MVT::i8, Expand); setOperationAction(ISD::BR_CC , MVT::i16, Expand); setOperationAction(ISD::BR_CC , MVT::i32, Expand); @@ -302,6 +304,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT_CC , MVT::f32, Expand); setOperationAction(ISD::SELECT_CC , MVT::f64, Expand); setOperationAction(ISD::SELECT_CC , MVT::f80, Expand); + setOperationAction(ISD::SELECT_CC , MVT::f128, Expand); setOperationAction(ISD::SELECT_CC , MVT::i8, Expand); setOperationAction(ISD::SELECT_CC , MVT::i16, Expand); setOperationAction(ISD::SELECT_CC , MVT::i32, Expand); @@ -312,7 +315,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand); - setOperationAction(ISD::FREM , MVT::f32 , Expand); + + if (Subtarget->is32Bit() && Subtarget->isTargetKnownWindowsMSVC()) { + // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` + // is. We should promote the value to 64-bits to solve this. + // This is what the CRT headers do - `fmodf` is an inline header + // function casting to f64 and calling `fmod`. 
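Written out, the CRT-style inline definition that comment describes looks like this (illustrative sketch only):

```cpp
#include <cmath>

// On 32-bit MSVC only fmod(double) exists, so an f32 remainder is
// computed by widening both operands to f64 and truncating the result.
float fmodf_via_fmod(float X, float Y) {
  return static_cast<float>(std::fmod(static_cast<double>(X),
                                      static_cast<double>(Y)));
}
```

Marking ISD::FREM on f32 as Promote, as the hunk does next, asks the legalizer for exactly this widen-and-truncate shape.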
+ setOperationAction(ISD::FREM , MVT::f32 , Promote); + } else { + setOperationAction(ISD::FREM , MVT::f32 , Expand); + } + setOperationAction(ISD::FREM , MVT::f64 , Expand); setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); @@ -404,15 +417,21 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT , MVT::f32 , Custom); setOperationAction(ISD::SELECT , MVT::f64 , Custom); setOperationAction(ISD::SELECT , MVT::f80 , Custom); + setOperationAction(ISD::SELECT , MVT::f128 , Custom); setOperationAction(ISD::SETCC , MVT::i8 , Custom); setOperationAction(ISD::SETCC , MVT::i16 , Custom); setOperationAction(ISD::SETCC , MVT::i32 , Custom); setOperationAction(ISD::SETCC , MVT::f32 , Custom); setOperationAction(ISD::SETCC , MVT::f64 , Custom); setOperationAction(ISD::SETCC , MVT::f80 , Custom); + setOperationAction(ISD::SETCC , MVT::f128 , Custom); + setOperationAction(ISD::SETCCE , MVT::i8 , Custom); + setOperationAction(ISD::SETCCE , MVT::i16 , Custom); + setOperationAction(ISD::SETCCE , MVT::i32 , Custom); if (Subtarget->is64Bit()) { setOperationAction(ISD::SELECT , MVT::i64 , Custom); setOperationAction(ISD::SETCC , MVT::i64 , Custom); + setOperationAction(ISD::SETCCE , MVT::i64 , Custom); } setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support @@ -456,8 +475,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); // Expand certain atomics - for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { - MVT VT = IntVTs[i]; + for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); setOperationAction(ISD::ATOMIC_STORE, VT, Custom); @@ -473,13 +491,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } - if (Subtarget->is64Bit()) { - setExceptionPointerRegister(X86::RAX); - setExceptionSelectorRegister(X86::RDX); - } else { - setExceptionPointerRegister(X86::EAX); - setExceptionSelectorRegister(X86::EDX); - } setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); @@ -492,8 +503,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); setOperationAction(ISD::VAEND , MVT::Other, Expand); - if (Subtarget->is64Bit() && !Subtarget->isTargetWin64()) { - // TargetInfo::X86_64ABIBuiltinVaList + if (Subtarget->is64Bit()) { setOperationAction(ISD::VAARG , MVT::Other, Custom); setOperationAction(ISD::VACOPY , MVT::Other, Custom); } else { @@ -505,7 +515,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, getPointerTy(*TD), Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering. 
setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); @@ -613,8 +623,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMA, MVT::f64, Expand); setOperationAction(ISD::FMA, MVT::f32, Expand); - // Long double always uses X87. + // Long double always uses X87, except f128 in MMX. if (!Subtarget->useSoftFloat()) { + if (Subtarget->is64Bit() && Subtarget->hasMMX()) { + addRegisterClass(MVT::f128, &X86::FR128RegClass); + ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); + setOperationAction(ISD::FABS , MVT::f128, Custom); + setOperationAction(ISD::FNEG , MVT::f128, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); + } + addRegisterClass(MVT::f80, &X86::RFP80RegClass); setOperationAction(ISD::UNDEF, MVT::f80, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); @@ -846,15 +864,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v4i32, Custom); setOperationAction(ISD::CTPOP, MVT::v2i64, Custom); + setOperationAction(ISD::CTTZ, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i32, Custom); + // ISD::CTTZ v2i64 - scalarization is faster. + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); + // ISD::CTTZ_ZERO_UNDEF v2i64 - scalarization is faster. + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. - for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; - // Do not attempt to custom lower non-power-of-2 vectors - if (!isPowerOf2_32(VT.getVectorNumElements())) - continue; - // Do not attempt to custom lower non-128-bit vectors - if (!VT.is128BitVector()) - continue; + for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -892,13 +912,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. 
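On why the promotion to v2i64 requested in the loop below is sound: AND, OR and XOR are bit-parallel, so a 128-bit integer vector can be reinterpreted as two i64 lanes, combined there, and reinterpreted back without changing a single bit. A scalar model (illustrative only):

```cpp
#include <array>
#include <cstdint>

using V2i64 = std::array<uint64_t, 2>;

// The lane split is irrelevant to a bitwise op; v16i8/v8i16/v4i32 AND
// gives the same 128 bits as this v2i64 AND.
V2i64 and128(const V2i64 &A, const V2i64 &B) {
  return {A[0] & B[0], A[1] & B[1]};
}
```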
- for (int i = MVT::v16i8; i != MVT::v2i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; - - // Do not attempt to promote non-128-bit vectors - if (!VT.is128BitVector()) - continue; - + for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, MVT::v2i64); setOperationAction(ISD::OR, VT, Promote); @@ -1036,6 +1050,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRA, MVT::v4i32, Custom); } + if (Subtarget->hasXOP()) { + setOperationAction(ISD::ROTL, MVT::v16i8, Custom); + setOperationAction(ISD::ROTL, MVT::v8i16, Custom); + setOperationAction(ISD::ROTL, MVT::v4i32, Custom); + setOperationAction(ISD::ROTL, MVT::v2i64, Custom); + setOperationAction(ISD::ROTL, MVT::v32i8, Custom); + setOperationAction(ISD::ROTL, MVT::v16i16, Custom); + setOperationAction(ISD::ROTL, MVT::v8i32, Custom); + setOperationAction(ISD::ROTL, MVT::v4i64, Custom); + } + if (!Subtarget->useSoftFloat() && Subtarget->hasFp256()) { addRegisterClass(MVT::v32i8, &X86::VR256RegClass); addRegisterClass(MVT::v16i16, &X86::VR256RegClass); @@ -1126,7 +1151,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::v8i32, Custom); setOperationAction(ISD::CTPOP, MVT::v4i64, Custom); - if (Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()) { + setOperationAction(ISD::CTTZ, MVT::v32i8, Custom); + setOperationAction(ISD::CTTZ, MVT::v16i16, Custom); + setOperationAction(ISD::CTTZ, MVT::v8i32, Custom); + setOperationAction(ISD::CTTZ, MVT::v4i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v32i8, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i16, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); + + if (Subtarget->hasAnyFMA()) { setOperationAction(ISD::FMA, MVT::v8f32, Legal); setOperationAction(ISD::FMA, MVT::v4f64, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); @@ -1202,6 +1236,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); setOperationAction(ISD::MUL, MVT::v32i8, Custom); + + setOperationAction(ISD::SMAX, MVT::v32i8, Custom); + setOperationAction(ISD::SMAX, MVT::v16i16, Custom); + setOperationAction(ISD::SMAX, MVT::v8i32, Custom); + setOperationAction(ISD::UMAX, MVT::v32i8, Custom); + setOperationAction(ISD::UMAX, MVT::v16i16, Custom); + setOperationAction(ISD::UMAX, MVT::v8i32, Custom); + setOperationAction(ISD::SMIN, MVT::v32i8, Custom); + setOperationAction(ISD::SMIN, MVT::v16i16, Custom); + setOperationAction(ISD::SMIN, MVT::v8i32, Custom); + setOperationAction(ISD::UMIN, MVT::v32i8, Custom); + setOperationAction(ISD::UMIN, MVT::v16i16, Custom); + setOperationAction(ISD::UMIN, MVT::v8i32, Custom); } // In the customized shift lowering, the legal cases in AVX2 will be @@ -1243,15 +1290,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget->hasInt256()) setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64. 
- for (int i = MVT::v32i8; i != MVT::v4i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; - - // Do not attempt to promote non-256-bit vectors - if (!VT.is256BitVector()) - continue; - + for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { setOperationAction(ISD::AND, VT, Promote); AddPromotedToType (ISD::AND, VT, MVT::v4i64); setOperationAction(ISD::OR, VT, Promote); @@ -1293,6 +1333,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BR_CC, MVT::i1, Expand); setOperationAction(ISD::SETCC, MVT::i1, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); setOperationAction(ISD::XOR, MVT::i1, Legal); setOperationAction(ISD::OR, MVT::i1, Legal); setOperationAction(ISD::AND, MVT::i1, Legal); @@ -1311,6 +1352,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v16f32, Legal); setOperationAction(ISD::FSQRT, MVT::v16f32, Legal); setOperationAction(ISD::FNEG, MVT::v16f32, Custom); + setOperationAction(ISD::FABS, MVT::v16f32, Custom); setOperationAction(ISD::FADD, MVT::v8f64, Legal); setOperationAction(ISD::FSUB, MVT::v8f64, Legal); @@ -1318,19 +1360,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v8f64, Legal); setOperationAction(ISD::FSQRT, MVT::v8f64, Legal); setOperationAction(ISD::FNEG, MVT::v8f64, Custom); + setOperationAction(ISD::FABS, MVT::v8f64, Custom); setOperationAction(ISD::FMA, MVT::v8f64, Legal); setOperationAction(ISD::FMA, MVT::v16f32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); - if (Subtarget->is64Bit()) { - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); - } setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); @@ -1348,12 +1381,62 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal); setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); + setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); + setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); + setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); + if (Subtarget->hasVLX()){ + setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); + setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); + setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); + setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); + + setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); + setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); + setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); + setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); + setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); + } else { + setOperationAction(ISD::MLOAD, MVT::v8i32, Custom); + setOperationAction(ISD::MLOAD, MVT::v8f32, Custom); + setOperationAction(ISD::MSTORE, MVT::v8i32, Custom); + setOperationAction(ISD::MSTORE, MVT::v8f32, Custom); + } setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, 
MVT::v16i8, Custom); setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom); if (Subtarget->hasDQI()) { - setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); - setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom); + + setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); + if (Subtarget->hasVLX()) { + setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + } + } + if (Subtarget->hasVLX()) { + setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); } setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom); @@ -1386,7 +1469,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v16i1, Custom); setOperationAction(ISD::SETCC, MVT::v8i1, Custom); @@ -1395,6 +1478,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom); @@ -1439,9 +1523,49 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::XOR, MVT::v16i32, Legal); if (Subtarget->hasCDI()) { - setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); - } + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i32, Expand); + + setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); + setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); + setOperationAction(ISD::CTLZ, MVT::v16i16, Custom); + setOperationAction(ISD::CTLZ, MVT::v32i8, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i8, Expand); + 
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v16i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i8, Expand); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom); + + if (Subtarget->hasVLX()) { + setOperationAction(ISD::CTLZ, MVT::v4i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v8i32, Legal); + setOperationAction(ISD::CTLZ, MVT::v2i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v4i32, Legal); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand); + + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom); + } else { + setOperationAction(ISD::CTLZ, MVT::v4i64, Custom); + setOperationAction(ISD::CTLZ, MVT::v8i32, Custom); + setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); + setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v8i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v2i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v4i32, Expand); + } + } // Subtarget->hasCDI() + if (Subtarget->hasDQI()) { setOperationAction(ISD::MUL, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v4i64, Legal); @@ -1455,7 +1579,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::OR, VT, Legal); setOperationAction(ISD::XOR, VT, Legal); } - if (EltSize >= 32 && VT.getSizeInBits() <= 512) { + if ((VT.is128BitVector() || VT.is256BitVector()) && EltSize >= 32) { setOperationAction(ISD::MGATHER, VT, Custom); setOperationAction(ISD::MSCATTER, VT, Custom); } @@ -1481,15 +1605,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); setOperationAction(ISD::MLOAD, VT, Legal); setOperationAction(ISD::MSTORE, VT, Legal); + setOperationAction(ISD::MGATHER, VT, Legal); + setOperationAction(ISD::MSCATTER, VT, Custom); } } - for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { - MVT VT = (MVT::SimpleValueType)i; - - // Do not attempt to promote non-512-bit vectors. 
- if (!VT.is512BitVector()) - continue; - + for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) { setOperationAction(ISD::SELECT, VT, Promote); AddPromotedToType (ISD::SELECT, VT, MVT::v8i64); } @@ -1515,22 +1635,35 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MULHU, MVT::v32i16, Legal); setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom); setOperationAction(ISD::SELECT, MVT::v32i1, Custom); setOperationAction(ISD::SELECT, MVT::v64i1, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom); setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom); setOperationAction(ISD::VSELECT, MVT::v32i16, Legal); setOperationAction(ISD::VSELECT, MVT::v64i8, Legal); setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom); + setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom); setOperationAction(ISD::SMAX, MVT::v64i8, Legal); setOperationAction(ISD::SMAX, MVT::v32i16, Legal); @@ -1541,19 +1674,31 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, MVT::v64i8, Legal); setOperationAction(ISD::UMIN, MVT::v32i16, Legal); - for (int i = MVT::v32i8; i != MVT::v8i64; ++i) { - const MVT VT = (MVT::SimpleValueType)i; + setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); + setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); + if (Subtarget->hasVLX()) + setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); - const unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (Subtarget->hasCDI()) { + setOperationAction(ISD::CTLZ, MVT::v32i16, Custom); + setOperationAction(ISD::CTLZ, MVT::v64i8, Custom); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v32i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::v64i8, Expand); + } - // Do not attempt to promote non-512-bit vectors. 
- if (!VT.is512BitVector()) - continue; + for (auto VT : { MVT::v64i8, MVT::v32i16 }) { + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VSELECT, VT, Legal); + setOperationAction(ISD::SRL, VT, Custom); + setOperationAction(ISD::SHL, VT, Custom); + setOperationAction(ISD::SRA, VT, Custom); - if (EltSize < 32) { - setOperationAction(ISD::BUILD_VECTOR, VT, Custom); - setOperationAction(ISD::VSELECT, VT, Legal); - } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v8i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v8i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v8i64); } } @@ -1571,6 +1716,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SELECT, MVT::v2i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom); setOperationAction(ISD::AND, MVT::v8i32, Legal); setOperationAction(ISD::OR, MVT::v8i32, Legal); @@ -1595,8 +1742,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - if (!Subtarget->is64Bit()) + if (!Subtarget->is64Bit()) { setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + } // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -1604,9 +1753,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, // FIXME: We really should do custom legalization for addition and // subtraction on x86-32 once PR3203 is fixed. We really can't do much better // than generic legalization for 64-bit multiplication-with-overflow, though. - for (unsigned i = 0, e = 3+Subtarget->is64Bit(); i != e; ++i) { + for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { + if (VT == MVT::i64 && !Subtarget->is64Bit()) + continue; // Add/Sub/Mul with overflow operations are custom lowered. - MVT VT = IntVTs[i]; setOperationAction(ISD::SADDO, VT, Custom); setOperationAction(ISD::UADDO, VT, Custom); setOperationAction(ISD::SSUBO, VT, Custom); @@ -1615,7 +1765,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMULO, VT, Custom); } - if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. 
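A scalar model of the UADDO node that the overflow loop above marks Custom (illustrative sketch; on x86 the second result comes for free from the ADD instruction's carry flag):

```cpp
#include <cstdint>
#include <utility>

// An add that also yields its carry-out flag.
std::pair<uint32_t, bool> uaddo32(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;      // unsigned wrap-around is well defined
  return {Sum, Sum < A};     // wrapped below A <=> carry out
}
```

The hunk then resumes by nulling out the 128-bit shift libcalls, which are unavailable in 32-bit mode.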
setLibcallName(RTLIB::SHL_I128, nullptr); @@ -1658,12 +1807,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::FADD); setTargetDAGCombine(ISD::FSUB); + setTargetDAGCombine(ISD::FNEG); setTargetDAGCombine(ISD::FMA); + setTargetDAGCombine(ISD::FMINNUM); + setTargetDAGCombine(ISD::FMAXNUM); setTargetDAGCombine(ISD::SUB); setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::MLOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::MSTORE); + setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::SIGN_EXTEND); @@ -1671,24 +1824,24 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::SETCC); - setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine(ISD::MSCATTER); + setTargetDAGCombine(ISD::MGATHER); computeRegisterProperties(Subtarget->getRegisterInfo()); - // On Darwin, -Os means optimize for size without hurting performance, - // do not reduce the limit. MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores - MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 16 : 8; + MaxStoresPerMemsetOptSize = 8; MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores - MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemcpyOptSize = 4; MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores - MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 8 : 4; + MaxStoresPerMemmoveOptSize = 4; setPrefLoopAlignment(4); // 2^4 bytes. - // Predictable cmov don't hurt on atom because it's in-order. + // A predictable cmov does not hurt on an in-order CPU. + // FIXME: Use a CPU attribute to trigger this, not a CPU model. PredictableSelectIsExpensive = !Subtarget->isAtom(); EnableExtLdPromotion = true; setPrefFunctionAlignment(4); // 2^4 bytes. @@ -1716,40 +1869,43 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, if (!VT.isVector()) return Subtarget->hasAVX512() ? 
MVT::i1: MVT::i8; - const unsigned NumElts = VT.getVectorNumElements(); - const EVT EltVT = VT.getVectorElementType(); - if (VT.is512BitVector()) { - if (Subtarget->hasAVX512()) - if (EltVT == MVT::i32 || EltVT == MVT::i64 || - EltVT == MVT::f32 || EltVT == MVT::f64) - switch(NumElts) { - case 8: return MVT::v8i1; - case 16: return MVT::v16i1; - } - if (Subtarget->hasBWI()) - if (EltVT == MVT::i8 || EltVT == MVT::i16) - switch(NumElts) { - case 32: return MVT::v32i1; - case 64: return MVT::v64i1; - } - } + if (VT.isSimple()) { + MVT VVT = VT.getSimpleVT(); + const unsigned NumElts = VVT.getVectorNumElements(); + const MVT EltVT = VVT.getVectorElementType(); + if (VVT.is512BitVector()) { + if (Subtarget->hasAVX512()) + if (EltVT == MVT::i32 || EltVT == MVT::i64 || + EltVT == MVT::f32 || EltVT == MVT::f64) + switch(NumElts) { + case 8: return MVT::v8i1; + case 16: return MVT::v16i1; + } + if (Subtarget->hasBWI()) + if (EltVT == MVT::i8 || EltVT == MVT::i16) + switch(NumElts) { + case 32: return MVT::v32i1; + case 64: return MVT::v64i1; + } + } - if (VT.is256BitVector() || VT.is128BitVector()) { - if (Subtarget->hasVLX()) - if (EltVT == MVT::i32 || EltVT == MVT::i64 || - EltVT == MVT::f32 || EltVT == MVT::f64) - switch(NumElts) { - case 2: return MVT::v2i1; - case 4: return MVT::v4i1; - case 8: return MVT::v8i1; - } - if (Subtarget->hasBWI() && Subtarget->hasVLX()) - if (EltVT == MVT::i8 || EltVT == MVT::i16) - switch(NumElts) { - case 8: return MVT::v8i1; - case 16: return MVT::v16i1; - case 32: return MVT::v32i1; - } + if (VVT.is256BitVector() || VVT.is128BitVector()) { + if (Subtarget->hasVLX()) + if (EltVT == MVT::i32 || EltVT == MVT::i64 || + EltVT == MVT::f32 || EltVT == MVT::f64) + switch(NumElts) { + case 2: return MVT::v2i1; + case 4: return MVT::v4i1; + case 8: return MVT::v8i1; + } + if (Subtarget->hasBWI() && Subtarget->hasVLX()) + if (EltVT == MVT::i8 || EltVT == MVT::i16) + switch(NumElts) { + case 8: return MVT::v8i1; + case 16: return MVT::v16i1; + case 32: return MVT::v32i1; + } + } } return VT.changeVectorElementTypeToInteger(); @@ -1769,9 +1925,9 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) { if (EltAlign > MaxAlign) MaxAlign = EltAlign; } else if (StructType *STy = dyn_cast(Ty)) { - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + for (auto *EltTy : STy->elements()) { unsigned EltAlign = 0; - getMaxByValAlign(STy->getElementType(i), EltAlign); + getMaxByValAlign(EltTy, EltAlign); if (EltAlign > MaxAlign) MaxAlign = EltAlign; if (MaxAlign == 16) @@ -1821,10 +1977,11 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, if ((!IsMemset || ZeroMemset) && !F->hasFnAttribute(Attribute::NoImplicitFloat)) { if (Size >= 16 && - (Subtarget->isUnalignedMemAccessFast() || + (!Subtarget->isUnalignedMem16Slow() || ((DstAlign == 0 || DstAlign >= 16) && (SrcAlign == 0 || SrcAlign >= 16)))) { if (Size >= 32) { + // FIXME: Check if unaligned 32-byte accesses are slow. if (Subtarget->hasInt256()) return MVT::v8i32; if (Subtarget->hasFp256()) @@ -1842,6 +1999,9 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::f64; } } + // This is a compromise. If we reach here, unaligned accesses may be slow on + // this target. However, creating smaller, aligned accesses could be even + // slower and would certainly be a lot more code. 
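The fallback lines that follow implement the compromise just described: when wide unaligned accesses may be slow, pick the widest scalar type rather than emit many small aligned accesses. Restated as a helper (illustrative only):

```cpp
#include <cstdint>

// Width in bits of the fallback memop type.
unsigned fallbackStoreBits(uint64_t Size, bool Is64Bit) {
  if (Is64Bit && Size >= 8)
    return 64;   // MVT::i64
  return 32;     // MVT::i32
}
```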
if (Subtarget->is64Bit() && Size >= 8) return MVT::i64; return MVT::i32; @@ -1860,8 +2020,22 @@ X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, unsigned, bool *Fast) const { - if (Fast) - *Fast = Subtarget->isUnalignedMemAccessFast(); + if (Fast) { + switch (VT.getSizeInBits()) { + default: + // 8-byte and under are always assumed to be fast. + *Fast = true; + break; + case 128: + *Fast = !Subtarget->isUnalignedMem16Slow(); + break; + case 256: + *Fast = !Subtarget->isUnalignedMem32Slow(); + break; + // TODO: What about AVX-512 (512-bit) accesses? + } + } + // Misaligned accesses of any size are always allowed. return true; } @@ -1964,6 +2138,32 @@ bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, return true; } +Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const { + if (!Subtarget->isTargetAndroid()) + return TargetLowering::getSafeStackPointerLocation(IRB); + + // Android provides a fixed TLS slot for the SafeStack pointer. See the + // definition of TLS_SLOT_SAFESTACK in + // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h + unsigned AddressSpace, Offset; + if (Subtarget->is64Bit()) { + // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs: + Offset = 0x48; + if (getTargetMachine().getCodeModel() == CodeModel::Kernel) + AddressSpace = 256; + else + AddressSpace = 257; + } else { + // %gs:0x24 on i386 + Offset = 0x24; + AddressSpace = 256; + } + + return ConstantExpr::getIntToPtr( + ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), + Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); +} + bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { assert(SrcAS != DestAS && "Expected different address spaces!"); @@ -1977,11 +2177,9 @@ bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, #include "X86GenCallingConv.inc" -bool -X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, - MachineFunction &MF, bool isVarArg, - const SmallVectorImpl &Outs, - LLVMContext &Context) const { +bool X86TargetLowering::CanLowerReturn( + CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, + const SmallVectorImpl &Outs, LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_X86); @@ -2001,6 +2199,9 @@ X86TargetLowering::LowerReturn(SDValue Chain, MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); + if (CallConv == CallingConv::X86_INTR && !Outs.empty()) + report_fatal_error("X86 interrupts may not return any value"); + SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); @@ -2025,7 +2226,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, else if (VA.getLocInfo() == CCValAssign::ZExt) ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); else if (VA.getLocInfo() == CCValAssign::AExt) { - if (ValVT.isVector() && ValVT.getScalarType() == MVT::i1) + if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); else ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); @@ -2114,7 +2315,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); - return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps); + X86ISD::NodeType opcode = X86ISD::RET_FLAG; + if 
(CallConv == CallingConv::X86_INTR) + opcode = X86ISD::IRET; + return DAG.getNode(opcode, dl, MVT::Other, RetOps); } bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { @@ -2193,7 +2397,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, EVT CopyVT = VA.getLocVT(); // If this is x86-64, and we disabled SSE, we can't return FP values - if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && + if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) && ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { report_fatal_error("SSE register return with SSE disabled"); } @@ -2244,28 +2448,28 @@ enum StructReturnType { StackStructReturn }; static StructReturnType -callIsStructReturn(const SmallVectorImpl &Outs) { +callIsStructReturn(const SmallVectorImpl &Outs, bool IsMCU) { if (Outs.empty()) return NotStructReturn; const ISD::ArgFlagsTy &Flags = Outs[0].Flags; if (!Flags.isSRet()) return NotStructReturn; - if (Flags.isInReg()) + if (Flags.isInReg() || IsMCU) return RegStructReturn; return StackStructReturn; } /// Determines whether a function uses struct return semantics. static StructReturnType -argsAreStructReturn(const SmallVectorImpl &Ins) { +argsAreStructReturn(const SmallVectorImpl &Ins, bool IsMCU) { if (Ins.empty()) return NotStructReturn; const ISD::ArgFlagsTy &Flags = Ins[0].Flags; if (!Flags.isSRet()) return NotStructReturn; - if (Flags.isInReg()) + if (Flags.isInReg() || IsMCU) return RegStructReturn; return StackStructReturn; } @@ -2285,17 +2489,34 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, MachinePointerInfo(), MachinePointerInfo()); } -/// Return true if the calling convention is one that -/// supports tail call optimization. -static bool IsTailCallConvention(CallingConv::ID CC) { +/// Return true if the calling convention is one that we can guarantee TCO for. +static bool canGuaranteeTCO(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC || - CC == CallingConv::HiPE); + CC == CallingConv::HiPE || CC == CallingConv::HHVM); } -/// \brief Return true if the calling convention is a C calling convention. -static bool IsCCallConvention(CallingConv::ID CC) { - return (CC == CallingConv::C || CC == CallingConv::X86_64_Win64 || - CC == CallingConv::X86_64_SysV); +/// Return true if we might ever do TCO for calls with this calling convention. +static bool mayTailCallThisCC(CallingConv::ID CC) { + switch (CC) { + // C calling conventions: + case CallingConv::C: + case CallingConv::X86_64_Win64: + case CallingConv::X86_64_SysV: + // Callee pop conventions: + case CallingConv::X86_ThisCall: + case CallingConv::X86_StdCall: + case CallingConv::X86_VectorCall: + case CallingConv::X86_FastCall: + return true; + default: + return canGuaranteeTCO(CC); + } +} + +/// Return true if the function is being made into a tailcall target by +/// changing its ABI. +static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { + return GuaranteedTailCallOpt && canGuaranteeTCO(CC); } bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { @@ -2306,19 +2527,12 @@ bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { CallSite CS(CI); CallingConv::ID CalleeCC = CS.getCallingConv(); - if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC)) + if (!mayTailCallThisCC(CalleeCC)) return false; return true; } -/// Return true if the function is being made into -/// a tailcall target by changing its ABI. 
-static bool FuncIsMadeTailCallSafe(CallingConv::ID CC, - bool GuaranteedTailCallOpt) { - return GuaranteedTailCallOpt && IsTailCallConvention(CC); -} - SDValue X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, @@ -2329,7 +2543,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; - bool AlwaysUseMutable = FuncIsMadeTailCallSafe( + bool AlwaysUseMutable = shouldGuaranteeTCO( CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; @@ -2344,6 +2558,19 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, else ValVT = VA.getValVT(); + // Calculate SP offset of interrupt parameter, re-arrange the slot normally + // taken by a return address. + int Offset = 0; + if (CallConv == CallingConv::X86_INTR) { + const X86Subtarget& Subtarget = + static_cast(DAG.getSubtarget()); + // X86 interrupts may take one or two arguments. + // On the stack there will be no return address as in regular call. + // Offset of last argument need to be set to -4/-8 bytes. + // Where offset of the first argument out of two, should be set to 0 bytes. + Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1); + } + // FIXME: For now, all byval parameter objects are marked mutable. This can be // changed with more analysis. // In case of tail call optimization mark all arguments mutable. Since they @@ -2352,14 +2579,24 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, unsigned Bytes = Flags.getByValSize(); if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable); + // Adjust SP offset of interrupt parameter. + if (CallConv == CallingConv::X86_INTR) { + MFI->setObjectOffset(FI, Offset); + } return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), isImmutable); + // Adjust SP offset of interrupt parameter. + if (CallConv == CallingConv::X86_INTR) { + MFI->setObjectOffset(FI, Offset); + } + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue Val = DAG.getLoad(ValVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + SDValue Val = DAG.getLoad( + ValVT, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, + false, false, 0); return ExtendedInMem ? 
DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val; } @@ -2413,15 +2650,10 @@ static ArrayRef get64BitArgumentXMMs(MachineFunction &MF, return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); } -SDValue -X86TargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Ins, - SDLoc dl, - SelectionDAG &DAG, - SmallVectorImpl &InVals) - const { +SDValue X86TargetLowering::LowerFormalArguments( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, SDLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); @@ -2436,9 +2668,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); - assert(!(isVarArg && IsTailCallConvention(CallConv)) && + assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); + if (CallConv == CallingConv::X86_INTR) { + bool isLegal = Ins.size() == 1 || + (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) || + (!Is64Bit && Ins[1].VT == MVT::i32))); + if (!isLegal) + report_fatal_error("X86 interrupts may take one or two arguments"); + } + // Assign locations to all of the incoming arguments. SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); @@ -2471,6 +2711,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; + else if (RegVT == MVT::f128) + RC = &X86::FR128RegClass; else if (RegVT.is512BitVector()) RC = &X86::VR512RegClass; else if (RegVT.is256BitVector()) @@ -2547,8 +2789,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, unsigned StackSize = CCInfo.getNextStackOffset(); // Align stack specially for tail calls. - if (FuncIsMadeTailCallSafe(CallConv, - MF.getTarget().Options.GuaranteedTailCallOpt)) + if (shouldGuaranteeTCO(CallConv, + MF.getTarget().Options.GuaranteedTailCallOpt)) StackSize = GetAlignedArgumentStackSize(StackSize, DAG); // If the function takes variable number of arguments, make a frame index for @@ -2561,13 +2803,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, MFI->CreateFixedObject(1, StackSize, true)); } - MachineModuleInfo &MMI = MF.getMMI(); - const Function *WinEHParent = nullptr; - if (MMI.hasWinEHFuncInfo(Fn)) - WinEHParent = MMI.getWinEHParent(Fn); - bool IsWinEHOutlined = WinEHParent && WinEHParent != Fn; - bool IsWinEHParent = WinEHParent && WinEHParent == Fn; - // Figure out if XMM registers are in use. 
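A worked check of the interrupt-argument offset formula above, Offset = PtrSize * ((i + 1) % Ins.size() - 1), using PtrSize = 8 for a 64-bit target (32-bit targets use 4); the values match the -4/-8 comment (illustrative program):

```cpp
#include <cstdio>

int main() {
  const int PtrSize = 8;
  for (int NumIns : {1, 2})
    for (int i = 0; i < NumIns; ++i)
      std::printf("NumIns=%d i=%d -> Offset %d\n", NumIns, i,
                  PtrSize * ((i + 1) % NumIns - 1));
  // Prints -8 for a lone argument, and 0 / -8 for the two-argument case:
  // the last argument occupies the slot normally taken by the return
  // address, and the first of two sits at offset 0.
}
```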
assert(!(Subtarget->useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && @@ -2631,10 +2866,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), RSFIN, DAG.getIntPtrConstant(Offset, dl)); SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo::getFixedStack( - FuncInfo->getRegSaveFrameIndex(), Offset), - false, false, 0); + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), + FuncInfo->getRegSaveFrameIndex(), Offset), + false, false, 0); MemOps.push_back(Store); Offset += 8; } @@ -2656,27 +2892,6 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (!MemOps.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); - } else if (IsWin64 && IsWinEHOutlined) { - // Get to the caller-allocated home save location. Add 8 to account - // for the return address. - int HomeOffset = TFI.getOffsetOfLocalArea() + 8; - FuncInfo->setRegSaveFrameIndex(MFI->CreateFixedObject( - /*Size=*/1, /*SPOffset=*/HomeOffset + 8, /*Immutable=*/false)); - - MMI.getWinEHFuncInfo(Fn) - .CatchHandlerParentFrameObjIdx[const_cast(Fn)] = - FuncInfo->getRegSaveFrameIndex(); - - // Store the second integer parameter (rdx) into rsp+16 relative to the - // stack pointer at the entry of the function. - SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), - getPointerTy(DAG.getDataLayout())); - unsigned GPR = MF.addLiveIn(X86::RDX, &X86::GR64RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64); - Chain = DAG.getStore( - Val.getValue(1), dl, Val, RSFIN, - MachinePointerInfo::getFixedStack(FuncInfo->getRegSaveFrameIndex()), - /*isVolatile=*/true, /*isNonTemporal=*/false, /*Alignment=*/0); } if (isVarArg && MFI->hasMustTailInVarArgFunc()) { @@ -2723,12 +2938,15 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, MF.getTarget().Options.GuaranteedTailCallOpt)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. + } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { + // X86 interrupts must pop the error code if present + FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4); } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. // If this is an sret function, the return should pop the hidden pointer. - if (!Is64Bit && !IsTailCallConvention(CallConv) && + if (!Is64Bit && !canGuaranteeTCO(CallConv) && !Subtarget->getTargetTriple().isOSMSVCRT() && - argsAreStructReturn(Ins) == StackStructReturn) + argsAreStructReturn(Ins, Subtarget->isTargetMCU()) == StackStructReturn) FuncInfo->setBytesToPopOnReturn(4); } @@ -2743,21 +2961,20 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setArgumentStackSize(StackSize); - if (IsWinEHParent) { - if (Is64Bit) { - int UnwindHelpFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); - SDValue StackSlot = DAG.getFrameIndex(UnwindHelpFI, MVT::i64); - MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = UnwindHelpFI; - SDValue Neg2 = DAG.getConstant(-2, dl, MVT::i64); - Chain = DAG.getStore(Chain, dl, Neg2, StackSlot, - MachinePointerInfo::getFixedStack(UnwindHelpFI), - /*isVolatile=*/true, - /*isNonTemporal=*/false, /*Alignment=*/0); - } else { - // Functions using Win32 EH are considered to have opaque SP adjustments - // to force local variables to be addressed from the frame or base - // pointers. 
- MFI->setHasOpaqueSPAdjustment(true); + if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { + EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); + if (Personality == EHPersonality::CoreCLR) { + assert(Is64Bit); + // TODO: Add a mechanism to frame lowering that will allow us to indicate + // that we'd prefer this slot be allocated towards the bottom of the frame + // (i.e. near the stack pointer after allocating the frame). Every + // funclet needs a copy of this slot in its (mostly empty) frame, and the + // offset from the bottom of this and each funclet's frame must be the + // same, so the size of funclets' (mostly empty) frames is dictated by + // how far this slot is from the bottom (since they allocate just enough + // space to accomodate holding this slot at the correct offset). + int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false); + EHInfo->PSPSymFrameIdx = PSPSymFI; } } @@ -2777,9 +2994,10 @@ X86TargetLowering::LowerMemOpCallTo(SDValue Chain, if (Flags.isByVal()) return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); - return DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo::getStack(LocMemOffset), - false, false, 0); + return DAG.getStore( + Chain, dl, Arg, PtrOff, + MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset), + false, false, 0); } /// Emit a load of return address if tail call @@ -2813,11 +3031,24 @@ static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, false); SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, - MachinePointerInfo::getFixedStack(NewReturnAddrFI), + MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), NewReturnAddrFI), false, false, 0); return Chain; } +/// Returns a vector_shuffle mask for an movs{s|d}, movd +/// operation of specified width. +static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, + SDValue V2) { + unsigned NumElems = VT.getVectorNumElements(); + SmallVector Mask; + Mask.push_back(NumElems); + for (unsigned i = 1; i != NumElems; ++i) + Mask.push_back(i); + return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); +} + SDValue X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { @@ -2835,11 +3066,14 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); bool Is64Bit = Subtarget->is64Bit(); bool IsWin64 = Subtarget->isCallingConvWin64(CallConv); - StructReturnType SR = callIsStructReturn(Outs); + StructReturnType SR = callIsStructReturn(Outs, Subtarget->isTargetMCU()); bool IsSibcall = false; X86MachineFunctionInfo *X86Info = MF.getInfo(); auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + if (CallConv == CallingConv::X86_INTR) + report_fatal_error("X86 interrupts may not be called directly"); + if (Attr.getValueAsString() == "true") isTailCall = false; @@ -2878,7 +3112,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ++NumTailCalls; } - assert(!(isVarArg && IsTailCallConvention(CallConv)) && + assert(!(isVarArg && canGuaranteeTCO(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"); // Analyze operands of the call, assigning locations to each operand. @@ -2892,13 +3126,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCInfo.AnalyzeCallOperands(Outs, CC_X86); // Get a count of how many bytes are to be pushed on the stack. 
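For concreteness, the mask getMOVL builds above, reconstructed for a 4-lane type (illustrative program): lane 0 takes element 0 of V2 (indices >= NumElems select the second vector) and lanes 1..3 keep V1, which is the movss/movsd behavior the comment names.

```cpp
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumElems = 4;
  std::vector<int> Mask;
  Mask.push_back(NumElems);               // lane 0 <- element 0 of V2
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);                    // lanes 1..3 <- V1 unchanged
  for (int M : Mask)
    std::printf("%d ", M);                // prints: 4 1 2 3
}
```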
- unsigned NumBytes = CCInfo.getNextStackOffset(); + unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); if (IsSibcall) // This is a sibcall. The memory operands are available in caller's // own caller's stack. NumBytes = 0; else if (MF.getTarget().Options.GuaranteedTailCallOpt && - IsTailCallConvention(CallConv)) + canGuaranteeTCO(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); int FPDiff = 0; @@ -2970,7 +3204,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, break; case CCValAssign::AExt: if (Arg.getValueType().isVector() && - Arg.getValueType().getScalarType() == MVT::i1) + Arg.getValueType().getVectorElementType() == MVT::i1) Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); else if (RegVT.is128BitVector()) { // Special case: passing MMX values in XMM registers. @@ -2987,9 +3221,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Store the argument. SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); int FI = cast(SpillSlot)->getIndex(); - Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, - MachinePointerInfo::getFixedStack(FI), - false, false, 0); + Chain = DAG.getStore( + Chain, dl, Arg, SpillSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, 0); Arg = SpillSlot; break; } @@ -3125,10 +3360,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Flags, DAG, dl)); } else { // Store relative to framepointer. - MemOpChains2.push_back( - DAG.getStore(ArgChain, dl, Arg, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + MemOpChains2.push_back(DAG.getStore( + ArgChain, dl, Arg, FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), + false, false, 0)); } } @@ -3207,7 +3442,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (ExtraLoad) Callee = DAG.getLoad( getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), false, false, + false, 0); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { unsigned char OpFlags = 0; @@ -3261,9 +3497,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv); assert(Mask && "Missing call preserved mask for calling convention"); - // If this is an invoke in a 32-bit function using an MSVC personality, assume - // the function clobbers all registers. If an exception is thrown, the runtime - // will not restore CSRs. + // If this is an invoke in a 32-bit function using a funclet-based + // personality, assume the function clobbers all registers. If an exception + // is thrown, the runtime will not restore CSRs. // FIXME: Model this more precisely so that we can register allocate across // the normal edge and spill and fill across the exceptional edge. if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) { @@ -3272,7 +3508,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallerFn->hasPersonalityFn() ? 
classifyEHPersonality(CallerFn->getPersonalityFn()) : EHPersonality::Unknown; - if (isMSVCEHPersonality(Pers)) + if (isFuncletEHPersonality(Pers)) Mask = RegInfo->getNoPreservedMask(); } @@ -3300,7 +3536,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, DAG.getTarget().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPop = NumBytes; // Callee pops everything - else if (!Is64Bit && !IsTailCallConvention(CallConv) && + else if (!Is64Bit && !canGuaranteeTCO(CallConv) && !Subtarget->getTargetTriple().isOSMSVCRT() && SR == StackStructReturn) // If this is a call to a struct-return function, the callee @@ -3358,8 +3594,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // EDI // local1 .. -/// GetAlignedArgumentStackSize - Make the stack size align e.g 16n + 12 aligned -/// for a 16 byte align requirement. +/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align +/// requirement. unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, SelectionDAG& DAG) const { @@ -3380,9 +3616,8 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize, return Offset; } -/// MatchingStackOffset - Return true if the given stack call argument is -/// already available in the same position (relatively) of the caller's -/// incoming argument stack. +/// Return true if the given stack call argument is already available in the +/// same position (relatively) of the caller's incoming argument stack. static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, @@ -3435,25 +3670,19 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); } -/// IsEligibleForTailCallOptimization - Check whether the call is eligible -/// for tail call optimization. Targets which want to do tail call -/// optimization should implement this function. -bool -X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, - CallingConv::ID CalleeCC, - bool isVarArg, - bool isCalleeStructRet, - bool isCallerStructRet, - Type *RetTy, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - SelectionDAG &DAG) const { - if (!IsTailCallConvention(CalleeCC) && !IsCCallConvention(CalleeCC)) +/// Check whether the call is eligible for tail call optimization. Targets +/// that want to do tail call optimization should implement this function. +bool X86TargetLowering::IsEligibleForTailCallOptimization( + SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, + bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, SelectionDAG &DAG) const { + if (!mayTailCallThisCC(CalleeCC)) return false; // If -tailcallopt is specified, make fastcc functions tail-callable. 
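// --- Editor's aside on the "16n + 12" comment above: a sketch of the
// alignment rule, assuming a 32-bit target with a 4-byte return-address slot
// and 16-byte stack alignment (names hypothetical, not the in-tree helper).
static unsigned alignedArgArea(unsigned StackSize) {
  const unsigned StackAlign = 16, SlotSize = 4;
  // Pad StackSize so that StackSize + SlotSize is 16-byte aligned; the
  // argument area itself then ends up congruent to 12 (mod 16).
  return ((StackSize + SlotSize + StackAlign - 1) & ~(StackAlign - 1)) -
         SlotSize;
}
// e.g. alignedArgArea(0) == 12 and alignedArgArea(13) == 28 (16 + 12).
// --- End of aside. ---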
- const MachineFunction &MF = DAG.getMachineFunction(); + MachineFunction &MF = DAG.getMachineFunction(); const Function *CallerF = MF.getFunction(); // If the function return type is x86_fp80 and the callee return type is not, @@ -3474,7 +3703,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; if (DAG.getTarget().Options.GuaranteedTailCallOpt) { - if (IsTailCallConvention(CalleeCC) && CCMatch) + if (canGuaranteeTCO(CalleeCC) && CCMatch) return true; return false; } @@ -3493,19 +3722,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; - // An stdcall/thiscall caller is expected to clean up its arguments; the - // callee isn't going to do that. - // FIXME: this is more restrictive than needed. We could produce a tailcall - // when the stack adjustment matches. For example, with a thiscall that takes - // only one argument. - if (!CCMatch && (CallerCC == CallingConv::X86_StdCall || - CallerCC == CallingConv::X86_ThisCall)) - return false; - // Do not sibcall optimize vararg calls unless all arguments are passed via // registers. if (isVarArg && !Outs.empty()) { - // Optimizing for varargs on Win64 is unlikely to be safe without // additional testing. if (IsCalleeWin64 || IsCallerWin64) @@ -3573,6 +3792,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } + unsigned StackArgsSize = 0; + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { @@ -3587,11 +3808,9 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, CCInfo.AllocateStack(32, 8); CCInfo.AnalyzeCallOperands(Outs, CC_X86); - if (CCInfo.getNextStackOffset()) { - MachineFunction &MF = DAG.getMachineFunction(); - if (MF.getInfo()->getBytesToPopOnReturn()) - return false; + StackArgsSize = CCInfo.getNextStackOffset(); + if (CCInfo.getNextStackOffset()) { // Check if the arguments are already laid out in the right way as // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -3642,6 +3861,21 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } + bool CalleeWillPop = + X86::isCalleePop(CalleeCC, Subtarget->is64Bit(), isVarArg, + MF.getTarget().Options.GuaranteedTailCallOpt); + + if (unsigned BytesToPop = + MF.getInfo()->getBytesToPopOnReturn()) { + // If we have bytes to pop, the callee must pop them. + bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize; + if (!CalleePopMatches) + return false; + } else if (CalleeWillPop && StackArgsSize > 0) { + // If we don't have bytes to pop, make sure the callee doesn't pop any. 
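// --- Editor's aside (the hunk resumes right below): the callee-pop
// compatibility rule added here, restated as a standalone hypothetical
// helper. A tail call is only safe when caller and callee agree on who pops
// how many stack-argument bytes.
static bool calleePopCompatible(unsigned CallerBytesToPop, bool CalleeWillPop,
                                unsigned StackArgsSize) {
  if (CallerBytesToPop) // Caller owes a pop: callee must pop the same amount.
    return CalleeWillPop && StackArgsSize == CallerBytesToPop;
  // Caller pops nothing: a popping callee would unbalance the stack.
  return !(CalleeWillPop && StackArgsSize > 0);
}
// --- End of aside. ---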
+ return false; + } + return true; } @@ -3688,11 +3922,13 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::VPERMILPI: case X86ISD::VPERM2X128: case X86ISD::VPERMI: + case X86ISD::VPERMV: + case X86ISD::VPERMV3: return true; } } -static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, +static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT, SDValue V1, unsigned TargetMask, SelectionDAG &DAG) { switch(Opc) { @@ -3707,7 +3943,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, } } -static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, +static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, MVT VT, SDValue V1, SDValue V2, SelectionDAG &DAG) { switch(Opc) { default: llvm_unreachable("Unknown x86 shuffle node"); @@ -3772,23 +4008,23 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, return false; } -/// isCalleePop - Determines whether the callee is required to pop its -/// own arguments. Callee pop is necessary to support tail calls. +/// Determines whether the callee is required to pop its own arguments. +/// Callee pop is necessary to support tail calls. bool X86::isCalleePop(CallingConv::ID CallingConv, - bool is64Bit, bool IsVarArg, bool TailCallOpt) { + bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { + // If GuaranteeTCO is true, we force some calls to be callee pop so that we + // can guarantee TCO. + if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) + return true; + switch (CallingConv) { default: return false; case CallingConv::X86_StdCall: case CallingConv::X86_FastCall: case CallingConv::X86_ThisCall: + case CallingConv::X86_VectorCall: return !is64Bit; - case CallingConv::Fast: - case CallingConv::GHC: - case CallingConv::HiPE: - if (IsVarArg) - return false; - return TailCallOpt; } } @@ -3807,11 +4043,26 @@ static bool isX86CCUnsigned(unsigned X86CC) { case X86::COND_BE: return true; case X86::COND_AE: return true; } - llvm_unreachable("covered switch fell through?!"); } -/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86 -/// specific condition code, returning the condition code and the LHS/RHS of the +static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { + switch (SetCCOpcode) { + default: llvm_unreachable("Invalid integer condition!"); + case ISD::SETEQ: return X86::COND_E; + case ISD::SETGT: return X86::COND_G; + case ISD::SETGE: return X86::COND_GE; + case ISD::SETLT: return X86::COND_L; + case ISD::SETLE: return X86::COND_LE; + case ISD::SETNE: return X86::COND_NE; + case ISD::SETULT: return X86::COND_B; + case ISD::SETUGT: return X86::COND_A; + case ISD::SETULE: return X86::COND_BE; + case ISD::SETUGE: return X86::COND_AE; + } +} + +/// Do a one-to-one translation of a ISD::CondCode to the X86-specific +/// condition code, returning the condition code and the LHS/RHS of the /// comparison to make. 
static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) { @@ -3833,19 +4084,7 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, } } - switch (SetCCOpcode) { - default: llvm_unreachable("Invalid integer condition!"); - case ISD::SETEQ: return X86::COND_E; - case ISD::SETGT: return X86::COND_G; - case ISD::SETGE: return X86::COND_GE; - case ISD::SETLT: return X86::COND_L; - case ISD::SETLE: return X86::COND_LE; - case ISD::SETNE: return X86::COND_NE; - case ISD::SETULT: return X86::COND_B; - case ISD::SETUGT: return X86::COND_A; - case ISD::SETULE: return X86::COND_BE; - case ISD::SETUGE: return X86::COND_AE; - } + return TranslateIntegerX86CC(SetCCOpcode); } // First determine if it is required or is profitable to flip the operands. @@ -3898,8 +4137,8 @@ static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, SDLoc DL, bool isFP, } } -/// hasFPCMov - is there a floating point cmov for the specific X86 condition -/// code. Current x86 isa includes the following FP cmov instructions: +/// Is there a floating point cmov for the specific X86 condition code? +/// Current x86 isa includes the following FP cmov instructions: /// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. static bool hasFPCMov(unsigned X86CC) { switch (X86CC) { @@ -3917,7 +4156,7 @@ static bool hasFPCMov(unsigned X86CC) { } } -/// isFPImmLegal - Returns true if the target can instruction select the +/// Returns true if the target can instruction select the /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { @@ -3970,7 +4209,7 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const { return Subtarget->hasLZCNT(); } -/// isUndefInRange - Return true if every element in Mask, beginning +/// Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size is undef. static bool isUndefInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { for (unsigned i = Pos, e = Pos + Size; i != e; ++i) @@ -3979,19 +4218,18 @@ static bool isUndefInRange(ArrayRef Mask, unsigned Pos, unsigned Size) { return true; } -/// isUndefOrInRange - Return true if Val is undef or if its value falls within -/// the specified range (L, H]. +/// Return true if Val is undef or if its value falls within the +/// specified range (L, H]. static bool isUndefOrInRange(int Val, int Low, int Hi) { return (Val < 0) || (Val >= Low && Val < Hi); } -/// isUndefOrEqual - Val is either less than zero (undef) or equal to the -/// specified value. +/// Val is either less than zero (undef) or equal to the specified value. static bool isUndefOrEqual(int Val, int CmpVal) { return (Val < 0 || Val == CmpVal); } -/// isSequentialOrUndefInRange - Return true if every element in Mask, beginning +/// Return true if every element in Mask, beginning /// from position Pos and ending in Pos+Size, falls within the specified /// sequential range (Low, Low+Size]. or is undef. 
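// --- Editor's aside: a standalone model of the mask predicates above, where
// -1 plays the role of an undef mask element (illustration only).
#include <vector>
static bool isSequentialOrUndef(const std::vector<int> &Mask, unsigned Pos,
                                unsigned Size, int Low) {
  for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
    if (Mask[i] >= 0 && Mask[i] != Low)
      return false;
  return true;
}
// e.g. isSequentialOrUndef({4, -1, 6, 7}, 0, 4, 4) == true.
// --- End of aside; the in-tree declaration follows below. ---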
static bool isSequentialOrUndefInRange(ArrayRef Mask, @@ -4002,9 +4240,8 @@ static bool isSequentialOrUndefInRange(ArrayRef Mask, return true; } -/// isVEXTRACTIndex - Return true if the specified -/// EXTRACT_SUBVECTOR operand specifies a vector extract that is -/// suitable for instruction that extract 128 or 256 bit vectors +/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector +/// extract that is suitable for instruction that extract 128 or 256 bit vectors static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width"); if (!isa(N->getOperand(1).getNode())) @@ -4021,7 +4258,7 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) { return Result; } -/// isVINSERTIndex - Return true if the specified INSERT_SUBVECTOR +/// Return true if the specified INSERT_SUBVECTOR /// operand specifies a subvector insert that is suitable for input to /// insertion of 128 or 256-bit subvectors static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) { @@ -4057,8 +4294,8 @@ bool X86::isVEXTRACT256Index(SDNode *N) { static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"); - if (!isa(N->getOperand(1).getNode())) - llvm_unreachable("Illegal extract subvector for VEXTRACT"); + assert(isa(N->getOperand(1).getNode()) && + "Illegal extract subvector for VEXTRACT"); uint64_t Index = cast(N->getOperand(1).getNode())->getZExtValue(); @@ -4072,8 +4309,8 @@ static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) { static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) { assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"); - if (!isa(N->getOperand(2).getNode())) - llvm_unreachable("Illegal insert subvector for VINSERT"); + assert(isa(N->getOperand(2).getNode()) && + "Illegal insert subvector for VINSERT"); uint64_t Index = cast(N->getOperand(2).getNode())->getZExtValue(); @@ -4085,53 +4322,71 @@ static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) { return Index / NumElemsPerChunk; } -/// getExtractVEXTRACT128Immediate - Return the appropriate immediate -/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF128 -/// and VINSERTI128 instructions. +/// Return the appropriate immediate to extract the specified +/// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VINSERTI128 instructions. unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) { return getExtractVEXTRACTImmediate(N, 128); } -/// getExtractVEXTRACT256Immediate - Return the appropriate immediate -/// to extract the specified EXTRACT_SUBVECTOR index with VEXTRACTF64x4 -/// and VINSERTI64x4 instructions. +/// Return the appropriate immediate to extract the specified +/// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VINSERTI64x4 instructions. unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) { return getExtractVEXTRACTImmediate(N, 256); } -/// getInsertVINSERT128Immediate - Return the appropriate immediate -/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF128 -/// and VINSERTI128 instructions. +/// Return the appropriate immediate to insert at the specified +/// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions. 
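// --- Editor's aside: the VEXTRACT*/VINSERT* immediates computed by the
// helpers in this region reduce to an element index divided by the chunk
// width in elements. Standalone sketch, names hypothetical:
static unsigned subvectorImmediate(unsigned ElemIdx, unsigned EltBits,
                                   unsigned ChunkBits) {
  unsigned ElemsPerChunk = ChunkBits / EltBits;
  return ElemIdx / ElemsPerChunk;
}
// e.g. the 128-bit chunk holding element 4 of a v16i32 (32-bit elements):
// subvectorImmediate(4, 32, 128) == 1.
// --- End of aside; the in-tree helper continues below. ---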
unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
  return getInsertVINSERTImmediate(N, 128);
}

-/// getInsertVINSERT256Immediate - Return the appropriate immediate
-/// to insert at the specified INSERT_SUBVECTOR index with VINSERTF46x4
-/// and VINSERTI64x4 instructions.
+/// Return the appropriate immediate to insert at the specified
+/// INSERT_SUBVECTOR index with VINSERTF64x4 and VINSERTI64x4 instructions.
unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
  return getInsertVINSERTImmediate(N, 256);
}

-/// isZero - Returns true if Elt is a constant integer zero
-static bool isZero(SDValue V) {
-  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
-  return C && C->isNullValue();
-}
-
-/// isZeroNode - Returns true if Elt is a constant zero or a floating point
-/// constant +0.0.
+/// Returns true if Elt is a constant zero or a floating point constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
-  if (isZero(Elt))
-    return true;
-  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
-    return CFP->getValueAPF().isPosZero();
-  return false;
+  return isNullConstant(Elt) || isNullFPConstant(Elt);
}

-/// getZeroVector - Returns a vector of specified type with all zero elements.
-///
-static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
+// Build a vector of constants.
+// Use an UNDEF node if MaskElt == -1.
+// Split 64-bit constants in 32-bit mode.
+static SDValue getConstVector(ArrayRef<int> Values, MVT VT,
+                              SelectionDAG &DAG,
+                              SDLoc dl, bool IsMask = false) {
+
+  SmallVector<SDValue, 32> Ops;
+  bool Split = false;
+
+  MVT ConstVecVT = VT;
+  unsigned NumElts = VT.getVectorNumElements();
+  bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
+  if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
+    ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
+    Split = true;
+  }
+
+  MVT EltVT = ConstVecVT.getVectorElementType();
+  for (unsigned i = 0; i < NumElts; ++i) {
+    bool IsUndef = Values[i] < 0 && IsMask;
+    SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
+                     DAG.getConstant(Values[i], dl, EltVT);
+    Ops.push_back(OpNode);
+    if (Split)
+      Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
+                    DAG.getConstant(0, dl, EltVT));
+  }
+  SDValue ConstsNode = DAG.getNode(ISD::BUILD_VECTOR, dl, ConstVecVT, Ops);
+  if (Split)
+    ConstsNode = DAG.getBitcast(VT, ConstsNode);
+  return ConstsNode;
+}
+
+/// Returns a vector of specified type with all zero elements.
+static SDValue getZeroVector(MVT VT, const X86Subtarget *Subtarget,
                              SelectionDAG &DAG, SDLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
@@ -4163,7 +4418,7 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
                      Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
-  } else if (VT.getScalarType() == MVT::i1) {
+  } else if (VT.getVectorElementType() == MVT::i1) {
    assert((Subtarget->hasBWI() || VT.getVectorNumElements() <= 16) &&
           "Unexpected vector type");
@@ -4195,19 +4450,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
  // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
  // This is the index of the first element of the vectorWidth-bit chunk
-  // we want.
-  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
-                               * ElemsPerChunk);
+  // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
+ IdxVal &= ~(ElemsPerChunk - 1); // If the input is a buildvector just emit a smaller one. if (Vec.getOpcode() == ISD::BUILD_VECTOR) return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT, - makeArrayRef(Vec->op_begin() + NormalizedIdxVal, - ElemsPerChunk)); + makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk)); - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl); + SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); } @@ -4245,13 +4499,13 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec, // Insert the relevant vectorWidth bits. unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); + assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); // This is the index of the first element of the vectorWidth-bit chunk - // we want. - unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth) - * ElemsPerChunk); + // we want. Since ElemsPerChunk is a power of 2 just need to clear bits. + IdxVal &= ~(ElemsPerChunk - 1); - SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl); + SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); } @@ -4279,7 +4533,7 @@ static SDValue Insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, Vec, ZeroIndex); // The blend instruction, and therefore its mask, depend on the data type. - MVT ScalarType = ResultVT.getScalarType().getSimpleVT(); + MVT ScalarType = ResultVT.getVectorElementType().getSimpleVT(); if (ScalarType.isFloatingPoint()) { // Choose either vblendps (float) or vblendpd (double). unsigned ScalarSize = ScalarType.getSizeInBits(); @@ -4316,6 +4570,81 @@ static SDValue Insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, return InsertSubVector(Result, Vec, IdxVal, DAG, dl, 256); } +/// Insert i1-subvector to i1-vector. +static SDValue Insert1BitVector(SDValue Op, SelectionDAG &DAG) { + + SDLoc dl(Op); + SDValue Vec = Op.getOperand(0); + SDValue SubVec = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + + if (!isa(Idx)) + return SDValue(); + + unsigned IdxVal = cast(Idx)->getZExtValue(); + if (IdxVal == 0 && Vec.isUndef()) // the operation is legal + return Op; + + MVT OpVT = Op.getSimpleValueType(); + MVT SubVecVT = SubVec.getSimpleValueType(); + unsigned NumElems = OpVT.getVectorNumElements(); + unsigned SubVecNumElems = SubVecVT.getVectorNumElements(); + + assert(IdxVal + SubVecNumElems <= NumElems && + IdxVal % SubVecVT.getSizeInBits() == 0 && + "Unexpected index value in INSERT_SUBVECTOR"); + + // There are 3 possible cases: + // 1. Subvector should be inserted in the lower part (IdxVal == 0) + // 2. Subvector should be inserted in the upper part + // (IdxVal + SubVecNumElems == NumElems) + // 3. Subvector should be inserted in the middle (for example v2i1 + // to v16i1, index 2) + + SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); + SDValue Undef = DAG.getUNDEF(OpVT); + SDValue WideSubVec = + DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, SubVec, ZeroIdx); + if (Vec.isUndef()) + return DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + + if (ISD::isBuildVectorAllZeros(Vec.getNode())) { + unsigned ShiftLeft = NumElems - SubVecNumElems; + unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; + WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec, + DAG.getConstant(ShiftLeft, dl, MVT::i8)); + return ShiftRight ? 
DAG.getNode(X86ISD::VSRLI, dl, OpVT, WideSubVec,
+                DAG.getConstant(ShiftRight, dl, MVT::i8)) : WideSubVec;
+  }
+
+  if (IdxVal == 0) {
+    // Zero lower bits of the Vec
+    SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+    Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+    Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+    // Merge them together
+    return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
+  }
+
+  // Simple case when we put subvector in the upper part
+  if (IdxVal + SubVecNumElems == NumElems) {
+    // Move the subvector into the upper part...
+    WideSubVec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, WideSubVec,
+                             DAG.getConstant(IdxVal, dl, MVT::i8));
+    // ...and zero the upper bits of the Vec.
+    SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
+    Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits);
+    Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits);
+    return DAG.getNode(ISD::OR, dl, OpVT, Vec, WideSubVec);
+  }
+  // Subvector should be inserted in the middle - use shuffle. Take the
+  // subvector from the bottom of WideSubVec and everything else from Vec.
+  SmallVector<int, 64> Mask;
+  for (unsigned i = 0; i < NumElems; ++i)
+    Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
+                   i - IdxVal : i + NumElems);
+  return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
+}
+
/// Concat two 128-bit vectors into a 256-bit vector using VINSERTF128
/// instructions. This is used because creating CONCAT_VECTOR nodes of
/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
@@ -4334,18 +4663,22 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
  return Insert256BitVector(V, V2, NumElems/2, DAG, dl);
}

-/// getOnesVector - Returns a vector of specified type with all bits set.
+/// Returns a vector of specified type with all bits set.
/// Always build ones vectors as <4 x i32> or <8 x i32>. For 256-bit types with
/// no AVX2 support, use two <4 x i32> inserted in a <8 x i32> appropriately.
/// Then bitcast to their original type, ensuring they get CSE'd.
-static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
-                             SDLoc dl) {
+static SDValue getOnesVector(EVT VT, const X86Subtarget *Subtarget,
+                             SelectionDAG &DAG, SDLoc dl) {
  assert(VT.isVector() && "Expected a vector type");

  SDValue Cst = DAG.getConstant(~0U, dl, MVT::i32);
  SDValue Vec;
-  if (VT.is256BitVector()) {
-    if (HasInt256) { // AVX2
+  if (VT.is512BitVector()) {
+    SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
+                      Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
+  } else if (VT.is256BitVector()) {
+    if (Subtarget->hasInt256()) { // AVX2
      SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
      Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
    } else { // AVX
@@ -4360,19 +4693,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
  return DAG.getBitcast(VT, Vec);
}

-/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
-/// operation of specified width.
-static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1,
-                       SDValue V2) {
-  unsigned NumElems = VT.getVectorNumElements();
-  SmallVector<int, 8> Mask;
-  Mask.push_back(NumElems);
-  for (unsigned i = 1; i != NumElems; ++i)
-    Mask.push_back(i);
-  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
-}
-
-/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
+/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); @@ -4384,7 +4705,7 @@ static SDValue getUnpackl(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getUnpackh - Returns a vector_shuffle node for an unpackh operation. +/// Returns a vector_shuffle node for an unpackh operation. static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, SDValue V2) { unsigned NumElems = VT.getVectorNumElements(); @@ -4396,10 +4717,10 @@ static SDValue getUnpackh(SelectionDAG &DAG, SDLoc dl, MVT VT, SDValue V1, return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]); } -/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified -/// vector of zero or undef vector. This produces a shuffle where the low -/// element of V2 is swizzled into the zero/undef vector, landing at element -/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). +/// Return a vector_shuffle of the specified vector of zero or undef vector. +/// This produces a shuffle where the low element of V2 is swizzled into the +/// zero/undef vector, landing at element Idx. +/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, bool IsZero, const X86Subtarget *Subtarget, @@ -4415,10 +4736,10 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, &MaskVec[0]); } -/// getTargetShuffleMask - Calculates the shuffle mask corresponding to the -/// target specific opcode. Returns true if the Mask could be calculated. Sets -/// IsUnary to true if only uses one source. Note that this will set IsUnary for -/// shuffles which use a single input multiple times, and in those cases it will +/// Calculates the shuffle mask corresponding to the target-specific opcode. +/// Returns true if the Mask could be calculated. Sets IsUnary to true if only +/// uses one source. Note that this will set IsUnary for shuffles which use a +/// single input multiple times, and in those cases it will /// adjust the mask to only have indices within that single input. /// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero. static bool getTargetShuffleMask(SDNode *N, MVT VT, @@ -4482,7 +4803,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { // If we have a build-vector, then things are easy. - EVT VT = MaskNode.getValueType(); + MVT VT = MaskNode.getSimpleValueType(); assert(VT.isVector() && "Can't produce a non-vector with a build_vector!"); if (!VT.isInteger()) @@ -4572,6 +4893,119 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, case X86ISD::MOVLPS: // Not yet implemented return false; + case X86ISD::VPERMV: { + IsUnary = true; + SDValue MaskNode = N->getOperand(0); + while (MaskNode->getOpcode() == ISD::BITCAST) + MaskNode = MaskNode->getOperand(0); + + unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()); + SmallVector RawMask; + if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { + // If we have a build-vector, then things are easy. 
+ assert(MaskNode.getSimpleValueType().isInteger() && + MaskNode.getSimpleValueType().getVectorNumElements() == + VT.getVectorNumElements()); + + for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) { + SDValue Op = MaskNode->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) + RawMask.push_back((uint64_t)SM_SentinelUndef); + else if (isa(Op)) { + APInt MaskElement = cast(Op)->getAPIntValue(); + RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue()); + } else + return false; + } + DecodeVPERMVMask(RawMask, Mask); + break; + } + if (MaskNode->getOpcode() == X86ISD::VBROADCAST) { + unsigned NumEltsInMask = MaskNode->getNumOperands(); + MaskNode = MaskNode->getOperand(0); + if (auto *CN = dyn_cast(MaskNode)) { + APInt MaskEltValue = CN->getAPIntValue(); + for (unsigned i = 0; i < NumEltsInMask; ++i) + RawMask.push_back(MaskEltValue.getLoBits(MaskLoBits).getZExtValue()); + DecodeVPERMVMask(RawMask, Mask); + break; + } + // It may be a scalar load + } + + auto *MaskLoad = dyn_cast(MaskNode); + if (!MaskLoad) + return false; + + SDValue Ptr = MaskLoad->getBasePtr(); + if (Ptr->getOpcode() == X86ISD::Wrapper || + Ptr->getOpcode() == X86ISD::WrapperRIP) + Ptr = Ptr->getOperand(0); + + auto *MaskCP = dyn_cast(Ptr); + if (!MaskCP || MaskCP->isMachineConstantPoolEntry()) + return false; + + if (auto *C = dyn_cast(MaskCP->getConstVal())) { + DecodeVPERMVMask(C, VT, Mask); + if (Mask.empty()) + return false; + break; + } + return false; + } + case X86ISD::VPERMV3: { + IsUnary = false; + SDValue MaskNode = N->getOperand(1); + while (MaskNode->getOpcode() == ISD::BITCAST) + MaskNode = MaskNode->getOperand(1); + + if (MaskNode->getOpcode() == ISD::BUILD_VECTOR) { + // If we have a build-vector, then things are easy. + assert(MaskNode.getSimpleValueType().isInteger() && + MaskNode.getSimpleValueType().getVectorNumElements() == + VT.getVectorNumElements()); + + SmallVector RawMask; + unsigned MaskLoBits = Log2_64(VT.getVectorNumElements()*2); + + for (unsigned i = 0; i < MaskNode->getNumOperands(); ++i) { + SDValue Op = MaskNode->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) + RawMask.push_back((uint64_t)SM_SentinelUndef); + else { + auto *CN = dyn_cast(Op.getNode()); + if (!CN) + return false; + APInt MaskElement = CN->getAPIntValue(); + RawMask.push_back(MaskElement.getLoBits(MaskLoBits).getZExtValue()); + } + } + DecodeVPERMV3Mask(RawMask, Mask); + break; + } + + auto *MaskLoad = dyn_cast(MaskNode); + if (!MaskLoad) + return false; + + SDValue Ptr = MaskLoad->getBasePtr(); + if (Ptr->getOpcode() == X86ISD::Wrapper || + Ptr->getOpcode() == X86ISD::WrapperRIP) + Ptr = Ptr->getOperand(0); + + auto *MaskCP = dyn_cast(Ptr); + if (!MaskCP || MaskCP->isMachineConstantPoolEntry()) + return false; + + if (auto *C = dyn_cast(MaskCP->getConstVal())) { + DecodeVPERMV3Mask(C, VT, Mask); + if (Mask.empty()) + return false; + break; + } + return false; + } default: llvm_unreachable("unknown target shuffle node"); } @@ -4586,7 +5020,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, return true; } -/// getShuffleScalarElt - Returns the scalar element that will make up the ith +/// Returns the scalar element that will make up the ith /// element of the result of the vector shuffle. static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, unsigned Depth) { @@ -4650,8 +5084,7 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG, return SDValue(); } -/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8. 
-/// +/// Custom lower build_vector of v16i8. static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, @@ -4721,8 +5154,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros, return DAG.getBitcast(MVT::v16i8, V); } -/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16. -/// +/// Custom lower build_vector of v8i16. static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, unsigned NumNonZero, unsigned NumZero, SelectionDAG &DAG, @@ -4753,7 +5185,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros, return V; } -/// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32. +/// Custom lower build_vector of v4i32 or v4f32. static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, const X86Subtarget *Subtarget, const TargetLowering &TLI) { @@ -4924,7 +5356,7 @@ LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, SDLoc dl, SelectionDAG &DAG) { return SDValue(); if ((Offset % RequiredAlign) & 3) return SDValue(); - int64_t StartOffset = Offset & ~(RequiredAlign-1); + int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1); if (StartOffset) { SDLoc DL(Ptr); Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -5157,8 +5589,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, // TODO: If multiple splats are generated to load the same constant, // it may be detrimental to overall size. There needs to be a way to detect // that condition to know if this is truly a size win. - const Function *F = DAG.getMachineFunction().getFunction(); - bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize); + bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize(); // Handle broadcasting a single constant scalar from the constant pool // into a vector. @@ -5188,9 +5619,10 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, SDValue CP = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CP)->getAlignment(); - Ld = DAG.getLoad(CVT, dl, DAG.getEntryNode(), CP, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + Ld = DAG.getLoad( + CVT, dl, DAG.getEntryNode(), CP, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); } @@ -5329,7 +5761,7 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { return NV; } -static SDValue ConvertI1VectorToInterger(SDValue Op, SelectionDAG &DAG) { +static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) { assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"); @@ -5366,7 +5798,7 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { } if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { - SDValue Imm = ConvertI1VectorToInterger(Op, DAG); + SDValue Imm = ConvertI1VectorToInteger(Op, DAG); if (Imm.getValueSizeInBits() == VT.getSizeInBits()) return DAG.getBitcast(VT, Imm); SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm); @@ -5600,7 +6032,7 @@ static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, /// node. 
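// --- Editor's aside on the StartOffset hunk above: assuming RequiredAlign is
// a 32-bit unsigned (as the added int64_t cast suggests), ~(RequiredAlign - 1)
// is a 32-bit mask that zero-extends and silently clears bits 32 and up of
// the 64-bit offset. A self-contained demonstration:
#include <cassert>
#include <cstdint>
static void maskWidthDemo() {
  uint32_t RequiredAlign = 16;
  int64_t Offset = 0x100000008LL;
  int64_t Broken = Offset & ~(RequiredAlign - 1);        // == 0, high bits lost
  int64_t Fixed = Offset & ~int64_t(RequiredAlign - 1);  // == 0x100000000
  assert(Broken == 0 && Fixed == 0x100000000LL);
}
// --- End of aside. ---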
static SDValue LowerToAddSub(const BuildVectorSDNode *BV, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = BV->getValueType(0); + MVT VT = BV->getSimpleValueType(0); if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) && (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64))) return SDValue(); @@ -5662,12 +6094,12 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV, // Update InVec0 and InVec1. if (InVec0.getOpcode() == ISD::UNDEF) { InVec0 = Op0.getOperand(0); - if (InVec0.getValueType() != VT) + if (InVec0.getSimpleValueType() != VT) return SDValue(); } if (InVec1.getOpcode() == ISD::UNDEF) { InVec1 = Op1.getOperand(0); - if (InVec1.getValueType() != VT) + if (InVec1.getSimpleValueType() != VT) return SDValue(); } @@ -5703,7 +6135,7 @@ static SDValue LowerToAddSub(const BuildVectorSDNode *BV, static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - EVT VT = BV->getValueType(0); + MVT VT = BV->getSimpleValueType(0); unsigned NumElts = VT.getVectorNumElements(); unsigned NumUndefsLO = 0; unsigned NumUndefsHI = 0; @@ -5845,7 +6277,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned NumElems = Op.getNumOperands(); // Generate vectors for predicate vectors. - if (VT.getScalarType() == MVT::i1 && Subtarget->hasAVX512()) + if (VT.getVectorElementType() == MVT::i1 && Subtarget->hasAVX512()) return LowerBUILD_VECTORvXi1(Op, DAG); // Vectors containing all zeros can be matched by pxor and xorps later @@ -5866,7 +6298,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return Op; if (!VT.is512BitVector()) - return getOnesVector(VT, Subtarget->hasInt256(), DAG, dl); + return getOnesVector(VT, Subtarget, DAG, dl); } BuildVectorSDNode *BV = cast(Op.getNode()); @@ -5881,7 +6313,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned NumZero = 0; unsigned NumNonZero = 0; - unsigned NonZeros = 0; + uint64_t NonZeros = 0; bool IsAllConstants = true; SmallSet Values; for (unsigned i = 0; i < NumElems; ++i) { @@ -5895,7 +6327,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (X86::isZeroNode(Elt)) NumZero++; else { - NonZeros |= (1 << i); + assert(i < sizeof(NonZeros) * 8); // Make sure the shift is within range. + NonZeros |= ((uint64_t)1 << i); NumNonZero++; } } @@ -5919,7 +6352,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) { // Handle SSE only. assert(VT == MVT::v2i64 && "Expected an SSE value type!"); - EVT VecVT = MVT::v4i32; + MVT VecVT = MVT::v4i32; // Truncate the value (which may itself be a constant) to i32, and // convert it to a vector with movd (S2V+shuffle to zero extend). @@ -6051,7 +6484,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // One half is zero or undef. unsigned Idx = countTrailingZeros(NonZeros); SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, - Op.getOperand(Idx)); + Op.getOperand(Idx)); return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); } return SDValue(); @@ -6059,13 +6492,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // If element VT is < 32 bits, convert it to inserts into a zero vector. 
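// --- Editor's aside on the NonZeros hunk above: widening to uint64_t matters
// because a 32-bit "1 << i" is undefined behavior for i >= 32, so a build
// vector with more than 32 elements (e.g. v64i8) could not be tracked.
// Minimal illustration:
#include <cstdint>
static uint64_t setBitSafely(uint64_t Bits, unsigned i) {
  // With a plain int literal this is UB once i >= 32; widen first.
  return Bits | ((uint64_t)1 << i);
}
// --- End of aside. ---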
if (EVTBits == 8 && NumElems == 16) - if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG, - Subtarget, *this)) + if (SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, + DAG, Subtarget, *this)) return V; if (EVTBits == 16 && NumElems == 8) - if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG, - Subtarget, *this)) + if (SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, + DAG, Subtarget, *this)) return V; // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS @@ -6077,7 +6510,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SmallVector V(NumElems); if (NumElems == 4 && NumZero > 0) { for (unsigned i = 0; i < 4; ++i) { - bool isZero = !(NonZeros & (1 << i)); + bool isZero = !(NonZeros & (1ULL << i)); if (isZero) V[i] = getZeroVector(VT, Subtarget, DAG, dl); else @@ -6177,7 +6610,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } -// LowerAVXCONCAT_VECTORS - 256-bit AVX can use the vinsertf128 instruction +// 256-bit AVX can use the vinsertf128 instruction // to create 256-bit vectors from two other 128-bit ones. static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); @@ -6193,8 +6626,8 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return Concat128BitVectors(V1, V2, ResVT, NumElems, DAG, dl); if (Op.getNumOperands() == 4) { - MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(), - ResVT.getVectorNumElements()/2); + MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), + ResVT.getVectorNumElements()/2); SDValue V3 = Op.getOperand(2); SDValue V4 = Op.getOperand(3); return Concat256BitVectors(Concat128BitVectors(V1, V2, HalfVT, NumElems/2, DAG, dl), @@ -6213,8 +6646,27 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, assert(isPowerOf2_32(NumOfOperands) && "Unexpected number of operands in CONCAT_VECTORS"); + SDValue Undef = DAG.getUNDEF(ResVT); if (NumOfOperands > 2) { - MVT HalfVT = MVT::getVectorVT(ResVT.getScalarType(), + // Specialize the cases when all, or all but one, of the operands are undef. 
+ unsigned NumOfDefinedOps = 0; + unsigned OpIdx = 0; + for (unsigned i = 0; i < NumOfOperands; i++) + if (!Op.getOperand(i).isUndef()) { + NumOfDefinedOps++; + OpIdx = i; + } + if (NumOfDefinedOps == 0) + return Undef; + if (NumOfDefinedOps == 1) { + unsigned SubVecNumElts = + Op.getOperand(OpIdx).getValueType().getVectorNumElements(); + SDValue IdxVal = DAG.getIntPtrConstant(SubVecNumElts * OpIdx, dl); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, + Op.getOperand(OpIdx), IdxVal); + } + + MVT HalfVT = MVT::getVectorVT(ResVT.getVectorElementType(), ResVT.getVectorNumElements()/2); SmallVector Ops; for (unsigned i = 0; i < NumOfOperands/2; i++) @@ -6227,31 +6679,38 @@ static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); } + // 2 operands SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); + unsigned NumElems = ResVT.getVectorNumElements(); + assert(V1.getValueType() == V2.getValueType() && + V1.getValueType().getVectorNumElements() == NumElems/2 && + "Unexpected operands in CONCAT_VECTORS"); + + if (ResVT.getSizeInBits() >= 16) + return Op; // The operation is legal with KUNPCK + bool IsZeroV1 = ISD::isBuildVectorAllZeros(V1.getNode()); bool IsZeroV2 = ISD::isBuildVectorAllZeros(V2.getNode()); - + SDValue ZeroVec = getZeroVector(ResVT, Subtarget, DAG, dl); if (IsZeroV1 && IsZeroV2) - return getZeroVector(ResVT, Subtarget, DAG, dl); + return ZeroVec; SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - SDValue Undef = DAG.getUNDEF(ResVT); - unsigned NumElems = ResVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8); + if (V2.isUndef()) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); + if (IsZeroV2) + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V1, ZeroIdx); + + SDValue IdxVal = DAG.getIntPtrConstant(NumElems/2, dl); + if (V1.isUndef()) + V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, IdxVal); - V2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V2, ZeroIdx); - V2 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V2, ShiftBits); if (IsZeroV1) - return V2; + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, ZeroVec, V2, IdxVal); V1 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Undef, V1, ZeroIdx); - // Zero the upper bits of V1 - V1 = DAG.getNode(X86ISD::VSHLI, dl, ResVT, V1, ShiftBits); - V1 = DAG.getNode(X86ISD::VSRLI, dl, ResVT, V1, ShiftBits); - if (IsZeroV2) - return V1; - return DAG.getNode(ISD::OR, dl, ResVT, V1, V2); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, V1, V2, IdxVal); } static SDValue LowerCONCAT_VECTORS(SDValue Op, @@ -6272,7 +6731,6 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, return LowerAVXCONCAT_VECTORS(Op, DAG); } - //===----------------------------------------------------------------------===// // Vector shuffle lowering // @@ -6422,6 +6880,127 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef Mask, SDLoc DL, return DAG.getConstant(Imm, DL, MVT::i8); } +/// \brief Compute whether each element of a shuffle is zeroable. +/// +/// A "zeroable" vector shuffle element is one which can be lowered to zero. +/// Either it is an undef element in the shuffle mask, the element of the input +/// referenced is undef, or the element of the input referenced is known to be +/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle +/// as many lanes with this technique as possible to simplify the remaining +/// shuffle. 
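// --- Editor's aside: the cheap part of the zeroable computation defined
// below, modeled standalone (V1IsZero/V2IsZero stand in for the
// build_vector-of-zeros checks; the per-element build_vector digging is
// omitted from this sketch).
#include <vector>
static std::vector<bool> zeroableElements(const std::vector<int> &Mask,
                                          int Size, bool V1IsZero,
                                          bool V2IsZero) {
  std::vector<bool> Z(Mask.size());
  for (unsigned i = 0; i < Mask.size(); ++i) {
    int M = Mask[i];
    Z[i] = M < 0 || (M < Size && V1IsZero) || (M >= Size && V2IsZero);
  }
  return Z;
}
// --- End of aside; the in-tree implementation follows. ---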
+static SmallBitVector computeZeroableShuffleElements(ArrayRef Mask, + SDValue V1, SDValue V2) { + SmallBitVector Zeroable(Mask.size(), false); + + while (V1.getOpcode() == ISD::BITCAST) + V1 = V1->getOperand(0); + while (V2.getOpcode() == ISD::BITCAST) + V2 = V2->getOperand(0); + + bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); + bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); + + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + int M = Mask[i]; + // Handle the easy cases. + if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { + Zeroable[i] = true; + continue; + } + + // If this is an index into a build_vector node (which has the same number + // of elements), dig out the input value and use it. + SDValue V = M < Size ? V1 : V2; + if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands()) + continue; + + SDValue Input = V.getOperand(M % Size); + // The UNDEF opcode check really should be dead code here, but not quite + // worth asserting on (it isn't invalid, just unexpected). + if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input)) + Zeroable[i] = true; + } + + return Zeroable; +} + +// X86 has dedicated unpack instructions that can handle specific blend +// operations: UNPCKH and UNPCKL. +static SDValue lowerVectorShuffleWithUNPCK(SDLoc DL, MVT VT, ArrayRef Mask, + SDValue V1, SDValue V2, + SelectionDAG &DAG) { + int NumElts = VT.getVectorNumElements(); + int NumEltsInLane = 128 / VT.getScalarSizeInBits(); + SmallVector Unpckl; + SmallVector Unpckh; + + for (int i = 0; i < NumElts; ++i) { + unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; + int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2); + int HiPos = LoPos + NumEltsInLane / 2; + Unpckl.push_back(LoPos); + Unpckh.push_back(HiPos); + } + + if (isShuffleEquivalent(V1, V2, Mask, Unpckl)) + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, Unpckh)) + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); + + // Commute and try again. + ShuffleVectorSDNode::commuteMask(Unpckl); + if (isShuffleEquivalent(V1, V2, Mask, Unpckl)) + return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); + + ShuffleVectorSDNode::commuteMask(Unpckh); + if (isShuffleEquivalent(V1, V2, Mask, Unpckh)) + return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); + + return SDValue(); +} + +/// \brief Try to emit a bitmask instruction for a shuffle. +/// +/// This handles cases where we can model a blend exactly as a bitmask due to +/// one of the inputs being zeroable. +static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + SelectionDAG &DAG) { + MVT EltVT = VT.getVectorElementType(); + int NumEltBits = EltVT.getSizeInBits(); + MVT IntEltVT = MVT::getIntegerVT(NumEltBits); + SDValue Zero = DAG.getConstant(0, DL, IntEltVT); + SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, + IntEltVT); + if (EltVT.isFloatingPoint()) { + Zero = DAG.getBitcast(EltVT, Zero); + AllOnes = DAG.getBitcast(EltVT, AllOnes); + } + SmallVector VMaskOps(Mask.size(), Zero); + SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); + SDValue V; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + if (Zeroable[i]) + continue; + if (Mask[i] % Size != i) + return SDValue(); // Not a blend. + if (!V) + V = Mask[i] < Size ? V1 : V2; + else if (V != (Mask[i] < Size ? V1 : V2)) + return SDValue(); // Can only let one input through the mask. 
+ + VMaskOps[i] = AllOnes; + } + if (!V) + return SDValue(); // No non-zeroable elements! + + SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps); + V = DAG.getNode(VT.isFloatingPoint() + ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND, + DL, VT, V, VMask); + return V; +} + /// \brief Try to emit a blend instruction for a shuffle using bit math. /// /// This is used as a fallback approach when first class blend instructions are @@ -6431,7 +7010,7 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, SDValue V2, ArrayRef Mask, SelectionDAG &DAG) { assert(VT.isInteger() && "Only supports integer vector types!"); - MVT EltVT = VT.getScalarType(); + MVT EltVT = VT.getVectorElementType(); int NumEltBits = EltVT.getSizeInBits(); SDValue Zero = DAG.getConstant(0, DL, EltVT); SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL, @@ -6458,22 +7037,62 @@ static SDValue lowerVectorShuffleAsBitBlend(SDLoc DL, MVT VT, SDValue V1, /// This doesn't do any checks for the availability of instructions for blending /// these values. It relies on the availability of the X86ISD::BLENDI pattern to /// be matched in the backend with the type given. What it does check for is -/// that the shuffle mask is in fact a blend. +/// that the shuffle mask is a blend, or convertible into a blend with zero. static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, - SDValue V2, ArrayRef Mask, + SDValue V2, ArrayRef Original, const X86Subtarget *Subtarget, SelectionDAG &DAG) { + bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); + bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); + SmallVector Mask(Original.begin(), Original.end()); + SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); + bool ForceV1Zero = false, ForceV2Zero = false; + + // Attempt to generate the binary blend mask. If an input is zero then + // we can use any lane. + // TODO: generalize the zero matching to any scalar like isShuffleEquivalent. unsigned BlendMask = 0; for (int i = 0, Size = Mask.size(); i < Size; ++i) { - if (Mask[i] >= Size) { - if (Mask[i] != i + Size) - return SDValue(); // Shuffled V2 input! + int M = Mask[i]; + if (M < 0) + continue; + if (M == i) + continue; + if (M == i + Size) { BlendMask |= 1u << i; continue; } - if (Mask[i] >= 0 && Mask[i] != i) - return SDValue(); // Shuffled V1 input! + if (Zeroable[i]) { + if (V1IsZero) { + ForceV1Zero = true; + Mask[i] = i; + continue; + } + if (V2IsZero) { + ForceV2Zero = true; + BlendMask |= 1u << i; + Mask[i] = i + Size; + continue; + } + } + return SDValue(); // Shuffled input! } + + // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs. + if (ForceV1Zero) + V1 = getZeroVector(VT, Subtarget, DAG, DL); + if (ForceV2Zero) + V2 = getZeroVector(VT, Subtarget, DAG, DL); + + auto ScaleBlendMask = [](unsigned BlendMask, int Size, int Scale) { + unsigned ScaledMask = 0; + for (int i = 0; i != Size; ++i) + if (BlendMask & (1u << i)) + for (int j = 0; j != Scale; ++j) + ScaledMask |= 1u << (i * Scale + j); + return ScaledMask; + }; + switch (VT.SimpleTy) { case MVT::v2f64: case MVT::v4f32: @@ -6493,12 +7112,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, if (Subtarget->hasAVX2()) { // Scale the blend by the number of 32-bit dwords per element. 
int Scale = VT.getScalarSizeInBits() / 32; - BlendMask = 0; - for (int i = 0, Size = Mask.size(); i < Size; ++i) - if (Mask[i] >= Size) - for (int j = 0; j < Scale; ++j) - BlendMask |= 1u << (i * Scale + j); - + BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale); MVT BlendVT = VT.getSizeInBits() > 128 ? MVT::v8i32 : MVT::v4i32; V1 = DAG.getBitcast(BlendVT, V1); V2 = DAG.getBitcast(BlendVT, V2); @@ -6511,12 +7125,7 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, // For integer shuffles we need to expand the mask and cast the inputs to // v8i16s prior to blending. int Scale = 8 / VT.getVectorNumElements(); - BlendMask = 0; - for (int i = 0, Size = Mask.size(); i < Size; ++i) - if (Mask[i] >= Size) - for (int j = 0; j < Scale; ++j) - BlendMask |= 1u << (i * Scale + j); - + BlendMask = ScaleBlendMask(BlendMask, Mask.size(), Scale); V1 = DAG.getBitcast(MVT::v8i16, V1); V2 = DAG.getBitcast(MVT::v8i16, V2); return DAG.getBitcast(VT, @@ -6541,9 +7150,13 @@ static SDValue lowerVectorShuffleAsBlend(SDLoc DL, MVT VT, SDValue V1, // FALLTHROUGH case MVT::v16i8: case MVT::v32i8: { - assert((VT.getSizeInBits() == 128 || Subtarget->hasAVX2()) && + assert((VT.is128BitVector() || Subtarget->hasAVX2()) && "256-bit byte-blends require AVX2 support!"); + // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. + if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, DAG)) + return Masked; + // Scale the blend by the number of bytes per element. int Scale = VT.getScalarSizeInBits() / 8; @@ -6760,11 +7373,11 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, Hi = DAG.getBitcast(AlignVT, Hi); return DAG.getBitcast( - VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Hi, Lo, + VT, DAG.getNode(X86ISD::PALIGNR, DL, AlignVT, Lo, Hi, DAG.getConstant(Rotation * Scale, DL, MVT::i8))); } - assert(VT.getSizeInBits() == 128 && + assert(VT.is128BitVector() && "Rotate-based lowering only supports 128-bit lowering!"); assert(Mask.size() <= 16 && "Can shuffle at most 16 bytes in a 128-bit vector!"); @@ -6785,92 +7398,6 @@ static SDValue lowerVectorShuffleAsByteRotate(SDLoc DL, MVT VT, SDValue V1, DAG.getNode(ISD::OR, DL, MVT::v2i64, LoShift, HiShift)); } -/// \brief Compute whether each element of a shuffle is zeroable. -/// -/// A "zeroable" vector shuffle element is one which can be lowered to zero. -/// Either it is an undef element in the shuffle mask, the element of the input -/// referenced is undef, or the element of the input referenced is known to be -/// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle -/// as many lanes with this technique as possible to simplify the remaining -/// shuffle. -static SmallBitVector computeZeroableShuffleElements(ArrayRef Mask, - SDValue V1, SDValue V2) { - SmallBitVector Zeroable(Mask.size(), false); - - while (V1.getOpcode() == ISD::BITCAST) - V1 = V1->getOperand(0); - while (V2.getOpcode() == ISD::BITCAST) - V2 = V2->getOperand(0); - - bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); - bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); - - for (int i = 0, Size = Mask.size(); i < Size; ++i) { - int M = Mask[i]; - // Handle the easy cases. - if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { - Zeroable[i] = true; - continue; - } - - // If this is an index into a build_vector node (which has the same number - // of elements), dig out the input value and use it. - SDValue V = M < Size ? 
V1 : V2;
-    if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
-      continue;
-
-    SDValue Input = V.getOperand(M % Size);
-    // The UNDEF opcode check really should be dead code here, but not quite
-    // worth asserting on (it isn't invalid, just unexpected).
-    if (Input.getOpcode() == ISD::UNDEF || X86::isZeroNode(Input))
-      Zeroable[i] = true;
-  }
-
-  return Zeroable;
-}
-
-/// \brief Try to emit a bitmask instruction for a shuffle.
-///
-/// This handles cases where we can model a blend exactly as a bitmask due to
-/// one of the inputs being zeroable.
-static SDValue lowerVectorShuffleAsBitMask(SDLoc DL, MVT VT, SDValue V1,
-                                           SDValue V2, ArrayRef<int> Mask,
-                                           SelectionDAG &DAG) {
-  MVT EltVT = VT.getScalarType();
-  int NumEltBits = EltVT.getSizeInBits();
-  MVT IntEltVT = MVT::getIntegerVT(NumEltBits);
-  SDValue Zero = DAG.getConstant(0, DL, IntEltVT);
-  SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
-                                    IntEltVT);
-  if (EltVT.isFloatingPoint()) {
-    Zero = DAG.getBitcast(EltVT, Zero);
-    AllOnes = DAG.getBitcast(EltVT, AllOnes);
-  }
-  SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
-  SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
-  SDValue V;
-  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
-    if (Zeroable[i])
-      continue;
-    if (Mask[i] % Size != i)
-      return SDValue(); // Not a blend.
-    if (!V)
-      V = Mask[i] < Size ? V1 : V2;
-    else if (V != (Mask[i] < Size ? V1 : V2))
-      return SDValue(); // Can only let one input through the mask.
-
-    VMaskOps[i] = AllOnes;
-  }
-  if (!V)
-    return SDValue(); // No non-zeroable elements!
-
-  SDValue VMask = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, VMaskOps);
-  V = DAG.getNode(VT.isFloatingPoint()
-                  ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND,
-                  DL, VT, V, VMask);
-  return V;
-}
-
 /// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
 ///
 /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -6982,7 +7509,7 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
     // Determine the extraction length from the part of the
     // lower half that isn't zeroable.
     int Len = HalfSize;
-    for (; Len >= 0; --Len)
+    for (; Len > 0; --Len)
      if (!Zeroable[Len - 1])
        break;
    assert(Len > 0 && "Zeroable shuffle mask");
@@ -6997,8 +7524,9 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
      SDValue &V = (M < Size ? V1 : V2);
      M = M % Size;

-      // All mask elements must be in the lower half.
-      if (M > HalfSize)
+      // The extracted elements must start at a valid index and all mask
+      // elements must be in the lower half.
+      if (i > M || M >= HalfSize)
        return SDValue();

      if (Idx < 0 || (Src == V && Idx == (M - i))) {
@@ -7095,64 +7623,104 @@ static SDValue lowerVectorShuffleWithSSE4A(SDLoc DL, MVT VT, SDValue V1,
 ///
 /// Given a specific number of elements, element bit width, and extension
 /// stride, produce either a zero or any extension based on the available
-/// features of the subtarget.
+/// features of the subtarget. The extended elements are consecutive and
+/// can start from an offset element index in the input; to avoid excess
+/// shuffling, the offset must either be in the bottom (first 128-bit) lane
+/// or at the start of a higher lane. All extended elements must come from
+/// the same lane.
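// Before the implementation, a standalone check of the offset rule just
// stated (a minimal sketch with a hypothetical helper, not part of the
// patch): a base offset is legal if it lies in lane 0 or is lane-aligned.
#include <cassert>

static bool isLegalExtensionOffset(int Offset, int EltBits) {
  int NumEltsPerLane = 128 / EltBits; // elements per 128-bit lane
  return Offset >= 0 &&
         (Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0);
}

int main() {
  // v32i8 (AVX2): 16 bytes per lane. Offset 3 is inside lane 0, offset 16
  // starts lane 1, but offset 20 lands mid-way through lane 1.
  assert(isLegalExtensionOffset(3, 8));
  assert(isLegalExtensionOffset(16, 8));
  assert(!isLegalExtensionOffset(20, 8));
  return 0;
}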
static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
-    SDLoc DL, MVT VT, int Scale, bool AnyExt, SDValue InputV,
+    SDLoc DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
     ArrayRef<int> Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) {
   assert(Scale > 1 && "Need a scale to extend.");
-  int NumElements = VT.getVectorNumElements();
   int EltBits = VT.getScalarSizeInBits();
+  int NumElements = VT.getVectorNumElements();
+  int NumEltsPerLane = 128 / EltBits;
+  int OffsetLane = Offset / NumEltsPerLane;
   assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
          "Only 8, 16, and 32 bit elements can be extended.");
   assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
+  assert(0 <= Offset && "Extension offset must be non-negative.");
+  assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
+         "Extension offset must be in the first lane or start an upper lane.");
+
+  // Check that an index is in the same lane as the base offset.
+  auto SafeOffset = [&](int Idx) {
+    return OffsetLane == (Idx / NumEltsPerLane);
+  };
+
+  // Shift along an input so that the offset base moves to the first element.
+  auto ShuffleOffset = [&](SDValue V) {
+    if (!Offset)
+      return V;
+
+    SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
+    for (int i = 0; i * Scale < NumElements; ++i) {
+      int SrcIdx = i + Offset;
+      ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
+    }
+    return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
+  };

   // Found a valid zext mask! Try various lowering strategies based on the
   // input type and available ISA extensions.
   if (Subtarget->hasSSE41()) {
+    // Not worth offsetting 128-bit vectors if scale == 2; a pattern using
+    // PUNPCK will catch this in a later shuffle match.
+    if (Offset && Scale == 2 && VT.is128BitVector())
+      return SDValue();
     MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
                                  NumElements / Scale);
-    return DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV));
+    InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, ShuffleOffset(InputV));
+    return DAG.getBitcast(VT, InputV);
   }

+  assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
+
   // For any extends we can cheat for larger element sizes and use shuffle
   // instructions that can fold with a load and/or copy.
   if (AnyExt && EltBits == 32) {
-    int PSHUFDMask[4] = {0, -1, 1, -1};
+    int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,
+                         -1};
     return DAG.getBitcast(
         VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                         DAG.getBitcast(MVT::v4i32, InputV),
                         getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
   }
   if (AnyExt && EltBits == 16 && Scale > 2) {
-    int PSHUFDMask[4] = {0, -1, 0, -1};
+    int PSHUFDMask[4] = {Offset / 2, -1,
+                         SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};
     InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                          DAG.getBitcast(MVT::v4i32, InputV),
                          getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
-    int PSHUFHWMask[4] = {1, -1, -1, -1};
+    int PSHUFWMask[4] = {1, -1, -1, -1};
+    unsigned OddEvenOp = (Offset & 1 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW);
     return DAG.getBitcast(
-        VT, DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16,
+        VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
                         DAG.getBitcast(MVT::v8i16, InputV),
-                        getV4X86ShuffleImm8ForMask(PSHUFHWMask, DL, DAG)));
+                        getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG)));
   }

   // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
   // to 64-bits.
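// A standalone model of the EXTRQI step used below (my reading of the
// instruction, sketched with hypothetical names): extract LenBits bits
// starting at bit IdxBits of a 64-bit lane and zero-extend the result. The
// code below passes (EltBits, Offset * EltBits) for those two fields.
#include <cassert>
#include <cstdint>

static uint64_t extrqi(uint64_t Lane, unsigned LenBits, unsigned IdxBits) {
  uint64_t FieldMask = (LenBits >= 64) ? ~0ull : ((1ull << LenBits) - 1);
  return (Lane >> IdxBits) & FieldMask;
}

int main() {
  // Zero-extend byte 1 (0xBB) of the low 64-bit lane, mirroring the
  // Offset-based LoIdx computation below with EltBits = 8 and Offset = 1.
  assert(extrqi(0xDDCCBBAAull, /*LenBits=*/8, /*IdxBits=*/8) == 0xBBull);
  return 0;
}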
if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget->hasSSE4A()) { assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!"); - assert(VT.getSizeInBits() == 128 && "Unexpected vector width!"); + assert(VT.is128BitVector() && "Unexpected vector width!"); + int LoIdx = Offset * EltBits; SDValue Lo = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(0, DL, MVT::i8))); - if (isUndefInRange(Mask, NumElements/2, NumElements/2)) + DAG.getConstant(LoIdx, DL, MVT::i8))); + + if (isUndefInRange(Mask, NumElements / 2, NumElements / 2) || + !SafeOffset(Offset + 1)) return DAG.getNode(ISD::BITCAST, DL, VT, Lo); - SDValue Hi = - DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, - DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, - DAG.getConstant(EltBits, DL, MVT::i8), - DAG.getConstant(EltBits, DL, MVT::i8))); + int HiIdx = (Offset + 1) * EltBits; + SDValue Hi = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, + DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, + DAG.getConstant(EltBits, DL, MVT::i8), + DAG.getConstant(HiIdx, DL, MVT::i8))); return DAG.getNode(ISD::BITCAST, DL, VT, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); } @@ -7163,9 +7731,11 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( if (Scale > 4 && EltBits == 8 && Subtarget->hasSSSE3()) { assert(NumElements == 16 && "Unexpected byte vector width!"); SDValue PSHUFBMask[16]; - for (int i = 0; i < 16; ++i) - PSHUFBMask[i] = - DAG.getConstant((i % Scale == 0) ? i / Scale : 0x80, DL, MVT::i8); + for (int i = 0; i < 16; ++i) { + int Idx = Offset + (i / Scale); + PSHUFBMask[i] = DAG.getConstant( + (i % Scale == 0 && SafeOffset(Idx)) ? Idx : 0x80, DL, MVT::i8); + } InputV = DAG.getBitcast(MVT::v16i8, InputV); return DAG.getBitcast(VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, @@ -7173,13 +7743,30 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend( MVT::v16i8, PSHUFBMask))); } + // If we are extending from an offset, ensure we start on a boundary that + // we can unpack from. + int AlignToUnpack = Offset % (NumElements / Scale); + if (AlignToUnpack) { + SmallVector ShMask((unsigned)NumElements, -1); + for (int i = AlignToUnpack; i < NumElements; ++i) + ShMask[i - AlignToUnpack] = i; + InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask); + Offset -= AlignToUnpack; + } + // Otherwise emit a sequence of unpacks. do { + unsigned UnpackLoHi = X86ISD::UNPCKL; + if (Offset >= (NumElements / 2)) { + UnpackLoHi = X86ISD::UNPCKH; + Offset -= (NumElements / 2); + } + MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements); SDValue Ext = AnyExt ? 
DAG.getUNDEF(InputVT) : getZeroVector(InputVT, Subtarget, DAG, DL); InputV = DAG.getBitcast(InputVT, InputV); - InputV = DAG.getNode(X86ISD::UNPCKL, DL, InputVT, InputV, Ext); + InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext); Scale /= 2; EltBits *= 2; NumElements /= 2; @@ -7205,7 +7792,9 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2); int Bits = VT.getSizeInBits(); + int NumLanes = Bits / 128; int NumElements = VT.getVectorNumElements(); + int NumEltsPerLane = NumElements / NumLanes; assert(VT.getScalarSizeInBits() <= 32 && "Exceeds 32-bit integer zero extension limit"); assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size"); @@ -7215,8 +7804,11 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( auto Lower = [&](int Scale) -> SDValue { SDValue InputV; bool AnyExt = true; + int Offset = 0; + int Matches = 0; for (int i = 0; i < NumElements; ++i) { - if (Mask[i] == -1) + int M = Mask[i]; + if (M == -1) continue; // Valid anywhere but doesn't tell us anything. if (i % Scale != 0) { // Each of the extended elements need to be zeroable. @@ -7230,14 +7822,29 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( // Each of the base elements needs to be consecutive indices into the // same input vector. - SDValue V = Mask[i] < NumElements ? V1 : V2; - if (!InputV) + SDValue V = M < NumElements ? V1 : V2; + M = M % NumElements; + if (!InputV) { InputV = V; - else if (InputV != V) + Offset = M - (i / Scale); + } else if (InputV != V) return SDValue(); // Flip-flopping inputs. - if (Mask[i] % NumElements != i / Scale) + // Offset must start in the lowest 128-bit lane or at the start of an + // upper lane. + // FIXME: Is it ever worth allowing a negative base offset? + if (!((0 <= Offset && Offset < NumEltsPerLane) || + (Offset % NumEltsPerLane) == 0)) + return SDValue(); + + // If we are offsetting, all referenced entries must come from the same + // lane. + if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane)) + return SDValue(); + + if ((M % NumElements) != (Offset + (i / Scale))) return SDValue(); // Non-consecutive strided elements. + Matches++; } // If we fail to find an input, we have a zero-shuffle which should always @@ -7246,8 +7853,13 @@ static SDValue lowerVectorShuffleAsZeroOrAnyExtend( if (!InputV) return SDValue(); + // If we are offsetting, don't extend if we only match a single input, we + // can always do better by using a basic PSHUF or PUNPCK. + if (Offset != 0 && Matches < 2) + return SDValue(); + return lowerVectorShuffleAsSpecificZeroOrAnyExtend( - DL, VT, Scale, AnyExt, InputV, Mask, Subtarget, DAG); + DL, VT, Scale, Offset, AnyExt, InputV, Mask, Subtarget, DAG); }; // The widest scale possible for extending is to a 64-bit integer. @@ -7355,8 +7967,9 @@ static SDValue lowerVectorShuffleAsElementInsertion( // all the smarts here sunk into that routine. However, the current // lowering of BUILD_VECTOR makes that nearly impossible until the old // vector shuffle lowering is dead. - if (SDValue V2S = getScalarValueForVectorElement( - V2, Mask[V2Index] - Mask.size(), DAG)) { + SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), + DAG); + if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) { // We need to zext the scalar if it is smaller than an i32. 
V2S = DAG.getBitcast(EltVT, V2S); if (EltVT == MVT::i8 || EltVT == MVT::i16) { @@ -7431,11 +8044,65 @@ static SDValue lowerVectorShuffleAsElementInsertion( return V2; } +/// \brief Try to lower broadcast of a single - truncated - integer element, +/// coming from a scalar_to_vector/build_vector node \p V0 with larger elements. +/// +/// This assumes we have AVX2. +static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0, + int BroadcastIdx, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + assert(Subtarget->hasAVX2() && + "We can only lower integer broadcasts with AVX2!"); + + EVT EltVT = VT.getVectorElementType(); + EVT V0VT = V0.getValueType(); + + assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!"); + assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!"); + + EVT V0EltVT = V0VT.getVectorElementType(); + if (!V0EltVT.isInteger()) + return SDValue(); + + const unsigned EltSize = EltVT.getSizeInBits(); + const unsigned V0EltSize = V0EltVT.getSizeInBits(); + + // This is only a truncation if the original element type is larger. + if (V0EltSize <= EltSize) + return SDValue(); + + assert(((V0EltSize % EltSize) == 0) && + "Scalar type sizes must all be powers of 2 on x86!"); + + const unsigned V0Opc = V0.getOpcode(); + const unsigned Scale = V0EltSize / EltSize; + const unsigned V0BroadcastIdx = BroadcastIdx / Scale; + + if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) && + V0Opc != ISD::BUILD_VECTOR) + return SDValue(); + + SDValue Scalar = V0.getOperand(V0BroadcastIdx); + + // If we're extracting non-least-significant bits, shift so we can truncate. + // Hopefully, we can fold away the trunc/srl/load into the broadcast. + // Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer + // vpbroadcast+vmovd+shr to vpshufb(m)+vmovd. + if (const int OffsetIdx = BroadcastIdx % Scale) + Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar, + DAG.getConstant(OffsetIdx * EltSize, DL, Scalar.getValueType())); + + return DAG.getNode(X86ISD::VBROADCAST, DL, VT, + DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar)); +} + /// \brief Try to lower broadcast of a single element. /// /// For convenience, this code also bundles all of the subtarget feature set /// filtering. While a little annoying to re-dispatch on type here, there isn't /// a convenient way to factor it out. +/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them? static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, ArrayRef Mask, const X86Subtarget *Subtarget, @@ -7476,7 +8143,7 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, int BeginIdx = (int)ConstantIdx->getZExtValue(); int EndIdx = - BeginIdx + (int)VInner.getValueType().getVectorNumElements(); + BeginIdx + (int)VInner.getSimpleValueType().getVectorNumElements(); if (BroadcastIdx >= BeginIdx && BroadcastIdx < EndIdx) { BroadcastIdx -= BeginIdx; V = VInner; @@ -7491,6 +8158,15 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, // Check if this is a broadcast of a scalar. We special case lowering // for scalars so that we can more effectively fold with loads. + // First, look through bitcast: if the original value has a larger element + // type than the shuffle, the broadcast element is in essence truncated. + // Make that explicit to ease folding. 
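// A standalone model of the truncated-broadcast index math above (editor's
// sketch with hypothetical values): when a wide scalar of V0EltSize bits is
// viewed as EltSize-bit elements, broadcast element BroadcastIdx maps to
// wide element BroadcastIdx / Scale, shifted right by
// (BroadcastIdx % Scale) * EltSize before truncating.
#include <cassert>
#include <cstdint>

static uint32_t truncBroadcastElt(uint64_t WideElt, unsigned EltSize,
                                  unsigned OffsetIdx) {
  return (uint32_t)(WideElt >> (OffsetIdx * EltSize));
}

int main() {
  // Broadcasting 32-bit element 1 of a 64-bit scalar: Scale = 2, so wide
  // element 0 is shifted by 1 * 32 bits and then truncated to i32.
  assert(truncBroadcastElt(0x11223344AABBCCDDull, 32, 1) == 0x11223344u);
  return 0;
}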
+  if (V.getOpcode() == ISD::BITCAST && VT.isInteger())
+    if (SDValue TruncBroadcast = lowerVectorShuffleAsTruncBroadcast(
+            DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
+      return TruncBroadcast;
+
+  // Also check the simpler case, where we can directly reuse the scalar.
   if (V.getOpcode() == ISD::BUILD_VECTOR ||
       (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
     V = V.getOperand(BroadcastIdx);
@@ -7499,6 +8175,20 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
     // Only AVX2 has register broadcasts.
     if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
       return SDValue();
+  } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+    // If we are broadcasting a load that is only used by the shuffle
+    // then we can reduce the vector load to the broadcasted scalar load.
+    LoadSDNode *Ld = cast<LoadSDNode>(V);
+    SDValue BaseAddr = Ld->getOperand(1);
+    EVT AddrVT = BaseAddr.getValueType();
+    EVT SVT = VT.getScalarType();
+    unsigned Offset = BroadcastIdx * SVT.getStoreSize();
+    SDValue NewAddr = DAG.getNode(
+        ISD::ADD, DL, AddrVT, BaseAddr,
+        DAG.getConstant(Offset, DL, AddrVT));
+    V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
+                    DAG.getMachineFunction().getMachineMemOperand(
+                        Ld->getMemOperand(), Offset, SVT.getStoreSize()));
   } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) {
     // We can't broadcast from a vector register without AVX2, and we can only
     // broadcast from the zero-element of a vector register.
@@ -7595,9 +8285,10 @@ static SDValue lowerVectorShuffleAsInsertPS(SDValue Op, SDValue V1, SDValue V2,
 /// because for floating point vectors we have a generalized SHUFPS lowering
 /// strategy that handles everything that doesn't *exactly* match an unpack,
 /// making this clever lowering unnecessary.
-static SDValue lowerVectorShuffleAsUnpack(SDLoc DL, MVT VT, SDValue V1,
-                                          SDValue V2, ArrayRef<int> Mask,
-                                          SelectionDAG &DAG) {
+static SDValue lowerVectorShuffleAsPermuteAndUnpack(SDLoc DL, MVT VT,
+                                                    SDValue V1, SDValue V2,
+                                                    ArrayRef<int> Mask,
+                                                    SelectionDAG &DAG) {
   assert(!VT.isFloatingPoint() &&
          "This routine only supports integer vectors.");
   assert(!isSingleInputShuffleMask(Mask) &&
@@ -7774,10 +8465,9 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     return Blend;

   // Use dedicated unpack instructions for masks that match their pattern.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+  if (SDValue V =
+          lowerVectorShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
+    return V;

   unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
   return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
@@ -7869,10 +8559,9 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
     return Blend;

   // Use dedicated unpack instructions for masks that match their pattern.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 2}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {1, 3}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+  if (SDValue V =
+          lowerVectorShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
+    return V;

   // Try to use byte rotation instructions.
   // It's more profitable for pre-SSSE3 to use shuffles/unpacks.
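// A standalone sketch of the load-narrowing address math above (hypothetical
// plain pointers stand in for the MachineMemOperand plumbing): broadcasting
// element BroadcastIdx of a loaded vector only needs the scalar located at
// BaseAddr + BroadcastIdx * sizeof(element).
#include <cassert>
#include <cstdint>

static const uint32_t *broadcastScalarAddr(const uint32_t *BaseAddr,
                                           unsigned BroadcastIdx) {
  return BaseAddr + BroadcastIdx; // Offset = BroadcastIdx * store size
}

int main() {
  uint32_t Vec[4] = {10, 20, 30, 40};
  // Broadcasting element 2 loads only Vec[2] instead of the whole vector.
  assert(*broadcastScalarAddr(Vec, 2) == 30);
  return 0;
}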
@@ -8077,14 +8766,9 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, } // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG)) + return V; // Otherwise fall back to a SHUFPS lowering strategy. return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG); @@ -8161,14 +8845,9 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Masked; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 1, 5})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {2, 6, 3, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 5, 1})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {6, 2, 7, 3})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG)) + return V; // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. @@ -8184,8 +8863,8 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, Mask, DAG); // Try to lower by permuting the inputs into an unpack instruction. - if (SDValue Unpack = - lowerVectorShuffleAsUnpack(DL, MVT::v4i32, V1, V2, Mask, DAG)) + if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, + V2, Mask, DAG)) return Unpack; // We implement this with SHUFPS because it can blend from two vectors. @@ -8218,7 +8897,7 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, static SDValue lowerV8I16GeneralSingleInputVectorShuffle( SDLoc DL, MVT VT, SDValue V, MutableArrayRef Mask, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - assert(VT.getScalarType() == MVT::i16 && "Bad input type!"); + assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!"); MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); assert(Mask.size() == 8 && "Shuffle mask length doen't match!"); @@ -8286,16 +8965,18 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( assert(AToAInputs.size() + BToAInputs.size() == 4 && "Must call this with either 3:1 or 1:3 inputs (summing to 4)."); + bool ThreeAInputs = AToAInputs.size() == 3; + // Compute the index of dword with only one word among the three inputs in // a half by taking the sum of the half with three inputs and subtracting // the sum of the actual three inputs. The difference is the remaining // slot. int ADWord, BDWord; - int &TripleDWord = AToAInputs.size() == 3 ? ADWord : BDWord; - int &OneInputDWord = AToAInputs.size() == 3 ? BDWord : ADWord; - int TripleInputOffset = AToAInputs.size() == 3 ? AOffset : BOffset; - ArrayRef TripleInputs = AToAInputs.size() == 3 ? AToAInputs : BToAInputs; - int OneInput = AToAInputs.size() == 3 ? 
BToAInputs[0] : AToAInputs[0]; + int &TripleDWord = ThreeAInputs ? ADWord : BDWord; + int &OneInputDWord = ThreeAInputs ? BDWord : ADWord; + int TripleInputOffset = ThreeAInputs ? AOffset : BOffset; + ArrayRef TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs; + int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0]; int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset); int TripleNonInputIdx = TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); @@ -8364,8 +9045,7 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle( FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs); } else { assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!"); - int APinnedIdx = - AToAInputs.size() == 3 ? TripleNonInputIdx : OneInput; + int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput; FixFlippedInputs(APinnedIdx, ADWord, AToBInputs); } } @@ -8751,10 +9431,9 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Shift; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V1, Mask, {0, 0, 1, 1, 2, 2, 3, 3})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V1); - if (isShuffleEquivalent(V1, V1, Mask, {4, 4, 5, 5, 6, 6, 7, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) + return V; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, @@ -8798,10 +9477,9 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Masked; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 2, 10, 3, 11})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {4, 12, 5, 13, 6, 14, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i16, V1, V2); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) + return V; // Try to use byte rotation instructions. if (SDValue Rotate = lowerVectorShuffleAsByteRotate( @@ -8812,8 +9490,8 @@ static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, lowerVectorShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) return BitBlend; - if (SDValue Unpack = - lowerVectorShuffleAsUnpack(DL, MVT::v8i16, V1, V2, Mask, DAG)) + if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, + V2, Mask, DAG)) return Unpack; // If we can't directly blend but can use PSHUFB, that will be better as it @@ -9037,17 +9715,14 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return V; } + if (SDValue Masked = + lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, DAG)) + return Masked; + // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {// Low half. - 0, 16, 1, 17, 2, 18, 3, 19, - // High half. - 4, 20, 5, 21, 6, 22, 7, 23})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {// Low half. - 8, 24, 9, 25, 10, 26, 11, 27, - // High half. - 12, 28, 13, 29, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V1, V2); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) + return V; // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly // with PSHUFB. 
It is important to do this before we attempt to generate any @@ -9086,8 +9761,8 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, // FIXME: It might be worth trying to detect if the unpack-feeding // shuffles will both be pshufb, in which case we shouldn't bother with // this. - if (SDValue Unpack = - lowerVectorShuffleAsUnpack(DL, MVT::v16i8, V1, V2, Mask, DAG)) + if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack( + DL, MVT::v16i8, V1, V2, Mask, DAG)) return Unpack; } @@ -9296,7 +9971,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, int NumElements = VT.getVectorNumElements(); int SplitNumElements = NumElements / 2; - MVT ScalarVT = VT.getScalarType(); + MVT ScalarVT = VT.getVectorElementType(); MVT SplitVT = MVT::getVectorVT(ScalarVT, NumElements / 2); // Rather than splitting build-vectors, just build two narrower build @@ -9308,7 +9983,7 @@ static SDValue splitAndLowerVectorShuffle(SDLoc DL, MVT VT, SDValue V1, MVT OrigVT = V.getSimpleValueType(); int OrigNumElements = OrigVT.getVectorNumElements(); int OrigSplitNumElements = OrigNumElements / 2; - MVT OrigScalarVT = OrigVT.getScalarType(); + MVT OrigScalarVT = OrigVT.getVectorElementType(); MVT OrigSplitVT = MVT::getVectorVT(OrigScalarVT, OrigNumElements / 2); SDValue LoV, HiV; @@ -9478,7 +10153,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(SDLoc DL, MVT VT, ArrayRef Mask, SelectionDAG &DAG) { // FIXME: This should probably be generalized for 512-bit vectors as well. - assert(VT.getSizeInBits() == 256 && "Only for 256-bit vector shuffles!"); + assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!"); int LaneSize = Mask.size() / 2; // If there are only inputs from one 128-bit lane, splitting will in fact be @@ -9682,6 +10357,108 @@ static SDValue lowerVectorShuffleByMerging128BitLanes( return DAG.getVectorShuffle(VT, DL, LaneShuffle, DAG.getUNDEF(VT), NewMask); } +/// Lower shuffles where an entire half of a 256-bit vector is UNDEF. +/// This allows for fast cases such as subvector extraction/insertion +/// or shuffling smaller vector types which can lower more efficiently. +static SDValue lowerVectorShuffleWithUndefHalf(SDLoc DL, MVT VT, SDValue V1, + SDValue V2, ArrayRef Mask, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + assert(VT.getSizeInBits() == 256 && "Expected 256-bit vector"); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned HalfNumElts = NumElts / 2; + MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), HalfNumElts); + + bool UndefLower = isUndefInRange(Mask, 0, HalfNumElts); + bool UndefUpper = isUndefInRange(Mask, HalfNumElts, HalfNumElts); + if (!UndefLower && !UndefUpper) + return SDValue(); + + // Upper half is undef and lower half is whole upper subvector. + // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> + if (UndefUpper && + isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) { + SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, + DAG.getIntPtrConstant(HalfNumElts, DL)); + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, + DAG.getIntPtrConstant(0, DL)); + } + + // Lower half is undef and upper half is whole lower subvector. + // e.g. 
vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
+  if (UndefLower &&
+      isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+                             DAG.getIntPtrConstant(0, DL));
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
+                       DAG.getIntPtrConstant(HalfNumElts, DL));
+  }
+
+  // AVX2 supports efficient immediate 64-bit element cross-lane shuffles.
+  if (UndefLower && Subtarget->hasAVX2() &&
+      (VT == MVT::v4f64 || VT == MVT::v4i64))
+    return SDValue();
+
+  // If the shuffle only uses the lower halves of the input operands,
+  // then extract them and perform the 'half' shuffle at half width.
+  // e.g. vector_shuffle <0, 1, 8, 9, u, u, u, u>
+  int HalfIdx1 = -1, HalfIdx2 = -1;
+  SmallVector<int, 8> HalfMask;
+  unsigned Offset = UndefLower ? HalfNumElts : 0;
+  for (unsigned i = 0; i != HalfNumElts; ++i) {
+    int M = Mask[i + Offset];
+    if (M < 0) {
+      HalfMask.push_back(M);
+      continue;
+    }
+
+    // Determine which of the 4 half vectors this element is from.
+    // i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
+    int HalfIdx = M / HalfNumElts;
+
+    // Only shuffle using the lower halves of the inputs.
+    // TODO: Investigate usefulness of shuffling with upper halves.
+    if (HalfIdx != 0 && HalfIdx != 2)
+      return SDValue();
+
+    // Determine the element index into its half vector source.
+    int HalfElt = M % HalfNumElts;
+
+    // We can shuffle with up to 2 half vectors; set the new 'half'
+    // shuffle mask accordingly.
+    if (-1 == HalfIdx1 || HalfIdx1 == HalfIdx) {
+      HalfMask.push_back(HalfElt);
+      HalfIdx1 = HalfIdx;
+      continue;
+    }
+    if (-1 == HalfIdx2 || HalfIdx2 == HalfIdx) {
+      HalfMask.push_back(HalfElt + HalfNumElts);
+      HalfIdx2 = HalfIdx;
+      continue;
+    }
+
+    // Too many half vectors referenced.
+    return SDValue();
+  }
+  assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");
+
+  auto GetHalfVector = [&](int HalfIdx) {
+    if (HalfIdx < 0)
+      return DAG.getUNDEF(HalfVT);
+    SDValue V = (HalfIdx < 2 ? V1 : V2);
+    HalfIdx = (HalfIdx % 2) * HalfNumElts;
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V,
+                       DAG.getIntPtrConstant(HalfIdx, DL));
+  };
+
+  SDValue Half1 = GetHalfVector(HalfIdx1);
+  SDValue Half2 = GetHalfVector(HalfIdx2);
+  SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask);
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V,
+                     DAG.getIntPtrConstant(Offset, DL));
+}
+
 /// \brief Test whether the specified input (0 or 1) is in-place blended by the
 /// given mask.
 ///
@@ -9776,16 +10553,10 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                                    DAG);
   }

-  // X86 has dedicated unpack instructions that can handle specific blend
-  // operations: UNPCKH and UNPCKL.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
-  if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
+  // Use dedicated unpack instructions for masks that match their pattern.
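// A standalone sketch of the per-lane unpack patterns that
// lowerVectorShuffleWithUNPCK matches (hypothetical helper, not part of the
// patch): within each 128-bit lane, UNPCKL interleaves the low half of that
// lane of V1 with the low half of the same lane of V2; UNPCKH uses the high
// halves.
#include <cassert>
#include <vector>

static std::vector<int> unpackMask(int NumElts, int EltsPerLane, bool High) {
  std::vector<int> Mask;
  for (int Lane = 0; Lane < NumElts; Lane += EltsPerLane)
    for (int i = 0; i < EltsPerLane / 2; ++i) {
      int Base = Lane + i + (High ? EltsPerLane / 2 : 0);
      Mask.push_back(Base);           // element taken from V1
      Mask.push_back(Base + NumElts); // element taken from V2
    }
  return Mask;
}

int main() {
  // v4f64 UNPCKL: reproduces the {0, 4, 2, 6} mask that the removed
  // isShuffleEquivalent call above spelled out by hand.
  assert((unpackMask(4, 2, /*High=*/false) == std::vector<int>{0, 4, 2, 6}));
  return 0;
}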
+ if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG)) + return V; if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) @@ -9876,14 +10647,9 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Shift; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 4, 2, 6})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {1, 5, 3, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {4, 0, 6, 2})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {5, 1, 7, 3})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) + return V; // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, @@ -9941,14 +10707,9 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG)) + return V; // Otherwise, fall back to a SHUFPS sequence. Here it is important that we // have already handled any direct blends. We also need to squash the @@ -9974,9 +10735,7 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, if (Subtarget->hasAVX2()) return DAG.getNode( X86ISD::VPERMV, DL, MVT::v8f32, - DAG.getBitcast(MVT::v8f32, DAG.getNode(ISD::BUILD_VECTOR, DL, - MVT::v8i32, VPermMask)), - V1); + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i32, VPermMask), V1); // Otherwise, fall back. return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask, @@ -10041,14 +10800,9 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 1, 9, 4, 12, 5, 13})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {2, 10, 3, 11, 6, 14, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {8, 0, 9, 1, 12, 4, 13, 5})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V2, V1); - if (isShuffleEquivalent(V1, V2, Mask, {10, 2, 11, 3, 14, 6, 15, 7})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V2, V1); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG)) + return V; } // Try to use shift instructions. 
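// A standalone sketch of the single-input VPERMV semantics used above
// (hypothetical helper and values): unlike an immediate-controlled shuffle,
// the mask operand is itself a vector of source indices, so R[i] =
// V[MaskV[i]] and any element can move across lanes.
#include <cassert>
#include <vector>

static std::vector<int> permv(const std::vector<int> &MaskV,
                              const std::vector<int> &V) {
  std::vector<int> R(V.size());
  for (size_t i = 0; i < V.size(); ++i)
    R[i] = V[(size_t)MaskV[i]];
  return R;
}

int main() {
  // An 8-element cross-lane reversal, something a single PSHUFD immediate
  // could not express.
  std::vector<int> V = {10, 11, 12, 13, 14, 15, 16, 17};
  assert((permv({7, 6, 5, 4, 3, 2, 1, 0}, V) ==
          std::vector<int>{17, 16, 15, 14, 13, 12, 11, 10}));
  return 0;
}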
@@ -10115,18 +10869,9 @@ static SDValue lowerV16I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Blend; // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane: - 0, 16, 1, 17, 2, 18, 3, 19, - // Second 128-bit lane: - 8, 24, 9, 25, 10, 26, 11, 27})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i16, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane: - 4, 20, 5, 21, 6, 22, 7, 23, - // Second 128-bit lane: - 12, 28, 13, 29, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i16, V1, V2); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG)) + return V; // Try to use shift instructions. if (SDValue Shift = @@ -10215,22 +10960,9 @@ static SDValue lowerV32I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return Blend; // Use dedicated unpack instructions for masks that match their pattern. - // Note that these are repeated 128-bit lane unpacks, not unpacks across all - // 256-bit lanes. - if (isShuffleEquivalent( - V1, V2, Mask, - {// First 128-bit lane: - 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, - // Second 128-bit lane: - 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v32i8, V1, V2); - if (isShuffleEquivalent( - V1, V2, Mask, - {// First 128-bit lane: - 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, - // Second 128-bit lane: - 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v32i8, V1, V2); + if (SDValue V = + lowerVectorShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG)) + return V; // Try to use shift instructions. if (SDValue Shift = @@ -10296,12 +11028,17 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, DL, VT, V1, V2, Mask, Subtarget, DAG)) return Insertion; - // There is a really nice hard cut-over between AVX1 and AVX2 that means we can - // check for those subtargets here and avoid much of the subtarget querying in - // the per-vector-type lowering routines. With AVX1 we have essentially *zero* - // ability to manipulate a 256-bit vector with integer types. Since we'll use - // floating point types there eventually, just immediately cast everything to - // a float and operate entirely in that domain. + // Handle special cases where the lower or upper half is UNDEF. + if (SDValue V = + lowerVectorShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) + return V; + + // There is a really nice hard cut-over between AVX1 and AVX2 that means we + // can check for those subtargets here and avoid much of the subtarget + // querying in the per-vector-type lowering routines. With AVX1 we have + // essentially *zero* ability to manipulate a 256-bit vector with integer + // types. Since we'll use floating point types there eventually, just + // immediately cast everything to a float and operate entirely in that domain. if (VT.isInteger() && !Subtarget->hasAVX2()) { int ElementBits = VT.getScalarSizeInBits(); if (ElementBits < 32) @@ -10334,6 +11071,57 @@ static SDValue lower256BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, } } +/// \brief Try to lower a vector shuffle as a 128-bit shuffles. 
+static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT,
+                                        ArrayRef<int> Mask,
+                                        SDValue V1, SDValue V2,
+                                        SelectionDAG &DAG) {
+  assert(VT.getScalarSizeInBits() == 64 &&
+         "Unexpected element type size for 128bit shuffle.");
+
+  // Handling a 256-bit vector would require VLX, and
+  // lowerV2X128VectorShuffle() is most probably a better solution there.
+  assert(VT.is512BitVector() && "Unexpected vector size for 128bit shuffle.");
+
+  SmallVector<int, 4> WidenedMask;
+  if (!canWidenShuffleElements(Mask, WidenedMask))
+    return SDValue();
+
+  // Form a 128-bit permutation.
+  // Convert the 64-bit shuffle mask selection values into 128-bit selection
+  // bits defined by a vshuf64x2 instruction's immediate control byte.
+  unsigned PermMask = 0, Imm = 0;
+  unsigned ControlBitsNum = WidenedMask.size() / 2;
+
+  for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
+    if (WidenedMask[i] == SM_SentinelZero)
+      return SDValue();
+
+    // Use the first element in place of an undef mask.
+    Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
+    PermMask |= (Imm % WidenedMask.size()) << (i * ControlBitsNum);
+  }
+
+  return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+                     DAG.getConstant(PermMask, DL, MVT::i8));
+}
+
+static SDValue lowerVectorShuffleWithPERMV(SDLoc DL, MVT VT,
+                                           ArrayRef<int> Mask, SDValue V1,
+                                           SDValue V2, SelectionDAG &DAG) {
+  assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
+
+  MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+  MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+
+  SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
+  if (isSingleInputShuffleMask(Mask))
+    return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+
+  return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
+}
+
 /// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
 static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                                        const X86Subtarget *Subtarget,
@@ -10345,21 +11133,21 @@ static SDValue lowerV8F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

-  // X86 has dedicated unpack instructions that can handle specific blend
-  // operations: UNPCKH and UNPCKL.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
-    return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f64, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f64, V1, V2);
+  if (SDValue Shuf128 =
+          lowerV4X128VectorShuffle(DL, MVT::v8f64, Mask, V1, V2, DAG))
+    return Shuf128;

-  // FIXME: Implement direct support for this type!
-  return splitAndLowerVectorShuffle(DL, MVT::v8f64, V1, V2, Mask, DAG);
+  if (SDValue Unpck =
+          lowerVectorShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
+    return Unpck;
+
+  return lowerVectorShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, DAG);
 }

 /// \brief Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); @@ -10367,22 +11155,11 @@ static SDValue lowerV16F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 0, 16, 1, 17, 4, 20, 5, 21, - // Second 128-bit lane. - 8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16f32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 2, 18, 3, 19, 6, 22, 7, 23, - // Second 128-bit lane. - 10, 26, 11, 27, 14, 30, 15, 31})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16f32, V1, V2); + if (SDValue Unpck = + lowerVectorShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) + return Unpck; - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v16f32, V1, V2, Mask, DAG); + return lowerVectorShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, DAG); } /// \brief Handle lowering of 8-lane 64-bit integer shuffles. @@ -10396,21 +11173,21 @@ static SDValue lowerV8I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); - // X86 has dedicated unpack instructions that can handle specific blend - // operations: UNPCKH and UNPCKL. - if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i64, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, {1, 9, 3, 11, 5, 13, 7, 15})) - return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i64, V1, V2); + if (SDValue Shuf128 = + lowerV4X128VectorShuffle(DL, MVT::v8i64, Mask, V1, V2, DAG)) + return Shuf128; - // FIXME: Implement direct support for this type! - return splitAndLowerVectorShuffle(DL, MVT::v8i64, V1, V2, Mask, DAG); + if (SDValue Unpck = + lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) + return Unpck; + + return lowerVectorShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, DAG); } /// \brief Handle lowering of 16-lane 32-bit integer shuffles. static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { SDLoc DL(Op); assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); @@ -10418,22 +11195,11 @@ static SDValue lowerV16I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, ArrayRef Mask = SVOp->getMask(); assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); - // Use dedicated unpack instructions for masks that match their pattern. - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 0, 16, 1, 17, 4, 20, 5, 21, - // Second 128-bit lane. - 8, 24, 9, 25, 12, 28, 13, 29})) - return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i32, V1, V2); - if (isShuffleEquivalent(V1, V2, Mask, - {// First 128-bit lane. - 2, 18, 3, 19, 6, 22, 7, 23, - // Second 128-bit lane. 
-                                   10, 26, 11, 27, 14, 30, 15, 31}))
-    return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i32, V1, V2);
+  if (SDValue Unpck =
+          lowerVectorShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG))
+    return Unpck;

-  // FIXME: Implement direct support for this type!
-  return splitAndLowerVectorShuffle(DL, MVT::v16i32, V1, V2, Mask, DAG);
+  return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
 }

 /// \brief Handle lowering of 32-lane 16-bit integer shuffles.
@@ -10448,8 +11214,7 @@ static SDValue lowerV32I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
   assert(Subtarget->hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");

-  // FIXME: Implement direct support for this type!
-  return splitAndLowerVectorShuffle(DL, MVT::v32i16, V1, V2, Mask, DAG);
+  return lowerVectorShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, DAG);
 }

 /// \brief Handle lowering of 64-lane 8-bit integer shuffles.
@@ -10517,6 +11282,60 @@ static SDValue lower512BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   return splitAndLowerVectorShuffle(DL, VT, V1, V2, Mask, DAG);
 }

+// Lower vXi1 vector shuffles.
+// There is no dedicated instruction on AVX-512 that shuffles the masks.
+// The only way to shuffle bits is to sign-extend the mask vector to a SIMD
+// vector, shuffle, and then truncate it back.
+static SDValue lower1BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2,
+                                      MVT VT, const X86Subtarget *Subtarget,
+                                      SelectionDAG &DAG) {
+  SDLoc DL(Op);
+  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  ArrayRef<int> Mask = SVOp->getMask();
+  assert(Subtarget->hasAVX512() &&
+         "Cannot lower 512-bit vectors w/o basic ISA!");
+  MVT ExtVT;
+  switch (VT.SimpleTy) {
+  default:
+    llvm_unreachable("Expected a vector of i1 elements");
+  case MVT::v2i1:
+    ExtVT = MVT::v2i64;
+    break;
+  case MVT::v4i1:
+    ExtVT = MVT::v4i32;
+    break;
+  case MVT::v8i1:
+    ExtVT = MVT::v8i64; // Take 512-bit type, more shuffles on KNL
+    break;
+  case MVT::v16i1:
+    ExtVT = MVT::v16i32;
+    break;
+  case MVT::v32i1:
+    ExtVT = MVT::v32i16;
+    break;
+  case MVT::v64i1:
+    ExtVT = MVT::v64i8;
+    break;
+  }
+
+  if (ISD::isBuildVectorAllZeros(V1.getNode()))
+    V1 = getZeroVector(ExtVT, Subtarget, DAG, DL);
+  else if (ISD::isBuildVectorAllOnes(V1.getNode()))
+    V1 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+  else
+    V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1);
+
+  if (V2.isUndef())
+    V2 = DAG.getUNDEF(ExtVT);
+  else if (ISD::isBuildVectorAllZeros(V2.getNode()))
+    V2 = getZeroVector(ExtVT, Subtarget, DAG, DL);
+  else if (ISD::isBuildVectorAllOnes(V2.getNode()))
+    V2 = getOnesVector(ExtVT, Subtarget, DAG, DL);
+  else
+    V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
+  return DAG.getNode(ISD::TRUNCATE, DL, VT,
+                     DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask));
+}
+
 /// \brief Top-level lowering for x86 vector shuffles.
/// /// This handles decomposition, canonicalization, and lowering of all x86 @@ -10533,8 +11352,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, MVT VT = Op.getSimpleValueType(); int NumElements = VT.getVectorNumElements(); SDLoc dl(Op); + bool Is1BitVector = (VT.getVectorElementType() == MVT::i1); - assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); + assert((VT.getSizeInBits() != 64 || Is1BitVector) && + "Can't lower MMX shuffles"); bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; @@ -10572,7 +11393,7 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, // elements wider than 64 bits, but it might be interesting to form i128 // integers to handle flipping the low and high halves of AVX 256-bit vectors. SmallVector WidenedMask; - if (VT.getScalarSizeInBits() < 64 && + if (VT.getScalarSizeInBits() < 64 && !Is1BitVector && canWidenShuffleElements(Mask, WidenedMask)) { MVT NewEltVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2) @@ -10640,17 +11461,17 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, } // For each vector width, delegate to a specialized lowering routine. - if (VT.getSizeInBits() == 128) + if (VT.is128BitVector()) return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); - if (VT.getSizeInBits() == 256) + if (VT.is256BitVector()) return lower256BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); - // Force AVX-512 vectors to be scalarized for now. - // FIXME: Implement AVX-512 support! - if (VT.getSizeInBits() == 512) + if (VT.is512BitVector()) return lower512BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); + if (Is1BitVector) + return lower1BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); llvm_unreachable("Unimplemented!"); } @@ -10661,11 +11482,16 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, unsigned &MaskValue) { MaskValue = 0; unsigned NumElems = BuildVector->getNumOperands(); + // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. + // We don't handle the >2 lanes case right now. unsigned NumLanes = (NumElems - 1) / 8 + 1; + if (NumLanes > 2) + return false; + unsigned NumElemsInLane = NumElems / NumLanes; - // Blend for v16i16 should be symetric for the both lanes. + // Blend for v16i16 should be symmetric for the both lanes. for (unsigned i = 0; i < NumElemsInLane; ++i) { SDValue EltCond = BuildVector->getOperand(i); SDValue SndLaneEltCond = @@ -10673,20 +11499,25 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector, int Lane1Cond = -1, Lane2Cond = -1; if (isa(EltCond)) - Lane1Cond = !isZero(EltCond); + Lane1Cond = !isNullConstant(EltCond); if (isa(SndLaneEltCond)) - Lane2Cond = !isZero(SndLaneEltCond); + Lane2Cond = !isNullConstant(SndLaneEltCond); + unsigned LaneMask = 0; if (Lane1Cond == Lane2Cond || Lane2Cond < 0) // Lane1Cond != 0, means we want the first argument. // Lane1Cond == 0, means we want the second argument. // The encoding of this argument is 0 for the first argument, 1 // for the second. Therefore, invert the condition. 
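// A standalone sketch of the inversion noted above (hypothetical helper): a
// nonzero VSELECT condition element selects the first operand, but a BLENDI
// immediate bit of 0 selects the first operand, so the condition is negated
// before being shifted into place.
#include <cassert>

static unsigned blendBitFor(int LaneCond, unsigned i) {
  return (unsigned)(!LaneCond) << i;
}

int main() {
  // Condition true (take first arg) -> immediate bit 0; condition false
  // (take second arg) -> immediate bit 1, here at element position 3.
  assert(blendBitFor(/*LaneCond=*/1, 3) == 0u);
  assert(blendBitFor(/*LaneCond=*/0, 3) == (1u << 3));
  return 0;
}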
- MaskValue |= !Lane1Cond << i; + LaneMask = !Lane1Cond << i; else if (Lane1Cond < 0) - MaskValue |= !Lane2Cond << i; + LaneMask = !Lane2Cond << i; else return false; + + MaskValue |= LaneMask; + if (NumLanes == 2) + MaskValue |= LaneMask << NumElemsInLane; } return true; } @@ -10711,7 +11542,8 @@ static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, for (int i = 0, Size = VT.getVectorNumElements(); i < Size; ++i) { SDValue CondElt = CondBV->getOperand(i); Mask.push_back( - isa(CondElt) ? i + (isZero(CondElt) ? Size : 0) : -1); + isa(CondElt) ? i + (isNullConstant(CondElt) ? Size : 0) + : -1); } return DAG.getVectorShuffle(VT, dl, LHS, RHS, Mask); } @@ -10776,9 +11608,8 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { } if (VT.getSizeInBits() == 16) { - unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); // If Idx is 0, it's cheaper to do a move instead of a pextrw. - if (Idx == 0) + if (isNullConstant(Op.getOperand(1))) return DAG.getNode( ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, @@ -10801,8 +11632,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { return SDValue(); SDNode *User = *Op.getNode()->use_begin(); if ((User->getOpcode() != ISD::STORE || - (isa(Op.getOperand(1)) && - cast(Op.getOperand(1))->isNullValue())) && + isNullConstant(Op.getOperand(1))) && (User->getOpcode() != ISD::BITCAST || User->getValueType(0) != MVT::i32)) return SDValue(); @@ -10900,10 +11730,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, MVT EltVT = VecVT.getVectorElementType(); unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits(); + assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); - //if (IdxVal >= NumElems/2) - // IdxVal -= NumElems/2; - IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk; + // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2 + // this can be done with a mask. + IdxVal &= ElemsPerChunk - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, DAG.getConstant(IdxVal, dl, MVT::i32)); } @@ -10918,8 +11749,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // TODO: handle v16i8. if (VT.getSizeInBits() == 16) { SDValue Vec = Op.getOperand(0); - unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); - if (Idx == 0) + if (isNullConstant(Op.getOperand(1))) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Vec), @@ -10951,8 +11781,7 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught // to match extract_elt for f64. - unsigned Idx = cast(Op.getOperand(1))->getZExtValue(); - if (Idx == 0) + if (isNullConstant(Op.getOperand(1))) return Op; // UNPCKHPD the element to the lowest double word, then movsd. @@ -11039,7 +11868,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // Insert the element into the desired chunk. unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits(); - unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128; + assert(isPowerOf2_32(NumEltsIn128)); + // Since NumEltsIn128 is a power of 2 we can use mask instead of modulo. 
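// A standalone check of the power-of-two modulo identity relied on above:
// when N is a power of two, x % N == (x & (N - 1)), which is why the masked
// form below can replace the explicit division and multiplication.
#include <cassert>

int main() {
  for (unsigned IdxVal = 0; IdxVal < 64; ++IdxVal)
    for (unsigned N = 1; N <= 16; N <<= 1) // powers of two only
      assert((IdxVal & (N - 1)) == IdxVal % N);
  return 0;
}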
+ unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1); V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1, DAG.getConstant(IdxIn128, dl, MVT::i32)); @@ -11078,8 +11909,7 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, // Bits [3:0] of the constant are the zero mask. The DAG Combiner may // combine either bitwise AND or insert of float 0.0 to set these bits. - const Function *F = DAG.getMachineFunction().getFunction(); - bool MinSize = F->hasFnAttribute(Attribute::MinSize); + bool MinSize = DAG.getMachineFunction().getFunction()->optForMinSize(); if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) { // If this is an insertion of 32-bits into the low 32-bits of // a vector, we prefer to generate a blend with immediate rather @@ -11199,14 +12029,25 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, // --> load32 addr if ((IdxVal == OpVT.getVectorNumElements() / 2) && Vec.getOpcode() == ISD::INSERT_SUBVECTOR && - OpVT.is256BitVector() && SubVecVT.is128BitVector() && - !Subtarget->isUnalignedMem32Slow()) { - SDValue SubVec2 = Vec.getOperand(1); - if (auto *Idx2 = dyn_cast(Vec.getOperand(2))) { - if (Idx2->getZExtValue() == 0) { - SDValue Ops[] = { SubVec2, SubVec }; - if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) - return Ld; + OpVT.is256BitVector() && SubVecVT.is128BitVector()) { + auto *Idx2 = dyn_cast(Vec.getOperand(2)); + if (Idx2 && Idx2->getZExtValue() == 0) { + SDValue SubVec2 = Vec.getOperand(1); + // If needed, look through a bitcast to get to the load. + if (SubVec2.getNode() && SubVec2.getOpcode() == ISD::BITCAST) + SubVec2 = SubVec2.getOperand(0); + + if (auto *FirstLd = dyn_cast(SubVec2)) { + bool Fast; + unsigned Alignment = FirstLd->getAlignment(); + unsigned AS = FirstLd->getAddressSpace(); + const X86TargetLowering *TLI = Subtarget->getTargetLowering(); + if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), + OpVT, AS, Alignment, &Fast) && Fast) { + SDValue Ops[] = { SubVec2, SubVec }; + if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false)) + return Ld; + } } } } @@ -11218,37 +12059,9 @@ static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget *Subtarget, if (OpVT.is512BitVector() && SubVecVT.is256BitVector()) return Insert256BitVector(Vec, SubVec, IdxVal, DAG, dl); - if (OpVT.getVectorElementType() == MVT::i1) { - if (IdxVal == 0 && Vec.getOpcode() == ISD::UNDEF) // the operation is legal - return Op; - SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); - SDValue Undef = DAG.getUNDEF(OpVT); - unsigned NumElems = OpVT.getVectorNumElements(); - SDValue ShiftBits = DAG.getConstant(NumElems/2, dl, MVT::i8); + if (OpVT.getVectorElementType() == MVT::i1) + return Insert1BitVector(Op, DAG); - if (IdxVal == OpVT.getVectorNumElements() / 2) { - // Zero upper bits of the Vec - Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); - Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); - - SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, - SubVec, ZeroIdx); - Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits); - return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2); - } - if (IdxVal == 0) { - SDValue Vec2 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef, - SubVec, ZeroIdx); - // Zero upper bits of the Vec2 - Vec2 = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec2, ShiftBits); - Vec2 = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec2, ShiftBits); - // Zero lower bits of the Vec - Vec = DAG.getNode(X86ISD::VSRLI, dl, OpVT, Vec, ShiftBits); - 
Vec = DAG.getNode(X86ISD::VSHLI, dl, OpVT, Vec, ShiftBits); - // Merge them together - return DAG.getNode(ISD::OR, dl, OpVT, Vec, Vec2); - } - } return SDValue(); } @@ -11363,7 +12176,8 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { // load. if (isGlobalStubReference(OpFlag)) Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); return Result; } @@ -11430,7 +12244,8 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -11587,7 +12402,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, } Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - MachinePointerInfo::getGOT(), false, false, false, 0); + MachinePointerInfo::getGOT(DAG.getMachineFunction()), + false, false, false, 0); } // The address of the thread local variable is the add of the thread @@ -11599,10 +12415,18 @@ SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GA = cast(Op); + + // Cygwin uses emutls. + // FIXME: It may be EmulatedTLS-generic also for X86-Android. + if (Subtarget->isTargetWindowsCygwin()) + return LowerToTLSEmulatedModel(GA, DAG); + const GlobalValue *GV = GA->getGlobal(); auto PtrVT = getPointerTy(DAG.getDataLayout()); if (Subtarget->isTargetELF()) { + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: @@ -11830,10 +12654,10 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, auto PtrVT = getPointerTy(MF.getDataLayout()); int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), - StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + SDValue Chain = DAG.getStore( + DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false, + false, 0); return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG); } @@ -11855,10 +12679,9 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, MachineMemOperand *MMO; if (FI) { int SSFI = FI->getIndex(); - MMO = - DAG.getMachineFunction() - .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOLoad, ByteSize, ByteSize); + MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + MachineMemOperand::MOLoad, ByteSize, ByteSize); } else { MMO = cast(StackSlot)->getMemOperand(); StackSlot = StackSlot.getOperand(1); @@ -11884,16 +12707,16 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue Ops[] = { Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag }; - MachineMemOperand *MMO = - DAG.getMachineFunction() - .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOStore, SSFISize, SSFISize); + 
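Stepping back to the i1 INSERT_SUBVECTOR hunk above: the deleted inline lowering cleared the unwanted half of a mask register with a shift-left/shift-right pair and OR'ed in the shifted subvector, and the patch moves that logic behind Insert1BitVector. A scalar sketch of the same bit manipulation on a 16-lane mask (the helper name and the fixed 8-bit half are assumptions for illustration):

    #include <cstdint>

    // Insert the low 8 bits of Sub into the upper half of a 16-bit mask,
    // mirroring the removed VSHLI/VSRLI/OR sequence for IdxVal == NumElems/2:
    // the shift pair keeps only the low half of Vec, the left shift places
    // Sub's low bits in the high half, and OR merges the two masks.
    uint16_t insertUpperMaskHalf(uint16_t Vec, uint16_t Sub) {
      uint16_t Lo = (uint16_t)((uint16_t)(Vec << 8) >> 8); // zero upper bits of Vec
      uint16_t Hi = (uint16_t)(Sub << 8);                  // Sub shifted into place
      return (uint16_t)(Lo | Hi);
    }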
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + MachineMemOperand::MOStore, SSFISize, SSFISize); Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, Ops, Op.getValueType(), MMO); - Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, false, 0); + Result = DAG.getLoad( + Op.getValueType(), DL, Chain, StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + false, false, false, 0); } return Result; @@ -11937,16 +12760,19 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, // Load the 64-bit value into an XMM register. SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0)); - SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); + SDValue CLod0 = + DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 16); SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0); - SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); + SDValue CLod1 = + DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 16); SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1); + // TODO: Are there any fast-math-flags to propagate here? SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); SDValue Result; @@ -11996,10 +12822,11 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl)); // Subtract the bias. + // TODO: Are there any fast-math-flags to propagate here? SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); // Handle final rounding. - EVT DestVT = Op.getValueType(); + MVT DestVT = Op.getSimpleValueType(); if (DestVT.bitsLT(MVT::f64)) return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, @@ -12025,14 +12852,23 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); // return (float4) lo + fhi; + // We shouldn't use it when unsafe-fp-math is enabled though: we might later + // reassociate the two FADDs, and if we do that, the algorithm fails + // spectacularly (PR24512). + // FIXME: If we ever have some kind of Machine FMF, this should be marked + // as non-fast and always be enabled. Why isn't SDAG FMF enough? Because + // there's also the MachineCombiner reassociations happening on Machine IR. + if (DAG.getTarget().Options.UnsafeFPMath) + return SDValue(); + SDLoc DL(Op); SDValue V = Op->getOperand(0); - EVT VecIntVT = V.getValueType(); + MVT VecIntVT = V.getSimpleValueType(); bool Is128 = VecIntVT == MVT::v4i32; - EVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; + MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; // If we convert to something else than the supported type, e.g., to v4f64, // abort early. - if (VecFloatVT != Op->getValueType(0)) + if (VecFloatVT != Op->getSimpleValueType(0)) return SDValue(); unsigned NumElts = VecIntVT.getVectorNumElements(); @@ -12070,7 +12906,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, SDValue Low, High; if (Subtarget.hasSSE41()) { - EVT VecI16VT = Is128 ? 
MVT::v8i16 : MVT::v16i16; + MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); @@ -12108,6 +12944,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, // float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f); SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); + // TODO: Are there any fast-math-flags to propagate here? SDValue FHigh = DAG.getNode(ISD::FADD, DL, VecFloatVT, HighBitcast, VecCstFAdd); // return (float4) lo + fhi; @@ -12137,11 +12974,10 @@ SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op, return lowerUINT_TO_FP_vXi32(Op, DAG, *Subtarget); case MVT::v16i8: case MVT::v16i16: - if (Subtarget->hasAVX512()) - return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), - DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); + assert(Subtarget->hasAVX512()); + return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(), + DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, N0)); } - llvm_unreachable(nullptr); } SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, @@ -12150,7 +12986,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SDLoc dl(Op); auto PtrVT = getPointerTy(DAG.getDataLayout()); - if (Op.getValueType().isVector()) + if (Op.getSimpleValueType().isVector()) return lowerUINT_TO_FP_vec(Op, DAG); // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't @@ -12161,6 +12997,14 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, MVT SrcVT = N0.getSimpleValueType(); MVT DstVT = Op.getSimpleValueType(); + + if (Subtarget->hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && + (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget->is64Bit()))) { + // Conversions from unsigned i32 to f32/f64 are legal, + // using VCVTUSI2SS/SD. Same for i64 in 64-bit mode. + return Op; + } + if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG); if (SrcVT == MVT::i32 && X86ScalarSSEf64) @@ -12193,10 +13037,9 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // we must be careful to do the computation in x87 extended precision, not // in SSE. (The generic code can't know it's OK to do this, or how to.) int SSFI = cast(StackSlot)->getIndex(); - MachineMemOperand *MMO = - DAG.getMachineFunction() - .getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOLoad, 8, 8); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + MachineMemOperand::MOLoad, 8, 8); SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) }; @@ -12223,24 +13066,52 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), - FudgePtr, MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, false, 4); + SDValue Fudge = DAG.getExtLoad( + ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), FudgePtr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. + // TODO: Are there any fast-math-flags to propagate here? 
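Both unsigned-to-float lowerings in the hunks above rest on the same exponent-bias trick: OR the integer bits into the mantissa of a known power of two, subtract that power of two exactly, and let one final add do the only rounding. A scalar model of the two variants, using the same magic constants the constant-pool loads and blends supply (function names are illustrative):

    #include <cstdint>
    #include <cstring>

    // u64 -> f64: glue each 32-bit half onto 2^52 / 2^84. The two
    // subtractions are exact; only the final add rounds.
    double u64ToF64(uint64_t V) {
      uint64_t LoBits = (V & 0xffffffffu) | 0x4330000000000000ull; // 2^52 + lo32
      uint64_t HiBits = (V >> 32)         | 0x4530000000000000ull; // 2^84 + hi32*2^32
      double Lo, Hi;
      std::memcpy(&Lo, &LoBits, sizeof(Lo));
      std::memcpy(&Hi, &HiBits, sizeof(Hi));
      return (Hi - 0x1.0p84) + (Lo - 0x1.0p52);
    }

    // u32 -> f32, the vXi32 variant: 16-bit halves glued onto 2^23 / 2^39.
    float u32ToF32(uint32_t V) {
      uint32_t LoBits = (V & 0xffffu) | 0x4b000000u; // 2^23 + lo16
      uint32_t HiBits = (V >> 16)     | 0x53000000u; // 2^39 + hi16*2^16
      float Lo, Hi;
      std::memcpy(&Lo, &LoBits, sizeof(Lo));
      std::memcpy(&Hi, &HiBits, sizeof(Hi));
      float FHi = Hi - (0x1.0p39f + 0x1.0p23f);      // exact: hi16*2^16 - 2^23
      return Lo + FHi;                               // the single rounding step
    }

This also shows why the vXi32 path now bails out under unsafe-fp-math: reassociating the final add against the bias subtraction destroys the single-rounding property (PR24512).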
   SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
   return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
                      DAG.getIntPtrConstant(0, dl));
 }

+// If the given FP_TO_SINT (IsSigned) or FP_TO_UINT (!IsSigned) operation
+// is legal, or has an fp128 or f16 source (which needs to be promoted to f32),
+// just return an <SDValue(), SDValue()> pair.
+// Otherwise it is assumed to be a conversion from one of f32, f64 or f80
+// to i16, i32 or i64, and we lower it to a legal sequence.
+// If lowered to the final integer result we return a <Result, SDValue()> pair.
+// Otherwise we lower it to a sequence ending with a FIST, return a
+// <FIST, StackSlot> pair, and the caller is responsible for loading
+// the final integer result from StackSlot.
 std::pair<SDValue,SDValue>
-X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
-                                   bool IsSigned, bool IsReplace) const {
+X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
+                                   bool IsSigned, bool IsReplace) const {
   SDLoc DL(Op);

   EVT DstTy = Op.getValueType();
+  EVT TheVT = Op.getOperand(0).getValueType();
   auto PtrVT = getPointerTy(DAG.getDataLayout());

-  if (!IsSigned && !isIntegerTypeFTOL(DstTy)) {
+  if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
+    // f16 must be promoted before using the lowering in this routine.
+    // fp128 does not use this lowering.
+    return std::make_pair(SDValue(), SDValue());
+  }
+
+  // If using FIST to compute an unsigned i64, we'll need some fixup
+  // to handle values above the maximum signed i64. A FIST is always
+  // used for the 32-bit subtarget, but also for f80 on a 64-bit target.
+  bool UnsignedFixup = !IsSigned &&
+                       DstTy == MVT::i64 &&
+                       (!Subtarget->is64Bit() ||
+                        !isScalarFPTypeInSSEReg(TheVT));
+
+  if (!IsSigned && DstTy != MVT::i64 && !Subtarget->hasAVX512()) {
+    // Replace the fp-to-uint32 operation with an fp-to-sint64 FIST.
+    // The low 32 bits of the fist result will have the correct uint32 result.
     assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
     DstTy = MVT::i64;
   }
@@ -12258,42 +13129,87 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
       isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
     return std::make_pair(SDValue(), SDValue());

-  // We lower FP->int64 either into FISTP64 followed by a load from a temporary
-  // stack slot, or into the FTOL runtime function.
+  // We lower FP->int64 into FISTP64 followed by a load from a temporary
+  // stack slot.
   MachineFunction &MF = DAG.getMachineFunction();
   unsigned MemSize = DstTy.getSizeInBits()/8;
   int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);

   unsigned Opc;
-  if (!IsSigned && isIntegerTypeFTOL(DstTy))
-    Opc = X86ISD::WIN_FTOL;
-  else
-    switch (DstTy.getSimpleVT().SimpleTy) {
-    default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
-    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
-    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
-    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
-    }
+  switch (DstTy.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
+  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
+  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
+  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
+  }

   SDValue Chain = DAG.getEntryNode();
   SDValue Value = Op.getOperand(0);
-  EVT TheVT = Op.getOperand(0).getValueType();
+  SDValue Adjust; // 0x0 or 0x80000000, for result sign bit adjustment.
+ + if (UnsignedFixup) { + // + // Conversion to unsigned i64 is implemented with a select, + // depending on whether the source value fits in the range + // of a signed i64. Let Thresh be the FP equivalent of + // 0x8000000000000000ULL. + // + // Adjust i32 = (Value < Thresh) ? 0 : 0x80000000; + // FistSrc = (Value < Thresh) ? Value : (Value - Thresh); + // Fist-to-mem64 FistSrc + // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent + // to XOR'ing the high 32 bits with Adjust. + // + // Being a power of 2, Thresh is exactly representable in all FP formats. + // For X87 we'd like to use the smallest FP type for this constant, but + // for DAG type consistency we have to match the FP operand type. + + APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000)); + LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; + bool LosesInfo = false; + if (TheVT == MVT::f64) + // The rounding mode is irrelevant as the conversion should be exact. + Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &LosesInfo); + else if (TheVT == MVT::f80) + Status = Thresh.convert(APFloat::x87DoubleExtended, + APFloat::rmNearestTiesToEven, &LosesInfo); + + assert(Status == APFloat::opOK && !LosesInfo && + "FP conversion should have been exact"); + + SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT); + + SDValue Cmp = DAG.getSetCC(DL, + getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); + Adjust = DAG.getSelect(DL, MVT::i32, Cmp, + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(0x80000000, DL, MVT::i32)); + SDValue Sub = DAG.getNode(ISD::FSUB, DL, TheVT, Value, ThreshVal); + Cmp = DAG.getSetCC(DL, getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TheVT), + Value, ThreshVal, ISD::SETLT); + Value = DAG.getSelect(DL, TheVT, Cmp, Value, Sub); + } + // FIXME This causes a redundant load/store if the SSE-class value is already // in memory, such as if it is on the callstack. if (isScalarFPTypeInSSEReg(TheVT)) { assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); Chain = DAG.getStore(Chain, DL, Value, StackSlot, - MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + MachinePointerInfo::getFixedStack(MF, SSFI), false, + false, 0); SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other); SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(TheVT) }; MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOLoad, MemSize, MemSize); + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), + MachineMemOperand::MOLoad, MemSize, MemSize); Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO); Chain = Value.getValue(1); SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); @@ -12301,28 +13217,52 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, } MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOStore, MemSize, MemSize); + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), + MachineMemOperand::MOStore, MemSize, MemSize); - if (Opc != X86ISD::WIN_FTOL) { + if (UnsignedFixup) { + + // Insert the FIST, load its result as two i32's, + // and XOR the high i32 with Adjust. 
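A scalar model of the UnsignedFixup path just described: the threshold select, the exact subtraction, and the high-word XOR (the function below is illustrative; the real code keeps everything in DAG nodes and FIST stores):

    #include <cstdint>

    // Values below 2^63 convert directly; values at or above it are reduced
    // by 2^63 (exact, since 2^63 <= X < 2^64 allows Sterbenz subtraction)
    // before the signed conversion, and the stolen sign bit is restored by
    // XORing the high 32 bits with Adjust = 0x80000000.
    // Assumes 0 <= X < 2^64, as the FIST path does.
    uint64_t f64ToU64ViaSignedCvt(double X) {
      const double Thresh = 0x1.0p63;           // built from float bits 0x5f000000
      bool Small = X < Thresh;
      uint32_t Adjust = Small ? 0u : 0x80000000u;
      double FistSrc = Small ? X : X - Thresh;  // in [0, 2^63) either way
      int64_t Signed = (int64_t)FistSrc;        // stands in for FIST-to-mem64
      uint64_t Lo = (uint32_t)Signed;
      uint64_t Hi = ((uint32_t)(Signed >> 32)) ^ Adjust;
      return (Hi << 32) | Lo;                   // the Is64Bit join below
    }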
+ + SDValue FistOps[] = { Chain, Value, StackSlot }; + SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), + FistOps, DstTy, MMO); + + SDValue Low32 = DAG.getLoad(MVT::i32, DL, FIST, StackSlot, + MachinePointerInfo(), + false, false, false, 0); + SDValue HighAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackSlot, + DAG.getConstant(4, DL, PtrVT)); + + SDValue High32 = DAG.getLoad(MVT::i32, DL, FIST, HighAddr, + MachinePointerInfo(), + false, false, false, 0); + High32 = DAG.getNode(ISD::XOR, DL, MVT::i32, High32, Adjust); + + if (Subtarget->is64Bit()) { + // Join High32 and Low32 into a 64-bit result. + // (High32 << 32) | Low32 + Low32 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Low32); + High32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, High32); + High32 = DAG.getNode(ISD::SHL, DL, MVT::i64, High32, + DAG.getConstant(32, DL, MVT::i8)); + SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i64, High32, Low32); + return std::make_pair(Result, SDValue()); + } + + SDValue ResultOps[] = { Low32, High32 }; + + SDValue pair = IsReplace + ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, ResultOps) + : DAG.getMergeValues(ResultOps, DL); + return std::make_pair(pair, SDValue()); + } else { // Build the FP_TO_INT*_IN_MEM SDValue Ops[] = { Chain, Value, StackSlot }; SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other), Ops, DstTy, MMO); return std::make_pair(FIST, StackSlot); - } else { - SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL, - DAG.getVTList(MVT::Other, MVT::Glue), - Chain, Value); - SDValue eax = DAG.getCopyFromReg(ftol, DL, X86::EAX, - MVT::i32, ftol.getValue(1)); - SDValue edx = DAG.getCopyFromReg(eax.getValue(1), DL, X86::EDX, - MVT::i32, eax.getValue(2)); - SDValue Ops[] = { eax, edx }; - SDValue pair = IsReplace - ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops) - : DAG.getMergeValues(Ops, DL); - return std::make_pair(pair, SDValue()); } } @@ -12333,7 +13273,7 @@ static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, MVT InVT = In.getSimpleValueType(); SDLoc dl(Op); - if (VT.is512BitVector() || InVT.getScalarType() == MVT::i1) + if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1) return DAG.getNode(ISD::ZERO_EXTEND, dl, VT, In); // Optimize vectors in AVX mode: @@ -12426,6 +13366,62 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); } +static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + + SDLoc DL(Op); + MVT VT = Op.getSimpleValueType(); + SDValue In = Op.getOperand(0); + MVT InVT = In.getSimpleValueType(); + + assert(VT.getVectorElementType() == MVT::i1 && "Unexected vector type."); + + // Shift LSB to MSB and use VPMOVB2M - SKX. 
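Before the new truncate-to-i1 routine, a scalar view of why shifting the LSB into the sign bit suffices (illustrative function; VPMOV*2M and TESTM both key off the most significant bit of each lane):

    #include <cstdint>

    // Truncation to i1 keeps only the LSB, so shift it into the sign bit
    // and let a sign test read it out.
    bool truncToI1(uint16_t Lane) {
      const unsigned ShiftInx = 15;                          // scalar bits - 1
      return ((uint16_t)(Lane << ShiftInx) & 0x8000u) != 0;  // same as Lane & 1
    }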
+  unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
+  if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
+       Subtarget->hasBWI()) ||     // legal, will go to VPMOVB2M, VPMOVW2M
+      ((InVT.is256BitVector() || InVT.is128BitVector()) &&
+       InVT.getScalarSizeInBits() <= 16 && Subtarget->hasBWI() &&
+       Subtarget->hasVLX())) {     // legal, will go to VPMOVB2M, VPMOVW2M
+    // Shift packed bytes not supported natively, bitcast to dword
+    MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
+    SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, ExtVT,
+                                    DAG.getBitcast(ExtVT, In),
+                                    DAG.getConstant(ShiftInx, DL, ExtVT));
+    ShiftNode = DAG.getBitcast(InVT, ShiftNode);
+    return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
+  }
+  if ((InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
+       Subtarget->hasDQI()) ||     // legal, will go to VPMOVD2M, VPMOVQ2M
+      ((InVT.is256BitVector() || InVT.is128BitVector()) &&
+       InVT.getScalarSizeInBits() >= 32 && Subtarget->hasDQI() &&
+       Subtarget->hasVLX())) {     // legal, will go to VPMOVD2M, VPMOVQ2M
+
+    SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
+                                    DAG.getConstant(ShiftInx, DL, InVT));
+    return DAG.getNode(X86ISD::CVT2MASK, DL, VT, ShiftNode);
+  }
+
+  // Shift LSB to MSB, extend if necessary and use TESTM.
+  unsigned NumElts = InVT.getVectorNumElements();
+  if (InVT.getSizeInBits() < 512 &&
+      (InVT.getScalarType() == MVT::i8 || InVT.getScalarType() == MVT::i16 ||
+       !Subtarget->hasVLX())) {
+    assert((NumElts == 8 || NumElts == 16) && "Unexpected vector type.");
+
+    // TESTD/Q should be used (if BW supported we use CVT2MASK above),
+    // so vector should be extended to packed dword/qword.
+    MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+    In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
+    InVT = ExtVT;
+    ShiftInx = InVT.getScalarSizeInBits() - 1;
+  }
+
+  SDValue ShiftNode = DAG.getNode(ISD::SHL, DL, InVT, In,
+                                  DAG.getConstant(ShiftInx, DL, InVT));
+  return DAG.getNode(X86ISD::TESTM, DL, VT, ShiftNode, ShiftNode);
+}
+
 SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
@@ -12443,42 +13439,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
   assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
          "Invalid TRUNCATE operation");

-  // move vector to mask - truncate solution for SKX
-  if (VT.getVectorElementType() == MVT::i1) {
-    if (InVT.is512BitVector() && InVT.getScalarSizeInBits() <= 16 &&
-        Subtarget->hasBWI())
-      return Op; // legal, will go to VPMOVB2M, VPMOVW2M
-    if ((InVT.is256BitVector() || InVT.is128BitVector())
-        && InVT.getScalarSizeInBits() <= 16 &&
-        Subtarget->hasBWI() && Subtarget->hasVLX())
-      return Op; // legal, will go to VPMOVB2M, VPMOVW2M
-    if (InVT.is512BitVector() && InVT.getScalarSizeInBits() >= 32 &&
-        Subtarget->hasDQI())
-      return Op; // legal, will go to VPMOVD2M, VPMOVQ2M
-    if ((InVT.is256BitVector() || InVT.is128BitVector())
-        && InVT.getScalarSizeInBits() >= 32 &&
-        Subtarget->hasDQI() && Subtarget->hasVLX())
-      return Op; // legal, will go to VPMOVB2M, VPMOVQ2M
+  if (VT.getVectorElementType() == MVT::i1)
+    return LowerTruncateVecI1(Op, DAG, Subtarget);
+
+  // vpmovqb/w/d, vpmovdb/w, vpmovwb
+  if (Subtarget->hasAVX512()) {
+    // word to byte only under BWI
+    if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8
+      return DAG.getNode(X86ISD::VTRUNC, DL, VT,
+                         DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
+    return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
   }
-  if
(InVT.is512BitVector() || VT.getVectorElementType() == MVT::i1) { - if (VT.getVectorElementType().getSizeInBits() >=8) - return DAG.getNode(X86ISD::VTRUNC, DL, VT, In); - - assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type"); - unsigned NumElts = InVT.getVectorNumElements(); - assert ((NumElts == 8 || NumElts == 16) && "Unexpected vector type"); - if (InVT.getSizeInBits() < 512) { - MVT ExtVT = (NumElts == 16)? MVT::v16i32 : MVT::v8i64; - In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); - InVT = ExtVT; - } - - SDValue OneV = - DAG.getConstant(APInt::getSignBit(InVT.getScalarSizeInBits()), DL, InVT); - SDValue And = DAG.getNode(ISD::AND, DL, InVT, OneV, In); - return DAG.getNode(X86ISD::TESTM, DL, VT, And, And); - } - if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { // On AVX2, v4i64 -> v4i32 becomes VPERMD. if (Subtarget->hasInt256()) { @@ -12583,7 +13554,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, /*IsSigned=*/ true, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. - if (!FIST.getNode()) return Op; + if (!FIST.getNode()) + return Op; if (StackSlot.getNode()) // Load the result. @@ -12600,7 +13572,9 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, std::pair Vals = FP_TO_INTHelper(Op, DAG, /*IsSigned=*/ false, /*IsReplace=*/ false); SDValue FIST = Vals.first, StackSlot = Vals.second; - assert(FIST.getNode() && "Unexpected failure"); + // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. + if (!FIST.getNode()) + return Op; if (StackSlot.getNode()) // Load the result. @@ -12643,6 +13617,8 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); + bool IsF128 = (VT == MVT::f128); + // FIXME: Use function attribute "OptimizeForSize" and/or CodeGenOpt::Level to // decide if we should generate a 16-byte constant mask when we only need 4 or // 8 bytes for the scalar case. @@ -12650,11 +13626,16 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { MVT LogicVT; MVT EltVT; unsigned NumElts; - + if (VT.isVector()) { LogicVT = VT; EltVT = VT.getVectorElementType(); NumElts = VT.getVectorNumElements(); + } else if (IsF128) { + // SSE instructions are used for optimized f128 logical operations. + LogicVT = MVT::f128; + EltVT = VT; + NumElts = 1; } else { // There are no scalar bitwise logical SSE/AVX instructions, so we // generate a 16-byte vector constant and logic op even for the scalar case. @@ -12675,9 +13656,10 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); unsigned Alignment = cast(CPIdx)->getAlignment(); - SDValue Mask = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, Alignment); + SDValue Mask = + DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); SDValue Op0 = Op.getOperand(0); bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS); @@ -12685,7 +13667,7 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { IsFABS ? X86ISD::FAND : IsFNABS ? X86ISD::FOR : X86ISD::FXOR; SDValue Operand = IsFNABS ? 
Op0.getOperand(0) : Op0; - if (VT.isVector()) + if (VT.isVector() || IsF128) return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask); // For the scalar case extend to a 128-bit vector, perform the logic op, @@ -12704,6 +13686,7 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); MVT SrcVT = Op1.getSimpleValueType(); + bool IsF128 = (VT == MVT::f128); // If second operand is smaller, extend it first. if (SrcVT.bitsLT(VT)) { @@ -12718,13 +13701,16 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // At this point the operands and the result should have the same // type, and that won't be f80 since that is not custom lowered. + assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) && + "Unexpected type in LowerFCOPYSIGN"); const fltSemantics &Sem = - VT == MVT::f64 ? APFloat::IEEEdouble : APFloat::IEEEsingle; + VT == MVT::f64 ? APFloat::IEEEdouble : + (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle); const unsigned SizeInBits = VT.getSizeInBits(); SmallVector CV( - VT == MVT::f64 ? 2 : 4, + VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4), ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0)))); // First, clear all bits but the sign bit from the second operand (sign). @@ -12737,11 +13723,13 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { // Perform all logic operations as 16-byte vectors because there are no // scalar FP logic instructions in SSE. This allows load folding of the // constants into the logic instructions. - MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32; - SDValue Mask1 = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); - Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1); + MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32); + SDValue Mask1 = + DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 16); + if (!IsF128) + Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1); // Next, clear the sign bit from the first operand (magnitude). @@ -12750,8 +13738,9 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { APFloat APF = Op0CN->getValueAPF(); // If the magnitude is a positive zero, the sign bit alone is enough. if (APF.isPosZero()) - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit, - DAG.getIntPtrConstant(0, dl)); + return IsF128 ? SignBit : + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit, + DAG.getIntPtrConstant(0, dl)); APF.clearSign(); CV[0] = ConstantFP::get(*Context, APF); } else { @@ -12761,18 +13750,21 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { } C = ConstantVector::get(CV); CPIdx = DAG.getConstantPool(C, PtrVT, 16); - SDValue Val = DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - false, false, false, 16); + SDValue Val = + DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, 16); // If the magnitude operand wasn't a constant, we need to AND out the sign. if (!isa(Op0)) { - Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0); + if (!IsF128) + Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0); Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val); } // OR the magnitude value with the sign bit. 
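The FAND/FANDN/FOR sequence this FCOPYSIGN lowering builds is ordinary copysign bit surgery, now applied to f128 directly as well. A scalar sketch with the same masks (illustrative name; IEEE f64 layout assumed):

    #include <cstdint>
    #include <cstring>

    // Keep everything but the sign from Mag, keep only the sign from Sign,
    // then OR, the same mask dance as the vector logic nodes.
    double copySignBits(double Mag, double Sign) {
      uint64_t M, S;
      std::memcpy(&M, &Mag, sizeof(M));
      std::memcpy(&S, &Sign, sizeof(S));
      const uint64_t SignBit = 0x8000000000000000ull;
      uint64_t R = (M & ~SignBit) | (S & SignBit);
      double Res;
      std::memcpy(&Res, &R, sizeof(Res));
      return Res;
    }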
Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val, - DAG.getIntPtrConstant(0, dl)); + return IsF128 ? Val : + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val, + DAG.getIntPtrConstant(0, dl)); } static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { @@ -12859,7 +13851,7 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget, return SDValue(); } - EVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; + MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; // Cast all vectors into TestVT for PTEST. for (unsigned i = 0, e = VecIns.size(); i < e; ++i) @@ -12999,14 +13991,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, if (ConstantSDNode *C = dyn_cast(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. - if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) { + if (C->isOne() && !Subtarget->slowIncDec()) { Opcode = X86ISD::INC; NumOperands = 1; break; } // An add of negative one (subtract of one) will be selected as a DEC. - if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) { + if (C->isAllOnesValue() && !Subtarget->slowIncDec()) { Opcode = X86ISD::DEC; NumOperands = 1; break; @@ -13135,13 +14127,11 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, /// equivalent. SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl, SelectionDAG &DAG) const { - if (ConstantSDNode *C = dyn_cast(Op1)) { - if (C->getAPIntValue() == 0) - return EmitTest(Op0, X86CC, dl, DAG); + if (isNullConstant(Op1)) + return EmitTest(Op0, X86CC, dl, DAG); - if (Op0.getValueType() == MVT::i1) - llvm_unreachable("Unexpected comparison operation for MVT::i1 operands"); - } + assert(!(isa(Op1) && Op0.getValueType() == MVT::i1) && + "Unexpected comparison operation for MVT::i1 operands"); if ((Op0.getValueType() == MVT::i8 || Op0.getValueType() == MVT::i16 || Op0.getValueType() == MVT::i32 || Op0.getValueType() == MVT::i64)) { @@ -13150,8 +14140,7 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, // if we're optimizing for size, however, as that'll allow better folding // of memory operations. if (Op0.getValueType() != MVT::i32 && Op0.getValueType() != MVT::i64 && - !DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::MinSize) && + !DAG.getMachineFunction().getFunction()->optForMinSize() && !Subtarget->isAtom()) { unsigned ExtendOp = isX86CCUnsigned(X86CC) ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; @@ -13188,6 +14177,9 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW, DAG.getConstant(8, dl, MVT::i8)); SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); + + // Some 64-bit targets lack SAHF support, but they do support FCOMI. + assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?"); return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); } @@ -13261,13 +14253,8 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, /// This is because we still need one division to calculate the reciprocal and /// then we need two multiplies by that reciprocal as replacements for the /// original divisions. 
-bool X86TargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const { - return NumUsers > 1; -} - -static bool isAllOnes(SDValue V) { - ConstantSDNode *C = dyn_cast(V); - return C && C->isAllOnesValue(); +unsigned X86TargetLowering::combineRepeatedFPDivisors() const { + return 2; } /// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node @@ -13285,8 +14272,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, if (Op1.getOpcode() == ISD::SHL) std::swap(Op0, Op1); if (Op0.getOpcode() == ISD::SHL) { - if (ConstantSDNode *And00C = dyn_cast(Op0.getOperand(0))) - if (And00C->getZExtValue() == 1) { + if (isOneConstant(Op0.getOperand(0))) { // If we looked past a truncate, check that it's only truncating away // known zeros. unsigned BitWidth = Op0.getValueSizeInBits(); @@ -13423,7 +14409,7 @@ static SDValue LowerBoolVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - assert(Op0.getValueType().getVectorElementType() == MVT::i1 && + assert(Op0.getSimpleValueType().getVectorElementType() == MVT::i1 && "Unexpected type for boolean compare operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); SDValue NotOp0 = DAG.getNode(ISD::XOR, dl, VT, Op0, @@ -13467,8 +14453,8 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - assert(Op0.getValueType().getVectorElementType().getSizeInBits() >= 8 && - Op.getValueType().getScalarType() == MVT::i1 && + assert(Op0.getSimpleValueType().getVectorElementType().getSizeInBits() >= 8 && + Op.getSimpleValueType().getVectorElementType() == MVT::i1 && "Cannot set masked compare for this operation"); ISD::CondCode SetCCOpcode = cast(CC)->get(); @@ -13515,7 +14501,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG) for (unsigned i = 0; i < n; ++i) { ConstantSDNode *Elt = dyn_cast(BV->getOperand(i)); - if (!Elt || Elt->isOpaque() || Elt->getValueType(0) != EVT) + if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EVT) return SDValue(); // Avoid underflow. @@ -13606,13 +14592,13 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, if (VT.is256BitVector() && !Subtarget->hasInt256()) return Lower256IntVSETCC(Op, DAG); - EVT OpVT = Op1.getValueType(); + MVT OpVT = Op1.getSimpleValueType(); if (OpVT.getVectorElementType() == MVT::i1) return LowerBoolVSETCC_AVX512(Op, DAG); bool MaskResult = (VT.getVectorElementType() == MVT::i1); if (Subtarget->hasAVX512()) { - if (Op1.getValueType().is512BitVector() || + if (Op1.getSimpleValueType().is512BitVector() || (Subtarget->hasBWI() && Subtarget->hasVLX()) || (MaskResult && OpVT.getVectorElementType().getSizeInBits() >= 32)) return LowerIntVSETCC_AVX512(Op, DAG, Subtarget); @@ -13628,6 +14614,33 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, DAG.getNode(ISD::SETCC, dl, OpVT, Op0, Op1, CC)); } + // Lower using XOP integer comparisons. + if ((VT == MVT::v16i8 || VT == MVT::v8i16 || + VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget->hasXOP()) { + // Translate compare code to XOP PCOM compare mode. 
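For reference while reading the switch that follows, the same encoding as a standalone table (the enum is a stand-in for the ISD::SET* codes; the immediates match the hunk, and signedness is carried by the VPCOM versus VPCOMU opcode rather than the immediate):

    enum class CmpKind { LT, LE, GT, GE, EQ, NE }; // stand-ins for ISD::SET(U)*

    unsigned xopPcomImmediate(CmpKind K) {
      switch (K) {
      case CmpKind::LT: return 0x00;
      case CmpKind::LE: return 0x01;
      case CmpKind::GT: return 0x02;
      case CmpKind::GE: return 0x03;
      case CmpKind::EQ: return 0x04;
      case CmpKind::NE: return 0x05;
      }
      return ~0u; // unreachable
    }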
+ unsigned CmpMode = 0; + switch (SetCCOpcode) { + default: llvm_unreachable("Unexpected SETCC condition"); + case ISD::SETULT: + case ISD::SETLT: CmpMode = 0x00; break; + case ISD::SETULE: + case ISD::SETLE: CmpMode = 0x01; break; + case ISD::SETUGT: + case ISD::SETGT: CmpMode = 0x02; break; + case ISD::SETUGE: + case ISD::SETGE: CmpMode = 0x03; break; + case ISD::SETEQ: CmpMode = 0x04; break; + case ISD::SETNE: CmpMode = 0x05; break; + } + + // Are we comparing unsigned or signed integers? + unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) + ? X86ISD::VPCOMU : X86ISD::VPCOM; + + return DAG.getNode(Opc, dl, VT, Op0, Op1, + DAG.getConstant(CmpMode, dl, MVT::i8)); + } + // We are handling one of the integer comparisons here. Since SSE only has // GT and EQ comparisons for integer, swapping operands and multiple // operations may be required for some comparisons. @@ -13777,7 +14790,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget, // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. if (FlipSigns) { - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl, VT); Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); @@ -13818,11 +14831,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Lower ((X >>u N) & 1) != 0 to BT(X, N). // Lower ((X >>s N) & 1) != 0 to BT(X, N). if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && - Op1.getOpcode() == ISD::Constant && - cast(Op1)->isNullValue() && + isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); - if (NewSetCC.getNode()) { + if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) { if (VT == MVT::i1) return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC); return NewSetCC; @@ -13831,17 +14842,14 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of // these. - if (Op1.getOpcode() == ISD::Constant && - (cast(Op1)->getZExtValue() == 1 || - cast(Op1)->isNullValue()) && + if ((isOneConstant(Op1) || isNullConstant(Op1)) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { // If the input is a setcc, then reuse the input setcc or use a new one with // the inverted condition. 
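The LowerToBT fold used a few lines up treats the two common bit-test shapes as one predicate; BT RHS, N implements both in a single instruction. A scalar sketch (illustrative names, valid for N below the operand width):

    #include <cstdint>

    bool bitSetShiftRight(uint64_t X, unsigned N) { return ((X >> N) & 1) != 0; }
    bool bitSetShiftLeft(uint64_t X, unsigned N)  { return ((1ull << N) & X) != 0; }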
if (Op0.getOpcode() == X86ISD::SETCC) { X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); - bool Invert = (CC == ISD::SETNE) ^ - cast(Op1)->isNullValue(); + bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1); if (!Invert) return Op0; @@ -13854,8 +14862,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } } - if ((Op0.getValueType() == MVT::i1) && (Op1.getOpcode() == ISD::Constant) && - (cast(Op1)->getZExtValue() == 1) && + if ((Op0.getValueType() == MVT::i1) && isOneConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { ISD::CondCode NewCC = ISD::getSetCCInverse(CC, true); @@ -13876,6 +14883,23 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { return SetCC; } +SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDValue Carry = Op.getOperand(2); + SDValue Cond = Op.getOperand(3); + SDLoc DL(Op); + + assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only."); + X86::CondCode CC = TranslateIntegerX86CC(cast(Cond)->get()); + + assert(Carry.getOpcode() != ISD::CARRY_FALSE); + SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry); + return DAG.getNode(X86ISD::SETCC, DL, Op.getValueType(), + DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1)); +} + // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); @@ -13918,7 +14942,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); SDLoc DL(Op); - EVT VT = Op1.getValueType(); + MVT VT = Op1.getSimpleValueType(); SDValue CC; // Lower FP selects into a CMP/AND/ANDN/OR sequence when the necessary SSE ops @@ -13927,7 +14951,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { if (Cond.getOpcode() == ISD::SETCC && ((Subtarget->hasSSE2() && (VT == MVT::f32 || VT == MVT::f64)) || (Subtarget->hasSSE1() && VT == MVT::f32)) && - VT == Cond.getOperand(0).getValueType() && Cond->hasOneUse()) { + VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); int SSECC = translateX86FSETCC( cast(Cond.getOperand(2))->get(), CondOp0, CondOp1); @@ -13961,12 +14985,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // Convert to vectors, do a VSELECT, and convert back to scalar. // All of the conversions should be optimized away. - EVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; + MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1); SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2); SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); - EVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; + MVT VCmpVT = VT == MVT::f32 ? 
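The SETCCE lowering added above feeds a carry into X86ISD::SBB so that multi-word compares need no branches. A scalar sketch of the idea for one extra limb (illustrative, unsigned less-than only):

    #include <cstdint>

    // One SUB produces a borrow, one SBB consumes it and yields the final
    // borrow: a 128-bit unsigned less-than with no branches or wide math.
    bool ult128(uint64_t LoA, uint64_t HiA, uint64_t LoB, uint64_t HiB) {
      bool Borrow = LoA < LoB;                              // SUB on low limbs
      bool BorrowOut = HiA < HiB || (Borrow && HiA == HiB); // SBB on high limbs
      return BorrowOut;
    }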
MVT::v4i32 : MVT::v2i64; VCmp = DAG.getBitcast(VCmpVT, VCmp); SDValue VSel = DAG.getNode(ISD::VSELECT, DL, VecVT, VCmp, VOp1, VOp2); @@ -13980,26 +15004,26 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } - if (VT.isVector() && VT.getScalarType() == MVT::i1) { - SDValue Op1Scalar; - if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode())) - Op1Scalar = ConvertI1VectorToInterger(Op1, DAG); - else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0)) - Op1Scalar = Op1.getOperand(0); - SDValue Op2Scalar; - if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode())) - Op2Scalar = ConvertI1VectorToInterger(Op2, DAG); - else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) - Op2Scalar = Op2.getOperand(0); - if (Op1Scalar.getNode() && Op2Scalar.getNode()) { - SDValue newSelect = DAG.getNode(ISD::SELECT, DL, - Op1Scalar.getValueType(), - Cond, Op1Scalar, Op2Scalar); - if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) - return DAG.getBitcast(VT, newSelect); - SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec, - DAG.getIntPtrConstant(0, DL)); + if (VT.isVector() && VT.getVectorElementType() == MVT::i1) { + SDValue Op1Scalar; + if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode())) + Op1Scalar = ConvertI1VectorToInteger(Op1, DAG); + else if (Op1.getOpcode() == ISD::BITCAST && Op1.getOperand(0)) + Op1Scalar = Op1.getOperand(0); + SDValue Op2Scalar; + if (ISD::isBuildVectorOfConstantSDNodes(Op2.getNode())) + Op2Scalar = ConvertI1VectorToInteger(Op2, DAG); + else if (Op2.getOpcode() == ISD::BITCAST && Op2.getOperand(0)) + Op2Scalar = Op2.getOperand(0); + if (Op1Scalar.getNode() && Op2Scalar.getNode()) { + SDValue newSelect = DAG.getNode(ISD::SELECT, DL, + Op1Scalar.getValueType(), + Cond, Op1Scalar, Op2Scalar); + if (newSelect.getValueSizeInBits() == VT.getSizeInBits()) + return DAG.getBitcast(VT, newSelect); + SDValue ExtVec = DAG.getBitcast(MVT::v8i1, newSelect); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtVec, + DAG.getIntPtrConstant(0, DL)); } } @@ -14026,22 +15050,21 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y if (Cond.getOpcode() == X86ISD::SETCC && Cond.getOperand(1).getOpcode() == X86ISD::CMP && - isZero(Cond.getOperand(1).getOperand(1))) { + isNullConstant(Cond.getOperand(1).getOperand(1))) { SDValue Cmp = Cond.getOperand(1); unsigned CondCode =cast(Cond.getOperand(0))->getZExtValue(); - if ((isAllOnes(Op1) || isAllOnes(Op2)) && + if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { - SDValue Y = isAllOnes(Op2) ? Op1 : Op2; + SDValue Y = isAllOnesConstant(Op2) ? 
Op1 : Op2; SDValue CmpOp0 = Cmp.getOperand(0); // Apply further optimizations for special cases // (select (x != 0), -1, 0) -> neg & sbb // (select (x == 0), 0, -1) -> neg & sbb - if (ConstantSDNode *YC = dyn_cast(Y)) - if (YC->isNullValue() && - (isAllOnes(Op1) == (CondCode == X86::COND_NE))) { + if (isNullConstant(Y) && + (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { SDVTList VTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); SDValue Neg = DAG.getNode(X86ISD::SUB, DL, VTs, DAG.getConstant(0, DL, @@ -14061,11 +15084,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), Cmp); - if (isAllOnes(Op1) != (CondCode == X86::COND_E)) + if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) Res = DAG.getNOT(DL, Res, Res.getValueType()); - ConstantSDNode *N2C = dyn_cast(Op2); - if (!N2C || !N2C->isNullValue()) + if (!isNullConstant(Op2)) Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); return Res; } @@ -14073,11 +15095,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // Look past (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && - Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { - ConstantSDNode *C = dyn_cast(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) - Cond = Cond.getOperand(0); - } + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && + isOneConstant(Cond.getOperand(1))) + Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. @@ -14136,15 +15156,14 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - // Look pass the truncate if the high bits are known zero. + // Look past the truncate if the high bits are known zero. if (isTruncWithZeroHighBitsInput(Cond, DAG)) - Cond = Cond.getOperand(0); + Cond = Cond.getOperand(0); // We know the result of AND is compared against zero. Try to match // it to BT. 
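A scalar model of the neg & sbb fold named in the comment above: NEG sets the carry flag exactly when its input is nonzero, and SBB of a register with itself materializes 0 or all-ones from that carry, so the select needs no branch (illustrative function; OR'ing in Y gives the general -1-vs-y form):

    #include <cstdint>

    uint32_t allOnesIfNonZero(uint32_t X) {
      uint32_t Carry = (X != 0) ? 1u : 0u; // carry out of NEG X
      return 0u - Carry;                   // SBB r, r: 0 or 0xFFFFFFFF
    }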
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG); - if (NewSetCC.getNode()) { + if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); addTest = false; @@ -14166,11 +15185,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { unsigned CondCode = cast(CC)->getZExtValue(); if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && - (isAllOnes(Op1) || isAllOnes(Op2)) && (isZero(Op1) || isZero(Op2))) { + (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && + (isNullConstant(Op1) || isNullConstant(Op2))) { SDValue Res = DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), DAG.getConstant(X86::COND_B, DL, MVT::i8), Cond); - if (isAllOnes(Op1) != (CondCode == X86::COND_B)) + if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) return DAG.getNOT(DL, Res, Res.getValueType()); return Res; } @@ -14256,8 +15276,8 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, MVT InVT = In.getSimpleValueType(); assert(VT.getSizeInBits() == InVT.getSizeInBits()); - MVT InSVT = InVT.getScalarType(); - assert(VT.getScalarType().getScalarSizeInBits() > InSVT.getScalarSizeInBits()); + MVT InSVT = InVT.getVectorElementType(); + assert(VT.getVectorElementType().getSizeInBits() > InSVT.getSizeInBits()); if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) return SDValue(); @@ -14276,7 +15296,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, // As SRAI is only available on i16/i32 types, we expand only up to i32 // and handle i64 separately. - while (CurrVT != VT && CurrVT.getScalarType() != MVT::i32) { + while (CurrVT != VT && CurrVT.getVectorElementType() != MVT::i32) { Curr = DAG.getNode(X86ISD::UNPCKL, dl, CurrVT, DAG.getUNDEF(CurrVT), Curr); MVT CurrSVT = MVT::getIntegerVT(CurrVT.getScalarSizeInBits() * 2); CurrVT = MVT::getVectorVT(CurrSVT, CurrVT.getVectorNumElements() / 2); @@ -14286,7 +15306,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SDValue SignExt = Curr; if (CurrVT != InVT) { unsigned SignExtShift = - CurrVT.getScalarSizeInBits() - InSVT.getScalarSizeInBits(); + CurrVT.getVectorElementType().getSizeInBits() - InSVT.getSizeInBits(); SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr, DAG.getConstant(SignExtShift, dl, MVT::i8)); } @@ -14346,7 +15366,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget, SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, &ShufMask2[0]); - MVT HalfVT = MVT::getVectorVT(VT.getScalarType(), + MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(), VT.getVectorNumElements()/2); OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo); @@ -14470,7 +15490,7 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, // memory. In practice, we ''widen'' MemVT. EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), - loadRegZize / MemVT.getScalarType().getSizeInBits()); + loadRegZize / MemVT.getScalarSizeInBits()); assert(WideVecVT.getSizeInBits() == LoadUnitVecVT.getSizeInBits() && "Invalid vector type"); @@ -14518,29 +15538,12 @@ static SDValue LowerExtendedLoad(SDValue Op, const X86Subtarget *Subtarget, return Sext; } - // Otherwise we'll shuffle the small elements in the high bits of the - // larger type and perform an arithmetic shift. If the shift is not legal - // it's better to scalarize. 
- assert(TLI.isOperationLegalOrCustom(ISD::SRA, RegVT) && - "We can't implement a sext load without an arithmetic right shift!"); - - // Redistribute the loaded elements into the different locations. - SmallVector ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i != NumElems; ++i) - ShuffleVec[i * SizeRatio + SizeRatio - 1] = i; - - SDValue Shuff = DAG.getVectorShuffle( - WideVecVT, dl, SlicedVec, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); - - Shuff = DAG.getBitcast(RegVT, Shuff); - - // Build the arithmetic shift. - unsigned Amt = RegVT.getVectorElementType().getSizeInBits() - - MemVT.getVectorElementType().getSizeInBits(); - Shuff = - DAG.getNode(ISD::SRA, dl, RegVT, Shuff, - DAG.getConstant(Amt, dl, RegVT)); + // Otherwise we'll use SIGN_EXTEND_VECTOR_INREG to sign extend the lowest + // lanes. + assert(TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND_VECTOR_INREG, RegVT) && + "We can't implement a sext load without SIGN_EXTEND_VECTOR_INREG!"); + SDValue Shuff = DAG.getSignExtendVectorInReg(SlicedVec, dl, RegVT); DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), TF); return Shuff; } @@ -14577,11 +15580,9 @@ static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) { static bool isXor1OfSetCC(SDValue Op) { if (Op.getOpcode() != ISD::XOR) return false; - ConstantSDNode *N1C = dyn_cast(Op.getOperand(1)); - if (N1C && N1C->getAPIntValue() == 1) { + if (isOneConstant(Op.getOperand(1))) return Op.getOperand(0).getOpcode() == X86ISD::SETCC && - Op.getOperand(0).hasOneUse(); - } + Op.getOperand(0).hasOneUse(); return false; } @@ -14597,8 +15598,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { if (Cond.getOpcode() == ISD::SETCC) { // Check for setcc([su]{add,sub,mul}o == 0). if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && - isa(Cond.getOperand(1)) && - cast(Cond.getOperand(1))->isNullValue() && + isNullConstant(Cond.getOperand(1)) && Cond.getOperand(0).getResNo() == 1 && (Cond.getOperand(0).getOpcode() == ISD::SADDO || Cond.getOperand(0).getOpcode() == ISD::UADDO || @@ -14625,11 +15625,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // Look pass (and (setcc_carry (cmp ...)), 1). if (Cond.getOpcode() == ISD::AND && - Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { - ConstantSDNode *C = dyn_cast(Cond.getOperand(1)); - if (C && C->getAPIntValue() == 1) - Cond = Cond.getOperand(0); - } + Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && + isOneConstant(Cond.getOperand(1))) + Cond = Cond.getOperand(0); // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. @@ -14673,16 +15671,14 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { switch (CondOpcode) { case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; case ISD::SADDO: - if (ConstantSDNode *C = dyn_cast(RHS)) - if (C->isOne()) { + if (isOneConstant(RHS)) { X86Opcode = X86ISD::INC; X86Cond = X86::COND_O; break; } X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; case ISD::SSUBO: - if (ConstantSDNode *C = dyn_cast(RHS)) - if (C->isOne()) { + if (isOneConstant(RHS)) { X86Opcode = X86ISD::DEC; X86Cond = X86::COND_O; break; } @@ -14844,8 +15840,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // We know the result of AND is compared against zero. Try to match // it to BT. 
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { - SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); - if (NewSetCC.getNode()) { + if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) { CC = NewSetCC.getOperand(0); Cond = NewSetCC.getOperand(1); addTest = false; @@ -14877,54 +15872,40 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SplitStack; SDLoc dl(Op); + // Get the inputs. + SDNode *Node = Op.getNode(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + unsigned Align = cast(Op.getOperand(2))->getZExtValue(); + EVT VT = Node->getValueType(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + + bool Is64Bit = Subtarget->is64Bit(); + MVT SPTy = getPointerTy(DAG.getDataLayout()); + + SDValue Result; if (!Lower) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDNode* Node = Op.getNode(); - unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" - " not tell us which reg is the stack pointer!"); + " not tell us which reg is the stack pointer!"); EVT VT = Node->getValueType(0); - SDValue Tmp1 = SDValue(Node, 0); - SDValue Tmp2 = SDValue(Node, 1); SDValue Tmp3 = Node->getOperand(2); - SDValue Chain = Tmp1.getOperand(0); - // Chain the dynamic stack allocation so that it doesn't modify the stack - // pointer when other instructions are using the stack. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), - SDLoc(Node)); - - SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) - Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, dl, VT)); - Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), - DAG.getIntPtrConstant(0, dl, true), SDValue(), - SDLoc(Node)); - - SDValue Ops[2] = { Tmp1, Tmp2 }; - return DAG.getMergeValues(Ops, dl); - } - - // Get the inputs. 
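The default DYNAMIC_STACKALLOC path now shared above does the classic pointer bump and mask. A scalar sketch of that arithmetic (the 16-byte constant stands in for TFI.getStackAlignment(); Align is a power of two):

    #include <cstdint>

    // AND with -(uint64_t)Align in the hunk is the same as ~(Align - 1).
    uint64_t allocaNewSP(uint64_t SP, uint64_t Size, uint64_t Align) {
      const uint64_t StackAlign = 16;
      uint64_t NewSP = SP - Size;   // bump the stack pointer down
      if (Align > StackAlign)
        NewSP &= ~(Align - 1);      // round down to the requested alignment
      return NewSP;
    }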
- SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - unsigned Align = cast(Op.getOperand(2))->getZExtValue(); - EVT VT = Op.getNode()->getValueType(0); - - bool Is64Bit = Subtarget->is64Bit(); - MVT SPTy = getPointerTy(DAG.getDataLayout()); - - if (SplitStack) { + Result = DAG.getNode(ISD::AND, dl, VT, Result, + DAG.getConstant(-(uint64_t)Align, dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain + } else if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); if (Is64Bit) { @@ -14942,10 +15923,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); - SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, + Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); - SDValue Ops1[2] = { Value, Chain }; - return DAG.getMergeValues(Ops1, dl); } else { SDValue Flag; const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX); @@ -14967,9 +15946,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP); } - SDValue Ops1[2] = { SP, Chain }; - return DAG.getMergeValues(Ops1, dl); + Result = SP; } + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + + SDValue Ops[2] = {Result, Chain}; + return DAG.getMergeValues(Ops, dl); } SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { @@ -14980,7 +15964,8 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { const Value *SV = cast(Op.getOperand(2))->getValue(); SDLoc DL(Op); - if (!Subtarget->is64Bit() || Subtarget->isTargetWin64()) { + if (!Subtarget->is64Bit() || + Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); @@ -15019,10 +16004,11 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MemOps.push_back(Store); // Store ptr to reg_save_area. - FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(8, DL)); + FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant( + Subtarget->isTarget64BitLP64() ? 8 : 4, DL)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT); - Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, - MachinePointerInfo(SV, 16), false, false, 0); + Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN, MachinePointerInfo( + SV, Subtarget->isTarget64BitLP64() ? 16 : 12), false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); } @@ -15030,10 +16016,13 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->is64Bit() && "LowerVAARG only handles 64-bit va_arg!"); - assert((Subtarget->isTargetLinux() || - Subtarget->isTargetDarwin()) && - "Unhandled target in LowerVAARG"); assert(Op.getNode()->getNumOperands() == 4); + + MachineFunction &MF = DAG.getMachineFunction(); + if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) + // The Win64 ABI uses char* instead of a structure. 
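// For orientation, the SysV x86-64 va_list that the LowerVASTART stores above
// populate; the 8/16 versus 4/12 byte offsets chosen via isTarget64BitLP64()
// fall out of the pointer size. A reference sketch of the well-known ABI
// layout, not a declaration from this patch.
struct SysVX86_64VaList {
  unsigned GPOffset;      // byte 0: next general-purpose register slot
  unsigned FPOffset;      // byte 4: next floating-point register slot
  void *OverflowArgArea;  // byte 8: stack argument area
  void *RegSaveArea;      // byte 16 on LP64, byte 12 on X32
};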
+ return DAG.expandVAArg(Op.getNode()); + SDValue Chain = Op.getOperand(0); SDValue SrcPtr = Op.getOperand(1); const Value *SV = cast(Op.getOperand(2))->getValue(); @@ -15061,8 +16050,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. assert(!Subtarget->useSoftFloat() && - !(DAG.getMachineFunction().getFunction()->hasFnAttribute( - Attribute::NoImplicitFloat)) && + !(MF.getFunction()->hasFnAttribute(Attribute::NoImplicitFloat)) && Subtarget->hasSSE1()); } @@ -15091,8 +16079,14 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerVACOPY(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - // X86-64 va_list is a struct { i32, i32, i8*, i8* }. + // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows, + // where a va_list is still an i8*. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!"); + if (Subtarget->isCallingConvWin64( + DAG.getMachineFunction().getFunction()->getCallingConv())) + // Probably a Win64 va_copy. + return DAG.expandVACopy(Op.getNode()); + SDValue Chain = Op.getOperand(0); SDValue DstPtr = Op.getOperand(1); SDValue SrcPtr = Op.getOperand(2); @@ -15230,72 +16224,126 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, // The return type has to be a 128-bit type with the same element // type as the input type. MVT EltVT = VT.getVectorElementType(); - EVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); + MVT ShVT = MVT::getVectorVT(EltVT, 128/EltVT.getSizeInBits()); ShAmt = DAG.getBitcast(ShVT, ShAmt); return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } -/// \brief Return (and \p Op, \p Mask) for compare instructions or -/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the -/// necessary casting for \p Mask when lowering masking intrinsics. -static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, - SDValue PreservedSrc, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - EVT VT = Op.getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), - MVT::i1, VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - SDLoc dl(Op); +/// \brief Return Mask with the necessary casting or extending +/// for \p Mask according to \p MaskVT when lowering masking intrinsics +static SDValue getMaskNode(SDValue Mask, MVT MaskVT, + const X86Subtarget *Subtarget, + SelectionDAG &DAG, SDLoc dl) { - assert(MaskVT.isSimple() && "invalid mask type"); + if (MaskVT.bitsGT(Mask.getSimpleValueType())) { + // Mask should be extended + Mask = DAG.getNode(ISD::ANY_EXTEND, dl, + MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask); + } - if (isAllOnes(Mask)) - return Op; + if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) { + if (MaskVT == MVT::v64i1) { + assert(Subtarget->hasBWI() && "Expected AVX512BW target!"); + // In 32-bit mode a bitcast of i64 is illegal; extend/split it. + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(0, dl, MVT::i32)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(1, dl, MVT::i32)); + Lo = DAG.getBitcast(MVT::v32i1, Lo); + Hi = DAG.getBitcast(MVT::v32i1, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); + } else { + // MaskVT requires < 64 bits. Truncate the mask (should succeed in any + // case), and bitcast.
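// Scalar model of the v64i1 branch above (an illustrative sketch, not the
// DAG code): on a 32-bit target the i64 mask is split into two i32 halves
// whose bits drive lanes 0-31 and 32-63 before the two v32i1 halves are
// concatenated.
#include <cstdint>
static void SplitMask64(uint64_t Mask, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(Mask);        // lanes 0..31
  Hi = static_cast<uint32_t>(Mask >> 32);  // lanes 32..63
}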
+ MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits()); + return DAG.getBitcast(MaskVT, + DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask)); + } + + } else { + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); + } +} - switch (Op.getOpcode()) { - default: break; - case X86ISD::PCMPEQM: - case X86ISD::PCMPGTM: - case X86ISD::CMPM: - case X86ISD::CMPMU: - return DAG.getNode(ISD::AND, dl, VT, Op, VMask); - } - if (PreservedSrc.getOpcode() == ISD::UNDEF) - PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getNode(ISD::VSELECT, dl, VT, VMask, Op, PreservedSrc); +/// \brief Return (and \p Op, \p Mask) for compare instructions or +/// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the +/// necessary casting or extending for \p Mask when lowering masking intrinsics +static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, + SDValue PreservedSrc, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + unsigned OpcodeSelect = ISD::VSELECT; + SDLoc dl(Op); + + if (isAllOnesConstant(Mask)) + return Op; + + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + + switch (Op.getOpcode()) { + default: break; + case X86ISD::PCMPEQM: + case X86ISD::PCMPGTM: + case X86ISD::CMPM: + case X86ISD::CMPMU: + return DAG.getNode(ISD::AND, dl, VT, Op, VMask); + case X86ISD::VFPCLASS: + case X86ISD::VFPCLASSS: + return DAG.getNode(ISD::OR, dl, VT, Op, VMask); + case X86ISD::VTRUNC: + case X86ISD::VTRUNCS: + case X86ISD::VTRUNCUS: + // We can't use ISD::VSELECT here because it is not always "Legal" + // for the destination type. For example vpmovqb requires only AVX512 + // and a vselect that can operate on byte element types requires BWI + OpcodeSelect = X86ISD::SELECT; + break; + } + if (PreservedSrc.getOpcode() == ISD::UNDEF) + PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); } /// \brief Creates an SDNode for a predicated scalar operation. /// \returns (X86vselect \p Mask, \p Op, \p PreservedSrc). -/// The mask is comming as MVT::i8 and it should be truncated +/// The mask is coming as MVT::i8 and it should be truncated /// to MVT::i1 while lowering masking intrinsics. /// The main difference between ScalarMaskingNode and VectorMaskingNode is using -/// "X86select" instead of "vselect". We just can't create the "vselect" node for -/// a scalar instruction. +/// "X86select" instead of "vselect". We just can't create the "vselect" node +/// for a scalar instruction.
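// Per-lane model of the select that getVectorMaskingNode builds above (a
// sketch assuming plain integer lanes; the real node is a VSELECT or
// X86ISD::SELECT over vector types): each result lane takes Op where the
// mask bit is set and PreservedSrc where it is clear.
#include <cstddef>
#include <cstdint>
static void MaskedSelect(const uint32_t *Op, const uint32_t *PreservedSrc,
                         uint64_t Mask, uint32_t *Out, size_t NumElts) {
  for (size_t I = 0; I != NumElts; ++I)
    Out[I] = ((Mask >> I) & 1) ? Op[I] : PreservedSrc[I];
}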
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask, SDValue PreservedSrc, const X86Subtarget *Subtarget, SelectionDAG &DAG) { - if (isAllOnes(Mask)) - return Op; + if (isAllOnesConstant(Mask)) + return Op; - EVT VT = Op.getValueType(); - SDLoc dl(Op); - // The mask should be of type MVT::i1 - SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask); + MVT VT = Op.getSimpleValueType(); + SDLoc dl(Op); + // The mask should be of type MVT::i1 + SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask); - if (PreservedSrc.getOpcode() == ISD::UNDEF) - PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc); + if (Op.getOpcode() == X86ISD::FSETCC) + return DAG.getNode(ISD::AND, dl, VT, Op, IMask); + if (Op.getOpcode() == X86ISD::VFPCLASS || + Op.getOpcode() == X86ISD::VFPCLASSS) + return DAG.getNode(ISD::OR, dl, VT, Op, IMask); + + if (PreservedSrc.getOpcode() == ISD::UNDEF) + PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc); } static int getSEHRegistrationNodeSize(const Function *Fn) { @@ -15309,15 +16357,16 @@ static int getSEHRegistrationNodeSize(const Function *Fn) { case EHPersonality::MSVC_CXX: return 16; default: break; } - report_fatal_error("can only recover FP for MSVC EH personality functions"); + report_fatal_error( + "can only recover FP for 32-bit MSVC EH personality functions"); } -/// When the 32-bit MSVC runtime transfers control to us, either to an outlined +/// When the MSVC runtime transfers control to us, either to an outlined /// function or when returning to a parent frame after catching an exception, we /// recover the parent frame pointer by doing arithmetic on the incoming EBP. /// Here's the math: /// RegNodeBase = EntryEBP - RegNodeSize -/// ParentFP = RegNodeBase - RegNodeFrameOffset +/// ParentFP = RegNodeBase - ParentFrameOffset /// Subtracting RegNodeSize takes us to the offset of the registration node, and /// subtracting the offset (negative on x86) takes us back to the parent FP. static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, @@ -15334,29 +16383,35 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn, if (!Fn->hasPersonalityFn()) return EntryEBP; - int RegNodeSize = getSEHRegistrationNodeSize(Fn); - // Get an MCSymbol that will ultimately resolve to the frame offset of the EH - // registration. + // registration, or the .set_setframe offset. MCSymbol *OffsetSym = MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol( GlobalValue::getRealLinkageName(Fn->getName())); SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT); - SDValue RegNodeFrameOffset = + SDValue ParentFrameOffset = DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal); + // Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after + // prologue to RBP in the parent function. 
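// Arithmetic sketch of the frame-pointer recovery done just below
// (illustrative only): the 64-bit path returns EntryEBP + ParentFrameOffset
// directly, while the 32-bit path computes
//   RegNodeBase = EntryEBP - RegNodeSize
//   ParentFP    = RegNodeBase - ParentFrameOffset  // offset is negative on x86
#include <cstdint>
static intptr_t RecoverParentFP32(intptr_t EntryEBP, int RegNodeSize,
                                  intptr_t ParentFrameOffset) {
  intptr_t RegNodeBase = EntryEBP - RegNodeSize;
  return RegNodeBase - ParentFrameOffset;
}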
+ const X86Subtarget &Subtarget = + static_cast(DAG.getSubtarget()); + if (Subtarget.is64Bit()) + return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset); + + int RegNodeSize = getSEHRegistrationNodeSize(Fn); // RegNodeBase = EntryEBP - RegNodeSize - // ParentFP = RegNodeBase - RegNodeFrameOffset + // ParentFP = RegNodeBase - ParentFrameOffset SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP, DAG.getConstant(RegNodeSize, dl, PtrVT)); - return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, RegNodeFrameOffset); + return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset); } static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); if (IntrData) { switch(IntrData->Type) { @@ -15365,6 +16420,9 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget case INTR_TYPE_2OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case INTR_TYPE_2OP_IMM8: + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), + DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(2))); case INTR_TYPE_3OP: return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); @@ -15376,28 +16434,53 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); SDValue RoundingMode; + // We always add rounding mode to the Node. + // If the rounding mode is not specified, we add the + // "current direction" mode. if (Op.getNumOperands() == 4) - RoundingMode = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + RoundingMode = + DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); else RoundingMode = Op.getOperand(4); unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; - if (IntrWithRoundingModeOpcode != 0) { - unsigned Round = cast(RoundingMode)->getZExtValue(); - if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) + if (IntrWithRoundingModeOpcode != 0) + if (cast(RoundingMode)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), Src, RoundingMode), Mask, PassThru, Subtarget, DAG); - } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src, RoundingMode), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_1OP_MASK: { SDValue Src = Op.getOperand(1); - SDValue Passthru = Op.getOperand(2); + SDValue PassThru = Op.getOperand(2); SDValue Mask = Op.getOperand(3); + // We add rounding mode to the Node when + // - RM Opcode is specified and + // - RM is not "current direction".
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + SDValue Rnd = Op.getOperand(4); + unsigned Round = cast(Rnd)->getZExtValue(); + if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) { + return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, Op.getValueType(), + Src, Rnd), + Mask, PassThru, Subtarget, DAG); + } + } return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), - Mask, Passthru, Subtarget, DAG); + Mask, PassThru, Subtarget, DAG); + } + case INTR_TYPE_SCALAR_MASK: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue passThru = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), + Mask, passThru, Subtarget, DAG); } case INTR_TYPE_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); @@ -15405,7 +16488,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src0 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); // There are 2 kinds of intrinsics in this group: - // (1) With supress-all-exceptions (sae) or rounding mode- 6 operands + // (1) With suppress-all-exceptions (sae) or rounding mode- 6 operands // (2) With rounding mode and sae - 7 operands. if (Op.getNumOperands() == 6) { SDValue Sae = Op.getOperand(5); @@ -15421,11 +16504,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget RoundingMode, Sae), Mask, Src0, Subtarget, DAG); } - case INTR_TYPE_2OP_MASK: { + case INTR_TYPE_2OP_MASK: + case INTR_TYPE_2OP_IMM8_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); + + if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK) + Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2); + // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. @@ -15440,8 +16528,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask, PassThru, Subtarget, DAG); } } - return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, - Src1,Src2), + // TODO: Intrinsics should have fast-math-flags to propagate. + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,Src1,Src2), Mask, PassThru, Subtarget, DAG); } case INTR_TYPE_2OP_MASK_RM: { @@ -15449,7 +16537,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); - // We specify 2 possible modes for intrinsics, with/without rounding modes. + // We specify 2 possible modes for intrinsics, with/without rounding + // modes. // First, we check if the intrinsic have rounding mode (6 operands), // if not, we set rounding mode to "current". 
SDValue Rnd; @@ -15461,12 +16550,56 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1, Src2, Rnd), Mask, PassThru, Subtarget, DAG); } - case INTR_TYPE_3OP_MASK: { + case INTR_TYPE_3OP_SCALAR_MASK_RM: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue PassThru = Op.getOperand(4); SDValue Mask = Op.getOperand(5); + SDValue Sae = Op.getOperand(6); + + return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, + Src2, Src3, Sae), + Mask, PassThru, Subtarget, DAG); + } + case INTR_TYPE_3OP_MASK_RM: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Imm = Op.getOperand(3); + SDValue PassThru = Op.getOperand(4); + SDValue Mask = Op.getOperand(5); + // We specify 2 possible modes for intrinsics, with/without rounding + // modes. + // First, we check if the intrinsic has rounding mode (7 operands), + // if not, we set rounding mode to "current". + SDValue Rnd; + if (Op.getNumOperands() == 7) + Rnd = Op.getOperand(6); + else + Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Imm, Rnd), + Mask, PassThru, Subtarget, DAG); + } + case INTR_TYPE_3OP_IMM8_MASK: + case INTR_TYPE_3OP_MASK: + case INSERT_SUBVEC: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue PassThru = Op.getOperand(4); + SDValue Mask = Op.getOperand(5); + + if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK) + Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3); + else if (IntrData->Type == INSERT_SUBVEC) { + // imm should be adapted to ISD::INSERT_SUBVECTOR behavior + assert(isa(Src3) && "Expected a ConstantSDNode here!"); + unsigned Imm = cast(Src3)->getZExtValue(); + Imm *= Src2.getSimpleValueType().getVectorNumElements(); + Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32); + } + + // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand.
@@ -15486,7 +16619,27 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Mask, PassThru, Subtarget, DAG); } case VPERM_3OP_MASKZ: - case VPERM_3OP_MASK: + case VPERM_3OP_MASK:{ + // Src2 is the PassThru + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + MVT VT = Op.getSimpleValueType(); + SDValue PassThru = SDValue(); + + // set PassThru element + if (IntrData->Type == VPERM_3OP_MASKZ) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + else + PassThru = DAG.getBitcast(VT, Src2); + + // Swap Src1 and Src2 in the node creation + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, + dl, Op.getValueType(), + Src2, Src1, Src3), + Mask, PassThru, Subtarget, DAG); + } case FMA_OP_MASK3: case FMA_OP_MASKZ: case FMA_OP_MASK: { @@ -15494,11 +16647,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Src2 = Op.getOperand(2); SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); SDValue PassThru = SDValue(); // set PassThru element - if (IntrData->Type == VPERM_3OP_MASKZ || IntrData->Type == FMA_OP_MASKZ) + if (IntrData->Type == FMA_OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); else if (IntrData->Type == FMA_OP_MASK3) PassThru = Src3; @@ -15523,6 +16676,50 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } + case TERLOG_OP_MASK: + case TERLOG_OP_MASKZ: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4)); + SDValue Mask = Op.getOperand(5); + MVT VT = Op.getSimpleValueType(); + SDValue PassThru = Src1; + // Set PassThru element. + if (IntrData->Type == TERLOG_OP_MASKZ) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Src3, Src4), + Mask, PassThru, Subtarget, DAG); + } + case FPCLASS: { + // FPclass intrinsics with mask + SDValue Src1 = Op.getOperand(1); + MVT VT = Src1.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue Imm = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); + SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MaskVT, Src1, Imm); + SDValue FPclassMask = getVectorMaskingNode(FPclass, Mask, + DAG.getTargetConstant(0, dl, MaskVT), + Subtarget, DAG); + SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, + DAG.getUNDEF(BitcastVT), FPclassMask, + DAG.getIntPtrConstant(0, dl)); + return DAG.getBitcast(Op.getValueType(), Res); + } + case FPCLASSS: { + SDValue Src1 = Op.getOperand(1); + SDValue Imm = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm); + SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, + DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG); + return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i8, FPclassMask); + } case CMP_MASK: case CMP_MASK_CC: { // Comparison intrinsics with masks. 
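// Bitwise model of the VPTERNLOG node created in the TERLOG_OP_MASK case
// above (an illustrative sketch of the documented truth-table semantics,
// not code from this patch): at every bit position the three source bits
// form a 3-bit index into the imm8, whose selected bit becomes the result
// bit.
#include <cstdint>
static uint64_t TernLog(uint64_t A, uint64_t B, uint64_t C, uint8_t Imm) {
  uint64_t R = 0;
  for (unsigned I = 0; I != 64; ++I) {
    unsigned Idx = (((A >> I) & 1) << 2) | (((B >> I) & 1) << 1) |
                   ((C >> I) & 1);
    R |= static_cast<uint64_t>((Imm >> Idx) & 1) << I;
  }
  return R;
}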
@@ -15534,12 +16731,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget // (v2i1 (and (PCMPEQM %a, %b), // (extract_subvector // (v8i1 (bitcast %mask)), 0))), 0)))) - EVT VT = Op.getOperand(1).getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); + MVT VT = Op.getOperand(1).getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); SDValue Mask = Op.getOperand((IntrData->Type == CMP_MASK_CC) ? 4 : 3); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); SDValue Cmp; if (IntrData->Type == CMP_MASK_CC) { SDValue CC = Op.getOperand(3); @@ -15573,6 +16769,32 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget DAG.getIntPtrConstant(0, dl)); return DAG.getBitcast(Op.getValueType(), Res); } + case CMP_MASK_SCALAR_CC: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue CC = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(3)); + SDValue Mask = Op.getOperand(4); + + SDValue Cmp; + if (IntrData->Opc1 != 0) { + SDValue Rnd = Op.getOperand(5); + if (cast(Rnd)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) + Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd); + } + //default rounding mode + if(!Cmp.getNode()) + Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Src2, CC); + + SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, + DAG.getTargetConstant(0, dl, + MVT::i1), + Subtarget, DAG); + + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i8, + DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, CmpMask), + DAG.getValueType(MVT::i1)); + } case COMI: { // Comparison intrinsics ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; SDValue LHS = Op.getOperand(1); @@ -15584,6 +16806,24 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget DAG.getConstant(X86CC, dl, MVT::i8), Cond); return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); } + case COMI_RM: { // Comparison intrinsics with Sae + SDValue LHS = Op.getOperand(1); + SDValue RHS = Op.getOperand(2); + SDValue CC = Op.getOperand(3); + SDValue Sae = Op.getOperand(4); + auto ComiType = TranslateX86ConstCondToX86CC(CC); + // choose between ordered and unordered (comi/ucomi) + unsigned comiOp = std::get<0>(ComiType) ? 
IntrData->Opc0 : IntrData->Opc1; + SDValue Cond; + if (cast(Sae)->getZExtValue() != + X86::STATIC_ROUNDING::CUR_DIRECTION) + Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS, Sae); + else + Cond = DAG.getNode(comiOp, dl, MVT::i32, LHS, RHS); + SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(std::get<1>(ComiType), dl, MVT::i8), Cond); + return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); + } case VSHIFT: return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), Op.getOperand(1), Op.getOperand(2), DAG); @@ -15598,27 +16838,75 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Mask = Op.getOperand(3); SDValue DataToCompress = Op.getOperand(1); SDValue PassThru = Op.getOperand(2); - if (isAllOnes(Mask)) // return data as is + if (isAllOnesConstant(Mask)) // return data as is return Op.getOperand(1); return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress), Mask, PassThru, Subtarget, DAG); } + case BROADCASTM: { + SDValue Mask = Op.getOperand(1); + MVT MaskVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); + Mask = DAG.getBitcast(MaskVT, Mask); + return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Mask); + } case BLEND: { SDValue Mask = Op.getOperand(3); - EVT VT = Op.getValueType(); - EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - VT.getVectorNumElements()); - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); - SDLoc dl(Op); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1), Op.getOperand(2)); } + case KUNPCK: { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2); + + SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl); + SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl); + // Arguments should be swapped. 
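// Scalar model of the KUNPCK concatenation built right below (a sketch of
// the kunpckbw form; kunpckwd/kunpckdq scale the widths accordingly): the
// low half of the result comes from the second source and the high half
// from the first, which is why the operands are swapped.
#include <cstdint>
static uint16_t KUnpckBW(uint8_t Src1, uint8_t Src2) {
  return static_cast<uint16_t>(Src1) << 8 | Src2;
}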
+ SDValue Res = DAG.getNode(IntrData->Opc0, dl, + MVT::getVectorVT(MVT::i1, VT.getSizeInBits()), + Src2, Src1); + return DAG.getBitcast(VT, Res); + } + case CONVERT_TO_MASK: { + MVT SrcVT = Op.getOperand(1).getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()); + + SDValue CvtMask = DAG.getNode(IntrData->Opc0, dl, MaskVT, + Op.getOperand(1)); + SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, BitcastVT, + DAG.getUNDEF(BitcastVT), CvtMask, + DAG.getIntPtrConstant(0, dl)); + return DAG.getBitcast(Op.getValueType(), Res); + } + case CONVERT_MASK_TO_VEC: { + SDValue Mask = Op.getOperand(1); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); + return DAG.getNode(IntrData->Opc0, dl, VT, VMask); + } + case BRCST_SUBVEC_TO_VEC: { + SDValue Src = Op.getOperand(1); + SDValue Passthru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + EVT resVT = Passthru.getValueType(); + SDValue subVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, resVT, + DAG.getUNDEF(resVT), Src, + DAG.getIntPtrConstant(0, dl)); + SDValue immVal; + if (Src.getSimpleValueType().is256BitVector() && resVT.is512BitVector()) + immVal = DAG.getConstant(0x44, dl, MVT::i8); + else + immVal = DAG.getConstant(0, dl, MVT::i8); + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + subVec, subVec, immVal), + Mask, Passthru, Subtarget, DAG); + } default: break; } @@ -15832,23 +17120,17 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Index, SDValue ScaleOp, SDValue Chain, const X86Subtarget * Subtarget) { SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - if (!C) - llvm_unreachable("Invalid scale type"); - unsigned ScaleVal = C->getZExtValue(); - if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8) - llvm_unreachable("Valid scale values are 1, 2, 4, 8"); - + auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); - EVT MaskVT = MVT::getVectorVT(MVT::i1, + MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else { - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. 
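// Address model for the gather being assembled in this hunk (an
// illustrative sketch; names are hypothetical): every enabled mask lane
// loads from Base + Index[I] * Scale, with the displacement fixed at zero
// above and Scale limited to 1, 2, 4 or 8 by the instruction encoding.
#include <cstddef>
#include <cstdint>
#include <cstring>
static void Gather32(const uint8_t *Base, const int32_t *Index, unsigned Scale,
                     uint16_t Mask, uint32_t *Dst, size_t NumElts) {
  for (size_t I = 0; I != NumElts; ++I)
    if ((Mask >> I) & 1)
      std::memcpy(&Dst[I], Base + static_cast<int64_t>(Index[I]) * Scale, 4);
}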
@@ -15860,7 +17142,7 @@ static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); if (Src.getOpcode() == ISD::UNDEF) - Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl); + Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain}; SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops); SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) }; @@ -15871,25 +17153,19 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Src, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain) { SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - if (!C) - llvm_unreachable("Invalid scale type"); - unsigned ScaleVal = C->getZExtValue(); - if (ScaleVal > 2 && ScaleVal != 4 && ScaleVal != 8) - llvm_unreachable("Valid scale values are 1, 2, 4, 8"); - + auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); - EVT MaskVT = MVT::getVectorVT(MVT::i1, + MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); if (MaskC) MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), dl, MaskVT); else { - EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, - Mask.getValueType().getSizeInBits()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements // are extracted by EXTRACT_SUBVECTOR. @@ -15907,12 +17183,11 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, SDValue Mask, SDValue Base, SDValue Index, SDValue ScaleOp, SDValue Chain) { SDLoc dl(Op); - ConstantSDNode *C = dyn_cast(ScaleOp); - assert(C && "Invalid scale type"); + auto *C = cast(ScaleOp); SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, MVT::i8); SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); SDValue Segment = DAG.getRegister(0, MVT::i32); - EVT MaskVT = + MVT MaskVT = MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); SDValue MaskInReg; ConstantSDNode *MaskC = dyn_cast(Mask); @@ -16034,64 +17309,59 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMergeValues(Results, DL); } -static SDValue LowerSEHRESTOREFRAME(SDValue Op, const X86Subtarget *Subtarget, - SelectionDAG &DAG) { +static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); - const Function *Fn = MF.getFunction(); + SDValue Chain = Op.getOperand(0); + SDValue RegNode = Op.getOperand(2); + WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); + if (!EHInfo) + report_fatal_error("EH registrations only live in functions using WinEH"); + + // Cast the operand to an alloca, and remember the frame index. + auto *FINode = dyn_cast(RegNode); + if (!FINode) + report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca"); + EHInfo->EHRegNodeFrameIndex = FINode->getIndex(); + + // Return the chain operand without making any DAG nodes. 
+ return Chain; +} + +/// \brief Lower intrinsics for TRUNCATE_TO_MEM case +/// return truncate Store/MaskedStore Node +static SDValue LowerINTRINSIC_TRUNCATE_TO_MEM(const SDValue & Op, + SelectionDAG &DAG, + MVT ElementType) { SDLoc dl(Op); + SDValue Mask = Op.getOperand(4); + SDValue DataToTruncate = Op.getOperand(3); + SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); - assert(Subtarget->getFrameLowering()->hasFP(MF) && - "using llvm.x86.seh.restoreframe requires a frame pointer"); + MVT VT = DataToTruncate.getSimpleValueType(); + MVT SVT = MVT::getVectorVT(ElementType, VT.getVectorNumElements()); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - MVT VT = TLI.getPointerTy(DAG.getDataLayout()); + if (isAllOnesConstant(Mask)) // return just a truncate store + return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, + MachinePointerInfo(), SVT, false, false, + SVT.getScalarSizeInBits()/8); - const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); - unsigned FrameReg = - RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); - unsigned SPReg = RegInfo->getStackRegister(); - unsigned SlotSize = RegInfo->getSlotSize(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); + // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements + // are extracted by EXTRACT_SUBVECTOR. + SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); - // Get incoming EBP. - SDValue IncomingEBP = - DAG.getCopyFromReg(Chain, dl, FrameReg, VT); + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(), + MachineMemOperand::MOStore, SVT.getStoreSize(), + SVT.getScalarSizeInBits()/8); - // SP is saved in the first field of every registration node, so load - // [EBP-RegNodeSize] into SP. - int RegNodeSize = getSEHRegistrationNodeSize(Fn); - SDValue SPAddr = DAG.getNode(ISD::ADD, dl, VT, IncomingEBP, - DAG.getConstant(-RegNodeSize, dl, VT)); - SDValue NewSP = - DAG.getLoad(VT, dl, Chain, SPAddr, MachinePointerInfo(), false, false, - false, VT.getScalarSizeInBits() / 8); - Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP); - - if (!RegInfo->needsStackRealignment(MF)) { - // Adjust EBP to point back to the original frame position. - SDValue NewFP = recoverFramePointer(DAG, Fn, IncomingEBP); - Chain = DAG.getCopyToReg(Chain, dl, FrameReg, NewFP); - } else { - assert(RegInfo->hasBasePointer(MF) && - "functions with Win32 EH must use frame or base pointer register"); - - // Reload the base pointer (ESI) with the adjusted incoming EBP. - SDValue NewBP = recoverFramePointer(DAG, Fn, IncomingEBP); - Chain = DAG.getCopyToReg(Chain, dl, RegInfo->getBaseRegister(), NewBP); - - // Reload the spilled EBP value, now that the stack and base pointers are - // set up. 
- X86MachineFunctionInfo *X86FI = MF.getInfo(); - X86FI->setHasSEHFramePtrSave(true); - int FI = MF.getFrameInfo()->CreateSpillStackObject(SlotSize, SlotSize); - X86FI->setSEHFramePtrSaveIndex(FI); - SDValue NewFP = DAG.getLoad(VT, dl, Chain, DAG.getFrameIndex(FI, VT), - MachinePointerInfo(), false, false, false, - VT.getScalarSizeInBits() / 8); - Chain = DAG.getCopyToReg(NewFP, dl, FrameReg, NewFP); - } - - return Chain; + return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, + VMask, SVT, MMO, true); } static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, @@ -16100,16 +17370,14 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, const IntrinsicData* IntrData = getIntrinsicWithChain(IntNo); if (!IntrData) { - if (IntNo == llvm::Intrinsic::x86_seh_restoreframe) - return LowerSEHRESTOREFRAME(Op, Subtarget, DAG); + if (IntNo == llvm::Intrinsic::x86_seh_ehregnode) + return MarkEHRegistrationNode(Op, DAG); return SDValue(); } SDLoc dl(Op); switch(IntrData->Type) { - default: - llvm_unreachable("Unknown Intrinsic Type"); - break; + default: llvm_unreachable("Unknown Intrinsic Type"); case RDSEED: case RDRAND: { // Emit the node with the right value type. @@ -16214,8 +17482,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); - EVT VT = DataToCompress.getValueType(); - if (isAllOnes(Mask)) // return just a store + MVT VT = DataToCompress.getSimpleValueType(); + if (isAllOnesConstant(Mask)) // return just a store return DAG.getStore(Chain, dl, DataToCompress, Addr, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); @@ -16227,15 +17495,21 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, MachinePointerInfo(), false, false, VT.getScalarSizeInBits()/8); } + case TRUNCATE_TO_MEM_VI8: + return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i8); + case TRUNCATE_TO_MEM_VI16: + return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i16); + case TRUNCATE_TO_MEM_VI32: + return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32); case EXPAND_FROM_MEM: { SDLoc dl(Op); SDValue Mask = Op.getOperand(4); SDValue PassThru = Op.getOperand(3); SDValue Addr = Op.getOperand(2); SDValue Chain = Op.getOperand(0); - EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); - if (isAllOnes(Mask)) // return just a load + if (isAllOnesConstant(Mask)) // return just a load return DAG.getLoad(VT, dl, Chain, Addr, MachinePointerInfo(), false, false, false, VT.getScalarSizeInBits()/8); @@ -16359,6 +17633,21 @@ SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); } +unsigned X86TargetLowering::getExceptionPointerRegister( + const Constant *PersonalityFn) const { + if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR) + return Subtarget->isTarget64BitLP64() ? X86::RDX : X86::EDX; + + return Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX; +} + +unsigned X86TargetLowering::getExceptionSelectorRegister( + const Constant *PersonalityFn) const { + // Funclet personalities don't use selectors (the runtime does the selection). + assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))); + return Subtarget->isTarget64BitLP64() ? 
X86::RDX : X86::EDX; +} + SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Offset = Op.getOperand(1); @@ -16497,9 +17786,11 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, for (FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end(); I != E; ++I, ++Idx) - if (Attrs.hasAttribute(Idx, Attribute::InReg)) + if (Attrs.hasAttribute(Idx, Attribute::InReg)) { + auto &DL = DAG.getDataLayout(); // FIXME: should only count parameters that are lowered to integers. - InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32; + InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32; + } if (InRegCount > 2) { report_fatal_error("Nest register in use - reduce number of inreg" @@ -16588,8 +17879,8 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout())); MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI), - MachineMemOperand::MOStore, 2, 2); + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI), + MachineMemOperand::MOStore, 2, 2); SDValue Ops[] = { DAG.getEntryNode(), StackSlot }; SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, @@ -16623,12 +17914,75 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, ISD::TRUNCATE : ISD::ZERO_EXTEND), DL, VT, RetVal); } -static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) { +/// \brief Lower a vector CTLZ using the natively supported vector CTLZ instruction. +// +// 1. i32/i64 128/256-bit vectors (native support requires VLX) are expanded +// to a 512-bit vector. +// 2. i8/i16 vector implemented using dword LZCNT vector instruction +// ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal, +// split the vector, perform the operation on its Lo and Hi parts and +// concatenate the results. +static SDValue LowerVectorCTLZ_AVX512(SDValue Op, SelectionDAG &DAG) { + SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); - EVT OpVT = VT; + MVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + + if (EltVT == MVT::i64 || EltVT == MVT::i32) { + // Extend to 512 bit vector. + assert((VT.is256BitVector() || VT.is128BitVector()) && + "Unsupported value type for operation"); + + MVT NewVT = MVT::getVectorVT(EltVT, 512 / VT.getScalarSizeInBits()); + SDValue Vec512 = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NewVT, + DAG.getUNDEF(NewVT), + Op.getOperand(0), + DAG.getIntPtrConstant(0, dl)); + SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Vec512); + + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CtlzNode, + DAG.getIntPtrConstant(0, dl)); + } + + assert((EltVT == MVT::i8 || EltVT == MVT::i16) && + "Unsupported element type"); + + if (16 < NumElems) { + // Split the vector; its Lo and Hi parts will be handled in the next iteration. + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(Op.getOperand(0), dl); + MVT OutVT = MVT::getVectorVT(EltVT, NumElems/2); + + Lo = DAG.getNode(Op.getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(Op.getOpcode(), dl, OutVT, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); + } + + MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); + + assert((NewVT.is256BitVector() || NewVT.is512BitVector()) && + "Unsupported value type for operation"); + + // Use the natively supported vector instruction vplzcntd.
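// Scalar model of the dword-LZCNT trick applied right below (an
// illustrative sketch, not the lowering itself): zero-extend the element to
// 32 bits, count leading zeros there, then subtract the 32 - EltBits zeros
// contributed by the extension.
#include <cstdint>
static unsigned Ctlz32(uint32_t X) { // portable stand-in for one vplzcntd lane
  unsigned N = 32;
  for (; X; X >>= 1)
    --N;
  return N;
}
static unsigned Ctlz16(uint16_t X) {
  return Ctlz32(X) - (32 - 16); // Delta = 32 - EltVT.getSizeInBits()
}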
+ Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0)); + SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op); + SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode); + SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT); + + return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta); +} + +static SDValue LowerCTLZ(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + MVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); SDLoc dl(Op); + if (VT.isVector() && Subtarget->hasAVX512()) + return LowerVectorCTLZ_AVX512(Op, DAG); + Op = Op.getOperand(0); if (VT == MVT::i8) { // Zero extend to i32 since there is not an i8 bsr. @@ -16658,7 +18012,8 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) { return Op; } -static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { +static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); EVT OpVT = VT; unsigned NumBits = VT.getSizeInBits(); @@ -16686,13 +18041,39 @@ static SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) { static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) { MVT VT = Op.getSimpleValueType(); - unsigned NumBits = VT.getSizeInBits(); + unsigned NumBits = VT.getScalarSizeInBits(); SDLoc dl(Op); - Op = Op.getOperand(0); + + if (VT.isVector()) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + SDValue N0 = Op.getOperand(0); + SDValue Zero = DAG.getConstant(0, dl, VT); + + // lsb(x) = (x & -x) + SDValue LSB = DAG.getNode(ISD::AND, dl, VT, N0, + DAG.getNode(ISD::SUB, dl, VT, Zero, N0)); + + // cttz_undef(x) = (width - 1) - ctlz(lsb) + if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF && + TLI.isOperationLegal(ISD::CTLZ, VT)) { + SDValue WidthMinusOne = DAG.getConstant(NumBits - 1, dl, VT); + return DAG.getNode(ISD::SUB, dl, VT, WidthMinusOne, + DAG.getNode(ISD::CTLZ, dl, VT, LSB)); + } + + // cttz(x) = ctpop(lsb - 1) + SDValue One = DAG.getConstant(1, dl, VT); + return DAG.getNode(ISD::CTPOP, dl, VT, + DAG.getNode(ISD::SUB, dl, VT, LSB, One)); + } + + assert(Op.getOpcode() == ISD::CTTZ && + "Only scalar CTTZ requires custom lowering"); // Issue a bsf (scan bits forward) which also sets EFLAGS. SDVTList VTs = DAG.getVTList(VT, MVT::i32); - Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op); + Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op.getOperand(0)); // If src is zero (i.e. bsf sets ZF), returns NumBits. SDValue Ops[] = { @@ -16753,6 +18134,13 @@ static SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) { return Lower256IntArith(Op, DAG); } +static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { + assert(Op.getSimpleValueType().is256BitVector() && + Op.getSimpleValueType().isInteger() && + "Only handle AVX 256-bit vector integer operation"); + return Lower256IntArith(Op, DAG); +} + static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDLoc dl(Op); @@ -16885,7 +18273,7 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget, SDValue AhiBlo = Ahi; SDValue AloBhi = Bhi; // Bit cast to 32-bit vectors for MULUDQ - EVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : + MVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 : (VT == MVT::v4i64) ? 
MVT::v8i32 : MVT::v16i32; A = DAG.getBitcast(MulVT, A); B = DAG.getBitcast(MulVT, B); @@ -16962,7 +18350,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); - EVT VT = Op0.getValueType(); + MVT VT = Op0.getSimpleValueType(); SDLoc dl(Op); assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) || @@ -17034,7 +18422,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, return DAG.getMergeValues(Ops, dl); } -// Return true if the requred (according to Opcode) shift-imm form is natively +// Return true if the required (according to Opcode) shift-imm form is natively // supported by the Subtarget static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { @@ -17054,14 +18442,14 @@ static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget *Subtarget, } // The shift amount is a variable, but it is the same for all vector lanes. -// These instrcutions are defined together with shift-immediate. +// These instructions are defined together with shift-immediate. static bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); } -// Return true if the requred (according to Opcode) variable-shift form is +// Return true if the required (according to Opcode) variable-shift form is // natively supported by the Subtarget static bool SupportedVectorVarShift(MVT VT, const X86Subtarget *Subtarget, unsigned Opcode) { @@ -17133,27 +18521,37 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, // i64 SRA needs to be performed as partial shifts. if ((VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && - Op.getOpcode() == ISD::SRA) + Op.getOpcode() == ISD::SRA && !Subtarget->hasXOP()) return ArithmeticShiftRight64(ShiftAmt); - if (VT == MVT::v16i8 || (Subtarget->hasInt256() && VT == MVT::v32i8)) { + if (VT == MVT::v16i8 || + (Subtarget->hasInt256() && VT == MVT::v32i8) || + VT == MVT::v64i8) { unsigned NumElts = VT.getVectorNumElements(); MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); - if (Op.getOpcode() == ISD::SHL) { - // Simple i8 add case - if (ShiftAmt == 1) - return DAG.getNode(ISD::ADD, dl, VT, R, R); + // Simple i8 add case + if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) + return DAG.getNode(ISD::ADD, dl, VT, R, R); + // ashr(R, 7) === cmp_slt(R, 0) + if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) { + SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); + } + + // XOP can shift v16i8 directly instead of as shift v8i16 + mask. + if (VT == MVT::v16i8 && Subtarget->hasXOP()) + return SDValue(); + + if (Op.getOpcode() == ISD::SHL) { // Make a large shift. SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R, ShiftAmt, DAG); SHL = DAG.getBitcast(VT, SHL); // Zero out the rightmost bits. - SmallVector V( - NumElts, DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SHL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); + DAG.getConstant(uint8_t(-1U << ShiftAmt), dl, VT)); } if (Op.getOpcode() == ISD::SRL) { // Make a large shift. @@ -17161,24 +18559,14 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, R, ShiftAmt, DAG); SRL = DAG.getBitcast(VT, SRL); // Zero out the leftmost bits. 
- SmallVector V( - NumElts, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, MVT::i8)); return DAG.getNode(ISD::AND, dl, VT, SRL, - DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V)); + DAG.getConstant(uint8_t(-1U) >> ShiftAmt, dl, VT)); } if (Op.getOpcode() == ISD::SRA) { - if (ShiftAmt == 7) { - // R s>> 7 === R s< 0 - SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); - } - - // R s>> a === ((R u>> a) ^ m) - m + // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask) SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); - SmallVector V(NumElts, - DAG.getConstant(128 >> ShiftAmt, dl, - MVT::i8)); - SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V); + + SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT); Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; @@ -17189,35 +18577,51 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, } // Special case in 32-bit mode, where i64 is expanded into high and low parts. - if (!Subtarget->is64Bit() && - (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) && - Amt.getOpcode() == ISD::BITCAST && - Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { + if (!Subtarget->is64Bit() && !Subtarget->hasXOP() && + (VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64))) { + + // Peek through any splat that was introduced for i64 shift vectorization. + int SplatIndex = -1; + if (ShuffleVectorSDNode *SVN = dyn_cast(Amt.getNode())) + if (SVN->isSplat()) { + SplatIndex = SVN->getSplatIndex(); + Amt = Amt.getOperand(0); + assert(SplatIndex < (int)VT.getVectorNumElements() && + "Splat shuffle referencing second operand"); + } + + if (Amt.getOpcode() != ISD::BITCAST || + Amt.getOperand(0).getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + Amt = Amt.getOperand(0); unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() / VT.getVectorNumElements(); unsigned RatioInLog2 = Log2_32_Ceil(Ratio); uint64_t ShiftAmt = 0; + unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio); for (unsigned i = 0; i != Ratio; ++i) { - ConstantSDNode *C = dyn_cast(Amt.getOperand(i)); + ConstantSDNode *C = dyn_cast(Amt.getOperand(i + BaseOp)); if (!C) return SDValue(); // 6 == Log2(64) ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2))); } - // Check remaining shift amounts. - for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { - uint64_t ShAmt = 0; - for (unsigned j = 0; j != Ratio; ++j) { - ConstantSDNode *C = - dyn_cast(Amt.getOperand(i + j)); - if (!C) + + // Check remaining shift amounts (if not a splat). 
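// Scalar model of the byte ashr emulation earlier in this hunk
// (illustrative sketch): with no native i8 arithmetic shift, do a logical
// shift and restore the sign via the xor/sub mask trick
// ashr(R, Amt) == sub(xor(lshr(R, Amt), Mask), Mask) with Mask = 128 >> Amt.
#include <cstdint>
static int8_t AshrViaLshr(uint8_t R, unsigned Amt) {
  unsigned Res = R >> Amt;     // logical shift
  unsigned Mask = 128u >> Amt; // where the sign bit lands after the shift
  return static_cast<int8_t>((Res ^ Mask) - Mask);
}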
+ if (SplatIndex < 0) { + for (unsigned i = Ratio; i != Amt.getNumOperands(); i += Ratio) { + uint64_t ShAmt = 0; + for (unsigned j = 0; j != Ratio; ++j) { + ConstantSDNode *C = dyn_cast(Amt.getOperand(i + j)); + if (!C) + return SDValue(); + // 6 == Log2(64) + ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); + } + if (ShAmt != ShiftAmt) return SDValue(); - // 6 == Log2(64) - ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2))); } - if (ShAmt != ShiftAmt) - return SDValue(); } if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) @@ -17245,7 +18649,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) { SDValue BaseShAmt; - EVT EltVT = VT.getVectorElementType(); + MVT EltVT = VT.getVectorElementType(); if (BuildVectorSDNode *BV = dyn_cast(Amt)) { // Check if this build_vector node is doing a splat. @@ -17262,7 +18666,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, unsigned SplatIdx = (unsigned)SVN->getSplatIndex(); SDValue InVec = Amt.getOperand(0); if (InVec.getOpcode() == ISD::BUILD_VECTOR) { - assert((SplatIdx < InVec.getValueType().getVectorNumElements()) && + assert((SplatIdx < InVec.getSimpleValueType().getVectorNumElements()) && "Unexpected shuffle index found!"); BaseShAmt = InVec.getOperand(SplatIdx); } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) { @@ -17327,11 +18731,26 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, return V; if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget)) - return V; + return V; if (SupportedVectorVarShift(VT, Subtarget, Op.getOpcode())) return Op; + // XOP has 128-bit variable logical/arithmetic shifts. + // +ve/-ve Amt = shift left/right. + if (Subtarget->hasXOP() && + (VT == MVT::v2i64 || VT == MVT::v4i32 || + VT == MVT::v8i16 || VT == MVT::v16i8)) { + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) { + SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl); + Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt); + } + if (Op.getOpcode() == ISD::SHL || Op.getOpcode() == ISD::SRL) + return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt); + if (Op.getOpcode() == ISD::SRA) + return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt); + } + // 2i64 vector logical shifts can efficiently avoid scalarization - do the // shifts per-lane and then shuffle the partial results back together. if (VT == MVT::v2i64 && Op.getOpcode() != ISD::SRA) { @@ -17343,6 +18762,19 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3}); } + // i64 vector arithmetic shift can be emulated with the transform: + // M = lshr(SIGN_BIT, Amt) + // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M) + if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget->hasInt256())) && + Op.getOpcode() == ISD::SRA) { + SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT); + SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); + R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); + R = DAG.getNode(ISD::XOR, dl, VT, R, M); + R = DAG.getNode(ISD::SUB, dl, VT, R, M); + return R; + } + // If possible, lower this packed shift into a vector multiply instead of // expanding it into a sequence of scalar shifts. // Do this only if the vector shift count is a constant build_vector. 
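// The scalar identity behind the multiply-based lowering described above
// (a sketch): a left shift by a known amount is a multiply by the matching
// power of two, so a build_vector of constant shift amounts can become one
// vector multiply.
#include <cstdint>
static uint32_t ShlViaMul(uint32_t X, unsigned Amt) {
  return X * (1u << Amt); // same as X << Amt for Amt < 32
}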
@@ -17351,9 +18783,9 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
        (Subtarget->hasInt256() && VT == MVT::v16i16)) &&
       ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
     SmallVector<SDValue, 8> Elts;
-    EVT SVT = VT.getScalarType();
+    MVT SVT = VT.getVectorElementType();
     unsigned SVTBits = SVT.getSizeInBits();
-    const APInt &One = APInt(SVTBits, 1);
+    APInt One(SVTBits, 1);
     unsigned NumElems = VT.getVectorNumElements();
 
     for (unsigned i=0; i !=NumElems; ++i) {
@@ -17364,7 +18796,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
       }
 
       ConstantSDNode *ND = cast<ConstantSDNode>(Op);
-      const APInt &C = APInt(SVTBits, ND->getAPIntValue().getZExtValue());
+      APInt C(SVTBits, ND->getAPIntValue().getZExtValue());
       uint64_t ShAmt = C.getZExtValue();
       if (ShAmt >= SVTBits) {
         Elts.push_back(DAG.getUNDEF(SVT));
@@ -17443,7 +18875,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
 
   if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
       isa<ConstantSDNode>(Amt2)) {
     // Replace this node with two shifts followed by a MOVSS/MOVSD.
-    EVT CastVT = MVT::v4i32;
+    MVT CastVT = MVT::v4i32;
     SDValue Splat1 =
         DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
     SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
@@ -17507,7 +18939,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
     return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7});
   }
 
-  if (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget->hasInt256())) {
+  if (VT == MVT::v16i8 ||
+      (VT == MVT::v32i8 && Subtarget->hasInt256() && !Subtarget->hasXOP())) {
     MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2);
     unsigned ShiftOpcode = Op->getOpcode();
 
@@ -17627,7 +19060,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
                        DAG.getNode(Op.getOpcode(), dl, ExtVT, R, Amt));
   }
 
-  if (Subtarget->hasInt256() && VT == MVT::v16i16) {
+  if (Subtarget->hasInt256() && !Subtarget->hasXOP() && VT == MVT::v16i16) {
     MVT ExtVT = MVT::v8i32;
     SDValue Z = getZeroVector(VT, Subtarget, DAG, dl);
     SDValue ALo = DAG.getNode(X86ISD::UNPCKL, dl, VT, Amt, Z);
@@ -17710,7 +19143,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
   if (VT.is256BitVector()) {
     unsigned NumElems = VT.getVectorNumElements();
     MVT EltVT = VT.getVectorElementType();
-    EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
+    MVT NewVT = MVT::getVectorVT(EltVT, NumElems/2);
 
     // Extract the two vectors
     SDValue V1 = Extract128BitVector(R, 0, DAG, dl);
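The packed-shift-to-multiply path above works because a left shift by a per-lane constant is just a multiply by a per-lane power of two, so one PMULLW/PMULLD can stand in for a sequence of scalar shifts. A small sketch of the per-lane equivalence (the names are ours, not the patch's):

    #include <cassert>
    #include <cstdint>

    // shl(x, c) == mul(x, 1 << c) in every lane, modulo the lane width.
    int main() {
      uint16_t lanes[8] = {1, 2, 3, 4, 50, 600, 7000, 40000};
      unsigned amts[8]  = {0, 1, 2, 3, 4, 5, 6, 7}; // per-lane shift amounts
      for (int i = 0; i != 8; ++i) {
        uint16_t byShift = uint16_t(lanes[i] << amts[i]);
        uint16_t byMul   = uint16_t(lanes[i] * uint16_t(1u << amts[i]));
        assert(byShift == byMul);                   // identical modulo 2^16
      }
      return 0;
    }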
@@ -17743,6 +19176,40 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
   return SDValue();
 }
 
+static SDValue LowerRotate(SDValue Op, const X86Subtarget *Subtarget,
+                           SelectionDAG &DAG) {
+  MVT VT = Op.getSimpleValueType();
+  SDLoc DL(Op);
+  SDValue R = Op.getOperand(0);
+  SDValue Amt = Op.getOperand(1);
+
+  assert(VT.isVector() && "Custom lowering only for vector rotates!");
+  assert(Subtarget->hasXOP() && "XOP support required for vector rotates!");
+  assert((Op.getOpcode() == ISD::ROTL) && "Only ROTL supported");
+
+  // XOP has 128-bit vector variable + immediate rotates.
+  // +ve/-ve Amt = rotate left/right.
+
+  // Split 256-bit integers.
+  if (VT.is256BitVector())
+    return Lower256IntArith(Op, DAG);
+
+  assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");
+
+  // Attempt to rotate by immediate.
+  if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) {
+    if (auto *RotateConst = BVAmt->getConstantSplatNode()) {
+      uint64_t RotateAmt = RotateConst->getAPIntValue().getZExtValue();
+      assert(RotateAmt < VT.getScalarSizeInBits() && "Rotation out of range");
+      return DAG.getNode(X86ISD::VPROTI, DL, VT, R,
+                         DAG.getConstant(RotateAmt, DL, MVT::i8));
+    }
+  }
+
+  // Use general rotate by variable (per-element).
+  return DAG.getNode(X86ISD::VPROT, DL, VT, R, Amt);
+}
+
 static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
   // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
@@ -17759,8 +19226,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   case ISD::SADDO:
     // A subtract of one will be selected as a INC. Note that INC doesn't
     // set CF, so we can't do this for UADDO.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
-      if (C->isOne()) {
+    if (isOneConstant(RHS)) {
         BaseOp = X86ISD::INC;
         Cond = X86::COND_O;
         break;
@@ -17775,8 +19241,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   case ISD::SSUBO:
     // A subtract of one will be selected as a DEC. Note that DEC doesn't
     // set CF, so we can't do this for USUBO.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
-      if (C->isOne()) {
+    if (isOneConstant(RHS)) {
        BaseOp = X86ISD::DEC;
        Cond = X86::COND_O;
        break;
@@ -17827,7 +19292,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
 /// the corresponding cmpxchg8b or cmpxchg16b instruction is available.
 /// Used to know whether to use cmpxchg8/16b when expanding atomic operations
 /// (otherwise we leave them alone to become __sync_fetch_and_... calls).
-bool X86TargetLowering::needsCmpXchgNb(const Type *MemType) const {
+bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const {
   unsigned OpWidth = MemType->getPrimitiveSizeInBits();
 
   if (OpWidth == 64)
@@ -17844,21 +19309,23 @@ bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 
 // Note: this turns large loads into lock cmpxchg8b/16b.
 // FIXME: On 32 bits x86, fild/movq might be faster than lock cmpxchg8b.
-bool X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
+TargetLowering::AtomicExpansionKind
+X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   auto PTy = cast<PointerType>(LI->getPointerOperand()->getType());
-  return needsCmpXchgNb(PTy->getElementType());
+  return needsCmpXchgNb(PTy->getElementType()) ? AtomicExpansionKind::CmpXChg
+                                               : AtomicExpansionKind::None;
 }
 
-TargetLoweringBase::AtomicRMWExpansionKind
+TargetLowering::AtomicExpansionKind
 X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
-  const Type *MemType = AI->getType();
+  Type *MemType = AI->getType();
 
   // If the operand is too big, we must see if cmpxchg8/16b is available
   // and default to library calls otherwise.
   if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
-    return needsCmpXchgNb(MemType) ? AtomicRMWExpansionKind::CmpXChg
-                                   : AtomicRMWExpansionKind::None;
+    return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
+                                   : AtomicExpansionKind::None;
   }
 
   AtomicRMWInst::BinOp Op = AI->getOperation();
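For the operations that return AtomicExpansionKind::CmpXChg, the IR-level expansion is a compare-exchange retry loop. A rough C++ model of what an expanded atomicrmw nand becomes (a sketch of the shape, not the actual AtomicExpandPass output):

    #include <atomic>
    #include <cassert>

    // Ops like nand/max have no single lock-prefixed x86 instruction, so
    // they are expanded to a compare-exchange loop like this one.
    static int atomic_nand(std::atomic<int> &a, int v) {
      int old = a.load();
      while (!a.compare_exchange_weak(old, ~(old & v)))
        ;          // 'old' is refreshed on failure; retry until it sticks
      return old;  // atomicrmw yields the previous value
    }

    int main() {
      std::atomic<int> a(0b1100);
      int prev = atomic_nand(a, 0b1010);
      assert(prev == 0b1100 && a.load() == ~(0b1100 & 0b1010));
      return 0;
    }

By contrast, add/sub/xchg map directly to xadd/xchg, and or/and/xor can use a lock-prefixed instruction whenever the result value is unused.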
@@ -17869,14 +19336,14 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   case AtomicRMWInst::Add:
   case AtomicRMWInst::Sub:
     // It's better to use xadd, xsub or xchg for these in all cases.
-    return AtomicRMWExpansionKind::None;
+    return AtomicExpansionKind::None;
   case AtomicRMWInst::Or:
   case AtomicRMWInst::And:
   case AtomicRMWInst::Xor:
     // If the atomicrmw's result isn't actually used, we can just add a "lock"
     // prefix to a normal instruction for these operations.
-    return !AI->use_empty() ? AtomicRMWExpansionKind::CmpXChg
-                            : AtomicRMWExpansionKind::None;
+    return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
+                            : AtomicExpansionKind::None;
   case AtomicRMWInst::Nand:
   case AtomicRMWInst::Max:
   case AtomicRMWInst::Min:
@@ -17884,7 +19351,7 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   case AtomicRMWInst::UMin:
     // These always require a non-trivial set of data operations on x86. We must
     // use a cmpxchg loop.
-    return AtomicRMWExpansionKind::CmpXChg;
+    return AtomicExpansionKind::CmpXChg;
   }
 }
 
@@ -17898,7 +19365,7 @@ static bool hasMFENCE(const X86Subtarget& Subtarget) {
 LoadInst *
 X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget->is64Bit() ? 64 : 32;
-  const Type *MemType = AI->getType();
+  Type *MemType = AI->getType();
 
   // Accesses larger than the native width are turned into cmpxchg/libcalls, so
   // there is no benefit in turning such RMWs into loads, and it is actually
   // harmful as it introduces a mfence.
@@ -17926,7 +19393,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   // lowered to just a load without a fence. A mfence flushes the store buffer,
   // making the optimization clearly correct.
   // FIXME: it is required if isAtLeastRelease(Order) but it is not clear
-  // otherwise, we might be able to be more agressive on relaxed idempotent
+  // otherwise, we might be able to be more aggressive on relaxed idempotent
   // rmw. In practice, they do not look useful, so we don't try to be
   // especially clever.
   if (SynchScope == SingleThread)
@@ -18043,7 +19510,7 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
   SDValue InVec = Op->getOperand(0);
   SDLoc dl(Op);
   unsigned NumElts = SrcVT.getVectorNumElements();
-  EVT SVT = SrcVT.getVectorElementType();
+  MVT SVT = SrcVT.getVectorElementType();
 
   // Widen the vector in input in the case of MVT::v2i32.
   // Example: from MVT::v2i32 to MVT::v4i32.
@@ -18103,7 +19570,8 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
   // chunks, thus directly computes the pop count for v2i64 and v4i64.
   if (EltVT == MVT::i64) {
     SDValue Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
-    V = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT, V, Zeros);
+    MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
+    V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
     return DAG.getBitcast(VT, V);
   }
 
@@ -18119,9 +19587,10 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
 
   // Do the horizontal sums into two v2i64s.
   Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
-  Low = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+  MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
+  Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                     DAG.getBitcast(ByteVecVT, Low), Zeros);
-  High = DAG.getNode(X86ISD::PSADBW, DL, ByteVecVT,
+  High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                      DAG.getBitcast(ByteVecVT, High), Zeros);
 
   // Merge them together.
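PSADBW computes sums of absolute differences of unsigned bytes; against an all-zero vector this degenerates to a horizontal add of the eight bytes in each 64-bit chunk, which is the operation being retyped above and the finishing step of the vector popcount lowering. A scalar model (assumes the GCC/Clang __builtin_popcount builtins):

    #include <cassert>
    #include <cstdint>

    // |b - 0| == b for unsigned bytes, so PSADBW vs. zero sums each chunk.
    static uint64_t psadbw_chunk(const uint8_t bytes[8]) {
      uint64_t sum = 0;
      for (int i = 0; i != 8; ++i)
        sum += bytes[i];
      return sum;
    }

    int main() {
      uint64_t v = 0xF00F00FF0F0F3C5AULL;
      uint8_t perByte[8];
      for (int i = 0; i != 8; ++i)   // per-byte popcount (the CTPOP step)
        perByte[i] = uint8_t(__builtin_popcount(unsigned((v >> (8 * i)) & 0xFF)));
      assert(psadbw_chunk(perByte) == uint64_t(__builtin_popcountll(v)));
      return 0;
    }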
@@ -18311,7 +19780,7 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget *Subtarget,
 
 static SDValue LowerCTPOP(SDValue Op, const X86Subtarget *Subtarget,
                           SelectionDAG &DAG) {
-  assert(Op.getValueType().isVector() &&
+  assert(Op.getSimpleValueType().isVector() &&
          "We only do custom lowering for vector population count.");
   return LowerVectorCTPOP(Op, Subtarget, DAG);
 }
@@ -18357,7 +19826,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
 }
 
 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
-  EVT VT = Op.getNode()->getSimpleValueType(0);
+  MVT VT = Op.getNode()->getSimpleValueType(0);
 
   // Let legalize expand this if it isn't a legal type yet.
   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
@@ -18435,31 +19904,203 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
   return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
 }
 
+/// Widen a vector input to a vector of NVT.  The
+/// input vector must have the same element type as NVT.
+static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG,
+                            bool FillWithZeroes = false) {
+  // Check if InOp already has the right width.
+  MVT InVT = InOp.getSimpleValueType();
+  if (InVT == NVT)
+    return InOp;
+
+  if (InOp.isUndef())
+    return DAG.getUNDEF(NVT);
+
+  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+         "input and widen element type must match");
+
+  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned WidenNumElts = NVT.getVectorNumElements();
+  assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 &&
+         "Unexpected request for vector widening");
+
+  EVT EltVT = NVT.getVectorElementType();
+
+  SDLoc dl(InOp);
+  if (InOp.getOpcode() == ISD::CONCAT_VECTORS &&
+      InOp.getNumOperands() == 2) {
+    SDValue N1 = InOp.getOperand(1);
+    if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) ||
+        N1.isUndef()) {
+      InOp = InOp.getOperand(0);
+      InVT = InOp.getSimpleValueType();
+      InNumElts = InVT.getVectorNumElements();
+    }
+  }
+  if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) ||
+      ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) {
+    SmallVector<SDValue, 16> Ops;
+    for (unsigned i = 0; i < InNumElts; ++i)
+      Ops.push_back(InOp.getOperand(i));
+
+    SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) :
+      DAG.getUNDEF(EltVT);
+    for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i)
+      Ops.push_back(FillVal);
+    return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
+  }
+  SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) :
+    DAG.getUNDEF(NVT);
+  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal,
+                     InOp, DAG.getIntPtrConstant(0, dl));
+}
+
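ExtendToType's contract, modeled on plain containers: the low elements are preserved and the tail is padded, either with zeroes (used for masks so the new lanes stay inactive) or with unspecified values standing in for undef. An illustrative model only, not the DAG implementation:

    #include <cassert>
    #include <vector>

    static std::vector<int> extendToWidth(const std::vector<int> &in,
                                          unsigned widenNumElts,
                                          bool fillWithZeroes) {
      assert(widenNumElts > in.size() && widenNumElts % in.size() == 0 &&
             "Unexpected request for vector widening");
      std::vector<int> out(in);
      // -1 stands in for an undef lane in this model.
      out.resize(widenNumElts, fillWithZeroes ? 0 : -1);
      return out;
    }

    int main() {
      std::vector<int> mask = {1, 0};  // e.g. a v2i1 scatter mask
      std::vector<int> wide = extendToWidth(mask, 8, /*fillWithZeroes=*/true);
      assert(wide.size() == 8 && wide[0] == 1 && wide[1] == 0 && wide[7] == 0);
      return 0;
    }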
 static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget,
                              SelectionDAG &DAG) {
   assert(Subtarget->hasAVX512() &&
          "MGATHER/MSCATTER are supported on AVX-512 arch only");
 
+  // X86 scatter kills mask register, so its type should be added to
+  // the list of return values.
+  // If the "scatter" has 2 return values, it is already handled.
+  if (Op.getNode()->getNumValues() == 2)
+    return Op;
+
   MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
-  EVT VT = N->getValue().getValueType();
+  SDValue Src = N->getValue();
+  MVT VT = Src.getSimpleValueType();
   assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
   SDLoc dl(Op);
 
-  // X86 scatter kills mask register, so its type should be added to
-  // the list of return values
-  if (N->getNumValues() == 1) {
-    SDValue Index = N->getIndex();
-    if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
-        !Index.getValueType().is512BitVector())
+  SDValue NewScatter;
+  SDValue Index = N->getIndex();
+  SDValue Mask = N->getMask();
+  SDValue Chain = N->getChain();
+  SDValue BasePtr = N->getBasePtr();
+  MVT MemVT = N->getMemoryVT().getSimpleVT();
+  MVT IndexVT = Index.getSimpleValueType();
+  MVT MaskVT = Mask.getSimpleValueType();
+
+  if (MemVT.getScalarSizeInBits() < VT.getScalarSizeInBits()) {
+    // The v2i32 value was promoted to v2i64.
+    // Now we "redo" the type legalizer's work and widen the original
+    // v2i32 value to v4i32. The original v2i32 is retrieved from v2i64
+    // with a shuffle.
+    assert((MemVT == MVT::v2i32 && VT == MVT::v2i64) &&
+           "Unexpected memory type");
+    int ShuffleMask[] = {0, 2, -1, -1};
+    Src = DAG.getVectorShuffle(MVT::v4i32, dl, DAG.getBitcast(MVT::v4i32, Src),
+                               DAG.getUNDEF(MVT::v4i32), ShuffleMask);
+    // Now we have 4 elements instead of 2.
+    // Expand the index.
+    MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), 4);
+    Index = ExtendToType(Index, NewIndexVT, DAG);
+
+    // Expand the mask with zeroes.
+    // Mask may be <2 x i64> or <2 x i1> at this moment.
+    assert((MaskVT == MVT::v2i1 || MaskVT == MVT::v2i64) &&
+           "Unexpected mask type");
+    MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), 4);
+    Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
+    VT = MVT::v4i32;
+  }
+
+  unsigned NumElts = VT.getVectorNumElements();
+  if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
+      !Index.getSimpleValueType().is512BitVector()) {
+    // AVX512F supports only 512-bit vectors: either the data or the index
+    // must be 512 bits wide. If both the index and the data are 256-bit but
+    // the vector contains 8 elements, we just sign-extend the index.
+    if (IndexVT == MVT::v8i32)
+      // Just extend index
       Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+    else {
+      // The minimal number of elts in scatter is 8
+      NumElts = 8;
+      // Index
+      MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
+      // Use original index here, do not modify the index twice
+      Index = ExtendToType(N->getIndex(), NewIndexVT, DAG);
+      if (IndexVT.getScalarType() == MVT::i32)
+        Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
 
-    SDVTList VTs = DAG.getVTList(N->getMask().getValueType(), MVT::Other);
-    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
-                      N->getOperand(3), Index };
+      // Mask
+      // At this point we have promoted mask operand
+      assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
+      MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
+      // Use the original mask here, do not modify the mask twice
+      Mask = ExtendToType(N->getMask(), ExtMaskVT, DAG, true);
 
-    SDValue NewScatter = DAG.getMaskedScatter(VTs, VT, dl, Ops, N->getMemOperand());
-    DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
-    return SDValue(NewScatter.getNode(), 0);
+      // The value that should be stored
+      MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
+      Src = ExtendToType(Src, NewVT, DAG);
+    }
+  }
+  // If the mask is "wide" at this point - truncate it to i1 vector
+  MVT BitMaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+  Mask = DAG.getNode(ISD::TRUNCATE, dl, BitMaskVT, Mask);
+
+  // The mask is killed by scatter, add it to the values
+  SDVTList VTs = DAG.getVTList(BitMaskVT, MVT::Other);
+  SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index};
+  NewScatter = DAG.getMaskedScatter(VTs, N->getMemoryVT(), dl, Ops,
+                                    N->getMemOperand());
+  DAG.ReplaceAllUsesWith(Op, SDValue(NewScatter.getNode(), 1));
+  return SDValue(NewScatter.getNode(), 0);
+}
+
+static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget,
+                          SelectionDAG &DAG) {
+
+  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
+  MVT VT = Op.getSimpleValueType();
+  SDValue Mask = N->getMask();
+  SDLoc dl(Op);
+
+  if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+      !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
+    // This operation is legal for targets with VLX, but without
+    // VLX the vector should be widened to 512 bits.
+    unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+    MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
+    MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
+    SDValue Src0 = N->getSrc0();
+    Src0 = ExtendToType(Src0, WideDataVT, DAG);
+    Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+    SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
+                                        N->getBasePtr(), Mask, Src0,
+                                        N->getMemoryVT(), N->getMemOperand(),
+                                        N->getExtensionType());
+
+    SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                  NewLoad.getValue(0),
+                                  DAG.getIntPtrConstant(0, dl));
+    SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
+    return DAG.getMergeValues(RetOps, dl);
+  }
+  return Op;
+}
+
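Why widening a masked load or store to 512 bits is safe without VLX: the widened mask is zero-padded, and inactive lanes of a masked memory operation never touch memory, so the wide operation is observationally identical on the original lanes. A simplified scalar model of masked-load semantics (ours, for illustration):

    #include <cassert>
    #include <vector>

    // Only active lanes read memory; inactive lanes keep the pass-through.
    static std::vector<int> maskedLoad(const std::vector<int> &mem,
                                       const std::vector<bool> &mask,
                                       const std::vector<int> &src0) {
      std::vector<int> out(src0);
      for (size_t i = 0; i != mask.size(); ++i)
        if (mask[i])
          out[i] = mem[i];
      return out;
    }

    int main() {
      std::vector<int> mem = {10, 20, 30, 40, 50, 60, 70, 80};
      // Lanes 2..7 model the zero-padded tail added by the widening.
      std::vector<bool> mask = {true, false, false, false,
                                false, false, false, false};
      std::vector<int> src0(8, -1);
      std::vector<int> r = maskedLoad(mem, mask, src0);
      assert(r[0] == 10 && r[1] == -1 && r[7] == -1);
      return 0;
    }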
+static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget,
+                           SelectionDAG &DAG) {
+  MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
+  SDValue DataToStore = N->getValue();
+  MVT VT = DataToStore.getSimpleValueType();
+  SDValue Mask = N->getMask();
+  SDLoc dl(Op);
+
+  if (Subtarget->hasAVX512() && !Subtarget->hasVLX() &&
+      !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) {
+    // This operation is legal for targets with VLX, but without
+    // VLX the vector should be widened to 512 bits.
+    unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+    MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec);
+    MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
+    DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
+    Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+    return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
+                              Mask, N->getMemoryVT(), N->getMemOperand(),
+                              N->isTruncatingStore());
+  }
+  return Op;
+}
+
@@ -18470,17 +20111,59 @@ static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget,
          "MGATHER/MSCATTER are supported on AVX-512 arch only");
 
   MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
-  EVT VT = Op.getValueType();
-  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
   SDLoc dl(Op);
-
+  MVT VT = Op.getSimpleValueType();
   SDValue Index = N->getIndex();
+  SDValue Mask = N->getMask();
+  SDValue Src0 = N->getValue();
+  MVT IndexVT = Index.getSimpleValueType();
+  MVT MaskVT = Mask.getSimpleValueType();
+
+  unsigned NumElts = VT.getVectorNumElements();
+  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");
+
   if (!Subtarget->hasVLX() && !VT.is512BitVector() &&
-      !Index.getValueType().is512BitVector()) {
-    Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
-    SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
-                      N->getOperand(3), Index };
-    DAG.UpdateNodeOperands(N, Ops);
+      !Index.getSimpleValueType().is512BitVector()) {
+    // AVX512F supports only 512-bit vectors: either the data or the index
+    // must be 512 bits wide. If both the index and the data are 256-bit but
+    // the vector contains 8 elements, we just sign-extend the index.
+    if (NumElts == 8) {
+      Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
+                        N->getOperand(3), Index };
+      DAG.UpdateNodeOperands(N, Ops);
+      return Op;
+    }
+
+    // Minimal number of elements in Gather
+    NumElts = 8;
+    // Index
+    MVT NewIndexVT = MVT::getVectorVT(IndexVT.getScalarType(), NumElts);
+    Index = ExtendToType(Index, NewIndexVT, DAG);
+    if (IndexVT.getScalarType() == MVT::i32)
+      Index = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i64, Index);
+
+    // Mask
+    MVT MaskBitVT = MVT::getVectorVT(MVT::i1, NumElts);
+    // At this point we have promoted mask operand
+    assert(MaskVT.getScalarSizeInBits() >= 32 && "unexpected mask type");
+    MVT ExtMaskVT = MVT::getVectorVT(MaskVT.getScalarType(), NumElts);
+    Mask = ExtendToType(Mask, ExtMaskVT, DAG, true);
+    Mask = DAG.getNode(ISD::TRUNCATE, dl, MaskBitVT, Mask);
+
+    // The pass-thru value
+    MVT NewVT = MVT::getVectorVT(VT.getScalarType(), NumElts);
+    Src0 = ExtendToType(Src0, NewVT, DAG);
+
+    SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index };
+    SDValue NewGather = DAG.getMaskedGather(DAG.getVTList(NewVT, MVT::Other),
+                                            N->getMemoryVT(), dl, Ops,
+                                            N->getMemOperand());
+    SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                  NewGather.getValue(0),
+                                  DAG.getIntPtrConstant(0, dl));
+    SDValue RetOps[] = {Extract, NewGather.getValue(1)};
+    return DAG.getMergeValues(RetOps, dl);
   }
   return Op;
 }
@@ -18572,6 +20255,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
   case ISD::FGETSIGN:           return LowerFGETSIGN(Op, DAG);
   case ISD::SETCC:              return LowerSETCC(Op, DAG);
+  case ISD::SETCCE:             return LowerSETCCE(Op, DAG);
   case ISD::SELECT:             return LowerSELECT(Op, DAG);
   case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
@@ -18592,12 +20276,14 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
-  case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
-  case ISD::CTLZ_ZERO_UNDEF:    return LowerCTLZ_ZERO_UNDEF(Op, DAG);
-  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
+  case ISD::CTLZ:               return LowerCTLZ(Op, Subtarget, DAG);
+  case ISD::CTLZ_ZERO_UNDEF:    return LowerCTLZ_ZERO_UNDEF(Op, Subtarget, DAG);
+  case ISD::CTTZ:
+  case ISD::CTTZ_ZERO_UNDEF:    return LowerCTTZ(Op, DAG);
   case ISD::MUL:                return LowerMUL(Op, Subtarget, DAG);
   case ISD::UMUL_LOHI:
   case ISD::SMUL_LOHI:          return LowerMUL_LOHI(Op, Subtarget, DAG);
+  case ISD::ROTL:               return LowerRotate(Op, Subtarget, DAG);
   case ISD::SRA:
   case ISD::SRL:
   case ISD::SHL:                return LowerShift(Op, Subtarget, DAG);
@@ -18615,7 +20301,13 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::SUBE:               return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
   case ISD::ADD:                return LowerADD(Op, DAG);
   case ISD::SUB:                return LowerSUB(Op, DAG);
+  case ISD::SMAX:
+  case ISD::SMIN:
+  case ISD::UMAX:
+  case ISD::UMIN:               return LowerMINMAX(Op, DAG);
   case ISD::FSINCOS:            return LowerFSINCOS(Op, Subtarget, DAG);
+  case ISD::MLOAD:              return LowerMLOAD(Op, Subtarget, DAG);
+  case ISD::MSTORE:             return LowerMSTORE(Op, Subtarget, DAG);
   case ISD::MGATHER:            return LowerMGATHER(Op, Subtarget, DAG);
   case ISD::MSCATTER:           return LowerMSCATTER(Op, Subtarget, DAG);
   case ISD::GC_TRANSITION_START:
@@ -18634,14 +20326,43 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
   switch (N->getOpcode()) {
   default:
     llvm_unreachable("Do not know how to custom type legalize this operation!");
+  case X86ISD::AVG: {
+    // Legalize types for X86ISD::AVG by expanding vectors.
+    assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+
+    auto InVT = N->getValueType(0);
+    auto InVTSize = InVT.getSizeInBits();
+    const unsigned RegSize =
+        (InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128;
+    assert((!Subtarget->hasAVX512() || RegSize < 512) &&
+           "512-bit vector requires AVX512");
+    assert((!Subtarget->hasAVX2() || RegSize < 256) &&
+           "256-bit vector requires AVX2");
+
+    auto ElemVT = InVT.getVectorElementType();
+    auto RegVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
+                                  RegSize / ElemVT.getSizeInBits());
+    assert(RegSize % InVT.getSizeInBits() == 0);
+    unsigned NumConcat = RegSize / InVT.getSizeInBits();
+
+    SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
+    Ops[0] = N->getOperand(0);
+    SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+    Ops[0] = N->getOperand(1);
+    SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Ops);
+
+    SDValue Res = DAG.getNode(X86ISD::AVG, dl, RegVT, InVec0, InVec1);
+    Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InVT, Res,
                                   DAG.getIntPtrConstant(0, dl)));
+    return;
+  }
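X86ISD::AVG models PAVGB/PAVGW: an unsigned rounding average, avg(a, b) = (a + b + 1) >> 1, computed in a one-bit-wider intermediate so the sum cannot overflow. The widening above only changes vector length, not this per-lane semantics. A quick exhaustive check of the identity (illustrative only):

    #include <cassert>
    #include <cstdint>

    // PAVGB semantics on one lane: 9-bit sum, round up on ties, truncate.
    static uint8_t pavgb(uint8_t a, uint8_t b) {
      return uint8_t((unsigned(a) + unsigned(b) + 1) >> 1);
    }

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b)
          assert(pavgb(uint8_t(a), uint8_t(b)) == (a + b + 1) / 2);
      return 0;  // e.g. pavgb(0, 1) == 1: ties round up
    }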
   // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
   case X86ISD::FMINC:
   case X86ISD::FMIN:
   case X86ISD::FMAXC:
   case X86ISD::FMAX: {
     EVT VT = N->getValueType(0);
-    if (VT != MVT::v2f32)
-      llvm_unreachable("Unexpected type (!= v2f32) on FMIN/FMAX.");
+    assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX.");
     SDValue UNDEF = DAG.getUNDEF(VT);
     SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32,
                               N->getOperand(0), UNDEF);
@@ -18668,17 +20389,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::FP_TO_SINT:
-    // FP_TO_INT*_IN_MEM is not legal for f16 inputs.  Do not convert
-    // (FP_TO_SINT (load f16)) to FP_TO_INT*.
-    if (N->getOperand(0).getValueType() == MVT::f16)
-      break;
-    // fallthrough
   case ISD::FP_TO_UINT: {
     bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
 
-    if (!IsSigned && !isIntegerTypeFTOL(SDValue(N, 0).getValueType()))
-      return;
-
     std::pair<SDValue, SDValue> Vals =
         FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
     SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -18707,6 +20420,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
                              DAG.getBitcast(MVT::v2i64, VBias));
     Or = DAG.getBitcast(MVT::v2f64, Or);
+    // TODO: Are there any fast-math-flags to propagate here?
     SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias);
     Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub));
     return;
@@ -18740,6 +20454,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
   }
   }
+  case ISD::INTRINSIC_WO_CHAIN: {
+    if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG))
+      Results.push_back(V);
+    return;
+  }
   case ISD::READCYCLECOUNTER: {
     return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
                                    Results);
@@ -18748,7 +20467,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     EVT T = N->getValueType(0);
     assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair");
     bool Regs64bit = T == MVT::i128;
-    EVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
+    MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
     SDValue cpInL, cpInH;
     cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
                         DAG.getConstant(0, dl, HalfT));
@@ -18884,6 +20603,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::CMOV:               return "X86ISD::CMOV";
   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
+  case X86ISD::IRET:               return "X86ISD::IRET";
   case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
   case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
@@ -18910,6 +20630,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FHADD:              return "X86ISD::FHADD";
   case X86ISD::FHSUB:              return "X86ISD::FHSUB";
   case X86ISD::ABS:                return "X86ISD::ABS";
+  case X86ISD::CONFLICT:           return "X86ISD::CONFLICT";
   case X86ISD::FMAX:               return "X86ISD::FMAX";
   case X86ISD::FMAX_RND:           return "X86ISD::FMAX_RND";
   case X86ISD::FMIN:               return "X86ISD::FMIN";
@@ -18937,12 +20658,14 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VZEXT:              return "X86ISD::VZEXT";
   case X86ISD::VSEXT:              return "X86ISD::VSEXT";
   case X86ISD::VTRUNC:             return "X86ISD::VTRUNC";
-  case X86ISD::VTRUNCM:            return "X86ISD::VTRUNCM";
+  case X86ISD::VTRUNCS:            return "X86ISD::VTRUNCS";
+  case X86ISD::VTRUNCUS:           return "X86ISD::VTRUNCUS";
   case X86ISD::VINSERT:            return "X86ISD::VINSERT";
   case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VFPROUND:           return "X86ISD::VFPROUND";
   case X86ISD::CVTDQ2PD:           return "X86ISD::CVTDQ2PD";
   case X86ISD::CVTUDQ2PD:          return "X86ISD::CVTUDQ2PD";
+  case X86ISD::CVT2MASK:           return "X86ISD::CVT2MASK";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
   case X86ISD::VSHL:               return "X86ISD::VSHL";
@@ -18978,6 +20701,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::TESTM:              return "X86ISD::TESTM";
   case X86ISD::TESTNM:             return "X86ISD::TESTNM";
   case X86ISD::KORTEST:            return "X86ISD::KORTEST";
+  case X86ISD::KTEST:              return "X86ISD::KTEST";
   case X86ISD::PACKSS:             return "X86ISD::PACKSS";
   case X86ISD::PACKUS:             return "X86ISD::PACKUS";
   case X86ISD::PALIGNR:            return "X86ISD::PALIGNR";
@@ -19000,6 +20724,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::UNPCKL:             return "X86ISD::UNPCKL";
   case X86ISD::UNPCKH:             return "X86ISD::UNPCKH";
   case X86ISD::VBROADCAST:         return "X86ISD::VBROADCAST";
+  case X86ISD::VBROADCASTM:        return "X86ISD::VBROADCASTM";
   case X86ISD::SUBV_BROADCAST:     return "X86ISD::SUBV_BROADCAST";
   case X86ISD::VEXTRACT:           return "X86ISD::VEXTRACT";
   case X86ISD::VPERMILPV:          return "X86ISD::VPERMILPV";
@@ -19009,11 +20734,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::VPERMV3:            return "X86ISD::VPERMV3";
   case X86ISD::VPERMIV3:           return "X86ISD::VPERMIV3";
   case X86ISD::VPERMI:             return "X86ISD::VPERMI";
+  case X86ISD::VPTERNLOG:          return "X86ISD::VPTERNLOG";
   case X86ISD::VFIXUPIMM:          return "X86ISD::VFIXUPIMM";
   case X86ISD::VRANGE:             return "X86ISD::VRANGE";
   case X86ISD::PMULUDQ:            return "X86ISD::PMULUDQ";
   case X86ISD::PMULDQ:             return "X86ISD::PMULDQ";
   case X86ISD::PSADBW:             return "X86ISD::PSADBW";
+  case X86ISD::DBPSADBW:           return "X86ISD::DBPSADBW";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
   case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";
   case X86ISD::WIN_ALLOCA:         return "X86ISD::WIN_ALLOCA";
@@ -19022,10 +20749,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::SFENCE:             return "X86ISD::SFENCE";
   case X86ISD::LFENCE:             return "X86ISD::LFENCE";
   case X86ISD::SEG_ALLOCA:         return "X86ISD::SEG_ALLOCA";
-  case X86ISD::WIN_FTOL:           return "X86ISD::WIN_FTOL";
   case X86ISD::SAHF:               return "X86ISD::SAHF";
   case X86ISD::RDRAND:             return "X86ISD::RDRAND";
   case X86ISD::RDSEED:             return "X86ISD::RDSEED";
+  case X86ISD::VPMADDUBSW:         return "X86ISD::VPMADDUBSW";
+  case X86ISD::VPMADDWD:           return "X86ISD::VPMADDWD";
+  case X86ISD::VPROT:              return "X86ISD::VPROT";
+  case X86ISD::VPROTI:             return "X86ISD::VPROTI";
+  case X86ISD::VPSHA:              return "X86ISD::VPSHA";
+  case X86ISD::VPSHL:              return "X86ISD::VPSHL";
+  case X86ISD::VPCOM:              return "X86ISD::VPCOM";
+  case X86ISD::VPCOMU:             return "X86ISD::VPCOMU";
   case X86ISD::FMADD:              return "X86ISD::FMADD";
   case X86ISD::FMSUB:              return "X86ISD::FMSUB";
   case X86ISD::FNMADD:             return "X86ISD::FNMADD";
@@ -19038,7 +20772,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::FNMSUB_RND:         return "X86ISD::FNMSUB_RND";
   case X86ISD::FMADDSUB_RND:       return "X86ISD::FMADDSUB_RND";
   case X86ISD::FMSUBADD_RND:       return "X86ISD::FMSUBADD_RND";
-  case X86ISD::RNDSCALE:           return "X86ISD::RNDSCALE";
+  case X86ISD::VRNDSCALE:          return "X86ISD::VRNDSCALE";
+  case X86ISD::VREDUCE:            return "X86ISD::VREDUCE";
+  case X86ISD::VGETMANT:           return "X86ISD::VGETMANT";
   case X86ISD::PCMPESTRI:          return "X86ISD::PCMPESTRI";
   case X86ISD::PCMPISTRI:          return "X86ISD::PCMPISTRI";
   case X86ISD::XTEST:              return "X86ISD::XTEST";
@@ -19064,6 +20800,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::UINT_TO_FP_RND:     return "X86ISD::UINT_TO_FP_RND";
   case X86ISD::FP_TO_SINT_RND:     return "X86ISD::FP_TO_SINT_RND";
   case X86ISD::FP_TO_UINT_RND:     return "X86ISD::FP_TO_UINT_RND";
+  case X86ISD::VFPCLASS:           return "X86ISD::VFPCLASS";
+  case X86ISD::VFPCLASSS:          return "X86ISD::VFPCLASSS";
   }
   return nullptr;
 }
@@ -19218,7 +20956,7 @@ bool X86TargetLowering::isVectorLoadExtDesirable(SDValue) const { return true; }
 
 bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
-  if (!(Subtarget->hasFMA() || Subtarget->hasFMA4() || Subtarget->hasAVX512()))
+  if (!Subtarget->hasAnyFMA())
     return false;
 
   VT = VT.getScalarType();
@@ -19253,11 +20991,11 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
     return false;
 
   // Not for i1 vectors
-  if (VT.getScalarType() == MVT::i1)
+  if (VT.getSimpleVT().getScalarType() == MVT::i1)
     return false;
 
   // Very little shuffling can be done for 64-bit vectors right now.
-  if (VT.getSizeInBits() == 64)
+  if (VT.getSimpleVT().getSizeInBits() == 64)
    return false;
 
   // We only care that the types being shuffled are legal. The lowering can
@@ -19282,8 +21020,7 @@ static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB,
   DebugLoc DL = MI->getDebugLoc();
   const BasicBlock *BB = MBB->getBasicBlock();
-  MachineFunction::iterator I = MBB;
-  ++I;
+  MachineFunction::iterator I = ++MBB->getIterator();
 
   // For the v = xbegin(), we generate
   //
@@ -19531,8 +21268,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(MachineInstr *MI,
     offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
     endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
 
-    MachineFunction::iterator MBBIter = MBB;
-    ++MBBIter;
+    MachineFunction::iterator MBBIter = ++MBB->getIterator();
 
     // Insert the new basic blocks
     MF->insert(MBBIter, offsetMBB);
@@ -19702,8 +21438,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
   // stores were performed.
   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
   MachineFunction *F = MBB->getParent();
-  MachineFunction::iterator MBBIter = MBB;
-  ++MBBIter;
+  MachineFunction::iterator MBBIter = ++MBB->getIterator();
   MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
   F->insert(MBBIter, XMMSaveMBB);
@@ -19727,7 +21462,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
   int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
   int64_t VarArgsFPOffset = MI->getOperand(2).getImm();
 
-  if (!Subtarget->isTargetWin64()) {
+  if (!Subtarget->isCallingConvWin64(F->getFunction()->getCallingConv())) {
     // If %al is 0, branch around the XMM save block.
     BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
     BuildMI(MBB, DL, TII->get(X86::JE_1)).addMBB(EndMBB);
@@ -19744,9 +21479,8 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
   // In the XMM save block, save all the XMM argument registers.
   for (int i = 3, e = MI->getNumOperands() - 1; i != e; ++i) {
     int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
-    MachineMemOperand *MMO =
-        F->getMachineMemOperand(
-            MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
+    MachineMemOperand *MMO = F->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(*F, RegSaveFrameIndex, Offset),
         MachineMemOperand::MOStore,
         /*Size=*/16, /*Align=*/16);
     BuildMI(XMMSaveMBB, DL, TII->get(MOVOpc))
@@ -19800,6 +21534,39 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr,
   return true;
 }
 
+// Return true if it is OK for this CMOV pseudo-opcode to be cascaded
+// together with other CMOV pseudo-opcodes into a single basic-block with
+// conditional jump around it.
+static bool isCMOVPseudo(MachineInstr *MI) {
+  switch (MI->getOpcode()) {
+  case X86::CMOV_FR32:
+  case X86::CMOV_FR64:
+  case X86::CMOV_GR8:
+  case X86::CMOV_GR16:
+  case X86::CMOV_GR32:
+  case X86::CMOV_RFP32:
+  case X86::CMOV_RFP64:
+  case X86::CMOV_RFP80:
+  case X86::CMOV_V2F64:
+  case X86::CMOV_V2I64:
+  case X86::CMOV_V4F32:
+  case X86::CMOV_V4F64:
+  case X86::CMOV_V4I64:
+  case X86::CMOV_V16F32:
+  case X86::CMOV_V8F32:
+  case X86::CMOV_V8F64:
+  case X86::CMOV_V8I64:
+  case X86::CMOV_V8I1:
+  case X86::CMOV_V16I1:
+  case X86::CMOV_V32I1:
+  case X86::CMOV_V64I1:
+    return true;
+
+  default:
+    return false;
+  }
+}
+
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
                                      MachineBasicBlock *BB) const {
@@ -19811,8 +21578,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   // destination vreg to set, the condition code register to branch on, the
   // true/false values to select between, and a branch opcode to use.
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
-  MachineFunction::iterator It = BB;
-  ++It;
+  MachineFunction::iterator It = ++BB->getIterator();
 
   //  thisMBB:
   //  ...
@@ -19823,8 +21589,41 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   MachineBasicBlock *thisMBB = BB;
   MachineFunction *F = BB->getParent();
 
-  // We also lower double CMOVs:
+  // This code lowers all pseudo-CMOV instructions. Generally it lowers these
+  // as described above, by inserting a BB, and then making a PHI at the join
+  // point to select the true and false operands of the CMOV in the PHI.
+  //
+  // The code also handles two different cases of multiple CMOV opcodes
+  // in a row.
+  //
+  // Case 1:
+  // In this case, there are multiple CMOVs in a row, all of which are based on
+  // the same condition setting (or the exact opposite condition setting).
+  // In this case we can lower all the CMOVs using a single inserted BB, and
+  // then make a number of PHIs at the join point to model the CMOVs. The only
+  // trickiness here is that in a case like:
+  //
+  //   t2 = CMOV cond1 t1, f1
+  //   t3 = CMOV cond1 t2, f2
+  //
+  // when rewriting this into PHIs, we have to perform some renaming on the
+  // temps since you cannot have a PHI operand refer to a PHI result earlier
+  // in the same block. The "simple" but wrong lowering would be:
+  //
+  //   t2 = PHI t1(BB1), f1(BB2)
+  //   t3 = PHI t2(BB1), f2(BB2)
+  //
+  // but clearly t2 is not defined in BB1, so that is incorrect. The proper
+  // renaming is to note that on the path through BB1, t2 is really just a
+  // copy of t1, and do that renaming, properly generating:
+  //
+  //   t2 = PHI t1(BB1), f1(BB2)
+  //   t3 = PHI t1(BB1), f2(BB2)
+  //
+  // Case 2, we lower cascaded CMOVs such as
+  //
   //   (CMOV (CMOV F, T, cc1), T, cc2)
+  //
   // to two successive branches.  For that, we look for another CMOV as the
   // following instruction.
   //
@@ -19890,19 +21689,42 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   //  .LBB5_4:
   //          retq
   //
-  MachineInstr *NextCMOV = nullptr;
+  MachineInstr *CascadedCMOV = nullptr;
+  MachineInstr *LastCMOV = MI;
+  X86::CondCode CC = X86::CondCode(MI->getOperand(3).getImm());
+  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
   MachineBasicBlock::iterator NextMIIt =
       std::next(MachineBasicBlock::iterator(MI));
-  if (NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
+
+  // Check for case 1, where there are multiple CMOVs with the same condition
+  // first.  Of the two cases of multiple CMOV lowerings, case 1 reduces the
+  // number of jumps the most.
+
+  if (isCMOVPseudo(MI)) {
+    // See if we have a string of CMOVs with the same condition.
+    while (NextMIIt != BB->end() &&
+           isCMOVPseudo(NextMIIt) &&
+           (NextMIIt->getOperand(3).getImm() == CC ||
+            NextMIIt->getOperand(3).getImm() == OppCC)) {
+      LastCMOV = &*NextMIIt;
+      ++NextMIIt;
+    }
+  }
+
+  // This checks for case 2, but only do this if we didn't already find
+  // case 1, as indicated by LastCMOV == MI.
+  if (LastCMOV == MI &&
+      NextMIIt != BB->end() && NextMIIt->getOpcode() == MI->getOpcode() &&
       NextMIIt->getOperand(2).getReg() == MI->getOperand(2).getReg() &&
-      NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg())
-    NextCMOV = &*NextMIIt;
+      NextMIIt->getOperand(1).getReg() == MI->getOperand(0).getReg()) {
+    CascadedCMOV = &*NextMIIt;
+  }
 
   MachineBasicBlock *jcc1MBB = nullptr;
 
-  // If we have a double CMOV, we lower it to two successive branches to
+  // If we have a cascaded CMOV, we lower it to two successive branches to
   // the same block.  EFLAGS is used by both, so mark it as live in the second.
-  if (NextCMOV) {
+  if (CascadedCMOV) {
     jcc1MBB = F->CreateMachineBasicBlock(LLVM_BB);
     F->insert(It, jcc1MBB);
     jcc1MBB->addLiveIn(X86::EFLAGS);
@@ -19917,7 +21739,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   // live into the sink and copy blocks.
   const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
 
-  MachineInstr *LastEFLAGSUser = NextCMOV ? NextCMOV : MI;
+  MachineInstr *LastEFLAGSUser = CascadedCMOV ? CascadedCMOV : LastCMOV;
   if (!LastEFLAGSUser->killsRegister(X86::EFLAGS) &&
       !checkAndUpdateEFLAGSKill(LastEFLAGSUser, BB, TRI)) {
     copy0MBB->addLiveIn(X86::EFLAGS);
@@ -19926,12 +21748,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
 
   // Transfer the remainder of BB and its successor edges to sinkMBB.
   sinkMBB->splice(sinkMBB->begin(), BB,
-                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
+                  std::next(MachineBasicBlock::iterator(LastCMOV)), BB->end());
   sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
 
   // Add the true and fallthrough blocks as its successors.
-  if (NextCMOV) {
-    // The fallthrough block may be jcc1MBB, if we have a double CMOV.
+  if (CascadedCMOV) {
+    // The fallthrough block may be jcc1MBB, if we have a cascaded CMOV.
     BB->addSuccessor(jcc1MBB);
 
     // In that case, jcc1MBB will itself fallthrough the copy0MBB, and
@@ -19946,13 +21768,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   BB->addSuccessor(sinkMBB);
 
   // Create the conditional branch instruction.
-  unsigned Opc =
-    X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+  unsigned Opc = X86::GetCondBranchFromCond(CC);
   BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
 
-  if (NextCMOV) {
+  if (CascadedCMOV) {
     unsigned Opc2 = X86::GetCondBranchFromCond(
-        (X86::CondCode)NextCMOV->getOperand(3).getImm());
+        (X86::CondCode)CascadedCMOV->getOperand(3).getImm());
     BuildMI(jcc1MBB, DL, TII->get(Opc2)).addMBB(sinkMBB);
   }
 
@@ -19964,27 +21785,109 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
   //  sinkMBB:
   //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   //  ...
-  MachineInstrBuilder MIB =
-      BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI),
-              MI->getOperand(0).getReg())
-          .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
-          .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
+  MachineBasicBlock::iterator MIItEnd =
+      std::next(MachineBasicBlock::iterator(LastCMOV));
+  MachineBasicBlock::iterator SinkInsertionPoint = sinkMBB->begin();
+  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
+  MachineInstrBuilder MIB;
 
-  // If we have a double CMOV, the second Jcc provides the same incoming
+  // As we are creating the PHIs, we have to be careful if there is more than
+  // one.  Later CMOVs may reference the results of earlier CMOVs, but later
+  // PHIs have to reference the individual true/false inputs from earlier PHIs.
+  // That also means that PHI construction must work forward from earlier to
+  // later, and that the code must maintain a mapping from earlier PHI's
+  // destination registers, and the registers that went into the PHI.
+
+  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
+    unsigned DestReg = MIIt->getOperand(0).getReg();
+    unsigned Op1Reg = MIIt->getOperand(1).getReg();
+    unsigned Op2Reg = MIIt->getOperand(2).getReg();
+
+    // If this CMOV we are generating is the opposite condition from
+    // the jump we generated, then we have to swap the operands for the
+    // PHI that is going to be generated.
+    if (MIIt->getOperand(3).getImm() == OppCC)
+      std::swap(Op1Reg, Op2Reg);
+
+    if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
+      Op1Reg = RegRewriteTable[Op1Reg].first;
+
+    if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
+      Op2Reg = RegRewriteTable[Op2Reg].second;
+
+    MIB = BuildMI(*sinkMBB, SinkInsertionPoint, DL,
+                  TII->get(X86::PHI), DestReg)
+          .addReg(Op1Reg).addMBB(copy0MBB)
+          .addReg(Op2Reg).addMBB(thisMBB);
+
+    // Add this PHI to the rewrite table.
+    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
+  }
+
+  // If we have a cascaded CMOV, the second Jcc provides the same incoming
   // value as the first Jcc (the True operand of the SELECT_CC/CMOV nodes).
-  if (NextCMOV) {
+  if (CascadedCMOV) {
     MIB.addReg(MI->getOperand(2).getReg()).addMBB(jcc1MBB);
 
     // Copy the PHI result to the register defined by the second CMOV.
     BuildMI(*sinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())),
-            DL, TII->get(TargetOpcode::COPY), NextCMOV->getOperand(0).getReg())
+            DL, TII->get(TargetOpcode::COPY),
+            CascadedCMOV->getOperand(0).getReg())
         .addReg(MI->getOperand(0).getReg());
-    NextCMOV->eraseFromParent();
+    CascadedCMOV->eraseFromParent();
   }
 
-  MI->eraseFromParent();   // The pseudo instruction is gone now.
+  // Now remove the CMOV(s).
+  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; )
+    (MIIt++)->eraseFromParent();
+
   return sinkMBB;
 }
 
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredAtomicFP(MachineInstr *MI,
+                                       MachineBasicBlock *BB) const {
+  // Combine the following atomic floating-point modification pattern:
+  //   a.store(reg OP a.load(acquire), release)
+  // Transform them into:
+  //   OPss (%gpr), %xmm
+  //   movss %xmm, (%gpr)
+  // Or sd equivalent for 64-bit operations.
+  unsigned MOp, FOp;
+  switch (MI->getOpcode()) {
+  default: llvm_unreachable("unexpected instr type for EmitLoweredAtomicFP");
+  case X86::RELEASE_FADD32mr: MOp = X86::MOVSSmr; FOp = X86::ADDSSrm; break;
+  case X86::RELEASE_FADD64mr: MOp = X86::MOVSDmr; FOp = X86::ADDSDrm; break;
+  }
+  const X86InstrInfo *TII = Subtarget->getInstrInfo();
+  DebugLoc DL = MI->getDebugLoc();
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+  MachineOperand MSrc = MI->getOperand(0);
+  unsigned VSrc = MI->getOperand(5).getReg();
+  const MachineOperand &Disp = MI->getOperand(3);
+  MachineOperand ZeroDisp = MachineOperand::CreateImm(0);
+  bool hasDisp = Disp.isGlobal() || Disp.isImm();
+  if (hasDisp && MSrc.isReg())
+    MSrc.setIsKill(false);
+  MachineInstrBuilder MIM = BuildMI(*BB, MI, DL, TII->get(MOp))
+                                .addOperand(/*Base=*/MSrc)
+                                .addImm(/*Scale=*/1)
+                                .addReg(/*Index=*/0)
+                                .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
+                                .addReg(0);
+  MachineInstr *MIO = BuildMI(*BB, (MachineInstr *)MIM, DL, TII->get(FOp),
+                              MRI.createVirtualRegister(MRI.getRegClass(VSrc)))
+                          .addReg(VSrc)
+                          .addOperand(/*Base=*/MSrc)
+                          .addImm(/*Scale=*/1)
+                          .addReg(/*Index=*/0)
+                          .addDisp(hasDisp ? Disp : ZeroDisp, /*off=*/0)
+                          .addReg(/*Segment=*/0);
+  MIM.addReg(MIO->getOperand(0).getReg(), RegState::Kill);
+  MI->eraseFromParent(); // The pseudo instruction is gone now.
+  return BB;
+}
+
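The source-level shape that produces RELEASE_FADD32mr/64mr is an acquire load, a floating-point add, and a release store to the same atomic location — notably not an atomic read-modify-write. A minimal C++ reproducer of the pattern (whether it actually folds into the memory-operand addss/movss form depends on the surrounding codegen):

    #include <atomic>

    // a.store(a.load(acquire) + delta, release): the backend can fold this
    // into "addss (%mem), %xmm" + "movss %xmm, (%mem)" instead of bouncing
    // the float through integer registers.
    void bump(std::atomic<float> &a, float delta) {
      float v = a.load(std::memory_order_acquire);
      a.store(v + delta, std::memory_order_release);
    }

    int main() {
      std::atomic<float> a(1.0f);
      bump(a, 2.5f);
      return a.load() == 3.5f ? 0 : 1;
    }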
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
                                         MachineBasicBlock *BB) const {
@@ -20032,8 +21935,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
     sizeVReg = MI->getOperand(1).getReg(),
     physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
 
-  MachineFunction::iterator MBBIter = BB;
-  ++MBBIter;
+  MachineFunction::iterator MBBIter = ++BB->getIterator();
 
   MF->insert(MBBIter, bumpMBB);
   MF->insert(MBBIter, mallocMBB);
@@ -20120,14 +22022,60 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
 MachineBasicBlock *
 X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
                                         MachineBasicBlock *BB) const {
+  assert(!Subtarget->isTargetMachO());
+  DebugLoc DL = MI->getDebugLoc();
+  MachineInstr *ResumeMI = Subtarget->getFrameLowering()->emitStackProbe(
+      *BB->getParent(), *BB, MI, DL, false);
+  MachineBasicBlock *ResumeBB = ResumeMI->getParent();
+  MI->eraseFromParent(); // The pseudo instruction is gone now.
+  return ResumeBB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredCatchRet(MachineInstr *MI,
+                                       MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+  MachineBasicBlock *TargetMBB = MI->getOperand(0).getMBB();
   DebugLoc DL = MI->getDebugLoc();
-  assert(!Subtarget->isTargetMachO());
 
-  Subtarget->getFrameLowering()->emitStackProbeCall(*BB->getParent(), *BB, MI,
-                                                    DL);
+  assert(!isAsynchronousEHPersonality(
+             classifyEHPersonality(MF->getFunction()->getPersonalityFn())) &&
+         "SEH does not use catchret!");
 
-  MI->eraseFromParent();   // The pseudo instruction is gone now.
+  // Only 32-bit EH needs to worry about manually restoring stack pointers.
+  if (!Subtarget->is32Bit())
+    return BB;
+
+  // C++ EH creates a new target block to hold the restore code, and wires up
+  // the new block to the return destination with a normal JMP_4.
+  MachineBasicBlock *RestoreMBB =
+      MF->CreateMachineBasicBlock(BB->getBasicBlock());
+  assert(BB->succ_size() == 1);
+  MF->insert(std::next(BB->getIterator()), RestoreMBB);
+  RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
+  BB->addSuccessor(RestoreMBB);
+  MI->getOperand(0).setMBB(RestoreMBB);
+
+  auto RestoreMBBI = RestoreMBB->begin();
+  BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::EH_RESTORE));
+  BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
+  return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredCatchPad(MachineInstr *MI,
+                                       MachineBasicBlock *BB) const {
+  MachineFunction *MF = BB->getParent();
+  const Constant *PerFn = MF->getFunction()->getPersonalityFn();
+  bool IsSEH = isAsynchronousEHPersonality(classifyEHPersonality(PerFn));
+  // Only 32-bit SEH requires special handling for catchpad.
+  if (IsSEH && Subtarget->is32Bit()) {
+    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
+    DebugLoc DL = MI->getDebugLoc();
+    BuildMI(*BB, MI, DL, TII.get(X86::EH_RESTORE));
+  }
+  MI->eraseFromParent();
   return BB;
 }
 
@@ -20149,6 +22097,8 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
   // FIXME: The 32-bit calls have non-standard calling conventions. Use a
   // proper register mask.
   const uint32_t *RegMask =
+      Subtarget->is64Bit() ?
+      Subtarget->getRegisterInfo()->getDarwinTLSCallPreservedMask() :
       Subtarget->getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
   if (Subtarget->is64Bit()) {
     MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
@@ -20198,8 +22148,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   MachineRegisterInfo &MRI = MF->getRegInfo();
 
   const BasicBlock *BB = MBB->getBasicBlock();
-  MachineFunction::iterator I = MBB;
-  ++I;
+  MachineFunction::iterator I = ++MBB->getIterator();
 
   // Memory Reference
   MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
@@ -20225,7 +22174,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   // For v = setjmp(buf), we generate
   //
   // thisMBB:
-  //  buf[LabelOffset] = restoreMBB
+  //  buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB
   //  SjLjSetup restoreMBB
   //
   // mainMBB:
@@ -20245,6 +22194,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
   MF->insert(I, mainMBB);
   MF->insert(I, sinkMBB);
   MF->push_back(restoreMBB);
+  restoreMBB->setHasAddressTaken();
 
   MachineInstrBuilder MIB;
 
@@ -20511,35 +22461,44 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return BB;
   case X86::WIN_ALLOCA:
     return EmitLoweredWinAlloca(MI, BB);
+  case X86::CATCHRET:
+    return EmitLoweredCatchRet(MI, BB);
+  case X86::CATCHPAD:
+    return EmitLoweredCatchPad(MI, BB);
   case X86::SEG_ALLOCA_32:
   case X86::SEG_ALLOCA_64:
    return EmitLoweredSegAlloca(MI, BB);
   case X86::TLSCall_32:
   case X86::TLSCall_64:
     return EmitLoweredTLSCall(MI, BB);
-  case X86::CMOV_GR8:
   case X86::CMOV_FR32:
   case X86::CMOV_FR64:
-  case X86::CMOV_V4F32:
-  case X86::CMOV_V2F64:
-  case X86::CMOV_V2I64:
-  case X86::CMOV_V8F32:
-  case X86::CMOV_V4F64:
-  case X86::CMOV_V4I64:
-  case X86::CMOV_V16F32:
-  case X86::CMOV_V8F64:
-  case X86::CMOV_V8I64:
+  case X86::CMOV_FR128:
+  case X86::CMOV_GR8:
   case X86::CMOV_GR16:
   case X86::CMOV_GR32:
   case X86::CMOV_RFP32:
   case X86::CMOV_RFP64:
   case X86::CMOV_RFP80:
+  case X86::CMOV_V2F64:
+  case X86::CMOV_V2I64:
+  case X86::CMOV_V4F32:
+  case X86::CMOV_V4F64:
+  case X86::CMOV_V4I64:
+  case X86::CMOV_V16F32:
+  case X86::CMOV_V8F32:
+  case X86::CMOV_V8F64:
+  case X86::CMOV_V8I64:
   case X86::CMOV_V8I1:
   case X86::CMOV_V16I1:
   case X86::CMOV_V32I1:
   case X86::CMOV_V64I1:
     return EmitLoweredSelect(MI, BB);
 
+  case X86::RELEASE_FADD32mr:
+  case X86::RELEASE_FADD64mr:
+    return EmitLoweredAtomicFP(MI, BB);
+
   case X86::FP32_TO_INT16_IN_MEM:
   case X86::FP32_TO_INT32_IN_MEM:
   case X86::FP32_TO_INT64_IN_MEM:
@@ -20793,7 +22752,7 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
     unsigned Depth) const {
   // SETCC_CARRY sets the dest to ~0 for true or 0 for false.
   if (Op.getOpcode() == X86ISD::SETCC_CARRY)
-    return Op.getValueType().getScalarType().getSizeInBits();
+    return Op.getValueType().getScalarSizeInBits();
 
   // Fallback case.
   return 1;
@@ -20814,39 +22773,8 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
   return TargetLowering::isGAPlusOffset(N, GA, Offset);
 }
 
-/// isShuffleHigh128VectorInsertLow - Checks whether the shuffle node is the
-/// same as extracting the high 128-bit part of 256-bit vector and then
-/// inserting the result into the low part of a new 256-bit vector
-static bool isShuffleHigh128VectorInsertLow(ShuffleVectorSDNode *SVOp) {
-  EVT VT = SVOp->getValueType(0);
-  unsigned NumElems = VT.getVectorNumElements();
-
-  // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
-  for (unsigned i = 0, j = NumElems/2; i != NumElems/2; ++i, ++j)
-    if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
-        SVOp->getMaskElt(j) >= 0)
-      return false;
-
-  return true;
-}
-
-/// isShuffleLow128VectorInsertHigh - Checks whether the shuffle node is the
-/// same as extracting the low 128-bit part of 256-bit vector and then
-/// inserting the result into the high part of a new 256-bit vector
-static bool isShuffleLow128VectorInsertHigh(ShuffleVectorSDNode *SVOp) {
-  EVT VT = SVOp->getValueType(0);
-  unsigned NumElems = VT.getVectorNumElements();
-
-  // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 2, 3>
-  for (unsigned i = NumElems/2, j = 0; i != NumElems; ++i, ++j)
-    if (!isUndefOrEqual(SVOp->getMaskElt(i), j) ||
-        SVOp->getMaskElt(j) >= 0)
-      return false;
-
-  return true;
-}
-
 /// PerformShuffleCombine256 - Performs shuffle combines for 256-bit vectors.
+/// FIXME: This could be expanded to support 512 bit vectors as well.
 static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
                                         const X86Subtarget* Subtarget) {
@@ -20854,7 +22782,7 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
   SDValue V1 = SVOp->getOperand(0);
   SDValue V2 = SVOp->getOperand(1);
-  EVT VT = SVOp->getValueType(0);
+  MVT VT = SVOp->getSimpleValueType(0);
   unsigned NumElems = VT.getVectorNumElements();
 
   if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
@@ -20920,24 +22848,6 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
     return DCI.CombineTo(N, InsV);
   }
 
-  //===--------------------------------------------------------------------===//
-  // Combine some shuffles into subvector extracts and inserts:
-  //
-
-  // vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
-  if (isShuffleHigh128VectorInsertLow(SVOp)) {
-    SDValue V = Extract128BitVector(V1, NumElems/2, DAG, dl);
-    SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, 0, DAG, dl);
-    return DCI.CombineTo(N, InsV);
-  }
-
-  // vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 2, 3>
-  if (isShuffleLow128VectorInsertHigh(SVOp)) {
-    SDValue V = Extract128BitVector(V1, 0, DAG, dl);
-    SDValue InsV = Insert128BitVector(DAG.getUNDEF(VT), V, NumElems/2, DAG, dl);
-    return DCI.CombineTo(N, InsV);
-  }
-
   return SDValue();
 }
 
@@ -20966,10 +22876,22 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
   MVT RootVT = Root.getSimpleValueType();
   SDLoc DL(Root);
 
-  // Just remove no-op shuffle masks.
   if (Mask.size() == 1) {
-    DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
-                  /*AddTo*/ true);
+    int Index = Mask[0];
+    assert((Index >= 0 || Index == SM_SentinelUndef ||
+            Index == SM_SentinelZero) &&
+           "Invalid shuffle index found!");
+
+    // We may end up with an accumulated mask of size 1 as a result of
+    // widening of shuffle operands (see function canWidenShuffleElements).
+    // If the only shuffle index is equal to SM_SentinelZero then propagate
+    // a zero vector. Otherwise, the combined shuffle mask is a no-op shuffle
Otherwise, the combine shuffle mask is a no-op shuffle + // mask, and therefore the entire chain of shuffles can be folded away. + if (Index == SM_SentinelZero) + DCI.CombineTo(Root.getNode(), getZeroVector(RootVT, Subtarget, DAG, DL)); + else + DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input), + /*AddTo*/ true); return true; } @@ -20985,7 +22907,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, // doesn't preclude something switching to the shorter encoding post-RA. // // FIXME: Should teach these routines about AVX vector widths. - if (FloatDomain && VT.getSizeInBits() == 128) { + if (FloatDomain && VT.is128BitVector()) { if (Mask.equals({0, 0}) || Mask.equals({1, 1})) { bool Lo = Mask.equals({0, 0}); unsigned Shuffle; @@ -21049,7 +22971,7 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef Mask, // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK // variants as none of these have single-instruction variants that are // superior to the UNPCK formulation. - if (!FloatDomain && VT.getSizeInBits() == 128 && + if (!FloatDomain && VT.is128BitVector() && (Mask.equals({0, 0, 1, 1, 2, 2, 3, 3}) || Mask.equals({4, 4, 5, 5, 6, 6, 7, 7}) || Mask.equals({0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7}) || @@ -21226,26 +23148,28 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root, // See if we can recurse into the operand to combine more things. switch (Op.getOpcode()) { - case X86ISD::PSHUFB: - HasPSHUFB = true; - case X86ISD::PSHUFD: - case X86ISD::PSHUFHW: - case X86ISD::PSHUFLW: - if (Op.getOperand(0).hasOneUse() && - combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, - HasPSHUFB, DAG, DCI, Subtarget)) - return true; - break; + case X86ISD::PSHUFB: + HasPSHUFB = true; + case X86ISD::PSHUFD: + case X86ISD::PSHUFHW: + case X86ISD::PSHUFLW: + if (Op.getOperand(0).hasOneUse() && + combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, + HasPSHUFB, DAG, DCI, Subtarget)) + return true; + break; - case X86ISD::UNPCKL: - case X86ISD::UNPCKH: - assert(Op.getOperand(0) == Op.getOperand(1) && "We only combine unary shuffles!"); - // We can't check for single use, we have to check that this shuffle is the only user. - if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) && - combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, - HasPSHUFB, DAG, DCI, Subtarget)) - return true; - break; + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: + assert(Op.getOperand(0) == Op.getOperand(1) && + "We only combine unary shuffles!"); + // We can't check for single use, we have to check that this shuffle is the + // only user. + if (Op->isOnlyUserOf(Op.getOperand(0).getNode()) && + combineX86ShufflesRecursively(Op.getOperand(0), Root, Mask, Depth + 1, + HasPSHUFB, DAG, DCI, Subtarget)) + return true; + break; } // Minor canonicalization of the accumulated shuffle mask to make it easier @@ -21360,8 +23284,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, case X86ISD::UNPCKH: // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword // shuffle into a preceding word shuffle. - if (V.getSimpleValueType().getScalarType() != MVT::i8 && - V.getSimpleValueType().getScalarType() != MVT::i16) + if (V.getSimpleValueType().getVectorElementType() != MVT::i8 && + V.getSimpleValueType().getVectorElementType() != MVT::i16) return SDValue(); // Search for a half-shuffle which we can combine with. 
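[Editor's sketch, not part of the upstream patch] The hunk above handles the case where recursive mask widening in combineX86ShuffleChain collapses the accumulated shuffle mask to a single element. A minimal standalone C++ model of that decision; the sentinel constants here are illustrative stand-ins for LLVM's SM_SentinelUndef/SM_SentinelZero, not the real enum values:

#include <cassert>

// Illustrative stand-ins for LLVM's shuffle mask sentinels.
constexpr int SentinelUndef = -1;
constexpr int SentinelZero  = -2;

// For a widened one-element mask: SentinelZero forces a zero vector;
// anything else means the whole shuffle chain is a no-op and folds away.
bool foldsToZeroVector(int MaskElt) {
  assert(MaskElt >= 0 || MaskElt == SentinelUndef || MaskElt == SentinelZero);
  return MaskElt == SentinelZero;
}

int main() {
  assert(foldsToZeroVector(SentinelZero)); // propagate a zero vector
  assert(!foldsToZeroVector(0));           // no-op: reuse the (bitcast) input
  assert(!foldsToZeroVector(SentinelUndef));
  return 0;
}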
@@ -21438,7 +23362,8 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef Mask, return V; } -/// \brief Search for a combinable shuffle across a chain ending in pshuflw or pshufhw. +/// \brief Search for a combinable shuffle across a chain ending in pshuflw or +/// pshufhw. /// /// We walk up the chain, skipping shuffles of the other half and looking /// through shuffles which switch halves trying to find a shuffle of the same @@ -21520,6 +23445,41 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, Mask = getPSHUFShuffleMask(N); assert(Mask.size() == 4); break; + case X86ISD::UNPCKL: { + // Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in + // which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE + // moves upper half elements into the lower half part. For example: + // + // t2: v16i8 = vector_shuffle<8,9,10,11,12,13,14,15,u,u,u,u,u,u,u,u> t1, + // undef:v16i8 + // t3: v16i8 = X86ISD::UNPCKL undef:v16i8, t2 + // + // will be combined to: + // + // t3: v16i8 = X86ISD::UNPCKH undef:v16i8, t1 + + // This is only for 128-bit vectors. From SSE4.1 onward this combine may not + // happen due to advanced instructions. + if (!VT.is128BitVector()) + return SDValue(); + + auto Op0 = N.getOperand(0); + auto Op1 = N.getOperand(1); + if (Op0.getOpcode() == ISD::UNDEF && + Op1.getNode()->getOpcode() == ISD::VECTOR_SHUFFLE) { + ArrayRef Mask = cast(Op1.getNode())->getMask(); + + unsigned NumElts = VT.getVectorNumElements(); + SmallVector ExpectedMask(NumElts, -1); + std::iota(ExpectedMask.begin(), ExpectedMask.begin() + NumElts / 2, + NumElts / 2); + + auto ShufOp = Op1.getOperand(0); + if (isShuffleEquivalent(Op1, ShufOp, Mask, ExpectedMask)) + return DAG.getNode(X86ISD::UNPCKH, DL, VT, N.getOperand(0), ShufOp); + } + return SDValue(); + } default: return SDValue(); } @@ -21535,7 +23495,7 @@ static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, break; case X86ISD::PSHUFLW: case X86ISD::PSHUFHW: - assert(VT.getScalarType() == MVT::i16 && "Bad word shuffle type!"); + assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!"); if (combineRedundantHalfShuffle(N, Mask, DAG, DCI)) return SDValue(); // We combined away this shuffle, so we're done. @@ -21624,14 +23584,19 @@ static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) { return SDValue(); auto *SVN = cast(N); - ArrayRef Mask = SVN->getMask(); + SmallVector Mask; + for (int M : SVN->getMask()) + Mask.push_back(M); + SDValue V1 = N->getOperand(0); SDValue V2 = N->getOperand(1); - // We require the first shuffle operand to be the SUB node, and the second to - // be the ADD node. - // FIXME: We should support the commuted patterns. - if (V1->getOpcode() != ISD::FSUB || V2->getOpcode() != ISD::FADD) + // We require the first shuffle operand to be the FSUB node, and the second to + // be the FADD node. + if (V1.getOpcode() == ISD::FADD && V2.getOpcode() == ISD::FSUB) { + ShuffleVectorSDNode::commuteMask(Mask); + std::swap(V1, V2); + } else if (V1.getOpcode() != ISD::FSUB || V2.getOpcode() != ISD::FADD) return SDValue(); // If there are other uses of these operations we can't fold them. @@ -21682,7 +23647,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, return AddSub; // Combine 256-bit vector shuffles. 
This is only profitable when in AVX mode - if (Subtarget->hasFp256() && VT.is256BitVector() && + if (TLI.isTypeLegal(VT) && Subtarget->hasFp256() && VT.is256BitVector() && N->getOpcode() == ISD::VECTOR_SHUFFLE) return PerformShuffleCombine256(N, DAG, DCI, Subtarget); @@ -21866,21 +23831,45 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } -/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are -/// special and don't usually play with other vector types, it's better to -/// handle them early to be sure we emit efficient code by avoiding -/// store-load conversions. -static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) { - if (N->getValueType(0) != MVT::x86mmx || - N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR || - N->getOperand(0)->getValueType(0) != MVT::v2i32) - return SDValue(); +static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); - SDValue V = N->getOperand(0); - ConstantSDNode *C = dyn_cast(V.getOperand(1)); - if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32) - return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(V.getOperand(0)), - N->getValueType(0), V.getOperand(0)); + // Detect bitcasts between i32 to x86mmx low word. Since MMX types are + // special and don't usually play with other vector types, it's better to + // handle them early to be sure we emit efficient code by avoiding + // store-load conversions. + if (VT == MVT::x86mmx && N0.getOpcode() == ISD::BUILD_VECTOR && + N0.getValueType() == MVT::v2i32 && + isNullConstant(N0.getOperand(1))) { + SDValue N00 = N0->getOperand(0); + if (N00.getValueType() == MVT::i32) + return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00); + } + + // Convert a bitcasted integer logic operation that has one bitcasted + // floating-point operand and one constant operand into a floating-point + // logic operation. This may create a load of the constant, but that is + // cheaper than materializing the constant in an integer register and + // transferring it to an SSE register or transferring the SSE operand to + // integer register and back. + unsigned FPOpcode; + switch (N0.getOpcode()) { + case ISD::AND: FPOpcode = X86ISD::FAND; break; + case ISD::OR: FPOpcode = X86ISD::FOR; break; + case ISD::XOR: FPOpcode = X86ISD::FXOR; break; + default: return SDValue(); + } + if (((Subtarget->hasSSE1() && VT == MVT::f32) || + (Subtarget->hasSSE2() && VT == MVT::f64)) && + isa(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::BITCAST && + N0.getOperand(0).getOperand(0).getValueType() == VT) { + SDValue N000 = N0.getOperand(0).getOperand(0); + SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1)); + return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst); + } return SDValue(); } @@ -21910,26 +23899,26 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, InputVector.getNode()->getOperand(0)); // The mmx is indirect: (i64 extract_elt (v1i64 bitcast (x86mmx ...))). 
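[Editor's sketch, not part of the upstream patch] The new PerformBITCASTCombine logic above rests on the identity that bitwise operations commute with bitcasts, so an integer AND/OR/XOR on float bits can stay in the FP domain as FAND/FOR/FXOR. A self-contained check using memcpy as a portable bit_cast (the helper names are illustrative):

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t bitsOf(float F) { uint32_t U; std::memcpy(&U, &F, 4); return U; }
static float floatOf(uint32_t U) { float F; std::memcpy(&F, &U, 4); return F; }

int main() {
  float X = -3.5f;
  // AND in the integer domain matches FAND in the FP domain: masking off
  // the sign bit computes fabs.
  assert(floatOf(bitsOf(X) & 0x7fffffffu) == std::fabs(X));
  // XOR with the sign-bit mask matches FXOR: it flips the sign.
  assert(floatOf(bitsOf(X) ^ 0x80000000u) == -X);
  return 0;
}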
- SDValue MMXSrcOp = MMXSrc.getOperand(0); if (MMXSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT && MMXSrc.hasOneUse() && - MMXSrc.getValueType() == MVT::i64 && MMXSrcOp.hasOneUse() && - MMXSrcOp.getOpcode() == ISD::BITCAST && - MMXSrcOp.getValueType() == MVT::v1i64 && - MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx) - return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector), - N->getValueType(0), - MMXSrcOp.getOperand(0)); + MMXSrc.getValueType() == MVT::i64) { + SDValue MMXSrcOp = MMXSrc.getOperand(0); + if (MMXSrcOp.hasOneUse() && MMXSrcOp.getOpcode() == ISD::BITCAST && + MMXSrcOp.getValueType() == MVT::v1i64 && + MMXSrcOp.getOperand(0).getValueType() == MVT::x86mmx) + return DAG.getNode(X86ISD::MMX_MOVD2W, SDLoc(InputVector), + N->getValueType(0), MMXSrcOp.getOperand(0)); + } } EVT VT = N->getValueType(0); - if (VT == MVT::i1 && dyn_cast(N->getOperand(1)) && + if (VT == MVT::i1 && isa(N->getOperand(1)) && InputVector.getOpcode() == ISD::BITCAST && - dyn_cast(InputVector.getOperand(0))) { + isa(InputVector.getOperand(0))) { uint64_t ExtractedElt = - cast(N->getOperand(1))->getZExtValue(); + cast(N->getOperand(1))->getZExtValue(); uint64_t InputValue = - cast(InputVector.getOperand(0))->getZExtValue(); + cast(InputVector.getOperand(0))->getZExtValue(); uint64_t Res = (InputValue >> ExtractedElt) & 1; return DAG.getConstant(Res, dl, MVT::i1); } @@ -22036,96 +24025,6 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// \brief Matches a VSELECT onto min/max or return 0 if the node doesn't match. -static std::pair -matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS, - SelectionDAG &DAG, const X86Subtarget *Subtarget) { - if (!VT.isVector()) - return std::make_pair(0, false); - - bool NeedSplit = false; - switch (VT.getSimpleVT().SimpleTy) { - default: return std::make_pair(0, false); - case MVT::v4i64: - case MVT::v2i64: - if (!Subtarget->hasVLX()) - return std::make_pair(0, false); - break; - case MVT::v64i8: - case MVT::v32i16: - if (!Subtarget->hasBWI()) - return std::make_pair(0, false); - break; - case MVT::v16i32: - case MVT::v8i64: - if (!Subtarget->hasAVX512()) - return std::make_pair(0, false); - break; - case MVT::v32i8: - case MVT::v16i16: - case MVT::v8i32: - if (!Subtarget->hasAVX2()) - NeedSplit = true; - if (!Subtarget->hasAVX()) - return std::make_pair(0, false); - break; - case MVT::v16i8: - case MVT::v8i16: - case MVT::v4i32: - if (!Subtarget->hasSSE2()) - return std::make_pair(0, false); - } - - // SSE2 has only a small subset of the operations. - bool hasUnsigned = Subtarget->hasSSE41() || - (Subtarget->hasSSE2() && VT == MVT::v16i8); - bool hasSigned = Subtarget->hasSSE41() || - (Subtarget->hasSSE2() && VT == MVT::v8i16); - - ISD::CondCode CC = cast(Cond.getOperand(2))->get(); - - unsigned Opc = 0; - // Check for x CC y ? x : y. - if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && - DAG.isEqualTo(RHS, Cond.getOperand(1))) { - switch (CC) { - default: break; - case ISD::SETULT: - case ISD::SETULE: - Opc = hasUnsigned ? ISD::UMIN : 0; break; - case ISD::SETUGT: - case ISD::SETUGE: - Opc = hasUnsigned ? ISD::UMAX : 0; break; - case ISD::SETLT: - case ISD::SETLE: - Opc = hasSigned ? ISD::SMIN : 0; break; - case ISD::SETGT: - case ISD::SETGE: - Opc = hasSigned ? ISD::SMAX : 0; break; - } - // Check for x CC y ? y : x -- a min/max with reversed arms. 
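[Editor's sketch, not part of the upstream patch] The matchIntegerMINMAX code being deleted here recognized exactly these select shapes, which generic DAG combining now turns into ISD::UMIN/UMAX (and the signed variants) instead. The shapes it matched, verified on scalars:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 7u, 42u, 0xffffffffu})
    for (uint32_t Y : {0u, 7u, 42u, 0xffffffffu}) {
      // x CC y ? x : y with an unsigned less-than is UMIN...
      assert((X < Y ? X : Y) == std::min(X, Y));
      // ...and with the arms reversed it is UMAX.
      assert((X < Y ? Y : X) == std::max(X, Y));
    }
  return 0;
}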
- } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && - DAG.isEqualTo(RHS, Cond.getOperand(0))) { - switch (CC) { - default: break; - case ISD::SETULT: - case ISD::SETULE: - Opc = hasUnsigned ? ISD::UMAX : 0; break; - case ISD::SETUGT: - case ISD::SETUGE: - Opc = hasUnsigned ? ISD::UMIN : 0; break; - case ISD::SETLT: - case ISD::SETLE: - Opc = hasSigned ? ISD::SMAX : 0; break; - case ISD::SETGT: - case ISD::SETGE: - Opc = hasSigned ? ISD::SMIN : 0; break; - } - } - - return std::make_pair(Opc, NeedSplit); -} - static SDValue transformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -22189,7 +24088,8 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // ignored in unsafe-math mode). // We also try to create v2f32 min/max nodes, which we later widen to v4f32. if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && - VT != MVT::f80 && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && + VT != MVT::f80 && VT != MVT::f128 && + (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && (Subtarget->hasSSE2() || (Subtarget->hasSSE1() && VT.getScalarType() == MVT::f32))) { ISD::CondCode CC = cast(Cond.getOperand(2))->get(); @@ -22535,32 +24435,6 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, } } - // Try to match a min/max vector operation. - if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC) { - std::pair ret = matchIntegerMINMAX(Cond, VT, LHS, RHS, DAG, Subtarget); - unsigned Opc = ret.first; - bool NeedSplit = ret.second; - - if (Opc && NeedSplit) { - unsigned NumElems = VT.getVectorNumElements(); - // Extract the LHS vectors - SDValue LHS1 = Extract128BitVector(LHS, 0, DAG, DL); - SDValue LHS2 = Extract128BitVector(LHS, NumElems/2, DAG, DL); - - // Extract the RHS vectors - SDValue RHS1 = Extract128BitVector(RHS, 0, DAG, DL); - SDValue RHS2 = Extract128BitVector(RHS, NumElems/2, DAG, DL); - - // Create min/max for each subvector - LHS = DAG.getNode(Opc, DL, LHS1.getValueType(), LHS1, RHS1); - RHS = DAG.getNode(Opc, DL, LHS2.getValueType(), LHS2, RHS2); - - // Merge the result - return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LHS, RHS); - } else if (Opc) - return DAG.getNode(Opc, DL, VT, LHS, RHS); - } - // Simplify vector selection if condition value type matches vselect // operand type if (N->getOpcode() == ISD::VSELECT && CondVT == VT) { @@ -22635,7 +24509,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() && !DCI.isBeforeLegalize() && !ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { - unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits(); + unsigned BitWidth = Cond.getValueType().getScalarSizeInBits(); // Don't optimize vector selects that map to mask-registers. if (BitWidth == 1) @@ -22656,14 +24530,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, // FIXME: We don't support i16-element blends currently. We could and // should support them by making *all* the bits in the condition be set // rather than just the high bit and using an i8-element blend. - if (VT.getScalarType() == MVT::i16) + if (VT.getVectorElementType() == MVT::i16) return SDValue(); // Dynamic blending was only available from SSE4.1 onward. 
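[Editor's sketch, not part of the upstream patch] The byte-blend restriction above follows from PBLENDVB semantics: each result byte is chosen by the high bit of its mask byte only, which is why an i16-element condition would need all bits set rather than just the MSB. One lane modeled on scalars (which source the set bit selects is abbreviated here; the point is that the low mask bits are ignored):

#include <cassert>
#include <cstdint>

// One lane of a byte blend: only bit 7 of the mask byte is consulted.
static uint8_t blendByte(uint8_t MaskByte, uint8_t IfSet, uint8_t IfClear) {
  return (MaskByte & 0x80) ? IfSet : IfClear;
}

int main() {
  assert(blendByte(0x80, 1, 2) == 1); // MSB set: first choice taken
  assert(blendByte(0x7f, 1, 2) == 2); // every low bit set, MSB clear
  return 0;
}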
- if (VT.getSizeInBits() == 128 && !Subtarget->hasSSE41()) + if (VT.is128BitVector() && !Subtarget->hasSSE41()) return SDValue(); // Byte blends are only available in AVX2 - if (VT.getSizeInBits() == 256 && VT.getScalarType() == MVT::i8 && - !Subtarget->hasAVX2()) + if (VT == MVT::v32i8 && !Subtarget->hasAVX2()) return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); @@ -22773,12 +24646,9 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) { SetCC.getOpcode() == ISD::AND) { if (SetCC.getOpcode() == ISD::AND) { int OpIdx = -1; - ConstantSDNode *CS; - if ((CS = dyn_cast(SetCC.getOperand(0))) && - CS->getZExtValue() == 1) + if (isOneConstant(SetCC.getOperand(0))) OpIdx = 1; - if ((CS = dyn_cast(SetCC.getOperand(1))) && - CS->getZExtValue() == 1) + if (isOneConstant(SetCC.getOperand(1))) OpIdx = 0; if (OpIdx == -1) break; @@ -22857,8 +24727,7 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, X86::CondCode &CC1, SDValue &Flags, bool &isAnd) { if (Cond->getOpcode() == X86ISD::CMP) { - ConstantSDNode *CondOp1C = dyn_cast(Cond->getOperand(1)); - if (!CondOp1C || !CondOp1C->isNullValue()) + if (!isNullConstant(Cond->getOperand(1))) return false; Cond = Cond->getOperand(0); @@ -23102,106 +24971,15 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, - const X86Subtarget *Subtarget) { - unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); - switch (IntNo) { - default: return SDValue(); - // SSE/AVX/AVX2 blend intrinsics. - case Intrinsic::x86_avx2_pblendvb: - // Don't try to simplify this intrinsic if we don't have AVX2. - if (!Subtarget->hasAVX2()) - return SDValue(); - // FALL-THROUGH - case Intrinsic::x86_avx_blendv_pd_256: - case Intrinsic::x86_avx_blendv_ps_256: - // Don't try to simplify this intrinsic if we don't have AVX. - if (!Subtarget->hasAVX()) - return SDValue(); - // FALL-THROUGH - case Intrinsic::x86_sse41_blendvps: - case Intrinsic::x86_sse41_blendvpd: - case Intrinsic::x86_sse41_pblendvb: { - SDValue Op0 = N->getOperand(1); - SDValue Op1 = N->getOperand(2); - SDValue Mask = N->getOperand(3); - - // Don't try to simplify this intrinsic if we don't have SSE4.1. - if (!Subtarget->hasSSE41()) - return SDValue(); - - // fold (blend A, A, Mask) -> A - if (Op0 == Op1) - return Op0; - // fold (blend A, B, allZeros) -> A - if (ISD::isBuildVectorAllZeros(Mask.getNode())) - return Op0; - // fold (blend A, B, allOnes) -> B - if (ISD::isBuildVectorAllOnes(Mask.getNode())) - return Op1; - - // Simplify the case where the mask is a constant i32 value. - if (ConstantSDNode *C = dyn_cast(Mask)) { - if (C->isNullValue()) - return Op0; - if (C->isAllOnesValue()) - return Op1; - } - - return SDValue(); - } - - // Packed SSE2/AVX2 arithmetic shift immediate intrinsics. 
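[Editor's sketch, not part of the upstream patch] The blend-intrinsic combine deleted just above folded the trivial cases: blend(A, A, Mask) -> A, blend(A, B, allZeros) -> A, blend(A, B, allOnes) -> B. Those folds hold lane by lane, as a scalar model shows:

#include <cassert>

// One lane of a variable blend with a known mask bit.
static int blendLane(int A, int B, bool MaskBit) { return MaskBit ? B : A; }

int main() {
  int A = 10, B = 20;
  assert(blendLane(A, A, true) == A);  // blend(A, A, Mask) -> A
  assert(blendLane(A, B, false) == A); // blend(A, B, allZeros) -> A
  assert(blendLane(A, B, true) == B);  // blend(A, B, allOnes) -> B
  return 0;
}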
- case Intrinsic::x86_sse2_psrai_w: - case Intrinsic::x86_sse2_psrai_d: - case Intrinsic::x86_avx2_psrai_w: - case Intrinsic::x86_avx2_psrai_d: - case Intrinsic::x86_sse2_psra_w: - case Intrinsic::x86_sse2_psra_d: - case Intrinsic::x86_avx2_psra_w: - case Intrinsic::x86_avx2_psra_d: { - SDValue Op0 = N->getOperand(1); - SDValue Op1 = N->getOperand(2); - EVT VT = Op0.getValueType(); - assert(VT.isVector() && "Expected a vector type!"); - - if (isa(Op1)) - Op1 = Op1.getOperand(0); - - if (!isa(Op1)) - return SDValue(); - - EVT SVT = VT.getVectorElementType(); - unsigned SVTBits = SVT.getSizeInBits(); - - ConstantSDNode *CND = cast(Op1); - const APInt &C = APInt(SVTBits, CND->getAPIntValue().getZExtValue()); - uint64_t ShAmt = C.getZExtValue(); - - // Don't try to convert this shift into a ISD::SRA if the shift - // count is bigger than or equal to the element size. - if (ShAmt >= SVTBits) - return SDValue(); - - // Trivial case: if the shift count is zero, then fold this - // into the first operand. - if (ShAmt == 0) - return Op0; - - // Replace this packed shift intrinsic with a target independent - // shift dag node. - SDLoc DL(N); - SDValue Splat = DAG.getConstant(C, DL, VT); - return DAG.getNode(ISD::SRA, DL, VT, Op0, Splat); - } - } -} - /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. /// LEA + SHL, LEA + LEA. static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI) { + // An imul is usually smaller than the alternative sequence. + if (DAG.getMachineFunction().getFunction()->optForMinSize()) + return SDValue(); + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) return SDValue(); @@ -23228,9 +25006,11 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, MulAmt1 = 3; MulAmt2 = MulAmt / 3; } + + SDLoc DL(N); + SDValue NewMul; if (MulAmt2 && (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)){ - SDLoc DL(N); if (isPowerOf2_64(MulAmt2) && !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD)) @@ -23239,7 +25019,6 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, // is an add. std::swap(MulAmt1, MulAmt2); - SDValue NewMul; if (isPowerOf2_64(MulAmt1)) NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8)); @@ -23253,10 +25032,31 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG, else NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, DAG.getConstant(MulAmt2, DL, VT)); + } + if (!NewMul) { + assert(MulAmt != 0 && MulAmt != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) + && "Both cases that could cause potential overflows should have " + "already been handled."); + if (isPowerOf2_64(MulAmt - 1)) + // (mul x, 2^N + 1) => (add (shl x, N), x) + NewMul = DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), + DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), + DAG.getConstant(Log2_64(MulAmt - 1), DL, + MVT::i8))); + + else if (isPowerOf2_64(MulAmt + 1)) + // (mul x, 2^N - 1) => (sub (shl x, N), x) + NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getNode(ISD::SHL, DL, VT, + N->getOperand(0), + DAG.getConstant(Log2_64(MulAmt + 1), + DL, MVT::i8)), N->getOperand(0)); + } + + if (NewMul) // Do not add new nodes to DAG combiner worklist. 
DCI.CombineTo(N, NewMul, false); - } + return SDValue(); } @@ -23272,18 +25072,34 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { N1C && N0.getOpcode() == ISD::AND && N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == X86ISD::SETCC_CARRY || - ((N00.getOpcode() == ISD::ANY_EXTEND || - N00.getOpcode() == ISD::ZERO_EXTEND) && - N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) { - APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); - APInt ShAmt = N1C->getAPIntValue(); - Mask = Mask.shl(ShAmt); - if (Mask != 0) { - SDLoc DL(N); - return DAG.getNode(ISD::AND, DL, VT, - N00, DAG.getConstant(Mask, DL, VT)); - } + APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); + APInt ShAmt = N1C->getAPIntValue(); + Mask = Mask.shl(ShAmt); + bool MaskOK = false; + // We can handle cases concerning bit-widening nodes containing setcc_c if + // we carefully interrogate the mask to make sure we are semantics + // preserving. + // The transform is not safe if the result of C1 << C2 exceeds the bitwidth + // of the underlying setcc_c operation if the setcc_c was zero extended. + // Consider the following example: + // zext(setcc_c) -> i32 0x0000FFFF + // c1 -> i32 0x0000FFFF + // c2 -> i32 0x00000001 + // (shl (and (setcc_c), c1), c2) -> i32 0x0001FFFE + // (and setcc_c, (c1 << c2)) -> i32 0x0000FFFE + if (N00.getOpcode() == X86ISD::SETCC_CARRY) { + MaskOK = true; + } else if (N00.getOpcode() == ISD::SIGN_EXTEND && + N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + MaskOK = true; + } else if ((N00.getOpcode() == ISD::ZERO_EXTEND || + N00.getOpcode() == ISD::ANY_EXTEND) && + N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { + MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits()); + } + if (MaskOK && Mask != 0) { + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT)); } } @@ -23304,6 +25120,59 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue PerformSRACombine(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + unsigned Size = VT.getSizeInBits(); + + // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) + // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or + // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) + // depending on sign of (SarConst - [56,48,32,24,16]) + + // sexts in X86 are MOVs. The MOVs have the same code size + // as above SHIFTs (only SHIFT on 1 has lower code size). + // However the MOVs have 2 advantages to a SHIFT: + // 1. MOVs can write to a register that differs from source + // 2. 
MOVs accept memory operands + + if (!VT.isInteger() || VT.isVector() || N1.getOpcode() != ISD::Constant || + N0.getOpcode() != ISD::SHL || !N0.hasOneUse() || + N0.getOperand(1).getOpcode() != ISD::Constant) + return SDValue(); + + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + APInt ShlConst = (cast(N01))->getAPIntValue(); + APInt SarConst = (cast(N1))->getAPIntValue(); + EVT CVT = N1.getValueType(); + + if (SarConst.isNegative()) + return SDValue(); + + for (MVT SVT : MVT::integer_valuetypes()) { + unsigned ShiftSize = SVT.getSizeInBits(); + // skipping types without corresponding sext/zext and + // ShlConst that is not one of [56,48,32,24,16] + if (ShiftSize < 8 || ShiftSize > 64 || ShlConst != Size - ShiftSize) + continue; + SDLoc DL(N); + SDValue NN = + DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); + SarConst = SarConst - (Size - ShiftSize); + if (SarConst == 0) + return NN; + else if (SarConst.isNegative()) + return DAG.getNode(ISD::SHL, DL, VT, NN, + DAG.getConstant(-SarConst, DL, CVT)); + else + return DAG.getNode(ISD::SRA, DL, VT, NN, + DAG.getConstant(SarConst, DL, CVT)); + } + return SDValue(); +} + /// \brief Returns a vector of 0s if the node in input is a vector logical /// shift by a constant amount which is known to be bigger than or equal /// to the vector element size in bits. @@ -23321,14 +25190,15 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, if (auto *AmtBV = dyn_cast(Amt)) if (auto *AmtSplat = AmtBV->getConstantSplatNode()) { APInt ShiftAmt = AmtSplat->getAPIntValue(); - unsigned MaxAmount = VT.getVectorElementType().getSizeInBits(); + unsigned MaxAmount = + VT.getSimpleVT().getVectorElementType().getSizeInBits(); // SSE2/AVX2 logical shifts always return a vector of 0s // if the shift amount is bigger than or equal to // the element size. The constant shift amount will be // encoded as a 8-bit immediate. if (ShiftAmt.trunc(8).uge(MaxAmount)) - return getZeroVector(VT, Subtarget, DAG, DL); + return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, DL); } return SDValue(); @@ -23342,6 +25212,10 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, if (SDValue V = PerformSHLCombine(N, DAG)) return V; + if (N->getOpcode() == ISD::SRA) + if (SDValue V = PerformSRACombine(N, DAG)) + return V; + // Try to fold this logical shift into a zero vector. if (N->getOpcode() != ISD::SRA) if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget)) @@ -23537,7 +25411,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // Set N0 and N1 to hold the inputs to the new wide operation. 
N0 = N0->getOperand(0); if (RHSConstSplat) { - N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(), + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getVectorElementType(), SDValue(RHSConstSplat, 0)); SmallVector C(WideVT.getVectorNumElements(), N1); N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C); @@ -23552,9 +25426,9 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, case ISD::ANY_EXTEND: return Op; case ISD::ZERO_EXTEND: { - unsigned InBits = NarrowVT.getScalarType().getSizeInBits(); + unsigned InBits = NarrowVT.getScalarSizeInBits(); APInt Mask = APInt::getAllOnesValue(InBits); - Mask = Mask.zext(VT.getScalarType().getSizeInBits()); + Mask = Mask.zext(VT.getScalarSizeInBits()); return DAG.getNode(ISD::AND, DL, VT, Op, DAG.getConstant(Mask, DL, VT)); } @@ -23656,6 +25530,41 @@ static SDValue VectorZextCombine(SDNode *N, SelectionDAG &DAG, return DAG.getBitcast(N0.getValueType(), NewShuffle); } +/// If both input operands of a logic op are being cast from floating point +/// types, try to convert this into a floating point logic node to avoid +/// unnecessary moves from SSE to integer registers. +static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + unsigned FPOpcode = ISD::DELETED_NODE; + if (N->getOpcode() == ISD::AND) + FPOpcode = X86ISD::FAND; + else if (N->getOpcode() == ISD::OR) + FPOpcode = X86ISD::FOR; + else if (N->getOpcode() == ISD::XOR) + FPOpcode = X86ISD::FXOR; + + assert(FPOpcode != ISD::DELETED_NODE && + "Unexpected input node for FP logic conversion"); + + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDLoc DL(N); + if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST && + ((Subtarget->hasSSE1() && VT == MVT::i32) || + (Subtarget->hasSSE2() && VT == MVT::i64))) { + SDValue N00 = N0.getOperand(0); + SDValue N10 = N1.getOperand(0); + EVT N00Type = N00.getValueType(); + EVT N10Type = N10.getValueType(); + if (N00Type.isFloatingPoint() && N10Type.isFloatingPoint()) { + SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10); + return DAG.getBitcast(VT, FPLogic); + } + } + return SDValue(); +} + static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { @@ -23668,6 +25577,9 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget)) return R; + if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) + return FPLogic; + EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -23728,6 +25640,9 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (SDValue R = CMPEQCombine(N, DAG, DCI, Subtarget)) return R; + if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) + return FPLogic; + SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); @@ -23799,7 +25714,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (!Subtarget->hasSSE41()) return SDValue(); - EVT BlendVT = (VT == MVT::v4i64) ? MVT::v32i8 : MVT::v16i8; + MVT BlendVT = (VT == MVT::v4i64) ? 
MVT::v32i8 : MVT::v16i8; X = DAG.getBitcast(BlendVT, X); Y = DAG.getBitcast(BlendVT, Y); @@ -23813,9 +25728,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c) - MachineFunction &MF = DAG.getMachineFunction(); - bool OptForSize = - MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize); + bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize(); // SHLD/SHRD instructions have lower register pressure, but on some // platforms they have higher latency than the equivalent @@ -23913,17 +25826,188 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes +// Try to turn tests against the signbit in the form of: +// XOR(TRUNCATE(SRL(X, size(X)-1)), 1) +// into: +// SETGT(X, -1) +static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) { + // This is only worth doing if the output type is i8. + if (N->getValueType(0) != MVT::i8) + return SDValue(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // We should be performing an xor against a truncated shift. + if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse()) + return SDValue(); + + // Make sure we are performing an xor against one. + if (!isOneConstant(N1)) + return SDValue(); + + // SetCC on x86 zero extends so only act on this if it's a logical shift. + SDValue Shift = N0.getOperand(0); + if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse()) + return SDValue(); + + // Make sure we are truncating from one of i16, i32 or i64. + EVT ShiftTy = Shift.getValueType(); + if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64) + return SDValue(); + + // Make sure the shift amount extracts the sign bit. + if (!isa(Shift.getOperand(1)) || + Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1) + return SDValue(); + + // Create a greater-than comparison against -1. + // N.B. Using SETGE against 0 works but we want a canonical looking + // comparison, using SETGT matches up with what TranslateX86CC. + SDLoc DL(N); + SDValue ShiftOp = Shift.getOperand(0); + EVT ShiftOpTy = ShiftOp.getValueType(); + SDValue Cond = DAG.getSetCC(DL, MVT::i8, ShiftOp, + DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT); + return Cond; +} + static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { if (DCI.isBeforeLegalizeOps()) return SDValue(); + if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) + return RV; + if (Subtarget->hasCMov()) if (SDValue RV = performIntegerAbsCombine(N, DAG)) return RV; + if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) + return FPLogic; + + return SDValue(); +} + +/// This function detects the AVG pattern between vectors of unsigned i8/i16, +/// which is c = (a + b + 1) / 2, and replace this operation with the efficient +/// X86ISD::AVG instruction. +static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, + const X86Subtarget *Subtarget, SDLoc DL) { + if (!VT.isVector() || !VT.isSimple()) + return SDValue(); + EVT InVT = In.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + + EVT ScalarVT = VT.getVectorElementType(); + if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && + isPowerOf2_32(NumElems))) + return SDValue(); + + // InScalarVT is the intermediate type in AVG pattern and it should be greater + // than the original input type (i8/i16). 
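[Editor's sketch, not part of the upstream patch] foldXorTruncShiftIntoCmp, added earlier in this hunk, relies on the identity that XOR(TRUNCATE(SRL(X, bits-1)), 1) is 1 exactly when the sign bit of X is clear, i.e. when X > -1. A standalone check over boundary values:

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X : {INT32_MIN, -2, -1, 0, 1, INT32_MAX}) {
    uint8_t ViaShiftXor = (uint8_t)(((uint32_t)X >> 31) ^ 1u);
    uint8_t ViaSetGT = (X > -1) ? 1 : 0; // SETGT X, -1
    assert(ViaShiftXor == ViaSetGT);
  }
  return 0;
}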
+  EVT InScalarVT = InVT.getVectorElementType();
+  if (InScalarVT.getSizeInBits() <= ScalarVT.getSizeInBits())
+    return SDValue();
+
+  if (Subtarget->hasAVX512()) {
+    if (VT.getSizeInBits() > 512)
+      return SDValue();
+  } else if (Subtarget->hasAVX2()) {
+    if (VT.getSizeInBits() > 256)
+      return SDValue();
+  } else {
+    if (VT.getSizeInBits() > 128)
+      return SDValue();
+  }
+
+  // Detect the following pattern:
+  //
+  //   %1 = zext <N x i8> %a to <N x i32>
+  //   %2 = zext <N x i8> %b to <N x i32>
+  //   %3 = add nuw nsw <N x i32> %1, <i32 1 x N>
+  //   %4 = add nuw nsw <N x i32> %3, %2
+  //   %5 = lshr <N x i32> %N, <i32 1 x N>
+  //   %6 = trunc <N x i32> %5 to <N x i8>
+  //
+  // In AVX512, the last instruction can also be a trunc store.
+
+  if (In.getOpcode() != ISD::SRL)
+    return SDValue();
+
+  // A lambda checking the given SDValue is a constant vector and each element
+  // is in the range [Min, Max].
+  auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
+    BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(V);
+    if (!BV || !BV->isConstant())
+      return false;
+    for (unsigned i = 0, e = V.getNumOperands(); i < e; i++) {
+      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(i));
+      if (!C)
+        return false;
+      uint64_t Val = C->getZExtValue();
+      if (Val < Min || Val > Max)
+        return false;
+    }
+    return true;
+  };
+
+  // Check if each element of the vector is right-shifted by one.
+  auto LHS = In.getOperand(0);
+  auto RHS = In.getOperand(1);
+  if (!IsConstVectorInRange(RHS, 1, 1))
+    return SDValue();
+  if (LHS.getOpcode() != ISD::ADD)
+    return SDValue();
+
+  // Detect a pattern of a + b + 1 where the order doesn't matter.
+  SDValue Operands[3];
+  Operands[0] = LHS.getOperand(0);
+  Operands[1] = LHS.getOperand(1);
+
+  // Take care of the case when one of the operands is a constant vector whose
+  // element is in the range [1, 256].
+  if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
+      Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
+      Operands[0].getOperand(0).getValueType() == VT) {
+    // The pattern is detected. Subtract one from the constant vector, then
+    // demote it and emit X86ISD::AVG instruction.
+    SDValue One = DAG.getConstant(1, DL, InScalarVT);
+    SDValue Ones = DAG.getNode(ISD::BUILD_VECTOR, DL, InVT,
+                               SmallVector<SDValue, 8>(NumElems, One));
+    Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], Ones);
+    Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
+    return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
+                       Operands[1]);
+  }
+
+  if (Operands[0].getOpcode() == ISD::ADD)
+    std::swap(Operands[0], Operands[1]);
+  else if (Operands[1].getOpcode() != ISD::ADD)
+    return SDValue();
+  Operands[2] = Operands[1].getOperand(0);
+  Operands[1] = Operands[1].getOperand(1);
+
+  // Now we have three operands of two additions. Check that one of them is a
+  // constant vector with ones, and the other two are promoted from i8/i16.
+  for (int i = 0; i < 3; ++i) {
+    if (!IsConstVectorInRange(Operands[i], 1, 1))
+      continue;
+    std::swap(Operands[i], Operands[2]);
+
+    // Check if Operands[0] and Operands[1] are results of type promotion.
+    for (int j = 0; j < 2; ++j)
+      if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
+          Operands[j].getOperand(0).getValueType() != VT)
+        return SDValue();
+
+    // The pattern is detected, emit X86ISD::AVG instruction.
+    return DAG.getNode(X86ISD::AVG, DL, VT, Operands[0].getOperand(0),
+                       Operands[1].getOperand(0));
+  }
+
+  return SDValue();
+}
@@ -23940,10 +26024,13 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
 
   // For chips with slow 32-byte unaligned loads, break the 32-byte operation
   // into two 16-byte operations.
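[Editor's sketch, not part of the upstream patch] detectAVGPattern above matches the zext/add/add-1/lshr/trunc chain because (a + b + 1) >> 1, computed in a wider type, is exactly the rounding average that PAVGB/PAVGW produce. A reference check on u8 (avgRoundUp is an illustrative helper, not an LLVM API):

#include <cassert>
#include <cstdint>

// Reference rounding average, computed in a wider type so a + b + 1 cannot
// wrap; this mirrors the zext/add/lshr/trunc chain matched above.
static uint8_t avgRoundUp(uint8_t A, uint8_t B) {
  return (uint8_t)(((uint16_t)A + (uint16_t)B + 1) >> 1);
}

int main() {
  assert(avgRoundUp(0, 0) == 0);
  assert(avgRoundUp(255, 255) == 255); // no overflow in the wide type
  assert(avgRoundUp(1, 2) == 2);       // ties round up, as PAVGB does
  return 0;
}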
ISD::LoadExtType Ext = Ld->getExtensionType(); + bool Fast; + unsigned AddressSpace = Ld->getAddressSpace(); unsigned Alignment = Ld->getAlignment(); - bool IsAligned = Alignment == 0 || Alignment >= MemVT.getSizeInBits()/8; - if (RegVT.is256BitVector() && Subtarget->isUnalignedMem32Slow() && - !DCI.isBeforeLegalizeOps() && !IsAligned && Ext == ISD::NON_EXTLOAD) { + if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && + Ext == ISD::NON_EXTLOAD && + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, + AddressSpace, Alignment, &Fast) && !Fast) { unsigned NumElems = RegVT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -24012,8 +26099,8 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. - assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) - && "WideVecVT should be legal"); + assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && + "WideVecVT should be legal"); WideSrc0 = DAG.getVectorShuffle(WideVecVT, dl, WideSrc0, DAG.getUNDEF(WideVecVT), &ShuffleVec[0]); } @@ -24026,8 +26113,8 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i != NumElems; ++i) ShuffleVec[i] = i * SizeRatio; - for (unsigned i = NumElems; i != NumElems*SizeRatio; ++i) - ShuffleVec[i] = NumElems*SizeRatio; + for (unsigned i = NumElems; i != NumElems * SizeRatio; ++i) + ShuffleVec[i] = NumElems * SizeRatio; NewMask = DAG.getVectorShuffle(WideVecVT, dl, NewMask, DAG.getConstant(0, dl, WideVecVT), &ShuffleVec[0]); @@ -24055,7 +26142,6 @@ static SDValue PerformMLOADCombine(SDNode *N, SelectionDAG &DAG, ISD::NON_EXTLOAD); SDValue NewVec = DAG.getNode(X86ISD::VSEXT, dl, VT, WideLd); return DCI.CombineTo(N, NewVec, WideLd.getValue(1), true); - } /// PerformMSTORECombine - Resolve truncating stores static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, @@ -24073,6 +26159,15 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, unsigned FromSz = VT.getVectorElementType().getSizeInBits(); unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // The truncating store is legal in some cases. For example + // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw + // are designated for truncate store. + // In this case we don't need any further transformations. + if (TLI.isTruncStoreLegal(VT, StVT)) + return SDValue(); + // From, To sizes and ElemCount must be pow of two assert (isPowerOf2_32(NumElems * FromSz * ToSz) && "Unexpected size for truncating masked store"); @@ -24096,12 +26191,12 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, ShuffleVec[i] = i * SizeRatio; // Can't shuffle using an illegal type. 
- assert (DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) - && "WideVecVT should be legal"); + assert(DAG.getTargetLoweringInfo().isTypeLegal(WideVecVT) && + "WideVecVT should be legal"); SDValue TruncatedVal = DAG.getVectorShuffle(WideVecVT, dl, WideVec, - DAG.getUNDEF(WideVecVT), - &ShuffleVec[0]); + DAG.getUNDEF(WideVecVT), + &ShuffleVec[0]); SDValue NewMask; SDValue Mask = Mst->getMask(); @@ -24133,8 +26228,9 @@ static SDValue PerformMSTORECombine(SDNode *N, SelectionDAG &DAG, NewMask = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewMaskVT, Ops); } - return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, Mst->getBasePtr(), - NewMask, StVT, Mst->getMemOperand(), false); + return DAG.getMaskedStore(Mst->getChain(), dl, TruncatedVal, + Mst->getBasePtr(), NewMask, StVT, + Mst->getMemOperand(), false); } /// PerformSTORECombine - Do target-specific dag combines on STORE nodes. static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, @@ -24148,10 +26244,12 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // If we are saving a concatenation of two XMM registers and 32-byte stores // are slow, such as on Sandy Bridge, perform two 16-byte stores. + bool Fast; + unsigned AddressSpace = St->getAddressSpace(); unsigned Alignment = St->getAlignment(); - bool IsAligned = Alignment == 0 || Alignment >= VT.getSizeInBits()/8; - if (VT.is256BitVector() && Subtarget->isUnalignedMem32Slow() && - StVT == VT && !IsAligned) { + if (VT.is256BitVector() && StVT == VT && + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + AddressSpace, Alignment, &Fast) && !Fast) { unsigned NumElems = VT.getVectorNumElements(); if (NumElems < 2) return SDValue(); @@ -24178,12 +26276,29 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // First, pack all of the elements in one place. Next, store to memory // in fewer chunks. if (St->isTruncatingStore() && VT.isVector()) { + // Check if we can detect an AVG pattern from the truncation. If yes, + // replace the trunc store by a normal store with the result of X86ISD::AVG + // instruction. + SDValue Avg = + detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, Subtarget, dl); + if (Avg.getNode()) + return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); unsigned NumElems = VT.getVectorNumElements(); assert(StVT != VT && "Cannot truncate to the same type"); unsigned FromSz = VT.getVectorElementType().getSizeInBits(); unsigned ToSz = StVT.getVectorElementType().getSizeInBits(); + // The truncating store is legal in some cases. For example + // vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw + // are designated for truncate store. + // In this case we don't need any further transformations. + if (TLI.isTruncStoreLegal(VT, StVT)) + return SDValue(); + // From, To sizes and ElemCount must be pow of two if (!isPowerOf2_32(NumElems * FromSz * ToSz)) return SDValue(); // We are going to use the original vector elt for storing. @@ -24306,7 +26421,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store // pair instead. if (Subtarget->is64Bit() || F64IsLegal) { - EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64; + MVT LdVT = Subtarget->is64Bit() ? 
MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), @@ -24539,8 +26654,234 @@ static SDValue PerformFSUBCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// Truncate a group of v4i32 into v16i8/v8i16 using X86ISD::PACKUS. +static SDValue +combineVectorTruncationWithPACKUS(SDNode *N, SelectionDAG &DAG, + SmallVector &Regs) { + assert(Regs.size() > 0 && (Regs[0].getValueType() == MVT::v4i32 || + Regs[0].getValueType() == MVT::v2i64)); + EVT OutVT = N->getValueType(0); + EVT OutSVT = OutVT.getVectorElementType(); + EVT InVT = Regs[0].getValueType(); + EVT InSVT = InVT.getVectorElementType(); + SDLoc DL(N); + + // First, use mask to unset all bits that won't appear in the result. + assert((OutSVT == MVT::i8 || OutSVT == MVT::i16) && + "OutSVT can only be either i8 or i16."); + SDValue MaskVal = + DAG.getConstant(OutSVT == MVT::i8 ? 0xFF : 0xFFFF, DL, InSVT); + SDValue MaskVec = DAG.getNode( + ISD::BUILD_VECTOR, DL, InVT, + SmallVector(InVT.getVectorNumElements(), MaskVal)); + for (auto &Reg : Regs) + Reg = DAG.getNode(ISD::AND, DL, InVT, MaskVec, Reg); + + MVT UnpackedVT, PackedVT; + if (OutSVT == MVT::i8) { + UnpackedVT = MVT::v8i16; + PackedVT = MVT::v16i8; + } else { + UnpackedVT = MVT::v4i32; + PackedVT = MVT::v8i16; + } + + // In each iteration, truncate the type by a half size. + auto RegNum = Regs.size(); + for (unsigned j = 1, e = InSVT.getSizeInBits() / OutSVT.getSizeInBits(); + j < e; j *= 2, RegNum /= 2) { + for (unsigned i = 0; i < RegNum; i++) + Regs[i] = DAG.getNode(ISD::BITCAST, DL, UnpackedVT, Regs[i]); + for (unsigned i = 0; i < RegNum / 2; i++) + Regs[i] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[i * 2], + Regs[i * 2 + 1]); + } + + // If the type of the result is v8i8, we need do one more X86ISD::PACKUS, and + // then extract a subvector as the result since v8i8 is not a legal type. + if (OutVT == MVT::v8i8) { + Regs[0] = DAG.getNode(X86ISD::PACKUS, DL, PackedVT, Regs[0], Regs[0]); + Regs[0] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, Regs[0], + DAG.getIntPtrConstant(0, DL)); + return Regs[0]; + } else if (RegNum > 1) { + Regs.resize(RegNum); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs); + } else + return Regs[0]; +} + +/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS. +static SDValue +combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG, + SmallVector &Regs) { + assert(Regs.size() > 0 && Regs[0].getValueType() == MVT::v4i32); + EVT OutVT = N->getValueType(0); + SDLoc DL(N); + + // Shift left by 16 bits, then arithmetic-shift right by 16 bits. + SDValue ShAmt = DAG.getConstant(16, DL, MVT::i32); + for (auto &Reg : Regs) { + Reg = getTargetVShiftNode(X86ISD::VSHLI, DL, MVT::v4i32, Reg, ShAmt, DAG); + Reg = getTargetVShiftNode(X86ISD::VSRAI, DL, MVT::v4i32, Reg, ShAmt, DAG); + } + + for (unsigned i = 0, e = Regs.size() / 2; i < e; i++) + Regs[i] = DAG.getNode(X86ISD::PACKSS, DL, MVT::v8i16, Regs[i * 2], + Regs[i * 2 + 1]); + + if (Regs.size() > 2) { + Regs.resize(Regs.size() / 2); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Regs); + } else + return Regs[0]; +} + +/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into +/// X86ISD::PACKUS/X86ISD::PACKSS operations. 
We do it here because after type
+/// legalization the truncation will be translated into a BUILD_VECTOR with each
+/// element that is extracted from a vector and then truncated, and it is
+/// difficult to do this optimization based on them.
+static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
+                                       const X86Subtarget *Subtarget) {
+  EVT OutVT = N->getValueType(0);
+  if (!OutVT.isVector())
+    return SDValue();
+
+  SDValue In = N->getOperand(0);
+  if (!In.getValueType().isSimple())
+    return SDValue();
+
+  EVT InVT = In.getValueType();
+  unsigned NumElems = OutVT.getVectorNumElements();
+
+  // TODO: On AVX2, the behavior of X86ISD::PACKUS is different from that on
+  // SSE2, and we need to take care of it specially.
+  // AVX512 provides vpmovdb.
+  if (!Subtarget->hasSSE2() || Subtarget->hasAVX2())
+    return SDValue();
+
+  EVT OutSVT = OutVT.getVectorElementType();
+  EVT InSVT = InVT.getVectorElementType();
+  if (!((InSVT == MVT::i32 || InSVT == MVT::i64) &&
+        (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
+        NumElems >= 8))
+    return SDValue();
+
+  // SSSE3's pshufb results in fewer instructions in the cases below.
+  if (Subtarget->hasSSSE3() && NumElems == 8 &&
+      ((OutSVT == MVT::i8 && InSVT != MVT::i64) ||
+       (InSVT == MVT::i32 && OutSVT == MVT::i16)))
+    return SDValue();
+
+  SDLoc DL(N);
+
+  // Split a long vector into vectors of legal type.
+  unsigned RegNum = InVT.getSizeInBits() / 128;
+  SmallVector<SDValue, 8> SubVec(RegNum);
+  if (InSVT == MVT::i32) {
+    for (unsigned i = 0; i < RegNum; i++)
+      SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
+                              DAG.getIntPtrConstant(i * 4, DL));
+  } else {
+    for (unsigned i = 0; i < RegNum; i++)
+      SubVec[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i64, In,
+                              DAG.getIntPtrConstant(i * 2, DL));
+  }
+
+  // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
+  // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
+  // truncate 2 x v4i32 to v8i16.
+  if (Subtarget->hasSSE41() || OutSVT == MVT::i8)
+    return combineVectorTruncationWithPACKUS(N, DAG, SubVec);
+  else if (InSVT == MVT::i32)
+    return combineVectorTruncationWithPACKSS(N, DAG, SubVec);
+  else
+    return SDValue();
+}
+
+static SDValue PerformTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
+                                      const X86Subtarget *Subtarget) {
+  // Try to detect AVG pattern first.
+  SDValue Avg = detectAVGPattern(N->getOperand(0), N->getValueType(0), DAG,
+                                 Subtarget, SDLoc(N));
+  if (Avg.getNode())
+    return Avg;
+
+  return combineVectorTruncation(N, DAG, Subtarget);
+}
+
+/// Do target-specific dag combines on floating point negations.
+static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  EVT SVT = VT.getScalarType();
+  SDValue Arg = N->getOperand(0);
+  SDLoc DL(N);
+
+  // Let legalize expand this if it isn't a legal type yet.
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
+  // If we're negating a FMUL node on a target with FMA, then we can avoid the
+  // use of a constant by performing (-0 - A*B) instead.
+  // FIXME: Check rounding control flags as well once it becomes available.
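[Editor's sketch, not part of the upstream patch] Stepping back to the PACKSS truncation path above: combineVectorTruncationWithPACKSS shifts each lane left and then arithmetic-right by 16 so the value is sign-extended from its low 16 bits, which means the signed saturation in PACKSS never clips and the pack acts as a pure truncation. One lane modeled on scalars (packssLane is illustrative; the arithmetic right shift of a negative value is the common compiler behavior assumed here):

#include <cassert>
#include <cstdint>

// Saturating narrow, as PACKSS does per lane.
static int16_t packssLane(int32_t V) {
  if (V > INT16_MAX) return INT16_MAX;
  if (V < INT16_MIN) return INT16_MIN;
  return (int16_t)V;
}

int main() {
  for (int32_t Lane : {0x12345678, (int32_t)0xabcd8765}) {
    // VSHLI then VSRAI by 16: sign-extend from the low 16 bits.
    int32_t Prepared = (int32_t)((uint32_t)Lane << 16) >> 16;
    assert(packssLane(Prepared) == (int16_t)(Lane & 0xffff)); // pure truncation
  }
  return 0;
}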
+  if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
+      Arg->getFlags()->hasNoSignedZeros() && Subtarget->hasAnyFMA()) {
+    SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
+    return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+                       Arg.getOperand(1), Zero);
+  }
+
+  // If we're negating a FMA node, then we can adjust the
+  // instruction to include the extra negation.
+  if (Arg.hasOneUse()) {
+    switch (Arg.getOpcode()) {
+    case X86ISD::FMADD:
+      return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+                         Arg.getOperand(1), Arg.getOperand(2));
+    case X86ISD::FMSUB:
+      return DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0),
+                         Arg.getOperand(1), Arg.getOperand(2));
+    case X86ISD::FNMADD:
+      return DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0),
+                         Arg.getOperand(1), Arg.getOperand(2));
+    case X86ISD::FNMSUB:
+      return DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0),
+                         Arg.getOperand(1), Arg.getOperand(2));
+    }
+  }
+  return SDValue();
+}
+
+static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
+                                 const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (VT.is512BitVector() && !Subtarget->hasDQI()) {
+    // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension.
+    // These logic operations may be executed in the integer domain.
+    SDLoc dl(N);
+    MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
+    MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
+
+    SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0));
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1));
+    unsigned IntOpcode = 0;
+    switch (N->getOpcode()) {
+    default: llvm_unreachable("Unexpected FP logic op");
+    case X86ISD::FOR: IntOpcode = ISD::OR; break;
+    case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+    case X86ISD::FAND: IntOpcode = ISD::AND; break;
+    case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+    }
+    SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
+    return DAG.getNode(ISD::BITCAST, dl, VT, IntOp);
+  }
+  return SDValue();
+}
 /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
-static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,
+                                 const X86Subtarget *Subtarget) {
   assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
 
   // F[X]OR(0.0, x) -> x
@@ -24552,7 +26893,8 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
     if (C->getValueAPF().isPosZero())
       return N->getOperand(0);
-  return SDValue();
+
+  return lowerX86FPLogicOp(N, DAG, Subtarget);
 }
 
 /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes.
@@ -24576,8 +26918,65 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {
                      N->getOperand(0), N->getOperand(1));
 }
 
+static SDValue performFMinNumFMaxNumCombine(SDNode *N, SelectionDAG &DAG,
+                                            const X86Subtarget *Subtarget) {
+  if (Subtarget->useSoftFloat())
+    return SDValue();
+
+  // TODO: Check for global or instruction-level "nnan". In that case, we
+  // should be able to lower to FMAX/FMIN alone.
+  // TODO: If an operand is already known to be a NaN or not a NaN, this
+  // should be an optional swap and FMAX/FMIN.
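[Editor's sketch, not part of the upstream patch] On the FNEG folds earlier in this hunk: PerformFNEGCombine flips FMA variants under negation, e.g. -(a*b + c) becomes fnmsub(a, b, c). The scalar identities behind that table, checked on exactly representable values:

#include <cassert>

int main() {
  double A = 1.5, B = -2.25, C = 4.0;
  assert(-(A * B + C) == -(A * B) - C);  // -(FMADD)  == FNMSUB
  assert(-(A * B - C) == -(A * B) + C);  // -(FMSUB)  == FNMADD
  assert(-(-(A * B) + C) == A * B - C);  // -(FNMADD) == FMSUB
  assert(-(-(A * B) - C) == A * B + C);  // -(FNMSUB) == FMADD
  return 0;
}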
+ + EVT VT = N->getValueType(0); + if (!((Subtarget->hasSSE1() && (VT == MVT::f32 || VT == MVT::v4f32)) || + (Subtarget->hasSSE2() && (VT == MVT::f64 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64)))) + return SDValue(); + + // This takes at least 3 instructions, so favor a library call when operating + // on a scalar and minimizing code size. + if (!VT.isVector() && DAG.getMachineFunction().getFunction()->optForMinSize()) + return SDValue(); + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDLoc DL(N); + EVT SetCCType = DAG.getTargetLoweringInfo().getSetCCResultType( + DAG.getDataLayout(), *DAG.getContext(), VT); + + // There are 4 possibilities involving NaN inputs, and these are the required + // outputs: + // Op1 + // Num NaN + // ---------------- + // Num | Max | Op0 | + // Op0 ---------------- + // NaN | Op1 | NaN | + // ---------------- + // + // The SSE FP max/min instructions were not designed for this case, but rather + // to implement: + // Min = Op1 < Op0 ? Op1 : Op0 + // Max = Op1 > Op0 ? Op1 : Op0 + // + // So they always return Op0 if either input is a NaN. However, we can still + // use those instructions for fmaxnum by selecting away a NaN input. + + // If either operand is NaN, the 2nd source operand (Op0) is passed through. + auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN; + SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0); + SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType , Op0, Op0, ISD::SETUO); + + // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands + // are NaN, the NaN value of Op1 is the result. + auto SelectOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; + return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax); +} + /// Do target-specific dag combines on X86ISD::FAND nodes. -static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FAND(0.0, x) -> 0.0 if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -24588,11 +26987,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FANDN nodes -static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FANDN(0.0, x) -> x if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -24603,7 +27003,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } static SDValue PerformBTCombine(SDNode *N, @@ -24673,6 +27073,57 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// sext(add_nsw(x, C)) --> add(sext(x), C_sext) +/// Promoting a sign extension ahead of an 'add nsw' exposes opportunities +/// to combine math ops, use an LEA, or use a complex addressing mode. This can +/// eliminate extend, add, and shift instructions. +static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + // TODO: This should be valid for other integer types. 
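[Editor's sketch, not part of the upstream patch] The min/max-plus-select construction above works because the SSE instruction already returns Op0 whenever either input is a NaN, so one extra select on "Op0 is NaN" yields IEEE maxnum behavior for every row of the table. A scalar model (sseMax and fmaxnumViaSelect are illustrative helpers):

#include <cassert>
#include <cmath>

// SSE MAX-style semantics: strictly "Op1 > Op0 ? Op1 : Op0", so any NaN
// makes the comparison fail and Op0 falls through.
static double sseMax(double Op1, double Op0) { return Op1 > Op0 ? Op1 : Op0; }

// fmaxnum built as in the combine: FMAX(Op1, Op0), then select Op1 when
// Op0 is NaN.
static double fmaxnumViaSelect(double Op0, double Op1) {
  double MaxOrOp0 = sseMax(Op1, Op0);
  return std::isnan(Op0) ? Op1 : MaxOrOp0;
}

int main() {
  double QNaN = std::nan("");
  assert(fmaxnumViaSelect(1.0, 2.0) == 2.0);        // Num, Num -> max
  assert(fmaxnumViaSelect(QNaN, 2.0) == 2.0);       // NaN, Num -> Num
  assert(fmaxnumViaSelect(1.0, QNaN) == 1.0);       // Num, NaN -> Num
  assert(std::isnan(fmaxnumViaSelect(QNaN, QNaN))); // NaN, NaN -> NaN
  return 0;
}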
 /// Do target-specific dag combines on X86ISD::FAND nodes.
-static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG,
+                                  const X86Subtarget *Subtarget) {
   // FAND(0.0, x) -> 0.0
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
     if (C->getValueAPF().isPosZero())
@@ -24588,11 +26987,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
     if (C->getValueAPF().isPosZero())
       return N->getOperand(1);
 
-  return SDValue();
+  return lowerX86FPLogicOp(N, DAG, Subtarget);
 }
 
 /// Do target-specific dag combines on X86ISD::FANDN nodes
-static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG,
+                                   const X86Subtarget *Subtarget) {
   // FANDN(0.0, x) -> x
   if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
     if (C->getValueAPF().isPosZero())
@@ -24603,7 +27003,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {
     if (C->getValueAPF().isPosZero())
       return N->getOperand(1);
 
-  return SDValue();
+  return lowerX86FPLogicOp(N, DAG, Subtarget);
 }
 
 static SDValue PerformBTCombine(SDNode *N,
@@ -24673,6 +27073,57 @@ static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
+/// Promoting a sign extension ahead of an 'add nsw' exposes opportunities
+/// to combine math ops, use an LEA, or use a complex addressing mode. This can
+/// eliminate extend, add, and shift instructions.
+static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
+                                       const X86Subtarget *Subtarget) {
+  // TODO: This should be valid for other integer types.
+  EVT VT = Sext->getValueType(0);
+  if (VT != MVT::i64)
+    return SDValue();
+
+  // We need an 'add nsw' feeding into the 'sext'.
+  SDValue Add = Sext->getOperand(0);
+  if (Add.getOpcode() != ISD::ADD || !Add->getFlags()->hasNoSignedWrap())
+    return SDValue();
+
+  // Having a constant operand to the 'add' ensures that we are not increasing
+  // the instruction count because the constant is extended for free below.
+  // A constant operand can also become the displacement field of an LEA.
+  auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1));
+  if (!AddOp1)
+    return SDValue();
+
+  // Don't make the 'add' bigger if there's no hope of combining it with some
+  // other 'add' or 'shl' instruction.
+  // TODO: It may be profitable to generate simpler LEA instructions in place
+  // of single 'add' instructions, but the cost model for selecting an LEA
+  // currently has a high threshold.
+  bool HasLEAPotential = false;
+  for (auto *User : Sext->uses()) {
+    if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {
+      HasLEAPotential = true;
+      break;
+    }
+  }
+  if (!HasLEAPotential)
+    return SDValue();
+
+  // Everything looks good, so pull the 'sext' ahead of the 'add'.
+  int64_t AddConstant = AddOp1->getSExtValue();
+  SDValue AddOp0 = Add.getOperand(0);
+  SDValue NewSext = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Sext), VT, AddOp0);
+  SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
+
+  // The wider add is guaranteed to not wrap because both operands are
+  // sign-extended.
+  SDNodeFlags Flags;
+  Flags.setNoSignedWrap(true);
+  return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags);
+}
+
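The correctness argument for this transform is the usual nsw one: when the narrow add cannot wrap, sign extension distributes over it. A minimal standalone check (illustrative only, with arbitrarily chosen values):

    #include <cassert>
    #include <cstdint>

    int main() {
      // With 'add nsw', sext(x + C) == sext(x) + sext(C); the combine
      // relies on exactly this identity to pull the sext ahead of the add.
      int32_t X = 123456, C = -42;
      int64_t Narrow = (int64_t)(int32_t)(X + C); // sext(add_nsw(x, C))
      int64_t Wide = (int64_t)X + (int64_t)C;     // add(sext(x), C_sext)
      assert(Narrow == Wide);
      return 0;
    }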
 static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
@@ -24763,13 +27214,13 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  if (!Subtarget->hasFp256())
-    return SDValue();
-
-  if (VT.isVector() && VT.getSizeInBits() == 256)
+  if (Subtarget->hasAVX() && VT.is256BitVector())
     if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
       return R;
 
+  if (SDValue NewAdd = promoteSextBeforeAddNSW(N, DAG, Subtarget))
+    return NewAdd;
+
   return SDValue();
 }
 
@@ -24783,9 +27234,7 @@ static SDValue PerformFMACombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   EVT ScalarVT = VT.getScalarType();
-  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) ||
-      (!Subtarget->hasFMA() && !Subtarget->hasFMA4() &&
-       !Subtarget->hasAVX512()))
+  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget->hasAnyFMA())
     return SDValue();
 
   SDValue A = N->getOperand(0);
@@ -24830,8 +27279,7 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
       N0.getOperand(0).hasOneUse()) {
     SDValue N00 = N0.getOperand(0);
     if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
-      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-      if (!C || C->getZExtValue() != 1)
+      if (!isOneConstant(N0.getOperand(1)))
         return SDValue();
       return DAG.getNode(ISD::AND, dl, VT,
                          DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
@@ -24884,21 +27332,19 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
   SDLoc DL(N);
 
   if ((CC == ISD::SETNE || CC == ISD::SETEQ) && LHS.getOpcode() == ISD::SUB)
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(LHS.getOperand(0)))
-      if (C->getAPIntValue() == 0 && LHS.hasOneUse()) {
-        SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
-                                   LHS.getOperand(1));
-        return DAG.getSetCC(DL, N->getValueType(0), addV,
-                            DAG.getConstant(0, DL, addV.getValueType()), CC);
-      }
+    if (isNullConstant(LHS.getOperand(0)) && LHS.hasOneUse()) {
+      SDValue addV = DAG.getNode(ISD::ADD, DL, LHS.getValueType(), RHS,
+                                 LHS.getOperand(1));
+      return DAG.getSetCC(DL, N->getValueType(0), addV,
+                          DAG.getConstant(0, DL, addV.getValueType()), CC);
+    }
   if ((CC == ISD::SETNE || CC == ISD::SETEQ) && RHS.getOpcode() == ISD::SUB)
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS.getOperand(0)))
-      if (C->getAPIntValue() == 0 && RHS.hasOneUse()) {
-        SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
-                                   RHS.getOperand(1));
-        return DAG.getSetCC(DL, N->getValueType(0), addV,
-                            DAG.getConstant(0, DL, addV.getValueType()), CC);
-      }
+    if (isNullConstant(RHS.getOperand(0)) && RHS.hasOneUse()) {
+      SDValue addV = DAG.getNode(ISD::ADD, DL, RHS.getValueType(), LHS,
+                                 RHS.getOperand(1));
+      return DAG.getSetCC(DL, N->getValueType(0), addV,
+                          DAG.getConstant(0, DL, addV.getValueType()), CC);
+    }
 
   if (VT.getScalarType() == MVT::i1 &&
       (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
@@ -24936,52 +27382,6 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
-static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index,
-                                         SelectionDAG &DAG) {
-  SDLoc dl(Load);
-  MVT VT = Load->getSimpleValueType(0);
-  MVT EVT = VT.getVectorElementType();
-  SDValue Addr = Load->getOperand(1);
-  SDValue NewAddr = DAG.getNode(
-      ISD::ADD, dl, Addr.getSimpleValueType(), Addr,
-      DAG.getConstant(Index * EVT.getStoreSize(), dl,
-                      Addr.getSimpleValueType()));
-
-  SDValue NewLoad =
-      DAG.getLoad(EVT, dl, Load->getChain(), NewAddr,
-                  DAG.getMachineFunction().getMachineMemOperand(
-                      Load->getMemOperand(), 0, EVT.getStoreSize()));
-  return NewLoad;
-}
-
-static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
-                                      const X86Subtarget *Subtarget) {
-  SDLoc dl(N);
-  MVT VT = N->getOperand(1)->getSimpleValueType(0);
-  assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
-         "X86insertps is only defined for v4x32");
-
-  SDValue Ld = N->getOperand(1);
-  if (MayFoldLoad(Ld)) {
-    // Extract the countS bits from the immediate so we can get the proper
-    // address when narrowing the vector load to a specific element.
-    // When the second source op is a memory address, insertps doesn't use
-    // countS and just gets an f32 from that address.
-    unsigned DestIndex =
-        cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
-
-    Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
-
-    // Create this as a scalar to vector to match the instruction pattern.
-    SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld);
-    // countS bits are ignored when loading from memory on insertps, which
-    // means we don't need to explicitly set them to 0.
-    return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
-                       LoadScalarToVector, N->getOperand(2));
-  }
-  return SDValue();
-}
-
 static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
   SDValue V0 = N->getOperand(0);
   SDValue V1 = N->getOperand(1);
@@ -25008,6 +27408,20 @@ static SDValue PerformBLENDICombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
+static SDValue PerformGatherScatterCombine(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  // Gather and Scatter instructions use k-registers for masks. The type of
+  // the masks is v*i1. So the mask will be truncated anyway.
+  // The SIGN_EXTEND_INREG may be dropped.
+  SDValue Mask = N->getOperand(2);
+  if (Mask.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+    SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
+    NewOps[2] = Mask.getOperand(0);
+    DAG.UpdateNodeOperands(N, NewOps);
+  }
+  return SDValue();
+}
+
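Why dropping the SIGN_EXTEND_INREG is safe: after truncation to v*i1 the mask consumer only ever sees bit 0 of each lane, and sign-extending from bit 0 never changes bit 0. A per-lane sketch (illustrative only; assumes the usual two's-complement arithmetic right shift):

    #include <cassert>
    #include <cstdint>

    // sign_extend_inreg from i1: replicate bit 0 across the whole lane.
    static int32_t signExtendInRegFromI1(int32_t Lane) {
      return (int32_t)((uint32_t)Lane << 31) >> 31;
    }

    int main() {
      for (int32_t Lane : {0, 1, 0x7fffffff, -2}) {
        // Truncation to i1 keeps only bit 0, so the in-register sign
        // extension is invisible to the k-register mask consumer.
        assert((signExtendInRegFromI1(Lane) & 1) == (Lane & 1));
      }
      return 0;
    }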
 // Helper function of PerformSETCCCombine. It is to materialize "setb reg"
 // as "sbb reg,reg", since it can be extended without zext and produces
 // an all-ones bit which is more useful than 0/1 in some cases.
@@ -25182,7 +27596,7 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
 
   // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
   // a 32-bit target where SSE doesn't support i64->FP operations.
-  if (Op0.getOpcode() == ISD::LOAD) {
+  if (!Subtarget->useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
     LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());
     EVT LdVT = Ld->getValueType(0);
 
@@ -25357,15 +27771,14 @@ static SDValue performVZEXTCombine(SDNode *N, SelectionDAG &DAG,
   }
 
   // Check if we can bypass extracting and re-inserting an element of an input
-  // vector. Essentialy:
+  // vector. Essentially:
   // (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
   if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
       V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
     SDValue ExtractedV = V.getOperand(0);
     SDValue OrigV = ExtractedV.getOperand(0);
-    if (auto *ExtractIdx = dyn_cast<ConstantSDNode>(ExtractedV.getOperand(1)))
-      if (ExtractIdx->getZExtValue() == 0) {
+    if (isNullConstant(ExtractedV.getOperand(1))) {
       MVT OrigVT = OrigV.getSimpleValueType();
       // Extract a subvector if necessary...
       if (OrigVT.getSizeInBits() > OpVT.getSizeInBits()) {
@@ -25394,7 +27807,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SELECT:
   case X86ISD::SHRUNKBLEND:
     return PerformSELECTCombine(N, DAG, DCI, Subtarget);
-  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG);
+  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI, Subtarget);
   case ISD::ADD:            return PerformAddCombine(N, DAG, Subtarget);
   case ISD::SUB:            return PerformSubCombine(N, DAG, Subtarget);
@@ -25414,12 +27827,17 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::UINT_TO_FP:     return PerformUINT_TO_FPCombine(N, DAG, Subtarget);
   case ISD::FADD:           return PerformFADDCombine(N, DAG, Subtarget);
   case ISD::FSUB:           return PerformFSUBCombine(N, DAG, Subtarget);
+  case ISD::FNEG:           return PerformFNEGCombine(N, DAG, Subtarget);
+  case ISD::TRUNCATE:       return PerformTRUNCATECombine(N, DAG, Subtarget);
   case X86ISD::FXOR:
-  case X86ISD::FOR:         return PerformFORCombine(N, DAG);
+  case X86ISD::FOR:         return PerformFORCombine(N, DAG, Subtarget);
   case X86ISD::FMIN:
   case X86ISD::FMAX:        return PerformFMinFMaxCombine(N, DAG);
-  case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
-  case X86ISD::FANDN:       return PerformFANDNCombine(N, DAG);
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:        return performFMinNumFMaxNumCombine(N, DAG,
+                                                                Subtarget);
+  case X86ISD::FAND:        return PerformFANDCombine(N, DAG, Subtarget);
+  case X86ISD::FANDN:       return PerformFANDNCombine(N, DAG, Subtarget);
   case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
   case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
   case ISD::ANY_EXTEND:
@@ -25447,14 +27865,9 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case X86ISD::VPERM2X128:
   case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI, Subtarget);
   case ISD::FMA:            return PerformFMACombine(N, DAG, Subtarget);
-  case
ISD::INTRINSIC_WO_CHAIN: - return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget); - case X86ISD::INSERTPS: { - if (getTargetMachine().getOptLevel() > CodeGenOpt::None) - return PerformINSERTPSCombine(N, DAG, Subtarget); - break; - } case X86ISD::BLENDI: return PerformBLENDICombine(N, DAG); + case ISD::MGATHER: + case ISD::MSCATTER: return PerformGatherScatterCombine(N, DAG); } return SDValue(); @@ -26084,6 +28497,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case MVT::f64: case MVT::i64: return std::make_pair(0U, &X86::FR64RegClass); + // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. // Vector types. case MVT::v16i8: case MVT::v8i16: @@ -26168,17 +28582,13 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (Class == &X86::GR8RegClass || Class == &X86::GR16RegClass || Class == &X86::GR32RegClass || Class == &X86::GR64RegClass) { unsigned Size = VT.getSizeInBits(); - MVT::SimpleValueType SimpleTy = Size == 1 || Size == 8 ? MVT::i8 - : Size == 16 ? MVT::i16 - : Size == 32 ? MVT::i32 - : Size == 64 ? MVT::i64 - : MVT::Other; - unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, SimpleTy); + if (Size == 1) Size = 8; + unsigned DestReg = getX86SubSuperRegisterOrZero(Res.first, Size); if (DestReg > 0) { Res.first = DestReg; - Res.second = SimpleTy == MVT::i8 ? &X86::GR8RegClass - : SimpleTy == MVT::i16 ? &X86::GR16RegClass - : SimpleTy == MVT::i32 ? &X86::GR32RegClass + Res.second = Size == 8 ? &X86::GR8RegClass + : Size == 16 ? &X86::GR16RegClass + : Size == 32 ? &X86::GR32RegClass : &X86::GR64RegClass; assert(Res.second->contains(Res.first) && "Register in register class"); } else { @@ -26196,6 +28606,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // target independent register mapper will just pick the first match it can // find, ignoring the required type. + // TODO: Handle f128 and i128 in FR128RegClass after it is tested well. if (VT == MVT::f32 || VT == MVT::i32) Res.second = &X86::FR32RegClass; else if (VT == MVT::f64 || VT == MVT::i64) @@ -26244,6 +28655,15 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL, return -1; } -bool X86TargetLowering::isTargetFTOL() const { - return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit(); +bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { + // Integer division on x86 is expensive. However, when aggressively optimizing + // for code size, we prefer to use a div instruction, as it is usually smaller + // than the alternative sequence. + // The exception to this is vector division. Since x86 doesn't have vector + // integer division, leaving the division as-is is a loss even in terms of + // size, because it will have to be scalarized, while the alternative code + // sequence can be performed in vector form. + bool OptSize = Attr.hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize); + return OptSize && !VT.isVector(); } diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 723d5304495c..a29dc9af54f6 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -126,6 +126,9 @@ namespace llvm { /// 1 is the number of bytes of stack to pop. RET_FLAG, + /// Return from interrupt. Operand 0 is the number of bytes to pop. + IRET, + /// Repeat fill, corresponds to X86::REP_STOSx. REP_STOS, @@ -182,6 +185,8 @@ namespace llvm { /// Compute Sum of Absolute Differences. 
       PSADBW,
+      /// Compute Double Block Packed Sum-Absolute-Differences
+      DBPSADBW,
 
       /// Bitwise Logical AND NOT of Packed FP values.
       ANDNP,
@@ -211,6 +216,8 @@ namespace llvm {
       // FP vector get exponent
       FGETEXP_RND,
+      // Extract Normalized Mantissas
+      VGETMANT,
       // FP Scale
       SCALEF,
       // Integer add/sub with unsigned saturation.
@@ -236,6 +243,9 @@ namespace llvm {
       // Integer absolute value
       ABS,
 
+      // Detect Conflicts Within a Vector
+      CONFLICT,
+
       /// Floating point max and min.
       FMAX, FMIN,
@@ -282,9 +292,8 @@ namespace llvm {
 
       // Vector integer truncate.
       VTRUNC,
-
-      // Vector integer truncate with mask.
-      VTRUNCM,
+      // Vector integer truncate with unsigned/signed saturation.
+      VTRUNCUS, VTRUNCS,
 
       // Vector FP extend.
       VFPEXT,
@@ -295,6 +304,9 @@ namespace llvm {
       // Vector signed/unsigned integer to double.
       CVTDQ2PD, CVTUDQ2PD,
 
+      // Convert a vector to mask, set bits based on MSB.
+      CVT2MASK,
+
       // 128-bit vector logical left / right shift
       VSHLDQ, VSRLDQ,
@@ -349,6 +361,7 @@ namespace llvm {
 
       // OR/AND test for masks
       KORTEST,
+      KTEST,
 
       // Several flavors of instructions with vector shuffle behaviors.
       PACKSS,
@@ -382,12 +395,24 @@ namespace llvm {
       VPERMIV3,
       VPERMI,
       VPERM2X128,
-      //Fix Up Special Packed Float32/64 values
+      // Bitwise ternary logic
+      VPTERNLOG,
+      // Fix Up Special Packed Float32/64 values
       VFIXUPIMM,
-      //Range Restriction Calculation For Packed Pairs of Float32/64 values
+      // Range Restriction Calculation For Packed Pairs of Float32/64 values
       VRANGE,
+      // Reduce - Perform Reduction Transformation on scalar/packed FP
+      VREDUCE,
+      // RndScale - Round FP Values To Include A Given Number Of Fraction Bits
+      VRNDSCALE,
+      // VFPCLASS - Tests types of packed FP values.
+      VFPCLASS,
+      // VFPCLASSS - Tests types of scalar FP values.
+      VFPCLASSS,
       // Broadcast scalar to vector
       VBROADCAST,
+      // Broadcast mask to vector
+      VBROADCASTM,
       // Broadcast subvector to vector
       SUBV_BROADCAST,
       // Insert/Extract vector element
@@ -397,13 +422,21 @@ namespace llvm {
       /// SSE4A Extraction and Insertion.
       EXTRQI, INSERTQI,
 
+      // XOP variable/immediate rotations
+      VPROT, VPROTI,
+      // XOP arithmetic/logical shifts
+      VPSHA, VPSHL,
+      // XOP signed/unsigned integer comparisons
+      VPCOM, VPCOMU,
+
       // Vector multiply packed unsigned doubleword integers
       PMULUDQ,
       // Vector multiply packed signed doubleword integers
       PMULDQ,
       // Vector Multiply Packed UnsignedIntegers with Round and Scale
       MULHRS,
-
+      // Multiply and Add Packed Integers
+      VPMADDUBSW, VPMADDWD,
       // FMA nodes
       FMADD,
       FNMADD,
@@ -418,7 +451,6 @@ namespace llvm {
       FNMSUB_RND,
       FMADDSUB_RND,
       FMSUBADD_RND,
-      RNDSCALE,
 
       // Compress and expand
       COMPRESS,
@@ -443,9 +475,6 @@ namespace llvm {
       // falls back to heap allocation if not.
       SEG_ALLOCA,
 
-      // Windows's _ftol2 runtime routine to do fptoui.
-      WIN_FTOL,
-
       // Memory barrier
       MEMBARRIER,
       MFENCE,
@@ -580,15 +609,6 @@ namespace llvm {
     bool isCalleePop(CallingConv::ID CallingConv,
                      bool is64Bit, bool IsVarArg, bool TailCallOpt);
-
-    /// AVX512 static rounding constants. These need to match the values in
-    /// avx512fintrin.h.
-    enum STATIC_ROUNDING {
-      TO_NEAREST_INT = 0,
-      TO_NEG_INF = 1,
-      TO_POS_INF = 2,
-      TO_ZERO = 3,
-      CUR_DIRECTION = 4
-    };
   }
 
 //===--------------------------------------------------------------------===//
@@ -850,16 +870,7 @@ namespace llvm {
     /// register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const { return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 - (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 - } - - /// Return true if the target uses the MSVC _ftol2 routine for fptoui. - bool isTargetFTOL() const; - - /// Return true if the MSVC _ftol2 routine should be used for fptoui to the - /// given type. - bool isIntegerTypeFTOL(EVT VT) const { - return isTargetFTOL() && VT == MVT::i64; + (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 } /// \brief Returns true if it is beneficial to convert a load of a constant @@ -879,6 +890,16 @@ namespace llvm { unsigned getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override; + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + /// This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. FastISel *createFastISel(FunctionLoweringInfo &funcInfo, @@ -890,6 +911,11 @@ namespace llvm { bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const override; + /// Return true if the target stores SafeStack pointer at a fixed offset in + /// some non-standard address space, and populates the address space and + /// offset as appropriate. + Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override; + SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, SelectionDAG &DAG) const; @@ -899,6 +925,8 @@ namespace llvm { /// \brief Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; + bool isIntDivCheap(EVT VT, AttributeSet Attr) const override; + protected: std::pair findRepresentativeClass(const TargetRegisterInfo *TRI, @@ -908,7 +936,6 @@ namespace llvm { /// Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - const DataLayout *TD; /// Select between SSE or x87 floating point ops. /// When SSE is available, use it for f32 operations. 
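The X86ScalarSSEf32/X86ScalarSSEf64 split referenced above decides which register file a scalar FP value lives in: f64 goes to an SSE register only with SSE2, f32 only with SSE1, and everything else falls back to the x87 stack. A minimal standalone sketch of the same decision (illustrative only; the boolean flags stand in for the real subtarget feature queries):

    #include <cassert>

    enum class FPReg { SSE, X87 };

    // Mirrors isScalarFPTypeInSSEReg: the predicate depends only on the
    // element width and the available SSE level.
    static FPReg classifyScalar(bool IsF64, bool HasSSE1, bool HasSSE2) {
      if (IsF64)
        return HasSSE2 ? FPReg::SSE : FPReg::X87;
      return HasSSE1 ? FPReg::SSE : FPReg::X87;
    }

    int main() {
      assert(classifyScalar(/*IsF64=*/true,  /*SSE1=*/true, /*SSE2=*/false) == FPReg::X87);
      assert(classifyScalar(/*IsF64=*/false, /*SSE1=*/true, /*SSE2=*/false) == FPReg::SSE);
      assert(classifyScalar(/*IsF64=*/true,  /*SSE1=*/true, /*SSE2=*/true)  == FPReg::SSE);
      return 0;
    }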
@@ -955,7 +982,6 @@ namespace llvm { const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; - bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, bool Is64Bit, int FPDiff, SDLoc dl) const; @@ -969,7 +995,6 @@ namespace llvm { SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const; @@ -994,9 +1019,9 @@ namespace llvm { SDValue LowerToBT(SDValue And, ISD::CondCode CC, SDLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; @@ -1042,27 +1067,16 @@ namespace llvm { const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; - bool shouldExpandAtomicLoadInIR(LoadInst *SI) const override; + TargetLoweringBase::AtomicExpansionKind + shouldExpandAtomicLoadInIR(LoadInst *SI) const override; bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; - TargetLoweringBase::AtomicRMWExpansionKind + TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override; - bool needsCmpXchgNb(const Type *MemType) const; - - /// Utility function to emit atomic-load-arith operations (and, or, xor, - /// nand, max, min, umax, umin). It takes the corresponding instruction to - /// expand, the associated machine basic block, and the associated X86 - /// opcodes for reg/reg. - MachineBasicBlock *EmitAtomicLoadArith(MachineInstr *MI, - MachineBasicBlock *MBB) const; - - /// Utility function to emit atomic-load-arith operations (and, or, xor, - /// nand, add, sub, swap) for 64-bit operands on 32-bit target. - MachineBasicBlock *EmitAtomicLoadArith6432(MachineInstr *MI, - MachineBasicBlock *MBB) const; + bool needsCmpXchgNb(Type *MemType) const; // Utility function to emit the low-level va_arg code for X86-64. 
MachineBasicBlock *EmitVAARG64WithCustomInserter( @@ -1077,18 +1091,24 @@ namespace llvm { MachineBasicBlock *EmitLoweredSelect(MachineInstr *I, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredCatchRet(MachineInstr *MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredCatchPad(MachineInstr *MI, + MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, MachineBasicBlock *BB) const; - MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI, MachineBasicBlock *MBB) const; @@ -1121,7 +1141,7 @@ namespace llvm { unsigned &RefinementSteps) const override; /// Reassociate floating point divisions into multiply by reciprocal. - bool combineRepeatedFPDivisors(unsigned NumUsers) const override; + unsigned combineRepeatedFPDivisors() const override; }; namespace X86 { diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index faa91500b181..8bf2925a75db 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -79,7 +79,7 @@ class X86VectorVTInfo; // We map scalar types to the smallest (128-bit) vector type // with the appropriate element type. This allows to use the same masking logic. +def i32x_info : X86VectorVTInfo<1, i32, GR32, "si">; +def i64x_info : X86VectorVTInfo<1, i64, GR64, "sq">; def f32x_info : X86VectorVTInfo<1, f32, VR128X, "ss">; def f64x_info : X86VectorVTInfo<1, f64, VR128X, "sd">; @@ -274,6 +276,22 @@ multiclass AVX512_maskable_3src O, Format F, X86VectorVTInfo _, OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>; +// Similar to AVX512_maskable_3rc but in this case the input VT for the tied +// operand differs from the output VT. This requires a bitconvert on +// the preserved vector going into the vselect. +multiclass AVX512_maskable_3src_cast O, Format F, X86VectorVTInfo OutVT, + X86VectorVTInfo InVT, + dag Outs, dag NonTiedIns, string OpcodeStr, + string AttSrcAsm, string IntelSrcAsm, + dag RHS> : + AVX512_maskable_common; + multiclass AVX512_maskable_3src_scalar O, Format F, X86VectorVTInfo _, dag Outs, dag NonTiedIns, string OpcodeStr, string AttSrcAsm, string IntelSrcAsm, @@ -471,84 +489,123 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_512_SET0)>; //===----------------------------------------------------------------------===// // AVX-512 - VECTOR INSERT // - -multiclass vinsert_for_size_no_alt { +multiclass vinsert_for_size { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { - def rr : AVX512AIi8, - EVEX_4V, EVEX_V512; + defm rr : AVX512_maskable, AVX512AIi8Base, EVEX_4V; - let mayLoad = 1 in - def rm : AVX512AIi8, - EVEX_4V, EVEX_V512, EVEX_CD8; + let mayLoad = 1 in + defm rm : AVX512_maskable, AVX512AIi8Base, EVEX_4V, + EVEX_CD8; } } -multiclass vinsert_for_size : - vinsert_for_size_no_alt { - // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for - // vinserti32x4. Only add this if 64x2 and friends are not supported - // natively via AVX512DQ. 
- let Predicates = [NoDQI] in +multiclass vinsert_for_size_lowering p> { + let Predicates = p in { def : Pat<(vinsert_insert:$ins - (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)), - (AltTo.VT (!cast(NAME # From.EltSize # "x4rr") - VR512:$src1, From.RC:$src2, - (INSERT_get_vinsert_imm VR512:$ins)))>; + (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)), + (To.VT (!cast(InstrStr#"rr") + To.RC:$src1, From.RC:$src2, + (INSERT_get_vinsert_imm To.RC:$ins)))>; + + def : Pat<(vinsert_insert:$ins + (To.VT To.RC:$src1), + (From.VT (bitconvert (From.LdFrag addr:$src2))), + (iPTR imm)), + (To.VT (!cast(InstrStr#"rm") + To.RC:$src1, addr:$src2, + (INSERT_get_vinsert_imm To.RC:$ins)))>; + } } multiclass vinsert_for_type { - defm NAME # "32x4" : vinsert_for_size, + X86VectorVTInfo< 8, EltVT32, VR256X>, + vinsert128_insert>, EVEX_V256; + + defm NAME # "32x4Z" : vinsert_for_size, X86VectorVTInfo<16, EltVT32, VR512>, - X86VectorVTInfo< 2, EltVT64, VR128X>, - X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert128_insert, - INSERT_get_vinsert128_imm>; - let Predicates = [HasDQI] in - defm NAME # "64x2" : vinsert_for_size_no_alt, - X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert128_insert, - INSERT_get_vinsert128_imm>, VEX_W; - defm NAME # "64x4" : vinsert_for_size, EVEX_V512; + + defm NAME # "64x4Z" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, - X86VectorVTInfo< 8, EltVT32, VR256>, - X86VectorVTInfo<16, EltVT32, VR512>, - vinsert256_insert, - INSERT_get_vinsert256_imm>, VEX_W; - let Predicates = [HasDQI] in - defm NAME # "32x8" : vinsert_for_size_no_alt, - X86VectorVTInfo<16, EltVT32, VR512>, - vinsert256_insert, - INSERT_get_vinsert256_imm>; + vinsert256_insert>, VEX_W, EVEX_V512; + + let Predicates = [HasVLX, HasDQI] in + defm NAME # "64x2Z256" : vinsert_for_size, + X86VectorVTInfo< 4, EltVT64, VR256X>, + vinsert128_insert>, VEX_W, EVEX_V256; + + let Predicates = [HasDQI] in { + defm NAME # "64x2Z" : vinsert_for_size, + X86VectorVTInfo< 8, EltVT64, VR512>, + vinsert128_insert>, VEX_W, EVEX_V512; + + defm NAME # "32x8Z" : vinsert_for_size, + X86VectorVTInfo<16, EltVT32, VR512>, + vinsert256_insert>, EVEX_V512; + } } defm VINSERTF : vinsert_for_type; defm VINSERTI : vinsert_for_type; +// Codegen pattern with the alternative types, +// Only add this if 64x2 and its friends are not supported natively via AVX512DQ. 
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>; + +defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>; + +defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>; + +// Codegen pattern with the alternative types insert VEC128 into VEC256 +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; +// Codegen pattern with the alternative types insert VEC128 into VEC512 +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; +// Codegen pattern with the alternative types insert VEC256 into VEC512 +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; + // vinsertps - insert f32 to XMM def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), @@ -566,90 +623,158 @@ def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst), // AVX-512 VECTOR EXTRACT //--- -multiclass vextract_for_size { - let hasSideEffects = 0, ExeDomain = To.ExeDomain in { - defm rr : AVX512_maskable_in_asm, - AVX512AIi8Base, EVEX, EVEX_V512; - let mayStore = 1 in - def rm : AVX512AIi8, EVEX, EVEX_V512, EVEX_CD8; - } - - // Codegen pattern with the alternative types, e.g. v8i64 -> v2i64 for - // vextracti32x4 - def : Pat<(vextract_extract:$ext (AltFrom.VT VR512:$src1), (iPTR imm)), - (AltTo.VT (!cast(NAME # To.EltSize # "x4rr") - VR512:$src1, - (EXTRACT_get_vextract_imm To.RC:$ext)))>; - - // A 128/256-bit subvector extract from the first 512-bit vector position is +multiclass vextract_for_size_first_position_lowering { + // A subvector extract from the first vector position is // a subregister copy that needs no instruction. - def : Pat<(To.VT (extract_subvector (From.VT VR512:$src), (iPTR 0))), - (To.VT - (EXTRACT_SUBREG (From.VT VR512:$src), To.SubRegIdx))>; + def NAME # To.NumElts: + Pat<(To.VT (extract_subvector (From.VT From.RC:$src),(iPTR 0))), + (To.VT (EXTRACT_SUBREG (From.VT From.RC:$src), To.SubRegIdx))>; +} - // And for the alternative types. 
-  def : Pat<(AltTo.VT (extract_subvector (AltFrom.VT VR512:$src), (iPTR 0))),
-            (AltTo.VT
-             (EXTRACT_SUBREG (AltFrom.VT VR512:$src), AltTo.SubRegIdx))>;
+multiclass vextract_for_size :
+  vextract_for_size_first_position_lowering {
+
+  let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
+    // Use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
+    // vextract_extract); we are interested only in patterns without a mask,
+    // the intrinsic patterns are matched below.
+    defm rr : AVX512_maskable_in_asm,
+              AVX512AIi8Base, EVEX;
+    let mayStore = 1 in {
+      def rm  : AVX512AIi8,
+                EVEX;
+
+      def rmk : AVX512AIi8, EVEX_K, EVEX;
+    } // mayStore = 1
+  }
 
   // Intrinsic call with masking.
   def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                   "x4_512")
-              VR512:$src1, (iPTR imm:$idx), To.RC:$src0, GR8:$mask),
-            (!cast(NAME # To.EltSize # "x4rrk") To.RC:$src0,
-              (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
-              VR512:$src1, imm:$idx)>;
+                   "x" # To.NumElts # "_" # From.Size)
+              From.RC:$src1, (iPTR imm:$idx), To.RC:$src0, To.MRC:$mask),
+            (!cast(NAME # To.EltSize # "x" # To.NumElts #
+                   From.ZSuffix # "rrk")
+              To.RC:$src0,
+              (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
+              From.RC:$src1, imm:$idx)>;
 
   // Intrinsic call with zero-masking.
   def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName #
-                   "x4_512")
-              VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, GR8:$mask),
-            (!cast(NAME # To.EltSize # "x4rrkz")
-              (v4i1 (COPY_TO_REGCLASS GR8:$mask, VK4WM)),
-              VR512:$src1, imm:$idx)>;
+                   "x" # To.NumElts # "_" # From.Size)
+              From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, To.MRC:$mask),
+            (!cast(NAME # To.EltSize # "x" # To.NumElts #
+                   From.ZSuffix # "rrkz")
+              (COPY_TO_REGCLASS To.MRC:$mask, To.KRCWM),
+              From.RC:$src1, imm:$idx)>;
 
   // Intrinsic call without masking.
def : Pat<(!cast("int_x86_avx512_mask_vextract" # To.EltTypeName # - "x4_512") - VR512:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), - (!cast(NAME # To.EltSize # "x4rr") - VR512:$src1, imm:$idx)>; + "x" # To.NumElts # "_" # From.Size) + From.RC:$src1, (iPTR imm:$idx), To.ImmAllZerosV, (i8 -1)), + (!cast(NAME # To.EltSize # "x" # To.NumElts # + From.ZSuffix # "rr") + From.RC:$src1, imm:$idx)>; } -multiclass vextract_for_type { - defm NAME # "32x4" : vextract_for_size p> : + vextract_for_size_first_position_lowering { + + let Predicates = p in + def : Pat<(vextract_extract:$ext (From.VT From.RC:$src1), (iPTR imm)), + (To.VT (!cast(InstrStr#"rr") + From.RC:$src1, + (EXTRACT_get_vextract_imm To.RC:$ext)))>; +} + +multiclass vextract_for_type { + defm NAME # "32x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT32, VR128X>, - X86VectorVTInfo< 8, EltVT64, VR512>, - X86VectorVTInfo< 2, EltVT64, VR128X>, - vextract128_extract, - EXTRACT_get_vextract128_imm>; - defm NAME # "64x4" : vextract_for_size, + EVEX_V512, EVEX_CD8<32, CD8VT4>; + defm NAME # "64x4Z" : vextract_for_size, X86VectorVTInfo< 4, EltVT64, VR256X>, + vextract256_extract>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>; + let Predicates = [HasVLX] in + defm NAME # "32x4Z256" : vextract_for_size, + X86VectorVTInfo< 4, EltVT32, VR128X>, + vextract128_extract>, + EVEX_V256, EVEX_CD8<32, CD8VT4>; + let Predicates = [HasVLX, HasDQI] in + defm NAME # "64x2Z256" : vextract_for_size, + X86VectorVTInfo< 2, EltVT64, VR128X>, + vextract128_extract>, + VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>; + let Predicates = [HasDQI] in { + defm NAME # "64x2Z" : vextract_for_size, + X86VectorVTInfo< 2, EltVT64, VR128X>, + vextract128_extract>, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>; + defm NAME # "32x8Z" : vextract_for_size, - X86VectorVTInfo< 8, EltVT32, VR256>, - vextract256_extract, - EXTRACT_get_vextract256_imm>, VEX_W; + X86VectorVTInfo< 8, EltVT32, VR256X>, + vextract256_extract>, + EVEX_V512, EVEX_CD8<32, CD8VT8>; + } } defm VEXTRACTF : vextract_for_type; defm VEXTRACTI : vextract_for_type; +// extract_subvector codegen patterns with the alternative types. +// Only add this if 64x2 and its friends are not supported natively via AVX512DQ. 
+defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>; +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>; + +defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>; +defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>; + +defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>; +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>; + +// Codegen pattern with the alternative types extract VEC128 from VEC512 +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; +defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v64i8_info, v16i8x_info, + vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>; +// Codegen pattern with the alternative types extract VEC256 from VEC512 +defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v32i16_info, v16i16x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; +defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info, + vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>; + // A 128-bit subvector insert to the first 512-bit vector position // is a subregister copy that needs no instruction. def : Pat<(insert_subvector undef, (v2i64 VR128X:$src), (iPTR 0)), @@ -677,6 +802,10 @@ def : Pat<(insert_subvector undef, (v8i32 VR256X:$src), (iPTR 0)), (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; def : Pat<(insert_subvector undef, (v8f32 VR256X:$src), (iPTR 0)), (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v16i16 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; +def : Pat<(insert_subvector undef, (v32i8 VR256X:$src), (iPTR 0)), + (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm)>; // vextractps - extract 32 bits from XMM def VEXTRACTPSzrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst), @@ -694,50 +823,49 @@ def VEXTRACTPSzmr : AVX512AIi8<0x17, MRMDestMem, (outs), //===---------------------------------------------------------------------===// // AVX-512 BROADCAST //--- -multiclass avx512_fp_broadcast opc, SDNode OpNode, RegisterClass SrcRC, - ValueType svt, X86VectorVTInfo _> { - defm r : AVX512_maskable, - T8PD, EVEX; - let mayLoad = 1 in { - defm m : AVX512_maskable, - T8PD, EVEX; - } +multiclass avx512_broadcast_rm opc, string OpcodeStr, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> { + + defm r : AVX512_maskable, + T8PD, EVEX; + let mayLoad = 1 in + defm m : AVX512_maskable, + T8PD, EVEX, EVEX_CD8; } -multiclass avx512_fp_broadcast_vl opc, SDNode OpNode, - AVX512VLVectorVTInfo _> { - defm Z : avx512_fp_broadcast, +multiclass avx512_fp_broadcast_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _> { + defm Z : avx512_broadcast_rm, EVEX_V512; let Predicates = [HasVLX] in { - defm Z256 : avx512_fp_broadcast, - EVEX_V256; + defm Z256 : avx512_broadcast_rm, + EVEX_V256; } } let ExeDomain = SSEPackedSingle in { - defm 
VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast, - avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, "vbroadcastss", + avx512vl_f32_info>; let Predicates = [HasVLX] in { - defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X, - v4f32, v4f32x_info>, EVEX_V128, - EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSSZ128 : avx512_broadcast_rm<0x18, "vbroadcastss", + v4f32x_info, v4f32x_info>, EVEX_V128; } } let ExeDomain = SSEPackedDouble in { - defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, "vbroadcastsd", + avx512vl_f64_info>, VEX_W; } // avx512_broadcast_pat introduces patterns for broadcast with a scalar argument. -// Later, we can canonize broadcast instructions before ISel phase and +// Later, we can canonize broadcast instructions before ISel phase and // eliminate additional patterns on ISel. // SrcRC_v and SrcRC_s are RegisterClasses for vector and scalar // representations of source @@ -834,70 +962,50 @@ def : Pat<(v8i64 (int_x86_avx512_mask_pbroadcast_q_gpr_512 (i64 GR64:$src), (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))), (VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>; -multiclass avx512_int_broadcast_rm opc, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, - RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, - RegisterClass KRC> { - def rr : AVX5128I, EVEX; - def rrk : AVX5128I, EVEX, EVEX_K; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmkz : AVX5128I, EVEX, EVEX_KZ; +// Provide aliases for broadcast from the same register class that +// automatically does the extract. +multiclass avx512_int_broadcast_rm_lowering { + def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), + (!cast(NAME#DestInfo.ZSuffix#"r") + (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>; +} + +multiclass avx512_int_broadcast_rm_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _, Predicate prd> { + let Predicates = [prd] in { + defm Z : avx512_broadcast_rm, + avx512_int_broadcast_rm_lowering<_.info512, _.info256>, + EVEX_V512; + // Defined separately to avoid redefinition. 
+ defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>; + } + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_broadcast_rm, + avx512_int_broadcast_rm_lowering<_.info256, _.info256>, + EVEX_V256; + defm Z128 : avx512_broadcast_rm, + EVEX_V128; } } -defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem, - loadi32, VR512, v16i32, v4i32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; -defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, - loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VT1>; +defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", + avx512vl_i8_info, HasBWI>; +defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", + avx512vl_i16_info, HasBWI>; +defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", + avx512vl_i32_info, HasAVX512>; +defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", + avx512vl_i64_info, HasAVX512>, VEX_W; multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmkz : AVX5128I, EVEX, EVEX_KZ; - } + let mayLoad = 1 in + defm rm : AVX512_maskable, + AVX5128IBase, EVEX; } defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", @@ -944,10 +1052,45 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8", EVEX_V512, EVEX_CD8<32, CD8VT8>; } -def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))), - (VPBROADCASTDZrr VR128X:$src)>; -def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), - (VPBROADCASTQZrr VR128X:$src)>; +multiclass avx512_broadcast_32x2 opc, string OpcodeStr, + X86VectorVTInfo _Dst, X86VectorVTInfo _Src, + SDNode OpNode = X86SubVBroadcast> { + + defm r : AVX512_maskable, + T8PD, EVEX; + let mayLoad = 1 in + defm m : AVX512_maskable, + T8PD, EVEX, EVEX_CD8<_Src.EltSize, CD8VT2>; +} + +multiclass avx512_common_broadcast_32x2 opc, string OpcodeStr, + AVX512VLVectorVTInfo _> { + let Predicates = [HasDQI] in + defm Z : avx512_broadcast_32x2, + EVEX_V512; + let Predicates = [HasDQI, HasVLX] in + defm Z256 : avx512_broadcast_32x2, + EVEX_V256; +} + +multiclass avx512_common_broadcast_i32x2 opc, string OpcodeStr, + AVX512VLVectorVTInfo _> : + avx512_common_broadcast_32x2 { + + let Predicates = [HasDQI, HasVLX] in + defm Z128 : avx512_broadcast_32x2, EVEX_V128; +} + +defm VPBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", + avx512vl_i32_info>; +defm VPBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", + avx512vl_f32_info>; def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; @@ -959,21 +1102,6 @@ def : Pat<(v8f64 (X86VBroadcast (v8f64 VR512:$src))), def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>; -def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))), - (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; -def : Pat<(v16i32 (X86VBroadcast (v8i32 VR256X:$src))), - (VPBROADCASTDZrr (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm))>; - -def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))), - (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; -def : Pat<(v8i64 (X86VBroadcast (v4i64 VR256X:$src))), - (VPBROADCASTQZrr (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm))>; - -def : Pat<(v16f32 
(int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), - (VBROADCASTSSZr VR128X:$src)>; -def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))), - (VBROADCASTSDZr VR128X:$src)>; - // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. def : Pat<(v16f32 (X86VBroadcast FR32X:$src)), @@ -985,170 +1113,178 @@ def : Pat<(v8f64 (X86VBroadcast FR64X:$src)), //===----------------------------------------------------------------------===// // AVX-512 BROADCAST MASK TO VECTOR REGISTER //--- - -multiclass avx512_mask_broadcast opc, string OpcodeStr, - RegisterClass KRC> { -let Predicates = [HasCDI] in -def Zrr : AVX512XS8I opc, string OpcodeStr, + X86VectorVTInfo _, RegisterClass KRC> { + def rr : AVX512XS8I, EVEX, EVEX_V512; - -let Predicates = [HasCDI, HasVLX] in { -def Z128rr : AVX512XS8I, EVEX, EVEX_V128; -def Z256rr : AVX512XS8I, EVEX, EVEX_V256; -} + [(set _.RC:$dst, (_.VT (X86VBroadcastm KRC:$src)))]>, EVEX; } -let Predicates = [HasCDI] in { -defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", - VK16>; -defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", - VK8>, VEX_W; -} - -//===----------------------------------------------------------------------===// -// AVX-512 - VPERM -// -// -- immediate form -- -multiclass avx512_perm_imm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { - def ri : AVX512AIi8, - EVEX; - def mi : AVX512AIi8, - EVEX, EVEX_CD8<_.EltSize, CD8VF>; -} -} - -multiclass avx512_permil OpcImm, bits<8> OpcVar, X86VectorVTInfo _, - X86VectorVTInfo Ctrl> : - avx512_perm_imm { - let ExeDomain = _.ExeDomain in { - def rr : AVX5128I, - EVEX_4V; - def rm : AVX5128I, - EVEX_4V; +multiclass avx512_mask_broadcast opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, RegisterClass KRC> { + let Predicates = [HasCDI] in + defm Z : avx512_mask_broadcastm, EVEX_V512; + let Predicates = [HasCDI, HasVLX] in { + defm Z256 : avx512_mask_broadcastm, EVEX_V256; + defm Z128 : avx512_mask_broadcastm, EVEX_V128; } } -defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, - EVEX_V512; -defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, - EVEX_V512, VEX_W; -def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPSZri VR512:$src1, imm:$imm)>; -def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPDZri VR512:$src1, imm:$imm)>; +defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", + avx512vl_i32_info, VK16>; +defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", + avx512vl_i64_info, VK8>, VEX_W; -// -- VPERM2I - 3 source operands form -- -multiclass avx512_perm_3src opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _> { +//===----------------------------------------------------------------------===// +// -- VPERMI2 - 3 source operands form -- +multiclass avx512_perm_i opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst" in { - defm rr: AVX512_maskable_3src, EVEX_4V, + (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V, AVX5128IBase; let mayLoad = 1 in - defm rm: AVX512_maskable_3src, EVEX_4V, AVX5128IBase; } } -multiclass avx512_perm_3src_mb opc, string OpcodeStr, - SDNode OpNode, X86VectorVTInfo _> { +multiclass avx512_perm_i_mb opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let mayLoad = 1, Constraints = "$src1 = 
$dst" in - defm rmb: AVX512_maskable_3src, + (_.VT (X86VPermi2X IdxVT.RC:$src1, + _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>, AVX5128IBase, EVEX_4V, EVEX_B; } -multiclass avx512_perm_3src_sizes opc, string OpcodeStr, - SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { - let Predicates = [HasAVX512] in - defm NAME: avx512_perm_3src, - avx512_perm_3src_mb, EVEX_V512; +multiclass avx512_perm_i_sizes opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo ShuffleMask> { + defm NAME: avx512_perm_i, + avx512_perm_i_mb, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V128; - defm NAME#256: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V256; + defm NAME#128: avx512_perm_i, + avx512_perm_i_mb, EVEX_V128; + defm NAME#256: avx512_perm_i, + avx512_perm_i_mb, EVEX_V256; } } -multiclass avx512_perm_3src_sizes_w opc, string OpcodeStr, - SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { + +multiclass avx512_perm_i_sizes_w opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo Idx> { let Predicates = [HasBWI] in - defm NAME: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V512; + defm NAME: avx512_perm_i, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm NAME#128: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V128; - defm NAME#256: avx512_perm_3src, - avx512_perm_3src_mb, - EVEX_V256; + defm NAME#128: avx512_perm_i, EVEX_V128; + defm NAME#256: avx512_perm_i, EVEX_V256; } } -defm VPERMI2D : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3, - avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3, - avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3, - avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2D : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3, - avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3, - avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3, - avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", + avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", + avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2W : avx512_perm_i_sizes_w<0x75, "vpermi2w", + avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", + avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", + avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2W : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3, - avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPERMI2W : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3, - avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +// VPERMT2 +multiclass avx512_perm_t opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { +let Constraints = "$src1 = $dst" in { + defm rr: AVX512_maskable_3src, EVEX_4V, + AVX5128IBase; + + let mayLoad = 1 in + defm rm: 
AVX512_maskable_3src, + EVEX_4V, AVX5128IBase; + } +} +multiclass avx512_perm_t_mb opc, string OpcodeStr, + X86VectorVTInfo _, X86VectorVTInfo IdxVT> { + let mayLoad = 1, Constraints = "$src1 = $dst" in + defm rmb: AVX512_maskable_3src, + AVX5128IBase, EVEX_4V, EVEX_B; +} + +multiclass avx512_perm_t_sizes opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo ShuffleMask> { + defm NAME: avx512_perm_t, + avx512_perm_t_mb, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#128: avx512_perm_t, + avx512_perm_t_mb, EVEX_V128; + defm NAME#256: avx512_perm_t, + avx512_perm_t_mb, EVEX_V256; + } +} + +multiclass avx512_perm_t_sizes_w opc, string OpcodeStr, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo Idx> { + let Predicates = [HasBWI] in + defm NAME: avx512_perm_t, EVEX_V512; + let Predicates = [HasBWI, HasVLX] in { + defm NAME#128: avx512_perm_t, EVEX_V128; + defm NAME#256: avx512_perm_t, EVEX_V256; + } +} + +defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", + avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", + avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMT2W : avx512_perm_t_sizes_w<0x7D, "vpermt2w", + avx512vl_i16_info, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", + avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", + avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask @@ -1265,41 +1401,85 @@ def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), //===----------------------------------------------------------------------===// // avx512_cmp_scalar - AVX512 CMPSS and CMPSD -multiclass avx512_cmp_scalar { - def rr : AVX512Ii8<0xC2, MRMSrcReg, - (outs VK1:$dst), (ins RC:$src1, RC:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", Suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VK1:$dst, (OpNode (VT RC:$src1), RC:$src2, imm:$cc))], - IIC_SSE_ALU_F32S_RR>, EVEX_4V; - def rm : AVX512Ii8<0xC2, MRMSrcMem, - (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, AVXCC:$cc), - !strconcat("vcmp${cc}", Suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VK1:$dst, (OpNode (VT RC:$src1), - (ld_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + +multiclass avx512_cmp_scalar{ + + defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc)>, EVEX_4V; + let mayLoad = 1 in + defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "$src2, $src1", "$src1, $src2", + (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))), + imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + + defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), + "vcmp${cc}"#_.Suffix, + "{sae}, $src2, $src1", "$src1, $src2,{sae}", + (OpNodeRnd (_.VT _.RC:$src1), + (_.VT _.RC:$src2), + imm:$cc, + (i32 FROUND_NO_EXC))>, EVEX_4V, EVEX_B; + // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1, hasSideEffects = 0 in { - def rri_alt : AVX512Ii8<0xC2, MRMSrcReg, - (outs VK1:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), - !strconcat("vcmp", Suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), - [], IIC_SSE_ALU_F32S_RR>, EVEX_4V; + defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs VK1:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V; + defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc, $src2, $src1", "$src1, $src2, $cc">, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; + + defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, + (outs _.KRC:$dst), + (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), + "vcmp"#_.Suffix, + "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">, + EVEX_4V, EVEX_B; + }// let isAsmParserOnly = 1, hasSideEffects = 0 + + let isCodeGenOnly = 1 in { + def rr : AVX512Ii8<0xC2, MRMSrcReg, + (outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc), + !strconcat("vcmp${cc}", _.Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.KRC:$dst, (OpNode _.FRC:$src1, + _.FRC:$src2, + imm:$cc))], + IIC_SSE_ALU_F32S_RR>, EVEX_4V; let mayLoad = 1 in - def rmi_alt : AVX512Ii8<0xC2, MRMSrcMem, - (outs VK1:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), - !strconcat("vcmp", Suffix, - "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), - [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + def rm : AVX512Ii8<0xC2, MRMSrcMem, + (outs _.KRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), + !strconcat("vcmp${cc}", _.Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.KRC:$dst, (OpNode _.FRC:$src1, + (_.ScalarLdFrag addr:$src2), + imm:$cc))], + IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; } } let Predicates = [HasAVX512] in { -defm VCMPSSZ : avx512_cmp_scalar, - XS; -defm VCMPSDZ : avx512_cmp_scalar, - XD, VEX_W; + defm VCMPSSZ : avx512_cmp_scalar, + AVX512XSIi8Base; + defm VCMPSDZ : avx512_cmp_scalar, + AVX512XDIi8Base, VEX_W; } multiclass avx512_icmp_packed opc, string OpcodeStr, SDNode OpNode, @@ -1700,6 +1880,128 @@ def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; +// ---------------------------------------------------------------- +// FPClass +//handle fpclass instruction mask = op(reg_scalar,imm) +// op(mem_scalar,imm) +multiclass avx512_scalar_fpclass opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, Predicate prd> { + let Predicates = [prd] in { + def rr : AVX512; + def rrk : AVX512, EVEX_K; + let mayLoad = 1, AddedComplexity = 20 in { + def rm : AVX512; + def rmk : AVX512, EVEX_K; + } + } +} + +//handle fpclass instruction mask = fpclass(reg_vec, reg_vec, imm) +// fpclass(reg_vec, mem_vec, imm) +// fpclass(reg_vec, broadcast(eltVt), imm) +multiclass avx512_vector_fpclass opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, string mem, string broadcast>{ + def rr : AVX512; + def rrk : AVX512, EVEX_K; + let mayLoad = 1 in { + def rm : AVX512; + def rmk : AVX512, EVEX_K; + def rmb : AVX512,EVEX_B; + def rmbk : AVX512, + EVEX_B, EVEX_K; + } +} + +multiclass avx512_vector_fpclass_all opc, SDNode OpNode, Predicate prd, + string broadcast>{ + let Predicates = [prd] in { + defm Z : avx512_vector_fpclass, EVEX_V512; + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_vector_fpclass, EVEX_V128; + 
defm Z256 : avx512_vector_fpclass, EVEX_V256; + } +} + +multiclass avx512_fp_fpclass_all opcVec, + bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{ + defm PS : avx512_vector_fpclass_all, EVEX_CD8<32, CD8VF>; + defm PD : avx512_vector_fpclass_all,EVEX_CD8<64, CD8VF> , VEX_W; + defm SS : avx512_scalar_fpclass, EVEX_CD8<32, CD8VT1>; + defm SD : avx512_scalar_fpclass, EVEX_CD8<64, CD8VT1>, VEX_W; +} + +defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, X86Vfpclass, + X86Vfpclasss, HasDQI>, AVX512AIi8Base,EVEX; + //----------------------------------------------------------------- // Mask register copy, including // - copy between mask registers @@ -1786,6 +2088,11 @@ let Predicates = [HasDQI] in { (KMOVBmk addr:$dst, VK8:$src)>; def : Pat<(v8i1 (bitconvert (i8 (load addr:$src)))), (KMOVBkm addr:$src)>; + + def : Pat<(store VK4:$src, addr:$dst), + (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK4:$src, VK8))>; + def : Pat<(store VK2:$src, addr:$dst), + (KMOVBmk addr:$dst, (COPY_TO_REGCLASS VK2:$src, VK8))>; } let Predicates = [HasAVX512, NoDQI] in { def : Pat<(store (i8 (bitconvert (v8i1 VK8:$src))), addr:$dst), @@ -1837,10 +2144,15 @@ let Predicates = [HasAVX512] in { (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>; def : Pat<(i32 (anyext VK1:$src)), (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16))>; + def : Pat<(i8 (zext VK1:$src)), (EXTRACT_SUBREG (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>; + def : Pat<(i8 (anyext VK1:$src)), + (EXTRACT_SUBREG + (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_8bit)>; + def : Pat<(i64 (zext VK1:$src)), (AND64ri8 (SUBREG_TO_REG (i64 0), (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>; @@ -1848,17 +2160,19 @@ let Predicates = [HasAVX512] in { (EXTRACT_SUBREG (AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_16bit)>; - def : Pat<(v16i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK16)>; - def : Pat<(v8i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK8)>; -} -let Predicates = [HasBWI] in { - def : Pat<(v32i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK32)>; - def : Pat<(v64i1 (scalar_to_vector VK1:$src)), - (COPY_TO_REGCLASS VK1:$src, VK64)>; } +def : Pat<(v16i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK16)>; +def : Pat<(v8i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK8)>; +def : Pat<(v4i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK4)>; +def : Pat<(v2i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK2)>; +def : Pat<(v32i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK32)>; +def : Pat<(v64i1 (scalar_to_vector VK1:$src)), + (COPY_TO_REGCLASS VK1:$src, VK64)>; // With AVX-512 only, 8-bit mask is promoted to 16-bit mask. 
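(Aside, not part of the patch: the zext/anyext patterns above are what fire when a compare mask is widened into a general-purpose register. A minimal sketch, assuming AVX-512F intrinsics, with an illustrative helper name:)

    #include <immintrin.h>
    #include <cstdint>

    // The (uint32_t) widening of the __mmask16 is selected as KMOVWrk,
    // with an AND only when a single mask bit must be zero-extended.
    uint32_t count_equal_lanes(__m512i a, __m512i b) {
      __mmask16 k = _mm512_cmpeq_epi32_mask(a, b); // vpcmpeqd -> %k0
      return _mm_popcnt_u32((uint32_t)k);          // kmovw %k0, %eax; popcnt
    }
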
@@ -1955,11 +2269,12 @@ multiclass avx512_mask_binop opc, string OpcodeStr, } multiclass avx512_mask_binop_all opc, string OpcodeStr, - SDPatternOperator OpNode, bit IsCommutable> { + SDPatternOperator OpNode, bit IsCommutable, + Predicate prdW = HasAVX512> { defm B : avx512_mask_binop, VEX_4V, VEX_L, PD; defm W : avx512_mask_binop, VEX_4V, VEX_L, PS; + prdW, IsCommutable>, VEX_4V, VEX_L, PS; defm D : avx512_mask_binop, VEX_4V, VEX_L, VEX_W, PD; defm Q : avx512_mask_binop; defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>; defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>; defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>; +defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>; multiclass avx512_mask_binop_int { let Predicates = [HasAVX512] in @@ -2047,59 +2363,48 @@ def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)), (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>; // Mask unpacking -multiclass avx512_mask_unpck opc, string OpcodeStr, - RegisterClass KRC> { - let Predicates = [HasAVX512] in - def rr : I; +multiclass avx512_mask_unpck { + let Predicates = [prd] in { + def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), + (ins KRC:$src1, KRC:$src2), + "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V, VEX_L; + + def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)), + (!cast(NAME##rr) + (COPY_TO_REGCLASS KRCSrc:$src2, KRC), + (COPY_TO_REGCLASS KRCSrc:$src1, KRC))>; + } } -multiclass avx512_mask_unpck_bw opc, string OpcodeStr> { - defm BW : avx512_mask_unpck, - VEX_4V, VEX_L, PD; -} - -defm KUNPCK : avx512_mask_unpck_bw<0x4b, "kunpck">; -def : Pat<(v16i1 (concat_vectors (v8i1 VK8:$src1), (v8i1 VK8:$src2))), - (KUNPCKBWrr (COPY_TO_REGCLASS VK8:$src2, VK16), - (COPY_TO_REGCLASS VK8:$src1, VK16))>; - - -multiclass avx512_mask_unpck_int { - let Predicates = [HasAVX512] in - def : Pat<(!cast("int_x86_avx512_"##IntName##"_bw") - (i16 GR16:$src1), (i16 GR16:$src2)), - (COPY_TO_REGCLASS (!cast(InstName##"BWrr") - (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)), - (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>; -} -defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">; +defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD; +defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS; +defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W; // Mask bit testing multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, - SDNode OpNode> { - let Predicates = [HasAVX512], Defs = [EFLAGS] in + SDNode OpNode, Predicate prd> { + let Predicates = [prd], Defs = [EFLAGS] in def rr : I; } -multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode> { - defm W : avx512_mask_testop, - VEX, PS; - let Predicates = [HasDQI] in - defm B : avx512_mask_testop, - VEX, PD; - let Predicates = [HasBWI] in { - defm Q : avx512_mask_testop, - VEX, PS, VEX_W; - defm D : avx512_mask_testop, - VEX, PD, VEX_W; - } +multiclass avx512_mask_testop_w opc, string OpcodeStr, SDNode OpNode, + Predicate prdW = HasAVX512> { + defm B : avx512_mask_testop, + VEX, PD; + defm W : avx512_mask_testop, + VEX, PS; + defm Q : avx512_mask_testop, + VEX, PS, VEX_W; + defm D : avx512_mask_testop, + VEX, PD, VEX_W; } defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>; +defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>; // Mask shift multiclass avx512_mask_shiftop opc, string OpcodeStr, RegisterClass KRC, @@ -2124,7 +2429,7 @@ multiclass avx512_mask_shiftop_w opc1, bits<8> opc2, string OpcodeStr, 
let Predicates = [HasDQI] in defm D : avx512_mask_shiftop, VEX, TAPD; - } + } } defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86vshli>; @@ -2167,24 +2472,52 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))), def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>; +def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))), + (v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>; + +def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))), + (v16i1 (COPY_TO_REGCLASS (KSHIFTRDri VK32:$src, (i8 16)), VK16))>; + def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))), (v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>; def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))), (v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>; -let Predicates = [HasVLX] in { - def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))), - (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>; - def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), - (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>; - def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), - (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>; - def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), - (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>; - def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), - (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>; -} +def : Pat<(v4i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), + (v4i1 (COPY_TO_REGCLASS VK8:$src, VK4))>; + +def : Pat<(v2i1 (extract_subvector (v8i1 VK8:$src), (iPTR 0))), + (v2i1 (COPY_TO_REGCLASS VK8:$src, VK2))>; + +def : Pat<(v4i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), + (v4i1 (COPY_TO_REGCLASS VK2:$src, VK4))>; + +def : Pat<(v8i1 (insert_subvector undef, (v4i1 VK4:$src), (iPTR 0))), + (v8i1 (COPY_TO_REGCLASS VK4:$src, VK8))>; +def : Pat<(v8i1 (insert_subvector undef, (v2i1 VK2:$src), (iPTR 0))), + (v8i1 (COPY_TO_REGCLASS VK2:$src, VK8))>; + +def : Pat<(v32i1 (insert_subvector undef, VK2:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK2:$src, VK32))>; +def : Pat<(v32i1 (insert_subvector undef, VK4:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK4:$src, VK32))>; +def : Pat<(v32i1 (insert_subvector undef, VK8:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK8:$src, VK32))>; +def : Pat<(v32i1 (insert_subvector undef, VK16:$src, (iPTR 0))), + (v32i1 (COPY_TO_REGCLASS VK16:$src, VK32))>; + +def : Pat<(v64i1 (insert_subvector undef, VK2:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK2:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK4:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK4:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK8:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK8:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK16:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK16:$src, VK64))>; +def : Pat<(v64i1 (insert_subvector undef, VK32:$src, (iPTR 0))), + (v64i1 (COPY_TO_REGCLASS VK32:$src, VK64))>; + def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))), (v8i1 (COPY_TO_REGCLASS @@ -2304,23 +2637,21 @@ multiclass avx512_load_vl opc, string OpcodeStr, multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, PatFrag st_frag, PatFrag mstore> { - let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { - def rr_alt : AVX512PI, EVEX; - let Constraints = "$src1 = $dst" in - def rrk_alt : AVX512PI, EVEX; + def rrk_REV : AVX512PI, EVEX, EVEX_K; - def rrkz_alt : AVX512PI, 
EVEX, EVEX_KZ; - } + let mayStore = 1 in { def mr : AVX512PI; -let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), - (VMOVUPSZmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm), - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; -} - defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, @@ -2502,17 +2817,6 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (v16i32 VR512:$src))), (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } -// NoVLX patterns -let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(masked_store addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), - (VMOVDQU32Zmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; -} // Move Int Doubleword to Packed Double Int // @@ -2520,32 +2824,37 @@ def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>, - EVEX, VEX_LIG; + EVEX; def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))], - IIC_SSE_MOVDQ>, EVEX, VEX_W, VEX_LIG; + IIC_SSE_MOVDQ>, EVEX, VEX_W; +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in +def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), + (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", []>, + EVEX, VEX_W, EVEX_CD8<64, CD8VT1>; let isCodeGenOnly = 1 in { -def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), +def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (bitconvert GR64:$src))], + [(set FR64X:$dst, (bitconvert GR64:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; -def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), +def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert FR64:$src))], + [(set GR64:$dst, (bitconvert FR64X:$src))], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteMove]>; -} -def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), +def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", - [(store 
(i64 (bitconvert FR64:$src)), addr:$dst)], + [(store (i64 (bitconvert FR64X:$src)), addr:$dst)], IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>, EVEX_CD8<64, CD8VT1>; +} // Move Int Doubleword to Single Scalar // @@ -2553,27 +2862,27 @@ let isCodeGenOnly = 1 in { def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert GR32:$src))], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG; + IIC_SSE_MOVDQ>, EVEX; def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; } // Move doubleword from xmm register to r/m32 // def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (vector_extract (v4i32 VR128X:$src), + [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, - EVEX, VEX_LIG; + EVEX; def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", - [(store (i32 (vector_extract (v4i32 VR128X:$src), + [(store (i32 (extractelt (v4i32 VR128X:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, - EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + EVEX, EVEX_CD8<32, CD8VT1>; // Move quadword from xmm1 register to r/m64 // @@ -2581,16 +2890,28 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))], - IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_LIG, VEX_W, + IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, Requires<[HasAVX512, In64BitMode]>; -def VMOVPQIto64Zmr : I<0xD6, MRMDestMem, (outs), - (ins i64mem:$dst, VR128X:$src), - "vmovq\t{$src, $dst|$dst, $src}", - [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), - addr:$dst)], IIC_SSE_MOVDQ>, - EVEX, PD, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>, - Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; +let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in +def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [], IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W, + Requires<[HasAVX512, In64BitMode]>; + +def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs), + (ins i64mem:$dst, VR128X:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(store (extractelt (v2i64 VR128X:$src), (iPTR 0)), + addr:$dst)], IIC_SSE_MOVDQ>, + EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>, + Sched<[WriteStore]>, Requires<[HasAVX512, In64BitMode]>; + +let hasSideEffects = 0 in +def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src), + "vmovq.s\t{$src, $dst|$dst, $src}",[]>, + EVEX, VEX_W; // Move Scalar Single to Double Int // @@ -2599,92 +2920,95 @@ def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32X:$src))], - IIC_SSE_MOVD_ToGP>, EVEX, VEX_LIG; + IIC_SSE_MOVD_ToGP>, EVEX; def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32X:$src)), addr:$dst)], - IIC_SSE_MOVDQ>, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; + IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>; } // Move Quadword Int to Packed Quadword Int // -def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), +def 
VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, - EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; + EVEX, VEX_W, EVEX_CD8<8, CD8VT8>; //===----------------------------------------------------------------------===// // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// -multiclass avx512_move_scalar { - let hasSideEffects = 0 in { - def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, (vt (OpNode VR128X:$src1, - (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG; - let Constraints = "$src1 = $dst" in - def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst), - (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3), - !strconcat(asm, - "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"), - [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K; - def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, - EVEX, VEX_LIG; +multiclass avx512_move_scalar { + defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), + asm, "$src2, $src1","$src1, $src2", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2))), + IIC_SSE_MOV_S_RR>, EVEX_4V; + let Constraints = "$src1 = $dst" , mayLoad = 1 in + defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _, + (outs _.RC:$dst), + (ins _.ScalarMemOp:$src), + asm,"$src","$src", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector + (_.ScalarLdFrag addr:$src)))))>, EVEX; + let isCodeGenOnly = 1 in { + def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), + (ins _.RC:$src1, _.FRC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, + (scalar_to_vector _.FRC:$src2))))], + _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V; + let mayLoad = 1 in + def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], + _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX; + } let mayStore = 1 in { - def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG; - def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src), - !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), - [], IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG, EVEX_K; + def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>, + EVEX; + def mrk: AVX512PI<0x11, MRMDestMem, (outs), + (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), + [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K; } // mayStore - } //hasSideEffects = 0 } -let ExeDomain = SSEPackedSingle in -defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem, - loadf32>, XS, EVEX_CD8<32, CD8VT1>; +defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>, + VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; -let ExeDomain = SSEPackedDouble in -defm VMOVSDZ : 
avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem, - loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>, + VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), - (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), - VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>; + (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X), + VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>; def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), - (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), - VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; + (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X), + VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; -// For the disassembler -let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { - def VMOVSSZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), - (ins VR128X:$src1, FR32X:$src2), - "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XS, EVEX_4V, VEX_LIG; - def VMOVSDZrr_REV : SI<0x11, MRMDestReg, (outs VR128X:$dst), - (ins VR128X:$src1, FR64X:$src2), - "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}", [], - IIC_SSE_MOV_S_RR>, - XD, EVEX_4V, VEX_LIG, VEX_W; -} +defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info, + (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), + "vmovss.s", "$src2, $src1", "$src1, $src2", []>, + XS, EVEX_4V, VEX_LIG; + +defm VMOVSSDrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info, + (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2), + "vmovsd.s", "$src2, $src1", "$src1, $src2", []>, + XD, EVEX_4V, VEX_LIG, VEX_W; let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { @@ -2768,10 +3092,10 @@ let Predicates = [HasAVX512] in { (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>; // Extract and store. 
- def : Pat<(store (f32 (vector_extract (v4f32 VR128X:$src), (iPTR 0))), + def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))), addr:$dst), (VMOVSSZmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128X:$src), FR32X))>; - def : Pat<(store (f64 (vector_extract (v2f64 VR128X:$src), (iPTR 0))), + def : Pat<(store (f64 (extractelt (v2f64 VR128X:$src), (iPTR 0))), addr:$dst), (VMOVSDZmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128X:$src), FR64X))>; @@ -2835,7 +3159,7 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (v2i64 VR128X:$src))))], IIC_SSE_MOVQ_RR>, EVEX, VEX_W; -let AddedComplexity = 20 in +let AddedComplexity = 20 , isCodeGenOnly = 1 in def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -2964,7 +3288,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _, OpndItins itins, bit IsCommutable = 0> { defm rr : AVX512_maskable, @@ -2972,7 +3296,7 @@ multiclass avx512_binop_rm opc, string OpcodeStr, SDNode OpNode, let mayLoad = 1 in defm rm : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, avx512_binop_rm { let mayLoad = 1 in defm rmb : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm Q : avx512_binop_rm_vl_q; - defm D : avx512_binop_rm_vl_d; } multiclass avx512_binop_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, bit IsCommutable = 0> { - defm W : avx512_binop_rm_vl_w; - defm B : avx512_binop_rm_vl_b; } @@ -3086,15 +3410,15 @@ multiclass avx512_binop_rm_vl_all opc_b, bits<8> opc_w, } multiclass avx512_binop_rm2 opc, string OpcodeStr, OpndItins itins, - SDNode OpNode,X86VectorVTInfo _Src, + SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst, bit IsCommutable = 0> { - defm rr : AVX512_maskable, + itins.rr, IsCommutable>, AVX512BIBase, EVEX_4V; let mayLoad = 1 in { defm rm : AVX512_maskable opc, string OpcodeStr, OpndItins itins, AVX512BIBase, EVEX_4V; defm rmb : AVX512_maskable, AVX512BIBase, EVEX_4V, EVEX_B; @@ -3127,24 +3451,24 @@ defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, SSE_INTALU_ITINS_P, HasBWI, 0>; defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, - SSE_INTALU_ITINS_P, HasBWI, 1>; + SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, - SSE_INTALU_ITINS_P, HasBWI, 0>; -defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmull", mul, - SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmull", mul, - SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulh", mulhs, SSE_INTALU_ITINS_P, - HasBWI, 1>; -defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhu", mulhu, SSE_INTMUL_ITINS_P, - HasBWI, 1>; -defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrs", X86mulhrs, SSE_INTMUL_ITINS_P, - HasBWI, 1>, T8PD; -defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, + SSE_INTALU_ITINS_P, HasBWI, 0>; +defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, + SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; +defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, SSE_INTALU_ITINS_P, HasBWI, 1>; - +defm VPMULLQ : avx512_binop_rm_vl_q<0x40, 
"vpmullq", mul, + SSE_INTALU_ITINS_P, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTALU_ITINS_P, + HasBWI, 1>; +defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, + HasBWI, 1>; +defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P, + HasBWI, 1>, T8PD; +defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, + SSE_INTALU_ITINS_P, HasBWI, 1>; + multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, SDNode OpNode, bit IsCommutable = 0> { @@ -3159,7 +3483,7 @@ multiclass avx512_binop_all opc, string OpcodeStr, OpndItins itins, v4i32x_info, v2i64x_info, IsCommutable>, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; } -} +} defm VPMULDQ : avx512_binop_all<0x28, "vpmuldq", SSE_INTALU_ITINS_P, X86pmuldq, 1>,T8PD; @@ -3170,25 +3494,25 @@ multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { let mayLoad = 1 in { defm rmb : AVX512_maskable, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>; } } -multiclass avx512_packs_rm opc, string OpcodeStr, - SDNode OpNode,X86VectorVTInfo _Src, +multiclass avx512_packs_rm opc, string OpcodeStr, + SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst> { - defm rr : AVX512_maskable, EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V; let mayLoad = 1 in { @@ -3229,102 +3553,59 @@ multiclass avx512_packs_all_i16_i8 opc, string OpcodeStr, v16i8x_info>, EVEX_V128; } } + +multiclass avx512_vpmadd opc, string OpcodeStr, + SDNode OpNode, AVX512VLVectorVTInfo _Src, + AVX512VLVectorVTInfo _Dst> { + defm NAME#Z : avx512_packs_rm, EVEX_V512; + let Predicates = [HasVLX] in { + defm NAME#Z256 : avx512_packs_rm, EVEX_V256; + defm NAME#Z128 : avx512_packs_rm, EVEX_V128; + } +} + let Predicates = [HasBWI] in { defm VPACKSSDW : avx512_packs_all_i32_i16<0x6B, "vpackssdw", X86Packss>, PD; defm VPACKUSDW : avx512_packs_all_i32_i16<0x2b, "vpackusdw", X86Packus>, T8PD; defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512BIBase, VEX_W; defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase, VEX_W; + + defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw, + avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD; + defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, + avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase; } -defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxs", smax, +defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxs", smax, +defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxu", umax, +defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxu", umax, +defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpmins", smin, +defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; -defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpmins", smin, +defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, SSE_INTALU_ITINS_P, HasBWI, 1>; defm VPMINS : 
avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; -defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminu", umin, +defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, SSE_INTALU_ITINS_P, HasBWI, 1>; -defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminu", umin, +defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; - -//===----------------------------------------------------------------------===// -// AVX-512 - Unpack Instructions -//===----------------------------------------------------------------------===// - -multiclass avx512_unpack_fp opc, SDNode OpNode, ValueType vt, - PatFrag mem_frag, RegisterClass RC, - X86MemOperand x86memop, string asm, - Domain d> { - def rr : AVX512PI, EVEX_4V; - def rm : AVX512PI, EVEX_4V; -} - -defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, loadv8f64, - VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, loadv8f64, - VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, loadv8f64, - VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, loadv8f64, - VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop> { - def rr : AVX512BI, EVEX_4V; - def rm : AVX512BI, EVEX_4V; -} -defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; -defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -3362,12 +3643,12 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, let isCodeGenOnly = 1, isCommutable = IsCommutable, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), - (ins _.FRC:$src1, _.FRC:$src2), + (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], itins.rr>; def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), - (ins _.FRC:$src1, _.ScalarMemOp:$src2), + (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))], itins.rr>; @@ -3375,7 +3656,7 @@ multiclass avx512_fp_scalar opc, string OpcodeStr,X86VectorVTInfo _, } multiclass avx512_fp_scalar_round opc, string 
OpcodeStr,X86VectorVTInfo _, - SDNode VecNode, OpndItins itins, bit IsCommutable> { + SDNode VecNode, OpndItins itins, bit IsCommutable = 0> { defm rrb : AVX512_maskable_scalar opc, string OpcodeStr, SDNode OpNodeRnd, EVEX_4V, EVEX_B; } -multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_fp_binop_p opc, string OpcodeStr, SDNode OpNode, bit IsCommutable = 0> { defm PSZ : avx512_fp_packed, EVEX_V512, PS, @@ -3514,7 +3795,7 @@ defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, 1>, avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>; defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, 1>, avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub>, avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv>, avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>; @@ -3550,13 +3831,34 @@ multiclass avx512_fp_scalef_p opc, string OpcodeStr, SDNode OpNode, }//let mayLoad = 1 } -multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { - defm PSZ : avx512_fp_scalef_p, +multiclass avx512_fp_scalef_scalar opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + defm rr: AVX512_maskable_scalar; + let mayLoad = 1 in { + defm rm: AVX512_maskable_scalar; + }//let mayLoad = 1 +} + +multiclass avx512_fp_scalef_all opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> { + defm PSZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_scalef_p, + defm PDZ : avx512_fp_scalef_p, avx512_fp_round_packed, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + defm SSZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V,EVEX_CD8<32, CD8VT1>; + defm SDZ128 : avx512_fp_scalef_scalar, + avx512_fp_scalar_round, + EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; + // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp_scalef_p, @@ -3569,7 +3871,7 @@ multiclass avx512_fp_scalef_all opc, string OpcodeStr, SDNode OpNode> { EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VSCALEF : avx512_fp_scalef_all<0x2C, "vscalef", X86scalef>, T8PD; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; //===----------------------------------------------------------------------===// // AVX-512 VPTESTM instructions @@ -3586,7 +3888,7 @@ multiclass avx512_vptest opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable_cmp, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; @@ -3748,12 +4050,12 @@ multiclass avx512_shift_rmi_sizes opc, Format ImmFormR, Format ImmFormM, VTInfo.info256>, EVEX_V256; defm Z128: avx512_shift_rmi, - avx512_shift_rmbi, EVEX_V128; } } -multiclass avx512_shift_rmi_w opcw, +multiclass avx512_shift_rmi_w opcw, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in @@ -3846,6 +4148,27 @@ multiclass avx512_var_shift_types opc, string OpcodeStr, avx512vl_i64_info>, VEX_W; } +// Use 512bit version to implement 128/256 bit in case NoVLX. 
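(Aside, not part of the patch: the avx512_var_shift_w_lowering multiclass that follows implements exactly this widening. A rough C-level analogue, assuming AVX512BW without AVX512VL; the helper name is illustrative:)

    #include <immintrin.h>

    // Without VLX there is no 256-bit vpsllvw, so operate at 512 bits:
    // insert the YMM values into ZMM registers (the INSERT_SUBREGs in the
    // patterns below), shift, and take the low half back (EXTRACT_SUBREG).
    __m256i sllv_epi16_novlx(__m256i v, __m256i cnt) {
      __m512i wv = _mm512_castsi256_si512(v);
      __m512i wc = _mm512_castsi256_si512(cnt);
      return _mm512_castsi512_si256(_mm512_sllv_epi16(wv, wc));
    }
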
+multiclass avx512_var_shift_w_lowering { + let Predicates = [HasBWI, NoVLX] in { + def : Pat<(_.info256.VT (OpNode (_.info256.VT _.info256.RC:$src1), + (_.info256.VT _.info256.RC:$src2))), + (EXTRACT_SUBREG + (!cast(NAME#"WZrr") + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src1, sub_ymm), + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)), + sub_ymm)>; + + def : Pat<(_.info128.VT (OpNode (_.info128.VT _.info128.RC:$src1), + (_.info128.VT _.info128.RC:$src2))), + (EXTRACT_SUBREG + (!cast(NAME#"WZrr") + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src1, sub_xmm), + (INSERT_SUBREG (_.info512.VT (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)), + sub_xmm)>; + } +} + multiclass avx512_var_shift_w opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in @@ -3861,11 +4184,14 @@ multiclass avx512_var_shift_w opc, string OpcodeStr, } defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>, - avx512_var_shift_w<0x12, "vpsllvw", shl>; + avx512_var_shift_w<0x12, "vpsllvw", shl>, + avx512_var_shift_w_lowering; defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>, - avx512_var_shift_w<0x11, "vpsravw", sra>; + avx512_var_shift_w<0x11, "vpsravw", sra>, + avx512_var_shift_w_lowering; defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>, - avx512_var_shift_w<0x10, "vpsrlvw", srl>; + avx512_var_shift_w<0x10, "vpsrlvw", srl>, + avx512_var_shift_w_lowering; defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>; defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>; @@ -3916,19 +4242,77 @@ defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", X86VPermi, avx512vl_f64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - VPERMIL +//===----------------------------------------------------------------------===// +multiclass avx512_permil_vec OpcVar, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, X86VectorVTInfo Ctrl> { + defm rr: AVX512_maskable, + T8PD, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable, + T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + defm rmb: AVX512_maskable, + T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + }//let mayLoad = 1 +} + +multiclass avx512_permil_vec_common OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_permil_vec, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_permil_vec, EVEX_V128; + defm Z256 : avx512_permil_vec, EVEX_V256; + } +} + +multiclass avx512_permil OpcImm, bits<8> OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + + defm NAME: avx512_permil_vec_common; + defm NAME: avx512_shift_rmi_sizes, + EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; +} + +defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, + avx512vl_i32_info>; +defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, + avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW //===----------------------------------------------------------------------===// defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", - X86PShufd, avx512vl_i32_info>, + X86PShufd, avx512vl_i32_info>, EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; defm VPSHUFH : avx512_shift_rmi_w<0x70, 
                                     MRMSrcReg, MRMSrcMem, "vpshufhw",
-                                    X86PShufhw>, EVEX, AVX512XSIi8Base, VEX_W;
+                                    X86PShufhw>, EVEX, AVX512XSIi8Base;
 defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw",
-                                    X86PShuflw>, EVEX, AVX512XDIi8Base, VEX_W;
-
+                                    X86PShuflw>, EVEX, AVX512XDIi8Base;
+
 multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
   let Predicates = [HasBWI] in
     defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, v64i8_info>, EVEX_V512;
@@ -3941,55 +4325,6 @@ multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode> {
 defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb>;

-//===----------------------------------------------------------------------===//
-// AVX-512 - MOVDDUP
-//===----------------------------------------------------------------------===//
-
-multiclass avx512_movddup<string OpcodeStr, RegisterClass RC, ValueType VT,
-                          X86MemOperand x86memop, PatFrag memop_frag> {
-def rr : AVX512PDI<0x12, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set RC:$dst, (VT (X86Movddup RC:$src)))]>, EVEX;
-def rm : AVX512PDI<0x12, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                   [(set RC:$dst,
-                     (VT (X86Movddup (memop_frag addr:$src))))]>, EVEX;
-}
-
-defm VMOVDDUPZ : avx512_movddup<"vmovddup", VR512, v8f64, f512mem, loadv8f64>,
-                 VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
-def : Pat<(X86Movddup (v8f64 (scalar_to_vector (loadf64 addr:$src)))),
-          (VMOVDDUPZrm addr:$src)>;
-
-//===---------------------------------------------------------------------===//
-// Replicate Single FP - MOVSHDUP and MOVSLDUP
-//===---------------------------------------------------------------------===//
-multiclass avx512_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr,
-                                ValueType vt, RegisterClass RC, PatFrag mem_frag,
-                                X86MemOperand x86memop> {
-  def rr : AVX512XSI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
-                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                     [(set RC:$dst, (vt (OpNode RC:$src)))]>, EVEX;
-  let mayLoad = 1 in
-  def rm : AVX512XSI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
-                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-                     [(set RC:$dst, (OpNode (mem_frag addr:$src)))]>, EVEX;
-}
-
-defm VMOVSHDUPZ : avx512_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
-                                       v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
-                                       EVEX_CD8<32, CD8VF>;
-defm VMOVSLDUPZ : avx512_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
-                                       v16f32, VR512, loadv16f32, f512mem>, EVEX_V512,
-                                       EVEX_CD8<32, CD8VF>;
-
-def : Pat<(v16i32 (X86Movshdup VR512:$src)), (VMOVSHDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movshdup (loadv16i32 addr:$src))),
-          (VMOVSHDUPZrm addr:$src)>;
-def : Pat<(v16i32 (X86Movsldup VR512:$src)), (VMOVSLDUPZrr VR512:$src)>;
-def : Pat<(v16i32 (X86Movsldup (loadv16i32 addr:$src))),
-          (VMOVSLDUPZrm addr:$src)>;
-
 //===----------------------------------------------------------------------===//
 // Move Low to High and High to Low packed FP Instructions
 //===----------------------------------------------------------------------===//
@@ -4016,6 +4351,115 @@ let Predicates = [HasAVX512] in {
             (VMOVHLPSZrr VR128X:$src1, VR128X:$src2)>;
 }

+//===----------------------------------------------------------------------===//
+// VMOVHPS/PD VMOVLPS Instructions
+// All patterns were taken from the SSE implementation.
+//===----------------------------------------------------------------------===// +multiclass avx512_mov_hilo_packed opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayLoad = 1 in + def rm : AVX512, EVEX_4V; +} + +defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps, + v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; +defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Movlhpd, + v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; +defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps, + v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS; +defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd, + v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W; + +let Predicates = [HasAVX512] in { + // VMOVHPS patterns + def : Pat<(X86Movlhps VR128X:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(X86Movlhps VR128X:$src1, + (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (VMOVHPSZ128rm VR128X:$src1, addr:$src2)>; + // VMOVHPD patterns + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, + (scalar_to_vector (loadf64 addr:$src2)))), + (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Unpckl VR128X:$src1, + (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), + (VMOVHPDZ128rm VR128X:$src1, addr:$src2)>; + // VMOVLPS patterns + def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))), + (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86Movlps VR128X:$src1, (load addr:$src2))), + (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>; + // VMOVLPD patterns + def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2i64 (X86Movlpd VR128X:$src1, (load addr:$src2))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; + def : Pat<(v2f64 (X86Movsd VR128X:$src1, + (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), + (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>; +} + +let mayStore = 1 in { +def VMOVHPSZ128mr : AVX512PSI<0x17, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovhps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (X86Unpckh (bc_v2f64 (v4f32 VR128X:$src)), + (bc_v2f64 (v4f32 VR128X:$src))), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<32, CD8VT2>; +def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (X86Unpckh VR128X:$src, VR128X:$src)), + (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; +def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovlps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128X:$src)), + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<32, CD8VT2>; +def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs), + (ins f64mem:$dst, VR128X:$src), + "vmovlpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (v2f64 VR128X:$src), + (iPTR 0))), addr:$dst)], + IIC_SSE_MOV_LH>, + EVEX, EVEX_CD8<64, CD8VT1>, VEX_W; +} +let Predicates = [HasAVX512] in { + // VMOVHPD patterns + def : Pat<(store (f64 (vector_extract + (v2f64 (X86VPermilpi VR128X:$src, (i8 1))), + (iPTR 0))), addr:$dst), + (VMOVHPDZ128mr addr:$dst, VR128X:$src)>; + // VMOVLPS patterns + def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>; + def : Pat<(store (v4i32 (X86Movlps + (bc_v4i32 
(loadv2i64 addr:$src1)), VR128X:$src2)), addr:$src1), + (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>; + // VMOVLPD patterns + def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>; + def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128X:$src2)), + addr:$src1), + (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>; +} //===----------------------------------------------------------------------===// // FMA - Fused Multiply Operations // @@ -4034,7 +4478,7 @@ multiclass avx512_fma3p_213_rm opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>, - AVX512FMA3Base; + AVX512FMA3Base; defm mb: AVX512_maskable_3src opc, RegisterClass SrcRC, RegisterClass DstRC, - Intrinsic Int, Operand memop, ComplexPattern mem_cpat, - string asm> { -let hasSideEffects = 0 in { - def rr : SI, EVEX, VEX_LIG, - Requires<[HasAVX512]>; - let mayLoad = 1 in - def rm : SI, EVEX, VEX_LIG, - Requires<[HasAVX512]>; -} // hasSideEffects = 0 +multiclass avx512_cvt_s_int_round opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, + Operand memop, ComplexPattern mem_cpat, string asm> { + let hasSideEffects = 0, Predicates = [HasAVX512] in { + def rr : SI, EVEX, VEX_LIG; + def rb : SI, + EVEX, VEX_LIG, EVEX_B, EVEX_RC; + let mayLoad = 1 in + def rm : SI, EVEX, VEX_LIG; + } // hasSideEffects = 0, Predicates = [HasAVX512] } -let Predicates = [HasAVX512] in { + // Convert float/double to signed/unsigned int 32/64 -defm VCVTSS2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, +defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, ssmem, sse_load_f32, "cvtss2si">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse_cvtss2si64, +defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, + int_x86_sse_cvtss2si64, ssmem, sse_load_f32, "cvtss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtss2usi, +defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32, + int_x86_avx512_cvtss2usi, ssmem, sse_load_f32, "cvtss2usi">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, +defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64, int_x86_avx512_cvtss2usi64, ssmem, sse_load_f32, "cvtss2usi">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTSD2SIZ: avx512_cvt_s_int<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, +defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2SI64Z: avx512_cvt_s_int<0x2D, VR128X, GR64, int_x86_sse2_cvtsd2si64, +defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, + int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USIZ: avx512_cvt_s_int<0x79, VR128X, GR32, int_x86_avx512_cvtsd2usi, +defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32, + int_x86_avx512_cvtsd2usi, sdmem, sse_load_f64, "cvtsd2usi">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USI64Z: avx512_cvt_s_int<0x79, VR128X, GR64, +defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64, int_x86_avx512_cvtsd2usi64, sdmem, sse_load_f64, "cvtsd2usi">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -let isCodeGenOnly = 1 in { +let isCodeGenOnly = 1 , Predicates = [HasAVX512] in { defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, 
VR128X, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", SSE_CVT_Scalar, 0>, XS, EVEX_4V; @@ -4495,121 +4944,170 @@ let isCodeGenOnly = 1 in { defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}", SSE_CVT_Scalar, 0>, XD, EVEX_4V; -} // isCodeGenOnly = 1 +} // isCodeGenOnly = 1, Predicates = [HasAVX512] // Convert float/double to signed/unsigned int 32/64 with truncation -let isCodeGenOnly = 1 in { - defm Int_VCVTTSS2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse_cvttss2si, - ssmem, sse_load_f32, "cvttss2si">, - XS, EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSS2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, - int_x86_sse_cvttss2si64, ssmem, sse_load_f32, - "cvttss2si">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSD2SIZ : avx512_cvt_s_int<0x2C, VR128X, GR32, int_x86_sse2_cvttsd2si, - sdmem, sse_load_f64, "cvttsd2si">, XD, - EVEX_CD8<64, CD8VT1>; - defm Int_VCVTTSD2SI64Z : avx512_cvt_s_int<0x2C, VR128X, GR64, - int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, - "cvttsd2si">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; - defm Int_VCVTTSS2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, - int_x86_avx512_cvttss2usi, ssmem, sse_load_f32, - "cvttss2usi">, XS, EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSS2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, - int_x86_avx512_cvttss2usi64, ssmem, - sse_load_f32, "cvttss2usi">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; - defm Int_VCVTTSD2USIZ : avx512_cvt_s_int<0x78, VR128X, GR32, - int_x86_avx512_cvttsd2usi, - sdmem, sse_load_f64, "cvttsd2usi">, XD, - EVEX_CD8<64, CD8VT1>; - defm Int_VCVTTSD2USI64Z : avx512_cvt_s_int<0x78, VR128X, GR64, - int_x86_avx512_cvttsd2usi64, sdmem, - sse_load_f64, "cvttsd2usi">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; -} // isCodeGenOnly = 1 +multiclass avx512_cvt_s_all opc, string asm, X86VectorVTInfo _SrcRC, + X86VectorVTInfo _DstRC, SDNode OpNode, + SDNode OpNodeRnd>{ +let Predicates = [HasAVX512] in { + def rr : SI, EVEX; + def rb : SI, EVEX, EVEX_B; + def rm : SI, + EVEX; -multiclass avx512_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, - SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, - string asm> { - def rr : SI, EVEX; - def rm : SI, EVEX; + let isCodeGenOnly = 1,hasSideEffects = 0 in { + def rr_Int : SI, EVEX, VEX_LIG; + def rb_Int : SI, + EVEX,VEX_LIG , EVEX_B; + let mayLoad = 1 in + def rm_Int : SI, EVEX, VEX_LIG; + + } // isCodeGenOnly = 1, hasSideEffects = 0 +} //HasAVX512 } -defm VCVTTSS2SIZ : avx512_cvt_s<0x2C, FR32X, GR32, fp_to_sint, f32mem, - loadf32, "cvttss2si">, XS, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2USIZ : avx512_cvt_s<0x78, FR32X, GR32, fp_to_uint, f32mem, - loadf32, "cvttss2usi">, XS, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2SI64Z : avx512_cvt_s<0x2C, FR32X, GR64, fp_to_sint, f32mem, - loadf32, "cvttss2si">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSS2USI64Z : avx512_cvt_s<0x78, FR32X, GR64, fp_to_uint, f32mem, - loadf32, "cvttss2usi">, XS, VEX_W, - EVEX_CD8<32, CD8VT1>; -defm VCVTTSD2SIZ : avx512_cvt_s<0x2C, FR64X, GR32, fp_to_sint, f64mem, - loadf64, "cvttsd2si">, XD, - EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2USIZ : avx512_cvt_s<0x78, FR64X, GR32, fp_to_uint, f64mem, - loadf64, "cvttsd2usi">, XD, - EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2SI64Z : avx512_cvt_s<0x2C, FR64X, GR64, fp_to_sint, f64mem, - loadf64, "cvttsd2si">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; -defm VCVTTSD2USI64Z : avx512_cvt_s<0x78, FR64X, GR64, fp_to_uint, f64mem, - loadf64, "cvttsd2usi">, XD, VEX_W, - EVEX_CD8<64, CD8VT1>; + +defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "cvttss2si", 
                                     f32x_info, i32x_info,
+                                   fp_to_sint,X86cvttss2IntRnd>,
+                                   XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i64x_info,
+                                   fp_to_sint,X86cvttss2IntRnd>,
+                                   VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i32x_info,
+                                   fp_to_sint,X86cvttsd2IntRnd>,
+                                   XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i64x_info,
+                                   fp_to_sint,X86cvttsd2IntRnd>,
+                                   VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i32x_info,
+                                   fp_to_uint,X86cvttss2UIntRnd>,
+                                   XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i64x_info,
+                                   fp_to_uint,X86cvttss2UIntRnd>,
+                                   XS,VEX_W, EVEX_CD8<32, CD8VT1>;
+defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i32x_info,
+                                   fp_to_uint,X86cvttsd2UIntRnd>,
+                                   XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i64x_info,
+                                   fp_to_uint,X86cvttsd2UIntRnd>,
+                                   XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+let Predicates = [HasAVX512] in {
+  def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
+            (VCVTTSS2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
+            (VCVTTSS2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+  def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
+            (VCVTTSD2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+  def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
+            (VCVTTSD2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+ } // HasAVX512
 //===----------------------------------------------------------------------===//
 // AVX-512 Convert from float to double and back
 //===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in {
-def VCVTSS2SDZrr : AVX512XSI<0x5A, MRMSrcReg, (outs FR64X:$dst),
-                   (ins FR32X:$src1, FR32X:$src2),
-                   "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                   []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSS2SDZrm : AVX512XSI<0x5A, MRMSrcMem, (outs FR64X:$dst),
-                   (ins FR32X:$src1, f32mem:$src2),
-                   "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                   []>, EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
-                   EVEX_CD8<32, CD8VT1>;
-
-// Convert scalar double to scalar single
-def VCVTSD2SSZrr : AVX512XDI<0x5A, MRMSrcReg, (outs FR32X:$dst),
-                   (ins FR64X:$src1, FR64X:$src2),
-                   "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                   []>, EVEX_4V, VEX_LIG, VEX_W, Sched<[WriteCvtF2F]>;
-let mayLoad = 1 in
-def VCVTSD2SSZrm : AVX512XDI<0x5A, MRMSrcMem, (outs FR32X:$dst),
-                   (ins FR64X:$src1, f64mem:$src2),
-                   "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                   []>, EVEX_4V, VEX_LIG, VEX_W,
-                   Sched<[WriteCvtF2FLd, ReadAfterLd]>, EVEX_CD8<64, CD8VT1>;
+multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                X86VectorVTInfo _Src, SDNode OpNode> {
+  defm rr : AVX512_maskable_scalar,
+            EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
+  defm rm : AVX512_maskable_scalar,
+            EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
 }
-def : Pat<(f64 (fextend FR32X:$src)), (VCVTSS2SDZrr FR32X:$src, FR32X:$src)>,
-          Requires<[HasAVX512]>;
-def : Pat<(fextend (loadf32 addr:$src)),
-          (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[HasAVX512]>;
+// Scalar conversion with SAE - suppress all exceptions
+multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                    X86VectorVTInfo _Src,
SDNode OpNodeRnd> { + defm rrb : AVX512_maskable_scalar, + EVEX_4V, VEX_LIG, EVEX_B; +} -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrm (f32 (IMPLICIT_DEF)), addr:$src)>, +// Scalar Conversion with rounding control (RC) +multiclass avx512_cvt_fp_rc_scalar opc, string OpcodeStr, X86VectorVTInfo _, + X86VectorVTInfo _Src, SDNode OpNodeRnd> { + defm rrb : AVX512_maskable_scalar, + EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>, + EVEX_B, EVEX_RC; +} +multiclass avx512_cvt_fp_scalar_sd2ss opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, X86VectorVTInfo _src, + X86VectorVTInfo _dst> { + let Predicates = [HasAVX512] in { + defm Z : avx512_cvt_fp_scalar, + avx512_cvt_fp_rc_scalar, VEX_W, EVEX_CD8<64, CD8VT1>, + EVEX_V512, XD; + } +} + +multiclass avx512_cvt_fp_scalar_ss2sd opc, string OpcodeStr, SDNode OpNode, + SDNode OpNodeRnd, X86VectorVTInfo _src, + X86VectorVTInfo _dst> { + let Predicates = [HasAVX512] in { + defm Z : avx512_cvt_fp_scalar, + avx512_cvt_fp_sae_scalar, + EVEX_CD8<32, CD8VT1>, XS, EVEX_V512; + } +} +defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround, + X86froundRnd, f64x_info, f32x_info>; +defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext, + X86fpextRnd,f32x_info, f64x_info >; + +def : Pat<(f64 (fextend FR32X:$src)), + (COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X), + (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>, + Requires<[HasAVX512]>; +def : Pat<(f64 (fextend (loadf32 addr:$src))), + (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, + Requires<[HasAVX512]>; + +def : Pat<(f64 (extloadf32 addr:$src)), + (COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, Requires<[HasAVX512, OptForSize]>; -def : Pat<(extloadf32 addr:$src), - (VCVTSS2SDZrr (f32 (IMPLICIT_DEF)), (VMOVSSZrm addr:$src))>, - Requires<[HasAVX512, OptForSpeed]>; +def : Pat<(f64 (extloadf32 addr:$src)), + (COPY_TO_REGCLASS (VCVTSS2SDZrr (v4f32 (IMPLICIT_DEF)), + (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>, + Requires<[HasAVX512, OptForSpeed]>; -def : Pat<(f32 (fround FR64X:$src)), (VCVTSD2SSZrr FR64X:$src, FR64X:$src)>, +def : Pat<(f32 (fround FR64X:$src)), + (COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X), + (COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>, Requires<[HasAVX512]>; - //===----------------------------------------------------------------------===// // AVX-512 Vector convert from signed/unsigned integer to float/double // and from float/double to signed/unsigned integer @@ -4992,7 +5490,7 @@ defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUlongToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>; -let Predicates = [NoVLX] in { +let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; @@ -5024,40 +5522,102 @@ let Predicates = [HasAVX512] in { //===----------------------------------------------------------------------===// // Half precision conversion instructions //===----------------------------------------------------------------------===// -multiclass avx512_cvtph2ps { - def rr : AVX5128I<0x13, MRMSrcReg, (outs destRC:$dst), (ins srcRC:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", - []>, EVEX; - let hasSideEffects = 0, mayLoad = 1 in - def rm : AVX5128I<0x13, MRMSrcMem, (outs destRC:$dst), (ins 
x86memop:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, EVEX; +multiclass avx512_cvtph2ps { + defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), + "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT _src.RC:$src), + (i32 FROUND_CURRENT))>, T8PD; + let hasSideEffects = 0, mayLoad = 1 in { + defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), + "vcvtph2ps", "$src", "$src", + (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))), + (i32 FROUND_CURRENT))>, T8PD; + } } -multiclass avx512_cvtps2ph { - def rr : AVX512AIi8<0x1D, MRMDestReg, (outs destRC:$dst), - (ins srcRC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", - []>, EVEX; - let hasSideEffects = 0, mayStore = 1 in - def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), - (ins x86memop:$dst, srcRC:$src1, i32u8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, EVEX; +multiclass avx512_cvtph2ps_sae { + defm rb : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), + "vcvtph2ps", "{sae}, $src", "$src, {sae}", + (X86cvtph2ps (_src.VT _src.RC:$src), + (i32 FROUND_NO_EXC))>, T8PD, EVEX_B; + } -defm VCVTPH2PSZ : avx512_cvtph2ps, EVEX_V512, - EVEX_CD8<32, CD8VH>; -defm VCVTPS2PHZ : avx512_cvtps2ph, EVEX_V512, - EVEX_CD8<32, CD8VH>; +let Predicates = [HasAVX512] in { + defm VCVTPH2PSZ : avx512_cvtph2ps, + avx512_cvtph2ps_sae, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; + let Predicates = [HasVLX] in { + defm VCVTPH2PSZ256 : avx512_cvtph2ps,EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPH2PSZ128 : avx512_cvtph2ps, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + } +} -def : Pat<(v16i16 (int_x86_avx512_mask_vcvtps2ph_512 (v16f32 VR512:$src), - imm:$rc, (bc_v16i16(v8i32 immAllZerosV)), (i16 -1))), - (VCVTPS2PHZrr VR512:$src, imm:$rc)>; +multiclass avx512_cvtps2ph { + defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), + (ins _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph", "$src2, $src1", "$src1, $src2", + (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), + (i32 FROUND_CURRENT))>, AVX512AIi8Base; + let hasSideEffects = 0, mayStore = 1 in { + def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), (i32 FROUND_CURRENT) )), + addr:$dst)]>; + def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", + []>, EVEX_K; + } +} +multiclass avx512_cvtps2ph_sae { + defm rb : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), + (ins _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph", "$src2, {sae}, $src1", "$src1, $src2, {sae}", + (X86cvtps2ph (_src.VT _src.RC:$src1), + (i32 imm:$src2), + (i32 FROUND_NO_EXC))>, EVEX_B, AVX512AIi8Base; +} +let Predicates = [HasAVX512] in { + defm VCVTPS2PHZ : avx512_cvtps2ph, + avx512_cvtps2ph_sae, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; + let Predicates = [HasVLX] in { + defm VCVTPS2PHZ256 : avx512_cvtps2ph, + EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; + defm VCVTPS2PHZ128 : avx512_cvtps2ph, + EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; + } +} -def : Pat<(v16f32 (int_x86_avx512_mask_vcvtph2ps_512 (v16i16 VR256X:$src), - (bc_v16f32(v16i32 immAllZerosV)), (i16 -1), (i32 FROUND_CURRENT))), - (VCVTPH2PSZrr VR256X:$src)>; +// Unordered/Ordered scalar fp 
compare with Sae and set EFLAGS +multiclass avx512_ord_cmp_sae opc, X86VectorVTInfo _, SDNode OpNode, + string OpcodeStr> { + def rb: AVX512, EVEX, EVEX_B, VEX_LIG, EVEX_V128, + Sched<[WriteFAdd]>; +} + +let Defs = [EFLAGS], Predicates = [HasAVX512] in { + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">, + AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">, + AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; +} let Defs = [EFLAGS], Predicates = [HasAVX512] in { defm VUCOMISSZ : sse12_ord_cmp<0x2E, FR32X, X86cmp, f32, f32mem, loadf32, @@ -5067,10 +5627,10 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { "ucomisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; let Pattern = [] in { - defm VCOMISSZ : sse12_ord_cmp<0x2F, VR128X, undef, v4f32, f128mem, load, + defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, undef, f32, f32mem, loadf32, "comiss">, PS, EVEX, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : sse12_ord_cmp<0x2F, VR128X, undef, v2f64, f128mem, load, + defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, undef, f64, f64mem, loadf64, "comisd">, PD, EVEX, VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>; } @@ -5092,50 +5652,31 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { } /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd -multiclass avx512_fp14_s opc, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop> { - let hasSideEffects = 0 in { - def rr : AVX5128I, EVEX_4V; +multiclass avx512_fp14_s opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let hasSideEffects = 0, AddedComplexity = 20 , Predicates = [HasAVX512] in { + defm rr : AVX512_maskable_scalar, EVEX_4V; let mayLoad = 1 in { - def rm : AVX5128I, EVEX_4V; + defm rm : AVX512_maskable_scalar, EVEX_4V; } } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", FR32X, f32mem>, - EVEX_CD8<32, CD8VT1>; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", FR64X, f64mem>, - VEX_W, EVEX_CD8<64, CD8VT1>; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", FR32X, f32mem>, - EVEX_CD8<32, CD8VT1>; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", FR64X, f64mem>, - VEX_W, EVEX_CD8<64, CD8VT1>; - -def : Pat <(v4f32 (int_x86_avx512_rcp14_ss (v4f32 VR128X:$src1), - (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRCP14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X), - (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; - -def : Pat <(v2f64 (int_x86_avx512_rcp14_sd (v2f64 VR128X:$src1), - (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRCP14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X), - (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; - -def : Pat <(v4f32 (int_x86_avx512_rsqrt14_ss (v4f32 VR128X:$src1), - (v4f32 VR128X:$src2), (bc_v4f32 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRSQRT14SSrr (COPY_TO_REGCLASS VR128X:$src1, FR32X), - (COPY_TO_REGCLASS VR128X:$src2, FR32X)), VR128X)>; - -def : Pat <(v2f64 (int_x86_avx512_rsqrt14_sd (v2f64 VR128X:$src1), - (v2f64 VR128X:$src2), (bc_v2f64 (v4i32 immAllZerosV)), (i8 -1))), - (COPY_TO_REGCLASS (VRSQRT14SDrr (COPY_TO_REGCLASS VR128X:$src1, FR64X), - (COPY_TO_REGCLASS VR128X:$src2, FR64X)), VR128X)>; +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86frcp14s,
f32x_info>, + EVEX_CD8<32, CD8VT1>, T8PD; +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86frcp14s, f64x_info>, + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86frsqrt14s, f32x_info>, + EVEX_CD8<32, CD8VT1>, T8PD; +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86frsqrt14s, f64x_info>, + VEX_W, EVEX_CD8<64, CD8VT1>, T8PD; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p opc, string OpcodeStr, SDNode OpNode, @@ -5183,20 +5724,6 @@ multiclass avx512_fp14_p_vl_all opc, string OpcodeStr, SDNode OpNode> { defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86frsqrt>; defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86frcp>; -def : Pat <(v16f32 (int_x86_avx512_rsqrt14_ps_512 (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), - (VRSQRT14PSZr VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rsqrt14_pd_512 (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VRSQRT14PDZr VR512:$src)>; - -def : Pat <(v16f32 (int_x86_avx512_rcp14_ps_512 (v16f32 VR512:$src), - (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))), - (VRCP14PSZr VR512:$src)>; -def : Pat <(v8f64 (int_x86_avx512_rcp14_pd_512 (v8f64 VR512:$src), - (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))), - (VRCP14PDZr VR512:$src)>; - /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode> { @@ -5232,6 +5759,8 @@ let hasSideEffects = 0, Predicates = [HasERI] in { defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s>, T8PD, EVEX_4V; defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s>, T8PD, EVEX_4V; } + +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p opc, string OpcodeStr, X86VectorVTInfo _, @@ -5322,67 +5851,6 @@ multiclass avx512_sqrt_packed opc, string OpcodeStr, } } -multiclass avx512_sqrt_scalar opc, string OpcodeStr, - Intrinsic F32Int, Intrinsic F64Int, - OpndItins itins_s, OpndItins itins_d> { - def SSZr : SI, XS, EVEX_4V; - let isCodeGenOnly = 1 in - def SSZr_Int : SIi8, XS, EVEX_4V; - let mayLoad = 1 in { - def SSZm : SI, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - let isCodeGenOnly = 1 in - def SSZm_Int : SIi8, XS, EVEX_4V, EVEX_CD8<32, CD8VT1>; - } - def SDZr : SI, - XD, EVEX_4V, VEX_W; - let isCodeGenOnly = 1 in - def SDZr_Int : SIi8, XD, EVEX_4V, VEX_W; - let mayLoad = 1 in { - def SDZm : SI, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - let isCodeGenOnly = 1 in - def SDZm_Int : SIi8, - XD, EVEX_4V, VEX_W, EVEX_CD8<64, CD8VT1>; - } -} - multiclass avx512_sqrt_packed_all opc, string OpcodeStr, SDNode OpNode> { defm PSZ : avx512_sqrt_packed opc, string OpcodeStr, v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; } +multiclass avx512_sqrt_scalar opc, string OpcodeStr,X86VectorVTInfo _, + string SUFF, SDNode OpNode, SDNode OpNodeRnd> { + + defm r_Int : AVX512_maskable_scalar; + let mayLoad = 1 in + defm m_Int : AVX512_maskable_scalar; + + defm rb_Int : AVX512_maskable_scalar, + EVEX_B, EVEX_RC; + + let isCodeGenOnly = 1 in { + def r : I; + + let mayLoad = 1 in + def m : I; + } + + def : Pat<(_.EltVT (OpNode _.FRC:$src)), + (!cast(NAME#SUFF#Zr) + (_.EltVT (IMPLICIT_DEF)), _.FRC:$src)>; + + def : Pat<(_.EltVT (OpNode (load addr:$src))), + (!cast(NAME#SUFF#Zm) + (_.EltVT (IMPLICIT_DEF)), addr:$src)>, Requires<[OptForSize]>; +} + +multiclass avx512_sqrt_scalar_all opc, string OpcodeStr> { + defm SSZ : avx512_sqrt_scalar, 
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS; + defm SDZ : avx512_sqrt_scalar, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W; +} + defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", fsqrt>, avx512_sqrt_packed_all_round<0x51, "vsqrt", X86fsqrtRnd>; -defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", - int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, - SSE_SQRTSS, SSE_SQRTSD>; +defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG; let Predicates = [HasAVX512] in { - def : Pat<(f32 (fsqrt FR32X:$src)), - (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; - def : Pat<(f32 (fsqrt (load addr:$src))), - (VSQRTSSZm (f32 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - def : Pat<(f64 (fsqrt FR64X:$src)), - (VSQRTSDZr (f64 (IMPLICIT_DEF)), FR64X:$src)>; - def : Pat<(f64 (fsqrt (load addr:$src))), - (VSQRTSDZm (f64 (IMPLICIT_DEF)), addr:$src)>, - Requires<[OptForSize]>; - def : Pat<(f32 (X86frsqrt FR32X:$src)), - (VRSQRT14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>; + (COPY_TO_REGCLASS (VRSQRT14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>; def : Pat<(f32 (X86frsqrt (load addr:$src))), - (VRSQRT14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>, + (COPY_TO_REGCLASS (VRSQRT14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, Requires<[OptForSize]>; - def : Pat<(f32 (X86frcp FR32X:$src)), - (VRCP14SSrr (f32 (IMPLICIT_DEF)), FR32X:$src)>; + (COPY_TO_REGCLASS (VRCP14SSrr (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X )>; def : Pat<(f32 (X86frcp (load addr:$src))), - (VRCP14SSrm (f32 (IMPLICIT_DEF)), addr:$src)>, + (COPY_TO_REGCLASS (VRCP14SSrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>, Requires<[OptForSize]>; - - def : Pat<(int_x86_sse_sqrt_ss VR128X:$src), - (COPY_TO_REGCLASS (VSQRTSSZr (f32 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR32)), - VR128X)>; - def : Pat<(int_x86_sse_sqrt_ss sse_load_f32:$src), - (VSQRTSSZm_Int (v4f32 (IMPLICIT_DEF)), sse_load_f32:$src)>; - - def : Pat<(int_x86_sse2_sqrt_sd VR128X:$src), - (COPY_TO_REGCLASS (VSQRTSDZr (f64 (IMPLICIT_DEF)), - (COPY_TO_REGCLASS VR128X:$src, FR64)), - VR128X)>; - def : Pat<(int_x86_sse2_sqrt_sd sse_load_f64:$src), - (VSQRTSDZm_Int (v2f64 (IMPLICIT_DEF)), sse_load_f64:$src)>; } - -multiclass avx512_rndscale opc, string OpcodeStr, - X86MemOperand x86memop, RegisterClass RC, - PatFrag mem_frag, Domain d> { -let ExeDomain = d in { - // Intrinsic operation, reg. 
- // Vector intrinsic operation, reg - def r : AVX512AIi8, EVEX; - - // Vector intrinsic operation, mem - def m : AVX512AIi8, EVEX; -} // ExeDomain -} - -defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512, - loadv16f32, SSEPackedSingle>, EVEX_V512, - EVEX_CD8<32, CD8VF>; - -def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1), - imm:$src2, (v16f32 VR512:$src1), (i16 -1), - FROUND_CURRENT)), - (VRNDSCALEPSZr VR512:$src1, imm:$src2)>; - - -defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512, - loadv8f64, SSEPackedDouble>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; - -def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1), - imm:$src2, (v8f64 VR512:$src1), (i8 -1), - FROUND_CURRENT)), - (VRNDSCALEPDZr VR512:$src1, imm:$src2)>; - multiclass avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { @@ -5510,20 +5962,20 @@ avx512_rndscale_scalar opc, string OpcodeStr, X86VectorVTInfo _> { defm r : AVX512_maskable_scalar; defm rb : AVX512_maskable_scalar, EVEX_B; let mayLoad = 1 in defm m : AVX512_maskable_scalar; } @@ -5568,109 +6020,238 @@ defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", f32x_info>, defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; -let Predicates = [HasAVX512] in { -def : Pat<(v16f32 (ffloor VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x1))>; -def : Pat<(v16f32 (fnearbyint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0xC))>; -def : Pat<(v16f32 (fceil VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x2))>; -def : Pat<(v16f32 (frint VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x4))>; -def : Pat<(v16f32 (ftrunc VR512:$src)), - (VRNDSCALEPSZr VR512:$src, (i32 0x3))>; - -def : Pat<(v8f64 (ffloor VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x1))>; -def : Pat<(v8f64 (fnearbyint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0xC))>; -def : Pat<(v8f64 (fceil VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x2))>; -def : Pat<(v8f64 (frint VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x4))>; -def : Pat<(v8f64 (ftrunc VR512:$src)), - (VRNDSCALEPDZr VR512:$src, (i32 0x3))>; -} //------------------------------------------------- // Integer truncate and extend operations //------------------------------------------------- -multiclass avx512_trunc_sat opc, string OpcodeStr, - RegisterClass dstRC, RegisterClass srcRC, - RegisterClass KRC, X86MemOperand x86memop> { - def rr : AVX512XS8I opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, + X86MemOperand x86memop> { + + defm rr : AVX512_maskable, + EVEX, T8XS; + + // for intrinsic pattern match + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + undef)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.ImmAllZerosV)), + (!cast(NAME#SrcInfo.ZSuffix##rrkz) DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + def : Pat<(DestInfo.VT (X86select DestInfo.KRCWM:$mask, + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + DestInfo.RC:$src0)), + (!cast(NAME#SrcInfo.ZSuffix##rrk) DestInfo.RC:$src0, + DestInfo.KRCWM:$mask , + SrcInfo.RC:$src1)>; + + let mayStore = 1 in { + def mr : AVX512XS8I, EVEX; - def rrk : AVX512XS8I, EVEX, EVEX_K; - - def rrkz : AVX512XS8I, EVEX, EVEX_KZ; - - def mr :
AVX512XS8I, EVEX; - - def mrk : AVX512XS8I, EVEX, EVEX_K; - + }//mayStore = 1 } -defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVSQB : avx512_trunc_sat<0x22, "vpmovsqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVUSQB : avx512_trunc_sat<0x12, "vpmovusqb", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>; -defm VPMOVQW : avx512_trunc_sat<0x34, "vpmovqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVSQW : avx512_trunc_sat<0x24, "vpmovsqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVUSQW : avx512_trunc_sat<0x14, "vpmovusqw", VR128X, VR512, VK8WM, - i128mem>, EVEX_V512, EVEX_CD8<16, CD8VQ>; -defm VPMOVQD : avx512_trunc_sat<0x35, "vpmovqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVSQD : avx512_trunc_sat<0x25, "vpmovsqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVUSQD : avx512_trunc_sat<0x15, "vpmovusqd", VR256X, VR512, VK8WM, - i256mem>, EVEX_V512, EVEX_CD8<32, CD8VH>; -defm VPMOVDW : avx512_trunc_sat<0x33, "vpmovdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVSDW : avx512_trunc_sat<0x23, "vpmovsdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVUSDW : avx512_trunc_sat<0x13, "vpmovusdw", VR256X, VR512, VK16WM, - i256mem>, EVEX_V512, EVEX_CD8<16, CD8VH>; -defm VPMOVDB : avx512_trunc_sat<0x31, "vpmovdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVSDB : avx512_trunc_sat<0x21, "vpmovsdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -defm VPMOVUSDB : avx512_trunc_sat<0x11, "vpmovusdb", VR128X, VR512, VK16WM, - i128mem>, EVEX_V512, EVEX_CD8<8, CD8VQ>; -def : Pat<(v16i8 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQBrr VR512:$src)>; -def : Pat<(v8i16 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQWrr VR512:$src)>; -def : Pat<(v16i16 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDWrr VR512:$src)>; -def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>; -def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>; +multiclass avx512_trunc_mr_lowering { -def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))), - (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>; -def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>; -def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))), - (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>; + def : Pat<(truncFrag (SrcInfo.VT SrcInfo.RC:$src), addr:$dst), + (!cast(NAME#SrcInfo.ZSuffix##mr) + addr:$dst, SrcInfo.RC:$src)>; + def : Pat<(mtruncFrag addr:$dst, SrcInfo.KRCWM:$mask, + (SrcInfo.VT SrcInfo.RC:$src)), + (!cast(NAME#SrcInfo.ZSuffix##mrk) + addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>; +} + +multiclass avx512_trunc_sat_mr_lowering { + + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask), + (!cast(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr, + (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM), + (SrcInfo.VT SrcInfo.RC:$src))>; + + def: Pat<(!cast("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix# + DestInfo.Suffix#"_mem_"#SrcInfo.Size) + addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1), + 
(!cast(NAME#SrcInfo.ZSuffix##mr) addr:$ptr, + (SrcInfo.VT SrcInfo.RC:$src))>; +} + +multiclass avx512_trunc opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag, + Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_sat opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, + X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, + X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{ + + let Predicates = [HasVLX, prd] in { + defm Z128: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V128; + + defm Z256: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V256; + } + let Predicates = [prd] in + defm Z: avx512_trunc_common, + avx512_trunc_sat_mr_lowering, EVEX_V512; +} + +multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; +} +multiclass avx512_trunc_sat_qb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VO>; +} + +multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; +} +multiclass avx512_trunc_sat_qw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VQ>; +} + +multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; +} +multiclass avx512_trunc_sat_qd opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<32, CD8VH>; +} + +multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; +} +multiclass avx512_trunc_sat_db opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<8, CD8VQ>; +} + +multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_dw opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode> { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; +} +multiclass avx512_trunc_sat_wb opc, string sat, SDNode OpNode> { + defm NAME: avx512_trunc_sat, EVEX_CD8<16, CD8VH>; +} + +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>; +defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>; +defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>; + +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>; +defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>; +defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>; + +defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>; +defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>; +defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>; + +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>; +defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>; +defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>; + +defm VPMOVDW 
: avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>; +defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>; +defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>; + +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>; +defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>; +defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>; + +let Predicates = [HasAVX512, NoVLX] in { +def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))), + (v8i16 (EXTRACT_SUBREG + (v16i16 (VPMOVDWZrr (v16i32 (SUBREG_TO_REG (i32 0), + VR256X:$src, sub_ymm)))), sub_xmm))>; +def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))), + (v4i32 (EXTRACT_SUBREG + (v8i32 (VPMOVQDZrr (v8i64 (SUBREG_TO_REG (i32 0), + VR256X:$src, sub_ymm)))), sub_xmm))>; +} + +let Predicates = [HasBWI, NoVLX] in { +def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))), + (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (SUBREG_TO_REG (i32 0), + VR256X:$src, sub_ymm))), sub_xmm))>; +} multiclass avx512_extend_common opc, string OpcodeStr, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, @@ -5985,163 +6566,11 @@ defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd", VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; -//===----------------------------------------------------------------------===// -// VSHUFPS - VSHUFPD Operations - -multiclass avx512_shufp { - def rmi : AVX512PIi8<0xC6, MRMSrcMem, (outs RC:$dst), - (ins RC:$src1, x86memop:$src2, u8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), - (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, - EVEX_4V, Sched<[WriteShuffleLd, ReadAfterLd]>; - def rri : AVX512PIi8<0xC6, MRMSrcReg, (outs RC:$dst), - (ins RC:$src1, RC:$src2, u8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, - (i8 imm:$src3))))], d, IIC_SSE_SHUFP>, - EVEX_4V, Sched<[WriteShuffle]>; -} - -defm VSHUFPSZ : avx512_shufp, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VSHUFPDZ : avx512_shufp, PD, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - -def : Pat<(v16i32 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VSHUFPSZrri VR512:$src1, VR512:$src2, imm:$imm)>; -def : Pat<(v16i32 (X86Shufp VR512:$src1, - (loadv16i32 addr:$src2), (i8 imm:$imm))), - (VSHUFPSZrmi VR512:$src1, addr:$src2, imm:$imm)>; - -def : Pat<(v8i64 (X86Shufp VR512:$src1, VR512:$src2, (i8 imm:$imm))), - (VSHUFPDZrri VR512:$src1, VR512:$src2, imm:$imm)>; -def : Pat<(v8i64 (X86Shufp VR512:$src1, - (loadv8i64 addr:$src2), (i8 imm:$imm))), - (VSHUFPDZrmi VR512:$src1, addr:$src2, imm:$imm)>; // Helper fragments to match sext vXi1 to vXiY. 
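A minimal scalar model (an illustration added here, not code from the patch) of what the saturating VPMOV* truncates defined above compute per lane: X86vtruncs clamps into the signed range of the narrower type and X86vtruncus into its unsigned range before truncating, unlike plain X86vtrunc, which simply drops the high bits.

#include <algorithm>
#include <cstdint>
#include <cstdio>

// One lane of vpmovsqb: i64 -> i8 with signed saturation.
static int8_t trunc_sat_signed(int64_t v) {
  return static_cast<int8_t>(
      std::min<int64_t>(std::max<int64_t>(v, INT8_MIN), INT8_MAX));
}

// One lane of vpmovusqb: u64 -> u8 with unsigned saturation.
static uint8_t trunc_sat_unsigned(uint64_t v) {
  return static_cast<uint8_t>(std::min<uint64_t>(v, UINT8_MAX));
}

int main() {
  // Prints "-128 255"; a plain vpmovqb-style truncate would instead
  // keep the low byte (24 and 232 respectively).
  std::printf("%d %u\n", trunc_sat_signed(-1000), trunc_sat_unsigned(1000));
}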
def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; -multiclass avx512_conflict opc, string OpcodeStr, - RegisterClass RC, RegisterClass KRC, - X86MemOperand x86memop, - X86MemOperand x86scalar_mop, string BrdcstStr> { - let hasSideEffects = 0 in { - def rr : AVX5128I, EVEX; - let mayLoad = 1 in - def rm : AVX5128I, EVEX; - let mayLoad = 1 in - def rmb : AVX5128I, EVEX, EVEX_B; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in - def rmkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in - def rmbkz : AVX5128I, EVEX, EVEX_KZ, EVEX_B; - - let Constraints = "$src1 = $dst" in { - def rrk : AVX5128I, EVEX, EVEX_K; - let mayLoad = 1 in - def rmk : AVX5128I, EVEX, EVEX_K; - let mayLoad = 1 in - def rmbk : AVX5128I, EVEX, EVEX_K, EVEX_B; - } - } -} - -let Predicates = [HasCDI] in { -defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM, - i512mem, i32mem, "{1to16}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - - -defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM, - i512mem, i64mem, "{1to8}">, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -} - -def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1, - GR16:$mask), - (VPCONFLICTDrrk VR512:$src1, - (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; - -def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1, - GR8:$mask), - (VPCONFLICTQrrk VR512:$src1, - (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; - -let Predicates = [HasCDI] in { -defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM, - i512mem, i32mem, "{1to16}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - - -defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM, - i512mem, i64mem, "{1to8}">, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -} - -def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1, - GR16:$mask), - (VPLZCNTDrrk VR512:$src1, - (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; - -def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1, - GR8:$mask), - (VPLZCNTQrrk VR512:$src1, - (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; - -def : Pat<(v16i32 (ctlz (loadv16i32 addr:$src))), - (VPLZCNTDrm addr:$src)>; -def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))), - (VPLZCNTDrr VR512:$src)>; -def : Pat<(v8i64 (ctlz (loadv8i64 addr:$src))), - (VPLZCNTQrm addr:$src)>; -def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))), - (VPLZCNTQrr VR512:$src)>; - def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; @@ -6197,7 +6626,7 @@ defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">; multiclass convert_vector_to_mask_common opc, X86VectorVTInfo _, string OpcodeStr > { def rr : AVX512XS8I, EVEX; + [(set _.KRC:$dst, (X86cvt2mask (_.VT _.RC:$src)))]>, EVEX; } multiclass avx512_convert_vector_to_mask opc, string OpcodeStr, @@ -6230,7 +6659,7 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m", multiclass compress_by_vec_width opc, X86VectorVTInfo _, string OpcodeStr> { defm rr : AVX512_maskable, AVX5128IBase; let mayStore = 1 in { @@ -6242,7 +6671,7 @@ multiclass compress_by_vec_width opc, X86VectorVTInfo _, def mrk : AVX5128I, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; @@ -6272,7 +6701,7 @@ defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", avx512vl_f64_info multiclass expand_by_vec_width opc, X86VectorVTInfo _, string 
OpcodeStr> { defm rr : AVX512_maskable, AVX5128IBase; let mayLoad = 1 in @@ -6302,6 +6731,62 @@ defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", avx512vl_f32_info>, defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, EVEX, VEX_W; +//handle instruction reg_vec1 = op(reg_vec,imm) +// op(mem_vec,imm) +// op(broadcast(eltVt),imm) +//all instruction created with FROUND_CURRENT +multiclass avx512_unary_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + defm rri : AVX512_maskable; + let mayLoad = 1 in { + defm rmi : AVX512_maskable; + defm rmbi : AVX512_maskable, EVEX_B; + } +} + +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} +multiclass avx512_unary_fp_sae_packed_imm opc, string OpcodeStr, + SDNode OpNode, X86VectorVTInfo _>{ + defm rrib : AVX512_maskable, EVEX_B; +} + +multiclass avx512_common_unary_fp_sae_packed_imm opc, SDNode OpNode, Predicate prd>{ + let Predicates = [prd] in { + defm Z : avx512_unary_fp_packed_imm, + avx512_unary_fp_sae_packed_imm, + EVEX_V512; + } + let Predicates = [prd, HasVLX] in { + defm Z128 : avx512_unary_fp_packed_imm, + EVEX_V128; + defm Z256 : avx512_unary_fp_packed_imm, + EVEX_V256; + } +} + //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) @@ -6309,49 +6794,60 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>, multiclass avx512_fp_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rri : AVX512_maskable; let mayLoad = 1 in { defm rmi : AVX512_maskable; defm rmbi : AVX512_maskable, EVEX_B; } } +//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) +// op(reg_vec2,mem_vec,imm) +multiclass avx512_3Op_rm_imm8 opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{ + + defm rri : AVX512_maskable; + let mayLoad = 1 in + defm rmi : AVX512_maskable; +} + //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) multiclass avx512_3Op_imm8 opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _>{ - defm rri : AVX512_maskable; - let mayLoad = 1 in { - defm rmi : AVX512_maskable; + X86VectorVTInfo _>: + avx512_3Op_rm_imm8{ + + let mayLoad = 1 in defm rmbi : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (i8 imm:$src3))>, EVEX_B; - } } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) @@ -6369,20 +6864,20 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { defm rri : AVX512_maskable_scalar; let mayLoad = 1 in { defm rmi : AVX512_maskable_scalar; let isAsmParserOnly = 1 in { @@ -6398,18 +6893,25 @@ multiclass avx512_fp_scalar_imm opc, string OpcodeStr, SDNode OpNode, multiclass avx512_fp_sae_packed_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _>{ defm rrib : AVX512_maskable, EVEX_B; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_scalar_imm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { - defm NAME: avx512_fp_sae_packed_imm; + defm NAME#rrib : AVX512_maskable_scalar, EVEX_B; } multiclass avx512_common_fp_sae_packed_imm opc, SDNode OpNode, string OpStr, + AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{ + let Predicates = [HasBWI] in { + defm Z : avx512_3Op_rm_imm8, EVEX_V512, AVX512AIi8Base, EVEX_4V; + } + let Predicates = [HasBWI, HasVLX] in { + 
defm Z128 : avx512_3Op_rm_imm8, EVEX_V128, AVX512AIi8Base, EVEX_4V; + defm Z256 : avx512_3Op_rm_imm8, EVEX_V256, AVX512AIi8Base, EVEX_4V; + } +} + multiclass avx512_common_3Op_imm8 opc, SDNode OpNode>{ let Predicates = [HasAVX512] in { @@ -6447,6 +6963,14 @@ multiclass avx512_common_fp_sae_scalar_imm opcPs, bits<8> opcPd, SDNode OpNode, Predicate prd>{ + defm PS : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<32, CD8VF>; + defm PD : avx512_common_unary_fp_sae_packed_imm, EVEX_CD8<64, CD8VF>, VEX_W; +} + defm VFIXUPIMMPD : avx512_common_fp_sae_packed_imm<"vfixupimmpd", avx512vl_f64_info, 0x54, X86VFixupimm, HasAVX512>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; @@ -6461,6 +6985,14 @@ defm VFIXUPIMMSS: avx512_common_fp_sae_scalar_imm<"vfixupimmss", f32x_info, 0x55, X86VFixupimm, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, + X86VReduce, HasDQI>, AVX512AIi8Base, EVEX; +defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, + X86VRndScale, HasAVX512>, AVX512AIi8Base, EVEX; +defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, + X86VGetMant, HasAVX512>, AVX512AIi8Base, EVEX; + + defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; @@ -6475,6 +7007,19 @@ defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, 0x51, X86VRange, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; +defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, + 0x57, X86Reduces, HasDQI>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; + +defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, + 0x27, X86GetMants, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; +defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, + 0x27, X86GetMants, HasAVX512>, + AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; multiclass avx512_shuff_packed_128 opc, SDNode OpNode = X86Shuf128>{ @@ -6486,6 +7031,29 @@ multiclass avx512_shuff_packed_128, EVEX_V256; } } +let Predicates = [HasAVX512] in { +def : Pat<(v16f32 (ffloor VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x1))>; +def : Pat<(v16f32 (fnearbyint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0xC))>; +def : Pat<(v16f32 (fceil VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x2))>; +def : Pat<(v16f32 (frint VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x4))>; +def : Pat<(v16f32 (ftrunc VR512:$src)), + (VRNDSCALEPSZrri VR512:$src, (i32 0x3))>; + +def : Pat<(v8f64 (ffloor VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x1))>; +def : Pat<(v8f64 (fnearbyint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0xC))>; +def : Pat<(v8f64 (fceil VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x2))>; +def : Pat<(v8f64 (frint VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x4))>; +def : Pat<(v8f64 (ftrunc VR512:$src)), + (VRNDSCALEPDZrri VR512:$src, (i32 0x3))>; +} defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; @@ -6496,31 +7064,51 @@ defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>, defm VSHUFI64X2 : 
avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -multiclass avx512_valign{ +multiclass avx512_valign { defm NAME: avx512_common_3Op_imm8, AVX512AIi8Base, EVEX_4V; - let isCodeGenOnly = 1 in { - defm NAME#_FP: avx512_common_3Op_imm8, - AVX512AIi8Base, EVEX_4V; - } } -defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info, avx512vl_f32_info>, +defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info, avx512vl_f64_info>, +defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; +multiclass avx512_vpalign_lowering p>{ + let Predicates = p in + def NAME#_.VTName#rri: + Pat<(_.VT (X86PAlignr _.RC:$src1, _.RC:$src2, (i8 imm:$imm))), + (!cast(NAME#_.ZSuffix#rri) + _.RC:$src1, _.RC:$src2, imm:$imm)>; +} + +multiclass avx512_vpalign_lowering_common: + avx512_vpalign_lowering<_.info512, [HasBWI]>, + avx512_vpalign_lowering<_.info128, [HasBWI, HasVLX]>, + avx512_vpalign_lowering<_.info256, [HasBWI, HasVLX]>; + +defm VPALIGN: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" , + avx512vl_i8_info, avx512vl_i8_info>, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + avx512_vpalign_lowering_common, + EVEX_CD8<8, CD8VF>; + +defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" , + avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; + multiclass avx512_unary_rm opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { defm rr : AVX512_maskable, EVEX, AVX5128IBase; let mayLoad = 1 in defm rm : AVX512_maskable, EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>; @@ -6531,7 +7119,7 @@ multiclass avx512_unary_rmb opc, string OpcodeStr, SDNode OpNode, avx512_unary_rm { let mayLoad = 1 in defm rmb : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, multiclass avx512_unary_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, Predicate prd> { - defm Q : avx512_unary_rmb_vl, VEX_W; - defm D : avx512_unary_rmb_vl; + defm D : avx512_unary_rmb_vl; } multiclass avx512_unary_rm_vl_bw opc_b, bits<8> opc_w, string OpcodeStr, SDNode OpNode, Predicate prd> { - defm W : avx512_unary_rm_vl; - defm B : avx512_unary_rm_vl; + defm W : avx512_unary_rm_vl; + defm B : avx512_unary_rm_vl; } multiclass avx512_unary_rm_vl_all opc_b, bits<8> opc_w, @@ -6598,3 +7187,332 @@ def : Pat<(xor (bc_v8i64 (v8i1sextv8i64)), (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), (VPABSQZrr VR512:$src)>; + +multiclass avx512_ctlz opc, string OpcodeStr, Predicate prd>{ + + defm NAME : avx512_unary_rm_vl_dq; +} + +defm VPLZCNT : avx512_ctlz<0x44, "vplzcnt", HasCDI>; +defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>; + +//===---------------------------------------------------------------------===// +// Replicate Single FP - MOVSHDUP and MOVSLDUP +//===---------------------------------------------------------------------===// +multiclass avx512_replicate opc, string OpcodeStr, SDNode OpNode>{ + defm NAME: avx512_unary_rm_vl, XS; +} + +defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup>; +defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup>; + +//===----------------------------------------------------------------------===// +// AVX-512 - MOVDDUP +//===----------------------------------------------------------------------===// + +multiclass avx512_movddup_128 opc, string OpcodeStr, 
SDNode OpNode, + X86VectorVTInfo _> { + defm rr : AVX512_maskable, EVEX; + let mayLoad = 1 in + defm rm : AVX512_maskable, + EVEX, EVEX_CD8<_.EltSize, CD8VH>; +} + +multiclass avx512_movddup_common opc, string OpcodeStr, SDNode OpNode, + AVX512VLVectorVTInfo VTInfo> { + + defm Z : avx512_unary_rm, EVEX_V512; + + let Predicates = [HasAVX512, HasVLX] in { + defm Z256 : avx512_unary_rm, + EVEX_V256; + defm Z128 : avx512_movddup_128, + EVEX_V128; + } +} + +multiclass avx512_movddup opc, string OpcodeStr, SDNode OpNode>{ + defm NAME: avx512_movddup_common, XD, VEX_W; +} + +defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>; + +def : Pat<(X86Movddup (loadv2f64 addr:$src)), + (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>; +def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), + (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Unpack Instructions +//===----------------------------------------------------------------------===// +defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh>; +defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl>; + +defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI>; +defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI>; + +defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512>; +defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Extract & Insert Integer Instructions +//===----------------------------------------------------------------------===// + +multiclass avx512_extract_elt_bw_m opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _> { + let mayStore = 1 in + def mr : AVX512Ii8, + EVEX, EVEX_CD8<_.EltSize, CD8VT1>; +} + +multiclass avx512_extract_elt_b { + let Predicates = [HasBWI] in { + def rr : AVX512Ii8<0x14, MRMDestReg, (outs GR32orGR64:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32orGR64:$dst, + (X86pextrb (_.VT _.RC:$src1), imm:$src2))]>, + EVEX, TAPD; + + defm NAME : avx512_extract_elt_bw_m<0x14, OpcodeStr, X86pextrb, _>, TAPD; + } +} + +multiclass avx512_extract_elt_w { + let Predicates = [HasBWI] in { + def rr : AVX512Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GR32orGR64:$dst, + (X86pextrw (_.VT _.RC:$src1), imm:$src2))]>, + EVEX, PD; + + def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), + (ins _.RC:$src1, u8imm:$src2), + OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + EVEX, TAPD; + + defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; + } +} + +multiclass avx512_extract_elt_dq { + let Predicates = [HasDQI] in { + def rr : AVX512Ii8<0x16, MRMDestReg, (outs GRC:$dst), + (ins _.RC:$src1, u8imm:$src2), 
+ OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set GRC:$dst, + (extractelt (_.VT _.RC:$src1), imm:$src2))]>, + EVEX, TAPD; + + let mayStore = 1 in + def mr : AVX512Ii8<0x16, MRMDestMem, (outs), + (ins _.ScalarMemOp:$dst, _.RC:$src1, u8imm:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(store (extractelt (_.VT _.RC:$src1), + imm:$src2),addr:$dst)]>, + EVEX, EVEX_CD8<_.EltSize, CD8VT1>, TAPD; + } +} + +defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>; +defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>; +defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>; +defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W; + +multiclass avx512_insert_elt_m opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, PatFrag LdFrag> { + def rm : AVX512Ii8, + EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>; +} + +multiclass avx512_insert_elt_bw opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, PatFrag LdFrag> { + let Predicates = [HasBWI] in { + def rr : AVX512Ii8, EVEX_4V; + + defm NAME : avx512_insert_elt_m; + } +} + +multiclass avx512_insert_elt_dq opc, string OpcodeStr, + X86VectorVTInfo _, RegisterClass GRC> { + let Predicates = [HasDQI] in { + def rr : AVX512Ii8, + EVEX_4V, TAPD; + + defm NAME : avx512_insert_elt_m, TAPD; + } +} + +defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info, + extloadi8>, TAPD; +defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info, + extloadi16>, PD; +defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>; +defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; +//===----------------------------------------------------------------------===// +// VSHUFPS - VSHUFPD Operations +//===----------------------------------------------------------------------===// +multiclass avx512_shufp{ + defm NAME: avx512_common_3Op_imm8, + EVEX_CD8, + AVX512AIi8Base, EVEX_4V; +} + +defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS; +defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - Byte shift Left/Right +//===----------------------------------------------------------------------===// + +multiclass avx512_shift_packed opc, SDNode OpNode, Format MRMr, + Format MRMm, string OpcodeStr, X86VectorVTInfo _>{ + def rr : AVX512; + let mayLoad = 1 in + def rm : AVX512; +} + +multiclass avx512_shift_packed_all opc, SDNode OpNode, Format MRMr, + Format MRMm, string OpcodeStr, Predicate prd>{ + let Predicates = [prd] in + defm Z512 : avx512_shift_packed, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_shift_packed, EVEX_V256; + defm Z128 : avx512_shift_packed, EVEX_V128; + } +} +defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", + HasBWI>, AVX512PDIi8Base, EVEX_4V; +defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", + HasBWI>, AVX512PDIi8Base, EVEX_4V; + + +multiclass avx512_psadbw_packed opc, SDNode OpNode, + string OpcodeStr, X86VectorVTInfo _dst, + X86VectorVTInfo _src>{ + def rr : AVX512BI; + let mayLoad = 1 in + def rm : AVX512BI; +} + +multiclass avx512_psadbw_packed_all opc, SDNode OpNode, + string OpcodeStr, Predicate prd> { + let Predicates = [prd] in + defm Z512 : avx512_psadbw_packed, EVEX_V512; + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_psadbw_packed, 
EVEX_V256; + defm Z128 : avx512_psadbw_packed, EVEX_V128; + } +} + +defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", + HasBWI>, EVEX_4V; + +multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + let Constraints = "$src1 = $dst" in { + defm rri : AVX512_maskable_3src, AVX512AIi8Base, EVEX_4V; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_3src, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + defm rmbi : AVX512_maskable_3src, EVEX_B, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + } + }// Constraints = "$src1 = $dst" +} + +multiclass avx512_common_ternlog{ + let Predicates = [HasAVX512] in + defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128; + defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256; + } +} + +defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>; +defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W; + diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 5e19ad448fc7..1a2e786661e9 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -615,14 +615,14 @@ class X86TypeInfo>", SDTIntLeaf,[],"<>">; -def Xi8 : X86TypeInfo; def Xi16 : X86TypeInfo; def Xi32 : X86TypeInfo; def Xi64 : X86TypeInfo opcode, string mnemonic, X86TypeInfo typeinfo, let hasSideEffects = 0; } -// BinOpAI_FF - Instructions like "adc %eax, %eax, imm", that implicitly define +// BinOpAI_RFF - Instructions like "adc %eax, %eax, imm", that implicitly define // and use EFLAGS. -class BinOpAI_FF opcode, string mnemonic, X86TypeInfo typeinfo, - Register areg, string operands> +class BinOpAI_RFF opcode, string mnemonic, X86TypeInfo typeinfo, + Register areg, string operands> : BinOpAI { let Uses = [areg, EFLAGS]; } +// BinOpAI_F - Instructions like "cmp %eax, %eax, imm", that imp-def EFLAGS. +class BinOpAI_F opcode, string mnemonic, X86TypeInfo typeinfo, + Register areg, string operands> + : BinOpAI { + let Defs = [EFLAGS]; +} + /// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is /// defined with "(set GPR:$dst, EFLAGS, (...". 
/// @@ -1092,14 +1099,14 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // Uses = [EFLAGS], Defs = [EFLAGS] - def NAME#8i8 : BinOpAI_FF; - def NAME#16i16 : BinOpAI_FF; - def NAME#32i32 : BinOpAI_FF; - def NAME#64i32 : BinOpAI_FF; + def NAME#8i8 : BinOpAI_RFF; + def NAME#16i16 : BinOpAI_RFF; + def NAME#32i32 : BinOpAI_RFF; + def NAME#64i32 : BinOpAI_RFF; } /// ArithBinOp_F - This is an arithmetic binary operator where the pattern is @@ -1170,14 +1177,14 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // Defs = [EFLAGS] - def NAME#8i8 : BinOpAI; - def NAME#16i16 : BinOpAI; - def NAME#32i32 : BinOpAI; - def NAME#64i32 : BinOpAI; + def NAME#8i8 : BinOpAI_F; + def NAME#16i16 : BinOpAI_F; + def NAME#32i32 : BinOpAI_F; + def NAME#64i32 : BinOpAI_F; } @@ -1246,14 +1253,14 @@ let isCompare = 1 in { "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } // Defs = [EFLAGS] - def TEST8i8 : BinOpAI<0xA8, "test", Xi8 , AL, - "{$src, %al|al, $src}">; - def TEST16i16 : BinOpAI<0xA8, "test", Xi16, AX, - "{$src, %ax|ax, $src}">; - def TEST32i32 : BinOpAI<0xA8, "test", Xi32, EAX, - "{$src, %eax|eax, $src}">; - def TEST64i32 : BinOpAI<0xA8, "test", Xi64, RAX, - "{$src, %rax|rax, $src}">; + def TEST8i8 : BinOpAI_F<0xA8, "test", Xi8 , AL, + "{$src, %al|al, $src}">; + def TEST16i16 : BinOpAI_F<0xA8, "test", Xi16, AX, + "{$src, %ax|ax, $src}">; + def TEST32i32 : BinOpAI_F<0xA8, "test", Xi32, EAX, + "{$src, %eax|eax, $src}">; + def TEST64i32 : BinOpAI_F<0xA8, "test", Xi64, RAX, + "{$src, %rax|rax, $src}">; } // isCompare //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 2056056d23a5..787f15bc628e 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -156,10 +156,9 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { Flags |= MachineMemOperand::MOLoad; if (MCID.mayStore()) Flags |= MachineMemOperand::MOStore; - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset), - Flags, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags, + MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); return addOffset(MIB.addFrameIndex(FI), Offset) .addMemOperand(MMO); } diff --git a/lib/Target/X86/X86InstrCMovSetCC.td b/lib/Target/X86/X86InstrCMovSetCC.td index 315f21308c0d..c73c95019f8d 100644 --- a/lib/Target/X86/X86InstrCMovSetCC.td +++ b/lib/Target/X86/X86InstrCMovSetCC.td @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// -// SetCC instructions. +// CMOV instructions. multiclass CMOV opc, string Mnemonic, PatLeaf CondNode> { let Uses = [EFLAGS], Predicates = [HasCMov], Constraints = "$src1 = $dst", isCommutable = 1, SchedRW = [WriteALU] in { diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 7f850d6830e1..5d7283f7bd57 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -132,26 +132,6 @@ def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), Requires<[In64BitMode]>; } -// The MSVC runtime contains an _ftol2 routine for converting floating-point -// to integer values. It has a strange calling convention: the input is -// popped from the x87 stack, and the return value is given in EDX:EAX. 
ECX is -// used as a temporary register. No other registers (aside from flags) are -// touched. -// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80 -// variant is unnecessary. - -let Defs = [EAX, EDX, ECX, EFLAGS], FPForm = SpecialFP in { - def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src), - "# win32 fptoui", - [(X86WinFTOL RFP32:$src)]>, - Requires<[Not64BitMode]>; - - def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src), - "# win32 fptoui", - [(X86WinFTOL RFP64:$src)]>, - Requires<[Not64BitMode]>; -} - //===----------------------------------------------------------------------===// // EH Pseudo Instructions // @@ -172,6 +152,29 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), } +let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, + isCodeGenOnly = 1, isReturn = 1 in { + def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET", [(cleanupret)]>; + + // CATCHRET needs a custom inserter for SEH. + let usesCustomInserter = 1 in + def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from), + "# CATCHRET", + [(catchret bb:$dst, bb:$from)]>; +} + +let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in +def CATCHPAD : I<0, Pseudo, (outs), (ins), "# CATCHPAD", [(catchpad)]>; + +// This instruction is responsible for re-establishing stack pointers after an +// exception has been caught and we are rejoining normal control flow in the +// parent function or funclet. It generally sets ESP and EBP, and optionally +// ESI. It is only needed for 32-bit WinEH, as the runtime restores CSRs for us +// elsewhere. +let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1 in +def EH_RESTORE : I<0, Pseudo, (outs), (ins), "# EH_RESTORE", []>; + let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { def EH_SjLj_SetJmp32 : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf), @@ -247,7 +250,7 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), // Alias instruction mapping movr0 to xor. // FIXME: remove when we can teach regalloc that xor reg, reg is ok. let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1, - isPseudo = 1 in + isPseudo = 1, AddedComplexity = 20 in def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "", [(set GR32:$dst, 0)], IIC_ALU_NONMEM>, Sched<[WriteZero]>; @@ -259,6 +262,33 @@ def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)> { let AddedComplexity = 20; } +let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode], + AddedComplexity = 15 in { + // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC, + // which only require 3 bytes compared to MOV32ri which requires 5. + let Defs = [EFLAGS], isReMaterializable = 1, isPseudo = 1 in { + def MOV32r1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", + [(set GR32:$dst, 1)]>; + def MOV32r_1 : I<0, Pseudo, (outs GR32:$dst), (ins), "", + [(set GR32:$dst, -1)]>; + } + + // MOV16ri is 4 bytes, so the instructions above are smaller. + def : Pat<(i16 1), (EXTRACT_SUBREG (MOV32r1), sub_16bit)>; + def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>; +} + +let isReMaterializable = 1, isPseudo = 1, AddedComplexity = 10 in { +// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1. +// FIXME: Add itinerary class and Schedule. 
+def MOV32ImmSExti8 : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "", + [(set GR32:$dst, i32immSExt8:$src)]>, + Requires<[OptForMinSize]>; +def MOV64ImmSExti8 : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "", + [(set GR64:$dst, i64immSExt8:$src)]>, + Requires<[OptForMinSize, NotWin64WithoutFP]>; +} + // Materialize i64 constant where top 32-bits are zero. This could theoretically // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however // that would make it more difficult to rematerialize. @@ -268,9 +298,9 @@ def MOV32ri64 : Ii32<0xb8, AddRegFrm, (outs GR32:$dst), (ins i64i32imm:$src), "", [], IIC_ALU_NONMEM>, Sched<[WriteALU]>; // This 64-bit pseudo-move can be used for both a 64-bit constant that is -// actually the zero-extension of a 32-bit constant, and for labels in the +// actually the zero-extension of a 32-bit constant and for labels in the // x86-64 small code model. -def mov64imm32 : ComplexPattern; +def mov64imm32 : ComplexPattern; let AddedComplexity = 1 in def : Pat<(i64 mov64imm32:$src), @@ -509,6 +539,7 @@ let usesCustomInserter = 1, Uses = [EFLAGS] in { defm _FR32 : CMOVrr_PSEUDO; defm _FR64 : CMOVrr_PSEUDO; + defm _FR128 : CMOVrr_PSEUDO; defm _V4F32 : CMOVrr_PSEUDO; defm _V2F64 : CMOVrr_PSEUDO; defm _V2I64 : CMOVrr_PSEUDO; @@ -752,67 +783,111 @@ defm LXADD : ATOMIC_LOAD_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add", /* The following multiclass tries to make sure that in code like * x.store (immediate op x.load(acquire), release) + * and + * x.store (register op x.load(acquire), release) * an operation directly on memory is generated instead of wasting a register. * It is not automatic as atomic_store/load are only lowered to MOV instructions * extremely late to prevent them from being accidentally reordered in the backend * (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions) */ -multiclass RELEASE_BINOP_MI { +multiclass RELEASE_BINOP_MI { def NAME#8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src), - "#RELEASE_BINOP PSEUDO!", - [(atomic_store_8 addr:$dst, (!cast(op) + "#BINOP "#NAME#"8mi PSEUDO!", + [(atomic_store_8 addr:$dst, (op (atomic_load_8 addr:$dst), (i8 imm:$src)))]>; + def NAME#8mr : I<0, Pseudo, (outs), (ins i8mem:$dst, GR8:$src), + "#BINOP "#NAME#"8mr PSEUDO!", + [(atomic_store_8 addr:$dst, (op + (atomic_load_8 addr:$dst), GR8:$src))]>; // NAME#16 is not generated as 16-bit arithmetic instructions are considered // costly and avoided as far as possible by this backend anyway def NAME#32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src), - "#RELEASE_BINOP PSEUDO!", - [(atomic_store_32 addr:$dst, (!cast(op) + "#BINOP "#NAME#"32mi PSEUDO!", + [(atomic_store_32 addr:$dst, (op (atomic_load_32 addr:$dst), (i32 imm:$src)))]>; + def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src), + "#BINOP "#NAME#"32mr PSEUDO!", + [(atomic_store_32 addr:$dst, (op + (atomic_load_32 addr:$dst), GR32:$src))]>; def NAME#64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src), - "#RELEASE_BINOP PSEUDO!", - [(atomic_store_64 addr:$dst, (!cast(op) + "#BINOP "#NAME#"64mi32 PSEUDO!", + [(atomic_store_64 addr:$dst, (op (atomic_load_64 addr:$dst), (i64immSExt32:$src)))]>; + def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src), + "#BINOP "#NAME#"64mr PSEUDO!", + [(atomic_store_64 addr:$dst, (op + (atomic_load_64 addr:$dst), GR64:$src))]>; +} +let Defs = [EFLAGS] in { + defm RELEASE_ADD : RELEASE_BINOP_MI; + defm RELEASE_AND : RELEASE_BINOP_MI; + defm RELEASE_OR : RELEASE_BINOP_MI; + defm RELEASE_XOR : 
RELEASE_BINOP_MI<xor>; + // Note: we don't deal with sub, because subtractions of constants are + // optimized into additions before this code can run. +} + +// Same as above, but for floating-point. +// FIXME: imm version. +// FIXME: Version that doesn't clobber $src, using AVX's VADDSS. +// FIXME: This could also handle SIMD operations with *ps and *pd instructions. +let usesCustomInserter = 1 in { +multiclass RELEASE_FP_BINOP_MI<SDNode op> { + def NAME#32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, FR32:$src), + "#BINOP "#NAME#"32mr PSEUDO!", + [(atomic_store_32 addr:$dst, + (i32 (bitconvert (op + (f32 (bitconvert (i32 (atomic_load_32 addr:$dst)))), + FR32:$src))))]>, Requires<[HasSSE1]>; + def NAME#64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, FR64:$src), + "#BINOP "#NAME#"64mr PSEUDO!", + [(atomic_store_64 addr:$dst, + (i64 (bitconvert (op + (f64 (bitconvert (i64 (atomic_load_64 addr:$dst)))), + FR64:$src))))]>, Requires<[HasSSE2]>; +} +defm RELEASE_FADD : RELEASE_FP_BINOP_MI<fadd>; +// FIXME: Add fsub, fmul, fdiv, ... } -defm RELEASE_ADD : RELEASE_BINOP_MI<"add">; -defm RELEASE_AND : RELEASE_BINOP_MI<"and">; -defm RELEASE_OR : RELEASE_BINOP_MI<"or">; -defm RELEASE_XOR : RELEASE_BINOP_MI<"xor">; -// Note: we don't deal with sub, because substractions of constants are -// optimized into additions before this code can run multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> { def NAME#8m : I<0, Pseudo, (outs), (ins i8mem:$dst), - "#RELEASE_UNOP PSEUDO!", + "#UNOP "#NAME#"8m PSEUDO!", [(atomic_store_8 addr:$dst, dag8)]>; def NAME#16m : I<0, Pseudo, (outs), (ins i16mem:$dst), - "#RELEASE_UNOP PSEUDO!", + "#UNOP "#NAME#"16m PSEUDO!", [(atomic_store_16 addr:$dst, dag16)]>; def NAME#32m : I<0, Pseudo, (outs), (ins i32mem:$dst), - "#RELEASE_UNOP PSEUDO!", + "#UNOP "#NAME#"32m PSEUDO!", [(atomic_store_32 addr:$dst, dag32)]>; def NAME#64m : I<0, Pseudo, (outs), (ins i64mem:$dst), - "#RELEASE_UNOP PSEUDO!", + "#UNOP "#NAME#"64m PSEUDO!", [(atomic_store_64 addr:$dst, dag64)]>; } -defm RELEASE_INC : RELEASE_UNOP< - (add (atomic_load_8 addr:$dst), (i8 1)), - (add (atomic_load_16 addr:$dst), (i16 1)), - (add (atomic_load_32 addr:$dst), (i32 1)), - (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>; -defm RELEASE_DEC : RELEASE_UNOP< - (add (atomic_load_8 addr:$dst), (i8 -1)), - (add (atomic_load_16 addr:$dst), (i16 -1)), - (add (atomic_load_32 addr:$dst), (i32 -1)), - (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>; +let Defs = [EFLAGS] in { + defm RELEASE_INC : RELEASE_UNOP< + (add (atomic_load_8 addr:$dst), (i8 1)), + (add (atomic_load_16 addr:$dst), (i16 1)), + (add (atomic_load_32 addr:$dst), (i32 1)), + (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>; + defm RELEASE_DEC : RELEASE_UNOP< + (add (atomic_load_8 addr:$dst), (i8 -1)), + (add (atomic_load_16 addr:$dst), (i16 -1)), + (add (atomic_load_32 addr:$dst), (i32 -1)), + (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>; +} /* TODO: These don't work because the type inference of TableGen fails. TODO: find a way to fix it. -defm RELEASE_NEG : RELEASE_UNOP< - (ineg (atomic_load_8 addr:$dst)), - (ineg (atomic_load_16 addr:$dst)), - (ineg (atomic_load_32 addr:$dst)), - (ineg (atomic_load_64 addr:$dst))>; +let Defs = [EFLAGS] in { + defm RELEASE_NEG : RELEASE_UNOP< + (ineg (atomic_load_8 addr:$dst)), + (ineg (atomic_load_16 addr:$dst)), + (ineg (atomic_load_32 addr:$dst)), + (ineg (atomic_load_64 addr:$dst))>; +} +// NOT doesn't set flags.
defm RELEASE_NOT : RELEASE_UNOP< (not (atomic_load_8 addr:$dst)), (not (atomic_load_16 addr:$dst)), @@ -821,42 +896,42 @@ defm RELEASE_NOT : RELEASE_UNOP< */ def RELEASE_MOV8mi : I<0, Pseudo, (outs), (ins i8mem:$dst, i8imm:$src), - "#RELEASE_MOV PSEUDO !", + "#RELEASE_MOV8mi PSEUDO!", [(atomic_store_8 addr:$dst, (i8 imm:$src))]>; def RELEASE_MOV16mi : I<0, Pseudo, (outs), (ins i16mem:$dst, i16imm:$src), - "#RELEASE_MOV PSEUDO !", + "#RELEASE_MOV16mi PSEUDO!", [(atomic_store_16 addr:$dst, (i16 imm:$src))]>; def RELEASE_MOV32mi : I<0, Pseudo, (outs), (ins i32mem:$dst, i32imm:$src), - "#RELEASE_MOV PSEUDO !", + "#RELEASE_MOV32mi PSEUDO!", [(atomic_store_32 addr:$dst, (i32 imm:$src))]>; def RELEASE_MOV64mi32 : I<0, Pseudo, (outs), (ins i64mem:$dst, i64i32imm:$src), - "#RELEASE_MOV PSEUDO !", + "#RELEASE_MOV64mi32 PSEUDO!", [(atomic_store_64 addr:$dst, i64immSExt32:$src)]>; def RELEASE_MOV8mr : I<0, Pseudo, (outs), (ins i8mem :$dst, GR8 :$src), - "#RELEASE_MOV PSEUDO!", + "#RELEASE_MOV8mr PSEUDO!", [(atomic_store_8 addr:$dst, GR8 :$src)]>; def RELEASE_MOV16mr : I<0, Pseudo, (outs), (ins i16mem:$dst, GR16:$src), - "#RELEASE_MOV PSEUDO!", + "#RELEASE_MOV16mr PSEUDO!", [(atomic_store_16 addr:$dst, GR16:$src)]>; def RELEASE_MOV32mr : I<0, Pseudo, (outs), (ins i32mem:$dst, GR32:$src), - "#RELEASE_MOV PSEUDO!", + "#RELEASE_MOV32mr PSEUDO!", [(atomic_store_32 addr:$dst, GR32:$src)]>; def RELEASE_MOV64mr : I<0, Pseudo, (outs), (ins i64mem:$dst, GR64:$src), - "#RELEASE_MOV PSEUDO!", + "#RELEASE_MOV64mr PSEUDO!", [(atomic_store_64 addr:$dst, GR64:$src)]>; def ACQUIRE_MOV8rm : I<0, Pseudo, (outs GR8 :$dst), (ins i8mem :$src), - "#ACQUIRE_MOV PSEUDO!", + "#ACQUIRE_MOV8rm PSEUDO!", [(set GR8:$dst, (atomic_load_8 addr:$src))]>; def ACQUIRE_MOV16rm : I<0, Pseudo, (outs GR16:$dst), (ins i16mem:$src), - "#ACQUIRE_MOV PSEUDO!", + "#ACQUIRE_MOV16rm PSEUDO!", [(set GR16:$dst, (atomic_load_16 addr:$src))]>; def ACQUIRE_MOV32rm : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$src), - "#ACQUIRE_MOV PSEUDO!", + "#ACQUIRE_MOV32rm PSEUDO!", [(set GR32:$dst, (atomic_load_32 addr:$src))]>; def ACQUIRE_MOV64rm : I<0, Pseudo, (outs GR64:$dst), (ins i64mem:$src), - "#ACQUIRE_MOV PSEUDO!", + "#ACQUIRE_MOV64rm PSEUDO!", [(set GR64:$dst, (atomic_load_64 addr:$src))]>; //===----------------------------------------------------------------------===// @@ -1077,11 +1152,11 @@ defm : CMOVmr; // zextload bool -> zextload byte def : Pat<(zextloadi8i1 addr:$src), (AND8ri (MOV8rm addr:$src), (i8 1))>; -def : Pat<(zextloadi16i1 addr:$src), (AND16ri (MOVZX16rm8 addr:$src), (i16 1))>; -def : Pat<(zextloadi32i1 addr:$src), (AND32ri (MOVZX32rm8 addr:$src), (i32 1))>; +def : Pat<(zextloadi16i1 addr:$src), (AND16ri8 (MOVZX16rm8 addr:$src), (i16 1))>; +def : Pat<(zextloadi32i1 addr:$src), (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1))>; def : Pat<(zextloadi64i1 addr:$src), (SUBREG_TO_REG (i64 0), - (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>; + (AND32ri8 (MOVZX32rm8 addr:$src), (i32 1)), sub_32bit)>; // extload bool -> extload byte // When extloading from 16-bit and smaller memory locations into 64-bit @@ -1298,7 +1373,6 @@ def : Pat<(and GR64:$src, 0x00000000FFFFFFFF), (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)), sub_32bit)>; // r & (2^16-1) ==> movz -let AddedComplexity = 1 in // Give priority over i64immZExt32. 
def : Pat<(and GR64:$src, 0xffff), (SUBREG_TO_REG (i64 0), (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))), diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 4cd5563ce727..8c351a51c460 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -53,6 +53,19 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1, "{l}ret{|f}q\t$amt", [], IIC_RET>, Requires<[In64BitMode]>; def LRETIW : Ii16<0xCA, RawFrm, (outs), (ins i16imm:$amt), "{l}ret{w|f}\t$amt", [], IIC_RET>, OpSize16; + + // The machine return-from-interrupt instructions; sometimes we also need to + // perform a post-epilogue stack adjustment. Codegen emits the pseudo form, + // which expands to include an SP adjustment if necessary. + def IRET16 : I <0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, + OpSize16; + def IRET32 : I <0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], + IIC_IRET>, OpSize32; + def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", [], + IIC_IRET>, Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in + def IRET : PseudoI<(outs), (ins i16imm:$adj), [(X86iret timm:$adj)]>; + } // Unconditional branches. diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 7cc3b599a737..fd800cf077f7 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -15,13 +15,31 @@ // FMA3 - Intel 3 operand Fused Multiply-Add instructions //===----------------------------------------------------------------------===// -let Constraints = "$src1 = $dst" in { +// For all FMA opcodes declared in the fma3p_rm and fma3s_rm multiclasses defined +// below, both the register and memory variants are commutable. +// For the register form the commutable operands are 1, 2 and 3. +// For the memory variant the folded operand must be in 3. Thus, +// in that case, only the operands 1 and 2 can be swapped. +// Commuting some of the operands may require an opcode change. +// FMA*213*: +// operands 1 and 2 (memory & register forms): *213* --> *213*(no changes); +// operands 1 and 3 (register forms only): *213* --> *231*; +// operands 2 and 3 (register forms only): *213* --> *132*. +// FMA*132*: +// operands 1 and 2 (memory & register forms): *132* --> *231*; +// operands 1 and 3 (register forms only): *132* --> *132*(no changes); +// operands 2 and 3 (register forms only): *132* --> *213*. +// FMA*231*: +// operands 1 and 2 (memory & register forms): *231* --> *132*; +// operands 1 and 3 (register forms only): *231* --> *213*; +// operands 2 and 3 (register forms only): *231* --> *231*(no changes).
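As a cross-check of the commute table just above, here is a minimal, self-contained C++ sketch encoding the same form mapping. The enum and function names are invented for illustration and are not part of the LLVM sources; read Form213 as dst = src2 * src1 + src3, and the swap pairs as the 1-based operand numbers used in the comment.

#include <cassert>

// FMA opcode forms named by their operand ordering (e.g. 213 computes
// dst = src2 * src1 + src3).
enum FMAForm { Form132, Form213, Form231 };

// Which pair of 1-based source operands gets swapped. Swap13 and Swap23 are
// only legal for the register variants, since a folded memory operand must
// stay in position 3.
enum SwapPair { Swap12, Swap13, Swap23 };

// Returns the form that computes the same value after the swap, following
// the three rows of the table above.
FMAForm commuteFMAForm(FMAForm Form, SwapPair Pair) {
  switch (Form) {
  case Form213:
    return Pair == Swap12 ? Form213 : Pair == Swap13 ? Form231 : Form132;
  case Form132:
    return Pair == Swap12 ? Form231 : Pair == Swap13 ? Form132 : Form213;
  case Form231:
    return Pair == Swap12 ? Form132 : Pair == Swap13 ? Form213 : Form231;
  }
  assert(false && "all FMA forms handled above");
  return Form213; // not reached
}

For instance, commuteFMAForm(Form132, Swap12) yields Form231, matching the FMA*132* row of the table.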
+ +let Constraints = "$src1 = $dst", hasSideEffects = 0, isCommutable = 1 in multiclass fma3p_rm opc, string OpcodeStr, PatFrag MemFrag128, PatFrag MemFrag256, ValueType OpVT128, ValueType OpVT256, - bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0, SDPatternOperator Op = null_frag> { - let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in + let usesCustomInserter = 1 in def r : FMA3 opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, VR128:$src3)))]>; - let mayLoad = 1, isCommutable = IsMVariantCommutable in + let mayLoad = 1 in def m : FMA3 opc, string OpcodeStr, [(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1, (MemFrag128 addr:$src3))))]>; - let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in + let usesCustomInserter = 1 in def rY : FMA3 opc, string OpcodeStr, [(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1, VR256:$src3)))]>, VEX_L; - let mayLoad = 1, isCommutable = IsMVariantCommutable in + let mayLoad = 1 in def mY : FMA3 opc, string OpcodeStr, (OpVT256 (Op VR256:$src2, VR256:$src1, (MemFrag256 addr:$src3))))]>, VEX_L; } -} // Constraints = "$src1 = $dst" multiclass fma3p_forms opc132, bits<8> opc213, bits<8> opc231, string OpcodeStr, string PackTy, PatFrag MemFrag128, PatFrag MemFrag256, SDNode Op, ValueType OpTy128, ValueType OpTy256> { - // For 213, both the register and memory variant are commutable. - // Indeed, the commutable operands are 1 and 2 and both live in registers - // for both variants. defm r213 : fma3p_rm; -let hasSideEffects = 0 in { + MemFrag128, MemFrag256, OpTy128, OpTy256, Op>; defm r132 : fma3p_rm; - // For 231, only the register variant is commutable. - // For the memory variant the folded operand must be in 3. Thus, - // in that case, it cannot be swapped with 2. defm r231 : fma3p_rm; -} // hasSideEffects = 0 + MemFrag128, MemFrag256, OpTy128, OpTy256>; } // Fused Multiply-Add @@ -126,83 +130,122 @@ let ExeDomain = SSEPackedDouble in { v4f64>, VEX_W; } -let Constraints = "$src1 = $dst" in { -multiclass fma3s_rm opc, string OpcodeStr, X86MemOperand x86memop, - RegisterClass RC, ValueType OpVT, PatFrag mem_frag, - bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0, +// All source register operands of FMA opcodes defined in the fma3s_rm multiclass +// can be commuted. In many cases such a commute transformation requires an opcode +// adjustment; for example, commuting operands 1 and 2 in the FMA*132 form +// would require an opcode change to FMA*231: +// FMA*132* reg1, reg2, reg3; // reg1 * reg3 + reg2; +// --> +// FMA*231* reg2, reg1, reg3; // reg1 * reg3 + reg2; +// Please see the more detailed comment at the very beginning of the section +// defining FMA3 opcodes above. +let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in +multiclass fma3s_rm opc, string OpcodeStr, + X86MemOperand x86memop, RegisterClass RC, SDPatternOperator OpNode = null_frag> { - let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in + let usesCustomInserter = 1 in def r : FMA3; + [(set RC:$dst, (OpNode RC:$src2, RC:$src1, RC:$src3))]>; - let mayLoad = 1, isCommutable = IsMVariantCommutable in + let mayLoad = 1 in def m : FMA3; + (OpNode RC:$src2, RC:$src1, (load addr:$src3)))]>; +} + +// These FMA*_Int instructions are defined specially to be used when +// the scalar FMA intrinsics are lowered to machine instructions, and in that +// sense, they are similar to existing ADD*_Int, SUB*_Int, MUL*_Int, etc.
+// instructions. +// +// All of the FMA*_Int opcodes are defined as commutable here. +// Commuting the 2nd and 3rd source register operands of FMAs is quite trivial +// and the corresponding optimizations have been developed. +// Commuting the 1st operand of FMA*_Int requires some additional analysis: +// the commute optimization is legal only if all users of FMA*_Int use only +// the lowest element of the FMA*_Int instruction. Even though such analysis +// may not be implemented yet, we allow the routines doing the actual commute +// transformation to decide if one or another instruction is commutable or not. +let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1, + hasSideEffects = 0 in +multiclass fma3s_rm_int opc, string OpcodeStr, + Operand memopr, RegisterClass RC> { + def r_Int : FMA3; + + let mayLoad = 1 in + def m_Int : FMA3; } -} // Constraints = "$src1 = $dst" multiclass fma3s_forms opc132, bits<8> opc213, bits<8> opc231, - string OpStr, string PackTy, string PT2, Intrinsic Int, - SDNode OpNode, RegisterClass RC, ValueType OpVT, - X86MemOperand x86memop, Operand memop, PatFrag mem_frag, - ComplexPattern mem_cpat> { -let hasSideEffects = 0 in { - defm r132 : fma3s_rm; - // See the other defm of r231 for the explanation regarding the - // commutable flags. - defm r231 : fma3s_rm; + string OpStr, string PackTy, + SDNode OpNode, RegisterClass RC, + X86MemOperand x86memop> { + defm r132 : fma3s_rm; + defm r213 : fma3s_rm; + defm r231 : fma3s_rm; } -// See the other defm of r213 for the explanation regarding the -// commutable flags. -defm r213 : fma3s_rm; +// The FMA 213 form is created for lowering of scalar FMA intrinsics +// to machine instructions. +// The FMA 132 form can trivially be obtained by commuting the 2nd and 3rd operands +// of the FMA 213 form. +// The FMA 231 form can be obtained only by commuting the 1st operand of the 213 or +// 132 forms, which is possible only after special analysis of all uses of the initial +// instruction. Such an analysis does not exist yet, and thus introducing the 231 +// form of FMA*_Int instructions is done under the optimistic assumption that +// such an analysis will be implemented eventually. +multiclass fma3s_int_forms opc132, bits<8> opc213, bits<8> opc231, + string OpStr, string PackTy, + RegisterClass RC, Operand memop> { + defm r132 : fma3s_rm_int; + defm r213 : fma3s_rm_int; + defm r231 : fma3s_rm_int; } multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, string OpStr, Intrinsic IntF32, Intrinsic IntF64, SDNode OpNode> { - defm SS : fma3s_forms; - defm SD : fma3s_forms, VEX_W; + let ExeDomain = SSEPackedSingle in + defm SS : fma3s_forms, + fma3s_int_forms; -// These patterns use the 123 ordering, instead of 213, even though -// they match the intrinsic to the 213 version of the instruction. -// This is because src1 is tied to dest, and the scalar intrinsics -// require the pass-through values to come from the first source -// operand, not the second. + let ExeDomain = SSEPackedDouble in + defm SD : fma3s_forms, + fma3s_int_forms, + VEX_W; + + // These patterns use the 123 ordering, instead of 213, even though + // they match the intrinsic to the 213 version of the instruction. + // This is because src1 is tied to dest, and the scalar intrinsics + // require the pass-through values to come from the first source + // operand, not the second.
def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3), - (COPY_TO_REGCLASS - (!cast(NAME#"SSr213r") - (COPY_TO_REGCLASS $src1, FR32), - (COPY_TO_REGCLASS $src2, FR32), - (COPY_TO_REGCLASS $src3, FR32)), - VR128)>; + (COPY_TO_REGCLASS(!cast(NAME#"SSr213r_Int") + $src1, $src2, $src3), VR128)>; def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3), - (COPY_TO_REGCLASS - (!cast(NAME#"SDr213r") - (COPY_TO_REGCLASS $src1, FR64), - (COPY_TO_REGCLASS $src2, FR64), - (COPY_TO_REGCLASS $src3, FR64)), - VR128)>; + (COPY_TO_REGCLASS(!cast(NAME#"SDr213r_Int") + $src1, $src2, $src3), VR128)>; } defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss, @@ -334,36 +377,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { } // isCodeGenOnly = 1 } -defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, - fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; -defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, - fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; -defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, - fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; -defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, - fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; -defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, - X86Fnmadd, loadf32>, - fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, - int_x86_fma_vfnmadd_ss>; -defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, - X86Fnmadd, loadf64>, - fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; -defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, - X86Fnmsub, loadf32>, - fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; -defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, - X86Fnmsub, loadf64>, - fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; - let ExeDomain = SSEPackedSingle in { + // Scalar Instructions + defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, + fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, + int_x86_fma_vfmadd_ss>; + defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, + fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, + int_x86_fma_vfmsub_ss>; + defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, + X86Fnmadd, loadf32>, + fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, + int_x86_fma_vfnmadd_ss>; + defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, + X86Fnmsub, loadf32>, + fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, + int_x86_fma_vfnmsub_ss>; + // Packed Instructions defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, loadv4f32, loadv8f32>; defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, @@ -379,6 +409,22 @@ let ExeDomain = SSEPackedSingle in { } let ExeDomain = SSEPackedDouble in { + // Scalar Instructions + defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, + fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfmadd_sd>; + defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, + fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfmsub_sd>; + defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, + X86Fnmadd, loadf64>, + fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, + 
int_x86_fma_vfnmadd_sd>; + defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, + X86Fnmsub, loadf64>, + fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfnmsub_sd>; + // Packed Instructions defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, loadv2f64, loadv4f64>; defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 49068e9c37d3..03ae21125b0e 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -137,69 +137,99 @@ def _Fp80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2), TwoArgFP, // The FopST0 series are not included here because of the irregularities // in where the 'r' goes in assembly output. // These instructions cannot address 80-bit memory. -multiclass FPBinary { +multiclass FPBinary { // ST(0) = ST(0) + [mem] def _Fp32m : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP32:$dst, - (OpNode RFP32:$src1, (loadf32 addr:$src2)))]>; + [!if(Forward, + (set RFP32:$dst, + (OpNode RFP32:$src1, (loadf32 addr:$src2))), + (set RFP32:$dst, + (OpNode (loadf32 addr:$src2), RFP32:$src1)))]>; def _Fp64m : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP64:$dst, - (OpNode RFP64:$src1, (loadf64 addr:$src2)))]>; + [!if(Forward, + (set RFP64:$dst, + (OpNode RFP64:$src1, (loadf64 addr:$src2))), + (set RFP64:$dst, + (OpNode (loadf64 addr:$src2), RFP64:$src1)))]>; def _Fp64m32: FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP64:$dst, - (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2))))]>; + [!if(Forward, + (set RFP64:$dst, + (OpNode RFP64:$src1, (f64 (extloadf32 addr:$src2)))), + (set RFP64:$dst, + (OpNode (f64 (extloadf32 addr:$src2)), RFP64:$src1)))]>; def _Fp80m32: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2), OneArgFPRW, - [(set RFP80:$dst, - (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2))))]>; + [!if(Forward, + (set RFP80:$dst, + (OpNode RFP80:$src1, (f80 (extloadf32 addr:$src2)))), + (set RFP80:$dst, + (OpNode (f80 (extloadf32 addr:$src2)), RFP80:$src1)))]>; def _Fp80m64: FpI_<(outs RFP80:$dst), (ins RFP80:$src1, f64mem:$src2), OneArgFPRW, - [(set RFP80:$dst, - (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2))))]>; + [!if(Forward, + (set RFP80:$dst, + (OpNode RFP80:$src1, (f80 (extloadf64 addr:$src2)))), + (set RFP80:$dst, + (OpNode (f80 (extloadf64 addr:$src2)), RFP80:$src1)))]>; +let mayLoad = 1 in def _F32m : FPI<0xD8, fp, (outs), (ins f32mem:$src), - !strconcat("f", asmstring, "{s}\t$src")> { - let mayLoad = 1; -} + !strconcat("f", asmstring, "{s}\t$src")>; +let mayLoad = 1 in def _F64m : FPI<0xDC, fp, (outs), (ins f64mem:$src), - !strconcat("f", asmstring, "{l}\t$src")> { - let mayLoad = 1; -} + !strconcat("f", asmstring, "{l}\t$src")>; // ST(0) = ST(0) + [memint] def _FpI16m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP32:$dst, (OpNode RFP32:$src1, - (X86fild addr:$src2, i16)))]>; + [!if(Forward, + (set RFP32:$dst, + (OpNode RFP32:$src1, (X86fild addr:$src2, i16))), + (set RFP32:$dst, + (OpNode (X86fild addr:$src2, i16), RFP32:$src1)))]>; def _FpI32m32 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP32:$dst, (OpNode RFP32:$src1, - (X86fild addr:$src2, i32)))]>; + [!if(Forward, + (set RFP32:$dst, + (OpNode RFP32:$src1, (X86fild addr:$src2, i32))), + (set RFP32:$dst, + (OpNode (X86fild addr:$src2, i32), RFP32:$src1)))]>; 
def _FpI16m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i16mem:$src2), OneArgFPRW, - [(set RFP64:$dst, (OpNode RFP64:$src1, - (X86fild addr:$src2, i16)))]>; + [!if(Forward, + (set RFP64:$dst, + (OpNode RFP64:$src1, (X86fild addr:$src2, i16))), + (set RFP64:$dst, + (OpNode (X86fild addr:$src2, i16), RFP64:$src1)))]>; def _FpI32m64 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src1, i32mem:$src2), OneArgFPRW, - [(set RFP64:$dst, (OpNode RFP64:$src1, - (X86fild addr:$src2, i32)))]>; + [!if(Forward, + (set RFP64:$dst, + (OpNode RFP64:$src1, (X86fild addr:$src2, i32))), + (set RFP64:$dst, + (OpNode (X86fild addr:$src2, i32), RFP64:$src1)))]>; def _FpI16m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i16mem:$src2), - OneArgFPRW, - [(set RFP80:$dst, (OpNode RFP80:$src1, - (X86fild addr:$src2, i16)))]>; + OneArgFPRW, + [!if(Forward, + (set RFP80:$dst, + (OpNode RFP80:$src1, (X86fild addr:$src2, i16))), + (set RFP80:$dst, + (OpNode (X86fild addr:$src2, i16), RFP80:$src1)))]>; def _FpI32m80 : FpI_<(outs RFP80:$dst), (ins RFP80:$src1, i32mem:$src2), - OneArgFPRW, - [(set RFP80:$dst, (OpNode RFP80:$src1, - (X86fild addr:$src2, i32)))]>; + OneArgFPRW, + [!if(Forward, + (set RFP80:$dst, + (OpNode RFP80:$src1, (X86fild addr:$src2, i32))), + (set RFP80:$dst, + (OpNode (X86fild addr:$src2, i32), RFP80:$src1)))]>; +let mayLoad = 1 in def _FI16m : FPI<0xDE, fp, (outs), (ins i16mem:$src), - !strconcat("fi", asmstring, "{s}\t$src")> { - let mayLoad = 1; -} + !strconcat("fi", asmstring, "{s}\t$src")>; +let mayLoad = 1 in def _FI32m : FPI<0xDA, fp, (outs), (ins i32mem:$src), - !strconcat("fi", asmstring, "{l}\t$src")> { - let mayLoad = 1; -} + !strconcat("fi", asmstring, "{l}\t$src")>; } let Defs = [FPSW] in { @@ -213,14 +243,14 @@ defm DIV : FPBinary_rr; let SchedRW = [WriteFAddLd] in { defm ADD : FPBinary; defm SUB : FPBinary; -defm SUBR: FPBinary; +defm SUBR: FPBinary; } let SchedRW = [WriteFMulLd] in { defm MUL : FPBinary; } let SchedRW = [WriteFDivLd] in { defm DIV : FPBinary; -defm DIVR: FPBinary; +defm DIVR: FPBinary; } } @@ -306,13 +336,13 @@ def FCOMP64m : FPI<0xDC, MRM3m, (outs), (ins f64mem:$src), "fcomp{l}\t$src">; def FRSTORm : FPI<0xDD, MRM4m, (outs f32mem:$dst), (ins), "frstor\t$dst">; def FSAVEm : FPI<0xDD, MRM6m, (outs f32mem:$dst), (ins), "fnsave\t$dst">; -def FNSTSWm : FPI<0xDD, MRM7m, (outs f32mem:$dst), (ins), "fnstsw\t$dst">; +def FNSTSWm : FPI<0xDD, MRM7m, (outs i16mem:$dst), (ins), "fnstsw\t$dst">; def FICOM16m : FPI<0xDE, MRM2m, (outs), (ins i16mem:$src), "ficom{s}\t$src">; def FICOMP16m: FPI<0xDE, MRM3m, (outs), (ins i16mem:$src), "ficomp{s}\t$src">; -def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f32mem:$src), "fbld\t$src">; -def FBSTPm : FPI<0xDF, MRM6m, (outs f32mem:$dst), (ins), "fbstp\t$dst">; +def FBLDm : FPI<0xDF, MRM4m, (outs), (ins f80mem:$src), "fbld\t$src">; +def FBSTPm : FPI<0xDF, MRM6m, (outs f80mem:$dst), (ins), "fbstp\t$dst">; // Floating point cmovs. 
class FpIf32CMov pattern> : @@ -633,16 +663,18 @@ def FRNDINT : I<0xD9, MRM_FC, (outs), (ins), "frndint", [], IIC_FRNDINT>; def FSCALE : I<0xD9, MRM_FD, (outs), (ins), "fscale", [], IIC_FSCALE>; def FCOMPP : I<0xDE, MRM_D9, (outs), (ins), "fcompp", [], IIC_FCOMPP>; -def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), - "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB; -def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), - "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], - IIC_FXSAVE>, TB, Requires<[In64BitMode]>; -def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB; -def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src), - "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)], - IIC_FXRSTOR>, TB, Requires<[In64BitMode]>; +let Predicates = [HasFXSR] in { + def FXSAVE : I<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave\t$dst", [(int_x86_fxsave addr:$dst)], IIC_FXSAVE>, TB; + def FXSAVE64 : RI<0xAE, MRM0m, (outs), (ins opaque512mem:$dst), + "fxsave64\t$dst", [(int_x86_fxsave64 addr:$dst)], + IIC_FXSAVE>, TB, Requires<[In64BitMode]>; + def FXRSTOR : I<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstor\t$src", [(int_x86_fxrstor addr:$src)], IIC_FXRSTOR>, TB; + def FXRSTOR64 : RI<0xAE, MRM1m, (outs), (ins opaque512mem:$src), + "fxrstor64\t$src", [(int_x86_fxrstor64 addr:$src)], + IIC_FXRSTOR>, TB, Requires<[In64BitMode]>; +} // Predicates = [FeatureFXSR] } // SchedRW //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 1f61ffa84e9a..829cedd55fb3 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -38,6 +38,8 @@ def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>; def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisFP<1>, SDTCisVT<3, i8>, SDTCisVec<1>]>; +def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, + SDTCisSameAs<1, 2>, SDTCisInt<3>]>; def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; @@ -58,13 +60,17 @@ def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp, [SDNPCommutative, SDNPAssociative]>; def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; +def X86frsqrt14s: SDNode<"X86ISD::FRSQRT", SDTFPBinOp>; +def X86frcp14s : SDNode<"X86ISD::FRCP", SDTFPBinOp>; def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; +def X86comiSae : SDNode<"X86ISD::COMI", SDTX86CmpTestSae>; def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; +def X86ucomiSae: SDNode<"X86ISD::UCOMI", SDTX86CmpTestSae>; def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; //def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD", @@ -74,11 +80,18 @@ def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD", SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>, SDTCisVT<1, v4i32>]>>; def X86pshufb : SDNode<"X86ISD::PSHUFB", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86psadbw : 
SDNode<"X86ISD::PSADBW", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>>; + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, + SDTCVecEltisVT<1, i8>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>]>>; +def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW", + SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>, + SDTCVecEltisVT<1, i8>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>, SDTCisInt<3>]>>; def X86andnp : SDNode<"X86ISD::ANDNP", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; @@ -86,9 +99,11 @@ def X86psign : SDNode<"X86ISD::PSIGN", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86pextrb : SDNode<"X86ISD::PEXTRB", - SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v16i8>, + SDTCisPtrTy<2>]>>; def X86pextrw : SDNode<"X86ISD::PEXTRW", - SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, v8i16>, + SDTCisPtrTy<2>]>>; def X86pinsrb : SDNode<"X86ISD::PINSRB", SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; @@ -114,19 +129,17 @@ def X86vsext : SDNode<"X86ISD::VSEXT", SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>]>>; -def X86vtrunc : SDNode<"X86ISD::VTRUNC", - SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisInt<0>, SDTCisInt<1>, - SDTCisOpSmallerThanOp<0, 1>]>>; +def SDTVtrunc : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisInt<0>, SDTCisInt<1>, + SDTCisOpSmallerThanOp<0, 1>]>; + +def X86vtrunc : SDNode<"X86ISD::VTRUNC", SDTVtrunc>; +def X86vtruncs : SDNode<"X86ISD::VTRUNCS", SDTVtrunc>; +def X86vtruncus : SDNode<"X86ISD::VTRUNCUS", SDTVtrunc>; + def X86trunc : SDNode<"X86ISD::TRUNC", SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<0, 1>]>>; - -def X86vtruncm : SDNode<"X86ISD::VTRUNCM", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisInt<0>, SDTCisInt<1>, - SDTCisVec<2>, SDTCisInt<2>, - SDTCisOpSmallerThanOp<0, 2>]>>; def X86vfpext : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>, @@ -136,6 +149,35 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND", SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>]>>; +def X86fround: SDNode<"X86ISD::VFPROUND", + SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>, + SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, f64>, + SDTCVecEltisVT<2, f64>, + SDTCisOpSmallerThanOp<0, 1>]>>; +def X86froundRnd: SDNode<"X86ISD::VFPROUND", + SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>, + SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, f64>, + SDTCVecEltisVT<2, f64>, + SDTCisOpSmallerThanOp<0, 1>, + SDTCisInt<3>]>>; + +def X86fpext : SDNode<"X86ISD::VFPEXT", + SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>, + SDTCVecEltisVT<0, f64>, + SDTCVecEltisVT<1, f32>, + SDTCVecEltisVT<2, f32>, + SDTCisOpSmallerThanOp<1, 0>]>>; + +def X86fpextRnd : SDNode<"X86ISD::VFPEXT", + SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisFP<1>,SDTCisFP<2>, + SDTCVecEltisVT<0, f64>, + SDTCVecEltisVT<1, f32>, + SDTCVecEltisVT<2, f32>, + SDTCisOpSmallerThanOp<1, 0>, + SDTCisInt<3>]>>; + def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>; def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>; def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; @@ -159,10 +201,15 @@ def X86CmpMaskCCRound : def X86CmpMaskCCScalar : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; -def X86cmpm : SDNode<"X86ISD::CMPM", 
X86CmpMaskCC>; -def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>; -def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; -def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>; +def X86CmpMaskCCScalarRound : + SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>, + SDTCisInt<4>]>; + +def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; +def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>; +def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; +def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>; +def X86cmpmsRnd : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalarRound>; def X86vshl : SDNode<"X86ISD::VSHL", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, @@ -178,6 +225,29 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>; def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>; def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>; +def X86vprot : SDNode<"X86ISD::VPROT", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86vproti : SDNode<"X86ISD::VPROTI", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisVT<2, i8>]>>; + +def X86vpshl : SDNode<"X86ISD::VPSHL", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86vpsha : SDNode<"X86ISD::VPSHA", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; + +def X86vpcom : SDNode<"X86ISD::VPCOM", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisVT<3, i8>]>>; +def X86vpcomu : SDNode<"X86ISD::VPCOMU", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisVT<3, i8>]>>; + def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; @@ -190,6 +260,7 @@ def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>; def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; +def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>, @@ -201,11 +272,15 @@ def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>, def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>; def X86pmuludq : SDNode<"X86ISD::PMULUDQ", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisSameAs<1,2>]>>; + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, + SDTCVecEltisVT<1, i32>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>]>>; def X86pmuldq : SDNode<"X86ISD::PMULDQ", - SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisSameAs<1,2>]>>; + SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>, + SDTCVecEltisVT<1, i32>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>]>>; def X86extrqi : SDNode<"X86ISD::EXTRQI", SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, @@ -221,24 +296,30 @@ def X86insertqi : SDNode<"X86ISD::INSERTQI", def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>; -def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>; def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisVec<2>]>; + SDTCisSameSizeAs<0,2>, + SDTCisSameNumEltsAs<0,2>]>; def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, - SDTCisSameAs<0,1>, SDTCisInt<2>]>; + SDTCisSameAs<0,1>, SDTCisVT<2, i8>]>; 
def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>, SDTCisInt<3>]>; + SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>; def SDTFPBinOpImmRound: SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>, SDTCisInt<4>]>; +def SDTFPUnaryOpImmRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisInt<2>, SDTCisInt<3>]>; def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; -def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; +def SDTVBroadcastm : SDTypeProfile<1, 1, [SDTCisVec<0>, + SDTCisInt<0>, SDTCisInt<1>]>; def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>; +def SDTTernlog : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, + SDTCisVT<4, i8>]>; + def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc. SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>; @@ -250,15 +331,17 @@ def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>; def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, - SDTCisVec<0>, SDTCisInt<2>]>; + SDTCisVec<0>, SDTCisVT<2, i32>]>; def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, - SDTCisVec<0>, SDTCisInt<3>]>; + SDTCisVec<0>, SDTCisVT<3, i32>]>; def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, - SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>; + SDTCisVec<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; -def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; + +def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; +def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; @@ -281,33 +364,74 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; -def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; +def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<1,2>]>; def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>; def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>; def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; +def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW" , SDTPack>; +def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack>; + def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>; def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>; -def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>; +def X86VPermv : SDNode<"X86ISD::VPERMV", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<1>, + SDTCisSameNumEltsAs<0,1>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<0,2>]>>; def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; -def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>; -def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>; +def X86VPermt2 : SDNode<"X86ISD::VPERMV3", + SDTypeProfile<1, 3, [SDTCisVec<0>, + SDTCisSameAs<0,1>, SDTCisInt<2>, + SDTCisVec<2>, SDTCisSameNumEltsAs<0, 2>, + SDTCisSameSizeAs<0,2>, + SDTCisSameAs<0,3>]>, []>; + +def X86VPermi2X : SDNode<"X86ISD::VPERMIV3", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>, + 
SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, + SDTCisSameSizeAs<0,1>, + SDTCisSameAs<0,2>, + SDTCisSameAs<0,3>]>, []>; + +def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; -def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>; -def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>; +def X86VFixupimm : SDNode<"X86ISD::VFIXUPIMM", SDTFPBinOpImmRound>; +def X86VRange : SDNode<"X86ISD::VRANGE", SDTFPBinOpImmRound>; +def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImmRound>; +def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImmRound>; +def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImmRound>; +def X86Vfpclass : SDNode<"X86ISD::VFPCLASS", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, + SDTCisVec<1>, SDTCisFP<1>, + SDTCisSameNumEltsAs<0,1>, + SDTCisVT<2, i32>]>, []>; +def X86Vfpclasss : SDNode<"X86ISD::VFPCLASSS", + SDTypeProfile<1, 2, [SDTCisVT<0, i1>, + SDTCisFP<1>, SDTCisVT<2, i32>]>,[]>; def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSubVecOfVec<1, 0>]>, []>; +// SDTCisSubVecOfVec restriction cannot be applied for 128 bit version of VBROADCASTI32x2. +def X86SubV32x2Broadcast : SDNode<"X86ISD::SUBV_BROADCAST", + SDTypeProfile<1, 1, [SDTCisVec<0>, + SDTCisSameAs<0,1>]>, []>; + def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; +def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; + [SDTCisSameAs<0, 1>, SDTCisEltOfVec<2, 1>, + SDTCisPtrTy<3>]>, []>; def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2, - [SDTCisVec<1>, SDTCisPtrTy<2>]>, []>; + [SDTCisEltOfVec<0, 1>, SDTCisVec<1>, + SDTCisPtrTy<2>]>, []>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; @@ -317,11 +441,13 @@ def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>; def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; -def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; -def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; -def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; -def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; -def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; +def X86fmaxRnd : SDNode<"X86ISD::FMAX_RND", SDTFPBinOpRound>; +def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>; +def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>; +def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>; +def X86fsqrtRnds : SDNode<"X86ISD::FSQRT_RND", STDFp2SrcRm>; +def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>; +def X86fgetexpRnds : SDNode<"X86ISD::FGETEXP_RND", STDFp2SrcRm>; def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; @@ -341,9 +467,11 @@ def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", STDFp1SrcRm>; def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>; def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>; -def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>; -def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>; -def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>; +def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>; +def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>; +def X86RndScales : 
SDNode<"X86ISD::VRNDSCALE", STDFp3SrcRm>; +def X86Reduces : SDNode<"X86ISD::VREDUCE", STDFp3SrcRm>; +def X86GetMants : SDNode<"X86ISD::VGETMANT", STDFp3SrcRm>; def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, @@ -362,7 +490,8 @@ def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>; def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>, - SDTCisSameAs<0,1>, SDTCisInt<2>, SDTCisInt<3>]>; + SDTCisSameAs<0,1>, SDTCisInt<2>, + SDTCisVT<3, i32>]>; def SDTDoubleToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>; @@ -371,9 +500,12 @@ def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, def SDTDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCVecEltisVT<1, f64>]>; +def SDTSDoubleToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>,SDTCisFP<1>, + SDTCVecEltisVT<1, f64>, SDTCisInt<2>]>; def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<0>, SDTCVecEltisVT<1, f32>]>; - +def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>, + SDTCVecEltisVT<1, f32>, SDTCisInt<2>]>; def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCVecEltisVT<1, i32>, SDTCisInt<2>]>; @@ -392,6 +524,10 @@ def SDTVFPToLongRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>; def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>; +def X86cvttss2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSFloatToIntRnd>; +def X86cvttss2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSFloatToIntRnd>; +def X86cvttsd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSDoubleToIntRnd>; +def X86cvttsd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSDoubleToIntRnd>; // Vector with rounding mode // cvtt fp-to-int staff @@ -417,17 +553,35 @@ def X86cvtps2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToInt>; def X86cvtpd2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTDoubleToInt>; def X86cvtpd2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTDoubleToInt>; +def X86cvtph2ps : SDNode<"ISD::FP16_TO_FP", + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCVecEltisVT<0, f32>, + SDTCVecEltisVT<1, i16>, + SDTCisFP<0>, + SDTCisVT<2, i32>]> >; + +def X86cvtps2ph : SDNode<"ISD::FP_TO_FP16", + SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, + SDTCVecEltisVT<0, i16>, + SDTCVecEltisVT<1, f32>, + SDTCisFP<1>, SDTCisVT<2, i32>, + SDTCisVT<3, i32>]> >; def X86vfpextRnd : SDNode<"X86ISD::VFPEXT", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>, + SDTCVecEltisVT<0, f64>, + SDTCVecEltisVT<1, f32>, SDTCisOpSmallerThanOp<1, 0>, - SDTCisInt<2>]>>; + SDTCisVT<2, i32>]>>; def X86vfproundRnd: SDNode<"X86ISD::VFPROUND", SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisFP<0>, SDTCisFP<1>, SDTCVecEltisVT<0, f32>, SDTCVecEltisVT<1, f64>, - SDTCisInt<2>]>>; + SDTCisOpSmallerThanOp<0, 1>, + SDTCisVT<2, i32>]>>; + +def X86cvt2mask : SDNode<"X86ISD::CVT2MASK", SDTIntTruncOp>; //===----------------------------------------------------------------------===// // SSE Complex Patterns @@ -436,10 +590,10 @@ def X86vfproundRnd: SDNode<"X86ISD::VFPROUND", // These are 'extloads' from a scalar to the low element of a vector, zeroing // the top elements. These are used for the SSE 'ss' and 'sd' instruction // forms. 
-def sse_load_f32 : ComplexPattern; -def sse_load_f64 : ComplexPattern; @@ -490,9 +644,9 @@ def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>; // The memory operand is required to be a 128-bit load, so it must be converted // from a vector to a scalar. def loadf32_128 : PatFrag<(ops node:$ptr), - (f32 (vector_extract (loadv4f32 node:$ptr), (iPTR 0)))>; + (f32 (extractelt (loadv4f32 node:$ptr), (iPTR 0)))>; def loadf64_128 : PatFrag<(ops node:$ptr), - (f64 (vector_extract (loadv2f64 node:$ptr), (iPTR 0)))>; + (f64 (extractelt (loadv2f64 node:$ptr), (iPTR 0)))>; // Like 'store', but always requires 128-bit vector alignment. def alignedstore : PatFrag<(ops node:$val, node:$ptr), @@ -590,9 +744,9 @@ def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; // The memory operand is required to be a 128-bit load, so it must be converted // from a vector to a scalar. def memopfsf32_128 : PatFrag<(ops node:$ptr), - (f32 (vector_extract (memopv4f32 node:$ptr), (iPTR 0)))>; + (f32 (extractelt (memopv4f32 node:$ptr), (iPTR 0)))>; def memopfsf64_128 : PatFrag<(ops node:$ptr), - (f64 (vector_extract (memopv2f64 node:$ptr), (iPTR 0)))>; + (f64 (extractelt (memopv2f64 node:$ptr), (iPTR 0)))>; // SSSE3 uses MMX registers for some instructions. They aren't aligned on a @@ -604,32 +758,6 @@ def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ def memopmmx : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>; -// MOVNT Support -// Like 'store', but requires the non-temporal bit to be set -def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - if (StoreSDNode *ST = dyn_cast(N)) - return ST->isNonTemporal(); - return false; -}]>; - -def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - if (StoreSDNode *ST = dyn_cast(N)) - return ST->isNonTemporal() && !ST->isTruncatingStore() && - ST->getAddressingMode() == ISD::UNINDEXED && - ST->getAlignment() >= 16; - return false; -}]>; - -def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - if (StoreSDNode *ST = dyn_cast(N)) - return ST->isNonTemporal() && - ST->getAlignment() < 16; - return false; -}]>; - def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3), (masked_gather node:$src1, node:$src2, node:$src3) , [{ if (MaskedGatherSDNode *Mgt = dyn_cast(N)) @@ -851,29 +979,59 @@ def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), return isa(N); }]>; +// masked store fragments. 
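The fragments defined next split masked stores into non-truncating (X86mstore) and truncating (X86mtruncstore) cases, further qualified by alignment or by the scalar memory type. A stand-alone C++ sketch of that predicate structure follows; the struct and function names are invented and the types are simplified stand-ins, not LLVM's SelectionDAG API.

// Simplified stand-in for the information a masked-store node carries.
struct MaskedStoreInfo {
  bool Truncating;     // does the store truncate the element type?
  unsigned Alignment;  // store alignment in bytes
  unsigned ScalarBits; // scalar memory type width, e.g. 8 for MVT::i8
};

// X86mstore: matches only non-truncating masked stores.
bool matchX86mstore(const MaskedStoreInfo &N) { return !N.Truncating; }

// masked_store_aligned128: non-truncating and at least 16-byte aligned.
bool matchMaskedStoreAligned128(const MaskedStoreInfo &N) {
  return matchX86mstore(N) && N.Alignment >= 16;
}

// masked_truncstorevi8: truncating with an i8 scalar memory type.
bool matchMaskedTruncStoreVi8(const MaskedStoreInfo &N) {
  return N.Truncating && N.ScalarBits == 8;
}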
+// X86mstore can't be implemented in core DAG files because some targets +// don't support vector types (llvm-tblgen will fail). +def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_store node:$src1, node:$src2, node:$src3), [{ + return !cast<MaskedStoreSDNode>(N)->isTruncatingStore(); +}]>; + def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_store node:$src1, node:$src2, node:$src3), [{ + (X86mstore node:$src1, node:$src2, node:$src3), [{ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) return Store->getAlignment() >= 16; return false; }]>; def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_store node:$src1, node:$src2, node:$src3), [{ + (X86mstore node:$src1, node:$src2, node:$src3), [{ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) return Store->getAlignment() >= 32; return false; }]>; def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_store node:$src1, node:$src2, node:$src3), [{ + (X86mstore node:$src1, node:$src2, node:$src3), [{ if (auto *Store = dyn_cast<MaskedStoreSDNode>(N)) return Store->getAlignment() >= 64; return false; }]>; def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3), - (masked_store node:$src1, node:$src2, node:$src3), [{ + (X86mstore node:$src1, node:$src2, node:$src3), [{ return isa<MaskedStoreSDNode>(N); }]>; +// Masked truncstore fragments. +// X86mtruncstore can't be implemented in core DAG files because some targets +// don't support vector types (llvm-tblgen will fail). +def X86mtruncstore : PatFrag<(ops node:$src1, node:$src2, node:$src3), + (masked_store node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->isTruncatingStore(); +}]>; +def masked_truncstorevi8 : + PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86mtruncstore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8; +}]>; +def masked_truncstorevi16 : + PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86mtruncstore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16; +}]>; +def masked_truncstorevi32 : + PatFrag<(ops node:$src1, node:$src2, node:$src3), + (X86mtruncstore node:$src1, node:$src2, node:$src3), [{ + return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32; +}]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index cf68ef053361..63e78de69bc9 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DerivedTypes.h" @@ -101,9 +102,11 @@ struct X86MemoryFoldTableEntry { void X86InstrInfo::anchor() {} X86InstrInfo::X86InstrInfo(X86Subtarget &STI) - : X86GenInstrInfo( - (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32), - (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)), + : X86GenInstrInfo((STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 + : X86::ADJCALLSTACKDOWN32), + (STI.isTarget64BitLP64() ?
X86::ADJCALLSTACKUP64 + : X86::ADJCALLSTACKUP32), + X86::CATCHRET), Subtarget(STI), RI(STI.getTargetTriple()) { static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { @@ -332,6 +335,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE }, { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE }, + { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD }, + { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD }, + { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD }, { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, @@ -495,7 +501,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 }, { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, - { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 }, { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 }, { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, @@ -605,7 +610,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 }, { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 }, { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, - { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, { X86::VPABSBrr128, X86::VPABSBrm128, 0 }, { X86::VPABSDrr128, X86::VPABSDrm128, 0 }, @@ -1647,6 +1651,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PEXT32rr, X86::PEXT32rm, 0 }, { X86::PEXT64rr, X86::PEXT64rm, 0 }, + // ADX foldable instructions + { X86::ADCX32rr, X86::ADCX32rm, 0 }, + { X86::ADCX64rr, X86::ADCX64rm, 0 }, + { X86::ADOX32rr, X86::ADOX32rm, 0 }, + { X86::ADOX64rr, X86::ADOX64rm, 0 }, + // AVX-512 foldable instructions { X86::VADDPSZrr, X86::VADDPSZrm, 0 }, { X86::VADDPDZrr, X86::VADDPDZrm, 0 }, @@ -1729,11 +1739,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) static const X86MemoryFoldTableEntry MemoryFoldTable3[] = { // FMA foldable instructions { X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE }, + { X86::VFMADDSSr231r_Int, X86::VFMADDSSr231m_Int, TB_ALIGN_NONE }, { X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE }, + { X86::VFMADDSDr231r_Int, X86::VFMADDSDr231m_Int, TB_ALIGN_NONE }, { X86::VFMADDSSr132r, X86::VFMADDSSr132m, TB_ALIGN_NONE }, + { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, TB_ALIGN_NONE }, { X86::VFMADDSDr132r, X86::VFMADDSDr132m, TB_ALIGN_NONE }, + { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, TB_ALIGN_NONE }, { X86::VFMADDSSr213r, X86::VFMADDSSr213m, TB_ALIGN_NONE }, + { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, TB_ALIGN_NONE }, { X86::VFMADDSDr213r, X86::VFMADDSDr213m, TB_ALIGN_NONE }, + { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, TB_ALIGN_NONE }, { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_NONE }, { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_NONE }, @@ -1749,11 +1765,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_NONE }, { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, TB_ALIGN_NONE }, + { X86::VFNMADDSSr231r_Int, X86::VFNMADDSSr231m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, TB_ALIGN_NONE }, + { X86::VFNMADDSDr231r_Int, X86::VFNMADDSDr231m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, TB_ALIGN_NONE }, + { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, TB_ALIGN_NONE }, + { X86::VFNMADDSDr132r_Int, 
X86::VFNMADDSDr132m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, TB_ALIGN_NONE }, + { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, TB_ALIGN_NONE }, { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, TB_ALIGN_NONE }, + { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, TB_ALIGN_NONE }, { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_NONE }, { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_NONE }, @@ -1769,11 +1791,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_NONE }, { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, TB_ALIGN_NONE }, + { X86::VFMSUBSSr231r_Int, X86::VFMSUBSSr231m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, TB_ALIGN_NONE }, + { X86::VFMSUBSDr231r_Int, X86::VFMSUBSDr231m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, TB_ALIGN_NONE }, + { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, TB_ALIGN_NONE }, + { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, TB_ALIGN_NONE }, + { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, TB_ALIGN_NONE }, { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, TB_ALIGN_NONE }, + { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, TB_ALIGN_NONE }, { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_NONE }, { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_NONE }, @@ -1789,11 +1817,17 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_NONE }, { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, TB_ALIGN_NONE }, + { X86::VFNMSUBSSr231r_Int, X86::VFNMSUBSSr231m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, TB_ALIGN_NONE }, + { X86::VFNMSUBSDr231r_Int, X86::VFNMSUBSDr231m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, TB_ALIGN_NONE }, + { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, TB_ALIGN_NONE }, + { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, TB_ALIGN_NONE }, + { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, TB_ALIGN_NONE }, + { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, TB_ALIGN_NONE }, { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_NONE }, { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_NONE }, @@ -2282,7 +2316,35 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::FsVMOVAPSrm: case X86::FsVMOVAPDrm: case X86::FsMOVAPSrm: - case X86::FsMOVAPDrm: { + case X86::FsMOVAPDrm: + // AVX-512 + case X86::VMOVAPDZ128rm: + case X86::VMOVAPDZ256rm: + case X86::VMOVAPDZrm: + case X86::VMOVAPSZ128rm: + case X86::VMOVAPSZ256rm: + case X86::VMOVAPSZrm: + case X86::VMOVDQA32Z128rm: + case X86::VMOVDQA32Z256rm: + case X86::VMOVDQA32Zrm: + case X86::VMOVDQA64Z128rm: + case X86::VMOVDQA64Z256rm: + case X86::VMOVDQA64Zrm: + case X86::VMOVDQU16Z128rm: + case X86::VMOVDQU16Z256rm: + case X86::VMOVDQU16Zrm: + case X86::VMOVDQU32Z128rm: + case X86::VMOVDQU32Z256rm: + case X86::VMOVDQU32Zrm: + case X86::VMOVDQU64Z128rm: + case X86::VMOVDQU64Z256rm: + case X86::VMOVDQU64Zrm: + case X86::VMOVDQU8Z128rm: + case X86::VMOVDQU8Z256rm: + case X86::VMOVDQU8Zrm: + case X86::VMOVUPSZ128rm: + case X86::VMOVUPSZ256rm: + case X86::VMOVUPSZrm: { // Loads from constant pools are trivially rematerializable. 
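    // (The operand checks below only accept an address that can be
    //  re-evaluated at any program point: an immediate scale, no index
    //  register, and a base register that is either absent or rIP.)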
if (MI->getOperand(1+X86::AddrBaseReg).isReg() && MI->getOperand(1+X86::AddrScaleAmt).isImm() && @@ -2363,9 +2425,8 @@ bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB, // It is safe to clobber EFLAGS at the end of a block if no successor has it // live in. if (Iter == E) { - for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), - SE = MBB.succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(X86::EFLAGS)) + for (MachineBasicBlock *S : MBB.successors()) + if (S->isLiveIn(X86::EFLAGS)) return false; return true; } @@ -2411,13 +2472,29 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, const TargetRegisterInfo &TRI) const { - // MOV32r0 is implemented with a xor which clobbers condition code. - // Re-materialize it as movri instructions to avoid side effects. - unsigned Opc = Orig->getOpcode(); - if (Opc == X86::MOV32r0 && !isSafeToClobberEFLAGS(MBB, I)) { + bool ClobbersEFLAGS = false; + for (const MachineOperand &MO : Orig->operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == X86::EFLAGS) { + ClobbersEFLAGS = true; + break; + } + } + + if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) { + // The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side + // effects. + int Value; + switch (Orig->getOpcode()) { + case X86::MOV32r0: Value = 0; break; + case X86::MOV32r1: Value = 1; break; + case X86::MOV32r_1: Value = -1; break; + default: + llvm_unreachable("Unexpected instruction!"); + } + DebugLoc DL = Orig->getDebugLoc(); BuildMI(MBB, I, DL, get(X86::MOV32ri)).addOperand(Orig->getOperand(0)) - .addImm(0); + .addImm(Value); } else { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); MBB.insert(I, MI); @@ -2428,7 +2505,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, } /// True if MI has a condition code def, e.g. EFLAGS, that is not marked dead. -static bool hasLiveCondCodeDef(MachineInstr *MI) { +bool X86InstrInfo::hasLiveCondCodeDef(MachineInstr *MI) const { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isDef() && @@ -2453,7 +2530,7 @@ inline static unsigned getTruncatedShiftCount(MachineInstr *MI, inline static bool isTruncatedShiftCountForLEA(unsigned ShAmt) { // Left shift instructions can be transformed into load-effective-address // instructions if we can encode them appropriately. - // A LEA instruction utilizes a SIB byte to encode it's scale factor. + // A LEA instruction utilizes a SIB byte to encode its scale factor. // The SIB.scale field is two bits wide which means that we can encode any // shift amount less than 4. return ShAmt < 4 && ShAmt > 0; @@ -2493,7 +2570,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr *MI, const MachineOperand &Src, ImplicitOp = Src; ImplicitOp.setImplicit(); - NewSrc = getX86SubSuperRegister(Src.getReg(), MVT::i64); + NewSrc = getX86SubSuperRegister(Src.getReg(), 64); MachineBasicBlock::LivenessQueryResult LQR = MI->getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI); @@ -2914,10 +2991,162 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return NewMI; } -/// We have a few instructions that must be hacked on to commute them. -/// -MachineInstr * -X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { +/// Returns true if the given instruction opcode is FMA3. +/// Otherwise, returns false. +/// The second parameter is optional and is used as a second return value from +/// the function.
It is set to true if the given instruction has FMA3 opcode +/// that is used for lowering of scalar FMA intrinsics, and it is set to false +/// otherwise. +static bool isFMA3(unsigned Opcode, bool *IsIntrinsic = nullptr) { + if (IsIntrinsic) + *IsIntrinsic = false; + + switch (Opcode) { + case X86::VFMADDSDr132r: case X86::VFMADDSDr132m: + case X86::VFMADDSSr132r: case X86::VFMADDSSr132m: + case X86::VFMSUBSDr132r: case X86::VFMSUBSDr132m: + case X86::VFMSUBSSr132r: case X86::VFMSUBSSr132m: + case X86::VFNMADDSDr132r: case X86::VFNMADDSDr132m: + case X86::VFNMADDSSr132r: case X86::VFNMADDSSr132m: + case X86::VFNMSUBSDr132r: case X86::VFNMSUBSDr132m: + case X86::VFNMSUBSSr132r: case X86::VFNMSUBSSr132m: + + case X86::VFMADDSDr213r: case X86::VFMADDSDr213m: + case X86::VFMADDSSr213r: case X86::VFMADDSSr213m: + case X86::VFMSUBSDr213r: case X86::VFMSUBSDr213m: + case X86::VFMSUBSSr213r: case X86::VFMSUBSSr213m: + case X86::VFNMADDSDr213r: case X86::VFNMADDSDr213m: + case X86::VFNMADDSSr213r: case X86::VFNMADDSSr213m: + case X86::VFNMSUBSDr213r: case X86::VFNMSUBSDr213m: + case X86::VFNMSUBSSr213r: case X86::VFNMSUBSSr213m: + + case X86::VFMADDSDr231r: case X86::VFMADDSDr231m: + case X86::VFMADDSSr231r: case X86::VFMADDSSr231m: + case X86::VFMSUBSDr231r: case X86::VFMSUBSDr231m: + case X86::VFMSUBSSr231r: case X86::VFMSUBSSr231m: + case X86::VFNMADDSDr231r: case X86::VFNMADDSDr231m: + case X86::VFNMADDSSr231r: case X86::VFNMADDSSr231m: + case X86::VFNMSUBSDr231r: case X86::VFNMSUBSDr231m: + case X86::VFNMSUBSSr231r: case X86::VFNMSUBSSr231m: + + case X86::VFMADDSUBPDr132r: case X86::VFMADDSUBPDr132m: + case X86::VFMADDSUBPSr132r: case X86::VFMADDSUBPSr132m: + case X86::VFMSUBADDPDr132r: case X86::VFMSUBADDPDr132m: + case X86::VFMSUBADDPSr132r: case X86::VFMSUBADDPSr132m: + case X86::VFMADDSUBPDr132rY: case X86::VFMADDSUBPDr132mY: + case X86::VFMADDSUBPSr132rY: case X86::VFMADDSUBPSr132mY: + case X86::VFMSUBADDPDr132rY: case X86::VFMSUBADDPDr132mY: + case X86::VFMSUBADDPSr132rY: case X86::VFMSUBADDPSr132mY: + + case X86::VFMADDPDr132r: case X86::VFMADDPDr132m: + case X86::VFMADDPSr132r: case X86::VFMADDPSr132m: + case X86::VFMSUBPDr132r: case X86::VFMSUBPDr132m: + case X86::VFMSUBPSr132r: case X86::VFMSUBPSr132m: + case X86::VFNMADDPDr132r: case X86::VFNMADDPDr132m: + case X86::VFNMADDPSr132r: case X86::VFNMADDPSr132m: + case X86::VFNMSUBPDr132r: case X86::VFNMSUBPDr132m: + case X86::VFNMSUBPSr132r: case X86::VFNMSUBPSr132m: + case X86::VFMADDPDr132rY: case X86::VFMADDPDr132mY: + case X86::VFMADDPSr132rY: case X86::VFMADDPSr132mY: + case X86::VFMSUBPDr132rY: case X86::VFMSUBPDr132mY: + case X86::VFMSUBPSr132rY: case X86::VFMSUBPSr132mY: + case X86::VFNMADDPDr132rY: case X86::VFNMADDPDr132mY: + case X86::VFNMADDPSr132rY: case X86::VFNMADDPSr132mY: + case X86::VFNMSUBPDr132rY: case X86::VFNMSUBPDr132mY: + case X86::VFNMSUBPSr132rY: case X86::VFNMSUBPSr132mY: + + case X86::VFMADDSUBPDr213r: case X86::VFMADDSUBPDr213m: + case X86::VFMADDSUBPSr213r: case X86::VFMADDSUBPSr213m: + case X86::VFMSUBADDPDr213r: case X86::VFMSUBADDPDr213m: + case X86::VFMSUBADDPSr213r: case X86::VFMSUBADDPSr213m: + case X86::VFMADDSUBPDr213rY: case X86::VFMADDSUBPDr213mY: + case X86::VFMADDSUBPSr213rY: case X86::VFMADDSUBPSr213mY: + case X86::VFMSUBADDPDr213rY: case X86::VFMSUBADDPDr213mY: + case X86::VFMSUBADDPSr213rY: case X86::VFMSUBADDPSr213mY: + + case X86::VFMADDPDr213r: case X86::VFMADDPDr213m: + case X86::VFMADDPSr213r: case X86::VFMADDPSr213m: + case X86::VFMSUBPDr213r: case X86::VFMSUBPDr213m: + case 
X86::VFMSUBPSr213r: case X86::VFMSUBPSr213m: + case X86::VFNMADDPDr213r: case X86::VFNMADDPDr213m: + case X86::VFNMADDPSr213r: case X86::VFNMADDPSr213m: + case X86::VFNMSUBPDr213r: case X86::VFNMSUBPDr213m: + case X86::VFNMSUBPSr213r: case X86::VFNMSUBPSr213m: + case X86::VFMADDPDr213rY: case X86::VFMADDPDr213mY: + case X86::VFMADDPSr213rY: case X86::VFMADDPSr213mY: + case X86::VFMSUBPDr213rY: case X86::VFMSUBPDr213mY: + case X86::VFMSUBPSr213rY: case X86::VFMSUBPSr213mY: + case X86::VFNMADDPDr213rY: case X86::VFNMADDPDr213mY: + case X86::VFNMADDPSr213rY: case X86::VFNMADDPSr213mY: + case X86::VFNMSUBPDr213rY: case X86::VFNMSUBPDr213mY: + case X86::VFNMSUBPSr213rY: case X86::VFNMSUBPSr213mY: + + case X86::VFMADDSUBPDr231r: case X86::VFMADDSUBPDr231m: + case X86::VFMADDSUBPSr231r: case X86::VFMADDSUBPSr231m: + case X86::VFMSUBADDPDr231r: case X86::VFMSUBADDPDr231m: + case X86::VFMSUBADDPSr231r: case X86::VFMSUBADDPSr231m: + case X86::VFMADDSUBPDr231rY: case X86::VFMADDSUBPDr231mY: + case X86::VFMADDSUBPSr231rY: case X86::VFMADDSUBPSr231mY: + case X86::VFMSUBADDPDr231rY: case X86::VFMSUBADDPDr231mY: + case X86::VFMSUBADDPSr231rY: case X86::VFMSUBADDPSr231mY: + + case X86::VFMADDPDr231r: case X86::VFMADDPDr231m: + case X86::VFMADDPSr231r: case X86::VFMADDPSr231m: + case X86::VFMSUBPDr231r: case X86::VFMSUBPDr231m: + case X86::VFMSUBPSr231r: case X86::VFMSUBPSr231m: + case X86::VFNMADDPDr231r: case X86::VFNMADDPDr231m: + case X86::VFNMADDPSr231r: case X86::VFNMADDPSr231m: + case X86::VFNMSUBPDr231r: case X86::VFNMSUBPDr231m: + case X86::VFNMSUBPSr231r: case X86::VFNMSUBPSr231m: + case X86::VFMADDPDr231rY: case X86::VFMADDPDr231mY: + case X86::VFMADDPSr231rY: case X86::VFMADDPSr231mY: + case X86::VFMSUBPDr231rY: case X86::VFMSUBPDr231mY: + case X86::VFMSUBPSr231rY: case X86::VFMSUBPSr231mY: + case X86::VFNMADDPDr231rY: case X86::VFNMADDPDr231mY: + case X86::VFNMADDPSr231rY: case X86::VFNMADDPSr231mY: + case X86::VFNMSUBPDr231rY: case X86::VFNMSUBPDr231mY: + case X86::VFNMSUBPSr231rY: case X86::VFNMSUBPSr231mY: + return true; + + case X86::VFMADDSDr132r_Int: case X86::VFMADDSDr132m_Int: + case X86::VFMADDSSr132r_Int: case X86::VFMADDSSr132m_Int: + case X86::VFMSUBSDr132r_Int: case X86::VFMSUBSDr132m_Int: + case X86::VFMSUBSSr132r_Int: case X86::VFMSUBSSr132m_Int: + case X86::VFNMADDSDr132r_Int: case X86::VFNMADDSDr132m_Int: + case X86::VFNMADDSSr132r_Int: case X86::VFNMADDSSr132m_Int: + case X86::VFNMSUBSDr132r_Int: case X86::VFNMSUBSDr132m_Int: + case X86::VFNMSUBSSr132r_Int: case X86::VFNMSUBSSr132m_Int: + + case X86::VFMADDSDr213r_Int: case X86::VFMADDSDr213m_Int: + case X86::VFMADDSSr213r_Int: case X86::VFMADDSSr213m_Int: + case X86::VFMSUBSDr213r_Int: case X86::VFMSUBSDr213m_Int: + case X86::VFMSUBSSr213r_Int: case X86::VFMSUBSSr213m_Int: + case X86::VFNMADDSDr213r_Int: case X86::VFNMADDSDr213m_Int: + case X86::VFNMADDSSr213r_Int: case X86::VFNMADDSSr213m_Int: + case X86::VFNMSUBSDr213r_Int: case X86::VFNMSUBSDr213m_Int: + case X86::VFNMSUBSSr213r_Int: case X86::VFNMSUBSSr213m_Int: + + case X86::VFMADDSDr231r_Int: case X86::VFMADDSDr231m_Int: + case X86::VFMADDSSr231r_Int: case X86::VFMADDSSr231m_Int: + case X86::VFMSUBSDr231r_Int: case X86::VFMSUBSDr231m_Int: + case X86::VFMSUBSSr231r_Int: case X86::VFMSUBSSr231m_Int: + case X86::VFNMADDSDr231r_Int: case X86::VFNMADDSDr231m_Int: + case X86::VFNMADDSSr231r_Int: case X86::VFNMADDSSr231m_Int: + case X86::VFNMSUBSDr231r_Int: case X86::VFNMSUBSDr231m_Int: + case X86::VFNMSUBSSr231r_Int: case X86::VFNMSUBSSr231m_Int: + if (IsIntrinsic) + 
*IsIntrinsic = true; + return true; + default: + return false; + } + llvm_unreachable("Opcode not handled by the switch"); +} + +MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { switch (MI->getOpcode()) { case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) @@ -2944,7 +3173,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { } MI->setDesc(get(Opc)); MI->getOperand(3).setImm(Size-Amt); - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } case X86::BLENDPDrri: case X86::BLENDPSrri: @@ -2980,7 +3209,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { NewMI = false; } MI->getOperand(3).setImm(Mask ^ Imm); - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } case X86::PCLMULQDQrr: case X86::VPCLMULQDQrr:{ @@ -2995,7 +3224,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { NewMI = false; } MI->getOperand(3).setImm((Src1Hi << 4) | (Src2Hi >> 4)); - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } case X86::CMPPDrri: case X86::CMPPSrri: @@ -3016,7 +3245,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { MI = MF.CloneMachineInstr(MI); NewMI = false; } - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); default: return nullptr; } @@ -3045,7 +3274,7 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { NewMI = false; } MI->getOperand(3).setImm(Imm); - return TargetInstrInfo::commuteInstruction(MI, NewMI); + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr: case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr: @@ -3124,11 +3353,272 @@ X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { // Fallthrough intended. } default: - return TargetInstrInfo::commuteInstruction(MI, NewMI); + if (isFMA3(MI->getOpcode())) { + unsigned Opc = getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2); + if (Opc == 0) + return nullptr; + if (NewMI) { + MachineFunction &MF = *MI->getParent()->getParent(); + MI = MF.CloneMachineInstr(MI); + NewMI = false; + } + MI->setDesc(get(Opc)); + } + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } } -bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, +bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr *MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { + + unsigned RegOpsNum = isMem(MI, 3) ? 2 : 3; + + // Only the first RegOpsNum operands are commutable. + // Also, the value 'CommuteAnyOperandIndex' is valid here as it means + // that the operand is not specified/fixed. + if (SrcOpIdx1 != CommuteAnyOperandIndex && + (SrcOpIdx1 < 1 || SrcOpIdx1 > RegOpsNum)) + return false; + if (SrcOpIdx2 != CommuteAnyOperandIndex && + (SrcOpIdx2 < 1 || SrcOpIdx2 > RegOpsNum)) + return false; + + // Look for two different register operands assumed to be commutable + // regardless of the FMA opcode. The FMA opcode is adjusted later. 
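+  // (CommuteAnyOperandIndex is the caller's "you pick" value: for example,
+  //  calling findCommutedOpIndices(MI, Idx1 = 2, Idx2 = CommuteAnyOperandIndex)
+  //  fixes operand 2 and lets the code below choose a suitable partner.)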
+ if (SrcOpIdx1 == CommuteAnyOperandIndex || + SrcOpIdx2 == CommuteAnyOperandIndex) { + unsigned CommutableOpIdx1 = SrcOpIdx1; + unsigned CommutableOpIdx2 = SrcOpIdx2; + + // At least one of the operands to be commuted is not specified and + // this method is free to choose appropriate commutable operands. + if (SrcOpIdx1 == SrcOpIdx2) + // Neither operand is fixed. By default, set one of the commutable + // operands to the last register operand of the instruction. + CommutableOpIdx2 = RegOpsNum; + else if (SrcOpIdx2 == CommuteAnyOperandIndex) + // Only one of the operands is not fixed. + CommutableOpIdx2 = SrcOpIdx1; + + // CommutableOpIdx2 is well defined now. Let's choose another commutable + // operand and assign its index to CommutableOpIdx1. + unsigned Op2Reg = MI->getOperand(CommutableOpIdx2).getReg(); + for (CommutableOpIdx1 = RegOpsNum; CommutableOpIdx1 > 0; CommutableOpIdx1--) { + // The commuted operands must have different registers; otherwise, + // the commute transformation does not change anything and is useless. + if (Op2Reg != MI->getOperand(CommutableOpIdx1).getReg()) + break; + } + + // No appropriate commutable operands were found. + if (CommutableOpIdx1 == 0) + return false; + + // Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpIdx2 + // to return those values. + if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, + CommutableOpIdx1, CommutableOpIdx2)) + return false; + } + + // Check if we can adjust the opcode to preserve the semantics when + // commuting the register operands. + return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2) != 0; +} + +unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(MachineInstr *MI, + unsigned SrcOpIdx1, + unsigned SrcOpIdx2) const { + unsigned Opc = MI->getOpcode(); + + // Define the array that holds FMA opcodes in groups + // of 3 opcodes (132, 213, 231) in each group.
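+  // Each row below holds the three forms of one operation variant
+  // (scalar/packed, reg/mem, 128/256-bit); the column index selects the
+  // form: 0 -> 132, 1 -> 213, 2 -> 231.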
+ static const unsigned RegularOpcodeGroups[][3] = { + { X86::VFMADDSSr132r, X86::VFMADDSSr213r, X86::VFMADDSSr231r }, + { X86::VFMADDSDr132r, X86::VFMADDSDr213r, X86::VFMADDSDr231r }, + { X86::VFMADDPSr132r, X86::VFMADDPSr213r, X86::VFMADDPSr231r }, + { X86::VFMADDPDr132r, X86::VFMADDPDr213r, X86::VFMADDPDr231r }, + { X86::VFMADDPSr132rY, X86::VFMADDPSr213rY, X86::VFMADDPSr231rY }, + { X86::VFMADDPDr132rY, X86::VFMADDPDr213rY, X86::VFMADDPDr231rY }, + { X86::VFMADDSSr132m, X86::VFMADDSSr213m, X86::VFMADDSSr231m }, + { X86::VFMADDSDr132m, X86::VFMADDSDr213m, X86::VFMADDSDr231m }, + { X86::VFMADDPSr132m, X86::VFMADDPSr213m, X86::VFMADDPSr231m }, + { X86::VFMADDPDr132m, X86::VFMADDPDr213m, X86::VFMADDPDr231m }, + { X86::VFMADDPSr132mY, X86::VFMADDPSr213mY, X86::VFMADDPSr231mY }, + { X86::VFMADDPDr132mY, X86::VFMADDPDr213mY, X86::VFMADDPDr231mY }, + + { X86::VFMSUBSSr132r, X86::VFMSUBSSr213r, X86::VFMSUBSSr231r }, + { X86::VFMSUBSDr132r, X86::VFMSUBSDr213r, X86::VFMSUBSDr231r }, + { X86::VFMSUBPSr132r, X86::VFMSUBPSr213r, X86::VFMSUBPSr231r }, + { X86::VFMSUBPDr132r, X86::VFMSUBPDr213r, X86::VFMSUBPDr231r }, + { X86::VFMSUBPSr132rY, X86::VFMSUBPSr213rY, X86::VFMSUBPSr231rY }, + { X86::VFMSUBPDr132rY, X86::VFMSUBPDr213rY, X86::VFMSUBPDr231rY }, + { X86::VFMSUBSSr132m, X86::VFMSUBSSr213m, X86::VFMSUBSSr231m }, + { X86::VFMSUBSDr132m, X86::VFMSUBSDr213m, X86::VFMSUBSDr231m }, + { X86::VFMSUBPSr132m, X86::VFMSUBPSr213m, X86::VFMSUBPSr231m }, + { X86::VFMSUBPDr132m, X86::VFMSUBPDr213m, X86::VFMSUBPDr231m }, + { X86::VFMSUBPSr132mY, X86::VFMSUBPSr213mY, X86::VFMSUBPSr231mY }, + { X86::VFMSUBPDr132mY, X86::VFMSUBPDr213mY, X86::VFMSUBPDr231mY }, + + { X86::VFNMADDSSr132r, X86::VFNMADDSSr213r, X86::VFNMADDSSr231r }, + { X86::VFNMADDSDr132r, X86::VFNMADDSDr213r, X86::VFNMADDSDr231r }, + { X86::VFNMADDPSr132r, X86::VFNMADDPSr213r, X86::VFNMADDPSr231r }, + { X86::VFNMADDPDr132r, X86::VFNMADDPDr213r, X86::VFNMADDPDr231r }, + { X86::VFNMADDPSr132rY, X86::VFNMADDPSr213rY, X86::VFNMADDPSr231rY }, + { X86::VFNMADDPDr132rY, X86::VFNMADDPDr213rY, X86::VFNMADDPDr231rY }, + { X86::VFNMADDSSr132m, X86::VFNMADDSSr213m, X86::VFNMADDSSr231m }, + { X86::VFNMADDSDr132m, X86::VFNMADDSDr213m, X86::VFNMADDSDr231m }, + { X86::VFNMADDPSr132m, X86::VFNMADDPSr213m, X86::VFNMADDPSr231m }, + { X86::VFNMADDPDr132m, X86::VFNMADDPDr213m, X86::VFNMADDPDr231m }, + { X86::VFNMADDPSr132mY, X86::VFNMADDPSr213mY, X86::VFNMADDPSr231mY }, + { X86::VFNMADDPDr132mY, X86::VFNMADDPDr213mY, X86::VFNMADDPDr231mY }, + + { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr213r, X86::VFNMSUBSSr231r }, + { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr213r, X86::VFNMSUBSDr231r }, + { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr213r, X86::VFNMSUBPSr231r }, + { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr213r, X86::VFNMSUBPDr231r }, + { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr231rY }, + { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr231rY }, + { X86::VFNMSUBSSr132m, X86::VFNMSUBSSr213m, X86::VFNMSUBSSr231m }, + { X86::VFNMSUBSDr132m, X86::VFNMSUBSDr213m, X86::VFNMSUBSDr231m }, + { X86::VFNMSUBPSr132m, X86::VFNMSUBPSr213m, X86::VFNMSUBPSr231m }, + { X86::VFNMSUBPDr132m, X86::VFNMSUBPDr213m, X86::VFNMSUBPDr231m }, + { X86::VFNMSUBPSr132mY, X86::VFNMSUBPSr213mY, X86::VFNMSUBPSr231mY }, + { X86::VFNMSUBPDr132mY, X86::VFNMSUBPDr213mY, X86::VFNMSUBPDr231mY }, + + { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr231r }, + { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr231r }, + { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr213rY, 
X86::VFMADDSUBPSr231rY }, + { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr231rY }, + { X86::VFMADDSUBPSr132m, X86::VFMADDSUBPSr213m, X86::VFMADDSUBPSr231m }, + { X86::VFMADDSUBPDr132m, X86::VFMADDSUBPDr213m, X86::VFMADDSUBPDr231m }, + { X86::VFMADDSUBPSr132mY, X86::VFMADDSUBPSr213mY, X86::VFMADDSUBPSr231mY }, + { X86::VFMADDSUBPDr132mY, X86::VFMADDSUBPDr213mY, X86::VFMADDSUBPDr231mY }, + + { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr231r }, + { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr231r }, + { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr231rY }, + { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr231rY }, + { X86::VFMSUBADDPSr132m, X86::VFMSUBADDPSr213m, X86::VFMSUBADDPSr231m }, + { X86::VFMSUBADDPDr132m, X86::VFMSUBADDPDr213m, X86::VFMSUBADDPDr231m }, + { X86::VFMSUBADDPSr132mY, X86::VFMSUBADDPSr213mY, X86::VFMSUBADDPSr231mY }, + { X86::VFMSUBADDPDr132mY, X86::VFMSUBADDPDr213mY, X86::VFMSUBADDPDr231mY } + }; + + // Define the array that holds FMA*_Int opcodes in groups + // of 3 opcodes(132, 213, 231) in each group. + static const unsigned IntrinOpcodeGroups[][3] = { + { X86::VFMADDSSr132r_Int, X86::VFMADDSSr213r_Int, X86::VFMADDSSr231r_Int }, + { X86::VFMADDSDr132r_Int, X86::VFMADDSDr213r_Int, X86::VFMADDSDr231r_Int }, + { X86::VFMADDSSr132m_Int, X86::VFMADDSSr213m_Int, X86::VFMADDSSr231m_Int }, + { X86::VFMADDSDr132m_Int, X86::VFMADDSDr213m_Int, X86::VFMADDSDr231m_Int }, + + { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr231r_Int }, + { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr231r_Int }, + { X86::VFMSUBSSr132m_Int, X86::VFMSUBSSr213m_Int, X86::VFMSUBSSr231m_Int }, + { X86::VFMSUBSDr132m_Int, X86::VFMSUBSDr213m_Int, X86::VFMSUBSDr231m_Int }, + + { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr231r_Int }, + { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr231r_Int }, + { X86::VFNMADDSSr132m_Int, X86::VFNMADDSSr213m_Int, X86::VFNMADDSSr231m_Int }, + { X86::VFNMADDSDr132m_Int, X86::VFNMADDSDr213m_Int, X86::VFNMADDSDr231m_Int }, + + { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr231r_Int }, + { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr231r_Int }, + { X86::VFNMSUBSSr132m_Int, X86::VFNMSUBSSr213m_Int, X86::VFNMSUBSSr231m_Int }, + { X86::VFNMSUBSDr132m_Int, X86::VFNMSUBSDr213m_Int, X86::VFNMSUBSDr231m_Int }, + }; + + const unsigned Form132Index = 0; + const unsigned Form213Index = 1; + const unsigned Form231Index = 2; + const unsigned FormsNum = 3; + + bool IsIntrinOpcode; + isFMA3(Opc, &IsIntrinOpcode); + + size_t GroupsNum; + const unsigned (*OpcodeGroups)[3]; + if (IsIntrinOpcode) { + GroupsNum = array_lengthof(IntrinOpcodeGroups); + OpcodeGroups = IntrinOpcodeGroups; + } else { + GroupsNum = array_lengthof(RegularOpcodeGroups); + OpcodeGroups = RegularOpcodeGroups; + } + + const unsigned *FoundOpcodesGroup = nullptr; + size_t FormIndex; + + // Look for the input opcode in the corresponding opcodes table. + for (size_t GroupIndex = 0; GroupIndex < GroupsNum && !FoundOpcodesGroup; + ++GroupIndex) { + for (FormIndex = 0; FormIndex < FormsNum; ++FormIndex) { + if (OpcodeGroups[GroupIndex][FormIndex] == Opc) { + FoundOpcodesGroup = OpcodeGroups[GroupIndex]; + break; + } + } + } + + // The input opcode does not match with any of the opcodes from the tables. + // The unsupported FMA opcode must be added to one of the two opcode groups + // defined above. 
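Before the mapping step that follows, it helps to spell out the form semantics; the numbering is Intel's, and this check is an illustrative addition rather than patch code. With operands (Op1, Op2, Op3), where Op1 doubles as the destination:

    // FMA form semantics (scalar view):
    //   132: Op1 = Op1 * Op3 + Op2
    //   213: Op1 = Op2 * Op1 + Op3
    //   231: Op1 = Op2 * Op3 + Op1
    // Quick check of the first FormMapping row below (swapping operands 1 and
    // 2 must turn the 132 form into the 231 form):
    #include <cassert>

    static double fma132(double a, double b, double c) { return a * c + b; }
    static double fma231(double a, double b, double c) { return b * c + a; }

    static void checkSwap12() {
      double A = 2.0, B = 3.0, C = 4.0;
      // Same value from the relabeled form with operands 1 and 2 swapped.
      assert(fma132(A, B, C) == fma231(B, A, C)); // 2*4+3 == 2*4+3
    }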
+ assert(FoundOpcodesGroup != nullptr && "Unexpected FMA3 opcode"); + + // Put the lowest index in SrcOpIdx1 to simplify the checks below. + if (SrcOpIdx1 > SrcOpIdx2) + std::swap(SrcOpIdx1, SrcOpIdx2); + + // TODO: Commuting the 1st operand of FMA*_Int requires some additional + // analysis. The commute optimization is legal only if all users of FMA*_Int + // use only the lowest element of the FMA*_Int instruction. Such analysis is + // not implemented yet, so just return 0 in that case. + // When such analysis becomes available, this will be the right place to + // call it. + if (IsIntrinOpcode && SrcOpIdx1 == 1) + return 0; + + unsigned Case; + if (SrcOpIdx1 == 1 && SrcOpIdx2 == 2) + Case = 0; + else if (SrcOpIdx1 == 1 && SrcOpIdx2 == 3) + Case = 1; + else if (SrcOpIdx1 == 2 && SrcOpIdx2 == 3) + Case = 2; + else + return 0; + + // Define the FMA forms mapping array that maps the input FMA form + // to the output FMA form that preserves the operation semantics after + // commuting the operands. + static const unsigned FormMapping[][3] = { + // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2; + // FMA132 A, C, b; ==> FMA231 C, A, b; + // FMA213 B, A, c; ==> FMA213 A, B, c; + // FMA231 C, A, b; ==> FMA132 A, C, b; + { Form231Index, Form213Index, Form132Index }, + // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3; + // FMA132 A, c, B; ==> FMA132 B, c, A; + // FMA213 B, a, C; ==> FMA231 C, a, B; + // FMA231 C, a, B; ==> FMA213 B, a, C; + { Form132Index, Form231Index, Form213Index }, + // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3; + // FMA132 a, C, B; ==> FMA213 a, B, C; + // FMA213 b, A, C; ==> FMA132 b, C, A; + // FMA231 c, A, B; ==> FMA231 c, B, A; + { Form213Index, Form132Index, Form231Index } + }; + + // Everything is ready, just adjust the FMA opcode and return it. + FormIndex = FormMapping[Case][FormIndex]; + return FoundOpcodesGroup[FormIndex]; +} + +bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, + unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const { switch (MI->getOpcode()) { case X86::CMPPDrri: @@ -3141,46 +3631,22 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, // Ordered/Unordered/Equal/NotEqual tests unsigned Imm = MI->getOperand(3).getImm() & 0x7; switch (Imm) { - case 0x00: // EQUAL - case 0x03: // UNORDERED - case 0x04: // NOT EQUAL - case 0x07: // ORDERED - SrcOpIdx1 = 1; - SrcOpIdx2 = 2; - return true; + case 0x00: // EQUAL + case 0x03: // UNORDERED + case 0x04: // NOT EQUAL + case 0x07: // ORDERED + // The indices of the commutable operands are 1 and 2. + // Assign them to the returned operand indices here.
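+      // (fixCommutedOpIndices, inherited from TargetInstrInfo, also validates
+      //  the request: an index the caller fixed must equal one of the two
+      //  values passed in, and any CommuteAnyOperandIndex placeholder is
+      //  bound to the remaining one.)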
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2); } return false; } - case X86::VFMADDPDr231r: - case X86::VFMADDPSr231r: - case X86::VFMADDSDr231r: - case X86::VFMADDSSr231r: - case X86::VFMSUBPDr231r: - case X86::VFMSUBPSr231r: - case X86::VFMSUBSDr231r: - case X86::VFMSUBSSr231r: - case X86::VFNMADDPDr231r: - case X86::VFNMADDPSr231r: - case X86::VFNMADDSDr231r: - case X86::VFNMADDSSr231r: - case X86::VFNMSUBPDr231r: - case X86::VFNMSUBPSr231r: - case X86::VFNMSUBSDr231r: - case X86::VFNMSUBSSr231r: - case X86::VFMADDPDr231rY: - case X86::VFMADDPSr231rY: - case X86::VFMSUBPDr231rY: - case X86::VFMSUBPSr231rY: - case X86::VFNMADDPDr231rY: - case X86::VFNMADDPSr231rY: - case X86::VFNMSUBPDr231rY: - case X86::VFNMSUBPSr231rY: - SrcOpIdx1 = 2; - SrcOpIdx2 = 3; - return true; default: + if (isFMA3(MI->getOpcode())) + return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); } + return false; } static X86::CondCode getCondFromBranchOpc(unsigned BrOpc) { @@ -3821,15 +4287,58 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, return 0; } -inline static bool MaskRegClassContains(unsigned Reg) { +static bool MaskRegClassContains(unsigned Reg) { return X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) || X86::VK32RegClass.contains(Reg) || X86::VK64RegClass.contains(Reg) || X86::VK1RegClass.contains(Reg); } + +static bool GRRegClassContains(unsigned Reg) { + return X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg) || + X86::GR16RegClass.contains(Reg) || + X86::GR8RegClass.contains(Reg); +} static -unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { +unsigned copyPhysRegOpcode_AVX512_DQ(unsigned& DestReg, unsigned& SrcReg) { + if (MaskRegClassContains(SrcReg) && X86::GR8RegClass.contains(DestReg)) { + DestReg = getX86SubSuperRegister(DestReg, 32); + return X86::KMOVBrk; + } + if (MaskRegClassContains(DestReg) && X86::GR8RegClass.contains(SrcReg)) { + SrcReg = getX86SubSuperRegister(SrcReg, 32); + return X86::KMOVBkr; + } + return 0; +} + +static +unsigned copyPhysRegOpcode_AVX512_BW(unsigned& DestReg, unsigned& SrcReg) { + if (MaskRegClassContains(SrcReg) && MaskRegClassContains(DestReg)) + return X86::KMOVQkk; + if (MaskRegClassContains(SrcReg) && X86::GR32RegClass.contains(DestReg)) + return X86::KMOVDrk; + if (MaskRegClassContains(SrcReg) && X86::GR64RegClass.contains(DestReg)) + return X86::KMOVQrk; + if (MaskRegClassContains(DestReg) && X86::GR32RegClass.contains(SrcReg)) + return X86::KMOVDkr; + if (MaskRegClassContains(DestReg) && X86::GR64RegClass.contains(SrcReg)) + return X86::KMOVQkr; + return 0; +} + +static +unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg, + const X86Subtarget &Subtarget) +{ + if (Subtarget.hasDQI()) + if (auto Opc = copyPhysRegOpcode_AVX512_DQ(DestReg, SrcReg)) + return Opc; + if (Subtarget.hasBWI()) + if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg)) + return Opc; if (X86::VR128XRegClass.contains(DestReg, SrcReg) || X86::VR256XRegClass.contains(DestReg, SrcReg) || X86::VR512RegClass.contains(DestReg, SrcReg)) { @@ -3837,21 +4346,14 @@ unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg) { SrcReg = get512BitSuperRegister(SrcReg); return X86::VMOVAPSZrr; } - if (MaskRegClassContains(DestReg) && - MaskRegClassContains(SrcReg)) + if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) return X86::KMOVWkk; - if (MaskRegClassContains(DestReg) && - 
(X86::GR32RegClass.contains(SrcReg) || - X86::GR16RegClass.contains(SrcReg) || - X86::GR8RegClass.contains(SrcReg))) { - SrcReg = getX86SubSuperRegister(SrcReg, MVT::i32); + if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) { + SrcReg = getX86SubSuperRegister(SrcReg, 32); return X86::KMOVWkr; } - if ((X86::GR32RegClass.contains(DestReg) || - X86::GR16RegClass.contains(DestReg) || - X86::GR8RegClass.contains(DestReg)) && - MaskRegClassContains(SrcReg)) { - DestReg = getX86SubSuperRegister(DestReg, MVT::i32); + if (GRRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) { + DestReg = getX86SubSuperRegister(DestReg, 32); return X86::KMOVWrk; } return 0; @@ -3886,7 +4388,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (X86::VR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MMX_MOVQ64rr; else if (HasAVX512) - Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg); + Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget); else if (X86::VR128RegClass.contains(DestReg, SrcReg)) Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr; else if (X86::VR256RegClass.contains(DestReg, SrcReg)) @@ -3900,34 +4402,86 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - // Moving EFLAGS to / from another register requires a push and a pop. - // Notice that we have to adjust the stack if we don't want to clobber the - // first frame index. See X86FrameLowering.cpp - clobbersTheStack. - if (SrcReg == X86::EFLAGS) { - if (X86::GR64RegClass.contains(DestReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSHF64)); - BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); + bool FromEFLAGS = SrcReg == X86::EFLAGS; + bool ToEFLAGS = DestReg == X86::EFLAGS; + int Reg = FromEFLAGS ? DestReg : SrcReg; + bool is32 = X86::GR32RegClass.contains(Reg); + bool is64 = X86::GR64RegClass.contains(Reg); + + if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) { + int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; + int Push = is64 ? X86::PUSH64r : X86::PUSH32r; + int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32; + int Pop = is64 ? X86::POP64r : X86::POP32r; + int PopF = is64 ? X86::POPF64 : X86::POPF32; + int AX = is64 ? X86::RAX : X86::EAX; + + if (!Subtarget.hasLAHFSAHF()) { + assert(Subtarget.is64Bit() && + "Not having LAHF/SAHF only happens on 64-bit."); + // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - usesTheStack. + if (FromEFLAGS) { + BuildMI(MBB, MI, DL, get(PushF)); + BuildMI(MBB, MI, DL, get(Pop), DestReg); + } + if (ToEFLAGS) { + BuildMI(MBB, MI, DL, get(Push)) + .addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(PopF)); + } return; } - if (X86::GR32RegClass.contains(DestReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSHF32)); - BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); - return; + + // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is + // inefficient. Instead: + // - Save the overflow flag OF into AL using SETO, and restore it using a + // signed 8-bit addition of AL and INT8_MAX. + // - Save/restore the bottom 8 EFLAGS bits (CF, PF, AF, ZF, SF) to/from AH + // using LAHF/SAHF. + // - When RAX/EAX is live and isn't the destination register, make sure it + // isn't clobbered by PUSH/POP'ing it before and after saving/restoring + // the flags. + // This approach is ~2.25x faster than using PUSHF/POPF. + // + // This is still somewhat inefficient because we don't know which flags are + // actually live inside EFLAGS. 
Were we able to do a single SETcc instead of + // SETO+LAHF / ADDB+SAHF the code could be 1.02x faster. + // + // PUSHF/POPF is also potentially incorrect because it affects other flags + // such as TF/IF/DF, which LLVM doesn't model. + // + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - usesTheStack. + + + bool AXDead = (Reg == AX) || + (MachineBasicBlock::LQR_Dead == + MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI)); + if (!AXDead) { + // FIXME: If computeRegisterLiveness() reported LQR_Unknown then AX may + // actually be dead. This is not a problem for correctness as we are just + // (unnecessarily) saving+restoring a dead register. However the + // MachineVerifier expects operands that read from dead registers + // to be marked with the "undef" flag. + BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true)); } - } - if (DestReg == X86::EFLAGS) { - if (X86::GR64RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSH64r)) - .addReg(SrcReg, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(X86::POPF64)); - return; + if (FromEFLAGS) { + BuildMI(MBB, MI, DL, get(X86::SETOr), X86::AL); + BuildMI(MBB, MI, DL, get(X86::LAHF)); + BuildMI(MBB, MI, DL, get(Mov), Reg).addReg(AX); } - if (X86::GR32RegClass.contains(SrcReg)) { - BuildMI(MBB, MI, DL, get(X86::PUSH32r)) - .addReg(SrcReg, getKillRegState(KillSrc)); - BuildMI(MBB, MI, DL, get(X86::POPF32)); - return; + if (ToEFLAGS) { + BuildMI(MBB, MI, DL, get(Mov), AX).addReg(Reg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(X86::ADD8ri), X86::AL) + .addReg(X86::AL) + .addImm(INT8_MAX); + BuildMI(MBB, MI, DL, get(X86::SAHF)); } + if (!AXDead) + BuildMI(MBB, MI, DL, get(Pop), AX); + return; } DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) @@ -4602,9 +5156,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // live-out. If it is live-out, do not optimize. if ((IsCmpZero || IsSwapped) && !IsSafe) { MachineBasicBlock *MBB = CmpInstr->getParent(); - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) - if ((*SI)->isLiveIn(X86::EFLAGS)) + for (MachineBasicBlock *Successor : MBB->successors()) + if (Successor->isLiveIn(X86::EFLAGS)) return false; } @@ -4645,8 +5198,8 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, CmpInstr->eraseFromParent(); // Modify the condition code of instructions in OpsToUpdate. - for (unsigned i = 0, e = OpsToUpdate.size(); i < e; i++) - OpsToUpdate[i].first->setDesc(get(OpsToUpdate[i].second)); + for (auto &Op : OpsToUpdate) + Op.first->setDesc(get(Op.second)); return true; } @@ -4694,8 +5247,7 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr *MI, return nullptr; // Check whether we can fold the def into SrcOperandId. - MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI); - if (FoldMI) { + if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, DefMI)) { FoldAsLoadDefReg = 0; return FoldMI; } @@ -4725,6 +5277,82 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB, return true; } +/// Expand a single-def pseudo instruction to a two-addr +/// instruction with two %k0 reads. 
+/// This is used for mapping: +/// %k4 = K_SET1 +/// to: +/// %k4 = KXNORrr %k0, %k0 +static bool Expand2AddrKreg(MachineInstrBuilder &MIB, + const MCInstrDesc &Desc, unsigned Reg) { + assert(Desc.getNumOperands() == 3 && "Expected two-addr instruction."); + MIB->setDesc(Desc); + MIB.addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); + return true; +} + +static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII, + bool MinusOne) { + MachineBasicBlock &MBB = *MIB->getParent(); + DebugLoc DL = MIB->getDebugLoc(); + unsigned Reg = MIB->getOperand(0).getReg(); + + // Insert the XOR. + BuildMI(MBB, MIB.getInstr(), DL, TII.get(X86::XOR32rr), Reg) + .addReg(Reg, RegState::Undef) + .addReg(Reg, RegState::Undef); + + // Turn the pseudo into an INC or DEC. + MIB->setDesc(TII.get(MinusOne ? X86::DEC32r : X86::INC32r)); + MIB.addReg(Reg); + + return true; +} + +bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const { + MachineBasicBlock &MBB = *MIB->getParent(); + DebugLoc DL = MIB->getDebugLoc(); + int64_t Imm = MIB->getOperand(1).getImm(); + assert(Imm != 0 && "Using push/pop for 0 is not efficient."); + MachineBasicBlock::iterator I = MIB.getInstr(); + + int StackAdjustment; + + if (Subtarget.is64Bit()) { + assert(MIB->getOpcode() == X86::MOV64ImmSExti8 || + MIB->getOpcode() == X86::MOV32ImmSExti8); + // 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and + // widen the register if necessary. + StackAdjustment = 8; + BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm); + MIB->setDesc(get(X86::POP64r)); + MIB->getOperand(0) + .setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64)); + } else { + assert(MIB->getOpcode() == X86::MOV32ImmSExti8); + StackAdjustment = 4; + BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm); + MIB->setDesc(get(X86::POP32r)); + } + + // Build CFI if necessary. + MachineFunction &MF = *MBB.getParent(); + const X86FrameLowering *TFL = Subtarget.getFrameLowering(); + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool NeedsDwarfCFI = + !IsWin64Prologue && + (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry()); + bool EmitCFI = !TFL->hasFP(MF) && NeedsDwarfCFI; + if (EmitCFI) { + TFL->BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment)); + TFL->BuildCFI(MBB, std::next(I), DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, -StackAdjustment)); + } + + return true; +} + // LoadStackGuard has so far only been implemented for 64-bit MachO. Different // code sequence is needed for other targets. 
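The immediate-materialization pseudos expanded above trade EFLAGS clobbers and stack traffic for code size, and are only selected when that trade is wanted (e.g. minimum-size functions). A standalone sketch of which expansion fits which immediate; the byte counts are the standard 32-bit x86 encodings, and the function is ours, not patch code:

    #include <cstdint>

    // Picks the compact expansion of "mov r32, Imm" mirroring the pseudos
    // handled above.
    static const char *pickExpansion(int64_t Imm) {
      if (Imm == 0)
        return "xor r,r";             // MOV32r0: 2 bytes, clobbers EFLAGS
      if (Imm == 1 || Imm == -1)
        return "xor r,r; inc/dec r";  // MOV32r1 / MOV32r_1: 3 bytes
      if (Imm == static_cast<int8_t>(Imm))
        return "push imm8; pop r";    // MOV32ImmSExti8: 3 bytes, touches stack
      return "mov r, imm32";          // plain MOV32ri: 5 bytes
    }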
static void expandLoadStackGuard(MachineInstrBuilder &MIB, @@ -4735,8 +5363,8 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB, const GlobalValue *GV = cast<GlobalValue>((*MIB->memoperands_begin())->getValue()); unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; - MachineMemOperand *MMO = MBB.getParent()-> - getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 8, 8); + MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( + MachinePointerInfo::getGOT(*MBB.getParent()), Flag, 8, 8); MachineBasicBlock::iterator I = MIB.getInstr(); BuildMI(MBB, I, DL, TII.get(X86::MOV64rm), Reg).addReg(X86::RIP).addImm(1) @@ -4753,6 +5381,13 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch (MI->getOpcode()) { case X86::MOV32r0: return Expand2AddrUndef(MIB, get(X86::XOR32rr)); + case X86::MOV32r1: + return expandMOV32r1(MIB, *this, /*MinusOne=*/ false); + case X86::MOV32r_1: + return expandMOV32r1(MIB, *this, /*MinusOne=*/ true); + case X86::MOV32ImmSExti8: + case X86::MOV64ImmSExti8: + return ExpandMOVImmSExti8(MIB); case X86::SETB_C8r: return Expand2AddrUndef(MIB, get(X86::SBB8rr)); case X86::SETB_C16r: @@ -4777,10 +5412,22 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; + + // KNL does not recognize dependency-breaking idioms for mask registers, + // so kxnor %k1, %k1, %k2 has a RAW dependence on %k1. + // Using %k0 as the undef input register is a performance heuristic based + // on the assumption that %k0 is used less frequently than the other mask + // registers, since it is not usable as a write mask. + // FIXME: A more advanced approach would be to choose the best input mask + // register based on context. case X86::KSET0B: - case X86::KSET0W: return Expand2AddrUndef(MIB, get(X86::KXORWrr)); + case X86::KSET0W: return Expand2AddrKreg(MIB, get(X86::KXORWrr), X86::K0); + case X86::KSET0D: return Expand2AddrKreg(MIB, get(X86::KXORDrr), X86::K0); + case X86::KSET0Q: return Expand2AddrKreg(MIB, get(X86::KXORQrr), X86::K0); case X86::KSET1B: - case X86::KSET1W: return Expand2AddrUndef(MIB, get(X86::KXNORWrr)); + case X86::KSET1W: return Expand2AddrKreg(MIB, get(X86::KXNORWrr), X86::K0); + case X86::KSET1D: return Expand2AddrKreg(MIB, get(X86::KXNORDrr), X86::K0); + case X86::KSET1Q: return Expand2AddrKreg(MIB, get(X86::KXNORQrr), X86::K0); case TargetOpcode::LOAD_STACK_GUARD: expandLoadStackGuard(MIB, *this); return true; @@ -4788,12 +5435,28 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { return false; } -static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs) { +static void addOperands(MachineInstrBuilder &MIB, ArrayRef<MachineOperand> MOs, + int PtrOffset = 0) { unsigned NumAddrOps = MOs.size(); - for (unsigned i = 0; i != NumAddrOps; ++i) - MIB.addOperand(MOs[i]); - if (NumAddrOps < 4) // FrameIndex only - addOffset(MIB, 0); + + if (NumAddrOps < 4) { + // FrameIndex only - add an immediate offset (whether it's zero or not). + for (unsigned i = 0; i != NumAddrOps; ++i) + MIB.addOperand(MOs[i]); + addOffset(MIB, PtrOffset); + } else { + // General Memory Addressing - we need to add any offset to an existing + // offset.
+ assert(MOs.size() == 5 && "Unexpected memory operand list length"); + for (unsigned i = 0; i != NumAddrOps; ++i) { + const MachineOperand &MO = MOs[i]; + if (i == 3 && PtrOffset != 0) { + MIB.addDisp(MO, PtrOffset); + } else { + MIB.addOperand(MO); + } + } + } } static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, @@ -4828,7 +5491,8 @@ static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, unsigned OpNo, ArrayRef MOs, MachineBasicBlock::iterator InsertPt, - MachineInstr *MI, const TargetInstrInfo &TII) { + MachineInstr *MI, const TargetInstrInfo &TII, + int PtrOffset = 0) { // Omit the implicit operands, something BuildMI can't do. MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), MI->getDebugLoc(), true); @@ -4838,7 +5502,7 @@ static MachineInstr *FuseInst(MachineFunction &MF, unsigned Opcode, MachineOperand &MO = MI->getOperand(i); if (i == OpNo) { assert(MO.isReg() && "Expected to fold into reg operand!"); - addOperands(MIB, MOs); + addOperands(MIB, MOs, PtrOffset); } else { MIB.addOperand(MO); } @@ -4860,6 +5524,40 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, return MIB.addImm(0); } +MachineInstr *X86InstrInfo::foldMemoryOperandCustom( + MachineFunction &MF, MachineInstr *MI, unsigned OpNum, + ArrayRef MOs, MachineBasicBlock::iterator InsertPt, + unsigned Size, unsigned Align) const { + switch (MI->getOpcode()) { + case X86::INSERTPSrr: + case X86::VINSERTPSrr: + // Attempt to convert the load of inserted vector into a fold load + // of a single float. + if (OpNum == 2) { + unsigned Imm = MI->getOperand(MI->getNumOperands() - 1).getImm(); + unsigned ZMask = Imm & 15; + unsigned DstIdx = (Imm >> 4) & 3; + unsigned SrcIdx = (Imm >> 6) & 3; + + unsigned RCSize = getRegClass(MI->getDesc(), OpNum, &RI, MF)->getSize(); + if (Size <= RCSize && 4 <= Align) { + int PtrOffset = SrcIdx * 4; + unsigned NewImm = (DstIdx << 4) | ZMask; + unsigned NewOpCode = + (MI->getOpcode() == X86::VINSERTPSrr ? X86::VINSERTPSrm + : X86::INSERTPSrm); + MachineInstr *NewMI = + FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset); + NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm); + return NewMI; + } + } + break; + }; + + return nullptr; +} + MachineInstr *X86InstrInfo::foldMemoryOperandImpl( MachineFunction &MF, MachineInstr *MI, unsigned OpNum, ArrayRef MOs, MachineBasicBlock::iterator InsertPt, @@ -4869,10 +5567,13 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( bool isCallRegIndirect = Subtarget.callRegIndirect(); bool isTwoAddrFold = false; - // For CPUs that favor the register form of a call, - // do not fold loads into calls. - if (isCallRegIndirect && - (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) + // For CPUs that favor the register form of a call or push, + // do not fold loads into calls or pushes, unless optimizing for size + // aggressively. + if (isCallRegIndirect && !MF.getFunction()->optForMinSize() && + (MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r || + MI->getOpcode() == X86::PUSH16r || MI->getOpcode() == X86::PUSH32r || + MI->getOpcode() == X86::PUSH64r)) return nullptr; unsigned NumOps = MI->getDesc().getNumOperands(); @@ -4886,6 +5587,12 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( return nullptr; MachineInstr *NewMI = nullptr; + + // Attempt to fold any custom cases we have. 
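The custom case referred to here is the (V)INSERTPSrr fold implemented in foldMemoryOperandCustom above; as a worked example of its immediate arithmetic (a standalone sketch, struct and names ours):

    #include <cstdint>

    // Decodes an (V)INSERTPS immediate the way foldMemoryOperandCustom does.
    struct InsertPSImm {
      unsigned ZMask;  // which destination lanes are zeroed
      unsigned DstIdx; // destination lane written
      unsigned SrcIdx; // source lane read
    };

    static InsertPSImm decodeInsertPS(uint8_t Imm) {
      return {Imm & 15u, (Imm >> 4) & 3u, (Imm >> 6) & 3u};
    }

    // Once the vector load is narrowed to a single f32, the memory offset is
    // SrcIdx * 4 bytes and the new immediate reads lane 0:
    //   NewImm = (DstIdx << 4) | ZMask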
+ if (MachineInstr *CustomMI = + foldMemoryOperandCustom(MF, MI, OpNum, MOs, InsertPt, Size, Align)) + return CustomMI; + // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. It requires // replacing the *two* registers with the memory location. @@ -4963,60 +5670,56 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl( // If the instruction and target operand are commutable, commute the // instruction and try again. if (AllowCommute) { - unsigned OriginalOpIdx = OpNum, CommuteOpIdx1, CommuteOpIdx2; + unsigned CommuteOpIdx1 = OpNum, CommuteOpIdx2 = CommuteAnyOperandIndex; if (findCommutedOpIndices(MI, CommuteOpIdx1, CommuteOpIdx2)) { bool HasDef = MI->getDesc().getNumDefs(); unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; unsigned Reg1 = MI->getOperand(CommuteOpIdx1).getReg(); unsigned Reg2 = MI->getOperand(CommuteOpIdx2).getReg(); - bool Tied0 = - 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO); bool Tied1 = + 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx1, MCOI::TIED_TO); + bool Tied2 = 0 == MI->getDesc().getOperandConstraint(CommuteOpIdx2, MCOI::TIED_TO); // If either of the commutable operands are tied to the destination // then we can not commute + fold. - if ((HasDef && Reg0 == Reg1 && Tied0) || - (HasDef && Reg0 == Reg2 && Tied1)) + if ((HasDef && Reg0 == Reg1 && Tied1) || + (HasDef && Reg0 == Reg2 && Tied2)) return nullptr; - if ((CommuteOpIdx1 == OriginalOpIdx) || - (CommuteOpIdx2 == OriginalOpIdx)) { - MachineInstr *CommutedMI = commuteInstruction(MI, false); - if (!CommutedMI) { - // Unable to commute. - return nullptr; - } - if (CommutedMI != MI) { - // New instruction. We can't fold from this. - CommutedMI->eraseFromParent(); - return nullptr; - } - - // Attempt to fold with the commuted version of the instruction. - unsigned CommuteOp = - (CommuteOpIdx1 == OriginalOpIdx ? CommuteOpIdx2 : CommuteOpIdx1); - NewMI = - foldMemoryOperandImpl(MF, MI, CommuteOp, MOs, InsertPt, Size, Align, - /*AllowCommute=*/false); - if (NewMI) - return NewMI; - - // Folding failed again - undo the commute before returning. - MachineInstr *UncommutedMI = commuteInstruction(MI, false); - if (!UncommutedMI) { - // Unable to commute. - return nullptr; - } - if (UncommutedMI != MI) { - // New instruction. It doesn't need to be kept. - UncommutedMI->eraseFromParent(); - return nullptr; - } - - // Return here to prevent duplicate fuse failure report. + MachineInstr *CommutedMI = + commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); + if (!CommutedMI) { + // Unable to commute. return nullptr; } + if (CommutedMI != MI) { + // New instruction. We can't fold from this. + CommutedMI->eraseFromParent(); + return nullptr; + } + + // Attempt to fold with the commuted version of the instruction. + NewMI = foldMemoryOperandImpl(MF, MI, CommuteOpIdx2, MOs, InsertPt, + Size, Align, /*AllowCommute=*/false); + if (NewMI) + return NewMI; + + // Folding failed again - undo the commute before returning. + MachineInstr *UncommutedMI = + commuteInstruction(MI, false, CommuteOpIdx1, CommuteOpIdx2); + if (!UncommutedMI) { + // Unable to commute. + return nullptr; + } + if (UncommutedMI != MI) { + // New instruction. It doesn't need to be kept. + UncommutedMI->eraseFromParent(); + return nullptr; + } + + // Return here to prevent duplicate fuse failure report. 
+      return nullptr;
     }
   }
 
@@ -5208,13 +5911,14 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
   // If MI kills this register, the false dependence is already broken.
   if (MI->killsRegister(Reg, TRI))
     return;
+
   if (X86::VR128RegClass.contains(Reg)) {
     // These instructions are all floating point domain, so xorps is the best
     // choice.
-    bool HasAVX = Subtarget.hasAVX();
-    unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr;
+    unsigned Opc = Subtarget.hasAVX() ? X86::VXORPSrr : X86::XORPSrr;
     BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg)
       .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef);
+    MI->addRegisterKilled(Reg, TRI, true);
   } else if (X86::VR256RegClass.contains(Reg)) {
     // Use vxorps to clear the full ymm register.
     // It wants to read and write the xmm sub-register.
@@ -5222,21 +5926,20 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum,
     BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg)
       .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef)
       .addReg(Reg, RegState::ImplicitDefine);
-  } else
-    return;
-  MI->addRegisterKilled(Reg, TRI, true);
+    MI->addRegisterKilled(Reg, TRI, true);
+  }
 }
 
 MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
     MachineFunction &MF, MachineInstr *MI, ArrayRef<unsigned> Ops,
     MachineBasicBlock::iterator InsertPt, int FrameIndex) const {
   // Check switch flag
-  if (NoFusing) return nullptr;
+  if (NoFusing)
+    return nullptr;
 
   // Unless optimizing for size, don't fold to avoid partial
   // register update stalls
-  if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
-      hasPartialRegUpdate(MI->getOpcode()))
+  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI->getOpcode()))
     return nullptr;
 
   const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -5303,6 +6006,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int:
     case X86::MULSSrr_Int: case X86::VMULSSrr_Int:
     case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int:
+    case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int:
+    case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int:
+    case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int:
+    case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int:
+    case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int:
+    case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int:
       return false;
     default:
       return true;
@@ -5318,6 +6027,12 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
     case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int:
     case X86::MULSDrr_Int: case X86::VMULSDrr_Int:
     case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int:
+    case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int:
+    case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int:
+    case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int:
+    case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int:
+    case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int:
+    case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int:
      return false;
    default:
      return true;
@@ -5342,10 +6057,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   // Check switch flag
   if (NoFusing) return nullptr;
 
-  // Unless optimizing for size, don't fold to avoid partial
-  // register update stalls
-  if (!MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) &&
-      hasPartialRegUpdate(MI->getOpcode()))
+  // Avoid partial register update stalls unless optimizing for size.
+  if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI->getOpcode()))
     return nullptr;
 
   // Determine the alignment of the load.
@@ -5460,62 +6173,6 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
                                /*Size=*/0, Alignment, /*AllowCommute=*/true);
 }
 
-bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
-                                        ArrayRef<unsigned> Ops) const {
-  // Check switch flag
-  if (NoFusing) return 0;
-
-  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
-    switch (MI->getOpcode()) {
-    default: return false;
-    case X86::TEST8rr:
-    case X86::TEST16rr:
-    case X86::TEST32rr:
-    case X86::TEST64rr:
-      return true;
-    case X86::ADD32ri:
-      // FIXME: AsmPrinter doesn't know how to handle
-      // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
-      if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
-        return false;
-      break;
-    }
-  }
-
-  if (Ops.size() != 1)
-    return false;
-
-  unsigned OpNum = Ops[0];
-  unsigned Opc = MI->getOpcode();
-  unsigned NumOps = MI->getDesc().getNumOperands();
-  bool isTwoAddr = NumOps > 1 &&
-    MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
-
-  // Folding a memory location into the two-address part of a two-address
-  // instruction is different than folding it other places. It requires
-  // replacing the *two* registers with the memory location.
-  const DenseMap<unsigned,
-                 std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
-  if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
-    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
-  } else if (OpNum == 0) {
-    if (Opc == X86::MOV32r0)
-      return true;
-
-    OpcodeTablePtr = &RegOp2MemOpTable0;
-  } else if (OpNum == 1) {
-    OpcodeTablePtr = &RegOp2MemOpTable1;
-  } else if (OpNum == 2) {
-    OpcodeTablePtr = &RegOp2MemOpTable2;
-  } else if (OpNum == 3) {
-    OpcodeTablePtr = &RegOp2MemOpTable3;
-  }
-
-  if (OpcodeTablePtr && OpcodeTablePtr->count(Opc))
-    return true;
-  return TargetInstrInfo::canFoldMemoryOperand(MI, Ops);
-}
-
 bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                 unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
@@ -5536,9 +6193,10 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   const MCInstrDesc &MCID = get(Opc);
   const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF);
+  // TODO: Check if 32-byte or greater accesses are slow too?
   if (!MI->hasOneMemOperand() &&
       RC == &X86::VR128RegClass &&
-      !Subtarget.isUnalignedMemAccessFast())
+      Subtarget.isUnalignedMem16Slow())
     // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
     // conservatively assume the address is unaligned. That's bad for
     // performance.
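
The commute-and-retry strategy used by foldMemoryOperandImpl above is compact enough to sketch outside LLVM. The following is a minimal standalone model, assuming a toy Instr type and a caller-supplied tryFold callback in place of the real MachineInstr and commuteInstruction machinery; it illustrates the control flow only, not the actual implementation:

    #include <functional>
    #include <optional>
    #include <utility>

    // Hypothetical stand-in for a machine instruction with two
    // commutable source operands.
    struct Instr {
      unsigned Opcode;
      unsigned Src[2];
    };

    // Try to fold a memory operand at Idx1; if that fails and the operands
    // commute, swap them, retry once at the other index, and undo the swap
    // when the retry also fails -- the same shape as the rewritten
    // foldMemoryOperandImpl() above, where commuteInstruction() performs the
    // swap and the recursive call with AllowCommute=false is the retry.
    std::optional<Instr> foldWithCommute(
        Instr I, unsigned Idx1, unsigned Idx2,
        const std::function<std::optional<Instr>(const Instr &, unsigned)>
            &tryFold) {
      if (auto Folded = tryFold(I, Idx1))
        return Folded;
      std::swap(I.Src[Idx1], I.Src[Idx2]); // commute the operands
      if (auto Folded = tryFold(I, Idx2))
        return Folded;
      std::swap(I.Src[Idx1], I.Src[Idx2]); // undo the commute before giving up
      return std::nullopt;
    }

Undoing the commute on failure matters because the caller still owns the original instruction; returning it with silently swapped operands would corrupt later folding attempts, which is exactly why the patch restores the operand order before reporting failure.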
@@ -5582,20 +6240,19 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   if (FoldedStore)
     MIB.addReg(Reg, RegState::Define);
-  for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
-    MIB.addOperand(BeforeOps[i]);
+  for (MachineOperand &BeforeOp : BeforeOps)
+    MIB.addOperand(BeforeOp);
   if (FoldedLoad)
     MIB.addReg(Reg);
-  for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
-    MIB.addOperand(AfterOps[i]);
-  for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
-    MachineOperand &MO = ImpOps[i];
-    MIB.addReg(MO.getReg(),
-               getDefRegState(MO.isDef()) |
+  for (MachineOperand &AfterOp : AfterOps)
+    MIB.addOperand(AfterOp);
+  for (MachineOperand &ImpOp : ImpOps) {
+    MIB.addReg(ImpOp.getReg(),
+               getDefRegState(ImpOp.isDef()) |
                RegState::Implicit |
-               getKillRegState(MO.isKill()) |
-               getDeadRegState(MO.isDead()) |
-               getUndefRegState(MO.isUndef()));
+               getKillRegState(ImpOp.isKill()) |
+               getDeadRegState(ImpOp.isDead()) |
+               getUndefRegState(ImpOp.isUndef()));
   }
   // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
   switch (DataMI->getOpcode()) {
@@ -5686,9 +6343,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                             cast<MachineSDNode>(N)->memoperands_end());
   if (!(*MMOs.first) &&
       RC == &X86::VR128RegClass &&
-      !Subtarget.isUnalignedMemAccessFast())
+      Subtarget.isUnalignedMem16Slow())
     // Do not introduce a slow unaligned load.
     return false;
+  // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
+  // memory access is slow above.
   unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
   bool isAligned = (*MMOs.first) &&
                    (*MMOs.first)->getAlignment() >= Alignment;
@@ -5729,9 +6388,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                              cast<MachineSDNode>(N)->memoperands_end());
   if (!(*MMOs.first) &&
       RC == &X86::VR128RegClass &&
-      !Subtarget.isUnalignedMemAccessFast())
+      Subtarget.isUnalignedMem16Slow())
     // Do not introduce a slow unaligned store.
     return false;
+  // FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
+  // memory access is slow above.
   unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
   bool isAligned = (*MMOs.first) &&
                    (*MMOs.first)->getAlignment() >= Alignment;
@@ -6192,16 +6853,16 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
 // domains, but they require a bit more work than just switching opcodes.
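
The lookup helpers that follow scan fixed-width tables: each row holds the same operation's opcode in the three execution domains (packed-single, packed-double, packed-integer), so finding a row by any one column yields the equivalents in the other two. A self-contained sketch of that scan, with invented opcode numbers standing in for the real X86 opcode enums:

    #include <cstdint>
    #include <cstdio>

    // Each row holds the equivalent opcode in domains 1, 2, and 3,
    // mirroring the shape of ReplaceableInstrs. The numbers here are
    // made up for illustration; the real tables use X86::* opcode enums.
    static const uint16_t Replaceable[][3] = {
        {0x100, 0x200, 0x300}, // e.g. an ANDPS / ANDPD / PAND row
        {0x101, 0x201, 0x301}, // e.g. an ORPS  / ORPD  / POR  row
    };

    static const uint16_t *lookupRow(unsigned Opcode, unsigned Domain) {
      for (const uint16_t (&Row)[3] : Replaceable)
        if (Row[Domain - 1] == Opcode)
          return Row;
      return nullptr;
    }

    int main() {
      // Re-domain a packed-single opcode (domain 1) to the integer domain (3).
      if (const uint16_t *Row = lookupRow(0x100, 1))
        std::printf("integer-domain equivalent: 0x%x\n", (unsigned)Row[2]);
    }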
static const uint16_t *lookup(unsigned opcode, unsigned domain) { - for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i) - if (ReplaceableInstrs[i][domain-1] == opcode) - return ReplaceableInstrs[i]; + for (const uint16_t (&Row)[3] : ReplaceableInstrs) + if (Row[domain-1] == opcode) + return Row; return nullptr; } static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) { - for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i) - if (ReplaceableInstrsAVX2[i][domain-1] == opcode) - return ReplaceableInstrsAVX2[i]; + for (const uint16_t (&Row)[3] : ReplaceableInstrsAVX2) + if (Row[domain-1] == opcode) + return Row; return nullptr; } @@ -6347,230 +7008,181 @@ hasHighOperandLatency(const TargetSchedModel &SchedModel, return isHighLatencyDef(DefMI->getOpcode()); } -static bool hasVirtualRegDefsInBasicBlock(const MachineInstr &Inst, - const MachineBasicBlock *MBB) { - assert(Inst.getNumOperands() == 3 && "Reassociation needs binary operators"); - const MachineOperand &Op1 = Inst.getOperand(1); - const MachineOperand &Op2 = Inst.getOperand(2); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); +bool X86InstrInfo::hasReassociableOperands(const MachineInstr &Inst, + const MachineBasicBlock *MBB) const { + assert((Inst.getNumOperands() == 3 || Inst.getNumOperands() == 4) && + "Reassociation needs binary operators"); - // We need virtual register definitions. - MachineInstr *MI1 = nullptr; - MachineInstr *MI2 = nullptr; - if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) - MI1 = MRI.getUniqueVRegDef(Op1.getReg()); - if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) - MI2 = MRI.getUniqueVRegDef(Op2.getReg()); + // Integer binary math/logic instructions have a third source operand: + // the EFLAGS register. That operand must be both defined here and never + // used; ie, it must be dead. If the EFLAGS operand is live, then we can + // not change anything because rearranging the operands could affect other + // instructions that depend on the exact status flags (zero, sign, etc.) + // that are set by using these particular operands with this operation. + if (Inst.getNumOperands() == 4) { + assert(Inst.getOperand(3).isReg() && + Inst.getOperand(3).getReg() == X86::EFLAGS && + "Unexpected operand in reassociable instruction"); + if (!Inst.getOperand(3).isDead()) + return false; + } - // And they need to be in the trace (otherwise, they won't have a depth). - if (MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB) - return true; - - return false; -} - -static bool hasReassocSibling(const MachineInstr &Inst, bool &Commuted) { - const MachineBasicBlock *MBB = Inst.getParent(); - const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); - MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); - unsigned AssocOpcode = Inst.getOpcode(); - - // If only one operand has the same opcode and it's the second source operand, - // the operands must be commuted. - Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; - if (Commuted) - std::swap(MI1, MI2); - - // 1. The previous instruction must be the same type as Inst. - // 2. The previous instruction must have virtual register definitions for its - // operands in the same basic block as Inst. - // 3. The previous instruction's result must only be used by Inst. 
- if (MI1->getOpcode() == AssocOpcode && - hasVirtualRegDefsInBasicBlock(*MI1, MBB) && - MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg())) - return true; - - return false; + return TargetInstrInfo::hasReassociableOperands(Inst, MBB); } // TODO: There are many more machine instruction opcodes to match: // 1. Other data types (integer, vectors) -// 2. Other math / logic operations (and, or) -static bool isAssociativeAndCommutative(unsigned Opcode) { - switch (Opcode) { +// 2. Other math / logic operations (xor, or) +// 3. Other forms of the same operation (intrinsics and other variants) +bool X86InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const { + switch (Inst.getOpcode()) { + case X86::AND8rr: + case X86::AND16rr: + case X86::AND32rr: + case X86::AND64rr: + case X86::OR8rr: + case X86::OR16rr: + case X86::OR32rr: + case X86::OR64rr: + case X86::XOR8rr: + case X86::XOR16rr: + case X86::XOR32rr: + case X86::XOR64rr: + case X86::IMUL16rr: + case X86::IMUL32rr: + case X86::IMUL64rr: + case X86::PANDrr: + case X86::PORrr: + case X86::PXORrr: + case X86::VPANDrr: + case X86::VPANDYrr: + case X86::VPORrr: + case X86::VPORYrr: + case X86::VPXORrr: + case X86::VPXORYrr: + // Normal min/max instructions are not commutative because of NaN and signed + // zero semantics, but these are. Thus, there's no need to check for global + // relaxed math; the instructions themselves have the properties we need. + case X86::MAXCPDrr: + case X86::MAXCPSrr: + case X86::MAXCSDrr: + case X86::MAXCSSrr: + case X86::MINCPDrr: + case X86::MINCPSrr: + case X86::MINCSDrr: + case X86::MINCSSrr: + case X86::VMAXCPDrr: + case X86::VMAXCPSrr: + case X86::VMAXCPDYrr: + case X86::VMAXCPSYrr: + case X86::VMAXCSDrr: + case X86::VMAXCSSrr: + case X86::VMINCPDrr: + case X86::VMINCPSrr: + case X86::VMINCPDYrr: + case X86::VMINCPSYrr: + case X86::VMINCSDrr: + case X86::VMINCSSrr: + return true; + case X86::ADDPDrr: + case X86::ADDPSrr: case X86::ADDSDrr: case X86::ADDSSrr: - case X86::VADDSDrr: - case X86::VADDSSrr: + case X86::MULPDrr: + case X86::MULPSrr: case X86::MULSDrr: case X86::MULSSrr: + case X86::VADDPDrr: + case X86::VADDPSrr: + case X86::VADDPDYrr: + case X86::VADDPSYrr: + case X86::VADDSDrr: + case X86::VADDSSrr: + case X86::VMULPDrr: + case X86::VMULPSrr: + case X86::VMULPDYrr: + case X86::VMULPSYrr: case X86::VMULSDrr: case X86::VMULSSrr: - return true; + return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath; default: return false; } } -/// Return true if the input instruction is part of a chain of dependent ops -/// that are suitable for reassociation, otherwise return false. -/// If the instruction's operands must be commuted to have a previous -/// instruction of the same type define the first source operand, Commuted will -/// be set to true. -static bool isReassocCandidate(const MachineInstr &Inst, bool &Commuted) { - // 1. The operation must be associative and commutative. - // 2. The instruction must have virtual register definitions for its - // operands in the same basic block. - // 3. The instruction must have a reassociable sibling. - if (isAssociativeAndCommutative(Inst.getOpcode()) && - hasVirtualRegDefsInBasicBlock(Inst, Inst.getParent()) && - hasReassocSibling(Inst, Commuted)) - return true; +/// This is an architecture-specific helper function of reassociateOps. +/// Set special operand attributes for new instructions after reassociation. 
+void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
+                                         MachineInstr &OldMI2,
+                                         MachineInstr &NewMI1,
+                                         MachineInstr &NewMI2) const {
+  // Integer instructions define an implicit EFLAGS source register operand as
+  // the third source (fourth total) operand.
+  if (OldMI1.getNumOperands() != 4 || OldMI2.getNumOperands() != 4)
+    return;
 
-  return false;
+  assert(NewMI1.getNumOperands() == 4 && NewMI2.getNumOperands() == 4 &&
+         "Unexpected instruction type for reassociation");
+
+  MachineOperand &OldOp1 = OldMI1.getOperand(3);
+  MachineOperand &OldOp2 = OldMI2.getOperand(3);
+  MachineOperand &NewOp1 = NewMI1.getOperand(3);
+  MachineOperand &NewOp2 = NewMI2.getOperand(3);
+
+  assert(OldOp1.isReg() && OldOp1.getReg() == X86::EFLAGS && OldOp1.isDead() &&
+         "Must have dead EFLAGS operand in reassociable instruction");
+  assert(OldOp2.isReg() && OldOp2.getReg() == X86::EFLAGS && OldOp2.isDead() &&
+         "Must have dead EFLAGS operand in reassociable instruction");
+
+  (void)OldOp1;
+  (void)OldOp2;
+
+  assert(NewOp1.isReg() && NewOp1.getReg() == X86::EFLAGS &&
+         "Unexpected operand in reassociable instruction");
+  assert(NewOp2.isReg() && NewOp2.getReg() == X86::EFLAGS &&
+         "Unexpected operand in reassociable instruction");
+
+  // Mark the new EFLAGS operands as dead to be helpful to subsequent iterations
+  // of this pass or other passes. The EFLAGS operands must be dead in these new
+  // instructions because the EFLAGS operands in the original instructions must
+  // be dead in order for reassociation to occur.
+  NewOp1.setIsDead();
+  NewOp2.setIsDead();
 }
 
-// FIXME: This has the potential to be expensive (compile time) while not
-// improving the code at all. Some ways to limit the overhead:
-// 1. Track successful transforms; bail out if hit rate gets too low.
-// 2. Only enable at -O3 or some other non-default optimization level.
-// 3. Pre-screen pattern candidates here: if an operand of the previous
-//    instruction is known to not increase the critical path, then don't match
-//    that pattern.
-bool X86InstrInfo::getMachineCombinerPatterns(MachineInstr &Root,
-        SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &Patterns) const {
-  if (!Root.getParent()->getParent()->getTarget().Options.UnsafeFPMath)
-    return false;
-
-  // TODO: There is nothing x86-specific here except the instruction type.
-  // This logic could be hoisted into the machine combiner pass itself.
-
-  // Look for this reassociation pattern:
-  //   B = A op X (Prev)
-  //   C = B op Y (Root)
-
-  bool Commute;
-  if (isReassocCandidate(Root, Commute)) {
-    // We found a sequence of instructions that may be suitable for a
-    // reassociation of operands to increase ILP. Specify each commutation
-    // possibility for the Prev instruction in the sequence and let the
-    // machine combiner decide if changing the operands is worthwhile.
-    if (Commute) {
-      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_YB);
-      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_YB);
-    } else {
-      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_AX_BY);
-      Patterns.push_back(MachineCombinerPattern::MC_REASSOC_XA_BY);
-    }
-    return true;
-  }
-
-  return false;
+std::pair<unsigned, unsigned>
+X86InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
+  return std::make_pair(TF, 0u);
 }
 
-/// Attempt the following reassociation to reduce critical path length:
-///   B = A op X (Prev)
-///   C = B op Y (Root)
-/// ===>
-///   B = X op Y
-///   C = A op B
-static void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
-                           MachineCombinerPattern::MC_PATTERN Pattern,
-                           SmallVectorImpl<MachineInstr *> &InsInstrs,
-                           SmallVectorImpl<MachineInstr *> &DelInstrs,
-                           DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
-  MachineFunction *MF = Root.getParent()->getParent();
-  MachineRegisterInfo &MRI = MF->getRegInfo();
-  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
-  const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI);
-
-  // This array encodes the operand index for each parameter because the
-  // operands may be commuted. Each row corresponds to a pattern value,
-  // and each column specifies the index of A, B, X, Y.
-  unsigned OpIdx[4][4] = {
-    { 1, 1, 2, 2 },
-    { 1, 2, 2, 1 },
-    { 2, 1, 1, 2 },
-    { 2, 2, 1, 1 }
-  };
-
-  MachineOperand &OpA = Prev.getOperand(OpIdx[Pattern][0]);
-  MachineOperand &OpB = Root.getOperand(OpIdx[Pattern][1]);
-  MachineOperand &OpX = Prev.getOperand(OpIdx[Pattern][2]);
-  MachineOperand &OpY = Root.getOperand(OpIdx[Pattern][3]);
-  MachineOperand &OpC = Root.getOperand(0);
-
-  unsigned RegA = OpA.getReg();
-  unsigned RegB = OpB.getReg();
-  unsigned RegX = OpX.getReg();
-  unsigned RegY = OpY.getReg();
-  unsigned RegC = OpC.getReg();
-
-  if (TargetRegisterInfo::isVirtualRegister(RegA))
-    MRI.constrainRegClass(RegA, RC);
-  if (TargetRegisterInfo::isVirtualRegister(RegB))
-    MRI.constrainRegClass(RegB, RC);
-  if (TargetRegisterInfo::isVirtualRegister(RegX))
-    MRI.constrainRegClass(RegX, RC);
-  if (TargetRegisterInfo::isVirtualRegister(RegY))
-    MRI.constrainRegClass(RegY, RC);
-  if (TargetRegisterInfo::isVirtualRegister(RegC))
-    MRI.constrainRegClass(RegC, RC);
-
-  // Create a new virtual register for the result of (X op Y) instead of
-  // recycling RegB because the MachineCombiner's computation of the critical
-  // path requires a new register definition rather than an existing one.
-  unsigned NewVR = MRI.createVirtualRegister(RC);
-  InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
-
-  unsigned Opcode = Root.getOpcode();
-  bool KillA = OpA.isKill();
-  bool KillX = OpX.isKill();
-  bool KillY = OpY.isKill();
-
-  // Create new instructions for insertion.
-  MachineInstrBuilder MIB1 =
-      BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR)
-          .addReg(RegX, getKillRegState(KillX))
-          .addReg(RegY, getKillRegState(KillY));
-  InsInstrs.push_back(MIB1);
-
-  MachineInstrBuilder MIB2 =
-      BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC)
-          .addReg(RegA, getKillRegState(KillA))
-          .addReg(NewVR, getKillRegState(true));
-  InsInstrs.push_back(MIB2);
-
-  // Record old instructions for deletion.
-  DelInstrs.push_back(&Prev);
-  DelInstrs.push_back(&Root);
-}
-
-void X86InstrInfo::genAlternativeCodeSequence(
-    MachineInstr &Root,
-    MachineCombinerPattern::MC_PATTERN Pattern,
-    SmallVectorImpl<MachineInstr *> &InsInstrs,
-    SmallVectorImpl<MachineInstr *> &DelInstrs,
-    DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const {
-  MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo();
-
-  // Select the previous instruction in the sequence based on the input pattern.
-  MachineInstr *Prev = nullptr;
-  switch (Pattern) {
-    case MachineCombinerPattern::MC_REASSOC_AX_BY:
-    case MachineCombinerPattern::MC_REASSOC_XA_BY:
-      Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
-      break;
-    case MachineCombinerPattern::MC_REASSOC_AX_YB:
-    case MachineCombinerPattern::MC_REASSOC_XA_YB:
-      Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
-  }
-  assert(Prev && "Unknown pattern for machine combiner");
-
-  reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
-  return;
+ArrayRef<std::pair<unsigned, const char *>>
+X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
+  using namespace X86II;
+  static const std::pair<unsigned, const char *> TargetFlags[] = {
+      {MO_GOT_ABSOLUTE_ADDRESS, "x86-got-absolute-address"},
+      {MO_PIC_BASE_OFFSET, "x86-pic-base-offset"},
+      {MO_GOT, "x86-got"},
+      {MO_GOTOFF, "x86-gotoff"},
+      {MO_GOTPCREL, "x86-gotpcrel"},
+      {MO_PLT, "x86-plt"},
+      {MO_TLSGD, "x86-tlsgd"},
+      {MO_TLSLD, "x86-tlsld"},
+      {MO_TLSLDM, "x86-tlsldm"},
+      {MO_GOTTPOFF, "x86-gottpoff"},
+      {MO_INDNTPOFF, "x86-indntpoff"},
+      {MO_TPOFF, "x86-tpoff"},
+      {MO_DTPOFF, "x86-dtpoff"},
+      {MO_NTPOFF, "x86-ntpoff"},
+      {MO_GOTNTPOFF, "x86-gotntpoff"},
+      {MO_DLLIMPORT, "x86-dllimport"},
+      {MO_DARWIN_STUB, "x86-darwin-stub"},
+      {MO_DARWIN_NONLAZY, "x86-darwin-nonlazy"},
+      {MO_DARWIN_NONLAZY_PIC_BASE, "x86-darwin-nonlazy-pic-base"},
+      {MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, "x86-darwin-hidden-nonlazy-pic-base"},
+      {MO_TLVP, "x86-tlvp"},
+      {MO_TLVP_PIC_BASE, "x86-tlvp-pic-base"},
+      {MO_SECREL, "x86-secrel"}};
+  return makeArrayRef(TargetFlags);
 }
 
 namespace {
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index bf63336c7005..9d40334206b2 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -23,22 +23,10 @@
 #include "X86GenInstrInfo.inc"
 
 namespace llvm {
+  class MachineInstrBuilder;
   class X86RegisterInfo;
   class X86Subtarget;
 
-  namespace MachineCombinerPattern {
-    enum MC_PATTERN : int {
-      // These are commutative variants for reassociating a computation chain
-      // of the form:
-      //   B = A op X (Prev)
-      //   C = B op Y (Root)
-      MC_REASSOC_AX_BY = 0,
-      MC_REASSOC_AX_YB = 1,
-      MC_REASSOC_XA_BY = 2,
-      MC_REASSOC_XA_YB = 3,
-    };
-  } // end namespace MachineCombinerPattern
-
 namespace X86 {
   // X86 specific condition code. These correspond to X86_*_COND in
   // X86InstrInfo.td. They must be kept in synch.
@@ -259,14 +247,64 @@ public:
                                       MachineBasicBlock::iterator &MBBI,
                                       LiveVariables *LV) const override;
 
-  /// commuteInstruction - We have a few instructions that must be hacked on to
-  /// commute them.
+  /// Returns true iff the routine could find two commutable operands in the
+  /// given machine instruction.
+  /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. Their
+  /// input values can be re-defined in this method only if the input values
+  /// are not pre-defined, which is designated by the special value
+  /// 'CommuteAnyOperandIndex' assigned to it.
+  /// If both indices are pre-defined and refer to some operands, then the
+  /// method simply returns true if the corresponding operands are commutable
+  /// and returns false otherwise.
   ///
-  MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override;
-
+  /// For example, calling this method this way:
+  ///     unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
+  ///     findCommutedOpIndices(MI, Op1, Op2);
+  /// can be interpreted as a query asking to find an operand that would be
+  /// commutable with the operand #1.
   bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
                              unsigned &SrcOpIdx2) const override;
 
+  /// Returns true if the routine could find two commutable operands
+  /// in the given FMA instruction. Otherwise, returns false.
+  ///
+  /// \p SrcOpIdx1 and \p SrcOpIdx2 are INPUT and OUTPUT arguments.
+  /// The output indices of the commuted operands are returned in these
+  /// arguments. Also, the input values of these arguments may be preset either
+  /// to indices of operands that must be commuted or be equal to a special
+  /// value 'CommuteAnyOperandIndex' which means that the corresponding
+  /// operand index is not set and this method is free to pick any of
+  /// the available commutable operands.
+  ///
+  /// For example, calling this method this way:
+  ///     unsigned Idx1 = 1, Idx2 = CommuteAnyOperandIndex;
+  ///     findFMA3CommutedOpIndices(MI, Idx1, Idx2);
+  /// can be interpreted as a query asking if the operand #1 can be swapped
+  /// with any other available operand (e.g. operand #2, operand #3, etc.).
+  ///
+  /// The returned FMA opcode may differ from the opcode in the given MI.
+  /// For example, commuting the operands #1 and #3 in the following FMA
+  ///     FMA213 #1, #2, #3
+  /// results in an instruction with the adjusted opcode:
+  ///     FMA231 #3, #2, #1
+  bool findFMA3CommutedOpIndices(MachineInstr *MI,
+                                 unsigned &SrcOpIdx1,
+                                 unsigned &SrcOpIdx2) const;
+
+  /// Returns an adjusted FMA opcode that must be used in the FMA instruction
+  /// that performs the same computations as the given \p MI but which has the
+  /// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
+  /// It may return 0 if it is unsafe to commute the operands.
+  ///
+  /// The returned FMA opcode may differ from the opcode in the given \p MI.
+  /// For example, commuting the operands #1 and #3 in the following FMA
+  ///     FMA213 #1, #2, #3
+  /// results in an instruction with the adjusted opcode:
+  ///     FMA231 #3, #2, #1
+  unsigned getFMA3OpcodeToCommuteOperands(MachineInstr *MI,
+                                          unsigned SrcOpIdx1,
+                                          unsigned SrcOpIdx2) const;
+
   // Branch analysis.
   bool isUnpredicatedTerminator(const MachineInstr* MI) const override;
   bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
@@ -342,11 +380,6 @@ public:
                                       MachineBasicBlock::iterator InsertPt,
                                       MachineInstr *LoadMI) const override;
 
-  /// canFoldMemoryOperand - Returns true for the specified load / store if
-  /// folding is possible.
-  bool canFoldMemoryOperand(const MachineInstr *,
-                            ArrayRef<unsigned>) const override;
-
   /// unfoldMemoryOperand - Separate a single instruction which folded a load or
   /// a store or a load and a store into two or more instruction. If this is
   /// possible, returns true as well as the new instructions by reference.
@@ -406,10 +439,9 @@ public:
   bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const;
 
-  static bool isX86_64ExtendedReg(const MachineOperand &MO) {
-    if (!MO.isReg()) return false;
-    return X86II::isX86_64ExtendedReg(MO.getReg());
-  }
+  /// True if MI has a condition code def, e.g. EFLAGS, that is
+  /// not marked dead.
+  bool hasLiveCondCodeDef(MachineInstr *MI) const;
 
   /// getGlobalBaseReg - Return a virtual register initialized with the
   /// global base register value. Output instructions required to
@@ -452,26 +484,19 @@ public:
                              const MachineInstr *DefMI, unsigned DefIdx,
                              const MachineInstr *UseMI,
                              unsigned UseIdx) const override;
 
-  bool useMachineCombiner() const override { return true; }
-
-  /// Return true when there is potentially a faster code sequence
-  /// for an instruction chain ending in <Root>. All potential patterns are
-  /// output in the <Pattern> array.
-  bool getMachineCombinerPatterns(
-      MachineInstr &Root,
-      SmallVectorImpl<MachineCombinerPattern::MC_PATTERN> &P) const override;
-
-  /// When getMachineCombinerPatterns() finds a pattern, this function generates
-  /// the instructions that could replace the original code sequence.
-  void genAlternativeCodeSequence(
-      MachineInstr &Root, MachineCombinerPattern::MC_PATTERN P,
-      SmallVectorImpl<MachineInstr *> &InsInstrs,
-      SmallVectorImpl<MachineInstr *> &DelInstrs,
-      DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
+  bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
+
+  bool hasReassociableOperands(const MachineInstr &Inst,
+                               const MachineBasicBlock *MBB) const override;
+
+  void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
+                             MachineInstr &NewMI1,
+                             MachineInstr &NewMI2) const override;
 
   /// analyzeCompare - For a comparison instruction, return the source registers
   /// in SrcReg and SrcReg2 if having two register operands, and the value it
@@ -500,16 +525,49 @@ public:
                             unsigned &FoldAsLoadDefReg,
                             MachineInstr *&DefMI) const override;
 
+  std::pair<unsigned, unsigned>
+  decomposeMachineOperandsTargetFlags(unsigned TF) const override;
+
+  ArrayRef<std::pair<unsigned, const char *>>
+  getSerializableDirectMachineOperandTargetFlags() const override;
+
+protected:
+  /// Commutes the operands in the given instruction by changing the operands
+  /// order and/or changing the instruction's opcode and/or the immediate value
+  /// operand.
+  ///
+  /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands
+  /// to be commuted.
+  ///
+  /// Do not call this method for a non-commutable instruction or
+  /// non-commutable operands.
+  /// Even though the instruction is commutable, the method may still
+  /// fail to commute the operands; a null pointer is returned in such cases.
+  MachineInstr *commuteInstructionImpl(MachineInstr *MI, bool NewMI,
+                                       unsigned CommuteOpIdx1,
+                                       unsigned CommuteOpIdx2) const override;
+
 private:
   MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
                                               MachineFunction::iterator &MFI,
                                               MachineBasicBlock::iterator &MBBI,
                                               LiveVariables *LV) const;
 
+  /// Handles memory folding for special case instructions, for instance those
+  /// requiring custom manipulation of the address.
+  MachineInstr *foldMemoryOperandCustom(MachineFunction &MF, MachineInstr *MI,
+                                        unsigned OpNum,
+                                        ArrayRef<MachineOperand> MOs,
+                                        MachineBasicBlock::iterator InsertPt,
+                                        unsigned Size, unsigned Align) const;
+
   /// isFrameOperand - Return true and the FrameIndex if the specified
   /// operand and follow operands form a reference to the stack frame.
   bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
                       int &FrameIndex) const;
+
+  /// Expand the MOVImmSExti8 pseudo-instructions.
+ bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 52bab9c79b45..f4ca2b880bad 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -106,8 +106,6 @@ def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; -def SDT_X86WIN_FTOL : SDTypeProfile<0, 1, [SDTCisFP<0>]>; - def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; @@ -158,6 +156,8 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair, def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; +def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret, + [SDNPHasChain, SDNPOptInGlue]>; def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", @@ -250,9 +250,6 @@ def X86SegAlloca : SDNode<"X86ISD::SEG_ALLOCA", SDT_X86SEG_ALLOCA, def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def X86WinFTOL : SDNode<"X86ISD::WIN_FTOL", SDT_X86WIN_FTOL, - [SDNPHasChain, SDNPOutGlue]>; - //===----------------------------------------------------------------------===// // X86 Operand Definitions. // @@ -344,18 +341,21 @@ def vy64xmem : X86VMemOperand; def vz32mem : X86VMemOperand; def vz64mem : X86VMemOperand; -// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of -// plain GR64, so that it doesn't potentially require a REX prefix. -def i8mem_NOREX : Operand { +// A version of i8mem for use on x86-64 and x32 that uses a NOREX GPR instead +// of a plain GPR, so that it doesn't potentially require a REX prefix. +def ptr_rc_norex : PointerLikeRegClass<2>; +def ptr_rc_norex_nosp : PointerLikeRegClass<3>; + +def i8mem_NOREX : Operand { let PrintMethod = "printi8mem"; - let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm); + let MIOperandInfo = (ops ptr_rc_norex, i8imm, ptr_rc_norex_nosp, i32imm, i8imm); let ParserMatchClass = X86Mem8AsmOperand; let OperandType = "OPERAND_MEMORY"; } // GPRs available for tailcall. // It represents GR32_TC, GR64_TC or GR64_TCW64. -def ptr_rc_tailcall : PointerLikeRegClass<2>; +def ptr_rc_tailcall : PointerLikeRegClass<4>; // Special i32mem for addresses of load folding tail calls. These are not // allowed to use callee-saved registers since they must be scheduled @@ -697,34 +697,34 @@ def lea64mem : Operand { // X86 Complex Pattern Definitions. // -// Define X86 specific addressing mode. -def addr : ComplexPattern; -def lea32addr : ComplexPattern; +def lea32addr : ComplexPattern; // In 64-bit mode 32-bit LEAs can use RIP-relative addressing. -def lea64_32addr : ComplexPattern; -def tls32addr : ComplexPattern; -def tls32baseaddr : ComplexPattern; -def lea64addr : ComplexPattern; -def tls64addr : ComplexPattern; -def tls64baseaddr : ComplexPattern; -def vectoraddr : ComplexPattern; +def vectoraddr : ComplexPattern; //===----------------------------------------------------------------------===// // X86 Instruction Predicate Definitions. 
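
The immediate predicates defined in the next hunk (i16immSExt8 and friends) reduce to isInt&lt;N&gt;/isUInt&lt;N&gt; range checks. A simplified standalone rendering of those checks is below for reference; note that the real llvm::isInt additionally special-cases N >= 64, which this sketch omits:

    #include <cstdint>
    #include <cstdio>

    // Simplified stand-ins for llvm::isInt<N> / llvm::isUInt<N> (valid for
    // the small N used by the predicates below, not for N >= 64).
    template <unsigned N> constexpr bool isIntN(int64_t X) {
      return -(INT64_C(1) << (N - 1)) <= X && X < (INT64_C(1) << (N - 1));
    }
    template <unsigned N> constexpr bool isUIntN(uint64_t X) {
      return X < (UINT64_C(1) << N);
    }

    int main() {
      std::printf("%d\n", isIntN<8>(-128));          // 1: fits a sign-extended imm8
      std::printf("%d\n", isIntN<8>(128));           // 0: needs a wider immediate
      std::printf("%d\n", isUIntN<32>(0xFFFFFFFFu)); // 1: zero-extends from 32 bits
    }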
@@ -767,12 +767,21 @@ def HasDQI : Predicate<"Subtarget->hasDQI()">,
 def NoDQI : Predicate<"!Subtarget->hasDQI()">;
 def HasBWI : Predicate<"Subtarget->hasBWI()">,
              AssemblerPredicate<"FeatureBWI", "AVX-512 BW ISA">;
+def NoBWI : Predicate<"!Subtarget->hasBWI()">;
 def HasVLX : Predicate<"Subtarget->hasVLX()">,
              AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;
 def NoVLX : Predicate<"!Subtarget->hasVLX()">;
+def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
+def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
+def PKU : Predicate<"!Subtarget->hasPKU()">;
 
 def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
 def HasAES : Predicate<"Subtarget->hasAES()">;
+def HasFXSR : Predicate<"Subtarget->hasFXSR()">;
+def HasXSAVE : Predicate<"Subtarget->hasXSAVE()">;
+def HasXSAVEOPT : Predicate<"Subtarget->hasXSAVEOPT()">;
+def HasXSAVEC : Predicate<"Subtarget->hasXSAVEC()">;
+def HasXSAVES : Predicate<"Subtarget->hasXSAVES()">;
 def HasPCLMUL : Predicate<"Subtarget->hasPCLMUL()">;
 def HasFMA : Predicate<"Subtarget->hasFMA()">;
 def UseFMAOnAVX : Predicate<"Subtarget->hasFMA() && !Subtarget->hasAVX512()">;
@@ -794,6 +803,7 @@ def HasSHA : Predicate<"Subtarget->hasSHA()">;
 def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
 def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
 def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
 def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
 def HasMPX : Predicate<"Subtarget->hasMPX()">;
@@ -812,6 +822,8 @@ def In32BitMode : Predicate<"Subtarget->is32Bit()">,
                   AssemblerPredicate<"Mode32Bit", "32-bit mode">;
 def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
 def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
+def NotWin64WithoutFP : Predicate<"!Subtarget->isTargetWin64() ||"
+                                  "Subtarget->getFrameLowering()->hasFP(*MF)">;
 def IsPS4 : Predicate<"Subtarget->isTargetPS4()">;
 def NotPS4 : Predicate<"!Subtarget->isTargetPS4()">;
 def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
@@ -825,6 +837,7 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
 def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
 def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
 def OptForSize : Predicate<"OptForSize">;
+def OptForMinSize : Predicate<"OptForMinSize">;
 def OptForSpeed : Predicate<"!OptForSize">;
 def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
 def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
@@ -867,20 +880,54 @@ def X86_COND_E_OR_NE : ImmLeaf<i8, [{
 
-def i16immSExt8  : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
-def i32immSExt8  : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
-def i64immSExt8  : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+def i16immSExt8  : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
+def i32immSExt8  : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
+def i64immSExt8  : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
+
+// If we have multiple users of an immediate, it's much smaller to reuse
+// the register, rather than encode the immediate in every instruction.
+// This has the risk of increasing register pressure from stretched live
+// ranges, however, the immediates should be trivial to rematerialize by
+// the RA in the event of high register pressure.
+// TODO : This is currently enabled for stores and binary ops. There are more
+// cases for which this can be enabled, though this catches the bulk of the
+// issues.
+// TODO2 : This should really also be enabled under O2, but there's currently
+// an issue with RA where we don't pull the constants into their users
+// when we rematerialize them. I'll follow-up on enabling O2 after we fix that
+// issue.
+// TODO3 : This is currently limited to single basic blocks (DAG creation
+// pulls block immediates to the top and merges them if necessary).
+// Eventually, it would be nice to allow ConstantHoisting to merge constants
+// globally for potentially added savings.
+//
+def imm8_su : PatLeaf<(i8 imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm16_su : PatLeaf<(i16 imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm32_su : PatLeaf<(i32 imm), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+
+def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
+    return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
 
-def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
 
 // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
 // unsigned field.
-def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
+def i64immZExt32 : ImmLeaf<i64, [{ return isUInt<32>(Imm); }]>;
 
 def i64immZExt32SExt8 : ImmLeaf<i64, [{
   return isUInt<32>(Imm) && isInt<8>(static_cast<int32_t>(Imm));
 }]>;
 
 // Helper fragments for loads.
@@ -914,11 +961,12 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
   return false;
 }]>;
 
-def loadi8  : PatFrag<(ops node:$ptr), (i8  (load node:$ptr))>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr))>;
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr))>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr))>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr))>;
+def loadi8   : PatFrag<(ops node:$ptr), (i8   (load node:$ptr))>;
+def loadi64  : PatFrag<(ops node:$ptr), (i64  (load node:$ptr))>;
+def loadf32  : PatFrag<(ops node:$ptr), (f32  (load node:$ptr))>;
+def loadf64  : PatFrag<(ops node:$ptr), (f64  (load node:$ptr))>;
+def loadf80  : PatFrag<(ops node:$ptr), (f80  (load node:$ptr))>;
+def loadf128 : PatFrag<(ops node:$ptr), (f128 (load node:$ptr))>;
 
 def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
 def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
@@ -1020,12 +1068,8 @@ def PUSH32r  : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
                  IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
 def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[],
                  IIC_PUSH_REG>, OpSize16;
-def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
-                 IIC_PUSH_MEM>, OpSize16;
 def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[],
                  IIC_PUSH_REG>, OpSize32, Requires<[Not64BitMode]>;
-def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
-                 IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
 
 def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
                    "push{w}\t$imm", [], IIC_PUSH_IMM>, OpSize16;
@@ -1039,6 +1083,14 @@ def PUSHi32  : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
                     "push{l}\t$imm", [], IIC_PUSH_IMM>, OpSize32,
                     Requires<[Not64BitMode]>;
 } // mayStore, SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in {
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src",[],
+                 IIC_PUSH_MEM>, OpSize16;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src",[],
+                 IIC_PUSH_MEM>, OpSize32, Requires<[Not64BitMode]>;
+} // mayLoad, mayStore, SchedRW
+
 }
 
 let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
@@ -1071,9 +1123,11 @@ def PUSH64r  : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", [],
IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", [], IIC_PUSH_REG>, OpSize32, Requires<[In64BitMode]>; +} // mayStore, SchedRW +let mayLoad = 1, mayStore = 1, SchedRW = [WriteRMW] in { def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", [], IIC_PUSH_MEM>, OpSize32, Requires<[In64BitMode]>; -} // mayStore, SchedRW +} // mayLoad, mayStore, SchedRW } let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1, @@ -1275,13 +1329,13 @@ def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src), let SchedRW = [WriteStore] in { def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src), "mov{b}\t{$src, $dst|$dst, $src}", - [(store (i8 imm:$src), addr:$dst)], IIC_MOV_MEM>; + [(store (i8 imm8_su:$src), addr:$dst)], IIC_MOV_MEM>; def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src), "mov{w}\t{$src, $dst|$dst, $src}", - [(store (i16 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16; + [(store (i16 imm16_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize16; def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src), "mov{l}\t{$src, $dst|$dst, $src}", - [(store (i32 imm:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32; + [(store (i32 imm32_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32; def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>; @@ -1457,10 +1511,12 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem, let SchedRW = [WriteALU] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", - [(set EFLAGS, (X86sahf AH))], IIC_AHF>; + [(set EFLAGS, (X86sahf AH))], IIC_AHF>, + Requires<[HasLAHFSAHF]>; let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], - IIC_AHF>; // AH = flags + IIC_AHF>, // AH = flags + Requires<[HasLAHFSAHF]>; } // SchedRW //===----------------------------------------------------------------------===// @@ -1894,37 +1950,38 @@ def CLTS : I<0x06, RawFrm, (outs), (ins), "clts", [], IIC_CLTS>, TB; } // Table lookup instructions +let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", [], IIC_XLAT>, Sched<[WriteLoad]>; let SchedRW = [WriteMicrocoded] in { // ASCII Adjust After Addition -// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS +let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", [], IIC_AAA>, Requires<[Not64BitMode]>; // ASCII Adjust AX Before Division -// sets AL, AH and EFLAGS and uses AL and AH +let Uses = [AX], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src), "aad\t$src", [], IIC_AAD>, Requires<[Not64BitMode]>; // ASCII Adjust AX After Multiply -// sets AL, AH and EFLAGS and uses AL +let Uses = [AL], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src), "aam\t$src", [], IIC_AAM>, Requires<[Not64BitMode]>; // ASCII Adjust AL After Subtraction - sets -// sets AL, AH and CF and AF of EFLAGS and uses AL and AF of EFLAGS +let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", [], IIC_AAS>, Requires<[Not64BitMode]>; // Decimal Adjust AL after Addition -// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS +let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], 
hasSideEffects = 0 in def DAA : I<0x27, RawFrm, (outs), (ins), "daa", [], IIC_DAA>, Requires<[Not64BitMode]>; // Decimal Adjust AL after Subtraction -// sets AL, CF and AF of EFLAGS and uses AL, CF and AF of EFLAGS +let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in def DAS : I<0x2F, RawFrm, (outs), (ins), "das", [], IIC_DAS>, Requires<[Not64BitMode]>; } // SchedRW @@ -2356,6 +2413,32 @@ defm T1MSKC : tbm_binary_intr<0x01, "t1mskc", MRM7r, MRM7m>; defm TZMSK : tbm_binary_intr<0x01, "tzmsk", MRM4r, MRM4m>; } // HasTBM, EFLAGS +//===----------------------------------------------------------------------===// +// MONITORX/MWAITX Instructions +// +let SchedRW = [WriteSystem] in { +let Uses = [EAX, ECX, EDX] in +def MONITORXrrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", [], + IIC_SSE_MONITOR>, TB; +let Uses = [ECX, EAX, EBX] in +def MWAITXrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx", [], IIC_SSE_MWAIT>, + TB; +} // SchedRW + +def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrr)>, Requires<[Not64BitMode]>; +def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrr)>, Requires<[In64BitMode]>; + +def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORXrrr)>, + Requires<[Not64BitMode]>; +def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORXrrr)>, + Requires<[In64BitMode]>; + +//===----------------------------------------------------------------------===// +// CLZERO Instruction +// +let Uses = [EAX] in +def CLZEROr : I<0x01, MRM_FC, (outs), (ins), "clzero", []>, TB; + //===----------------------------------------------------------------------===// // Pattern fragments to auto generate TBM instructions. //===----------------------------------------------------------------------===// @@ -2498,8 +2581,8 @@ def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>; def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"loopz", "loope", "att">; -def : MnemonicAlias<"loopnz", "loopne", "att">; +def : MnemonicAlias<"loopz", "loope">; +def : MnemonicAlias<"loopnz", "loopne">; def : MnemonicAlias<"pop", "popw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>; @@ -2532,14 +2615,15 @@ def : MnemonicAlias<"pusha", "pushaw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"popa", "popal", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"pusha", "pushal", "att">, Requires<[In32BitMode]>; -def : MnemonicAlias<"repe", "rep", "att">; -def : MnemonicAlias<"repz", "rep", "att">; -def : MnemonicAlias<"repnz", "repne", "att">; +def : MnemonicAlias<"repe", "rep">; +def : MnemonicAlias<"repz", "rep">; +def : MnemonicAlias<"repnz", "repne">; def : MnemonicAlias<"ret", "retw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"ret", "retl", "att">, Requires<[In32BitMode]>; def : MnemonicAlias<"ret", "retq", "att">, Requires<[In64BitMode]>; +def : MnemonicAlias<"sal", "shl", "intel">; def : MnemonicAlias<"salb", "shlb", "att">; def : MnemonicAlias<"salw", "shlw", "att">; def : MnemonicAlias<"sall", "shll", "att">; @@ -2579,14 +2663,14 @@ def : MnemonicAlias<"fcmova", "fcmovnbe", "att">; def : MnemonicAlias<"fcmovnae", "fcmovb", "att">; def : MnemonicAlias<"fcmovna", "fcmovbe", "att">; def : MnemonicAlias<"fcmovae", "fcmovnb", "att">; -def : MnemonicAlias<"fcomip", "fcompi", "att">; +def : MnemonicAlias<"fcomip", "fcompi">; def : MnemonicAlias<"fildq", "fildll", 
"att">; def : MnemonicAlias<"fistpq", "fistpll", "att">; def : MnemonicAlias<"fisttpq", "fisttpll", "att">; def : MnemonicAlias<"fldcww", "fldcw", "att">; def : MnemonicAlias<"fnstcww", "fnstcw", "att">; def : MnemonicAlias<"fnstsww", "fnstsw", "att">; -def : MnemonicAlias<"fucomip", "fucompi", "att">; +def : MnemonicAlias<"fucomip", "fucompi">; def : MnemonicAlias<"fwait", "wait">; def : MnemonicAlias<"fxsaveq", "fxsave64", "att">; @@ -2594,7 +2678,9 @@ def : MnemonicAlias<"fxrstorq", "fxrstor64", "att">; def : MnemonicAlias<"xsaveq", "xsave64", "att">; def : MnemonicAlias<"xrstorq", "xrstor64", "att">; def : MnemonicAlias<"xsaveoptq", "xsaveopt64", "att">; - +def : MnemonicAlias<"xrstorsq", "xrstors64", "att">; +def : MnemonicAlias<"xsavecq", "xsavec64", "att">; +def : MnemonicAlias<"xsavesq", "xsaves64", "att">; class CondCodeAlias @@ -2640,8 +2726,8 @@ defm : IntegerCondCodeMnemonicAlias<"cmov", "", "intel">; //===----------------------------------------------------------------------===// // aad/aam default to base 10 if no operand is specified. -def : InstAlias<"aad", (AAD8i8 10)>; -def : InstAlias<"aam", (AAM8i8 10)>; +def : InstAlias<"aad", (AAD8i8 10)>, Requires<[Not64BitMode]>; +def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>; // Disambiguate the mem/imm form of bt-without-a-suffix as btl. // Likewise for btc/btr/bts. @@ -2719,8 +2805,10 @@ def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64m i64mem:$src)>; // Various unary fpstack operations default to operating on on ST1. // For example, "fxch" -> "fxch %st(1)" def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>; +def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>; def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>; def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>; +def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>; def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>; def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>; def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>; @@ -2798,20 +2886,20 @@ def : InstAlias<"jmp {*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16Bit // "imul , B" is an alias for "imul , B, B". 
-def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>; -def : InstAlias<"imulw {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>; -def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>; -def : InstAlias<"imull {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>; -def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>; -def : InstAlias<"imulq {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>; +def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>; +def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>; +def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>; +def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>; +def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>; +def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>; // inb %dx -> inb %al, %dx def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>; def : InstAlias<"inw\t{%dx|dx}", (IN16rr), 0>; def : InstAlias<"inl\t{%dx|dx}", (IN32rr), 0>; -def : InstAlias<"inb\t$port", (IN8ri i8imm:$port), 0>; -def : InstAlias<"inw\t$port", (IN16ri i8imm:$port), 0>; -def : InstAlias<"inl\t$port", (IN32ri i8imm:$port), 0>; +def : InstAlias<"inb\t$port", (IN8ri u8imm:$port), 0>; +def : InstAlias<"inw\t$port", (IN16ri u8imm:$port), 0>; +def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>; // jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp @@ -2861,9 +2949,9 @@ def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr16_Q GR64:$dst, GR16: def : InstAlias<"outb\t{%dx|dx}", (OUT8rr), 0>; def : InstAlias<"outw\t{%dx|dx}", (OUT16rr), 0>; def : InstAlias<"outl\t{%dx|dx}", (OUT32rr), 0>; -def : InstAlias<"outb\t$port", (OUT8ir i8imm:$port), 0>; -def : InstAlias<"outw\t$port", (OUT16ir i8imm:$port), 0>; -def : InstAlias<"outl\t$port", (OUT32ir i8imm:$port), 0>; +def : InstAlias<"outb\t$port", (OUT8ir u8imm:$port), 0>; +def : InstAlias<"outw\t$port", (OUT16ir u8imm:$port), 0>; +def : InstAlias<"outl\t$port", (OUT32ir u8imm:$port), 0>; // 'sldt ' can be encoded with either sldtw or sldtq with the same // effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity @@ -2940,3 +3028,34 @@ def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar64 GR32_NOAX:$src), 0>, Requires<[In64BitMode]>; def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>; + +// These aliases exist to get the parser to prioritize matching 8-bit +// immediate encodings over matching the implicit ax/eax/rax encodings. By +// explicitly mentioning the A register here, these entries will be ordered +// first due to the more explicit immediate type. 
+def : InstAlias<"adc{w}\t{$imm, %ax|ax, $imm}", (ADC16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"add{w}\t{$imm, %ax|ax, $imm}", (ADD16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"and{w}\t{$imm, %ax|ax, $imm}", (AND16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"cmp{w}\t{$imm, %ax|ax, $imm}", (CMP16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"or{w}\t{$imm, %ax|ax, $imm}", (OR16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"sbb{w}\t{$imm, %ax|ax, $imm}", (SBB16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"sub{w}\t{$imm, %ax|ax, $imm}", (SUB16ri8 AX, i16i8imm:$imm), 0>; +def : InstAlias<"xor{w}\t{$imm, %ax|ax, $imm}", (XOR16ri8 AX, i16i8imm:$imm), 0>; + +def : InstAlias<"adc{l}\t{$imm, %eax|eax, $imm}", (ADC32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"add{l}\t{$imm, %eax|eax, $imm}", (ADD32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"and{l}\t{$imm, %eax|eax, $imm}", (AND32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"cmp{l}\t{$imm, %eax|eax, $imm}", (CMP32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"or{l}\t{$imm, %eax|eax, $imm}", (OR32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"sbb{l}\t{$imm, %eax|eax, $imm}", (SBB32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"sub{l}\t{$imm, %eax|eax, $imm}", (SUB32ri8 EAX, i32i8imm:$imm), 0>; +def : InstAlias<"xor{l}\t{$imm, %eax|eax, $imm}", (XOR32ri8 EAX, i32i8imm:$imm), 0>; + +def : InstAlias<"adc{q}\t{$imm, %rax|rax, $imm}", (ADC64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"add{q}\t{$imm, %rax|rax, $imm}", (ADD64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"and{q}\t{$imm, %rax|rax, $imm}", (AND64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"cmp{q}\t{$imm, %rax|rax, $imm}", (CMP64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"or{q}\t{$imm, %rax|rax, $imm}", (OR64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"sbb{q}\t{$imm, %rax|rax, $imm}", (SBB64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"sub{q}\t{$imm, %rax|rax, $imm}", (SUB64ri8 RAX, i64i8imm:$imm), 0>; +def : InstAlias<"xor{q}\t{$imm, %rax|rax, $imm}", (XOR64ri8 RAX, i64i8imm:$imm), 0>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index eaa7894004cb..11dc1e7d466b 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -249,6 +249,7 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), (MMX_X86movd2w (x86mmx VR64:$src)))], IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; +let isBitcast = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (bitconvert GR64:$src))], @@ -262,7 +263,7 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst), // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as // movd. 
-let SchedRW = [WriteMove] in { +let SchedRW = [WriteMove], isBitcast = 1 in { def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", @@ -303,7 +304,7 @@ def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), "movdq2q\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (bitconvert - (i64 (vector_extract (v2i64 VR128:$src), + (i64 (extractelt (v2i64 VR128:$src), (iPTR 0))))))], IIC_MMX_MOVQ_RR>; @@ -326,6 +327,7 @@ def MMX_MOVFR642Qrr: MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), } } // SchedRW +let Predicates = [HasSSE1] in def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)], @@ -355,6 +357,7 @@ defm MMX_PADDW : MMXI_binop_rm_int<0xFD, "paddw", int_x86_mmx_padd_w, MMX_INTALU_ITINS, 1>; defm MMX_PADDD : MMXI_binop_rm_int<0xFE, "paddd", int_x86_mmx_padd_d, MMX_INTALU_ITINS, 1>; +let Predicates = [HasSSE2] in defm MMX_PADDQ : MMXI_binop_rm_int<0xD4, "paddq", int_x86_mmx_padd_q, MMX_INTALUQ_ITINS, 1>; defm MMX_PADDSB : MMXI_binop_rm_int<0xEC, "paddsb" , int_x86_mmx_padds_b, @@ -382,6 +385,7 @@ defm MMX_PSUBW : MMXI_binop_rm_int<0xF9, "psubw", int_x86_mmx_psub_w, MMX_INTALU_ITINS>; defm MMX_PSUBD : MMXI_binop_rm_int<0xFA, "psubd", int_x86_mmx_psub_d, MMX_INTALU_ITINS>; +let Predicates = [HasSSE2] in defm MMX_PSUBQ : MMXI_binop_rm_int<0xFB, "psubq", int_x86_mmx_psub_q, MMX_INTALUQ_ITINS>; @@ -408,8 +412,10 @@ defm MMX_PMULLW : MMXI_binop_rm_int<0xD5, "pmullw", int_x86_mmx_pmull_w, defm MMX_PMULHW : MMXI_binop_rm_int<0xE5, "pmulhw", int_x86_mmx_pmulh_w, MMX_PMUL_ITINS, 1>; +let Predicates = [HasSSE1] in defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w, MMX_PMUL_ITINS, 1>; +let Predicates = [HasSSE2] in defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq, MMX_PMUL_ITINS, 1>; let isCommutable = 1 in @@ -422,6 +428,7 @@ defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd, defm MMX_PMADDUBSW : SS3I_binop_rm_int_mm<0x04, "pmaddubsw", int_x86_ssse3_pmadd_ub_sw, MMX_PMUL_ITINS>; +let Predicates = [HasSSE1] in { defm MMX_PAVGB : MMXI_binop_rm_int<0xE0, "pavgb", int_x86_mmx_pavg_b, MMX_MISC_FUNC_ITINS, 1>; defm MMX_PAVGW : MMXI_binop_rm_int<0xE3, "pavgw", int_x86_mmx_pavg_w, @@ -439,6 +446,7 @@ defm MMX_PMAXSW : MMXI_binop_rm_int<0xEE, "pmaxsw", int_x86_mmx_pmaxs_w, defm MMX_PSADBW : MMXI_binop_rm_int<0xF6, "psadbw", int_x86_mmx_psad_bw, MMX_PSADBW_ITINS, 1>; +} defm MMX_PSIGNB : SS3I_binop_rm_int_mm<0x08, "psignb", int_x86_ssse3_psign_b, MMX_MISC_FUNC_ITINS>; @@ -594,6 +602,7 @@ let Constraints = "$src1 = $dst" in { } // Extract / Insert +let Predicates = [HasSSE1] in def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR64:$src1, i32u8imm:$src2), "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -601,6 +610,7 @@ def MMX_PEXTRWirri: MMXIi8<0xC5, MRMSrcReg, imm:$src2))], IIC_MMX_PEXTR>, Sched<[WriteShuffle]>; let Constraints = "$src1 = $dst" in { +let Predicates = [HasSSE1] in { def MMX_PINSRWirri : MMXIi8<0xC4, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src1, GR32orGR64:$src2, i32u8imm:$src3), @@ -618,8 +628,10 @@ let Constraints = "$src1 = $dst" in { imm:$src3))], IIC_MMX_PINSRW>, Sched<[WriteShuffleLd, ReadAfterLd]>; } +} // Mask creation +let Predicates = [HasSSE1] in def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR64:$src), "pmovmskb\t{$src, $dst|$dst, $src}", @@ -639,12 +651,12 @@ def : 
Pat<(x86mmx (MMX_X86movdq2q (loadv2i64 addr:$src))), // Misc. let SchedRW = [WriteShuffle] in { -let Uses = [EDI] in +let Uses = [EDI], Predicates = [HasSSE1,In32BitMode] in def MMX_MASKMOVQ : MMXI32<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)], IIC_MMX_MASKMOV>; -let Uses = [RDI] in +let Uses = [RDI], Predicates = [HasSSE1,In64BitMode] in def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), "maskmovq\t{$mask, $src|$src, $mask}", [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)], @@ -653,10 +665,6 @@ def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask), // 64-bit bit convert. let Predicates = [HasSSE2] in { -def : Pat<(x86mmx (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(i64 (bitconvert (x86mmx VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; def : Pat<(f64 (bitconvert (x86mmx VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; def : Pat<(x86mmx (bitconvert (f64 FR64:$src))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 99386b0658ad..7a44212bd829 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -330,9 +330,9 @@ multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, //===----------------------------------------------------------------------===// // A vector extract of the first f32/f64 position is a subregister copy -def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), +def : Pat<(f32 (extractelt (v4f32 VR128:$src), (iPTR 0))), (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>; -def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), +def : Pat<(f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>; // A 128-bit subvector extract from the first 256-bit vector position @@ -413,6 +413,8 @@ let Predicates = [HasSSE2] in { def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; + def : Pat<(f128 (bitconvert (i128 FR128:$src))), (f128 FR128:$src)>; + def : Pat<(i128 (bitconvert (f128 FR128:$src))), (i128 FR128:$src)>; } // Bitcasts between 256-bit vector types. Return the original type since @@ -650,10 +652,10 @@ let Predicates = [UseAVX] in { } // Extract and store. - def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>; - def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + def : Pat<(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), addr:$dst), (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>; @@ -736,7 +738,7 @@ let Predicates = [UseSSE1] in { } // Extract and store. - def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), + def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>; @@ -770,7 +772,7 @@ let Predicates = [UseSSE2] in { } // Extract and store. 
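// Illustrative aside (not part of the patch): the extract-and-store patterns
// in this hunk fold "take lane 0, then store it" into one scalar store. A
// minimal sketch using the standard SSE/SSE2 intrinsics:
#include <immintrin.h>

void store_low_lanes(float *pf, double *pd, __m128 vf, __m128d vd) {
  _mm_store_ss(pf, vf); // store (f32 (extractelt v4f32, 0)) -> movss [mem]
  _mm_store_sd(pd, vd); // store (f64 (extractelt v2f64, 0)) -> movsd [mem]
}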
- def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + def : Pat<(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), addr:$dst), (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>; @@ -935,22 +937,6 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, IIC_SSE_MOVU_P_RR>, VEX, VEX_L; } -let Predicates = [HasAVX] in { -def : Pat<(v8i32 (X86vzmovl - (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; -def : Pat<(v4i64 (X86vzmovl - (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; -def : Pat<(v8f32 (X86vzmovl - (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; -def : Pat<(v4f64 (X86vzmovl - (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))), - (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>; -} - - def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), (VMOVUPSYmr addr:$dst, VR256:$src)>; def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src), @@ -1172,12 +1158,13 @@ multiclass sse12_mov_hilo_packed_baseopc, SDNode psnode, SDNode pdnode, multiclass sse12_mov_hilo_packedopc, SDNode psnode, SDNode pdnode, string base_opc, InstrItinClass itin> { - defm V#NAME : sse12_mov_hilo_packed_base, VEX_4V; -let Constraints = "$src1 = $dst" in - defm NAME : sse12_mov_hilo_packed_base; } @@ -1188,29 +1175,31 @@ let AddedComplexity = 20 in { } let SchedRW = [WriteStore] in { +let Predicates = [UseAVX] in { def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), + [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (v2f64 VR128:$src), + [(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; +}// UseAVX def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), + [(store (f64 (extractelt (bc_v2f64 (v4f32 VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (v2f64 VR128:$src), + [(store (f64 (extractelt (v2f64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; } // SchedRW -let Predicates = [HasAVX] in { +let Predicates = [UseAVX] in { // Shuffle with VMOVLPS def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))), (VMOVLPSrm VR128:$src1, addr:$src2)>; @@ -1243,7 +1232,7 @@ let Predicates = [HasAVX] in { let Predicates = [UseSSE1] in { // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS - def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)), + def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)), (iPTR 0))), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>; @@ -1297,31 +1286,33 @@ let AddedComplexity = 20 in { let SchedRW = [WriteStore] in { // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. 
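// Illustrative aside (not part of the patch): the MOVHPS/MOVHPD store forms
// below let "extract lane 1, then store" be a single instruction, while the
// register-only extract goes through the unpack-high step described above. A
// sketch using the standard SSE2 intrinsics:
#include <immintrin.h>

void store_high(double *p, __m128d v) {
  _mm_storeh_pd(p, v);               // a single movhpd [mem], xmm
}

double extract_high(__m128d v) {
  // No register-only "extract high" exists: unpack high->low first
  // (unpckhpd), then read lane 0.
  return _mm_cvtsd_f64(_mm_unpackhi_pd(v, v));
}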
+let Predicates = [UseAVX] in { def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract + [(store (f64 (extractelt (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)), (bc_v2f64 (v4f32 VR128:$src))), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract + [(store (f64 (extractelt (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX; +} // UseAVX def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract + [(store (f64 (extractelt (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)), (bc_v2f64 (v4f32 VR128:$src))), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract + [(store (f64 (extractelt (v2f64 (X86Unpckh VR128:$src, VR128:$src)), (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>; } // SchedRW -let Predicates = [HasAVX] in { +let Predicates = [UseAVX] in { // VMOVHPS patterns def : Pat<(X86Movlhps VR128:$src1, (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), @@ -1345,7 +1336,7 @@ let Predicates = [HasAVX] in { (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (VMOVHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(store (f64 (vector_extract + def : Pat<(store (f64 (extractelt (v2f64 (X86VPermilpi VR128:$src, (i8 1))), (iPTR 0))), addr:$dst), (VMOVHPDmr addr:$dst, VR128:$src)>; @@ -1377,7 +1368,7 @@ let Predicates = [UseSSE2] in { (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), (MOVHPDrm VR128:$src1, addr:$src2)>; - def : Pat<(store (f64 (vector_extract + def : Pat<(store (f64 (extractelt (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))), (iPTR 0))), addr:$dst), (MOVHPDmr addr:$dst, VR128:$src)>; @@ -2073,15 +2064,17 @@ def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; let Predicates = [HasAVX] in { - def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), - (VCVTDQ2PSrr VR128:$src)>; - def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))), - (VCVTDQ2PSrm addr:$src)>; - def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), (VCVTDQ2PSrr VR128:$src)>; def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))), (VCVTDQ2PSrm addr:$src)>; +} + +let Predicates = [HasAVX, NoVLX] in { + def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), + (VCVTDQ2PSrr VR128:$src)>; + def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))), + (VCVTDQ2PSrm addr:$src)>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), (VCVTTPS2DQrr VR128:$src)>; @@ -2149,7 +2142,7 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>; -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), (VCVTTPD2DQYrr VR256:$src)>; def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), @@ -2306,7 +2299,9 @@ let Predicates = [HasAVX] in { (VCVTDQ2PSYrr VR256:$src)>; def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))), (VCVTDQ2PSYrm addr:$src)>; +} +let Predicates = [HasAVX, NoVLX] in { // Match fround and fextend for 128/256-bit conversions def : Pat<(v4f32 (X86vfpround (v2f64 
VR128:$src))), (VCVTPD2PSrr VR128:$src)>; @@ -2452,9 +2447,9 @@ let Defs = [EFLAGS] in { defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, "ucomisd">, PD, VEX, VEX_LIG; let Pattern = [] in { - defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + defm VCOMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, "comiss">, PS, VEX, VEX_LIG; - defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + defm VCOMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, "comisd">, PD, VEX, VEX_LIG; } @@ -2475,9 +2470,9 @@ let Defs = [EFLAGS] in { "ucomisd">, PD; let Pattern = [] in { - defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + defm COMISS : sse12_ord_cmp<0x2F, FR32, undef, f32, f32mem, loadf32, "comiss">, PS; - defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + defm COMISD : sse12_ord_cmp<0x2F, FR64, undef, f64, f64mem, loadf64, "comisd">, PD; } @@ -2605,19 +2600,20 @@ multiclass sse12_shuffle; } -defm VSHUFPS : sse12_shuffle, PS, VEX_4V; -defm VSHUFPSY : sse12_shuffle, PS, VEX_4V, VEX_L; -defm VSHUFPD : sse12_shuffle, PD, VEX_4V; -defm VSHUFPDY : sse12_shuffle, PD, VEX_4V, VEX_L; - +} let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle, PD; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (X86Shufp VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))), (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; @@ -2694,6 +2690,7 @@ multiclass sse12_unpack_interleave opc, SDNode OpNode, ValueType vt, Sched<[WriteFShuffleLd, ReadAfterLd]>; } +let Predicates = [HasAVX, NoVLX] in { defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32, VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, PS, VEX_4V; @@ -2719,7 +2716,7 @@ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32, defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedDouble>, PD, VEX_4V, VEX_L; - +}// Predicates = [HasAVX, NoVLX] let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", @@ -2845,8 +2842,8 @@ multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, multiclass PDI_binop_all opc, string OpcodeStr, SDNode Opcode, ValueType OpVT128, ValueType OpVT256, - OpndItins itins, bit IsCommutable = 0> { -let Predicates = [HasAVX, NoVLX] in + OpndItins itins, bit IsCommutable = 0, Predicate prd> { +let Predicates = [HasAVX, prd] in defm V#NAME : PDI_binop_rm, VEX_4V; @@ -2854,7 +2851,7 @@ let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_rm; -let Predicates = [HasAVX2, NoVLX] in +let Predicates = [HasAVX2, prd] in defm V#NAME#Y : PDI_binop_rm, VEX_4V, VEX_L; @@ -2863,13 +2860,13 @@ let Predicates = [HasAVX2, NoVLX] in // These are ordered here for pattern ordering requirements with the fp versions defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 1>; + SSE_VEC_BIT_ITINS_P, 1, NoVLX>; defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 1>; + SSE_VEC_BIT_ITINS_P, 1, NoVLX>; defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 1>; + SSE_VEC_BIT_ITINS_P, 1, NoVLX>; defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, - SSE_VEC_BIT_ITINS_P, 0>; + SSE_VEC_BIT_ITINS_P, 0, NoVLX>; 
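// Illustrative aside (not part of the patch): PDI_binop_all now threads a
// predicate through so that, e.g., the VEX "vpand" patterns are fenced off
// as NoVLX; with AVX-512VL available, the EVEX forms are expected to match
// instead. The IR-level operations being matched are the plain bitwise ops:
#include <immintrin.h>

__m128i logic_ops(__m128i a, __m128i b) {
  __m128i x = _mm_and_si128(a, b);    // PAND / VPAND pattern: and v2i64
  x = _mm_or_si128(x, b);             // POR / VPOR pattern: or v2i64
  x = _mm_xor_si128(x, a);            // PXOR / VPXOR pattern: xor v2i64
  return _mm_andnot_si128(x, b);      // PANDN pattern (X86andnp): ~x & b
}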
//===----------------------------------------------------------------------===// // SSE 1 & 2 - Logical Instructions @@ -2911,7 +2908,7 @@ let isCodeGenOnly = 1 in { // Multiclass for vectors using the X86 logical operation aliases for FP. multiclass sse12_fp_packed_vector_logical_alias< bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { defm V#NAME#PS : sse12_fp_packed, PS, VEX_4V; @@ -2923,7 +2920,7 @@ multiclass sse12_fp_packed_vector_logical_alias< defm V#NAME#PSY : sse12_fp_packed, PS, VEX_4V, VEX_L; - + defm V#NAME#PDY : sse12_fp_packed, PD, VEX_4V, VEX_L; @@ -3183,7 +3180,7 @@ multiclass scalar_math_f32_patterns { let Predicates = [UseSSE1] in { // extracted scalar math op with insert via movss def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector - (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))))), (!cast(OpcPrefix#SSrr_Int) v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -3198,7 +3195,7 @@ multiclass scalar_math_f32_patterns { let Predicates = [UseSSE41] in { // extracted scalar math op with insert via blend def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector - (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (i8 1))), (!cast(OpcPrefix#SSrr_Int) v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -3215,7 +3212,7 @@ multiclass scalar_math_f32_patterns { let Predicates = [HasAVX] in { // extracted scalar math op with insert via blend def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector - (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))), + (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))), FR32:$src))), (i8 1))), (!cast("V"#OpcPrefix#SSrr_Int) v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -3241,7 +3238,7 @@ multiclass scalar_math_f64_patterns { let Predicates = [UseSSE2] in { // extracted scalar math op with insert via movsd def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), FR64:$src))))), (!cast(OpcPrefix#SDrr_Int) v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; @@ -3256,7 +3253,7 @@ multiclass scalar_math_f64_patterns { let Predicates = [UseSSE41] in { // extracted scalar math op with insert via blend def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), FR64:$src))), (i8 1))), (!cast(OpcPrefix#SDrr_Int) v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; @@ -3271,14 +3268,14 @@ multiclass scalar_math_f64_patterns { let Predicates = [HasAVX] in { // extracted scalar math op with insert via movsd def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), FR64:$src))))), (!cast("V"#OpcPrefix#SDrr_Int) v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>; // extracted scalar math op with insert via blend def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector - (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), + (Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))), FR64:$src))), (i8 1))), (!cast("V"#OpcPrefix#SDrr_Int) v2f64:$dst, 
(COPY_TO_REGCLASS FR64:$src, VR128))>; @@ -3449,8 +3446,8 @@ multiclass avx_fp_unop_s opc, string OpcodeStr, RegisterClass RC, /// sse1_fp_unop_p - SSE1 unops in packed form. multiclass sse1_fp_unop_p opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { -let Predicates = [HasAVX] in { + OpndItins itins, list prds> { +let Predicates = prds in { def V#NAME#PSr : PSI opc, string OpcodeStr, SDNode OpNode, // Square root. defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>, - sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS, [HasAVX]>, sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>, sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>; // Reciprocal approximations. Note that these typically require refinement // in order to obtain suitable precision. defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>, - sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>; + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS, [HasAVX, NoVLX] >; defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>, - sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>; + sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP, [HasAVX, NoVLX]>; // There is no f64 version of the reciprocal approximation instructions. @@ -4018,39 +4015,43 @@ multiclass PDI_binop_rm2 opc, string OpcodeStr, SDNode OpNode, } // ExeDomain = SSEPackedInt defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX>; defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, - SSE_INTALUQ_ITINS_P, 1>; + SSE_INTALUQ_ITINS_P, 1, NoVLX>; defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, - SSE_INTMUL_ITINS_P, 1>; + SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16, - SSE_INTMUL_ITINS_P, 1>; + SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16, - SSE_INTMUL_ITINS_P, 1>; + SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX>; defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, - SSE_INTALUQ_ITINS_P, 0>; + SSE_INTALUQ_ITINS_P, 0, NoVLX>; defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; + 
SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; +defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8, + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; +defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16, + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; // Intrinsic forms defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b, @@ -4067,26 +4068,18 @@ defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w, int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>; defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, int_x86_avx2_pmadd_wd, SSE_PMADD, 1>; -defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b, - int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>; -defm PAVGW : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w, - int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>; -defm PSADBW : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw, - int_x86_avx2_psad_bw, SSE_PMADD, 1>; - -let Predicates = [HasAVX2] in - def : Pat<(v32i8 (X86psadbw (v32i8 VR256:$src1), - (v32i8 VR256:$src2))), - (VPSADBWYrr VR256:$src2, VR256:$src1)>; let Predicates = [HasAVX] in - def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), - (v16i8 VR128:$src2))), - (VPSADBWrr VR128:$src2, VR128:$src1)>; - -def : Pat<(v16i8 (X86psadbw (v16i8 VR128:$src1), - (v16i8 VR128:$src2))), - (PSADBWrr VR128:$src2, VR128:$src1)>; +defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128, + loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>, + VEX_4V; +let Predicates = [HasAVX2] in +defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256, + loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>, + VEX_4V, VEX_L; +let Constraints = "$src1 = $dst" in +defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128, + memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>; let Predicates = [HasAVX] in defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128, @@ -4105,9 +4098,6 @@ defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128, //===---------------------------------------------------------------------===// let Predicates = [HasAVX, NoVLX] in { -defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, - VR128, v8i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, VR128, v4i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; @@ -4115,9 +4105,6 @@ defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, VR128, v2i64, v2i64, bc_v2i64, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, - VR128, v8i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, VR128, v4i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; @@ -4125,14 +4112,26 @@ defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, VR128, v2i64, v2i64, bc_v2i64, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; -defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, - VR128, v8i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR128, v4i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; +} // Predicates = [HasAVX, NoVLX] -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { +let Predicates = 
[HasAVX, NoVLX_Or_NoBWI] in { +defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, + VR128, v8i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; +defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR128, v8i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; +defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + VR128, v8i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V; +} // Predicates = [HasAVX, NoVLX_Or_NoBWI] + + +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] , + Predicates = [HasAVX, NoVLX_Or_NoBWI]in { // 128-bit logical shifts. def VPSLLDQri : PDIi8<0x73, MRM7r, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), @@ -4147,13 +4146,9 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in { (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>, VEX_4V; // PSRADQri doesn't exist in SSE[1-3]. -} -} // Predicates = [HasAVX] +} // Predicates = [HasAVX, NoVLX_Or_NoBWI] let Predicates = [HasAVX2, NoVLX] in { -defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, - VR256, v16i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli, VR256, v8i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; @@ -4161,9 +4156,6 @@ defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli, VR256, v4i64, v2i64, bc_v2i64, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, - VR256, v16i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli, VR256, v8i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; @@ -4171,14 +4163,25 @@ defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli, VR256, v4i64, v2i64, bc_v2i64, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; -defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, - VR256, v16i16, v8i16, bc_v8i16, loadv2i64, - SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai, VR256, v8i32, v4i32, bc_v4i32, loadv2i64, SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; +}// Predicates = [HasAVX2, NoVLX] -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { +defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli, + VR256, v16i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; +defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli, + VR256, v16i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; +defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai, + VR256, v16i16, v8i16, bc_v8i16, loadv2i64, + SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L; +}// Predicates = [HasAVX2, NoVLX_Or_NoBWI] + +let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 , + Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { // 256-bit logical shifts. 
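// Illustrative aside (not part of the patch), before the 256-bit definitions
// that follow: these "logical shifts" are byte shifts of the whole register,
// and the AVX2 forms shift each 128-bit lane independently. A sketch with the
// standard intrinsics (shift counts must be compile-time constants):
#include <immintrin.h>

__m128i bytes128(__m128i v) {
  return _mm_slli_si128(v, 4);      // VPSLLDQri: shift whole xmm left 4 bytes
}

__m256i bytes256(__m256i v) {
  // VPSLLDQYri: each 128-bit lane is shifted separately, not the full ymm.
  return _mm256_slli_si256(v, 4);
}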
def VPSLLDQYri : PDIi8<0x73, MRM7r, (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2), @@ -4193,8 +4196,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { (v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>, VEX_4V, VEX_L; // PSRADQYri doesn't exist in SSE[1-3]. -} -} // Predicates = [HasAVX2] +} // Predicates = [HasAVX2, NoVLX_Or_NoBWI] let Constraints = "$src1 = $dst" in { defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, @@ -4247,17 +4249,17 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { //===---------------------------------------------------------------------===// defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>; defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, - SSE_INTALU_ITINS_P, 1>; + SSE_INTALU_ITINS_P, 1, NoVLX>; defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>; defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, - SSE_INTALU_ITINS_P, 0>; + SSE_INTALU_ITINS_P, 0, NoVLX>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions @@ -4511,40 +4513,43 @@ multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt, Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in { + +let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, bc_v16i8, loadv2i64, 0>, VEX_4V; defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, bc_v8i16, loadv2i64, 0>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, - bc_v4i32, loadv2i64, 0>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, - bc_v2i64, loadv2i64, 0>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, bc_v16i8, loadv2i64, 0>, VEX_4V; defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, bc_v8i16, loadv2i64, 0>, VEX_4V; +} +let Predicates = [HasAVX, NoVLX] in { + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, + bc_v4i32, loadv2i64, 0>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, + bc_v2i64, loadv2i64, 0>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, bc_v4i32, loadv2i64, 0>, VEX_4V; defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, bc_v2i64, loadv2i64, 0>, VEX_4V; } -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, bc_v32i8>, VEX_4V, VEX_L; defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, bc_v16i16>, VEX_4V, VEX_L; - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, - bc_v8i32>, VEX_4V, VEX_L; - defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, - bc_v4i64>, VEX_4V, VEX_L; - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, bc_v32i8>, VEX_4V, VEX_L; defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, 
bc_v16i16>, VEX_4V, VEX_L; +} +let Predicates = [HasAVX2, NoVLX] in { + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, + bc_v8i32>, VEX_4V, VEX_L; + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, + bc_v4i64>, VEX_4V, VEX_L; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, bc_v8i32>, VEX_4V, VEX_L; defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, @@ -4600,7 +4605,7 @@ multiclass sse2_pinsrw { } // Extract -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -4615,7 +4620,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, Sched<[WriteShuffleLd, ReadAfterLd]>; // Insert -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V; let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in @@ -4683,7 +4688,7 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), } // ExeDomain = SSEPackedInt //===---------------------------------------------------------------------===// -// SSE2 - Move Doubleword +// SSE2 - Move Doubleword/Quadword //===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===// @@ -4770,23 +4775,23 @@ let isCodeGenOnly = 1 in { // def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), + [(set GR32:$dst, (extractelt (v4i32 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>; def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", - [(store (i32 (vector_extract (v4i32 VR128:$src), + [(store (i32 (extractelt (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), + [(set GR32:$dst, (extractelt (v4i32 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>; def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", - [(store (i32 (vector_extract (v4i32 VR128:$src), + [(store (i32 (extractelt (v4i32 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, Sched<[WriteStore]>; @@ -4808,24 +4813,25 @@ def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))), let SchedRW = [WriteMove] in { def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), - (iPTR 0)))], + [(set GR64:$dst, (extractelt (v2i64 VR128:$src), + (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX; def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), + [(set GR64:$dst, (extractelt (v2i64 VR128:$src), (iPTR 0)))], IIC_SSE_MOVD_ToGP>; } //SchedRW let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in -def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs i64mem:$dst), - (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", +def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs), + (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", [], 
IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in -def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs i64mem:$dst), (ins VR128:$src), +def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "mov{d|q}\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVDQ>, Sched<[WriteStore]>; @@ -4883,30 +4889,18 @@ let isCodeGenOnly = 1 in { IIC_SSE_MOVDQ>, Sched<[WriteStore]>; } -//===---------------------------------------------------------------------===// -// Patterns and instructions to describe movd/movq to XMM register zero-extends -// -let isCodeGenOnly = 1, SchedRW = [WriteMove] in { -let AddedComplexity = 15 in { -def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "movq\t{$src, $dst|$dst, $src}", // X86-64 only - [(set VR128:$dst, (v2i64 (X86vzmovl - (v2i64 (scalar_to_vector GR64:$src)))))], - IIC_SSE_MOVDQ>, - VEX, VEX_W; -def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only - [(set VR128:$dst, (v2i64 (X86vzmovl - (v2i64 (scalar_to_vector GR64:$src)))))], - IIC_SSE_MOVDQ>; -} -} // isCodeGenOnly, SchedRW - let Predicates = [UseAVX] in { - let AddedComplexity = 15 in + let AddedComplexity = 15 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (VMOVDI2PDIrr GR32:$src)>; + def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))), + (VMOV64toPQIrr GR64:$src)>; + + def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, + (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), + (SUBREG_TO_REG (i64 0), (VMOV64toPQIrr GR64:$src), sub_xmm)>; + } // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part. // These instructions also write zeros in the high part of a 256-bit register. 
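// Illustrative aside (not part of the patch): the rewritten patterns above
// rely on movd/movq-to-XMM already zeroing the upper lanes, so no separate
// zero-extending instruction definition is needed. A sketch, assuming x86-64:
#include <immintrin.h>

__m128i widen(long long x) {
  // Should lower to a single "movq xmm, gpr64"; lane 1 is implicitly zeroed,
  // which is exactly the (X86vzmovl (scalar_to_vector GR64)) pattern.
  return _mm_cvtsi64_si128(x);
}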
let AddedComplexity = 20 in { @@ -4924,16 +4918,16 @@ let Predicates = [UseAVX] in { def : Pat<(v8i32 (X86vzmovl (insert_subvector undef, (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))), (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>; - def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, - (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))), - (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>; } let Predicates = [UseSSE2] in { - let AddedComplexity = 15 in + let AddedComplexity = 15 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))), (MOVDI2PDIrr GR32:$src)>; + def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))), + (MOV64toPQIrr GR64:$src)>; + } let AddedComplexity = 20 in { def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))), (MOVDI2PDIrm addr:$src)>; @@ -4985,12 +4979,12 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (vector_extract (v2i64 VR128:$src), + [(store (i64 (extractelt (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, VEX; def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (vector_extract (v2i64 VR128:$src), + [(store (i64 (extractelt (v2i64 VR128:$src), (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>; } // ExeDomain, SchedRW @@ -5119,7 +5113,7 @@ def rm : S3SI, Sched<[WriteLoad]>; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", v4f32, VR128, loadv4f32, f128mem>, VEX; defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", @@ -5134,7 +5128,7 @@ defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, memopv4f32, f128mem>; -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v4i32 (X86Movshdup VR128:$src)), (VMOVSHDUPrr VR128:$src)>; def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))), @@ -5190,21 +5184,30 @@ def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, - (v4f64 (X86Movddup - (scalar_to_vector (loadf64 addr:$src)))))]>, + (v4f64 (X86Movddup (loadv4f64 addr:$src))))]>, Sched<[WriteLoad]>; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L; } defm MOVDDUP : sse3_replicate_dfp<"movddup">; -let Predicates = [HasAVX] in { + +let Predicates = [HasAVX, NoVLX] in { def : Pat<(X86Movddup (loadv2f64 addr:$src)), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; + + // 256-bit version + def : Pat<(X86Movddup (loadv4i64 addr:$src)), + (VMOVDDUPYrm addr:$src)>; + def : Pat<(X86Movddup (v4i64 VR256:$src)), + (VMOVDDUPYrr VR256:$src)>; +} + +let Predicates = [HasAVX] in { def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))), (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>; def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))), @@ -5212,16 +5215,6 @@ let Predicates = [HasAVX] in { def : Pat<(X86Movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src))))), (VMOVDDUPrm addr:$src)>, 
Requires<[HasAVX]>; - - // 256-bit version - def : Pat<(X86Movddup (loadv4f64 addr:$src)), - (VMOVDDUPYrm addr:$src)>; - def : Pat<(X86Movddup (loadv4i64 addr:$src)), - (VMOVDDUPYrm addr:$src)>; - def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))), - (VMOVDDUPYrm addr:$src)>; - def : Pat<(X86Movddup (v4i64 VR256:$src)), - (VMOVDDUPYrr VR256:$src)>; } let Predicates = [UseAVX, OptForSize] in { @@ -5791,37 +5784,37 @@ let Predicates = [HasAVX2] in let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in defm PALIGN : ssse3_palignr<"palignr">; -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; + (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; + (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; + (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>; def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))), - (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>; + (VPALIGNR256rr VR256:$src1, VR256:$src2, imm:$imm)>; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (VPALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; } let Predicates = [UseSSSE3] in { def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))), - (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>; + (PALIGNR128rr VR128:$src1, VR128:$src2, imm:$imm)>; } //===---------------------------------------------------------------------===// @@ -6145,7 +6138,7 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { imm:$src2)))), addr:$dst)]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; defm PEXTRB : SS41I_extract8<0x14, "pextrb">; @@ -6170,7 +6163,7 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { imm:$src2)))), addr:$dst)]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX; defm PEXTRW : 
SS41I_extract16<0x15, "pextrw">; @@ -6194,7 +6187,7 @@ multiclass SS41I_extract32 opc, string OpcodeStr> { addr:$dst)]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoDQI] in defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; defm PEXTRD : SS41I_extract32<0x16, "pextrd">; @@ -6217,7 +6210,7 @@ multiclass SS41I_extract64 opc, string OpcodeStr> { addr:$dst)]>, REX_W; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoDQI] in defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W; defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; @@ -6285,7 +6278,7 @@ multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoBWI] in defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PINSRB : SS41I_insert8<0x20, "pinsrb">; @@ -6311,7 +6304,7 @@ multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoDQI] in defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PINSRD : SS41I_insert32<0x22, "pinsrd">; @@ -6337,7 +6330,7 @@ multiclass SS41I_insert64 opc, string asm, bit Is2Addr = 1> { imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in +let Predicates = [HasAVX, NoDQI] in defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W; let Constraints = "$src1 = $dst" in defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; @@ -6543,71 +6536,71 @@ let Predicates = [HasAVX] in { let Predicates = [UseAVX] in { def : Pat<(ffloor FR32:$src), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>; def : Pat<(f64 (ffloor FR64:$src)), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>; def : Pat<(f32 (fnearbyint FR32:$src)), (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; def : Pat<(f64 (fnearbyint FR64:$src)), (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; def : Pat<(f32 (fceil FR32:$src)), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>; def : Pat<(f64 (fceil FR64:$src)), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>; def : Pat<(f32 (frint FR32:$src)), (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; def : Pat<(f64 (frint FR64:$src)), (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; def : Pat<(f32 (ftrunc FR32:$src)), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; + (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>; def : Pat<(f64 (ftrunc FR64:$src)), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>; } let Predicates = [HasAVX] in { def : Pat<(v4f32 (ffloor VR128:$src)), - (VROUNDPSr VR128:$src, (i32 0x1))>; + (VROUNDPSr VR128:$src, (i32 0x9))>; def : Pat<(v4f32 (fnearbyint VR128:$src)), (VROUNDPSr VR128:$src, (i32 0xC))>; def : Pat<(v4f32 (fceil VR128:$src)), - (VROUNDPSr VR128:$src, (i32 0x2))>; + (VROUNDPSr VR128:$src, (i32 0xA))>; def : Pat<(v4f32 (frint VR128:$src)), (VROUNDPSr VR128:$src, (i32 0x4))>; def : Pat<(v4f32 (ftrunc VR128:$src)), - (VROUNDPSr VR128:$src, (i32 0x3))>; + (VROUNDPSr VR128:$src, (i32 0xB))>; def : Pat<(v2f64 (ffloor 
VR128:$src)), - (VROUNDPDr VR128:$src, (i32 0x1))>; + (VROUNDPDr VR128:$src, (i32 0x9))>; def : Pat<(v2f64 (fnearbyint VR128:$src)), (VROUNDPDr VR128:$src, (i32 0xC))>; def : Pat<(v2f64 (fceil VR128:$src)), - (VROUNDPDr VR128:$src, (i32 0x2))>; + (VROUNDPDr VR128:$src, (i32 0xA))>; def : Pat<(v2f64 (frint VR128:$src)), (VROUNDPDr VR128:$src, (i32 0x4))>; def : Pat<(v2f64 (ftrunc VR128:$src)), - (VROUNDPDr VR128:$src, (i32 0x3))>; + (VROUNDPDr VR128:$src, (i32 0xB))>; def : Pat<(v8f32 (ffloor VR256:$src)), - (VROUNDYPSr VR256:$src, (i32 0x1))>; + (VROUNDYPSr VR256:$src, (i32 0x9))>; def : Pat<(v8f32 (fnearbyint VR256:$src)), (VROUNDYPSr VR256:$src, (i32 0xC))>; def : Pat<(v8f32 (fceil VR256:$src)), - (VROUNDYPSr VR256:$src, (i32 0x2))>; + (VROUNDYPSr VR256:$src, (i32 0xA))>; def : Pat<(v8f32 (frint VR256:$src)), (VROUNDYPSr VR256:$src, (i32 0x4))>; def : Pat<(v8f32 (ftrunc VR256:$src)), - (VROUNDYPSr VR256:$src, (i32 0x3))>; + (VROUNDYPSr VR256:$src, (i32 0xB))>; def : Pat<(v4f64 (ffloor VR256:$src)), - (VROUNDYPDr VR256:$src, (i32 0x1))>; + (VROUNDYPDr VR256:$src, (i32 0x9))>; def : Pat<(v4f64 (fnearbyint VR256:$src)), (VROUNDYPDr VR256:$src, (i32 0xC))>; def : Pat<(v4f64 (fceil VR256:$src)), - (VROUNDYPDr VR256:$src, (i32 0x2))>; + (VROUNDYPDr VR256:$src, (i32 0xA))>; def : Pat<(v4f64 (frint VR256:$src)), (VROUNDYPDr VR256:$src, (i32 0x4))>; def : Pat<(v4f64 (ftrunc VR256:$src)), - (VROUNDYPDr VR256:$src, (i32 0x3))>; + (VROUNDYPDr VR256:$src, (i32 0xB))>; } defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, @@ -6619,47 +6612,47 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", let Predicates = [UseSSE41] in { def : Pat<(ffloor FR32:$src), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>; def : Pat<(f64 (ffloor FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>; def : Pat<(f32 (fnearbyint FR32:$src)), (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; def : Pat<(f64 (fnearbyint FR64:$src)), (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; def : Pat<(f32 (fceil FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>; def : Pat<(f64 (fceil FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>; def : Pat<(f32 (frint FR32:$src)), (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; def : Pat<(f64 (frint FR64:$src)), (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; def : Pat<(f32 (ftrunc FR32:$src)), - (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; + (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>; def : Pat<(f64 (ftrunc FR64:$src)), - (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; + (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>; def : Pat<(v4f32 (ffloor VR128:$src)), - (ROUNDPSr VR128:$src, (i32 0x1))>; + (ROUNDPSr VR128:$src, (i32 0x9))>; def : Pat<(v4f32 (fnearbyint VR128:$src)), (ROUNDPSr VR128:$src, (i32 0xC))>; def : Pat<(v4f32 (fceil VR128:$src)), - (ROUNDPSr VR128:$src, (i32 0x2))>; + (ROUNDPSr VR128:$src, (i32 0xA))>; def : Pat<(v4f32 (frint VR128:$src)), (ROUNDPSr VR128:$src, (i32 0x4))>; def : Pat<(v4f32 (ftrunc VR128:$src)), - (ROUNDPSr VR128:$src, (i32 0x3))>; + (ROUNDPSr VR128:$src, (i32 0xB))>; def : Pat<(v2f64 (ffloor VR128:$src)), - (ROUNDPDr VR128:$src, (i32 0x1))>; + (ROUNDPDr VR128:$src, (i32 0x9))>; def : 
Pat<(v2f64 (fnearbyint VR128:$src)), (ROUNDPDr VR128:$src, (i32 0xC))>; def : Pat<(v2f64 (fceil VR128:$src)), - (ROUNDPDr VR128:$src, (i32 0x2))>; + (ROUNDPDr VR128:$src, (i32 0xA))>; def : Pat<(v2f64 (frint VR128:$src)), (ROUNDPDr VR128:$src, (i32 0x4))>; def : Pat<(v2f64 (ftrunc VR128:$src)), - (ROUNDPDr VR128:$src, (i32 0x3))>; + (ROUNDPDr VR128:$src, (i32 0xB))>; } //===----------------------------------------------------------------------===// @@ -7815,13 +7808,7 @@ def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), // VBROADCAST - Load from memory and broadcast to all elements of the // destination operand // -class avx_broadcast opc, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop, Intrinsic Int, SchedWrite Sched> : - AVX8I, Sched<[Sched]>, VEX; - -class avx_broadcast_no_int opc, string OpcodeStr, RegisterClass RC, +class avx_broadcast_rm opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, ValueType VT, PatFrag ld_frag, SchedWrite Sched> : AVX8I opc, string OpcodeStr, RegisterClass RC, } // AVX2 adds register forms -class avx2_broadcast_reg opc, string OpcodeStr, RegisterClass RC, - Intrinsic Int, SchedWrite Sched> : +class avx2_broadcast_rr opc, string OpcodeStr, RegisterClass RC, + ValueType ResVT, ValueType OpVT, SchedWrite Sched> : AVX28I, Sched<[Sched]>, VEX; + [(set RC:$dst, (ResVT (X86VBroadcast (OpVT VR128:$src))))]>, + Sched<[Sched]>, VEX; let ExeDomain = SSEPackedSingle in { - def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128, + def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128, f32mem, v4f32, loadf32, WriteLoad>; - def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256, + def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256, f32mem, v8f32, loadf32, WriteFShuffleLd>, VEX_L; } let ExeDomain = SSEPackedDouble in -def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem, +def VBROADCASTSDYrm : avx_broadcast_rm<0x19, "vbroadcastsd", VR256, f64mem, v4f64, loadf64, WriteFShuffleLd>, VEX_L; -def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, - int_x86_avx_vbroadcastf128_pd_256, - WriteFShuffleLd>, VEX_L; let ExeDomain = SSEPackedSingle in { - def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, - int_x86_avx2_vbroadcast_ss_ps, - WriteFShuffle>; - def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, - int_x86_avx2_vbroadcast_ss_ps_256, - WriteFShuffle256>, VEX_L; + def VBROADCASTSSrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR128, + v4f32, v4f32, WriteFShuffle>; + def VBROADCASTSSYrr : avx2_broadcast_rr<0x18, "vbroadcastss", VR256, + v8f32, v4f32, WriteFShuffle256>, VEX_L; } let ExeDomain = SSEPackedDouble in -def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, - int_x86_avx2_vbroadcast_sd_pd_256, - WriteFShuffle256>, VEX_L; +def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256, + v4f64, v2f64, WriteFShuffle256>, VEX_L; let mayLoad = 1, Predicates = [HasAVX2] in def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), @@ -7871,6 +7853,13 @@ def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst), "vbroadcasti128\t{$src, $dst|$dst, $src}", []>, Sched<[WriteLoad]>, VEX, VEX_L; +def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst), + (ins f128mem:$src), + "vbroadcastf128\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, + (int_x86_avx_vbroadcastf128_pd_256 addr:$src))]>, + Sched<[WriteFShuffleLd]>, VEX, VEX_L; + let Predicates = [HasAVX] in 
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; @@ -7891,7 +7880,7 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, @@ -8080,17 +8069,19 @@ multiclass avx_permil opc_rm, bits<8> opc_rmi, string OpcodeStr, (bitconvert (i_frag addr:$src2))))]>, VEX_4V, Sched<[WriteFShuffleLd, ReadAfterLd]>; - def ri : AVXAIi8, VEX, Sched<[WriteFShuffle]>; - def mi : AVXAIi8, VEX, Sched<[WriteFShuffleLd]>; + }// Predicates = [HasAVX, NoVLX] } let ExeDomain = SSEPackedSingle in { @@ -8106,7 +8097,7 @@ let ExeDomain = SSEPackedDouble in { loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))), (VPERMILPSYrr VR256:$src1, VR256:$src2)>; def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))), @@ -8245,11 +8236,11 @@ let Predicates = [HasF16C] in { def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)), (VCVTPH2PSrm addr:$src)>; - def : Pat<(store (f64 (vector_extract (bc_v2f64 (v8i16 + def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16 (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), addr:$dst), (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; - def : Pat<(store (i64 (vector_extract (bc_v2i64 (v8i16 + def : Pat<(store (i64 (extractelt (bc_v2i64 (v8i16 (int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))), addr:$dst), (VCVTPS2PHmr addr:$dst, VR128:$src1, imm:$src2)>; @@ -8309,97 +8300,62 @@ defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32, // multiclass avx2_broadcast opc, string OpcodeStr, X86MemOperand x86memop, PatFrag ld_frag, - Intrinsic Int128, Intrinsic Int256> { - def rr : AVX28I, - Sched<[WriteShuffle]>, VEX; - def rm : AVX28I { + let Predicates = [HasAVX2, prd] in { + def rr : AVX28I, + (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>, + Sched<[WriteShuffle]>, VEX; + def rm : AVX28I, Sched<[WriteLoad]>, VEX; - def Yrr : AVX28I, - Sched<[WriteShuffle256]>, VEX, VEX_L; - def Yrm : AVX28I, + (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>, + Sched<[WriteShuffle256]>, VEX, VEX_L; + def Yrm : AVX28I, Sched<[WriteLoad]>, VEX, VEX_L; + + // Provide aliases for broadcast from the same register class that + // automatically does the extract. 
+ def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))), + (!cast(NAME#"Yrr") + (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>; + } } defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, - int_x86_avx2_pbroadcastb_128, - int_x86_avx2_pbroadcastb_256>; + v16i8, v32i8, NoVLX_Or_NoBWI>; defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, - int_x86_avx2_pbroadcastw_128, - int_x86_avx2_pbroadcastw_256>; + v8i16, v16i16, NoVLX_Or_NoBWI>; defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, - int_x86_avx2_pbroadcastd_128, - int_x86_avx2_pbroadcastd_256>; + v4i32, v8i32, NoVLX>; defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, - int_x86_avx2_pbroadcastq_128, - int_x86_avx2_pbroadcastq_256>; + v2i64, v4i64, NoVLX>; let Predicates = [HasAVX2] in { - def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))), - (VPBROADCASTBrm addr:$src)>; - def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))), - (VPBROADCASTBYrm addr:$src)>; - def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))), - (VPBROADCASTWrm addr:$src)>; - def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))), - (VPBROADCASTWYrm addr:$src)>; - def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDrm addr:$src)>; - def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), - (VPBROADCASTDYrm addr:$src)>; - def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), - (VPBROADCASTQrm addr:$src)>; - def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), - (VPBROADCASTQYrm addr:$src)>; - - def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))), - (VPBROADCASTBrr VR128:$src)>; - def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))), - (VPBROADCASTBYrr VR128:$src)>; - def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))), - (VPBROADCASTWrr VR128:$src)>; - def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))), - (VPBROADCASTWYrr VR128:$src)>; - def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))), - (VPBROADCASTDrr VR128:$src)>; - def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))), - (VPBROADCASTDYrr VR128:$src)>; - def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))), - (VPBROADCASTQrr VR128:$src)>; - def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))), - (VPBROADCASTQYrr VR128:$src)>; - def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))), - (VBROADCASTSSrr VR128:$src)>; - def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))), - (VBROADCASTSSYrr VR128:$src)>; - def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))), - (VPBROADCASTQrr VR128:$src)>; - def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))), - (VBROADCASTSDYrr VR128:$src)>; + // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. + // This means we'll encounter truncated i32 loads; match that here. + def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), + (VPBROADCASTWrm addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))), + (VPBROADCASTWYrm addr:$src)>; + def : Pat<(v8i16 (X86VBroadcast + (i16 (trunc (i32 (zextloadi16 addr:$src)))))), + (VPBROADCASTWrm addr:$src)>; + def : Pat<(v16i16 (X86VBroadcast + (i16 (trunc (i32 (zextloadi16 addr:$src)))))), + (VPBROADCASTWYrm addr:$src)>; // Provide aliases for broadcast from the same register class that // automatically does the extract. 
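The alias pattern above relies on broadcast reading only element 0 of its source: broadcasting from a YMM register is equivalent to broadcasting from its low XMM subregister, which is exactly what the EXTRACT_SUBREG ... sub_xmm output pattern expresses, and the same idea carries into the float patterns below. A small C++ model of the node's semantics (a sketch, not compiler code):

#include <array>
#include <cstddef>

// Model of X86VBroadcast: replicate element 0 of the source into every
// destination lane; lanes past the first are never read.
template <typename T, std::size_t DstLanes, std::size_t SrcLanes>
std::array<T, DstLanes> vbroadcast(const std::array<T, SrcLanes> &Src) {
  std::array<T, DstLanes> Dst{};
  Dst.fill(Src[0]);
  return Dst;
}
// Broadcasting a v8f32 source and broadcasting its low v4f32 half produce
// identical results, so the wider source can be narrowed for free.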
- def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))), - (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), - sub_xmm)))>; - def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))), - (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), - sub_xmm)))>; - def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))), - (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), - sub_xmm)))>; - def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))), - (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), - sub_xmm)))>; def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))), (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))>; @@ -8598,7 +8554,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L; } -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX] in { def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, @@ -8722,16 +8678,16 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskstore_q, int_x86_avx2_maskstore_q_256>, VEX_W; -def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)), (VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>; -def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)), (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>; -def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)), +def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)), (VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>; -def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)), +def: Pat<(X86mstore addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)), (VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>; def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)), @@ -8776,10 +8732,10 @@ def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0) (VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr), VR128:$mask)>; -def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)), (VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>; -def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)), +def: Pat<(X86mstore addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)), (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>; def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)), @@ -8804,10 +8760,10 @@ def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0) (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr), VR256:$mask)>; -def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)), +def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)), (VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>; -def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)), +def: Pat<(X86mstore addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)), (VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>; def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)), @@ -8865,12 +8821,13 @@ multiclass avx2_var_shift opc, string OpcodeStr, SDNode OpNode, VEX_4V, VEX_L, Sched<[WriteVarVecShiftLd, ReadAfterLd]>; } -defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", 
shl, v4i32, v8i32>; -defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; -defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; -defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; -defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; - +let Predicates = [HasAVX2, NoVLX] in { + defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>; + defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W; + defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>; + defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; + defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; +} //===----------------------------------------------------------------------===// // VGATHER - GATHER Operations multiclass avx2_gather opc, string OpcodeStr, RegisterClass RC256, @@ -8905,3 +8862,59 @@ let mayLoad = 1, Constraints defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>; } } + +//===----------------------------------------------------------------------===// +// Extra selection patterns for FR128, f128, f128mem + +// movaps is shorter than movdqa. movaps is in SSE and movdqa is in SSE2. +def : Pat<(store (f128 FR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, (COPY_TO_REGCLASS (f128 FR128:$src), VR128))>; + +def : Pat<(loadf128 addr:$src), + (COPY_TO_REGCLASS (MOVAPSrm addr:$src), FR128)>; + +// andps is shorter than andpd or pand. andps is SSE and andpd/pand are in SSE2 +def : Pat<(X86fand FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS + (ANDPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), + FR128)>; + +def : Pat<(X86fand FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(and FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (ANDPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(X86for FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS + (ORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), + FR128)>; + +def : Pat<(X86for FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(or FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (ORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(X86fxor FR128:$src1, (loadf128 addr:$src2)), + (COPY_TO_REGCLASS + (XORPSrm (COPY_TO_REGCLASS FR128:$src1, VR128), f128mem:$src2), + FR128)>; + +def : Pat<(X86fxor FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; + +def : Pat<(xor FR128:$src1, FR128:$src2), + (COPY_TO_REGCLASS + (XORPSrr (COPY_TO_REGCLASS FR128:$src1, VR128), + (COPY_TO_REGCLASS FR128:$src2, VR128)), FR128)>; diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index caecf7001ef5..c1df9780a0e0 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -31,21 +31,21 @@ def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (shl GR64:$src1, CL))], IIC_SR>; } // Uses = [CL] -def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), +def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "shl{b}\t{$src2, $dst|$dst, 
$src2}", [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. -def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "shl{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (shl GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "shl{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (shl GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), + (ins GR64:$src1, u8imm:$src2), "shl{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ -85,19 +85,19 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst), "shl{q}\t{%cl, $dst|$dst, cl}", [(store (shl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src), +def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src), "shl{b}\t{$src, $dst|$dst, $src}", [(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; -def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, i8imm:$src), +def SHL16mi : Ii8<0xC1, MRM4m, (outs), (ins i16mem:$dst, u8imm:$src), "shl{w}\t{$src, $dst|$dst, $src}", [(store (shl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize16; -def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, i8imm:$src), +def SHL32mi : Ii8<0xC1, MRM4m, (outs), (ins i32mem:$dst, u8imm:$src), "shl{l}\t{$src, $dst|$dst, $src}", [(store (shl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize32; -def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, i8imm:$src), +def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, u8imm:$src), "shl{q}\t{$src, $dst|$dst, $src}", [(store (shl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; @@ -137,18 +137,18 @@ def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (srl GR64:$src1, CL))], IIC_SR>; } -def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), +def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$src2), "shr{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (srl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; -def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def SHR16ri : Ii8<0xC1, MRM5r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "shr{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (srl GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def SHR32ri : Ii8<0xC1, MRM5r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "shr{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (srl GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; -def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), +def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$src2), "shr{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ -185,19 +185,19 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t{%cl, $dst|$dst, cl}", [(store (srl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src), +def SHR8mi : Ii8<0xC0, 
MRM5m, (outs), (ins i8mem :$dst, u8imm:$src), "shr{b}\t{$src, $dst|$dst, $src}", [(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; -def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, i8imm:$src), +def SHR16mi : Ii8<0xC1, MRM5m, (outs), (ins i16mem:$dst, u8imm:$src), "shr{w}\t{$src, $dst|$dst, $src}", [(store (srl (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize16; -def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, i8imm:$src), +def SHR32mi : Ii8<0xC1, MRM5m, (outs), (ins i32mem:$dst, u8imm:$src), "shr{l}\t{$src, $dst|$dst, $src}", [(store (srl (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize32; -def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, i8imm:$src), +def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, u8imm:$src), "shr{q}\t{$src, $dst|$dst, $src}", [(store (srl (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; @@ -241,20 +241,20 @@ def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1), IIC_SR>; } -def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), +def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "sar{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (sra GR8:$src1, (i8 imm:$src2)))], IIC_SR>; -def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def SAR16ri : Ii8<0xC1, MRM7r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "sar{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (sra GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def SAR32ri : Ii8<0xC1, MRM7r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "sar{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (sra GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), + (ins GR64:$src1, u8imm:$src2), "sar{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sra GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ -298,19 +298,19 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst), [(store (sra (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src), +def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src), "sar{b}\t{$src, $dst|$dst, $src}", [(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; -def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, i8imm:$src), +def SAR16mi : Ii8<0xC1, MRM7m, (outs), (ins i16mem:$dst, u8imm:$src), "sar{w}\t{$src, $dst|$dst, $src}", [(store (sra (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize16; -def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, i8imm:$src), +def SAR32mi : Ii8<0xC1, MRM7m, (outs), (ins i32mem:$dst, u8imm:$src), "sar{l}\t{$src, $dst|$dst, $src}", [(store (sra (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize32; -def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, i8imm:$src), +def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, u8imm:$src), "sar{q}\t{$src, $dst|$dst, $src}", [(store (sra (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; @@ -342,7 +342,7 @@ let hasSideEffects = 0 in { let Constraints = "$src1 = $dst", SchedRW = [WriteShift] in { def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t$dst", [], IIC_SR>; -def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), +def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt), 
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), @@ -350,7 +350,7 @@ def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), "rcl{w}\t$dst", [], IIC_SR>, OpSize16; -def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), +def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16; let Uses = [CL] in def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), @@ -358,7 +358,7 @@ def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "rcl{l}\t$dst", [], IIC_SR>, OpSize32; -def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), +def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32; let Uses = [CL] in def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), @@ -367,7 +367,7 @@ def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "rcl{q}\t$dst", [], IIC_SR>; -def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), +def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), @@ -376,7 +376,7 @@ def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src1), def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), "rcr{b}\t$dst", [], IIC_SR>; -def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), +def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, u8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), @@ -384,7 +384,7 @@ def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), "rcr{w}\t$dst", [], IIC_SR>, OpSize16; -def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), +def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16; let Uses = [CL] in def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), @@ -392,7 +392,7 @@ def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "rcr{l}\t$dst", [], IIC_SR>, OpSize32; -def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), +def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32; let Uses = [CL] in def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), @@ -400,7 +400,7 @@ def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "rcr{q}\t$dst", [], IIC_SR>; -def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$cnt), +def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), @@ -411,36 +411,36 @@ def RCR64rCL : 
RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src1), let SchedRW = [WriteShiftLd, WriteRMW] in { def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), "rcl{b}\t$dst", [], IIC_SR>; -def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), +def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, u8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCL16m1 : I<0xD1, MRM2m, (outs), (ins i16mem:$dst), "rcl{w}\t$dst", [], IIC_SR>, OpSize16; -def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, i8imm:$cnt), +def RCL16mi : Ii8<0xC1, MRM2m, (outs), (ins i16mem:$dst, u8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16; def RCL32m1 : I<0xD1, MRM2m, (outs), (ins i32mem:$dst), "rcl{l}\t$dst", [], IIC_SR>, OpSize32; -def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, i8imm:$cnt), +def RCL32mi : Ii8<0xC1, MRM2m, (outs), (ins i32mem:$dst, u8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32; def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst), "rcl{q}\t$dst", [], IIC_SR>; -def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt), +def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, u8imm:$cnt), "rcl{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCR8m1 : I<0xD0, MRM3m, (outs), (ins i8mem:$dst), "rcr{b}\t$dst", [], IIC_SR>; -def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, i8imm:$cnt), +def RCR8mi : Ii8<0xC0, MRM3m, (outs), (ins i8mem:$dst, u8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; def RCR16m1 : I<0xD1, MRM3m, (outs), (ins i16mem:$dst), "rcr{w}\t$dst", [], IIC_SR>, OpSize16; -def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, i8imm:$cnt), +def RCR16mi : Ii8<0xC1, MRM3m, (outs), (ins i16mem:$dst, u8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize16; def RCR32m1 : I<0xD1, MRM3m, (outs), (ins i32mem:$dst), "rcr{l}\t$dst", [], IIC_SR>, OpSize32; -def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, i8imm:$cnt), +def RCR32mi : Ii8<0xC1, MRM3m, (outs), (ins i32mem:$dst, u8imm:$cnt), "rcr{l}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>, OpSize32; def RCR64m1 : RI<0xD1, MRM3m, (outs), (ins i64mem:$dst), "rcr{q}\t$dst", [], IIC_SR>; -def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, i8imm:$cnt), +def RCR64mi : RIi8<0xC1, MRM3m, (outs), (ins i64mem:$dst, u8imm:$cnt), "rcr{q}\t{$cnt, $dst|$dst, $cnt}", [], IIC_SR>; let Uses = [CL] in { @@ -482,19 +482,19 @@ def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (rotl GR64:$src1, CL))], IIC_SR>; } -def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), +def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "rol{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (rotl GR8:$src1, (i8 imm:$src2)))], IIC_SR>; -def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def ROL16ri : Ii8<0xC1, MRM0r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "rol{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (rotl GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def ROL32ri : Ii8<0xC1, MRM0r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "rol{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (rotl GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), + (ins GR64:$src1, u8imm:$src2), "rol{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (rotl GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ 
-537,19 +537,19 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), [(store (rotl (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src1), +def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1), "rol{b}\t{$src1, $dst|$dst, $src1}", [(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)], IIC_SR>; -def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, i8imm:$src1), +def ROL16mi : Ii8<0xC1, MRM0m, (outs), (ins i16mem:$dst, u8imm:$src1), "rol{w}\t{$src1, $dst|$dst, $src1}", [(store (rotl (loadi16 addr:$dst), (i8 imm:$src1)), addr:$dst)], IIC_SR>, OpSize16; -def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, i8imm:$src1), +def ROL32mi : Ii8<0xC1, MRM0m, (outs), (ins i32mem:$dst, u8imm:$src1), "rol{l}\t{$src1, $dst|$dst, $src1}", [(store (rotl (loadi32 addr:$dst), (i8 imm:$src1)), addr:$dst)], IIC_SR>, OpSize32; -def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, i8imm:$src1), +def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, u8imm:$src1), "rol{q}\t{$src1, $dst|$dst, $src1}", [(store (rotl (loadi64 addr:$dst), (i8 imm:$src1)), addr:$dst)], IIC_SR>; @@ -589,19 +589,19 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), [(set GR64:$dst, (rotr GR64:$src1, CL))], IIC_SR>; } -def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), +def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2), "ror{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>; -def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), +def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2), "ror{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize16; -def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2), +def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2), "ror{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))], IIC_SR>, OpSize32; def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), + (ins GR64:$src1, u8imm:$src2), "ror{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))], IIC_SR>; @@ -644,19 +644,19 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), [(store (rotr (loadi64 addr:$dst), CL), addr:$dst)], IIC_SR>; } -def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src), +def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src), "ror{b}\t{$src, $dst|$dst, $src}", [(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; -def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, i8imm:$src), +def ROR16mi : Ii8<0xC1, MRM1m, (outs), (ins i16mem:$dst, u8imm:$src), "ror{w}\t{$src, $dst|$dst, $src}", [(store (rotr (loadi16 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize16; -def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, i8imm:$src), +def ROR32mi : Ii8<0xC1, MRM1m, (outs), (ins i32mem:$dst, u8imm:$src), "ror{l}\t{$src, $dst|$dst, $src}", [(store (rotr (loadi32 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>, OpSize32; -def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, i8imm:$src), +def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src), "ror{q}\t{$src, $dst|$dst, $src}", [(store (rotr (loadi64 addr:$dst), (i8 imm:$src)), addr:$dst)], IIC_SR>; @@ -727,42 +727,42 @@ def SHRD64rrCL : RI<0xAD, 
MRMDestReg, (outs GR64:$dst), let isCommutable = 1 in { // These instructions commute to each other. def SHLD16rri8 : Ii8<0xA4, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2, i8imm:$src3), + (ins GR16:$src1, GR16:$src2, u8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize16; def SHRD16rri8 : Ii8<0xAC, MRMDestReg, (outs GR16:$dst), - (ins GR16:$src1, GR16:$src2, i8imm:$src3), + (ins GR16:$src1, GR16:$src2, u8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, (i8 imm:$src3)))], IIC_SHD16_REG_IM>, TB, OpSize16; def SHLD32rri8 : Ii8<0xA4, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2, i8imm:$src3), + (ins GR32:$src1, GR32:$src2, u8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, (i8 imm:$src3)))], IIC_SHD32_REG_IM>, TB, OpSize32; def SHRD32rri8 : Ii8<0xAC, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2, i8imm:$src3), + (ins GR32:$src1, GR32:$src2, u8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, (i8 imm:$src3)))], IIC_SHD32_REG_IM>, TB, OpSize32; def SHLD64rri8 : RIi8<0xA4, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2, i8imm:$src3), + (ins GR64:$src1, GR64:$src2, u8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shld GR64:$src1, GR64:$src2, (i8 imm:$src3)))], IIC_SHD64_REG_IM>, TB; def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2, i8imm:$src3), + (ins GR64:$src1, GR64:$src2, u8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(set GR64:$dst, (X86shrd GR64:$src1, GR64:$src2, (i8 imm:$src3)))], IIC_SHD64_REG_IM>, @@ -801,14 +801,14 @@ def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), } def SHLD16mri8 : Ii8<0xA4, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3), "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi16 addr:$dst), GR16:$src2, (i8 imm:$src3)), addr:$dst)], IIC_SHD16_MEM_IM>, TB, OpSize16; def SHRD16mri8 : Ii8<0xAC, MRMDestMem, - (outs), (ins i16mem:$dst, GR16:$src2, i8imm:$src3), + (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3), "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, (i8 imm:$src3)), addr:$dst)], @@ -816,14 +816,14 @@ def SHRD16mri8 : Ii8<0xAC, MRMDestMem, TB, OpSize16; def SHLD32mri8 : Ii8<0xA4, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), + (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3), "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi32 addr:$dst), GR32:$src2, (i8 imm:$src3)), addr:$dst)], IIC_SHD32_MEM_IM>, TB, OpSize32; def SHRD32mri8 : Ii8<0xAC, MRMDestMem, - (outs), (ins i32mem:$dst, GR32:$src2, i8imm:$src3), + (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3), "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi32 addr:$dst), GR32:$src2, (i8 imm:$src3)), addr:$dst)], @@ -831,14 +831,14 @@ def SHRD32mri8 : Ii8<0xAC, MRMDestMem, TB, OpSize32; def SHLD64mri8 : RIi8<0xA4, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3), "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shld (loadi64 
addr:$dst), GR64:$src2, (i8 imm:$src3)), addr:$dst)], IIC_SHD64_MEM_IM>, TB; def SHRD64mri8 : RIi8<0xAC, MRMDestMem, - (outs), (ins i64mem:$dst, GR64:$src2, i8imm:$src3), + (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3), "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}", [(store (X86shrd (loadi64 addr:$dst), GR64:$src2, (i8 imm:$src3)), addr:$dst)], @@ -860,12 +860,12 @@ def ROT64L2R_imm8 : SDNodeXForm { let hasSideEffects = 0 in { - def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), + def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, TAXD, VEX, Sched<[WriteShift]>; let mayLoad = 1 in def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst), - (ins x86memop:$src1, i8imm:$src2), + (ins x86memop:$src1, u8imm:$src2), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, TAXD, VEX, Sched<[WriteShiftLd]>; } diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 0350566f8b9b..85e17f516f91 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -44,7 +44,7 @@ def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", let SchedRW = [WriteSystem] in { -def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", +def INT : Ii8<0xcd, RawFrm, (outs), (ins u8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)], IIC_INT>; @@ -60,12 +60,6 @@ def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit{l}", [], IIC_SYS_ENTER_EXIT>, TB; def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit{q}", [], IIC_SYS_ENTER_EXIT>, TB, Requires<[In64BitMode]>; - -def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", [], IIC_IRET>, OpSize16; -def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l|d}", [], IIC_IRET>, - OpSize32; -def IRET64 : RI<0xcf, RawFrm, (outs), (ins), "iretq", [], IIC_IRET>, - Requires<[In64BitMode]>; } // SchedRW def : Pat<(debugtrap), @@ -88,13 +82,13 @@ def IN32rr : I<0xED, RawFrm, (outs), (ins), "in{l}\t{%dx, %eax|eax, dx}", [], IIC_IN_RR>, OpSize32; let Defs = [AL] in -def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins i8imm:$port), +def IN8ri : Ii8<0xE4, RawFrm, (outs), (ins u8imm:$port), "in{b}\t{$port, %al|al, $port}", [], IIC_IN_RI>; let Defs = [AX] in -def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), +def IN16ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port), "in{w}\t{$port, %ax|ax, $port}", [], IIC_IN_RI>, OpSize16; let Defs = [EAX] in -def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins i8imm:$port), +def IN32ri : Ii8<0xE5, RawFrm, (outs), (ins u8imm:$port), "in{l}\t{$port, %eax|eax, $port}", [], IIC_IN_RI>, OpSize32; let Uses = [DX, AL] in @@ -108,13 +102,13 @@ def OUT32rr : I<0xEF, RawFrm, (outs), (ins), "out{l}\t{%eax, %dx|dx, eax}", [], IIC_OUT_RR>, OpSize32; let Uses = [AL] in -def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins i8imm:$port), +def OUT8ir : Ii8<0xE6, RawFrm, (outs), (ins u8imm:$port), "out{b}\t{%al, $port|$port, al}", [], IIC_OUT_IR>; let Uses = [AX] in -def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), +def OUT16ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port), "out{w}\t{%ax, $port|$port, ax}", [], IIC_OUT_IR>, OpSize16; let Uses = [EAX] in -def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins i8imm:$port), +def OUT32ir : Ii8<0xE7, RawFrm, (outs), (ins u8imm:$port), "out{l}\t{%eax, $port|$port, eax}", [], IIC_OUT_IR>, OpSize32; } // SchedRW @@ -478,39 +472,60 @@ def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", [], IIC_INVD>, TB; 
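One practical effect of the i8imm -> u8imm switch running through the shift, rotate and SHLD/SHRD definitions above (plausibly its motivation; stated here as an assumption): these counts are inherently unsigned, so an operand such as 0xC0 (192) should be accepted and printed as unsigned rather than rejected or rendered as -64 by a signed-byte operand. A hedged sketch of the two range checks, with hypothetical helper names:

#include <cstdint>

// Hypothetical validators mirroring i8imm vs u8imm operand ranges.
bool fitsSignedImm8(int64_t V)   { return V >= -128 && V <= 127; }
bool fitsUnsignedImm8(int64_t V) { return V >= 0 && V <= 255; }
// "rol $0xC0, %al" carries the immediate 192: it encodes in one byte either
// way, but only the unsigned range check accepts the value as written.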
//===----------------------------------------------------------------------===// // XSAVE instructions let SchedRW = [WriteSystem] in { +let Predicates = [HasXSAVE] in { let Defs = [EDX, EAX], Uses = [ECX] in def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; let Uses = [EDX, EAX, ECX] in def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; - -let Uses = [RDX, RAX] in { - def XSAVE : I<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), - "xsave\t$dst", []>, TB; - def XSAVE64 : RI<0xAE, MRM4m, (outs opaque512mem:$dst), (ins), - "xsave64\t$dst", []>, TB, Requires<[In64BitMode]>; - def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), - "xrstor\t$dst", []>, TB; - def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), - "xrstor64\t$dst", []>, TB, Requires<[In64BitMode]>; - def XSAVEOPT : I<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), - "xsaveopt\t$dst", []>, PS; - def XSAVEOPT64 : RI<0xAE, MRM6m, (outs opaque512mem:$dst), (ins), - "xsaveopt64\t$dst", []>, PS, Requires<[In64BitMode]>; - - def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), - "xrstors\t$dst", []>, TB; - def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), - "xrstors64\t$dst", []>, TB, Requires<[In64BitMode]>; - def XSAVEC : I<0xC7, MRM4m, (outs opaque512mem:$dst), (ins), - "xsavec\t$dst", []>, TB; - def XSAVEC64 : RI<0xC7, MRM4m, (outs opaque512mem:$dst), (ins), - "xsavec64\t$dst", []>, TB, Requires<[In64BitMode]>; - def XSAVES : I<0xC7, MRM5m, (outs opaque512mem:$dst), (ins), - "xsaves\t$dst", []>, TB; - def XSAVES64 : RI<0xC7, MRM5m, (outs opaque512mem:$dst), (ins), - "xsaves64\t$dst", []>, TB, Requires<[In64BitMode]>; } + +let Uses = [EDX, EAX] in { +let Predicates = [HasXSAVE] in { + def XSAVE : I<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), + "xsave\t$dst", + [(int_x86_xsave addr:$dst, EDX, EAX)]>, TB; + def XSAVE64 : RI<0xAE, MRM4m, (outs), (ins opaque512mem:$dst), + "xsave64\t$dst", + [(int_x86_xsave64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; + def XRSTOR : I<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstor\t$dst", + [(int_x86_xrstor addr:$dst, EDX, EAX)]>, TB; + def XRSTOR64 : RI<0xAE, MRM5m, (outs), (ins opaque512mem:$dst), + "xrstor64\t$dst", + [(int_x86_xrstor64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; +} +let Predicates = [HasXSAVEOPT] in { + def XSAVEOPT : I<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), + "xsaveopt\t$dst", + [(int_x86_xsaveopt addr:$dst, EDX, EAX)]>, TB; + def XSAVEOPT64 : RI<0xAE, MRM6m, (outs), (ins opaque512mem:$dst), + "xsaveopt64\t$dst", + [(int_x86_xsaveopt64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; +} +let Predicates = [HasXSAVEC] in { + def XSAVEC : I<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), + "xsavec\t$dst", + [(int_x86_xsavec addr:$dst, EDX, EAX)]>, TB; + def XSAVEC64 : RI<0xC7, MRM4m, (outs), (ins opaque512mem:$dst), + "xsavec64\t$dst", + [(int_x86_xsavec64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; +} +let Predicates = [HasXSAVES] in { + def XSAVES : I<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), + "xsaves\t$dst", + [(int_x86_xsaves addr:$dst, EDX, EAX)]>, TB; + def XSAVES64 : RI<0xC7, MRM5m, (outs), (ins opaque512mem:$dst), + "xsaves64\t$dst", + [(int_x86_xsaves64 addr:$dst, EDX, EAX)]>, TB, Requires<[In64BitMode]>; + def XRSTORS : I<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), + "xrstors\t$dst", + [(int_x86_xrstors addr:$dst, EDX, EAX)]>, TB; + def XRSTORS64 : RI<0xC7, MRM3m, (outs), (ins opaque512mem:$dst), + "xrstors64\t$dst", + [(int_x86_xrstors64 addr:$dst, EDX, 
EAX)]>, TB, Requires<[In64BitMode]>; +} +} // Uses } // SchedRW //===----------------------------------------------------------------------===// @@ -534,6 +549,12 @@ let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in { } let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in def MONTMUL : I<0xa6, MRM_C0, (outs), (ins), "montmul", []>, TB; +//==-----------------------------------------------------------------------===// +// PKU - enable protection key +let Defs = [EAX, EDX], Uses = [ECX] in + def RDPKRU : I<0x01, MRM_EE, (outs), (ins), "rdpkru", []>, TB; +let Uses = [EAX, ECX, EDX] in + def WRPKRU : I<0x01, MRM_EF, (outs), (ins), "wrpkru", []>, TB; //===----------------------------------------------------------------------===// // FS/GS Base Instructions diff --git a/lib/Target/X86/X86InstrXOP.td b/lib/Target/X86/X86InstrXOP.td index 8455b8d8467c..4cb2304e464d 100644 --- a/lib/Target/X86/X86InstrXOP.td +++ b/lib/Target/X86/X86InstrXOP.td @@ -83,57 +83,64 @@ let ExeDomain = SSEPackedDouble in { defm VFRCZPD : xop2op256<0x81, "vfrczpd", int_x86_xop_vfrcz_pd_256, loadv4f64>; } -multiclass xop3op opc, string OpcodeStr, Intrinsic Int> { +multiclass xop3op opc, string OpcodeStr, SDNode OpNode, + ValueType vt128> { def rr : IXOP, XOP_4VOp3; + [(set VR128:$dst, + (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>, + XOP_4VOp3, Sched<[WriteVarVecShift]>; def rm : IXOP, - XOP_4V, VEX_W; + (vt128 (OpNode (vt128 VR128:$src1), + (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>, + XOP_4V, VEX_W, Sched<[WriteVarVecShift, ReadAfterLd]>; def mr : IXOP, - XOP_4VOp3; + (vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))), + (vt128 VR128:$src2))))]>, + XOP_4VOp3, Sched<[WriteVarVecShift, ReadAfterLd]>; } let ExeDomain = SSEPackedInt in { - defm VPSHLW : xop3op<0x95, "vpshlw", int_x86_xop_vpshlw>; - defm VPSHLQ : xop3op<0x97, "vpshlq", int_x86_xop_vpshlq>; - defm VPSHLD : xop3op<0x96, "vpshld", int_x86_xop_vpshld>; - defm VPSHLB : xop3op<0x94, "vpshlb", int_x86_xop_vpshlb>; - defm VPSHAW : xop3op<0x99, "vpshaw", int_x86_xop_vpshaw>; - defm VPSHAQ : xop3op<0x9B, "vpshaq", int_x86_xop_vpshaq>; - defm VPSHAD : xop3op<0x9A, "vpshad", int_x86_xop_vpshad>; - defm VPSHAB : xop3op<0x98, "vpshab", int_x86_xop_vpshab>; - defm VPROTW : xop3op<0x91, "vprotw", int_x86_xop_vprotw>; - defm VPROTQ : xop3op<0x93, "vprotq", int_x86_xop_vprotq>; - defm VPROTD : xop3op<0x92, "vprotd", int_x86_xop_vprotd>; - defm VPROTB : xop3op<0x90, "vprotb", int_x86_xop_vprotb>; + defm VPROTB : xop3op<0x90, "vprotb", X86vprot, v16i8>; + defm VPROTD : xop3op<0x92, "vprotd", X86vprot, v4i32>; + defm VPROTQ : xop3op<0x93, "vprotq", X86vprot, v2i64>; + defm VPROTW : xop3op<0x91, "vprotw", X86vprot, v8i16>; + defm VPSHAB : xop3op<0x98, "vpshab", X86vpsha, v16i8>; + defm VPSHAD : xop3op<0x9A, "vpshad", X86vpsha, v4i32>; + defm VPSHAQ : xop3op<0x9B, "vpshaq", X86vpsha, v2i64>; + defm VPSHAW : xop3op<0x99, "vpshaw", X86vpsha, v8i16>; + defm VPSHLB : xop3op<0x94, "vpshlb", X86vpshl, v16i8>; + defm VPSHLD : xop3op<0x96, "vpshld", X86vpshl, v4i32>; + defm VPSHLQ : xop3op<0x97, "vpshlq", X86vpshl, v2i64>; + defm VPSHLW : xop3op<0x95, "vpshlw", X86vpshl, v8i16>; } -multiclass xop3opimm opc, string OpcodeStr, Intrinsic Int> { +multiclass xop3opimm opc, string OpcodeStr, SDNode OpNode, + ValueType vt128> { def ri : IXOPi8, XOP; - def mi : IXOPi8, XOP; + (vt128 (OpNode (vt128 VR128:$src1), imm:$src2)))]>, XOP; + def mi : IXOPi8, XOP; } let ExeDomain = SSEPackedInt in { - defm VPROTW : xop3opimm<0xC1, "vprotw", int_x86_xop_vprotwi>; - defm VPROTQ : 
xop3opimm<0xC3, "vprotq", int_x86_xop_vprotqi>; - defm VPROTD : xop3opimm<0xC2, "vprotd", int_x86_xop_vprotdi>; - defm VPROTB : xop3opimm<0xC0, "vprotb", int_x86_xop_vprotbi>; + defm VPROTB : xop3opimm<0xC0, "vprotb", X86vproti, v16i8>; + defm VPROTD : xop3opimm<0xC2, "vprotd", X86vproti, v4i32>; + defm VPROTQ : xop3opimm<0xC3, "vprotq", X86vproti, v2i64>; + defm VPROTW : xop3opimm<0xC1, "vprotw", X86vproti, v8i16>; } // Instruction where second source can be memory, but third must be register @@ -170,30 +177,34 @@ let ExeDomain = SSEPackedInt in { } // Instruction where second source can be memory, third must be imm8 -multiclass xopvpcom opc, string Suffix, Intrinsic Int> { +multiclass xopvpcom opc, string Suffix, SDNode OpNode, ValueType vt128> { let isCommutable = 1 in def ri : IXOPi8, + [(set VR128:$dst, + (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), + i8immZExt3:$cc)))]>, XOP_4V; def mi : IXOPi8, XOP_4V; + (vt128 (OpNode (vt128 VR128:$src1), + (vt128 (bitconvert (loadv2i64 addr:$src2))), + i8immZExt3:$cc)))]>, + XOP_4V; let isAsmParserOnly = 1, hasSideEffects = 0 in { def ri_alt : IXOPi8, XOP_4V; let mayLoad = 1 in def mi_alt : IXOPi8, XOP_4V; @@ -201,14 +212,14 @@ multiclass xopvpcom opc, string Suffix, Intrinsic Int> { } let ExeDomain = SSEPackedInt in { // SSE integer instructions - defm VPCOMB : xopvpcom<0xCC, "b", int_x86_xop_vpcomb>; - defm VPCOMW : xopvpcom<0xCD, "w", int_x86_xop_vpcomw>; - defm VPCOMD : xopvpcom<0xCE, "d", int_x86_xop_vpcomd>; - defm VPCOMQ : xopvpcom<0xCF, "q", int_x86_xop_vpcomq>; - defm VPCOMUB : xopvpcom<0xEC, "ub", int_x86_xop_vpcomub>; - defm VPCOMUW : xopvpcom<0xED, "uw", int_x86_xop_vpcomuw>; - defm VPCOMUD : xopvpcom<0xEE, "ud", int_x86_xop_vpcomud>; - defm VPCOMUQ : xopvpcom<0xEF, "uq", int_x86_xop_vpcomuq>; + defm VPCOMB : xopvpcom<0xCC, "b", X86vpcom, v16i8>; + defm VPCOMW : xopvpcom<0xCD, "w", X86vpcom, v8i16>; + defm VPCOMD : xopvpcom<0xCE, "d", X86vpcom, v4i32>; + defm VPCOMQ : xopvpcom<0xCF, "q", X86vpcom, v2i64>; + defm VPCOMUB : xopvpcom<0xEC, "ub", X86vpcomu, v16i8>; + defm VPCOMUW : xopvpcom<0xED, "uw", X86vpcomu, v8i16>; + defm VPCOMUD : xopvpcom<0xEE, "ud", X86vpcomu, v4i32>; + defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64>; } // Instruction where either second or third source can be memory @@ -270,42 +281,52 @@ multiclass xop4op256 opc, string OpcodeStr, Intrinsic Int> { let ExeDomain = SSEPackedInt in defm VPCMOV : xop4op256<0xA2, "vpcmov", int_x86_xop_vpcmov_256>; +let Predicates = [HasXOP] in { + def : Pat<(v2i64 (or (and VR128:$src3, VR128:$src1), + (X86andnp VR128:$src3, VR128:$src2))), + (VPCMOVrr VR128:$src1, VR128:$src2, VR128:$src3)>; + + def : Pat<(v4i64 (or (and VR256:$src3, VR256:$src1), + (X86andnp VR256:$src3, VR256:$src2))), + (VPCMOVrrY VR256:$src1, VR256:$src2, VR256:$src3)>; +} + multiclass xop5op opc, string OpcodeStr, Intrinsic Int128, Intrinsic Int256, PatFrag ld_128, PatFrag ld_256> { def rr : IXOP5; def rm : IXOP5, VEX_W, MemOp4; def mr : IXOP5; def rrY : IXOP5, VEX_L; def rmY : IXOP5, VEX_W, MemOp4, VEX_L; def mrY : IXOP5 +*/ +static std::tuple TranslateX86ConstCondToX86CC(SDValue &imm) { + ConstantSDNode *CImm = dyn_cast(imm); + unsigned IntImm = CImm->getZExtValue(); + // On a floating point condition, the flags are set as follows: + // ZF PF CF op + // 0 | 0 | 0 | X > Y + // 0 | 0 | 1 | X < Y + // 1 | 0 | 0 | X == Y + // 1 | 1 | 1 | unordered + switch (IntImm) { + default: llvm_unreachable("Invalid floating point compare value for Comi!"); + case _X86_CMP_EQ_OQ: // 0x00 - Equal (ordered, 
nonsignaling) + case _X86_CMP_EQ_OS: // 0x10 - Equal (ordered, signaling) + return std::make_tuple(true, X86::COND_E); + case _X86_CMP_EQ_UQ: // 0x08 - Equal (unordered, nonsignaling) + case _X86_CMP_EQ_US: // 0x18 - Equal (unordered, signaling) + return std::make_tuple(false, X86::COND_E); + case _X86_CMP_LT_OS: // 0x01 - Less-than (ordered, signaling) + case _X86_CMP_LT_OQ: // 0x11 - Less-than (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_B); + case _X86_CMP_NGE_US: // 0x09 - Not-greater-than-or-equal (unordered, signaling) + case _X86_CMP_NGE_UQ: // 0x19 - Not-greater-than-or-equal (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_B); + case _X86_CMP_LE_OS: // 0x02 - Less-than-or-equal (ordered, signaling) + case _X86_CMP_LE_OQ: // 0x12 - Less-than-or-equal (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_BE); + case _X86_CMP_NGT_US: // 0x0A - Not-greater-than (unordered, signaling) + case _X86_CMP_NGT_UQ: // 0x1A - Not-greater-than (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_BE); + case _X86_CMP_GT_OS: // 0x0E - Greater-than (ordered, signaling) + case _X86_CMP_GT_OQ: // 0x1E - Greater-than (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_A); + case _X86_CMP_NLE_US: // 0x06 - Not-less-than-or-equal (unordered, signaling) + case _X86_CMP_NLE_UQ: // 0x16 - Not-less-than-or-equal (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_A); + case _X86_CMP_GE_OS: // 0x0D - Greater-than-or-equal (ordered, signaling) + case _X86_CMP_GE_OQ: // 0x1D - Greater-than-or-equal (ordered, nonsignaling) + return std::make_tuple(true, X86::COND_AE); + case _X86_CMP_NLT_US: // 0x05 - Not-less-than (unordered, signaling) + case _X86_CMP_NLT_UQ: // 0x15 - Not-less-than (unordered, nonsignaling) + return std::make_tuple(false, X86::COND_AE); + case _X86_CMP_NEQ_OQ: // 0x0C - Not-equal (ordered, nonsignaling) + case _X86_CMP_NEQ_OS: // 0x1C - Not-equal (ordered, signaling) + return std::make_tuple(true, X86::COND_NE); + case _X86_CMP_NEQ_UQ: // 0x04 - Not-equal (unordered, nonsignaling) + case _X86_CMP_NEQ_US: // 0x14 - Not-equal (unordered, signaling) + return std::make_tuple(false, X86::COND_NE); + } } } // End llvm namespace diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 3415cedc6fea..e186f7039b43 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -92,7 +92,6 @@ namespace llvm { SmallVector<MCFixup, 4> Fixups; raw_svector_ostream VecOS(Code); CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI); - VecOS.flush(); CurrentShadowSize += Code.size(); if (CurrentShadowSize >= RequiredShadowSize) InShadow = false; // The shadow is big enough. Stop counting. @@ -128,7 +127,7 @@ MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const { /// operand to an MCSymbol.
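The TranslateX86ConstCondToX86CC switch above is periodic in the predicate encoding: the condition code depends only on imm[2:0], the signaling bit imm[4] never changes the result, and the ordered/unordered flavour reported in the bool is recoverable from imm[3:2]. A standalone C++ sketch of that compression (an equivalent restatement, not the in-tree form):

#include <cassert>
#include <cstdint>
#include <utility>

enum CondCode { COND_E, COND_B, COND_BE, COND_A, COND_AE, COND_NE };

std::pair<bool, CondCode> translateComiPredicate(uint8_t Imm) {
  // Predicates 3 (UNORD) and 7 (ORD) have no COMI condition code.
  assert((Imm & 0x7) != 3 && (Imm & 0x7) != 7 && "not handled here");
  static const CondCode CCs[8] = {COND_E,  COND_B,  COND_BE, COND_E /*n/a*/,
                                  COND_NE, COND_AE, COND_A,  COND_E /*n/a*/};
  // EQ/LT/LE are ordered at base and flip with bit 3; NEQ/NLT/NLE start
  // unordered, so ordered-ness is bit 3 == bit 2.
  bool Ordered = ((Imm >> 3) & 1) == ((Imm >> 2) & 1);
  return {Ordered, CCs[Imm & 0x7]};
}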
MCSymbol *X86MCInstLower:: GetSymbolFromOperand(const MachineOperand &MO) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = MF.getDataLayout(); assert((MO.isGlobal() || MO.isSymbol() || MO.isMBB()) && "Isn't a symbol reference"); MCSymbol *Sym = nullptr; @@ -151,7 +150,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { } if (!Suffix.empty()) - Name += DL->getPrivateGlobalPrefix(); + Name += DL.getPrivateGlobalPrefix(); unsigned PrefixLen = Name.size(); @@ -159,7 +158,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const { const GlobalValue *GV = MO.getGlobal(); AsmPrinter.getNameWithPrefix(Name, GV); } else if (MO.isSymbol()) { - Mangler::getNameWithPrefix(Name, MO.getSymbolName(), *DL); + Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL); } else if (MO.isMBB()) { assert(Suffix.empty()); Sym = MO.getMBB()->getSymbol(); @@ -461,6 +460,7 @@ ReSimplify: // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B // if one of the registers is extended, but the other isn't. + case X86::VMOVZPQILo2PQIrr: case X86::VMOVAPDrr: case X86::VMOVAPDYrr: case X86::VMOVAPSrr: @@ -478,18 +478,19 @@ ReSimplify: unsigned NewOpc; switch (OutMI.getOpcode()) { default: llvm_unreachable("Invalid opcode"); - case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; - case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; - case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; - case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; - case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; - case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; - case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; - case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; - case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; - case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; - case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; - case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; + case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break; + case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break; + case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break; + case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break; + case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break; + case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break; + case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break; + case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break; + case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break; + case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break; + case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break; + case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break; + case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break; } OutMI.setOpcode(NewOpc); } @@ -532,6 +533,23 @@ ReSimplify: break; } + case X86::CLEANUPRET: { + // Replace CLEANUPRET with the appropriate RET. + OutMI = MCInst(); + OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget())); + break; + } + + case X86::CATCHRET: { + // Replace CATCHRET with the appropriate RET. + const X86Subtarget &Subtarget = AsmPrinter.getSubtarget(); + unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; + OutMI = MCInst(); + OutMI.setOpcode(getRetOpcode(Subtarget)); + OutMI.addOperand(MCOperand::createReg(ReturnReg)); + break; + } + // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
case X86::TAILJMPr: case X86::TAILJMPd: @@ -598,17 +616,29 @@ ReSimplify: case X86::RELEASE_MOV32mi: OutMI.setOpcode(X86::MOV32mi); goto ReSimplify; case X86::RELEASE_MOV64mi32: OutMI.setOpcode(X86::MOV64mi32); goto ReSimplify; case X86::RELEASE_ADD8mi: OutMI.setOpcode(X86::ADD8mi); goto ReSimplify; + case X86::RELEASE_ADD8mr: OutMI.setOpcode(X86::ADD8mr); goto ReSimplify; case X86::RELEASE_ADD32mi: OutMI.setOpcode(X86::ADD32mi); goto ReSimplify; + case X86::RELEASE_ADD32mr: OutMI.setOpcode(X86::ADD32mr); goto ReSimplify; case X86::RELEASE_ADD64mi32: OutMI.setOpcode(X86::ADD64mi32); goto ReSimplify; + case X86::RELEASE_ADD64mr: OutMI.setOpcode(X86::ADD64mr); goto ReSimplify; case X86::RELEASE_AND8mi: OutMI.setOpcode(X86::AND8mi); goto ReSimplify; + case X86::RELEASE_AND8mr: OutMI.setOpcode(X86::AND8mr); goto ReSimplify; case X86::RELEASE_AND32mi: OutMI.setOpcode(X86::AND32mi); goto ReSimplify; + case X86::RELEASE_AND32mr: OutMI.setOpcode(X86::AND32mr); goto ReSimplify; case X86::RELEASE_AND64mi32: OutMI.setOpcode(X86::AND64mi32); goto ReSimplify; + case X86::RELEASE_AND64mr: OutMI.setOpcode(X86::AND64mr); goto ReSimplify; case X86::RELEASE_OR8mi: OutMI.setOpcode(X86::OR8mi); goto ReSimplify; + case X86::RELEASE_OR8mr: OutMI.setOpcode(X86::OR8mr); goto ReSimplify; case X86::RELEASE_OR32mi: OutMI.setOpcode(X86::OR32mi); goto ReSimplify; + case X86::RELEASE_OR32mr: OutMI.setOpcode(X86::OR32mr); goto ReSimplify; case X86::RELEASE_OR64mi32: OutMI.setOpcode(X86::OR64mi32); goto ReSimplify; + case X86::RELEASE_OR64mr: OutMI.setOpcode(X86::OR64mr); goto ReSimplify; case X86::RELEASE_XOR8mi: OutMI.setOpcode(X86::XOR8mi); goto ReSimplify; + case X86::RELEASE_XOR8mr: OutMI.setOpcode(X86::XOR8mr); goto ReSimplify; case X86::RELEASE_XOR32mi: OutMI.setOpcode(X86::XOR32mi); goto ReSimplify; + case X86::RELEASE_XOR32mr: OutMI.setOpcode(X86::XOR32mr); goto ReSimplify; case X86::RELEASE_XOR64mi32: OutMI.setOpcode(X86::XOR64mi32); goto ReSimplify; + case X86::RELEASE_XOR64mr: OutMI.setOpcode(X86::XOR64mr); goto ReSimplify; case X86::RELEASE_INC8m: OutMI.setOpcode(X86::INC8m); goto ReSimplify; case X86::RELEASE_INC16m: OutMI.setOpcode(X86::INC16m); goto ReSimplify; case X86::RELEASE_INC32m: OutMI.setOpcode(X86::INC32m); goto ReSimplify; @@ -875,7 +905,10 @@ void X86AsmPrinter::LowerFAULTING_LOAD_OP(const MachineInstr &MI, MCInst LoadMI; LoadMI.setOpcode(LoadOpcode); - LoadMI.addOperand(MCOperand::createReg(LoadDefRegister)); + + if (LoadDefRegister != X86::NoRegister) + LoadMI.addOperand(MCOperand::createReg(LoadDefRegister)); + for (auto I = MI.operands_begin() + LoadOperandsBeginIdx, E = MI.operands_end(); I != E; ++I) @@ -1062,6 +1095,18 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86ATTInstPrinter::getRegisterName(Reg)); break; } + case X86::CLEANUPRET: { + // Lower these as normal, but add some comments. + OutStreamer->AddComment("CLEANUPRET"); + break; + } + + case X86::CATCHRET: { + // Lower these as normal, but add some comments. + OutStreamer->AddComment("CATCHRET"); + break; + } + case X86::TAILJMPr: case X86::TAILJMPm: case X86::TAILJMPd: @@ -1095,12 +1140,30 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32) .addExpr(MCSymbolRefExpr::create(PICBase, OutContext))); + const X86FrameLowering* FrameLowering = + MF->getSubtarget().getFrameLowering(); + bool hasFP = FrameLowering->hasFP(*MF); + + // TODO: This is needed only if we require precise CFA. 
+ bool HasActiveDwarfFrame = OutStreamer->getNumFrameInfos() && + !OutStreamer->getDwarfFrameInfos().back().End; + + int stackGrowth = -RI->getSlotSize(); + + if (HasActiveDwarfFrame && !hasFP) { + OutStreamer->EmitCFIAdjustCfaOffset(-stackGrowth); + } + // Emit the label. OutStreamer->EmitLabel(PICBase); // popl $reg EmitAndCountInstruction(MCInstBuilder(X86::POP32r) .addReg(MI->getOperand(0).getReg())); + + if (HasActiveDwarfFrame && !hasFP) { + OutStreamer->EmitCFIAdjustCfaOffset(stackGrowth); + } return; } @@ -1206,19 +1269,48 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } - // Lower PSHUFB and VPERMILP normally but add a comment if we can find - // a constant shuffle mask. We won't be able to do this at the MC layer - // because the mask isn't an immediate. + // Lower PSHUFB and VPERMILP normally but add a comment if we can find + // a constant shuffle mask. We won't be able to do this at the MC layer + // because the mask isn't an immediate. case X86::PSHUFBrm: case X86::VPSHUFBrm: - case X86::VPSHUFBYrm: { + case X86::VPSHUFBYrm: + case X86::VPSHUFBZ128rm: + case X86::VPSHUFBZ128rmk: + case X86::VPSHUFBZ128rmkz: + case X86::VPSHUFBZ256rm: + case X86::VPSHUFBZ256rmk: + case X86::VPSHUFBZ256rmkz: + case X86::VPSHUFBZrm: + case X86::VPSHUFBZrmk: + case X86::VPSHUFBZrmkz: { if (!OutStreamer->isVerboseAsm()) break; - assert(MI->getNumOperands() > 5 && - "We should always have at least 5 operands!"); + unsigned SrcIdx, MaskIdx; + switch (MI->getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::PSHUFBrm: + case X86::VPSHUFBrm: + case X86::VPSHUFBYrm: + case X86::VPSHUFBZ128rm: + case X86::VPSHUFBZ256rm: + case X86::VPSHUFBZrm: + SrcIdx = 1; MaskIdx = 5; break; + case X86::VPSHUFBZ128rmkz: + case X86::VPSHUFBZ256rmkz: + case X86::VPSHUFBZrmkz: + SrcIdx = 2; MaskIdx = 6; break; + case X86::VPSHUFBZ128rmk: + case X86::VPSHUFBZ256rmk: + case X86::VPSHUFBZrmk: + SrcIdx = 3; MaskIdx = 7; break; + } + + assert(MI->getNumOperands() >= 6 && + "We should always have at least 6 operands!"); const MachineOperand &DstOp = MI->getOperand(0); - const MachineOperand &SrcOp = MI->getOperand(1); - const MachineOperand &MaskOp = MI->getOperand(5); + const MachineOperand &SrcOp = MI->getOperand(SrcIdx); + const MachineOperand &MaskOp = MI->getOperand(MaskIdx); if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; @@ -1240,35 +1332,53 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { const MachineOperand &SrcOp = MI->getOperand(1); const MachineOperand &MaskOp = MI->getOperand(5); + unsigned ElSize; + switch (MI->getOpcode()) { + default: llvm_unreachable("Invalid opcode"); + case X86::VPERMILPSrm: case X86::VPERMILPSYrm: ElSize = 32; break; + case X86::VPERMILPDrm: case X86::VPERMILPDYrm: ElSize = 64; break; + } + if (auto *C = getConstantFromPool(*MI, MaskOp)) { SmallVector Mask; - DecodeVPERMILPMask(C, Mask); + DecodeVPERMILPMask(C, ElSize, Mask); if (!Mask.empty()) OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, Mask)); } break; } - // For loads from a constant pool to a vector register, print the constant - // loaded. 
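The SrcIdx/MaskIdx table above encodes the operand layout of the masked AVX-512 forms: zero-masking (rmkz) inserts one k-register before the source, merge-masking (rmk) additionally carries the tied pass-through value, and the shuffle mask is always fetched from the displacement slot of the trailing 5-operand memory reference. A sketch of that arithmetic with a hypothetical helper:

// The memory reference contributes five trailing operands
// (base, scale, index, disp, segment); the constant-pool mask is named by
// the displacement. OpsBeforeMem counts dst plus any mask/pass-through/src.
unsigned maskDispOperandIdx(unsigned OpsBeforeMem) {
  const unsigned DispWithinMem = 3; // base=0, scale=1, index=2, disp=3
  return OpsBeforeMem + DispWithinMem;
}
// unmasked rm  : dst, src                  -> maskDispOperandIdx(2) == 5
// zeroing  rmkz: dst, kmask, src           -> maskDispOperandIdx(3) == 6
// merging  rmk : dst, passthru, kmask, src -> maskDispOperandIdx(4) == 7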
- case X86::MOVAPDrm: - case X86::VMOVAPDrm: - case X86::VMOVAPDYrm: - case X86::MOVUPDrm: - case X86::VMOVUPDrm: - case X86::VMOVUPDYrm: - case X86::MOVAPSrm: - case X86::VMOVAPSrm: - case X86::VMOVAPSYrm: - case X86::MOVUPSrm: - case X86::VMOVUPSrm: - case X86::VMOVUPSYrm: - case X86::MOVDQArm: - case X86::VMOVDQArm: - case X86::VMOVDQAYrm: - case X86::MOVDQUrm: - case X86::VMOVDQUrm: - case X86::VMOVDQUYrm: +#define MOV_CASE(Prefix, Suffix) \ + case X86::Prefix##MOVAPD##Suffix##rm: \ + case X86::Prefix##MOVAPS##Suffix##rm: \ + case X86::Prefix##MOVUPD##Suffix##rm: \ + case X86::Prefix##MOVUPS##Suffix##rm: \ + case X86::Prefix##MOVDQA##Suffix##rm: \ + case X86::Prefix##MOVDQU##Suffix##rm: + +#define MOV_AVX512_CASE(Suffix) \ + case X86::VMOVDQA64##Suffix##rm: \ + case X86::VMOVDQA32##Suffix##rm: \ + case X86::VMOVDQU64##Suffix##rm: \ + case X86::VMOVDQU32##Suffix##rm: \ + case X86::VMOVDQU16##Suffix##rm: \ + case X86::VMOVDQU8##Suffix##rm: \ + case X86::VMOVAPS##Suffix##rm: \ + case X86::VMOVAPD##Suffix##rm: \ + case X86::VMOVUPS##Suffix##rm: \ + case X86::VMOVUPD##Suffix##rm: + +#define CASE_ALL_MOV_RM() \ + MOV_CASE(, ) /* SSE */ \ + MOV_CASE(V, ) /* AVX-128 */ \ + MOV_CASE(V, Y) /* AVX-256 */ \ + MOV_AVX512_CASE(Z) \ + MOV_AVX512_CASE(Z256) \ + MOV_AVX512_CASE(Z128) + + // For loads from a constant pool to a vector register, print the constant + // loaded. + CASE_ALL_MOV_RM() if (!OutStreamer->isVerboseAsm()) break; if (MI->getNumOperands() > 4) @@ -1302,7 +1412,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (isa(COp)) { CS << "u"; } else if (auto *CI = dyn_cast(COp)) { - CS << CI->getZExtValue(); + if (CI->getBitWidth() <= 64) { + CS << CI->getZExtValue(); + } else { + // print multi-word constant as (w0,w1) + auto Val = CI->getValue(); + CS << "("; + for (int i = 0, N = Val.getNumWords(); i < N; ++i) { + if (i > 0) + CS << ","; + CS << Val.getRawData()[i]; + } + CS << ")"; + } } else if (auto *CF = dyn_cast(COp)) { SmallString<32> Str; CF->getValueAPF().toString(Str); diff --git a/lib/Target/X86/X86MachineFunctionInfo.cpp b/lib/Target/X86/X86MachineFunctionInfo.cpp index ac2cdc8c6567..c9e636f1eb00 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.cpp +++ b/lib/Target/X86/X86MachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- X86MachineFuctionInfo.cpp - X86 machine function info -------------===// +//===-- X86MachineFunctionInfo.cpp - X86 machine function info ------------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index e6db9708b677..3a7a98db50f4 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===-- X86MachineFuctionInfo.h - X86 machine function info -----*- C++ -*-===// +//===-- X86MachineFunctionInfo.h - X86 machine function info ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -84,8 +84,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// of pushes to pass function parameters. bool HasPushSequences = false; - /// True if the function uses llvm.x86.seh.restoreframe, and it needed a spill - /// slot for the frame pointer. + /// True if the function recovers from an SEH exception, and therefore needs + /// to spill and restore the frame pointer. 
bool HasSEHFramePtrSave = false; /// The frame index of a stack object containing the original frame pointer @@ -100,7 +100,7 @@ private: public: X86MachineFunctionInfo() = default; - explicit X86MachineFunctionInfo(MachineFunction &MF) {}; + explicit X86MachineFunctionInfo(MachineFunction &MF) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp new file mode 100644 index 000000000000..58020d909a43 --- /dev/null +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -0,0 +1,326 @@ +//===-- X86OptimizeLEAs.cpp - optimize usage of LEA instructions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass that performs some optimizations with LEA +// instructions in order to improve code size. +// Currently, it does one thing: +// 1) Address calculations in load and store instructions are replaced by +// existing LEA def registers where possible. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-optimize-LEAs" + +static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden, cl::desc("X86: Enable LEA optimizations."), cl::init(false)); + +STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions"); + +namespace { +class OptimizeLEAPass : public MachineFunctionPass { +public: + OptimizeLEAPass() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { return "X86 LEA Optimize"; } + + /// \brief Loop over all of the basic blocks, replacing address + /// calculations in load and store instructions when the address has + /// already been calculated by an LEA. Also, remove redundant LEAs. + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + /// \brief Returns the distance between two instructions inside one basic block. + /// A negative result means that the instructions occur in reverse order. + int calcInstrDist(const MachineInstr &First, const MachineInstr &Last); + + /// \brief Choose the best \p LEA instruction from the \p List to replace + /// the address calculation in the \p MI instruction. Return the address displacement + /// and the distance between \p MI and the chosen \p LEA in \p AddrDispShift + /// and \p Dist. + bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List, + const MachineInstr &MI, MachineInstr *&LEA, + int64_t &AddrDispShift, int &Dist); + + /// \brief Returns true if two machine operands are identical and they are not + /// physical registers. + bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2); + + /// \brief Returns true if the instruction is an LEA. + bool isLEA(const MachineInstr &MI); + + /// \brief Returns true if two instructions have memory operands that only + /// differ by displacement.
The numbers of the first memory operands for both + /// instructions are specified through \p N1 and \p N2. The address + /// displacement is returned through \p AddrDispShift. + bool isSimilarMemOp(const MachineInstr &MI1, unsigned N1, + const MachineInstr &MI2, unsigned N2, + int64_t &AddrDispShift); + + /// \brief Find all LEA instructions in the basic block. + void findLEAs(const MachineBasicBlock &MBB, + SmallVectorImpl<MachineInstr *> &List); + + /// \brief Removes redundant address calculations. + bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List); + + MachineRegisterInfo *MRI; + const X86InstrInfo *TII; + const X86RegisterInfo *TRI; + + static char ID; +}; +char OptimizeLEAPass::ID = 0; +} + +FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } + +int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, + const MachineInstr &Last) { + const MachineBasicBlock *MBB = First.getParent(); + + // Both instructions must be in the same basic block. + assert(Last.getParent() == MBB && + "Instructions are in different basic blocks"); + + return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) - + std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First)); +} + +// Find the best LEA instruction in the List to replace address recalculation in +// MI. Such an LEA must meet these requirements: +// 1) The address calculated by the LEA differs only by the displacement from +// the address used in MI. +// 2) The register class of the definition of the LEA is compatible with the +// register class of the address base register of MI. +// 3) Displacement of the new memory operand should fit in 1 byte if possible. +// 4) The LEA should be as close to MI as possible, and prior to it if +// possible. +bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List, + const MachineInstr &MI, MachineInstr *&LEA, + int64_t &AddrDispShift, int &Dist) { + const MachineFunction *MF = MI.getParent()->getParent(); + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) + + X86II::getOperandBias(Desc); + + LEA = nullptr; + + // Loop over all LEA instructions. + for (auto DefMI : List) { + int64_t AddrDispShiftTemp = 0; + + // Compare the instructions' memory operands. + if (!isSimilarMemOp(MI, MemOpNo, *DefMI, 1, AddrDispShiftTemp)) + continue; + + // Make sure the address displacement fits in 4 bytes. + if (!isInt<32>(AddrDispShiftTemp)) + continue; + + // Check that the LEA def register can be used as the MI address base. Some + // instructions can use a limited set of registers as address base, for + // example MOV8mr_NOREX. We could constrain the register class of the LEA + // def to suit MI, however since this case is very rare and hard to + // reproduce in a test it's just more reliable to skip the LEA. + if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) != + MRI->getRegClass(DefMI->getOperand(0).getReg())) + continue; + + // Choose the closest LEA instruction from the list, prior to MI if + // possible. Note that we take the resulting address displacement into + // account as well. Also note that the list is sorted by the order in which the LEAs + // occur, so the break condition is pretty simple. + int DistTemp = calcInstrDist(*DefMI, MI); + assert(DistTemp != 0 && + "The distance between two different instructions cannot be zero"); + if (DistTemp > 0 || LEA == nullptr) { + // Do not update the returned LEA if the current one provides a displacement + // that fits in 1 byte while the new candidate's does not.
+ if (LEA != nullptr && !isInt<8>(AddrDispShiftTemp) && + isInt<8>(AddrDispShift)) + continue; + + LEA = DefMI; + AddrDispShift = AddrDispShiftTemp; + Dist = DistTemp; + } + + // FIXME: Maybe we should not always stop at the first LEA after MI. + if (DistTemp < 0) + break; + } + + return LEA != nullptr; +} + +bool OptimizeLEAPass::isIdenticalOp(const MachineOperand &MO1, + const MachineOperand &MO2) { + return MO1.isIdenticalTo(MO2) && + (!MO1.isReg() || + !TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); +} + +bool OptimizeLEAPass::isLEA(const MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + return Opcode == X86::LEA16r || Opcode == X86::LEA32r || + Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; +} + +// Check if MI1 and MI2 have memory operands which represent addresses that +// differ only by displacement. +bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1, + const MachineInstr &MI2, unsigned N2, + int64_t &AddrDispShift) { + // Address base, scale, index and segment operands must be identical. + static const int IdenticalOpNums[] = {X86::AddrBaseReg, X86::AddrScaleAmt, + X86::AddrIndexReg, X86::AddrSegmentReg}; + for (auto &N : IdenticalOpNums) + if (!isIdenticalOp(MI1.getOperand(N1 + N), MI2.getOperand(N2 + N))) + return false; + + // Address displacement operands may differ by a constant. + const MachineOperand *Op1 = &MI1.getOperand(N1 + X86::AddrDisp); + const MachineOperand *Op2 = &MI2.getOperand(N2 + X86::AddrDisp); + if (!isIdenticalOp(*Op1, *Op2)) { + if (Op1->isImm() && Op2->isImm()) + AddrDispShift = Op1->getImm() - Op2->getImm(); + else if (Op1->isGlobal() && Op2->isGlobal() && + Op1->getGlobal() == Op2->getGlobal()) + AddrDispShift = Op1->getOffset() - Op2->getOffset(); + else + return false; + } + + return true; +} + +void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, + SmallVectorImpl<MachineInstr *> &List) { + for (auto &MI : MBB) { + if (isLEA(MI)) + List.push_back(const_cast<MachineInstr *>(&MI)); + } +} + +// Try to find load and store instructions which recalculate addresses already +// calculated by some LEA and replace their memory operands with the LEA's def +// register. +bool OptimizeLEAPass::removeRedundantAddrCalc( + const SmallVectorImpl<MachineInstr *> &List) { + bool Changed = false; + + assert(List.size() > 0); + MachineBasicBlock *MBB = List[0]->getParent(); + + // Process all instructions in the basic block. + for (auto I = MBB->begin(), E = MBB->end(); I != E;) { + MachineInstr &MI = *I++; + unsigned Opcode = MI.getOpcode(); + + // The instruction must be a load or a store. + if (!MI.mayLoadOrStore()) + continue; + + // Get the number of the first memory operand. + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, Opcode); + + // If the instruction has no memory operand, skip it. + if (MemOpNo < 0) + continue; + + MemOpNo += X86II::getOperandBias(Desc); + + // Get the best LEA instruction to replace the address calculation. + MachineInstr *DefMI; + int64_t AddrDispShift; + int Dist; + if (!chooseBestLEA(List, MI, DefMI, AddrDispShift, Dist)) + continue; + + // If the LEA occurs before the current instruction, we can simply rewrite + // the instruction. If the LEA occurs after it, we can hoist the LEA above + // the instruction, which then lets us rewrite it. Since the LEA and the + // instruction have similar memory operands (and thus the same def + // instructions for those operands), we can always do this without + // using a register before its def.
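// Illustrative sketch, not part of the patch: the displacement-delta match
// that isSimilarMemOp() above implements, modeled on plain values. Two
// addresses are "similar" when base, scale, index and segment all agree and
// only the immediate displacement differs; the delta is what chooseBestLEA()
// then tests with isInt<32>/isInt<8>. The MemAddr struct is a hypothetical
// stand-in for the five X86 memory operands.
#include <cstdint>

struct MemAddr {
  unsigned Base, Index, Segment;
  int64_t Scale, Disp;
};

static bool similarUpToDisp(const MemAddr &A, const MemAddr &B,
                            int64_t &AddrDispShift) {
  if (A.Base != B.Base || A.Scale != B.Scale || A.Index != B.Index ||
      A.Segment != B.Segment)
    return false;                  // Something other than the displacement differs.
  AddrDispShift = A.Disp - B.Disp; // Only the displacement differs; record the delta.
  return true;
}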
+ if (Dist < 0) { + DefMI->removeFromParent(); + MBB->insert(MachineBasicBlock::iterator(&MI), DefMI); + } + + // Since we can possibly extend register lifetime, clear kill flags. + MRI->clearKillFlags(DefMI->getOperand(0).getReg()); + + ++NumSubstLEAs; + DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump();); + + // Change the instruction operands. + MI.getOperand(MemOpNo + X86::AddrBaseReg) .ChangeToRegister(DefMI->getOperand(0).getReg(), false); + MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1); + MI.getOperand(MemOpNo + X86::AddrIndexReg) .ChangeToRegister(X86::NoRegister, false); + MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift); + MI.getOperand(MemOpNo + X86::AddrSegmentReg) .ChangeToRegister(X86::NoRegister, false); + + DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump();); + + Changed = true; + } + + return Changed; +} + +bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + // Perform this optimization only if we care about code size. + if (!EnableX86LEAOpt || !MF.getFunction()->optForSize()) + return false; + + MRI = &MF.getRegInfo(); + TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); + TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo(); + + // Process all basic blocks. + for (auto &MBB : MF) { + SmallVector<MachineInstr *, 16> LEAs; + + // Find all LEA instructions in the basic block. + findLEAs(MBB, LEAs); + + // If the current basic block has no LEAs, move on to the next one. + if (LEAs.empty()) + continue; + + // Remove redundant address calculations. + Changed |= removeRedundantAddrCalc(LEAs); + } + + return Changed; +} diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp index 143e70bda9e7..0f425e28fa7d 100644 --- a/lib/Target/X86/X86PadShortFunction.cpp +++ b/lib/Target/X86/X86PadShortFunction.cpp @@ -93,8 +93,7 @@ FunctionPass *llvm::createX86PadShortFunctions() { /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// NOOP instructions before early exits.
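// Illustrative sketch, not part of the patch: the operand rewrite that
// removeRedundantAddrCalc() above performs once a suitable LEA is found. The
// access is re-based on the LEA's def register plus the displacement delta,
// e.g. `mov 16(%rbx,%rcx,4), %eax` becomes `mov 8(%rdi), %eax` when %rdi
// holds `lea 8(%rbx,%rcx,4)`. MemAddr mirrors the struct from the previous
// sketch; register encodings are hypothetical.
#include <cstdint>

struct MemAddr {
  unsigned Base, Index, Segment;
  int64_t Scale, Disp;
};

static void rebaseOnLEA(MemAddr &M, unsigned LEADefReg, int64_t AddrDispShift) {
  M.Base = LEADefReg;     // Base becomes the LEA's def register.
  M.Scale = 1;            // Scale collapses to 1 ...
  M.Index = 0;            // ... and the index register is dropped.
  M.Disp = AddrDispShift; // Only the displacement delta remains.
  M.Segment = 0;          // Segment is cleared, as in the pass.
}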
bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { - if (MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize) || - MF.getFunction()->hasFnAttribute(Attribute::MinSize)) { + if (MF.getFunction()->optForSize()) { return false; } @@ -107,7 +106,7 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) { // Search through basic blocks and mark the ones that have early returns ReturnBBs.clear(); VisitedBBs.clear(); - findReturns(MF.begin()); + findReturns(&MF.front()); bool MadeChange = false; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index d8495e53e0e3..58404433e1ae 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -27,7 +27,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" @@ -44,12 +43,6 @@ using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "X86GenRegisterInfo.inc" -cl::opt -ForceStackAlign("force-align-stack", - cl::desc("Force align the stack to the minimum alignment" - " needed for the function."), - cl::init(false), cl::Hidden); - static cl::opt EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); @@ -174,20 +167,33 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, if (Subtarget.isTarget64BitLP64()) return &X86::GR64_NOSPRegClass; return &X86::GR32_NOSPRegClass; - case 2: // Available for tailcall (not callee-saved GPRs). - const Function *F = MF.getFunction(); - if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) - return &X86::GR64_TCW64RegClass; - else if (Is64Bit) - return &X86::GR64_TCRegClass; - - bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false); - if (hasHipeCC) - return &X86::GR32RegClass; - return &X86::GR32_TCRegClass; + case 2: // NOREX GPRs. + if (Subtarget.isTarget64BitLP64()) + return &X86::GR64_NOREXRegClass; + return &X86::GR32_NOREXRegClass; + case 3: // NOREX GPRs except the stack pointer (for encoding reasons). + if (Subtarget.isTarget64BitLP64()) + return &X86::GR64_NOREX_NOSPRegClass; + return &X86::GR32_NOREX_NOSPRegClass; + case 4: // Available for tailcall (not callee-saved GPRs). + return getGPRsForTailCall(MF); } } +const TargetRegisterClass * +X86RegisterInfo::getGPRsForTailCall(const MachineFunction &MF) const { + const Function *F = MF.getFunction(); + if (IsWin64 || (F && F->getCallingConv() == CallingConv::X86_64_Win64)) + return &X86::GR64_TCW64RegClass; + else if (Is64Bit) + return &X86::GR64_TCRegClass; + + bool hasHipeCC = (F ? 
F->getCallingConv() == CallingConv::HiPE : false); + if (hasHipeCC) + return &X86::GR32RegClass; + return &X86::GR32_TCRegClass; +} + const TargetRegisterClass * X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &X86::CCRRegClass) { @@ -222,6 +228,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const MCPhysReg * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const X86Subtarget &Subtarget = MF->getSubtarget(); + bool HasSSE = Subtarget.hasSSE1(); bool HasAVX = Subtarget.hasAVX(); bool HasAVX512 = Subtarget.hasAVX512(); bool CallsEHReturn = MF->getMMI().callsEHReturn(); @@ -241,6 +248,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (HasAVX) return CSR_64_RT_AllRegs_AVX_SaveList; return CSR_64_RT_AllRegs_SaveList; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_SaveList; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; @@ -254,6 +265,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_64_Intel_OCL_BI_SaveList; break; } + case CallingConv::HHVM: + return CSR_64_HHVM_SaveList; case CallingConv::Cold: if (Is64Bit) return CSR_64_MostRegs_SaveList; @@ -264,6 +277,18 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (CallsEHReturn) return CSR_64EHRet_SaveList; return CSR_64_SaveList; + case CallingConv::X86_INTR: + if (Is64Bit) { + if (HasAVX) + return CSR_64_AllRegs_AVX_SaveList; + else + return CSR_64_AllRegs_SaveList; + } else { + if (HasSSE) + return CSR_32_AllRegs_SSE_SaveList; + else + return CSR_32_AllRegs_SaveList; + } default: break; } @@ -284,6 +309,7 @@ const uint32_t * X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { const X86Subtarget &Subtarget = MF.getSubtarget(); + bool HasSSE = Subtarget.hasSSE1(); bool HasAVX = Subtarget.hasAVX(); bool HasAVX512 = Subtarget.hasAVX512(); @@ -301,6 +327,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, if (HasAVX) return CSR_64_RT_AllRegs_AVX_RegMask; return CSR_64_RT_AllRegs_RegMask; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_RegMask; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; @@ -314,16 +344,30 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_64_Intel_OCL_BI_RegMask; break; } + case CallingConv::HHVM: + return CSR_64_HHVM_RegMask; case CallingConv::Cold: if (Is64Bit) return CSR_64_MostRegs_RegMask; break; - default: - break; case CallingConv::X86_64_Win64: return CSR_Win64_RegMask; case CallingConv::X86_64_SysV: return CSR_64_RegMask; + case CallingConv::X86_INTR: + if (Is64Bit) { + if (HasAVX) + return CSR_64_AllRegs_AVX_RegMask; + else + return CSR_64_AllRegs_RegMask; + } else { + if (HasSSE) + return CSR_32_AllRegs_SSE_RegMask; + else + return CSR_32_AllRegs_RegMask; + } + default: + break; } // Unlike getCalleeSavedRegs(), we don't have MMI so we can't check @@ -341,6 +385,10 @@ X86RegisterInfo::getNoPreservedMask() const { return CSR_NoRegs_RegMask; } +const uint32_t *X86RegisterInfo::getDarwinTLSCallPreservedMask() const { + return CSR_64_TLS_Darwin_RegMask; +} + BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); const X86FrameLowering *TFI = getFrameLowering(MF); @@ -371,8 +419,7 @@ BitVector 
X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { "Stack realignment in presence of dynamic allocas is not supported with" "this calling convention."); - unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), MVT::i64, - false); + unsigned BasePtr = getX86SubSuperRegister(getBaseRegister(), 64); for (MCSubRegIterator I(BasePtr, this, /*IncludeSelf=*/true); I.isValid(); ++I) Reserved.set(*I); @@ -439,6 +486,10 @@ void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { // Stack Frame Processing methods //===----------------------------------------------------------------------===// +static bool CantUseSP(const MachineFrameInfo *MFI) { + return MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment(); +} + bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -451,13 +502,11 @@ bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { // reference locals while also adjusting the stack pointer. When we can't // use both the SP and the FP, we need a separate base pointer register. bool CantUseFP = needsStackRealignment(MF); - bool CantUseSP = - MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment(); - return CantUseFP && CantUseSP; + return CantUseFP && CantUseSP(MFI); } bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { - if (MF.getFunction()->hasFnAttribute("no-realign-stack")) + if (!TargetRegisterInfo::canRealignStack(MF)) return false; const MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -470,26 +519,11 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { // If a base pointer is necessary. Check that it isn't too late to reserve // it. - if (MFI->hasVarSizedObjects()) + if (CantUseSP(MFI)) return MRI->canReserveReg(BasePtr); return true; } -bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const X86FrameLowering *TFI = getFrameLowering(MF); - const Function *F = MF.getFunction(); - unsigned StackAlign = TFI->getStackAlignment(); - bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || - F->hasFnAttribute(Attribute::StackAlignment)); - - // If we've requested that we force align the stack do so now. - if (ForceStackAlign) - return canRealignStack(MF); - - return requiresRealignment && canRealignStack(MF); -} - bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const { // Since X86 defines assignCalleeSavedSpillSlots which always return true @@ -510,6 +544,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, unsigned Opc = MI.getOpcode(); bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm || Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64; + if (hasBasePointer(MF)) BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister()); else if (needsStackRealignment(MF)) @@ -524,14 +559,11 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. 
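// Illustrative sketch, not part of the patch: the decision that
// hasBasePointer()/CantUseSP() above encode. A separate base pointer is
// needed only when stack realignment claims the frame pointer *and* dynamic
// stack adjustments make the stack pointer unusable for addressing locals.
// Parameter names are hypothetical stand-ins for the MachineFrameInfo queries.
static bool cantUseSP(bool HasVarSizedObjects, bool HasOpaqueSPAdjustment) {
  return HasVarSizedObjects || HasOpaqueSPAdjustment;
}

static bool needsBasePointer(bool NeedsStackRealignment,
                             bool HasVarSizedObjects,
                             bool HasOpaqueSPAdjustment) {
  bool CantUseFP = NeedsStackRealignment; // FP is repurposed for realignment.
  return CantUseFP && cantUseSP(HasVarSizedObjects, HasOpaqueSPAdjustment);
}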
+ unsigned IgnoredFrameReg; if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); int Offset; - if (IsWinEH) - Offset = TFI->getFrameIndexOffsetFromSP(MF, FrameIndex); - else - Offset = TFI->getFrameIndexOffset(MF, FrameIndex); + Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); FI.ChangeToImmediate(Offset); return; } @@ -540,7 +572,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // register as source operand, semantic is the same and destination is // 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided. if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr)) - BasePtr = getX86SubSuperRegister(BasePtr, MVT::i64, false); + BasePtr = getX86SubSuperRegister(BasePtr, 64); // This must be part of a four operand memory reference. Replace the // FrameIndex with base register with EBP. Add an offset to the offset. @@ -553,7 +585,7 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, const MachineFrameInfo *MFI = MF.getFrameInfo(); FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea(); } else - FIOffset = TFI->getFrameIndexOffset(MF, FrameIndex); + FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); if (BasePtr == StackPtr) FIOffset += SPAdj; @@ -592,193 +624,11 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const { const X86Subtarget &Subtarget = MF.getSubtarget(); unsigned FrameReg = getFrameRegister(MF); if (Subtarget.isTarget64BitILP32()) - FrameReg = getX86SubSuperRegister(FrameReg, MVT::i32, false); + FrameReg = getX86SubSuperRegister(FrameReg, 32); return FrameReg; } -namespace llvm { -unsigned getX86SubSuperRegisterOrZero(unsigned Reg, MVT::SimpleValueType VT, - bool High) { - switch (VT) { - default: return 0; - case MVT::i8: - if (High) { - switch (Reg) { - default: return getX86SubSuperRegister(Reg, MVT::i64); - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::SI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::DI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::BP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::SP; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::AH; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::DH; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::CH; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::BH; - } - } else { - switch (Reg) { - default: return 0; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::AL; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::DL; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::CL; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::BL; - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::SIL; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::DIL; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::BPL; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::SPL; - case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: - return X86::R8B; - case X86::R9B: case X86::R9W: case 
X86::R9D: case X86::R9: - return X86::R9B; - case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: - return X86::R10B; - case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: - return X86::R11B; - case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: - return X86::R12B; - case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: - return X86::R13B; - case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: - return X86::R14B; - case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: - return X86::R15B; - } - } - case MVT::i16: - switch (Reg) { - default: return 0; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::AX; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::DX; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::CX; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::BX; - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::SI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::DI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::BP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::SP; - case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: - return X86::R8W; - case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: - return X86::R9W; - case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: - return X86::R10W; - case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: - return X86::R11W; - case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: - return X86::R12W; - case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: - return X86::R13W; - case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: - return X86::R14W; - case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: - return X86::R15W; - } - case MVT::i32: - switch (Reg) { - default: return 0; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::EAX; - case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::EDX; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::ECX; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::EBX; - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::ESI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::EDI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::EBP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::ESP; - case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: - return X86::R8D; - case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: - return X86::R9D; - case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: - return X86::R10D; - case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: - return X86::R11D; - case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: - return X86::R12D; - case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: - return X86::R13D; - case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: - return X86::R14D; - case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: - return X86::R15D; - } - case MVT::i64: - switch (Reg) { - default: return 0; - case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX: - return X86::RAX; - case 
X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX: - return X86::RDX; - case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX: - return X86::RCX; - case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX: - return X86::RBX; - case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI: - return X86::RSI; - case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI: - return X86::RDI; - case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP: - return X86::RBP; - case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP: - return X86::RSP; - case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8: - return X86::R8; - case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9: - return X86::R9; - case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10: - return X86::R10; - case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11: - return X86::R11; - case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12: - return X86::R12; - case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13: - return X86::R13; - case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14: - return X86::R14; - case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15: - return X86::R15; - } - } -} - -unsigned getX86SubSuperRegister(unsigned Reg, MVT::SimpleValueType VT, - bool High) { - unsigned Res = getX86SubSuperRegisterOrZero(Reg, VT, High); - if (Res == 0) - llvm_unreachable("Unexpected register or VT"); - return Res; -} - -unsigned get512BitSuperRegister(unsigned Reg) { +unsigned llvm::get512BitSuperRegister(unsigned Reg) { if (Reg >= X86::XMM0 && Reg <= X86::XMM31) return X86::ZMM0 + (Reg - X86::XMM0); if (Reg >= X86::YMM0 && Reg <= X86::YMM31) @@ -787,5 +637,3 @@ unsigned get512BitSuperRegister(unsigned Reg) { return Reg; llvm_unreachable("Unexpected SIMD register"); } - -} diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 8de1d0bf8ec8..f014c8f6ff61 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -87,6 +87,11 @@ public: const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override; + /// getGPRsForTailCall - Returns a register class with registers that can be + /// used in forming tail calls. + const TargetRegisterClass * + getGPRsForTailCall(const MachineFunction &MF) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; @@ -96,7 +101,11 @@ public: getCalleeSavedRegs(const MachineFunction* MF) const override; const uint32_t *getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override; - const uint32_t *getNoPreservedMask() const; + const uint32_t *getNoPreservedMask() const override; + + // Calls involved in thread-local variable lookup save more registers than + // normal calls, so they need a different mask to represent this. 
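// Illustrative sketch, not part of the patch: the direction of the API change
// above - getX86SubSuperRegister() now takes the register width in bits
// instead of an MVT. A toy mapping for the A-register family only; the real
// helper covers every GPR plus an optional high-8-bit form.
enum ToyReg { AL, AH, AX, EAX, RAX, NoReg };

static ToyReg subSuperA(unsigned SizeInBits) {
  switch (SizeInBits) {
  case 8:  return AL;  // Low byte (AH is the separate "high" form).
  case 16: return AX;
  case 32: return EAX;
  case 64: return RAX;
  default: return NoReg;
  }
}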
+ const uint32_t *getDarwinTLSCallPreservedMask() const; /// getReservedRegs - Returns a bitset indexed by physical register number /// indicating if a register is a special register that has particular uses and @@ -108,9 +117,7 @@ public: bool hasBasePointer(const MachineFunction &MF) const; - bool canRealignStack(const MachineFunction &MF) const; - - bool needsStackRealignment(const MachineFunction &MF) const override; + bool canRealignStack(const MachineFunction &MF) const override; bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, int &FrameIdx) const override; @@ -128,16 +135,6 @@ public: unsigned getSlotSize() const { return SlotSize; } }; -/// Returns the sub or super register of a specific X86 register. -/// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) returns X86::AX. -/// Aborts on error. -unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false); - -/// Returns the sub or super register of a specific X86 register. -/// Like getX86SubSuperRegister() but returns 0 on error. -unsigned getX86SubSuperRegisterOrZero(unsigned, MVT::SimpleValueType, - bool High = false); - //get512BitRegister - X86 utility - returns 512-bit super register unsigned get512BitSuperRegister(unsigned Reg); diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index cdb151c26a05..56f0d9352d30 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -225,15 +225,15 @@ let SubRegIndices = [sub_ymm] in { } } - // Mask Registers, used by AVX-512 instructions. - def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>; - def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>; - def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>; - def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>; - def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>; - def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>; - def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>; - def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>; +// Mask Registers, used by AVX-512 instructions. +def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>; +def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>; +def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>; +def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>; +def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>; +def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>; +def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>; +def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>; // Floating point stack registers. These don't map one-to-one to the FP // pseudo registers, but we still mark them as aliasing FP registers. That @@ -375,7 +375,7 @@ def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)>; def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, R11, RIP)>; def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, - R8, R9, R11)>; + R8, R9, R10, R11, RIP)>; // GR8_NOREX - GR8 registers which do not require a REX prefix. def GR8_NOREX : RegisterClass<"X86", [i8], 8, @@ -423,6 +423,8 @@ def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>; def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>; +def FR128 : RegisterClass<"X86", [i128, f128], 128, (add FR32)>; + // FIXME: This sets up the floating point register files as though they are f64 // values, though they really are f80 values. 
This will cause us to spill @@ -442,10 +444,11 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> { } // Generic vector registers: VR64 and VR128. +// Ensure that float types are declared first - only float is legal on SSE1. def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>; -def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], +def VR128 : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], 128, (add FR32)>; -def VR256 : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], +def VR256 : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], 256, (sequence "YMM%u", 0, 15)>; // Status flags registers. @@ -459,8 +462,8 @@ def FPCCR : RegisterClass<"X86", [i16], 16, (add FPSW)> { } // AVX-512 vector/mask registers. -def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], 512, - (sequence "ZMM%u", 0, 31)>; +def VR512 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i64], + 512, (sequence "ZMM%u", 0, 31)>; // Scalar AVX-512 floating point registers. def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>; @@ -468,10 +471,10 @@ def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>; def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>; // Extended VR128 and VR256 for AVX-512 instructions -def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (add FR32X)>; -def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], - 256, (sequence "YMM%u", 0, 31)>; +def VR128X : RegisterClass<"X86", [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64], + 128, (add FR32X)>; +def VR256X : RegisterClass<"X86", [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64], + 256, (sequence "YMM%u", 0, 31)>; // Mask registers def VK1 : RegisterClass<"X86", [i1], 8, (sequence "K%u", 0, 7)> {let Size = 8;} @@ -491,4 +494,4 @@ def VK32WM : RegisterClass<"X86", [v32i1], 32, (add VK16WM)> {let Size = 32;} def VK64WM : RegisterClass<"X86", [v64i1], 64, (add VK32WM)> {let Size = 64;} // Bound registers -def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>; \ No newline at end of file +def BNDR : RegisterClass<"X86", [v2i64], 128, (sequence "BND%u", 0, 3)>; diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index ce79fcf9ad81..b1a01614b4a1 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -44,13 +44,10 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( return false; } -SDValue -X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - MachinePointerInfo DstPtrInfo) const { +SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast(Size); const X86Subtarget &Subtarget = DAG.getMachineFunction().getSubtarget(); @@ -74,10 +71,10 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast(Src); - if (const char *bzeroEntry = V && + if (const char *bzeroEntry = V && V->isNullValue() ? 
Subtarget.getBZeroEntry() : nullptr) { - EVT IntPtr = - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -94,7 +91,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 0) .setDiscardResult(); - std::pair CallResult = DAG.getTargetLoweringInfo().LowerCallTo(CLI); + std::pair CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } @@ -144,8 +141,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, BytesLeft = SizeVal % UBytes; } - Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), - InFlag); + Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), + InFlag); InFlag = Chain.getValue(1); } else { AVT = MVT::i8; @@ -172,9 +169,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : - X86::ECX, - Left, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, + Left, InFlag); InFlag = Chain.getValue(1); Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; @@ -249,17 +245,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( unsigned BytesLeft = SizeVal % UBytes; SDValue InFlag; - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : - X86::ECX, - Count, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, + Count, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, + Dst, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : - X86::ESI, - Src, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI, + Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index dff3624b7efe..8ef08c960f0b 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -44,9 +44,8 @@ X86EarlyIfConv("x86-early-ifcvt", cl::Hidden, cl::desc("Enable early if-conversion on X86")); -/// ClassifyBlockAddressReference - Classify a blockaddress reference for the -/// current subtarget according to how we should reference it in a non-pcrel -/// context. +/// Classify a blockaddress reference for the current subtarget according to how +/// we should reference it in a non-pcrel context. unsigned char X86Subtarget::ClassifyBlockAddressReference() const { if (isPICStyleGOT()) // 32-bit ELF targets. return X86II::MO_GOTOFF; @@ -58,9 +57,8 @@ unsigned char X86Subtarget::ClassifyBlockAddressReference() const { return X86II::MO_NO_FLAG; } -/// ClassifyGlobalReference - Classify a global variable reference for the -/// current subtarget according to how we should reference it in a non-pcrel -/// context. +/// Classify a global variable reference for the current subtarget according to +/// how we should reference it in a non-pcrel context. 
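// Illustrative sketch, not part of the patch: the three-way classification
// that ClassifyBlockAddressReference() above performs, reduced to plain
// enums. The flag names mirror the X86II values used in the function.
enum ToyPICStyle { PICStyleGOT, PICStyleStubPIC, PICStyleNone };
enum ToyRefFlag { MO_GOTOFF, MO_PIC_BASE_OFFSET, MO_NO_FLAG };

static ToyRefFlag classifyBlockAddress(ToyPICStyle Style) {
  if (Style == PICStyleGOT)     // 32-bit ELF targets.
    return MO_GOTOFF;
  if (Style == PICStyleStubPIC) // 32-bit Darwin PIC.
    return MO_PIC_BASE_OFFSET;
  return MO_NO_FLAG;            // Direct references are fine elsewhere.
}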
unsigned char X86Subtarget:: ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { // DLLImport only exists on windows, it is implemented as a load from a @@ -147,9 +145,9 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { } -/// getBZeroEntry - This function returns the name of a function which has an -/// interface like the non-standard bzero function, if such a function exists on -/// the current subtarget and it is considered prefereable over memset with zero +/// This function returns the name of a function which has an interface like +/// the non-standard bzero function, if such a function exists on the +/// current subtarget and it is considered preferable over memset with zero /// passed as the second argument. Otherwise it returns null. const char *X86Subtarget::getBZeroEntry() const { // Darwin 10 has a __bzero entry point for this purpose. @@ -166,8 +164,7 @@ bool X86Subtarget::hasSinCos() const { is64Bit(); } -/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls -/// to immediate address. +/// Return true if the subtarget allows calls to immediate address. bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const { // FIXME: I386 PE/COFF supports PC relative calls using IMAGE_REL_I386_REL32 // but WinCOFFObjectWriter::RecordRelocation cannot emit them. Once it does, @@ -192,9 +189,25 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+64bit,+sse2"; } + // LAHF/SAHF are always supported in non-64-bit mode. + if (!In64BitMode) { + if (!FullFS.empty()) + FullFS = "+sahf," + FullFS; + else + FullFS = "+sahf"; + } + + // Parse features string and set the CPU. ParseSubtargetFeatures(CPUName, FullFS); + // All CPUs that implement SSE4.2 or SSE4A support unaligned accesses of + // 16-bytes and under that are reasonably fast. These features were + // introduced with Intel's Nehalem/Silvermont and AMD's Family10h + // micro-architectures respectively. 
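// Illustrative sketch, not part of the patch: how initSubtargetFeatures()
// above splices the always-available "+sahf" feature into the feature string
// before parsing, shown here with std::string instead of LLVM's string
// machinery.
#include <string>

static std::string addSAHF(bool In64BitMode, std::string FullFS) {
  // LAHF/SAHF are always supported outside 64-bit mode, so force the flag on.
  if (!In64BitMode)
    FullFS = FullFS.empty() ? std::string("+sahf") : "+sahf," + FullFS;
  return FullFS;
}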
+ if (hasSSE42() || hasSSE4A()) + IsUAMem16Slow = false; + InstrItins = getInstrItineraryForCPU(CPUName); // It's important to keep the MCSubtargetInfo feature bits in sync with @@ -224,13 +237,18 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { } void X86Subtarget::initializeEnvironment() { - X86SSELevel = NoMMXSSE; + X86SSELevel = NoSSE; X863DNowLevel = NoThreeDNow; HasCMov = false; HasX86_64 = false; HasPOPCNT = false; HasSSE4A = false; HasAES = false; + HasFXSR = false; + HasXSAVE = false; + HasXSAVEOPT = false; + HasXSAVEC = false; + HasXSAVES = false; HasPCLMUL = false; HasFMA = false; HasFMA4 = false; @@ -252,13 +270,15 @@ void X86Subtarget::initializeEnvironment() { HasBWI = false; HasVLX = false; HasADX = false; + HasPKU = false; HasSHA = false; HasPRFCHW = false; HasRDSEED = false; + HasLAHFSAHF = false; HasMPX = false; IsBTMemSlow = false; IsSHLDSlow = false; - IsUAMemFast = false; + IsUAMem16Slow = false; IsUAMem32Slow = false; HasSSEUnalignedMem = false; HasCmpxchg16b = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index f026d4295f71..13d1026dcaa0 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -47,11 +47,11 @@ class X86Subtarget final : public X86GenSubtargetInfo { protected: enum X86SSEEnum { - NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F + NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F }; enum X863DNowEnum { - NoThreeDNow, ThreeDNow, ThreeDNowA + NoThreeDNow, MMX, ThreeDNow, ThreeDNowA }; enum X86ProcFamilyEnum { @@ -64,10 +64,10 @@ protected: /// Which PIC style to use PICStyles::Style PICStyle; - /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. + /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. X86SSEEnum X86SSELevel; - /// 3DNow, 3DNow Athlon, or none supported. + /// MMX, 3DNow, 3DNow Athlon, or none supported. X863DNowEnum X863DNowLevel; /// True if this processor has conditional move instructions @@ -86,6 +86,18 @@ protected: /// Target has AES instructions bool HasAES; + /// Target has FXSAVE/FXRESTOR instructions + bool HasFXSR; + + /// Target has XSAVE instructions + bool HasXSAVE; + /// Target has XSAVEOPT instructions + bool HasXSAVEOPT; + /// Target has XSAVEC instructions + bool HasXSAVEC; + /// Target has XSAVES instructions + bool HasXSAVES; + /// Target has carry-less multiplication bool HasPCLMUL; @@ -140,16 +152,19 @@ protected: /// Processor has RDSEED instructions. bool HasRDSEED; + /// Processor has LAHF/SAHF instructions. + bool HasLAHFSAHF; + /// True if BT (bit test) of memory instructions are slow. bool IsBTMemSlow; /// True if SHLD instructions are slow. bool IsSHLDSlow; - /// True if unaligned memory access is fast. - bool IsUAMemFast; + /// True if unaligned memory accesses of 16-bytes are slow. + bool IsUAMem16Slow; - /// True if unaligned 32-byte memory accesses are slow. + /// True if unaligned memory accesses of 32-bytes are slow. bool IsUAMem32Slow; /// True if SSE operations can have unaligned memory operands. 
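// Illustrative sketch, not part of the patch: after the enum split above,
// MMX lives on the 3DNow! axis rather than the SSE axis, so the ordered >=
// threshold checks keep working independently on each axis. Toy enums shown;
// the real ones are X86SSEEnum and X863DNowEnum.
enum ToySSELevel { NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42 };
enum Toy3DNowLevel { No3DNow, MMX3D, ThreeDNow3D, ThreeDNowA3D };

static bool hasSSE2(ToySSELevel L) { return L >= SSE2; }
// Having MMX no longer implies any SSE level (and vice versa).
static bool hasMMX(Toy3DNowLevel L) { return L >= MMX3D; }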
@@ -208,6 +223,9 @@ protected: /// Processor has AVX-512 Vector Length eXtensions bool HasVLX; + /// Processor has PKU extensions + bool HasPKU; + /// Processor supports MPX - Memory Protection Extensions bool HasMPX; @@ -319,7 +337,6 @@ public: void setPICStyle(PICStyles::Style Style) { PICStyle = Style; } bool hasCMov() const { return HasCMov; } - bool hasMMX() const { return X86SSELevel >= MMX; } bool hasSSE1() const { return X86SSELevel >= SSE1; } bool hasSSE2() const { return X86SSELevel >= SSE2; } bool hasSSE3() const { return X86SSELevel >= SSE3; } @@ -332,14 +349,22 @@ public: bool hasFp256() const { return hasAVX(); } bool hasInt256() const { return hasAVX2(); } bool hasSSE4A() const { return HasSSE4A; } + bool hasMMX() const { return X863DNowLevel >= MMX; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } bool hasAES() const { return HasAES; } + bool hasFXSR() const { return HasFXSR; } + bool hasXSAVE() const { return HasXSAVE; } + bool hasXSAVEOPT() const { return HasXSAVEOPT; } + bool hasXSAVEC() const { return HasXSAVEC; } + bool hasXSAVES() const { return HasXSAVES; } bool hasPCLMUL() const { return HasPCLMUL; } - bool hasFMA() const { return HasFMA; } - // FIXME: Favor FMA when both are enabled. Is this the right thing to do? - bool hasFMA4() const { return HasFMA4 && !HasFMA; } + // Prefer FMA4 to FMA - it's better for commutation/memory folding and + // has equal or better performance on all supported targets. + bool hasFMA() const { return HasFMA && !HasFMA4; } + bool hasFMA4() const { return HasFMA4; } + bool hasAnyFMA() const { return hasFMA() || hasFMA4() || hasAVX512(); } bool hasXOP() const { return HasXOP; } bool hasTBM() const { return HasTBM; } bool hasMOVBE() const { return HasMOVBE; } @@ -355,9 +380,10 @@ public: bool hasSHA() const { return HasSHA; } bool hasPRFCHW() const { return HasPRFCHW; } bool hasRDSEED() const { return HasRDSEED; } + bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isSHLDSlow() const { return IsSHLDSlow; } - bool isUnalignedMemAccessFast() const { return IsUAMemFast; } + bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } bool isUnalignedMem32Slow() const { return IsUAMem32Slow; } bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; } bool hasCmpxchg16b() const { return HasCmpxchg16b; } @@ -375,6 +401,7 @@ public: bool hasDQI() const { return HasDQI; } bool hasBWI() const { return HasBWI; } bool hasVLX() const { return HasVLX; } + bool hasPKU() const { return HasPKU; } bool hasMPX() const { return HasMPX; } bool isAtom() const { return X86ProcFamily == IntelAtom; } @@ -394,9 +421,11 @@ public: bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } + bool isTargetAndroid() const { return TargetTriple.isAndroid(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } bool isTargetNaCl32() const { return isTargetNaCl() && !is64Bit(); } bool isTargetNaCl64() const { return isTargetNaCl() && is64Bit(); } + bool isTargetMCU() const { return TargetTriple.isOSIAMCU(); } bool isTargetWindowsMSVC() const { return TargetTriple.isWindowsMSVCEnvironment(); @@ -406,6 +435,10 @@ public: return TargetTriple.isKnownWindowsMSVCEnvironment(); } + bool isTargetWindowsCoreCLR() const { + return TargetTriple.isWindowsCoreCLREnvironment(); + } + bool isTargetWindowsCygwin() const {
return TargetTriple.isWindowsCygwinEnvironment(); } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index fb9cb4ba4c86..0e7e4c0c84a9 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -28,10 +28,17 @@ static cl::opt EnableMachineCombinerPass("x86-machine-combiner", cl::desc("Enable the machine combiner pass"), cl::init(true), cl::Hidden); +namespace llvm { +void initializeWinEHStatePassPass(PassRegistry &); +} + extern "C" void LLVMInitializeX86Target() { // Register the target. RegisterTargetMachine X(TheX86_32Target); RegisterTargetMachine Y(TheX86_64Target); + + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializeWinEHStatePassPass(PR); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -45,7 +52,7 @@ static std::unique_ptr createTLOF(const Triple &TT) { return make_unique(); if (TT.isOSBinFormatELF()) return make_unique(); - if (TT.isKnownWindowsMSVCEnvironment()) + if (TT.isKnownWindowsMSVCEnvironment() || TT.isWindowsCoreCLREnvironment()) return make_unique(); if (TT.isOSBinFormatCOFF()) return make_unique(); @@ -175,8 +182,9 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden, //===----------------------------------------------------------------------===// TargetIRAnalysis X86TargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis( - [this](Function &F) { return TargetTransformInfo(X86TTIImpl(this, F)); }); + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(X86TTIImpl(this, F)); + }); } @@ -246,6 +254,9 @@ bool X86PassConfig::addPreISel() { } void X86PassConfig::addPreRegAlloc() { + if (getOptLevel() != CodeGenOpt::None) + addPass(createX86OptimizeLEAs()); + addPass(createX86CallFrameOptimization()); } diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index 6f900ea351ef..782768d0ab16 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCSectionCOFF.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/COFF.h" #include "llvm/Support/Dwarf.h" #include "llvm/Target/TargetLowering.h" @@ -152,9 +153,8 @@ static std::string scalarConstantToHexString(const Constant *C) { } } -MCSection * -X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *X86WindowsTargetObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { if (Kind.isMergeableConst() && C) { const unsigned Characteristics = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ | @@ -171,5 +171,5 @@ X86WindowsTargetObjectFile::getSectionForConstant(SectionKind Kind, COFF::IMAGE_COMDAT_SELECT_ANY); } - return TargetLoweringObjectFile::getSectionForConstant(Kind, C); + return TargetLoweringObjectFile::getSectionForConstant(DL, Kind, C); } diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index 66366b2373cd..6b2448cc9de6 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -58,7 +58,7 @@ namespace llvm { /// \brief Given a mergeable constant with the specified size and relocation /// information, return a section that it should be placed in. 
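// Illustrative sketch, not part of the patch: why getSectionForConstant()
// above keys the COFF section off the constant itself. Naming the COMDAT
// section after the constant's bytes (cf. scalarConstantToHexString() above)
// gives identical constants identical sections, which
// IMAGE_COMDAT_SELECT_ANY then lets the linker fold. The "__real@" naming
// below follows the MSVC convention for float constants; treat it as an
// assumption here.
#include <cstdint>
#include <cstdio>
#include <string>

static std::string comdatNameFor(uint64_t Bits) {
  char Buf[32];
  std::snprintf(Buf, sizeof(Buf), "__real@%016llx",
                static_cast<unsigned long long>(Bits));
  return Buf; // Identical constants -> identical names -> the linker folds them.
}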
- MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; }; diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 7df726091843..2e7bbb208743 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" + using namespace llvm; #define DEBUG_TYPE "x86tti" @@ -62,8 +63,8 @@ unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) { if (ST->is64Bit()) return 64; - return 32; + return 32; } unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) { @@ -84,12 +85,12 @@ unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) { return 2; } -unsigned X86TTIImpl::getArithmeticInstrCost( +int X86TTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -101,10 +102,9 @@ unsigned X86TTIImpl::getArithmeticInstrCost( // normally expanded to the sequence SRA + SRL + ADD + SRA. // The OperandValue properties may not be the same as those of the previous // operation; conservatively assume OP_None. - unsigned Cost = - 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info, - TargetTransformInfo::OP_None, - TargetTransformInfo::OP_None); + int Cost = 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, + Op2Info, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None); Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); @@ -115,8 +115,7 @@ unsigned X86TTIImpl::getArithmeticInstrCost( return Cost; } - static const CostTblEntry - AVX2UniformConstCostTable[] = { + static const CostTblEntry AVX2UniformConstCostTable[] = { { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence @@ -127,12 +126,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost( if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && ST->hasAVX2()) { - int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * AVX2UniformConstCostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(AVX2UniformConstCostTable, ISD, + LT.second)) + return LT.first * Entry->Cost; } - static const CostTblEntry AVX512CostTable[] = { + static const CostTblEntry AVX512CostTable[] = { { ISD::SHL, MVT::v16i32, 1 }, { ISD::SRL, MVT::v16i32, 1 }, { ISD::SRA, MVT::v16i32, 1 }, @@ -141,7 +140,12 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v8i64, 1 }, }; - static const CostTblEntry AVX2CostTable[] = { + if (ST->hasAVX512()) { + if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + + static const CostTblEntry AVX2CostTable[] = { // Shifts on v4i64/v8i32 on AVX2 are legal even though we declare to // customize them to detect the cases where shift amount is a scalar one.
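Most of the mechanical churn in this file comes from the CostTableLookup refactoring visible above: the helper used to return an index into the table (-1 on failure) and now returns a pointer to the matching entry (null on failure), which folds the lookup and the test into a single if. A rough stand-alone sketch of the new shape (simplified types; the real helper lives in llvm/Target/CostTable.h as of this revision):

    #include <cstdio>

    struct CostTblEntry {
      int ISD;   // operation
      int Type;  // simple value type
      int Cost;
    };

    // Pointer-returning lookup: nullptr means "no entry", so callers can write
    // `if (const auto *Entry = ...) return LT.first * Entry->Cost;`.
    template <unsigned N>
    const CostTblEntry *costTableLookup(const CostTblEntry (&Tbl)[N], int ISD,
                                        int Ty) {
      for (const CostTblEntry &E : Tbl)
        if (E.ISD == ISD && E.Type == Ty)
          return &E;
      return nullptr;
    }

    int main() {
      static const CostTblEntry Tbl[] = {{0, 1, 4}, {0, 2, 9}};
      if (const auto *Entry = costTableLookup(Tbl, 0, 2))
        std::printf("cost = %d\n", Entry->Cost); // prints "cost = 9"
      return 0;
    }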
{ ISD::SHL, MVT::v4i32, 1 }, @@ -154,7 +158,57 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRL, MVT::v2i64, 1 }, { ISD::SHL, MVT::v4i64, 1 }, { ISD::SRL, MVT::v4i64, 1 }, + }; + // Look for AVX2 lowering tricks. + if (ST->hasAVX2()) { + if (ISD == ISD::SHL && LT.second == MVT::v16i16 && + (Op2Info == TargetTransformInfo::OK_UniformConstantValue || + Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) + // On AVX2, a packed v16i16 shift left by a constant build_vector + // is lowered into a vector multiply (vpmullw). + return LT.first; + + if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + + static const CostTblEntry XOPCostTable[] = { + // 128bit shifts take 1cy, but right shifts require negation beforehand. + { ISD::SHL, MVT::v16i8, 1 }, + { ISD::SRL, MVT::v16i8, 2 }, + { ISD::SRA, MVT::v16i8, 2 }, + { ISD::SHL, MVT::v8i16, 1 }, + { ISD::SRL, MVT::v8i16, 2 }, + { ISD::SRA, MVT::v8i16, 2 }, + { ISD::SHL, MVT::v4i32, 1 }, + { ISD::SRL, MVT::v4i32, 2 }, + { ISD::SRA, MVT::v4i32, 2 }, + { ISD::SHL, MVT::v2i64, 1 }, + { ISD::SRL, MVT::v2i64, 2 }, + { ISD::SRA, MVT::v2i64, 2 }, + // 256bit shifts require splitting if AVX2 didn't catch them above. + { ISD::SHL, MVT::v32i8, 2 }, + { ISD::SRL, MVT::v32i8, 4 }, + { ISD::SRA, MVT::v32i8, 4 }, + { ISD::SHL, MVT::v16i16, 2 }, + { ISD::SRL, MVT::v16i16, 4 }, + { ISD::SRA, MVT::v16i16, 4 }, + { ISD::SHL, MVT::v8i32, 2 }, + { ISD::SRL, MVT::v8i32, 4 }, + { ISD::SRA, MVT::v8i32, 4 }, + { ISD::SHL, MVT::v4i64, 2 }, + { ISD::SRL, MVT::v4i64, 4 }, + { ISD::SRA, MVT::v4i64, 4 }, + }; + + // Look for XOP lowering tricks. + if (ST->hasXOP()) { + if (const auto *Entry = CostTableLookup(XOPCostTable, ISD, LT.second)) + return LT.first * Entry->Cost; + } + + static const CostTblEntry AVX2CustomCostTable[] = { { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence. { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. @@ -163,7 +217,8 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence. { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence. - { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized. + { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence. // Vectorizing division is a bad idea. See the SSE2 table for more comments. { ISD::SDIV, MVT::v32i8, 32*20 }, @@ -176,44 +231,44 @@ unsigned X86TTIImpl::getArithmeticInstrCost( { ISD::UDIV, MVT::v4i64, 4*20 }, }; - if (ST->hasAVX512()) { - int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * AVX512CostTable[Idx].Cost; - } - // Look for AVX2 lowering tricks. + // Look for AVX2 lowering tricks for custom cases. if (ST->hasAVX2()) { - if (ISD == ISD::SHL && LT.second == MVT::v16i16 && - (Op2Info == TargetTransformInfo::OK_UniformConstantValue || - Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) - // On AVX2, a packed v16i16 shift left by a constant build_vector - // is lowered into a vector multiply (vpmullw). - return LT.first; - - int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * AVX2CostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD, + LT.second)) + return LT.first * Entry->Cost; } - static const CostTblEntry + static const CostTblEntry SSE2UniformConstCostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. 
// Constant splats are cheaper for the following instructions. { ISD::SHL, MVT::v16i8, 1 }, // psllw. + { ISD::SHL, MVT::v32i8, 2 }, // psllw. { ISD::SHL, MVT::v8i16, 1 }, // psllw. + { ISD::SHL, MVT::v16i16, 2 }, // psllw. { ISD::SHL, MVT::v4i32, 1 }, // pslld + { ISD::SHL, MVT::v8i32, 2 }, // pslld { ISD::SHL, MVT::v2i64, 1 }, // psllq. + { ISD::SHL, MVT::v4i64, 2 }, // psllq. { ISD::SRL, MVT::v16i8, 1 }, // psrlw. + { ISD::SRL, MVT::v32i8, 2 }, // psrlw. { ISD::SRL, MVT::v8i16, 1 }, // psrlw. + { ISD::SRL, MVT::v16i16, 2 }, // psrlw. { ISD::SRL, MVT::v4i32, 1 }, // psrld. + { ISD::SRL, MVT::v8i32, 2 }, // psrld. { ISD::SRL, MVT::v2i64, 1 }, // psrlq. + { ISD::SRL, MVT::v4i64, 2 }, // psrlq. { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb. + { ISD::SRA, MVT::v32i8, 8 }, // psrlw, pand, pxor, psubb. { ISD::SRA, MVT::v8i16, 1 }, // psraw. + { ISD::SRA, MVT::v16i16, 2 }, // psraw. { ISD::SRA, MVT::v4i32, 1 }, // psrad. + { ISD::SRA, MVT::v8i32, 2 }, // psrad. { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle. + { ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle. { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence @@ -227,27 +282,34 @@ unsigned X86TTIImpl::getArithmeticInstrCost( if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) return LT.first * 15; - int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * SSE2UniformConstCostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD, + LT.second)) + return LT.first * Entry->Cost; } if (ISD == ISD::SHL && Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) { - EVT VT = LT.second; + MVT VT = LT.second; + // Vector shift left by non uniform constant can be lowered + // into vector multiply (pmullw/pmulld). if ((VT == MVT::v8i16 && ST->hasSSE2()) || (VT == MVT::v4i32 && ST->hasSSE41())) - // Vector shift left by non uniform constant can be lowered - // into vector multiply (pmullw/pmulld). return LT.first; + + // v16i16 and v8i32 shifts by non-uniform constants are lowered into a + // sequence of extract + two vector multiply + insert. + if ((VT == MVT::v8i32 || VT == MVT::v16i16) && + (ST->hasAVX() && !ST->hasAVX2())) + ISD = ISD::MUL; + + // A vector shift left by non uniform constant is converted + // into a vector multiply; the new multiply is eventually + // lowered into a sequence of shuffles and 2 x pmuludq. if (VT == MVT::v4i32 && ST->hasSSE2()) - // A vector shift left by non uniform constant is converted - // into a vector multiply; the new multiply is eventually - // lowered into a sequence of shuffles and 2 x pmuludq. ISD = ISD::MUL; } - static const CostTblEntry SSE2CostTable[] = { + static const CostTblEntry SSE2CostTable[] = { // We don't correctly identify costs of casts because they are marked as // custom. // For some cases, where the shift amount is a scalar we would be able @@ -257,20 +319,31 @@ unsigned X86TTIImpl::getArithmeticInstrCost( // used for vectorization and we don't want to make vectorized code worse // than scalar code. { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence. { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SHL, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. - { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized. - { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized. + { ISD::SHL, MVT::v8i32, 2*2*5 }, // We optimized this using mul. 
+ { ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SHL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. + { ISD::SRL, MVT::v32i8, 2*26 }, // cmpgtb sequence. { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRL, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized. + { ISD::SRL, MVT::v8i32, 2*16 }, // Shift each lane + blend. + { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. + { ISD::SRL, MVT::v4i64, 2*4 }, // splat+shuffle sequence. { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. + { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence. { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. + { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence. { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. - { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized. + { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend. + { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. + { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. // It is not a good idea to vectorize division. We have to scalarize it and // in the process we will often end up having to spilling regular @@ -289,12 +362,11 @@ unsigned X86TTIImpl::getArithmeticInstrCost( }; if (ST->hasSSE2()) { - int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second); - if (Idx != -1) - return LT.first * SSE2CostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second)) + return LT.first * Entry->Cost; } - static const CostTblEntry AVX1CostTable[] = { + static const CostTblEntry AVX1CostTable[] = { // We don't have to scalarize unsupported ops. We can issue two half-sized // operations and we only need to extract the upper YMM half. // Two ops + 1 extract + 1 insert = 4. @@ -314,29 +386,21 @@ unsigned X86TTIImpl::getArithmeticInstrCost( // Look for AVX1 lowering tricks. if (ST->hasAVX() && !ST->hasAVX2()) { - EVT VT = LT.second; + MVT VT = LT.second; - // v16i16 and v8i32 shifts by non-uniform constants are lowered into a - // sequence of extract + two vector multiply + insert. - if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) && - Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) - ISD = ISD::MUL; - - int Idx = CostTableLookup(AVX1CostTable, ISD, VT); - if (Idx != -1) - return LT.first * AVX1CostTable[Idx].Cost; + if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, VT)) + return LT.first * Entry->Cost; } // Custom lowering of vectors. - static const CostTblEntry CustomLowered[] = { + static const CostTblEntry CustomLowered[] = { // A v2i64/v4i64 and multiply is custom lowered as a series of long // multiplies(3), shifts(4) and adds(2). { ISD::MUL, MVT::v2i64, 9 }, { ISD::MUL, MVT::v4i64, 9 }, }; - int Idx = CostTableLookup(CustomLowered, ISD, LT.second); - if (Idx != -1) - return LT.first * CustomLowered[Idx].Cost; + if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second)) + return LT.first * Entry->Cost; // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle, // 2x pmuludq, 2x shuffle. 
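A worked example of how the returned costs compose, using the SSE2 shift rows above: getTypeLegalizationCost returns a split factor (LT.first) plus the legalized type (LT.second), and every table hit is scaled by the split factor. The numbers below are the assumed table values from that hunk:

    #include <cassert>

    int main() {
      // SSE2-only target, SRL on v8i32: legalization splits the vector into
      // two v4i32 halves, so LT.first == 2 and the v4i32 row (cost 16,
      // "shift each lane + blend") is what the lookup sees.
      assert(2 * 16 == 32);
      // AVX1 target: v8i32 is a legal type (LT.first == 1), and the new
      // dedicated v8i32 row charges 2*16 up front - the same total cost.
      assert(1 * (2 * 16) == 32);
      return 0;
    }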
@@ -348,15 +412,15 @@ unsigned X86TTIImpl::getArithmeticInstrCost( return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); } -unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp) { +int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) { // We only estimate the cost of reverse and alternate shuffles. if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate) return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); if (Kind == TTI::SK_Reverse) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - unsigned Cost = 1; + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + int Cost = 1; if (LT.second.getSizeInBits() > 128) Cost = 3; // Extract + insert + copy. @@ -367,14 +431,14 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, if (Kind == TTI::SK_Alternate) { // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are promoted to type v2i64. - std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); + std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); // The backend knows how to generate a single VEX.256 version of // instruction VPBLENDW if the target supports AVX2. if (ST->hasAVX2() && LT.second == MVT::v16i16) return LT.first; - static const CostTblEntry AVXAltShuffleTbl[] = { + static const CostTblEntry AVXAltShuffleTbl[] = { {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd @@ -390,13 +454,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9} }; - if (ST->hasAVX()) { - int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx != -1) - return LT.first * AVXAltShuffleTbl[Idx].Cost; - } + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVXAltShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; - static const CostTblEntry SSE41AltShuffleTbl[] = { + static const CostTblEntry SSE41AltShuffleTbl[] = { // These are lowered into movsd. 
{ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, @@ -414,13 +477,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} }; - if (ST->hasSSE41()) { - int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx != -1) - return LT.first * SSE41AltShuffleTbl[Idx].Cost; - } + if (ST->hasSSE41()) + if (const auto *Entry = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, + LT.second)) + return LT.first * Entry->Cost; - static const CostTblEntry SSSE3AltShuffleTbl[] = { + static const CostTblEntry SSSE3AltShuffleTbl[] = { {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd @@ -433,13 +495,12 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or }; - if (ST->hasSSSE3()) { - int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx != -1) - return LT.first * SSSE3AltShuffleTbl[Idx].Cost; - } + if (ST->hasSSSE3()) + if (const auto *Entry = CostTableLookup(SSSE3AltShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; - static const CostTblEntry SSEAltShuffleTbl[] = { + static const CostTblEntry SSEAltShuffleTbl[] = { {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd @@ -454,65 +515,47 @@ unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, }; // Fall-back (SSE3 and SSE2). - int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx != -1) - return LT.first * SSEAltShuffleTbl[Idx].Cost; + if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl, + ISD::VECTOR_SHUFFLE, LT.second)) + return LT.first * Entry->Cost; return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } -unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { +int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); - std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); + // FIXME: Need a better design of the cost table to handle non-simple types of + // potential massive combinations (elem_num x src_type x dst_type). - static const TypeConversionCostTblEntry - SSE2ConvTbl[] = { - // These are somewhat magic numbers justified by looking at the output of - // Intel's IACA, running some kernels and making sure when we take - // legalization into account the throughput will be overestimated. - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, - // There are faster sequences for float conversions. 
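The reverse-shuffle costing in getShuffleCost above reduces to a small rule, sketched here with assumed constants: one instruction if the legalized vector fits in 128 bits, three (extract, insert, and a per-half copy) otherwise, scaled by the legalization split factor:

    #include <cassert>

    int reverseShuffleCost(unsigned LegalizedBits, int SplitFactor) {
      int PerVector = LegalizedBits > 128 ? 3 : 1; // extract + insert + copy
      return SplitFactor * PerVector;
    }

    int main() {
      assert(reverseShuffleCost(128, 1) == 1); // e.g. a single pshufd/shufps
      assert(reverseShuffleCost(256, 1) == 3); // cross-lane case on AVX
      return 0;
    }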
- { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, - { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, - { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, + + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, + { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 }, + { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 }, + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 }, + { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 }, + { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 }, }; - if (ST->hasSSE2() && !ST->hasAVX()) { - int Idx = - ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second); - if (Idx != -1) - return LTSrc.first * SSE2ConvTbl[Idx].Cost; - } - - static const TypeConversionCostTblEntry - AVX512ConversionTbl[] = { + static const TypeConversionCostTblEntry AVX512FConversionTbl[] = { { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, - { ISD::FP_ROUND, MVT::v16f32, MVT::v8f64, 3 }, { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 1 }, { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 1 }, { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 1 }, { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 }, - { ISD::TRUNCATE, MVT::v16i32, MVT::v8i64, 4 }, // v16i1 -> v16i32 - load + broadcast { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, @@ -522,33 +565,49 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, - { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v16i32, 3 }, - { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v16i32, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, + { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, + { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, + + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 2 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 2 }, + { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 2 }, + { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i8, 2 }, + { ISD::UINT_TO_FP, 
MVT::v4f64, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 5 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, + { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 12 }, + { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 26 }, + + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, + { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, + { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 }, + { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 }, }; - if (ST->hasAVX512()) { - int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second, - LTSrc.second); - if (Idx != -1) - return AVX512ConversionTbl[Idx].Cost; - } - EVT SrcTy = TLI->getValueType(DL, Src); - EVT DstTy = TLI->getValueType(DL, Dst); - - // The function getSimpleVT only handles simple value types. - if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src); - - static const TypeConversionCostTblEntry - AVX2ConversionTbl[] = { + static const TypeConversionCostTblEntry AVX2ConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, @@ -579,8 +638,7 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, }; - static const TypeConversionCostTblEntry - AVXConversionTbl[] = { + static const TypeConversionCostTblEntry AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, @@ -650,34 +708,158 @@ unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 }, }; + static const TypeConversionCostTblEntry SSE41ConversionTbl[] = { + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 }, + + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 30 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, + }; + + static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { + // These are somewhat magic numbers justified by looking at the output of + // Intel's IACA, running some kernels and making sure when we take + // legalization into account the 
throughput will be overestimated. + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 8*10 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 16*10 }, + // There are faster sequences for float conversions. + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 8 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 }, + + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 10 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 }, + }; + + std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); + std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); + + if (ST->hasSSE2() && !ST->hasAVX()) { + if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, + LTDest.second, LTSrc.second)) + return LTSrc.first * Entry->Cost; + } + + EVT SrcTy = TLI->getValueType(DL, Src); + EVT DstTy = TLI->getValueType(DL, Dst); + + // The function getSimpleVT only handles simple value types. 
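The conversion-cost probes that follow are ordered from the most specific subtarget to the least specific one, so a newer target never falls through to the pessimistic legacy numbers. A compressed sketch with made-up per-tier costs standing in for the table lookups:

    struct Features {
      bool HasDQI, HasAVX512, HasAVX2, HasAVX, HasSSE41, HasSSE2;
    };

    // Hypothetical costs for one conversion; the real code consults a table
    // per tier and returns on the first hit.
    int uintToFpCost(const Features &F) {
      if (F.HasDQI)    return 1;  // native wide-integer conversions
      if (F.HasAVX512) return 2;
      if (F.HasAVX2)   return 8;
      if (F.HasAVX)    return 9;
      if (F.HasSSE41)  return 12;
      if (F.HasSSE2)   return 20; // IACA-derived "magic numbers"
      return -1;                  // defer to the generic implementation
    }

    int main() {
      Features AVX512DQTarget = {true, true, true, true, true, true};
      return uintToFpCost(AVX512DQTarget) == 1 ? 0 : 1;
    }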
+ if (!SrcTy.isSimple() || !DstTy.isSimple()) + return BaseT::getCastInstrCost(Opcode, Dst, Src); + + if (ST->hasDQI()) + if (const auto *Entry = ConvertCostTableLookup(AVX512DQConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + + if (ST->hasAVX512()) + if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + if (ST->hasAVX2()) { - int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD, - DstTy.getSimpleVT(), SrcTy.getSimpleVT()); - if (Idx != -1) - return AVX2ConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } if (ST->hasAVX()) { - int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(), - SrcTy.getSimpleVT()); - if (Idx != -1) - return AVXConversionTbl[Idx].Cost; + if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + } + + if (ST->hasSSE41()) { + if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + } + + if (ST->hasSSE2()) { + if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; } return BaseT::getCastInstrCost(Opcode, Dst, Src); } -unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy) { +int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); - static const CostTblEntry SSE42CostTbl[] = { + static const CostTblEntry SSE42CostTbl[] = { { ISD::SETCC, MVT::v2f64, 1 }, { ISD::SETCC, MVT::v4f32, 1 }, { ISD::SETCC, MVT::v2i64, 1 }, @@ -686,7 +868,7 @@ unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v16i8, 1 }, }; - static const CostTblEntry AVX1CostTbl[] = { + static const CostTblEntry AVX1CostTbl[] = { { ISD::SETCC, MVT::v4f64, 1 }, { ISD::SETCC, MVT::v8f32, 1 }, // AVX1 does not support 8-wide integer compare. 
@@ -696,54 +878,45 @@ unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, { ISD::SETCC, MVT::v32i8, 4 }, }; - static const CostTblEntry AVX2CostTbl[] = { + static const CostTblEntry AVX2CostTbl[] = { { ISD::SETCC, MVT::v4i64, 1 }, { ISD::SETCC, MVT::v8i32, 1 }, { ISD::SETCC, MVT::v16i16, 1 }, { ISD::SETCC, MVT::v32i8, 1 }, }; - static const CostTblEntry AVX512CostTbl[] = { + static const CostTblEntry AVX512CostTbl[] = { { ISD::SETCC, MVT::v8i64, 1 }, { ISD::SETCC, MVT::v16i32, 1 }, { ISD::SETCC, MVT::v8f64, 1 }, { ISD::SETCC, MVT::v16f32, 1 }, }; - if (ST->hasAVX512()) { - int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy); - if (Idx != -1) - return LT.first * AVX512CostTbl[Idx].Cost; - } + if (ST->hasAVX512()) + if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasAVX2()) { - int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy); - if (Idx != -1) - return LT.first * AVX2CostTbl[Idx].Cost; - } + if (ST->hasAVX2()) + if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasAVX()) { - int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTbl[Idx].Cost; - } + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasSSE42()) { - int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTbl[Idx].Cost; - } + if (ST->hasSSE42()) + if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) + return LT.first * Entry->Cost; return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy); } -unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, - unsigned Index) { +int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { assert(Val->isVectorTy() && "This must be a vector type"); if (Index != -1U) { // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Val); + std::pair LT = TLI->getTypeLegalizationCost(DL, Val); // This type is legalized to a scalar type. if (!LT.second.isVector()) @@ -761,10 +934,9 @@ unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, return BaseT::getVectorInstrCost(Opcode, Val, Index); } -unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, - bool Extract) { +int X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { assert (Ty->isVectorTy() && "Can only scalarize vectors"); - unsigned Cost = 0; + int Cost = 0; for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { if (Insert) @@ -776,9 +948,8 @@ unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert, return Cost; } -unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, - unsigned Alignment, - unsigned AddressSpace) { +int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace) { // Handle non-power-of-two vectors such as <3 x float> if (VectorType *VTy = dyn_cast(Src)) { unsigned NumElem = VTy->getVectorNumElements(); @@ -796,22 +967,21 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, // Assume that all other non-power-of-two numbers are scalarized. 
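getScalarizationOverhead, converted to int just above, charges one extract and/or one insert per vector element. A stand-alone sketch with assumed unit per-element costs:

    int scalarizationOverhead(int NumElts, bool Insert, bool Extract) {
      int Cost = 0;
      for (int i = 0; i < NumElts; ++i) {
        if (Insert)
          Cost += 1; // cost of insertelement at lane i (assumed 1)
        if (Extract)
          Cost += 1; // cost of extractelement at lane i (assumed 1)
      }
      return Cost;
    }

    int main() {
      // A <4 x float> value that must be fully unpacked and repacked.
      return scalarizationOverhead(4, true, true) == 8 ? 0 : 1;
    }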
if (!isPowerOf2_32(NumElem)) { - unsigned Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), - Alignment, AddressSpace); - unsigned SplitCost = getScalarizationOverhead(Src, - Opcode == Instruction::Load, - Opcode==Instruction::Store); + int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment, + AddressSpace); + int SplitCost = getScalarizationOverhead(Src, Opcode == Instruction::Load, + Opcode == Instruction::Store); return NumElem * Cost + SplitCost; } } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, Src); + std::pair LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); // Each load/store unit costs 1. - unsigned Cost = LT.first * 1; + int Cost = LT.first * 1; // On Sandybridge 256bit load/stores are double pumped // (but not on Haswell). @@ -821,9 +991,9 @@ unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, return Cost; } -unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, - unsigned Alignment, - unsigned AddressSpace) { +int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, + unsigned Alignment, + unsigned AddressSpace) { VectorType *SrcVTy = dyn_cast(SrcTy); if (!SrcVTy) // To calculate scalar take the regular cost, without mask @@ -832,34 +1002,33 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, unsigned NumElem = SrcVTy->getVectorNumElements(); VectorType *MaskTy = VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem); - if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) || - (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) || + if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy)) || + (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy)) || !isPowerOf2_32(NumElem)) { // Scalarization - unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true); - unsigned ScalarCompareCost = - getCmpSelInstrCost(Instruction::ICmp, - Type::getInt8Ty(getGlobalContext()), NULL); - unsigned BranchCost = getCFInstrCost(Instruction::Br); - unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); + int MaskSplitCost = getScalarizationOverhead(MaskTy, false, true); + int ScalarCompareCost = getCmpSelInstrCost( + Instruction::ICmp, Type::getInt8Ty(getGlobalContext()), nullptr); + int BranchCost = getCFInstrCost(Instruction::Br); + int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); - unsigned ValueSplitCost = - getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load, - Opcode == Instruction::Store); - unsigned MemopCost = + int ValueSplitCost = getScalarizationOverhead( + SrcVTy, Opcode == Instruction::Load, Opcode == Instruction::Store); + int MemopCost = NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), Alignment, AddressSpace); return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; } // Legalize the type. - std::pair LT = TLI->getTypeLegalizationCost(DL, SrcVTy); - unsigned Cost = 0; - if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() && + std::pair LT = TLI->getTypeLegalizationCost(DL, SrcVTy); + auto VT = TLI->getValueType(DL, SrcVTy); + int Cost = 0; + if (VT.isSimple() && LT.second != VT.getSimpleVT() && LT.second.getVectorNumElements() == NumElem) // Promotion requires expand/truncate for data and a shuffle for mask. 
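When a masked load or store has to be scalarized, the code above sums four components: unpacking the mask, a compare-and-branch per lane, unpacking or repacking the data, and the scalar memory operations themselves. A worked example with assumed unit costs for a scalarized masked load of eight elements:

    #include <cassert>

    int main() {
      int NumElem = 8;
      int MaskSplitCost = NumElem * 1;           // extract each mask lane
      int ScalarCompareCost = 1, BranchCost = 1; // assumed per-lane costs
      int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
      int ValueSplitCost = NumElem * 1;          // insert each loaded value
      int MemopCost = NumElem * 1;               // one scalar load per lane
      assert(MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost == 40);
      return 0;
    }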
- Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) + - getShuffleCost(TTI::SK_Alternate, MaskTy, 0, 0); + Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, nullptr) + + getShuffleCost(TTI::SK_Alternate, MaskTy, 0, nullptr); else if (LT.second.getVectorNumElements() > NumElem) { VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(), @@ -874,7 +1043,7 @@ unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, return Cost+LT.first; } -unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { +int X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { // Address computations in vectorized code with non-consecutive addresses will // likely result in more instructions compared to scalar code where the // computation can more often be merged into the index mode. The resulting @@ -887,10 +1056,10 @@ unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) { return BaseT::getAddressComputationCost(Ty, IsComplex); } -unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, - bool IsPairwise) { +int X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, + bool IsPairwise) { - std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -900,7 +1069,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput // and make it as the cost. - static const CostTblEntry SSE42CostTblPairWise[] = { + static const CostTblEntry SSE42CostTblPairWise[] = { { ISD::FADD, MVT::v2f64, 2 }, { ISD::FADD, MVT::v4f32, 4 }, { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". @@ -908,7 +1077,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, { ISD::ADD, MVT::v8i16, 5 }, }; - static const CostTblEntry AVX1CostTblPairWise[] = { + static const CostTblEntry AVX1CostTblPairWise[] = { { ISD::FADD, MVT::v4f32, 4 }, { ISD::FADD, MVT::v4f64, 5 }, { ISD::FADD, MVT::v8f32, 7 }, @@ -919,7 +1088,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, { ISD::ADD, MVT::v8i32, 5 }, }; - static const CostTblEntry SSE42CostTblNoPairWise[] = { + static const CostTblEntry SSE42CostTblNoPairWise[] = { { ISD::FADD, MVT::v2f64, 2 }, { ISD::FADD, MVT::v4f32, 4 }, { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6". @@ -927,7 +1096,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3". 
}; - static const CostTblEntry AVX1CostTblNoPairWise[] = { + static const CostTblEntry AVX1CostTblNoPairWise[] = { { ISD::FADD, MVT::v4f32, 3 }, { ISD::FADD, MVT::v4f64, 3 }, { ISD::FADD, MVT::v8f32, 4 }, @@ -939,29 +1108,21 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, }; if (IsPairwise) { - if (ST->hasAVX()) { - int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTblPairWise[Idx].Cost; - } + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasSSE42()) { - int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTblPairWise[Idx].Cost; - } + if (ST->hasSSE42()) + if (const auto *Entry = CostTableLookup(SSE42CostTblPairWise, ISD, MTy)) + return LT.first * Entry->Cost; } else { - if (ST->hasAVX()) { - int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy); - if (Idx != -1) - return LT.first * AVX1CostTblNoPairWise[Idx].Cost; - } + if (ST->hasAVX()) + if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; - if (ST->hasSSE42()) { - int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy); - if (Idx != -1) - return LT.first * SSE42CostTblNoPairWise[Idx].Cost; - } + if (ST->hasSSE42()) + if (const auto *Entry = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy)) + return LT.first * Entry->Cost; } return BaseT::getReductionCost(Opcode, ValTy, IsPairwise); @@ -970,7 +1131,7 @@ unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy, /// \brief Calculate the cost of materializing a 64-bit value. This helper /// method might only calculate a fraction of a larger immediate. Therefore it /// is valid to return a cost of ZERO. -unsigned X86TTIImpl::getIntImmCost(int64_t Val) { +int X86TTIImpl::getIntImmCost(int64_t Val) { if (Val == 0) return TTI::TCC_Free; @@ -980,7 +1141,7 @@ unsigned X86TTIImpl::getIntImmCost(int64_t Val) { return 2 * TTI::TCC_Basic; } -unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -1004,18 +1165,18 @@ unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { // Split the constant into 64-bit chunks and calculate the cost for each // chunk. - unsigned Cost = 0; + int Cost = 0; for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); int64_t Val = Tmp.getSExtValue(); Cost += getIntImmCost(Val); } // We need at least one instruction to materialize the constant. - return std::max(1U, Cost); + return std::max(1, Cost); } -unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { +int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -1038,6 +1199,26 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, case Instruction::Store: ImmIdx = 0; break; + case Instruction::ICmp: + // This is an imperfect hack to prevent constant hoisting of + // compares that might be trying to check if a 64-bit value fits in + // 32-bits. The backend can optimize these cases using a right shift by 32. + // Ideally we would check the compare predicate here. There are also other + // similar immediates the backend can use shifts for.
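getIntImmCost splits wide immediates into 64-bit chunks, prices each chunk, and never reports less than one instruction. A simplified sketch using two explicit 64-bit halves instead of APInt:

    #include <algorithm>
    #include <cstdint>

    // Cost of one 64-bit chunk, mirroring the rules above: zero is free,
    // a sign-extended 32-bit value is one instruction, anything else two.
    int immCost64(int64_t V) {
      if (V == 0)
        return 0;                 // TCC_Free
      if (V == (int64_t)(int32_t)V)
        return 1;                 // TCC_Basic
      return 2;                   // movabs-style sequence
    }

    int immCost128(int64_t Lo, int64_t Hi) {
      int Cost = immCost64(Lo) + immCost64(Hi);
      return std::max(1, Cost);   // at least one instruction overall
    }

    int main() {
      return immCost128(0, 0) == 1 && immCost128(-1, 42) == 2 ? 0 : 1;
    }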
+ if (Idx == 1 && Imm.getBitWidth() == 64) { + uint64_t ImmVal = Imm.getZExtValue(); + if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff) + return TTI::TCC_Free; + } + ImmIdx = 1; + break; + case Instruction::And: + // We support 64-bit ANDs with immediates with 32-bits of leading zeroes + // by using a 32-bit operation with implicit zero extension. Detect such + // immediates here as the normal path expects bit 31 to be sign extended. + if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue())) + return TTI::TCC_Free; + // Fallthrough case Instruction::Add: case Instruction::Sub: case Instruction::Mul: @@ -1045,10 +1226,8 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, case Instruction::SDiv: case Instruction::URem: case Instruction::SRem: - case Instruction::And: case Instruction::Or: case Instruction::Xor: - case Instruction::ICmp: ImmIdx = 1; break; // Always return TCC_Free for the shift value of a shift instruction. @@ -1073,18 +1252,18 @@ unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, } if (Idx == ImmIdx) { - unsigned NumConstants = (BitSize + 63) / 64; - unsigned Cost = X86TTIImpl::getIntImmCost(Imm, Ty); + int NumConstants = (BitSize + 63) / 64; + int Cost = X86TTIImpl::getIntImmCost(Imm, Ty); return (Cost <= NumConstants * TTI::TCC_Basic) - ? static_cast(TTI::TCC_Free) + ? static_cast(TTI::TCC_Free) : Cost; } return X86TTIImpl::getIntImmCost(Imm, Ty); } -unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { +int X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -1118,23 +1297,181 @@ unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx, return X86TTIImpl::getIntImmCost(Imm, Ty); } -bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) { - int DataWidth = DataTy->getPrimitiveSizeInBits(); +// Return an average cost of Gather / Scatter instruction, maybe improved later +int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr, + unsigned Alignment, unsigned AddressSpace) { - // Todo: AVX512 allows gather/scatter, works with strided and random as well - if ((DataWidth < 32) || (Consecutive == 0)) + assert(isa(SrcVTy) && "Unexpected type in getGSVectorCost"); + unsigned VF = SrcVTy->getVectorNumElements(); + + // Try to reduce index size from 64 bit (default for GEP) + // to 32. It is essential for VF 16. If the index can't be reduced to 32, the + // operation will use 16 x 64 indices which do not fit in a zmm and needs + // to split. Also check that the base pointer is the same for all lanes, + // and that there's at most one variable index. 
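The two new TCC_Free cases above keep constant hoisting away from immediates the backend already encodes for free. A stand-alone sketch of both predicates (hypothetical helper, simplified from the switch above):

    #include <cassert>
    #include <cstdint>

    // - icmp against 0x100000000 / 0xffffffff is a "does it fit in 32 bits"
    //   check the backend handles with a right shift, so don't hoist it;
    // - a 64-bit AND whose mask zero-extends from 32 bits is encodable as a
    //   32-bit AND with implicit zero extension.
    bool isFreeImm(bool IsICmp, bool IsAnd, uint64_t Imm) {
      if (IsICmp && (Imm == 0x100000000ULL || Imm == 0xffffffffULL))
        return true;
      if (IsAnd && Imm <= 0xffffffffULL)
        return true;
      return false;
    }

    int main() {
      assert(isFreeImm(true, false, 0x100000000ULL));
      assert(isFreeImm(false, true, 0x00ff00ffULL));
      assert(!isFreeImm(false, true, 0x1ffffffffULL));
      return 0;
    }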
+ auto getIndexSizeInBits = [](Value *Ptr, const DataLayout& DL) { + unsigned IndexSize = DL.getPointerSizeInBits(); + GetElementPtrInst *GEP = dyn_cast(Ptr); + if (IndexSize < 64 || !GEP) + return IndexSize; + + unsigned NumOfVarIndices = 0; + Value *Ptrs = GEP->getPointerOperand(); + if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs)) + return IndexSize; + for (unsigned i = 1; i < GEP->getNumOperands(); ++i) { + if (isa(GEP->getOperand(i))) + continue; + Type *IndxTy = GEP->getOperand(i)->getType(); + if (IndxTy->isVectorTy()) + IndxTy = IndxTy->getVectorElementType(); + if ((IndxTy->getPrimitiveSizeInBits() == 64 && + !isa(GEP->getOperand(i))) || + ++NumOfVarIndices > 1) + return IndexSize; // 64 + } + return (unsigned)32; + }; + + + // Trying to reduce IndexSize to 32 bits for vector 16. + // By default the IndexSize is equal to pointer size. + unsigned IndexSize = (VF >= 16) ? getIndexSizeInBits(Ptr, DL) : + DL.getPointerSizeInBits(); + + Type *IndexVTy = VectorType::get(IntegerType::get(getGlobalContext(), + IndexSize), VF); + std::pair IdxsLT = TLI->getTypeLegalizationCost(DL, IndexVTy); + std::pair SrcLT = TLI->getTypeLegalizationCost(DL, SrcVTy); + int SplitFactor = std::max(IdxsLT.first, SrcLT.first); + if (SplitFactor > 1) { + // Handle splitting of vector of pointers + Type *SplitSrcTy = VectorType::get(SrcVTy->getScalarType(), VF / SplitFactor); + return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment, + AddressSpace); + } + + // The gather / scatter cost is given by Intel architects. It is a rough + // number since we are looking at one instruction at a time. + const int GSOverhead = 2; + return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), + Alignment, AddressSpace); +} + +/// Return the cost of full scalarization of gather / scatter operation. +/// +/// Opcode - Load or Store instruction. +/// SrcVTy - The type of the data vector that should be gathered or scattered. +/// VariableMask - The mask is non-constant at compile time. +/// Alignment - Alignment for one element. +/// AddressSpace - pointer[s] address space. +/// +int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, + bool VariableMask, unsigned Alignment, + unsigned AddressSpace) { + unsigned VF = SrcVTy->getVectorNumElements(); + + int MaskUnpackCost = 0; + if (VariableMask) { + VectorType *MaskTy = + VectorType::get(Type::getInt1Ty(getGlobalContext()), VF); + MaskUnpackCost = getScalarizationOverhead(MaskTy, false, true); + int ScalarCompareCost = + getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(getGlobalContext()), + nullptr); + int BranchCost = getCFInstrCost(Instruction::Br); + MaskUnpackCost += VF * (BranchCost + ScalarCompareCost); + } + + // The cost of the scalar loads/stores.
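The index-narrowing lambda above exists because of register pressure at VF 16: sixteen 64-bit indices cannot fit in one vector register, forcing a split, while sixteen 32-bit indices can. The arithmetic, under the assumption of 512-bit (zmm) vector registers:

    #include <cassert>

    int main() {
      int VF = 16, RegBits = 512;
      // Number of registers needed for the index vector (rounded up).
      assert((VF * 64 + RegBits - 1) / RegBits == 2); // i64 indices: split
      assert((VF * 32 + RegBits - 1) / RegBits == 1); // i32 indices: one zmm
      return 0;
    }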
+ int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), + Alignment, AddressSpace); + + int InsertExtractCost = 0; + if (Opcode == Instruction::Load) + for (unsigned i = 0; i < VF; ++i) + // Add the cost of inserting each scalar load into the vector + InsertExtractCost += + getVectorInstrCost(Instruction::InsertElement, SrcVTy, i); + else + for (unsigned i = 0; i < VF; ++i) + // Add the cost of extracting each element out of the data vector + InsertExtractCost += + getVectorInstrCost(Instruction::ExtractElement, SrcVTy, i); + + return MemoryOpCost + MaskUnpackCost + InsertExtractCost; +} + +/// Calculate the cost of Gather / Scatter operation +int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, + Value *Ptr, bool VariableMask, + unsigned Alignment) { + assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); + unsigned VF = SrcVTy->getVectorNumElements(); + PointerType *PtrTy = dyn_cast(Ptr->getType()); + if (!PtrTy && Ptr->getType()->isVectorTy()) + PtrTy = dyn_cast(Ptr->getType()->getVectorElementType()); + assert(PtrTy && "Unexpected type for Ptr argument"); + unsigned AddressSpace = PtrTy->getAddressSpace(); + + bool Scalarize = false; + if ((Opcode == Instruction::Load && !isLegalMaskedGather(SrcVTy)) || + (Opcode == Instruction::Store && !isLegalMaskedScatter(SrcVTy))) + Scalarize = true; + // Gather / Scatter for vector 2 is not profitable on KNL / SKX + // Vector-4 of gather/scatter instruction does not exist on KNL. + // We can extend it to 8 elements, but zeroing upper bits of + // the mask vector will add more instructions. Right now we give the scalar + // cost of vector-4 for KNL. TODO: Check, maybe the gather/scatter instruction is + // better in the VariableMask case. + if (VF == 2 || (VF == 4 && !ST->hasVLX())) + Scalarize = true; + + if (Scalarize) + return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment, AddressSpace); + + return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); +} + +bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) { + Type *ScalarTy = DataTy->getScalarType(); + int DataWidth = isa(ScalarTy) ? + DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits(); + + return (DataWidth >= 32 && ST->hasAVX2()); +} + +bool X86TTIImpl::isLegalMaskedStore(Type *DataType) { + return isLegalMaskedLoad(DataType); +} + +bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) { + // This function is called now in two cases: from the Loop Vectorizer + // and from the Scalarizer. + // When the Loop Vectorizer asks about legality of the feature, + // the vectorization factor is not calculated yet. The Loop Vectorizer + // sends a scalar type and the decision is based on the width of the + // scalar element. + // Later on, the cost model will estimate usage of this intrinsic based on + // the vector type. + // The Scalarizer asks again about legality. It sends a vector type. + // In this case we can reject non-power-of-2 vectors. + if (isa(DataTy) && !isPowerOf2_32(DataTy->getVectorNumElements())) return false; - if (ST->hasAVX512() || ST->hasAVX2()) - return true; - return false; + Type *ScalarTy = DataTy->getScalarType(); + int DataWidth = isa(ScalarTy) ?
+ DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits(); + + // AVX-512 allows gather and scatter + return DataWidth >= 32 && ST->hasAVX512(); } -bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) { - return isLegalMaskedLoad(DataType, Consecutive); +bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) { + return isLegalMaskedGather(DataType); } -bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const { +bool X86TTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { const TargetMachine &TM = getTLI()->getTargetMachine(); // Work this as a subsetting of subtarget features. diff --git a/lib/Target/X86/X86TargetTransformInfo.h b/lib/Target/X86/X86TargetTransformInfo.h index da3f36c2e27e..adb745e912d1 100644 --- a/lib/Target/X86/X86TargetTransformInfo.h +++ b/lib/Target/X86/X86TargetTransformInfo.h @@ -33,13 +33,13 @@ class X86TTIImpl : public BasicTTIImplBase { const X86Subtarget *ST; const X86TargetLowering *TLI; - unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); + int getScalarizationOverhead(Type *Ty, bool Insert, bool Extract); const X86Subtarget *getST() const { return ST; } const X86TargetLowering *getTLI() const { return TLI; } public: - explicit X86TTIImpl(const X86TargetMachine *TM, Function &F) + explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} @@ -62,38 +62,44 @@ public: unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); unsigned getMaxInterleaveFactor(unsigned VF); - unsigned getArithmeticInstrCost( + int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); - unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, - Type *SubTp); - unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); - unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); - unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); - unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); + int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src); + int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy); + int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); + int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, + unsigned AddressSpace); + int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, + bool VariableMask, unsigned Alignment); + int getAddressComputationCost(Type *PtrTy, bool IsComplex); - unsigned getAddressComputationCost(Type *PtrTy, bool IsComplex); + int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm); - unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm); + int getIntImmCost(int64_t); - unsigned getIntImmCost(int64_t); + int getIntImmCost(const APInt &Imm, Type *Ty); - unsigned getIntImmCost(const APInt &Imm, Type *Ty); - - 
unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty); - unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); - bool isLegalMaskedLoad(Type *DataType, int Consecutive); - bool isLegalMaskedStore(Type *DataType, int Consecutive); - bool hasCompatibleFunctionAttributes(const Function *Caller, - const Function *Callee) const; + int getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty); + bool isLegalMaskedLoad(Type *DataType); + bool isLegalMaskedStore(Type *DataType); + bool isLegalMaskedGather(Type *DataType); + bool isLegalMaskedScatter(Type *DataType); + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; +private: + int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask, + unsigned Alignment, unsigned AddressSpace); + int getGSVectorCost(unsigned Opcode, Type *DataTy, Value *Ptr, + unsigned Alignment, unsigned AddressSpace); /// @} }; diff --git a/lib/Target/X86/X86WinEHState.cpp b/lib/Target/X86/X86WinEHState.cpp index 9190d0be9e4d..dce94a9e9ef7 100644 --- a/lib/Target/X86/X86WinEHState.cpp +++ b/lib/Target/X86/X86WinEHState.cpp @@ -15,7 +15,8 @@ //===----------------------------------------------------------------------===// #include "X86.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" @@ -38,12 +39,16 @@ using namespace llvm::PatternMatch; #define DEBUG_TYPE "winehstate" +namespace llvm { void initializeWinEHStatePassPass(PassRegistry &); } + namespace { class WinEHStatePass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. - WinEHStatePass() : FunctionPass(ID) {} + WinEHStatePass() : FunctionPass(ID) { + initializeWinEHStatePassPass(*PassRegistry::getPassRegistry()); + } bool runOnFunction(Function &Fn) override; @@ -62,18 +67,13 @@ private: void linkExceptionRegistration(IRBuilder<> &Builder, Function *Handler); void unlinkExceptionRegistration(IRBuilder<> &Builder); - void addCXXStateStores(Function &F, MachineModuleInfo &MMI); - void addSEHStateStores(Function &F, MachineModuleInfo &MMI); - void addCXXStateStoresToFunclet(Value *ParentRegNode, WinEHFuncInfo &FuncInfo, - Function &F, int BaseState); + void addStateStores(Function &F, WinEHFuncInfo &FuncInfo); void insertStateNumberStore(Value *ParentRegNode, Instruction *IP, int State); Value *emitEHLSDA(IRBuilder<> &Builder, Function *F); Function *generateLSDAInEAXThunk(Function *ParentFunc); - int escapeRegNode(Function &F); - // Module-level type getters. Type *getEHLinkRegistrationType(); Type *getSEHRegistrationType(); @@ -111,6 +111,9 @@ FunctionPass *llvm::createX86WinEHStatePass() { return new WinEHStatePass(); } char WinEHStatePass::ID = 0; +INITIALIZE_PASS(WinEHStatePass, "x86-winehstate", + "Insert stores for EH state numbers", false, false) + bool WinEHStatePass::doInitialization(Module &M) { TheModule = &M; FrameEscape = Intrinsic::getDeclaration(TheModule, Intrinsic::localescape); @@ -138,14 +141,7 @@ void WinEHStatePass::getAnalysisUsage(AnalysisUsage &AU) const { } bool WinEHStatePass::runOnFunction(Function &F) { - // If this is an outlined handler, don't do anything. We'll do state insertion - // for it in the parent. 
- StringRef WinEHParentName = - F.getFnAttribute("wineh-parent").getValueAsString(); - if (WinEHParentName != F.getName() && !WinEHParentName.empty()) - return false; - - // Check the personality. Do nothing if this is not an MSVC personality. + // Check the personality. Do nothing if this personality doesn't use funclets. if (!F.hasPersonalityFn()) return false; PersonalityFn = @@ -153,7 +149,19 @@ bool WinEHStatePass::runOnFunction(Function &F) { if (!PersonalityFn) return false; Personality = classifyEHPersonality(PersonalityFn); - if (!isMSVCEHPersonality(Personality)) + if (!isFuncletEHPersonality(Personality)) + return false; + + // Skip this function if there are no EH pads and we aren't using IR-level + // outlining. + bool HasPads = false; + for (BasicBlock &BB : F) { + if (BB.isEHPad()) { + HasPads = true; + break; + } + } + if (!HasPads) return false; // Disable frame pointer elimination in this function. @@ -163,14 +171,13 @@ bool WinEHStatePass::runOnFunction(Function &F) { emitExceptionRegistrationRecord(&F); - auto *MMIPtr = getAnalysisIfAvailable<MachineModuleInfo>(); - assert(MMIPtr && "MachineModuleInfo should always be available"); - MachineModuleInfo &MMI = *MMIPtr; - switch (Personality) { - default: llvm_unreachable("unexpected personality function"); - case EHPersonality::MSVC_CXX: addCXXStateStores(F, MMI); break; - case EHPersonality::MSVC_X86SEH: addSEHStateStores(F, MMI); break; - } + // The state numbers calculated here in IR must agree with what we calculate + // later on for the MachineFunction. In particular, if an IR pass deletes an + // unreachable EH pad after this point before machine CFG construction, we + // will be in trouble. If this assumption is ever broken, we should turn the + // numbers into an immutable analysis pass. + WinEHFuncInfo FuncInfo; + addStateStores(F, FuncInfo); // Reset per-function state. PersonalityFn = nullptr; @@ -261,7 +268,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) { Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); // TryLevel = -1 StateFieldIndex = 2; - insertStateNumberStore(RegNode, Builder.GetInsertPoint(), -1); + insertStateNumberStore(RegNode, &*Builder.GetInsertPoint(), -1); // Handler = __ehhandler$F Function *Trampoline = generateLSDAInEAXThunk(F); Link = Builder.CreateStructGEP(RegNodeTy, RegNode, 1); @@ -278,7 +285,7 @@ void WinEHStatePass::emitExceptionRegistrationRecord(Function *F) { Builder.CreateStore(SP, Builder.CreateStructGEP(RegNodeTy, RegNode, 0)); // TryLevel = -2 / -1 StateFieldIndex = 4; - insertStateNumberStore(RegNode, Builder.GetInsertPoint(), + insertStateNumberStore(RegNode, &*Builder.GetInsertPoint(), UseStackGuard ? -2 : -1); // ScopeTable = llvm.x86.seh.lsda(F) Value *FI8 = Builder.CreateBitCast(F, Int8PtrType); @@ -347,7 +354,7 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) { Value *CastPersonality = Builder.CreateBitCast(PersonalityFn, TargetFuncTy->getPointerTo()); auto AI = Trampoline->arg_begin(); - Value *Args[5] = {LSDA, AI++, AI++, AI++, AI++}; + Value *Args[5] = {LSDA, &*AI++, &*AI++, &*AI++, &*AI++}; CallInst *Call = Builder.CreateCall(CastPersonality, Args); // Can't use musttail due to prototype mismatch, but we can use tail.
Call->setTailCall(true); @@ -391,160 +398,53 @@ void WinEHStatePass::unlinkExceptionRegistration(IRBuilder<> &Builder) { Builder.CreateStore(Next, FSZero); } -void WinEHStatePass::addCXXStateStores(Function &F, MachineModuleInfo &MMI) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F); - calculateWinCXXEHStateNumbers(&F, FuncInfo); +void WinEHStatePass::addStateStores(Function &F, WinEHFuncInfo &FuncInfo) { + // Mark the registration node. The backend needs to know which alloca it is so + // that it can recover the original frame pointer. + IRBuilder<> Builder(RegNode->getParent(), std::next(RegNode->getIterator())); + Value *RegNodeI8 = Builder.CreateBitCast(RegNode, Builder.getInt8PtrTy()); + Builder.CreateCall( + Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_ehregnode), + {RegNodeI8}); - // The base state for the parent is -1. - addCXXStateStoresToFunclet(RegNode, FuncInfo, F, -1); + // Calculate state numbers. + if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&F, FuncInfo); + else + calculateWinCXXEHStateNumbers(&F, FuncInfo); - // Set up RegNodeEscapeIndex - int RegNodeEscapeIndex = escapeRegNode(F); - FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex; - - // Only insert stores in catch handlers. - Constant *FI8 = - ConstantExpr::getBitCast(&F, Type::getInt8PtrTy(TheModule->getContext())); - for (auto P : FuncInfo.HandlerBaseState) { - Function *Handler = const_cast<Function *>(P.first); - int BaseState = P.second; - IRBuilder<> Builder(&Handler->getEntryBlock(), - Handler->getEntryBlock().begin()); - // FIXME: Find and reuse such a call if present. - Value *ParentFP = Builder.CreateCall(FrameAddress, {Builder.getInt32(1)}); - Value *RecoveredRegNode = Builder.CreateCall( - FrameRecover, {FI8, ParentFP, Builder.getInt32(RegNodeEscapeIndex)}); - RecoveredRegNode = - Builder.CreateBitCast(RecoveredRegNode, RegNodeTy->getPointerTo(0)); - addCXXStateStoresToFunclet(RecoveredRegNode, FuncInfo, *Handler, BaseState); - } -} - -/// Escape RegNode so that we can access it from child handlers. Find the call -/// to localescape, if any, in the entry block and append RegNode to the list -/// of arguments. -int WinEHStatePass::escapeRegNode(Function &F) { - // Find the call to localescape and extract its arguments. - IntrinsicInst *EscapeCall = nullptr; - for (Instruction &I : F.getEntryBlock()) { - IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I); - if (II && II->getIntrinsicID() == Intrinsic::localescape) { - EscapeCall = II; - break; - } - } - SmallVector<Value *, 8> Args; - if (EscapeCall) { - auto Ops = EscapeCall->arg_operands(); - Args.append(Ops.begin(), Ops.end()); - } - Args.push_back(RegNode); - - // Replace the call (if it exists) with new one. Otherwise, insert at the end - // of the entry block. - Instruction *InsertPt = EscapeCall; - if (!EscapeCall) - InsertPt = F.getEntryBlock().getTerminator(); - IRBuilder<> Builder(&F.getEntryBlock(), InsertPt); - Builder.CreateCall(FrameEscape, Args); - if (EscapeCall) - EscapeCall->eraseFromParent(); - return Args.size() - 1; -} - -void WinEHStatePass::addCXXStateStoresToFunclet(Value *ParentRegNode, - WinEHFuncInfo &FuncInfo, - Function &F, int BaseState) { // Iterate all the instructions and emit state number stores. + DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(F); for (BasicBlock &BB : F) { + // Figure out what state we should assign calls in this block.
+ int BaseState = -1; + auto &BBColors = BlockColors[&BB]; + + assert(BBColors.size() == 1 && + "multi-color BB not removed by preparation"); + BasicBlock *FuncletEntryBB = BBColors.front(); + if (auto *FuncletPad = + dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI())) { + auto BaseStateI = FuncInfo.FuncletBaseStateMap.find(FuncletPad); + if (BaseStateI != FuncInfo.FuncletBaseStateMap.end()) + BaseState = BaseStateI->second; + } + for (Instruction &I : BB) { if (auto *CI = dyn_cast<CallInst>(&I)) { // Possibly throwing call instructions have no actions to take after // an unwind. Ensure they are in the -1 state. if (CI->doesNotThrow()) continue; - insertStateNumberStore(ParentRegNode, CI, BaseState); + insertStateNumberStore(RegNode, CI, BaseState); } else if (auto *II = dyn_cast<InvokeInst>(&I)) { // Look up the state number of the landingpad this unwinds to. - LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst(); - // FIXME: Why does this assertion fail? - //assert(FuncInfo.LandingPadStateMap.count(LPI) && "LP has no state!"); - int State = FuncInfo.LandingPadStateMap[LPI]; - insertStateNumberStore(ParentRegNode, II, State); - } - } - } -} - -/// Assign every distinct landingpad a unique state number for SEH. Unlike C++ -/// EH, we can use this very simple algorithm while C++ EH cannot because catch -/// handlers aren't outlined and the runtime doesn't have to figure out which -/// catch handler frame to unwind to. -/// FIXME: __finally blocks are outlined, so this approach may break down there. -void WinEHStatePass::addSEHStateStores(Function &F, MachineModuleInfo &MMI) { - WinEHFuncInfo &FuncInfo = MMI.getWinEHFuncInfo(&F); - - // Remember and return the index that we used. We save it in WinEHFuncInfo so - // that we can lower llvm.x86.seh.recoverfp later in filter functions without - // too much trouble. - int RegNodeEscapeIndex = escapeRegNode(F); - FuncInfo.EHRegNodeEscapeIndex = RegNodeEscapeIndex; - - // Iterate all the instructions and emit state number stores. - int CurState = 0; - SmallPtrSet<BasicBlock *, 4> ExceptBlocks; - for (BasicBlock &BB : F) { - for (auto I = BB.begin(), E = BB.end(); I != E; ++I) { - if (auto *CI = dyn_cast<CallInst>(I)) { - auto *Intrin = dyn_cast<IntrinsicInst>(CI); - if (Intrin) { - // Calls that "don't throw" are considered to be able to throw asynch - // exceptions, but intrinsics cannot. - continue; - } - insertStateNumberStore(RegNode, CI, -1); - } else if (auto *II = dyn_cast<InvokeInst>(I)) { - // Look up the state number of the landingpad this unwinds to. - LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst(); - auto InsertionPair = - FuncInfo.LandingPadStateMap.insert(std::make_pair(LPI, CurState)); - auto Iter = InsertionPair.first; - int &State = Iter->second; - bool Inserted = InsertionPair.second; - if (Inserted) { - // Each action consumes a state number. - auto *EHActions = cast<IntrinsicInst>(LPI->getNextNode()); - SmallVector<std::unique_ptr<ActionHandler>, 4> ActionList; - parseEHActions(EHActions, ActionList); - assert(!ActionList.empty()); - CurState += ActionList.size(); - State += ActionList.size() - 1; - - // Remember all the __except block targets.
- for (auto &Handler : ActionList) { - if (auto *CH = dyn_cast<CatchHandler>(Handler.get())) { - auto *BA = cast<BlockAddress>(CH->getHandlerBlockOrFunc()); -#ifndef NDEBUG - for (BasicBlock *Pred : predecessors(BA->getBasicBlock())) - assert(Pred->isLandingPad() && - "WinEHPrepare failed to split block"); -#endif - ExceptBlocks.insert(BA->getBasicBlock()); - } - } - } + assert(FuncInfo.InvokeStateMap.count(II) && "invoke has no state!"); + int State = FuncInfo.InvokeStateMap[II]; insertStateNumberStore(RegNode, II, State); } } } - - // Insert llvm.x86.seh.restoreframe() into each __except block. - Function *RestoreFrame = - Intrinsic::getDeclaration(TheModule, Intrinsic::x86_seh_restoreframe); - for (BasicBlock *ExceptBB : ExceptBlocks) { - IRBuilder<> Builder(ExceptBB->begin()); - Builder.CreateCall(RestoreFrame, {}); - } } void WinEHStatePass::insertStateNumberStore(Value *ParentRegNode, diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp index 2e44ac949b2c..aaf267af5311 100644 --- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp +++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp @@ -224,7 +224,7 @@ static DecodeStatus DecodeBitpOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { if (Val > 11) return MCDisassembler::Fail; - static unsigned Values[] = { + static const unsigned Values[] = { 32 /*bpw*/, 1, 2, 3, 4, 5, 6, 7, 8, 16, 24, 32 }; Inst.addOperand(MCOperand::createImm(Values[Val])); diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h index 6fd2dec1d13e..dc513f7b225b 100644 --- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h +++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h @@ -19,8 +19,6 @@ namespace llvm { -class TargetMachine; - class XCoreInstPrinter : public MCInstPrinter { public: XCoreInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII, diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp index 702056d781d0..b00cdd5040eb 100644 --- a/lib/Target/XCore/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/XCoreAsmPrinter.cpp @@ -115,14 +115,14 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { EmitSpecialLLVMGlobal(GV)) return; - const DataLayout *TD = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); OutStreamer->SwitchSection( getObjFileLowering().SectionForGlobal(GV, *Mang, TM)); MCSymbol *GVSym = getSymbol(GV); const Constant *C = GV->getInitializer(); - unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType()); - + unsigned Align = (unsigned)DL.getPreferredTypeAlignmentShift(C->getType()); + // Mark the start of the global getTargetStreamer().emitCCTopData(GVSym->getName()); @@ -154,15 +154,15 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (GV->isThreadLocal()) { report_fatal_error("TLS is not supported by this target!"); } - unsigned Size = TD->getTypeAllocSize(C->getType()); + unsigned Size = DL.getTypeAllocSize(C->getType()); if (MAI->hasDotTypeDotSizeDirective()) { OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym), MCConstantExpr::create(Size, OutContext)); } OutStreamer->EmitLabel(GVSym); - - EmitGlobalConstant(C); + + EmitGlobalConstant(DL, C); // The ABI requires that unsigned scalar types smaller than 32 bits // are padded to 32 bits.
if (Size < 4) @@ -208,7 +208,7 @@ printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O, void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, raw_ostream &O) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout &DL = getDataLayout(); const MachineOperand &MO = MI->getOperand(opNum); switch (MO.getType()) { case MachineOperand::MO_Register: @@ -224,8 +224,8 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum, getSymbol(MO.getGlobal())->print(O, MAI); break; case MachineOperand::MO_ConstantPoolIndex: - O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() - << '_' << MO.getIndex(); + O << DL.getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' + << MO.getIndex(); break; case MachineOperand::MO_BlockAddress: GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 76c3d8130e75..ae493de083b8 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -160,27 +160,26 @@ static void GetSpillList(SmallVectorImpl<StackSlotInfo> &SpillList, /// As offsets are negative, the largest offsets will be first. static void GetEHSpillList(SmallVectorImpl<StackSlotInfo> &SpillList, MachineFrameInfo *MFI, XCoreFunctionInfo *XFI, + const Constant *PersonalityFn, const TargetLowering *TL) { assert(XFI->hasEHSpillSlot() && "There are no EH register spill slots"); - const int* EHSlot = XFI->getEHSpillSlot(); - SpillList.push_back(StackSlotInfo(EHSlot[0], - MFI->getObjectOffset(EHSlot[0]), - TL->getExceptionPointerRegister())); - SpillList.push_back(StackSlotInfo(EHSlot[0], - MFI->getObjectOffset(EHSlot[1]), - TL->getExceptionSelectorRegister())); + const int *EHSlot = XFI->getEHSpillSlot(); + SpillList.push_back( + StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[0]), + TL->getExceptionPointerRegister(PersonalityFn))); + SpillList.push_back( + StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[1]), + TL->getExceptionSelectorRegister(PersonalityFn))); std::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset); } - static MachineMemOperand * getFrameIndexMMO(MachineBasicBlock &MBB, int FrameIndex, unsigned flags) { MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex), - flags, MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FrameIndex), flags, + MFI.getObjectSize(FrameIndex), MFI.getObjectAlignment(FrameIndex)); return MMO; } @@ -323,8 +322,11 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF, if (XFI->hasEHSpillSlot()) { // The unwinder requires stack slot & CFI offsets for the exception info. // We do not save/spill these registers. - SmallVector<StackSlotInfo,2> SpillList; - GetEHSpillList(SpillList, MFI, XFI, + const Function *Fn = MF.getFunction(); + const Constant *PersonalityFn = + Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr; + SmallVector<StackSlotInfo, 2> SpillList; + GetEHSpillList(SpillList, MFI, XFI, PersonalityFn, MF.getSubtarget().getTargetLowering()); assert(SpillList.size()==2 && "Unexpected SpillList size"); EmitCfiOffset(MBB, MBBI, dl, TII, MMI, @@ -355,8 +357,12 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF, if (RetOpcode == XCore::EH_RETURN) { // 'Restore' the exception info the unwinder has placed into the stack // slots.
- SmallVector<StackSlotInfo,2> SpillList; - GetEHSpillList(SpillList, MFI, XFI, MF.getSubtarget().getTargetLowering()); + const Function *Fn = MF.getFunction(); + const Constant *PersonalityFn = + Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr; + SmallVector<StackSlotInfo, 2> SpillList; + GetEHSpillList(SpillList, MFI, XFI, PersonalityFn, + MF.getSubtarget().getTargetLowering()); RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList); // Return to the landing pad. diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 9d4a966dfba4..9f61c84cd445 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -151,8 +151,9 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { MVT::Other, CPIdx, CurDAG->getEntryNode()); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = MF->getMachineMemOperand( - MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad, 4, 4); + MemOp[0] = + MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(*MF), + MachineMemOperand::MOLoad, 4, 4); cast<MachineSDNode>(node)->setMemRefs(MemOp, MemOp + 1); return node; } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index d62e7428299d..105b2cfb1be6 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -79,9 +79,6 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, // Compute derived properties from the register classes computeRegisterProperties(Subtarget.getRegisterInfo()); - // Division is expensive - setIntDivIsCheap(false); - setStackPointerRegisterToSaveRestore(XCore::SP); setSchedulingPreference(Sched::Source); @@ -154,8 +151,6 @@ XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM, // Exception handling setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); - setExceptionPointerRegister(XCore::R0); - setExceptionSelectorRegister(XCore::R1); setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); // Atomic operations @@ -839,7 +834,7 @@ LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); return DAG.getLoad( getPointerTy(DAG.getDataLayout()), SDLoc(Op), DAG.getEntryNode(), FIN, - MachinePointerInfo::getFixedStack(FI), false, false, false, 0); + MachinePointerInfo::getFixedStack(MF, FI), false, false, false, 0); } SDValue XCoreTargetLowering:: @@ -1367,8 +1362,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); ArgIn = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, false, 0); + MachinePointerInfo::getFixedStack(MF, FI), false, + false, false, 0); } const ArgDataPair ADP = { ArgIn, Ins[i].Flags }; ArgData.push_back(ADP); @@ -1517,9 +1512,10 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Create a SelectionDAG node corresponding to a store // to this memory location.
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - MemOpChains.push_back(DAG.getStore(Chain, dl, OutVals[i], FIN, - MachinePointerInfo::getFixedStack(FI), false, false, - 0)); + MemOpChains.push_back(DAG.getStore( + Chain, dl, OutVals[i], FIN, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false, + false, 0)); } // Transform all store nodes into one single node because @@ -1567,8 +1563,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // to set, the condition code register to branch on, the true/false values to // select between, and a branch opcode to use. const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + MachineFunction::iterator It = ++BB->getIterator(); // thisMBB: // ... @@ -1828,9 +1823,8 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Chain = ST->getChain(); unsigned StoreBits = ST->getMemoryVT().getStoreSizeInBits(); - if (StoreBits % 8) { - break; - } + assert((StoreBits % 8) == 0 && + "Store size in bits must be a multiple of 8"); unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment( ST->getMemoryVT().getTypeForEVT(*DCI.DAG.getContext())); unsigned Alignment = ST->getAlignment(); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index ddd675c5164d..b6f09ff418b5 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -125,6 +125,20 @@ namespace llvm { bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. + unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override { + return XCore::R0; + } + + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. 
+ unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override { + return XCore::R1; + } + private: const TargetMachine &TM; const XCoreSubtarget &Subtarget; diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index ee30344dcc25..e4129aee9479 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -368,11 +368,10 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, DL = I->getDebugLoc(); MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex), - MachineMemOperand::MOStore, - MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FrameIndex), + MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); BuildMI(MBB, I, DL, get(XCore::STWFI)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FrameIndex) @@ -391,11 +390,10 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, DL = I->getDebugLoc(); MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); - MachineMemOperand *MMO = - MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIndex), - MachineMemOperand::MOLoad, - MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FrameIndex), + MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex), + MFI.getObjectAlignment(FrameIndex)); BuildMI(MBB, I, DL, get(XCore::LDWFI), DestReg) .addFrameIndex(FrameIndex) .addImm(0) diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp index 996c6f59346d..f0b720151b17 100644 --- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp +++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp @@ -228,12 +228,9 @@ bool XCoreLowerThreadLocal::runOnModule(Module &M) { // Find thread local globals. 
bool MadeChange = false; SmallVector<GlobalVariable *, 16> ThreadLocalGlobals; - for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); - GVI != E; ++GVI) { - GlobalVariable *GV = GVI; - if (GV->isThreadLocal()) - ThreadLocalGlobals.push_back(GV); - } + for (GlobalVariable &GV : M.globals()) + if (GV.isThreadLocal()) + ThreadLocalGlobals.push_back(&GV); for (unsigned I = 0, E = ThreadLocalGlobals.size(); I != E; ++I) { MadeChange |= lowerGlobal(ThreadLocalGlobals[I]); } diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp index 9ef9752d0a5b..6c770969e32e 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.cpp +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.cpp @@ -1,4 +1,4 @@ -//===-- XCoreMachineFuctionInfo.cpp - XCore machine function info ---------===// +//===-- XCoreMachineFunctionInfo.cpp - XCore machine function info --------===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h index 078ffde18fb9..cdcc52fdc32d 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -1,4 +1,4 @@ -//===-- XCoreMachineFuctionInfo.h - XCore machine function info -*- C++ -*-===// +//===- XCoreMachineFunctionInfo.h - XCore machine function info -*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index f420081868f9..4a79dac0bed9 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -85,7 +85,7 @@ extern "C" void LLVMInitializeXCoreTarget() { } TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](Function &F) { + return TargetIRAnalysis([this](const Function &F) { return TargetTransformInfo(XCoreTTIImpl(this, F)); }); } diff --git a/lib/Target/XCore/XCoreTargetObjectFile.cpp b/lib/Target/XCore/XCoreTargetObjectFile.cpp index b5a99058f46e..aa16ecc148db 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.cpp +++ b/lib/Target/XCore/XCoreTargetObjectFile.cpp @@ -123,18 +123,21 @@ XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, if (Kind.isMergeableConst16()) return MergeableConst16Section; } Type *ObjType = GV->getType()->getPointerElementType(); + auto &DL = GV->getParent()->getDataLayout(); if (TM.getCodeModel() == CodeModel::Small || !ObjType->isSized() || - TM.getDataLayout()->getTypeAllocSize(ObjType) < CodeModelLargeSize) { + DL.getTypeAllocSize(ObjType) < CodeModelLargeSize) { if (Kind.isReadOnly()) return UseCPRel? ReadOnlySection : DataRelROSection; if (Kind.isBSS() || Kind.isCommon())return BSSSection; - if (Kind.isDataRel()) return DataSection; + if (Kind.isData()) + return DataSection; if (Kind.isReadOnlyWithRel()) return DataRelROSection; } else { if (Kind.isReadOnly()) return UseCPRel?
ReadOnlySectionLarge : DataRelROSectionLarge; if (Kind.isBSS() || Kind.isCommon())return BSSSectionLarge; - if (Kind.isDataRel()) return DataSectionLarge; + if (Kind.isData()) + return DataSectionLarge; if (Kind.isReadOnlyWithRel()) return DataRelROSectionLarge; } @@ -142,9 +145,8 @@ XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, report_fatal_error("Target does not support TLS or Common sections"); } -MCSection * -XCoreTargetObjectFile::getSectionForConstant(SectionKind Kind, - const Constant *C) const { +MCSection *XCoreTargetObjectFile::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { if (Kind.isMergeableConst4()) return MergeableConst4Section; if (Kind.isMergeableConst8()) return MergeableConst8Section; if (Kind.isMergeableConst16()) return MergeableConst16Section; diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h index 2a5ac238a447..6701c661a73e 100644 --- a/lib/Target/XCore/XCoreTargetObjectFile.h +++ b/lib/Target/XCore/XCoreTargetObjectFile.h @@ -33,7 +33,7 @@ static const unsigned CodeModelLargeSize = 256; Mangler &Mang, const TargetMachine &TM) const override; - MCSection *getSectionForConstant(SectionKind Kind, + MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind, const Constant *C) const override; }; } // end namespace llvm diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.h b/lib/Target/XCore/XCoreTargetTransformInfo.h index e23aef3e3b4a..b2cb889f1fc0 100644 --- a/lib/Target/XCore/XCoreTargetTransformInfo.h +++ b/lib/Target/XCore/XCoreTargetTransformInfo.h @@ -37,7 +37,7 @@ class XCoreTTIImpl : public BasicTTIImplBase<XCoreTTIImpl> { const XCoreTargetLowering *getTLI() const { return TLI; } public: - explicit XCoreTTIImpl(const XCoreTargetMachine *TM, Function &F) + explicit XCoreTTIImpl(const XCoreTargetMachine *TM, const Function &F) : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()), TLI(ST->getTargetLowering()) {} diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 4762011d63d8..0e05129b5261 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -34,8 +34,11 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" @@ -63,7 +66,8 @@ namespace { /// struct ArgPromotion : public CallGraphSCCPass { void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<AliasAnalysis>(); + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -81,7 +85,8 @@ namespace { bool isDenselyPacked(Type *type, const DataLayout &DL); bool canPaddingBeAccessed(Argument *Arg); CallGraphNode *PromoteArguments(CallGraphNode *CGN); - bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; + bool isSafeToPromoteArgument(Argument *Arg, bool isByVal, + AAResults &AAR) const; CallGraphNode *DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote, SmallPtrSetImpl<Argument *> &ByValArgsToTransform); @@ -90,15 +95,15 @@ namespace { bool doInitialization(CallGraph &CG) override; /// The maximum number of elements to expand, or 0 for unlimited.
unsigned maxElements; - DenseMap<const Function *, DISubprogram *> FunctionDIs; }; } char ArgPromotion::ID = 0; INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(ArgPromotion, "argpromotion", "Promote 'by reference' arguments to scalars", false, false) @@ -217,9 +222,9 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // First check: see if there are any pointer arguments! If not, quick exit. SmallVector<Argument *, 16> PointerArgs; - for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (I->getType()->isPointerTy()) - PointerArgs.push_back(I); + for (Argument &I : F->args()) + if (I.getType()->isPointerTy()) + PointerArgs.push_back(&I); if (PointerArgs.empty()) return nullptr; // Second check: make sure that all callers are direct callers. We can't @@ -237,6 +242,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { const DataLayout &DL = F->getParent()->getDataLayout(); + // We need to manually construct BasicAA directly in order to disable its use + // of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(*this, *F)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. + AAResults AAR(createLegacyPMAAResults(*this, *F, BAR)); + // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. SmallPtrSet<Argument *, 8> ArgsToPromote; @@ -281,8 +294,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // If all the elements are single-value types, we can promote it. bool AllSimple = true; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - if (!STy->getElementType(i)->isSingleValueType()) { + for (const auto *EltTy : STy->elements()) { + if (!EltTy->isSingleValueType()) { AllSimple = false; break; } @@ -303,8 +316,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { if (isSelfRecursive) { if (StructType *STy = dyn_cast<StructType>(AgTy)) { bool RecursiveType = false; - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - if (STy->getElementType(i) == PtrArg->getType()) { + for (const auto *EltTy : STy->elements()) { + if (EltTy == PtrArg->getType()) { RecursiveType = true; break; } @@ -315,7 +328,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { } // Otherwise, see if we can promote the pointer to its value. - if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr())) + if (isSafeToPromoteArgument(PtrArg, PtrArg->hasByValOrInAllocaAttr(), AAR)) ArgsToPromote.insert(PtrArg); } @@ -416,7 +429,8 @@ static void MarkIndicesSafe(const ArgPromotion::IndicesVector &ToMark, /// elements of the aggregate in order to avoid exploding the number of /// arguments passed in. bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, - bool isByValOrInAlloca) const { + bool isByValOrInAlloca, + AAResults &AAR) const { typedef std::set<IndicesVector> GEPIndicesSet; // Quick exit for unused arguments
- BasicBlock *EntryBlock = Arg->getParent()->begin(); + BasicBlock &EntryBlock = Arg->getParent()->front(); // Declare this here so we can reuse it IndicesVector Indices; - for (BasicBlock::iterator I = EntryBlock->begin(), E = EntryBlock->end(); - I != E; ++I) - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + for (Instruction &I : EntryBlock) + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) { Value *V = LI->getPointerOperand(); if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { V = GEP->getPointerOperand(); @@ -501,12 +514,11 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. - getAnalysis<AliasAnalysis>().deleteValue(GEP); GEP->eraseFromParent(); // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? - return isSafeToPromoteArgument(Arg, isByValOrInAlloca); + return isSafeToPromoteArgument(Arg, isByValOrInAlloca, AAR); } // Ensure that all of the indices are constants. @@ -563,8 +575,6 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, // blocks we know to be transparent to the load. SmallPtrSet<BasicBlock *, 16> TranspBlocks; - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - for (unsigned i = 0, e = Loads.size(); i != e; ++i) { // Check to see if the load is invalidated from the start of the block to // the load itself. LoadInst *Load = Loads[i]; BasicBlock *BB = Load->getParent(); MemoryLocation Loc = MemoryLocation::get(Load); - if (AA.canInstructionRangeModRef(BB->front(), *Load, Loc, - AliasAnalysis::Mod)) + if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, MRI_Mod)) return false; // Pointer is invalidated! // Now check every path from the entry block to the load for transparency. // To do this, we perform a depth first search on the inverse CFG from the // loading block. for (BasicBlock *P : predecessors(BB)) { for (BasicBlock *TranspBB : inverse_depth_first_ext(P, TranspBlocks)) - if (AA.canBasicBlockModify(*TranspBB, Loc)) + if (AAR.canBasicBlockModify(*TranspBB, Loc)) return false; } } @@ -637,13 +646,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, unsigned ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++ArgIndex) { - if (ByValArgsToTransform.count(I)) { + if (ByValArgsToTransform.count(&*I)) { // Simple byval argument? Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); Params.insert(Params.end(), STy->element_begin(), STy->element_end()); ++NumByValArgsPromoted; - } else if (!ArgsToPromote.count(I)) { + } else if (!ArgsToPromote.count(&*I)) { // Unchanged argument Params.push_back(I->getType()); AttributeSet attrs = PAL.getParamAttributes(ArgIndex); @@ -661,7 +670,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // In this table, we will track which indices are loaded from the argument // (where direct loads are tracked as no indices).
- ScalarizeTable &ArgIndices = ScalarizedElements[I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; for (User *U : I->users()) { Instruction *UI = cast<Instruction>(U); Type *SrcTy; @@ -687,7 +696,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, else // Take any load, we will use it only to update Alias Analysis OrigLoad = cast<LoadInst>(UI->user_back()); - OriginalLoads[std::make_pair(I, Indices)] = OrigLoad; + OriginalLoads[std::make_pair(&*I, Indices)] = OrigLoad; } // Add a parameter to the function for each element passed in. @@ -722,15 +731,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF->copyAttributesFrom(F); // Patch the pointer to LLVM function in debug info descriptor. - auto DI = FunctionDIs.find(F); - if (DI != FunctionDIs.end()) { - DISubprogram *SP = DI->second; - SP->replaceFunction(NF); - // Ensure the map is updated so it can be reused on subsequent argument - // promotions of the same function. - FunctionDIs.erase(DI); - FunctionDIs[NF] = SP; - } + NF->setSubprogram(F->getSubprogram()); + F->setSubprogram(nullptr); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); @@ -740,13 +742,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, NF->setAttributes(AttributeSet::get(F->getContext(), AttributesVec)); AttributesVec.clear(); - F->getParent()->getFunctionList().insert(F, NF); + F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); - // Get the alias analysis information that we need to update to reflect our - // changes. - AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); - // Get the callgraph information that we need to update to reflect our // changes. CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); @@ -775,7 +773,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, ArgIndex = 1; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++AI, ++ArgIndex) - if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { Args.push_back(*AI); // Unmodified argument if (CallPAL.hasAttributes(ArgIndex)) { AttrBuilder B(CallPAL, ArgIndex); AttributesVec. push_back(AttributeSet::get(F->getContext(), Args.size(), B)); } - } else if (ByValArgsToTransform.count(I)) { + } else if (ByValArgsToTransform.count(&*I)) { // Emit a GEP and load for each element of the struct. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); StructType *STy = cast<StructType>(AgTy); @@ -798,14 +796,14 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } } else if (!I->use_empty()) { // Non-dead argument: insert GEPs and loads as appropriate. - ScalarizeTable &ArgIndices = ScalarizedElements[I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; // Store the Value* version of the indices in here, but declare it now // for reuse. std::vector<Value *> Ops; for (ScalarizeTable::iterator SI = ArgIndices.begin(), E = ArgIndices.end(); SI != E; ++SI) { Value *V = *AI; - LoadInst *OrigLoad = OriginalLoads[std::make_pair(I, SI->second)]; + LoadInst *OrigLoad = OriginalLoads[std::make_pair(&*I, SI->second)]; if (!SI->second.empty()) { Ops.reserve(SI->second.size()); Type *ElTy = V->getType(); @@ -873,10 +871,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Args.clear(); AttributesVec.clear(); - // Update the alias analysis implementation to know that we are replacing - // the old call with a new one. - AA.replaceWithNewValue(Call, New); - // Update the callgraph to know that the callsite has been transformed.
CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; CalleeNode->replaceCallEdge(CS, CallSite(New), NF_CGN); @@ -901,20 +895,19 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { - if (!ArgsToPromote.count(I) && !ByValArgsToTransform.count(I)) { + if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) { // If this is an unmodified argument, move the name and users over to the // new version. - I->replaceAllUsesWith(I2); - I2->takeName(I); - AA.replaceWithNewValue(I, I2); + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); ++I2; continue; } - if (ByValArgsToTransform.count(I)) { + if (ByValArgsToTransform.count(&*I)) { // In the callee, we create an alloca, and store each of the new incoming // arguments into the alloca. - Instruction *InsertPt = NF->begin()->begin(); + Instruction *InsertPt = &NF->begin()->front(); // Just add all the struct element types. Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -929,13 +922,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, AgTy, TheAlloca, Idxs, TheAlloca->getName() + "." + Twine(i), InsertPt); I2->setName(I->getName()+"."+Twine(i)); - new StoreInst(I2++, Idx, InsertPt); + new StoreInst(&*I2++, Idx, InsertPt); } // Anything that used the arg should now use the alloca. I->replaceAllUsesWith(TheAlloca); - TheAlloca->takeName(I); - AA.replaceWithNewValue(I, TheAlloca); + TheAlloca->takeName(&*I); // If the alloca is used in a call, we must clear the tail flag since // the callee now uses an alloca from the caller. @@ -948,23 +940,20 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, continue; } - if (I->use_empty()) { - AA.deleteValue(I); + if (I->use_empty()) continue; - } // Otherwise, if we promoted this argument, then all users are load // instructions (or GEPs with only load users), and all loads should be // using the new argument that we added. - ScalarizeTable &ArgIndices = ScalarizedElements[I]; + ScalarizeTable &ArgIndices = ScalarizedElements[&*I]; while (!I->use_empty()) { if (LoadInst *LI = dyn_cast<LoadInst>(I->user_back())) { assert(ArgIndices.begin()->second.empty() && "Load element should sort to front!"); I2->setName(I->getName()+".val"); - LI->replaceAllUsesWith(I2); - AA.replaceWithNewValue(LI, I2); + LI->replaceAllUsesWith(&*I2); LI->eraseFromParent(); DEBUG(dbgs() << "*** Promoted load of argument '" << I->getName() << "' in function '" << F->getName() << "'\n"); @@ -1000,11 +989,9 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // the argument specified by ArgNo. while (!GEP->use_empty()) { LoadInst *L = cast<LoadInst>(GEP->user_back()); - L->replaceAllUsesWith(TheArg); - AA.replaceWithNewValue(L, TheArg); + L->replaceAllUsesWith(&*TheArg); L->eraseFromParent(); } - AA.deleteValue(GEP); GEP->eraseFromParent(); } } @@ -1013,10 +1000,6 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, std::advance(I2, ArgIndices.size()); } - // Tell the alias analysis that the old function is about to disappear. - AA.replaceWithNewValue(F, NF); - - NF_CGN->stealCalledFunctionsFrom(CG[F]); // Now that the old function is dead, delete it.
If there is a dangling @@ -1032,6 +1015,5 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, } bool ArgPromotion::doInitialization(CallGraph &CG) { - FunctionDIs = makeSubprogramMap(CG.getModule()); return CallGraphSCCPass::doInitialization(CG); } diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 336dac45e13a..351b88fe2aa0 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -2,14 +2,18 @@ add_llvm_library(LLVMipo ArgumentPromotion.cpp BarrierNoopPass.cpp ConstantMerge.cpp + CrossDSOCFI.cpp DeadArgumentElimination.cpp ElimAvailExtern.cpp ExtractGV.cpp + ForceFunctionAttrs.cpp FunctionAttrs.cpp + FunctionImport.cpp GlobalDCE.cpp GlobalOpt.cpp IPConstantPropagation.cpp IPO.cpp + InferFunctionAttrs.cpp InlineAlways.cpp InlineSimple.cpp Inliner.cpp @@ -20,6 +24,7 @@ add_llvm_library(LLVMipo PartialInlining.cpp PassManagerBuilder.cpp PruneEH.cpp + SampleProfile.cpp StripDeadPrototypes.cpp StripSymbols.cpp diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp index 8ce7646621ff..0aa49d6fde01 100644 --- a/lib/Transforms/IPO/ConstantMerge.cpp +++ b/lib/Transforms/IPO/ConstantMerge.cpp @@ -119,7 +119,7 @@ bool ConstantMerge::runOnModule(Module &M) { // First: Find the canonical constants others will be merged with. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { - GlobalVariable *GV = GVI++; + GlobalVariable *GV = &*GVI++; // If this GV is dead, remove it. GV->removeDeadConstantUsers(); @@ -160,7 +160,7 @@ bool ConstantMerge::runOnModule(Module &M) { // invalidating the Constant* pointers in CMap. for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { - GlobalVariable *GV = GVI++; + GlobalVariable *GV = &*GVI++; // Only process constants with initializers in the default address space. if (!GV->isConstant() || !GV->hasDefinitiveInitializer() || diff --git a/lib/Transforms/IPO/CrossDSOCFI.cpp b/lib/Transforms/IPO/CrossDSOCFI.cpp new file mode 100644 index 000000000000..5bbb7513005c --- /dev/null +++ b/lib/Transforms/IPO/CrossDSOCFI.cpp @@ -0,0 +1,166 @@ +//===-- CrossDSOCFI.cpp - Externalize this module's CFI checks ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass exports all llvm.bitset's found in the module in the form of a +// __cfi_check function, which can be used to verify cross-DSO call targets. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "cross-dso-cfi" + +STATISTIC(TypeIds, "Number of unique type identifiers"); + +namespace { + +struct CrossDSOCFI : public ModulePass { + static char ID; + CrossDSOCFI() : ModulePass(ID) { + initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry()); + } + + Module *M; + MDNode *VeryLikelyWeights; + + ConstantInt *extractBitSetTypeId(MDNode *MD); + void buildCFICheck(); + + bool doInitialization(Module &M) override; + bool runOnModule(Module &M) override; +}; + +} // anonymous namespace + +INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, + false) +INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false) +char CrossDSOCFI::ID = 0; + +ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; } + +bool CrossDSOCFI::doInitialization(Module &Mod) { + M = &Mod; + VeryLikelyWeights = + MDBuilder(M->getContext()).createBranchWeights((1U << 20) - 1, 1); + + return false; +} + +/// extractBitSetTypeId - Extracts TypeId from a hash-based bitset MDNode. +ConstantInt *CrossDSOCFI::extractBitSetTypeId(MDNode *MD) { + // This check excludes vtables for classes inside anonymous namespaces. + auto TM = dyn_cast<ValueAsMetadata>(MD->getOperand(0)); + if (!TM) + return nullptr; + auto C = dyn_cast_or_null<ConstantInt>(TM->getValue()); + if (!C) return nullptr; + // We are looking for i64 constants. + if (C->getBitWidth() != 64) return nullptr; + + // Sanity check. + auto FM = dyn_cast_or_null<ValueAsMetadata>(MD->getOperand(1)); + // Can be null if a function was removed by an optimization. + if (FM) { + auto F = dyn_cast<Function>(FM->getValue()); + // But can never be a function declaration. + assert(!F || !F->isDeclaration()); + (void)F; // Suppress unused variable warning in the no-asserts build. + } + return C; +} + +/// buildCFICheck - emits __cfi_check for the current module. +void CrossDSOCFI::buildCFICheck() { + // FIXME: verify that __cfi_check ends up near the end of the code section, + // but before the jump slots created in LowerBitSets.
+ llvm::DenseSet<uint64_t> BitSetIds; + NamedMDNode *BitSetNM = M->getNamedMetadata("llvm.bitsets"); + + if (BitSetNM) + for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) + if (ConstantInt *TypeId = extractBitSetTypeId(BitSetNM->getOperand(I))) + BitSetIds.insert(TypeId->getZExtValue()); + + LLVMContext &Ctx = M->getContext(); + Constant *C = M->getOrInsertFunction( + "__cfi_check", + FunctionType::get( + Type::getVoidTy(Ctx), + {Type::getInt64Ty(Ctx), PointerType::getUnqual(Type::getInt8Ty(Ctx))}, + false)); + Function *F = dyn_cast<Function>(C); + F->setAlignment(4096); + auto args = F->arg_begin(); + Argument &CallSiteTypeId = *(args++); + CallSiteTypeId.setName("CallSiteTypeId"); + Argument &Addr = *(args++); + Addr.setName("Addr"); + assert(args == F->arg_end()); + + BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F); + + BasicBlock *TrapBB = BasicBlock::Create(Ctx, "trap", F); + IRBuilder<> IRBTrap(TrapBB); + Function *TrapFn = Intrinsic::getDeclaration(M, Intrinsic::trap); + llvm::CallInst *TrapCall = IRBTrap.CreateCall(TrapFn); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + IRBTrap.CreateUnreachable(); + + BasicBlock *ExitBB = BasicBlock::Create(Ctx, "exit", F); + IRBuilder<> IRBExit(ExitBB); + IRBExit.CreateRetVoid(); + + IRBuilder<> IRB(BB); + SwitchInst *SI = IRB.CreateSwitch(&CallSiteTypeId, TrapBB, BitSetIds.size()); + for (uint64_t TypeId : BitSetIds) { + ConstantInt *CaseTypeId = ConstantInt::get(Type::getInt64Ty(Ctx), TypeId); + BasicBlock *TestBB = BasicBlock::Create(Ctx, "test", F); + IRBuilder<> IRBTest(TestBB); + Function *BitsetTestFn = + Intrinsic::getDeclaration(M, Intrinsic::bitset_test); + + Value *Test = IRBTest.CreateCall( + BitsetTestFn, {&Addr, MetadataAsValue::get( + Ctx, ConstantAsMetadata::get(CaseTypeId))}); + BranchInst *BI = IRBTest.CreateCondBr(Test, ExitBB, TrapBB); + BI->setMetadata(LLVMContext::MD_prof, VeryLikelyWeights); + + SI->addCase(CaseTypeId, TestBB); + ++TypeIds; + } +} + +bool CrossDSOCFI::runOnModule(Module &M) { + if (M.getModuleFlag("Cross-DSO CFI") == nullptr) + return false; + buildCFICheck(); + return true; +} diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index d0447640259e..4de3d95ab11d 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -35,6 +35,7 @@ #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <map> #include <set> #include <tuple> @@ -121,14 +122,6 @@ namespace { typedef SmallVector<const Use *, 8> UseVector; - // Map each LLVM function to corresponding metadata with debug info. If - // the function is replaced with another one, we should patch the pointer - // to LLVM function in metadata. - // As the code generation for module is finished (and DIBuilder is - // finalized) we assume that subprogram descriptors won't be changed, and - // they are stored in map for short duration anyway. - DenseMap<const Function *, DISubprogram *> FunctionDIs; - protected: // DAH uses this to specify a different ID. explicit DAE(char &ID) : ModulePass(ID) {} @@ -198,6 +191,13 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { if (Fn.hasAddressTaken()) return false; + // Don't touch naked functions. The assembly might be using an argument, or + // otherwise rely on the frame layout in a way that this analysis will not + // see. + if (Fn.hasFnAttribute(Attribute::Naked)) { + return false; + } + // Okay, we know we can transform this function if safe.
Scan its body looking for calls marked musttail or calls to llvm.vastart. for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { @@ -229,7 +229,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // Create the new function body and insert it into the module... Function *NF = Function::Create(NFTy, Fn.getLinkage()); NF->copyAttributesFrom(&Fn); - Fn.getParent()->getFunctionList().insert(&Fn, NF); + Fn.getParent()->getFunctionList().insert(Fn.getIterator(), NF); NF->takeName(&Fn); // Loop over all of the callers of the function, transforming the call sites @@ -296,20 +296,12 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++I2) { // Move the name and users over to the new version. - I->replaceAllUsesWith(I2); - I2->takeName(I); + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); } // Patch the pointer to LLVM function in debug info descriptor. - auto DI = FunctionDIs.find(&Fn); - if (DI != FunctionDIs.end()) { - DISubprogram *SP = DI->second; - SP->replaceFunction(NF); - // Ensure the map is updated so it can be reused on non-varargs argument - // eliminations of the same function. - FunctionDIs.erase(DI); - FunctionDIs[NF] = SP; - } + NF->setSubprogram(Fn.getSubprogram()); // Fix up any BlockAddresses that refer to the function. Fn.replaceAllUsesWith(ConstantExpr::getBitCast(NF, Fn.getType())); @@ -345,16 +337,19 @@ bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) if (Fn.hasLocalLinkage() && !Fn.getFunctionType()->isVarArg()) return false; + // Don't touch naked functions. The assembly might be using an argument, or + // otherwise rely on the frame layout in a way that this analysis will not + // see. + if (Fn.hasFnAttribute(Attribute::Naked)) + return false; + if (Fn.use_empty()) return false; SmallVector<unsigned, 8> UnusedArgs; - for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(); - I != E; ++I) { - Argument *Arg = I; - - if (Arg->use_empty() && !Arg->hasByValOrInAllocaAttr()) - UnusedArgs.push_back(Arg->getArgNo()); + for (Argument &Arg : Fn.args()) { + if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr()) + UnusedArgs.push_back(Arg.getArgNo()); } if (UnusedArgs.empty()) @@ -485,6 +480,10 @@ DAE::Liveness DAE::SurveyUse(const Use *U, if (F) { // Used in a direct call. + // The function argument is live if it is used as a bundle operand. + if (CS.isBundleOperand(U)) + return Live; + // Find the argument number. We know for sure that this use is an // argument, since if it was the function argument this would be an // indirect call and the we know can't be looking at a value of the @@ -543,6 +542,14 @@ void DAE::SurveyFunction(const Function &F) { return; } + // Don't touch naked functions. The assembly might be using an argument, or + // otherwise rely on the frame layout in a way that this analysis will not + // see. + if (F.hasFnAttribute(Attribute::Naked)) { + MarkLive(F); + return; + } + unsigned RetCount = NumRetVals(&F); // Assume all return values are dead typedef SmallVector<Liveness, 5> RetVals; @@ -648,7 +655,7 @@ void DAE::SurveyFunction(const Function &F) { } else { // See what the effect of this use is (recording any uses that cause // MaybeLive in MaybeLiveArgUses). - Result = SurveyUses(AI, MaybeLiveArgUses); + Result = SurveyUses(&*AI, MaybeLiveArgUses); } // Mark the result. @@ -878,7 +885,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { NF->setAttributes(NewPAL); // Insert the new function before the old function, so we won't be processing // it again.
- F->getParent()->getFunctionList().insert(F, NF); + F->getParent()->getFunctionList().insert(F->getIterator(), NF); NF->takeName(F); // Loop over all of the callers of the function, transforming the call sites @@ -946,7 +953,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { Instruction *New; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args, "", Call); + Args, "", Call->getParent()); cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv()); cast<InvokeInst>(New)->setAttributes(NewCallPAL); } else { @@ -976,9 +983,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { " must have been a struct or an array!"); Instruction *InsertPt = Call; if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) { - BasicBlock::iterator IP = II->getNormalDest()->begin(); - while (isa<PHINode>(IP)) ++IP; - InsertPt = IP; + BasicBlock *NewEdge = SplitEdge(New->getParent(), II->getNormalDest()); + InsertPt = &*NewEdge->getFirstInsertionPt(); } // We used to return a struct or array. Instead of doing smart stuff @@ -1026,8 +1032,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { if (ArgAlive[i]) { // If this is a live argument, move the name and users over to the new // version. - I->replaceAllUsesWith(I2); - I2->takeName(I); + I->replaceAllUsesWith(&*I2); + I2->takeName(&*I); ++I2; } else { // If this argument is dead, replace any uses of it with null constants @@ -1079,9 +1085,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } // Patch the pointer to LLVM function in debug info descriptor. - auto DI = FunctionDIs.find(F); - if (DI != FunctionDIs.end()) - DI->second->replaceFunction(NF); + NF->setSubprogram(F->getSubprogram()); // Now that the old function is dead, delete it. F->eraseFromParent(); @@ -1092,9 +1096,6 @@ bool DAE::runOnModule(Module &M) { bool Changed = false; - // Collect debug info descriptors for functions. - FunctionDIs = makeSubprogramMap(M); - // First pass: Do a simple check to see if any functions can have their "..." // removed. We can do this if they never call va_start. This loop cannot be // fused with the next loop, because deleting a function invalidates @@ -1119,7 +1120,7 @@ bool DAE::runOnModule(Module &M) { for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { // Increment now, because the function will probably get removed (ie. // replaced by a new one).
- Function *F = I++; + Function *F = &*I++; Changed |= RemoveDeadStuffFromFunction(F); } diff --git a/lib/Transforms/IPO/ElimAvailExtern.cpp b/lib/Transforms/IPO/ElimAvailExtern.cpp index 67ba72d6a360..af313a6b001d 100644 --- a/lib/Transforms/IPO/ElimAvailExtern.cpp +++ b/lib/Transforms/IPO/ElimAvailExtern.cpp @@ -1,4 +1,5 @@ -//===-- ElimAvailExtern.cpp - DCE unreachable internal functions ----------------===// +//===-- ElimAvailExtern.cpp - DCE unreachable internal functions +//----------------===// // // The LLVM Compiler Infrastructure // @@ -15,9 +16,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/CtorUtils.h" #include "llvm/Transforms/Utils/GlobalStatus.h" #include "llvm/Pass.h" using namespace llvm; @@ -28,18 +27,18 @@ STATISTIC(NumFunctions, "Number of functions removed"); STATISTIC(NumVariables, "Number of global variables removed"); namespace { - struct EliminateAvailableExternally : public ModulePass { - static char ID; // Pass identification, replacement for typeid - EliminateAvailableExternally() : ModulePass(ID) { - initializeEliminateAvailableExternallyPass( - *PassRegistry::getPassRegistry()); - } +struct EliminateAvailableExternally : public ModulePass { + static char ID; // Pass identification, replacement for typeid + EliminateAvailableExternally() : ModulePass(ID) { + initializeEliminateAvailableExternallyPass( + *PassRegistry::getPassRegistry()); + } - // run - Do the EliminateAvailableExternally pass on the specified module, - // optionally updating the specified callgraph to reflect the changes. - // - bool runOnModule(Module &M) override; - }; + // run - Do the EliminateAvailableExternally pass on the specified module, + // optionally updating the specified callgraph to reflect the changes. + // + bool runOnModule(Module &M) override; +}; } char EliminateAvailableExternally::ID = 0; @@ -54,30 +53,31 @@ bool EliminateAvailableExternally::runOnModule(Module &M) { bool Changed = false; // Drop initializers of available externally global variables. - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) { - if (!I->hasAvailableExternallyLinkage()) + for (GlobalVariable &GV : M.globals()) { + if (!GV.hasAvailableExternallyLinkage()) continue; - if (I->hasInitializer()) { - Constant *Init = I->getInitializer(); - I->setInitializer(nullptr); + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + GV.setInitializer(nullptr); if (isSafeToDestroyConstant(Init)) Init->destroyConstant(); } - I->removeDeadConstantUsers(); - I->setLinkage(GlobalValue::ExternalLinkage); + GV.removeDeadConstantUsers(); + GV.setLinkage(GlobalValue::ExternalLinkage); NumVariables++; + Changed = true; } // Drop the bodies of available externally functions. 
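Alongside the range-based-for conversion, the ElimAvailExtern hunks here and below fix a silent bug: the pass mutated the module but never set Changed, so it always reported "no change". Condensed, the function loop becomes something like this sketch (statistics omitted):

    #include "llvm/IR/Module.h"
    using namespace llvm;

    // Demote available_externally function definitions to declarations so
    // their bodies are not emitted from this module.
    static bool dropAvailableExternallyBodies(Module &M) {
      bool Changed = false;
      for (Function &F : M) {
        if (!F.hasAvailableExternallyLinkage())
          continue;
        if (!F.isDeclaration())
          F.deleteBody(); // also resets the linkage to external
        F.removeDeadConstantUsers();
        Changed = true;
      }
      return Changed;
    }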
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!I->hasAvailableExternallyLinkage()) + for (Function &F : M) { + if (!F.hasAvailableExternallyLinkage()) continue; - if (!I->isDeclaration()) + if (!F.isDeclaration()) // This will set the linkage to external - I->deleteBody(); - I->removeDeadConstantUsers(); + F.deleteBody(); + F.removeDeadConstantUsers(); NumFunctions++; + Changed = true; } return Changed; diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp index b9462f2ffc72..1a3b9253d72f 100644 --- a/lib/Transforms/IPO/ExtractGV.cpp +++ b/lib/Transforms/IPO/ExtractGV.cpp @@ -83,7 +83,7 @@ namespace { for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { bool Delete = - deleteStuff == (bool)Named.count(I) && !I->isDeclaration(); + deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration(); if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; @@ -103,7 +103,7 @@ namespace { // Visit the Functions. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { bool Delete = - deleteStuff == (bool)Named.count(I) && !I->isDeclaration(); + deleteStuff == (bool)Named.count(&*I) && !I->isDeclaration(); if (!Delete) { if (I->hasAvailableExternallyLinkage()) continue; @@ -124,7 +124,7 @@ namespace { Module::alias_iterator CurI = I; ++I; - bool Delete = deleteStuff == (bool)Named.count(CurI); + bool Delete = deleteStuff == (bool)Named.count(&*CurI); makeVisible(*CurI, Delete); if (Delete) { @@ -143,7 +143,7 @@ namespace { } CurI->replaceAllUsesWith(Declaration); - delete CurI; + delete &*CurI; } } diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp new file mode 100644 index 000000000000..816291dac9e8 --- /dev/null +++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp @@ -0,0 +1,121 @@ +//===- ForceFunctionAttrs.cpp - Force function attrs for debugging --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "forceattrs" + +static cl::list + ForceAttributes("force-attribute", cl::Hidden, + cl::desc("Add an attribute to a function. This should be a " + "pair of 'function-name:attribute-name', for " + "example -force-add-attribute=foo:noinline. 
This " + "option can be specified multiple times.")); + +static Attribute::AttrKind parseAttrKind(StringRef Kind) { + return StringSwitch(Kind) + .Case("alwaysinline", Attribute::AlwaysInline) + .Case("builtin", Attribute::Builtin) + .Case("cold", Attribute::Cold) + .Case("convergent", Attribute::Convergent) + .Case("inlinehint", Attribute::InlineHint) + .Case("jumptable", Attribute::JumpTable) + .Case("minsize", Attribute::MinSize) + .Case("naked", Attribute::Naked) + .Case("nobuiltin", Attribute::NoBuiltin) + .Case("noduplicate", Attribute::NoDuplicate) + .Case("noimplicitfloat", Attribute::NoImplicitFloat) + .Case("noinline", Attribute::NoInline) + .Case("nonlazybind", Attribute::NonLazyBind) + .Case("noredzone", Attribute::NoRedZone) + .Case("noreturn", Attribute::NoReturn) + .Case("norecurse", Attribute::NoRecurse) + .Case("nounwind", Attribute::NoUnwind) + .Case("optnone", Attribute::OptimizeNone) + .Case("optsize", Attribute::OptimizeForSize) + .Case("readnone", Attribute::ReadNone) + .Case("readonly", Attribute::ReadOnly) + .Case("argmemonly", Attribute::ArgMemOnly) + .Case("returns_twice", Attribute::ReturnsTwice) + .Case("safestack", Attribute::SafeStack) + .Case("sanitize_address", Attribute::SanitizeAddress) + .Case("sanitize_memory", Attribute::SanitizeMemory) + .Case("sanitize_thread", Attribute::SanitizeThread) + .Case("ssp", Attribute::StackProtect) + .Case("sspreq", Attribute::StackProtectReq) + .Case("sspstrong", Attribute::StackProtectStrong) + .Case("uwtable", Attribute::UWTable) + .Default(Attribute::None); +} + +/// If F has any forced attributes given on the command line, add them. +static void addForcedAttributes(Function &F) { + for (auto &S : ForceAttributes) { + auto KV = StringRef(S).split(':'); + if (KV.first != F.getName()) + continue; + + auto Kind = parseAttrKind(KV.second); + if (Kind == Attribute::None) { + DEBUG(dbgs() << "ForcedAttribute: " << KV.second + << " unknown or not handled!\n"); + continue; + } + if (F.hasFnAttribute(Kind)) + continue; + F.addFnAttr(Kind); + } +} + +PreservedAnalyses ForceFunctionAttrsPass::run(Module &M) { + if (ForceAttributes.empty()) + return PreservedAnalyses::all(); + + for (Function &F : M.functions()) + addForcedAttributes(F); + + // Just conservatively invalidate analyses, this isn't likely to be important. + return PreservedAnalyses::none(); +} + +namespace { +struct ForceFunctionAttrsLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ForceFunctionAttrsLegacyPass() : ModulePass(ID) { + initializeForceFunctionAttrsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override { + if (ForceAttributes.empty()) + return false; + + for (Function &F : M.functions()) + addForcedAttributes(F); + + // Conservatively assume we changed something. 
+ return true; + } +}; +} + +char ForceFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs", + "Force set function attributes", false, false) + +Pass *llvm::createForceFunctionAttrsLegacyPass() { + return new ForceFunctionAttrsLegacyPass(); +} diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index bb5e64aef338..6dcfb3f83004 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -23,14 +23,21 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -42,230 +49,191 @@ STATISTIC(NumNoCapture, "Number of arguments marked nocapture"); STATISTIC(NumReadNoneArg, "Number of arguments marked readnone"); STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly"); STATISTIC(NumNoAlias, "Number of function returns marked noalias"); -STATISTIC(NumAnnotated, "Number of attributes added to library functions"); +STATISTIC(NumNonNullReturn, "Number of function returns marked nonnull"); +STATISTIC(NumNoRecurse, "Number of functions marked as norecurse"); namespace { - struct FunctionAttrs : public CallGraphSCCPass { - static char ID; // Pass identification, replacement for typeid - FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) { - initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); - } +typedef SmallSetVector SCCNodeSet; +} - // runOnSCC - Analyze the SCC, performing the transformation if possible. - bool runOnSCC(CallGraphSCC &SCC) override; +namespace { +struct FunctionAttrs : public CallGraphSCCPass { + static char ID; // Pass identification, replacement for typeid + FunctionAttrs() : CallGraphSCCPass(ID) { + initializeFunctionAttrsPass(*PassRegistry::getPassRegistry()); + } - // AddReadAttrs - Deduce readonly/readnone attributes for the SCC. - bool AddReadAttrs(const CallGraphSCC &SCC); + bool runOnSCC(CallGraphSCC &SCC) override; + bool doInitialization(CallGraph &CG) override { + Revisit.clear(); + return false; + } + bool doFinalization(CallGraph &CG) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + CallGraphSCCPass::getAnalysisUsage(AU); + } - // AddArgumentAttrs - Deduce nocapture attributes for the SCC. - bool AddArgumentAttrs(const CallGraphSCC &SCC); - - // IsFunctionMallocLike - Does this function allocate new memory? - bool IsFunctionMallocLike(Function *F, - SmallPtrSet &) const; - - // AddNoAliasAttrs - Deduce noalias attributes for the SCC. - bool AddNoAliasAttrs(const CallGraphSCC &SCC); - - // Utility methods used by inferPrototypeAttributes to add attributes - // and maintain annotation statistics. 
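The removed helpers that follow all share one guarded shape: set the attribute only when it is absent, and bump the NumAnnotated statistic, so that -stats counts real annotations rather than repeated visits. A condensed sketch of that shape (annotateNoUnwind is a hypothetical name):

    #include "llvm/ADT/Statistic.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Guarded annotate-and-count: only count when the attribute was
    // actually added, never when it was already present.
    static void annotateNoUnwind(Function &F, Statistic &NumAnnotated) {
      if (F.doesNotThrow())
        return; // already annotated: don't inflate the statistic
      F.setDoesNotThrow();
      ++NumAnnotated;
    }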
- - void setDoesNotAccessMemory(Function &F) { - if (!F.doesNotAccessMemory()) { - F.setDoesNotAccessMemory(); - ++NumAnnotated; - } - } - - void setOnlyReadsMemory(Function &F) { - if (!F.onlyReadsMemory()) { - F.setOnlyReadsMemory(); - ++NumAnnotated; - } - } - - void setDoesNotThrow(Function &F) { - if (!F.doesNotThrow()) { - F.setDoesNotThrow(); - ++NumAnnotated; - } - } - - void setDoesNotCapture(Function &F, unsigned n) { - if (!F.doesNotCapture(n)) { - F.setDoesNotCapture(n); - ++NumAnnotated; - } - } - - void setOnlyReadsMemory(Function &F, unsigned n) { - if (!F.onlyReadsMemory(n)) { - F.setOnlyReadsMemory(n); - ++NumAnnotated; - } - } - - void setDoesNotAlias(Function &F, unsigned n) { - if (!F.doesNotAlias(n)) { - F.setDoesNotAlias(n); - ++NumAnnotated; - } - } - - // inferPrototypeAttributes - Analyze the name and prototype of the - // given function and set any applicable attributes. Returns true - // if any attributes were set and false otherwise. - bool inferPrototypeAttributes(Function &F); - - // annotateLibraryCalls - Adds attributes to well-known standard library - // call declarations. - bool annotateLibraryCalls(const CallGraphSCC &SCC); - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); - CallGraphSCCPass::getAnalysisUsage(AU); - } - - private: - AliasAnalysis *AA; - TargetLibraryInfo *TLI; - }; +private: + TargetLibraryInfo *TLI; + SmallVector Revisit; +}; } char FunctionAttrs::ID = 0; INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs", - "Deduce function attributes", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) + "Deduce function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(FunctionAttrs, "functionattrs", - "Deduce function attributes", false, false) + "Deduce function attributes", false, false) Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); } +namespace { +/// The three kinds of memory access relevant to 'readonly' and +/// 'readnone' attributes. +enum MemoryAccessKind { + MAK_ReadNone = 0, + MAK_ReadOnly = 1, + MAK_MayWrite = 2 +}; +} -/// AddReadAttrs - Deduce readonly/readnone attributes for the SCC. -bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { - SmallPtrSet SCCNodes; +static MemoryAccessKind checkFunctionMemoryAccess(Function &F, AAResults &AAR, + const SCCNodeSet &SCCNodes) { + FunctionModRefBehavior MRB = AAR.getModRefBehavior(&F); + if (MRB == FMRB_DoesNotAccessMemory) + // Already perfect! + return MAK_ReadNone; - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - SCCNodes.insert((*I)->getFunction()); + // Definitions with weak linkage may be overridden at linktime with + // something that writes memory, so treat them like declarations. + if (F.isDeclaration() || F.mayBeOverridden()) { + if (AliasAnalysis::onlyReadsMemory(MRB)) + return MAK_ReadOnly; + // Conservatively assume it writes to memory. + return MAK_MayWrite; + } + + // Scan the function body for instructions that may read or write memory. + bool ReadsMemory = false; + for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { + Instruction *I = &*II; + + // Some instructions can be ignored even if they read or write memory. 
+ // Detect these now, skipping to the next instruction if one is found. + CallSite CS(cast(I)); + if (CS) { + // Ignore calls to functions in the same SCC. + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + continue; + FunctionModRefBehavior MRB = AAR.getModRefBehavior(CS); + + // If the call doesn't access memory, we're done. + if (!(MRB & MRI_ModRef)) + continue; + + if (!AliasAnalysis::onlyAccessesArgPointees(MRB)) { + // The call could access any memory. If that includes writes, give up. + if (MRB & MRI_Mod) + return MAK_MayWrite; + // If it reads, note it. + if (MRB & MRI_Ref) + ReadsMemory = true; + continue; + } + + // Check whether all pointer arguments point to local memory, and + // ignore calls that only access local memory. + for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI) { + Value *Arg = *CI; + if (!Arg->getType()->isPtrOrPtrVectorTy()) + continue; + + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); + + // Skip accesses to local or constant memory as they don't impact the + // externally visible mod/ref behavior. + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + + if (MRB & MRI_Mod) + // Writes non-local memory. Give up. + return MAK_MayWrite; + if (MRB & MRI_Ref) + // Ok, it reads non-local memory. + ReadsMemory = true; + } + continue; + } else if (LoadInst *LI = dyn_cast(I)) { + // Ignore non-volatile loads from local memory. (Atomic is okay here.) + if (!LI->isVolatile()) { + MemoryLocation Loc = MemoryLocation::get(LI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (StoreInst *SI = dyn_cast(I)) { + // Ignore non-volatile stores to local memory. (Atomic is okay here.) + if (!SI->isVolatile()) { + MemoryLocation Loc = MemoryLocation::get(SI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + } else if (VAArgInst *VI = dyn_cast(I)) { + // Ignore vaargs on local memory. + MemoryLocation Loc = MemoryLocation::get(VI); + if (AAR.pointsToConstantMemory(Loc, /*OrLocal=*/true)) + continue; + } + + // Any remaining instructions need to be taken seriously! Check if they + // read or write memory. + if (I->mayWriteToMemory()) + // Writes memory. Just give up. + return MAK_MayWrite; + + // If this instruction may read memory, remember that. + ReadsMemory |= I->mayReadFromMemory(); + } + + return ReadsMemory ? MAK_ReadOnly : MAK_ReadNone; +} + +/// Deduce readonly/readnone attributes for the SCC. +template +static bool addReadAttrs(const SCCNodeSet &SCCNodes, AARGetterT AARGetter) { // Check if any of the functions in the SCC read or write memory. If they // write memory then they can't be marked readnone or readonly. bool ReadsMemory = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); + for (Function *F : SCCNodes) { + // Call the callable parameter to look up AA results for this function. + AAResults &AAR = AARGetter(*F); - if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) - // External node or node we don't want to optimize - assume it may write - // memory and give up. + switch (checkFunctionMemoryAccess(*F, AAR, SCCNodes)) { + case MAK_MayWrite: return false; - - AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(F); - if (MRB == AliasAnalysis::DoesNotAccessMemory) - // Already perfect! 
- continue; - - // Definitions with weak linkage may be overridden at linktime with - // something that writes memory, so treat them like declarations. - if (F->isDeclaration() || F->mayBeOverridden()) { - if (!AliasAnalysis::onlyReadsMemory(MRB)) - // May write memory. Just give up. - return false; - + case MAK_ReadOnly: ReadsMemory = true; - continue; - } - - // Scan the function body for instructions that may read or write memory. - for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) { - Instruction *I = &*II; - - // Some instructions can be ignored even if they read or write memory. - // Detect these now, skipping to the next instruction if one is found. - CallSite CS(cast(I)); - if (CS) { - // Ignore calls to functions in the same SCC. - if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) - continue; - AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(CS); - // If the call doesn't access arbitrary memory, we may be able to - // figure out something. - if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { - // If the call does access argument pointees, check each argument. - if (AliasAnalysis::doesAccessArgPointees(MRB)) - // Check whether all pointer arguments point to local memory, and - // ignore calls that only access local memory. - for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); - CI != CE; ++CI) { - Value *Arg = *CI; - if (Arg->getType()->isPointerTy()) { - AAMDNodes AAInfo; - I->getAAMetadata(AAInfo); - - MemoryLocation Loc(Arg, MemoryLocation::UnknownSize, AAInfo); - if (!AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) { - if (MRB & AliasAnalysis::Mod) - // Writes non-local memory. Give up. - return false; - if (MRB & AliasAnalysis::Ref) - // Ok, it reads non-local memory. - ReadsMemory = true; - } - } - } - continue; - } - // The call could access any memory. If that includes writes, give up. - if (MRB & AliasAnalysis::Mod) - return false; - // If it reads, note it. - if (MRB & AliasAnalysis::Ref) - ReadsMemory = true; - continue; - } else if (LoadInst *LI = dyn_cast(I)) { - // Ignore non-volatile loads from local memory. (Atomic is okay here.) - if (!LI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(LI); - if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } - } else if (StoreInst *SI = dyn_cast(I)) { - // Ignore non-volatile stores to local memory. (Atomic is okay here.) - if (!SI->isVolatile()) { - MemoryLocation Loc = MemoryLocation::get(SI); - if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } - } else if (VAArgInst *VI = dyn_cast(I)) { - // Ignore vaargs on local memory. - MemoryLocation Loc = MemoryLocation::get(VI); - if (AA->pointsToConstantMemory(Loc, /*OrLocal=*/true)) - continue; - } - - // Any remaining instructions need to be taken seriously! Check if they - // read or write memory. - if (I->mayWriteToMemory()) - // Writes memory. Just give up. - return false; - - // If this instruction may read memory, remember that. - ReadsMemory |= I->mayReadFromMemory(); + break; + case MAK_ReadNone: + // Nothing to do! + break; } } // Success! Functions in this SCC do not access memory, or only read memory. // Give them the appropriate attribute. bool MadeChange = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - + for (Function *F : SCCNodes) { if (F->doesNotAccessMemory()) // Already perfect! 
continue; @@ -278,11 +246,10 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { // Clear out any existing attributes. AttrBuilder B; - B.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone); - F->removeAttributes(AttributeSet::FunctionIndex, - AttributeSet::get(F->getContext(), - AttributeSet::FunctionIndex, B)); + B.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); + F->removeAttributes( + AttributeSet::FunctionIndex, + AttributeSet::get(F->getContext(), AttributeSet::FunctionIndex, B)); // Add in the new attribute. F->addAttribute(AttributeSet::FunctionIndex, @@ -298,124 +265,140 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) { } namespace { - // For a given pointer Argument, this retains a list of Arguments of functions - // in the same SCC that the pointer data flows into. We use this to build an - // SCC of the arguments. - struct ArgumentGraphNode { - Argument *Definition; - SmallVector Uses; - }; +/// For a given pointer Argument, this retains a list of Arguments of functions +/// in the same SCC that the pointer data flows into. We use this to build an +/// SCC of the arguments. +struct ArgumentGraphNode { + Argument *Definition; + SmallVector Uses; +}; - class ArgumentGraph { - // We store pointers to ArgumentGraphNode objects, so it's important that - // that they not move around upon insert. - typedef std::map ArgumentMapTy; +class ArgumentGraph { + // We store pointers to ArgumentGraphNode objects, so it's important that + // that they not move around upon insert. + typedef std::map ArgumentMapTy; - ArgumentMapTy ArgumentMap; + ArgumentMapTy ArgumentMap; - // There is no root node for the argument graph, in fact: - // void f(int *x, int *y) { if (...) f(x, y); } - // is an example where the graph is disconnected. The SCCIterator requires a - // single entry point, so we maintain a fake ("synthetic") root node that - // uses every node. Because the graph is directed and nothing points into - // the root, it will not participate in any SCCs (except for its own). - ArgumentGraphNode SyntheticRoot; + // There is no root node for the argument graph, in fact: + // void f(int *x, int *y) { if (...) f(x, y); } + // is an example where the graph is disconnected. The SCCIterator requires a + // single entry point, so we maintain a fake ("synthetic") root node that + // uses every node. Because the graph is directed and nothing points into + // the root, it will not participate in any SCCs (except for its own). 
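That synthetic-root trick is worth seeing in isolation. A self-contained toy (ToyNode is hypothetical; the ADT machinery is the same one used below): a directed graph with no natural entry gets a fake root that points at every node, so scc_begin() can reach all SCCs, and the root's own singleton SCC is simply skipped by callers.

    #include "llvm/ADT/GraphTraits.h"
    #include "llvm/ADT/SCCIterator.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>

    struct ToyNode {
      std::vector<ToyNode *> Succs;
    };

    namespace llvm {
    template <> struct GraphTraits<ToyNode *> {
      typedef ToyNode NodeType;
      typedef std::vector<ToyNode *>::iterator ChildIteratorType;
      static NodeType *getEntryNode(NodeType *N) { return N; }
      static ChildIteratorType child_begin(NodeType *N) { return N->Succs.begin(); }
      static ChildIteratorType child_end(NodeType *N) { return N->Succs.end(); }
    };
    }

    int main() {
      ToyNode A, B, Root;       // A and B are mutually unreachable...
      Root.Succs.push_back(&A); // ...so a synthetic root points at both,
      Root.Succs.push_back(&B); // giving scc_begin() a single entry node.
      unsigned N = 0;
      for (llvm::scc_iterator<ToyNode *> I = llvm::scc_begin(&Root);
           !I.isAtEnd(); ++I)
        ++N;                    // visits {A}, {B}, then {Root} itself
      llvm::outs() << N << " SCCs\n"; // prints "3 SCCs"
    }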
+ ArgumentGraphNode SyntheticRoot; - public: - ArgumentGraph() { SyntheticRoot.Definition = nullptr; } +public: + ArgumentGraph() { SyntheticRoot.Definition = nullptr; } - typedef SmallVectorImpl::iterator iterator; + typedef SmallVectorImpl::iterator iterator; - iterator begin() { return SyntheticRoot.Uses.begin(); } - iterator end() { return SyntheticRoot.Uses.end(); } - ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } + iterator begin() { return SyntheticRoot.Uses.begin(); } + iterator end() { return SyntheticRoot.Uses.end(); } + ArgumentGraphNode *getEntryNode() { return &SyntheticRoot; } - ArgumentGraphNode *operator[](Argument *A) { - ArgumentGraphNode &Node = ArgumentMap[A]; - Node.Definition = A; - SyntheticRoot.Uses.push_back(&Node); - return &Node; - } - }; + ArgumentGraphNode *operator[](Argument *A) { + ArgumentGraphNode &Node = ArgumentMap[A]; + Node.Definition = A; + SyntheticRoot.Uses.push_back(&Node); + return &Node; + } +}; - // This tracker checks whether callees are in the SCC, and if so it does not - // consider that a capture, instead adding it to the "Uses" list and - // continuing with the analysis. - struct ArgumentUsesTracker : public CaptureTracker { - ArgumentUsesTracker(const SmallPtrSet &SCCNodes) +/// This tracker checks whether callees are in the SCC, and if so it does not +/// consider that a capture, instead adding it to the "Uses" list and +/// continuing with the analysis. +struct ArgumentUsesTracker : public CaptureTracker { + ArgumentUsesTracker(const SCCNodeSet &SCCNodes) : Captured(false), SCCNodes(SCCNodes) {} - void tooManyUses() override { Captured = true; } + void tooManyUses() override { Captured = true; } - bool captured(const Use *U) override { - CallSite CS(U->getUser()); - if (!CS.getInstruction()) { Captured = true; return true; } - - Function *F = CS.getCalledFunction(); - if (!F || !SCCNodes.count(F)) { Captured = true; return true; } - - bool Found = false; - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - for (CallSite::arg_iterator PI = CS.arg_begin(), PE = CS.arg_end(); - PI != PE; ++PI, ++AI) { - if (AI == AE) { - assert(F->isVarArg() && "More params than args in non-varargs call"); - Captured = true; - return true; - } - if (PI == U) { - Uses.push_back(AI); - Found = true; - break; - } - } - assert(Found && "Capturing call-site captured nothing?"); - (void)Found; - return false; + bool captured(const Use *U) override { + CallSite CS(U->getUser()); + if (!CS.getInstruction()) { + Captured = true; + return true; } - bool Captured; // True only if certainly captured (used outside our SCC). - SmallVector Uses; // Uses within our SCC. + Function *F = CS.getCalledFunction(); + if (!F || F->isDeclaration() || F->mayBeOverridden() || + !SCCNodes.count(F)) { + Captured = true; + return true; + } - const SmallPtrSet &SCCNodes; - }; + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account for + // these. + + unsigned UseIndex = + std::distance(const_cast(CS.arg_begin()), U); + + assert(UseIndex < CS.data_operands_size() && + "Indirect function calls should have been filtered above!"); + + if (UseIndex >= CS.getNumArgOperands()) { + // Data operand, but not a argument operand -- must be a bundle operand + assert(CS.hasOperandBundles() && "Must be!"); + + // CaptureTracking told us that we're being captured by an operand bundle + // use. 
In this case it does not matter if the callee is within our SCC + // or not -- we've been captured in some unknown way, and we have to be + // conservative. + Captured = true; + return true; + } + + if (UseIndex >= F->arg_size()) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + Captured = true; + return true; + } + + Uses.push_back(&*std::next(F->arg_begin(), UseIndex)); + return false; + } + + bool Captured; // True only if certainly captured (used outside our SCC). + SmallVector Uses; // Uses within our SCC. + + const SCCNodeSet &SCCNodes; +}; } namespace llvm { - template<> struct GraphTraits { - typedef ArgumentGraphNode NodeType; - typedef SmallVectorImpl::iterator ChildIteratorType; +template <> struct GraphTraits { + typedef ArgumentGraphNode NodeType; + typedef SmallVectorImpl::iterator ChildIteratorType; - static inline NodeType *getEntryNode(NodeType *A) { return A; } - static inline ChildIteratorType child_begin(NodeType *N) { - return N->Uses.begin(); - } - static inline ChildIteratorType child_end(NodeType *N) { - return N->Uses.end(); - } - }; - template<> struct GraphTraits - : public GraphTraits { - static NodeType *getEntryNode(ArgumentGraph *AG) { - return AG->getEntryNode(); - } - static ChildIteratorType nodes_begin(ArgumentGraph *AG) { - return AG->begin(); - } - static ChildIteratorType nodes_end(ArgumentGraph *AG) { - return AG->end(); - } - }; + static inline NodeType *getEntryNode(NodeType *A) { return A; } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->Uses.begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->Uses.end(); + } +}; +template <> +struct GraphTraits : public GraphTraits { + static NodeType *getEntryNode(ArgumentGraph *AG) { + return AG->getEntryNode(); + } + static ChildIteratorType nodes_begin(ArgumentGraph *AG) { + return AG->begin(); + } + static ChildIteratorType nodes_end(ArgumentGraph *AG) { return AG->end(); } +}; } -// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. +/// Returns Attribute::None, Attribute::ReadOnly or Attribute::ReadNone. static Attribute::AttrKind determinePointerReadAttrs(Argument *A, - const SmallPtrSet &SCCNodes) { - - SmallVector Worklist; - SmallSet Visited; - int Count = 0; + const SmallPtrSet &SCCNodes) { + + SmallVector Worklist; + SmallSet Visited; // inalloca arguments are always clobbered by the call. if (A->hasInAllocaAttr()) @@ -425,9 +408,6 @@ determinePointerReadAttrs(Argument *A, // We don't need to track IsWritten. If A is written to, return immediately. 
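Both rewritten trackers now locate an argument's position with pointer arithmetic over the operand list instead of the old parallel-iterator walk, exploiting the layout noted above: the callee operand (and, for invokes, the successor blocks) follow the argument operands. The core of that computation, as a sketch (getArgumentIndex is a hypothetical helper, 3.8-era CallSite API):

    #include "llvm/IR/CallSite.h"
    #include <iterator>
    using namespace llvm;

    // Classify a Use of a call or invoke: the distance from arg_begin() is
    // directly a data-operand index, so anything at or past the argument
    // count must be an operand-bundle use.
    static bool getArgumentIndex(CallSite CS, const Use *U, unsigned &ArgNo) {
      unsigned UseIndex =
          std::distance(const_cast<const Use *>(CS.arg_begin()), U);
      if (UseIndex >= CS.getNumArgOperands())
        return false; // bundle operand: treat conservatively, not an argument
      ArgNo = UseIndex;
      return true;
    }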
for (Use &U : A->uses()) { - if (Count++ >= 20) - return Attribute::None; - Visited.insert(&U); Worklist.push_back(&U); } @@ -435,7 +415,6 @@ determinePointerReadAttrs(Argument *A, while (!Worklist.empty()) { Use *U = Worklist.pop_back_val(); Instruction *I = cast(U->getUser()); - Value *V = U->get(); switch (I->getOpcode()) { case Instruction::BitCast: @@ -479,24 +458,44 @@ determinePointerReadAttrs(Argument *A, return Attribute::None; } - Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (CallSite::arg_iterator A = B; A != E; ++A, ++AI) { - if (A->get() == V) { - if (AI == AE) { - assert(F->isVarArg() && - "More params than args in non-varargs call."); - return Attribute::None; - } - Captures &= !CS.doesNotCapture(A - B); - if (SCCNodes.count(AI)) - continue; - if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B)) - return Attribute::None; - if (!CS.doesNotAccessMemory(A - B)) - IsRead = true; - } + // Note: the callee and the two successor blocks *follow* the argument + // operands. This means there is no need to adjust UseIndex to account + // for these. + + unsigned UseIndex = std::distance(CS.arg_begin(), U); + + // U cannot be the callee operand use: since we're exploring the + // transitive uses of an Argument, having such a use be a callee would + // imply the CallSite is an indirect call or invoke; and we'd take the + // early exit above. + assert(UseIndex < CS.data_operands_size() && + "Data operand use expected!"); + + bool IsOperandBundleUse = UseIndex >= CS.getNumArgOperands(); + + if (UseIndex >= F->arg_size() && !IsOperandBundleUse) { + assert(F->isVarArg() && "More params than args in non-varargs call"); + return Attribute::None; } + + Captures &= !CS.doesNotCapture(UseIndex); + + // Since the optimizer (by design) cannot see the data flow corresponding + // to a operand bundle use, these cannot participate in the optimistic SCC + // analysis. Instead, we model the operand bundle uses as arguments in + // call to a function external to the SCC. + if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) || + IsOperandBundleUse) { + + // The accessors used on CallSite here do the right thing for calls and + // invokes with operand bundles. + + if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(UseIndex)) + return Attribute::None; + if (!CS.doesNotAccessMemory(UseIndex)) + IsRead = true; + } + AddUsersToWorklistIfCapturing(); break; } @@ -517,21 +516,10 @@ determinePointerReadAttrs(Argument *A, return IsRead ? Attribute::ReadOnly : Attribute::ReadNone; } -/// AddArgumentAttrs - Deduce nocapture attributes for the SCC. -bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { +/// Deduce nocapture attributes for the SCC. +static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) { bool Changed = false; - SmallPtrSet SCCNodes; - - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - if (F && !F->isDeclaration() && !F->mayBeOverridden() && - !F->hasFnAttribute(Attribute::OptimizeNone)) - SCCNodes.insert(F); - } - ArgumentGraph AG; AttrBuilder B; @@ -539,14 +527,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // Check each function in turn, determining which pointer arguments are not // captured. 
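For an argument with no SCC-internal uses, the loop below reduces to a direct CaptureTracking query plus an attribute insert at a 1-based index. A condensed sketch of that trivial case (markNoCaptureIfProven is a hypothetical name; the full pass uses its ArgumentUsesTracker instead of the plain query):

    #include "llvm/Analysis/CaptureTracking.h"
    #include "llvm/IR/Argument.h"
    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Prove the pointer argument never escapes, then attach nocapture.
    // Argument attribute indices are 1-based (0 is the return value).
    static bool markNoCaptureIfProven(Argument &A) {
      if (!A.getType()->isPointerTy() || A.hasNoCaptureAttr())
        return false;
      if (PointerMayBeCaptured(&A, /*ReturnCaptures=*/false,
                               /*StoreCaptures=*/true))
        return false;
      AttrBuilder B;
      B.addAttribute(Attribute::NoCapture);
      A.addAttr(AttributeSet::get(A.getParent()->getContext(),
                                  A.getArgNo() + 1, B));
      return true;
    }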
- for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - - if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) - // External node or function we're trying not to optimize - only a problem - // for arguments that we pass to it. - continue; - + for (Function *F : SCCNodes) { // Definitions with weak linkage may be overridden at linktime with // something that captures pointers, so treat them like declarations. if (F->isDeclaration() || F->mayBeOverridden()) @@ -556,8 +537,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // a value can't capture arguments. Don't analyze them. if (F->onlyReadsMemory() && F->doesNotThrow() && F->getReturnType()->isVoidTy()) { - for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); - A != E; ++A) { + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr()) { A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; @@ -567,26 +548,30 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { continue; } - for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); - A != E; ++A) { - if (!A->getType()->isPointerTy()) continue; + for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A != E; + ++A) { + if (!A->getType()->isPointerTy()) + continue; bool HasNonLocalUses = false; if (!A->hasNoCaptureAttr()) { ArgumentUsesTracker Tracker(SCCNodes); - PointerMayBeCaptured(A, &Tracker); + PointerMayBeCaptured(&*A, &Tracker); if (!Tracker.Captured) { if (Tracker.Uses.empty()) { // If it's trivially not captured, mark it nocapture now. - A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo()+1, B)); + A->addAttr( + AttributeSet::get(F->getContext(), A->getArgNo() + 1, B)); ++NumNoCapture; Changed = true; } else { // If it's not trivially captured and not trivially not captured, // then it must be calling into another function in our SCC. Save // its particulars for Argument-SCC analysis later. - ArgumentGraphNode *Node = AG[A]; - for (SmallVectorImpl::iterator UI = Tracker.Uses.begin(), - UE = Tracker.Uses.end(); UI != UE; ++UI) { + ArgumentGraphNode *Node = AG[&*A]; + for (SmallVectorImpl::iterator + UI = Tracker.Uses.begin(), + UE = Tracker.Uses.end(); + UI != UE; ++UI) { Node->Uses.push_back(AG[*UI]); if (*UI != A) HasNonLocalUses = true; @@ -600,9 +585,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // Note that we don't allow any calls at all here, or else our result // will be dependent on the iteration order through the functions in the // SCC. - SmallPtrSet Self; - Self.insert(A); - Attribute::AttrKind R = determinePointerReadAttrs(A, Self); + SmallPtrSet Self; + Self.insert(&*A); + Attribute::AttrKind R = determinePointerReadAttrs(&*A, Self); if (R != Attribute::None) { AttrBuilder B; B.addAttribute(R); @@ -621,10 +606,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { // made. If the definition doesn't have a 'nocapture' attribute by now, it // captures. - for (scc_iterator I = scc_begin(&AG); !I.isAtEnd(); ++I) { + for (scc_iterator I = scc_begin(&AG); !I.isAtEnd(); ++I) { const std::vector &ArgumentSCC = *I; if (ArgumentSCC.size() == 1) { - if (!ArgumentSCC[0]->Definition) continue; // synthetic root node + if (!ArgumentSCC[0]->Definition) + continue; // synthetic root node // eg. "void f(int* x) { if (...) 
f(x); }" if (ArgumentSCC[0]->Uses.size() == 1 && @@ -646,9 +632,10 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { SCCCaptured = true; } } - if (SCCCaptured) continue; + if (SCCCaptured) + continue; - SmallPtrSet ArgumentSCCNodes; + SmallPtrSet ArgumentSCCNodes; // Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for // quickly looking up whether a given Argument is in this ArgumentSCC. for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) { @@ -658,8 +645,9 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) { ArgumentGraphNode *N = *I; - for (SmallVectorImpl::iterator UI = N->Uses.begin(), - UE = N->Uses.end(); UI != UE; ++UI) { + for (SmallVectorImpl::iterator UI = N->Uses.begin(), + UE = N->Uses.end(); + UI != UE; ++UI) { Argument *A = (*UI)->Definition; if (A->hasNoCaptureAttr() || ArgumentSCCNodes.count(A)) continue; @@ -667,7 +655,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { break; } } - if (SCCCaptured) continue; + if (SCCCaptured) + continue; for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; @@ -704,8 +693,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { if (ReadAttr != Attribute::None) { AttrBuilder B, R; B.addAttribute(ReadAttr); - R.addAttribute(Attribute::ReadOnly) - .addAttribute(Attribute::ReadNone); + R.addAttribute(Attribute::ReadOnly).addAttribute(Attribute::ReadNone); for (unsigned i = 0, e = ArgumentSCC.size(); i != e; ++i) { Argument *A = ArgumentSCC[i]->Definition; // Clear out existing readonly/readnone attributes @@ -720,10 +708,11 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) { return Changed; } -/// IsFunctionMallocLike - A function is malloc-like if it returns either null -/// or a pointer that doesn't alias any other pointer visible to the caller. -bool FunctionAttrs::IsFunctionMallocLike(Function *F, - SmallPtrSet &SCCNodes) const { +/// Tests whether a function is "malloc-like". +/// +/// A function is "malloc-like" if it returns either null or a pointer that +/// doesn't alias any other pointer visible to the caller. +static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) { SmallSetVector FlowsToReturn; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) if (ReturnInst *Ret = dyn_cast(I->getTerminator())) @@ -744,39 +733,38 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, if (Instruction *RVI = dyn_cast(RetVal)) switch (RVI->getOpcode()) { - // Extend the analysis by looking upwards. - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::AddrSpaceCast: - FlowsToReturn.insert(RVI->getOperand(0)); - continue; - case Instruction::Select: { - SelectInst *SI = cast(RVI); - FlowsToReturn.insert(SI->getTrueValue()); - FlowsToReturn.insert(SI->getFalseValue()); - continue; - } - case Instruction::PHI: { - PHINode *PN = cast(RVI); - for (Value *IncValue : PN->incoming_values()) - FlowsToReturn.insert(IncValue); - continue; - } + // Extend the analysis by looking upwards. 
+ case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::AddrSpaceCast: + FlowsToReturn.insert(RVI->getOperand(0)); + continue; + case Instruction::Select: { + SelectInst *SI = cast(RVI); + FlowsToReturn.insert(SI->getTrueValue()); + FlowsToReturn.insert(SI->getFalseValue()); + continue; + } + case Instruction::PHI: { + PHINode *PN = cast(RVI); + for (Value *IncValue : PN->incoming_values()) + FlowsToReturn.insert(IncValue); + continue; + } - // Check whether the pointer came from an allocation. - case Instruction::Alloca: + // Check whether the pointer came from an allocation. + case Instruction::Alloca: + break; + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(RVI); + if (CS.paramHasAttr(0, Attribute::NoAlias)) break; - case Instruction::Call: - case Instruction::Invoke: { - CallSite CS(RVI); - if (CS.paramHasAttr(0, Attribute::NoAlias)) - break; - if (CS.getCalledFunction() && - SCCNodes.count(CS.getCalledFunction())) - break; - } // fall-through - default: - return false; // Did not come from an allocation. + if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction())) + break; + } // fall-through + default: + return false; // Did not come from an allocation. } if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) @@ -786,24 +774,11 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, return true; } -/// AddNoAliasAttrs - Deduce noalias attributes for the SCC. -bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { - SmallPtrSet SCCNodes; - - // Fill SCCNodes with the elements of the SCC. Used for quickly - // looking up whether a given CallGraphNode is in this SCC. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - SCCNodes.insert((*I)->getFunction()); - +/// Deduce noalias attributes for the SCC. +static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) { // Check each function in turn, determining which functions return noalias // pointers. - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); - - if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) - // External node or node we don't want to optimize - skip it; - return false; - + for (Function *F : SCCNodes) { // Already noalias. if (F->doesNotAlias(0)) continue; @@ -813,18 +788,17 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { if (F->isDeclaration() || F->mayBeOverridden()) return false; - // We annotate noalias return values, which are only applicable to + // We annotate noalias return values, which are only applicable to // pointer types. if (!F->getReturnType()->isPointerTy()) continue; - if (!IsFunctionMallocLike(F, SCCNodes)) + if (!isFunctionMallocLike(F, SCCNodes)) return false; } bool MadeChange = false; - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - Function *F = (*I)->getFunction(); + for (Function *F : SCCNodes) { if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) continue; @@ -836,880 +810,249 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) { return MadeChange; } -/// inferPrototypeAttributes - Analyze the name and prototype of the -/// given function and set any applicable attributes. Returns true -/// if any attributes were set and false otherwise. -bool FunctionAttrs::inferPrototypeAttributes(Function &F) { - if (F.hasFnAttribute(Attribute::OptimizeNone)) - return false; +/// Tests whether this function is known to not return null. 
+/// +/// Requires that the function returns a pointer. +/// +/// Returns true if it believes the function will not return a null, and sets +/// \p Speculative based on whether the returned conclusion is a speculative +/// conclusion due to SCC calls. +static bool isReturnNonNull(Function *F, const SCCNodeSet &SCCNodes, + const TargetLibraryInfo &TLI, bool &Speculative) { + assert(F->getReturnType()->isPointerTy() && + "nonnull only meaningful on pointer types"); + Speculative = false; - FunctionType *FTy = F.getFunctionType(); - LibFunc::Func TheLibFunc; - if (!(TLI->getLibFunc(F.getName(), TheLibFunc) && TLI->has(TheLibFunc))) - return false; + SmallSetVector FlowsToReturn; + for (BasicBlock &BB : *F) + if (auto *Ret = dyn_cast(BB.getTerminator())) + FlowsToReturn.insert(Ret->getReturnValue()); - switch (TheLibFunc) { - case LibFunc::strlen: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::strchr: - case LibFunc::strrchr: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isIntegerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - break; - case LibFunc::strtol: - case LibFunc::strtod: - case LibFunc::strtof: - case LibFunc::strtoul: - case LibFunc::strtoll: - case LibFunc::strtold: - case LibFunc::strtoull: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::strcpy: - case LibFunc::stpcpy: - case LibFunc::strcat: - case LibFunc::strncat: - case LibFunc::strncpy: - case LibFunc::stpncpy: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::strxfrm: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::strcmp: //0,1 - case LibFunc::strspn: // 0,1 - case LibFunc::strncmp: // 0,1 - case LibFunc::strcspn: //0,1 - case LibFunc::strcoll: //0,1 - case LibFunc::strcasecmp: // 0,1 - case LibFunc::strncasecmp: // - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::strstr: - case LibFunc::strpbrk: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::strtok: - case LibFunc::strtok_r: - if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::scanf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::setbuf: - case LibFunc::setvbuf: - if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::strdup: - case LibFunc::strndup: - if 
(FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::stat: - case LibFunc::statvfs: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::sscanf: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::sprintf: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::snprintf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - setOnlyReadsMemory(F, 3); - break; - case LibFunc::setitimer: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setDoesNotCapture(F, 3); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::system: - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - // May throw; "system" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::malloc: - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::memcmp: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::memchr: - case LibFunc::memrchr: - if (FTy->getNumParams() != 3) - return false; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - break; - case LibFunc::modf: - case LibFunc::modff: - case LibFunc::modfl: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::memcpy: - case LibFunc::memccpy: - case LibFunc::memmove: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::memalign: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotAlias(F, 0); - break; - case LibFunc::mkdir: - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::mktime: - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::realloc: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - break; - case LibFunc::read: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "read" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - break; - case LibFunc::rewind: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::rmdir: - case LibFunc::remove: - case LibFunc::realpath: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::rename: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::readlink: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::write: - if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "write" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::bcopy: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::bcmp: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::bzero: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::calloc: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::chmod: - case LibFunc::chown: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::ctermid: - case LibFunc::clearerr: - case LibFunc::closedir: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::atoi: - case LibFunc::atol: - case LibFunc::atof: - case LibFunc::atoll: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::access: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::fopen: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fdopen: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::feof: - case LibFunc::free: - case LibFunc::fseek: - case LibFunc::ftell: - case LibFunc::fgetc: - case LibFunc::fseeko: - case LibFunc::ftello: - case LibFunc::fileno: - case LibFunc::fflush: - case LibFunc::fclose: - case LibFunc::fsetpos: - case LibFunc::flockfile: - case LibFunc::funlockfile: - case LibFunc::ftrylockfile: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::ferror: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F); - break; - case LibFunc::fputc: - case LibFunc::fstat: - case LibFunc::frexp: - case LibFunc::frexpf: - case LibFunc::frexpl: - case LibFunc::fstatvfs: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::fgets: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - 
!FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 3); - break; - case LibFunc::fread: - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - break; - case LibFunc::fwrite: - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - break; - case LibFunc::fputs: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::fscanf: - case LibFunc::fprintf: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fgetpos: - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - break; - case LibFunc::getc: - case LibFunc::getlogin_r: - case LibFunc::getc_unlocked: - if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::getenv: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::gets: - case LibFunc::getchar: - setDoesNotThrow(F); - break; - case LibFunc::getitimer: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::getpwnam: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::ungetc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::uname: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::unlink: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::unsetenv: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::utime: - case LibFunc::utimes: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::putc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::puts: - case LibFunc::printf: - case 
LibFunc::perror: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::pread: - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "pread" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - break; - case LibFunc::pwrite: - if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) - return false; - // May throw; "pwrite" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::putchar: - setDoesNotThrow(F); - break; - case LibFunc::popen: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::pclose: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::vscanf: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::vsscanf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::vfscanf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::valloc: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::vprintf: - if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::vfprintf: - case LibFunc::vsprintf: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::vsnprintf: - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - setOnlyReadsMemory(F, 3); - break; - case LibFunc::open: - if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) - return false; - // May throw; "open" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::opendir: - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::tmpfile: - if (!FTy->getReturnType()->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - break; - case LibFunc::times: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::htonl: - case LibFunc::htons: - case LibFunc::ntohl: - case LibFunc::ntohs: - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - break; - case LibFunc::lstat: - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::lchown: - if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::qsort: - if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) - return false; - // May throw; places call through function pointer. - setDoesNotCapture(F, 4); - break; - case LibFunc::dunder_strdup: - case LibFunc::dunder_strndup: - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::dunder_strtok_r: - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::under_IO_getc: - if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - break; - case LibFunc::under_IO_putc: - if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - break; - case LibFunc::dunder_isoc99_scanf: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::stat64: - case LibFunc::lstat64: - case LibFunc::statvfs64: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - break; - case LibFunc::dunder_isoc99_sscanf: - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case LibFunc::fopen64: - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - return false; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - setOnlyReadsMemory(F, 1); - setOnlyReadsMemory(F, 2); - break; - case 
LibFunc::fseeko64:
-  case LibFunc::ftello64:
-    if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
-      return false;
-    setDoesNotThrow(F);
-    setDoesNotCapture(F, 1);
-    break;
-  case LibFunc::tmpfile64:
-    if (!FTy->getReturnType()->isPointerTy())
-      return false;
-    setDoesNotThrow(F);
-    setDoesNotAlias(F, 0);
-    break;
-  case LibFunc::fstat64:
-  case LibFunc::fstatvfs64:
-    if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
-      return false;
-    setDoesNotThrow(F);
-    setDoesNotCapture(F, 2);
-    break;
-  case LibFunc::open64:
-    if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
-      return false;
-    // May throw; "open" is a valid pthread cancellation point.
-    setDoesNotCapture(F, 1);
-    setOnlyReadsMemory(F, 1);
-    break;
-  case LibFunc::gettimeofday:
-    if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() ||
-        !FTy->getParamType(1)->isPointerTy())
-      return false;
-    // Currently some platforms have the restrict keyword on the arguments to
-    // gettimeofday. To be conservative, do not add noalias to gettimeofday's
-    // arguments.
-    setDoesNotThrow(F);
-    setDoesNotCapture(F, 1);
-    setDoesNotCapture(F, 2);
-    break;
-  default:
-    // Didn't mark any attributes.
-    return false;
+  for (unsigned i = 0; i != FlowsToReturn.size(); ++i) {
+    Value *RetVal = FlowsToReturn[i];
+
+    // If this value is locally known to be non-null, we're good.
+    if (isKnownNonNull(RetVal, &TLI))
+      continue;
+
+    // Otherwise, we need to look upwards since we can't make any local
+    // conclusions.
+    Instruction *RVI = dyn_cast<Instruction>(RetVal);
+    if (!RVI)
+      return false;
+    switch (RVI->getOpcode()) {
+    // Extend the analysis by looking upwards.
+    case Instruction::BitCast:
+    case Instruction::GetElementPtr:
+    case Instruction::AddrSpaceCast:
+      FlowsToReturn.insert(RVI->getOperand(0));
+      continue;
+    case Instruction::Select: {
+      SelectInst *SI = cast<SelectInst>(RVI);
+      FlowsToReturn.insert(SI->getTrueValue());
+      FlowsToReturn.insert(SI->getFalseValue());
+      continue;
+    }
+    case Instruction::PHI: {
+      PHINode *PN = cast<PHINode>(RVI);
+      for (int i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+        FlowsToReturn.insert(PN->getIncomingValue(i));
+      continue;
+    }
+    case Instruction::Call:
+    case Instruction::Invoke: {
+      CallSite CS(RVI);
+      Function *Callee = CS.getCalledFunction();
+      // A call to a node within the SCC is assumed to return null until
+      // proven otherwise.
+      if (Callee && SCCNodes.count(Callee)) {
+        Speculative = true;
+        continue;
+      }
+      return false;
+    }
+    default:
+      return false; // Unknown source, may be null.
+    }
+    llvm_unreachable("should have either continued or returned");
   }
   return true;
 }
 
-/// annotateLibraryCalls - Adds attributes to well-known standard library
-/// call declarations.
-bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
+/// Deduce nonnull attributes for the SCC.
+static bool addNonNullAttrs(const SCCNodeSet &SCCNodes,
+                            const TargetLibraryInfo &TLI) {
+  // Speculatively assume that all functions in the SCC return only nonnull
+  // pointers. We may refute this as we analyze functions.
+  bool SCCReturnsNonNull = true;
+
   bool MadeChange = false;
 
-  // Check each function in turn annotating well-known library function
-  // declarations with attributes.
-  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
-    Function *F = (*I)->getFunction();
+  // Check each function in turn, determining which functions return nonnull
+  // pointers.
+  for (Function *F : SCCNodes) {
+    // Already nonnull.
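For illustration only (not part of this patch), here is hypothetical C++ source whose compiled returns flow through exactly the kinds of sources the walk above accepts: a GEP of a global, and a select of two nonnull values including a call into the same SCC that gets speculated nonnull. All names are invented.

static int Storage[4];
int *viaGEP() { return &Storage[2]; }   // becomes a GetElementPtr source
int *viaSelect(bool C) {
  return C ? &Storage[0] : viaGEP();    // a Select of two nonnull values
}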
+    if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+                                        Attribute::NonNull))
+      continue;
 
-    if (F && F->isDeclaration())
-      MadeChange |= inferPrototypeAttributes(*F);
+    // Definitions with weak linkage may be overridden at link time, so
+    // treat them like declarations.
+    if (F->isDeclaration() || F->mayBeOverridden())
+      return false;
+
+    // We annotate nonnull return values, which are only applicable to
+    // pointer types.
+    if (!F->getReturnType()->isPointerTy())
+      continue;
+
+    bool Speculative = false;
+    if (isReturnNonNull(F, SCCNodes, TLI, Speculative)) {
+      if (!Speculative) {
+        // Mark the function eagerly since we may discover a function
+        // which prevents us from speculating about the entire SCC.
+        DEBUG(dbgs() << "Eagerly marking " << F->getName() << " as nonnull\n");
+        F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+        ++NumNonNullReturn;
+        MadeChange = true;
+      }
+      continue;
+    }
+    // At least one function returns something which could be null; we can't
+    // speculate any more.
+    SCCReturnsNonNull = false;
+  }
+
+  if (SCCReturnsNonNull) {
+    for (Function *F : SCCNodes) {
+      if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex,
+                                          Attribute::NonNull) ||
+          !F->getReturnType()->isPointerTy())
+        continue;
+
+      DEBUG(dbgs() << "SCC marking " << F->getName() << " as nonnull\n");
+      F->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+      ++NumNonNullReturn;
+      MadeChange = true;
+    }
   }
 
   return MadeChange;
 }
 
-bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
-  AA = &getAnalysis<AliasAnalysis>();
-  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+static bool setDoesNotRecurse(Function &F) {
+  if (F.doesNotRecurse())
+    return false;
+  F.setDoesNotRecurse();
+  ++NumNoRecurse;
+  return true;
+}
 
-  bool Changed = annotateLibraryCalls(SCC);
-  Changed |= AddReadAttrs(SCC);
-  Changed |= AddArgumentAttrs(SCC);
-  Changed |= AddNoAliasAttrs(SCC);
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
+                              SmallVectorImpl<WeakVH> &Revisit) {
+  // Try to identify functions that do not recurse.
+
+  // If the SCC contains multiple nodes we know for sure there is recursion.
+  if (!SCC.isSingular())
+    return false;
+
+  const CallGraphNode *CGN = *SCC.begin();
+  Function *F = CGN->getFunction();
+  if (!F || F->isDeclaration() || F->doesNotRecurse())
+    return false;
+
+  // If all of the calls in F are identifiable and are to norecurse functions,
+  // F is norecurse. This check also detects self-recursion, as F is not
+  // currently marked norecurse, so a call from F to itself will not count as
+  // a call to a norecurse function.
+  if (std::all_of(CGN->begin(), CGN->end(),
+                  [](const CallGraphNode::CallRecord &CR) {
+                    Function *F = CR.second->getFunction();
+                    return F && F->doesNotRecurse();
+                  }))
+    // Every callee of F is norecurse, so F cannot recurse either.
+    return setDoesNotRecurse(*F);
+
+  // We know that F is not obviously recursive, but we haven't been able to
+  // prove that it doesn't actually recurse. Add it to the Revisit list to try
+  // again top-down later.
+  Revisit.push_back(F);
+  return false;
+}
+
+static bool addNoRecurseAttrsTopDownOnly(Function *F) {
+  // If F is internal and all uses are in norecurse functions, then F is also
+  // norecurse.
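A hypothetical example (not from the patch) of where the norecurse rules fire: 'leaf' makes no calls at all, so the single-node SCC check above marks it norecurse bottom-up; 'driver' then calls only norecurse functions and is marked norecurse as well.

static int leaf(int X) { return X * X + 1; }        // no calls: norecurse
int driver(int X) { return leaf(X) + leaf(X + 1); } // all callees norecurse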
+  if (F->doesNotRecurse())
+    return false;
+  if (F->hasInternalLinkage()) {
+    for (auto *U : F->users())
+      if (auto *I = dyn_cast<Instruction>(U)) {
+        if (!I->getParent()->getParent()->doesNotRecurse())
+          return false;
+      } else {
+        return false;
+      }
+    return setDoesNotRecurse(*F);
+  }
+  return false;
+}
+
+bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  bool Changed = false;
+
+  // We compute dedicated AA results for each function in the SCC as needed. We
+  // use a lambda referencing external objects so that they live long enough to
+  // be queried, but we re-use them each time.
+  Optional<BasicAAResult> BAR;
+  Optional<AAResults> AAR;
+  auto AARGetter = [&](Function &F) -> AAResults & {
+    BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+    AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+    return *AAR;
+  };
+
+  // Fill SCCNodes with the elements of the SCC. Used for quickly looking up
+  // whether a given CallGraphNode is in this SCC. Also track whether there are
+  // any external or opt-none nodes that will prevent us from optimizing any
+  // part of the SCC.
+  SCCNodeSet SCCNodes;
+  bool ExternalNode = false;
+  for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+    Function *F = (*I)->getFunction();
+    if (!F || F->hasFnAttribute(Attribute::OptimizeNone)) {
+      // External node or a function we're trying not to optimize - we avoid
+      // both transforming it and leveraging any information it provides.
+      ExternalNode = true;
+      continue;
+    }
+
+    SCCNodes.insert(F);
+  }
+
+  Changed |= addReadAttrs(SCCNodes, AARGetter);
+  Changed |= addArgumentAttrs(SCCNodes);
+
+  // If we have no external nodes participating in the SCC, we can deduce some
+  // more precise attributes as well.
+  if (!ExternalNode) {
+    Changed |= addNoAliasAttrs(SCCNodes);
+    Changed |= addNonNullAttrs(SCCNodes, *TLI);
+  }
+
+  Changed |= addNoRecurseAttrs(SCC, Revisit);
+  return Changed;
+}
+
+bool FunctionAttrs::doFinalization(CallGraph &CG) {
+  bool Changed = false;
+  // When iterating over SCCs we visit functions in a bottom-up fashion. Some
+  // of the rules we have for identifying norecurse functions work best with a
+  // top-down walk, so look again at all the functions we previously marked as
+  // worth revisiting, in top-down order.
+  for (auto &F : reverse(Revisit))
+    if (F)
+      Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value *)F));
   return Changed;
 }
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
new file mode 100644
index 000000000000..d8b677b966f2
--- /dev/null
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -0,0 +1,433 @@
+//===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements function import based on summaries.
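The AARGetter lambda above hands out a reference into Optionals owned by the enclosing frame, re-emplacing them for each function queried. A self-contained sketch of that idiom, with invented types in case the lifetime dance is unclear (C++17, not LLVM API):

#include <optional>
#include <string>

struct Result { std::string Data; };

// Stand-in for the expensive per-key construction (hypothetical).
static Result buildResult(int Key) { return Result{std::to_string(Key)}; }

int main() {
  std::optional<Result> Storage; // outlives every reference handed out
  auto Getter = [&](int Key) -> Result & {
    Storage.emplace(buildResult(Key)); // rebuild into the same slot per query
    return *Storage;
  };
  return Getter(42).Data.empty() ? 1 : 0;
}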
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/FunctionImport.h"
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Object/FunctionIndexObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/SourceMgr.h"
+
+#include <map>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "function-import"
+
+/// Limit on instruction count of imported functions.
+static cl::opt<unsigned> ImportInstrLimit(
+    "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
+    cl::desc("Only import functions with fewer than N instructions"));
+
+// Lazily load a module from \p FileName in \p Context.
+static std::unique_ptr<Module> loadFile(const std::string &FileName,
+                                        LLVMContext &Context) {
+  SMDiagnostic Err;
+  DEBUG(dbgs() << "Loading '" << FileName << "'\n");
+  std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context);
+  if (!Result) {
+    Err.print("function-import", errs());
+    return nullptr;
+  }
+
+  Result->materializeMetadata();
+  UpgradeDebugInfo(*Result);
+
+  return Result;
+}
+
+namespace {
+/// Helper to load a Module from a file on demand and cache it for subsequent
+/// queries. It can be used with the FunctionImporter.
+class ModuleLazyLoaderCache {
+  /// Cache of lazily loaded modules for import.
+  StringMap<std::unique_ptr<Module>> ModuleMap;
+
+  /// Retrieve a Module from the cache or lazily load it on demand.
+  std::function<std::unique_ptr<Module>(StringRef FileName)> createLazyModule;
+
+public:
+  /// Create the loader; loaded Modules are produced by \p createLazyModule.
+  ModuleLazyLoaderCache(std::function<
+      std::unique_ptr<Module>(StringRef FileName)> createLazyModule)
+      : createLazyModule(createLazyModule) {}
+
+  /// Retrieve a Module from the cache or lazily load it on demand.
+  Module &operator()(StringRef FileName);
+
+  std::unique_ptr<Module> takeModule(StringRef FileName) {
+    auto I = ModuleMap.find(FileName);
+    assert(I != ModuleMap.end());
+    std::unique_ptr<Module> Ret = std::move(I->second);
+    ModuleMap.erase(I);
+    return Ret;
+  }
+};
+
+// Get a Module for \p FileName from the cache, or load it lazily.
+Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) {
+  auto &Module = ModuleMap[Identifier];
+  if (!Module)
+    Module = createLazyModule(Identifier);
+  return *Module;
+}
+} // anonymous namespace
+
+/// Walk through the instructions in \p F looking for external
+/// calls not already in the \p CalledFunctions set. If any are
+/// found they are added to the \p Worklist for importing.
+static void findExternalCalls(const Module &DestModule, Function &F,
+                              const FunctionInfoIndex &Index,
+                              StringSet<> &CalledFunctions,
+                              SmallVector<StringRef, 64> &Worklist) {
+  // We need to suffix internal function calls imported from other modules;
+  // prepare the suffix ahead of time.
+  std::string Suffix;
+  if (F.getParent() != &DestModule)
+    Suffix =
+        (Twine(".llvm.") +
+         Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str();
+
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      if (isa<CallInst>(I)) {
+        auto CalledFunction = cast<CallInst>(I).getCalledFunction();
+        // Insert any new external calls that have not already been
+        // added to the set/worklist.
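ModuleLazyLoaderCache is a memoizing factory: the loader callback runs at most once per file, and later lookups hit the cache. A generic, self-contained sketch of the same pattern with assumed names (standard C++ only, not the LLVM types):

#include <functional>
#include <map>
#include <memory>
#include <string>

template <typename T> class LazyCache {
  std::map<std::string, std::unique_ptr<T>> Cache;
  std::function<std::unique_ptr<T>(const std::string &)> Factory;

public:
  explicit LazyCache(std::function<std::unique_ptr<T>(const std::string &)> F)
      : Factory(std::move(F)) {}

  T &operator()(const std::string &Key) {
    auto &Slot = Cache[Key];
    if (!Slot)
      Slot = Factory(Key); // invoked on first use of Key only
    return *Slot;
  }
};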
+        if (!CalledFunction || !CalledFunction->hasName())
+          continue;
+        // Ignore intrinsics early.
+        if (CalledFunction->isIntrinsic()) {
+          assert(CalledFunction->getIntrinsicID() != 0);
+          continue;
+        }
+        auto ImportedName = CalledFunction->getName();
+        auto Renamed = (ImportedName + Suffix).str();
+        // Rename internal functions.
+        if (CalledFunction->hasInternalLinkage()) {
+          ImportedName = Renamed;
+        }
+        auto It = CalledFunctions.insert(ImportedName);
+        if (!It.second) {
+          // This is a call to a function we already considered; skip it.
+          continue;
+        }
+        // Ignore functions already present in the destination module.
+        auto *SrcGV = DestModule.getNamedValue(ImportedName);
+        if (SrcGV) {
+          assert(isa<Function>(SrcGV) && "Name collision during import");
+          if (!cast<Function>(SrcGV)->isDeclaration()) {
+            DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
+                         << ImportedName << " already in DestinationModule\n");
+            continue;
+          }
+        }
+
+        Worklist.push_back(It.first->getKey());
+        DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                     << ": Adding callee for : " << ImportedName << " : "
+                     << F.getName() << "\n");
+      }
+    }
+  }
+}
+
+// Helper function: given a worklist and an index, process the worklist and
+// decide what to import based on the summary information.
+//
+// Nothing is actually imported here; functions are materialized in their
+// source module and analyzed there.
+//
+// \p ModuleToFunctionsToImportMap is filled with the set of Functions to
+// import per Module.
+static void GetImportList(Module &DestModule,
+                          SmallVector<StringRef, 64> &Worklist,
+                          StringSet<> &CalledFunctions,
+                          std::map<StringRef, DenseSet<const GlobalValue *>>
+                              &ModuleToFunctionsToImportMap,
+                          const FunctionInfoIndex &Index,
+                          ModuleLazyLoaderCache &ModuleLoaderCache) {
+  while (!Worklist.empty()) {
+    auto CalledFunctionName = Worklist.pop_back_val();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for "
+                 << CalledFunctionName << "\n");
+
+    // Try to get a summary for this function call.
+    auto InfoList = Index.findFunctionInfoList(CalledFunctionName);
+    if (InfoList == Index.end()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for "
+                   << CalledFunctionName << " Ignoring.\n");
+      continue;
+    }
+    assert(!InfoList->second.empty() && "No summary, error at import?");
+
+    // Comdat can have multiple entries; FIXME: what do we do with them?
+    auto &Info = InfoList->second[0];
+    assert(Info && "Nullptr in list, error importing summaries?\n");
+
+    auto *Summary = Info->functionSummary();
+    if (!Summary) {
+      // FIXME: in case we are lazy-loading summaries, we can do it now.
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Missing summary for " << CalledFunctionName
+                   << ", error at import?\n");
+      llvm_unreachable("Missing summary");
+    }
+
+    if (Summary->instCount() > ImportInstrLimit) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of "
+                   << CalledFunctionName << " with " << Summary->instCount()
+                   << " instructions (limit " << ImportInstrLimit << ")\n");
+      continue;
+    }
+
+    // Get the module path from the summary.
+    auto ModuleIdentifier = Summary->modulePath();
+    DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing "
+                 << CalledFunctionName << " from " << ModuleIdentifier << "\n");
+
+    auto &SrcModule = ModuleLoaderCache(ModuleIdentifier);
+
+    // The function that we will import!
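GetImportList is a worklist algorithm: pop a candidate, consult the summary, and push newly discovered callees until a fixed point. A generic sketch of that shape, with assumed callbacks standing in for the summary index and call graph:

#include <functional>
#include <set>
#include <string>
#include <vector>

void selectImports(std::vector<std::string> Worklist,
                   std::function<bool(const std::string &)> WorthImporting,
                   std::function<std::vector<std::string>(const std::string &)>
                       CalleesOf,
                   std::set<std::string> &Chosen) {
  std::set<std::string> Seen(Worklist.begin(), Worklist.end());
  while (!Worklist.empty()) {
    std::string Name = Worklist.back();
    Worklist.pop_back();
    if (!WorthImporting(Name)) // e.g. over the instruction limit
      continue;
    Chosen.insert(Name);
    for (const std::string &Callee : CalleesOf(Name))
      if (Seen.insert(Callee).second) // enqueue each callee only once
        Worklist.push_back(Callee);
  }
}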
+    GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName);
+
+    if (!SGV) {
+      // The destination module references functions by their renamed names
+      // when importing a function that was originally local in the source
+      // module. The source module we have might not have been renamed, so we
+      // try to remove the suffix added during the renaming to recover the
+      // original name in the source module.
+      std::pair<StringRef, StringRef> Split =
+          CalledFunctionName.split(".llvm.");
+      SGV = SrcModule.getNamedValue(Split.first);
+      assert(SGV && "Can't find function to import in source module");
+    }
+    if (!SGV) {
+      report_fatal_error(Twine("Can't load function '") + CalledFunctionName +
+                         "' in Module '" + SrcModule.getModuleIdentifier() +
+                         "', error in the summary?\n");
+    }
+
+    Function *F = dyn_cast<Function>(SGV);
+    if (!F && isa<GlobalAlias>(SGV)) {
+      auto *SGA = dyn_cast<GlobalAlias>(SGV);
+      F = dyn_cast<Function>(SGA->getBaseObject());
+      CalledFunctionName = F->getName();
+    }
+    assert(F && "Imported Function is ... not a Function");
+
+    // We cannot import weak_any functions/aliases without possibly affecting
+    // the order they are seen and selected by the linker, changing program
+    // semantics.
+    if (SGV->hasWeakAnyLinkage()) {
+      DEBUG(dbgs() << DestModule.getModuleIdentifier()
+                   << ": Ignoring import request for weak-any "
+                   << (isa<Function>(SGV) ? "function " : "alias ")
+                   << CalledFunctionName << " from "
+                   << SrcModule.getModuleIdentifier() << "\n");
+      continue;
+    }
+
+    // Add the function to the import list.
+    auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()];
+    Entry.insert(F);
+
+    // Process the newly imported function and add its callees to the worklist.
+    F->materialize();
+    findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist);
+  }
+}
+
+// Automatically import functions in Module \p DestModule based on the
+// summaries index.
+//
+// The current implementation imports every called function that exists in the
+// summaries index.
+bool FunctionImporter::importFunctions(Module &DestModule) {
+  DEBUG(dbgs() << "Starting import for Module "
+               << DestModule.getModuleIdentifier() << "\n");
+  unsigned ImportedCount = 0;
+
+  /// First step is collecting the called external functions.
+  StringSet<> CalledFunctions;
+  SmallVector<StringRef, 64> Worklist;
+  for (auto &F : DestModule) {
+    if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone))
+      continue;
+    findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist);
+  }
+  if (Worklist.empty())
+    return false;
+
+  /// Second step: for every call to an external function, try to import it.
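The ".llvm." suffix handling above runs in both directions. A small sketch of the scheme using plain std::string (the real code uses Twine and StringRef::split; names here are invented):

#include <string>

// An internal function imported from the module with the given index id gets
// a ".llvm.<id>" suffix appended to its name.
std::string promotedName(const std::string &Name, unsigned ModuleId) {
  return Name + ".llvm." + std::to_string(ModuleId);
}

// The reverse lookup strips everything from ".llvm." onward; if the marker is
// absent, the whole name is returned, matching StringRef::split's behavior.
std::string originalName(const std::string &Promoted) {
  return Promoted.substr(0, Promoted.find(".llvm."));
}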
+
+  // Linker that will be used for importing functions.
+  Linker TheLinker(DestModule);
+
+  // Map of Module -> list of Functions to import from the Module.
+  std::map<StringRef, DenseSet<const GlobalValue *>>
+      ModuleToFunctionsToImportMap;
+
+  // Analyze the summaries and get the list of functions to import by
+  // populating ModuleToFunctionsToImportMap.
+  ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader);
+  GetImportList(DestModule, Worklist, CalledFunctions,
+                ModuleToFunctionsToImportMap, Index, ModuleLoaderCache);
+  assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList");
+
+  StringMap<std::unique_ptr<DenseMap<unsigned, MDNode *>>>
+      ModuleToTempMDValsMap;
+
+  // Do the actual import of functions now, one Module at a time.
+  for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) {
+    // Get the module for the import.
+    auto &FunctionsToImport = FunctionsToImportPerModule.second;
+    std::unique_ptr<Module> SrcModule =
+        ModuleLoaderCache.takeModule(FunctionsToImportPerModule.first);
+    assert(&DestModule.getContext() == &SrcModule->getContext() &&
+           "Context mismatch");
+
+    // Save the mapping of value ids to temporary metadata created when
+    // importing this function. If we have already imported from this module,
+    // add new temporary metadata to the existing mapping.
+    auto &TempMDVals = ModuleToTempMDValsMap[SrcModule->getModuleIdentifier()];
+    if (!TempMDVals)
+      TempMDVals = llvm::make_unique<DenseMap<unsigned, MDNode *>>();
+
+    // Link in the specified functions.
+    if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
+                               &Index, &FunctionsToImport, TempMDVals.get()))
+      report_fatal_error("Function Import: link error");
+
+    ImportedCount += FunctionsToImport.size();
+  }
+
+  // Now link in metadata for all modules from which we imported functions.
+  for (StringMapEntry<std::unique_ptr<DenseMap<unsigned, MDNode *>>> &SME :
+       ModuleToTempMDValsMap) {
+    // Load the specified source module.
+    auto &SrcModule = ModuleLoaderCache(SME.getKey());
+
+    // Link in all necessary metadata from this module.
+    if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
+      return false;
+  }
+
+  DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
+               << DestModule.getModuleIdentifier() << "\n");
+  return ImportedCount;
+}
+
+/// Summary file to use for function importing when using -function-import from
+/// the command line.
+static cl::opt<std::string>
+    SummaryFile("summary-file",
+                cl::desc("The summary file to use for function importing."));
+
+static void diagnosticHandler(const DiagnosticInfo &DI) {
+  raw_ostream &OS = errs();
+  DiagnosticPrinterRawOStream DP(OS);
+  DI.print(DP);
+  OS << '\n';
+}
+
+/// Parse the function index out of an IR file and return the function
+/// index object if found, or nullptr if not.
+static std::unique_ptr<FunctionInfoIndex>
+getFunctionIndexForFile(StringRef Path, std::string &Error,
+                        DiagnosticHandlerFunction DiagnosticHandler) {
+  std::unique_ptr<MemoryBuffer> Buffer;
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+      MemoryBuffer::getFile(Path);
+  if (std::error_code EC = BufferOrErr.getError()) {
+    Error = EC.message();
+    return nullptr;
+  }
+  Buffer = std::move(BufferOrErr.get());
+  ErrorOr<std::unique_ptr<object::FunctionIndexObjectFile>> ObjOrErr =
+      object::FunctionIndexObjectFile::create(Buffer->getMemBufferRef(),
+                                              DiagnosticHandler);
+  if (std::error_code EC = ObjOrErr.getError()) {
+    Error = EC.message();
+    return nullptr;
+  }
+  return (*ObjOrErr)->takeIndex();
+}
+
+namespace {
+/// Pass that performs cross-module function import provided a summary file.
+class FunctionImportPass : public ModulePass {
+  /// Optional function summary index to use for importing; otherwise
+  /// the summary-file option must be specified.
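Judging from the flags defined in this file, a plausible way to exercise the pass from the opt tool would be `opt -function-import -summary-file=<index file> <module.bc>`; this invocation is inferred from the code above rather than taken from any documentation.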
+  const FunctionInfoIndex *Index;
+
+public:
+  /// Pass identification, replacement for typeid.
+  static char ID;
+
+  /// Specify pass name for debug output.
+  const char *getPassName() const override {
+    return "Function Importing";
+  }
+
+  explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr)
+      : ModulePass(ID), Index(Index) {}
+
+  bool runOnModule(Module &M) override {
+    if (SummaryFile.empty() && !Index)
+      report_fatal_error("error: -function-import requires -summary-file or "
+                         "file from frontend\n");
+    std::unique_ptr<FunctionInfoIndex> IndexPtr;
+    if (!SummaryFile.empty()) {
+      if (Index)
+        report_fatal_error("error: -summary-file and index from frontend\n");
+      std::string Error;
+      IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler);
+      if (!IndexPtr) {
+        errs() << "Error loading file '" << SummaryFile << "': " << Error
+               << "\n";
+        return false;
+      }
+      Index = IndexPtr.get();
+    }
+
+    // Perform the import now.
+    auto ModuleLoader = [&M](StringRef Identifier) {
+      return loadFile(Identifier, M.getContext());
+    };
+    FunctionImporter Importer(*Index, ModuleLoader);
+    return Importer.importFunctions(M);
+  }
+};
+} // anonymous namespace
+
+char FunctionImportPass::ID = 0;
+INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
+                      "Summary Based Function Import", false, false)
+INITIALIZE_PASS_END(FunctionImportPass, "function-import",
+                    "Summary Based Function Import", false, false)
+
+namespace llvm {
+Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) {
+  return new FunctionImportPass(Index);
+}
+}
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 61d0ff94a343..9b276ed28e2e 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -92,33 +92,28 @@ bool GlobalDCE::runOnModule(Module &M) {
         ComdatMembers.insert(std::make_pair(C, &GA));
 
   // Loop over the module, adding globals which are obviously necessary.
-  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
-    Changed |= RemoveUnusedGlobalValue(*I);
+  for (Function &F : M) {
+    Changed |= RemoveUnusedGlobalValue(F);
     // Functions with external linkage are needed if they have a body
-    if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
-      if (!I->isDiscardableIfUnused())
-        GlobalIsNeeded(I);
-    }
+    if (!F.isDeclaration() && !F.hasAvailableExternallyLinkage())
+      if (!F.isDiscardableIfUnused())
+        GlobalIsNeeded(&F);
   }
 
-  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
-       I != E; ++I) {
-    Changed |= RemoveUnusedGlobalValue(*I);
+  for (GlobalVariable &GV : M.globals()) {
+    Changed |= RemoveUnusedGlobalValue(GV);
     // Externally visible & appending globals are needed, if they have an
     // initializer.
-    if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) {
-      if (!I->isDiscardableIfUnused())
-        GlobalIsNeeded(I);
-    }
+    if (!GV.isDeclaration() && !GV.hasAvailableExternallyLinkage())
+      if (!GV.isDiscardableIfUnused())
+        GlobalIsNeeded(&GV);
   }
 
-  for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end();
-       I != E; ++I) {
-    Changed |= RemoveUnusedGlobalValue(*I);
+  for (GlobalAlias &GA : M.aliases()) {
+    Changed |= RemoveUnusedGlobalValue(GA);
     // Externally visible aliases are needed.
- if (!I->isDiscardableIfUnused()) { - GlobalIsNeeded(I); - } + if (!GA.isDiscardableIfUnused()) + GlobalIsNeeded(&GA); } // Now that all globals which are needed are in the AliveGlobals set, we loop @@ -126,52 +121,50 @@ bool GlobalDCE::runOnModule(Module &M) { // // The first pass is to drop initializers of global variables which are dead. - std::vector DeadGlobalVars; // Keep track of dead globals - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); - I != E; ++I) - if (!AliveGlobals.count(I)) { - DeadGlobalVars.push_back(I); // Keep track of dead globals - if (I->hasInitializer()) { - Constant *Init = I->getInitializer(); - I->setInitializer(nullptr); + std::vector DeadGlobalVars; // Keep track of dead globals + for (GlobalVariable &GV : M.globals()) + if (!AliveGlobals.count(&GV)) { + DeadGlobalVars.push_back(&GV); // Keep track of dead globals + if (GV.hasInitializer()) { + Constant *Init = GV.getInitializer(); + GV.setInitializer(nullptr); if (isSafeToDestroyConstant(Init)) Init->destroyConstant(); } } // The second pass drops the bodies of functions which are dead... - std::vector DeadFunctions; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - if (!AliveGlobals.count(I)) { - DeadFunctions.push_back(I); // Keep track of dead globals - if (!I->isDeclaration()) - I->deleteBody(); + std::vector DeadFunctions; + for (Function &F : M) + if (!AliveGlobals.count(&F)) { + DeadFunctions.push_back(&F); // Keep track of dead globals + if (!F.isDeclaration()) + F.deleteBody(); } // The third pass drops targets of aliases which are dead... std::vector DeadAliases; - for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; - ++I) - if (!AliveGlobals.count(I)) { - DeadAliases.push_back(I); - I->setAliasee(nullptr); + for (GlobalAlias &GA : M.aliases()) + if (!AliveGlobals.count(&GA)) { + DeadAliases.push_back(&GA); + GA.setAliasee(nullptr); } if (!DeadFunctions.empty()) { // Now that all interferences have been dropped, delete the actual objects // themselves. - for (unsigned i = 0, e = DeadFunctions.size(); i != e; ++i) { - RemoveUnusedGlobalValue(*DeadFunctions[i]); - M.getFunctionList().erase(DeadFunctions[i]); + for (Function *F : DeadFunctions) { + RemoveUnusedGlobalValue(*F); + M.getFunctionList().erase(F); } NumFunctions += DeadFunctions.size(); Changed = true; } if (!DeadGlobalVars.empty()) { - for (unsigned i = 0, e = DeadGlobalVars.size(); i != e; ++i) { - RemoveUnusedGlobalValue(*DeadGlobalVars[i]); - M.getGlobalList().erase(DeadGlobalVars[i]); + for (GlobalVariable *GV : DeadGlobalVars) { + RemoveUnusedGlobalValue(*GV); + M.getGlobalList().erase(GV); } NumVariables += DeadGlobalVars.size(); Changed = true; @@ -179,9 +172,9 @@ bool GlobalDCE::runOnModule(Module &M) { // Now delete any dead aliases. if (!DeadAliases.empty()) { - for (unsigned i = 0, e = DeadAliases.size(); i != e; ++i) { - RemoveUnusedGlobalValue(*DeadAliases[i]); - M.getAliasList().erase(DeadAliases[i]); + for (GlobalAlias *GA : DeadAliases) { + RemoveUnusedGlobalValue(*GA); + M.getAliasList().erase(GA); } NumAliases += DeadAliases.size(); Changed = true; @@ -222,21 +215,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) { // any globals used will be marked as needed. 
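The deletion sequence above is a sever-then-erase pattern: initializers, bodies, and aliasees are dropped first so that mutually-referencing dead globals no longer keep each other alive, and only then are the objects erased. A generic sketch of the same idea with invented types (assumes Dead is a subset of Graph):

#include <algorithm>
#include <memory>
#include <vector>

struct Node { std::vector<Node *> Refs; };

void eraseDead(std::vector<std::unique_ptr<Node>> &Graph,
               const std::vector<Node *> &Dead) {
  for (Node *N : Dead)
    N->Refs.clear(); // sever references first, so erasure order is arbitrary
  for (Node *N : Dead)
    Graph.erase(std::find_if(Graph.begin(), Graph.end(),
                             [N](const std::unique_ptr<Node> &P) {
                               return P.get() == N;
                             }));
}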
Function *F = cast(G); - if (F->hasPrefixData()) - MarkUsedGlobalsAsNeeded(F->getPrefixData()); + for (Use &U : F->operands()) + MarkUsedGlobalsAsNeeded(cast(U.get())); - if (F->hasPrologueData()) - MarkUsedGlobalsAsNeeded(F->getPrologueData()); - - if (F->hasPersonalityFn()) - MarkUsedGlobalsAsNeeded(F->getPersonalityFn()); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - for (User::op_iterator U = I->op_begin(), E = I->op_end(); U != E; ++U) - if (GlobalValue *GV = dyn_cast(*U)) + for (BasicBlock &BB : *F) + for (Instruction &I : BB) + for (Use &U : I.operands()) + if (GlobalValue *GV = dyn_cast(U)) GlobalIsNeeded(GV); - else if (Constant *C = dyn_cast(*U)) + else if (Constant *C = dyn_cast(U)) MarkUsedGlobalsAsNeeded(C); } } @@ -247,9 +234,9 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { // Loop over all of the operands of the constant, adding any globals they // use to the list of needed globals. - for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) { + for (Use &U : C->operands()) { // If we've already processed this constant there's no need to do it again. - Constant *Op = dyn_cast(*I); + Constant *Op = dyn_cast(U); if (Op && SeenConstants.insert(Op).second) MarkUsedGlobalsAsNeeded(Op); } @@ -262,7 +249,8 @@ void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) { // might make it deader. // bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) { - if (GV.use_empty()) return false; + if (GV.use_empty()) + return false; GV.removeDeadConstantUsers(); return GV.use_empty(); } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 5ffe15dbd31d..fd7736905fe8 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" @@ -54,7 +55,6 @@ STATISTIC(NumSRA , "Number of aggregate globals broken into scalars"); STATISTIC(NumHeapSRA , "Number of heap objects SRA'd"); STATISTIC(NumSubstitute,"Number of globals with initializers stored into them"); STATISTIC(NumDeleted , "Number of globals deleted"); -STATISTIC(NumFnDeleted , "Number of functions deleted"); STATISTIC(NumGlobUses , "Number of global uses devirtualized"); STATISTIC(NumLocalized , "Number of globals localized"); STATISTIC(NumShrunkToBool , "Number of global vars shrunk to booleans"); @@ -69,6 +69,7 @@ namespace { struct GlobalOpt : public ModulePass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); } static char ID; // Pass identification, replacement for typeid GlobalOpt() : ModulePass(ID) { @@ -81,11 +82,14 @@ namespace { bool OptimizeFunctions(Module &M); bool OptimizeGlobalVars(Module &M); bool OptimizeGlobalAliases(Module &M); - bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI); - bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, - const GlobalStatus &GS); + bool deleteIfDead(GlobalValue &GV); + bool processGlobal(GlobalValue &GV); + bool processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS); bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); + bool isPointerValueDeadOnEntryToFunction(const Function *F, + GlobalValue *GV); + TargetLibraryInfo *TLI; SmallSet 
NotDiscardableComdats; }; @@ -95,13 +99,14 @@ char GlobalOpt::ID = 0; INITIALIZE_PASS_BEGIN(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(GlobalOpt, "globalopt", "Global Variable Optimizer", false, false) ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); } -/// isLeakCheckerRoot - Is this global variable possibly used by a leak checker -/// as a root? If so, we might not really want to eliminate the stores to it. +/// Is this global variable possibly used by a leak checker as a root? If so, +/// we might not really want to eliminate the stores to it. static bool isLeakCheckerRoot(GlobalVariable *GV) { // A global variable is a root if it is a pointer, or could plausibly contain // a pointer. There are two challenges; one is that we could have a struct @@ -176,10 +181,9 @@ static bool IsSafeComputationToRemove(Value *V, const TargetLibraryInfo *TLI) { } while (1); } -/// CleanupPointerRootUsers - This GV is a pointer root. Loop over all users -/// of the global and clean up any that obviously don't assign the global a -/// value that isn't dynamically allocated. -/// +/// This GV is a pointer root. Loop over all users of the global and clean up +/// any that obviously don't assign the global a value that isn't dynamically +/// allocated. static bool CleanupPointerRootUsers(GlobalVariable *GV, const TargetLibraryInfo *TLI) { // A brief explanation of leak checkers. The goal is to find bugs where @@ -263,10 +267,9 @@ static bool CleanupPointerRootUsers(GlobalVariable *GV, return Changed; } -/// CleanupConstantGlobalUsers - We just marked GV constant. Loop over all -/// users of the global, cleaning up the obvious ones. This is largely just a -/// quick scan over the use list to clean up the easy and obvious cruft. This -/// returns true if it made a change. +/// We just marked GV constant. Loop over all users of the global, cleaning up +/// the obvious ones. This is largely just a quick scan over the use list to +/// clean up the easy and obvious cruft. This returns true if it made a change. static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, const DataLayout &DL, TargetLibraryInfo *TLI) { @@ -353,8 +356,8 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init, return Changed; } -/// isSafeSROAElementUse - Return true if the specified instruction is a safe -/// user of a derived expression from a global that we want to SROA. +/// Return true if the specified instruction is a safe user of a derived +/// expression from a global that we want to SROA. static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast(V)) @@ -385,9 +388,8 @@ static bool isSafeSROAElementUse(Value *V) { } -/// IsUserOfGlobalSafeForSRA - U is a direct user of the specified global value. -/// Look at it and its uses and decide whether it is safe to SROA this global. -/// +/// U is a direct user of the specified global value. Look at it and its uses +/// and decide whether it is safe to SROA this global. static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { // The user of the global must be a GEP Inst or a ConstantExpr GEP. 
if (!isa(U) && @@ -452,9 +454,8 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) { return true; } -/// GlobalUsersSafeToSRA - Look at all uses of the global and decide whether it -/// is safe for us to perform this transformation. -/// +/// Look at all uses of the global and decide whether it is safe for us to +/// perform this transformation. static bool GlobalUsersSafeToSRA(GlobalValue *GV) { for (User *U : GV->users()) if (!IsUserOfGlobalSafeForSRA(U, GV)) @@ -464,10 +465,10 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) { } -/// SRAGlobal - Perform scalar replacement of aggregates on the specified global -/// variable. This opens the door for other optimizations by exposing the -/// behavior of the program in a more fine-grained way. We have determined that -/// this transformation is safe already. We return the first global variable we +/// Perform scalar replacement of aggregates on the specified global variable. +/// This opens the door for other optimizations by exposing the behavior of the +/// program in a more fine-grained way. We have determined that this +/// transformation is safe already. We return the first global variable we /// insert so that the caller can reprocess it. static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { // Make sure this global only has simple uses that we can SRA. @@ -497,7 +498,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { In, GV->getName()+"."+Twine(i), GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); - Globals.insert(GV, NGV); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); NewGlobals.push_back(NGV); // Calculate the known alignment of the field. If the original aggregate @@ -530,7 +532,8 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { In, GV->getName()+"."+Twine(i), GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); - Globals.insert(GV, NGV); + NGV->setExternallyInitialized(GV->isExternallyInitialized()); + Globals.push_back(NGV); NewGlobals.push_back(NGV); // Calculate the known alignment of the field. If the original aggregate @@ -545,7 +548,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { if (NewGlobals.empty()) return nullptr; - DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV); + DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n"); Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext())); @@ -610,9 +613,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) { return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr; } -/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified -/// value will trap if the value is dynamically null. PHIs keeps track of any -/// phi nodes we've seen to avoid reprocessing them. +/// Return true if all users of the specified value will trap if the value is +/// dynamically null. PHIs keeps track of any phi nodes we've seen to avoid +/// reprocessing them. static bool AllUsesOfValueWillTrapIfNull(const Value *V, SmallPtrSetImpl &PHIs) { for (const User *U : V->users()) @@ -653,9 +656,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V, return true; } -/// AllUsesOfLoadedValueWillTrapIfNull - Return true if all uses of any loads -/// from GV will trap if the loaded value is null. Note that this also permits -/// comparisons of the loaded value against null, as a special case. 
+/// Return true if all uses of any loads from GV will trap if the loaded value +/// is null. Note that this also permits comparisons of the loaded value +/// against null, as a special case. static bool AllUsesOfLoadedValueWillTrapIfNull(const GlobalVariable *GV) { for (const User *U : GV->users()) if (const LoadInst *LI = dyn_cast(U)) { @@ -735,10 +738,10 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) { } -/// OptimizeAwayTrappingUsesOfLoads - The specified global has only one non-null -/// value stored into it. If there are uses of the loaded value that would trap -/// if the loaded value is dynamically null, then we know that they cannot be -/// reachable with a null optimize away the load. +/// The specified global has only one non-null value stored into it. If there +/// are uses of the loaded value that would trap if the loaded value is +/// dynamically null, then we know that they cannot be reachable with a null +/// optimize away the load. static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, const DataLayout &DL, TargetLibraryInfo *TLI) { @@ -778,7 +781,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, } if (Changed) { - DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV); + DEBUG(dbgs() << "OPTIMIZED LOADS FROM STORED ONCE POINTER: " << *GV << "\n"); ++NumGlobUses; } @@ -801,8 +804,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV, return Changed; } -/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the -/// instructions that are foldable. +/// Walk the use list of V, constant folding all of the instructions that are +/// foldable. static void ConstantPropUsersOf(Value *V, const DataLayout &DL, TargetLibraryInfo *TLI) { for (Value::user_iterator UI = V->user_begin(), E = V->user_end(); UI != E; ) @@ -818,11 +821,11 @@ static void ConstantPropUsersOf(Value *V, const DataLayout &DL, } } -/// OptimizeGlobalAddressOfMalloc - This function takes the specified global -/// variable, and transforms the program as if it always contained the result of -/// the specified malloc. Because it is always the result of the specified -/// malloc, there is no reason to actually DO the malloc. Instead, turn the -/// malloc into a global, and any loads of GV as uses of the new global. +/// This function takes the specified global variable, and transforms the +/// program as if it always contained the result of the specified malloc. +/// Because it is always the result of the specified malloc, there is no reason +/// to actually DO the malloc. Instead, turn the malloc into a global, and any +/// loads of GV as uses of the new global. static GlobalVariable * OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, ConstantInt *NElements, const DataLayout &DL, @@ -838,13 +841,10 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, // Create the new global variable. The contents of the malloc'd memory is // undefined, so initialize with an undef value. 
- GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(), - GlobalType, false, - GlobalValue::InternalLinkage, - UndefValue::get(GlobalType), - GV->getName()+".body", - GV, - GV->getThreadLocalMode()); + GlobalVariable *NewGV = new GlobalVariable( + *GV->getParent(), GlobalType, false, GlobalValue::InternalLinkage, + UndefValue::get(GlobalType), GV->getName() + ".body", nullptr, + GV->getThreadLocalMode()); // If there are bitcast users of the malloc (which is typical, usually we have // a malloc + bitcast) then replace them with uses of the new global. Update @@ -935,7 +935,7 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, cast(InitBool->user_back())->eraseFromParent(); delete InitBool; } else - GV->getParent()->getGlobalList().insert(GV, InitBool); + GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool); // Now the GV is dead, nuke it and the malloc.. GV->eraseFromParent(); @@ -951,10 +951,9 @@ OptimizeGlobalAddressOfMalloc(GlobalVariable *GV, CallInst *CI, Type *AllocTy, return NewGV; } -/// ValueIsOnlyUsedLocallyOrStoredToOneGlobal - Scan the use-list of V checking -/// to make sure that there are no complex uses of V. We permit simple things -/// like dereferencing the pointer, but not storing through the address, unless -/// it is to the specified global. +/// Scan the use-list of V checking to make sure that there are no complex uses +/// of V. We permit simple things like dereferencing the pointer, but not +/// storing through the address, unless it is to the specified global. static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, const GlobalVariable *GV, SmallPtrSetImpl &PHIs) { @@ -998,10 +997,9 @@ static bool ValueIsOnlyUsedLocallyOrStoredToOneGlobal(const Instruction *V, return true; } -/// ReplaceUsesOfMallocWithGlobal - The Alloc pointer is stored into GV -/// somewhere. Transform all uses of the allocation into loads from the -/// global and uses of the resultant pointer. Further, delete the store into -/// GV. This assumes that these value pass the +/// The Alloc pointer is stored into GV somewhere. Transform all uses of the +/// allocation into loads from the global and uses of the resultant pointer. +/// Further, delete the store into GV. This assumes that these value pass the /// 'ValueIsOnlyUsedLocallyOrStoredToOneGlobal' predicate. static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, GlobalVariable *GV) { @@ -1043,9 +1041,9 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc, } } -/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi -/// of a load) are simple enough to perform heap SRA on. This permits GEP's -/// that index through the array and struct field, icmps of null, and PHIs. +/// Verify that all uses of V (a load, or a phi of a load) are simple enough to +/// perform heap SRA on. This permits GEP's that index through the array and +/// struct field, icmps of null, and PHIs. static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, SmallPtrSetImpl &LoadUsingPHIs, SmallPtrSetImpl &LoadUsingPHIsPerLoad) { @@ -1096,8 +1094,8 @@ static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V, } -/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from -/// GV are simple enough to perform HeapSRA, return true. +/// If all users of values loaded from GV are simple enough to perform HeapSRA, +/// return true. 
static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV, Instruction *StoredVal) { SmallPtrSet LoadUsingPHIs; @@ -1186,8 +1184,8 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, return FieldVals[FieldNo] = Result; } -/// RewriteHeapSROALoadUser - Given a load instruction and a value derived from -/// the load, rewrite the derived value to use the HeapSRoA'd load. +/// Given a load instruction and a value derived from the load, rewrite the +/// derived value to use the HeapSRoA'd load. static void RewriteHeapSROALoadUser(Instruction *LoadUser, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { @@ -1248,10 +1246,9 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser, } } -/// RewriteUsesOfLoadForHeapSRoA - We are performing Heap SRoA on a global. Ptr -/// is a value loaded from the global. Eliminate all uses of Ptr, making them -/// use FieldGlobals instead. All uses of loaded values satisfy -/// AllGlobalLoadUsesSimpleEnoughForHeapSRA. +/// We are performing Heap SRoA on a global. Ptr is a value loaded from the +/// global. Eliminate all uses of Ptr, making them use FieldGlobals instead. +/// All uses of loaded values satisfy AllGlobalLoadUsesSimpleEnoughForHeapSRA. static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, DenseMap > &InsertedScalarizedValues, std::vector > &PHIsToRewrite) { @@ -1266,8 +1263,8 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load, } } -/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break -/// it up into multiple allocations of arrays of the fields. +/// CI is an allocation of an array of structures. Break it up into multiple +/// allocations of arrays of the fields. static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Value *NElems, const DataLayout &DL, const TargetLibraryInfo *TLI) { @@ -1291,12 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, Type *FieldTy = STy->getElementType(FieldNo); PointerType *PFieldTy = PointerType::get(FieldTy, AS); - GlobalVariable *NGV = - new GlobalVariable(*GV->getParent(), - PFieldTy, false, GlobalValue::InternalLinkage, - Constant::getNullValue(PFieldTy), - GV->getName() + ".f" + Twine(FieldNo), GV, - GV->getThreadLocalMode()); + GlobalVariable *NGV = new GlobalVariable( + *GV->getParent(), PFieldTy, false, GlobalValue::InternalLinkage, + Constant::getNullValue(PFieldTy), GV->getName() + ".f" + Twine(FieldNo), + nullptr, GV->getThreadLocalMode()); FieldGlobals.push_back(NGV); unsigned TypeSize = DL.getTypeAllocSize(FieldTy); @@ -1336,7 +1331,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // Split the basic block at the old malloc. BasicBlock *OrigBB = CI->getParent(); - BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont"); + BasicBlock *ContBB = + OrigBB->splitBasicBlock(CI->getIterator(), "malloc_cont"); // Create the block to check the first condition. Put all these blocks at the // end of the function as they are unlikely to be executed. @@ -1376,9 +1372,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // CI is no longer needed, remove it. CI->eraseFromParent(); - /// InsertedScalarizedLoads - As we process loads, if we can't immediately - /// update all uses of the load, keep track of what scalarized loads are - /// inserted for a given load. 
+ /// As we process loads, if we can't immediately update all uses of the load, + /// keep track of what scalarized loads are inserted for a given load. DenseMap > InsertedScalarizedValues; InsertedScalarizedValues[GV] = FieldGlobals; @@ -1454,13 +1449,11 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, return cast(FieldGlobals[0]); } -/// TryToOptimizeStoreOfMallocToGlobal - This function is called when we see a -/// pointer global variable with a single value stored it that is a malloc or -/// cast of malloc. -static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, +/// This function is called when we see a pointer global variable with a single +/// value stored it that is a malloc or cast of malloc. +static bool tryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, Type *AllocTy, AtomicOrdering Ordering, - Module::global_iterator &GVI, const DataLayout &DL, TargetLibraryInfo *TLI) { // If this is a malloc of an abstract type, don't touch it. @@ -1499,7 +1492,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, // (2048 bytes currently), as we don't want to introduce a 16M global or // something. if (NElements->getZExtValue() * DL.getTypeAllocSize(AllocTy) < 2048) { - GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); + OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, DL, TLI); return true; } @@ -1544,19 +1537,18 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, CallInst *CI, CI = cast(Malloc); } - GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), - DL, TLI); + PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, DL, TLI, true), DL, + TLI); return true; } return false; } -// OptimizeOnceStoredGlobal - Try to optimize globals based on the knowledge -// that only one value (besides its initializer) is ever stored to the global. -static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, +// Try to optimize globals based on the knowledge that only one value (besides +// its initializer) is ever stored to the global. +static bool optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, AtomicOrdering Ordering, - Module::global_iterator &GVI, const DataLayout &DL, TargetLibraryInfo *TLI) { // Ignore no-op GEPs and bitcasts. @@ -1577,9 +1569,8 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, return true; } else if (CallInst *CI = extractMallocCall(StoredOnceVal, TLI)) { Type *MallocType = getMallocAllocatedType(CI, TLI); - if (MallocType && - TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, Ordering, GVI, - DL, TLI)) + if (MallocType && tryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, + Ordering, DL, TLI)) return true; } } @@ -1587,10 +1578,10 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal, return false; } -/// TryToShrinkGlobalToBoolean - At this point, we have learned that the only -/// two values ever stored into GV are its initializer and OtherVal. See if we -/// can shrink the global into a boolean and select between the two values -/// whenever it is used. This exposes the values to other scalar optimizations. +/// At this point, we have learned that the only two values ever stored into GV +/// are its initializer and OtherVal. See if we can shrink the global into a +/// boolean and select between the two values whenever it is used. This exposes +/// the values to other scalar optimizations. 
static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { Type *GVElType = GV->getType()->getElementType(); @@ -1610,7 +1601,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { if (!isa(U) && !isa(U)) return false; - DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV << "\n"); // Create the new global, initializing it to false. GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()), @@ -1620,7 +1611,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GV->getName()+".b", GV->getThreadLocalMode(), GV->getType()->getAddressSpace()); - GV->getParent()->getGlobalList().insert(GV, NewGV); + GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV); Constant *InitVal = GV->getInitializer(); assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) && @@ -1688,61 +1679,213 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { return true; } +bool GlobalOpt::deleteIfDead(GlobalValue &GV) { + GV.removeDeadConstantUsers(); -/// ProcessGlobal - Analyze the specified global variable and optimize it if -/// possible. If we make a change, return true. -bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, - Module::global_iterator &GVI) { + if (!GV.isDiscardableIfUnused()) + return false; + + if (const Comdat *C = GV.getComdat()) + if (!GV.hasLocalLinkage() && NotDiscardableComdats.count(C)) + return false; + + bool Dead; + if (auto *F = dyn_cast(&GV)) + Dead = F->isDefTriviallyDead(); + else + Dead = GV.use_empty(); + if (!Dead) + return false; + + DEBUG(dbgs() << "GLOBAL DEAD: " << GV << "\n"); + GV.eraseFromParent(); + ++NumDeleted; + return true; +} + +/// Analyze the specified global variable and optimize it if possible. If we +/// make a change, return true. +bool GlobalOpt::processGlobal(GlobalValue &GV) { // Do more involved optimizations if the global is internal. - GV->removeDeadConstantUsers(); - - if (GV->use_empty()) { - DEBUG(dbgs() << "GLOBAL DEAD: " << *GV); - GV->eraseFromParent(); - ++NumDeleted; - return true; - } - - if (!GV->hasLocalLinkage()) + if (!GV.hasLocalLinkage()) return false; GlobalStatus GS; - if (GlobalStatus::analyzeGlobal(GV, GS)) + if (GlobalStatus::analyzeGlobal(&GV, GS)) return false; - if (!GS.IsCompared && !GV->hasUnnamedAddr()) { - GV->setUnnamedAddr(true); + bool Changed = false; + if (!GS.IsCompared && !GV.hasUnnamedAddr()) { + GV.setUnnamedAddr(true); NumUnnamed++; + Changed = true; } - if (GV->isConstant() || !GV->hasInitializer()) - return false; + auto *GVar = dyn_cast(&GV); + if (!GVar) + return Changed; - return ProcessInternalGlobal(GV, GVI, GS); + if (GVar->isConstant() || !GVar->hasInitializer()) + return Changed; + + return processInternalGlobal(GVar, GS) || Changed; } -/// ProcessInternalGlobal - Analyze the specified global variable and optimize +bool GlobalOpt::isPointerValueDeadOnEntryToFunction(const Function *F, GlobalValue *GV) { + // Find all uses of GV. We expect them all to be in F, and if we can't + // identify any of the uses we bail out. + // + // On each of these uses, identify if the memory that GV points to is + // used/required/live at the start of the function. If it is not, for example + // if the first thing the function does is store to the GV, the GV can + // possibly be demoted. + // + // We don't do an exhaustive search for memory operations - simply look + // through bitcasts as they're quite common and benign. 
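The entry-liveness test explained above reduces to a quadratic load/store dominance check; a minimal sketch of it as a standalone helper (hypothetical name, assuming the same DominatorTree and DataLayout the pass queries):

static bool allLoadsDominatedByStores(ArrayRef<LoadInst *> Loads,
                                      ArrayRef<StoreInst *> Stores,
                                      DominatorTree &DT,
                                      const DataLayout &DL) {
  for (LoadInst *L : Loads) {
    // A load cannot observe the value live on entry if some store to the
    // global, writing at least as many bytes as the load reads, dominates it.
    bool Covered = false;
    for (StoreInst *S : Stores)
      if (DT.dominates(S, L) &&
          DL.getTypeStoreSize(L->getType()) <=
              DL.getTypeStoreSize(S->getValueOperand()->getType())) {
        Covered = true;
        break;
      }
    if (!Covered)
      return false;
  }
  return true;
}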
+ const DataLayout &DL = GV->getParent()->getDataLayout(); + SmallVector Loads; + SmallVector Stores; + for (auto *U : GV->users()) { + if (Operator::getOpcode(U) == Instruction::BitCast) { + for (auto *UU : U->users()) { + if (auto *LI = dyn_cast(UU)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast(UU)) + Stores.push_back(SI); + else + return false; + } + continue; + } + + Instruction *I = dyn_cast(U); + if (!I) + return false; + assert(I->getParent()->getParent() == F); + + if (auto *LI = dyn_cast(I)) + Loads.push_back(LI); + else if (auto *SI = dyn_cast(I)) + Stores.push_back(SI); + else + return false; + } + + // We have identified all uses of GV into loads and stores. Now check if all + // of them are known not to depend on the value of the global at the function + // entry point. We do this by ensuring that every load is dominated by at + // least one store. + auto &DT = getAnalysis(*const_cast(F)) + .getDomTree(); + + // The below check is quadratic. Check we're not going to do too many tests. + // FIXME: Even though this will always have worst-case quadratic time, we + // could put effort into minimizing the average time by putting stores that + // have been shown to dominate at least one load at the beginning of the + // Stores array, making subsequent dominance checks more likely to succeed + // early. + // + // The threshold here is fairly large because global->local demotion is a + // very powerful optimization should it fire. + const unsigned Threshold = 100; + if (Loads.size() * Stores.size() > Threshold) + return false; + + for (auto *L : Loads) { + auto *LTy = L->getType(); + if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) { + auto *STy = S->getValueOperand()->getType(); + // The load is only dominated by the store if DomTree says so + // and the number of bits loaded in L is less than or equal to + // the number of bits stored in S. + return DT.dominates(S, L) && + DL.getTypeStoreSize(LTy) <= DL.getTypeStoreSize(STy); + })) + return false; + } + // All loads have known dependences inside F, so the global can be localized. + return true; +} + +/// C may have non-instruction users. Can all of those users be turned into +/// instructions? +static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) { + // We don't do this exhaustively. The most common pattern that we really need + // to care about is a constant GEP or constant bitcast - so just looking + // through one single ConstantExpr. + // + // The set of constants that this function returns true for must be able to be + // handled by makeAllConstantUsesInstructions. + for (auto *U : C->users()) { + if (isa(U)) + continue; + if (!isa(U)) + // Non instruction, non-constantexpr user; cannot convert this. + return false; + for (auto *UU : U->users()) + if (!isa(UU)) + // A constantexpr used by another constant. We don't try and recurse any + // further but just bail out at this point. + return false; + } + + return true; +} + +/// C may have non-instruction users, and +/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the +/// non-instruction users to instructions. +static void makeAllConstantUsesInstructions(Constant *C) { + SmallVector Users; + for (auto *U : C->users()) { + if (isa(U)) + Users.push_back(cast(U)); + else + // We should never get here; allNonInstructionUsersCanBeMadeInstructions + // should not have returned true for C. 
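// (Illustration, hedged: for a global @G, a typical non-instruction user is
//  a ConstantExpr such as bitcast(@G to i8*) or getelementptr(@G, 0, 1); the
//  loop below materializes each one as a real Instruction via
//  ConstantExpr::getAsInstruction() and re-points every use at it.)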
+ assert( + isa(U) && + "Can't transform non-constantexpr non-instruction to instruction!"); + } + + SmallVector UUsers; + for (auto *U : Users) { + UUsers.clear(); + for (auto *UU : U->users()) + UUsers.push_back(UU); + for (auto *UU : UUsers) { + Instruction *UI = cast(UU); + Instruction *NewU = U->getAsInstruction(); + NewU->insertBefore(UI); + UI->replaceUsesOfWith(U, NewU); + } + U->dropAllReferences(); + } +} + +/// Analyze the specified global variable and optimize /// it if possible. If we make a change, return true. -bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, - Module::global_iterator &GVI, +bool GlobalOpt::processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS) { auto &DL = GV->getParent()->getDataLayout(); - // If this is a first class global and has only one accessing function - // and this function is main (which we know is not recursive), we replace - // the global with a local alloca in this function. + // If this is a first class global and has only one accessing function and + // this function is non-recursive, we replace the global with a local alloca + // in this function. // // NOTE: It doesn't make sense to promote non-single-value types since we // are just replacing static memory to stack memory. // // If the global is in different address space, don't bring it to stack. if (!GS.HasMultipleAccessingFunctions && - GS.AccessingFunction && !GS.HasNonInstructionUser && + GS.AccessingFunction && GV->getType()->getElementType()->isSingleValueType() && - GS.AccessingFunction->getName() == "main" && - GS.AccessingFunction->hasExternalLinkage() && - GV->getType()->getAddressSpace() == 0) { - DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV); + GV->getType()->getAddressSpace() == 0 && + !GV->isExternallyInitialized() && + allNonInstructionUsersCanBeMadeInstructions(GV) && + GS.AccessingFunction->doesNotRecurse() && + isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV) ) { + DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV << "\n"); Instruction &FirstI = const_cast(*GS.AccessingFunction ->getEntryBlock().begin()); Type *ElemTy = GV->getType()->getElementType(); @@ -1752,6 +1895,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, if (!isa(GV->getInitializer())) new StoreInst(GV->getInitializer(), Alloca, &FirstI); + makeAllConstantUsesInstructions(GV); + GV->replaceAllUsesWith(Alloca); GV->eraseFromParent(); ++NumLocalized; @@ -1761,7 +1906,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // If the global is never loaded (but may be stored to), it is dead. // Delete it now. if (!GS.IsLoaded) { - DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV); + DEBUG(dbgs() << "GLOBAL NEVER LOADED: " << *GV << "\n"); bool Changed; if (isLeakCheckerRoot(GV)) { @@ -1800,11 +1945,9 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return true; } else if (!GV->getInitializer()->getType()->isSingleValueType()) { const DataLayout &DL = GV->getParent()->getDataLayout(); - if (GlobalVariable *FirstNewGV = SRAGlobal(GV, DL)) { - GVI = FirstNewGV; // Don't skip the newly produced globals! 
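The LOCALIZING GLOBAL path shown earlier in this hunk can be sketched as a standalone helper (hypothetical; the real code additionally converts constant users to instructions and checks non-recursion and entry liveness first):

static AllocaInst *localizeGlobal(GlobalVariable *GV, Function *F) {
  Instruction &FirstI = F->getEntryBlock().front();
  Type *ElemTy = GV->getType()->getElementType();
  AllocaInst *Alloca = new AllocaInst(ElemTy, GV->getName(), &FirstI);
  if (!isa<UndefValue>(GV->getInitializer()))       // copy the initializer in
    new StoreInst(GV->getInitializer(), Alloca, &FirstI);
  GV->replaceAllUsesWith(Alloca);                   // demote global to stack
  GV->eraseFromParent();
  return Alloca;
}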
+ if (SRAGlobal(GV, DL)) return true; - } - } else if (GS.StoredType == GlobalStatus::StoredOnce) { + } else if (GS.StoredType == GlobalStatus::StoredOnce && GS.StoredOnceValue) { // If the initial value for the global was an undef value, and if only // one other value was stored into it, we can just change the // initializer to be the stored value, then delete all stores to the @@ -1822,8 +1965,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, << "simplify all users and delete global!\n"); GV->eraseFromParent(); ++NumDeleted; - } else { - GVI = GV; } ++NumSubstitute; return true; @@ -1831,8 +1972,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, // Try to optimize globals based on the knowledge that only one value // (besides its initializer) is ever stored to the global. - if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, GVI, - DL, TLI)) + if (optimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GS.Ordering, DL, TLI)) return true; // Otherwise, if the global was not a boolean, we can shrink it to be a @@ -1850,8 +1990,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return false; } -/// ChangeCalleesToFastCall - Walk all of the direct calls of the specified -/// function, changing them to FastCC. +/// Walk all of the direct calls of the specified function, changing them to +/// FastCC. static void ChangeCalleesToFastCall(Function *F) { for (User *U : F->users()) { if (isa(U)) @@ -1898,38 +2038,38 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { bool Changed = false; // Optimize functions. for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { - Function *F = FI++; + Function *F = &*FI++; // Functions without names cannot be referenced outside this module. if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) F->setLinkage(GlobalValue::InternalLinkage); - const Comdat *C = F->getComdat(); - bool inComdat = C && NotDiscardableComdats.count(C); - F->removeDeadConstantUsers(); - if ((!inComdat || F->hasLocalLinkage()) && F->isDefTriviallyDead()) { - F->eraseFromParent(); + if (deleteIfDead(*F)) { Changed = true; - ++NumFnDeleted; - } else if (F->hasLocalLinkage()) { - if (isProfitableToMakeFastCC(F) && !F->isVarArg() && - !F->hasAddressTaken()) { - // If this function has a calling convention worth changing, is not a - // varargs function, and is only called directly, promote it to use the - // Fast calling convention. - F->setCallingConv(CallingConv::Fast); - ChangeCalleesToFastCall(F); - ++NumFastCallFns; - Changed = true; - } + continue; + } - if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && - !F->hasAddressTaken()) { - // The function is not used by a trampoline intrinsic, so it is safe - // to remove the 'nest' attribute. - RemoveNestAttribute(F); - ++NumNestRemoved; - Changed = true; - } + Changed |= processGlobal(*F); + + if (!F->hasLocalLinkage()) + continue; + if (isProfitableToMakeFastCC(F) && !F->isVarArg() && + !F->hasAddressTaken()) { + // If this function has a calling convention worth changing, is not a + // varargs function, and is only called directly, promote it to use the + // Fast calling convention. + F->setCallingConv(CallingConv::Fast); + ChangeCalleesToFastCall(F); + ++NumFastCallFns; + Changed = true; + } + + if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && + !F->hasAddressTaken()) { + // The function is not used by a trampoline intrinsic, so it is safe + // to remove the 'nest' attribute. 
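// (Context, hedged: 'nest' marks the static-chain parameter filled in by
//  llvm.init.trampoline; once no trampoline can reference F, the attribute
//  only constrains optimization and is safe to drop.)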
+ RemoveNestAttribute(F); + ++NumNestRemoved; + Changed = true; } } return Changed; @@ -1940,7 +2080,7 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { - GlobalVariable *GV = GVI++; + GlobalVariable *GV = &*GVI++; // Global variables without names cannot be referenced outside this module. if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) GV->setLinkage(GlobalValue::InternalLinkage); @@ -1953,12 +2093,12 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { GV->setInitializer(New); } - if (GV->isDiscardableIfUnused()) { - if (const Comdat *C = GV->getComdat()) - if (NotDiscardableComdats.count(C) && !GV->hasLocalLinkage()) - continue; - Changed |= ProcessGlobal(GV, GVI); + if (deleteIfDead(*GV)) { + Changed = true; + continue; } + + Changed |= processGlobal(*GV); } return Changed; } @@ -1968,8 +2108,8 @@ isSimpleEnoughValueToCommit(Constant *C, SmallPtrSetImpl &SimpleConstants, const DataLayout &DL); -/// isSimpleEnoughValueToCommit - Return true if the specified constant can be -/// handled by the code generator. We don't want to generate something like: +/// Return true if the specified constant can be handled by the code generator. +/// We don't want to generate something like: /// void *X = &X/42; /// because the code generator doesn't have a relocation that can handle that. /// @@ -2044,11 +2184,11 @@ isSimpleEnoughValueToCommit(Constant *C, } -/// isSimpleEnoughPointerToCommit - Return true if this constant is simple -/// enough for us to understand. In particular, if it is a cast to anything -/// other than from one pointer type to another pointer type, we punt. -/// We basically just support direct accesses to globals and GEP's of -/// globals. This should be kept up to date with CommitValueTo. +/// Return true if this constant is simple enough for us to understand. In +/// particular, if it is a cast to anything other than from one pointer type to +/// another pointer type, we punt. We basically just support direct accesses to +/// globals and GEP's of globals. This should be kept up to date with +/// CommitValueTo. static bool isSimpleEnoughPointerToCommit(Constant *C) { // Conservatively, avoid aggregate types. This is because we don't // want to worry about them partially overlapping other stores. @@ -2095,9 +2235,9 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; } -/// EvaluateStoreInto - Evaluate a piece of a constantexpr store into a global -/// initializer. This returns 'Init' modified to reflect 'Val' stored into it. -/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into. +/// Evaluate a piece of a constantexpr store into a global initializer. This +/// returns 'Init' modified to reflect 'Val' stored into it. At this point, the +/// GEP operands of Addr [0, OpNo) have been stepped into. static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, ConstantExpr *Addr, unsigned OpNo) { // Base case of the recursion. @@ -2144,7 +2284,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val, return ConstantVector::get(Elts); } -/// CommitValueTo - We have decided that Addr (which satisfies the predicate +/// We have decided that Addr (which satisfies the predicate /// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen. 
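A hedged miniature of the recursion in EvaluateStoreInto (hypothetical helper; it ignores the ConstantAggregateZero and vector cases the real code handles):

static Constant *replaceAggregateElt(Constant *Init, Constant *Val,
                                     unsigned Idx) {
  // Copy every element of the aggregate, swapping in Val at position Idx.
  SmallVector<Constant *, 8> Elts;
  unsigned NumElts = isa<StructType>(Init->getType())
                         ? Init->getType()->getStructNumElements()
                         : cast<ArrayType>(Init->getType())->getNumElements();
  for (unsigned i = 0; i != NumElts; ++i)
    Elts.push_back(i == Idx ? Val : Init->getAggregateElement(i));
  if (auto *ST = dyn_cast<StructType>(Init->getType()))
    return ConstantStruct::get(ST, Elts);
  return ConstantArray::get(cast<ArrayType>(Init->getType()), Elts);
}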
static void CommitValueTo(Constant *Val, Constant *Addr) { if (GlobalVariable *GV = dyn_cast(Addr)) { @@ -2160,10 +2300,10 @@ static void CommitValueTo(Constant *Val, Constant *Addr) { namespace { -/// Evaluator - This class evaluates LLVM IR, producing the Constant -/// representing each SSA instruction. Changes to global variables are stored -/// in a mapping that can be iterated over after the evaluation is complete. -/// Once an evaluation call fails, the evaluation object should not be reused. +/// This class evaluates LLVM IR, producing the Constant representing each SSA +/// instruction. Changes to global variables are stored in a mapping that can +/// be iterated over after the evaluation is complete. Once an evaluation call +/// fails, the evaluation object should not be reused. class Evaluator { public: Evaluator(const DataLayout &DL, const TargetLibraryInfo *TLI) @@ -2180,15 +2320,15 @@ public: Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType())); } - /// EvaluateFunction - Evaluate a call to function F, returning true if - /// successful, false if we can't evaluate it. ActualArgs contains the formal - /// arguments for the function. + /// Evaluate a call to function F, returning true if successful, false if we + /// can't evaluate it. ActualArgs contains the formal arguments for the + /// function. bool EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl &ActualArgs); - /// EvaluateBlock - Evaluate all instructions in block BB, returning true if - /// successful, false if we can't evaluate it. NewBB returns the next BB that - /// control flows into, or null upon return. + /// Evaluate all instructions in block BB, returning true if successful, false + /// if we can't evaluate it. NewBB returns the next BB that control flows + /// into, or null upon return. bool EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB); Constant *getVal(Value *V) { @@ -2213,32 +2353,31 @@ public: private: Constant *ComputeLoadResult(Constant *P); - /// ValueStack - As we compute SSA register values, we store their contents - /// here. The back of the deque contains the current function and the stack - /// contains the values in the calling frames. + /// As we compute SSA register values, we store their contents here. The back + /// of the deque contains the current function and the stack contains the + /// values in the calling frames. std::deque> ValueStack; - /// CallStack - This is used to detect recursion. In pathological situations - /// we could hit exponential behavior, but at least there is nothing - /// unbounded. + /// This is used to detect recursion. In pathological situations we could hit + /// exponential behavior, but at least there is nothing unbounded. SmallVector CallStack; - /// MutatedMemory - For each store we execute, we update this map. Loads - /// check this to get the most up-to-date value. If evaluation is successful, - /// this state is committed to the process. + /// For each store we execute, we update this map. Loads check this to get + /// the most up-to-date value. If evaluation is successful, this state is + /// committed to the process. DenseMap MutatedMemory; - /// AllocaTmps - To 'execute' an alloca, we create a temporary global variable - /// to represent its body. This vector is needed so we can delete the - /// temporary globals when we are done. + /// To 'execute' an alloca, we create a temporary global variable to represent + /// its body. 
This vector is needed so we can delete the temporary globals + /// when we are done. SmallVector, 32> AllocaTmps; - /// Invariants - These global variables have been marked invariant by the - /// static constructor. + /// These global variables have been marked invariant by the static + /// constructor. SmallPtrSet Invariants; - /// SimpleConstants - These are constants we have checked and know to be - /// simple enough to live in a static initializer of a global. + /// These are constants we have checked and know to be simple enough to live + /// in a static initializer of a global. SmallPtrSet SimpleConstants; const DataLayout &DL; @@ -2247,9 +2386,8 @@ private: } // anonymous namespace -/// ComputeLoadResult - Return the value that would be computed by a load from -/// P after the stores reflected by 'memory' have been performed. If we can't -/// decide, return null. +/// Return the value that would be computed by a load from P after the stores +/// reflected by 'memory' have been performed. If we can't decide, return null. Constant *Evaluator::ComputeLoadResult(Constant *P) { // If this memory location has been recently stored, use the stored value: it // is the most up-to-date. @@ -2275,9 +2413,9 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) { return nullptr; // don't know how to evaluate. } -/// EvaluateBlock - Evaluate all instructions in block BB, returning true if -/// successful, false if we can't evaluate it. NewBB returns the next BB that -/// control flows into, or null upon return. +/// Evaluate all instructions in block BB, returning true if successful, false +/// if we can't evaluate it. NewBB returns the next BB that control flows into, +/// or null upon return. bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB) { // This is the main evaluation loop. @@ -2438,7 +2576,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, InstResult = AllocaTmps.back().get(); DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); } else if (isa(CurInst) || isa(CurInst)) { - CallSite CS(CurInst); + CallSite CS(&*CurInst); // Debug info can safely be ignored here. if (isa(CS.getInstruction())) { @@ -2504,6 +2642,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, // Continue even if we do nothing. ++CurInst; continue; + } else if (II->getIntrinsicID() == Intrinsic::assume) { + DEBUG(dbgs() << "Skipping assume intrinsic.\n"); + ++CurInst; + continue; } DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); @@ -2600,7 +2742,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, if (ConstantExpr *CE = dyn_cast(InstResult)) InstResult = ConstantFoldConstantExpression(CE, DL, TLI); - setVal(CurInst, InstResult); + setVal(&*CurInst, InstResult); } // If we just processed an invoke, we finished evaluating the block. @@ -2615,9 +2757,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, } } -/// EvaluateFunction - Evaluate a call to function F, returning true if -/// successful, false if we can't evaluate it. ActualArgs contains the formal -/// arguments for the function. +/// Evaluate a call to function F, returning true if successful, false if we +/// can't evaluate it. ActualArgs contains the formal arguments for the +/// function. bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, const SmallVectorImpl &ActualArgs) { // Check to see if this function is already executing (recursion). 
If so, @@ -2631,7 +2773,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, unsigned ArgNo = 0; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI, ++ArgNo) - setVal(AI, ActualArgs[ArgNo]); + setVal(&*AI, ActualArgs[ArgNo]); // ExecutedBlocks - We only handle non-looping, non-recursive code. As such, // we can only evaluate any one basic block at most once. This set keeps @@ -2639,7 +2781,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, SmallPtrSet ExecutedBlocks; // CurBB - The current basic block we're evaluating. - BasicBlock *CurBB = F->begin(); + BasicBlock *CurBB = &F->front(); BasicBlock::iterator CurInst = CurBB->begin(); @@ -2679,8 +2821,8 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal, } } -/// EvaluateStaticConstructor - Evaluate static constructors in the function, if -/// we can. Return true if we can, false otherwise. +/// Evaluate static constructors in the function, if we can. Return true if we +/// can, false otherwise. static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, const TargetLibraryInfo *TLI) { // Call the function. @@ -2708,7 +2850,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout &DL, } static int compareNames(Constant *const *A, Constant *const *B) { - return (*A)->getName().compare((*B)->getName()); + return (*A)->stripPointerCasts()->getName().compare( + (*B)->stripPointerCasts()->getName()); } static void setUsedInitializer(GlobalVariable &V, @@ -2742,7 +2885,7 @@ static void setUsedInitializer(GlobalVariable &V, } namespace { -/// \brief An easy to access representation of llvm.used and llvm.compiler.used. +/// An easy to access representation of llvm.used and llvm.compiler.used. class LLVMUsed { SmallPtrSet Used; SmallPtrSet CompilerUsed; @@ -2861,10 +3004,17 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E;) { - Module::alias_iterator J = I++; + GlobalAlias *J = &*I++; + // Aliases without names cannot be referenced outside this module. if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) J->setLinkage(GlobalValue::InternalLinkage); + + if (deleteIfDead(*J)) { + Changed = true; + continue; + } + // If the aliasee may change at link time, nothing can be done - bail out. if (J->mayBeOverridden()) continue; @@ -2889,15 +3039,15 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { if (RenameTarget) { // Give the aliasee the name, linkage and other attributes of the alias. - Target->takeName(J); + Target->takeName(&*J); Target->setLinkage(J->getLinkage()); Target->setVisibility(J->getVisibility()); Target->setDLLStorageClass(J->getDLLStorageClass()); - if (Used.usedErase(J)) + if (Used.usedErase(&*J)) Used.usedInsert(Target); - if (Used.compilerUsedErase(J)) + if (Used.compilerUsedErase(&*J)) Used.compilerUsedInsert(Target); } else if (mayHaveOtherReferences(*J, Used)) continue; @@ -2936,8 +3086,8 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) { return Fn; } -/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ -/// destructor and can therefore be eliminated. +/// Returns whether the given function is an empty C++ destructor and can +/// therefore be eliminated. 
/// Note that we assume that other optimization passes have already simplified /// the code so we only look for a function with a single basic block, where /// the only allowed instructions are 'ret', 'call' to an empty C++ dtor and @@ -3081,3 +3231,4 @@ bool GlobalOpt::runOnModule(Module &M) { return Changed; } + diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index 50f56b0f2afe..7ea6c08b2e66 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements the common infrastructure (including C bindings) for -// libLLVMIPO.a, which implements several transformations over the LLVM +// This file implements the common infrastructure (including C bindings) for +// libLLVMIPO.a, which implements several transformations over the LLVM // intermediate representation. // //===----------------------------------------------------------------------===// @@ -24,14 +24,17 @@ using namespace llvm; void llvm::initializeIPO(PassRegistry &Registry) { initializeArgPromotionPass(Registry); initializeConstantMergePass(Registry); + initializeCrossDSOCFIPass(Registry); initializeDAEPass(Registry); initializeDAHPass(Registry); + initializeForceFunctionAttrsLegacyPassPass(Registry); initializeFunctionAttrsPass(Registry); initializeGlobalDCEPass(Registry); initializeGlobalOptPass(Registry); initializeIPCPPass(Registry); initializeAlwaysInlinerPass(Registry); initializeSimpleInlinerPass(Registry); + initializeInferFunctionAttrsLegacyPassPass(Registry); initializeInternalizePassPass(Registry); initializeLoopExtractorPass(Registry); initializeBlockExtractorPassPass(Registry); @@ -40,13 +43,15 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeMergeFunctionsPass(Registry); initializePartialInlinerPass(Registry); initializePruneEHPass(Registry); - initializeStripDeadPrototypesPassPass(Registry); + initializeStripDeadPrototypesLegacyPassPass(Registry); initializeStripSymbolsPass(Registry); initializeStripDebugDeclarePass(Registry); initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); initializeBarrierNoopPass(Registry); initializeEliminateAvailableExternallyPass(Registry); + initializeSampleProfileLoaderPass(Registry); + initializeFunctionImportPassPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/IPO/InferFunctionAttrs.cpp b/lib/Transforms/IPO/InferFunctionAttrs.cpp new file mode 100644 index 000000000000..d02c861a2948 --- /dev/null +++ b/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -0,0 +1,937 @@ +//===- InferFunctionAttrs.cpp - Infer implicit function attributes --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "inferattrs" + +STATISTIC(NumReadNone, "Number of functions inferred as readnone"); +STATISTIC(NumReadOnly, "Number of functions inferred as readonly"); +STATISTIC(NumNoUnwind, "Number of functions inferred as nounwind"); +STATISTIC(NumNoCapture, "Number of arguments inferred as nocapture"); +STATISTIC(NumReadOnlyArg, "Number of arguments inferred as readonly"); +STATISTIC(NumNoAlias, "Number of function returns inferred as noalias"); + +static bool setDoesNotAccessMemory(Function &F) { + if (F.doesNotAccessMemory()) + return false; + F.setDoesNotAccessMemory(); + ++NumReadNone; + return true; +} + +static bool setOnlyReadsMemory(Function &F) { + if (F.onlyReadsMemory()) + return false; + F.setOnlyReadsMemory(); + ++NumReadOnly; + return true; +} + +static bool setDoesNotThrow(Function &F) { + if (F.doesNotThrow()) + return false; + F.setDoesNotThrow(); + ++NumNoUnwind; + return true; +} + +static bool setDoesNotCapture(Function &F, unsigned n) { + if (F.doesNotCapture(n)) + return false; + F.setDoesNotCapture(n); + ++NumNoCapture; + return true; +} + +static bool setOnlyReadsMemory(Function &F, unsigned n) { + if (F.onlyReadsMemory(n)) + return false; + F.setOnlyReadsMemory(n); + ++NumReadOnlyArg; + return true; +} + +static bool setDoesNotAlias(Function &F, unsigned n) { + if (F.doesNotAlias(n)) + return false; + F.setDoesNotAlias(n); + ++NumNoAlias; + return true; +} + +/// Analyze the name and prototype of the given function and set any applicable +/// attributes. +/// +/// Returns true if any attributes were set and false otherwise. 
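For example (hedged; the precise attribute set comes from the switch that follows), a bare declaration of strlen ends up annotated roughly as:

//   before:  declare i64 @strlen(i8*)
//   after:   declare i64 @strlen(i8* nocapture) nounwind readonly
// i.e. the LibFunc::strlen case fires setOnlyReadsMemory(F),
// setDoesNotThrow(F), and setDoesNotCapture(F, 1).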
+static bool inferPrototypeAttributes(Function &F, + const TargetLibraryInfo &TLI) { + if (F.hasFnAttribute(Attribute::OptimizeNone)) + return false; + + FunctionType *FTy = F.getFunctionType(); + LibFunc::Func TheLibFunc; + if (!(TLI.getLibFunc(F.getName(), TheLibFunc) && TLI.has(TheLibFunc))) + return false; + + bool Changed = false; + + switch (TheLibFunc) { + case LibFunc::strlen: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strchr: + case LibFunc::strrchr: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::strtol: + case LibFunc::strtod: + case LibFunc::strtof: + case LibFunc::strtoul: + case LibFunc::strtoll: + case LibFunc::strtold: + case LibFunc::strtoull: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcat: + case LibFunc::strncat: + case LibFunc::strncpy: + case LibFunc::stpncpy: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strxfrm: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::strcmp: // 0,1 + case LibFunc::strspn: // 0,1 + case LibFunc::strncmp: // 0,1 + case LibFunc::strcspn: // 0,1 + case LibFunc::strcoll: // 0,1 + case LibFunc::strcasecmp: // 0,1 + case LibFunc::strncasecmp: // + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strstr: + case LibFunc::strpbrk: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::strtok: + case LibFunc::strtok_r: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::setbuf: + case LibFunc::setvbuf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::strdup: + case LibFunc::strndup: + if (FTy->getNumParams() < 1 || 
!FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat: + case LibFunc::statvfs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::sscanf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::sprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::snprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::setitimer: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::system: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "system" is a valid pthread cancellation point. 
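// (Aside, hedged: "system", like "read", "write", and "open" below, is a
//  POSIX thread-cancellation point and may unwind, so nounwind is
//  deliberately not inferred for it; only the pointer attributes are safe.)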
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::malloc: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::memcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memchr: + case LibFunc::memrchr: + if (FTy->getNumParams() != 3) + return false; + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::modf: + case LibFunc::modff: + case LibFunc::modfl: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::memcpy: + case LibFunc::memccpy: + case LibFunc::memmove: + if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::memalign: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::mkdir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::mktime: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::realloc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::read: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "read" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::rewind: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::rmdir: + case LibFunc::remove: + case LibFunc::realpath: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::rename: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::readlink: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::write: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "write" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::bcopy: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::bcmp: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::bzero: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::calloc: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::chmod: + case LibFunc::chown: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ctermid: + case LibFunc::clearerr: + case LibFunc::closedir: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::atoi: + case LibFunc::atol: + case LibFunc::atof: + case LibFunc::atoll: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::access: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= 
setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fdopen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::feof: + case LibFunc::free: + case LibFunc::fseek: + case LibFunc::ftell: + case LibFunc::fgetc: + case LibFunc::fseeko: + case LibFunc::ftello: + case LibFunc::fileno: + case LibFunc::fflush: + case LibFunc::fclose: + case LibFunc::fsetpos: + case LibFunc::flockfile: + case LibFunc::funlockfile: + case LibFunc::ftrylockfile: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::ferror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F); + return Changed; + case LibFunc::fputc: + case LibFunc::fstat: + case LibFunc::frexp: + case LibFunc::frexpf: + case LibFunc::frexpl: + case LibFunc::fstatvfs: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::fgets: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 3); + return Changed; + case LibFunc::fread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(3)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::fputs: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::fscanf: + case LibFunc::fprintf: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fgetpos: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= 
setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getc: + case LibFunc::getlogin_r: + case LibFunc::getc_unlocked: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::getenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setOnlyReadsMemory(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::gets: + case LibFunc::getchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::getitimer: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::getpwnam: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::ungetc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::uname: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::unlink: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::unsetenv: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::utime: + case LibFunc::utimes: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::puts: + case LibFunc::printf: + case LibFunc::perror: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::pread: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pread" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::pwrite: + if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy()) + return false; + // May throw; "pwrite" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::putchar: + Changed |= setDoesNotThrow(F); + return Changed; + case LibFunc::popen: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::pclose: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::vscanf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vsscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vfscanf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::valloc: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::vprintf: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::vfprintf: + case LibFunc::vsprintf: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::vsnprintf: + if (FTy->getNumParams() != 4 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 3); + Changed |= setOnlyReadsMemory(F, 3); + return Changed; + case LibFunc::open: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. 
+ Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::opendir: + if (FTy->getNumParams() != 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::tmpfile: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::times: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::htonl: + case LibFunc::htons: + case LibFunc::ntohl: + case LibFunc::ntohs: + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAccessMemory(F); + return Changed; + case LibFunc::lstat: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::lchown: + if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::qsort: + if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy()) + return false; + // May throw; places call through function pointer. + Changed |= setDoesNotCapture(F, 4); + return Changed; + case LibFunc::dunder_strdup: + case LibFunc::dunder_strndup: + if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_strtok_r: + if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::under_IO_getc: + if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::under_IO_putc: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::dunder_isoc99_scanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::stat64: + case LibFunc::lstat64: + case LibFunc::statvfs64: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::dunder_isoc99_sscanf: + if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return 
false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fopen64: + if (FTy->getNumParams() != 2 || !FTy->getReturnType()->isPointerTy() || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + Changed |= setOnlyReadsMemory(F, 1); + Changed |= setOnlyReadsMemory(F, 2); + return Changed; + case LibFunc::fseeko64: + case LibFunc::ftello64: + if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + return Changed; + case LibFunc::tmpfile64: + if (!FTy->getReturnType()->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotAlias(F, 0); + return Changed; + case LibFunc::fstat64: + case LibFunc::fstatvfs64: + if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy()) + return false; + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 2); + return Changed; + case LibFunc::open64: + if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy()) + return false; + // May throw; "open" is a valid pthread cancellation point. + Changed |= setDoesNotCapture(F, 1); + Changed |= setOnlyReadsMemory(F, 1); + return Changed; + case LibFunc::gettimeofday: + if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy()) + return false; + // Currently some platforms have the restrict keyword on the arguments to + // gettimeofday. To be conservative, do not add noalias to gettimeofday's + // arguments. + Changed |= setDoesNotThrow(F); + Changed |= setDoesNotCapture(F, 1); + Changed |= setDoesNotCapture(F, 2); + return Changed; + + default: + // FIXME: It'd be really nice to cover all the library functions we're + // aware of here. + return false; + } +} + +static bool inferAllPrototypeAttributes(Module &M, + const TargetLibraryInfo &TLI) { + bool Changed = false; + + for (Function &F : M.functions()) + // We only infer things using the prototype if the definition isn't around + // to analyze directly. + if (F.isDeclaration()) + Changed |= inferPrototypeAttributes(F, TLI); + + return Changed; +} + +PreservedAnalyses InferFunctionAttrsPass::run(Module &M, + AnalysisManager *AM) { + auto &TLI = AM->getResult(M); + + if (!inferAllPrototypeAttributes(M, TLI)) + // If we didn't infer anything, preserve all analyses. + return PreservedAnalyses::all(); + + // Otherwise, we may have changed fundamental function attributes, so clear + // out all the passes. 
+ return PreservedAnalyses::none(); +} + +namespace { +struct InferFunctionAttrsLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid + InferFunctionAttrsLegacyPass() : ModulePass(ID) { + initializeInferFunctionAttrsLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + + bool runOnModule(Module &M) override { + auto &TLI = getAnalysis().getTLI(); + return inferAllPrototypeAttributes(M, TLI); + } +}; +} + +char InferFunctionAttrsLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs", + "Infer set function attributes", false, false) + +Pass *llvm::createInferFunctionAttrsLegacyPass() { + return new InferFunctionAttrsLegacyPass(); +} diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp index dc56a02e7b7d..1704bfea0b86 100644 --- a/lib/Transforms/IPO/InlineAlways.cpp +++ b/lib/Transforms/IPO/InlineAlways.cpp @@ -14,10 +14,10 @@ #include "llvm/Transforms/IPO.h" #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -35,17 +35,15 @@ namespace { /// \brief Inliner pass which only handles "always inline" functions. class AlwaysInliner : public Inliner { - InlineCostAnalysis *ICA; public: // Use extremely low threshold. - AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), - ICA(nullptr) { + AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } AlwaysInliner(bool InsertLifetime) - : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) { + : Inliner(ID, -2000000000, InsertLifetime) { initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry()); } @@ -53,9 +51,6 @@ public: InlineCost getInlineCost(CallSite CS) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnSCC(CallGraphSCC &SCC) override; - using llvm::Pass::doFinalization; bool doFinalization(CallGraph &CG) override { return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true); @@ -67,10 +62,9 @@ public: char AlwaysInliner::ID = 0; INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AlwaysInliner, "always-inline", "Inliner for always_inline functions", false, false) @@ -99,19 +93,8 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) { // that are viable for inlining. FIXME: We shouldn't even get here for // declarations. 
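// [Illustrative sketch, not part of the original patch] A client would
// schedule the legacy wrapper registered above through the usual legacy
// pass manager (assumes llvm/IR/LegacyPassManager.h is included):
static bool runInferAttrs(Module &M) {
  legacy::PassManager PM;
  PM.add(llvm::createInferFunctionAttrsLegacyPass());
  return PM.run(M); // true if any prototype attribute was inferred
}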
if (Callee && !Callee->isDeclaration() && - CS.hasFnAttr(Attribute::AlwaysInline) && - ICA->isInlineViable(*Callee)) + CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee)) return InlineCost::getAlways(); return InlineCost::getNever(); } - -bool AlwaysInliner::runOnSCC(CallGraphSCC &SCC) { - ICA = &getAnalysis(); - return Inliner::runOnSCC(SCC); -} - -void AlwaysInliner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - Inliner::getAnalysisUsage(AU); -} diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index 9b01d81b3c7c..45609f891ed8 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -23,6 +23,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" +#include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/InlinerPass.h" using namespace llvm; @@ -37,26 +38,30 @@ namespace { /// inliner pass and the always inliner pass. The two passes use different cost /// analyses to determine when to inline. class SimpleInliner : public Inliner { - InlineCostAnalysis *ICA; public: - SimpleInliner() : Inliner(ID), ICA(nullptr) { + SimpleInliner() : Inliner(ID) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } SimpleInliner(int Threshold) - : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) { + : Inliner(ID, Threshold, /*InsertLifetime*/ true) { initializeSimpleInlinerPass(*PassRegistry::getPassRegistry()); } static char ID; // Pass identification, replacement for typeid InlineCost getInlineCost(CallSite CS) override { - return ICA->getInlineCost(CS, getInlineThreshold(CS)); + Function *Callee = CS.getCalledFunction(); + TargetTransformInfo &TTI = TTIWP->getTTI(*Callee); + return llvm::getInlineCost(CS, getInlineThreshold(CS), TTI, ACT); } bool runOnSCC(CallGraphSCC &SCC) override; void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + TargetTransformInfoWrapperPass *TTIWP; }; static int computeThresholdFromOptLevels(unsigned OptLevel, @@ -75,10 +80,10 @@ static int computeThresholdFromOptLevels(unsigned OptLevel, char SimpleInliner::ID = 0; INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(InlineCostAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(SimpleInliner, "inline", "Function Integration/Inlining", false, false) @@ -95,11 +100,11 @@ Pass *llvm::createFunctionInliningPass(unsigned OptLevel, } bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) { - ICA = &getAnalysis(); + TTIWP = &getAnalysis(); return Inliner::runOnSCC(SCC); } void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); + AU.addRequired(); Inliner::getAnalysisUsage(AU); } diff --git a/lib/Transforms/IPO/Inliner.cpp 
b/lib/Transforms/IPO/Inliner.cpp index 5273c3dc3ca2..bbe5f8761d5f 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -64,20 +65,22 @@ ColdThreshold("inlinecold-threshold", cl::Hidden, cl::init(225), // Threshold to use when optsize is specified (and there is no -inline-limit). const int OptSizeThreshold = 75; -Inliner::Inliner(char &ID) - : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) {} +Inliner::Inliner(char &ID) + : CallGraphSCCPass(ID), InlineThreshold(InlineLimit), InsertLifetime(true) { +} Inliner::Inliner(char &ID, int Threshold, bool InsertLifetime) - : CallGraphSCCPass(ID), InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? - InlineLimit : Threshold), - InsertLifetime(InsertLifetime) {} + : CallGraphSCCPass(ID), + InlineThreshold(InlineLimit.getNumOccurrences() > 0 ? InlineLimit + : Threshold), + InsertLifetime(InsertLifetime) {} /// For this class, we declare that we require and preserve the call graph. /// If the derived class implements this method, it should /// always explicitly call the implementation here. void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); AU.addRequired(); + AU.addRequired(); CallGraphSCCPass::getAnalysisUsage(AU); } @@ -85,39 +88,6 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const { typedef DenseMap > InlinedArrayAllocasTy; -/// \brief If the inlined function had a higher stack protection level than the -/// calling function, then bump up the caller's stack protection level. -static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { - // If upgrading the SSP attribute, clear out the old SSP Attributes first. - // Having multiple SSP attributes doesn't actually hurt, but it adds useless - // clutter to the IR. - AttrBuilder B; - B.addAttribute(Attribute::StackProtect) - .addAttribute(Attribute::StackProtectStrong) - .addAttribute(Attribute::StackProtectReq); - AttributeSet OldSSPAttr = AttributeSet::get(Caller->getContext(), - AttributeSet::FunctionIndex, - B); - - if (Callee->hasFnAttribute(Attribute::SafeStack)) { - Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); - Caller->addFnAttr(Attribute::SafeStack); - } else if (Callee->hasFnAttribute(Attribute::StackProtectReq) && - !Caller->hasFnAttribute(Attribute::SafeStack)) { - Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); - Caller->addFnAttr(Attribute::StackProtectReq); - } else if (Callee->hasFnAttribute(Attribute::StackProtectStrong) && - !Caller->hasFnAttribute(Attribute::SafeStack) && - !Caller->hasFnAttribute(Attribute::StackProtectReq)) { - Caller->removeAttributes(AttributeSet::FunctionIndex, OldSSPAttr); - Caller->addFnAttr(Attribute::StackProtectStrong); - } else if (Callee->hasFnAttribute(Attribute::StackProtect) && - !Caller->hasFnAttribute(Attribute::SafeStack) && - !Caller->hasFnAttribute(Attribute::StackProtectReq) && - !Caller->hasFnAttribute(Attribute::StackProtectStrong)) - Caller->addFnAttr(Attribute::StackProtect); -} - /// If it is possible to inline the specified call site, /// do so and update the CallGraph for this operation. 
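// [Illustrative sketch, not part of the original patch] The removed
// AdjustCallerSSPLevel above encoded a strict upgrade order for
// stack-protection attributes: safestack > sspreq > sspstrong > ssp. Its
// replacement, AttributeFuncs::mergeAttributesForInlining, centralizes the
// same idea: after inlining, the caller keeps the stronger of the two levels
// and is never downgraded. As a plain-enum sketch (assumes <algorithm>):
enum class SSPLevel { None, SSP, SSPStrong, SSPReq, SafeStack };
static SSPLevel mergeSSPLevels(SSPLevel Caller, SSPLevel Callee) {
  return std::max(Caller, Callee); // the caller is only ever bumped up
}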
/// @@ -126,18 +96,26 @@ static void AdjustCallerSSPLevel(Function *Caller, Function *Callee) { /// available from other functions inlined into the caller. If we are able to /// inline this call site we attempt to reuse already available allocas or add /// any new allocas to the set if not possible. -static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, +static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI, InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, bool InsertLifetime) { Function *Callee = CS.getCalledFunction(); Function *Caller = CS.getCaller(); + // We need to manually construct BasicAA directly in order to disable + // its use of other function analyses. + BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee)); + + // Construct our own AA results for this function. We do this manually to + // work around the limitations of the legacy pass manager. + AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR)); + // Try to inline the function. Get the list of static allocas that were // inlined. - if (!InlineFunction(CS, IFI, InsertLifetime)) + if (!InlineFunction(CS, IFI, &AAR, InsertLifetime)) return false; - AdjustCallerSSPLevel(Caller, Callee); + AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee); // Look at all of the allocas that we inlined through this call site. If we // have already inlined other allocas through other calls into this function, @@ -219,6 +197,14 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: " << *AvailableAlloca << '\n'); + // Move affected dbg.declare calls immediately after the new alloca to + // avoid the situation when a dbg.declare precedes its alloca. + if (auto *L = LocalAsMetadata::getIfExists(AI)) + if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L)) + for (User *U : MDV->users()) + if (DbgDeclareInst *DDI = dyn_cast(U)) + DDI->moveBefore(AvailableAlloca->getNextNode()); + AI->replaceAllUsesWith(AvailableAlloca); if (Align1 != Align2) { @@ -258,39 +244,64 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI, } unsigned Inliner::getInlineThreshold(CallSite CS) const { - int thres = InlineThreshold; // -inline-threshold or else selected by - // overall opt level + int Threshold = InlineThreshold; // -inline-threshold or else selected by + // overall opt level // If -inline-threshold is not given, listen to the optsize attribute when it // would decrease the threshold. Function *Caller = CS.getCaller(); bool OptSize = Caller && !Caller->isDeclaration() && + // FIXME: Use Function::optForSize(). Caller->hasFnAttribute(Attribute::OptimizeForSize); if (!(InlineLimit.getNumOccurrences() > 0) && OptSize && - OptSizeThreshold < thres) - thres = OptSizeThreshold; + OptSizeThreshold < Threshold) + Threshold = OptSizeThreshold; - // Listen to the inlinehint attribute when it would increase the threshold - // and the caller does not need to minimize its size. Function *Callee = CS.getCalledFunction(); - bool InlineHint = Callee && !Callee->isDeclaration() && - Callee->hasFnAttribute(Attribute::InlineHint); - if (InlineHint && HintThreshold > thres && - !Caller->hasFnAttribute(Attribute::MinSize)) - thres = HintThreshold; + if (!Callee || Callee->isDeclaration()) + return Threshold; - // Listen to the cold attribute when it would decrease the threshold.
- bool ColdCallee = Callee && !Callee->isDeclaration() && - Callee->hasFnAttribute(Attribute::Cold); + // If profile information is available, use that to adjust threshold of hot + // and cold functions. + // FIXME: The heuristics used below for determining hotness and coldness are + // based on preliminary SPEC tuning and may not be optimal. Replace this with + // a well-tuned heuristic based on *callsite* hotness and not callee hotness. + uint64_t FunctionCount = 0, MaxFunctionCount = 0; + bool HasPGOCounts = false; + if (Callee->getEntryCount() && + Callee->getParent()->getMaximumFunctionCount()) { + HasPGOCounts = true; + FunctionCount = Callee->getEntryCount().getValue(); + MaxFunctionCount = + Callee->getParent()->getMaximumFunctionCount().getValue(); + } + + // Listen to the inlinehint attribute or profile based hotness information + // when it would increase the threshold and the caller does not need to + // minimize its size. + bool InlineHint = + Callee->hasFnAttribute(Attribute::InlineHint) || + (HasPGOCounts && + FunctionCount >= (uint64_t)(0.3 * (double)MaxFunctionCount)); + if (InlineHint && HintThreshold > Threshold && + !Caller->hasFnAttribute(Attribute::MinSize)) + Threshold = HintThreshold; + + // Listen to the cold attribute or profile based coldness information + // when it would decrease the threshold. + bool ColdCallee = + Callee->hasFnAttribute(Attribute::Cold) || + (HasPGOCounts && + FunctionCount <= (uint64_t)(0.01 * (double)MaxFunctionCount)); // Command line argument for InlineLimit will override the default // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold, // do not use the default cold threshold even if it is smaller. if ((InlineLimit.getNumOccurrences() == 0 || ColdThreshold.getNumOccurrences() > 0) && ColdCallee && - ColdThreshold < thres) - thres = ColdThreshold; + ColdThreshold < Threshold) + Threshold = ColdThreshold; - return thres; + return Threshold; } static void emitAnalysis(CallSite CS, const Twine &Msg) { @@ -430,10 +441,8 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID, bool Inliner::runOnSCC(CallGraphSCC &SCC) { CallGraph &CG = getAnalysis().getCallGraph(); - AssumptionCacheTracker *ACT = &getAnalysis(); - auto *TLIP = getAnalysisIfAvailable(); - const TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; - AliasAnalysis *AA = &getAnalysis(); + ACT = &getAnalysis(); + auto &TLI = getAnalysis().getTLI(); SmallPtrSet SCCFunctions; DEBUG(dbgs() << "Inliner visiting SCC:"); @@ -469,8 +478,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // If this is a direct call to an external function, we can never inline // it. If it is an indirect call, inlining may resolve it to be a // direct call, so we keep it. - if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration()) - continue; + if (Function *Callee = CS.getCalledFunction()) + if (Callee->isDeclaration()) + continue; CallSites.push_back(std::make_pair(CS, -1)); } @@ -492,7 +502,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, AA, ACT); + InlineFunctionInfo InlineInfo(&CG, ACT); // Now that we have all of the call sites, loop over them and inline them if // it looks profitable to do so. @@ -513,7 +523,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // just delete the call instead of trying to inline it, regardless of // size. This happens because IPSCCP propagates the result out of the // call and then we're left with the dead call.
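// [Illustrative worked example, not part of the original patch; helper names
// are hypothetical.] Plugging numbers into the PGO heuristic in
// getInlineThreshold above: with MaxFunctionCount = 10000, a callee whose
// entry count is 3000 satisfies FunctionCount >= 0.3 * max and is treated as
// hot (eligible for HintThreshold), while an entry count of 50 satisfies
// FunctionCount <= 0.01 * max and is treated as cold (eligible for
// ColdThreshold); everything in between keeps the base threshold.
static bool isHotCount(uint64_t Count, uint64_t MaxCount) {
  return Count >= (uint64_t)(0.3 * (double)MaxCount);
}
static bool isColdCount(uint64_t Count, uint64_t MaxCount) {
  return Count <= (uint64_t)(0.01 * (double)MaxCount);
}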
- if (isInstructionTriviallyDead(CS.getInstruction(), TLI)) { + if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) { DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction() << "\n"); // Update the call graph by deleting the edge from Callee to Caller. @@ -550,7 +560,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { } // Attempt to inline the function. - if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas, + if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas, InlineHistoryID, InsertLifetime)) { emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc, Twine(Callee->getName() + @@ -647,8 +657,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) { // Scan for all of the functions, looking for ones that should now be removed // from the program. Insert the dead ones in the FunctionsToRemove set. - for (auto I : CG) { - CallGraphNode *CGN = I.second; + for (const auto &I : CG) { + CallGraphNode *CGN = I.second.get(); Function *F = CGN->getFunction(); if (!F || F->isDeclaration()) continue; diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 7950163f757d..21bb5d000bc7 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -60,6 +60,10 @@ namespace { explicit InternalizePass(); explicit InternalizePass(ArrayRef ExportList); void LoadFile(const char *Filename); + bool maybeInternalize(GlobalValue &GV, + const std::set &ExternalComdats); + void checkComdatVisibility(GlobalValue &GV, + std::set &ExternalComdats); bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -105,40 +109,85 @@ void InternalizePass::LoadFile(const char *Filename) { } } -static bool shouldInternalize(const GlobalValue &GV, - const std::set &ExternalNames) { +static bool isExternallyVisible(const GlobalValue &GV, + const std::set &ExternalNames) { // Function must be defined here if (GV.isDeclaration()) - return false; + return true; // Available externally is really just a "declaration with a body". if (GV.hasAvailableExternallyLinkage()) - return false; + return true; // Assume that dllexported symbols are referenced elsewhere if (GV.hasDLLExportStorageClass()) - return false; - - // Already has internal linkage - if (GV.hasLocalLinkage()) - return false; + return true; // Marked to keep external? - if (ExternalNames.count(GV.getName())) - return false; + if (!GV.hasLocalLinkage() && ExternalNames.count(GV.getName())) + return true; + return false; +} + +// Internalize GV if it is possible to do so, i.e. it is not externally visible +// and is not a member of an externally visible comdat. +bool InternalizePass::maybeInternalize( + GlobalValue &GV, const std::set &ExternalComdats) { + if (Comdat *C = GV.getComdat()) { + if (ExternalComdats.count(C)) + return false; + + // If a comdat is not externally visible we can drop it. + if (auto GO = dyn_cast(&GV)) + GO->setComdat(nullptr); + + if (GV.hasLocalLinkage()) + return false; + } else { + if (GV.hasLocalLinkage()) + return false; + + if (isExternallyVisible(GV, ExternalNames)) + return false; + } + + GV.setVisibility(GlobalValue::DefaultVisibility); + GV.setLinkage(GlobalValue::InternalLinkage); return true; } +// If GV is part of a comdat and is externally visible, keep track of its +// comdat so that we don't internalize any of its members. 
+void InternalizePass::checkComdatVisibility( + GlobalValue &GV, std::set &ExternalComdats) { + Comdat *C = GV.getComdat(); + if (!C) + return; + + if (isExternallyVisible(GV, ExternalNames)) + ExternalComdats.insert(C); +} + bool InternalizePass::runOnModule(Module &M) { CallGraphWrapperPass *CGPass = getAnalysisIfAvailable(); CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr; CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; - bool Changed = false; SmallPtrSet Used; collectUsedGlobalVariables(M, Used, false); + // Collect comdat visibility information for the module. + std::set ExternalComdats; + if (!M.getComdatSymbolTable().empty()) { + for (Function &F : M) + checkComdatVisibility(F, ExternalComdats); + for (GlobalVariable &GV : M.globals()) + checkComdatVisibility(GV, ExternalComdats); + for (GlobalAlias &GA : M.aliases()) + checkComdatVisibility(GA, ExternalComdats); + } + // We must assume that globals in llvm.used have a reference that not even // the linker can see, so we don't internalize them. // For llvm.compiler.used the situation is a bit fuzzy. The assembler and @@ -153,20 +202,16 @@ bool InternalizePass::runOnModule(Module &M) { } // Mark all functions not in the api as internal. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames)) + for (Function &I : M) { + if (!maybeInternalize(I, ExternalComdats)) continue; - I->setVisibility(GlobalValue::DefaultVisibility); - I->setLinkage(GlobalValue::InternalLinkage); - if (ExternalNode) // Remove a callgraph edge from the external node to this function. - ExternalNode->removeOneAbstractEdgeTo((*CG)[I]); + ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]); - Changed = true; ++NumFunctions; - DEBUG(dbgs() << "Internalizing func " << I->getName() << "\n"); + DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n"); } // Never internalize the llvm.used symbol. It is used to implement @@ -191,12 +236,9 @@ bool InternalizePass::runOnModule(Module &M) { // internal as well. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames)) + if (!maybeInternalize(*I, ExternalComdats)) continue; - I->setVisibility(GlobalValue::DefaultVisibility); - I->setLinkage(GlobalValue::InternalLinkage); - Changed = true; ++NumGlobals; DEBUG(dbgs() << "Internalized gvar " << I->getName() << "\n"); } @@ -204,17 +246,20 @@ bool InternalizePass::runOnModule(Module &M) { // Mark all aliases that are not in the api as internal as well. for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { - if (!shouldInternalize(*I, ExternalNames)) + if (!maybeInternalize(*I, ExternalComdats)) continue; - I->setVisibility(GlobalValue::DefaultVisibility); - I->setLinkage(GlobalValue::InternalLinkage); - Changed = true; ++NumAliases; DEBUG(dbgs() << "Internalized alias " << I->getName() << "\n"); } - return Changed; + // We do not keep track of whether this pass changed the module because + // it adds unnecessary complexity: + // 1) This pass will generally be near the start of the pass pipeline, so + // there will be no analyses to invalidate. + // 2) This pass will most likely end up changing the module and it isn't worth + // worrying about optimizing the case where the module is unchanged.
+ return true; } ModulePass *llvm::createInternalizePass() { return new InternalizePass(); } diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index 575dce4b33df..b5410f5f7757 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = IPO parent = Transforms library_name = ipo -required_libraries = Analysis Core IPA InstCombine Scalar Support TransformUtils Vectorize +required_libraries = Analysis Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp index 41334ca5b429..8e4ad642ddd5 100644 --- a/lib/Transforms/IPO/LoopExtractor.cpp +++ b/lib/Transforms/IPO/LoopExtractor.cpp @@ -43,12 +43,13 @@ namespace { initializeLoopExtractorPass(*PassRegistry::getPassRegistry()); } - bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredID(BreakCriticalEdgesID); AU.addRequiredID(LoopSimplifyID); AU.addRequired(); + AU.addRequired(); } }; } @@ -79,7 +80,7 @@ INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single", // Pass *llvm::createLoopExtractorPass() { return new LoopExtractor(); } -bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { +bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) { if (skipOptnoneFunction(L)) return false; @@ -92,6 +93,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { return false; DominatorTree &DT = getAnalysis().getDomTree(); + LoopInfo &LI = getAnalysis().getLoopInfo(); bool Changed = false; // If there is more than one top-level loop in this function, extract all of @@ -120,14 +122,14 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { } if (ShouldExtractLoop) { - // We must omit landing pads. Landing pads must accompany the invoke + // We must omit EH pads. EH pads must accompany the invoke // instruction. But this would result in a loop in the extracted // function. An infinite cycle occurs when it tries to extract that loop as // well. SmallVector ExitBlocks; L->getExitBlocks(ExitBlocks); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (ExitBlocks[i]->isLandingPad()) { + if (ExitBlocks[i]->isEHPad()) { ShouldExtractLoop = false; break; } @@ -141,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) { Changed = true; // After extraction, the loop is replaced by a function call, so // we shouldn't try to run any more loop passes on it. - LPM.deleteLoopFromQueue(L); + LI.updateUnloop(L); } ++NumExtracted; } @@ -259,7 +261,7 @@ bool BlockExtractorPass::runOnModule(Module &M) { // Figure out which index the basic block is in its function. 
Function::iterator BBI = MF->begin(); std::advance(BBI, std::distance(F->begin(), Function::iterator(BB))); - TranslatedBlocksToNotExtract.insert(BBI); + TranslatedBlocksToNotExtract.insert(&*BBI); } while (!BlocksToNotExtractByName.empty()) { @@ -278,7 +280,7 @@ bool BlockExtractorPass::runOnModule(Module &M) { BasicBlock &BB = *BI; if (BB.getName() != BlockName) continue; - TranslatedBlocksToNotExtract.insert(BI); + TranslatedBlocksToNotExtract.insert(&*BI); } } @@ -291,8 +293,8 @@ bool BlockExtractorPass::runOnModule(Module &M) { for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { SplitLandingPadPreds(&*F); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - if (!TranslatedBlocksToNotExtract.count(BB)) - BlocksToExtract.push_back(BB); + if (!TranslatedBlocksToNotExtract.count(&*BB)) + BlocksToExtract.push_back(&*BB); } for (unsigned i = 0, e = BlocksToExtract.size(); i != e; ++i) { diff --git a/lib/Transforms/IPO/LowerBitSets.cpp b/lib/Transforms/IPO/LowerBitSets.cpp index c6795c623eff..7b515745c312 100644 --- a/lib/Transforms/IPO/LowerBitSets.cpp +++ b/lib/Transforms/IPO/LowerBitSets.cpp @@ -19,6 +19,8 @@ #include "llvm/ADT/Triple.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalObject.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -26,6 +28,8 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" using namespace llvm; @@ -59,9 +63,9 @@ bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const { bool BitSetInfo::containsValue( const DataLayout &DL, - const DenseMap &GlobalLayout, Value *V, + const DenseMap &GlobalLayout, Value *V, uint64_t COffset) const { - if (auto GV = dyn_cast(V)) { + if (auto GV = dyn_cast(V)) { auto I = GlobalLayout.find(GV); if (I == GlobalLayout.end()) return false; @@ -90,6 +94,21 @@ bool BitSetInfo::containsValue( return false; } +void BitSetInfo::print(raw_ostream &OS) const { + OS << "offset " << ByteOffset << " size " << BitSize << " align " + << (1 << AlignLog2); + + if (isAllOnes()) { + OS << " all-ones\n"; + return; + } + + OS << " { "; + for (uint64_t B : Bits) + OS << B << ' '; + OS << "}\n"; +} + BitSetInfo BitSetBuilder::build() { if (Min > Max) Min = 0; @@ -193,34 +212,48 @@ struct LowerBitSets : public ModulePass { Module *M; bool LinkerSubsectionsViaSymbols; + Triple::ArchType Arch; + Triple::ObjectFormatType ObjectFormat; IntegerType *Int1Ty; IntegerType *Int8Ty; IntegerType *Int32Ty; Type *Int32PtrTy; IntegerType *Int64Ty; - Type *IntPtrTy; + IntegerType *IntPtrTy; // The llvm.bitsets named metadata. NamedMDNode *BitSetNM; - // Mapping from bitset mdstrings to the call sites that test them. - DenseMap> BitSetTestCallSites; + // Mapping from bitset identifiers to the call sites that test them. 
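// [Illustrative sketch, not part of the original patch] For context, a front
// end produces the calls collected in BitSetTestCallSites below roughly like
// this with IRBuilder; after this change the second operand may be arbitrary
// Metadata rather than only an MDString:
static Value *emitBitSetTest(IRBuilder<> &B, Module &M, Value *Ptr,
                             Metadata *BitSet) {
  Function *TestFn = Intrinsic::getDeclaration(&M, Intrinsic::bitset_test);
  Value *MetaArg = MetadataAsValue::get(M.getContext(), BitSet);
  Value *PtrArg = B.CreateBitCast(Ptr, B.getInt8PtrTy());
  return B.CreateCall(TestFn, {PtrArg, MetaArg}); // yields an i1
}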
+ DenseMap> BitSetTestCallSites; std::vector ByteArrayInfos; BitSetInfo - buildBitSet(MDString *BitSet, - const DenseMap &GlobalLayout); + buildBitSet(Metadata *BitSet, + const DenseMap &GlobalLayout); ByteArrayInfo *createByteArray(BitSetInfo &BSI); void allocateByteArrays(); Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI, Value *BitOffset); + void lowerBitSetCalls(ArrayRef BitSets, + Constant *CombinedGlobalAddr, + const DenseMap &GlobalLayout); Value * lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, - GlobalVariable *CombinedGlobal, - const DenseMap &GlobalLayout); - void buildBitSetsFromGlobals(const std::vector &BitSets, - const std::vector &Globals); + Constant *CombinedGlobal, + const DenseMap &GlobalLayout); + void buildBitSetsFromGlobalVariables(ArrayRef BitSets, + ArrayRef Globals); + unsigned getJumpTableEntrySize(); + Type *getJumpTableEntryType(); + Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest, + unsigned Distance); + void verifyBitSetMDNode(MDNode *Op); + void buildBitSetsFromFunctions(ArrayRef BitSets, + ArrayRef Functions); + void buildBitSetsFromDisjointSet(ArrayRef BitSets, + ArrayRef Globals); bool buildBitSets(); bool eraseBitSetMetadata(); @@ -228,7 +261,7 @@ struct LowerBitSets : public ModulePass { bool runOnModule(Module &M) override; }; -} // namespace +} // anonymous namespace INITIALIZE_PASS_BEGIN(LowerBitSets, "lowerbitsets", "Lower bitset metadata", false, false) @@ -244,6 +277,8 @@ bool LowerBitSets::doInitialization(Module &Mod) { Triple TargetTriple(M->getTargetTriple()); LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX(); + Arch = TargetTriple.getArch(); + ObjectFormat = TargetTriple.getObjectFormat(); Int1Ty = Type::getInt1Ty(M->getContext()); Int8Ty = Type::getInt8Ty(M->getContext()); @@ -262,8 +297,8 @@ bool LowerBitSets::doInitialization(Module &Mod) { /// Build a bit set for BitSet using the object layouts in /// GlobalLayout. BitSetInfo LowerBitSets::buildBitSet( - MDString *BitSet, - const DenseMap &GlobalLayout) { + Metadata *BitSet, + const DenseMap &GlobalLayout) { BitSetBuilder BSB; // Compute the byte offset of each element of this bitset. @@ -271,8 +306,11 @@ BitSetInfo LowerBitSets::buildBitSet( for (MDNode *Op : BitSetNM->operands()) { if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) continue; - auto OpGlobal = dyn_cast( - cast(Op->getOperand(1))->getValue()); + Constant *OpConst = + cast(Op->getOperand(1))->getValue(); + if (auto GA = dyn_cast(OpConst)) + OpConst = GA->getAliasee(); + auto OpGlobal = dyn_cast(OpConst); if (!OpGlobal) continue; uint64_t Offset = @@ -360,9 +398,8 @@ void LowerBitSets::allocateByteArrays() { if (LinkerSubsectionsViaSymbols) { BAI->ByteArray->replaceAllUsesWith(GEP); } else { - GlobalAlias *Alias = - GlobalAlias::create(PointerType::getUnqual(Int8Ty), - GlobalValue::PrivateLinkage, "bits", GEP, M); + GlobalAlias *Alias = GlobalAlias::create( + Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M); BAI->ByteArray->replaceAllUsesWith(Alias); } BAI->ByteArray->eraseFromParent(); @@ -404,7 +441,7 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, // Each use of the byte array uses a different alias. This makes the // backend less likely to reuse previously computed byte array addresses, // improving the security of the CFI mechanism based on this pass. 
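// [Editorial note, not part of the original patch] The alias-creation calls
// in this hunk reflect an API migration visible throughout this file:
// GlobalAlias::create now takes the alias's value type plus an address space
// instead of a pointer type, i.e. (before/after, hypothetical operands):
//   GlobalAlias::create(PointerType::getUnqual(Int8Ty), Linkage, "bits", GEP, M);
//   GlobalAlias::create(Int8Ty, /*AddressSpace=*/0, Linkage, "bits", GEP, M);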
- ByteArray = GlobalAlias::create(BAI->ByteArray->getType(), + ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0, GlobalValue::PrivateLinkage, "bits_use", ByteArray, M); } @@ -421,17 +458,16 @@ Value *LowerBitSets::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, /// replace the call with. Value *LowerBitSets::lowerBitSetCall( CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI, - GlobalVariable *CombinedGlobal, - const DenseMap &GlobalLayout) { + Constant *CombinedGlobalIntAddr, + const DenseMap &GlobalLayout) { Value *Ptr = CI->getArgOperand(0); const DataLayout &DL = M->getDataLayout(); if (BSI.containsValue(DL, GlobalLayout, Ptr)) - return ConstantInt::getTrue(CombinedGlobal->getParent()->getContext()); + return ConstantInt::getTrue(M->getContext()); - Constant *GlobalAsInt = ConstantExpr::getPtrToInt(CombinedGlobal, IntPtrTy); Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd( - GlobalAsInt, ConstantInt::get(IntPtrTy, BSI.ByteOffset)); + CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)); BasicBlock *InitialBB = CI->getParent(); @@ -490,18 +526,19 @@ Value *LowerBitSets::lowerBitSetCall( /// Given a disjoint set of bitsets and globals, layout the globals, build the /// bit sets and lower the llvm.bitset.test calls. -void LowerBitSets::buildBitSetsFromGlobals( - const std::vector &BitSets, - const std::vector &Globals) { +void LowerBitSets::buildBitSetsFromGlobalVariables( + ArrayRef BitSets, ArrayRef Globals) { // Build a new global with the combined contents of the referenced globals. + // This global is a struct whose even-indexed elements contain the original + // contents of the referenced globals and whose odd-indexed elements contain + // any padding required to align the next element to the next power of 2. std::vector GlobalInits; const DataLayout &DL = M->getDataLayout(); for (GlobalVariable *G : Globals) { GlobalInits.push_back(G->getInitializer()); - uint64_t InitSize = DL.getTypeAllocSize(G->getInitializer()->getType()); + uint64_t InitSize = DL.getTypeAllocSize(G->getValueType()); - // Compute the amount of padding required to align the next element to the - // next power of 2. + // Compute the amount of padding required. uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize; // Cap at 128 was found experimentally to have a good data/instruction @@ -515,34 +552,20 @@ void LowerBitSets::buildBitSetsFromGlobals( if (!GlobalInits.empty()) GlobalInits.pop_back(); Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits); - auto CombinedGlobal = + auto *CombinedGlobal = new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true, GlobalValue::PrivateLinkage, NewInit); - const StructLayout *CombinedGlobalLayout = - DL.getStructLayout(cast(NewInit->getType())); + StructType *NewTy = cast(NewInit->getType()); + const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy); // Compute the offsets of the original globals within the new global. - DenseMap GlobalLayout; + DenseMap GlobalLayout; for (unsigned I = 0; I != Globals.size(); ++I) // Multiply by 2 to account for padding elements. GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2); - // For each bitset in this disjoint set... - for (MDString *BS : BitSets) { - // Build the bitset. - BitSetInfo BSI = buildBitSet(BS, GlobalLayout); - - ByteArrayInfo *BAI = 0; - - // Lower each call to llvm.bitset.test for this bitset. 
- for (CallInst *CI : BitSetTestCallSites[BS]) { - ++NumBitSetCallsLowered; - Value *Lowered = lowerBitSetCall(CI, BSI, BAI, CombinedGlobal, GlobalLayout); - CI->replaceAllUsesWith(Lowered); - CI->eraseFromParent(); - } - } + lowerBitSetCalls(BitSets, CombinedGlobal, GlobalLayout); // Build aliases pointing to offsets into the combined global for each // global from which we built the combined global, and replace references @@ -556,9 +579,11 @@ void LowerBitSets::buildBitSetsFromGlobals( if (LinkerSubsectionsViaSymbols) { Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr); } else { - GlobalAlias *GAlias = - GlobalAlias::create(Globals[I]->getType(), Globals[I]->getLinkage(), - "", CombinedGlobalElemPtr, M); + assert(Globals[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0, + Globals[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Globals[I]->getVisibility()); GAlias->takeName(Globals[I]); Globals[I]->replaceAllUsesWith(GAlias); } @@ -566,6 +591,331 @@ void LowerBitSets::buildBitSetsFromGlobals( } } +void LowerBitSets::lowerBitSetCalls( + ArrayRef BitSets, Constant *CombinedGlobalAddr, + const DenseMap &GlobalLayout) { + Constant *CombinedGlobalIntAddr = + ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy); + + // For each bitset in this disjoint set... + for (Metadata *BS : BitSets) { + // Build the bitset. + BitSetInfo BSI = buildBitSet(BS, GlobalLayout); + DEBUG({ + if (auto BSS = dyn_cast(BS)) + dbgs() << BSS->getString() << ": "; + else + dbgs() << ": "; + BSI.print(dbgs()); + }); + + ByteArrayInfo *BAI = nullptr; + + // Lower each call to llvm.bitset.test for this bitset. + for (CallInst *CI : BitSetTestCallSites[BS]) { + ++NumBitSetCallsLowered; + Value *Lowered = + lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout); + CI->replaceAllUsesWith(Lowered); + CI->eraseFromParent(); + } + } +} + +void LowerBitSets::verifyBitSetMDNode(MDNode *Op) { + if (Op->getNumOperands() != 3) + report_fatal_error( + "All operands of llvm.bitsets metadata must have 3 elements"); + if (!Op->getOperand(1)) + return; + + auto OpConstMD = dyn_cast(Op->getOperand(1)); + if (!OpConstMD) + report_fatal_error("Bit set element must be a constant"); + auto OpGlobal = dyn_cast(OpConstMD->getValue()); + if (!OpGlobal) + return; + + if (OpGlobal->isThreadLocal()) + report_fatal_error("Bit set element may not be thread-local"); + if (OpGlobal->hasSection()) + report_fatal_error("Bit set element may not have an explicit section"); + + if (isa(OpGlobal) && OpGlobal->isDeclarationForLinker()) + report_fatal_error("Bit set global var element must be a definition"); + + auto OffsetConstMD = dyn_cast(Op->getOperand(2)); + if (!OffsetConstMD) + report_fatal_error("Bit set element offset must be a constant"); + auto OffsetInt = dyn_cast(OffsetConstMD->getValue()); + if (!OffsetInt) + report_fatal_error("Bit set element offset must be an integer constant"); +} + +static const unsigned kX86JumpTableEntrySize = 8; + +unsigned LowerBitSets::getJumpTableEntrySize() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return kX86JumpTableEntrySize; +} + +// Create a constant representing a jump table entry for the target. This +// consists of an instruction sequence containing a relative branch to Dest. The +// constant will be laid out at address Src+(Len*Distance) where Len is the +// target-specific jump table entry size. 
+Constant *LowerBitSets::createJumpTableEntry(GlobalObject *Src, Function *Dest, + unsigned Distance) { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + const unsigned kJmpPCRel32Code = 0xe9; + const unsigned kInt3Code = 0xcc; + + ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code); + + // Build a constant representing the displacement between the constant's + // address and Dest. This will resolve to a PC32 relocation referring to Dest. + Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy); + Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy); + Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt); + ConstantInt *DispOffset = + ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5); + Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset); + OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty); + + ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code); + + Constant *Fields[] = { + Jmp, OffsetedDisp, Int3, Int3, Int3, + }; + return ConstantStruct::getAnon(Fields, /*Packed=*/true); +} + +Type *LowerBitSets::getJumpTableEntryType() { + if (Arch != Triple::x86 && Arch != Triple::x86_64) + report_fatal_error("Unsupported architecture for jump tables"); + + return StructType::get(M->getContext(), + {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty}, + /*Packed=*/true); +} + +/// Given a disjoint set of bitsets and functions, build a jump table for the +/// functions, build the bit sets and lower the llvm.bitset.test calls. +void LowerBitSets::buildBitSetsFromFunctions(ArrayRef BitSets, + ArrayRef Functions) { + // Unlike the global bitset builder, the function bitset builder cannot + // re-arrange functions in a particular order and base its calculations on the + // layout of the functions' entry points, as we have no idea how large a + // particular function will end up being (the size could even depend on what + // this pass does!) Instead, we build a jump table, which is a block of code + // consisting of one branch instruction for each of the functions in the bit + // set that branches to the target function, and redirect any taken function + // addresses to the corresponding jump table entry. In the object file's + // symbol table, the symbols for the target functions also refer to the jump + // table entries, so that addresses taken outside the module will pass any + // verification done inside the module. + // + // In more concrete terms, suppose we have three functions f, g, h which are + // members of a single bitset, and a function foo that returns their + // addresses: + // + // f: + // mov 0, %eax + // ret + // + // g: + // mov 1, %eax + // ret + // + // h: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // To create a jump table for these functions, we instruct the LLVM code + // generator to output a jump table in the .text section. This is done by + // representing the instructions in the jump table as an LLVM constant and + // placing them in a global variable in the .text section. 
The end result will + // (conceptually) look like this: + // + // f: + // jmp .Ltmp0 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // g: + // jmp .Ltmp1 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // h: + // jmp .Ltmp2 ; 5 bytes + // int3 ; 1 byte + // int3 ; 1 byte + // int3 ; 1 byte + // + // .Ltmp0: + // mov 0, %eax + // ret + // + // .Ltmp1: + // mov 1, %eax + // ret + // + // .Ltmp2: + // mov 2, %eax + // ret + // + // foo: + // mov f, %eax + // mov g, %edx + // mov h, %ecx + // ret + // + // Because the addresses of f, g, h are evenly spaced at a power of 2, in the + // normal case the check can be carried out using the same kind of simple + // arithmetic that we normally use for globals. + + assert(!Functions.empty()); + + // Build a simple layout based on the regular layout of jump tables. + DenseMap GlobalLayout; + unsigned EntrySize = getJumpTableEntrySize(); + for (unsigned I = 0; I != Functions.size(); ++I) + GlobalLayout[Functions[I]] = I * EntrySize; + + // Create a constant to hold the jump table. + ArrayType *JumpTableType = + ArrayType::get(getJumpTableEntryType(), Functions.size()); + auto JumpTable = new GlobalVariable(*M, JumpTableType, + /*isConstant=*/true, + GlobalValue::PrivateLinkage, nullptr); + JumpTable->setSection(ObjectFormat == Triple::MachO + ? "__TEXT,__text,regular,pure_instructions" + : ".text"); + lowerBitSetCalls(BitSets, JumpTable, GlobalLayout); + + // Build aliases pointing to offsets into the jump table, and replace + // references to the original functions with references to the aliases. + for (unsigned I = 0; I != Functions.size(); ++I) { + Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( + ConstantExpr::getGetElementPtr( + JumpTableType, JumpTable, + ArrayRef{ConstantInt::get(IntPtrTy, 0), + ConstantInt::get(IntPtrTy, I)}), + Functions[I]->getType()); + if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) { + Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr); + } else { + assert(Functions[I]->getType()->getAddressSpace() == 0); + GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0, + Functions[I]->getLinkage(), "", + CombinedGlobalElemPtr, M); + GAlias->setVisibility(Functions[I]->getVisibility()); + GAlias->takeName(Functions[I]); + Functions[I]->replaceAllUsesWith(GAlias); + } + if (!Functions[I]->isDeclarationForLinker()) + Functions[I]->setLinkage(GlobalValue::PrivateLinkage); + } + + // Build and set the jump table's initializer. + std::vector JumpTableEntries; + for (unsigned I = 0; I != Functions.size(); ++I) + JumpTableEntries.push_back( + createJumpTableEntry(JumpTable, Functions[I], I)); + JumpTable->setInitializer( + ConstantArray::get(JumpTableType, JumpTableEntries)); +} + +void LowerBitSets::buildBitSetsFromDisjointSet( + ArrayRef BitSets, ArrayRef Globals) { + llvm::DenseMap BitSetIndices; + llvm::DenseMap GlobalIndices; + for (unsigned I = 0; I != BitSets.size(); ++I) + BitSetIndices[BitSets[I]] = I; + for (unsigned I = 0; I != Globals.size(); ++I) + GlobalIndices[Globals[I]] = I; + + // For each bitset, build a set of indices that refer to globals referenced by + // the bitset. 
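// [Illustrative worked example, not part of the original patch; the helper
// name is hypothetical.] The rel32 field built by createJumpTableEntry above
// is the displacement from the end of the 5-byte jmp instruction to the
// target: entry I starts at Src + I * 8 (kX86JumpTableEntrySize == 8) and
// its jmp ends 5 bytes later, so:
static int64_t jumpTableRel32(int64_t Src, int64_t Dest, unsigned I) {
  return Dest - (Src + (int64_t)I * 8 + 5); // truncated to 32 bits on emission
}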
+ std::vector> BitSetMembers(BitSets.size()); + if (BitSetNM) { + for (MDNode *Op : BitSetNM->operands()) { + // Op = { bitset name, global, offset } + if (!Op->getOperand(1)) + continue; + auto I = BitSetIndices.find(Op->getOperand(0)); + if (I == BitSetIndices.end()) + continue; + + auto OpGlobal = dyn_cast( + cast(Op->getOperand(1))->getValue()); + if (!OpGlobal) + continue; + BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]); + } + } + + // Order the sets of indices by size. The GlobalLayoutBuilder works best + // when given small index sets first. + std::stable_sort( + BitSetMembers.begin(), BitSetMembers.end(), + [](const std::set &O1, const std::set &O2) { + return O1.size() < O2.size(); + }); + + // Create a GlobalLayoutBuilder and provide it with index sets as layout + // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as + // close together as possible. + GlobalLayoutBuilder GLB(Globals.size()); + for (auto &&MemSet : BitSetMembers) + GLB.addFragment(MemSet); + + // Build the bitsets from this disjoint set. + if (Globals.empty() || isa(Globals[0])) { + // Build a vector of global variables with the computed layout. + std::vector OrderedGVs(Globals.size()); + auto OGI = OrderedGVs.begin(); + for (auto &&F : GLB.Fragments) { + for (auto &&Offset : F) { + auto GV = dyn_cast(Globals[Offset]); + if (!GV) + report_fatal_error( + "Bit set may not contain both global variables and functions"); + *OGI++ = GV; + } + } + + buildBitSetsFromGlobalVariables(BitSets, OrderedGVs); + } else { + // Build a vector of functions with the computed layout. + std::vector OrderedFns(Globals.size()); + auto OFI = OrderedFns.begin(); + for (auto &&F : GLB.Fragments) { + for (auto &&Offset : F) { + auto Fn = dyn_cast(Globals[Offset]); + if (!Fn) + report_fatal_error( + "Bit set may not contain both global variables and functions"); + *OFI++ = Fn; + } + } + + buildBitSetsFromFunctions(BitSets, OrderedFns); + } +} + /// Lower all bit sets in this module. bool LowerBitSets::buildBitSets() { Function *BitSetTestFunc = @@ -576,24 +926,36 @@ bool LowerBitSets::buildBitSets() { // Equivalence class set containing bitsets and the globals they reference. // This is used to partition the set of bitsets in the module into disjoint // sets. - typedef EquivalenceClasses> + typedef EquivalenceClasses> GlobalClassesTy; GlobalClassesTy GlobalClasses; + // Verify the bitset metadata and build a mapping from bitset identifiers to + // their last observed index in BitSetNM. This will be used later to + // deterministically order the list of bitset identifiers. + llvm::DenseMap BitSetIdIndices; + if (BitSetNM) { + for (unsigned I = 0, E = BitSetNM->getNumOperands(); I != E; ++I) { + MDNode *Op = BitSetNM->getOperand(I); + verifyBitSetMDNode(Op); + BitSetIdIndices[Op->getOperand(0)] = I; + } + } + for (const Use &U : BitSetTestFunc->uses()) { auto CI = cast(U.getUser()); auto BitSetMDVal = dyn_cast(CI->getArgOperand(1)); - if (!BitSetMDVal || !isa(BitSetMDVal->getMetadata())) + if (!BitSetMDVal) report_fatal_error( - "Second argument of llvm.bitset.test must be metadata string"); - auto BitSet = cast(BitSetMDVal->getMetadata()); + "Second argument of llvm.bitset.test must be metadata"); + auto BitSet = BitSetMDVal->getMetadata(); // Add the call site to the list of call sites for this bit set. We also use // BitSetTestCallSites to keep track of whether we have seen this bit set // before. If we have, we don't need to re-add the referenced globals to the // equivalence class.
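// [Illustrative sketch, not part of the original patch] EquivalenceClasses
// is the union-find structure that partitions bitsets and globals into the
// disjoint sets processed here; its minimal usage pattern looks like:
static bool sameClassDemo() {
  EquivalenceClasses<int> EC;
  EC.unionSets(1, 2);                          // class {1, 2}
  EC.unionSets(2, 3);                          // class {1, 2, 3}
  return EC.findLeader(1) == EC.findLeader(3); // true: same class
}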
- std::pair>::iterator, + std::pair>::iterator, bool> Ins = BitSetTestCallSites.insert( std::make_pair(BitSet, std::vector())); @@ -608,31 +970,16 @@ bool LowerBitSets::buildBitSets() { if (!BitSetNM) continue; - // Verify the bitset metadata and add the referenced globals to the bitset's - // equivalence class. + // Add the referenced globals to the bitset's equivalence class. for (MDNode *Op : BitSetNM->operands()) { - if (Op->getNumOperands() != 3) - report_fatal_error( - "All operands of llvm.bitsets metadata must have 3 elements"); - if (Op->getOperand(0) != BitSet || !Op->getOperand(1)) continue; - auto OpConstMD = dyn_cast(Op->getOperand(1)); - if (!OpConstMD) - report_fatal_error("Bit set element must be a constant"); - auto OpGlobal = dyn_cast(OpConstMD->getValue()); + auto OpGlobal = dyn_cast( + cast(Op->getOperand(1))->getValue()); if (!OpGlobal) continue; - auto OffsetConstMD = dyn_cast(Op->getOperand(2)); - if (!OffsetConstMD) - report_fatal_error("Bit set element offset must be a constant"); - auto OffsetInt = dyn_cast(OffsetConstMD->getValue()); - if (!OffsetInt) - report_fatal_error( - "Bit set element offset must be an integer constant"); - CurSet = GlobalClasses.unionSets( CurSet, GlobalClasses.findLeader(GlobalClasses.insert(OpGlobal))); } @@ -641,79 +988,51 @@ bool LowerBitSets::buildBitSets() { if (GlobalClasses.empty()) return false; - // For each disjoint set we found... + // Build a list of disjoint sets ordered by their maximum BitSetNM index + // for determinism. + std::vector> Sets; for (GlobalClassesTy::iterator I = GlobalClasses.begin(), E = GlobalClasses.end(); I != E; ++I) { if (!I->isLeader()) continue; - ++NumBitSetDisjointSets; - // Build the list of bitsets and referenced globals in this disjoint set. - std::vector BitSets; - std::vector Globals; - llvm::DenseMap BitSetIndices; - llvm::DenseMap GlobalIndices; + unsigned MaxIndex = 0; for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I); MI != GlobalClasses.member_end(); ++MI) { - if ((*MI).is()) { - BitSetIndices[MI->get()] = BitSets.size(); - BitSets.push_back(MI->get()); - } else { - GlobalIndices[MI->get()] = Globals.size(); - Globals.push_back(MI->get()); - } + if ((*MI).is()) + MaxIndex = std::max(MaxIndex, BitSetIdIndices[MI->get()]); + } + Sets.emplace_back(I, MaxIndex); + } + std::sort(Sets.begin(), Sets.end(), + [](const std::pair &S1, + const std::pair &S2) { + return S1.second < S2.second; + }); + + // For each disjoint set we found... + for (const auto &S : Sets) { + // Build the list of bitsets in this disjoint set. + std::vector BitSets; + std::vector Globals; + for (GlobalClassesTy::member_iterator MI = + GlobalClasses.member_begin(S.first); + MI != GlobalClasses.member_end(); ++MI) { + if ((*MI).is()) + BitSets.push_back(MI->get()); + else + Globals.push_back(MI->get()); } - // For each bitset, build a set of indices that refer to globals referenced - // by the bitset. - std::vector> BitSetMembers(BitSets.size()); - if (BitSetNM) { - for (MDNode *Op : BitSetNM->operands()) { - // Op = { bitset name, global, offset } - if (!Op->getOperand(1)) - continue; - auto I = BitSetIndices.find(cast(Op->getOperand(0))); - if (I == BitSetIndices.end()) - continue; - - auto OpGlobal = dyn_cast( - cast(Op->getOperand(1))->getValue()); - if (!OpGlobal) - continue; - BitSetMembers[I->second].insert(GlobalIndices[OpGlobal]); - } - } - - // Order the sets of indices by size. The GlobalLayoutBuilder works best - // when given small index sets first. 
- std::stable_sort( - BitSetMembers.begin(), BitSetMembers.end(), - [](const std::set &O1, const std::set &O2) { - return O1.size() < O2.size(); - }); - - // Create a GlobalLayoutBuilder and provide it with index sets as layout - // fragments. The GlobalLayoutBuilder tries to lay out members of fragments - // as close together as possible. - GlobalLayoutBuilder GLB(Globals.size()); - for (auto &&MemSet : BitSetMembers) - GLB.addFragment(MemSet); - - // Build a vector of globals with the computed layout. - std::vector OrderedGlobals(Globals.size()); - auto OGI = OrderedGlobals.begin(); - for (auto &&F : GLB.Fragments) - for (auto &&Offset : F) - *OGI++ = Globals[Offset]; - - // Order bitsets by name for determinism. - std::sort(BitSets.begin(), BitSets.end(), [](MDString *S1, MDString *S2) { - return S1->getString() < S2->getString(); + // Order bitsets by BitSetNM index for determinism. This ordering is stable + // as there is a one-to-one mapping between metadata and indices. + std::sort(BitSets.begin(), BitSets.end(), [&](Metadata *M1, Metadata *M2) { + return BitSetIdIndices[M1] < BitSetIdIndices[M2]; }); - // Build the bitsets from this disjoint set. - buildBitSetsFromGlobals(BitSets, OrderedGlobals); + // Lower the bitsets in this disjoint set. + buildBitSetsFromDisjointSet(BitSets, Globals); } allocateByteArrays(); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 2e3519eac6a5..8a209a18c540 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -27,6 +27,14 @@ // -- We define Function* container class with custom "operator<" (FunctionPtr). // -- "FunctionPtr" instances are stored in std::set collection, so every // std::set::insert operation will give you result in log(N) time. +// +// As an optimization, a hash of the function structure is calculated first, and +// two functions are only compared if they have the same hash. This hash is +// cheap to compute, and has the property that if function F == G according to +// the comparison function, then hash(F) == hash(G). This consistency property +// is critical to ensuring all possible merging opportunities are exploited. +// Collisions in the hash affect the speed of the pass but not the correctness +// or determinism of the resulting transformation. // // When a match is found the functions are folded. If both functions are // overridable, we move the functionality into a new internal function and @@ -87,6 +95,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Hashing.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -97,12 +106,14 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include + using namespace llvm; #define DEBUG_TYPE "mergefunc" @@ -121,21 +132,64 @@ static cl::opt NumFunctionsForSanityCheck( namespace { +/// GlobalNumberState assigns an integer to each global value in the program, +/// which is used by the comparison routine to order references to globals. This +/// state must be preserved throughout the pass, because Functions and other +/// globals need to maintain their relative order. Globals are assigned a number +/// when they are first visited. 
This order is deterministic, and so the
+/// assigned numbers are as well. When two functions are merged, neither number
+/// is updated. If the symbols are weak, this would be incorrect. If they are
+/// strong, then one will be replaced at all references to the other, and so
+/// direct callsites will now see one or the other symbol, and no update is
+/// necessary. Note that if we were guaranteed unique names, we could just
+/// compare those, but this would not work for stripped bitcodes or for those
+/// few symbols without a name.
+class GlobalNumberState {
+  struct Config : ValueMapConfig<GlobalValue *> {
+    enum { FollowRAUW = false };
+  };
+  // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
+  // occurs, the mapping does not change. Tracking changes is unnecessary, and
+  // also problematic for weak symbols (which may be overwritten).
+  typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
+  ValueNumberMap GlobalNumbers;
+  // The next unused serial number to assign to a global.
+  uint64_t NextNumber;
+  public:
+  GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
+  uint64_t getNumber(GlobalValue* Global) {
+    ValueNumberMap::iterator MapIter;
+    bool Inserted;
+    std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
+    if (Inserted)
+      NextNumber++;
+    return MapIter->second;
+  }
+  void clear() {
+    GlobalNumbers.clear();
+  }
+};
+
 /// FunctionComparator - Compares two functions to determine whether or not
 /// they will generate machine code with the same behaviour. DataLayout is
 /// used if available. The comparator always fails conservatively (erring on the
 /// side of claiming that two functions are different).
 class FunctionComparator {
 public:
-  FunctionComparator(const Function *F1, const Function *F2)
-      : FnL(F1), FnR(F2) {}
+  FunctionComparator(const Function *F1, const Function *F2,
+                     GlobalNumberState* GN)
+      : FnL(F1), FnR(F2), GlobalNumbers(GN) {}

   /// Test whether the two functions have equivalent behaviour.
   int compare();
+  /// Hash a function. Equivalent functions will have the same hash, and
+  /// unequal functions will have different hashes with high probability.
+  typedef uint64_t FunctionHash;
+  static FunctionHash functionHash(Function &);

 private:
   /// Test whether two basic blocks have equivalent behaviour.
-  int compare(const BasicBlock *BBL, const BasicBlock *BBR);
+  int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR);

   /// Constants comparison.
   /// Its analog to lexicographical comparison between hypothetical numbers
@@ -241,6 +295,10 @@ private:
   /// If these properties are equal - compare their contents.
   int cmpConstants(const Constant *L, const Constant *R);

+  /// Compares two global values by number. Uses the GlobalNumberState to
+  /// identify the same globals across function calls.
+  int cmpGlobalValues(GlobalValue *L, GlobalValue *R);
+
   /// Assign or look up previously assigned numbers for the two values, and
   /// return whether the numbers are equal. Numbers are assigned in the order
   /// visited.
@@ -320,8 +378,9 @@ private:
   ///
   /// 1. If types are of different kind (different type IDs).
   ///    Return result of type IDs comparison, treating them as numbers.
-  /// 2. If types are vectors or integers, compare Type* values as numbers.
-  /// 3. Types has same ID, so check whether they belongs to the next group:
+  /// 2. If types are integers, check that they have the same width. If they
+  ///    are vectors, check that they have the same count and subtype.
+  /// 3.
Types have the same ID, so check whether they are one of: /// * Void /// * Float /// * Double @@ -330,8 +389,7 @@ private: /// * PPC_FP128 /// * Label /// * Metadata - /// If so - return 0, yes - we can treat these types as equal only because - /// their IDs are same. + /// We can treat these types as equal whenever their IDs are same. /// 4. If Left and Right are pointers, return result of address space /// comparison (numbers comparison). We can treat pointer types of same /// address space as equal. @@ -343,11 +401,13 @@ private: int cmpTypes(Type *TyL, Type *TyR) const; int cmpNumbers(uint64_t L, uint64_t R) const; - int cmpAPInts(const APInt &L, const APInt &R) const; int cmpAPFloats(const APFloat &L, const APFloat &R) const; - int cmpStrings(StringRef L, StringRef R) const; + int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const; + int cmpMem(StringRef L, StringRef R) const; int cmpAttrs(const AttributeSet L, const AttributeSet R) const; + int cmpRangeMetadata(const MDNode* L, const MDNode* R) const; + int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const; // The two functions undergoing comparison. const Function *FnL, *FnR; @@ -386,30 +446,30 @@ private: /// could be operands from further BBs we didn't scan yet. /// So it's impossible to use dominance properties in general. DenseMap sn_mapL, sn_mapR; + + // The global state we will use + GlobalNumberState* GlobalNumbers; }; class FunctionNode { mutable AssertingVH F; - + FunctionComparator::FunctionHash Hash; public: - FunctionNode(Function *F) : F(F) {} + // Note the hash is recalculated potentially multiple times, but it is cheap. + FunctionNode(Function *F) + : F(F), Hash(FunctionComparator::functionHash(*F)) {} Function *getFunc() const { return F; } + FunctionComparator::FunctionHash getHash() const { return Hash; } /// Replace the reference to the function F by the function G, assuming their /// implementations are equal. void replaceBy(Function *G) const { - assert(!(*this < FunctionNode(G)) && !(FunctionNode(G) < *this) && - "The two functions must be equal"); - F = G; } - void release() { F = 0; } - bool operator<(const FunctionNode &RHS) const { - return (FunctionComparator(F, RHS.getFunc()).compare()) == -1; - } + void release() { F = nullptr; } }; -} +} // end anonymous namespace int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { if (L < R) return -1; @@ -426,13 +486,25 @@ int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const { } int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const { - if (int Res = cmpNumbers((uint64_t)&L.getSemantics(), - (uint64_t)&R.getSemantics())) + // Floats are ordered first by semantics (i.e. float, double, half, etc.), + // then by value interpreted as a bitstring (aka APInt). 
+  const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
+  if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
+                           APFloat::semanticsPrecision(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+                           APFloat::semanticsMaxExponent(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+                           APFloat::semanticsMinExponent(SR)))
+    return Res;
+  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+                           APFloat::semanticsSizeInBits(SR)))
+    return Res;
   return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
 }

-int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
+int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
   // Prevent heavy comparison, compare sizes first.
   if (int Res = cmpNumbers(L.size(), R.size()))
     return Res;
@@ -466,6 +538,59 @@ int FunctionComparator::cmpAttrs(const AttributeSet L,
   return 0;
 }

+int FunctionComparator::cmpRangeMetadata(const MDNode* L,
+                                         const MDNode* R) const {
+  if (L == R)
+    return 0;
+  if (!L)
+    return -1;
+  if (!R)
+    return 1;
+  // Range metadata is a sequence of numbers. Make sure they are the same
+  // sequence.
+  // TODO: Note that as this is metadata, it is possible to drop and/or merge
+  // this data when considering functions to merge. Thus this comparison would
+  // return 0 (i.e. equivalent), but merging would become more complicated
+  // because the ranges would need to be unioned. It is not likely that
+  // functions differ ONLY in this metadata if they are actually the same
+  // function semantically.
+  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+    return Res;
+  for (size_t I = 0; I < L->getNumOperands(); ++I) {
+    ConstantInt* LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+    ConstantInt* RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+      return Res;
+  }
+  return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+                                                const Instruction *R) const {
+  ImmutableCallSite LCS(L);
+  ImmutableCallSite RCS(R);
+
+  assert(LCS && RCS && "Must be calls or invokes!");
+  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+  if (int Res =
+          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+    return Res;
+
+  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+    auto OBL = LCS.getOperandBundleAt(i);
+    auto OBR = RCS.getOperandBundleAt(i);
+
+    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+      return Res;
+
+    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+      return Res;
+  }
+
+  return 0;
+}
+
 /// Constants comparison:
 /// 1. Check whether type of L constant could be losslessly bitcasted to R
 ///    type.
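(The cmp* helpers above all follow the same two conventions, shown here as a minimal self-contained illustration with toy integer sequences rather than the pass's StringRef/MDNode parameters: every comparison returns -1/0/1, chains stop at the first nonzero result, and sequences compare their sizes before their contents to keep the common unequal case cheap.)

#include <cstdint>
#include <iostream>
#include <vector>

static int cmpNumbers(uint64_t L, uint64_t R) {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}

static int cmpSeq(const std::vector<uint64_t> &L,
                  const std::vector<uint64_t> &R) {
  if (int Res = cmpNumbers(L.size(), R.size()))
    return Res; // cheap size check first, as in cmpMem/cmpRangeMetadata
  for (std::size_t I = 0; I < L.size(); ++I)
    if (int Res = cmpNumbers(L[I], R[I]))
      return Res; // first differing element decides
  return 0;
}

int main() {
  std::cout << cmpSeq({1, 2, 3}, {1, 2, 4}) << '\n'; // -1
  std::cout << cmpSeq({1, 2}, {1}) << '\n';          // 1
  std::cout << cmpSeq({5}, {5}) << '\n';             // 0
}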
@@ -500,9 +625,9 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
   unsigned TyLWidth = 0;
   unsigned TyRWidth = 0;

-  if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
+  if (auto *VecTyL = dyn_cast<VectorType>(TyL))
     TyLWidth = VecTyL->getBitWidth();
-  if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
+  if (auto *VecTyR = dyn_cast<VectorType>(TyR))
     TyRWidth = VecTyR->getBitWidth();

   if (TyLWidth != TyRWidth)
@@ -538,11 +663,29 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
   if (!L->isNullValue() && R->isNullValue())
     return -1;

+  auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
+  auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
+  if (GlobalValueL && GlobalValueR) {
+    return cmpGlobalValues(GlobalValueL, GlobalValueR);
+  }
+
   if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
     return Res;

+  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+    const auto *SeqR = cast<ConstantDataSequential>(R);
+    // This handles ConstantDataArray and ConstantDataVector. Note that we
+    // compare the two raw data arrays, which might differ depending on the
+    // host endianness. This isn't a problem though, because the endianness of
+    // a module will affect the order of the constants, but this order is the
+    // same for a given input module and host platform.
+    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+  }
+
   switch (L->getValueID()) {
-  case Value::UndefValueVal: return TypesRes;
+  case Value::UndefValueVal:
+  case Value::ConstantTokenNoneVal:
+    return TypesRes;
   case Value::ConstantIntVal: {
     const APInt &LInt = cast<ConstantInt>(L)->getValue();
     const APInt &RInt = cast<ConstantInt>(R)->getValue();
@@ -609,19 +752,55 @@ int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
     }
     return 0;
   }
-  case Value::FunctionVal:
-  case Value::GlobalVariableVal:
-  case Value::GlobalAliasVal:
-  default: // Unknown constant, cast L and R pointers to numbers and compare.
-    return cmpNumbers((uint64_t)L, (uint64_t)R);
+  case Value::BlockAddressVal: {
+    const BlockAddress *LBA = cast<BlockAddress>(L);
+    const BlockAddress *RBA = cast<BlockAddress>(R);
+    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+      return Res;
+    if (LBA->getFunction() == RBA->getFunction()) {
+      // They are BBs in the same function. Order by which comes first in the
+      // BB order of the function. This order is deterministic.
+      Function* F = LBA->getFunction();
+      BasicBlock *LBB = LBA->getBasicBlock();
+      BasicBlock *RBB = RBA->getBasicBlock();
+      if (LBB == RBB)
+        return 0;
+      for (BasicBlock &BB : F->getBasicBlockList()) {
+        if (&BB == LBB) {
+          assert(&BB != RBB);
+          return -1;
+        }
+        if (&BB == RBB)
+          return 1;
+      }
+      llvm_unreachable("Basic Block Address does not point to a basic block in "
+                       "its function.");
+      return -1;
+    } else {
+      // cmpValues said the functions are the same. So because they aren't
+      // literally the same pointer, they must respectively be the left and
+      // right functions.
+      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+      // cmpValues will tell us if these are equivalent BasicBlocks, in the
+      // context of their respective functions.
+      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+    }
+  }
+  default: // Unknown constant, abort.
+    DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+    llvm_unreachable("Constant ValueID not recognized.");
+    return -1;
+  }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue* R) {
+  return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
 }

 /// cmpType - compares two types,
 /// defines total ordering among the types set.
/// See method declaration comments for more details. int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { - PointerType *PTyL = dyn_cast(TyL); PointerType *PTyR = dyn_cast(TyR); @@ -642,10 +821,15 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { llvm_unreachable("Unknown type!"); // Fall through in Release mode. case Type::IntegerTyID: - case Type::VectorTyID: - // TyL == TyR would have returned true earlier. - return cmpNumbers((uint64_t)TyL, (uint64_t)TyR); - + return cmpNumbers(cast(TyL)->getBitWidth(), + cast(TyR)->getBitWidth()); + case Type::VectorTyID: { + VectorType *VTyL = cast(TyL), *VTyR = cast(TyR); + if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements())) + return Res; + return cmpTypes(VTyL->getElementType(), VTyR->getElementType()); + } + // TyL == TyR would have returned true earlier, because types are uniqued. case Type::VoidTyID: case Type::FloatTyID: case Type::DoubleTyID: @@ -654,6 +838,7 @@ int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const { case Type::PPC_FP128TyID: case Type::LabelTyID: case Type::MetadataTyID: + case Type::TokenTyID: return 0; case Type::PointerTyID: { @@ -759,8 +944,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpNumbers(LI->getSynchScope(), cast(R)->getSynchScope())) return Res; - return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range), - (uint64_t)cast(R)->getMetadata(LLVMContext::MD_range)); + return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range), + cast(R)->getMetadata(LLVMContext::MD_range)); } if (const StoreInst *SI = dyn_cast(L)) { if (int Res = @@ -783,20 +968,24 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpAttrs(CI->getAttributes(), cast(R)->getAttributes())) return Res; - return cmpNumbers( - (uint64_t)CI->getMetadata(LLVMContext::MD_range), - (uint64_t)cast(R)->getMetadata(LLVMContext::MD_range)); + if (int Res = cmpOperandBundlesSchema(CI, R)) + return Res; + return cmpRangeMetadata( + CI->getMetadata(LLVMContext::MD_range), + cast(R)->getMetadata(LLVMContext::MD_range)); } - if (const InvokeInst *CI = dyn_cast(L)) { - if (int Res = cmpNumbers(CI->getCallingConv(), + if (const InvokeInst *II = dyn_cast(L)) { + if (int Res = cmpNumbers(II->getCallingConv(), cast(R)->getCallingConv())) return Res; if (int Res = - cmpAttrs(CI->getAttributes(), cast(R)->getAttributes())) + cmpAttrs(II->getAttributes(), cast(R)->getAttributes())) return Res; - return cmpNumbers( - (uint64_t)CI->getMetadata(LLVMContext::MD_range), - (uint64_t)cast(R)->getMetadata(LLVMContext::MD_range)); + if (int Res = cmpOperandBundlesSchema(II, R)) + return Res; + return cmpRangeMetadata( + II->getMetadata(LLVMContext::MD_range), + cast(R)->getMetadata(LLVMContext::MD_range)); } if (const InsertValueInst *IVI = dyn_cast(L)) { ArrayRef LIndices = IVI->getIndices(); @@ -876,9 +1065,8 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, if (GEPL->accumulateConstantOffset(DL, OffsetL) && GEPR->accumulateConstantOffset(DL, OffsetR)) return cmpAPInts(OffsetL, OffsetR); - - if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(), - (uint64_t)GEPR->getPointerOperand()->getType())) + if (int Res = cmpTypes(GEPL->getSourceElementType(), + GEPR->getSourceElementType())) return Res; if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands())) @@ -892,6 +1080,28 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL, return 0; } +int FunctionComparator::cmpInlineAsm(const InlineAsm *L, + const 
InlineAsm *R) const { + // InlineAsm's are uniqued. If they are the same pointer, obviously they are + // the same, otherwise compare the fields. + if (L == R) + return 0; + if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType())) + return Res; + if (int Res = cmpMem(L->getAsmString(), R->getAsmString())) + return Res; + if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString())) + return Res; + if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects())) + return Res; + if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack())) + return Res; + if (int Res = cmpNumbers(L->getDialect(), R->getDialect())) + return Res; + llvm_unreachable("InlineAsm blocks were not uniqued."); + return 0; +} + /// Compare two values used by the two functions under pair-wise comparison. If /// this is the first time the values are seen, they're added to the mapping so /// that we will detect mismatches on next use. @@ -926,7 +1136,7 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) { const InlineAsm *InlineAsmR = dyn_cast(R); if (InlineAsmL && InlineAsmR) - return cmpNumbers((uint64_t)L, (uint64_t)R); + return cmpInlineAsm(InlineAsmL, InlineAsmR); if (InlineAsmL) return 1; if (InlineAsmR) @@ -938,12 +1148,13 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) { return cmpNumbers(LeftSN.first->second, RightSN.first->second); } // Test whether two basic blocks have equivalent behaviour. -int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { +int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL, + const BasicBlock *BBR) { BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); do { - if (int Res = cmpValues(InstL, InstR)) + if (int Res = cmpValues(&*InstL, &*InstR)) return Res; const GetElementPtrInst *GEPL = dyn_cast(InstL); @@ -961,7 +1172,7 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { if (int Res = cmpGEPs(GEPL, GEPR)) return Res; } else { - if (int Res = cmpOperations(InstL, InstR)) + if (int Res = cmpOperations(&*InstL, &*InstR)) return Res; assert(InstL->getNumOperands() == InstR->getNumOperands()); @@ -970,11 +1181,8 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { Value *OpR = InstR->getOperand(i); if (int Res = cmpValues(OpL, OpR)) return Res; - if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID())) - return Res; - // TODO: Already checked in cmpOperation - if (int Res = cmpTypes(OpL->getType(), OpR->getType())) - return Res; + // cmpValues should ensure this is true. + assert(cmpTypes(OpL->getType(), OpR->getType()) == 0); } } @@ -990,7 +1198,6 @@ int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { // Test whether the two functions have equivalent behaviour. 
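(compare() follows next; before it, a quick self-contained sketch of the serial-numbering scheme cmpValues uses above via sn_mapL/sn_mapR: a value is identified by the position of its first occurrence, so two functions match when their operands follow the same use pattern regardless of names. The string "values" here are hypothetical stand-ins for llvm::Value pointers.)

#include <cstddef>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Two parallel maps assign serial numbers in order of first use; values
// match when their serial numbers match, whatever their names are.
using SerialMap = std::map<std::string, std::size_t>;

static std::size_t serial(SerialMap &M, const std::string &V) {
  // insert() leaves an existing entry untouched, so each value keeps the
  // number it was given on first sight.
  return M.insert({V, M.size()}).first->second;
}

int main() {
  // Hypothetical operand streams of the two functions being compared.
  std::vector<std::string> FnL = {"a", "b", "a"};
  std::vector<std::string> FnR = {"x", "y", "x"};
  SerialMap L, R;
  bool Equivalent = true;
  for (std::size_t I = 0; I < FnL.size(); ++I)
    Equivalent = Equivalent && serial(L, FnL[I]) == serial(R, FnR[I]);
  std::cout << Equivalent << '\n'; // 1: same use pattern, different names
}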
int FunctionComparator::compare() { - sn_mapL.clear(); sn_mapR.clear(); @@ -1001,7 +1208,7 @@ int FunctionComparator::compare() { return Res; if (FnL->hasGC()) { - if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC())) + if (int Res = cmpMem(FnL->getGC(), FnR->getGC())) return Res; } @@ -1009,7 +1216,7 @@ int FunctionComparator::compare() { return Res; if (FnL->hasSection()) { - if (int Res = cmpStrings(FnL->getSection(), FnR->getSection())) + if (int Res = cmpMem(FnL->getSection(), FnR->getSection())) return Res; } @@ -1033,7 +1240,7 @@ int FunctionComparator::compare() { ArgRI = FnR->arg_begin(), ArgLE = FnL->arg_end(); ArgLI != ArgLE; ++ArgLI, ++ArgRI) { - if (cmpValues(ArgLI, ArgRI) != 0) + if (cmpValues(&*ArgLI, &*ArgRI) != 0) llvm_unreachable("Arguments repeat!"); } @@ -1055,7 +1262,7 @@ int FunctionComparator::compare() { if (int Res = cmpValues(BBL, BBR)) return Res; - if (int Res = compare(BBL, BBR)) + if (int Res = cmpBasicBlocks(BBL, BBR)) return Res; const TerminatorInst *TermL = BBL->getTerminator(); @@ -1073,6 +1280,68 @@ int FunctionComparator::compare() { return 0; } +namespace { +// Accumulate the hash of a sequence of 64-bit integers. This is similar to a +// hash of a sequence of 64bit ints, but the entire input does not need to be +// available at once. This interface is necessary for functionHash because it +// needs to accumulate the hash as the structure of the function is traversed +// without saving these values to an intermediate buffer. This form of hashing +// is not often needed, as usually the object to hash is just read from a +// buffer. +class HashAccumulator64 { + uint64_t Hash; +public: + // Initialize to random constant, so the state isn't zero. + HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; } + void add(uint64_t V) { + Hash = llvm::hashing::detail::hash_16_bytes(Hash, V); + } + // No finishing is required, because the entire hash value is used. + uint64_t getHash() { return Hash; } +}; +} // end anonymous namespace + +// A function hash is calculated by considering only the number of arguments and +// whether a function is varargs, the order of basic blocks (given by the +// successors of each basic block in depth first order), and the order of +// opcodes of each instruction within each of these basic blocks. This mirrors +// the strategy compare() uses to compare functions by walking the BBs in depth +// first order and comparing each instruction in sequence. Because this hash +// does not look at the operands, it is insensitive to things such as the +// target of calls and the constants used in the function, which makes it useful +// when possibly merging functions which are the same modulo constants and call +// targets. +FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) { + HashAccumulator64 H; + H.add(F.isVarArg()); + H.add(F.arg_size()); + + SmallVector BBs; + SmallSet VisitedBBs; + + // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(), + // accumulating the hash of the function "structure." 
(BB and opcode sequence) + BBs.push_back(&F.getEntryBlock()); + VisitedBBs.insert(BBs[0]); + while (!BBs.empty()) { + const BasicBlock *BB = BBs.pop_back_val(); + // This random value acts as a block header, as otherwise the partition of + // opcodes into BBs wouldn't affect the hash, only the order of the opcodes + H.add(45798); + for (auto &Inst : *BB) { + H.add(Inst.getOpcode()); + } + const TerminatorInst *Term = BB->getTerminator(); + for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(Term->getSuccessor(i)).second) + continue; + BBs.push_back(Term->getSuccessor(i)); + } + } + return H.getHash(); +} + + namespace { /// MergeFunctions finds functions which will generate identical machine code, @@ -1084,14 +1353,31 @@ class MergeFunctions : public ModulePass { public: static char ID; MergeFunctions() - : ModulePass(ID), HasGlobalAliases(false) { + : ModulePass(ID), FnTree(FunctionNodeCmp(&GlobalNumbers)), FNodesInTree(), + HasGlobalAliases(false) { initializeMergeFunctionsPass(*PassRegistry::getPassRegistry()); } bool runOnModule(Module &M) override; private: - typedef std::set FnTreeType; + // The function comparison operator is provided here so that FunctionNodes do + // not need to become larger with another pointer. + class FunctionNodeCmp { + GlobalNumberState* GlobalNumbers; + public: + FunctionNodeCmp(GlobalNumberState* GN) : GlobalNumbers(GN) {} + bool operator()(const FunctionNode &LHS, const FunctionNode &RHS) const { + // Order first by hashes, then full function comparison. + if (LHS.getHash() != RHS.getHash()) + return LHS.getHash() < RHS.getHash(); + FunctionComparator FCmp(LHS.getFunc(), RHS.getFunc(), GlobalNumbers); + return FCmp.compare() == -1; + } + }; + typedef std::set FnTreeType; + + GlobalNumberState GlobalNumbers; /// A work queue of functions that may have been modified and should be /// analyzed again. @@ -1133,17 +1419,23 @@ private: void writeAlias(Function *F, Function *G); /// Replace function F with function G in the function tree. - void replaceFunctionInTree(FnTreeType::iterator &IterToF, Function *G); + void replaceFunctionInTree(const FunctionNode &FN, Function *G); /// The set of all distinct functions. Use the insert() and remove() methods - /// to modify it. + /// to modify it. The map allows efficient lookup and deferring of Functions. FnTreeType FnTree; + // Map functions to the iterators of the FunctionNode which contains them + // in the FnTree. This must be updated carefully whenever the FnTree is + // modified, i.e. in insert(), remove(), and replaceFunctionInTree(), to avoid + // dangling iterators into FnTree. The invariant that preserves this is that + // there is exactly one mapping F -> FN for each FunctionNode FN in FnTree. + ValueMap FNodesInTree; /// Whether or not the target supports global aliases. bool HasGlobalAliases; }; -} // end anonymous namespace +} // end anonymous namespace char MergeFunctions::ID = 0; INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false) @@ -1166,8 +1458,8 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { for (std::vector::iterator J = I; J != E && j < Max; ++J, ++j) { Function *F1 = cast(*I); Function *F2 = cast(*J); - int Res1 = FunctionComparator(F1, F2).compare(); - int Res2 = FunctionComparator(F2, F1).compare(); + int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); + int Res2 = FunctionComparator(F2, F1, &GlobalNumbers).compare(); // If F1 <= F2, then F2 >= F1, otherwise report failure. 
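(The sanity check continues just below; first, a self-contained sketch of the accumulator idea behind HashAccumulator64 and functionHash above. The mixing function is a splitmix64-style finalizer, an assumed stand-in for llvm::hashing::detail::hash_16_bytes, and the opcode numbers are made up.)

#include <cstdint>
#include <iostream>
#include <vector>

// Incremental 64-bit accumulator: values are mixed in one at a time, so the
// whole input never has to sit in a buffer.
class Acc64 {
  uint64_t Hash = 0x6acaa36bef8325c5ULL; // non-zero seed, as in the pass
public:
  void add(uint64_t V) {
    // splitmix64-style finalizer as a stand-in mixer.
    uint64_t X = Hash ^ (V + 0x9e3779b97f4a7c15ULL);
    X ^= X >> 30; X *= 0xbf58476d1ce4e5b9ULL;
    X ^= X >> 27; X *= 0x94d049bb133111ebULL;
    X ^= X >> 31;
    Hash = X;
  }
  uint64_t get() const { return Hash; }
};

int main() {
  // Hash a made-up "function": varargs flag, argument count, then for each
  // block a header marker followed by its opcode numbers. The marker keeps
  // {12, 34 | 56} and {12 | 34, 56} from hashing alike, mirroring the
  // H.add(45798) trick above.
  Acc64 H;
  H.add(0); // not varargs
  H.add(2); // two arguments
  std::vector<std::vector<uint64_t>> Blocks = {{12, 34}, {56}};
  for (const auto &BB : Blocks) {
    H.add(45798); // block header marker
    for (uint64_t Op : BB)
      H.add(Op);
  }
  std::cout << std::hex << H.get() << '\n';
}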
if (Res1 != -Res2) { @@ -1188,8 +1480,8 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { continue; Function *F3 = cast(*K); - int Res3 = FunctionComparator(F1, F3).compare(); - int Res4 = FunctionComparator(F2, F3).compare(); + int Res3 = FunctionComparator(F1, F3, &GlobalNumbers).compare(); + int Res4 = FunctionComparator(F2, F3, &GlobalNumbers).compare(); bool Transitive = true; @@ -1227,11 +1519,33 @@ bool MergeFunctions::doSanityCheck(std::vector &Worklist) { bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { - if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) - Deferred.push_back(WeakVH(I)); + // All functions in the module, ordered by hash. Functions with a unique + // hash value are easily eliminated. + std::vector> + HashedFuncs; + for (Function &Func : M) { + if (!Func.isDeclaration() && !Func.hasAvailableExternallyLinkage()) { + HashedFuncs.push_back({FunctionComparator::functionHash(Func), &Func}); + } } + std::stable_sort( + HashedFuncs.begin(), HashedFuncs.end(), + [](const std::pair &a, + const std::pair &b) { + return a.first < b.first; + }); + + auto S = HashedFuncs.begin(); + for (auto I = HashedFuncs.begin(), IE = HashedFuncs.end(); I != IE; ++I) { + // If the hash value matches the previous value or the next one, we must + // consider merging it. Otherwise it is dropped and never considered again. + if ((I != S && std::prev(I)->first == I->first) || + (std::next(I) != IE && std::next(I)->first == I->first) ) { + Deferred.push_back(WeakVH(I->second)); + } + } + do { std::vector Worklist; Deferred.swap(Worklist); @@ -1270,6 +1584,7 @@ bool MergeFunctions::runOnModule(Module &M) { } while (!Deferred.empty()); FnTree.clear(); + GlobalNumbers.clear(); return Changed; } @@ -1282,6 +1597,32 @@ void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { ++UI; CallSite CS(U->getUser()); if (CS && CS.isCallee(U)) { + // Transfer the called function's attributes to the call site. Due to the + // bitcast we will 'lose' ABI changing attributes because the 'called + // function' is no longer a Function* but the bitcast. Code that looks up + // the attributes from the called function will fail. + + // FIXME: This is not actually true, at least not anymore. The callsite + // will always have the same ABI affecting attributes as the callee, + // because otherwise the original input has UB. Note that Old and New + // always have matching ABI, so no attributes need to be changed. + // Transferring other attributes may help other optimizations, but that + // should be done uniformly and not in this ad-hoc way. 
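(The attribute-copying code this FIXME describes continues below. Separately, the hash prefilter added to runOnModule above is worth a tiny standalone illustration: sort candidates by hash, then keep only those whose hash matches a neighbour's, since a function with a unique hash can never compare equal to another. The names and hash values here are invented.)

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // (hash, name) pairs standing in for (FunctionHash, Function *).
  std::vector<std::pair<uint64_t, std::string>> Funcs = {
      {7, "a"}, {3, "b"}, {7, "c"}, {9, "d"}};
  std::stable_sort(
      Funcs.begin(), Funcs.end(),
      [](const auto &L, const auto &R) { return L.first < R.first; });
  // Keep only entries whose hash equals a neighbour's; everything else is
  // dropped up front and never compared in full.
  for (auto I = Funcs.begin(), E = Funcs.end(); I != E; ++I) {
    bool PrevEq = I != Funcs.begin() && std::prev(I)->first == I->first;
    bool NextEq = std::next(I) != E && std::next(I)->first == I->first;
    if (PrevEq || NextEq)
      std::cout << I->second << '\n'; // prints "a" then "c"
  }
}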
+ auto &Context = New->getContext(); + auto NewFuncAttrs = New->getAttributes(); + auto CallSiteAttrs = CS.getAttributes(); + + CallSiteAttrs = CallSiteAttrs.addAttributes( + Context, AttributeSet::ReturnIndex, NewFuncAttrs.getRetAttributes()); + + for (unsigned argIdx = 0; argIdx < CS.arg_size(); argIdx++) { + AttributeSet Attrs = NewFuncAttrs.getParamAttributes(argIdx); + if (Attrs.getNumSlots()) + CallSiteAttrs = CallSiteAttrs.addAttributes(Context, argIdx, Attrs); + } + + CS.setAttributes(CallSiteAttrs); + remove(CS.getInstruction()->getParent()->getParent()); U->set(BitcastNew); } @@ -1352,15 +1693,15 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { SmallVector Args; unsigned i = 0; FunctionType *FFTy = F->getFunctionType(); - for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); - AI != AE; ++AI) { - Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i))); + for (Argument & AI : NewG->args()) { + Args.push_back(createCast(Builder, &AI, FFTy->getParamType(i))); ++i; } CallInst *CI = Builder.CreateCall(F, Args); CI->setTailCall(); CI->setCallingConv(F->getCallingConv()); + CI->setAttributes(F->getAttributes()); if (NewG->getReturnType()->isVoidTy()) { Builder.CreateRetVoid(); } else { @@ -1379,8 +1720,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) { // Replace G with an alias to F and delete G. void MergeFunctions::writeAlias(Function *F, Function *G) { - PointerType *PTy = G->getType(); - auto *GA = GlobalAlias::create(PTy, G->getLinkage(), "", F); + auto *GA = GlobalAlias::create(G->getLinkage(), "", F); F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); GA->takeName(G); GA->setVisibility(G->getVisibility()); @@ -1425,19 +1765,24 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { ++NumFunctionsMerged; } -/// Replace function F for function G in the map. -void MergeFunctions::replaceFunctionInTree(FnTreeType::iterator &IterToF, +/// Replace function F by function G. +void MergeFunctions::replaceFunctionInTree(const FunctionNode &FN, Function *G) { - Function *F = IterToF->getFunc(); - - // A total order is already guaranteed otherwise because we process strong - // functions before weak functions. - assert(((F->mayBeOverridden() && G->mayBeOverridden()) || - (!F->mayBeOverridden() && !G->mayBeOverridden())) && - "Only change functions if both are strong or both are weak"); - (void)F; - - IterToF->replaceBy(G); + Function *F = FN.getFunc(); + assert(FunctionComparator(F, G, &GlobalNumbers).compare() == 0 && + "The two functions must be equal"); + + auto I = FNodesInTree.find(F); + assert(I != FNodesInTree.end() && "F should be in FNodesInTree"); + assert(FNodesInTree.count(G) == 0 && "FNodesInTree should not contain G"); + + FnTreeType::iterator IterToFNInFnTree = I->second; + assert(&(*IterToFNInFnTree) == &FN && "F should map to FN in FNodesInTree."); + // Remove F -> FN and insert G -> FN + FNodesInTree.erase(I); + FNodesInTree.insert({G, IterToFNInFnTree}); + // Replace F with G in FN, which is stored inside the FnTree. 
+ FN.replaceBy(G); } // Insert a ComparableFunction into the FnTree, or merge it away if equal to one @@ -1447,6 +1792,8 @@ bool MergeFunctions::insert(Function *NewFunction) { FnTree.insert(FunctionNode(NewFunction)); if (Result.second) { + assert(FNodesInTree.count(NewFunction) == 0); + FNodesInTree.insert({NewFunction, Result.first}); DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); return false; } @@ -1476,7 +1823,7 @@ bool MergeFunctions::insert(Function *NewFunction) { if (OldF.getFunc()->getName() > NewFunction->getName()) { // Swap the two functions. Function *F = OldF.getFunc(); - replaceFunctionInTree(Result.first, NewFunction); + replaceFunctionInTree(*Result.first, NewFunction); NewFunction = F; assert(OldF.getFunc() != F && "Must have swapped the functions."); } @@ -1495,18 +1842,13 @@ bool MergeFunctions::insert(Function *NewFunction) { // Remove a function from FnTree. If it was already in FnTree, add // it to Deferred so that we'll look at it in the next round. void MergeFunctions::remove(Function *F) { - // We need to make sure we remove F, not a function "equal" to F per the - // function equality comparator. - FnTreeType::iterator found = FnTree.find(FunctionNode(F)); - size_t Erased = 0; - if (found != FnTree.end() && found->getFunc() == F) { - Erased = 1; - FnTree.erase(found); - } - - if (Erased) { - DEBUG(dbgs() << "Removed " << F->getName() - << " from set and deferred it.\n"); + auto I = FNodesInTree.find(F); + if (I != FNodesInTree.end()) { + DEBUG(dbgs() << "Deferred " << F->getName()<< ".\n"); + FnTree.erase(I->second); + // I->second has been invalidated, remove it from the FNodesInTree map to + // preserve the invariant. + FNodesInTree.erase(I); Deferred.emplace_back(F); } } @@ -1516,6 +1858,8 @@ void MergeFunctions::remove(Function *F) { void MergeFunctions::removeUsers(Value *V) { std::vector Worklist; Worklist.push_back(V); + SmallSet Visited; + Visited.insert(V); while (!Worklist.empty()) { Value *V = Worklist.back(); Worklist.pop_back(); @@ -1526,8 +1870,10 @@ void MergeFunctions::removeUsers(Value *V) { } else if (isa(U)) { // do nothing } else if (Constant *C = dyn_cast(U)) { - for (User *UU : C->users()) - Worklist.push_back(UU); + for (User *UU : C->users()) { + if (!Visited.insert(UU).second) + Worklist.push_back(UU); + } } } } diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 4a7cb7ba7d12..0c5c84bbccab 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -50,7 +50,7 @@ ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } Function* PartialInliner::unswitchFunction(Function* F) { // First, verify that this function is an unswitching candidate... - BasicBlock* entryBlock = F->begin(); + BasicBlock *entryBlock = &F->front(); BranchInst *BR = dyn_cast(entryBlock->getTerminator()); if (!BR || BR->isUnconditional()) return nullptr; @@ -89,18 +89,18 @@ Function* PartialInliner::unswitchFunction(Function* F) { // of which will go outside. 
BasicBlock* preReturn = newReturnBlock; newReturnBlock = newReturnBlock->splitBasicBlock( - newReturnBlock->getFirstNonPHI()); + newReturnBlock->getFirstNonPHI()->getIterator()); BasicBlock::iterator I = preReturn->begin(); - BasicBlock::iterator Ins = newReturnBlock->begin(); + Instruction *Ins = &newReturnBlock->front(); while (I != preReturn->end()) { PHINode* OldPhi = dyn_cast(I); if (!OldPhi) break; - - PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); + + PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(retPhi); Ins = newReturnBlock->getFirstNonPHI(); - - retPhi->addIncoming(I, preReturn); + + retPhi->addIncoming(&*I, preReturn); retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), newEntryBlock); OldPhi->removeIncomingValue(newEntryBlock); @@ -116,8 +116,8 @@ Function* PartialInliner::unswitchFunction(Function* F) { FE = duplicateFunction->end(); FI != FE; ++FI) if (&*FI != newEntryBlock && &*FI != newReturnBlock && &*FI != newNonReturnBlock) - toExtract.push_back(FI); - + toExtract.push_back(&*FI); + // The CodeExtractor needs a dominator tree. DominatorTree DT; DT.recalculate(*duplicateFunction); diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 909baae92548..9876efa7b235 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -12,19 +12,26 @@ // //===----------------------------------------------------------------------===// - #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/Verifier.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Vectorize.h" @@ -89,11 +96,21 @@ static cl::opt EnableLoopDistribute( "enable-loop-distribute", cl::init(false), cl::Hidden, cl::desc("Enable the new, experimental LoopDistribution Pass")); +static cl::opt EnableNonLTOGlobalsModRef( + "enable-non-lto-gmr", cl::init(true), cl::Hidden, + cl::desc( + "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); + +static cl::opt EnableLoopLoadElim( + "enable-loop-load-elim", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental LoopLoadElimination Pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; + FunctionIndex = nullptr; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; @@ -143,10 +160,9 @@ void PassManagerBuilder::addInitialAliasAnalysisPasses( // BasicAliasAnalysis wins if they disagree. This is intended to help // support "obvious" type-punning idioms. 
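(The pipeline construction resumes below. As a loose, self-contained sketch of the general idea of stacking alias analyses with a conservative fallback: each analysis may give a definitive answer or decline, and an unanswered query degrades to MayAlias. This is not LLVM's AliasAnalysis interface, the real chain's delegation order differs in detail, and the toy rules here are invented.)

#include <functional>
#include <iostream>
#include <optional>
#include <vector>

enum class AliasResult { NoAlias, MayAlias, MustAlias };
using Analysis = std::function<std::optional<AliasResult>(int, int)>;

// Ask each analysis in turn; the first definitive answer wins, and the
// fallback is the conservative MayAlias.
static AliasResult alias(const std::vector<Analysis> &Chain, int A, int B) {
  for (const auto &AA : Chain)
    if (auto R = AA(A, B))
      return *R;
  return AliasResult::MayAlias;
}

int main() {
  std::vector<Analysis> Chain = {
      // Toy "type-based" rule: objects 1 and 2 have disjoint types.
      [](int A, int B) -> std::optional<AliasResult> {
        if ((A == 1 && B == 2) || (A == 2 && B == 1))
          return AliasResult::NoAlias;
        return std::nullopt;
      },
      // Toy "basic" rule: identical objects must alias.
      [](int A, int B) -> std::optional<AliasResult> {
        if (A == B)
          return AliasResult::MustAlias;
        return std::nullopt;
      }};
  std::cout << (alias(Chain, 1, 2) == AliasResult::NoAlias) << '\n';   // 1
  std::cout << (alias(Chain, 3, 3) == AliasResult::MustAlias) << '\n'; // 1
  std::cout << (alias(Chain, 1, 3) == AliasResult::MayAlias) << '\n';  // 1
}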
if (UseCFLAA) - PM.add(createCFLAliasAnalysisPass()); - PM.add(createTypeBasedAliasAnalysisPass()); - PM.add(createScopedNoAliasAAPass()); - PM.add(createBasicAliasAnalysisPass()); + PM.add(createCFLAAWrapperPass()); + PM.add(createTypeBasedAAWrapperPass()); + PM.add(createScopedNoAliasAAWrapperPass()); } void PassManagerBuilder::populateFunctionPassManager( @@ -172,6 +188,9 @@ void PassManagerBuilder::populateFunctionPassManager( void PassManagerBuilder::populateModulePassManager( legacy::PassManagerBase &MPM) { + // Allow forcing function attributes as a debugging and tuning aid. + MPM.add(createForceFunctionAttrsLegacyPass()); + // If all optimizations are disabled, just run the always-inline pass and, // if enabled, the function merging pass. if (OptLevel == 0) { @@ -201,10 +220,15 @@ void PassManagerBuilder::populateModulePassManager( addInitialAliasAnalysisPasses(MPM); if (!DisableUnitAtATime) { + // Infer attributes about declarations if possible. + MPM.add(createInferFunctionAttrsLegacyPass()); + addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createGlobalOptimizerPass()); // Optimize out global vars + // Promote any localized global vars + MPM.add(createPromoteMemoryToRegisterPass()); MPM.add(createDeadArgEliminationPass()); // Dead argument elimination @@ -213,6 +237,12 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } + if (EnableNonLTOGlobalsModRef) + // We add a module alias analysis pass here. In part due to bugs in the + // analysis infrastructure this "works" in that the analysis stays alive + // for the entire SCC pass run below. + MPM.add(createGlobalsAAWrapperPass()); + // Start of CallGraph SCC passes. if (!DisableUnitAtATime) MPM.add(createPruneEHPass()); // Remove dead EH info @@ -245,6 +275,7 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); MPM.add(createLICMPass()); // Hoist loop invariants MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); + MPM.add(createCFGSimplificationPass()); MPM.add(createInstructionCombiningPass()); MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. @@ -315,9 +346,42 @@ void PassManagerBuilder::populateModulePassManager( // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) { + // Remove avail extern fns and globals definitions if we aren't + // compiling an object file for later LTO. For LTO we want to preserve + // these so they are eligible for inlining at link-time. Note if they + // are unreferenced they will be removed by GlobalDCE later, so + // this only impacts referenced available externally globals. + // Eventually they will be suppressed during codegen, but eliminating + // here enables more opportunity for GlobalDCE as it may make + // globals referenced by available external functions dead + // and saves running remaining passes on the eliminated functions. + MPM.add(createEliminateAvailableExternallyPass()); + } + + if (EnableNonLTOGlobalsModRef) + // We add a fresh GlobalsModRef run at this point. This is particularly + // useful as the above will have inlined, DCE'ed, and function-attr + // propagated everything. We should at this point have a reasonably minimal + // and richly annotated call graph. 
By computing aliasing and mod/ref + // information for all local globals here, the late loop passes and notably + // the vectorizer will be able to use them to help recognize vectorizable + // memory operations. + // + // Note that this relies on a bug in the pass manager which preserves + // a module analysis into a function pass pipeline (and throughout it) so + // long as the first function pass doesn't invalidate the module analysis. + // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for + // this to work. Fortunately, it is trivial to preserve AliasAnalysis + // (doing nothing preserves it as it is required to be conservatively + // correct in the face of IR changes). + MPM.add(createGlobalsAAWrapperPass()); + if (RunFloat2Int) MPM.add(createFloat2IntPass()); + addExtensionsToPM(EP_VectorizerStart, MPM); + // Re-rotate loops in all our loop nests. These may have fallout out of // rotated form due to GVN or other transformations, and the vectorizer relies // on the rotated form. Disable header duplication at -Oz. @@ -329,6 +393,12 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createLoopDistributePass()); MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize)); + + // Eliminate loads by forwarding stores from the previous iteration to loads + // of the current iteration. + if (EnableLoopLoadElim) + MPM.add(createLoopLoadEliminationPass()); + // FIXME: Because of #pragma vectorize enable, the passes below are always // inserted in the pipeline, even when the vectorizer doesn't run (ex. when // on -O1 and no #pragma is found). Would be good to have these two passes @@ -402,17 +472,6 @@ void PassManagerBuilder::populateModulePassManager( // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. if (OptLevel > 1) { - if (!PrepareForLTO) { - // Remove avail extern fns and globals definitions if we aren't - // compiling an object file for later LTO. For LTO we want to preserve - // these so they are eligible for inlining at link-time. Note if they - // are unreferenced they will be removed by GlobalDCE below, so - // this only impacts referenced available externally globals. - // Eventually they will be suppressed during codegen, but eliminating - // here enables more opportunity for GlobalDCE as it may make - // globals referenced by available external functions dead. - MPM.add(createEliminateAvailableExternallyPass()); - } MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. MPM.add(createConstantMergePass()); // Merge dup global constants } @@ -428,13 +487,25 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); + if (FunctionIndex) + PM.add(createFunctionImportPass(FunctionIndex)); + + // Allow forcing function attributes as a debugging and tuning aid. + PM.add(createForceFunctionAttrsLegacyPass()); + + // Infer attributes about declarations if possible. + PM.add(createInferFunctionAttrsLegacyPass()); + // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. PM.add(createIPSCCPPass()); // Now that we internalized some globals, see if we can hack on them! + PM.add(createFunctionAttrsPass()); // Add norecurse if possible. 
PM.add(createGlobalOptimizerPass()); + // Promote any localized global vars. + PM.add(createPromoteMemoryToRegisterPass()); // Linking modules together can lead to duplicated global constants, only // keep one copy of each constant. @@ -481,7 +552,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createFunctionAttrsPass()); // Add nocapture. - PM.add(createGlobalsModRefPass()); // IP alias analysis. + PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. PM.add(createLICMPass()); // Hoist loop invariants. if (EnableMLSM) @@ -500,6 +571,15 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { PM.add(createLoopVectorizePass(true, LoopVectorize)); + // Now that we've optimized loops (in particular loop induction variables), + // we may have exposed more scalar opportunities. Run parts of the scalar + // optimizer again at this point. + PM.add(createInstructionCombiningPass()); // Initial cleanup + PM.add(createCFGSimplificationPass()); // if-convert + PM.add(createSCCPPass()); // Propagate exposed constants + PM.add(createInstructionCombiningPass()); // Clean up again + PM.add(createBitTrackingDCEPass()); + // More scalar chains could be vectorized due to more alias information if (RunSLPAfterLoopVectorization) if (SLPVectorize) @@ -524,6 +604,9 @@ void PassManagerBuilder::addLateLTOOptimizationPasses( // Delete basic blocks, which optimization passes may have killed. PM.add(createCFGSimplificationPass()); + // Drop bodies of available externally objects to improve GlobalDCE. + PM.add(createEliminateAvailableExternallyPass()); + // Now that we have optimized the program, discard unreachable functions. PM.add(createGlobalDCEPass()); @@ -543,6 +626,10 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (OptLevel > 1) addLTOOptimizationPasses(PM); + // Create a function that performs CFI checks for cross-DSO calls with targets + // in the current module. + PM.add(createCrossDSOCFIPass()); + // Lower bit sets to globals. This pass supports Clang's control flow // integrity mechanisms (-fsanitize=cfi*) and needs to run at link time if CFI // is enabled. The pass does nothing if CFI is disabled. diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index b2f1010c9a07..3af4afb903fe 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -21,7 +21,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" -#include "llvm/Analysis/LibCallSemantics.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" @@ -153,21 +153,16 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) { // If the SCC doesn't unwind or doesn't throw, note this fact. 
if (!SCCMightUnwind || !SCCMightReturn) for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) { - AttrBuilder NewAttributes; - - if (!SCCMightUnwind) - NewAttributes.addAttribute(Attribute::NoUnwind); - if (!SCCMightReturn) - NewAttributes.addAttribute(Attribute::NoReturn); - Function *F = (*I)->getFunction(); - const AttributeSet &PAL = F->getAttributes().getFnAttributes(); - const AttributeSet &NPAL = AttributeSet::get( - F->getContext(), AttributeSet::FunctionIndex, NewAttributes); - if (PAL != NPAL) { + if (!SCCMightUnwind && !F->hasFnAttribute(Attribute::NoUnwind)) { + F->addFnAttr(Attribute::NoUnwind); + MadeChange = true; + } + + if (!SCCMightReturn && !F->hasFnAttribute(Attribute::NoReturn)) { + F->addFnAttr(Attribute::NoReturn); MadeChange = true; - F->addAttributes(AttributeSet::FunctionIndex, NPAL); } } @@ -191,9 +186,13 @@ bool PruneEH::SimplifyFunction(Function *F) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast(BB->getTerminator())) if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) { - SmallVector Args(II->op_begin(), II->op_end() - 3); + SmallVector Args(II->arg_begin(), II->arg_end()); + SmallVector OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + // Insert a call instruction before the invoke. - CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles, + "", II); Call->takeName(II); Call->setCallingConv(II->getCallingConv()); Call->setAttributes(II->getAttributes()); @@ -233,7 +232,7 @@ bool PruneEH::SimplifyFunction(Function *F) { // Remove the uncond branch and add an unreachable. BB->getInstList().pop_back(); - new UnreachableInst(BB->getContext(), BB); + new UnreachableInst(BB->getContext(), &*BB); DeleteBasicBlock(New); // Delete the new BB. 
MadeChange = true; diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/IPO/SampleProfile.cpp similarity index 50% rename from lib/Transforms/Scalar/SampleProfile.cpp rename to lib/Transforms/IPO/SampleProfile.cpp index c8dfa54a4aa0..928d92ef9d12 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/IPO/SampleProfile.cpp @@ -22,7 +22,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -44,7 +43,11 @@ #include "llvm/ProfileData/SampleProfReader.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils/Cloning.h" #include using namespace llvm; @@ -61,27 +64,51 @@ static cl::opt SampleProfileMaxPropagateIterations( "sample-profile-max-propagate-iterations", cl::init(100), cl::desc("Maximum number of iterations to go through when propagating " "sample block/edge weights through the CFG.")); +static cl::opt SampleProfileRecordCoverage( + "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"), + cl::desc("Emit a warning if less than N% of records in the input profile " + "are matched to the IR.")); +static cl::opt SampleProfileSampleCoverage( + "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"), + cl::desc("Emit a warning if less than N% of samples in the input profile " + "are matched to the IR.")); +static cl::opt SampleProfileHotThreshold( + "sample-profile-inline-hot-threshold", cl::init(0.1), cl::value_desc("N"), + cl::desc("Inlined functions that account for more than N% of all samples " + "collected in the parent function, will be inlined again.")); +static cl::opt SampleProfileGlobalHotThreshold( + "sample-profile-global-hot-threshold", cl::init(30), cl::value_desc("N"), + cl::desc("Top-level functions that account for more than N% of all samples " + "collected in the profile, will be marked as hot for the inliner " + "to consider.")); +static cl::opt SampleProfileGlobalColdThreshold( + "sample-profile-global-cold-threshold", cl::init(0.5), cl::value_desc("N"), + cl::desc("Top-level functions that account for less than N% of all samples " + "collected in the profile, will be marked as cold for the inliner " + "to consider.")); namespace { -typedef DenseMap BlockWeightMap; -typedef DenseMap EquivalenceClassMap; -typedef std::pair Edge; -typedef DenseMap EdgeWeightMap; -typedef DenseMap> BlockEdgeMap; +typedef DenseMap BlockWeightMap; +typedef DenseMap EquivalenceClassMap; +typedef std::pair Edge; +typedef DenseMap EdgeWeightMap; +typedef DenseMap> + BlockEdgeMap; /// \brief Sample profile pass. /// /// This pass reads profile data from the file specified by /// -sample-profile-file and annotates every affected function with the /// profile information found in that file. 
-class SampleProfileLoader : public FunctionPass { +class SampleProfileLoader : public ModulePass { public: // Class identification, replacement for typeinfo static char ID; SampleProfileLoader(StringRef Name = SampleProfileFile) - : FunctionPass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Ctx(nullptr), - Reader(), Samples(nullptr), Filename(Name), ProfileIsValid(false) { + : ModulePass(ID), DT(nullptr), PDT(nullptr), LI(nullptr), Reader(), + Samples(nullptr), Filename(Name), ProfileIsValid(false), + TotalCollectedSamples(0) { initializeSampleProfileLoaderPass(*PassRegistry::getPassRegistry()); } @@ -91,36 +118,37 @@ public: const char *getPassName() const override { return "Sample profile pass"; } - bool runOnFunction(Function &F) override; + bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); } protected: + bool runOnFunction(Function &F); unsigned getFunctionLoc(Function &F); bool emitAnnotations(Function &F); - unsigned getInstWeight(Instruction &I); - unsigned getBlockWeight(BasicBlock *BB); + ErrorOr getInstWeight(const Instruction &I) const; + ErrorOr getBlockWeight(const BasicBlock *BB) const; + const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const; + const FunctionSamples *findFunctionSamples(const Instruction &I) const; + bool inlineHotFunctions(Function &F); + bool emitInlineHints(Function &F); void printEdgeWeight(raw_ostream &OS, Edge E); - void printBlockWeight(raw_ostream &OS, BasicBlock *BB); - void printBlockEquivalence(raw_ostream &OS, BasicBlock *BB); + void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; + void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); bool computeBlockWeights(Function &F); void findEquivalenceClasses(Function &F); void findEquivalencesFor(BasicBlock *BB1, SmallVector Descendants, DominatorTreeBase *DomTree); void propagateWeights(Function &F); - unsigned visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); + uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(Function &F); bool propagateThroughEdges(Function &F); - - /// \brief Line number for the function header. Used to compute absolute - /// line numbers from the relative line numbers found in the profile. - unsigned HeaderLineno; + void computeDominanceAndLoopInfo(Function &F); + unsigned getOffset(unsigned L, unsigned H) const; + void clearFunctionData(); /// \brief Map basic blocks to their computed weights. /// @@ -135,7 +163,7 @@ protected: EdgeWeightMap EdgeWeights; /// \brief Set of visited blocks during propagation. - SmallPtrSet VisitedBlocks; + SmallPtrSet VisitedBlocks; /// \brief Set of visited edges during propagation. SmallSet VisitedEdges; @@ -149,9 +177,9 @@ protected: EquivalenceClassMap EquivalenceClass; /// \brief Dominance, post-dominance and loop information. - DominatorTree *DT; - PostDominatorTree *PDT; - LoopInfo *LI; + std::unique_ptr DT; + std::unique_ptr> PDT; + std::unique_ptr LI; /// \brief Predecessors for each basic block in the CFG. BlockEdgeMap Predecessors; @@ -159,9 +187,6 @@ protected: /// \brief Successors for each basic block in the CFG. BlockEdgeMap Successors; - /// \brief LLVM context holding the debug data we need. - LLVMContext *Ctx; - /// \brief Profile reader object. std::unique_ptr Reader; @@ -173,7 +198,207 @@ protected: /// \brief Flag indicating whether the profile input loaded successfully. 
bool ProfileIsValid; + + /// \brief Total number of samples collected in this profile. + /// + /// This is the sum of all the samples collected in all the functions executed + /// at runtime. + uint64_t TotalCollectedSamples; }; + +class SampleCoverageTracker { +public: + SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {} + + bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset, + uint32_t Discriminator, uint64_t Samples); + unsigned computeCoverage(unsigned Used, unsigned Total) const; + unsigned countUsedRecords(const FunctionSamples *FS) const; + unsigned countBodyRecords(const FunctionSamples *FS) const; + uint64_t getTotalUsedSamples() const { return TotalUsedSamples; } + uint64_t countBodySamples(const FunctionSamples *FS) const; + void clear() { + SampleCoverage.clear(); + TotalUsedSamples = 0; + } + +private: + typedef std::map BodySampleCoverageMap; + typedef DenseMap + FunctionSamplesCoverageMap; + + /// Coverage map for sampling records. + /// + /// This map keeps a record of sampling records that have been matched to + /// an IR instruction. This is used to detect some form of staleness in + /// profiles (see flag -sample-profile-check-coverage). + /// + /// Each entry in the map corresponds to a FunctionSamples instance. This is + /// another map that counts how many times the sample record at the + /// given location has been used. + FunctionSamplesCoverageMap SampleCoverage; + + /// Number of samples used from the profile. + /// + /// When a sampling record is used for the first time, the samples from + /// that record are added to this accumulator. Coverage is later computed + /// based on the total number of samples available in this function and + /// its callsites. + /// + /// Note that this accumulator tracks samples used from a single function + /// and all the inlined callsites. Strictly, we should have a map of counters + /// keyed by FunctionSamples pointers, but these stats are cleared after + /// every function, so we just need to keep a single counter. + uint64_t TotalUsedSamples; +}; + +SampleCoverageTracker CoverageTracker; + +/// Return true if the given callsite is hot wrt to its caller. +/// +/// Functions that were inlined in the original binary will be represented +/// in the inline stack in the sample profile. If the profile shows that +/// the original inline decision was "good" (i.e., the callsite is executed +/// frequently), then we will recreate the inline decision and apply the +/// profile from the inlined callsite. +/// +/// To decide whether an inlined callsite is hot, we compute the fraction +/// of samples used by the callsite with respect to the total number of samples +/// collected in the caller. +/// +/// If that fraction is larger than the default given by +/// SampleProfileHotThreshold, the callsite will be inlined again. +bool callsiteIsHot(const FunctionSamples *CallerFS, + const FunctionSamples *CallsiteFS) { + if (!CallsiteFS) + return false; // The callsite was not inlined in the original binary. + + uint64_t ParentTotalSamples = CallerFS->getTotalSamples(); + if (ParentTotalSamples == 0) + return false; // Avoid division by zero. + + uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples(); + if (CallsiteTotalSamples == 0) + return false; // Callsite is trivially cold. 
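+  // Worked example with invented numbers (illustrative only, not from any
+  // real profile): if the caller accumulated 1,000 total samples and this
+  // inlined callsite accumulated 5, the computation below gives
+  // PercentSamples = 5.0 / 1000.0 * 100.0 = 0.5, which meets the default
+  // SampleProfileHotThreshold of 0.1, so the callsite is considered hot and
+  // is eligible for re-inlining.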
+
+  double PercentSamples =
+      (double)CallsiteTotalSamples / (double)ParentTotalSamples * 100.0;
+  return PercentSamples >= SampleProfileHotThreshold;
+}
+
+}
+
+/// Mark as used the sample record for the given function samples at
+/// (LineOffset, Discriminator).
+///
+/// \returns true if this is the first time we mark the given record.
+bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
+                                            uint32_t LineOffset,
+                                            uint32_t Discriminator,
+                                            uint64_t Samples) {
+  LineLocation Loc(LineOffset, Discriminator);
+  unsigned &Count = SampleCoverage[FS][Loc];
+  bool FirstTime = (++Count == 1);
+  if (FirstTime)
+    TotalUsedSamples += Samples;
+  return FirstTime;
+}
+
+/// Return the number of sample records that were applied from this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS) const {
+  auto I = SampleCoverage.find(FS);
+
+  // The size of the coverage map for FS represents the number of records
+  // that were marked used at least once.
+  unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
+
+  // If there are inlined callsites in this function, count the records found
+  // in the respective bodies. However, do not bother counting callees with 0
+  // total samples; these are callees that were never invoked at runtime.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Count += countUsedRecords(CalleeSamples);
+  }
+
+  return Count;
+}
+
+/// Return the number of sample records in the body of this profile.
+///
+/// This count does not include records from cold inlined callsites.
+unsigned
+SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS) const {
+  unsigned Count = FS->getBodySamples().size();
+
+  // Only count records in hot callsites.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Count += countBodyRecords(CalleeSamples);
+  }
+
+  return Count;
+}
+
+/// Return the number of samples collected in the body of this profile.
+///
+/// This count does not include samples from cold inlined callsites.
+uint64_t
+SampleCoverageTracker::countBodySamples(const FunctionSamples *FS) const {
+  uint64_t Total = 0;
+  for (const auto &I : FS->getBodySamples())
+    Total += I.second.getSamples();
+
+  // Only count samples in hot callsites.
+  for (const auto &I : FS->getCallsiteSamples()) {
+    const FunctionSamples *CalleeSamples = &I.second;
+    if (callsiteIsHot(FS, CalleeSamples))
+      Total += countBodySamples(CalleeSamples);
+  }
+
+  return Total;
+}
+
+/// Return the fraction of sample records used in this profile.
+///
+/// The returned value is an unsigned integer in the range 0-100 indicating
+/// the percentage of sample records that were used while applying this
+/// profile to the associated function.
+unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
+                                                unsigned Total) const {
+  assert(Used <= Total &&
+         "number of used records cannot exceed the total number of records");
+  return Total > 0 ? Used * 100 / Total : 100;
+}
+
+/// Clear all the per-function data used to load samples and propagate weights.
+void SampleProfileLoader::clearFunctionData() {
+  BlockWeights.clear();
+  EdgeWeights.clear();
+  VisitedBlocks.clear();
+  VisitedEdges.clear();
+  EquivalenceClass.clear();
+  DT = nullptr;
+  PDT = nullptr;
+  LI = nullptr;
+  Predecessors.clear();
+  Successors.clear();
+  CoverageTracker.clear();
+}
+
+/// \brief Returns the offset of lineno \p L to head_lineno \p H
+///
+/// \param L Lineno
+/// \param H Header lineno of the function
+///
+/// \returns offset to the header lineno. 16 bits are used to represent offset.
+/// We assume that a single function will not exceed 65535 LOC.
+unsigned SampleProfileLoader::getOffset(unsigned L, unsigned H) const {
+  return (L - H) & 0xffff;
 }
 
 /// \brief Print the weight of edge \p E on stream \p OS.
@@ -190,8 +415,8 @@ void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
 /// \param OS Stream to emit the output to.
 /// \param BB Block to print.
 void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
-                                                BasicBlock *BB) {
-  BasicBlock *Equiv = EquivalenceClass[BB];
+                                                const BasicBlock *BB) {
+  const BasicBlock *Equiv = EquivalenceClass[BB];
   OS << "equivalence[" << BB->getName()
      << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
 }
@@ -200,8 +425,11 @@ void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
 ///
 /// \param OS Stream to emit the output to.
 /// \param BB Block to print.
-void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
-  OS << "weight[" << BB->getName() << "]: " << BlockWeights[BB] << "\n";
+void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
+                                           const BasicBlock *BB) const {
+  const auto &I = BlockWeights.find(BB);
+  uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
+  OS << "weight[" << BB->getName() << "]: " << W << "\n";
 }
 
 /// \brief Get the weight for an instruction.
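// A standalone sketch (not part of this patch; helper name hypothetical) of
// the header-relative line encoding implemented by getOffset() above. Offsets
// are truncated to 16 bits, so they stay stable under edits elsewhere in the
// file as long as a single function stays under 65535 lines.
static unsigned lineOffsetSketch(unsigned Lineno, unsigned HeaderLineno) {
  // Same arithmetic as SampleProfileLoader::getOffset().
  return (Lineno - HeaderLineno) & 0xffff;
}
// Example: a sample taken at absolute line 1043 of a function whose header is
// at line 1000 is stored at offset 43; the profile still matches if the whole
// function later moves to a different absolute line.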
@@ -214,51 +442,67 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS, BasicBlock *BB) {
 ///
 /// \param Inst Instruction to query.
 ///
-/// \returns The profiled weight of I.
-unsigned SampleProfileLoader::getInstWeight(Instruction &Inst) {
+/// \returns the weight of \p Inst.
+ErrorOr<uint64_t>
+SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
   DebugLoc DLoc = Inst.getDebugLoc();
   if (!DLoc)
-    return 0;
+    return std::error_code();
 
-  unsigned Lineno = DLoc.getLine();
-  if (Lineno < HeaderLineno)
-    return 0;
+  const FunctionSamples *FS = findFunctionSamples(Inst);
+  if (!FS)
+    return std::error_code();
 
   const DILocation *DIL = DLoc;
-  int LOffset = Lineno - HeaderLineno;
-  unsigned Discriminator = DIL->getDiscriminator();
-  unsigned Weight = Samples->samplesAt(LOffset, Discriminator);
-  DEBUG(dbgs() << "    " << Lineno << "." << Discriminator << ":" << Inst
-               << " (line offset: " << LOffset << "." << Discriminator
-               << " - weight: " << Weight << ")\n");
-  return Weight;
+  unsigned Lineno = DLoc.getLine();
+  unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
+
+  uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
+  uint32_t Discriminator = DIL->getDiscriminator();
+  ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
+  if (R) {
+    bool FirstMark =
+        CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
+    if (FirstMark) {
+      const Function *F = Inst.getParent()->getParent();
+      LLVMContext &Ctx = F->getContext();
+      emitOptimizationRemark(
+          Ctx, DEBUG_TYPE, *F, DLoc,
+          Twine("Applied ") + Twine(*R) + " samples from profile (offset: " +
+              Twine(LineOffset) +
+              ((Discriminator) ? Twine(".") + Twine(Discriminator) : "") +
+              ")");
+    }
+    DEBUG(dbgs() << "    " << Lineno << "." << DIL->getDiscriminator() << ":"
+                 << Inst << " (line offset: " << Lineno - HeaderLineno << "."
+                 << DIL->getDiscriminator() << " - weight: " << R.get()
+                 << ")\n");
+  }
+  return R;
 }
 
 /// \brief Compute the weight of a basic block.
 ///
 /// The weight of basic block \p BB is the maximum weight of all the
-/// instructions in BB. The weight of \p BB is computed and cached in
-/// the BlockWeights map.
+/// instructions in BB.
 ///
 /// \param BB The basic block to query.
 ///
-/// \returns The computed weight of BB.
-unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
-  // If we've computed BB's weight before, return it.
-  std::pair<BlockWeightMap::iterator, bool> Entry =
-      BlockWeights.insert(std::make_pair(BB, 0));
-  if (!Entry.second)
-    return Entry.first->second;
-
-  // Otherwise, compute and cache BB's weight.
-  unsigned Weight = 0;
+/// \returns the weight for \p BB.
+ErrorOr<uint64_t>
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
+  bool Found = false;
+  uint64_t Weight = 0;
   for (auto &I : BB->getInstList()) {
-    unsigned InstWeight = getInstWeight(I);
-    if (InstWeight > Weight)
-      Weight = InstWeight;
+    const ErrorOr<uint64_t> &R = getInstWeight(I);
+    if (R && R.get() >= Weight) {
+      Weight = R.get();
+      Found = true;
+    }
   }
-  Entry.first->second = Weight;
-  return Weight;
+  if (Found)
+    return Weight;
+  else
+    return std::error_code();
 }
 
 /// \brief Compute and store the weights of every basic block.
@@ -270,15 +514,199 @@ unsigned SampleProfileLoader::getBlockWeight(BasicBlock *BB) {
 bool SampleProfileLoader::computeBlockWeights(Function &F) {
   bool Changed = false;
   DEBUG(dbgs() << "Block weights\n");
-  for (auto &BB : F) {
-    unsigned Weight = getBlockWeight(&BB);
-    Changed |= (Weight > 0);
+  for (const auto &BB : F) {
+    ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
+    if (Weight) {
+      BlockWeights[&BB] = Weight.get();
+      VisitedBlocks.insert(&BB);
+      Changed = true;
+    }
     DEBUG(printBlockWeight(dbgs(), &BB));
   }
 
   return Changed;
 }
 
+/// \brief Get the FunctionSamples for a call instruction.
+///
+/// The FunctionSamples of a call instruction \p Inst is the inlined instance
+/// that the call targets. It contains all the samples that reside in that
+/// inlined instance. We first find the inlined instance that contains the
+/// call instruction, then traverse its children to find the callsite with
+/// the matching location and callee function name.
+///
+/// \param Inst Call instruction to query.
+///
+/// \returns The FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+  const DILocation *DIL = Inst.getDebugLoc();
+  if (!DIL) {
+    return nullptr;
+  }
+  DISubprogram *SP = DIL->getScope()->getSubprogram();
+  if (!SP)
+    return nullptr;
+
+  Function *CalleeFunc = Inst.getCalledFunction();
+  if (!CalleeFunc) {
+    return nullptr;
+  }
+
+  StringRef CalleeName = CalleeFunc->getName();
+  const FunctionSamples *FS = findFunctionSamples(Inst);
+  if (FS == nullptr)
+    return nullptr;
+
+  return FS->findFunctionSamplesAt(
+      CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+                       DIL->getDiscriminator(), CalleeName));
+}
+
+/// \brief Get the FunctionSamples for an instruction.
+///
+/// The FunctionSamples of an instruction \p Inst is the inlined instance
+/// from which that instruction came. We traverse the inline stack of that
+/// instruction and match it against the tree nodes in the profile.
+///
+/// \param Inst Instruction to query.
+///
+/// \returns the FunctionSamples pointer to the inlined instance.
+const FunctionSamples *
+SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
+  SmallVector<CallsiteLocation, 10> S;
+  const DILocation *DIL = Inst.getDebugLoc();
+  if (!DIL) {
+    return Samples;
+  }
+  StringRef CalleeName;
+  for (const DILocation *DIL = Inst.getDebugLoc(); DIL;
+       DIL = DIL->getInlinedAt()) {
+    DISubprogram *SP = DIL->getScope()->getSubprogram();
+    if (!SP)
+      return nullptr;
+    if (!CalleeName.empty()) {
+      S.push_back(CallsiteLocation(getOffset(DIL->getLine(), SP->getLine()),
+                                   DIL->getDiscriminator(), CalleeName));
+    }
+    CalleeName = SP->getLinkageName();
+  }
+  if (S.size() == 0)
+    return Samples;
+  const FunctionSamples *FS = Samples;
+  for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {
+    FS = FS->findFunctionSamplesAt(S[i]);
+  }
+  return FS;
+}
+
+/// \brief Emit an inline hint if \p F is globally hot or cold.
+///
+/// If \p F consumes a significant fraction of samples (indicated by
+/// SampleProfileGlobalHotThreshold), apply the InlineHint attribute for the
+/// inliner to consider the function hot.
+///
+/// If \p F consumes a small fraction of samples (indicated by
+/// SampleProfileGlobalColdThreshold), apply the Cold attribute for the inliner
+/// to consider the function cold.
+///
+/// FIXME - This setting of inline hints is sub-optimal. Instead of marking a
+/// function globally hot or cold, we should be annotating individual callsites.
+/// This is not currently possible, but work on the inliner will eventually
+/// provide this ability. See http://reviews.llvm.org/D15003 for details and
+/// discussion.
+///
+/// \returns True if either attribute was applied to \p F.
+bool SampleProfileLoader::emitInlineHints(Function &F) {
+  if (TotalCollectedSamples == 0)
+    return false;
+
+  uint64_t FunctionSamples = Samples->getTotalSamples();
+  double SamplesPercent =
+      (double)FunctionSamples / (double)TotalCollectedSamples * 100.0;
+
+  // If the function collected more samples than the hot threshold, mark
+  // it globally hot.
+  if (SamplesPercent >= SampleProfileGlobalHotThreshold) {
+    F.addFnAttr(llvm::Attribute::InlineHint);
+    std::string Msg;
+    raw_string_ostream S(Msg);
+    S << "Applied inline hint to globally hot function '" << F.getName()
+      << "' with " << format("%.2f", SamplesPercent)
+      << "% of samples (threshold: "
+      << format("%.2f", SampleProfileGlobalHotThreshold.getValue()) << "%)";
+    S.flush();
+    emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+    return true;
+  }
+
+  // If the function collected fewer samples than the cold threshold, mark
+  // it globally cold.
+  if (SamplesPercent <= SampleProfileGlobalColdThreshold) {
+    F.addFnAttr(llvm::Attribute::Cold);
+    std::string Msg;
+    raw_string_ostream S(Msg);
+    S << "Applied cold hint to globally cold function '" << F.getName()
+      << "' with " << format("%.2f", SamplesPercent)
+      << "% of samples (threshold: "
+      << format("%.2f", SampleProfileGlobalColdThreshold.getValue()) << "%)";
+    S.flush();
+    emitOptimizationRemark(F.getContext(), DEBUG_TYPE, F, DebugLoc(), Msg);
+    return true;
+  }
+
+  return false;
+}
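// A minimal sketch (not from this patch; names and percentages hypothetical)
// of the global classification emitInlineHints() above applies:
enum class GlobalHint { Hot, Cold, None };
static GlobalHint classifyBySampleShare(uint64_t FuncSamples,
                                        uint64_t TotalSamples,
                                        double HotPercent, double ColdPercent) {
  if (TotalSamples == 0)
    return GlobalHint::None; // No profile data; leave the function alone.
  double Percent = (double)FuncSamples / (double)TotalSamples * 100.0;
  if (Percent >= HotPercent)
    return GlobalHint::Hot;  // Would receive the InlineHint attribute.
  if (Percent <= ColdPercent)
    return GlobalHint::Cold; // Would receive the Cold attribute.
  return GlobalHint::None;
}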
+/// \brief Iteratively inline hot callsites of a function.
+///
+/// Iteratively traverse all callsites of the function \p F, and determine
+/// whether the corresponding inlined instance exists and is hot in the
+/// profile. If it is hot enough, inline the callsite and add the new
+/// callsites of the callee into the caller.
+///
+/// TODO: investigate the possibility of not invoking InlineFunction directly.
+///
+/// \param F function to perform iterative inlining.
+///
+/// \returns True if any inlining happened.
+bool SampleProfileLoader::inlineHotFunctions(Function &F) {
+  bool Changed = false;
+  LLVMContext &Ctx = F.getContext();
+  while (true) {
+    bool LocalChanged = false;
+    SmallVector<CallInst *, 10> CIS;
+    for (auto &BB : F) {
+      for (auto &I : BB.getInstList()) {
+        CallInst *CI = dyn_cast<CallInst>(&I);
+        if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
+          CIS.push_back(CI);
+      }
+    }
+    for (auto CI : CIS) {
+      InlineFunctionInfo IFI;
+      Function *CalledFunction = CI->getCalledFunction();
+      DebugLoc DLoc = CI->getDebugLoc();
+      uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
+      if (InlineFunction(CI, IFI)) {
+        LocalChanged = true;
+        emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
                               Twine("inlined hot callee '") +
                                   CalledFunction->getName() + "' with " +
                                   Twine(NumSamples) + " samples into '" +
                                   F.getName() + "'");
+      }
+    }
+    if (LocalChanged) {
+      Changed = true;
+    } else {
+      break;
+    }
+  }
+  return Changed;
+}
+
 /// \brief Find equivalence classes for the given block.
 ///
 /// This finds all the blocks that are guaranteed to execute the same
@@ -305,12 +733,13 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
 void SampleProfileLoader::findEquivalencesFor(
     BasicBlock *BB1, SmallVector<BasicBlock *, 8> Descendants,
     DominatorTreeBase<BasicBlock> *DomTree) {
-  for (auto *BB2 : Descendants) {
+  const BasicBlock *EC = EquivalenceClass[BB1];
+  uint64_t Weight = BlockWeights[EC];
+  for (const auto *BB2 : Descendants) {
     bool IsDomParent = DomTree->dominates(BB2, BB1);
     bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
-    if (BB1 != BB2 && VisitedBlocks.insert(BB2).second && IsDomParent &&
-        IsInSameLoop) {
-      EquivalenceClass[BB2] = BB1;
+    if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
+      EquivalenceClass[BB2] = EC;
 
       // If BB2 is heavier than BB1, make BB2 have the same weight
       // as BB1.
@@ -320,11 +749,10 @@ void SampleProfileLoader::findEquivalencesFor(
       // during the propagation phase. Right now, we just want to
      // make sure that BB1 has the largest weight of all the
       // members of its equivalence set.
-      unsigned &BB1Weight = BlockWeights[BB1];
-      unsigned &BB2Weight = BlockWeights[BB2];
-      BB1Weight = std::max(BB1Weight, BB2Weight);
+      Weight = std::max(Weight, BlockWeights[BB2]);
     }
   }
+  BlockWeights[EC] = Weight;
 }
 
 /// \brief Find equivalence classes.
@@ -364,19 +792,7 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
     // class by making BB2's equivalence class be BB1.
     DominatedBBs.clear();
     DT->getDescendants(BB1, DominatedBBs);
-    findEquivalencesFor(BB1, DominatedBBs, PDT->DT);
-
-    // Repeat the same logic for all the blocks post-dominated by BB1.
-    // We are looking for every basic block BB2 such that:
-    //
-    // 1- BB1 post-dominates BB2.
-    // 2- BB2 dominates BB1.
-    // 3- BB1 and BB2 are in the same loop nest.
-    //
-    // If all those conditions hold, BB2's equivalence class is BB1.
-    DominatedBBs.clear();
-    PDT->getDescendants(BB1, DominatedBBs);
-    findEquivalencesFor(BB1, DominatedBBs, DT);
+    findEquivalencesFor(BB1, DominatedBBs, PDT.get());
 
     DEBUG(printBlockEquivalence(dbgs(), BB1));
   }
@@ -389,8 +805,8 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
   // to all the blocks in that equivalence class.
   DEBUG(dbgs() << "\nAssign the same weight to all blocks in the same class\n");
   for (auto &BI : F) {
-    BasicBlock *BB = &BI;
-    BasicBlock *EquivBB = EquivalenceClass[BB];
+    const BasicBlock *BB = &BI;
+    const BasicBlock *EquivBB = EquivalenceClass[BB];
     if (BB != EquivBB)
       BlockWeights[BB] = BlockWeights[EquivBB];
     DEBUG(printBlockWeight(dbgs(), BB));
@@ -407,7 +823,7 @@ void SampleProfileLoader::findEquivalenceClasses(Function &F) {
 /// \param UnknownEdge Set if E has not been visited before.
 ///
 /// \returns E's weight, if known. Otherwise, return 0.
-unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
+uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
                                         Edge *UnknownEdge) {
   if (!VisitedEdges.count(E)) {
     (*NumUnknownEdges)++;
@@ -432,8 +848,9 @@ unsigned SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
 bool SampleProfileLoader::propagateThroughEdges(Function &F) {
   bool Changed = false;
   DEBUG(dbgs() << "\nPropagation through edges\n");
-  for (auto &BI : F) {
-    BasicBlock *BB = &BI;
+  for (const auto &BI : F) {
+    const BasicBlock *BB = &BI;
+    const BasicBlock *EC = EquivalenceClass[BB];
 
     // Visit all the predecessor and successor edges to determine
     // which ones have a weight assigned already. Note that it doesn't
@@ -441,7 +858,7 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
     // only case we are interested in handling is when only a single
     // edge is unknown (see setEdgeOrBlockWeight).
     for (unsigned i = 0; i < 2; i++) {
-      unsigned TotalWeight = 0;
+      uint64_t TotalWeight = 0;
       unsigned NumUnknownEdges = 0;
       Edge UnknownEdge, SelfReferentialEdge;
 
@@ -485,7 +902,7 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
      // all edges will get a weight, or iteration will stop when
       // it reaches SampleProfileMaxPropagateIterations.
       if (NumUnknownEdges <= 1) {
-        unsigned &BBWeight = BlockWeights[BB];
+        uint64_t &BBWeight = BlockWeights[EC];
         if (NumUnknownEdges == 0) {
           // If we already know the weight of all edges, the weight of the
           // basic block can be computed. It should be no larger than the sum
@@ -497,9 +914,9 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
                        << " known. Set weight for block: ";
                 printBlockWeight(dbgs(), BB););
         }
-        if (VisitedBlocks.insert(BB).second)
+        if (VisitedBlocks.insert(EC).second)
          Changed = true;
-      } else if (NumUnknownEdges == 1 && VisitedBlocks.count(BB)) {
+      } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
         // If there is a single unknown edge and the block has been
         // visited, then we can compute E's weight.
         if (BBWeight >= TotalWeight)
@@ -511,8 +928,8 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
           DEBUG(dbgs() << "Set weight for edge: ";
                 printEdgeWeight(dbgs(), UnknownEdge));
         }
-      } else if (SelfReferentialEdge.first && VisitedBlocks.count(BB)) {
-        unsigned &BBWeight = BlockWeights[BB];
+      } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
+        uint64_t &BBWeight = BlockWeights[BB];
         // We have a self-referential edge and the weight of BB is known.
         if (BBWeight >= TotalWeight)
           EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
@@ -578,7 +995,7 @@ void SampleProfileLoader::buildEdges(Function &F) {
 /// known).
 void SampleProfileLoader::propagateWeights(Function &F) {
   bool Changed = true;
-  unsigned i = 0;
+  unsigned I = 0;
 
   // Add an entry count to the function using the samples gathered
   // at the function entry.
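// A standalone sketch (hypothetical helper, not part of this patch) of the
// inference propagateThroughEdges() above performs once a block has a single
// unknown incident edge: the unknown edge gets the block weight left over
// after subtracting all known edges, saturating at zero.
static uint64_t inferUnknownEdgeWeight(uint64_t BBWeight,
                                       uint64_t KnownEdgeTotal) {
  // Edge weights are unsigned; never let the subtraction wrap.
  return BBWeight >= KnownEdgeTotal ? BBWeight - KnownEdgeTotal : 0;
}
// Example: a block with weight 100 whose known incident edges sum to 85
// leaves 15 samples for the one remaining unknown edge.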
@@ -592,14 +1009,15 @@ void SampleProfileLoader::propagateWeights(Function &F) {
   buildEdges(F);
 
   // Propagate until we converge or we go past the iteration limit.
-  while (Changed && i++ < SampleProfileMaxPropagateIterations) {
+  while (Changed && I++ < SampleProfileMaxPropagateIterations) {
     Changed = propagateThroughEdges(F);
   }
 
   // Generate MD_prof metadata for every branch instruction using the
   // edge weights computed during propagation.
   DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
-  MDBuilder MDB(F.getContext());
+  LLVMContext &Ctx = F.getContext();
+  MDBuilder MDB(Ctx);
   for (auto &BI : F) {
     BasicBlock *BB = &BI;
     TerminatorInst *TI = BB->getTerminator();
@@ -610,24 +1028,44 @@ void SampleProfileLoader::propagateWeights(Function &F) {
     DEBUG(dbgs() << "\nGetting weights for branch at line "
                  << TI->getDebugLoc().getLine() << ".\n");
-    SmallVector<unsigned, 4> Weights;
-    bool AllWeightsZero = true;
+    SmallVector<uint32_t, 4> Weights;
+    uint32_t MaxWeight = 0;
+    DebugLoc MaxDestLoc;
     for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
       BasicBlock *Succ = TI->getSuccessor(I);
       Edge E = std::make_pair(BB, Succ);
-      unsigned Weight = EdgeWeights[E];
+      uint64_t Weight = EdgeWeights[E];
       DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
-      Weights.push_back(Weight);
-      if (Weight != 0)
-        AllWeightsZero = false;
+      // Use uint32_t saturated arithmetic to adjust the incoming weights,
+      // if needed. Sample counts in profiles are 64-bit unsigned values,
+      // but internally branch weights are expressed as 32-bit values.
+      if (Weight > std::numeric_limits<uint32_t>::max()) {
+        DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
+        Weight = std::numeric_limits<uint32_t>::max();
+      }
+      Weights.push_back(static_cast<uint32_t>(Weight));
+      if (Weight != 0) {
+        if (Weight > MaxWeight) {
+          MaxWeight = Weight;
+          MaxDestLoc = Succ->getFirstNonPHIOrDbgOrLifetime()->getDebugLoc();
+        }
+      }
     }
 
     // Only set weights if there is at least one non-zero weight.
     // In any other case, let the analyzer set weights.
-    if (!AllWeightsZero) {
+    if (MaxWeight > 0) {
       DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
       TI->setMetadata(llvm::LLVMContext::MD_prof,
                       MDB.createBranchWeights(Weights));
+      DebugLoc BranchLoc = TI->getDebugLoc();
+      emitOptimizationRemark(
+          Ctx, DEBUG_TYPE, F, MaxDestLoc,
+          Twine("most popular destination for conditional branches at ") +
+              ((BranchLoc) ? Twine(BranchLoc->getFilename() + ":" +
                                    Twine(BranchLoc.getLine()) + ":" +
                                    Twine(BranchLoc.getCol()))
                            : Twine("")));
     } else {
       DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
     }
@@ -649,7 +1087,7 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
   if (DISubprogram *S = getDISubprogram(&F))
     return S->getLine();
 
-  // If could not find the start of \p F, emit a diagnostic to inform the user
+  // If the start of \p F is missing, emit a diagnostic to inform the user
   // about the missed opportunity.
   F.getContext().diagnose(DiagnosticInfoSampleProfile(
       "No debug information found in function " + F.getName() +
@@ -658,6 +1096,17 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
   return 0;
 }
 
+void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
+  DT.reset(new DominatorTree);
+  DT->recalculate(F);
+
+  PDT.reset(new DominatorTreeBase<BasicBlock>(true));
+  PDT->recalculate(F);
+
+  LI.reset(new LoopInfo);
+  LI->analyze(*DT);
+}
+
 /// \brief Generate branch weight metadata for all branches in \p F.
 ///
 /// Branch weights are computed out of instruction samples using a
@@ -710,18 +1159,23 @@ unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
 bool SampleProfileLoader::emitAnnotations(Function &F) {
   bool Changed = false;
 
-  // Initialize invariants used during computation and propagation.
-  HeaderLineno = getFunctionLoc(F);
-  if (HeaderLineno == 0)
+  if (getFunctionLoc(F) == 0)
     return false;
 
   DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
-               << ": " << HeaderLineno << "\n");
+               << ": " << getFunctionLoc(F) << "\n");
+
+  Changed |= emitInlineHints(F);
+
+  Changed |= inlineHotFunctions(F);
 
   // Compute basic block weights.
   Changed |= computeBlockWeights(F);
 
   if (Changed) {
+    // Compute dominance and loop info needed for propagation.
+    computeDominanceAndLoopInfo(F);
+
     // Find equivalence classes.
     findEquivalenceClasses(F);
 
@@ -729,24 +1183,48 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
     propagateWeights(F);
   }
 
+  // If coverage checking was requested, compute it now.
+  if (SampleProfileRecordCoverage) {
+    unsigned Used = CoverageTracker.countUsedRecords(Samples);
+    unsigned Total = CoverageTracker.countBodyRecords(Samples);
+    unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+    if (Coverage < SampleProfileRecordCoverage) {
+      F.getContext().diagnose(DiagnosticInfoSampleProfile(
+          getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+          Twine(Used) + " of " + Twine(Total) + " available profile records (" +
+              Twine(Coverage) + "%) were applied",
+          DS_Warning));
+    }
+  }
+
+  if (SampleProfileSampleCoverage) {
+    uint64_t Used = CoverageTracker.getTotalUsedSamples();
+    uint64_t Total = CoverageTracker.countBodySamples(Samples);
+    unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
+    if (Coverage < SampleProfileSampleCoverage) {
+      F.getContext().diagnose(DiagnosticInfoSampleProfile(
+          getDISubprogram(&F)->getFilename(), getFunctionLoc(F),
+          Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
+              Twine(Coverage) + "%) were applied",
+          DS_Warning));
+    }
+  }
   return Changed;
 }
 
 char SampleProfileLoader::ID = 0;
 INITIALIZE_PASS_BEGIN(SampleProfileLoader, "sample-profile",
                       "Sample Profile loader", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AddDiscriminators)
 INITIALIZE_PASS_END(SampleProfileLoader, "sample-profile",
                     "Sample Profile loader", false, false)
 
 bool SampleProfileLoader::doInitialization(Module &M) {
-  auto ReaderOrErr = SampleProfileReader::create(Filename, M.getContext());
+  auto &Ctx = M.getContext();
+  auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
   if (std::error_code EC = ReaderOrErr.getError()) {
     std::string Msg = "Could not open profile: " + EC.message();
-    M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg));
+    Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
     return false;
   }
   Reader = std::move(ReaderOrErr.get());
@@ -754,22 +1232,32 @@ bool SampleProfileLoader::doInitialization(Module &M) {
   return true;
 }
 
-FunctionPass *llvm::createSampleProfileLoaderPass() {
+ModulePass *llvm::createSampleProfileLoaderPass() {
   return new SampleProfileLoader(SampleProfileFile);
 }
 
-FunctionPass *llvm::createSampleProfileLoaderPass(StringRef Name) {
+ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
   return new SampleProfileLoader(Name);
 }
 
-bool SampleProfileLoader::runOnFunction(Function &F) {
+bool SampleProfileLoader::runOnModule(Module &M) {
   if (!ProfileIsValid)
     return false;
 
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  PDT = &getAnalysis<PostDominatorTree>();
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  Ctx = &F.getParent()->getContext();
+  // Compute the total number of samples collected in this profile.
+  for (const auto &I : Reader->getProfiles())
+    TotalCollectedSamples += I.second.getTotalSamples();
+
+  bool retval = false;
+  for (auto &F : M)
+    if (!F.isDeclaration()) {
+      clearFunctionData();
+      retval |= runOnFunction(F);
+    }
+  return retval;
+}
+
+bool SampleProfileLoader::runOnFunction(Function &F) {
   Samples = Reader->getSamplesFor(F);
   if (!Samples->empty())
     return emitAnnotations(F);
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 956991ad1f95..c94cc7c74a89 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -7,47 +7,31 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass loops over all of the functions in the input module, looking for 
+// This pass loops over all of the functions in the input module, looking for
 // dead declarations and removes them. Dead declarations are declarations of
 // functions for which no implementation is available (i.e., declarations for
 // unused library functions).
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Transforms/IPO.h"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "strip-dead-prototypes"
 
 STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
 
-namespace {
-
-/// @brief Pass to remove unused function declarations.
-class StripDeadPrototypesPass : public ModulePass {
-public:
-  static char ID; // Pass identification, replacement for typeid
-  StripDeadPrototypesPass() : ModulePass(ID) {
-    initializeStripDeadPrototypesPassPass(*PassRegistry::getPassRegistry());
-  }
-  bool runOnModule(Module &M) override;
-};
-
-} // end anonymous namespace
-
-char StripDeadPrototypesPass::ID = 0;
-INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
-                "Strip Unused Function Prototypes", false, false)
-
-bool StripDeadPrototypesPass::runOnModule(Module &M) {
+static bool stripDeadPrototypes(Module &M) {
   bool MadeChange = false;
-  
+
   // Erase dead function prototypes.
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
-    Function *F = I++;
+    Function *F = &*I++;
     // Function must be a prototype and unused.
     if (F->isDeclaration() && F->use_empty()) {
       F->eraseFromParent();
@@ -59,16 +43,42 @@ bool StripDeadPrototypesPass::runOnModule(Module &M) {
   // Erase dead global var prototypes.
   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
        I != E; ) {
-    GlobalVariable *GV = I++;
+    GlobalVariable *GV = &*I++;
     // Global must be a prototype and unused.
     if (GV->isDeclaration() && GV->use_empty())
       GV->eraseFromParent();
   }
-  
+
   // Return an indication of whether we changed anything or not.
   return MadeChange;
 }
 
-ModulePass *llvm::createStripDeadPrototypesPass() {
-  return new StripDeadPrototypesPass();
+PreservedAnalyses StripDeadPrototypesPass::run(Module &M) {
+  if (stripDeadPrototypes(M))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+namespace {
+
+class StripDeadPrototypesLegacyPass : public ModulePass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  StripDeadPrototypesLegacyPass() : ModulePass(ID) {
+    initializeStripDeadPrototypesLegacyPassPass(
+        *PassRegistry::getPassRegistry());
+  }
+  bool runOnModule(Module &M) override {
+    return stripDeadPrototypes(M);
+  }
+};
+
+} // end anonymous namespace
+
+char StripDeadPrototypesLegacyPass::ID = 0;
+INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes",
+                "Strip Unused Function Prototypes", false, false)
+
+ModulePass *llvm::createStripDeadPrototypesPass() {
+  return new StripDeadPrototypesLegacyPass();
 }
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index a4f30c58f936..46f352f7f9f1 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -211,13 +211,13 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
 
   for (Module::global_iterator I = M.global_begin(), E = M.global_end();
        I != E; ++I) {
-    if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+    if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
       if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
         I->setName("");     // Internal symbols can't participate in linkage
   }
 
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
-    if (I->hasLocalLinkage() && llvmUsedValues.count(I) == 0)
+    if (I->hasLocalLinkage() && llvmUsedValues.count(&*I) == 0)
       if (!PreserveDbgInfo || !I->getName().startswith("llvm.dbg"))
         I->setName("");     // Internal symbols can't participate in linkage
     StripSymtab(I->getValueSymbolTable(), PreserveDbgInfo);
@@ -305,6 +305,12 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
   SmallVector<Metadata *, 64> LiveSubprograms;
   DenseSet<const MDNode *> VisitedSet;
 
+  std::set<DISubprogram *> LiveSPs;
+  for (Function &F : M) {
+    if (DISubprogram *SP = F.getSubprogram())
+      LiveSPs.insert(SP);
+  }
+
   for (DICompileUnit *DIC : F.compile_units()) {
     // Create our live subprogram list.
     bool SubprogramChange = false;
@@ -314,7 +320,7 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
       continue;
 
     // If the function referenced by DISP is not null, the function is live.
-    if (DISP->getFunction())
+    if (LiveSPs.count(DISP))
       LiveSubprograms.push_back(DISP);
     else
       SubprogramChange = true;
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 2d2c109f3243..6f49399f57bf 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1,4 +1,4 @@
-//===- InstCombineAddSub.cpp ----------------------------------------------===//
+//===- InstCombineAddSub.cpp ------------------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,6 +17,7 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/PatternMatch.h"
+
 using namespace llvm;
 using namespace PatternMatch;
 
@@ -67,17 +68,17 @@ namespace {
   private:
     bool insaneIntVal(int V) { return V > 4 || V < -4; }
 
-    APFloat *getFpValPtr(void)
+    APFloat *getFpValPtr()
       { return reinterpret_cast<APFloat *>(&FpValBuf.buffer[0]); }
-    const APFloat *getFpValPtr(void) const
+    const APFloat *getFpValPtr() const
       { return reinterpret_cast<const APFloat *>(&FpValBuf.buffer[0]); }
 
-    const APFloat &getFpVal(void) const {
+    const APFloat &getFpVal() const {
      assert(IsFp && BufHasFpVal && "Incorrect state");
       return *getFpValPtr();
     }
 
-    APFloat &getFpVal(void) {
+    APFloat &getFpVal() {
       assert(IsFp && BufHasFpVal && "Incorrect state");
       return *getFpValPtr();
     }
@@ -92,8 +93,8 @@ namespace {
     // TODO: We should get rid of this function when APFloat can be constructed
     //       from a *SIGNED* integer.
     APFloat createAPFloatFromInt(const fltSemantics &Sem, int Val);
-  private:
 
+  private:
     bool IsFp;
 
     // True iff FpValBuf contains an instance of APFloat.
@@ -114,10 +115,10 @@ namespace {
   ///
   class FAddend {
   public:
-    FAddend() { Val = nullptr; }
+    FAddend() : Val(nullptr) {}
 
-    Value *getSymVal (void) const { return Val; }
-    const FAddendCoef &getCoef(void) const { return Coeff; }
+    Value *getSymVal() const { return Val; }
+    const FAddendCoef &getCoef() const { return Coeff; }
 
     bool isConstant() const { return Val == nullptr; }
     bool isZero() const { return Coeff.isZero(); }
@@ -182,7 +183,6 @@ namespace {
     InstCombiner::BuilderTy *Builder;
     Instruction *Instr;
 
-  private:
     // Debugging stuff is clustered here.
     #ifndef NDEBUG
     unsigned CreateInstrNum;
@@ -193,7 +193,8 @@ namespace {
     void incCreateInstNum() {}
     #endif
   };
-}
+
+} // anonymous namespace
 
 //===----------------------------------------------------------------------===//
 //
@@ -602,7 +603,6 @@ Value *FAddCombine::simplify(Instruction *I) {
 }
 
 Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
-
   unsigned AddendNum = Addends.size();
   assert(AddendNum <= 4 && "Too many addends");
 
@@ -886,7 +886,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,
   return Op0ZeroPosition >= Op1OnePosition;
 }
 
-/// WillNotOverflowSignedAdd - Return true if we can prove that:
+/// Return true if we can prove that:
 ///    (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS))
 /// This basically requires proving that the add in the original type would not
 /// overflow to change the sign bit or have a carry out.
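// A standalone sketch (not from this patch) of the property stated above:
// sign-extending after the narrow add matches adding the sign-extended
// operands exactly when the narrow add does not overflow.
static bool sextOfAddEqualsAddOfSext(int32_t LHS, int32_t RHS) {
  // "add (sext LHS), (sext RHS)": the exact 64-bit sum of the operands.
  int64_t WideSum = (int64_t)LHS + (int64_t)RHS;
  // "sext (add LHS, RHS)": 32-bit wrapping add, then sign extension.
  int64_t NarrowSum = (int64_t)(int32_t)((uint32_t)LHS + (uint32_t)RHS);
  // The two sides agree iff the 32-bit signed add did not overflow.
  return WideSum == NarrowSum;
}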
@@ -1118,8 +1118,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
       // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C,
       // transform them into (X + (signbit ^ C))
       if (XorRHS->getValue().isSignBit())
-          return BinaryOperator::CreateAdd(XorLHS,
-                                           ConstantExpr::getXor(XorRHS, CI));
+        return BinaryOperator::CreateAdd(XorLHS,
+                                         ConstantExpr::getXor(XorRHS, CI));
     }
   }
 
@@ -1421,7 +1421,6 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
   return Changed ? &I : nullptr;
 }
 
-
 /// Optimize pointer differences into the same array into a size. Consider:
 ///  &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer
 /// operands to the ptrtoint instructions for the LHS/RHS of the subtract.
@@ -1589,7 +1588,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
     }
   }
 
-
   {
     Value *Y;
     // X-(X+Y) == -Y    X-(Y+X) == -Y
@@ -1611,32 +1609,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
       return BinaryOperator::CreateAnd(A, B);
   }
 
-  // (sub (select (a, c, b)), (select (a, d, b))) -> (select (a, (sub c, d), 0))
-  // (sub (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (sub c, d)))
-  if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
-    if (auto *SI1 = dyn_cast<SelectInst>(Op1)) {
-      if (SI0->getCondition() == SI1->getCondition()) {
-        if (Value *V = SimplifySubInst(
-                SI0->getFalseValue(), SI1->getFalseValue(), I.hasNoSignedWrap(),
-                I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
-          return SelectInst::Create(
-              SI0->getCondition(),
-              Builder->CreateSub(SI0->getTrueValue(), SI1->getTrueValue(), "",
-                                 /*HasNUW=*/I.hasNoUnsignedWrap(),
-                                 /*HasNSW=*/I.hasNoSignedWrap()),
-              V);
-        if (Value *V = SimplifySubInst(SI0->getTrueValue(), SI1->getTrueValue(),
-                                       I.hasNoSignedWrap(),
-                                       I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
-          return SelectInst::Create(
-              SI0->getCondition(), V,
-              Builder->CreateSub(SI0->getFalseValue(), SI1->getFalseValue(), "",
-                                 /*HasNUW=*/I.hasNoUnsignedWrap(),
-                                 /*HasNSW=*/I.hasNoSignedWrap()));
-      }
-    }
-  }
-
   if (Op0->hasOneUse()) {
     Value *Y = nullptr;
     // ((X | Y) - X) --> (~X & Y)
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 15e0889b51b7..95c50d32c820 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -37,9 +37,9 @@ static inline Value *dyn_castNotVal(Value *V) {
   return nullptr;
 }
 
-/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
-/// predicate into a three bit mask. It also returns whether it is an ordered
-/// predicate by reference.
+/// Similar to getICmpCode but for FCmpInst. This encodes a fcmp predicate into
+/// a three bit mask. It also returns whether it is an ordered predicate by
+/// reference.
 static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
   isOrdered = false;
   switch (CC) {
@@ -64,10 +64,10 @@ static unsigned getFCmpCode(FCmpInst::Predicate CC, bool &isOrdered) {
   }
 }
 
-/// getNewICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or a brand
-/// new ICmp instruction. The sign is passed in to determine which kind
-/// of predicate to use in the new icmp instruction.
+/// This is the complement of getICmpCode, which turns an opcode and two
+/// operands into either a constant true or false, or a brand new ICmp
+/// instruction. The sign is passed in to determine which kind of predicate to
+/// use in the new icmp instruction.
 static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
                               InstCombiner::BuilderTy *Builder) {
   ICmpInst::Predicate NewPred;
@@ -76,9 +76,9 @@ static Value *getNewICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
   return Builder->CreateICmp(NewPred, LHS, RHS);
 }
 
-/// getFCmpValue - This is the complement of getFCmpCode, which turns an
-/// opcode and two operands into either a FCmp instruction. isordered is passed
-/// in to determine which kind of predicate to use in the new fcmp instruction.
+/// This is the complement of getFCmpCode, which turns an opcode and two
+/// operands into an FCmp instruction. isordered is passed in to determine
+/// which kind of predicate to use in the new fcmp instruction.
 static Value *getFCmpValue(bool isordered, unsigned code,
                            Value *LHS, Value *RHS,
                            InstCombiner::BuilderTy *Builder) {
@@ -150,14 +150,13 @@ Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
   else //if (Op == Instruction::Xor)
     BinOp = Builder->CreateXor(NewLHS, NewRHS);
 
-  Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy);
   return Builder->CreateCall(F, BinOp);
 }
 
-// OptAndOp - This handles expressions of the form ((val OP C1) & C2).  Where
-// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
-// guaranteed to be a binary operator.
+/// This handles expressions of the form ((val OP C1) & C2).  Where
+/// the Op parameter is 'OP', OpRHS is 'C1', and AndRHS is 'C2'.  Op is
+/// guaranteed to be a binary operator.
 Instruction *InstCombiner::OptAndOp(Instruction *Op,
                                     ConstantInt *OpRHS,
                                     ConstantInt *AndRHS,
@@ -341,10 +340,10 @@ Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
   return Builder->CreateICmpUGT(Add, LowerBound);
 }
 
-// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s with
-// any number of 0s on either side.  The 1s are allowed to wrap from LSB to
-// MSB, so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
-// not, since all 1s are not contiguous.
+/// Returns true iff Val consists of one contiguous run of 1s with any number
+/// of 0s on either side.  The 1s are allowed to wrap from LSB to MSB,
+/// so 0x0000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.  0x0F0F0000 is
+/// not, since all 1s are not contiguous.
 static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
   const APInt& V = Val->getValue();
   uint32_t BitWidth = Val->getType()->getBitWidth();
@@ -357,9 +356,8 @@ static bool isRunOfOnes(ConstantInt *Val, uint32_t &MB, uint32_t &ME) {
   return true;
 }
 
-/// FoldLogicalPlusAnd - This is part of an expression (LHS +/- RHS) & Mask,
-/// where isSub determines whether the operator is a sub.  If we can fold one of
-/// the following xforms:
+/// This is part of an expression (LHS +/- RHS) & Mask, where isSub determines
+/// whether the operator is a sub.  If we can fold one of the following xforms:
 ///
 /// ((A & N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == Mask
 /// ((A | N) +/- B) & Mask -> (A +/- B) & Mask iff N&Mask == 0
@@ -449,8 +447,8 @@ enum MaskedICmpType {
   FoldMskICmp_BMask_NotMixed      =   512
 };
 
-/// return the set of pattern classes (from MaskedICmpType)
-/// that (icmp SCC (A & B), C) satisfies
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that (icmp SCC (A & B), C) satisfies.
 static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
                                     ICmpInst::Predicate SCC)
 {
@@ -538,8 +536,8 @@ static unsigned conjugateICmpMask(unsigned Mask) {
   return NewMask;
 }
 
-/// decomposeBitTestICmp - Decompose an icmp into the form ((X & Y) pred Z)
-/// if possible. The returned predicate is either == or !=. Returns false if
+/// Decompose an icmp into the form ((X & Y) pred Z) if possible.
+/// The returned predicate is either == or !=. Returns false if
 /// decomposition fails.
 static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
                                  Value *&X, Value *&Y, Value *&Z) {
@@ -585,10 +583,9 @@ static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
   return true;
 }
 
-/// foldLogOpOfMaskedICmpsHelper:
-/// handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// return the set of pattern classes (from MaskedICmpType)
-/// that both LHS and RHS satisfy
+/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// Return the set of pattern classes (from MaskedICmpType)
+/// that both LHS and RHS satisfy.
 static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
                                              Value*& B, Value*& C,
                                              Value*& D, Value*& E,
@@ -700,9 +697,9 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
   unsigned left_type = getTypeOfMaskedICmp(A, B, C, LHSCC);
   unsigned right_type = getTypeOfMaskedICmp(A, D, E, RHSCC);
   return left_type & right_type;
 }
-/// foldLogOpOfMaskedICmps:
-/// try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
-/// into a single (icmp(A & X) ==/!= Y)
+
+/// Try to fold (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
+/// into a single (icmp(A & X) ==/!= Y).
 static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
                                      llvm::InstCombiner::BuilderTy *Builder) {
   Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
@@ -879,7 +876,7 @@ Value *InstCombiner::simplifyRangeCheck(ICmpInst *Cmp0, ICmpInst *Cmp1,
   return Builder->CreateICmp(NewPred, Input, RangeEnd);
 }
 
-/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
+/// Fold (icmp)&(icmp) if possible.
 Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
 
@@ -1123,9 +1120,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
   return nullptr;
 }
 
-/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp).  NOTE: Unlike the rest of
-/// instcombine, this returns a Value which should already be inserted into the
-/// function.
+/// Optimize (fcmp)&(fcmp).  NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
 Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
       RHS->getPredicate() == FCmpInst::FCMP_ORD) {
@@ -1203,6 +1199,54 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   return nullptr;
 }
 
+/// Match De Morgan's Laws:
+///   (~A & ~B) == (~(A | B))
+///   (~A | ~B) == (~(A & B))
+static Instruction *matchDeMorgansLaws(BinaryOperator &I,
+                                       InstCombiner::BuilderTy *Builder) {
+  auto Opcode = I.getOpcode();
+  assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+         "Trying to match De Morgan's Laws with something other than and/or");
+  // Flip the logic operation.
+  if (Opcode == Instruction::And)
+    Opcode = Instruction::Or;
+  else
+    Opcode = Instruction::And;
+
+  Value *Op0 = I.getOperand(0);
+  Value *Op1 = I.getOperand(1);
+  // TODO: Use pattern matchers instead of dyn_cast.
+  if (Value *Op0NotVal = dyn_castNotVal(Op0))
+    if (Value *Op1NotVal = dyn_castNotVal(Op1))
+      if (Op0->hasOneUse() && Op1->hasOneUse()) {
+        Value *LogicOp = Builder->CreateBinOp(Opcode, Op0NotVal, Op1NotVal,
+                                              I.getName() + ".demorgan");
+        return BinaryOperator::CreateNot(LogicOp);
+      }
+
+  // De Morgan's Law in disguise:
+  //   (zext(bool A) ^ 1) & (zext(bool B) ^ 1) -> zext(~(A | B))
+  //   (zext(bool A) ^ 1) | (zext(bool B) ^ 1) -> zext(~(A & B))
+  Value *A = nullptr;
+  Value *B = nullptr;
+  ConstantInt *C1 = nullptr;
+  if (match(Op0, m_OneUse(m_Xor(m_ZExt(m_Value(A)), m_ConstantInt(C1)))) &&
+      match(Op1, m_OneUse(m_Xor(m_ZExt(m_Value(B)), m_Specific(C1))))) {
+    // TODO: This check could be loosened to handle different type sizes.
+    // Alternatively, we could fix the definition of m_Not to recognize a not
+    // operation hidden by a zext?
+    if (A->getType()->isIntegerTy(1) && B->getType()->isIntegerTy(1) &&
+        C1->isOne()) {
+      Value *LogicOp = Builder->CreateBinOp(Opcode, A, B,
+                                            I.getName() + ".demorgan");
+      Value *Not = Builder->CreateNot(LogicOp);
+      return CastInst::CreateZExtOrBitCast(Not, I.getType());
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
   bool Changed = SimplifyAssociativeOrCommutative(I);
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -1273,6 +1317,10 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         if (Value *V = FoldLogicalPlusAnd(Op0LHS, Op0RHS, AndRHS, true, I))
           return BinaryOperator::CreateAnd(V, AndRHS);
 
+        // -x & 1 -> x & 1
+        if (AndRHSMask == 1 && match(Op0LHS, m_Zero()))
+          return BinaryOperator::CreateAnd(Op0RHS, AndRHS);
+
         // (A - N) & AndRHS -> -N & AndRHS iff A&AndRHS==0 and AndRHS
         // has 1's for all bits that the subtraction with A might affect.
         if (Op0I->hasOneUse() && !match(Op0LHS, m_Zero())) {
@@ -1329,15 +1377,8 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
           return NV;
   }
 
-
-  // (~A & ~B) == (~(A | B)) - De Morgan's Law
-  if (Value *Op0NotVal = dyn_castNotVal(Op0))
-    if (Value *Op1NotVal = dyn_castNotVal(Op1))
-      if (Op0->hasOneUse() && Op1->hasOneUse()) {
-        Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
-                                      I.getName()+".demorgan");
-        return BinaryOperator::CreateNot(Or);
-      }
+  if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+    return DeMorgan;
 
   {
     Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
@@ -1446,14 +1487,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
         return ReplaceInstUsesWith(I, Res);
 
-  // fold (and (cast A), (cast B)) -> (cast (and A, B))
-  if (CastInst *Op0C = dyn_cast<CastInst>(Op0))
+  if (CastInst *Op0C = dyn_cast<CastInst>(Op0)) {
+    Value *Op0COp = Op0C->getOperand(0);
+    Type *SrcTy = Op0COp->getType();
+    // fold (and (cast A), (cast B)) -> (cast (and A, B))
     if (CastInst *Op1C = dyn_cast<CastInst>(Op1)) {
-      Type *SrcTy = Op0C->getOperand(0)->getType();
       if (Op0C->getOpcode() == Op1C->getOpcode() && // same cast kind ?
          SrcTy == Op1C->getOperand(0)->getType() &&
          SrcTy->isIntOrIntVectorTy()) {
-        Value *Op0COp = Op0C->getOperand(0), *Op1COp = Op1C->getOperand(0);
+        Value *Op1COp = Op1C->getOperand(0);
 
         // Only do this if the casts both really cause code to be generated.
         if (ShouldOptimizeCast(Op0C->getOpcode(), Op0COp, I.getType()) &&
@@ -1478,6 +1520,20 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
       }
     }
 
+    // If we are masking off the sign bit of a floating-point value, convert
+    // this to the canonical fabs intrinsic call and cast back to integer.
+    // The backend should know how to optimize fabs().
+    // TODO: This transform should also apply to vectors.
+    ConstantInt *CI;
+    if (isa<BitCastInst>(Op0C) && SrcTy->isFloatingPointTy() &&
+        match(Op1, m_ConstantInt(CI)) && CI->isMaxValue(true)) {
+      Module *M = I.getModule();
+      Function *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, SrcTy);
+      Value *Call = Builder->CreateCall(Fabs, Op0COp, "fabs");
+      return CastInst::CreateBitOrPointerCast(Call, I.getType());
+    }
+  }
+
   {
     Value *X = nullptr;
     bool OpsSwapped = false;
@@ -1509,163 +1565,195 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
 
   return Changed ? &I : nullptr;
 }
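// A standalone sketch (not from this patch) of the bit-level fact behind the
// fabs transform above: for an IEEE-754 double, clearing the top bit of the
// representation yields the absolute value, NaNs included.
#include <cstdint>
#include <cstring>
static double fabsViaSignMask(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits)); // bitcast double -> u64
  Bits &= ~(UINT64_C(1) << 63);         // mask off only the sign bit
  std::memcpy(&X, &Bits, sizeof(X));    // bitcast back
  return X;                             // equals std::fabs(X)
}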
-/// CollectBSwapParts - Analyze the specified subexpression and see if it is
-/// capable of providing pieces of a bswap.  The subexpression provides pieces
-/// of a bswap if it is proven that each of the non-zero bytes in the output of
-/// the expression came from the corresponding "byte swapped" byte in some other
-/// value.  For example, if the current subexpression is "(shl i32 %X, 24)" then
-/// we know that the expression deposits the low byte of %X into the high byte
-/// of the bswap result and that all other bytes are zero.  This expression is
-/// accepted, the high byte of ByteValues is set to X to indicate a correct
-/// match.
+
+/// Analyze the specified subexpression and see if it is capable of providing
+/// pieces of a bswap or bitreverse. The subexpression provides a potential
+/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in
+/// the output of the expression came from a corresponding bit in some other
+/// value. This function is recursive, and the end result is a mapping of
+/// (value, bitnumber) to bitnumber. It is the caller's responsibility to
+/// validate that all `value`s are identical and that the bitnumber to bitnumber
+/// mapping is correct for a bswap or bitreverse.
+///
+/// For example, if the current subexpression is "(shl i32 %X, 24)" then we know
+/// that the expression deposits the low byte of %X into the high byte of the
+/// result and that all other bits are zero. This expression is accepted,
+/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7].
 ///
 /// This function returns true if the match was unsuccessful and false otherwise.
 /// On entry to the function the "OverallLeftShift" is a signed integer value
-/// indicating the number of bytes that the subexpression is later shifted.  For
+/// indicating the number of bits that the subexpression is later shifted.  For
 /// example, if the expression is later right shifted by 16 bits, the
-/// OverallLeftShift value would be -2 on entry.  This is used to specify which
-/// byte of ByteValues is actually being set.
+/// OverallLeftShift value would be -16 on entry.  This is used to specify which
+/// bits of BitValues are actually being set.
 ///
-/// Similarly, ByteMask is a bitmask where a bit is clear if its corresponding
-/// byte is masked to zero by a user.  For example, in (X & 255), X will be
-/// processed with a bytemask of 1.  Because bytemask is 32-bits, this limits
-/// this function to working on up to 32-byte (256 bit) values.  ByteMask is
-/// always in the local (OverallLeftShift) coordinate space.
+/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding
+/// bit is masked to zero by a user.  For example, in (X & 255), X will be
+/// processed with a bitmask of 255.  BitMask is always in the local
+/// (OverallLeftShift) coordinate space.
 ///
-static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
-                              SmallVectorImpl<Value *> &ByteValues) {
+static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask,
+                            SmallVectorImpl<Value *> &BitValues,
+                            SmallVectorImpl<int> &BitProvenance) {
   if (Instruction *I = dyn_cast<Instruction>(V)) {
     // If this is an or instruction, it may be an inner node of the bswap.
-    if (I->getOpcode() == Instruction::Or) {
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues) ||
-             CollectBSwapParts(I->getOperand(1), OverallLeftShift, ByteMask,
-                               ByteValues);
-    }
+    if (I->getOpcode() == Instruction::Or)
+      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+                             BitValues, BitProvenance) ||
+             CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask,
+                             BitValues, BitProvenance);
 
-    // If this is a logical shift by a constant multiple of 8, recurse with
-    // OverallLeftShift and ByteMask adjusted.
+    // If this is a logical shift by a constant, recurse with OverallLeftShift
+    // and BitMask adjusted.
     if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
       unsigned ShAmt =
-        cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
-      // Ensure the shift amount is defined and of a byte value.
-      if ((ShAmt & 7) || (ShAmt > 8*ByteValues.size()))
+          cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
+      // Ensure the shift amount is defined.
+      if (ShAmt > BitValues.size())
        return true;
 
-      unsigned ByteShift = ShAmt >> 3;
+      unsigned BitShift = ShAmt;
       if (I->getOpcode() == Instruction::Shl) {
-        // X << 2 -> collect(X, +2)
-        OverallLeftShift += ByteShift;
-        ByteMask >>= ByteShift;
+        // X << C -> collect(X, +C)
+        OverallLeftShift += BitShift;
+        BitMask = BitMask.lshr(BitShift);
       } else {
-        // X >>u 2 -> collect(X, -2)
-        OverallLeftShift -= ByteShift;
-        ByteMask <<= ByteShift;
-        ByteMask &= (~0U >> (32-ByteValues.size()));
+        // X >>u C -> collect(X, -C)
+        OverallLeftShift -= BitShift;
+        BitMask = BitMask.shl(BitShift);
       }
 
-      if (OverallLeftShift >= (int)ByteValues.size()) return true;
-      if (OverallLeftShift <= -(int)ByteValues.size()) return true;
+      if (OverallLeftShift >= (int)BitValues.size())
+        return true;
+      if (OverallLeftShift <= -(int)BitValues.size())
+        return true;
 
-      return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask,
-                               ByteValues);
+      return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
+                             BitValues, BitProvenance);
     }
 
-    // If this is a logical 'and' with a mask that clears bytes, clear the
-    // corresponding bytes in ByteMask.
+    // If this is a logical 'and' with a mask that clears bits, clear the
+    // corresponding bits in BitMask.
     if (I->getOpcode() == Instruction::And &&
         isa<ConstantInt>(I->getOperand(1))) {
-      // Scan every byte of the and mask, seeing if the byte is either 0 or 255.
-      unsigned NumBytes = ByteValues.size();
-      APInt Byte(I->getType()->getPrimitiveSizeInBits(), 255);
+      unsigned NumBits = BitValues.size();
+      APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
       const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue();
 
-      for (unsigned i = 0; i != NumBytes; ++i, Byte <<= 8) {
-        // If this byte is masked out by a later operation, we don't care what
+      for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) {
+        // If this bit is masked out by a later operation, we don't care what
         // the and mask is.
-        if ((ByteMask & (1 << i)) == 0)
+        if (BitMask[i] == 0)
          continue;
 
-        // If the AndMask is all zeros for this byte, clear the bit.
-        APInt MaskB = AndMask & Byte;
+        // If the AndMask is zero for this bit, clear the bit.
+ APInt MaskB = AndMask & Bit; if (MaskB == 0) { - ByteMask &= ~(1U << i); + BitMask.clearBit(i); continue; } - // If the AndMask is not all ones for this byte, it's not a bytezap. - if (MaskB != Byte) - return true; - - // Otherwise, this byte is kept. + // Otherwise, this bit is kept. } - return CollectBSwapParts(I->getOperand(0), OverallLeftShift, ByteMask, - ByteValues); + return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, + BitValues, BitProvenance); } } // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be - // the input value to the bswap. Some observations: 1) if more than one byte - // is demanded from this input, then it could not be successfully assembled - // into a byteswap. At least one of the two bytes would not be aligned with - // their ultimate destination. - if (!isPowerOf2_32(ByteMask)) return true; - unsigned InputByteNo = countTrailingZeros(ByteMask); - - // 2) The input and ultimate destinations must line up: if byte 3 of an i32 - // is demanded, it needs to go into byte 0 of the result. This means that the - // byte needs to be shifted until it lands in the right byte bucket. The - // shift amount depends on the position: if the byte is coming from the high - // part of the value (e.g. byte 3) then it must be shifted right. If from the - // low part, it must be shifted left. - unsigned DestByteNo = InputByteNo + OverallLeftShift; - if (ByteValues.size()-1-DestByteNo != InputByteNo) + // the input value to the bswap/bitreverse. To be part of a bswap or + // bitreverse we must be demanding a contiguous range of bits from it. + unsigned InputBitLen = BitMask.countPopulation(); + unsigned InputBitNo = BitMask.countTrailingZeros(); + if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo != + InputBitLen) + // Not a contiguous set range of bits! return true; - // If the destination byte value is already defined, the values are or'd - // together, which isn't a bswap (unless it's an or of the same bits). - if (ByteValues[DestByteNo] && ByteValues[DestByteNo] != V) + // We know we're moving a contiguous range of bits from the input to the + // output. Record which bits in the output came from which bits in the input. + unsigned DestBitNo = InputBitNo + OverallLeftShift; + for (unsigned I = 0; I < InputBitLen; ++I) + BitProvenance[DestBitNo + I] = InputBitNo + I; + + // If the destination bit value is already defined, the values are or'd + // together, which isn't a bswap/bitreverse (unless it's an or of the same + // bits). + if (BitValues[DestBitNo] && BitValues[DestBitNo] != V) return true; - ByteValues[DestByteNo] = V; + for (unsigned I = 0; I < InputBitLen; ++I) + BitValues[DestBitNo + I] = V; + return false; } -/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom. -/// If so, insert the new bswap intrinsic and return it. -Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) { +static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, + unsigned BitWidth) { + if (From % 8 != To % 8) + return false; + // Convert from bit indices to byte indices and check for a byte reversal. + From >>= 3; + To >>= 3; + BitWidth >>= 3; + return From == BitWidth - To - 1; +} + +static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, + unsigned BitWidth) { + return From == BitWidth - To - 1; +} + +/// Given an OR instruction, check to see if this is a bswap or bitreverse +/// idiom. If so, insert the new intrinsic and return it. 
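// Aside: a minimal standalone sketch (plain C++, not LLVM code; okForBSwap and
// okForBitReverse are local stand-ins for the two predicates defined above)
// showing which (source bit, destination bit) pairs each check accepts.
#include <cassert>

static bool okForBSwap(unsigned From, unsigned To, unsigned BitWidth) {
  if (From % 8 != To % 8)
    return false;                               // keep intra-byte position
  return From / 8 == BitWidth / 8 - To / 8 - 1; // bytes must be mirrored
}

static bool okForBitReverse(unsigned From, unsigned To, unsigned BitWidth) {
  return From == BitWidth - To - 1;             // bits must be mirrored
}

int main() {
  // For a 32-bit bswap, source bit 0 (low bit of byte 0) must land at
  // destination bit 24 (low bit of byte 3)...
  assert(okForBSwap(0, 24, 32));
  // ...while a 32-bit bitreverse sends source bit 0 to destination bit 31.
  assert(okForBitReverse(0, 31, 32));
  assert(!okForBitReverse(0, 24, 32)); // a byteswap is not a bit reversal
  return 0;
}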
+Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) {
   IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
-  if (!ITy || ITy->getBitWidth() % 16 ||
-      // ByteMask only allows up to 32-byte values.
-      ITy->getBitWidth() > 32*8)
-    return nullptr;   // Can only bswap pairs of bytes.  Can't do vectors.
-
-  /// ByteValues - For each byte of the result, we keep track of which value
-  /// defines each byte.
-  SmallVector<Value*, 8> ByteValues;
-  ByteValues.resize(ITy->getBitWidth()/8);
-
+  if (!ITy)
+    return nullptr;   // Can't do vectors.
+  unsigned BW = ITy->getBitWidth();
+
+  /// We keep track of which bit (BitProvenance) inside which value (BitValues)
+  /// defines each bit in the result.
+  SmallVector<Value *, 8> BitValues(BW, nullptr);
+  SmallVector<int, 8> BitProvenance(BW, -1);
+
   // Try to find all the pieces corresponding to the bswap.
-  uint32_t ByteMask = ~0U >> (32-ByteValues.size());
-  if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
+  APInt BitMask = APInt::getAllOnesValue(BitValues.size());
+  if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance))
     return nullptr;
 
-  // Check to see if all of the bytes come from the same value.
-  Value *V = ByteValues[0];
-  if (!V) return nullptr;  // Didn't find a byte?  Must be zero.
+  // Check to see if all of the bits come from the same value.
+  Value *V = BitValues[0];
+  if (!V) return nullptr;  // Didn't find a bit?  Must be zero.
 
-  // Check to make sure that all of the bytes come from the same value.
-  for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
-    if (ByteValues[i] != V)
-      return nullptr;
-  Module *M = I.getParent()->getParent()->getParent();
-  Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
+  if (!std::all_of(BitValues.begin(), BitValues.end(),
+                   [&](const Value *X) { return X == V; }))
+    return nullptr;
+
+  // Now, is the bit permutation correct for a bswap or a bitreverse? We can
+  // only byteswap values with an even number of bytes.
+  bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;
+  for (unsigned i = 0, e = BitValues.size(); i != e; ++i) {
+    OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW);
+    OKForBitReverse &=
+        bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW);
+  }
+
+  Intrinsic::ID Intrin;
+  if (OKForBSwap)
+    Intrin = Intrinsic::bswap;
+  else if (OKForBitReverse)
+    Intrin = Intrinsic::bitreverse;
+  else
+    return nullptr;
+
+  Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy);
   return CallInst::Create(F, V);
 }
 
-/// MatchSelectFromAndOr - We have an expression of the form (A&C)|(B&D).  Check
-/// If A is (cond?-1:0) and either B or D is ~(cond?-1,0) or (cond?0,-1), then
-/// we can simplify this expression to "cond ? C : D or B".
+/// We have an expression of the form (A&C)|(B&D). If A is (cond?-1:0) and
+/// either B or D is ~(cond?-1:0) or (cond?0:-1), then we can simplify this
+/// expression to "cond ? C : D or B".
 static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
                                          Value *C, Value *D) {
   // If A is not a select of -1/0, this cannot match.
@@ -1688,7 +1776,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
   return nullptr;
 }
 
-/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
+/// Fold (icmp)|(icmp) if possible.
 Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
                                    Instruction *CxtI) {
   ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
@@ -1905,14 +1993,14 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   case ICmpInst::ICMP_EQ:
     if (LHS->getOperand(0) == RHS->getOperand(0)) {
       // if LHSCst and RHSCst differ only by one bit:
-      // (A == C1 || A == C2) -> (A & ~(C1 ^ C2)) == C1
+      // (A == C1 || A == C2) -> (A | (C1 ^ C2)) == C2
       assert(LHSCst->getValue().ule(RHSCst->getValue()));
       APInt Xor = LHSCst->getValue() ^ RHSCst->getValue();
       if (Xor.isPowerOf2()) {
-        Value *NegCst = Builder->getInt(~Xor);
-        Value *And = Builder->CreateAnd(LHS->getOperand(0), NegCst);
-        return Builder->CreateICmp(ICmpInst::ICMP_EQ, And, LHSCst);
+        Value *Cst = Builder->getInt(Xor);
+        Value *Or = Builder->CreateOr(LHS->getOperand(0), Cst);
+        return Builder->CreateICmp(ICmpInst::ICMP_EQ, Or, RHSCst);
       }
     }
 
@@ -2020,9 +2108,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   return nullptr;
 }
 
-/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp).  NOTE: Unlike the rest of
-/// instcombine, this returns a Value which should already be inserted into the
-/// function.
+/// Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of instcombine, this returns
+/// a Value which should already be inserted into the function.
 Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   if (LHS->getPredicate() == FCmpInst::FCMP_UNO &&
       RHS->getPredicate() == FCmpInst::FCMP_UNO &&
@@ -2080,7 +2167,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
   return nullptr;
 }
 
-/// FoldOrWithConstants - This helper function folds:
+/// This helper function folds:
 ///
 ///     ((A | B) & C1) | (B & C2)
 ///
@@ -2199,14 +2286,18 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   ConstantInt *C1 = nullptr, *C2 = nullptr;
 
   // (A | B) | C and A | (B | C) -> bswap if possible.
+  bool OrOfOrs = match(Op0, m_Or(m_Value(), m_Value())) ||
+                 match(Op1, m_Or(m_Value(), m_Value()));
   // (A >> B) | (C << D) and (A << B) | (C >> D) -> bswap if possible.
-  if (match(Op0, m_Or(m_Value(), m_Value())) ||
-      match(Op1, m_Or(m_Value(), m_Value())) ||
-      (match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
-       match(Op1, m_LogicalShift(m_Value(), m_Value())))) {
-    if (Instruction *BSwap = MatchBSwap(I))
+  bool OrOfShifts = match(Op0, m_LogicalShift(m_Value(), m_Value())) &&
+                    match(Op1, m_LogicalShift(m_Value(), m_Value()));
+  // (A & B) | (C & D) -> bswap if possible.
+  bool OrOfAnds = match(Op0, m_And(m_Value(), m_Value())) &&
+                  match(Op1, m_And(m_Value(), m_Value()));
+
+  if (OrOfOrs || OrOfShifts || OrOfAnds)
+    if (Instruction *BSwap = MatchBSwapOrBitReverse(I))
       return BSwap;
-  }
 
   // (X^C)|Y -> (X|Y)^C iff Y&C == 0
   if (Op0->hasOneUse() &&
@@ -2360,14 +2451,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
   if (match(Op0, m_And(m_Or(m_Specific(Op1), m_Value(C)), m_Value(A))))
     return BinaryOperator::CreateOr(Op1, Builder->CreateAnd(A, C));
 
-  // (~A | ~B) == (~(A & B)) - De Morgan's Law
-  if (Value *Op0NotVal = dyn_castNotVal(Op0))
-    if (Value *Op1NotVal = dyn_castNotVal(Op1))
-      if (Op0->hasOneUse() && Op1->hasOneUse()) {
-        Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
-                                        I.getName()+".demorgan");
-        return BinaryOperator::CreateNot(And);
-      }
+  if (Instruction *DeMorgan = matchDeMorgansLaws(I, Builder))
+    return DeMorgan;
 
   // Canonicalize xor to the RHS.
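// Aside: a tiny standalone check (plain C++ harness, not LLVM code) of the
// identity the matchDeMorgansLaws rewrite above relies on:
// (~A | ~B) == ~(A & B), which trades two 'not' instructions for one.
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      assert(uint8_t(~A | ~B) == uint8_t(~(A & B))); // exhaustive over i8
  return 0;
}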
bool SwappedForXor = false; diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 6de380bcad67..e3634f269cf5 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -67,8 +67,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { unsigned CopyAlign = MI->getAlignment(); if (CopyAlign < MinAlign) { - MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), - MinAlign, false)); + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), MinAlign, false)); return MI; } @@ -198,12 +197,140 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { return nullptr; } +static Value *SimplifyX86immshift(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder) { + bool LogicalShift = false; + bool ShiftLeft = false; + + switch (II.getIntrinsicID()) { + default: + return nullptr; + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_avx2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_avx2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: + LogicalShift = false; ShiftLeft = false; + break; + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: + case Intrinsic::x86_sse2_psrli_d: + case Intrinsic::x86_sse2_psrli_q: + case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrl_d: + case Intrinsic::x86_avx2_psrl_q: + case Intrinsic::x86_avx2_psrl_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + LogicalShift = true; ShiftLeft = false; + break; + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + case Intrinsic::x86_avx2_psll_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + LogicalShift = true; ShiftLeft = true; + break; + } + assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left"); + + // Simplify if count is constant. + auto Arg1 = II.getArgOperand(1); + auto CAZ = dyn_cast(Arg1); + auto CDV = dyn_cast(Arg1); + auto CInt = dyn_cast(Arg1); + if (!CAZ && !CDV && !CInt) + return nullptr; + + APInt Count(64, 0); + if (CDV) { + // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector + // operand to compute the shift amount. + auto VT = cast(CDV->getType()); + unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits(); + assert((64 % BitWidth) == 0 && "Unexpected packed shift size"); + unsigned NumSubElts = 64 / BitWidth; + + // Concatenate the sub-elements to create the 64-bit value. + for (unsigned i = 0; i != NumSubElts; ++i) { + unsigned SubEltIdx = (NumSubElts - 1) - i; + auto SubElt = cast(CDV->getElementAsConstant(SubEltIdx)); + Count = Count.shl(BitWidth); + Count |= SubElt->getValue().zextOrTrunc(64); + } + } + else if (CInt) + Count = CInt->getValue(); + + auto Vec = II.getArgOperand(0); + auto VT = cast(Vec->getType()); + auto SVT = VT->getElementType(); + unsigned VWidth = VT->getNumElements(); + unsigned BitWidth = SVT->getPrimitiveSizeInBits(); + + // If shift-by-zero then just return the original value. + if (Count == 0) + return Vec; + + // Handle cases when Shift >= BitWidth. 
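// Aside: a standalone scalar model (hypothetical lshrLane/ashrLane helpers,
// not the intrinsic definitions) of the out-of-range behavior handled next:
// logical shifts by Count >= BitWidth produce zero, while arithmetic shifts
// act as if the count were clamped to BitWidth - 1.
#include <cassert>
#include <cstdint>

static uint16_t lshrLane(uint16_t V, uint64_t Count) {
  return Count >= 16 ? uint16_t(0) : uint16_t(V >> Count);
}

static int16_t ashrLane(int16_t V, uint64_t Count) {
  if (Count >= 16)
    Count = 15; // clamp: every result bit becomes a copy of the sign bit
  // Right shift of a negative value is arithmetic on mainstream targets
  // (and guaranteed from C++20 onward).
  return int16_t(V >> Count);
}

int main() {
  assert(lshrLane(0x8000, 64) == 0);                       // all bits out
  assert(ashrLane(int16_t(0x8000), 64) == int16_t(0xFFFF)); // sign smeared
  return 0;
}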
+ if (Count.uge(BitWidth)) { + // If LogicalShift - just return zero. + if (LogicalShift) + return ConstantAggregateZero::get(VT); + + // If ArithmeticShift - clamp Shift to (BitWidth - 1). + Count = APInt(64, BitWidth - 1); + } + + // Get a constant vector of the same type as the first operand. + auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth)); + auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt); + + if (ShiftLeft) + return Builder.CreateShl(Vec, ShiftVec); + + if (LogicalShift) + return Builder.CreateLShr(Vec, ShiftVec); + + return Builder.CreateAShr(Vec, ShiftVec); +} + +static Value *SimplifyX86extend(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder, + bool SignExtend) { + VectorType *SrcTy = cast(II.getArgOperand(0)->getType()); + VectorType *DstTy = cast(II.getType()); + unsigned NumDstElts = DstTy->getNumElements(); + + // Extract a subvector of the first NumDstElts lanes and sign/zero extend. + SmallVector ShuffleMask; + for (int i = 0; i != (int)NumDstElts; ++i) + ShuffleMask.push_back(i); + + Value *SV = Builder.CreateShuffleVector(II.getArgOperand(0), + UndefValue::get(SrcTy), ShuffleMask); + return SignExtend ? Builder.CreateSExt(SV, DstTy) + : Builder.CreateZExt(SV, DstTy); +} + static Value *SimplifyX86insertps(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder) { if (auto *CInt = dyn_cast(II.getArgOperand(2))) { VectorType *VecTy = cast(II.getType()); assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type"); - + // The immediate permute control byte looks like this: // [3:0] - zero mask for each 32-bit lane // [5:4] - select one 32-bit destination lane @@ -248,12 +375,202 @@ static Value *SimplifyX86insertps(const IntrinsicInst &II, // Replace the selected destination lane with the selected source lane. ShuffleMask[DestLane] = SourceLane + 4; } - + return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask); } return nullptr; } +/// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding +/// or conversion to a shuffle vector. +static Value *SimplifyX86extrq(IntrinsicInst &II, Value *Op0, + ConstantInt *CILength, ConstantInt *CIIndex, + InstCombiner::BuilderTy &Builder) { + auto LowConstantHighUndef = [&](uint64_t Val) { + Type *IntTy64 = Type::getInt64Ty(II.getContext()); + Constant *Args[] = {ConstantInt::get(IntTy64, Val), + UndefValue::get(IntTy64)}; + return ConstantVector::get(Args); + }; + + // See if we're dealing with constant values. + Constant *C0 = dyn_cast(Op0); + ConstantInt *CI0 = + C0 ? dyn_cast(C0->getAggregateElement((unsigned)0)) + : nullptr; + + // Attempt to constant fold. + if (CILength && CIIndex) { + // From AMD documentation: "The bit index and field length are each six + // bits in length other bits of the field are ignored." + APInt APIndex = CIIndex->getValue().zextOrTrunc(6); + APInt APLength = CILength->getValue().zextOrTrunc(6); + + unsigned Index = APIndex.getZExtValue(); + + // From AMD documentation: "a value of zero in the field length is + // defined as length of 64". + unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue(); + + // From AMD documentation: "If the sum of the bit index + length field + // is greater than 64, the results are undefined". + unsigned End = Index + Length; + + // Note that both field index and field length are 8-bit quantities. + // Since variables 'Index' and 'Length' are unsigned values + // obtained from zero-extending field index and field length + // respectively, their sum should never wrap around. 
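// Aside: a standalone scalar model (hypothetical 'extrq' helper, not the
// intrinsic) of the field-extract semantics being folded here: both fields
// are six bits, Length == 0 encodes 64, and Index + Length > 64 is undefined
// per the AMD documentation, which is why the code below returns undef.
#include <cassert>
#include <cstdint>

// Returns false to signal "result undefined" instead of extracting.
static bool extrq(uint64_t Src, unsigned Index, unsigned Length,
                  uint64_t &Out) {
  Index &= 63;
  Length &= 63;
  if (Length == 0)
    Length = 64;
  if (Index + Length > 64)
    return false;
  Out = Src >> Index;
  if (Length < 64)
    Out &= (1ULL << Length) - 1; // keep only the low Length bits
  return true;
}

int main() {
  uint64_t Out;
  assert(extrq(0xAABBCCDDEEFF0011ULL, 8, 16, Out) && Out == 0xFF00);
  assert(!extrq(0, 60, 8, Out)); // 60 + 8 > 64 -> undefined
  return 0;
}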
+ if (End > 64) + return UndefValue::get(II.getType()); + + // If we are inserting whole bytes, we can convert this to a shuffle. + // Lowering can recognize EXTRQI shuffle masks. + if ((Length % 8) == 0 && (Index % 8) == 0) { + // Convert bit indices to byte indices. + Length /= 8; + Index /= 8; + + Type *IntTy8 = Type::getInt8Ty(II.getContext()); + Type *IntTy32 = Type::getInt32Ty(II.getContext()); + VectorType *ShufTy = VectorType::get(IntTy8, 16); + + SmallVector ShuffleMask; + for (int i = 0; i != (int)Length; ++i) + ShuffleMask.push_back( + Constant::getIntegerValue(IntTy32, APInt(32, i + Index))); + for (int i = Length; i != 8; ++i) + ShuffleMask.push_back( + Constant::getIntegerValue(IntTy32, APInt(32, i + 16))); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(UndefValue::get(IntTy32)); + + Value *SV = Builder.CreateShuffleVector( + Builder.CreateBitCast(Op0, ShufTy), + ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask)); + return Builder.CreateBitCast(SV, II.getType()); + } + + // Constant Fold - shift Index'th bit to lowest position and mask off + // Length bits. + if (CI0) { + APInt Elt = CI0->getValue(); + Elt = Elt.lshr(Index).zextOrTrunc(Length); + return LowConstantHighUndef(Elt.getZExtValue()); + } + + // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI. + if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) { + Value *Args[] = {Op0, CILength, CIIndex}; + Module *M = II.getModule(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi); + return Builder.CreateCall(F, Args); + } + } + + // Constant Fold - extraction from zero is always {zero, undef}. + if (CI0 && CI0->equalsInt(0)) + return LowConstantHighUndef(0); + + return nullptr; +} + +/// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant +/// folding or conversion to a shuffle vector. +static Value *SimplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, + APInt APLength, APInt APIndex, + InstCombiner::BuilderTy &Builder) { + + // From AMD documentation: "The bit index and field length are each six bits + // in length other bits of the field are ignored." + APIndex = APIndex.zextOrTrunc(6); + APLength = APLength.zextOrTrunc(6); + + // Attempt to constant fold. + unsigned Index = APIndex.getZExtValue(); + + // From AMD documentation: "a value of zero in the field length is + // defined as length of 64". + unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue(); + + // From AMD documentation: "If the sum of the bit index + length field + // is greater than 64, the results are undefined". + unsigned End = Index + Length; + + // Note that both field index and field length are 8-bit quantities. + // Since variables 'Index' and 'Length' are unsigned values + // obtained from zero-extending field index and field length + // respectively, their sum should never wrap around. + if (End > 64) + return UndefValue::get(II.getType()); + + // If we are inserting whole bytes, we can convert this to a shuffle. + // Lowering can recognize INSERTQI shuffle masks. + if ((Length % 8) == 0 && (Index % 8) == 0) { + // Convert bit indices to byte indices. 
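// Aside: a standalone sketch (plain C++; Mask/Pos are local names) of the
// 16 x i8 shuffle mask this byte-aligned INSERTQ path builds: lanes select
// Op0 bytes via indices 0..15 and Op1 bytes via indices 16..31, and the
// upper eight result bytes are left undef (-1). Shown for Index = 8,
// Length = 16, i.e. inserting two bytes of Op1 at byte 1 of Op0.
#include <cstdio>

int main() {
  const unsigned Index = 8, Length = 16; // bit positions, byte aligned
  const unsigned ByteIdx = Index / 8, ByteLen = Length / 8;
  int Mask[16];
  unsigned Pos = 0;
  for (unsigned i = 0; i != ByteIdx; ++i)
    Mask[Pos++] = int(i);                       // leading Op0 bytes kept
  for (unsigned i = 0; i != ByteLen; ++i)
    Mask[Pos++] = int(i + 16);                  // inserted Op1 bytes
  for (unsigned i = ByteIdx + ByteLen; i != 8; ++i)
    Mask[Pos++] = int(i);                       // trailing Op0 bytes kept
  for (; Pos != 16; ++Pos)
    Mask[Pos] = -1;                             // upper half undef
  for (int M : Mask)
    std::printf("%d ", M); // prints: 0 16 17 3 4 5 6 7 -1 -1 ... -1
  return 0;
}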
+ Length /= 8; + Index /= 8; + + Type *IntTy8 = Type::getInt8Ty(II.getContext()); + Type *IntTy32 = Type::getInt32Ty(II.getContext()); + VectorType *ShufTy = VectorType::get(IntTy8, 16); + + SmallVector ShuffleMask; + for (int i = 0; i != (int)Index; ++i) + ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i))); + for (int i = 0; i != (int)Length; ++i) + ShuffleMask.push_back( + Constant::getIntegerValue(IntTy32, APInt(32, i + 16))); + for (int i = Index + Length; i != 8; ++i) + ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i))); + for (int i = 8; i != 16; ++i) + ShuffleMask.push_back(UndefValue::get(IntTy32)); + + Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy), + Builder.CreateBitCast(Op1, ShufTy), + ConstantVector::get(ShuffleMask)); + return Builder.CreateBitCast(SV, II.getType()); + } + + // See if we're dealing with constant values. + Constant *C0 = dyn_cast(Op0); + Constant *C1 = dyn_cast(Op1); + ConstantInt *CI00 = + C0 ? dyn_cast(C0->getAggregateElement((unsigned)0)) + : nullptr; + ConstantInt *CI10 = + C1 ? dyn_cast(C1->getAggregateElement((unsigned)0)) + : nullptr; + + // Constant Fold - insert bottom Length bits starting at the Index'th bit. + if (CI00 && CI10) { + APInt V00 = CI00->getValue(); + APInt V10 = CI10->getValue(); + APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index); + V00 = V00 & ~Mask; + V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index); + APInt Val = V00 | V10; + Type *IntTy64 = Type::getInt64Ty(II.getContext()); + Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()), + UndefValue::get(IntTy64)}; + return ConstantVector::get(Args); + } + + // If we were an INSERTQ call, we'll save demanded elements if we convert to + // INSERTQI. + if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) { + Type *IntTy8 = Type::getInt8Ty(II.getContext()); + Constant *CILength = ConstantInt::get(IntTy8, Length, false); + Constant *CIIndex = ConstantInt::get(IntTy8, Index, false); + + Value *Args[] = {Op0, Op1, CILength, CIIndex}; + Module *M = II.getModule(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi); + return Builder.CreateCall(F, Args); + } + + return nullptr; +} + /// The shuffle mask for a perm2*128 selects any two halves of two 256-bit /// source vectors, unless a zero bit is set. If a zero bit is set, /// then ignore that half of the mask and clear that half of the vector. @@ -289,7 +606,7 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II, // The high bit of the selection field chooses the 1st or 2nd operand. bool LowInputSelect = Imm & 0x02; bool HighInputSelect = Imm & 0x20; - + // The low bit of the selection field chooses the low or high half // of the selected operand. bool LowHalfSelect = Imm & 0x01; @@ -298,11 +615,11 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II, // Determine which operand(s) are actually in use for this instruction. Value *V0 = LowInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); Value *V1 = HighInputSelect ? II.getArgOperand(1) : II.getArgOperand(0); - + // If needed, replace operands based on zero mask. V0 = LowHalfZero ? ZeroVector : V0; V1 = HighHalfZero ? ZeroVector : V1; - + // Permute low half of result. unsigned StartIndex = LowHalfSelect ? HalfSize : 0; for (unsigned i = 0; i < HalfSize; ++i) @@ -319,6 +636,43 @@ static Value *SimplifyX86vperm2(const IntrinsicInst &II, return nullptr; } +/// Decode XOP integer vector comparison intrinsics. 
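// Aside: a standalone scalar model (hypothetical 'vpcomLane' helper) of the
// immediate decoding implemented below; the unsigned vpcomu* intrinsics use
// the same table with unsigned comparisons.
#include <cassert>
#include <cstdint>

static bool vpcomLane(int8_t A, int8_t B, unsigned Imm) {
  switch (Imm & 0x7) {
  case 0x0: return A < B;   // lt
  case 0x1: return A <= B;  // le
  case 0x2: return A > B;   // gt
  case 0x3: return A >= B;  // ge
  case 0x4: return A == B;  // eq
  case 0x5: return A != B;  // ne
  case 0x6: return false;   // FALSE, regardless of operands
  default:  return true;    // 0x7: TRUE, regardless of operands
  }
}

int main() {
  assert(vpcomLane(-1, 2, 0x0));  // signed compare: -1 < 2
  assert(!vpcomLane(-1, 2, 0x2)); // signed compare: -1 > 2 is false
  assert(vpcomLane(0, 0, 0x7));   // TRUE lane -> all-ones after the sext
  return 0;
}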
+static Value *SimplifyX86vpcom(const IntrinsicInst &II, + InstCombiner::BuilderTy &Builder, bool IsSigned) { + if (auto *CInt = dyn_cast(II.getArgOperand(2))) { + uint64_t Imm = CInt->getZExtValue() & 0x7; + VectorType *VecTy = cast(II.getType()); + CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; + + switch (Imm) { + case 0x0: + Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + break; + case 0x1: + Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; + break; + case 0x2: + Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + break; + case 0x3: + Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; + break; + case 0x4: + Pred = ICmpInst::ICMP_EQ; break; + case 0x5: + Pred = ICmpInst::ICMP_NE; break; + case 0x6: + return ConstantInt::getSigned(VecTy, 0); // FALSE + case 0x7: + return ConstantInt::getSigned(VecTy, -1); // TRUE + } + + if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0), II.getArgOperand(1))) + return Builder.CreateSExtOrTrunc(Cmp, VecTy); + } + return nullptr; +} + /// visitCallInst - CallInst simplification. This mostly only handles folding /// of intrinsic instructions. For normal calls, it allows visitCallSite to do /// the heavy lifting. @@ -371,7 +725,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (MemMoveInst *MMI = dyn_cast(MI)) { if (GlobalVariable *GVSrc = dyn_cast(MMI->getSource())) if (GVSrc->isConstant()) { - Module *M = CI.getParent()->getParent()->getParent(); + Module *M = CI.getModule(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; Type *Tys[3] = { CI.getArgOperand(0)->getType(), CI.getArgOperand(1)->getType(), @@ -400,6 +754,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Changed) return II; } + auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width, unsigned DemandedWidth) + { + APInt UndefElts(Width, 0); + APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth); + return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts); + }; + switch (II->getIntrinsicID()) { default: break; case Intrinsic::objectsize: { @@ -427,6 +788,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::bitreverse: { + Value *IIOperand = II->getArgOperand(0); + Value *X = nullptr; + + // bitreverse(bitreverse(x)) -> x + if (match(IIOperand, m_Intrinsic(m_Value(X)))) + return ReplaceInstUsesWith(CI, X); + break; + } + case Intrinsic::powi: if (ConstantInt *Power = dyn_cast(II->getArgOperand(1))) { // powi(x, 0) -> 1.0 @@ -669,6 +1040,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return new StoreInst(II->getArgOperand(0), Ptr); } break; + case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: @@ -682,6 +1054,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } break; + case Intrinsic::x86_vcvtph2ps_128: + case Intrinsic::x86_vcvtph2ps_256: { + auto Arg = II->getArgOperand(0); + auto ArgType = cast(Arg->getType()); + auto RetType = cast(II->getType()); + unsigned ArgWidth = ArgType->getNumElements(); + unsigned RetWidth = RetType->getNumElements(); + assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths"); + assert(ArgType->isIntOrIntVectorTy() && + ArgType->getScalarSizeInBits() == 16 && + "CVTPH2PS input type should be 16-bit integer vector"); + assert(RetType->getScalarType()->isFloatTy() && + "CVTPH2PS output type should be 32-bit float vector"); + + // Constant folding: Convert to generic half to single conversion. 
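// Aside: the fold below reduces the intrinsic to "reinterpret each 16-bit
// lane as half, then fpext to float". A standalone scalar sketch of that
// conversion (assumes IEEE-754 binary16/binary32; halfToFloat is a local
// helper, not an LLVM API):
#include <cassert>
#include <cstdint>
#include <cstring>

static float halfToFloat(uint16_t H) {
  uint32_t Sign = (H >> 15) & 1, Exp = (H >> 10) & 0x1F, Man = H & 0x3FF;
  uint32_t Bits;
  if (Exp == 0x1F)                  // inf/NaN: widen the payload field
    Bits = (Sign << 31) | 0x7F800000 | (Man << 13);
  else if (Exp != 0)                // normal: rebias 15 -> 127
    Bits = (Sign << 31) | ((Exp + 112) << 23) | (Man << 13);
  else if (Man == 0)                // signed zero
    Bits = Sign << 31;
  else {                            // subnormal half: renormalize
    Exp = 113;
    while (!(Man & 0x400)) { Man <<= 1; --Exp; }
    Bits = (Sign << 31) | (Exp << 23) | ((Man & 0x3FF) << 13);
  }
  float F;
  std::memcpy(&F, &Bits, sizeof(F));
  return F;
}

int main() {
  assert(halfToFloat(0x3C00) == 1.0f);  // 1.0h
  assert(halfToFloat(0xC000) == -2.0f); // -2.0h
  return 0;
}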
+ if (isa(Arg)) + return ReplaceInstUsesWith(*II, ConstantAggregateZero::get(RetType)); + + if (isa(Arg)) { + auto VectorHalfAsShorts = Arg; + if (RetWidth < ArgWidth) { + SmallVector SubVecMask; + for (unsigned i = 0; i != RetWidth; ++i) + SubVecMask.push_back((int)i); + VectorHalfAsShorts = Builder->CreateShuffleVector( + Arg, UndefValue::get(ArgType), SubVecMask); + } + + auto VectorHalfType = + VectorType::get(Type::getHalfTy(II->getContext()), RetWidth); + auto VectorHalfs = + Builder->CreateBitCast(VectorHalfAsShorts, VectorHalfType); + auto VectorFloats = Builder->CreateFPExt(VectorHalfs, RetType); + return ReplaceInstUsesWith(*II, VectorFloats); + } + + // We only use the lowest lanes of the argument. + if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) { + II->setArgOperand(0, V); + return II; + } + break; + } + case Intrinsic::x86_sse_cvtss2si: case Intrinsic::x86_sse_cvtss2si64: case Intrinsic::x86_sse_cvttss2si: @@ -692,194 +1108,229 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::x86_sse2_cvttsd2si64: { // These intrinsics only demand the 0th element of their input vectors. If // we can simplify the input based on that, do so now. - unsigned VWidth = - cast(II->getArgOperand(0)->getType())->getNumElements(); - APInt DemandedElts(VWidth, 1); - APInt UndefElts(VWidth, 0); - if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), - DemandedElts, UndefElts)) { + Value *Arg = II->getArgOperand(0); + unsigned VWidth = Arg->getType()->getVectorNumElements(); + if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) { II->setArgOperand(0, V); return II; } break; } - // Constant fold << Ci. - // FIXME: We don't handle _dq because it's a shift of an i128, but is - // represented in the IR as <2 x i64>. A per element shift is wrong. - case Intrinsic::x86_sse2_psll_d: - case Intrinsic::x86_sse2_psll_q: - case Intrinsic::x86_sse2_psll_w: - case Intrinsic::x86_sse2_pslli_d: - case Intrinsic::x86_sse2_pslli_q: - case Intrinsic::x86_sse2_pslli_w: - case Intrinsic::x86_avx2_psll_d: - case Intrinsic::x86_avx2_psll_q: - case Intrinsic::x86_avx2_psll_w: - case Intrinsic::x86_avx2_pslli_d: - case Intrinsic::x86_avx2_pslli_q: - case Intrinsic::x86_avx2_pslli_w: - case Intrinsic::x86_sse2_psrl_d: - case Intrinsic::x86_sse2_psrl_q: - case Intrinsic::x86_sse2_psrl_w: + // Constant fold ashr( , Ci ). + // Constant fold lshr( , Ci ). + // Constant fold shl( , Ci ). 
+ case Intrinsic::x86_sse2_psrai_d: + case Intrinsic::x86_sse2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: + case Intrinsic::x86_avx2_psrai_w: case Intrinsic::x86_sse2_psrli_d: case Intrinsic::x86_sse2_psrli_q: case Intrinsic::x86_sse2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_sse2_pslli_d: + case Intrinsic::x86_sse2_pslli_q: + case Intrinsic::x86_sse2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_pslli_w: + if (Value *V = SimplifyX86immshift(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); + break; + + case Intrinsic::x86_sse2_psra_d: + case Intrinsic::x86_sse2_psra_w: + case Intrinsic::x86_avx2_psra_d: + case Intrinsic::x86_avx2_psra_w: + case Intrinsic::x86_sse2_psrl_d: + case Intrinsic::x86_sse2_psrl_q: + case Intrinsic::x86_sse2_psrl_w: case Intrinsic::x86_avx2_psrl_d: case Intrinsic::x86_avx2_psrl_q: case Intrinsic::x86_avx2_psrl_w: - case Intrinsic::x86_avx2_psrli_d: - case Intrinsic::x86_avx2_psrli_q: - case Intrinsic::x86_avx2_psrli_w: { - // Simplify if count is constant. To 0 if >= BitWidth, - // otherwise to shl/lshr. - auto CDV = dyn_cast(II->getArgOperand(1)); - auto CInt = dyn_cast(II->getArgOperand(1)); - if (!CDV && !CInt) - break; - ConstantInt *Count; - if (CDV) - Count = cast(CDV->getElementAsConstant(0)); - else - Count = CInt; + case Intrinsic::x86_sse2_psll_d: + case Intrinsic::x86_sse2_psll_q: + case Intrinsic::x86_sse2_psll_w: + case Intrinsic::x86_avx2_psll_d: + case Intrinsic::x86_avx2_psll_q: + case Intrinsic::x86_avx2_psll_w: { + if (Value *V = SimplifyX86immshift(*II, *Builder)) + return ReplaceInstUsesWith(*II, V); - auto Vec = II->getArgOperand(0); - auto VT = cast(Vec->getType()); - if (Count->getZExtValue() > - VT->getElementType()->getPrimitiveSizeInBits() - 1) - return ReplaceInstUsesWith( - CI, ConstantAggregateZero::get(Vec->getType())); + // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector + // operand to compute the shift amount. + Value *Arg1 = II->getArgOperand(1); + assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 && + "Unexpected packed shift size"); + unsigned VWidth = Arg1->getType()->getVectorNumElements(); - bool isPackedShiftLeft = true; - switch (II->getIntrinsicID()) { - default : break; - case Intrinsic::x86_sse2_psrl_d: - case Intrinsic::x86_sse2_psrl_q: - case Intrinsic::x86_sse2_psrl_w: - case Intrinsic::x86_sse2_psrli_d: - case Intrinsic::x86_sse2_psrli_q: - case Intrinsic::x86_sse2_psrli_w: - case Intrinsic::x86_avx2_psrl_d: - case Intrinsic::x86_avx2_psrl_q: - case Intrinsic::x86_avx2_psrl_w: - case Intrinsic::x86_avx2_psrli_d: - case Intrinsic::x86_avx2_psrli_q: - case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break; - } - - unsigned VWidth = VT->getNumElements(); - // Get a constant vector of the same type as the first operand. - auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue()); - if (isPackedShiftLeft) - return BinaryOperator::CreateShl(Vec, - Builder->CreateVectorSplat(VWidth, VTCI)); - - return BinaryOperator::CreateLShr(Vec, - Builder->CreateVectorSplat(VWidth, VTCI)); - } - - case Intrinsic::x86_sse41_pmovsxbw: - case Intrinsic::x86_sse41_pmovsxwd: - case Intrinsic::x86_sse41_pmovsxdq: - case Intrinsic::x86_sse41_pmovzxbw: - case Intrinsic::x86_sse41_pmovzxwd: - case Intrinsic::x86_sse41_pmovzxdq: { - // pmov{s|z}x ignores the upper half of their input vectors. 
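// Aside: a standalone scalar sketch (plain C++ arrays standing in for the
// vectors) of what SimplifyX86extend lowers these intrinsics to: extract the
// low NumDstElts lanes with a shuffle, then zext (pmovzx) or sext (pmovsx)
// each lane to the wider element type.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Src[8] = {0x80, 0x01, 0xFF, 0x7F, 9, 9, 9, 9}; // upper half ignored
  uint16_t ZExt[4];
  int16_t SExt[4];
  for (int i = 0; i != 4; ++i) {
    ZExt[i] = uint16_t(Src[i]);        // pmovzxbw lane: zero extend
    SExt[i] = int16_t(int8_t(Src[i])); // pmovsxbw lane: sign extend
  }
  assert(ZExt[0] == 0x0080 && SExt[0] == int16_t(0xFF80));
  assert(ZExt[2] == 0x00FF && SExt[2] == -1);
  return 0;
}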
- unsigned VWidth = - cast(II->getArgOperand(0)->getType())->getNumElements(); - unsigned LowHalfElts = VWidth / 2; - APInt InputDemandedElts(APInt::getBitsSet(VWidth, 0, LowHalfElts)); - APInt UndefElts(VWidth, 0); - if (Value *TmpV = SimplifyDemandedVectorElts( - II->getArgOperand(0), InputDemandedElts, UndefElts)) { - II->setArgOperand(0, TmpV); + if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) { + II->setArgOperand(1, V); return II; } break; } + + case Intrinsic::x86_avx2_pmovsxbd: + case Intrinsic::x86_avx2_pmovsxbq: + case Intrinsic::x86_avx2_pmovsxbw: + case Intrinsic::x86_avx2_pmovsxdq: + case Intrinsic::x86_avx2_pmovsxwd: + case Intrinsic::x86_avx2_pmovsxwq: + if (Value *V = SimplifyX86extend(*II, *Builder, true)) + return ReplaceInstUsesWith(*II, V); + break; + + case Intrinsic::x86_sse41_pmovzxbd: + case Intrinsic::x86_sse41_pmovzxbq: + case Intrinsic::x86_sse41_pmovzxbw: + case Intrinsic::x86_sse41_pmovzxdq: + case Intrinsic::x86_sse41_pmovzxwd: + case Intrinsic::x86_sse41_pmovzxwq: + case Intrinsic::x86_avx2_pmovzxbd: + case Intrinsic::x86_avx2_pmovzxbq: + case Intrinsic::x86_avx2_pmovzxbw: + case Intrinsic::x86_avx2_pmovzxdq: + case Intrinsic::x86_avx2_pmovzxwd: + case Intrinsic::x86_avx2_pmovzxwq: + if (Value *V = SimplifyX86extend(*II, *Builder, false)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::x86_sse41_insertps: if (Value *V = SimplifyX86insertps(*II, *Builder)) return ReplaceInstUsesWith(*II, V); break; - + + case Intrinsic::x86_sse4a_extrq: { + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + unsigned VWidth0 = Op0->getType()->getVectorNumElements(); + unsigned VWidth1 = Op1->getType()->getVectorNumElements(); + assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && + Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 && + VWidth1 == 16 && "Unexpected operand sizes"); + + // See if we're dealing with constant values. + Constant *C1 = dyn_cast(Op1); + ConstantInt *CILength = + C1 ? dyn_cast(C1->getAggregateElement((unsigned)0)) + : nullptr; + ConstantInt *CIIndex = + C1 ? dyn_cast(C1->getAggregateElement((unsigned)1)) + : nullptr; + + // Attempt to simplify to a constant, shuffle vector or EXTRQI call. + if (Value *V = SimplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + return ReplaceInstUsesWith(*II, V); + + // EXTRQ only uses the lowest 64-bits of the first 128-bit vector + // operands and the lowest 16-bits of the second. + if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { + II->setArgOperand(0, V); + return II; + } + if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) { + II->setArgOperand(1, V); + return II; + } + break; + } + + case Intrinsic::x86_sse4a_extrqi: { + // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining + // bits of the lower 64-bits. The upper 64-bits are undefined. + Value *Op0 = II->getArgOperand(0); + unsigned VWidth = Op0->getType()->getVectorNumElements(); + assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 && + "Unexpected operand size"); + + // See if we're dealing with constant values. + ConstantInt *CILength = dyn_cast(II->getArgOperand(1)); + ConstantInt *CIIndex = dyn_cast(II->getArgOperand(2)); + + // Attempt to simplify to a constant or shuffle vector. + if (Value *V = SimplifyX86extrq(*II, Op0, CILength, CIIndex, *Builder)) + return ReplaceInstUsesWith(*II, V); + + // EXTRQI only uses the lowest 64-bits of the first 128-bit vector + // operand. 
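// Aside: a sketch of the DemandedElts bitmask the SimplifyDemandedVectorEltsLow
// lambda builds: only the low DemandedWidth lanes are marked live, so the
// combiner may rewrite producers of the dead upper lanes to undef. Plain C++
// stand-in; the real code uses llvm::APInt::getLowBitsSet.
#include <cassert>
#include <cstdint>

static uint64_t lowLanesMask(unsigned Width, unsigned DemandedWidth) {
  assert(DemandedWidth <= Width && Width <= 64);
  return DemandedWidth == 64 ? ~0ULL : ((1ULL << DemandedWidth) - 1);
}

int main() {
  // EXTRQ/EXTRQI on <2 x i64> only read element 0 of the first operand:
  assert(lowLanesMask(2, 1) == 0b01);
  // A 128-bit shift-count vector <8 x i16> only contributes its low 4 lanes:
  assert(lowLanesMask(8, 4) == 0b00001111);
  return 0;
}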
+ if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { + II->setArgOperand(0, V); + return II; + } + break; + } + + case Intrinsic::x86_sse4a_insertq: { + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + unsigned VWidth = Op0->getType()->getVectorNumElements(); + assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && + Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 && + Op1->getType()->getVectorNumElements() == 2 && + "Unexpected operand size"); + + // See if we're dealing with constant values. + Constant *C1 = dyn_cast(Op1); + ConstantInt *CI11 = + C1 ? dyn_cast(C1->getAggregateElement((unsigned)1)) + : nullptr; + + // Attempt to simplify to a constant, shuffle vector or INSERTQI call. + if (CI11) { + APInt V11 = CI11->getValue(); + APInt Len = V11.zextOrTrunc(6); + APInt Idx = V11.lshr(8).zextOrTrunc(6); + if (Value *V = SimplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + return ReplaceInstUsesWith(*II, V); + } + + // INSERTQ only uses the lowest 64-bits of the first 128-bit vector + // operand. + if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) { + II->setArgOperand(0, V); + return II; + } + break; + } + case Intrinsic::x86_sse4a_insertqi: { - // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top - // ones undef - // TODO: eventually we should lower this intrinsic to IR - if (auto CIWidth = dyn_cast(II->getArgOperand(2))) { - if (auto CIStart = dyn_cast(II->getArgOperand(3))) { - unsigned Index = CIStart->getZExtValue(); - // From AMD documentation: "a value of zero in the field length is - // defined as length of 64". - unsigned Length = CIWidth->equalsInt(0) ? 64 : CIWidth->getZExtValue(); + // INSERTQI: Extract lowest Length bits from lower half of second source and + // insert over first source starting at Index bit. The upper 64-bits are + // undefined. + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); + unsigned VWidth0 = Op0->getType()->getVectorNumElements(); + unsigned VWidth1 = Op1->getType()->getVectorNumElements(); + assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && + Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 && + VWidth1 == 2 && "Unexpected operand sizes"); - // From AMD documentation: "If the sum of the bit index + length field - // is greater than 64, the results are undefined". + // See if we're dealing with constant values. + ConstantInt *CILength = dyn_cast(II->getArgOperand(2)); + ConstantInt *CIIndex = dyn_cast(II->getArgOperand(3)); - // Note that both field index and field length are 8-bit quantities. - // Since variables 'Index' and 'Length' are unsigned values - // obtained from zero-extending field index and field length - // respectively, their sum should never wrap around. - if ((Index + Length) > 64) - return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + // Attempt to simplify to a constant or shuffle vector. 
+ if (CILength && CIIndex) { + APInt Len = CILength->getValue().zextOrTrunc(6); + APInt Idx = CIIndex->getValue().zextOrTrunc(6); + if (Value *V = SimplifyX86insertq(*II, Op0, Op1, Len, Idx, *Builder)) + return ReplaceInstUsesWith(*II, V); + } - if (Length == 64 && Index == 0) { - Value *Vec = II->getArgOperand(1); - Value *Undef = UndefValue::get(Vec->getType()); - const uint32_t Mask[] = { 0, 2 }; - return ReplaceInstUsesWith( - CI, - Builder->CreateShuffleVector( - Vec, Undef, ConstantDataVector::get( - II->getContext(), makeArrayRef(Mask)))); + // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector + // operands. + if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) { + II->setArgOperand(0, V); + return II; + } - } else if (auto Source = - dyn_cast(II->getArgOperand(0))) { - if (Source->hasOneUse() && - Source->getArgOperand(1) == II->getArgOperand(1)) { - // If the source of the insert has only one use and it's another - // insert (and they're both inserting from the same vector), try to - // bundle both together. - auto CISourceWidth = - dyn_cast(Source->getArgOperand(2)); - auto CISourceStart = - dyn_cast(Source->getArgOperand(3)); - if (CISourceStart && CISourceWidth) { - unsigned Start = CIStart->getZExtValue(); - unsigned Width = CIWidth->getZExtValue(); - unsigned End = Start + Width; - unsigned SourceStart = CISourceStart->getZExtValue(); - unsigned SourceWidth = CISourceWidth->getZExtValue(); - unsigned SourceEnd = SourceStart + SourceWidth; - unsigned NewStart, NewWidth; - bool ShouldReplace = false; - if (Start <= SourceStart && SourceStart <= End) { - NewStart = Start; - NewWidth = std::max(End, SourceEnd) - NewStart; - ShouldReplace = true; - } else if (SourceStart <= Start && Start <= SourceEnd) { - NewStart = SourceStart; - NewWidth = std::max(SourceEnd, End) - NewStart; - ShouldReplace = true; - } - - if (ShouldReplace) { - Constant *ConstantWidth = ConstantInt::get( - II->getArgOperand(2)->getType(), NewWidth, false); - Constant *ConstantStart = ConstantInt::get( - II->getArgOperand(3)->getType(), NewStart, false); - Value *Args[4] = { Source->getArgOperand(0), - II->getArgOperand(1), ConstantWidth, - ConstantStart }; - Module *M = CI.getParent()->getParent()->getParent(); - Value *F = - Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi); - return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args)); - } - } - } - } - } + if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) { + II->setArgOperand(1, V); + return II; } break; } @@ -894,7 +1345,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // This optimization is convoluted because the intrinsic is defined as // getting a vector of floats or doubles for the ps and pd versions. // FIXME: That should be changed. + + Value *Op0 = II->getArgOperand(0); + Value *Op1 = II->getArgOperand(1); Value *Mask = II->getArgOperand(2); + + // fold (blend A, A, Mask) -> A + if (Op0 == Op1) + return ReplaceInstUsesWith(CI, Op0); + + // Zero Mask - select 1st argument. + if (isa(Mask)) + return ReplaceInstUsesWith(CI, Op0); + + // Constant Mask - select 1st/2nd argument lane based on top bit of mask. 
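// Aside: a standalone scalar model (local 'blendLane' helper) of the blendv
// folds above: each result lane is chosen by the top bit of the matching
// mask lane, i.e. the intrinsic is a per-lane select(mask < 0, Op1, Op0).
#include <cassert>
#include <cstdint>

static int32_t blendLane(int32_t Op0, int32_t Op1, int32_t Mask) {
  return (Mask < 0) ? Op1 : Op0; // top bit of Mask picks the source
}

int main() {
  assert(blendLane(10, 20, 0) == 10);         // zero mask -> 1st argument
  assert(blendLane(10, 20, INT32_MIN) == 20); // sign bit set -> 2nd argument
  // Hence blendv(A, A, Mask) == A and blendv(A, B, 0) == A, the two
  // early-outs added above.
  return 0;
}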
if (auto C = dyn_cast(Mask)) { auto Tyi1 = Builder->getInt1Ty(); auto SelectorType = cast(Mask->getType()); @@ -917,11 +1381,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1))); } auto NewSelector = ConstantVector::get(Selectors); - return SelectInst::Create(NewSelector, II->getArgOperand(1), - II->getArgOperand(0), "blendv"); - } else { - break; + return SelectInst::Create(NewSelector, Op1, Op0, "blendv"); } + break; + } + + case Intrinsic::x86_ssse3_pshuf_b_128: + case Intrinsic::x86_avx2_pshuf_b: { + // Turn pshufb(V1,mask) -> shuffle(V1,Zero,mask) if mask is a constant. + auto *V = II->getArgOperand(1); + auto *VTy = cast(V->getType()); + unsigned NumElts = VTy->getNumElements(); + assert((NumElts == 16 || NumElts == 32) && + "Unexpected number of elements in shuffle mask!"); + // Initialize the resulting shuffle mask to all zeroes. + uint32_t Indexes[32] = {0}; + + if (auto *Mask = dyn_cast(V)) { + // Each byte in the shuffle control mask forms an index to permute the + // corresponding byte in the destination operand. + for (unsigned I = 0; I < NumElts; ++I) { + int8_t Index = Mask->getElementAsInteger(I); + // If the most significant bit (bit[7]) of each byte of the shuffle + // control mask is set, then zero is written in the result byte. + // The zero vector is in the right-hand side of the resulting + // shufflevector. + + // The value of each index is the least significant 4 bits of the + // shuffle control byte. + Indexes[I] = (Index < 0) ? NumElts : Index & 0xF; + } + } else if (!isa(V)) + break; + + // The value of each index for the high 128-bit lane is the least + // significant 4 bits of the respective shuffle control byte. + for (unsigned I = 16; I < NumElts; ++I) + Indexes[I] += I & 0xF0; + + auto NewC = ConstantDataVector::get(V->getContext(), + makeArrayRef(Indexes, NumElts)); + auto V1 = II->getArgOperand(0); + auto V2 = Constant::getNullValue(II->getType()); + auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC); + return ReplaceInstUsesWith(CI, Shuffle); } case Intrinsic::x86_avx_vpermilvar_ps: @@ -972,6 +1475,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return ReplaceInstUsesWith(*II, V); break; + case Intrinsic::x86_xop_vpcomb: + case Intrinsic::x86_xop_vpcomd: + case Intrinsic::x86_xop_vpcomq: + case Intrinsic::x86_xop_vpcomw: + if (Value *V = SimplifyX86vpcom(*II, *Builder, true)) + return ReplaceInstUsesWith(*II, V); + break; + + case Intrinsic::x86_xop_vpcomub: + case Intrinsic::x86_xop_vpcomud: + case Intrinsic::x86_xop_vpcomuq: + case Intrinsic::x86_xop_vpcomuw: + if (Value *V = SimplifyX86vpcom(*II, *Builder, false)) + return ReplaceInstUsesWith(*II, V); + break; + case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. // Note that ppc_altivec_vperm has a big-endian bias, so when creating @@ -1115,15 +1634,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // happen when variable allocas are DCE'd. if (IntrinsicInst *SS = dyn_cast(II->getArgOperand(0))) { if (SS->getIntrinsicID() == Intrinsic::stacksave) { - BasicBlock::iterator BI = SS; - if (&*++BI == II) + if (&*++SS->getIterator() == II) return EraseInstFromFunction(CI); } } // Scan down this block to see if there is another stack restore in the // same block without an intervening call/alloca. 
- BasicBlock::iterator BI = II; + BasicBlock::iterator BI(II); TerminatorInst *TI = II->getParent()->getTerminator(); bool CannotRemove = false; for (++BI; &*BI != TI; ++BI) { @@ -1153,6 +1671,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return EraseInstFromFunction(CI); break; } + case Intrinsic::lifetime_start: { + // Remove trivially empty lifetime_start/end ranges, i.e. a start + // immediately followed by an end (ignoring debuginfo or other + // lifetime markers in between). + BasicBlock::iterator BI = II->getIterator(), BE = II->getParent()->end(); + for (++BI; BI != BE; ++BI) { + if (IntrinsicInst *LTE = dyn_cast(BI)) { + if (isa(LTE) || + LTE->getIntrinsicID() == Intrinsic::lifetime_start) + continue; + if (LTE->getIntrinsicID() == Intrinsic::lifetime_end) { + if (II->getOperand(0) == LTE->getOperand(0) && + II->getOperand(1) == LTE->getOperand(1)) { + EraseInstFromFunction(*LTE); + return EraseInstFromFunction(*II); + } + continue; + } + } + break; + } + break; + } case Intrinsic::assume: { // Canonicalize assume(a && b) -> assume(a); assume(b); // Note: New assumption intrinsics created here are registered by @@ -1233,7 +1774,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } // isKnownNonNull -> nonnull attribute - if (isKnownNonNull(DerivedPtr)) + if (isKnownNonNullAt(DerivedPtr, II, DT, TLI)) II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull); // isDereferenceablePointer -> deref attribute @@ -1355,9 +1896,10 @@ static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem) { // Visit all the previous instructions in the basic block, and try to find a // init.trampoline which has a direct path to the adjust.trampoline. - for (BasicBlock::iterator I = AdjustTramp, - E = AdjustTramp->getParent()->begin(); I != E; ) { - Instruction *Inst = --I; + for (BasicBlock::iterator I = AdjustTramp->getIterator(), + E = AdjustTramp->getParent()->begin(); + I != E;) { + Instruction *Inst = &*--I; if (IntrinsicInst *II = dyn_cast(I)) if (II->getIntrinsicID() == Intrinsic::init_trampoline && II->getOperand(0) == TrampMem) @@ -1400,20 +1942,27 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // Mark any parameters that are known to be non-null with the nonnull // attribute. This is helpful for inlining calls to functions with null // checks on their arguments. + SmallVector Indices; unsigned ArgNo = 0; + for (Value *V : CS.args()) { - if (!CS.paramHasAttr(ArgNo+1, Attribute::NonNull) && - isKnownNonNull(V)) { - AttributeSet AS = CS.getAttributes(); - AS = AS.addAttribute(CS.getInstruction()->getContext(), ArgNo+1, - Attribute::NonNull); - CS.setAttributes(AS); - Changed = true; - } + if (V->getType()->isPointerTy() && !CS.paramHasAttr(ArgNo+1, Attribute::NonNull) && + isKnownNonNullAt(V, CS.getInstruction(), DT, TLI)) + Indices.push_back(ArgNo + 1); ArgNo++; } + assert(ArgNo == CS.arg_size() && "sanity check"); + if (!Indices.empty()) { + AttributeSet AS = CS.getAttributes(); + LLVMContext &Ctx = CS.getInstruction()->getContext(); + AS = AS.addAttribute(Ctx, Indices, + Attribute::get(Ctx, Attribute::NonNull)); + CS.setAttributes(AS); + Changed = true; + } + // If the callee is a pointer to a function, attempt to move any casts to the // arguments of the call/invoke. 
Value *Callee = CS.getCalledValue(); @@ -1725,16 +2274,19 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { const AttributeSet &NewCallerPAL = AttributeSet::get(Callee->getContext(), attrVec); + SmallVector OpBundles; + CS.getOperandBundlesAsDefs(OpBundles); + Instruction *NC; if (InvokeInst *II = dyn_cast(Caller)) { - NC = Builder->CreateInvoke(Callee, II->getNormalDest(), - II->getUnwindDest(), Args); + NC = Builder->CreateInvoke(Callee, II->getNormalDest(), II->getUnwindDest(), + Args, OpBundles); NC->takeName(II); cast(NC)->setCallingConv(II->getCallingConv()); cast(NC)->setAttributes(NewCallerPAL); } else { CallInst *CI = cast(Caller); - NC = Builder->CreateCall(Callee, Args); + NC = Builder->CreateCall(Callee, Args, OpBundles); NC->takeName(CI); if (CI->isTailCall()) cast(NC)->setTailCall(); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 48ab0eb2c1b9..da835a192322 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -21,11 +21,11 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear -/// expression. If so, decompose it, returning some value X, such that Val is +/// Analyze 'Val', seeing if it is a simple linear expression. +/// If so, decompose it, returning some value X, such that Val is /// X*Scale+Offset. /// -static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, +static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale, uint64_t &Offset) { if (ConstantInt *CI = dyn_cast(Val)) { Offset = CI->getZExtValue(); @@ -62,7 +62,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, // where C1 is divisible by C2. unsigned SubScale; Value *SubVal = - DecomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); + decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset); Offset += RHS->getZExtValue(); Scale = SubScale; return SubVal; @@ -76,14 +76,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale, return Val; } -/// PromoteCastOfAllocation - If we find a cast of an allocation instruction, -/// try to eliminate the cast by moving the type information into the alloc. +/// If we find a cast of an allocation instruction, try to eliminate the cast by +/// moving the type information into the alloc. Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI) { PointerType *PTy = cast(CI.getType()); BuilderTy AllocaBuilder(*Builder); - AllocaBuilder.SetInsertPoint(AI.getParent(), &AI); + AllocaBuilder.SetInsertPoint(&AI); // Get the type really allocated and the type casted to. Type *AllocElTy = AI.getAllocatedType(); @@ -114,7 +114,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, unsigned ArraySizeScale; uint64_t ArrayOffset; Value *NumElements = // See if the array size is a decomposable linear expr. - DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); + decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset); // If we can now satisfy the modulus, by using a non-1 scale, we really can // do the xform. 
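// Aside: a minimal standalone model (Node/decompose are illustrative local
// names, not LLVM IR) of the decomposition decomposeSimpleLinearExpr performs
// above: strip a "* C" wrapper and constant "+ C" wrappers from Val so the
// caller gets back X, Scale, Offset with Val == X * Scale + Offset.
#include <cassert>

struct Node {
  enum { Leaf, Mul, Add } Kind;
  const Node *Sub;
  unsigned Cst;     // constant operand of Mul/Add
  unsigned LeafVal; // value of a Leaf, for checking only
};

static const Node *decompose(const Node *N, unsigned &Scale,
                             unsigned &Offset) {
  if (N->Kind == Node::Mul) { // X * C: a scaled value, no offset yet
    Scale = N->Cst;
    Offset = 0;
    return N->Sub;
  }
  if (N->Kind == Node::Add) { // inner + C: fold C into the offset
    const Node *X = decompose(N->Sub, Scale, Offset);
    Offset += N->Cst;
    return X;
  }
  Scale = 1; // opaque value: X * 1 + 0
  Offset = 0;
  return N;
}

int main() {
  Node X{Node::Leaf, nullptr, 0, 5};
  Node M{Node::Mul, &X, 4, 0}; // X * 4
  Node A{Node::Add, &M, 3, 0}; // X * 4 + 3
  unsigned Scale, Offset;
  const Node *Root = decompose(&A, Scale, Offset);
  assert(Root == &X && Scale == 4 && Offset == 3);
  assert(Root->LeafVal * Scale + Offset == 5 * 4 + 3); // post-condition
  return 0;
}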
@@ -154,9 +154,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, return ReplaceInstUsesWith(CI, New); } -/// EvaluateInDifferentType - Given an expression that -/// CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually -/// insert the code to evaluate the expression. +/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns +/// true for, actually insert the code to evaluate the expression. Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned) { if (Constant *C = dyn_cast(V)) { @@ -261,9 +260,9 @@ isEliminableCastPair(const CastInst *CI, ///< First cast instruction return Instruction::CastOps(Res); } -/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually -/// results in any code being generated and is interesting to optimize out. If -/// the cast can be eliminated by some other simple transformation, we prefer +/// Return true if the cast from "V to Ty" actually results in any code being +/// generated and is interesting to optimize out. +/// If the cast can be eliminated by some other simple transformation, we prefer /// to do the simplification first. bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V, Type *Ty) { @@ -318,9 +317,9 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { return nullptr; } -/// CanEvaluateTruncated - Return true if we can evaluate the specified -/// expression tree as type Ty instead of its larger type, and arrive with the -/// same value. This is used by code that tries to eliminate truncates. +/// Return true if we can evaluate the specified expression tree as type Ty +/// instead of its larger type, and arrive with the same value. +/// This is used by code that tries to eliminate truncates. /// /// Ty will always be a type smaller than V. We should return true if trunc(V) /// can be computed by computing V in the smaller type. If V is an instruction, @@ -329,7 +328,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) { /// /// This function works on both vectors and scalars. /// -static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, +static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, Instruction *CxtI) { // We can always evaluate constants in another type. if (isa(V)) @@ -359,8 +358,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, case Instruction::Or: case Instruction::Xor: // These operators can all arbitrarily be extended or truncated. 
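// Aside: a standalone numeric check (plain C++ harness) of the property
// canEvaluateTruncated relies on for these opcodes: the low bits of the wide
// result depend only on the low bits of the operands, so computing in the
// narrow type and truncating the inputs gives the same value as truncating
// the wide result.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 1000; x += 7)
    for (uint32_t y = 0; y < 1000; y += 13) {
      assert(uint8_t(x + y) == uint8_t(uint8_t(x) + uint8_t(y)));
      assert(uint8_t(x * y) == uint8_t(uint8_t(x) * uint8_t(y)));
      assert(uint8_t(x & y) == (uint8_t(x) & uint8_t(y)));
    }
  return 0;
}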
- return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && - CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); case Instruction::UDiv: case Instruction::URem: { @@ -371,8 +370,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, APInt Mask = APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth); if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) && IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) { - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && - CanEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) && + canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI); } } break; @@ -383,7 +382,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { uint32_t BitWidth = Ty->getScalarSizeInBits(); if (CI->getLimitedValue(BitWidth) < BitWidth) - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); } break; case Instruction::LShr: @@ -396,7 +395,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, if (IC.MaskedValueIsZero(I->getOperand(0), APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth), 0, CxtI) && CI->getLimitedValue(BitWidth) < BitWidth) { - return CanEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); + return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI); } } break; @@ -410,8 +409,8 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, return true; case Instruction::Select: { SelectInst *SI = cast(I); - return CanEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) && - CanEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI); + return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) && + canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI); } case Instruction::PHI: { // We can change a phi if we can change all operands. Note that we never @@ -419,7 +418,7 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, // instructions with a single use. PHINode *PN = cast(I); for (Value *IncValue : PN->incoming_values()) - if (!CanEvaluateTruncated(IncValue, Ty, IC, CxtI)) + if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI)) return false; return true; } @@ -431,6 +430,50 @@ static bool CanEvaluateTruncated(Value *V, Type *Ty, InstCombiner &IC, return false; } +/// Given a vector that is bitcast to an integer, optionally logically +/// right-shifted, and truncated, convert it to an extractelement. +/// Example (big endian): +/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32 +/// ---> +/// extractelement <4 x i32> %X, 1 +static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC, + const DataLayout &DL) { + Value *TruncOp = Trunc.getOperand(0); + Type *DestType = Trunc.getType(); + if (!TruncOp->hasOneUse() || !isa(DestType)) + return nullptr; + + Value *VecInput = nullptr; + ConstantInt *ShiftVal = nullptr; + if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)), + m_LShr(m_BitCast(m_Value(VecInput)), + m_ConstantInt(ShiftVal)))) || + !isa(VecInput->getType())) + return nullptr; + + VectorType *VecType = cast(VecInput->getType()); + unsigned VecWidth = VecType->getPrimitiveSizeInBits(); + unsigned DestWidth = DestType->getPrimitiveSizeInBits(); + unsigned ShiftAmount = ShiftVal ? 
                                          ShiftVal->getZExtValue() : 0;
+
+  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
+    return nullptr;
+
+  // If the element type of the vector doesn't match the result type,
+  // bitcast it to a vector type that we can extract from.
+  unsigned NumVecElts = VecWidth / DestWidth;
+  if (VecType->getElementType() != DestType) {
+    VecType = VectorType::get(DestType, NumVecElts);
+    VecInput = IC.Builder->CreateBitCast(VecInput, VecType, "bc");
+  }
+
+  unsigned Elt = ShiftAmount / DestWidth;
+  if (DL.isBigEndian())
+    Elt = NumVecElts - 1 - Elt;
+
+  return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+}
+
 Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   if (Instruction *Result = commonCastTransforms(CI))
     return Result;
@@ -441,7 +484,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   // min/max.
   Value *LHS, *RHS;
   if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
-    if (matchSelectPattern(SI, LHS, RHS) != SPF_UNKNOWN)
+    if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN)
       return nullptr;

   // See if we can simplify any instructions used by the input whose sole
@@ -457,7 +500,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   // expression tree to something weird like i93 unless the source is also
   // strange.
   if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateTruncated(Src, DestTy, *this, &CI)) {
+      canEvaluateTruncated(Src, DestTy, *this, &CI)) {

     // If this cast is a truncate, evaluating in a different type always
     // eliminates the cast, so it is always a win.
@@ -470,7 +513,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
   // Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0), likewise for vector.
   if (DestTy->getScalarSizeInBits() == 1) {
-    Constant *One = ConstantInt::get(Src->getType(), 1);
+    Constant *One = ConstantInt::get(SrcTy, 1);
     Src = Builder->CreateAnd(Src, One);
     Value *Zero = Constant::getNullValue(Src->getType());
     return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
   }
@@ -489,31 +532,54 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
     // If the shift amount is larger than the size of A, then the result is
     // known to be zero because all the input bits got shifted out.
     if (Cst->getZExtValue() >= ASize)
-      return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+      return ReplaceInstUsesWith(CI, Constant::getNullValue(DestTy));

     // Since we're doing an lshr and a zero extend, and know that the shift
     // amount is smaller than ASize, it is always safe to do the shift in A's
     // type, then zero extend or truncate to the result.
     Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
     Shift->takeName(Src);
-    return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+    return CastInst::CreateIntegerCast(Shift, DestTy, false);
+  }
+
+  // Transform trunc(lshr (sext A), Cst) to ashr A, Cst to eliminate type
+  // conversion.
+  // It works because bits coming from sign extension have the same value as
+  // the sign bit of the original value; performing ashr instead of lshr
+  // generates bits of the same value as the sign bit.
+  if (Src->hasOneUse() &&
+      match(Src, m_LShr(m_SExt(m_Value(A)), m_ConstantInt(Cst))) &&
+      cast<Instruction>(Src)->getOperand(0)->hasOneUse()) {
+    const unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+    // This optimization can only be performed when zero bits generated by
+    // the original lshr aren't pulled into the value after truncation, so we
+    // can only shift by values smaller than the size of destination type (in
+    // bits).
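// Aside for the reader, not part of the patch: a standalone sketch of the
// trunc(lshr(sext A), Cst) == ashr(A, Cst) identity argued for above, with
// Cst below the destination width. It assumes two's-complement narrowing and
// an arithmetic signed right shift, which mainstream compilers provide (and
// C++20 guarantees).
#include <cassert>
#include <cstdint>

static int32_t truncLshrSext(int32_t A, unsigned C) {
  uint64_t Wide = static_cast<uint64_t>(static_cast<int64_t>(A)); // sext
  return static_cast<int32_t>(Wide >> C);                         // lshr+trunc
}

int main() {
  for (int32_t A : {INT32_MIN, -7, -1, 0, 5, INT32_MAX})
    for (unsigned C : {0u, 1u, 13u, 31u})      // all below the 32-bit width
      assert(truncLshrSext(A, C) == (A >> C)); // ashr A, C
  return 0;
}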
+ if (Cst->getValue().ult(ASize)) { + Value *Shift = Builder->CreateAShr(A, Cst->getZExtValue()); + Shift->takeName(Src); + return CastInst::CreateIntegerCast(Shift, CI.getType(), true); + } } // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest // type isn't non-native. - if (Src->hasOneUse() && isa(Src->getType()) && - ShouldChangeType(Src->getType(), CI.getType()) && + if (Src->hasOneUse() && isa(SrcTy) && + ShouldChangeType(SrcTy, DestTy) && match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) { - Value *NewTrunc = Builder->CreateTrunc(A, CI.getType(), A->getName()+".tr"); + Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr"); return BinaryOperator::CreateAnd(NewTrunc, - ConstantExpr::getTrunc(Cst, CI.getType())); + ConstantExpr::getTrunc(Cst, DestTy)); } + if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL)) + return I; + return nullptr; } -/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations -/// in order to eliminate the icmp. +/// Transform (zext icmp) to bitwise / integer operations in order to eliminate +/// the icmp. Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform) { // If we are just checking for a icmp eq of a single bit and zext'ing it @@ -637,8 +703,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, return nullptr; } -/// CanEvaluateZExtd - Determine if the specified value can be computed in the -/// specified wider type and produce the same low bits. If not, return false. +/// Determine if the specified value can be computed in the specified wider type +/// and produce the same low bits. If not, return false. /// /// If this function returns true, it can also return a non-zero number of bits /// (in BitsToClear) which indicates that the value it computes is correct for @@ -655,7 +721,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, /// clear the top bits anyway, doing this has no extra cost. /// /// This function works on both vectors and scalars. -static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, +static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, InstCombiner &IC, Instruction *CxtI) { BitsToClear = 0; if (isa(V)) @@ -685,8 +751,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, case Instruction::Add: case Instruction::Sub: case Instruction::Mul: - if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) || - !CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI)) + if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) || + !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI)) return false; // These can all be promoted if neither operand has 'bits to clear'. if (BitsToClear == 0 && Tmp == 0) @@ -713,7 +779,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, // We can promote shl(x, cst) if we can promote x. Since shl overwrites the // upper bits we can reduce BitsToClear by the shift amount. if (ConstantInt *Amt = dyn_cast(I->getOperand(1))) { - if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) + if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) return false; uint64_t ShiftAmt = Amt->getZExtValue(); BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0; @@ -724,7 +790,7 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, // We can promote lshr(x, cst) if we can promote x. 
This requires the // ultimate 'and' to clear out the high zero bits we're clearing out though. if (ConstantInt *Amt = dyn_cast(I->getOperand(1))) { - if (!CanEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) + if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI)) return false; BitsToClear += Amt->getZExtValue(); if (BitsToClear > V->getType()->getScalarSizeInBits()) @@ -734,8 +800,8 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, // Cannot promote variable LSHR. return false; case Instruction::Select: - if (!CanEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) || - !CanEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) || + if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) || + !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) || // TODO: If important, we could handle the case when the BitsToClear are // known zero in the disagreeing side. Tmp != BitsToClear) @@ -747,10 +813,10 @@ static bool CanEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, // get into trouble with cyclic PHIs here because we only consider // instructions with a single use. PHINode *PN = cast(I); - if (!CanEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI)) + if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI)) return false; for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) - if (!CanEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) || + if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) || // TODO: If important, we could handle the case when the BitsToClear // are known zero in the disagreeing input. Tmp != BitsToClear) @@ -787,13 +853,13 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { // strange. unsigned BitsToClear; if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) && - CanEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) { + canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) { assert(BitsToClear < SrcTy->getScalarSizeInBits() && "Unreasonable BitsToClear"); // Okay, we can transform this! Insert the new expression now. DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type" - " to avoid zero extend: " << CI); + " to avoid zero extend: " << CI << '\n'); Value *Res = EvaluateInDifferentType(Src, DestTy, false); assert(Res->getType() == DestTy); @@ -897,8 +963,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return nullptr; } -/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations -/// in order to eliminate the icmp. +/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp. Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); ICmpInst::Predicate Pred = ICI->getPredicate(); @@ -985,15 +1050,14 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { return nullptr; } -/// CanEvaluateSExtd - Return true if we can take the specified value -/// and return it as type Ty without inserting any new casts and without -/// changing the value of the common low bits. This is used by code that tries -/// to promote integer operations to a wider types will allow us to eliminate -/// the extension. +/// Return true if we can take the specified value and return it as type Ty +/// without inserting any new casts and without changing the value of the common +/// low bits. 
This is used by code that tries to promote integer operations to
+/// a wider type, which will allow us to eliminate the extension.
 ///
 /// This function works on both vectors and scalars.
 ///
-static bool CanEvaluateSExtd(Value *V, Type *Ty) {
+static bool canEvaluateSExtd(Value *V, Type *Ty) {
   assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
          "Can't sign extend type to a smaller type");
   // If this is a constant, it can be trivially promoted.
@@ -1023,15 +1087,15 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
   case Instruction::Sub:
   case Instruction::Mul:
     // These operators can all arbitrarily be extended if their inputs can.
-    return CanEvaluateSExtd(I->getOperand(0), Ty) &&
-           CanEvaluateSExtd(I->getOperand(1), Ty);
+    return canEvaluateSExtd(I->getOperand(0), Ty) &&
+           canEvaluateSExtd(I->getOperand(1), Ty);

   //case Instruction::Shl:   TODO
   //case Instruction::LShr:  TODO

   case Instruction::Select:
-    return CanEvaluateSExtd(I->getOperand(1), Ty) &&
-           CanEvaluateSExtd(I->getOperand(2), Ty);
+    return canEvaluateSExtd(I->getOperand(1), Ty) &&
+           canEvaluateSExtd(I->getOperand(2), Ty);

   case Instruction::PHI: {
     // We can change a phi if we can change all operands.  Note that we never
@@ -1039,7 +1103,7 @@ static bool CanEvaluateSExtd(Value *V, Type *Ty) {
     // instructions with a single use.
     PHINode *PN = cast<PHINode>(I);
     for (Value *IncValue : PN->incoming_values())
-      if (!CanEvaluateSExtd(IncValue, Ty)) return false;
+      if (!canEvaluateSExtd(IncValue, Ty)) return false;
     return true;
   }
   default:
@@ -1081,10 +1145,10 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
   // expression tree to something weird like i93 unless the source is also
   // strange.
   if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
-      CanEvaluateSExtd(Src, DestTy)) {
+      canEvaluateSExtd(Src, DestTy)) {
     // Okay, we can transform this!  Insert the new expression now.
     DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
-          " to avoid sign extend: " << CI);
+          " to avoid sign extend: " << CI << '\n');
     Value *Res = EvaluateInDifferentType(Src, DestTy, true);
     assert(Res->getType() == DestTy);
@@ -1149,9 +1213,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
 }

-/// FitsInFPType - Return a Constant* for the specified FP constant if it fits
+/// Return a Constant* for the specified floating-point constant if it fits
 /// in the specified FP type without changing its value.
-static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
+static Constant *fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
   bool losesInfo;
   APFloat F = CFP->getValueAPF();
   (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
@@ -1160,12 +1224,12 @@ static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
   return nullptr;
 }

-/// LookThroughFPExtensions - If this is an fp extension instruction, look
+/// If this is a floating-point extension instruction, look
 /// through it until we get the source value.
-static Value *LookThroughFPExtensions(Value *V) {
+static Value *lookThroughFPExtensions(Value *V) {
   if (Instruction *I = dyn_cast<Instruction>(V))
     if (I->getOpcode() == Instruction::FPExt)
-      return LookThroughFPExtensions(I->getOperand(0));
+      return lookThroughFPExtensions(I->getOperand(0));

   // If this value is a constant, return the constant in the smallest FP type
   // that can accurately represent it.
This allows us to turn @@ -1174,14 +1238,14 @@ static Value *LookThroughFPExtensions(Value *V) { if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext())) return V; // No constant folding of this. // See if the value can be truncated to half and then reextended. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEhalf)) + if (Value *V = fitsInFPType(CFP, APFloat::IEEEhalf)) return V; // See if the value can be truncated to float and then reextended. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEsingle)) + if (Value *V = fitsInFPType(CFP, APFloat::IEEEsingle)) return V; if (CFP->getType()->isDoubleTy()) return V; // Won't shrink. - if (Value *V = FitsInFPType(CFP, APFloat::IEEEdouble)) + if (Value *V = fitsInFPType(CFP, APFloat::IEEEdouble)) return V; // Don't try to shrink to various long double types. } @@ -1193,7 +1257,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to - // simpilify this expression to avoid one or more of the trunc/extend + // simplify this expression to avoid one or more of the trunc/extend // operations if we can do so without changing the numerical results. // // The exact manner in which the widths of the operands interact to limit @@ -1201,8 +1265,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // is explained below in the various case statements. BinaryOperator *OpI = dyn_cast(CI.getOperand(0)); if (OpI && OpI->hasOneUse()) { - Value *LHSOrig = LookThroughFPExtensions(OpI->getOperand(0)); - Value *RHSOrig = LookThroughFPExtensions(OpI->getOperand(1)); + Value *LHSOrig = lookThroughFPExtensions(OpI->getOperand(0)); + Value *RHSOrig = lookThroughFPExtensions(OpI->getOperand(1)); unsigned OpWidth = OpI->getType()->getFPMantissaWidth(); unsigned LHSWidth = LHSOrig->getType()->getFPMantissaWidth(); unsigned RHSWidth = RHSOrig->getType()->getFPMantissaWidth(); @@ -1307,10 +1371,16 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { // (fptrunc (select cond, R1, Cst)) --> // (select cond, (fptrunc R1), (fptrunc Cst)) + // + // - but only if this isn't part of a min/max operation, else we'll + // ruin min/max canonical form which is to have the select and + // compare's operands be of the same type with no casts to look through. + Value *LHS, *RHS; SelectInst *SI = dyn_cast(CI.getOperand(0)); if (SI && (isa(SI->getOperand(1)) || - isa(SI->getOperand(2)))) { + isa(SI->getOperand(2))) && + matchSelectPattern(SI, LHS, RHS).Flavor == SPF_UNKNOWN) { Value *LHSTrunc = Builder->CreateFPTrunc(SI->getOperand(1), CI.getType()); Value *RHSTrunc = Builder->CreateFPTrunc(SI->getOperand(2), @@ -1327,9 +1397,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { Value *InnerTrunc = Builder->CreateFPTrunc(II->getArgOperand(0), CI.getType()); Type *IntrinsicType[] = { CI.getType() }; - Function *Overload = - Intrinsic::getDeclaration(CI.getParent()->getParent()->getParent(), - II->getIntrinsicID(), IntrinsicType); + Function *Overload = Intrinsic::getDeclaration( + CI.getModule(), II->getIntrinsicID(), IntrinsicType); Value *Args[] = { InnerTrunc }; return CallInst::Create(Overload, Args, II->getName()); @@ -1483,12 +1552,12 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) { return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false); } -/// OptimizeVectorResize - This input value (which is known to have vector type) -/// is being zero extended or truncated to the specified vector type. 
Try to -/// replace it with a shuffle (and vector/vector bitcast) if possible. +/// This input value (which is known to have vector type) is being zero extended +/// or truncated to the specified vector type. +/// Try to replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. -static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy, +static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. @@ -1548,8 +1617,8 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { return Value / Ty->getPrimitiveSizeInBits(); } -/// CollectInsertionElements - V is a value which is inserted into a vector of -/// VecEltTy. Look through the value to see if we can decompose it into +/// V is a value which is inserted into a vector of VecEltTy. +/// Look through the value to see if we can decompose it into /// insertions into the vector. See the example in the comment for /// OptimizeIntegerToVectorInsertions for the pattern this handles. /// The type of V is always a non-zero multiple of VecEltTy's size. @@ -1558,7 +1627,7 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) { /// /// This returns false if the pattern can't be matched or true if it can, /// filling in Elements with the elements found here. -static bool CollectInsertionElements(Value *V, unsigned Shift, +static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl &Elements, Type *VecEltTy, bool isBigEndian) { assert(isMultipleOfTypeSize(Shift, VecEltTy) && @@ -1595,7 +1664,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, // If the constant is the size of a vector element, we just need to bitcast // it to the right type so it gets properly inserted. if (NumElts == 1) - return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), + return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy), Shift, Elements, VecEltTy, isBigEndian); // Okay, this is a constant that covers multiple elements. Slice it up into @@ -1611,7 +1680,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(), ShiftI)); Piece = ConstantExpr::getTrunc(Piece, ElementIntTy); - if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, + if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy, isBigEndian)) return false; } @@ -1625,19 +1694,19 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, switch (I->getOpcode()) { default: return false; // Unhandled case. 
case Instruction::BitCast: - return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, isBigEndian); case Instruction::ZExt: if (!isMultipleOfTypeSize( I->getOperand(0)->getType()->getPrimitiveSizeInBits(), VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, isBigEndian); case Instruction::Or: - return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, isBigEndian) && - CollectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy, + collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy, isBigEndian); case Instruction::Shl: { // Must be shifting by a constant that is a multiple of the element size. @@ -1645,7 +1714,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, if (!CI) return false; Shift += CI->getZExtValue(); if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false; - return CollectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, + return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy, isBigEndian); } @@ -1653,8 +1722,8 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, } -/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we -/// may be doing shifts and ors to assemble the elements of the vector manually. +/// If the input is an 'or' instruction, we may be doing shifts and ors to +/// assemble the elements of the vector manually. /// Try to rip the code out and replace it with insertelements. This is to /// optimize code like this: /// @@ -1667,13 +1736,13 @@ static bool CollectInsertionElements(Value *V, unsigned Shift, /// %tmp43 = bitcast i64 %ins35 to <2 x float> /// /// Into two insertelements that do "buildvector{%inc, %inc5}". -static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, +static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombiner &IC) { VectorType *DestVecTy = cast(CI.getType()); Value *IntInput = CI.getOperand(0); SmallVector Elements(DestVecTy->getNumElements()); - if (!CollectInsertionElements(IntInput, 0, Elements, + if (!collectInsertionElements(IntInput, 0, Elements, DestVecTy->getElementType(), IC.getDataLayout().isBigEndian())) return nullptr; @@ -1692,63 +1761,29 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI, return Result; } - -/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double -/// bitcast. The various long double bitcasts can't get in here. -static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI, InstCombiner &IC, +/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the +/// vector followed by extract element. The backend tends to handle bitcasts of +/// vectors better than bitcasts of scalars because vector registers are +/// usually not type-specific like scalar integer or scalar floating-point. +static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast, + InstCombiner &IC, const DataLayout &DL) { - Value *Src = CI.getOperand(0); - Type *DestTy = CI.getType(); + // TODO: Create and use a pattern matcher for ExtractElementInst. 
+ auto *ExtElt = dyn_cast(BitCast.getOperand(0)); + if (!ExtElt || !ExtElt->hasOneUse()) + return nullptr; - // If this is a bitcast from int to float, check to see if the int is an - // extraction from a vector. - Value *VecInput = nullptr; - // bitcast(trunc(bitcast(somevector))) - if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && - isa(VecInput->getType())) { - VectorType *VecTy = cast(VecInput->getType()); - unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + // The bitcast must be to a vectorizable type, otherwise we can't make a new + // type to extract from. + Type *DestType = BitCast.getType(); + if (!VectorType::isValidElementType(DestType)) + return nullptr; - if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { - // If the element type of the vector doesn't match the result type, - // bitcast it to be a vector type we can extract from. - if (VecTy->getElementType() != DestTy) { - VecTy = VectorType::get(DestTy, - VecTy->getPrimitiveSizeInBits() / DestWidth); - VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); - } - - unsigned Elt = 0; - if (DL.isBigEndian()) - Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1; - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); - } - } - - // bitcast(trunc(lshr(bitcast(somevector), cst)) - ConstantInt *ShAmt = nullptr; - if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), - m_ConstantInt(ShAmt)))) && - isa(VecInput->getType())) { - VectorType *VecTy = cast(VecInput->getType()); - unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); - if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && - ShAmt->getZExtValue() % DestWidth == 0) { - // If the element type of the vector doesn't match the result type, - // bitcast it to be a vector type we can extract from. - if (VecTy->getElementType() != DestTy) { - VecTy = VectorType::get(DestTy, - VecTy->getPrimitiveSizeInBits() / DestWidth); - VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); - } - - unsigned Elt = ShAmt->getZExtValue() / DestWidth; - if (DL.isBigEndian()) - Elt = VecTy->getPrimitiveSizeInBits() / DestWidth - 1 - Elt; - return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); - } - } - return nullptr; + unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements(); + auto *NewVecType = VectorType::get(DestType, NumElts); + auto *NewBC = IC.Builder->CreateBitCast(ExtElt->getVectorOperand(), + NewVecType, "bc"); + return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand()); } Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { @@ -1794,11 +1829,6 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } - // Try to optimize int -> float bitcasts. 
-  if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
-    if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this, DL))
-      return I;
-
   if (VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
     if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
       Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
@@ -1815,7 +1845,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
       CastInst *SrcCast = cast<CastInst>(Src);
       if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
         if (isa<VectorType>(BCIn->getOperand(0)->getType()))
-          if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+          if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
                                                     cast<VectorType>(DestTy), *this))
             return I;
     }
@@ -1823,7 +1853,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     // If the input is an 'or' instruction, we may be doing shifts and ors to
     // assemble the elements of the vector manually.  Try to rip the code out
     // and replace it with insertelements.
-    if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+    if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
       return ReplaceInstUsesWith(CI, V);
     }
   }
@@ -1872,6 +1902,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     }
   }

+  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
+    return I;
+
   if (SrcTy->isPointerTy())
     return commonPointerCastTransforms(CI);
   return commonCastTransforms(CI);
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 95bba3c7af7d..c0786afe965e 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -216,8 +216,6 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
   Max = KnownOne|UnknownBits;
 }
-
-
 /// FoldCmpLoadFromIndexedGlobal - Called when we see this pattern:
 ///   cmp pred (load (gep GV, ...)), cmpcst
 /// where GV is a global variable with a constant initializer.  Try to simplify
@@ -371,7 +369,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
       }
     }
-
     // If this element is in range, update our magic bitvector.
     if (i < 64 && IsTrueForElt)
       MagicBitvector |= 1ULL << i;
@@ -469,7 +466,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
     return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
   }
-
   // If a magic bitvector captures the entire comparison state
   // of this load, replace it with computation that does:
   //   ((magic_cst >> i) & 1) != 0
@@ -496,7 +492,6 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
   return nullptr;
 }
-
 /// EvaluateGEPOffsetExpression - Return a value that can be used to compare
 /// the *offset* implied by a GEP to zero.  For example, if we have &A[i], we
 /// want to return 'i' for "icmp ne i, 0".  Note that, in general, indices can
@@ -562,8 +557,6 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
     }
   }
-
-
   // Okay, we know we have a single variable index, which must be a
   // pointer/array/vector index.  If there is no offset, life is simple, return
   // the index.
@@ -737,6 +730,83 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
   return nullptr;
 }

+Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
+                                         Value *Other) {
+  assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
+
+  // It would be tempting to fold away comparisons between allocas and any
+  // pointer not based on that alloca (e.g. an argument).
However, even + // though such pointers cannot alias, they can still compare equal. + // + // But LLVM doesn't specify where allocas get their memory, so if the alloca + // doesn't escape we can argue that it's impossible to guess its value, and we + // can therefore act as if any such guesses are wrong. + // + // The code below checks that the alloca doesn't escape, and that it's only + // used in a comparison once (the current instruction). The + // single-comparison-use condition ensures that we're trivially folding all + // comparisons against the alloca consistently, and avoids the risk of + // erroneously folding a comparison of the pointer with itself. + + unsigned MaxIter = 32; // Break cycles and bound to constant-time. + + SmallVector Worklist; + for (Use &U : Alloca->uses()) { + if (Worklist.size() >= MaxIter) + return nullptr; + Worklist.push_back(&U); + } + + unsigned NumCmps = 0; + while (!Worklist.empty()) { + assert(Worklist.size() <= MaxIter); + Use *U = Worklist.pop_back_val(); + Value *V = U->getUser(); + --MaxIter; + + if (isa(V) || isa(V) || isa(V) || + isa(V)) { + // Track the uses. + } else if (isa(V)) { + // Loading from the pointer doesn't escape it. + continue; + } else if (auto *SI = dyn_cast(V)) { + // Storing *to* the pointer is fine, but storing the pointer escapes it. + if (SI->getValueOperand() == U->get()) + return nullptr; + continue; + } else if (isa(V)) { + if (NumCmps++) + return nullptr; // Found more than one cmp. + continue; + } else if (auto *Intrin = dyn_cast(V)) { + switch (Intrin->getIntrinsicID()) { + // These intrinsics don't escape or compare the pointer. Memset is safe + // because we don't allow ptrtoint. Memcpy and memmove are safe because + // we don't allow stores, so src cannot point to V. + case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: + case Intrinsic::dbg_declare: case Intrinsic::dbg_value: + case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: + continue; + default: + return nullptr; + } + } else { + return nullptr; + } + for (Use &U : V->uses()) { + if (Worklist.size() >= MaxIter) + return nullptr; + Worklist.push_back(&U); + } + } + + Type *CmpTy = CmpInst::makeCmpResultType(Other->getType()); + return ReplaceInstUsesWith( + ICI, + ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate()))); +} + /// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X". Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, @@ -851,7 +921,6 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI, // to the same result value. HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false); } - } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0. if (CmpRHSV == 0) { // (X / pos) op 0 // Can't overflow. e.g. X/2 op 0 --> [-1, 2) @@ -996,7 +1065,6 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr, return Res; } - // If we are comparing against bits always shifted out, the // comparison cannot succeed. APInt Comp = CmpRHSV << ShAmtVal; @@ -1074,18 +1142,22 @@ Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A, if (AP1 == AP2) return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); - // Get the distance between the highest bit that's set. int Shift; - // Both the constants are negative, take their positive to calculate log. if (IsAShr && AP1.isNegative()) - // Get the ones' complement of AP2 and AP1 when computing the distance. 
- Shift = (~AP2).logBase2() - (~AP1).logBase2(); + Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes(); else - Shift = AP2.logBase2() - AP1.logBase2(); + Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros(); if (Shift > 0) { - if (IsAShr ? AP1 == AP2.ashr(Shift) : AP1 == AP2.lshr(Shift)) + if (IsAShr && AP1 == AP2.ashr(Shift)) { + // There are multiple solutions if we are comparing against -1 and the LHS + // of the ashr is not a power of two. + if (AP1.isAllOnesValue() && !AP2.isPowerOf2()) + return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift)); return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); + } else if (AP1 == AP2.lshr(Shift)) { + return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift)); + } } // Shifting const2 will never be equal to const1. return getConstant(false); @@ -1145,6 +1217,14 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, switch (LHSI->getOpcode()) { case Instruction::Trunc: + if (RHS->isOne() && RHSV.getBitWidth() > 1) { + // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1 + Value *V = nullptr; + if (ICI.getPredicate() == ICmpInst::ICMP_SLT && + match(LHSI->getOperand(0), m_Signum(m_Value(V)))) + return new ICmpInst(ICmpInst::ICMP_SLT, V, + ConstantInt::get(V->getType(), 1)); + } if (ICI.isEquality() && LHSI->hasOneUse()) { // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all // of the high bits truncated out of x are known. @@ -1447,9 +1527,35 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT : ICmpInst::ICMP_ULE, LHSI->getOperand(0), SubOne(RHS)); + + // (icmp eq (and %A, C), 0) -> (icmp sgt (trunc %A), -1) + // iff C is a power of 2 + if (ICI.isEquality() && LHSI->hasOneUse() && match(RHS, m_Zero())) { + if (auto *CI = dyn_cast(LHSI->getOperand(1))) { + const APInt &AI = CI->getValue(); + int32_t ExactLogBase2 = AI.exactLogBase2(); + if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) { + Type *NTy = IntegerType::get(ICI.getContext(), ExactLogBase2 + 1); + Value *Trunc = Builder->CreateTrunc(LHSI->getOperand(0), NTy); + return new ICmpInst(ICI.getPredicate() == ICmpInst::ICMP_EQ + ? ICmpInst::ICMP_SGE + : ICmpInst::ICMP_SLT, + Trunc, Constant::getNullValue(NTy)); + } + } + } break; case Instruction::Or: { + if (RHS->isOne()) { + // icmp slt signum(V) 1 --> icmp slt V, 1 + Value *V = nullptr; + if (ICI.getPredicate() == ICmpInst::ICMP_SLT && + match(LHSI, m_Signum(m_Value(V)))) + return new ICmpInst(ICmpInst::ICMP_SLT, V, + ConstantInt::get(V->getType(), 1)); + } + if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse()) break; Value *P, *Q; @@ -2083,11 +2189,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B, // If the pattern matches, truncate the inputs to the narrower type and // use the sadd_with_overflow intrinsic to efficiently compute both the // result and the overflow bit. 
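// Illustrative aside, not part of the patch: the shape of the rewrite that
// ProcessUGT_ADDCST_ADD performs, restated in plain C++. The wide
// compare-based overflow check and the narrow intrinsic-based one agree on
// every i8 input pair; __builtin_add_overflow (a GCC/Clang extension) stands
// in here for the llvm.sadd.with.overflow intrinsic the code emits.
#include <cassert>
#include <cstdint>

static bool wideCheck(int8_t A, int8_t B) {
  int32_t Sum = int32_t(A) + int32_t(B); // add the sign-extended values
  return Sum < -128 || Sum > 127;        // i.e. (Sum + 128) >u 255
}

static bool narrowCheck(int8_t A, int8_t B) {
  int8_t R;
  return __builtin_add_overflow(A, B, &R); // overflow bit of the i8 add
}

int main() {
  for (int A = -128; A <= 127; ++A)
    for (int B = -128; B <= 127; ++B)
      assert(wideCheck(int8_t(A), int8_t(B)) == narrowCheck(int8_t(A), int8_t(B)));
  return 0;
}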
-  Module *M = I.getParent()->getParent()->getParent();
   Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
-  Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
-                                       NewType);
+  Value *F = Intrinsic::getDeclaration(I.getModule(),
+                                       Intrinsic::sadd_with_overflow, NewType);

   InstCombiner::BuilderTy *Builder = IC.Builder;
@@ -2123,6 +2227,12 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
     return true;
   };

+  // If the overflow check was an add followed by a compare, the insertion point
+  // may be pointing to the compare.  We want to insert the new instructions
+  // before the add in case there are uses of the add between the add and the
+  // compare.
+  Builder->SetInsertPoint(&OrigI);
+
   switch (OCF) {
   case OCF_INVALID:
     llvm_unreachable("bad overflow check kind!");
@@ -2223,7 +2333,9 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
   assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
   assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);

-  Instruction *MulInstr = cast<Instruction>(MulVal);
+  auto *MulInstr = dyn_cast<Instruction>(MulVal);
+  if (!MulInstr)
+    return nullptr;
   assert(MulInstr->getOpcode() == Instruction::Mul);

   auto *LHS = cast(MulInstr->getOperand(0)),
        *RHS = cast(MulInstr->getOperand(1));
@@ -2357,7 +2469,6 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,

   InstCombiner::BuilderTy *Builder = IC.Builder;
   Builder->SetInsertPoint(MulInstr);
-  Module *M = I.getParent()->getParent()->getParent();

   // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
   Value *MulA = A, *MulB = B;
@@ -2365,8 +2476,8 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
     MulA = Builder->CreateZExt(A, MulType);
   if (WidthB < MulWidth)
     MulB = Builder->CreateZExt(B, MulType);
-  Value *F =
-      Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType);
+  Value *F = Intrinsic::getDeclaration(I.getModule(),
+                                       Intrinsic::umul_with_overflow, MulType);
   CallInst *Call = Builder->CreateCall(F, {MulA, MulB}, "umul");
   IC.Worklist.Add(MulInstr);
@@ -2468,7 +2579,6 @@ static APInt DemandedBitsLHSMask(ICmpInst &I,
   default:
     return APInt::getAllOnesValue(BitWidth);
   }
-
 }

 /// \brief Check if the order of \p Op0 and \p Op1 as operand in an ICmpInst
@@ -2905,7 +3015,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
               ConstantInt::get(X->getType(), CI->countTrailingZeros()));
       }
-
       break;
     }
     case ICmpInst::ICMP_NE: {
@@ -2950,7 +3059,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
               ConstantInt::get(X->getType(), CI->countTrailingZeros()));
       }
-
       break;
     }
     case ICmpInst::ICMP_ULT:
@@ -3103,7 +3211,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         // comparison into the select arms, which will cause one to be
         // constant folded and the select turned into a bitwise or.
         Value *Op1 = nullptr, *Op2 = nullptr;
-        ConstantInt *CI = 0;
+        ConstantInt *CI = nullptr;
         if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
           Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
           CI = dyn_cast<ConstantInt>(Op1);
@@ -3177,6 +3285,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
           ICmpInst::getSwappedPredicate(I.getPredicate()), I))
     return NI;

+  // Try to optimize equality comparisons against alloca-based pointers.
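// Illustrative aside, not part of the patch: the source-level situation
// FoldAllocaCmp is after, with hypothetical names. A local whose address
// never escapes cannot have its address guessed, so an equality comparison
// against any other pointer may be folded to a constant false.
#include <cstdio>

static int Global;

static bool sameAddress(int *P) {
  int Local = 0;      // becomes a non-escaping alloca
  return P == &Local; // the icmp eq this patch folds to false
}

int main() {
  std::printf("%d\n", sameAddress(&Global)); // prints 0
  return 0;
}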
+  if (Op0->getType()->isPointerTy() && I.isEquality()) {
+    assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
+    if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
+      if (Instruction *New = FoldAllocaCmp(I, Alloca, Op1))
+        return New;
+    if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
+      if (Instruction *New = FoldAllocaCmp(I, Alloca, Op0))
+        return New;
+  }
+
   // Test to see if the operands of the icmp are casted versions of other
   // values.  If the ptr->ptr cast can be stripped off both arguments, we do so
   // now.
@@ -3304,6 +3423,26 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         match(B, m_One()))
       return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);

+    // icmp sgt X, (Y + -1) -> icmp sge X, Y
+    if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
+        match(D, m_AllOnes()))
+      return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
+
+    // icmp sle X, (Y + -1) -> icmp slt X, Y
+    if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
+        match(D, m_AllOnes()))
+      return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
+
+    // icmp sge X, (Y + 1) -> icmp sgt X, Y
+    if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE &&
+        match(D, m_One()))
+      return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
+
+    // icmp slt X, (Y + 1) -> icmp sle X, Y
+    if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT &&
+        match(D, m_One()))
+      return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
+
     // if C1 has greater magnitude than C2:
     //   icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
     //   s.t. C3 = C1 - C2
@@ -3473,6 +3612,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
         }
       }
     }
+
+    if (BO0) {
+      // Transform  A & (L - 1) `ult` L --> L != 0
+      auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
+      auto BitwiseAnd =
+          m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
+
+      if (match(BO0, BitwiseAnd) && I.getPredicate() == ICmpInst::ICMP_ULT) {
+        auto *Zero = Constant::getNullValue(BO0->getType());
+        return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
+      }
+    }
   }

   {
     Value *A, *B;
@@ -3697,15 +3848,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
   IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());

-  // Check to see that the input is converted from an integer type that is small
-  // enough that preserves all bits.  TODO: check here for "known" sign bits.
-  // This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
-  unsigned InputSize = IntTy->getScalarSizeInBits();
-
-  // If this is a uitofp instruction, we need an extra bit to hold the sign.
   bool LHSUnsigned = isa<UIToFPInst>(LHSI);
-  if (LHSUnsigned)
-    ++InputSize;

   if (I.isEquality()) {
     FCmpInst::Predicate P = I.getPredicate();
@@ -3732,13 +3875,30 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
     // equality compares as integer?
   }

-  // Comparisons with zero are a special case where we know we won't lose
-  // information.
-  bool IsCmpZero = RHS.isPosZero();
+  // Check to see that the input is converted from an integer type that is
+  // small enough that it preserves all bits.  TODO: check here for "known"
+  // sign bits.  This would allow us to handle (fptosi (x >>s 62) to float)
+  // if x is i64 f.e.
+  unsigned InputSize = IntTy->getScalarSizeInBits();

-  // If the conversion would lose info, don't hack on this.
-  if ((int)InputSize > MantissaWidth && !IsCmpZero)
-    return nullptr;
+  // The following test does NOT adjust InputSize downwards for signed inputs,
+  // because the most negative value still requires all the mantissa bits
+  // to distinguish it from one less than that value.
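// Aside for the reader, not part of the patch: a numeric illustration of why
// the mantissa-width reasoning above is needed. float carries a 24-bit
// significand, so not every 32-bit integer survives the int -> float -> int
// round trip, and a compare done in float can conflate distinct integers.
#include <cassert>

int main() {
  int A = (1 << 24) + 1;      // 16777217: needs 25 significant bits
  assert(int(float(A)) != A); // rounds to 2^24, information lost
  int B = 1 << 24;            // 16777216 is exactly representable
  assert(int(float(B)) == B);
  return 0;
}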
+ if ((int)InputSize > MantissaWidth) { + // Conversion would lose accuracy. Check if loss can impact comparison. + int Exp = ilogb(RHS); + if (Exp == APFloat::IEK_Inf) { + int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics())); + if (MaxExponent < (int)InputSize - !LHSUnsigned) + // Conversion could create infinity. + return nullptr; + } else { + // Note that if RHS is zero or NaN, then Exp is negative + // and first condition is trivially false. + if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned) + // Conversion could affect comparison. + return nullptr; + } + } // Otherwise, we can potentially simplify the comparison. We know that it // will always come through as an integer value and we know the constant is diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index ac934f1bd85c..534f67008150 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -281,6 +281,7 @@ public: ICmpInst::Predicate Pred); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, ICmpInst::Predicate Cond, Instruction &I); + Instruction *FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, Value *Other); Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1, BinaryOperator &I); Instruction *commonCastTransforms(CastInst &CI); @@ -341,6 +342,7 @@ public: const unsigned SIOpd); private: + bool ShouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const; bool ShouldChangeType(Type *From, Type *To) const; Value *dyn_castNegVal(Value *V) const; Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const; @@ -360,6 +362,11 @@ private: /// \brief Try to optimize a sequence of instructions checking if an operation /// on LHS and RHS overflows. /// + /// If this overflow check is done via one of the overflow check intrinsics, + /// then CtxI has to be the call instruction calling that intrinsic. If this + /// overflow check is done by arithmetic followed by a compare, then CtxI has + /// to be the arithmetic instruction. + /// /// If a simplification is possible, stores the simplified result of the /// operation in OperationResult and result of the overflow check in /// OverflowResult, and return true. 
If no simplification is possible, @@ -393,7 +400,7 @@ public: assert(New && !New->getParent() && "New instruction already inserted into a basic block!"); BasicBlock *BB = Old.getParent(); - BB->getInstList().insert(&Old, New); // Insert inst + BB->getInstList().insert(Old.getIterator(), New); // Insert inst Worklist.Add(New); return New; } @@ -539,6 +546,7 @@ private: Instruction *FoldPHIArgBinOpIntoPHI(PHINode &PN); Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN); Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN); + Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN); Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS, ConstantInt *AndRHS, BinaryOperator &TheAnd); @@ -548,7 +556,7 @@ private: Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned, bool Inside); Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI); - Instruction *MatchBSwap(BinaryOperator &I); + Instruction *MatchBSwapOrBitReverse(BinaryOperator &I); bool SimplifyStoreAtEndOfBlock(StoreInst &SI); Instruction *SimplifyMemTransfer(MemIntrinsic *MI); Instruction *SimplifyMemSet(MemSetInst *MI); diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index e3179dbeece8..47406b9a1632 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/IR/DataLayout.h" @@ -90,21 +91,23 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (CS.isCallee(&U)) continue; + unsigned DataOpNo = CS.getDataOperandNo(&U); + bool IsArgOperand = CS.isArgOperand(&U); + // Inalloca arguments are clobbered by the call. - unsigned ArgNo = CS.getArgumentNo(&U); - if (CS.isInAllocaArgument(ArgNo)) + if (IsArgOperand && CS.isInAllocaArgument(DataOpNo)) return false; // If this is a readonly/readnone call site, then we know it is just a // load (but one that potentially returns the value itself), so we can // ignore it if we know that the value isn't captured. if (CS.onlyReadsMemory() && - (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) + (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo))) continue; // If this is being passed as a byval argument, the caller is making a // copy, so it is only a read of the alloca. - if (CS.isByValArgument(ArgNo)) + if (IsArgOperand && CS.isByValArgument(DataOpNo)) continue; } @@ -186,7 +189,7 @@ static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) { // Scan to the end of the allocation instructions, to skip over a block of // allocas if possible...also skip interleaved debug info // - BasicBlock::iterator It = New; + BasicBlock::iterator It(New); while (isa(*It) || isa(*It)) ++It; @@ -367,7 +370,13 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT MDB.createRange(NonNullInt, NullInt)); } break; - + case LLVMContext::MD_align: + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + // These only directly apply if the new type is also a pointer. + if (NewTy->isPointerTy()) + NewLoad->setMetadata(ID, N); + break; case LLVMContext::MD_range: // FIXME: It would be nice to propagate this in some way, but the type // conversions make it hard. 
If the new type is a pointer, we could
@@ -418,6 +427,9 @@ static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value
     case LLVMContext::MD_invariant_load:
     case LLVMContext::MD_nonnull:
     case LLVMContext::MD_range:
+    case LLVMContext::MD_align:
+    case LLVMContext::MD_dereferenceable:
+    case LLVMContext::MD_dereferenceable_or_null:
       // These don't apply for stores.
       break;
     }
@@ -511,16 +523,46 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
   if (!T->isAggregateType())
     return nullptr;

-  assert(LI.getAlignment() && "Alignement must be set at this point");
+  assert(LI.getAlignment() && "Alignment must be set at this point");

   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only has one element, we unpack.
-    if (ST->getNumElements() == 1) {
+    unsigned Count = ST->getNumElements();
+    if (Count == 1) {
       LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
                                                ".unpack");
       return IC.ReplaceInstUsesWith(LI, IC.Builder->CreateInsertValue(
         UndefValue::get(T), NewLoad, 0, LI.getName()));
     }
+
+    // We don't want to break loads with padding here as we'd lose
+    // the knowledge that padding exists for the rest of the pipeline.
+    const DataLayout &DL = IC.getDataLayout();
+    auto *SL = DL.getStructLayout(ST);
+    if (SL->hasPadding())
+      return nullptr;
+
+    auto Name = LI.getName();
+    SmallString<16> LoadName = Name;
+    LoadName += ".unpack";
+    SmallString<16> EltName = Name;
+    EltName += ".elt";
+    auto *Addr = LI.getPointerOperand();
+    Value *V = UndefValue::get(T);
+    auto *IdxType = Type::getInt32Ty(ST->getContext());
+    auto *Zero = ConstantInt::get(IdxType, 0);
+    for (unsigned i = 0; i < Count; i++) {
+      Value *Indices[2] = {
+        Zero,
+        ConstantInt::get(IdxType, i),
+      };
+      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), EltName);
+      auto *L = IC.Builder->CreateLoad(ST->getTypeAtIndex(i), Ptr, LoadName);
+      V = IC.Builder->CreateInsertValue(V, L, i);
+    }
+
+    V->setName(Name);
+    return IC.ReplaceInstUsesWith(LI, V);
   }

   if (auto *AT = dyn_cast<ArrayType>(T)) {
@@ -681,7 +723,7 @@ static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
   // FIXME: If the GEP is not inbounds, and there are extra indices after the
   // one we'll replace, those could cause the address computation to wrap
   // (rendering the IsAllNonNegative() check below insufficient). We can do
-  // better, ignoring zero indicies (and other indicies we can prove small
+  // better, ignoring zero indices (and other indices we can prove small
   // enough not to wrap).
   if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
     return false;
@@ -748,19 +790,19 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
   // Do really simple store-to-load forwarding and load CSE, to catch cases
   // where there are several consecutive memory accesses to the same location,
   // separated by a few arithmetic operations.
-  BasicBlock::iterator BBI = &LI;
+  BasicBlock::iterator BBI(LI);
   AAMDNodes AATags;
-  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,
-                                                     6, AA, &AATags)) {
+  if (Value *AvailableVal =
+          FindAvailableLoadedValue(Op, LI.getParent(), BBI,
+                                   DefMaxInstsToScan, AA, &AATags)) {
     if (LoadInst *NLI = dyn_cast<LoadInst>(AvailableVal)) {
       unsigned KnownIDs[] = {
-        LLVMContext::MD_tbaa,
-        LLVMContext::MD_alias_scope,
-        LLVMContext::MD_noalias,
-        LLVMContext::MD_range,
-        LLVMContext::MD_invariant_load,
-        LLVMContext::MD_nonnull,
-      };
+          LLVMContext::MD_tbaa,            LLVMContext::MD_alias_scope,
+          LLVMContext::MD_noalias,         LLVMContext::MD_range,
+          LLVMContext::MD_invariant_load,  LLVMContext::MD_nonnull,
+          LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+          LLVMContext::MD_dereferenceable,
+          LLVMContext::MD_dereferenceable_or_null};
       combineMetadata(NLI, &LI, KnownIDs);
     };
@@ -822,7 +864,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
       }

       // load (select (cond, null, P)) -> load P
-      if (isa(SI->getOperand(1)) &&
+      if (isa(SI->getOperand(1)) && LI.getPointerAddressSpace() == 0) {
         LI.setOperand(0, SI->getOperand(2));
         return &LI;
       }
@@ -857,7 +899,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
 ///
 /// \returns true if the store was successfully combined away. This indicates
 /// the caller must erase the store instruction. We have to let the caller erase
-/// the store instruction sas otherwise there is no way to signal whether it was
+/// the store instruction as otherwise there is no way to signal whether it was
 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
   // FIXME: We could probably with some care handle both volatile and atomic
@@ -893,11 +935,38 @@ static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {

   if (auto *ST = dyn_cast<StructType>(T)) {
     // If the struct only has one element, we unpack.
-    if (ST->getNumElements() == 1) {
+    unsigned Count = ST->getNumElements();
+    if (Count == 1) {
       V = IC.Builder->CreateExtractValue(V, 0);
       combineStoreToNewValue(IC, SI, V);
       return true;
     }
+
+    // We don't want to break stores with padding here as we'd lose
+    // the knowledge that padding exists for the rest of the pipeline.
+    const DataLayout &DL = IC.getDataLayout();
+    auto *SL = DL.getStructLayout(ST);
+    if (SL->hasPadding())
+      return false;
+
+    SmallString<16> EltName = V->getName();
+    EltName += ".elt";
+    auto *Addr = SI.getPointerOperand();
+    SmallString<16> AddrName = Addr->getName();
+    AddrName += ".repack";
+    auto *IdxType = Type::getInt32Ty(ST->getContext());
+    auto *Zero = ConstantInt::get(IdxType, 0);
+    for (unsigned i = 0; i < Count; i++) {
+      Value *Indices[2] = {
+        Zero,
+        ConstantInt::get(IdxType, i),
+      };
+      auto *Ptr = IC.Builder->CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices), AddrName);
+      auto *Val = IC.Builder->CreateExtractValue(V, i, EltName);
+      IC.Builder->CreateStore(Val, Ptr);
+    }
+
+    return true;
   }

   if (auto *AT = dyn_cast<ArrayType>(T)) {
@@ -971,9 +1040,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
     return &SI;
   }

-  // Don't hack volatile/atomic stores.
-  // FIXME: Some bits are legal for atomic stores; needs refactoring.
-  if (!SI.isSimple()) return nullptr;
+  // Don't hack volatile/ordered stores.
+  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
+  if (!SI.isUnordered()) return nullptr;

   // If the RHS is an alloca with a single use, zapify the store, making the
   // alloca dead.
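// Illustrative aside, not part of the patch: a source-level picture of what
// unpackStoreToAggregate now does for padding-free structs, using a
// hypothetical type. The aggregate store is rewritten into one scalar store
// per field through element GEPs, a form the rest of the pipeline handles
// much better.
#include <cassert>

struct Pair { int A; int B; }; // no padding between the two ints

static void storeWhole(Pair *P, Pair V) { *P = V; } // one aggregate store

static void storeUnpacked(Pair *P, Pair V) { // what the transform produces
  P->A = V.A; // gep P, 0, 0 ; store
  P->B = V.B; // gep P, 0, 1 ; store
}

int main() {
  Pair X{0, 0}, Y{0, 0};
  storeWhole(&X, Pair{9, 8});
  storeUnpacked(&Y, Pair{9, 8});
  assert(X.A == Y.A && X.B == Y.B);
  return 0;
}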
@@ -991,7 +1060,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // Do really simple DSE, to catch cases where there are several consecutive // stores to the same location, separated by a few arithmetic operations. This // situation often occurs with bitfield accesses. - BasicBlock::iterator BBI = &SI; + BasicBlock::iterator BBI(SI); for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts; --ScanInsts) { --BBI; @@ -1005,7 +1074,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (StoreInst *PrevSI = dyn_cast(BBI)) { // Prev store isn't volatile, and stores to the same location? - if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1), + if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1), SI.getOperand(1))) { ++NumDeadStore; ++BBI; @@ -1019,9 +1088,10 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { // the pointer we're loading and is producing the pointer we're storing, // then *this* store is dead (X = load P; store X -> P). if (LoadInst *LI = dyn_cast(BBI)) { - if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) && - LI->isSimple()) + if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) { + assert(SI.isUnordered() && "can't eliminate ordering operation"); return EraseInstFromFunction(SI); + } // Otherwise, this is a load from some other location. Stores before it // may not be dead. @@ -1047,10 +1117,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa(Val)) return EraseInstFromFunction(SI); + // The code below needs to be audited and adjusted for unordered atomics + if (!SI.isSimple()) + return nullptr; + // If this store is the last instruction in the basic block (possibly // excepting debug info instructions), and if the block ends with an // unconditional branch, try to move it to the successor block. - BBI = &SI; + BBI = SI.getIterator(); do { ++BBI; } while (isa(BBI) || @@ -1106,7 +1180,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { return false; // Verify that the other block ends in a branch and is not otherwise empty. - BasicBlock::iterator BBI = OtherBB->getTerminator(); + BasicBlock::iterator BBI(OtherBB->getTerminator()); BranchInst *OtherBr = dyn_cast(BBI); if (!OtherBr || BBI == OtherBB->begin()) return false; diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index a554e9f628e0..7ad0efc42fb4 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -22,9 +22,9 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -/// simplifyValueKnownNonZero - The specific integer value is used in a context -/// where it is known to be non-zero. If this allows us to simplify the -/// computation, do so and return the new operand, otherwise return null. +/// The specific integer value is used in a context where it is known to be +/// non-zero. If this allows us to simplify the computation, do so and return +/// the new operand, otherwise return null. static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, Instruction &CxtI) { // If V has multiple uses, then we would have to do more analysis to determine @@ -76,8 +76,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, } -/// MultiplyOverflows - True if the multiply can not be expressed in an int -/// this size. +/// True if the multiply can not be expressed in an int this size. 
static bool MultiplyOverflows(const APInt &C1, const APInt &C2, APInt &Product, bool IsSigned) { bool Overflow; @@ -95,6 +94,14 @@ static bool IsMultiple(const APInt &C1, const APInt &C2, APInt &Quotient, assert(C1.getBitWidth() == C2.getBitWidth() && "Inconsistent width of constants!"); + // Bail if we will divide by zero. + if (C2.isMinValue()) + return false; + + // Bail if we would divide INT_MIN by -1. + if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue()) + return false; + APInt Remainder(C1.getBitWidth(), /*Val=*/0ULL, IsSigned); if (IsSigned) APInt::sdivrem(C1, C2, Quotient, Remainder); @@ -705,8 +712,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) { return Changed ? &I : nullptr; } -/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select -/// instruction. +/// Try to fold a divide or remainder of a select instruction. bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { SelectInst *SI = cast(I.getOperand(1)); @@ -740,7 +746,7 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { return true; // Scan the current block backward, looking for other uses of SI. - BasicBlock::iterator BBI = &I, BBFront = I.getParent()->begin(); + BasicBlock::iterator BBI = I.getIterator(), BBFront = I.getParent()->begin(); while (BBI != BBFront) { --BBI; @@ -754,10 +760,10 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { I != E; ++I) { if (*I == SI) { *I = SI->getOperand(NonNullOperand); - Worklist.Add(BBI); + Worklist.Add(&*BBI); } else if (*I == SelectCond) { *I = Builder->getInt1(NonNullOperand == 1); - Worklist.Add(BBI); + Worklist.Add(&*BBI); } } diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 460f6eb6a825..f1aa98b5e359 100644 --- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "instcombine" @@ -245,7 +246,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { /// non-address-taken alloca. Doing so will cause us to not promote the alloca /// to a register. static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { - BasicBlock::iterator BBI = L, E = L->getParent()->end(); + BasicBlock::iterator BBI = L->getIterator(), E = L->getParent()->end(); for (++BBI; BBI != E; ++BBI) if (BBI->mayWriteToMemory()) @@ -349,24 +350,40 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); + LoadInst *NewLI = new LoadInst(NewPN, "", isVolatile, LoadAlignment); - // Add all operands to the new PHI. + unsigned KnownIDs[] = { + LLVMContext::MD_tbaa, + LLVMContext::MD_range, + LLVMContext::MD_invariant_load, + LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, + LLVMContext::MD_nonnull, + LLVMContext::MD_align, + LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null, + }; + + for (unsigned ID : KnownIDs) + NewLI->setMetadata(ID, FirstLI->getMetadata(ID)); + + // Add all operands to the new PHI and combine TBAA metadata. 
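[Editor's note: the two new bail-outs in IsMultiple guard exactly the inputs for which the division itself is undefined. A compressed sketch of the guarded sequence, assuming the LLVM ADT headers are available; 'isExactMultiple' is a made-up name and this mirrors the patch rather than adding behavior.]

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // Returns true iff C1 is an exact multiple of C2, refusing the two
    // divisions that are undefined (x / 0 and INT_MIN / -1).
    static bool isExactMultiple(const APInt &C1, const APInt &C2,
                                APInt &Quotient, bool IsSigned) {
      if (C2.isMinValue())                                   // C2 == 0
        return false;
      if (IsSigned && C1.isMinSignedValue() && C2.isAllOnesValue())
        return false;                                        // INT_MIN / -1
      APInt Remainder(C1.getBitWidth(), 0, IsSigned);
      if (IsSigned)
        APInt::sdivrem(C1, C2, Quotient, Remainder);
      else
        APInt::udivrem(C1, C2, Quotient, Remainder);
      return Remainder.isMinValue();                         // remainder == 0
    }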
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) { - Value *NewInVal = cast(PN.getIncomingValue(i))->getOperand(0); + LoadInst *LI = cast(PN.getIncomingValue(i)); + combineMetadata(NewLI, LI, KnownIDs); + Value *NewInVal = LI->getOperand(0); if (NewInVal != InVal) InVal = nullptr; NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i)); } - Value *PhiVal; if (InVal) { // The new PHI unions all of the same values together. This is really // common, so we handle it intelligently here for compile-time speed. - PhiVal = InVal; + NewLI->setOperand(0, InVal); delete NewPN; } else { InsertNewInstBefore(NewPN, PN); - PhiVal = NewPN; } // If this was a volatile load that we are merging, make sure to loop through @@ -376,17 +393,94 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { for (Value *IncValue : PN.incoming_values()) cast(IncValue)->setVolatile(false); - LoadInst *NewLI = new LoadInst(PhiVal, "", isVolatile, LoadAlignment); NewLI->setDebugLoc(FirstLI->getDebugLoc()); return NewLI; } +/// TODO: This function could handle other cast types, but then it might +/// require special-casing a cast from the 'i1' type. See the comment in +/// FoldPHIArgOpIntoPHI() about pessimizing illegal integer types. +Instruction *InstCombiner::FoldPHIArgZextsIntoPHI(PHINode &Phi) { + // We cannot create a new instruction after the PHI if the terminator is an + // EHPad because there is no valid insertion point. + if (TerminatorInst *TI = Phi.getParent()->getTerminator()) + if (TI->isEHPad()) + return nullptr; + // Early exit for the common case of a phi with two operands. These are + // handled elsewhere. See the comment below where we check the count of zexts + // and constants for more details. + unsigned NumIncomingValues = Phi.getNumIncomingValues(); + if (NumIncomingValues < 3) + return nullptr; + + // Find the narrower type specified by the first zext. + Type *NarrowType = nullptr; + for (Value *V : Phi.incoming_values()) { + if (auto *Zext = dyn_cast(V)) { + NarrowType = Zext->getSrcTy(); + break; + } + } + if (!NarrowType) + return nullptr; + + // Walk the phi operands checking that we only have zexts or constants that + // we can shrink for free. Store the new operands for the new phi. + SmallVector NewIncoming; + unsigned NumZexts = 0; + unsigned NumConsts = 0; + for (Value *V : Phi.incoming_values()) { + if (auto *Zext = dyn_cast(V)) { + // All zexts must be identical and have one use. + if (Zext->getSrcTy() != NarrowType || !Zext->hasOneUse()) + return nullptr; + NewIncoming.push_back(Zext->getOperand(0)); + NumZexts++; + } else if (auto *C = dyn_cast(V)) { + // Make sure that constants can fit in the new type. + Constant *Trunc = ConstantExpr::getTrunc(C, NarrowType); + if (ConstantExpr::getZExt(Trunc, C->getType()) != C) + return nullptr; + NewIncoming.push_back(Trunc); + NumConsts++; + } else { + // If it's not a cast or a constant, bail out. + return nullptr; + } + } + + // The more common cases of a phi with no constant operands or just one + // variable operand are handled by FoldPHIArgOpIntoPHI() and FoldOpIntoPhi() + // respectively. FoldOpIntoPhi() wants to do the opposite transform that is + // performed here. It tries to replicate a cast in the phi operand's basic + // block to expose other folding opportunities. Thus, InstCombine will + // infinite loop without this check. + if (NumConsts == 0 || NumZexts < 2) + return nullptr; + + // All incoming values are zexts or constants that are safe to truncate. 
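[Editor's note: a hypothetical C-level picture of the phi-of-loads fold above; 'mergeLoads' is made up. Two loads feeding a phi become a phi of the pointers plus one load, and the new load keeps only metadata that combineMetadata() can justify for both originals.]

    int mergeLoads(bool c, int *p, int *q) {
      int v;
      if (c)
        v = *p;          // load #1, possibly carrying its own !tbaa/!range
      else
        v = *q;          // load #2
      return v;          // after the fold: v = *(phi of p and q), one load
    }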
+ // Create a new phi node of the narrow type, phi together all of the new + // operands, and zext the result back to the original type. + PHINode *NewPhi = PHINode::Create(NarrowType, NumIncomingValues, + Phi.getName() + ".shrunk"); + for (unsigned i = 0; i != NumIncomingValues; ++i) + NewPhi->addIncoming(NewIncoming[i], Phi.getIncomingBlock(i)); + + InsertNewInstBefore(NewPhi, Phi); + return CastInst::CreateZExtOrBitCast(NewPhi, Phi.getType()); +} /// If all operands to a PHI node are the same "unary" operator and they all are /// only used by the PHI, PHI together their inputs, and do the operation once, /// to the result of the PHI. Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { + // We cannot create a new instruction after the PHI if the terminator is an + // EHPad because there is no valid insertion point. + if (TerminatorInst *TI = PN.getParent()->getTerminator()) + if (TI->isEHPad()) + return nullptr; + Instruction *FirstInst = cast(PN.getIncomingValue(0)); if (isa(FirstInst)) @@ -740,7 +834,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { } // Otherwise, do an extract in the predecessor. - Builder->SetInsertPoint(Pred, Pred->getTerminator()); + Builder->SetInsertPoint(Pred->getTerminator()); Value *Res = InVal; if (Offset) Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(), @@ -787,6 +881,9 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { if (Value *V = SimplifyInstruction(&PN, DL, TLI, DT, AC)) return ReplaceInstUsesWith(PN, V); + if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN)) + return Result; + // If all PHI operands are the same operation, pull them through the PHI, // reducing code size. if (isa(PN.getIncomingValue(0)) && diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index f51442a9f36d..776704d1efa9 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -38,7 +38,8 @@ getInverseMinMaxSelectPattern(SelectPatternFlavor SPF) { } } -static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) { +static CmpInst::Predicate getCmpPredicateForMinMax(SelectPatternFlavor SPF, + bool Ordered=false) { switch (SPF) { default: llvm_unreachable("unhandled!"); @@ -51,17 +52,22 @@ static CmpInst::Predicate getICmpPredicateForMinMax(SelectPatternFlavor SPF) { return ICmpInst::ICMP_SGT; case SPF_UMAX: return ICmpInst::ICMP_UGT; + case SPF_FMINNUM: + return Ordered ? FCmpInst::FCMP_OLT : FCmpInst::FCMP_ULT; + case SPF_FMAXNUM: + return Ordered ? FCmpInst::FCMP_OGT : FCmpInst::FCMP_UGT; } } static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder, SelectPatternFlavor SPF, Value *A, Value *B) { - CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF); + CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF); + assert(CmpInst::isIntPredicate(Pred)); return Builder->CreateSelect(Builder->CreateICmp(Pred, A, B), A, B); } -/// GetSelectFoldableOperands - We want to turn code that looks like this: +/// We want to turn code that looks like this: /// %C = or %A, %B /// %D = select %cond, %C, %A /// into: @@ -90,8 +96,8 @@ static unsigned GetSelectFoldableOperands(Instruction *I) { } } -/// GetSelectFoldableConstant - For the same transformation as the previous -/// function, return the identity constant that goes into the select. +/// For the same transformation as the previous function, return the identity +/// constant that goes into the select. 
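[Editor's note: a hypothetical example of FoldPHIArgZextsIntoPHI; 'pick' is made up. Three incoming values, two single-use zexts from i8 and one constant that round-trips through i8, so the requirements (at least 3 operands, NumZexts >= 2, NumConsts >= 1) are met and the phi is rebuilt at the narrow width.]

    unsigned pick(int sel, unsigned char a, unsigned char b) {
      unsigned w;
      switch (sel) {
      case 0:  w = a;  break;   // zext i8 -> i32
      case 1:  w = b;  break;   // zext i8 -> i32
      default: w = 7;  break;   // constant; trunc-then-zext gives 7 back
      }
      return w;  // becomes: zext(phi i8 [a], [b], [7]) -- one zext total
    }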
static Constant *GetSelectFoldableConstant(Instruction *I) { switch (I->getOpcode()) { default: llvm_unreachable("This cannot happen!"); @@ -110,7 +116,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) { } } -/// FoldSelectOpOp - Here we have (select c, TI, FI), and we know that TI and FI +/// Here we have (select c, TI, FI), and we know that TI and FI /// have the same opcode and only one use each. Try to simplify this. Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI) { @@ -197,8 +203,8 @@ static bool isSelect01(Constant *C1, Constant *C2) { C2I->isOne() || C2I->isAllOnesValue(); } -/// FoldSelectIntoOp - Try fold the select into one of the operands to -/// facilitate further optimization. +/// Try to fold the select into one of the operands to allow further +/// optimization. Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Value *FalseVal) { // See the comment above GetSelectFoldableOperands for a description of the @@ -276,7 +282,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, return nullptr; } -/// foldSelectICmpAndOr - We want to turn: +/// We want to turn: /// (select (icmp eq (and X, C1), 0), Y, (or Y, C2)) /// into: /// (or (shl (and X, C1), C3), y) @@ -394,9 +400,7 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, return nullptr; } -/// visitSelectInstWithICmp - Visit a SelectInst that has an -/// ICmpInst as its first operand. -/// +/// Visit a SelectInst that has an ICmpInst as its first operand. Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { bool Changed = false; @@ -595,10 +599,9 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, } -/// CanSelectOperandBeMappingIntoPredBlock - SI is a select whose condition is a -/// PHI node (but the two may be in different blocks). See if the true/false -/// values (V) are live in all of the predecessor blocks of the PHI. For -/// example, cases like this cannot be mapped: +/// SI is a select whose condition is a PHI node (but the two may be in +/// different blocks). See if the true/false values (V) are live in all of the +/// predecessor blocks of the PHI. For example, cases like this can't be mapped: /// /// X = phi [ C1, BB1], [C2, BB2] /// Y = add @@ -632,7 +635,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V, return false; } -/// FoldSPFofSPF - We have an SPF (e.g. a min or max) of an SPF of the form: +/// We have an SPF (e.g. a min or max) of an SPF of the form: /// SPF2(SPF1(A, B), C) Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1, @@ -745,10 +748,10 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, return nullptr; } -/// foldSelectICmpAnd - If one of the constants is zero (we know they can't -/// both be) and we have an icmp instruction with zero, and we have an 'and' -/// with the non-constant value and a power of two we can turn the select -/// into a shift on the result of the 'and'. +/// If one of the constants is zero (we know they can't both be) and we have an +/// icmp instruction with zero, and we have an 'and' with the non-constant value +/// and a power of two we can turn the select into a shift on the result of the +/// 'and'. 
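[Editor's note: a hypothetical example of the foldSelectICmpAnd transform just described; 'selToShift' is made up. One arm is zero, the compare tests a single masked bit, and the other arm is a power of two, so the select collapses to a shift of the masked bit.]

    unsigned selToShift(unsigned x) {
      // Before: (x & 4) == 0 ? 0 : 16
      // After:  (x & 4) << 2        -- bit 2 relocated to bit 4, no select
      return (x & 4) ? 16u : 0u;
    }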
static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, ConstantInt *FalseVal, InstCombiner::BuilderTy *Builder) { @@ -926,6 +929,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? Y : X if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); + IRBuilder<>::FastMathFlagGuard FMFG(*Builder); + Builder->SetFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal, FCI->getName() + ".inv"); @@ -967,6 +972,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // (X ugt Y) ? X : Y -> (X ole Y) ? X : Y if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) { FCmpInst::Predicate InvPred = FCI->getInversePredicate(); + IRBuilder<>::FastMathFlagGuard FMFG(*Builder); + Builder->SetFastMathFlags(FCI->getFastMathFlags()); Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal, FCI->getName() + ".inv"); @@ -1054,35 +1061,50 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { } // See if we can fold the select into one of our operands. - if (SI.getType()->isIntOrIntVectorTy()) { + if (SI.getType()->isIntOrIntVectorTy() || SI.getType()->isFPOrFPVectorTy()) { if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal)) return FoldI; Value *LHS, *RHS, *LHS2, *RHS2; Instruction::CastOps CastOp; - SelectPatternFlavor SPF = matchSelectPattern(&SI, LHS, RHS, &CastOp); + SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp); + auto SPF = SPR.Flavor; - if (SPF) { + if (SelectPatternResult::isMinOrMax(SPF)) { // Canonicalize so that type casts are outside select patterns. if (LHS->getType()->getPrimitiveSizeInBits() != SI.getType()->getPrimitiveSizeInBits()) { - CmpInst::Predicate Pred = getICmpPredicateForMinMax(SPF); - Value *Cmp = Builder->CreateICmp(Pred, LHS, RHS); + CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF, SPR.Ordered); + + Value *Cmp; + if (CmpInst::isIntPredicate(Pred)) { + Cmp = Builder->CreateICmp(Pred, LHS, RHS); + } else { + IRBuilder<>::FastMathFlagGuard FMFG(*Builder); + auto FMF = cast(SI.getCondition())->getFastMathFlags(); + Builder->SetFastMathFlags(FMF); + Cmp = Builder->CreateFCmp(Pred, LHS, RHS); + } + Value *NewSI = Builder->CreateCast(CastOp, Builder->CreateSelect(Cmp, LHS, RHS), SI.getType()); return ReplaceInstUsesWith(SI, NewSI); } + } + if (SPF) { // MAX(MAX(a, b), a) -> MAX(a, b) // MIN(MIN(a, b), a) -> MIN(a, b) // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a - if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2)) + // ABS(ABS(a)) -> ABS(a) + // NABS(NABS(a)) -> NABS(a) + if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor) if (Instruction *R = FoldSPFofSPF(cast(LHS),SPF2,LHS2,RHS2, SI, SPF, RHS)) return R; - if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2)) + if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor) if (Instruction *R = FoldSPFofSPF(cast(RHS),SPF2,LHS2,RHS2, SI, SPF, LHS)) return R; diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index d04ed58b014f..0c7defa5fff8 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -55,7 +55,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { return nullptr; } -/// CanEvaluateShifted - See if we can compute the specified value, but shifted +/// 
See if we can compute the specified value, but shifted /// logically to the left or right by some number of bits. This should return /// true if the expression can be computed for the same cost as the current /// expression tree. This is used to eliminate extraneous shifting from things @@ -184,7 +184,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift, } } -/// GetShiftedValue - When CanEvaluateShifted returned true for an expression, +/// When CanEvaluateShifted returned true for an expression, /// this value inserts the new computation that produces the shifted value. static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift, InstCombiner &IC, const DataLayout &DL) { diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 80628b23f111..743d51483ea1 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -410,9 +410,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, // If this is a select as part of a min/max pattern, don't simplify any // further in case we break the structure. Value *LHS, *RHS; - if (matchSelectPattern(I, LHS, RHS) != SPF_UNKNOWN) + if (matchSelectPattern(I, LHS, RHS).Flavor != SPF_UNKNOWN) return nullptr; - + if (SimplifyDemandedBits(I->getOperandUse(2), DemandedMask, RHSKnownZero, RHSKnownOne, Depth + 1) || SimplifyDemandedBits(I->getOperandUse(1), DemandedMask, LHSKnownZero, @@ -1057,7 +1057,13 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt LeftDemanded(DemandedElts), RightDemanded(DemandedElts); if (ConstantVector* CV = dyn_cast(I->getOperand(0))) { for (unsigned i = 0; i < VWidth; i++) { - if (CV->getAggregateElement(i)->isNullValue()) + Constant *CElt = CV->getAggregateElement(i); + // Method isNullValue always returns false when called on a + // ConstantExpr. If CElt is a ConstantExpr then skip it in order to + // to avoid propagating incorrect information. + if (isa(CElt)) + continue; + if (CElt->isNullValue()) LeftDemanded.clearBit(i); else RightDemanded.clearBit(i); @@ -1082,6 +1088,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (!VTy) break; unsigned InVWidth = VTy->getNumElements(); APInt InputDemandedElts(InVWidth, 0); + UndefElts2 = APInt(InVWidth, 0); unsigned Ratio; if (VWidth == InVWidth) { @@ -1089,29 +1096,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // elements as are demanded of us. Ratio = 1; InputDemandedElts = DemandedElts; - } else if (VWidth > InVWidth) { - // Untested so far. - break; - - // If there are more elements in the result than there are in the source, - // then an input element is live if any of the corresponding output - // elements are live. - Ratio = VWidth/InVWidth; - for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { + } else if ((VWidth % InVWidth) == 0) { + // If the number of elements in the output is a multiple of the number of + // elements in the input then an input element is live if any of the + // corresponding output elements are live. + Ratio = VWidth / InVWidth; + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) if (DemandedElts[OutIdx]) - InputDemandedElts.setBit(OutIdx/Ratio); - } - } else { - // Untested so far. 
- break; - - // If there are more elements in the source than there are in the result, - // then an input element is live if the corresponding output element is - // live. - Ratio = InVWidth/VWidth; + InputDemandedElts.setBit(OutIdx / Ratio); + } else if ((InVWidth % VWidth) == 0) { + // If the number of elements in the input is a multiple of the number of + // elements in the output then an input element is live if the + // corresponding output element is live. + Ratio = InVWidth / VWidth; for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (DemandedElts[InIdx/Ratio]) + if (DemandedElts[InIdx / Ratio]) InputDemandedElts.setBit(InIdx); + } else { + // Unsupported so far. + break; } // div/rem demand all inputs, because they don't want divide by zero. @@ -1122,24 +1125,26 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, MadeChange = true; } - UndefElts = UndefElts2; - if (VWidth > InVWidth) { - llvm_unreachable("Unimp"); - // If there are more elements in the result than there are in the source, - // then an output element is undef if the corresponding input element is - // undef. + if (VWidth == InVWidth) { + UndefElts = UndefElts2; + } else if ((VWidth % InVWidth) == 0) { + // If the number of elements in the output is a multiple of the number of + // elements in the input then an output element is undef if the + // corresponding input element is undef. for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) - if (UndefElts2[OutIdx/Ratio]) + if (UndefElts2[OutIdx / Ratio]) UndefElts.setBit(OutIdx); - } else if (VWidth < InVWidth) { + } else if ((InVWidth % VWidth) == 0) { + // If the number of elements in the input is a multiple of the number of + // elements in the output then an output element is undef if all of the + // corresponding input elements are undef. + for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) { + APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio); + if (SubUndef.countPopulation() == Ratio) + UndefElts.setBit(OutIdx); + } + } else { llvm_unreachable("Unimp"); - // If there are more elements in the source than there are in the result, - // then a result element is undef if all of the corresponding input - // elements are undef. - UndefElts = ~0ULL >> (64-VWidth); // Start out all undef. - for (unsigned InIdx = 0; InIdx != InVWidth; ++InIdx) - if (!UndefElts2[InIdx]) // Not undef? - UndefElts.clearBit(InIdx/Ratio); // Clear undef bit. } break; } @@ -1237,6 +1242,15 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, // like undef&0. The result is known zero, not undef. UndefElts &= UndefElts2; break; + + // SSE4A instructions leave the upper 64-bits of the 128-bit result + // in an undefined state. + case Intrinsic::x86_sse4a_extrq: + case Intrinsic::x86_sse4a_extrqi: + case Intrinsic::x86_sse4a_insertq: + case Intrinsic::x86_sse4a_insertqi: + UndefElts |= APInt::getHighBitsSet(VWidth, VWidth / 2); + break; } break; } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 273047279e90..e25639ae943b 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -22,10 +22,10 @@ using namespace PatternMatch; #define DEBUG_TYPE "instcombine" -/// CheapToScalarize - Return true if the value is cheaper to scalarize than it -/// is to leave as a vector operation. isConstant indicates whether we're -/// extracting one known element. 
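[Editor's note: a sketch of the new bitcast demanded-elements mapping, assuming a <4 x i32> result bitcast from a <2 x i64> source (VWidth = 4, InVWidth = 2, Ratio = 2); the program below is illustrative, not patch code.]

    #include <cstdio>

    int main() {
      const unsigned VWidth = 4, InVWidth = 2, Ratio = VWidth / InVWidth;
      bool DemandedOut[VWidth] = {false, false, false, true};
      bool DemandedIn[InVWidth] = {false, false};
      // Result lane i depends on source lane i / Ratio, the same mapping
      // the patch uses to build InputDemandedElts.
      for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx)
        if (DemandedOut[OutIdx])
          DemandedIn[OutIdx / Ratio] = true;
      std::printf("source lanes demanded: %d %d\n",
                  DemandedIn[0], DemandedIn[1]);   // prints: 0 1
      return 0;
    }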
If false we're extracting a variable index. -static bool CheapToScalarize(Value *V, bool isConstant) { +/// Return true if the value is cheaper to scalarize than it is to leave as a +/// vector operation. isConstant indicates whether we're extracting one known +/// element. If false we're extracting a variable index. +static bool cheapToScalarize(Value *V, bool isConstant) { if (Constant *C = dyn_cast(V)) { if (isConstant) return true; @@ -50,13 +50,13 @@ static bool CheapToScalarize(Value *V, bool isConstant) { return true; if (BinaryOperator *BO = dyn_cast(I)) if (BO->hasOneUse() && - (CheapToScalarize(BO->getOperand(0), isConstant) || - CheapToScalarize(BO->getOperand(1), isConstant))) + (cheapToScalarize(BO->getOperand(0), isConstant) || + cheapToScalarize(BO->getOperand(1), isConstant))) return true; if (CmpInst *CI = dyn_cast(I)) if (CI->hasOneUse() && - (CheapToScalarize(CI->getOperand(0), isConstant) || - CheapToScalarize(CI->getOperand(1), isConstant))) + (cheapToScalarize(CI->getOperand(0), isConstant) || + cheapToScalarize(CI->getOperand(1), isConstant))) return true; return false; @@ -82,7 +82,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // and that it is a binary operation which is cheap to scalarize. // otherwise return NULL. if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || - !(isa(PHIUser)) || !CheapToScalarize(PHIUser, true)) + !(isa(PHIUser)) || !cheapToScalarize(PHIUser, true)) return nullptr; // Create a scalar PHI node that will replace the vector PHI node @@ -115,8 +115,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { Instruction *pos = dyn_cast(PHIInVal); BasicBlock::iterator InsertPos; if (pos && !isa(pos)) { - InsertPos = pos; - ++InsertPos; + InsertPos = ++pos->getIterator(); } else { InsertPos = inBB->getFirstInsertionPt(); } @@ -137,7 +136,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If vector val is constant with all elements the same, replace EI with // that element. We handle a known element # below. if (Constant *C = dyn_cast(EI.getOperand(0))) - if (CheapToScalarize(C, false)) + if (cheapToScalarize(C, false)) return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); // If extracting a specified index from the vector, see if we can recursively @@ -163,7 +162,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { } } - // If the this extractelement is directly using a bitcast from a vector of + // If this extractelement is directly using a bitcast from a vector of // the same number of elements, see if we can find the source element from // it. In this case, we will end up needing to bitcast the scalars. if (BitCastInst *BCI = dyn_cast(EI.getOperand(0))) { @@ -184,10 +183,10 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { if (Instruction *I = dyn_cast(EI.getOperand(0))) { // Push extractelement into predecessor operation if legal and - // profitable to do so + // profitable to do so. 
if (BinaryOperator *BO = dyn_cast(I)) { if (I->hasOneUse() && - CheapToScalarize(BO, isa(EI.getOperand(1)))) { + cheapToScalarize(BO, isa(EI.getOperand(1)))) { Value *newEI0 = Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), EI.getName()+".lhs"); @@ -230,8 +229,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { SrcIdx, false)); } } else if (CastInst *CI = dyn_cast(I)) { - // Canonicalize extractelement(cast) -> cast(extractelement) - // bitcasts can change the number of vector elements and they cost nothing + // Canonicalize extractelement(cast) -> cast(extractelement). + // Bitcasts can change the number of vector elements, and they cost + // nothing. if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { Value *EE = Builder->CreateExtractElement(CI->getOperand(0), EI.getIndexOperand()); @@ -245,7 +245,8 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // fight the vectorizer. // If we are extracting an element from a vector select or a select on - // vectors, a select on the scalars extracted from the vector arguments. + // vectors, create a select on the scalars extracted from the vector + // arguments. Value *TrueVal = SI->getTrueValue(); Value *FalseVal = SI->getFalseValue(); @@ -275,10 +276,9 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { return nullptr; } -/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns -/// elements from either LHS or RHS, return the shuffle mask and true. -/// Otherwise, return false. -static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, +/// If V is a shuffle of values that ONLY returns elements from either LHS or +/// RHS, return the shuffle mask and true. Otherwise, return false. +static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl &Mask) { assert(LHS->getType() == RHS->getType() && "Invalid CollectSingleShuffleElements"); @@ -315,7 +315,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, if (isa(ScalarOp)) { // inserting undef into vector. // We can handle this if the vector we are inserting into is // transitively ok. - if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted undef. Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); return true; @@ -330,7 +330,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { // We can handle this if the vector we are inserting into is // transitively ok. - if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { + if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted value. if (EI->getOperand(0) == LHS) { Mask[InsertedIdx % NumElts] = @@ -352,6 +352,48 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, return false; } +/// If we have insertion into a vector that is wider than the vector that we +/// are extracting from, try to widen the source vector to allow a single +/// shufflevector to replace one or more insert/extract pairs. 
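[Editor's note: a hypothetical scalar view of the extractelement(cast) -> cast(extractelement) canonicalization above; 'laneWiden' is made up. Converting after the extract touches only the demanded lane instead of the whole vector.]

    long laneWiden(const int v[4]) {
      // Before: extract lane 1 of (sext <4 x i32> to <4 x i64>).
      // After:  sext of the extracted scalar -- one conversion.
      return (long)v[1];
    }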
+static void replaceExtractElements(InsertElementInst *InsElt, + ExtractElementInst *ExtElt, + InstCombiner &IC) { + VectorType *InsVecType = InsElt->getType(); + VectorType *ExtVecType = ExtElt->getVectorOperandType(); + unsigned NumInsElts = InsVecType->getVectorNumElements(); + unsigned NumExtElts = ExtVecType->getVectorNumElements(); + + // The inserted-to vector must be wider than the extracted-from vector. + if (InsVecType->getElementType() != ExtVecType->getElementType() || + NumExtElts >= NumInsElts) + return; + + // Create a shuffle mask to widen the extended-from vector using undefined + // values. The mask selects all of the values of the original vector followed + // by as many undefined values as needed to create a vector of the same length + // as the inserted-to vector. + SmallVector ExtendMask; + IntegerType *IntType = Type::getInt32Ty(InsElt->getContext()); + for (unsigned i = 0; i < NumExtElts; ++i) + ExtendMask.push_back(ConstantInt::get(IntType, i)); + for (unsigned i = NumExtElts; i < NumInsElts; ++i) + ExtendMask.push_back(UndefValue::get(IntType)); + + Value *ExtVecOp = ExtElt->getVectorOperand(); + auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), + ConstantVector::get(ExtendMask)); + + // Replace all extracts from the original narrow vector with extracts from + // the new wide vector. + WideVec->insertBefore(ExtElt); + for (User *U : ExtVecOp->users()) { + if (ExtractElementInst *OldExt = dyn_cast(U)) { + auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1)); + NewExt->insertAfter(WideVec); + IC.ReplaceInstUsesWith(*OldExt, NewExt); + } + } +} /// We are building a shuffle to create V, which is a sequence of insertelement, /// extractelement pairs. If PermittedRHS is set, then we must either use it or @@ -363,9 +405,10 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// often been chosen carefully to be efficiently implementable on the target. typedef std::pair ShuffleOps; -static ShuffleOps CollectShuffleElements(Value *V, +static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl &Mask, - Value *PermittedRHS) { + Value *PermittedRHS, + InstCombiner &IC) { assert(V->getType()->isVectorTy() && "Invalid shuffle!"); unsigned NumElts = cast(V->getType())->getNumElements(); @@ -396,10 +439,14 @@ static ShuffleOps CollectShuffleElements(Value *V, // otherwise we'd end up with a shuffle of three inputs. if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { Value *RHS = EI->getOperand(0); - ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS); + ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC); assert(LR.second == nullptr || LR.second == RHS); if (LR.first->getType() != RHS->getType()) { + // Although we are giving up for now, see if we can create extracts + // that match the inserts for another round of combining. + replaceExtractElements(IEI, EI, IC); + // We tried our best, but we can't find anything compatible with RHS // further up the chain. Return a trivial shuffle. for (unsigned i = 0; i < NumElts; ++i) @@ -429,14 +476,14 @@ static ShuffleOps CollectShuffleElements(Value *V, // If this insertelement is a chain that comes from exactly these two // vectors, return the vector and the effective shuffle. 
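[Editor's note: a sketch of the extend mask replaceExtractElements builds, assuming NumExtElts = 2 source lanes widened to NumInsElts = 4; -1 stands in for an undef mask lane here, and the program is illustrative only.]

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumExtElts = 2, NumInsElts = 4;
      std::vector<int> ExtendMask;
      for (unsigned i = 0; i < NumExtElts; ++i)
        ExtendMask.push_back((int)i);  // select original lane i
      for (unsigned i = NumExtElts; i < NumInsElts; ++i)
        ExtendMask.push_back(-1);      // undef lane; its value is never read
      for (int m : ExtendMask)
        std::printf("%d ", m);         // prints: 0 1 -1 -1
      std::printf("\n");
      return 0;
    }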
if (EI->getOperand(0)->getType() == PermittedRHS->getType() && - CollectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, + collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, Mask)) return std::make_pair(EI->getOperand(0), PermittedRHS); } } } - // Otherwise, can't do anything fancy. Return an identity vector. + // Otherwise, we can't do anything fancy. Return an identity vector. for (unsigned i = 0; i != NumElts; ++i) Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); return std::make_pair(V, nullptr); @@ -512,7 +559,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { // (and any insertelements it points to), into one big shuffle. if (!IE.hasOneUse() || !isa(IE.user_back())) { SmallVector Mask; - ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr); + ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this); // The proposed shuffle may be trivial, in which case we shouldn't // perform the combine. @@ -588,8 +635,8 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef Mask, case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::GetElementPtr: { - for (int i = 0, e = I->getNumOperands(); i != e; ++i) { - if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1)) + for (Value *Operand : I->operands()) { + if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) return false; } return true; @@ -617,7 +664,7 @@ static bool CanEvaluateShuffled(Value *V, ArrayRef Mask, /// Rebuild a new instruction just like 'I' but with the new operands given. /// In the event of type mismatch, the type of the operands is correct. -static Value *BuildNew(Instruction *I, ArrayRef NewOps) { +static Value *buildNew(Instruction *I, ArrayRef NewOps) { // We don't want to use the IRBuilder here because we want the replacement // instructions to appear next to 'I', not the builder's insertion point. switch (I->getOpcode()) { @@ -760,7 +807,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask) { NeedsRebuild |= (V != I->getOperand(i)); } if (NeedsRebuild) { - return BuildNew(I, NewOps); + return buildNew(I, NewOps); } return I; } @@ -792,7 +839,7 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef Mask) { llvm_unreachable("failed to reorder elements of vector instruction!"); } -static void RecognizeIdentityMask(const SmallVectorImpl &Mask, +static void recognizeIdentityMask(const SmallVectorImpl &Mask, bool &isLHSID, bool &isRHSID) { isLHSID = isRHSID = true; @@ -891,7 +938,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (VWidth == LHSWidth) { // Analyze the shuffle, are the LHS or RHS and identity shuffles? bool isLHSID, isRHSID; - RecognizeIdentityMask(Mask, isLHSID, isRHSID); + recognizeIdentityMask(Mask, isLHSID, isRHSID); // Eliminate identity shuffles. if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); @@ -1177,7 +1224,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { // If the result mask is an identity, replace uses of this instruction with // corresponding argument. 
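[Editor's note: a sketch of the check behind recognizeIdentityMask, used just below to erase identity shuffles; -1 models an undef mask entry, and RHS lanes would be i + VWidth. Illustrative program, not patch code.]

    #include <cstdio>

    int main() {
      const int Mask[4] = {0, 1, -1, 3};
      bool IsLHSIdentity = true;
      // Every defined mask lane must select lane i of the LHS.
      for (int i = 0; i != 4; ++i)
        if (Mask[i] >= 0 && Mask[i] != i)
          IsLHSIdentity = false;
      std::printf("identity of LHS: %s\n", IsLHSIdentity ? "yes" : "no");
      return 0;
    }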
bool isLHSID, isRHSID; - RecognizeIdentityMask(newMask, isLHSID, isRHSID); + recognizeIdentityMask(newMask, isLHSID, isRHSID); if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS); if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS); diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index fd34a244f271..7c46cfd28fc9 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -42,8 +42,9 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -79,14 +80,12 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) { return llvm::EmitGEPOffset(Builder, DL, GEP); } -/// ShouldChangeType - Return true if it is desirable to convert a computation -/// from 'From' to 'To'. We don't want to convert from a legal to an illegal -/// type for example, or from a smaller to a larger illegal type. -bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { - assert(From->isIntegerTy() && To->isIntegerTy()); - - unsigned FromWidth = From->getPrimitiveSizeInBits(); - unsigned ToWidth = To->getPrimitiveSizeInBits(); +/// Return true if it is desirable to convert an integer computation from a +/// given bit width to a new bit width. +/// We don't want to convert from a legal to an illegal type for example or from +/// a smaller to a larger illegal type. +bool InstCombiner::ShouldChangeType(unsigned FromWidth, + unsigned ToWidth) const { bool FromLegal = DL.isLegalInteger(FromWidth); bool ToLegal = DL.isLegalInteger(ToWidth); @@ -103,6 +102,17 @@ bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { return true; } +/// Return true if it is desirable to convert a computation from 'From' to 'To'. +/// We don't want to convert from a legal to an illegal type for example or from +/// a smaller to a larger illegal type. +bool InstCombiner::ShouldChangeType(Type *From, Type *To) const { + assert(From->isIntegerTy() && To->isIntegerTy()); + + unsigned FromWidth = From->getPrimitiveSizeInBits(); + unsigned ToWidth = To->getPrimitiveSizeInBits(); + return ShouldChangeType(FromWidth, ToWidth); +} + // Return true, if No Signed Wrap should be maintained for I. // The No Signed Wrap flag can be kept if the operation "B (I.getOpcode) C", // where both B and C should be ConstantInts, results in a constant that does @@ -156,27 +166,26 @@ static void ClearSubclassDataAfterReassociation(BinaryOperator &I) { I.setFastMathFlags(FMF); } -/// SimplifyAssociativeOrCommutative - This performs a few simplifications for -/// operators which are associative or commutative: -// -// Commutative operators: -// -// 1. Order operands such that they are listed from right (least complex) to -// left (most complex). This puts constants before unary operators before -// binary operators. -// -// Associative operators: -// -// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. -// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. -// -// Associative and commutative operators: -// -// 4. 
Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. -// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. -// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" -// if C1 and C2 are constants. -// +/// This performs a few simplifications for operators that are associative or +/// commutative: +/// +/// Commutative operators: +/// +/// 1. Order operands such that they are listed from right (least complex) to +/// left (most complex). This puts constants before unary operators before +/// binary operators. +/// +/// Associative operators: +/// +/// 2. Transform: "(A op B) op C" ==> "A op (B op C)" if "B op C" simplifies. +/// 3. Transform: "A op (B op C)" ==> "(A op B) op C" if "A op B" simplifies. +/// +/// Associative and commutative operators: +/// +/// 4. Transform: "(A op B) op C" ==> "(C op A) op B" if "C op A" simplifies. +/// 5. Transform: "A op (B op C)" ==> "B op (C op A)" if "C op A" simplifies. +/// 6. Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)" +/// if C1 and C2 are constants. bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { Instruction::BinaryOps Opcode = I.getOpcode(); bool Changed = false; @@ -322,7 +331,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) { } while (1); } -/// LeftDistributesOverRight - Whether "X LOp (Y ROp Z)" is always equal to +/// Return whether "X LOp (Y ROp Z)" is always equal to /// "(X LOp Y) ROp (X LOp Z)". static bool LeftDistributesOverRight(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp) { @@ -361,7 +370,7 @@ static bool LeftDistributesOverRight(Instruction::BinaryOps LOp, } } -/// RightDistributesOverLeft - Whether "(X LOp Y) ROp Z" is always equal to +/// Return whether "(X LOp Y) ROp Z" is always equal to /// "(X ROp Z) LOp (Y ROp Z)". static bool RightDistributesOverLeft(Instruction::BinaryOps LOp, Instruction::BinaryOps ROp) { @@ -519,7 +528,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, if (isa(Op1)) HasNSW &= Op1->hasNoSignedWrap(); - // We can propogate 'nsw' if we know that + // We can propagate 'nsw' if we know that // %Y = mul nsw i16 %X, C // %Z = add nsw i16 %Y, %X // => @@ -537,11 +546,11 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder, return SimplifiedInst; } -/// SimplifyUsingDistributiveLaws - This tries to simplify binary operations -/// which some other binary operation distributes over either by factorizing -/// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this -/// results in simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is -/// a win). Returns the simplified value, or null if it didn't simplify. +/// This tries to simplify binary operations which some other binary operation +/// distributes over either by factorizing out common terms +/// (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this results in +/// simplifications (eg: "A & (B | C) -> (A&B) | (A&C)" if this is a win). +/// Returns the simplified value, or null if it didn't simplify. 
Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast(LHS); @@ -623,12 +632,38 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { } } + // (op (select (a, c, b)), (select (a, d, b))) -> (select (a, (op c, d), 0)) + // (op (select (a, b, c)), (select (a, b, d))) -> (select (a, 0, (op c, d))) + if (auto *SI0 = dyn_cast(LHS)) { + if (auto *SI1 = dyn_cast(RHS)) { + if (SI0->getCondition() == SI1->getCondition()) { + Value *SI = nullptr; + if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue(), DL, TLI, DT, AC)) + SI = Builder->CreateSelect(SI0->getCondition(), + Builder->CreateBinOp(TopLevelOpcode, + SI0->getTrueValue(), + SI1->getTrueValue()), + V); + if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), + SI1->getTrueValue(), DL, TLI, DT, AC)) + SI = Builder->CreateSelect( + SI0->getCondition(), V, + Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), + SI1->getFalseValue())); + if (SI) { + SI->takeName(&I); + return SI; + } + } + } + } + return nullptr; } -// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction -// if the LHS is a constant zero (which is the 'negate' form). -// +/// Given a 'sub' instruction, return the RHS of the instruction if the LHS is a +/// constant zero (which is the 'negate' form). Value *InstCombiner::dyn_castNegVal(Value *V) const { if (BinaryOperator::isNeg(V)) return BinaryOperator::getNegArgument(V); @@ -644,10 +679,8 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const { return nullptr; } -// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the -// instruction if the LHS is a constant negative zero (which is the 'negate' -// form). -// +/// Given a 'fsub' instruction, return the RHS of the instruction if the LHS is +/// a constant negative zero (which is the 'negate' form). Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const { if (BinaryOperator::isFNeg(V, IgnoreZeroSign)) return BinaryOperator::getFNegArgument(V); @@ -700,10 +733,10 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO, llvm_unreachable("Unknown binary instruction type!"); } -// FoldOpIntoSelect - Given an instruction with a select as one operand and a -// constant as the other operand, try to fold the binary operator into the -// select arguments. This also works for Cast instructions, which obviously do -// not have a second operand. +/// Given an instruction with a select as one operand and a constant as the +/// other operand, try to fold the binary operator into the select arguments. +/// This also works for Cast instructions, which obviously do not have a second +/// operand. Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { // Don't modify shared select instructions if (!SI->hasOneUse()) return nullptr; @@ -752,10 +785,9 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) { return nullptr; } -/// FoldOpIntoPhi - Given a binary operator, cast instruction, or select which -/// has a PHI node as operand #0, see if we can fold the instruction into the -/// PHI (which is only possible if all operands to the PHI are constants). -/// +/// Given a binary operator, cast instruction, or select which has a PHI node as +/// operand #0, see if we can fold the instruction into the PHI (which is only +/// possible if all operands to the PHI are constants). 
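[Editor's note: a hypothetical example of the new select-of-select fold added above; 'foldSelects' is made up. Both selects share the condition c and their false arms are identical, so the binop on that side simplifies (x & x == x) and a single select survives.]

    //   (c ? p : x) & (c ? q : x)  ==>  c ? (p & q) : x
    int foldSelects(bool c, int p, int q, int x) {
      int l = c ? p : x;
      int r = c ? q : x;
      return l & r;
    }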
Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { PHINode *PN = cast(I.getOperand(0)); unsigned NumPHIValues = PN->getNumIncomingValues(); @@ -819,7 +851,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { NewPN->takeName(PN); // If we are going to have to insert a new computation, do so right before the - // predecessors terminator. + // predecessor's terminator. if (NonConstBB) Builder->SetInsertPoint(NonConstBB->getTerminator()); @@ -893,10 +925,10 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { return ReplaceInstUsesWith(I, NewPN); } -/// FindElementAtOffset - Given a pointer type and a constant offset, determine -/// whether or not there is a sequence of GEP indices into the pointed type that -/// will land us at the specified offset. If so, fill them into NewIndices and -/// return the resultant element type, otherwise return null. +/// Given a pointer type and a constant offset, determine whether or not there +/// is a sequence of GEP indices into the pointed type that will land us at the +/// specified offset. If so, fill them into NewIndices and return the resultant +/// element type, otherwise return null. Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset, SmallVectorImpl &NewIndices) { Type *Ty = PtrTy->getElementType(); @@ -965,8 +997,8 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) { return true; } -/// Descale - Return a value X such that Val = X * Scale, or null if none. If -/// the multiplication is known not to overflow then NoSignedWrap is set. +/// Return a value X such that Val = X * Scale, or null if none. +/// If the multiplication is known not to overflow, then NoSignedWrap is set. Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { assert(isa(Val->getType()) && "Can only descale integers!"); assert(cast(Val->getType())->getBitWidth() == @@ -1008,11 +1040,11 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { // 0'th operand of Val. std::pair Parent; - // RequireNoSignedWrap - Set if the transform requires a descaling at deeper - // levels that doesn't overflow. + // Set if the transform requires a descaling at deeper levels that doesn't + // overflow. bool RequireNoSignedWrap = false; - // logScale - log base 2 of the scale. Negative if not a power of 2. + // Log base 2 of the scale. Negative if not a power of 2. int32_t logScale = Scale.exactLogBase2(); for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down @@ -1213,16 +1245,11 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { /// specified one but with other operands. static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, InstCombiner::BuilderTy *B) { - Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); - if (BinaryOperator *NewBO = dyn_cast(BORes)) { - if (isa(NewBO)) { - NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap()); - NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap()); - } - if (isa(NewBO)) - NewBO->setIsExact(Inst.isExact()); - } - return BORes; + Value *BO = B->CreateBinOp(Inst.getOpcode(), LHS, RHS); + // If LHS and RHS are constant, BO won't be a binary operator. + if (BinaryOperator *NewBO = dyn_cast(BO)) + NewBO->copyIRFlags(&Inst); + return BO; } /// \brief Makes transformation of binary operation specific for vector types. 
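[Editor's note: a sketch of the question FindElementAtOffset answers, assuming 4-byte ints and no padding; 'S' and 'atOffset12' are made up. Byte offset 12 into S corresponds to the GEP index sequence {0, 1, 2}, i.e. &p->b[2], so a raw byte offset can be rewritten as a typed GEP.]

    struct S { int a; int b[3]; };

    int *atOffset12(S *p) {
      return &p->b[2];   // same address as (char *)p + 12 under this layout
    }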
@@ -1256,9 +1283,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { LShuf->getMask() == RShuf->getMask()) { Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0), RShuf->getOperand(0), Builder); - Value *Res = Builder->CreateShuffleVector(NewBO, + return Builder->CreateShuffleVector(NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask()); - return Res; } } @@ -1294,18 +1320,11 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { } if (MayChange) { Constant *C2 = ConstantVector::get(C2M); - Value *NewLHS, *NewRHS; - if (isa(LHS)) { - NewLHS = C2; - NewRHS = Shuffle->getOperand(0); - } else { - NewLHS = Shuffle->getOperand(0); - NewRHS = C2; - } + Value *NewLHS = isa(LHS) ? C2 : Shuffle->getOperand(0); + Value *NewRHS = isa(LHS) ? Shuffle->getOperand(0) : C2; Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder); - Value *Res = Builder->CreateShuffleVector(NewBO, + return Builder->CreateShuffleVector(NewBO, UndefValue::get(Inst.getType()), Shuffle->getMask()); - return Res; } } @@ -1323,7 +1342,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // Eliminate unneeded casts for indices, and replace indices which displace // by multiples of a zero size type with zero. bool MadeChange = false; - Type *IntPtrTy = DL.getIntPtrType(GEP.getPointerOperandType()); + Type *IntPtrTy = + DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType()); gep_type_iterator GTI = gep_type_begin(GEP); for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E; @@ -1333,21 +1353,25 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (!SeqTy) continue; + // Index type should have the same width as IntPtr + Type *IndexTy = (*I)->getType(); + Type *NewIndexType = IndexTy->isVectorTy() ? + VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy; + // If the element type has zero size then any index over it is equivalent // to an index of zero, so replace it with zero if it is not zero already. if (SeqTy->getElementType()->isSized() && DL.getTypeAllocSize(SeqTy->getElementType()) == 0) if (!isa(*I) || !cast(*I)->isNullValue()) { - *I = Constant::getNullValue(IntPtrTy); + *I = Constant::getNullValue(NewIndexType); MadeChange = true; } - Type *IndexTy = (*I)->getType(); - if (IndexTy != IntPtrTy) { + if (IndexTy != NewIndexType) { // If we are using a wider index than needed for this platform, shrink // it to what we need. If narrower, sign-extend it to what we need. // This explicit cast can make subsequent optimizations more obvious. - *I = Builder->CreateIntCast(*I, IntPtrTy, true); + *I = Builder->CreateIntCast(*I, NewIndexType, true); MadeChange = true; } } @@ -1421,8 +1445,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } - GetElementPtrInst *NewGEP = cast(Op1->clone()); + // If not all GEPs are identical we'll have to create a new PHI node. + // Check that the old PHI node has only one use so that it will get + // removed. + if (DI != -1 && !PN->hasOneUse()) + return nullptr; + GetElementPtrInst *NewGEP = cast(Op1->clone()); if (DI == -1) { // All the GEPs feeding the PHI are identical. Clone one down into our // BB so that it can be merged with the current GEP. @@ -1432,11 +1461,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { // All the GEPs feeding the PHI differ at a single offset. Clone a GEP // into the current block so it can be merged, and create a new PHI to // set that index. 
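[Editor's note: a hypothetical C-level view of the GEP index canonicalization above, assuming a 64-bit target; 'indexGEP' is made up. Indices narrower than the pointer-sized integer are sign-extended up front so later GEP folds all see one index width.]

    long *indexGEP(long *base, int i) {
      // Before: gep with an i32 index.
      // After:  gep with (sext i32 %i to i64) as the index.
      return &base[(long)i];
    }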
- Instruction *InsertPt = Builder->GetInsertPoint(); - Builder->SetInsertPoint(PN); - PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), - PN->getNumOperands()); - Builder->SetInsertPoint(InsertPt); + PHINode *NewPN; + { + IRBuilderBase::InsertPointGuard Guard(*Builder); + Builder->SetInsertPoint(PN); + NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); + } for (auto &I : PN->operands()) NewPN->addIncoming(cast(I)->getOperand(DI), @@ -1790,7 +1821,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (Instruction *I = visitBitCast(*BCI)) { if (I != BCI) { I->takeName(BCI); - BCI->getParent()->getInstList().insert(BCI, I); + BCI->getParent()->getInstList().insert(BCI->getIterator(), I); ReplaceInstUsesWith(*BCI, I); } return &GEP; @@ -1931,7 +1962,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { if (InvokeInst *II = dyn_cast(&MI)) { // Replace invoke with a NOP intrinsic to maintain the original CFG - Module *M = II->getParent()->getParent()->getParent(); + Module *M = II->getModule(); Function *F = Intrinsic::getDeclaration(M, Intrinsic::donothing); InvokeInst::Create(F, II->getNormalDest(), II->getUnwindDest(), None, "", II->getParent()); @@ -2280,9 +2311,10 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { } if (LoadInst *L = dyn_cast(Agg)) // If the (non-volatile) load only has one use, we can rewrite this to a - // load from a GEP. This reduces the size of the load. - // FIXME: If a load is used only by extractvalue instructions then this - // could be done regardless of having multiple uses. + // load from a GEP. This reduces the size of the load. If a load is used + // only by extractvalue instructions then this either must have been + // optimized before, or it is a struct with padding, in which case we + // don't want to do the transformation as it loses padding knowledge. if (L->isSimple() && L->hasOneUse()) { // extractvalue has integer indices, getelementptr has Value*s. Convert. SmallVector Indices; @@ -2294,7 +2326,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { // We need to insert these at the location of the old load, not at that of // the extractvalue. - Builder->SetInsertPoint(L->getParent(), L); + Builder->SetInsertPoint(L); Value *GEP = Builder->CreateInBoundsGEP(L->getType(), L->getPointerOperand(), Indices); // Returning the load directly will cause the main loop to insert it in @@ -2312,7 +2344,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) { return nullptr; } -/// isCatchAll - Return 'true' if the given typeinfo will match anything. +/// Return 'true' if the given typeinfo will match anything. static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { switch (Personality) { case EHPersonality::GNU_C: @@ -2330,6 +2362,7 @@ static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { case EHPersonality::MSVC_X86SEH: case EHPersonality::MSVC_Win64SEH: case EHPersonality::MSVC_CXX: + case EHPersonality::CoreCLR: return TypeInfo->isNullValue(); } llvm_unreachable("invalid enum"); @@ -2441,10 +2474,24 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { SawCatchAll = true; break; } - if (AlreadyCaught.count(TypeInfo)) - // Already caught by an earlier clause, so having it in the filter - // is pointless. - continue; + + // Even if we've seen a type in a catch clause, we don't want to + // remove it from the filter. 
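[Editor's note: a hypothetical example of the extractvalue(load) -> load(gep) narrowing in visitExtractValueInst above; 'Pair' and 'second' are made up. With a simple, single-use load only the demanded field is loaded, and multi-use loads are now deliberately skipped so struct padding knowledge is not lost.]

    struct Pair { int a; int b; };

    int second(const Pair *p) {
      // Before: load the whole Pair, then extractvalue index 1.
      // After:  load only p->b through an inbounds GEP.
      return p->b;
    }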
An unexpected type handler may be
+ // set up for a call site which throws an exception of the same
+ // type caught. In order for the exception thrown by the unexpected
+ // handler to propagate correctly, the filter must be correctly
+ // described for the call site.
+ //
+ // Example:
+ //
+ // void unexpected() { throw 1;}
+ // void foo() throw (int) {
+ // std::set_unexpected(unexpected);
+ // try {
+ // throw 2.0;
+ // } catch (int i) {}
+ // }
+
// There is no point in having multiple copies of the same typeinfo in
// a filter, so only add it if we didn't already.
if (SeenInFilter.insert(TypeInfo).second)
@@ -2637,15 +2684,15 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return nullptr;
}
-/// TryToSinkInstruction - Try to move the specified instruction from its
-/// current block into the beginning of DestBlock, which can only happen if it's
-/// safe to move the instruction past all of the instructions between it and the
-/// end of its block.
+/// Try to move the specified instruction from its current block into the
+/// beginning of DestBlock, which can only happen if it's safe to move the
+/// instruction past all of the instructions between it and the end of its
+/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
assert(I->hasOneUse() && "Invariants didn't hold!");
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa(I) || isa(I) || I->mayHaveSideEffects() ||
+ if (isa(I) || I->isEHPad() || I->mayHaveSideEffects() ||
isa(I))
return false;
@@ -2654,17 +2701,24 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
&DestBlock->getParent()->getEntryBlock())
return false;
+ // Do not sink convergent call instructions.
+ if (auto *CI = dyn_cast(I)) {
+ if (CI->isConvergent())
+ return false;
+ }
+
// We can only sink load instructions if there is nothing between the load and
// the end of block that could change the value.
if (I->mayReadFromMemory()) {
- for (BasicBlock::iterator Scan = I, E = I->getParent()->end();
+ for (BasicBlock::iterator Scan = I->getIterator(),
+ E = I->getParent()->end();
Scan != E; ++Scan)
if (Scan->mayWriteToMemory())
return false;
}
BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
- I->moveBefore(InsertPos);
+ I->moveBefore(&*InsertPos);
++NumSunkInst;
return true;
}
@@ -2698,6 +2752,27 @@ bool InstCombiner::run() {
}
}
+ // In general, it is possible for computeKnownBits to determine all bits in a
+ // value even when the operands are not all constants.
+ if (!I->use_empty() && I->getType()->isIntegerTy()) {
+ unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I);
+ if ((KnownZero | KnownOne).isAllOnesValue()) {
+ Constant *C = ConstantInt::get(I->getContext(), KnownOne);
+ DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
+ " from: " << *I << '\n');
+
+ // Add operands to the worklist.
+ ReplaceInstUsesWith(*I, C);
+ ++NumConstProp;
+ EraseInstFromFunction(*I);
+ MadeIRChange = true;
+ continue;
+ }
+ }
+
// See if we can trivially sink this instruction to a successor basic block.
if (I->hasOneUse()) {
BasicBlock *BB = I->getParent();
@@ -2738,7 +2813,7 @@ bool InstCombiner::run() {
}
// Now that we have an instruction, try combining it to simplify it.
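[Editor's note] The new block added to InstCombiner::run() above folds an instruction to a constant whenever KnownZero and KnownOne together cover every bit. A standalone sketch of that test, using plain 64-bit masks in place of APInt (all names here are illustrative, not LLVM's):

```cpp
#include <cstdint>
#include <iostream>
#include <optional>

// If every bit is known (either known-zero or known-one), the value is a
// compile-time constant whose bits are exactly the known-one mask.
std::optional<uint64_t> foldIfAllBitsKnown(uint64_t KnownZero,
                                           uint64_t KnownOne) {
  if ((KnownZero | KnownOne) == ~0ULL)
    return KnownOne;
  return std::nullopt; // some bits are still unknown
}

int main() {
  // Suppose analysis proved bits 0-3 are 0101, bits 4-7 are 1, rest are 0,
  // e.g. for x = (y | 0xF0) & 0xFF with y's low nibble known to be 0101.
  uint64_t KnownOne = 0xF5;
  uint64_t KnownZero = ~uint64_t{0xF5};
  if (auto C = foldIfAllBitsKnown(KnownZero, KnownOne))
    std::cout << "folds to constant " << *C << "\n"; // prints 245
}
```

The point of the change is that this can fire even when no operand is a literal constant, which ordinary constant folding would miss.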
- Builder->SetInsertPoint(I->getParent(), I); + Builder->SetInsertPoint(I); Builder->SetCurrentDebugLocation(I->getDebugLoc()); #ifndef NDEBUG @@ -2768,7 +2843,7 @@ bool InstCombiner::run() { // Insert the new instruction into the basic block... BasicBlock *InstParent = I->getParent(); - BasicBlock::iterator InsertPos = I; + BasicBlock::iterator InsertPos = I->getIterator(); // If we replace a PHI with something that isn't a PHI, fix up the // insertion point. @@ -2801,8 +2876,8 @@ bool InstCombiner::run() { return MadeIRChange; } -/// AddReachableCodeToWorklist - Walk the function in depth-first order, adding -/// all reachable code to the worklist. +/// Walk the function in depth-first order, adding all reachable code to the +/// worklist. /// /// This has a couple of tricks to make the code faster and more powerful. In /// particular, we constant fold and DCE instructions as we go, to avoid adding @@ -2829,7 +2904,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, continue; for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { - Instruction *Inst = BBI++; + Instruction *Inst = &*BBI++; // DCE instruction if trivially dead. if (isInstructionTriviallyDead(Inst, TLI)) { @@ -2900,8 +2975,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, } } - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - Worklist.push_back(TI->getSuccessor(i)); + for (BasicBlock *SuccBB : TI->successors()) + Worklist.push_back(SuccBB); } while (!Worklist.empty()); // Once we've found all of the instructions to add to instcombine's worklist, @@ -2909,8 +2984,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL, // of the function down. This jives well with the way that it adds all uses // of instructions to the worklist after doing a transformation, thus avoiding // some N^2 behavior in pathological cases. - ICWorklist.AddInitialGroup(&InstrsForInstCombineWorklist[0], - InstrsForInstCombineWorklist.size()); + ICWorklist.AddInitialGroup(InstrsForInstCombineWorklist); return MadeIRChange; } @@ -2930,13 +3004,13 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, // track of which blocks we visit. SmallPtrSet Visited; MadeIRChange |= - AddReachableCodeToWorklist(F.begin(), DL, Visited, ICWorklist, TLI); + AddReachableCodeToWorklist(&F.front(), DL, Visited, ICWorklist, TLI); // Do a quick scan over the function. If we find any blocks that are // unreachable, remove any instructions inside of them. This prevents // the instcombine code from having to deal with some bad special cases. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (Visited.count(BB)) + if (Visited.count(&*BB)) continue; // Delete the instructions backwards, as it has a reduced likelihood of @@ -2944,11 +3018,10 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. while (EndInst != BB->begin()) { // Delete the next to last instruction. 
- BasicBlock::iterator I = EndInst; - Instruction *Inst = --I; - if (!Inst->use_empty()) + Instruction *Inst = &*--EndInst->getIterator(); + if (!Inst->use_empty() && !Inst->getType()->isTokenTy()) Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); - if (isa(Inst)) { + if (Inst->isEHPad()) { EndInst = Inst; continue; } @@ -2956,7 +3029,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL, ++NumDeadInst; MadeIRChange = true; } - Inst->eraseFromParent(); + if (!Inst->getType()->isTokenTy()) + Inst->eraseFromParent(); } } @@ -2968,8 +3042,6 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, LoopInfo *LI = nullptr) { - // Minimizing size? - bool MinimizeSize = F.hasFnAttribute(Attribute::MinSize); auto &DL = F.getParent()->getDataLayout(); /// Builder - This is an IRBuilder that automatically inserts new @@ -2992,7 +3064,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist, if (prepareICWorklistFromFunction(F, DL, &TLI, Worklist)) Changed = true; - InstCombiner IC(Worklist, &Builder, MinimizeSize, + InstCombiner IC(Worklist, &Builder, F.optForMinSize(), AA, &AC, &TLI, &DT, DL, LI); if (IC.run()) Changed = true; @@ -3046,11 +3118,12 @@ public: void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); } bool InstructionCombiningPass::runOnFunction(Function &F) { @@ -3058,7 +3131,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { return false; // Required analyses. - auto AA = &getAnalysis(); + auto AA = &getAnalysis().getAAResults(); auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(); auto &DT = getAnalysis().getDomTree(); @@ -3076,7 +3149,8 @@ INITIALIZE_PASS_BEGIN(InstructionCombiningPass, "instcombine", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine", "Combine redundant instructions", false, false) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index e7ef9f96edc2..a9df5e5898ae 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -90,7 +91,9 @@ static const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *const kAsanInitName = "__asan_init_v5"; +static const char *const kAsanInitName = "__asan_init"; +static const char *const kAsanVersionCheckName = + "__asan_version_mismatch_check_v6"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; static const char *const kAsanPtrSub = "__sanitizer_ptr_sub"; 
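[Editor's note] The constant rename just above splits the old versioned init entry point (__asan_init_v5) into an unversioned __asan_init plus an explicit __asan_version_mismatch_check_v6; the doInitialization hunk later in this file passes kAsanVersionCheckName to createSanitizerCtorAndInitFunctions so the module constructor emits both calls. A rough sketch of what that generated constructor amounts to, with stand-in stubs for the compiler-rt entry points:

```cpp
#include <cstdio>

// Stand-ins for the compiler-rt entry points named by the constants above.
extern "C" void __asan_init() { std::puts("runtime initialized"); }
extern "C" void __asan_version_mismatch_check_v6() {
  // The real runtime aborts here if the compiler and runtime ABI versions
  // disagree; this stub is a no-op.
}

// Rough shape of the generated asan.module_ctor: initialize the runtime,
// then fail loudly on an ABI version mismatch.
extern "C" void asan_module_ctor() {
  __asan_init();
  __asan_version_mismatch_check_v6();
}

int main() { asan_module_ctor(); }
```

Keeping the version check in a separate symbol means old binaries linked against a newer runtime fail with a clear diagnostic instead of silently misbehaving.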
static const char *const kAsanHandleNoReturnName = "__asan_handle_no_return";
@@ -119,6 +122,10 @@ static const unsigned kAllocaRzSize = 32;
static cl::opt ClEnableKasan(
"asan-kernel", cl::desc("Enable KernelAddressSanitizer instrumentation"),
cl::Hidden, cl::init(false));
+static cl::opt ClRecover(
+ "asan-recover",
+ cl::desc("Enable recovery mode (continue-after-error)."),
+ cl::Hidden, cl::init(false));
// This flag may need to be replaced with -f[no-]asan-reads.
static cl::opt ClInstrumentReads("asan-instrument-reads",
@@ -177,7 +184,7 @@ static cl::opt ClMemoryAccessCallbackPrefix(
cl::init("__asan_"));
static cl::opt ClInstrumentAllocas("asan-instrument-allocas",
cl::desc("instrument dynamic allocas"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
static cl::opt ClSkipPromotableAllocas(
"asan-skip-promotable-allocas",
cl::desc("Do not instrument promotable allocas"), cl::Hidden,
@@ -273,6 +280,11 @@ class GlobalsMetadata {
GlobalsMetadata() : inited_(false) {}
+ void reset() {
+ inited_ = false;
+ Entries.clear();
+ }
+
void init(Module &M) {
assert(!inited_);
inited_ = true;
@@ -321,7 +333,7 @@ struct ShadowMapping {
static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
bool IsKasan) {
- bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
+ bool IsAndroid = TargetTriple.isAndroid();
bool IsIOS = TargetTriple.isiOS();
bool IsFreeBSD = TargetTriple.isOSFreeBSD();
bool IsLinux = TargetTriple.isOSLinux();
@@ -338,6 +350,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
ShadowMapping Mapping;
if (LongSize == 32) {
+ // Android is always PIE, which means that the beginning of the address
+ // space is always available.
if (IsAndroid)
Mapping.Offset = 0;
else if (IsMIPS32)
@@ -376,7 +390,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
// OR-ing the shadow offset is more efficient (at least on x86) if the offset
// is a power of two, but on ppc64 we have to use add since the shadow
// offset is not necessarily 1/8-th of the address space.
- Mapping.OrShadowOffset = !IsPPC64 && !(Mapping.Offset & (Mapping.Offset - 1));
+ Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64
+ && !(Mapping.Offset & (Mapping.Offset - 1));
return Mapping;
}
@@ -389,8 +404,9 @@ static size_t RedzoneSizeForScale(int MappingScale) {
/// AddressSanitizer: instrument the code in module to find memory bugs.
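[Editor's note] getShadowMapping above only selects the Scale and Offset; the translation itself (memToShadow, declared in the pass below) computes Shadow = (Addr >> Scale) + Offset, using OR instead of ADD when OrShadowOffset is set and the offset is a power of two. A rough standalone sketch of both variants; the constants are illustrative, not a real platform mapping:

```cpp
#include <cassert>
#include <cstdint>

// Illustrative 1/8-th shadow mapping: Scale = 3, so one shadow byte
// covers eight application bytes.
constexpr unsigned Scale = 3;
constexpr uint64_t Offset = 0x100000000ULL; // a power of two in this example

uint64_t shadowWithAdd(uint64_t Addr) { return (Addr >> Scale) + Offset; }

// When Offset is a power of two and no shifted address can reach that bit,
// OR gives the same result and can be cheaper to encode (e.g. on x86).
uint64_t shadowWithOr(uint64_t Addr) { return (Addr >> Scale) | Offset; }

int main() {
  uint64_t Addr = 0x7fff1234ULL;
  assert(shadowWithAdd(Addr) == shadowWithOr(Addr));
}
```

The hunk above also excludes AArch64 from the OR form, since with multiple VMA sizes the power-of-two assumption no longer holds for every shifted address.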
struct AddressSanitizer : public FunctionPass { - explicit AddressSanitizer(bool CompileKernel = false) - : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan) { + explicit AddressSanitizer(bool CompileKernel = false, bool Recover = false) + : FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan), + Recover(Recover || ClRecover) { initializeAddressSanitizerPass(*PassRegistry::getPassRegistry()); } const char *getPassName() const override { @@ -437,7 +453,9 @@ struct AddressSanitizer : public FunctionPass { Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F) override; bool maybeInsertAsanInitAtFunctionEntry(Function &F); + void markEscapedLocalAllocas(Function &F); bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; static char ID; // Pass identification, replacement for typeid DominatorTree &getDominatorTree() const { return *DT; } @@ -450,10 +468,21 @@ struct AddressSanitizer : public FunctionPass { bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr, uint64_t TypeSize) const; + /// Helper to cleanup per-function state. + struct FunctionStateRAII { + AddressSanitizer *Pass; + FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) { + assert(Pass->ProcessedAllocas.empty() && + "last pass forgot to clear cache"); + } + ~FunctionStateRAII() { Pass->ProcessedAllocas.clear(); } + }; + LLVMContext *C; Triple TargetTriple; int LongSize; bool CompileKernel; + bool Recover; Type *IntptrTy; ShadowMapping Mapping; DominatorTree *DT; @@ -477,8 +506,10 @@ struct AddressSanitizer : public FunctionPass { class AddressSanitizerModule : public ModulePass { public: - explicit AddressSanitizerModule(bool CompileKernel = false) - : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan) {} + explicit AddressSanitizerModule(bool CompileKernel = false, + bool Recover = false) + : ModulePass(ID), CompileKernel(CompileKernel || ClEnableKasan), + Recover(Recover || ClRecover) {} bool runOnModule(Module &M) override; static char ID; // Pass identification, replacement for typeid const char *getPassName() const override { return "AddressSanitizerModule"; } @@ -496,6 +527,7 @@ class AddressSanitizerModule : public ModulePass { GlobalsMetadata GlobalsMD; bool CompileKernel; + bool Recover; Type *IntptrTy; LLVMContext *C; Triple TargetTriple; @@ -525,6 +557,7 @@ struct FunctionStackPoisoner : public InstVisitor { ShadowMapping Mapping; SmallVector AllocaVec; + SmallSetVector NonInstrumentedStaticAllocaVec; SmallVector RetVec; unsigned StackAlignment; @@ -545,12 +578,14 @@ struct FunctionStackPoisoner : public InstVisitor { SmallVector DynamicAllocaVec; SmallVector StackRestoreVec; AllocaInst *DynamicAllocaLayout = nullptr; + IntrinsicInst *LocalEscapeCall = nullptr; // Maps Value to an AllocaInst from which the Value is originated. 
typedef DenseMap AllocaForValueMapTy; AllocaForValueMapTy AllocaForValue; - bool HasNonEmptyInlineAsm; + bool HasNonEmptyInlineAsm = false; + bool HasReturnsTwiceCall = false; std::unique_ptr EmptyInlineAsm; FunctionStackPoisoner(Function &F, AddressSanitizer &ASan) @@ -562,7 +597,6 @@ struct FunctionStackPoisoner : public InstVisitor { IntptrPtrTy(PointerType::get(IntptrTy, 0)), Mapping(ASan.Mapping), StackAlignment(1 << Mapping.Scale), - HasNonEmptyInlineAsm(false), EmptyInlineAsm(CallInst::Create(ASan.EmptyAsm)) {} bool runOnFunction() { @@ -596,9 +630,24 @@ struct FunctionStackPoisoner : public InstVisitor { void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore, Value *SavedStack) { IRBuilder<> IRB(InstBefore); + Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy); + // When we insert _asan_allocas_unpoison before @llvm.stackrestore, we + // need to adjust extracted SP to compute the address of the most recent + // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for + // this purpose. + if (!isa(InstBefore)) { + Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration( + InstBefore->getModule(), Intrinsic::get_dynamic_area_offset, + {IntptrTy}); + + Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {}); + + DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy), + DynamicAreaOffset); + } + IRB.CreateCall(AsanAllocasUnpoisonFunc, - {IRB.CreateLoad(DynamicAllocaLayout), - IRB.CreatePtrToInt(SavedStack, IntptrTy)}); + {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr}); } // Unpoison dynamic allocas redzones. @@ -625,7 +674,10 @@ struct FunctionStackPoisoner : public InstVisitor { /// \brief Collect Alloca instructions we want (and can) handle. void visitAllocaInst(AllocaInst &AI) { - if (!ASan.isInterestingAlloca(AI)) return; + if (!ASan.isInterestingAlloca(AI)) { + if (AI.isStaticAlloca()) NonInstrumentedStaticAllocaVec.insert(&AI); + return; + } StackAlignment = std::max(StackAlignment, AI.getAlignment()); if (ASan.isDynamicAlloca(AI)) @@ -639,6 +691,7 @@ struct FunctionStackPoisoner : public InstVisitor { void visitIntrinsicInst(IntrinsicInst &II) { Intrinsic::ID ID = II.getIntrinsicID(); if (ID == Intrinsic::stackrestore) StackRestoreVec.push_back(&II); + if (ID == Intrinsic::localescape) LocalEscapeCall = &II; if (!ClCheckLifetime) return; if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end) return; @@ -660,9 +713,13 @@ struct FunctionStackPoisoner : public InstVisitor { AllocaPoisonCallVec.push_back(APC); } - void visitCallInst(CallInst &CI) { - HasNonEmptyInlineAsm |= - CI.isInlineAsm() && !CI.isIdenticalTo(EmptyInlineAsm.get()); + void visitCallSite(CallSite CS) { + Instruction *I = CS.getInstruction(); + if (CallInst *CI = dyn_cast(I)) { + HasNonEmptyInlineAsm |= + CI->isInlineAsm() && !CI->isIdenticalTo(EmptyInlineAsm.get()); + HasReturnsTwiceCall |= CI->canReturnTwice(); + } } // ---------------------- Helpers. 
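[Editor's note] The unpoisonDynamicAllocasBeforeInst change earlier in this hunk adjusts the saved stack pointer by @llvm.get.dynamic.area.offset before calling the unpoison routine, because on some targets the SP captured by llvm.stacksave does not point directly at the dynamic alloca area. A back-of-the-envelope sketch of the address arithmetic; the offset value here is hypothetical, while the real one comes from the intrinsic:

```cpp
#include <cstdint>
#include <iostream>

// Compute the base of the dynamic-alloca area from a saved stack pointer
// plus a target-specific fixed offset (zero on most targets, nonzero where
// the ABI keeps extra state between SP and the alloca area).
uint64_t dynamicAreaPtr(uint64_t SavedSP, uint64_t DynamicAreaOffset) {
  return SavedSP + DynamicAreaOffset;
}

int main() {
  uint64_t SavedSP = 0x7ffee000;    // value captured by llvm.stacksave
  uint64_t HypotheticalOffset = 32; // stand-in for the intrinsic's result
  std::cout << std::hex << dynamicAreaPtr(SavedSP, HypotheticalOffset)
            << "\n"; // base passed to the allocas-unpoison runtime call
}
```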
@@ -689,7 +746,7 @@ struct FunctionStackPoisoner : public InstVisitor { Instruction *ThenTerm, Value *ValueIfFalse); }; -} // namespace +} // anonymous namespace char AddressSanitizer::ID = 0; INITIALIZE_PASS_BEGIN( @@ -697,12 +754,15 @@ INITIALIZE_PASS_BEGIN( "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) -FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel) { - return new AddressSanitizer(CompileKernel); +FunctionPass *llvm::createAddressSanitizerFunctionPass(bool CompileKernel, + bool Recover) { + assert(!CompileKernel || Recover); + return new AddressSanitizer(CompileKernel, Recover); } char AddressSanitizerModule::ID = 0; @@ -711,8 +771,10 @@ INITIALIZE_PASS( "AddressSanitizer: detects use-after-free and out-of-bounds bugs." "ModulePass", false, false) -ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel) { - return new AddressSanitizerModule(CompileKernel); +ModulePass *llvm::createAddressSanitizerModulePass(bool CompileKernel, + bool Recover) { + assert(!CompileKernel || Recover); + return new AddressSanitizerModule(CompileKernel, Recover); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -799,8 +861,10 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) { getAllocaSizeInBytes(&AI) > 0 && // We are only interested in allocas not promotable to registers. // Promotable allocas are common under -O0. - (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI) || - isDynamicAlloca(AI))); + (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && + // inalloca allocas are not treated as static, and we don't want + // dynamic alloca instrumentation for them as well. + !AI.isUsedWithInAlloca()); ProcessedAllocas[&AI] = IsInteresting; return IsInteresting; @@ -868,10 +932,8 @@ static bool isInterestingPointerComparisonOrSubtraction(Instruction *I) { } else { return false; } - if (!isPointerOperand(I->getOperand(0)) || - !isPointerOperand(I->getOperand(1))) - return false; - return true; + return isPointerOperand(I->getOperand(0)) && + isPointerOperand(I->getOperand(1)); } bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { @@ -919,7 +981,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, // If initialization order checking is disabled, a simple access to a // dynamically initialized global is always valid. 
GlobalVariable *G = dyn_cast(GetUnderlyingObject(Addr, DL)); - if (G != NULL && (!ClInitializers || GlobalIsLinkerInitialized(G)) && + if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) && isSafeAccess(ObjSizeVis, Addr, TypeSize)) { NumOptimizedAccessesToGlobalVar++; return; @@ -1041,13 +1103,17 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, BasicBlock *NextBB = CheckTerm->getSuccessor(0); IRB.SetInsertPoint(CheckTerm); Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize); - BasicBlock *CrashBlock = + if (Recover) { + CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false); + } else { + BasicBlock *CrashBlock = BasicBlock::Create(*C, "", NextBB->getParent(), NextBB); - CrashTerm = new UnreachableInst(*C, CrashBlock); - BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); - ReplaceInstWithInst(CheckTerm, NewTerm); + CrashTerm = new UnreachableInst(*C, CrashBlock); + BranchInst *NewTerm = BranchInst::Create(CrashBlock, NextBB, Cmp2); + ReplaceInstWithInst(CheckTerm, NewTerm); + } } else { - CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, true); + CrashTerm = SplitBlockAndInsertIfThen(Cmp, InsertBefore, !Recover); } Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite, @@ -1084,7 +1150,8 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment( void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName) { // Set up the arguments to our poison/unpoison functions. - IRBuilder<> IRB(GlobalInit.begin()->getFirstInsertionPt()); + IRBuilder<> IRB(&GlobalInit.front(), + GlobalInit.front().getFirstInsertionPt()); // Add a call to poison all external globals before the given function starts. Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); @@ -1147,6 +1214,14 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { // Do not instrument globals from special LLVM sections. if (Section.find("__llvm") != StringRef::npos) return false; + // Do not instrument function pointers to initialization and termination + // routines: dynamic linker will not properly handle redzones. + if (Section.startswith(".preinit_array") || + Section.startswith(".init_array") || + Section.startswith(".fini_array")) { + return false; + } + // Callbacks put into the CRT initializer/terminator sections // should not be instrumented. // See https://code.google.com/p/address-sanitizer/issues/detail?id=305 @@ -1162,10 +1237,7 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { bool TAAParsed; std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier( Section, ParsedSegment, ParsedSection, TAA, TAAParsed, StubSize); - if (!ErrorCode.empty()) { - assert(false && "Invalid section specifier."); - return false; - } + assert(ErrorCode.empty() && "Invalid section specifier."); // Ignore the globals from the __OBJC section. The ObjC runtime assumes // those conform to /usr/lib/objc/runtime.h, so we can't add redzones to @@ -1383,13 +1455,11 @@ void AddressSanitizer::initializeCallbacks(Module &M) { const std::string TypeStr = AccessIsWrite ? "store" : "load"; const std::string ExpStr = Exp ? "exp_" : ""; const std::string SuffixStr = CompileKernel ? "N" : "_n"; - const std::string EndingStr = CompileKernel ? "_noabort" : ""; - const Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr; - // TODO(glider): for KASan builds add _noabort to error reporting - // functions and make them actually noabort (remove the UnreachableInst). 
+ const std::string EndingStr = Recover ? "_noabort" : "";
+ Type *ExpType = Exp ? Type::getInt32Ty(*C) : nullptr;
AsanErrorCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr,
+ kAsanReportErrorTemplate + ExpStr + TypeStr + SuffixStr + EndingStr,
IRB.getVoidTy(), IntptrTy, IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallbackSized[AccessIsWrite][Exp] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
@@ -1400,7 +1470,7 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
const std::string Suffix = TypeStr + itostr(1 << AccessSizeIndex);
AsanErrorCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- kAsanReportErrorTemplate + ExpStr + Suffix,
+ kAsanReportErrorTemplate + ExpStr + Suffix + EndingStr,
IRB.getVoidTy(), IntptrTy, ExpType, nullptr));
AsanMemoryAccessCallback[AccessIsWrite][Exp][AccessSizeIndex] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
@@ -1448,15 +1518,20 @@ bool AddressSanitizer::doInitialization(Module &M) {
if (!CompileKernel) {
std::tie(AsanCtorFunction, AsanInitFunction) =
- createSanitizerCtorAndInitFunctions(M, kAsanModuleCtorName, kAsanInitName,
- /*InitArgTypes=*/{},
- /*InitArgs=*/{});
+ createSanitizerCtorAndInitFunctions(
+ M, kAsanModuleCtorName, kAsanInitName,
+ /*InitArgTypes=*/{}, /*InitArgs=*/{}, kAsanVersionCheckName);
appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority);
}
Mapping = getShadowMapping(TargetTriple, LongSize, CompileKernel);
return true;
}
+bool AddressSanitizer::doFinalization(Module &M) {
+ GlobalsMD.reset();
+ return false;
+}
+
bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// For each NSObject descendant having a +load method, this method is invoked
// by the ObjC runtime before any of the static constructors is called.
@@ -1466,13 +1541,41 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
// We cannot just ignore these methods, because they may call other
// instrumented functions.
if (F.getName().find(" load]") != std::string::npos) {
- IRBuilder<> IRB(F.begin()->begin());
+ IRBuilder<> IRB(&F.front(), F.front().begin());
IRB.CreateCall(AsanInitFunction, {});
return true;
}
return false;
}
+void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
+ // Find the one possible call to llvm.localescape and pre-mark allocas passed
+ // to it as uninteresting. This assumes we haven't started processing allocas
+ // yet. This check is done up front because iterating the use list in
+ // isInterestingAlloca would be algorithmically slower.
+ assert(ProcessedAllocas.empty() && "must process localescape before allocas");
+
+ // Try to get the declaration of llvm.localescape. If it's not in the module,
+ // we can exit early.
+ if (!F.getParent()->getFunction("llvm.localescape")) return;
+
+ // Look for a call to llvm.localescape in the entry block. It can't be in
+ // any other block.
+ for (Instruction &I : F.getEntryBlock()) {
+ IntrinsicInst *II = dyn_cast(&I);
+ if (II && II->getIntrinsicID() == Intrinsic::localescape) {
+ // We found a call. Mark all the allocas passed in as uninteresting.
+ for (Value *Arg : II->arg_operands()) { + AllocaInst *AI = dyn_cast(Arg->stripPointerCasts()); + assert(AI && AI->isStaticAlloca() && + "non-static alloca arg to localescape"); + ProcessedAllocas[AI] = false; + } + break; + } + } +} + bool AddressSanitizer::runOnFunction(Function &F) { if (&F == AsanCtorFunction) return false; if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; @@ -1488,6 +1591,12 @@ bool AddressSanitizer::runOnFunction(Function &F) { if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false; + FunctionStateRAII CleanupObj(this); + + // We can't instrument allocas used with llvm.localescape. Only static allocas + // can be passed to that intrinsic. + markEscapedLocalAllocas(F); + // We want to instrument every address only once per basic block (unless there // are calls between uses). SmallSet TempsToInstrument; @@ -1715,6 +1824,16 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() { void FunctionStackPoisoner::poisonStack() { assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0); + // Insert poison calls for lifetime intrinsics for alloca. + bool HavePoisonedAllocas = false; + for (const auto &APC : AllocaPoisonCallVec) { + assert(APC.InsBefore); + assert(APC.AI); + IRBuilder<> IRB(APC.InsBefore); + poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison); + HavePoisonedAllocas |= APC.DoPoison; + } + if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) { // Handle dynamic allocas. createDynamicAllocasInitStorage(); @@ -1723,7 +1842,7 @@ void FunctionStackPoisoner::poisonStack() { unpoisonDynamicAllocas(); } - if (AllocaVec.size() == 0) return; + if (AllocaVec.empty()) return; int StackMallocIdx = -1; DebugLoc EntryDebugLocation; @@ -1734,6 +1853,19 @@ void FunctionStackPoisoner::poisonStack() { IRBuilder<> IRB(InsBefore); IRB.SetCurrentDebugLocation(EntryDebugLocation); + // Make sure non-instrumented allocas stay in the entry block. Otherwise, + // debug info is broken, because only entry-block allocas are treated as + // regular stack slots. + auto InsBeforeB = InsBefore->getParent(); + assert(InsBeforeB == &F.getEntryBlock()); + for (BasicBlock::iterator I(InsBefore); I != InsBeforeB->end(); ++I) + if (auto *AI = dyn_cast(I)) + if (NonInstrumentedStaticAllocaVec.count(AI) > 0) + AI->moveBefore(InsBefore); + + // If we have a call to llvm.localescape, keep it in the entry block. + if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore); + SmallVector SVD; SVD.reserve(AllocaVec.size()); for (AllocaInst *AI : AllocaVec) { @@ -1751,10 +1883,15 @@ void FunctionStackPoisoner::poisonStack() { uint64_t LocalStackSize = L.FrameSize; bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel && LocalStackSize <= kMaxStackMallocSize; - // Don't do dynamic alloca or stack malloc in presence of inline asm: - // too often it makes assumptions on which registers are available. - bool DoDynamicAlloca = ClDynamicAllocaStack && !HasNonEmptyInlineAsm; - DoStackMalloc &= !HasNonEmptyInlineAsm; + bool DoDynamicAlloca = ClDynamicAllocaStack; + // Don't do dynamic alloca or stack malloc if: + // 1) There is inline asm: too often it makes assumptions on which registers + // are available. + // 2) There is a returns_twice call (typically setjmp), which is + // optimization-hostile, and doesn't play well with introduced indirect + // register-relative calculation of local variable addresses. 
+ DoDynamicAlloca &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
+ DoStackMalloc &= !HasNonEmptyInlineAsm && !HasReturnsTwiceCall;
Value *StaticAlloca =
DoDynamicAlloca ? nullptr : createAllocaForLayout(IRB, L, false);
@@ -1804,16 +1941,6 @@ void FunctionStackPoisoner::poisonStack() {
DoDynamicAlloca ? createAllocaForLayout(IRB, L, true) : StaticAlloca;
}
- // Insert poison calls for lifetime intrinsics for alloca.
- bool HavePoisonedAllocas = false;
- for (const auto &APC : AllocaPoisonCallVec) {
- assert(APC.InsBefore);
- assert(APC.AI);
- IRBuilder<> IRB(APC.InsBefore);
- poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
- HavePoisonedAllocas |= APC.DoPoison;
- }
-
// Replace Alloca instructions with base+offset.
for (const auto &Desc : SVD) {
AllocaInst *AI = Desc.AI;
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index f6858034d79e..fd3dfd9af033 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -106,7 +106,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
}
++ChecksAdded;
- Instruction *Inst = Builder->GetInsertPoint();
+ BasicBlock::iterator Inst = Builder->GetInsertPoint();
BasicBlock *OldBB = Inst->getParent();
BasicBlock *Cont = OldBB->splitBasicBlock(Inst);
OldBB->getTerminator()->eraseFromParent();
diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h
new file mode 100644
index 000000000000..c47fdbf68996
--- /dev/null
+++ b/lib/Transforms/Instrumentation/CFGMST.h
@@ -0,0 +1,217 @@
+//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a Union-find algorithm to compute the Minimum Spanning
+// Tree for a given CFG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include
+#include
+#include
+
+namespace llvm {
+
+#define DEBUG_TYPE "cfgmst"
+
+/// \brief A union-find based Minimum Spanning Tree for CFG
+///
+/// Implements a Union-find algorithm to compute the Minimum Spanning Tree
+/// for a given CFG.
+template class CFGMST {
+public:
+ Function &F;
+
+ // Store all the edges in CFG. It may contain some stale edges
+ // when Removed is set.
+ std::vector> AllEdges;
+
+ // This map records the auxiliary information for each BB.
+ DenseMap> BBInfos;
+
+ // Find the root group of G and compress the path from G to the root.
+ BBInfo *findAndCompressGroup(BBInfo *G) {
+ if (G->Group != G)
+ G->Group = findAndCompressGroup(static_cast(G->Group));
+ return static_cast(G->Group);
+ }
+
+ // Union BB1 and BB2 into the same group and return true.
+ // Returns false if BB1 and BB2 are already in the same group.
+ bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1));
+ BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2));
+
+ if (BB1G == BB2G)
+ return false;
+
+ // Make the smaller-rank tree a direct child of the root of the
+ // higher-rank tree.
+ if (BB1G->Rank < BB2G->Rank)
+ BB1G->Group = BB2G;
+ else {
+ BB2G->Group = BB1G;
+ // If the ranks are the same, increment the rank of the surviving
+ // root by one.
+ if (BB1G->Rank == BB2G->Rank)
+ BB1G->Rank++;
+ }
+ return true;
+ }
+
+ // Given a BB, return its auxiliary information.
+ BBInfo &getBBInfo(const BasicBlock *BB) const {
+ auto It = BBInfos.find(BB);
+ assert(It->second.get() != nullptr);
+ return *It->second.get();
+ }
+
+ // Traverse the CFG, find all the edges, and assign each a weight.
+ // Edges with large weight will be put into the MST first so they are less
+ // likely to be instrumented.
+ void buildEdges() {
+ DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n");
+
+ const BasicBlock *BB = &(F.getEntryBlock());
+ uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
+ // Add a fake edge to the entry.
+ addEdge(nullptr, BB, EntryWeight);
+
+ // Special handling for single BB functions.
+ if (succ_empty(BB)) {
+ addEdge(BB, nullptr, EntryWeight);
+ return;
+ }
+
+ static const uint32_t CriticalEdgeMultiplier = 1000;
+
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ TerminatorInst *TI = BB->getTerminator();
+ uint64_t BBWeight =
+ (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2);
+ uint64_t Weight = 2;
+ if (int successors = TI->getNumSuccessors()) {
+ for (int i = 0; i != successors; ++i) {
+ BasicBlock *TargetBB = TI->getSuccessor(i);
+ bool Critical = isCriticalEdge(TI, i);
+ uint64_t scaleFactor = BBWeight;
+ if (Critical) {
+ if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
+ scaleFactor *= CriticalEdgeMultiplier;
+ else
+ scaleFactor = UINT64_MAX;
+ }
+ if (BPI != nullptr)
+ Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor);
+ addEdge(&*BB, TargetBB, Weight).IsCritical = Critical;
+ DEBUG(dbgs() << " Edge: from " << BB->getName() << " to "
+ << TargetBB->getName() << " w=" << Weight << "\n");
+ }
+ } else {
+ addEdge(&*BB, nullptr, BBWeight);
+ DEBUG(dbgs() << " Edge: from " << BB->getName() << " to exit"
+ << " w = " << BBWeight << "\n");
+ }
+ }
+ }
+
+ // Sort CFG edges by weight.
+ void sortEdgesByWeight() {
+ std::stable_sort(AllEdges.begin(), AllEdges.end(),
+ [](const std::unique_ptr &Edge1,
+ const std::unique_ptr &Edge2) {
+ return Edge1->Weight > Edge2->Weight;
+ });
+ }
+
+ // Traverse all the edges and compute the Minimum Weight Spanning Tree
+ // using the union-find algorithm.
+ void computeMinimumSpanningTree() {
+ // First, put all critical edges whose destination is a landing pad into
+ // the MST. This works around insufficient support for splitting critical
+ // edges when the destination BB is a landing pad.
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ if (Ei->IsCritical) {
+ if (Ei->DestBB && Ei->DestBB->isLandingPad()) {
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+ }
+
+ for (auto &Ei : AllEdges) {
+ if (Ei->Removed)
+ continue;
+ if (unionGroups(Ei->SrcBB, Ei->DestBB))
+ Ei->InMST = true;
+ }
+ }
+
+ // Dump the debug information about the instrumentation.
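[Editor's note] findAndCompressGroup and unionGroups above are a textbook union-find with path compression and union by rank. For reference, here is the same structure over plain integer ids, detached from any CFG types; this is a sketch of the algorithm, not the CFGMST code itself:

```cpp
#include <cassert>
#include <numeric>
#include <utility>
#include <vector>

struct UnionFind {
  std::vector<int> Parent, Rank;
  explicit UnionFind(int N) : Parent(N), Rank(N, 0) {
    std::iota(Parent.begin(), Parent.end(), 0); // each node is its own root
  }
  // Path compression: point every visited node directly at the root.
  int find(int X) {
    if (Parent[X] != X)
      Parent[X] = find(Parent[X]);
    return Parent[X];
  }
  // Union by rank; returns false if X and Y were already in one group.
  bool unite(int X, int Y) {
    int RX = find(X), RY = find(Y);
    if (RX == RY)
      return false;
    if (Rank[RX] < Rank[RY])
      std::swap(RX, RY);
    Parent[RY] = RX;
    if (Rank[RX] == Rank[RY])
      ++Rank[RX];
    return true;
  }
};

int main() {
  UnionFind UF(4);
  assert(UF.unite(0, 1));  // adds an edge to the spanning tree
  assert(UF.unite(1, 2));
  assert(!UF.unite(0, 2)); // would form a cycle, so it is skipped
}
```

Processing edges in decreasing weight order and keeping only the unions that succeed is exactly how computeMinimumSpanningTree works: the heavy (frequently executed) edges land in the tree, leaving the lighter edges as the ones to instrument.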
+ void dumpEdges(raw_ostream &OS, const Twine &Message) const { + if (!Message.str().empty()) + OS << Message << "\n"; + OS << " Number of Basic Blocks: " << BBInfos.size() << "\n"; + for (auto &BI : BBInfos) { + const BasicBlock *BB = BI.first; + OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " " + << BI.second->infoString() << "\n"; + } + + OS << " Number of Edges: " << AllEdges.size() + << " (*: Instrument, C: CriticalEdge, -: Removed)\n"; + uint32_t Count = 0; + for (auto &EI : AllEdges) + OS << " Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->" + << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n"; + } + + // Add an edge to AllEdges with weight W. + Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) { + uint32_t Index = BBInfos.size(); + auto Iter = BBInfos.end(); + bool Inserted; + std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); + if (Inserted) { + // Newly inserted, update the real info. + Iter->second = std::move(llvm::make_unique(Index)); + Index++; + } + std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); + if (Inserted) + // Newly inserted, update the real info. + Iter->second = std::move(llvm::make_unique(Index)); + AllEdges.emplace_back(new Edge(Src, Dest, W)); + return *AllEdges.back(); + } + + BranchProbabilityInfo *BPI; + BlockFrequencyInfo *BFI; + +public: + CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr, + BlockFrequencyInfo *BFI_ = nullptr) + : F(Func), BPI(BPI_), BFI(BFI_) { + buildEdges(); + sortEdgesByWeight(); + computeMinimumSpanningTree(); + } +}; + +#undef DEBUG_TYPE // "cfgmst" +} // end namespace llvm diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 9b81f4bb1619..cae1e5af7ac7 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMInstrumentation MemorySanitizer.cpp Instrumentation.cpp InstrProfiling.cpp + PGOInstrumentation.cpp SafeStack.cpp SanitizerCoverage.cpp ThreadSanitizer.cpp diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 2de6e1afaba9..d459fc50d136 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -72,6 +72,11 @@ using namespace llvm; +// External symbol to be used when generating the shadow address for +// architectures with multiple VMAs. Instead of using a constant integer +// the runtime will set the external mask based on the VMA range. +static const char *const kDFSanExternShadowPtrMask = "__dfsan_shadow_ptr_mask"; + // The -dfsan-preserve-alignment flag controls whether this pass assumes that // alignment requirements provided by the input IR are correct. 
For example, // if the input IR contains a load with alignment 8, this flag will cause @@ -124,6 +129,7 @@ static cl::opt ClDebugNonzeroLabels( "load or return with a nonzero label"), cl::Hidden); + namespace { StringRef GetGlobalTypeString(const GlobalValue &G) { @@ -231,6 +237,7 @@ class DataFlowSanitizer : public ModulePass { void *(*GetRetvalTLSPtr)(); Constant *GetArgTLS; Constant *GetRetvalTLS; + Constant *ExternalShadowMask; FunctionType *DFSanUnionFnTy; FunctionType *DFSanUnionLoadFnTy; FunctionType *DFSanUnimplementedFnTy; @@ -248,7 +255,7 @@ class DataFlowSanitizer : public ModulePass { DFSanABIList ABIList; DenseMap UnwrappedFnMap; AttributeSet ReadOnlyNoneAttrs; - DenseMap FunctionDIs; + bool DFSanRuntimeShadowMask; Value *getShadowAddress(Value *Addr, Instruction *Pos); bool isInstrumented(const Function *F); @@ -362,7 +369,8 @@ llvm::createDataFlowSanitizerPass(const std::vector &ABIListFiles, DataFlowSanitizer::DataFlowSanitizer( const std::vector &ABIListFiles, void *(*getArgTLS)(), void *(*getRetValTLS)()) - : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS) { + : ModulePass(ID), GetArgTLSPtr(getArgTLS), GetRetvalTLSPtr(getRetValTLS), + DFSanRuntimeShadowMask(false) { std::vector AllABIListFiles(std::move(ABIListFiles)); AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(), ClABIListFiles.end()); @@ -420,6 +428,8 @@ bool DataFlowSanitizer::doInitialization(Module &M) { bool IsX86_64 = TargetTriple.getArch() == llvm::Triple::x86_64; bool IsMIPS64 = TargetTriple.getArch() == llvm::Triple::mips64 || TargetTriple.getArch() == llvm::Triple::mips64el; + bool IsAArch64 = TargetTriple.getArch() == llvm::Triple::aarch64 || + TargetTriple.getArch() == llvm::Triple::aarch64_be; const DataLayout &DL = M.getDataLayout(); @@ -434,6 +444,9 @@ bool DataFlowSanitizer::doInitialization(Module &M) { ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0x700000000000LL); else if (IsMIPS64) ShadowPtrMask = ConstantInt::getSigned(IntptrTy, ~0xF000000000LL); + // AArch64 supports multiple VMAs and the shadow mask is set at runtime. 
+ else if (IsAArch64) + DFSanRuntimeShadowMask = true; else report_fatal_error("unsupported triple"); @@ -578,7 +591,7 @@ Constant *DataFlowSanitizer::getOrBuildTrampolineFunction(FunctionType *FT, DFSanFunction DFSF(*this, F, /*IsNativeABI=*/true); Function::arg_iterator ValAI = F->arg_begin(), ShadowAI = AI; ++ValAI; for (unsigned N = FT->getNumParams(); N != 0; ++ValAI, ++ShadowAI, --N) - DFSF.ValShadowMap[ValAI] = ShadowAI; + DFSF.ValShadowMap[&*ValAI] = &*ShadowAI; DFSanVisitor(DFSF).visitCallInst(*CI); if (!FT->getReturnType()->isVoidTy()) new StoreInst(DFSF.getShadow(RI->getReturnValue()), @@ -592,8 +605,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (ABIList.isIn(M, "skip")) return false; - FunctionDIs = makeSubprogramMap(M); - if (!GetArgTLSPtr) { Type *ArgTLSTy = ArrayType::get(ShadowTy, 64); ArgTLS = Mod->getOrInsertGlobal("__dfsan_arg_tls", ArgTLSTy); @@ -606,6 +617,9 @@ bool DataFlowSanitizer::runOnModule(Module &M) { G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel); } + ExternalShadowMask = + Mod->getOrInsertGlobal(kDFSanExternShadowPtrMask, IntptrTy); + DFSanUnionFn = Mod->getOrInsertFunction("__dfsan_union", DFSanUnionFnTy); if (Function *F = dyn_cast(DFSanUnionFn)) { F->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); @@ -643,16 +657,16 @@ bool DataFlowSanitizer::runOnModule(Module &M) { std::vector FnsToInstrument; llvm::SmallPtrSet FnsWithNativeABI; - for (Module::iterator i = M.begin(), e = M.end(); i != e; ++i) { - if (!i->isIntrinsic() && - i != DFSanUnionFn && - i != DFSanCheckedUnionFn && - i != DFSanUnionLoadFn && - i != DFSanUnimplementedFn && - i != DFSanSetLabelFn && - i != DFSanNonzeroLabelFn && - i != DFSanVarargWrapperFn) - FnsToInstrument.push_back(&*i); + for (Function &i : M) { + if (!i.isIntrinsic() && + &i != DFSanUnionFn && + &i != DFSanCheckedUnionFn && + &i != DFSanUnionLoadFn && + &i != DFSanUnimplementedFn && + &i != DFSanSetLabelFn && + &i != DFSanNonzeroLabelFn && + &i != DFSanVarargWrapperFn) + FnsToInstrument.push_back(&i); } // Give function aliases prefixes when necessary, and build wrappers where the @@ -710,7 +724,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { NewFArg = NewF->arg_begin(), FArgEnd = F.arg_end(); FArg != FArgEnd; ++FArg, ++NewFArg) { - FArg->replaceAllUsesWith(NewFArg); + FArg->replaceAllUsesWith(&*NewFArg); } NewF->getBasicBlockList().splice(NewF->begin(), F.getBasicBlockList()); @@ -750,11 +764,6 @@ bool DataFlowSanitizer::runOnModule(Module &M) { ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT)); F.replaceAllUsesWith(WrappedFnCst); - // Patch the pointer to LLVM function in debug info descriptor. 
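[Editor's note] The DataFlowSanitizer changes in this region let the shadow mask be loaded at runtime from __dfsan_shadow_ptr_mask on AArch64, instead of being folded in as a constant; getShadowAddress in the hunks below then masks the address and scales it by the shadow width. A standalone sketch of that arithmetic, assuming DFSan's 2-byte label per application byte and a hypothetical mask value:

```cpp
#include <cstdint>
#include <iostream>

// DFSan-style shadow addressing: strip the high bits with a mask, then
// scale by the label size (assumed 2 bytes of shadow per application byte).
uint64_t shadowAddr(uint64_t Addr, uint64_t Mask, uint64_t LabelBytes = 2) {
  return (Addr & Mask) * LabelBytes;
}

int main() {
  // Hypothetical x86-64-style constant mask; on AArch64 the patch instead
  // loads the mask from the runtime symbol __dfsan_shadow_ptr_mask.
  uint64_t Mask = ~0x700000000000ULL;
  std::cout << std::hex << shadowAddr(0x700000001000ULL, Mask)
            << "\n"; // prints 2000
}
```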
- auto DI = FunctionDIs.find(&F); - if (DI != FunctionDIs.end()) - DI->second->replaceFunction(&F); - UnwrappedFnMap[WrappedFnCst] = &F; *i = NewF; @@ -842,7 +851,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (Instruction *I = dyn_cast(V)) Pos = I->getNextNode(); else - Pos = DFSF.F->getEntryBlock().begin(); + Pos = &DFSF.F->getEntryBlock().front(); while (isa(Pos) || isa(Pos)) Pos = Pos->getNextNode(); IRBuilder<> IRB(Pos); @@ -864,7 +873,7 @@ Value *DFSanFunction::getArgTLSPtr() { if (DFS.ArgTLS) return ArgTLSPtr = DFS.ArgTLS; - IRBuilder<> IRB(F->getEntryBlock().begin()); + IRBuilder<> IRB(&F->getEntryBlock().front()); return ArgTLSPtr = IRB.CreateCall(DFS.GetArgTLS, {}); } @@ -874,7 +883,7 @@ Value *DFSanFunction::getRetvalTLS() { if (DFS.RetvalTLS) return RetvalTLSPtr = DFS.RetvalTLS; - IRBuilder<> IRB(F->getEntryBlock().begin()); + IRBuilder<> IRB(&F->getEntryBlock().front()); return RetvalTLSPtr = IRB.CreateCall(DFS.GetRetvalTLS, {}); } @@ -906,7 +915,7 @@ Value *DFSanFunction::getShadow(Value *V) { Function::arg_iterator i = F->arg_begin(); while (ArgIdx--) ++i; - Shadow = i; + Shadow = &*i; assert(Shadow->getType() == DFS.ShadowTy); break; } @@ -928,9 +937,15 @@ void DFSanFunction::setShadow(Instruction *I, Value *Shadow) { Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) { assert(Addr != RetvalTLS && "Reinstrumenting?"); IRBuilder<> IRB(Pos); + Value *ShadowPtrMaskValue; + if (DFSanRuntimeShadowMask) + ShadowPtrMaskValue = IRB.CreateLoad(IntptrTy, ExternalShadowMask); + else + ShadowPtrMaskValue = ShadowPtrMask; return IRB.CreateIntToPtr( IRB.CreateMul( - IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), ShadowPtrMask), + IRB.CreateAnd(IRB.CreatePtrToInt(Addr, IntptrTy), + IRB.CreatePtrToInt(ShadowPtrMaskValue, IntptrTy)), ShadowPtrMul), ShadowPtrTy); } @@ -991,7 +1006,7 @@ Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) { Call->addAttribute(2, Attribute::ZExt); BasicBlock *Tail = BI->getSuccessor(0); - PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", Tail->begin()); + PHINode *Phi = PHINode::Create(DFS.ShadowTy, 2, "", &Tail->front()); Phi->addIncoming(Call, Call->getParent()); Phi->addIncoming(V1, Head); @@ -1105,7 +1120,7 @@ Value *DFSanFunction::loadShadow(Value *Addr, uint64_t Size, uint64_t Align, Value *ShadowsEq = IRB.CreateICmpEQ(WideShadow, RotShadow); BasicBlock *Head = Pos->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(Pos); + BasicBlock *Tail = Head->splitBasicBlock(Pos->getIterator()); if (DomTreeNode *OldNode = DT.getNode(Head)) { std::vector Children(OldNode->begin(), OldNode->end()); @@ -1475,8 +1490,8 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (FT->isVarArg()) { auto *LabelVATy = ArrayType::get(DFSF.DFS.ShadowTy, CS.arg_size() - FT->getNumParams()); - auto *LabelVAAlloca = new AllocaInst(LabelVATy, "labelva", - DFSF.F->getEntryBlock().begin()); + auto *LabelVAAlloca = new AllocaInst( + LabelVATy, "labelva", &DFSF.F->getEntryBlock().front()); for (unsigned n = 0; i != CS.arg_end(); ++i, ++n) { auto LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, n); @@ -1490,7 +1505,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (!DFSF.LabelReturnAlloca) { DFSF.LabelReturnAlloca = new AllocaInst(DFSF.DFS.ShadowTy, "labelreturn", - DFSF.F->getEntryBlock().begin()); + &DFSF.F->getEntryBlock().front()); } Args.push_back(DFSF.LabelReturnAlloca); } @@ -1529,13 +1544,14 @@ void DFSanVisitor::visitCallSite(CallSite CS) { if (!CS.getType()->isVoidTy()) { if (InvokeInst *II = 
dyn_cast(CS.getInstruction())) { if (II->getNormalDest()->getSinglePredecessor()) { - Next = II->getNormalDest()->begin(); + Next = &II->getNormalDest()->front(); } else { BasicBlock *NewBB = SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT); - Next = NewBB->begin(); + Next = &NewBB->front(); } } else { + assert(CS->getIterator() != CS->getParent()->end()); Next = CS->getNextNode(); } @@ -1568,7 +1584,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) { unsigned VarArgSize = CS.arg_size() - FT->getNumParams(); ArrayType *VarArgArrayTy = ArrayType::get(DFSF.DFS.ShadowTy, VarArgSize); AllocaInst *VarArgShadow = - new AllocaInst(VarArgArrayTy, "", DFSF.F->getEntryBlock().begin()); + new AllocaInst(VarArgArrayTy, "", &DFSF.F->getEntryBlock().front()); Args.push_back(IRB.CreateConstGEP2_32(VarArgArrayTy, VarArgShadow, 0, 0)); for (unsigned n = 0; i != e; ++i, ++n) { IRB.CreateStore( diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 9a3ed5c04efc..fa939aee252a 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -138,6 +138,7 @@ namespace { Module *M; LLVMContext *Ctx; SmallVector, 16> Funcs; + DenseMap FnMap; }; } @@ -309,13 +310,12 @@ namespace { // object users can construct, the blocks and lines will be rooted here. class GCOVFunction : public GCOVRecord { public: - GCOVFunction(const DISubprogram *SP, raw_ostream *os, uint32_t Ident, - bool UseCfgChecksum, bool ExitBlockBeforeBody) + GCOVFunction(const DISubprogram *SP, Function *F, raw_ostream *os, + uint32_t Ident, bool UseCfgChecksum, bool ExitBlockBeforeBody) : SP(SP), Ident(Ident), UseCfgChecksum(UseCfgChecksum), CfgChecksum(0), ReturnBlock(1, os) { this->os = os; - Function *F = SP->getFunction(); DEBUG(dbgs() << "Function: " << getFunctionName(SP) << "\n"); uint32_t i = 0; @@ -347,8 +347,8 @@ namespace { std::string EdgeDestinations; raw_string_ostream EDOS(EdgeDestinations); Function *F = Blocks.begin()->first->getParent(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - GCOVBlock &Block = getBlock(I); + for (BasicBlock &I : *F) { + GCOVBlock &Block = getBlock(&I); for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) EDOS << Block.OutEdges[i]->Number; } @@ -389,8 +389,8 @@ namespace { // Emit edges between blocks. if (Blocks.empty()) return; Function *F = Blocks.begin()->first->getParent(); - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - GCOVBlock &Block = getBlock(I); + for (BasicBlock &I : *F) { + GCOVBlock &Block = getBlock(&I); if (Block.OutEdges.empty()) continue; writeBytes(EdgeTag, 4); @@ -405,9 +405,8 @@ namespace { } // Emit lines for each block. 
- for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - getBlock(I).writeOut(); - } + for (BasicBlock &I : *F) + getBlock(&I).writeOut(); } private: @@ -451,6 +450,12 @@ bool GCOVProfiler::runOnModule(Module &M) { this->M = &M; Ctx = &M.getContext(); + FnMap.clear(); + for (Function &F : M) { + if (DISubprogram *SP = F.getSubprogram()) + FnMap[SP] = &F; + } + if (Options.EmitNotes) emitProfileNotes(); if (Options.EmitData) return emitProfileArcs(); return false; @@ -495,7 +500,7 @@ void GCOVProfiler::emitProfileNotes() { unsigned FunctionIdent = 0; for (auto *SP : CU->getSubprograms()) { - Function *F = SP->getFunction(); + Function *F = FnMap[SP]; if (!F) continue; if (!functionHasLines(F)) continue; @@ -507,13 +512,13 @@ void GCOVProfiler::emitProfileNotes() { ++It; EntryBlock.splitBasicBlock(It); - Funcs.push_back(make_unique(SP, &out, FunctionIdent++, + Funcs.push_back(make_unique(SP, F, &out, FunctionIdent++, Options.UseCfgChecksum, Options.ExitBlockBeforeBody)); GCOVFunction &Func = *Funcs.back(); for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - GCOVBlock &Block = Func.getBlock(BB); + GCOVBlock &Block = Func.getBlock(&*BB); TerminatorInst *TI = BB->getTerminator(); if (int successors = TI->getNumSuccessors()) { for (int i = 0; i != successors; ++i) { @@ -574,7 +579,7 @@ bool GCOVProfiler::emitProfileArcs() { auto *CU = cast(CU_Nodes->getOperand(i)); SmallVector, 8> CountersBySP; for (auto *SP : CU->getSubprograms()) { - Function *F = SP->getFunction(); + Function *F = FnMap[SP]; if (!F) continue; if (!functionHasLines(F)) continue; if (!Result) Result = true; @@ -605,7 +610,7 @@ bool GCOVProfiler::emitProfileArcs() { int Successors = isa(TI) ? 1 : TI->getNumSuccessors(); if (Successors) { if (Successors == 1) { - IRBuilder<> Builder(BB->getFirstInsertionPt()); + IRBuilder<> Builder(&*BB->getFirstInsertionPt()); Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge); Value *Count = Builder.CreateLoad(Counter); @@ -625,7 +630,7 @@ bool GCOVProfiler::emitProfileArcs() { Count = Builder.CreateAdd(Count, Builder.getInt64(1)); Builder.CreateStore(Count, Counter); } else { - ComplexEdgePreds.insert(BB); + ComplexEdgePreds.insert(&*BB); for (int i = 0; i != Successors; ++i) ComplexEdgeSuccs.insert(TI->getSuccessor(i)); } @@ -641,13 +646,13 @@ bool GCOVProfiler::emitProfileArcs() { GlobalVariable *EdgeState = getEdgeStateValue(); for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { - IRBuilder<> Builder(ComplexEdgePreds[i + 1]->getFirstInsertionPt()); + IRBuilder<> Builder(&*ComplexEdgePreds[i + 1]->getFirstInsertionPt()); Builder.CreateStore(Builder.getInt32(i), EdgeState); } for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { // Call runtime to perform increment. 
- IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstInsertionPt()); + IRBuilder<> Builder(&*ComplexEdgeSuccs[i + 1]->getFirstInsertionPt()); Value *CounterPtrArray = Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, i * ComplexEdgePreds.size()); @@ -731,8 +736,8 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable( IRBuilder<> Builder(Succ); Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Edge + i); - EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + - (Preds.idFor(BB)-1)] = cast(Counter); + EdgeTable[((Succs.idFor(Succ) - 1) * Preds.size()) + + (Preds.idFor(&*BB) - 1)] = cast(Counter); } } Edge += Successors; @@ -901,7 +906,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { // uint32_t pred = *predecessor; // if (pred == 0xffffffff) return; - Argument *Arg = Fn->arg_begin(); + Argument *Arg = &*Fn->arg_begin(); Arg->setName("predecessor"); Value *Pred = Builder.CreateLoad(Arg, "pred"); Value *Cond = Builder.CreateICmpEQ(Pred, Builder.getInt32(0xffffffff)); @@ -912,7 +917,7 @@ void GCOVProfiler::insertIndirectCounterIncrement() { // uint64_t *counter = counters[pred]; // if (!counter) return; Value *ZExtPred = Builder.CreateZExt(Pred, Builder.getInt64Ty()); - Arg = std::next(Fn->arg_begin()); + Arg = &*std::next(Fn->arg_begin()); Arg->setName("counters"); Value *GEP = Builder.CreateGEP(Type::getInt64PtrTy(*Ctx), Arg, ZExtPred); Value *Counter = Builder.CreateLoad(GEP, "counter"); diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 712bf8edc7ea..92e41ee27c09 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -7,18 +7,18 @@ // //===----------------------------------------------------------------------===// // -// This pass lowers instrprof_increment intrinsics emitted by a frontend for -// profiling. It also builds the data structures and initialization code needed -// for updating execution counts and emitting the profile at runtime. +// This pass lowers instrprof_* intrinsics emitted by a frontend for profiling. +// It also builds the data structures and initialization code needed for +// updating execution counts and emitting the profile at runtime. // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Instrumentation.h" - #include "llvm/ADT/Triple.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Utils/ModuleUtils.h" using namespace llvm; @@ -49,7 +49,15 @@ public: private: InstrProfOptions Options; Module *M; - DenseMap RegionCounters; + typedef struct PerFunctionProfileData { + uint32_t NumValueSites[IPVK_Last+1]; + GlobalVariable* RegionCounters; + GlobalVariable* DataVar; + PerFunctionProfileData() : RegionCounters(nullptr), DataVar(nullptr) { + memset(NumValueSites, 0, sizeof(uint32_t) * (IPVK_Last+1)); + } + } PerFunctionProfileData; + DenseMap ProfileDataMap; std::vector UsedVars; bool isMachO() const { @@ -58,24 +66,30 @@ private: /// Get the section name for the counter variables. StringRef getCountersSection() const { - return isMachO() ? "__DATA,__llvm_prf_cnts" : "__llvm_prf_cnts"; + return getInstrProfCountersSectionName(isMachO()); } /// Get the section name for the name variables. StringRef getNameSection() const { - return isMachO() ? 
"__DATA,__llvm_prf_names" : "__llvm_prf_names"; + return getInstrProfNameSectionName(isMachO()); } /// Get the section name for the profile data variables. StringRef getDataSection() const { - return isMachO() ? "__DATA,__llvm_prf_data" : "__llvm_prf_data"; + return getInstrProfDataSectionName(isMachO()); } /// Get the section name for the coverage mapping data. StringRef getCoverageSection() const { - return isMachO() ? "__DATA,__llvm_covmap" : "__llvm_covmap"; + return getInstrProfCoverageSectionName(isMachO()); } + /// Count the number of instrumented value sites for the function. + void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); + + /// Replace instrprof_value_profile with a call to runtime library. + void lowerValueProfileInst(InstrProfValueProfileInst *Ins); + /// Replace instrprof_increment with an increment of the appropriate value. void lowerIncrement(InstrProfIncrementInst *Inc); @@ -117,20 +131,37 @@ bool InstrProfiling::runOnModule(Module &M) { bool MadeChange = false; this->M = &M; - RegionCounters.clear(); + ProfileDataMap.clear(); UsedVars.clear(); + // We did not know how many value sites there would be inside + // the instrumented function. This is counting the number of instrumented + // target value sites to enter it as field in the profile data variable. for (Function &F : M) for (BasicBlock &BB : F) for (auto I = BB.begin(), E = BB.end(); I != E;) - if (auto *Inc = dyn_cast(I++)) { + if (auto *Ind = dyn_cast(I++)) + computeNumValueSiteCounts(Ind); + + for (Function &F : M) + for (BasicBlock &BB : F) + for (auto I = BB.begin(), E = BB.end(); I != E;) { + auto Instr = I++; + if (auto *Inc = dyn_cast(Instr)) { lowerIncrement(Inc); MadeChange = true; + } else if (auto *Ind = dyn_cast(Instr)) { + lowerValueProfileInst(Ind); + MadeChange = true; } - if (GlobalVariable *Coverage = M.getNamedGlobal("__llvm_coverage_mapping")) { + } + + if (GlobalVariable *Coverage = + M.getNamedGlobal(getCoverageMappingVarName())) { lowerCoverageData(Coverage); MadeChange = true; } + if (!MadeChange) return false; @@ -141,10 +172,59 @@ bool InstrProfiling::runOnModule(Module &M) { return true; } +static Constant *getOrInsertValueProfilingCall(Module &M) { + LLVMContext &Ctx = M.getContext(); + auto *ReturnTy = Type::getVoidTy(M.getContext()); + Type *ParamTypes[] = { +#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType +#include "llvm/ProfileData/InstrProfData.inc" + }; + auto *ValueProfilingCallTy = + FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false); + return M.getOrInsertFunction(getInstrProfValueProfFuncName(), + ValueProfilingCallTy); +} + +void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) { + + GlobalVariable *Name = Ind->getName(); + uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); + uint64_t Index = Ind->getIndex()->getZExtValue(); + auto It = ProfileDataMap.find(Name); + if (It == ProfileDataMap.end()) { + PerFunctionProfileData PD; + PD.NumValueSites[ValueKind] = Index + 1; + ProfileDataMap[Name] = PD; + } else if (It->second.NumValueSites[ValueKind] <= Index) + It->second.NumValueSites[ValueKind] = Index + 1; +} + +void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { + + GlobalVariable *Name = Ind->getName(); + auto It = ProfileDataMap.find(Name); + assert(It != ProfileDataMap.end() && It->second.DataVar && + "value profiling detected in function with no counter incerement"); + + GlobalVariable *DataVar = It->second.DataVar; + uint64_t ValueKind = 
Ind->getValueKind()->getZExtValue(); + uint64_t Index = Ind->getIndex()->getZExtValue(); + for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind) + Index += It->second.NumValueSites[Kind]; + + IRBuilder<> Builder(Ind); + Value* Args[3] = {Ind->getTargetValue(), + Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()), + Builder.getInt32(Index)}; + Ind->replaceAllUsesWith( + Builder.CreateCall(getOrInsertValueProfilingCall(*M), Args)); + Ind->eraseFromParent(); +} + void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) { GlobalVariable *Counters = getOrCreateRegionCounters(Inc); - IRBuilder<> Builder(Inc->getParent(), *Inc); + IRBuilder<> Builder(Inc); uint64_t Index = Inc->getIndex()->getZExtValue(); Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index); Value *Count = Builder.CreateLoad(Addr, "pgocount"); @@ -172,9 +252,10 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { GlobalVariable *Name = cast(V); // If we have region counters for this name, we've already handled it. - auto It = RegionCounters.find(Name); - if (It != RegionCounters.end()) - continue; + auto It = ProfileDataMap.find(Name); + if (It != ProfileDataMap.end()) + if (It->second.RegionCounters) + continue; // Move the name variable to the right section. Name->setSection(getNameSection()); @@ -183,69 +264,108 @@ void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) { } /// Get the name of a profiling variable for a particular function. -static std::string getVarName(InstrProfIncrementInst *Inc, StringRef VarName) { - auto *Arr = cast(Inc->getName()->getInitializer()); - StringRef Name = Arr->isCString() ? Arr->getAsCString() : Arr->getAsString(); - return ("__llvm_profile_" + VarName + "_" + Name).str(); +static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) { + StringRef NamePrefix = getInstrProfNameVarPrefix(); + StringRef Name = Inc->getName()->getName().substr(NamePrefix.size()); + return (Prefix + Name).str(); +} + +static inline bool shouldRecordFunctionAddr(Function *F) { + // Check the linkage + if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && + !F->hasAvailableExternallyLinkage()) + return true; + // Check uses of this function for other than direct calls or invokes to it. + return F->hasAddressTaken(); +} + +static inline Comdat *getOrCreateProfileComdat(Module &M, + InstrProfIncrementInst *Inc) { + // COFF format requires a COMDAT section to have a key symbol with the same + // name. The linker targeting COFF also requires that the COMDAT section + // a section is associated to must precede the associating section. For this + // reason, we must choose the name var's name as the name of the comdat. + StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF() + ? getInstrProfNameVarPrefix() + : getInstrProfComdatPrefix()); + return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix))); } GlobalVariable * InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) { - GlobalVariable *Name = Inc->getName(); - auto It = RegionCounters.find(Name); - if (It != RegionCounters.end()) - return It->second; + GlobalVariable *NamePtr = Inc->getName(); + auto It = ProfileDataMap.find(NamePtr); + PerFunctionProfileData PD; + if (It != ProfileDataMap.end()) { + if (It->second.RegionCounters) + return It->second.RegionCounters; + PD = It->second; + } - // Move the name variable to the right section. Make sure it is placed in the - // same comdat as its associated function. 
Otherwise, we may get multiple - // counters for the same function in certain cases. + // Move the name variable to the right section. Place them in a COMDAT group + // if the associated function is a COMDAT. This will make sure that + // only one copy of counters of the COMDAT function will be emitted after + // linking. Function *Fn = Inc->getParent()->getParent(); - Name->setSection(getNameSection()); - Name->setAlignment(1); - Name->setComdat(Fn->getComdat()); + Comdat *ProfileVarsComdat = nullptr; + if (Fn->hasComdat()) + ProfileVarsComdat = getOrCreateProfileComdat(*M, Inc); + NamePtr->setSection(getNameSection()); + NamePtr->setAlignment(1); + NamePtr->setComdat(ProfileVarsComdat); uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); LLVMContext &Ctx = M->getContext(); ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); // Create the counters variable. - auto *Counters = new GlobalVariable(*M, CounterTy, false, Name->getLinkage(), - Constant::getNullValue(CounterTy), - getVarName(Inc, "counters")); - Counters->setVisibility(Name->getVisibility()); - Counters->setSection(getCountersSection()); - Counters->setAlignment(8); - Counters->setComdat(Fn->getComdat()); - - RegionCounters[Inc->getName()] = Counters; + auto *CounterPtr = + new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(), + Constant::getNullValue(CounterTy), + getVarName(Inc, getInstrProfCountersVarPrefix())); + CounterPtr->setVisibility(NamePtr->getVisibility()); + CounterPtr->setSection(getCountersSection()); + CounterPtr->setAlignment(8); + CounterPtr->setComdat(ProfileVarsComdat); // Create data variable. - auto *NameArrayTy = Name->getType()->getPointerElementType(); - auto *Int32Ty = Type::getInt32Ty(Ctx); - auto *Int64Ty = Type::getInt64Ty(Ctx); auto *Int8PtrTy = Type::getInt8PtrTy(Ctx); - auto *Int64PtrTy = Type::getInt64PtrTy(Ctx); - - Type *DataTypes[] = {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int64PtrTy}; + auto *Int16Ty = Type::getInt16Ty(Ctx); + auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last+1); + Type *DataTypes[] = { + #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType, + #include "llvm/ProfileData/InstrProfData.inc" + }; auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes)); + + Constant *FunctionAddr = shouldRecordFunctionAddr(Fn) ? 
+ ConstantExpr::getBitCast(Fn, Int8PtrTy) : + ConstantPointerNull::get(Int8PtrTy); + + Constant *Int16ArrayVals[IPVK_Last+1]; + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]); + Constant *DataVals[] = { - ConstantInt::get(Int32Ty, NameArrayTy->getArrayNumElements()), - ConstantInt::get(Int32Ty, NumCounters), - ConstantInt::get(Int64Ty, Inc->getHash()->getZExtValue()), - ConstantExpr::getBitCast(Name, Int8PtrTy), - ConstantExpr::getBitCast(Counters, Int64PtrTy)}; - auto *Data = new GlobalVariable(*M, DataTy, true, Name->getLinkage(), + #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, + #include "llvm/ProfileData/InstrProfData.inc" + }; + auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(), ConstantStruct::get(DataTy, DataVals), - getVarName(Inc, "data")); - Data->setVisibility(Name->getVisibility()); + getVarName(Inc, getInstrProfDataVarPrefix())); + Data->setVisibility(NamePtr->getVisibility()); Data->setSection(getDataSection()); - Data->setAlignment(8); - Data->setComdat(Fn->getComdat()); + Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT); + Data->setComdat(ProfileVarsComdat); + + PD.RegionCounters = CounterPtr; + PD.DataVar = Data; + ProfileDataMap[NamePtr] = PD; // Mark the data variable as used so that it isn't stripped out. UsedVars.push_back(Data); - return Counters; + return CounterPtr; } void InstrProfiling::emitRegistration() { @@ -253,20 +373,24 @@ void InstrProfiling::emitRegistration() { if (Triple(M->getTargetTriple()).isOSDarwin()) return; + // Use linker script magic to get data/cnts/name start/end. + if (Triple(M->getTargetTriple()).isOSLinux() || + Triple(M->getTargetTriple()).isOSFreeBSD()) + return; + // Construct the function. auto *VoidTy = Type::getVoidTy(M->getContext()); auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext()); auto *RegisterFTy = FunctionType::get(VoidTy, false); auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage, - "__llvm_profile_register_functions", M); + getInstrProfRegFuncsName(), M); RegisterF->setUnnamedAddr(true); - if (Options.NoRedZone) - RegisterF->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) RegisterF->addFnAttr(Attribute::NoRedZone); auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false); auto *RuntimeRegisterF = Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage, - "__llvm_profile_register_function", M); + getInstrProfRegFuncName(), M); IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF)); for (Value *Data : UsedVars) @@ -275,26 +399,27 @@ void InstrProfiling::emitRegistration() { } void InstrProfiling::emitRuntimeHook() { - const char *const RuntimeVarName = "__llvm_profile_runtime"; - const char *const RuntimeUserName = "__llvm_profile_runtime_user"; + + // We expect the linker to be invoked with -u flag for linux, + // for which case there is no need to emit the user function. + if (Triple(M->getTargetTriple()).isOSLinux()) + return; // If the module's provided its own runtime, we don't need to do anything. - if (M->getGlobalVariable(RuntimeVarName)) - return; + if (M->getGlobalVariable(getInstrProfRuntimeHookVarName())) return; // Declare an external variable that will pull in the runtime initialization. 
auto *Int32Ty = Type::getInt32Ty(M->getContext()); auto *Var = new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage, - nullptr, RuntimeVarName); + nullptr, getInstrProfRuntimeHookVarName()); // Make a function that uses it. - auto *User = - Function::Create(FunctionType::get(Int32Ty, false), - GlobalValue::LinkOnceODRLinkage, RuntimeUserName, M); + auto *User = Function::Create(FunctionType::get(Int32Ty, false), + GlobalValue::LinkOnceODRLinkage, + getInstrProfRuntimeHookVarUseFuncName(), M); User->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) - User->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) User->addFnAttr(Attribute::NoRedZone); User->setVisibility(GlobalValue::HiddenVisibility); IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User)); @@ -330,26 +455,23 @@ void InstrProfiling::emitUses() { LLVMUsed = new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage, ConstantArray::get(ATy, MergedVars), "llvm.used"); - LLVMUsed->setSection("llvm.metadata"); } void InstrProfiling::emitInitialization() { std::string InstrProfileOutput = Options.InstrProfileOutput; - Constant *RegisterF = M->getFunction("__llvm_profile_register_functions"); - if (!RegisterF && InstrProfileOutput.empty()) - return; + Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName()); + if (!RegisterF && InstrProfileOutput.empty()) return; // Create the initialization function. auto *VoidTy = Type::getVoidTy(M->getContext()); - auto *F = - Function::Create(FunctionType::get(VoidTy, false), - GlobalValue::InternalLinkage, "__llvm_profile_init", M); + auto *F = Function::Create(FunctionType::get(VoidTy, false), + GlobalValue::InternalLinkage, + getInstrProfInitFuncName(), M); F->setUnnamedAddr(true); F->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) - F->addFnAttr(Attribute::NoRedZone); + if (Options.NoRedZone) F->addFnAttr(Attribute::NoRedZone); // Add the basic block and the necessary calls. IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F)); @@ -358,9 +480,8 @@ void InstrProfiling::emitInitialization() { if (!InstrProfileOutput.empty()) { auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext()); auto *SetNameTy = FunctionType::get(VoidTy, Int8PtrTy, false); - auto *SetNameF = - Function::Create(SetNameTy, GlobalValue::ExternalLinkage, - "__llvm_profile_override_default_filename", M); + auto *SetNameF = Function::Create(SetNameTy, GlobalValue::ExternalLinkage, + getInstrProfFileOverriderFuncName(), M); // Create variable for profile name. Constant *ProfileNameConst = diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 27505859100b..a05a5fa09f9a 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -12,12 +12,47 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Instrumentation.h" #include "llvm-c/Initialization.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" #include "llvm/PassRegistry.h" using namespace llvm; +/// Moves I before IP. Returns new insert point. +static BasicBlock::iterator moveBeforeInsertPoint(BasicBlock::iterator I, BasicBlock::iterator IP) { + // If I is IP, move the insert point down. + if (I == IP) + return ++IP; + // Otherwise, move I before IP and return IP. 
+ I->moveBefore(&*IP); + return IP; +} + +/// Instrumentation passes often insert conditional checks into entry blocks. +/// Call this function before splitting the entry block to move instructions +/// that must remain in the entry block up before the split point. Static +/// allocas and llvm.localescape calls, for example, must remain in the entry +/// block. +BasicBlock::iterator llvm::PrepareToSplitEntryBlock(BasicBlock &BB, + BasicBlock::iterator IP) { + assert(&BB.getParent()->getEntryBlock() == &BB); + for (auto I = IP, E = BB.end(); I != E; ++I) { + bool KeepInEntry = false; + if (auto *AI = dyn_cast(I)) { + if (AI->isStaticAlloca()) + KeepInEntry = true; + } else if (auto *II = dyn_cast(I)) { + if (II->getIntrinsicID() == llvm::Intrinsic::localescape) + KeepInEntry = true; + } + if (KeepInEntry) + IP = moveBeforeInsertPoint(I, IP); + } + return IP; +} + /// initializeInstrumentation - Initialize all passes in the TransformUtils /// library. void llvm::initializeInstrumentation(PassRegistry &Registry) { @@ -25,6 +60,8 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeAddressSanitizerModulePass(Registry); initializeBoundsCheckingPass(Registry); initializeGCOVProfilerPass(Registry); + initializePGOInstrumentationGenPass(Registry); + initializePGOInstrumentationUsePass(Registry); initializeInstrProfilingPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt index 14c174332ee4..bcefe795c193 100644 --- a/lib/Transforms/Instrumentation/LLVMBuild.txt +++ b/lib/Transforms/Instrumentation/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Instrumentation parent = Transforms -required_libraries = Analysis Core MC Support TransformUtils +required_libraries = Analysis Core MC Support TransformUtils ProfileData diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 286a56330248..5a7bce5a5413 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -148,7 +148,7 @@ static cl::opt ClPoisonStackWithCall("msan-poison-stack-with-call", cl::desc("poison uninitialized stack variables with a call"), cl::Hidden, cl::init(false)); static cl::opt ClPoisonStackPattern("msan-poison-stack-pattern", - cl::desc("poison uninitialized stack variables with the given patter"), + cl::desc("poison uninitialized stack variables with the given pattern"), cl::Hidden, cl::init(0xff)); static cl::opt ClPoisonUndef("msan-poison-undef", cl::desc("poison undef temps"), @@ -222,10 +222,17 @@ static const MemoryMapParams Linux_I386_MemoryMapParams = { // x86_64 Linux static const MemoryMapParams Linux_X86_64_MemoryMapParams = { +#ifdef MSAN_LINUX_X86_64_OLD_MAPPING 0x400000000000, // AndMask 0, // XorMask (not used) 0, // ShadowBase (not used) 0x200000000000, // OriginBase +#else + 0, // AndMask (not used) + 0x500000000000, // XorMask + 0, // ShadowBase (not used) + 0x100000000000, // OriginBase +#endif }; // mips64 Linux @@ -244,6 +251,14 @@ static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = { 0x1C0000000000, // OriginBase }; +// aarch64 Linux +static const MemoryMapParams Linux_AArch64_MemoryMapParams = { + 0, // AndMask (not used) + 0x06000000000, // XorMask + 0, // ShadowBase (not used) + 0x01000000000, // OriginBase +}; + // i386 FreeBSD static const MemoryMapParams FreeBSD_I386_MemoryMapParams = { 
0x000180000000, // AndMask @@ -266,15 +281,20 @@ static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = { }; static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = { - NULL, + nullptr, &Linux_MIPS64_MemoryMapParams, }; static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = { - NULL, + nullptr, &Linux_PowerPC64_MemoryMapParams, }; +static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = { + nullptr, + &Linux_AArch64_MemoryMapParams, +}; + static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = { &FreeBSD_I386_MemoryMapParams, &FreeBSD_X86_64_MemoryMapParams, @@ -353,8 +373,9 @@ class MemorySanitizer : public FunctionPass { friend struct MemorySanitizerVisitor; friend struct VarArgAMD64Helper; friend struct VarArgMIPS64Helper; + friend struct VarArgAArch64Helper; }; -} // namespace +} // anonymous namespace char MemorySanitizer::ID = 0; INITIALIZE_PASS(MemorySanitizer, "msan", @@ -377,7 +398,6 @@ static GlobalVariable *createPrivateNonConstGlobalForString(Module &M, GlobalValue::PrivateLinkage, StrConst, ""); } - /// \brief Insert extern declaration of runtime-provided functions and globals. void MemorySanitizer::initializeCallbacks(Module &M) { // Only do this once. @@ -496,6 +516,10 @@ bool MemorySanitizer::doInitialization(Module &M) { case Triple::ppc64le: MapParams = Linux_PowerPC_MemoryMapParams.bits64; break; + case Triple::aarch64: + case Triple::aarch64_be: + MapParams = Linux_ARM_MemoryMapParams.bits64; + break; default: report_fatal_error("unsupported architecture"); } @@ -697,7 +721,7 @@ struct MemorySanitizerVisitor : public InstVisitor { Value *Cmp = IRB.CreateICmpNE( ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); Instruction *CheckTerm = SplitBlockAndInsertIfThen( - Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights); + Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights); IRBuilder<> IRBNew(CheckTerm); paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), getOriginPtr(Addr, IRBNew, Alignment), StoreSize, @@ -893,16 +917,17 @@ struct MemorySanitizerVisitor : public InstVisitor { /// /// Offset = (Addr & ~AndMask) ^ XorMask Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) { + Value *OffsetLong = IRB.CreatePointerCast(Addr, MS.IntptrTy); + uint64_t AndMask = MS.MapParams->AndMask; - assert(AndMask != 0 && "AndMask shall be specified"); - Value *OffsetLong = - IRB.CreateAnd(IRB.CreatePointerCast(Addr, MS.IntptrTy), - ConstantInt::get(MS.IntptrTy, ~AndMask)); + if (AndMask) + OffsetLong = + IRB.CreateAnd(OffsetLong, ConstantInt::get(MS.IntptrTy, ~AndMask)); uint64_t XorMask = MS.MapParams->XorMask; - if (XorMask != 0) - OffsetLong = IRB.CreateXor(OffsetLong, - ConstantInt::get(MS.IntptrTy, XorMask)); + if (XorMask) + OffsetLong = + IRB.CreateXor(OffsetLong, ConstantInt::get(MS.IntptrTy, XorMask)); return OffsetLong; } @@ -1339,6 +1364,12 @@ struct MemorySanitizerVisitor : public InstVisitor { } void visitBitCastInst(BitCastInst &I) { + // Special case: if this is the bitcast (there is exactly 1 allowed) between + // a musttail call and a ret, don't instrument. New instructions are not + // allowed after a musttail call. 
+ if (auto *CI = dyn_cast(I.getOperand(0))) + if (CI->isMustTailCall()) + return; IRBuilder<> IRB(&I); setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I))); setOrigin(&I, getOrigin(&I, 0)); @@ -1570,18 +1601,24 @@ struct MemorySanitizerVisitor : public InstVisitor { Type *EltTy = Ty->getSequentialElementType(); SmallVector Elements; for (unsigned Idx = 0; Idx < NumElements; ++Idx) { - ConstantInt *Elt = - dyn_cast(ConstArg->getAggregateElement(Idx)); - APInt V = Elt->getValue(); - APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); - Elements.push_back(ConstantInt::get(EltTy, V2)); + if (ConstantInt *Elt = + dyn_cast(ConstArg->getAggregateElement(Idx))) { + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + Elements.push_back(ConstantInt::get(EltTy, V2)); + } else { + Elements.push_back(ConstantInt::get(EltTy, 1)); + } } ShadowMul = ConstantVector::get(Elements); } else { - ConstantInt *Elt = dyn_cast(ConstArg); - APInt V = Elt->getValue(); - APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); - ShadowMul = ConstantInt::get(Elt->getType(), V2); + if (ConstantInt *Elt = dyn_cast(ConstArg)) { + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + ShadowMul = ConstantInt::get(Ty, V2); + } else { + ShadowMul = ConstantInt::get(Ty, 1); + } } IRBuilder<> IRB(&I); @@ -1730,25 +1767,30 @@ struct MemorySanitizerVisitor : public InstVisitor { /// \brief Instrument signed relational comparisons. /// - /// Handle (x<0) and (x>=0) comparisons (essentially, sign bit tests) by - /// propagating the highest bit of the shadow. Everything else is delegated - /// to handleShadowOr(). + /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest + /// bit of the shadow. Everything else is delegated to handleShadowOr(). 
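+  ///
+  /// A signed x<0 test reads only the sign bit of x, so its result is
+  /// undefined exactly when the sign bit of x's shadow is set; comparing
+  /// the shadow, as a signed value, against the all-zero clean shadow with
+  /// an SLT predicate computes exactly that bit.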
void handleSignedRelationalComparison(ICmpInst &I) { - Constant *constOp0 = dyn_cast(I.getOperand(0)); - Constant *constOp1 = dyn_cast(I.getOperand(1)); - Value* op = nullptr; - CmpInst::Predicate pre = I.getPredicate(); - if (constOp0 && constOp0->isNullValue() && - (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) { - op = I.getOperand(1); - } else if (constOp1 && constOp1->isNullValue() && - (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) { + Constant *constOp; + Value *op = nullptr; + CmpInst::Predicate pre; + if ((constOp = dyn_cast(I.getOperand(1)))) { op = I.getOperand(0); + pre = I.getPredicate(); + } else if ((constOp = dyn_cast(I.getOperand(0)))) { + op = I.getOperand(1); + pre = I.getSwappedPredicate(); + } else { + handleShadowOr(I); + return; } - if (op) { + + if ((constOp->isNullValue() && + (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) || + (constOp->isAllOnesValue() && + (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) { IRBuilder<> IRB(&I); - Value* Shadow = - IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), "_msprop_icmpslt"); + Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op), + "_msprop_icmp_s"); setShadow(&I, Shadow); setOrigin(&I, getOrigin(op)); } else { @@ -1860,25 +1902,6 @@ struct MemorySanitizerVisitor : public InstVisitor { VAHelper->visitVACopyInst(I); } - enum IntrinsicKind { - IK_DoesNotAccessMemory, - IK_OnlyReadsMemory, - IK_WritesMemory - }; - - static IntrinsicKind getIntrinsicKind(Intrinsic::ID iid) { - const int DoesNotAccessMemory = IK_DoesNotAccessMemory; - const int OnlyReadsArgumentPointees = IK_OnlyReadsMemory; - const int OnlyReadsMemory = IK_OnlyReadsMemory; - const int OnlyAccessesArgumentPointees = IK_WritesMemory; - const int UnknownModRefBehavior = IK_WritesMemory; -#define GET_INTRINSIC_MODREF_BEHAVIOR -#define ModRefBehavior IntrinsicKind -#include "llvm/IR/Intrinsics.gen" -#undef ModRefBehavior -#undef GET_INTRINSIC_MODREF_BEHAVIOR - } - /// \brief Handle vector store-like intrinsics. /// /// Instrument intrinsics that look like a simple SIMD store: writes memory, @@ -1978,17 +2001,11 @@ struct MemorySanitizerVisitor : public InstVisitor { if (NumArgOperands == 0) return false; - Intrinsic::ID iid = I.getIntrinsicID(); - IntrinsicKind IK = getIntrinsicKind(iid); - bool OnlyReadsMemory = IK == IK_OnlyReadsMemory; - bool WritesMemory = IK == IK_WritesMemory; - assert(!(OnlyReadsMemory && WritesMemory)); - if (NumArgOperands == 2 && I.getArgOperand(0)->getType()->isPointerTy() && I.getArgOperand(1)->getType()->isVectorTy() && I.getType()->isVoidTy() && - WritesMemory) { + !I.onlyReadsMemory()) { // This looks like a vector store. return handleVectorStoreIntrinsic(I); } @@ -1996,12 +2013,12 @@ struct MemorySanitizerVisitor : public InstVisitor { if (NumArgOperands == 1 && I.getArgOperand(0)->getType()->isPointerTy() && I.getType()->isVectorTy() && - OnlyReadsMemory) { + I.onlyReadsMemory()) { // This looks like a vector load. return handleVectorLoadIntrinsic(I); } - if (!OnlyReadsMemory && !WritesMemory) + if (I.doesNotAccessMemory()) if (maybeHandleSimpleNomemIntrinsic(I)) return true; @@ -2493,13 +2510,16 @@ struct MemorySanitizerVisitor : public InstVisitor { // Now, get the shadow for the RetVal. if (!I.getType()->isSized()) return; + // Don't emit the epilogue for musttail call returns. + if (CS.isCall() && cast(&I)->isMustTailCall()) return; IRBuilder<> IRBBefore(&I); // Until we have full dynamic coverage, make sure the retval shadow is 0. 
Value *Base = getShadowPtrForRetval(&I, IRBBefore); IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment); - Instruction *NextInsn = nullptr; + BasicBlock::iterator NextInsn; if (CS.isCall()) { - NextInsn = I.getNextNode(); + NextInsn = ++I.getIterator(); + assert(NextInsn != I.getParent()->end()); } else { BasicBlock *NormalDest = cast(&I)->getNormalDest(); if (!NormalDest->getSinglePredecessor()) { @@ -2511,10 +2531,10 @@ struct MemorySanitizerVisitor : public InstVisitor { return; } NextInsn = NormalDest->getFirstInsertionPt(); - assert(NextInsn && + assert(NextInsn != NormalDest->end() && "Could not find insertion point for retval shadow load"); } - IRBuilder<> IRBAfter(NextInsn); + IRBuilder<> IRBAfter(&*NextInsn); Value *RetvalShadow = IRBAfter.CreateAlignedLoad(getShadowPtrForRetval(&I, IRBAfter), kShadowTLSAlignment, "_msret"); @@ -2523,10 +2543,22 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, IRBAfter.CreateLoad(getOriginPtrForRetval(IRBAfter))); } + bool isAMustTailRetVal(Value *RetVal) { + if (auto *I = dyn_cast(RetVal)) { + RetVal = I->getOperand(0); + } + if (auto *I = dyn_cast(RetVal)) { + return I->isMustTailCall(); + } + return false; + } + void visitReturnInst(ReturnInst &I) { IRBuilder<> IRB(&I); Value *RetVal = I.getReturnValue(); if (!RetVal) return; + // Don't emit the epilogue for musttail call returns. + if (isAMustTailRetVal(RetVal)) return; Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB); if (CheckReturnValue) { insertShadowCheck(RetVal, &I); @@ -2653,6 +2685,16 @@ struct MemorySanitizerVisitor : public InstVisitor { setOrigin(&I, getCleanOrigin()); } + void visitCatchSwitchInst(CatchSwitchInst &I) { + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } + + void visitFuncletPadInst(FuncletPadInst &I) { + setShadow(&I, getCleanShadow(&I)); + setOrigin(&I, getCleanOrigin()); + } + void visitGetElementPtrInst(GetElementPtrInst &I) { handleShadowOr(I); } @@ -2696,6 +2738,16 @@ struct MemorySanitizerVisitor : public InstVisitor { // Nothing to do here. } + void visitCleanupReturnInst(CleanupReturnInst &CRI) { + DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n"); + // Nothing to do here. + } + + void visitCatchReturnInst(CatchReturnInst &CRI) { + DEBUG(dbgs() << "CatchReturn: " << CRI << "\n"); + // Nothing to do here. + } + void visitInstruction(Instruction &I) { // Everything else: stop propagating and check for poisoned shadow. if (ClDumpStrictInstructions) @@ -2808,6 +2860,8 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVAStartInst(VAStartInst &I) override { + if (F.getCallingConv() == CallingConv::X86_64_Win64) + return; IRBuilder<> IRB(&I); VAStartInstrumentationList.push_back(&I); Value *VAListTag = I.getArgOperand(0); @@ -2820,6 +2874,8 @@ struct VarArgAMD64Helper : public VarArgHelper { } void visitVACopyInst(VACopyInst &I) override { + if (F.getCallingConv() == CallingConv::X86_64_Win64) + return; IRBuilder<> IRB(&I); Value *VAListTag = I.getArgOperand(0); Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB); @@ -2979,6 +3035,242 @@ struct VarArgMIPS64Helper : public VarArgHelper { } }; + +/// \brief AArch64-specific implementation of VarArgHelper. +struct VarArgAArch64Helper : public VarArgHelper { + static const unsigned kAArch64GrArgSize = 56; + static const unsigned kAArch64VrArgSize = 128; + + static const unsigned AArch64GrBegOffset = 0; + static const unsigned AArch64GrEndOffset = kAArch64GrArgSize; + // Make VR space aligned to 16 bytes. 
+  static const unsigned AArch64VrBegOffset = AArch64GrEndOffset + 8;
+  static const unsigned AArch64VrEndOffset = AArch64VrBegOffset
+                                             + kAArch64VrArgSize;
+  static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
+
+  Function &F;
+  MemorySanitizer &MS;
+  MemorySanitizerVisitor &MSV;
+  Value *VAArgTLSCopy;
+  Value *VAArgOverflowSize;
+
+  SmallVector<CallInst*, 16> VAStartInstrumentationList;
+
+  VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
+                      MemorySanitizerVisitor &MSV)
+      : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+        VAArgOverflowSize(nullptr) {}
+
+  enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
+
+  ArgKind classifyArgument(Value* arg) {
+    Type *T = arg->getType();
+    if (T->isFPOrFPVectorTy())
+      return AK_FloatingPoint;
+    if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
+        || (T->isPointerTy()))
+      return AK_GeneralPurpose;
+    return AK_Memory;
+  }
+
+  // The instrumentation stores the argument shadow in a non-ABI-specific
+  // format because it does not know which argument is named (since Clang,
+  // as in the x86_64 case, lowers the va_args in the frontend and this pass
+  // only sees the low-level code that deals with va_list internals).
+  // The first seven GR registers are saved in the first 56 bytes of the
+  // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
+  // the remaining arguments.
+  // Using constant offsets within the va_arg TLS array allows fast copy
+  // in the finalize instrumentation.
+  void visitCallSite(CallSite &CS, IRBuilder<> &IRB) override {
+    unsigned GrOffset = AArch64GrBegOffset;
+    unsigned VrOffset = AArch64VrBegOffset;
+    unsigned OverflowOffset = AArch64VAEndOffset;
+
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (CallSite::arg_iterator ArgIt = CS.arg_begin() + 1, End = CS.arg_end();
+         ArgIt != End; ++ArgIt) {
+      Value *A = *ArgIt;
+      ArgKind AK = classifyArgument(A);
+      if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
+        AK = AK_Memory;
+      if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
+        AK = AK_Memory;
+      Value *Base;
+      switch (AK) {
+      case AK_GeneralPurpose:
+        Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset);
+        GrOffset += 8;
+        break;
+      case AK_FloatingPoint:
+        Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset);
+        VrOffset += 16;
+        break;
+      case AK_Memory:
+        uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
+        Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset);
+        OverflowOffset += RoundUpToAlignment(ArgSize, 8);
+        break;
+      }
+      IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
+    }
+    Constant *OverflowSize =
+      ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
+    IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
+  }
+
+  /// Compute the shadow address for a given va_arg.
+  Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
+                                   int ArgOffset) {
+    Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
+    Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
+    return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
+                              "_msarg");
+  }
+
+  void visitVAStartInst(VAStartInst &I) override {
+    IRBuilder<> IRB(&I);
+    VAStartInstrumentationList.push_back(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants (size of va_list).
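+    // For reference, the offsets used here and in finalizeInstrumentation()
+    // follow the AAPCS64 va_list layout:
+    //   offset  0: void *__stack;   // next stacked argument
+    //   offset  8: void *__gr_top;  // end of the GR register save area
+    //   offset 16: void *__vr_top;  // end of the VR register save area
+    //   offset 24: int   __gr_offs; // negative offset from __gr_top
+    //   offset 28: int   __vr_offs; // negative offset from __vr_top
+    // for a total of 32 bytes, which is the size unpoisoned below.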
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */32, /* alignment */8, false);
+  }
+
+  void visitVACopyInst(VACopyInst &I) override {
+    IRBuilder<> IRB(&I);
+    Value *VAListTag = I.getArgOperand(0);
+    Value *ShadowPtr = MSV.getShadowPtr(VAListTag, IRB.getInt8Ty(), IRB);
+    // Unpoison the whole __va_list_tag.
+    // FIXME: magic ABI constants (size of va_list).
+    IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
+                     /* size */32, /* alignment */8, false);
+  }
+
+  // Retrieve a va_list field of 'void*' size.
+  Value* getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+    Value *SaveAreaPtrPtr =
+      IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                      ConstantInt::get(MS.IntptrTy, offset)),
+        Type::getInt64PtrTy(*MS.C));
+    return IRB.CreateLoad(SaveAreaPtrPtr);
+  }
+
+  // Retrieve a va_list field of 'int' size.
+  Value* getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
+    Value *SaveAreaPtr =
+      IRB.CreateIntToPtr(
+        IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
+                      ConstantInt::get(MS.IntptrTy, offset)),
+        Type::getInt32PtrTy(*MS.C));
+    Value *SaveArea32 = IRB.CreateLoad(SaveAreaPtr);
+    return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
+  }
+
+  void finalizeInstrumentation() override {
+    assert(!VAArgOverflowSize && !VAArgTLSCopy &&
+           "finalizeInstrumentation called twice");
+    if (!VAStartInstrumentationList.empty()) {
+      // If there is a va_start in this function, make a backup copy of
+      // va_arg_tls somewhere in the function entry block.
+      IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+      VAArgOverflowSize = IRB.CreateLoad(MS.VAArgOverflowSizeTLS);
+      Value *CopySize =
+        IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset),
+                      VAArgOverflowSize);
+      VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
+      IRB.CreateMemCpy(VAArgTLSCopy, MS.VAArgTLS, CopySize, 8);
+    }
+
+    Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
+    Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
+
+    // Instrument va_start, copy va_list shadow from the backup copy of
+    // the TLS contents.
+    for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
+      CallInst *OrigInst = VAStartInstrumentationList[i];
+      IRBuilder<> IRB(OrigInst->getNextNode());
+
+      Value *VAListTag = OrigInst->getArgOperand(0);
+
+      // The variadic ABI for AArch64 creates two areas to save the incoming
+      // argument registers (one for the 64-bit general registers xn-x7 and
+      // another for the 128-bit FP/SIMD registers vn-v7).
+      // We then need to propagate the shadow arguments to both regions
+      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs'.
+      // The remaining arguments are saved on shadow for 'va::stack'.
+      // One caveat is that only the non-named arguments need to be
+      // propagated; however, the call site instrumentation saves 'all' the
+      // arguments. So to copy the shadow values from the va_arg TLS array
+      // we need to adjust the offset for both GR and VR fields based on
+      // the __{gr,vr}_offs value (since the stores are based on the incoming
+      // named arguments).
+
+      // Read the stack pointer from the va_list.
+      Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
+
+      // Read both the __gr_top and __gr_off and add them up.
+      Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
+      Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
+
+      Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
+
+      // Read both the __vr_top and __vr_off and add them up.
+      Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
+      Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
+
+      Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
+
+      // It does not know how many named arguments are being used and, at the
+      // call site, all the arguments were saved. Since __gr_off is defined as
+      // '0 - ((8 - named_gr) * 8)', the idea is to just propagate the variadic
+      // arguments by ignoring the bytes of shadow from named arguments.
+      Value *GrRegSaveAreaShadowPtrOff =
+        IRB.CreateAdd(GrArgSize, GrOffSaveArea);
+
+      Value *GrRegSaveAreaShadowPtr =
+        MSV.getShadowPtr(GrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+      Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                                              GrRegSaveAreaShadowPtrOff);
+      Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
+
+      IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, GrSrcPtr, GrCopySize, 8);
+
+      // Again, but for FP/SIMD values.
+      Value *VrRegSaveAreaShadowPtrOff =
+        IRB.CreateAdd(VrArgSize, VrOffSaveArea);
+
+      Value *VrRegSaveAreaShadowPtr =
+        MSV.getShadowPtr(VrRegSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+      Value *VrSrcPtr = IRB.CreateInBoundsGEP(
+        IRB.getInt8Ty(),
+        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                              IRB.getInt32(AArch64VrBegOffset)),
+        VrRegSaveAreaShadowPtrOff);
+      Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
+
+      IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, VrSrcPtr, VrCopySize, 8);
+
+      // And finally for remaining arguments.
+      Value *StackSaveAreaShadowPtr =
+        MSV.getShadowPtr(StackSaveAreaPtr, IRB.getInt8Ty(), IRB);
+
+      Value *StackSrcPtr =
+        IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
+                              IRB.getInt32(AArch64VAEndOffset));
+
+      IRB.CreateMemCpy(StackSaveAreaShadowPtr, StackSrcPtr,
+                       VAArgOverflowSize, 16);
+    }
+  }
+};
+
 /// \brief A no-op implementation of VarArgHelper.
 struct VarArgNoOpHelper : public VarArgHelper {
   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
@@ -3003,11 +3295,13 @@ VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
   else if (TargetTriple.getArch() == llvm::Triple::mips64 ||
            TargetTriple.getArch() == llvm::Triple::mips64el)
     return new VarArgMIPS64Helper(Func, Msan, Visitor);
+  else if (TargetTriple.getArch() == llvm::Triple::aarch64)
+    return new VarArgAArch64Helper(Func, Msan, Visitor);
   else
     return new VarArgNoOpHelper(Func, Msan, Visitor);
 }
 
-} // namespace
+} // anonymous namespace
 
 bool MemorySanitizer::runOnFunction(Function &F) {
   if (&F == MsanCtorFunction)
diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
new file mode 100644
index 000000000000..4b59b93b325f
--- /dev/null
+++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -0,0 +1,718 @@
+//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements PGO instrumentation using a minimum spanning tree based
+// on the following paper:
+//   [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
+// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
+// Issue 3, pp 313-322.
+// The idea of the algorithm is based on the fact that for each node (except
+// for the entry and exit), the sum of incoming edge counts equals the sum of
+// outgoing edge counts. The count of an edge on the spanning tree can be
+// derived from the counts of the edges not on the spanning tree. Knuth proves
+// this method instruments the minimum number of edges.
+//
+// The minimal spanning tree here is actually a maximum weight tree -- on-tree
+// edges have higher frequencies (more likely to execute). The idea is to
+// instrument those less frequently executed edges to reduce the runtime
+// overhead of instrumented binaries.
+//
+// This file contains two passes:
+// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
+// count profile, and
+// (2) Pass PGOInstrumentationUse which reads the edge count profile and
+// annotates the branch weights.
+// To get precise counter information, these two passes need to be invoked at
+// the same compilation point (so they see the same IR). For pass
+// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
+// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
+// the profile is opened at module level and passed to each PGOUseFunc
+// instance.
+// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
+// in class FuncPGOInstrumentation.
+//
+// Class PGOEdge represents a CFG edge and some auxiliary information. Class
+// BBInfo contains auxiliary information for each BB. These two classes are
+// used in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are
+// the derived classes of PGOEdge and BBInfo, respectively. They contain extra
+// data structures used in populating profile counters.
+// The MST implementation is in class CFGMST (CFGMST.h).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation.h"
+#include "CFGMST.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/JamCRC.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-instrumentation"
+
+STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
+STATISTIC(NumOfPGOEdge, "Number of edges.");
+STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
+STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
+STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
+STATISTIC(NumOfPGOMismatch, "Number of functions having mismatched profile.");
+STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+
+// Command line option to specify the file to read profile from. This is
+// mainly used for testing.
+static cl::opt<std::string>
+    PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
+                       cl::value_desc("filename"),
+                       cl::desc("Specify the path of profile data file. This "
+                                "is mainly for test purposes."));
+
+namespace {
+class PGOInstrumentationGen : public ModulePass {
+public:
+  static char ID;
+
+  PGOInstrumentationGen() : ModulePass(ID) {
+    initializePGOInstrumentationGenPass(*PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override {
+    return "PGOInstrumentationGenPass";
+  }
+
+private:
+  bool runOnModule(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired();
+  }
+};
+
+class PGOInstrumentationUse : public ModulePass {
+public:
+  static char ID;
+
+  // Provide the profile filename as the parameter.
+  PGOInstrumentationUse(std::string Filename = "")
+      : ModulePass(ID), ProfileFileName(Filename) {
+    if (!PGOTestProfileFile.empty())
+      ProfileFileName = PGOTestProfileFile;
+    initializePGOInstrumentationUsePass(*PassRegistry::getPassRegistry());
+  }
+
+  const char *getPassName() const override {
+    return "PGOInstrumentationUsePass";
+  }
+
+private:
+  std::string ProfileFileName;
+  std::unique_ptr<IndexedInstrProfReader> PGOReader;
+  bool runOnModule(Module &M) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired();
+  }
+};
+} // end anonymous namespace
+
+char PGOInstrumentationGen::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationGen, "pgo-instr-gen",
+                      "PGO instrumentation.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationGen, "pgo-instr-gen",
+                    "PGO instrumentation.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationGenPass() {
+  return new PGOInstrumentationGen();
+}
+
+char PGOInstrumentationUse::ID = 0;
+INITIALIZE_PASS_BEGIN(PGOInstrumentationUse, "pgo-instr-use",
+                      "Read PGO instrumentation profile.", false, false)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_END(PGOInstrumentationUse, "pgo-instr-use",
+                    "Read PGO instrumentation profile.", false, false)
+
+ModulePass *llvm::createPGOInstrumentationUsePass(StringRef Filename) {
+  return new PGOInstrumentationUse(Filename.str());
+}
+
+namespace {
+/// \brief An MST based instrumentation for PGO
+///
+/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
+/// at the function level.
+struct PGOEdge {
+  // This class implements the CFG edges. Note the CFG can be a multi-graph.
+  // So there might be multiple edges with same SrcBB and DestBB.
+  const BasicBlock *SrcBB;
+  const BasicBlock *DestBB;
+  uint64_t Weight;
+  bool InMST;
+  bool Removed;
+  bool IsCritical;
+  PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+      : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false),
+        IsCritical(false) {}
+  // Return the information string of an edge.
+  const std::string infoString() const {
+    return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
+            (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
+  }
+};
+
+// This class stores the auxiliary information for each BB.
+struct BBInfo {
+  BBInfo *Group;
+  uint32_t Index;
+  uint32_t Rank;
+
+  BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {}
+
+  // Return the information string of this object.
+  const std::string infoString() const {
+    return (Twine("Index=") + Twine(Index)).str();
+  }
+};
+
+// This class implements the CFG edges. Note the CFG can be a multi-graph.
+template <class Edge, class BBInfo> class FuncPGOInstrumentation {
+private:
+  Function &F;
+  void computeCFGHash();
+
+public:
+  std::string FuncName;
+  GlobalVariable *FuncNameVar;
+  // CFG hash value for this function.
+  uint64_t FunctionHash;
+
+  // The Minimum Spanning Tree of function CFG.
+  CFGMST<Edge, BBInfo> MST;
+
+  // Given an edge, find the BB that will be instrumented.
+  // Return nullptr if there is no BB to be instrumented.
+  BasicBlock *getInstrBB(Edge *E);
+
+  // Return the auxiliary BB information.
+  BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
+
+  // Dump edges and BB information.
+  void dumpInfo(std::string Str = "") const {
+    MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
+                              Twine(FunctionHash) + "\t" + Str);
+  }
+
+  FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false,
+                         BranchProbabilityInfo *BPI = nullptr,
+                         BlockFrequencyInfo *BFI = nullptr)
+      : F(Func), FunctionHash(0), MST(F, BPI, BFI) {
+    FuncName = getPGOFuncName(F);
+    computeCFGHash();
+    DEBUG(dumpInfo("after CFGMST"));
+
+    NumOfPGOBB += MST.BBInfos.size();
+    for (auto &E : MST.AllEdges) {
+      if (E->Removed)
+        continue;
+      NumOfPGOEdge++;
+      if (!E->InMST)
+        NumOfPGOInstrument++;
+    }
+
+    if (CreateGlobalVar)
+      FuncNameVar = createPGOFuncNameVar(F, FuncName);
+  };
+};
+
+// Compute the hash value for the CFG: the lower 32 bits are CRC32 of the index
+// value of each BB in the CFG. The higher 32 bits record the number of edges.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
+  std::vector<char> Indexes;
+  JamCRC JC;
+  for (auto &BB : F) {
+    const TerminatorInst *TI = BB.getTerminator();
+    for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
+      BasicBlock *Succ = TI->getSuccessor(I);
+      uint32_t Index = getBBInfo(Succ).Index;
+      for (int J = 0; J < 4; J++)
+        Indexes.push_back((char)(Index >> (J * 8)));
+    }
+  }
+  JC.update(Indexes);
+  FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+}
+
+// Given a CFG edge E to be instrumented, find which BB to place the
+// instrumented code. The function will split the critical edge if necessary.
+template <class Edge, class BBInfo>
+BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
+  if (E->InMST || E->Removed)
+    return nullptr;
+
+  BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+  BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+  // For a fake edge, instrument the real BB.
+  if (SrcBB == nullptr)
+    return DestBB;
+  if (DestBB == nullptr)
+    return SrcBB;
+
+  // Instrument the SrcBB if it has a single successor,
+  // otherwise, the DestBB if this is not a critical edge.
+  TerminatorInst *TI = SrcBB->getTerminator();
+  if (TI->getNumSuccessors() <= 1)
+    return SrcBB;
+  if (!E->IsCritical)
+    return DestBB;
+
+  // For a critical edge, we have to split. Instrument the newly
+  // created BB.
+  NumOfPGOSplit++;
+  DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> "
+               << getBBInfo(DestBB).Index << "\n");
+  unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+  BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
+  assert(InstrBB && "Critical edge is not split");
+
+  E->Removed = true;
+  return InstrBB;
+}
+
+// Visit all edges and instrument the edges not in the MST.
+// Critical edges will be split.
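+// As a toy illustration of the scheme (the CFG below is hypothetical): in a
+// diamond A->B, A->C, B->D, C->D whose spanning tree is {A->B, A->C, B->D},
+// only C->D gets a counter. If the fake entry edge counter says A ran N times
+// and C->D counted c, flow conservation recovers the rest:
+//   count(A->C) = count(C->D) = c, and count(A->B) = count(B->D) = N - c.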
+static void instrumentOneFunc(Function &F, Module *M,
+                              BranchProbabilityInfo *BPI,
+                              BlockFrequencyInfo *BFI) {
+  unsigned NumCounters = 0;
+  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, true, BPI, BFI);
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    if (!E->InMST && !E->Removed)
+      NumCounters++;
+  }
+
+  uint32_t I = 0;
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get());
+    if (!InstrBB)
+      continue;
+
+    IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
+    assert(Builder.GetInsertPoint() != InstrBB->end() &&
+           "Cannot get the Instrumentation point");
+    Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+    Builder.CreateCall(
+        Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+        {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
+         Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
+         Builder.getInt32(I++)});
+  }
+}
+
+// This class represents a CFG edge in profile use compilation.
+struct PGOUseEdge : public PGOEdge {
+  bool CountValid;
+  uint64_t CountValue;
+  PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1)
+      : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {}
+
+  // Set the edge count value.
+  void setEdgeCount(uint64_t Value) {
+    CountValue = Value;
+    CountValid = true;
+  }
+
+  // Return the information string for this object.
+  const std::string infoString() const {
+    if (!CountValid)
+      return PGOEdge::infoString();
+    return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)).str();
+  }
+};
+
+typedef SmallVector<PGOUseEdge *, 2> DirectEdges;
+
+// This class stores the auxiliary information for each BB.
+struct UseBBInfo : public BBInfo {
+  uint64_t CountValue;
+  bool CountValid;
+  int32_t UnknownCountInEdge;
+  int32_t UnknownCountOutEdge;
+  DirectEdges InEdges;
+  DirectEdges OutEdges;
+  UseBBInfo(unsigned IX)
+      : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0),
+        UnknownCountOutEdge(0) {}
+  UseBBInfo(unsigned IX, uint64_t C)
+      : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0),
+        UnknownCountOutEdge(0) {}
+
+  // Set the profile count value for this BB.
+  void setBBInfoCount(uint64_t Value) {
+    CountValue = Value;
+    CountValid = true;
+  }
+
+  // Return the information string of this object.
+  const std::string infoString() const {
+    if (!CountValid)
+      return BBInfo::infoString();
+    return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
+  }
+};
+
+// Sum up the count values for all the edges.
+static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
+  uint64_t Total = 0;
+  for (auto &E : Edges) {
+    if (E->Removed)
+      continue;
+    Total += E->CountValue;
+  }
+  return Total;
+}
+
+class PGOUseFunc {
+private:
+  Function &F;
+  Module *M;
+  // This member stores the shared information with class PGOGenFunc.
+  FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
+
+  // Return the auxiliary BB information.
+  UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+    return FuncInfo.getBBInfo(BB);
+  }
+
+  // The maximum count value in the profile. This is only used in PGO use
+  // compilation.
+  uint64_t ProgramMaxCount;
+
+  // Find the Instrumented BB and set the value.
+  void setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
+
+  // Set the edge counter value for the unknown edge -- there should be only
+  // one unknown edge.
+  void setEdgeCount(DirectEdges &Edges, uint64_t Value);
+
+  // Return the FuncName string.
+  const std::string getFuncName() const { return FuncInfo.FuncName; }
+
+  // Set the hot/cold inline hints based on the count values.
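+  // For example, with the thresholds below and ProgramMaxCount = 1,000,000
+  // (a hypothetical profile), a function whose entry count is at least
+  // 1,000,000 * 1/100 = 10,000 is marked InlineHint, and one whose maximum
+  // count is at most 1,000,000 * 2/10000 = 200 is marked Cold.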
+  // FIXME: This function should be removed once the functionality in
+  // the inliner is implemented.
+  void applyFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
+    if (ProgramMaxCount == 0)
+      return;
+    // Threshold of the hot functions.
+    const BranchProbability HotFunctionThreshold(1, 100);
+    // Threshold of the cold functions.
+    const BranchProbability ColdFunctionThreshold(2, 10000);
+    if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
+      F.addFnAttr(llvm::Attribute::InlineHint);
+    else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount))
+      F.addFnAttr(llvm::Attribute::Cold);
+  }
+
+public:
+  PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr,
+             BlockFrequencyInfo *BFI = nullptr)
+      : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI) {}
+
+  // Read counts for the instrumented BB from profile.
+  bool readCounters(IndexedInstrProfReader *PGOReader);
+
+  // Populate the counts for all BBs.
+  void populateCounters();
+
+  // Set the branch weights based on the count values.
+  void setBranchWeights();
+};
+
+// Visit all the edges and assign the count value for the instrumented
+// edges and the BB.
+void PGOUseFunc::setInstrumentedCounts(
+    const std::vector<uint64_t> &CountFromProfile) {
+
+  // Use a worklist as we will update the vector during the iteration.
+  std::vector<PGOUseEdge *> WorkList;
+  for (auto &E : FuncInfo.MST.AllEdges)
+    WorkList.push_back(E.get());
+
+  uint32_t I = 0;
+  for (auto &E : WorkList) {
+    BasicBlock *InstrBB = FuncInfo.getInstrBB(E);
+    if (!InstrBB)
+      continue;
+    uint64_t CountValue = CountFromProfile[I++];
+    if (!E->Removed) {
+      getBBInfo(InstrBB).setBBInfoCount(CountValue);
+      E->setEdgeCount(CountValue);
+      continue;
+    }
+
+    // Need to add two new edges.
+    BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
+    BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+    // Add new edge of SrcBB->InstrBB.
+    PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0);
+    NewEdge.setEdgeCount(CountValue);
+    // Add new edge of InstrBB->DestBB.
+    PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0);
+    NewEdge1.setEdgeCount(CountValue);
+    NewEdge1.InMST = true;
+    getBBInfo(InstrBB).setBBInfoCount(CountValue);
+  }
+}
+
+// Set the count value for the unknown edge. There should be one and only one
+// unknown edge in the Edges vector.
+void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
+  for (auto &E : Edges) {
+    if (E->CountValid)
+      continue;
+    E->setEdgeCount(Value);
+
+    getBBInfo(E->SrcBB).UnknownCountOutEdge--;
+    getBBInfo(E->DestBB).UnknownCountInEdge--;
+    return;
+  }
+  llvm_unreachable("Cannot find the unknown count edge");
+}
+
+// Read the profile from ProfileFileName and assign the value to the
+// instrumented BB and the edges. This function also updates ProgramMaxCount.
+// Return true if the profile is successfully read, and false on errors.
+// Read the profile from ProfileFileName and assign the value to the
+// instrumented BB and the edges. This function also updates ProgramMaxCount.
+// Return true if the profile is successfully read, and false on errors.
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
+  auto &Ctx = M->getContext();
+  ErrorOr<InstrProfRecord> Result =
+      PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
+  if (std::error_code EC = Result.getError()) {
+    if (EC == instrprof_error::unknown_function)
+      NumOfPGOMissing++;
+    else if (EC == instrprof_error::hash_mismatch ||
+             EC == llvm::instrprof_error::malformed)
+      NumOfPGOMismatch++;
+
+    std::string Msg = EC.message() + std::string(" ") + F.getName().str();
+    Ctx.diagnose(
+        DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+    return false;
+  }
+  std::vector<uint64_t> &CountFromProfile = Result.get().Counts;
+
+  NumOfPGOFunc++;
+  DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
+  uint64_t ValueSum = 0;
+  for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
+    DEBUG(dbgs() << "  " << I << ": " << CountFromProfile[I] << "\n");
+    ValueSum += CountFromProfile[I];
+  }
+
+  DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
+
+  getBBInfo(nullptr).UnknownCountOutEdge = 2;
+  getBBInfo(nullptr).UnknownCountInEdge = 2;
+
+  setInstrumentedCounts(CountFromProfile);
+  ProgramMaxCount = PGOReader->getMaximumFunctionCount();
+  return true;
+}
+
+// Populate the counters from instrumented BBs to all BBs.
+// At the end of this operation, all BBs should have a valid count value.
+void PGOUseFunc::populateCounters() {
+  // First set up the Count variable for all BBs.
+  for (auto &E : FuncInfo.MST.AllEdges) {
+    if (E->Removed)
+      continue;
+
+    const BasicBlock *SrcBB = E->SrcBB;
+    const BasicBlock *DestBB = E->DestBB;
+    UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+    UseBBInfo &DestInfo = getBBInfo(DestBB);
+    SrcInfo.OutEdges.push_back(E.get());
+    DestInfo.InEdges.push_back(E.get());
+    SrcInfo.UnknownCountOutEdge++;
+    DestInfo.UnknownCountInEdge++;
+
+    if (!E->CountValid)
+      continue;
+    DestInfo.UnknownCountInEdge--;
+    SrcInfo.UnknownCountOutEdge--;
+  }
+
+  bool Changes = true;
+  unsigned NumPasses = 0;
+  while (Changes) {
+    NumPasses++;
+    Changes = false;
+
+    // For efficient traversal, it's better to start from the end as most
+    // of the instrumented edges are at the end.
+    for (auto &BB : reverse(F)) {
+      UseBBInfo &Count = getBBInfo(&BB);
+      if (!Count.CountValid) {
+        if (Count.UnknownCountOutEdge == 0) {
+          Count.CountValue = sumEdgeCount(Count.OutEdges);
+          Count.CountValid = true;
+          Changes = true;
+        } else if (Count.UnknownCountInEdge == 0) {
+          Count.CountValue = sumEdgeCount(Count.InEdges);
+          Count.CountValid = true;
+          Changes = true;
+        }
+      }
+      if (Count.CountValid) {
+        if (Count.UnknownCountOutEdge == 1) {
+          uint64_t Total = Count.CountValue - sumEdgeCount(Count.OutEdges);
+          setEdgeCount(Count.OutEdges, Total);
+          Changes = true;
+        }
+        if (Count.UnknownCountInEdge == 1) {
+          uint64_t Total = Count.CountValue - sumEdgeCount(Count.InEdges);
+          setEdgeCount(Count.InEdges, Total);
+          Changes = true;
+        }
+      }
+    }
+  }
+
+  DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
+  // Assert every BB has a valid counter.
+  uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
+  uint64_t FuncMaxCount = FuncEntryCount;
+  for (auto &BB : F) {
+    assert(getBBInfo(&BB).CountValid && "BB count is not valid");
+    uint64_t Count = getBBInfo(&BB).CountValue;
+    if (Count > FuncMaxCount)
+      FuncMaxCount = Count;
+  }
+  applyFunctionAttributes(FuncEntryCount, FuncMaxCount);
+
+  DEBUG(FuncInfo.dumpInfo("after reading profile."));
+}
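
The loop above is a local propagation run to a fixed point. The same idea in a standalone toy form (not LLVM code; it handles interior nodes only, where in-flow equals out-flow; entry/exit would need the fake edge the MST adds so flow is conserved everywhere):

    #include <cstdint>
    #include <vector>

    struct Edge { int Src, Dst; int64_t Count; bool Known; };

    // One relaxation pass: returns true if any edge count became known.
    static bool relax(std::vector<Edge> &Edges, int NumNodes) {
      bool Changed = false;
      for (int N = 0; N < NumNodes; ++N) {
        int64_t InSum = 0, OutSum = 0;
        Edge *UnknownIn = nullptr, *UnknownOut = nullptr;
        int UnknownInN = 0, UnknownOutN = 0;
        for (Edge &E : Edges) {
          if (E.Dst == N) {
            if (E.Known) InSum += E.Count; else { ++UnknownInN; UnknownIn = &E; }
          }
          if (E.Src == N) {
            if (E.Known) OutSum += E.Count; else { ++UnknownOutN; UnknownOut = &E; }
          }
        }
        // A single unknown incident edge is determined once everything
        // else at the node is known (flow conservation).
        if (UnknownInN == 0 && UnknownOutN == 1) {
          UnknownOut->Count = InSum - OutSum;
          UnknownOut->Known = true;
          Changed = true;
        } else if (UnknownOutN == 0 && UnknownInN == 1) {
          UnknownIn->Count = OutSum - InSum;
          UnknownIn->Known = true;
          Changed = true;
        }
      }
      return Changed;
    }
    // Callers iterate: while (relax(Edges, NumNodes)) {}  A fixed point is
    // reached in at most |Edges| passes on a connected CFG.
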
+// Assign the scaled count values to the BB with multiple out edges.
+void PGOUseFunc::setBranchWeights() {
+  // Generate MD_prof metadata for every branch instruction.
+  DEBUG(dbgs() << "\nSetting branch weights.\n");
+  MDBuilder MDB(M->getContext());
+  for (auto &BB : F) {
+    TerminatorInst *TI = BB.getTerminator();
+    if (TI->getNumSuccessors() < 2)
+      continue;
+    if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
+      continue;
+    if (getBBInfo(&BB).CountValue == 0)
+      continue;
+
+    // We have a non-zero Branch BB.
+    const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+    unsigned Size = BBCountInfo.OutEdges.size();
+    SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
+    uint64_t MaxCount = 0;
+    for (unsigned s = 0; s < Size; s++) {
+      const PGOUseEdge *E = BBCountInfo.OutEdges[s];
+      const BasicBlock *SrcBB = E->SrcBB;
+      const BasicBlock *DestBB = E->DestBB;
+      if (DestBB == 0)
+        continue;
+      unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
+      uint64_t EdgeCount = E->CountValue;
+      if (EdgeCount > MaxCount)
+        MaxCount = EdgeCount;
+      EdgeCounts[SuccNum] = EdgeCount;
+    }
+    assert(MaxCount > 0 && "Bad max count");
+    uint64_t Scale = calculateCountScale(MaxCount);
+    SmallVector<uint32_t, 2> Weights;
+    for (const auto &ECI : EdgeCounts)
+      Weights.push_back(scaleBranchCount(ECI, Scale));
+
+    TI->setMetadata(llvm::LLVMContext::MD_prof,
+                    MDB.createBranchWeights(Weights));
+    DEBUG(dbgs() << "Weight is: ";
+          for (const auto &W : Weights) { dbgs() << W << " "; }
+          dbgs() << "\n";);
+  }
+}
+} // end anonymous namespace
+
+bool PGOInstrumentationGen::runOnModule(Module &M) {
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    BranchProbabilityInfo *BPI =
+        &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI());
+    BlockFrequencyInfo *BFI =
+        &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI());
+    instrumentOneFunc(F, &M, BPI, BFI);
+  }
+  return true;
+}
+
+static void setPGOCountOnFunc(PGOUseFunc &Func,
+                              IndexedInstrProfReader *PGOReader) {
+  if (Func.readCounters(PGOReader)) {
+    Func.populateCounters();
+    Func.setBranchWeights();
+  }
+}
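
For a two-way branch the metadata written by setBranchWeights reduces to a single MDBuilder call. A hedged sketch of that shape (annotateBranch is an invented helper name; weights are assumed already scaled into uint32_t):

    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    using namespace llvm;

    // Attach !prof branch weights to a conditional terminator, producing
    // metadata of the form !{!"branch_weights", i32 3, i32 97}.
    static void annotateBranch(TerminatorInst *TI, uint32_t TrueWeight,
                               uint32_t FalseWeight) {
      MDBuilder MDB(TI->getContext());
      TI->setMetadata(LLVMContext::MD_prof,
                      MDB.createBranchWeights(TrueWeight, FalseWeight));
    }
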
+bool PGOInstrumentationUse::runOnModule(Module &M) {
+  DEBUG(dbgs() << "Read in profile counters: ");
+  auto &Ctx = M.getContext();
+  // Read the counter array from file.
+  auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName);
+  if (std::error_code EC = ReaderOrErr.getError()) {
+    Ctx.diagnose(
+        DiagnosticInfoPGOProfile(ProfileFileName.data(), EC.message()));
+    return false;
+  }
+
+  PGOReader = std::move(ReaderOrErr.get());
+  if (!PGOReader) {
+    Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
+                                          "Cannot get PGOReader"));
+    return false;
+  }
+
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+    BranchProbabilityInfo *BPI =
+        &(getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI());
+    BlockFrequencyInfo *BFI =
+        &(getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI());
+    PGOUseFunc Func(F, &M, BPI, BFI);
+    setPGOCountOnFunc(Func, PGOReader.get());
+  }
+  return true;
+}
diff --git a/lib/Transforms/Instrumentation/SafeStack.cpp b/lib/Transforms/Instrumentation/SafeStack.cpp
index 6b185a2b127b..abed465f102d 100644
--- a/lib/Transforms/Instrumentation/SafeStack.cpp
+++ b/lib/Transforms/Instrumentation/SafeStack.cpp
@@ -18,8 +18,9 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -37,6 +38,8 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
@@ -44,6 +47,17 @@ using namespace llvm;
 
 #define DEBUG_TYPE "safestack"
 
+enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP };
+
+static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage",
+    cl::Hidden, cl::init(ThreadLocalUSP),
+    cl::desc("Type of storage for the unsafe stack pointer"),
+    cl::values(clEnumValN(ThreadLocalUSP, "thread-local",
+                          "Thread-local storage"),
+               clEnumValN(SingleThreadUSP, "single-thread",
+                          "Non-thread-local storage"),
+               clEnumValEnd));
+
 namespace llvm {
 
 STATISTIC(NumFunctions, "Total number of functions");
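
The cl::opt added in the hunk above follows the usual enum-valued option pattern. A generic, self-contained sketch of the same pattern (the demo names are invented; clEnumValEnd matches the cl API of this era):

    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    enum DemoStorage { TLSDemo, GlobalDemo };

    static cl::opt<DemoStorage> DemoOpt(
        "demo-storage", cl::Hidden, cl::init(TLSDemo),
        cl::desc("Where the demo keeps its pointer"),
        cl::values(clEnumValN(TLSDemo, "tls", "Thread-local storage"),
                   clEnumValN(GlobalDemo, "global", "A plain global"),
                   clEnumValEnd));
    // After cl::ParseCommandLineOptions(argc, argv), testing
    // DemoOpt == TLSDemo mirrors the USPStorage == ThreadLocalUSP check below.
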
@@ -54,118 +68,48 @@ STATISTIC(NumUnsafeStackRestorePointsFunctions,
 STATISTIC(NumAllocas, "Total number of allocas");
 STATISTIC(NumUnsafeStaticAllocas, "Number of unsafe static allocas");
 STATISTIC(NumUnsafeDynamicAllocas, "Number of unsafe dynamic allocas");
+STATISTIC(NumUnsafeByValArguments, "Number of unsafe byval arguments");
 STATISTIC(NumUnsafeStackRestorePoints, "Number of setjmps and landingpads");
 
 } // namespace llvm
 
 namespace {
 
-/// Check whether a given alloca instruction (AI) should be put on the safe
-/// stack or not. The function analyzes all uses of AI and checks whether it is
-/// only accessed in a memory safe way (as decided statically).
-bool IsSafeStackAlloca(const AllocaInst *AI) {
-  // Go through all uses of this alloca and check whether all accesses to the
-  // allocated object are statically known to be memory safe and, hence, the
-  // object can be placed on the safe stack.
+/// Rewrite an SCEV expression for a memory access address to an expression that
+/// represents offset from the given alloca.
+///
+/// The implementation simply replaces all mentions of the alloca with zero.
+class AllocaOffsetRewriter : public SCEVRewriteVisitor<AllocaOffsetRewriter> {
+  const Value *AllocaPtr;
 
-  SmallPtrSet<const Instruction *, 16> Visited;
-  SmallVector<const Instruction *, 8> WorkList;
-  WorkList.push_back(AI);
+public:
+  AllocaOffsetRewriter(ScalarEvolution &SE, const Value *AllocaPtr)
+      : SCEVRewriteVisitor(SE), AllocaPtr(AllocaPtr) {}
 
-  // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
-  while (!WorkList.empty()) {
-    const Instruction *V = WorkList.pop_back_val();
-    for (const Use &UI : V->uses()) {
-      auto I = cast<Instruction>(UI.getUser());
-      assert(V == UI.get());
-
-      switch (I->getOpcode()) {
-      case Instruction::Load:
-        // Loading from a pointer is safe.
-        break;
-      case Instruction::VAArg:
-        // "va-arg" from a pointer is safe.
-        break;
-      case Instruction::Store:
-        if (V == I->getOperand(0))
-          // Stored the pointer - conservatively assume it may be unsafe.
-          return false;
-        // Storing to the pointee is safe.
-        break;
-
-      case Instruction::GetElementPtr:
-        if (!cast<GetElementPtrInst>(I)->hasAllConstantIndices())
-          // GEP with non-constant indices can lead to memory errors.
-          // This also applies to inbounds GEPs, as the inbounds attribute
-          // represents an assumption that the address is in bounds, rather than
-          // an assertion that it is.
-          return false;
-
-        // We assume that GEP on static alloca with constant indices is safe,
-        // otherwise a compiler would detect it and warn during compilation.
-
-        if (!isa<ConstantInt>(AI->getArraySize()))
-          // However, if the array size itself is not constant, the access
-          // might still be unsafe at runtime.
-          return false;
-
-        /* fallthrough */
-
-      case Instruction::BitCast:
-      case Instruction::IntToPtr:
-      case Instruction::PHI:
-      case Instruction::PtrToInt:
-      case Instruction::Select:
-        // The object can be safe or not, depending on how the result of the
-        // instruction is used.
-        if (Visited.insert(I).second)
-          WorkList.push_back(cast<const Instruction>(I));
-        break;
-
-      case Instruction::Call:
-      case Instruction::Invoke: {
-        // FIXME: add support for memset and memcpy intrinsics.
-        ImmutableCallSite CS(I);
-
-        // LLVM 'nocapture' attribute is only set for arguments whose address
-        // is not stored, passed around, or used in any other non-trivial way.
-        // We assume that passing a pointer to an object as a 'nocapture'
-        // argument is safe.
-        // FIXME: a more precise solution would require an interprocedural
-        // analysis here, which would look at all uses of an argument inside
-        // the function being called.
-        ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
-        for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
-          if (A->get() == V && !CS.doesNotCapture(A - B))
-            // The parameter is not marked 'nocapture' - unsafe.
-            return false;
-        continue;
-      }
-
-      default:
-        // The object is unsafe if it is used in any other way.
-        return false;
-      }
-    }
+  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+    if (Expr->getValue() == AllocaPtr)
+      return SE.getZero(Expr->getType());
+    return Expr;
   }
+};
 
-  // All uses of the alloca are safe, we can place it on the safe stack.
-  return true;
-}
-
-/// The SafeStack pass splits the stack of each function into the
-/// safe stack, which is only accessed through memory safe dereferences
-/// (as determined statically), and the unsafe stack, which contains all
-/// local variables that are accessed in unsafe ways.
+/// The SafeStack pass splits the stack of each function into the safe
+/// stack, which is only accessed through memory safe dereferences (as
+/// determined statically), and the unsafe stack, which contains all
+/// local variables that are accessed in ways that we can't prove to
+/// be safe.
 class SafeStack : public FunctionPass {
+  const TargetMachine *TM;
+  const TargetLoweringBase *TL;
   const DataLayout *DL;
+  ScalarEvolution *SE;
 
   Type *StackPtrTy;
   Type *IntPtrTy;
   Type *Int32Ty;
   Type *Int8Ty;
 
-  Constant *UnsafeStackPtr = nullptr;
+  Value *UnsafeStackPtr = nullptr;
 
   /// Unsafe stack alignment. Each stack frame must ensure that the stack is
   /// aligned to this value. We need to re-align the unsafe stack if the
@@ -175,26 +119,31 @@ class SafeStack : public FunctionPass {
   /// might expect to appear on the stack on most common targets.
   enum { StackAlignment = 16 };
 
-  /// \brief Build a constant representing a pointer to the unsafe stack
-  /// pointer.
-  Constant *getOrCreateUnsafeStackPtr(Module &M);
+  /// \brief Build a value representing a pointer to the unsafe stack pointer.
+  Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F);
 
   /// \brief Find all static allocas, dynamic allocas, return instructions and
   /// stack restore points (exception unwind blocks and setjmp calls) in the
   /// given function and append them to the respective vectors.
   void findInsts(Function &F, SmallVectorImpl<AllocaInst *> &StaticAllocas,
                  SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+                 SmallVectorImpl<Argument *> &ByValArguments,
                  SmallVectorImpl<ReturnInst *> &Returns,
                  SmallVectorImpl<Instruction *> &StackRestorePoints);
 
+  /// \brief Calculate the allocation size of a given alloca. Returns 0 if the
+  /// size cannot be statically determined.
+  uint64_t getStaticAllocaAllocationSize(const AllocaInst* AI);
+
   /// \brief Allocate space for all static allocas in \p StaticAllocas,
   /// replace allocas with pointers into the unsafe stack and generate code to
   /// restore the stack pointer before all return instructions in \p Returns.
   ///
   /// \returns A pointer to the top of the unsafe stack after all unsafe static
   /// allocas are allocated.
-  Value *moveStaticAllocasToUnsafeStack(Function &F,
+  Value *moveStaticAllocasToUnsafeStack(IRBuilder<> &IRB, Function &F,
                                         ArrayRef<AllocaInst *> StaticAllocas,
+                                        ArrayRef<Argument *> ByValArguments,
                                         ArrayRef<ReturnInst *> Returns);
 
   /// \brief Generate code to restore the stack after all stack restore points
@@ -203,7 +152,7 @@ class SafeStack : public FunctionPass {
   /// \returns A local variable in which to maintain the dynamic top of the
   /// unsafe stack if needed.
   AllocaInst *
-  createStackRestorePoints(Function &F,
+  createStackRestorePoints(IRBuilder<> &IRB, Function &F,
                            ArrayRef<Instruction *> StackRestorePoints,
                            Value *StaticTop, bool NeedDynamicTop);
 
@@ -214,17 +163,26 @@
                                        AllocaInst *DynamicTop,
                                        ArrayRef<AllocaInst *> DynamicAllocas);
 
+  bool IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize);
+
+  bool IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+                          const Value *AllocaPtr, uint64_t AllocaSize);
+
+  bool IsAccessSafe(Value *Addr, uint64_t Size, const Value *AllocaPtr,
+                    uint64_t AllocaSize);
+
 public:
   static char ID; // Pass identification, replacement for typeid.
-  SafeStack() : FunctionPass(ID), DL(nullptr) {
+  SafeStack(const TargetMachine *TM)
+      : FunctionPass(ID), TM(TM), TL(nullptr), DL(nullptr) {
     initializeSafeStackPass(*PassRegistry::getPassRegistry());
   }
+  SafeStack() : SafeStack(nullptr) {}
 
-  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<AliasAnalysis>();
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<ScalarEvolutionWrapperPass>();
   }
 
-  virtual bool doInitialization(Module &M) {
+  bool doInitialization(Module &M) override {
     DL = &M.getDataLayout();
 
     StackPtrTy = Type::getInt8PtrTy(M.getContext());
@@ -235,51 +193,203 @@ public:
     return false;
   }
 
-  bool runOnFunction(Function &F);
-
+  bool runOnFunction(Function &F) override;
 }; // class SafeStack
 
-Constant *SafeStack::getOrCreateUnsafeStackPtr(Module &M) {
-  // The unsafe stack pointer is stored in a global variable with a magic name.
-  const char *kUnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+uint64_t SafeStack::getStaticAllocaAllocationSize(const AllocaInst* AI) {
+  uint64_t Size = DL->getTypeAllocSize(AI->getAllocatedType());
+  if (AI->isArrayAllocation()) {
+    auto C = dyn_cast<ConstantInt>(AI->getArraySize());
+    if (!C)
+      return 0;
+    Size *= C->getZExtValue();
+  }
+  return Size;
+}
 
-  auto UnsafeStackPtr =
-      dyn_cast_or_null<GlobalVariable>(M.getNamedValue(kUnsafeStackPtrVar));
+bool SafeStack::IsAccessSafe(Value *Addr, uint64_t AccessSize,
                             const Value *AllocaPtr, uint64_t AllocaSize) {
+  AllocaOffsetRewriter Rewriter(*SE, AllocaPtr);
+  const SCEV *Expr = Rewriter.visit(SE->getSCEV(Addr));
 
-  if (!UnsafeStackPtr) {
-    // The global variable is not defined yet, define it ourselves.
-    // We use the initial-exec TLS model because we do not support the variable
-    // living anywhere other than in the main executable.
-    UnsafeStackPtr = new GlobalVariable(
-        /*Module=*/M, /*Type=*/StackPtrTy,
-        /*isConstant=*/false, /*Linkage=*/GlobalValue::ExternalLinkage,
-        /*Initializer=*/0, /*Name=*/kUnsafeStackPtrVar,
-        /*InsertBefore=*/nullptr,
-        /*ThreadLocalMode=*/GlobalValue::InitialExecTLSModel);
-  } else {
-    // The variable exists, check its type and attributes.
-    if (UnsafeStackPtr->getValueType() != StackPtrTy) {
-      report_fatal_error(Twine(kUnsafeStackPtrVar) + " must have void* type");
-    }
+  uint64_t BitWidth = SE->getTypeSizeInBits(Expr->getType());
+  ConstantRange AccessStartRange = SE->getUnsignedRange(Expr);
+  ConstantRange SizeRange =
+      ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AccessSize));
+  ConstantRange AccessRange = AccessStartRange.add(SizeRange);
+  ConstantRange AllocaRange =
+      ConstantRange(APInt(BitWidth, 0), APInt(BitWidth, AllocaSize));
+  bool Safe = AllocaRange.contains(AccessRange);
 
-    if (!UnsafeStackPtr->isThreadLocal()) {
-      report_fatal_error(Twine(kUnsafeStackPtrVar) + " must be thread-local");
+  DEBUG(dbgs() << "[SafeStack] "
+               << (isa<AllocaInst>(AllocaPtr) ? "Alloca " : "ByValArgument ")
+               << *AllocaPtr << "\n"
+               << "  Access " << *Addr << "\n"
+               << "  SCEV " << *Expr
+               << " U: " << SE->getUnsignedRange(Expr)
+               << ", S: " << SE->getSignedRange(Expr) << "\n"
+               << "  Range " << AccessRange << "\n"
+               << "  AllocaRange " << AllocaRange << "\n"
+               << "  " << (Safe ? "safe" : "unsafe") << "\n");
+
+  return Safe;
+}
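
IsAccessSafe above is a pure range computation, so it can be illustrated without SCEV. A standalone sketch under the assumption that the access's start-offset range is already known and all sizes are non-zero (so the half-open ConstantRanges are well formed):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include <cstdint>
    using namespace llvm;

    // True iff every access of AccessSize bytes whose start offset lies in
    // [StartLo, StartHi) stays inside the object's [0, AllocaSize) bytes.
    static bool accessFits(uint64_t StartLo, uint64_t StartHi,
                           uint64_t AccessSize, uint64_t AllocaSize) {
      const unsigned Bits = 64;
      ConstantRange Start(APInt(Bits, StartLo), APInt(Bits, StartHi));
      ConstantRange Size(APInt(Bits, 0), APInt(Bits, AccessSize));
      ConstantRange Access = Start.add(Size); // offsets the access may touch
      ConstantRange Alloca(APInt(Bits, 0), APInt(Bits, AllocaSize));
      return Alloca.contains(Access);
    }
    // e.g. accessFits(0, 4, 4, 8) is true; accessFits(0, 8, 4, 8) is false.
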
+bool SafeStack::IsMemIntrinsicSafe(const MemIntrinsic *MI, const Use &U,
+                                   const Value *AllocaPtr,
+                                   uint64_t AllocaSize) {
+  // All MemIntrinsics have destination address in Arg0 and size in Arg2.
+  if (MI->getRawDest() != U) return true;
+  const auto *Len = dyn_cast<ConstantInt>(MI->getLength());
+  // Non-constant size => unsafe. FIXME: try SCEV getRange.
+  if (!Len) return false;
+  return IsAccessSafe(U, Len->getZExtValue(), AllocaPtr, AllocaSize);
+}
+
+/// Check whether a given allocation must be put on the safe
+/// stack or not. The function analyzes all uses of AI and checks whether it is
+/// only accessed in a memory safe way (as decided statically).
+bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
+  // Go through all uses of this alloca and check whether all accesses to the
+  // allocated object are statically known to be memory safe and, hence, the
+  // object can be placed on the safe stack.
+  SmallPtrSet<const Value *, 16> Visited;
+  SmallVector<const Value *, 8> WorkList;
+  WorkList.push_back(AllocaPtr);
+
+  // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
+  while (!WorkList.empty()) {
+    const Value *V = WorkList.pop_back_val();
+    for (const Use &UI : V->uses()) {
+      auto I = cast<const Instruction>(UI.getUser());
+      assert(V == UI.get());
+
+      switch (I->getOpcode()) {
+      case Instruction::Load: {
+        if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getType()), AllocaPtr,
+                          AllocaSize))
+          return false;
+        break;
+      }
+      case Instruction::VAArg:
+        // "va-arg" from a pointer is safe.
+        break;
+      case Instruction::Store: {
+        if (V == I->getOperand(0)) {
+          // Stored the pointer - conservatively assume it may be unsafe.
+          DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+                       << "\n            store of address: " << *I << "\n");
+          return false;
+        }
+
+        if (!IsAccessSafe(UI, DL->getTypeStoreSize(I->getOperand(0)->getType()),
+                          AllocaPtr, AllocaSize))
+          return false;
+        break;
+      }
+      case Instruction::Ret: {
+        // Information leak.
+        return false;
+      }
+
+      case Instruction::Call:
+      case Instruction::Invoke: {
+        ImmutableCallSite CS(I);
+
+        if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+          if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+              II->getIntrinsicID() == Intrinsic::lifetime_end)
+            continue;
+        }
+
+        if (const MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+          if (!IsMemIntrinsicSafe(MI, UI, AllocaPtr, AllocaSize)) {
+            DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+                         << "\n            unsafe memintrinsic: " << *I
+                         << "\n");
+            return false;
+          }
+          continue;
+        }
+
+        // LLVM 'nocapture' attribute is only set for arguments whose address
+        // is not stored, passed around, or used in any other non-trivial way.
+        // We assume that passing a pointer to an object as a 'nocapture
+        // readnone' argument is safe.
+        // FIXME: a more precise solution would require an interprocedural
+        // analysis here, which would look at all uses of an argument inside
+        // the function being called.
+        ImmutableCallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end();
+        for (ImmutableCallSite::arg_iterator A = B; A != E; ++A)
+          if (A->get() == V)
+            if (!(CS.doesNotCapture(A - B) && (CS.doesNotAccessMemory(A - B) ||
+                                               CS.doesNotAccessMemory()))) {
+              DEBUG(dbgs() << "[SafeStack] Unsafe alloca: " << *AllocaPtr
+                           << "\n            unsafe call: " << *I << "\n");
+              return false;
+            }
+        continue;
+      }
+
+      default:
+        if (Visited.insert(I).second)
+          WorkList.push_back(cast<const Instruction>(I));
+      }
+    }
+  }
+
+  // All uses of the alloca are safe, we can place it on the safe stack.
+  return true;
+}
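
For targets without a TargetLowering-provided location, the fallback path that follows binds to a runtime-owned thread-local slot. A hypothetical stand-in for that runtime definition (the real one ships in compiler-rt's safestack runtime; treat the linkage details as assumptions):

    // Sketch of the runtime side only; compiler-rt's actual definition differs.
    extern "C" {
    // The pass's fallback emits initial-exec TLS loads/stores of this pointer.
    __thread void *__safestack_unsafe_stack_ptr;
    }
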
+Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) {
+  // Check if there is a target-specific location for the unsafe stack pointer.
+  if (TL)
+    if (Value *V = TL->getSafeStackPointerLocation(IRB))
+      return V;
+
+  // Otherwise, assume the target links with compiler-rt, which provides a
+  // thread-local variable with a magic name.
+  Module &M = *F.getParent();
+  const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+  auto UnsafeStackPtr =
+      dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar));
+
+  bool UseTLS = USPStorage == ThreadLocalUSP;
+
+  if (!UnsafeStackPtr) {
+    auto TLSModel = UseTLS ?
+        GlobalValue::InitialExecTLSModel :
+        GlobalValue::NotThreadLocal;
+    // The global variable is not defined yet, define it ourselves.
+    // We use the initial-exec TLS model because we do not support the
+    // variable living anywhere other than in the main executable.
+    UnsafeStackPtr = new GlobalVariable(
+        M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
+        UnsafeStackPtrVar, nullptr, TLSModel);
+  } else {
+    // The variable exists, check its type and attributes.
+    if (UnsafeStackPtr->getValueType() != StackPtrTy)
+      report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
+    if (UseTLS != UnsafeStackPtr->isThreadLocal())
+      report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
+                         (UseTLS ? "" : "not ") + "be thread-local");
+  }
   return UnsafeStackPtr;
 }
 
 void SafeStack::findInsts(Function &F,
                           SmallVectorImpl<AllocaInst *> &StaticAllocas,
                           SmallVectorImpl<AllocaInst *> &DynamicAllocas,
+                          SmallVectorImpl<Argument *> &ByValArguments,
                           SmallVectorImpl<ReturnInst *> &Returns,
                           SmallVectorImpl<Instruction *> &StackRestorePoints) {
-  for (Instruction &I : inst_range(&F)) {
+  for (Instruction &I : instructions(&F)) {
     if (auto AI = dyn_cast<AllocaInst>(&I)) {
       ++NumAllocas;
 
-      if (IsSafeStackAlloca(AI))
+      uint64_t Size = getStaticAllocaAllocationSize(AI);
+      if (IsSafeStackAlloca(AI, Size))
         continue;
 
       if (AI->isStaticAlloca()) {
@@ -304,19 +414,26 @@ void SafeStack::findInsts(Function &F,
           "gcroot intrinsic not compatible with safestack attribute");
     }
   }
+  for (Argument &Arg : F.args()) {
+    if (!Arg.hasByValAttr())
+      continue;
+    uint64_t Size =
+        DL->getTypeStoreSize(Arg.getType()->getPointerElementType());
+    if (IsSafeStackAlloca(&Arg, Size))
+      continue;
+
+    ++NumUnsafeByValArguments;
+    ByValArguments.push_back(&Arg);
+  }
 }
 
 AllocaInst *
-SafeStack::createStackRestorePoints(Function &F,
+SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
                                     ArrayRef<Instruction *> StackRestorePoints,
                                     Value *StaticTop, bool NeedDynamicTop) {
   if (StackRestorePoints.empty())
     return nullptr;
 
-  IRBuilder<> IRB(StaticTop
-                      ? cast<Instruction>(StaticTop)->getNextNode()
-                      : (Instruction *)F.getEntryBlock().getFirstInsertionPt());
-
   // We need the current value of the shadow stack pointer to restore
   // after longjmp or exception catching.
 
@@ -342,7 +459,7 @@ SafeStack::createStackRestorePoints(Function &F,
   for (Instruction *I : StackRestorePoints) {
     ++NumUnsafeStackRestorePoints;
 
-    IRB.SetInsertPoint(cast<Instruction>(I->getNextNode()));
+    IRB.SetInsertPoint(I->getNextNode());
     Value *CurrentTop = DynamicTop ?
IRB.CreateLoad(DynamicTop) : StaticTop; IRB.CreateStore(CurrentTop, UnsafeStackPtr); } @@ -350,14 +467,12 @@ SafeStack::createStackRestorePoints(Function &F, return DynamicTop; } -Value * -SafeStack::moveStaticAllocasToUnsafeStack(Function &F, - ArrayRef StaticAllocas, - ArrayRef Returns) { - if (StaticAllocas.empty()) +Value *SafeStack::moveStaticAllocasToUnsafeStack( + IRBuilder<> &IRB, Function &F, ArrayRef StaticAllocas, + ArrayRef ByValArguments, ArrayRef Returns) { + if (StaticAllocas.empty() && ByValArguments.empty()) return nullptr; - IRBuilder<> IRB(F.getEntryBlock().getFirstInsertionPt()); DIBuilder DIB(*F.getParent()); // We explicitly compute and set the unsafe stack layout for all unsafe @@ -377,6 +492,13 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, // Compute maximum alignment among static objects on the unsafe stack. unsigned MaxAlignment = 0; + for (Argument *Arg : ByValArguments) { + Type *Ty = Arg->getType()->getPointerElementType(); + unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), + Arg->getParamAlignment()); + if (Align > MaxAlignment) + MaxAlignment = Align; + } for (AllocaInst *AI : StaticAllocas) { Type *Ty = AI->getAllocatedType(); unsigned Align = @@ -388,22 +510,51 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, if (MaxAlignment > StackAlignment) { // Re-align the base pointer according to the max requested alignment. assert(isPowerOf2_32(MaxAlignment)); - IRB.SetInsertPoint(cast(BasePointer->getNextNode())); + IRB.SetInsertPoint(BasePointer->getNextNode()); BasePointer = cast(IRB.CreateIntToPtr( IRB.CreateAnd(IRB.CreatePtrToInt(BasePointer, IntPtrTy), ConstantInt::get(IntPtrTy, ~uint64_t(MaxAlignment - 1))), StackPtrTy)); } - // Allocate space for every unsafe static AllocaInst on the unsafe stack. int64_t StaticOffset = 0; // Current stack top. + IRB.SetInsertPoint(BasePointer->getNextNode()); + + for (Argument *Arg : ByValArguments) { + Type *Ty = Arg->getType()->getPointerElementType(); + + uint64_t Size = DL->getTypeStoreSize(Ty); + if (Size == 0) + Size = 1; // Don't create zero-sized stack objects. + + // Ensure the object is properly aligned. + unsigned Align = std::max((unsigned)DL->getPrefTypeAlignment(Ty), + Arg->getParamAlignment()); + + // Add alignment. + // NOTE: we ensure that BasePointer itself is aligned to >= Align. + StaticOffset += Size; + StaticOffset = RoundUpToAlignment(StaticOffset, Align); + + Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8* + ConstantInt::get(Int32Ty, -StaticOffset)); + Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(), + Arg->getName() + ".unsafe-byval"); + + // Replace alloc with the new location. + replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB, + /*Deref=*/true, -StaticOffset); + Arg->replaceAllUsesWith(NewArg); + IRB.SetInsertPoint(cast(NewArg)->getNextNode()); + IRB.CreateMemCpy(Off, Arg, Size, Arg->getParamAlignment()); + } + + // Allocate space for every unsafe static AllocaInst on the unsafe stack. for (AllocaInst *AI : StaticAllocas) { IRB.SetInsertPoint(AI); - auto CArraySize = cast(AI->getArraySize()); Type *Ty = AI->getAllocatedType(); - - uint64_t Size = DL->getTypeAllocSize(Ty) * CArraySize->getZExtValue(); + uint64_t Size = getStaticAllocaAllocationSize(AI); if (Size == 0) Size = 1; // Don't create zero-sized stack objects. @@ -423,7 +574,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, cast(NewAI)->takeName(AI); // Replace alloc with the new location. 
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, /*Deref=*/true); + replaceDbgDeclareForAlloca(AI, BasePointer, DIB, /*Deref=*/true, -StaticOffset); AI->replaceAllUsesWith(NewAI); AI->eraseFromParent(); } @@ -434,7 +585,7 @@ SafeStack::moveStaticAllocasToUnsafeStack(Function &F, StaticOffset = RoundUpToAlignment(StaticOffset, StackAlignment); // Update shadow stack pointer in the function epilogue. - IRB.SetInsertPoint(cast(BasePointer->getNextNode())); + IRB.SetInsertPoint(BasePointer->getNextNode()); Value *StaticTop = IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -StaticOffset), @@ -478,7 +629,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( if (DynamicTop) IRB.CreateStore(NewTop, DynamicTop); - Value *NewAI = IRB.CreateIntToPtr(SP, AI->getType()); + Value *NewAI = IRB.CreatePointerCast(NewTop, AI->getType()); if (AI->hasName() && isa(NewAI)) NewAI->takeName(AI); @@ -513,8 +664,6 @@ void SafeStack::moveDynamicAllocasToUnsafeStack( } bool SafeStack::runOnFunction(Function &F) { - auto AA = &getAnalysis(); - DEBUG(dbgs() << "[SafeStack] Function: " << F.getName() << "\n"); if (!F.hasFnAttribute(Attribute::SafeStack)) { @@ -529,6 +678,9 @@ bool SafeStack::runOnFunction(Function &F) { return false; } + TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr; + SE = &getAnalysis().getSE(); + { // Make sure the regular stack protector won't run on this function // (safestack attribute takes precedence). @@ -541,16 +693,11 @@ bool SafeStack::runOnFunction(Function &F) { AttributeSet::get(F.getContext(), AttributeSet::FunctionIndex, B)); } - if (AA->onlyReadsMemory(&F)) { - // XXX: we don't protect against information leak attacks for now. - DEBUG(dbgs() << "[SafeStack] function only reads memory\n"); - return false; - } - ++NumFunctions; SmallVector StaticAllocas; SmallVector DynamicAllocas; + SmallVector ByValArguments; SmallVector Returns; // Collect all points where stack gets unwound and needs to be restored @@ -562,23 +709,26 @@ bool SafeStack::runOnFunction(Function &F) { // Find all static and dynamic alloca instructions that must be moved to the // unsafe stack, all return instructions and stack restore points. - findInsts(F, StaticAllocas, DynamicAllocas, Returns, StackRestorePoints); + findInsts(F, StaticAllocas, DynamicAllocas, ByValArguments, Returns, + StackRestorePoints); if (StaticAllocas.empty() && DynamicAllocas.empty() && - StackRestorePoints.empty()) + ByValArguments.empty() && StackRestorePoints.empty()) return false; // Nothing to do in this function. - if (!StaticAllocas.empty() || !DynamicAllocas.empty()) + if (!StaticAllocas.empty() || !DynamicAllocas.empty() || + !ByValArguments.empty()) ++NumUnsafeStackFunctions; // This function has the unsafe stack. if (!StackRestorePoints.empty()) ++NumUnsafeStackRestorePointsFunctions; - if (!UnsafeStackPtr) - UnsafeStackPtr = getOrCreateUnsafeStackPtr(*F.getParent()); + IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt()); + UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F); // The top of the unsafe stack after all unsafe static allocas are allocated. - Value *StaticTop = moveStaticAllocasToUnsafeStack(F, StaticAllocas, Returns); + Value *StaticTop = moveStaticAllocasToUnsafeStack(IRB, F, StaticAllocas, + ByValArguments, Returns); // Safe stack object that stores the current unsafe stack top. It is updated // as unsafe dynamic (non-constant-sized) allocas are allocated and freed. 
@@ -587,7 +737,7 @@ bool SafeStack::runOnFunction(Function &F) { // FIXME: a better alternative might be to store the unsafe stack pointer // before setjmp / invoke instructions. AllocaInst *DynamicTop = createStackRestorePoints( - F, StackRestorePoints, StaticTop, !DynamicAllocas.empty()); + IRB, F, StackRestorePoints, StaticTop, !DynamicAllocas.empty()); // Handle dynamic allocas. moveDynamicAllocasToUnsafeStack(F, UnsafeStackPtr, DynamicTop, @@ -597,13 +747,14 @@ bool SafeStack::runOnFunction(Function &F) { return true; } -} // end anonymous namespace +} // anonymous namespace char SafeStack::ID = 0; -INITIALIZE_PASS_BEGIN(SafeStack, "safe-stack", - "Safe Stack instrumentation pass", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(SafeStack, "safe-stack", "Safe Stack instrumentation pass", - false, false) +INITIALIZE_TM_PASS_BEGIN(SafeStack, "safe-stack", + "Safe Stack instrumentation pass", false, false) +INITIALIZE_TM_PASS_END(SafeStack, "safe-stack", + "Safe Stack instrumentation pass", false, false) -FunctionPass *llvm::createSafeStackPass() { return new SafeStack(); } +FunctionPass *llvm::createSafeStackPass(const llvm::TargetMachine *TM) { + return new SafeStack(TM); +} diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 7a5b4cb0178b..09de7a2cda2b 100644 --- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -31,6 +31,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" @@ -59,6 +60,7 @@ static const char *const kSanCovIndirCallName = "__sanitizer_cov_indir_call16"; static const char *const kSanCovTraceEnter = "__sanitizer_cov_trace_func_enter"; static const char *const kSanCovTraceBB = "__sanitizer_cov_trace_basic_block"; static const char *const kSanCovTraceCmp = "__sanitizer_cov_trace_cmp"; +static const char *const kSanCovTraceSwitch = "__sanitizer_cov_trace_switch"; static const char *const kSanCovModuleCtorName = "sancov.module_ctor"; static const uint64_t kSanCtorAndDtorPriority = 2; @@ -148,19 +150,25 @@ class SanitizerCoverageModule : public ModulePass { void InjectCoverageForIndirectCalls(Function &F, ArrayRef IndirCalls); void InjectTraceForCmp(Function &F, ArrayRef CmpTraceTargets); + void InjectTraceForSwitch(Function &F, + ArrayRef SwitchTraceTargets); bool InjectCoverage(Function &F, ArrayRef AllBlocks); void SetNoSanitizeMetadata(Instruction *I); void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls); unsigned NumberOfInstrumentedBlocks() { - return SanCovFunction->getNumUses() + SanCovWithCheckFunction->getNumUses(); + return SanCovFunction->getNumUses() + + SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() + + SanCovTraceEnter->getNumUses(); } Function *SanCovFunction; Function *SanCovWithCheckFunction; Function *SanCovIndirCallFunction; Function *SanCovTraceEnter, *SanCovTraceBB; Function *SanCovTraceCmpFunction; + Function *SanCovTraceSwitchFunction; InlineAsm *EmptyAsm; - Type *IntptrTy, *Int64Ty; + Type *IntptrTy, *Int64Ty, *Int64PtrTy; + Module *CurModule; LLVMContext *C; const DataLayout *DL; @@ -177,11 +185,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { return false; C = &(M.getContext()); DL = &M.getDataLayout(); + CurModule = 
&M; IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits()); Type *VoidTy = Type::getVoidTy(*C); IRBuilder<> IRB(*C); Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty()); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); + Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty()); Int64Ty = IRB.getInt64Ty(); SanCovFunction = checkSanitizerInterfaceFunction( @@ -194,18 +204,19 @@ bool SanitizerCoverageModule::runOnModule(Module &M) { SanCovTraceCmpFunction = checkSanitizerInterfaceFunction(M.getOrInsertFunction( kSanCovTraceCmp, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr)); + SanCovTraceSwitchFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + kSanCovTraceSwitch, VoidTy, Int64Ty, Int64PtrTy, nullptr)); // We insert an empty inline asm after cov callbacks to avoid callback merge. EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false), StringRef(""), StringRef(""), /*hasSideEffects=*/true); - if (Options.TraceBB) { - SanCovTraceEnter = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); - SanCovTraceBB = checkSanitizerInterfaceFunction( - M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); - } + SanCovTraceEnter = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kSanCovTraceEnter, VoidTy, Int32PtrTy, nullptr)); + SanCovTraceBB = checkSanitizerInterfaceFunction( + M.getOrInsertFunction(kSanCovTraceBB, VoidTy, Int32PtrTy, nullptr)); // At this point we create a dummy array of guards because we don't // know how many elements we will need. @@ -280,11 +291,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { if (F.empty()) return false; if (F.getName().find(".module_ctor") != std::string::npos) return false; // Should not instrument sanitizer init functions. + // Don't instrument functions using SEH for now. Splitting basic blocks like + // we do for coverage breaks WinEHPrepare. + // FIXME: Remove this when SEH no longer uses landingpad pattern matching. + if (F.hasPersonalityFn() && + isAsynchronousEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) + return false; if (Options.CoverageType >= SanitizerCoverageOptions::SCK_Edge) SplitAllCriticalEdges(F); SmallVector IndirCalls; SmallVector AllBlocks; SmallVector CmpTraceTargets; + SmallVector SwitchTraceTargets; for (auto &BB : F) { AllBlocks.push_back(&BB); for (auto &Inst : BB) { @@ -293,13 +311,18 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) { if (CS && !CS.getCalledFunction()) IndirCalls.push_back(&Inst); } - if (Options.TraceCmp && isa(&Inst)) - CmpTraceTargets.push_back(&Inst); + if (Options.TraceCmp) { + if (isa(&Inst)) + CmpTraceTargets.push_back(&Inst); + if (isa(&Inst)) + SwitchTraceTargets.push_back(&Inst); + } } } InjectCoverage(F, AllBlocks); InjectCoverageForIndirectCalls(F, IndirCalls); InjectTraceForCmp(F, CmpTraceTargets); + InjectTraceForSwitch(F, SwitchTraceTargets); return true; } @@ -348,6 +371,45 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls( } } +// For every switch statement we insert a call: +// __sanitizer_cov_trace_switch(CondValue, +// {NumCases, ValueSizeInBits, Case0Value, Case1Value, Case2Value, ... 
}) + +void SanitizerCoverageModule::InjectTraceForSwitch( + Function &F, ArrayRef SwitchTraceTargets) { + for (auto I : SwitchTraceTargets) { + if (SwitchInst *SI = dyn_cast(I)) { + IRBuilder<> IRB(I); + SmallVector Initializers; + Value *Cond = SI->getCondition(); + if (Cond->getType()->getScalarSizeInBits() > + Int64Ty->getScalarSizeInBits()) + continue; + Initializers.push_back(ConstantInt::get(Int64Ty, SI->getNumCases())); + Initializers.push_back( + ConstantInt::get(Int64Ty, Cond->getType()->getScalarSizeInBits())); + if (Cond->getType()->getScalarSizeInBits() < + Int64Ty->getScalarSizeInBits()) + Cond = IRB.CreateIntCast(Cond, Int64Ty, false); + for (auto It: SI->cases()) { + Constant *C = It.getCaseValue(); + if (C->getType()->getScalarSizeInBits() < + Int64Ty->getScalarSizeInBits()) + C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty); + Initializers.push_back(C); + } + ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size()); + GlobalVariable *GV = new GlobalVariable( + *CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage, + ConstantArray::get(ArrayOfInt64Ty, Initializers), + "__sancov_gen_cov_switch_values"); + IRB.CreateCall(SanCovTraceSwitchFunction, + {Cond, IRB.CreatePointerCast(GV, Int64PtrTy)}); + } + } +} + + void SanitizerCoverageModule::InjectTraceForCmp( Function &F, ArrayRef CmpTraceTargets) { for (auto I : CmpTraceTargets) { @@ -369,8 +431,7 @@ void SanitizerCoverageModule::InjectTraceForCmp( void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) { I->setMetadata( - I->getParent()->getParent()->getParent()->getMDKindID("nosanitize"), - MDNode::get(*C, None)); + I->getModule()->getMDKindID("nosanitize"), MDNode::get(*C, None)); } void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, @@ -382,34 +443,31 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, // locations. if (isa(BB.getTerminator())) return; - BasicBlock::iterator IP = BB.getFirstInsertionPt(), BE = BB.end(); - // Skip static allocas at the top of the entry block so they don't become - // dynamic when we split the block. If we used our optimized stack layout, - // then there will only be one alloca and it will come first. - for (; IP != BE; ++IP) { - AllocaInst *AI = dyn_cast(IP); - if (!AI || !AI->isStaticAlloca()) - break; - } + BasicBlock::iterator IP = BB.getFirstInsertionPt(); bool IsEntryBB = &BB == &F.getEntryBlock(); DebugLoc EntryLoc; if (IsEntryBB) { if (auto SP = getDISubprogram(&F)) EntryLoc = DebugLoc::get(SP->getScopeLine(), 0, SP); + // Keep static allocas and llvm.localescape calls in the entry block. Even + // if we aren't splitting the block, it's nice for allocas to be before + // calls. + IP = PrepareToSplitEntryBlock(BB, IP); } else { EntryLoc = IP->getDebugLoc(); } - IRBuilder<> IRB(IP); + IRBuilder<> IRB(&*IP); IRB.SetCurrentDebugLocation(EntryLoc); - SmallVector Indices; Value *GuardP = IRB.CreateAdd( IRB.CreatePointerCast(GuardArray, IntptrTy), ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4)); Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty()); GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy); - if (UseCalls) { + if (Options.TraceBB) { + IRB.CreateCall(IsEntryBB ? 
SanCovTraceEnter : SanCovTraceBB, GuardP); + } else if (UseCalls) { IRB.CreateCall(SanCovWithCheckFunction, GuardP); } else { LoadInst *Load = IRB.CreateLoad(GuardP); @@ -418,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, SetNoSanitizeMetadata(Load); Value *Cmp = IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load); Instruction *Ins = SplitBlockAndInsertIfThen( - Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); + Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); IRB.SetCurrentDebugLocation(EntryLoc); // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC. @@ -427,7 +485,7 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, } if (Options.Use8bitCounters) { - IRB.SetInsertPoint(IP); + IRB.SetInsertPoint(&*IP); Value *P = IRB.CreateAdd( IRB.CreatePointerCast(EightBitCounterArray, IntptrTy), ConstantInt::get(IntptrTy, NumberOfInstrumentedBlocks() - 1)); @@ -438,13 +496,6 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB, SetNoSanitizeMetadata(LI); SetNoSanitizeMetadata(SI); } - - if (Options.TraceBB) { - // Experimental support for tracing. - // Insert a callback with the same guard variable as used for coverage. - IRB.SetInsertPoint(IP); - IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP); - } } char SanitizerCoverageModule::ID = 0; diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 1a46bbb86122..9331e1d2b3fd 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -142,37 +142,35 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr)); OrdTy = IRB.getInt32Ty(); for (size_t i = 0; i < kNumberOfAccessSizes; ++i) { - const size_t ByteSize = 1 << i; - const size_t BitSize = ByteSize * 8; - SmallString<32> ReadName("__tsan_read" + itostr(ByteSize)); + const unsigned ByteSize = 1U << i; + const unsigned BitSize = ByteSize * 8; + std::string ByteSizeStr = utostr(ByteSize); + std::string BitSizeStr = utostr(BitSize); + SmallString<32> ReadName("__tsan_read" + ByteSizeStr); TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - SmallString<32> WriteName("__tsan_write" + itostr(ByteSize)); + SmallString<32> WriteName("__tsan_write" + ByteSizeStr); TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - SmallString<64> UnalignedReadName("__tsan_unaligned_read" + - itostr(ByteSize)); + SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr); TsanUnalignedRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); - SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + - itostr(ByteSize)); + SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr); TsanUnalignedWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr)); Type *Ty = Type::getIntNTy(M.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); - SmallString<32> AtomicLoadName("__tsan_atomic" + itostr(BitSize) + - "_load"); + SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load"); 
TsanAtomicLoad[i] = checkSanitizerInterfaceFunction( M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr)); - SmallString<32> AtomicStoreName("__tsan_atomic" + itostr(BitSize) + - "_store"); + SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store"); TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr)); @@ -201,7 +199,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) { M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr)); } - SmallString<32> AtomicCASName("__tsan_atomic" + itostr(BitSize) + + SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr + "_compare_exchange_val"); TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction( AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr)); @@ -513,8 +511,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), @@ -527,8 +525,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), @@ -544,8 +542,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx]; if (!F) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), @@ -558,8 +556,8 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) { int Idx = getMemoryAccessFuncIndex(Addr, DL); if (Idx < 0) return false; - const size_t ByteSize = 1 << Idx; - const size_t BitSize = ByteSize * 8; + const unsigned ByteSize = 1U << Idx; + const unsigned BitSize = ByteSize * 8; Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize); Type *PtrTy = Ty->getPointerTo(); Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy), diff --git a/lib/Transforms/ObjCARC/CMakeLists.txt b/lib/Transforms/ObjCARC/CMakeLists.txt index fbcae29044c6..98ad37f5d230 100644 --- a/lib/Transforms/ObjCARC/CMakeLists.txt +++ b/lib/Transforms/ObjCARC/CMakeLists.txt @@ -3,8 +3,6 @@ add_llvm_library(LLVMObjCARCOpts ObjCARCOpts.cpp ObjCARCExpand.cpp ObjCARCAPElim.cpp - ObjCARCAliasAnalysis.cpp - ARCInstKind.cpp ObjCARCContract.cpp DependencyAnalysis.cpp ProvenanceAnalysis.cpp diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp index 4edd02904b22..9d78e5ae3b9b 100644 --- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp +++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp @@ -49,7 +49,7 @@ bool llvm::objcarc::CanAlterRefCount(const Instruction *Inst, const Value *Ptr, assert(CS && "Only calls can alter reference 
counts!"); // See if AliasAnalysis can help us with the call. - AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); + FunctionModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS); if (AliasAnalysis::onlyReadsMemory(MRB)) return false; if (AliasAnalysis::onlyAccessesArgPointees(MRB)) { @@ -226,7 +226,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor, SmallPtrSetImpl &DependingInsts, SmallPtrSetImpl &Visited, ProvenanceAnalysis &PA) { - BasicBlock::iterator StartPos = StartInst; + BasicBlock::iterator StartPos = StartInst->getIterator(); SmallVector, 4> Worklist; Worklist.push_back(std::make_pair(StartBB, StartPos)); @@ -252,7 +252,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor, break; } - Instruction *Inst = --LocalStartPos; + Instruction *Inst = &*--LocalStartPos; if (Depends(Flavor, Inst, Arg, PA)) { DependingInsts.insert(Inst); break; diff --git a/lib/Transforms/ObjCARC/ObjCARC.cpp b/lib/Transforms/ObjCARC/ObjCARC.cpp index 6ea038b8ba8c..d860723bb460 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.cpp +++ b/lib/Transforms/ObjCARC/ObjCARC.cpp @@ -26,18 +26,10 @@ namespace llvm { using namespace llvm; using namespace llvm::objcarc; -/// \brief A handy option to enable/disable all ARC Optimizations. -bool llvm::objcarc::EnableARCOpts; -static cl::opt -EnableARCOptimizations("enable-objc-arc-opts", - cl::desc("enable/disable all ARC Optimizations"), - cl::location(EnableARCOpts), - cl::init(true)); - /// initializeObjCARCOptsPasses - Initialize all passes linked into the /// ObjCARCOpts library. void llvm::initializeObjCARCOpts(PassRegistry &Registry) { - initializeObjCARCAliasAnalysisPass(Registry); + initializeObjCARCAAWrapperPassPass(Registry); initializeObjCARCAPElimPass(Registry); initializeObjCARCExpandPass(Registry); initializeObjCARCContractPass(Registry); diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h index 7595e2db1a7a..5fd45b00af17 100644 --- a/lib/Transforms/ObjCARC/ObjCARC.h +++ b/lib/Transforms/ObjCARC/ObjCARC.h @@ -26,6 +26,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Optional.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallSite.h" @@ -34,7 +36,6 @@ #include "llvm/Pass.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Utils/Local.h" -#include "ARCInstKind.h" namespace llvm { class raw_ostream; @@ -43,99 +44,6 @@ class raw_ostream; namespace llvm { namespace objcarc { -/// \brief A handy option to enable/disable all ARC Optimizations. -extern bool EnableARCOpts; - -/// \brief Test if the given module looks interesting to run ARC optimization -/// on. 
-static inline bool ModuleHasARC(const Module &M) { - return - M.getNamedValue("objc_retain") || - M.getNamedValue("objc_release") || - M.getNamedValue("objc_autorelease") || - M.getNamedValue("objc_retainAutoreleasedReturnValue") || - M.getNamedValue("objc_retainBlock") || - M.getNamedValue("objc_autoreleaseReturnValue") || - M.getNamedValue("objc_autoreleasePoolPush") || - M.getNamedValue("objc_loadWeakRetained") || - M.getNamedValue("objc_loadWeak") || - M.getNamedValue("objc_destroyWeak") || - M.getNamedValue("objc_storeWeak") || - M.getNamedValue("objc_initWeak") || - M.getNamedValue("objc_moveWeak") || - M.getNamedValue("objc_copyWeak") || - M.getNamedValue("objc_retainedObject") || - M.getNamedValue("objc_unretainedObject") || - M.getNamedValue("objc_unretainedPointer") || - M.getNamedValue("clang.arc.use"); -} - -/// \brief This is a wrapper around getUnderlyingObject which also knows how to -/// look through objc_retain and objc_autorelease calls, which we know to return -/// their argument verbatim. -static inline const Value *GetUnderlyingObjCPtr(const Value *V, - const DataLayout &DL) { - for (;;) { - V = GetUnderlyingObject(V, DL); - if (!IsForwarding(GetBasicARCInstKind(V))) - break; - V = cast(V)->getArgOperand(0); - } - - return V; -} - -/// The RCIdentity root of a value \p V is a dominating value U for which -/// retaining or releasing U is equivalent to retaining or releasing V. In other -/// words, ARC operations on \p V are equivalent to ARC operations on \p U. -/// -/// We use this in the ARC optimizer to make it easier to match up ARC -/// operations by always mapping ARC operations to RCIdentityRoots instead of -/// pointers themselves. -/// -/// The two ways that we see RCIdentical values in ObjC are via: -/// -/// 1. PointerCasts -/// 2. Forwarding Calls that return their argument verbatim. -/// -/// Thus this function strips off pointer casts and forwarding calls. *NOTE* -/// This implies that two RCIdentical values must alias. -static inline const Value *GetRCIdentityRoot(const Value *V) { - for (;;) { - V = V->stripPointerCasts(); - if (!IsForwarding(GetBasicARCInstKind(V))) - break; - V = cast(V)->getArgOperand(0); - } - return V; -} - -/// Helper which calls const Value *GetRCIdentityRoot(const Value *V) and just -/// casts away the const of the result. For documentation about what an -/// RCIdentityRoot (and by extension GetRCIdentityRoot is) look at that -/// function. -static inline Value *GetRCIdentityRoot(Value *V) { - return const_cast(GetRCIdentityRoot((const Value *)V)); -} - -/// \brief Assuming the given instruction is one of the special calls such as -/// objc_retain or objc_release, return the RCIdentity root of the argument of -/// the call. -static inline Value *GetArgRCIdentityRoot(Value *Inst) { - return GetRCIdentityRoot(cast(Inst)->getArgOperand(0)); -} - -static inline bool IsNullOrUndef(const Value *V) { - return isa(V) || isa(V); -} - -static inline bool IsNoopInstruction(const Instruction *I) { - return isa(I) || - (isa(I) && - cast(I)->hasAllZeroIndices()); -} - - /// \brief Erase the given instruction. /// /// Many ObjC calls return their argument verbatim, @@ -162,152 +70,6 @@ static inline void EraseInstruction(Instruction *CI) { RecursivelyDeleteTriviallyDeadInstructions(OldArg); } -/// \brief Test whether the given value is possible a retainable object pointer. -static inline bool IsPotentialRetainableObjPtr(const Value *Op) { - // Pointers to static or stack storage are not valid retainable object - // pointers. 
- if (isa(Op) || isa(Op)) - return false; - // Special arguments can not be a valid retainable object pointer. - if (const Argument *Arg = dyn_cast(Op)) - if (Arg->hasByValAttr() || - Arg->hasInAllocaAttr() || - Arg->hasNestAttr() || - Arg->hasStructRetAttr()) - return false; - // Only consider values with pointer types. - // - // It seemes intuitive to exclude function pointer types as well, since - // functions are never retainable object pointers, however clang occasionally - // bitcasts retainable object pointers to function-pointer type temporarily. - PointerType *Ty = dyn_cast(Op->getType()); - if (!Ty) - return false; - // Conservatively assume anything else is a potential retainable object - // pointer. - return true; -} - -static inline bool IsPotentialRetainableObjPtr(const Value *Op, - AliasAnalysis &AA) { - // First make the rudimentary check. - if (!IsPotentialRetainableObjPtr(Op)) - return false; - - // Objects in constant memory are not reference-counted. - if (AA.pointsToConstantMemory(Op)) - return false; - - // Pointers in constant memory are not pointing to reference-counted objects. - if (const LoadInst *LI = dyn_cast(Op)) - if (AA.pointsToConstantMemory(LI->getPointerOperand())) - return false; - - // Otherwise assume the worst. - return true; -} - -/// \brief Helper for GetARCInstKind. Determines what kind of construct CS -/// is. -static inline ARCInstKind GetCallSiteClass(ImmutableCallSite CS) { - for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); - I != E; ++I) - if (IsPotentialRetainableObjPtr(*I)) - return CS.onlyReadsMemory() ? ARCInstKind::User : ARCInstKind::CallOrUser; - - return CS.onlyReadsMemory() ? ARCInstKind::None : ARCInstKind::Call; -} - -/// \brief Return true if this value refers to a distinct and identifiable -/// object. -/// -/// This is similar to AliasAnalysis's isIdentifiedObject, except that it uses -/// special knowledge of ObjC conventions. -static inline bool IsObjCIdentifiedObject(const Value *V) { - // Assume that call results and arguments have their own "provenance". - // Constants (including GlobalVariables) and Allocas are never - // reference-counted. - if (isa(V) || isa(V) || - isa(V) || isa(V) || - isa(V)) - return true; - - if (const LoadInst *LI = dyn_cast(V)) { - const Value *Pointer = - GetRCIdentityRoot(LI->getPointerOperand()); - if (const GlobalVariable *GV = dyn_cast(Pointer)) { - // A constant pointer can't be pointing to an object on the heap. It may - // be reference-counted, but it won't be deleted. - if (GV->isConstant()) - return true; - StringRef Name = GV->getName(); - // These special variables are known to hold values which are not - // reference-counted pointers. - if (Name.startswith("\01l_objc_msgSend_fixup_")) - return true; - - StringRef Section = GV->getSection(); - if (Section.find("__message_refs") != StringRef::npos || - Section.find("__objc_classrefs") != StringRef::npos || - Section.find("__objc_superrefs") != StringRef::npos || - Section.find("__objc_methname") != StringRef::npos || - Section.find("__cstring") != StringRef::npos) - return true; - } - } - - return false; -} - -enum class ARCMDKindID { - ImpreciseRelease, - CopyOnEscape, - NoObjCARCExceptions, -}; - -/// A cache of MDKinds used by various ARC optimizations. -class ARCMDKindCache { - Module *M; - - /// The Metadata Kind for clang.imprecise_release metadata. - llvm::Optional ImpreciseReleaseMDKind; - - /// The Metadata Kind for clang.arc.copy_on_escape metadata. 
-  llvm::Optional<unsigned> CopyOnEscapeMDKind;
-
-  /// The Metadata Kind for clang.arc.no_objc_arc_exceptions metadata.
-  llvm::Optional<unsigned> NoObjCARCExceptionsMDKind;
-
-public:
-  void init(Module *Mod) {
-    M = Mod;
-    ImpreciseReleaseMDKind = NoneType::None;
-    CopyOnEscapeMDKind = NoneType::None;
-    NoObjCARCExceptionsMDKind = NoneType::None;
-  }
-
-  unsigned get(ARCMDKindID ID) {
-    switch (ID) {
-    case ARCMDKindID::ImpreciseRelease:
-      if (!ImpreciseReleaseMDKind)
-        ImpreciseReleaseMDKind =
-          M->getContext().getMDKindID("clang.imprecise_release");
-      return *ImpreciseReleaseMDKind;
-    case ARCMDKindID::CopyOnEscape:
-      if (!CopyOnEscapeMDKind)
-        CopyOnEscapeMDKind =
-          M->getContext().getMDKindID("clang.arc.copy_on_escape");
-      return *CopyOnEscapeMDKind;
-    case ARCMDKindID::NoObjCARCExceptions:
-      if (!NoObjCARCExceptionsMDKind)
-        NoObjCARCExceptionsMDKind =
-          M->getContext().getMDKindID("clang.arc.no_objc_arc_exceptions");
-      return *NoObjCARCExceptionsMDKind;
-    }
-    llvm_unreachable("Covered switch isn't covered?!");
-  }
-};
-
 } // end namespace objcarc
 } // end namespace llvm
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index d318643a359a..969e77c1f888 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -72,12 +72,9 @@ bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
   if (const Function *Callee = CS.getCalledFunction()) {
     if (Callee->isDeclaration() || Callee->mayBeOverridden())
       return true;
-    for (Function::const_iterator I = Callee->begin(), E = Callee->end();
-         I != E; ++I) {
-      const BasicBlock *BB = I;
-      for (BasicBlock::const_iterator J = BB->begin(), F = BB->end();
-           J != F; ++J)
-        if (ImmutableCallSite JCS = ImmutableCallSite(J))
+    for (const BasicBlock &BB : *Callee) {
+      for (const Instruction &I : BB)
+        if (ImmutableCallSite JCS = ImmutableCallSite(&I))
           // This recursion depth limit is arbitrary. It's just great
           // enough to cover known interesting testcases.
           if (Depth < 3 &&
@@ -96,7 +93,7 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
   Instruction *Push = nullptr;
   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
-    Instruction *Inst = I++;
+    Instruction *Inst = &*I++;
     switch (GetBasicARCInstKind(Inst)) {
     case ARCInstKind::AutoreleasepoolPush:
       Push = Inst;
@@ -169,7 +166,7 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
       if (std::next(F->begin()) != F->end())
         continue;
       // Ok, a single-block constructor function definition. Try to optimize it.
-      Changed |= OptimizeBB(F->begin());
+      Changed |= OptimizeBB(&F->front());
     }
 
   return Changed;
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
deleted file mode 100644
index eecc82fe572c..000000000000
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h
+++ /dev/null
@@ -1,74 +0,0 @@
-//===- ObjCARCAliasAnalysis.h - ObjC ARC Optimization -*- C++ -*-----------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file declares a simple ARC-aware AliasAnalysis using special knowledge
-/// of Objective C to enhance other optimization passes which rely on the Alias
-/// Analysis infrastructure.
-///
-/// WARNING: This file knows about certain library functions. It recognizes
-/// them by name, and hardwires knowledge of their semantics.
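// Usage sketch (editor's addition; hypothetical caller code, not in the
// patch): ARCMDKindCache above interns metadata-kind IDs once per module so
// the ARC passes avoid repeated string lookups:
//   ARCMDKindCache MDKindCache;
//   MDKindCache.init(&M);  // once per module
//   unsigned KindID = MDKindCache.get(ARCMDKindID::ImpreciseRelease);
//   Release->setMetadata(KindID, MDNode::get(M.getContext(), None));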
-///
-/// WARNING: This file knows about how certain Objective-C library functions
-/// are used. Naive LLVM IR transformations which would otherwise be
-/// behavior-preserving may break these assumptions.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
-#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARCALIASANALYSIS_H
-
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Pass.h"
-
-namespace llvm {
-namespace objcarc {
-
-  /// \brief This is a simple alias analysis implementation that uses
-  /// knowledge of ARC constructs to answer queries.
-  ///
-  /// TODO: This class could be generalized to know about other ObjC-specific
-  /// tricks, such as knowing that ivars in the non-fragile ABI are
-  /// non-aliasing even though their offsets are dynamic.
-  class ObjCARCAliasAnalysis : public ImmutablePass,
-                               public AliasAnalysis {
-  public:
-    static char ID; // Class identification, replacement for typeinfo
-    ObjCARCAliasAnalysis() : ImmutablePass(ID) {
-      initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
-    }
-
-  private:
-    bool doInitialization(Module &M) override;
-
-    /// This method is used when a pass implements an analysis interface
-    /// through multiple inheritance. If needed, it should override this to
-    /// adjust the this pointer as needed for the specified pass info.
-    void *getAdjustedAnalysisPointer(const void *PI) override {
-      if (PI == &AliasAnalysis::ID)
-        return static_cast<AliasAnalysis *>(this);
-      return this;
-    }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override;
-    AliasResult alias(const MemoryLocation &LocA,
-                      const MemoryLocation &LocB) override;
-    bool pointsToConstantMemory(const MemoryLocation &Loc,
-                                bool OrLocal) override;
-    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
-    ModRefBehavior getModRefBehavior(const Function *F) override;
-    ModRefResult getModRefInfo(ImmutableCallSite CS,
-                               const MemoryLocation &Loc) override;
-    ModRefResult getModRefInfo(ImmutableCallSite CS1,
-                               ImmutableCallSite CS2) override;
-  };
-
-} // namespace objcarc
-} // namespace llvm
-
-#endif
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index baca76ba3f2a..1cdf5689f42a 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -119,9 +119,9 @@ bool ObjCARCContract::optimizeRetainCall(Function &F, Instruction *Retain) {
     return false;
 
   // Check that the call is next to the retain.
-  BasicBlock::const_iterator I = Call;
-  ++I;
-  while (IsNoopInstruction(I)) ++I;
+  BasicBlock::const_iterator I = ++Call->getIterator();
+  while (IsNoopInstruction(&*I))
+    ++I;
   if (&*I != Retain)
     return false;
 
@@ -247,7 +247,7 @@ static StoreInst *findSafeStoreForStoreStrongContraction(LoadInst *Load,
 
     // Ok, now we know we have not seen a store yet. See if Inst can write to
     // our load location; if it cannot, just ignore the instruction.
-    if (!(AA->getModRefInfo(Inst, Loc) & AliasAnalysis::Mod))
+    if (!(AA->getModRefInfo(Inst, Loc) & MRI_Mod))
       continue;
 
     Store = dyn_cast<StoreInst>(Inst);
@@ -282,9 +282,9 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store,
                                     Instruction *Release,
                                     ProvenanceAnalysis &PA) {
   // Walk up from the Store to find the retain.
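// Editor's illustration (hypothetical IR, not part of this patch): the
// load/retain/store/release sequence that ObjCARCContract collapses into
// objc_storeStrong is
//   %old = load i8*, i8** %slot
//   %new.retained = call i8* @objc_retain(i8* %new)
//   store i8* %new.retained, i8** %slot
//   call void @objc_release(i8* %old)
// which, when the safety checks in the surrounding code succeed, becomes
//   call void @objc_storeStrong(i8** %slot, i8* %new)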
- BasicBlock::iterator I = Store; + BasicBlock::iterator I = Store->getIterator(); BasicBlock::iterator Begin = Store->getParent()->begin(); - while (I != Begin && GetBasicARCInstKind(I) != ARCInstKind::Retain) { + while (I != Begin && GetBasicARCInstKind(&*I) != ARCInstKind::Retain) { Instruction *Inst = &*I; // It is only safe to move the retain to the store if we can prove @@ -294,7 +294,7 @@ findRetainForStoreStrongContraction(Value *New, StoreInst *Store, return nullptr; --I; } - Instruction *Retain = I; + Instruction *Retain = &*I; if (GetBasicARCInstKind(Retain) != ARCInstKind::Retain) return nullptr; if (GetArgRCIdentityRoot(Retain) != New) @@ -429,7 +429,7 @@ bool ObjCARCContract::tryToPeepholeInstruction( // insert it now. if (!RetainRVMarker) return false; - BasicBlock::iterator BBI = Inst; + BasicBlock::iterator BBI = Inst->getIterator(); BasicBlock *InstParent = Inst->getParent(); // Step up to see if the call immediately precedes the RetainRV call. @@ -440,11 +440,11 @@ bool ObjCARCContract::tryToPeepholeInstruction( BasicBlock *Pred = InstParent->getSinglePredecessor(); if (!Pred) goto decline_rv_optimization; - BBI = Pred->getTerminator(); + BBI = Pred->getTerminator()->getIterator(); break; } --BBI; - } while (IsNoopInstruction(BBI)); + } while (IsNoopInstruction(&*BBI)); if (&*BBI == GetArgRCIdentityRoot(Inst)) { DEBUG(dbgs() << "Adding inline asm marker for " @@ -511,10 +511,10 @@ bool ObjCARCContract::runOnFunction(Function &F) { return false; Changed = false; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); - PA.setAA(&getAnalysis()); + PA.setAA(&getAnalysis().getAAResults()); DEBUG(llvm::dbgs() << "**** ObjCARC Contract ****\n"); @@ -629,13 +629,13 @@ bool ObjCARCContract::runOnFunction(Function &F) { char ObjCARCContract::ID = 0; INITIALIZE_PASS_BEGIN(ObjCARCContract, "objc-arc-contract", "ObjC ARC contraction", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_END(ObjCARCContract, "objc-arc-contract", "ObjC ARC contraction", false, false) void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.setPreservesCFG(); } diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp index 9edbb17e8d1b..f0ee6e2be487 100644 --- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp +++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp @@ -28,7 +28,6 @@ #include "ARCRuntimeEntryPoints.h" #include "BlotMapVector.h" #include "DependencyAnalysis.h" -#include "ObjCARCAliasAnalysis.h" #include "ProvenanceAnalysis.h" #include "PtrState.h" #include "llvm/ADT/DenseMap.h" @@ -36,6 +35,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" #include "llvm/IR/CFG.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" @@ -482,7 +482,7 @@ namespace { /// A flag indicating whether this optimization pass should run. bool Run; - /// Flags which determine whether each of the interesting runtine functions + /// Flags which determine whether each of the interesting runtime functions /// is in fact used in the current function. 
unsigned UsedInThisFunction; @@ -556,7 +556,7 @@ namespace { char ObjCARCOpt::ID = 0; INITIALIZE_PASS_BEGIN(ObjCARCOpt, "objc-arc", "ObjC ARC optimization", false, false) -INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass) INITIALIZE_PASS_END(ObjCARCOpt, "objc-arc", "ObjC ARC optimization", false, false) @@ -565,8 +565,8 @@ Pass *llvm::createObjCARCOptPass() { } void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); // ARC optimization doesn't currently split critical edges. AU.setPreservesCFG(); } @@ -581,16 +581,18 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { ImmutableCallSite CS(Arg); if (const Instruction *Call = CS.getInstruction()) { if (Call->getParent() == RetainRV->getParent()) { - BasicBlock::const_iterator I = Call; + BasicBlock::const_iterator I(Call); ++I; - while (IsNoopInstruction(I)) ++I; + while (IsNoopInstruction(&*I)) + ++I; if (&*I == RetainRV) return false; } else if (const InvokeInst *II = dyn_cast(Call)) { BasicBlock *RetainRVParent = RetainRV->getParent(); if (II->getNormalDest() == RetainRVParent) { BasicBlock::const_iterator I = RetainRVParent->begin(); - while (IsNoopInstruction(I)) ++I; + while (IsNoopInstruction(&*I)) + ++I; if (&*I == RetainRV) return false; } @@ -599,18 +601,21 @@ ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) { // Check for being preceded by an objc_autoreleaseReturnValue on the same // pointer. In this case, we can delete the pair. - BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin(); + BasicBlock::iterator I = RetainRV->getIterator(), + Begin = RetainRV->getParent()->begin(); if (I != Begin) { - do --I; while (I != Begin && IsNoopInstruction(I)); - if (GetBasicARCInstKind(I) == ARCInstKind::AutoreleaseRV && - GetArgRCIdentityRoot(I) == Arg) { + do + --I; + while (I != Begin && IsNoopInstruction(&*I)); + if (GetBasicARCInstKind(&*I) == ARCInstKind::AutoreleaseRV && + GetArgRCIdentityRoot(&*I) == Arg) { Changed = true; ++NumPeeps; DEBUG(dbgs() << "Erasing autoreleaseRV,retainRV pair: " << *I << "\n" << "Erasing " << *RetainRV << "\n"); - EraseInstruction(I); + EraseInstruction(&*I); EraseInstruction(RetainRV); return true; } @@ -1216,7 +1221,7 @@ bool ObjCARCOpt::VisitBottomUp(BasicBlock *BB, // Visit all the instructions, bottom-up. for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) { - Instruction *Inst = std::prev(I); + Instruction *Inst = &*std::prev(I); // Invoke instructions are visited as part of their successors (below). if (isa(Inst)) @@ -1264,7 +1269,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst, Arg = GetArgRCIdentityRoot(Inst); TopDownPtrState &S = MyStates.getPtrTopDownState(Arg); NestingDetected |= S.InitTopDown(Class, Inst); - // A retain can be a potential use; procede to the generic checking + // A retain can be a potential use; proceed to the generic checking // code below. break; } @@ -1342,12 +1347,10 @@ ObjCARCOpt::VisitTopDown(BasicBlock *BB, << "Performing Dataflow:\n"); // Visit all the instructions, top-down. 
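// Editor's illustration (hypothetical IR) for the autoreleaseRV/retainRV
// peephole in OptimizeRetainRVCall above:
//   %r = call i8* @objc_autoreleaseReturnValue(i8* %x)
//   %s = call i8* @objc_retainAutoreleasedReturnValue(i8* %x)
// When the two calls are adjacent (modulo no-op casts) and operate on the
// same pointer, they cancel out and both are erased.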
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - Instruction *Inst = I; + for (Instruction &Inst : *BB) { + DEBUG(dbgs() << " Visiting " << Inst << "\n"); - DEBUG(dbgs() << " Visiting " << *Inst << "\n"); - - NestingDetected |= VisitInstructionTopDown(Inst, Releases, MyStates); + NestingDetected |= VisitInstructionTopDown(&Inst, Releases, MyStates); } DEBUG(llvm::dbgs() << "\nState Before Checking for CFG Hazards:\n" @@ -1413,16 +1416,15 @@ ComputePostOrders(Function &F, // Functions may have many exits, and there also blocks which we treat // as exits due to ignored edges. SmallVector, 16> PredStack; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *ExitBB = I; - BBState &MyStates = BBStates[ExitBB]; + for (BasicBlock &ExitBB : F) { + BBState &MyStates = BBStates[&ExitBB]; if (!MyStates.isExit()) continue; MyStates.SetAsExit(); - PredStack.push_back(std::make_pair(ExitBB, MyStates.pred_begin())); - Visited.insert(ExitBB); + PredStack.push_back(std::make_pair(&ExitBB, MyStates.pred_begin())); + Visited.insert(&ExitBB); while (!PredStack.empty()) { reverse_dfs_next_succ: BBState::edge_iterator PE = BBStates[PredStack.back().first].pred_end(); @@ -1830,7 +1832,7 @@ void ObjCARCOpt::OptimizeWeakCalls(Function &F) { // analysis too, but that would want caching. A better approach would be to // use the technique that EarlyCSE uses. inst_iterator Current = std::prev(I); - BasicBlock *CurrentBB = Current.getBasicBlockIterator(); + BasicBlock *CurrentBB = &*Current.getBasicBlockIterator(); for (BasicBlock::iterator B = CurrentBB->begin(), J = Current.getInstructionIterator(); J != B; --J) { @@ -2008,10 +2010,7 @@ HasSafePathToPredecessorCall(const Value *Arg, Instruction *Retain, // Check that the call is a regular call. ARCInstKind Class = GetBasicARCInstKind(Call); - if (Class != ARCInstKind::CallOrUser && Class != ARCInstKind::Call) - return false; - - return true; + return Class == ARCInstKind::CallOrUser || Class == ARCInstKind::Call; } /// Find a dependent retain that precedes the given autorelease for which there @@ -2081,9 +2080,8 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { SmallPtrSet DependingInstructions; SmallPtrSet Visited; - for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - BasicBlock *BB = FI; - ReturnInst *Ret = dyn_cast(&BB->back()); + for (BasicBlock &BB: F) { + ReturnInst *Ret = dyn_cast(&BB.back()); DEBUG(dbgs() << "Visiting: " << *Ret << "\n"); @@ -2095,19 +2093,16 @@ void ObjCARCOpt::OptimizeReturns(Function &F) { // Look for an ``autorelease'' instruction that is a predecessor of Ret and // dependent on Arg such that there are no instructions dependent on Arg // that need a positive ref count in between the autorelease and Ret. 
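// Editor's illustration (hypothetical IR, not from this patch): the shape
// OptimizeReturns is matching is
//   %3 = call i8* @objc_retain(i8* %2)
//   ...                     ; nothing here needs a positive ref count on %2
//   %4 = call i8* @objc_autorelease(i8* %2)
//   ret i8* %2
// When both calls are found and the safety checks pass, the retain and the
// autorelease cancel out and both are erased.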
- CallInst *Autorelease = - FindPredecessorAutoreleaseWithSafePath(Arg, BB, Ret, - DependingInstructions, Visited, - PA); + CallInst *Autorelease = FindPredecessorAutoreleaseWithSafePath( + Arg, &BB, Ret, DependingInstructions, Visited, PA); DependingInstructions.clear(); Visited.clear(); if (!Autorelease) continue; - CallInst *Retain = - FindPredecessorRetainWithSafePath(Arg, BB, Autorelease, - DependingInstructions, Visited, PA); + CallInst *Retain = FindPredecessorRetainWithSafePath( + Arg, &BB, Autorelease, DependingInstructions, Visited, PA); DependingInstructions.clear(); Visited.clear(); @@ -2192,7 +2187,7 @@ bool ObjCARCOpt::runOnFunction(Function &F) { DEBUG(dbgs() << "<<< ObjCARCOpt: Visiting Function: " << F.getName() << " >>>" "\n"); - PA.setAA(&getAnalysis()); + PA.setAA(&getAnalysis().getAAResults()); #ifndef NDEBUG if (AreStatisticsEnabled()) { diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h index 0ac41d3ea326..1a12b659e5a3 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysis.h +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysis.h @@ -26,10 +26,10 @@ #define LLVM_LIB_TRANSFORMS_OBJCARC_PROVENANCEANALYSIS_H #include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/AliasAnalysis.h" namespace llvm { class Value; - class AliasAnalysis; class DataLayout; class PHINode; class SelectInst; diff --git a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp index 0be75af52014..c274e8182fb5 100644 --- a/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp +++ b/lib/Transforms/ObjCARC/ProvenanceAnalysisEvaluator.cpp @@ -35,7 +35,7 @@ char PAEval::ID = 0; PAEval::PAEval() : FunctionPass(ID) {} void PAEval::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); + AU.addRequired(); } static StringRef getName(Value *V) { @@ -65,7 +65,7 @@ bool PAEval::runOnFunction(Function &F) { } ProvenanceAnalysis PA; - PA.setAA(&getAnalysis()); + PA.setAA(&getAnalysis().getAAResults()); const DataLayout &DL = F.getParent()->getDataLayout(); for (Value *V1 : Values) { @@ -89,6 +89,6 @@ FunctionPass *llvm::createPAEvalPass() { return new PAEval(); } INITIALIZE_PASS_BEGIN(PAEval, "pa-eval", "Evaluate ProvenanceAnalysis on all pairs", false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(PAEval, "pa-eval", "Evaluate ProvenanceAnalysis on all pairs", false, true) diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index ae20e7e6d347..df64fa32f3f8 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -256,9 +256,9 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, // one of its successor blocks, since we can't insert code after it // in its own block, and we don't want to split critical edges. if (isa(Inst)) - InsertReverseInsertPt(BB->getFirstInsertionPt()); + InsertReverseInsertPt(&*BB->getFirstInsertionPt()); else - InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); + InsertReverseInsertPt(&*++Inst->getIterator()); SetSeq(S_Use); } else if (Seq == S_Release && IsUser(Class)) { DEBUG(dbgs() << " PreciseReleaseUse: Seq: " << GetSeq() << "; " @@ -268,9 +268,9 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst, assert(!HasReverseInsertPts()); // As above; handle invoke specially. 
if (isa(Inst)) - InsertReverseInsertPt(BB->getFirstInsertionPt()); + InsertReverseInsertPt(&*BB->getFirstInsertionPt()); else - InsertReverseInsertPt(std::next(BasicBlock::iterator(Inst))); + InsertReverseInsertPt(&*++Inst->getIterator()); } break; case S_Stop: diff --git a/lib/Transforms/ObjCARC/PtrState.h b/lib/Transforms/ObjCARC/PtrState.h index e45e1ea96c53..9749e44822b2 100644 --- a/lib/Transforms/ObjCARC/PtrState.h +++ b/lib/Transforms/ObjCARC/PtrState.h @@ -17,8 +17,8 @@ #ifndef LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H #define LLVM_LIB_TRANSFORMS_OBJCARC_PTRSTATE_H -#include "ARCInstKind.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/ObjCARCInstKind.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Value.h" #include "llvm/Support/raw_ostream.h" @@ -96,7 +96,7 @@ struct RRInfo { }; /// \brief This class summarizes several per-pointer runtime properties which -/// are propogated through the flow graph. +/// are propagated through the flow graph. class PtrState { protected: /// True if the reference count is known to be incremented. @@ -172,7 +172,7 @@ struct BottomUpPtrState : PtrState { bool InitBottomUp(ARCMDKindCache &Cache, Instruction *I); /// Return true if this set of releases can be paired with a release. Modifies - /// state appropriately to reflect that the matching occured if it is + /// state appropriately to reflect that the matching occurred if it is /// successful. /// /// It is assumed that one has already checked that the RCIdentity of the @@ -194,7 +194,7 @@ struct TopDownPtrState : PtrState { /// Return true if this set of retains can be paired with the given /// release. Modifies state appropriately to reflect that the matching - /// occured. + /// occurred. bool MatchWithRelease(ARCMDKindCache &Cache, Instruction *Release); void HandlePotentialUse(Instruction *Inst, const Value *Ptr, diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index d6fc91641588..590a52da6b19 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -1,4 +1,4 @@ -//===- DCE.cpp - Code to perform dead code elimination --------------------===// +//===- ADCE.cpp - Code to perform dead code elimination -------------------===// // // The LLVM Compiler Infrastructure // @@ -14,52 +14,33 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/ADCE.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; #define DEBUG_TYPE "adce" STATISTIC(NumRemoved, "Number of instructions removed"); -namespace { -struct ADCE : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - ADCE() : FunctionPass(ID) { - initializeADCEPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function& F) override; - - void getAnalysisUsage(AnalysisUsage& AU) const override { - AU.setPreservesCFG(); - } -}; -} - -char ADCE::ID = 0; -INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false) - -bool ADCE::runOnFunction(Function& F) { - if (skipOptnoneFunction(F)) - return false; - +static bool aggressiveDCE(Function& F) { 
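// Editor's sketch (hypothetical toy model, not LLVM API): the function below
// is classic mark-and-sweep over instructions; the same shape in miniature:
//
//   #include <set>
//   #include <vector>
//   struct Node { std::vector<Node *> Ops; bool Root = false; };
//   std::set<Node *> markLive(const std::vector<Node *> &All) {
//     std::set<Node *> Alive;
//     std::vector<Node *> Worklist;
//     for (Node *N : All)            // roots: externally visible effects
//       if (N->Root) { Alive.insert(N); Worklist.push_back(N); }
//     while (!Worklist.empty()) {    // propagate liveness to operands
//       Node *N = Worklist.back(); Worklist.pop_back();
//       for (Node *Op : N->Ops)
//         if (Alive.insert(Op).second) Worklist.push_back(Op);
//     }
//     return Alive;                  // everything not in Alive is dead
//   }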
SmallPtrSet Alive; SmallVector Worklist; // Collect the set of "root" instructions that are known live. - for (Instruction &I : inst_range(F)) { - if (isa(I) || isa(I) || - isa(I) || I.mayHaveSideEffects()) { + for (Instruction &I : instructions(F)) { + if (isa(I) || isa(I) || I.isEHPad() || + I.mayHaveSideEffects()) { Alive.insert(&I); Worklist.push_back(&I); } @@ -79,7 +60,7 @@ bool ADCE::runOnFunction(Function& F) { // which have no side effects and do not influence the control flow or return // value of the function, and may therefore be deleted safely. // NOTE: We reuse the Worklist vector here for memory efficiency. - for (Instruction &I : inst_range(F)) { + for (Instruction &I : instructions(F)) { if (!Alive.count(&I)) { Worklist.push_back(&I); I.dropAllReferences(); @@ -94,6 +75,34 @@ bool ADCE::runOnFunction(Function& F) { return !Worklist.empty(); } -FunctionPass *llvm::createAggressiveDCEPass() { - return new ADCE(); +PreservedAnalyses ADCEPass::run(Function &F) { + if (aggressiveDCE(F)) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); } + +namespace { +struct ADCELegacyPass : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ADCELegacyPass() : FunctionPass(ID) { + initializeADCELegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function& F) override { + if (skipOptnoneFunction(F)) + return false; + return aggressiveDCE(F); + } + + void getAnalysisUsage(AnalysisUsage& AU) const override { + AU.setPreservesCFG(); + AU.addPreserved(); + } +}; +} + +char ADCELegacyPass::ID = 0; +INITIALIZE_PASS(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination", + false, false) + +FunctionPass *llvm::createAggressiveDCEPass() { return new ADCELegacyPass(); } diff --git a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 8918909f484a..4b721d38adba 100644 --- a/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -21,6 +21,8 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -54,13 +56,15 @@ struct AlignmentFromAssumptions : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); } // For memory transfers, we need a common alignment for both the source and @@ -84,7 +88,7 @@ INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME, aip_name, false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME, aip_name, false, false) @@ -249,8 +253,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I, // The mask must have some trailing ones (otherwise the condition is // trivial and tells us nothing about the alignment of the left operand). 
- unsigned TrailingOnes = - MaskSCEV->getValue()->getValue().countTrailingOnes(); + unsigned TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes(); if (!TrailingOnes) return false; @@ -270,7 +273,7 @@ bool AlignmentFromAssumptions::extractAlignmentInfo(CallInst *I, OffSCEV = nullptr; if (PtrToIntInst *PToI = dyn_cast(AndLHS)) { AAPtr = PToI->getPointerOperand(); - OffSCEV = SE->getConstant(Int64Ty, 0); + OffSCEV = SE->getZero(Int64Ty); } else if (const SCEVAddExpr* AndLHSAddSCEV = dyn_cast(AndLHSSCEV)) { // Try to find the ptrtoint; subtract it and the rest is the offset. @@ -410,7 +413,7 @@ bool AlignmentFromAssumptions::processAssumption(CallInst *ACall) { bool AlignmentFromAssumptions::runOnFunction(Function &F) { bool Changed = false; auto &AC = getAnalysis().getAssumptionCache(F); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); NewDestAlignments.clear(); diff --git a/lib/Transforms/Scalar/BDCE.cpp b/lib/Transforms/Scalar/BDCE.cpp index 09c605e76737..cb9b8b6fffc8 100644 --- a/lib/Transforms/Scalar/BDCE.cpp +++ b/lib/Transforms/Scalar/BDCE.cpp @@ -15,26 +15,18 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/BasicBlock.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/DemandedBits.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - using namespace llvm; #define DEBUG_TYPE "bdce" @@ -53,342 +45,42 @@ struct BDCE : public FunctionPass { void getAnalysisUsage(AnalysisUsage& AU) const override { AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); } - - void determineLiveOperandBits(const Instruction *UserI, - const Instruction *I, unsigned OperandNo, - const APInt &AOut, APInt &AB, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2); - - AssumptionCache *AC; - DominatorTree *DT; }; } char BDCE::ID = 0; INITIALIZE_PASS_BEGIN(BDCE, "bdce", "Bit-Tracking Dead Code Elimination", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DemandedBits) INITIALIZE_PASS_END(BDCE, "bdce", "Bit-Tracking Dead Code Elimination", false, false) -static bool isAlwaysLive(Instruction *I) { - return isa(I) || isa(I) || - isa(I) || I->mayHaveSideEffects(); -} - -void BDCE::determineLiveOperandBits(const Instruction *UserI, - const Instruction *I, unsigned OperandNo, - const APInt &AOut, APInt &AB, - APInt &KnownZero, APInt &KnownOne, - APInt &KnownZero2, APInt &KnownOne2) { - unsigned BitWidth = AB.getBitWidth(); - - // We're called once per operand, but for some instructions, we need to - // compute known bits of both operands in order to determine the live bits of - // either (when both operands are instructions themselves). We don't, - // however, want to do this twice, so we cache the result in APInts that live - // in the caller. 
For the two-relevant-operands case, both operand values are - // provided here. - auto ComputeKnownBits = - [&](unsigned BitWidth, const Value *V1, const Value *V2) { - const DataLayout &DL = I->getModule()->getDataLayout(); - KnownZero = APInt(BitWidth, 0); - KnownOne = APInt(BitWidth, 0); - computeKnownBits(const_cast(V1), KnownZero, KnownOne, DL, 0, - AC, UserI, DT); - - if (V2) { - KnownZero2 = APInt(BitWidth, 0); - KnownOne2 = APInt(BitWidth, 0); - computeKnownBits(const_cast(V2), KnownZero2, KnownOne2, DL, - 0, AC, UserI, DT); - } - }; - - switch (UserI->getOpcode()) { - default: break; - case Instruction::Call: - case Instruction::Invoke: - if (const IntrinsicInst *II = dyn_cast(UserI)) - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::bswap: - // The alive bits of the input are the swapped alive bits of - // the output. - AB = AOut.byteSwap(); - break; - case Intrinsic::ctlz: - if (OperandNo == 0) { - // We need some output bits, so we need all bits of the - // input to the left of, and including, the leftmost bit - // known to be one. - ComputeKnownBits(BitWidth, I, nullptr); - AB = APInt::getHighBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countLeadingZeros()+1)); - } - break; - case Intrinsic::cttz: - if (OperandNo == 0) { - // We need some output bits, so we need all bits of the - // input to the right of, and including, the rightmost bit - // known to be one. - ComputeKnownBits(BitWidth, I, nullptr); - AB = APInt::getLowBitsSet(BitWidth, - std::min(BitWidth, KnownOne.countTrailingZeros()+1)); - } - break; - } - break; - case Instruction::Add: - case Instruction::Sub: - // Find the highest live output bit. We don't need any more input - // bits than that (adds, and thus subtracts, ripple only to the - // left). - AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits()); - break; - case Instruction::Shl: - if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); - AB = AOut.lshr(ShiftAmt); - - // If the shift is nuw/nsw, then the high bits are not dead - // (because we've promised that they *must* be zero). - const ShlOperator *S = cast(UserI); - if (S->hasNoSignedWrap()) - AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); - else if (S->hasNoUnsignedWrap()) - AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt); - } - break; - case Instruction::LShr: - if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); - AB = AOut.shl(ShiftAmt); - - // If the shift is exact, then the low bits are not dead - // (they must be zero). - if (cast(UserI)->isExact()) - AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - } - break; - case Instruction::AShr: - if (OperandNo == 0) - if (ConstantInt *CI = - dyn_cast(UserI->getOperand(1))) { - uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); - AB = AOut.shl(ShiftAmt); - // Because the high input bit is replicated into the - // high-order bits of the result, if we need any of those - // bits, then we must keep the highest input bit. - if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt)) - .getBoolValue()) - AB.setBit(BitWidth-1); - - // If the shift is exact, then the low bits are not dead - // (they must be zero). 
- if (cast(UserI)->isExact()) - AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); - } - break; - case Instruction::And: - AB = AOut; - - // For bits that are known zero, the corresponding bits in the - // other operand are dead (unless they're both zero, in which - // case they can't both be dead, so just mark the LHS bits as - // dead). - if (OperandNo == 0) { - ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownZero2; - } else { - if (!isa(UserI->getOperand(0))) - ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownZero & ~KnownZero2); - } - break; - case Instruction::Or: - AB = AOut; - - // For bits that are known one, the corresponding bits in the - // other operand are dead (unless they're both one, in which - // case they can't both be dead, so just mark the LHS bits as - // dead). - if (OperandNo == 0) { - ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); - AB &= ~KnownOne2; - } else { - if (!isa(UserI->getOperand(0))) - ComputeKnownBits(BitWidth, UserI->getOperand(0), I); - AB &= ~(KnownOne & ~KnownOne2); - } - break; - case Instruction::Xor: - case Instruction::PHI: - AB = AOut; - break; - case Instruction::Trunc: - AB = AOut.zext(BitWidth); - break; - case Instruction::ZExt: - AB = AOut.trunc(BitWidth); - break; - case Instruction::SExt: - AB = AOut.trunc(BitWidth); - // Because the high input bit is replicated into the - // high-order bits of the result, if we need any of those - // bits, then we must keep the highest input bit. - if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(), - AOut.getBitWidth() - BitWidth)) - .getBoolValue()) - AB.setBit(BitWidth-1); - break; - case Instruction::Select: - if (OperandNo != 0) - AB = AOut; - break; - } -} - bool BDCE::runOnFunction(Function& F) { if (skipOptnoneFunction(F)) return false; + DemandedBits &DB = getAnalysis(); - AC = &getAnalysis().getAssumptionCache(F); - DT = &getAnalysis().getDomTree(); - - DenseMap AliveBits; SmallVector Worklist; - - // The set of visited instructions (non-integer-typed only). - SmallPtrSet Visited; - - // Collect the set of "root" instructions that are known live. - for (Instruction &I : inst_range(F)) { - if (!isAlwaysLive(&I)) - continue; - - DEBUG(dbgs() << "BDCE: Root: " << I << "\n"); - // For integer-valued instructions, set up an initial empty set of alive - // bits and add the instruction to the work list. For other instructions - // add their operands to the work list (for integer values operands, mark - // all bits as live). - if (IntegerType *IT = dyn_cast(I.getType())) { - if (!AliveBits.count(&I)) { - AliveBits[&I] = APInt(IT->getBitWidth(), 0); - Worklist.push_back(&I); - } - - continue; - } - - // Non-integer-typed instructions... - for (Use &OI : I.operands()) { - if (Instruction *J = dyn_cast(OI)) { - if (IntegerType *IT = dyn_cast(J->getType())) - AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth()); - Worklist.push_back(J); - } - } - // To save memory, we don't add I to the Visited set here. Instead, we - // check isAlwaysLive on every instruction when searching for dead - // instructions later (we need to check isAlwaysLive for the - // integer-typed instructions anyway). - } - - // Propagate liveness backwards to operands. 
- while (!Worklist.empty()) { - Instruction *UserI = Worklist.pop_back_val(); - - DEBUG(dbgs() << "BDCE: Visiting: " << *UserI); - APInt AOut; - if (UserI->getType()->isIntegerTy()) { - AOut = AliveBits[UserI]; - DEBUG(dbgs() << " Alive Out: " << AOut); - } - DEBUG(dbgs() << "\n"); - - if (!UserI->getType()->isIntegerTy()) - Visited.insert(UserI); - - APInt KnownZero, KnownOne, KnownZero2, KnownOne2; - // Compute the set of alive bits for each operand. These are anded into the - // existing set, if any, and if that changes the set of alive bits, the - // operand is added to the work-list. - for (Use &OI : UserI->operands()) { - if (Instruction *I = dyn_cast(OI)) { - if (IntegerType *IT = dyn_cast(I->getType())) { - unsigned BitWidth = IT->getBitWidth(); - APInt AB = APInt::getAllOnesValue(BitWidth); - if (UserI->getType()->isIntegerTy() && !AOut && - !isAlwaysLive(UserI)) { - AB = APInt(BitWidth, 0); - } else { - // If all bits of the output are dead, then all bits of the input - // Bits of each operand that are used to compute alive bits of the - // output are alive, all others are dead. - determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, - KnownZero, KnownOne, - KnownZero2, KnownOne2); - } - - // If we've added to the set of alive bits (or the operand has not - // been previously visited), then re-queue the operand to be visited - // again. - APInt ABPrev(BitWidth, 0); - auto ABI = AliveBits.find(I); - if (ABI != AliveBits.end()) - ABPrev = ABI->second; - - APInt ABNew = AB | ABPrev; - if (ABNew != ABPrev || ABI == AliveBits.end()) { - AliveBits[I] = std::move(ABNew); - Worklist.push_back(I); - } - } else if (!Visited.count(I)) { - Worklist.push_back(I); - } - } - } - } - bool Changed = false; - // The inverse of the live set is the dead set. These are those instructions - // which have no side effects and do not influence the control flow or return - // value of the function, and may therefore be deleted safely. - // NOTE: We reuse the Worklist vector here for memory efficiency. - for (Instruction &I : inst_range(F)) { - // For live instructions that have all dead bits, first make them dead by - // replacing all uses with something else. Then, if they don't need to - // remain live (because they have side effects, etc.) we can remove them. - if (I.getType()->isIntegerTy()) { - auto ABI = AliveBits.find(&I); - if (ABI != AliveBits.end()) { - if (ABI->second.getBoolValue()) - continue; - - DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); - // FIXME: In theory we could substitute undef here instead of zero. - // This should be reconsidered once we settle on the semantics of - // undef, poison, etc. - Value *Zero = ConstantInt::get(I.getType(), 0); - ++NumSimplified; - I.replaceAllUsesWith(Zero); - Changed = true; - } - } else if (Visited.count(&I)) { - continue; + for (Instruction &I : instructions(F)) { + if (I.getType()->isIntegerTy() && + !DB.getDemandedBits(&I).getBoolValue()) { + // For live instructions that have all dead bits, first make them dead by + // replacing all uses with something else. Then, if they don't need to + // remain live (because they have side effects, etc.) we can remove them. + DEBUG(dbgs() << "BDCE: Trivializing: " << I << " (all bits dead)\n"); + // FIXME: In theory we could substitute undef here instead of zero. + // This should be reconsidered once we settle on the semantics of + // undef, poison, etc. 
+ Value *Zero = ConstantInt::get(I.getType(), 0); + ++NumSimplified; + I.replaceAllUsesWith(Zero); + Changed = true; } - - if (isAlwaysLive(&I)) + if (!DB.isInstructionDead(&I)) continue; Worklist.push_back(&I); diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 7ee279a56600..a0ddbd085206 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_library(LLVMScalarOpts LoopIdiomRecognize.cpp LoopInstSimplify.cpp LoopInterchange.cpp + LoopLoadElimination.cpp LoopRerollPass.cpp LoopRotation.cpp LoopStrengthReduce.cpp @@ -38,7 +39,6 @@ add_llvm_library(LLVMScalarOpts RewriteStatepointsForGC.cpp SCCP.cpp SROA.cpp - SampleProfile.cpp Scalar.cpp ScalarReplAggregates.cpp Scalarizer.cpp diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp index 4288742dd3eb..84f7f5fff5b5 100644 --- a/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -223,10 +223,10 @@ Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst, } // The simple and common case. This also includes constant expressions. - if (!isa(Inst) && !isa(Inst)) + if (!isa(Inst) && !Inst->isEHPad()) return Inst; - // We can't insert directly before a phi node or landing pad. Insert before + // We can't insert directly before a phi node or an eh pad. Insert before // the terminator of the incoming or dominating block. assert(Entry != Inst->getParent() && "PHI or landing pad in entry block!"); if (Idx != ~0U && isa(Inst)) @@ -365,9 +365,9 @@ void ConstantHoisting::collectConstantCandidates(ConstCandMapType &ConstCandMap, /// into an instruction itself. void ConstantHoisting::collectConstantCandidates(Function &Fn) { ConstCandMapType ConstCandMap; - for (Function::iterator BB : Fn) - for (BasicBlock::iterator Inst : *BB) - collectConstantCandidates(ConstCandMap, Inst); + for (BasicBlock &BB : Fn) + for (Instruction &Inst : BB) + collectConstantCandidates(ConstCandMap, &Inst); } /// \brief Find the base constant within the given range and rebase all other diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 79624b2e4c47..686bd4071104 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/IR/CFG.h" @@ -32,6 +33,7 @@ STATISTIC(NumPhis, "Number of phis propagated"); STATISTIC(NumSelects, "Number of selects propagated"); STATISTIC(NumMemAccess, "Number of memory access targets propagated"); STATISTIC(NumCmps, "Number of comparisons propagated"); +STATISTIC(NumReturns, "Number of return values propagated"); STATISTIC(NumDeadCases, "Number of switch cases removed"); namespace { @@ -43,6 +45,11 @@ namespace { bool processMemAccess(Instruction *I); bool processCmp(CmpInst *C); bool processSwitch(SwitchInst *SI); + bool processCallSite(CallSite CS); + + /// Return a constant value for V usable at At and everything it + /// dominates. If no such Constant can be found, return nullptr. 
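// Editor's illustration (hypothetical IR): because getConstantAt leans on
// LazyValueInfo, it can fold values that plain constant propagation cannot:
//   %c = icmp eq i32 %x, 7
//   br i1 %c, label %t, label %f
// t:
//   ret i1 %c          ; LVI proves %c is true on this path -> ret i1 true
// The Ret handler in runOnFunction uses this to rewrite return values
// (counted by NumReturns).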
+ Constant *getConstantAt(Value *V, Instruction *At); public: static char ID; @@ -54,6 +61,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); } }; } @@ -178,44 +186,33 @@ bool CorrelatedValuePropagation::processMemAccess(Instruction *I) { return true; } -/// processCmp - If the value of this comparison could be determined locally, -/// constant propagation would already have figured it out. Instead, walk -/// the predecessors and statically evaluate the comparison based on information -/// available on that edge. If a given static evaluation is true on ALL -/// incoming edges, then it's true universally and we can simplify the compare. +/// processCmp - See if LazyValueInfo's ability to exploit edge conditions, +/// or range information is sufficient to prove this comparison. Even for +/// local conditions, this can sometimes prove conditions instcombine can't by +/// exploiting range information. bool CorrelatedValuePropagation::processCmp(CmpInst *C) { Value *Op0 = C->getOperand(0); - if (isa(Op0) && - cast(Op0)->getParent() == C->getParent()) - return false; - Constant *Op1 = dyn_cast(C->getOperand(1)); if (!Op1) return false; - pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent()); - if (PI == PE) return false; + // As a policy choice, we choose not to waste compile time on anything where + // the comparison is testing local values. While LVI can sometimes reason + // about such cases, it's not its primary purpose. We do make sure to do + // the block local query for uses from terminator instructions, but that's + // handled in the code for each terminator. + auto *I = dyn_cast(Op0); + if (I && I->getParent() == C->getParent()) + return false; - LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(), - C->getOperand(0), Op1, *PI, - C->getParent(), C); + LazyValueInfo::Tristate Result = + LVI->getPredicateAt(C->getPredicate(), Op0, Op1, C); if (Result == LazyValueInfo::Unknown) return false; - ++PI; - while (PI != PE) { - LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(), - C->getOperand(0), Op1, *PI, - C->getParent(), C); - if (Res != Result) return false; - ++PI; - } - ++NumCmps; - if (Result == LazyValueInfo::True) C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext())); else C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext())); - C->eraseFromParent(); return true; @@ -307,6 +304,59 @@ bool CorrelatedValuePropagation::processSwitch(SwitchInst *SI) { return Changed; } +/// processCallSite - Infer nonnull attributes for the arguments at the +/// specified callsite. 
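// Editor's illustration (hypothetical IR): when a dominating check proves an
// argument cannot be null at the call, the parameter gets annotated:
//   %ok = icmp ne i8* %p, null
//   br i1 %ok, label %call, label %skip
// call:
//   call void @use(i8* %p)   ; becomes: call void @use(i8* nonnull %p)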
+bool CorrelatedValuePropagation::processCallSite(CallSite CS) { + SmallVector Indices; + unsigned ArgNo = 0; + + for (Value *V : CS.args()) { + PointerType *Type = dyn_cast(V->getType()); + + if (Type && !CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) && + LVI->getPredicateAt(ICmpInst::ICMP_EQ, V, + ConstantPointerNull::get(Type), + CS.getInstruction()) == LazyValueInfo::False) + Indices.push_back(ArgNo + 1); + ArgNo++; + } + + assert(ArgNo == CS.arg_size() && "sanity check"); + + if (Indices.empty()) + return false; + + AttributeSet AS = CS.getAttributes(); + LLVMContext &Ctx = CS.getInstruction()->getContext(); + AS = AS.addAttribute(Ctx, Indices, Attribute::get(Ctx, Attribute::NonNull)); + CS.setAttributes(AS); + + return true; +} + +Constant *CorrelatedValuePropagation::getConstantAt(Value *V, Instruction *At) { + if (Constant *C = LVI->getConstant(V, At->getParent(), At)) + return C; + + // TODO: The following really should be sunk inside LVI's core algorithm, or + // at least the outer shims around such. + auto *C = dyn_cast(V); + if (!C) return nullptr; + + Value *Op0 = C->getOperand(0); + Constant *Op1 = dyn_cast(C->getOperand(1)); + if (!Op1) return nullptr; + + LazyValueInfo::Tristate Result = + LVI->getPredicateAt(C->getPredicate(), Op0, Op1, At); + if (Result == LazyValueInfo::Unknown) + return nullptr; + + return (Result == LazyValueInfo::True) ? + ConstantInt::getTrue(C->getContext()) : + ConstantInt::getFalse(C->getContext()); +} + bool CorrelatedValuePropagation::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; @@ -318,7 +368,7 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { bool BBChanged = false; for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { - Instruction *II = BI++; + Instruction *II = &*BI++; switch (II->getOpcode()) { case Instruction::Select: BBChanged |= processSelect(cast(II)); @@ -334,6 +384,10 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { case Instruction::Store: BBChanged |= processMemAccess(II); break; + case Instruction::Call: + case Instruction::Invoke: + BBChanged |= processCallSite(CallSite(II)); + break; } } @@ -342,7 +396,21 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) { case Instruction::Switch: BBChanged |= processSwitch(cast(Term)); break; + case Instruction::Ret: { + auto *RI = cast(Term); + // Try to determine the return value if we can. This is mainly here to + // simplify the writing of unit tests, but also helps to enable IPO by + // constant folding the return values of callees. + auto *RetVal = RI->getReturnValue(); + if (!RetVal) break; // handle "ret void" + if (isa(RetVal)) break; // nothing to do + if (auto *C = getConstantAt(RetVal, RI)) { + ++NumReturns; + RI->replaceUsesOfWith(RetVal, C); + BBChanged = true; + } } + }; FnChanged |= BBChanged; } diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index 3b262a23091f..b67c3c7742fd 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instruction.h" @@ -46,7 +47,7 @@ namespace { TargetLibraryInfo *TLI = TLIP ? 
&TLIP->getTLI() : nullptr; bool Changed = false; for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) { - Instruction *Inst = DI++; + Instruction *Inst = &*DI++; if (isInstructionTriviallyDead(Inst, TLI)) { Inst->eraseFromParent(); Changed = true; @@ -92,6 +93,34 @@ namespace { char DCE::ID = 0; INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false) +static bool DCEInstruction(Instruction *I, + SmallSetVector &WorkList, + const TargetLibraryInfo *TLI) { + if (isInstructionTriviallyDead(I, TLI)) { + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, nullptr); + + if (!OpV->use_empty() || I == OpV) + continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + WorkList.insert(OpI); + } + + I->eraseFromParent(); + ++DCEEliminated; + return true; + } + return false; +} + bool DCE::runOnFunction(Function &F) { if (skipOptnoneFunction(F)) return false; @@ -99,39 +128,24 @@ bool DCE::runOnFunction(Function &F) { auto *TLIP = getAnalysisIfAvailable(); TargetLibraryInfo *TLI = TLIP ? &TLIP->getTLI() : nullptr; - // Start out with all of the instructions in the worklist... - std::vector WorkList; - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) - WorkList.push_back(&*i); - - // Loop over the worklist finding instructions that are dead. If they are - // dead make them drop all of their uses, making other instructions - // potentially dead, and work until the worklist is empty. - // bool MadeChange = false; + SmallSetVector WorkList; + // Iterate over the original function, only adding insts to the worklist + // if they actually need to be revisited. This avoids having to pre-init + // the worklist with the entire function's worth of instructions. + for (inst_iterator FI = inst_begin(F), FE = inst_end(F); FI != FE;) { + Instruction *I = &*FI; + ++FI; + + // We're visiting this instruction now, so make sure it's not in the + // worklist from an earlier visit. + if (!WorkList.count(I)) + MadeChange |= DCEInstruction(I, WorkList, TLI); + } + while (!WorkList.empty()) { - Instruction *I = WorkList.back(); - WorkList.pop_back(); - - if (isInstructionTriviallyDead(I, TLI)) { // If the instruction is dead. - // Loop over all of the values that the instruction uses, if there are - // instructions being used, add them to the worklist, because they might - // go dead after this one is removed. - // - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) - if (Instruction *Used = dyn_cast(*OI)) - WorkList.push_back(Used); - - // Remove the instruction. - I->eraseFromParent(); - - // Remove the instruction from the worklist if it still exists in it. 
- WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), I), - WorkList.end()); - - MadeChange = true; - ++DCEEliminated; - } + Instruction *I = WorkList.pop_back_val(); + MadeChange |= DCEInstruction(I, WorkList, TLI); } return MadeChange; } diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c50558434da2..36ad0a5f7b91 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -21,6 +21,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -40,6 +41,7 @@ using namespace llvm; #define DEBUG_TYPE "dse" +STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastOther , "Number of other instrs removed"); @@ -59,23 +61,24 @@ namespace { if (skipOptnoneFunction(F)) return false; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); MD = &getAnalysis(); DT = &getAnalysis().getDomTree(); - TLI = AA->getTargetLibraryInfo(); + TLI = &getAnalysis().getTLI(); bool Changed = false; - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + for (BasicBlock &I : F) // Only check non-dead blocks. Dead blocks may have strange pointer // cycles that will confuse alias analysis. - if (DT->isReachableFromEntry(I)) - Changed |= runOnBasicBlock(*I); + if (DT->isReachableFromEntry(&I)) + Changed |= runOnBasicBlock(I); AA = nullptr; MD = nullptr; DT = nullptr; return Changed; } bool runOnBasicBlock(BasicBlock &BB); + bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI); bool HandleFree(CallInst *F); bool handleEndBlock(BasicBlock &BB); void RemoveAccessedObjects(const MemoryLocation &LoadedLoc, @@ -85,10 +88,11 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); } }; @@ -97,8 +101,10 @@ namespace { char DSE::ID = 0; INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false) FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } @@ -115,7 +121,7 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); } /// static void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD, - const TargetLibraryInfo *TLI, + const TargetLibraryInfo &TLI, SmallSetVector *ValueSet = nullptr) { SmallVector NowDeadInsts; @@ -140,7 +146,7 @@ static void DeleteDeadInstruction(Instruction *I, if (!Op->use_empty()) continue; if (Instruction *OpI = dyn_cast(Op)) - if (isInstructionTriviallyDead(OpI, TLI)) + if (isInstructionTriviallyDead(OpI, &TLI)) NowDeadInsts.push_back(OpI); } @@ -153,7 +159,7 @@ static void DeleteDeadInstruction(Instruction *I, /// hasMemoryWrite - Does this instruction write some 
memory? This only returns /// true for things that we can analyze with other helpers below. -static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { +static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) { if (isa(I)) return true; if (IntrinsicInst *II = dyn_cast(I)) { @@ -170,20 +176,20 @@ static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo *TLI) { } if (auto CS = CallSite(I)) { if (Function *F = CS.getCalledFunction()) { - if (TLI && TLI->has(LibFunc::strcpy) && - F->getName() == TLI->getName(LibFunc::strcpy)) { + if (TLI.has(LibFunc::strcpy) && + F->getName() == TLI.getName(LibFunc::strcpy)) { return true; } - if (TLI && TLI->has(LibFunc::strncpy) && - F->getName() == TLI->getName(LibFunc::strncpy)) { + if (TLI.has(LibFunc::strncpy) && + F->getName() == TLI.getName(LibFunc::strncpy)) { return true; } - if (TLI && TLI->has(LibFunc::strcat) && - F->getName() == TLI->getName(LibFunc::strcat)) { + if (TLI.has(LibFunc::strcat) && + F->getName() == TLI.getName(LibFunc::strcat)) { return true; } - if (TLI && TLI->has(LibFunc::strncat) && - F->getName() == TLI->getName(LibFunc::strncat)) { + if (TLI.has(LibFunc::strncat) && + F->getName() == TLI.getName(LibFunc::strncat)) { return true; } } @@ -224,9 +230,9 @@ static MemoryLocation getLocForWrite(Instruction *Inst, AliasAnalysis &AA) { /// getLocForRead - Return the location read by the specified "hasMemoryWrite" /// instruction if any. -static MemoryLocation getLocForRead(Instruction *Inst, AliasAnalysis &AA) { - assert(hasMemoryWrite(Inst, AA.getTargetLibraryInfo()) && - "Unknown instruction case"); +static MemoryLocation getLocForRead(Instruction *Inst, + const TargetLibraryInfo &TLI) { + assert(hasMemoryWrite(Inst, TLI) && "Unknown instruction case"); // The only instructions that both read and write are the mem transfer // instructions (memcpy/memmove). @@ -313,9 +319,9 @@ static Value *getStoredPointerOperand(Instruction *I) { } static uint64_t getPointerSize(const Value *V, const DataLayout &DL, - const TargetLibraryInfo *TLI) { + const TargetLibraryInfo &TLI) { uint64_t Size; - if (getObjectSize(V, Size, DL, TLI)) + if (getObjectSize(V, Size, DL, &TLI)) return Size; return MemoryLocation::UnknownSize; } @@ -336,7 +342,7 @@ namespace { static OverwriteResult isOverwrite(const MemoryLocation &Later, const MemoryLocation &Earlier, const DataLayout &DL, - const TargetLibraryInfo *TLI, + const TargetLibraryInfo &TLI, int64_t &EarlierOff, int64_t &LaterOff) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -442,10 +448,12 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later, /// because the DSE inducing instruction may be a self-read. static bool isPossibleSelfRead(Instruction *Inst, const MemoryLocation &InstStoreLoc, - Instruction *DepWrite, AliasAnalysis &AA) { + Instruction *DepWrite, + const TargetLibraryInfo &TLI, + AliasAnalysis &AA) { // Self reads can only happen for instructions that read memory. Get the // location read. - MemoryLocation InstReadLoc = getLocForRead(Inst, AA); + MemoryLocation InstReadLoc = getLocForRead(Inst, TLI); if (!InstReadLoc.Ptr) return false; // Not a reading instruction. // If the read and written loc obviously don't alias, it isn't a read. @@ -459,7 +467,7 @@ static bool isPossibleSelfRead(Instruction *Inst, // Here we don't know if A/B may alias, but we do know that B/B are must // aliases, so removing the first memcpy is safe (assuming it writes <= # // bytes as the second one. 
- MemoryLocation DepReadLoc = getLocForRead(DepWrite, AA); + MemoryLocation DepReadLoc = getLocForRead(DepWrite, TLI); if (DepReadLoc.Ptr && AA.isMustAlias(InstReadLoc.Ptr, DepReadLoc.Ptr)) return false; @@ -475,11 +483,12 @@ static bool isPossibleSelfRead(Instruction *Inst, //===----------------------------------------------------------------------===// bool DSE::runOnBasicBlock(BasicBlock &BB) { + const DataLayout &DL = BB.getModule()->getDataLayout(); bool MadeChange = false; // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { - Instruction *Inst = BBI++; + Instruction *Inst = &*BBI++; // Handle 'free' calls specially. if (CallInst *F = isFreeCall(Inst, TLI)) { @@ -488,9 +497,61 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { } // If we find something that writes memory, get its memory dependence. - if (!hasMemoryWrite(Inst, TLI)) + if (!hasMemoryWrite(Inst, *TLI)) continue; + // If we're storing the same value back to a pointer that we just + // loaded from, then the store can be removed. + if (StoreInst *SI = dyn_cast(Inst)) { + + auto RemoveDeadInstAndUpdateBBI = [&](Instruction *DeadInst) { + // DeleteDeadInstruction can delete the current instruction. Save BBI + // in case we need it. + WeakVH NextInst(&*BBI); + + DeleteDeadInstruction(DeadInst, *MD, *TLI); + + if (!NextInst) // Next instruction deleted. + BBI = BB.begin(); + else if (BBI != BB.begin()) // Revisit this instruction if possible. + --BBI; + ++NumRedundantStores; + MadeChange = true; + }; + + if (LoadInst *DepLoad = dyn_cast(SI->getValueOperand())) { + if (SI->getPointerOperand() == DepLoad->getPointerOperand() && + isRemovable(SI) && + MemoryIsNotModifiedBetween(DepLoad, SI)) { + + DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " + << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); + + RemoveDeadInstAndUpdateBBI(SI); + continue; + } + } + + // Remove null stores into the calloc'ed objects + Constant *StoredConstant = dyn_cast(SI->getValueOperand()); + + if (StoredConstant && StoredConstant->isNullValue() && + isRemovable(SI)) { + Instruction *UnderlyingPointer = dyn_cast( + GetUnderlyingObject(SI->getPointerOperand(), DL)); + + if (UnderlyingPointer && isCallocLikeFn(UnderlyingPointer, TLI) && + MemoryIsNotModifiedBetween(UnderlyingPointer, SI)) { + DEBUG(dbgs() + << "DSE: Remove null store to the calloc'ed object:\n DEAD: " + << *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n'); + + RemoveDeadInstAndUpdateBBI(SI); + continue; + } + } + } + MemDepResult InstDep = MD->getDependency(Inst); // Ignore any store where we can't find a local dependence. @@ -498,32 +559,6 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (!InstDep.isDef() && !InstDep.isClobber()) continue; - // If we're storing the same value back to a pointer that we just - // loaded from, then the store can be removed. - if (StoreInst *SI = dyn_cast(Inst)) { - if (LoadInst *DepLoad = dyn_cast(InstDep.getInst())) { - if (SI->getPointerOperand() == DepLoad->getPointerOperand() && - SI->getOperand(0) == DepLoad && isRemovable(SI)) { - DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n " - << "LOAD: " << *DepLoad << "\n STORE: " << *SI << '\n'); - - // DeleteDeadInstruction can delete the current instruction. Save BBI - // in case we need it. - WeakVH NextInst(BBI); - - DeleteDeadInstruction(SI, *MD, TLI); - - if (!NextInst) // Next instruction deleted. - BBI = BB.begin(); - else if (BBI != BB.begin()) // Revisit this instruction if possible. 
- --BBI; - ++NumFastStores; - MadeChange = true; - continue; - } - } - } - // Figure out what location is being stored to. MemoryLocation Loc = getLocForWrite(Inst, *AA); @@ -549,24 +584,22 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // completely obliterated by the store to 'Loc', and c) which we know that // 'Inst' doesn't load from, then we can remove it. if (isRemovable(DepWrite) && - !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { + !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { int64_t InstWriteOffset, DepWriteOffset; - const DataLayout &DL = BB.getModule()->getDataLayout(); OverwriteResult OR = - isOverwrite(Loc, DepLoc, DL, AA->getTargetLibraryInfo(), - DepWriteOffset, InstWriteOffset); + isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset); if (OR == OverwriteComplete) { DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite << "\n KILLER: " << *Inst << '\n'); // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD, TLI); + DeleteDeadInstruction(DepWrite, *MD, *TLI); ++NumFastStores; MadeChange = true; // DeleteDeadInstruction can delete the current instruction in loop // cases, reset BBI. - BBI = Inst; + BBI = Inst->getIterator(); if (BBI != BB.begin()) --BBI; break; @@ -609,10 +642,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { if (DepWrite == &BB.front()) break; // Can't look past this instruction if it might read 'Loc'. - if (AA->getModRefInfo(DepWrite, Loc) & AliasAnalysis::Ref) + if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) break; - InstDep = MD->getPointerDependencyFrom(Loc, false, DepWrite, &BB); + InstDep = MD->getPointerDependencyFrom(Loc, false, + DepWrite->getIterator(), &BB); } } @@ -624,6 +658,64 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { return MadeChange; } +/// Returns true if the memory which is accessed by the second instruction is not +/// modified between the first and the second instruction. +/// Precondition: Second instruction must be dominated by the first +/// instruction. +bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI, + Instruction *SecondI) { + SmallVector WorkList; + SmallPtrSet Visited; + BasicBlock::iterator FirstBBI(FirstI); + ++FirstBBI; + BasicBlock::iterator SecondBBI(SecondI); + BasicBlock *FirstBB = FirstI->getParent(); + BasicBlock *SecondBB = SecondI->getParent(); + MemoryLocation MemLoc = MemoryLocation::get(SecondI); + + // Start checking the store-block. + WorkList.push_back(SecondBB); + bool isFirstBlock = true; + + // Check all blocks going backward until we reach the load-block. + while (!WorkList.empty()) { + BasicBlock *B = WorkList.pop_back_val(); + + // Ignore instructions before LI if this is the FirstBB. + BasicBlock::iterator BI = (B == FirstBB ? FirstBBI : B->begin()); + + BasicBlock::iterator EI; + if (isFirstBlock) { + // Ignore instructions after SI if this is the first visit of SecondBB. + assert(B == SecondBB && "first block is not the store block"); + EI = SecondBBI; + isFirstBlock = false; + } else { + // It's not SecondBB or (in case of a loop) the second visit of SecondBB. + // In this case we also have to look at instructions after SI. 
+ EI = B->end(); + } + for (; BI != EI; ++BI) { + Instruction *I = &*BI; + if (I->mayWriteToMemory() && I != SecondI) { + auto Res = AA->getModRefInfo(I, MemLoc); + if (Res != MRI_NoModRef) + return false; + } + } + if (B != FirstBB) { + assert(B != &FirstBB->getParent()->getEntryBlock() && + "Should not hit the entry block because SI must be dominated by LI"); + for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { + if (!Visited.insert(*PredI).second) + continue; + WorkList.push_back(*PredI); + } + } + } + return true; +} + /// Find all blocks that will unconditionally lead to the block BB and append /// them to F. static void FindUnconditionalPreds(SmallVectorImpl &Blocks, @@ -655,10 +747,11 @@ bool DSE::HandleFree(CallInst *F) { Instruction *InstPt = BB->getTerminator(); if (BB == F->getParent()) InstPt = F; - MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB); + MemDepResult Dep = + MD->getPointerDependencyFrom(Loc, false, InstPt->getIterator(), BB); while (Dep.isDef() || Dep.isClobber()) { Instruction *Dependency = Dep.getInst(); - if (!hasMemoryWrite(Dependency, TLI) || !isRemovable(Dependency)) + if (!hasMemoryWrite(Dependency, *TLI) || !isRemovable(Dependency)) break; Value *DepPointer = @@ -668,10 +761,10 @@ bool DSE::HandleFree(CallInst *F) { if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) break; - Instruction *Next = std::next(BasicBlock::iterator(Dependency)); + auto Next = ++Dependency->getIterator(); // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD, TLI); + DeleteDeadInstruction(Dependency, *MD, *TLI); ++NumFastStores; MadeChange = true; @@ -704,23 +797,22 @@ bool DSE::handleEndBlock(BasicBlock &BB) { SmallSetVector DeadStackObjects; // Find all of the alloca'd pointers in the entry block. - BasicBlock *Entry = BB.getParent()->begin(); - for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) { - if (isa(I)) - DeadStackObjects.insert(I); + BasicBlock &Entry = BB.getParent()->front(); + for (Instruction &I : Entry) { + if (isa(&I)) + DeadStackObjects.insert(&I); // Okay, so these are dead heap objects, but if the pointer never escapes // then it's leaked by this function anyways. - else if (isAllocLikeFn(I, TLI) && !PointerMayBeCaptured(I, true, true)) - DeadStackObjects.insert(I); + else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true)) + DeadStackObjects.insert(&I); } // Treat byval or inalloca arguments the same, stores to them are dead at the // end of the function. - for (Function::arg_iterator AI = BB.getParent()->arg_begin(), - AE = BB.getParent()->arg_end(); AI != AE; ++AI) - if (AI->hasByValOrInAllocaAttr()) - DeadStackObjects.insert(AI); + for (Argument &AI : BB.getParent()->args()) + if (AI.hasByValOrInAllocaAttr()) + DeadStackObjects.insert(&AI); const DataLayout &DL = BB.getModule()->getDataLayout(); @@ -729,10 +821,10 @@ bool DSE::handleEndBlock(BasicBlock &BB) { --BBI; // If we find a store, check to see if it points into a dead stack value. - if (hasMemoryWrite(BBI, TLI) && isRemovable(BBI)) { + if (hasMemoryWrite(&*BBI, *TLI) && isRemovable(&*BBI)) { // See through pointer-to-pointer bitcasts SmallVector Pointers; - GetUnderlyingObjects(getStoredPointerOperand(BBI), Pointers, DL); + GetUnderlyingObjects(getStoredPointerOperand(&*BBI), Pointers, DL); // Stores to stack values are valid candidates for removal. 
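// [Editor's note] Hedged sketch of the candidate set handleEndBlock() builds
// above: allocas in the entry block, non-escaping (i.e. leaked) malloc-like
// calls, and byval/inalloca arguments are all dead once the function returns,
// so a trailing store into any of them is removable. The helper name is
// illustrative only; the logic mirrors the loops shown above.
static void collectDeadAtFunctionExit(Function &F, const TargetLibraryInfo *TLI,
                                      SmallSetVector<Value *, 16> &Dead) {
  for (Instruction &I : F.front()) {
    if (isa<AllocaInst>(I))
      Dead.insert(&I);
    else if (isAllocLikeFn(&I, TLI) && !PointerMayBeCaptured(&I, true, true))
      Dead.insert(&I); // leaked heap object: stores into it are dead too
  }
  for (Argument &A : F.args())
    if (A.hasByValOrInAllocaAttr())
      Dead.insert(&A);
}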
bool AllDead = true; @@ -744,7 +836,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } if (AllDead) { - Instruction *Dead = BBI++; + Instruction *Dead = &*BBI++; DEBUG(dbgs() << "DSE: Dead Store at End of Block:\n DEAD: " << *Dead << "\n Objects: "; @@ -757,7 +849,7 @@ bool DSE::handleEndBlock(BasicBlock &BB) { dbgs() << '\n'); // DCE instructions only used to calculate that store. - DeleteDeadInstruction(Dead, *MD, TLI, &DeadStackObjects); + DeleteDeadInstruction(Dead, *MD, *TLI, &DeadStackObjects); ++NumFastStores; MadeChange = true; continue; @@ -765,9 +857,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { } // Remove any dead non-memory-mutating instructions. - if (isInstructionTriviallyDead(BBI, TLI)) { - Instruction *Inst = BBI++; - DeleteDeadInstruction(Inst, *MD, TLI, &DeadStackObjects); + if (isInstructionTriviallyDead(&*BBI, TLI)) { + Instruction *Inst = &*BBI++; + DeleteDeadInstruction(Inst, *MD, *TLI, &DeadStackObjects); ++NumFastOther; MadeChange = true; continue; @@ -776,15 +868,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (isa(BBI)) { // Remove allocas from the list of dead stack objects; there can't be // any references before the definition. - DeadStackObjects.remove(BBI); + DeadStackObjects.remove(&*BBI); continue; } - if (auto CS = CallSite(BBI)) { + if (auto CS = CallSite(&*BBI)) { // Remove allocation function calls from the list of dead stack objects; // there can't be any references before the definition. - if (isAllocLikeFn(BBI, TLI)) - DeadStackObjects.remove(BBI); + if (isAllocLikeFn(&*BBI, TLI)) + DeadStackObjects.remove(&*BBI); // If this call does not access memory, it can't be loading any of our // pointers. @@ -795,10 +887,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // the call is live. DeadStackObjects.remove_if([&](Value *I) { // See if the call site touches the value. - AliasAnalysis::ModRefResult A = AA->getModRefInfo( - CS, I, getPointerSize(I, DL, AA->getTargetLibraryInfo())); + ModRefInfo A = AA->getModRefInfo(CS, I, getPointerSize(I, DL, *TLI)); - return A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref; + return A == MRI_ModRef || A == MRI_Ref; }); // If all of the allocas were clobbered by the call then we're not going @@ -864,8 +955,7 @@ void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc, // Remove objects that could alias LoadedLoc. DeadStackObjects.remove_if([&](Value *I) { // See if the loaded location could alias the stack location. - MemoryLocation StackLoc(I, - getPointerSize(I, DL, AA->getTargetLibraryInfo())); + MemoryLocation StackLoc(I, getPointerSize(I, DL, *TLI)); return !AA->isNoAlias(StackLoc, LoadedLoc); }); } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 029b44c2ea80..7ef062e71ff3 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -263,7 +264,6 @@ namespace { /// expected that a later pass of GVN will catch the interesting/hard cases. class EarlyCSE { public: - Function &F; const TargetLibraryInfo &TLI; const TargetTransformInfo &TTI; DominatorTree &DT; @@ -281,20 +281,37 @@ public: /// that dominated values can succeed in their lookup. ScopedHTType AvailableValues; - /// \brief A scoped hash table of the current values of loads. 
+  /// A scoped hash table of the current values of previously encountered
+  /// memory locations.
   ///
-  /// This allows us to get efficient access to dominating loads when we have
-  /// a fully redundant load.  In addition to the most recent load, we keep
-  /// track of a generation count of the read, which is compared against the
-  /// current generation count.  The current generation count is incremented
+  /// This allows us to get efficient access to dominating loads or stores when
+  /// we have a fully redundant load.  In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count.  The current generation count is incremented
   /// after every possibly writing memory operation, which ensures that we only
-  /// CSE loads with other loads that have no intervening store.
-  typedef RecyclingAllocator<
-      BumpPtrAllocator,
-      ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
+  /// CSE loads with other loads that have no intervening store.  Ordering
+  /// events (such as fences or atomic instructions) increment the generation
+  /// count as well; essentially, we model these as writes to all possible
+  /// locations.  Note that atomic and/or volatile loads and stores can be
+  /// present in the table; it is the responsibility of the consumer to inspect
+  /// the atomicity/volatility if needed.
+  struct LoadValue {
+    Value *Data;
+    unsigned Generation;
+    int MatchingId;
+    bool IsAtomic;
+    LoadValue()
+        : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
+    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
+              bool IsAtomic)
+        : Data(Data), Generation(Generation), MatchingId(MatchingId),
+          IsAtomic(IsAtomic) {}
+  };
+  typedef RecyclingAllocator<BumpPtrAllocator,
+                             ScopedHashTableVal<Value *, LoadValue>>
       LoadMapAllocator;
-  typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
-                          DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
+  typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+                          LoadMapAllocator> LoadHTType;
   LoadHTType AvailableLoads;
 
   /// \brief A scoped hash table of the current values of read-only call
@@ -308,10 +325,9 @@ public:
   unsigned CurrentGeneration;
 
   /// \brief Set up the EarlyCSE runner for a particular function.
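  // [Editor's note] The generation scheme above, restated as a standalone
  // predicate (hypothetical, mirroring the condition processNode() checks
  // further down): a recorded LoadValue may be forwarded only when no
  // possibly-writing operation has intervened (same generation), the access
  // is the same kind of memory intrinsic (same MatchingId), and a non-atomic
  // value is never forwarded into an atomic load.
  static bool isReusable(const LoadValue &InVal, unsigned CurrentGeneration,
                         int MatchingId, bool LoadIsAtomic) {
    return InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
           InVal.MatchingId == MatchingId && InVal.IsAtomic >= LoadIsAtomic;
  }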
- EarlyCSE(Function &F, const TargetLibraryInfo &TLI, - const TargetTransformInfo &TTI, DominatorTree &DT, - AssumptionCache &AC) - : F(F), TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {} + EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, + DominatorTree &DT, AssumptionCache &AC) + : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {} bool run(); @@ -382,57 +398,91 @@ private: class ParseMemoryInst { public: ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) - : Load(false), Store(false), Vol(false), MayReadFromMemory(false), - MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) { - MayReadFromMemory = Inst->mayReadFromMemory(); - MayWriteToMemory = Inst->mayWriteToMemory(); - if (IntrinsicInst *II = dyn_cast(Inst)) { - MemIntrinsicInfo Info; - if (!TTI.getTgtMemIntrinsic(II, Info)) - return; - if (Info.NumMemRefs == 1) { - Store = Info.WriteMem; - Load = Info.ReadMem; - MatchingId = Info.MatchingId; - MayReadFromMemory = Info.ReadMem; - MayWriteToMemory = Info.WriteMem; - Vol = Info.Vol; - Ptr = Info.PtrVal; - } - } else if (LoadInst *LI = dyn_cast(Inst)) { - Load = true; - Vol = !LI->isSimple(); - Ptr = LI->getPointerOperand(); - } else if (StoreInst *SI = dyn_cast(Inst)) { - Store = true; - Vol = !SI->isSimple(); - Ptr = SI->getPointerOperand(); + : IsTargetMemInst(false), Inst(Inst) { + if (IntrinsicInst *II = dyn_cast(Inst)) + if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1) + IsTargetMemInst = true; + } + bool isLoad() const { + if (IsTargetMemInst) return Info.ReadMem; + return isa(Inst); + } + bool isStore() const { + if (IsTargetMemInst) return Info.WriteMem; + return isa(Inst); + } + bool isAtomic() const { + if (IsTargetMemInst) { + assert(Info.IsSimple && "need to refine IsSimple in TTI"); + return false; } + return Inst->isAtomic(); } - bool isLoad() { return Load; } - bool isStore() { return Store; } - bool isVolatile() { return Vol; } - bool isMatchingMemLoc(const ParseMemoryInst &Inst) { - return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId; + bool isUnordered() const { + if (IsTargetMemInst) { + assert(Info.IsSimple && "need to refine IsSimple in TTI"); + return true; + } + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->isUnordered(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->isUnordered(); + } + // Conservative answer + return !Inst->isAtomic(); } - bool isValid() { return Ptr != nullptr; } - int getMatchingId() { return MatchingId; } - Value *getPtr() { return Ptr; } - bool mayReadFromMemory() { return MayReadFromMemory; } - bool mayWriteToMemory() { return MayWriteToMemory; } - private: - bool Load; - bool Store; - bool Vol; - bool MayReadFromMemory; - bool MayWriteToMemory; + bool isVolatile() const { + if (IsTargetMemInst) { + assert(Info.IsSimple && "need to refine IsSimple in TTI"); + return false; + } + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->isVolatile(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->isVolatile(); + } + // Conservative answer + return true; + } + + + bool isMatchingMemLoc(const ParseMemoryInst &Inst) const { + return (getPointerOperand() == Inst.getPointerOperand() && + getMatchingId() == Inst.getMatchingId()); + } + bool isValid() const { return getPointerOperand() != nullptr; } + // For regular (non-intrinsic) loads/stores, this is set to -1. For // intrinsic loads/stores, the id is retrieved from the corresponding // field in the MemIntrinsicInfo structure. That field contains // non-negative values only. 
- int MatchingId; - Value *Ptr; + int getMatchingId() const { + if (IsTargetMemInst) return Info.MatchingId; + return -1; + } + Value *getPointerOperand() const { + if (IsTargetMemInst) return Info.PtrVal; + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->getPointerOperand(); + } + return nullptr; + } + bool mayReadFromMemory() const { + if (IsTargetMemInst) return Info.ReadMem; + return Inst->mayReadFromMemory(); + } + bool mayWriteToMemory() const { + if (IsTargetMemInst) return Info.WriteMem; + return Inst->mayWriteToMemory(); + } + + private: + bool IsTargetMemInst; + MemIntrinsicInfo Info; + Instruction *Inst; }; bool processNode(DomTreeNode *Node); @@ -497,7 +547,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // See if any instructions in the block can be eliminated. If so, do it. If // not, add them to AvailableValues. for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { - Instruction *Inst = I++; + Instruction *Inst = &*I++; // Dead instructions should just be removed. if (isInstructionTriviallyDead(Inst, &TLI)) { @@ -548,24 +598,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ParseMemoryInst MemInst(Inst, TTI); // If this is a non-volatile load, process it. if (MemInst.isValid() && MemInst.isLoad()) { - // Ignore volatile loads. - if (MemInst.isVolatile()) { + // (conservatively) we can't peak past the ordering implied by this + // operation, but we can add this load to our set of available values + if (MemInst.isVolatile() || !MemInst.isUnordered()) { LastStore = nullptr; - // Don't CSE across synchronization boundaries. - if (Inst->mayWriteToMemory()) - ++CurrentGeneration; - continue; + ++CurrentGeneration; } // If we have an available version of this load, and if it is the right // generation, replace this instruction. - std::pair InVal = - AvailableLoads.lookup(MemInst.getPtr()); - if (InVal.first != nullptr && InVal.second == CurrentGeneration) { - Value *Op = getOrCreateResult(InVal.first, Inst->getType()); + LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); + if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration && + InVal.MatchingId == MemInst.getMatchingId() && + // We don't yet handle removing loads with ordering of any kind. + !MemInst.isVolatile() && MemInst.isUnordered() && + // We can't replace an atomic load with one which isn't also atomic. + InVal.IsAtomic >= MemInst.isAtomic()) { + Value *Op = getOrCreateResult(InVal.Data, Inst->getType()); if (Op != nullptr) { DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst - << " to: " << *InVal.first << '\n'); + << " to: " << *InVal.Data << '\n'); if (!Inst->use_empty()) Inst->replaceAllUsesWith(Op); Inst->eraseFromParent(); @@ -576,8 +628,10 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { } // Otherwise, remember that we have this instruction. - AvailableLoads.insert(MemInst.getPtr(), std::pair( - Inst, CurrentGeneration)); + AvailableLoads.insert( + MemInst.getPointerOperand(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(), + MemInst.isAtomic())); LastStore = nullptr; continue; } @@ -613,6 +667,44 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { continue; } + // A release fence requires that all stores complete before it, but does + // not prevent the reordering of following loads 'before' the fence. As a + // result, we don't need to consider it as writing to memory and don't need + // to advance the generation. 
We do need to prevent DSE across the fence, + // but that's handled above. + if (FenceInst *FI = dyn_cast(Inst)) + if (FI->getOrdering() == Release) { + assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above"); + continue; + } + + // write back DSE - If we write back the same value we just loaded from + // the same location and haven't passed any intervening writes or ordering + // operations, we can remove the write. The primary benefit is in allowing + // the available load table to remain valid and value forward past where + // the store originally was. + if (MemInst.isValid() && MemInst.isStore()) { + LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); + if (InVal.Data && + InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) && + InVal.Generation == CurrentGeneration && + InVal.MatchingId == MemInst.getMatchingId() && + // We don't yet handle removing stores with ordering of any kind. + !MemInst.isVolatile() && MemInst.isUnordered()) { + assert((!LastStore || + ParseMemoryInst(LastStore, TTI).getPointerOperand() == + MemInst.getPointerOperand()) && + "can't have an intervening store!"); + DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n'); + Inst->eraseFromParent(); + Changed = true; + ++NumDSE; + // We can avoid incrementing the generation count since we were able + // to eliminate this store. + continue; + } + } + // Okay, this isn't something we can CSE at all. Check to see if it is // something that could modify memory. If so, our available memory values // cannot be used so bump the generation count. @@ -622,8 +714,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (MemInst.isValid() && MemInst.isStore()) { // We do a trivial form of DSE if there are two stores to the same // location with no intervening loads. Delete the earlier store. + // At the moment, we don't remove ordered stores, but do remove + // unordered atomic stores. There's no special requirement (for + // unordered atomics) about removing atomic stores only in favor of + // other atomic stores since we we're going to execute the non-atomic + // one anyway and the atomic one might never have become visible. if (LastStore) { ParseMemoryInst LastStoreMemInst(LastStore, TTI); + assert(LastStoreMemInst.isUnordered() && + !LastStoreMemInst.isVolatile() && + "Violated invariant"); if (LastStoreMemInst.isMatchingMemLoc(MemInst)) { DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " << *Inst << '\n'); @@ -640,12 +740,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // version of the pointer. It is safe to forward from volatile stores // to non-volatile loads, so we don't have to check for volatility of // the store. - AvailableLoads.insert(MemInst.getPtr(), std::pair( - Inst, CurrentGeneration)); + AvailableLoads.insert( + MemInst.getPointerOperand(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(), + MemInst.isAtomic())); - // Remember that this was the last store we saw for DSE. - if (!MemInst.isVolatile()) + // Remember that this was the last unordered store we saw for DSE. We + // don't yet handle DSE on ordered or volatile stores since we don't + // have a good way to model the ordering requirement for following + // passes once the store is removed. We could insert a fence, but + // since fences are slightly stronger than stores in their ordering, + // it's not clear this is a profitable transform. Another option would + // be to merge the ordering with that of the post dominating store. 
+ if (MemInst.isUnordered() && !MemInst.isVolatile()) LastStore = Inst; + else + LastStore = nullptr; } } } @@ -714,7 +824,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F, auto &DT = AM->getResult(F); auto &AC = AM->getResult(F); - EarlyCSE CSE(F, TLI, TTI, DT, AC); + EarlyCSE CSE(TLI, TTI, DT, AC); if (!CSE.run()) return PreservedAnalyses::all(); @@ -751,7 +861,7 @@ public: auto &DT = getAnalysis().getDomTree(); auto &AC = getAnalysis().getAssumptionCache(F); - EarlyCSE CSE(F, TLI, TTI, DT, AC); + EarlyCSE CSE(TLI, TTI, DT, AC); return CSE.run(); } @@ -761,6 +871,7 @@ public: AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.setPreservesCFG(); } }; diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp index 0430c1898c8d..185cdbdda378 100644 --- a/lib/Transforms/Scalar/FlattenCFGPass.cpp +++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp @@ -30,7 +30,7 @@ public: bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); + AU.addRequired(); } private: @@ -41,7 +41,7 @@ private: char FlattenCFGPass::ID = 0; INITIALIZE_PASS_BEGIN(FlattenCFGPass, "flattencfg", "Flatten the CFG", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(FlattenCFGPass, "flattencfg", "Flatten the CFG", false, false) @@ -59,7 +59,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { // Loop over all of the basic blocks and remove them if they are unneeded... // for (Function::iterator BBIt = F.begin(); BBIt != F.end();) { - if (FlattenCFG(BBIt++, AA)) { + if (FlattenCFG(&*BBIt++, AA)) { LocalChange = true; } } @@ -69,7 +69,7 @@ static bool iterativelyFlattenCFG(Function &F, AliasAnalysis *AA) { } bool FlattenCFGPass::runOnFunction(Function &F) { - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); bool EverChanged = false; // iterativelyFlattenCFG can make some blocks dead. while (iterativelyFlattenCFG(F, AA)) { diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp index c9314229c38b..7f5d78656b50 100644 --- a/lib/Transforms/Scalar/Float2Int.cpp +++ b/lib/Transforms/Scalar/Float2Int.cpp @@ -19,6 +19,8 @@ #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/IRBuilder.h" @@ -41,7 +43,7 @@ using namespace llvm; // integer domain inputs, produce an integer output; fadd, for example. // // If a non-mappable instruction is seen, this entire def-use graph is marked -// as non-transformable. If we see an instruction that converts from the +// as non-transformable. If we see an instruction that converts from the // integer domain to FP domain (uitofp,sitofp), we terminate our walk. /// The largest integer type worth dealing with. 
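// [Editor's note] The FP-to-integer re-expression described above rests on
// two small opcode mappings the file defines (mapFCmpPred and mapBinOpcode).
// A hedged sketch of the binop half, assuming only the opcodes the pass can
// actually handle; the function name here is illustrative:
static Instruction::BinaryOps mapToIntOpcode(unsigned FPOpcode) {
  switch (FPOpcode) {
  case Instruction::FAdd: return Instruction::Add;
  case Instruction::FSub: return Instruction::Sub;
  case Instruction::FMul: return Instruction::Mul;
  default: return Instruction::BinaryOpsEnd; // not mappable
  }
}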
@@ -60,6 +62,7 @@ namespace { bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addPreserved(); } void findRoots(Function &F, SmallPtrSet &Roots); @@ -82,7 +85,9 @@ namespace { } char Float2Int::ID = 0; -INITIALIZE_PASS(Float2Int, "float2int", "Float to int", false, false) +INITIALIZE_PASS_BEGIN(Float2Int, "float2int", "Float to int", false, false) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_END(Float2Int, "float2int", "Float to int", false, false) // Given a FCmp predicate, return a matching ICmp predicate if one // exists, otherwise return BAD_ICMP_PREDICATE. @@ -125,7 +130,9 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { // Find the roots - instructions that convert from the FP domain to // integer domain. void Float2Int::findRoots(Function &F, SmallPtrSet &Roots) { - for (auto &I : inst_range(F)) { + for (auto &I : instructions(F)) { + if (isa(I.getType())) + continue; switch (I.getOpcode()) { default: break; case Instruction::FPToUI: @@ -133,7 +140,7 @@ void Float2Int::findRoots(Function &F, SmallPtrSet &Roots) { Roots.insert(&I); break; case Instruction::FCmp: - if (mapFCmpPred(cast(&I)->getPredicate()) != + if (mapFCmpPred(cast(&I)->getPredicate()) != CmpInst::BAD_ICMP_PREDICATE) Roots.insert(&I); break; @@ -176,7 +183,7 @@ ConstantRange Float2Int::validateRange(ConstantRange R) { // - walkForwards: Iterate over SeenInsts in reverse order, so we visit // defs before their uses. Calculate the real range info. -// Breadth-first walk of the use-def graph; determine the set of nodes +// Breadth-first walk of the use-def graph; determine the set of nodes // we care about and eagerly determine if some of them are poisonous. void Float2Int::walkBackwards(const SmallPtrSetImpl &Roots) { std::deque Worklist(Roots.begin(), Roots.end()); @@ -222,14 +229,14 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl &Roots) { seen(I, unknownRange()); break; } - + for (Value *O : I->operands()) { if (Instruction *OI = dyn_cast(O)) { // Unify def-use chains if they interfere. ECs.unionSets(I, OI); - if (SeenInsts.find(I)->second != badRange()) + if (SeenInsts.find(I)->second != badRange()) Worklist.push_back(OI); - } else if (!isa(O)) { + } else if (!isa(O)) { // Not an instruction or ConstantFP? we can't do anything. seen(I, badRange()); } @@ -240,11 +247,11 @@ void Float2Int::walkBackwards(const SmallPtrSetImpl &Roots) { // Walk forwards down the list of seen instructions, so we visit defs before // uses. void Float2Int::walkForwards() { - for (auto It = SeenInsts.rbegin(), E = SeenInsts.rend(); It != E; ++It) { - if (It->second != unknownRange()) + for (auto &It : make_range(SeenInsts.rbegin(), SeenInsts.rend())) { + if (It.second != unknownRange()) continue; - Instruction *I = It->first; + Instruction *I = It.first; std::function)> Op; switch (I->getOpcode()) { // FIXME: Handle select and phi nodes. @@ -299,7 +306,7 @@ void Float2Int::walkForwards() { for (Value *O : I->operands()) { if (Instruction *OI = dyn_cast(O)) { assert(SeenInsts.find(OI) != SeenInsts.end() && - "def not seen before use!"); + "def not seen before use!"); OpRanges.push_back(SeenInsts.find(OI)->second); } else if (ConstantFP *CF = dyn_cast(O)) { // Work out if the floating point number can be losslessly represented @@ -314,11 +321,11 @@ void Float2Int::walkForwards() { APFloat F = CF->getValueAPF(); // First, weed out obviously incorrect values. 
Non-finite numbers - // can't be represented and neither can negative zero, unless + // can't be represented and neither can negative zero, unless // we're in fast math mode. if (!F.isFinite() || (F.isZero() && F.isNegative() && isa(I) && - !I->hasNoSignedZeros())) { + !I->hasNoSignedZeros())) { seen(I, badRange()); Abort = true; break; @@ -345,7 +352,7 @@ void Float2Int::walkForwards() { // Reduce the operands' ranges to a single range and return. if (!Abort) - seen(I, Op(OpRanges)); + seen(I, Op(OpRanges)); } } @@ -395,7 +402,7 @@ bool Float2Int::validateAndTransform() { R.isFullSet() || R.isSignWrappedSet()) continue; assert(ConvertedToTy && "Must have set the convertedtoty by this point!"); - + // The number of bits required is the maximum of the upper and // lower limits, plus one so it can be signed. unsigned MinBW = std::max(R.getLower().getMinSignedBits(), @@ -505,9 +512,8 @@ Value *Float2Int::convert(Instruction *I, Type *ToTy) { // Perform dead code elimination on the instructions we just modified. void Float2Int::cleanup() { - for (auto I = ConvertedInsts.rbegin(), E = ConvertedInsts.rend(); - I != E; ++I) - I->first->eraseFromParent(); + for (auto &I : make_range(ConvertedInsts.rbegin(), ConvertedInsts.rend())) + I.first->eraseFromParent(); } bool Float2Int::runOnFunction(Function &F) { @@ -534,7 +540,4 @@ bool Float2Int::runOnFunction(Function &F) { return Modified; } -FunctionPass *llvm::createFloat2IntPass() { - return new Float2Int(); -} - +FunctionPass *llvm::createFloat2IntPass() { return new Float2Int(); } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 89a0d0af93be..a028b8c444ba 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -128,6 +129,7 @@ namespace { uint32_t lookup(Value *V) const; uint32_t lookup_or_add_cmp(unsigned Opcode, CmpInst::Predicate Pred, Value *LHS, Value *RHS); + bool exists(Value *V) const; void add(Value *V, uint32_t num); void clear(); void erase(Value *v); @@ -388,6 +390,9 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) { } } +/// Returns true if a value number exists for the specified value. +bool ValueTable::exists(Value *V) const { return valueNumbering.count(V) != 0; } + /// lookup_or_add - Returns the value number for the specified value, assigning /// it a new number if it did not have one before. uint32_t ValueTable::lookup_or_add(Value *V) { @@ -608,6 +613,10 @@ namespace { DenseMap LeaderTable; BumpPtrAllocator TableAllocator; + // Block-local map of equivalent values to their leader, does not + // propagate to any successors. Entries added mid-block are applied + // to the remaining instructions in the block. 
+ SmallMapVector ReplaceWithConstMap; SmallVector InstrsToErase; typedef SmallVector LoadDepVect; @@ -689,16 +698,17 @@ namespace { AU.addRequired(); if (!NoLoads) AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); } - // Helper fuctions of redundant load elimination + // Helper functions of redundant load elimination bool processLoad(LoadInst *L); bool processNonLocalLoad(LoadInst *L); + bool processAssumeIntrinsic(IntrinsicInst *II); void AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, AvailValInBlkVect &ValuesPerBlock, UnavailBlkVect &UnavailableBlocks); @@ -719,7 +729,9 @@ namespace { void verifyRemoved(const Instruction *I) const; bool splitCriticalEdges(); BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ); - bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root); + bool replaceOperandsWithConsts(Instruction *I) const; + bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, + bool DominatesByEdge); bool processFoldableCondBr(BranchInst *BI); void addDeadBlock(BasicBlock *BB); void assignValNumForDeadCode(); @@ -738,7 +750,8 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(GVN, "gvn", "Global Value Numbering", false, false) #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1290,8 +1303,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, SSAUpdater SSAUpdate(&NewPHIs); SSAUpdate.Initialize(LI->getType(), LI->getName()); - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) { - const AvailableValueInBlock &AV = ValuesPerBlock[i]; + for (const AvailableValueInBlock &AV : ValuesPerBlock) { BasicBlock *BB = AV.BB; if (SSAUpdate.HasValueForBlock(BB)) @@ -1301,24 +1313,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, } // Perform PHI construction. - Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); - - // If new PHI nodes were created, notify alias analysis. - if (V->getType()->getScalarType()->isPointerTy()) { - AliasAnalysis *AA = gvn.getAliasAnalysis(); - - // Scan the new PHIs and inform alias analysis that we've added potentially - // escaping uses to any values that are operands to these PHIs. - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) { - PHINode *P = NewPHIs[i]; - for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) { - unsigned jj = PHINode::getOperandNumForIncomingValue(ii); - AA->addEscapingUse(P->getOperandUse(jj)); - } - } - } - - return V; + return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); } Value *AvailableValueInBlock::MaterializeAdjustedValue(LoadInst *LI, @@ -1518,9 +1513,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // that we only have to insert *one* load (which means we're basically moving // the load, not inserting a new one). - SmallPtrSet Blockers; - for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) - Blockers.insert(UnavailableBlocks[i]); + SmallPtrSet Blockers(UnavailableBlocks.begin(), + UnavailableBlocks.end()); // Let's find the first basic block with more than one predecessor. Walk // backwards through predecessors if needed. 
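// [Editor's note] With the alias-analysis escape bookkeeping deleted above,
// ConstructSSAForLoadSet reduces to the standard SSAUpdater pattern. A hedged
// sketch of that pattern (helper name and parameters assumed, not part of
// the patch):
static Value *buildSSAForLoad(LoadInst *LI,
                              ArrayRef<AvailableValueInBlock> Avail,
                              GVN &gvn) {
  SmallVector<PHINode *, 8> NewPHIs;
  SSAUpdater SSAUpdate(&NewPHIs);
  SSAUpdate.Initialize(LI->getType(), LI->getName());
  // Seed each block with the value available in it...
  for (const AvailableValueInBlock &AV : Avail)
    if (!SSAUpdate.HasValueForBlock(AV.BB))
      SSAUpdate.AddAvailableValue(AV.BB, AV.MaterializeAdjustedValue(LI, gvn));
  // ...then let the updater insert whatever PHIs the load's block needs.
  return SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
}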
@@ -1550,15 +1544,22 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // available. MapVector PredLoads; DenseMap FullyAvailableBlocks; - for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) - FullyAvailableBlocks[ValuesPerBlock[i].BB] = true; - for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i) - FullyAvailableBlocks[UnavailableBlocks[i]] = false; + for (const AvailableValueInBlock &AV : ValuesPerBlock) + FullyAvailableBlocks[AV.BB] = true; + for (BasicBlock *UnavailableBB : UnavailableBlocks) + FullyAvailableBlocks[UnavailableBB] = false; SmallVector CriticalEdgePred; - for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); - PI != E; ++PI) { - BasicBlock *Pred = *PI; + for (BasicBlock *Pred : predecessors(LoadBB)) { + // If any predecessor block is an EH pad that does not allow non-PHI + // instructions before the terminator, we can't PRE the load. + if (Pred->getTerminator()->isEHPad()) { + DEBUG(dbgs() + << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD PREDECESSOR '" + << Pred->getName() << "': " << *LI << '\n'); + return false; + } + if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) { continue; } @@ -1570,9 +1571,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, return false; } - if (LoadBB->isLandingPad()) { + if (LoadBB->isEHPad()) { DEBUG(dbgs() - << "COULD NOT PRE LOAD BECAUSE OF LANDING PAD CRITICAL EDGE '" + << "COULD NOT PRE LOAD BECAUSE OF AN EH PAD CRITICAL EDGE '" << Pred->getName() << "': " << *LI << '\n'); return false; } @@ -1655,12 +1656,12 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, << *NewInsts.back() << '\n'); // Assign value numbers to the new instructions. - for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) { + for (Instruction *I : NewInsts) { // FIXME: We really _ought_ to insert these value numbers into their // parent's availability map. However, in doing so, we risk getting into // ordering issues. If a block hasn't been processed yet, we would be // marking a value as AVAIL-IN, which isn't what we intend. - VN.lookup_or_add(NewInsts[i]); + VN.lookup_or_add(I); } for (const auto &PredLoad : PredLoads) { @@ -1677,6 +1678,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, if (Tags) NewLoad->setAAMetadata(Tags); + if (auto *MD = LI->getMetadata(LLVMContext::MD_invariant_load)) + NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); + if (auto *InvGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group)) + NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); + // Transfer DebugLoc. NewLoad->setDebugLoc(LI->getDebugLoc()); @@ -1704,6 +1710,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, /// Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. bool GVN::processNonLocalLoad(LoadInst *LI) { + // non-local speculations are not allowed under asan. + if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeAddress)) + return false; + // Step 1: Find the non-local dependencies of the load. 
LoadDepVect Deps; MD->getNonLocalPointerDependency(LI, Deps); @@ -1777,6 +1787,63 @@ bool GVN::processNonLocalLoad(LoadInst *LI) { return PerformLoadPRE(LI, ValuesPerBlock, UnavailableBlocks); } +bool GVN::processAssumeIntrinsic(IntrinsicInst *IntrinsicI) { + assert(IntrinsicI->getIntrinsicID() == Intrinsic::assume && + "This function can only be called with llvm.assume intrinsic"); + Value *V = IntrinsicI->getArgOperand(0); + + if (ConstantInt *Cond = dyn_cast(V)) { + if (Cond->isZero()) { + Type *Int8Ty = Type::getInt8Ty(V->getContext()); + // Insert a new store to null instruction before the load to indicate that + // this code is not reachable. FIXME: We could insert unreachable + // instruction directly because we can modify the CFG. + new StoreInst(UndefValue::get(Int8Ty), + Constant::getNullValue(Int8Ty->getPointerTo()), + IntrinsicI); + } + markInstructionForDeletion(IntrinsicI); + return false; + } + + Constant *True = ConstantInt::getTrue(V->getContext()); + bool Changed = false; + + for (BasicBlock *Successor : successors(IntrinsicI->getParent())) { + BasicBlockEdge Edge(IntrinsicI->getParent(), Successor); + + // This property is only true in dominated successors, propagateEquality + // will check dominance for us. + Changed |= propagateEquality(V, True, Edge, false); + } + + // We can replace assume value with true, which covers cases like this: + // call void @llvm.assume(i1 %cmp) + // br i1 %cmp, label %bb1, label %bb2 ; will change %cmp to true + ReplaceWithConstMap[V] = True; + + // If one of *cmp *eq operand is const, adding it to map will cover this: + // %cmp = fcmp oeq float 3.000000e+00, %0 ; const on lhs could happen + // call void @llvm.assume(i1 %cmp) + // ret float %0 ; will change it to ret float 3.000000e+00 + if (auto *CmpI = dyn_cast(V)) { + if (CmpI->getPredicate() == CmpInst::Predicate::ICMP_EQ || + CmpI->getPredicate() == CmpInst::Predicate::FCMP_OEQ || + (CmpI->getPredicate() == CmpInst::Predicate::FCMP_UEQ && + CmpI->getFastMathFlags().noNaNs())) { + Value *CmpLHS = CmpI->getOperand(0); + Value *CmpRHS = CmpI->getOperand(1); + if (isa(CmpLHS)) + std::swap(CmpLHS, CmpRHS); + auto *RHSConst = dyn_cast(CmpRHS); + + // If only one operand is constant. + if (RHSConst != nullptr && !isa(CmpLHS)) + ReplaceWithConstMap[CmpLHS] = RHSConst; + } + } + return Changed; +} static void patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value @@ -1789,7 +1856,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { if (Instruction *ReplInst = dyn_cast(Repl)) { // FIXME: If both the original and replacement value are part of the // same control-flow region (meaning that the execution of one - // guarentees the executation of the other), then we can combine the + // guarantees the execution of the other), then we can combine the // noalias scopes here and do better than the general conservative // answer used in combineMetadata(). @@ -1797,13 +1864,10 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { // regions, and so we need a conservative combination of the noalias // scopes. 
static const unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - }; + LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_invariant_group}; combineMetadata(ReplInst, I, KnownIDs); } } @@ -1890,10 +1954,8 @@ bool GVN::processLoad(LoadInst *L) { ++NumGVNLoad; return true; } - } - // If the value isn't available, don't do anything! - if (Dep.isClobber()) { + // If the value isn't available, don't do anything! DEBUG( // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; @@ -2049,11 +2111,31 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E, return Pred != nullptr; } +// Tries to replace instruction with const, using information from +// ReplaceWithConstMap. +bool GVN::replaceOperandsWithConsts(Instruction *Instr) const { + bool Changed = false; + for (unsigned OpNum = 0; OpNum < Instr->getNumOperands(); ++OpNum) { + Value *Operand = Instr->getOperand(OpNum); + auto it = ReplaceWithConstMap.find(Operand); + if (it != ReplaceWithConstMap.end()) { + assert(!isa(Operand) && + "Replacing constants with constants is invalid"); + DEBUG(dbgs() << "GVN replacing: " << *Operand << " with " << *it->second + << " in instruction " << *Instr << '\n'); + Instr->setOperand(OpNum, it->second); + Changed = true; + } + } + return Changed; +} + /// The given values are known to be equal in every block /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with /// 'RHS' everywhere in the scope. Returns whether a change was made. -bool GVN::propagateEquality(Value *LHS, Value *RHS, - const BasicBlockEdge &Root) { +/// If DominatesByEdge is false, then it means that it is dominated by Root.End. +bool GVN::propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root, + bool DominatesByEdge) { SmallVector, 4> Worklist; Worklist.push_back(std::make_pair(LHS, RHS)); bool Changed = false; @@ -2065,11 +2147,13 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, std::pair Item = Worklist.pop_back_val(); LHS = Item.first; RHS = Item.second; - if (LHS == RHS) continue; + if (LHS == RHS) + continue; assert(LHS->getType() == RHS->getType() && "Equality but unequal types!"); // Don't try to propagate equalities between constants. - if (isa(LHS) && isa(RHS)) continue; + if (isa(LHS) && isa(RHS)) + continue; // Prefer a constant on the right-hand side, or an Argument if no constants. if (isa(LHS) || (isa(LHS) && !isa(RHS))) @@ -2108,7 +2192,11 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, // LHS always has at least one use that is not dominated by Root, this will // never do anything if LHS has only one use. if (!LHS->hasOneUse()) { - unsigned NumReplacements = replaceDominatedUsesWith(LHS, RHS, *DT, Root); + unsigned NumReplacements = + DominatesByEdge + ? replaceDominatedUsesWith(LHS, RHS, *DT, Root) + : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getEnd()); + Changed |= NumReplacements > 0; NumGVNEqProp += NumReplacements; } @@ -2180,7 +2268,10 @@ bool GVN::propagateEquality(Value *LHS, Value *RHS, Value *NotCmp = findLeader(Root.getEnd(), Num); if (NotCmp && isa(NotCmp)) { unsigned NumReplacements = - replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root); + DominatesByEdge + ? 
replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root) + : replaceDominatedUsesWith(NotCmp, NotVal, *DT, + Root.getEnd()); Changed |= NumReplacements > 0; NumGVNEqProp += NumReplacements; } @@ -2220,6 +2311,10 @@ bool GVN::processInstruction(Instruction *I) { return true; } + if (IntrinsicInst *IntrinsicI = dyn_cast(I)) + if (IntrinsicI->getIntrinsicID() == Intrinsic::assume) + return processAssumeIntrinsic(IntrinsicI); + if (LoadInst *LI = dyn_cast(I)) { if (processLoad(LI)) return true; @@ -2250,11 +2345,11 @@ bool GVN::processInstruction(Instruction *I) { Value *TrueVal = ConstantInt::getTrue(TrueSucc->getContext()); BasicBlockEdge TrueE(Parent, TrueSucc); - Changed |= propagateEquality(BranchCond, TrueVal, TrueE); + Changed |= propagateEquality(BranchCond, TrueVal, TrueE, true); Value *FalseVal = ConstantInt::getFalse(FalseSucc->getContext()); BasicBlockEdge FalseE(Parent, FalseSucc); - Changed |= propagateEquality(BranchCond, FalseVal, FalseE); + Changed |= propagateEquality(BranchCond, FalseVal, FalseE, true); return Changed; } @@ -2276,7 +2371,7 @@ bool GVN::processInstruction(Instruction *I) { // If there is only a single edge, propagate the case value into it. if (SwitchEdges.lookup(Dst) == 1) { BasicBlockEdge E(Parent, Dst); - Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E); + Changed |= propagateEquality(SwitchCond, i.getCaseValue(), E, true); } } return Changed; @@ -2284,7 +2379,8 @@ bool GVN::processInstruction(Instruction *I) { // Instructions with void type don't return a value, so there's // no point in trying to find redundancies in them. - if (I->getType()->isVoidTy()) return false; + if (I->getType()->isVoidTy()) + return false; uint32_t NextNum = VN.getNextUnusedValueNumber(); unsigned Num = VN.lookup_or_add(I); @@ -2306,17 +2402,21 @@ bool GVN::processInstruction(Instruction *I) { // Perform fast-path value-number based elimination of values inherited from // dominators. - Value *repl = findLeader(I->getParent(), Num); - if (!repl) { + Value *Repl = findLeader(I->getParent(), Num); + if (!Repl) { // Failure, just remember this instance for future use. addToLeaderTable(Num, I, I->getParent()); return false; + } else if (Repl == I) { + // If I was the result of a shortcut PRE, it might already be in the table + // and the best replacement for itself. Nothing to do. + return false; } // Remove it! - patchAndReplaceAllUsesWith(I, repl); - if (MD && repl->getType()->getScalarType()->isPointerTy()) - MD->invalidateCachedPointerInfo(repl); + patchAndReplaceAllUsesWith(I, Repl); + if (MD && Repl->getType()->getScalarType()->isPointerTy()) + MD->invalidateCachedPointerInfo(Repl); markInstructionForDeletion(I); return true; } @@ -2331,7 +2431,7 @@ bool GVN::runOnFunction(Function& F) { DT = &getAnalysis().getDomTree(); AC = &getAnalysis().getAssumptionCache(F); TLI = &getAnalysis().getTLI(); - VN.setAliasAnalysis(&getAnalysis()); + VN.setAliasAnalysis(&getAnalysis().getAAResults()); VN.setMemDep(MD); VN.setDomTree(DT); @@ -2341,10 +2441,10 @@ bool GVN::runOnFunction(Function& F) { // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. 
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) { - BasicBlock *BB = FI++; + BasicBlock *BB = &*FI++; - bool removedBlock = MergeBlockIntoPredecessor( - BB, DT, /* LoopInfo */ nullptr, VN.getAliasAnalysis(), MD); + bool removedBlock = + MergeBlockIntoPredecessor(BB, DT, /* LoopInfo */ nullptr, MD); if (removedBlock) ++NumGVNBlocks; Changed |= removedBlock; @@ -2382,7 +2482,6 @@ bool GVN::runOnFunction(Function& F) { return Changed; } - bool GVN::processBlock(BasicBlock *BB) { // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function // (and incrementing BI before processing an instruction). @@ -2391,11 +2490,16 @@ bool GVN::processBlock(BasicBlock *BB) { if (DeadBlocks.count(BB)) return false; + // Clearing map before every BB because it can be used only for single BB. + ReplaceWithConstMap.clear(); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - ChangedFunction |= processInstruction(BI); + if (!ReplaceWithConstMap.empty()) + ChangedFunction |= replaceOperandsWithConsts(&*BI); + ChangedFunction |= processInstruction(&*BI); + if (InstrsToErase.empty()) { ++BI; continue; @@ -2439,7 +2543,14 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Value *Op = Instr->getOperand(i); if (isa(Op) || isa(Op) || isa(Op)) continue; - + // This could be a newly inserted instruction, in which case, we won't + // find a value number, and should give up before we hurt ourselves. + // FIXME: Rewrite the infrastructure to let it easier to value number + // and process newly inserted instructions. + if (!VN.exists(Op)) { + success = false; + break; + } if (Value *V = findLeader(Pred, VN.lookup(Op))) { Instr->setOperand(i, V); } else { @@ -2499,9 +2610,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { BasicBlock *CurrentBlock = CurInst->getParent(); predMap.clear(); - for (pred_iterator PI = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); - PI != PE; ++PI) { - BasicBlock *P = *PI; + for (BasicBlock *P : predecessors(CurrentBlock)) { // We're not interested in PRE where the block is its // own predecessor, or in blocks with predecessors // that are not reachable. @@ -2570,7 +2679,7 @@ bool GVN::performScalarPRE(Instruction *CurInst) { // Create a PHI to make the value available in this block. PHINode *Phi = PHINode::Create(CurInst->getType(), predMap.size(), - CurInst->getName() + ".pre-phi", CurrentBlock->begin()); + CurInst->getName() + ".pre-phi", &CurrentBlock->front()); for (unsigned i = 0, e = predMap.size(); i != e; ++i) { if (Value *V = predMap[i].first) Phi->addIncoming(V, predMap[i].second); @@ -2582,18 +2691,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) { addToLeaderTable(ValNo, Phi, CurrentBlock); Phi->setDebugLoc(CurInst->getDebugLoc()); CurInst->replaceAllUsesWith(Phi); - if (Phi->getType()->getScalarType()->isPointerTy()) { - // Because we have added a PHI-use of the pointer value, it has now - // "escaped" from alias analysis' perspective. We need to inform - // AA of this. 
- for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii) { - unsigned jj = PHINode::getOperandNumForIncomingValue(ii); - VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj)); - } - - if (MD) - MD->invalidateCachedPointerInfo(Phi); - } + if (MD && Phi->getType()->getScalarType()->isPointerTy()) + MD->invalidateCachedPointerInfo(Phi); VN.erase(CurInst); removeFromLeaderTable(ValNo, CurInst, CurrentBlock); @@ -2616,15 +2715,15 @@ bool GVN::performPRE(Function &F) { if (CurrentBlock == &F.getEntryBlock()) continue; - // Don't perform PRE on a landing pad. - if (CurrentBlock->isLandingPad()) + // Don't perform PRE on an EH pad. + if (CurrentBlock->isEHPad()) continue; for (BasicBlock::iterator BI = CurrentBlock->begin(), BE = CurrentBlock->end(); BI != BE;) { - Instruction *CurInst = BI++; - Changed = performScalarPRE(CurInst); + Instruction *CurInst = &*BI++; + Changed |= performScalarPRE(CurInst); } } @@ -2637,8 +2736,8 @@ bool GVN::performPRE(Function &F) { /// Split the critical edge connecting the given two blocks, and return /// the block inserted to the critical edge. BasicBlock *GVN::splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ) { - BasicBlock *BB = SplitCriticalEdge( - Pred, Succ, CriticalEdgeSplittingOptions(getAliasAnalysis(), DT)); + BasicBlock *BB = + SplitCriticalEdge(Pred, Succ, CriticalEdgeSplittingOptions(DT)); if (MD) MD->invalidateCachedPredecessors(); return BB; @@ -2652,7 +2751,7 @@ bool GVN::splitCriticalEdges() { do { std::pair Edge = toSplit.pop_back_val(); SplitCriticalEdge(Edge.first, Edge.second, - CriticalEdgeSplittingOptions(getAliasAnalysis(), DT)); + CriticalEdgeSplittingOptions(DT)); } while (!toSplit.empty()); if (MD) MD->invalidateCachedPredecessors(); return true; @@ -2728,17 +2827,14 @@ void GVN::addDeadBlock(BasicBlock *BB) { DeadBlocks.insert(Dom.begin(), Dom.end()); // Figure out the dominance-frontier(D). - for (SmallVectorImpl::iterator I = Dom.begin(), - E = Dom.end(); I != E; I++) { - BasicBlock *B = *I; - for (succ_iterator SI = succ_begin(B), SE = succ_end(B); SI != SE; SI++) { - BasicBlock *S = *SI; + for (BasicBlock *B : Dom) { + for (BasicBlock *S : successors(B)) { if (DeadBlocks.count(S)) continue; bool AllPredDead = true; - for (pred_iterator PI = pred_begin(S), PE = pred_end(S); PI != PE; PI++) - if (!DeadBlocks.count(*PI)) { + for (BasicBlock *P : predecessors(S)) + if (!DeadBlocks.count(P)) { AllPredDead = false; break; } @@ -2766,10 +2862,7 @@ void GVN::addDeadBlock(BasicBlock *BB) { continue; SmallVector Preds(pred_begin(B), pred_end(B)); - for (SmallVectorImpl::iterator PI = Preds.begin(), - PE = Preds.end(); PI != PE; PI++) { - BasicBlock *P = *PI; - + for (BasicBlock *P : Preds) { if (!DeadBlocks.count(P)) continue; @@ -2794,7 +2887,7 @@ void GVN::addDeadBlock(BasicBlock *BB) { // R be the target of the dead out-coming edge. // 1) Identify the set of dead blocks implied by the branch's dead outcoming // edge. The result of this step will be {X| X is dominated by R} -// 2) Identify those blocks which haves at least one dead prodecessor. The +// 2) Identify those blocks which haves at least one dead predecessor. The // result of this step will be dominance-frontier(R). // 3) Update the PHIs in DF(R) by replacing the operands corresponding to // dead blocks with "UndefVal" in an hope these PHIs will optimized away. 
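// [Editor's note] Step 2 above in isolation: a successor S of a dead block
// joins the dead set only once every one of its predecessors is dead;
// otherwise S lies on the dominance frontier and only its PHI operands get
// patched. Illustrative helper, not part of the patch:
static bool allPredecessorsDead(BasicBlock *S,
                                const SetVector<BasicBlock *> &DeadBlocks) {
  for (BasicBlock *P : predecessors(S))
    if (!DeadBlocks.count(P))
      return false;
  return true;
}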
@@ -2829,14 +2922,10 @@ bool GVN::processFoldableCondBr(BranchInst *BI) { // instructions, it makes more sense just to "fabricate" a val-number for the // dead code than checking if instruction involved is dead or not. void GVN::assignValNumForDeadCode() { - for (SetVector::iterator I = DeadBlocks.begin(), - E = DeadBlocks.end(); I != E; I++) { - BasicBlock *BB = *I; - for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); - II != EE; II++) { - Instruction *Inst = &*II; - unsigned ValNum = VN.lookup_or_add(Inst); - addToLeaderTable(ValNum, Inst, BB); + for (BasicBlock *BB : DeadBlocks) { + for (Instruction &Inst : *BB) { + unsigned ValNum = VN.lookup_or_add(&Inst); + addToLeaderTable(ValNum, &Inst, BB); } } } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 2a954d9961f2..ec5e15f0b8f8 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -28,9 +28,11 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -48,6 +50,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; @@ -83,64 +86,62 @@ static cl::opt ReplaceExitValue( namespace { struct RewritePhi; -} -namespace { - class IndVarSimplify : public LoopPass { - LoopInfo *LI; - ScalarEvolution *SE; - DominatorTree *DT; - TargetLibraryInfo *TLI; - const TargetTransformInfo *TTI; +class IndVarSimplify : public LoopPass { + LoopInfo *LI; + ScalarEvolution *SE; + DominatorTree *DT; + TargetLibraryInfo *TLI; + const TargetTransformInfo *TTI; - SmallVector DeadInsts; - bool Changed; - public: + SmallVector DeadInsts; + bool Changed; +public: - static char ID; // Pass identification, replacement for typeid - IndVarSimplify() - : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) { - initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); - } + static char ID; // Pass identification, replacement for typeid + IndVarSimplify() + : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr), Changed(false) { + initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry()); + } - bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &LPM) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreserved(); - AU.addPreservedID(LoopSimplifyID); - AU.addPreservedID(LCSSAID); - AU.setPreservesCFG(); - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreservedID(LoopSimplifyID); + AU.addPreservedID(LCSSAID); + AU.setPreservesCFG(); + } - private: - void releaseMemory() override { - DeadInsts.clear(); - } +private: + void releaseMemory() override { + DeadInsts.clear(); + } - bool 
isValidRewrite(Value *FromVal, Value *ToVal); + bool isValidRewrite(Value *FromVal, Value *ToVal); - void HandleFloatingPointIV(Loop *L, PHINode *PH); - void RewriteNonIntegerIVs(Loop *L); + void handleFloatingPointIV(Loop *L, PHINode *PH); + void rewriteNonIntegerIVs(Loop *L); - void SimplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LPPassManager &LPM); + void simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI); - bool CanLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); - void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); + bool canLoopBeDeleted(Loop *L, SmallVector &RewritePhiSet); + void rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter); - Value *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, - PHINode *IndVar, SCEVExpander &Rewriter); + Value *linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, + PHINode *IndVar, SCEVExpander &Rewriter); - void SinkUnusedInvariants(Loop *L); + void sinkUnusedInvariants(Loop *L); - Value *ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, - Instruction *InsertPt, Type *Ty, - bool &IsHighCostExpansion); - }; + Value *expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, + Instruction *InsertPt, Type *Ty); +}; } char IndVarSimplify::ID = 0; @@ -148,7 +149,7 @@ INITIALIZE_PASS_BEGIN(IndVarSimplify, "indvars", "Induction Variable Simplification", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(IndVarSimplify, "indvars", @@ -158,10 +159,10 @@ Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); } -/// isValidRewrite - Return true if the SCEV expansion generated by the -/// rewriter can replace the original value. SCEV guarantees that it -/// produces the same value, but the way it is produced may be illegal IR. -/// Ideally, this function will only be called for verification. +/// Return true if the SCEV expansion generated by the rewriter can replace the +/// original value. SCEV guarantees that it produces the same value, but the way +/// it is produced may be illegal IR. Ideally, this function will only be +/// called for verification. bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { // If an SCEV expression subsumed multiple pointers, its expansion could // reassociate the GEP changing the base pointer. This is illegal because the @@ -175,10 +176,10 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { // because it understands lcssa phis while SCEV does not. Value *FromPtr = FromVal; Value *ToPtr = ToVal; - if (GEPOperator *GEP = dyn_cast(FromVal)) { + if (auto *GEP = dyn_cast(FromVal)) { FromPtr = GEP->getPointerOperand(); } - if (GEPOperator *GEP = dyn_cast(ToVal)) { + if (auto *GEP = dyn_cast(ToVal)) { ToPtr = GEP->getPointerOperand(); } if (FromPtr != FromVal || ToPtr != ToVal) { @@ -215,7 +216,7 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { /// loop. For PHI nodes, there may be multiple uses, so compute the nearest /// common dominator for the incoming blocks. 
static Instruction *getInsertPointForUses(Instruction *User, Value *Def, - DominatorTree *DT) { + DominatorTree *DT, LoopInfo *LI) { PHINode *PHI = dyn_cast<PHINode>(User); if (!PHI) return User;
@@ -234,17 +235,28 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, InsertPt = InsertBB->getTerminator(); } assert(InsertPt && "Missing phi operand"); - assert((!isa<Instruction>(Def) || - DT->dominates(cast<Instruction>(Def), InsertPt)) && - "def does not dominate all uses"); - return InsertPt; + + auto *DefI = dyn_cast<Instruction>(Def); + if (!DefI) + return InsertPt; + + assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); + + auto *L = LI->getLoopFor(DefI->getParent()); + assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); + + for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) + if (LI->getLoopFor(DTN->getBlock()) == L) + return DTN->getBlock()->getTerminator(); + + llvm_unreachable("DefI dominates InsertPt!"); }
//===----------------------------------------------------------------------===// -// RewriteNonIntegerIVs and helpers. Prefer integer IVs. +// rewriteNonIntegerIVs and helpers. Prefer integer IVs. //===----------------------------------------------------------------------===//
-/// ConvertToSInt - Convert APF to an integer, if possible. +/// Convert APF to an integer, if possible. static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { bool isExact = false; // See if we can convert this to an int64_t
@@ -256,8 +268,8 @@ static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) { return true; }
-/// HandleFloatingPointIV - If the loop has floating induction variable -/// then insert corresponding integer induction variable if possible. +/// If the loop has a floating-point induction variable, then insert a +/// corresponding integer induction variable if possible. /// For example, /// for(double i = 0; i < 10000; ++i) /// bar(i)
@@ -265,13 +277,12 @@ /// for(int i = 0; i < 10000; ++i) /// bar((double)i); /// -void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { +void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) { unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0)); unsigned BackEdge = IncomingEdge^1;
// Check incoming value. - ConstantFP *InitValueVal = - dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); + auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge)); int64_t InitValue; if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
@@ -279,8 +290,7 @@ // Check the IV increment. Reject this PN if the increment operation is not // an add or the increment value cannot be represented by an integer. - BinaryOperator *Incr = - dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); + auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge)); if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return; // If this is not an add of the PHI with a constantfp, or if the constant fp
@@ -456,14 +466,14 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { // platforms. if (WeakPH) { Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv", - PN->getParent()->getFirstInsertionPt()); + &*PN->getParent()->getFirstInsertionPt()); PN->replaceAllUsesWith(Conv); RecursivelyDeleteTriviallyDeadInstructions(PN, TLI); } Changed = true; }
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { +void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) { // First step.
Check to see if there are any floating-point recurrences. // If there are, change them into integer recurrences, permitting analysis by // the SCEV routines. @@ -477,7 +487,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { for (unsigned i = 0, e = PHIs.size(); i != e; ++i) if (PHINode *PN = dyn_cast_or_null(&*PHIs[i])) - HandleFloatingPointIV(L, PN); + handleFloatingPointIV(L, PN); // If the loop previously had floating-point IV, ScalarEvolution // may not have been able to compute a trip count. Now that we've done some @@ -488,7 +498,7 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { namespace { // Collect information about PHI nodes which can be transformed in -// RewriteLoopExitValues. +// rewriteLoopExitValues. struct RewritePhi { PHINode *PN; unsigned Ith; // Ith incoming value. @@ -501,70 +511,37 @@ struct RewritePhi { }; } -Value *IndVarSimplify::ExpandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, +Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, Loop *L, Instruction *InsertPt, - Type *ResultTy, - bool &IsHighCostExpansion) { - using namespace llvm::PatternMatch; - - if (!Rewriter.isHighCostExpansion(S, L)) { - IsHighCostExpansion = false; - return Rewriter.expandCodeFor(S, ResultTy, InsertPt); - } - + Type *ResultTy) { // Before expanding S into an expensive LLVM expression, see if we can use an - // already existing value as the expansion for S. There is potential to make - // this significantly smarter, but this simple heuristic already gets some - // interesting cases. - - SmallVector Latches; - L->getLoopLatches(Latches); - - for (BasicBlock *BB : Latches) { - ICmpInst::Predicate Pred; - Instruction *LHS, *RHS; - BasicBlock *TrueBB, *FalseBB; - - if (!match(BB->getTerminator(), - m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), - TrueBB, FalseBB))) - continue; - - if (SE->getSCEV(LHS) == S && DT->dominates(LHS, InsertPt)) { - IsHighCostExpansion = false; - return LHS; - } - - if (SE->getSCEV(RHS) == S && DT->dominates(RHS, InsertPt)) { - IsHighCostExpansion = false; - return RHS; - } - } + // already existing value as the expansion for S. + if (Value *ExistingValue = Rewriter.findExistingExpansion(S, InsertPt, L)) + if (ExistingValue->getType() == ResultTy) + return ExistingValue; // We didn't find anything, fall back to using SCEVExpander. - assert(Rewriter.isHighCostExpansion(S, L) && "this should not have changed!"); - IsHighCostExpansion = true; return Rewriter.expandCodeFor(S, ResultTy, InsertPt); } //===----------------------------------------------------------------------===// -// RewriteLoopExitValues - Optimize IV users outside the loop. +// rewriteLoopExitValues - Optimize IV users outside the loop. // As a side effect, reduces the amount of IV processing within the loop. //===----------------------------------------------------------------------===// -/// RewriteLoopExitValues - Check to see if this loop has a computable -/// loop-invariant execution count. If so, this means that we can compute the -/// final value of any expressions that are recurrent in the loop, and -/// substitute the exit values from the loop into any instructions outside of -/// the loop that use the final values of the current expressions. +/// Check to see if this loop has a computable loop-invariant execution count. 
+/// If so, this means that we can compute the final value of any expressions +/// that are recurrent in the loop, and substitute the exit values from the loop +/// into any instructions outside of the loop that use the final values of the +/// current expressions. /// /// This is mostly redundant with the regular IndVarSimplify activities that /// happen later, except that it's more powerful in some cases, because it's /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. -void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { - // Verify the input to the pass in already in LCSSA form. - assert(L->isLCSSAForm(*DT)); +void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { + // Check a pre-condition. + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -679,9 +656,9 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { continue; } - bool HighCost = false; - Value *ExitVal = ExpandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, - PN->getType(), HighCost); + bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst); + Value *ExitVal = + expandSCEVIfNeeded(Rewriter, ExitValue, L, Inst, PN->getType()); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); @@ -698,7 +675,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { } } - bool LoopCanBeDel = CanLoopBeDeleted(L, RewritePhiSet); + bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet); // Transformation. for (const RewritePhi &Phi : RewritePhiSet) { @@ -735,10 +712,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { Rewriter.clearInsertPoint(); } -/// CanLoopBeDeleted - Check whether it is possible to delete the loop after -/// rewriting exit value. If it is possible, ignore ReplaceExitValue and -/// do rewriting aggressively. -bool IndVarSimplify::CanLoopBeDeleted( +/// Check whether it is possible to delete the loop after rewriting exit +/// value. If it is possible, ignore ReplaceExitValue and do rewriting +/// aggressively. +bool IndVarSimplify::canLoopBeDeleted( Loop *L, SmallVector &RewritePhiSet) { BasicBlock *Preheader = L->getLoopPreheader(); @@ -782,14 +759,9 @@ bool IndVarSimplify::CanLoopBeDeleted( ++BI; } - for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); - LI != LE; ++LI) { - for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE; - ++BI) { - if (BI->mayHaveSideEffects()) - return false; - } - } + for (auto *BB : L->blocks()) + if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); })) + return false; return true; } @@ -799,22 +771,19 @@ bool IndVarSimplify::CanLoopBeDeleted( //===----------------------------------------------------------------------===// namespace { - // Collect information about induction variables that are used by sign/zero - // extend operations. This information is recorded by CollectExtend and - // provides the input to WidenIV. - struct WideIVInfo { - PHINode *NarrowIV; - Type *WidestNativeType; // Widest integer type created [sz]ext - bool IsSigned; // Was a sext user seen before a zext? - - WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr), - IsSigned(false) {} - }; +// Collect information about induction variables that are used by sign/zero +// extend operations. 
This information is recorded by CollectExtend and provides +// the input to WidenIV. +struct WideIVInfo { + PHINode *NarrowIV = nullptr; + Type *WidestNativeType = nullptr; // Widest integer type created [sz]ext + bool IsSigned = false; // Was a sext user seen before a zext? +}; } -/// visitCast - Update information about the induction variable that is -/// extended by this sign or zero extend operation. This is used to determine -/// the final width of the IV before actually widening it. +/// Update information about the induction variable that is extended by this +/// sign or zero extend operation. This is used to determine the final width of +/// the IV before actually widening it. static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, const TargetTransformInfo *TTI) { bool IsSigned = Cast->getOpcode() == Instruction::SExt; @@ -855,24 +824,29 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE, namespace { -/// NarrowIVDefUse - Record a link in the Narrow IV def-use chain along with the -/// WideIV that computes the same value as the Narrow IV def. This avoids -/// caching Use* pointers. +/// Record a link in the Narrow IV def-use chain along with the WideIV that +/// computes the same value as the Narrow IV def. This avoids caching Use* +/// pointers. struct NarrowIVDefUse { - Instruction *NarrowDef; - Instruction *NarrowUse; - Instruction *WideDef; + Instruction *NarrowDef = nullptr; + Instruction *NarrowUse = nullptr; + Instruction *WideDef = nullptr; - NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {} + // True if the narrow def is never negative. Tracking this information lets + // us use a sign extension instead of a zero extension or vice versa, when + // profitable and legal. + bool NeverNegative = false; - NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD): - NarrowDef(ND), NarrowUse(NU), WideDef(WD) {} + NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD, + bool NeverNegative) + : NarrowDef(ND), NarrowUse(NU), WideDef(WD), + NeverNegative(NeverNegative) {} }; -/// WidenIV - The goal of this transform is to remove sign and zero extends -/// without creating any new induction variables. To do this, it creates a new -/// phi of the wider type and redirects all users, either removing extends or -/// inserting truncs whenever we stop propagating the type. +/// The goal of this transform is to remove sign and zero extends without +/// creating any new induction variables. To do this, it creates a new phi of +/// the wider type and redirects all users, either removing extends or inserting +/// truncs whenever we stop propagating the type. 
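Before the class that implements this, it may help to see the intended end state. A hypothetical before/after pair in plain C++ (invented types and trip count; the real transform works on LLVM IR, not source), showing the per-iteration extend disappearing once the IV itself is widened:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Before widening: an i8 IV that is sign-extended by every user.
    int64_t before = 0;
    for (int8_t i = 0; i != 100; ++i)
      before += (int64_t)i; // sext inside the loop on each iteration

    // After widening: one i64 IV, no extend left in the loop body.
    int64_t after = 0;
    for (int64_t i = 0; i != 100; ++i)
      after += i;

    printf("%lld %lld\n", (long long)before, (long long)after); // 4950 4950
    return 0;
  }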
/// class WidenIV { // Parameters
@@ -913,32 +887,35 @@ public: assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV"); } - PHINode *CreateWideIV(SCEVExpander &Rewriter); + PHINode *createWideIV(SCEVExpander &Rewriter);
protected: - Value *getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, - Instruction *Use); + Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned, + Instruction *Use);
- Instruction *CloneIVUser(NarrowIVDefUse DU); + Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR); + Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR); + Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
- const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse); + const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);
- const SCEVAddRecExpr* GetExtendedOperandRecurrence(NarrowIVDefUse DU); + const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);
- const SCEV *GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, + const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, unsigned OpCode) const;
- Instruction *WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter); + Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);
- bool WidenLoopCompare(NarrowIVDefUse DU); + bool widenLoopCompare(NarrowIVDefUse DU);
void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef); }; } // anonymous namespace
-/// isLoopInvariant - Perform a quick domtree based check for loop invariance -/// assuming that V is used within the loop. LoopInfo::isLoopInvariant() seems -/// gratuitous for this purpose. +/// Perform a quick domtree based check for loop invariance assuming that V is +/// used within the loop. LoopInfo::isLoopInvariant() seems gratuitous for this +/// purpose. static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) { Instruction *Inst = dyn_cast<Instruction>(V); if (!Inst)
@@ -947,8 +924,8 @@ static bool isLoopInvariant(Value *V, const Loop *L, const DominatorTree *DT) { return DT->properlyDominates(Inst->getParent(), L->getHeader()); }
-Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, - Instruction *Use) { +Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType, + bool IsSigned, Instruction *Use) { // Set the debug location and conservative insertion point. IRBuilder<> Builder(Use); // Hoist the insertion point into loop preheaders as far as possible.
@@ -961,10 +938,11 @@ Value *WidenIV::getExtend(Value *NarrowOper, Type *WideType, bool IsSigned, Builder.CreateZExt(NarrowOper, WideType); }
-/// CloneIVUser - Instantiate a wide operation to replace a narrow -/// operation. This only needs to handle operations that can evaluation to -/// SCEVAddRec. It can safely return 0 for any operation we decide not to clone. -Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { +/// Instantiate a wide operation to replace a narrow operation. This only needs +/// to handle operations that can evaluate to SCEVAddRec. It can safely return +/// 0 for any operation we decide not to clone.
+Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { unsigned Opcode = DU.NarrowUse->getOpcode(); switch (Opcode) { default: @@ -973,40 +951,140 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) { case Instruction::Mul: case Instruction::UDiv: case Instruction::Sub: + return cloneArithmeticIVUser(DU, WideAR); + case Instruction::And: case Instruction::Or: case Instruction::Xor: case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - DEBUG(dbgs() << "Cloning IVUser: " << *DU.NarrowUse << "\n"); - - // Replace NarrowDef operands with WideDef. Otherwise, we don't know - // anything about the narrow operand yet so must insert a [sz]ext. It is - // probably loop invariant and will be folded or hoisted. If it actually - // comes from a widened IV, it should be removed during a future call to - // WidenIVUse. - Value *LHS = (DU.NarrowUse->getOperand(0) == DU.NarrowDef) ? DU.WideDef : - getExtend(DU.NarrowUse->getOperand(0), WideType, IsSigned, DU.NarrowUse); - Value *RHS = (DU.NarrowUse->getOperand(1) == DU.NarrowDef) ? DU.WideDef : - getExtend(DU.NarrowUse->getOperand(1), WideType, IsSigned, DU.NarrowUse); - - BinaryOperator *NarrowBO = cast(DU.NarrowUse); - BinaryOperator *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), - LHS, RHS, - NarrowBO->getName()); - IRBuilder<> Builder(DU.NarrowUse); - Builder.Insert(WideBO); - if (const OverflowingBinaryOperator *OBO = - dyn_cast(NarrowBO)) { - if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap(); - if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap(); - } - return WideBO; + return cloneBitwiseIVUser(DU); } } -const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, +Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n"); + + // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything + // about the narrow operand yet so must insert a [sz]ext. It is probably loop + // invariant and will be folded or hoisted. If it actually comes from a + // widened IV, it should be removed during a future call to widenIVUse. + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + IsSigned, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + IsSigned, NarrowUse); + + auto *NarrowBO = cast(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU, + const SCEVAddRecExpr *WideAR) { + Instruction *NarrowUse = DU.NarrowUse; + Instruction *NarrowDef = DU.NarrowDef; + Instruction *WideDef = DU.WideDef; + + DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n"); + + unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1; + + // We're trying to find X such that + // + // Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X + // + // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef), + // and check using SCEV if any of them are correct. 
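The guess-and-check above can be exercised numerically. Below is a brute-force analogue for a sign-extended i8-to-i64 widening of `n + c`: enumeration over the narrow domain stands in for the symbolic SCEV query, and the helper name is invented for this sketch:

  #include <cstdint>
  #include <cstdio>

  // Does "widen(n + c) == widen(n) + X" hold for X = sext(c) or X = zext(c)?
  // Only non-wrapping additions are tested, mirroring the nsw assumption.
  static bool guessWorks(int8_t c, bool signExt) {
    int64_t X = signExt ? (int64_t)c : (int64_t)(uint8_t)c;
    for (int n = -128; n <= 127; ++n) {
      int sum = n + c;
      if (sum < -128 || sum > 127)
        continue; // the narrow add would wrap; nsw rules this case out
      if ((int64_t)(int8_t)sum != (int64_t)n + X)
        return false;
    }
    return true;
  }

  int main() {
    int8_t c = -3;
    printf("sext(c) guess: %s\n", guessWorks(c, true) ? "works" : "fails");
    printf("zext(c) guess: %s\n", guessWorks(c, false) ? "works" : "fails");
    return 0;
  }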
+ + // Returns true if extending NonIVNarrowDef according to `SignExt` is a + // correct solution to X. + auto GuessNonIVOperand = [&](bool SignExt) { + const SCEV *WideLHS; + const SCEV *WideRHS; + + auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) { + if (SignExt) + return SE->getSignExtendExpr(S, Ty); + return SE->getZeroExtendExpr(S, Ty); + }; + + if (IVOpIdx == 0) { + WideLHS = SE->getSCEV(WideDef); + const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1)); + WideRHS = GetExtend(NarrowRHS, WideType); + } else { + const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0)); + WideLHS = GetExtend(NarrowLHS, WideType); + WideRHS = SE->getSCEV(WideDef); + } + + // WideUse is "WideDef `op.wide` X" as described in the comment. + const SCEV *WideUse = nullptr; + + switch (NarrowUse->getOpcode()) { + default: + llvm_unreachable("No other possibility!"); + + case Instruction::Add: + WideUse = SE->getAddExpr(WideLHS, WideRHS); + break; + + case Instruction::Mul: + WideUse = SE->getMulExpr(WideLHS, WideRHS); + break; + + case Instruction::UDiv: + WideUse = SE->getUDivExpr(WideLHS, WideRHS); + break; + + case Instruction::Sub: + WideUse = SE->getMinusSCEV(WideLHS, WideRHS); + break; + } + + return WideUse == WideAR; + }; + + bool SignExtend = IsSigned; + if (!GuessNonIVOperand(SignExtend)) { + SignExtend = !SignExtend; + if (!GuessNonIVOperand(SignExtend)) + return nullptr; + } + + Value *LHS = (NarrowUse->getOperand(0) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(0), WideType, + SignExtend, NarrowUse); + Value *RHS = (NarrowUse->getOperand(1) == NarrowDef) + ? WideDef + : createExtendInst(NarrowUse->getOperand(1), WideType, + SignExtend, NarrowUse); + + auto *NarrowBO = cast(NarrowUse); + auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS, + NarrowBO->getName()); + + IRBuilder<> Builder(NarrowUse); + Builder.Insert(WideBO); + WideBO->copyIRFlags(NarrowBO); + return WideBO; +} + +const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, unsigned OpCode) const { if (OpCode == Instruction::Add) return SE->getAddExpr(LHS, RHS); @@ -1022,7 +1100,7 @@ const SCEV *WidenIV::GetSCEVByOpCode(const SCEV *LHS, const SCEV *RHS, /// operands. Generate the SCEV value for the widened operation without /// actually modifying the IR yet. If the expression after extending the /// operands is an AddRec for this loop, return it. -const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { +const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) { // Handle the common case of add const unsigned OpCode = DU.NarrowUse->getOpcode(); @@ -1062,19 +1140,18 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) { if (ExtendOperIdx == 0) std::swap(lhs, rhs); const SCEVAddRecExpr *AddRec = - dyn_cast(GetSCEVByOpCode(lhs, rhs, OpCode)); + dyn_cast(getSCEVByOpCode(lhs, rhs, OpCode)); if (!AddRec || AddRec->getLoop() != L) return nullptr; return AddRec; } -/// GetWideRecurrence - Is this instruction potentially interesting for further -/// simplification after widening it's type? In other words, can the -/// extend be safely hoisted out of the loop with SCEV reducing the value to a -/// recurrence on the same loop. If so, return the sign or zero extended -/// recurrence. Otherwise return NULL. -const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { +/// Is this instruction potentially interesting for further simplification after +/// widening it's type? 
In other words, can the extend be safely hoisted out of +/// the loop with SCEV reducing the value to a recurrence on the same loop. If +/// so, return the sign or zero extended recurrence. Otherwise return NULL. +const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) { if (!SE->isSCEVable(NarrowUse->getType())) return nullptr; @@ -1097,10 +1174,11 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) { /// This IV user cannot be widen. Replace this use of the original narrow IV /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. -static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { +static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " << *DU.NarrowUse << "\n"); - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); } @@ -1108,13 +1186,27 @@ static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { /// If the narrow use is a compare instruction, then widen the compare // (and possibly the other operand). The extend operation is hoisted into the // loop preheader as far as possible. -bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) { +bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { ICmpInst *Cmp = dyn_cast(DU.NarrowUse); if (!Cmp) return false; - // Sign of IV user and compare must match. - if (IsSigned != CmpInst::isSigned(Cmp->getPredicate())) + // We can legally widen the comparison in the following two cases: + // + // - The signedness of the IV extension and comparison match + // + // - The narrow IV is always positive (and thus its sign extension is equal + // to its zero extension). For instance, let's say we're zero extending + // %narrow for the following use + // + // icmp slt i32 %narrow, %val ... (A) + // + // and %narrow is always positive. Then + // + // (A) == icmp slt i32 sext(%narrow), sext(%val) + // == icmp slt i32 zext(%narrow), sext(%val) + + if (!(DU.NeverNegative || IsSigned == Cmp->isSigned())) return false; Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0); @@ -1123,20 +1215,21 @@ bool WidenIV::WidenLoopCompare(NarrowIVDefUse DU) { assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); // Widen the other operand of the compare, if necessary. if (CastWidth < IVWidth) { - Value *ExtOp = getExtend(Op, WideType, IsSigned, Cmp); + Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp); DU.NarrowUse->replaceUsesOfWith(Op, ExtOp); } return true; } -/// WidenIVUse - Determine whether an individual user of the narrow IV can be -/// widened. If so, return the wide clone of the user. -Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { +/// Determine whether an individual user of the narrow IV can be widened. If so, +/// return the wide clone of the user. +Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // Stop traversing the def-use chain at inner-loop phis or post-loop phis. 
-/// WidenIVUse - Determine whether an individual user of the narrow IV can be -/// widened. If so, return the wide clone of the user. -Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { +/// Determine whether an individual user of the narrow IV can be widened. If so, +/// return the wide clone of the user. +Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Stop traversing the def-use chain at inner-loop phis or post-loop phis. if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
@@ -1145,13 +1238,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // After SimplifyCFG most loop exit targets have a single predecessor. // Otherwise fall back to a truncate within the loop. if (UsePhi->getNumOperands() != 1) - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); else { PHINode *WidePhi = PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", UsePhi); WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0)); - IRBuilder<> Builder(WidePhi->getParent()->getFirstInsertionPt()); + IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt()); Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType()); UsePhi->replaceAllUsesWith(Trunc); DeadInsts.emplace_back(UsePhi);
@@ -1200,20 +1293,20 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { }
// Does this user itself evaluate to a recurrence after widening? - const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(DU.NarrowUse); + const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse); if (!WideAddRec) - WideAddRec = GetExtendedOperandRecurrence(DU); + WideAddRec = getExtendedOperandRecurrence(DU); if (!WideAddRec) { // If use is a loop condition, try to promote the condition instead of // truncating the IV first. - if (WidenLoopCompare(DU)) + if (widenLoopCompare(DU)) return nullptr;
// This user does not evaluate to a recurrence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't to
@@ -1228,7 +1321,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { && Rewriter.hoistIVInc(WideInc, DU.NarrowUse)) WideUse = WideInc; else { - WideUse = CloneIVUser(DU); + WideUse = cloneIVUser(DU, WideAddRec); if (!WideUse) return nullptr; }
@@ -1248,9 +1341,13 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { return WideUse; }
-/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers. +/// Add eligible users of NarrowDef to NarrowIVUsers. /// void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { + const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef); + bool NeverNegative = + SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV, + SE->getConstant(NarrowSCEV->getType(), 0)); for (User *U : NarrowDef->users()) { Instruction *NarrowUser = cast<Instruction>(U);
@@ -1258,21 +1355,21 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) { if (!Widened.insert(NarrowUser).second) continue;
- NarrowIVUsers.push_back(NarrowIVDefUse(NarrowDef, NarrowUser, WideDef)); + NarrowIVUsers.push_back( + NarrowIVDefUse(NarrowDef, NarrowUser, WideDef, NeverNegative)); } }
-/// CreateWideIV - Process a single induction variable. First use the -/// SCEVExpander to create a wide induction variable that evaluates to the same -/// recurrence as the original narrow IV. Then use a worklist to forward -/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all -/// interesting IV users, the narrow IV will be isolated for removal by -/// DeleteDeadPHIs. +/// Process a single induction variable. First use the SCEVExpander to create a +/// wide induction variable that evaluates to the same recurrence as the +/// original narrow IV.
Then use a worklist to forward traverse the narrow IV's +/// def-use chain. After widenIVUse has processed all interesting IV users, the +/// narrow IV will be isolated for removal by DeleteDeadPHIs. /// /// It would be simpler to delete uses as they are processed, but we must avoid /// invalidating SCEV expressions. /// -PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { +PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { // Is this phi an induction variable? const SCEVAddRecExpr *AddRec = dyn_cast(SE->getSCEV(OrigPhi)); if (!AddRec) @@ -1302,11 +1399,11 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // either find an existing phi or materialize a new one. Either way, we // expect a well-formed cyclic phi-with-increments. i.e. any operand not part // of the phi-SCC dominates the loop entry. - Instruction *InsertPt = L->getHeader()->begin(); + Instruction *InsertPt = &L->getHeader()->front(); WidePhi = cast(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); // Remembering the WideIV increment generated by SCEVExpander allows - // WidenIVUse to reuse it when widening the narrow IV's increment. We don't + // widenIVUse to reuse it when widening the narrow IV's increment. We don't // employ a general reuse mechanism because the call above is the only call to // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses. if (BasicBlock *LatchBlock = L->getLoopLatch()) { @@ -1329,13 +1426,13 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { // Process a def-use edge. This may replace the use, so don't hold a // use_iterator across it. - Instruction *WideUse = WidenIVUse(DU, Rewriter); + Instruction *WideUse = widenIVUse(DU, Rewriter); // Follow all def-use edges from the previous narrow use. if (WideUse) pushNarrowIVUsers(DU.NarrowUse, WideUse); - // WidenIVUse may have removed the def-use edge. + // widenIVUse may have removed the def-use edge. if (DU.NarrowDef->use_empty()) DeadInsts.emplace_back(DU.NarrowDef); } @@ -1352,38 +1449,38 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) { //===----------------------------------------------------------------------===// namespace { - class IndVarSimplifyVisitor : public IVVisitor { - ScalarEvolution *SE; - const TargetTransformInfo *TTI; - PHINode *IVPhi; +class IndVarSimplifyVisitor : public IVVisitor { + ScalarEvolution *SE; + const TargetTransformInfo *TTI; + PHINode *IVPhi; - public: - WideIVInfo WI; +public: + WideIVInfo WI; - IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, - const TargetTransformInfo *TTI, - const DominatorTree *DTree) - : SE(SCEV), TTI(TTI), IVPhi(IV) { - DT = DTree; - WI.NarrowIV = IVPhi; - if (ReduceLiveIVs) - setSplitOverflowIntrinsics(); - } + IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV, + const TargetTransformInfo *TTI, + const DominatorTree *DTree) + : SE(SCEV), TTI(TTI), IVPhi(IV) { + DT = DTree; + WI.NarrowIV = IVPhi; + if (ReduceLiveIVs) + setSplitOverflowIntrinsics(); + } - // Implement the interface used by simplifyUsersOfIV. - void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } - }; + // Implement the interface used by simplifyUsersOfIV. + void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); } +}; } -/// SimplifyAndExtend - Iteratively perform simplification on a worklist of IV -/// users. Each successive simplification may push more users which may -/// themselves be candidates for simplification. +/// Iteratively perform simplification on a worklist of IV users. 
Each +/// successive simplification may push more users which may themselves be +/// candidates for simplification. /// /// Sign/Zero extend elimination is interleaved with IV simplification. /// -void IndVarSimplify::SimplifyAndExtend(Loop *L, +void IndVarSimplify::simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, - LPPassManager &LPM) { + LoopInfo *LI) { SmallVector WideIVs; SmallVector LoopPhis; @@ -1400,14 +1497,14 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, // extension. The first time SCEV attempts to normalize sign/zero extension, // the result becomes final. So for the most predictable results, we delay // evaluation of sign/zero extend evaluation until needed, and avoid running - // other SCEV based analysis prior to SimplifyAndExtend. + // other SCEV based analysis prior to simplifyAndExtend. do { PHINode *CurrIV = LoopPhis.pop_back_val(); // Information about sign/zero extensions of CurrIV. IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT); - Changed |= simplifyUsersOfIV(CurrIV, SE, &LPM, DeadInsts, &Visitor); + Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, &Visitor); if (Visitor.WI.WidestNativeType) { WideIVs.push_back(Visitor.WI); @@ -1416,7 +1513,7 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, for (; !WideIVs.empty(); WideIVs.pop_back()) { WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts); - if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) { + if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) { Changed = true; LoopPhis.push_back(WidePhi); } @@ -1425,12 +1522,12 @@ void IndVarSimplify::SimplifyAndExtend(Loop *L, } //===----------------------------------------------------------------------===// -// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition. +// linearFunctionTestReplace and its kin. Rewrite the loop exit condition. //===----------------------------------------------------------------------===// -/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken -/// count expression can be safely and cheaply expanded into an instruction -/// sequence that can be used by LinearFunctionTestReplace. +/// Return true if this loop's backedge taken count expression can be safely and +/// cheaply expanded into an instruction sequence that can be used by +/// linearFunctionTestReplace. /// /// TODO: This fails for pointer-type loop counters with greater than one byte /// strides, consequently preventing LFTR from running. For the purpose of LFTR @@ -1461,8 +1558,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE, return true; } -/// getLoopPhiForCounter - Return the loop header phi IFF IncV adds a loop -/// invariant value to the phi. +/// Return the loop header phi IFF IncV adds a loop invariant value to the phi. static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) { Instruction *IncI = dyn_cast(IncV); if (!IncI) @@ -1513,8 +1609,8 @@ static ICmpInst *getLoopTest(Loop *L) { return dyn_cast(BI->getCondition()); } -/// needsLFTR - LinearFunctionTestReplace policy. Return true unless we can show -/// that the current exit test is already sufficiently canonical. +/// linearFunctionTestReplace policy. Return true unless we can show that the +/// current exit test is already sufficiently canonical. static bool needsLFTR(Loop *L, DominatorTree *DT) { // Do LFTR to simplify the exit condition to an ICMP. 
ICmpInst *Cond = getLoopTest(L); @@ -1574,10 +1670,10 @@ static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl &Visited, return false; // Optimistically handle other instructions. - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { - if (!Visited.insert(*OI).second) + for (Value *Op : I->operands()) { + if (!Visited.insert(Op).second) continue; - if (!hasConcreteDefImpl(*OI, Visited, Depth+1)) + if (!hasConcreteDefImpl(Op, Visited, Depth+1)) return false; } return true; @@ -1594,8 +1690,8 @@ static bool hasConcreteDef(Value *V) { return hasConcreteDefImpl(V, Visited, 0); } -/// AlmostDeadIV - Return true if this IV has any uses other than the (soon to -/// be rewritten) loop exit test. +/// Return true if this IV has any uses other than the (soon to be rewritten) +/// loop exit test. static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); Value *IncV = Phi->getIncomingValue(LatchIdx); @@ -1608,7 +1704,7 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { return true; } -/// FindLoopCounter - Find an affine IV in canonical form. +/// Find an affine IV in canonical form. /// /// BECount may be an i8* pointer type. The pointer difference is already /// valid count without scaling the address stride, so it remains a pointer @@ -1702,8 +1798,8 @@ static PHINode *FindLoopCounter(Loop *L, const SCEV *BECount, return BestPhi; } -/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that -/// holds the RHS of the new loop test. +/// Help linearFunctionTestReplace by generating a value that holds the RHS of +/// the new loop test. static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE) { const SCEVAddRecExpr *AR = dyn_cast(SE->getSCEV(IndVar)); @@ -1785,13 +1881,13 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, } } -/// LinearFunctionTestReplace - This method rewrites the exit condition of the -/// loop to be a canonical != comparison against the incremented loop induction -/// variable. This pass is able to rewrite the exit tests of any loop where the -/// SCEV analysis can determine a loop-invariant trip count of the loop, which -/// is actually a much broader range than just linear tests. +/// This method rewrites the exit condition of the loop to be a canonical != +/// comparison against the incremented loop induction variable. This pass is +/// able to rewrite the exit tests of any loop where the SCEV analysis can +/// determine a loop-invariant trip count of the loop, which is actually a much +/// broader range than just linear tests. Value *IndVarSimplify:: -LinearFunctionTestReplace(Loop *L, +linearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount, PHINode *IndVar, SCEVExpander &Rewriter) { @@ -1809,7 +1905,7 @@ LinearFunctionTestReplace(Loop *L, // This addition may overflow, which is valid as long as the comparison is // truncated to BackedgeTakenCount->getType(). IVCount = SE->getAddExpr(BackedgeTakenCount, - SE->getConstant(BackedgeTakenCount->getType(), 1)); + SE->getOne(BackedgeTakenCount->getType())); // The BackedgeTaken expression contains the number of times that the // backedge branches to the loop header. This is one less than the // number of times the loop executes, so use the incremented indvar. 
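In source-level terms, the rewrite this method performs amounts to the canonicalization below; the loop bodies and bounds are invented for illustration, and the real pass of course operates on IR rather than C++:

  #include <cstdio>

  int main() {
    int a = 0, b = 0;

    // Before LFTR: a signed relational exit test.
    for (int i = 0; i < 10; ++i)
      a += i;

    // After LFTR: the canonical != comparison against the IV, using the
    // SCEV-computed trip count. Same 10 iterations, same result.
    int tripCount = 10;
    for (int i = 0; i != tripCount; ++i)
      b += i;

    printf("%d %d\n", a, b); // 45 45
    return 0;
  }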
@@ -1847,8 +1943,8 @@ LinearFunctionTestReplace(Loop *L, const SCEV *ARStep = AR->getStepRecurrence(*SE); // For constant IVCount, avoid truncation. if (isa(ARStart) && isa(IVCount)) { - const APInt &Start = cast(ARStart)->getValue()->getValue(); - APInt Count = cast(IVCount)->getValue()->getValue(); + const APInt &Start = cast(ARStart)->getAPInt(); + APInt Count = cast(IVCount)->getAPInt(); // Note that the post-inc value of BackedgeTakenCount may have overflowed // above such that IVCount is now zero. if (IVCount != BackedgeTakenCount && Count == 0) { @@ -1886,21 +1982,21 @@ LinearFunctionTestReplace(Loop *L, } //===----------------------------------------------------------------------===// -// SinkUnusedInvariants. A late subpass to cleanup loop preheaders. +// sinkUnusedInvariants. A late subpass to cleanup loop preheaders. //===----------------------------------------------------------------------===// /// If there's a single exit block, sink any loop-invariant values that /// were defined in the preheader but not used inside the loop into the /// exit block to reduce register pressure in the loop. -void IndVarSimplify::SinkUnusedInvariants(Loop *L) { +void IndVarSimplify::sinkUnusedInvariants(Loop *L) { BasicBlock *ExitBlock = L->getExitBlock(); if (!ExitBlock) return; BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) return; - Instruction *InsertPt = ExitBlock->getFirstInsertionPt(); - BasicBlock::iterator I = Preheader->getTerminator(); + Instruction *InsertPt = &*ExitBlock->getFirstInsertionPt(); + BasicBlock::iterator I(Preheader->getTerminator()); while (I != Preheader->begin()) { --I; // New instructions were inserted at the end of the preheader. @@ -1920,8 +2016,8 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { if (isa(I)) continue; - // Skip landingpad instructions. - if (isa(I)) + // Skip eh pad instructions. + if (I->isEHPad()) continue; // Don't sink alloca: we never want to sink static alloca's out of the @@ -1953,7 +2049,7 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { continue; // Otherwise, sink it to the exit block. - Instruction *ToMove = I; + Instruction *ToMove = &*I; bool Done = false; if (I != Preheader->begin()) { @@ -1994,7 +2090,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { return false; LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); DT = &getAnalysis().getDomTree(); auto *TLIP = getAnalysisIfAvailable(); TLI = TLIP ? &TLIP->getTLI() : nullptr; @@ -2007,7 +2103,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // If there are any floating-point recurrences, attempt to // transform them to use integer recurrences. - RewriteNonIntegerIVs(L); + rewriteNonIntegerIVs(L); const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); @@ -2024,7 +2120,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // other expressions involving loop IVs have been evaluated. This helps SCEV // set no-wrap flags before normalizing sign/zero extension. Rewriter.disableCanonicalMode(); - SimplifyAndExtend(L, Rewriter, LPM); + simplifyAndExtend(L, Rewriter, LI); // Check to see if this loop has a computable loop-invariant execution count. 
// If so, this means that we can compute the final value of any expressions @@ -2034,7 +2130,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // if (ReplaceExitValue != NeverRepl && !isa(BackedgeTakenCount)) - RewriteLoopExitValues(L, Rewriter); + rewriteLoopExitValues(L, Rewriter); // Eliminate redundant IV cycles. NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts); @@ -2054,7 +2150,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // explicitly check any assumptions made by SCEV. Brittle. const SCEVAddRecExpr *AR = dyn_cast(BackedgeTakenCount); if (!AR || AR->getLoop()->getLoopPreheader()) - (void)LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, + (void)linearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter); } } @@ -2074,13 +2170,13 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Loop-invariant instructions in the preheader that aren't used in the // loop may be sunk below the loop to reduce register pressure. - SinkUnusedInvariants(L); + sinkUnusedInvariants(L); // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader(), TLI); + // Check a post-condition. - assert(L->isLCSSAForm(*DT) && - "Indvars did not leave the loop in lcssa form!"); + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); // Verify that LFTR, and any other change have not interfered with SCEV's // ability to compute trip count. diff --git a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index cbdacad8f28b..dea61f6ff3d7 100644 --- a/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -214,8 +214,8 @@ public: AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); } bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -224,8 +224,15 @@ public: char InductiveRangeCheckElimination::ID = 0; } -INITIALIZE_PASS(InductiveRangeCheckElimination, "irce", - "Inductive range check elimination", false, false) +INITIALIZE_PASS_BEGIN(InductiveRangeCheckElimination, "irce", + "Inductive range check elimination", false, false) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_END(InductiveRangeCheckElimination, "irce", + "Inductive range check elimination", false, false) const char *InductiveRangeCheck::rangeCheckKindToStr( InductiveRangeCheck::RangeCheckKind RCK) { @@ -1044,9 +1051,9 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( auto BBInsertLocation = std::next(Function::iterator(LS.Latch)); RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector", - &F, BBInsertLocation); + &F, &*BBInsertLocation); RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F, - BBInsertLocation); + &*BBInsertLocation); BranchInst *PreheaderJump = cast(&*Preheader->rbegin()); bool Increasing = LS.IndVarIncreasing; @@ -1399,8 +1406,9 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { LLVMContext &Context = Preheader->getContext(); InductiveRangeCheck::AllocatorTy IRCAlloc; SmallVector RangeChecks; - ScalarEvolution &SE = getAnalysis(); - BranchProbabilityInfo &BPI = getAnalysis(); + 
ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + BranchProbabilityInfo &BPI = + getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
for (auto BBI : L->getBlocks()) if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 1130d228acb8..087ce8ac50d4 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -18,15 +18,22 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h"
@@ -36,6 +43,8 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include <algorithm> +#include <memory> using namespace llvm;
#define DEBUG_TYPE "jump-threading"
@@ -49,6 +58,13 @@ BBDuplicateThreshold("jump-threading-threshold", cl::desc("Max block size to duplicate for jump threading"), cl::init(6), cl::Hidden);
+static cl::opt<unsigned> +ImplicationSearchThreshold( + "jump-threading-implication-search-threshold", + cl::desc("The number of predecessors to search for a stronger " + "condition to use to thread over a weaker condition"), + cl::init(3), cl::Hidden); + namespace { // These are at global scope so static functions can use them too. typedef SmallVectorImpl<std::pair<Constant*, BasicBlock*> > PredValueInfo;
@@ -80,6 +96,9 @@ namespace { class JumpThreading : public FunctionPass { TargetLibraryInfo *TLI; LazyValueInfo *LVI; + std::unique_ptr<BlockFrequencyInfo> BFI; + std::unique_ptr<BranchProbabilityInfo> BPI; + bool HasProfileData; #ifdef NDEBUG SmallPtrSet<const BasicBlock *, 16> LoopHeaders; #else
@@ -114,9 +133,15 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<LazyValueInfo>(); AU.addPreserved<LazyValueInfo>(); + AU.addPreserved<GlobalsAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); }
+ void releaseMemory() override { + BFI.reset(); + BPI.reset(); + } + void FindLoopHeaders(Function &F); bool ProcessBlock(BasicBlock *BB); bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock *> &PredBBs,
@@ -134,9 +159,16 @@ bool ProcessBranchOnPHI(PHINode *PN); bool ProcessBranchOnXOR(BinaryOperator *BO); + bool ProcessImpliedCondition(BasicBlock *BB); bool SimplifyPartiallyRedundantLoad(LoadInst *LI); bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB); + + private: + BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds, + const char *Suffix); + void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB, + BasicBlock *NewBB, BasicBlock *SuccBB); }; }
@@ -160,11 +192,21 @@ bool JumpThreading::runOnFunction(Function &F) { DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n"); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); LVI = &getAnalysis<LazyValueInfo>(); + BFI.reset(); + BPI.reset();
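The pattern used here — analyses built by hand rather than requested from the pass manager, constructed only when the function actually carries profile data, and torn down again in releaseMemory() — reduces to a few lines. A stripped-down sketch with stub types standing in for BranchProbabilityInfo and BlockFrequencyInfo (names invented, not LLVM's):

  #include <cstdio>
  #include <memory>

  struct StubBPI {};                              // stands in for BranchProbabilityInfo
  struct StubBFI { StubBFI(const StubBPI &) {} }; // stands in for BlockFrequencyInfo

  struct PassState {
    std::unique_ptr<StubBPI> BPI;
    std::unique_ptr<StubBFI> BFI;

    void run(bool hasProfileData) {
      BPI.reset(); // start each run with a clean slate
      BFI.reset();
      if (hasProfileData) { // only pay for the analyses when weights matter
        BPI.reset(new StubBPI());
        BFI.reset(new StubBFI(*BPI));
      }
      printf("updating edge weights: %s\n", BFI ? "yes" : "no");
    }

    void releaseMemory() { BFI.reset(); BPI.reset(); }
  };

  int main() {
    PassState P;
    P.run(false); // updating edge weights: no
    P.run(true);  // updating edge weights: yes
    P.releaseMemory();
    return 0;
  }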
+ HasProfileData = F.getEntryCount().hasValue(); + if (HasProfileData) { + LoopInfo LI{DominatorTree(F)}; + BPI.reset(new BranchProbabilityInfo(F, LI)); + BFI.reset(new BlockFrequencyInfo(F, *BPI, LI)); + } // Remove unreachable blocks from function as they may result in infinite // loop. We do threading if we found something profitable. Jump threading a // branch can create other opportunities. If these opportunities form a cycle - // i.e. if any jump treading is undoing previous threading in the path, then + // i.e. if any jump threading is undoing previous threading in the path, then // we will loop forever. We take care of this issue by not jump threading for // back edges. This works for normal cases but not for unreachable blocks as // they may have cycle with no back edge. @@ -176,7 +218,7 @@ bool JumpThreading::runOnFunction(Function &F) { do { Changed = false; for (Function::iterator I = F.begin(), E = F.end(); I != E;) { - BasicBlock *BB = I; + BasicBlock *BB = &*I; // Thread all of the branches we can over this block. while (ProcessBlock(BB)) Changed = true; @@ -239,11 +281,26 @@ bool JumpThreading::runOnFunction(Function &F) { static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, unsigned Threshold) { /// Ignore PHI nodes, these will be flattened when duplication happens. - BasicBlock::const_iterator I = BB->getFirstNonPHI(); + BasicBlock::const_iterator I(BB->getFirstNonPHI()); // FIXME: THREADING will delete values that are just used to compute the // branch, so they shouldn't count against the duplication cost. + unsigned Bonus = 0; + const TerminatorInst *BBTerm = BB->getTerminator(); + // Threading through a switch statement is particularly profitable. If this + // block ends in a switch, decrease its cost to make it more likely to happen. + if (isa<SwitchInst>(BBTerm)) + Bonus = 6; + + // The same holds for indirect branches, but slightly more so. + if (isa<IndirectBrInst>(BBTerm)) + Bonus = 8; + + // Bump the threshold up so the early exit from the loop doesn't skip the + // terminator-based Size adjustment at the end. + Threshold += Bonus; + // Sum up the cost of each instruction until we get to the terminator. Don't // include the terminator because the copy won't include it. unsigned Size = 0; @@ -260,6 +317,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, if (isa<BitCastInst>(I) && I->getType()->isPointerTy()) continue; + // Bail out if this instruction gives back a token type, it is not possible + // to duplicate it if it is used outside this BB. + if (I->getType()->isTokenTy() && I->isUsedOutsideOfBlock(BB)) + return ~0U; + // All other instructions count for at least one unit. ++Size; @@ -268,7 +330,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, // as having cost of 2 total, and if they are a vector intrinsic, we model // them as having cost 1. if (const CallInst *CI = dyn_cast<CallInst>(I)) { - if (CI->cannotDuplicate()) + if (CI->cannotDuplicate() || CI->isConvergent()) // Blocks with NoDuplicate are modelled as having infinite cost, so they // are never duplicated. return ~0U; @@ -279,16 +341,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, } } - // Threading through a switch statement is particularly profitable. If this - // block ends in a switch, decrease its cost to make it more likely to happen. - if (isa<SwitchInst>(I)) - Size = Size > 6 ? Size-6 : 0; - - // The same holds for indirect branches, but slightly more so. - if (isa<IndirectBrInst>(I)) - Size = Size > 8 ? Size-8 : 0; - - return Size; + return Size > Bonus ? 
Size - Bonus : 0; } /// FindLoopHeaders - We do not want jump threading to turn proper loop @@ -669,7 +722,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // because now the condition in this block can be threaded through // predecessors of our predecessor block. if (BasicBlock *SinglePred = BB->getSinglePredecessor()) { - if (SinglePred->getTerminator()->getNumSuccessors() == 1 && + const TerminatorInst *TI = SinglePred->getTerminator(); + if (!TI->isExceptional() && TI->getNumSuccessors() == 1 && SinglePred != BB && !hasAddressTakenAndUsed(BB)) { // If SinglePred was a loop header, BB becomes one. if (LoopHeaders.erase(SinglePred)) @@ -761,7 +815,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { // If we're branching on a conditional, LVI might be able to determine // it's value at the branch instruction. We only handle comparisons // against a constant at this time. - // TODO: This should be extended to handle switches as well. + // TODO: This should be extended to handle switches as well. BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator()); Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1)); if (CondBr && CondConst && CondBr->isConditional()) { @@ -829,9 +883,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator())) return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst)); + // Search for a stronger dominating condition that can be used to simplify a + // conditional branch leaving BB. + if (ProcessImpliedCondition(BB)) + return true; - // TODO: If we have: "br (X > 0)" and we have a predecessor where we know - // "(X == 4)", thread through this block. + return false; +} + +bool JumpThreading::ProcessImpliedCondition(BasicBlock *BB) { + auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + + Value *Cond = BI->getCondition(); + BasicBlock *CurrentBB = BB; + BasicBlock *CurrentPred = BB->getSinglePredecessor(); + unsigned Iter = 0; + + auto &DL = BB->getModule()->getDataLayout(); + + while (CurrentPred && Iter++ < ImplicationSearchThreshold) { + auto *PBI = dyn_cast<BranchInst>(CurrentPred->getTerminator()); + if (!PBI || !PBI->isConditional() || PBI->getSuccessor(0) != CurrentBB) + return false; + + if (isImpliedCondition(PBI->getCondition(), Cond, DL)) { + BI->getSuccessor(1)->removePredecessor(BB); + BranchInst::Create(BI->getSuccessor(0), BI); + BI->eraseFromParent(); + return true; + } + CurrentBB = CurrentPred; + CurrentPred = CurrentBB->getSinglePredecessor(); + } return false; } @@ -850,10 +935,10 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { if (LoadBB->getSinglePredecessor()) return false; - // If the load is defined in a landing pad, it can't be partially redundant, - // because the edges between the invoke and the landing pad cannot have other + // If the load is defined in an EH pad, it can't be partially redundant, + // because the edges between the invoke and the EH pad cannot have other // instructions between them. - if (LoadBB->isLandingPad()) + if (LoadBB->isEHPad()) return false; Value *LoadedPtr = LI->getOperand(0); @@ -866,11 +951,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan a few instructions up from the load, to see if it is obviously live at // the entry to its block.
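// [Illustrative sketch, not part of the patch] What ProcessImpliedCondition
// above exploits, reduced to plain C++. isImpliedCondition is the LLVM
// helper; the one-liner below only models the signed greater-than case:
//
//   pred: br i1 (x > 10), label %bb, label %other
//   bb:   br i1 (x > 0),  label %t,  label %f
//
// Because (x > 10) implies (x > 0) on the pred->bb edge, bb's conditional
// branch folds to an unconditional "br label %t".
#include <cassert>

// Does "x > A" imply "x > B" for every x? True exactly when A >= B.
static bool sgtImplies(int A, int B) { return A >= B; }

int main() {
  assert(sgtImplies(10, 0));  // x > 10 implies x > 0: the branch folds
  assert(!sgtImplies(0, 10)); // x > 0 does not imply x > 10: no folding
}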
- BasicBlock::iterator BBIt = LI; + BasicBlock::iterator BBIt(LI); if (Value *AvailableVal = - FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) { - // If the value if the load is locally available within the block, just use + FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, DefMaxInstsToScan)) { + // If the value of the load is locally available within the block, just use // it. This frequently occurs for reg2mem'd allocas. //cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n"; @@ -914,7 +999,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Scan the predecessor to see if the value is available in the pred. BBIt = PredBB->end(); AAMDNodes ThisAATags; - Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6, + Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, + DefMaxInstsToScan, nullptr, &ThisAATags); if (!PredAvailable) { OneUnavailablePred = PredBB; @@ -968,8 +1054,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { } // Split them out to their own block. - UnavailablePred = - SplitBlockPredecessors(LoadBB, PredsToSplit, "thread-pre-split"); + UnavailablePred = SplitBlockPreds(LoadBB, PredsToSplit, "thread-pre-split"); } // If the value isn't available in all predecessors, then there will be @@ -995,7 +1080,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { // Create a PHI node at the start of the block for the PRE'd load value. pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB); PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", - LoadBB->begin()); + &LoadBB->front()); PN->takeName(LI); PN->setDebugLoc(LI->getDebugLoc()); @@ -1262,7 +1347,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) { // Into: // BB': // %Y = icmp ne i32 %A, %B - // br i1 %Z, ... + // br i1 %Y, ... PredValueInfoTy XorOpValues; bool isLHS = true; @@ -1387,14 +1472,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, return false; } - // And finally, do it! Start by factoring the predecessors is needed. + // And finally, do it! Start by factoring the predecessors if needed. BasicBlock *PredBB; if (PredBBs.size() == 1) PredBB = PredBBs[0]; else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm"); + PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm"); } // And finally, do it! @@ -1415,6 +1500,13 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BB->getParent(), BB); NewBB->moveAfter(PredBB); + // Set the block frequency of NewBB. + if (HasProfileData) { + auto NewBBFreq = + BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB); + BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency()); + } + BasicBlock::iterator BI = BB->begin(); for (; PHINode *PN = dyn_cast(BI); ++BI) ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); @@ -1425,7 +1517,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, Instruction *New = BI->clone(); New->setName(BI->getName()); NewBB->getInstList().push_back(New); - ValueMapping[BI] = New; + ValueMapping[&*BI] = New; // Remap operands to patch up intra-block references. for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i) @@ -1438,7 +1530,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // We didn't copy the terminator from BB over to NewBB, because there is now // an unconditional jump to SuccBB. Insert the unconditional jump. 
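// [Illustrative sketch, not the real API] The bounded backward scan that
// FindAvailableLoadedValue performs, modeled in plain C++: the hunk above
// replaces the hard-coded limit of 6 with the configurable
// DefMaxInstsToScan. Walk back at most Limit toy "store" records looking for
// a prior write to the same location.
#include <cassert>
#include <vector>

struct Inst { int Addr; int Val; }; // a toy "store Val to Addr"

static const int *findAvailable(const std::vector<Inst> &Block, int Addr,
                                unsigned Limit) {
  unsigned Scanned = 0;
  for (auto I = Block.rbegin(); I != Block.rend() && Scanned++ < Limit; ++I)
    if (I->Addr == Addr)
      return &I->Val; // the loaded value is locally available
  return nullptr;     // give up once the scan budget is exhausted
}

int main() {
  std::vector<Inst> BB = {{1, 10}, {2, 20}, {3, 30}};
  assert(*findAvailable(BB, 2, 3) == 20);
  assert(findAvailable(BB, 1, 1) == nullptr); // out of scan budget
}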
- BranchInst *NewBI =BranchInst::Create(SuccBB, NewBB); + BranchInst *NewBI = BranchInst::Create(SuccBB, NewBB); NewBI->setDebugLoc(BB->getTerminator()->getDebugLoc()); // Check to see if SuccBB has PHI nodes. If so, we need to add entries to the @@ -1475,8 +1567,8 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, I); - SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]); + SSAUpdate.AddAvailableValue(BB, &*I); + SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); @@ -1499,11 +1591,98 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, // frequently happens because of phi translation. SimplifyInstructionsInBlock(NewBB, TLI); + // Update the edge weight from BB to SuccBB, which should be less than before. + UpdateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB); + // Threaded an edge! ++NumThreads; return true; } +/// Create a new basic block that will be the predecessor of BB and successor of +/// all blocks in Preds. When profile data is available, update the frequency of +/// this new block. +BasicBlock *JumpThreading::SplitBlockPreds(BasicBlock *BB, + ArrayRef<BasicBlock *> Preds, + const char *Suffix) { + // Collect the frequencies of all predecessors of BB, which will be used to + // update the edge weight on BB->SuccBB. + BlockFrequency PredBBFreq(0); + if (HasProfileData) + for (auto Pred : Preds) + PredBBFreq += BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB); + + BasicBlock *PredBB = SplitBlockPredecessors(BB, Preds, Suffix); + + // Set the block frequency of the newly created PredBB, which is the sum of + // frequencies of Preds. + if (HasProfileData) + BFI->setBlockFreq(PredBB, PredBBFreq.getFrequency()); + return PredBB; +} + +/// Update the block frequency of BB and branch weight and the metadata on the +/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 - +/// Freq(PredBB->BB) / Freq(BB->SuccBB). +void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, + BasicBlock *BB, + BasicBlock *NewBB, + BasicBlock *SuccBB) { + if (!HasProfileData) + return; + + assert(BFI && BPI && "BFI & BPI should have been created here"); + + // As the edge from PredBB to BB is deleted, we have to update the block + // frequency of BB. + auto BBOrigFreq = BFI->getBlockFreq(BB); + auto NewBBFreq = BFI->getBlockFreq(NewBB); + auto BB2SuccBBFreq = BBOrigFreq * BPI->getEdgeProbability(BB, SuccBB); + auto BBNewFreq = BBOrigFreq - NewBBFreq; + BFI->setBlockFreq(BB, BBNewFreq.getFrequency()); + + // Collect updated outgoing edges' frequencies from BB and use them to update + // edge probabilities. + SmallVector<uint64_t, 4> BBSuccFreq; + for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + auto SuccFreq = (*I == SuccBB) + ? BB2SuccBBFreq - NewBBFreq + : BBOrigFreq * BPI->getEdgeProbability(BB, *I); + BBSuccFreq.push_back(SuccFreq.getFrequency()); + } + + uint64_t MaxBBSuccFreq = + *std::max_element(BBSuccFreq.begin(), BBSuccFreq.end()); + + SmallVector<BranchProbability, 4> BBSuccProbs; + if (MaxBBSuccFreq == 0) + BBSuccProbs.assign(BBSuccFreq.size(), + {1, static_cast<uint32_t>(BBSuccFreq.size())}); + else { + for (uint64_t Freq : BBSuccFreq) + BBSuccProbs.push_back( + BranchProbability::getBranchProbability(Freq, MaxBBSuccFreq)); + // Normalize edge probabilities so that they sum up to one.
+ BranchProbability::normalizeProbabilities(BBSuccProbs.begin(), + BBSuccProbs.end()); + } + + // Update edge probabilities in BPI. + for (int I = 0, E = BBSuccProbs.size(); I < E; I++) + BPI->setEdgeProbability(BB, I, BBSuccProbs[I]); + + if (BBSuccProbs.size() >= 2) { + SmallVector Weights; + for (auto Prob : BBSuccProbs) + Weights.push_back(Prob.getNumerator()); + + auto TI = BB->getTerminator(); + TI->setMetadata( + LLVMContext::MD_prof, + MDBuilder(TI->getParent()->getContext()).createBranchWeights(Weights)); + } +} + /// DuplicateCondBranchOnPHIIntoPred - PredBB contains an unconditional branch /// to BB which contains an i1 PHI node and a conditional branch on that PHI. /// If we can duplicate the contents of BB up into PredBB do so now, this @@ -1530,14 +1709,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, return false; } - // And finally, do it! Start by factoring the predecessors is needed. + // And finally, do it! Start by factoring the predecessors if needed. BasicBlock *PredBB; if (PredBBs.size() == 1) PredBB = PredBBs[0]; else { DEBUG(dbgs() << " Factoring out " << PredBBs.size() << " common predecessors.\n"); - PredBB = SplitBlockPredecessors(BB, PredBBs, ".thr_comm"); + PredBB = SplitBlockPreds(BB, PredBBs, ".thr_comm"); } // Okay, we decided to do this! Clone all the instructions in BB onto the end @@ -1581,12 +1760,12 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, if (Value *IV = SimplifyInstruction(New, BB->getModule()->getDataLayout())) { delete New; - ValueMapping[BI] = IV; + ValueMapping[&*BI] = IV; } else { // Otherwise, insert the new instruction into the block. New->setName(BI->getName()); - PredBB->getInstList().insert(OldPredBranch, New); - ValueMapping[BI] = New; + PredBB->getInstList().insert(OldPredBranch->getIterator(), New); + ValueMapping[&*BI] = New; } } @@ -1628,8 +1807,8 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, // its block to be uses of the appropriate PHI node etc. See ValuesInBlocks // with the two values we know. 
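// [Illustrative sketch, not part of the patch] The net effect of the
// renormalization done in UpdateBlockFreqAndEdgeWeight above: each successor
// frequency is turned into a probability against the maximum and the results
// are normalized to sum to one, which is equivalent to dividing by the total.
// Plain doubles here; the pass uses fixed-point BranchProbability.
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<double> normalize(const std::vector<uint64_t> &Freq) {
  uint64_t Sum = 0;
  for (uint64_t F : Freq)
    Sum += F;
  std::vector<double> Probs;
  for (uint64_t F : Freq)
    Probs.push_back(Sum ? double(F) / double(Sum) : 1.0 / Freq.size());
  return Probs;
}

int main() {
  // Outgoing frequencies {60, 20, 20} become probabilities {0.6, 0.2, 0.2}.
  auto P = normalize({60, 20, 20});
  assert(P[0] == 0.6 && P[1] == 0.2 && P[2] == 0.2);
}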
SSAUpdate.Initialize(I->getType(), I->getName()); - SSAUpdate.AddAvailableValue(BB, I); - SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]); + SSAUpdate.AddAvailableValue(BB, &*I); + SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]); while (!UsesToRename.empty()) SSAUpdate.RewriteUse(*UsesToRename.pop_back_val()); diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 43fc50e588f8..6d70cdc3ade2 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -34,10 +34,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -118,9 +121,12 @@ namespace { AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); - AU.addRequired<AliasAnalysis>(); - AU.addPreserved<AliasAnalysis>(); - AU.addPreserved<ScalarEvolution>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); } @@ -164,9 +170,12 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false) Pass *llvm::createLICMPass() { return new LICM(); } @@ -183,7 +192,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // Get our Loop and Alias Analysis information... LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); - AA = &getAnalysis<AliasAnalysis>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); @@ -264,9 +273,10 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) { // FIXME: This is really heavy handed. It would be a bit better to use an // SSAUpdater strategy during promotion that was LCSSA aware and reformed // it as it went. - if (Changed) - formLCSSARecursively(*L, *DT, LI, - getAnalysisIfAvailable<ScalarEvolution>()); + if (Changed) { + auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); + formLCSSARecursively(*L, *DT, LI, SEWP ? &SEWP->getSE() : nullptr); + } } // Check that neither this loop nor its parent have had LCSSA broken. LICM is @@ -402,7 +412,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, } /// Computes loop safety information, checks loop body & header -/// for the possiblity of may throw exception. +/// for the possibility of may throw exception. /// void llvm::computeLICMSafetyInfo(LICMSafetyInfo * SafetyInfo, Loop * CurLoop) { assert(CurLoop != nullptr && "CurLoop cant be null"); // Setting default safety values.
SafetyInfo->MayThrow = false; SafetyInfo->HeaderMayThrow = false; - // Iterate over header and compute dafety info. + // Iterate over header and compute safety info. for (BasicBlock::iterator I = Header->begin(), E = Header->end(); (I != E) && !SafetyInfo->HeaderMayThrow; ++I) SafetyInfo->HeaderMayThrow |= I->mayThrow(); @@ -445,7 +455,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, // Don't hoist loads which have may-aliased stores in loop. uint64_t Size = 0; if (LI->getType()->isSized()) - Size = AA->getTypeStoreSize(LI->getType()); + Size = I.getModule()->getDataLayout().getTypeStoreSize(LI->getType()); AAMDNodes AAInfo; LI->getAAMetadata(AAInfo); @@ -457,10 +467,21 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT, return false; // Handle simple cases by querying alias analysis. - AliasAnalysis::ModRefBehavior Behavior = AA->getModRefBehavior(CI); - if (Behavior == AliasAnalysis::DoesNotAccessMemory) + FunctionModRefBehavior Behavior = AA->getModRefBehavior(CI); + if (Behavior == FMRB_DoesNotAccessMemory) return true; if (AliasAnalysis::onlyReadsMemory(Behavior)) { + // A readonly argmemonly function only reads from memory pointed to by + // its arguments with arbitrary offsets. If we can prove there are no + // writes to this memory in the loop, we can hoist or sink. + if (AliasAnalysis::onlyAccessesArgPointees(Behavior)) { + for (Value *Op : CI->arg_operands()) + if (Op->getType()->isPointerTy() && + pointerInvalidatedByLoop(Op, MemoryLocation::UnknownSize, + AAMDNodes(), CurAST)) + return false; + return true; + } // If this call only reads from memory and there are no writes to memory // in the loop, we can hoist or sink the call as appropriate. bool FoundMod = false; @@ -566,7 +587,7 @@ static Instruction *CloneInstructionInExitBlock(const Instruction &I, if (!OLoop->contains(&PN)) { PHINode *OpPN = PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), - OInst->getName() + ".lcssa", ExitBlock.begin()); + OInst->getName() + ".lcssa", &ExitBlock.front()); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); *OI = OpPN; @@ -651,6 +672,10 @@ static bool hoist(Instruction &I, BasicBlock *Preheader) { // Move the new node to the Preheader, before its terminator. I.moveBefore(Preheader->getTerminator()); + // Metadata can be dependent on the condition we are hoisting above. + // Conservatively strip all metadata on the instruction. + I.dropUnknownNonDebugMetadata(); + if (isa<LoadInst>(I)) ++NumMovedLoads; else if (isa<CallInst>(I)) ++NumMovedCalls; ++NumHoisted; @@ -730,9 +755,9 @@ namespace { if (!L->contains(BB)) { // We need to create an LCSSA PHI node for the incoming value and // store that. - PHINode *PN = PHINode::Create( - I->getType(), PredCache.size(BB), - I->getName() + ".lcssa", BB->begin()); + PHINode *PN = + PHINode::Create(I->getType(), PredCache.size(BB), + I->getName() + ".lcssa", &BB->front()); for (BasicBlock *Pred : PredCache.get(BB)) PN->addIncoming(I, Pred); return PN; @@ -942,7 +967,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, CurLoop->getUniqueExitBlocks(ExitBlocks); InsertPts.resize(ExitBlocks.size()); for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - InsertPts[i] = ExitBlocks[i]->getFirstInsertionPt(); + InsertPts[i] = &*ExitBlocks[i]->getFirstInsertionPt(); } // We use the SSAUpdater interface to insert phi nodes as required.
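// [Illustrative sketch, not the pass] The new argmemonly case in
// canSinkOrHoistInst at the source level: a readonly call that only touches
// memory reachable from its pointer arguments may be hoisted once the loop is
// known not to write that memory. "pure" is the GCC/Clang attribute closest
// to readonly here.
#include <cassert>

__attribute__((pure)) static int sumPair(const int *P) {
  return P[0] + P[1]; // reads only through its argument
}

int main() {
  int Data[2] = {1, 2};
  int Acc = 0;
  // Nothing in the loop stores to Data, so sumPair(Data) is loop-invariant
  // and LICM may evaluate it once in the preheader.
  for (int I = 0; I < 4; ++I)
    Acc += sumPair(Data);
  assert(Acc == 12);
}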
@@ -973,7 +998,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS, return Changed; } -/// Simple Analysis hook. Clone alias set info. +/// Simple analysis hook. Clone alias set info. /// void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) { AliasSetTracker *AST = LoopToAliasSetMap.lookup(L); diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt index deea9e2d0102..8a99df86b84a 100644 --- a/lib/Transforms/Scalar/LLVMBuild.txt +++ b/lib/Transforms/Scalar/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library name = Scalar parent = Transforms library_name = ScalarOpts -required_libraries = Analysis Core InstCombine ProfileData Support TransformUtils +required_libraries = Analysis Core InstCombine Support TransformUtils diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index c19cd19059b2..1648878b0628 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/TargetFolder.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -56,7 +57,7 @@ class LoadCombine : public BasicBlockPass { public: LoadCombine() : BasicBlockPass(ID), C(nullptr), AA(nullptr) { - initializeSROAPass(*PassRegistry::getPassRegistry()); + initializeLoadCombinePass(*PassRegistry::getPassRegistry()); } using llvm::Pass::doInitialization; @@ -223,7 +224,7 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { if (skipOptnoneFunction(BB)) return false; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); IRBuilder TheBuilder( BB.getContext(), TargetFolder(BB.getModule()->getDataLayout())); @@ -262,8 +263,8 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); } char LoadCombine::ID = 0; @@ -274,7 +275,8 @@ BasicBlockPass *llvm::createLoadCombinePass() { INITIALIZE_PASS_BEGIN(LoadCombine, "load-combine", "Combine Adjacent Loads", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_END(LoadCombine, "load-combine", "Combine Adjacent Loads", false, false) diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 98b068edf582..bc00ff3f3a42 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -17,6 +17,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/Dominators.h" @@ -35,18 +36,19 @@ namespace { } // Possibly eliminate loop L if it is dead. 
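// [Illustrative sketch, not part of the patch] What the "Combine Adjacent
// Loads" pass patched above means at the source level, assuming a
// little-endian target: two contiguous 32-bit loads can be serviced by one
// 64-bit load plus shifts.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Pair[2] = {0x11111111u, 0x22222222u};
  uint64_t Wide;
  std::memcpy(&Wide, Pair, sizeof(Wide)); // the single combined load
  uint32_t Lo = uint32_t(Wide);           // replaces the load of Pair[0]
  uint32_t Hi = uint32_t(Wide >> 32);     // replaces the load of Pair[1]
  assert(Lo == Pair[0] && Hi == Pair[1]);
}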
- bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); } @@ -64,7 +66,7 @@ INITIALIZE_PASS_BEGIN(LoopDeletion, "loop-deletion", "Delete dead loops", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopDeletion, "loop-deletion", @@ -130,7 +132,7 @@ bool LoopDeletion::isLoopDead(Loop *L, /// so could change the halting/non-halting nature of a program. /// NOTE: This entire process relies pretty heavily on LoopSimplify and LCSSA /// in order to make various safety checks work. -bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { +bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) { if (skipOptnoneFunction(L)) return false; @@ -169,7 +171,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { // Don't remove loops for which we can't solve the trip count. // They could be infinite, in which case we'd be changing program behavior. - ScalarEvolution &SE = getAnalysis(); + ScalarEvolution &SE = getAnalysis().getSE(); const SCEV *S = SE.getMaxBackedgeTakenCount(L); if (isa(S)) return Changed; @@ -242,9 +244,8 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &LPM) { for (BasicBlock *BB : blocks) loopInfo.removeBlock(BB); - // The last step is to inform the loop pass manager that we've - // eliminated this loop. - LPM.deleteLoopFromQueue(L); + // The last step is to update LoopInfo now that we've eliminated this loop. + loopInfo.updateUnloop(L); Changed = true; ++NumDeleted; diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp index 1b9859b57790..3d3cf3e2890b 100644 --- a/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/lib/Transforms/Scalar/LoopDistribute.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/LoopVersioning.h" #include @@ -54,6 +55,11 @@ static cl::opt DistributeNonIfConvertible( "if-convertible by the loop vectorizer"), cl::init(false)); +static cl::opt DistributeSCEVCheckThreshold( + "loop-distribute-scev-check-threshold", cl::init(8), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed for Loop " + "Distribution")); + STATISTIC(NumLoopsDistributed, "Number of loops distributed"); namespace { @@ -164,9 +170,7 @@ public: // Delete the instructions backwards, as it has a reduced likelihood of // having to update as many def-use and use-def chains. - for (auto I = Unused.rbegin(), E = Unused.rend(); I != E; ++I) { - auto *Inst = *I; - + for (auto *Inst : make_range(Unused.rbegin(), Unused.rend())) { if (!Inst->use_empty()) Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); Inst->eraseFromParent(); @@ -373,7 +377,7 @@ public: /// \brief This performs the main chunk of the work of cloning the loops for /// the partitions. 
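// [Illustrative sketch, not part of the patch] Why LoopDeletion above gives
// up when the max backedge-taken count is not computable: erasing a possibly
// infinite loop would change the halting behavior of the program, even though
// the loop computes nothing that is used.
static unsigned spin(unsigned N, unsigned Step) {
  unsigned I = 0;
  while (I != N) // if Step does not divide N, this never exits,
    I += Step;   // so the "dead" loop cannot simply be deleted
  return I;
}

int main() { return spin(8, 2) == 8 ? 0 : 1; }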
- void cloneLoops(Pass *P) { + void cloneLoops() { BasicBlock *OrigPH = L->getLoopPreheader(); // At this point the predecessor of the preheader is either the memcheck // block or the top part of the original preheader. @@ -547,11 +551,11 @@ public: MemoryInstructionDependences( const SmallVectorImpl &Instructions, - const SmallVectorImpl &InterestingDependences) { + const SmallVectorImpl &Dependences) { Accesses.append(Instructions.begin(), Instructions.end()); DEBUG(dbgs() << "Backward dependences:\n"); - for (auto &Dep : InterestingDependences) + for (auto &Dep : Dependences) if (Dep.isPossiblyBackward()) { // Note that the designations source and destination follow the program // order, i.e. source is always first. (The direction is given by the @@ -567,25 +571,6 @@ private: AccessesType Accesses; }; -/// \brief Returns the instructions that use values defined in the loop. -static SmallVector findDefsUsedOutsideOfLoop(Loop *L) { - SmallVector UsedOutside; - - for (auto *Block : L->getBlocks()) - // FIXME: I believe that this could use copy_if if the Inst reference could - // be adapted into a pointer. - for (auto &Inst : *Block) { - auto Users = Inst.users(); - if (std::any_of(Users.begin(), Users.end(), [&](User *U) { - auto *Use = cast(U); - return !L->contains(Use->getParent()); - })) - UsedOutside.push_back(&Inst); - } - - return UsedOutside; -} - /// \brief The pass class. class LoopDistribute : public FunctionPass { public: @@ -597,6 +582,7 @@ public: LI = &getAnalysis().getLoopInfo(); LAA = &getAnalysis(); DT = &getAnalysis().getDomTree(); + SE = &getAnalysis().getSE(); // Build up a worklist of inner-loops to vectorize. This is necessary as the // act of distributing a loop creates new loops and can invalidate iterators @@ -619,6 +605,7 @@ public: } void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -629,6 +616,45 @@ public: static char ID; private: + /// \brief Filter out checks between pointers from the same partition. + /// + /// \p PtrToPartition contains the partition number for pointers. Partition + /// number -1 means that the pointer is used in multiple partitions. In this + /// case we can't safely omit the check. + SmallVector + includeOnlyCrossPartitionChecks( + const SmallVectorImpl &AllChecks, + const SmallVectorImpl &PtrToPartition, + const RuntimePointerChecking *RtPtrChecking) { + SmallVector Checks; + + std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks), + [&](const RuntimePointerChecking::PointerCheck &Check) { + for (unsigned PtrIdx1 : Check.first->Members) + for (unsigned PtrIdx2 : Check.second->Members) + // Only include this check if there is a pair of pointers + // that require checking and the pointers fall into + // separate partitions. + // + // (Note that we already know at this point that the two + // pointer groups need checking but it doesn't follow + // that each pair of pointers within the two groups need + // checking as well. + // + // In other words we don't want to include a check just + // because there is a pair of pointers between the two + // pointer groups that require checks and a different + // pair whose pointers fall into different partitions.) + if (RtPtrChecking->needsChecking(PtrIdx1, PtrIdx2) && + !RuntimePointerChecking::arePointersInSamePartition( + PtrToPartition, PtrIdx1, PtrIdx2)) + return true; + return false; + }); + + return Checks; + } + /// \brief Try to distribute an inner-most loop. 
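// [Illustrative sketch, not the pass] The filtering rule implemented by
// includeOnlyCrossPartitionChecks above: a pointer-pair runtime check is kept
// only when the two pointers land in different partitions; partition -1 means
// the pointer is used in several partitions, so its checks cannot be omitted.
#include <cassert>
#include <vector>

static bool needCheck(const std::vector<int> &PtrToPartition, unsigned A,
                      unsigned B) {
  int PA = PtrToPartition[A], PB = PtrToPartition[B];
  return PA == -1 || PB == -1 || PA != PB;
}

int main() {
  std::vector<int> Part = {0, 0, 1, -1};
  assert(!needCheck(Part, 0, 1)); // same partition: check can be dropped
  assert(needCheck(Part, 0, 2));  // different partitions: keep the check
  assert(needCheck(Part, 0, 3));  // -1 spans partitions: keep the check
}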
bool processLoop(Loop *L) { assert(L->empty() && "Only process inner loops."); @@ -655,9 +681,8 @@ private: DEBUG(dbgs() << "Skipping; memory operations are safe for vectorization"); return false; } - auto *InterestingDependences = - LAI.getDepChecker().getInterestingDependences(); - if (!InterestingDependences || InterestingDependences->empty()) { + auto *Dependences = LAI.getDepChecker().getDependences(); + if (!Dependences || Dependences->empty()) { DEBUG(dbgs() << "Skipping; No unsafe dependences to isolate"); return false; } @@ -685,7 +710,7 @@ private: // NumUnsafeDependencesActive reaches 0. const MemoryDepChecker &DepChecker = LAI.getDepChecker(); MemoryInstructionDependences MID(DepChecker.getMemoryInstructions(), - *InterestingDependences); + *Dependences); int NumUnsafeDependencesActive = 0; for (auto &InstDep : MID) { @@ -735,6 +760,13 @@ private: return false; } + // Don't distribute the loop if we need too many SCEV run-time checks. + const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate(); + if (Pred.getComplexity() > DistributeSCEVCheckThreshold) { + DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); + return false; + } + DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n"); // We're done forming the partitions set up the reverse mapping from // instructions to partitions. @@ -746,20 +778,25 @@ private: if (!PH->getSinglePredecessor() || &*PH->begin() != PH->getTerminator()) SplitBlock(PH, PH->getTerminator(), DT, LI); - // If we need run-time checks to disambiguate pointers are run-time, version - // the loop now. + // If we need run-time checks, version the loop now. auto PtrToPartition = Partitions.computePartitionSetForPointers(LAI); - LoopVersioning LVer(LAI, L, LI, DT, &PtrToPartition); - if (LVer.needsRuntimeChecks()) { + const auto *RtPtrChecking = LAI.getRuntimePointerChecking(); + const auto &AllChecks = RtPtrChecking->getChecks(); + auto Checks = includeOnlyCrossPartitionChecks(AllChecks, PtrToPartition, + RtPtrChecking); + + if (!Pred.isAlwaysTrue() || !Checks.empty()) { DEBUG(dbgs() << "\nPointers:\n"); - DEBUG(LAI.getRuntimePointerChecking()->print(dbgs(), 0, &PtrToPartition)); - LVer.versionLoop(this); - LVer.addPHINodes(DefsUsedOutside); + DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks)); + LoopVersioning LVer(LAI, L, LI, DT, SE, false); + LVer.setAliasChecks(std::move(Checks)); + LVer.setSCEVChecks(LAI.PSE.getUnionPredicate()); + LVer.versionLoop(DefsUsedOutside); } // Create identical copies of the original loop for each partition and hook // them up sequentially. - Partitions.cloneLoops(this); + Partitions.cloneLoops(); // Now, we remove the instruction from each loop that don't belong to that // partition. 
@@ -780,6 +817,7 @@ private: LoopInfo *LI; LoopAccessAnalysis *LAA; DominatorTree *DT; + ScalarEvolution *SE; }; } // anonymous namespace @@ -790,6 +828,7 @@ INITIALIZE_PASS_BEGIN(LoopDistribute, LDIST_NAME, ldist_name, false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(LoopDistribute, LDIST_NAME, ldist_name, false, false) namespace llvm { diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index a21ca2417ca1..2d577de7c2b8 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -31,11 +31,6 @@ // void foo(_Complex float *P) // for (i) { __real__(*P) = 0; __imag__(*P) = 0; } // -// We should enhance this to handle negative strides through memory. -// Alternatively (and perhaps better) we could rely on an earlier pass to force -// forward iteration through memory, which is generally better for cache -// behavior. Negative strides *do* happen for memset/memcpy loops. -// // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). // @@ -44,7 +39,10 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -67,149 +65,85 @@ STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores"); namespace { - class LoopIdiomRecognize; +class LoopIdiomRecognize : public LoopPass { + Loop *CurLoop; + AliasAnalysis *AA; + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + TargetLibraryInfo *TLI; + const TargetTransformInfo *TTI; + const DataLayout *DL; - /// This class defines some utility functions for loop idiom recognization. - class LIRUtil { - public: - /// Return true iff the block contains nothing but an uncondition branch - /// (aka goto instruction). - static bool isAlmostEmpty(BasicBlock *); +public: + static char ID; + explicit LoopIdiomRecognize() : LoopPass(ID) { + initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); + } - static BranchInst *getBranch(BasicBlock *BB) { - return dyn_cast(BB->getTerminator()); - } + bool runOnLoop(Loop *L, LPPassManager &LPM) override; - /// Derive the precondition block (i.e the block that guards the loop - /// preheader) from the given preheader. - static BasicBlock *getPrecondBb(BasicBlock *PreHead); - }; + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG. + /// + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } - /// This class is to recoginize idioms of population-count conducted in - /// a noncountable loop. 
Currently it only recognizes this pattern: - /// \code - /// while(x) {cnt++; ...; x &= x - 1; ...} - /// \endcode - class NclPopcountRecognize { - LoopIdiomRecognize &LIR; - Loop *CurLoop; - BasicBlock *PreCondBB; +private: + typedef SmallVector StoreList; + StoreList StoreRefs; - typedef IRBuilder<> IRBuilderTy; + /// \name Countable Loop Idiom Handling + /// @{ - public: - explicit NclPopcountRecognize(LoopIdiomRecognize &TheLIR); - bool recognize(); + bool runOnCountableLoop(); + bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, + SmallVectorImpl &ExitBlocks); - private: - /// Take a glimpse of the loop to see if we need to go ahead recoginizing - /// the idiom. - bool preliminaryScreen(); + void collectStores(BasicBlock *BB); + bool isLegalStore(StoreInst *SI); + bool processLoopStore(StoreInst *SI, const SCEV *BECount); + bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - /// Check if the given conditional branch is based on the comparison - /// between a variable and zero, and if the variable is non-zero, the - /// control yields to the loop entry. If the branch matches the behavior, - /// the variable involved in the comparion is returned. This function will - /// be called to see if the precondition and postcondition of the loop - /// are in desirable form. - Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const; + bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, + unsigned StoreAlignment, Value *SplatValue, + Instruction *TheStore, const SCEVAddRecExpr *Ev, + const SCEV *BECount, bool NegStride); + bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, + const SCEVAddRecExpr *StoreEv, + const SCEV *BECount, bool NegStride); - /// Return true iff the idiom is detected in the loop. and 1) \p CntInst - /// is set to the instruction counting the population bit. 2) \p CntPhi - /// is set to the corresponding phi node. 3) \p Var is set to the value - /// whose population bits are being counted. - bool detectIdiom - (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const; + /// @} + /// \name Noncountable Loop Idiom Handling + /// @{ - /// Insert ctpop intrinsic function and some obviously dead instructions. - void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var); + bool runOnNoncountableLoop(); - /// Create llvm.ctpop.* intrinsic function. 
- CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL); - }; + bool recognizePopcount(); + void transformLoopToPopcount(BasicBlock *PreCondBB, Instruction *CntInst, + PHINode *CntPhi, Value *Var); - class LoopIdiomRecognize : public LoopPass { - Loop *CurLoop; - DominatorTree *DT; - ScalarEvolution *SE; - TargetLibraryInfo *TLI; - const TargetTransformInfo *TTI; - public: - static char ID; - explicit LoopIdiomRecognize() : LoopPass(ID) { - initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry()); - DT = nullptr; - SE = nullptr; - TLI = nullptr; - TTI = nullptr; - } + /// @} +}; - bool runOnLoop(Loop *L, LPPassManager &LPM) override; - bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, - SmallVectorImpl &ExitBlocks); - - bool processLoopStore(StoreInst *SI, const SCEV *BECount); - bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - - bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, - unsigned StoreAlignment, - Value *SplatValue, Instruction *TheStore, - const SCEVAddRecExpr *Ev, - const SCEV *BECount); - bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, - const SCEVAddRecExpr *StoreEv, - const SCEVAddRecExpr *LoadEv, - const SCEV *BECount); - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG. - /// - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreservedID(LCSSAID); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - } - - DominatorTree *getDominatorTree() { - return DT ? DT - : (DT = &getAnalysis().getDomTree()); - } - - ScalarEvolution *getScalarEvolution() { - return SE ? SE : (SE = &getAnalysis()); - } - - TargetLibraryInfo *getTargetLibraryInfo() { - if (!TLI) - TLI = &getAnalysis().getTLI(); - - return TLI; - } - - const TargetTransformInfo *getTargetTransformInfo() { - return TTI ? TTI - : (TTI = &getAnalysis().getTTI( - *CurLoop->getHeader()->getParent())); - } - - Loop *getLoop() const { return CurLoop; } - - private: - bool runOnNoncountableLoop(); - bool runOnCountableLoop(); - }; -} +} // End anonymous namespace. 
char LoopIdiomRecognize::ID = 0; INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", @@ -218,9 +152,12 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", false, false) @@ -240,429 +177,17 @@ static void deleteDeadInstruction(Instruction *I, RecursivelyDeleteTriviallyDeadInstructions(Op, TLI); } -//===----------------------------------------------------------------------===// -// -// Implementation of LIRUtil -// -//===----------------------------------------------------------------------===// - -// This function will return true iff the given block contains nothing but goto. -// A typical usage of this function is to check if the preheader function is -// "almost" empty such that generated intrinsic functions can be moved across -// the preheader and be placed at the end of the precondition block without -// the concern of breaking data dependence. -bool LIRUtil::isAlmostEmpty(BasicBlock *BB) { - if (BranchInst *Br = getBranch(BB)) { - return Br->isUnconditional() && Br == BB->begin(); - } - return false; -} - -BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) { - if (BasicBlock *BB = PreHead->getSinglePredecessor()) { - BranchInst *Br = getBranch(BB); - return Br && Br->isConditional() ? BB : nullptr; - } - return nullptr; -} - -//===----------------------------------------------------------------------===// -// -// Implementation of NclPopcountRecognize -// -//===----------------------------------------------------------------------===// - -NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR): - LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) { -} - -bool NclPopcountRecognize::preliminaryScreen() { - const TargetTransformInfo *TTI = LIR.getTargetTransformInfo(); - if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) - return false; - - // Counting population are usually conducted by few arithmetic instructions. - // Such instructions can be easilly "absorbed" by vacant slots in a - // non-compact loop. Therefore, recognizing popcount idiom only makes sense - // in a compact loop. - - // Give up if the loop has multiple blocks or multiple backedges. - if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) - return false; - - BasicBlock *LoopBody = *(CurLoop->block_begin()); - if (LoopBody->size() >= 20) { - // The loop is too big, bail out. - return false; - } - - // It should have a preheader containing nothing but a goto instruction. - BasicBlock *PreHead = CurLoop->getLoopPreheader(); - if (!PreHead || !LIRUtil::isAlmostEmpty(PreHead)) - return false; - - // It should have a precondition block where the generated popcount instrinsic - // function will be inserted. 
- PreCondBB = LIRUtil::getPrecondBb(PreHead); - if (!PreCondBB) - return false; - - return true; -} - -Value *NclPopcountRecognize::matchCondition(BranchInst *Br, - BasicBlock *LoopEntry) const { - if (!Br || !Br->isConditional()) - return nullptr; - - ICmpInst *Cond = dyn_cast(Br->getCondition()); - if (!Cond) - return nullptr; - - ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); - if (!CmpZero || !CmpZero->isZero()) - return nullptr; - - ICmpInst::Predicate Pred = Cond->getPredicate(); - if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) || - (Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry)) - return Cond->getOperand(0); - - return nullptr; -} - -bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst, - PHINode *&CntPhi, - Value *&Var) const { - // Following code tries to detect this idiom: - // - // if (x0 != 0) - // goto loop-exit // the precondition of the loop - // cnt0 = init-val; - // do { - // x1 = phi (x0, x2); - // cnt1 = phi(cnt0, cnt2); - // - // cnt2 = cnt1 + 1; - // ... - // x2 = x1 & (x1 - 1); - // ... - // } while(x != 0); - // - // loop-exit: - // - - // step 1: Check to see if the look-back branch match this pattern: - // "if (a!=0) goto loop-entry". - BasicBlock *LoopEntry; - Instruction *DefX2, *CountInst; - Value *VarX1, *VarX0; - PHINode *PhiX, *CountPhi; - - DefX2 = CountInst = nullptr; - VarX1 = VarX0 = nullptr; - PhiX = CountPhi = nullptr; - LoopEntry = *(CurLoop->block_begin()); - - // step 1: Check if the loop-back branch is in desirable form. - { - if (Value *T = matchCondition (LIRUtil::getBranch(LoopEntry), LoopEntry)) - DefX2 = dyn_cast(T); - else - return false; - } - - // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" - { - if (!DefX2 || DefX2->getOpcode() != Instruction::And) - return false; - - BinaryOperator *SubOneOp; - - if ((SubOneOp = dyn_cast(DefX2->getOperand(0)))) - VarX1 = DefX2->getOperand(1); - else { - VarX1 = DefX2->getOperand(0); - SubOneOp = dyn_cast(DefX2->getOperand(1)); - } - if (!SubOneOp) - return false; - - Instruction *SubInst = cast(SubOneOp); - ConstantInt *Dec = dyn_cast(SubInst->getOperand(1)); - if (!Dec || - !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || - (SubInst->getOpcode() == Instruction::Add && Dec->isAllOnesValue()))) { - return false; - } - } - - // step 3: Check the recurrence of variable X - { - PhiX = dyn_cast(VarX1); - if (!PhiX || - (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { - return false; - } - } - - // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 - { - CountInst = nullptr; - for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(), - IterE = LoopEntry->end(); Iter != IterE; Iter++) { - Instruction *Inst = Iter; - if (Inst->getOpcode() != Instruction::Add) - continue; - - ConstantInt *Inc = dyn_cast(Inst->getOperand(1)); - if (!Inc || !Inc->isOne()) - continue; - - PHINode *Phi = dyn_cast(Inst->getOperand(0)); - if (!Phi || Phi->getParent() != LoopEntry) - continue; - - // Check if the result of the instruction is live of the loop. 
- bool LiveOutLoop = false; - for (User *U : Inst->users()) { - if ((cast(U))->getParent() != LoopEntry) { - LiveOutLoop = true; break; - } - } - - if (LiveOutLoop) { - CountInst = Inst; - CountPhi = Phi; - break; - } - } - - if (!CountInst) - return false; - } - - // step 5: check if the precondition is in this form: - // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;" - { - BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); - Value *T = matchCondition (PreCondBr, CurLoop->getLoopPreheader()); - if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1)) - return false; - - CntInst = CountInst; - CntPhi = CountPhi; - Var = T; - } - - return true; -} - -void NclPopcountRecognize::transform(Instruction *CntInst, - PHINode *CntPhi, Value *Var) { - - ScalarEvolution *SE = LIR.getScalarEvolution(); - TargetLibraryInfo *TLI = LIR.getTargetLibraryInfo(); - BasicBlock *PreHead = CurLoop->getLoopPreheader(); - BranchInst *PreCondBr = LIRUtil::getBranch(PreCondBB); - const DebugLoc DL = CntInst->getDebugLoc(); - - // Assuming before transformation, the loop is following: - // if (x) // the precondition - // do { cnt++; x &= x - 1; } while(x); - - // Step 1: Insert the ctpop instruction at the end of the precondition block - IRBuilderTy Builder(PreCondBr); - Value *PopCnt, *PopCntZext, *NewCount, *TripCnt; - { - PopCnt = createPopcntIntrinsic(Builder, Var, DL); - NewCount = PopCntZext = - Builder.CreateZExtOrTrunc(PopCnt, cast(CntPhi->getType())); - - if (NewCount != PopCnt) - (cast(NewCount))->setDebugLoc(DL); - - // TripCnt is exactly the number of iterations the loop has - TripCnt = NewCount; - - // If the population counter's initial value is not zero, insert Add Inst. - Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead); - ConstantInt *InitConst = dyn_cast(CntInitVal); - if (!InitConst || !InitConst->isZero()) { - NewCount = Builder.CreateAdd(NewCount, CntInitVal); - (cast(NewCount))->setDebugLoc(DL); - } - } - - // Step 2: Replace the precondition from "if(x == 0) goto loop-exit" to - // "if(NewCount == 0) loop-exit". Withtout this change, the intrinsic - // function would be partial dead code, and downstream passes will drag - // it back from the precondition block to the preheader. - { - ICmpInst *PreCond = cast(PreCondBr->getCondition()); - - Value *Opnd0 = PopCntZext; - Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0); - if (PreCond->getOperand(0) != Var) - std::swap(Opnd0, Opnd1); - - ICmpInst *NewPreCond = - cast(Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1)); - PreCondBr->setCondition(NewPreCond); - - RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI); - } - - // Step 3: Note that the population count is exactly the trip count of the - // loop in question, which enble us to to convert the loop from noncountable - // loop into a countable one. The benefit is twofold: - // - // - If the loop only counts population, the entire loop become dead after - // the transformation. It is lots easier to prove a countable loop dead - // than to prove a noncountable one. (In some C dialects, a infite loop - // isn't dead even if it computes nothing useful. In general, DCE needs - // to prove a noncountable loop finite before safely delete it.) - // - // - If the loop also performs something else, it remains alive. - // Since it is transformed to countable form, it can be aggressively - // optimized by some optimizations which are in general not applicable - // to a noncountable loop. 
- // - // After this step, this loop (conceptually) would look like following: - // newcnt = __builtin_ctpop(x); - // t = newcnt; - // if (x) - // do { cnt++; x &= x-1; t--) } while (t > 0); - BasicBlock *Body = *(CurLoop->block_begin()); - { - BranchInst *LbBr = LIRUtil::getBranch(Body); - ICmpInst *LbCond = cast(LbBr->getCondition()); - Type *Ty = TripCnt->getType(); - - PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", Body->begin()); - - Builder.SetInsertPoint(LbCond); - Value *Opnd1 = cast(TcPhi); - Value *Opnd2 = cast(ConstantInt::get(Ty, 1)); - Instruction *TcDec = - cast(Builder.CreateSub(Opnd1, Opnd2, "tcdec", false, true)); - - TcPhi->addIncoming(TripCnt, PreHead); - TcPhi->addIncoming(TcDec, Body); - - CmpInst::Predicate Pred = (LbBr->getSuccessor(0) == Body) ? - CmpInst::ICMP_UGT : CmpInst::ICMP_SLE; - LbCond->setPredicate(Pred); - LbCond->setOperand(0, TcDec); - LbCond->setOperand(1, cast(ConstantInt::get(Ty, 0))); - } - - // Step 4: All the references to the original population counter outside - // the loop are replaced with the NewCount -- the value returned from - // __builtin_ctpop(). - CntInst->replaceUsesOutsideBlock(NewCount, Body); - - // step 5: Forget the "non-computable" trip-count SCEV associated with the - // loop. The loop would otherwise not be deleted even if it becomes empty. - SE->forgetLoop(CurLoop); -} - -CallInst *NclPopcountRecognize::createPopcntIntrinsic(IRBuilderTy &IRBuilder, - Value *Val, DebugLoc DL) { - Value *Ops[] = { Val }; - Type *Tys[] = { Val->getType() }; - - Module *M = (*(CurLoop->block_begin()))->getParent()->getParent(); - Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys); - CallInst *CI = IRBuilder.CreateCall(Func, Ops); - CI->setDebugLoc(DL); - - return CI; -} - -/// recognize - detect population count idiom in a non-countable loop. If -/// detected, transform the relevant code to popcount intrinsic function -/// call, and return true; otherwise, return false. -bool NclPopcountRecognize::recognize() { - - if (!LIR.getTargetTransformInfo()) - return false; - - LIR.getScalarEvolution(); - - if (!preliminaryScreen()) - return false; - - Instruction *CntInst; - PHINode *CntPhi; - Value *Val; - if (!detectIdiom(CntInst, CntPhi, Val)) - return false; - - transform(CntInst, CntPhi, Val); - return true; -} - //===----------------------------------------------------------------------===// // // Implementation of LoopIdiomRecognize // //===----------------------------------------------------------------------===// -bool LoopIdiomRecognize::runOnCountableLoop() { - const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop); - assert(!isa(BECount) && - "runOnCountableLoop() called on a loop without a predictable" - "backedge-taken count"); - - // If this loop executes exactly one time, then it should be peeled, not - // optimized by this pass. - if (const SCEVConstant *BECst = dyn_cast(BECount)) - if (BECst->getValue()->getValue() == 0) - return false; - - // set DT - (void)getDominatorTree(); - - LoopInfo &LI = getAnalysis().getLoopInfo(); - TLI = &getAnalysis().getTLI(); - - // set TLI - (void)getTargetLibraryInfo(); - - SmallVector ExitBlocks; - CurLoop->getUniqueExitBlocks(ExitBlocks); - - DEBUG(dbgs() << "loop-idiom Scanning: F[" - << CurLoop->getHeader()->getParent()->getName() - << "] Loop %" << CurLoop->getHeader()->getName() << "\n"); - - bool MadeChange = false; - // Scan all the blocks in the loop that are not in subloops. - for (auto *BB : CurLoop->getBlocks()) { - // Ignore blocks in subloops. 
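// [Illustrative sketch, not part of the patch] The idiom the code above
// recognizes, at the source level: a Kernighan popcount loop and the ctpop
// form it is rewritten into (__builtin_popcount stands in for llvm.ctpop).
#include <cassert>

static int popcntLoop(unsigned X) {
  int Cnt = 0;
  while (X) {   // noncountable: the trip count depends on the bit pattern
    ++Cnt;
    X &= X - 1; // clear the lowest set bit
  }
  return Cnt;
}

int main() {
  for (unsigned X = 0; X < 1024; ++X)
    assert(popcntLoop(X) == __builtin_popcount(X));
}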
- if (LI.getLoopFor(BB) != CurLoop) - continue; - - MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks); - } - return MadeChange; -} - -bool LoopIdiomRecognize::runOnNoncountableLoop() { - NclPopcountRecognize Popcount(*this); - if (Popcount.recognize()) - return true; - - return false; -} - bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; CurLoop = L; - // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. if (!L->getLoopPreheader()) @@ -673,17 +198,155 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { if (Name == "memset" || Name == "memcpy") return false; - SE = &getAnalysis(); + AA = &getAnalysis().getAAResults(); + DT = &getAnalysis().getDomTree(); + LI = &getAnalysis().getLoopInfo(); + SE = &getAnalysis().getSE(); + TLI = &getAnalysis().getTLI(); + TTI = &getAnalysis().getTTI( + *CurLoop->getHeader()->getParent()); + DL = &CurLoop->getHeader()->getModule()->getDataLayout(); + if (SE->hasLoopInvariantBackedgeTakenCount(L)) return runOnCountableLoop(); + return runOnNoncountableLoop(); } +bool LoopIdiomRecognize::runOnCountableLoop() { + const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop); + assert(!isa(BECount) && + "runOnCountableLoop() called on a loop without a predictable" + "backedge-taken count"); + + // If this loop executes exactly one time, then it should be peeled, not + // optimized by this pass. + if (const SCEVConstant *BECst = dyn_cast(BECount)) + if (BECst->getAPInt() == 0) + return false; + + SmallVector ExitBlocks; + CurLoop->getUniqueExitBlocks(ExitBlocks); + + DEBUG(dbgs() << "loop-idiom Scanning: F[" + << CurLoop->getHeader()->getParent()->getName() << "] Loop %" + << CurLoop->getHeader()->getName() << "\n"); + + bool MadeChange = false; + // Scan all the blocks in the loop that are not in subloops. + for (auto *BB : CurLoop->getBlocks()) { + // Ignore blocks in subloops. + if (LI->getLoopFor(BB) != CurLoop) + continue; + + MadeChange |= runOnLoopBlock(BB, BECount, ExitBlocks); + } + return MadeChange; +} + +static unsigned getStoreSizeInBytes(StoreInst *SI, const DataLayout *DL) { + uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType()); + assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) && + "Don't overflow unsigned."); + return (unsigned)SizeInBits >> 3; +} + +static unsigned getStoreStride(const SCEVAddRecExpr *StoreEv) { + const SCEVConstant *ConstStride = cast(StoreEv->getOperand(1)); + return ConstStride->getAPInt().getZExtValue(); +} + +/// getMemSetPatternValue - If a strided store of the specified value is safe to +/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should +/// be passed in. Otherwise, return null. +/// +/// Note that we don't ever attempt to use memset_pattern8 or 4, because these +/// just replicate their input array and then pass on to memset_pattern16. +static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) { + // If the value isn't a constant, we can't promote it to being in a constant + // array. We could theoretically do a store to an alloca or something, but + // that doesn't seem worthwhile. + Constant *C = dyn_cast(V); + if (!C) + return nullptr; + + // Only handle simple values that are a power of two bytes in size. + uint64_t Size = DL->getTypeSizeInBits(V->getType()); + if (Size == 0 || (Size & 7) || (Size & (Size - 1))) + return nullptr; + + // Don't care enough about darwin/ppc to implement this. 
+ if (DL->isBigEndian()) + return nullptr; + + // Convert to size in bytes. + Size /= 8; + + // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see + // if the top and bottom are the same (e.g. for vectors and large integers). + if (Size > 16) + return nullptr; + + // If the constant is exactly 16 bytes, just use it. + if (Size == 16) + return C; + + // Otherwise, we'll use an array of the constants. + unsigned ArraySize = 16 / Size; + ArrayType *AT = ArrayType::get(V->getType(), ArraySize); + return ConstantArray::get(AT, std::vector(ArraySize, C)); +} + +bool LoopIdiomRecognize::isLegalStore(StoreInst *SI) { + // Don't touch volatile stores. + if (!SI->isSimple()) + return false; + + Value *StoredVal = SI->getValueOperand(); + Value *StorePtr = SI->getPointerOperand(); + + // Reject stores that are so large that they overflow an unsigned. + uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); + if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) + return false; + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided store. If we have something else, it's a + // random store we can't handle. + const SCEVAddRecExpr *StoreEv = + dyn_cast(SE->getSCEV(StorePtr)); + if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) + return false; + + // Check to see if we have a constant stride. + if (!isa(StoreEv->getOperand(1))) + return false; + + return true; +} + +void LoopIdiomRecognize::collectStores(BasicBlock *BB) { + StoreRefs.clear(); + for (Instruction &I : *BB) { + StoreInst *SI = dyn_cast(&I); + if (!SI) + continue; + + // Make sure this is a strided store with a constant stride. + if (!isLegalStore(SI)) + continue; + + // Save the store locations. + StoreRefs.push_back(SI); + } +} + /// runOnLoopBlock - Process the specified block, which lives in a counted loop /// with the specified backedge count. This block is known to be in the current /// loop and not in any subloops. -bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, - SmallVectorImpl &ExitBlocks) { +bool LoopIdiomRecognize::runOnLoopBlock( + BasicBlock *BB, const SCEV *BECount, + SmallVectorImpl &ExitBlocks) { // We can only promote stores in this block if they are unconditionally // executed in the loop. For a block to be unconditionally executed, it has // to dominate all the exit blocks of the loop. Verify this now. @@ -692,25 +355,18 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, return false; bool MadeChange = false; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Inst = I++; - // Look for store instructions, which may be optimized to memset/memcpy. - if (StoreInst *SI = dyn_cast(Inst)) { - WeakVH InstPtr(I); - if (!processLoopStore(SI, BECount)) continue; - MadeChange = true; - - // If processing the store invalidated our iterator, start over from the - // top of the block. - if (!InstPtr) - I = BB->begin(); - continue; - } + // Look for store instructions, which may be optimized to memset/memcpy. + collectStores(BB); + for (auto &SI : StoreRefs) + MadeChange |= processLoopStore(SI, BECount); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { + Instruction *Inst = &*I++; // Look for memset instructions, which may be optimized to a larger memset. 
- if (MemSetInst *MSI = dyn_cast(Inst)) { - WeakVH InstPtr(I); - if (!processLoopMemSet(MSI, BECount)) continue; + if (MemSetInst *MSI = dyn_cast(Inst)) { + WeakVH InstPtr(&*I); + if (!processLoopMemSet(MSI, BECount)) + continue; MadeChange = true; // If processing the memset invalidated our iterator, start over from the @@ -724,71 +380,38 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, return MadeChange; } - /// processLoopStore - See if this store can be promoted to a memset or memcpy. bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { - if (!SI->isSimple()) return false; + assert(SI->isSimple() && "Expected only non-volatile stores."); Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); - // Reject stores that are so large that they overflow an unsigned. - auto &DL = CurLoop->getHeader()->getModule()->getDataLayout(); - uint64_t SizeInBits = DL.getTypeSizeInBits(StoredVal->getType()); - if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) - return false; - - // See if the pointer expression is an AddRec like {base,+,1} on the current - // loop, which indicates a strided store. If we have something else, it's a - // random store we can't handle. - const SCEVAddRecExpr *StoreEv = - dyn_cast(SE->getSCEV(StorePtr)); - if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) - return false; - // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. - unsigned StoreSize = (unsigned)SizeInBits >> 3; - const SCEVConstant *Stride = dyn_cast(StoreEv->getOperand(1)); - - if (!Stride || StoreSize != Stride->getValue()->getValue()) { - // TODO: Could also handle negative stride here someday, that will require - // the validity check in mayLoopAccessLocation to be updated though. - // Enable this to print exact negative strides. - if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) { - dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; - dbgs() << "BB: " << *SI->getParent(); - } - + const SCEVAddRecExpr *StoreEv = cast(SE->getSCEV(StorePtr)); + unsigned Stride = getStoreStride(StoreEv); + unsigned StoreSize = getStoreSizeInBytes(SI, DL); + if (StoreSize != Stride && StoreSize != -Stride) return false; - } + + bool NegStride = StoreSize == -Stride; // See if we can optimize just this store in isolation. if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), - StoredVal, SI, StoreEv, BECount)) + StoredVal, SI, StoreEv, BECount, NegStride)) return true; - // If the stored value is a strided load in the same loop with the same stride - // this this may be transformable into a memcpy. This kicks in for stuff like - // for (i) A[i] = B[i]; - if (LoadInst *LI = dyn_cast(StoredVal)) { - const SCEVAddRecExpr *LoadEv = - dyn_cast(SE->getSCEV(LI->getOperand(0))); - if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() && - StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple()) - if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount)) - return true; - } - //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n"; - - return false; + // Optimize the store into a memcpy, if it feeds an similarly strided load. + return processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, BECount, NegStride); } /// processLoopMemSet - See if this memset can be promoted to a large memset. 
-bool LoopIdiomRecognize:: -processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { +bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI, + const SCEV *BECount) { // We can only handle non-volatile memsets with a constant size. - if (MSI->isVolatile() || !isa(MSI->getLength())) return false; + if (MSI->isVolatile() || !isa(MSI->getLength())) + return false; // If we're not allowed to hack on memset, we fail. if (!TLI->has(LibFunc::memset)) @@ -818,17 +441,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { return false; return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, - MSI->getAlignment(), MSI->getValue(), - MSI, Ev, BECount); + MSI->getAlignment(), MSI->getValue(), MSI, Ev, + BECount, /*NegStride=*/false); } - /// mayLoopAccessLocation - Return true if the specified loop might access the /// specified pointer location, which is a loop-strided access. The 'Access' /// argument specifies what the verboten forms of access are (read or write). -static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, - Loop *L, const SCEV *BECount, - unsigned StoreSize, AliasAnalysis &AA, +static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, + const SCEV *BECount, unsigned StoreSize, + AliasAnalysis &AA, Instruction *IgnoredStore) { // Get the location that may be stored across the loop. Since the access is // strided positively through memory, we say that the modified location starts @@ -838,7 +460,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, // If the loop iterates a fixed number of times, we can refine the access size // to be exactly the size of the memset, which is (BECount+1)*StoreSize if (const SCEVConstant *BECst = dyn_cast(BECount)) - AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; + AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize; // TODO: For this to be really effective, we have to dive into the pointer // operand in the store. Store to &A[i] of 100 will always return may alias @@ -849,59 +471,31 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; ++BI) for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I) - if (&*I != IgnoredStore && - (AA.getModRefInfo(I, StoreLoc) & Access)) + if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access)) return true; return false; } -/// getMemSetPatternValue - If a strided store of the specified value is safe to -/// turn into a memset_pattern16, return a ConstantArray of 16 bytes that should -/// be passed in. Otherwise, return null. -/// -/// Note that we don't ever attempt to use memset_pattern8 or 4, because these -/// just replicate their input array and then pass on to memset_pattern16. -static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) { - // If the value isn't a constant, we can't promote it to being in a constant - // array. We could theoretically do a store to an alloca or something, but - // that doesn't seem worthwhile. - Constant *C = dyn_cast(V); - if (!C) return nullptr; - - // Only handle simple values that are a power of two bytes in size. - uint64_t Size = DL.getTypeSizeInBits(V->getType()); - if (Size == 0 || (Size & 7) || (Size & (Size-1))) - return nullptr; - - // Don't care enough about darwin/ppc to implement this. - if (DL.isBigEndian()) - return nullptr; - - // Convert to size in bytes. 
- Size /= 8; - - // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see - // if the top and bottom are the same (e.g. for vectors and large integers). - if (Size > 16) return nullptr; - - // If the constant is exactly 16 bytes, just use it. - if (Size == 16) return C; - - // Otherwise, we'll use an array of the constants. - unsigned ArraySize = 16/Size; - ArrayType *AT = ArrayType::get(V->getType(), ArraySize); - return ConstantArray::get(AT, std::vector(ArraySize, C)); +// If we have a negative stride, Start refers to the end of the memory location +// we're trying to memset. Therefore, we need to recompute the base pointer, +// which is just Start - BECount*Size. +static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount, + Type *IntPtr, unsigned StoreSize, + ScalarEvolution *SE) { + const SCEV *Index = SE->getTruncateOrZeroExtend(BECount, IntPtr); + if (StoreSize != 1) + Index = SE->getMulExpr(Index, SE->getConstant(IntPtr, StoreSize), + SCEV::FlagNUW); + return SE->getMinusSCEV(Start, Index); } - /// processLoopStridedStore - We see a strided store of some value. If we can /// transform this into a memset or memset_pattern in the loop preheader, do so. -bool LoopIdiomRecognize:: -processLoopStridedStore(Value *DestPtr, unsigned StoreSize, - unsigned StoreAlignment, Value *StoredVal, - Instruction *TheStore, const SCEVAddRecExpr *Ev, - const SCEV *BECount) { +bool LoopIdiomRecognize::processLoopStridedStore( + Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, + Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, + const SCEV *BECount, bool NegStride) { // If the stored value is a byte-wise value (like i32 -1), then it may be // turned into a memset of i8 -1, assuming that all the consecutive bytes @@ -909,7 +503,6 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = nullptr; - auto &DL = CurLoop->getHeader()->getModule()->getDataLayout(); unsigned DestAS = DestPtr->getType()->getPointerAddressSpace(); // If we're allowed to form a memset, and the stored value would be acceptable @@ -936,9 +529,15 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - SCEVExpander Expander(*SE, DL, "loop-idiom"); + SCEVExpander Expander(*SE, *DL, "loop-idiom"); Type *DestInt8PtrTy = Builder.getInt8PtrTy(DestAS); + Type *IntPtr = Builder.getIntPtrTy(*DL, DestAS); + + const SCEV *Start = Ev->getStart(); + // Handle negative strided loops. + if (NegStride) + Start = getStartForNegStride(Start, BECount, IntPtr, StoreSize, SE); // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this @@ -946,12 +545,9 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // or write to the aliased location. Check for any overlap by generating the // base pointer and checking the region. 
Value *BasePtr = - Expander.expandCodeFor(Ev->getStart(), DestInt8PtrTy, - Preheader->getTerminator()); - - if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef, - CurLoop, BECount, - StoreSize, getAnalysis(), TheStore)) { + Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator()); + if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize, + *AA, TheStore)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI); @@ -962,36 +558,30 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. - Type *IntPtr = Builder.getIntPtrTy(DL, DestAS); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - SCEV::FlagNUW); + const SCEV *NumBytesS = + SE->getAddExpr(BECount, SE->getOne(IntPtr), SCEV::FlagNUW); if (StoreSize != 1) { NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), SCEV::FlagNUW); } Value *NumBytes = - Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); + Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); CallInst *NewCall; if (SplatValue) { - NewCall = Builder.CreateMemSet(BasePtr, - SplatValue, - NumBytes, - StoreAlignment); + NewCall = + Builder.CreateMemSet(BasePtr, SplatValue, NumBytes, StoreAlignment); } else { // Everything is emitted in default address space Type *Int8PtrTy = DestInt8PtrTy; - Module *M = TheStore->getParent()->getParent()->getParent(); - Value *MSP = M->getOrInsertFunction("memset_pattern16", - Builder.getVoidTy(), - Int8PtrTy, - Int8PtrTy, - IntPtr, - (void*)nullptr); + Module *M = TheStore->getModule(); + Value *MSP = + M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), + Int8PtrTy, Int8PtrTy, IntPtr, (void *)nullptr); // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. @@ -1015,26 +605,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, return true; } -/// processLoopStoreOfLoopLoad - We see a strided store whose value is a -/// same-strided load. -bool LoopIdiomRecognize:: -processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, - const SCEVAddRecExpr *StoreEv, - const SCEVAddRecExpr *LoadEv, - const SCEV *BECount) { +/// If the stored value is a strided load in the same loop with the same stride +/// this may be transformable into a memcpy. This kicks in for stuff like +/// for (i) A[i] = B[i]; +bool LoopIdiomRecognize::processLoopStoreOfLoopLoad( + StoreInst *SI, unsigned StoreSize, const SCEVAddRecExpr *StoreEv, + const SCEV *BECount, bool NegStride) { // If we're not allowed to form memcpy, we fail. if (!TLI->has(LibFunc::memcpy)) return false; - LoadInst *LI = cast(SI->getValueOperand()); + // The store must be feeding a non-volatile load. + LoadInst *LI = dyn_cast(SI->getValueOperand()); + if (!LI || !LI->isSimple()) + return false; + + // See if the pointer expression is an AddRec like {base,+,1} on the current + // loop, which indicates a strided load. If we have something else, it's a + // random load we can't handle. + const SCEVAddRecExpr *LoadEv = + dyn_cast(SE->getSCEV(LI->getPointerOperand())); + if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) + return false; + + // The store and load must share the same stride. 
+ if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) + return false; // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. This allows us to insert code for it in the preheader. BasicBlock *Preheader = CurLoop->getLoopPreheader(); IRBuilder<> Builder(Preheader->getTerminator()); - const DataLayout &DL = Preheader->getModule()->getDataLayout(); - SCEVExpander Expander(*SE, DL, "loop-idiom"); + SCEVExpander Expander(*SE, *DL, "loop-idiom"); + + const SCEV *StrStart = StoreEv->getStart(); + unsigned StrAS = SI->getPointerAddressSpace(); + Type *IntPtrTy = Builder.getIntPtrTy(*DL, StrAS); + + // Handle negative strided loops. + if (NegStride) + StrStart = getStartForNegStride(StrStart, BECount, IntPtrTy, StoreSize, SE); // Okay, we have a strided store "p[i]" of a loaded value. We can turn // this into a memcpy in the loop preheader now if we want. However, this @@ -1042,29 +653,31 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // or write the memory region we're storing to. This includes the load that // feeds the stores. Check for an alias by generating the base address and // checking everything. - Value *StoreBasePtr = - Expander.expandCodeFor(StoreEv->getStart(), - Builder.getInt8PtrTy(SI->getPointerAddressSpace()), - Preheader->getTerminator()); + Value *StoreBasePtr = Expander.expandCodeFor( + StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator()); - if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef, - CurLoop, BECount, StoreSize, - getAnalysis(), SI)) { + if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount, + StoreSize, *AA, SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); return false; } + const SCEV *LdStart = LoadEv->getStart(); + unsigned LdAS = LI->getPointerAddressSpace(); + + // Handle negative strided loops. + if (NegStride) + LdStart = getStartForNegStride(LdStart, BECount, IntPtrTy, StoreSize, SE); + // For a memcpy, we have to make sure that the input array is not being // mutated by the loop. - Value *LoadBasePtr = - Expander.expandCodeFor(LoadEv->getStart(), - Builder.getInt8PtrTy(LI->getPointerAddressSpace()), - Preheader->getTerminator()); + Value *LoadBasePtr = Expander.expandCodeFor( + LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator()); - if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount, - StoreSize, getAnalysis(), SI)) { + if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize, + *AA, SI)) { Expander.clear(); // If we generated new code for the base pointer, clean up. RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); @@ -1074,34 +687,368 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // Okay, everything is safe, we can transform this! - // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. 
- Type *IntPtrTy = Builder.getIntPtrTy(DL, SI->getPointerAddressSpace()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); - const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtrTy, 1), - SCEV::FlagNUW); + const SCEV *NumBytesS = + SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), SCEV::FlagNUW); Value *NumBytes = - Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); + Expander.expandCodeFor(NumBytesS, IntPtrTy, Preheader->getTerminator()); CallInst *NewCall = - Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, - std::min(SI->getAlignment(), LI->getAlignment())); + Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, + std::min(SI->getAlignment(), LI->getAlignment())); NewCall->setDebugLoc(SI->getDebugLoc()); DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); - - // Okay, the memset has been formed. Zap the original store and anything that + // Okay, the memcpy has been formed. Zap the original store and anything that // feeds into it. deleteDeadInstruction(SI, TLI); ++NumMemCpy; return true; } + +bool LoopIdiomRecognize::runOnNoncountableLoop() { + return recognizePopcount(); +} + +/// Check if the given conditional branch is based on the comparison between +/// a variable and zero, and if the variable is non-zero, the control yields to +/// the loop entry. If the branch matches the behavior, the variable involved +/// in the comparion is returned. This function will be called to see if the +/// precondition and postcondition of the loop are in desirable form. +static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) { + if (!BI || !BI->isConditional()) + return nullptr; + + ICmpInst *Cond = dyn_cast(BI->getCondition()); + if (!Cond) + return nullptr; + + ConstantInt *CmpZero = dyn_cast(Cond->getOperand(1)); + if (!CmpZero || !CmpZero->isZero()) + return nullptr; + + ICmpInst::Predicate Pred = Cond->getPredicate(); + if ((Pred == ICmpInst::ICMP_NE && BI->getSuccessor(0) == LoopEntry) || + (Pred == ICmpInst::ICMP_EQ && BI->getSuccessor(1) == LoopEntry)) + return Cond->getOperand(0); + + return nullptr; +} + +/// Return true iff the idiom is detected in the loop. +/// +/// Additionally: +/// 1) \p CntInst is set to the instruction counting the population bit. +/// 2) \p CntPhi is set to the corresponding phi node. +/// 3) \p Var is set to the value whose population bits are being counted. +/// +/// The core idiom we are trying to detect is: +/// \code +/// if (x0 != 0) +/// goto loop-exit // the precondition of the loop +/// cnt0 = init-val; +/// do { +/// x1 = phi (x0, x2); +/// cnt1 = phi(cnt0, cnt2); +/// +/// cnt2 = cnt1 + 1; +/// ... +/// x2 = x1 & (x1 - 1); +/// ... +/// } while(x != 0); +/// +/// loop-exit: +/// \endcode +static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, + Instruction *&CntInst, PHINode *&CntPhi, + Value *&Var) { + // step 1: Check to see if the look-back branch match this pattern: + // "if (a!=0) goto loop-entry". + BasicBlock *LoopEntry; + Instruction *DefX2, *CountInst; + Value *VarX1, *VarX0; + PHINode *PhiX, *CountPhi; + + DefX2 = CountInst = nullptr; + VarX1 = VarX0 = nullptr; + PhiX = CountPhi = nullptr; + LoopEntry = *(CurLoop->block_begin()); + + // step 1: Check if the loop-back branch is in desirable form. 
+ { + if (Value *T = matchCondition( + dyn_cast(LoopEntry->getTerminator()), LoopEntry)) + DefX2 = dyn_cast(T); + else + return false; + } + + // step 2: detect instructions corresponding to "x2 = x1 & (x1 - 1)" + { + if (!DefX2 || DefX2->getOpcode() != Instruction::And) + return false; + + BinaryOperator *SubOneOp; + + if ((SubOneOp = dyn_cast(DefX2->getOperand(0)))) + VarX1 = DefX2->getOperand(1); + else { + VarX1 = DefX2->getOperand(0); + SubOneOp = dyn_cast(DefX2->getOperand(1)); + } + if (!SubOneOp) + return false; + + Instruction *SubInst = cast(SubOneOp); + ConstantInt *Dec = dyn_cast(SubInst->getOperand(1)); + if (!Dec || + !((SubInst->getOpcode() == Instruction::Sub && Dec->isOne()) || + (SubInst->getOpcode() == Instruction::Add && + Dec->isAllOnesValue()))) { + return false; + } + } + + // step 3: Check the recurrence of variable X + { + PhiX = dyn_cast(VarX1); + if (!PhiX || + (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { + return false; + } + } + + // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 + { + CountInst = nullptr; + for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI()->getIterator(), + IterE = LoopEntry->end(); + Iter != IterE; Iter++) { + Instruction *Inst = &*Iter; + if (Inst->getOpcode() != Instruction::Add) + continue; + + ConstantInt *Inc = dyn_cast(Inst->getOperand(1)); + if (!Inc || !Inc->isOne()) + continue; + + PHINode *Phi = dyn_cast(Inst->getOperand(0)); + if (!Phi || Phi->getParent() != LoopEntry) + continue; + + // Check if the result of the instruction is live of the loop. + bool LiveOutLoop = false; + for (User *U : Inst->users()) { + if ((cast(U))->getParent() != LoopEntry) { + LiveOutLoop = true; + break; + } + } + + if (LiveOutLoop) { + CountInst = Inst; + CountPhi = Phi; + break; + } + } + + if (!CountInst) + return false; + } + + // step 5: check if the precondition is in this form: + // "if (x != 0) goto loop-head ; else goto somewhere-we-don't-care;" + { + auto *PreCondBr = dyn_cast(PreCondBB->getTerminator()); + Value *T = matchCondition(PreCondBr, CurLoop->getLoopPreheader()); + if (T != PhiX->getOperand(0) && T != PhiX->getOperand(1)) + return false; + + CntInst = CountInst; + CntPhi = CountPhi; + Var = T; + } + + return true; +} + +/// Recognizes a population count idiom in a non-countable loop. +/// +/// If detected, transforms the relevant code to issue the popcount intrinsic +/// function call, and returns true; otherwise, returns false. +bool LoopIdiomRecognize::recognizePopcount() { + if (TTI->getPopcntSupport(32) != TargetTransformInfo::PSK_FastHardware) + return false; + + // Counting population are usually conducted by few arithmetic instructions. + // Such instructions can be easily "absorbed" by vacant slots in a + // non-compact loop. Therefore, recognizing popcount idiom only makes sense + // in a compact loop. + + // Give up if the loop has multiple blocks or multiple backedges. + if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 1) + return false; + + BasicBlock *LoopBody = *(CurLoop->block_begin()); + if (LoopBody->size() >= 20) { + // The loop is too big, bail out. + return false; + } + + // It should have a preheader containing nothing but an unconditional branch. 
+ BasicBlock *PH = CurLoop->getLoopPreheader(); + if (!PH) + return false; + if (&PH->front() != PH->getTerminator()) + return false; + auto *EntryBI = dyn_cast(PH->getTerminator()); + if (!EntryBI || EntryBI->isConditional()) + return false; + + // It should have a precondition block where the generated popcount instrinsic + // function can be inserted. + auto *PreCondBB = PH->getSinglePredecessor(); + if (!PreCondBB) + return false; + auto *PreCondBI = dyn_cast(PreCondBB->getTerminator()); + if (!PreCondBI || PreCondBI->isUnconditional()) + return false; + + Instruction *CntInst; + PHINode *CntPhi; + Value *Val; + if (!detectPopcountIdiom(CurLoop, PreCondBB, CntInst, CntPhi, Val)) + return false; + + transformLoopToPopcount(PreCondBB, CntInst, CntPhi, Val); + return true; +} + +static CallInst *createPopcntIntrinsic(IRBuilder<> &IRBuilder, Value *Val, + DebugLoc DL) { + Value *Ops[] = {Val}; + Type *Tys[] = {Val->getType()}; + + Module *M = IRBuilder.GetInsertBlock()->getParent()->getParent(); + Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, Tys); + CallInst *CI = IRBuilder.CreateCall(Func, Ops); + CI->setDebugLoc(DL); + + return CI; +} + +void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB, + Instruction *CntInst, + PHINode *CntPhi, Value *Var) { + BasicBlock *PreHead = CurLoop->getLoopPreheader(); + auto *PreCondBr = dyn_cast(PreCondBB->getTerminator()); + const DebugLoc DL = CntInst->getDebugLoc(); + + // Assuming before transformation, the loop is following: + // if (x) // the precondition + // do { cnt++; x &= x - 1; } while(x); + + // Step 1: Insert the ctpop instruction at the end of the precondition block + IRBuilder<> Builder(PreCondBr); + Value *PopCnt, *PopCntZext, *NewCount, *TripCnt; + { + PopCnt = createPopcntIntrinsic(Builder, Var, DL); + NewCount = PopCntZext = + Builder.CreateZExtOrTrunc(PopCnt, cast(CntPhi->getType())); + + if (NewCount != PopCnt) + (cast(NewCount))->setDebugLoc(DL); + + // TripCnt is exactly the number of iterations the loop has + TripCnt = NewCount; + + // If the population counter's initial value is not zero, insert Add Inst. + Value *CntInitVal = CntPhi->getIncomingValueForBlock(PreHead); + ConstantInt *InitConst = dyn_cast(CntInitVal); + if (!InitConst || !InitConst->isZero()) { + NewCount = Builder.CreateAdd(NewCount, CntInitVal); + (cast(NewCount))->setDebugLoc(DL); + } + } + + // Step 2: Replace the precondition from "if (x == 0) goto loop-exit" to + // "if (NewCount == 0) loop-exit". Without this change, the intrinsic + // function would be partial dead code, and downstream passes will drag + // it back from the precondition block to the preheader. + { + ICmpInst *PreCond = cast(PreCondBr->getCondition()); + + Value *Opnd0 = PopCntZext; + Value *Opnd1 = ConstantInt::get(PopCntZext->getType(), 0); + if (PreCond->getOperand(0) != Var) + std::swap(Opnd0, Opnd1); + + ICmpInst *NewPreCond = cast( + Builder.CreateICmp(PreCond->getPredicate(), Opnd0, Opnd1)); + PreCondBr->setCondition(NewPreCond); + + RecursivelyDeleteTriviallyDeadInstructions(PreCond, TLI); + } + + // Step 3: Note that the population count is exactly the trip count of the + // loop in question, which enable us to to convert the loop from noncountable + // loop into a countable one. The benefit is twofold: + // + // - If the loop only counts population, the entire loop becomes dead after + // the transformation. It is a lot easier to prove a countable loop dead + // than to prove a noncountable one. 
(In some C dialects, an infinite loop + // isn't dead even if it computes nothing useful. In general, DCE needs + // to prove a noncountable loop finite before safely delete it.) + // + // - If the loop also performs something else, it remains alive. + // Since it is transformed to countable form, it can be aggressively + // optimized by some optimizations which are in general not applicable + // to a noncountable loop. + // + // After this step, this loop (conceptually) would look like following: + // newcnt = __builtin_ctpop(x); + // t = newcnt; + // if (x) + // do { cnt++; x &= x-1; t--) } while (t > 0); + BasicBlock *Body = *(CurLoop->block_begin()); + { + auto *LbBr = dyn_cast(Body->getTerminator()); + ICmpInst *LbCond = cast(LbBr->getCondition()); + Type *Ty = TripCnt->getType(); + + PHINode *TcPhi = PHINode::Create(Ty, 2, "tcphi", &Body->front()); + + Builder.SetInsertPoint(LbCond); + Instruction *TcDec = cast( + Builder.CreateSub(TcPhi, ConstantInt::get(Ty, 1), + "tcdec", false, true)); + + TcPhi->addIncoming(TripCnt, PreHead); + TcPhi->addIncoming(TcDec, Body); + + CmpInst::Predicate Pred = + (LbBr->getSuccessor(0) == Body) ? CmpInst::ICMP_UGT : CmpInst::ICMP_SLE; + LbCond->setPredicate(Pred); + LbCond->setOperand(0, TcDec); + LbCond->setOperand(1, ConstantInt::get(Ty, 0)); + } + + // Step 4: All the references to the original population counter outside + // the loop are replaced with the NewCount -- the value returned from + // __builtin_ctpop(). + CntInst->replaceUsesOutsideBlock(NewCount, Body); + + // step 5: Forget the "non-computable" trip-count SCEV associated with the + // loop. The loop would otherwise not be deleted even if it becomes empty. + SE->forgetLoop(CurLoop); +} diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index e12502654751..b4102fe9ba34 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -48,7 +48,7 @@ namespace { AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addPreservedID(LCSSAID); - AU.addPreserved(); + AU.addPreserved(); AU.addRequired(); } }; @@ -112,7 +112,7 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Simplify instructions in the current basic block. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *I = BI++; + Instruction *I = &*BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 9d7e57ffebac..4295235a3f36 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -99,7 +99,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, return false; if (St && !St->isSimple()) return false; - MemInstr.push_back(I); + MemInstr.push_back(&*I); } } @@ -176,7 +176,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, } } - // We don't have a DepMatrix to check legality return false + // We don't have a DepMatrix to check legality return false. 
if (DepMatrix.size() == 0) return false; return true; @@ -331,9 +331,9 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) { class LoopInterchangeLegality { public: LoopInterchangeLegality(Loop *Outer, Loop *Inner, ScalarEvolution *SE, - LoopInterchange *Pass) - : OuterLoop(Outer), InnerLoop(Inner), SE(SE), CurrentPass(Pass), - InnerLoopHasReduction(false) {} + LoopInfo *LI, DominatorTree *DT, bool PreserveLCSSA) + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), + PreserveLCSSA(PreserveLCSSA), InnerLoopHasReduction(false) {} /// Check if the loops can be interchanged. bool canInterchangeLoops(unsigned InnerLoopId, unsigned OuterLoopId, @@ -357,9 +357,10 @@ private: Loop *OuterLoop; Loop *InnerLoop; - /// Scev analysis. ScalarEvolution *SE; - LoopInterchange *CurrentPass; + LoopInfo *LI; + DominatorTree *DT; + bool PreserveLCSSA; bool InnerLoopHasReduction; }; @@ -371,7 +372,7 @@ public: LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} - /// Check if the loop interchange is profitable + /// Check if the loop interchange is profitable. bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix); @@ -385,12 +386,12 @@ private: ScalarEvolution *SE; }; -/// LoopInterchangeTransform interchanges the loop +/// LoopInterchangeTransform interchanges the loop. class LoopInterchangeTransform { public: LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, - LoopInterchange *Pass, BasicBlock *LoopNestExit, + BasicBlock *LoopNestExit, bool InnerLoopContainsReductions) : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LoopExit(LoopNestExit), @@ -424,21 +425,22 @@ private: bool InnerLoopHasReduction; }; -// Main LoopInterchange Pass +// Main LoopInterchange Pass. struct LoopInterchange : public FunctionPass { static char ID; ScalarEvolution *SE; LoopInfo *LI; DependenceAnalysis *DA; DominatorTree *DT; + bool PreserveLCSSA; LoopInterchange() : FunctionPass(ID), SE(nullptr), LI(nullptr), DA(nullptr), DT(nullptr) { initializeLoopInterchangePass(*PassRegistry::getPassRegistry()); } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); @@ -447,11 +449,13 @@ struct LoopInterchange : public FunctionPass { } bool runOnFunction(Function &F) override { - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); DA = &getAnalysis(); auto *DTWP = getAnalysisIfAvailable(); DT = DTWP ? &DTWP->getDomTree() : nullptr; + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + // Build up a worklist of loop pairs to analyze. SmallVector Worklist; @@ -489,7 +493,7 @@ struct LoopInterchange : public FunctionPass { unsigned selectLoopForInterchange(LoopVector LoopList) { // TODO: Add a better heuristic to select the loop to be interchanged based - // on the dependece matrix. Currently we select the innermost loop. + // on the dependence matrix. Currently we select the innermost loop. return LoopList.size() - 1; } @@ -544,7 +548,7 @@ struct LoopInterchange : public FunctionPass { } unsigned SelecLoopId = selectLoopForInterchange(LoopList); - // Move the selected loop outwards to the best posible position. + // Move the selected loop outwards to the best possible position. 
for (unsigned i = SelecLoopId; i > 0; i--) { bool Interchanged = processLoop(LoopList, i, i - 1, LoopNestExit, DependencyMatrix); @@ -574,7 +578,8 @@ struct LoopInterchange : public FunctionPass { Loop *InnerLoop = LoopList[InnerLoopId]; Loop *OuterLoop = LoopList[OuterLoopId]; - LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, this); + LoopInterchangeLegality LIL(OuterLoop, InnerLoop, SE, LI, DT, + PreserveLCSSA); if (!LIL.canInterchangeLoops(InnerLoopId, OuterLoopId, DependencyMatrix)) { DEBUG(dbgs() << "Not interchanging Loops. Cannot prove legality\n"); return false; @@ -586,7 +591,7 @@ struct LoopInterchange : public FunctionPass { return false; } - LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, this, + LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit, LIL.hasInnerLoopReduction()); LIT.transform(); DEBUG(dbgs() << "Loops interchanged\n"); @@ -655,7 +660,7 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) { DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n"); // We do not have any basic block in between now make sure the outer header - // and outer loop latch doesnt contain any unsafe instructions. + // and outer loop latch doesn't contain any unsafe instructions. if (containsUnsafeInstructionsInHeader(OuterLoopHeader) || containsUnsafeInstructionsInLatch(OuterLoopLatch)) return false; @@ -698,9 +703,9 @@ bool LoopInterchangeLegality::findInductionAndReductions( return false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { RecurrenceDescriptor RD; + InductionDescriptor ID; PHINode *PHI = cast(I); - ConstantInt *StepValue = nullptr; - if (isInductionPHI(PHI, SE, StepValue)) + if (InductionDescriptor::isInductionPHI(PHI, SE, ID)) Inductions.push_back(PHI); else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD)) Reductions.push_back(PHI); @@ -836,7 +841,7 @@ bool LoopInterchangeLegality::currentLimitations() { else FoundInduction = true; } - // The loop latch ended and we didnt find the induction variable return as + // The loop latch ended and we didn't find the induction variable return as // current limitation. if (!FoundInduction) return true; @@ -867,12 +872,14 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId, if (!OuterLoopPreHeader || OuterLoopPreHeader == OuterLoop->getHeader() || isa(OuterLoopPreHeader->begin()) || !OuterLoopPreHeader->getUniquePredecessor()) { - OuterLoopPreHeader = InsertPreheaderForLoop(OuterLoop, CurrentPass); + OuterLoopPreHeader = + InsertPreheaderForLoop(OuterLoop, DT, LI, PreserveLCSSA); } if (!InnerLoopPreHeader || InnerLoopPreHeader == InnerLoop->getHeader() || InnerLoopPreHeader == OuterLoop->getHeader()) { - InnerLoopPreHeader = InsertPreheaderForLoop(InnerLoop, CurrentPass); + InnerLoopPreHeader = + InsertPreheaderForLoop(InnerLoop, DT, LI, PreserveLCSSA); } // TODO: The loops could not be interchanged due to current limitations in the @@ -966,7 +973,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix) { - // TODO: Add Better Profitibility checks. + // TODO: Add better profitability checks. // e.g // 1) Construct dependency matrix and move the one with no loop carried dep // inside to enable vectorization. @@ -980,7 +987,7 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId, if (Cost < 0) return true; - // It is not profitable as per current cache profitibility model. 
But check if + // It is not profitable as per current cache profitability model. But check if // we can move this loop outside to improve parallelism. bool ImprovesPar = isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix); @@ -996,7 +1003,7 @@ void LoopInterchangeTransform::removeChildLoop(Loop *OuterLoop, return; } } - assert(false && "Couldn't find loop"); + llvm_unreachable("Couldn't find loop"); } void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop, @@ -1045,7 +1052,7 @@ bool LoopInterchangeTransform::transform() { splitInnerLoopLatch(InnerIndexVar); DEBUG(dbgs() << "splitInnerLoopLatch Done\n"); - // Splits the inner loops phi nodes out into a seperate basic block. + // Splits the inner loops phi nodes out into a separate basic block. splitInnerLoopHeader(); DEBUG(dbgs() << "splitInnerLoopHeader Done\n"); } @@ -1113,8 +1120,8 @@ static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { auto &ToList = InsertBefore->getParent()->getInstList(); auto &FromList = FromBB->getInstList(); - ToList.splice(InsertBefore, FromList, FromList.begin(), - FromBB->getTerminator()); + ToList.splice(InsertBefore->getIterator(), FromList, FromList.begin(), + FromBB->getTerminator()->getIterator()); } void LoopInterchangeTransform::adjustOuterLoopPreheader() { @@ -1181,8 +1188,8 @@ bool LoopInterchangeTransform::adjustLoopBranches() { if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) return false; - BasicBlock *InnerLoopHeaderSucessor = InnerLoopHeader->getUniqueSuccessor(); - if (!InnerLoopHeaderSucessor) + BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); + if (!InnerLoopHeaderSuccessor) return false; // Adjust Loop Preheader and headers @@ -1198,11 +1205,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() { if (OuterLoopHeaderBI->getSuccessor(i) == OuterLoopLatch) OuterLoopHeaderBI->setSuccessor(i, LoopExit); else if (OuterLoopHeaderBI->getSuccessor(i) == InnerLoopPreHeader) - OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSucessor); + OuterLoopHeaderBI->setSuccessor(i, InnerLoopHeaderSuccessor); } // Adjust reduction PHI's now that the incoming block has changed. - updateIncomingBlock(InnerLoopHeaderSucessor, InnerLoopHeader, + updateIncomingBlock(InnerLoopHeaderSuccessor, InnerLoopHeader, OuterLoopHeader); BranchInst::Create(OuterLoopPreHeader, InnerLoopHeaderBI); @@ -1286,10 +1293,10 @@ bool LoopInterchangeTransform::adjustLoopLinks() { char LoopInterchange::ID = 0; INITIALIZE_PASS_BEGIN(LoopInterchange, "loop-interchange", "Interchanges loops for cache reuse", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp new file mode 100644 index 000000000000..1064d088514d --- /dev/null +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -0,0 +1,566 @@ +//===- LoopLoadElimination.cpp - Loop Load Elimination Pass ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implement a loop-aware load elimination pass. +// +// It uses LoopAccessAnalysis to identify loop-carried dependences with a +// distance of one between stores and loads. These form the candidates for the +// transformation. The source value of each store then propagated to the user +// of the corresponding load. This makes the load dead. +// +// The pass can also version the loop and add memchecks in order to prove that +// may-aliasing stores can't change the value in memory before it's read by the +// load. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Transforms/Utils/LoopVersioning.h" +#include + +#define LLE_OPTION "loop-load-elim" +#define DEBUG_TYPE LLE_OPTION + +using namespace llvm; + +static cl::opt CheckPerElim( + "runtime-check-per-loop-load-elim", cl::Hidden, + cl::desc("Max number of memchecks allowed per eliminated load on average"), + cl::init(1)); + +static cl::opt LoadElimSCEVCheckThreshold( + "loop-load-elimination-scev-check-threshold", cl::init(8), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed for Loop " + "Load Elimination")); + + +STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE"); + +namespace { + +/// \brief Represent a store-to-forwarding candidate. +struct StoreToLoadForwardingCandidate { + LoadInst *Load; + StoreInst *Store; + + StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store) + : Load(Load), Store(Store) {} + + /// \brief Return true if the dependence from the store to the load has a + /// distance of one. E.g. A[i+1] = A[i] + bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const { + Value *LoadPtr = Load->getPointerOperand(); + Value *StorePtr = Store->getPointerOperand(); + Type *LoadPtrType = LoadPtr->getType(); + Type *LoadType = LoadPtrType->getPointerElementType(); + + assert(LoadPtrType->getPointerAddressSpace() == + StorePtr->getType()->getPointerAddressSpace() && + LoadType == StorePtr->getType()->getPointerElementType() && + "Should be a known dependence"); + + auto &DL = Load->getParent()->getModule()->getDataLayout(); + unsigned TypeByteSize = DL.getTypeAllocSize(const_cast(LoadType)); + + auto *LoadPtrSCEV = cast(PSE.getSCEV(LoadPtr)); + auto *StorePtrSCEV = cast(PSE.getSCEV(StorePtr)); + + // We don't need to check non-wrapping here because forward/backward + // dependence wouldn't be valid if these weren't monotonic accesses. + auto *Dist = cast( + PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); + const APInt &Val = Dist->getAPInt(); + return Val.abs() == TypeByteSize; + } + + Value *getLoadPtr() const { return Load->getPointerOperand(); } + +#ifndef NDEBUG + friend raw_ostream &operator<<(raw_ostream &OS, + const StoreToLoadForwardingCandidate &Cand) { + OS << *Cand.Store << " -->\n"; + OS.indent(2) << *Cand.Load << "\n"; + return OS; + } +#endif +}; + +/// \brief Check if the store dominates all latches, so as long as there is no +/// intervening store this value will be loaded in the next iteration. 
+bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, + DominatorTree *DT) { + SmallVector Latches; + L->getLoopLatches(Latches); + return std::all_of(Latches.begin(), Latches.end(), + [&](const BasicBlock *Latch) { + return DT->dominates(StoreBlock, Latch); + }); +} + +/// \brief The per-loop class that does most of the work. +class LoadEliminationForLoop { +public: + LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI, + DominatorTree *DT) + : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.PSE) {} + + /// \brief Look through the loop-carried and loop-independent dependences in + /// this loop and find store->load dependences. + /// + /// Note that no candidate is returned if LAA has failed to analyze the loop + /// (e.g. if it's not bottom-tested, contains volatile memops, etc.) + std::forward_list + findStoreToLoadDependences(const LoopAccessInfo &LAI) { + std::forward_list Candidates; + + const auto *Deps = LAI.getDepChecker().getDependences(); + if (!Deps) + return Candidates; + + // Find store->load dependences (consequently true dep). Both lexically + // forward and backward dependences qualify. Disqualify loads that have + // other unknown dependences. + + SmallSet LoadsWithUnknownDepedence; + + for (const auto &Dep : *Deps) { + Instruction *Source = Dep.getSource(LAI); + Instruction *Destination = Dep.getDestination(LAI); + + if (Dep.Type == MemoryDepChecker::Dependence::Unknown) { + if (isa(Source)) + LoadsWithUnknownDepedence.insert(Source); + if (isa(Destination)) + LoadsWithUnknownDepedence.insert(Destination); + continue; + } + + if (Dep.isBackward()) + // Note that the designations source and destination follow the program + // order, i.e. source is always first. (The direction is given by the + // DepType.) + std::swap(Source, Destination); + else + assert(Dep.isForward() && "Needs to be a forward dependence"); + + auto *Store = dyn_cast(Source); + if (!Store) + continue; + auto *Load = dyn_cast(Destination); + if (!Load) + continue; + Candidates.emplace_front(Load, Store); + } + + if (!LoadsWithUnknownDepedence.empty()) + Candidates.remove_if([&](const StoreToLoadForwardingCandidate &C) { + return LoadsWithUnknownDepedence.count(C.Load); + }); + + return Candidates; + } + + /// \brief Return the index of the instruction according to program order. + unsigned getInstrIndex(Instruction *Inst) { + auto I = InstOrder.find(Inst); + assert(I != InstOrder.end() && "No index for instruction"); + return I->second; + } + + /// \brief If a load has multiple candidates associated (i.e. different + /// stores), it means that it could be forwarding from multiple stores + /// depending on control flow. Remove these candidates. + /// + /// Here, we rely on LAA to include the relevant loop-independent dependences. + /// LAA is known to omit these in the very simple case when the read and the + /// write within an alias set always takes place using the *same* pointer. + /// + /// However, we know that this is not the case here, i.e. we can rely on LAA + /// to provide us with loop-independent dependences for the cases we're + /// interested. Consider the case for example where a loop-independent + /// dependece S1->S2 invalidates the forwarding S3->S2. + /// + /// A[i] = ... (S1) + /// ... = A[i] (S2) + /// A[i+1] = ... (S3) + /// + /// LAA will perform dependence analysis here because there are two + /// *different* pointers involved in the same alias set (&A[i] and &A[i+1]). 
+ void removeDependencesFromMultipleStores( + std::forward_list &Candidates) { + // If Store is nullptr it means that we have multiple stores forwarding to + // this store. + typedef DenseMap + LoadToSingleCandT; + LoadToSingleCandT LoadToSingleCand; + + for (const auto &Cand : Candidates) { + bool NewElt; + LoadToSingleCandT::iterator Iter; + + std::tie(Iter, NewElt) = + LoadToSingleCand.insert(std::make_pair(Cand.Load, &Cand)); + if (!NewElt) { + const StoreToLoadForwardingCandidate *&OtherCand = Iter->second; + // Already multiple stores forward to this load. + if (OtherCand == nullptr) + continue; + + // Handle the very basic of case when the two stores are in the same + // block so deciding which one forwards is easy. The later one forwards + // as long as they both have a dependence distance of one to the load. + if (Cand.Store->getParent() == OtherCand->Store->getParent() && + Cand.isDependenceDistanceOfOne(PSE) && + OtherCand->isDependenceDistanceOfOne(PSE)) { + // They are in the same block, the later one will forward to the load. + if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store)) + OtherCand = &Cand; + } else + OtherCand = nullptr; + } + } + + Candidates.remove_if([&](const StoreToLoadForwardingCandidate &Cand) { + if (LoadToSingleCand[Cand.Load] != &Cand) { + DEBUG(dbgs() << "Removing from candidates: \n" << Cand + << " The load may have multiple stores forwarding to " + << "it\n"); + return true; + } + return false; + }); + } + + /// \brief Given two pointers operations by their RuntimePointerChecking + /// indices, return true if they require an alias check. + /// + /// We need a check if one is a pointer for a candidate load and the other is + /// a pointer for a possibly intervening store. + bool needsChecking(unsigned PtrIdx1, unsigned PtrIdx2, + const SmallSet &PtrsWrittenOnFwdingPath, + const std::set &CandLoadPtrs) { + Value *Ptr1 = + LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx1).PointerValue; + Value *Ptr2 = + LAI.getRuntimePointerChecking()->getPointerInfo(PtrIdx2).PointerValue; + return ((PtrsWrittenOnFwdingPath.count(Ptr1) && CandLoadPtrs.count(Ptr2)) || + (PtrsWrittenOnFwdingPath.count(Ptr2) && CandLoadPtrs.count(Ptr1))); + } + + /// \brief Return pointers that are possibly written to on the path from a + /// forwarding store to a load. + /// + /// These pointers need to be alias-checked against the forwarding candidates. + SmallSet findPointersWrittenOnForwardingPath( + const SmallVectorImpl &Candidates) { + // From FirstStore to LastLoad neither of the elimination candidate loads + // should overlap with any of the stores. + // + // E.g.: + // + // st1 C[i] + // ld1 B[i] <-------, + // ld0 A[i] <----, | * LastLoad + // ... | | + // st2 E[i] | | + // st3 B[i+1] -- | -' * FirstStore + // st0 A[i+1] ---' + // st4 D[i] + // + // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with + // ld0. 
+  /// \brief Return pointers that are possibly written to on the path from a
+  /// forwarding store to a load.
+  ///
+  /// These pointers need to be alias-checked against the forwarding
+  /// candidates.
+  SmallSet<Value *, 4> findPointersWrittenOnForwardingPath(
+      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+    // From FirstStore to LastLoad neither of the elimination candidate loads
+    // should overlap with any of the stores.
+    //
+    // E.g.:
+    //
+    // st1 C[i]
+    // ld1 B[i] <-------,
+    // ld0 A[i] <----,  |              * LastLoad
+    // ...           |  |
+    // st2 E[i]      |  |
+    // st3 B[i+1] -- | -'              * FirstStore
+    // st0 A[i+1] ---'
+    // st4 D[i]
+    //
+    // st0 forwards to ld0 if the accesses in st4 and st1 don't overlap with
+    // ld0.
+
+    LoadInst *LastLoad =
+        std::max_element(Candidates.begin(), Candidates.end(),
+                         [&](const StoreToLoadForwardingCandidate &A,
+                             const StoreToLoadForwardingCandidate &B) {
+                           return getInstrIndex(A.Load) < getInstrIndex(B.Load);
+                         })
+            ->Load;
+    StoreInst *FirstStore =
+        std::min_element(Candidates.begin(), Candidates.end(),
+                         [&](const StoreToLoadForwardingCandidate &A,
+                             const StoreToLoadForwardingCandidate &B) {
+                           return getInstrIndex(A.Store) <
+                                  getInstrIndex(B.Store);
+                         })
+            ->Store;
+
+    // We're looking for stores after the first forwarding store until the end
+    // of the loop, then from the beginning of the loop until the last
+    // forwarded-to load.  Collect the pointers of those stores.
+    SmallSet<Value *, 4> PtrsWrittenOnFwdingPath;
+
+    auto InsertStorePtr = [&](Instruction *I) {
+      if (auto *S = dyn_cast<StoreInst>(I))
+        PtrsWrittenOnFwdingPath.insert(S->getPointerOperand());
+    };
+    const auto &MemInstrs = LAI.getDepChecker().getMemoryInstructions();
+    std::for_each(MemInstrs.begin() + getInstrIndex(FirstStore) + 1,
+                  MemInstrs.end(), InsertStorePtr);
+    std::for_each(MemInstrs.begin(), &MemInstrs[getInstrIndex(LastLoad)],
+                  InsertStorePtr);
+
+    return PtrsWrittenOnFwdingPath;
+  }
+
+  /// \brief Determine the pointer alias checks to prove that there are no
+  /// intervening stores.
+  SmallVector<RuntimePointerChecking::PointerCheck, 4> collectMemchecks(
+      const SmallVectorImpl<StoreToLoadForwardingCandidate> &Candidates) {
+
+    SmallSet<Value *, 4> PtrsWrittenOnFwdingPath =
+        findPointersWrittenOnForwardingPath(Candidates);
+
+    // Collect the pointers of the candidate loads.
+    // FIXME: SmallSet does not work with std::inserter.
+    std::set<Value *> CandLoadPtrs;
+    std::transform(Candidates.begin(), Candidates.end(),
+                   std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
+                   std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
+
+    const auto &AllChecks = LAI.getRuntimePointerChecking()->getChecks();
+    SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks;
+
+    std::copy_if(AllChecks.begin(), AllChecks.end(), std::back_inserter(Checks),
+                 [&](const RuntimePointerChecking::PointerCheck &Check) {
+                   for (auto PtrIdx1 : Check.first->Members)
+                     for (auto PtrIdx2 : Check.second->Members)
+                       if (needsChecking(PtrIdx1, PtrIdx2,
+                                         PtrsWrittenOnFwdingPath, CandLoadPtrs))
+                         return true;
+                   return false;
+                 });
+
+    DEBUG(dbgs() << "\nPointer Checks (count: " << Checks.size() << "):\n");
+    DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks));
+
+    return Checks;
+  }
+
+  /// \brief Perform the transformation for a candidate.
+  void
+  propagateStoredValueToLoadUsers(const StoreToLoadForwardingCandidate &Cand,
+                                  SCEVExpander &SEE) {
+    //
+    // loop:
+    //      %x = load %gep_i
+    //         = ... %x
+    //      store %y, %gep_i_plus_1
+    //
+    // =>
+    //
+    // ph:
+    //      %x.initial = load %gep_0
+    // loop:
+    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+    //      %x = load %gep_i            <---- now dead
+    //         = ... %x.storeforward
+    //      store %y, %gep_i_plus_1
+
+    Value *Ptr = Cand.Load->getPointerOperand();
+    auto *PtrSCEV = cast<SCEVAddRecExpr>(PSE.getSCEV(Ptr));
+    auto *PH = L->getLoopPreheader();
+    Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(),
+                                          PH->getTerminator());
+    Value *Initial =
+        new LoadInst(InitialPtr, "load_initial", PH->getTerminator());
+    PHINode *PHI = PHINode::Create(Initial->getType(), 2, "store_forwarded",
+                                   &L->getHeader()->front());
+    PHI->addIncoming(Initial, PH);
+    PHI->addIncoming(Cand.Store->getOperand(0), L->getLoopLatch());
+
+    Cand.Load->replaceAllUsesWith(PHI);
+  }
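Read side by side with the IR diagram above, the same rewrite can be sketched at the source level (illustrative only; the pass operates on IR, use() is a stand-in consumer, and the guard makes the initial load safe):

    #include <cstddef>

    inline void use(int) {} // stand-in for any consumer of the loaded value

    void before(int *A, int Y, std::size_t N) {
      for (std::size_t i = 0; i + 1 < N; ++i) {
        int X = A[i]; // reloads the value stored one iteration earlier
        use(X);
        A[i + 1] = Y;
      }
    }

    void after(int *A, int Y, std::size_t N) {
      if (N < 2)
        return;
      int Fwd = A[0]; // %x.initial: loaded once in the preheader
      for (std::size_t i = 0; i + 1 < N; ++i) {
        use(Fwd);     // uses the PHI instead of reloading A[i]
        A[i + 1] = Y;
        Fwd = Y;      // the stored value feeds the next iteration
      }
    }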
+  /// \brief Top-level driver for each loop: find store->load forwarding
+  /// candidates, add run-time checks and perform transformation.
+  bool processLoop() {
+    DEBUG(dbgs() << "\nIn \"" << L->getHeader()->getParent()->getName()
+                 << "\" checking " << *L << "\n");
+    // Look for store-to-load forwarding cases across the
+    // backedge.  E.g.:
+    //
+    // loop:
+    //      %x = load %gep_i
+    //         = ... %x
+    //      store %y, %gep_i_plus_1
+    //
+    // =>
+    //
+    // ph:
+    //      %x.initial = load %gep_0
+    // loop:
+    //      %x.storeforward = phi [%x.initial, %ph] [%y, %loop]
+    //      %x = load %gep_i            <---- now dead
+    //         = ... %x.storeforward
+    //      store %y, %gep_i_plus_1
+
+    // First start with store->load dependences.
+    auto StoreToLoadDependences = findStoreToLoadDependences(LAI);
+    if (StoreToLoadDependences.empty())
+      return false;
+
+    // Generate an index for each load and store according to the original
+    // program order.  This will be used later.
+    InstOrder = LAI.getDepChecker().generateInstructionOrderMap();
+
+    // To keep things simple for now, remove those where the load is
+    // potentially fed by multiple stores.
+    removeDependencesFromMultipleStores(StoreToLoadDependences);
+    if (StoreToLoadDependences.empty())
+      return false;
+
+    // Filter the candidates further.
+    SmallVector<StoreToLoadForwardingCandidate, 4> Candidates;
+    unsigned NumForwarding = 0;
+    for (const StoreToLoadForwardingCandidate &Cand : StoreToLoadDependences) {
+      DEBUG(dbgs() << "Candidate " << Cand);
+      // Make sure that the stored value is available everywhere in the loop
+      // in the next iteration.
+      if (!doesStoreDominateAllLatches(Cand.Store->getParent(), L, DT))
+        continue;
+
+      // Check whether the SCEV difference is the same as the induction step,
+      // thus we load the value in the next iteration.
+      if (!Cand.isDependenceDistanceOfOne(PSE))
+        continue;
+
+      ++NumForwarding;
+      DEBUG(dbgs()
+            << NumForwarding
+            << ". Valid store-to-load forwarding across the loop backedge\n");
+      Candidates.push_back(Cand);
+    }
+    if (Candidates.empty())
+      return false;
+
+    // Check intervening may-alias stores.  These need runtime checks for
+    // alias disambiguation.
+    SmallVector<RuntimePointerChecking::PointerCheck, 4> Checks =
+        collectMemchecks(Candidates);
+
+    // Too many checks are likely to outweigh the benefits of forwarding.
+    if (Checks.size() > Candidates.size() * CheckPerElim) {
+      DEBUG(dbgs() << "Too many run-time checks needed.\n");
+      return false;
+    }
+
+    if (LAI.PSE.getUnionPredicate().getComplexity() >
+        LoadElimSCEVCheckThreshold) {
+      DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n");
+      return false;
+    }
+
+    // Point of no-return, start the transformation.  First, version the loop
+    // if necessary.
+    if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) {
+      LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false);
+      LV.setAliasChecks(std::move(Checks));
+      LV.setSCEVChecks(LAI.PSE.getUnionPredicate());
+      LV.versionLoop();
+    }
+
+    // Next, propagate the value stored by the store to the users of the load.
+    // Also for the first iteration, generate the initial value of the load.
+    SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(),
+                     "storeforward");
+    for (const auto &Cand : Candidates)
+      propagateStoredValueToLoadUsers(Cand, SEE);
+    NumLoopLoadEliminted += NumForwarding;
+
+    return true;
+  }
+
+private:
+  Loop *L;
+
+  /// \brief Maps the load/store instructions to their index according to
+  /// program order.
+  DenseMap<Instruction *, unsigned> InstOrder;
+
+  // Analyses used.
+  LoopInfo *LI;
+  const LoopAccessInfo &LAI;
+  DominatorTree *DT;
+  PredicatedScalarEvolution PSE;
+};
+
+/// \brief The pass.  Most of the work is delegated to the per-loop
+/// LoadEliminationForLoop class.
+class LoopLoadElimination : public FunctionPass {
+public:
+  LoopLoadElimination() : FunctionPass(ID) {
+    initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    auto *LAA = &getAnalysis<LoopAccessAnalysis>();
+    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+
+    // Build up a worklist of inner-most loops to process first.  This is
+    // necessary as versioning a loop creates new loops and can invalidate
+    // iterators across the loops.
+    SmallVector<Loop *, 8> Worklist;
+
+    for (Loop *TopLevelLoop : *LI)
+      for (Loop *L : depth_first(TopLevelLoop))
+        // We only handle inner-most loops.
+        if (L->empty())
+          Worklist.push_back(L);
+
+    // Now walk the identified inner loops.
+    bool Changed = false;
+    for (Loop *L : Worklist) {
+      const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap());
+      // The actual work is performed by LoadEliminationForLoop.
+      LoadEliminationForLoop LEL(L, LI, LAI, DT);
+      Changed |= LEL.processLoop();
+    }
+
+    return Changed;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addPreserved<LoopInfoWrapperPass>();
+    AU.addRequired<LoopAccessAnalysis>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+  }
+
+  static char ID;
+};
+}
+
+char LoopLoadElimination::ID;
+static const char LLE_name[] = "Loop Load Elimination";
+
+INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
+
+namespace llvm {
+FunctionPass *createLoopLoadEliminationPass() {
+  return new LoopLoadElimination();
+}
+}
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index ed103e6b8ed6..27c2d8824df0 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -147,12 +147,12 @@ namespace {
     bool runOnLoop(Loop *L, LPPassManager &LPM) override;

     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<AAResultsWrapperPass>();
       AU.addRequired<LoopInfoWrapperPass>();
       AU.addPreserved<LoopInfoWrapperPass>();
       AU.addRequired<DominatorTreeWrapperPass>();
       AU.addPreserved<DominatorTreeWrapperPass>();
-      AU.addRequired<ScalarEvolution>();
+      AU.addRequired<ScalarEvolutionWrapperPass>();
       AU.addRequired<TargetLibraryInfoWrapperPass>();
     }

@@ -162,11 +162,15 @@ namespace {
     ScalarEvolution *SE;
     TargetLibraryInfo *TLI;
     DominatorTree *DT;
+    bool PreserveLCSSA;

     typedef SmallVector<Instruction *, 16> SmallInstructionVector;
     typedef SmallSet<Instruction *, 16> SmallInstructionSet;

-    // A chain of isomorphic instructions, indentified by a single-use PHI,
+    // Map between induction variable and its increment
+    DenseMap<Instruction *, int64_t> IVToIncMap;
+
+    // A chain of isomorphic instructions, identified by a single-use PHI
     // representing a reduction. Only the last value may be used outside the
     // loop.
     struct SimpleLoopReduction {
@@ -300,22 +304,6 @@ namespace {
       // The functions below can be called after we've finished processing all
       // instructions in the loop, and we know which reductions were selected.

-      // Is the provided instruction the PHI of a reduction selected for
-      // rerolling?
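Since the hunks that follow adjust LoopReroll, a reminder of the transformation itself as a source-level sketch (illustrative names; the first function is rewritten into the shape of the second):

    // What loop rerolling does: recognize a manually unrolled loop and
    // rewrite it back to the compact, single-step form, re-exposing the
    // canonical induction variable.
    #include <cstddef>

    void unrolled(int *X, const int *Y, std::size_t N) { // N a multiple of 3
      for (std::size_t i = 0; i < N; i += 3) {
        X[i] = Y[i] * 2;
        X[i + 1] = Y[i + 1] * 2;
        X[i + 2] = Y[i + 2] * 2;
      }
    }

    void rerolled(int *X, const int *Y, std::size_t N) {
      for (std::size_t i = 0; i < N; ++i)
        X[i] = Y[i] * 2;
    }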
- bool isSelectedPHI(Instruction *J) { - if (!isa(J)) - return false; - - for (DenseSet::iterator RI = Reds.begin(), RIE = Reds.end(); - RI != RIE; ++RI) { - int i = *RI; - if (cast(J) == PossibleReds[i].getPHI()) - return true; - } - - return false; - } - bool validateSelected(); void replaceSelected(); @@ -335,7 +323,7 @@ namespace { // x[i*3+1] = y2 // x[i*3+2] = y3 // - // Base instruction -> i*3 + // Base instruction -> i*3 // +---+----+ // / | \ // ST[y1] +1 +2 <-- Roots @@ -366,8 +354,11 @@ namespace { struct DAGRootTracker { DAGRootTracker(LoopReroll *Parent, Loop *L, Instruction *IV, ScalarEvolution *SE, AliasAnalysis *AA, - TargetLibraryInfo *TLI) - : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), IV(IV) {} + TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA, + DenseMap &IncrMap) + : Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI), + PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {} /// Stage 1: Find all the DAG roots for the induction variable. bool findRoots(); @@ -413,11 +404,14 @@ namespace { ScalarEvolution *SE; AliasAnalysis *AA; TargetLibraryInfo *TLI; + DominatorTree *DT; + LoopInfo *LI; + bool PreserveLCSSA; // The loop induction variable. Instruction *IV; // Loop step amount. - uint64_t Inc; + int64_t Inc; // Loop reroll count; if Inc == 1, this records the scaling applied // to the indvar: a[i*2+0] = ...; a[i*2+1] = ... ; // If Inc is not 1, Scale = Inc. @@ -430,6 +424,8 @@ namespace { // they are used in (or specially, IL_All for instructions // used in the loop increment mechanism). UsesTy Uses; + // Map between induction variable and its increment + DenseMap &IVToIncMap; }; void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs); @@ -442,10 +438,10 @@ namespace { char LoopReroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopReroll, "loop-reroll", "Reroll loops", false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(LoopReroll, "loop-reroll", "Reroll loops", false, false) @@ -477,21 +473,20 @@ void LoopReroll::collectPossibleIVs(Loop *L, continue; if (const SCEVAddRecExpr *PHISCEV = - dyn_cast(SE->getSCEV(I))) { + dyn_cast(SE->getSCEV(&*I))) { if (PHISCEV->getLoop() != L) continue; if (!PHISCEV->isAffine()) continue; if (const SCEVConstant *IncSCEV = dyn_cast(PHISCEV->getStepRecurrence(*SE))) { - if (!IncSCEV->getValue()->getValue().isStrictlyPositive()) + const APInt &AInt = IncSCEV->getAPInt().abs(); + if (IncSCEV->getValue()->isZero() || AInt.uge(MaxInc)) continue; - if (IncSCEV->getValue()->uge(MaxInc)) - continue; - - DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << - *PHISCEV << "\n"); - PossibleIVs.push_back(I); + IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue(); + DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV + << "\n"); + PossibleIVs.push_back(&*I); } } } @@ -552,7 +547,7 @@ void LoopReroll::collectPossibleReductions(Loop *L, if (!I->getType()->isSingleValueType()) continue; - SimpleLoopReduction SLR(I, L); + SimpleLoopReduction SLR(&*I, L); if (!SLR.valid()) continue; @@ -699,17 +694,11 @@ collectPossibleRoots(Instruction *Base, std::map &Roots) { } } - int64_t V = CI->getValue().getSExtValue(); + int64_t V = 
std::abs(CI->getValue().getSExtValue()); if (Roots.find(V) != Roots.end()) // No duplicates, please. return false; - // FIXME: Add support for negative values. - if (V < 0) { - DEBUG(dbgs() << "LRR: Aborting due to negative value: " << V << "\n"); - return false; - } - Roots[V] = cast(I); } @@ -731,7 +720,7 @@ collectPossibleRoots(Instruction *Base, std::map &Roots) { unsigned NumBaseUses = BaseUsers.size(); if (NumBaseUses == 0) NumBaseUses = Roots.begin()->second->getNumUses(); - + // Check that every node has the same number of users. for (auto &KV : Roots) { if (KV.first == 0) @@ -744,7 +733,7 @@ collectPossibleRoots(Instruction *Base, std::map &Roots) { } } - return true; + return true; } bool LoopReroll::DAGRootTracker:: @@ -787,7 +776,7 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) { if (!collectPossibleRoots(IVU, V)) return false; - // If we didn't get a root for index zero, then IVU must be + // If we didn't get a root for index zero, then IVU must be // subsumed. if (V.find(0) == V.end()) SubsumedInsts.insert(IVU); @@ -818,13 +807,10 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) { } bool LoopReroll::DAGRootTracker::findRoots() { - - const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(IV)); - Inc = cast(RealIVSCEV->getOperand(1))-> - getValue()->getZExtValue(); + Inc = IVToIncMap[IV]; assert(RootSets.empty() && "Unclean state!"); - if (Inc == 1) { + if (std::abs(Inc) == 1) { for (auto *IVU : IV->users()) { if (isLoopIncrement(IVU, IV)) LoopIncs.push_back(cast(IVU)); @@ -996,6 +982,25 @@ bool LoopReroll::DAGRootTracker::instrDependsOn(Instruction *I, return false; } +static bool isIgnorableInst(const Instruction *I) { + if (isa(I)) + return true; + const IntrinsicInst* II = dyn_cast(I); + if (!II) + return false; + switch (II->getIntrinsicID()) { + default: + return false; + case llvm::Intrinsic::annotation: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // TODO: the following intrinsics may also be whitelisted: + // lifetime_start, lifetime_end, invariant_start, invariant_end + return true; + } + return false; +} + bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // We now need to check for equivalence of the use graph of each root with // that of the primary induction variable (excluding the roots). Our goal @@ -1029,7 +1034,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { // Make sure all instructions in the loop are in one and only one // set. for (auto &KV : Uses) { - if (KV.second.count() != 1) { + if (KV.second.count() != 1 && !isIgnorableInst(KV.first)) { DEBUG(dbgs() << "LRR: Aborting - instruction is not used in 1 iteration: " << *KV.first << " (#uses=" << KV.second.count() << ")\n"); return false; @@ -1103,15 +1108,15 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) { " vs. " << *RootInst << "\n"); return false; } - + RootIt = TryIt; RootInst = TryIt->first; } // All instructions between the last root and this root - // may belong to some other iteration. If they belong to a + // may belong to some other iteration. If they belong to a // future iteration, then they're dangerous to alias with. 
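The switch above from an unsigned to a signed increment, together with the std::abs handling of root values, extends rerolling to count-down loops. A source-level sketch of a newly eligible loop (illustrative names):

    // With negative increments accepted, a count-down unrolled loop such as
    // this can now be rerolled as well.
    void unrolledDown(int *A, long N) { // N a positive multiple of 2
      for (long i = N - 2; i >= 0; i -= 2) {
        A[i + 1] = 0;
        A[i] = 0;
      }
    }

    void rerolledDown(int *A, long N) {
      for (long i = N - 1; i >= 0; --i)
        A[i] = 0;
    }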
- // + // // Note that because we allow a limited amount of flexibility in the order // that we visit nodes, LastRootIt might be *before* RootIt, in which // case we've already checked this set of instructions so we shouldn't @@ -1267,6 +1272,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { ++J; } + bool Negative = IVToIncMap[IV] < 0; const DataLayout &DL = Header->getModule()->getDataLayout(); // We need to create a new induction variable for each different BaseInst. @@ -1275,13 +1281,12 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { const SCEVAddRecExpr *RealIVSCEV = cast(SE->getSCEV(DRS.BaseInst)); const SCEV *Start = RealIVSCEV->getStart(); - const SCEVAddRecExpr *H = cast - (SE->getAddRecExpr(Start, - SE->getConstant(RealIVSCEV->getType(), 1), - L, SCEV::FlagAnyWrap)); + const SCEVAddRecExpr *H = cast(SE->getAddRecExpr( + Start, SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1), L, + SCEV::FlagAnyWrap)); { // Limit the lifetime of SCEVExpander. SCEVExpander Expander(*SE, DL, "reroll"); - Value *NewIV = Expander.expandCodeFor(H, IV->getType(), Header->begin()); + Value *NewIV = Expander.expandCodeFor(H, IV->getType(), &Header->front()); for (auto &KV : Uses) { if (KV.second.find_first() == 0) @@ -1294,8 +1299,8 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE); // Iteration count SCEV minus 1 - const SCEV *ICMinus1SCEV = - SE->getMinusSCEV(ICSCEV, SE->getConstant(ICSCEV->getType(), 1)); + const SCEV *ICMinus1SCEV = SE->getMinusSCEV( + ICSCEV, SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1)); Value *ICMinus1; // Iteration count minus 1 if (isa(ICMinus1SCEV)) { @@ -1303,7 +1308,7 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) { } else { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) - Preheader = InsertPreheaderForLoop(L, Parent); + Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); ICMinus1 = Expander.expandCodeFor(ICMinus1SCEV, NewIV->getType(), Preheader->getTerminator()); @@ -1444,13 +1449,14 @@ void LoopReroll::ReductionTracker::replaceSelected() { bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, const SCEV *IterCount, ReductionTracker &Reductions) { - DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI); + DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA, + IVToIncMap); if (!DAGRoots.findRoots()) return false; DEBUG(dbgs() << "LRR: Found all root induction increments for: " << *IV << "\n"); - + if (!DAGRoots.validate(Reductions)) return false; if (!Reductions.validateSelected()) @@ -1469,11 +1475,12 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); LI = &getAnalysis().getLoopInfo(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); TLI = &getAnalysis().getTLI(); DT = &getAnalysis().getDomTree(); + PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "LRR: F[" << Header->getParent()->getName() << @@ -1490,13 +1497,13 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) { return Changed; const SCEV *LIBETC = SE->getBackedgeTakenCount(L); - const SCEV *IterCount = - SE->getAddExpr(LIBETC, SE->getConstant(LIBETC->getType(), 1)); + const SCEV *IterCount = SE->getAddExpr(LIBETC, SE->getOne(LIBETC->getType())); DEBUG(dbgs() << "LRR: iteration count = " << *IterCount << "\n"); // First, we 
need to find the induction variable with respect to which we can // reroll (there may be several possible options). SmallInstructionVector PossibleIVs; + IVToIncMap.clear(); collectPossibleIVs(L, PossibleIVs); if (PossibleIVs.empty()) { diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index a675e1289baf..5e6c2da08cc3 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -13,11 +13,15 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -41,95 +45,6 @@ DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, cl::desc("The default maximum header size for automatic loop rotation")); STATISTIC(NumRotated, "Number of loops rotated"); -namespace { - - class LoopRotate : public LoopPass { - public: - static char ID; // Pass ID, replacement for typeid - LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { - initializeLoopRotatePass(*PassRegistry::getPassRegistry()); - if (SpecifiedMaxHeaderSize == -1) - MaxHeaderSize = DefaultRotationThreshold; - else - MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); - } - - // LCSSA form makes instruction renaming easier. - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreservedID(LCSSAID); - AU.addPreserved(); - AU.addRequired(); - } - - bool runOnLoop(Loop *L, LPPassManager &LPM) override; - bool simplifyLoopLatch(Loop *L); - bool rotateLoop(Loop *L, bool SimplifiedLatch); - - private: - unsigned MaxHeaderSize; - LoopInfo *LI; - const TargetTransformInfo *TTI; - AssumptionCache *AC; - DominatorTree *DT; - }; -} - -char LoopRotate::ID = 0; -INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopSimplify) -INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) - -Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { - return new LoopRotate(MaxHeaderSize); -} - -/// Rotate Loop L as many times as possible. Return true if -/// the loop is rotated at least once. -bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) { - if (skipOptnoneFunction(L)) - return false; - - // Save the loop metadata. - MDNode *LoopMD = L->getLoopID(); - - Function &F = *L->getHeader()->getParent(); - - LI = &getAnalysis().getLoopInfo(); - TTI = &getAnalysis().getTTI(F); - AC = &getAnalysis().getAssumptionCache(F); - auto *DTWP = getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; - - // Simplify the loop latch before attempting to rotate the header - // upward. Rotation may not be needed if the loop tail can be folded into the - // loop exit. 
- bool SimplifiedLatch = simplifyLoopLatch(L); - - // One loop can be rotated multiple times. - bool MadeChange = false; - while (rotateLoop(L, SimplifiedLatch)) { - MadeChange = true; - SimplifiedLatch = false; - } - - // Restore the loop metadata. - // NB! We presume LoopRotation DOESN'T ADD its own metadata. - if ((MadeChange || SimplifiedLatch) && LoopMD) - L->setLoopID(LoopMD); - - return MadeChange; -} /// RewriteUsesOfClonedInstructions - We just cloned the instructions from the /// old header into the preheader. If there were uses of the values produced by @@ -147,7 +62,7 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, // as necessary. SSAUpdater SSA; for (I = OrigHeader->begin(); I != E; ++I) { - Value *OrigHeaderVal = I; + Value *OrigHeaderVal = &*I; // If there are no uses of the value (e.g. because it returns void), there // is nothing to rewrite. @@ -196,127 +111,6 @@ static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, } } -/// Determine whether the instructions in this range may be safely and cheaply -/// speculated. This is not an important enough situation to develop complex -/// heuristics. We handle a single arithmetic instruction along with any type -/// conversions. -static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, - BasicBlock::iterator End, Loop *L) { - bool seenIncrement = false; - bool MultiExitLoop = false; - - if (!L->getExitingBlock()) - MultiExitLoop = true; - - for (BasicBlock::iterator I = Begin; I != End; ++I) { - - if (!isSafeToSpeculativelyExecute(I)) - return false; - - if (isa(I)) - continue; - - switch (I->getOpcode()) { - default: - return false; - case Instruction::GetElementPtr: - // GEPs are cheap if all indices are constant. - if (!cast(I)->hasAllConstantIndices()) - return false; - // fall-thru to increment case - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: { - Value *IVOpnd = !isa(I->getOperand(0)) - ? I->getOperand(0) - : !isa(I->getOperand(1)) - ? I->getOperand(1) - : nullptr; - if (!IVOpnd) - return false; - - // If increment operand is used outside of the loop, this speculation - // could cause extra live range interference. - if (MultiExitLoop) { - for (User *UseI : IVOpnd->users()) { - auto *UserInst = cast(UseI); - if (!L->contains(UserInst)) - return false; - } - } - - if (seenIncrement) - return false; - seenIncrement = true; - break; - } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - // ignore type conversions - break; - } - } - return true; -} - -/// Fold the loop tail into the loop exit by speculating the loop tail -/// instructions. Typically, this is a single post-increment. In the case of a -/// simple 2-block loop, hoisting the increment can be much better than -/// duplicating the entire loop header. In the case of loops with early exits, -/// rotation will not work anyway, but simplifyLoopLatch will put the loop in -/// canonical form so downstream passes can handle it. -/// -/// I don't believe this invalidates SCEV. 
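The loop shape simplifyLoopLatch targets can be pictured loosely in source terms (an approximation only, since basic blocks are an IR notion): a two-block loop whose latch holds nothing but a cheap, speculatable post-increment.

    // Approximate source shape: the latch block carries only the increment,
    // which can be hoisted into the exiting block so the separate latch
    // disappears from the CFG.
    #include <cstddef>

    int sumBefore(const int *A, std::size_t N) {
      int S = 0;
      std::size_t i = 0;
      while (i != N) { // exiting block: the loop test
        S += A[i];     // loop body
        ++i;           // latch: a lone increment, safe to speculate
      }
      return S;
    }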
-bool LoopRotate::simplifyLoopLatch(Loop *L) { - BasicBlock *Latch = L->getLoopLatch(); - if (!Latch || Latch->hasAddressTaken()) - return false; - - BranchInst *Jmp = dyn_cast(Latch->getTerminator()); - if (!Jmp || !Jmp->isUnconditional()) - return false; - - BasicBlock *LastExit = Latch->getSinglePredecessor(); - if (!LastExit || !L->isLoopExiting(LastExit)) - return false; - - BranchInst *BI = dyn_cast(LastExit->getTerminator()); - if (!BI) - return false; - - if (!shouldSpeculateInstrs(Latch->begin(), Jmp, L)) - return false; - - DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " - << LastExit->getName() << "\n"); - - // Hoist the instructions from Latch into LastExit. - LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp); - - unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; - BasicBlock *Header = Jmp->getSuccessor(0); - assert(Header == L->getHeader() && "expected a backward branch"); - - // Remove Latch from the CFG so that LastExit becomes the new Latch. - BI->setSuccessor(FallThruPath, Header); - Latch->replaceSuccessorsPhiUsesWith(LastExit); - Jmp->eraseFromParent(); - - // Nuke the Latch block. - assert(Latch->empty() && "unable to evacuate Latch"); - LI->removeBlock(Latch); - if (DT) - DT->eraseNode(Latch); - Latch->eraseFromParent(); - return true; -} - /// Rotate loop LP. Return true if the loop is rotated. /// /// \param SimplifiedLatch is true if the latch was just folded into the final @@ -327,7 +121,10 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) { /// rotation. LoopRotate should be repeatable and converge to a canonical /// form. This property is satisfied because simplifying the loop latch can only /// happen once across multiple invocations of the LoopRotate pass. -bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { +static bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI, + const TargetTransformInfo *TTI, AssumptionCache *AC, + DominatorTree *DT, ScalarEvolution *SE, + bool SimplifiedLatch) { // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; @@ -382,7 +179,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. - if (ScalarEvolution *SE = getAnalysisIfAvailable()) + if (SE) SE->forgetLoop(L); DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); @@ -420,7 +217,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // possible or create a clone in the OldPreHeader if not. TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); while (I != E) { - Instruction *Inst = I++; + Instruction *Inst = &*I++; // If the instruction's operands are invariant and it doesn't read or write // memory, then it is safe to hoist. Doing this doesn't change the order of @@ -465,8 +262,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's // successors by duplicating their incoming values for OrigHeader. 
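For reference, the overall effect of rotation at the source level (a sketch): the header test is duplicated into a guard and the loop becomes bottom-tested, so the latch is also the exiting block.

    void rotateBefore(int *A, int N) {
      int i = 0;
      while (i < N) { // header test runs before every iteration
        A[i] = 0;
        ++i;
      }
    }

    void rotateAfter(int *A, int N) {
      int i = 0;
      if (i < N) { // guard: the rotated-out copy of the header test
        do {
          A[i] = 0;
          ++i;
        } while (i < N); // test now sits in the latch: a bottom-tested loop
      }
    }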
TerminatorInst *TI = OrigHeader->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin(); + for (BasicBlock *SuccBB : TI->successors()) + for (BasicBlock::iterator BI = SuccBB->begin(); PHINode *PN = dyn_cast(BI); ++BI) PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); @@ -607,3 +404,221 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { ++NumRotated; return true; } + +/// Determine whether the instructions in this range may be safely and cheaply +/// speculated. This is not an important enough situation to develop complex +/// heuristics. We handle a single arithmetic instruction along with any type +/// conversions. +static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, + BasicBlock::iterator End, Loop *L) { + bool seenIncrement = false; + bool MultiExitLoop = false; + + if (!L->getExitingBlock()) + MultiExitLoop = true; + + for (BasicBlock::iterator I = Begin; I != End; ++I) { + + if (!isSafeToSpeculativelyExecute(&*I)) + return false; + + if (isa(I)) + continue; + + switch (I->getOpcode()) { + default: + return false; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. + if (!cast(I)->hasAllConstantIndices()) + return false; + // fall-thru to increment case + case Instruction::Add: + case Instruction::Sub: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: { + Value *IVOpnd = !isa(I->getOperand(0)) + ? I->getOperand(0) + : !isa(I->getOperand(1)) + ? I->getOperand(1) + : nullptr; + if (!IVOpnd) + return false; + + // If increment operand is used outside of the loop, this speculation + // could cause extra live range interference. + if (MultiExitLoop) { + for (User *UseI : IVOpnd->users()) { + auto *UserInst = cast(UseI); + if (!L->contains(UserInst)) + return false; + } + } + + if (seenIncrement) + return false; + seenIncrement = true; + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + // ignore type conversions + break; + } + } + return true; +} + +/// Fold the loop tail into the loop exit by speculating the loop tail +/// instructions. Typically, this is a single post-increment. In the case of a +/// simple 2-block loop, hoisting the increment can be much better than +/// duplicating the entire loop header. In the case of loops with early exits, +/// rotation will not work anyway, but simplifyLoopLatch will put the loop in +/// canonical form so downstream passes can handle it. +/// +/// I don't believe this invalidates SCEV. +static bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) { + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch || Latch->hasAddressTaken()) + return false; + + BranchInst *Jmp = dyn_cast(Latch->getTerminator()); + if (!Jmp || !Jmp->isUnconditional()) + return false; + + BasicBlock *LastExit = Latch->getSinglePredecessor(); + if (!LastExit || !L->isLoopExiting(LastExit)) + return false; + + BranchInst *BI = dyn_cast(LastExit->getTerminator()); + if (!BI) + return false; + + if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) + return false; + + DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " + << LastExit->getName() << "\n"); + + // Hoist the instructions from Latch into LastExit. 
+ LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), + Latch->begin(), Jmp->getIterator()); + + unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; + BasicBlock *Header = Jmp->getSuccessor(0); + assert(Header == L->getHeader() && "expected a backward branch"); + + // Remove Latch from the CFG so that LastExit becomes the new Latch. + BI->setSuccessor(FallThruPath, Header); + Latch->replaceSuccessorsPhiUsesWith(LastExit); + Jmp->eraseFromParent(); + + // Nuke the Latch block. + assert(Latch->empty() && "unable to evacuate Latch"); + LI->removeBlock(Latch); + if (DT) + DT->eraseNode(Latch); + Latch->eraseFromParent(); + return true; +} + +/// Rotate \c L as many times as possible. Return true if the loop is rotated +/// at least once. +static bool iterativelyRotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI, + const TargetTransformInfo *TTI, + AssumptionCache *AC, DominatorTree *DT, + ScalarEvolution *SE) { + // Save the loop metadata. + MDNode *LoopMD = L->getLoopID(); + + // Simplify the loop latch before attempting to rotate the header + // upward. Rotation may not be needed if the loop tail can be folded into the + // loop exit. + bool SimplifiedLatch = simplifyLoopLatch(L, LI, DT); + + // One loop can be rotated multiple times. + bool MadeChange = false; + while (rotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE, SimplifiedLatch)) { + MadeChange = true; + SimplifiedLatch = false; + } + + // Restore the loop metadata. + // NB! We presume LoopRotation DOESN'T ADD its own metadata. + if ((MadeChange || SimplifiedLatch) && LoopMD) + L->setLoopID(LoopMD); + + return MadeChange; +} + +namespace { + +class LoopRotate : public LoopPass { + unsigned MaxHeaderSize; + +public: + static char ID; // Pass ID, replacement for typeid + LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { + initializeLoopRotatePass(*PassRegistry::getPassRegistry()); + if (SpecifiedMaxHeaderSize == -1) + MaxHeaderSize = DefaultRotationThreshold; + else + MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); + } + + // LCSSA form makes instruction renaming easier. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override { + if (skipOptnoneFunction(L)) + return false; + Function &F = *L->getHeader()->getParent(); + + auto *LI = &getAnalysis().getLoopInfo(); + const auto *TTI = &getAnalysis().getTTI(F); + auto *AC = &getAnalysis().getAssumptionCache(F); + auto *DTWP = getAnalysisIfAvailable(); + auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; + auto *SEWP = getAnalysisIfAvailable(); + auto *SE = SEWP ? 
&SEWP->getSE() : nullptr; + + return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE); + } +}; +} + +char LoopRotate::ID = 0; +INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) + +Pass *llvm::createLoopRotatePass(int MaxHeaderSize) { + return new LoopRotate(MaxHeaderSize); +} diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 4b59f3d2f6cc..2101225ed9f7 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -105,10 +105,33 @@ static bool StressIVChain = false; namespace { -/// RegSortData - This class holds data which is used to order reuse candidates. +struct MemAccessTy { + /// Used in situations where the accessed memory type is unknown. + static const unsigned UnknownAddressSpace = ~0u; + + Type *MemTy; + unsigned AddrSpace; + + MemAccessTy() : MemTy(nullptr), AddrSpace(UnknownAddressSpace) {} + + MemAccessTy(Type *Ty, unsigned AS) : + MemTy(Ty), AddrSpace(AS) {} + + bool operator==(MemAccessTy Other) const { + return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace; + } + + bool operator!=(MemAccessTy Other) const { return !(*this == Other); } + + static MemAccessTy getUnknown(LLVMContext &Ctx) { + return MemAccessTy(Type::getVoidTy(Ctx), UnknownAddressSpace); + } +}; + +/// This class holds data which is used to order reuse candidates. class RegSortData { public: - /// UsedByIndices - This represents the set of LSRUse indices which reference + /// This represents the set of LSRUse indices which reference /// a particular register. SmallBitVector UsedByIndices; @@ -122,16 +145,14 @@ void RegSortData::print(raw_ostream &OS) const { OS << "[NumUses=" << UsedByIndices.count() << ']'; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void RegSortData::dump() const { print(errs()); errs() << '\n'; } -#endif namespace { -/// RegUseTracker - Map register candidates to information about how they are -/// used. +/// Map register candidates to information about how they are used. 
class RegUseTracker { typedef DenseMap RegUsesTy; @@ -139,9 +160,9 @@ class RegUseTracker { SmallVector RegSequence; public: - void CountRegister(const SCEV *Reg, size_t LUIdx); - void DropRegister(const SCEV *Reg, size_t LUIdx); - void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx); + void countRegister(const SCEV *Reg, size_t LUIdx); + void dropRegister(const SCEV *Reg, size_t LUIdx); + void swapAndDropUse(size_t LUIdx, size_t LastLUIdx); bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const; @@ -160,7 +181,7 @@ public: } void -RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { +RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) { std::pair Pair = RegUsesMap.insert(std::make_pair(Reg, RegSortData())); RegSortData &RSD = Pair.first->second; @@ -171,7 +192,7 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) { } void -RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { +RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) { RegUsesTy::iterator It = RegUsesMap.find(Reg); assert(It != RegUsesMap.end()); RegSortData &RSD = It->second; @@ -180,7 +201,7 @@ RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) { } void -RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) { +RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) { assert(LUIdx <= LastLUIdx); // Update RegUses. The data structure is not optimized for this purpose; @@ -219,9 +240,8 @@ void RegUseTracker::clear() { namespace { -/// Formula - This class holds information that describes a formula for -/// computing satisfying a use. It may include broken-out immediates and scaled -/// registers. +/// This class holds information that describes a formula for computing +/// satisfying a use. It may include broken-out immediates and scaled registers. struct Formula { /// Global base address used for complex addressing. GlobalValue *BaseGV; @@ -235,8 +255,8 @@ struct Formula { /// The scale of any complex addressing. int64_t Scale; - /// BaseRegs - The list of "base" registers for this use. When this is - /// non-empty. The canonical representation of a formula is + /// The list of "base" registers for this use. When this is non-empty. The + /// canonical representation of a formula is /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty(). /// #1 enforces that the scaled register is always used when at least two @@ -247,31 +267,31 @@ struct Formula { /// form. SmallVector BaseRegs; - /// ScaledReg - The 'scaled' register for this use. This should be non-null - /// when Scale is not zero. + /// The 'scaled' register for this use. This should be non-null when Scale is + /// not zero. const SCEV *ScaledReg; - /// UnfoldedOffset - An additional constant offset which added near the - /// use. This requires a temporary register, but the offset itself can - /// live in an add immediate field rather than a register. + /// An additional constant offset which added near the use. This requires a + /// temporary register, but the offset itself can live in an add immediate + /// field rather than a register. 
int64_t UnfoldedOffset; Formula() : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(nullptr), UnfoldedOffset(0) {} - void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); + void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE); bool isCanonical() const; - void Canonicalize(); + void canonicalize(); - bool Unscale(); + bool unscale(); size_t getNumRegs() const; Type *getType() const; - void DeleteBaseReg(const SCEV *&S); + void deleteBaseReg(const SCEV *&S); bool referencesReg(const SCEV *S) const; bool hasRegsUsedByUsesOtherThan(size_t LUIdx, @@ -283,7 +303,7 @@ struct Formula { } -/// DoInitialMatch - Recursion helper for InitialMatch. +/// Recursion helper for initialMatch. static void DoInitialMatch(const SCEV *S, Loop *L, SmallVectorImpl &Good, SmallVectorImpl &Bad, @@ -336,10 +356,9 @@ static void DoInitialMatch(const SCEV *S, Loop *L, Bad.push_back(S); } -/// InitialMatch - Incorporate loop-variant parts of S into this Formula, -/// attempting to keep all loop-invariant and loop-computable values in a -/// single base register. -void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { +/// Incorporate loop-variant parts of S into this Formula, attempting to keep +/// all loop-invariant and loop-computable values in a single base register. +void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { SmallVector Good; SmallVector Bad; DoInitialMatch(S, L, Good, Bad, SE); @@ -355,7 +374,7 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) { BaseRegs.push_back(Sum); HasBaseReg = true; } - Canonicalize(); + canonicalize(); } /// \brief Check whether or not this formula statisfies the canonical @@ -373,7 +392,7 @@ bool Formula::isCanonical() const { /// field. Otherwise, we would have to do special cases everywhere in LSR /// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ... /// On the other hand, 1*reg should be canonicalized into reg. -void Formula::Canonicalize() { +void Formula::canonicalize() { if (isCanonical()) return; // So far we did not need this case. This is easy to implement but it is @@ -394,7 +413,7 @@ void Formula::Canonicalize() { /// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2. /// \return true if it was possible to get rid of the scale, false otherwise. /// \note After this operation the formula may not be in the canonical form. -bool Formula::Unscale() { +bool Formula::unscale() { if (Scale != 1) return false; Scale = 0; @@ -403,15 +422,14 @@ bool Formula::Unscale() { return true; } -/// getNumRegs - Return the total number of register operands used by this -/// formula. This does not include register uses implied by non-constant -/// addrec strides. +/// Return the total number of register operands used by this formula. This does +/// not include register uses implied by non-constant addrec strides. size_t Formula::getNumRegs() const { return !!ScaledReg + BaseRegs.size(); } -/// getType - Return the type of this formula, if it has one, or null -/// otherwise. This type is meaningless except for the bit size. +/// Return the type of this formula, if it has one, or null otherwise. This type +/// is meaningless except for the bit size. Type *Formula::getType() const { return !BaseRegs.empty() ? BaseRegs.front()->getType() : ScaledReg ? ScaledReg->getType() : @@ -419,21 +437,21 @@ Type *Formula::getType() const { nullptr; } -/// DeleteBaseReg - Delete the given base reg from the BaseRegs list. 
-void Formula::DeleteBaseReg(const SCEV *&S) { +/// Delete the given base reg from the BaseRegs list. +void Formula::deleteBaseReg(const SCEV *&S) { if (&S != &BaseRegs.back()) std::swap(S, BaseRegs.back()); BaseRegs.pop_back(); } -/// referencesReg - Test if this formula references the given register. +/// Test if this formula references the given register. bool Formula::referencesReg(const SCEV *S) const { return S == ScaledReg || std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end(); } -/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers -/// which are used by uses other than the use with the given index. +/// Test whether this formula uses registers which are used by uses other than +/// the use with the given index. bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx, const RegUseTracker &RegUses) const { if (ScaledReg) @@ -481,30 +499,29 @@ void Formula::print(raw_ostream &OS) const { } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void Formula::dump() const { print(errs()); errs() << '\n'; } -#endif -/// isAddRecSExtable - Return true if the given addrec can be sign-extended -/// without changing its value. +/// Return true if the given addrec can be sign-extended without changing its +/// value. static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1); return isa(SE.getSignExtendExpr(AR, WideTy)); } -/// isAddSExtable - Return true if the given add can be sign-extended -/// without changing its value. +/// Return true if the given add can be sign-extended without changing its +/// value. static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) { Type *WideTy = IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1); return isa(SE.getSignExtendExpr(A, WideTy)); } -/// isMulSExtable - Return true if the given mul can be sign-extended -/// without changing its value. +/// Return true if the given mul can be sign-extended without changing its +/// value. static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { Type *WideTy = IntegerType::get(SE.getContext(), @@ -512,12 +529,11 @@ static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) { return isa(SE.getSignExtendExpr(M, WideTy)); } -/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined -/// and if the remainder is known to be zero, or null otherwise. If -/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified -/// to Y, ignoring that the multiplication may overflow, which is useful when -/// the result will be used in a context where the most significant bits are -/// ignored. +/// Return an expression for LHS /s RHS, if it can be determined and if the +/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits +/// is true, expressions like (X * Y) /s Y are simplified to Y, ignoring that +/// the multiplication may overflow, which is useful when the result will be +/// used in a context where the most significant bits are ignored. static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, ScalarEvolution &SE, bool IgnoreSignificantBits = false) { @@ -528,7 +544,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, // Handle a few RHS special cases. 
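The constant case of this division helper can be pictured as a small self-contained sketch over plain integers (std::optional standing in for the null SCEV result); the symbolic cases generalize the same remainder-is-zero rule:

    #include <cstdint>
    #include <optional>

    // Exact signed division sketch: succeed only when the remainder is
    // provably zero; treat x /s -1 as x * -1, mirroring the special case in
    // the surrounding getExactSDiv.
    std::optional<int64_t> exactSDiv(int64_t LHS, int64_t RHS) {
      if (RHS == 0)
        return std::nullopt; // division by zero: no result
      if (RHS == -1) {
        if (LHS == INT64_MIN)
          return std::nullopt; // negation would overflow
        return -LHS;           // x /s -1 == x * -1
      }
      if (LHS % RHS != 0)
        return std::nullopt; // remainder not zero: not an exact division
      return LHS / RHS;
    }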
const SCEVConstant *RC = dyn_cast(RHS); if (RC) { - const APInt &RA = RC->getValue()->getValue(); + const APInt &RA = RC->getAPInt(); // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do // some folding. if (RA.isAllOnesValue()) @@ -542,8 +558,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, if (const SCEVConstant *C = dyn_cast(LHS)) { if (!RC) return nullptr; - const APInt &LA = C->getValue()->getValue(); - const APInt &RA = RC->getValue()->getValue(); + const APInt &LA = C->getAPInt(); + const APInt &RA = RC->getAPInt(); if (LA.srem(RA) != 0) return nullptr; return SE.getConstant(LA.sdiv(RA)); @@ -603,12 +619,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, return nullptr; } -/// ExtractImmediate - If S involves the addition of a constant integer value, -/// return that integer value, and mutate S to point to a new SCEV with that -/// value excluded. +/// If S involves the addition of a constant integer value, return that integer +/// value, and mutate S to point to a new SCEV with that value excluded. static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { if (const SCEVConstant *C = dyn_cast(S)) { - if (C->getValue()->getValue().getMinSignedBits() <= 64) { + if (C->getAPInt().getMinSignedBits() <= 64) { S = SE.getConstant(C->getType(), 0); return C->getValue()->getSExtValue(); } @@ -630,9 +645,8 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { return 0; } -/// ExtractSymbol - If S involves the addition of a GlobalValue address, -/// return that symbol, and mutate S to point to a new SCEV with that -/// value excluded. +/// If S involves the addition of a GlobalValue address, return that symbol, and +/// mutate S to point to a new SCEV with that value excluded. static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { if (const SCEVUnknown *U = dyn_cast(S)) { if (GlobalValue *GV = dyn_cast(U->getValue())) { @@ -657,8 +671,8 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { return nullptr; } -/// isAddressUse - Returns true if the specified instruction is using the -/// specified value as an address. +/// Returns true if the specified instruction is using the specified value as an +/// address. static bool isAddressUse(Instruction *Inst, Value *OperandVal) { bool isAddress = isa(Inst); if (StoreInst *SI = dyn_cast(Inst)) { @@ -682,12 +696,15 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { return isAddress; } -/// getAccessType - Return the type of the memory being accessed. -static Type *getAccessType(const Instruction *Inst) { - Type *AccessTy = Inst->getType(); - if (const StoreInst *SI = dyn_cast(Inst)) - AccessTy = SI->getOperand(0)->getType(); - else if (const IntrinsicInst *II = dyn_cast(Inst)) { +/// Return the type of the memory being accessed. +static MemAccessTy getAccessType(const Instruction *Inst) { + MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace); + if (const StoreInst *SI = dyn_cast(Inst)) { + AccessTy.MemTy = SI->getOperand(0)->getType(); + AccessTy.AddrSpace = SI->getPointerAddressSpace(); + } else if (const LoadInst *LI = dyn_cast(Inst)) { + AccessTy.AddrSpace = LI->getPointerAddressSpace(); + } else if (const IntrinsicInst *II = dyn_cast(Inst)) { // Addressing modes can also be folded into prefetches and a variety // of intrinsics. 
switch (II->getIntrinsicID()) { @@ -696,21 +713,21 @@ static Type *getAccessType(const Instruction *Inst) { case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: case Intrinsic::x86_sse2_storel_dq: - AccessTy = II->getArgOperand(0)->getType(); + AccessTy.MemTy = II->getArgOperand(0)->getType(); break; } } // All pointers have the same requirements, so canonicalize them to an // arbitrary pointer type to minimize variation. - if (PointerType *PTy = dyn_cast(AccessTy)) - AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), - PTy->getAddressSpace()); + if (PointerType *PTy = dyn_cast(AccessTy.MemTy)) + AccessTy.MemTy = PointerType::get(IntegerType::get(PTy->getContext(), 1), + PTy->getAddressSpace()); return AccessTy; } -/// isExistingPhi - Return true if this AddRec is already a phi in its loop. +/// Return true if this AddRec is already a phi in its loop. static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); PHINode *PN = dyn_cast(I); ++I) { @@ -793,9 +810,8 @@ static bool isHighCostExpansion(const SCEV *S, return true; } -/// DeleteTriviallyDeadInstructions - If any of the instructions is the -/// specified set are trivially dead, delete them and see if this makes any of -/// their operands subsequently dead. +/// If any of the instructions is the specified set are trivially dead, delete +/// them and see if this makes any of their operands subsequently dead. static bool DeleteTriviallyDeadInstructions(SmallVectorImpl &DeadInsts) { bool Changed = false; @@ -842,7 +858,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, namespace { -/// Cost - This class is used to measure and compare candidate formulae. +/// This class is used to measure and compare candidate formulae. class Cost { /// TODO: Some of these could be merged. Also, a lexical ordering /// isn't always optimal. @@ -905,7 +921,7 @@ private: } -/// RateRegister - Tally up interesting quantities from the given register. +/// Tally up interesting quantities from the given register. void Cost::RateRegister(const SCEV *Reg, SmallPtrSetImpl &Regs, const Loop *L, @@ -951,9 +967,9 @@ void Cost::RateRegister(const SCEV *Reg, SE.hasComputableLoopEvolution(Reg, L); } -/// RatePrimaryRegister - Record this register in the set. If we haven't seen it -/// before, rate it. Optional LoserRegs provides a way to declare any formula -/// that refers to one of those regs an instant loser. +/// Record this register in the set. If we haven't seen it before, rate +/// it. Optional LoserRegs provides a way to declare any formula that refers to +/// one of those regs an instant loser. void Cost::RatePrimaryRegister(const SCEV *Reg, SmallPtrSetImpl &Regs, const Loop *L, @@ -1024,7 +1040,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI, assert(isValid() && "invalid cost"); } -/// Lose - Set this cost to a losing value. +/// Set this cost to a losing value. void Cost::Lose() { NumRegs = ~0u; AddRecCost = ~0u; @@ -1035,7 +1051,7 @@ void Cost::Lose() { ScaleCost = ~0u; } -/// operator< - Choose the lower cost. +/// Choose the lower cost. 
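The operator< that follows relies on the std::tie idiom; in isolation (a two-field sketch), the tuples of references compare lexicographically, so earlier fields dominate later ones:

    #include <tuple>

    // Lexicographic cost comparison via std::tie: NumRegs is compared first,
    // and SetupCost only breaks ties.
    struct MiniCost {
      unsigned NumRegs;
      unsigned SetupCost;
      bool operator<(const MiniCost &Other) const {
        return std::tie(NumRegs, SetupCost) <
               std::tie(Other.NumRegs, Other.SetupCost);
      }
    };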
bool Cost::operator<(const Cost &Other) const { return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost, ImmCost, SetupCost) < @@ -1061,37 +1077,35 @@ void Cost::print(raw_ostream &OS) const { OS << ", plus " << SetupCost << " setup cost"; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void Cost::dump() const { print(errs()); errs() << '\n'; } -#endif namespace { -/// LSRFixup - An operand value in an instruction which is to be replaced -/// with some equivalent, possibly strength-reduced, replacement. +/// An operand value in an instruction which is to be replaced with some +/// equivalent, possibly strength-reduced, replacement. struct LSRFixup { - /// UserInst - The instruction which will be updated. + /// The instruction which will be updated. Instruction *UserInst; - /// OperandValToReplace - The operand of the instruction which will - /// be replaced. The operand may be used more than once; every instance - /// will be replaced. + /// The operand of the instruction which will be replaced. The operand may be + /// used more than once; every instance will be replaced. Value *OperandValToReplace; - /// PostIncLoops - If this user is to use the post-incremented value of an - /// induction variable, this variable is non-null and holds the loop - /// associated with the induction variable. + /// If this user is to use the post-incremented value of an induction + /// variable, this variable is non-null and holds the loop associated with the + /// induction variable. PostIncLoopSet PostIncLoops; - /// LUIdx - The index of the LSRUse describing the expression which - /// this fixup needs, minus an offset (below). + /// The index of the LSRUse describing the expression which this fixup needs, + /// minus an offset (below). size_t LUIdx; - /// Offset - A constant offset to be added to the LSRUse expression. - /// This allows multiple fixups to share the same LSRUse with different - /// offsets, for example in an unrolled loop. + /// A constant offset to be added to the LSRUse expression. This allows + /// multiple fixups to share the same LSRUse with different offsets, for + /// example in an unrolled loop. int64_t Offset; bool isUseFullyOutsideLoop(const Loop *L) const; @@ -1108,8 +1122,7 @@ LSRFixup::LSRFixup() : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)), Offset(0) {} -/// isUseFullyOutsideLoop - Test whether this fixup always uses its -/// value outside of the given loop. +/// Test whether this fixup always uses its value outside of the given loop. bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { // PHI nodes use their value in their incoming blocks. if (const PHINode *PN = dyn_cast(UserInst)) { @@ -1149,16 +1162,15 @@ void LSRFixup::print(raw_ostream &OS) const { OS << ", Offset=" << Offset; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LSRFixup::dump() const { print(errs()); errs() << '\n'; } -#endif namespace { -/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding -/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. +/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted +/// SmallVectors of const SCEV*. struct UniquifierDenseMapInfo { static SmallVector getEmptyKey() { SmallVector V; @@ -1182,17 +1194,17 @@ struct UniquifierDenseMapInfo { } }; -/// LSRUse - This class holds the state that LSR keeps for each use in -/// IVUsers, as well as uses invented by LSR itself. 
It includes information -/// about what kinds of things can be folded into the user, information about -/// the user itself, and information about how the use may be satisfied. -/// TODO: Represent multiple users of the same expression in common? +/// This class holds the state that LSR keeps for each use in IVUsers, as well +/// as uses invented by LSR itself. It includes information about what kinds of +/// things can be folded into the user, information about the user itself, and +/// information about how the use may be satisfied. TODO: Represent multiple +/// users of the same expression in common? class LSRUse { DenseSet, UniquifierDenseMapInfo> Uniquifier; public: - /// KindType - An enum for a kind of use, indicating what types of - /// scaled and immediate operands it might support. + /// An enum for a kind of use, indicating what types of scaled and immediate + /// operands it might support. enum KindType { Basic, ///< A normal use, with no folding. Special, ///< A special case of basic, allowing -1 scales. @@ -1204,15 +1216,14 @@ public: typedef PointerIntPair SCEVUseKindPair; KindType Kind; - Type *AccessTy; + MemAccessTy AccessTy; SmallVector Offsets; int64_t MinOffset; int64_t MaxOffset; - /// AllFixupsOutsideLoop - This records whether all of the fixups using this - /// LSRUse are outside of the loop, in which case some special-case heuristics - /// may be used. + /// This records whether all of the fixups using this LSRUse are outside of + /// the loop, in which case some special-case heuristics may be used. bool AllFixupsOutsideLoop; /// RigidFormula is set to true to guarantee that this use will be associated @@ -1222,26 +1233,24 @@ public: /// changing the formula. bool RigidFormula; - /// WidestFixupType - This records the widest use type for any fixup using - /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different - /// max fixup widths to be equivalent, because the narrower one may be relying - /// on the implicit truncation to truncate away bogus bits. + /// This records the widest use type for any fixup using this + /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max + /// fixup widths to be equivalent, because the narrower one may be relying on + /// the implicit truncation to truncate away bogus bits. Type *WidestFixupType; - /// Formulae - A list of ways to build a value that can satisfy this user. - /// After the list is populated, one of these is selected heuristically and - /// used to formulate a replacement for OperandValToReplace in UserInst. + /// A list of ways to build a value that can satisfy this user. After the + /// list is populated, one of these is selected heuristically and used to + /// formulate a replacement for OperandValToReplace in UserInst. SmallVector Formulae; - /// Regs - The set of register candidates used by all formulae in this LSRUse. + /// The set of register candidates used by all formulae in this LSRUse. 
SmallPtrSet Regs; - LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T), - MinOffset(INT64_MAX), - MaxOffset(INT64_MIN), - AllFixupsOutsideLoop(true), - RigidFormula(false), - WidestFixupType(nullptr) {} + LSRUse(KindType K, MemAccessTy AT) + : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN), + AllFixupsOutsideLoop(true), RigidFormula(false), + WidestFixupType(nullptr) {} bool HasFormulaWithSameRegs(const Formula &F) const; bool InsertFormula(const Formula &F); @@ -1254,8 +1263,8 @@ public: } -/// HasFormula - Test whether this use as a formula which has the same -/// registers as the given formula. +/// Test whether this use has a formula which has the same registers as the given +/// formula. bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { SmallVector Key = F.BaseRegs; if (F.ScaledReg) Key.push_back(F.ScaledReg); @@ -1264,9 +1273,8 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { return Uniquifier.count(Key); } -/// InsertFormula - If the given formula has not yet been inserted, add it to -/// the list, and return true. Return false otherwise. -/// The formula must be in canonical form. +/// If the given formula has not yet been inserted, add it to the list, and +/// return true. Return false otherwise. The formula must be in canonical form. bool LSRUse::InsertFormula(const Formula &F) { assert(F.isCanonical() && "Invalid canonical representation"); @@ -1300,14 +1308,14 @@ bool LSRUse::InsertFormula(const Formula &F) { return true; } -/// DeleteFormula - Remove the given formula from this use's list. +/// Remove the given formula from this use's list. void LSRUse::DeleteFormula(Formula &F) { if (&F != &Formulae.back()) std::swap(F, Formulae.back()); Formulae.pop_back(); } -/// RecomputeRegs - Recompute the Regs field, and update RegUses. +/// Recompute the Regs field, and update RegUses. void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { // Now that we've filtered out some formulae, recompute the Regs set. SmallPtrSet OldRegs = std::move(Regs); @@ -1320,7 +1328,7 @@ void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { // Update the RegTracker.
for (const SCEV *S : OldRegs) if (!Regs.count(S)) - RegUses.DropRegister(S, LUIdx); + RegUses.dropRegister(S, LUIdx); } void LSRUse::print(raw_ostream &OS) const { @@ -1331,10 +1339,13 @@ void LSRUse::print(raw_ostream &OS) const { case ICmpZero: OS << "ICmpZero"; break; case Address: OS << "Address of "; - if (AccessTy->isPointerTy()) + if (AccessTy.MemTy->isPointerTy()) OS << "pointer"; // the full pointer type could be really verbose - else - OS << *AccessTy; + else { + OS << *AccessTy.MemTy; + } + + OS << " in addrspace(" << AccessTy.AddrSpace << ')'; } OS << ", Offsets={"; @@ -1353,19 +1364,19 @@ void LSRUse::print(raw_ostream &OS) const { OS << ", widest fixup type: " << *WidestFixupType; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LSRUse::dump() const { print(errs()); errs() << '\n'; } -#endif static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, - LSRUse::KindType Kind, Type *AccessTy, + LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) { switch (Kind) { case LSRUse::Address: - return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); + return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset, + HasBaseReg, Scale, AccessTy.AddrSpace); case LSRUse::ICmpZero: // There's not even a target hook for querying whether it would be legal to @@ -1412,7 +1423,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, Type *AccessTy, + LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) { // Check for overflow. @@ -1433,7 +1444,7 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, int64_t MinOffset, int64_t MaxOffset, - LSRUse::KindType Kind, Type *AccessTy, + LSRUse::KindType Kind, MemAccessTy AccessTy, const Formula &F) { // For the purpose of isAMCompletelyFolded either having a canonical formula // or a scale not equal to zero is correct. @@ -1447,11 +1458,11 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); } -/// isLegalUse - Test whether we know how to expand the current formula. +/// Test whether we know how to expand the current formula. static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, - int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, - int64_t Scale) { + int64_t MaxOffset, LSRUse::KindType Kind, + MemAccessTy AccessTy, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, int64_t Scale) { // We know how to expand completely foldable formulae. 
return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale) || @@ -1463,8 +1474,8 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, } static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, - int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, - const Formula &F) { + int64_t MaxOffset, LSRUse::KindType Kind, + MemAccessTy AccessTy, const Formula &F) { return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale); } @@ -1490,14 +1501,12 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, switch (LU.Kind) { case LSRUse::Address: { // Check the scaling factor cost with both the min and max offsets. - int ScaleCostMinOffset = - TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV, - F.BaseOffset + LU.MinOffset, - F.HasBaseReg, F.Scale); - int ScaleCostMaxOffset = - TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV, - F.BaseOffset + LU.MaxOffset, - F.HasBaseReg, F.Scale); + int ScaleCostMinOffset = TTI.getScalingFactorCost( + LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MinOffset, F.HasBaseReg, + F.Scale, LU.AccessTy.AddrSpace); + int ScaleCostMaxOffset = TTI.getScalingFactorCost( + LU.AccessTy.MemTy, F.BaseGV, F.BaseOffset + LU.MaxOffset, F.HasBaseReg, + F.Scale, LU.AccessTy.AddrSpace); assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 && "Legal addressing mode has an illegal cost!"); @@ -1515,7 +1524,7 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI, } static bool isAlwaysFoldable(const TargetTransformInfo &TTI, - LSRUse::KindType Kind, Type *AccessTy, + LSRUse::KindType Kind, MemAccessTy AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg) { // Fast-path: zero is always foldable. @@ -1539,7 +1548,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, static bool isAlwaysFoldable(const TargetTransformInfo &TTI, ScalarEvolution &SE, int64_t MinOffset, int64_t MaxOffset, LSRUse::KindType Kind, - Type *AccessTy, const SCEV *S, bool HasBaseReg) { + MemAccessTy AccessTy, const SCEV *S, + bool HasBaseReg) { // Fast-path: zero is always foldable. if (S->isZero()) return true; @@ -1564,9 +1574,9 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI, namespace { -/// IVInc - An individual increment in a Chain of IV increments. -/// Relate an IV user to an expression that computes the IV it uses from the IV -/// used by the previous link in the Chain. +/// An individual increment in a Chain of IV increments. Relate an IV user to +/// an expression that computes the IV it uses from the IV used by the previous +/// link in the Chain. /// /// For the head of a chain, IncExpr holds the absolute SCEV expression for the /// original IVOperand. The head of the chain's IVOperand is only valid during @@ -1582,8 +1592,8 @@ struct IVInc { UserInst(U), IVOperand(O), IncExpr(E) {} }; -// IVChain - The list of IV increments in program order. -// We typically add the head of a chain without finding subsequent links. +// The list of IV increments in program order. We typically add the head of a +// chain without finding subsequent links. struct IVChain { SmallVector Incs; const SCEV *ExprBase; @@ -1595,7 +1605,7 @@ struct IVChain { typedef SmallVectorImpl::const_iterator const_iterator; - // begin - return the first increment in the chain. + // Return the first increment in the chain. 
const_iterator begin() const { assert(!Incs.empty()); return std::next(Incs.begin()); @@ -1604,32 +1614,30 @@ struct IVChain { return Incs.end(); } - // hasIncs - Returns true if this chain contains any increments. + // Returns true if this chain contains any increments. bool hasIncs() const { return Incs.size() >= 2; } - // add - Add an IVInc to the end of this chain. + // Add an IVInc to the end of this chain. void add(const IVInc &X) { Incs.push_back(X); } - // tailUserInst - Returns the last UserInst in the chain. + // Returns the last UserInst in the chain. Instruction *tailUserInst() const { return Incs.back().UserInst; } - // isProfitableIncrement - Returns true if IncExpr can be profitably added to - // this chain. + // Returns true if IncExpr can be profitably added to this chain. bool isProfitableIncrement(const SCEV *OperExpr, const SCEV *IncExpr, ScalarEvolution&); }; -/// ChainUsers - Helper for CollectChains to track multiple IV increment uses. -/// Distinguish between FarUsers that definitely cross IV increments and -/// NearUsers that may be used between IV increments. +/// Helper for CollectChains to track multiple IV increment uses. Distinguish +/// between FarUsers that definitely cross IV increments and NearUsers that may +/// be used between IV increments. struct ChainUsers { SmallPtrSet FarUsers; SmallPtrSet NearUsers; }; -/// LSRInstance - This class holds state for the main loop strength reduction -/// logic. +/// This class holds state for the main loop strength reduction logic. class LSRInstance { IVUsers &IU; ScalarEvolution &SE; @@ -1639,25 +1647,25 @@ class LSRInstance { Loop *const L; bool Changed; - /// IVIncInsertPos - This is the insert position that the current loop's - /// induction variable increment should be placed. In simple loops, this is - /// the latch block's terminator. But in more complicated cases, this is a - /// position which will dominate all the in-loop post-increment users. + /// This is the insert position that the current loop's induction variable + /// increment should be placed. In simple loops, this is the latch block's + /// terminator. But in more complicated cases, this is a position which will + /// dominate all the in-loop post-increment users. Instruction *IVIncInsertPos; - /// Factors - Interesting factors between use strides. + /// Interesting factors between use strides. SmallSetVector Factors; - /// Types - Interesting use types, to facilitate truncation reuse. + /// Interesting use types, to facilitate truncation reuse. SmallSetVector Types; - /// Fixups - The list of operands which are to be replaced. + /// The list of operands which are to be replaced. SmallVector Fixups; - /// Uses - The list of interesting uses. + /// The list of interesting uses. SmallVector Uses; - /// RegUses - Track which uses use which register candidates. + /// Track which uses use which register candidates. RegUseTracker RegUses; // Limit the number of chains to avoid quadratic behavior. We don't expect to @@ -1665,10 +1673,10 @@ class LSRInstance { // back to normal LSR behavior for those uses. static const unsigned MaxChains = 8; - /// IVChainVec - IV users can form a chain of IV increments. + /// IV users can form a chain of IV increments. SmallVector IVChainVec; - /// IVIncSet - IV users that belong to profitable IVChains. + /// IV users that belong to profitable IVChains. 
SmallPtrSet IVIncSet; void OptimizeShadowIV(); @@ -1696,11 +1704,10 @@ class LSRInstance { UseMapTy UseMap; bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, Type *AccessTy); + LSRUse::KindType Kind, MemAccessTy AccessTy); - std::pair getUse(const SCEV *&Expr, - LSRUse::KindType Kind, - Type *AccessTy); + std::pair getUse(const SCEV *&Expr, LSRUse::KindType Kind, + MemAccessTy AccessTy); void DeleteUse(LSRUse &LU, size_t LUIdx); @@ -1769,18 +1776,16 @@ class LSRInstance { void RewriteForPHI(PHINode *PN, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts, - Pass *P) const; + SmallVectorImpl &DeadInsts) const; void Rewrite(const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts, - Pass *P) const; - void ImplementSolution(const SmallVectorImpl &Solution, - Pass *P); + SmallVectorImpl &DeadInsts) const; + void ImplementSolution(const SmallVectorImpl &Solution); public: - LSRInstance(Loop *L, Pass *P); + LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, + LoopInfo &LI, const TargetTransformInfo &TTI); bool getChanged() const { return Changed; } @@ -1793,8 +1798,8 @@ public: } -/// OptimizeShadowIV - If IV is used in a int-to-float cast -/// inside the loop then try to eliminate the cast operation. +/// If IV is used in an int-to-float cast inside the loop then try to eliminate +/// the cast operation. void LSRInstance::OptimizeShadowIV() { const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); if (isa(BackedgeTakenCount)) @@ -1902,9 +1907,8 @@ void LSRInstance::OptimizeShadowIV() { } } -/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, -/// set the IV user and stride information and return true, otherwise return -/// false. +/// If Cond has an operand that is an expression of an IV, set the IV user and +/// stride information and return true, otherwise return false. bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { for (IVStrideUse &U : IU) if (U.getUser() == Cond) { @@ -1917,8 +1921,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { return false; } -/// OptimizeMax - Rewrite the loop's terminating condition if it uses -/// a max computation. +/// Rewrite the loop's terminating condition if it uses a max computation. /// /// This is a narrow solution to a specific, but acute, problem. For loops /// like this: @@ -2076,8 +2079,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { return NewCond; } -/// OptimizeLoopTermCond - Change loop terminating condition to use the -/// postinc iv when possible. +/// Change loop terminating condition to use the postinc iv when possible. void LSRInstance::OptimizeLoopTermCond() { SmallPtrSet PostIncs; @@ -2152,16 +2154,18 @@ LSRInstance::OptimizeLoopTermCond() { C->getValue().isMinSignedValue()) goto decline_post_inc; // Check for possible scaled-address reuse.
- Type *AccessTy = getAccessType(UI->getUser()); + MemAccessTy AccessTy = getAccessType(UI->getUser()); int64_t Scale = C->getSExtValue(); - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, - /*BaseOffset=*/ 0, - /*HasBaseReg=*/ false, Scale)) + if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, + /*BaseOffset=*/0, + /*HasBaseReg=*/false, Scale, + AccessTy.AddrSpace)) goto decline_post_inc; Scale = -Scale; - if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr, - /*BaseOffset=*/ 0, - /*HasBaseReg=*/ false, Scale)) + if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr, + /*BaseOffset=*/0, + /*HasBaseReg=*/false, Scale, + AccessTy.AddrSpace)) goto decline_post_inc; } } @@ -2180,7 +2184,7 @@ LSRInstance::OptimizeLoopTermCond() { ICmpInst *OldCond = Cond; Cond = cast(Cond->clone()); Cond->setName(L->getHeader()->getName() + ".termcond"); - ExitingBlock->getInstList().insert(TermBr, Cond); + ExitingBlock->getInstList().insert(TermBr->getIterator(), Cond); // Clone the IVUse, as the old use still exists! CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); @@ -2213,15 +2217,14 @@ LSRInstance::OptimizeLoopTermCond() { } } -/// reconcileNewOffset - Determine if the given use can accommodate a fixup -/// at the given offset and other details. If so, update the use and -/// return true. -bool -LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, - LSRUse::KindType Kind, Type *AccessTy) { +/// Determine if the given use can accommodate a fixup at the given offset and +/// other details. If so, update the use and return true. +bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, + bool HasBaseReg, LSRUse::KindType Kind, + MemAccessTy AccessTy) { int64_t NewMinOffset = LU.MinOffset; int64_t NewMaxOffset = LU.MaxOffset; - Type *NewAccessTy = AccessTy; + MemAccessTy NewAccessTy = AccessTy; // Check for a mismatched kind. It's tempting to collapse mismatched kinds to // something conservative, however this can pessimize in the case that one of @@ -2232,8 +2235,10 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, // Check for a mismatched access type, and fall back conservatively as needed. // TODO: Be less conservative when the type is similar and can use the same // addressing modes. - if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) - NewAccessTy = Type::getVoidTy(AccessTy->getContext()); + if (Kind == LSRUse::Address) { + if (AccessTy != LU.AccessTy) + NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext()); + } // Conservatively assume HasBaseReg is true for now. if (NewOffset < LU.MinOffset) { @@ -2257,12 +2262,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, return true; } -/// getUse - Return an LSRUse index and an offset value for a fixup which -/// needs the given expression, with the given kind and optional access type. -/// Either reuse an existing use or create a new one, as needed. -std::pair -LSRInstance::getUse(const SCEV *&Expr, - LSRUse::KindType Kind, Type *AccessTy) { +/// Return an LSRUse index and an offset value for a fixup which needs the given +/// expression, with the given kind and optional access type. Either reuse an +/// existing use or create a new one, as needed. 
+std::pair LSRInstance::getUse(const SCEV *&Expr, + LSRUse::KindType Kind, + MemAccessTy AccessTy) { const SCEV *Copy = Expr; int64_t Offset = ExtractImmediate(Expr, SE); @@ -2300,18 +2305,18 @@ LSRInstance::getUse(const SCEV *&Expr, return std::make_pair(LUIdx, Offset); } -/// DeleteUse - Delete the given use from the Uses list. +/// Delete the given use from the Uses list. void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) { if (&LU != &Uses.back()) std::swap(LU, Uses.back()); Uses.pop_back(); // Update RegUses. - RegUses.SwapAndDropUse(LUIdx, Uses.size()); + RegUses.swapAndDropUse(LUIdx, Uses.size()); } -/// FindUseWithFormula - Look for a use distinct from OrigLU which is has -/// a formula that has the same registers as the given formula. +/// Look for a use distinct from OrigLU which has a formula that has the same +/// registers as the given formula. LSRUse * LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, const LSRUse &OrigLU) { @@ -2396,14 +2401,14 @@ void LSRInstance::CollectInterestingTypesAndFactors() { if (const SCEVConstant *Factor = dyn_cast_or_null(getExactSDiv(NewStride, OldStride, SE, true))) { - if (Factor->getValue()->getValue().getMinSignedBits() <= 64) - Factors.insert(Factor->getValue()->getValue().getSExtValue()); + if (Factor->getAPInt().getMinSignedBits() <= 64) + Factors.insert(Factor->getAPInt().getSExtValue()); } else if (const SCEVConstant *Factor = dyn_cast_or_null(getExactSDiv(OldStride, NewStride, SE, true))) { - if (Factor->getValue()->getValue().getMinSignedBits() <= 64) - Factors.insert(Factor->getValue()->getValue().getSExtValue()); + if (Factor->getAPInt().getMinSignedBits() <= 64) + Factors.insert(Factor->getAPInt().getSExtValue()); } } @@ -2415,9 +2420,9 @@ void LSRInstance::CollectInterestingTypesAndFactors() { DEBUG(print_factors_and_types(dbgs())); } -/// findIVOperand - Helper for CollectChains that finds an IV operand (computed -/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped -/// Instructions to IVStrideUses, we could partially skip this. +/// Helper for CollectChains that finds an IV operand (computed by an AddRec in +/// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to +/// IVStrideUses, we could partially skip this. static User::op_iterator findIVOperand(User::op_iterator OI, User::op_iterator OE, Loop *L, ScalarEvolution &SE) { @@ -2436,29 +2441,28 @@ findIVOperand(User::op_iterator OI, User::op_iterator OE, return OI; } -/// getWideOperand - IVChain logic must consistenctly peek base TruncInst -/// operands, so wrap it in a convenient helper. +/// IVChain logic must consistently peek base TruncInst operands, so wrap it in +/// a convenient helper. static Value *getWideOperand(Value *Oper) { if (TruncInst *Trunc = dyn_cast(Oper)) return Trunc->getOperand(0); return Oper; } -/// isCompatibleIVType - Return true if we allow an IV chain to include both -/// types. +/// Return true if we allow an IV chain to include both types. static bool isCompatibleIVType(Value *LVal, Value *RVal) { Type *LType = LVal->getType(); Type *RType = RVal->getType(); return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy()); } -/// getExprBase - Return an approximation of this SCEV expression's "base", or -/// NULL for any constant. Returning the expression itself is -/// conservative. Returning a deeper subexpression is more precise and valid as -/// long as it isn't less complex than another subexpression.
For expressions -/// involving multiple unscaled values, we need to return the pointer-type -/// SCEVUnknown. This avoids forming chains across objects, such as: -/// PrevOper==a[i], IVOper==b[i], IVInc==b-a. +/// Return an approximation of this SCEV expression's "base", or NULL for any +/// constant. Returning the expression itself is conservative. Returning a +/// deeper subexpression is more precise and valid as long as it isn't less +/// complex than another subexpression. For expressions involving multiple +/// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids +/// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i], +/// IVInc==b-a. /// /// Since SCEVUnknown is the rightmost type, and pointers are the rightmost /// SCEVUnknown, we simply return the rightmost SCEV operand. @@ -2601,8 +2605,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSetImpl &Users, return cost < 0; } -/// ChainInstruction - Add this IV user to an existing chain or make it the head -/// of a new chain. +/// Add this IV user to an existing chain or make it the head of a new chain. void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, SmallVectorImpl &ChainUsersVec) { // When IVs are used as types of varying widths, they are generally converted @@ -2714,7 +2717,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, ChainUsersVec[ChainIdx].FarUsers.erase(UserInst); } -/// CollectChains - Populate the vector of Chains. +/// Populate the vector of Chains. /// /// This decreases ILP at the architecture level. Targets with ample registers, /// multiple memory ports, and no register renaming probably don't want @@ -2755,19 +2758,19 @@ void LSRInstance::CollectChains() { for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end(); I != E; ++I) { // Skip instructions that weren't seen by IVUsers analysis. - if (isa(I) || !IU.isIVUserOrOperand(I)) + if (isa(I) || !IU.isIVUserOrOperand(&*I)) continue; // Ignore users that are part of a SCEV expression. This way we only // consider leaf IV Users. This effectively rediscovers a portion of // IVUsers analysis but in program order this time. - if (SE.isSCEVable(I->getType()) && !isa(SE.getSCEV(I))) + if (SE.isSCEVable(I->getType()) && !isa(SE.getSCEV(&*I))) continue; // Remove this instruction from any NearUsers set it may be in. for (unsigned ChainIdx = 0, NChains = IVChainVec.size(); ChainIdx < NChains; ++ChainIdx) { - ChainUsersVec[ChainIdx].NearUsers.erase(I); + ChainUsersVec[ChainIdx].NearUsers.erase(&*I); } // Search for operands that can be chained. SmallPtrSet UniqueOperands; @@ -2776,7 +2779,7 @@ void LSRInstance::CollectChains() { while (IVOpIter != IVOpEnd) { Instruction *IVOpInst = cast(*IVOpIter); if (UniqueOperands.insert(IVOpInst).second) - ChainInstruction(I, IVOpInst, ChainUsersVec); + ChainInstruction(&*I, IVOpInst, ChainUsersVec); IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE); } } // Continue walking down the instructions. 
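// Illustrative aside, not part of the patch: the hunks in this file replace
// the bare "Type *AccessTy" with a MemAccessTy value pairing the memory type
// with its address space. MemAccessTy's definition is not shown in this
// excerpt, so the following is only a minimal sketch consistent with the
// usage visible here (.MemTy, .AddrSpace, a default "unset" state,
// comparison, and MemAccessTy::getUnknown); the name suffix and the field
// defaults are assumptions.
#include "llvm/IR/Type.h"

struct MemAccessTySketch {
  llvm::Type *MemTy;  // type of the loaded/stored value, or null when unset
  unsigned AddrSpace; // address space of the access

  MemAccessTySketch() : MemTy(nullptr), AddrSpace(0) {}
  MemAccessTySketch(llvm::Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(const MemAccessTySketch &RHS) const {
    return MemTy == RHS.MemTy && AddrSpace == RHS.AddrSpace;
  }
  bool operator!=(const MemAccessTySketch &RHS) const {
    return !(*this == RHS);
  }

  // getUnknown() presumably keeps the old Type::getVoidTy fallback that
  // reconcileNewOffset() used for mismatched access types; the address
  // space chosen here is an assumption.
  static MemAccessTySketch getUnknown(llvm::LLVMContext &Ctx) {
    return MemAccessTySketch(llvm::Type::getVoidTy(Ctx), 0);
  }
};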
@@ -2828,20 +2831,20 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst, if (!IncConst || !isAddressUse(UserInst, Operand)) return false; - if (IncConst->getValue()->getValue().getMinSignedBits() > 64) + if (IncConst->getAPInt().getMinSignedBits() > 64) return false; + MemAccessTy AccessTy = getAccessType(UserInst); int64_t IncOffset = IncConst->getValue()->getSExtValue(); - if (!isAlwaysFoldable(TTI, LSRUse::Address, - getAccessType(UserInst), /*BaseGV=*/ nullptr, - IncOffset, /*HaseBaseReg=*/ false)) + if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr, + IncOffset, /*HasBaseReg=*/false)) return false; return true; } -/// GenerateIVChains - Generate an add or subtract for each IVInc in a chain to -/// materialize the IV user's operand from the previous IV user's operand. +/// Generate an add or subtract for each IVInc in a chain to materialize the IV +/// user's operand from the previous IV user's operand. void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, SmallVectorImpl &DeadInsts) { // Find the new IVOperand for the head of the chain. It may have been replaced @@ -2961,7 +2964,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { LF.PostIncLoops = U.getPostIncLoops(); LSRUse::KindType Kind = LSRUse::Basic; - Type *AccessTy = nullptr; + MemAccessTy AccessTy; if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) { Kind = LSRUse::Address; AccessTy = getAccessType(LF.UserInst); @@ -3027,9 +3030,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { DEBUG(print_fixups(dbgs())); } -/// InsertInitialFormula - Insert a formula for the given expression into -/// the given use, separating out loop-variant portions from loop-invariant -/// and loop-computable portions. +/// Insert a formula for the given expression into the given use, separating out +/// loop-variant portions from loop-invariant and loop-computable portions. void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { // Mark uses whose expressions cannot be expanded. @@ -3037,13 +3039,13 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { LU.RigidFormula = true; Formula F; - F.InitialMatch(S, L, SE); + F.initialMatch(S, L, SE); bool Inserted = InsertFormula(LU, LUIdx, F); assert(Inserted && "Initial formula already exists!"); (void)Inserted; } -/// InsertSupplementalFormula - Insert a simple single-register formula for -/// the given expression into the given use. +/// Insert a simple single-register formula for the given expression into the +/// given use. void LSRInstance::InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) { @@ -3054,17 +3056,16 @@ LSRInstance::InsertSupplementalFormula(const SCEV *S, assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; } -/// CountRegisters - Note which registers are used by the given formula, -/// updating RegUses. +/// Note which registers are used by the given formula, updating RegUses. void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { if (F.ScaledReg) - RegUses.CountRegister(F.ScaledReg, LUIdx); + RegUses.countRegister(F.ScaledReg, LUIdx); for (const SCEV *BaseReg : F.BaseRegs) - RegUses.CountRegister(BaseReg, LUIdx); + RegUses.countRegister(BaseReg, LUIdx); } -/// InsertFormula - If the given formula has not yet been inserted, add it to -/// the list, and return true. +/// If the given formula has not yet been inserted, add it to the list, and +/// return true.
Return false otherwise. bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { // Do not insert formula that we will not be able to expand. assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && @@ -3076,9 +3077,9 @@ bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { return true; } -/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of -/// loop-invariant values which we're tracking. These other uses will pin these -/// values in registers, making them less profitable for elimination. +/// Check for other uses of loop-invariant values which we're tracking. These +/// other uses will pin these values in registers, making them less profitable +/// for elimination. /// TODO: This currently misses non-constant addrec step registers. /// TODO: Should this give more weight to users inside the loop? void @@ -3124,6 +3125,9 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { PHINode::getIncomingValueNumForOperand(U.getOperandNo())); if (!DT.dominates(L->getHeader(), UseBB)) continue; + // Don't bother if the instruction is in a BB which ends in an EHPad. + if (UseBB->getTerminator()->isEHPad()) + continue; // Ignore uses which are part of other SCEV expressions, to avoid // analyzing them multiple times. if (SE.isSCEVable(UserInst->getType())) { @@ -3148,7 +3152,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { LSRFixup &LF = getNewFixup(); LF.UserInst = const_cast(UserInst); LF.OperandValToReplace = U; - std::pair P = getUse(S, LSRUse::Basic, nullptr); + std::pair P = getUse( + S, LSRUse::Basic, MemAccessTy()); LF.LUIdx = P.first; LF.Offset = P.second; LSRUse &LU = Uses[LF.LUIdx]; @@ -3165,8 +3170,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { } } -/// CollectSubexprs - Split S into subexpressions which can be pulled out into -/// separate registers. If C is non-null, multiply each subexpression by C. +/// Split S into subexpressions which can be pulled out into separate +/// registers. If C is non-null, multiply each subexpression by C. /// /// Return remainder expression after factoring the subexpressions captured by /// Ops. If Ops is complete, return NULL. @@ -3300,7 +3305,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, F.BaseRegs.push_back(*J); // We may have changed the number of register in base regs, adjust the // formula accordingly. - F.Canonicalize(); + F.canonicalize(); if (InsertFormula(LU, LUIdx, F)) // If that formula hadn't been seen before, recurse to find more like @@ -3309,8 +3314,7 @@ void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, } } -/// GenerateReassociations - Split out subexpressions from adds and the bases of -/// addrecs. +/// Split out subexpressions from adds and the bases of addrecs. void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, unsigned Depth) { assert(Base.isCanonical() && "Input must be in the canonical form"); @@ -3326,8 +3330,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, /* Idx */ -1, /* IsScaledReg */ true); } -/// GenerateCombinations - Generate a formula consisting of all of the -/// loop-dominating registers added into a single register. +/// Generate a formula consisting of all of the loop-dominating registers added +/// into a single register. void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base) { // This method is only interesting on a plurality of registers. 
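// Illustrative aside, not part of the patch: the hunks below rename
// Formula::Unscale()/Canonicalize() to unscale()/canonicalize(). Going by
// the surrounding comments ("reg1 + 1*reg2 => reg1 + reg2" and "the
// canonical representation of 1*reg is reg"), unscale() folds a by-1 scaled
// register back into the base registers. The pared-down struct below is a
// sketch of just that operation; the real Formula class carries more fields
// (BaseGV, BaseOffset, HasBaseReg, ...) and stricter invariants.
#include "llvm/ADT/SmallVector.h"
#include <cstdint>

namespace llvm { class SCEV; }

struct FormulaSketch {
  const llvm::SCEV *ScaledReg = nullptr; // register multiplied by Scale
  int64_t Scale = 0;                     // 0 means no scaled register
  llvm::SmallVector<const llvm::SCEV *, 4> BaseRegs;

  // reg1 + 1*reg2 ==> reg1 + reg2; any other scale is left untouched.
  bool unscale() {
    if (Scale != 1)
      return false;
    Scale = 0;
    BaseRegs.push_back(ScaledReg);
    ScaledReg = nullptr;
    return true;
  }
};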
@@ -3336,7 +3340,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before // processing the formula. - Base.Unscale(); + Base.unscale(); Formula F = Base; F.BaseRegs.clear(); SmallVector Ops; @@ -3354,7 +3358,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, // rather than proceed with zero in a register. if (!Sum->isZero()) { F.BaseRegs.push_back(Sum); - F.Canonicalize(); + F.canonicalize(); (void)InsertFormula(LU, LUIdx, F); } } @@ -3379,7 +3383,7 @@ void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, (void)InsertFormula(LU, LUIdx, F); } -/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets. +/// Generate reuse formulae using symbolic offsets. void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base) { // We can't add a symbolic offset if the address already contains one. @@ -3410,8 +3414,8 @@ void LSRInstance::GenerateConstantOffsetsImpl( F.Scale = 0; F.ScaledReg = nullptr; } else - F.DeleteBaseReg(F.BaseRegs[Idx]); - F.Canonicalize(); + F.deleteBaseReg(F.BaseRegs[Idx]); + F.canonicalize(); } else if (IsScaledReg) F.ScaledReg = NewG; else @@ -3452,8 +3456,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, /* IsScaledReg */ true); } -/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up -/// the comparison. For example, x == y -> x*c == y*c. +/// For ICmpZero, check to see if we can scale up the comparison. For example, x +/// == y -> x*c == y*c. void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base) { if (LU.Kind != LSRUse::ICmpZero) return; @@ -3538,8 +3542,8 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, } } -/// GenerateScales - Generate stride factor reuse formulae by making use of -/// scaled-offset address modes, for example. +/// Generate stride factor reuse formulae by making use of scaled-offset address +/// modes, for example. void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // Determine the integer type for the base formula. Type *IntTy = Base.getType(); @@ -3547,10 +3551,10 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // If this Formula already has a scaled register, we can't add another one. // Try to unscale the formula to generate a better scale. - if (Base.Scale != 0 && !Base.Unscale()) + if (Base.Scale != 0 && !Base.unscale()) return; - assert(Base.Scale == 0 && "Unscale did not did its job!"); + assert(Base.Scale == 0 && "unscale did not do its job!"); // Check each interesting stride. for (int64_t Factor : Factors) { @@ -3587,7 +3591,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { // TODO: This could be optimized to avoid all the copying. Formula F = Base; F.ScaledReg = Quotient; - F.DeleteBaseReg(F.BaseRegs[i]); + F.deleteBaseReg(F.BaseRegs[i]); // The canonical representation of 1*reg is reg, which is already in // Base. In that case, do not try to insert the formula, it will be // rejected anyway. @@ -3599,7 +3603,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) { } } -/// GenerateTruncates - Generate reuse formulae from different IV types. +/// Generate reuse formulae from different IV types. void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { // Don't bother truncating symbolic values.
if (Base.BaseGV) return; @@ -3629,9 +3633,9 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { namespace { -/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to -/// defer modifications so that the search phase doesn't have to worry about -/// the data structures moving underneath it. +/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer +/// modifications so that the search phase doesn't have to worry about the data +/// structures moving underneath it. struct WorkItem { size_t LUIdx; int64_t Imm; @@ -3651,14 +3655,13 @@ void WorkItem::print(raw_ostream &OS) const { << " , add offset " << Imm; } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void WorkItem::dump() const { print(errs()); errs() << '\n'; } -#endif -/// GenerateCrossUseConstantOffsets - Look for registers which are a constant -/// distance apart and try to form reuse opportunities between them. +/// Look for registers which are a constant distance apart and try to form reuse +/// opportunities between them. void LSRInstance::GenerateCrossUseConstantOffsets() { // Group the registers by their value without any added constant offset. typedef std::map ImmMapTy; @@ -3751,7 +3754,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // very similar but slightly different. Investigate if they // could be merged. That way, we would not have to unscale the // Formula. - F.Unscale(); + F.unscale(); // Use the immediate in the scaled register. if (F.ScaledReg == OrigReg) { int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; @@ -3770,14 +3773,13 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // value to the immediate would produce a value closer to zero than the // immediate itself, then the formula isn't worthwhile. if (const SCEVConstant *C = dyn_cast(NewF.ScaledReg)) - if (C->getValue()->isNegative() != - (NewF.BaseOffset < 0) && - (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale)) - .ule(std::abs(NewF.BaseOffset))) + if (C->getValue()->isNegative() != (NewF.BaseOffset < 0) && + (C->getAPInt().abs() * APInt(BitWidth, F.Scale)) + .ule(std::abs(NewF.BaseOffset))) continue; // OK, looks good. - NewF.Canonicalize(); + NewF.canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); } else { // Use the immediate in a base register. @@ -3801,15 +3803,15 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { // zero than the immediate itself, then the formula isn't worthwhile. for (const SCEV *NewReg : NewF.BaseRegs) if (const SCEVConstant *C = dyn_cast(NewReg)) - if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt( - std::abs(NewF.BaseOffset)) && - (C->getValue()->getValue() + - NewF.BaseOffset).countTrailingZeros() >= - countTrailingZeros(NewF.BaseOffset)) + if ((C->getAPInt() + NewF.BaseOffset) + .abs() + .slt(std::abs(NewF.BaseOffset)) && + (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >= + countTrailingZeros(NewF.BaseOffset)) goto skip_formula; // Ok, looks good. - NewF.Canonicalize(); + NewF.canonicalize(); (void)InsertFormula(LU, LUIdx, NewF); break; skip_formula:; @@ -3819,7 +3821,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { } } -/// GenerateAllReuseFormulae - Generate formulae for each use. +/// Generate formulae for each use. 
void LSRInstance::GenerateAllReuseFormulae() { // This is split into multiple loops so that hasRegsUsedByUsesOtherThan @@ -3959,10 +3961,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() { // This is a rough guess that seems to work fairly well. static const size_t ComplexityLimit = UINT16_MAX; -/// EstimateSearchSpaceComplexity - Estimate the worst-case number of -/// solutions the solver might have to consider. It almost never considers -/// this many solutions because it prune the search space, but the pruning -/// isn't always sufficient. +/// Estimate the worst-case number of solutions the solver might have to +/// consider. It almost never considers this many solutions because it prunes the +/// search space, but the pruning isn't always sufficient. size_t LSRInstance::EstimateSearchSpaceComplexity() const { size_t Power = 1; for (const LSRUse &LU : Uses) { @@ -3978,10 +3979,9 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const { return Power; } -/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset -/// of the registers of another formula, it won't help reduce register -/// pressure (though it may not necessarily hurt register pressure); remove -/// it to simplify the system. +/// When one formula uses a superset of the registers of another formula, it +/// won't help reduce register pressure (though it may not necessarily hurt +/// register pressure); remove it to simplify the system. void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { DEBUG(dbgs() << "The search space is too complex.\n"); @@ -4042,9 +4042,8 @@ void LSRInstance::NarrowSearchSpaceByDetectingSupersets() { } } -/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers -/// for expressions like A, A+1, A+2, etc., allocate a single register for -/// them. +/// When there are many registers for expressions like A, A+1, A+2, etc., +/// allocate a single register for them. void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { if (EstimateSearchSpaceComplexity() < ComplexityLimit) return; @@ -4121,8 +4120,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); } -/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call -/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that +/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that /// we've done more filtering, as it may be able to find more formulae to /// eliminate. void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ @@ -4139,9 +4137,9 @@ void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ } } -/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely -/// to be profitable, and then in any use which has any reference to that -/// register, delete all formulae which do not reference that register. +/// Pick a register which seems likely to be profitable, and then in any use +/// which has any reference to that register, delete all formulae which do not +/// reference that register. void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { // With all other options exhausted, loop until the system is simple // enough to handle.
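// Illustrative aside, not part of the patch: the hunks that follow add
// explicit conversions between Instruction* and BasicBlock::iterator
// ("&*IP", "I->getIterator()"), matching the removal of implicit ilist
// iterator-to-pointer conversions around this LLVM revision. A minimal
// sketch of both directions; the function name here is hypothetical.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static void iteratorConversionSketch(llvm::BasicBlock &BB) {
  if (BB.empty())
    return;
  // Iterator -> pointer: dereference to Instruction&, then take the address.
  llvm::BasicBlock::iterator It = BB.begin();
  llvm::Instruction *I = &*It;
  // Pointer -> iterator: getIterator() yields this instruction's position,
  // suitable for getInstList().insert() and std::next().
  llvm::BasicBlock::iterator Back = I->getIterator();
  (void)Back;
}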
@@ -4202,10 +4200,10 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { } } -/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of -/// formulae to choose from, use some rough heuristics to prune down the number -/// of formulae. This keeps the main solver from taking an extraordinary amount -/// of time in some worst-case scenarios. +/// If there are an extraordinary number of formulae to choose from, use some +/// rough heuristics to prune down the number of formulae. This keeps the main +/// solver from taking an extraordinary amount of time in some worst-case +/// scenarios. void LSRInstance::NarrowSearchSpaceUsingHeuristics() { NarrowSearchSpaceByDetectingSupersets(); NarrowSearchSpaceByCollapsingUnrolledCode(); @@ -4213,7 +4211,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() { NarrowSearchSpaceByPickingWinnerRegs(); } -/// SolveRecurse - This is the recursive solver. +/// This is the recursive solver. void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, Cost &SolutionCost, SmallVectorImpl &Workspace, @@ -4291,8 +4289,8 @@ void LSRInstance::SolveRecurse(SmallVectorImpl &Solution, } } -/// Solve - Choose one formula from each use. Return the results in the given -/// Solution vector. +/// Choose one formula from each use. Return the results in the given Solution +/// vector. void LSRInstance::Solve(SmallVectorImpl &Solution) const { SmallVector Workspace; Cost SolutionCost; @@ -4326,10 +4324,9 @@ void LSRInstance::Solve(SmallVectorImpl &Solution) const { assert(Solution.size() == Uses.size() && "Malformed solution!"); } -/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up -/// the dominator tree far as we can go while still being dominated by the -/// input positions. This helps canonicalize the insert position, which -/// encourages sharing. +/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as far +/// as we can go while still being dominated by the input positions. This helps +/// canonicalize the insert position, which encourages sharing. BasicBlock::iterator LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, const SmallVectorImpl &Inputs) @@ -4365,21 +4362,21 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP, // instead of at the end, so that it can be used for other expansions. if (IDom == Inst->getParent() && (!BetterPos || !DT.dominates(Inst, BetterPos))) - BetterPos = std::next(BasicBlock::iterator(Inst)); + BetterPos = &*std::next(BasicBlock::iterator(Inst)); } if (!AllDominate) break; if (BetterPos) - IP = BetterPos; + IP = BetterPos->getIterator(); else - IP = Tentative; + IP = Tentative->getIterator(); } return IP; } -/// AdjustInsertPositionForExpand - Determine an input position which will be -/// dominated by the operands and which will dominate the result. +/// Determine an input position which will be dominated by the operands and +/// which will dominate the result. BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, const LSRFixup &LF, @@ -4417,7 +4414,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, } } - assert(!isa(LowestIP) && !isa(LowestIP) + assert(!isa(LowestIP) && !LowestIP->isEHPad() && !isa(LowestIP) && "Insertion point must be a normal instruction"); @@ -4429,7 +4426,7 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, while (isa(IP)) ++IP; // Ignore landingpad instructions.
- while (isa(IP)) ++IP; + while (!isa(IP) && IP->isEHPad()) ++IP; // Ignore debug intrinsics. while (isa(IP)) ++IP; @@ -4437,13 +4434,14 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, // Set IP below instructions recently inserted by SCEVExpander. This keeps the // IP consistent across expansions and allows the previously inserted // instructions to be reused by subsequent expansion. - while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP; + while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP) + ++IP; return IP; } -/// Expand - Emit instructions for the leading candidate expression for this -/// LSRUse (this is called "expanding"). +/// Emit instructions for the leading candidate expression for this LSRUse (this +/// is called "expanding"). Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F, BasicBlock::iterator IP, @@ -4487,7 +4485,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, LF.UserInst, LF.OperandValToReplace, Loops, SE, DT); - Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP))); + Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP))); } // Expand the ScaledReg portion. @@ -4505,14 +4503,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Expand ScaleReg as if it was part of the base regs. if (F.Scale == 1) Ops.push_back( - SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP))); + SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP))); else { // An interesting way of "folding" with an icmp is to use a negated // scale, which we'll implement by inserting it into the other operand // of the icmp. assert(F.Scale == -1 && "The only scale supported by ICmpZero uses is -1!"); - ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP); + ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP); } } else { // Otherwise just expand the scaled register and an explicit scale, @@ -4522,11 +4520,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Unless the addressing mode will not be folded. if (!Ops.empty() && LU.Kind == LSRUse::Address && isAMCompletelyFolded(TTI, LU, F)) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } - ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)); + ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)); if (F.Scale != 1) ScaledS = SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); @@ -4538,7 +4536,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, if (F.BaseGV) { // Flush the operand list to suppress SCEVExpander hoisting. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -4548,7 +4546,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, // Flush the operand list to suppress SCEVExpander hoisting of both folded and // unfolded offsets. LSR assumes they both live next to their uses. if (!Ops.empty()) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); } @@ -4584,7 +4582,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const SCEV *FullS = Ops.empty() ? 
SE.getConstant(IntTy, 0) : SE.getAddExpr(Ops); - Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); + Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP); // We're done expanding now, so reset the rewriter. Rewriter.clearPostInc(); @@ -4626,15 +4624,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF, return FullV; } -/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use -/// of their operands effectively happens in their predecessor blocks, so the -/// expression may need to be expanded in multiple places. +/// Helper for Rewrite. PHI nodes are special because the use of their operands +/// effectively happens in their predecessor blocks, so the expression may need +/// to be expanded in multiple places. void LSRInstance::RewriteForPHI(PHINode *PN, const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts, - Pass *P) const { + SmallVectorImpl &DeadInsts) const { DenseMap Inserted; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -4658,8 +4655,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN, .setDontDeleteUselessPHIs()); } else { SmallVector NewBBs; - SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, - /*AliasAnalysis*/ nullptr, &DT, &LI); + SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI); NewBB = NewBBs[0]; } // If NewBB==NULL, then SplitCriticalEdge refused to split because all @@ -4685,7 +4681,8 @@ void LSRInstance::RewriteForPHI(PHINode *PN, if (!Pair.second) PN->setIncomingValue(i, Pair.first->second); else { - Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts); + Value *FullV = Expand(LF, F, BB->getTerminator()->getIterator(), + Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -4702,20 +4699,20 @@ void LSRInstance::RewriteForPHI(PHINode *PN, } } -/// Rewrite - Emit instructions for the leading candidate expression for this -/// LSRUse (this is called "expanding"), and update the UserInst to reference -/// the newly expanded value. +/// Emit instructions for the leading candidate expression for this LSRUse (this +/// is called "expanding"), and update the UserInst to reference the newly +/// expanded value. void LSRInstance::Rewrite(const LSRFixup &LF, const Formula &F, SCEVExpander &Rewriter, - SmallVectorImpl &DeadInsts, - Pass *P) const { + SmallVectorImpl &DeadInsts) const { // First, find an insertion point that dominates UserInst. For PHI nodes, // find the nearest block which dominates all the relevant uses. if (PHINode *PN = dyn_cast(LF.UserInst)) { - RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P); + RewriteForPHI(PN, LF, F, Rewriter, DeadInsts); } else { - Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts); + Value *FullV = + Expand(LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts); // If this is reuse-by-noop-cast, insert the noop cast. Type *OpTy = LF.OperandValToReplace->getType(); @@ -4740,11 +4737,10 @@ void LSRInstance::Rewrite(const LSRFixup &LF, DeadInsts.emplace_back(LF.OperandValToReplace); } -/// ImplementSolution - Rewrite all the fixup locations with new values, -/// following the chosen solution. -void -LSRInstance::ImplementSolution(const SmallVectorImpl &Solution, - Pass *P) { +/// Rewrite all the fixup locations with new values, following the chosen +/// solution. 
+void LSRInstance::ImplementSolution( + const SmallVectorImpl &Solution) { // Keep track of instructions we may have made dead, so that // we can remove them after we are done working. SmallVector DeadInsts; @@ -4766,7 +4762,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl &Solution, // Expand the new value definitions and update the users. for (const LSRFixup &Fixup : Fixups) { - Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P); + Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts); Changed = true; } @@ -4782,13 +4778,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl &Solution, Changed |= DeleteTriviallyDeadInstructions(DeadInsts); } -LSRInstance::LSRInstance(Loop *L, Pass *P) - : IU(P->getAnalysis()), SE(P->getAnalysis()), - DT(P->getAnalysis().getDomTree()), - LI(P->getAnalysis().getLoopInfo()), - TTI(P->getAnalysis().getTTI( - *L->getHeader()->getParent())), - L(L), Changed(false), IVIncInsertPos(nullptr) { +LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, + DominatorTree &DT, LoopInfo &LI, + const TargetTransformInfo &TTI) + : IU(IU), SE(SE), DT(DT), LI(LI), TTI(TTI), L(L), Changed(false), + IVIncInsertPos(nullptr) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; @@ -4879,7 +4873,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P) #endif // Now that we've decided what we want, make it so. - ImplementSolution(Solution, P); + ImplementSolution(Solution); } void LSRInstance::print_factors_and_types(raw_ostream &OS) const { @@ -4931,11 +4925,10 @@ void LSRInstance::print(raw_ostream &OS) const { print_uses(OS); } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void LSRInstance::dump() const { print(errs()); errs() << '\n'; } -#endif namespace { @@ -4956,7 +4949,7 @@ INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", "Loop Strength Reduction", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(IVUsers) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) @@ -4982,8 +4975,8 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequiredID(LoopSimplifyID); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); // Requiring LoopSimplify a second time here prevents IVUsers from running // twice, since LoopSimplify was invalidated by running ScalarEvolution. AU.addRequiredID(LoopSimplifyID); @@ -4996,17 +4989,24 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { if (skipOptnoneFunction(L)) return false; + auto &IU = getAnalysis(); + auto &SE = getAnalysis().getSE(); + auto &DT = getAnalysis().getDomTree(); + auto &LI = getAnalysis().getLoopInfo(); + const auto &TTI = getAnalysis().getTTI( + *L->getHeader()->getParent()); bool Changed = false; // Run the main LSR transformation. - Changed |= LSRInstance(L, this).getChanged(); + Changed |= LSRInstance(L, IU, SE, DT, LI, TTI).getChanged(); // Remove any extra phis created by processing inner loops. 
Changed |= DeleteDeadPHIs(L->getHeader()); if (EnablePhiElim && L->isLoopSimplifyForm()) { SmallVector DeadInsts; const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - SCEVExpander Rewriter(getAnalysis(), DL, "lsr"); + SCEVExpander Rewriter(getAnalysis().getSE(), DL, + "lsr"); #ifndef NDEBUG Rewriter.setDebugType(DEBUG_TYPE); #endif diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index d78db6c369b3..56ae5c010411 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -130,27 +131,29 @@ namespace { bool UserAllowPartial; bool UserRuntime; - bool runOnLoop(Loop *L, LPPassManager &LPM) override; + bool runOnLoop(Loop *L, LPPassManager &) override; /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG... /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); AU.addPreservedID(LoopSimplifyID); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. // If loop unroll does not preserve dom info then LCSSA pass on next // loop will receive invalid dom info. // For now, recreate dom info, if loop is unrolled. AU.addPreserved(); + AU.addPreserved(); } // Fill in the UnrollingPreferences parameter with values from the @@ -186,7 +189,7 @@ namespace { // total unrolled size. Parameters Threshold and PartialThreshold // are set to the maximum unrolled size for fully and partially // unrolled loops respectively. - void selectThresholds(const Loop *L, bool HasPragma, + void selectThresholds(const Loop *L, bool UsePragmaThreshold, const TargetTransformInfo::UnrollingPreferences &UP, unsigned &Threshold, unsigned &PartialThreshold, unsigned &PercentDynamicCostSavedThreshold, @@ -207,12 +210,13 @@ namespace { : UP.DynamicCostSavingsDiscount; if (!UserThreshold && + // FIXME: Use Function::optForSize(). L->getHeader()->getParent()->hasFnAttribute( Attribute::OptimizeForSize)) { Threshold = UP.OptSizeThreshold; PartialThreshold = UP.PartialOptSizeThreshold; } - if (HasPragma) { + if (UsePragmaThreshold) { // If the loop has an unrolling pragma, we want to be more // aggressive with unrolling limits. 
Set thresholds to at // least the PragmaTheshold value which is larger than the @@ -235,10 +239,11 @@ char LoopUnroll::ID = 0; INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, @@ -278,8 +283,8 @@ class UnrolledInstAnalyzer : private InstVisitor { public: UnrolledInstAnalyzer(unsigned Iteration, DenseMap &SimplifiedValues, - const Loop *L, ScalarEvolution &SE) - : Iteration(Iteration), SimplifiedValues(SimplifiedValues), L(L), SE(SE) { + ScalarEvolution &SE) + : SimplifiedValues(SimplifiedValues), SE(SE) { IterationNumber = SE.getConstant(APInt(64, Iteration)); } @@ -295,13 +300,6 @@ private: /// results saved. DenseMap SimplifiedAddresses; - /// \brief Number of currently simulated iteration. - /// - /// If an expression is ConstAddress+Constant, then the Constant is - /// Start + Iteration*Step, where Start and Step could be obtained from - /// SCEVGEPCache. - unsigned Iteration; - /// \brief SCEV expression corresponding to number of currently simulated /// iteration. const SCEV *IterationNumber; @@ -316,7 +314,6 @@ private: /// post-unrolling. DenseMap &SimplifiedValues; - const Loop *L; ScalarEvolution &SE; /// \brief Try to simplify instruction \param I using its SCEV expression. @@ -368,11 +365,9 @@ private: return simplifyInstWithSCEV(&I); } - /// TODO: Add visitors for other instruction types, e.g. ZExt, SExt. - /// Try to simplify binary operator I. /// - /// TODO: Probaly it's worth to hoist the code for estimating the + /// TODO: Probably it's worth to hoist the code for estimating the /// simplifications effects to a separate class, since we have a very similar /// code in InlineCost already. bool visitBinaryOperator(BinaryOperator &I) { @@ -412,7 +407,7 @@ private: auto *GV = dyn_cast(AddressIt->second.Base); // We're only interested in loads that can be completely folded to a // constant. - if (!GV || !GV->hasInitializer()) + if (!GV || !GV->hasDefinitiveInitializer() || !GV->isConstant()) return false; ConstantDataSequential *CDS = @@ -420,6 +415,12 @@ private: if (!CDS) return false; + // We might have a vector load from an array. FIXME: for now we just bail + // out in this case, but we should be able to resolve and simplify such + // loads. + if(!CDS->isElementTypeCompatible(I.getType())) + return false; + int ElemSize = CDS->getElementType()->getPrimitiveSizeInBits() / 8U; assert(SimplifiedAddrOp->getValue().getActiveBits() < 64 && "Unexpectedly large index value."); @@ -436,6 +437,59 @@ private: return true; } + + bool visitCastInst(CastInst &I) { + // Propagate constants through casts. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = + ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + return Base::visitCastInst(I); + } + + bool visitCmpInst(CmpInst &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + // First try to handle simplified comparisons. 
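Both visitCastInst above and visitCmpInst starting here follow the same lookup-then-fold pattern: consult SimplifiedValues for operands already discovered to be constant on this simulated iteration, then let constant folding finish the job. A toy version of that pattern over a made-up mini-IR (all names and types below are assumptions, nothing from LLVM):

  #include <iostream>
  #include <map>
  #include <string>

  // Made-up mini-IR: SSA values are named strings; known constants live in a
  // map playing the role of SimplifiedValues for one simulated iteration.
  using SimplifiedValues = std::map<std::string, long>;

  // Fold "icmp slt LHS, RHS" when both sides are known constants, mirroring
  // the lookup-then-fold structure of visitCmpInst.
  static bool foldCmpSLT(const SimplifiedValues &SV, const std::string &LHS,
                         const std::string &RHS, bool &Out) {
    auto L = SV.find(LHS), R = SV.find(RHS);
    if (L == SV.end() || R == SV.end())
      return false;              // an operand is still unknown: no fold
    Out = L->second < R->second; // both constant: fold the predicate
    return true;
  }

  int main() {
    SimplifiedValues SV = {{"%iv", 3}, {"%n", 8}}; // found this iteration
    bool Res;
    if (foldCmpSLT(SV, "%iv", "%n", Res))
      std::cout << "%cmp folds to " << Res << '\n'; // prints 1
  }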
+ if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + + if (!isa(LHS) && !isa(RHS)) { + auto SimplifiedLHS = SimplifiedAddresses.find(LHS); + if (SimplifiedLHS != SimplifiedAddresses.end()) { + auto SimplifiedRHS = SimplifiedAddresses.find(RHS); + if (SimplifiedRHS != SimplifiedAddresses.end()) { + SimplifiedAddress &LHSAddr = SimplifiedLHS->second; + SimplifiedAddress &RHSAddr = SimplifiedRHS->second; + if (LHSAddr.Base == RHSAddr.Base) { + LHS = LHSAddr.Offset; + RHS = RHSAddr.Offset; + } + } + } + } + + if (Constant *CLHS = dyn_cast(LHS)) { + if (Constant *CRHS = dyn_cast(RHS)) { + if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + return true; + } + } + } + + return Base::visitCmpInst(I); + } }; } // namespace @@ -443,11 +497,11 @@ private: namespace { struct EstimatedUnrollCost { /// \brief The estimated cost after unrolling. - unsigned UnrolledCost; + int UnrolledCost; /// \brief The estimated dynamic cost of executing the instructions in the /// rolled form. - unsigned RolledDynamicCost; + int RolledDynamicCost; }; } @@ -464,10 +518,10 @@ struct EstimatedUnrollCost { /// \returns Optional value, holding the RolledDynamicCost and UnrolledCost. If /// the analysis failed (no benefits expected from the unrolling, or the loop is /// too big to analyze), the returned value is None. -Optional -analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, - const TargetTransformInfo &TTI, - unsigned MaxUnrolledLoopSize) { +static Optional +analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, + ScalarEvolution &SE, const TargetTransformInfo &TTI, + int MaxUnrolledLoopSize) { // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to // analyze *massive* trip counts, so we force the max to be reasonably small. @@ -481,24 +535,61 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, SmallSetVector BBWorklist; DenseMap SimplifiedValues; + SmallVector, 4> SimplifiedInputValues; // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. - unsigned UnrolledCost = 0; + int UnrolledCost = 0; // We also track the estimated dynamic (that is, actually executed) cost in // the rolled form. This helps identify cases when the savings from unrolling // aren't just exposing dead control flows, but actual reduced dynamic // instructions due to the simplifications which we expect to occur after // unrolling. - unsigned RolledDynamicCost = 0; + int RolledDynamicCost = 0; + + // Ensure that we don't violate the loop structure invariants relied on by + // this analysis. + assert(L->isLoopSimplifyForm() && "Must put loop into normal form first."); + assert(L->isLCSSAForm(DT) && + "Must have loops in LCSSA form to track live-out values."); + + DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); // Simulate execution of each iteration of the loop counting instructions, // which would be simplified. // Since the same load will take different values on different iterations, // we literally have to go through all loop's iterations. 
for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { + DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); + + // Prepare for the iteration by collecting any simplified entry or backedge + // inputs. + for (Instruction &I : *L->getHeader()) { + auto *PHI = dyn_cast(&I); + if (!PHI) + break; + + // The loop header PHI nodes must have exactly two input: one from the + // loop preheader and one from the loop latch. + assert( + PHI->getNumIncomingValues() == 2 && + "Must have an incoming value only for the preheader and the latch."); + + Value *V = PHI->getIncomingValueForBlock( + Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); + Constant *C = dyn_cast(V); + if (Iteration != 0 && !C) + C = SimplifiedValues.lookup(V); + if (C) + SimplifiedInputValues.push_back({PHI, C}); + } + + // Now clear and re-populate the map for the next iteration. SimplifiedValues.clear(); - UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE); + while (!SimplifiedInputValues.empty()) + SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); + + UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE); BBWorklist.clear(); BBWorklist.insert(L->getHeader()); @@ -510,21 +601,67 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // it. We don't change the actual IR, just count optimization // opportunities. for (Instruction &I : *BB) { - unsigned InstCost = TTI.getUserCost(&I); + int InstCost = TTI.getUserCost(&I); // Visit the instruction to analyze its loop cost after unrolling, // and if the visitor returns false, include this instruction in the // unrolled cost. if (!Analyzer.visit(I)) UnrolledCost += InstCost; + else { + DEBUG(dbgs() << " " << I + << " would be simplified if loop is unrolled.\n"); + (void)0; + } // Also track this instructions expected cost when executing the rolled // loop form. RolledDynamicCost += InstCost; // If unrolled body turns out to be too big, bail out. - if (UnrolledCost > MaxUnrolledLoopSize) + if (UnrolledCost > MaxUnrolledLoopSize) { + DEBUG(dbgs() << " Exceeded threshold.. exiting.\n" + << " UnrolledCost: " << UnrolledCost + << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize + << "\n"); return None; + } + } + + TerminatorInst *TI = BB->getTerminator(); + + // Add in the live successors by first checking whether we have terminator + // that may be simplified based on the values simplified by this call. + if (BranchInst *BI = dyn_cast(TI)) { + if (BI->isConditional()) { + if (Constant *SimpleCond = + SimplifiedValues.lookup(BI->getCondition())) { + BasicBlock *Succ = nullptr; + // Just take the first successor if condition is undef + if (isa(SimpleCond)) + Succ = BI->getSuccessor(0); + else + Succ = BI->getSuccessor( + cast(SimpleCond)->isZero() ? 1 : 0); + if (L->contains(Succ)) + BBWorklist.insert(Succ); + continue; + } + } + } else if (SwitchInst *SI = dyn_cast(TI)) { + if (Constant *SimpleCond = + SimplifiedValues.lookup(SI->getCondition())) { + BasicBlock *Succ = nullptr; + // Just take the first successor if condition is undef + if (isa(SimpleCond)) + Succ = SI->getSuccessor(0); + else + Succ = SI->findCaseValue(cast(SimpleCond)) + .getCaseSuccessor(); + if (L->contains(Succ)) + BBWorklist.insert(Succ); + continue; + } } // Add BB's successors to the worklist. @@ -535,9 +672,15 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, // If we found no optimization opportunities on the first iteration, we // won't find them on later ones too. 
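The PHI-seeding loop at the top of this iteration body is what threads constants from one simulated iteration into the next: iteration 0 takes the preheader input, and later iterations take whatever the latch produced last time. A compact standalone model of that mechanism with a single induction PHI (plain C++, not LLVM types):

  #include <iostream>

  int main() {
    // Model of: %iv = phi [0, preheader], [%iv.next, latch]
    //           %iv.next = add %iv, 1
    const int TripCount = 4;
    long PreheaderIn = 0; // incoming value used on iteration 0
    long LatchOut = 0;    // "SimplifiedValues" entry carried across iterations

    for (int Iteration = 0; Iteration != TripCount; ++Iteration) {
      // Seed the PHI exactly as the analysis does.
      long IV = (Iteration == 0) ? PreheaderIn : LatchOut;
      // Simulate the body: everything derived from IV is now a constant.
      LatchOut = IV + 1;
      std::cout << "iteration " << Iteration << ": %iv = " << IV << '\n';
    }
  }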
- if (UnrolledCost == RolledDynamicCost) + if (UnrolledCost == RolledDynamicCost) { + DEBUG(dbgs() << " No opportunities found.. exiting.\n" + << " UnrolledCost: " << UnrolledCost << "\n"); return None; + } } + DEBUG(dbgs() << "Analysis finished:\n" + << "UnrolledCost: " << UnrolledCost << ", " + << "RolledDynamicCost: " << RolledDynamicCost << "\n"); return {{UnrolledCost, RolledDynamicCost}}; } @@ -583,6 +726,12 @@ static bool HasUnrollFullPragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full"); } +// Returns true if the loop has an unroll(enable) pragma. This metadata is used +// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives. +static bool HasUnrollEnablePragma(const Loop *L) { + return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); +} + // Returns true if the loop has an unroll(disable) pragma. static bool HasUnrollDisablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable"); @@ -708,7 +857,7 @@ unsigned LoopUnroll::selectUnrollCount( unsigned Count = UserCount ? CurrentCount : 0; // If there is no user-specified count, unroll pragmas have the next - // highest precendence. + // highest precedence. if (Count == 0) { if (PragmaCount) { Count = PragmaCount; @@ -737,17 +886,19 @@ unsigned LoopUnroll::selectUnrollCount( return Count; } -bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { +bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) { if (skipOptnoneFunction(L)) return false; Function &F = *L->getHeader()->getParent(); + auto &DT = getAnalysis().getDomTree(); LoopInfo *LI = &getAnalysis().getLoopInfo(); - ScalarEvolution *SE = &getAnalysis(); + ScalarEvolution *SE = &getAnalysis().getSE(); const TargetTransformInfo &TTI = getAnalysis().getTTI(F); auto &AC = getAnalysis().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() @@ -757,8 +908,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } bool PragmaFullUnroll = HasUnrollFullPragma(L); + bool PragmaEnableUnroll = HasUnrollEnablePragma(L); unsigned PragmaCount = UnrollCountPragmaValue(L); - bool HasPragma = PragmaFullUnroll || PragmaCount > 0; + bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0; TargetTransformInfo::UnrollingPreferences UP; getUnrollingPreferences(L, TTI, UP); @@ -806,7 +958,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { unsigned Threshold, PartialThreshold; unsigned PercentDynamicCostSavedThreshold; unsigned DynamicCostSavingsDiscount; - selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold, + // Only use the high pragma threshold when we have a target unroll factor such + // as with "#pragma unroll N" or a pragma indicating full unrolling and the + // trip count is known. Otherwise we rely on the standard threshold to + // heuristically select a reasonable unroll count. + bool UsePragmaThreshold = + PragmaCount > 0 || + ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0); + + selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold, PercentDynamicCostSavedThreshold, DynamicCostSavingsDiscount); @@ -824,8 +984,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. 
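For intuition, here is a rough standalone model of the decision this analysis feeds. The exact canUnrollCompletely arithmetic differs, so treat the helper below, its names, and its thresholds as assumptions rather than the pass's real code:

  #include <iostream>

  // Assumed model: unrolling past the static threshold is allowed when the
  // estimated dynamic savings are large enough, up to a bounded discount.
  static bool worthFullUnroll(int UnrolledCost, int RolledDynamicCost,
                              int Threshold, int PercentSavedThreshold,
                              int SavingsDiscount) {
    if (UnrolledCost <= Threshold)
      return true;                        // small enough outright
    if (UnrolledCost > Threshold + SavingsDiscount)
      return false;                       // too big even with the discount
    // Percentage of dynamic work the simplifications should remove.
    int PercentSaved =
        (RolledDynamicCost - UnrolledCost) * 100 / RolledDynamicCost;
    return PercentSaved >= PercentSavedThreshold;
  }

  int main() {
    // 60% of the rolled work folds away: take the discounted path.
    std::cout << worthFullUnroll(400, 1000, 150, 50, 400) << '\n'; // 1
  }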
- if (Optional Cost = analyzeLoopUnrollCost( - L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount)) + if (Optional Cost = + analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI, + Threshold + DynamicCostSavingsDiscount)) if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, DynamicCostSavingsDiscount, Cost->UnrolledCost, Cost->RolledDynamicCost)) { @@ -840,14 +1001,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = - (PragmaCount > 0) || (UserRuntime ? CurrentRuntime : UP.Runtime); + bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || + (UserRuntime ? CurrentRuntime : UP.Runtime); // Don't unroll a runtime trip count loop with unroll full pragma. if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { AllowRuntime = false; } if (Unrolling == Partial) { - bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; + bool AllowPartial = PragmaEnableUnroll || + (UserAllowPartial ? CurrentAllowPartial : UP.Partial); if (!AllowPartial && !CountSetExplicitly) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); @@ -887,23 +1049,27 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { DebugLoc LoopLoc = L->getStartLoc(); Function *F = Header->getParent(); LLVMContext &Ctx = F->getContext(); - if (PragmaFullUnroll && PragmaCount == 0) { - if (TripCount && Count != TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll(full) pragma " - "because unrolled size is too large."); - } else if (!TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll(full) pragma " - "because loop has a runtime trip count."); - } - } else if (PragmaCount > 0 && Count != OriginalCount) { + if ((PragmaCount > 0) && Count != OriginalCount) { emitOptimizationRemarkMissed( Ctx, DEBUG_TYPE, *F, LoopLoc, "Unable to unroll loop the number of times directed by " "unroll_count pragma because unrolled size is too large."); + } else if (PragmaFullUnroll && !TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll(full) pragma " + "because loop has a runtime trip count."); + } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to unroll loop as directed by unroll(enable) pragma because " + "unrolled size is too large."); + } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && + Count != TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll pragma because " + "unrolled size is too large."); } } @@ -915,7 +1081,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Unroll the loop. 
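The pragma gating above is compact but easy to misread. A condensed restatement of just the boolean logic shown in these hunks, keeping the same variable names and stripping everything else:

  #include <iostream>

  struct PragmaState {
    unsigned PragmaCount = 0;
    bool PragmaFullUnroll = false, PragmaEnableUnroll = false;
    unsigned TripCount = 0; // 0 means a runtime trip count
  };

  static bool usePragmaThreshold(const PragmaState &P) {
    // The high threshold only applies with an explicit count, or with a
    // full/enable pragma when the trip count is actually known.
    return P.PragmaCount > 0 ||
           ((P.PragmaFullUnroll || P.PragmaEnableUnroll) && P.TripCount != 0);
  }

  int main() {
    PragmaState P;
    P.PragmaEnableUnroll = true;                // unroll(enable) directive
    std::cout << usePragmaThreshold(P) << '\n'; // 0: runtime trip count
    P.TripCount = 16;                           // trip count now known
    std::cout << usePragmaThreshold(P) << '\n'; // 1
  }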
if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount, - TripMultiple, LI, this, &LPM, &AC)) + TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA)) return false; return true; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index cbc563bd8998..95d7f8a3beda 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -37,6 +38,10 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Support/BranchProbability.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" @@ -70,6 +75,19 @@ static cl::opt Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden); +static cl::opt +LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency", + cl::init(false), cl::Hidden, + cl::desc("Enable the use of the block frequency analysis to access PGO " + "heuristics to minimize code growth in cold regions.")); + +static cl::opt +ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden, + cl::desc("Coldness threshold in percentage. The loop header frequency " + "(relative to the entry frequency) is compared with this " + "threshold to determine if non-trivial unswitching should be " + "enabled.")); + namespace { class LUAnalysisCache { @@ -148,12 +166,19 @@ namespace { LPPassManager *LPM; AssumptionCache *AC; - // LoopProcessWorklist - Used to check if second loop needs processing - // after RewriteLoopBodyWithConditionConstant rewrites first loop. + // Used to check if second loop needs processing after + // RewriteLoopBodyWithConditionConstant rewrites first loop. std::vector LoopProcessWorklist; LUAnalysisCache BranchesInfo; + bool EnabledPGO; + + // BFI and ColdEntryFreq are only used when PGO and + // LoopUnswitchWithBlockFrequency are enabled. + BlockFrequencyInfo BFI; + BlockFrequency ColdEntryFreq; + bool OptimizeForSize; bool redoLoop; @@ -192,9 +217,11 @@ namespace { AU.addPreserved(); AU.addRequiredID(LCSSAID); AU.addPreservedID(LCSSAID); + AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); } private: @@ -210,7 +237,10 @@ namespace { /// Split all of the edges from inside the loop to their exit blocks. /// Update the appropriate Phi nodes as we do so. - void SplitExitEdges(Loop *L, const SmallVectorImpl &ExitBlocks); + void SplitExitEdges(Loop *L, + const SmallVectorImpl &ExitBlocks); + + bool TryTrivialLoopUnswitch(bool &Changed); bool UnswitchIfProfitable(Value *LoopCond, Constant *Val, TerminatorInst *TI = nullptr); @@ -229,9 +259,6 @@ namespace { TerminatorInst *TI); void SimplifyCode(std::vector &Worklist, Loop *L); - bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr, - BasicBlock **LoopExit = nullptr); - }; } @@ -367,9 +394,8 @@ Pass *llvm::createLoopUnswitchPass(bool Os) { return new LoopUnswitch(Os); } -/// FindLIVLoopCondition - Cond is a condition that occurs in L. 
If it is -/// invariant in the loop, or has an invariant piece, return the invariant. -/// Otherwise, return null. +/// Cond is a condition that occurs in L. If it is invariant in the loop, or has +/// an invariant piece, return the invariant. Otherwise, return null. static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) { // We started analyze new instruction, increment scanned instructions counter. @@ -411,11 +437,23 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { *L->getHeader()->getParent()); LI = &getAnalysis().getLoopInfo(); LPM = &LPM_Ref; - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; + DT = &getAnalysis().getDomTree(); currentLoop = L; Function *F = currentLoop->getHeader()->getParent(); + + EnabledPGO = F->getEntryCount().hasValue(); + + if (LoopUnswitchWithBlockFrequency && EnabledPGO) { + BranchProbabilityInfo BPI(*F, *LI); + BFI.calculate(*L->getHeader()->getParent(), BPI, *LI); + + // Use BranchProbability to compute a minimum frequency based on + // function entry baseline frequency. Loops with headers below this + // frequency are considered as cold. + const BranchProbability ColdProb(ColdnessThreshold, 100); + ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb; + } + bool Changed = false; do { assert(currentLoop->isLCSSAForm(*DT)); @@ -423,16 +461,13 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) { Changed |= processCurrentLoop(); } while(redoLoop); - if (Changed) { - // FIXME: Reconstruct dom info, because it is not preserved properly. - if (DT) - DT->recalculate(*F); - } + // FIXME: Reconstruct dom info, because it is not preserved properly. + if (Changed) + DT->recalculate(*F); return Changed; } -/// processCurrentLoop - Do actual work and unswitch loop if possible -/// and profitable. +/// Do actual work and unswitch loop if possible and profitable. bool LoopUnswitch::processCurrentLoop() { bool Changed = false; @@ -452,14 +487,48 @@ bool LoopUnswitch::processCurrentLoop() { LLVMContext &Context = loopHeader->getContext(); - // Probably we reach the quota of branches for this loop. If so - // stop unswitching. + // Analyze loop cost, and stop unswitching if loop content can not be duplicated. if (!BranchesInfo.countLoop( currentLoop, getAnalysis().getTTI( *currentLoop->getHeader()->getParent()), AC)) return false; + // Try trivial unswitch first before loop over other basic blocks in the loop. + if (TryTrivialLoopUnswitch(Changed)) { + return true; + } + + // Do not unswitch loops containing convergent operations, as we might be + // making them control dependent on the unswitch value when they were not + // before. + // FIXME: This could be refined to only bail if the convergent operation is + // not already control-dependent on the unswitch value. + for (const auto BB : currentLoop->blocks()) { + for (auto &I : *BB) { + auto CS = CallSite(&I); + if (!CS) continue; + if (CS.hasFnAttr(Attribute::Convergent)) + return false; + } + } + + // Do not do non-trivial unswitch while optimizing for size. + // FIXME: Use Function::optForSize(). + if (OptimizeForSize || + loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) + return false; + + if (LoopUnswitchWithBlockFrequency && EnabledPGO) { + // Compute the weighted frequency of the hottest block in the + // loop (loopHeader in this case since inner loops should be + // processed before outer loop). If it is less than ColdFrequency, + // we should not unswitch. 
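The cold-loop guard being introduced here boils down to scaling the function entry frequency by ColdnessThreshold percent and comparing the loop header against it. A plain-integer stand-in for the BlockFrequency * BranchProbability arithmetic (the real types scale and saturate differently, so this is only a model):

  #include <cstdint>
  #include <iostream>

  int main() {
    uint64_t EntryFreq = 1u << 14;    // stand-in for BFI.getEntryFreq()
    unsigned ColdnessThreshold = 1;   // percent, the cl::opt default above
    uint64_t ColdEntryFreq = EntryFreq * ColdnessThreshold / 100;

    uint64_t LoopEntryFreq = 120;     // stand-in for BFI.getBlockFreq(header)
    // Headers colder than the scaled entry frequency are not unswitched.
    std::cout << (LoopEntryFreq < ColdEntryFreq ? "skip (cold)" : "consider")
              << '\n';                // 120 < 163 -> skip (cold)
  }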
+ BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader); + if (LoopEntryFreq < ColdEntryFreq) + return false; + } + // Loop over all of the basic blocks in the loop. If we find an interior // block that is branching on a loop-invariant condition, we can unswitch this // loop. @@ -528,8 +597,8 @@ bool LoopUnswitch::processCurrentLoop() { return Changed; } -/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the -/// loop with no side effects (including infinite loops). +/// Check to see if all paths from BB exit the loop with no side effects +/// (including infinite loops). /// /// If true, we return true and set ExitBB to the block we /// exit through. @@ -566,9 +635,9 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, return true; } -/// isTrivialLoopExitBlock - Return true if the specified block unconditionally -/// leads to an exit from the specified loop, and has no side-effects in the -/// process. If so, return the block that is exited to, otherwise return null. +/// Return true if the specified block unconditionally leads to an exit from +/// the specified loop, and has no side-effects in the process. If so, return +/// the block that is exited to, otherwise return null. static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { std::set Visited; Visited.insert(L->getHeader()); // Branches to header make infinite loops. @@ -578,105 +647,11 @@ static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { return nullptr; } -/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is -/// trivial: that is, that the condition controls whether or not the loop does -/// anything at all. If this is a trivial condition, unswitching produces no -/// code duplications (equivalently, it produces a simpler loop and a new empty -/// loop, which gets deleted). -/// -/// If this is a trivial condition, return true, otherwise return false. When -/// returning true, this sets Cond and Val to the condition that controls the -/// trivial condition: when Cond dynamically equals Val, the loop is known to -/// exit. Finally, this sets LoopExit to the BB that the loop exits to when -/// Cond == Val. -/// -bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val, - BasicBlock **LoopExit) { - BasicBlock *Header = currentLoop->getHeader(); - TerminatorInst *HeaderTerm = Header->getTerminator(); - LLVMContext &Context = Header->getContext(); - - BasicBlock *LoopExitBB = nullptr; - if (BranchInst *BI = dyn_cast(HeaderTerm)) { - // If the header block doesn't end with a conditional branch on Cond, we - // can't handle it. - if (!BI->isConditional() || BI->getCondition() != Cond) - return false; - - // Check to see if a successor of the branch is guaranteed to - // exit through a unique exit block without having any - // side-effects. If so, determine the value of Cond that causes it to do - // this. - if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, - BI->getSuccessor(0)))) { - if (Val) *Val = ConstantInt::getTrue(Context); - } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, - BI->getSuccessor(1)))) { - if (Val) *Val = ConstantInt::getFalse(Context); - } - } else if (SwitchInst *SI = dyn_cast(HeaderTerm)) { - // If this isn't a switch on Cond, we can't handle it. - if (SI->getCondition() != Cond) return false; - - // Check to see if a successor of the switch is guaranteed to go to the - // latch block or exit through a one exit block without having any - // side-effects. 
If so, determine the value of Cond that causes it to do - // this. - // Note that we can't trivially unswitch on the default case or - // on already unswitched cases. - for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); - i != e; ++i) { - BasicBlock *LoopExitCandidate; - if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, - i.getCaseSuccessor()))) { - // Okay, we found a trivial case, remember the value that is trivial. - ConstantInt *CaseVal = i.getCaseValue(); - - // Check that it was not unswitched before, since already unswitched - // trivial vals are looks trivial too. - if (BranchesInfo.isUnswitched(SI, CaseVal)) - continue; - LoopExitBB = LoopExitCandidate; - if (Val) *Val = CaseVal; - break; - } - } - } - - // If we didn't find a single unique LoopExit block, or if the loop exit block - // contains phi nodes, this isn't trivial. - if (!LoopExitBB || isa(LoopExitBB->begin())) - return false; // Can't handle this. - - if (LoopExit) *LoopExit = LoopExitBB; - - // We already know that nothing uses any scalar values defined inside of this - // loop. As such, we just have to check to see if this loop will execute any - // side-effecting instructions (e.g. stores, calls, volatile loads) in the - // part of the loop that the code *would* execute. We already checked the - // tail, check the header now. - for (BasicBlock::iterator I = Header->begin(), E = Header->end(); I != E; ++I) - if (I->mayHaveSideEffects()) - return false; - return true; -} - -/// UnswitchIfProfitable - We have found that we can unswitch currentLoop when -/// LoopCond == Val to simplify the loop. If we decide that this is profitable, +/// We have found that we can unswitch currentLoop when LoopCond == Val to +/// simplify the loop. If we decide that this is profitable, /// unswitch the loop, reprocess the pieces, then return true. bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, TerminatorInst *TI) { - Function *F = loopHeader->getParent(); - Constant *CondVal = nullptr; - BasicBlock *ExitBlock = nullptr; - - if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) { - // If the condition is trivial, always unswitch. There is no code growth - // for this case. - UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, ExitBlock, TI); - return true; - } - // Check to see if it would be profitable to unswitch current loop. if (!BranchesInfo.CostAllowsUnswitching()) { DEBUG(dbgs() << "NOT unswitching loop %" @@ -687,32 +662,27 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, return false; } - // Do not do non-trivial unswitch while optimizing for size. - if (OptimizeForSize || F->hasFnAttribute(Attribute::OptimizeForSize)) - return false; - UnswitchNontrivialCondition(LoopCond, Val, currentLoop, TI); return true; } -/// CloneLoop - Recursively clone the specified loop and all of its children, +/// Recursively clone the specified loop and all of its children, /// mapping the blocks with the specified map. static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM) { - Loop *New = new Loop(); - LPM->insertLoop(New, PL); + Loop &New = LPM->addLoop(PL); // Add all of the blocks in L to the new loop. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) if (LI->getLoopFor(*I) == L) - New->addBasicBlockToLoop(cast(VM[*I]), *LI); + New.addBasicBlockToLoop(cast(VM[*I]), *LI); // Add all of the subloops to the new loop. 
for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) - CloneLoop(*I, New, VM, LI, LPM); + CloneLoop(*I, &New, VM, LI, LPM); - return New; + return &New; } static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst, @@ -744,15 +714,15 @@ static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst, } } // fallthrough. + case LLVMContext::MD_make_implicit: case LLVMContext::MD_dbg: DstInst->setMetadata(MD.first, MD.second); } } } -/// EmitPreheaderBranchOnCondition - Emit a conditional branch on two values -/// if LIC == Val, branch to TrueDst, otherwise branch to FalseDest. Insert the -/// code immediately before InsertPt. +/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, +/// otherwise branch to FalseDest. Insert the code immediately before InsertPt. void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, @@ -782,11 +752,11 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val, SplitCriticalEdge(BI, 1, Options); } -/// UnswitchTrivialCondition - Given a loop that has a trivial unswitchable -/// condition in it (a cond branch from its header block to its latch block, -/// where the path through the loop that doesn't execute its body has no -/// side-effects), unswitch it. This doesn't involve any code duplication, just -/// moving the conditional branch outside of the loop and updating loop info. +/// Given a loop that has a trivial unswitchable condition in it (a cond branch +/// from its header block to its latch block, where the path through the loop +/// that doesn't execute its body has no side-effects), unswitch it. This +/// doesn't involve any code duplication, just moving the conditional branch +/// outside of the loop and updating loop info. void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, BasicBlock *ExitBlock, TerminatorInst *TI) { @@ -810,7 +780,7 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, // without actually branching to it (the exit block should be dominated by the // loop header, not the preheader). assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); - BasicBlock *NewExit = SplitBlock(ExitBlock, ExitBlock->begin(), DT, LI); + BasicBlock *NewExit = SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI); // Okay, now we have a position to branch from and a position to branch to, // insert the new conditional branch. @@ -829,8 +799,155 @@ void LoopUnswitch::UnswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, ++NumTrivial; } -/// SplitExitEdges - Split all of the edges from inside the loop to their exit -/// blocks. Update the appropriate Phi nodes as we do so. +/// Check if the first non-constant condition starting from the loop header is +/// a trivial unswitch condition: that is, a condition controls whether or not +/// the loop does anything at all. If it is a trivial condition, unswitching +/// produces no code duplications (equivalently, it produces a simpler loop and +/// a new empty loop, which gets deleted). Therefore always unswitch trivial +/// condition. 
+bool LoopUnswitch::TryTrivialLoopUnswitch(bool &Changed) {
+  BasicBlock *CurrentBB = currentLoop->getHeader();
+  TerminatorInst *CurrentTerm = CurrentBB->getTerminator();
+  LLVMContext &Context = CurrentBB->getContext();
+
+  // If the loop header has only one reachable successor (currently via an
+  // unconditional branch or constant foldable conditional branch, but
+  // should also consider adding constant foldable switch instruction in
+  // future), we should keep looking for trivial condition candidates in
+  // the successor as well. An alternative is to constant fold conditions
+  // and merge successors into loop header (then we only need to check header's
+  // terminator). The reason for not doing this in the LoopUnswitch pass is
+  // that it could potentially break LoopPassManager's invariants. Folding
+  // dead branches could either eliminate the current loop or make other loops
+  // unreachable. LCSSA form might also not be preserved after deleting
+  // branches. The following code keeps traversing the loop header's
+  // successors until it finds the trivial condition candidate (a condition
+  // that is not a constant). Since unswitching generates branches with
+  // constant conditions, this scenario could be very common in practice.
+  SmallSet<BasicBlock*, 8> Visited;
+
+  while (true) {
+    // If we exit the loop or reach a previously visited block, then we
+    // cannot reach any trivial condition candidates (unfoldable
+    // branch instructions or switch instructions) and no unswitch
+    // can happen. Exit and return false.
+    if (!currentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second)
+      return false;
+
+    // Check if this loop will execute any side-effecting instructions (e.g.
+    // stores, calls, volatile loads) in the part of the loop that the code
+    // *would* execute. Check the header first.
+    for (Instruction &I : *CurrentBB)
+      if (I.mayHaveSideEffects())
+        return false;
+
+    // FIXME: add check for constant foldable switch instructions.
+    if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+      if (BI->isUnconditional()) {
+        CurrentBB = BI->getSuccessor(0);
+      } else if (BI->getCondition() == ConstantInt::getTrue(Context)) {
+        CurrentBB = BI->getSuccessor(0);
+      } else if (BI->getCondition() == ConstantInt::getFalse(Context)) {
+        CurrentBB = BI->getSuccessor(1);
+      } else {
+        // Found a trivial condition candidate: non-foldable conditional branch.
+        break;
+      }
+    } else {
+      break;
+    }
+
+    CurrentTerm = CurrentBB->getTerminator();
+  }
+
+  // CondVal is the condition that controls the trivial condition.
+  // LoopExitBB is the BasicBlock that the loop exits to when the trivial
+  // condition is met.
+  Constant *CondVal = nullptr;
+  BasicBlock *LoopExitBB = nullptr;
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) {
+    // If this isn't branching on an invariant condition, we can't unswitch it.
+    if (!BI->isConditional())
+      return false;
+
+    Value *LoopCond = FindLIVLoopCondition(BI->getCondition(),
+                                           currentLoop, Changed);
+
+    // Unswitch only if the trivial condition itself is an LIV (not
+    // partial LIV which could occur in and/or)
+    if (!LoopCond || LoopCond != BI->getCondition())
+      return false;
+
+    // Check to see if a successor of the branch is guaranteed to
+    // exit through a unique exit block without having any
+    // side-effects. If so, determine the value of Cond that causes
+    // it to do this.
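The traversal above reduces to a small graph walk: follow the unique reachable successor through constant-foldable branches, stop at the first non-constant condition, and give up on a revisit or loop exit. A self-contained toy version, with integer block ids instead of BasicBlocks and every name an assumption:

  #include <iostream>
  #include <map>
  #include <set>
  #include <vector>

  // Toy CFG block: CondConst is 1/0 for a constant-foldable branch and -1
  // for a non-constant (trivial unswitch candidate) condition.
  struct Block {
    int CondConst;
    std::vector<int> Succs; // successor block ids
  };

  static int findTrivialCandidate(const std::map<int, Block> &CFG, int Header) {
    std::set<int> Visited;
    int BB = Header;
    while (Visited.insert(BB).second) {
      const Block &B = CFG.at(BB);
      if (B.Succs.empty())
        return -1;           // no successors: nothing to unswitch
      if (B.Succs.size() == 1)
        BB = B.Succs[0];     // unconditional branch
      else if (B.CondConst == 1)
        BB = B.Succs[0];     // branch on constant true
      else if (B.CondConst == 0)
        BB = B.Succs[1];     // branch on constant false
      else
        return BB;           // first non-constant condition: candidate
    }
    return -1;               // looped back to a visited block: give up
  }

  int main() {
    // 0 branches on constant true to 1, 1 falls through to 2, and 2 holds
    // the first non-constant condition.
    std::map<int, Block> CFG = {{0, {1, {1, 3}}}, {1, {-1, {2}}},
                                {2, {-1, {0, 3}}}, {3, {-1, {}}}};
    std::cout << findTrivialCandidate(CFG, 0) << '\n'; // prints 2
  }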
+ if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + BI->getSuccessor(0)))) { + CondVal = ConstantInt::getTrue(Context); + } else if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop, + BI->getSuccessor(1)))) { + CondVal = ConstantInt::getFalse(Context); + } + + // If we didn't find a single unique LoopExit block, or if the loop exit + // block contains phi nodes, this isn't trivial. + if (!LoopExitBB || isa(LoopExitBB->begin())) + return false; // Can't handle this. + + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, + CurrentTerm); + ++NumBranches; + return true; + } else if (SwitchInst *SI = dyn_cast(CurrentTerm)) { + // If this isn't switching on an invariant condition, we can't unswitch it. + Value *LoopCond = FindLIVLoopCondition(SI->getCondition(), + currentLoop, Changed); + + // Unswitch only if the trivial condition itself is an LIV (not + // partial LIV which could occur in and/or) + if (!LoopCond || LoopCond != SI->getCondition()) + return false; + + // Check to see if a successor of the switch is guaranteed to go to the + // latch block or exit through a one exit block without having any + // side-effects. If so, determine the value of Cond that causes it to do + // this. + // Note that we can't trivially unswitch on the default case or + // on already unswitched cases. + for (SwitchInst::CaseIt i = SI->case_begin(), e = SI->case_end(); + i != e; ++i) { + BasicBlock *LoopExitCandidate; + if ((LoopExitCandidate = isTrivialLoopExitBlock(currentLoop, + i.getCaseSuccessor()))) { + // Okay, we found a trivial case, remember the value that is trivial. + ConstantInt *CaseVal = i.getCaseValue(); + + // Check that it was not unswitched before, since already unswitched + // trivial vals are looks trivial too. + if (BranchesInfo.isUnswitched(SI, CaseVal)) + continue; + LoopExitBB = LoopExitCandidate; + CondVal = CaseVal; + break; + } + } + + // If we didn't find a single unique LoopExit block, or if the loop exit + // block contains phi nodes, this isn't trivial. + if (!LoopExitBB || isa(LoopExitBB->begin())) + return false; // Can't handle this. + + UnswitchTrivialCondition(currentLoop, LoopCond, CondVal, LoopExitBB, + nullptr); + ++NumSwitches; + return true; + } + return false; +} + +/// Split all of the edges from inside the loop to their exit blocks. +/// Update the appropriate Phi nodes as we do so. void LoopUnswitch::SplitExitEdges(Loop *L, const SmallVectorImpl &ExitBlocks){ @@ -841,15 +958,14 @@ void LoopUnswitch::SplitExitEdges(Loop *L, // Although SplitBlockPredecessors doesn't preserve loop-simplify in // general, if we call it on all predecessors of all exits then it does. - SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", - /*AliasAnalysis*/ nullptr, DT, LI, + SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, /*PreserveLCSSA*/ true); } } -/// UnswitchNontrivialCondition - We determined that the loop is profitable -/// to unswitch when LIC equal Val. Split it into loop versions and test the -/// condition outside of either loop. Return the loops created as Out1/Out2. +/// We determined that the loop is profitable to unswitch when LIC equal Val. +/// Split it into loop versions and test the condition outside of either loop. +/// Return the loops created as Out1/Out2. 
void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, Loop *L, TerminatorInst *TI) { Function *F = loopHeader->getParent(); @@ -858,8 +974,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, << " blocks] in Function " << F->getName() << " when '" << *Val << "' == " << *LIC << "\n"); - if (ScalarEvolution *SE = getAnalysisIfAvailable()) - SE->forgetLoop(L); + if (auto *SEWP = getAnalysisIfAvailable()) + SEWP->getSE().forgetLoop(L); LoopBlocks.clear(); NewBlocks.clear(); @@ -901,8 +1017,9 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // Splice the newly inserted blocks into the function right before the // original preheader. - F->getBasicBlockList().splice(NewPreheader, F->getBasicBlockList(), - NewBlocks[0], F->end()); + F->getBasicBlockList().splice(NewPreheader->getIterator(), + F->getBasicBlockList(), + NewBlocks[0]->getIterator(), F->end()); // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. @@ -944,7 +1061,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { PHINode *PN = PHINode::Create(LPad->getType(), 0, "", - ExitSucc->getFirstInsertionPt()); + &*ExitSucc->getFirstInsertionPt()); for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); I != E; ++I) { @@ -960,7 +1077,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - RemapInstruction(I, VMap,RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); + RemapInstruction(&*I, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); // Rewrite the original preheader to select between versions of the loop. BranchInst *OldBR = cast(loopPreheader->getTerminator()); @@ -994,8 +1112,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, RewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, true); } -/// RemoveFromWorklist - Remove all instances of I from the worklist vector -/// specified. +/// Remove all instances of I from the worklist vector specified. static void RemoveFromWorklist(Instruction *I, std::vector &Worklist) { @@ -1003,7 +1120,7 @@ static void RemoveFromWorklist(Instruction *I, Worklist.end()); } -/// ReplaceUsesOfWith - When we find that I really equals V, remove I from the +/// When we find that I really equals V, remove I from the /// program, replacing all uses with V and update the worklist. static void ReplaceUsesOfWith(Instruction *I, Value *V, std::vector &Worklist, @@ -1025,9 +1142,9 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V, ++NumSimplify; } -// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has -// the value specified by Val in the specified loop, or we know it does NOT have -// that value. Rewrite any uses of LIC or of properties correlated to it. +/// We know either that the value LIC has the value specified by Val in the +/// specified loop, or we know it does NOT have that value. +/// Rewrite any uses of LIC or of properties correlated to it. 
void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, Constant *Val, bool IsEqual) { @@ -1138,18 +1255,16 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, // domtree here -- instead we force it to do a full recomputation // after the pass is complete -- but we do need to inform it of // new blocks. - if (DT) - DT->addNewBlock(Abort, NewSISucc); + DT->addNewBlock(Abort, NewSISucc); } SimplifyCode(Worklist, L); } -/// SimplifyCode - Okay, now that we have simplified some instructions in the -/// loop, walk over it and constant prop, dce, and fold control flow where -/// possible. Note that this is effectively a very simple loop-structure-aware -/// optimizer. During processing of this loop, L could very well be deleted, so -/// it must not be used. +/// Now that we have simplified some instructions in the loop, walk over it and +/// constant prop, dce, and fold control flow where possible. Note that this is +/// effectively a very simple loop-structure-aware optimizer. During processing +/// of this loop, L could very well be deleted, so it must not be used. /// /// FIXME: When the loop optimizer is more mature, separate this out to a new /// pass. @@ -1207,8 +1322,8 @@ void LoopUnswitch::SimplifyCode(std::vector &Worklist, Loop *L) { Succ->replaceAllUsesWith(Pred); // Move all of the successor contents from Succ to Pred. - Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(), - Succ->end()); + Pred->getInstList().splice(BI->getIterator(), Succ->getInstList(), + Succ->begin(), Succ->end()); LPM->deleteSimpleAnalysisValue(BI, L); BI->eraseFromParent(); RemoveFromWorklist(BI, Worklist); diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 3314e1ed41ab..41511bcb7b04 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -22,7 +22,7 @@ using namespace llvm; #define DEBUG_TYPE "loweratomic" static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { - IRBuilder<> Builder(CXI->getParent(), CXI); + IRBuilder<> Builder(CXI); Value *Ptr = CXI->getPointerOperand(); Value *Cmp = CXI->getCompareOperand(); Value *Val = CXI->getNewValOperand(); @@ -41,7 +41,7 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { } static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) { - IRBuilder<> Builder(RMWI->getParent(), RMWI); + IRBuilder<> Builder(RMWI); Value *Ptr = RMWI->getPointerOperand(); Value *Val = RMWI->getValOperand(); @@ -120,7 +120,7 @@ namespace { return false; bool Changed = false; for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) { - Instruction *Inst = DI++; + Instruction *Inst = &*DI++; if (FenceInst *FI = dyn_cast(Inst)) Changed |= LowerFenceInst(FI); else if (AtomicCmpXchgInst *CXI = dyn_cast(Inst)) diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp index 0c47cbd5bfda..2ace902a7a1b 100644 --- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp +++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp @@ -139,7 +139,7 @@ static bool lowerExpectIntrinsic(Function &F) { ExpectIntrinsicsHandled++; } - // remove llvm.expect intrinsics. + // Remove llvm.expect intrinsics. 
for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { CallInst *CI = dyn_cast(BI++); if (!CI) diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 85012afc80ac..0333bf2284e1 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -30,7 +31,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Local.h" -#include +#include using namespace llvm; #define DEBUG_TYPE "memcpyopt" @@ -71,9 +72,9 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, return Offset; } -/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a -/// constant offset, and return that constant offset. For example, Ptr1 might -/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. +/// Return true if Ptr1 is provably equal to Ptr2 plus a constant offset, and +/// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2 +/// might be &A[40]. In this case offset would be -8. static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, const DataLayout &DL) { Ptr1 = Ptr1->stripPointerCasts(); @@ -125,7 +126,7 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, } -/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value. +/// Represents a range of memset'd bytes with the ByteVal value. /// This allows us to analyze stores like: /// store 0 -> P+1 /// store 0 -> P+0 @@ -164,8 +165,8 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { // If any of the stores are a memset, then it is always good to extend the // memset. - for (unsigned i = 0, e = TheStores.size(); i != e; ++i) - if (!isa(TheStores[i])) + for (Instruction *SI : TheStores) + if (!isa(SI)) return true; // Assume that the code generator is capable of merging pairs of stores @@ -189,7 +190,7 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { unsigned NumPointerStores = Bytes / MaxIntSize; // Assume the remaining bytes if any are done a byte at a time. - unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize; + unsigned NumByteStores = Bytes % MaxIntSize; // If we will reduce the # stores (according to this heuristic), do the // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 @@ -200,15 +201,14 @@ bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const { namespace { class MemsetRanges { - /// Ranges - A sorted list of the memset ranges. We use std::list here - /// because each element is relatively large and expensive to copy. - std::list Ranges; - typedef std::list::iterator range_iterator; + /// A sorted list of the memset ranges. 
+  SmallVector<MemsetRange, 8> Ranges;
+  typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
   const DataLayout &DL;
 public:
   MemsetRanges(const DataLayout &DL) : DL(DL) {}
-  typedef std::list<MemsetRange>::const_iterator const_iterator;
+  typedef SmallVectorImpl<MemsetRange>::const_iterator const_iterator;
   const_iterator begin() const { return Ranges.begin(); }
   const_iterator end() const { return Ranges.end(); }
   bool empty() const { return Ranges.empty(); }
@@ -240,26 +240,20 @@ public:
 } // end anon namespace
-/// addRange - Add a new store to the MemsetRanges data structure. This adds a
+/// Add a new store to the MemsetRanges data structure. This adds a
 /// new range for the specified store at the specified offset, merging into
 /// existing ranges as appropriate.
-///
-/// Do a linear search of the ranges to see if this can be joined and/or to
-/// find the insertion point in the list. We keep the ranges sorted for
-/// simplicity here. This is a linear search of a linked list, which is ugly,
-/// however the number of ranges is limited, so this won't get crazy slow.
 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
                             unsigned Alignment, Instruction *Inst) {
   int64_t End = Start+Size;
-  range_iterator I = Ranges.begin(), E = Ranges.end();
-  while (I != E && Start > I->End)
-    ++I;
+  range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
+    [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
   // We now know that I == E, in which case we didn't find anything to merge
   // with, or that Start <= I->End. If End < I->Start or I == E, then we need
   // to insert a new range. Handle this now.
-  if (I == E || End < I->Start) {
+  if (I == Ranges.end() || End < I->Start) {
     MemsetRange &R = *Ranges.insert(I, MemsetRange());
     R.Start = Start;
     R.End = End;
@@ -295,7 +289,7 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
   if (End > I->End) {
     I->End = End;
     range_iterator NextI = I;
-    while (++NextI != E && End >= NextI->Start) {
+    while (++NextI != Ranges.end() && End >= NextI->Start) {
       // Merge the range in.
       I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
       if (NextI->End > I->End)
@@ -331,9 +325,9 @@ namespace {
       AU.addRequired<AssumptionCacheTracker>();
       AU.addRequired<DominatorTreeWrapperPass>();
       AU.addRequired<MemoryDependenceAnalysis>();
-      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<AAResultsWrapperPass>();
       AU.addRequired<TargetLibraryInfoWrapperPass>();
-      AU.addPreserved<AliasAnalysis>();
+      AU.addPreserved<GlobalsAAWrapperPass>();
       AU.addPreserved<MemoryDependenceAnalysis>();
     }
@@ -357,7 +351,7 @@ namespace {
 char MemCpyOpt::ID = 0;
 }
-// createMemCpyOptPass - The public interface to this file...
+/// The public interface to this file...
 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
 INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                       false, false)
@@ -366,14 +360,15 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
                     false, false)
-/// tryMergingIntoMemset - When scanning forward over instructions, we look for
-/// some other patterns to fold away. In particular, this looks for stores to
-/// neighboring locations of memory. If it sees enough consecutive ones, it
-/// attempts to merge them together into a memcpy/memset.
+/// When scanning forward over instructions, we look for some other patterns to
+/// fold away.
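The new addRange relies on Ranges staying sorted by End so that std::lower_bound can replace the old linear scan. A standalone model of the same insert-or-merge logic on plain [Start, End) intervals (names assumed, no LLVM types):

  #include <algorithm>
  #include <cstdint>
  #include <iostream>
  #include <vector>

  struct Range { int64_t Start, End; };

  // Keep [Start, End) ranges sorted by End; find the first range whose End
  // is >= Start, then either insert a new range or grow/merge in place.
  static void addRange(std::vector<Range> &Ranges, int64_t Start, int64_t Size) {
    int64_t End = Start + Size;
    auto I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
                              [](const Range &LHS, int64_t RHS) {
                                return LHS.End < RHS;
                              });
    if (I == Ranges.end() || End < I->Start) {
      Ranges.insert(I, {Start, End}); // disjoint: insert a new range
      return;
    }
    I->Start = std::min(I->Start, Start); // overlaps or abuts: extend
    if (End > I->End)
      I->End = End;
    // Swallow any following ranges the extension now reaches.
    auto NextI = I + 1;
    while (NextI != Ranges.end() && I->End >= NextI->Start) {
      I->End = std::max(I->End, NextI->End);
      NextI = Ranges.erase(NextI); // I stays valid: it precedes the erase
    }
  }

  int main() {
    std::vector<Range> Ranges;
    addRange(Ranges, 4, 4);  // [4,8)
    addRange(Ranges, 12, 4); // [4,8) [12,16)
    addRange(Ranges, 8, 4);  // bridges both into [4,16)
    for (const Range &R : Ranges)
      std::cout << '[' << R.Start << ',' << R.End << ") ";
    std::cout << '\n'; // prints [4,16)
  }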
@@ -384,7 +379,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
   // are stored.
   MemsetRanges Ranges(DL);
 
-  BasicBlock::iterator BI = StartInst;
+  BasicBlock::iterator BI(StartInst);
   for (++BI; !isa<TerminatorInst>(BI); ++BI) {
     if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
       // If the instruction is readnone, ignore it, otherwise bail out.  We
@@ -439,14 +434,12 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
   // If we create any memsets, we put it right before the first instruction that
   // isn't part of the memset block.  This ensure that the memset is dominated
   // by any addressing instruction needed by the start of the block.
-  IRBuilder<> Builder(BI);
+  IRBuilder<> Builder(&*BI);
 
   // Now that we have full information about ranges, loop over the ranges and
   // emit memset's for anything big enough to be worthwhile.
   Instruction *AMemSet = nullptr;
-  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
-       I != E; ++I) {
-    const MemsetRange &Range = *I;
+  for (const MemsetRange &Range : Ranges) {
 
     if (Range.TheStores.size() == 1) continue;
@@ -470,19 +463,17 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
       Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
 
     DEBUG(dbgs() << "Replace stores:\n";
-          for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
-            dbgs() << *Range.TheStores[i] << '\n';
+          for (Instruction *SI : Range.TheStores)
+            dbgs() << *SI << '\n';
           dbgs() << "With: " << *AMemSet << '\n');
 
     if (!Range.TheStores.empty())
       AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
 
     // Zap all the stores.
-    for (SmallVectorImpl<Instruction *>::const_iterator
-         SI = Range.TheStores.begin(),
-         SE = Range.TheStores.end(); SI != SE; ++SI) {
-      MD->removeInstruction(*SI);
-      (*SI)->eraseFromParent();
+    for (Instruction *SI : Range.TheStores) {
+      MD->removeInstruction(SI);
+      SI->eraseFromParent();
     }
     ++NumMemSetInfer;
   }
@@ -493,6 +484,16 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (!SI->isSimple()) return false;
+
+  // Avoid merging nontemporal stores since the resulting
+  // memcpy/memset would not be able to preserve the nontemporal hint.
+  // In theory we could teach how to propagate the !nontemporal metadata to
+  // memset calls. However, that change would force the backend to
+  // conservatively expand !nontemporal memset calls back to sequences of
+  // store instructions (effectively undoing the merging).
+  if (SI->getMetadata(LLVMContext::MD_nontemporal))
+    return false;
+
   const DataLayout &DL = SI->getModule()->getDataLayout();
 
   // Detect cases where we're performing call slot forwarding, but
@@ -509,11 +510,11 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
     if (C) {
       // Check that nothing touches the dest of the "copy" between
       // the call and the store.
-      AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+      AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
       MemoryLocation StoreLoc = MemoryLocation::get(SI);
-      for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
-           E = C; I != E; --I) {
-        if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
+      for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
+           I != E; --I) {
+        if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
           C = nullptr;
           break;
         }
@@ -554,7 +555,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
   if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
     if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
                                               ByteVal)) {
-      BBI = I;  // Don't invalidate iterator.
+      BBI = I->getIterator(); // Don't invalidate iterator.
       return true;
     }
 
@@ -567,14 +568,14 @@ bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
   if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
     if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
                                               MSI->getValue())) {
-      BBI = I;  // Don't invalidate iterator.
+      BBI = I->getIterator(); // Don't invalidate iterator.
      return true;
    }
  return false;
}
 
-/// performCallSlotOptzn - takes a memcpy and a call that it depends on,
+/// Takes a memcpy and a call that it depends on,
 /// and checks for the possibility of a call slot optimization by having
 /// the call write its result directly into the destination of the memcpy.
 bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
@@ -710,12 +711,12 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
   // unexpected manner, for example via a global, which we deduce from
   // the use analysis, we also need to know that it does not sneakily
   // access dest.  We rely on AA to figure this out for us.
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
-  AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+  ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
   // If necessary, perform additional analysis.
-  if (MR != AliasAnalysis::NoModRef)
+  if (MR != MRI_NoModRef)
     MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
-  if (MR != AliasAnalysis::NoModRef)
+  if (MR != MRI_NoModRef)
     return false;
 
   // All the checks have passed, so do the transformation.
@@ -749,11 +750,9 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
   // Update AA metadata
   // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
   // handled here, but combineMetadata doesn't support them yet
-  unsigned KnownIDs[] = {
-    LLVMContext::MD_tbaa,
-    LLVMContext::MD_alias_scope,
-    LLVMContext::MD_noalias,
-  };
+  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+                         LLVMContext::MD_noalias,
+                         LLVMContext::MD_invariant_group};
   combineMetadata(C, cpy, KnownIDs);
 
   // Remove the memcpy.
@@ -763,10 +762,8 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
   return true;
 }
 
-/// processMemCpyMemCpyDependence - We've found that the (upward scanning)
-/// memory dependence of memcpy 'M' is the memcpy 'MDep'.  Try to simplify M to
-/// copy from MDep's input if we can.
-///
+/// We've found that the (upward scanning) memory dependence of memcpy 'M' is
+/// the memcpy 'MDep'.  Try to simplify M to copy from MDep's input if we can.
 bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
   // We can only transforms memcpy's where the dest of one is the source of the
   // other.
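// --- Illustrative aside (not part of the patch): a source-level picture of
// the call slot optimization that performCallSlotOptzn implements. All names
// here are invented. When a call fills a temporary that is then copied into
// 'Dest', and the AA queries above prove nothing touches 'Dest' in between,
// the call may write into 'Dest' directly and the memcpy is deleted.
#include <cstring>

struct Blob { char Bytes[64]; };

static void produce(Blob *Out) {         // hypothetical callee
  std::memset(Out->Bytes, 0, sizeof(Out->Bytes));
}

void before(Blob *Dest) {
  Blob Tmp;
  produce(&Tmp);                         // call writes a dead temporary
  std::memcpy(Dest, &Tmp, sizeof(Blob)); // Dest untouched since the call
}

void after(Blob *Dest) {
  produce(Dest);                         // call slot forwarding: write Dest directly
}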
@@ -788,7 +785,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
   if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
     return false;
 
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   // Verify that the copied-from memory doesn't change in between the two
   // transfers.  For example, in:
@@ -802,8 +799,9 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
   //
   // NOTE: This is conservative, it will stop on any read from the source loc,
   // not just the defining memcpy.
-  MemDepResult SourceDep = MD->getPointerDependencyFrom(
-      MemoryLocation::getForSource(MDep), false, M, M->getParent());
+  MemDepResult SourceDep =
+      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
+                                   M->getIterator(), M->getParent());
   if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
     return false;
 
@@ -860,8 +858,9 @@ bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
     return false;
 
   // Check that there are no other dependencies on the memset destination.
-  MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
-      MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent());
+  MemDepResult DstDepInfo =
+      MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemSet), false,
+                                   MemCpy->getIterator(), MemCpy->getParent());
   if (DstDepInfo.getInst() != MemSet)
     return false;
 
@@ -936,7 +935,7 @@ bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
   return true;
 }
 
-/// processMemCpy - perform simplification of memcpy's.  If we have memcpy A
+/// Perform simplification of memcpy's.  If we have memcpy A
 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
 /// circumstances). This allows later passes to remove the first memcpy
@@ -998,8 +997,8 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   }
 
   MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
-  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
-                                                         M, M->getParent());
+  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
+      SrcLoc, true, M->getIterator(), M->getParent());
 
   if (SrcDepInfo.isClobber()) {
     if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
@@ -1037,10 +1036,10 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
   return false;
 }
 
-/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
-/// are guaranteed not to alias.
+/// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
+/// not to alias.
 bool MemCpyOpt::processMemMove(MemMoveInst *M) {
-  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   if (!TLI->has(LibFunc::memmove))
     return false;
@@ -1053,12 +1052,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
 
   // If not, then we know we can transform this.
-  Module *Mod = M->getParent()->getParent()->getParent();
   Type *ArgTys[3] = { M->getRawDest()->getType(),
                       M->getRawSource()->getType(),
                       M->getLength()->getType() };
-  M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
-                                                 ArgTys));
+  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
+                                                 Intrinsic::memcpy, ArgTys));
 
   // MemDep may have over conservative information about this instruction, just
   // conservatively flush it from the cache.
@@ -1068,7 +1066,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
   return true;
 }
 
-/// processByValArgument - This is called on every byval argument in call sites.
+/// This is called on every byval argument in call sites.
 bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
   const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
   // Find out what feeds this byval argument.
@@ -1076,8 +1074,8 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
   Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
   uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
   MemDepResult DepInfo = MD->getPointerDependencyFrom(
-      MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
-      CS.getInstruction()->getParent());
+      MemoryLocation(ByValArg, ByValSize), true,
+      CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
   if (!DepInfo.isClobber())
     return false;
 
@@ -1119,9 +1117,9 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
   //
   // NOTE: This is conservative, it will stop on any read from the source loc,
   // not just the defining memcpy.
-  MemDepResult SourceDep =
-      MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
-                                   CS.getInstruction(), MDep->getParent());
+  MemDepResult SourceDep = MD->getPointerDependencyFrom(
+      MemoryLocation::getForSource(MDep), false,
+      CS.getInstruction()->getIterator(), MDep->getParent());
   if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
     return false;
 
@@ -1140,7 +1138,7 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
   return true;
 }
 
-/// iterateOnFunction - Executes one iteration of MemCpyOpt.
+/// Executes one iteration of MemCpyOpt.
 bool MemCpyOpt::iterateOnFunction(Function &F) {
   bool MadeChange = false;
 
@@ -1148,7 +1146,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
   for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
     for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
       // Avoid invalidating the iterator.
-      Instruction *I = BI++;
+      Instruction *I = &*BI++;
 
       bool RepeatInstruction = false;
 
@@ -1177,9 +1175,7 @@ bool MemCpyOpt::iterateOnFunction(Function &F) {
   return MadeChange;
 }
 
-// MemCpyOpt::runOnFunction - This is the main transformation entry point for a
-// function.
-//
+/// This is the main transformation entry point for a function.
 bool MemCpyOpt::runOnFunction(Function &F) {
   if (skipOptnoneFunction(F))
     return false;
diff --git a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 643f3740eedd..c812d618c16a 100644
--- a/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -78,6 +78,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
@@ -91,6 +92,7 @@
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <vector>
+
 using namespace llvm;
 
 #define DEBUG_TYPE "mldst-motion"
@@ -106,7 +108,7 @@ class MergedLoadStoreMotion : public FunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid
-  explicit MergedLoadStoreMotion(void)
+  MergedLoadStoreMotion()
       : FunctionPass(ID), MD(nullptr), MagicCompileTimeControl(250) {
     initializeMergedLoadStoreMotionPass(*PassRegistry::getPassRegistry());
   }
@@ -116,10 +118,11 @@ public:
 private:
   // This transformation requires dominator postdominator info
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    AU.addRequired<AliasAnalysis>();
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addPreserved<MemoryDependenceAnalysis>();
-    AU.addPreserved<AliasAnalysis>();
   }
 
   // Helper routines
@@ -156,7 +159,7 @@ private:
 };
 
 char MergedLoadStoreMotion::ID = 0;
-}
+} // anonymous namespace
 
 ///
 /// \brief createMergedLoadStoreMotionPass - The public interface to this file.
@@ -169,7 +172,8 @@ INITIALIZE_PASS_BEGIN(MergedLoadStoreMotion, "mldst-motion",
                       "MergedLoadStoreMotion", false, false)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_END(MergedLoadStoreMotion, "mldst-motion",
                     "MergedLoadStoreMotion", false, false)
 
@@ -236,12 +240,11 @@ bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) {
 /// being loaded or protect against the load from happening
 /// it is considered a hoist barrier.
 ///
-
 bool MergedLoadStoreMotion::isLoadHoistBarrierInRange(const Instruction& Start,
                                                       const Instruction& End,
                                                       LoadInst* LI) {
   MemoryLocation Loc = MemoryLocation::get(LI);
-  return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::Mod);
+  return AA->canInstructionRangeModRef(Start, End, Loc, MRI_Mod);
 }
 
 ///
@@ -256,7 +259,7 @@ LoadInst *MergedLoadStoreMotion::canHoistFromBlock(BasicBlock *BB1,
 
   for (BasicBlock::iterator BBI = BB1->begin(), BBE = BB1->end(); BBI != BBE;
        ++BBI) {
-    Instruction *Inst = BBI;
+    Instruction *Inst = &*BBI;
 
     // Only merge and hoist loads when their result in used only in BB
     if (!isa<LoadInst>(Inst) || Inst->isUsedOutsideOfBlock(BB1))
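// --- Illustrative aside (not part of the patch): what MergedLoadStoreMotion
// does to a diamond, sketched at the source level with invented names.
// Equivalent loads in both arms are hoisted above the branch; equivalent
// stores in both arms are sunk below the join.
int before(bool C, int *P, int *Q) {
  int V;
  if (C)
    V = *P + 1;  // load of *P in the then-arm
  else
    V = *P + 2;  // the equivalent load in the else-arm
  *Q = V;
  return V;
}

int after(bool C, int *P, int *Q) {
  int L = *P;                 // hoisted: a single load dominating both arms
  int V = C ? L + 1 : L + 2;
  *Q = V;                     // a store sunk to the join would land here
  return V;
}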
@@ -293,7 +296,7 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
   // Intersect optional metadata.
   HoistCand->intersectOptionalDataWith(ElseInst);
-  HoistCand->dropUnknownMetadata();
+  HoistCand->dropUnknownNonDebugMetadata();
 
   // Prepend point for instruction insert
   Instruction *HoistPt = BB->getTerminator();
@@ -363,8 +366,7 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
   int NLoads = 0;
   for (BasicBlock::iterator BBI = Succ0->begin(), BBE = Succ0->end();
        BBI != BBE;) {
-
-    Instruction *I = BBI;
+    Instruction *I = &*BBI;
     ++BBI;
 
     // Only move non-simple (atomic, volatile) loads.
@@ -394,11 +396,10 @@ bool MergedLoadStoreMotion::mergeLoads(BasicBlock *BB) {
 /// value being stored or protect against the store from
 /// happening it is considered a sink barrier.
 ///
-
 bool MergedLoadStoreMotion::isStoreSinkBarrierInRange(const Instruction &Start,
                                                       const Instruction &End,
                                                       MemoryLocation Loc) {
-  return AA->canInstructionRangeModRef(Start, End, Loc, AliasAnalysis::ModRef);
+  return AA->canInstructionRangeModRef(Start, End, Loc, MRI_ModRef);
 }
 
 ///
@@ -438,23 +439,16 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
 PHINode *MergedLoadStoreMotion::getPHIOperand(BasicBlock *BB, StoreInst *S0,
                                               StoreInst *S1) {
   // Create a phi if the values mismatch.
-  PHINode *NewPN = 0;
+  PHINode *NewPN = nullptr;
   Value *Opd1 = S0->getValueOperand();
   Value *Opd2 = S1->getValueOperand();
   if (Opd1 != Opd2) {
     NewPN = PHINode::Create(Opd1->getType(), 2, Opd2->getName() + ".sink",
-                            BB->begin());
+                            &BB->front());
     NewPN->addIncoming(Opd1, S0->getParent());
     NewPN->addIncoming(Opd2, S1->getParent());
-    if (NewPN->getType()->getScalarType()->isPointerTy()) {
-      // AA needs to be informed when a PHI-use of the pointer value is added
-      for (unsigned I = 0, E = NewPN->getNumIncomingValues(); I != E; ++I) {
-        unsigned J = PHINode::getOperandNumForIncomingValue(I);
-        AA->addEscapingUse(NewPN->getOperandUse(J));
-      }
-      if (MD)
-        MD->invalidateCachedPointerInfo(NewPN);
-    }
+    if (MD && NewPN->getType()->getScalarType()->isPointerTy())
+      MD->invalidateCachedPointerInfo(NewPN);
   }
   return NewPN;
 }
@@ -479,12 +473,12 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
     BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
     // Intersect optional metadata.
     S0->intersectOptionalDataWith(S1);
-    S0->dropUnknownMetadata();
+    S0->dropUnknownNonDebugMetadata();
 
     // Create the new store to be inserted at the join point.
     StoreInst *SNew = (StoreInst *)(S0->clone());
     Instruction *ANew = A0->clone();
-    SNew->insertBefore(InsertPt);
+    SNew->insertBefore(&*InsertPt);
     ANew->insertBefore(SNew);
 
     assert(S0->getParent() == A0->getParent());
@@ -566,12 +560,13 @@ bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) {
   }
   return MergedStores;
 }
+
 ///
 /// \brief Run the transformation for each function
 ///
 bool MergedLoadStoreMotion::runOnFunction(Function &F) {
   MD = getAnalysisIfAvailable<MemoryDependenceAnalysis>();
-  AA = &getAnalysis<AliasAnalysis>();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
   bool Changed = false;
   DEBUG(dbgs() << "Instruction Merger\n");
@@ -579,7 +574,7 @@ bool MergedLoadStoreMotion::runOnFunction(Function &F) {
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
-    BasicBlock *BB = FI++;
+    BasicBlock *BB = &*FI++;
 
     // Hoist equivalent loads and sink stores
     // outside diamonds when possible
diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp
index f42f8306fccc..c8f885e7eec5 100644
--- a/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -71,8 +71,8 @@
 //
 // Limitations and TODO items:
 //
-// 1) We only considers n-ary adds for now. This should be extended and
-// generalized.
+// 1) We only considers n-ary adds and muls for now. This should be extended
+// and generalized.
 //
 //===----------------------------------------------------------------------===//
@@ -110,11 +110,11 @@ public:
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addPreserved<DominatorTreeWrapperPass>();
-    AU.addPreserved<ScalarEvolution>();
+    AU.addPreserved<ScalarEvolutionWrapperPass>();
     AU.addPreserved<TargetLibraryInfoWrapperPass>();
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<ScalarEvolution>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.setPreservesCFG();
@@ -145,12 +145,23 @@ private:
                                               unsigned I, Value *LHS,
                                               Value *RHS, Type *IndexedType);
 
-  // Reassociate Add for better CSE.
-  Instruction *tryReassociateAdd(BinaryOperator *I);
-  // A helper function for tryReassociateAdd. LHS and RHS are explicitly passed.
-  Instruction *tryReassociateAdd(Value *LHS, Value *RHS, Instruction *I);
-  // Rewrites I to LHS + RHS if LHS is computed already.
-  Instruction *tryReassociatedAdd(const SCEV *LHS, Value *RHS, Instruction *I);
+  // Reassociate binary operators for better CSE.
+  Instruction *tryReassociateBinaryOp(BinaryOperator *I);
+
+  // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
+  // passed.
+  Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
+                                      BinaryOperator *I);
+  // Rewrites I to (LHS op RHS) if LHS is computed already.
+  Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
+                                       BinaryOperator *I);
+
+  // Tries to match Op1 and Op2 by using V.
+  bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
+
+  // Gets SCEV for (LHS op RHS).
+  const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+                            const SCEV *RHS);
 
   // Returns the closest dominator of \c Dominatee that computes
   // \c CandidateExpr. Returns null if not found.
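// --- Illustrative aside (not part of the patch): the CSE-enabling rewrite
// behind these declarations, sketched with plain ints and invented names.
// If 'a + c' already dominates the expression, (a + b) + c is reassociated
// to (a + c) + b so the earlier value is reused; this change extends the
// same idea from 'add' to 'mul'.
int before(int a, int b, int c) {
  int x = a + c;        // computed earlier, dominates the use below
  int y = (a + b) + c;  // hides a redundant a + c
  return x ^ y;
}

int after(int a, int b, int c) {
  int x = a + c;
  int y = x + b;        // reassociated form reuses the dominating a + c
  return x ^ y;
}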
@@ -161,11 +172,6 @@ private:
   // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
   // to be an index of GEP.
   bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
-  // Returns whether V is known to be non-negative at context \c Ctxt.
-  bool isKnownNonNegative(Value *V, Instruction *Ctxt);
-  // Returns whether AO may sign overflow at context \c Ctxt. It computes a
-  // conservative result -- it answers true when not sure.
-  bool maySignOverflow(AddOperator *AO, Instruction *Ctxt);
 
   AssumptionCache *AC;
   const DataLayout *DL;
@@ -182,7 +188,7 @@ private:
   //     foo(a + b);
   //   if (p2)
   //     bar(a + b);
-  DenseMap<const SCEV *, SmallVector<Instruction *, 2>> SeenExprs;
+  DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
 };
 } // anonymous namespace
 
@@ -191,7 +197,7 @@ INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate",
                       "Nary reassociation", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
@@ -207,7 +213,7 @@ bool NaryReassociate::runOnFunction(Function &F) {
 
   AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  SE = &getAnalysis<ScalarEvolution>();
+  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 
@@ -224,6 +230,7 @@ static bool isPotentiallyNaryReassociable(Instruction *I) {
   switch (I->getOpcode()) {
   case Instruction::Add:
   case Instruction::GetElementPtr:
+  case Instruction::Mul:
     return true;
   default:
     return false;
@@ -239,19 +246,21 @@ bool NaryReassociate::doOneIteration(Function &F) {
        Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
     BasicBlock *BB = Node->getBlock();
     for (auto I = BB->begin(); I != BB->end(); ++I) {
-      if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(I)) {
-        const SCEV *OldSCEV = SE->getSCEV(I);
-        if (Instruction *NewI = tryReassociate(I)) {
+      if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(&*I)) {
+        const SCEV *OldSCEV = SE->getSCEV(&*I);
+        if (Instruction *NewI = tryReassociate(&*I)) {
           Changed = true;
-          SE->forgetValue(I);
+          SE->forgetValue(&*I);
           I->replaceAllUsesWith(NewI);
-          RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
-          I = NewI;
+          // If SeenExprs constains I's WeakVH, that entry will be replaced with
+          // nullptr.
+          RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);
+          I = NewI->getIterator();
         }
         // Add the rewritten instruction to SeenExprs; the original instruction
         // is deleted.
-        const SCEV *NewSCEV = SE->getSCEV(I);
-        SeenExprs[NewSCEV].push_back(I);
+        const SCEV *NewSCEV = SE->getSCEV(&*I);
+        SeenExprs[NewSCEV].push_back(WeakVH(&*I));
         // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)
         // is equivalent to I. However, ScalarEvolution::getSCEV may
         // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose
@@ -271,7 +280,7 @@ bool NaryReassociate::doOneIteration(Function &F) {
         //
         // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.
         if (NewSCEV != OldSCEV)
-          SeenExprs[OldSCEV].push_back(I);
+          SeenExprs[OldSCEV].push_back(WeakVH(&*I));
       }
     }
   }
@@ -281,7 +290,8 @@ bool NaryReassociate::doOneIteration(Function &F) {
 Instruction *NaryReassociate::tryReassociate(Instruction *I) {
   switch (I->getOpcode()) {
   case Instruction::Add:
-    return tryReassociateAdd(cast<BinaryOperator>(I));
+  case Instruction::Mul:
+    return tryReassociateBinaryOp(cast<BinaryOperator>(I));
   case Instruction::GetElementPtr:
     return tryReassociateGEP(cast<GetElementPtrInst>(I));
   default:
@@ -352,27 +362,6 @@ bool NaryReassociate::requiresSignExtension(Value *Index,
   return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
 }
 
-bool NaryReassociate::isKnownNonNegative(Value *V, Instruction *Ctxt) {
-  bool NonNegative, Negative;
-  // TODO: ComputeSignBits is expensive. Consider caching the results.
-  ComputeSignBit(V, NonNegative, Negative, *DL, 0, AC, Ctxt, DT);
-  return NonNegative;
-}
-
-bool NaryReassociate::maySignOverflow(AddOperator *AO, Instruction *Ctxt) {
-  if (AO->hasNoSignedWrap())
-    return false;
-
-  Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
-  // If LHS or RHS has the same sign as the sum, AO doesn't sign overflow.
-  // TODO: handle the negative case as well.
-  if (isKnownNonNegative(AO, Ctxt) &&
-      (isKnownNonNegative(LHS, Ctxt) || isKnownNonNegative(RHS, Ctxt)))
-    return false;
-
-  return true;
-}
-
 GetElementPtrInst *
 NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
                                           Type *IndexedType) {
@@ -381,7 +370,7 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
     IndexToSplit = SExt->getOperand(0);
   } else if (ZExtInst *ZExt = dyn_cast<ZExtInst>(IndexToSplit)) {
     // zext can be treated as sext if the source is non-negative.
-    if (isKnownNonNegative(ZExt->getOperand(0), GEP))
+    if (isKnownNonNegative(ZExt->getOperand(0), *DL, 0, AC, GEP, DT))
       IndexToSplit = ZExt->getOperand(0);
   }
 
@@ -389,8 +378,11 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
     // If the I-th index needs sext and the underlying add is not equipped with
     // nsw, we cannot split the add because
     //   sext(LHS + RHS) != sext(LHS) + sext(RHS).
-    if (requiresSignExtension(IndexToSplit, GEP) && maySignOverflow(AO, GEP))
+    if (requiresSignExtension(IndexToSplit, GEP) &&
+        computeOverflowForSignedAdd(AO, *DL, AC, GEP, DT) !=
+            OverflowResult::NeverOverflows)
       return nullptr;
+
     Value *LHS = AO->getOperand(0), *RHS = AO->getOperand(1);
     // IndexToSplit = LHS + RHS.
     if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I, LHS, RHS, IndexedType))
@@ -415,7 +407,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
     IndexExprs.push_back(SE->getSCEV(*Index));
   // Replace the I-th index with LHS.
   IndexExprs[I] = SE->getSCEV(LHS);
-  if (isKnownNonNegative(LHS, GEP) &&
+  if (isKnownNonNegative(LHS, *DL, 0, AC, GEP, DT) &&
       DL->getTypeSizeInBits(LHS->getType()) <
           DL->getTypeSizeInBits(GEP->getOperand(I)->getType())) {
     // Zero-extend LHS if it is non-negative. InstCombine canonicalizes sext to
@@ -429,19 +421,20 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
       GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
       IndexExprs, GEP->isInBounds());
 
-  auto *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
+  Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
   if (Candidate == nullptr)
     return nullptr;
 
-  PointerType *TypeOfCandidate = dyn_cast<PointerType>(Candidate->getType());
-  // Pretty rare but theoretically possible when a numeric value happens to
-  // share CandidateExpr.
-  if (TypeOfCandidate == nullptr)
-    return nullptr;
+  IRBuilder<> Builder(GEP);
+  // Candidate does not necessarily have the same pointer type as GEP. Use
+  // bitcast or pointer cast to make sure they have the same type, so that the
+  // later RAUW doesn't complain.
+  Candidate = Builder.CreateBitOrPointerCast(Candidate, GEP->getType());
+  assert(Candidate->getType() == GEP->getType());
 
   // NewGEP = (char *)Candidate + RHS * sizeof(IndexedType)
   uint64_t IndexedSize = DL->getTypeAllocSize(IndexedType);
-  Type *ElementType = TypeOfCandidate->getElementType();
+  Type *ElementType = GEP->getType()->getElementType();
   uint64_t ElementSize = DL->getTypeAllocSize(ElementType);
   // Another less rare case: because I is not necessarily the last index of the
   // GEP, the size of the type at the I-th index (IndexedSize) is not
@@ -461,8 +454,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
     return nullptr;
 
   // NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
-  IRBuilder<> Builder(GEP);
-  Type *IntPtrTy = DL->getIntPtrType(TypeOfCandidate);
+  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
   if (RHS->getType() != IntPtrTy)
     RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
   if (IndexedSize != ElementSize) {
@@ -476,54 +468,89 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
   return NewGEP;
 }
 
-Instruction *NaryReassociate::tryReassociateAdd(BinaryOperator *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
   Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
-  if (auto *NewI = tryReassociateAdd(LHS, RHS, I))
+  if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
     return NewI;
-  if (auto *NewI = tryReassociateAdd(RHS, LHS, I))
+  if (auto *NewI = tryReassociateBinaryOp(RHS, LHS, I))
     return NewI;
   return nullptr;
 }
 
-Instruction *NaryReassociate::tryReassociateAdd(Value *LHS, Value *RHS,
-                                                Instruction *I) {
+Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
+                                                     BinaryOperator *I) {
   Value *A = nullptr, *B = nullptr;
-  // To be conservative, we reassociate I only when it is the only user of A+B.
-  if (LHS->hasOneUse() && match(LHS, m_Add(m_Value(A), m_Value(B)))) {
-    // I = (A + B) + RHS
-    //   = (A + RHS) + B or (B + RHS) + A
+  // To be conservative, we reassociate I only when it is the only user of (A op
+  // B).
+  if (LHS->hasOneUse() && matchTernaryOp(I, LHS, A, B)) {
+    // I = (A op B) op RHS
+    //   = (A op RHS) op B or (B op RHS) op A
     const SCEV *AExpr = SE->getSCEV(A), *BExpr = SE->getSCEV(B);
     const SCEV *RHSExpr = SE->getSCEV(RHS);
     if (BExpr != RHSExpr) {
-      if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(AExpr, RHSExpr), B, I))
+      if (auto *NewI =
+              tryReassociatedBinaryOp(getBinarySCEV(I, AExpr, RHSExpr), B, I))
         return NewI;
     }
     if (AExpr != RHSExpr) {
-      if (auto *NewI = tryReassociatedAdd(SE->getAddExpr(BExpr, RHSExpr), A, I))
+      if (auto *NewI =
+              tryReassociatedBinaryOp(getBinarySCEV(I, BExpr, RHSExpr), A, I))
        return NewI;
    }
  }
  return nullptr;
}
 
-Instruction *NaryReassociate::tryReassociatedAdd(const SCEV *LHSExpr,
-                                                 Value *RHS, Instruction *I) {
-  auto Pos = SeenExprs.find(LHSExpr);
-  // Bail out if LHSExpr is not previously seen.
-  if (Pos == SeenExprs.end())
-    return nullptr;
-
+Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
+                                                      Value *RHS,
+                                                      BinaryOperator *I) {
   // Look for the closest dominator LHS of I that computes LHSExpr, and replace
-  // I with LHS + RHS.
+  // I with LHS op RHS.
   auto *LHS = findClosestMatchingDominator(LHSExpr, I);
   if (LHS == nullptr)
     return nullptr;
 
-  Instruction *NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+  Instruction *NewI = nullptr;
+  switch (I->getOpcode()) {
+  case Instruction::Add:
+    NewI = BinaryOperator::CreateAdd(LHS, RHS, "", I);
+    break;
+  case Instruction::Mul:
+    NewI = BinaryOperator::CreateMul(LHS, RHS, "", I);
+    break;
+  default:
+    llvm_unreachable("Unexpected instruction.");
+  }
   NewI->takeName(I);
   return NewI;
 }
 
+bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
+                                     Value *&Op2) {
+  switch (I->getOpcode()) {
+  case Instruction::Add:
+    return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
+  case Instruction::Mul:
+    return match(V, m_Mul(m_Value(Op1), m_Value(Op2)));
+  default:
+    llvm_unreachable("Unexpected instruction.");
+  }
+  return false;
+}
+
+const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+                                           const SCEV *RHS) {
+  switch (I->getOpcode()) {
+  case Instruction::Add:
+    return SE->getAddExpr(LHS, RHS);
+  case Instruction::Mul:
+    return SE->getMulExpr(LHS, RHS);
+  default:
+    llvm_unreachable("Unexpected instruction.");
+  }
  return nullptr;
}
 
 Instruction *
 NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
                                               Instruction *Dominatee) {
@@ -537,9 +564,13 @@ NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
   // future instruction either. Therefore, we pop it out of the stack. This
   // optimization makes the algorithm O(n).
   while (!Candidates.empty()) {
-    Instruction *Candidate = Candidates.back();
-    if (DT->dominates(Candidate, Dominatee))
-      return Candidate;
+    // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed
+    // during rewriting.
+    if (Value *Candidate = Candidates.back()) {
+      Instruction *CandidateInstruction = cast<Instruction>(Candidate);
+      if (DT->dominates(CandidateInstruction, Dominatee))
+        return CandidateInstruction;
+    }
     Candidates.pop_back();
   }
   return nullptr;
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 31d7df39c781..9f26f78892c6 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -154,7 +154,7 @@ bool PartiallyInlineLibCalls::optimizeSQRT(CallInst *Call,
   Phi->addIncoming(Call, &CurrBB);
   Phi->addIncoming(LibCall, LibCallBB);
 
-  BB = JoinBB;
+  BB = JoinBB->getIterator();
   return true;
 }
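// --- Illustrative aside (not part of the patch): the shape of the partial
// inlining that optimizeSQRT performs, written at the source level. The
// fast path uses the hardware square root; only when that result is NaN
// (the case where the libcall would have to set errno) does control reach
// the real library call. __builtin_sqrt is a compiler-specific stand-in.
#include <cmath>

double sqrtPartiallyInlined(double X) {
  double R = __builtin_sqrt(X); // inline fast path, no errno bookkeeping
  if (R == R)                   // non-NaN: the fast result is already correct
    return R;
  return std::sqrt(X);          // slow path: genuine libcall (sets errno)
}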
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 366301ad731a..28c610c2486a 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -27,7 +27,7 @@
 // well defined state for inspection by the collector.  In the current
 // implementation, this is done via the insertion of poll sites at method entry
 // and the backedge of most loops.  We try to avoid inserting more polls than
-// are neccessary to ensure a finite period between poll sites.  This is not
+// are necessary to ensure a finite period between poll sites.  This is not
 // because the poll itself is expensive in the generated code; it's not.  Polls
 // do tend to impact the optimizer itself in negative ways; we'd like to avoid
 // perturbing the optimization of the method as much as we can.
@@ -91,13 +91,15 @@ STATISTIC(FiniteExecution, "Number of loops w/o safepoints finite execution");
 
 using namespace llvm;
 
-// Ignore oppurtunities to avoid placing safepoints on backedges, useful for
+// Ignore opportunities to avoid placing safepoints on backedges, useful for
 // validation
 static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
                                   cl::init(false));
 
-/// If true, do not place backedge safepoints in counted loops.
-static cl::opt<bool> SkipCounted("spp-counted", cl::Hidden, cl::init(true));
+/// How narrow does the trip count of a loop have to be to have to be considered
+/// "counted"?  Counted loops do not get safepoints at backedges.
+static cl::opt<int> CountedLoopTripWidth("spp-counted-loop-trip-width",
+                                         cl::Hidden, cl::init(32));
 
 // If true, split the backedge of a loop when placing the safepoint, otherwise
 // split the latch block itself.  Both are useful to support for
@@ -121,7 +123,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
   std::vector<TerminatorInst *> PollLocations;
 
   /// True unless we're running spp-no-calls in which case we need to disable
-  /// the call dependend placement opts.
+  /// the call-dependent placement opts.
   bool CallSafepointsEnabled;
 
   ScalarEvolution *SE = nullptr;
@@ -142,7 +144,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
   }
 
   bool runOnFunction(Function &F) override {
-    SE = &getAnalysis<ScalarEvolution>();
+    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     for (auto I = LI->begin(), E = LI->end(); I != E; I++) {
@@ -153,7 +155,7 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<DominatorTreeWrapperPass>();
-    AU.addRequired<ScalarEvolution>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
     // We no longer modify the IR at all in this pass.  Thus all
     // analysis are preserved.
@@ -190,10 +192,8 @@ static void
 InsertSafepointPoll(Instruction *InsertBefore,
                     std::vector<CallSite> &ParsePointsNeeded /*rval*/);
 
-static bool isGCLeafFunction(const CallSite &CS);
-
 static bool needsStatepoint(const CallSite &CS) {
-  if (isGCLeafFunction(CS))
+  if (callsGCLeafFunction(CS))
     return false;
   if (CS.isCall()) {
     CallInst *call = cast<CallInst>(CS.getInstruction());
@@ -206,7 +206,7 @@ static bool needsStatepoint(const CallSite &CS) {
   return true;
 }
 
-static Value *ReplaceWithStatepoint(const CallSite &CS, Pass *P);
+static Value *ReplaceWithStatepoint(const CallSite &CS);
 
 /// Returns true if this loop is known to contain a call safepoint which
 /// must unconditionally execute on any iteration of the loop which returns
@@ -220,7 +220,7 @@ static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
   // For the moment, we look only for the 'cuts' that consist of a single call
   // instruction in a block which is dominated by the Header and dominates the
   // loop latch (Pred) block.  Somewhat surprisingly, walking the entire chain
-  // of such dominating blocks gets substaintially more occurences than just
+  // of such dominating blocks gets substantially more occurrences than just
   // checking the Pred and Header blocks themselves.  This may be due to the
   // density of loop exit conditions caused by range and null checks.
   // TODO: structure this as an analysis pass, cache the result for subloops,
@@ -255,18 +255,12 @@
 /// conservatism in the analysis.
 static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
                                     BasicBlock *Pred) {
-  // Only used when SkipCounted is off
-  const unsigned upperTripBound = 8192;
-
   // A conservative bound on the loop as a whole.
   const SCEV *MaxTrips = SE->getMaxBackedgeTakenCount(L);
-  if (MaxTrips != SE->getCouldNotCompute()) {
-    if (SE->getUnsignedRange(MaxTrips).getUnsignedMax().ult(upperTripBound))
-      return true;
-    if (SkipCounted &&
-        SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(32))
-      return true;
-  }
+  if (MaxTrips != SE->getCouldNotCompute() &&
+      SE->getUnsignedRange(MaxTrips).getUnsignedMax().isIntN(
+          CountedLoopTripWidth))
+    return true;
 
   // If this is a conditional branch to the header with the alternate path
   // being outside the loop, we can ask questions about the execution frequency
   // of the exit block.
@@ -275,13 +269,10 @@ static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
     // This returns an exact expression only.  TODO: We really only need an
     // upper bound here, but SE doesn't expose that.
     const SCEV *MaxExec = SE->getExitCount(L, Pred);
-    if (MaxExec != SE->getCouldNotCompute()) {
-      if (SE->getUnsignedRange(MaxExec).getUnsignedMax().ult(upperTripBound))
+    if (MaxExec != SE->getCouldNotCompute() &&
+        SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(
+            CountedLoopTripWidth))
        return true;
-      if (SkipCounted &&
-          SE->getUnsignedRange(MaxExec).getUnsignedMax().isIntN(32))
-        return true;
-    }
   }
 
   return /* not finite */ false;
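// --- Illustrative aside (not part of the patch): the bit-width test the
// hunk above switches to. A loop is treated as "counted" when its maximum
// trip count provably fits in CountedLoopTripWidth unsigned bits, which
// subsumes the old 8192/32-bit special cases. A standalone equivalent of
// the APInt::isIntN check, with an invented name:
#include <cstdint>

bool fitsInTripWidth(uint64_t MaxTripCount, unsigned TripWidthBits) {
  return TripWidthBits >= 64 || MaxTripCount < (uint64_t(1) << TripWidthBits);
}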
@@ -432,14 +423,14 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
     assert(hasNextInstruction(I) &&
            "first check if there is a next instruction!");
 
     if (I->isTerminator()) {
-      return I->getParent()->getUniqueSuccessor()->begin();
+      return &I->getParent()->getUniqueSuccessor()->front();
     } else {
-      return std::next(BasicBlock::iterator(I));
+      return &*++I->getIterator();
     }
   };
 
   Instruction *cursor = nullptr;
-  for (cursor = F.getEntryBlock().begin(); hasNextInstruction(cursor);
+  for (cursor = &F.getEntryBlock().front(); hasNextInstruction(cursor);
        cursor = nextInstruction(cursor)) {
 
     // We need to ensure a safepoint poll occurs before any 'real' call.  The
@@ -466,7 +457,7 @@ static Instruction *findLocationForEntrySafepoint(Function &F,
 
 static void findCallSafepoints(Function &F,
                                std::vector<CallSite> &Found /*rval*/) {
   assert(Found.empty() && "must be empty!");
-  for (Instruction &I : inst_range(F)) {
+  for (Instruction &I : instructions(F)) {
     Instruction *inst = &I;
     if (isa<CallInst>(inst) || isa<InvokeInst>(inst)) {
       CallSite CS(inst);
@@ -713,7 +704,7 @@ bool PlaceSafepoints::runOnFunction(Function &F) {
                                   Invoke->getParent());
     }
 
-    Value *GCResult = ReplaceWithStatepoint(CS, nullptr);
+    Value *GCResult = ReplaceWithStatepoint(CS);
     Results.push_back(GCResult);
   }
   assert(Results.size() == ParsePointNeeded.size());
@@ -747,7 +738,7 @@ FunctionPass *llvm::createPlaceSafepointsPass() {
 INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
                       "place-backedge-safepoints-impl",
                       "Place Backedge Safepoints", false, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
@@ -759,31 +750,6 @@ INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
 INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
                     false, false)
 
-static bool isGCLeafFunction(const CallSite &CS) {
-  Instruction *inst = CS.getInstruction();
-  if (isa<IntrinsicInst>(inst)) {
-    // Most LLVM intrinsics are things which can never take a safepoint.
-    // As a result, we don't need to have the stack parsable at the
-    // callsite.  This is a highly useful optimization since intrinsic
-    // calls are fairly prevelent, particularly in debug builds.
-    return true;
-  }
-
-  // If this function is marked explicitly as a leaf call, we don't need to
-  // place a safepoint of it.  In fact, for correctness we *can't* in many
-  // cases.  Note: Indirect calls return Null for the called function,
-  // these obviously aren't runtime functions with attributes
-  // TODO: Support attributes on the call site as well.
-  const Function *F = CS.getCalledFunction();
-  bool isLeaf =
-      F &&
-      F->getFnAttribute("gc-leaf-function").getValueAsString().equals("true");
-  if (isLeaf) {
-    return true;
-  }
-  return false;
-}
-
 static void
 InsertSafepointPoll(Instruction *InsertBefore,
                     std::vector<CallSite> &ParsePointsNeeded /*rval*/) {
@@ -796,6 +762,7 @@ InsertSafepointPoll(Instruction *InsertBefore,
   // path call - where we need to insert a safepoint (parsepoint).
 
   auto *F = M->getFunction(GCSafepointPollName);
+  assert(F && "gc.safepoint_poll function is missing");
   assert(F->getType()->getElementType() ==
              FunctionType::get(Type::getVoidTy(M->getContext()), false) &&
          "gc.safepoint_poll declared with wrong type");
@@ -864,10 +831,8 @@ InsertSafepointPoll(Instruction *InsertBefore,
 /// Replaces the given call site (Call or Invoke) with a gc.statepoint
 /// intrinsic with an empty deoptimization arguments list.  This does
 /// NOT do explicit relocation for GC support.
-static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
-                                    Pass *P) {
-  assert(CS.getInstruction()->getParent()->getParent()->getParent() &&
-         "must be set");
+static Value *ReplaceWithStatepoint(const CallSite &CS /* to replace */) {
+  assert(CS.getInstruction()->getModule() && "must be set");
 
   // TODO: technically, a pass is not allowed to get functions from within a
   // function pass since it might trigger a new function addition.  Refactor
@@ -917,15 +882,10 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
       CS.getInstruction()->getContext(), AttributeSet::FunctionIndex,
       AttrsToRemove);
 
-  Value *StatepointTarget = NumPatchBytes == 0
-                                ? CS.getCalledValue()
-                                : ConstantPointerNull::get(cast<PointerType>(
-                                      CS.getCalledValue()->getType()));
-
   if (CS.isCall()) {
     CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
     CallInst *Call = Builder.CreateGCStatepointCall(
-        ID, NumPatchBytes, StatepointTarget,
+        ID, NumPatchBytes, CS.getCalledValue(),
         makeArrayRef(CS.arg_begin(), CS.arg_end()), None, None,
         "safepoint_token");
     Call->setTailCall(ToReplace->isTailCall());
@@ -938,7 +898,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
 
     Token = Call;
 
-    // Put the following gc_result and gc_relocate calls immediately after the
+    // Put the following gc_result and gc_relocate calls immediately after
     // the old call (which we're about to delete).
     assert(ToReplace->getNextNode() && "not a terminator, must have next");
     Builder.SetInsertPoint(ToReplace->getNextNode());
@@ -951,7 +911,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
     // original block.
     Builder.SetInsertPoint(ToReplace->getParent());
     InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
-        ID, NumPatchBytes, StatepointTarget, ToReplace->getNormalDest(),
+        ID, NumPatchBytes, CS.getCalledValue(), ToReplace->getNormalDest(),
        ToReplace->getUnwindDest(), makeArrayRef(CS.arg_begin(), CS.arg_end()),
        None, None, "safepoint_token");
 
@@ -967,7 +927,7 @@ static Value *ReplaceWithStatepoint(const CallSite &CS, /* to replace */
     // We'll insert the gc.result into the normal block
     BasicBlock *NormalDest = ToReplace->getNormalDest();
     // Can not insert gc.result in case of phi nodes preset.
-    // Should have removed this cases prior to runnning this function
+    // Should have removed this cases prior to running this function
     assert(!isa<PHINode>(NormalDest->begin()));
     Instruction *IP = &*(NormalDest->getFirstInsertionPt());
     Builder.SetInsertPoint(IP);
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index d1acf785d07e..fb970c747ce1 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -26,6 +26,8 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -62,7 +64,7 @@ namespace {
   /// Print out the expression identified in the Ops list.
   ///
   static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
-    Module *M = I->getParent()->getParent()->getParent();
+    Module *M = I->getModule();
     dbgs() << Instruction::getOpcodeName(I->getOpcode()) << " "
            << *Ops[0].Op->getType() << '\t';
     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
@@ -82,20 +84,6 @@ namespace {
 
     Factor(Value *Base, unsigned Power) : Base(Base), Power(Power) {}
 
-    /// \brief Sort factors by their Base.
-    struct BaseSorter {
-      bool operator()(const Factor &LHS, const Factor &RHS) {
-        return LHS.Base < RHS.Base;
-      }
-    };
-
-    /// \brief Compare factors for equal bases.
-    struct BaseEqual {
-      bool operator()(const Factor &LHS, const Factor &RHS) {
-        return LHS.Base == RHS.Base;
-      }
-    };
-
     /// \brief Sort factors in descending order by their power.
     struct PowerDescendingSorter {
       bool operator()(const Factor &LHS, const Factor &RHS) {
@@ -172,6 +160,7 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
+      AU.addPreserved<GlobalsAAWrapperPass>();
     }
   private:
     void BuildRankMap(Function &F);
@@ -255,27 +244,6 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
   return nullptr;
 }
 
-static bool isUnmovableInstruction(Instruction *I) {
-  switch (I->getOpcode()) {
-  case Instruction::PHI:
-  case Instruction::LandingPad:
-  case Instruction::Alloca:
-  case Instruction::Load:
-  case Instruction::Invoke:
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::FDiv:
-  case Instruction::URem:
-  case Instruction::SRem:
-  case Instruction::FRem:
-    return true;
-  case Instruction::Call:
-    return !isa<DbgInfoIntrinsic>(I);
-  default:
-    return false;
-  }
-}
-
 void Reassociate::BuildRankMap(Function &F) {
   unsigned i = 2;
 
@@ -295,7 +263,7 @@ void Reassociate::BuildRankMap(Function &F) {
     // we cannot move.  This ensures that the ranks for these instructions are
     // all different in the block.
     for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-      if (isUnmovableInstruction(I))
+      if (mayBeMemoryDependent(*I))
        ValueRankMap[&*I] = ++BBRank;
  }
}
@@ -913,7 +881,11 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
 /// that computes the negative version of the value specified.  The negative
 /// version of the value is returned, and BI is left pointing at the instruction
 /// that should be processed next by the reassociation pass.
-static Value *NegateValue(Value *V, Instruction *BI) {
+/// Also add intermediate instructions to the redo list that are modified while
+/// pushing the negates through adds.  These will be revisited to see if
+/// additional opportunities have been exposed.
+static Value *NegateValue(Value *V, Instruction *BI,
+                          SetVector<AssertingVH<Instruction>> &ToRedo) {
   if (Constant *C = dyn_cast<Constant>(V)) {
     if (C->getType()->isFPOrFPVectorTy()) {
       return ConstantExpr::getFNeg(C);
     }
@@ -934,8 +906,8 @@ static Value *NegateValue(Value *V, Instruction *BI) {
   if (BinaryOperator *I =
           isReassociableOp(V, Instruction::Add, Instruction::FAdd)) {
     // Push the negates through the add.
-    I->setOperand(0, NegateValue(I->getOperand(0), BI));
-    I->setOperand(1, NegateValue(I->getOperand(1), BI));
+    I->setOperand(0, NegateValue(I->getOperand(0), BI, ToRedo));
+    I->setOperand(1, NegateValue(I->getOperand(1), BI, ToRedo));
     if (I->getOpcode() == Instruction::Add) {
       I->setHasNoUnsignedWrap(false);
       I->setHasNoSignedWrap(false);
     }
@@ -948,6 +920,10 @@ static Value *NegateValue(Value *V, Instruction *BI) {
     //
     I->moveBefore(BI);
     I->setName(I->getName()+".neg");
+
+    // Add the intermediate negates to the redo list as processing them later
+    // could expose more reassociating opportunities.
+    ToRedo.insert(I);
     return I;
   }
 
@@ -972,26 +948,28 @@ static Value *NegateValue(Value *V, Instruction *BI) {
       if (InvokeInst *II = dyn_cast<InvokeInst>(InstInput)) {
         InsertPt = II->getNormalDest()->begin();
       } else {
-        InsertPt = InstInput;
-        ++InsertPt;
+        InsertPt = ++InstInput->getIterator();
       }
       while (isa<PHINode>(InsertPt)) ++InsertPt;
     } else {
       InsertPt = TheNeg->getParent()->getParent()->getEntryBlock().begin();
     }
-    TheNeg->moveBefore(InsertPt);
+    TheNeg->moveBefore(&*InsertPt);
     if (TheNeg->getOpcode() == Instruction::Sub) {
       TheNeg->setHasNoUnsignedWrap(false);
       TheNeg->setHasNoSignedWrap(false);
     } else {
       TheNeg->andIRFlags(BI);
     }
+    ToRedo.insert(TheNeg);
    return TheNeg;
  }
 
   // Insert a 'neg' instruction that subtracts the value from zero to get the
   // negation.
-  return CreateNeg(V, V->getName() + ".neg", BI, BI);
+  BinaryOperator *NewNeg = CreateNeg(V, V->getName() + ".neg", BI, BI);
+  ToRedo.insert(NewNeg);
+  return NewNeg;
 }
 
 /// Return true if we should break up this subtract of X-Y into (X + -Y).
@@ -1025,14 +1003,15 @@ static bool ShouldBreakUpSubtract(Instruction *Sub) {
 
 /// If we have (X-Y), and if either X is an add, or if this is only used by an
 /// add, transform this into (X+(0-Y)) to promote better reassociation.
-static BinaryOperator *BreakUpSubtract(Instruction *Sub) {
+static BinaryOperator *
+BreakUpSubtract(Instruction *Sub, SetVector<AssertingVH<Instruction>> &ToRedo) {
   // Convert a subtract into an add and a neg instruction. This allows sub
   // instructions to be commuted with other add instructions.
   //
   // Calculate the negative value of Operand 1 of the sub instruction,
   // and set it as the RHS of the add instruction we just made.
   //
-  Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
+  Value *NegVal = NegateValue(Sub->getOperand(1), Sub, ToRedo);
   BinaryOperator *New = CreateAdd(Sub->getOperand(0), NegVal, "", Sub, Sub);
   Sub->setOperand(0, Constant::getNullValue(Sub->getType())); // Drop use of op.
   Sub->setOperand(1, Constant::getNullValue(Sub->getType())); // Drop use of op.
@@ -1166,7 +1145,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
     return nullptr;
   }
 
-  BasicBlock::iterator InsertPt = BO; ++InsertPt;
+  BasicBlock::iterator InsertPt = ++BO->getIterator();
 
   // If this was just a single multiply, remove the multiply and return the only
   // remaining operand.
@@ -1179,7 +1158,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
   }
 
   if (NeedsNegate)
-    V = CreateNeg(V, "neg", InsertPt, BO);
+    V = CreateNeg(V, "neg", &*InsertPt, BO);
 
   return V;
 }
@@ -1250,7 +1229,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
   return nullptr;
 }
 
-/// Helper funciton of CombineXorOpnd(). It creates a bitwise-and
+/// Helper function of CombineXorOpnd(). It creates a bitwise-and
 /// instruction with the given two operands, and return the resulting
 /// instruction. There are two special cases: 1) if the constant operand is 0,
 /// it will return NULL. 2) if the constant is ~0, the symbolic operand will
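// --- Illustrative aside (not part of the patch): what BreakUpSubtract and
// NegateValue do, sketched with plain ints. X - Y is turned into X + (-Y),
// and a negate is pushed through an add, -(A + B) == (-A) + (-B), so the
// whole expression becomes one add tree that reassociation can reorder.
// (The pass clears nsw/nuw flags when doing this, for overflow reasons this
// sketch ignores.) The intermediate negates are what now land on the redo
// list above.
int before(int X, int A, int B) {
  return X - (A + B);
}

int after(int X, int A, int B) {
  return X + ((-A) + (-B)); // same value, now a pure add tree
}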
@@ -2083,7 +2062,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
     return;
 
   // Don't optimize floating point instructions that don't have unsafe algebra.
-  if (I->getType()->isFloatingPointTy() && !I->hasUnsafeAlgebra())
+  if (I->getType()->isFPOrFPVectorTy() && !I->hasUnsafeAlgebra())
     return;
 
   // Do not reassociate boolean (i1) expressions.  We want to preserve the
@@ -2099,7 +2078,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
   // see if we can convert it to X+-Y.
   if (I->getOpcode() == Instruction::Sub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2110,6 +2089,12 @@ void Reassociate::OptimizeInst(Instruction *I) {
                (!I->hasOneUse() ||
                 !isReassociableOp(I->user_back(), Instruction::Mul))) {
       Instruction *NI = LowerNegateToMultiply(I);
+      // If the negate was simplified, revisit the users to see if we can
+      // reassociate further.
+      for (User *U : NI->users()) {
+        if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+          RedoInsts.insert(Tmp);
+      }
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2117,7 +2102,7 @@ void Reassociate::OptimizeInst(Instruction *I) {
     }
   } else if (I->getOpcode() == Instruction::FSub) {
     if (ShouldBreakUpSubtract(I)) {
-      Instruction *NI = BreakUpSubtract(I);
+      Instruction *NI = BreakUpSubtract(I, RedoInsts);
       RedoInsts.insert(I);
       MadeChange = true;
       I = NI;
@@ -2127,7 +2112,13 @@ void Reassociate::OptimizeInst(Instruction *I) {
       if (isReassociableOp(I->getOperand(1), Instruction::FMul) &&
           (!I->hasOneUse() ||
            !isReassociableOp(I->user_back(), Instruction::FMul))) {
+        // If the negate was simplified, revisit the users to see if we can
+        // reassociate further.
         Instruction *NI = LowerNegateToMultiply(I);
+        for (User *U : NI->users()) {
+          if (BinaryOperator *Tmp = dyn_cast<BinaryOperator>(U))
+            RedoInsts.insert(Tmp);
+        }
         RedoInsts.insert(I);
         MadeChange = true;
         I = NI;
@@ -2142,8 +2133,14 @@ void Reassociate::OptimizeInst(Instruction *I) {
   // If this is an interior node of a reassociable tree, ignore it until we
   // get to the root of the tree, to avoid N^2 analysis.
   unsigned Opcode = BO->getOpcode();
-  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode)
+  if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) {
+    // During the initial run we will get to the root of the tree.
+    // But if we get here while we are redoing instructions, there is no
+    // guarantee that the root will be visited. So Redo later
+    if (BO->user_back() != BO)
+      RedoInsts.insert(BO->user_back());
     return;
+  }
 
   // If this is an add tree that is used by a sub instruction, ignore it
   // until we process the subtract.
@@ -2250,10 +2247,10 @@ bool Reassociate::runOnFunction(Function &F) {
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
     // Optimize every instruction in the basic block.
     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; )
-      if (isInstructionTriviallyDead(II)) {
-        EraseInst(II++);
+      if (isInstructionTriviallyDead(&*II)) {
+        EraseInst(&*II++);
       } else {
-        OptimizeInst(II);
+        OptimizeInst(&*II);
         assert(II->getParent() == BI && "Moved to a different block!");
         ++II;
       }
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 1b46727c17bb..915f89780c08 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -82,10 +82,9 @@ bool RegToMem::runOnFunction(Function &F) {
   BasicBlock::iterator I = BBEntry->begin();
   while (isa<AllocaInst>(I)) ++I;
 
-  CastInst *AllocaInsertionPoint =
-    new BitCastInst(Constant::getNullValue(Type::getInt32Ty(F.getContext())),
-                    Type::getInt32Ty(F.getContext()),
-                    "reg2mem alloca point", I);
+  CastInst *AllocaInsertionPoint = new BitCastInst(
+      Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+      Type::getInt32Ty(F.getContext()), "reg2mem alloca point", &*I);
 
   // Find the escaped instructions. But don't create stack slots for
   // allocas in entry block.
@@ -95,7 +94,7 @@ bool RegToMem::runOnFunction(Function &F) {
     for (BasicBlock::iterator iib = ibb->begin(), iie = ibb->end(); iib != iie;
          ++iib) {
       if (!(isa<AllocaInst>(iib) && iib->getParent() == BBEntry) &&
-          valueEscapes(iib)) {
+          valueEscapes(&*iib)) {
         WorkList.push_front(&*iib);
       }
     }
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index ae2ae3af0c7a..db127c3f7b4e 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -14,12 +14,14 @@
 
 #include "llvm/Pass.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Dominators.h"
@@ -46,10 +48,6 @@
 
 using namespace llvm;
 
-// Print tracing output
-static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden,
-                              cl::init(false));
-
 // Print the liveset found at the insert location
 static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                   cl::init(false));
@@ -74,6 +72,12 @@ static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                   cl::location(ClobberNonLive),
                                                   cl::Hidden);
 
+static cl::opt<bool> UseDeoptBundles("rs4gc-use-deopt-bundles", cl::Hidden,
+                                     cl::init(false));
+static cl::opt<bool>
+    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
+                                   cl::Hidden, cl::init(true));
+
 namespace {
 struct RewriteStatepointsForGC : public ModulePass {
   static char ID; // Pass identification, replacement for typeid
@@ -88,10 +92,10 @@ struct RewriteStatepointsForGC : public ModulePass {
         Changed |= runOnFunction(F);
 
     if (Changed) {
-      // stripDereferenceabilityInfo asserts that shouldRewriteStatepointsIn
+      // stripNonValidAttributes asserts that shouldRewriteStatepointsIn
       // returns true for at least one function in the module.  Since at least
       // one function changed, we know that the precondition is satisfied.
-      stripDereferenceabilityInfo(M);
+      stripNonValidAttributes(M);
     }
 
     return Changed;
@@ -108,15 +112,16 @@ struct RewriteStatepointsForGC : public ModulePass {
   /// dereferenceability that are no longer valid/correct after
   /// RewriteStatepointsForGC has run.  This is because semantically, after
   /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
-  /// heap.  stripDereferenceabilityInfo (conservatively) restores correctness
+  /// heap.  stripNonValidAttributes (conservatively) restores correctness
   /// by erasing all attributes in the module that externally imply
   /// dereferenceability.
-  ///
-  void stripDereferenceabilityInfo(Module &M);
+  /// Similar reasoning also applies to the noalias attributes. gc.statepoint
+  /// can touch the entire heap including noalias objects.
+  void stripNonValidAttributes(Module &M);
 
-  // Helpers for stripDereferenceabilityInfo
-  void stripDereferenceabilityInfoFromBody(Function &F);
-  void stripDereferenceabilityInfoFromPrototype(Function &F);
+  // Helpers for stripNonValidAttributes
+  void stripNonValidAttributesFromBody(Function &F);
+  void stripNonValidAttributesFromPrototype(Function &F);
 };
 } // namespace
 
@@ -160,15 +165,16 @@ struct GCPtrLivenessData {
 // base relation will remain.  Internally, we add a mixture of the two
 // types, then update all the second type to the first type
 typedef DenseMap<Value *, Value *> DefiningValueMapTy;
-typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
-typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;
+typedef DenseSet<Value *> StatepointLiveSetTy;
+typedef DenseMap<AssertingVH<Instruction>, AssertingVH<Value>>
+    RematerializedValueMapTy;
 
 struct PartiallyConstructedSafepointRecord {
-  /// The set of values known to be live accross this safepoint
-  StatepointLiveSetTy liveset;
+  /// The set of values known to be live across this safepoint
+  StatepointLiveSetTy LiveSet;
 
   /// Mapping from live pointers to a base-defining-value
-  DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
+  DenseMap<Value *, Value *> PointerToBase;
 
   /// The *new* gc.statepoint instruction itself.  This produces the token
   /// that normal path gc.relocates and the gc.result are tied to.
@@ -179,12 +185,26 @@ struct PartiallyConstructedSafepointRecord {
   Instruction *UnwindToken;
 
   /// Record live values we are rematerialized instead of relocating.
-  /// They are not included into 'liveset' field.
+  /// They are not included into 'LiveSet' field.
   /// Maps rematerialized copy to it's original value.
   RematerializedValueMapTy RematerializedValues;
 };
 }
 
+static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
+  assert(UseDeoptBundles && "Should not be called otherwise!");
+
+  Optional<OperandBundleUse> DeoptBundle = CS.getOperandBundle("deopt");
+
+  if (!DeoptBundle.hasValue()) {
+    assert(AllowStatepointWithNoDeoptInfo &&
+           "Found non-leaf call without deopt info!");
+    return None;
+  }
+
+  return DeoptBundle.getValue().Inputs;
+}
+
 /// Compute the live-in set for every basic block in the function
 static void computeLiveInValues(DominatorTree &DT, Function &F,
                                 GCPtrLivenessData &Data);
@@ -195,10 +215,10 @@ static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                               StatepointLiveSetTy &out);
 
 // TODO: Once we can get to the GCStrategy, this becomes
-// Optional<bool> isGCManagedPointer(const Value *V) const override {
+// Optional<bool> isGCManagedPointer(const Type *Ty) const override {
 
-static bool isGCPointerType(const Type *T) {
-  if (const PointerType *PT = dyn_cast<PointerType>(T))
+static bool isGCPointerType(Type *T) {
+  if (auto *PT = dyn_cast<PointerType>(T))
     // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
     // GC managed heap.  We know that a pointer into this heap needs to be
     // updated and that no other pointer does.
@@ -233,9 +253,8 @@ static bool containsGCPtrType(Type *Ty) {
   if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
     return containsGCPtrType(AT->getElementType());
   if (StructType *ST = dyn_cast<StructType>(Ty))
-    return std::any_of(
-        ST->subtypes().begin(), ST->subtypes().end(),
-        [](Type *SubType) { return containsGCPtrType(SubType); });
+    return std::any_of(ST->subtypes().begin(), ST->subtypes().end(),
+                       containsGCPtrType);
   return false;
 }
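// --- Illustrative aside (not part of the patch): the base/derived pointer
// vocabulary these typedefs support, in a purely hypothetical sketch. If a
// collector may move an object at a safepoint, a pointer derived into its
// interior must be recomputed from the relocated base; mapping each live
// value to its base is what the machinery below computes.
#include <cstddef>

char *derive(char *Base, std::size_t Offset) {
  return Base + Offset;           // derived = base + offset
}

char *reloadAfterSafepoint(char *RelocatedBase, std::size_t Offset) {
  // Rebuilt from the relocated base rather than kept live across the poll.
  return RelocatedBase + Offset;
}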
(V->getName() + Suffix).str() : DefaultName.str(); +} + // Conservatively identifies any definitions which might be live at the // given instruction. The analysis is performed immediately before the // given instruction. Values defined by that instruction are not considered @@ -269,30 +295,56 @@ static void analyzeParsePointLiveness( const CallSite &CS, PartiallyConstructedSafepointRecord &result) { Instruction *inst = CS.getInstruction(); - StatepointLiveSetTy liveset; - findLiveSetAtInst(inst, OriginalLivenessData, liveset); + StatepointLiveSetTy LiveSet; + findLiveSetAtInst(inst, OriginalLivenessData, LiveSet); if (PrintLiveSet) { // Note: This output is used by several of the test cases - // The order of elemtns in a set is not stable, put them in a vec and sort + // The order of elements in a set is not stable, put them in a vec and sort // by name - SmallVector temp; - temp.insert(temp.end(), liveset.begin(), liveset.end()); - std::sort(temp.begin(), temp.end(), order_by_name); + SmallVector Temp; + Temp.insert(Temp.end(), LiveSet.begin(), LiveSet.end()); + std::sort(Temp.begin(), Temp.end(), order_by_name); errs() << "Live Variables:\n"; - for (Value *V : temp) { - errs() << " " << V->getName(); // no newline - V->dump(); - } + for (Value *V : Temp) + dbgs() << " " << V->getName() << " " << *V << "\n"; } if (PrintLiveSetSize) { errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n"; - errs() << "Number live values: " << liveset.size() << "\n"; + errs() << "Number live values: " << LiveSet.size() << "\n"; } - result.liveset = liveset; + result.LiveSet = LiveSet; } -static Value *findBaseDefiningValue(Value *I); +static bool isKnownBaseResult(Value *V); +namespace { +/// A single base defining value - An immediate base defining value for an +/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'. +/// For instructions which have multiple pointer [vector] inputs or that +/// transition between vector and scalar types, there is no immediate base +/// defining value. The 'base defining value' for 'Def' is the transitive +/// closure of this relation stopping at the first instruction which has no +/// immediate base defining value. The b.d.v. might itself be a base pointer, +/// but it can also be an arbitrary derived pointer. +struct BaseDefiningValueResult { + /// Contains the value which is the base defining value. + Value * const BDV; + /// True if the base defining value is also known to be an actual base + /// pointer. + const bool IsKnownBase; + BaseDefiningValueResult(Value *BDV, bool IsKnownBase) + : BDV(BDV), IsKnownBase(IsKnownBase) { +#ifndef NDEBUG + // Check consistency between new and old means of checking whether a BDV is + // a base. + bool MustBeBase = isKnownBaseResult(BDV); + assert(!MustBeBase || MustBeBase == IsKnownBase); +#endif + } +}; +} + +static BaseDefiningValueResult findBaseDefiningValue(Value *I); /// Return a base defining value for the 'Index' element of the given vector /// instruction 'I'. If Index is null, returns a BDV for the entire vector @@ -303,8 +355,8 @@ static Value *findBaseDefiningValue(Value *I); /// vector returned is a BDV (and possibly a base) of the entire vector 'I'. /// If the later, the return pointer is a BDV (or possibly a base) for the /// particular element in 'I'. 
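The suffixed_name_or helper introduced above drives the deterministic naming of the new base instructions created later in this patch. A minimal standalone sketch of its behavior (plain C++ with std::string standing in for StringRef/Twine; names are illustrative only):

    #include <string>

    // Mirror of suffixed_name_or: keep a value's name and append the suffix,
    // or fall back to a generic default when the value is unnamed.
    std::string suffixedNameOr(const std::string &Name,
                               const std::string &Suffix,
                               const std::string &Default) {
      return !Name.empty() ? Name + Suffix : Default;
    }

    // suffixedNameOr("obj", ".base", "base_phi") -> "obj.base"
    // suffixedNameOr("",    ".base", "base_phi") -> "base_phi"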
-static std::pair -findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { +static BaseDefiningValueResult +findBaseDefiningValueOfVector(Value *I) { assert(I->getType()->isVectorTy() && cast(I->getType())->getElementType()->isPointerTy() && "Illegal to ask for the base pointer of a non-pointer type"); @@ -314,7 +366,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { if (isa(I)) // An incoming argument to the function is a base pointer - return std::make_pair(I, true); + return BaseDefiningValueResult(I, true); // We shouldn't see the address of a global as a vector value? assert(!isa(I) && @@ -325,7 +377,7 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { if (isa(I)) // utterly meaningless, but useful for dealing with partially optimized // code. - return std::make_pair(I, true); + return BaseDefiningValueResult(I, true); // Due to inheritance, this must be _after_ the global variable and undef // checks @@ -333,31 +385,17 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { assert(!isa(I) && !isa(I) && "order of checks wrong!"); assert(Con->isNullValue() && "null is the only case which makes sense"); - return std::make_pair(Con, true); + return BaseDefiningValueResult(Con, true); } if (isa(I)) - return std::make_pair(I, true); - - // For an insert element, we might be able to look through it if we know - // something about the indexes. - if (InsertElementInst *IEI = dyn_cast(I)) { - if (Index) { - Value *InsertIndex = IEI->getOperand(2); - // This index is inserting the value, look for its BDV - if (InsertIndex == Index) - return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)), false); - // Both constant, and can't be equal per above. This insert is definitely - // not relevant, look back at the rest of the vector and keep trying. - if (isa(Index) && isa(InsertIndex)) - return findBaseDefiningValueOfVector(IEI->getOperand(0), Index); - } - + return BaseDefiningValueResult(I, true); + + if (isa(I)) // We don't know whether this vector contains entirely base pointers or // not. To be conservatively correct, we treat it as a BDV and will // duplicate code as needed to construct a parallel vector of bases. - return std::make_pair(IEI, false); - } + return BaseDefiningValueResult(I, false); if (isa(I)) // We don't know whether this vector contains entirely base pointers or @@ -365,105 +403,62 @@ findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) { // duplicate code as needed to construct a parallel vector of bases. // TODO: There a number of local optimizations which could be applied here // for particular sufflevector patterns. - return std::make_pair(I, false); + return BaseDefiningValueResult(I, false); // A PHI or Select is a base defining value. The outer findBasePointer // algorithm is responsible for constructing a base value for this BDV. assert((isa(I) || isa(I)) && "unknown vector instruction - no base found for vector element"); - return std::make_pair(I, false); + return BaseDefiningValueResult(I, false); } -static bool isKnownBaseResult(Value *V); - /// Helper function for findBasePointer - Will return a value which either a) -/// defines the base pointer for the input or b) blocks the simple search -/// (i.e. a PHI or Select of two derived pointers) -static Value *findBaseDefiningValue(Value *I) { +/// defines the base pointer for the input, b) blocks the simple search +/// (i.e. a PHI or Select of two derived pointers), or c) involves a change +/// from pointer to vector type or back. 
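To summarize the case analysis in findBaseDefiningValueOfVector above, here is a simplified standalone classifier (a hypothetical enum, not the LLVM types). The point is which vector values already are bases versus which only define a base that the main algorithm must reconstruct:

    // Which vector values are already bases (IsKnownBase == true) versus
    // values that are merely BDVs whose base may need new instructions.
    enum class VecOp { Argument, Undef, NullConstant, Load, InsertElement,
                       ShuffleVector, Phi, Select };

    struct Classified { VecOp Op; bool IsKnownBase; };

    Classified classifyVectorBDV(VecOp Op) {
      switch (Op) {
      case VecOp::Argument:
      case VecOp::Undef:
      case VecOp::NullConstant:
      case VecOp::Load:
        return {Op, true};  // the value itself is a base pointer vector
      default:
        return {Op, false}; // a BDV only; findBasePointer may insert code
      }
    }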
+static BaseDefiningValueResult findBaseDefiningValue(Value *I) { if (I->getType()->isVectorTy()) - return findBaseDefiningValueOfVector(I).first; + return findBaseDefiningValueOfVector(I); assert(I->getType()->isPointerTy() && "Illegal to ask for the base pointer of a non-pointer type"); - // This case is a bit of a hack - it only handles extracts from vectors which - // trivially contain only base pointers or cases where we can directly match - // the index of the original extract element to an insertion into the vector. - // See note inside the function for how to improve this. - if (auto *EEI = dyn_cast(I)) { - Value *VectorOperand = EEI->getVectorOperand(); - Value *Index = EEI->getIndexOperand(); - std::pair pair = - findBaseDefiningValueOfVector(VectorOperand, Index); - Value *VectorBase = pair.first; - if (VectorBase->getType()->isPointerTy()) - // We found a BDV for this specific element with the vector. This is an - // optimization, but in practice it covers most of the useful cases - // created via scalarization. - return VectorBase; - else { - assert(VectorBase->getType()->isVectorTy()); - if (pair.second) - // If the entire vector returned is known to be entirely base pointers, - // then the extractelement is valid base for this value. - return EEI; - else { - // Otherwise, we have an instruction which potentially produces a - // derived pointer and we need findBasePointers to clone code for us - // such that we can create an instruction which produces the - // accompanying base pointer. - // Note: This code is currently rather incomplete. We don't currently - // support the general form of shufflevector of insertelement. - // Conceptually, these are just 'base defining values' of the same - // variety as phi or select instructions. We need to update the - // findBasePointers algorithm to insert new 'base-only' versions of the - // original instructions. This is relative straight forward to do, but - // the case which would motivate the work hasn't shown up in real - // workloads yet. - assert((isa(VectorBase) || isa(VectorBase)) && - "need to extend findBasePointers for generic vector" - "instruction cases"); - return VectorBase; - } - } - } - if (isa(I)) // An incoming argument to the function is a base pointer // We should have never reached here if this argument isn't an gc value - return I; + return BaseDefiningValueResult(I, true); if (isa(I)) // base case - return I; + return BaseDefiningValueResult(I, true); // inlining could possibly introduce phi node that contains // undef if callee has multiple returns if (isa(I)) // utterly meaningless, but useful for dealing with // partially optimized code. - return I; + return BaseDefiningValueResult(I, true); // Due to inheritance, this must be _after_ the global variable and undef // checks - if (Constant *Con = dyn_cast(I)) { + if (isa(I)) { assert(!isa(I) && !isa(I) && "order of checks wrong!"); - // Note: Finding a constant base for something marked for relocation - // doesn't really make sense. The most likely case is either a) some - // screwed up the address space usage or b) your validating against - // compiled C++ code w/o the proper separation. The only real exception - // is a null pointer. You could have generic code written to index of - // off a potentially null value and have proven it null. We also use - // null pointers in dead paths of relocation phis (which we might later - // want to find a base pointer for). 
- assert(isa(Con) && - "null is the only case which makes sense"); - return Con; + // Note: Even for frontends which don't have constant references, we can + // see constants appearing after optimizations. A simple example is + // specialization of an address computation on null feeding into a merge + // point where the actual use of the now-constant input is protected by + // another null check. (e.g. test4 in constants.ll) + return BaseDefiningValueResult(I, true); } if (CastInst *CI = dyn_cast(I)) { Value *Def = CI->stripPointerCasts(); + // If stripping pointer casts changes the address space there is an + // addrspacecast in between. + assert(cast(Def->getType())->getAddressSpace() == + cast(CI->getType())->getAddressSpace() && + "unsupported addrspacecast"); // If we find a cast instruction here, it means we've found a cast which is // not simply a pointer cast (i.e. an inttoptr). We don't know how to // handle int->ptr conversion. @@ -472,7 +467,9 @@ static Value *findBaseDefiningValue(Value *I) { } if (isa(I)) - return I; // The value loaded is an gc base itself + // The value loaded is an gc base itself + return BaseDefiningValueResult(I, true); + if (GetElementPtrInst *GEP = dyn_cast(I)) // The base of this GEP is the base @@ -480,14 +477,11 @@ static Value *findBaseDefiningValue(Value *I) { if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { - case Intrinsic::experimental_gc_result_ptr: default: // fall through to general call handling break; case Intrinsic::experimental_gc_statepoint: - case Intrinsic::experimental_gc_result_float: - case Intrinsic::experimental_gc_result_int: - llvm_unreachable("these don't produce pointers"); + llvm_unreachable("statepoints don't produce pointers"); case Intrinsic::experimental_gc_relocate: { // Rerunning safepoint insertion after safepoints are already // inserted is not supported. It could probably be made to work, @@ -506,17 +500,17 @@ static Value *findBaseDefiningValue(Value *I) { // pointers. This should probably be generalized via attributes to support // both source language and internal functions. if (isa(I) || isa(I)) - return I; + return BaseDefiningValueResult(I, true); // I have absolutely no idea how to implement this part yet. It's not - // neccessarily hard, I just haven't really looked at it yet. + // necessarily hard, I just haven't really looked at it yet. assert(!isa(I) && "Landing Pad is unimplemented"); if (isa(I)) // A CAS is effectively a atomic store and load combined under a // predicate. From the perspective of base pointers, we just treat it // like a load. - return I; + return BaseDefiningValueResult(I, true); assert(!isa(I) && "Xchg handled above, all others are " "binary ops which don't apply to pointers"); @@ -525,34 +519,41 @@ static Value *findBaseDefiningValue(Value *I) { // stack, but in either case, this is simply a field load. As a result, // this is a defining definition of the base just like a load is. if (isa(I)) - return I; + return BaseDefiningValueResult(I, true); // We should never see an insert vector since that would require we be // tracing back a struct value not a pointer value. assert(!isa(I) && "Base pointer for a struct is meaningless"); + // An extractelement produces a base result exactly when it's input does. + // We may need to insert a parallel instruction to extract the appropriate + // element out of the base vector corresponding to the input. Given this, + // it's analogous to the phi and select case even though it's not a merge. 
+ if (isa(I)) + // Note: There a lot of obvious peephole cases here. This are deliberately + // handled after the main base pointer inference algorithm to make writing + // test cases to exercise that code easier. + return BaseDefiningValueResult(I, false); + // The last two cases here don't return a base pointer. Instead, they - // return a value which dynamically selects from amoung several base + // return a value which dynamically selects from among several base // derived pointers (each with it's own base potentially). It's the job of // the caller to resolve these. assert((isa(I) || isa(I)) && "missing instruction case in findBaseDefiningValing"); - return I; + return BaseDefiningValueResult(I, false); } /// Returns the base defining value for this value. static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) { Value *&Cached = Cache[I]; if (!Cached) { - Cached = findBaseDefiningValue(I); + Cached = findBaseDefiningValue(I).BDV; + DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> " + << Cached->getName() << "\n"); } assert(Cache[I] != nullptr); - - if (TraceLSP) { - dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() - << "\n"; - } return Cached; } @@ -572,7 +573,9 @@ static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, /// is it known to be a base pointer? Or do we need to continue searching. static bool isKnownBaseResult(Value *V) { - if (!isa(V) && !isa(V)) { + if (!isa(V) && !isa(V) && + !isa(V) && !isa(V) && + !isa(V)) { // no recursion possible return true; } @@ -587,17 +590,19 @@ static bool isKnownBaseResult(Value *V) { return false; } -// TODO: find a better name for this namespace { -class PhiState { +/// Models the state of a single base defining value in the findBasePointer +/// algorithm for determining where a new instruction is needed to propagate +/// the base of this BDV. +class BDVState { public: enum Status { Unknown, Base, Conflict }; - PhiState(Status s, Value *b = nullptr) : status(s), base(b) { + BDVState(Status s, Value *b = nullptr) : status(s), base(b) { assert(status != Base || b); } - PhiState(Value *b) : status(Base), base(b) {} - PhiState() : status(Unknown), base(nullptr) {} + explicit BDVState(Value *b) : status(Base), base(b) {} + BDVState() : status(Unknown), base(nullptr) {} Status getStatus() const { return status; } Value *getBase() const { return base; } @@ -606,72 +611,80 @@ public: bool isUnknown() const { return getStatus() == Unknown; } bool isConflict() const { return getStatus() == Conflict; } - bool operator==(const PhiState &other) const { + bool operator==(const BDVState &other) const { return base == other.base && status == other.status; } - bool operator!=(const PhiState &other) const { return !(*this == other); } + bool operator!=(const BDVState &other) const { return !(*this == other); } - void dump() { - errs() << status << " (" << base << " - " - << (base ? base->getName() : "nullptr") << "): "; + LLVM_DUMP_METHOD + void dump() const { print(dbgs()); dbgs() << '\n'; } + + void print(raw_ostream &OS) const { + switch (status) { + case Unknown: + OS << "U"; + break; + case Base: + OS << "B"; + break; + case Conflict: + OS << "C"; + break; + }; + OS << " (" << base << " - " + << (base ? 
base->getName() : "nullptr") << "): ";
+  }

 private:
   Status status;
-  Value *base; // non-null only if status == base
+  AssertingVH<Value> base; // non-null only if status == base
 };
+}

-typedef DenseMap<Value *, PhiState> ConflictStateMapTy;
-// Values of type PhiState form a lattice, and this is a helper
+#ifndef NDEBUG
+static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
+  State.print(OS);
+  return OS;
+}
+#endif
+
+namespace {
+// Values of type BDVState form a lattice, and this is a helper
 // class that implements the meet operation. The meat of the meet
-// operation is implemented in MeetPhiStates::pureMeet
-class MeetPhiStates {
+// operation is implemented in MeetBDVStates::pureMeet
+class MeetBDVStates {
 public:
-  // phiStates is a mapping from PHINodes and SelectInst's to PhiStates.
-  explicit MeetPhiStates(const ConflictStateMapTy &phiStates)
-      : phiStates(phiStates) {}
+  /// Initializes the currentResult to the TOP state so that it can be met
+  /// with any other state to produce that state.
+  MeetBDVStates() {}

-  // Destructively meet the current result with the base V. V can
-  // either be a merge instruction (SelectInst / PHINode), in which
-  // case its status is looked up in the phiStates map; or a regular
-  // SSA value, in which case it is assumed to be a base.
-  void meetWith(Value *V) {
-    PhiState otherState = getStateForBDV(V);
-    assert((MeetPhiStates::pureMeet(otherState, currentResult) ==
-            MeetPhiStates::pureMeet(currentResult, otherState)) &&
-           "math is wrong: meet does not commute!");
-    currentResult = MeetPhiStates::pureMeet(otherState, currentResult);
+  // Destructively meet the current result with the given BDVState
+  void meetWith(BDVState otherState) {
+    currentResult = meet(otherState, currentResult);
   }

-  PhiState getResult() const { return currentResult; }
+  BDVState getResult() const { return currentResult; }

 private:
-  const ConflictStateMapTy &phiStates;
-  PhiState currentResult;
+  BDVState currentResult;

-  /// Return a phi state for a base defining value. We'll generate a new
-  /// base state for known bases and expect to find a cached state otherwise
-  PhiState getStateForBDV(Value *baseValue) {
-    if (isKnownBaseResult(baseValue)) {
-      return PhiState(baseValue);
-    } else {
-      return lookupFromMap(baseValue);
-    }
+  /// Perform a meet operation on two elements of the BDVState lattice.
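Since MeetBDVStates is the heart of the dataflow, a standalone sketch of the three-point lattice it operates over may help (statuses only; the real BDVState also carries the base Value, and two different Base values meet to Conflict):

    // Unknown is TOP, Conflict is BOTTOM; meet moves monotonically downward.
    enum class Status { Unknown, Base, Conflict };

    Status meet(Status A, Status B) {
      if (A == Status::Unknown) return B;             // TOP meet X == X
      if (B == Status::Unknown) return A;
      if (A == Status::Conflict || B == Status::Conflict)
        return Status::Conflict;                      // BOTTOM absorbs
      return Status::Base; // both Base; only equal base values stay Base
    }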
+ static BDVState meet(BDVState LHS, BDVState RHS) { + assert((pureMeet(LHS, RHS) == pureMeet(RHS, LHS)) && + "math is wrong: meet does not commute!"); + BDVState Result = pureMeet(LHS, RHS); + DEBUG(dbgs() << "meet of " << LHS << " with " << RHS + << " produced " << Result << "\n"); + return Result; } - PhiState lookupFromMap(Value *V) { - auto I = phiStates.find(V); - assert(I != phiStates.end() && "lookup failed!"); - return I->second; - } - - static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) { + static BDVState pureMeet(const BDVState &stateA, const BDVState &stateB) { switch (stateA.getStatus()) { - case PhiState::Unknown: + case BDVState::Unknown: return stateB; - case PhiState::Base: + case BDVState::Base: assert(stateA.getBase() && "can't be null"); if (stateB.isUnknown()) return stateA; @@ -681,18 +694,20 @@ private: assert(stateA == stateB && "equality broken!"); return stateA; } - return PhiState(PhiState::Conflict); + return BDVState(BDVState::Conflict); } assert(stateB.isConflict() && "only three states!"); - return PhiState(PhiState::Conflict); + return BDVState(BDVState::Conflict); - case PhiState::Conflict: + case BDVState::Conflict: return stateA; } llvm_unreachable("only three states!"); } }; } + + /// For a given value or instruction, figure out what base ptr it's derived /// from. For gc objects, this is simply itself. On success, returns a value /// which is the base pointer. (This is reliable and can be used for @@ -723,171 +738,252 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { // // Note: A simpler form of this would be to add the conflict form of all // PHIs without running the optimistic algorithm. This would be - // analougous to pessimistic data flow and would likely lead to an + // analogous to pessimistic data flow and would likely lead to an // overall worse solution. - ConflictStateMapTy states; - states[def] = PhiState(); - // Recursively fill in all phis & selects reachable from the initial one - // for which we don't already know a definite base value for - // TODO: This should be rewritten with a worklist - bool done = false; - while (!done) { - done = true; - // Since we're adding elements to 'states' as we run, we can't keep - // iterators into the set. - SmallVector Keys; - Keys.reserve(states.size()); - for (auto Pair : states) { - Value *V = Pair.first; - Keys.push_back(V); - } - for (Value *v : Keys) { - assert(!isKnownBaseResult(v) && "why did it get added?"); - if (PHINode *phi = dyn_cast(v)) { - assert(phi->getNumIncomingValues() > 0 && - "zero input phis are illegal"); - for (Value *InVal : phi->incoming_values()) { - Value *local = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(local) && states.find(local) == states.end()) { - states[local] = PhiState(); - done = false; - } - } - } else if (SelectInst *sel = dyn_cast(v)) { - Value *local = findBaseOrBDV(sel->getTrueValue(), cache); - if (!isKnownBaseResult(local) && states.find(local) == states.end()) { - states[local] = PhiState(); - done = false; - } - local = findBaseOrBDV(sel->getFalseValue(), cache); - if (!isKnownBaseResult(local) && states.find(local) == states.end()) { - states[local] = PhiState(); - done = false; - } +#ifndef NDEBUG + auto isExpectedBDVType = [](Value *BDV) { + return isa(BDV) || isa(BDV) || + isa(BDV) || isa(BDV); + }; +#endif + + // Once populated, will contain a mapping from each potentially non-base BDV + // to a lattice value (described above) which corresponds to that BDV. 
+ // We use the order of insertion (DFS over the def/use graph) to provide a + // stable deterministic ordering for visiting DenseMaps (which are unordered) + // below. This is important for deterministic compilation. + MapVector States; + + // Recursively fill in all base defining values reachable from the initial + // one for which we don't already know a definite base value for + /* scope */ { + SmallVector Worklist; + Worklist.push_back(def); + States.insert(std::make_pair(def, BDVState())); + while (!Worklist.empty()) { + Value *Current = Worklist.pop_back_val(); + assert(!isKnownBaseResult(Current) && "why did it get added?"); + + auto visitIncomingValue = [&](Value *InVal) { + Value *Base = findBaseOrBDV(InVal, cache); + if (isKnownBaseResult(Base)) + // Known bases won't need new instructions introduced and can be + // ignored safely + return; + assert(isExpectedBDVType(Base) && "the only non-base values " + "we see should be base defining values"); + if (States.insert(std::make_pair(Base, BDVState())).second) + Worklist.push_back(Base); + }; + if (PHINode *Phi = dyn_cast(Current)) { + for (Value *InVal : Phi->incoming_values()) + visitIncomingValue(InVal); + } else if (SelectInst *Sel = dyn_cast(Current)) { + visitIncomingValue(Sel->getTrueValue()); + visitIncomingValue(Sel->getFalseValue()); + } else if (auto *EE = dyn_cast(Current)) { + visitIncomingValue(EE->getVectorOperand()); + } else if (auto *IE = dyn_cast(Current)) { + visitIncomingValue(IE->getOperand(0)); // vector operand + visitIncomingValue(IE->getOperand(1)); // scalar operand + } else { + // There is one known class of instructions we know we don't handle. + assert(isa(Current)); + llvm_unreachable("unimplemented instruction case"); } } } - if (TraceLSP) { - errs() << "States after initialization:\n"; - for (auto Pair : states) { - Instruction *v = cast(Pair.first); - PhiState state = Pair.second; - state.dump(); - v->dump(); - } +#ifndef NDEBUG + DEBUG(dbgs() << "States after initialization:\n"); + for (auto Pair : States) { + DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n"); } +#endif - // TODO: come back and revisit the state transitions around inputs which - // have reached conflict state. The current version seems too conservative. + // Return a phi state for a base defining value. We'll generate a new + // base state for known bases and expect to find a cached state otherwise. + auto getStateForBDV = [&](Value *baseValue) { + if (isKnownBaseResult(baseValue)) + return BDVState(baseValue); + auto I = States.find(baseValue); + assert(I != States.end() && "lookup failed!"); + return I->second; + }; bool progress = true; while (progress) { #ifndef NDEBUG - size_t oldSize = states.size(); + const size_t oldSize = States.size(); #endif progress = false; - // We're only changing keys in this loop, thus safe to keep iterators - for (auto Pair : states) { - MeetPhiStates calculateMeet(states); - Value *v = Pair.first; - assert(!isKnownBaseResult(v) && "why did it get added?"); - if (SelectInst *select = dyn_cast(v)) { - calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache)); - calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache)); - } else - for (Value *Val : cast(v)->incoming_values()) - calculateMeet.meetWith(findBaseOrBDV(Val, cache)); + // We're only changing values in this loop, thus safe to keep iterators. + // Since this is computing a fixed point, the order of visit does not + // effect the result. 
TODO: We could use a worklist here and make this run + // much faster. + for (auto Pair : States) { + Value *BDV = Pair.first; + assert(!isKnownBaseResult(BDV) && "why did it get added?"); - PhiState oldState = states[v]; - PhiState newState = calculateMeet.getResult(); + // Given an input value for the current instruction, return a BDVState + // instance which represents the BDV of that value. + auto getStateForInput = [&](Value *V) mutable { + Value *BDV = findBaseOrBDV(V, cache); + return getStateForBDV(BDV); + }; + + MeetBDVStates calculateMeet; + if (SelectInst *select = dyn_cast(BDV)) { + calculateMeet.meetWith(getStateForInput(select->getTrueValue())); + calculateMeet.meetWith(getStateForInput(select->getFalseValue())); + } else if (PHINode *Phi = dyn_cast(BDV)) { + for (Value *Val : Phi->incoming_values()) + calculateMeet.meetWith(getStateForInput(Val)); + } else if (auto *EE = dyn_cast(BDV)) { + // The 'meet' for an extractelement is slightly trivial, but it's still + // useful in that it drives us to conflict if our input is. + calculateMeet.meetWith(getStateForInput(EE->getVectorOperand())); + } else { + // Given there's a inherent type mismatch between the operands, will + // *always* produce Conflict. + auto *IE = cast(BDV); + calculateMeet.meetWith(getStateForInput(IE->getOperand(0))); + calculateMeet.meetWith(getStateForInput(IE->getOperand(1))); + } + + BDVState oldState = States[BDV]; + BDVState newState = calculateMeet.getResult(); if (oldState != newState) { progress = true; - states[v] = newState; + States[BDV] = newState; } } - assert(oldSize <= states.size()); - assert(oldSize == states.size() || progress); + assert(oldSize == States.size() && + "fixed point shouldn't be adding any new nodes to state"); } - if (TraceLSP) { - errs() << "States after meet iteration:\n"; - for (auto Pair : states) { - Instruction *v = cast(Pair.first); - PhiState state = Pair.second; - state.dump(); - v->dump(); - } +#ifndef NDEBUG + DEBUG(dbgs() << "States after meet iteration:\n"); + for (auto Pair : States) { + DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n"); } - +#endif + // Insert Phis for all conflicts - // We want to keep naming deterministic in the loop that follows, so - // sort the keys before iteration. This is useful in allowing us to - // write stable tests. Note that there is no invalidation issue here. - SmallVector Keys; - Keys.reserve(states.size()); - for (auto Pair : states) { - Value *V = Pair.first; - Keys.push_back(V); - } - std::sort(Keys.begin(), Keys.end(), order_by_name); // TODO: adjust naming patterns to avoid this order of iteration dependency - for (Value *V : Keys) { - Instruction *v = cast(V); - PhiState state = states[V]; - assert(!isKnownBaseResult(v) && "why did it get added?"); - assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); - if (!state.isConflict()) + for (auto Pair : States) { + Instruction *I = cast(Pair.first); + BDVState State = Pair.second; + assert(!isKnownBaseResult(I) && "why did it get added?"); + assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); + + // extractelement instructions are a bit special in that we may need to + // insert an extract even when we know an exact base for the instruction. + // The problem is that we need to convert from a vector base to a scalar + // base for the particular indice we're interested in. 
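The loop above is a standard optimistic fixed-point computation. Abstracted away from LLVM (a hypothetical helper, not the pass's code), its shape is:

    #include <map>

    // Re-evaluate every node from its inputs until no state changes. The key
    // set is fixed before iteration starts, matching the assert in the pass.
    template <typename Node, typename State, typename EvalFn>
    void runToFixedPoint(std::map<Node, State> &States, EvalFn Eval) {
      bool Progress = true;
      while (Progress) {
        Progress = false;
        for (auto &Entry : States) {
          State New = Eval(Entry.first); // meet over all the node's inputs
          if (New != Entry.second) {
            Entry.second = New;
            Progress = true;
          }
        }
      }
    }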
+ if (State.isBase() && isa(I) && + isa(State.getBase()->getType())) { + auto *EE = cast(I); + // TODO: In many cases, the new instruction is just EE itself. We should + // exploit this, but can't do it here since it would break the invariant + // about the BDV not being known to be a base. + auto *BaseInst = ExtractElementInst::Create(State.getBase(), + EE->getIndexOperand(), + "base_ee", EE); + BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); + States[I] = BDVState(BDVState::Base, BaseInst); + } + + // Since we're joining a vector and scalar base, they can never be the + // same. As a result, we should always see insert element having reached + // the conflict state. + if (isa(I)) { + assert(State.isConflict()); + } + + if (!State.isConflict()) continue; - if (isa(v)) { - int num_preds = - std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); - assert(num_preds > 0 && "how did we reach here"); - PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); - // Add metadata marking this as a base value - auto *const_1 = ConstantInt::get( - Type::getInt32Ty( - v->getParent()->getParent()->getParent()->getContext()), - 1); - auto MDConst = ConstantAsMetadata::get(const_1); - MDNode *md = MDNode::get( - v->getParent()->getParent()->getParent()->getContext(), MDConst); - phi->setMetadata("is_base_value", md); - states[v] = PhiState(PhiState::Conflict, phi); - } else { - SelectInst *sel = cast(v); - // The undef will be replaced later - UndefValue *undef = UndefValue::get(sel->getType()); - SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, - undef, "base_select", sel); - // Add metadata marking this as a base value - auto *const_1 = ConstantInt::get( - Type::getInt32Ty( - v->getParent()->getParent()->getParent()->getContext()), - 1); - auto MDConst = ConstantAsMetadata::get(const_1); - MDNode *md = MDNode::get( - v->getParent()->getParent()->getParent()->getContext(), MDConst); - basesel->setMetadata("is_base_value", md); - states[v] = PhiState(PhiState::Conflict, basesel); - } + /// Create and insert a new instruction which will represent the base of + /// the given instruction 'I'. 
+ auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction* { + if (isa(I)) { + BasicBlock *BB = I->getParent(); + int NumPreds = std::distance(pred_begin(BB), pred_end(BB)); + assert(NumPreds > 0 && "how did we reach here"); + std::string Name = suffixed_name_or(I, ".base", "base_phi"); + return PHINode::Create(I->getType(), NumPreds, Name, I); + } else if (SelectInst *Sel = dyn_cast(I)) { + // The undef will be replaced later + UndefValue *Undef = UndefValue::get(Sel->getType()); + std::string Name = suffixed_name_or(I, ".base", "base_select"); + return SelectInst::Create(Sel->getCondition(), Undef, + Undef, Name, Sel); + } else if (auto *EE = dyn_cast(I)) { + UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType()); + std::string Name = suffixed_name_or(I, ".base", "base_ee"); + return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name, + EE); + } else { + auto *IE = cast(I); + UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType()); + UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType()); + std::string Name = suffixed_name_or(I, ".base", "base_ie"); + return InsertElementInst::Create(VecUndef, ScalarUndef, + IE->getOperand(2), Name, IE); + } + + }; + Instruction *BaseInst = MakeBaseInstPlaceholder(I); + // Add metadata marking this as a base value + BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); + States[I] = BDVState(BDVState::Conflict, BaseInst); } - // Fixup all the inputs of the new PHIs - for (auto Pair : states) { - Instruction *v = cast(Pair.first); - PhiState state = Pair.second; + // Returns a instruction which produces the base pointer for a given + // instruction. The instruction is assumed to be an input to one of the BDVs + // seen in the inference algorithm above. As such, we must either already + // know it's base defining value is a base, or have inserted a new + // instruction to propagate the base of it's BDV and have entered that newly + // introduced instruction into the state table. In either case, we are + // assured to be able to determine an instruction which produces it's base + // pointer. + auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) { + Value *BDV = findBaseOrBDV(Input, cache); + Value *Base = nullptr; + if (isKnownBaseResult(BDV)) { + Base = BDV; + } else { + // Either conflict or base. + assert(States.count(BDV)); + Base = States[BDV].getBase(); + } + assert(Base && "can't be null"); + // The cast is needed since base traversal may strip away bitcasts + if (Base->getType() != Input->getType() && + InsertPt) { + Base = new BitCastInst(Base, Input->getType(), "cast", + InsertPt); + } + return Base; + }; - assert(!isKnownBaseResult(v) && "why did it get added?"); - assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); - if (!state.isConflict()) + // Fixup all the inputs of the new PHIs. Visit order needs to be + // deterministic and predictable because we're naming newly created + // instructions. 
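MakeBaseInstPlaceholder and getBaseForInput together implement a two-phase construction: first materialize a clone of each conflicting merge with undef operands, then wire the operands once every input's base instruction exists. A schematic version (hypothetical Node type, not LLVM IR):

    #include <string>
    #include <vector>

    struct Node {
      std::string Name;
      std::vector<Node *> Operands; // nullptr plays the role of 'undef'
    };

    // Phase 1: clone the shape of a conflicting merge with undef operands.
    Node makePlaceholder(const Node &Of) {
      return Node{Of.Name + ".base",
                  std::vector<Node *>(Of.Operands.size(), nullptr)};
    }

    // Phase 2 (after every placeholder exists): replace each undef operand
    // with the base computed for the corresponding original operand.
    void fixupOperands(Node &Placeholder, const std::vector<Node *> &Bases) {
      Placeholder.Operands = Bases;
    }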
+ for (auto Pair : States) { + Instruction *BDV = cast(Pair.first); + BDVState State = Pair.second; + + assert(!isKnownBaseResult(BDV) && "why did it get added?"); + assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); + if (!State.isConflict()) continue; - if (PHINode *basephi = dyn_cast(state.getBase())) { - PHINode *phi = cast(v); + if (PHINode *basephi = dyn_cast(State.getBase())) { + PHINode *phi = cast(BDV); unsigned NumPHIValues = phi->getNumIncomingValues(); for (unsigned i = 0; i < NumPHIValues; i++) { Value *InVal = phi->getIncomingValue(i); @@ -906,104 +1002,145 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { if (blockIndex != -1) { Value *oldBase = basephi->getIncomingValue(blockIndex); basephi->addIncoming(oldBase, InBB); + #ifndef NDEBUG - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. - assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - } - - // In essense this assert states: the only way two + Value *Base = getBaseForInput(InVal, nullptr); + // In essence this assert states: the only way two // values incoming from the same basic block may be // different is by being different bitcasts of the same // value. A cleanup that remains TODO is changing // findBaseOrBDV to return an llvm::Value of the correct // type (and still remain pure). This will remove the // need to add bitcasts. - assert(base->stripPointerCasts() == oldBase->stripPointerCasts() && + assert(Base->stripPointerCasts() == oldBase->stripPointerCasts() && "sanity -- findBaseOrBDV should be pure!"); #endif continue; } - // Find either the defining value for the PHI or the normal base for - // a non-phi node - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. - assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - } - assert(base && "can't be null"); - // Must use original input BB since base may not be Instruction - // The cast is needed since base traversal may strip away bitcasts - if (base->getType() != basephi->getType()) { - base = new BitCastInst(base, basephi->getType(), "cast", - InBB->getTerminator()); - } - basephi->addIncoming(base, InBB); + // Find the instruction which produces the base for each input. We may + // need to insert a bitcast in the incoming block. + // TODO: Need to split critical edges if insertion is needed + Value *Base = getBaseForInput(InVal, InBB->getTerminator()); + basephi->addIncoming(Base, InBB); } assert(basephi->getNumIncomingValues() == NumPHIValues); - } else { - SelectInst *basesel = cast(state.getBase()); - SelectInst *sel = cast(v); + } else if (SelectInst *BaseSel = dyn_cast(State.getBase())) { + SelectInst *Sel = cast(BDV); // Operand 1 & 2 are true, false path respectively. TODO: refactor to // something more safe and less hacky. for (int i = 1; i <= 2; i++) { - Value *InVal = sel->getOperand(i); - // Find either the defining value for the PHI or the normal base for - // a non-phi node - Value *base = findBaseOrBDV(InVal, cache); - if (!isKnownBaseResult(base)) { - // Either conflict or base. 
- assert(states.count(base)); - base = states[base].getBase(); - assert(base != nullptr && "unknown PhiState!"); - } - assert(base && "can't be null"); - // Must use original input BB since base may not be Instruction - // The cast is needed since base traversal may strip away bitcasts - if (base->getType() != basesel->getType()) { - base = new BitCastInst(base, basesel->getType(), "cast", basesel); - } - basesel->setOperand(i, base); + Value *InVal = Sel->getOperand(i); + // Find the instruction which produces the base for each input. We may + // need to insert a bitcast. + Value *Base = getBaseForInput(InVal, BaseSel); + BaseSel->setOperand(i, Base); } + } else if (auto *BaseEE = dyn_cast(State.getBase())) { + Value *InVal = cast(BDV)->getVectorOperand(); + // Find the instruction which produces the base for each input. We may + // need to insert a bitcast. + Value *Base = getBaseForInput(InVal, BaseEE); + BaseEE->setOperand(0, Base); + } else { + auto *BaseIE = cast(State.getBase()); + auto *BdvIE = cast(BDV); + auto UpdateOperand = [&](int OperandIdx) { + Value *InVal = BdvIE->getOperand(OperandIdx); + Value *Base = getBaseForInput(InVal, BaseIE); + BaseIE->setOperand(OperandIdx, Base); + }; + UpdateOperand(0); // vector operand + UpdateOperand(1); // scalar operand + } + + } + + // Now that we're done with the algorithm, see if we can optimize the + // results slightly by reducing the number of new instructions needed. + // Arguably, this should be integrated into the algorithm above, but + // doing as a post process step is easier to reason about for the moment. + DenseMap ReverseMap; + SmallPtrSet NewInsts; + SmallSetVector, 16> Worklist; + // Note: We need to visit the states in a deterministic order. We uses the + // Keys we sorted above for this purpose. Note that we are papering over a + // bigger problem with the algorithm above - it's visit order is not + // deterministic. A larger change is needed to fix this. 
+ for (auto Pair : States) { + auto *BDV = Pair.first; + auto State = Pair.second; + Value *Base = State.getBase(); + assert(BDV && Base); + assert(!isKnownBaseResult(BDV) && "why did it get added?"); + assert(isKnownBaseResult(Base) && + "must be something we 'know' is a base pointer"); + if (!State.isConflict()) + continue; + + ReverseMap[Base] = BDV; + if (auto *BaseI = dyn_cast(Base)) { + NewInsts.insert(BaseI); + Worklist.insert(BaseI); + } + } + auto ReplaceBaseInstWith = [&](Value *BDV, Instruction *BaseI, + Value *Replacement) { + // Add users which are new instructions (excluding self references) + for (User *U : BaseI->users()) + if (auto *UI = dyn_cast(U)) + if (NewInsts.count(UI) && UI != BaseI) + Worklist.insert(UI); + // Then do the actual replacement + NewInsts.erase(BaseI); + ReverseMap.erase(BaseI); + BaseI->replaceAllUsesWith(Replacement); + assert(States.count(BDV)); + assert(States[BDV].isConflict() && States[BDV].getBase() == BaseI); + States[BDV] = BDVState(BDVState::Conflict, Replacement); + BaseI->eraseFromParent(); + }; + const DataLayout &DL = cast(def)->getModule()->getDataLayout(); + while (!Worklist.empty()) { + Instruction *BaseI = Worklist.pop_back_val(); + assert(NewInsts.count(BaseI)); + Value *Bdv = ReverseMap[BaseI]; + if (auto *BdvI = dyn_cast(Bdv)) + if (BaseI->isIdenticalTo(BdvI)) { + DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n"); + ReplaceBaseInstWith(Bdv, BaseI, Bdv); + continue; + } + if (Value *V = SimplifyInstruction(BaseI, DL)) { + DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n"); + ReplaceBaseInstWith(Bdv, BaseI, V); + continue; } } // Cache all of our results so we can cheaply reuse them // NOTE: This is actually two caches: one of the base defining value // relation and one of the base pointer relation! FIXME - for (auto item : states) { - Value *v = item.first; - Value *base = item.second.getBase(); - assert(v && base); - assert(!isKnownBaseResult(v) && "why did it get added?"); + for (auto Pair : States) { + auto *BDV = Pair.first; + Value *base = Pair.second.getBase(); + assert(BDV && base); - if (TraceLSP) { - std::string fromstr = - cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "") - : "none"; - errs() << "Updating base value cache" - << " for: " << (v->hasName() ? v->getName() : "") - << " from: " << fromstr - << " to: " << (base->hasName() ? base->getName() : "") << "\n"; - } + std::string fromstr = cache.count(BDV) ? cache[BDV]->getName() : "none"; + DEBUG(dbgs() << "Updating base value cache" + << " for: " << BDV->getName() + << " from: " << fromstr + << " to: " << base->getName() << "\n"); - assert(isKnownBaseResult(base) && - "must be something we 'know' is a base pointer"); - if (cache.count(v)) { + if (cache.count(BDV)) { // Once we transition from the BDV relation being store in the cache to // the base relation being stored, it must be stable - assert((!isKnownBaseResult(cache[v]) || cache[v] == base) && + assert((!isKnownBaseResult(cache[BDV]) || cache[BDV] == base) && "base relation should be stable"); } - cache[v] = base; + cache[BDV] = base; } - assert(cache.find(def) != cache.end()); + assert(cache.count(def)); return cache[def]; } @@ -1024,7 +1161,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { // pointer was a base pointer. 
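The cleanup pass above follows a classic replace-and-requeue worklist discipline. Stripped of the LLVM specifics (hypothetical callbacks stand in for isIdenticalTo/SimplifyInstruction and RAUW), it looks like:

    #include <functional>
    #include <optional>
    #include <set>
    #include <vector>

    using Inst = int; // stand-in for Instruction*

    // Pop newly created instructions; when one is redundant, erase it. The
    // real pass also re-queues any other new instructions that used it, since
    // they may now simplify too.
    void simplifyNewInsts(std::set<Inst> &NewInsts,
                          std::function<std::optional<Inst>(Inst)> trySimplify,
                          std::function<void(Inst, Inst)> replace) {
      std::vector<Inst> Worklist(NewInsts.begin(), NewInsts.end());
      while (!Worklist.empty()) {
        Inst I = Worklist.back();
        Worklist.pop_back();
        if (!NewInsts.count(I))
          continue; // already replaced earlier in this loop
        if (std::optional<Inst> R = trySimplify(I)) {
          replace(I, *R);
          NewInsts.erase(I);
        }
      }
    }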
static void findBasePointers(const StatepointLiveSetTy &live, - DenseMap &PointerToBase, + DenseMap &PointerToBase, DominatorTree *DT, DefiningValueMapTy &DVCache) { // For the naming of values inserted to be deterministic - which makes for // much cleaner and more stable tests - we need to assign an order to the @@ -1043,7 +1180,7 @@ findBasePointers(const StatepointLiveSetTy &live, // If you see this trip and like to live really dangerously, the code should // be correct, just with idioms the verifier can't handle. You can try - // disabling the verifier at your own substaintial risk. + // disabling the verifier at your own substantial risk. assert(!isa(base) && "the relocation code needs adjustment to handle the relocation of " "a null pointer constant without causing false positives in the " @@ -1056,8 +1193,8 @@ findBasePointers(const StatepointLiveSetTy &live, static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, const CallSite &CS, PartiallyConstructedSafepointRecord &result) { - DenseMap PointerToBase; - findBasePointers(result.liveset, PointerToBase, &DT, DVCache); + DenseMap PointerToBase; + findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache); if (PrintBasePointers) { // Note: Need to print these in a stable order since this is checked in @@ -1071,8 +1208,11 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, std::sort(Temp.begin(), Temp.end(), order_by_name); for (Value *Ptr : Temp) { Value *Base = PointerToBase[Ptr]; - errs() << " derived %" << Ptr->getName() << " base %" << Base->getName() - << "\n"; + errs() << " derived "; + Ptr->printAsOperand(errs(), false); + errs() << " base "; + Base->printAsOperand(errs(), false); + errs() << "\n";; } } @@ -1086,10 +1226,10 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, PartiallyConstructedSafepointRecord &result); static void recomputeLiveInValues( - Function &F, DominatorTree &DT, Pass *P, ArrayRef toUpdate, + Function &F, DominatorTree &DT, ArrayRef toUpdate, MutableArrayRef records) { // TODO-PERF: reuse the original liveness, then simply run the dataflow - // again. The old values are still live and will help it stablize quickly. + // again. The old values are still live and will help it stabilize quickly. GCPtrLivenessData RevisedLivenessData; computeLiveInValues(DT, F, RevisedLivenessData); for (size_t i = 0; i < records.size(); i++) { @@ -1099,69 +1239,66 @@ static void recomputeLiveInValues( } } -// When inserting gc.relocate calls, we need to ensure there are no uses -// of the original value between the gc.statepoint and the gc.relocate call. -// One case which can arise is a phi node starting one of the successor blocks. -// We also need to be able to insert the gc.relocates only on the path which -// goes through the statepoint. We might need to split an edge to make this -// possible. +// When inserting gc.relocate and gc.result calls, we need to ensure there are +// no uses of the original value / return value between the gc.statepoint and +// the gc.relocate / gc.result call. One case which can arise is a phi node +// starting one of the successor blocks. We also need to be able to insert the +// gc.relocates only on the path which goes through the statepoint. We might +// need to split an edge to make this possible. 
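Both the deterministic naming and the stable test output mentioned above rest on the sort-by-name trick. A standalone version of that comparator (simplified, with names as std::string):

    #include <algorithm>
    #include <string>
    #include <vector>

    struct Val { std::string Name; };

    // Named values sort lexicographically and come before unnamed ones;
    // unnamed values are mutually unordered, which is still a valid strict
    // weak ordering for std::sort.
    bool orderByName(const Val *A, const Val *B) {
      if (!A->Name.empty() && !B->Name.empty())
        return A->Name < B->Name;
      return !A->Name.empty() && B->Name.empty();
    }

    void sortForStableOutput(std::vector<const Val *> &Vs) {
      std::sort(Vs.begin(), Vs.end(), orderByName);
    }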
static BasicBlock * normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, DominatorTree &DT) { BasicBlock *Ret = BB; - if (!BB->getUniquePredecessor()) { - Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, &DT); - } + if (!BB->getUniquePredecessor()) + Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT); - // Now that 'ret' has unique predecessor we can safely remove all phi nodes + // Now that 'Ret' has unique predecessor we can safely remove all phi nodes // from it FoldSingleEntryPHINodes(Ret); - assert(!isa(Ret->begin())); + assert(!isa(Ret->begin()) && + "All PHI nodes should have been removed!"); - // At this point, we can safely insert a gc.relocate as the first instruction - // in Ret if needed. + // At this point, we can safely insert a gc.relocate or gc.result as the first + // instruction in Ret if needed. return Ret; } -static int find_index(ArrayRef livevec, Value *val) { - auto itr = std::find(livevec.begin(), livevec.end(), val); - assert(livevec.end() != itr); - size_t index = std::distance(livevec.begin(), itr); - assert(index < livevec.size()); - return index; -} - -// Create new attribute set containing only attributes which can be transfered +// Create new attribute set containing only attributes which can be transferred // from original call to the safepoint. static AttributeSet legalizeCallAttributes(AttributeSet AS) { - AttributeSet ret; + AttributeSet Ret; for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { - unsigned index = AS.getSlotIndex(Slot); + unsigned Index = AS.getSlotIndex(Slot); - if (index == AttributeSet::ReturnIndex || - index == AttributeSet::FunctionIndex) { + if (Index == AttributeSet::ReturnIndex || + Index == AttributeSet::FunctionIndex) { - for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end; - ++it) { - Attribute attr = *it; + for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) { // Do not allow certain attributes - just skip them // Safepoint can not be read only or read none. - if (attr.hasAttribute(Attribute::ReadNone) || - attr.hasAttribute(Attribute::ReadOnly)) + if (Attr.hasAttribute(Attribute::ReadNone) || + Attr.hasAttribute(Attribute::ReadOnly)) continue; - ret = ret.addAttributes( - AS.getContext(), index, - AttributeSet::get(AS.getContext(), index, AttrBuilder(attr))); + // These attributes control the generation of the gc.statepoint call / + // invoke itself; and once the gc.statepoint is in place, they're of no + // use. + if (Attr.hasAttribute("statepoint-num-patch-bytes") || + Attr.hasAttribute("statepoint-id")) + continue; + + Ret = Ret.addAttributes( + AS.getContext(), Index, + AttributeSet::get(AS.getContext(), Index, AttrBuilder(Attr))); } } // Just skip parameter attributes for now } - return ret; + return Ret; } /// Helper function to place all gc relocates necessary for the given @@ -1173,225 +1310,290 @@ static AttributeSet legalizeCallAttributes(AttributeSet AS) { /// statepointToken - statepoint instruction to which relocates should be /// bound. /// Builder - Llvm IR builder to be used to construct new calls. 
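A miniature of the filtering rule in legalizeCallAttributes, with attribute names as plain strings. This is a sketch of the policy only, not the AttributeSet API:

    #include <string>
    #include <vector>

    // Copy only attributes that remain valid on the new statepoint: drop ones
    // that are now wrong (a safepoint may read/write the heap) and ones the
    // statepoint has already consumed.
    std::vector<std::string>
    legalizeAttrs(const std::vector<std::string> &Attrs) {
      std::vector<std::string> Out;
      for (const std::string &A : Attrs) {
        if (A == "readnone" || A == "readonly")
          continue; // invalid after rewriting
        if (A == "statepoint-id" || A == "statepoint-num-patch-bytes")
          continue; // already encoded into the gc.statepoint itself
        Out.push_back(A);
      }
      return Out;
    }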
-static void CreateGCRelocates(ArrayRef LiveVariables, +static void CreateGCRelocates(ArrayRef LiveVariables, const int LiveStart, - ArrayRef BasePtrs, + ArrayRef BasePtrs, Instruction *StatepointToken, IRBuilder<> Builder) { - SmallVector NewDefs; - NewDefs.reserve(LiveVariables.size()); + if (LiveVariables.empty()) + return; - Module *M = StatepointToken->getParent()->getParent()->getParent(); + auto FindIndex = [](ArrayRef LiveVec, Value *Val) { + auto ValIt = std::find(LiveVec.begin(), LiveVec.end(), Val); + assert(ValIt != LiveVec.end() && "Val not found in LiveVec!"); + size_t Index = std::distance(LiveVec.begin(), ValIt); + assert(Index < LiveVec.size() && "Bug in std::find?"); + return Index; + }; + + // All gc_relocate are set to i8 addrspace(1)* type. We originally generated + // unique declarations for each pointer type, but this proved problematic + // because the intrinsic mangling code is incomplete and fragile. Since + // we're moving towards a single unified pointer type anyways, we can just + // cast everything to an i8* of the right address space. A bitcast is added + // later to convert gc_relocate to the actual value's type. + Module *M = StatepointToken->getModule(); + auto AS = cast(LiveVariables[0]->getType())->getAddressSpace(); + Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)}; + Value *GCRelocateDecl = + Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types); for (unsigned i = 0; i < LiveVariables.size(); i++) { - // We generate a (potentially) unique declaration for every pointer type - // combination. This results is some blow up the function declarations in - // the IR, but removes the need for argument bitcasts which shrinks the IR - // greatly and makes it much more readable. - SmallVector Types; // one per 'any' type - // All gc_relocate are set to i8 addrspace(1)* type. This could help avoid - // cases where the actual value's type mangling is not supported by llvm. A - // bitcast is added later to convert gc_relocate to the actual value's type. - Types.push_back(Type::getInt8PtrTy(M->getContext(), 1)); - Value *GCRelocateDecl = Intrinsic::getDeclaration( - M, Intrinsic::experimental_gc_relocate, Types); - // Generate the gc.relocate call and save the result Value *BaseIdx = - ConstantInt::get(Type::getInt32Ty(M->getContext()), - LiveStart + find_index(LiveVariables, BasePtrs[i])); - Value *LiveIdx = ConstantInt::get( - Type::getInt32Ty(M->getContext()), - LiveStart + find_index(LiveVariables, LiveVariables[i])); + Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i])); + Value *LiveIdx = Builder.getInt32(LiveStart + i); // only specify a debug name if we can give a useful one - Value *Reloc = Builder.CreateCall( + CallInst *Reloc = Builder.CreateCall( GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx}, - LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated" - : ""); + suffixed_name_or(LiveVariables[i], ".relocated", "")); // Trick CodeGen into thinking there are lots of free registers at this // fake call. - cast(Reloc)->setCallingConv(CallingConv::Cold); - - NewDefs.push_back(cast(Reloc)); + Reloc->setCallingConv(CallingConv::Cold); } - assert(NewDefs.size() == LiveVariables.size() && - "missing or extra redefinition at safepoint"); +} + +namespace { + +/// This struct is used to defer RAUWs and `eraseFromParent` s. Using this +/// avoids having to worry about keeping around dangling pointers to Values. 
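The class that follows realizes this deferral. As a simplified standalone model of the same idea (hypothetical Instr type and callbacks, not the LLVM class):

    #include <cassert>

    struct Instr {}; // stand-in for llvm::Instruction

    // Record the (Old, New) pair now; perform the RAUW and deletion only
    // after no other data structure still holds a raw pointer to Old.
    class DeferredRAUW {
      Instr *Old;
      Instr *New;

    public:
      DeferredRAUW(Instr *OldI, Instr *NewI) : Old(OldI), New(NewI) {
        assert(Old != New && "Not allowed!");
      }

      void apply(void (*replaceAllUsesWith)(Instr *, Instr *),
                 void (*eraseFromParent)(Instr *)) {
        if (New)
          replaceAllUsesWith(Old, New);
        eraseFromParent(Old);
        Old = nullptr;
        New = nullptr;
      }
    };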
+class DeferredReplacement { + AssertingVH Old; + AssertingVH New; + +public: + explicit DeferredReplacement(Instruction *Old, Instruction *New) : + Old(Old), New(New) { + assert(Old != New && "Not allowed!"); + } + + /// Does the task represented by this instance. + void doReplacement() { + Instruction *OldI = Old; + Instruction *NewI = New; + + assert(OldI != NewI && "Disallowed at construction?!"); + + Old = nullptr; + New = nullptr; + + if (NewI) + OldI->replaceAllUsesWith(NewI); + OldI->eraseFromParent(); + } +}; } static void -makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ - const SmallVectorImpl &basePtrs, - const SmallVectorImpl &liveVariables, - Pass *P, - PartiallyConstructedSafepointRecord &result) { - assert(basePtrs.size() == liveVariables.size()); - assert(isStatepoint(CS) && +makeStatepointExplicitImpl(const CallSite CS, /* to replace */ + const SmallVectorImpl &BasePtrs, + const SmallVectorImpl &LiveVariables, + PartiallyConstructedSafepointRecord &Result, + std::vector &Replacements) { + assert(BasePtrs.size() == LiveVariables.size()); + assert((UseDeoptBundles || isStatepoint(CS)) && "This method expects to be rewriting a statepoint"); - BasicBlock *BB = CS.getInstruction()->getParent(); - assert(BB); - Function *F = BB->getParent(); - assert(F && "must be set"); - Module *M = F->getParent(); - (void)M; - assert(M && "must be set"); - - // We're not changing the function signature of the statepoint since the gc - // arguments go into the var args section. - Function *gc_statepoint_decl = CS.getCalledFunction(); - // Then go ahead and use the builder do actually do the inserts. We insert // immediately before the previous instruction under the assumption that all // arguments will be available here. We can't insert afterwards since we may // be replacing a terminator. - Instruction *insertBefore = CS.getInstruction(); - IRBuilder<> Builder(insertBefore); - // Copy all of the arguments from the original statepoint - this includes the - // target, call args, and deopt args - SmallVector args; - args.insert(args.end(), CS.arg_begin(), CS.arg_end()); - // TODO: Clear the 'needs rewrite' flag + Instruction *InsertBefore = CS.getInstruction(); + IRBuilder<> Builder(InsertBefore); - // add all the pointers to be relocated (gc arguments) - // Capture the start of the live variable list for use in the gc_relocates - const int live_start = args.size(); - args.insert(args.end(), liveVariables.begin(), liveVariables.end()); + ArrayRef GCArgs(LiveVariables); + uint64_t StatepointID = 0xABCDEF00; + uint32_t NumPatchBytes = 0; + uint32_t Flags = uint32_t(StatepointFlags::None); + + ArrayRef CallArgs; + ArrayRef DeoptArgs; + ArrayRef TransitionArgs; + + Value *CallTarget = nullptr; + + if (UseDeoptBundles) { + CallArgs = {CS.arg_begin(), CS.arg_end()}; + DeoptArgs = GetDeoptBundleOperands(CS); + // TODO: we don't fill in TransitionArgs or Flags in this branch, but we + // could have an operand bundle for that too. 
+ AttributeSet OriginalAttrs = CS.getAttributes(); + + Attribute AttrID = OriginalAttrs.getAttribute(AttributeSet::FunctionIndex, + "statepoint-id"); + if (AttrID.isStringAttribute()) + AttrID.getValueAsString().getAsInteger(10, StatepointID); + + Attribute AttrNumPatchBytes = OriginalAttrs.getAttribute( + AttributeSet::FunctionIndex, "statepoint-num-patch-bytes"); + if (AttrNumPatchBytes.isStringAttribute()) + AttrNumPatchBytes.getValueAsString().getAsInteger(10, NumPatchBytes); + + CallTarget = CS.getCalledValue(); + } else { + // This branch will be gone soon, and we will soon only support the + // UseDeoptBundles == true configuration. + Statepoint OldSP(CS); + StatepointID = OldSP.getID(); + NumPatchBytes = OldSP.getNumPatchBytes(); + Flags = OldSP.getFlags(); + + CallArgs = {OldSP.arg_begin(), OldSP.arg_end()}; + DeoptArgs = {OldSP.vm_state_begin(), OldSP.vm_state_end()}; + TransitionArgs = {OldSP.gc_transition_args_begin(), + OldSP.gc_transition_args_end()}; + CallTarget = OldSP.getCalledValue(); + } // Create the statepoint given all the arguments - Instruction *token = nullptr; - AttributeSet return_attributes; + Instruction *Token = nullptr; + AttributeSet ReturnAttrs; if (CS.isCall()) { - CallInst *toReplace = cast(CS.getInstruction()); - CallInst *call = - Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token"); - call->setTailCall(toReplace->isTailCall()); - call->setCallingConv(toReplace->getCallingConv()); + CallInst *ToReplace = cast(CS.getInstruction()); + CallInst *Call = Builder.CreateGCStatepointCall( + StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs, + TransitionArgs, DeoptArgs, GCArgs, "safepoint_token"); + + Call->setTailCall(ToReplace->isTailCall()); + Call->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain // function attributes. - AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes()); - // In case if we can handle this set of sttributes - set up function attrs + AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); + // In case if we can handle this set of attributes - set up function attrs // directly on statepoint and return attrs later for gc_result intrinsic. - call->setAttributes(new_attrs.getFnAttributes()); - return_attributes = new_attrs.getRetAttributes(); + Call->setAttributes(NewAttrs.getFnAttributes()); + ReturnAttrs = NewAttrs.getRetAttributes(); - token = call; + Token = Call; // Put the following gc_result and gc_relocate calls immediately after the // the old call (which we're about to delete) - BasicBlock::iterator next(toReplace); - assert(BB->end() != next && "not a terminator, must have next"); - next++; - Instruction *IP = &*(next); - Builder.SetInsertPoint(IP); - Builder.SetCurrentDebugLocation(IP->getDebugLoc()); - + assert(ToReplace->getNextNode() && "Not a terminator, must have next!"); + Builder.SetInsertPoint(ToReplace->getNextNode()); + Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc()); } else { - InvokeInst *toReplace = cast(CS.getInstruction()); + InvokeInst *ToReplace = cast(CS.getInstruction()); // Insert the new invoke into the old block. We'll remove the old one in a // moment at which point this will become the new terminator for the // original block. 
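The two string attributes read above arrive as decimal strings and overwrite the defaults only when present. The equivalent decoding logic in standalone form (std::string stand-ins for the attribute values; assumes well-formed input, whereas getAsInteger reports failure):

    #include <cstdint>
    #include <string>

    // Hypothetical helper mirroring the attribute decoding step.
    void parseStatepointAttrs(const std::string &IDStr,
                              const std::string &PatchBytesStr,
                              uint64_t &StatepointID,
                              uint32_t &NumPatchBytes) {
      if (!IDStr.empty())
        StatepointID = std::stoull(IDStr);      // "statepoint-id"
      if (!PatchBytesStr.empty())
        NumPatchBytes = std::stoul(PatchBytesStr); // "statepoint-num-patch-bytes"
    }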
- InvokeInst *invoke = InvokeInst::Create( - gc_statepoint_decl, toReplace->getNormalDest(), - toReplace->getUnwindDest(), args, "", toReplace->getParent()); - invoke->setCallingConv(toReplace->getCallingConv()); + InvokeInst *Invoke = Builder.CreateGCStatepointInvoke( + StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(), + ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, + GCArgs, "statepoint_token"); + + Invoke->setCallingConv(ToReplace->getCallingConv()); // Currently we will fail on parameter attributes and on certain // function attributes. - AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes()); - // In case if we can handle this set of sttributes - set up function attrs + AttributeSet NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); + // In case if we can handle this set of attributes - set up function attrs // directly on statepoint and return attrs later for gc_result intrinsic. - invoke->setAttributes(new_attrs.getFnAttributes()); - return_attributes = new_attrs.getRetAttributes(); + Invoke->setAttributes(NewAttrs.getFnAttributes()); + ReturnAttrs = NewAttrs.getRetAttributes(); - token = invoke; + Token = Invoke; // Generate gc relocates in exceptional path - BasicBlock *unwindBlock = toReplace->getUnwindDest(); - assert(!isa(unwindBlock->begin()) && - unwindBlock->getUniquePredecessor() && + BasicBlock *UnwindBlock = ToReplace->getUnwindDest(); + assert(!isa(UnwindBlock->begin()) && + UnwindBlock->getUniquePredecessor() && "can't safely insert in this block!"); - Instruction *IP = &*(unwindBlock->getFirstInsertionPt()); - Builder.SetInsertPoint(IP); - Builder.SetCurrentDebugLocation(toReplace->getDebugLoc()); + Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt()); + Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); - // Extract second element from landingpad return value. We will attach - // exceptional gc relocates to it. - const unsigned idx = 1; - Instruction *exceptional_token = - cast(Builder.CreateExtractValue( - unwindBlock->getLandingPadInst(), idx, "relocate_token")); - result.UnwindToken = exceptional_token; + // Attach exceptional gc relocates to the landingpad. + Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst(); + Result.UnwindToken = ExceptionalToken; - // Just throw away return value. We will use the one we got for normal - // block. - (void)CreateGCRelocates(liveVariables, live_start, basePtrs, - exceptional_token, Builder); + const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx(); + CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken, + Builder); // Generate gc relocates and returns for normal block - BasicBlock *normalDest = toReplace->getNormalDest(); - assert(!isa(normalDest->begin()) && - normalDest->getUniquePredecessor() && + BasicBlock *NormalDest = ToReplace->getNormalDest(); + assert(!isa(NormalDest->begin()) && + NormalDest->getUniquePredecessor() && "can't safely insert in this block!"); - IP = &*(normalDest->getFirstInsertionPt()); - Builder.SetInsertPoint(IP); + Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt()); // gc relocates will be generated later as if it were regular call // statepoint } - assert(token); + assert(Token && "Should be set in one of the above branches!"); - // Take the name of the original value call if it had one. 
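The call and invoke paths above differ only in where follow-up code can go: after a plain call there is a next instruction, while an invoke is a terminator, so relocates must be placed at the first insertion point of each destination block (which is why each destination needs a unique predecessor). A toy model of that placement decision, not LLVM API:

    #include <cstdio>
    #include <string>
    #include <vector>

    struct Block { std::vector<std::string> Insts; };

    // After a call: insert right behind the instruction itself.
    void insertAfterCall(Block &B, size_t CallIdx, const std::string &Reloc) {
      B.Insts.insert(B.Insts.begin() + CallIdx + 1, Reloc);
    }

    // After an invoke: insert at the top of both destination blocks.
    void insertAfterInvoke(Block &NormalDest, Block &UnwindDest,
                           const std::string &Reloc) {
      NormalDest.Insts.insert(NormalDest.Insts.begin(), Reloc);
      UnwindDest.Insts.insert(UnwindDest.Insts.begin(), Reloc);
    }

    int main() {
      Block BB{{"%tok = statepoint(...)", "ret"}};
      insertAfterCall(BB, 0, "%p.rel = gc.relocate(%tok)");
      for (const auto &I : BB.Insts)
        std::puts(I.c_str());
    }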
-  token->takeName(CS.getInstruction());
+  if (UseDeoptBundles) {
+    Token->setName("statepoint_token");
+    if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
+      StringRef Name =
+          CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
+      CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
+      GCResult->setAttributes(CS.getAttributes().getRetAttributes());
 
-// The GCResult is already inserted, we just need to find it
-#ifndef NDEBUG
-  Instruction *toReplace = CS.getInstruction();
-  assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
-         "only valid use before rewrite is gc.result");
-  assert(!toReplace->hasOneUse() ||
-         isGCResult(cast<Instruction>(*toReplace->user_begin())));
-#endif
+      // We cannot RAUW or delete CS.getInstruction() because it could be in the
+      // live set of some other safepoint, in which case that safepoint's
+      // PartiallyConstructedSafepointRecord will hold a raw pointer to this
+      // llvm::Instruction. Instead, we defer the replacement and deletion to
+      // after the live sets have been made explicit in the IR, and we no longer
+      // have raw pointers to worry about.
+      Replacements.emplace_back(CS.getInstruction(), GCResult);
+    } else {
+      Replacements.emplace_back(CS.getInstruction(), nullptr);
+    }
+  } else {
+    assert(!CS.getInstruction()->hasNUsesOrMore(2) &&
+           "only valid use before rewrite is gc.result");
+    assert(!CS.getInstruction()->hasOneUse() ||
+           isGCResult(cast<Instruction>(*CS.getInstruction()->user_begin())));
 
-  // Update the gc.result of the original statepoint (if any) to use the newly
-  // inserted statepoint. This is safe to do here since the token can't be
-  // considered a live reference.
-  CS.getInstruction()->replaceAllUsesWith(token);
+    // Take the name of the original statepoint token if there was one.
+    Token->takeName(CS.getInstruction());
 
-  result.StatepointToken = token;
+    // Update the gc.result of the original statepoint (if any) to use the newly
+    // inserted statepoint. This is safe to do here since the token can't be
+    // considered a live reference.
+ CS.getInstruction()->replaceAllUsesWith(Token); + CS.getInstruction()->eraseFromParent(); + } + + Result.StatepointToken = Token; // Second, create a gc.relocate for every live variable - CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder); + const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx(); + CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder); } namespace { -struct name_ordering { - Value *base; - Value *derived; - bool operator()(name_ordering const &a, name_ordering const &b) { - return -1 == a.derived->getName().compare(b.derived->getName()); +struct NameOrdering { + Value *Base; + Value *Derived; + + bool operator()(NameOrdering const &a, NameOrdering const &b) { + return -1 == a.Derived->getName().compare(b.Derived->getName()); } }; } -static void stablize_order(SmallVectorImpl &basevec, - SmallVectorImpl &livevec) { - assert(basevec.size() == livevec.size()); - SmallVector temp; - for (size_t i = 0; i < basevec.size(); i++) { - name_ordering v; - v.base = basevec[i]; - v.derived = livevec[i]; - temp.push_back(v); +static void StabilizeOrder(SmallVectorImpl &BaseVec, + SmallVectorImpl &LiveVec) { + assert(BaseVec.size() == LiveVec.size()); + + SmallVector Temp; + for (size_t i = 0; i < BaseVec.size(); i++) { + NameOrdering v; + v.Base = BaseVec[i]; + v.Derived = LiveVec[i]; + Temp.push_back(v); } - std::sort(temp.begin(), temp.end(), name_ordering()); - for (size_t i = 0; i < basevec.size(); i++) { - basevec[i] = temp[i].base; - livevec[i] = temp[i].derived; + + std::sort(Temp.begin(), Temp.end(), NameOrdering()); + for (size_t i = 0; i < BaseVec.size(); i++) { + BaseVec[i] = Temp[i].Base; + LiveVec[i] = Temp[i].Derived; } } @@ -1401,40 +1603,39 @@ static void stablize_order(SmallVectorImpl &basevec, // WARNING: Does not do any fixup to adjust users of the original live // values. That's the callers responsibility. static void -makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P, - PartiallyConstructedSafepointRecord &result) { - auto liveset = result.liveset; - auto PointerToBase = result.PointerToBase; +makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, + PartiallyConstructedSafepointRecord &Result, + std::vector &Replacements) { + const auto &LiveSet = Result.LiveSet; + const auto &PointerToBase = Result.PointerToBase; // Convert to vector for efficient cross referencing. - SmallVector basevec, livevec; - livevec.reserve(liveset.size()); - basevec.reserve(liveset.size()); - for (Value *L : liveset) { - livevec.push_back(L); - - assert(PointerToBase.find(L) != PointerToBase.end()); - Value *base = PointerToBase[L]; - basevec.push_back(base); + SmallVector BaseVec, LiveVec; + LiveVec.reserve(LiveSet.size()); + BaseVec.reserve(LiveSet.size()); + for (Value *L : LiveSet) { + LiveVec.push_back(L); + assert(PointerToBase.count(L)); + Value *Base = PointerToBase.find(L)->second; + BaseVec.push_back(Base); } - assert(livevec.size() == basevec.size()); + assert(LiveVec.size() == BaseVec.size()); // To make the output IR slightly more stable (for use in diffs), ensure a // fixed order of the values in the safepoint (by sorting the value name). // The order is otherwise meaningless. 
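The sort described above exists purely so repeated runs produce byte-identical IR. The same idea in a self-contained sketch: order the (base, derived) pairs by the derived value's name, then write them back into the two parallel vectors:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <utility>
    #include <vector>

    // Deterministic output order for two parallel vectors, keyed on the
    // second ("derived") element -- the same idea as StabilizeOrder above.
    static void stabilizeOrder(std::vector<std::string> &BaseVec,
                               std::vector<std::string> &LiveVec) {
      std::vector<std::pair<std::string, std::string>> Temp;
      for (size_t i = 0; i < BaseVec.size(); i++)
        Temp.push_back({BaseVec[i], LiveVec[i]});
      std::sort(Temp.begin(), Temp.end(),
                [](const auto &A, const auto &B) { return A.second < B.second; });
      for (size_t i = 0; i < BaseVec.size(); i++) {
        BaseVec[i] = Temp[i].first;
        LiveVec[i] = Temp[i].second;
      }
    }

    int main() {
      std::vector<std::string> Bases = {"b2", "b1"}, Derived = {"d2", "d1"};
      stabilizeOrder(Bases, Derived);
      std::printf("%s %s\n", Bases[0].c_str(), Derived[0].c_str()); // b1 d1
    }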
-  stablize_order(basevec, livevec);
+  StabilizeOrder(BaseVec, LiveVec);
 
   // Do the actual rewriting and delete the old statepoint
-  makeStatepointExplicitImpl(CS, basevec, livevec, P, result);
-  CS.getInstruction()->eraseFromParent();
+  makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
 }
 
 // Helper function for the relocationViaAlloca.
-// It receives iterator to the statepoint gc relocates and emits store to the
-// assigned
-// location (via allocaMap) for the each one of them.
-// Add visited values into the visitedLiveValues set we will later use them
-// for sanity check.
+//
+// It receives an iterator over the statepoint gc relocates and emits a store
+// to the assigned location (via allocaMap) for each one of them. It adds the
+// visited values into the visitedLiveValues set, which we will later use
+// for sanity checking.
 static void
 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
                        DenseMap<Value *, Value *> &AllocaMap,
@@ -1459,13 +1660,15 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
     Value *Alloca = AllocaMap[OriginalValue];
 
     // Emit store into the related alloca
-    // All gc_relocate are i8 addrspace(1)* typed, and it must be bitcasted to
+    // All gc_relocates are i8 addrspace(1)* typed; each must be bitcast to
     // the correct type according to alloca.
-    assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator");
+    assert(RelocatedValue->getNextNode() &&
+           "Should always have one since it's not a terminator");
     IRBuilder<> Builder(RelocatedValue->getNextNode());
     Value *CastedRelocatedValue =
-      Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(),
-        RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : "");
+        Builder.CreateBitCast(RelocatedValue,
                               cast<AllocaInst>(Alloca)->getAllocatedType(),
+                              suffixed_name_or(RelocatedValue, ".casted", ""));
 
     StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
     Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
@@ -1501,10 +1704,10 @@ insertRematerializationStores(
   }
 }
 
-/// do all the relocation update via allocas and mem2reg
+/// Do all the relocation update via allocas and mem2reg
 static void relocationViaAlloca(
     Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
-    ArrayRef<struct PartiallyConstructedSafepointRecord> Records) {
+    ArrayRef<PartiallyConstructedSafepointRecord> Records) {
 #ifndef NDEBUG
   // record initial number of (static) allocas; we'll check we have the same
   // number when we get done.
@@ -1531,15 +1734,12 @@ static void relocationViaAlloca(
     PromotableAllocas.push_back(Alloca);
   };
 
-  // emit alloca for each live gc pointer
-  for (unsigned i = 0; i < Live.size(); i++) {
-    emitAllocaFor(Live[i]);
-  }
-
-  // emit allocas for rematerialized values
-  for (size_t i = 0; i < Records.size(); i++) {
-    const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+  // Emit alloca for each live gc pointer
+  for (Value *V : Live)
+    emitAllocaFor(V);
 
+  // Emit allocas for rematerialized values
+  for (const auto &Info : Records)
     for (auto RematerializedValuePair : Info.RematerializedValues) {
       Value *OriginalValue = RematerializedValuePair.second;
      if (AllocaMap.count(OriginalValue) != 0)
@@ -1548,20 +1748,17 @@ static void relocationViaAlloca(
       emitAllocaFor(OriginalValue);
       ++NumRematerializedValues;
     }
-  }
 
   // The next two loops are part of the same conceptual operation. We need to
   // insert a store to the alloca after the original def and at each
   // redefinition. We need to insert a load before each use. These are split
   // into distinct loops for performance reasons.
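The comment above summarizes the whole relocation-via-alloca plan; this toy sketch (strings standing in for IR, no LLVM API) shows the intended shape of the rewritten code before mem2reg turns the slots back into SSA values:

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    // Plan: give every live gc pointer a stack slot, store the relocated
    // value into the slot after each statepoint, and load from the slot
    // before each use; mem2reg then promotes the slots back to SSA form.
    int main() {
      std::map<std::string, std::string> AllocaMap; // live value -> its slot
      AllocaMap["%p"] = "%p.slot";

      std::vector<std::string> IR = {"store %p, %p.slot", // initial def
                                     "%tok = statepoint(...)"};
      IR.push_back("store %p.relocated, " + AllocaMap["%p"]); // after statepoint
      IR.push_back("%p.use = load " + AllocaMap["%p"]);       // before each use
      IR.push_back("call use(%p.use)");

      for (const auto &Line : IR)
        std::puts(Line.c_str());
    }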
-  // update gc pointer after each statepoint
-  // either store a relocated value or null (if no relocated value found for
-  // this gc pointer and it is not a gc_result)
-  // this must happen before we update the statepoint with load of alloca
-  // otherwise we lose the link between statepoint and old def
-  for (size_t i = 0; i < Records.size(); i++) {
-    const struct PartiallyConstructedSafepointRecord &Info = Records[i];
+  // Update gc pointer after each statepoint: either store a relocated value or
+  // null (if no relocated value was found for this gc pointer and it is not a
+  // gc_result). This must happen before we update the statepoint with load of
+  // alloca otherwise we lose the link between statepoint and old def.
+  for (const auto &Info : Records) {
     Value *Statepoint = Info.StatepointToken;
 
     // This will be used for consistency check
@@ -1582,7 +1779,7 @@ static void relocationViaAlloca(
                              VisitedLiveValues);
 
     if (ClobberNonLive) {
-      // As a debuging aid, pretend that an unrelocated pointer becomes null at
+      // As a debugging aid, pretend that an unrelocated pointer becomes null at
       // the gc.statepoint. This will turn some subtle GC problems into
       // slightly easier to debug SEGVs. Note that on large IR files with
       // lots of gc.statepoints this is extremely costly both memory and time
      // wise.
@@ -1612,23 +1809,22 @@ static void relocationViaAlloca(
       // Insert the clobbering stores. These may get intermixed with the
       // gc.results and gc.relocates, but that's fine.
       if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
-        InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
-        InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
+        InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
+        InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
       } else {
-        BasicBlock::iterator Next(cast<CallInst>(Statepoint));
-        Next++;
-        InsertClobbersAt(Next);
+        InsertClobbersAt(cast<CallInst>(Statepoint)->getNextNode());
       }
     }
   }
-  // update use with load allocas and add store for gc_relocated
+
+  // Update use with load allocas and add store for gc_relocated.
   for (auto Pair : AllocaMap) {
     Value *Def = Pair.first;
     Value *Alloca = Pair.second;
 
-    // we pre-record the uses of allocas so that we dont have to worry about
-    // later update
-    // that change the user information.
+    // We pre-record the uses of allocas so that we don't have to worry about
+    // later updates that change the user information.
+
     SmallVector<Instruction *, 20> Uses;
     // PERF: trade a linear scan for repeated reallocation
     Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
@@ -1663,9 +1859,9 @@ static void relocationViaAlloca(
       }
     }
 
-    // emit store for the initial gc value
-    // store must be inserted after load, otherwise store will be in alloca's
-    // use list and an extra load will be inserted before it
+    // Emit store for the initial gc value. Store must be inserted after load,
+    // otherwise store will be in alloca's use list and an extra load will be
+    // inserted before it.
StoreInst *Store = new StoreInst(Def, Alloca); if (Instruction *Inst = dyn_cast(Def)) { if (InvokeInst *Invoke = dyn_cast(Inst)) { @@ -1688,14 +1884,13 @@ static void relocationViaAlloca( assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues && "we must have the same allocas with lives"); if (!PromotableAllocas.empty()) { - // apply mem2reg to promote alloca to SSA + // Apply mem2reg to promote alloca to SSA PromoteMemToReg(PromotableAllocas, DT); } #ifndef NDEBUG - for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; - I++) - if (isa(*I)) + for (auto &I : F.getEntryBlock()) + if (isa(I)) InitialAllocaNum--; assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas"); #endif @@ -1719,28 +1914,27 @@ static void insertUseHolderAfter(CallSite &CS, const ArrayRef Values, // No values to hold live, might as well not insert the empty holder return; - Module *M = CS.getInstruction()->getParent()->getParent()->getParent(); + Module *M = CS.getInstruction()->getModule(); // Use a dummy vararg function to actually hold the values live Function *Func = cast(M->getOrInsertFunction( "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true))); if (CS.isCall()) { // For call safepoints insert dummy calls right after safepoint - BasicBlock::iterator Next(CS.getInstruction()); - Next++; - Holders.push_back(CallInst::Create(Func, Values, "", Next)); + Holders.push_back(CallInst::Create(Func, Values, "", + &*++CS.getInstruction()->getIterator())); return; } // For invoke safepooints insert dummy calls both in normal and // exceptional destination blocks auto *II = cast(CS.getInstruction()); Holders.push_back(CallInst::Create( - Func, Values, "", II->getNormalDest()->getFirstInsertionPt())); + Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt())); Holders.push_back(CallInst::Create( - Func, Values, "", II->getUnwindDest()->getFirstInsertionPt())); + Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt())); } static void findLiveReferences( - Function &F, DominatorTree &DT, Pass *P, ArrayRef toUpdate, + Function &F, DominatorTree &DT, ArrayRef toUpdate, MutableArrayRef records) { GCPtrLivenessData OriginalLivenessData; computeLiveInValues(DT, F, OriginalLivenessData); @@ -1751,12 +1945,12 @@ static void findLiveReferences( } } -/// Remove any vector of pointers from the liveset by scalarizing them over the -/// statepoint instruction. Adds the scalarized pieces to the liveset. It -/// would be preferrable to include the vector in the statepoint itself, but +/// Remove any vector of pointers from the live set by scalarizing them over the +/// statepoint instruction. Adds the scalarized pieces to the live set. It +/// would be preferable to include the vector in the statepoint itself, but /// the lowering code currently does not handle that. Extending it would be /// slightly non-trivial since it requires a format change. Given how rare -/// such cases are (for the moment?) scalarizing is an acceptable comprimise. +/// such cases are (for the moment?) scalarizing is an acceptable compromise. static void splitVectorValues(Instruction *StatepointInst, StatepointLiveSetTy &LiveSet, DenseMap& PointerToBase, @@ -1887,7 +2081,7 @@ static void splitVectorValues(Instruction *StatepointInst, // Helper function for the "rematerializeLiveValues". It walks use chain // starting from the "CurrentValue" until it meets "BaseValue". Only "simple" // values are visited (currently it is GEP's and casts). 
 Returns true if it
-// sucessfully reached "BaseValue" and false otherwise.
+// successfully reached "BaseValue" and false otherwise.
 // Fills "ChainToBase" array with all visited values. "BaseValue" is not
 // recorded.
 static bool findRematerializableChainToBasePointer(
@@ -1907,16 +2101,12 @@ static bool findRematerializableChainToBasePointer(
   }
 
   if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
-    Value *Def = CI->stripPointerCasts();
-
-    // This two checks are basically similar. First one is here for the
-    // consistency with findBasePointers logic.
-    assert(!isa<CastInst>(Def) && "not a pointer cast found");
     if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
       return false;
 
     ChainToBase.push_back(CI);
-    return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
+    return findRematerializableChainToBasePointer(ChainToBase,
+                                                  CI->getOperand(0), BaseValue);
   }
 
   // Not supported instruction in the chain
@@ -1957,8 +2147,8 @@ chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
   return Cost;
 }
 
-// From the statepoint liveset pick values that are cheaper to recompute then to
-// relocate. Remove this values from the liveset, rematerialize them after
+// From the statepoint live set pick values that are cheaper to recompute than
+// to relocate. Remove these values from the live set, rematerialize them after
 // statepoint and record them in "Info" structure. Note that similar to
 // relocated values we don't do any user adjustments here.
 static void rematerializeLiveValues(CallSite CS,
@@ -1970,10 +2160,10 @@ static void rematerializeLiveValues(CallSite CS,
   // We cannot do this in the following loop due to iterator invalidation.
   SmallVector<Value *, 32> LiveValuesToBeDeleted;
 
-  for (Value *LiveValue: Info.liveset) {
+  for (Value *LiveValue: Info.LiveSet) {
     // For each live pointer find its defining chain
     SmallVector<Instruction *, 3> ChainToBase;
-    assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end());
+    assert(Info.PointerToBase.count(LiveValue));
     bool FoundChain =
       findRematerializableChainToBasePointer(ChainToBase,
                                              LiveValue,
@@ -2059,9 +2249,9 @@ static void rematerializeLiveValues(CallSite CS,
       InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
 
       Instruction *NormalInsertBefore =
-          Invoke->getNormalDest()->getFirstInsertionPt();
+          &*Invoke->getNormalDest()->getFirstInsertionPt();
       Instruction *UnwindInsertBefore =
-          Invoke->getUnwindDest()->getFirstInsertionPt();
+          &*Invoke->getUnwindDest()->getFirstInsertionPt();
 
       Instruction *NormalRematerializedValue =
           rematerializeChain(NormalInsertBefore);
@@ -2075,22 +2265,23 @@ static void rematerializeLiveValues(CallSite CS,
 
   // Remove rematerialized values from the live set
   for (auto LiveValue: LiveValuesToBeDeleted) {
-    Info.liveset.erase(LiveValue);
+    Info.LiveSet.erase(LiveValue);
   }
 }
 
-static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
-                              SmallVectorImpl<CallSite> &toUpdate) {
+static bool insertParsePoints(Function &F, DominatorTree &DT,
+                              TargetTransformInfo &TTI,
+                              SmallVectorImpl<CallSite> &ToUpdate) {
 #ifndef NDEBUG
   // sanity check the input
-  std::set<CallSite> uniqued;
-  uniqued.insert(toUpdate.begin(), toUpdate.end());
-  assert(uniqued.size() == toUpdate.size() && "no duplicates please!");
+  std::set<CallSite> Uniqued;
+  Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
+  assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
 
-  for (size_t i = 0; i < toUpdate.size(); i++) {
-    CallSite &CS = toUpdate[i];
+  for (CallSite CS : ToUpdate) {
     assert(CS.getInstruction()->getParent()->getParent() == &F);
-    assert(isStatepoint(CS) && "expected to already be a deopt statepoint");
+
assert((UseDeoptBundles || isStatepoint(CS)) && + "expected to already be a deopt statepoint"); } #endif @@ -2098,50 +2289,45 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // the top of the successor blocks. See the comment on // normalForInvokeSafepoint on exactly what is needed. Note that this step // may restructure the CFG. - for (CallSite CS : toUpdate) { + for (CallSite CS : ToUpdate) { if (!CS.isInvoke()) continue; - InvokeInst *invoke = cast(CS.getInstruction()); - normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), - DT); - normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), - DT); + auto *II = cast(CS.getInstruction()); + normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT); + normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT); } // A list of dummy calls added to the IR to keep various values obviously // live in the IR. We'll remove all of these when done. - SmallVector holders; + SmallVector Holders; // Insert a dummy call with all of the arguments to the vm_state we'll need // for the actual safepoint insertion. This ensures reference arguments in // the deopt argument list are considered live through the safepoint (and // thus makes sure they get relocated.) - for (size_t i = 0; i < toUpdate.size(); i++) { - CallSite &CS = toUpdate[i]; - Statepoint StatepointCS(CS); - + for (CallSite CS : ToUpdate) { SmallVector DeoptValues; - for (Use &U : StatepointCS.vm_state_args()) { - Value *Arg = cast(&U); + + iterator_range DeoptStateRange = + UseDeoptBundles + ? iterator_range(GetDeoptBundleOperands(CS)) + : iterator_range(Statepoint(CS).vm_state_args()); + + for (Value *Arg : DeoptStateRange) { assert(!isUnhandledGCPointerType(Arg->getType()) && "support for FCA unimplemented"); if (isHandledGCPointerType(Arg->getType())) DeoptValues.push_back(Arg); } - insertUseHolderAfter(CS, DeoptValues, holders); + + insertUseHolderAfter(CS, DeoptValues, Holders); } - SmallVector records; - records.reserve(toUpdate.size()); - for (size_t i = 0; i < toUpdate.size(); i++) { - struct PartiallyConstructedSafepointRecord info; - records.push_back(info); - } - assert(records.size() == toUpdate.size()); + SmallVector Records(ToUpdate.size()); - // A) Identify all gc pointers which are staticly live at the given call + // A) Identify all gc pointers which are statically live at the given call // site. - findLiveReferences(F, DT, P, toUpdate, records); + findLiveReferences(F, DT, ToUpdate, Records); // B) Find the base pointers for each live pointer /* scope for caching */ { @@ -2150,10 +2336,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // large numbers of duplicate base_phis. DefiningValueMapTy DVCache; - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - CallSite &CS = toUpdate[i]; - findBasePointers(DT, DVCache, CS, info); + for (size_t i = 0; i < Records.size(); i++) { + PartiallyConstructedSafepointRecord &info = Records[i]; + findBasePointers(DT, DVCache, ToUpdate[i], info); } } // end of cache scope @@ -2170,63 +2355,75 @@ static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, // the base pointers which were identified for that safepoint. We'll then // ask liveness for _every_ base inserted to see what is now live. Then we // remove the dummy calls. 
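The dummy-call trick described above can be shown with a toy liveness model: a value counts as live across the safepoint only if something after the safepoint uses it, so a vararg holder call (literally named __tmp_use in the patch) is inserted to carry the values and erased once liveness has been recorded. Illustrative sketch, not LLVM API:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    struct Inst { std::string Name; std::vector<std::string> Ops; };

    // Fake liveness query: is V used by any instruction after the safepoint?
    static bool liveAcross(const std::vector<Inst> &After,
                           const std::string &V) {
      return std::any_of(After.begin(), After.end(), [&](const Inst &I) {
        return std::find(I.Ops.begin(), I.Ops.end(), V) != I.Ops.end();
      });
    }

    int main() {
      std::vector<Inst> AfterSafepoint = {{"ret", {}}};
      std::printf("%d\n", liveAcross(AfterSafepoint, "%base")); // 0
      AfterSafepoint.insert(AfterSafepoint.begin(),
                            {"__tmp_use", {"%base"}});          // holder call
      std::printf("%d\n", liveAcross(AfterSafepoint, "%base")); // 1
    }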
- holders.reserve(holders.size() + records.size()); - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - CallSite &CS = toUpdate[i]; + Holders.reserve(Holders.size() + Records.size()); + for (size_t i = 0; i < Records.size(); i++) { + PartiallyConstructedSafepointRecord &Info = Records[i]; SmallVector Bases; - for (auto Pair : info.PointerToBase) { + for (auto Pair : Info.PointerToBase) Bases.push_back(Pair.second); - } - insertUseHolderAfter(CS, Bases, holders); + + insertUseHolderAfter(ToUpdate[i], Bases, Holders); } // By selecting base pointers, we've effectively inserted new uses. Thus, we // need to rerun liveness. We may *also* have inserted new defs, but that's // not the key issue. - recomputeLiveInValues(F, DT, P, toUpdate, records); + recomputeLiveInValues(F, DT, ToUpdate, Records); if (PrintBasePointers) { - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; + for (auto &Info : Records) { errs() << "Base Pairs: (w/Relocation)\n"; - for (auto Pair : info.PointerToBase) { - errs() << " derived %" << Pair.first->getName() << " base %" - << Pair.second->getName() << "\n"; + for (auto Pair : Info.PointerToBase) { + errs() << " derived "; + Pair.first->printAsOperand(errs(), false); + errs() << " base "; + Pair.second->printAsOperand(errs(), false); + errs() << "\n"; } } } - for (size_t i = 0; i < holders.size(); i++) { - holders[i]->eraseFromParent(); - holders[i] = nullptr; - } - holders.clear(); + + // It is possible that non-constant live variables have a constant base. For + // example, a GEP with a variable offset from a global. In this case we can + // remove it from the liveset. We already don't add constants to the liveset + // because we assume they won't move at runtime and the GC doesn't need to be + // informed about them. The same reasoning applies if the base is constant. + // Note that the relocation placement code relies on this filtering for + // correctness as it expects the base to be in the liveset, which isn't true + // if the base is constant. + for (auto &Info : Records) + for (auto &BasePair : Info.PointerToBase) + if (isa(BasePair.second)) + Info.LiveSet.erase(BasePair.first); + + for (CallInst *CI : Holders) + CI->eraseFromParent(); + + Holders.clear(); // Do a limited scalarization of any live at safepoint vector values which // contain pointers. This enables this pass to run after vectorization at // the cost of some possible performance loss. TODO: it would be nice to // natively support vectors all the way through the backend so we don't need // to scalarize here. - for (size_t i = 0; i < records.size(); i++) { - struct PartiallyConstructedSafepointRecord &info = records[i]; - Instruction *statepoint = toUpdate[i].getInstruction(); - splitVectorValues(cast(statepoint), info.liveset, - info.PointerToBase, DT); + for (size_t i = 0; i < Records.size(); i++) { + PartiallyConstructedSafepointRecord &Info = Records[i]; + Instruction *Statepoint = ToUpdate[i].getInstruction(); + splitVectorValues(cast(Statepoint), Info.LiveSet, + Info.PointerToBase, DT); } // In order to reduce live set of statepoint we might choose to rematerialize - // some values instead of relocating them. This is purelly an optimization and + // some values instead of relocating them. This is purely an optimization and // does not influence correctness. 
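Rematerialization, mentioned just above, hinges on the chain walk performed by findRematerializableChainToBasePointer: follow only "simple" defining instructions (GEPs and no-op casts) from the derived value back to the base, recording the chain so it can be re-executed after the statepoint instead of relocating the value. A self-contained sketch with a toy node type, not LLVM's:

    #include <cstdio>
    #include <cstring>
    #include <vector>

    struct Node { const char *Kind; Node *Operand; };

    // Record the defining chain from Cur back to Base; bail out on any
    // instruction too complex to cheaply re-execute.
    static bool findChainToBase(std::vector<Node *> &Chain, Node *Cur,
                                Node *Base) {
      if (Cur == Base)
        return true; // reached the base: the chain is rematerializable
      if (!Cur || (std::strcmp(Cur->Kind, "gep") &&
                   std::strcmp(Cur->Kind, "cast")))
        return false; // unsupported instruction in the chain
      Chain.push_back(Cur);
      return findChainToBase(Chain, Cur->Operand, Base);
    }

    int main() {
      Node Base{"alloca", nullptr}, GEP{"gep", &Base}, Cast{"cast", &GEP};
      std::vector<Node *> Chain;
      std::printf("found=%d len=%zu\n",
                  findChainToBase(Chain, &Cast, &Base), Chain.size());
    }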
-  TargetTransformInfo &TTI =
-      P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  for (size_t i = 0; i < Records.size(); i++)
+    rematerializeLiveValues(ToUpdate[i], Records[i], TTI);
 
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
-    CallSite &CS = toUpdate[i];
-
-    rematerializeLiveValues(CS, info, TTI);
-  }
+  // We need this to safely RAUW and delete call or invoke return values that
+  // may themselves be live over a statepoint. For details, please see usage in
+  // makeStatepointExplicitImpl.
+  std::vector<DeferredReplacement> Replacements;
 
   // Now run through and replace the existing statepoints with new ones with
   // the live variables listed. We do not yet update uses of the values being
   // relocated. We have references to live variables that need to
   // survive to the last iteration of this loop. (By construction, the
   // previous statepoint can not be a live variable, thus we can and do remove
   // the old statepoint calls as we go.)
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
-    CallSite &CS = toUpdate[i];
-    makeStatepointExplicit(DT, CS, P, info);
+  for (size_t i = 0; i < Records.size(); i++)
+    makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);
+
+  ToUpdate.clear(); // prevent accidental use of invalid CallSites
+
+  for (auto &PR : Replacements)
+    PR.doReplacement();
+
+  Replacements.clear();
+
+  for (auto &Info : Records) {
+    // These live sets may contain stale Value pointers, since we replaced calls
+    // with operand bundles with calls wrapped in gc.statepoint, and some of
+    // those calls may have been def'ing live gc pointers. Clear these out to
+    // avoid accidentally using them.
+    //
+    // TODO: We should create a separate data structure that does not contain
+    // these live sets, and migrate to using that data structure from this point
+    // onward.
+    Info.LiveSet.clear();
+    Info.PointerToBase.clear();
+  }
-  toUpdate.clear(); // prevent accident use of invalid CallSites
 
   // Do all the fixups of the original live variables to their relocated selves
-  SmallVector<Value *, 128> live;
-  for (size_t i = 0; i < records.size(); i++) {
-    struct PartiallyConstructedSafepointRecord &info = records[i];
+  SmallVector<Value *, 128> Live;
+  for (size_t i = 0; i < Records.size(); i++) {
+    PartiallyConstructedSafepointRecord &Info = Records[i];
+
     // We can't simply save the live set from the original insertion. One of
     // the live values might be the result of a call which needs a safepoint.
     // That Value* no longer exists and we need to use the new gc_result.
-    // Thankfully, the liveset is embedded in the statepoint (and updated), so
+    // Thankfully, the live set is embedded in the statepoint (and updated), so
     // we just grab that.
-    Statepoint statepoint(info.StatepointToken);
-    live.insert(live.end(), statepoint.gc_args_begin(),
-                statepoint.gc_args_end());
+    Statepoint Statepoint(Info.StatepointToken);
+    Live.insert(Live.end(), Statepoint.gc_args_begin(),
+                Statepoint.gc_args_end());
 
 #ifndef NDEBUG
     // Do some basic sanity checks on our liveness results before performing
     // relocation. Relocation can and will turn mistakes in liveness results
     // into non-sensical code which is much harder to debug.
     // TODO: It would be nice to test consistency as well
-    assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
+    assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
            "statepoint must be reachable or liveness is meaningless");
-    for (Value *V : statepoint.gc_args()) {
+    for (Value *V : Statepoint.gc_args()) {
       if (!isa<Instruction>(V))
         // Non-instruction values trivially dominate all possible uses
         continue;
-      auto LiveInst = cast<Instruction>(V);
+      auto *LiveInst = cast<Instruction>(V);
       assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
              "unreachable values should never be live");
-      assert(DT.dominates(LiveInst, info.StatepointToken) &&
+      assert(DT.dominates(LiveInst, Info.StatepointToken) &&
             "basic SSA liveness expectation violated by liveness analysis");
     }
 #endif
   }
-  unique_unsorted(live);
+  unique_unsorted(Live);
 
 #ifndef NDEBUG
   // sanity check
-  for (auto ptr : live) {
-    assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type");
-  }
+  for (auto *Ptr : Live)
+    assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
 #endif
 
-  relocationViaAlloca(F, DT, live, records);
-  return !records.empty();
+  relocationViaAlloca(F, DT, Live, Records);
+  return !Records.empty();
 }
 
 // Handles both return values and arguments for Functions and CallSites.
 template <typename AttrHolder>
-static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
-                                   unsigned Index) {
+static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
+                                      unsigned Index) {
   AttrBuilder R;
   if (AH.getDereferenceableBytes(Index))
     R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
@@ -2296,6 +2509,8 @@ static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
                                   AH.getDereferenceableBytes(Index)));
   if (AH.getDereferenceableOrNullBytes(Index))
     R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
                                   AH.getDereferenceableOrNullBytes(Index)));
+  if (AH.doesNotAlias(Index))
+    R.addAttribute(Attribute::NoAlias);
 
   if (!R.empty())
     AH.setAttributes(AH.getAttributes().removeAttributes(
@@ -2303,25 +2518,25 @@
 }
 
 void
-RewriteStatepointsForGC::stripDereferenceabilityInfoFromPrototype(Function &F) {
+RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
   LLVMContext &Ctx = F.getContext();
 
   for (Argument &A : F.args())
     if (isa<PointerType>(A.getType()))
-      RemoveDerefAttrAtIndex(Ctx, F, A.getArgNo() + 1);
+      RemoveNonValidAttrAtIndex(Ctx, F, A.getArgNo() + 1);
 
   if (isa<PointerType>(F.getReturnType()))
-    RemoveDerefAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
+    RemoveNonValidAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
 }
 
-void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
+void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
   if (F.empty())
     return;
 
   LLVMContext &Ctx = F.getContext();
   MDBuilder Builder(Ctx);
 
-  for (Instruction &I : inst_range(F)) {
+  for (Instruction &I : instructions(F)) {
     if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
       assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
       bool IsImmutableTBAA =
@@ -2344,9 +2559,9 @@ void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
 
     if (CallSite CS = CallSite(&I)) {
       for (int i = 0, e = CS.arg_size(); i != e; i++)
         if (isa<PointerType>(CS.getArgument(i)->getType()))
-          RemoveDerefAttrAtIndex(Ctx, CS, i + 1);
+          RemoveNonValidAttrAtIndex(Ctx, CS, i + 1);
       if (isa<PointerType>(CS.getType()))
-        RemoveDerefAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
+        RemoveNonValidAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
     }
   }
 }
@@ -2365,17 +2580,17 @@
static bool shouldRewriteStatepointsIn(Function &F) { return false; } -void RewriteStatepointsForGC::stripDereferenceabilityInfo(Module &M) { +void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) { #ifndef NDEBUG assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) && "precondition!"); #endif for (Function &F : M) - stripDereferenceabilityInfoFromPrototype(F); + stripNonValidAttributesFromPrototype(F); for (Function &F : M) - stripDereferenceabilityInfoFromBody(F); + stripNonValidAttributesFromBody(F); } bool RewriteStatepointsForGC::runOnFunction(Function &F) { @@ -2389,15 +2604,27 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) { return false; DominatorTree &DT = getAnalysis(F).getDomTree(); + TargetTransformInfo &TTI = + getAnalysis().getTTI(F); + + auto NeedsRewrite = [](Instruction &I) { + if (UseDeoptBundles) { + if (ImmutableCallSite CS = ImmutableCallSite(&I)) + return !callsGCLeafFunction(CS); + return false; + } + + return isStatepoint(I); + }; // Gather all the statepoints which need rewritten. Be careful to only // consider those in reachable code since we need to ask dominance queries // when rewriting. We'll delete the unreachable ones in a moment. SmallVector ParsePointNeeded; bool HasUnreachableStatepoint = false; - for (Instruction &I : inst_range(F)) { + for (Instruction &I : instructions(F)) { // TODO: only the ones with the flag set! - if (isStatepoint(I)) { + if (NeedsRewrite(I)) { if (DT.isReachableFromEntry(I.getParent())) ParsePointNeeded.push_back(CallSite(&I)); else @@ -2428,7 +2655,38 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F) { FoldSingleEntryPHINodes(&BB); } - MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded); + // Before we start introducing relocations, we want to tweak the IR a bit to + // avoid unfortunate code generation effects. The main example is that we + // want to try to make sure the comparison feeding a branch is after any + // safepoints. Otherwise, we end up with a comparison of pre-relocation + // values feeding a branch after relocation. This is semantically correct, + // but results in extra register pressure since both the pre-relocation and + // post-relocation copies must be available in registers. For code without + // relocations this is handled elsewhere, but teaching the scheduler to + // reverse the transform we're about to do would be slightly complex. + // Note: This may extend the live range of the inputs to the icmp and thus + // increase the liveset of any statepoint we move over. This is profitable + // as long as all statepoints are in rare blocks. If we had in-register + // lowering for live values this would be a much safer transform. + auto getConditionInst = [](TerminatorInst *TI) -> Instruction* { + if (auto *BI = dyn_cast(TI)) + if (BI->isConditional()) + return dyn_cast(BI->getCondition()); + // TODO: Extend this to handle switches + return nullptr; + }; + for (BasicBlock &BB : F) { + TerminatorInst *TI = BB.getTerminator(); + if (auto *Cond = getConditionInst(TI)) + // TODO: Handle more than just ICmps here. We should be able to move + // most instructions without side effects or memory access. 
+      if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
+        MadeChange = true;
+        Cond->moveBefore(TI);
+      }
+  }
+
+  MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
   return MadeChange;
 }
 
@@ -2461,7 +2719,7 @@ static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
            "support for FCA unimplemented");
     if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
       // The choice to exclude all things constant here is slightly subtle.
-      // There are two idependent reasons:
+      // There are two independent reasons:
       // - We assume that things which are constant (from LLVM's definition)
       // do not move at runtime. For example, the address of a global
       // variable is fixed, even though its contents may not be.
@@ -2599,7 +2857,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
   } // while( !worklist.empty() )
 
 #ifndef NDEBUG
-  // Sanity check our ouput against SSA properties. This helps catch any
+  // Sanity check our output against SSA properties. This helps catch any
   // missing kills during the above iteration.
   for (BasicBlock &BB : F) {
     checkBasicSSA(DT, Data, BB);
   }
@@ -2620,7 +2878,7 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
   // call result is not live (normal), nor are its arguments
   // (unless they're used again later). This adjustment is
   // specifically what we need to relocate
-  BasicBlock::reverse_iterator rend(Inst);
+  BasicBlock::reverse_iterator rend(Inst->getIterator());
   computeLiveInValues(BB->rbegin(), rend, LiveOut);
   LiveOut.erase(Inst);
   Out.insert(LiveOut.begin(), LiveOut.end());
@@ -2669,5 +2927,5 @@ static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
     assert(Updated.count(KVPair.first) && "record for non-live value");
 #endif
 
-  Info.liveset = Updated;
+  Info.LiveSet = Updated;
 }
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 4d3a708fa20e..2fca803adde8 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/CallSite.h"
@@ -479,6 +480,13 @@ private:
   void visitExtractValueInst(ExtractValueInst &EVI);
   void visitInsertValueInst(InsertValueInst &IVI);
   void visitLandingPadInst(LandingPadInst &I) { markAnythingOverdefined(&I); }
+  void visitFuncletPadInst(FuncletPadInst &FPI) {
+    markAnythingOverdefined(&FPI);
+  }
+  void visitCatchSwitchInst(CatchSwitchInst &CPI) {
+    markAnythingOverdefined(&CPI);
+    visitTerminatorInst(CPI);
+  }
 
   // Instructions that cannot be folded away.
   void visitStoreInst (StoreInst &I);
@@ -539,9 +547,9 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
     return;
   }
 
-  if (isa<InvokeInst>(TI)) {
-    // Invoke instructions successors are always executable.
-    Succs[0] = Succs[1] = true;
+  // Unwinding instructions successors are always executable.
+  if (TI.isExceptional()) {
+    Succs.assign(TI.getNumSuccessors(), true);
     return;
   }
 
@@ -605,8 +613,8 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
       return BI->getSuccessor(CI->isZero()) == To;
     }
 
-    // Invoke instructions successors are always executable.
-    if (isa<InvokeInst>(TI))
+    // Unwinding instructions successors are always executable.
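Stepping back to the statepoint pass for a moment: the condition-sinking pre-pass that closes RewriteStatepointsForGC::runOnFunction above can be modeled on plain strings. A single-use compare is moved to just before its branch so that it ends up after any statepoint later inserted into the block:

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    // Toy reordering: sink the single-use icmp to just before the
    // terminator (the equivalent of Cond->moveBefore(TI) above).
    int main() {
      std::vector<std::string> BB = {"%c = icmp ...", "statepoint ...",
                                     "br %c, ..."};
      auto Cmp = std::find(BB.begin(), BB.end(), "%c = icmp ...");
      std::rotate(Cmp, Cmp + 1, BB.end() - 1); // shift %c down past the statepoint
      for (const auto &I : BB)
        std::puts(I.c_str());
    }

With the compare after the statepoint, only one copy of its operands has to stay in a register across the safepoint, which is the register-pressure argument the comment above makes.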
+ if (TI->isExceptional()) return true; if (SwitchInst *SI = dyn_cast(TI)) { @@ -630,7 +638,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) { #ifndef NDEBUG dbgs() << "Unknown terminator instruction: " << *TI << '\n'; #endif - llvm_unreachable(nullptr); + llvm_unreachable("SCCP: Don't know how to handle this terminator!"); } // visit Implementations - Something changed in this instruction, either an @@ -1126,7 +1134,7 @@ CallOverdefined: // entry block executable and merge in the actual arguments to the call into // the formal arguments of the function. if (!TrackingIncomingArguments.empty() && TrackingIncomingArguments.count(F)){ - MarkBlockExecutable(F->begin()); + MarkBlockExecutable(&F->front()); // Propagate information from this call site into the callee. CallSite::arg_iterator CAI = CS.arg_begin(); @@ -1135,17 +1143,17 @@ CallOverdefined: // If this argument is byval, and if the function is not readonly, there // will be an implicit copy formed of the input aggregate. if (AI->hasByValAttr() && !F->onlyReadsMemory()) { - markOverdefined(AI); + markOverdefined(&*AI); continue; } if (StructType *STy = dyn_cast(AI->getType())) { for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { LatticeVal CallArg = getStructValueState(*CAI, i); - mergeInValue(getStructValueState(AI, i), AI, CallArg); + mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg); } } else { - mergeInValue(AI, getValueState(*CAI)); + mergeInValue(&*AI, getValueState(*CAI)); } } } @@ -1246,18 +1254,18 @@ void SCCPSolver::Solve() { /// even if X isn't defined. bool SCCPSolver::ResolvedUndefsIn(Function &F) { for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (!BBExecutable.count(BB)) + if (!BBExecutable.count(&*BB)) continue; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (Instruction &I : *BB) { // Look for instructions which produce undef values. - if (I->getType()->isVoidTy()) continue; + if (I.getType()->isVoidTy()) continue; - if (StructType *STy = dyn_cast(I->getType())) { + if (StructType *STy = dyn_cast(I.getType())) { // Only a few things that can be structs matter for undef. // Tracked calls must never be marked overdefined in ResolvedUndefsIn. - if (CallSite CS = CallSite(I)) + if (CallSite CS = CallSite(&I)) if (Function *F = CS.getCalledFunction()) if (MRVFunctionsTracked.count(F)) continue; @@ -1270,14 +1278,14 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // Send the results of everything else to overdefined. We could be // more precise than this but it isn't worth bothering. for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - LatticeVal &LV = getStructValueState(I, i); + LatticeVal &LV = getStructValueState(&I, i); if (LV.isUndefined()) - markOverdefined(LV, I); + markOverdefined(LV, &I); } continue; } - LatticeVal &LV = getValueState(I); + LatticeVal &LV = getValueState(&I); if (!LV.isUndefined()) continue; // extractvalue is safe; check here because the argument is a struct. @@ -1287,24 +1295,24 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // Compute the operand LatticeVals, for convenience below. // Anything taking a struct is conservatively assumed to require // overdefined markings. 
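The two SCCP changes above replace the invoke-only special case with a general rule: every exceptional (unwinding) terminator has all of its successors treated as executable, because the analysis cannot predict where an exception will transfer control. A minimal sketch of that rule with a toy terminator type, not LLVM's:

    #include <cstdio>
    #include <vector>

    struct Term { bool IsExceptional; unsigned NumSuccessors; };

    // All successor edges of an exceptional terminator may be taken.
    std::vector<bool> feasibleSuccessors(const Term &TI) {
      std::vector<bool> Succs(TI.NumSuccessors, false);
      if (TI.IsExceptional)
        Succs.assign(TI.NumSuccessors, true);
      // ... branch/switch handling would refine Succs via lattice values ...
      return Succs;
    }

    int main() {
      auto S = feasibleSuccessors({true, 3});
      std::printf("%d %d %d\n", (int)S[0], (int)S[1], (int)S[2]); // 1 1 1
    }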
- if (I->getOperand(0)->getType()->isStructTy()) { - markOverdefined(I); + if (I.getOperand(0)->getType()->isStructTy()) { + markOverdefined(&I); return true; } - LatticeVal Op0LV = getValueState(I->getOperand(0)); + LatticeVal Op0LV = getValueState(I.getOperand(0)); LatticeVal Op1LV; - if (I->getNumOperands() == 2) { - if (I->getOperand(1)->getType()->isStructTy()) { - markOverdefined(I); + if (I.getNumOperands() == 2) { + if (I.getOperand(1)->getType()->isStructTy()) { + markOverdefined(&I); return true; } - Op1LV = getValueState(I->getOperand(1)); + Op1LV = getValueState(I.getOperand(1)); } // If this is an instructions whose result is defined even if the input is // not fully defined, propagate the information. - Type *ITy = I->getType(); - switch (I->getOpcode()) { + Type *ITy = I.getType(); + switch (I.getOpcode()) { case Instruction::Add: case Instruction::Sub: case Instruction::Trunc: @@ -1318,9 +1326,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { case Instruction::FRem: // Floating-point binary operation: be conservative. if (Op0LV.isUndefined() && Op1LV.isUndefined()) - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); else - markOverdefined(I); + markOverdefined(&I); return true; case Instruction::ZExt: case Instruction::SExt: @@ -1332,7 +1340,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { case Instruction::SIToFP: case Instruction::UIToFP: // undef -> 0; some outputs are impossible - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; case Instruction::Mul: case Instruction::And: @@ -1341,7 +1349,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { break; // undef * X -> 0. X could be zero. // undef & X -> 0. X could be zero. - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; case Instruction::Or: @@ -1349,7 +1357,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { if (Op0LV.isUndefined() && Op1LV.isUndefined()) break; // undef | X -> -1. X could be -1. - markForcedConstant(I, Constant::getAllOnesValue(ITy)); + markForcedConstant(&I, Constant::getAllOnesValue(ITy)); return true; case Instruction::Xor: @@ -1357,7 +1365,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // necessary, but we try to be nice to people who expect this // behavior in simple cases if (Op0LV.isUndefined() && Op1LV.isUndefined()) { - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; } // undef ^ X -> undef @@ -1373,7 +1381,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef / X -> 0. X could be maxint. // undef % X -> 0. X could be 1. 
- markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; case Instruction::AShr: @@ -1381,7 +1389,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { if (Op1LV.isUndefined()) break; // undef >>a X -> all ones - markForcedConstant(I, Constant::getAllOnesValue(ITy)); + markForcedConstant(&I, Constant::getAllOnesValue(ITy)); return true; case Instruction::LShr: case Instruction::Shl: @@ -1391,17 +1399,17 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // undef << X -> 0 // undef >> X -> 0 - markForcedConstant(I, Constant::getNullValue(ITy)); + markForcedConstant(&I, Constant::getNullValue(ITy)); return true; case Instruction::Select: - Op1LV = getValueState(I->getOperand(1)); + Op1LV = getValueState(I.getOperand(1)); // undef ? X : Y -> X or Y. There could be commonality between X/Y. if (Op0LV.isUndefined()) { if (!Op1LV.isConstant()) // Pick the constant one if there is any. - Op1LV = getValueState(I->getOperand(2)); + Op1LV = getValueState(I.getOperand(2)); } else if (Op1LV.isUndefined()) { // c ? undef : undef -> undef. No change. - Op1LV = getValueState(I->getOperand(2)); + Op1LV = getValueState(I.getOperand(2)); if (Op1LV.isUndefined()) break; // Otherwise, c ? undef : x -> x. @@ -1410,9 +1418,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { } if (Op1LV.isConstant()) - markForcedConstant(I, Op1LV.getConstant()); + markForcedConstant(&I, Op1LV.getConstant()); else - markOverdefined(I); + markOverdefined(&I); return true; case Instruction::Load: // A load here means one of two things: a load of undef from a global, @@ -1421,9 +1429,9 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { break; case Instruction::ICmp: // X == undef -> undef. Other comparisons get more complicated. - if (cast(I)->isEquality()) + if (cast(&I)->isEquality()) break; - markOverdefined(I); + markOverdefined(&I); return true; case Instruction::Call: case Instruction::Invoke: { @@ -1432,19 +1440,19 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // 2. It could be constant-foldable. // Because of the way we solve return values, tracked calls must // never be marked overdefined in ResolvedUndefsIn. - if (Function *F = CallSite(I).getCalledFunction()) + if (Function *F = CallSite(&I).getCalledFunction()) if (TrackedRetVals.count(F)) break; // If the call is constant-foldable, we mark it overdefined because // we do not know what return values are valid. - markOverdefined(I); + markOverdefined(&I); return true; } default: // If we don't know what should happen here, conservatively mark it // overdefined. - markOverdefined(I); + markOverdefined(&I); return true; } } @@ -1462,7 +1470,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // false. if (isa(BI->getCondition())) { BI->setCondition(ConstantInt::getFalse(BI->getContext())); - markEdgeExecutable(BB, TI->getSuccessor(1)); + markEdgeExecutable(&*BB, TI->getSuccessor(1)); return true; } @@ -1484,7 +1492,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) { // the first constant. 
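A tiny demonstration of why ResolvedUndefsIn can force the constants listed in the rules above: SCCP is free to pick any concrete value for an undef operand, so it picks one that pins the result for every possible X (undef = 0 pins undef & X and undef * X to 0; undef = -1 pins undef | X to all-ones):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Whatever X is, the chosen undef value fixes the result.
      for (uint32_t X : {0u, 7u, 0xFFFFFFFFu})
        std::printf("0 & %u = %u,  ~0u | %u = %u\n", X, 0u & X, X, ~0u | X);
    }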
if (isa(SI->getCondition())) { SI->setCondition(SI->case_begin().getCaseValue()); - markEdgeExecutable(BB, SI->case_begin().getCaseSuccessor()); + markEdgeExecutable(&*BB, SI->case_begin().getCaseSuccessor()); return true; } @@ -1506,6 +1514,7 @@ namespace { struct SCCP : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addPreserved(); } static char ID; // Pass identification, replacement for typeid SCCP() : FunctionPass(ID) { @@ -1541,11 +1550,10 @@ static void DeleteInstructionInBlock(BasicBlock *BB) { Instruction *EndInst = BB->getTerminator(); // Last not to be deleted. while (EndInst != BB->begin()) { // Delete the next to last instruction. - BasicBlock::iterator I = EndInst; - Instruction *Inst = --I; + Instruction *Inst = &*--EndInst->getIterator(); if (!Inst->use_empty()) Inst->replaceAllUsesWith(UndefValue::get(Inst->getType())); - if (isa(Inst)) { + if (Inst->isEHPad()) { EndInst = Inst; continue; } @@ -1568,11 +1576,11 @@ bool SCCP::runOnFunction(Function &F) { SCCPSolver Solver(DL, TLI); // Mark the first block of the function as being executable. - Solver.MarkBlockExecutable(F.begin()); + Solver.MarkBlockExecutable(&F.front()); // Mark all arguments to the function as being overdefined. - for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end(); AI != E;++AI) - Solver.markAnythingOverdefined(AI); + for (Argument &AI : F.args()) + Solver.markAnythingOverdefined(&AI); // Solve for constants. bool ResolvedUndefs = true; @@ -1589,8 +1597,8 @@ bool SCCP::runOnFunction(Function &F) { // as we cannot modify the CFG of the function. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (!Solver.isBlockExecutable(BB)) { - DeleteInstructionInBlock(BB); + if (!Solver.isBlockExecutable(&*BB)) { + DeleteInstructionInBlock(&*BB); MadeChanges = true; continue; } @@ -1599,7 +1607,7 @@ bool SCCP::runOnFunction(Function &F) { // constants if we have found them to be of constant values. // for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { - Instruction *Inst = BI++; + Instruction *Inst = &*BI++; if (Inst->getType()->isVoidTy() || isa(Inst)) continue; @@ -1713,36 +1721,34 @@ bool IPSCCP::runOnModule(Module &M) { // If this is a strong or ODR definition of this function, then we can // propagate information about its result into callsites of it. if (!F->mayBeOverridden()) - Solver.AddTrackedFunction(F); + Solver.AddTrackedFunction(&*F); // If this function only has direct calls that we can see, we can track its // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. if (F->hasLocalLinkage()) { - if (AddressIsTaken(F)) - AddressTakenFunctions.insert(F); + if (AddressIsTaken(&*F)) + AddressTakenFunctions.insert(&*F); else { - Solver.AddArgumentTrackedFunction(F); + Solver.AddArgumentTrackedFunction(&*F); continue; } } // Assume the function is called. - Solver.MarkBlockExecutable(F->begin()); + Solver.MarkBlockExecutable(&F->front()); // Assume nothing about the incoming arguments. - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); - AI != E; ++AI) - Solver.markAnythingOverdefined(AI); + for (Argument &AI : F->args()) + Solver.markAnythingOverdefined(&AI); } // Loop over global variables. We inform the solver about any internal global // variables that do not have their 'addresses taken'. If they don't have // their addresses taken, we can propagate constants through them. 
- for (Module::global_iterator G = M.global_begin(), E = M.global_end(); - G != E; ++G) - if (!G->isConstant() && G->hasLocalLinkage() && !AddressIsTaken(G)) - Solver.TrackValueOfGlobalVariable(G); + for (GlobalVariable &G : M.globals()) + if (!G.isConstant() && G.hasLocalLinkage() && !AddressIsTaken(&G)) + Solver.TrackValueOfGlobalVariable(&G); // Solve for constants. bool ResolvedUndefs = true; @@ -1763,7 +1769,10 @@ bool IPSCCP::runOnModule(Module &M) { SmallVector BlocksToErase; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (Solver.isBlockExecutable(F->begin())) { + if (F->isDeclaration()) + continue; + + if (Solver.isBlockExecutable(&F->front())) { for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { if (AI->use_empty() || AI->getType()->isStructTy()) continue; @@ -1771,7 +1780,7 @@ bool IPSCCP::runOnModule(Module &M) { // TODO: Could use getStructLatticeValueFor to find out if the entire // result is a constant and replace it entirely if so. - LatticeVal IV = Solver.getLatticeValueFor(AI); + LatticeVal IV = Solver.getLatticeValueFor(&*AI); if (IV.isOverdefined()) continue; Constant *CST = IV.isConstant() ? @@ -1786,28 +1795,27 @@ bool IPSCCP::runOnModule(Module &M) { } for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - if (!Solver.isBlockExecutable(BB)) { - DeleteInstructionInBlock(BB); + if (!Solver.isBlockExecutable(&*BB)) { + DeleteInstructionInBlock(&*BB); MadeChanges = true; TerminatorInst *TI = BB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = TI->getSuccessor(i); + for (BasicBlock *Succ : TI->successors()) { if (!Succ->empty() && isa(Succ->begin())) - TI->getSuccessor(i)->removePredecessor(BB); + Succ->removePredecessor(&*BB); } if (!TI->use_empty()) TI->replaceAllUsesWith(UndefValue::get(TI->getType())); TI->eraseFromParent(); - new UnreachableInst(M.getContext(), BB); + new UnreachableInst(M.getContext(), &*BB); if (&*BB != &F->front()) - BlocksToErase.push_back(BB); + BlocksToErase.push_back(&*BB); continue; } for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) { - Instruction *Inst = BI++; + Instruction *Inst = &*BI++; if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy()) continue; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 947513a36572..a7361b5fe083 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -23,12 +23,12 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/SROA.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/Analysis/ValueTracking.h" @@ -37,8 +37,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" @@ -53,9 +51,9 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/TimeValue.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" -#include 
"llvm/Transforms/Utils/SSAUpdater.h" #if __cplusplus >= 201103L && !defined(NDEBUG) // We only use this for a debug check in C++11 @@ -63,6 +61,7 @@ #endif using namespace llvm; +using namespace llvm::sroa; #define DEBUG_TYPE "sroa" @@ -77,11 +76,6 @@ STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); -/// Hidden option to force the pass to not use DomTree and mem2reg, instead -/// forming SSA values through the SSAUpdater infrastructure. -static cl::opt ForceSSAUpdater("force-ssa-updater", cl::init(false), - cl::Hidden); - /// Hidden option to enable randomly shuffling the slices to help uncover /// instability in their order. static cl::opt SROARandomShuffleSlices("sroa-random-shuffle-slices", @@ -205,7 +199,6 @@ template struct isPodLike; template <> struct isPodLike { static const bool value = true; }; } -namespace { /// \brief Representation of the alloca slices. /// /// This class represents the slices of an alloca which are formed by its @@ -213,7 +206,7 @@ namespace { /// for the slices used and we reflect that in this structure. The uses are /// stored, sorted by increasing beginning offset and with unsplittable slices /// starting at a particular offset before splittable slices. -class AllocaSlices { +class llvm::sroa::AllocaSlices { public: /// \brief Construct the slices of a particular alloca. AllocaSlices(const DataLayout &DL, AllocaInst &AI); @@ -253,281 +246,10 @@ public: std::inplace_merge(Slices.begin(), SliceI, Slices.end()); } - // Forward declare an iterator to befriend it. + // Forward declare the iterator and range accessor for walking the + // partitions. class partition_iterator; - - /// \brief A partition of the slices. - /// - /// An ephemeral representation for a range of slices which can be viewed as - /// a partition of the alloca. This range represents a span of the alloca's - /// memory which cannot be split, and provides access to all of the slices - /// overlapping some part of the partition. - /// - /// Objects of this type are produced by traversing the alloca's slices, but - /// are only ephemeral and not persistent. - class Partition { - private: - friend class AllocaSlices; - friend class AllocaSlices::partition_iterator; - - /// \brief The begining and ending offsets of the alloca for this partition. - uint64_t BeginOffset, EndOffset; - - /// \brief The start end end iterators of this partition. - iterator SI, SJ; - - /// \brief A collection of split slice tails overlapping the partition. - SmallVector SplitTails; - - /// \brief Raw constructor builds an empty partition starting and ending at - /// the given iterator. - Partition(iterator SI) : SI(SI), SJ(SI) {} - - public: - /// \brief The start offset of this partition. - /// - /// All of the contained slices start at or after this offset. - uint64_t beginOffset() const { return BeginOffset; } - - /// \brief The end offset of this partition. - /// - /// All of the contained slices end at or before this offset. - uint64_t endOffset() const { return EndOffset; } - - /// \brief The size of the partition. - /// - /// Note that this can never be zero. - uint64_t size() const { - assert(BeginOffset < EndOffset && "Partitions must span some bytes!"); - return EndOffset - BeginOffset; - } - - /// \brief Test whether this partition contains no slices, and merely spans - /// a region occupied by split slices. 
- bool empty() const { return SI == SJ; } - - /// \name Iterate slices that start within the partition. - /// These may be splittable or unsplittable. They have a begin offset >= the - /// partition begin offset. - /// @{ - // FIXME: We should probably define a "concat_iterator" helper and use that - // to stitch together pointee_iterators over the split tails and the - // contiguous iterators of the partition. That would give a much nicer - // interface here. We could then additionally expose filtered iterators for - // split, unsplit, and unsplittable splices based on the usage patterns. - iterator begin() const { return SI; } - iterator end() const { return SJ; } - /// @} - - /// \brief Get the sequence of split slice tails. - /// - /// These tails are of slices which start before this partition but are - /// split and overlap into the partition. We accumulate these while forming - /// partitions. - ArrayRef splitSliceTails() const { return SplitTails; } - }; - - /// \brief An iterator over partitions of the alloca's slices. - /// - /// This iterator implements the core algorithm for partitioning the alloca's - /// slices. It is a forward iterator as we don't support backtracking for - /// efficiency reasons, and re-use a single storage area to maintain the - /// current set of split slices. - /// - /// It is templated on the slice iterator type to use so that it can operate - /// with either const or non-const slice iterators. - class partition_iterator - : public iterator_facade_base { - friend class AllocaSlices; - - /// \brief Most of the state for walking the partitions is held in a class - /// with a nice interface for examining them. - Partition P; - - /// \brief We need to keep the end of the slices to know when to stop. - AllocaSlices::iterator SE; - - /// \brief We also need to keep track of the maximum split end offset seen. - /// FIXME: Do we really? - uint64_t MaxSplitSliceEndOffset; - - /// \brief Sets the partition to be empty at given iterator, and sets the - /// end iterator. - partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE) - : P(SI), SE(SE), MaxSplitSliceEndOffset(0) { - // If not already at the end, advance our state to form the initial - // partition. - if (SI != SE) - advance(); - } - - /// \brief Advance the iterator to the next partition. - /// - /// Requires that the iterator not be at the end of the slices. - void advance() { - assert((P.SI != SE || !P.SplitTails.empty()) && - "Cannot advance past the end of the slices!"); - - // Clear out any split uses which have ended. - if (!P.SplitTails.empty()) { - if (P.EndOffset >= MaxSplitSliceEndOffset) { - // If we've finished all splits, this is easy. - P.SplitTails.clear(); - MaxSplitSliceEndOffset = 0; - } else { - // Remove the uses which have ended in the prior partition. This - // cannot change the max split slice end because we just checked that - // the prior partition ended prior to that max. 
- P.SplitTails.erase( - std::remove_if( - P.SplitTails.begin(), P.SplitTails.end(), - [&](Slice *S) { return S->endOffset() <= P.EndOffset; }), - P.SplitTails.end()); - assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(), - [&](Slice *S) { - return S->endOffset() == MaxSplitSliceEndOffset; - }) && - "Could not find the current max split slice offset!"); - assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(), - [&](Slice *S) { - return S->endOffset() <= MaxSplitSliceEndOffset; - }) && - "Max split slice end offset is not actually the max!"); - } - } - - // If P.SI is already at the end, then we've cleared the split tail and - // now have an end iterator. - if (P.SI == SE) { - assert(P.SplitTails.empty() && "Failed to clear the split slices!"); - return; - } - - // If we had a non-empty partition previously, set up the state for - // subsequent partitions. - if (P.SI != P.SJ) { - // Accumulate all the splittable slices which started in the old - // partition into the split list. - for (Slice &S : P) - if (S.isSplittable() && S.endOffset() > P.EndOffset) { - P.SplitTails.push_back(&S); - MaxSplitSliceEndOffset = - std::max(S.endOffset(), MaxSplitSliceEndOffset); - } - - // Start from the end of the previous partition. - P.SI = P.SJ; - - // If P.SI is now at the end, we at most have a tail of split slices. - if (P.SI == SE) { - P.BeginOffset = P.EndOffset; - P.EndOffset = MaxSplitSliceEndOffset; - return; - } - - // If the we have split slices and the next slice is after a gap and is - // not splittable immediately form an empty partition for the split - // slices up until the next slice begins. - if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset && - !P.SI->isSplittable()) { - P.BeginOffset = P.EndOffset; - P.EndOffset = P.SI->beginOffset(); - return; - } - } - - // OK, we need to consume new slices. Set the end offset based on the - // current slice, and step SJ past it. The beginning offset of the - // parttion is the beginning offset of the next slice unless we have - // pre-existing split slices that are continuing, in which case we begin - // at the prior end offset. - P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset; - P.EndOffset = P.SI->endOffset(); - ++P.SJ; - - // There are two strategies to form a partition based on whether the - // partition starts with an unsplittable slice or a splittable slice. - if (!P.SI->isSplittable()) { - // When we're forming an unsplittable region, it must always start at - // the first slice and will extend through its end. - assert(P.BeginOffset == P.SI->beginOffset()); - - // Form a partition including all of the overlapping slices with this - // unsplittable slice. - while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { - if (!P.SJ->isSplittable()) - P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); - ++P.SJ; - } - - // We have a partition across a set of overlapping unsplittable - // partitions. - return; - } - - // If we're starting with a splittable slice, then we need to form - // a synthetic partition spanning it and any other overlapping splittable - // splices. - assert(P.SI->isSplittable() && "Forming a splittable partition!"); - - // Collect all of the overlapping splittable slices. - while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset && - P.SJ->isSplittable()) { - P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); - ++P.SJ; - } - - // Back upiP.EndOffset if we ended the span early when encountering an - // unsplittable slice. 
This synthesizes the early end offset of - a partition spanning only splittable slices. - if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { - assert(!P.SJ->isSplittable()); - P.EndOffset = P.SJ->beginOffset(); - } - } - - public: - bool operator==(const partition_iterator &RHS) const { - assert(SE == RHS.SE && - "End iterators don't match between compared partition iterators!"); - - // The observed positions of partitions is marked by the P.SI iterator and - // the emptyness of the split slices. The latter is only relevant when - // P.SI == SE, as the end iterator will additionally have an empty split - // slices list, but the prior may have the same P.SI and a tail of split - // slices. - if (P.SI == RHS.P.SI && - P.SplitTails.empty() == RHS.P.SplitTails.empty()) { - assert(P.SJ == RHS.P.SJ && - "Same set of slices formed two different sized partitions!"); - assert(P.SplitTails.size() == RHS.P.SplitTails.size() && - "Same slice position with differently sized non-empty split " - "slice tails!"); - return true; - } - return false; - } - - partition_iterator &operator++() { - advance(); - return *this; - } - - Partition &operator*() { return P; } - }; - - /// \brief A forward range over the partitions of the alloca's slices. - /// - /// This accesses an iterator range over the partitions of the alloca's - /// slices. It computes these partitions on the fly based on the overlapping - /// offsets of the slices and the ability to split them. It will visit "empty" - /// partitions to cover regions of the alloca only accessed via split - /// slices. - iterator_range partitions() { - return make_range(partition_iterator(begin(), end()), - partition_iterator(end(), end())); - } + iterator_range partitions(); /// \brief Access the dead users for this alloca. ArrayRef getDeadUsers() const { return DeadUsers; } @@ -595,6 +317,280 @@ private: /// the alloca. SmallVector DeadOperands; }; + +/// \brief A partition of the slices. +/// +/// An ephemeral representation for a range of slices which can be viewed as +/// a partition of the alloca. This range represents a span of the alloca's +/// memory which cannot be split, and provides access to all of the slices +/// overlapping some part of the partition. +/// +/// Objects of this type are produced by traversing the alloca's slices, but +/// are only ephemeral and not persistent. +class llvm::sroa::Partition {
+private: + friend class AllocaSlices; + friend class AllocaSlices::partition_iterator; + + typedef AllocaSlices::iterator iterator; + + /// \brief The beginning and ending offsets of the alloca for this + /// partition. + uint64_t BeginOffset, EndOffset; + + /// \brief The start and end iterators of this partition. + iterator SI, SJ; + + /// \brief A collection of split slice tails overlapping the partition. + SmallVector SplitTails; + + /// \brief Raw constructor builds an empty partition starting and ending at + /// the given iterator. + Partition(iterator SI) : SI(SI), SJ(SI) {} + +public: + /// \brief The start offset of this partition. + /// + /// All of the contained slices start at or after this offset. + uint64_t beginOffset() const { return BeginOffset; } + + /// \brief The end offset of this partition. + /// + /// All of the contained slices end at or before this offset. + uint64_t endOffset() const { return EndOffset; } + + /// \brief The size of the partition. + /// + /// Note that this can never be zero.
+ uint64_t size() const { + assert(BeginOffset < EndOffset && "Partitions must span some bytes!"); + return EndOffset - BeginOffset; + } + + /// \brief Test whether this partition contains no slices, and merely spans + /// a region occupied by split slices. + bool empty() const { return SI == SJ; } + + /// \name Iterate slices that start within the partition. + /// These may be splittable or unsplittable. They have a begin offset >= the + /// partition begin offset. + /// @{ + // FIXME: We should probably define a "concat_iterator" helper and use that + // to stitch together pointee_iterators over the split tails and the + // contiguous iterators of the partition. That would give a much nicer + // interface here. We could then additionally expose filtered iterators for + // split, unsplit, and unsplittable splices based on the usage patterns. + iterator begin() const { return SI; } + iterator end() const { return SJ; } + /// @} + + /// \brief Get the sequence of split slice tails. + /// + /// These tails are of slices which start before this partition but are + /// split and overlap into the partition. We accumulate these while forming + /// partitions. + ArrayRef splitSliceTails() const { return SplitTails; } +}; + +/// \brief An iterator over partitions of the alloca's slices. +/// +/// This iterator implements the core algorithm for partitioning the alloca's +/// slices. It is a forward iterator as we don't support backtracking for +/// efficiency reasons, and re-use a single storage area to maintain the +/// current set of split slices. +/// +/// It is templated on the slice iterator type to use so that it can operate +/// with either const or non-const slice iterators. +class AllocaSlices::partition_iterator + : public iterator_facade_base { + friend class AllocaSlices; + + /// \brief Most of the state for walking the partitions is held in a class + /// with a nice interface for examining them. + Partition P; + + /// \brief We need to keep the end of the slices to know when to stop. + AllocaSlices::iterator SE; + + /// \brief We also need to keep track of the maximum split end offset seen. + /// FIXME: Do we really? + uint64_t MaxSplitSliceEndOffset; + + /// \brief Sets the partition to be empty at given iterator, and sets the + /// end iterator. + partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE) + : P(SI), SE(SE), MaxSplitSliceEndOffset(0) { + // If not already at the end, advance our state to form the initial + // partition. + if (SI != SE) + advance(); + } + + /// \brief Advance the iterator to the next partition. + /// + /// Requires that the iterator not be at the end of the slices. + void advance() { + assert((P.SI != SE || !P.SplitTails.empty()) && + "Cannot advance past the end of the slices!"); + + // Clear out any split uses which have ended. + if (!P.SplitTails.empty()) { + if (P.EndOffset >= MaxSplitSliceEndOffset) { + // If we've finished all splits, this is easy. + P.SplitTails.clear(); + MaxSplitSliceEndOffset = 0; + } else { + // Remove the uses which have ended in the prior partition. This + // cannot change the max split slice end because we just checked that + // the prior partition ended prior to that max. 
+ P.SplitTails.erase( + std::remove_if( + P.SplitTails.begin(), P.SplitTails.end(), + [&](Slice *S) { return S->endOffset() <= P.EndOffset; }), + P.SplitTails.end()); + assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(), + [&](Slice *S) { + return S->endOffset() == MaxSplitSliceEndOffset; + }) && + "Could not find the current max split slice offset!"); + assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(), + [&](Slice *S) { + return S->endOffset() <= MaxSplitSliceEndOffset; + }) && + "Max split slice end offset is not actually the max!"); + } + } + + // If P.SI is already at the end, then we've cleared the split tail and + // now have an end iterator. + if (P.SI == SE) { + assert(P.SplitTails.empty() && "Failed to clear the split slices!"); + return; + } + + // If we had a non-empty partition previously, set up the state for + // subsequent partitions. + if (P.SI != P.SJ) { + // Accumulate all the splittable slices which started in the old + // partition into the split list. + for (Slice &S : P) + if (S.isSplittable() && S.endOffset() > P.EndOffset) { + P.SplitTails.push_back(&S); + MaxSplitSliceEndOffset = + std::max(S.endOffset(), MaxSplitSliceEndOffset); + } + + // Start from the end of the previous partition. + P.SI = P.SJ; + + // If P.SI is now at the end, we at most have a tail of split slices. + if (P.SI == SE) { + P.BeginOffset = P.EndOffset; + P.EndOffset = MaxSplitSliceEndOffset; + return; + } + + // If we have split slices and the next slice is after a gap and is + // not splittable, immediately form an empty partition for the split + // slices up until the next slice begins. + if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset && + !P.SI->isSplittable()) { + P.BeginOffset = P.EndOffset; + P.EndOffset = P.SI->beginOffset(); + return; + } + } + + // OK, we need to consume new slices. Set the end offset based on the + // current slice, and step SJ past it. The beginning offset of the + // partition is the beginning offset of the next slice unless we have + // pre-existing split slices that are continuing, in which case we begin + // at the prior end offset. + P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset; + P.EndOffset = P.SI->endOffset(); + ++P.SJ; + + // There are two strategies to form a partition based on whether the + // partition starts with an unsplittable slice or a splittable slice. + if (!P.SI->isSplittable()) { + // When we're forming an unsplittable region, it must always start at + // the first slice and will extend through its end. + assert(P.BeginOffset == P.SI->beginOffset()); + + // Form a partition including all of the overlapping slices with this + // unsplittable slice. + while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { + if (!P.SJ->isSplittable()) + P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); + ++P.SJ; + } + + // We have a partition across a set of overlapping unsplittable + // partitions. + return; + } + + // If we're starting with a splittable slice, then we need to form + // a synthetic partition spanning it and any other overlapping splittable + // slices. + assert(P.SI->isSplittable() && "Forming a splittable partition!"); + + // Collect all of the overlapping splittable slices. + while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset && + P.SJ->isSplittable()) { + P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset()); + ++P.SJ; + } + + // Back up P.EndOffset if we ended the span early when encountering an + // unsplittable slice.
This synthesizes the early end offset of + // a partition spanning only splittable slices. + if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) { + assert(!P.SJ->isSplittable()); + P.EndOffset = P.SJ->beginOffset(); + } + } + +public: + bool operator==(const partition_iterator &RHS) const { + assert(SE == RHS.SE && + "End iterators don't match between compared partition iterators!"); + + // The observed positions of partitions is marked by the P.SI iterator and + // the emptiness of the split slices. The latter is only relevant when + // P.SI == SE, as the end iterator will additionally have an empty split + // slices list, but the prior may have the same P.SI and a tail of split + // slices. + if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) { + assert(P.SJ == RHS.P.SJ && + "Same set of slices formed two different sized partitions!"); + assert(P.SplitTails.size() == RHS.P.SplitTails.size() && + "Same slice position with differently sized non-empty split " + "slice tails!"); + return true; + } + return false; + } + + partition_iterator &operator++() { + advance(); + return *this; + } + + Partition &operator*() { return P; } +}; + +/// \brief A forward range over the partitions of the alloca's slices. +/// +/// This accesses an iterator range over the partitions of the alloca's +/// slices. It computes these partitions on the fly based on the overlapping +/// offsets of the slices and the ability to split them. It will visit "empty" +/// partitions to cover regions of the alloca only accessed via split +/// slices. +iterator_range AllocaSlices::partitions() { + return make_range(partition_iterator(begin(), end()), + partition_iterator(end(), end())); } static Value *foldSelectInst(SelectInst &SI) { @@ -1072,217 +1068,6 @@ LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); } #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -namespace { -/// \brief Implementation of LoadAndStorePromoter for promoting allocas. -/// -/// This subclass of LoadAndStorePromoter adds overrides to handle promoting -/// the loads and stores of an alloca instruction, as well as updating its -/// debug information. This is used when a domtree is unavailable and thus -/// mem2reg in its full form can't be used to handle promotion of allocas to -/// scalar values. -class AllocaPromoter : public LoadAndStorePromoter { - AllocaInst &AI; - DIBuilder &DIB; - - SmallVector DDIs; - SmallVector DVIs; - -public: - AllocaPromoter(ArrayRef Insts, - SSAUpdater &S, - AllocaInst &AI, DIBuilder &DIB) - : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {} - - void run(const SmallVectorImpl &Insts) { - // Retain the debug information attached to the alloca for use when - // rewriting loads and stores. - if (auto *L = LocalAsMetadata::getIfExists(&AI)) { - if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) { - for (User *U : DINode->users()) - if (DbgDeclareInst *DDI = dyn_cast(U)) - DDIs.push_back(DDI); - else if (DbgValueInst *DVI = dyn_cast(U)) - DVIs.push_back(DVI); - } - } - - LoadAndStorePromoter::run(Insts); - - // While we have the debug information, clear it off of the alloca. The - // caller takes care of deleting the alloca. 
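// Editor's illustration (not part of the patch): one concrete walk of the
// partition_iterator defined above, with hypothetical offsets. For an 8-byte
// alloca whose slices are sorted the way AllocaSlices sorts them:
//   B: [0,4) unsplittable (an i32 load)
//   A: [0,8) splittable   (a memset covering the whole alloca)
//   C: [4,8) unsplittable (an i32 store)
// advance() forms two partitions:
//   [0,4): contains B and A, which both begin inside it; when the iterator
//          moves on, A's remaining tail [4,8) is recorded in SplitTails.
//   [4,8): contains C, with A's split tail still overlapping it.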
- while (!DDIs.empty()) - DDIs.pop_back_val()->eraseFromParent(); - while (!DVIs.empty()) - DVIs.pop_back_val()->eraseFromParent(); - } - - bool - isInstInList(Instruction *I, - const SmallVectorImpl &Insts) const override { - Value *Ptr; - if (LoadInst *LI = dyn_cast(I)) - Ptr = LI->getOperand(0); - else - Ptr = cast(I)->getPointerOperand(); - - // Only used to detect cycles, which will be rare and quickly found as - // we're walking up a chain of defs rather than down through uses. - SmallPtrSet Visited; - - do { - if (Ptr == &AI) - return true; - - if (BitCastInst *BCI = dyn_cast(Ptr)) - Ptr = BCI->getOperand(0); - else if (GetElementPtrInst *GEPI = dyn_cast(Ptr)) - Ptr = GEPI->getPointerOperand(); - else - return false; - - } while (Visited.insert(Ptr).second); - - return false; - } - - void updateDebugInfo(Instruction *Inst) const override { - for (DbgDeclareInst *DDI : DDIs) - if (StoreInst *SI = dyn_cast(Inst)) - ConvertDebugDeclareToDebugValue(DDI, SI, DIB); - else if (LoadInst *LI = dyn_cast(Inst)) - ConvertDebugDeclareToDebugValue(DDI, LI, DIB); - for (DbgValueInst *DVI : DVIs) { - Value *Arg = nullptr; - if (StoreInst *SI = dyn_cast(Inst)) { - // If an argument is zero extended then use argument directly. The ZExt - // may be zapped by an optimization pass in future. - if (ZExtInst *ZExt = dyn_cast(SI->getOperand(0))) - Arg = dyn_cast(ZExt->getOperand(0)); - else if (SExtInst *SExt = dyn_cast(SI->getOperand(0))) - Arg = dyn_cast(SExt->getOperand(0)); - if (!Arg) - Arg = SI->getValueOperand(); - } else if (LoadInst *LI = dyn_cast(Inst)) { - Arg = LI->getPointerOperand(); - } else { - continue; - } - DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(), - DVI->getExpression(), DVI->getDebugLoc(), - Inst); - } - } -}; -} // end anon namespace - -namespace { -/// \brief An optimization pass providing Scalar Replacement of Aggregates. -/// -/// This pass takes allocations which can be completely analyzed (that is, they -/// don't escape) and tries to turn them into scalar SSA values. There are -/// a few steps to this process. -/// -/// 1) It takes allocations of aggregates and analyzes the ways in which they -/// are used to try to split them into smaller allocations, ideally of -/// a single scalar data type. It will split up memcpy and memset accesses -/// as necessary and try to isolate individual scalar accesses. -/// 2) It will transform accesses into forms which are suitable for SSA value -/// promotion. This can be replacing a memset with a scalar store of an -/// integer value, or it can involve speculating operations on a PHI or -/// select to be a PHI or select of the results. -/// 3) Finally, this will try to detect a pattern of accesses which map cleanly -/// onto insert and extract operations on a vector value, and convert them to -/// this form. By doing so, it will enable promotion of vector aggregates to -/// SSA vector values. -class SROA : public FunctionPass { - const bool RequiresDomTree; - - LLVMContext *C; - DominatorTree *DT; - AssumptionCache *AC; - - /// \brief Worklist of alloca instructions to simplify. - /// - /// Each alloca in the function is added to this. Each new alloca formed gets - /// added to it as well to recursively simplify unless that alloca can be - /// directly promoted. Finally, each time we rewrite a use of an alloca other - /// the one being actively rewritten, we add it back onto the list if not - /// already present to ensure it is re-visited. - SetVector> Worklist; - - /// \brief A collection of instructions to delete. 
- /// We try to batch deletions to simplify code and make things a bit more - /// efficient. - SetVector> DeadInsts; - - /// \brief Post-promotion worklist. - /// - /// Sometimes we discover an alloca which has a high probability of becoming - /// viable for SROA after a round of promotion takes place. In those cases, - /// the alloca is enqueued here for re-processing. - /// - /// Note that we have to be very careful to clear allocas out of this list in - /// the event they are deleted. - SetVector> PostPromotionWorklist; - - /// \brief A collection of alloca instructions we can directly promote. - std::vector PromotableAllocas; - - /// \brief A worklist of PHIs to speculate prior to promoting allocas. - /// - /// All of these PHIs have been checked for the safety of speculation and by - /// being speculated will allow promoting allocas currently in the promotable - /// queue. - SetVector> SpeculatablePHIs; - - /// \brief A worklist of select instructions to speculate prior to promoting - /// allocas. - /// - /// All of these select instructions have been checked for the safety of - /// speculation and by being speculated will allow promoting allocas - /// currently in the promotable queue. - SetVector> SpeculatableSelects; - -public: - SROA(bool RequiresDomTree = true) - : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr), - DT(nullptr) { - initializeSROAPass(*PassRegistry::getPassRegistry()); - } - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - - const char *getPassName() const override { return "SROA"; } - static char ID; - -private: - friend class PHIOrSelectSpeculator; - friend class AllocaSliceRewriter; - - bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS); - AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, - AllocaSlices::Partition &P); - bool splitAlloca(AllocaInst &AI, AllocaSlices &AS); - bool runOnAlloca(AllocaInst &AI); - void clobberUse(Use &U); - void deleteDeadInstructions(SmallPtrSetImpl &DeletedAllocas); - bool promoteAllocas(Function &F); -}; -} - -char SROA::ID = 0; - -FunctionPass *llvm::createSROAPass(bool RequiresDomTree) { - return new SROA(RequiresDomTree); -} - -INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false, - false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", false, - false) - /// Walk the range of a partitioning looking for a common type to cover this /// sequence of slices. static Type *findCommonType(AllocaSlices::const_iterator B, @@ -1373,7 +1158,7 @@ static bool isSafePHIToSpeculate(PHINode &PN) { // Ensure that there are no instructions between the PHI and the load that // could store. - for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI) + for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI) if (BBI->mayWriteToMemory()) return false; @@ -1934,10 +1719,10 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, /// \brief Test whether the given slice use can be promoted to a vector. /// -/// This function is called to test each entry in a partioning which is slated +/// This function is called to test each entry in a partition which is slated /// for a single slice. 
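// Editor's illustration (not part of the patch; hypothetical IR): for
//   %a = alloca <4 x float>
// a slice covering bytes [4,8) falls on a single element (offset 4 with
// ElementSize 4 is element 1), so a load of it can be rewritten as
//   %e = extractelement <4 x float> %v, i32 1
// whereas a slice covering bytes [2,6) straddles two elements and rules out
// this candidate vector type.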
-static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P, - const Slice &S, VectorType *Ty, +static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, + VectorType *Ty, uint64_t ElementSize, const DataLayout &DL) { // First validate the slice offsets. @@ -2012,8 +1797,7 @@ static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P, /// SSA value. We only can ensure this for a limited set of operations, and we /// don't want to do the rewrites unless we are confident that the result will /// be promotable, so we have an early test here. -static VectorType *isVectorPromotionViable(AllocaSlices::Partition &P, - const DataLayout &DL) { +static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { // Collect the candidate types for vector-based promotion. Also track whether // we have different element types. SmallVector CandidateTys; @@ -2130,7 +1914,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t RelEnd = S.endOffset() - AllocBeginOffset; // We can't reasonably handle cases where the load or store extends past - // the end of the aloca's type and into its padding. + // the end of the alloca's type and into its padding. if (RelEnd > Size) return false; @@ -2199,7 +1983,7 @@ static bool isIntegerWideningViableForSlice(const Slice &S, /// This is a quick test to check whether we can rewrite the integer loads and /// stores to a particular alloca into wider loads and stores and be able to /// promote the resulting alloca. -static bool isIntegerWideningViable(AllocaSlices::Partition &P, Type *AllocaTy, +static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL) { uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy); // Don't create integer types larger than the maximum bitwidth. @@ -2368,14 +2152,14 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, return V; } -namespace { /// \brief Visitor to rewrite instructions using p particular slice of an alloca /// to use a new alloca. /// /// Also implements the rewriting to vector-based accesses when the partition /// passes the isVectorPromotionViable predicate. Most of the rewriting logic /// lives here. -class AllocaSliceRewriter : public InstVisitor { +class llvm::sroa::AllocaSliceRewriter + : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. friend class llvm::InstVisitor; typedef llvm::InstVisitor Base; @@ -2583,9 +2367,19 @@ private: V = convertValue(DL, IRB, V, IntTy); assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; - if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) - V = extractInteger(DL, IRB, V, cast(LI.getType()), Offset, - "extract"); + if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) { + IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8); + V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract"); + } + // It is possible that the extracted type is not the load type. This + // happens if there is a load past the end of the alloca, and as + // a consequence the slice is narrower but still a candidate for integer + // lowering. To handle this case, we just zero extend the extracted + // integer. 
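// Editor's illustration (not part of the patch; hypothetical IR):
//   %a = alloca i16
//   %p = bitcast i16* %a to i32*
//   %v = load i32, i32* %p   ; reads past the end of the 2-byte alloca
// The slice is [0,2), so SliceSize * 8 == 16 is narrower than the i32 load;
// the code below extracts an i16 and zero-extends it to i32 to stand in
// for %v.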
+ assert(cast(LI.getType())->getBitWidth() >= SliceSize * 8 && + "Can only handle an extract for an overly wide load"); + if (cast(LI.getType())->getBitWidth() > SliceSize * 8) + V = IRB.CreateZExt(V, LI.getType()); return V; } @@ -2648,7 +2442,7 @@ private: DL.getTypeStoreSizeInBits(LI.getType()) && "Non-byte-multiple bit width"); // Move the insertion point just past the load so that we can refer to it. - IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI))); + IRB.SetInsertPoint(&*std::next(BasicBlock::iterator(&LI))); // Create a placeholder value with the same type as LI to use as the // basis for the new value. This allows us to replace the uses of LI with // the computed value, and then replace the placeholder with LI, leaving @@ -3126,7 +2920,7 @@ private: // dominate the PHI. IRBuilderTy PtrBuilder(IRB); if (isa(OldPtr)) - PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt()); + PtrBuilder.SetInsertPoint(&*OldPtr->getParent()->getFirstInsertionPt()); else PtrBuilder.SetInsertPoint(OldPtr); PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc()); @@ -3169,7 +2963,6 @@ private: return true; } }; -} namespace { /// \brief Visitor to rewrite aggregate loads and stores as scalar. @@ -3181,8 +2974,6 @@ class AggLoadStoreRewriter : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. friend class llvm::InstVisitor; - const DataLayout &DL; - /// Queue of pointer uses to analyze and potentially rewrite. SmallVector Queue; @@ -3194,8 +2985,6 @@ class AggLoadStoreRewriter : public InstVisitor { Use *U; public: - AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {} - /// Rewrite loads and stores through a pointer and all pointers derived from /// it. bool rewrite(Instruction &I) { @@ -3711,7 +3500,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { return true; }), Stores.end()); - // Now we have to go *back* through all te stores, because a later store may + // Now we have to go *back* through all the stores, because a later store may // have caused an earlier store's load to become unsplittable and if it is // unsplittable for the later store, then we can't rely on it being split in // the earlier store either. @@ -3773,7 +3562,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { "Cannot represent alloca access size using 64-bit integers!"); Instruction *BasePtr = cast(LI->getPointerOperand()); - IRB.SetInsertPoint(BasicBlock::iterator(LI)); + IRB.SetInsertPoint(LI); DEBUG(dbgs() << " Splitting load: " << *LI << "\n"); @@ -3825,7 +3614,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { } Value *StoreBasePtr = SI->getPointerOperand(); - IRB.SetInsertPoint(BasicBlock::iterator(SI)); + IRB.SetInsertPoint(SI); DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); @@ -3914,7 +3703,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { if (SplitLoads) { PLoad = (*SplitLoads)[Idx]; } else { - IRB.SetInsertPoint(BasicBlock::iterator(LI)); + IRB.SetInsertPoint(LI); PLoad = IRB.CreateAlignedLoad( getAdjustedPtr(IRB, DL, LoadBasePtr, APInt(DL.getPointerSizeInBits(), PartOffset), @@ -3924,7 +3713,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { } // And store this partition. 
- IRB.SetInsertPoint(BasicBlock::iterator(SI)); + IRB.SetInsertPoint(SI); StoreInst *PStore = IRB.CreateAlignedStore( PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr, APInt(DL.getPointerSizeInBits(), PartOffset), @@ -3972,7 +3761,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // Mark the original store as dead now that we've split it up and kill its // slice. Note that we leave the original load in place unless this store - // was its ownly use. It may in turn be split up if it is an alloca load + // was its only use. It may in turn be split up if it is an alloca load // for some other alloca, but it may be a normal load. This may introduce // redundant loads, but where those can be merged the rest of the optimizer // should handle the merging, and this uncovers SSA splits which is more @@ -4024,7 +3813,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { /// at enabling promotion and if it was successful queues the alloca to be /// promoted. AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, - AllocaSlices::Partition &P) { + Partition &P) { // Try to compute a friendly type for this partition of the alloca. This // won't always succeed, in which case we fall back to a legal integer type // or an i8 array of an appropriate size. @@ -4230,12 +4019,11 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) { std::max(NumPartitions, MaxPartitionsPerAlloca); // Migrate debug information from the old alloca to the new alloca(s) - // and the individial partitions. + // and the individual partitions. if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) { auto *Var = DbgDecl->getVariable(); auto *Expr = DbgDecl->getExpression(); - DIBuilder DIB(*AI.getParent()->getParent()->getParent(), - /*AllowUnresolved*/ false); + DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false); bool IsSplit = Pieces.size() > 1; for (auto Piece : Pieces) { // Create a piece expression describing the new partition or reuse AI's @@ -4308,7 +4096,7 @@ bool SROA::runOnAlloca(AllocaInst &AI) { // First, split any FCA loads and stores touching this alloca to promote // better splitting and promotion opportunities. - AggLoadStoreRewriter AggRewriter(DL); + AggLoadStoreRewriter AggRewriter; Changed |= AggRewriter.rewrite(AI); // Build the slices using a recursive instruction-visiting builder. @@ -4388,107 +4176,29 @@ void SROA::deleteDeadInstructions( } } -static void enqueueUsersInWorklist(Instruction &I, - SmallVectorImpl &Worklist, - SmallPtrSetImpl &Visited) { - for (User *U : I.users()) - if (Visited.insert(cast(U)).second) - Worklist.push_back(cast(U)); -} - /// \brief Promote the allocas, using the best available technique. /// /// This attempts to promote whatever allocas have been identified as viable in /// the PromotableAllocas list. If that list is empty, there is nothing to do. -/// If there is a domtree available, we attempt to promote using the full power -/// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is -/// based on the SSAUpdater utilities. This function returns whether any -/// promotion occurred. +/// This function returns whether any promotion occurred. 
bool SROA::promoteAllocas(Function &F) { if (PromotableAllocas.empty()) return false; NumPromoted += PromotableAllocas.size(); - if (DT && !ForceSSAUpdater) { - DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC); - PromotableAllocas.clear(); - return true; - } - - DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n"); - SSAUpdater SSA; - DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); - SmallVector Insts; - - // We need a worklist to walk the uses of each alloca. - SmallVector Worklist; - SmallPtrSet Visited; - SmallVector DeadInsts; - - for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) { - AllocaInst *AI = PromotableAllocas[Idx]; - Insts.clear(); - Worklist.clear(); - Visited.clear(); - - enqueueUsersInWorklist(*AI, Worklist, Visited); - - while (!Worklist.empty()) { - Instruction *I = Worklist.pop_back_val(); - - // FIXME: Currently the SSAUpdater infrastructure doesn't reason about - // lifetime intrinsics and so we strip them (and the bitcasts+GEPs - // leading to them) here. Eventually it should use them to optimize the - // scalar values produced. - if (IntrinsicInst *II = dyn_cast(I)) { - assert(II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end); - II->eraseFromParent(); - continue; - } - - // Push the loads and stores we find onto the list. SROA will already - // have validated that all loads and stores are viable candidates for - // promotion. - if (LoadInst *LI = dyn_cast(I)) { - assert(LI->getType() == AI->getAllocatedType()); - Insts.push_back(LI); - continue; - } - if (StoreInst *SI = dyn_cast(I)) { - assert(SI->getValueOperand()->getType() == AI->getAllocatedType()); - Insts.push_back(SI); - continue; - } - - // For everything else, we know that only no-op bitcasts and GEPs will - // make it this far, just recurse through them and recall them for later - // removal. - DeadInsts.push_back(I); - enqueueUsersInWorklist(*I, Worklist, Visited); - } - AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts); - while (!DeadInsts.empty()) - DeadInsts.pop_back_val()->eraseFromParent(); - AI->eraseFromParent(); - } - + DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); + PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC); PromotableAllocas.clear(); return true; } -bool SROA::runOnFunction(Function &F) { - if (skipOptnoneFunction(F)) - return false; - +PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, + AssumptionCache &RunAC) { DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); - DominatorTreeWrapperPass *DTWP = - getAnalysisIfAvailable(); - DT = DTWP ? &DTWP->getDomTree() : nullptr; - AC = &getAnalysis().getAssumptionCache(F); + DT = &RunDT; + AC = &RunAC; BasicBlock &EntryBB = F.getEntryBlock(); for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); @@ -4527,12 +4237,55 @@ bool SROA::runOnFunction(Function &F) { PostPromotionWorklist.clear(); } while (!Worklist.empty()); - return Changed; + // FIXME: Even when promoting allocas we should preserve some abstract set of + // CFG-specific analyses. + return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); } -void SROA::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - if (RequiresDomTree) - AU.addRequired(); - AU.setPreservesCFG(); +PreservedAnalyses SROA::run(Function &F, AnalysisManager *AM) { + return runImpl(F, AM->getResult(F), + AM->getResult(F)); } + +/// A legacy pass for the legacy pass manager that wraps the \c SROA pass. +/// +/// This is in the llvm namespace purely to allow it to be a friend of the \c +/// SROA pass. +class llvm::sroa::SROALegacyPass : public FunctionPass { + /// The SROA implementation. + SROA Impl; + +public: + SROALegacyPass() : FunctionPass(ID) { + initializeSROALegacyPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override { + if (skipOptnoneFunction(F)) + return false; + + auto PA = Impl.runImpl( + F, getAnalysis().getDomTree(), + getAnalysis().getAssumptionCache(F)); + return !PA.areAllPreserved(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.setPreservesCFG(); + } + + const char *getPassName() const override { return "SROA"; } + static char ID; +}; + +char SROALegacyPass::ID = 0; + +FunctionPass *llvm::createSROAPass() { return new SROALegacyPass(); } + +INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa", + "Scalar Replacement Of Aggregates", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", + false, false) diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index d5d360571f88..52d477cc9573 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -16,7 +16,10 @@ #include "llvm/Transforms/Scalar.h" #include "llvm-c/Initialization.h" #include "llvm-c/Transforms/Scalar.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" @@ -27,10 +30,9 @@ using namespace llvm; /// initializeScalarOptsPasses - Initialize all passes linked into the /// ScalarOpts library. 
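A minimal sketch (editor's illustration, not part of the patch; pass name hypothetical) of the porting pattern the SROA hunks above establish: the transformation logic lives in a runImpl method, and both the new pass manager's run() and a thin legacy FunctionPass wrapper delegate to it.

#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
using namespace llvm;

class FooPass {
public:
  // Shared implementation; analyses are handed in by whichever manager runs.
  PreservedAnalyses runImpl(Function &F, DominatorTree &DT) {
    (void)F; (void)DT;               // ... transform here ...
    return PreservedAnalyses::all();
  }
  // New pass manager entry point (this snapshot passes the manager by pointer).
  PreservedAnalyses run(Function &F, AnalysisManager<Function> *AM) {
    return runImpl(F, AM->getResult<DominatorTreeAnalysis>(F));
  }
};

class FooLegacyPass : public FunctionPass {
  FooPass Impl; // reuse the new-style pass verbatim

public:
  static char ID;
  FooLegacyPass() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override {
    auto PA = Impl.runImpl(
        F, getAnalysis<DominatorTreeWrapperPass>().getDomTree());
    return !PA.areAllPreserved(); // legacy PM expects a "changed" flag
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
  }
};

char FooLegacyPass::ID = 0;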
void llvm::initializeScalarOpts(PassRegistry &Registry) { - initializeADCEPass(Registry); + initializeADCELegacyPassPass(Registry); initializeBDCEPass(Registry); initializeAlignmentFromAssumptionsPass(Registry); - initializeSampleProfileLoaderPass(Registry); initializeConstantHoistingPass(Registry); initializeConstantPropagationPass(Registry); initializeCorrelatedValuePropagationPass(Registry); @@ -66,7 +68,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeRewriteStatepointsForGCPass(Registry); initializeSCCPPass(Registry); initializeIPSCCPPass(Registry); - initializeSROAPass(Registry); + initializeSROALegacyPassPass(Registry); initializeSROA_DTPass(Registry); initializeSROA_SSAUpPass(Registry); initializeCFGSimplifyPassPass(Registry); @@ -81,6 +83,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializePlaceSafepointsPass(Registry); initializeFloat2IntPass(Registry); initializeLoopDistributePass(Registry); + initializeLoopLoadEliminationPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { @@ -225,15 +228,15 @@ void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { } void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createTypeBasedAliasAnalysisPass()); + unwrap(PM)->add(createTypeBasedAAWrapperPass()); } void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createScopedNoAliasAAPass()); + unwrap(PM)->add(createScopedNoAliasAAWrapperPass()); } void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { - unwrap(PM)->add(createBasicAliasAnalysisPass()); + unwrap(PM)->add(createBasicAAWrapperPass()); } void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index d955da7ce75d..114d22ddf2e4 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -60,6 +60,7 @@ STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion"); STATISTIC(NumConverted, "Number of aggregates converted to scalar"); namespace { +#define SROA SROA_ struct SROA : public FunctionPass { SROA(int T, bool hasDT, char &ID, int ST, int AT, int SLT) : FunctionPass(ID), HasDomTree(hasDT) { @@ -382,8 +383,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { // Create and insert the integer alloca. NewTy = IntegerType::get(AI->getContext(), BitWidth); } - AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "", - AI->getParent()->begin()); + AllocaInst *NewAI = + new AllocaInst(NewTy, nullptr, "", &AI->getParent()->front()); ConvertUsesToScalar(AI, NewAI, 0, nullptr); return NewAI; } @@ -1195,7 +1196,7 @@ static bool isSafePHIToSpeculate(PHINode *PN) { // Ensure that there are no instructions between the PHI and the load that // could store. 
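// Editor's illustration (not part of the patch; hypothetical IR):
//   %p = phi i32* [ %a, %then ], [ %b, %else ]
//   call void @clobber()   ; mayWriteToMemory() is true
//   %v = load i32, i32* %p
// Speculating the load into the predecessors would execute it before
// @clobber runs, so the scan below rejects this case.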
- for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI) + for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI) if (BBI->mayWriteToMemory()) return false; diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp index 049300350857..054bacdc706b 100644 --- a/lib/Transforms/Scalar/Scalarizer.cpp +++ b/lib/Transforms/Scalar/Scalarizer.cpp @@ -253,10 +253,10 @@ bool Scalarizer::doInitialization(Module &M) { } bool Scalarizer::runOnFunction(Function &F) { - for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) { - BasicBlock *BB = BBI; - for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { - Instruction *I = II; + assert(Gathered.empty() && Scattered.empty()); + for (BasicBlock &BB : F) { + for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) { + Instruction *I = &*II; bool Done = visit(I); ++II; if (Done && I->getType()->isVoidTy()) @@ -285,7 +285,7 @@ Scatterer Scalarizer::scatter(Instruction *Point, Value *V) { } // In the fallback case, just put the scattered before Point and // keep the result local to Point. - return Scatterer(Point->getParent(), Point, V); + return Scatterer(Point->getParent(), Point->getIterator(), V); } // Replace Op with the gathered form of the components in CV. Defer the @@ -377,7 +377,7 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) { return false; unsigned NumElems = VT->getNumElements(); - IRBuilder<> Builder(I.getParent(), &I); + IRBuilder<> Builder(&I); Scatterer Op0 = scatter(&I, I.getOperand(0)); Scatterer Op1 = scatter(&I, I.getOperand(1)); assert(Op0.size() == NumElems && "Mismatched binary operation"); @@ -397,7 +397,7 @@ bool Scalarizer::visitSelectInst(SelectInst &SI) { return false; unsigned NumElems = VT->getNumElements(); - IRBuilder<> Builder(SI.getParent(), &SI); + IRBuilder<> Builder(&SI); Scatterer Op1 = scatter(&SI, SI.getOperand(1)); Scatterer Op2 = scatter(&SI, SI.getOperand(2)); assert(Op1.size() == NumElems && "Mismatched select"); @@ -438,7 +438,7 @@ bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) { if (!VT) return false; - IRBuilder<> Builder(GEPI.getParent(), &GEPI); + IRBuilder<> Builder(&GEPI); unsigned NumElems = VT->getNumElements(); unsigned NumIndices = GEPI.getNumIndices(); @@ -472,7 +472,7 @@ bool Scalarizer::visitCastInst(CastInst &CI) { return false; unsigned NumElems = VT->getNumElements(); - IRBuilder<> Builder(CI.getParent(), &CI); + IRBuilder<> Builder(&CI); Scatterer Op0 = scatter(&CI, CI.getOperand(0)); assert(Op0.size() == NumElems && "Mismatched cast"); ValueVector Res; @@ -492,7 +492,7 @@ bool Scalarizer::visitBitCastInst(BitCastInst &BCI) { unsigned DstNumElems = DstVT->getNumElements(); unsigned SrcNumElems = SrcVT->getNumElements(); - IRBuilder<> Builder(BCI.getParent(), &BCI); + IRBuilder<> Builder(&BCI); Scatterer Op0 = scatter(&BCI, BCI.getOperand(0)); ValueVector Res; Res.resize(DstNumElems); @@ -569,7 +569,7 @@ bool Scalarizer::visitPHINode(PHINode &PHI) { return false; unsigned NumElems = VT->getNumElements(); - IRBuilder<> Builder(PHI.getParent(), &PHI); + IRBuilder<> Builder(&PHI); ValueVector Res; Res.resize(NumElems); @@ -600,7 +600,7 @@ bool Scalarizer::visitLoadInst(LoadInst &LI) { return false; unsigned NumElems = Layout.VecTy->getNumElements(); - IRBuilder<> Builder(LI.getParent(), &LI); + IRBuilder<> Builder(&LI); Scatterer Ptr = scatter(&LI, LI.getPointerOperand()); ValueVector Res; Res.resize(NumElems); @@ -625,7 +625,7 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) { 
return false; unsigned NumElems = Layout.VecTy->getNumElements(); - IRBuilder<> Builder(SI.getParent(), &SI); + IRBuilder<> Builder(&SI); Scatterer Ptr = scatter(&SI, SI.getPointerOperand()); Scatterer Val = scatter(&SI, FullValue); @@ -642,7 +642,9 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) { // Delete the instructions that we scalarized. If a full vector result // is still needed, recreate it using InsertElements. bool Scalarizer::finish() { - if (Gathered.empty()) + // The presence of data in Gathered or Scattered indicates changes + // made to the Function. + if (Gathered.empty() && Scattered.empty()) return false; for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end(); GMI != GME; ++GMI) { @@ -655,7 +657,7 @@ Value *Res = UndefValue::get(Ty); BasicBlock *BB = Op->getParent(); unsigned Count = Ty->getVectorNumElements(); - IRBuilder<> Builder(BB, Op); + IRBuilder<> Builder(Op); if (isa(Op)) Builder.SetInsertPoint(BB, BB->getFirstInsertionPt()); for (unsigned I = 0; I < Count; ++I) diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4a875311881a..86a10d2a1612 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -156,6 +156,10 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -164,6 +168,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" @@ -174,6 +179,7 @@ #include "llvm/IR/IRBuilder.h" using namespace llvm; +using namespace llvm::PatternMatch; static cl::opt DisableSeparateConstOffsetFromGEP( "disable-separate-const-offset-from-gep", cl::init(false), @@ -319,8 +325,11 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.setPreservesCFG(); + AU.addRequired(); } bool doInitialization(Module &M) override { @@ -373,15 +382,42 @@ private: /// /// Verified in @i32_add in split-gep.ll bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP); + /// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow. + /// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting + /// the constant offset. After extraction, it becomes desirable to reunite the + /// distributed sexts. For example, + /// + /// &a[sext(i +nsw (j +nsw 5))] + /// => distribute &a[sext(i) +nsw (sext(j) +nsw 5)] + /// => constant extraction &a[sext(i) + sext(j)] + 5 + /// => reunion &a[sext(i +nsw j)] + 5 + bool reuniteExts(Function &F); + /// A helper that reunites sexts in an instruction. + bool reuniteExts(Instruction *I); + /// Find the closest dominator of <Dominatee> that is equivalent to <Key>. + Instruction *findClosestMatchingDominator(const SCEV *Key, + Instruction *Dominatee); /// Verify F is free of dead code. void verifyNoDeadCode(Function &F); + bool hasMoreThanOneUseInLoop(Value *v, Loop *L); + // Swap the index operands of two GEPs.
+ void swapGEPOperand(GetElementPtrInst *First, GetElementPtrInst *Second); + // Check if it is safe to swap operand of two GEP. + bool isLegalToSwapOperand(GetElementPtrInst *First, GetElementPtrInst *Second, + Loop *CurLoop); + const DataLayout *DL; - const DominatorTree *DT; + DominatorTree *DT; + ScalarEvolution *SE; const TargetMachine *TM; + + LoopInfo *LI; + TargetLibraryInfo *TLI; /// Whether to lower a GEP with multiple indices into arithmetic operations or /// multiple GEPs with a single index. bool LowerGEP; + DenseMap> DominatingExprs; }; } // anonymous namespace @@ -391,7 +427,10 @@ INITIALIZE_PASS_BEGIN( "Split GEPs to a variadic base and a constant offset for better CSE", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END( SeparateConstOffsetFromGEP, "separate-const-offset-from-gep", "Split GEPs to a variadic base and a constant offset for better CSE", false, @@ -734,6 +773,13 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( Type *I8PtrTy = Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace()); Value *ResultPtr = Variadic->getOperand(0); + Loop *L = LI->getLoopFor(Variadic->getParent()); + // Check if the base is not loop invariant or used more than once. + bool isSwapCandidate = + L && L->isLoopInvariant(ResultPtr) && + !hasMoreThanOneUseInLoop(ResultPtr, L); + Value *FirstResult = nullptr; + if (ResultPtr->getType() != I8PtrTy) ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); @@ -762,6 +808,8 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( // Create an ugly GEP with a single index for each index. ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Idx, "uglygep"); + if (FirstResult == nullptr) + FirstResult = ResultPtr; } } @@ -770,7 +818,17 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs( Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset); ResultPtr = Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep"); - } + } else + isSwapCandidate = false; + + // If we created a GEP with constant index, and the base is loop invariant, + // then we swap the first one with it, so LICM can move constant GEP out + // later. + GetElementPtrInst *FirstGEP = dyn_cast(FirstResult); + GetElementPtrInst *SecondGEP = dyn_cast(ResultPtr); + if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L)) + swapGEPOperand(FirstGEP, SecondGEP); + if (ResultPtr->getType() != Variadic->getType()) ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType()); @@ -891,13 +949,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // Clear the inbounds attribute because the new index may be off-bound. // e.g., // - // b = add i64 a, 5 - // addr = gep inbounds float* p, i64 b + // b = add i64 a, 5 + // addr = gep inbounds float, float* p, i64 b // // is transformed to: // - // addr2 = gep float* p, i64 a - // addr = gep float* addr2, i64 5 + // addr2 = gep float, float* p, i64 a ; inbounds removed + // addr = gep inbounds float, float* addr2, i64 5 // // If a is -4, although the old index b is in bounds, the new index a is // off-bound. 
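// Editor's note (not part of the patch): concretely, with a = -4 the
// original GEP uses b = a + 5 = 1, a valid in-bounds index, while the
// split form first computes addr2 = &p[-4], which already lies outside
// the object; hence inbounds is dropped on the variadic GEP.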
http://llvm.org/docs/LangRef.html#id181 says "if the @@ -907,6 +965,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { // // TODO(jingyue): do some range analysis to keep as many inbounds as // possible. GEPs with inbounds are more friendly to alias analysis. + bool GEPWasInBounds = GEP->isInBounds(); GEP->setIsInBounds(false); // Lowers a GEP to either GEPs with a single index or arithmetic operations. @@ -968,6 +1027,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP); + // Inherit the inbounds attribute of the original GEP. + cast(NewGEP)->setIsInBounds(GEPWasInBounds); } else { // Unlikely but possible. For example, // #pragma pack(1) @@ -990,6 +1051,8 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { Type::getInt8Ty(GEP->getContext()), NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true), "uglygep", GEP); + // Inherit the inbounds attribute of the original GEP. + cast(NewGEP)->setIsInBounds(GEPWasInBounds); if (GEP->getType() != I8PtrTy) NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP); } @@ -1008,24 +1071,96 @@ bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) { return false; DT = &getAnalysis().getDomTree(); - + SE = &getAnalysis().getSE(); + LI = &getAnalysis().getLoopInfo(); + TLI = &getAnalysis().getTLI(); bool Changed = false; for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) { - for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) { - if (GetElementPtrInst *GEP = dyn_cast(I++)) { + for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE;) + if (GetElementPtrInst *GEP = dyn_cast(I++)) Changed |= splitGEP(GEP); - } - // No need to split GEP ConstantExprs because all its indices are constant - // already. - } + // No need to split GEP ConstantExprs because all its indices are constant + // already. } + Changed |= reuniteExts(F); + if (VerifyNoDeadCode) verifyNoDeadCode(F); return Changed; } +Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator( + const SCEV *Key, Instruction *Dominatee) { + auto Pos = DominatingExprs.find(Key); + if (Pos == DominatingExprs.end()) + return nullptr; + + auto &Candidates = Pos->second; + // Because we process the basic blocks in pre-order of the dominator tree, a + // candidate that doesn't dominate the current instruction won't dominate any + // future instruction either. Therefore, we pop it out of the stack. This + // optimization makes the algorithm O(n). + while (!Candidates.empty()) { + Instruction *Candidate = Candidates.back(); + if (DT->dominates(Candidate, Dominatee)) + return Candidate; + Candidates.pop_back(); + } + return nullptr; +} + +bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; + + // Dom: LHS+RHS + // I: sext(LHS)+sext(RHS) + // If Dom can't sign overflow and Dom dominates I, optimize I to sext(Dom). 
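+  // An illustrative sketch (not from the original patch; %i, %j and the
+  // i32/i64 types are assumed for the example):
+  //   Dom:  %d  = add nsw i32 %i, %j
+  //         %s1 = sext i32 %i to i64
+  //         %s2 = sext i32 %j to i64
+  //   I:    %a  = add i64 %s1, %s2
+  // becomes
+  //         %a  = sext i32 %d to i64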
+  // TODO: handle zext
+  Value *LHS = nullptr, *RHS = nullptr;
+  if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS)))) ||
+      match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
+    if (LHS->getType() == RHS->getType()) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      if (auto *Dom = findClosestMatchingDominator(Key, I)) {
+        Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
+        NewSExt->takeName(I);
+        I->replaceAllUsesWith(NewSExt);
+        RecursivelyDeleteTriviallyDeadInstructions(I);
+        return true;
+      }
+    }
+  }
+
+  // Add I to DominatingExprs if it's an add/sub that can't sign overflow.
+  if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||
+      match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
+    if (isKnownNotFullPoison(I)) {
+      const SCEV *Key =
+          SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+      DominatingExprs[Key].push_back(I);
+    }
+  }
+  return false;
+}
+
+bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
+  bool Changed = false;
+  DominatingExprs.clear();
+  for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
+       Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+    BasicBlock *BB = Node->getBlock();
+    for (auto I = BB->begin(); I != BB->end(); ) {
+      Instruction *Cur = &*I++;
+      Changed |= reuniteExts(Cur);
+    }
+  }
+  return Changed;
+}
+
 void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
   for (auto &B : F) {
     for (auto &I : B) {
@@ -1038,3 +1173,93 @@ void SeparateConstOffsetFromGEP::verifyNoDeadCode(Function &F) {
     }
   }
 }
+
+bool SeparateConstOffsetFromGEP::isLegalToSwapOperand(
+    GetElementPtrInst *FirstGEP, GetElementPtrInst *SecondGEP, Loop *CurLoop) {
+  if (!FirstGEP || !FirstGEP->hasOneUse())
+    return false;
+
+  if (!SecondGEP || FirstGEP->getParent() != SecondGEP->getParent())
+    return false;
+
+  if (FirstGEP == SecondGEP)
+    return false;
+
+  unsigned FirstNum = FirstGEP->getNumOperands();
+  unsigned SecondNum = SecondGEP->getNumOperands();
+  // Give up if the number of operands is not 2.
+  if (FirstNum != SecondNum || FirstNum != 2)
+    return false;
+
+  Value *FirstBase = FirstGEP->getOperand(0);
+  Value *SecondBase = SecondGEP->getOperand(0);
+  Value *FirstOffset = FirstGEP->getOperand(1);
+  // Give up if the index of the first GEP is loop invariant.
+  if (CurLoop->isLoopInvariant(FirstOffset))
+    return false;
+
+  // Give up if the bases don't have the same type.
+  if (FirstBase->getType() != SecondBase->getType())
+    return false;
+
+  Instruction *FirstOffsetDef = dyn_cast<Instruction>(FirstOffset);
+
+  // Check if the second operand of the first GEP has a constant coefficient.
+  // For example, in the following code we won't gain anything by hoisting
+  // the second GEP out, because the second GEP can be folded away:
+  //   %scevgep.sum.ur159 = add i64 %idxprom48.ur, 256
+  //   %67 = shl i64 %scevgep.sum.ur159, 2
+  //   %uglygep160 = getelementptr i8* %65, i64 %67
+  //   %uglygep161 = getelementptr i8* %uglygep160, i64 -1024

+  // Skip a constant shift instruction, which may be generated by splitting
+  // GEPs.
+  if (FirstOffsetDef && FirstOffsetDef->isShift() &&
+      isa<ConstantInt>(FirstOffsetDef->getOperand(1)))
+    FirstOffsetDef = dyn_cast<Instruction>(FirstOffsetDef->getOperand(0));
+
+  // Give up if FirstOffsetDef is an Add or Sub with a constant, because it
+  // may not be profitable at all due to constant folding.
+ if (FirstOffsetDef) + if (BinaryOperator *BO = dyn_cast(FirstOffsetDef)) { + unsigned opc = BO->getOpcode(); + if ((opc == Instruction::Add || opc == Instruction::Sub) && + (isa(BO->getOperand(0)) || + isa(BO->getOperand(1)))) + return false; + } + return true; +} + +bool SeparateConstOffsetFromGEP::hasMoreThanOneUseInLoop(Value *V, Loop *L) { + int UsesInLoop = 0; + for (User *U : V->users()) { + if (Instruction *User = dyn_cast(U)) + if (L->contains(User)) + if (++UsesInLoop > 1) + return true; + } + return false; +} + +void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First, + GetElementPtrInst *Second) { + Value *Offset1 = First->getOperand(1); + Value *Offset2 = Second->getOperand(1); + First->setOperand(1, Offset2); + Second->setOperand(1, Offset1); + + // We changed p+o+c to p+c+o, p+c may not be inbound anymore. + const DataLayout &DAL = First->getModule()->getDataLayout(); + APInt Offset(DAL.getPointerSizeInBits( + cast(First->getType())->getAddressSpace()), + 0); + Value *NewBase = + First->stripAndAccumulateInBoundsConstantOffsets(DAL, Offset); + uint64_t ObjectSize; + if (!getObjectSize(NewBase, ObjectSize, DAL, TLI) || + Offset.ugt(ObjectSize)) { + First->setIsInBounds(false); + Second->setIsInBounds(false); + } else + First->setIsInBounds(true); +} diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 231411a16c05..63c8836bf381 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Attributes.h" @@ -67,15 +68,14 @@ static bool mergeEmptyReturnBlocks(Function &F) { // single PHI node that is the operand to the return. if (Ret != &BB.front()) { // Check for something else in the block. - BasicBlock::iterator I = Ret; + BasicBlock::iterator I(Ret); --I; // Skip over debug info. while (isa(I) && I != BB.begin()) --I; if (!isa(I) && - (!isa(I) || I != BB.begin() || - Ret->getNumOperands() == 0 || - Ret->getOperand(0) != I)) + (!isa(I) || I != BB.begin() || Ret->getNumOperands() == 0 || + Ret->getOperand(0) != &*I)) continue; } @@ -136,7 +136,7 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, // Loop over all of the basic blocks and remove them if they are unneeded. 
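   // (Illustrative note, not part of the original patch: one pass can expose
   // more work, e.g. folding a conditional branch may leave the target block
   // with a single predecessor that only the next iteration can merge away;
   // hence the outer fixed-point loop.)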
for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { - if (SimplifyCFG(BBIt++, TTI, BonusInstThreshold, AC)) { + if (SimplifyCFG(&*BBIt++, TTI, BonusInstThreshold, AC)) { LocalChange = true; ++NumSimpl; } @@ -217,6 +217,7 @@ struct CFGSimplifyPass : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addPreserved(); } }; } diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp index f49f4eaaedcb..64109b2df117 100644 --- a/lib/Transforms/Scalar/Sink.cpp +++ b/lib/Transforms/Scalar/Sink.cpp @@ -48,7 +48,7 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); FunctionPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -66,7 +66,7 @@ char Sinking::ID = 0; INITIALIZE_PASS_BEGIN(Sinking, "sink", "Code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(Sinking, "sink", "Code sinking", false, false) FunctionPass *llvm::createSinkingPass() { return new Sinking(); } @@ -99,7 +99,7 @@ bool Sinking::AllUsesDominatedByBlock(Instruction *Inst, bool Sinking::runOnFunction(Function &F) { DT = &getAnalysis().getDomTree(); LI = &getAnalysis().getLoopInfo(); - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); bool MadeChange, EverMadeChange = false; @@ -119,7 +119,7 @@ bool Sinking::runOnFunction(Function &F) { bool Sinking::ProcessBlock(BasicBlock &BB) { // Can't sink anything out of a block that has less than two successors. - if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false; + if (BB.getTerminator()->getNumSuccessors() <= 1) return false; // Don't bother sinking code out of unreachable blocks. In addition to being // unprofitable, it can also lead to infinite looping, because in an @@ -134,7 +134,7 @@ bool Sinking::ProcessBlock(BasicBlock &BB) { bool ProcessedBegin = false; SmallPtrSet Stores; do { - Instruction *Inst = I; // The instruction to sink. + Instruction *Inst = &*I; // The instruction to sink. // Predecrement I (if it's not begin) so that it isn't invalidated by // sinking. @@ -165,14 +165,16 @@ static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA, if (LoadInst *L = dyn_cast(Inst)) { MemoryLocation Loc = MemoryLocation::get(L); for (Instruction *S : Stores) - if (AA->getModRefInfo(S, Loc) & AliasAnalysis::Mod) + if (AA->getModRefInfo(S, Loc) & MRI_Mod) return false; } - if (isa(Inst) || isa(Inst)) + if (isa(Inst) || isa(Inst) || Inst->isEHPad() || + Inst->mayThrow()) return false; - // Convergent operations can only be moved to control equivalent blocks. + // Convergent operations cannot be made control-dependent on additional + // values. if (auto CS = CallSite(Inst)) { if (CS.hasFnAttr(Attribute::Convergent)) return false; @@ -193,6 +195,11 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst, if (Inst->getParent() == SuccToSinkTo) return false; + // It's never legal to sink an instruction into a block which terminates in an + // EH-pad. + if (SuccToSinkTo->getTerminator()->isExceptional()) + return false; + // If the block has multiple predecessors, this would introduce computation // on different code paths. We could split the critical edge, but for now we // just punt. 
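   // (Illustrative sketch, not part of the original patch; block and value
   // names are assumed. Given
   //   bb:   %v = add i32 %a, %b
   //         br i1 %c, label %s1, label %s2
   // %v is never sunk into a successor whose terminator is an EH pad
   // terminator such as cleanupret, and multi-predecessor successors are
   // generally punted on, as the comments above describe.)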
@@ -278,6 +285,6 @@ bool Sinking::SinkInstruction(Instruction *Inst, dbgs() << ")\n"); // Move the instruction. - Inst->moveBefore(SuccToSinkTo->getFirstInsertionPt()); + Inst->moveBefore(&*SuccToSinkTo->getFirstInsertionPt()); return true; } diff --git a/lib/Transforms/Scalar/SpeculativeExecution.cpp b/lib/Transforms/Scalar/SpeculativeExecution.cpp index ff3f00a2e2f8..147d615488ff 100644 --- a/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -227,7 +227,7 @@ bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock, // changes the list that I is iterating through. auto Current = I; ++I; - if (!NotHoisted.count(Current)) { + if (!NotHoisted.count(&*Current)) { Current->moveBefore(ToBlock.getTerminator()); } } diff --git a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp index 6d9d417ef943..1faa65eb3417 100644 --- a/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp +++ b/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp @@ -131,7 +131,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); // We do not modify the shape of the CFG. AU.setPreservesCFG(); @@ -212,7 +212,7 @@ char StraightLineStrengthReduce::ID = 0; INITIALIZE_PASS_BEGIN(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(StraightLineStrengthReduce, "slsr", "Straight line strength reduction", false, false) @@ -234,6 +234,7 @@ bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, Basis.CandidateKind == C.CandidateKind); } +// TODO: use TTI->getGEPCost. static bool isGEPFoldable(GetElementPtrInst *GEP, const TargetTransformInfo *TTI, const DataLayout *DL) { @@ -523,7 +524,7 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP( continue; const SCEV *OrigIndexExpr = IndexExprs[I - 1]; - IndexExprs[I - 1] = SE->getConstant(OrigIndexExpr->getType(), 0); + IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType()); // The base of this candidate is GEP's base plus the offsets of all // indices except this current one. @@ -689,7 +690,7 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) { TTI = &getAnalysis().getTTI(F); DT = &getAnalysis().getDomTree(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); // Traverse the dominator tree in the depth-first order. This order makes sure // all bases of a candidate are in Candidates when we process it. 
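  // (Illustrative sketch, not part of the original patch; values assumed:
  //    %b = mul i64 %x, 3      ; dominates %c, so it is visited first
  //    %c = mul i64 %x, 5      ; %b is already in Candidates as its basis
  //  allowing %c to be rewritten later as %b + 2 * %x.)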
for (auto node = GraphTraits::nodes_begin(DT); diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp index 4f23e20d251d..662513c7d8ae 100644 --- a/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -358,13 +358,9 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) { BasicBlock *BB = N->getNodeAs(); BranchInst *Term = cast(BB->getTerminator()); - for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { - BasicBlock *Succ = Term->getSuccessor(i); - - if (Visited.count(Succ)) { + for (BasicBlock *Succ : Term->successors()) + if (Visited.count(Succ)) Loops[Succ] = BB; - } - } } } @@ -903,14 +899,14 @@ void StructurizeCFG::rebuildSSA() { continue; } - if (DT->dominates(II, User)) + if (DT->dominates(&*II, User)) continue; if (!Initialized) { Value *Undef = UndefValue::get(II->getType()); Updater.Initialize(II->getType(), ""); Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); - Updater.AddAvailableValue(BB, II); + Updater.AddAvailableValue(BB, &*II); Initialized = true; } Updater.RewriteUseAfterInsertions(U); diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index c7de2e2965c7..0e0b00df85bb 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -54,6 +54,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" @@ -136,6 +137,7 @@ FunctionPass *llvm::createTailCallEliminationPass() { void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addPreserved(); } /// \brief Scan the specified function for alloca instructions. @@ -195,8 +197,8 @@ struct AllocaDerivedValueTracker { case Instruction::Call: case Instruction::Invoke: { CallSite CS(I); - bool IsNocapture = !CS.isCallee(U) && - CS.doesNotCapture(CS.getArgumentNo(U)); + bool IsNocapture = + CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U)); callUsesLocalStack(CS, IsNocapture); if (IsNocapture) { // If the alloca-derived argument is passed in as nocapture, then it @@ -302,7 +304,9 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { if (!CI || CI->isTailCall()) continue; - if (CI->doesNotAccessMemory()) { + bool IsNoTail = CI->isNoTailCall(); + + if (!IsNoTail && CI->doesNotAccessMemory()) { // A call to a readnone function whose arguments are all things computed // outside this function can be marked tail. Even if you stored the // alloca address into a global, a readnone function can't load the @@ -330,7 +334,7 @@ bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) { } } - if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { DeferredTails.push_back(CI); } else { AllCallsAreTailCalls = false; @@ -404,7 +408,7 @@ bool TailCallElim::runTRE(Function &F) { // Until this is resolved, disable this transformation if that would ever // happen. This bug is PR962. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { - BasicBlock *BB = BBI++; // FoldReturnAndProcessPred may delete BB. + BasicBlock *BB = &*BBI++; // FoldReturnAndProcessPred may delete BB. 
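     // BBI is advanced before BB is processed: FoldReturnAndProcessPred may
     // erase BB, and incrementing the iterator only afterwards would leave it
     // dangling.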
if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, !CanTRETailMarkedCall); @@ -574,7 +578,7 @@ TailCallElim::FindTRECandidate(Instruction *TI, // Scan backwards from the return, checking to see if there is a tail call in // this block. If so, set CI to it. CallInst *CI = nullptr; - BasicBlock::iterator BBI = TI; + BasicBlock::iterator BBI(TI); while (true) { CI = dyn_cast(BBI); if (CI && CI->getCalledFunction() == F) @@ -595,9 +599,8 @@ TailCallElim::FindTRECandidate(Instruction *TI, // and disable this xform in this case, because the code generator will // lower the call to fabs into inline code. if (BB == &F->getEntryBlock() && - FirstNonDbg(BB->front()) == CI && - FirstNonDbg(std::next(BB->begin())) == TI && - CI->getCalledFunction() && + FirstNonDbg(BB->front().getIterator()) == CI && + FirstNonDbg(std::next(BB->begin())) == TI && CI->getCalledFunction() && !TTI->isLoweredToCall(CI->getCalledFunction())) { // A single-block function with just a call and a return. Check that // the arguments match. @@ -636,19 +639,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // tail call if all of the instructions between the call and the return are // movable to above the call itself, leaving the call next to the return. // Check that this is the case now. - BasicBlock::iterator BBI = CI; + BasicBlock::iterator BBI(CI); for (++BBI; &*BBI != Ret; ++BBI) { - if (CanMoveAboveCall(BBI, CI)) continue; + if (CanMoveAboveCall(&*BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = - CanTransformAccumulatorRecursion(BBI, CI))) { + CanTransformAccumulatorRecursion(&*BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction // accumulates. - AccumulatorRecursionInstr = BBI; + AccumulatorRecursionInstr = &*BBI; } else { return false; // Otherwise, we cannot eliminate the tail recursion! } @@ -698,19 +701,19 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, NEBI = NewEntry->begin(); OEBI != E; ) if (AllocaInst *AI = dyn_cast(OEBI++)) if (isa(AI->getArraySize())) - AI->moveBefore(NEBI); + AI->moveBefore(&*NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. // For now, we initialize each PHI to only have the real arguments // which are passed in. - Instruction *InsertPos = OldEntry->begin(); + Instruction *InsertPos = &OldEntry->front(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! - PN->addIncoming(I, NewEntry); + PN->addIncoming(&*I, NewEntry); ArgumentPHIs.push_back(PN); } } @@ -739,10 +742,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. 
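   // (Illustrative sketch, not part of the original patch: for
   //    int fac(int n) { return n <= 1 ? 1 : n * fac(n - 1); }
   //  the multiply is the accumulator instruction, and AccPN takes the role
   //  of acc in the iterative form acc = acc * n.)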
pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); - PHINode *AccPN = - PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), - std::distance(PB, PE) + 1, - "accumulator.tr", OldEntry->begin()); + PHINode *AccPN = PHINode::Create( + AccumulatorRecursionEliminationInitVal->getType(), + std::distance(PB, PE) + 1, "accumulator.tr", &OldEntry->front()); // Loop over all of the predecessors of the tail recursion block. For the // real entry into the function we seed the PHI with the initial value, diff --git a/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/lib/Transforms/Utils/ASanStackFrameLayout.cpp index 03c3a80170a3..409326eba401 100644 --- a/lib/Transforms/Utils/ASanStackFrameLayout.cpp +++ b/lib/Transforms/Utils/ASanStackFrameLayout.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/ADT/SmallString.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include namespace llvm { diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index e9f62391a44f..0262358fa3d5 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -52,32 +52,34 @@ // http://wiki.dwarfstd.org/index.php?title=Path_Discriminators //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; #define DEBUG_TYPE "add-discriminators" namespace { - struct AddDiscriminators : public FunctionPass { - static char ID; // Pass identification, replacement for typeid - AddDiscriminators() : FunctionPass(ID) { - initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry()); - } +struct AddDiscriminators : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + AddDiscriminators() : FunctionPass(ID) { + initializeAddDiscriminatorsPass(*PassRegistry::getPassRegistry()); + } - bool runOnFunction(Function &F) override; - }; + bool runOnFunction(Function &F) override; +}; } char AddDiscriminators::ID = 0; @@ -89,17 +91,17 @@ INITIALIZE_PASS_END(AddDiscriminators, "add-discriminators", // Command line option to disable discriminator generation even in the // presence of debug information. This is only needed when debugging // debug info generation issues. -static cl::opt -NoDiscriminators("no-discriminators", cl::init(false), - cl::desc("Disable generation of discriminator information.")); +static cl::opt NoDiscriminators( + "no-discriminators", cl::init(false), + cl::desc("Disable generation of discriminator information.")); FunctionPass *llvm::createAddDiscriminatorsPass() { return new AddDiscriminators(); } static bool hasDebugInfo(const Function &F) { - NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu"); - return CUNodes != nullptr; + DISubprogram *S = getDISubprogram(&F); + return S != nullptr; } /// \brief Assign DWARF discriminators. 
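/// For instance (an assumed one-line source, not from the patch):
///   if (c) foo(); else bar();
/// both calls share the same file:line, so a sample profile cannot attribute
/// counts to the right call unless the two blocks get distinct discriminators.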
@@ -159,8 +161,7 @@ bool AddDiscriminators::runOnFunction(Function &F) { // Simlarly, if the function has no debug info, do nothing. // Finally, if this module is built with dwarf versions earlier than 4, // do nothing (discriminator support is a DWARF 4 feature). - if (NoDiscriminators || - !hasDebugInfo(F) || + if (NoDiscriminators || !hasDebugInfo(F) || F.getParent()->getDwarfVersion() < 4) return false; @@ -169,59 +170,77 @@ bool AddDiscriminators::runOnFunction(Function &F) { LLVMContext &Ctx = M->getContext(); DIBuilder Builder(*M, /*AllowUnresolved*/ false); - // Traverse all the blocks looking for instructions in different - // blocks that are at the same file:line location. - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *B = I; - TerminatorInst *Last = B->getTerminator(); - const DILocation *LastDIL = Last->getDebugLoc(); - if (!LastDIL) - continue; + typedef std::pair Location; + typedef DenseMap BBScopeMap; + typedef DenseMap LocationBBMap; - for (unsigned I = 0; I < Last->getNumSuccessors(); ++I) { - BasicBlock *Succ = Last->getSuccessor(I); - Instruction *First = Succ->getFirstNonPHIOrDbgOrLifetime(); - const DILocation *FirstDIL = First->getDebugLoc(); - if (!FirstDIL) + LocationBBMap LBM; + + // Traverse all instructions in the function. If the source line location + // of the instruction appears in other basic block, assign a new + // discriminator for this instruction. + for (BasicBlock &B : F) { + for (auto &I : B.getInstList()) { + if (isa(&I)) + continue; + const DILocation *DIL = I.getDebugLoc(); + if (!DIL) + continue; + Location L = std::make_pair(DIL->getFilename(), DIL->getLine()); + auto &BBMap = LBM[L]; + auto R = BBMap.insert(std::make_pair(&B, (Metadata *)nullptr)); + if (BBMap.size() == 1) + continue; + bool InsertSuccess = R.second; + Metadata *&NewScope = R.first->second; + // If we could insert a different block in the same location, a + // discriminator is needed to distinguish both instructions. + if (InsertSuccess) { + auto *Scope = DIL->getScope(); + auto *File = + Builder.createFile(DIL->getFilename(), Scope->getDirectory()); + NewScope = Builder.createLexicalBlockFile( + Scope, File, DIL->computeNewDiscriminator()); + } + I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(), + NewScope, DIL->getInlinedAt())); + DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" + << DIL->getColumn() << ":" + << dyn_cast(NewScope)->getDiscriminator() + << I << "\n"); + Changed = true; + } + } + + // Traverse all instructions and assign new discriminators to call + // instructions with the same lineno that are in the same basic block. + // Sample base profile needs to distinguish different function calls within + // a same source line for correct profile annotation. + for (BasicBlock &B : F) { + const DILocation *FirstDIL = NULL; + for (auto &I : B.getInstList()) { + CallInst *Current = dyn_cast(&I); + if (!Current || isa(&I)) continue; - // If the first instruction (First) of Succ is at the same file - // location as B's last instruction (Last), add a new - // discriminator for First's location and all the instructions - // in Succ that share the same location with First. - if (!FirstDIL->canDiscriminate(*LastDIL)) { - // Create a new lexical scope and compute a new discriminator - // number for it. 
- StringRef Filename = FirstDIL->getFilename(); - auto *Scope = FirstDIL->getScope(); - auto *File = Builder.createFile(Filename, Scope->getDirectory()); - - // FIXME: Calculate the discriminator here, based on local information, - // and delete DILocation::computeNewDiscriminator(). The current - // solution gives different results depending on other modules in the - // same context. All we really need is to discriminate between - // FirstDIL and LastDIL -- a local map would suffice. - unsigned Discriminator = FirstDIL->computeNewDiscriminator(); - auto *NewScope = - Builder.createLexicalBlockFile(Scope, File, Discriminator); - auto *NewDIL = - DILocation::get(Ctx, FirstDIL->getLine(), FirstDIL->getColumn(), - NewScope, FirstDIL->getInlinedAt()); - DebugLoc newDebugLoc = NewDIL; - - // Attach this new debug location to First and every - // instruction following First that shares the same location. - for (BasicBlock::iterator I1(*First), E1 = Succ->end(); I1 != E1; - ++I1) { - if (I1->getDebugLoc().get() != FirstDIL) - break; - I1->setDebugLoc(newDebugLoc); - DEBUG(dbgs() << NewDIL->getFilename() << ":" << NewDIL->getLine() - << ":" << NewDIL->getColumn() << ":" - << NewDIL->getDiscriminator() << *I1 << "\n"); + DILocation *CurrentDIL = Current->getDebugLoc(); + if (FirstDIL) { + if (CurrentDIL && CurrentDIL->getLine() == FirstDIL->getLine() && + CurrentDIL->getFilename() == FirstDIL->getFilename()) { + auto *Scope = FirstDIL->getScope(); + auto *File = Builder.createFile(FirstDIL->getFilename(), + Scope->getDirectory()); + auto *NewScope = Builder.createLexicalBlockFile( + Scope, File, FirstDIL->computeNewDiscriminator()); + Current->setDebugLoc(DILocation::get( + Ctx, CurrentDIL->getLine(), CurrentDIL->getColumn(), NewScope, + CurrentDIL->getInlinedAt())); + Changed = true; + } else { + FirstDIL = CurrentDIL; } - DEBUG(dbgs() << "\n"); - Changed = true; + } else { + FirstDIL = CurrentDIL; } } } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index ef7dacac79cb..a5137e933e83 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -41,8 +41,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { // Loop through all of our successors and make sure they know that one // of their predecessors is going away. - for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) - BBTerm->getSuccessor(i)->removePredecessor(BB); + for (BasicBlock *Succ : BBTerm->successors()) + Succ->removePredecessor(BB); // Zap all the instructions in the block. while (!BB->empty()) { @@ -65,7 +65,7 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) { /// any single-entry PHI nodes in it, fold them away. This handles the case /// when all entries to the PHI nodes in a block are guaranteed equal, such as /// when the block has exactly one predecessor. -void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA, +void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceAnalysis *MemDep) { if (!isa(BB->begin())) return; @@ -77,8 +77,6 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, AliasAnalysis *AA, if (MemDep) MemDep->removeInstruction(PN); // Memdep updates AA itself. - else if (AA && isa(PN->getType())) - AA->deleteValue(PN); PN->eraseFromParent(); } @@ -108,7 +106,7 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) { /// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor, /// if possible. The return value indicates success or failure. 
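/// For example (an assumed CFG, not from the patch): when %pred ends in an
/// unconditional branch to %bb and %bb has no other predecessors, %bb's
/// instructions are spliced into %pred and the branch is removed.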
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, - LoopInfo *LI, AliasAnalysis *AA, + LoopInfo *LI, MemoryDependenceAnalysis *MemDep) { // Don't merge away blocks who have their address taken. if (BB->hasAddressTaken()) return false; @@ -119,8 +117,9 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, // Don't break self-loops. if (PredBB == BB) return false; - // Don't break invokes. - if (isa(PredBB->getTerminator())) return false; + // Don't break unwinding instructions. + if (PredBB->getTerminator()->isExceptional()) + return false; succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); BasicBlock *OnlySucc = BB; @@ -145,7 +144,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT, // Begin by getting rid of unneeded PHIs. if (isa(BB->front())) - FoldSingleEntryPHINodes(BB, AA, MemDep); + FoldSingleEntryPHINodes(BB, MemDep); // Delete the unconditional branch from the predecessor... PredBB->getInstList().pop_back(); @@ -253,7 +252,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, // block. assert(SP == BB && "CFG broken"); SP = nullptr; - return SplitBlock(Succ, Succ->begin(), DT, LI); + return SplitBlock(Succ, &Succ->front(), DT, LI); } // Otherwise, if BB has a single successor, split it at the bottom of the @@ -284,8 +283,8 @@ llvm::SplitAllCriticalEdges(Function &F, /// BasicBlock *llvm::SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI) { - BasicBlock::iterator SplitIt = SplitPt; - while (isa(SplitIt) || isa(SplitIt)) + BasicBlock::iterator SplitIt = SplitPt->getIterator(); + while (isa(SplitIt) || SplitIt->isEHPad()) ++SplitIt; BasicBlock *New = Old->splitBasicBlock(SplitIt, Old->getName()+".split"); @@ -393,7 +392,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, /// from NewBB. This also updates AliasAnalysis, if available. static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, ArrayRef Preds, BranchInst *BI, - AliasAnalysis *AA, bool HasLoopExit) { + bool HasLoopExit) { // Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB. SmallPtrSet PredSet(Preds.begin(), Preds.end()); for (BasicBlock::iterator I = OrigBB->begin(); isa(I); ) { @@ -474,17 +473,20 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB, /// BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, ArrayRef Preds, - const char *Suffix, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, - bool PreserveLCSSA) { + const char *Suffix, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { + // Do not attempt to split that which cannot be split. + if (!BB->canSplitPredecessors()) + return nullptr; + // For the landingpads we need to act a bit differently. // Delegate this work to the SplitLandingPadPredecessors. if (BB->isLandingPad()) { SmallVector NewBBs; std::string NewName = std::string(Suffix) + ".split-lp"; - SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), - NewBBs, AA, DT, LI, PreserveLCSSA); + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT, + LI, PreserveLCSSA); return NewBBs[0]; } @@ -523,7 +525,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. 
- UpdatePHINodes(BB, NewBB, Preds, BI, AA, HasLoopExit); + UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); return NewBB; } @@ -544,8 +546,8 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef Preds, const char *Suffix1, const char *Suffix2, SmallVectorImpl &NewBBs, - AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); // Create a new basic block for OrigBB's predecessors listed in Preds. Insert @@ -574,7 +576,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB1. - UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, AA, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB1, Preds, BI1, HasLoopExit); // Move the remaining edges from OrigBB to point to NewBB2. SmallVector NewBB2Preds; @@ -611,7 +613,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, PreserveLCSSA, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB2. - UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, AA, HasLoopExit); + UpdatePHINodes(OrigBB, NewBB2, NewBB2Preds, BI2, HasLoopExit); } LandingPadInst *LPad = OrigBB->getLandingPadInst(); @@ -661,7 +663,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, // return instruction. V = BCI->getOperand(0); NewBC = BCI->clone(); - Pred->getInstList().insert(NewRet, NewBC); + Pred->getInstList().insert(NewRet->getIterator(), NewBC); *i = NewBC; } if (PHINode *PN = dyn_cast(V)) { @@ -707,7 +709,7 @@ TerminatorInst *llvm::SplitBlockAndInsertIfThen(Value *Cond, MDNode *BranchWeights, DominatorTree *DT) { BasicBlock *Head = SplitBefore->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); TerminatorInst *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); @@ -757,7 +759,7 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore, TerminatorInst **ElseTerm, MDNode *BranchWeights) { BasicBlock *Head = SplitBefore->getParent(); - BasicBlock *Tail = Head->splitBasicBlock(SplitBefore); + BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); TerminatorInst *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail); diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 7e83c9eeceb7..95825991cee9 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -101,10 +101,9 @@ static void createPHIsForSplitLoopExit(ArrayRef Preds, continue; // Otherwise a new PHI is needed. Create one and populate it. - PHINode *NewPN = - PHINode::Create(PN->getType(), Preds.size(), "split", - SplitBB->isLandingPad() ? - SplitBB->begin() : SplitBB->getTerminator()); + PHINode *NewPN = PHINode::Create( + PN->getType(), Preds.size(), "split", + SplitBB->isLandingPad() ? 
&SplitBB->front() : SplitBB->getTerminator()); for (unsigned i = 0, e = Preds.size(); i != e; ++i) NewPN->addIncoming(V, Preds[i]); @@ -141,9 +140,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); - // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // Splitting the critical edge to a pad block is non-trivial. Don't do // it in this generic function. - if (DestBB->isLandingPad()) return nullptr; + if (DestBB->isEHPad()) return nullptr; // Create a new basic block, linking it into the CFG. BasicBlock *NewBB = BasicBlock::Create(TI->getContext(), @@ -157,7 +156,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, // Insert the block into the function... right after the block TI lives in. Function &F = *TIBB->getParent(); - Function::iterator FBBI = TIBB; + Function::iterator FBBI = TIBB->getIterator(); F.getBasicBlockList().insert(++FBBI, NewBB); // If there are any PHI nodes in DestBB, we need to update them so that they @@ -197,7 +196,6 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, } // If we have nothing to update, just return. - auto *AA = Options.AA; auto *DT = Options.DT; auto *LI = Options.LI; if (!DT && !LI) @@ -319,10 +317,9 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, LoopPreds.push_back(P); } if (!LoopPreds.empty()) { - assert(!DestBB->isLandingPad() && - "We don't split edges to landing pads!"); + assert(!DestBB->isEHPad() && "We don't split edges to EH pads!"); BasicBlock *NewExitBB = SplitBlockPredecessors( - DestBB, LoopPreds, "split", AA, DT, LI, Options.PreserveLCSSA); + DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA); if (Options.PreserveLCSSA) createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 8aa7b2a65ba9..64b44a6b7919 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -13,6 +13,7 @@ #include "llvm/Transforms/Utils/BuildLibCalls.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" @@ -21,7 +22,6 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" -#include "llvm/Analysis/TargetLibraryInfo.h" using namespace llvm; @@ -55,32 +55,6 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout &DL, return CI; } -/// EmitStrNLen - Emit a call to the strnlen function to the builder, for the -/// specified pointer. Ptr is required to be some pointer type, MaxLen must -/// be of size_t type, and the return value has 'intptr_t' type. 
-Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B, - const DataLayout &DL, const TargetLibraryInfo *TLI) { - if (!TLI->has(LibFunc::strnlen)) - return nullptr; - - Module *M = B.GetInsertBlock()->getParent()->getParent(); - AttributeSet AS[2]; - AS[0] = AttributeSet::get(M->getContext(), 1, Attribute::NoCapture); - Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind }; - AS[1] = AttributeSet::get(M->getContext(), AttributeSet::FunctionIndex, AVs); - - LLVMContext &Context = B.GetInsertBlock()->getContext(); - Constant *StrNLen = - M->getOrInsertFunction("strnlen", AttributeSet::get(M->getContext(), AS), - DL.getIntPtrType(Context), B.getInt8PtrTy(), - DL.getIntPtrType(Context), nullptr); - CallInst *CI = B.CreateCall(StrNLen, {CastToCStr(Ptr, B), MaxLen}, "strnlen"); - if (const Function *F = dyn_cast(StrNLen->stripPointerCasts())) - CI->setCallingConv(F->getCallingConv()); - - return CI; -} - /// EmitStrChr - Emit a call to the strchr function to the builder, for the /// specified pointer and character. Ptr is required to be some pointer type, /// and the return value has 'i8*' type. diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index f2d5e0745035..0914699a2e38 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -82,7 +82,7 @@ static bool insertFastDiv(Function &F, bool UseSignedOp, DivCacheTy &PerBBDivCache) { // Get instruction operands - Instruction *Instr = J; + Instruction *Instr = &*J; Value *Dividend = Instr->getOperand(0); Value *Divisor = Instr->getOperand(1); @@ -94,7 +94,7 @@ static bool insertFastDiv(Function &F, } // Basic Block is split before divide - BasicBlock *MainBB = I; + BasicBlock *MainBB = &*I; BasicBlock *SuccessorBB = I->splitBasicBlock(J); ++I; //advance iterator I to successorBB @@ -190,7 +190,7 @@ static bool reuseOrInsertFastDiv(Function &F, bool UseSignedOp, DivCacheTy &PerBBDivCache) { // Get instruction operands - Instruction *Instr = J; + Instruction *Instr = &*J; DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1)); DivCacheTy::iterator CacheI = PerBBDivCache.find(Key); diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 716e655affb9..8308a9b69149 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -34,6 +34,7 @@ add_llvm_library(LLVMTransformUtils SimplifyIndVar.cpp SimplifyInstructions.cpp SimplifyLibCalls.cpp + SplitModule.cpp SymbolRewriter.cpp UnifyFunctionExitNodes.cpp Utils.cpp diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index cc4d6c6fb192..854a3b855f54 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -52,8 +52,8 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - VMap[II] = NewInst; // Add instruction map to value. - + VMap[&*II] = NewInst; // Add instruction map to value. 
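+      // (Descriptive note: recording the old->new mapping here is what lets
+      // the callers' later remap step redirect operands, so a cloned use of
+      // %x ends up pointing at the clone of %x rather than the original.)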
+ hasCalls |= (isa(II) && !isa(II)); if (const AllocaInst *AI = dyn_cast(II)) { if (isa(AI->getArraySize())) @@ -85,9 +85,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, assert(NameSuffix && "NameSuffix cannot be null!"); #ifndef NDEBUG - for (Function::const_arg_iterator I = OldFunc->arg_begin(), - E = OldFunc->arg_end(); I != E; ++I) - assert(VMap.count(I) && "No mapping from source argument specified!"); + for (const Argument &I : OldFunc->args()) + assert(VMap.count(&I) && "No mapping from source argument specified!"); #endif // Copy all attributes other than those stored in the AttributeSet. We need @@ -96,6 +95,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, NewFunc->copyAttributesFrom(OldFunc); NewFunc->setAttributes(NewAttrs); + // Fix up the personality function that got copied over. + if (OldFunc->hasPersonalityFn()) + NewFunc->setPersonalityFn( + MapValue(OldFunc->getPersonalityFn(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + AttributeSet OldAttrs = OldFunc->getAttributes(); // Clone any argument attributes that are present in the VMap. for (const Argument &OldArg : OldFunc->args()) @@ -136,7 +142,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, if (BB.hasAddressTaken()) { Constant *OldBBAddr = BlockAddress::get(const_cast(OldFunc), const_cast(&BB)); - VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); } // Note return instructions for the caller. @@ -146,11 +152,13 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Loop over all of the instructions in the function, fixing up operand // references as we go. This uses VMap to do all the hard work. - for (Function::iterator BB = cast(VMap[OldFunc->begin()]), - BE = NewFunc->end(); BB != BE; ++BB) + for (Function::iterator BB = + cast(VMap[&OldFunc->front()])->getIterator(), + BE = NewFunc->end(); + BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... - for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) - RemapInstruction(II, VMap, + for (Instruction &II : *BB) + RemapInstruction(&II, VMap, ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, TypeMapper, Materializer); } @@ -187,11 +195,9 @@ static void CloneDebugInfoMetadata(Function *NewFunc, const Function *OldFunc, const DISubprogram *OldSubprogramMDNode = FindSubprogram(OldFunc, Finder); if (!OldSubprogramMDNode) return; - // Ensure that OldFunc appears in the map. - // (if it's already there it must point to NewFunc anyway) - VMap[OldFunc] = NewFunc; auto *NewSubprogram = cast(MapMetadata(OldSubprogramMDNode, VMap)); + NewFunc->setSubprogram(NewSubprogram); for (auto *CU : Finder.compile_units()) { auto Subprograms = CU->getSubprograms(); @@ -222,10 +228,9 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, // The user might be deleting arguments to the function by specifying them in // the VMap. If so, we need to not add the arguments to the arg ty vector // - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet? - ArgTypes.push_back(I->getType()); + for (const Argument &I : F->args()) + if (VMap.count(&I) == 0) // Haven't mapped the argument to anything yet? + ArgTypes.push_back(I.getType()); // Create a new function type... 
FunctionType *FTy = FunctionType::get(F->getFunctionType()->getReturnType(), @@ -236,11 +241,10 @@ Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap, // Loop over the arguments, copying the names of the mapped arguments over... Function::arg_iterator DestI = NewF->arg_begin(); - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - if (VMap.count(I) == 0) { // Is this argument preserved? - DestI->setName(I->getName()); // Copy the name over... - VMap[I] = DestI++; // Add mapping to VMap + for (const Argument & I : F->args()) + if (VMap.count(&I) == 0) { // Is this argument preserved? + DestI->setName(I.getName()); // Copy the name over... + VMap[&I] = &*DestI++; // Add mapping to VMap } if (ModuleLevelChanges) @@ -330,8 +334,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, II != IE; ++II) { // If the "Director" remaps the instruction, don't clone it. if (Director) { - CloningDirector::CloningAction Action - = Director->handleInstruction(VMap, II, NewBB); + CloningDirector::CloningAction Action = + Director->handleInstruction(VMap, &*II, NewBB); // If the cloning director says stop, we want to stop everything, not // just break out of the loop (which would cause the terminator to be // cloned). The cloning director is responsible for inserting a proper @@ -365,7 +369,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (Value *MappedV = VMap.lookup(V)) V = MappedV; - VMap[II] = V; + VMap[&*II] = V; delete NewInst; continue; } @@ -373,9 +377,15 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); - VMap[II] = NewInst; // Add instruction map to value. + VMap[&*II] = NewInst; // Add instruction map to value. NewBB->getInstList().push_back(NewInst); hasCalls |= (isa(II) && !isa(II)); + + if (CodeInfo) + if (auto CS = ImmutableCallSite(&*II)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + if (const AllocaInst *AI = dyn_cast(II)) { if (isa(AI->getArraySize())) hasStaticAllocas = true; @@ -400,8 +410,8 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If the director says to skip with a terminate instruction, we still // need to clone this block's successors. const TerminatorInst *TI = NewBB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - ToClone.push_back(TI->getSuccessor(i)); + for (const BasicBlock *Succ : TI->successors()) + ToClone.push_back(Succ); return; } assert(Action != CloningDirector::SkipInstruction && @@ -447,11 +457,16 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, NewInst->setName(OldTI->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); VMap[OldTI] = NewInst; // Add instruction map to value. - + + if (CodeInfo) + if (auto CS = ImmutableCallSite(OldTI)) + if (CS.hasOperandBundles()) + CodeInfo->OperandBundleCallSites.push_back(NewInst); + // Recursively clone any reachable successor blocks. 
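   // (Descriptive note: only successors of a cloned terminator enter ToClone,
   // so blocks unreachable from the starting block are pruned, never cloned.)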
const TerminatorInst *TI = BB->getTerminator(); - for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - ToClone.push_back(TI->getSuccessor(i)); + for (const BasicBlock *Succ : TI->successors()) + ToClone.push_back(Succ); } if (CodeInfo) { @@ -484,12 +499,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, } #ifndef NDEBUG - // If the cloning starts at the begining of the function, verify that + // If the cloning starts at the beginning of the function, verify that // the function arguments are mapped. if (!StartingInst) - for (Function::const_arg_iterator II = OldFunc->arg_begin(), - E = OldFunc->arg_end(); II != E; ++II) - assert(VMap.count(II) && "No mapping from source argument specified!"); + for (const Argument &II : OldFunc->args()) + assert(VMap.count(&II) && "No mapping from source argument specified!"); #endif PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges, @@ -499,12 +513,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, StartingBB = StartingInst->getParent(); else { StartingBB = &OldFunc->getEntryBlock(); - StartingInst = StartingBB->begin(); + StartingInst = &StartingBB->front(); } // Clone the entry block, and anything recursively reachable from it. std::vector CloneWorklist; - PFC.CloneBlock(StartingBB, StartingInst, CloneWorklist); + PFC.CloneBlock(StartingBB, StartingInst->getIterator(), CloneWorklist); while (!CloneWorklist.empty()) { const BasicBlock *BB = CloneWorklist.back(); CloneWorklist.pop_back(); @@ -517,9 +531,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // // Defer PHI resolution until rest of function is resolved. SmallVector PHIToResolve; - for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); - BI != BE; ++BI) { - Value *V = VMap[BI]; + for (const BasicBlock &BI : *OldFunc) { + Value *V = VMap[&BI]; BasicBlock *NewBB = cast_or_null(V); if (!NewBB) continue; // Dead block. @@ -528,7 +541,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Handle PHI nodes specially, as we have to remove references to dead // blocks. - for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); I != E; ++I) { + for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { // PHI nodes may have been remapped to non-PHI nodes by the caller or // during the cloning process. if (const PHINode *PN = dyn_cast(I)) { @@ -621,8 +634,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, while ((PN = dyn_cast(I++))) { Value *NV = UndefValue::get(PN->getType()); PN->replaceAllUsesWith(NV); - assert(VMap[OldI] == PN && "VMap mismatch"); - VMap[OldI] = NV; + assert(VMap[&*OldI] == PN && "VMap mismatch"); + VMap[&*OldI] = NV; PN->eraseFromParent(); ++OldI; } @@ -644,15 +657,15 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // and zap unconditional fall-through branches. This happens all the time when // specializing code: code specialization turns conditional branches into // uncond branches, and this code folds them. - Function::iterator Begin = cast(VMap[StartingBB]); + Function::iterator Begin = cast(VMap[StartingBB])->getIterator(); Function::iterator I = Begin; while (I != NewFunc->end()) { // Check if this block has become dead during inlining or other // simplifications. Note that the first block will appear dead, as it has // not yet been wired up properly. 
- if (I != Begin && (pred_begin(I) == pred_end(I) || - I->getSinglePredecessor() == I)) { - BasicBlock *DeadBB = I++; + if (I != Begin && (pred_begin(&*I) == pred_end(&*I) || + I->getSinglePredecessor() == &*I)) { + BasicBlock *DeadBB = &*I++; DeleteDeadBlock(DeadBB); continue; } @@ -662,7 +675,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // simplification required looking through PHI nodes, those are only // available after forming the full basic block. That may leave some here, // and we still want to prune the dead code as early as possible. - ConstantFoldTerminator(I); + ConstantFoldTerminator(&*I); BranchInst *BI = dyn_cast(I->getTerminator()); if (!BI || BI->isConditional()) { ++I; continue; } @@ -681,7 +694,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, BI->eraseFromParent(); // Make all PHI nodes that referred to Dest now refer to I as their source. - Dest->replaceAllUsesWith(I); + Dest->replaceAllUsesWith(&*I); // Move all the instructions in the succ to the pred. I->getInstList().splice(I->end(), Dest->getInstList()); @@ -695,7 +708,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Make a final pass over the basic blocks from the old function to gather // any return instructions which survived folding. We have to do this here // because we can iteratively remove and merge returns above. - for (Function::iterator I = cast(VMap[StartingBB]), + for (Function::iterator I = cast(VMap[StartingBB])->getIterator(), E = NewFunc->end(); I != E; ++I) if (ReturnInst *RI = dyn_cast(I->getTerminator())) @@ -717,7 +730,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, const char *NameSuffix, ClonedCodeInfo *CodeInfo, Instruction *TheCall) { - CloneAndPruneIntoFromInst(NewFunc, OldFunc, OldFunc->front().begin(), VMap, + CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, ModuleLevelChanges, Returns, NameSuffix, CodeInfo, nullptr); } @@ -780,9 +793,10 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB, } // Move them physically from the end of the block list. - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), NewPH); - F->getBasicBlockList().splice(Before, F->getBasicBlockList(), - NewLoop->getHeader(), F->end()); + F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), + NewPH); + F->getBasicBlockList().splice(Before->getIterator(), F->getBasicBlockList(), + NewLoop->getHeader()->getIterator(), F->end()); return NewLoop; } diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 61f1811e7b4a..ab083353ece6 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -20,21 +20,28 @@ #include "llvm-c/Core.h" using namespace llvm; -/// CloneModule - Return an exact copy of the specified module. This is not as -/// easy as it might seem because we have to worry about making copies of global -/// variables and functions, and making their (initializers and references, -/// respectively) refer to the right globals. +/// This is not as easy as it might seem because we have to worry about making +/// copies of global variables and functions, and making their (initializers and +/// references, respectively) refer to the right globals. 
/// -Module *llvm::CloneModule(const Module *M) { +std::unique_ptr llvm::CloneModule(const Module *M) { // Create the value map that maps things from the old module over to the new // module. ValueToValueMapTy VMap; return CloneModule(M, VMap); } -Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { +std::unique_ptr llvm::CloneModule(const Module *M, + ValueToValueMapTy &VMap) { + return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; }); +} + +std::unique_ptr llvm::CloneModule( + const Module *M, ValueToValueMapTy &VMap, + std::function ShouldCloneDefinition) { // First off, we need to create the new module. - Module *New = new Module(M->getModuleIdentifier(), M->getContext()); + std::unique_ptr New = + llvm::make_unique(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); @@ -52,26 +59,48 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { (GlobalVariable*) nullptr, I->getThreadLocalMode(), I->getType()->getAddressSpace()); - GV->copyAttributesFrom(I); - VMap[I] = GV; + GV->copyAttributesFrom(&*I); + VMap[&*I] = GV; } // Loop over the functions in the module, making external functions as before for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { Function *NF = - Function::Create(cast(I->getType()->getElementType()), - I->getLinkage(), I->getName(), New); - NF->copyAttributesFrom(I); - VMap[I] = NF; + Function::Create(cast(I->getType()->getElementType()), + I->getLinkage(), I->getName(), New.get()); + NF->copyAttributesFrom(&*I); + VMap[&*I] = NF; } // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - auto *PTy = cast(I->getType()); - auto *GA = GlobalAlias::create(PTy, I->getLinkage(), I->getName(), New); - GA->copyAttributesFrom(I); - VMap[I] = GA; + if (!ShouldCloneDefinition(&*I)) { + // An alias cannot act as an external reference, so we need to create + // either a function or a global variable depending on the value type. + // FIXME: Once pointee types are gone we can probably pick one or the + // other. + GlobalValue *GV; + if (I->getValueType()->isFunctionTy()) + GV = Function::Create(cast(I->getValueType()), + GlobalValue::ExternalLinkage, I->getName(), + New.get()); + else + GV = new GlobalVariable( + *New, I->getValueType(), false, GlobalValue::ExternalLinkage, + (Constant *)nullptr, I->getName(), (GlobalVariable *)nullptr, + I->getThreadLocalMode(), I->getType()->getAddressSpace()); + VMap[&*I] = GV; + // We do not copy attributes (mainly because copying between different + // kinds of globals is forbidden), but this is generally not required for + // correctness. + continue; + } + auto *GA = GlobalAlias::create(I->getValueType(), + I->getType()->getPointerAddressSpace(), + I->getLinkage(), I->getName(), New.get()); + GA->copyAttributesFrom(&*I); + VMap[&*I] = GA; } // Now that all of the things that global variable initializer can refer to @@ -80,7 +109,12 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { - GlobalVariable *GV = cast(VMap[I]); + GlobalVariable *GV = cast(VMap[&*I]); + if (!ShouldCloneDefinition(&*I)) { + // Skip after setting the correct linkage for an external reference. 
+ GV->setLinkage(GlobalValue::ExternalLinkage); + continue; + } if (I->hasInitializer()) GV->setInitializer(MapValue(I->getInitializer(), VMap)); } @@ -88,18 +122,22 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // Similarly, copy over function bodies now... // for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { - Function *F = cast(VMap[I]); + Function *F = cast(VMap[&*I]); + if (!ShouldCloneDefinition(&*I)) { + // Skip after setting the correct linkage for an external reference. + F->setLinkage(GlobalValue::ExternalLinkage); + continue; + } if (!I->isDeclaration()) { Function::arg_iterator DestI = F->arg_begin(); for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); ++J) { DestI->setName(J->getName()); - VMap[J] = DestI++; + VMap[&*J] = &*DestI++; } SmallVector Returns; // Ignore returns cloned. - CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns); - + CloneFunctionInto(F, &*I, VMap, /*ModuleLevelChanges=*/true, Returns); } if (I->hasPersonalityFn()) @@ -109,7 +147,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { // And aliases for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - GlobalAlias *GA = cast(VMap[I]); + // We already dealt with undefined aliases above. + if (!ShouldCloneDefinition(&*I)) + continue; + GlobalAlias *GA = cast(VMap[&*I]); if (const Constant *C = I->getAliasee()) GA->setAliasee(MapValue(C, VMap)); } @@ -129,7 +170,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { extern "C" { LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) { - return wrap(CloneModule(unwrap(M))); + return wrap(CloneModule(unwrap(M)).release()); } } diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index ab89b41f6788..823696d88e65 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -51,7 +51,7 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden, /// \brief Test whether a block is valid for extraction. static bool isBlockValidForExtraction(const BasicBlock &BB) { // Landing pads must be in the function where they were inserted for cleanup. - if (BB.isLandingPad()) + if (BB.isEHPad()) return false; // Don't hoist code containing allocas, invokes, or vastarts. @@ -175,7 +175,7 @@ void CodeExtractor::findInputsOutputs(ValueSet &Inputs, for (User *U : II->users()) if (!definedInRegion(Blocks, U)) { - Outputs.insert(II); + Outputs.insert(&*II); break; } } @@ -211,7 +211,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI(); + BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, Header->getName()+".ce"); @@ -246,7 +246,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. 
PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, - PN->getName()+".ce", NewBB->begin()); + PN->getName() + ".ce", &NewBB->front()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they @@ -266,7 +266,8 @@ void CodeExtractor::splitReturnBlocks() { for (SetVector::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) if (ReturnInst *RI = dyn_cast((*I)->getTerminator())) { - BasicBlock *New = (*I)->splitBasicBlock(RI, (*I)->getName()+".ret"); + BasicBlock *New = + (*I)->splitBasicBlock(RI->getIterator(), (*I)->getName() + ".ret"); if (DT) { // Old dominates New. New node dominates all other nodes dominated // by Old. @@ -365,10 +366,10 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); TerminatorInst *TI = newFunction->begin()->getTerminator(); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructTy, AI, Idx, "gep_" + inputs[i]->getName(), TI); + StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); } else - RewriteVal = AI++; + RewriteVal = &*AI++; std::vector Users(inputs[i]->user_begin(), inputs[i]->user_end()); for (std::vector::iterator use = Users.begin(), useE = Users.end(); @@ -440,8 +441,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, StructValues.push_back(*i); } else { AllocaInst *alloca = - new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc", - codeReplacer->getParent()->begin()->begin()); + new AllocaInst((*i)->getType(), nullptr, (*i)->getName() + ".loc", + &codeReplacer->getParent()->front().front()); ReloadOutputs.push_back(alloca); params.push_back(alloca); } @@ -457,9 +458,8 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, // Allocate a struct at the beginning of this function StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - Struct = - new AllocaInst(StructArgTy, nullptr, "structArg", - codeReplacer->getParent()->begin()->begin()); + Struct = new AllocaInst(StructArgTy, nullptr, "structArg", + &codeReplacer->getParent()->front().front()); params.push_back(Struct); for (unsigned i = 0, e = inputs.size(); i != e; ++i) { @@ -566,8 +566,12 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, bool DominatesDef = true; - if (InvokeInst *Invoke = dyn_cast(outputs[out])) { - DefBlock = Invoke->getNormalDest(); + BasicBlock *NormalDest = nullptr; + if (auto *Invoke = dyn_cast(outputs[out])) + NormalDest = Invoke->getNormalDest(); + + if (NormalDest) { + DefBlock = NormalDest; // Make sure we are looking at the original successor block, not // at a newly inserted exit block, which won't be in the dominator @@ -606,11 +610,11 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut+out); GetElementPtrInst *GEP = GetElementPtrInst::Create( - StructArgTy, OAI, Idx, "gep_" + outputs[out]->getName(), + StructArgTy, &*OAI, Idx, "gep_" + outputs[out]->getName(), NTRet); new StoreInst(outputs[out], GEP, NTRet); } else { - new StoreInst(outputs[out], OAI, NTRet); + new StoreInst(outputs[out], &*OAI, NTRet); } } // Advance output iterator even if we don't emit a store diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp index dc95089cd2ca..b56ff684e8a8 100644 --- a/lib/Transforms/Utils/CtorUtils.cpp +++ 
b/lib/Transforms/Utils/CtorUtils.cpp @@ -50,7 +50,7 @@ void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemove) { GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(), CA, "", GCL->getThreadLocalMode()); - GCL->getParent()->getGlobalList().insert(GCL, NGV); + GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV); NGV->takeName(GCL); // Nuke the old list, replacing any uses with the new one. diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 003da58ee798..75a1dde57c4c 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -35,8 +35,8 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, I.getName()+".reg2mem", AllocaPoint); } else { Function *F = I.getParent()->getParent(); - Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem", - F->getEntryBlock().begin()); + Slot = new AllocaInst(I.getType(), nullptr, I.getName() + ".reg2mem", + &F->getEntryBlock().front()); } // We cannot demote invoke instructions to the stack if their normal edge @@ -89,16 +89,15 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, // AFTER the terminator instruction. BasicBlock::iterator InsertPt; if (!isa(I)) { - InsertPt = &I; - ++InsertPt; - for (; isa(InsertPt) || isa(InsertPt); ++InsertPt) + InsertPt = ++I.getIterator(); + for (; isa(InsertPt) || InsertPt->isEHPad(); ++InsertPt) /* empty */; // Don't insert before PHI nodes or landingpad instrs. } else { InvokeInst &II = cast(I); InsertPt = II.getNormalDest()->getFirstInsertionPt(); } - new StoreInst(&I, Slot, InsertPt); + new StoreInst(&I, Slot, &*InsertPt); return Slot; } @@ -118,8 +117,8 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { P->getName()+".reg2mem", AllocaPoint); } else { Function *F = P->getParent()->getParent(); - Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem", - F->getEntryBlock().begin()); + Slot = new AllocaInst(P->getType(), nullptr, P->getName() + ".reg2mem", + &F->getEntryBlock().front()); } // Iterate over each operand inserting a store in each predecessor. @@ -133,12 +132,12 @@ AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { } // Insert a load in place of the PHI and replace all uses. - BasicBlock::iterator InsertPt = P; + BasicBlock::iterator InsertPt = P->getIterator(); - for (; isa(InsertPt) || isa(InsertPt); ++InsertPt) + for (; isa(InsertPt) || InsertPt->isEHPad(); ++InsertPt) /* empty */; // Don't insert before PHI nodes or landingpad instrs. - Value *V = new LoadInst(Slot, P->getName()+".reload", InsertPt); + Value *V = new LoadInst(Slot, P->getName() + ".reload", &*InsertPt); P->replaceAllUsesWith(V); // Delete PHI. diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 4eb3e3dd17d2..492ae9f69a65 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -28,12 +28,11 @@ class FlattenCFGOpt { AliasAnalysis *AA; /// \brief Use parallel-and or parallel-or to generate conditions for /// conditional branches. - bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P = nullptr); + bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder); /// \brief If \param BB is the merge block of an if-region, attempt to merge /// the if-region with an adjacent if-region upstream if two if-regions /// contain identical instructions. 
- bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr); + bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder); /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which /// are from two if-regions whose entry blocks are \p Head1 and \p /// Head2. \returns true if \p Block1 and \p Block2 contain identical @@ -122,8 +121,7 @@ public: /// its predecessor. In Case 2, \param BB (BB3) only has conditional branches /// as its predecessors. /// -bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P) { +bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) { PHINode *PHI = dyn_cast(BB->begin()); if (PHI) return false; // For simplicity, avoid cases containing PHI nodes. @@ -177,8 +175,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, // Instructions in the internal condition blocks should be safe // to hoist up. - for (BasicBlock::iterator BI = Pred->begin(), BE = PBI; BI != BE;) { - Instruction *CI = BI++; + for (BasicBlock::iterator BI = Pred->begin(), BE = PBI->getIterator(); + BI != BE;) { + Instruction *CI = &*BI++; if (isa(CI) || !isSafeToSpeculativelyExecute(CI)) return false; } @@ -315,7 +314,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, BasicBlock *Block1, BasicBlock *Block2) { TerminatorInst *PTI2 = Head2->getTerminator(); - Instruction *PBI2 = Head2->begin(); + Instruction *PBI2 = &Head2->front(); bool eq1 = (Block1 == Head1); bool eq2 = (Block2 == Head2); @@ -327,9 +326,9 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, // Check whether instructions in Block1 and Block2 are identical // and do not alias with instructions in Head2. BasicBlock::iterator iter1 = Block1->begin(); - BasicBlock::iterator end1 = Block1->getTerminator(); + BasicBlock::iterator end1 = Block1->getTerminator()->getIterator(); BasicBlock::iterator iter2 = Block2->begin(); - BasicBlock::iterator end2 = Block2->getTerminator(); + BasicBlock::iterator end2 = Block2->getTerminator()->getIterator(); while (1) { if (iter1 == end1) { @@ -338,7 +337,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, break; } - if (!iter1->isIdenticalTo(iter2)) + if (!iter1->isIdenticalTo(&*iter2)) return false; // Illegal to remove instructions with side effects except @@ -356,10 +355,10 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, return false; if (iter1->mayWriteToMemory()) { - for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { if (BI->mayReadFromMemory() || BI->mayWriteToMemory()) { // Check alias with Head2. 
- if (!AA || AA->alias(iter1, BI)) + if (!AA || AA->alias(&*iter1, &*BI)) return false; } } @@ -386,8 +385,7 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Head1, BasicBlock *Head2, /// if (a || b) /// statement; /// -bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, - Pass *P) { +bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { BasicBlock *IfTrue2, *IfFalse2; Value *IfCond2 = GetIfCondition(BB, IfTrue2, IfFalse2); Instruction *CInst2 = dyn_cast_or_null(IfCond2); @@ -413,7 +411,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, return false; TerminatorInst *PTI2 = SecondEntryBlock->getTerminator(); - Instruction *PBI2 = SecondEntryBlock->begin(); + Instruction *PBI2 = &SecondEntryBlock->front(); if (!CompareIfRegionBlock(FirstEntryBlock, SecondEntryBlock, IfTrue1, IfTrue2)) @@ -425,8 +423,8 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, // Check whether \param SecondEntryBlock has side-effect and is safe to // speculate. - for (BasicBlock::iterator BI = PBI2, BE = PTI2; BI != BE; ++BI) { - Instruction *CI = BI; + for (BasicBlock::iterator BI(PBI2), BE(PTI2); BI != BE; ++BI) { + Instruction *CI = &*BI; if (isa(CI) || CI->mayHaveSideEffects() || !isSafeToSpeculativelyExecute(CI)) return false; diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index 44b7d25d519a..3893a752503b 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -49,6 +49,10 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) { static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, SmallPtrSetImpl &PhiUsers) { + if (const GlobalVariable *GV = dyn_cast(V)) + if (GV->isExternallyInitialized()) + GS.StoredType = GlobalStatus::StoredOnce; + for (const Use &U : V->uses()) { const User *UR = U.getUser(); if (const ConstantExpr *CE = dyn_cast(UR)) { diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index d2d60d7cd9f6..14574119b9a8 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -13,14 +13,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" @@ -41,6 +42,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Support/CommandLine.h" #include + using namespace llvm; static cl::opt @@ -54,17 +56,17 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining", cl::desc("Convert align attributes to assumptions during inlining.")); bool llvm::InlineFunction(CallInst *CI, InlineFunctionInfo &IFI, - bool InsertLifetime) { - return InlineFunction(CallSite(CI), IFI, InsertLifetime); + AAResults *CalleeAAR, bool InsertLifetime) { + return InlineFunction(CallSite(CI), IFI, CalleeAAR, InsertLifetime); } bool llvm::InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI, - bool InsertLifetime) { - return InlineFunction(CallSite(II), IFI, InsertLifetime); + 
AAResults *CalleeAAR, bool InsertLifetime) { + return InlineFunction(CallSite(II), IFI, CalleeAAR, InsertLifetime); } namespace { - /// A class for recording information about inlining through an invoke. - class InvokeInliningInfo { + /// A class for recording information about inlining a landing pad. + class LandingPadInliningInfo { BasicBlock *OuterResumeDest; ///< Destination of the invoke's unwind. BasicBlock *InnerResumeDest; ///< Destination for the callee's resume. LandingPadInst *CallerLPad; ///< LandingPadInst associated with the invoke. @@ -72,7 +74,7 @@ namespace { SmallVector UnwindDestPHIValues; public: - InvokeInliningInfo(InvokeInst *II) + LandingPadInliningInfo(InvokeInst *II) : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr), CallerLPad(nullptr), InnerEHValuesPHI(nullptr) { // If there are PHI nodes in the unwind destination block, we need to keep @@ -121,14 +123,14 @@ namespace { } } }; -} +} // anonymous namespace /// Get or create a target for the branch from ResumeInsts. -BasicBlock *InvokeInliningInfo::getInnerResumeDest() { +BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { if (InnerResumeDest) return InnerResumeDest; // Split the landing pad. - BasicBlock::iterator SplitPoint = CallerLPad; ++SplitPoint; + BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator(); InnerResumeDest = OuterResumeDest->splitBasicBlock(SplitPoint, OuterResumeDest->getName() + ".body"); @@ -137,7 +139,7 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { const unsigned PHICapacity = 2; // Create corresponding new PHIs for all the PHIs in the outer landing pad. - BasicBlock::iterator InsertPoint = InnerResumeDest->begin(); + Instruction *InsertPoint = &InnerResumeDest->front(); BasicBlock::iterator I = OuterResumeDest->begin(); for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { PHINode *OuterPHI = cast(I); @@ -162,8 +164,8 @@ BasicBlock *InvokeInliningInfo::getInnerResumeDest() { /// When the landing pad block has only one predecessor, this is a simple /// branch. When there is more than one predecessor, we need to split the /// landing pad block after the landingpad instruction and jump to there. -void InvokeInliningInfo::forwardResume(ResumeInst *RI, - SmallPtrSetImpl &InlinedLPads) { +void LandingPadInliningInfo::forwardResume( + ResumeInst *RI, SmallPtrSetImpl &InlinedLPads) { BasicBlock *Dest = getInnerResumeDest(); BasicBlock *Src = RI->getParent(); @@ -182,33 +184,39 @@ void InvokeInliningInfo::forwardResume(ResumeInst *RI, /// This function analyze BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. -static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, - InvokeInliningInfo &Invoke) { +static BasicBlock * +HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { - Instruction *I = BBI++; + Instruction *I = &*BBI++; // We only need to check for function calls: inlined invoke // instructions require no special handling. CallInst *CI = dyn_cast(I); - // If this call cannot unwind, don't convert it to an invoke. - // Inline asm calls cannot throw. if (!CI || CI->doesNotThrow() || isa(CI->getCalledValue())) continue; // Convert this function call into an invoke instruction. First, split the // basic block. 
- BasicBlock *Split = BB->splitBasicBlock(CI, CI->getName()+".noexc"); + BasicBlock *Split = + BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc"); // Delete the unconditional branch inserted by splitBasicBlock BB->getInstList().pop_back(); // Create the new invoke instruction. - ImmutableCallSite CS(CI); - SmallVector InvokeArgs(CS.arg_begin(), CS.arg_end()); - InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, - Invoke.getOuterResumeDest(), - InvokeArgs, CI->getName(), BB); + SmallVector InvokeArgs(CI->arg_begin(), CI->arg_end()); + SmallVector OpBundles; + + CI->getOperandBundlesAsDefs(OpBundles); + + // Note: we're round tripping operand bundles through memory here, and that + // can potentially be avoided with a cleverer API design that we do not have + // as of this time. + + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs, + OpBundles, CI->getName(), BB); II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -219,12 +227,9 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Delete the original call Split->getInstList().pop_front(); - - // Update any PHI nodes in the exceptional block to indicate that there is - // now a new entry in them. - Invoke.addIncomingPHIValuesFor(BB); - return; + return BB; } + return nullptr; } /// If we inlined an invoke site, we need to convert calls @@ -233,8 +238,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, /// II is the invoke instruction being inlined. FirstNewBlock is the first /// block of the inlined code (the last block is the end of the function), /// and InlineCodeInfo is information about the code that got inlined. -static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, - ClonedCodeInfo &InlinedCodeInfo) { +static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { BasicBlock *InvokeDest = II->getUnwindDest(); Function *Caller = FirstNewBlock->getParent(); @@ -242,11 +247,12 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, // The inlined code is currently at the end of the function, scan from the // start of the inlined code to its end, checking for stuff we need to // rewrite. - InvokeInliningInfo Invoke(II); + LandingPadInliningInfo Invoke(II); // Get all of the inlined landing pad instructions. SmallPtrSet InlinedLPads; - for (Function::iterator I = FirstNewBlock, E = Caller->end(); I != E; ++I) + for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); + I != E; ++I) if (InvokeInst *II = dyn_cast(I->getTerminator())) InlinedLPads.insert(II->getLandingPadInst()); @@ -262,9 +268,14 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InlinedLPad->setCleanup(true); } - for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E; ++BB){ + for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { if (InlinedCodeInfo.ContainsCalls) - HandleCallsInBlockInlinedThroughInvoke(BB, Invoke); + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + &*BB, Invoke.getOuterResumeDest())) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + Invoke.addIncomingPHIValuesFor(NewBB); // Forward any resumes that are remaining here. 
if (ResumeInst *RI = dyn_cast(BB->getTerminator())) @@ -278,6 +289,99 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, InvokeDest->removePredecessor(II->getParent()); } +/// If we inlined an invoke site, we need to convert calls +/// in the body of the inlined function into invokes. +/// +/// II is the invoke instruction being inlined. FirstNewBlock is the first +/// block of the inlined code (the last block is the end of the function), +/// and InlineCodeInfo is information about the code that got inlined. +static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, + ClonedCodeInfo &InlinedCodeInfo) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Function *Caller = FirstNewBlock->getParent(); + + assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!"); + + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the invoke before removing the + // edge from this block. + SmallVector UnwindDestPHIValues; + llvm::BasicBlock *InvokeBB = II->getParent(); + for (Instruction &I : *UnwindDest) { + // Save the value to use for this edge. + PHINode *PHI = dyn_cast(&I); + if (!PHI) + break; + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + + // Add incoming-PHI values to the unwind destination block for the given basic + // block, using the values for the original invoke's source block. + auto UpdatePHINodes = [&](BasicBlock *Src) { + BasicBlock::iterator I = UnwindDest->begin(); + for (Value *V : UnwindDestPHIValues) { + PHINode *PHI = cast(I); + PHI->addIncoming(V, Src); + ++I; + } + }; + + // This connects all the instructions which 'unwind to caller' to the invoke + // destination. + for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); + BB != E; ++BB) { + if (auto *CRI = dyn_cast(BB->getTerminator())) { + if (CRI->unwindsToCaller()) { + CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI); + CRI->eraseFromParent(); + UpdatePHINodes(&*BB); + } + } + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + Instruction *Replacement = nullptr; + if (auto *CatchSwitch = dyn_cast(I)) { + if (CatchSwitch->unwindsToCaller()) { + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), UnwindDest, + CatchSwitch->getNumHandlers(), CatchSwitch->getName(), + CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + Replacement = NewCatchSwitch; + } + } else if (!isa(I)) { + llvm_unreachable("unexpected EHPad!"); + } + + if (Replacement) { + Replacement->takeName(I); + I->replaceAllUsesWith(Replacement); + I->eraseFromParent(); + UpdatePHINodes(&*BB); + } + } + + if (InlinedCodeInfo.ContainsCalls) + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) + if (BasicBlock *NewBB = + HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest)) + // Update any PHI nodes in the exceptional block to indicate that there + // is now a new entry in them. + UpdatePHINodes(NewBB); + + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. 
+ UnwindDest->removePredecessor(InvokeBB); +} + /// When inlining a function that contains noalias scope metadata, /// this metadata needs to be cloned so that the inlined blocks /// have different "unqiue scopes" at every call site. Were this not done, then @@ -395,17 +499,16 @@ static void CloneAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap) { /// parameters with noalias metadata specifying the new scope, and tag all /// non-derived loads, stores and memory intrinsics with the new alias scopes. static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, - const DataLayout &DL, AliasAnalysis *AA) { + const DataLayout &DL, AAResults *CalleeAAR) { if (!EnableNoAliasConversion) return; const Function *CalledFunc = CS.getCalledFunction(); SmallVector NoAliasArgs; - for (Function::const_arg_iterator I = CalledFunc->arg_begin(), - E = CalledFunc->arg_end(); I != E; ++I) { - if (I->hasNoAliasAttr() && !I->hasNUses(0)) - NoAliasArgs.push_back(I); + for (const Argument &I : CalledFunc->args()) { + if (I.hasNoAliasAttr() && !I.hasNUses(0)) + NoAliasArgs.push_back(&I); } if (NoAliasArgs.empty()) @@ -480,10 +583,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, continue; IsFuncCall = true; - if (AA) { - AliasAnalysis::ModRefBehavior MRB = AA->getModRefBehavior(ICS); - if (MRB == AliasAnalysis::OnlyAccessesArgumentPointees || - MRB == AliasAnalysis::OnlyReadsArgumentPointees) + if (CalleeAAR) { + FunctionModRefBehavior MRB = CalleeAAR->getModRefBehavior(ICS); + if (MRB == FMRB_OnlyAccessesArgumentPointees || + MRB == FMRB_OnlyReadsArgumentPointees) IsArgMemOnlyCall = true; } @@ -518,7 +621,7 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap, for (unsigned i = 0, ie = PtrArgs.size(); i != ie; ++i) { SmallVector Objects; GetUnderlyingObjects(const_cast(PtrArgs[i]), - Objects, DL, /* MaxLookup = */ 0); + Objects, DL, /* LI = */ nullptr); for (Value *O : Objects) ObjSet.insert(O); @@ -646,7 +749,7 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) { // caller, then don't bother inserting the assumption. Value *Arg = CS.getArgument(I->getArgNo()); if (getKnownAlignment(Arg, DL, CS.getInstruction(), - &IFI.ACT->getAssumptionCache(*CalledFunc), + &IFI.ACT->getAssumptionCache(*CS.getCaller()), &DT) >= Align) continue; @@ -731,7 +834,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI) { Type *AggTy = cast(Src->getType())->getElementType(); - IRBuilder<> Builder(InsertBlock->begin()); + IRBuilder<> Builder(InsertBlock, InsertBlock->begin()); Value *Size = Builder.getInt64(M->getDataLayout().getTypeStoreSize(AggTy)); @@ -851,9 +954,8 @@ updateInlinedAtInfo(DebugLoc DL, DILocation *InlinedAtNode, LLVMContext &Ctx, // Starting from the top, rebuild the nodes to point to the new inlined-at // location (then rebuilding the rest of the chain behind it) and update the // map of already-constructed inlined-at nodes. - for (auto I = InlinedAtLocations.rbegin(), E = InlinedAtLocations.rend(); - I != E; ++I) { - const DILocation *MD = *I; + for (const DILocation *MD : make_range(InlinedAtLocations.rbegin(), + InlinedAtLocations.rend())) { Last = IANodes[MD] = DILocation::getDistinct( Ctx, MD->getLine(), MD->getColumn(), MD->getScope(), Last); } @@ -917,7 +1019,7 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, /// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. 
bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, - bool InsertLifetime) { + AAResults *CalleeAAR, bool InsertLifetime) { Instruction *TheCall = CS.getInstruction(); assert(TheCall->getParent() && TheCall->getParent()->getParent() && "Instruction not in function!"); @@ -930,6 +1032,22 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CalledFunc->isDeclaration() || // call, or call to a vararg function! CalledFunc->getFunctionType()->isVarArg()) return false; + // The inliner does not know how to inline through calls with operand bundles + // in general ... + if (CS.hasOperandBundles()) { + for (int i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { + uint32_t Tag = CS.getOperandBundleAt(i).getTagID(); + // ... but it knows how to inline through "deopt" operand bundles ... + if (Tag == LLVMContext::OB_deopt) + continue; + // ... and "funclet" operand bundles. + if (Tag == LLVMContext::OB_funclet) + continue; + + return false; + } + } + // If the call to the callee cannot throw, set the 'nounwind' flag on any // calls that we inline. bool MarkNoUnwind = CS.doesNotThrow(); @@ -950,13 +1068,17 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Get the personality function from the callee if it contains a landing pad. Constant *CalledPersonality = - CalledFunc->hasPersonalityFn() ? CalledFunc->getPersonalityFn() : nullptr; + CalledFunc->hasPersonalityFn() + ? CalledFunc->getPersonalityFn()->stripPointerCasts() + : nullptr; // Find the personality function used by the landing pads of the caller. If it // exists, then check to see that it matches the personality function used in // the callee. Constant *CallerPersonality = - Caller->hasPersonalityFn() ? Caller->getPersonalityFn() : nullptr; + Caller->hasPersonalityFn() + ? Caller->getPersonalityFn()->stripPointerCasts() + : nullptr; if (CalledPersonality) { if (!CallerPersonality) Caller->setPersonalityFn(CalledPersonality); @@ -968,9 +1090,46 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, return false; } + // We need to figure out which funclet the callsite was in so that we may + // properly nest the callee. + Instruction *CallSiteEHPad = nullptr; + if (CallerPersonality) { + EHPersonality Personality = classifyEHPersonality(CallerPersonality); + if (isFuncletEHPersonality(Personality)) { + Optional ParentFunclet = + CS.getOperandBundle(LLVMContext::OB_funclet); + if (ParentFunclet) + CallSiteEHPad = cast(ParentFunclet->Inputs.front()); + + // OK, the inlining site is legal. What about the target function? + + if (CallSiteEHPad) { + if (Personality == EHPersonality::MSVC_CXX) { + // The MSVC personality cannot tolerate catches getting inlined into + // cleanup funclets. + if (isa(CallSiteEHPad)) { + // Ok, the call site is within a cleanuppad. Let's check the callee + // for catchpads. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (isa(CalledBB.getFirstNonPHI())) + return false; + } + } + } else if (isAsynchronousEHPersonality(Personality)) { + // SEH is even less tolerant, there may not be any sort of exceptional + // funclet in the callee. + for (const BasicBlock &CalledBB : *CalledFunc) { + if (CalledBB.isEHPad()) + return false; + } + } + } + } + } + // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. - Function::iterator LastBlock = &Caller->back(); + Function::iterator LastBlock = --Caller->end(); // Make sure to capture all of the return instructions from the cloned // function. 
@@ -1007,7 +1166,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI)); } - VMap[I] = ActualArg; + VMap[&*I] = ActualArg; } // Add alignment assumptions if necessary. We do this before the inlined @@ -1029,7 +1188,61 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Inject byval arguments initialization. for (std::pair &Init : ByValInit) HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(), - FirstNewBlock, IFI); + &*FirstNewBlock, IFI); + + Optional ParentDeopt = + CS.getOperandBundle(LLVMContext::OB_deopt); + if (ParentDeopt) { + SmallVector OpDefs; + + for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { + Instruction *I = dyn_cast_or_null(VH); + if (!I) continue; // instruction was DCE'd or RAUW'ed to undef + + OpDefs.clear(); + + CallSite ICS(I); + OpDefs.reserve(ICS.getNumOperandBundles()); + + for (unsigned i = 0, e = ICS.getNumOperandBundles(); i < e; ++i) { + auto ChildOB = ICS.getOperandBundleAt(i); + if (ChildOB.getTagID() != LLVMContext::OB_deopt) { + // If the inlined call has other operand bundles, let them be + OpDefs.emplace_back(ChildOB); + continue; + } + + // It may be useful to separate this logic (of handling operand + // bundles) out to a separate "policy" component if this gets crowded. + // Prepend the parent's deoptimization continuation to the newly + // inlined call's deoptimization continuation. + std::vector MergedDeoptArgs; + MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() + + ChildOB.Inputs.size()); + + MergedDeoptArgs.insert(MergedDeoptArgs.end(), + ParentDeopt->Inputs.begin(), + ParentDeopt->Inputs.end()); + MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), + ChildOB.Inputs.end()); + + OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); + } + + Instruction *NewI = nullptr; + if (isa(I)) + NewI = CallInst::Create(cast(I), OpDefs, I); + else + NewI = InvokeInst::Create(cast(I), OpDefs, I); + + // Note: the RAUW does the appropriate fixup in VMap, so we need to do + // this even if the call returns void. + I->replaceAllUsesWith(NewI); + + VH = nullptr; + I->eraseFromParent(); + } + } // Update the callgraph if requested. if (IFI.CG) @@ -1042,7 +1255,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CloneAliasScopeMetadata(CS, VMap); // Add noalias metadata if necessary. - AddAliasScopeMetadata(CS, VMap, DL, IFI.AA); + AddAliasScopeMetadata(CS, VMap, DL, CalleeAAR); // FIXME: We could register any cloned assumptions instead of clearing the // whole function's cache. @@ -1085,9 +1298,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. - Caller->getEntryBlock().getInstList().splice(InsertPoint, - FirstNewBlock->getInstList(), - AI, I); + Caller->getEntryBlock().getInstList().splice( + InsertPoint, FirstNewBlock->getInstList(), AI->getIterator(), I); } // Move any dbg.declares describing the allocas into the entry basic block. DIBuilder DIB(*Caller->getParent()); @@ -1137,7 +1349,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Leave lifetime markers for the static alloca's, scoping them to the // function we just inlined. 
if (InsertLifetime && !IFI.StaticAllocas.empty()) { - IRBuilder<> builder(FirstNewBlock->begin()); + IRBuilder<> builder(&FirstNewBlock->front()); for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) { AllocaInst *AI = IFI.StaticAllocas[ai]; @@ -1189,7 +1401,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // Insert the llvm.stacksave. - CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin()) + CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) .CreateCall(StackSave, {}, "savedstack"); // Insert a call to llvm.stackrestore before any return instructions in the @@ -1203,10 +1415,74 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } + // Update the lexical scopes of the new funclets and callsites. + // Anything that had 'none' as its parent is now nested inside the callsite's + // EHPad. + + if (CallSiteEHPad) { + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); + BB != E; ++BB) { + // Add bundle operands to any top-level call sites. + SmallVector OpBundles; + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { + Instruction *I = &*BBI++; + CallSite CS(I); + if (!CS) + continue; + + // Skip call sites which are nounwind intrinsics. + auto *CalledFn = + dyn_cast(CS.getCalledValue()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) + continue; + + // Skip call sites which already have a "funclet" bundle. + if (CS.getOperandBundle(LLVMContext::OB_funclet)) + continue; + + CS.getOperandBundlesAsDefs(OpBundles); + OpBundles.emplace_back("funclet", CallSiteEHPad); + + Instruction *NewInst; + if (CS.isCall()) + NewInst = CallInst::Create(cast(I), OpBundles, I); + else + NewInst = InvokeInst::Create(cast(I), OpBundles, I); + NewInst->setDebugLoc(I->getDebugLoc()); + NewInst->takeName(I); + I->replaceAllUsesWith(NewInst); + I->eraseFromParent(); + + OpBundles.clear(); + } + + Instruction *I = BB->getFirstNonPHI(); + if (!I->isEHPad()) + continue; + + if (auto *CatchSwitch = dyn_cast(I)) { + if (isa(CatchSwitch->getParentPad())) + CatchSwitch->setParentPad(CallSiteEHPad); + } else { + auto *FPI = cast(I); + if (isa(FPI->getParentPad())) + FPI->setParentPad(CallSiteEHPad); + } + } + } + // If we are inlining for an invoke instruction, we must make sure to rewrite // any call instructions into invoke instructions. - if (InvokeInst *II = dyn_cast(TheCall)) - HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo); + if (auto *II = dyn_cast(TheCall)) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); + if (isa(FirstNonPHI)) { + HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } else { + HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } + } // Handle any inlined musttail call sites. In order for a new call site to be // musttail, the source of the clone and the inlined call site must have been @@ -1250,7 +1526,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // the calling basic block. if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) { // Move all of the instructions right before the call. 
- OrigBB->getInstList().splice(TheCall, FirstNewBlock->getInstList(), + OrigBB->getInstList().splice(TheCall->getIterator(), + FirstNewBlock->getInstList(), FirstNewBlock->begin(), FirstNewBlock->end()); // Remove the cloned basic block. Caller->getBasicBlockList().pop_back(); @@ -1297,15 +1574,16 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Split the basic block. This guarantees that no PHI nodes will have to be // updated due to new incoming edges, and make the invoke case more // symmetric to the call case. - AfterCallBB = OrigBB->splitBasicBlock(CreatedBranchToNormalDest, - CalledFunc->getName()+".exit"); + AfterCallBB = + OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(), + CalledFunc->getName() + ".exit"); } else { // It's a call // If this is a call instruction, we need to split the basic block that // the call lives in. // - AfterCallBB = OrigBB->splitBasicBlock(TheCall, - CalledFunc->getName()+".exit"); + AfterCallBB = OrigBB->splitBasicBlock(TheCall->getIterator(), + CalledFunc->getName() + ".exit"); } // Change the branch that used to go to AfterCallBB to branch to the first @@ -1314,14 +1592,14 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, TerminatorInst *Br = OrigBB->getTerminator(); assert(Br && Br->getOpcode() == Instruction::Br && "splitBasicBlock broken!"); - Br->setOperand(0, FirstNewBlock); - + Br->setOperand(0, &*FirstNewBlock); // Now that the function is correct, make it a little bit nicer. In // particular, move the basic blocks inserted from the end of the function // into the space made by splitting the source basic block. - Caller->getBasicBlockList().splice(AfterCallBB, Caller->getBasicBlockList(), - FirstNewBlock, Caller->end()); + Caller->getBasicBlockList().splice(AfterCallBB->getIterator(), + Caller->getBasicBlockList(), FirstNewBlock, + Caller->end()); // Handle all of the return instructions that we just cloned in, and eliminate // any users of the original call/invoke instruction. @@ -1333,7 +1611,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // possible incoming values. if (!TheCall->use_empty()) { PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), - AfterCallBB->begin()); + &AfterCallBB->front()); // Anything that used the result of the function call should now use the // PHI node as their operand. TheCall->replaceAllUsesWith(PHI); @@ -1350,7 +1628,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } - // Add a branch to the merge points and remove return instructions. DebugLoc Loc; for (unsigned i = 0, e = Returns.size(); i != e; ++i) { @@ -1413,7 +1690,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, // Splice the code entry block into calling block, right before the // unconditional branch. CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes - OrigBB->getInstList().splice(Br, CalleeEntry->getInstList()); + OrigBB->getInstList().splice(Br->getIterator(), CalleeEntry->getInstList()); // Remove the unconditional branch. 
OrigBB->getInstList().erase(Br); diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 30edf3b7aae4..5687afa61e2a 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -380,14 +380,10 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { IRBuilder<> Builder(Rem); - Type *RemTy = Rem->getType(); - if (RemTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); - - unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - - if (RemTyBitWidth != 32 && RemTyBitWidth != 64) - llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + assert(!Rem->getType()->isVectorTy() && "Div over vectors not supported"); + assert((Rem->getType()->getIntegerBitWidth() == 32 || + Rem->getType()->getIntegerBitWidth() == 64) && + "Div of bitwidth other than 32 or 64 not supported"); // First prepare the sign if it's a signed remainder if (Rem->getOpcode() == Instruction::SRem) { @@ -401,7 +397,7 @@ bool llvm::expandRemainder(BinaryOperator *Rem) { // If we didn't actually generate an urem instruction, we're done // This happens for example if the input were constant. In this case the // Builder insertion point was unchanged - if (Rem == Builder.GetInsertPoint()) + if (Rem == Builder.GetInsertPoint().getNodePtrUnchecked()) return true; BinaryOperator *BO = dyn_cast(Builder.GetInsertPoint()); @@ -440,14 +436,10 @@ bool llvm::expandDivision(BinaryOperator *Div) { IRBuilder<> Builder(Div); - Type *DivTy = Div->getType(); - if (DivTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); - - unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - - if (DivTyBitWidth != 32 && DivTyBitWidth != 64) - llvm_unreachable("Div of bitwidth other than 32 or 64 not supported"); + assert(!Div->getType()->isVectorTy() && "Div over vectors not supported"); + assert((Div->getType()->getIntegerBitWidth() == 32 || + Div->getType()->getIntegerBitWidth() == 64) && + "Div of bitwidth other than 32 or 64 not supported"); // First prepare the sign if it's a signed division if (Div->getOpcode() == Instruction::SDiv) { @@ -461,7 +453,7 @@ bool llvm::expandDivision(BinaryOperator *Div) { // If we didn't actually generate an udiv instruction, we're done // This happens for example if the input were constant. 
In this case the // Builder insertion point was unchanged - if (Div == Builder.GetInsertPoint()) + if (Div == Builder.GetInsertPoint().getNodePtrUnchecked()) return true; BinaryOperator *BO = dyn_cast(Builder.GetInsertPoint()); @@ -492,15 +484,14 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) { "Trying to expand remainder from a non-remainder function"); Type *RemTy = Rem->getType(); - if (RemTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - if (RemTyBitWidth > 32) - llvm_unreachable("Div of bitwidth greater than 32 not supported"); + assert(RemTyBitWidth <= 32 && + "Div of bitwidth greater than 32 not supported"); - if (RemTyBitWidth == 32) + if (RemTyBitWidth == 32) return expandRemainder(Rem); // If bitwidth smaller than 32 extend inputs, extend output and proceed @@ -542,15 +533,13 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) { "Trying to expand remainder from a non-remainder function"); Type *RemTy = Rem->getType(); - if (RemTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!RemTy->isVectorTy() && "Div over vectors not supported"); unsigned RemTyBitWidth = RemTy->getIntegerBitWidth(); - if (RemTyBitWidth > 64) - llvm_unreachable("Div of bitwidth greater than 64 not supported"); + assert(RemTyBitWidth <= 64 && "Div of bitwidth greater than 64 not supported"); - if (RemTyBitWidth == 64) + if (RemTyBitWidth == 64) return expandRemainder(Rem); // If bitwidth smaller than 64 extend inputs, extend output and proceed @@ -593,13 +582,11 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) { "Trying to expand division from a non-division function"); Type *DivTy = Div->getType(); - if (DivTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - if (DivTyBitWidth > 32) - llvm_unreachable("Div of bitwidth greater than 32 not supported"); + assert(DivTyBitWidth <= 32 && "Div of bitwidth greater than 32 not supported"); if (DivTyBitWidth == 32) return expandDivision(Div); @@ -643,13 +630,12 @@ bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) { "Trying to expand division from a non-division function"); Type *DivTy = Div->getType(); - if (DivTy->isVectorTy()) - llvm_unreachable("Div over vectors not supported"); + assert(!DivTy->isVectorTy() && "Div over vectors not supported"); unsigned DivTyBitWidth = DivTy->getIntegerBitWidth(); - if (DivTyBitWidth > 64) - llvm_unreachable("Div of bitwidth greater than 64 not supported"); + assert(DivTyBitWidth <= 64 && + "Div of bitwidth greater than 64 not supported"); if (DivTyBitWidth == 64) return expandDivision(Div); diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index 9d40b6989d6e..b4b2e148dfbb 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -31,8 +31,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -64,6 +66,13 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, PredIteratorCache 
&PredCache, LoopInfo *LI) { SmallVector UsesToRewrite; + // Tokens cannot be used in PHI nodes, so we skip over them. + // We can run into tokens which are live out of a loop with catchswitch + // instructions in Windows EH if the catchswitch has one catchpad which + // is inside the loop and another which is not. + if (Inst.getType()->isTokenTy()) + return false; + BasicBlock *InstBB = Inst.getParent(); for (Use &U : Inst.uses()) { @@ -84,9 +93,8 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Invoke instructions are special in that their result value is not available // along their unwind edge. The code below tests to see whether DomBB - // dominates - // the value, so adjust DomBB to the normal destination block, which is - // effectively where the value is first usable. + // dominates the value, so adjust DomBB to the normal destination block, + // which is effectively where the value is first usable. BasicBlock *DomBB = Inst.getParent(); if (InvokeInst *Inv = dyn_cast(&Inst)) DomBB = Inv->getNormalDest(); @@ -101,10 +109,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Insert the LCSSA phi's into all of the exit blocks dominated by the // value, and add them to the Phi's map. - for (SmallVectorImpl::const_iterator BBI = ExitBlocks.begin(), - BBE = ExitBlocks.end(); - BBI != BBE; ++BBI) { - BasicBlock *ExitBB = *BBI; + for (BasicBlock *ExitBB : ExitBlocks) { if (!DT.dominates(DomNode, DT.getNode(ExitBB))) continue; @@ -113,7 +118,7 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, continue; PHINode *PN = PHINode::Create(Inst.getType(), PredCache.size(ExitBB), - Inst.getName() + ".lcssa", ExitBB->begin()); + Inst.getName() + ".lcssa", &ExitBB->front()); // Add inputs from inside the loop for this PHI. for (BasicBlock *Pred : PredCache.get(ExitBB)) { @@ -148,26 +153,26 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, // Rewrite all uses outside the loop in terms of the new PHIs we just // inserted. - for (unsigned i = 0, e = UsesToRewrite.size(); i != e; ++i) { + for (Use *UseToRewrite : UsesToRewrite) { // If this use is in an exit block, rewrite to use the newly inserted PHI. // This is required for correctness because SSAUpdate doesn't handle uses in // the same block. It assumes the PHI we inserted is at the end of the // block. - Instruction *User = cast(UsesToRewrite[i]->getUser()); + Instruction *User = cast(UseToRewrite->getUser()); BasicBlock *UserBB = User->getParent(); if (PHINode *PN = dyn_cast(User)) - UserBB = PN->getIncomingBlock(*UsesToRewrite[i]); + UserBB = PN->getIncomingBlock(*UseToRewrite); if (isa(UserBB->begin()) && isExitBlock(UserBB, ExitBlocks)) { // Tell the VHs that the uses changed. This updates SCEV's caches. - if (UsesToRewrite[i]->get()->hasValueHandle()) - ValueHandleBase::ValueIsRAUWd(*UsesToRewrite[i], UserBB->begin()); - UsesToRewrite[i]->set(UserBB->begin()); + if (UseToRewrite->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UseToRewrite, &UserBB->front()); + UseToRewrite->set(&UserBB->front()); continue; } // Otherwise, do full PHI insertion. - SSAUpdate.RewriteUse(*UsesToRewrite[i]); + SSAUpdate.RewriteUse(*UseToRewrite); } // Post process PHI instructions that were inserted into another disjoint loop @@ -190,10 +195,9 @@ static bool processInstruction(Loop &L, Instruction &Inst, DominatorTree &DT, } // Remove PHI nodes that did not have any uses rewritten. 
- for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { - if (AddedPHIs[i]->use_empty()) - AddedPHIs[i]->eraseFromParent(); - } + for (PHINode *PN : AddedPHIs) + if (PN->use_empty()) + PN->eraseFromParent(); return true; } @@ -205,8 +209,8 @@ blockDominatesAnExit(BasicBlock *BB, DominatorTree &DT, const SmallVectorImpl &ExitBlocks) { DomTreeNode *DomNode = DT.getNode(BB); - for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) - if (DT.dominates(DomNode, DT.getNode(ExitBlocks[i]))) + for (BasicBlock *ExitBB : ExitBlocks) + if (DT.dominates(DomNode, DT.getNode(ExitBB))) return true; return false; @@ -227,25 +231,22 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, // Look at all the instructions in the loop, checking to see if they have uses // outside the loop. If so, rewrite those uses. - for (Loop::block_iterator BBI = L.block_begin(), BBE = L.block_end(); - BBI != BBE; ++BBI) { - BasicBlock *BB = *BBI; - + for (BasicBlock *BB : L.blocks()) { // For large loops, avoid use-scanning by using dominance information: In // particular, if a block does not dominate any of the loop exits, then none // of the values defined in the block could be used outside the loop. if (!blockDominatesAnExit(BB, DT, ExitBlocks)) continue; - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + for (Instruction &I : *BB) { // Reject two common cases fast: instructions with no uses (like stores) // and instructions with one use that is in the same block as this. - if (I->use_empty() || - (I->hasOneUse() && I->user_back()->getParent() == BB && - !isa(I->user_back()))) + if (I.use_empty() || + (I.hasOneUse() && I.user_back()->getParent() == BB && + !isa(I.user_back()))) continue; - Changed |= processInstruction(L, *I, DT, ExitBlocks, PredCache, LI); + Changed |= processInstruction(L, I, DT, ExitBlocks, PredCache, LI); } } @@ -266,8 +267,8 @@ bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, bool Changed = false; // Recurse depth-first through inner loops. - for (Loop::iterator I = L.begin(), E = L.end(); I != E; ++I) - Changed |= formLCSSARecursively(**I, DT, LI, SE); + for (Loop *SubLoop : L.getSubLoops()) + Changed |= formLCSSARecursively(*SubLoop, DT, LI, SE); Changed |= formLCSSA(L, DT, LI, SE); return Changed; @@ -296,8 +297,10 @@ struct LCSSA : public FunctionPass { AU.addRequired(); AU.addRequired(); AU.addPreservedID(LoopSimplifyID); - AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } }; } @@ -306,6 +309,8 @@ char LCSSA::ID = 0; INITIALIZE_PASS_BEGIN(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false) Pass *llvm::createLCSSAPass() { return new LCSSA(); } @@ -317,7 +322,8 @@ bool LCSSA::runOnFunction(Function &F) { bool Changed = false; LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); - SE = getAnalysisIfAvailable(); + auto *SEWP = getAnalysisIfAvailable(); + SE = SEWP ? &SEWP->getSE() : nullptr; // Simplify each loop nest in the function. 
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt index 6b2d405b1f28..ece0ad4dbf44 100644 --- a/lib/Transforms/Utils/LLVMBuild.txt +++ b/lib/Transforms/Utils/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = TransformUtils parent = Transforms -required_libraries = Analysis Core IPA Support +required_libraries = Analysis Core Support diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index ba8af47b54e1..e75163f323df 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -17,10 +17,11 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/LibCallSemantics.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" @@ -188,9 +189,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, BasicBlock *BB = SI->getParent(); // Remove entries from PHI nodes which we no longer branch to... - for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) { + for (BasicBlock *Succ : SI->successors()) { // Found case matching a constant operand? - BasicBlock *Succ = SI->getSuccessor(i); if (Succ == TheOnlyDest) TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest else @@ -230,6 +230,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, SIDef->getValue().getZExtValue())); } + // Update make.implicit metadata to the newly-created conditional branch. + MDNode *MakeImplicitMD = SI->getMetadata(LLVMContext::MD_make_implicit); + if (MakeImplicitMD) + NewBr->setMetadata(LLVMContext::MD_make_implicit, MakeImplicitMD); + // Delete the old switch. SI->eraseFromParent(); return true; @@ -283,8 +288,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI) { if (!I->use_empty() || isa(I)) return false; - // We don't want the landingpad instruction removed by anything this general. - if (isa(I)) + // We don't want the landingpad-like instructions removed by anything this + // general. + if (I->isEHPad()) return false; // We don't want debug info removed by anything this general, unless @@ -414,6 +420,49 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, return false; } +static bool +simplifyAndDCEInstruction(Instruction *I, + SmallSetVector &WorkList, + const DataLayout &DL, + const TargetLibraryInfo *TLI) { + if (isInstructionTriviallyDead(I, TLI)) { + // Null out all of the instruction's operands to see if any operand becomes + // dead as we go. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, nullptr); + + if (!OpV->use_empty() || I == OpV) + continue; + + // If the operand is an instruction that became dead as we nulled out the + // operand, and if it is 'trivially' dead, delete it in a future loop + // iteration. + if (Instruction *OpI = dyn_cast(OpV)) + if (isInstructionTriviallyDead(OpI, TLI)) + WorkList.insert(OpI); + } + + I->eraseFromParent(); + + return true; + } + + if (Value *SimpleV = SimplifyInstruction(I, DL)) { + // Add the users to the worklist. CAREFUL: an instruction can use itself, + // in the case of a phi node. 
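// The operand-nulling loop in simplifyAndDCEInstruction above is the classic
// DCE trick: before erasing a dead node, drop its operand references and
// enqueue any operand that just lost its last user. A toy graph version of
// the same bookkeeping (Node and its Uses count are made-up stand-ins):
#include <vector>

struct Node {
  std::vector<Node *> Operands;
  int Uses = 0;
};

void dropOperandsAndEnqueue(Node *N, std::vector<Node *> &WorkList) {
  for (Node *&Op : N->Operands) {
    Node *OpV = Op;
    Op = nullptr;                  // mirrors I->setOperand(i, nullptr)
    if (OpV && OpV != N && --OpV->Uses == 0)
      WorkList.push_back(OpV);     // operand became dead; revisit it later
  }
  // ...the real code then calls I->eraseFromParent()...
}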
+ for (User *U : I->users()) + if (U != I) + WorkList.insert(cast(U)); + + // Replace the instruction with its simplified value. + I->replaceAllUsesWith(SimpleV); + I->eraseFromParent(); + return true; + } + return false; +} + /// SimplifyInstructionsInBlock - Scan the specified basic block and try to /// simplify any instructions in it and recursively delete dead instructions. /// @@ -422,30 +471,34 @@ bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN, bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI) { bool MadeChange = false; + const DataLayout &DL = BB->getModule()->getDataLayout(); #ifndef NDEBUG // In debug builds, ensure that the terminator of the block is never replaced // or deleted by these simplifications. The idea of simplification is that it // cannot introduce new instructions, and there is no way to replace the // terminator of a block without introducing a new instruction. - AssertingVH TerminatorVH(--BB->end()); + AssertingVH TerminatorVH(&BB->back()); #endif - for (BasicBlock::iterator BI = BB->begin(), E = --BB->end(); BI != E; ) { + SmallSetVector WorkList; + // Iterate over the original function, only adding insts to the worklist + // if they actually need to be revisited. This avoids having to pre-init + // the worklist with the entire function's worth of instructions. + for (BasicBlock::iterator BI = BB->begin(), E = std::prev(BB->end()); BI != E;) { assert(!BI->isTerminator()); - Instruction *Inst = BI++; + Instruction *I = &*BI; + ++BI; - WeakVH BIHandle(BI); - if (recursivelySimplifyInstruction(Inst, TLI)) { - MadeChange = true; - if (BIHandle != BI) - BI = BB->begin(); - continue; - } + // We're visiting this instruction now, so make sure it's not in the + // worklist from an earlier visit. + if (!WorkList.count(I)) + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); + } - MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI); - if (BIHandle != BI) - BI = BB->begin(); + while (!WorkList.empty()) { + Instruction *I = WorkList.pop_back_val(); + MadeChange |= simplifyAndDCEInstruction(I, WorkList, DL, TLI); } return MadeChange; } @@ -808,7 +861,8 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { // Copy over any phi, debug or lifetime instruction. BB->getTerminator()->eraseFromParent(); - Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList()); + Succ->getInstList().splice(Succ->getFirstNonPHI()->getIterator(), + BB->getInstList()); } else { while (PHINode *PN = dyn_cast(&BB->front())) { // We explicitly check for such uses in CanPropagatePredecessorsForPHIs. @@ -1017,8 +1071,13 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (LdStHasDebugValue(DIVar, LI)) return true; - Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, DIVar, DIExpr, - DDI->getDebugLoc(), LI); + // We are now tracking the loaded value instead of the address. In the + // future if multi-location support is added to the IR, it might be + // preferable to keep tracking both the loaded value and the original + // address in case the alloca can not be elided. 
+ Instruction *DbgValue = Builder.insertDbgValueIntrinsic( + LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr); + DbgValue->insertAfter(LI); return true; } @@ -1034,8 +1093,8 @@ bool llvm::LowerDbgDeclare(Function &F) { DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false); SmallVector Dbgs; for (auto &FI : F) - for (BasicBlock::iterator BI : FI) - if (auto DDI = dyn_cast(BI)) + for (Instruction &BI : FI) + if (auto DDI = dyn_cast(&BI)) Dbgs.push_back(DDI); if (Dbgs.empty()) @@ -1060,9 +1119,13 @@ bool llvm::LowerDbgDeclare(Function &F) { // This is a call by-value or some other instruction that // takes a pointer to the variable. Insert a *value* // intrinsic that describes the alloca. + SmallVector NewDIExpr; + auto *DIExpr = DDI->getExpression(); + NewDIExpr.push_back(dwarf::DW_OP_deref); + NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIB.insertDbgValueIntrinsic(AI, 0, DDI->getVariable(), - DDI->getExpression(), DDI->getDebugLoc(), - CI); + DIB.createExpression(NewDIExpr), + DDI->getDebugLoc(), CI); } DDI->eraseFromParent(); } @@ -1082,9 +1145,10 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) { return nullptr; } -bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, - DIBuilder &Builder, bool Deref) { - DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI); +bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress, + Instruction *InsertBefore, DIBuilder &Builder, + bool Deref, int Offset) { + DbgDeclareInst *DDI = FindAllocaDbgDeclare(Address); if (!DDI) return false; DebugLoc Loc = DDI->getDebugLoc(); @@ -1092,29 +1156,40 @@ bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, auto *DIExpr = DDI->getExpression(); assert(DIVar && "Missing variable"); - if (Deref) { + if (Deref || Offset) { // Create a copy of the original DIDescriptor for user variable, prepending // "deref" operation to a list of address elements, as new llvm.dbg.declare // will take a value storing address of the memory for variable, not // alloca itself. SmallVector NewDIExpr; - NewDIExpr.push_back(dwarf::DW_OP_deref); + if (Deref) + NewDIExpr.push_back(dwarf::DW_OP_deref); + if (Offset > 0) { + NewDIExpr.push_back(dwarf::DW_OP_plus); + NewDIExpr.push_back(Offset); + } else if (Offset < 0) { + NewDIExpr.push_back(dwarf::DW_OP_minus); + NewDIExpr.push_back(-Offset); + } if (DIExpr) NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()); DIExpr = Builder.createExpression(NewDIExpr); } - // Insert llvm.dbg.declare in the same basic block as the original alloca, - // and remove old llvm.dbg.declare. - BasicBlock *BB = AI->getParent(); - Builder.insertDeclare(NewAllocaAddress, DIVar, DIExpr, Loc, BB); + // Insert llvm.dbg.declare immediately after the original alloca, and remove + // old llvm.dbg.declare. + Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); DDI->eraseFromParent(); return true; } -/// changeToUnreachable - Insert an unreachable instruction before the specified -/// instruction, making it and the rest of the code in the block dead. 
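// Both debug-info hunks above build the new expression by prepending
// operations and then appending the old elements unchanged. The same
// construction with plain vectors; the DW_OP_* values are the actual DWARF
// constants, while the helper itself is only a sketch of the patch's logic:
#include <cstdint>
#include <vector>

const uint64_t DW_OP_deref = 0x06;
const uint64_t DW_OP_plus = 0x22;
const uint64_t DW_OP_minus = 0x1c;

std::vector<uint64_t> buildExpr(bool Deref, int Offset,
                                const std::vector<uint64_t> &Old) {
  std::vector<uint64_t> E;
  if (Deref)
    E.push_back(DW_OP_deref);
  if (Offset > 0) {
    E.push_back(DW_OP_plus);
    E.push_back(static_cast<uint64_t>(Offset));
  } else if (Offset < 0) {
    E.push_back(DW_OP_minus);
    E.push_back(static_cast<uint64_t>(-Offset));
  }
  E.insert(E.end(), Old.begin(), Old.end()); // original elements, unchanged
  return E;
}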
-static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { +bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress, + DIBuilder &Builder, bool Deref, int Offset) { + return replaceDbgDeclare(AI, NewAllocaAddress, AI->getNextNode(), Builder, + Deref, Offset); +} + +void llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -1132,7 +1207,7 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { new UnreachableInst(I->getContext(), I); // All instructions after this are dead. - BasicBlock::iterator BBI = I, BBE = BB->end(); + BasicBlock::iterator BBI = I->getIterator(), BBE = BB->end(); while (BBI != BBE) { if (!BBI->use_empty()) BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); @@ -1142,8 +1217,11 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { /// changeToCall - Convert the specified invoke into a normal call. static void changeToCall(InvokeInst *II) { - SmallVector Args(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + SmallVector Args(II->arg_begin(), II->arg_end()); + SmallVector OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles, + "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); @@ -1162,7 +1240,7 @@ static bool markAliveBlocks(Function &F, SmallPtrSetImpl &Reachable) { SmallVector Worklist; - BasicBlock *BB = F.begin(); + BasicBlock *BB = &F.front(); Worklist.push_back(BB); Reachable.insert(BB); bool Changed = false; @@ -1187,7 +1265,7 @@ static bool markAliveBlocks(Function &F, if (MakeUnreachable) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(BBI, false); + changeToUnreachable(&*BBI, false); Changed = true; break; } @@ -1201,7 +1279,7 @@ static bool markAliveBlocks(Function &F, ++BBI; if (!isa(BBI)) { // Don't insert a call to llvm.trap right before the unreachable. - changeToUnreachable(BBI, false); + changeToUnreachable(&*BBI, false); Changed = true; } break; @@ -1253,6 +1331,40 @@ static bool markAliveBlocks(Function &F, return Changed; } +void llvm::removeUnwindEdge(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + + if (auto *II = dyn_cast(TI)) { + changeToCall(II); + return; + } + + TerminatorInst *NewTI; + BasicBlock *UnwindDest; + + if (auto *CRI = dyn_cast(TI)) { + NewTI = CleanupReturnInst::Create(CRI->getCleanupPad(), nullptr, CRI); + UnwindDest = CRI->getUnwindDest(); + } else if (auto *CatchSwitch = dyn_cast(TI)) { + auto *NewCatchSwitch = CatchSwitchInst::Create( + CatchSwitch->getParentPad(), nullptr, CatchSwitch->getNumHandlers(), + CatchSwitch->getName(), CatchSwitch); + for (BasicBlock *PadBB : CatchSwitch->handlers()) + NewCatchSwitch->addHandler(PadBB); + + NewTI = NewCatchSwitch; + UnwindDest = CatchSwitch->getUnwindDest(); + } else { + llvm_unreachable("Could not find unwind successor"); + } + + NewTI->takeName(TI); + NewTI->setDebugLoc(TI->getDebugLoc()); + UnwindDest->removePredecessor(BB); + TI->replaceAllUsesWith(NewTI); + TI->eraseFromParent(); +} + /// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even /// if they are in a dead cycle. Return true if a change was made, false /// otherwise. 
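// removeUnwindEdge above dispatches on the terminator's concrete kind and
// rebuilds it without its unwind successor. A standalone analogue of that
// dispatch using dynamic_cast; LLVM uses its own cheaper dyn_cast<>
// machinery, and the types here are stand-ins:
struct TermLike { virtual ~TermLike() = default; };
struct InvokeLike : TermLike {};
struct CleanupReturnLike : TermLike { bool HasUnwindDest = true; };

void removeUnwindEdgeLike(TermLike *T) {
  if (dynamic_cast<InvokeLike *>(T))
    return; // fold the invoke into a plain call, as changeToCall does
  if (auto *CR = dynamic_cast<CleanupReturnLike *>(T)) {
    CR->HasUnwindDest = false; // rebuild the terminator with no unwind dest
    return;
  }
  // The real code also handles catchswitch and is unreachable otherwise.
}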
@@ -1270,17 +1382,18 @@ bool llvm::removeUnreachableBlocks(Function &F) { // Loop over all of the basic blocks that are not reachable, dropping all of // their internal references... for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { - if (Reachable.count(BB)) + if (Reachable.count(&*BB)) continue; - for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + for (succ_iterator SI = succ_begin(&*BB), SE = succ_end(&*BB); SI != SE; + ++SI) if (Reachable.count(*SI)) - (*SI)->removePredecessor(BB); + (*SI)->removePredecessor(&*BB); BB->dropAllReferences(); } for (Function::iterator I = ++F.begin(); I != F.end();) - if (!Reachable.count(I)) + if (!Reachable.count(&*I)) I = F.getBasicBlockList().erase(I); else ++I; @@ -1288,9 +1401,10 @@ bool llvm::removeUnreachableBlocks(Function &F) { return true; } -void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRef KnownIDs) { +void llvm::combineMetadata(Instruction *K, const Instruction *J, + ArrayRef KnownIDs) { SmallVector, 4> Metadata; - K->dropUnknownMetadata(KnownIDs); + K->dropUnknownNonDebugMetadata(KnownIDs); K->getAllMetadataOtherThanDebugLoc(Metadata); for (unsigned i = 0, n = Metadata.size(); i < n; ++i) { unsigned Kind = Metadata[i].first; @@ -1326,8 +1440,29 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, ArrayRefsetMetadata(Kind, JMD); break; + case LLVMContext::MD_invariant_group: + // Preserve !invariant.group in K. + break; + case LLVMContext::MD_align: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; + case LLVMContext::MD_dereferenceable: + case LLVMContext::MD_dereferenceable_or_null: + K->setMetadata(Kind, + MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD)); + break; } } + // Set !invariant.group from J if J has it. If both instructions have it + // then we will just pick it from J - even when they are different. + // Also make sure that K is load or store - f.e. combining bitcast with load + // could produce bitcast with invariant.group metadata, which is invalid. + // FIXME: we should try to preserve both invariant.group md if they are + // different, but right now instruction can only have one invariant.group. + if (auto *JMD = J->getMetadata(LLVMContext::MD_invariant_group)) + if (isa(K) || isa(K)) + K->setMetadata(LLVMContext::MD_invariant_group, JMD); } unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, @@ -1349,3 +1484,40 @@ unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, } return Count; } + +unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, + DominatorTree &DT, + const BasicBlock *BB) { + assert(From->getType() == To->getType()); + + unsigned Count = 0; + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE;) { + Use &U = *UI++; + auto *I = cast(U.getUser()); + if (DT.dominates(BB, I->getParent())) { + U.set(To); + DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " + << *To << " in " << *U << "\n"); + ++Count; + } + } + return Count; +} + +bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { + if (isa(CS.getInstruction())) + // Most LLVM intrinsics are things which can never take a safepoint. + // As a result, we don't need to have the stack parsable at the + // callsite. This is a highly useful optimization since intrinsic + // calls are fairly prevalent, particularly in debug builds. + return true; + + // Check if the function is specifically marked as a gc leaf function. 
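// combineMetadata above merges per kind: most kinds must agree or be dropped,
// while alignment/dereferenceable kinds keep the "most generic" (weaker) of
// the two claims, since the merged instruction must satisfy both originals.
// The same rule on plain numbers, as a sketch:
#include <algorithm>
#include <optional>

std::optional<unsigned> mergeAlignClaim(std::optional<unsigned> A,
                                        std::optional<unsigned> B) {
  if (!A || !B)
    return std::nullopt;   // one side makes no claim: drop the metadata
  return std::min(*A, *B); // the smaller guarantee is the safe one
}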
+ // + // TODO: we should be checking the attributes on the call site as well. + if (const Function *F = CS.getCalledFunction()) + return F->hasFnAttribute("gc-leaf-function"); + + return false; +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 5c98043e4632..1fa469595d16 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -44,11 +44,14 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -78,7 +81,7 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, SmallVectorImpl &SplitPreds, Loop *L) { // Check to see if NewBB is already well placed. - Function::iterator BBI = NewBB; --BBI; + Function::iterator BBI = --NewBB->getIterator(); for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { if (&*BBI == SplitPreds[i]) return; @@ -92,9 +95,8 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, // block that neighbors a BB actually in the loop. BasicBlock *FoundBB = nullptr; for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { - Function::iterator BBI = SplitPreds[i]; - if (++BBI != NewBB->getParent()->end() && - L->contains(BBI)) { + Function::iterator BBI = SplitPreds[i]->getIterator(); + if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) { FoundBB = SplitPreds[i]; break; } @@ -112,17 +114,10 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB, /// preheader, this method is called to insert one. This method has two phases: /// preheader insertion and analysis updating. /// -BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Header = L->getHeader(); - // Get analyses that we try to update. - auto *AA = PP->getAnalysisIfAvailable(); - auto *DTWP = PP->getAnalysisIfAvailable(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *LIWP = PP->getAnalysisIfAvailable(); - auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); - // Compute the set of predecessors of the loop that are not in the loop. SmallVector OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); @@ -141,8 +136,10 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { // Split out the loop pre-header. BasicBlock *PreheaderBB; - PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", - AA, DT, LI, PreserveLCSSA); + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT, + LI, PreserveLCSSA); + if (!PreheaderBB) + return nullptr; DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << PreheaderBB->getName() << "\n"); @@ -159,8 +156,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) { /// This method is used to split exit blocks that have predecessors outside of /// the loop. 
static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, - AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, Pass *PP) { + DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { SmallVector LoopBlocks; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { BasicBlock *P = *I; @@ -175,10 +172,10 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); BasicBlock *NewExitBB = nullptr; - bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); - - NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", AA, DT, - LI, PreserveLCSSA); + NewExitBB = SplitBlockPredecessors(Exit, LoopBlocks, ".loopexit", DT, LI, + PreserveLCSSA); + if (!NewExitBB) + return nullptr; DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " << NewExitBB->getName() << "\n"); @@ -206,8 +203,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, /// \brief The first part of loop-nestification is to find a PHI node that tells /// us how to partition the loops. -static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, - DominatorTree *DT, +static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, AssumptionCache *AC) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) { @@ -216,7 +212,6 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); - if (AA) AA->deleteValue(PN); PN->eraseFromParent(); continue; } @@ -251,18 +246,18 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA, /// created. /// static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, - AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, ScalarEvolution *SE, Pass *PP, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, bool PreserveLCSSA, AssumptionCache *AC) { // Don't try to separate loops without a preheader. if (!Preheader) return nullptr; // The header is not a landing pad; preheader insertion should ensure this. - assert(!L->getHeader()->isLandingPad() && - "Can't insert backedge to landing pad"); + BasicBlock *Header = L->getHeader(); + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); - PHINode *PN = findPHIToPartitionLoops(L, AA, DT, AC); + PHINode *PN = findPHIToPartitionLoops(L, DT, AC); if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This @@ -286,11 +281,8 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, if (SE) SE->forgetLoop(L); - bool PreserveLCSSA = PP->mustPreserveAnalysisID(LCSSAID); - - BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", - AA, DT, LI, PreserveLCSSA); + DT, LI, PreserveLCSSA); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -357,7 +349,6 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, /// and have that block branch to the loop header. This ensures that loops /// have exactly one backedge. 
static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); @@ -369,8 +360,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, if (!Preheader) return nullptr; - // The header is not a landing pad; preheader insertion should ensure this. - assert(!Header->isLandingPad() && "Can't insert backedge to landing pad"); + // The header is not an EH pad; preheader insertion should ensure this. + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); // Figure out which basic blocks contain back-edges to the loop header. std::vector BackedgeBlocks; @@ -394,7 +385,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, << BEBlock->getName() << "\n"); // Move the new backedge block to right after the last backedge block. - Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos; + Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock); // Now that the block has been inserted into the function, create PHI nodes in @@ -443,7 +434,6 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, // eliminate the PHI Node. if (HasUniqueIncomingValue) { NewPN->replaceAllUsesWith(UniqueValue); - if (AA) AA->deleteValue(NewPN); BEBlock->getInstList().erase(NewPN); } } @@ -470,15 +460,10 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, } /// \brief Simplify one loop and queue further loops for simplification. -/// -/// FIXME: Currently this accepts both lots of analyses that it uses and a raw -/// Pass pointer. The Pass pointer is used by numerous utilities to update -/// specific analyses. Rather than a pass it would be much cleaner and more -/// explicit if they accepted the analysis directly and then updated it. static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, Pass *PP, - AssumptionCache *AC) { + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { bool Changed = false; ReprocessLoop: @@ -544,7 +529,7 @@ ReprocessLoop: // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - Preheader = InsertPreheaderForLoop(L, PP); + Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); if (Preheader) { ++NumInserted; Changed = true; @@ -568,7 +553,7 @@ ReprocessLoop: // Must be exactly this loop: no subloops, parent loops, or non-loop preds // allowed. if (!L->contains(*PI)) { - if (rewriteLoopExitBlock(L, ExitBlock, AA, DT, LI, PP)) { + if (rewriteLoopExitBlock(L, ExitBlock, DT, LI, PreserveLCSSA)) { ++NumInserted; Changed = true; } @@ -585,7 +570,7 @@ ReprocessLoop: // common backedge instead. if (L->getNumBackEdges() < 8) { if (Loop *OuterL = - separateNestedLoop(L, Preheader, AA, DT, LI, SE, PP, AC)) { + separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) { ++NumNested; // Enqueue the outer loop as it should be processed next in our // depth-first nest walk. @@ -602,7 +587,7 @@ ReprocessLoop: // If we either couldn't, or didn't want to, identify nesting of the loops, // insert a new block that all backedges target, then make it jump to the // loop header. 
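// The HasUniqueIncomingValue logic referenced above avoids creating a PHI in
// the merged backedge block when every backedge carries the same value. The
// check in isolation, as a generic standalone helper:
#include <optional>
#include <vector>

template <typename T>
std::optional<T> uniqueIncomingValue(const std::vector<T> &Incoming) {
  if (Incoming.empty())
    return std::nullopt;
  for (const T &V : Incoming)
    if (!(V == Incoming.front()))
      return std::nullopt;   // values differ: a real PHI is required
  return Incoming.front();   // one value: forward it and drop the PHI
}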
- LoopLatch = insertUniqueBackedgeBlock(L, Preheader, AA, DT, LI); + LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); if (LoopLatch) { ++NumInserted; Changed = true; @@ -618,7 +603,6 @@ ReprocessLoop: for (BasicBlock::iterator I = L->getHeader()->begin(); (PN = dyn_cast(I++)); ) if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { - if (AA) AA->deleteValue(PN); if (SE) SE->forgetValue(PN); PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -654,7 +638,7 @@ ReprocessLoop: bool AllInvariant = true; bool AnyInvariant = false; for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { - Instruction *Inst = I++; + Instruction *Inst = &*I++; // Skip debug info intrinsics. if (isa(Inst)) continue; @@ -716,9 +700,9 @@ ReprocessLoop: return Changed; } -bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, - AliasAnalysis *AA, ScalarEvolution *SE, - AssumptionCache *AC) { +bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. @@ -734,8 +718,8 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, } while (!Worklist.empty()) - Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, - SE, PP, AC); + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE, + AC, PreserveLCSSA); return Changed; } @@ -747,9 +731,6 @@ namespace { initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); } - // AA - If we have an alias analysis object to update, this is it, otherwise - // this is null. - AliasAnalysis *AA; DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; @@ -767,8 +748,11 @@ namespace { AU.addRequired(); AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. } @@ -784,6 +768,9 @@ INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify", INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", false, false) @@ -796,15 +783,16 @@ Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } /// bool LoopSimplify::runOnFunction(Function &F) { bool Changed = false; - AA = getAnalysisIfAvailable(); LI = &getAnalysis().getLoopInfo(); DT = &getAnalysis().getDomTree(); - SE = getAnalysisIfAvailable(); + auto *SEWP = getAnalysisIfAvailable(); + SE = SEWP ? &SEWP->getSE() : nullptr; AC = &getAnalysis().getAssumptionCache(F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); // Simplify each loop nest in the function. 
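// simplifyLoop/simplifyOneLoop above run a depth-first worklist in which the
// callee may enqueue more work (separateNestedLoop re-enqueues the new outer
// loop). The bare idiom, runnable standalone with ints in place of loops:
#include <vector>

bool processOne(int Item, std::vector<int> &Worklist) {
  if (Item > 0)
    Worklist.push_back(Item - 1); // processing may enqueue follow-up work
  return Item % 2 == 0;
}

int main() {
  std::vector<int> Worklist = {3};
  bool Changed = false;
  while (!Worklist.empty()) {
    int Item = Worklist.back(); // LLVM spells these two lines
    Worklist.pop_back();        // "Worklist.pop_back_val()"
    Changed |= processOne(Item, Worklist);
  }
  return Changed ? 0 : 1;
}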
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, AC); + Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA); return Changed; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 1dbce4746835..2499b88741fe 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -73,7 +73,7 @@ static inline void RemapInstruction(Instruction *I, /// of loops that have already been forgotten to prevent redundant, expensive /// calls to ScalarEvolution::forgetLoop. Returns the new combined block. static BasicBlock * -FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, +FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, ScalarEvolution *SE, SmallPtrSetImpl &ForgottenLoops) { // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and @@ -109,12 +109,10 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, // Erase basic block from the function... // ScalarEvolution holds references to loop exit blocks. - if (LPM) { - if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable()) { - if (Loop *L = LI->getLoopFor(BB)) { - if (ForgottenLoops.insert(L).second) - SE->forgetLoop(L); - } + if (SE) { + if (Loop *L = LI->getLoopFor(BB)) { + if (ForgottenLoops.insert(L).second) + SE->forgetLoop(L); } } LI->removeBlock(BB); @@ -155,15 +153,13 @@ FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI, LPPassManager *LPM, /// /// The LoopInfo Analysis that is passed will be kept consistent. /// -/// If a LoopPassManager is passed in, and the loop is fully removed, it will be -/// removed from the LoopPassManager as well. LPM can also be NULL. -/// -/// This utility preserves LoopInfo. If DominatorTree or ScalarEvolution are -/// available from the Pass it must also preserve those analyses. +/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and +/// DominatorTree if they are non-null. bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, bool AllowExpensiveTripCount, - unsigned TripMultiple, LoopInfo *LI, Pass *PP, - LPPassManager *LPM, AssumptionCache *AC) { + unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, + bool PreserveLCSSA) { BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n"); @@ -220,6 +216,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? 
bool CompletelyUnroll = Count == TripCount; + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + Loop *ParentL = L->getParentLoop(); + bool AllExitsAreInsideParentLoop = !ParentL || + std::all_of(ExitBlocks.begin(), ExitBlocks.end(), + [&](BasicBlock *BB) { return ParentL->contains(BB); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime @@ -227,13 +229,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime); if (RuntimeTripCount && - !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, LPM)) + !UnrollRuntimeLoopProlog(L, Count, AllowExpensiveTripCount, LI, SE, DT, + PreserveLCSSA)) return false; // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. - ScalarEvolution *SE = - PP ? PP->getAnalysisIfAvailable() : nullptr; if (SE) SE->forgetLoop(L); @@ -392,7 +393,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, for (unsigned i = 0; i < NewBlocks.size(); ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - ::RemapInstruction(I, LastValueMap); + ::RemapInstruction(&*I, LastValueMap); } // Loop over the PHI nodes in the original block, setting incoming values. @@ -432,8 +433,9 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // For a complete unroll, make the last iteration end with a branch // to the exit block. - if (CompletelyUnroll && j == 0) { - Dest = LoopExit; + if (CompletelyUnroll) { + if (j == 0) + Dest = LoopExit; NeedConditional = false; } @@ -473,7 +475,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, BranchInst *Term = cast(Latches[i]->getTerminator()); if (Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); - if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, LPM, + if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops)) std::replace(Latches.begin(), Latches.end(), Dest, Fold); } @@ -483,29 +485,24 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // whole function's cache. AC->clear(); - DominatorTree *DT = nullptr; - if (PP) { - // FIXME: Reconstruct dom info, because it is not preserved properly. - // Incrementally updating domtree after loop unrolling would be easy. - if (DominatorTreeWrapperPass *DTWP = - PP->getAnalysisIfAvailable()) { - DT = &DTWP->getDomTree(); - DT->recalculate(*L->getHeader()->getParent()); - } + // FIXME: Reconstruct dom info, because it is not preserved properly. + // Incrementally updating domtree after loop unrolling would be easy. + if (DT) + DT->recalculate(*L->getHeader()->getParent()); - // Simplify any new induction variables in the partially unrolled loop. - if (SE && !CompletelyUnroll) { - SmallVector DeadInsts; - simplifyLoopIVs(L, SE, LPM, DeadInsts); + // Simplify any new induction variables in the partially unrolled loop. + if (SE && !CompletelyUnroll) { + SmallVector DeadInsts; + simplifyLoopIVs(L, SE, DT, LI, DeadInsts); - // Aggressively clean up dead instructions that simplifyLoopIVs already - // identified. Any remaining should be cleaned up below. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); - } + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. 
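// AllExitsAreInsideParentLoop above is a plain std::all_of over the loop's
// exit blocks. The same predicate shape standalone, with ints standing in
// for BasicBlocks:
#include <algorithm>
#include <set>
#include <vector>

bool allExitsInside(const std::set<int> &ParentBlocks,
                    const std::vector<int> &ExitBlocks) {
  return std::all_of(ExitBlocks.begin(), ExitBlocks.end(),
                     [&](int BB) { return ParentBlocks.count(BB) != 0; });
}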
+    while (!DeadInsts.empty())
+      if (Instruction *Inst =
+              dyn_cast_or_null(&*DeadInsts.pop_back_val()))
+        RecursivelyDeleteTriviallyDeadInstructions(Inst);
   }
+
   // At this point, the code is well formed. We now do a quick sweep over the
   // inserted code, doing constant propagation and dead code elimination as we
   // go.
@@ -514,7 +511,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
   for (std::vector::const_iterator BB = NewLoopBlocks.begin(),
        BBE = NewLoopBlocks.end(); BB != BBE; ++BB)
     for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E; ) {
-      Instruction *Inst = I++;
+      Instruction *Inst = &*I++;

       if (isInstructionTriviallyDead(Inst))
         (*BB)->getInstList().erase(Inst);
@@ -529,29 +526,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
   ++NumUnrolled;

   Loop *OuterL = L->getParentLoop();
-  // Remove the loop from the LoopPassManager if it's completely removed.
-  if (CompletelyUnroll && LPM != nullptr)
-    LPM->deleteLoopFromQueue(L);
+  // Update LoopInfo if the loop is completely removed.
+  if (CompletelyUnroll)
+    LI->updateUnloop(L);
+
   // If we have a pass and a DominatorTree we should re-simplify impacted loops
   // to ensure subsequent analyses can rely on this form. We want to simplify
   // at least one layer outside of the loop that was unrolled so that any
   // changes to the parent loop exposed by the unrolling are considered.
-  if (PP && DT) {
+  if (DT) {
     if (!OuterL && !CompletelyUnroll)
       OuterL = L;
     if (OuterL) {
-      simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, AC);
+      bool Simplified = simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);

       // LCSSA must be performed on the outermost affected loop. The unrolled
       // loop's last loop latch is guaranteed to be in the outermost loop after
-      // deleteLoopFromQueue updates LoopInfo.
+      // LoopInfo's been updated by updateUnloop.
Loop *LatchLoop = LI->getLoopFor(Latches.back()); if (!OuterL->contains(LatchLoop)) while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - formLCSSARecursively(*OuterL, *DT, LI, SE); + if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified)) + formLCSSARecursively(*OuterL, *DT, LI, SE); + else + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); } } diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index add5432aa276..0d68f18ad0e5 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -62,8 +62,8 @@ STATISTIC(NumRuntimeUnrolled, static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, BasicBlock *LastPrologBB, BasicBlock *PrologEnd, BasicBlock *OrigPH, BasicBlock *NewPH, - ValueToValueMapTy &VMap, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI, Pass *P) { + ValueToValueMapTy &VMap, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Latch = L->getLoopLatch(); assert(Latch && "Loop must have a latch"); @@ -127,8 +127,8 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count, assert(Exit && "Loop must have a single exit block only"); // Split the exit to maintain loop canonicalization guarantees SmallVector Preds(pred_begin(Exit), pred_end(Exit)); - SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", AA, DT, LI, - P->mustPreserveAnalysisID(LCSSAID)); + SplitBlockPredecessors(Exit, Preds, ".unr-lcssa", DT, LI, + PreserveLCSSA); // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPH); InsertPt->eraseFromParent(); @@ -150,7 +150,7 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, Function *F = Header->getParent(); LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); - Loop *NewLoop = 0; + Loop *NewLoop = nullptr; Loop *ParentLoop = L->getParentLoop(); if (!UnrollProlog) { NewLoop = new Loop(); @@ -206,9 +206,9 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, // Change the incoming values to the ones defined in the preheader or // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { - PHINode *NewPHI = cast(VMap[I]); + PHINode *NewPHI = cast(VMap[&*I]); if (UnrollProlog) { - VMap[I] = NewPHI->getIncomingValueForBlock(Preheader); + VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); cast(VMap[Header])->getInstList().erase(NewPHI); } else { unsigned idx = NewPHI->getBasicBlockIndex(Preheader); @@ -279,7 +279,8 @@ static void CloneLoopBlocks(Loop *L, Value *NewIter, const bool UnrollProlog, /// bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, bool AllowExpensiveTripCount, LoopInfo *LI, - LPPassManager *LPM) { + ScalarEvolution *SE, DominatorTree *DT, + bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit if (!L->getExitingBlock()) return false; @@ -291,9 +292,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, // Use Scalar Evolution to compute the trip count. 
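// Runtime unrolling, as set up above, peels TripCount % Count iterations into
// a prolog so the unrolled body always runs a whole number of passes. The
// arithmetic with made-up concrete numbers:
#include <cstdint>
#include <iostream>

int main() {
  uint64_t BECount = 9;             // backedge-taken count from SCEV
  uint64_t TripCount = BECount + 1; // the "+1" added via getAddExpr above
  uint64_t Count = 4;               // unroll factor
  uint64_t PrologIters = TripCount % Count;
  std::cout << PrologIters << " prolog iterations, then "
            << (TripCount - PrologIters) / Count << " unrolled passes\n";
}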
This allows more // loops to be unrolled than relying on induction var simplification - if (!LPM) - return false; - ScalarEvolution *SE = LPM->getAnalysisIfAvailable(); if (!SE) return false; @@ -308,7 +306,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, // Add 1 since the backedge count doesn't include the first loop iteration const SCEV *TripCountSC = - SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); + SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); if (isa(TripCountSC)) return false; @@ -333,10 +331,6 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); - // Grab analyses that we preserve. - auto *DTWP = LPM->getAnalysisIfAvailable(); - auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - BasicBlock *PH = L->getLoopPreheader(); BasicBlock *Latch = L->getLoopLatch(); // It helps to splits the original preheader twice, one for the end of the @@ -397,8 +391,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, VMap, LI); // Insert the cloned blocks into function just before the original loop - F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0], - F->end()); + F->getBasicBlockList().splice(PEnd->getIterator(), F->getBasicBlockList(), + NewBlocks[0]->getIterator(), F->end()); // Rewrite the cloned instruction operands to use the values // created when the clone is created. @@ -406,7 +400,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) { - RemapInstruction(I, VMap, + RemapInstruction(&*I, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); } } @@ -414,8 +408,8 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, // Connect the prolog code to the original loop and update the // PHI functions. 
BasicBlock *LastLoopBB = cast(VMap[Latch]); - ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, - /*AliasAnalysis*/ nullptr, DT, LI, LPM->getAsPass()); + ConnectProlog(L, BECount, Count, LastLoopBB, PEnd, PH, NewPH, VMap, DT, LI, + PreserveLCSSA); NumRuntimeUnrolled++; return true; } diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 5cbde94a98ed..e03880526bfa 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Debug.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/Module.h" #include "llvm/Transforms/Utils/LoopUtils.h" using namespace llvm; @@ -34,6 +34,124 @@ bool RecurrenceDescriptor::areAllUsesIn(Instruction *I, return true; } +bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_IntegerOr: + case RK_IntegerAnd: + case RK_IntegerXor: + case RK_IntegerMinMax: + return true; + } + return false; +} + +bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) { + return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind); +} + +bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) { + switch (Kind) { + default: + break; + case RK_IntegerAdd: + case RK_IntegerMult: + case RK_FloatAdd: + case RK_FloatMult: + return true; + } + return false; +} + +Instruction * +RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, + SmallPtrSetImpl &Visited, + SmallPtrSetImpl &CI) { + if (!Phi->hasOneUse()) + return Phi; + + const APInt *M = nullptr; + Instruction *I, *J = cast(Phi->use_begin()->getUser()); + + // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT + // with a new integer type of the corresponding bit width. + if (match(J, m_CombineOr(m_And(m_Instruction(I), m_APInt(M)), + m_And(m_APInt(M), m_Instruction(I))))) { + int32_t Bits = (*M + 1).exactLogBase2(); + if (Bits > 0) { + RT = IntegerType::get(Phi->getContext(), Bits); + Visited.insert(Phi); + CI.insert(J); + return J; + } + } + return Phi; +} + +bool RecurrenceDescriptor::getSourceExtensionKind( + Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, + SmallPtrSetImpl &Visited, + SmallPtrSetImpl &CI) { + + SmallVector Worklist; + bool FoundOneOperand = false; + unsigned DstSize = RT->getPrimitiveSizeInBits(); + Worklist.push_back(Exit); + + // Traverse the instructions in the reduction expression, beginning with the + // exit value. + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + for (Use &U : I->operands()) { + + // Terminate the traversal if the operand is not an instruction, or we + // reach the starting value. + Instruction *J = dyn_cast(U.get()); + if (!J || J == Start) + continue; + + // Otherwise, investigate the operation if it is also in the expression. + if (Visited.count(J)) { + Worklist.push_back(J); + continue; + } + + // If the operand is not in Visited, it is not a reduction operation, but + // it does feed into one. 
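// lookThroughAnd above recognizes masks of the form 2^n - 1 via
// (*M + 1).exactLogBase2(). The same test on a plain 64-bit integer (APInt
// handles arbitrary widths; this sketch does not):
#include <cstdint>

// Returns n if M == 2^n - 1 with n > 0, otherwise -1.
int promotedBitWidth(uint64_t M) {
  uint64_t P = M + 1;               // 2^n - 1 becomes 2^n
  if (P == 0 || (P & (P - 1)) != 0)
    return -1;                      // overflowed or not a power of two
  int N = 0;
  while (P >>= 1)
    ++N;
  return N > 0 ? N : -1;            // the patch also requires Bits > 0
}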
Make sure it is either a single-use sign- or + // zero-extend instruction. + CastInst *Cast = dyn_cast(J); + bool IsSExtInst = isa(J); + if (!Cast || !Cast->hasOneUse() || !(isa(J) || IsSExtInst)) + return false; + + // Ensure the source type of the extend is no larger than the reduction + // type. It is not necessary for the types to be identical. + unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); + if (SrcSize > DstSize) + return false; + + // Furthermore, ensure that all such extends are of the same kind. + if (FoundOneOperand) { + if (IsSigned != IsSExtInst) + return false; + } else { + FoundOneOperand = true; + IsSigned = IsSExtInst; + } + + // Lastly, if the source type of the extend matches the reduction type, + // add the extend to CI so that we can avoid accounting for it in the + // cost model. + if (SrcSize == DstSize) + CI.insert(Cast); + } + } + return true; +} + bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop, bool HasFunNoNaNAttr, RecurrenceDescriptor &RedDes) { @@ -68,10 +186,32 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, unsigned NumCmpSelectPatternInst = 0; InstDesc ReduxDesc(false, nullptr); + // Data used for determining if the recurrence has been type-promoted. + Type *RecurrenceType = Phi->getType(); + SmallPtrSet CastInsts; + Instruction *Start = Phi; + bool IsSigned = false; + SmallPtrSet VisitedInsts; SmallVector Worklist; - Worklist.push_back(Phi); - VisitedInsts.insert(Phi); + + // Return early if the recurrence kind does not match the type of Phi. If the + // recurrence kind is arithmetic, we attempt to look through AND operations + // resulting from the type promotion performed by InstCombine. Vector + // operations are not limited to the legal integer widths, so we may be able + // to evaluate the reduction in the narrower width. + if (RecurrenceType->isFloatingPointTy()) { + if (!isFloatingPointRecurrenceKind(Kind)) + return false; + } else { + if (!isIntegerRecurrenceKind(Kind)) + return false; + if (isArithmeticRecurrenceKind(Kind)) + Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts); + } + + Worklist.push_back(Start); + VisitedInsts.insert(Start); // A value in the reduction can be used: // - By the reduction: @@ -110,10 +250,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, !VisitedInsts.count(dyn_cast(Cur->getOperand(0)))) return false; - // Any reduction instruction must be of one of the allowed kinds. - ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); - if (!ReduxDesc.isRecurrence()) - return false; + // Any reduction instruction must be of one of the allowed kinds. We ignore + // the starting value (the Phi or an AND instruction if the Phi has been + // type-promoted). + if (Cur != Start) { + ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr); + if (!ReduxDesc.isRecurrence()) + return false; + } // A reduction operation must only have one use of the reduction value. if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax && @@ -131,7 +275,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind, ++NumCmpSelectPatternInst; // Check whether we found a reduction operator. - FoundReduxOp |= !IsAPhi; + FoundReduxOp |= !IsAPhi && Cur != Start; // Process users of current instruction. Push non-PHI nodes after PHI nodes // onto the stack. 
This way we are going to have seen all inputs to PHI
@@ -193,6 +337,14 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
     return false;

+  // If we think Phi may have been type-promoted, we also need to ensure that
+  // all source operands of the reduction are either SExtInsts or ZExtInsts. If
+  // so, we will be able to evaluate the reduction in the narrower bit width.
+  if (Start != Phi)
+    if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
+                                IsSigned, VisitedInsts, CastInsts))
+      return false;
+
   // We found a reduction var if we have reached the original phi node and we
   // only have a single instruction with out-of-loop users.
@@ -200,9 +352,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
   // is saved as part of the RecurrenceDescriptor.

   // Save the description of this reduction variable.
-  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind,
-                          ReduxDesc.getMinMaxKind());
-
+  RecurrenceDescriptor RD(
+      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
+      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);
   RedDes = RD;

   return true;
@@ -263,14 +415,14 @@ RecurrenceDescriptor::InstDesc
 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                         InstDesc &Prev, bool HasFunNoNaNAttr) {
   bool FP = I->getType()->isFloatingPointTy();
-  bool FastMath = FP && I->hasUnsafeAlgebra();
+  Instruction *UAI = Prev.getUnsafeAlgebraInst();
+  if (!UAI && FP && !I->hasUnsafeAlgebra())
+    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.
+
   switch (I->getOpcode()) {
   default:
     return InstDesc(false, I);
   case Instruction::PHI:
-    if (FP &&
-        (Kind != RK_FloatMult && Kind != RK_FloatAdd && Kind != RK_FloatMinMax))
-      return InstDesc(false, I);
     return InstDesc(I, Prev.getMinMaxKind());
   case Instruction::Sub:
   case Instruction::Add:
@@ -284,10 +436,10 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
   case Instruction::Xor:
     return InstDesc(Kind == RK_IntegerXor, I);
   case Instruction::FMul:
-    return InstDesc(Kind == RK_FloatMult && FastMath, I);
+    return InstDesc(Kind == RK_FloatMult, I, UAI);
   case Instruction::FSub:
   case Instruction::FAdd:
-    return InstDesc(Kind == RK_FloatAdd && FastMath, I);
+    return InstDesc(Kind == RK_FloatAdd, I, UAI);
   case Instruction::FCmp:
   case Instruction::ICmp:
   case Instruction::Select:
@@ -442,6 +594,13 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
     break;
   }

+  // We only match FP sequences with unsafe algebra, so we can unconditionally
+  // set it on any generated instructions.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + Builder.SetFastMathFlags(FMF); + Value *Cmp; if (RK == MRK_FloatMin || RK == MRK_FloatMax) Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp"); @@ -452,8 +611,54 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder, return Select; } -bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, - ConstantInt *&StepValue) { +InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K, + ConstantInt *Step) + : StartValue(Start), IK(K), StepValue(Step) { + assert(IK != IK_NoInduction && "Not an induction"); + assert(StartValue && "StartValue is null"); + assert(StepValue && !StepValue->isZero() && "StepValue is zero"); + assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && + "StartValue is not a pointer for pointer induction"); + assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && + "StartValue is not an integer for integer induction"); + assert(StepValue->getType()->isIntegerTy() && + "StepValue is not an integer"); +} + +int InductionDescriptor::getConsecutiveDirection() const { + if (StepValue && (StepValue->isOne() || StepValue->isMinusOne())) + return StepValue->getSExtValue(); + return 0; +} + +Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index) const { + switch (IK) { + case IK_IntInduction: + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + if (StepValue->isMinusOne()) + return B.CreateSub(StartValue, Index); + if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateAdd(StartValue, Index); + + case IK_PtrInduction: + assert(Index->getType() == StepValue->getType() && + "Index type does not match StepValue type"); + if (StepValue->isMinusOne()) + Index = B.CreateNeg(Index); + else if (!StepValue->isOne()) + Index = B.CreateMul(Index, StepValue); + return B.CreateGEP(nullptr, StartValue, Index); + + case IK_NoInduction: + return nullptr; + } + llvm_unreachable("invalid enum"); +} + +bool InductionDescriptor::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, + InductionDescriptor &D) { Type *PhiTy = Phi->getType(); // We only handle integer and pointer inductions variables. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) @@ -467,6 +672,10 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, return false; } + assert(AR->getLoop()->getHeader() == Phi->getParent() && + "PHI is an AddRec for a different loop?!"); + Value *StartValue = + Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); const SCEV *Step = AR->getStepRecurrence(*SE); // Calculate the pointer stride and check if it is consecutive. const SCEVConstant *C = dyn_cast(Step); @@ -475,7 +684,7 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, ConstantInt *CV = C->getValue(); if (PhiTy->isIntegerTy()) { - StepValue = CV; + D = InductionDescriptor(StartValue, IK_IntInduction, CV); return true; } @@ -494,6 +703,27 @@ bool llvm::isInductionPHI(PHINode *Phi, ScalarEvolution *SE, int64_t CVSize = CV->getSExtValue(); if (CVSize % Size) return false; - StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + auto *StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size); + + D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue); return true; } + +/// \brief Returns the instructions that use values defined in the loop. 
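// InductionDescriptor::transform above materializes Start + Index * Step and
// special-cases Step == +/-1 to avoid the multiply. The integer case reduces
// to this plain arithmetic:
long transformIndex(long Start, long Index, long Step) {
  if (Step == -1)
    return Start - Index; // the CreateSub fast path
  if (Step != 1)
    Index *= Step;        // CreateMul only when actually needed
  return Start + Index;   // CreateAdd
}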
+SmallVector llvm::findDefsUsedOutsideOfLoop(Loop *L) { + SmallVector UsedOutside; + + for (auto *Block : L->getBlocks()) + // FIXME: I believe that this could use copy_if if the Inst reference could + // be adapted into a pointer. + for (auto &Inst : *Block) { + auto Users = Inst.users(); + if (std::any_of(Users.begin(), Users.end(), [&](User *U) { + auto *Use = cast(U); + return !L->contains(Use->getParent()); + })) + UsedOutside.push_back(&Inst); + } + + return UsedOutside; +} diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp index 832079d2cf63..9a2a06cf6891 100644 --- a/lib/Transforms/Utils/LoopVersioning.cpp +++ b/lib/Transforms/Utils/LoopVersioning.cpp @@ -13,43 +13,81 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils/LoopVersioning.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/IR/Dominators.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/LoopVersioning.h" using namespace llvm; LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, - DominatorTree *DT, - const SmallVector *PtrToPartition) - : VersionedLoop(L), NonVersionedLoop(nullptr), - PtrToPartition(PtrToPartition), LAI(LAI), LI(LI), DT(DT) { + DominatorTree *DT, ScalarEvolution *SE, + bool UseLAIChecks) + : VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT), + SE(SE) { assert(L->getExitBlock() && "No single exit block"); assert(L->getLoopPreheader() && "No preheader"); + if (UseLAIChecks) { + setAliasChecks(LAI.getRuntimePointerChecking()->getChecks()); + setSCEVChecks(LAI.PSE.getUnionPredicate()); + } } -bool LoopVersioning::needsRuntimeChecks() const { - return LAI.getRuntimePointerChecking()->needsAnyChecking(PtrToPartition); +void LoopVersioning::setAliasChecks( + const SmallVector Checks) { + AliasChecks = std::move(Checks); } -void LoopVersioning::versionLoop(Pass *P) { +void LoopVersioning::setSCEVChecks(SCEVUnionPredicate Check) { + Preds = std::move(Check); +} + +void LoopVersioning::versionLoop( + const SmallVectorImpl &DefsUsedOutside) { Instruction *FirstCheckInst; Instruction *MemRuntimeCheck; + Value *SCEVRuntimeCheck; + Value *RuntimeCheck = nullptr; + // Add the memcheck in the original preheader (this is empty initially). - BasicBlock *MemCheckBB = VersionedLoop->getLoopPreheader(); + BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader(); std::tie(FirstCheckInst, MemRuntimeCheck) = - LAI.addRuntimeCheck(MemCheckBB->getTerminator(), PtrToPartition); + LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks); assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); + const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate(); + SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), + "scev.check"); + SCEVRuntimeCheck = + Exp.expandCodeForPredicate(&Pred, RuntimeCheckBB->getTerminator()); + auto *CI = dyn_cast(SCEVRuntimeCheck); + + // Discard the SCEV runtime check if it is always true. 
+  if (CI && CI->isZero())
+    SCEVRuntimeCheck = nullptr;
+
+  if (MemRuntimeCheck && SCEVRuntimeCheck) {
+    RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
+                                          SCEVRuntimeCheck, "ldist.safe");
+    if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
+      I->insertBefore(RuntimeCheckBB->getTerminator());
+  } else
+    RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
+
+  assert(RuntimeCheck && "called even though we don't need "
+                         "any runtime checks");
+
   // Rename the block to make the IR more readable.
-  MemCheckBB->setName(VersionedLoop->getHeader()->getName() + ".lver.memcheck");
+  RuntimeCheckBB->setName(VersionedLoop->getHeader()->getName() +
+                          ".lver.check");
 
   // Create empty preheader for the loop (and after cloning for the
   // non-versioned loop).
-  BasicBlock *PH = SplitBlock(MemCheckBB, MemCheckBB->getTerminator(), DT, LI);
+  BasicBlock *PH =
+      SplitBlock(RuntimeCheckBB, RuntimeCheckBB->getTerminator(), DT, LI);
   PH->setName(VersionedLoop->getHeader()->getName() + ".ph");
 
   // Clone the loop including the preheader.
@@ -58,20 +96,23 @@ void LoopVersioning::versionLoop(Pass *P) {
   // block is a join between the two loops.
   SmallVector<BasicBlock *, 8> NonVersionedLoopBlocks;
   NonVersionedLoop =
-      cloneLoopWithPreheader(PH, MemCheckBB, VersionedLoop, VMap, ".lver.orig",
-                             LI, DT, NonVersionedLoopBlocks);
+      cloneLoopWithPreheader(PH, RuntimeCheckBB, VersionedLoop, VMap,
+                             ".lver.orig", LI, DT, NonVersionedLoopBlocks);
   remapInstructionsInBlocks(NonVersionedLoopBlocks, VMap);
 
   // Insert the conditional branch based on the result of the memchecks.
-  Instruction *OrigTerm = MemCheckBB->getTerminator();
+  Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
   BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
-                     VersionedLoop->getLoopPreheader(), MemRuntimeCheck,
-                     OrigTerm);
+                     VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm);
   OrigTerm->eraseFromParent();
 
   // The loops merge in the original exit block. This is now dominated by the
   // memchecking block.
-  DT->changeImmediateDominator(VersionedLoop->getExitBlock(), MemCheckBB);
+  DT->changeImmediateDominator(VersionedLoop->getExitBlock(), RuntimeCheckBB);
+
+  // Adds the necessary PHI nodes for the versioned loops based on the
+  // loop-defined values used outside of the loop.
+  addPHINodes(DefsUsedOutside);
 }
 
 void LoopVersioning::addPHINodes(
@@ -94,7 +135,7 @@ void LoopVersioning::addPHINodes(
     // If not create it.
     if (!PN) {
       PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",
-                           PHIBlock->begin());
+                           &PHIBlock->front());
       for (auto *User : Inst->users())
         if (!VersionedLoop->contains(cast<Instruction>(User)->getParent()))
           User->replaceUsesOfWith(Inst, PN);
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 66d57b069fe7..b0ad4d5e84a1 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -69,7 +69,7 @@ bool LowerInvoke::runOnFunction(Function &F) {
         BranchInst::Create(II->getNormalDest(), II);
 
         // Remove any PHI node entries from the exception destination.
-        II->getUnwindDest()->removePredecessor(BB);
+        II->getUnwindDest()->removePredecessor(&*BB);
 
         // Remove the invoke instruction now.
        BB->getInstList().erase(II);
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 4acd988691d2..52beb1542497 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -49,8 +49,7 @@ namespace {
       return I != Ranges.end() && I->Low <= R.Low;
     }
 
-  /// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
-  /// instructions.
+  /// Replace all SwitchInst instructions with chained branch instructions.
   class LowerSwitch : public FunctionPass {
   public:
     static char ID; // Pass identification, replacement for typeid
@@ -78,7 +77,7 @@ namespace {
     typedef std::vector<CaseRange> CaseVector;
     typedef std::vector<CaseRange>::iterator CaseItr;
   private:
-    void processSwitchInst(SwitchInst *SI);
+    void processSwitchInst(SwitchInst *SI,
+                           SmallPtrSetImpl<BasicBlock *> &DeleteList);
 
     BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
                               ConstantInt *LowerBound, ConstantInt *UpperBound,
@@ -116,21 +115,30 @@ FunctionPass *llvm::createLowerSwitchPass() {
 
 bool LowerSwitch::runOnFunction(Function &F) {
   bool Changed = false;
+  SmallPtrSet<BasicBlock *, 8> DeleteList;
 
   for (Function::iterator I = F.begin(), E = F.end(); I != E; ) {
-    BasicBlock *Cur = I++; // Advance over block so we don't traverse new blocks
+    BasicBlock *Cur = &*I++; // Advance over block so we don't traverse new blocks
+
+    // If the block is a dead Default block that will be deleted later, don't
+    // waste time processing it.
+    if (DeleteList.count(Cur))
+      continue;
 
     if (SwitchInst *SI = dyn_cast<SwitchInst>(Cur->getTerminator())) {
      Changed = true;
-      processSwitchInst(SI);
+      processSwitchInst(SI, DeleteList);
    }
  }
 
+  for (BasicBlock* BB: DeleteList) {
+    DeleteDeadBlock(BB);
+  }
+
   return Changed;
 }
 
-// operator<< - Used for debugging purposes.
-//
+/// Used for debugging purposes.
 static raw_ostream& operator<<(raw_ostream &O,
                                const LowerSwitch::CaseVector &C)
     LLVM_ATTRIBUTE_USED;
@@ -147,23 +155,24 @@ static raw_ostream& operator<<(raw_ostream &O,
   return O << "]";
 }
 
-// \brief Update the first occurrence of the "switch statement" BB in the PHI
-// node with the "new" BB. The other occurrences will:
-//
-// 1) Be updated by subsequent calls to this function. Switch statements may
-// have more than one outcoming edge into the same BB if they all have the same
-// value. When the switch statement is converted these incoming edges are now
-// coming from multiple BBs.
-// 2) Removed if subsequent incoming values now share the same case, i.e.,
-// multiple outcome edges are condensed into one. This is necessary to keep the
-// number of phi values equal to the number of branches to SuccBB.
+/// \brief Update the first occurrence of the "switch statement" BB in the PHI
+/// node with the "new" BB. The other occurrences will:
+///
+/// 1) Be updated by subsequent calls to this function. Switch statements may
+/// have more than one outcoming edge into the same BB if they all have the same
+/// value. When the switch statement is converted these incoming edges are now
+/// coming from multiple BBs.
+/// 2) Removed if subsequent incoming values now share the same case, i.e.,
+/// multiple outcome edges are condensed into one. This is necessary to keep the
+/// number of phi values equal to the number of branches to SuccBB.
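+///
+/// A rough illustration (hypothetical IR): if SuccBB starts with
+///   %p = phi i32 [ 1, %OrigBB ], [ 1, %OrigBB ]
+/// because two cases of the switch in OrigBB targeted SuccBB, then after one
+/// call with NewBB replacing the first edge it reads
+///   %p = phi i32 [ 1, %NewBB ], [ 1, %OrigBB ]
+/// and the condensing step below trims surplus entries when edges merge.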
static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
                    unsigned NumMergedCases) {
-  for (BasicBlock::iterator I = SuccBB->begin(), IE = SuccBB->getFirstNonPHI();
+  for (BasicBlock::iterator I = SuccBB->begin(),
+                            IE = SuccBB->getFirstNonPHI()->getIterator();
        I != IE; ++I) {
     PHINode *PN = cast<PHINode>(I);
 
-    // Only update the first occurence.
+    // Only update the first occurrence.
     unsigned Idx = 0, E = PN->getNumIncomingValues();
     unsigned LocalNumMergedCases = NumMergedCases;
     for (; Idx != E; ++Idx) {
@@ -173,7 +182,7 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
       }
     }
 
-    // Remove additional occurences coming from condensed cases and keep the
+    // Remove additional occurrences coming from condensed cases and keep the
     // number of incoming values equal to the number of branches to SuccBB.
     SmallVector<unsigned, 8> Indices;
     for (++Idx; LocalNumMergedCases > 0 && Idx < E; ++Idx)
@@ -188,11 +197,11 @@ static void fixPhis(BasicBlock *SuccBB, BasicBlock *OrigBB, BasicBlock *NewBB,
   }
 }
 
-// switchConvert - Convert the switch statement into a binary lookup of
-// the case values. The function recursively builds this tree.
-// LowerBound and UpperBound are used to keep track of the bounds for Val
-// that have already been checked by a block emitted by one of the previous
-// calls to switchConvert in the call stack.
+/// Convert the switch statement into a binary lookup of the case values.
+/// The function recursively builds this tree. LowerBound and UpperBound are
+/// used to keep track of the bounds for Val that have already been checked by
+/// a block emitted by one of the previous calls to switchConvert in the call
+/// stack.
 BasicBlock *
 LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
                            ConstantInt *UpperBound, Value *Val,
@@ -278,28 +287,24 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,
                                       UpperBound, Val, NewNode, OrigBlock,
                                       Default, UnreachableRanges);
 
-  Function::iterator FI = OrigBlock;
-  F->getBasicBlockList().insert(++FI, NewNode);
+  F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewNode);
   NewNode->getInstList().push_back(Comp);
 
   BranchInst::Create(LBranch, RBranch, Comp, NewNode);
   return NewNode;
 }
 
-// newLeafBlock - Create a new leaf block for the binary lookup tree. It
-// checks if the switch's value == the case's value. If not, then it
-// jumps to the default branch. At this point in the tree, the value
-// can't be another valid case value, so the jump to the "default" branch
-// is warranted.
-//
+/// Create a new leaf block for the binary lookup tree. It checks if the
+/// switch's value == the case's value. If not, then it jumps to the default
+/// branch. At this point in the tree, the value can't be another valid case
+/// value, so the jump to the "default" branch is warranted.
 BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
                                       BasicBlock* OrigBlock,
                                       BasicBlock* Default) {
   Function* F = OrigBlock->getParent();
   BasicBlock* NewLeaf = BasicBlock::Create(Val->getContext(), "LeafBlock");
-  Function::iterator FI = OrigBlock;
-  F->getBasicBlockList().insert(++FI, NewLeaf);
+  F->getBasicBlockList().insert(++OrigBlock->getIterator(), NewLeaf);
 
   // Emit comparison
   ICmpInst* Comp = nullptr;
@@ -352,7 +357,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
   return NewLeaf;
 }
 
-// Clusterify - Transform simple list of Cases into list of CaseRange's
+/// Transform simple list of Cases into list of CaseRange's.
unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
  unsigned numCmps = 0;
 
@@ -394,10 +399,10 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
   return numCmps;
 }
 
-// processSwitchInst - Replace the specified switch instruction with a sequence
-// of chained if-then insts in a balanced binary search.
-//
-void LowerSwitch::processSwitchInst(SwitchInst *SI) {
+/// Replace the specified switch instruction with a sequence of chained if-then
+/// insts in a balanced binary search.
+void LowerSwitch::processSwitchInst(SwitchInst *SI,
+                                    SmallPtrSetImpl<BasicBlock *> &DeleteList) {
   BasicBlock *CurBlock = SI->getParent();
   BasicBlock *OrigBlock = CurBlock;
   Function *F = CurBlock->getParent();
@@ -424,7 +429,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
   std::vector<IntRange> UnreachableRanges;
 
   if (isa<UnreachableInst>(Default->getFirstNonPHIOrDbg())) {
-    // Make the bounds tightly fitted around the case value range, becase we
+    // Make the bounds tightly fitted around the case value range, because we
     // know that the value passed to the switch must be exactly one of the case
     // values.
     assert(!Cases.empty());
@@ -495,7 +500,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
     // Create a new, empty default block so that the new hierarchy of
     // if-then statements go to this and the PHI nodes are happy.
     BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
-    F->getBasicBlockList().insert(Default, NewDefault);
+    F->getBasicBlockList().insert(Default->getIterator(), NewDefault);
     BranchInst::Create(Default, NewDefault);
 
   // If there is an entry in any PHI nodes for the default edge, make sure
@@ -518,7 +523,7 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
   BasicBlock *OldDefault = SI->getDefaultDest();
   CurBlock->getInstList().erase(SI);
 
-  // If the Default block has no more predecessors just remove it.
+  // If the Default block has no more predecessors just add it to DeleteList.
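+  // (Deletion is deferred because runOnFunction is still walking the
+  // function's block list; erasing OldDefault here could free a block the
+  // iteration has not visited yet. The DeleteList.count(Cur) check there
+  // skips such blocks instead.)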
   if (pred_begin(OldDefault) == pred_end(OldDefault))
-    DeleteDeadBlock(OldDefault);
+    DeleteList.insert(OldDefault);
 }
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 00cf4e6c01c8..aa1e35ddba02 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -63,6 +63,9 @@ bool PromotePass::runOnFunction(Function &F) {
 
   BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
 
+  if (F.hasFnAttribute(Attribute::OptimizeNone))
+    return false;
+
   bool Changed = false;
 
   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp
index 395a46bad97b..c999bd008fef 100644
--- a/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/lib/Transforms/Utils/MetaRenamer.cpp
@@ -42,6 +42,24 @@ namespace {
     }
   };
 
+  static const char *const metaNames[] = {
+    // See http://en.wikipedia.org/wiki/Metasyntactic_variable
+    "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
+    "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
+  };
+
+  struct Renamer {
+    Renamer(unsigned int seed) {
+      prng.srand(seed);
+    }
+
+    const char *newName() {
+      return metaNames[prng.rand() % array_lengthof(metaNames)];
+    }
+
+    PRNG prng;
+  };
+
   struct MetaRenamer : public ModulePass {
     static char ID; // Pass identification, replacement for typeid
     MetaRenamer() : ModulePass(ID) {
@@ -53,36 +71,26 @@
     }
 
     bool runOnModule(Module &M) override {
-      static const char *const metaNames[] = {
-        // See http://en.wikipedia.org/wiki/Metasyntactic_variable
-        "foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
-        "wibble", "wobble", "widget", "wombat", "ham", "eggs", "pluto", "spam"
-      };
-
       // Seed our PRNG with simple additive sum of ModuleID. We're looking to
       // simply avoid always having the same function names, and we need to
       // remain deterministic.
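       // For example, for a hypothetical module ID "m.ll" the seed is always
       // 'm' + '.' + 'l' + 'l' = 109 + 46 + 108 + 108 = 371, so the generated
       // names are stable from run to run for the same module.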
unsigned int randSeed = 0; - for (std::string::const_iterator I = M.getModuleIdentifier().begin(), - E = M.getModuleIdentifier().end(); I != E; ++I) - randSeed += *I; + for (auto C : M.getModuleIdentifier()) + randSeed += C; - PRNG prng; - prng.srand(randSeed); + Renamer renamer(randSeed); // Rename all aliases - for (Module::alias_iterator AI = M.alias_begin(), AE = M.alias_end(); - AI != AE; ++AI) { + for (auto AI = M.alias_begin(), AE = M.alias_end(); AI != AE; ++AI) { StringRef Name = AI->getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; AI->setName("alias"); } - + // Rename all global variables - for (Module::global_iterator GI = M.global_begin(), GE = M.global_end(); - GI != GE; ++GI) { + for (auto GI = M.global_begin(), GE = M.global_end(); GI != GE; ++GI) { StringRef Name = GI->getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; @@ -93,40 +101,37 @@ namespace { // Rename all struct types TypeFinder StructTypes; StructTypes.run(M, true); - for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) { - StructType *STy = StructTypes[i]; + for (StructType *STy : StructTypes) { if (STy->isLiteral() || STy->getName().empty()) continue; SmallString<128> NameStorage; - STy->setName((Twine("struct.") + metaNames[prng.rand() % - array_lengthof(metaNames)]).toStringRef(NameStorage)); + STy->setName((Twine("struct.") + + renamer.newName()).toStringRef(NameStorage)); } // Rename all functions - for (Module::iterator FI = M.begin(), FE = M.end(); - FI != FE; ++FI) { - StringRef Name = FI->getName(); + for (auto &F : M) { + StringRef Name = F.getName(); if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1)) continue; - FI->setName(metaNames[prng.rand() % array_lengthof(metaNames)]); - runOnFunction(*FI); + F.setName(renamer.newName()); + runOnFunction(F); } return true; } bool runOnFunction(Function &F) { - for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); - AI != AE; ++AI) + for (auto AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) if (!AI->getType()->isVoidTy()) AI->setName("arg"); - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - BB->setName("bb"); + for (auto &BB : F) { + BB.setName("bb"); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->getType()->isVoidTy()) - I->setName("tmp"); + for (auto &I : BB) + if (!I.getType()->isVoidTy()) + I.setName("tmp"); } return true; } diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index d69a81ec4741..9ec28a3f3d47 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -43,9 +43,9 @@ static void appendToGlobalArray(const char *Array, } GVCtor->eraseFromParent(); } else { - // Use a simple two-field struct if there isn't one already. + // Use the new three-field struct if there isn't one already. EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - nullptr); + IRB.getInt8PtrTy(), nullptr); } // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. 
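For reference, a sketch of the initializer layout this hunk produces; the IR below is illustrative only (the @ctor name is hypothetical), with the new trailing i8* field carrying the associated data, or null when there is none:

  @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
      [{ i32, void ()*, i8* } { i32 65535, void ()* @ctor, i8* null }]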
@@ -107,7 +107,8 @@ Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) { std::pair llvm::createSanitizerCtorAndInitFunctions( Module &M, StringRef CtorName, StringRef InitName, - ArrayRef InitArgTypes, ArrayRef InitArgs) { + ArrayRef InitArgTypes, ArrayRef InitArgs, + StringRef VersionCheckName) { assert(!InitName.empty() && "Expected init function name"); assert(InitArgTypes.size() == InitArgTypes.size() && "Sanitizer's init function expects different number of arguments"); @@ -122,6 +123,13 @@ std::pair llvm::createSanitizerCtorAndInitFunctions( AttributeSet())); InitFunction->setLinkage(Function::ExternalLinkage); IRB.CreateCall(InitFunction, InitArgs); + if (!VersionCheckName.empty()) { + Function *VersionCheckFunction = + checkSanitizerInterfaceFunction(M.getOrInsertFunction( + VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false), + AttributeSet())); + IRB.CreateCall(VersionCheckFunction, {}); + } return std::make_pair(Ctor, InitFunction); } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index a87f8504bfb5..c4f9b9f61407 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -205,10 +205,9 @@ public: // avoid gratuitus rescans. const BasicBlock *BB = I->getParent(); unsigned InstNo = 0; - for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); BBI != E; - ++BBI) - if (isInterestingInstruction(BBI)) - InstNumbers[BBI] = InstNo++; + for (const Instruction &BBI : *BB) + if (isInterestingInstruction(&BBI)) + InstNumbers[&BBI] = InstNo++; It = InstNumbers.find(I); assert(It != InstNumbers.end() && "Didn't insert instruction?"); @@ -402,8 +401,7 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, // Record debuginfo for the store and remove the declaration's // debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - DIBuilder DIB(*AI->getParent()->getParent()->getParent(), - /*AllowUnresolved*/ false); + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, DIB); DDI->eraseFromParent(); LBI.deleteValue(DDI); @@ -425,14 +423,17 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, /// using the Alloca. /// /// If we cannot promote this alloca (because it is read before it is written), -/// return true. This is necessary in cases where, due to control flow, the -/// alloca is potentially undefined on some control flow paths. e.g. code like -/// this is potentially correct: -/// -/// for (...) { if (c) { A = undef; undef = B; } } -/// -/// ... so long as A is not used before undef is set. -static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, +/// return false. This is necessary in cases where, due to control flow, the +/// alloca is undefined only on some control flow paths. e.g. code like +/// this is correct in LLVM IR: +/// // A is an alloca with no stores so far +/// for (...) { +/// int t = *A; +/// if (!first_iteration) +/// use(t); +/// *A = 42; +/// } +static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, AliasSetTracker *AST) { // The trickiest case to handle is when we have large blocks. 
Because of this, @@ -467,10 +468,15 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, std::make_pair(LoadIdx, static_cast(nullptr)), less_first()); - - if (I == StoresByIndex.begin()) - // If there is no store before this load, the load takes the undef value. - LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + if (I == StoresByIndex.begin()) { + if (StoresByIndex.empty()) + // If there are no stores, the load takes the undef value. + LI->replaceAllUsesWith(UndefValue::get(LI->getType())); + else + // There is no store before this load, bail out (load may be affected + // by the following stores - see main comment). + return false; + } else // Otherwise, there was a store before this load, the load takes its value. LI->replaceAllUsesWith(std::prev(I)->second->getOperand(0)); @@ -486,8 +492,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, StoreInst *SI = cast(AI->user_back()); // Record debuginfo for the store before removing it. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - DIBuilder DIB(*AI->getParent()->getParent()->getParent(), - /*AllowUnresolved*/ false); + DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); ConvertDebugDeclareToDebugValue(DDI, SI, DIB); } SI->eraseFromParent(); @@ -506,6 +511,7 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, } ++NumLocalPromoted; + return true; } void PromoteMem2Reg::run() { @@ -557,9 +563,8 @@ void PromoteMem2Reg::run() { // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. - if (Info.OnlyUsedInOneBlock) { - promoteSingleBlockAlloca(AI, Info, LBI, AST); - + if (Info.OnlyUsedInOneBlock && + promoteSingleBlockAlloca(AI, Info, LBI, AST)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; @@ -636,7 +641,7 @@ void PromoteMem2Reg::run() { // and inserting the phi nodes we marked as necessary // std::vector RenamePassWorkList; - RenamePassWorkList.emplace_back(F.begin(), nullptr, std::move(Values)); + RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); do { RenamePassData RPD; RPD.swap(RenamePassWorkList.back()); @@ -854,7 +859,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // BasicBlock. PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), Allocas[AllocaNo]->getName() + "." 
+ Twine(Version++),
-                       BB->begin());
+                       &BB->front());
   ++NumPHIInsert;
   PhiToAllocaMap[PN] = AllocaNo;
 
@@ -919,7 +924,7 @@ NextIteration:
     return;
 
   for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II);) {
-    Instruction *I = II++; // get the instruction, increment iterator
+    Instruction *I = &*II++; // get the instruction, increment iterator
 
     if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
       AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 36781c1189cd..d0932f834cf5 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -43,7 +44,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <algorithm>
 #include <map>
@@ -73,6 +73,22 @@ static cl::opt<bool> HoistCondStores(
     "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
     cl::desc("Hoist conditional stores if an unconditional store precedes"));
 
+static cl::opt<bool> MergeCondStores(
+    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
+    cl::desc("Hoist conditional stores even if an unconditional store does not "
+             "precede - hoist multiple conditional stores into a single "
+             "predicated store"));
+
+static cl::opt<bool> MergeCondStoresAggressively(
+    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
+    cl::desc("When merging conditional stores, do so even if the resultant "
+             "basic blocks are unlikely to be if-converted as a result"));
+
+static cl::opt<bool> SpeculateOneExpensiveInst(
+    "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
+    cl::desc("Allow exactly one expensive instruction to be speculatively "
+             "executed"));
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping");
 STATISTIC(NumLookupTables, "Number of switch instructions turned into lookup tables");
@@ -83,13 +99,13 @@ STATISTIC(NumSpeculations, "Number of speculative executed instructions");
 
 namespace {
   // The first field contains the value that the switch produces when a certain
-  // case group is selected, and the second field is a vector containing the cases
-  // composing the case group.
+  // case group is selected, and the second field is a vector containing the
+  // cases composing the case group.
  typedef SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>
      SwitchCaseResultVectorTy;
   // The first field contains the phi node that generates a result of the switch
-  // and the second field contains the value generated for a certain case in the switch
-  // for that PHI.
+  // and the second field contains the value generated for a certain case in the
+  // switch for that PHI.
  typedef SmallVector<std::pair<PHINode *, Constant *>, 4> SwitchCaseResultsTy;
 
  /// ValueEqualityComparisonCase - Represents a case of a switch.
@@ -124,6 +140,7 @@ class SimplifyCFGOpt { bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder); bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder); + bool SimplifyCleanupReturn(CleanupReturnInst *RI); bool SimplifyUnreachable(UnreachableInst *UI); bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); bool SimplifyIndirectBr(IndirectBrInst *IBI); @@ -226,6 +243,7 @@ static unsigned ComputeSpeculationCost(const User *I, "Instruction is not safe to speculatively execute!"); return TTI.getUserCost(I); } + /// If we have a merge point of an "if condition" as accepted above, /// return true if the specified value dominates the block. We /// don't handle the true generality of domination here, just a special case @@ -246,7 +264,8 @@ static unsigned ComputeSpeculationCost(const User *I, static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSetImpl *AggressiveInsts, unsigned &CostRemaining, - const TargetTransformInfo &TTI) { + const TargetTransformInfo &TTI, + unsigned Depth = 0) { Instruction *I = dyn_cast(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -284,15 +303,24 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, unsigned Cost = ComputeSpeculationCost(I, TTI); - if (Cost > CostRemaining) + // Allow exactly one instruction to be speculated regardless of its cost + // (as long as it is safe to do so). + // This is intended to flatten the CFG even if the instruction is a division + // or other expensive operation. The speculation of an expensive instruction + // is expected to be undone in CodeGenPrepare if the speculation has not + // enabled further IR optimizations. + if (Cost > CostRemaining && + (!SpeculateOneExpensiveInst || !AggressiveInsts->empty() || Depth > 0)) return false; - CostRemaining -= Cost; + // Avoid unsigned wrap. + CostRemaining = (Cost > CostRemaining) ? 0 : CostRemaining - Cost; // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, TTI, + Depth + 1)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -970,8 +998,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, // Okay, at this point, we know which new successor Pred will get. Make // sure we update the number of entries in the PHI nodes for these // successors. - for (unsigned i = 0, e = NewSuccessors.size(); i != e; ++i) - AddPredecessorToBlock(NewSuccessors[i], Pred, BB); + for (BasicBlock *NewSuccessor : NewSuccessors) + AddPredecessorToBlock(NewSuccessor, Pred, BB); Builder.SetInsertPoint(PTI); // Convert pointer to int before we switch. 
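The three knobs defined earlier in this file are ordinary cl::opt booleans, so they can be toggled when running the pass. The flag names below come from the cl::opt definitions in this patch; the invocation shape is only a usage sketch:

  opt -simplifycfg -simplifycfg-merge-cond-stores=false \
      -speculate-one-expensive-inst=false -S input.ll -o output.ll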
@@ -984,8 +1012,8 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI, SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size()); NewSI->setDebugLoc(PTI->getDebugLoc()); - for (unsigned i = 0, e = PredCases.size(); i != e; ++i) - NewSI->addCase(PredCases[i].Value, PredCases[i].Dest); + for (ValueEqualityComparisonCase &V : PredCases) + NewSI->addCase(V.Value, V.Dest); if (PredHasWeights || SuccHasWeights) { // Halve the weights if any of them cannot fit in an uint32_t @@ -1059,15 +1087,15 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, BasicBlock::iterator BB1_Itr = BB1->begin(); BasicBlock::iterator BB2_Itr = BB2->begin(); - Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; + Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++; // Skip debug info if it is not identical. DbgInfoIntrinsic *DBI1 = dyn_cast(I1); DbgInfoIntrinsic *DBI2 = dyn_cast(I2); if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { while (isa(I1)) - I1 = BB1_Itr++; + I1 = &*BB1_Itr++; while (isa(I2)) - I2 = BB2_Itr++; + I2 = &*BB2_Itr++; } if (isa(I1) || !I1->isIdenticalToWhenDefined(I2) || (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) @@ -1088,31 +1116,30 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, // For a normal instruction, we just move one to right before the branch, // then replace all uses of the other with the first. Finally, we remove // the now redundant second instruction. - BIParent->getInstList().splice(BI, BB1->getInstList(), I1); + BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1); if (!I2->use_empty()) I2->replaceAllUsesWith(I1); I1->intersectOptionalDataWith(I2); unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_range, - LLVMContext::MD_fpmath, - LLVMContext::MD_invariant_load, - LLVMContext::MD_nonnull - }; + LLVMContext::MD_tbaa, LLVMContext::MD_range, + LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load, + LLVMContext::MD_nonnull, LLVMContext::MD_invariant_group, + LLVMContext::MD_align, LLVMContext::MD_dereferenceable, + LLVMContext::MD_dereferenceable_or_null}; combineMetadata(I1, I2, KnownIDs); I2->eraseFromParent(); Changed = true; - I1 = BB1_Itr++; - I2 = BB2_Itr++; + I1 = &*BB1_Itr++; + I2 = &*BB2_Itr++; // Skip debug info if it is not identical. DbgInfoIntrinsic *DBI1 = dyn_cast(I1); DbgInfoIntrinsic *DBI2 = dyn_cast(I2); if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { while (isa(I1)) - I1 = BB1_Itr++; + I1 = &*BB1_Itr++; while (isa(I2)) - I2 = BB2_Itr++; + I2 = &*BB2_Itr++; } } while (I1->isIdenticalToWhenDefined(I2)); @@ -1147,7 +1174,7 @@ HoistTerminator: // Okay, it is safe to hoist the terminator. Instruction *NT = I1->clone(); - BIParent->getInstList().insert(BI, NT); + BIParent->getInstList().insert(BI->getIterator(), NT); if (!NT->getType()->isVoidTy()) { I1->replaceAllUsesWith(NT); I2->replaceAllUsesWith(NT); @@ -1265,7 +1292,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // Cannot move control-flow-involving, volatile loads, vaarg, etc. 
if (isa(I1) || isa(I2) || isa(I1) || isa(I2) || - isa(I1) || isa(I2) || + I1->isEHPad() || I2->isEHPad() || isa(I1) || isa(I2) || I1->mayHaveSideEffects() || I2->mayHaveSideEffects() || I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() || @@ -1324,7 +1351,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { if (!NewPN) { NewPN = PHINode::Create(DifferentOp1->getType(), 2, - DifferentOp1->getName() + ".sink", BBEnd->begin()); + DifferentOp1->getName() + ".sink", &BBEnd->front()); NewPN->addIncoming(DifferentOp1, BB1); NewPN->addIncoming(DifferentOp2, BB2); DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";); @@ -1339,7 +1366,8 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { // instruction in the basic block down. bool UpdateRE1 = (I1 == BB1->begin()), UpdateRE2 = (I2 == BB2->begin()); // Sink the instruction. - BBEnd->getInstList().splice(FirstNonPhiInBBEnd, BB1->getInstList(), I1); + BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(), + BB1->getInstList(), I1); if (!OldPN->use_empty()) OldPN->replaceAllUsesWith(I1); OldPN->eraseFromParent(); @@ -1355,7 +1383,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) { RE1 = BB1->getInstList().rend(); if (UpdateRE2) RE2 = BB2->getInstList().rend(); - FirstNonPhiInBBEnd = I1; + FirstNonPhiInBBEnd = &*I1; NumSinkCommons++; Changed = true; } @@ -1491,7 +1519,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, for (BasicBlock::iterator BBI = ThenBB->begin(), BBE = std::prev(ThenBB->end()); BBI != BBE; ++BBI) { - Instruction *I = BBI; + Instruction *I = &*BBI; // Skip debug info. if (isa(I)) continue; @@ -1604,9 +1632,14 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, SpeculatedStore->setOperand(0, S); } + // Metadata can be dependent on the condition we are hoisting above. + // Conservatively strip all metadata on the instruction. + for (auto &I: *ThenBB) + I.dropUnknownNonDebugMetadata(); + // Hoist the instructions. - BB->getInstList().splice(BI, ThenBB->getInstList(), ThenBB->begin(), - std::prev(ThenBB->end())); + BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(), + ThenBB->begin(), std::prev(ThenBB->end())); // Insert selects and rewrite the PHI operands. IRBuilder Builder(BI); @@ -1747,13 +1780,13 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) { // Check for trivial simplification. if (Value *V = SimplifyInstruction(N, DL)) { - TranslateMap[BBI] = V; + TranslateMap[&*BBI] = V; delete N; // Instruction folded away, don't need actual inst } else { // Insert the new instruction into its new home. EdgeBB->getInstList().insert(InsertPt, N); if (!BBI->use_empty()) - TranslateMap[BBI] = N; + TranslateMap[&*BBI] = N; } } @@ -1850,7 +1883,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock1); for (BasicBlock::iterator I = IfBlock1->begin();!isa(I);++I) - if (!AggressiveInsts.count(I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I)) { // This is not an aggressive instruction that we can promote. // Because of this, we won't be able to get rid of the control // flow, so the xform is not worth it. @@ -1863,7 +1896,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, } else { DomBlock = *pred_begin(IfBlock2); for (BasicBlock::iterator I = IfBlock2->begin();!isa(I);++I) - if (!AggressiveInsts.count(I) && !isa(I)) { + if (!AggressiveInsts.count(&*I) && !isa(I)) { // This is not an aggressive instruction that we can promote. 
// Because of this, we won't be able to get rid of the control // flow, so the xform is not worth it. @@ -1882,13 +1915,13 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, // Move all 'aggressive' instructions, which are defined in the // conditional parts of the if's up to the dominating block. if (IfBlock1) - DomBlock->getInstList().splice(InsertPt, + DomBlock->getInstList().splice(InsertPt->getIterator(), IfBlock1->getInstList(), IfBlock1->begin(), - IfBlock1->getTerminator()); + IfBlock1->getTerminator()->getIterator()); if (IfBlock2) - DomBlock->getInstList().splice(InsertPt, + DomBlock->getInstList().splice(InsertPt->getIterator(), IfBlock2->getInstList(), IfBlock2->begin(), - IfBlock2->getTerminator()); + IfBlock2->getTerminator()->getIterator()); while (PHINode *PN = dyn_cast(BB->begin())) { // Change the PHI node into a select instruction. @@ -2057,7 +2090,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { BI->getSuccessor(0) == PBI->getSuccessor(1))) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - Instruction *Curr = I++; + Instruction *Curr = &*I++; if (isa(Curr)) { Cond = Curr; break; @@ -2077,7 +2110,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { return false; // Make sure the instruction after the condition is the cond branch. - BasicBlock::iterator CondIt = Cond; ++CondIt; + BasicBlock::iterator CondIt = ++Cond->getIterator(); // Ignore dbg intrinsics. while (isa(CondIt)) ++CondIt; @@ -2095,7 +2128,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { // Ignore dbg intrinsics. if (isa(I)) continue; - if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(I)) + if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I)) return false; // I has only one use and can be executed unconditionally. Instruction *User = dyn_cast(I->user_back()); @@ -2192,17 +2225,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { Instruction *NewBonusInst = BonusInst->clone(); RemapInstruction(NewBonusInst, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); - VMap[BonusInst] = NewBonusInst; + VMap[&*BonusInst] = NewBonusInst; // If we moved a load, we cannot any longer claim any knowledge about // its potential value. The previous information might have been valid // only given the branch precondition. // For an analogous reason, we must also drop all the metadata whose // semantics we don't understand. - NewBonusInst->dropUnknownMetadata(LLVMContext::MD_dbg); + NewBonusInst->dropUnknownNonDebugMetadata(); - PredBlock->getInstList().insert(PBI, NewBonusInst); - NewBonusInst->takeName(BonusInst); + PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst); + NewBonusInst->takeName(&*BonusInst); BonusInst->setName(BonusInst->getName() + ".old"); } @@ -2211,7 +2244,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { Instruction *New = Cond->clone(); RemapInstruction(New, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); - PredBlock->getInstList().insert(PBI, New); + PredBlock->getInstList().insert(PBI->getIterator(), New); New->takeName(Cond); Cond->setName(New->getName() + ".old"); @@ -2332,11 +2365,297 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) { return false; } +// If there is only one store in BB1 and BB2, return it, otherwise return +// nullptr. 
+static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
+  StoreInst *S = nullptr;
+  for (auto *BB : {BB1, BB2}) {
+    if (!BB)
+      continue;
+    for (auto &I : *BB)
+      if (auto *SI = dyn_cast<StoreInst>(&I)) {
+        if (S)
+          // Multiple stores seen.
+          return nullptr;
+        else
+          S = SI;
+      }
+  }
+  return S;
+}
+
+static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
+                                              Value *AlternativeV = nullptr) {
+  // PHI is going to be a PHI node that allows the value V that is defined in
+  // BB to be referenced in BB's only successor.
+  //
+  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
+  // doesn't matter to us what the other operand is (it'll never get used). We
+  // could just create a new PHI with an undef incoming value, but that could
+  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
+  // other PHI. So here we directly look for some PHI in BB's successor with V
+  // as an incoming operand. If we find one, we use it, else we create a new
+  // one.
+  //
+  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
+  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV ]
+  // where OtherBB is the single other predecessor of BB's only successor.
+  PHINode *PHI = nullptr;
+  BasicBlock *Succ = BB->getSingleSuccessor();
+
+  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
+    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
+      PHI = cast<PHINode>(I);
+      if (!AlternativeV)
+        break;
+
+      assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2);
+      auto PredI = pred_begin(Succ);
+      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
+      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
+        break;
+      PHI = nullptr;
+    }
+  if (PHI)
+    return PHI;
+
+  // If V is not an instruction defined in BB, just return it.
+  if (!AlternativeV &&
+      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
+    return V;
+
+  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
+  PHI->addIncoming(V, BB);
+  for (BasicBlock *PredBB : predecessors(Succ))
+    if (PredBB != BB)
+      PHI->addIncoming(AlternativeV ? AlternativeV : UndefValue::get(V->getType()),
+                       PredBB);
+  return PHI;
+}
+
+static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
+                                           BasicBlock *QTB, BasicBlock *QFB,
+                                           BasicBlock *PostBB, Value *Address,
+                                           bool InvertPCond, bool InvertQCond) {
+  auto IsaBitcastOfPointerType = [](const Instruction &I) {
+    return Operator::getOpcode(&I) == Instruction::BitCast &&
+           I.getType()->isPointerTy();
+  };
+
+  // If we're not in aggressive mode, we only optimize if we have some
+  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
+  auto IsWorthwhile = [&](BasicBlock *BB) {
+    if (!BB)
+      return true;
+    // Heuristic: if the block can be if-converted/phi-folded and the
+    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
+    // thread this store.
+    unsigned N = 0;
+    for (auto &I : *BB) {
+      // Cheap instructions viable for folding.
+      if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||
+          isa<StoreInst>(I))
+        ++N;
+      // Free instructions.
+      else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) ||
+               IsaBitcastOfPointerType(I))
+        continue;
+      else
+        return false;
+    }
+    return N <= PHINodeFoldingThreshold;
+  };
+
+  if (!MergeCondStoresAggressively && (!IsWorthwhile(PTB) ||
+                                       !IsWorthwhile(PFB) ||
+                                       !IsWorthwhile(QTB) ||
+                                       !IsWorthwhile(QFB)))
+    return false;
+
+  // For every pointer, there must be exactly two stores, one coming from
+  // PTB or PFB, and the other from QTB or QFB.
We don't support more than one + // store (to any address) in PTB,PFB or QTB,QFB. + // FIXME: We could relax this restriction with a bit more work and performance + // testing. + StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB); + StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB); + if (!PStore || !QStore) + return false; + + // Now check the stores are compatible. + if (!QStore->isUnordered() || !PStore->isUnordered()) + return false; + + // Check that sinking the store won't cause program behavior changes. Sinking + // the store out of the Q blocks won't change any behavior as we're sinking + // from a block to its unconditional successor. But we're moving a store from + // the P blocks down through the middle block (QBI) and past both QFB and QTB. + // So we need to check that there are no aliasing loads or stores in + // QBI, QTB and QFB. We also need to check there are no conflicting memory + // operations between PStore and the end of its parent block. + // + // The ideal way to do this is to query AliasAnalysis, but we don't + // preserve AA currently so that is dangerous. Be super safe and just + // check there are no other memory operations at all. + for (auto &I : *QFB->getSinglePredecessor()) + if (I.mayReadOrWriteMemory()) + return false; + for (auto &I : *QFB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + if (QTB) + for (auto &I : *QTB) + if (&I != QStore && I.mayReadOrWriteMemory()) + return false; + for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); + I != E; ++I) + if (&*I != PStore && I->mayReadOrWriteMemory()) + return false; + + // OK, we're going to sink the stores to PostBB. The store has to be + // conditional though, so first create the predicate. + Value *PCond = cast(PFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); + Value *QCond = cast(QFB->getSinglePredecessor()->getTerminator()) + ->getCondition(); + + Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(), + PStore->getParent()); + Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(), + QStore->getParent(), PPHI); + + IRBuilder<> QB(&*PostBB->getFirstInsertionPt()); + + Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond); + Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond); + + if (InvertPCond) + PPred = QB.CreateNot(PPred); + if (InvertQCond) + QPred = QB.CreateNot(QPred); + Value *CombinedPred = QB.CreateOr(PPred, QPred); + + auto *T = + SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false); + QB.SetInsertPoint(T); + StoreInst *SI = cast(QB.CreateStore(QPHI, Address)); + AAMDNodes AAMD; + PStore->getAAMetadata(AAMD, /*Merge=*/false); + PStore->getAAMetadata(AAMD, /*Merge=*/true); + SI->setAAMetadata(AAMD); + + QStore->eraseFromParent(); + PStore->eraseFromParent(); + + return true; +} + +static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI) { + // The intention here is to find diamonds or triangles (see below) where each + // conditional block contains a store to the same address. Both of these + // stores are conditional, so they can't be unconditionally sunk. But it may + // be profitable to speculatively sink the stores into one merged store at the + // end, and predicate the merged store on the union of the two conditions of + // PBI and QBI. + // + // This can reduce the number of stores executed if both of the conditions are + // true, and can allow the blocks to become small enough to be if-converted. 
+ // This optimization will also chain, so that ladders of test-and-set + // sequences can be if-converted away. + // + // We only deal with simple diamonds or triangles: + // + // PBI or PBI or a combination of the two + // / \ | \ + // PTB PFB | PFB + // \ / | / + // QBI QBI + // / \ | \ + // QTB QFB | QFB + // \ / | / + // PostBB PostBB + // + // We model triangles as a type of diamond with a nullptr "true" block. + // Triangles are canonicalized so that the fallthrough edge is represented by + // a true condition, as in the diagram above. + // + BasicBlock *PTB = PBI->getSuccessor(0); + BasicBlock *PFB = PBI->getSuccessor(1); + BasicBlock *QTB = QBI->getSuccessor(0); + BasicBlock *QFB = QBI->getSuccessor(1); + BasicBlock *PostBB = QFB->getSingleSuccessor(); + + bool InvertPCond = false, InvertQCond = false; + // Canonicalize fallthroughs to the true branches. + if (PFB == QBI->getParent()) { + std::swap(PFB, PTB); + InvertPCond = true; + } + if (QFB == PostBB) { + std::swap(QFB, QTB); + InvertQCond = true; + } + + // From this point on we can assume PTB or QTB may be fallthroughs but PFB + // and QFB may not. Model fallthroughs as a nullptr block. + if (PTB == QBI->getParent()) + PTB = nullptr; + if (QTB == PostBB) + QTB = nullptr; + + // Legality bailouts. We must have at least the non-fallthrough blocks and + // the post-dominating block, and the non-fallthroughs must only have one + // predecessor. + auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { + return BB->getSinglePredecessor() == P && + BB->getSingleSuccessor() == S; + }; + if (!PostBB || + !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) + return false; + if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || + (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB))) + return false; + if (PostBB->getNumUses() != 2 || QBI->getParent()->getNumUses() != 2) + return false; + + // OK, this is a sequence of two diamonds or triangles. + // Check if there are stores in PTB or PFB that are repeated in QTB or QFB. + SmallPtrSet PStoreAddresses, QStoreAddresses; + for (auto *BB : {PTB, PFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast(&I)) + PStoreAddresses.insert(SI->getPointerOperand()); + } + for (auto *BB : {QTB, QFB}) { + if (!BB) + continue; + for (auto &I : *BB) + if (StoreInst *SI = dyn_cast(&I)) + QStoreAddresses.insert(SI->getPointerOperand()); + } + + set_intersect(PStoreAddresses, QStoreAddresses); + // set_intersect mutates PStoreAddresses in place. Rename it here to make it + // clear what it contains. + auto &CommonAddresses = PStoreAddresses; + + bool Changed = false; + for (auto *Address : CommonAddresses) + Changed |= mergeConditionalStoreToAddress( + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond); + return Changed; +} + /// If we have a conditional branch as a predecessor of another block, /// this function tries to simplify it. We know /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. 
-static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { +static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, + const DataLayout &DL) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); @@ -2360,10 +2679,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // simplifycfg will thread the block. if (BlockIsSimpleEnoughToThreadThrough(BB)) { pred_iterator PB = pred_begin(BB), PE = pred_end(BB); - PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()), - std::distance(PB, PE), - BI->getCondition()->getName() + ".pr", - BB->begin()); + PHINode *NewPN = PHINode::Create( + Type::getInt1Ty(BB->getContext()), std::distance(PB, PE), + BI->getCondition()->getName() + ".pr", &BB->front()); // Okay, we're going to insert the PHI node. Since PBI is not the only // predecessor, compute the PHI'd conditional value for all of the preds. // Any predecessor where the condition is not computable we keep symbolic. @@ -2386,6 +2704,29 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } } + if (auto *CE = dyn_cast(BI->getCondition())) + if (CE->canTrap()) + return false; + + // If BI is reached from the true path of PBI and PBI's condition implies + // BI's condition, we know the direction of the BI branch. + if (PBI->getSuccessor(0) == BI->getParent() && + isImpliedCondition(PBI->getCondition(), BI->getCondition(), DL) && + PBI->getSuccessor(0) != PBI->getSuccessor(1) && + BB->getSinglePredecessor()) { + // Turn this into a branch on constant. + auto *OldCond = BI->getCondition(); + BI->setCondition(ConstantInt::getTrue(BB->getContext())); + RecursivelyDeleteTriviallyDeadInstructions(OldCond); + return true; // Nuke the branch on constant. + } + + // If both branches are conditional and both contain stores to the same + // address, remove the stores from the conditionals and create a conditional + // merged store at the end. + if (MergeCondStores && mergeConditionalStores(PBI, BI)) + return true; + // If this is a conditional branch in an empty block, and if any // predecessors are a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. @@ -2396,11 +2737,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { if (&*BBI != BI) return false; - - if (ConstantExpr *CE = dyn_cast(BI->getCondition())) - if (CE->canTrap()) - return false; - int PBIOp, BIOp; if (PBI->getSuccessor(0) == BI->getSuccessor(0)) PBIOp = BIOp = 0; @@ -2565,15 +2901,15 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; // Then remove the rest. - for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) { - BasicBlock *Succ = OldTerm->getSuccessor(I); + for (BasicBlock *Succ : OldTerm->successors()) { // Make sure only to keep exactly one copy of each edge. if (Succ == KeepEdge1) KeepEdge1 = nullptr; else if (Succ == KeepEdge2) KeepEdge2 = nullptr; else - Succ->removePredecessor(OldTerm->getParent()); + Succ->removePredecessor(OldTerm->getParent(), + /*DontDeleteUselessPHIs=*/true); } IRBuilder<> Builder(OldTerm); @@ -2827,7 +3163,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, Values.erase(std::unique(Values.begin(), Values.end()), Values.end()); // If Extra was used, we require at least two switch values to do the - // transformation. A switch with one value is just an cond branch. 
+ // transformation. A switch with one value is just a conditional branch. if (ExtraCase && Values.size() < 2) return false; // TODO: Preserve branch weight metadata, similarly to how @@ -2847,7 +3183,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, // then we evaluate them with an explicit branch first. Split the block // right before the condbr to handle it. if (ExtraCase) { - BasicBlock *NewBB = BB->splitBasicBlock(BI, "switch.early.test"); + BasicBlock *NewBB = + BB->splitBasicBlock(BI->getIterator(), "switch.early.test"); // Remove the uncond branch added to the old block. TerminatorInst *OldTI = BB->getTerminator(); Builder.SetInsertPoint(OldTI); @@ -2911,34 +3248,15 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return false; // Check that there are no other instructions except for debug intrinsics. - BasicBlock::iterator I = LPInst, E = RI; + BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator(); while (++I != E) if (!isa(I)) return false; // Turn all invokes that unwind here into calls and delete the basic block. for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { - InvokeInst *II = cast((*PI++)->getTerminator()); - SmallVector Args(II->op_begin(), II->op_end() - 3); - // Insert a call instruction before the invoke. - CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); - Call->takeName(II); - Call->setCallingConv(II->getCallingConv()); - Call->setAttributes(II->getAttributes()); - Call->setDebugLoc(II->getDebugLoc()); - - // Anything that used the value produced by the invoke instruction now uses - // the value produced by the call instruction. Note that we do this even - // for void functions and calls with no uses so that the callgraph edge is - // updated. - II->replaceAllUsesWith(Call); - BB->removePredecessor(II->getParent()); - - // Insert a branch to the normal destination right before the invoke. - BranchInst::Create(II->getNormalDest(), II); - - // Finally, delete the invoke instruction! - II->eraseFromParent(); + BasicBlock *Pred = *PI++; + removeUnwindEdge(Pred); } // The landingpad is now unreachable. Zap it. @@ -2946,6 +3264,124 @@ bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { return true; } +bool SimplifyCFGOpt::SimplifyCleanupReturn(CleanupReturnInst *RI) { + // If this is a trivial cleanup pad that executes no instructions, it can be + // eliminated. If the cleanup pad continues to the caller, any predecessor + // that is an EH pad will be updated to continue to the caller and any + // predecessor that terminates with an invoke instruction will have its invoke + // instruction converted to a call instruction. If the cleanup pad being + // simplified does not continue to the caller, each predecessor will be + // updated to continue to the unwind destination of the cleanup pad being + // simplified. + BasicBlock *BB = RI->getParent(); + CleanupPadInst *CPInst = RI->getCleanupPad(); + if (CPInst->getParent() != BB) + // This isn't an empty cleanup. + return false; + + // Check that there are no other instructions except for debug intrinsics. + BasicBlock::iterator I = CPInst->getIterator(), E = RI->getIterator(); + while (++I != E) + if (!isa(I)) + return false; + + // If the cleanup return we are simplifying unwinds to the caller, this will + // set UnwindDest to nullptr. + BasicBlock *UnwindDest = RI->getUnwindDest(); + Instruction *DestEHPad = UnwindDest ? 
UnwindDest->getFirstNonPHI() : nullptr; + + // We're about to remove BB from the control flow. Before we do, sink any + // PHINodes into the unwind destination. Doing this before changing the + // control flow avoids some potentially slow checks, since we can currently + // be certain that UnwindDest and BB have no common predecessors (since they + // are both EH pads). + if (UnwindDest) { + // First, go through the PHI nodes in UnwindDest and update any nodes that + // reference the block we are removing + for (BasicBlock::iterator I = UnwindDest->begin(), + IE = DestEHPad->getIterator(); + I != IE; ++I) { + PHINode *DestPN = cast(I); + + int Idx = DestPN->getBasicBlockIndex(BB); + // Since BB unwinds to UnwindDest, it has to be in the PHI node. + assert(Idx != -1); + // This PHI node has an incoming value that corresponds to a control + // path through the cleanup pad we are removing. If the incoming + // value is in the cleanup pad, it must be a PHINode (because we + // verified above that the block is otherwise empty). Otherwise, the + // value is either a constant or a value that dominates the cleanup + // pad being removed. + // + // Because BB and UnwindDest are both EH pads, all of their + // predecessors must unwind to these blocks, and since no instruction + // can have multiple unwind destinations, there will be no overlap in + // incoming blocks between SrcPN and DestPN. + Value *SrcVal = DestPN->getIncomingValue(Idx); + PHINode *SrcPN = dyn_cast(SrcVal); + + // Remove the entry for the block we are deleting. + DestPN->removeIncomingValue(Idx, false); + + if (SrcPN && SrcPN->getParent() == BB) { + // If the incoming value was a PHI node in the cleanup pad we are + // removing, we need to merge that PHI node's incoming values into + // DestPN. + for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues(); + SrcIdx != SrcE; ++SrcIdx) { + DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx), + SrcPN->getIncomingBlock(SrcIdx)); + } + } else { + // Otherwise, the incoming value came from above BB and + // so we can just reuse it. We must associate all of BB's + // predecessors with this value. + for (auto *pred : predecessors(BB)) { + DestPN->addIncoming(SrcVal, pred); + } + } + } + + // Sink any remaining PHI nodes directly into UnwindDest. + Instruction *InsertPt = DestEHPad; + for (BasicBlock::iterator I = BB->begin(), + IE = BB->getFirstNonPHI()->getIterator(); + I != IE;) { + // The iterator must be incremented here because the instructions are + // being moved to another block. + PHINode *PN = cast(I++); + if (PN->use_empty()) + // If the PHI node has no uses, just leave it. It will be erased + // when we erase BB below. + continue; + + // Otherwise, sink this PHI node into UnwindDest. + // Any predecessors to UnwindDest which are not already represented + // must be back edges which inherit the value from the path through + // BB. In this case, the PHI value must reference itself. + for (auto *pred : predecessors(UnwindDest)) + if (pred != BB) + PN->addIncoming(PN, pred); + PN->moveBefore(InsertPt); + } + } + + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) { + // The iterator must be updated here because we are removing this pred. + BasicBlock *PredBB = *PI++; + if (UnwindDest == nullptr) { + removeUnwindEdge(PredBB); + } else { + TerminatorInst *TI = PredBB->getTerminator(); + TI->replaceUsesOfWith(BB, UnwindDest); + } + } + + // The cleanup pad is now unreachable. Zap it. 
+ BB->eraseFromParent(); + return true; +} + bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) { BasicBlock *BB = RI->getParent(); if (!BB->getFirstNonPHIOrDbg()->isTerminator()) return false; @@ -3003,8 +3439,8 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // If there are any instructions immediately before the unreachable that can // be removed, do so. - while (UI != BB->begin()) { - BasicBlock::iterator BBI = UI; + while (UI->getIterator() != BB->begin()) { + BasicBlock::iterator BBI = UI->getIterator(); --BBI; // Do not delete instructions that can have side effects which might cause // the unreachable to not be reachable; specifically, calls and volatile @@ -3075,26 +3511,18 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { --i; --e; Changed = true; } - } else if (InvokeInst *II = dyn_cast(TI)) { - if (II->getUnwindDest() == BB) { - // Convert the invoke to a call instruction. This would be a good - // place to note that the call does not throw though. - BranchInst *BI = Builder.CreateBr(II->getNormalDest()); - II->removeFromParent(); // Take out of symbol table - - // Insert the call now... - SmallVector Args(II->op_begin(), II->op_end()-3); - Builder.SetInsertPoint(BI); - CallInst *CI = Builder.CreateCall(II->getCalledValue(), - Args, II->getName()); - CI->setCallingConv(II->getCallingConv()); - CI->setAttributes(II->getAttributes()); - // If the invoke produced a value, the call does now instead. - II->replaceAllUsesWith(CI); - delete II; - Changed = true; - } + } else if ((isa(TI) && + cast(TI)->getUnwindDest() == BB) || + isa(TI)) { + removeUnwindEdge(TI->getParent()); + Changed = true; + } else if (isa(TI)) { + new UnreachableInst(TI->getContext(), TI); + TI->eraseFromParent(); + Changed = true; } + // TODO: We can remove a catchswitch if all it's catchpads end in + // unreachable. } // If this block is now dead, remove it. @@ -3249,6 +3677,29 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, } } + // If we can prove that the cases must cover all possible values, the + // default destination becomes dead and we can remove it. If we know some + // of the bits in the value, we can use that to more precisely compute the + // number of possible unique case values. + bool HasDefault = + !isa(SI->getDefaultDest()->getFirstNonPHIOrDbg()); + const unsigned NumUnknownBits = Bits - + (KnownZero.Or(KnownOne)).countPopulation(); + assert(NumUnknownBits <= Bits); + if (HasDefault && DeadCases.empty() && + NumUnknownBits < 64 /* avoid overflow */ && + SI->getNumCases() == (1ULL << NumUnknownBits)) { + DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); + BasicBlock *NewDefault = SplitBlockPredecessors(SI->getDefaultDest(), + SI->getParent(), ""); + SI->setDefaultDest(&*NewDefault); + SplitBlock(&*NewDefault, &NewDefault->front()); + auto *OldTI = NewDefault->getTerminator(); + new UnreachableInst(SI->getContext(), OldTI); + EraseTerminatorInstAndDCECond(OldTI); + return true; + } + SmallVector Weights; bool HasWeight = HasBranchWeights(SI); if (HasWeight) { @@ -3439,7 +3890,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, } else if (isa(I)) { // Skip debug intrinsic. continue; - } else if (Constant *C = ConstantFold(I, DL, ConstantPool)) { + } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) { // Instruction is side-effect free and constant. 
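// Standalone sketch of the counting argument used by the dead-default check
// in EliminateDeadSwitchCases above: with Bits total bits and the known-zero/
// known-one masks fixing some of them, the condition can take exactly
// 2^NumUnknownBits distinct values, so a switch with that many live cases can
// never reach its default (plain C++, illustrative only):

#include <cstdint>
static bool defaultIsDead(unsigned Bits, uint64_t KnownZero, uint64_t KnownOne,
                          uint64_t NumCases) {
  unsigned NumUnknownBits =
      Bits - (unsigned)__builtin_popcountll(KnownZero | KnownOne);
  return NumUnknownBits < 64 /* avoid overflow */ &&
         NumCases == (1ULL << NumUnknownBits);
}
// e.g. an i8 switch condition known to be even (KnownZero = 0x1) has
// 2^7 = 128 reachable values, so 128 cases make the default unreachable.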
// If the instruction has uses outside this block or a phi node slot for @@ -3456,7 +3907,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, return false; } - ConstantPool.insert(std::make_pair(I, C)); + ConstantPool.insert(std::make_pair(&*I, C)); } else { break; } @@ -3664,7 +4115,7 @@ namespace { /// Return true if a table with TableSize elements of /// type ElementType would fit in a target-legal register. static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, - const Type *ElementType); + Type *ElementType); private: // Depending on the contents of the table, it can be represented in @@ -3880,8 +4331,8 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) { bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, uint64_t TableSize, - const Type *ElementType) { - const IntegerType *IT = dyn_cast(ElementType); + Type *ElementType) { + auto *IT = dyn_cast(ElementType); if (!IT) return false; // FIXME: If the type is wider than it needs to be, e.g. i8 but all values @@ -3992,7 +4443,7 @@ static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, assert((CaseConst == TrueConst || CaseConst == FalseConst) && "Expect true or false as compare result."); } - + // Check if the branch instruction dominates the phi node. It's a simple // dominance check, but sufficient for our needs. // Although this check is invariant in the calling loops, it's better to do it @@ -4422,7 +4873,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ return true; // If the Terminator is the only non-phi instruction, simplify the block. - BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(); + BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator(); if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() && TryToSimplifyUncondBranchFromEmptyBlock(BB)) return true; @@ -4457,6 +4908,16 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ return false; } +static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { + BasicBlock *PredPred = nullptr; + for (auto *P : predecessors(BB)) { + BasicBlock *PPred = P->getSinglePredecessor(); + if (!PPred || (PredPred && PredPred != PPred)) + return nullptr; + PredPred = PPred; + } + return PredPred; +} bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { BasicBlock *BB = BI->getParent(); @@ -4537,9 +4998,17 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + // Look for diamond patterns. 
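// The "diamond" referred to below (illustrative CFG):
//
//            PrevBB
//            /    \
//         Left    Right
//            \    /
//              BB
//
// allPredecessorsComeFromSameSource(BB) returns PrevBB exactly when every
// predecessor of BB has PrevBB as its single predecessor. In source terms,
// the store merge this enables looks like (sketch, assuming the second
// condition and value do not depend on the first store):
//
//   if (a) *p = x;                if (a | b)
//   if (b) *p = y;      =>          *p = b ? y : x;   // one store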
+ if (MergeCondStores) + if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) + if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) + if (PBI != BI && PBI->isConditional()) + if (mergeConditionalStores(PBI, BI)) + return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true; + return false; } @@ -4663,6 +5132,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) { if (SimplifyReturn(RI, Builder)) return true; } else if (ResumeInst *RI = dyn_cast(BB->getTerminator())) { if (SimplifyResume(RI, Builder)) return true; + } else if (CleanupReturnInst *RI = + dyn_cast(BB->getTerminator())) { + if (SimplifyCleanupReturn(RI)) return true; } else if (SwitchInst *SI = dyn_cast(BB->getTerminator())) { if (SimplifySwitch(SI, Builder)) return true; } else if (UnreachableInst *UI = diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index ab30aa17c76b..ddd8775a8431 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -47,15 +47,16 @@ namespace { Loop *L; LoopInfo *LI; ScalarEvolution *SE; + DominatorTree *DT; SmallVectorImpl &DeadInsts; bool Changed; public: - SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LoopInfo *LI, - SmallVectorImpl &Dead) - : L(Loop), LI(LI), SE(SE), DeadInsts(Dead), Changed(false) { + SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI,SmallVectorImpl &Dead) + : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) { assert(LI && "IV simplification requires LoopInfo"); } @@ -63,11 +64,13 @@ namespace { /// Iteratively perform simplification on a worklist of users of the /// specified induction variable. This is the top-level driver that applies - /// all simplicitions to users of an IV. + /// all simplifications to users of an IV. void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr); Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand); + bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand); + bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand, @@ -166,19 +169,65 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) { S = SE->getSCEVAtScope(S, ICmpLoop); X = SE->getSCEVAtScope(X, ICmpLoop); + ICmpInst::Predicate InvariantPredicate; + const SCEV *InvariantLHS, *InvariantRHS; + // If the condition is always true or always false, replace it with // a constant value. - if (SE->isKnownPredicate(Pred, S, X)) + if (SE->isKnownPredicate(Pred, S, X)) { ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext())); - else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) + DeadInsts.emplace_back(ICmp); + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) { ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext())); - else + DeadInsts.emplace_back(ICmp); + DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); + } else if (isa(IVOperand) && + SE->isLoopInvariantPredicate(Pred, S, X, ICmpLoop, + InvariantPredicate, InvariantLHS, + InvariantRHS)) { + + // Rewrite the comparison to a loop invariant comparison if it can be done + // cheaply, where cheaply means "we don't need to emit any new + // instructions". 
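// Schematic of the rewrite performed here (illustrative; slt is only a
// placeholder, and the invariant form may use a different predicate than
// the original):
//
//   %cmp = icmp slt i32 %iv, %x        ; re-evaluated every iteration
//       -->
//   %cmp = icmp slt i32 %iv.start, %x  ; loop-invariant operands only
//
// where %iv.start is an incoming value of the IV's PHI and SCEV has proven,
// via isLoopInvariantPredicate, that the comparison's result cannot change
// as the loop advances. Only values that already exist are used, so no new
// instructions are emitted.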
+ + Value *NewLHS = nullptr, *NewRHS = nullptr; + + if (S == InvariantLHS || X == InvariantLHS) + NewLHS = + ICmp->getOperand(S == InvariantLHS ? IVOperIdx : (1 - IVOperIdx)); + + if (S == InvariantRHS || X == InvariantRHS) + NewRHS = + ICmp->getOperand(S == InvariantRHS ? IVOperIdx : (1 - IVOperIdx)); + + for (Value *Incoming : cast(IVOperand)->incoming_values()) { + if (NewLHS && NewRHS) + break; + + const SCEV *IncomingS = SE->getSCEV(Incoming); + + if (!NewLHS && IncomingS == InvariantLHS) + NewLHS = Incoming; + if (!NewRHS && IncomingS == InvariantRHS) + NewRHS = Incoming; + } + + if (!NewLHS || !NewRHS) + // We could not find an existing value to replace either LHS or RHS. + // Generating new instructions has subtler tradeoffs, so avoid doing that + // for now. + return; + + DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); + ICmp->setPredicate(InvariantPredicate); + ICmp->setOperand(0, NewLHS); + ICmp->setOperand(1, NewRHS); + } else return; - DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); ++NumElimCmp; Changed = true; - DeadInsts.emplace_back(ICmp); } /// SimplifyIVUsers helper for eliminating useless @@ -207,8 +256,7 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, Rem->replaceAllUsesWith(Rem->getOperand(0)); else { // (i+1) % n --> (i+1)==n?0:(i+1) if i is in [0,n). - const SCEV *LessOne = - SE->getMinusSCEV(S, SE->getConstant(S->getType(), 1)); + const SCEV *LessOne = SE->getMinusSCEV(S, SE->getOne(S->getType())); if (IsSigned && !SE->isKnownNonNegative(LessOne)) return; @@ -232,9 +280,9 @@ void SimplifyIndvar::eliminateIVRemainder(BinaryOperator *Rem, DeadInsts.emplace_back(Rem); } -/// Eliminate an operation that consumes a simple IV and has -/// no observable side-effect given the range of IV values. -/// IVOperand is guaranteed SCEVable, but UseInst may not be. +/// Eliminate an operation that consumes a simple IV and has no observable +/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable, +/// but UseInst may not be. bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, Instruction *IVOperand) { if (ICmpInst *ICmp = dyn_cast(UseInst)) { @@ -249,12 +297,45 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst, } } - // Eliminate any operation that SCEV can prove is an identity function. + if (eliminateIdentitySCEV(UseInst, IVOperand)) + return true; + + return false; +} + +/// Eliminate any operation that SCEV can prove is an identity function. +bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst, + Instruction *IVOperand) { if (!SE->isSCEVable(UseInst->getType()) || (UseInst->getType() != IVOperand->getType()) || (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand))) return false; + // getSCEV(X) == getSCEV(Y) does not guarantee that X and Y are related in the + // dominator tree, even if X is an operand to Y. For instance, in + // + // %iv = phi i32 {0,+,1} + // br %cond, label %left, label %merge + // + // left: + // %X = add i32 %iv, 0 + // br label %merge + // + // merge: + // %M = phi (%X, %iv) + // + // getSCEV(%M) == getSCEV(%X) == {0,+,1}, but %X does not dominate %M, and + // %M.replaceAllUsesWith(%X) would be incorrect. + + if (isa(UseInst)) + // If UseInst is not a PHI node then we know that IVOperand dominates + // UseInst directly from the legality of SSA. 
+ if (!DT || !DT->dominates(IVOperand, UseInst)) + return false; + + if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand)) + return false; + DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); UseInst->replaceAllUsesWith(IVOperand); @@ -436,8 +517,8 @@ static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) { /// This algorithm does not require IVUsers analysis. Instead, it simplifies /// instructions in-place during analysis. Rather than rewriting induction /// variables bottom-up from their users, it transforms a chain of IVUsers -/// top-down, updating the IR only when it encouters a clear optimization -/// opportunitiy. +/// top-down, updating the IR only when it encounters a clear optimization +/// opportunity. /// /// Once DisableIVRewrite is default, LSR will be the only client of IVUsers. /// @@ -513,22 +594,21 @@ void IVVisitor::anchor() { } /// Simplify instructions that use this induction variable /// by using ScalarEvolution to analyze the IV's recurrence. -bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl &Dead, IVVisitor *V) -{ - LoopInfo *LI = &LPM->getAnalysis().getLoopInfo(); - SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, LI, Dead); +bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl &Dead, + IVVisitor *V) { + SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead); SIV.simplifyUsers(CurrIV, V); return SIV.hasChanged(); } /// Simplify users of induction variables within this /// loop. This does not actually change or add IVs. -bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, LPPassManager *LPM, - SmallVectorImpl &Dead) { +bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, + LoopInfo *LI, SmallVectorImpl &Dead) { bool Changed = false; for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { - Changed |= simplifyUsersOfIV(cast(I), SE, LPM, Dead); + Changed |= simplifyUsersOfIV(cast(I), SE, DT, LI, Dead); } return Changed; } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index c499c87b1f0b..d5377f9a4c1f 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -20,12 +20,12 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -64,7 +64,7 @@ namespace { // Here be subtlety: the iterator must be incremented before the loop // body (not sure why), so a range-for loop won't work here. for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - Instruction *I = BI++; + Instruction *I = &*BI++; // The first time through the loop ToSimplify is empty and we try to // simplify all instructions. On later iterations ToSimplify is not // empty and we only bother simplifying instructions that are in it. 
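// The increment-before-use idiom noted above, in standalone form: taking the
// element and stepping the iterator first keeps the iterator valid even if
// the loop body erases the current element (illustrative, std::list):

#include <list>
static void eraseEvens(std::list<int> &L) {
  for (auto It = L.begin(), E = L.end(); It != E;) {
    auto Cur = It++;   // advance before possibly erasing *Cur
    if (*Cur % 2 == 0)
      L.erase(Cur);    // safe: It already points past Cur
  }
}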
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6bbf8287e223..81dea6d1b9ae 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DiagnosticInfo.h" @@ -30,8 +31,8 @@ #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace PatternMatch; @@ -52,16 +53,8 @@ static cl::opt //===----------------------------------------------------------------------===// static bool ignoreCallingConv(LibFunc::Func Func) { - switch (Func) { - case LibFunc::abs: - case LibFunc::labs: - case LibFunc::llabs: - case LibFunc::strlen: - return true; - default: - return false; - } - llvm_unreachable("All cases should be covered in the switch."); + return Func == LibFunc::abs || Func == LibFunc::labs || + Func == LibFunc::llabs || Func == LibFunc::strlen; } /// isOnlyUsedInZeroEqualityComparison - Return true if it only matters that the @@ -93,16 +86,13 @@ static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) { } static bool callHasFloatingPointArgument(const CallInst *CI) { - for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); - it != e; ++it) { - if ((*it)->getType()->isFloatingPointTy()) - return true; - } - return false; + return std::any_of(CI->op_begin(), CI->op_end(), [](const Use &OI) { + return OI->getType()->isFloatingPointTy(); + }); } /// \brief Check whether the overloaded unary floating point function -/// corresponing to \a Ty is available. +/// corresponding to \a Ty is available. static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, LibFunc::Func DoubleFn, LibFunc::Func FloatFn, LibFunc::Func LongDoubleFn) { @@ -116,6 +106,23 @@ static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, } } +/// \brief Check whether we can use unsafe floating point math for +/// the function passed as input. +static bool canUseUnsafeFPMath(Function *F) { + + // FIXME: For finer-grain optimization, we need intrinsics to have the same + // fast-math flag decorations that are applied to FP instructions. For now, + // we have to rely on the function-level unsafe-fp-math attribute to do this + // optimization because there's no other way to express that the call can be + // relaxed. + if (F->hasFnAttribute("unsafe-fp-math")) { + Attribute Attr = F->getFnAttribute("unsafe-fp-math"); + if (Attr.getValueAsString() == "true") + return true; + } + return false; +} + /// \brief Returns whether \p F matches the signature expected for the /// string/memory copying library function \p Func. /// Acceptable functions are st[rp][n]?cpy, memove, memcpy, and memset. @@ -467,9 +474,6 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - // Verify the "stpcpy" function prototype. 
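// The test in canUseUnsafeFPMath above, reduced to its core (sketch;
// "unsafe-fp-math" is a string-valued function attribute at this point, so
// it is compared textually rather than queried as a flag):
//
//   if (F->getFnAttribute("unsafe-fp-math").getValueAsString() == "true")
//     ...  // relaxed floating-point transforms allowed
//
// Frontends set this attribute per function from -ffast-math and friends,
// which is why the simplifier consults the enclosing function rather than
// the individual call.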
- FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::stpcpy)) return nullptr; @@ -484,7 +488,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { if (Len == 0) return nullptr; - Type *PT = FT->getParamType(0); + Type *PT = Callee->getFunctionType()->getParamType(0); Value *LenV = ConstantInt::get(DL.getIntPtrType(PT), Len); Value *DstEnd = B.CreateGEP(B.getInt8Ty(), Dst, ConstantInt::get(DL.getIntPtrType(PT), Len - 1)); @@ -497,8 +501,6 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (!checkStringCopyLibFuncSignature(Callee, LibFunc::strncpy)) return nullptr; @@ -531,7 +533,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) { if (Len > SrcLen + 1) return nullptr; - Type *PT = FT->getParamType(0); + Type *PT = Callee->getFunctionType()->getParamType(0); // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); @@ -862,6 +864,27 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) { return B.CreateSub(LHSV, RHSV, "chardiff"); } + // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0 + if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) { + + IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8); + unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType); + + if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment && + getKnownAlignment(RHS, DL, CI) >= PrefAlignment) { + + Type *LHSPtrTy = + IntType->getPointerTo(LHS->getType()->getPointerAddressSpace()); + Type *RHSPtrTy = + IntType->getPointerTo(RHS->getType()->getPointerAddressSpace()); + + Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv"); + Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv"); + + return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp"); + } + } + // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) StringRef LHSStr, RHSStr; if (getConstantStringInfo(LHS, LHSStr) && @@ -972,7 +995,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, // floor((double)floatval) -> (double)floorf(floatval) if (Callee->isIntrinsic()) { - Module *M = CI->getParent()->getParent()->getParent(); + Module *M = CI->getModule(); Intrinsic::ID IID = Callee->getIntrinsicID(); Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); V = B.CreateCall(F, V); @@ -1015,9 +1038,9 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "cos" && TLI->has(LibFunc::cosf)) { + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "cos" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the @@ -1035,13 +1058,37 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { return Ret; } +static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { + // Multiplications calculated using Addition Chains. 
+ // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html + + assert(Exp != 0 && "Incorrect exponent 0 not handled"); + + if (InnerChain[Exp]) + return InnerChain[Exp]; + + static const unsigned AddChain[33][2] = { + {0, 0}, // Unused. + {0, 0}, // Unused (base case = pow1). + {1, 1}, // Unused (pre-computed). + {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, + {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, + {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, + {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, + {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, + }; + + InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), + getPow(InnerChain, AddChain[Exp][1], B)); + return InnerChain[Exp]; +} + Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "pow" && TLI->has(LibFunc::powf)) { + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "pow" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 2 arguments of the same FP type, which match the @@ -1060,7 +1107,8 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { if (Op1C->isExactlyValue(2.0) && hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f, LibFunc::exp2l)) - return EmitUnaryFloatFnCall(Op2, "exp2", B, Callee->getAttributes()); + return EmitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B, + Callee->getAttributes()); // pow(10.0, x) -> exp10(x) if (Op1C->isExactlyValue(10.0) && hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f, @@ -1069,6 +1117,32 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Callee->getAttributes()); } + bool unsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent()); + + // pow(exp(x), y) -> exp(x*y) + // pow(exp2(x), y) -> exp2(x * y) + // We enable these only under fast-math. Besides rounding + // differences the transformation changes overflow and + // underflow behavior quite dramatically. + // Example: x = 1000, y = 0.001. + // pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1). + if (unsafeFPMath) { + if (auto *OpC = dyn_cast(Op1)) { + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + B.SetFastMathFlags(FMF); + + LibFunc::Func Func; + Function *OpCCallee = OpC->getCalledFunction(); + if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) && + TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) + return EmitUnaryFloatFnCall( + B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"), + OpCCallee->getName(), B, OpCCallee->getAttributes()); + } + } + ConstantFP *Op2C = dyn_cast(Op2); if (!Op2C) return Ret; @@ -1081,10 +1155,15 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { LibFunc::sqrtl) && hasUnaryFloatFn(TLI, Op2->getType(), LibFunc::fabs, LibFunc::fabsf, LibFunc::fabsl)) { + + // In -ffast-math, pow(x, 0.5) -> sqrt(x). + if (unsafeFPMath) + return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B, + Callee->getAttributes()); + // Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))). // This is faster than calling pow, and still handles negative zero // and negative infinity correctly. - // TODO: In fast-math mode, this could be just sqrt(x). // TODO: In finite-only mode, this could be just fabs(sqrt(x)). 
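// Standalone demonstration of the addition-chain expansion implemented by
// getPow above: AddChain[Exp] = {A, B} with A + B == Exp, so x^Exp is built
// from two smaller, memoized powers, e.g. x^11 = x^1 * x^10, x^10 = x^5 *
// x^5, x^5 = x^2 * x^3 -- five multiplies and no libm call (plain C++,
// illustrative only; the memo-by-zero trick is a demo shortcut):

#include <cstdio>
static const unsigned AddChain[33][2] = {
    {0, 0},   {0, 0},  {1, 1},   {1, 2},   {2, 2},  {2, 3},   {3, 3},
    {2, 5},   {4, 4},  {1, 8},   {5, 5},   {1, 10}, {6, 6},   {4, 9},
    {7, 7},   {3, 12}, {8, 8},   {8, 9},   {2, 16}, {1, 18},  {10, 10},
    {6, 15},  {11, 11},{3, 20},  {12, 12}, {8, 17}, {13, 13}, {3, 24},
    {14, 14}, {4, 25}, {15, 15}, {3, 28},  {16, 16}};
static double powChain(double X, unsigned Exp, double Memo[33]) {
  if (Exp == 1)
    return X;
  if (Memo[Exp] == 0.0) // memoize intermediate powers, as InnerChain does
    Memo[Exp] = powChain(X, AddChain[Exp][0], Memo) *
                powChain(X, AddChain[Exp][1], Memo);
  return Memo[Exp];
}
int main() {
  double Memo[33] = {0.0};
  std::printf("%g\n", powChain(2.0, 11, Memo)); // prints 2048
}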
Value *Inf = ConstantFP::getInfinity(CI->getType()); Value *NegInf = ConstantFP::getInfinity(CI->getType(), true); @@ -1102,18 +1181,42 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return B.CreateFMul(Op1, Op1, "pow2"); if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); + + // In -ffast-math, generate repeated fmul instead of generating pow(x, n). + if (unsafeFPMath) { + APFloat V = abs(Op2C->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // This transformation applies to integer exponents only. + if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || + !V.isInteger()) + return nullptr; + + // We will memoize intermediate products of the Addition Chain. + Value *InnerChain[33] = {nullptr}; + InnerChain[1] = Op1; + InnerChain[2] = B.CreateFMul(Op1, Op1); + + // We cannot readily convert a non-double type (like float) to a double. + // So we first convert V to something which could be converted to double. + bool ignored; + V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + Value *FMul = getPow(InnerChain, V.convertToDouble(), B); + // For negative exponents simply compute the reciprocal. + if (Op2C->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); + return FMul; + } + return nullptr; } Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Function *Caller = CI->getParent()->getParent(); - Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "exp2" && - TLI->has(LibFunc::exp2f)) { + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "exp2" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } FunctionType *FT = Callee->getFunctionType(); // Just make sure this has 1 argument of FP type, which matches the @@ -1162,11 +1265,10 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - Value *Ret = nullptr; - if (Callee->getName() == "fabs" && TLI->has(LibFunc::fabsf)) { + StringRef Name = Callee->getName(); + if (Name == "fabs" && hasFloatVersion(Name)) Ret = optimizeUnaryDoubleFP(CI, B, false); - } FunctionType *FT = Callee->getFunctionType(); // Make sure this has 1 argument of FP type which matches the result type. @@ -1184,6 +1286,105 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) { return Ret; } +Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) { + // If we can shrink the call to a float function rather than a double + // function, do that first. + Function *Callee = CI->getCalledFunction(); + StringRef Name = Callee->getName(); + if ((Name == "fmin" && hasFloatVersion(Name)) || + (Name == "fmax" && hasFloatVersion(Name))) { + Value *Ret = optimizeBinaryDoubleFP(CI, B); + if (Ret) + return Ret; + } + + // Make sure this has 2 arguments of FP type which match the result type. 
+ FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || + FT->getParamType(0) != FT->getParamType(1) || + !FT->getParamType(0)->isFloatingPointTy()) + return nullptr; + + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + Function *F = CI->getParent()->getParent(); + if (canUseUnsafeFPMath(F)) { + // Unsafe algebra sets all fast-math-flags to true. + FMF.setUnsafeAlgebra(); + } else { + // At a minimum, no-nans-fp-math must be true. + Attribute Attr = F->getFnAttribute("no-nans-fp-math"); + if (Attr.getValueAsString() != "true") + return nullptr; + // No-signed-zeros is implied by the definitions of fmax/fmin themselves: + // "Ideally, fmax would be sensitive to the sign of zero, for example + // fmax(-0. 0, +0. 0) would return +0; however, implementation in software + // might be impractical." + FMF.setNoSignedZeros(); + FMF.setNoNaNs(); + } + B.SetFastMathFlags(FMF); + + // We have a relaxed floating-point environment. We can ignore NaN-handling + // and transform to a compare and select. We do not have to consider errno or + // exceptions, because fmin/fmax do not have those. + Value *Op0 = CI->getArgOperand(0); + Value *Op1 = CI->getArgOperand(1); + Value *Cmp = Callee->getName().startswith("fmin") ? + B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1); + return B.CreateSelect(Cmp, Op0, Op1); +} + +Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + FunctionType *FT = Callee->getFunctionType(); + + // Just make sure this has 1 argument of FP type, which matches the + // result type. + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + return Ret; + Value *Op1 = CI->getArgOperand(0); + auto *OpC = dyn_cast(Op1); + if (!OpC) + return Ret; + + // log(pow(x,y)) -> y*log(x) + // This is only applicable to log, log2, log10. 
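// Worked instances of the identities applied below (real-valued, and only
// valid once rounding, errno, and overflow concerns are waived, hence the
// unsafe-math guard above):
//
//   log(pow(x, y))  ->  y * log(x)     e.g. log(pow(10, 3)) = 3 * log(10)
//   log(exp2(y))    ->  y * log(2)     e.g. log(exp2(8))    = 8 * 0.69314...
//
// The pow rule is applied with the matching base for log2 and log10 as well;
// exp2 is only folded for plain log, where the constant log(2) appears.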
+ if (Name != "log" && Name != "log2" && Name != "log10") + return Ret; + + IRBuilder<>::FastMathFlagGuard Guard(B); + FastMathFlags FMF; + FMF.setUnsafeAlgebra(); + B.SetFastMathFlags(FMF); + + LibFunc::Func Func; + Function *F = OpC->getCalledFunction(); + if (F && ((TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && + Func == LibFunc::pow) || F->getIntrinsicID() == Intrinsic::pow)) + return B.CreateFMul(OpC->getArgOperand(1), + EmitUnaryFloatFnCall(OpC->getOperand(0), Callee->getName(), B, + Callee->getAttributes()), "mul"); + + // log(exp2(y)) -> y*log(2) + if (F && Name == "log" && TLI->getLibFunc(F->getName(), Func) && + TLI->has(Func) && Func == LibFunc::exp2) + return B.CreateFMul( + OpC->getArgOperand(0), + EmitUnaryFloatFnCall(ConstantFP::get(CI->getType(), 2.0), + Callee->getName(), B, Callee->getAttributes()), + "logmul"); + return Ret; +} + Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); @@ -1191,19 +1392,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); + if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + return Ret; - // FIXME: For finer-grain optimization, we need intrinsics to have the same - // fast-math flag decorations that are applied to FP instructions. For now, - // we have to rely on the function-level unsafe-fp-math attribute to do this - // optimization because there's no other way to express that the sqrt can be - // reassociated. - Function *F = CI->getParent()->getParent(); - if (F->hasFnAttribute("unsafe-fp-math")) { - // Check for unsafe-fp-math = true. - Attribute Attr = F->getFnAttribute("unsafe-fp-math"); - if (Attr.getValueAsString() != "true") - return Ret; - } Value *Op = CI->getArgOperand(0); if (Instruction *I = dyn_cast(Op)) { if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) { @@ -1238,8 +1429,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { // and multiply. // FIXME: We're not checking the sqrt because it doesn't have // fast-math-flags (see earlier comment). - IRBuilder >::FastMathFlagGuard Guard(B); + IRBuilder<>::FastMathFlagGuard Guard(B); B.SetFastMathFlags(I->getFastMathFlags()); // If we found a repeated factor, hoist it out of the square root and // replace it with the fabs of that factor. @@ -1262,6 +1452,40 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { return Ret; } +Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) { + Function *Callee = CI->getCalledFunction(); + Value *Ret = nullptr; + StringRef Name = Callee->getName(); + if (UnsafeFPShrink && Name == "tan" && hasFloatVersion(Name)) + Ret = optimizeUnaryDoubleFP(CI, B, true); + FunctionType *FT = Callee->getFunctionType(); + + // Just make sure this has 1 argument of FP type, which matches the + // result type. 
+ if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + + if (!canUseUnsafeFPMath(CI->getParent()->getParent())) + return Ret; + Value *Op1 = CI->getArgOperand(0); + auto *OpC = dyn_cast(Op1); + if (!OpC) + return Ret; + + // tan(atan(x)) -> x + // tanf(atanf(x)) -> x + // tanl(atanl(x)) -> x + LibFunc::Func Func; + Function *F = OpC->getCalledFunction(); + if (F && TLI->getLibFunc(F->getName(), Func) && TLI->has(Func) && + ((Func == LibFunc::atan && Callee->getName() == "tan") || + (Func == LibFunc::atanf && Callee->getName() == "tanf") || + (Func == LibFunc::atanl && Callee->getName() == "tanl"))) + Ret = OpC->getArgOperand(0); + return Ret; +} + static bool isTrigLibCall(CallInst *CI); static void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, bool UseFloat, Value *&Sin, Value *&Cos, @@ -1329,9 +1553,9 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, return; Function *Callee = CI->getCalledFunction(); - StringRef FuncName = Callee->getName(); LibFunc::Func Func; - if (!TLI->getLibFunc(FuncName, Func) || !TLI->has(Func) || !isTrigLibCall(CI)) + if (!Callee || !TLI->getLibFunc(Callee->getName(), Func) || !TLI->has(Func) || + !isTrigLibCall(CI)) return; if (IsFloat) { @@ -1353,10 +1577,8 @@ LibCallSimplifier::classifyArgUse(Value *Val, BasicBlock *BB, bool IsFloat, void LibCallSimplifier::replaceTrigInsts(SmallVectorImpl &Calls, Value *Res) { - for (SmallVectorImpl::iterator I = Calls.begin(), E = Calls.end(); - I != E; ++I) { - replaceAllUsesWith(*I, Res); - } + for (CallInst *C : Calls) + replaceAllUsesWith(C, Res); } void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, @@ -1387,8 +1609,7 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, if (Instruction *ArgInst = dyn_cast(Arg)) { // If the argument is an instruction, it must dominate all uses so put our // sincos call there. - BasicBlock::iterator Loc = ArgInst; - B.SetInsertPoint(ArgInst->getParent(), ++Loc); + B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator()); } else { // Otherwise (e.g. for a constant) the beginning of the function is as // good a place as any. @@ -1413,15 +1634,16 @@ void insertSinCosCall(IRBuilder<> &B, Function *OrigCallee, Value *Arg, // Integer Library Call Optimizations //===----------------------------------------------------------------------===// +static bool checkIntUnaryReturnAndParam(Function *Callee) { + FunctionType *FT = Callee->getFunctionType(); + return FT->getNumParams() == 1 && FT->getReturnType()->isIntegerTy(32) && + FT->getParamType(0)->isIntegerTy(); +} + Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 2 arguments of the same FP type, which match the - // result type. - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy(32) || - !FT->getParamType(0)->isIntegerTy()) + if (!checkIntUnaryReturnAndParam(Callee)) return nullptr; - Value *Op = CI->getArgOperand(0); // Constant fold. 
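// What the ffs lowering below computes (standalone sketch): ffs(0) == 0,
// otherwise one plus the index of the least significant set bit, i.e.
// cttz(x) + 1. The cttz intrinsic may now be told that a zero input is
// undefined (the getTrue() flag) because the surrounding code already
// selects the final result against an explicit zero test:

#include <cstdio>
static int ffsSketch(unsigned X) {
  return X ? __builtin_ctz(X) + 1 : 0; // __builtin_ctz is undefined at 0
}
int main() {
  std::printf("%d %d %d\n", ffsSketch(8), ffsSketch(1), ffsSketch(0)); // 4 1 0
}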
@@ -1436,7 +1658,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) { Type *ArgType = Op->getType(); Value *F = Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType); - Value *V = B.CreateCall(F, {Op, B.getFalse()}, "cttz"); + Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz"); V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1)); V = B.CreateIntCast(V, B.getInt32Ty(), false); @@ -1461,11 +1683,7 @@ Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) { } Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require integer(i32) - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - !FT->getParamType(0)->isIntegerTy(32)) + if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) return nullptr; // isdigit(c) -> (c-'0') &B) { } Value *LibCallSimplifier::optimizeIsAscii(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require integer(i32) - if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() || - !FT->getParamType(0)->isIntegerTy(32)) + if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) return nullptr; // isascii(c) -> c &B) { } Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) { - Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // We require i32(i32) - if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || - !FT->getParamType(0)->isIntegerTy(32)) + if (!checkIntUnaryReturnAndParam(CI->getCalledFunction())) return nullptr; // toascii(c) -> c & 0x7f @@ -1529,10 +1739,7 @@ Value *LibCallSimplifier::optimizeErrorReporting(CallInst *CI, IRBuilder<> &B, } static bool isReportingError(Function *Callee, CallInst *CI, int StreamArg) { - if (!ColdErrorCalls) - return false; - - if (!Callee || !Callee->isDeclaration()) + if (!ColdErrorCalls || !Callee || !Callee->isDeclaration()) return false; if (StreamArg < 0) @@ -1968,16 +2175,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { // Command-line parameter overrides function attribute. if (EnableUnsafeFPShrink.getNumOccurrences() > 0) UnsafeFPShrink = EnableUnsafeFPShrink; - else if (Callee->hasFnAttribute("unsafe-fp-math")) { - // FIXME: This is the same problem as described in optimizeSqrt(). - // If calls gain access to IR-level FMF, then use that instead of a - // function attribute. - - // Check for unsafe-fp-math = true. - Attribute Attr = Callee->getFnAttribute("unsafe-fp-math"); - if (Attr.getValueAsString() == "true") - UnsafeFPShrink = true; - } + else if (canUseUnsafeFPMath(Callee)) + UnsafeFPShrink = true; // First, check for intrinsics. if (IntrinsicInst *II = dyn_cast(CI)) { @@ -1990,6 +2189,8 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeExp2(CI, Builder); case Intrinsic::fabs: return optimizeFabs(CI, Builder); + case Intrinsic::log: + return optimizeLog(CI, Builder); case Intrinsic::sqrt: return optimizeSqrt(CI, Builder); default: @@ -2001,13 +2202,17 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { if (Value *SimplifiedFortifiedCI = FortifiedSimplifier.optimizeCall(CI)) { // Try to further simplify the result. 
CallInst *SimplifiedCI = dyn_cast(SimplifiedFortifiedCI); - if (SimplifiedCI && SimplifiedCI->getCalledFunction()) - if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, Builder)) { + if (SimplifiedCI && SimplifiedCI->getCalledFunction()) { + // Use an IR Builder from SimplifiedCI if available instead of CI + // to guarantee we reach all uses we might replace later on. + IRBuilder<> TmpBuilder(SimplifiedCI); + if (Value *V = optimizeStringMemoryLibCall(SimplifiedCI, TmpBuilder)) { // If we were able to further simplify, remove the now redundant call. SimplifiedCI->replaceAllUsesWith(V); SimplifiedCI->eraseFromParent(); return V; } + } return SimplifiedFortifiedCI; } @@ -2068,8 +2273,18 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { return optimizeFWrite(CI, Builder); case LibFunc::fputs: return optimizeFPuts(CI, Builder); + case LibFunc::log: + case LibFunc::log10: + case LibFunc::log1p: + case LibFunc::log2: + case LibFunc::logb: + return optimizeLog(CI, Builder); case LibFunc::puts: return optimizePuts(CI, Builder); + case LibFunc::tan: + case LibFunc::tanf: + case LibFunc::tanl: + return optimizeTan(CI, Builder); case LibFunc::perror: return optimizeErrorReporting(CI, Builder); case LibFunc::vfprintf: @@ -2097,24 +2312,23 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { case LibFunc::exp: case LibFunc::exp10: case LibFunc::expm1: - case LibFunc::log: - case LibFunc::log10: - case LibFunc::log1p: - case LibFunc::log2: - case LibFunc::logb: case LibFunc::sin: case LibFunc::sinh: - case LibFunc::tan: case LibFunc::tanh: if (UnsafeFPShrink && hasFloatVersion(FuncName)) return optimizeUnaryDoubleFP(CI, Builder, true); return nullptr; case LibFunc::copysign: - case LibFunc::fmin: - case LibFunc::fmax: if (hasFloatVersion(FuncName)) return optimizeBinaryDoubleFP(CI, Builder); return nullptr; + case LibFunc::fminf: + case LibFunc::fmin: + case LibFunc::fminl: + case LibFunc::fmaxf: + case LibFunc::fmax: + case LibFunc::fmaxl: + return optimizeFMinFMax(CI, Builder); default: return nullptr; } @@ -2133,37 +2347,27 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { Replacer(I, With); } -/*static*/ void LibCallSimplifier::replaceAllUsesWithDefault(Instruction *I, - Value *With) { - I->replaceAllUsesWith(With); - I->eraseFromParent(); -} - // TODO: // Additional cases that we need to add to this file: // // cbrt: // * cbrt(expN(X)) -> expN(x/3) // * cbrt(sqrt(x)) -> pow(x,1/6) -// * cbrt(sqrt(x)) -> pow(x,1/9) +// * cbrt(cbrt(x)) -> pow(x,1/9) // // exp, expf, expl: // * exp(log(x)) -> x // // log, logf, logl: // * log(exp(x)) -> x -// * log(x**y) -> y*log(x) // * log(exp(y)) -> y*log(e) -// * log(exp2(y)) -> y*log(2) // * log(exp10(y)) -> y*log(10) // * log(sqrt(x)) -> 0.5*log(x) -// * log(pow(x,y)) -> y*log(x) // // lround, lroundf, lroundl: // * lround(cnst) -> cnst' // // pow, powf, powl: -// * pow(exp(x),y) -> exp(x*y) // * pow(sqrt(x),y) -> pow(x,y*0.5) // * pow(pow(x,y),z)-> pow(x,y*z) // @@ -2179,9 +2383,6 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) { // * sqrt(Nroot(x)) -> pow(x,1/(2*N)) // * sqrt(pow(x,y)) -> pow(|x|,y*0.5) // -// tan, tanf, tanl: -// * tan(atan(x)) -> x -// // trunc, truncf, truncl: // * trunc(cnst) -> cnst' // diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp new file mode 100644 index 000000000000..ad6b782caf8b --- /dev/null +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -0,0 +1,85 @@ +//===- SplitModule.cpp - Split a module into partitions 
-------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function llvm::SplitModule, which splits a module
+// into multiple linkable partitions. It can be used to implement parallel code
+// generation for link-time optimization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/SplitModule.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+using namespace llvm;
+
+static void externalize(GlobalValue *GV) {
+  if (GV->hasLocalLinkage()) {
+    GV->setLinkage(GlobalValue::ExternalLinkage);
+    GV->setVisibility(GlobalValue::HiddenVisibility);
+  }
+
+  // Unnamed entities must be named consistently between modules. setName will
+  // give a distinct name to each such entity.
+  if (!GV->hasName())
+    GV->setName("__llvmsplit_unnamed");
+}
+
+// Returns whether GV should be in partition (0-based) I of N.
+static bool isInPartition(const GlobalValue *GV, unsigned I, unsigned N) {
+  if (auto GA = dyn_cast<GlobalAlias>(GV))
+    if (const GlobalObject *Base = GA->getBaseObject())
+      GV = Base;
+
+  StringRef Name;
+  if (const Comdat *C = GV->getComdat())
+    Name = C->getName();
+  else
+    Name = GV->getName();
+
+  // Partition by MD5 hash. We only need a few bits for evenness as the number
+  // of partitions will generally be in the 1-2 figure range; the low 16 bits
+  // are enough.
+  MD5 H;
+  MD5::MD5Result R;
+  H.update(Name);
+  H.final(R);
+  return (R[0] | (R[1] << 8)) % N == I;
+}
+
+void llvm::SplitModule(
+    std::unique_ptr<Module> M, unsigned N,
+    std::function<void(std::unique_ptr<Module> MPart)> ModuleCallback) {
+  for (Function &F : *M)
+    externalize(&F);
+  for (GlobalVariable &GV : M->globals())
+    externalize(&GV);
+  for (GlobalAlias &GA : M->aliases())
+    externalize(&GA);
+
+  // FIXME: We should be able to reuse M as the last partition instead of
+  // cloning it.
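// Miniature of the partition rule in isInPartition above (std::hash stands
// in for MD5 purely for illustration; the real code hashes with MD5 so the
// assignment is stable across hosts and runs):

#include <functional>
#include <string>
static bool inPartition(const std::string &Name, unsigned I, unsigned N) {
  return (std::hash<std::string>{}(Name) & 0xFFFF) % N == I;
}
// Globals in the same comdat share Name, so they always land together;
// otherwise names spread roughly evenly over the N partitions.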
+  for (unsigned I = 0; I != N; ++I) {
+    ValueToValueMapTy VMap;
+    std::unique_ptr<Module> MPart(
+        CloneModule(M.get(), VMap, [=](const GlobalValue *GV) {
+          return isInPartition(GV, I, N);
+        }));
+    if (I != 0)
+      MPart->setModuleInlineAsm("");
+    ModuleCallback(std::move(MPart));
+  }
+}
diff --git a/lib/Transforms/Utils/SymbolRewriter.cpp b/lib/Transforms/Utils/SymbolRewriter.cpp
index a2a54da8590c..1d1f602b041d 100644
--- a/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -69,7 +69,6 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/YAMLParser.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
 #include "llvm/Transforms/Utils/SymbolRewriter.h"
 
 using namespace llvm;
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 7e00a80989dc..6b1d1dae5f01 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -50,11 +50,11 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
   //
   std::vector<BasicBlock*> ReturningBlocks;
   std::vector<BasicBlock*> UnreachableBlocks;
-  for(Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
-    if (isa<ReturnInst>(I->getTerminator()))
-      ReturningBlocks.push_back(I);
-    else if (isa<UnreachableInst>(I->getTerminator()))
-      UnreachableBlocks.push_back(I);
+  for (BasicBlock &I : F)
+    if (isa<ReturnInst>(I.getTerminator()))
+      ReturningBlocks.push_back(&I);
+    else if (isa<UnreachableInst>(I.getTerminator()))
+      UnreachableBlocks.push_back(&I);
 
   // Then unreachable blocks.
   if (UnreachableBlocks.empty()) {
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 8c72641da9e7..1add78e01657 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -19,11 +19,14 @@
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/Operator.h"
 
 using namespace llvm;
 
 // Out of line method to get vtable etc for class.
 void ValueMapTypeRemapper::anchor() {}
 void ValueMaterializer::anchor() {}
+void ValueMaterializer::materializeInitFor(GlobalValue *New, GlobalValue *Old) {
+}
 
 Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
                       ValueMapTypeRemapper *TypeMapper,
@@ -35,15 +38,28 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
 
   // If we have a materializer and it can materialize a value, use that.
   if (Materializer) {
-    if (Value *NewV = Materializer->materializeValueFor(const_cast<Value*>(V)))
-      return VM[V] = NewV;
+    if (Value *NewV =
+            Materializer->materializeDeclFor(const_cast<Value *>(V))) {
+      VM[V] = NewV;
+      if (auto *NewGV = dyn_cast<GlobalValue>(NewV))
+        Materializer->materializeInitFor(
+            NewGV, const_cast<GlobalValue *>(cast<GlobalValue>(V)));
+      return NewV;
+    }
   }
 
   // Global values do not need to be seeded into the VM if they
   // are using the identity mapping.
-  if (isa<GlobalValue>(V))
+  if (isa<GlobalValue>(V)) {
+    if (Flags & RF_NullMapMissingGlobalValues) {
+      assert(!(Flags & RF_IgnoreMissingEntries) &&
+             "Illegal to specify both RF_NullMapMissingGlobalValues and "
+             "RF_IgnoreMissingEntries");
+      return nullptr;
+    }
     return VM[V] = const_cast<Value *>(V);
-
+  }
+
   if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
     // Inline asm may need *type* remapping.
     FunctionType *NewTy = IA->getFunctionType();
@@ -73,7 +89,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
   // correct. For now, just match behaviour from before the metadata/value
   // split.
// - // assert(MappedMD && "Referenced metadata value not in value map"); + // assert((MappedMD || (Flags & RF_NullMapMissingGlobalValues)) && + // "Referenced metadata value not in value map"); return VM[V] = MetadataAsValue::get(V->getContext(), MappedMD); } @@ -127,9 +144,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, Ops.push_back(MapValue(cast(C->getOperand(OpNo)), VM, Flags, TypeMapper, Materializer)); } - + Type *NewSrcTy = nullptr; + if (TypeMapper) + if (auto *GEPO = dyn_cast(C)) + NewSrcTy = TypeMapper->remapType(GEPO->getSourceElementType()); + if (ConstantExpr *CE = dyn_cast(C)) - return VM[V] = CE->getWithOperands(Ops, NewTy); + return VM[V] = CE->getWithOperands(Ops, NewTy, false, NewSrcTy); if (isa(C)) return VM[V] = ConstantArray::get(cast(NewTy), Ops); if (isa(C)) @@ -146,29 +167,42 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags, } static Metadata *mapToMetadata(ValueToValueMapTy &VM, const Metadata *Key, - Metadata *Val) { + Metadata *Val, ValueMaterializer *Materializer, + RemapFlags Flags) { VM.MD()[Key].reset(Val); + if (Materializer && !(Flags & RF_HaveUnmaterializedMetadata)) { + auto *N = dyn_cast_or_null(Val); + // Need to invoke this once we have non-temporary MD. + if (!N || !N->isTemporary()) + Materializer->replaceTemporaryMetadata(Key, Val); + } return Val; } -static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD) { - return mapToMetadata(VM, MD, const_cast(MD)); +static Metadata *mapToSelf(ValueToValueMapTy &VM, const Metadata *MD, + ValueMaterializer *Materializer, RemapFlags Flags) { + return mapToMetadata(VM, MD, const_cast(MD), Materializer, Flags); } static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl &Cycles, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer); -static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl &Cycles, +static Metadata *mapMetadataOp(Metadata *Op, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { if (!Op) return nullptr; - if (Metadata *MappedOp = - MapMetadataImpl(Op, Cycles, VM, Flags, TypeMapper, Materializer)) + + if (Materializer && !Materializer->isMetadataNeeded(Op)) + return nullptr; + + if (Metadata *MappedOp = MapMetadataImpl(Op, DistinctWorklist, VM, Flags, + TypeMapper, Materializer)) return MappedOp; // Use identity map if MappedOp is null and we can ignore missing entries. if (Flags & RF_IgnoreMissingEntries) @@ -178,89 +212,113 @@ static Metadata *mapMetadataOp(Metadata *Op, SmallVectorImpl &Cycles, // correct. For now, just match behaviour from before the metadata/value // split. // - // llvm_unreachable("Referenced metadata not in value map!"); + // assert((Flags & RF_NullMapMissingGlobalValues) && + // "Referenced metadata not in value map!"); return nullptr; } -/// \brief Remap nodes. -/// -/// Insert \c NewNode in the value map, and then remap \c OldNode's operands. -/// Assumes that \c NewNode is already a clone of \c OldNode. -/// -/// \pre \c NewNode is a clone of \c OldNode. 
-static bool remap(const MDNode *OldNode, MDNode *NewNode, - SmallVectorImpl &Cycles, ValueToValueMapTy &VM, - RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, - ValueMaterializer *Materializer) { - assert(OldNode->getNumOperands() == NewNode->getNumOperands() && - "Expected nodes to match"); - assert(OldNode->isResolved() && "Expected resolved node"); - assert(!NewNode->isUniqued() && "Expected non-uniqued node"); +/// Resolve uniquing cycles involving the given metadata. +static void resolveCycles(Metadata *MD, bool MDMaterialized) { + if (auto *N = dyn_cast_or_null(MD)) { + if (!MDMaterialized && N->isTemporary()) + return; + if (!N->isResolved()) + N->resolveCycles(MDMaterialized); + } +} + +/// Remap the operands of an MDNode. +/// +/// If \c Node is temporary, uniquing cycles are ignored. If \c Node is +/// distinct, uniquing cycles are resolved as they're found. +/// +/// \pre \c Node.isDistinct() or \c Node.isTemporary(). +static bool remapOperands(MDNode &Node, + SmallVectorImpl &DistinctWorklist, + ValueToValueMapTy &VM, RemapFlags Flags, + ValueMapTypeRemapper *TypeMapper, + ValueMaterializer *Materializer) { + assert(!Node.isUniqued() && "Expected temporary or distinct node"); + const bool IsDistinct = Node.isDistinct(); - // Map the node upfront so it's available for cyclic references. - mapToMetadata(VM, OldNode, NewNode); bool AnyChanged = false; - for (unsigned I = 0, E = OldNode->getNumOperands(); I != E; ++I) { - Metadata *Old = OldNode->getOperand(I); - assert(NewNode->getOperand(I) == Old && - "Expected old operands to already be in place"); - - Metadata *New = mapMetadataOp(OldNode->getOperand(I), Cycles, VM, Flags, - TypeMapper, Materializer); + for (unsigned I = 0, E = Node.getNumOperands(); I != E; ++I) { + Metadata *Old = Node.getOperand(I); + Metadata *New = mapMetadataOp(Old, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); if (Old != New) { AnyChanged = true; - NewNode->replaceOperandWith(I, New); + Node.replaceOperandWith(I, New); + + // Resolve uniquing cycles underneath distinct nodes on the fly so they + // don't infect later operands. + if (IsDistinct) + resolveCycles(New, !(Flags & RF_HaveUnmaterializedMetadata)); } } return AnyChanged; } -/// \brief Map a distinct MDNode. +/// Map a distinct MDNode. /// -/// Distinct nodes are not uniqued, so they must always recreated. +/// Whether distinct nodes change is independent of their operands. If \a +/// RF_MoveDistinctMDs, then they are reused, and their operands remapped in +/// place; effectively, they're moved from one graph to another. Otherwise, +/// they're cloned/duplicated, and the new copy's operands are remapped. static Metadata *mapDistinctNode(const MDNode *Node, - SmallVectorImpl &Cycles, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { assert(Node->isDistinct() && "Expected distinct node"); - MDNode *NewMD = MDNode::replaceWithDistinct(Node->clone()); - remap(Node, NewMD, Cycles, VM, Flags, TypeMapper, Materializer); + MDNode *NewMD; + if (Flags & RF_MoveDistinctMDs) + NewMD = const_cast(Node); + else + NewMD = MDNode::replaceWithDistinct(Node->clone()); - // Track any cycles beneath this node. - for (Metadata *Op : NewMD->operands()) - if (auto *Node = dyn_cast_or_null(Op)) - if (!Node->isResolved()) - Cycles.push_back(Node); - - return NewMD; + // Remap operands later. 
+ DistinctWorklist.push_back(NewMD); + return mapToMetadata(VM, Node, NewMD, Materializer, Flags); } /// \brief Map a uniqued MDNode. /// /// Uniqued nodes may not need to be recreated (they may map to themselves). static Metadata *mapUniquedNode(const MDNode *Node, - SmallVectorImpl &Cycles, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { - assert(Node->isUniqued() && "Expected uniqued node"); + assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isUniqued()) && + "Expected uniqued node"); - // Create a temporary node upfront in case we have a metadata cycle. + // Create a temporary node and map it upfront in case we have a uniquing + // cycle. If necessary, this mapping will get updated by RAUW logic before + // returning. auto ClonedMD = Node->clone(); - if (!remap(Node, ClonedMD.get(), Cycles, VM, Flags, TypeMapper, Materializer)) - // No operands changed, so use the identity mapping. - return mapToSelf(VM, Node); + mapToMetadata(VM, Node, ClonedMD.get(), Materializer, Flags); + if (!remapOperands(*ClonedMD, DistinctWorklist, VM, Flags, TypeMapper, + Materializer)) { + // No operands changed, so use the original. + ClonedMD->replaceAllUsesWith(const_cast(Node)); + // Even though replaceAllUsesWith would have replaced the value map + // entry, we need to explictly map with the final non-temporary node + // to replace any temporary metadata via the callback. + return mapToSelf(VM, Node, Materializer, Flags); + } - // At least one operand has changed, so uniquify the cloned node. + // Uniquify the cloned node. Explicitly map it with the final non-temporary + // node so that replacement of temporary metadata via the callback occurs. return mapToMetadata(VM, Node, - MDNode::replaceWithUniqued(std::move(ClonedMD))); + MDNode::replaceWithUniqued(std::move(ClonedMD)), + Materializer, Flags); } static Metadata *MapMetadataImpl(const Metadata *MD, - SmallVectorImpl &Cycles, + SmallVectorImpl &DistinctWorklist, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { @@ -269,26 +327,28 @@ static Metadata *MapMetadataImpl(const Metadata *MD, return NewMD; if (isa(MD)) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); if (isa(MD)) if ((Flags & RF_NoModuleLevelChanges)) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); if (const auto *VMD = dyn_cast(MD)) { Value *MappedV = MapValue(VMD->getValue(), VM, Flags, TypeMapper, Materializer); if (VMD->getValue() == MappedV || (!MappedV && (Flags & RF_IgnoreMissingEntries))) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); // FIXME: This assert crashes during bootstrap, but I think it should be // correct. For now, just match behaviour from before the metadata/value // split. // - // assert(MappedV && "Referenced metadata not in value map!"); + // assert((MappedV || (Flags & RF_NullMapMissingGlobalValues)) && + // "Referenced metadata not in value map!"); if (MappedV) - return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV)); + return mapToMetadata(VM, MD, ValueAsMetadata::get(MappedV), Materializer, + Flags); return nullptr; } @@ -299,37 +359,54 @@ static Metadata *MapMetadataImpl(const Metadata *MD, // If this is a module-level metadata and we know that nothing at the // module level is changing, then use an identity mapping. 
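[Note: the "map it upfront in case we have a uniquing cycle" comment in mapUniquedNode is the crux of this function. A minimal self-contained sketch, in plain C++ with a toy node type rather than LLVM's Metadata classes, of why the clone must be registered in the map before its operands are visited:]

    #include <map>
    #include <vector>

    struct Node { std::vector<Node *> Ops; };

    // Registering the clone before recursing makes a self-referential node
    // terminate: the recursive call finds the mapping instead of looping.
    Node *mapNode(Node *N, std::map<Node *, Node *> &VM) {
      auto It = VM.find(N);
      if (It != VM.end())
        return It->second;
      Node *Clone = new Node(*N);
      VM[N] = Clone; // map upfront: this is what breaks uniquing cycles
      for (Node *&Op : Clone->Ops)
        if (Op)
          Op = mapNode(Op, VM);
      return Clone;
    }

[If no operand actually changed, the real code additionally RAUWs the clone back to the original node and maps to self, which is why mapToSelf must still run the materializer callback.]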
if (Flags & RF_NoModuleLevelChanges) - return mapToSelf(VM, MD); + return mapToSelf(VM, MD, Materializer, Flags); // Require resolved nodes whenever metadata might be remapped. - assert(Node->isResolved() && "Unexpected unresolved node"); + assert(((Flags & RF_HaveUnmaterializedMetadata) || Node->isResolved()) && + "Unexpected unresolved node"); + + if (Materializer && Node->isTemporary()) { + assert(Flags & RF_HaveUnmaterializedMetadata); + Metadata *TempMD = + Materializer->mapTemporaryMetadata(const_cast(MD)); + // If the above callback returned an existing temporary node, use it + // instead of the current temporary node. This happens when earlier + // function importing passes already created and saved a temporary + // metadata node for the same value id. + if (TempMD) { + mapToMetadata(VM, MD, TempMD, Materializer, Flags); + return TempMD; + } + } if (Node->isDistinct()) - return mapDistinctNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); + return mapDistinctNode(Node, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); - return mapUniquedNode(Node, Cycles, VM, Flags, TypeMapper, Materializer); + return mapUniquedNode(Node, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); } Metadata *llvm::MapMetadata(const Metadata *MD, ValueToValueMapTy &VM, RemapFlags Flags, ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer) { - SmallVector Cycles; - Metadata *NewMD = - MapMetadataImpl(MD, Cycles, VM, Flags, TypeMapper, Materializer); + SmallVector DistinctWorklist; + Metadata *NewMD = MapMetadataImpl(MD, DistinctWorklist, VM, Flags, TypeMapper, + Materializer); - // Resolve cycles underneath MD. - if (NewMD && NewMD != MD) { - if (auto *N = dyn_cast(NewMD)) - if (!N->isResolved()) - N->resolveCycles(); + // When there are no module-level changes, it's possible that the metadata + // graph has temporaries. Skip the logic to resolve cycles, since it's + // unnecessary (and invalid) in that case. + if (Flags & RF_NoModuleLevelChanges) + return NewMD; - for (MDNode *N : Cycles) - if (!N->isResolved()) - N->resolveCycles(); - } else { - // Shouldn't get unresolved cycles if nothing was remapped. - assert(Cycles.empty() && "Expected no unresolved cycles"); - } + // Resolve cycles involving the entry metadata. + resolveCycles(NewMD, !(Flags & RF_HaveUnmaterializedMetadata)); + + // Remap the operands of distinct MDNodes. + while (!DistinctWorklist.empty()) + remapOperands(*DistinctWorklist.pop_back_val(), DistinctWorklist, VM, Flags, + TypeMapper, Materializer); return NewMD; } @@ -374,14 +451,11 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap, // Remap attached metadata. 
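[Note: the new mapTemporaryMetadata path exists for function importing: when the module still has unmaterialized metadata, the materializer may hand back a temporary node that an earlier import pass already created for the same value id, and the mapper must reuse it rather than mint another. A loose, self-contained model of that reuse contract; the cache and its keying are purely illustrative, not the upstream data structure:]

    #include <map>

    struct MDTemp {};

    // Hypothetical cache: one temporary node per source metadata, shared
    // across import passes so later passes reuse earlier temporaries.
    MDTemp *mapTemporaryMetadata(const void *MD,
                                 std::map<const void *, MDTemp *> &Cache) {
      auto It = Cache.find(MD);
      if (It != Cache.end())
        return It->second;           // reuse the earlier temporary
      return Cache[MD] = new MDTemp; // first request: create and remember
    }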
SmallVector, 4> MDs; I->getAllMetadata(MDs); - for (SmallVectorImpl>::iterator - MI = MDs.begin(), - ME = MDs.end(); - MI != ME; ++MI) { - MDNode *Old = MI->second; + for (const auto &MI : MDs) { + MDNode *Old = MI.second; MDNode *New = MapMetadata(Old, VMap, Flags, TypeMapper, Materializer); if (New != Old) - I->setMetadata(MI->first, New); + I->setMetadata(MI.first, New); } if (!TypeMapper) diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp index 215d6f9a1eb6..8844d574a79d 100644 --- a/lib/Transforms/Vectorize/BBVectorize.cpp +++ b/lib/Transforms/Vectorize/BBVectorize.cpp @@ -25,8 +25,11 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Constants.h" @@ -204,9 +207,10 @@ namespace { BBVectorize(Pass *P, Function &F, const VectorizeConfig &C) : BasicBlockPass(ID), Config(C) { - AA = &P->getAnalysis(); + AA = &P->getAnalysis().getAAResults(); DT = &P->getAnalysis().getDomTree(); - SE = &P->getAnalysis(); + SE = &P->getAnalysis().getSE(); + TLI = &P->getAnalysis().getTLI(); TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis().getTTI(F); @@ -221,6 +225,7 @@ namespace { AliasAnalysis *AA; DominatorTree *DT; ScalarEvolution *SE; + const TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; // FIXME: const correct? @@ -437,9 +442,10 @@ namespace { bool runOnBasicBlock(BasicBlock &BB) override { // OptimizeNone check deferred to vectorizeBB(). - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); DT = &getAnalysis().getDomTree(); - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); + TLI = &getAnalysis().getTLI(); TTI = IgnoreTargetInfo ? nullptr : &getAnalysis().getTTI( @@ -450,13 +456,15 @@ namespace { void getAnalysisUsage(AnalysisUsage &AU) const override { BasicBlockPass::getAnalysisUsage(AU); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.setPreservesCFG(); } @@ -842,7 +850,7 @@ namespace { // It is important to cleanup here so that future iterations of this // function have less work to do. - (void)SimplifyInstructionsInBlock(&BB, AA->getTargetLibraryInfo()); + (void)SimplifyInstructionsInBlock(&BB, TLI); return true; } @@ -1239,20 +1247,23 @@ namespace { if (I == Start) IAfterStart = true; bool IsSimpleLoadStore; - if (!isInstVectorizable(I, IsSimpleLoadStore)) continue; + if (!isInstVectorizable(&*I, IsSimpleLoadStore)) + continue; // Look for an instruction with which to pair instruction *I... DenseSet Users; AliasSetTracker WriteSet(*AA); - if (I->mayWriteToMemory()) WriteSet.add(I); + if (I->mayWriteToMemory()) + WriteSet.add(&*I); bool JAfterStart = IAfterStart; BasicBlock::iterator J = std::next(I); for (unsigned ss = 0; J != E && ss <= Config.SearchLimit; ++J, ++ss) { - if (J == Start) JAfterStart = true; + if (&*J == Start) + JAfterStart = true; // Determine if J uses I, if so, exit the loop. 
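[Note: the BBVectorize pass-manager hunks above also lost their template arguments. Based on the wrapper-pass APIs of this period, the analysis fetches and getAnalysisUsage plausibly read as below; a reconstruction, not verbatim text:]

    // runOnBasicBlock: analyses now come through *WrapperPass front-ends.
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      BasicBlockPass::getAnalysisUsage(AU);
      AU.addRequired<AAResultsWrapperPass>();
      AU.addRequired<DominatorTreeWrapperPass>();
      AU.addRequired<ScalarEvolutionWrapperPass>();
      AU.addRequired<TargetLibraryInfoWrapperPass>();
      AU.addRequired<TargetTransformInfoWrapperPass>();
      AU.addPreserved<DominatorTreeWrapperPass>();
      AU.addPreserved<GlobalsAAWrapperPass>();
      AU.addPreserved<SCEVAAWrapperPass>();
      AU.setPreservesCFG();
    }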
- bool UsesI = trackUsesOfI(Users, WriteSet, I, J, !Config.FastDep); + bool UsesI = trackUsesOfI(Users, WriteSet, &*I, &*J, !Config.FastDep); if (Config.FastDep) { // Note: For this heuristic to be effective, independent operations // must tend to be intermixed. This is likely to be true from some @@ -1269,25 +1280,26 @@ namespace { // J does not use I, and comes before the first use of I, so it can be // merged with I if the instructions are compatible. int CostSavings, FixedOrder; - if (!areInstsCompatible(I, J, IsSimpleLoadStore, NonPow2Len, - CostSavings, FixedOrder)) continue; + if (!areInstsCompatible(&*I, &*J, IsSimpleLoadStore, NonPow2Len, + CostSavings, FixedOrder)) + continue; // J is a candidate for merging with I. if (PairableInsts.empty() || - PairableInsts[PairableInsts.size()-1] != I) { - PairableInsts.push_back(I); + PairableInsts[PairableInsts.size() - 1] != &*I) { + PairableInsts.push_back(&*I); } - CandidatePairs[I].push_back(J); + CandidatePairs[&*I].push_back(&*J); ++TotalPairs; if (TTI) - CandidatePairCostSavings.insert(ValuePairWithCost(ValuePair(I, J), - CostSavings)); + CandidatePairCostSavings.insert( + ValuePairWithCost(ValuePair(&*I, &*J), CostSavings)); if (FixedOrder == 1) - FixedOrderPairs.insert(ValuePair(I, J)); + FixedOrderPairs.insert(ValuePair(&*I, &*J)); else if (FixedOrder == -1) - FixedOrderPairs.insert(ValuePair(J, I)); + FixedOrderPairs.insert(ValuePair(&*J, &*I)); // The next call to this function must start after the last instruction // selected during this invocation. @@ -1468,14 +1480,16 @@ namespace { BasicBlock::iterator E = BB.end(), EL = BasicBlock::iterator(cast(PairableInsts.back())); for (BasicBlock::iterator I = BB.getFirstInsertionPt(); I != E; ++I) { - if (IsInPair.find(I) == IsInPair.end()) continue; + if (IsInPair.find(&*I) == IsInPair.end()) + continue; DenseSet Users; AliasSetTracker WriteSet(*AA); - if (I->mayWriteToMemory()) WriteSet.add(I); + if (I->mayWriteToMemory()) + WriteSet.add(&*I); for (BasicBlock::iterator J = std::next(I); J != E; ++J) { - (void) trackUsesOfI(Users, WriteSet, I, J); + (void)trackUsesOfI(Users, WriteSet, &*I, &*J); if (J == EL) break; @@ -1484,7 +1498,7 @@ namespace { for (DenseSet::iterator U = Users.begin(), E = Users.end(); U != E; ++U) { if (IsInPair.find(*U) == IsInPair.end()) continue; - PairableInstUsers.insert(ValuePair(I, *U)); + PairableInstUsers.insert(ValuePair(&*I, *U)); } if (I == EL) @@ -2806,55 +2820,51 @@ namespace { Instruction *J, Instruction *K, Instruction *&InsertionPt, Instruction *&K1, Instruction *&K2) { - if (isa(I)) { - AA->replaceWithNewValue(I, K); - AA->replaceWithNewValue(J, K); + if (isa(I)) + return; + + Type *IType = I->getType(); + Type *JType = J->getType(); + + VectorType *VType = getVecTypeForPair(IType, JType); + unsigned numElem = VType->getNumElements(); + + unsigned numElemI = getNumScalarElements(IType); + unsigned numElemJ = getNumScalarElements(JType); + + if (IType->isVectorTy()) { + std::vector Mask1(numElemI), Mask2(numElemI); + for (unsigned v = 0; v < numElemI; ++v) { + Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ + v); + } + + K1 = new ShuffleVectorInst(K, UndefValue::get(VType), + ConstantVector::get(Mask1), + getReplacementName(K, false, 1)); } else { - Type *IType = I->getType(); - Type *JType = J->getType(); - - VectorType *VType = getVecTypeForPair(IType, JType); - unsigned numElem = VType->getNumElements(); - - unsigned numElemI = getNumScalarElements(IType); - 
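[Note: the pervasive `I` to `&*I` rewrites in this file are mechanical, and appear to be preparation for removing the implicit iterator-to-pointer conversion from ilist iterators around this revision: anywhere an iterator is passed where an Instruction pointer is expected, the dereference-and-take-address must now be explicit. The same idiom with a standard container:]

    #include <list>

    void takesPointer(int *) {}

    void example(std::list<int> &L) {
      for (std::list<int>::iterator I = L.begin(), E = L.end(); I != E; ++I)
        takesPointer(&*I); // explicit: iterators no longer decay to pointers
    }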
unsigned numElemJ = getNumScalarElements(JType); - - if (IType->isVectorTy()) { - std::vector Mask1(numElemI), Mask2(numElemI); - for (unsigned v = 0; v < numElemI; ++v) { - Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemJ+v); - } - - K1 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( Mask1), - getReplacementName(K, false, 1)); - } else { - Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); - K1 = ExtractElementInst::Create(K, CV0, - getReplacementName(K, false, 1)); - } - - if (JType->isVectorTy()) { - std::vector Mask1(numElemJ), Mask2(numElemJ); - for (unsigned v = 0; v < numElemJ; ++v) { - Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); - Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI+v); - } - - K2 = new ShuffleVectorInst(K, UndefValue::get(VType), - ConstantVector::get( Mask2), - getReplacementName(K, false, 2)); - } else { - Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem-1); - K2 = ExtractElementInst::Create(K, CV1, - getReplacementName(K, false, 2)); - } - - K1->insertAfter(K); - K2->insertAfter(K1); - InsertionPt = K2; + Value *CV0 = ConstantInt::get(Type::getInt32Ty(Context), 0); + K1 = ExtractElementInst::Create(K, CV0, getReplacementName(K, false, 1)); } + + if (JType->isVectorTy()) { + std::vector Mask1(numElemJ), Mask2(numElemJ); + for (unsigned v = 0; v < numElemJ; ++v) { + Mask1[v] = ConstantInt::get(Type::getInt32Ty(Context), v); + Mask2[v] = ConstantInt::get(Type::getInt32Ty(Context), numElemI + v); + } + + K2 = new ShuffleVectorInst(K, UndefValue::get(VType), + ConstantVector::get(Mask2), + getReplacementName(K, false, 2)); + } else { + Value *CV1 = ConstantInt::get(Type::getInt32Ty(Context), numElem - 1); + K2 = ExtractElementInst::Create(K, CV1, getReplacementName(K, false, 2)); + } + + K1->insertAfter(K); + K2->insertAfter(K1); + InsertionPt = K2; } // Move all uses of the function I (including pairing-induced uses) after J. @@ -2869,7 +2879,7 @@ namespace { if (I->mayWriteToMemory()) WriteSet.add(I); for (; cast(L) != J; ++L) - (void) trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs); + (void)trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs); assert(cast(L) == J && "Tracking has not proceeded far enough to check for dependencies"); @@ -2891,9 +2901,9 @@ namespace { if (I->mayWriteToMemory()) WriteSet.add(I); for (; cast(L) != J;) { - if (trackUsesOfI(Users, WriteSet, I, L, true, &LoadMoveSetPairs)) { + if (trackUsesOfI(Users, WriteSet, I, &*L, true, &LoadMoveSetPairs)) { // Move this instruction - Instruction *InstToMove = L; ++L; + Instruction *InstToMove = &*L++; DEBUG(dbgs() << "BBV: moving: " << *InstToMove << " to after " << *InsertionPt << "\n"); @@ -2924,11 +2934,11 @@ namespace { // Note: We cannot end the loop when we reach J because J could be moved // farther down the use chain by another instruction pairing. Also, J // could be before I if this is an inverted input. 
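[Note: the restructured code above builds two shuffle masks that pull the I-half and the J-half back out of the fused vector K, whose lanes are laid out as I's elements followed by J's. The index arithmetic, extracted into a standalone sketch:]

    #include <vector>

    // K = [ I0 .. I(nI-1) | J0 .. J(nJ-1) ]; K1 re-extracts the I half.
    std::vector<int> maskForIHalf(int NumElemI) {
      std::vector<int> M(NumElemI);
      for (int v = 0; v < NumElemI; ++v)
        M[v] = v;                 // lanes 0 .. nI-1
      return M;
    }

    // K2 re-extracts the J half from the back of the fused vector.
    std::vector<int> maskForJHalf(int NumElemI, int NumElemJ) {
      std::vector<int> M(NumElemJ);
      for (int v = 0; v < NumElemJ; ++v)
        M[v] = NumElemI + v;      // lanes nI .. nI+nJ-1
      return M;
    }

[Scalar inputs take the ExtractElementInst path instead, pulling lane 0 for I and the last lane for J, matching the CV0/CV1 constants above.]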
- for (BasicBlock::iterator E = BB.end(); cast(L) != E; ++L) { - if (trackUsesOfI(Users, WriteSet, I, L)) { + for (BasicBlock::iterator E = BB.end(); L != E; ++L) { + if (trackUsesOfI(Users, WriteSet, I, &*L)) { if (L->mayReadFromMemory()) { - LoadMoveSet[L].push_back(I); - LoadMoveSetPairs.insert(ValuePair(L, I)); + LoadMoveSet[&*L].push_back(I); + LoadMoveSetPairs.insert(ValuePair(&*L, I)); } } } @@ -2991,7 +3001,7 @@ namespace { DEBUG(dbgs() << "BBV: initial: \n" << BB << "\n"); for (BasicBlock::iterator PI = BB.getFirstInsertionPt(); PI != BB.end();) { - DenseMap::iterator P = ChosenPairs.find(PI); + DenseMap::iterator P = ChosenPairs.find(&*PI); if (P == ChosenPairs.end()) { ++PI; continue; @@ -3116,12 +3126,9 @@ namespace { } else if (!isa(K)) K->mutateType(getVecTypeForPair(L->getType(), H->getType())); - unsigned KnownIDs[] = { - LLVMContext::MD_tbaa, - LLVMContext::MD_alias_scope, - LLVMContext::MD_noalias, - LLVMContext::MD_fpmath - }; + unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope, + LLVMContext::MD_noalias, LLVMContext::MD_fpmath, + LLVMContext::MD_invariant_group}; combineMetadata(K, H, KnownIDs); K->intersectOptionalDataWith(H); @@ -3145,8 +3152,6 @@ namespace { if (!isa(I)) { L->replaceAllUsesWith(K1); H->replaceAllUsesWith(K2); - AA->replaceWithNewValue(L, K1); - AA->replaceWithNewValue(H, K2); } // Instructions that may read from memory may be in the load move set. @@ -3197,10 +3202,14 @@ namespace { char BBVectorize::ID = 0; static const char bb_vectorize_name[] = "Basic-Block Vectorization"; INITIALIZE_PASS_BEGIN(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_END(BBVectorize, BBV_NAME, bb_vectorize_name, false, false) BasicBlockPass *llvm::createBBVectorizePass(const VectorizeConfig &C) { diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 69ca2688c810..a627dd665179 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -48,7 +48,6 @@ #include "llvm/Transforms/Vectorize.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" @@ -58,10 +57,13 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -99,6 +101,7 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include +#include #include #include @@ -123,6 +126,11 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), 
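[Note: for readers unfamiliar with combineMetadata, the KnownIDs array is a whitelist: kinds listed there are merged between the fused instruction K and its partner H in a kind-specific way, while kinds not listed are dropped from K as no longer trustworthy. The change above adds !invariant_group to the whitelist so it can survive fusion. The call, annotated:]

    // Kinds named in KnownIDs are merged kind-appropriately; any other
    // metadata kind is stripped from K (the fused instruction).
    unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
                           LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
                           LLVMContext::MD_invariant_group};
    combineMetadata(K, H, KnownIDs);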
"trip count that is smaller than this " "value.")); +static cl::opt MaximizeBandwidth( + "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, + cl::desc("Maximize bandwidth when selecting vectorization factor which " + "will be determined by the smallest type in loop.")); + /// This enables versioning on the strides of symbolically striding memory /// accesses in code like the following. /// for (i = 0; i < N; ++i) @@ -136,7 +144,7 @@ TinyTripCountVectorThreshold("vectorizer-min-trip-count", cl::init(16), /// ... static cl::opt EnableMemAccessVersioning( "enable-mem-access-versioning", cl::init(true), cl::Hidden, - cl::desc("Enable symblic stride memory access versioning")); + cl::desc("Enable symbolic stride memory access versioning")); static cl::opt EnableInterleavedMemAccesses( "enable-interleaved-mem-accesses", cl::init(false), cl::Hidden, @@ -214,12 +222,27 @@ static cl::opt MaxNestedScalarReductionIC( cl::desc("The maximum interleave count to use when interleaving a scalar " "reduction in a nested loop.")); +static cl::opt PragmaVectorizeMemoryCheckThreshold( + "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, + cl::desc("The maximum allowed number of runtime memory checks with a " + "vectorize(enable) pragma.")); + +static cl::opt VectorizeSCEVCheckThreshold( + "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed.")); + +static cl::opt PragmaVectorizeSCEVCheckThreshold( + "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, + cl::desc("The maximum number of SCEV checks allowed with a " + "vectorize(enable) pragma")); + namespace { // Forward declarations. +class LoopVectorizeHints; class LoopVectorizationLegality; class LoopVectorizationCostModel; -class LoopVectorizeHints; +class LoopVectorizationRequirements; /// \brief This modifies LoopAccessReport to initialize message with /// loop-vectorizer-specific part. @@ -245,6 +268,32 @@ static Type* ToVectorTy(Type *Scalar, unsigned VF) { return VectorType::get(Scalar, VF); } +/// A helper function that returns GEP instruction and knows to skip a +/// 'bitcast'. The 'bitcast' may be skipped if the source and the destination +/// pointee types of the 'bitcast' have the same size. +/// For example: +/// bitcast double** %var to i64* - can be skipped +/// bitcast double** %var to i8* - can not +static GetElementPtrInst *getGEPInstruction(Value *Ptr) { + + if (isa(Ptr)) + return cast(Ptr); + + if (isa(Ptr) && + isa(cast(Ptr)->getOperand(0))) { + Type *BitcastTy = Ptr->getType(); + Type *GEPTy = cast(Ptr)->getSrcTy(); + if (!isa(BitcastTy) || !isa(GEPTy)) + return nullptr; + Type *Pointee1Ty = cast(BitcastTy)->getPointerElementType(); + Type *Pointee2Ty = cast(GEPTy)->getPointerElementType(); + const DataLayout &DL = cast(Ptr)->getModule()->getDataLayout(); + if (DL.getTypeSizeInBits(Pointee1Ty) == DL.getTypeSizeInBits(Pointee2Ty)) + return cast(cast(Ptr)->getOperand(0)); + } + return nullptr; +} + /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). /// This class performs the widening of scalars into vectors, or multiple @@ -261,25 +310,30 @@ static Type* ToVectorTy(Type *Scalar, unsigned VF) { /// and reduction variables that were found to a given vectorization factor. 
class InnerLoopVectorizer { public: - InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const TargetLibraryInfo *TLI, + InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, + LoopInfo *LI, DominatorTree *DT, + const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, unsigned VecWidth, unsigned UnrollFactor) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), - VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), + : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), + VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), - Legal(nullptr), AddedSafetyChecks(false) {} + TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr), + AddedSafetyChecks(false) {} // Perform the actual loop widening (vectorization). - void vectorize(LoopVectorizationLegality *L) { + // MinimumBitWidths maps scalar integer values to the smallest bitwidth they + // can be validly truncated to. The cost model has assumed this truncation + // will happen when vectorizing. + void vectorize(LoopVectorizationLegality *L, + MapVector MinimumBitWidths) { + MinBWs = MinimumBitWidths; Legal = L; // Create a new empty loop. Unlink the old loop and connect the new one. createEmptyLoop(); // Widen each instruction in the old loop to a new one in the new loop. // Use the Legality module to find the induction and reduction variables. vectorizeLoop(); - // Register the new loop and update the analysis passes. - updateAnalysis(); } // Return true if any runtime check is added. @@ -302,14 +356,11 @@ protected: typedef DenseMap, VectorParts> EdgeMaskCache; - /// \brief Add checks for strides that were assumed to be 1. - /// - /// Returns the last check instruction and the first check instruction in the - /// pair as (first, last). - std::pair addStrideCheck(Instruction *Loc); - /// Create an empty loop, based on the loop ranges of the old loop. void createEmptyLoop(); + /// Create a new induction variable inside L. + PHINode *createInductionVariable(Loop *L, Value *Start, Value *End, + Value *Step, Instruction *DL); /// Copy and widen the instructions from the old loop. virtual void vectorizeLoop(); @@ -319,6 +370,9 @@ protected: /// See PR14725. void fixLCSSAPHIs(); + /// Shrinks vector element sizes based on information in "MinBWs". + void truncateToMinimalBitwidths(); + /// A helper function that computes the predicate of the block BB, assuming /// that the header block of the loop is set to True. It returns the *entry* /// mask for the block BB. @@ -329,7 +383,7 @@ protected: /// A helper function to vectorize a single BB within the innermost loop. void vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV); - + /// Vectorize a single PHINode in a block. This method handles the induction /// variable canonicalization. It supports both VF = 1 for unrolled loops and /// arbitrary length vectors. @@ -374,6 +428,23 @@ protected: /// Generate a shuffle sequence that will reverse the vector Vec. virtual Value *reverseVector(Value *Vec); + /// Returns (and creates if needed) the original loop trip count. + Value *getOrCreateTripCount(Loop *NewLoop); + + /// Returns (and creates if needed) the trip count of the widened loop. + Value *getOrCreateVectorTripCount(Loop *NewLoop); + + /// Emit a bypass check to see if the trip count would overflow, or we + /// wouldn't have enough iterations to execute one vector loop. 
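[Note: the `MapVector MinimumBitWidths` parameter above also lost its arguments. Given how the map is used, scalar instruction to smallest legal bit width, the element types are plausibly `Instruction *` and `uint64_t`; an inferred reconstruction of the new entry point:]

    // The cost model hands the vectorizer a map of scalar values to the
    // narrowest width they can legally be truncated to; widening honors it
    // (see truncateToMinimalBitwidths below).
    void vectorize(LoopVectorizationLegality *L,
                   MapVector<Instruction *, uint64_t> MinimumBitWidths);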
+ void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); + /// Emit a bypass check to see if the vector trip count is nonzero. + void emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass); + /// Emit a bypass check to see if all of the SCEV assumptions we've + /// had to make are correct. + void emitSCEVChecks(Loop *L, BasicBlock *Bypass); + /// Emit bypass checks to check any memory assumptions we may have made. + void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass); + /// This is a helper class that holds the vectorizer state. It maps scalar /// instructions to vector instructions. When the code is 'unrolled' then /// then a single scalar value is mapped to multiple vector parts. The parts @@ -416,8 +487,10 @@ protected: /// The original loop. Loop *OrigLoop; - /// Scev analysis to use. - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies + /// dynamic knowledge to simplify SCEV expressions and converts them to a + /// more usable form. + PredicatedScalarEvolution &PSE; /// Loop Info. LoopInfo *LI; /// Dominator Tree. @@ -462,12 +535,21 @@ protected: PHINode *Induction; /// The induction variable of the old basic block. PHINode *OldInduction; - /// Holds the extended (to the widest induction type) start index. - Value *ExtendedIdx; /// Maps scalars to widened vectors. ValueMap WidenMap; + /// Store instructions that should be predicated, as a pair + /// + SmallVector, 4> PredicatedStores; EdgeMaskCache MaskCache; + /// Trip count of the original loop. + Value *TripCount; + /// Trip count of the widened loop (TripCount - TripCount % (VF*UF)) + Value *VectorTripCount; + /// Map of scalar integer values to the smallest bitwidth they can be legally + /// represented as. The vector equivalents of these values should be truncated + /// to this type. + MapVector MinBWs; LoopVectorizationLegality *Legal; // Record whether runtime check is added. @@ -476,10 +558,11 @@ protected: class InnerLoopUnroller : public InnerLoopVectorizer { public: - InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const TargetLibraryInfo *TLI, + InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE, + LoopInfo *LI, DominatorTree *DT, + const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, unsigned UnrollFactor) - : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor) {} + : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {} private: void scalarizeInstruction(Instruction *Instr, @@ -551,7 +634,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) { if (Kind != LLVMContext::MD_tbaa && Kind != LLVMContext::MD_alias_scope && Kind != LLVMContext::MD_noalias && - Kind != LLVMContext::MD_fpmath) + Kind != LLVMContext::MD_fpmath && + Kind != LLVMContext::MD_nontemporal) continue; To->setMetadata(Kind, M.second); @@ -559,7 +643,8 @@ static void propagateMetadata(Instruction *To, const Instruction *From) { } /// \brief Propagate known metadata from one instruction to a vector of others. -static void propagateMetadata(SmallVectorImpl &To, const Instruction *From) { +static void propagateMetadata(SmallVectorImpl &To, + const Instruction *From) { for (Value *V : To) if (Instruction *I = dyn_cast(V)) propagateMetadata(I, From); @@ -699,8 +784,9 @@ private: /// between the member and the group in a map. 
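[Note: the member comment below documents the widened loop's trip count as TripCount - TripCount % (VF*UF). A self-contained illustration of that arithmetic and of the matching minimum-iteration bypass emitted above it:]

    #include <cstdint>

    // The vector body runs VectorTripCount iterations in steps of VF*UF;
    // the remaining N % (VF*UF) iterations run in the scalar epilogue.
    uint64_t vectorTripCount(uint64_t N, unsigned VF, unsigned UF) {
      uint64_t Step = uint64_t(VF) * UF;
      return N - N % Step;
    }

    // emitMinimumIterationCountCheck's job, in spirit: bypass the vector
    // loop entirely when not even one full vector step fits.
    bool mustBypassVectorLoop(uint64_t N, unsigned VF, unsigned UF) {
      return N < uint64_t(VF) * UF;
    }

[For example, N = 100 with VF*UF = 8 gives a vector trip count of 96 and a four-iteration scalar epilogue.]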
class InterleavedAccessInfo { public: - InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT) - : SE(SE), TheLoop(L), DT(DT) {} + InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, + DominatorTree *DT) + : PSE(PSE), TheLoop(L), DT(DT) {} ~InterleavedAccessInfo() { SmallSet DelSet; @@ -730,7 +816,11 @@ public: } private: - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. + /// Simplifies SCEV expressions in the context of existing SCEV assumptions. + /// The interleaved access analysis can also add new predicates (for example + /// by versioning strides of pointers). + PredicatedScalarEvolution &PSE; Loop *TheLoop; DominatorTree *DT; @@ -778,407 +868,6 @@ private: const ValueToValueMap &Strides); }; -/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and -/// to what vectorization factor. -/// This class does not look at the profitability of vectorization, only the -/// legality. This class has two main kinds of checks: -/// * Memory checks - The code in canVectorizeMemory checks if vectorization -/// will change the order of memory accesses in a way that will change the -/// correctness of the program. -/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory -/// checks for a number of different conditions, such as the availability of a -/// single induction variable, that all types are supported and vectorize-able, -/// etc. This code reflects the capabilities of InnerLoopVectorizer. -/// This class is also used by InnerLoopVectorizer for identifying -/// induction variable and the different reduction variables. -class LoopVectorizationLegality { -public: - LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - TargetLibraryInfo *TLI, AliasAnalysis *AA, - Function *F, const TargetTransformInfo *TTI, - LoopAccessAnalysis *LAA) - : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F), - TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT), - Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false) {} - - /// This enum represents the kinds of inductions that we support. - enum InductionKind { - IK_NoInduction, ///< Not an induction variable. - IK_IntInduction, ///< Integer induction variable. Step = C. - IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem). - }; - - /// A struct for saving information about induction variables. - struct InductionInfo { - InductionInfo(Value *Start, InductionKind K, ConstantInt *Step) - : StartValue(Start), IK(K), StepValue(Step) { - assert(IK != IK_NoInduction && "Not an induction"); - assert(StartValue && "StartValue is null"); - assert(StepValue && !StepValue->isZero() && "StepValue is zero"); - assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) && - "StartValue is not a pointer for pointer induction"); - assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) && - "StartValue is not an integer for integer induction"); - assert(StepValue->getType()->isIntegerTy() && - "StepValue is not an integer"); - } - InductionInfo() - : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {} - - /// Get the consecutive direction. Returns: - /// 0 - unknown or non-consecutive. - /// 1 - consecutive and increasing. - /// -1 - consecutive and decreasing. 
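[Note: in the destructor above, `SmallSet DelSet` presumably reads `SmallSet<InterleaveGroup *, 4>`: several member instructions map to the same InterleaveGroup object, so the pointers must be deduplicated before deletion. The same pattern with standard containers:]

    #include <map>
    #include <set>

    struct Group {};

    // Many members share one Group; collect unique pointers so each group
    // is freed exactly once.
    void destroyGroups(std::map<int, Group *> &InterleaveGroupMap) {
      std::set<Group *> DelSet;
      for (auto &KV : InterleaveGroupMap)
        DelSet.insert(KV.second);
      for (Group *G : DelSet)
        delete G;
    }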
- int getConsecutiveDirection() const { - if (StepValue && (StepValue->isOne() || StepValue->isMinusOne())) - return StepValue->getSExtValue(); - return 0; - } - - /// Compute the transformed value of Index at offset StartValue using step - /// StepValue. - /// For integer induction, returns StartValue + Index * StepValue. - /// For pointer induction, returns StartValue[Index * StepValue]. - /// FIXME: The newly created binary instructions should contain nsw/nuw - /// flags, which can be found from the original scalar operations. - Value *transform(IRBuilder<> &B, Value *Index) const { - switch (IK) { - case IK_IntInduction: - assert(Index->getType() == StartValue->getType() && - "Index type does not match StartValue type"); - if (StepValue->isMinusOne()) - return B.CreateSub(StartValue, Index); - if (!StepValue->isOne()) - Index = B.CreateMul(Index, StepValue); - return B.CreateAdd(StartValue, Index); - - case IK_PtrInduction: - assert(Index->getType() == StepValue->getType() && - "Index type does not match StepValue type"); - if (StepValue->isMinusOne()) - Index = B.CreateNeg(Index); - else if (!StepValue->isOne()) - Index = B.CreateMul(Index, StepValue); - return B.CreateGEP(nullptr, StartValue, Index); - - case IK_NoInduction: - return nullptr; - } - llvm_unreachable("invalid enum"); - } - - /// Start value. - TrackingVH StartValue; - /// Induction kind. - InductionKind IK; - /// Step value. - ConstantInt *StepValue; - }; - - /// ReductionList contains the reduction descriptors for all - /// of the reductions that were found in the loop. - typedef DenseMap ReductionList; - - /// InductionList saves induction variables and maps them to the - /// induction descriptor. - typedef MapVector InductionList; - - /// Returns true if it is legal to vectorize this loop. - /// This does not mean that it is profitable to vectorize this - /// loop, only that it is legal to do so. - bool canVectorize(); - - /// Returns the Induction variable. - PHINode *getInduction() { return Induction; } - - /// Returns the reduction variables found in the loop. - ReductionList *getReductionVars() { return &Reductions; } - - /// Returns the induction variables found in the loop. - InductionList *getInductionVars() { return &Inductions; } - - /// Returns the widest induction type. - Type *getWidestInductionType() { return WidestIndTy; } - - /// Returns True if V is an induction variable in this loop. - bool isInductionVariable(const Value *V); - - /// Return true if the block BB needs to be predicated in order for the loop - /// to be vectorized. - bool blockNeedsPredication(BasicBlock *BB); - - /// Check if this pointer is consecutive when vectorizing. This happens - /// when the last index of the GEP is the induction variable, or that the - /// pointer itself is an induction variable. - /// This check allows us to vectorize A[idx] into a wide load/store. - /// Returns: - /// 0 - Stride is unknown or non-consecutive. - /// 1 - Address is consecutive. - /// -1 - Address is consecutive, and decreasing. - int isConsecutivePtr(Value *Ptr); - - /// Returns true if the value V is uniform within the loop. - bool isUniform(Value *V); - - /// Returns true if this instruction will remain scalar after vectorization. - bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); } - - /// Returns the information that we collected about runtime memory check. 
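[Note: the InductionInfo::transform body deleted above appears to have been absorbed into the InductionDescriptor utility elsewhere in the tree. Its documented closed forms, as plain arithmetic:]

    // From the deleted comments:
    //   integer induction: StartValue + Index * StepValue
    //   pointer induction: StartValue[Index * StepValue]
    long long transformInt(long long Start, long long Step, long long Index) {
      return Start + Index * Step;
    }

    double *transformPtr(double *Start, long long Step, long long Index) {
      return Start + Index * Step; // GEP(StartValue, Index * StepValue)
    }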
- const RuntimePointerChecking *getRuntimePointerChecking() const { - return LAI->getRuntimePointerChecking(); - } - - const LoopAccessInfo *getLAI() const { - return LAI; - } - - /// \brief Check if \p Instr belongs to any interleaved access group. - bool isAccessInterleaved(Instruction *Instr) { - return InterleaveInfo.isInterleaved(Instr); - } - - /// \brief Get the interleaved access group that \p Instr belongs to. - const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { - return InterleaveInfo.getInterleaveGroup(Instr); - } - - unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } - - bool hasStride(Value *V) { return StrideSet.count(V); } - bool mustCheckStrides() { return !StrideSet.empty(); } - SmallPtrSet::iterator strides_begin() { - return StrideSet.begin(); - } - SmallPtrSet::iterator strides_end() { return StrideSet.end(); } - - /// Returns true if the target machine supports masked store operation - /// for the given \p DataType and kind of access to \p Ptr. - bool isLegalMaskedStore(Type *DataType, Value *Ptr) { - return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr)); - } - /// Returns true if the target machine supports masked load operation - /// for the given \p DataType and kind of access to \p Ptr. - bool isLegalMaskedLoad(Type *DataType, Value *Ptr) { - return TTI->isLegalMaskedLoad(DataType, isConsecutivePtr(Ptr)); - } - /// Returns true if vector representation of the instruction \p I - /// requires mask. - bool isMaskRequired(const Instruction* I) { - return (MaskedOp.count(I) != 0); - } - unsigned getNumStores() const { - return LAI->getNumStores(); - } - unsigned getNumLoads() const { - return LAI->getNumLoads(); - } - unsigned getNumPredStores() const { - return NumPredStores; - } -private: - /// Check if a single basic block loop is vectorizable. - /// At this point we know that this is a loop with a constant trip count - /// and we only need to check individual instructions. - bool canVectorizeInstrs(); - - /// When we vectorize loops we may change the order in which - /// we read and write from memory. This method checks if it is - /// legal to vectorize the code, considering only memory constrains. - /// Returns true if the loop is vectorizable - bool canVectorizeMemory(); - - /// Return true if we can vectorize this loop using the IF-conversion - /// transformation. - bool canVectorizeWithIfConvert(); - - /// Collect the variables that need to stay uniform after vectorization. - void collectLoopUniforms(); - - /// Return true if all of the instructions in the block can be speculatively - /// executed. \p SafePtrs is a list of addresses that are known to be legal - /// and we know that we can read from them without segfault. - bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs); - - /// Returns the induction kind of Phi and record the step. This function may - /// return NoInduction if the PHI is not an induction variable. - InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue); - - /// \brief Collect memory access with loop invariant strides. - /// - /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop - /// invariant. - void collectStridedAccess(Value *LoadOrStoreInst); - - /// Report an analysis message to assist the user in diagnosing loops that are - /// not vectorized. These are handled as LoopAccessReport rather than - /// VectorizationReport because the << operator of VectorizationReport returns - /// LoopAccessReport. 
- void emitAnalysis(const LoopAccessReport &Message) { - LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME); - } - - unsigned NumPredStores; - - /// The loop that we evaluate. - Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; - /// Target Library Info. - TargetLibraryInfo *TLI; - /// Parent function - Function *TheFunction; - /// Target Transform Info - const TargetTransformInfo *TTI; - /// Dominator Tree. - DominatorTree *DT; - // LoopAccess analysis. - LoopAccessAnalysis *LAA; - // And the loop-accesses info corresponding to this loop. This pointer is - // null until canVectorizeMemory sets it up. - const LoopAccessInfo *LAI; - - /// The interleave access information contains groups of interleaved accesses - /// with the same stride and close to each other. - InterleavedAccessInfo InterleaveInfo; - - // --- vectorization state --- // - - /// Holds the integer induction variable. This is the counter of the - /// loop. - PHINode *Induction; - /// Holds the reduction variables. - ReductionList Reductions; - /// Holds all of the induction variables that we found in the loop. - /// Notice that inductions don't need to start at zero and that induction - /// variables can be pointers. - InductionList Inductions; - /// Holds the widest induction type encountered. - Type *WidestIndTy; - - /// Allowed outside users. This holds the reduction - /// vars which can be accessed from outside the loop. - SmallPtrSet AllowedExit; - /// This set holds the variables which are known to be uniform after - /// vectorization. - SmallPtrSet Uniforms; - - /// Can we assume the absence of NaNs. - bool HasFunNoNaNAttr; - - ValueToValueMap Strides; - SmallPtrSet StrideSet; - - /// While vectorizing these instructions we have to generate a - /// call to the appropriate masked intrinsic - SmallPtrSet MaskedOp; -}; - -/// LoopVectorizationCostModel - estimates the expected speedups due to -/// vectorization. -/// In many cases vectorization is not profitable. This can happen because of -/// a number of reasons. In this class we mainly attempt to predict the -/// expected speedup/slowdowns due to the supported instruction set. We use the -/// TargetTransformInfo to query the different backends for the cost of -/// different operations. -class LoopVectorizationCostModel { -public: - LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI, - LoopVectorizationLegality *Legal, - const TargetTransformInfo &TTI, - const TargetLibraryInfo *TLI, AssumptionCache *AC, - const Function *F, const LoopVectorizeHints *Hints) - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), - TheFunction(F), Hints(Hints) { - CodeMetrics::collectEphemeralValues(L, AC, EphValues); - } - - /// Information about vectorization costs - struct VectorizationFactor { - unsigned Width; // Vector width with best cost - unsigned Cost; // Cost of the loop with that width - }; - /// \return The most profitable vectorization factor and the cost of that VF. - /// This method checks every power of two up to VF. If UserVF is not ZERO - /// then this vectorization factor will be selected if vectorization is - /// possible. - VectorizationFactor selectVectorizationFactor(bool OptForSize); - - /// \return The size (in bits) of the widest type in the code that - /// needs to be vectorized. We ignore values that remain scalar such as - /// 64 bit loop indices. - unsigned getWidestType(); - - /// \return The desired interleave count. - /// If interleave count has been specified by metadata it will be returned. 
- /// Otherwise, the interleave count is computed and returned. VF and LoopCost - /// are the selected vectorization factor and the cost of the selected VF. - unsigned selectInterleaveCount(bool OptForSize, unsigned VF, - unsigned LoopCost); - - /// \return The most profitable unroll factor. - /// This method finds the best unroll-factor based on register pressure and - /// other parameters. VF and LoopCost are the selected vectorization factor - /// and the cost of the selected VF. - unsigned computeInterleaveCount(bool OptForSize, unsigned VF, - unsigned LoopCost); - - /// \brief A struct that represents some properties of the register usage - /// of a loop. - struct RegisterUsage { - /// Holds the number of loop invariant values that are used in the loop. - unsigned LoopInvariantRegs; - /// Holds the maximum number of concurrent live intervals in the loop. - unsigned MaxLocalUsers; - /// Holds the number of instructions in the loop. - unsigned NumInstructions; - }; - - /// \return information about the register usage of the loop. - RegisterUsage calculateRegisterUsage(); - -private: - /// Returns the expected execution cost. The unit of the cost does - /// not matter because we use the 'cost' units to compare different - /// vector widths. The cost that is returned is *not* normalized by - /// the factor width. - unsigned expectedCost(unsigned VF); - - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - unsigned getInstructionCost(Instruction *I, unsigned VF); - - /// Returns whether the instruction is a load or store and will be a emitted - /// as a vector operation. - bool isConsecutiveLoadOrStore(Instruction *I); - - /// Report an analysis message to assist the user in diagnosing loops that are - /// not vectorized. These are handled as LoopAccessReport rather than - /// VectorizationReport because the << operator of VectorizationReport returns - /// LoopAccessReport. - void emitAnalysis(const LoopAccessReport &Message) { - LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME); - } - - /// Values used only by @llvm.assume calls. - SmallPtrSet EphValues; - - /// The loop that we evaluate. - Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; - /// Loop Info analysis. - LoopInfo *LI; - /// Vectorization legality. - LoopVectorizationLegality *Legal; - /// Vector target information. - const TargetTransformInfo &TTI; - /// Target Library Info. - const TargetLibraryInfo *TLI; - const Function *TheFunction; - // Loop Vectorize Hint. - const LoopVectorizeHints *Hints; -}; - /// Utility class for getting and setting loop vectorizer hints in the form /// of loop metadata. 
/// This class keeps a number of loop annotations locally (as member variables) @@ -1258,6 +947,41 @@ public: writeHintsToMetadata(Hints); } + bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const { + if (getForce() == LoopVectorizeHints::FK_Disabled) { + DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); + emitOptimizationRemarkAnalysis(F->getContext(), + vectorizeAnalysisPassName(), *F, + L->getStartLoc(), emitRemark()); + return false; + } + + if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) { + DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); + emitOptimizationRemarkAnalysis(F->getContext(), + vectorizeAnalysisPassName(), *F, + L->getStartLoc(), emitRemark()); + return false; + } + + if (getWidth() == 1 && getInterleave() == 1) { + // FIXME: Add a separate metadata to indicate when the loop has already + // been vectorized instead of setting width and count to 1. + DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); + // FIXME: Add interleave.disable metadata. This will allow + // vectorize.disable to be used without disabling the pass and errors + // to differentiate between disabled vectorization and a width of 1. + emitOptimizationRemarkAnalysis( + F->getContext(), vectorizeAnalysisPassName(), *F, L->getStartLoc(), + "loop not vectorized: vectorization and interleaving are explicitly " + "disabled, or vectorize width and interleave count are both set to " + "1"); + return false; + } + + return true; + } + /// Dumps all the hint information. std::string emitRemark() const { VectorizationReport R; @@ -1281,6 +1005,26 @@ public: unsigned getWidth() const { return Width.Value; } unsigned getInterleave() const { return Interleave.Value; } enum ForceKind getForce() const { return (ForceKind)Force.Value; } + const char *vectorizeAnalysisPassName() const { + // If hints are provided that don't disable vectorization use the + // AlwaysPrint pass name to force the frontend to print the diagnostic. + if (getWidth() == 1) + return LV_NAME; + if (getForce() == LoopVectorizeHints::FK_Disabled) + return LV_NAME; + if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) + return LV_NAME; + return DiagnosticInfo::AlwaysPrint; + } + + bool allowReordering() const { + // When enabling loop hints are provided we allow the vectorizer to change + // the order of operations that is given by the scalar loop. This is not + // enabled by default because can be unsafe or inefficient. For example, + // reordering floating-point operations will change the way round-off + // error accumulates in the loop. + return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1; + } private: /// Find hints specified in the loop metadata and update local values. 
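[Note: the new allowVectorization helper added below centralizes the force/width/interleave gating that was previously inlined in the pass. Its decision logic, reduced to a self-contained predicate; enum values as in the original ForceKind, with FK_Undefined = -1, and the width == 1 && interleave == 1 case being the "already vectorized" marker described in the FIXMEs:]

    enum ForceKind { FK_Undefined = -1, FK_Disabled = 0, FK_Enabled = 1 };

    bool allowVectorization(ForceKind Force, unsigned Width,
                            unsigned Interleave, bool AlwaysVectorize) {
      if (Force == FK_Disabled)
        return false; // #pragma vectorize(disable)
      if (!AlwaysVectorize && Force != FK_Enabled)
        return false; // only explicitly requested loops in this mode
      if (Width == 1 && Interleave == 1)
        return false; // marker for disabled / already-vectorized loops
      return true;
    }

[allowReordering, added in the same hunk, is the companion predicate: reordering is permitted only under an explicit enable pragma or an explicit width greater than 1, since it can change how floating-point round-off accumulates.]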
@@ -1398,10 +1142,17 @@ private: const Loop *TheLoop; }; +static void emitAnalysisDiag(const Function *TheFunction, const Loop *TheLoop, + const LoopVectorizeHints &Hints, + const LoopAccessReport &Message) { + const char *Name = Hints.vectorizeAnalysisPassName(); + LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, Name); +} + static void emitMissedWarning(Function *F, Loop *L, const LoopVectorizeHints &LH) { - emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F, - L->getStartLoc(), LH.emitRemark()); + emitOptimizationRemarkMissed(F->getContext(), LV_NAME, *F, L->getStartLoc(), + LH.emitRemark()); if (LH.getForce() == LoopVectorizeHints::FK_Enabled) { if (LH.getWidth() != 1) @@ -1415,6 +1166,420 @@ static void emitMissedWarning(Function *F, Loop *L, } } +/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and +/// to what vectorization factor. +/// This class does not look at the profitability of vectorization, only the +/// legality. This class has two main kinds of checks: +/// * Memory checks - The code in canVectorizeMemory checks if vectorization +/// will change the order of memory accesses in a way that will change the +/// correctness of the program. +/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory +/// checks for a number of different conditions, such as the availability of a +/// single induction variable, that all types are supported and vectorize-able, +/// etc. This code reflects the capabilities of InnerLoopVectorizer. +/// This class is also used by InnerLoopVectorizer for identifying +/// induction variable and the different reduction variables. +class LoopVectorizationLegality { +public: + LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, + DominatorTree *DT, TargetLibraryInfo *TLI, + AliasAnalysis *AA, Function *F, + const TargetTransformInfo *TTI, + LoopAccessAnalysis *LAA, + LoopVectorizationRequirements *R, + const LoopVectorizeHints *H) + : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F), + TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PSE, L, DT), + Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false), + Requirements(R), Hints(H) {} + + /// ReductionList contains the reduction descriptors for all + /// of the reductions that were found in the loop. + typedef DenseMap ReductionList; + + /// InductionList saves induction variables and maps them to the + /// induction descriptor. + typedef MapVector InductionList; + + /// Returns true if it is legal to vectorize this loop. + /// This does not mean that it is profitable to vectorize this + /// loop, only that it is legal to do so. + bool canVectorize(); + + /// Returns the Induction variable. + PHINode *getInduction() { return Induction; } + + /// Returns the reduction variables found in the loop. + ReductionList *getReductionVars() { return &Reductions; } + + /// Returns the induction variables found in the loop. + InductionList *getInductionVars() { return &Inductions; } + + /// Returns the widest induction type. + Type *getWidestInductionType() { return WidestIndTy; } + + /// Returns True if V is an induction variable in this loop. + bool isInductionVariable(const Value *V); + + /// Returns True if PN is a reduction variable in this loop. + bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); } + + /// Return true if the block BB needs to be predicated in order for the loop + /// to be vectorized. 
+ bool blockNeedsPredication(BasicBlock *BB); + + /// Check if this pointer is consecutive when vectorizing. This happens + /// when the last index of the GEP is the induction variable, or that the + /// pointer itself is an induction variable. + /// This check allows us to vectorize A[idx] into a wide load/store. + /// Returns: + /// 0 - Stride is unknown or non-consecutive. + /// 1 - Address is consecutive. + /// -1 - Address is consecutive, and decreasing. + int isConsecutivePtr(Value *Ptr); + + /// Returns true if the value V is uniform within the loop. + bool isUniform(Value *V); + + /// Returns true if this instruction will remain scalar after vectorization. + bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); } + + /// Returns the information that we collected about runtime memory check. + const RuntimePointerChecking *getRuntimePointerChecking() const { + return LAI->getRuntimePointerChecking(); + } + + const LoopAccessInfo *getLAI() const { + return LAI; + } + + /// \brief Check if \p Instr belongs to any interleaved access group. + bool isAccessInterleaved(Instruction *Instr) { + return InterleaveInfo.isInterleaved(Instr); + } + + /// \brief Get the interleaved access group that \p Instr belongs to. + const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) { + return InterleaveInfo.getInterleaveGroup(Instr); + } + + unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); } + + bool hasStride(Value *V) { return StrideSet.count(V); } + bool mustCheckStrides() { return !StrideSet.empty(); } + SmallPtrSet::iterator strides_begin() { + return StrideSet.begin(); + } + SmallPtrSet::iterator strides_end() { return StrideSet.end(); } + + /// Returns true if the target machine supports masked store operation + /// for the given \p DataType and kind of access to \p Ptr. + bool isLegalMaskedStore(Type *DataType, Value *Ptr) { + return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType); + } + /// Returns true if the target machine supports masked load operation + /// for the given \p DataType and kind of access to \p Ptr. + bool isLegalMaskedLoad(Type *DataType, Value *Ptr) { + return isConsecutivePtr(Ptr) && TTI->isLegalMaskedLoad(DataType); + } + /// Returns true if vector representation of the instruction \p I + /// requires mask. + bool isMaskRequired(const Instruction* I) { + return (MaskedOp.count(I) != 0); + } + unsigned getNumStores() const { + return LAI->getNumStores(); + } + unsigned getNumLoads() const { + return LAI->getNumLoads(); + } + unsigned getNumPredStores() const { + return NumPredStores; + } +private: + /// Check if a single basic block loop is vectorizable. + /// At this point we know that this is a loop with a constant trip count + /// and we only need to check individual instructions. + bool canVectorizeInstrs(); + + /// When we vectorize loops we may change the order in which + /// we read and write from memory. This method checks if it is + /// legal to vectorize the code, considering only memory constrains. + /// Returns true if the loop is vectorizable + bool canVectorizeMemory(); + + /// Return true if we can vectorize this loop using the IF-conversion + /// transformation. + bool canVectorizeWithIfConvert(); + + /// Collect the variables that need to stay uniform after vectorization. + void collectLoopUniforms(); + + /// Return true if all of the instructions in the block can be speculatively + /// executed. 
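[Note: the masked load/store legality below is a subtle semantic shift relative to the definitions deleted earlier in this diff. Previously consecutiveness was forwarded to TTI as an argument; now legality itself requires a consecutive pointer, and TTI is consulted only about the data type:]

    // Before (deleted above):
    //   return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr));
    // After:
    //   return isConsecutivePtr(Ptr) && TTI->isLegalMaskedStore(DataType);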
\p SafePtrs is a list of addresses that are known to be legal + /// and we know that we can read from them without segfault. + bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl &SafePtrs); + + /// \brief Collect memory access with loop invariant strides. + /// + /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop + /// invariant. + void collectStridedAccess(Value *LoadOrStoreInst); + + /// Report an analysis message to assist the user in diagnosing loops that are + /// not vectorized. These are handled as LoopAccessReport rather than + /// VectorizationReport because the << operator of VectorizationReport returns + /// LoopAccessReport. + void emitAnalysis(const LoopAccessReport &Message) const { + emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message); + } + + unsigned NumPredStores; + + /// The loop that we evaluate. + Loop *TheLoop; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. + /// Applies dynamic knowledge to simplify SCEV expressions in the context + /// of existing SCEV assumptions. The analysis will also add a minimal set + /// of new predicates if this is required to enable vectorization and + /// unrolling. + PredicatedScalarEvolution &PSE; + /// Target Library Info. + TargetLibraryInfo *TLI; + /// Parent function + Function *TheFunction; + /// Target Transform Info + const TargetTransformInfo *TTI; + /// Dominator Tree. + DominatorTree *DT; + // LoopAccess analysis. + LoopAccessAnalysis *LAA; + // And the loop-accesses info corresponding to this loop. This pointer is + // null until canVectorizeMemory sets it up. + const LoopAccessInfo *LAI; + + /// The interleave access information contains groups of interleaved accesses + /// with the same stride and close to each other. + InterleavedAccessInfo InterleaveInfo; + + // --- vectorization state --- // + + /// Holds the integer induction variable. This is the counter of the + /// loop. + PHINode *Induction; + /// Holds the reduction variables. + ReductionList Reductions; + /// Holds all of the induction variables that we found in the loop. + /// Notice that inductions don't need to start at zero and that induction + /// variables can be pointers. + InductionList Inductions; + /// Holds the widest induction type encountered. + Type *WidestIndTy; + + /// Allowed outside users. This holds the reduction + /// vars which can be accessed from outside the loop. + SmallPtrSet AllowedExit; + /// This set holds the variables which are known to be uniform after + /// vectorization. + SmallPtrSet Uniforms; + + /// Can we assume the absence of NaNs. + bool HasFunNoNaNAttr; + + /// Vectorization requirements that will go through late-evaluation. + LoopVectorizationRequirements *Requirements; + + /// Used to emit an analysis of any legality issues. + const LoopVectorizeHints *Hints; + + ValueToValueMap Strides; + SmallPtrSet StrideSet; + + /// While vectorizing these instructions we have to generate a + /// call to the appropriate masked intrinsic + SmallPtrSet MaskedOp; +}; + +/// LoopVectorizationCostModel - estimates the expected speedups due to +/// vectorization. +/// In many cases vectorization is not profitable. This can happen because of +/// a number of reasons. In this class we mainly attempt to predict the +/// expected speedup/slowdowns due to the supported instruction set. We use the +/// TargetTransformInfo to query the different backends for the cost of +/// different operations. 
+class LoopVectorizationCostModel {
+public:
+  LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
+                             LoopInfo *LI, LoopVectorizationLegality *Legal,
+                             const TargetTransformInfo &TTI,
+                             const TargetLibraryInfo *TLI, DemandedBits *DB,
+                             AssumptionCache *AC, const Function *F,
+                             const LoopVectorizeHints *Hints)
+      : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
+        AC(AC), TheFunction(F), Hints(Hints) {}
+
+  /// Information about vectorization costs.
+  struct VectorizationFactor {
+    unsigned Width; // Vector width with best cost.
+    unsigned Cost;  // Cost of the loop with that width.
+  };
+  /// \return The most profitable vectorization factor and the cost of that VF.
+  /// This method checks every power of two up to VF. If UserVF is not ZERO
+  /// then this vectorization factor will be selected if vectorization is
+  /// possible.
+  VectorizationFactor selectVectorizationFactor(bool OptForSize);
+
+  /// \return The size (in bits) of the smallest and widest types in the code
+  /// that needs to be vectorized. We ignore values that remain scalar such as
+  /// 64 bit loop indices.
+  std::pair<unsigned, unsigned> getSmallestAndWidestTypes();
+
+  /// \return The desired interleave count.
+  /// If interleave count has been specified by metadata it will be returned.
+  /// Otherwise, the interleave count is computed and returned. VF and LoopCost
+  /// are the selected vectorization factor and the cost of the selected VF.
+  unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
+                                 unsigned LoopCost);
+
+  /// \return The most profitable unroll factor.
+  /// This method finds the best unroll-factor based on register pressure and
+  /// other parameters. VF and LoopCost are the selected vectorization factor
+  /// and the cost of the selected VF.
+  unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
+                                  unsigned LoopCost);
+
+  /// \brief A struct that represents some properties of the register usage
+  /// of a loop.
+  struct RegisterUsage {
+    /// Holds the number of loop invariant values that are used in the loop.
+    unsigned LoopInvariantRegs;
+    /// Holds the maximum number of concurrent live intervals in the loop.
+    unsigned MaxLocalUsers;
+    /// Holds the number of instructions in the loop.
+    unsigned NumInstructions;
+  };
+
+  /// \return Returns information about the register usages of the loop for the
+  /// given vectorization factors.
+  SmallVector<RegisterUsage, 8>
+  calculateRegisterUsage(const SmallVector<unsigned, 8> &VFs);
+
+  /// Collect values we want to ignore in the cost model.
+  void collectValuesToIgnore();
+
+private:
+  /// Returns the expected execution cost. The unit of the cost does
+  /// not matter because we use the 'cost' units to compare different
+  /// vector widths. The cost that is returned is *not* normalized by
+  /// the factor width.
+  unsigned expectedCost(unsigned VF);
+
+  /// Returns the execution time cost of an instruction for a given vector
+  /// width. Vector width of one means scalar.
+  unsigned getInstructionCost(Instruction *I, unsigned VF);
+
+  /// Returns whether the instruction is a load or store and will be emitted
+  /// as a vector operation.
+  bool isConsecutiveLoadOrStore(Instruction *I);
+
+  /// Report an analysis message to assist the user in diagnosing loops that
+  /// are not vectorized. These are handled as LoopAccessReport rather than
+  /// VectorizationReport because the << operator of VectorizationReport
+  /// returns LoopAccessReport.
+  void emitAnalysis(const LoopAccessReport &Message) const {
+    emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message);
+  }
+
+public:
+  /// Map of scalar integer values to the smallest bitwidth they can be legally
+  /// represented as. The vector equivalents of these values should be truncated
+  /// to this type.
+  MapVector<Instruction *, uint64_t> MinBWs;
+
+  /// The loop that we evaluate.
+  Loop *TheLoop;
+  /// Predicated scalar evolution analysis.
+  PredicatedScalarEvolution &PSE;
+  /// Loop Info analysis.
+  LoopInfo *LI;
+  /// Vectorization legality.
+  LoopVectorizationLegality *Legal;
+  /// Vector target information.
+  const TargetTransformInfo &TTI;
+  /// Target Library Info.
+  const TargetLibraryInfo *TLI;
+  /// Demanded bits analysis.
+  DemandedBits *DB;
+  /// Assumption cache.
+  AssumptionCache *AC;
+  const Function *TheFunction;
+  /// Loop Vectorize Hint.
+  const LoopVectorizeHints *Hints;
+  /// Values to ignore in the cost model.
+  SmallPtrSet<const Value *, 16> ValuesToIgnore;
+  /// Values to ignore in the cost model when VF > 1.
+  SmallPtrSet<const Value *, 16> VecValuesToIgnore;
+};
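To make the selection strategy concrete, here is a simplified, self-contained model of the power-of-two scan performed by selectVectorizationFactor(). The toy cost function is an assumption standing in for the TTI-backed expectedCost(); the key idea it demonstrates is that candidate widths are compared on cost per lane:

    #include <cstdio>

    // Toy stand-in for the TTI-backed expectedCost(); illustrative only.
    static unsigned toyExpectedCost(unsigned VF) { return 16 / VF + 2; }

    // Scan widths 1, 2, 4, ..., MaxVF and keep the cheapest cost per lane.
    static unsigned pickBestWidth(unsigned MaxVF) {
      unsigned BestWidth = 1;
      float BestCost = (float)toyExpectedCost(1);
      for (unsigned Width = 2; Width <= MaxVF; Width *= 2) {
        float PerLaneCost = (float)toyExpectedCost(Width) / Width;
        if (PerLaneCost < BestCost) {
          BestCost = PerLaneCost;
          BestWidth = Width;
        }
      }
      return BestWidth;
    }

    int main() {
      std::printf("best width for MaxVF=8: %u\n", pickBestWidth(8));
      return 0;
    }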
+/// \brief This holds vectorization requirements that must be verified late in
+/// the process. The requirements are set by legalize and costmodel. Once
+/// vectorization has been determined to be possible and profitable the
+/// requirements can be verified by looking for metadata or compiler options.
+/// For example, some loops require FP commutativity which is only allowed if
+/// vectorization is explicitly specified or if the fast-math compiler option
+/// has been provided.
+/// Late evaluation of these requirements allows helpful diagnostics to be
+/// composed that tell the user what needs to be done to vectorize the loop.
+/// For example, by specifying #pragma clang loop vectorize or -ffast-math.
+/// Late evaluation should be used only when diagnostics can be generated that
+/// can be followed by a non-expert user.
+class LoopVectorizationRequirements {
+public:
+  LoopVectorizationRequirements()
+      : NumRuntimePointerChecks(0), UnsafeAlgebraInst(nullptr) {}
+
+  void addUnsafeAlgebraInst(Instruction *I) {
+    // First unsafe algebra instruction.
+    if (!UnsafeAlgebraInst)
+      UnsafeAlgebraInst = I;
+  }
+
+  void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
+
+  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
+    const char *Name = Hints.vectorizeAnalysisPassName();
+    bool Failed = false;
+    if (UnsafeAlgebraInst && !Hints.allowReordering()) {
+      emitOptimizationRemarkAnalysisFPCommute(
+          F->getContext(), Name, *F, UnsafeAlgebraInst->getDebugLoc(),
+          VectorizationReport() << "cannot prove it is safe to reorder "
+                                   "floating-point operations");
+      Failed = true;
+    }
+
+    // Test if runtime memcheck thresholds are exceeded.
+ bool PragmaThresholdReached = + NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; + bool ThresholdReached = + NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; + if ((ThresholdReached && !Hints.allowReordering()) || + PragmaThresholdReached) { + emitOptimizationRemarkAnalysisAliasing( + F->getContext(), Name, *F, L->getStartLoc(), + VectorizationReport() + << "cannot prove it is safe to reorder memory operations"); + DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); + Failed = true; + } + + return Failed; + } + +private: + unsigned NumRuntimePointerChecks; + Instruction *UnsafeAlgebraInst; +}; + static void addInnerLoop(Loop &L, SmallVectorImpl &V) { if (L.empty()) return V.push_back(&L); @@ -1441,6 +1606,7 @@ struct LoopVectorize : public FunctionPass { DominatorTree *DT; BlockFrequencyInfo *BFI; TargetLibraryInfo *TLI; + DemandedBits *DB; AliasAnalysis *AA; AssumptionCache *AC; LoopAccessAnalysis *LAA; @@ -1450,16 +1616,17 @@ struct LoopVectorize : public FunctionPass { BlockFrequency ColdEntryFreq; bool runOnFunction(Function &F) override { - SE = &getAnalysis(); + SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); TTI = &getAnalysis().getTTI(F); DT = &getAnalysis().getDomTree(); - BFI = &getAnalysis(); + BFI = &getAnalysis().getBFI(); auto *TLIP = getAnalysisIfAvailable(); TLI = TLIP ? &TLIP->getTLI() : nullptr; - AA = &getAnalysis(); + AA = &getAnalysis().getAAResults(); AC = &getAnalysis().getAssumptionCache(F); LAA = &getAnalysis(); + DB = &getAnalysis(); // Compute some weights outside of the loop over the loops. Compute this // using a BranchProbability to re-use its scaling math. @@ -1562,26 +1729,8 @@ struct LoopVectorize : public FunctionPass { // less verbose reporting vectorized loops and unvectorized loops that may // benefit from vectorization, respectively. - if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) { - DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); - emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, - L->getStartLoc(), Hints.emitRemark()); - return false; - } - - if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) { - DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); - emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, - L->getStartLoc(), Hints.emitRemark()); - return false; - } - - if (Hints.getWidth() == 1 && Hints.getInterleave() == 1) { - DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); - emitOptimizationRemarkAnalysis( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - "loop not vectorized: vector width and interleave count are " - "explicitly set to 1"); + if (!Hints.allowVectorization(F, L, AlwaysVectorize)) { + DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); return false; } @@ -1595,15 +1744,19 @@ struct LoopVectorize : public FunctionPass { DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); else { DEBUG(dbgs() << "\n"); - emitOptimizationRemarkAnalysis( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - "vectorization is not beneficial and is not explicitly forced"); + emitAnalysisDiag(F, L, Hints, VectorizationReport() + << "vectorization is not beneficial " + "and is not explicitly forced"); return false; } } + PredicatedScalarEvolution PSE(*SE); + // Check if it is legal to vectorize the loop. 
- LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA); + LoopVectorizationRequirements Requirements; + LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, LAA, + &Requirements, &Hints); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1611,16 +1764,18 @@ struct LoopVectorize : public FunctionPass { } // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints); + LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F, + &Hints); + CM.collectValuesToIgnore(); // Check the function attributes to find out if this function should be // optimized for size. bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && - F->hasFnAttribute(Attribute::OptimizeForSize); + F->optForSize(); // Compute the weighted frequency of this loop being executed and see if it // is less than 20% of the function entry baseline frequency. Note that we - // always have a canonical loop here because we think we *can* vectoriez. + // always have a canonical loop here because we think we *can* vectorize. // FIXME: This is hidden behind a flag due to pervasive problems with // exactly what block frequency models. if (LoopVectorizeWithBlockFrequency) { @@ -1630,16 +1785,17 @@ struct LoopVectorize : public FunctionPass { OptForSize = true; } - // Check the function attributes to see if implicit floats are allowed.a + // Check the function attributes to see if implicit floats are allowed. // FIXME: This check doesn't seem possibly correct -- what if the loop is // an integer loop and the vector instructions selected are purely integer // vector instructions? if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" "attribute is used.\n"); - emitOptimizationRemarkAnalysis( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - "loop not vectorized due to NoImplicitFloat attribute"); + emitAnalysisDiag( + F, L, Hints, + VectorizationReport() + << "loop not vectorized due to NoImplicitFloat attribute"); emitMissedWarning(F, L, Hints); return false; } @@ -1651,32 +1807,86 @@ struct LoopVectorize : public FunctionPass { // Select the interleave count. unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + // Get user interleave count. + unsigned UserIC = Hints.getInterleave(); + + // Identify the diagnostic messages that should be produced. + std::string VecDiagMsg, IntDiagMsg; + bool VectorizeLoop = true, InterleaveLoop = true; + + if (Requirements.doesNotMeet(F, L, Hints)) { + DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " + "requirements.\n"); + emitMissedWarning(F, L, Hints); + return false; + } if (VF.Width == 1) { - DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n"); + DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); + VecDiagMsg = + "the cost-model indicates that vectorization is not beneficial"; + VectorizeLoop = false; + } - if (IC == 1) { - emitOptimizationRemarkAnalysis( - F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - "not beneficial to vectorize and user disabled interleaving"); - return false; - } - DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n"); + if (IC == 1 && UserIC <= 1) { + // Tell the user interleaving is not beneficial. 
+ DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); + IntDiagMsg = + "the cost-model indicates that interleaving is not beneficial"; + InterleaveLoop = false; + if (UserIC == 1) + IntDiagMsg += + " and is explicitly disabled or interleave count is set to 1"; + } else if (IC > 1 && UserIC == 1) { + // Tell the user interleaving is beneficial, but it explicitly disabled. + DEBUG(dbgs() + << "LV: Interleaving is beneficial but is explicitly disabled."); + IntDiagMsg = "the cost-model indicates that interleaving is beneficial " + "but is explicitly disabled or interleave count is set to 1"; + InterleaveLoop = false; + } - // Report the unrolling decision. - emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), - Twine("interleaved by " + Twine(IC) + - " (vectorization not beneficial)")); + // Override IC if user provided an interleave count. + IC = UserIC > 0 ? UserIC : IC; - InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC); - Unroller.vectorize(&LVL); + // Emit diagnostic messages, if any. + const char *VAPassName = Hints.vectorizeAnalysisPassName(); + if (!VectorizeLoop && !InterleaveLoop) { + // Do not vectorize or interleaving the loop. + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, + L->getStartLoc(), VecDiagMsg); + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, + L->getStartLoc(), IntDiagMsg); + return false; + } else if (!VectorizeLoop && InterleaveLoop) { + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, + L->getStartLoc(), VecDiagMsg); + } else if (VectorizeLoop && !InterleaveLoop) { + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, + L->getStartLoc(), IntDiagMsg); + } else if (VectorizeLoop && InterleaveLoop) { + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + } + + if (!VectorizeLoop) { + assert(IC > 1 && "interleave count should not be 1 or 0"); + // If we decided that it is not legal to vectorize the loop then + // interleave it. + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC); + Unroller.vectorize(&LVL, CM.MinBWs); + + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), + Twine("interleaved loop (interleaved count: ") + + Twine(IC) + ")"); } else { // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC); - LB.vectorize(&LVL); + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC); + LB.vectorize(&LVL, CM.MinBWs); ++LoopsVectorized; // Add metadata to disable runtime unrolling scalar loop when there's no @@ -1686,7 +1896,7 @@ struct LoopVectorize : public FunctionPass { AddRuntimeUnrollDisableMetaData(L); // Report the vectorization decision. 
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), Twine("vectorized loop (vectorization width: ") + Twine(VF.Width) + ", interleaved count: " + Twine(IC) + ")"); @@ -1703,16 +1913,19 @@ struct LoopVectorize : public FunctionPass { AU.addRequired(); AU.addRequiredID(LoopSimplifyID); AU.addRequiredID(LCSSAID); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); - AU.addRequired(); + AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); - AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } }; @@ -1773,6 +1986,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); + auto *SE = PSE.getSE(); // Make sure that the pointer does not point to structs. if (Ptr->getType()->getPointerElementType()->isAggregateType()) return 0; @@ -1780,11 +1994,11 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // If this value is a pointer induction variable we know it is consecutive. PHINode *Phi = dyn_cast_or_null(Ptr); if (Phi && Inductions.count(Phi)) { - InductionInfo II = Inductions[Phi]; + InductionDescriptor II = Inductions[Phi]; return II.getConsecutiveDirection(); } - GetElementPtrInst *Gep = dyn_cast_or_null(Ptr); + GetElementPtrInst *Gep = getGEPInstruction(Ptr); if (!Gep) return 0; @@ -1802,10 +2016,10 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // Make sure that all of the index operands are loop invariant. for (unsigned i = 1; i < NumOperands; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; - InductionInfo II = Inductions[Phi]; + InductionDescriptor II = Inductions[Phi]; return II.getConsecutiveDirection(); } @@ -1815,14 +2029,14 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // operand. for (unsigned i = 0; i != NumOperands; ++i) if (i != InductionOperand && - !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; // We can emit wide load/stores only if the last non-zero index is the // induction variable. const SCEV *Last = nullptr; if (!Strides.count(Gep)) - Last = SE->getSCEV(Gep->getOperand(InductionOperand)); + Last = PSE.getSCEV(Gep->getOperand(InductionOperand)); else { // Because of the multiplication by a stride we can have a s/zext cast. // We are going to replace this stride by 1 so the cast is safe to ignore. @@ -1833,7 +2047,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // %idxprom = zext i32 %mul to i64 << Safe cast. // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom // - Last = replaceSymbolicStrideSCEV(SE, Strides, + Last = replaceSymbolicStrideSCEV(PSE, Strides, Gep->getOperand(InductionOperand), Gep); if (const SCEVCastExpr *C = dyn_cast(Last)) Last = @@ -2177,7 +2391,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { VectorParts &Entry = WidenMap.get(Instr); // Handle consecutive loads/stores. 
- GetElementPtrInst *Gep = dyn_cast(Ptr); + GetElementPtrInst *Gep = getGEPInstruction(Ptr); if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) { setDebugLocFromInst(Builder, Gep); Value *PtrOperand = Gep->getPointerOperand(); @@ -2191,8 +2405,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { Ptr = Builder.Insert(Gep2); } else if (Gep) { setDebugLocFromInst(Builder, Gep); - assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), - OrigLoop) && "Base ptr must be invariant"); + assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), + OrigLoop) && + "Base ptr must be invariant"); // The last index does not have to be the induction. It can be // consecutive and be a function of the index. For example A[I+1]; @@ -2209,7 +2424,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { if (i == InductionOperand || (GepOperandInst && OrigLoop->contains(GepOperandInst))) { assert((i == InductionOperand || - SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) && + PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst), + OrigLoop)) && "Must be last index or loop invariant"); VectorParts &GEPParts = getVectorValue(GepOperand); @@ -2237,14 +2453,14 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { // We don't want to update the value in the map as it might be used in // another expression. So don't use a reference type for "StoredVal". VectorParts StoredVal = getVectorValue(SI->getValueOperand()); - + for (unsigned Part = 0; Part < UF; ++Part) { // Calculate the pointer for the specific unroll-part. Value *PartPtr = Builder.CreateGEP(nullptr, Ptr, Builder.getInt32(Part * VF)); if (Reverse) { - // If we store to reverse consecutive memory locations then we need + // If we store to reverse consecutive memory locations, then we need // to reverse the order of elements in the stored value. StoredVal[Part] = reverseVector(StoredVal[Part]); // If the address is consecutive but reversed, then the @@ -2298,7 +2514,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { } } -void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) { +void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, + bool IfPredicateStore) { assert(!Instr->getType()->isAggregateType() && "Can't handle vectors"); // Holds vector parameters or scalars, in case of uniform vals. SmallVector Params; @@ -2318,7 +2535,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic // Try using previously calculated values. Instruction *SrcInst = dyn_cast(SrcOp); - // If the src is an instruction that appeared earlier in the basic block + // If the src is an instruction that appeared earlier in the basic block, // then it should already be vectorized. if (SrcInst && OrigLoop->contains(SrcInst)) { assert(WidenMap.has(SrcInst) && "Source operand is unavailable"); @@ -2343,19 +2560,12 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic // Create a new entry in the WidenMap and initialize it to Undef or Null. 
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); - Instruction *InsertPt = Builder.GetInsertPoint(); - BasicBlock *IfBlock = Builder.GetInsertBlock(); - BasicBlock *CondBlock = nullptr; - VectorParts Cond; - Loop *VectorLp = nullptr; if (IfPredicateStore) { assert(Instr->getParent()->getSinglePredecessor() && "Only support single predecessor blocks"); Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(), Instr->getParent()); - VectorLp = LI->getLoopFor(IfBlock); - assert(VectorLp && "Must have a loop for this block"); } // For each vector unroll 'part': @@ -2367,12 +2577,8 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic Value *Cmp = nullptr; if (IfPredicateStore) { Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width)); - Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1)); - CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); - LoopVectorBody.push_back(CondBlock); - VectorLp->addBasicBlockToLoop(CondBlock, *LI); - // Update Builder with newly created basic block. - Builder.SetInsertPoint(InsertPt); + Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, + ConstantInt::get(Cmp->getType(), 1)); } Instruction *Cloned = Instr->clone(); @@ -2396,85 +2602,223 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned, Builder.getInt32(Width)); // End if-block. - if (IfPredicateStore) { - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - LoopVectorBody.push_back(NewIfBlock); - VectorLp->addBasicBlockToLoop(NewIfBlock, *LI); - Builder.SetInsertPoint(InsertPt); - ReplaceInstWithInst(IfBlock->getTerminator(), - BranchInst::Create(CondBlock, NewIfBlock, Cmp)); - IfBlock = NewIfBlock; - } + if (IfPredicateStore) + PredicatedStores.push_back(std::make_pair(cast(Cloned), + Cmp)); } } } -static Instruction *getFirstInst(Instruction *FirstInst, Value *V, - Instruction *Loc) { - if (FirstInst) - return FirstInst; - if (Instruction *I = dyn_cast(V)) - return I->getParent() == Loc->getParent() ? I : nullptr; - return nullptr; +PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start, + Value *End, Value *Step, + Instruction *DL) { + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + // As we're just creating this loop, it's possible no latch exists + // yet. If so, use the header as this will be a single block loop. + if (!Latch) + Latch = Header; + + IRBuilder<> Builder(&*Header->getFirstInsertionPt()); + setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction)); + auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index"); + + Builder.SetInsertPoint(Latch->getTerminator()); + + // Create i+1 and fill the PHINode. + Value *Next = Builder.CreateAdd(Induction, Step, "index.next"); + Induction->addIncoming(Start, L->getLoopPreheader()); + Induction->addIncoming(Next, Latch); + // Create the compare. + Value *ICmp = Builder.CreateICmpEQ(Next, End); + Builder.CreateCondBr(ICmp, L->getExitBlock(), Header); + + // Now we have two terminators. Remove the old one from the block. 
+ Latch->getTerminator()->eraseFromParent(); + + return Induction; } -std::pair -InnerLoopVectorizer::addStrideCheck(Instruction *Loc) { - Instruction *tnullptr = nullptr; - if (!Legal->mustCheckStrides()) - return std::pair(tnullptr, tnullptr); +Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { + if (TripCount) + return TripCount; - IRBuilder<> ChkBuilder(Loc); + IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); + // Find the loop boundaries. + ScalarEvolution *SE = PSE.getSE(); + const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); + assert(BackedgeTakenCount != SE->getCouldNotCompute() && + "Invalid loop count"); - // Emit checks. - Value *Check = nullptr; - Instruction *FirstInst = nullptr; - for (SmallPtrSet::iterator SI = Legal->strides_begin(), - SE = Legal->strides_end(); - SI != SE; ++SI) { - Value *Ptr = stripIntegerCast(*SI); - Value *C = ChkBuilder.CreateICmpNE(Ptr, ConstantInt::get(Ptr->getType(), 1), - "stride.chk"); - // Store the first instruction we create. - FirstInst = getFirstInst(FirstInst, C, Loc); - if (Check) - Check = ChkBuilder.CreateOr(Check, C); - else - Check = C; - } + Type *IdxTy = Legal->getWidestInductionType(); + + // The exit count might have the type of i64 while the phi is i32. This can + // happen if we have an induction variable that is sign extended before the + // compare. The only way that we get a backedge taken count is that the + // induction variable was signed and as such will not overflow. In such a case + // truncation is legal. + if (BackedgeTakenCount->getType()->getPrimitiveSizeInBits() > + IdxTy->getPrimitiveSizeInBits()) + BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, IdxTy); + BackedgeTakenCount = SE->getNoopOrZeroExtend(BackedgeTakenCount, IdxTy); + + // Get the total trip count from the count by adding 1. + const SCEV *ExitCount = SE->getAddExpr( + BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType())); - // We have to do this trickery because the IRBuilder might fold the check to a - // constant expression in which case there is no Instruction anchored in a - // the block. - LLVMContext &Ctx = Loc->getContext(); - Instruction *TheCheck = - BinaryOperator::CreateAnd(Check, ConstantInt::getTrue(Ctx)); - ChkBuilder.Insert(TheCheck, "stride.not.one"); - FirstInst = getFirstInst(FirstInst, TheCheck, Loc); + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - return std::make_pair(FirstInst, TheCheck); + // Expand the trip count and place the new instructions in the preheader. + // Notice that the pre-header does not change, only the loop body. + SCEVExpander Exp(*SE, DL, "induction"); + + // Count holds the overall loop count (N). + TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(), + L->getLoopPreheader()->getTerminator()); + + if (TripCount->getType()->isPointerTy()) + TripCount = + CastInst::CreatePointerCast(TripCount, IdxTy, + "exitcount.ptrcnt.to.int", + L->getLoopPreheader()->getTerminator()); + + return TripCount; } +Value *InnerLoopVectorizer::getOrCreateVectorTripCount(Loop *L) { + if (VectorTripCount) + return VectorTripCount; + + Value *TC = getOrCreateTripCount(L); + IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); + + // Now we need to generate the expression for N - (N % VF), which is + // the part that the vectorized body will execute. + // The loop step is equal to the vectorization factor (num of SIMD elements) + // times the unroll factor (num of SIMD instructions). 
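Before the code below, a quick numeric check of the N - (N % (VF * UF)) round-down described in the comment above, under assumed values (TC = 1003, VF = 4, UF = 2); this is only an illustration of the arithmetic, not code from the pass:

    #include <cassert>

    int main() {
      unsigned TC = 1003, VF = 4, UF = 2;
      unsigned Step = VF * UF;   // 8: elements retired per vector iteration
      unsigned R = TC % Step;    // 3: "n.mod.vf"
      unsigned VecTC = TC - R;   // 1000: "n.vec", the vector trip count
      assert(VecTC % Step == 0); // the vector body never overruns
      assert(TC - VecTC == 3);   // 3 iterations remain for the scalar loop
      return 0;
    }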
+ Constant *Step = ConstantInt::get(TC->getType(), VF * UF); + Value *R = Builder.CreateURem(TC, Step, "n.mod.vf"); + VectorTripCount = Builder.CreateSub(TC, R, "n.vec"); + + return VectorTripCount; +} + +void InnerLoopVectorizer::emitMinimumIterationCountCheck(Loop *L, + BasicBlock *Bypass) { + Value *Count = getOrCreateTripCount(L); + BasicBlock *BB = L->getLoopPreheader(); + IRBuilder<> Builder(BB->getTerminator()); + + // Generate code to check that the loop's trip count that we computed by + // adding one to the backedge-taken count will not overflow. + Value *CheckMinIters = + Builder.CreateICmpULT(Count, + ConstantInt::get(Count->getType(), VF * UF), + "min.iters.check"); + + BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), + "min.iters.checked"); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); + ReplaceInstWithInst(BB->getTerminator(), + BranchInst::Create(Bypass, NewBB, CheckMinIters)); + LoopBypassBlocks.push_back(BB); +} + +void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, + BasicBlock *Bypass) { + Value *TC = getOrCreateVectorTripCount(L); + BasicBlock *BB = L->getLoopPreheader(); + IRBuilder<> Builder(BB->getTerminator()); + + // Now, compare the new count to zero. If it is zero skip the vector loop and + // jump to the scalar loop. + Value *Cmp = Builder.CreateICmpEQ(TC, Constant::getNullValue(TC->getType()), + "cmp.zero"); + + // Generate code to check that the loop's trip count that we computed by + // adding one to the backedge-taken count will not overflow. + BasicBlock *NewBB = BB->splitBasicBlock(BB->getTerminator(), + "vector.ph"); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); + ReplaceInstWithInst(BB->getTerminator(), + BranchInst::Create(Bypass, NewBB, Cmp)); + LoopBypassBlocks.push_back(BB); +} + +void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { + BasicBlock *BB = L->getLoopPreheader(); + + // Generate the code to check that the SCEV assumptions that we made. + // We want the new basic block to start at the first instruction in a + // sequence of instructions that form a check. + SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(), + "scev.check"); + Value *SCEVCheck = + Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator()); + + if (auto *C = dyn_cast(SCEVCheck)) + if (C->isZero()) + return; + + // Create a new block containing the stride check. + BB->setName("vector.scevcheck"); + auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); + ReplaceInstWithInst(BB->getTerminator(), + BranchInst::Create(Bypass, NewBB, SCEVCheck)); + LoopBypassBlocks.push_back(BB); + AddedSafetyChecks = true; +} + +void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, + BasicBlock *Bypass) { + BasicBlock *BB = L->getLoopPreheader(); + + // Generate the code that checks in runtime if arrays overlap. We put the + // checks into a separate block to make the more common case of few elements + // faster. + Instruction *FirstCheckInst; + Instruction *MemRuntimeCheck; + std::tie(FirstCheckInst, MemRuntimeCheck) = + Legal->getLAI()->addRuntimeChecks(BB->getTerminator()); + if (!MemRuntimeCheck) + return; + + // Create a new block containing the memory check. 
+ BB->setName("vector.memcheck"); + auto *NewBB = BB->splitBasicBlock(BB->getTerminator(), "vector.ph"); + if (L->getParentLoop()) + L->getParentLoop()->addBasicBlockToLoop(NewBB, *LI); + ReplaceInstWithInst(BB->getTerminator(), + BranchInst::Create(Bypass, NewBB, MemRuntimeCheck)); + LoopBypassBlocks.push_back(BB); + AddedSafetyChecks = true; +} + + void InnerLoopVectorizer::createEmptyLoop() { /* In this function we generate a new loop. The new loop will contain the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- Back-edge taken count overflow check. + [ ] <-- loop iteration number check. / | / v | [ ] <-- vector loop bypass (may consist of multiple blocks). | / | | / v || [ ] <-- vector pre header. - || | - || v - || [ ] \ - || [ ]_| <-- vector loop. - || | - | \ v - | >[ ] <--- middle-block. + |/ | + | v + | [ ] \ + | [ ]_| <-- vector loop. + | | + | v + | -[ ] <--- middle-block. | / | | / v -|- >[ ] <--- new preheader. @@ -2498,65 +2842,16 @@ void InnerLoopVectorizer::createEmptyLoop() { // don't. One example is c++ iterators that often have multiple pointer // induction variables. In the code below we also support a case where we // don't have a single induction variable. + // + // We try to obtain an induction variable from the original loop as hard + // as possible. However if we don't find one that: + // - is an integer + // - counts from zero, stepping by one + // - is the size of the widest induction variable type + // then we create a new one. OldInduction = Legal->getInduction(); Type *IdxTy = Legal->getWidestInductionType(); - // Find the loop boundaries. - const SCEV *ExitCount = SE->getBackedgeTakenCount(OrigLoop); - assert(ExitCount != SE->getCouldNotCompute() && "Invalid loop count"); - - // The exit count might have the type of i64 while the phi is i32. This can - // happen if we have an induction variable that is sign extended before the - // compare. The only way that we get a backedge taken count is that the - // induction variable was signed and as such will not overflow. In such a case - // truncation is legal. - if (ExitCount->getType()->getPrimitiveSizeInBits() > - IdxTy->getPrimitiveSizeInBits()) - ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy); - - const SCEV *BackedgeTakeCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy); - // Get the total trip count from the count by adding 1. - ExitCount = SE->getAddExpr(BackedgeTakeCount, - SE->getConstant(BackedgeTakeCount->getType(), 1)); - - const DataLayout &DL = OldBasicBlock->getModule()->getDataLayout(); - - // Expand the trip count and place the new instructions in the preheader. - // Notice that the pre-header does not change, only the loop body. - SCEVExpander Exp(*SE, DL, "induction"); - - // We need to test whether the backedge-taken count is uint##_max. Adding one - // to it will cause overflow and an incorrect loop trip count in the vector - // body. In case of overflow we want to directly jump to the scalar remainder - // loop. 
- Value *BackedgeCount = - Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(), - VectorPH->getTerminator()); - if (BackedgeCount->getType()->isPointerTy()) - BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy, - "backedge.ptrcnt.to.int", - VectorPH->getTerminator()); - Instruction *CheckBCOverflow = - CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount, - Constant::getAllOnesValue(BackedgeCount->getType()), - "backedge.overflow", VectorPH->getTerminator()); - - // The loop index does not have to start at Zero. Find the original start - // value from the induction PHI node. If we don't have an induction variable - // then we know that it starts at zero. - Builder.SetInsertPoint(VectorPH->getTerminator()); - Value *StartIdx = ExtendedIdx = - OldInduction - ? Builder.CreateZExt(OldInduction->getIncomingValueForBlock(VectorPH), - IdxTy) - : ConstantInt::get(IdxTy, 0); - - // Count holds the overall loop count (N). - Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - VectorPH->getTerminator()); - - LoopBypassBlocks.push_back(VectorPH); - // Split the single block loop into the two loop structure described above. BasicBlock *VecBody = VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.body"); @@ -2580,118 +2875,36 @@ void InnerLoopVectorizer::createEmptyLoop() { } Lp->addBasicBlockToLoop(VecBody, *LI); - // Use this IR builder to create the loop instructions (Phi, Br, Cmp) - // inside the loop. - Builder.SetInsertPoint(VecBody->getFirstNonPHI()); + // Find the loop boundaries. + Value *Count = getOrCreateTripCount(Lp); - // Generate the induction variable. - setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction)); - Induction = Builder.CreatePHI(IdxTy, 2, "index"); - // The loop step is equal to the vectorization factor (num of SIMD elements) - // times the unroll factor (num of SIMD instructions). - Constant *Step = ConstantInt::get(IdxTy, VF * UF); - - // Generate code to check that the loop's trip count that we computed by - // adding one to the backedge-taken count will not overflow. - BasicBlock *NewVectorPH = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "overflow.checked"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); - ReplaceInstWithInst( - VectorPH->getTerminator(), - BranchInst::Create(ScalarPH, NewVectorPH, CheckBCOverflow)); - VectorPH = NewVectorPH; - - // This is the IR builder that we use to add all of the logic for bypassing - // the new vector loop. - IRBuilder<> BypassBuilder(VectorPH->getTerminator()); - setDebugLocFromInst(BypassBuilder, - getDebugLocFromInstOrOperands(OldInduction)); - - // We may need to extend the index in case there is a type mismatch. - // We know that the count starts at zero and does not overflow. - if (Count->getType() != IdxTy) { - // The exit count can be of pointer type. Convert it to the correct - // integer type. - if (ExitCount->getType()->isPointerTy()) - Count = BypassBuilder.CreatePointerCast(Count, IdxTy, "ptrcnt.to.int"); - else - Count = BypassBuilder.CreateZExtOrTrunc(Count, IdxTy, "cnt.cast"); - } - - // Add the start index to the loop count to get the new end index. - Value *IdxEnd = BypassBuilder.CreateAdd(Count, StartIdx, "end.idx"); - - // Now we need to generate the expression for N - (N % VF), which is - // the part that the vectorized body will execute. 
- Value *R = BypassBuilder.CreateURem(Count, Step, "n.mod.vf"); - Value *CountRoundDown = BypassBuilder.CreateSub(Count, R, "n.vec"); - Value *IdxEndRoundDown = BypassBuilder.CreateAdd(CountRoundDown, StartIdx, - "end.idx.rnd.down"); + Value *StartIdx = ConstantInt::get(IdxTy, 0); + // We need to test whether the backedge-taken count is uint##_max. Adding one + // to it will cause overflow and an incorrect loop trip count in the vector + // body. In case of overflow we want to directly jump to the scalar remainder + // loop. + emitMinimumIterationCountCheck(Lp, ScalarPH); // Now, compare the new count to zero. If it is zero skip the vector loop and // jump to the scalar loop. - Value *Cmp = - BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero"); - NewVectorPH = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); - LoopBypassBlocks.push_back(VectorPH); - ReplaceInstWithInst(VectorPH->getTerminator(), - BranchInst::Create(MiddleBlock, NewVectorPH, Cmp)); - VectorPH = NewVectorPH; - - // Generate the code to check that the strides we assumed to be one are really - // one. We want the new basic block to start at the first instruction in a - // sequence of instructions that form a check. - Instruction *StrideCheck; - Instruction *FirstCheckInst; - std::tie(FirstCheckInst, StrideCheck) = - addStrideCheck(VectorPH->getTerminator()); - if (StrideCheck) { - AddedSafetyChecks = true; - // Create a new block containing the stride check. - VectorPH->setName("vector.stridecheck"); - NewVectorPH = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); - LoopBypassBlocks.push_back(VectorPH); - - // Replace the branch into the memory check block with a conditional branch - // for the "few elements case". - ReplaceInstWithInst( - VectorPH->getTerminator(), - BranchInst::Create(MiddleBlock, NewVectorPH, StrideCheck)); - - VectorPH = NewVectorPH; - } + emitVectorLoopEnteredCheck(Lp, ScalarPH); + // Generate the code to check any assumptions that we've made for SCEV + // expressions. + emitSCEVChecks(Lp, ScalarPH); // Generate the code that checks in runtime if arrays overlap. We put the // checks into a separate block to make the more common case of few elements // faster. - Instruction *MemRuntimeCheck; - std::tie(FirstCheckInst, MemRuntimeCheck) = - Legal->getLAI()->addRuntimeCheck(VectorPH->getTerminator()); - if (MemRuntimeCheck) { - AddedSafetyChecks = true; - // Create a new block containing the memory check. - VectorPH->setName("vector.memcheck"); - NewVectorPH = - VectorPH->splitBasicBlock(VectorPH->getTerminator(), "vector.ph"); - if (ParentLoop) - ParentLoop->addBasicBlockToLoop(NewVectorPH, *LI); - LoopBypassBlocks.push_back(VectorPH); - - // Replace the branch into the memory check block with a conditional branch - // for the "few elements case". - ReplaceInstWithInst( - VectorPH->getTerminator(), - BranchInst::Create(MiddleBlock, NewVectorPH, MemRuntimeCheck)); - - VectorPH = NewVectorPH; - } + emitMemRuntimeChecks(Lp, ScalarPH); + + // Generate the induction variable. + // The loop step is equal to the vectorization factor (num of SIMD elements) + // times the unroll factor (num of SIMD instructions). 
+ Value *CountRoundDown = getOrCreateVectorTripCount(Lp); + Constant *Step = ConstantInt::get(IdxTy, VF * UF); + Induction = + createInductionVariable(Lp, StartIdx, CountRoundDown, Step, + getDebugLocFromInstOrOperands(OldInduction)); // We are going to resume the execution of the scalar loop. // Go over all of the induction variables that we found and fix the @@ -2701,152 +2914,60 @@ void InnerLoopVectorizer::createEmptyLoop() { // If we come from a bypass edge then we need to start from the original // start value. - // This variable saves the new starting index for the scalar loop. - PHINode *ResumeIndex = nullptr; + // This variable saves the new starting index for the scalar loop. It is used + // to test if there are any tail iterations left once the vector loop has + // completed. LoopVectorizationLegality::InductionList::iterator I, E; LoopVectorizationLegality::InductionList *List = Legal->getInductionVars(); - // Set builder to point to last bypass block. - BypassBuilder.SetInsertPoint(LoopBypassBlocks.back()->getTerminator()); for (I = List->begin(), E = List->end(); I != E; ++I) { PHINode *OrigPhi = I->first; - LoopVectorizationLegality::InductionInfo II = I->second; - - Type *ResumeValTy = (OrigPhi == OldInduction) ? IdxTy : OrigPhi->getType(); - PHINode *ResumeVal = PHINode::Create(ResumeValTy, 2, "resume.val", - MiddleBlock->getTerminator()); - // We might have extended the type of the induction variable but we need a - // truncated version for the scalar loop. - PHINode *TruncResumeVal = (OrigPhi == OldInduction) ? - PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val", - MiddleBlock->getTerminator()) : nullptr; + InductionDescriptor II = I->second; // Create phi nodes to merge from the backedge-taken check block. - PHINode *BCResumeVal = PHINode::Create(ResumeValTy, 3, "bc.resume.val", + PHINode *BCResumeVal = PHINode::Create(OrigPhi->getType(), 3, + "bc.resume.val", ScalarPH->getTerminator()); - BCResumeVal->addIncoming(ResumeVal, MiddleBlock); - - PHINode *BCTruncResumeVal = nullptr; + Value *EndValue; if (OrigPhi == OldInduction) { - BCTruncResumeVal = - PHINode::Create(OrigPhi->getType(), 2, "bc.trunc.resume.val", - ScalarPH->getTerminator()); - BCTruncResumeVal->addIncoming(TruncResumeVal, MiddleBlock); - } - - Value *EndValue = nullptr; - switch (II.IK) { - case LoopVectorizationLegality::IK_NoInduction: - llvm_unreachable("Unknown induction"); - case LoopVectorizationLegality::IK_IntInduction: { - // Handle the integer induction counter. - assert(OrigPhi->getType()->isIntegerTy() && "Invalid type"); - - // We have the canonical induction variable. - if (OrigPhi == OldInduction) { - // Create a truncated version of the resume value for the scalar loop, - // we might have promoted the type to a larger width. - EndValue = - BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType()); - // The new PHI merges the original incoming value, in case of a bypass, - // or the value at the end of the vectorized loop. - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) - TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); - TruncResumeVal->addIncoming(EndValue, VecBody); - - BCTruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]); - - // We know what the end value is. - EndValue = IdxEndRoundDown; - // We also know which PHI node holds it. - ResumeIndex = ResumeVal; - break; - } - - // Not the canonical induction variable - add the vector loop count to the - // start value. 
- Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, - II.StartValue->getType(), - "cast.crd"); - EndValue = II.transform(BypassBuilder, CRD); + // We know what the end value is. + EndValue = CountRoundDown; + } else { + IRBuilder<> B(LoopBypassBlocks.back()->getTerminator()); + Value *CRD = B.CreateSExtOrTrunc(CountRoundDown, + II.getStepValue()->getType(), + "cast.crd"); + EndValue = II.transform(B, CRD); EndValue->setName("ind.end"); - break; } - case LoopVectorizationLegality::IK_PtrInduction: { - Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown, - II.StepValue->getType(), - "cast.crd"); - EndValue = II.transform(BypassBuilder, CRD); - EndValue->setName("ptr.ind.end"); - break; - } - }// end of case // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) { - if (OrigPhi == OldInduction) - ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]); - else - ResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]); - } - ResumeVal->addIncoming(EndValue, VecBody); + BCResumeVal->addIncoming(EndValue, MiddleBlock); // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); // The old induction's phi node in the scalar body needs the truncated // value. - if (OrigPhi == OldInduction) { - BCResumeVal->addIncoming(StartIdx, LoopBypassBlocks[0]); - OrigPhi->setIncomingValue(BlockIdx, BCTruncResumeVal); - } else { - BCResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]); - OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); - } + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + BCResumeVal->addIncoming(II.getStartValue(), LoopBypassBlocks[I]); + OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); } - // If we are generating a new induction variable then we also need to - // generate the code that calculates the exit value. This value is not - // simply the end of the counter because we may skip the vectorized body - // in case of a runtime check. - if (!OldInduction){ - assert(!ResumeIndex && "Unexpected resume value found"); - ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val", - MiddleBlock->getTerminator()); - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) - ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]); - ResumeIndex->addIncoming(IdxEndRoundDown, VecBody); - } - - // Make sure that we found the index where scalar loop needs to continue. - assert(ResumeIndex && ResumeIndex->getType()->isIntegerTy() && - "Invalid resume Index"); - // Add a check in the middle block to see if we have completed // all of the iterations in the first vector loop. // If (N - N%VF) == N, then we *don't* need to run the remainder. - Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, IdxEnd, - ResumeIndex, "cmp.n", + Value *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, Count, + CountRoundDown, "cmp.n", MiddleBlock->getTerminator()); ReplaceInstWithInst(MiddleBlock->getTerminator(), BranchInst::Create(ExitBlock, ScalarPH, CmpN)); - // Create i+1 and fill the PHINode. - Value *NextIdx = Builder.CreateAdd(Induction, Step, "index.next"); - Induction->addIncoming(StartIdx, VectorPH); - Induction->addIncoming(NextIdx, VecBody); - // Create the compare. - Value *ICmp = Builder.CreateICmpEQ(NextIdx, IdxEndRoundDown); - Builder.CreateCondBr(ICmp, MiddleBlock, VecBody); - - // Now we have two terminators. Remove the old one from the block. 
- VecBody->getTerminator()->eraseFromParent(); - // Get ready to start creating new instructions into the vectorized body. - Builder.SetInsertPoint(VecBody->getFirstInsertionPt()); + Builder.SetInsertPoint(&*VecBody->getFirstInsertionPt()); // Save the state. - LoopVectorPreHeader = VectorPH; + LoopVectorPreHeader = Lp->getLoopPreheader(); LoopScalarPreHeader = ScalarPH; LoopMiddleBlock = MiddleBlock; LoopExitBlock = ExitBlock; @@ -2899,7 +3020,7 @@ static void cse(SmallVector &BBs) { for (unsigned i = 0, e = BBs.size(); i != e; ++i) { BasicBlock *BB = BBs[i]; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { - Instruction *In = I++; + Instruction *In = &*I++; if (!CSEDenseMapInfo::canHandle(In)) continue; @@ -3021,6 +3142,117 @@ static unsigned getVectorIntrinsicCost(CallInst *CI, unsigned VF, return TTI.getIntrinsicInstrCost(ID, RetTy, Tys); } +static Type *smallestIntegerVectorType(Type *T1, Type *T2) { + IntegerType *I1 = cast(T1->getVectorElementType()); + IntegerType *I2 = cast(T2->getVectorElementType()); + return I1->getBitWidth() < I2->getBitWidth() ? T1 : T2; +} +static Type *largestIntegerVectorType(Type *T1, Type *T2) { + IntegerType *I1 = cast(T1->getVectorElementType()); + IntegerType *I2 = cast(T2->getVectorElementType()); + return I1->getBitWidth() > I2->getBitWidth() ? T1 : T2; +} + +void InnerLoopVectorizer::truncateToMinimalBitwidths() { + // For every instruction `I` in MinBWs, truncate the operands, create a + // truncated version of `I` and reextend its result. InstCombine runs + // later and will remove any ext/trunc pairs. + // + for (auto &KV : MinBWs) { + VectorParts &Parts = WidenMap.get(KV.first); + for (Value *&I : Parts) { + if (I->use_empty()) + continue; + Type *OriginalTy = I->getType(); + Type *ScalarTruncatedTy = IntegerType::get(OriginalTy->getContext(), + KV.second); + Type *TruncatedTy = VectorType::get(ScalarTruncatedTy, + OriginalTy->getVectorNumElements()); + if (TruncatedTy == OriginalTy) + continue; + + IRBuilder<> B(cast(I)); + auto ShrinkOperand = [&](Value *V) -> Value* { + if (auto *ZI = dyn_cast(V)) + if (ZI->getSrcTy() == TruncatedTy) + return ZI->getOperand(0); + return B.CreateZExtOrTrunc(V, TruncatedTy); + }; + + // The actual instruction modification depends on the instruction type, + // unfortunately. 
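The transformation is easiest to see in scalar form. A toy model of the trunc/compute/extend round trip follows, assuming the demanded-bits analysis proved that 8 bits suffice; the helper is illustrative and not part of the pass:

    #include <cstdint>

    // If analysis proves a value only needs 8 bits, compute at that width
    // and zero-extend back; InstCombine can then fold the ext/trunc pairs.
    static uint32_t viaMinimalWidth(uint32_t A, uint32_t B) {
      uint8_t NarrowA = (uint8_t)A;       // trunc
      uint8_t NarrowB = (uint8_t)B;       // trunc
      uint8_t Narrow = NarrowA + NarrowB; // the operation at minimal width
      return (uint32_t)Narrow;            // zext back to the original type
    }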
+ Value *NewI = nullptr; + if (BinaryOperator *BO = dyn_cast(I)) { + NewI = B.CreateBinOp(BO->getOpcode(), + ShrinkOperand(BO->getOperand(0)), + ShrinkOperand(BO->getOperand(1))); + cast(NewI)->copyIRFlags(I); + } else if (ICmpInst *CI = dyn_cast(I)) { + NewI = B.CreateICmp(CI->getPredicate(), + ShrinkOperand(CI->getOperand(0)), + ShrinkOperand(CI->getOperand(1))); + } else if (SelectInst *SI = dyn_cast(I)) { + NewI = B.CreateSelect(SI->getCondition(), + ShrinkOperand(SI->getTrueValue()), + ShrinkOperand(SI->getFalseValue())); + } else if (CastInst *CI = dyn_cast(I)) { + switch (CI->getOpcode()) { + default: llvm_unreachable("Unhandled cast!"); + case Instruction::Trunc: + NewI = ShrinkOperand(CI->getOperand(0)); + break; + case Instruction::SExt: + NewI = B.CreateSExtOrTrunc(CI->getOperand(0), + smallestIntegerVectorType(OriginalTy, + TruncatedTy)); + break; + case Instruction::ZExt: + NewI = B.CreateZExtOrTrunc(CI->getOperand(0), + smallestIntegerVectorType(OriginalTy, + TruncatedTy)); + break; + } + } else if (ShuffleVectorInst *SI = dyn_cast(I)) { + auto Elements0 = SI->getOperand(0)->getType()->getVectorNumElements(); + auto *O0 = + B.CreateZExtOrTrunc(SI->getOperand(0), + VectorType::get(ScalarTruncatedTy, Elements0)); + auto Elements1 = SI->getOperand(1)->getType()->getVectorNumElements(); + auto *O1 = + B.CreateZExtOrTrunc(SI->getOperand(1), + VectorType::get(ScalarTruncatedTy, Elements1)); + + NewI = B.CreateShuffleVector(O0, O1, SI->getMask()); + } else if (isa(I)) { + // Don't do anything with the operands, just extend the result. + continue; + } else { + llvm_unreachable("Unhandled instruction type!"); + } + + // Lastly, extend the result. + NewI->takeName(cast(I)); + Value *Res = B.CreateZExtOrTrunc(NewI, OriginalTy); + I->replaceAllUsesWith(Res); + cast(I)->eraseFromParent(); + I = Res; + } + } + + // We'll have created a bunch of ZExts that are now parentless. Clean up. + for (auto &KV : MinBWs) { + VectorParts &Parts = WidenMap.get(KV.first); + for (Value *&I : Parts) { + ZExtInst *Inst = dyn_cast(I); + if (Inst && Inst->use_empty()) { + Value *NewI = Inst->getOperand(0); + Inst->eraseFromParent(); + I = NewI; + } + } + } +} + void InnerLoopVectorizer::vectorizeLoop() { //===------------------------------------------------===// // @@ -3051,6 +3283,11 @@ void InnerLoopVectorizer::vectorizeLoop() { be = DFS.endRPO(); bb != be; ++bb) vectorizeBlockInLoop(*bb, &RdxPHIsToFix); + // Insert truncates and extends for any truncated instructions as hints to + // InstCombine. + if (VF > 1) + truncateToMinimalBitwidths(); + // At this point every instruction in the original loop is widened to // a vector form. We are almost done. Now, we need to fix the PHI nodes // that we vectorized. The PHI nodes are currently empty because we did @@ -3066,7 +3303,7 @@ void InnerLoopVectorizer::vectorizeLoop() { assert(RdxPhi && "Unable to recover vectorized PHI"); // Find the reduction variable descriptor. - assert(Legal->getReductionVars()->count(RdxPhi) && + assert(Legal->isReductionVariable(RdxPhi) && "Unable to find the reduction variable"); RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[RdxPhi]; @@ -3141,21 +3378,33 @@ void InnerLoopVectorizer::vectorizeLoop() { // the PHIs and the values we are going to write. // This allows us to write both PHINodes and the extractelement // instructions. 
- Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt()); + Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); - VectorParts RdxParts; + VectorParts RdxParts = getVectorValue(LoopExitInst); setDebugLocFromInst(Builder, LoopExitInst); - for (unsigned part = 0; part < UF; ++part) { - // This PHINode contains the vectorized reduction variable, or - // the initial value vector, if we bypass the vector loop. - VectorParts &RdxExitVal = getVectorValue(LoopExitInst); - PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); - Value *StartVal = (part == 0) ? VectorStart : Identity; - for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) - NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]); - NewPhi->addIncoming(RdxExitVal[part], - LoopVectorBody.back()); - RdxParts.push_back(NewPhi); + + // If the vector reduction can be performed in a smaller type, we truncate + // then extend the loop exit value to enable InstCombine to evaluate the + // entire expression in the smaller type. + if (VF > 1 && RdxPhi->getType() != RdxDesc.getRecurrenceType()) { + Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF); + Builder.SetInsertPoint(LoopVectorBody.back()->getTerminator()); + for (unsigned part = 0; part < UF; ++part) { + Value *Trunc = Builder.CreateTrunc(RdxParts[part], RdxVecTy); + Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy) + : Builder.CreateZExt(Trunc, VecTy); + for (Value::user_iterator UI = RdxParts[part]->user_begin(); + UI != RdxParts[part]->user_end();) + if (*UI != Trunc) { + (*UI++)->replaceUsesOfWith(RdxParts[part], Extnd); + RdxParts[part] = Extnd; + } else { + ++UI; + } + } + Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt()); + for (unsigned part = 0; part < UF; ++part) + RdxParts[part] = Builder.CreateTrunc(RdxParts[part], RdxVecTy); } // Reduce all of the unrolled parts into a single vector. @@ -3208,13 +3457,22 @@ void InnerLoopVectorizer::vectorizeLoop() { // The result is in the first element of the vector. ReducedPartRdx = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); + + // If the reduction can be performed in a smaller type, we need to extend + // the reduction to the wider type before we branch to the original loop. + if (RdxPhi->getType() != RdxDesc.getRecurrenceType()) + ReducedPartRdx = + RdxDesc.isSigned() + ? Builder.CreateSExt(ReducedPartRdx, RdxPhi->getType()) + : Builder.CreateZExt(ReducedPartRdx, RdxPhi->getType()); } // Create a phi node that merges control-flow from the backedge-taken check // block and the middle block. PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx", LoopScalarPreHeader->getTerminator()); - BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[0]); + for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]); BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); // Now, we need to fix the users of the reduction variable @@ -3252,6 +3510,20 @@ void InnerLoopVectorizer::vectorizeLoop() { fixLCSSAPHIs(); + // Make sure DomTree is updated. + updateAnalysis(); + + // Predicate any stores. 
+ for (auto KV : PredicatedStores) { + BasicBlock::iterator I(KV.first); + auto *BB = SplitBlock(I->getParent(), &*std::next(I), DT, LI); + auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false, + /*BranchWeights=*/nullptr, DT); + I->moveBefore(T); + I->getParent()->setName("pred.store.if"); + BB->setName("pred.store.continue"); + } + DEBUG(DT->verifyDomTree()); // Remove redundant induction instructions. cse(LoopVectorBody); } @@ -3326,18 +3598,18 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) { return BlockMask; } -void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, - InnerLoopVectorizer::VectorParts &Entry, - unsigned UF, unsigned VF, PhiVector *PV) { +void InnerLoopVectorizer::widenPHIInstruction( + Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF, + unsigned VF, PhiVector *PV) { PHINode* P = cast(PN); // Handle reduction variables: - if (Legal->getReductionVars()->count(P)) { + if (Legal->isReductionVariable(P)) { for (unsigned part = 0; part < UF; ++part) { // This is phase one of vectorizing PHIs. Type *VecTy = (VF == 1) ? PN->getType() : VectorType::get(PN->getType(), VF); - Entry[part] = PHINode::Create(VecTy, 2, "vec.phi", - LoopVectorBody.back()-> getFirstInsertionPt()); + Entry[part] = PHINode::Create( + VecTy, 2, "vec.phi", &*LoopVectorBody.back()->getFirstInsertionPt()); } PV->push_back(P); return; @@ -3385,53 +3657,44 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, assert(Legal->getInductionVars()->count(P) && "Not an induction variable"); - LoopVectorizationLegality::InductionInfo II = - Legal->getInductionVars()->lookup(P); + InductionDescriptor II = Legal->getInductionVars()->lookup(P); // FIXME: The newly created binary instructions should contain nsw/nuw flags, // which can be found from the original scalar operations. - switch (II.IK) { - case LoopVectorizationLegality::IK_NoInduction: + switch (II.getKind()) { + case InductionDescriptor::IK_NoInduction: llvm_unreachable("Unknown induction"); - case LoopVectorizationLegality::IK_IntInduction: { - assert(P->getType() == II.StartValue->getType() && "Types must match"); - Type *PhiTy = P->getType(); - Value *Broadcasted; - if (P == OldInduction) { - // Handle the canonical induction variable. We might have had to - // extend the type. - Broadcasted = Builder.CreateTrunc(Induction, PhiTy); - } else { - // Handle other induction variables that are now based on the - // canonical one. - Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx, - "normalized.idx"); - NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy); - Broadcasted = II.transform(Builder, NormalizedIdx); - Broadcasted->setName("offset.idx"); + case InductionDescriptor::IK_IntInduction: { + assert(P->getType() == II.getStartValue()->getType() && + "Types must match"); + // Handle other induction variables that are now based on the + // canonical one. + Value *V = Induction; + if (P != OldInduction) { + V = Builder.CreateSExtOrTrunc(Induction, P->getType()); + V = II.transform(Builder, V); + V->setName("offset.idx"); } - Broadcasted = getBroadcastInstrs(Broadcasted); + Value *Broadcasted = getBroadcastInstrs(V); // After broadcasting the induction variable we need to make the vector // consecutive by adding 0, 1, 2, etc. 
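As the comment above says, a widened induction is formed by broadcasting the scalar value and then making the lanes consecutive; the loop just below does this per unroll part via getStepVector. A self-contained sketch of the splat-plus-steps idiom for a unit step (helper name hypothetical; the in-tree getStepVector additionally scales by the induction step):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Broadcast Scalar into a VF-wide vector, then add <Start, Start+1, ...>.
    static Value *splatPlusSteps(IRBuilder<> &B, Value *Scalar, unsigned VF,
                                 int Start) {
      Value *Splat = B.CreateVectorSplat(VF, Scalar);
      SmallVector<Constant *, 8> Steps;
      for (unsigned I = 0; I != VF; ++I)
        Steps.push_back(ConstantInt::get(Scalar->getType(), Start + I));
      return B.CreateAdd(Splat, ConstantVector::get(Steps));
    }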
for (unsigned part = 0; part < UF; ++part) - Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue); + Entry[part] = getStepVector(Broadcasted, VF * part, II.getStepValue()); return; } - case LoopVectorizationLegality::IK_PtrInduction: + case InductionDescriptor::IK_PtrInduction: // Handle the pointer induction variable case. assert(P->getType()->isPointerTy() && "Unexpected type."); // This is the normalized GEP that starts counting at zero. - Value *NormalizedIdx = - Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx"); - NormalizedIdx = - Builder.CreateSExtOrTrunc(NormalizedIdx, II.StepValue->getType()); + Value *PtrInd = Induction; + PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStepValue()->getType()); // This is the vector of results. Notice that we don't generate // vector geps because scalar geps result in better code. for (unsigned part = 0; part < UF; ++part) { if (VF == 1) { int EltIndex = part; - Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex); - Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx); + Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); + Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = II.transform(Builder, GlobalIdx); SclrGep->setName("next.gep"); Entry[part] = SclrGep; @@ -3441,8 +3704,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF)); for (unsigned int i = 0; i < VF; ++i) { int EltIndex = i + part * VF; - Constant *Idx = ConstantInt::get(NormalizedIdx->getType(), EltIndex); - Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx); + Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex); + Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx); Value *SclrGep = II.transform(Builder, GlobalIdx); SclrGep->setName("next.gep"); VecVal = Builder.CreateInsertElement(VecVal, SclrGep, @@ -3458,7 +3721,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // For each instruction in the old loop. for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - VectorParts &Entry = WidenMap.get(it); + VectorParts &Entry = WidenMap.get(&*it); + switch (it->getOpcode()) { case Instruction::Br: // Nothing to do for PHIs and BR, since we already took care of the @@ -3466,7 +3730,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { continue; case Instruction::PHI: { // Vectorize PHINodes. - widenPHIInstruction(it, Entry, UF, VF, PV); + widenPHIInstruction(&*it, Entry, UF, VF, PV); continue; }// End of PHI. @@ -3504,16 +3768,17 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Entry[Part] = V; } - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } case Instruction::Select: { // Widen selects. // If the selector is loop invariant we can create a select // instruction with a scalar condition. Otherwise, use vector-select. - bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)), - OrigLoop); - setDebugLocFromInst(Builder, it); + auto *SE = PSE.getSE(); + bool InvariantCond = + SE->isLoopInvariant(PSE.getSCEV(it->getOperand(0)), OrigLoop); + setDebugLocFromInst(Builder, &*it); // The condition can be loop invariant but still defined inside the // loop. This means that we can't just use the original 'cond' value. 
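The hunk that follows widens selects, and when the condition is loop invariant it extracts lane zero of the widened condition, since every lane holds the same value. A sketch of that decision, assuming Cond, Op0, and Op1 are the widened operands of one unroll part (helper name hypothetical):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Scalar select for a uniform condition, vector select otherwise.
    static Value *widenSelect(IRBuilder<> &B, Value *Cond, Value *Op0,
                              Value *Op1, bool InvariantCond) {
      Value *C = InvariantCond ? B.CreateExtractElement(Cond, B.getInt32(0))
                               : Cond;
      return B.CreateSelect(C, Op0, Op1);
    }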
@@ -3522,7 +3787,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { VectorParts &Cond = getVectorValue(it->getOperand(0)); VectorParts &Op0 = getVectorValue(it->getOperand(1)); VectorParts &Op1 = getVectorValue(it->getOperand(2)); - + Value *ScalarCond = (VF == 1) ? Cond[0] : Builder.CreateExtractElement(Cond[0], Builder.getInt32(0)); @@ -3533,7 +3798,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Op1[Part]); } - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } @@ -3542,25 +3807,27 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // Widen compares. Generate vector compares. bool FCmp = (it->getOpcode() == Instruction::FCmp); CmpInst *Cmp = dyn_cast(it); - setDebugLocFromInst(Builder, it); + setDebugLocFromInst(Builder, &*it); VectorParts &A = getVectorValue(it->getOperand(0)); VectorParts &B = getVectorValue(it->getOperand(1)); for (unsigned Part = 0; Part < UF; ++Part) { Value *C = nullptr; - if (FCmp) + if (FCmp) { C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]); - else + cast(C)->copyFastMathFlags(&*it); + } else { C = Builder.CreateICmp(Cmp->getPredicate(), A[Part], B[Part]); + } Entry[Part] = C; } - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } case Instruction::Store: case Instruction::Load: - vectorizeMemoryInstruction(it); + vectorizeMemoryInstruction(&*it); break; case Instruction::ZExt: case Instruction::SExt: @@ -3575,7 +3842,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { case Instruction::FPTrunc: case Instruction::BitCast: { CastInst *CI = dyn_cast(it); - setDebugLocFromInst(Builder, it); + setDebugLocFromInst(Builder, &*it); /// Optimize the special case where the source is the induction /// variable. Notice that we can only optimize the 'trunc' case /// because: a. FP conversions lose precision, b. sext/zext may wrap, @@ -3585,13 +3852,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction, CI->getType()); Value *Broadcasted = getBroadcastInstrs(ScalarCast); - LoopVectorizationLegality::InductionInfo II = + InductionDescriptor II = Legal->getInductionVars()->lookup(OldInduction); - Constant *Step = - ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue()); + Constant *Step = ConstantInt::getSigned( + CI->getType(), II.getStepValue()->getSExtValue()); for (unsigned Part = 0; Part < UF; ++Part) Entry[Part] = getStepVector(Broadcasted, VF * Part, Step); - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } /// Vectorize casts. @@ -3601,7 +3868,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { VectorParts &A = getVectorValue(it->getOperand(0)); for (unsigned Part = 0; Part < UF; ++Part) Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy); - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } @@ -3609,7 +3876,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // Ignore dbg intrinsics. 
if (isa(it)) break; - setDebugLocFromInst(Builder, it); + setDebugLocFromInst(Builder, &*it); Module *M = BB->getParent()->getParent(); CallInst *CI = cast(it); @@ -3625,7 +3892,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { if (ID && (ID == Intrinsic::assume || ID == Intrinsic::lifetime_end || ID == Intrinsic::lifetime_start)) { - scalarizeInstruction(it); + scalarizeInstruction(&*it); break; } // The flag shows whether we use Intrinsic or a usual Call for vectorized @@ -3636,7 +3903,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { bool UseVectorIntrinsic = ID && getVectorIntrinsicCost(CI, VF, *TTI, TLI) <= CallCost; if (!UseVectorIntrinsic && NeedToScalarize) { - scalarizeInstruction(it); + scalarizeInstruction(&*it); break; } @@ -3677,13 +3944,13 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { Entry[Part] = Builder.CreateCall(VectorF, Args); } - propagateMetadata(Entry, it); + propagateMetadata(Entry, &*it); break; } default: // All other instructions are unsupported. Scalarize them. - scalarizeInstruction(it); + scalarizeInstruction(&*it); break; }// end of switch. }// end of for_each instr. @@ -3691,7 +3958,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. - SE->forgetLoop(OrigLoop); + PSE.getSE()->forgetLoop(OrigLoop); // Update the dominator tree information. assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && @@ -3701,19 +3968,12 @@ void InnerLoopVectorizer::updateAnalysis() { DT->addNewBlock(LoopBypassBlocks[I], LoopBypassBlocks[I-1]); DT->addNewBlock(LoopVectorPreHeader, LoopBypassBlocks.back()); - // Due to if predication of stores we might create a sequence of "if(pred) - // a[i] = ...; " blocks. - for (unsigned i = 0, e = LoopVectorBody.size(); i != e; ++i) { - if (i == 0) - DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader); - else if (isPredicatedBlock(i)) { - DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-1]); - } else { - DT->addNewBlock(LoopVectorBody[i], LoopVectorBody[i-2]); - } - } + // We don't predicate stores by this point, so the vector body should be a + // single loop. + assert(LoopVectorBody.size() == 1 && "Expected single block loop!"); + DT->addNewBlock(LoopVectorBody[0], LoopVectorPreHeader); - DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]); + DT->addNewBlock(LoopMiddleBlock, LoopVectorBody.back()); DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopBypassBlocks[0]); @@ -3850,10 +4110,10 @@ bool LoopVectorizationLegality::canVectorize() { } // ScalarEvolution needs to be able to find the exit count. 
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(VectorizationReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(VectorizationReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); return false; } @@ -3879,10 +4139,28 @@ bool LoopVectorizationLegality::canVectorize() { : "") << "!\n"); + bool UseInterleaved = TTI->enableInterleavedAccessVectorization(); + + // If an override option has been passed in for interleaved accesses, use it. + if (EnableInterleavedMemAccesses.getNumOccurrences() > 0) + UseInterleaved = EnableInterleavedMemAccesses; + // Analyze interleaved memory accesses. - if (EnableInterleavedMemAccesses) + if (UseInterleaved) InterleaveInfo.analyzeInterleaving(Strides); + unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; + if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) + SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; + + if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { + emitAnalysis(VectorizationReport() + << "Too many SCEV assumptions need to be made and checked " + << "at runtime"); + DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); + return false; + } + // Okay! We can vectorize. At this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. @@ -3929,7 +4207,6 @@ static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, } bool LoopVectorizationLegality::canVectorizeInstrs() { - BasicBlock *PreHeader = TheLoop->getLoopPreheader(); BasicBlock *Header = TheLoop->getHeader(); // Look for the attribute signaling the absence of NaNs. @@ -3953,7 +4230,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && !PhiTy->isPointerTy()) { - emitAnalysis(VectorizationReport(it) + emitAnalysis(VectorizationReport(&*it) << "loop control flow is not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); return false; @@ -3965,9 +4242,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (*bb != Header) { // Check that this instruction has no outside users or is an // identified reduction value with an outside user. - if (!hasOutsideLoopUser(TheLoop, it, AllowedExit)) + if (!hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) continue; - emitAnalysis(VectorizationReport(it) << + emitAnalysis(VectorizationReport(&*it) << "value could not be identified as " "an induction or reduction variable"); return false; @@ -3975,19 +4252,15 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // We only allow if-converted PHIs with exactly two incoming values. if (Phi->getNumIncomingValues() != 2) { - emitAnalysis(VectorizationReport(it) + emitAnalysis(VectorizationReport(&*it) << "control flow not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); return false; } - // This is the value coming from the preheader. - Value *StartValue = Phi->getIncomingValueForBlock(PreHeader); - ConstantInt *StepValue = nullptr; - // Check if this is an induction variable. 
- InductionKind IK = isInductionVariable(Phi, StepValue); - - if (IK_NoInduction != IK) { + InductionDescriptor ID; + if (InductionDescriptor::isInductionPHI(Phi, PSE.getSE(), ID)) { + Inductions[Phi] = ID; // Get the widest type. if (!WidestIndTy) WidestIndTy = convertPointerToIntegerType(DL, PhiTy); @@ -3995,21 +4268,24 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); // Int inductions are special because we only allow one IV. - if (IK == IK_IntInduction && StepValue->isOne()) { + if (ID.getKind() == InductionDescriptor::IK_IntInduction && + ID.getStepValue()->isOne() && + isa(ID.getStartValue()) && + cast(ID.getStartValue())->isNullValue()) { // Use the phi node with the widest type as induction. Use the last // one if there are multiple (no good reason for doing this other - // than it is expedient). + // than it is expedient). We've checked that it begins at zero and + // steps by one, so this is a canonical induction variable. if (!Induction || PhiTy == WidestIndTy) Induction = Phi; } DEBUG(dbgs() << "LV: Found an induction variable.\n"); - Inductions[Phi] = InductionInfo(StartValue, IK, StepValue); // Until we explicitly handle the case of an induction variable with // an outside loop user we have to give up vectorizing this loop. - if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) { - emitAnalysis(VectorizationReport(it) << + if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) { + emitAnalysis(VectorizationReport(&*it) << "use of induction value outside of the " "loop is not handled by vectorizer"); return false; @@ -4020,11 +4296,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, Reductions[Phi])) { + if (Reductions[Phi].hasUnsafeAlgebra()) + Requirements->addUnsafeAlgebraInst( + Reductions[Phi].getUnsafeAlgebraInst()); AllowedExit.insert(Reductions[Phi].getLoopExitInstr()); continue; } - emitAnalysis(VectorizationReport(it) << + emitAnalysis(VectorizationReport(&*it) << "value that could not be identified as " "reduction is used outside the loop"); DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); @@ -4039,8 +4318,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa(CI) && !(CI->getCalledFunction() && TLI && TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { - emitAnalysis(VectorizationReport(it) << - "call instruction cannot be vectorized"); + emitAnalysis(VectorizationReport(&*it) + << "call instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n"); return false; } @@ -4049,8 +4328,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // second argument is the same (i.e. loop invariant) if (CI && hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) { - if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) { - emitAnalysis(VectorizationReport(it) + auto *SE = PSE.getSE(); + if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { + emitAnalysis(VectorizationReport(&*it) << "intrinsic instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); return false; @@ -4061,7 +4341,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Also, we can't vectorize extractelement instructions. 
if ((!VectorType::isValidElementType(it->getType()) && !it->getType()->isVoidTy()) || isa(it)) { - emitAnalysis(VectorizationReport(it) + emitAnalysis(VectorizationReport(&*it) << "instruction return type cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable type.\n"); return false; @@ -4085,8 +4365,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Reduction instructions are allowed to have exit users. // All other instructions must not have external users. - if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) { - emitAnalysis(VectorizationReport(it) << + if (hasOutsideLoopUser(TheLoop, &*it, AllowedExit)) { + emitAnalysis(VectorizationReport(&*it) << "value cannot be used outside the loop"); return false; } @@ -4104,6 +4384,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } } + // Now we know the widest induction type, check if our found induction + // is the same size. If it's not, unset it here and InnerLoopVectorizer + // will create another. + if (Induction && WidestIndTy != Induction->getType()) + Induction = nullptr; + return true; } @@ -4116,7 +4402,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) { else return; - Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop); + Value *Stride = getStrideFromPointer(Ptr, PSE.getSE(), TheLoop); if (!Stride) return; @@ -4142,7 +4428,7 @@ void LoopVectorizationLegality::collectLoopUniforms() { BE = TheLoop->block_end(); B != BE; ++B) for (BasicBlock::iterator I = (*B)->begin(), IE = (*B)->end(); I != IE; ++I) - if (I->getType()->isPointerTy() && isConsecutivePtr(I)) + if (I->getType()->isPointerTy() && isConsecutivePtr(&*I)) Worklist.insert(Worklist.end(), I->op_begin(), I->op_end()); while (!Worklist.empty()) { @@ -4179,32 +4465,12 @@ bool LoopVectorizationLegality::canVectorizeMemory() { return false; } - if (LAI->getNumRuntimePointerChecks() > - VectorizerParams::RuntimeMemoryCheckThreshold) { - emitAnalysis(VectorizationReport() - << LAI->getNumRuntimePointerChecks() << " exceeds limit of " - << VectorizerParams::RuntimeMemoryCheckThreshold - << " dependent memory operations checked at runtime"); - DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); - return false; - } + Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); + PSE.addPredicate(LAI->PSE.getUnionPredicate()); + return true; } -LoopVectorizationLegality::InductionKind -LoopVectorizationLegality::isInductionVariable(PHINode *Phi, - ConstantInt *&StepValue) { - if (!isInductionPHI(Phi, SE, StepValue)) - return IK_NoInduction; - - Type *PhiTy = Phi->getType(); - // Found an Integer induction variable. - if (PhiTy->isIntegerTy()) - return IK_IntInduction; - // Found an Pointer induction variable. - return IK_PtrInduction; -} - bool LoopVectorizationLegality::isInductionVariable(const Value *V) { Value *In0 = const_cast(V); PHINode *PN = dyn_cast_or_null(In0); @@ -4256,8 +4522,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB, if (++NumPredStores > NumberOfStoresToPredicate || !isSafePtr || !isSinglePredecessor) { - // Build a masked store if it is legal for the target, otherwise scalarize - // the block. + // Build a masked store if it is legal for the target, otherwise + // scalarize the block. bool isLegalMaskedOp = isLegalMaskedStore(SI->getValueOperand()->getType(), SI->getPointerOperand()); @@ -4315,7 +4581,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses( StoreInst *SI = dyn_cast(I); Value *Ptr = LI ? 
LI->getPointerOperand() : SI->getPointerOperand(); - int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides); + int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides); // The factor of the corresponding interleave group. unsigned Factor = std::abs(Stride); @@ -4324,7 +4590,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses( if (Factor < 2 || Factor > MaxInterleaveGroupFactor) continue; - const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); + const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); PointerType *PtrTy = dyn_cast(Ptr->getType()); unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType()); @@ -4411,12 +4677,12 @@ void InterleavedAccessInfo::analyzeInterleaving( continue; // Calculate the distance and prepare for the rule 3. - const SCEVConstant *DistToA = - dyn_cast(SE->getMinusSCEV(DesB.Scev, DesA.Scev)); + const SCEVConstant *DistToA = dyn_cast( + PSE.getSE()->getMinusSCEV(DesB.Scev, DesA.Scev)); if (!DistToA) continue; - int DistanceToA = DistToA->getValue()->getValue().getSExtValue(); + int DistanceToA = DistToA->getAPInt().getSExtValue(); // Skip if the distance is not multiple of size as they are not in the // same group. @@ -4454,8 +4720,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { emitAnalysis(VectorizationReport() << "runtime pointer checks needed. Enable vectorization of this " "loop with '#pragma clang loop vectorize(enable)' when " - "compiling with -Os"); - DEBUG(dbgs() << "LV: Aborting. Runtime ptr check is required in Os.\n"); + "compiling with -Os/-Oz"); + DEBUG(dbgs() << + "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n"); return Factor; } @@ -4467,10 +4734,12 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { } // Find the trip count. - unsigned TC = SE->getSmallConstantTripCount(TheLoop); + unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n'); - unsigned WidestType = getWidestType(); + MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI); + unsigned SmallestType, WidestType; + std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes(); unsigned WidestRegister = TTI.getRegisterBitWidth(true); unsigned MaxSafeDepDist = -1U; if (Legal->getMaxSafeDepDistBytes() != -1U) @@ -4478,7 +4747,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { WidestRegister = ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist); unsigned MaxVectorSize = WidestRegister / WidestType; - DEBUG(dbgs() << "LV: The Widest type: " << WidestType << " bits.\n"); + + DEBUG(dbgs() << "LV: The Smallest and Widest types: " << SmallestType << " / " + << WidestType << " bits.\n"); DEBUG(dbgs() << "LV: The Widest register is: " << WidestRegister << " bits.\n"); @@ -4491,6 +4762,26 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { " into one vector!"); unsigned VF = MaxVectorSize; + if (MaximizeBandwidth && !OptForSize) { + // Collect all viable vectorization factors. + SmallVector VFs; + unsigned NewMaxVectorSize = WidestRegister / SmallestType; + for (unsigned VS = MaxVectorSize; VS <= NewMaxVectorSize; VS *= 2) + VFs.push_back(VS); + + // For each VF calculate its register usage. + auto RUs = calculateRegisterUsage(VFs); + + // Select the largest VF which doesn't require more registers than existing + // ones. 
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true); + for (int i = RUs.size() - 1; i >= 0; --i) { + if (RUs[i].MaxLocalUsers <= TargetNumRegisters) { + VF = VFs[i]; + break; + } + } + } // If we optimize the program for size, avoid creating the tail loop. if (OptForSize) { @@ -4499,7 +4790,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { emitAnalysis (VectorizationReport() << "unable to calculate the loop count due to complex control flow"); - DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); + DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return Factor; } @@ -4515,8 +4806,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { "cannot optimize for size and vectorize at the " "same time. Enable vectorization of this loop " "with '#pragma clang loop vectorize(enable)' " - "when compiling with -Os"); - DEBUG(dbgs() << "LV: Aborting. A tail loop is required in Os.\n"); + "when compiling with -Os/-Oz"); + DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n"); return Factor; } } @@ -4566,7 +4857,9 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { return Factor; } -unsigned LoopVectorizationCostModel::getWidestType() { +std::pair +LoopVectorizationCostModel::getSmallestAndWidestTypes() { + unsigned MinWidth = -1U; unsigned MaxWidth = 8; const DataLayout &DL = TheFunction->getParent()->getDataLayout(); @@ -4579,18 +4872,22 @@ unsigned LoopVectorizationCostModel::getWidestType() { for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { Type *T = it->getType(); - // Ignore ephemeral values. - if (EphValues.count(it)) + // Skip ignored values. + if (ValuesToIgnore.count(&*it)) continue; // Only examine Loads, Stores and PHINodes. if (!isa(it) && !isa(it) && !isa(it)) continue; - // Examine PHI nodes that are reduction variables. - if (PHINode *PN = dyn_cast(it)) - if (!Legal->getReductionVars()->count(PN)) + // Examine PHI nodes that are reduction variables. Update the type to + // account for the recurrence type. + if (PHINode *PN = dyn_cast(it)) { + if (!Legal->isReductionVariable(PN)) continue; + RecurrenceDescriptor RdxDesc = (*Legal->getReductionVars())[PN]; + T = RdxDesc.getRecurrenceType(); + } // Examine the stored values. if (StoreInst *ST = dyn_cast(it)) @@ -4599,15 +4896,17 @@ unsigned LoopVectorizationCostModel::getWidestType() { // Ignore loaded pointer types and stored pointer types that are not // consecutive. However, we do want to take consecutive stores/loads of // pointer vectors into account. - if (T->isPointerTy() && !isConsecutiveLoadOrStore(it)) + if (T->isPointerTy() && !isConsecutiveLoadOrStore(&*it)) continue; + MinWidth = std::min(MinWidth, + (unsigned)DL.getTypeSizeInBits(T->getScalarType())); MaxWidth = std::max(MaxWidth, (unsigned)DL.getTypeSizeInBits(T->getScalarType())); } } - return MaxWidth; + return {MinWidth, MaxWidth}; } unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, @@ -4628,11 +4927,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, // 3. We don't interleave if we think that we will spill registers to memory // due to the increased register pressure. - // Use the user preference, unless 'auto' is selected. - int UserUF = Hints->getInterleave(); - if (UserUF != 0) - return UserUF; - // When we optimize for size, we don't interleave. 
if (OptForSize) return 1; @@ -4642,7 +4936,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; // Do not interleave loops with a relatively small trip count. - unsigned TC = SE->getSmallConstantTripCount(TheLoop); + unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop); if (TC > 1 && TC < TinyTripCountInterleaveThreshold) return 1; @@ -4658,7 +4952,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, TargetNumRegisters = ForceTargetNumVectorRegs; } - LoopVectorizationCostModel::RegisterUsage R = calculateRegisterUsage(); + RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U); @@ -4756,8 +5050,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, } // Interleave if this is a large loop (small loops are already dealt with by - // this - // point) that could benefit from interleaving. + // this point) that could benefit from interleaving. bool HasReductions = (Legal->getReductionVars()->size() > 0); if (TTI.enableAggressiveInterleaving(HasReductions)) { DEBUG(dbgs() << "LV: Interleaving to expose ILP.\n"); @@ -4768,8 +5061,9 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; } -LoopVectorizationCostModel::RegisterUsage -LoopVectorizationCostModel::calculateRegisterUsage() { +SmallVector +LoopVectorizationCostModel::calculateRegisterUsage( + const SmallVector &VFs) { // This function calculates the register usage by measuring the highest number // of values that are alive at a single location. Obviously, this is a very // rough estimation. We scan the loop in a topological order in order and @@ -4790,8 +5084,8 @@ LoopVectorizationCostModel::calculateRegisterUsage() { LoopBlocksDFS DFS(TheLoop); DFS.perform(LI); - RegisterUsage R; - R.NumInstructions = 0; + RegisterUsage RU; + RU.NumInstructions = 0; // Each 'key' in the map opens a new interval. The values // of the map are the index of the 'last seen' usage of the @@ -4810,15 +5104,13 @@ LoopVectorizationCostModel::calculateRegisterUsage() { unsigned Index = 0; for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO(); bb != be; ++bb) { - R.NumInstructions += (*bb)->size(); - for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e; - ++it) { - Instruction *I = it; - IdxToInstr[Index++] = I; + RU.NumInstructions += (*bb)->size(); + for (Instruction &I : **bb) { + IdxToInstr[Index++] = &I; // Save the end location of each USE. - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - Value *U = I->getOperand(i); + for (unsigned i = 0; i < I.getNumOperands(); ++i) { + Value *U = I.getOperand(i); Instruction *Instr = dyn_cast(U); // Ignore non-instruction values such as arguments, constants, etc. @@ -4847,42 +5139,85 @@ LoopVectorizationCostModel::calculateRegisterUsage() { TransposeEnds[it->second].push_back(it->first); SmallSet OpenIntervals; - unsigned MaxUsage = 0; + // Get the size of the widest register. 
+ unsigned MaxSafeDepDist = -1U; + if (Legal->getMaxSafeDepDistBytes() != -1U) + MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8; + unsigned WidestRegister = + std::min(TTI.getRegisterBitWidth(true), MaxSafeDepDist); + const DataLayout &DL = TheFunction->getParent()->getDataLayout(); + + SmallVector RUs(VFs.size()); + SmallVector MaxUsages(VFs.size(), 0); DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n"); + + // A lambda that gets the register usage for the given type and VF. + auto GetRegUsage = [&DL, WidestRegister](Type *Ty, unsigned VF) { + unsigned TypeSize = DL.getTypeSizeInBits(Ty->getScalarType()); + return std::max(1, VF * TypeSize / WidestRegister); + }; + for (unsigned int i = 0; i < Index; ++i) { Instruction *I = IdxToInstr[i]; // Ignore instructions that are never used within the loop. if (!Ends.count(I)) continue; - // Ignore ephemeral values. - if (EphValues.count(I)) - continue; - // Remove all of the instructions that end at this location. InstrList &List = TransposeEnds[i]; - for (unsigned int j=0, e = List.size(); j < e; ++j) + for (unsigned int j = 0, e = List.size(); j < e; ++j) OpenIntervals.erase(List[j]); - // Count the number of live interals. - MaxUsage = std::max(MaxUsage, OpenIntervals.size()); + // Skip ignored values. + if (ValuesToIgnore.count(I)) + continue; - DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " << - OpenIntervals.size() << '\n'); + // For each VF find the maximum usage of registers. + for (unsigned j = 0, e = VFs.size(); j < e; ++j) { + if (VFs[j] == 1) { + MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size()); + continue; + } + + // Count the number of live intervals. + unsigned RegUsage = 0; + for (auto Inst : OpenIntervals) { + // Skip ignored values for VF > 1. + if (VecValuesToIgnore.count(Inst)) + continue; + RegUsage += GetRegUsage(Inst->getType(), VFs[j]); + } + MaxUsages[j] = std::max(MaxUsages[j], RegUsage); + } + + DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # " + << OpenIntervals.size() << '\n'); // Add the current instruction to the list of open intervals. OpenIntervals.insert(I); } - unsigned Invariant = LoopInvariants.size(); - DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsage << '\n'); - DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n'); - DEBUG(dbgs() << "LV(REG): LoopSize: " << R.NumInstructions << '\n'); + for (unsigned i = 0, e = VFs.size(); i < e; ++i) { + unsigned Invariant = 0; + if (VFs[i] == 1) + Invariant = LoopInvariants.size(); + else { + for (auto Inst : LoopInvariants) + Invariant += GetRegUsage(Inst->getType(), VFs[i]); + } - R.LoopInvariantRegs = Invariant; - R.MaxLocalUsers = MaxUsage; - return R; + DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n'); + DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n'); + DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant << '\n'); + DEBUG(dbgs() << "LV(REG): LoopSize: " << RU.NumInstructions << '\n'); + + RU.LoopInvariantRegs = Invariant; + RU.MaxLocalUsers = MaxUsages[i]; + RUs[i] = RU; + } + + return RUs; } unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { @@ -4900,11 +5235,11 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) { if (isa(it)) continue; - // Ignore ephemeral values. - if (EphValues.count(it)) + // Skip ignored values. + if (ValuesToIgnore.count(&*it)) continue; - unsigned C = getInstructionCost(it, VF); + unsigned C = getInstructionCost(&*it, VF); // Check if we should override the cost. 
if (ForceTargetInstructionCost.getNumOccurrences() > 0) @@ -4969,7 +5304,7 @@ static bool isLikelyComplexAddressComputation(Value *Ptr, if (!C) return true; - const APInt &APStepVal = C->getValue()->getValue(); + const APInt &APStepVal = C->getAPInt(); // Huge step value - give up. if (APStepVal.getBitWidth() > 64) @@ -4981,9 +5316,8 @@ static bool isLikelyComplexAddressComputation(Value *Ptr, } static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) { - if (Legal->hasStride(I->getOperand(0)) || Legal->hasStride(I->getOperand(1))) - return true; - return false; + return Legal->hasStride(I->getOperand(0)) || + Legal->hasStride(I->getOperand(1)); } unsigned @@ -4994,7 +5328,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { VF = 1; Type *RetTy = I->getType(); + if (VF > 1 && MinBWs.count(I)) + RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); Type *VectorTy = ToVectorTy(RetTy, VF); + auto SE = PSE.getSE(); // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { @@ -5076,6 +5413,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); + Instruction *Op0AsInstruction = dyn_cast(I->getOperand(0)); + auto It = MinBWs.find(Op0AsInstruction); + if (VF > 1 && It != MinBWs.end()) + ValTy = IntegerType::get(ValTy->getContext(), It->second); VectorTy = ToVectorTy(ValTy, VF); return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy); } @@ -5199,8 +5540,28 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { Legal->isInductionVariable(I->getOperand(0))) return TTI.getCastInstrCost(I->getOpcode(), I->getType(), I->getOperand(0)->getType()); - - Type *SrcVecTy = ToVectorTy(I->getOperand(0)->getType(), VF); + + Type *SrcScalarTy = I->getOperand(0)->getType(); + Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF); + if (VF > 1 && MinBWs.count(I)) { + // This cast is going to be shrunk. This may remove the cast or it might + // turn it into slightly different cast. For example, if MinBW == 16, + // "zext i8 %1 to i32" becomes "zext i8 %1 to i16". + // + // Calculate the modified src and dest types. 
+ Type *MinVecTy = VectorTy; + if (I->getOpcode() == Instruction::Trunc) { + SrcVecTy = smallestIntegerVectorType(SrcVecTy, MinVecTy); + VectorTy = largestIntegerVectorType(ToVectorTy(I->getType(), VF), + MinVecTy); + } else if (I->getOpcode() == Instruction::ZExt || + I->getOpcode() == Instruction::SExt) { + SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy); + VectorTy = smallestIntegerVectorType(ToVectorTy(I->getType(), VF), + MinVecTy); + } + } + return TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy); } case Instruction::Call: { @@ -5240,15 +5601,18 @@ char LoopVectorize::ID = 0; static const char lv_name[] = "Loop Vectorization"; INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LoopAccessAnalysis) +INITIALIZE_PASS_DEPENDENCY(DemandedBits) INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false) namespace llvm { @@ -5269,6 +5633,79 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) { return false; } +void LoopVectorizationCostModel::collectValuesToIgnore() { + // Ignore ephemeral values. + CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore); + + // Ignore type-promoting instructions we identified during reduction + // detection. + for (auto &Reduction : *Legal->getReductionVars()) { + RecurrenceDescriptor &RedDes = Reduction.second; + SmallPtrSetImpl &Casts = RedDes.getCastInsts(); + VecValuesToIgnore.insert(Casts.begin(), Casts.end()); + } + + // Ignore induction phis that are only used in either GetElementPtr or ICmp + // instruction to exit loop. Induction variables usually have large types and + // can have big impact when estimating register usage. + // This is for when VF > 1. + for (auto &Induction : *Legal->getInductionVars()) { + auto *PN = Induction.first; + auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch()); + + // Check that the PHI is only used by the induction increment (UpdateV) or + // by GEPs. Then check that UpdateV is only used by a compare instruction or + // the loop header PHI. + // FIXME: Need precise def-use analysis to determine if this instruction + // variable will be vectorized. + if (std::all_of(PN->user_begin(), PN->user_end(), + [&](const User *U) -> bool { + return U == UpdateV || isa(U); + }) && + std::all_of(UpdateV->user_begin(), UpdateV->user_end(), + [&](const User *U) -> bool { + return U == PN || isa(U); + })) { + VecValuesToIgnore.insert(PN); + VecValuesToIgnore.insert(UpdateV); + } + } + + // Ignore instructions that will not be vectorized. + // This is for when VF > 1. 
+ for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be; + ++bb) { + for (auto &Inst : **bb) { + switch (Inst.getOpcode()) { + case Instruction::GetElementPtr: { + // Ignore GEP if its last operand is an induction variable so that it is + // a consecutive load/store and won't be vectorized as scatter/gather + // pattern. + + GetElementPtrInst *Gep = cast(&Inst); + unsigned NumOperands = Gep->getNumOperands(); + unsigned InductionOperand = getGEPInductionOperand(Gep); + bool GepToIgnore = true; + + // Check that all of the gep indices are uniform except for the + // induction operand. + for (unsigned i = 0; i != NumOperands; ++i) { + if (i != InductionOperand && + !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), + TheLoop)) { + GepToIgnore = false; + break; + } + } + + if (GepToIgnore) + VecValuesToIgnore.insert(&Inst); + break; + } + } + } + } +} void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, bool IfPredicateStore) { @@ -5316,19 +5753,12 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, // Create a new entry in the WidenMap and initialize it to Undef or Null. VectorParts &VecResults = WidenMap.splat(Instr, UndefVec); - Instruction *InsertPt = Builder.GetInsertPoint(); - BasicBlock *IfBlock = Builder.GetInsertBlock(); - BasicBlock *CondBlock = nullptr; - VectorParts Cond; - Loop *VectorLp = nullptr; if (IfPredicateStore) { assert(Instr->getParent()->getSinglePredecessor() && "Only support single predecessor blocks"); Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(), Instr->getParent()); - VectorLp = LI->getLoopFor(IfBlock); - assert(VectorLp && "Must have a loop for this block"); } // For each vector unroll 'part': @@ -5343,11 +5773,6 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, Builder.CreateExtractElement(Cond[Part], Builder.getInt32(0)); Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cond[Part], ConstantInt::get(Cond[Part]->getType(), 1)); - CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); - LoopVectorBody.push_back(CondBlock); - VectorLp->addBasicBlockToLoop(CondBlock, *LI); - // Update Builder with newly created basic block. - Builder.SetInsertPoint(InsertPt); } Instruction *Cloned = Instr->clone(); @@ -5367,16 +5792,10 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr, if (!IsVoidRetTy) VecResults[Part] = Cloned; - // End if-block. - if (IfPredicateStore) { - BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); - LoopVectorBody.push_back(NewIfBlock); - VectorLp->addBasicBlockToLoop(NewIfBlock, *LI); - Builder.SetInsertPoint(InsertPt); - ReplaceInstWithInst(IfBlock->getTerminator(), - BranchInst::Create(CondBlock, NewIfBlock, Cmp)); - IfBlock = NewIfBlock; - } + // End if-block. 
+  if (IfPredicateStore)
+    PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned),
+                                              Cmp));
   }
 }
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 7bac407e77e9..40abfc759e0a 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CodeMetrics.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -61,7 +62,7 @@ static cl::opt<int>
                      "number "));
 
 static cl::opt<bool>
-ShouldVectorizeHor("slp-vectorize-hor", cl::init(false), cl::Hidden,
+ShouldVectorizeHor("slp-vectorize-hor", cl::init(true), cl::Hidden,
                    cl::desc("Attempt to vectorize horizontal reductions"));
 
 static cl::opt<bool> ShouldStartVectorizeHorAtStore(
@@ -73,6 +74,14 @@ static cl::opt<int>
 MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
     cl::desc("Attempt to vectorize for this register size in bits"));
 
+/// Limits the size of scheduling regions in a block.
+/// It avoids long compile times for _very_ large blocks where vector
+/// instructions are spread over a wide range.
+/// This limit is way higher than needed by real-world functions.
+static cl::opt<int>
+ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
+    cl::desc("Limit the size of the SLP scheduling region per block"));
+
 namespace {
 
 // FIXME: Set this via cl::opt to allow overriding.
@@ -89,6 +98,10 @@ static const unsigned AliasedCheckLimit = 10;
 // This limit is useful for very large basic blocks.
 static const unsigned MaxMemDepDistance = 160;
 
+/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
+/// regions to be handled.
+static const int MinScheduleRegionSize = 16;
+
 /// \brief Predicate for the element types that the SLP vectorizer supports.
 ///
 /// The most important thing to filter here are types which are invalid in LLVM
@@ -156,13 +169,11 @@ static unsigned getAltOpcode(unsigned Op) {
 /// of an alternate sequence which can later be merged as
 /// a ShuffleVector instruction.
 static bool canCombineAsAltInst(unsigned Op) {
-  if (Op == Instruction::FAdd || Op == Instruction::FSub ||
-      Op == Instruction::Sub || Op == Instruction::Add)
-    return true;
-  return false;
+  return Op == Instruction::FAdd || Op == Instruction::FSub ||
+         Op == Instruction::Sub || Op == Instruction::Add;
 }
 
-/// \returns ShuffleVector instruction if intructions in \p VL have
+/// \returns ShuffleVector instruction if instructions in \p VL have
 ///  alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence.
 /// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...)
 static unsigned isAltInst(ArrayRef<Value *> VL) {
@@ -242,6 +253,9 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
       case LLVMContext::MD_fpmath:
         MD = MDNode::getMostGenericFPMath(MD, IMD);
         break;
+      case LLVMContext::MD_nontemporal:
+        MD = MDNode::intersect(MD, IMD);
+        break;
       }
     }
     I->setMetadata(Kind, MD);
@@ -393,7 +407,7 @@ public:
   /// \brief Perform LICM and CSE on the newly generated gather sequences.
   void optimizeGatherSequence();
 
-  /// \returns true if it is benefitial to reverse the vector order.
+  /// \returns true if it is beneficial to reverse the vector order.
   bool shouldReorder() const {
     return NumLoadsWantToChangeOrder > NumLoadsWantToKeepOrder;
   }
@@ -441,7 +455,7 @@ private:
   /// \returns a vector from a collection of scalars in \p VL.
Value *Gather(ArrayRef VL, VectorType *Ty); - /// \returns whether the VectorizableTree is fully vectoriable and will + /// \returns whether the VectorizableTree is fully vectorizable and will /// be beneficial even the tree height is tiny. bool isFullyVectorizableTinyTree(); @@ -506,7 +520,7 @@ private: /// This POD struct describes one external user in the vectorized tree. struct ExternalUser { ExternalUser (Value *S, llvm::User *U, int L) : - Scalar(S), User(U), Lane(L){}; + Scalar(S), User(U), Lane(L){} // Which scalar in our function. Value *Scalar; // Which user that uses the scalar. @@ -717,6 +731,8 @@ private: : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize), ScheduleStart(nullptr), ScheduleEnd(nullptr), FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr), + ScheduleRegionSize(0), + ScheduleRegionSizeLimit(ScheduleRegionSizeBudget), // Make sure that the initial SchedulingRegionID is greater than the // initial SchedulingRegionID in ScheduleData (which is 0). SchedulingRegionID(1) {} @@ -728,6 +744,13 @@ private: FirstLoadStoreInRegion = nullptr; LastLoadStoreInRegion = nullptr; + // Reduce the maximum schedule region size by the size of the + // previous scheduling run. + ScheduleRegionSizeLimit -= ScheduleRegionSize; + if (ScheduleRegionSizeLimit < MinScheduleRegionSize) + ScheduleRegionSizeLimit = MinScheduleRegionSize; + ScheduleRegionSize = 0; + // Make a new scheduling region, i.e. all existing ScheduleData is not // in the new region yet. ++SchedulingRegionID; @@ -804,7 +827,8 @@ private: void cancelScheduling(ArrayRef VL); /// Extends the scheduling region so that V is inside the region. - void extendSchedulingRegion(Value *V); + /// \returns true if the region size is within the limit. + bool extendSchedulingRegion(Value *V); /// Initialize the ScheduleData structures for new instructions in the /// scheduling region. @@ -858,6 +882,12 @@ private: /// (can be null). ScheduleData *LastLoadStoreInRegion; + /// The current size of the scheduling region. + int ScheduleRegionSize; + + /// The maximum size allowed for the scheduling region. + int ScheduleRegionSizeLimit; + /// The ID of the scheduling region. For a new vectorization iteration this /// is incremented which "removes" all ScheduleData from the region. int SchedulingRegionID; @@ -1077,7 +1107,9 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { if (!BS.tryScheduleBundle(VL, this)) { DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); - BS.cancelScheduling(VL); + assert((!BS.getScheduleData(VL[0]) || + !BS.getScheduleData(VL[0])->isPartOfBundle()) && + "tryScheduleBundle should cancelScheduling on failure"); newTreeEntry(VL, false); return; } @@ -1125,6 +1157,23 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth) { return; } case Instruction::Load: { + // Check that a vectorized load would load the same memory as a scalar + // load. + // For example we don't want vectorize loads that are smaller than 8 bit. + // Even though we have a packed struct {} LLVM treats + // loading/storing it as an i8 struct. If we vectorize loads/stores from + // such a struct we read/write packed bits disagreeing with the + // unvectorized version. 
+      const DataLayout &DL = F->getParent()->getDataLayout();
+      Type *ScalarTy = VL[0]->getType();
+
+      if (DL.getTypeSizeInBits(ScalarTy) !=
+          DL.getTypeAllocSizeInBits(ScalarTy)) {
+        BS.cancelScheduling(VL);
+        newTreeEntry(VL, false);
+        DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
+        return;
+      }
       // Check if the loads are consecutive or of we need to swizzle them.
       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
         LoadInst *L = cast<LoadInst>(VL[i]);
@@ -1134,7 +1183,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
           DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
           return;
         }
-        const DataLayout &DL = F->getParent()->getDataLayout();
+
         if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
           if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
             ++NumLoadsWantToChangeOrder;
@@ -1692,7 +1741,8 @@ int BoUpSLP::getSpillCost() {
     }
 
     // Now find the sequence of instructions between PrevInst and Inst.
-    BasicBlock::reverse_iterator InstIt(Inst), PrevInstIt(PrevInst);
+    BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
+        PrevInstIt(PrevInst->getIterator());
     --PrevInstIt;
     while (InstIt != PrevInstIt) {
       if (PrevInstIt == PrevInst->getParent()->rend()) {
@@ -1892,106 +1942,126 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
   }
 }
 
+// Return true if I should be commuted before adding its left and right
+// operands to the arrays Left and Right.
+//
+// The vectorizer is trying to either have all elements on one side be
+// instructions with the same opcode to enable further vectorization, or having
+// a splat to lower the vectorizing cost.
+static bool shouldReorderOperands(int i, Instruction &I,
+                                  SmallVectorImpl<Value *> &Left,
+                                  SmallVectorImpl<Value *> &Right,
+                                  bool AllSameOpcodeLeft,
+                                  bool AllSameOpcodeRight, bool SplatLeft,
+                                  bool SplatRight) {
+  Value *VLeft = I.getOperand(0);
+  Value *VRight = I.getOperand(1);
+  // If we have "SplatRight", try to see if commuting is needed to preserve it.
+  if (SplatRight) {
+    if (VRight == Right[i - 1])
+      // Preserve SplatRight
+      return false;
+    if (VLeft == Right[i - 1]) {
+      // Commuting would preserve SplatRight, but we don't want to break
+      // SplatLeft either, i.e. preserve the original order if possible.
+      // (FIXME: why do we care?)
+      if (SplatLeft && VLeft == Left[i - 1])
+        return false;
+      return true;
+    }
+  }
+  // Symmetrically handle Right side.
+  if (SplatLeft) {
+    if (VLeft == Left[i - 1])
+      // Preserve SplatLeft
+      return false;
+    if (VRight == Left[i - 1])
+      return true;
+  }
+
+  Instruction *ILeft = dyn_cast<Instruction>(VLeft);
+  Instruction *IRight = dyn_cast<Instruction>(VRight);
+
+  // If we have "AllSameOpcodeRight", try to see if the left operand preserves
+  // it and not the right, in this case we want to commute.
+  if (AllSameOpcodeRight) {
+    unsigned RightPrevOpcode = cast<Instruction>(Right[i - 1])->getOpcode();
+    if (IRight && RightPrevOpcode == IRight->getOpcode())
+      // Do not commute, a match on the right preserves AllSameOpcodeRight
+      return false;
+    if (ILeft && RightPrevOpcode == ILeft->getOpcode()) {
+      // We have a match and may want to commute, but first check if there is
+      // not also a match on the existing operands on the Left to preserve
+      // AllSameOpcodeLeft, i.e. preserve the original order if possible.
+      // (FIXME: why do we care?)
+      if (AllSameOpcodeLeft && ILeft &&
+          cast<Instruction>(Left[i - 1])->getOpcode() == ILeft->getOpcode())
+        return false;
+      return true;
+    }
+  }
+  // Symmetrically handle Left side.
+  if (AllSameOpcodeLeft) {
+    unsigned LeftPrevOpcode = cast<Instruction>(Left[i - 1])->getOpcode();
+    if (ILeft && LeftPrevOpcode == ILeft->getOpcode())
+      return false;
+    if (IRight && LeftPrevOpcode == IRight->getOpcode())
+      return true;
+  }
+  return false;
+}
+
 void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
                                              SmallVectorImpl<Value *> &Left,
                                              SmallVectorImpl<Value *> &Right) {
-  SmallVector<Value *, 16> OrigLeft, OrigRight;
-
-  bool AllSameOpcodeLeft = true;
-  bool AllSameOpcodeRight = true;
-  for (unsigned i = 0, e = VL.size(); i != e; ++i) {
-    Instruction *I = cast<Instruction>(VL[i]);
-    Value *VLeft = I->getOperand(0);
-    Value *VRight = I->getOperand(1);
-
-    OrigLeft.push_back(VLeft);
-    OrigRight.push_back(VRight);
-
-    Instruction *ILeft = dyn_cast<Instruction>(VLeft);
-    Instruction *IRight = dyn_cast<Instruction>(VRight);
-
-    // Check whether all operands on one side have the same opcode. In this case
-    // we want to preserve the original order and not make things worse by
-    // reordering.
-    if (i && AllSameOpcodeLeft && ILeft) {
-      if (Instruction *PLeft = dyn_cast<Instruction>(OrigLeft[i - 1])) {
-        if (PLeft->getOpcode() != ILeft->getOpcode())
-          AllSameOpcodeLeft = false;
-      } else
-        AllSameOpcodeLeft = false;
-    }
-    if (i && AllSameOpcodeRight && IRight) {
-      if (Instruction *PRight = dyn_cast<Instruction>(OrigRight[i - 1])) {
-        if (PRight->getOpcode() != IRight->getOpcode())
-          AllSameOpcodeRight = false;
-      } else
-        AllSameOpcodeRight = false;
-    }
-
-    // Sort two opcodes. In the code below we try to preserve the ability to use
-    // broadcast of values instead of individual inserts.
-    // vl1 = load
-    // vl2 = phi
-    // vr1 = load
-    // vr2 = vr2
-    //         = vl1 x vr1
-    //         = vl2 x vr2
-    // If we just sorted according to opcode we would leave the first line in
-    // tact but we would swap vl2 with vr2 because opcode(phi) > opcode(load).
-    //         = vl1 x vr1
-    //         = vr2 x vl2
-    // Because vr2 and vr1 are from the same load we loose the opportunity of a
-    // broadcast for the packed right side in the backend: we have [vr1, vl2]
-    // instead of [vr1, vr2=vr1].
-    if (ILeft && IRight) {
-      if (!i && ILeft->getOpcode() > IRight->getOpcode()) {
-        Left.push_back(IRight);
-        Right.push_back(ILeft);
-      } else if (i && ILeft->getOpcode() > IRight->getOpcode() &&
-                 Right[i - 1] != IRight) {
-        // Try not to destroy a broad cast for no apparent benefit.
-        Left.push_back(IRight);
-        Right.push_back(ILeft);
-      } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
-                 Right[i - 1] == ILeft) {
-        // Try preserve broadcasts.
-        Left.push_back(IRight);
-        Right.push_back(ILeft);
-      } else if (i && ILeft->getOpcode() == IRight->getOpcode() &&
-                 Left[i - 1] == IRight) {
-        // Try preserve broadcasts.
-        Left.push_back(IRight);
-        Right.push_back(ILeft);
-      } else {
-        Left.push_back(ILeft);
-        Right.push_back(IRight);
-      }
-      continue;
-    }
-    // One opcode, put the instruction on the right.
-    if (ILeft) {
-      Left.push_back(VRight);
-      Right.push_back(ILeft);
-      continue;
-    }
+  if (VL.size()) {
+    // Peel the first iteration out of the loop since there's nothing
+    // interesting to do anyway and it simplifies the checks in the loop.
+    auto VLeft = cast<Instruction>(VL[0])->getOperand(0);
+    auto VRight = cast<Instruction>(VL[0])->getOperand(1);
+    if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
+      // Favor having instruction to the right. FIXME: why?
+      std::swap(VLeft, VRight);
     Left.push_back(VLeft);
     Right.push_back(VRight);
   }
 
-  bool LeftBroadcast = isSplat(Left);
-  bool RightBroadcast = isSplat(Right);
+  // Keep track if we have instructions with all the same opcode on one side.
+  bool AllSameOpcodeLeft = isa<Instruction>(Left[0]);
+  bool AllSameOpcodeRight = isa<Instruction>(Right[0]);
+  // Keep track if we have one side with all the same value (broadcast).
+  bool SplatLeft = true;
+  bool SplatRight = true;
 
-  // If operands end up being broadcast return this operand order.
-  if (LeftBroadcast || RightBroadcast)
-    return;
-
-  // Don't reorder if the operands where good to begin.
-  if (AllSameOpcodeRight || AllSameOpcodeLeft) {
-    Left = OrigLeft;
-    Right = OrigRight;
+  for (unsigned i = 1, e = VL.size(); i != e; ++i) {
+    Instruction *I = cast<Instruction>(VL[i]);
+    assert(I->isCommutative() && "Can only process commutative instruction");
+    // Commute to favor either a splat or maximizing having the same opcodes on
+    // one side.
+    if (shouldReorderOperands(i, *I, Left, Right, AllSameOpcodeLeft,
+                              AllSameOpcodeRight, SplatLeft, SplatRight)) {
+      Left.push_back(I->getOperand(1));
+      Right.push_back(I->getOperand(0));
+    } else {
+      Left.push_back(I->getOperand(0));
+      Right.push_back(I->getOperand(1));
+    }
+    // Update Splat* and AllSameOpcode* after the insertion.
+    SplatRight = SplatRight && (Right[i - 1] == Right[i]);
+    SplatLeft = SplatLeft && (Left[i - 1] == Left[i]);
+    AllSameOpcodeLeft = AllSameOpcodeLeft && isa<Instruction>(Left[i]) &&
+                        (cast<Instruction>(Left[i - 1])->getOpcode() ==
+                         cast<Instruction>(Left[i])->getOpcode());
+    AllSameOpcodeRight = AllSameOpcodeRight && isa<Instruction>(Right[i]) &&
+                         (cast<Instruction>(Right[i - 1])->getOpcode() ==
+                          cast<Instruction>(Right[i])->getOpcode());
   }
+  // If one operand ends up being a broadcast, return this operand order.
+  if (SplatRight || SplatLeft)
+    return;
 
   const DataLayout &DL = F->getParent()->getDataLayout();
 
   // Finally check if we can get longer vectorizable chain by reordering
@@ -2032,7 +2102,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
 
 void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
   Instruction *VL0 = cast<Instruction>(VL[0]);
-  BasicBlock::iterator NextInst = VL0;
+  BasicBlock::iterator NextInst(VL0);
   ++NextInst;
   Builder.SetInsertPoint(VL0->getParent(), NextInst);
   Builder.SetCurrentDebugLocation(VL0->getDebugLoc());
@@ -2489,7 +2559,7 @@ Value *BoUpSLP::vectorizeTree() {
     scheduleBlock(BSIter.second.get());
   }
 
-  Builder.SetInsertPoint(F->getEntryBlock().begin());
+  Builder.SetInsertPoint(&F->getEntryBlock().front());
   vectorizeTree(&VectorizableTree[0]);
 
   DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
@@ -2534,7 +2604,7 @@ Value *BoUpSLP::vectorizeTree() {
           User->replaceUsesOfWith(Scalar, Ex);
         }
       } else {
-        Builder.SetInsertPoint(F->getEntryBlock().begin());
+        Builder.SetInsertPoint(&F->getEntryBlock().front());
         Value *Ex = Builder.CreateExtractElement(Vec, Lane);
         CSEBlocks.insert(&F->getEntryBlock());
         User->replaceUsesOfWith(Scalar, Ex);
@@ -2643,7 +2713,7 @@ void BoUpSLP::optimizeGatherSequence() {
     BasicBlock *BB = (*I)->getBlock();
     // For all instructions in blocks containing gather sequences:
     for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
-      Instruction *In = it++;
+      Instruction *In = &*it++;
       if (!isa<InsertElementInst>(In) && !isa<ExtractElementInst>(In))
         continue;
 
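The rewritten loop above commutes commutative operands lane by lane so that one side either stays a splat (a cheap broadcast) or keeps a single opcode. A self-contained toy model of just the splat rule, under the assumption that plain strings can stand in for Value operands (shouldCommute and every name below are invented for illustration):

    #include <cassert>
    #include <string>

    // Mirrors only the splat-preservation half of shouldReorderOperands().
    static bool shouldCommute(const std::string &VLeft, const std::string &VRight,
                              const std::string &PrevLeft,
                              const std::string &PrevRight, bool SplatLeft,
                              bool SplatRight) {
      if (SplatRight) {
        if (VRight == PrevRight)
          return false; // the right side already continues the splat
        if (VLeft == PrevRight && !(SplatLeft && VLeft == PrevLeft))
          return true; // commuting moves the repeated value to the right
      }
      if (SplatLeft) {
        if (VLeft == PrevLeft)
          return false;
        if (VRight == PrevLeft)
          return true;
      }
      return false;
    }

    int main() {
      // Lane 0 produced (Left="x", Right="s"); lane 1 arrives as ("s", "y").
      // Commuting lane 1 keeps "s" on the right and preserves the splat.
      assert(shouldCommute("s", "y", "x", "s", false, true));
      // If the right operand already continues the splat, leave it alone.
      assert(!shouldCommute("z", "s", "x", "s", false, true));
      return 0;
    }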
@@ -2683,8 +2753,15 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
   ScheduleData *Bundle = nullptr;
   bool ReSchedule = false;
   DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n");
+
+  // Make sure that the scheduling region contains all
+  // instructions of the bundle.
+  for (Value *V : VL) {
+    if (!extendSchedulingRegion(V))
+      return false;
+  }
+
   for (Value *V : VL) {
-    extendSchedulingRegion(V);
     ScheduleData *BundleMember = getScheduleData(V);
     assert(BundleMember &&
            "no ScheduleData for bundle member (maybe not in same basic block)");
@@ -2745,7 +2822,11 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
         schedule(pickedSD, ReadyInsts);
       }
     }
-  return Bundle->isReady();
+  if (!Bundle->isReady()) {
+    cancelScheduling(VL);
+    return false;
+  }
+  return true;
 }
 
 void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
@@ -2774,9 +2855,9 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
   }
 }
 
-void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
+bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
   if (getScheduleData(V))
-    return;
+    return true;
   Instruction *I = dyn_cast<Instruction>(V);
   assert(I && "bundle member must be an instruction");
   assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
@@ -2787,21 +2868,26 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
     ScheduleEnd = I->getNextNode();
     assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
     DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
-    return;
+    return true;
   }
   // Search up and down at the same time, because we don't know if the new
   // instruction is above or below the existing scheduling region.
-  BasicBlock::reverse_iterator UpIter(ScheduleStart);
+  BasicBlock::reverse_iterator UpIter(ScheduleStart->getIterator());
   BasicBlock::reverse_iterator UpperEnd = BB->rend();
   BasicBlock::iterator DownIter(ScheduleEnd);
   BasicBlock::iterator LowerEnd = BB->end();
   for (;;) {
+    if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
+      DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
+      return false;
+    }
+
     if (UpIter != UpperEnd) {
       if (&*UpIter == I) {
         initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
         ScheduleStart = I;
         DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n");
-        return;
+        return true;
       }
       UpIter++;
     }
@@ -2812,13 +2898,14 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
       ScheduleEnd = I->getNextNode();
       assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
       DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
-      return;
+      return true;
     }
     DownIter++;
   }
   assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
          "instruction not found in block");
+  return true;
 }
 
 void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
@@ -2898,8 +2985,8 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
           }
         } else {
           // I'm not sure if this can ever happen. But we need to be safe.
-          // This lets the instruction/bundle never be scheduled and eventally
-          // disable vectorization.
+          // This lets the instruction/bundle never be scheduled and
+          // eventually disable vectorization.
           BundleMember->Dependencies++;
           BundleMember->incrementUnscheduledDeps(1);
         }
@@ -3005,7 +3092,7 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
   };
   std::set<ScheduleData *, ScheduleDataCompare> ReadyInsts;
 
-  // Ensure that all depencency data is updated and fill the ready-list with
+  // Ensure that all dependency data is updated and fill the ready-list with
   // initial instructions.
   int Idx = 0;
   int NumToSchedule = 0;
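With this change, tryScheduleBundle() pre-extends the scheduling region for every bundle member and fails cleanly once extendSchedulingRegion() exceeds ScheduleRegionSizeLimit. A self-contained sketch of that bounded up-and-down search, assuming a vector of ints stands in for a block and [lo, hi) for the current region (extendRegion and all names are invented, and the window here grows symmetrically, a simplification of the real routine):

    #include <cstdio>
    #include <vector>

    static bool extendRegion(const std::vector<int> &block, int target,
                             size_t &lo, size_t &hi, unsigned budget) {
      while (budget--) {
        if (lo > 0) {
          --lo; // one step up
          if (block[lo] == target)
            return true;
        }
        if (hi < block.size()) {
          if (block[hi] == target) {
            ++hi;
            return true;
          }
          ++hi; // one step down
        }
        if (lo == 0 && hi == block.size())
          return false; // target not inside the block at all
      }
      return false; // budget exhausted: the caller cancels the bundle
    }

    int main() {
      std::vector<int> block = {10, 11, 12, 13, 14, 15};
      size_t lo = 2, hi = 4; // the region currently covers {12, 13}
      bool ok = extendRegion(block, 15, lo, hi, /*budget=*/8);
      printf("ok=%d region=[%zu,%zu)\n", ok, lo, hi); // ok=1 region=[0,6)
      return 0;
    }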
@@ -3037,7 +3124,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
       Instruction *pickedInst = BundleMember->Inst;
       if (LastScheduledInst->getNextNode() != pickedInst) {
         BS->BB->getInstList().remove(pickedInst);
-        BS->BB->getInstList().insert(LastScheduledInst, pickedInst);
+        BS->BB->getInstList().insert(LastScheduledInst->getIterator(),
+                                     pickedInst);
       }
       LastScheduledInst = pickedInst;
       BundleMember = BundleMember->NextInBundle;
@@ -3076,11 +3164,11 @@ struct SLPVectorizer : public FunctionPass {
     if (skipOptnoneFunction(F))
       return false;
 
-    SE = &getAnalysis<ScalarEvolution>();
+    SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
     auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
     TLI = TLIP ? &TLIP->getTLI() : nullptr;
-    AA = &getAnalysis<AliasAnalysis>();
+    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
     LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
     AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
@@ -3141,13 +3229,15 @@ struct SLPVectorizer : public FunctionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     FunctionPass::getAnalysisUsage(AU);
     AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequired<ScalarEvolution>();
-    AU.addRequired<AliasAnalysis>();
+    AU.addRequired<ScalarEvolutionWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
     AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addPreserved<LoopInfoWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addPreserved<AAResultsWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
     AU.setPreservesCFG();
   }
@@ -3262,15 +3352,26 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
 
   // Do a quadratic search on all of the given stores and find
   // all of the pairs of stores that follow each other.
+  SmallVector<unsigned, 16> IndexQueue;
   for (unsigned i = 0, e = Stores.size(); i < e; ++i) {
-    for (unsigned j = 0; j < e; ++j) {
-      if (i == j)
-        continue;
-      const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
-      if (R.isConsecutiveAccess(Stores[i], Stores[j], DL)) {
-        Tails.insert(Stores[j]);
+    const DataLayout &DL = Stores[i]->getModule()->getDataLayout();
+    IndexQueue.clear();
+    // If a store has multiple consecutive store candidates, search Stores
+    // array according to the sequence: from i+1 to e, then from i-1 to 0.
+    // This is because pairing with an immediately succeeding or preceding
+    // candidate usually creates the best chance for SLP vectorization.
+    unsigned j = 0;
+    for (j = i + 1; j < e; ++j)
+      IndexQueue.push_back(j);
+    for (j = i; j > 0; --j)
+      IndexQueue.push_back(j - 1);
+
+    for (auto &k : IndexQueue) {
+      if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) {
+        Tails.insert(Stores[k]);
         Heads.insert(Stores[i]);
-        ConsecutiveChain[Stores[i]] = Stores[j];
+        ConsecutiveChain[Stores[i]] = Stores[k];
+        break;
       }
     }
   }
@@ -3430,7 +3531,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
       unsigned VecIdx = 0;
       for (auto &V : BuildVectorSlice) {
         IRBuilder<true, NoFolder> Builder(
-            ++BasicBlock::iterator(InsertAfter));
+            InsertAfter->getParent(), ++BasicBlock::iterator(InsertAfter));
         InsertElementInst *IE = cast<InsertElementInst>(V);
         Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
             VectorizedRoot, Builder.getInt32(VecIdx++)));
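The IndexQueue rewrite above replaces an unordered quadratic inner scan: for store i it now probes candidates in a fixed order and stops at the first consecutive match. A small sketch of that probe order, with no LLVM types involved (probeOrder is an invented name):

    #include <cstdio>
    #include <vector>

    // Nearest following candidates first (i+1 .. e-1), then nearest
    // preceding ones (i-1 .. 0), exactly like the IndexQueue fill loops.
    static std::vector<unsigned> probeOrder(unsigned i, unsigned e) {
      std::vector<unsigned> q;
      for (unsigned j = i + 1; j < e; ++j)
        q.push_back(j);
      for (unsigned j = i; j > 0; --j)
        q.push_back(j - 1);
      return q;
    }

    int main() {
      for (unsigned j : probeOrder(2, 6))
        printf("%u ", j); // prints: 3 4 5 1 0
      printf("\n");
      return 0;
    }

The early break also changes behavior slightly: each head is paired with at most one tail, the closest consecutive one, rather than the last match found.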
@@ -3554,16 +3655,17 @@ class HorizontalReduction {
   unsigned ReductionOpcode;
   /// The opcode of the values we perform a reduction on.
   unsigned ReducedValueOpcode;
-  /// The width of one full horizontal reduction operation.
-  unsigned ReduxWidth;
   /// Should we model this reduction as a pairwise reduction tree or a tree that
   /// splits the vector in halves and adds those halves.
   bool IsPairwiseReduction;
 
 public:
+  /// The width of one full horizontal reduction operation.
+  unsigned ReduxWidth;
+
   HorizontalReduction()
       : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
-        ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
+        ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0) {}
 
   /// \brief Try to find a reduction tree.
   bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
@@ -3609,11 +3711,11 @@ public:
       return false;
 
     // Post order traverse the reduction tree starting at B. We only handle true
-    // trees containing only binary operators.
-    SmallVector<std::pair<BinaryOperator *, unsigned>, 32> Stack;
+    // trees containing only binary operators or selects.
+    SmallVector<std::pair<Instruction *, unsigned>, 32> Stack;
     Stack.push_back(std::make_pair(B, 0));
     while (!Stack.empty()) {
-      BinaryOperator *TreeN = Stack.back().first;
+      Instruction *TreeN = Stack.back().first;
       unsigned EdgeToVist = Stack.back().second++;
       bool IsReducedValue = TreeN->getOpcode() != ReductionOpcode;
@@ -3649,9 +3751,10 @@ public:
 
       // Visit left or right.
       Value *NextV = TreeN->getOperand(EdgeToVist);
-      BinaryOperator *Next = dyn_cast<BinaryOperator>(NextV);
-      if (Next)
-        Stack.push_back(std::make_pair(Next, 0));
+      // We currently only allow BinaryOperators and SelectInsts as reduction
+      // values in our tree.
+      if (isa<BinaryOperator>(NextV) || isa<SelectInst>(NextV))
+        Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0));
       else if (NextV != Phi)
         return false;
     }
@@ -3719,9 +3822,12 @@ public:
     return VectorizedTree != nullptr;
   }
 
-private:
+  unsigned numReductionValues() const {
+    return ReducedVals.size();
+  }
 
-  /// \brief Calcuate the cost of a reduction.
+private:
+  /// \brief Calculate the cost of a reduction.
   int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal) {
     Type *ScalarTy = FirstReducedVal->getType();
     Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
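matchAssociativeReduction() above walks the reduction tree iteratively with a stack of (node, next-edge-to-visit) pairs instead of recursion; nodes whose opcode differs from the reduction opcode terminate a path as reduced values. A standalone sketch of the same stack discipline on a toy tree (Node and the example values are invented):

    #include <cstdio>
    #include <utility>
    #include <vector>

    // Interior nodes share the reduction "opcode"; leaves are reduced values.
    struct Node {
      int val;
      Node *l = nullptr, *r = nullptr;
    };

    int main() {
      Node v0{10}, v1{11}, v2{12}, v3{13};
      Node add0{0, &v0, &v1}, add1{0, &v2, &v3};
      Node root{0, &add0, &add1}; // models (v0 + v1) + (v2 + v3)

      std::vector<std::pair<Node *, unsigned>> stack{{&root, 0u}};
      while (!stack.empty()) {
        Node *n = stack.back().first;
        unsigned edge = stack.back().second++; // plays the role of EdgeToVist
        if (edge == 2) {                       // both operands visited
          stack.pop_back();
          continue;
        }
        Node *next = edge ? n->r : n->l;
        if (next->l || next->r)
          stack.push_back({next, 0u}); // same "opcode": keep descending
        else
          printf("reduced value: %d\n", next->val); // a leaf
      }
      return 0;
    }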
@@ -3827,6 +3933,82 @@ static bool PhiTypeSorterFunc(Value *V, Value *V2) {
   return V->getType() < V2->getType();
 }
 
+/// \brief Try and get a reduction value from a phi node.
+///
+/// Given a phi node \p P in a block \p ParentBB, consider possible reductions
+/// if they come from either \p ParentBB or a containing loop latch.
+///
+/// \returns A candidate reduction value if possible, or \code nullptr \endcode
+/// if not possible.
+static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
+                                BasicBlock *ParentBB, LoopInfo *LI) {
+  // There are situations where the reduction value is not dominated by the
+  // reduction phi. Vectorizing such cases has been reported to cause
+  // miscompiles. See PR25787.
+  auto DominatedReduxValue = [&](Value *R) {
+    return (
+        dyn_cast<Instruction>(R) &&
+        DT->dominates(P->getParent(), dyn_cast<Instruction>(R)->getParent()));
+  };
+
+  Value *Rdx = nullptr;
+
+  // Return the incoming value if it comes from the same BB as the phi node.
+  if (P->getIncomingBlock(0) == ParentBB) {
+    Rdx = P->getIncomingValue(0);
+  } else if (P->getIncomingBlock(1) == ParentBB) {
+    Rdx = P->getIncomingValue(1);
+  }
+
+  if (Rdx && DominatedReduxValue(Rdx))
+    return Rdx;
+
+  // Otherwise, check whether we have a loop latch to look at.
+  Loop *BBL = LI->getLoopFor(ParentBB);
+  if (!BBL)
+    return nullptr;
+  BasicBlock *BBLatch = BBL->getLoopLatch();
+  if (!BBLatch)
+    return nullptr;
+
+  // There is a loop latch, return the incoming value if it comes from
+  // that. This reduction pattern occasionally turns up.
+  if (P->getIncomingBlock(0) == BBLatch) {
+    Rdx = P->getIncomingValue(0);
+  } else if (P->getIncomingBlock(1) == BBLatch) {
+    Rdx = P->getIncomingValue(1);
+  }
+
+  if (Rdx && DominatedReduxValue(Rdx))
+    return Rdx;
+
+  return nullptr;
+}
+
+/// \brief Attempt to reduce a horizontal reduction.
+/// If it is legal to match a horizontal reduction feeding
+/// the phi node P with reduction operators BI, then check if it
+/// can be done.
+/// \returns true if a horizontal reduction was matched and reduced.
+/// \returns false if a horizontal reduction was not matched.
+static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
+                                        BoUpSLP &R, TargetTransformInfo *TTI) {
+  if (!ShouldVectorizeHor)
+    return false;
+
+  HorizontalReduction HorRdx;
+  if (!HorRdx.matchAssociativeReduction(P, BI))
+    return false;
+
+  // If there is a sufficient number of reduction values, reduce
+  // to a nearby power-of-2. Can safely generate oversized
+  // vectors and rely on the backend to split them to legal sizes.
+  HorRdx.ReduxWidth =
+      std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
+
+  return HorRdx.tryToReduce(R, TTI);
+}
+
 bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
   bool Changed = false;
   SmallVector<Value *, 4> Incoming;
@@ -3883,7 +4065,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
 
   for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; it++) {
     // We may go through BB multiple times so skip the one we have checked.
-    if (!VisitedInstrs.insert(it).second)
+    if (!VisitedInstrs.insert(&*it).second)
      continue;
 
     if (isa<DbgInfoIntrinsic>(it))
@@ -3894,20 +4076,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       // Check that the PHI is a reduction PHI.
       if (P->getNumIncomingValues() != 2)
         return Changed;
-      Value *Rdx =
-          (P->getIncomingBlock(0) == BB
-               ? (P->getIncomingValue(0))
-               : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1)
-                                               : nullptr));
+
+      Value *Rdx = getReductionValue(DT, P, BB, LI);
+
       // Check if this is a Binary Operator.
       BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
       if (!BI)
         continue;
 
       // Try to match and vectorize a horizontal reduction.
-      HorizontalReduction HorRdx;
-      if (ShouldVectorizeHor && HorRdx.matchAssociativeReduction(P, BI) &&
-          HorRdx.tryToReduce(R, TTI)) {
+      if (canMatchHorizontalReduction(P, BI, R, TTI)) {
         Changed = true;
         it = BB->begin();
         e = BB->end();
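canMatchHorizontalReduction() above sizes the reduction as a power of two no smaller than 4 and relies on the backend to split oversized vectors into legal widths. A quick sketch of that width computation, with a local stand-in assumed to match llvm::PowerOf2Floor's semantics:

    #include <cstdint>
    #include <cstdio>

    // Largest power of two <= x (0 for x == 0).
    static uint64_t powerOf2Floor(uint64_t x) {
      while (x & (x - 1))
        x &= x - 1; // clear the lowest set bit until at most one remains
      return x;
    }

    int main() {
      for (uint64_t n : {3, 4, 9, 16, 31}) {
        uint64_t w = powerOf2Floor(n);
        if (w < 4) // same clamp as std::max((uint64_t)4, ...)
          w = 4;
        printf("%2llu reduction values -> ReduxWidth %llu\n",
               (unsigned long long)n, (unsigned long long)w);
      }
      return 0;
    }

For example, 9 reduction values yield ReduxWidth 8, and 3 values are still widened to the minimum of 4.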
@@ -3930,15 +4108,12 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
       continue;
     }
 
-    // Try to vectorize horizontal reductions feeding into a store.
     if (ShouldStartVectorizeHorAtStore)
       if (StoreInst *SI = dyn_cast<StoreInst>(it))
         if (BinaryOperator *BinOp =
                 dyn_cast<BinaryOperator>(SI->getValueOperand())) {
-          HorizontalReduction HorRdx;
-          if (((HorRdx.matchAssociativeReduction(nullptr, BinOp) &&
-                HorRdx.tryToReduce(R, TTI)) ||
-               tryToVectorize(BinOp, R))) {
+          if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI) ||
+              tryToVectorize(BinOp, R)) {
            Changed = true;
            it = BB->begin();
            e = BB->end();
@@ -4039,10 +4214,10 @@ bool SLPVectorizer::vectorizeStoreChains(BoUpSLP &R) {
 
 char SLPVectorizer::ID = 0;
 static const char lv_name[] = "SLP Vectorizer";
 INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
 
diff --git a/llvm.spec.in b/llvm.spec.in
index 0a3f6e807849..d4ef08ae5ad3 100644
--- a/llvm.spec.in
+++ b/llvm.spec.in
@@ -65,3 +65,4 @@ rm -rf %{buildroot}
 
 * Mon Feb 09 2003 Brian R. Gaeke
 - Initial working version of RPM spec file.
+
diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt
index d3b2fc37af79..4839dd396706 100644
--- a/projects/CMakeLists.txt
+++ b/projects/CMakeLists.txt
@@ -8,7 +8,8 @@ foreach(entry ${entries})
       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/dragonegg) AND
       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libcxx) AND
       (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libcxxabi) AND
-      (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libunwind))
+      (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libunwind) AND
+      (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/test-suite))
       add_subdirectory(${entry})
     endif()
   endif()
diff --git a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
index 069bd0bcfd8f..b59ee42dec3a 100644
--- a/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
+++ b/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
 %struct.usb_hcd = type { %struct.usb_bus, i64, [0 x i64] }
 
 @uhci_pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer
 
-@__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @uhci_pci_ids
+@__mod_pci_device_table = alias [1 x %struct.pci_device_id], [1 x %struct.pci_device_id]* @uhci_pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0]
 
 define i32 @uhci_suspend(%struct.usb_hcd* %hcd) {
diff --git a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
index 20be13d153bb..ba9740028180 100644
--- a/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
+++ b/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll
@@ -9,7 +9,7 @@ target triple = "x86_64-unknown-linux-gnu"
 %struct.usb_hcd = type { %struct.usb_bus, [0 x i64] }
 
 @pci_ids = constant [1 x %struct.pci_device_id] zeroinitializer
 
-@__mod_pci_device_table = alias [1 x %struct.pci_device_id]* @pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0]
+@__mod_pci_device_table = alias [1 x %struct.pci_device_id], [1 x %struct.pci_device_id]* @pci_ids ; <[1 x %struct.pci_device_id]*> [#uses=0]
 
 define i32 @ehci_pci_setup(%struct.usb_hcd* %hcd) {
 entry:
diff --git a/test/Analysis/BasicAA/bug.23540.ll b/test/Analysis/BasicAA/bug.23540.ll
new file mode 100644
index
000000000000..f693bcf73cd6 --- /dev/null +++ b/test/Analysis/BasicAA/bug.23540.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@c = external global i32 + +; CHECK-LABEL: f +; CHECK: PartialAlias: i32* %arrayidx, i32* %arrayidx6 +define void @f() { + %idxprom = zext i32 undef to i64 + %add4 = add i32 0, 1 + %idxprom5 = zext i32 %add4 to i64 + %arrayidx6 = getelementptr inbounds i32, i32* @c, i64 %idxprom5 + %arrayidx = getelementptr inbounds i32, i32* @c, i64 %idxprom + ret void +} + diff --git a/test/Analysis/BasicAA/bug.23626.ll b/test/Analysis/BasicAA/bug.23626.ll new file mode 100644 index 000000000000..6a1478c65cef --- /dev/null +++ b/test/Analysis/BasicAA/bug.23626.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-darwin13.4.0" + +; CHECK-LABEL: compute1 +; CHECK: PartialAlias: i32* %arrayidx8, i32* %out +; CHECK: PartialAlias: i32* %arrayidx11, i32* %out +; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx8 +; CHECK: PartialAlias: i32* %arrayidx14, i32* %out +; CHECK: PartialAlias: i32* %arrayidx14, i32* %arrayidx8 +; CHECK: PartialAlias: i32* %arrayidx11, i32* %arrayidx14 +define void @compute1(i32 %num.0.lcssa, i32* %out) { + %idxprom = zext i32 %num.0.lcssa to i64 + %arrayidx8 = getelementptr inbounds i32, i32* %out, i64 %idxprom + %add9 = or i32 %num.0.lcssa, 1 + %idxprom10 = zext i32 %add9 to i64 + %arrayidx11 = getelementptr inbounds i32, i32* %out, i64 %idxprom10 + %add12 = or i32 %num.0.lcssa, 2 + %idxprom13 = zext i32 %add12 to i64 + %arrayidx14 = getelementptr inbounds i32, i32* %out, i64 %idxprom13 + ret void +} + +; CHECK-LABEL: compute2 +; CHECK: PartialAlias: i32* %arrayidx11, i32* %out.addr +define void @compute2(i32 %num, i32* %out.addr) { + %add9 = add i32 %num, 1 + %idxprom10 = zext i32 %add9 to i64 + %arrayidx11 = getelementptr inbounds i32, i32* %out.addr, i64 %idxprom10 + ret void +} diff --git a/test/Analysis/BasicAA/cs-cs.ll b/test/Analysis/BasicAA/cs-cs.ll index 78670b61ca1c..dc298f1668be 100644 --- a/test/Analysis/BasicAA/cs-cs.ll +++ b/test/Analysis/BasicAA/cs-cs.ll @@ -2,8 +2,8 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" target triple = "arm-apple-ios" -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind @@ -13,27 +13,27 @@ declare void @a_readonly_func(i8 *) noinline nounwind readonly define <8 x i16> @test1(i8* %p, <8 x i16> %y) { entry: %q = getelementptr i8, i8* %p, i64 16 - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind - call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x 
i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind %c = add <8 x i16> %a, %b ret <8 x i16> %c ; CHECK-LABEL: Function: test1: ; CHECK: NoAlias: i8* %p, i8* %q -; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) -; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 -; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) #1 <-> call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Just Ref: Ptr: i8* %p <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: Ptr: i8* %q <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: Ptr: i8* %p <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Both ModRef: Ptr: i8* %q <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: Just Ref: Ptr: i8* %p <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: Ptr: i8* %q <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK: NoModRef: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) <-> %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 +; CHECK: NoModRef: %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) #4 <-> call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) } define void @test2(i8* %P, i8* %Q) nounwind ssp { @@ -233,4 +233,9 @@ define void @test6(i8* %P) nounwind ssp { ; CHECK: Just Ref: call void @a_readonly_func(i8* %P) <-> call void 
@llvm.memset.p0i8.i64(i8* %P, i8 -51, i64 32, i32 8, i1 false) } -attributes #0 = { nounwind } +attributes #0 = { nounwind readonly argmemonly } +attributes #1 = { nounwind argmemonly } +attributes #2 = { noinline nounwind readonly } +attributes #3 = { nounwind ssp } +attributes #4 = { nounwind } + diff --git a/test/Analysis/BasicAA/full-store-partial-alias.ll b/test/Analysis/BasicAA/full-store-partial-alias.ll index 341f6ba23b3a..20f6f7ec4ad0 100644 --- a/test/Analysis/BasicAA/full-store-partial-alias.ll +++ b/test/Analysis/BasicAA/full-store-partial-alias.ll @@ -1,5 +1,5 @@ -; RUN: opt -S -tbaa -basicaa -gvn < %s | FileCheck -check-prefix=BASICAA %s -; RUN: opt -S -tbaa -gvn < %s | FileCheck %s +; RUN: opt -S -tbaa -gvn < %s | FileCheck -check-prefix=BASICAA %s +; RUN: opt -S -tbaa -disable-basicaa -gvn < %s | FileCheck %s ; rdar://8875631, rdar://8875069 ; BasicAA should notice that the store stores to the entire %u object, diff --git a/test/Analysis/BasicAA/intrinsics.ll b/test/Analysis/BasicAA/intrinsics.ll index 8c05587ce233..526a039ef7ac 100644 --- a/test/Analysis/BasicAA/intrinsics.ll +++ b/test/Analysis/BasicAA/intrinsics.ll @@ -7,14 +7,14 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32- ; CHECK: define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { ; CHECK-NEXT: entry: -; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR:#[0-9]+]] -; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[ATTR:#[0-9]+]] +; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) ; CHECK-NEXT: %c = add <8 x i16> %a, %a define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { entry: - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind - call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind %c = add <8 x i16> %a, %b ret <8 x i16> %c } @@ -22,21 +22,22 @@ entry: ; CHECK: define <8 x i16> @test1(i8* %p, <8 x i16> %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: %q = getelementptr i8, i8* %p, i64 16 -; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) [[ATTR]] -; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) [[ATTR]] +; CHECK-NEXT: call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) ; CHECK-NEXT: %c = add <8 x i16> %a, %a define <8 x i16> @test1(i8* %p, <8 x i16> %y) { entry: %q = getelementptr i8, i8* %p, i64 16 - %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind - call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) - %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %p, i32 16) nounwind %c = add <8 x i16> %a, %b ret <8 x i16> %c } -declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly -declare void 
@llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind +declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.p0i8.v8i16(i8*, <8 x i16>, i32) nounwind -; CHECK: attributes #0 = { nounwind readonly } +; CHECK: attributes #0 = { argmemonly nounwind readonly } +; CHECK: attributes #1 = { argmemonly nounwind } ; CHECK: attributes [[ATTR]] = { nounwind } diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll index 3084f809c370..e42793936c3d 100644 --- a/test/Analysis/BasicAA/modref.ll +++ b/test/Analysis/BasicAA/modref.ll @@ -190,6 +190,43 @@ define i32 @test10(i32* %P, i32* %P2) { ; CHECK: ret i32 %Diff } +; CHECK-LABEL: @test11( +define i32 @test11(i32* %P, i32* %P2) { + %V1 = load i32, i32* %P + call i32 @func_argmemonly(i32* readonly %P2) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK-NOT: load + ; CHECK: ret i32 0 +} + +declare i32 @func_argmemonly_two_args(i32* %P, i32* %P2) argmemonly + +; CHECK-LABEL: @test12( +define i32 @test12(i32* %P, i32* %P2, i32* %P3) { + %V1 = load i32, i32* %P + call i32 @func_argmemonly_two_args(i32* readonly %P2, i32* %P3) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK: load + ; CHECK: load + ; CHECK: sub + ; CHECK: ret i32 %Diff +} + +; CHECK-LABEL: @test13( +define i32 @test13(i32* %P, i32* %P2) { + %V1 = load i32, i32* %P + call i32 @func_argmemonly(i32* readnone %P2) + %V2 = load i32, i32* %P + %Diff = sub i32 %V1, %V2 + ret i32 %Diff + ; CHECK-NOT: load + ; CHECK: ret i32 0 +} + declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind diff --git a/test/Analysis/BasicAA/noalias-bugs.ll b/test/Analysis/BasicAA/noalias-bugs.ll index acb230c45de4..71b3c443f542 100644 --- a/test/Analysis/BasicAA/noalias-bugs.ll +++ b/test/Analysis/BasicAA/noalias-bugs.ll @@ -24,7 +24,7 @@ define i64 @testcase(%nested * noalias %p1, %nested * noalias %p2, ; CHECK: store i64 2 ; CHECK: load -; CHECK; store i64 1 +; CHECK: store i64 1 store i64 2, i64* %ptr.64, align 8 %r = load i64, i64* %either_ptr.64, align 8 diff --git a/test/Analysis/BasicAA/phi-aa.ll b/test/Analysis/BasicAA/phi-aa.ll index a72778277bb2..3944e9e43566 100644 --- a/test/Analysis/BasicAA/phi-aa.ll +++ b/test/Analysis/BasicAA/phi-aa.ll @@ -39,6 +39,7 @@ return: ; CHECK-LABEL: pr18068 ; CHECK: MayAlias: i32* %0, i32* %arrayidx5 +; CHECK: NoAlias: i32* %arrayidx13, i32* %arrayidx5 define i32 @pr18068(i32* %jj7, i32* %j) { entry: diff --git a/test/Analysis/BasicAA/phi-loop.ll b/test/Analysis/BasicAA/phi-loop.ll new file mode 100644 index 000000000000..6337bfbc1cfb --- /dev/null +++ b/test/Analysis/BasicAA/phi-loop.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -basicaa -basicaa-recphi=1 -gvn -S | FileCheck %s +; +; Check that section->word_ofs doesn't get reloaded in every iteration of the +; for loop. 
+; +; Code: +; +; typedef struct { +; unsigned num_words; +; unsigned word_ofs; +; const unsigned *data; +; } section_t; +; +; +; void test2(const section_t * restrict section, unsigned * restrict dst) {; +; while (section->data != NULL) { +; const unsigned *src = section->data; +; for (unsigned i=0; i < section->num_words; ++i) { +; dst[section->word_ofs + i] = src[i]; +; } +; +; ++section; +; } +; } +; + +; CHECK-LABEL: for.body: +; CHECK-NOT: load i32, i32* %word_ofs + +%struct.section_t = type { i32, i32, i32* } + +define void @test2(%struct.section_t* noalias nocapture readonly %section, i32* noalias nocapture %dst) { +entry: + %data13 = getelementptr inbounds %struct.section_t, %struct.section_t* %section, i32 0, i32 2 + %0 = load i32*, i32** %data13, align 4 + %cmp14 = icmp eq i32* %0, null + br i1 %cmp14, label %while.end, label %for.cond.preheader + +for.cond.preheader: ; preds = %entry, %for.end + %1 = phi i32* [ %6, %for.end ], [ %0, %entry ] + %section.addr.015 = phi %struct.section_t* [ %incdec.ptr, %for.end ], [ %section, %entry ] + %num_words = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 0, i32 0 + %2 = load i32, i32* %num_words, align 4 + %cmp211 = icmp eq i32 %2, 0 + br i1 %cmp211, label %for.end, label %for.body.lr.ph + +for.body.lr.ph: ; preds = %for.cond.preheader + %word_ofs = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 0, i32 1 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %arrayidx.phi = phi i32* [ %1, %for.body.lr.ph ], [ %arrayidx.inc, %for.body ] + %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %3 = load i32, i32* %arrayidx.phi, align 4 + %4 = load i32, i32* %word_ofs, align 4 + %add = add i32 %4, %i.012 + %arrayidx3 = getelementptr inbounds i32, i32* %dst, i32 %add + store i32 %3, i32* %arrayidx3, align 4 + %inc = add i32 %i.012, 1 + %5 = load i32, i32* %num_words, align 4 + %cmp2 = icmp ult i32 %inc, %5 + %arrayidx.inc = getelementptr i32, i32* %arrayidx.phi, i32 1 + br i1 %cmp2, label %for.body, label %for.end + +for.end: ; preds = %for.body, %for.cond.preheader + %incdec.ptr = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 1 + %data = getelementptr inbounds %struct.section_t, %struct.section_t* %section.addr.015, i32 1, i32 2 + %6 = load i32*, i32** %data, align 4 + %cmp = icmp eq i32* %6, null + br i1 %cmp, label %while.end, label %for.cond.preheader + +while.end: ; preds = %for.end, %entry + ret void +} + diff --git a/test/Analysis/BasicAA/q.bad.ll b/test/Analysis/BasicAA/q.bad.ll new file mode 100644 index 000000000000..f2de6a76c5e0 --- /dev/null +++ b/test/Analysis/BasicAA/q.bad.ll @@ -0,0 +1,180 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7--linux-gnueabi" + +; CHECK-LABEL: test_zext_sext_amounts255 +; CHECK: NoAlias: i8* %a, i8* %b +define void @test_zext_sext_amounts255(i8* %mem) { + %sext.1 = sext i8 255 to i16 + %sext.zext.1 = zext i16 %sext.1 to i64 + %sext.2 = sext i8 255 to i32 + %sext.zext.2 = zext i32 %sext.2 to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.1 + %b = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.2 + ret void +} + +; CHECK-LABEL: test_zext_sext_amounts +; CHECK: PartialAlias: i8* %a, i8* %b +; %a and %b only PartialAlias as, although they're both zext(sext(%num)) they'll extend the sign by a different 
+; number of bits before zext-ing the remainder. +define void @test_zext_sext_amounts(i8* %mem, i8 %num) { + %sext.1 = sext i8 %num to i16 + %sext.zext.1 = zext i16 %sext.1 to i64 + %sext.2 = sext i8 %num to i32 + %sext.zext.2 = zext i32 %sext.2 to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.1 + %b = getelementptr inbounds i8, i8* %mem, i64 %sext.zext.2 + ret void +} + +; CHECK-LABEL: based_on_pr18068 +; CHECK: NoAlias: i8* %a, i8* %b +; CHECK: NoAlias: i8* %a, i8* %c +define void @based_on_pr18068(i32 %loaded, i8* %mem) { + %loaded.64 = zext i32 %loaded to i64 + %add1 = add i32 %loaded, -1 ; unsigned wraps unless %loaded == 0 + %add1.64 = zext i32 %add1 to i64 ; is zext(%loaded) always != zext(%loaded - 1)? Yes -> NoAlias + %sub1 = sub i32 %loaded, 1 ; unsigned wraps iff %loaded == 0 + %sub1.64 = zext i32 %sub1 to i64 ; is zext(%loaded) always != zext(%loaded - 1)? Yes -> NoAlias + %a = getelementptr inbounds i8, i8* %mem, i64 %loaded.64 + %b = getelementptr inbounds i8, i8* %mem, i64 %add1.64 + %c = getelementptr inbounds i8, i8* %mem, i64 %sub1.64 + ret void +} + +; CHECK-LABEL: test_path_dependence +; CHECK: PartialAlias: i8* %a, i8* %b +; CHECK: MustAlias: i8* %a, i8* %c +; CHECK: PartialAlias: i8* %a, i8* %d +define void @test_path_dependence(i32 %p, i8* %mem) { + %p.minus1 = add i32 %p, -1 ; this will always unsigned-wrap, unless %p == 0 + %p.minus1.64 = zext i32 %p.minus1 to i64 + %p.64.again = add i64 %p.minus1.64, 1 ; either %p (if we wrapped) or 4294967296 (if we didn't) + + %p.nsw.nuw.minus1 = sub nsw nuw i32 %p, 1 ; as nuw we know %p >= 1, and as nsw %p <= 2147483647 + %p.nsw.nuw.minus1.64 = zext i32 %p.nsw.nuw.minus1 to i64 + %p.nsw.nuw.64.again = add nsw nuw i64 %p.nsw.nuw.minus1.64, 1 ; ...so always exactly %p + + %p.nsw.minus1 = sub nsw i32 %p, 1 ; only nsw, so can only guarantee %p != 0x10000000 + %p.nsw.minus1.64 = zext i32 %p.nsw.minus1 to i64 ; when %p > 0x10000000 (ie <= 0 as a signed number) then the zext will make this a huge positive number + %p.nsw.64.again = add nsw i64 %p.nsw.minus1.64, 1 ; ...and so this is very much != %p + + %p.64 = zext i32 %p to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %p.64 + %b = getelementptr inbounds i8, i8* %mem, i64 %p.64.again + %c = getelementptr inbounds i8, i8* %mem, i64 %p.nsw.nuw.64.again + %d = getelementptr inbounds i8, i8* %mem, i64 %p.nsw.64.again + ret void +} + +; CHECK-LABEL: test_zext_sext_255 +; CHECK: NoAlias: i8* %a, i8* %b +define void @test_zext_sext_255(i8* %mem) { + %zext.255 = zext i8 255 to i16 ; 0x00FF + %sext.255 = sext i8 255 to i16 ; 0xFFFF + %zext.sext.255 = zext i16 %sext.255 to i32 ; 0x0000FFFF + %sext.zext.255 = sext i16 %zext.255 to i32 ; 0x000000FF + %zext.zext.sext.255 = zext i32 %zext.sext.255 to i64 + %zext.sext.zext.255 = zext i32 %sext.zext.255 to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %zext.zext.sext.255 + %b = getelementptr inbounds i8, i8* %mem, i64 %zext.sext.zext.255 + ret void +} + +; CHECK-LABEL: test_zext_sext_num +; CHECK: PartialAlias: i8* %a, i8* %b +; %a and %b NoAlias if %num == 255 (see @test_zext_sext_255), but %a and %b NoAlias for other values of %num (e.g. 
0) +define void @test_zext_sext_num(i8* %mem, i8 %num) { + %zext.num = zext i8 %num to i16 + %sext.num = sext i8 %num to i16 + %zext.sext.num = zext i16 %sext.num to i32 + %sext.zext.num = sext i16 %zext.num to i32 + %zext.zext.sext.num = zext i32 %zext.sext.num to i64 + %zext.sext.zext.num = zext i32 %sext.zext.num to i64 + %a = getelementptr inbounds i8, i8* %mem, i64 %zext.zext.sext.num + %b = getelementptr inbounds i8, i8* %mem, i64 %zext.sext.zext.num + ret void +} + +; CHECK-LABEL: uncompressStream +; CHECK: MustAlias: i8* %a, i8* %b +; CHECK: NoAlias: i8* %a, i8* %c +define void @uncompressStream(i8* %mem) { + %zext.255 = zext i8 255 to i32 + %sext.255 = sext i8 255 to i32 + %a = getelementptr inbounds i8, i8* %mem, i32 255 + %b = getelementptr inbounds i8, i8* %mem, i32 %zext.255 + %c = getelementptr inbounds i8, i8* %mem, i32 %sext.255 + ret void +} + +; CHECK-LABEL: constantOffsetHeuristic_i3_i32 +; CHECK: NoAlias: i32* %a, i32* %b +; CHECK: NoAlias: i32* %a, i32* %c +; CHECK: NoAlias: i32* %b, i32* %c +define void @constantOffsetHeuristic_i3_i32(i32* %mem, i3 %val) { + %zext.plus.7 = add nsw i3 %val, 7 + %zext.plus.4 = add nsw i3 %val, 4 + %zext.val = zext i3 %val to i32 + %zext.4 = zext i3 %zext.plus.4 to i32 + %zext.7 = zext i3 %zext.plus.7 to i32 + %a = getelementptr inbounds i32, i32* %mem, i32 %zext.4 + %b = getelementptr inbounds i32, i32* %mem, i32 %zext.7 + %c = getelementptr inbounds i32, i32* %mem, i32 %zext.val + ret void +} + +; CHECK-LABEL: constantOffsetHeuristic_i8_i32 +; CHECK: NoAlias: i32* %a, i32* %b +; CHECK: NoAlias: i32* %a, i32* %c +; CHECK: NoAlias: i32* %b, i32* %c +define void @constantOffsetHeuristic_i8_i32(i32* %mem, i8 %val) { + %zext.plus.7 = add nsw i8 %val, 7 + %zext.plus.4 = add nsw i8 %val, 4 + %zext.val = zext i8 %val to i32 + %zext.4 = zext i8 %zext.plus.4 to i32 + %zext.7 = zext i8 %zext.plus.7 to i32 + %a = getelementptr inbounds i32, i32* %mem, i32 %zext.4 + %b = getelementptr inbounds i32, i32* %mem, i32 %zext.7 + %c = getelementptr inbounds i32, i32* %mem, i32 %zext.val + ret void +} + +; CHECK-LABEL: constantOffsetHeuristic_i3_i8 +; CHECK: PartialAlias: i32* %a, i32* %b +; CHECK: NoAlias: i32* %a, i32* %c +; CHECK: PartialAlias: i32* %b, i32* %c +define void @constantOffsetHeuristic_i3_i8(i8* %mem, i3 %val) { + %zext.plus.7 = add nsw i3 %val, 7 + %zext.plus.4 = add nsw i3 %val, 4 + %zext.val = zext i3 %val to i32 + %zext.4 = zext i3 %zext.plus.4 to i32 + %zext.7 = zext i3 %zext.plus.7 to i32 + %a.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.4 + %b.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.7 + %c.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.val + %a = bitcast i8* %a.8 to i32* + %b = bitcast i8* %b.8 to i32* + %c = bitcast i8* %c.8 to i32* + ret void +} + +; CHECK-LABEL: constantOffsetHeuristic_i8_i8 +; CHECK: PartialAlias: i32* %a, i32* %b +; CHECK: NoAlias: i32* %a, i32* %c +; CHECK: NoAlias: i32* %b, i32* %c +define void @constantOffsetHeuristic_i8_i8(i8* %mem, i8 %val) { + %zext.plus.7 = add nsw i8 %val, 7 + %zext.plus.4 = add nsw i8 %val, 4 + %zext.val = zext i8 %val to i32 + %zext.4 = zext i8 %zext.plus.4 to i32 + %zext.7 = zext i8 %zext.plus.7 to i32 + %a.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.4 + %b.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.7 + %c.8 = getelementptr inbounds i8, i8* %mem, i32 %zext.val + %a = bitcast i8* %a.8 to i32* + %b = bitcast i8* %b.8 to i32* + %c = bitcast i8* %c.8 to i32* + ret void +} diff --git a/test/Analysis/BasicAA/sequential-gep.ll 
b/test/Analysis/BasicAA/sequential-gep.ll new file mode 100644 index 000000000000..c17a782aa04b --- /dev/null +++ b/test/Analysis/BasicAA/sequential-gep.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s + +; CHECK: Function: t1 +; CHECK: NoAlias: i32* %gep1, i32* %gep2 +define void @t1([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %addend + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %add + ret void +} + +; CHECK: Function: t2 +; CHECK: PartialAlias: i32* %gep1, i32* %gep2 +define void @t2([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 1, i32 %addend + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 0, i32 %add + ret void +} + +; CHECK: Function: t3 +; CHECK: MustAlias: i32* %gep1, i32* %gep2 +define void @t3([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 0, i32 %add + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 0, i32 %add + ret void +} + +; CHECK: Function: t4 +; CHECK: PartialAlias: i32* %gep1, i32* %gep2 +define void @t4([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 1, i32 %addend + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 %add, i32 %add + ret void +} + +; CHECK: Function: t5 +; CHECK: PartialAlias: i32* %gep2, i64* %bc +define void @t5([8 x i32]* %p, i32 %addend, i32* %q) { + %knownnonzero = load i32, i32* %q, !range !0 + %add = add nsw nuw i32 %addend, %knownnonzero + %gep1 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %addend + %gep2 = getelementptr [8 x i32], [8 x i32]* %p, i32 2, i32 %add + %bc = bitcast i32* %gep1 to i64* + ret void +} + +!0 = !{ i32 1, i32 5 } diff --git a/test/Analysis/BasicAA/zext.ll b/test/Analysis/BasicAA/zext.ll new file mode 100644 index 000000000000..685d45be6151 --- /dev/null +++ b/test/Analysis/BasicAA/zext.ll @@ -0,0 +1,231 @@ +; RUN: opt < %s -basicaa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: test_with_zext +; CHECK: NoAlias: i8* %a, i8* %b + +define void @test_with_zext() { + %1 = tail call i8* @malloc(i64 120) + %a = getelementptr inbounds i8, i8* %1, i64 8 + %2 = getelementptr inbounds i8, i8* %1, i64 16 + %3 = zext i32 3 to i64 + %b = getelementptr inbounds i8, i8* %2, i64 %3 + ret void +} + +; CHECK-LABEL: test_with_lshr +; CHECK: NoAlias: i8* %a, i8* %b + +define void @test_with_lshr(i64 %i) { + %1 = tail call i8* @malloc(i64 120) + %a = getelementptr inbounds i8, i8* %1, i64 8 + %2 = getelementptr inbounds i8, i8* %1, i64 16 + %3 = lshr i64 %i, 2 + %b = getelementptr inbounds i8, i8* %2, i64 %3 + ret void +} + +; CHECK-LABEL: test_with_a_loop +; CHECK: NoAlias: i8* %a, i8* %b + +define void @test_with_a_loop(i8* %mem) { + br label %for.loop + +for.loop: + %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ] + %a = 
getelementptr inbounds i8, i8* %mem, i64 8
+  %a.plus1 = getelementptr inbounds i8, i8* %mem, i64 16
+  %i.64 = zext i32 %i to i64
+  %b = getelementptr inbounds i8, i8* %a.plus1, i64 %i.64
+  %i.plus1 = add nuw nsw i32 %i, 1
+  %cmp = icmp eq i32 %i.plus1, 10
+  br i1 %cmp, label %for.loop.exit, label %for.loop
+
+for.loop.exit:
+  ret void
+}
+
+; CHECK-LABEL: test_with_varying_base_pointer_in_loop
+; CHECK: NoAlias: i8* %a, i8* %b
+
+define void @test_with_varying_base_pointer_in_loop(i8* %mem.orig) {
+  br label %for.loop
+
+for.loop:
+  %mem = phi i8* [ %mem.orig, %0 ], [ %mem.plus1, %for.loop ]
+  %i = phi i32 [ 0, %0 ], [ %i.plus1, %for.loop ]
+  %a = getelementptr inbounds i8, i8* %mem, i64 8
+  %a.plus1 = getelementptr inbounds i8, i8* %mem, i64 16
+  %i.64 = zext i32 %i to i64
+  %b = getelementptr inbounds i8, i8* %a.plus1, i64 %i.64
+  %i.plus1 = add nuw nsw i32 %i, 1
+  %mem.plus1 = getelementptr inbounds i8, i8* %mem, i64 8
+  %cmp = icmp eq i32 %i.plus1, 10
+  br i1 %cmp, label %for.loop.exit, label %for.loop
+
+for.loop.exit:
+  ret void
+}
+
+; CHECK-LABEL: test_sign_extension
+; CHECK: PartialAlias: i64* %b.i64, i8* %a
+
+define void @test_sign_extension(i32 %p) {
+  %1 = tail call i8* @malloc(i64 120)
+  %p.64 = zext i32 %p to i64
+  %a = getelementptr inbounds i8, i8* %1, i64 %p.64
+  %p.minus1 = add i32 %p, -1
+  %p.minus1.64 = zext i32 %p.minus1 to i64
+  %b.i8 = getelementptr inbounds i8, i8* %1, i64 %p.minus1.64
+  %b.i64 = bitcast i8* %b.i8 to i64*
+  ret void
+}
+
+; CHECK-LABEL: test_fe_tools
+; CHECK: PartialAlias: i32* %a, i32* %b
+
+define void @test_fe_tools([8 x i32]* %values) {
+  br label %reorder
+
+for.loop:
+  %i = phi i32 [ 0, %reorder ], [ %i.next, %for.loop ]
+  %idxprom = zext i32 %i to i64
+  %b = getelementptr inbounds [8 x i32], [8 x i32]* %values, i64 0, i64 %idxprom
+  %i.next = add nuw nsw i32 %i, 1
+  %1 = icmp eq i32 %i.next, 10
+  br i1 %1, label %for.loop.exit, label %for.loop
+
+reorder:
+  %a = getelementptr inbounds [8 x i32], [8 x i32]* %values, i64 0, i64 1
+  br label %for.loop
+
+for.loop.exit:
+  ret void
+}
+
+@b = global i32 0, align 4
+@d = global i32 0, align 4
+
+; CHECK-LABEL: test_spec2006
+; CHECK: PartialAlias: i32** %x, i32** %y
+
+define void @test_spec2006() {
+  %h = alloca [1 x [2 x i32*]], align 16
+  %d.val = load i32, i32* @d, align 4
+  %d.promoted = sext i32 %d.val to i64
+  %1 = icmp slt i32 %d.val, 2
+  br i1 %1, label %.lr.ph, label %3
+
+.lr.ph:                                           ; preds = %0
+  br label %2
zqVrK(SftJpbrsp|sonEXgiWB6Nn?2VYGm7f=gB5&Q6F7V_ZD_4P#g#>(S*jN+2}J; zn%=&wKJB@v9Ml(85t~NcGRx>YZ|h!TR571#8ZMPd)#>v^%WTT57dslWZUP-nk{sB_tG8 zeU89MBfeSDK5iPlx)hqA`%$@pgAV;YboZlJ#-v$Ns0u5^Tg`jxPhjU@)>vZ&7BEPv z_thsnJCO`iLrsHBB)S{UU}xZnyM%(AWv$<~e4>QKnP)1js>aHdC^q_PL9QM~exQSG z`PzIQ>C*_Unw~}L&uH}>0b<`=v(!a)td-|*C1e6Sr#qAb!*>5$Z2jD zJ;<6ac2Rd&EqKR%?1J`qIi8gN>$5pT#R=^qGOq+V@z`3vCZf)(s>=SgyO{_)0W(*L zF?pb9nJFWbZ5{^p<2Vxt)CZQp%=EQ+MV`VLe1en!76`xk5!D(C;$CrZx$c)#@mxw~B8h@k7Aj{(CfQ;pPo!)>nt> zI2g5lQ}QTzP1vLsk>y}r3kEyS=GS&OF09Me9&3BQ2M)L#=mCFbi!=%~wXCJuC>l)h-xo*vNN@hYU}#$JHJ@#kAI=(OWF+=1%DDGH0# zw(tF!&Hte8g4>tN*pgC|a!z5Zs~xS1oYWJ(XJz*0xyUO^8nJ;e==M;L)I>)8)(^|9 z%C~FRgPI7+OFu}J>)yxCLXRjhcNApV>U!3~1*tK`vLjBJo_eR8XldBx5W~)DUvYu- zcgR?g?_G&irmtL?5!5~d1Ne$5&W(ZAtl+I<0e3W6<@QstL&rOD z>HQP3{g$mo=RsgA1NQKAwQNSE^=kIJ4Nr;^FB%1>FB9A^G9!%nXA*8(3#e9iETQ#N zi{PI61CY4VThNZxld%H9oxVYwo=*Fm(|1plbQP!vtcqj3)bVm~>V~50k1~_E>#$m^ z=V|DQaVckACU-A>edkwLb6_AJ+*2Lh=q@se(4xmtfd7(0!`)~{rKnWyA>MrutD0EK z`t7!g+lr7`z3vK@ENo8DgDWyRwa0JSVS>ojpmm=` z%~_!AqcuNGXA6wh`cJSC=ofR1IQgrD)&+iqCmi8`lnp+ymIq3Un|FQpUvn${LY!>m znoa0e40z1BDwy(?Z9X*u6Et0sUC9NWW{-)$h!^Cjk5h( zF#{j?-&TEK(_$M6D#F(*Api4~4+Be{r+Y5eMB0BCFZ?z8j5blYb-Uo%6i9@b(IDhbo~ewY)05xoYZ(s%Z(;Gs>%T zo2zG^sQyht4ZNh+$cP_h8vx-2c9;TnFJlf|!mQpL^3Nj%?4MUQ7UG%sS`Rm4FEKcqbl_(0HBfug%sy577z0r@ zID(j35qi0_OIGd#hh;5O>!mb~-cF17UTvWPr9xjbO5mSt_Bh)2%EGnJNKO9cB!<7r zw2^rSZdfa3k3@UmQEdLbigb0wtWIKhOJeK9`IRy1vx~DPM>C3(vtmr|%+$5)NP!4iW%1Se_gYz{#A)8TB%WtQbFKaaL)NkT$=~oJjM7-gKrJ)O~iPN*XTV_x3imH2Os%{Mr;&-hKv8A(+08ZcTPkd#jpRP|Xy69*^60y9&CmE=_6y$DxQDLBq z-b;f`n_49UD-QK?%((*w+WxLSiRfgRtZr7trnKcnCOWc5I9ojxSZ9Fi)}HYe;c_eqO6EF+9%!sn3i91fk6f6vR% zzY!IjpACOlA_m(^F(4^*nn@uqLOp^mQSCxroGoBh3UP`25oh54W+-RnHA**_0zV`l zQ4BcFq!MPkn+!jt{od6!hfW_|vjU`k97+q_vM^EyaZ%x*T5r8zr)8 zySf~575zjD-%v-1*ci?7Z5Y0iWDJLih3y2@6yQ^HWUu77chA7;($%$tL%|oHzIOK` z6mmy^4EBHHp%!^e>hcFh!p^C~X~LHaza+umn+dDwBL^```>RM4{Yc;G26jcjnHl6A zYGE%!<6JZVbKpDX3&>pc@F(%ZiE`!z4}LhYmHCSY=Qj@S!;UCVQ}_|(ZrN~!aRUim zHdpZ9M&zXhYJ*VeLKO>vQwlxlW&&)HMPb#w7us(5PSymgl|mvyc1t&qMY*#araJ}Y zvRRHI^fJ(ny@^Q*R{Qy3uCy4otTcGc<$85@eV0T|mcxem2GUevV2jf9kb@{Y`duX^ zJ6cq|gU~XNC{TOVGY0KeXJCkOln%jfgNyS3df;L%L}O(J7U!(+OD6o9iGIgel^P1} zd%U;jw@dS>71eXJTN*OjUj9qIb_x7Uii(Q{;D72poF`$iYcj!(N$+FqA9 zz#L!*FsN+BWP{2fHGCw_fR6|B;bV6hd<<8^hq4YnmTiHLy?*!@=|+@PQOGW5z935n zJbtZmP}yx&6gq11zHn5X^!Ojj8dU!Nc}1b~_xWFRx=wri2l57$hb}4#J%d!>v&w7$&T9cj{v%U~0b5rRs!&2@ zs4K}Y!?`4kS;{GF?$kys9>dGd=9=rwE z2ddV1_3*AU_L!64b96svQ4%Imj#e!C2J&ktw;h)R<(^Pz4@Ck`QfT#{8fM1?@S;TD z4qKc_5_E7B +@const.struct = constant %const.struct.type { i32 -1, i8 undef } +; CHECK: @const.struct = constant %const.struct.type { i32 -1, i8 undef } +@const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }> +; CHECK: @const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }> +@const.array = constant [2 x i32] [i32 -3, i32 -4] +; CHECK: @const.array = constant [2 x i32] [i32 -3, i32 -4] +@const.vector = constant <2 x i32> +; CHECK: @const.vector = constant <2 x i32> + +;; Global Variables +; Format: [@ =] [Linkage] [Visibility] [DLLStorageClass] +; [ThreadLocal] [unnamed_addr] [AddrSpace] [ExternallyInitialized] +; [] +; [, section "name"] [, comdat [($name)]] [, align ] + +; Global Variables -- Simple +@g1 = global i32 0 +; CHECK: @g1 = global i32 0 +@g2 = constant i32 0 +; CHECK: @g2 = constant i32 0 + +; Global Variables -- Linkage +@g.private = private global i32 0 +; CHECK: @g.private = private global i32 0 +@g.internal = internal global i32 0 +; CHECK: @g.internal = internal global i32 0 
+@g.available_externally = available_externally global i32 0 +; CHECK: @g.available_externally = available_externally global i32 0 +@g.linkonce = linkonce global i32 0 +; CHECK: @g.linkonce = linkonce global i32 0 +@g.weak = weak global i32 0 +; CHECK: @g.weak = weak global i32 0 +@g.common = common global i32 0 +; CHECK: @g.common = common global i32 0 +@g.appending = appending global [4 x i8] c"test" +; CHECK: @g.appending = appending global [4 x i8] c"test" +@g.extern_weak = extern_weak global i32 +; CHECK: @g.extern_weak = extern_weak global i32 +@g.linkonce_odr = linkonce_odr global i32 0 +; CHECK: @g.linkonce_odr = linkonce_odr global i32 0 +@g.weak_odr = weak_odr global i32 0 +; CHECK: @g.weak_odr = weak_odr global i32 0 +@g.external = external global i32 +; CHECK: @g.external = external global i32 + +; Global Variables -- Visibility +@g.default = default global i32 0 +; CHECK: @g.default = global i32 0 +@g.hidden = hidden global i32 0 +; CHECK: @g.hidden = hidden global i32 0 +@g.protected = protected global i32 0 +; CHECK: @g.protected = protected global i32 0 + +; Global Variables -- DLLStorageClass +@g.dlldefault = default global i32 0 +; CHECK: @g.dlldefault = global i32 0 +@g.dllimport = external dllimport global i32 +; CHECK: @g.dllimport = external dllimport global i32 +@g.dllexport = dllexport global i32 0 +; CHECK: @g.dllexport = dllexport global i32 0 + +; Global Variables -- ThreadLocal +@g.notthreadlocal = global i32 0 +; CHECK: @g.notthreadlocal = global i32 0 +@g.generaldynamic = thread_local global i32 0 +; CHECK: @g.generaldynamic = thread_local global i32 0 +@g.localdynamic = thread_local(localdynamic) global i32 0 +; CHECK: @g.localdynamic = thread_local(localdynamic) global i32 0 +@g.initialexec = thread_local(initialexec) global i32 0 +; CHECK: @g.initialexec = thread_local(initialexec) global i32 0 +@g.localexec = thread_local(localexec) global i32 0 +; CHECK: @g.localexec = thread_local(localexec) global i32 0 + +; Global Variables -- unnamed_addr +@g.unnamed_addr = unnamed_addr global i32 0 +; CHECK: @g.unnamed_addr = unnamed_addr global i32 0 + +; Global Variables -- AddrSpace +@g.addrspace = addrspace(1) global i32 0 +; CHECK: @g.addrspace = addrspace(1) global i32 0 + +; Global Variables -- ExternallyInitialized +@g.externally_initialized = external externally_initialized global i32 +; CHECK: @g.externally_initialized = external externally_initialized global i32 + +; Global Variables -- section +@g.section = global i32 0, section "_DATA" +; CHECK: @g.section = global i32 0, section "_DATA" + +; Global Variables -- comdat +@comdat.any = global i32 0, comdat +; CHECK: @comdat.any = global i32 0, comdat +@comdat.exactmatch = global i32 0, comdat +; CHECK: @comdat.exactmatch = global i32 0, comdat +@comdat.largest = global i32 0, comdat +; CHECK: @comdat.largest = global i32 0, comdat +@comdat.noduplicates = global i32 0, comdat +; CHECK: @comdat.noduplicates = global i32 0, comdat +@comdat.samesize = global i32 0, comdat +; CHECK: @comdat.samesize = global i32 0, comdat + +; Force two globals from different comdats into sections with the same name. 
+$comdat1 = comdat any +$comdat2 = comdat any +@g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1) +; CHECK: @g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1) +@g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2) +; CHECK: @g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2) + +; Global Variables -- align +@g.align = global i32 0, align 4 +; CHECK: @g.align = global i32 0, align 4 + +; Global Variables -- Intrinsics +%pri.func.data = type { i32, void ()*, i8* } +@g.used1 = global i32 0 +@g.used2 = global i32 0 +@g.used3 = global i8 0 +declare void @g.f1() +@llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata" +; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata" +@llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata" +@llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +; CHECK: @llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +@llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +; CHECK: @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" + +;; Aliases +; Format: @ = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] +; [unnamed_addr] alias @ + +; Aliases -- Linkage +@a.private = private alias i32* @g.private +; CHECK: @a.private = private alias i32, i32* @g.private +@a.internal = internal alias i32* @g.internal +; CHECK: @a.internal = internal alias i32, i32* @g.internal +@a.linkonce = linkonce alias i32* @g.linkonce +; CHECK: @a.linkonce = linkonce alias i32, i32* @g.linkonce +@a.weak = weak alias i32* @g.weak +; CHECK: @a.weak = weak alias i32, i32* @g.weak +@a.linkonce_odr = linkonce_odr alias i32* @g.linkonce_odr +; CHECK: @a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr +@a.weak_odr = weak_odr alias i32* @g.weak_odr +; CHECK: @a.weak_odr = weak_odr alias i32, i32* @g.weak_odr +@a.external = external alias i32* @g1 +; CHECK: @a.external = alias i32, i32* @g1 + +; Aliases -- Visibility +@a.default = default alias i32* @g.default +; CHECK: @a.default = alias i32, i32* @g.default +@a.hidden = hidden alias i32* @g.hidden +; CHECK: @a.hidden = hidden alias i32, i32* @g.hidden +@a.protected = protected alias i32* @g.protected +; CHECK: @a.protected = protected alias i32, i32* @g.protected + +; Aliases -- DLLStorageClass +@a.dlldefault = default alias i32* @g.dlldefault +; CHECK: @a.dlldefault = alias i32, i32* @g.dlldefault +@a.dllimport = dllimport alias i32* @g1 +; CHECK: @a.dllimport = dllimport alias i32, i32* @g1 +@a.dllexport = dllexport alias i32* @g.dllexport +; CHECK: @a.dllexport = dllexport alias i32, i32* @g.dllexport + +; Aliases -- ThreadLocal +@a.notthreadlocal = alias i32* @g.notthreadlocal +; CHECK: @a.notthreadlocal = alias i32, i32* @g.notthreadlocal +@a.generaldynamic = thread_local alias i32* @g.generaldynamic +; CHECK: @a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic +@a.localdynamic = thread_local(localdynamic) alias i32* @g.localdynamic +; CHECK: @a.localdynamic = thread_local(localdynamic) alias i32, i32* 
@g.localdynamic +@a.initialexec = thread_local(initialexec) alias i32* @g.initialexec +; CHECK: @a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec +@a.localexec = thread_local(localexec) alias i32* @g.localexec +; CHECK: @a.localexec = thread_local(localexec) alias i32, i32* @g.localexec + +; Aliases -- unnamed_addr +@a.unnamed_addr = unnamed_addr alias i32* @g.unnamed_addr +; CHECK: @a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr + +;; Functions +; Format: define [linkage] [visibility] [DLLStorageClass] +; [cconv] [ret attrs] +; @ ([argument list]) +; [unnamed_addr] [fn Attrs] [section "name"] [comdat [($name)]] +; [align N] [gc] [prefix Constant] [prologue Constant] +; [personality Constant] { ... } + +; Functions -- Simple +declare void @f1 () +; CHECK: declare void @f1() + +define void @f2 () { +; CHECK: define void @f2() +entry: + ret void +} + +; Functions -- linkage +define private void @f.private() { +; CHECK: define private void @f.private() +entry: + ret void +} +define internal void @f.internal() { +; CHECK: define internal void @f.internal() +entry: + ret void +} +define available_externally void @f.available_externally() { +; CHECK: define available_externally void @f.available_externally() +entry: + ret void +} +define linkonce void @f.linkonce() { +; CHECK: define linkonce void @f.linkonce() +entry: + ret void +} +define weak void @f.weak() { +; CHECK: define weak void @f.weak() +entry: + ret void +} +define linkonce_odr void @f.linkonce_odr() { +; CHECK: define linkonce_odr void @f.linkonce_odr() +entry: + ret void +} +define weak_odr void @f.weak_odr() { +; CHECK: define weak_odr void @f.weak_odr() +entry: + ret void +} +declare external void @f.external() +; CHECK: declare void @f.external() +declare extern_weak void @f.extern_weak() +; CHECK: declare extern_weak void @f.extern_weak() + +; Functions -- visibility +declare default void @f.default() +; CHECK: declare void @f.default() +declare hidden void @f.hidden() +; CHECK: declare hidden void @f.hidden() +declare protected void @f.protected() +; CHECK: declare protected void @f.protected() + +; Functions -- DLLStorageClass +declare dllimport void @f.dllimport() +; CHECK: declare dllimport void @f.dllimport() +declare dllexport void @f.dllexport() +; CHECK: declare dllexport void @f.dllexport() + +; Functions -- cconv (Calling conventions) +declare ccc void @f.ccc() +; CHECK: declare void @f.ccc() +declare fastcc void @f.fastcc() +; CHECK: declare fastcc void @f.fastcc() +declare coldcc void @f.coldcc() +; CHECK: declare coldcc void @f.coldcc() +declare cc10 void @f.cc10() +; CHECK: declare ghccc void @f.cc10() +declare ghccc void @f.ghccc() +; CHECK: declare ghccc void @f.ghccc() +declare cc11 void @f.cc11() +; CHECK: declare cc11 void @f.cc11() +declare webkit_jscc void @f.webkit_jscc() +; CHECK: declare webkit_jscc void @f.webkit_jscc() +declare anyregcc void @f.anyregcc() +; CHECK: declare anyregcc void @f.anyregcc() +declare preserve_mostcc void @f.preserve_mostcc() +; CHECK: declare preserve_mostcc void @f.preserve_mostcc() +declare preserve_allcc void @f.preserve_allcc() +; CHECK: declare preserve_allcc void @f.preserve_allcc() +declare cc64 void @f.cc64() +; CHECK: declare x86_stdcallcc void @f.cc64() +declare x86_stdcallcc void @f.x86_stdcallcc() +; CHECK: declare x86_stdcallcc void @f.x86_stdcallcc() +declare cc65 void @f.cc65() +; CHECK: declare x86_fastcallcc void @f.cc65() +declare x86_fastcallcc void @f.x86_fastcallcc() +; CHECK: declare x86_fastcallcc void 
@f.x86_fastcallcc() +declare cc66 void @f.cc66() +; CHECK: declare arm_apcscc void @f.cc66() +declare arm_apcscc void @f.arm_apcscc() +; CHECK: declare arm_apcscc void @f.arm_apcscc() +declare cc67 void @f.cc67() +; CHECK: declare arm_aapcscc void @f.cc67() +declare arm_aapcscc void @f.arm_aapcscc() +; CHECK: declare arm_aapcscc void @f.arm_aapcscc() +declare cc68 void @f.cc68() +; CHECK: declare arm_aapcs_vfpcc void @f.cc68() +declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc() +; CHECK: declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc() +declare cc69 void @f.cc69() +; CHECK: declare msp430_intrcc void @f.cc69() +declare msp430_intrcc void @f.msp430_intrcc() +; CHECK: declare msp430_intrcc void @f.msp430_intrcc() +declare cc70 void @f.cc70() +; CHECK: declare x86_thiscallcc void @f.cc70() +declare x86_thiscallcc void @f.x86_thiscallcc() +; CHECK: declare x86_thiscallcc void @f.x86_thiscallcc() +declare cc71 void @f.cc71() +; CHECK: declare ptx_kernel void @f.cc71() +declare ptx_kernel void @f.ptx_kernel() +; CHECK: declare ptx_kernel void @f.ptx_kernel() +declare cc72 void @f.cc72() +; CHECK: declare ptx_device void @f.cc72() +declare ptx_device void @f.ptx_device() +; CHECK: declare ptx_device void @f.ptx_device() +declare cc75 void @f.cc75() +; CHECK: declare spir_func void @f.cc75() +declare spir_func void @f.spir_func() +; CHECK: declare spir_func void @f.spir_func() +declare cc76 void @f.cc76() +; CHECK: declare spir_kernel void @f.cc76() +declare spir_kernel void @f.spir_kernel() +; CHECK: declare spir_kernel void @f.spir_kernel() +declare cc77 void @f.cc77() +; CHECK: declare intel_ocl_bicc void @f.cc77() +declare intel_ocl_bicc void @f.intel_ocl_bicc() +; CHECK: declare intel_ocl_bicc void @f.intel_ocl_bicc() +declare cc78 void @f.cc78() +; CHECK: declare x86_64_sysvcc void @f.cc78() +declare x86_64_sysvcc void @f.x86_64_sysvcc() +; CHECK: declare x86_64_sysvcc void @f.x86_64_sysvcc() +declare cc79 void @f.cc79() +; CHECK: declare x86_64_win64cc void @f.cc79() +declare x86_64_win64cc void @f.x86_64_win64cc() +; CHECK: declare x86_64_win64cc void @f.x86_64_win64cc() +declare cc80 void @f.cc80() +; CHECK: declare x86_vectorcallcc void @f.cc80() +declare x86_vectorcallcc void @f.x86_vectorcallcc() +; CHECK: declare x86_vectorcallcc void @f.x86_vectorcallcc() +declare cc1023 void @f.cc1023() +; CHECK: declare cc1023 void @f.cc1023() + +; Functions -- ret attrs (Return attributes) +declare zeroext i64 @f.zeroext() +; CHECK: declare zeroext i64 @f.zeroext() +declare signext i64 @f.signext() +; CHECK: declare signext i64 @f.signext() +declare inreg i32* @f.inreg() +; CHECK: declare inreg i32* @f.inreg() +declare noalias i32* @f.noalias() +; CHECK: declare noalias i32* @f.noalias() +declare nonnull i32* @f.nonnull() +; CHECK: declare nonnull i32* @f.nonnull() +declare dereferenceable(4) i32* @f.dereferenceable4() +; CHECK: declare dereferenceable(4) i32* @f.dereferenceable4() +declare dereferenceable(8) i32* @f.dereferenceable8() +; CHECK: declare dereferenceable(8) i32* @f.dereferenceable8() +declare dereferenceable(16) i32* @f.dereferenceable16() +; CHECK: declare dereferenceable(16) i32* @f.dereferenceable16() +declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null() +; CHECK: declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null() +declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null() +; CHECK: declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null() +declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null() +; CHECK: 
declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null() + +; Functions -- Parameter attributes +declare void @f.param.zeroext(i8 zeroext) +; CHECK: declare void @f.param.zeroext(i8 zeroext) +declare void @f.param.signext(i8 signext) +; CHECK: declare void @f.param.signext(i8 signext) +declare void @f.param.inreg(i8 inreg) +; CHECK: declare void @f.param.inreg(i8 inreg) +declare void @f.param.byval({ i8, i8 }* byval) +; CHECK: declare void @f.param.byval({ i8, i8 }* byval) +declare void @f.param.inalloca(i8* inalloca) +; CHECK: declare void @f.param.inalloca(i8* inalloca) +declare void @f.param.sret(i8* sret) +; CHECK: declare void @f.param.sret(i8* sret) +declare void @f.param.noalias(i8* noalias) +; CHECK: declare void @f.param.noalias(i8* noalias) +declare void @f.param.nocapture(i8* nocapture) +; CHECK: declare void @f.param.nocapture(i8* nocapture) +declare void @f.param.nest(i8* nest) +; CHECK: declare void @f.param.nest(i8* nest) +declare i8* @f.param.returned(i8* returned) +; CHECK: declare i8* @f.param.returned(i8* returned) +declare void @f.param.nonnull(i8* nonnull) +; CHECK: declare void @f.param.nonnull(i8* nonnull) +declare void @f.param.dereferenceable(i8* dereferenceable(4)) +; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4)) +declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) +; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) + +; Functions -- unnamed_addr +declare void @f.unnamed_addr() unnamed_addr +; CHECK: declare void @f.unnamed_addr() unnamed_addr + +; Functions -- fn Attrs (Function attributes) +declare void @f.alignstack4() alignstack(4) +; CHECK: declare void @f.alignstack4() #0 +declare void @f.alignstack8() alignstack(8) +; CHECK: declare void @f.alignstack8() #1 +declare void @f.alwaysinline() alwaysinline +; CHECK: declare void @f.alwaysinline() #2 +declare void @f.cold() cold +; CHECK: declare void @f.cold() #3 +declare void @f.convergent() convergent +; CHECK: declare void @f.convergent() #4 +declare void @f.inlinehint() inlinehint +; CHECK: declare void @f.inlinehint() #5 +declare void @f.jumptable() unnamed_addr jumptable +; CHECK: declare void @f.jumptable() unnamed_addr #6 +declare void @f.minsize() minsize +; CHECK: declare void @f.minsize() #7 +declare void @f.naked() naked +; CHECK: declare void @f.naked() #8 +declare void @f.nobuiltin() nobuiltin +; CHECK: declare void @f.nobuiltin() #9 +declare void @f.noduplicate() noduplicate +; CHECK: declare void @f.noduplicate() #10 +declare void @f.noimplicitfloat() noimplicitfloat +; CHECK: declare void @f.noimplicitfloat() #11 +declare void @f.noinline() noinline +; CHECK: declare void @f.noinline() #12 +declare void @f.nonlazybind() nonlazybind +; CHECK: declare void @f.nonlazybind() #13 +declare void @f.noredzone() noredzone +; CHECK: declare void @f.noredzone() #14 +declare void @f.noreturn() noreturn +; CHECK: declare void @f.noreturn() #15 +declare void @f.nounwind() nounwind +; CHECK: declare void @f.nounwind() #16 +declare void @f.optnone() noinline optnone +; CHECK: declare void @f.optnone() #17 +declare void @f.optsize() optsize +; CHECK: declare void @f.optsize() #18 +declare void @f.readnone() readnone +; CHECK: declare void @f.readnone() #19 +declare void @f.readonly() readonly +; CHECK: declare void @f.readonly() #20 +declare void @f.returns_twice() returns_twice +; CHECK: declare void @f.returns_twice() #21 +declare void @f.safestack() safestack +; CHECK: declare void @f.safestack() #22 +declare 
void @f.sanitize_address() sanitize_address
+; CHECK: declare void @f.sanitize_address() #23
+declare void @f.sanitize_memory() sanitize_memory
+; CHECK: declare void @f.sanitize_memory() #24
+declare void @f.sanitize_thread() sanitize_thread
+; CHECK: declare void @f.sanitize_thread() #25
+declare void @f.ssp() ssp
+; CHECK: declare void @f.ssp() #26
+declare void @f.sspreq() sspreq
+; CHECK: declare void @f.sspreq() #27
+declare void @f.sspstrong() sspstrong
+; CHECK: declare void @f.sspstrong() #28
+declare void @f.thunk() "thunk"
+; CHECK: declare void @f.thunk() #29
+declare void @f.uwtable() uwtable
+; CHECK: declare void @f.uwtable() #30
+declare void @f.kvpair() "cpu"="cortex-a8"
+; CHECK: declare void @f.kvpair() #31
+
+; Functions -- section
+declare void @f.section() section "80"
+; CHECK: declare void @f.section() section "80"
+
+; Functions -- comdat
+define void @f.comdat_any() comdat($comdat.any) {
+; CHECK: define void @f.comdat_any() comdat($comdat.any)
+entry:
+  ret void
+}
+define void @f.comdat_exactmatch() comdat($comdat.exactmatch) {
+; CHECK: define void @f.comdat_exactmatch() comdat($comdat.exactmatch)
+entry:
+  ret void
+}
+define void @f.comdat_largest() comdat($comdat.largest) {
+; CHECK: define void @f.comdat_largest() comdat($comdat.largest)
+entry:
+  ret void
+}
+define void @f.comdat_noduplicates() comdat($comdat.noduplicates) {
+; CHECK: define void @f.comdat_noduplicates() comdat($comdat.noduplicates)
+entry:
+  ret void
+}
+define void @f.comdat_samesize() comdat($comdat.samesize) {
+; CHECK: define void @f.comdat_samesize() comdat($comdat.samesize)
+entry:
+  ret void
+}
+
+; Functions -- align
+declare void @f.align2() align 2
+; CHECK: declare void @f.align2() align 2
+declare void @f.align4() align 4
+; CHECK: declare void @f.align4() align 4
+declare void @f.align8() align 8
+; CHECK: declare void @f.align8() align 8
+
+; Functions -- GC
+declare void @f.gcshadow() gc "shadow-stack"
+; CHECK: declare void @f.gcshadow() gc "shadow-stack"
+
+; Functions -- Prefix data
+declare void @f.prefixi32() prefix i32 1684365668
+; CHECK: declare void @f.prefixi32() prefix i32 1684365668
+declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+; CHECK: declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+; Functions -- Prologue data
+declare void @f.prologuei32() prologue i32 1684365669
+; CHECK: declare void @f.prologuei32() prologue i32 1684365669
+declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+; CHECK: declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3]
+
+; Functions -- Personality constant
+declare void @llvm.donothing() nounwind readnone
+; CHECK: declare void @llvm.donothing() #32
+define void @f.no_personality() personality i8 3 {
+; CHECK: define void @f.no_personality() personality i8 3
+  invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+declare i32 @f.personality_handler()
+; CHECK: declare i32 @f.personality_handler()
+define void @f.personality() personality i32 ()* @f.personality_handler {
+; CHECK: define void @f.personality() personality i32 ()* @f.personality_handler
+  invoke void @llvm.donothing() to label %normal unwind label %exception
+exception:
+  %cleanup = landingpad i32 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+;; Atomic Memory Ordering Constraints
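+; Every cmpxchg below pairs its success ordering with a monotonic failure
+; ordering, so each of the five success orderings gets bitcode coverage.
+define void @atomics(i32* %word) {
+  %cmpxchg.0 = cmpxchg i32*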
%word, i32 0, i32 4 monotonic monotonic + ; CHECK: %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic + %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic + ; CHECK: %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic + %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic + ; CHECK: %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic + %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic + ; CHECK: %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic + %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic + ; CHECK: %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic + %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic + ; CHECK: %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic + %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic + ; CHECK: %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic + %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic + ; CHECK: %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic + %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic + ; CHECK: %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic + %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic + ; CHECK: %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic + %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic + ; CHECK: %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic + %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic + ; CHECK: %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic + %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic + ; CHECK: %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic + %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic + ; CHECK: %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic + %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic + ; CHECK: %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic + %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic + ; CHECK: %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic + %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic + ; CHECK: %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic + %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic + ; CHECK: %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic + %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic + ; CHECK: %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic + fence acquire + ; CHECK: fence acquire + fence release + ; CHECK: fence release + fence acq_rel + ; CHECK: fence acq_rel + fence singlethread seq_cst + ; CHECK: fence singlethread seq_cst + + %ld.1 = load atomic i32, i32* %word monotonic, align 4 + ; CHECK: %ld.1 = load atomic i32, i32* %word monotonic, align 4 + %ld.2 = load atomic volatile i32, i32* %word acquire, align 8 + ; CHECK: %ld.2 = load atomic volatile i32, i32* %word acquire, align 8 + %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16 + ; CHECK: %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16 + + store atomic i32 23, i32* %word monotonic, align 4 + ; CHECK: store atomic i32 23, i32* %word monotonic, align 4 + store atomic volatile i32 24, i32* %word monotonic, align 4 + ; 
CHECK: store atomic volatile i32 24, i32* %word monotonic, align 4 + store atomic volatile i32 25, i32* %word singlethread monotonic, align 4 + ; CHECK: store atomic volatile i32 25, i32* %word singlethread monotonic, align 4 + ret void +} + +;; Fast Math Flags +define void @fastmathflags(float %op1, float %op2) { + %f.nnan = fadd nnan float %op1, %op2 + ; CHECK: %f.nnan = fadd nnan float %op1, %op2 + %f.ninf = fadd ninf float %op1, %op2 + ; CHECK: %f.ninf = fadd ninf float %op1, %op2 + %f.nsz = fadd nsz float %op1, %op2 + ; CHECK: %f.nsz = fadd nsz float %op1, %op2 + %f.arcp = fadd arcp float %op1, %op2 + ; CHECK: %f.arcp = fadd arcp float %op1, %op2 + %f.fast = fadd fast float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 + ret void +} + +;; Type System +%opaquety = type opaque +define void @typesystem() { + %p0 = bitcast i8* null to i32 (i32)* + ; CHECK: %p0 = bitcast i8* null to i32 (i32)* + %p1 = bitcast i8* null to void (i8*)* + ; CHECK: %p1 = bitcast i8* null to void (i8*)* + %p2 = bitcast i8* null to i32 (i8*, ...)* + ; CHECK: %p2 = bitcast i8* null to i32 (i8*, ...)* + %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)* + ; CHECK: %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)* + %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)* + ; CHECK: %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)* + %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)* + ; CHECK: %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)* + + %t0 = alloca i1942652 + ; CHECK: %t0 = alloca i1942652 + %t1 = alloca half + ; CHECK: %t1 = alloca half + %t2 = alloca float + ; CHECK: %t2 = alloca float + %t3 = alloca double + ; CHECK: %t3 = alloca double + %t4 = alloca fp128 + ; CHECK: %t4 = alloca fp128 + %t5 = alloca x86_fp80 + ; CHECK: %t5 = alloca x86_fp80 + %t6 = alloca ppc_fp128 + ; CHECK: %t6 = alloca ppc_fp128 + %t7 = alloca x86_mmx + ; CHECK: %t7 = alloca x86_mmx + %t8 = alloca %opaquety* + ; CHECK: %t8 = alloca %opaquety* + + ret void +} + +;; Inline Assembler Expressions +define void @inlineasm(i32 %arg) { + call i32 asm "bswap $0", "=r,r"(i32 %arg) + ; CHECK: call i32 asm "bswap $0", "=r,r"(i32 %arg) + call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg) + ; CHECK: call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg) + ret void +} + +;; Instructions + +; Instructions -- Terminators +define void @instructions.terminators(i8 %val) personality i32 -10 { + br i1 false, label %iftrue, label %iffalse + ; CHECK: br i1 false, label %iftrue, label %iffalse + br label %iftrue + ; CHECK: br label %iftrue +iftrue: + ret void + ; CHECK: ret void +iffalse: + + switch i8 %val, label %defaultdest [ + ; CHECK: switch i8 %val, label %defaultdest [ + i8 0, label %defaultdest.0 + ; CHECK: i8 0, label %defaultdest.0 + i8 1, label %defaultdest.1 + ; CHECK: i8 1, label %defaultdest.1 + i8 2, label %defaultdest.2 + ; CHECK: i8 2, label %defaultdest.2 + ] + ; CHECK: ] +defaultdest: + ret void +defaultdest.0: + ret void +defaultdest.1: + ret void +defaultdest.2: + + indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2] + ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2] + indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2] + ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2] + + invoke fastcc void @f.fastcc() + ; CHECK: invoke 
fastcc void @f.fastcc() + to label %defaultdest unwind label %exc + ; CHECK: to label %defaultdest unwind label %exc +exc: + %cleanup = landingpad i32 cleanup + + resume i32 undef + ; CHECK: resume i32 undef + unreachable + ; CHECK: unreachable + + ret void +} + +; Instructions -- Binary Operations +define void @instructions.binops(i8 %op1, i8 %op2) { + ; nuw x nsw + add i8 %op1, %op2 + ; CHECK: add i8 %op1, %op2 + add nuw i8 %op1, %op2 + ; CHECK: add nuw i8 %op1, %op2 + add nsw i8 %op1, %op2 + ; CHECK: add nsw i8 %op1, %op2 + add nuw nsw i8 %op1, %op2 + ; CHECK: add nuw nsw i8 %op1, %op2 + sub i8 %op1, %op2 + ; CHECK: sub i8 %op1, %op2 + sub nuw i8 %op1, %op2 + ; CHECK: sub nuw i8 %op1, %op2 + sub nsw i8 %op1, %op2 + ; CHECK: sub nsw i8 %op1, %op2 + sub nuw nsw i8 %op1, %op2 + ; CHECK: sub nuw nsw i8 %op1, %op2 + mul i8 %op1, %op2 + ; CHECK: mul i8 %op1, %op2 + mul nuw i8 %op1, %op2 + ; CHECK: mul nuw i8 %op1, %op2 + mul nsw i8 %op1, %op2 + ; CHECK: mul nsw i8 %op1, %op2 + mul nuw nsw i8 %op1, %op2 + ; CHECK: mul nuw nsw i8 %op1, %op2 + + ; exact + udiv i8 %op1, %op2 + ; CHECK: udiv i8 %op1, %op2 + udiv exact i8 %op1, %op2 + ; CHECK: udiv exact i8 %op1, %op2 + sdiv i8 %op1, %op2 + ; CHECK: sdiv i8 %op1, %op2 + sdiv exact i8 %op1, %op2 + ; CHECK: sdiv exact i8 %op1, %op2 + + ; none + urem i8 %op1, %op2 + ; CHECK: urem i8 %op1, %op2 + srem i8 %op1, %op2 + ; CHECK: srem i8 %op1, %op2 + + ret void +} + +; Instructions -- Bitwise Binary Operations +define void @instructions.bitwise_binops(i8 %op1, i8 %op2) { + ; nuw x nsw + shl i8 %op1, %op2 + ; CHECK: shl i8 %op1, %op2 + shl nuw i8 %op1, %op2 + ; CHECK: shl nuw i8 %op1, %op2 + shl nsw i8 %op1, %op2 + ; CHECK: shl nsw i8 %op1, %op2 + shl nuw nsw i8 %op1, %op2 + ; CHECK: shl nuw nsw i8 %op1, %op2 + + ; exact + lshr i8 %op1, %op2 + ; CHECK: lshr i8 %op1, %op2 + lshr exact i8 %op1, %op2 + ; CHECK: lshr exact i8 %op1, %op2 + ashr i8 %op1, %op2 + ; CHECK: ashr i8 %op1, %op2 + ashr exact i8 %op1, %op2 + ; CHECK: ashr exact i8 %op1, %op2 + + ; none + and i8 %op1, %op2 + ; CHECK: and i8 %op1, %op2 + or i8 %op1, %op2 + ; CHECK: or i8 %op1, %op2 + xor i8 %op1, %op2 + ; CHECK: xor i8 %op1, %op2 + + ret void +} + +; Instructions -- Vector Operations +define void @instructions.vectorops(<4 x float> %vec, <4 x float> %vec2) { + extractelement <4 x float> %vec, i8 0 + ; CHECK: extractelement <4 x float> %vec, i8 0 + insertelement <4 x float> %vec, float 3.500000e+00, i8 0 + ; CHECK: insertelement <4 x float> %vec, float 3.500000e+00, i8 0 + shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer + ; CHECK: shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer + + ret void +} + +; Instructions -- Aggregate Operations +define void @instructions.aggregateops({ i8, i32 } %up, <{ i8, i32 }> %p, + [3 x i8] %arr, { i8, { i32 }} %n, + <2 x i8*> %pvec, <2 x i64> %offsets) { + extractvalue { i8, i32 } %up, 0 + ; CHECK: extractvalue { i8, i32 } %up, 0 + extractvalue <{ i8, i32 }> %p, 1 + ; CHECK: extractvalue <{ i8, i32 }> %p, 1 + extractvalue [3 x i8] %arr, 2 + ; CHECK: extractvalue [3 x i8] %arr, 2 + extractvalue { i8, { i32 } } %n, 1, 0 + ; CHECK: extractvalue { i8, { i32 } } %n, 1, 0 + + insertvalue { i8, i32 } %up, i8 1, 0 + ; CHECK: insertvalue { i8, i32 } %up, i8 1, 0 + insertvalue <{ i8, i32 }> %p, i32 2, 1 + ; CHECK: insertvalue <{ i8, i32 }> %p, i32 2, 1 + insertvalue [3 x i8] %arr, i8 0, 0 + ; CHECK: insertvalue [3 x i8] %arr, i8 0, 0 + insertvalue { i8, { i32 } } %n, i32 0, 1, 0 + ; CHECK: insertvalue { i8, { i32 
} } %n, i32 0, 1, 0 + + %up.ptr = alloca { i8, i32 } + %p.ptr = alloca <{ i8, i32 }> + %arr.ptr = alloca [3 x i8] + %n.ptr = alloca { i8, { i32 } } + + getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0 + ; CHECK: getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0 + getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1 + ; CHECK: getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1 + getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2 + ; CHECK: getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2 + getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1 + ; CHECK: getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1 + getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0 + ; CHECK: getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0 + getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets + ; CHECK: getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets + + ret void +} + +; Instructions -- Memory Access and Addressing Operations +!7 = !{i32 1} +!8 = !{} +!9 = !{i64 4} +define void @instructions.memops(i32** %base) { + alloca i32, i8 4, align 4 + ; CHECK: alloca i32, i8 4, align 4 + alloca inalloca i32, i8 4, align 4 + ; CHECK: alloca inalloca i32, i8 4, align 4 + + load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + ; CHECK: load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + ; CHECK: load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + + store i32* null, i32** %base, align 4, !nontemporal !8 + ; CHECK: store i32* null, i32** %base, align 4, !nontemporal !8 + store volatile i32* null, i32** %base, align 4, !nontemporal !8 + ; CHECK: store volatile i32* null, i32** %base, align 4, !nontemporal !8 + + ret void +} + +; Instructions -- Conversion Operations +define void @instructions.conversions() { + trunc i32 -1 to i1 + ; CHECK: trunc i32 -1 to i1 + zext i32 -1 to i64 + ; CHECK: zext i32 -1 to i64 + sext i32 -1 to i64 + ; CHECK: sext i32 -1 to i64 + fptrunc float undef to half + ; CHECK: fptrunc float undef to half + fpext half undef to float + ; CHECK: fpext half undef to float + fptoui float undef to i32 + ; CHECK: fptoui float undef to i32 + fptosi float undef to i32 + ; CHECK: fptosi float undef to i32 + uitofp i32 1 to float + ; CHECK: uitofp i32 1 to float + sitofp i32 -1 to float + ; CHECK: sitofp i32 -1 to float + ptrtoint i8* null to i64 + ; CHECK: ptrtoint i8* null to i64 + inttoptr i64 0 to i8* + ; CHECK: inttoptr i64 0 to i8* + bitcast i32 0 to i32 + ; CHECK: bitcast i32 0 to i32 + addrspacecast i32* null to i32 addrspace(1)* + ; CHECK: addrspacecast i32* null to i32 addrspace(1)* + + ret void +} + +; Instructions -- Other Operations +define void @instructions.other(i32 %op1, i32 %op2, half %fop1, half %fop2) { +entry: + icmp eq i32 %op1, %op2 + ; CHECK: icmp eq i32 %op1, %op2 + icmp ne i32 %op1, %op2 + ; CHECK: icmp ne i32 %op1, %op2 + icmp ugt i32 %op1, %op2 + ; CHECK: icmp ugt i32 %op1, %op2 + icmp uge i32 %op1, %op2 + ; CHECK: icmp uge i32 %op1, %op2 + icmp ult i32 %op1, %op2 + ; CHECK: icmp ult i32 %op1, %op2 + icmp ule i32 %op1, %op2 + ; CHECK: icmp ule i32 %op1, %op2 + icmp sgt i32 %op1, %op2 + ; 
CHECK: icmp sgt i32 %op1, %op2
+  icmp sge i32 %op1, %op2
+  ; CHECK: icmp sge i32 %op1, %op2
+  icmp slt i32 %op1, %op2
+  ; CHECK: icmp slt i32 %op1, %op2
+  icmp sle i32 %op1, %op2
+  ; CHECK: icmp sle i32 %op1, %op2
+
+  fcmp false half %fop1, %fop2
+  ; CHECK: fcmp false half %fop1, %fop2
+  fcmp oeq half %fop1, %fop2
+  ; CHECK: fcmp oeq half %fop1, %fop2
+  fcmp ogt half %fop1, %fop2
+  ; CHECK: fcmp ogt half %fop1, %fop2
+  fcmp oge half %fop1, %fop2
+  ; CHECK: fcmp oge half %fop1, %fop2
+  fcmp olt half %fop1, %fop2
+  ; CHECK: fcmp olt half %fop1, %fop2
+  fcmp ole half %fop1, %fop2
+  ; CHECK: fcmp ole half %fop1, %fop2
+  fcmp one half %fop1, %fop2
+  ; CHECK: fcmp one half %fop1, %fop2
+  fcmp ord half %fop1, %fop2
+  ; CHECK: fcmp ord half %fop1, %fop2
+  fcmp ueq half %fop1, %fop2
+  ; CHECK: fcmp ueq half %fop1, %fop2
+  fcmp ugt half %fop1, %fop2
+  ; CHECK: fcmp ugt half %fop1, %fop2
+  fcmp uge half %fop1, %fop2
+  ; CHECK: fcmp uge half %fop1, %fop2
+  fcmp ult half %fop1, %fop2
+  ; CHECK: fcmp ult half %fop1, %fop2
+  fcmp ule half %fop1, %fop2
+  ; CHECK: fcmp ule half %fop1, %fop2
+  fcmp une half %fop1, %fop2
+  ; CHECK: fcmp une half %fop1, %fop2
+  fcmp uno half %fop1, %fop2
+  ; CHECK: fcmp uno half %fop1, %fop2
+  fcmp true half %fop1, %fop2
+  ; CHECK: fcmp true half %fop1, %fop2
+
+  br label %exit
+L1:
+  %v1 = add i32 %op1, %op2
+  br label %exit
+L2:
+  %v2 = add i32 %op1, %op2
+  br label %exit
+exit:
+  phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+  ; CHECK: phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+
+  select i1 true, i32 0, i32 1
+  ; CHECK: select i1 true, i32 0, i32 1
+  select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+  ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+
+  call void @f.nobuiltin() builtin
+  ; CHECK: call void @f.nobuiltin() #36
+
+  call fastcc noalias i32* @f.noalias() noinline
+  ; CHECK: call fastcc noalias i32* @f.noalias() #12
+  tail call ghccc nonnull i32* @f.nonnull() minsize
+  ; CHECK: tail call ghccc nonnull i32* @f.nonnull() #7
+
+  ret void
+}
+
+define void @instructions.call_musttail(i8* inalloca %val) {
+  musttail call void @f.param.inalloca(i8* inalloca %val)
+  ; CHECK: musttail call void @f.param.inalloca(i8* inalloca %val)
+
+  ret void
+}
+
+define void @instructions.landingpad() personality i32 -2 {
+  invoke void @llvm.donothing() to label %proceed unwind label %catch1
+  invoke void @llvm.donothing() to label %proceed unwind label %catch2
+  invoke void @llvm.donothing() to label %proceed unwind label %catch3
+  invoke void @llvm.donothing() to label %proceed unwind label %catch4
+
+catch1:
+  landingpad i32
+  ; CHECK: landingpad i32
+      cleanup
+      ; CHECK: cleanup
+  br label %proceed
+
+catch2:
+  landingpad i32
+  ; CHECK: landingpad i32
+      cleanup
+      ; CHECK: cleanup
+      catch i32* null
+      ; CHECK: catch i32* null
+  br label %proceed
+
+catch3:
+  landingpad i32
+  ; CHECK: landingpad i32
+      cleanup
+      ; CHECK: cleanup
+      catch i32* null
+      ; CHECK: catch i32* null
+      catch i32* null
+      ; CHECK: catch i32* null
+  br label %proceed
+
+catch4:
+  landingpad i32
+  ; CHECK: landingpad i32
+      filter [2 x i32] zeroinitializer
+      ; CHECK: filter [2 x i32] zeroinitializer
+  br label %proceed
+
+proceed:
+  ret void
+}
+
+;; Intrinsic Functions
+
+; Intrinsic Functions -- Variable Argument Handling
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
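+; The va_list lives behind a plain i8* here: va_start and va_end bracket its
+; lifetime, and va_copy clones one active list into another slot.
+define void @instructions.va_arg(i8* %v, ...)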
{ + %ap = alloca i8* + %ap2 = bitcast i8** %ap to i8* + + call void @llvm.va_start(i8* %ap2) + ; CHECK: call void @llvm.va_start(i8* %ap2) + + va_arg i8* %ap2, i32 + ; CHECK: va_arg i8* %ap2, i32 + + call void @llvm.va_copy(i8* %v, i8* %ap2) + ; CHECK: call void @llvm.va_copy(i8* %v, i8* %ap2) + + call void @llvm.va_end(i8* %ap2) + ; CHECK: call void @llvm.va_end(i8* %ap2) + + ret void +} + +; Intrinsic Functions -- Accurate Garbage Collection +declare void @llvm.gcroot(i8**, i8*) +declare i8* @llvm.gcread(i8*, i8**) +declare void @llvm.gcwrite(i8*, i8*, i8**) +define void @intrinsics.gc() gc "shadow-stack" { + %ptrloc = alloca i8* + call void @llvm.gcroot(i8** %ptrloc, i8* null) + ; CHECK: call void @llvm.gcroot(i8** %ptrloc, i8* null) + + call i8* @llvm.gcread(i8* null, i8** %ptrloc) + ; CHECK: call i8* @llvm.gcread(i8* null, i8** %ptrloc) + + %ref = alloca i8 + call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc) + ; CHECK: call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc) + + ret void +} + +; Intrinsic Functions -- Code Generation +declare i8* @llvm.returnaddress(i32) +declare i8* @llvm.frameaddress(i32) +declare i32 @llvm.read_register.i32(metadata) +declare i64 @llvm.read_register.i64(metadata) +declare void @llvm.write_register.i32(metadata, i32) +declare void @llvm.write_register.i64(metadata, i64) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) +declare void @llvm.prefetch(i8*, i32, i32, i32) +declare void @llvm.pcmarker(i32) +declare i64 @llvm.readcyclecounter() +declare void @llvm.clear_cache(i8*, i8*) +declare void @llvm.instrprof_increment(i8*, i64, i32, i32) + +!10 = !{!"rax"} +define void @intrinsics.codegen() { + call i8* @llvm.returnaddress(i32 1) + ; CHECK: call i8* @llvm.returnaddress(i32 1) + call i8* @llvm.frameaddress(i32 1) + ; CHECK: call i8* @llvm.frameaddress(i32 1) + + call i32 @llvm.read_register.i32(metadata !10) + ; CHECK: call i32 @llvm.read_register.i32(metadata !10) + call i64 @llvm.read_register.i64(metadata !10) + ; CHECK: call i64 @llvm.read_register.i64(metadata !10) + call void @llvm.write_register.i32(metadata !10, i32 0) + ; CHECK: call void @llvm.write_register.i32(metadata !10, i32 0) + call void @llvm.write_register.i64(metadata !10, i64 0) + ; CHECK: call void @llvm.write_register.i64(metadata !10, i64 0) + + %stack = call i8* @llvm.stacksave() + ; CHECK: %stack = call i8* @llvm.stacksave() + call void @llvm.stackrestore(i8* %stack) + ; CHECK: call void @llvm.stackrestore(i8* %stack) + + call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0) + ; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0) + + call void @llvm.pcmarker(i32 1) + ; CHECK: call void @llvm.pcmarker(i32 1) + + call i64 @llvm.readcyclecounter() + ; CHECK: call i64 @llvm.readcyclecounter() + + call void @llvm.clear_cache(i8* null, i8* null) + ; CHECK: call void @llvm.clear_cache(i8* null, i8* null) + + call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0) + ; CHECK: call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0) + + ret void +} + +declare void @llvm.localescape(...) +declare i8* @llvm.localrecover(i8* %func, i8* %fp, i32 %idx) +define void @intrinsics.localescape() { + %static.alloca = alloca i32 + call void (...) @llvm.localescape(i32* %static.alloca) + ; CHECK: call void (...) 
@llvm.localescape(i32* %static.alloca)
+
+  call void @intrinsics.localrecover()
+
+  ret void
+}
+define void @intrinsics.localrecover() {
+  %func = bitcast void ()* @intrinsics.localescape to i8*
+  %fp = call i8* @llvm.frameaddress(i32 1)
+  call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0)
+  ; CHECK: call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0)
+
+  ret void
+}
+
+; We need this function to provide `uses' for some metadata tests.
+define void @misc.metadata() {
+  call void @f1(), !srcloc !11
+  call void @f1(), !srcloc !12
+  call void @f1(), !srcloc !13
+  call void @f1(), !srcloc !14
+  ret void
+}
+
+; CHECK: attributes #0 = { alignstack=4 }
+; CHECK: attributes #1 = { alignstack=8 }
+; CHECK: attributes #2 = { alwaysinline }
+; CHECK: attributes #3 = { cold }
+; CHECK: attributes #4 = { convergent }
+; CHECK: attributes #5 = { inlinehint }
+; CHECK: attributes #6 = { jumptable }
+; CHECK: attributes #7 = { minsize }
+; CHECK: attributes #8 = { naked }
+; CHECK: attributes #9 = { nobuiltin }
+; CHECK: attributes #10 = { noduplicate }
+; CHECK: attributes #11 = { noimplicitfloat }
+; CHECK: attributes #12 = { noinline }
+; CHECK: attributes #13 = { nonlazybind }
+; CHECK: attributes #14 = { noredzone }
+; CHECK: attributes #15 = { noreturn }
+; CHECK: attributes #16 = { nounwind }
+; CHECK: attributes #17 = { noinline optnone }
+; CHECK: attributes #18 = { optsize }
+; CHECK: attributes #19 = { readnone }
+; CHECK: attributes #20 = { readonly }
+; CHECK: attributes #21 = { returns_twice }
+; CHECK: attributes #22 = { safestack }
+; CHECK: attributes #23 = { sanitize_address }
+; CHECK: attributes #24 = { sanitize_memory }
+; CHECK: attributes #25 = { sanitize_thread }
+; CHECK: attributes #26 = { ssp }
+; CHECK: attributes #27 = { sspreq }
+; CHECK: attributes #28 = { sspstrong }
+; CHECK: attributes #29 = { "thunk" }
+; CHECK: attributes #30 = { uwtable }
+; CHECK: attributes #31 = { "cpu"="cortex-a8" }
+; CHECK: attributes #32 = { nounwind readnone }
+; CHECK: attributes #33 = { argmemonly nounwind readonly }
+; CHECK: attributes #34 = { argmemonly nounwind }
+; CHECK: attributes #35 = { nounwind readonly }
+; CHECK: attributes #36 = { builtin }
+
+;; Metadata
+
+; Metadata -- Module flags
+!llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+; CHECK: !llvm.module.flags = !{!0, !1, !2, !4, !5, !6}
+
+!0 = !{i32 1, !"mod1", i32 0}
+; CHECK: !0 = !{i32 1, !"mod1", i32 0}
+!1 = !{i32 2, !"mod2", i32 0}
+; CHECK: !1 = !{i32 2, !"mod2", i32 0}
+!2 = !{i32 3, !"mod3", !3}
+; CHECK: !2 = !{i32 3, !"mod3", !3}
+!3 = !{!"mod6", !0}
+; CHECK: !3 = !{!"mod6", !0}
+!4 = !{i32 4, !"mod4", i32 0}
+; CHECK: !4 = !{i32 4, !"mod4", i32 0}
+!5 = !{i32 5, !"mod5", !0}
+; CHECK: !5 = !{i32 5, !"mod5", !0}
+!6 = !{i32 6, !"mod6", !0}
+; CHECK: !6 = !{i32 6, !"mod6", !0}
+
+; Metadata -- Check `distinct'
+!11 = distinct !{}
+; CHECK: !11 = distinct !{}
+!12 = distinct !{}
+; CHECK: !12 = distinct !{}
+!13 = !{!11}
+; CHECK: !13 = !{!11}
+!14 = !{!12}
+; CHECK: !14 = !{!12}
diff --git a/test/Bitcode/compatibility-3.7.ll.bc b/test/Bitcode/compatibility-3.7.ll.bc
new file mode 100644
index 0000000000000000000000000000000000000000..14c0f1a6d6f1bda8998e601301911f825864703d
GIT binary patch
literal 11584
diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll

+@const.struct = constant %const.struct.type { i32 -1, i8 undef }
+; CHECK: @const.struct = constant %const.struct.type { i32 -1, i8 undef }
+@const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+; CHECK: @const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
+@const.array = constant [2 x i32] [i32 -3, i32 -4]
+; CHECK: @const.array = constant [2 x i32] [i32 -3, i32 -4]
+@const.vector = constant <2 x i32> <i32 -5, i32 -6>
+; CHECK: @const.vector = constant <2 x i32> <i32 -5, i32 -6>
+
+;; Global Variables
+; Format: [@<GlobalVarName> =] [Linkage] [Visibility] [DLLStorageClass]
+;         [ThreadLocal] [unnamed_addr] [AddrSpace] [ExternallyInitialized]
+;         <global | constant> <Type> [<InitializerConstant>]
+;         [, section "name"] [, comdat [($name)]] [, align <Alignment>]
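+; An illustrative (unchecked) declaration filling several optional slots at
+; once might read: @g.example = private unnamed_addr constant i32 7, align 8
+
+; Global Variables -- Simple
+@g1 = global i32 0
+; CHECK: @g1 = global i32 0
+@g2 = constant i32 0
+; CHECK: @g2 = constant i32 0
+
+; Global Variables -- Linkage
+@g.private = private global i32 0
+; CHECK: @g.private = private global i32 0
+@g.internal = internal global i32 0
+; CHECK: @g.internal = internal global i32 0
+@g.available_externally = available_externally global i32 0
+; CHECK: @g.available_externally = available_externally global i32 0
+@g.linkonce = linkonce global i32 0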
+; CHECK: @g.linkonce = linkonce global i32 0 +@g.weak = weak global i32 0 +; CHECK: @g.weak = weak global i32 0 +@g.common = common global i32 0 +; CHECK: @g.common = common global i32 0 +@g.appending = appending global [4 x i8] c"test" +; CHECK: @g.appending = appending global [4 x i8] c"test" +@g.extern_weak = extern_weak global i32 +; CHECK: @g.extern_weak = extern_weak global i32 +@g.linkonce_odr = linkonce_odr global i32 0 +; CHECK: @g.linkonce_odr = linkonce_odr global i32 0 +@g.weak_odr = weak_odr global i32 0 +; CHECK: @g.weak_odr = weak_odr global i32 0 +@g.external = external global i32 +; CHECK: @g.external = external global i32 + +; Global Variables -- Visibility +@g.default = default global i32 0 +; CHECK: @g.default = global i32 0 +@g.hidden = hidden global i32 0 +; CHECK: @g.hidden = hidden global i32 0 +@g.protected = protected global i32 0 +; CHECK: @g.protected = protected global i32 0 + +; Global Variables -- DLLStorageClass +@g.dlldefault = default global i32 0 +; CHECK: @g.dlldefault = global i32 0 +@g.dllimport = external dllimport global i32 +; CHECK: @g.dllimport = external dllimport global i32 +@g.dllexport = dllexport global i32 0 +; CHECK: @g.dllexport = dllexport global i32 0 + +; Global Variables -- ThreadLocal +@g.notthreadlocal = global i32 0 +; CHECK: @g.notthreadlocal = global i32 0 +@g.generaldynamic = thread_local global i32 0 +; CHECK: @g.generaldynamic = thread_local global i32 0 +@g.localdynamic = thread_local(localdynamic) global i32 0 +; CHECK: @g.localdynamic = thread_local(localdynamic) global i32 0 +@g.initialexec = thread_local(initialexec) global i32 0 +; CHECK: @g.initialexec = thread_local(initialexec) global i32 0 +@g.localexec = thread_local(localexec) global i32 0 +; CHECK: @g.localexec = thread_local(localexec) global i32 0 + +; Global Variables -- unnamed_addr +@g.unnamed_addr = unnamed_addr global i32 0 +; CHECK: @g.unnamed_addr = unnamed_addr global i32 0 + +; Global Variables -- AddrSpace +@g.addrspace = addrspace(1) global i32 0 +; CHECK: @g.addrspace = addrspace(1) global i32 0 + +; Global Variables -- ExternallyInitialized +@g.externally_initialized = external externally_initialized global i32 +; CHECK: @g.externally_initialized = external externally_initialized global i32 + +; Global Variables -- section +@g.section = global i32 0, section "_DATA" +; CHECK: @g.section = global i32 0, section "_DATA" + +; Global Variables -- comdat +@comdat.any = global i32 0, comdat +; CHECK: @comdat.any = global i32 0, comdat +@comdat.exactmatch = global i32 0, comdat +; CHECK: @comdat.exactmatch = global i32 0, comdat +@comdat.largest = global i32 0, comdat +; CHECK: @comdat.largest = global i32 0, comdat +@comdat.noduplicates = global i32 0, comdat +; CHECK: @comdat.noduplicates = global i32 0, comdat +@comdat.samesize = global i32 0, comdat +; CHECK: @comdat.samesize = global i32 0, comdat + +; Force two globals from different comdats into sections with the same name. 
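+; The shared section name below is deliberate: the two distinct comdat keys,
+; not the section, must keep the globals apart through the round-trip.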
+$comdat1 = comdat any +$comdat2 = comdat any +@g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1) +; CHECK: @g.comdat1 = global i32 0, section "SharedSection", comdat($comdat1) +@g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2) +; CHECK: @g.comdat2 = global i32 0, section "SharedSection", comdat($comdat2) + +; Global Variables -- align +@g.align = global i32 0, align 4 +; CHECK: @g.align = global i32 0, align 4 + +; Global Variables -- Intrinsics +%pri.func.data = type { i32, void ()*, i8* } +@g.used1 = global i32 0 +@g.used2 = global i32 0 +@g.used3 = global i8 0 +declare void @g.f1() +@llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata" +; CHECK: @llvm.used = appending global [1 x i32*] [i32* @g.used1], section "llvm.metadata" +@llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata" +; CHECK: @llvm.compiler.used = appending global [1 x i32*] [i32* @g.used2], section "llvm.metadata" +@llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +; CHECK: @llvm.global_ctors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +@llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" +; CHECK: @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata" + +;; Aliases +; Format: @ = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal] +; [unnamed_addr] alias @ + +; Aliases -- Linkage +@a.private = private alias i32, i32* @g.private +; CHECK: @a.private = private alias i32, i32* @g.private +@a.internal = internal alias i32, i32* @g.internal +; CHECK: @a.internal = internal alias i32, i32* @g.internal +@a.linkonce = linkonce alias i32, i32* @g.linkonce +; CHECK: @a.linkonce = linkonce alias i32, i32* @g.linkonce +@a.weak = weak alias i32, i32* @g.weak +; CHECK: @a.weak = weak alias i32, i32* @g.weak +@a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr +; CHECK: @a.linkonce_odr = linkonce_odr alias i32, i32* @g.linkonce_odr +@a.weak_odr = weak_odr alias i32, i32* @g.weak_odr +; CHECK: @a.weak_odr = weak_odr alias i32, i32* @g.weak_odr +@a.external = external alias i32, i32* @g1 +; CHECK: @a.external = alias i32, i32* @g1 + +; Aliases -- Visibility +@a.default = default alias i32, i32* @g.default +; CHECK: @a.default = alias i32, i32* @g.default +@a.hidden = hidden alias i32, i32* @g.hidden +; CHECK: @a.hidden = hidden alias i32, i32* @g.hidden +@a.protected = protected alias i32, i32* @g.protected +; CHECK: @a.protected = protected alias i32, i32* @g.protected + +; Aliases -- DLLStorageClass +@a.dlldefault = default alias i32, i32* @g.dlldefault +; CHECK: @a.dlldefault = alias i32, i32* @g.dlldefault +@a.dllimport = dllimport alias i32, i32* @g1 +; CHECK: @a.dllimport = dllimport alias i32, i32* @g1 +@a.dllexport = dllexport alias i32, i32* @g.dllexport +; CHECK: @a.dllexport = dllexport alias i32, i32* @g.dllexport + +; Aliases -- ThreadLocal +@a.notthreadlocal = alias i32, i32* @g.notthreadlocal +; CHECK: @a.notthreadlocal = alias i32, i32* @g.notthreadlocal +@a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic +; CHECK: @a.generaldynamic = thread_local alias i32, i32* @g.generaldynamic +@a.localdynamic = thread_local(localdynamic) alias i32, i32* 
@g.localdynamic +; CHECK: @a.localdynamic = thread_local(localdynamic) alias i32, i32* @g.localdynamic +@a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec +; CHECK: @a.initialexec = thread_local(initialexec) alias i32, i32* @g.initialexec +@a.localexec = thread_local(localexec) alias i32, i32* @g.localexec +; CHECK: @a.localexec = thread_local(localexec) alias i32, i32* @g.localexec + +; Aliases -- unnamed_addr +@a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr +; CHECK: @a.unnamed_addr = unnamed_addr alias i32, i32* @g.unnamed_addr + +;; Functions +; Format: define [linkage] [visibility] [DLLStorageClass] +; [cconv] [ret attrs] +; @ ([argument list]) +; [unnamed_addr] [fn Attrs] [section "name"] [comdat [($name)]] +; [align N] [gc] [prefix Constant] [prologue Constant] +; [personality Constant] { ... } + +; Functions -- Simple +declare void @f1 () +; CHECK: declare void @f1() + +define void @f2 () { +; CHECK: define void @f2() +entry: + ret void +} + +; Functions -- linkage +define private void @f.private() { +; CHECK: define private void @f.private() +entry: + ret void +} +define internal void @f.internal() { +; CHECK: define internal void @f.internal() +entry: + ret void +} +define available_externally void @f.available_externally() { +; CHECK: define available_externally void @f.available_externally() +entry: + ret void +} +define linkonce void @f.linkonce() { +; CHECK: define linkonce void @f.linkonce() +entry: + ret void +} +define weak void @f.weak() { +; CHECK: define weak void @f.weak() +entry: + ret void +} +define linkonce_odr void @f.linkonce_odr() { +; CHECK: define linkonce_odr void @f.linkonce_odr() +entry: + ret void +} +define weak_odr void @f.weak_odr() { +; CHECK: define weak_odr void @f.weak_odr() +entry: + ret void +} +declare external void @f.external() +; CHECK: declare void @f.external() +declare extern_weak void @f.extern_weak() +; CHECK: declare extern_weak void @f.extern_weak() + +; Functions -- visibility +declare default void @f.default() +; CHECK: declare void @f.default() +declare hidden void @f.hidden() +; CHECK: declare hidden void @f.hidden() +declare protected void @f.protected() +; CHECK: declare protected void @f.protected() + +; Functions -- DLLStorageClass +declare dllimport void @f.dllimport() +; CHECK: declare dllimport void @f.dllimport() +declare dllexport void @f.dllexport() +; CHECK: declare dllexport void @f.dllexport() + +; Functions -- cconv (Calling conventions) +declare ccc void @f.ccc() +; CHECK: declare void @f.ccc() +declare fastcc void @f.fastcc() +; CHECK: declare fastcc void @f.fastcc() +declare coldcc void @f.coldcc() +; CHECK: declare coldcc void @f.coldcc() +declare cc10 void @f.cc10() +; CHECK: declare ghccc void @f.cc10() +declare ghccc void @f.ghccc() +; CHECK: declare ghccc void @f.ghccc() +declare cc11 void @f.cc11() +; CHECK: declare cc11 void @f.cc11() +declare webkit_jscc void @f.webkit_jscc() +; CHECK: declare webkit_jscc void @f.webkit_jscc() +declare anyregcc void @f.anyregcc() +; CHECK: declare anyregcc void @f.anyregcc() +declare preserve_mostcc void @f.preserve_mostcc() +; CHECK: declare preserve_mostcc void @f.preserve_mostcc() +declare preserve_allcc void @f.preserve_allcc() +; CHECK: declare preserve_allcc void @f.preserve_allcc() +declare cc64 void @f.cc64() +; CHECK: declare x86_stdcallcc void @f.cc64() +declare x86_stdcallcc void @f.x86_stdcallcc() +; CHECK: declare x86_stdcallcc void @f.x86_stdcallcc() +declare cc65 void @f.cc65() +; CHECK: declare x86_fastcallcc void 
@f.cc65() +declare x86_fastcallcc void @f.x86_fastcallcc() +; CHECK: declare x86_fastcallcc void @f.x86_fastcallcc() +declare cc66 void @f.cc66() +; CHECK: declare arm_apcscc void @f.cc66() +declare arm_apcscc void @f.arm_apcscc() +; CHECK: declare arm_apcscc void @f.arm_apcscc() +declare cc67 void @f.cc67() +; CHECK: declare arm_aapcscc void @f.cc67() +declare arm_aapcscc void @f.arm_aapcscc() +; CHECK: declare arm_aapcscc void @f.arm_aapcscc() +declare cc68 void @f.cc68() +; CHECK: declare arm_aapcs_vfpcc void @f.cc68() +declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc() +; CHECK: declare arm_aapcs_vfpcc void @f.arm_aapcs_vfpcc() +declare cc69 void @f.cc69() +; CHECK: declare msp430_intrcc void @f.cc69() +declare msp430_intrcc void @f.msp430_intrcc() +; CHECK: declare msp430_intrcc void @f.msp430_intrcc() +declare cc70 void @f.cc70() +; CHECK: declare x86_thiscallcc void @f.cc70() +declare x86_thiscallcc void @f.x86_thiscallcc() +; CHECK: declare x86_thiscallcc void @f.x86_thiscallcc() +declare cc71 void @f.cc71() +; CHECK: declare ptx_kernel void @f.cc71() +declare ptx_kernel void @f.ptx_kernel() +; CHECK: declare ptx_kernel void @f.ptx_kernel() +declare cc72 void @f.cc72() +; CHECK: declare ptx_device void @f.cc72() +declare ptx_device void @f.ptx_device() +; CHECK: declare ptx_device void @f.ptx_device() +declare cc75 void @f.cc75() +; CHECK: declare spir_func void @f.cc75() +declare spir_func void @f.spir_func() +; CHECK: declare spir_func void @f.spir_func() +declare cc76 void @f.cc76() +; CHECK: declare spir_kernel void @f.cc76() +declare spir_kernel void @f.spir_kernel() +; CHECK: declare spir_kernel void @f.spir_kernel() +declare cc77 void @f.cc77() +; CHECK: declare intel_ocl_bicc void @f.cc77() +declare intel_ocl_bicc void @f.intel_ocl_bicc() +; CHECK: declare intel_ocl_bicc void @f.intel_ocl_bicc() +declare cc78 void @f.cc78() +; CHECK: declare x86_64_sysvcc void @f.cc78() +declare x86_64_sysvcc void @f.x86_64_sysvcc() +; CHECK: declare x86_64_sysvcc void @f.x86_64_sysvcc() +declare cc79 void @f.cc79() +; CHECK: declare x86_64_win64cc void @f.cc79() +declare x86_64_win64cc void @f.x86_64_win64cc() +; CHECK: declare x86_64_win64cc void @f.x86_64_win64cc() +declare cc80 void @f.cc80() +; CHECK: declare x86_vectorcallcc void @f.cc80() +declare x86_vectorcallcc void @f.x86_vectorcallcc() +; CHECK: declare x86_vectorcallcc void @f.x86_vectorcallcc() +declare cc1023 void @f.cc1023() +; CHECK: declare cc1023 void @f.cc1023() + +; Functions -- ret attrs (Return attributes) +declare zeroext i64 @f.zeroext() +; CHECK: declare zeroext i64 @f.zeroext() +declare signext i64 @f.signext() +; CHECK: declare signext i64 @f.signext() +declare inreg i32* @f.inreg() +; CHECK: declare inreg i32* @f.inreg() +declare noalias i32* @f.noalias() +; CHECK: declare noalias i32* @f.noalias() +declare nonnull i32* @f.nonnull() +; CHECK: declare nonnull i32* @f.nonnull() +declare dereferenceable(4) i32* @f.dereferenceable4() +; CHECK: declare dereferenceable(4) i32* @f.dereferenceable4() +declare dereferenceable(8) i32* @f.dereferenceable8() +; CHECK: declare dereferenceable(8) i32* @f.dereferenceable8() +declare dereferenceable(16) i32* @f.dereferenceable16() +; CHECK: declare dereferenceable(16) i32* @f.dereferenceable16() +declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null() +; CHECK: declare dereferenceable_or_null(4) i32* @f.dereferenceable4_or_null() +declare dereferenceable_or_null(8) i32* @f.dereferenceable8_or_null() +; CHECK: declare dereferenceable_or_null(8) i32* 
@f.dereferenceable8_or_null() +declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null() +; CHECK: declare dereferenceable_or_null(16) i32* @f.dereferenceable16_or_null() + +; Functions -- Parameter attributes +declare void @f.param.zeroext(i8 zeroext) +; CHECK: declare void @f.param.zeroext(i8 zeroext) +declare void @f.param.signext(i8 signext) +; CHECK: declare void @f.param.signext(i8 signext) +declare void @f.param.inreg(i8 inreg) +; CHECK: declare void @f.param.inreg(i8 inreg) +declare void @f.param.byval({ i8, i8 }* byval) +; CHECK: declare void @f.param.byval({ i8, i8 }* byval) +declare void @f.param.inalloca(i8* inalloca) +; CHECK: declare void @f.param.inalloca(i8* inalloca) +declare void @f.param.sret(i8* sret) +; CHECK: declare void @f.param.sret(i8* sret) +declare void @f.param.noalias(i8* noalias) +; CHECK: declare void @f.param.noalias(i8* noalias) +declare void @f.param.nocapture(i8* nocapture) +; CHECK: declare void @f.param.nocapture(i8* nocapture) +declare void @f.param.nest(i8* nest) +; CHECK: declare void @f.param.nest(i8* nest) +declare i8* @f.param.returned(i8* returned) +; CHECK: declare i8* @f.param.returned(i8* returned) +declare void @f.param.nonnull(i8* nonnull) +; CHECK: declare void @f.param.nonnull(i8* nonnull) +declare void @f.param.dereferenceable(i8* dereferenceable(4)) +; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4)) +declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) +; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4)) + +; Functions -- unnamed_addr +declare void @f.unnamed_addr() unnamed_addr +; CHECK: declare void @f.unnamed_addr() unnamed_addr + +; Functions -- fn Attrs (Function attributes) +declare void @f.alignstack4() alignstack(4) +; CHECK: declare void @f.alignstack4() #0 +declare void @f.alignstack8() alignstack(8) +; CHECK: declare void @f.alignstack8() #1 +declare void @f.alwaysinline() alwaysinline +; CHECK: declare void @f.alwaysinline() #2 +declare void @f.cold() cold +; CHECK: declare void @f.cold() #3 +declare void @f.convergent() convergent +; CHECK: declare void @f.convergent() #4 +declare void @f.inlinehint() inlinehint +; CHECK: declare void @f.inlinehint() #5 +declare void @f.jumptable() unnamed_addr jumptable +; CHECK: declare void @f.jumptable() unnamed_addr #6 +declare void @f.minsize() minsize +; CHECK: declare void @f.minsize() #7 +declare void @f.naked() naked +; CHECK: declare void @f.naked() #8 +declare void @f.nobuiltin() nobuiltin +; CHECK: declare void @f.nobuiltin() #9 +declare void @f.noduplicate() noduplicate +; CHECK: declare void @f.noduplicate() #10 +declare void @f.noimplicitfloat() noimplicitfloat +; CHECK: declare void @f.noimplicitfloat() #11 +declare void @f.noinline() noinline +; CHECK: declare void @f.noinline() #12 +declare void @f.nonlazybind() nonlazybind +; CHECK: declare void @f.nonlazybind() #13 +declare void @f.noredzone() noredzone +; CHECK: declare void @f.noredzone() #14 +declare void @f.noreturn() noreturn +; CHECK: declare void @f.noreturn() #15 +declare void @f.nounwind() nounwind +; CHECK: declare void @f.nounwind() #16 +declare void @f.optnone() noinline optnone +; CHECK: declare void @f.optnone() #17 +declare void @f.optsize() optsize +; CHECK: declare void @f.optsize() #18 +declare void @f.readnone() readnone +; CHECK: declare void @f.readnone() #19 +declare void @f.readonly() readonly +; CHECK: declare void @f.readonly() #20 +declare void @f.returns_twice() returns_twice +; CHECK: declare void 
@f.returns_twice() #21 +declare void @f.safestack() safestack +; CHECK: declare void @f.safestack() #22 +declare void @f.sanitize_address() sanitize_address +; CHECK: declare void @f.sanitize_address() #23 +declare void @f.sanitize_memory() sanitize_memory +; CHECK: declare void @f.sanitize_memory() #24 +declare void @f.sanitize_thread() sanitize_thread +; CHECK: declare void @f.sanitize_thread() #25 +declare void @f.ssp() ssp +; CHECK: declare void @f.ssp() #26 +declare void @f.sspreq() sspreq +; CHECK: declare void @f.sspreq() #27 +declare void @f.sspstrong() sspstrong +; CHECK: declare void @f.sspstrong() #28 +declare void @f.thunk() "thunk" +; CHECK: declare void @f.thunk() #29 +declare void @f.uwtable() uwtable +; CHECK: declare void @f.uwtable() #30 +declare void @f.kvpair() "cpu"="cortex-a8" +; CHECK:declare void @f.kvpair() #31 +declare void @f.norecurse() norecurse +; CHECK: declare void @f.norecurse() #32 +declare void @f.inaccessiblememonly() inaccessiblememonly +; CHECK: declare void @f.inaccessiblememonly() #33 +declare void @f.inaccessiblemem_or_argmemonly() inaccessiblemem_or_argmemonly +; CHECK: declare void @f.inaccessiblemem_or_argmemonly() #34 + +; Functions -- section +declare void @f.section() section "80" +; CHECK: declare void @f.section() section "80" + +; Functions -- comdat +define void @f.comdat_any() comdat($comdat.any) { +; CHECK: define void @f.comdat_any() comdat($comdat.any) +entry: + ret void +} +define void @f.comdat_exactmatch() comdat($comdat.exactmatch) { +; CHECK: define void @f.comdat_exactmatch() comdat($comdat.exactmatch) +entry: + ret void +} +define void @f.comdat_largest() comdat($comdat.largest) { +; CHECK: define void @f.comdat_largest() comdat($comdat.largest) +entry: + ret void +} +define void @f.comdat_noduplicates() comdat($comdat.noduplicates) { +; CHECK: define void @f.comdat_noduplicates() comdat($comdat.noduplicates) +entry: + ret void +} +define void @f.comdat_samesize() comdat($comdat.samesize) { +; CHECK: define void @f.comdat_samesize() comdat($comdat.samesize) +entry: + ret void +} + +; Functions -- align +declare void @f.align2() align 2 +; CHECK: declare void @f.align2() align 2 +declare void @f.align4() align 4 +; CHECK: declare void @f.align4() align 4 +declare void @f.align8() align 8 +; CHECK: declare void @f.align8() align 8 + +; Functions -- GC +declare void @f.gcshadow() gc "shadow-stack" +; CHECK: declare void @f.gcshadow() gc "shadow-stack" + +; Functions -- Prefix data +declare void @f.prefixi32() prefix i32 1684365668 +; CHECK: declare void @f.prefixi32() prefix i32 1684365668 +declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3] +; CHECK: declare void @f.prefixarray() prefix [4 x i32] [i32 0, i32 1, i32 2, i32 3] + +; Functions -- Prologue data +declare void @f.prologuei32() prologue i32 1684365669 +; CHECK: declare void @f.prologuei32() prologue i32 1684365669 +declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3] +; CHECK: declare void @f.prologuearray() prologue [4 x i32] [i32 0, i32 1, i32 2, i32 3] + +; Functions -- Personality constant +declare void @llvm.donothing() nounwind readnone +; CHECK: declare void @llvm.donothing() #35 +define void @f.no_personality() personality i8 3 { +; CHECK: define void @f.no_personality() personality i8 3 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + +declare i32 @f.personality_handler() +; CHECK: declare i32 
@f.personality_handler() +define void @f.personality() personality i32 ()* @f.personality_handler { +; CHECK: define void @f.personality() personality i32 ()* @f.personality_handler + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cleanup = landingpad i32 cleanup + br label %normal +normal: + ret void +} + +;; Atomic Memory Ordering Constraints +define void @atomics(i32* %word) { + %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic + ; CHECK: %cmpxchg.0 = cmpxchg i32* %word, i32 0, i32 4 monotonic monotonic + %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic + ; CHECK: %cmpxchg.1 = cmpxchg i32* %word, i32 0, i32 5 acq_rel monotonic + %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic + ; CHECK: %cmpxchg.2 = cmpxchg i32* %word, i32 0, i32 6 acquire monotonic + %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic + ; CHECK: %cmpxchg.3 = cmpxchg i32* %word, i32 0, i32 7 release monotonic + %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic + ; CHECK: %cmpxchg.4 = cmpxchg i32* %word, i32 0, i32 8 seq_cst monotonic + %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic + ; CHECK: %cmpxchg.5 = cmpxchg weak i32* %word, i32 0, i32 9 seq_cst monotonic + %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic + ; CHECK: %cmpxchg.6 = cmpxchg volatile i32* %word, i32 0, i32 10 seq_cst monotonic + %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic + ; CHECK: %cmpxchg.7 = cmpxchg weak volatile i32* %word, i32 0, i32 11 singlethread seq_cst monotonic + %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic + ; CHECK: %atomicrmw.xchg = atomicrmw xchg i32* %word, i32 12 monotonic + %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic + ; CHECK: %atomicrmw.add = atomicrmw add i32* %word, i32 13 monotonic + %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic + ; CHECK: %atomicrmw.sub = atomicrmw sub i32* %word, i32 14 monotonic + %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic + ; CHECK: %atomicrmw.and = atomicrmw and i32* %word, i32 15 monotonic + %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic + ; CHECK: %atomicrmw.nand = atomicrmw nand i32* %word, i32 16 monotonic + %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic + ; CHECK: %atomicrmw.or = atomicrmw or i32* %word, i32 17 monotonic + %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic + ; CHECK: %atomicrmw.xor = atomicrmw xor i32* %word, i32 18 monotonic + %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic + ; CHECK: %atomicrmw.max = atomicrmw max i32* %word, i32 19 monotonic + %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic + ; CHECK: %atomicrmw.min = atomicrmw volatile min i32* %word, i32 20 monotonic + %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic + ; CHECK: %atomicrmw.umax = atomicrmw umax i32* %word, i32 21 singlethread monotonic + %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic + ; CHECK: %atomicrmw.umin = atomicrmw volatile umin i32* %word, i32 22 singlethread monotonic + fence acquire + ; CHECK: fence acquire + fence release + ; CHECK: fence release + fence acq_rel + ; CHECK: fence acq_rel + fence singlethread seq_cst + ; CHECK: fence singlethread seq_cst + + %ld.1 = load atomic i32, i32* %word monotonic, align 4 + ; CHECK: %ld.1 = load atomic i32, i32* %word monotonic, align 4 + %ld.2 = load atomic volatile i32, 
i32* %word acquire, align 8 + ; CHECK: %ld.2 = load atomic volatile i32, i32* %word acquire, align 8 + %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16 + ; CHECK: %ld.3 = load atomic volatile i32, i32* %word singlethread seq_cst, align 16 + + store atomic i32 23, i32* %word monotonic, align 4 + ; CHECK: store atomic i32 23, i32* %word monotonic, align 4 + store atomic volatile i32 24, i32* %word monotonic, align 4 + ; CHECK: store atomic volatile i32 24, i32* %word monotonic, align 4 + store atomic volatile i32 25, i32* %word singlethread monotonic, align 4 + ; CHECK: store atomic volatile i32 25, i32* %word singlethread monotonic, align 4 + ret void +} + +;; Fast Math Flags +define void @fastmathflags(float %op1, float %op2) { + %f.nnan = fadd nnan float %op1, %op2 + ; CHECK: %f.nnan = fadd nnan float %op1, %op2 + %f.ninf = fadd ninf float %op1, %op2 + ; CHECK: %f.ninf = fadd ninf float %op1, %op2 + %f.nsz = fadd nsz float %op1, %op2 + ; CHECK: %f.nsz = fadd nsz float %op1, %op2 + %f.arcp = fadd arcp float %op1, %op2 + ; CHECK: %f.arcp = fadd arcp float %op1, %op2 + %f.fast = fadd fast float %op1, %op2 + ; CHECK: %f.fast = fadd fast float %op1, %op2 + ret void +} + +; Check various fast math flags and floating-point types on calls. + +declare float @fmf1() +declare double @fmf2() +declare <4 x double> @fmf3() + +; CHECK-LABEL: fastMathFlagsForCalls( +define void @fastMathFlagsForCalls(float %f, double %d1, <4 x double> %d2) { + %call.fast = call fast float @fmf1() + ; CHECK: %call.fast = call fast float @fmf1() + + ; Throw in some other attributes to make sure those stay in the right places. + + %call.nsz.arcp = notail call nsz arcp double @fmf2() + ; CHECK: %call.nsz.arcp = notail call nsz arcp double @fmf2() + + %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf3() + ; CHECK: %call.nnan.ninf = tail call nnan ninf fastcc <4 x double> @fmf3() + + ret void +} + +;; Type System +%opaquety = type opaque +define void @typesystem() { + %p0 = bitcast i8* null to i32 (i32)* + ; CHECK: %p0 = bitcast i8* null to i32 (i32)* + %p1 = bitcast i8* null to void (i8*)* + ; CHECK: %p1 = bitcast i8* null to void (i8*)* + %p2 = bitcast i8* null to i32 (i8*, ...)* + ; CHECK: %p2 = bitcast i8* null to i32 (i8*, ...)* + %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)* + ; CHECK: %p3 = bitcast i8* null to { i32, i8 } (i8*, ...)* + %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)* + ; CHECK: %p4 = bitcast i8* null to <{ i32, i8 }> (i8*, ...)* + %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)* + ; CHECK: %p5 = bitcast i8* null to <{ i32, i8 }> (<{ i8*, i64 }>*, ...)* + + %t0 = alloca i1942652 + ; CHECK: %t0 = alloca i1942652 + %t1 = alloca half + ; CHECK: %t1 = alloca half + %t2 = alloca float + ; CHECK: %t2 = alloca float + %t3 = alloca double + ; CHECK: %t3 = alloca double + %t4 = alloca fp128 + ; CHECK: %t4 = alloca fp128 + %t5 = alloca x86_fp80 + ; CHECK: %t5 = alloca x86_fp80 + %t6 = alloca ppc_fp128 + ; CHECK: %t6 = alloca ppc_fp128 + %t7 = alloca x86_mmx + ; CHECK: %t7 = alloca x86_mmx + %t8 = alloca %opaquety* + ; CHECK: %t8 = alloca %opaquety* + + ret void +} + +declare void @llvm.token(token) +; CHECK: declare void @llvm.token(token) + +;; Inline Assembler Expressions +define void @inlineasm(i32 %arg) { + call i32 asm "bswap $0", "=r,r"(i32 %arg) + ; CHECK: call i32 asm "bswap $0", "=r,r"(i32 %arg) + call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 %arg) + ; CHECK: call i32 asm sideeffect "blt $1, $2, $3", "=r,r,rm"(i32 %arg, i32 
%arg) + ret void +} + +;; Instructions + +; Instructions -- Terminators +define void @instructions.terminators(i8 %val) personality i32 -10 { + br i1 false, label %iftrue, label %iffalse + ; CHECK: br i1 false, label %iftrue, label %iffalse + br label %iftrue + ; CHECK: br label %iftrue +iftrue: + ret void + ; CHECK: ret void +iffalse: + + switch i8 %val, label %defaultdest [ + ; CHECK: switch i8 %val, label %defaultdest [ + i8 0, label %defaultdest.0 + ; CHECK: i8 0, label %defaultdest.0 + i8 1, label %defaultdest.1 + ; CHECK: i8 1, label %defaultdest.1 + i8 2, label %defaultdest.2 + ; CHECK: i8 2, label %defaultdest.2 + ] + ; CHECK: ] +defaultdest: + ret void +defaultdest.0: + ret void +defaultdest.1: + ret void +defaultdest.2: + + indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2] + ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2] + indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2] + ; CHECK: indirectbr i8* blockaddress(@instructions.terminators, %defaultdest.2), [label %defaultdest.2, label %defaultdest.2] + + invoke fastcc void @f.fastcc() + ; CHECK: invoke fastcc void @f.fastcc() + to label %defaultdest unwind label %exc + ; CHECK: to label %defaultdest unwind label %exc +exc: + %cleanup = landingpad i32 cleanup + + resume i32 undef + ; CHECK: resume i32 undef + unreachable + ; CHECK: unreachable + + ret void +} + +define i32 @instructions.win_eh.1() personality i32 -3 { +entry: + %arg1 = alloca i32 + %arg2 = alloca i32 + invoke void @f.ccc() to label %normal unwind label %catchswitch1 + invoke void @f.ccc() to label %normal unwind label %catchswitch2 + invoke void @f.ccc() to label %normal unwind label %catchswitch3 + +catchswitch1: + %cs1 = catchswitch within none [label %catchpad1] unwind to caller + +catchpad1: + catchpad within %cs1 [] + br label %normal + ; CHECK: catchpad within %cs1 [] + ; CHECK-NEXT: br label %normal + +catchswitch2: + %cs2 = catchswitch within none [label %catchpad2] unwind to caller + +catchpad2: + catchpad within %cs2 [i32* %arg1] + br label %normal + ; CHECK: catchpad within %cs2 [i32* %arg1] + ; CHECK-NEXT: br label %normal + +catchswitch3: + %cs3 = catchswitch within none [label %catchpad3] unwind label %cleanuppad1 + +catchpad3: + catchpad within %cs3 [i32* %arg1, i32* %arg2] + br label %normal + ; CHECK: catchpad within %cs3 [i32* %arg1, i32* %arg2] + ; CHECK-NEXT: br label %normal + +cleanuppad1: + %clean.1 = cleanuppad within none [] + unreachable + ; CHECK: %clean.1 = cleanuppad within none [] + ; CHECK-NEXT: unreachable + +normal: + ret i32 0 +} +; +define i32 @instructions.win_eh.2() personality i32 -4 { +entry: + invoke void @f.ccc() to label %invoke.cont unwind label %catchswitch + +invoke.cont: + invoke void @f.ccc() to label %continue unwind label %cleanup + +cleanup: + %clean = cleanuppad within none [] + ; CHECK: %clean = cleanuppad within none [] + cleanupret from %clean unwind to caller + ; CHECK: cleanupret from %clean unwind to caller + +catchswitch: + %cs = catchswitch within none [label %catchpad] unwind label %terminate + +catchpad: + %catch = catchpad within %cs [] + br label %body + ; CHECK: %catch = catchpad within %cs [] + ; CHECK-NEXT: br label %body + +body: + invoke void @f.ccc() to label %continue unwind label %terminate + catchret from %catch to label %return + ; CHECK: catchret from %catch to label %return + +return: + ret i32 0 + +terminate: + cleanuppad 
within %cs [] + unreachable + ; CHECK: cleanuppad within %cs [] + ; CHECK-NEXT: unreachable + +continue: + ret i32 0 +} + +; Instructions -- Binary Operations +define void @instructions.binops(i8 %op1, i8 %op2) { + ; nuw x nsw + add i8 %op1, %op2 + ; CHECK: add i8 %op1, %op2 + add nuw i8 %op1, %op2 + ; CHECK: add nuw i8 %op1, %op2 + add nsw i8 %op1, %op2 + ; CHECK: add nsw i8 %op1, %op2 + add nuw nsw i8 %op1, %op2 + ; CHECK: add nuw nsw i8 %op1, %op2 + sub i8 %op1, %op2 + ; CHECK: sub i8 %op1, %op2 + sub nuw i8 %op1, %op2 + ; CHECK: sub nuw i8 %op1, %op2 + sub nsw i8 %op1, %op2 + ; CHECK: sub nsw i8 %op1, %op2 + sub nuw nsw i8 %op1, %op2 + ; CHECK: sub nuw nsw i8 %op1, %op2 + mul i8 %op1, %op2 + ; CHECK: mul i8 %op1, %op2 + mul nuw i8 %op1, %op2 + ; CHECK: mul nuw i8 %op1, %op2 + mul nsw i8 %op1, %op2 + ; CHECK: mul nsw i8 %op1, %op2 + mul nuw nsw i8 %op1, %op2 + ; CHECK: mul nuw nsw i8 %op1, %op2 + + ; exact + udiv i8 %op1, %op2 + ; CHECK: udiv i8 %op1, %op2 + udiv exact i8 %op1, %op2 + ; CHECK: udiv exact i8 %op1, %op2 + sdiv i8 %op1, %op2 + ; CHECK: sdiv i8 %op1, %op2 + sdiv exact i8 %op1, %op2 + ; CHECK: sdiv exact i8 %op1, %op2 + + ; none + urem i8 %op1, %op2 + ; CHECK: urem i8 %op1, %op2 + srem i8 %op1, %op2 + ; CHECK: srem i8 %op1, %op2 + + ret void +} + +; Instructions -- Bitwise Binary Operations +define void @instructions.bitwise_binops(i8 %op1, i8 %op2) { + ; nuw x nsw + shl i8 %op1, %op2 + ; CHECK: shl i8 %op1, %op2 + shl nuw i8 %op1, %op2 + ; CHECK: shl nuw i8 %op1, %op2 + shl nsw i8 %op1, %op2 + ; CHECK: shl nsw i8 %op1, %op2 + shl nuw nsw i8 %op1, %op2 + ; CHECK: shl nuw nsw i8 %op1, %op2 + + ; exact + lshr i8 %op1, %op2 + ; CHECK: lshr i8 %op1, %op2 + lshr exact i8 %op1, %op2 + ; CHECK: lshr exact i8 %op1, %op2 + ashr i8 %op1, %op2 + ; CHECK: ashr i8 %op1, %op2 + ashr exact i8 %op1, %op2 + ; CHECK: ashr exact i8 %op1, %op2 + + ; none + and i8 %op1, %op2 + ; CHECK: and i8 %op1, %op2 + or i8 %op1, %op2 + ; CHECK: or i8 %op1, %op2 + xor i8 %op1, %op2 + ; CHECK: xor i8 %op1, %op2 + + ret void +} + +; Instructions -- Vector Operations +define void @instructions.vectorops(<4 x float> %vec, <4 x float> %vec2) { + extractelement <4 x float> %vec, i8 0 + ; CHECK: extractelement <4 x float> %vec, i8 0 + insertelement <4 x float> %vec, float 3.500000e+00, i8 0 + ; CHECK: insertelement <4 x float> %vec, float 3.500000e+00, i8 0 + shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer + ; CHECK: shufflevector <4 x float> %vec, <4 x float> %vec2, <2 x i32> zeroinitializer + + ret void +} + +; Instructions -- Aggregate Operations +define void @instructions.aggregateops({ i8, i32 } %up, <{ i8, i32 }> %p, + [3 x i8] %arr, { i8, { i32 }} %n, + <2 x i8*> %pvec, <2 x i64> %offsets) { + extractvalue { i8, i32 } %up, 0 + ; CHECK: extractvalue { i8, i32 } %up, 0 + extractvalue <{ i8, i32 }> %p, 1 + ; CHECK: extractvalue <{ i8, i32 }> %p, 1 + extractvalue [3 x i8] %arr, 2 + ; CHECK: extractvalue [3 x i8] %arr, 2 + extractvalue { i8, { i32 } } %n, 1, 0 + ; CHECK: extractvalue { i8, { i32 } } %n, 1, 0 + + insertvalue { i8, i32 } %up, i8 1, 0 + ; CHECK: insertvalue { i8, i32 } %up, i8 1, 0 + insertvalue <{ i8, i32 }> %p, i32 2, 1 + ; CHECK: insertvalue <{ i8, i32 }> %p, i32 2, 1 + insertvalue [3 x i8] %arr, i8 0, 0 + ; CHECK: insertvalue [3 x i8] %arr, i8 0, 0 + insertvalue { i8, { i32 } } %n, i32 0, 1, 0 + ; CHECK: insertvalue { i8, { i32 } } %n, i32 0, 1, 0 + + %up.ptr = alloca { i8, i32 } + %p.ptr = alloca <{ i8, i32 }> + %arr.ptr = alloca [3 x i8] + %n.ptr = alloca { i8, { 
i32 } } + + getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0 + ; CHECK: getelementptr { i8, i32 }, { i8, i32 }* %up.ptr, i8 0 + getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1 + ; CHECK: getelementptr <{ i8, i32 }>, <{ i8, i32 }>* %p.ptr, i8 1 + getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2 + ; CHECK: getelementptr [3 x i8], [3 x i8]* %arr.ptr, i8 2 + getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1 + ; CHECK: getelementptr { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 0, i32 1 + getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0 + ; CHECK: getelementptr inbounds { i8, { i32 } }, { i8, { i32 } }* %n.ptr, i32 1, i32 0 + getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets + ; CHECK: getelementptr i8, <2 x i8*> %pvec, <2 x i64> %offsets + + ret void +} + +; Instructions -- Memory Access and Addressing Operations +!7 = !{i32 1} +!8 = !{} +!9 = !{i64 4} +define void @instructions.memops(i32** %base) { + alloca i32, i8 4, align 4 + ; CHECK: alloca i32, i8 4, align 4 + alloca inalloca i32, i8 4, align 4 + ; CHECK: alloca inalloca i32, i8 4, align 4 + + load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + ; CHECK: load i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + ; CHECK: load volatile i32*, i32** %base, align 8, !invariant.load !7, !nontemporal !8, !nonnull !7, !dereferenceable !9, !dereferenceable_or_null !9 + + store i32* null, i32** %base, align 4, !nontemporal !8 + ; CHECK: store i32* null, i32** %base, align 4, !nontemporal !8 + store volatile i32* null, i32** %base, align 4, !nontemporal !8 + ; CHECK: store volatile i32* null, i32** %base, align 4, !nontemporal !8 + + ret void +} + +; Instructions -- Conversion Operations +define void @instructions.conversions() { + trunc i32 -1 to i1 + ; CHECK: trunc i32 -1 to i1 + zext i32 -1 to i64 + ; CHECK: zext i32 -1 to i64 + sext i32 -1 to i64 + ; CHECK: sext i32 -1 to i64 + fptrunc float undef to half + ; CHECK: fptrunc float undef to half + fpext half undef to float + ; CHECK: fpext half undef to float + fptoui float undef to i32 + ; CHECK: fptoui float undef to i32 + fptosi float undef to i32 + ; CHECK: fptosi float undef to i32 + uitofp i32 1 to float + ; CHECK: uitofp i32 1 to float + sitofp i32 -1 to float + ; CHECK: sitofp i32 -1 to float + ptrtoint i8* null to i64 + ; CHECK: ptrtoint i8* null to i64 + inttoptr i64 0 to i8* + ; CHECK: inttoptr i64 0 to i8* + bitcast i32 0 to i32 + ; CHECK: bitcast i32 0 to i32 + addrspacecast i32* null to i32 addrspace(1)* + ; CHECK: addrspacecast i32* null to i32 addrspace(1)* + + ret void +} + +; Instructions -- Other Operations +define void @instructions.other(i32 %op1, i32 %op2, half %fop1, half %fop2) { +entry: + icmp eq i32 %op1, %op2 + ; CHECK: icmp eq i32 %op1, %op2 + icmp ne i32 %op1, %op2 + ; CHECK: icmp ne i32 %op1, %op2 + icmp ugt i32 %op1, %op2 + ; CHECK: icmp ugt i32 %op1, %op2 + icmp uge i32 %op1, %op2 + ; CHECK: icmp uge i32 %op1, %op2 + icmp ult i32 %op1, %op2 + ; CHECK: icmp ult i32 %op1, %op2 + icmp ule i32 %op1, %op2 + ; CHECK: icmp ule i32 %op1, %op2 + icmp sgt i32 %op1, %op2 + ; CHECK: icmp sgt i32 %op1, %op2 + icmp sge i32 %op1, %op2 + ; CHECK: icmp sge i32 %op1, %op2 + icmp slt i32 %op1, %op2 + ; CHECK: icmp slt i32 
%op1, %op2
+  icmp sle i32 %op1, %op2
+  ; CHECK: icmp sle i32 %op1, %op2
+
+  fcmp false half %fop1, %fop2
+  ; CHECK: fcmp false half %fop1, %fop2
+  fcmp oeq half %fop1, %fop2
+  ; CHECK: fcmp oeq half %fop1, %fop2
+  fcmp ogt half %fop1, %fop2
+  ; CHECK: fcmp ogt half %fop1, %fop2
+  fcmp oge half %fop1, %fop2
+  ; CHECK: fcmp oge half %fop1, %fop2
+  fcmp olt half %fop1, %fop2
+  ; CHECK: fcmp olt half %fop1, %fop2
+  fcmp ole half %fop1, %fop2
+  ; CHECK: fcmp ole half %fop1, %fop2
+  fcmp one half %fop1, %fop2
+  ; CHECK: fcmp one half %fop1, %fop2
+  fcmp ord half %fop1, %fop2
+  ; CHECK: fcmp ord half %fop1, %fop2
+  fcmp ueq half %fop1, %fop2
+  ; CHECK: fcmp ueq half %fop1, %fop2
+  fcmp ugt half %fop1, %fop2
+  ; CHECK: fcmp ugt half %fop1, %fop2
+  fcmp uge half %fop1, %fop2
+  ; CHECK: fcmp uge half %fop1, %fop2
+  fcmp ult half %fop1, %fop2
+  ; CHECK: fcmp ult half %fop1, %fop2
+  fcmp ule half %fop1, %fop2
+  ; CHECK: fcmp ule half %fop1, %fop2
+  fcmp une half %fop1, %fop2
+  ; CHECK: fcmp une half %fop1, %fop2
+  fcmp uno half %fop1, %fop2
+  ; CHECK: fcmp uno half %fop1, %fop2
+  fcmp true half %fop1, %fop2
+  ; CHECK: fcmp true half %fop1, %fop2
+
+  br label %exit
+L1:
+  %v1 = add i32 %op1, %op2
+  br label %exit
+L2:
+  %v2 = add i32 %op1, %op2
+  br label %exit
+exit:
+  phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+  ; CHECK: phi i32 [ %v1, %L1 ], [ %v2, %L2 ], [ %op1, %entry ]
+
+  select i1 true, i32 0, i32 1
+  ; CHECK: select i1 true, i32 0, i32 1
+  select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+  ; CHECK: select <2 x i1> <i1 true, i1 false>, <2 x i8> <i8 2, i8 3>, <2 x i8> <i8 3, i8 2>
+
+  call void @f.nobuiltin() builtin
+  ; CHECK: call void @f.nobuiltin() #39
+
+  call fastcc noalias i32* @f.noalias() noinline
+  ; CHECK: call fastcc noalias i32* @f.noalias() #12
+  tail call ghccc nonnull i32* @f.nonnull() minsize
+  ; CHECK: tail call ghccc nonnull i32* @f.nonnull() #7
+
+  ret void
+}
+
+define void @instructions.call_musttail(i8* inalloca %val) {
+  musttail call void @f.param.inalloca(i8* inalloca %val)
+  ; CHECK: musttail call void @f.param.inalloca(i8* inalloca %val)
+
+  ret void
+}
+
+define void @instructions.call_notail() {
+  notail call void @f1()
+  ; CHECK: notail call void @f1()
+
+  ret void
+}
+
+define void @instructions.landingpad() personality i32 -2 {
+  invoke void @llvm.donothing() to label %proceed unwind label %catch1
+  invoke void @llvm.donothing() to label %proceed unwind label %catch2
+  invoke void @llvm.donothing() to label %proceed unwind label %catch3
+  invoke void @llvm.donothing() to label %proceed unwind label %catch4
+
+catch1:
+  landingpad i32
+  ; CHECK: landingpad i32
+  cleanup
+  ; CHECK: cleanup
+  br label %proceed
+
+catch2:
+  landingpad i32
+  ; CHECK: landingpad i32
+  cleanup
+  ; CHECK: cleanup
+  catch i32* null
+  ; CHECK: catch i32* null
+  br label %proceed
+
+catch3:
+  landingpad i32
+  ; CHECK: landingpad i32
+  cleanup
+  ; CHECK: cleanup
+  catch i32* null
+  ; CHECK: catch i32* null
+  catch i32* null
+  ; CHECK: catch i32* null
+  br label %proceed
+
+catch4:
+  landingpad i32
+  ; CHECK: landingpad i32
+  filter [2 x i32] zeroinitializer
+  ; CHECK: filter [2 x i32] zeroinitializer
+  br label %proceed
+
+proceed:
+  ret void
+}
+
+;; Intrinsic Functions
+
+; Intrinsic Functions -- Variable Argument Handling
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+define void @instructions.va_arg(i8* %v, ...)
{ + %ap = alloca i8* + %ap2 = bitcast i8** %ap to i8* + + call void @llvm.va_start(i8* %ap2) + ; CHECK: call void @llvm.va_start(i8* %ap2) + + va_arg i8* %ap2, i32 + ; CHECK: va_arg i8* %ap2, i32 + + call void @llvm.va_copy(i8* %v, i8* %ap2) + ; CHECK: call void @llvm.va_copy(i8* %v, i8* %ap2) + + call void @llvm.va_end(i8* %ap2) + ; CHECK: call void @llvm.va_end(i8* %ap2) + + ret void +} + +; Intrinsic Functions -- Accurate Garbage Collection +declare void @llvm.gcroot(i8**, i8*) +declare i8* @llvm.gcread(i8*, i8**) +declare void @llvm.gcwrite(i8*, i8*, i8**) +define void @intrinsics.gc() gc "shadow-stack" { + %ptrloc = alloca i8* + call void @llvm.gcroot(i8** %ptrloc, i8* null) + ; CHECK: call void @llvm.gcroot(i8** %ptrloc, i8* null) + + call i8* @llvm.gcread(i8* null, i8** %ptrloc) + ; CHECK: call i8* @llvm.gcread(i8* null, i8** %ptrloc) + + %ref = alloca i8 + call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc) + ; CHECK: call void @llvm.gcwrite(i8* %ref, i8* null, i8** %ptrloc) + + ret void +} + +; Intrinsic Functions -- Code Generation +declare i8* @llvm.returnaddress(i32) +declare i8* @llvm.frameaddress(i32) +declare i32 @llvm.read_register.i32(metadata) +declare i64 @llvm.read_register.i64(metadata) +declare void @llvm.write_register.i32(metadata, i32) +declare void @llvm.write_register.i64(metadata, i64) +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) +declare void @llvm.prefetch(i8*, i32, i32, i32) +declare void @llvm.pcmarker(i32) +declare i64 @llvm.readcyclecounter() +declare void @llvm.clear_cache(i8*, i8*) +declare void @llvm.instrprof_increment(i8*, i64, i32, i32) + +!10 = !{!"rax"} +define void @intrinsics.codegen() { + call i8* @llvm.returnaddress(i32 1) + ; CHECK: call i8* @llvm.returnaddress(i32 1) + call i8* @llvm.frameaddress(i32 1) + ; CHECK: call i8* @llvm.frameaddress(i32 1) + + call i32 @llvm.read_register.i32(metadata !10) + ; CHECK: call i32 @llvm.read_register.i32(metadata !10) + call i64 @llvm.read_register.i64(metadata !10) + ; CHECK: call i64 @llvm.read_register.i64(metadata !10) + call void @llvm.write_register.i32(metadata !10, i32 0) + ; CHECK: call void @llvm.write_register.i32(metadata !10, i32 0) + call void @llvm.write_register.i64(metadata !10, i64 0) + ; CHECK: call void @llvm.write_register.i64(metadata !10, i64 0) + + %stack = call i8* @llvm.stacksave() + ; CHECK: %stack = call i8* @llvm.stacksave() + call void @llvm.stackrestore(i8* %stack) + ; CHECK: call void @llvm.stackrestore(i8* %stack) + + call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0) + ; CHECK: call void @llvm.prefetch(i8* %stack, i32 0, i32 3, i32 0) + + call void @llvm.pcmarker(i32 1) + ; CHECK: call void @llvm.pcmarker(i32 1) + + call i64 @llvm.readcyclecounter() + ; CHECK: call i64 @llvm.readcyclecounter() + + call void @llvm.clear_cache(i8* null, i8* null) + ; CHECK: call void @llvm.clear_cache(i8* null, i8* null) + + call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0) + ; CHECK: call void @llvm.instrprof_increment(i8* null, i64 0, i32 0, i32 0) + + ret void +} + +declare void @llvm.localescape(...) +declare i8* @llvm.localrecover(i8* %func, i8* %fp, i32 %idx) +define void @intrinsics.localescape() { + %static.alloca = alloca i32 + call void (...) @llvm.localescape(i32* %static.alloca) + ; CHECK: call void (...) 
@llvm.localescape(i32* %static.alloca) + + call void @intrinsics.localrecover() + + ret void +} +define void @intrinsics.localrecover() { + %func = bitcast void ()* @intrinsics.localescape to i8* + %fp = call i8* @llvm.frameaddress(i32 1) + call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0) + ; CHECK: call i8* @llvm.localrecover(i8* %func, i8* %fp, i32 0) + + ret void +} + +; We need this function to provide `uses' for some metadata tests. +define void @misc.metadata() { + call void @f1(), !srcloc !11 + call void @f1(), !srcloc !12 + call void @f1(), !srcloc !13 + call void @f1(), !srcloc !14 + ret void +} + +declare void @op_bundle_callee_0() +declare void @op_bundle_callee_1(i32,i32) + +define void @call_with_operand_bundle0(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle0( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] +; CHECK: call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + ret void +} + +define void @call_with_operand_bundle1(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle1( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + + call void @op_bundle_callee_0() + call void @op_bundle_callee_0() [ "foo"() ] + call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] +; CHECK: @op_bundle_callee_0(){{$}} +; CHECK-NEXT: call void @op_bundle_callee_0() [ "foo"() ] +; CHECK-NEXT: call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + ret void +} + +define void @call_with_operand_bundle2(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle2( + entry: + call void @op_bundle_callee_0() [ "foo"() ] +; CHECK: call void @op_bundle_callee_0() [ "foo"() ] + ret void +} + +define void @call_with_operand_bundle3(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle3( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] +; CHECK: call void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + ret void +} + +define void @call_with_operand_bundle4(i32* %ptr) { +; CHECK-LABEL: call_with_operand_bundle4( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] +; CHECK: call void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + ret void +} + +; Invoke versions of the above tests: + + +define void @invoke_with_operand_bundle0(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle0( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] to label %normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + +define void @invoke_with_operand_bundle1(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle1( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + + invoke void @op_bundle_callee_0() to label 
%normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_0(){{$}} + +exception: + %cleanup = landingpad i8 cleanup + br label %normal + +normal: + invoke void @op_bundle_callee_0() [ "foo"() ] to label %normal1 unwind label %exception1 +; CHECK: invoke void @op_bundle_callee_0() [ "foo"() ] + +exception1: + %cleanup1 = landingpad i8 cleanup + br label %normal1 + +normal1: + invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal2 unwind label %exception2 +; CHECK: invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + +exception2: + %cleanup2 = landingpad i8 cleanup + br label %normal2 + +normal2: + ret void +} + +define void @invoke_with_operand_bundle2(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle2( + entry: + invoke void @op_bundle_callee_0() [ "foo"() ] to label %normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_0() [ "foo"() ] + +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + +define void @invoke_with_operand_bundle3(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle3( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + +define void @invoke_with_operand_bundle4(i32* %ptr) personality i8 3 { +; CHECK-LABEL: @invoke_with_operand_bundle4( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + invoke void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + to label %normal unwind label %exception +; CHECK: invoke void @op_bundle_callee_1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] + +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + + +; CHECK: attributes #0 = { alignstack=4 } +; CHECK: attributes #1 = { alignstack=8 } +; CHECK: attributes #2 = { alwaysinline } +; CHECK: attributes #3 = { cold } +; CHECK: attributes #4 = { convergent } +; CHECK: attributes #5 = { inlinehint } +; CHECK: attributes #6 = { jumptable } +; CHECK: attributes #7 = { minsize } +; CHECK: attributes #8 = { naked } +; CHECK: attributes #9 = { nobuiltin } +; CHECK: attributes #10 = { noduplicate } +; CHECK: attributes #11 = { noimplicitfloat } +; CHECK: attributes #12 = { noinline } +; CHECK: attributes #13 = { nonlazybind } +; CHECK: attributes #14 = { noredzone } +; CHECK: attributes #15 = { noreturn } +; CHECK: attributes #16 = { nounwind } +; CHECK: attributes #17 = { noinline optnone } +; CHECK: attributes #18 = { optsize } +; CHECK: attributes #19 = { readnone } +; CHECK: attributes #20 = { readonly } +; CHECK: attributes #21 = { returns_twice } +; CHECK: attributes #22 = { safestack } +; CHECK: attributes #23 = { sanitize_address } +; CHECK: attributes #24 = { sanitize_memory } +; CHECK: attributes #25 = { sanitize_thread } +; CHECK: attributes #26 = { ssp } +; CHECK: attributes #27 = { sspreq } +; CHECK: attributes #28 = { sspstrong } +; CHECK: attributes #29 = { "thunk" } +; CHECK: attributes #30 = { uwtable } +; CHECK: 
attributes #31 = { "cpu"="cortex-a8" } +; CHECK: attributes #32 = { norecurse } +; CHECK: attributes #33 = { inaccessiblememonly } +; CHECK: attributes #34 = { inaccessiblemem_or_argmemonly } +; CHECK: attributes #35 = { nounwind readnone } +; CHECK: attributes #36 = { argmemonly nounwind readonly } +; CHECK: attributes #37 = { argmemonly nounwind } +; CHECK: attributes #38 = { nounwind readonly } +; CHECK: attributes #39 = { builtin } + +;; Metadata + +; Metadata -- Module flags +!llvm.module.flags = !{!0, !1, !2, !4, !5, !6} +; CHECK: !llvm.module.flags = !{!0, !1, !2, !4, !5, !6} + +!0 = !{i32 1, !"mod1", i32 0} +; CHECK: !0 = !{i32 1, !"mod1", i32 0} +!1 = !{i32 2, !"mod2", i32 0} +; CHECK: !1 = !{i32 2, !"mod2", i32 0} +!2 = !{i32 3, !"mod3", !3} +; CHECK: !2 = !{i32 3, !"mod3", !3} +!3 = !{!"mod6", !0} +; CHECK: !3 = !{!"mod6", !0} +!4 = !{i32 4, !"mod4", i32 0} +; CHECK: !4 = !{i32 4, !"mod4", i32 0} +!5 = !{i32 5, !"mod5", !0} +; CHECK: !5 = !{i32 5, !"mod5", !0} +!6 = !{i32 6, !"mod6", !0} +; CHECK: !6 = !{i32 6, !"mod6", !0} + +; Metadata -- Check `distinct' +!11 = distinct !{} +; CHECK: !11 = distinct !{} +!12 = distinct !{} +; CHECK: !12 = distinct !{} +!13 = !{!11} +; CHECK: !13 = !{!11} +!14 = !{!12} +; CHECK: !14 = !{!12} diff --git a/test/Bitcode/debug-loc-again.ll b/test/Bitcode/debug-loc-again.ll index 6dbea16121aa..adf74a07ebed 100644 --- a/test/Bitcode/debug-loc-again.ll +++ b/test/Bitcode/debug-loc-again.ll @@ -29,8 +29,8 @@ entry: !llvm.module.flags = !{!0} !0 = !{i32 2, !"Debug Info Version", i32 3} -!1 = !DICompileUnit(language: DW_LANG_C99, file: !DIFile(filename: "f", directory: "/d"), - subprograms: !{!2}) -!2 = !DISubprogram(name: "foo") +!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !DIFile(filename: "f", directory: "/d"), + subprograms: !{!2}) +!2 = distinct !DISubprogram(name: "foo") !3 = !DILocation(line: 1, scope: !2) !4 = !DILocation(line: 2, scope: !2) diff --git a/test/Bitcode/highLevelStructure.3.2.ll b/test/Bitcode/highLevelStructure.3.2.ll index 54356b9fb139..749b157cffc3 100644 --- a/test/Bitcode/highLevelStructure.3.2.ll +++ b/test/Bitcode/highLevelStructure.3.2.ll @@ -19,16 +19,16 @@ module asm "some assembly" ; Aliases Test ; CHECK: @glob1 = global i32 1 @glob1 = global i32 1 -; CHECK: @aliased1 = alias i32* @glob1 -@aliased1 = alias i32* @glob1 -; CHECK-NEXT: @aliased2 = internal alias i32* @glob1 -@aliased2 = internal alias i32* @glob1 -; CHECK-NEXT: @aliased3 = alias i32* @glob1 -@aliased3 = external alias i32* @glob1 -; CHECK-NEXT: @aliased4 = weak alias i32* @glob1 -@aliased4 = weak alias i32* @glob1 -; CHECK-NEXT: @aliased5 = weak_odr alias i32* @glob1 -@aliased5 = weak_odr alias i32* @glob1 +; CHECK: @aliased1 = alias i32, i32* @glob1 +@aliased1 = alias i32, i32* @glob1 +; CHECK-NEXT: @aliased2 = internal alias i32, i32* @glob1 +@aliased2 = internal alias i32, i32* @glob1 +; CHECK-NEXT: @aliased3 = alias i32, i32* @glob1 +@aliased3 = external alias i32, i32* @glob1 +; CHECK-NEXT: @aliased4 = weak alias i32, i32* @glob1 +@aliased4 = weak alias i32, i32* @glob1 +; CHECK-NEXT: @aliased5 = weak_odr alias i32, i32* @glob1 +@aliased5 = weak_odr alias i32, i32* @glob1 ;Parameter Attribute Test ; CHECK: declare void @ParamAttr1(i8 zeroext) diff --git a/test/Bitcode/identification.ll b/test/Bitcode/identification.ll new file mode 100644 index 000000000000..2f09d5a43281 --- /dev/null +++ b/test/Bitcode/identification.ll @@ -0,0 +1,6 @@ +; Check that a block "IDENTIFICATION_BLOCK_ID" is emitted. 
+;RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s +;CHECK: &1 | FileCheck %s -; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Unknown attribute kind (48) +; CHECK: llvm-dis{{(\.EXE|\.exe)?}}: error: Unknown attribute kind (52) ; invalid.ll.bc has an invalid attribute number. ; The test checks that LLVM reports the error and doesn't access freed memory diff --git a/test/Bitcode/invalid.ll.bc b/test/Bitcode/invalid.ll.bc index a85c3644b3abb93a450b18dcfa20b5fbf01ed143..60c7afffbc2308e4ceb58663b5e84d21fba6e1c1 100644 GIT binary patch delta 502 zcmX@Zw1L&o$@#!NQxOIRRt5%!Bpw45pU8dB-hQ^z4eb?RJpmL~L z(!vu(wamP^jEp)wCWt6FyLBvOkTi07;=wmzqT-PN$H2y(hs{sBSR~!p0wow2n1MQ^ zJ0AQ80``W9g~E*76I&H@M2cAw0+<5WnB0KU3=Gcm7#JGBEFhs`H$hQhqvL`V8Vn2$ zI&5JR_eR&ZOP&$rdozLWp#guM0srF;b%j0KL!-cyvhp(uMOQ7+A((|S*{^#y0k zEzCB1nr&K`tnyjxdTW>hbb3C)XAfmmfpt&T2y&|E#M54W9LA&I+VE%Uu z{GS5&o=o6-?7;s}f$t5_8wwh-cMi%Oa+JHID08P#?#=_5Cl6&`G0LS$1X^rxwwlvy z&7i=*uo4)SY{1ar<|@(vx}3c#qF{!zEztfA&XyOLCu=dvX%qsLu>+L^@d+rUG%b-) Za5?eB!_uK+!a}ATHUXXn6QCRj003g2t5pC1 delta 144 zcmdnMdWK2V$@#!NMIHu*00ssI?j{8WCLk>|(NN3Clgr3O#e-{tqVkcT1|CVpA{F)% zCj^pKELh=i>;ofERiFd|12d48?s)JY2-q1WP84S3nz&R!LZFx>A%H2s4Jhd>#>mhB kq!}1K8Hi8(7%jvOWELtgfbC&(FkraEFgc4!js>Iw0BZaq0RR91 diff --git a/test/Bitcode/invalid.test b/test/Bitcode/invalid.test index 0aab553bb615..3425adc84100 100644 --- a/test/Bitcode/invalid.test +++ b/test/Bitcode/invalid.test @@ -113,6 +113,11 @@ RUN: FileCheck --check-prefix=ELEMENT-TYPE %s ELEMENT-TYPE: Invalid type +RUN: not llvm-dis -disable-output %p/Inputs/invalid-cast.bc 2>&1 | \ +RUN: FileCheck --check-prefix=INVALID-CAST %s + +INVALID-CAST: Invalid cast + RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-op-not-2nd-to-last.bc 2>&1 | \ RUN: FileCheck --check-prefix=ARRAY-NOT-2LAST %s @@ -202,3 +207,13 @@ RUN: not llvm-dis -disable-output %p/Inputs/invalid-alias-type-mismatch.bc 2>&1 RUN: FileCheck --check-prefix=ALIAS-TYPE-MISMATCH %s ALIAS-TYPE-MISMATCH: Alias and aliasee types don't match + +RUN: not llvm-dis -disable-output %p/Inputs/invalid-no-function-block.bc 2>&1 | \ +RUN: FileCheck --check-prefix=NO-FUNCTION-BLOCK %s + +NO-FUNCTION-BLOCK: Trying to materialize functions before seeing function blocks + +RUN: not llvm-dis -disable-output %p/Inputs/invalid-name-with-0-byte.bc 2>&1 | \ +RUN: FileCheck --check-prefix=NAME-WITH-0 %s + +NAME-WITH-0: Invalid value name diff --git a/test/Bitcode/local-linkage-default-visibility.3.4.ll b/test/Bitcode/local-linkage-default-visibility.3.4.ll index df0cf7653e55..15ff5e3a6af8 100644 --- a/test/Bitcode/local-linkage-default-visibility.3.4.ll +++ b/test/Bitcode/local-linkage-default-visibility.3.4.ll @@ -25,23 +25,23 @@ @global = global i32 0 -@default.internal.alias = alias internal i32* @global -; CHECK: @default.internal.alias = internal alias i32* @global +@default.internal.alias = alias internal i32, internal i32* @global +; CHECK: @default.internal.alias = internal alias i32, i32* @global -@hidden.internal.alias = hidden alias internal i32* @global -; CHECK: @hidden.internal.alias = internal alias i32* @global +@hidden.internal.alias = hidden alias internal i32, internal i32* @global +; CHECK: @hidden.internal.alias = internal alias i32, i32* @global -@protected.internal.alias = protected alias internal i32* @global -; CHECK: @protected.internal.alias = internal alias i32* @global +@protected.internal.alias = protected alias internal i32, internal i32* @global +; CHECK: @protected.internal.alias = internal alias i32, i32* @global -@default.private.alias = alias private i32* 
@global -; CHECK: @default.private.alias = private alias i32* @global +@default.private.alias = alias private i32, private i32* @global +; CHECK: @default.private.alias = private alias i32, i32* @global -@hidden.private.alias = hidden alias private i32* @global -; CHECK: @hidden.private.alias = private alias i32* @global +@hidden.private.alias = hidden alias private i32, private i32* @global +; CHECK: @hidden.private.alias = private alias i32, i32* @global -@protected.private.alias = protected alias private i32* @global -; CHECK: @protected.private.alias = private alias i32* @global +@protected.private.alias = protected alias private i32, private i32* @global +; CHECK: @protected.private.alias = private alias i32, i32* @global define internal void @default.internal() { ; CHECK: define internal void @default.internal diff --git a/test/Bitcode/old-aliases.ll b/test/Bitcode/old-aliases.ll index 8527f074d04b..1bcc4306477c 100644 --- a/test/Bitcode/old-aliases.ll +++ b/test/Bitcode/old-aliases.ll @@ -10,14 +10,14 @@ @v2 = global [1 x i32] zeroinitializer ; CHECK: @v2 = global [1 x i32] zeroinitializer -@v3 = alias bitcast (i32* @v1 to i16*) -; CHECK: @v3 = alias bitcast (i32* @v1 to i16*) +@v3 = alias i16, bitcast (i32* @v1 to i16*) +; CHECK: @v3 = alias i16, bitcast (i32* @v1 to i16*) -@v4 = alias getelementptr ([1 x i32], [1 x i32]* @v2, i32 0, i32 0) -; CHECK: @v4 = alias getelementptr inbounds ([1 x i32], [1 x i32]* @v2, i32 0, i32 0) +@v4 = alias i32, getelementptr ([1 x i32], [1 x i32]* @v2, i32 0, i32 0) +; CHECK: @v4 = alias i32, getelementptr inbounds ([1 x i32], [1 x i32]* @v2, i32 0, i32 0) -@v5 = alias i32 addrspace(2)* addrspacecast (i32 addrspace(0)* @v1 to i32 addrspace(2)*) -; CHECK: @v5 = alias addrspacecast (i32* @v1 to i32 addrspace(2)*) +@v5 = alias i32, i32 addrspace(2)* addrspacecast (i32 addrspace(0)* @v1 to i32 addrspace(2)*) +; CHECK: @v5 = alias i32, addrspacecast (i32* @v1 to i32 addrspace(2)*) -@v6 = alias i16* @v3 -; CHECK: @v6 = alias i16* @v3 +@v6 = alias i16, i16* @v3 +; CHECK: @v6 = alias i16, i16* @v3 diff --git a/test/Bitcode/operand-bundles.ll b/test/Bitcode/operand-bundles.ll new file mode 100644 index 000000000000..ab28cffd84aa --- /dev/null +++ b/test/Bitcode/operand-bundles.ll @@ -0,0 +1,152 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s + +declare void @callee0() +declare void @callee1(i32,i32) + +define void @f0(i32* %ptr) { +; CHECK-LABEL: @f0( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] +; CHECK: call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + ret void +} + +define void @f1(i32* %ptr) { +; CHECK-LABEL: @f1( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + + call void @callee0() + call void @callee0() [ "foo"() ] + call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] +; CHECK: @callee0(){{$}} +; CHECK-NEXT: call void @callee0() [ "foo"() ] +; CHECK-NEXT: call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] + ret void +} + +define void @f2(i32* %ptr) { +; CHECK-LABEL: @f2( + entry: + call void @callee0() [ "foo"() ] +; CHECK: call void @callee0() [ "foo"() ] + ret void +} + +define void @f3(i32* %ptr) { +; CHECK-LABEL: @f3( + entry: + %l = load i32, i32* %ptr + %x = add i32 42, 1 + call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] +; 
CHECK: call void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+  ret void
+}
+
+define void @f4(i32* %ptr) {
+; CHECK-LABEL: @f4(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+  call void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+; CHECK: call void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+  ret void
+}
+
+; Invoke versions of the above tests:
+
+
+define void @g0(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g0(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+  invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ] to label %normal unwind label %exception
+; CHECK: invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "bar"(float 0.000000e+00, i64 100, i32 %l) ]
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+define void @g1(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g1(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+
+  invoke void @callee0() to label %normal unwind label %exception
+; CHECK: invoke void @callee0(){{$}}
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+
+normal:
+  invoke void @callee0() [ "foo"() ] to label %normal1 unwind label %exception1
+; CHECK: invoke void @callee0() [ "foo"() ]
+
+exception1:
+  %cleanup1 = landingpad i8 cleanup
+  br label %normal1
+
+normal1:
+  invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal2 unwind label %exception2
+; CHECK: invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception2:
+  %cleanup2 = landingpad i8 cleanup
+  br label %normal2
+
+normal2:
+  ret void
+}
+
+define void @g2(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g2(
+ entry:
+  invoke void @callee0() [ "foo"() ] to label %normal unwind label %exception
+; CHECK: invoke void @callee0() [ "foo"() ]
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+define void @g3(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g3(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+  invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ] to label %normal unwind label %exception
+; CHECK: invoke void @callee0() [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
+
+define void @g4(i32* %ptr) personality i8 3 {
+; CHECK-LABEL: @g4(
+ entry:
+  %l = load i32, i32* %ptr
+  %x = add i32 42, 1
+  invoke void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+        to label %normal unwind label %exception
+; CHECK: invoke void @callee1(i32 10, i32 %x) [ "foo"(i32 42, i64 100, i32 %x), "foo"(i32 42, float 0.000000e+00, i32 %l) ]
+
+exception:
+  %cleanup = landingpad i8 cleanup
+  br label %normal
+normal:
+  ret void
+}
diff --git a/test/Bitcode/select.ll b/test/Bitcode/select.ll
index 3ad06796dccf..666d2960fb5f 100644
--- a/test/Bitcode/select.ll
+++ b/test/Bitcode/select.ll
@@ -8,3 +8,11 @@ define <2 x i32> @main() {
 ; CHECK: define <2 x i32> @main() {
 ; CHECK: ret <2 x i32> <i32 5, i32 3>
 ; CHECK: }
+
+define <2 x float> @f() {
+  ret <2 x float> select (i1 ptrtoint (<2 x float> ()* @f to i1), <2 x float> <float 1.000000e+00, float 0.000000e+00>, <2 x float> zeroinitializer)
+}
+
+; CHECK: define <2 x float> @f() {
+; CHECK: ret <2 x float> select (i1 ptrtoint (<2 x float> ()* @f to i1), <2 x float> <float 1.000000e+00, float 0.000000e+00>, <2 x float> zeroinitializer)
+; CHECK: }
diff --git a/test/Bitcode/tailcall.ll b/test/Bitcode/tailcall.ll
index 01190d74c348..6a4b8885847a 100644
--- a/test/Bitcode/tailcall.ll
+++ b/test/Bitcode/tailcall.ll
@@ -3,16 +3,16 @@

 ; Check that musttail and tail roundtrip.

-declare cc8191 void @t1_callee()
-define cc8191 void @t1() {
-; CHECK: tail call cc8191 void @t1_callee()
-  tail call cc8191 void @t1_callee()
+declare cc1023 void @t1_callee()
+define cc1023 void @t1() {
+; CHECK: tail call cc1023 void @t1_callee()
+  tail call cc1023 void @t1_callee()
   ret void
 }

-declare cc8191 void @t2_callee()
-define cc8191 void @t2() {
-; CHECK: musttail call cc8191 void @t2_callee()
-  musttail call cc8191 void @t2_callee()
+declare cc1023 void @t2_callee()
+define cc1023 void @t2() {
+; CHECK: musttail call cc1023 void @t2_callee()
+  musttail call cc1023 void @t2_callee()
   ret void
 }
diff --git a/test/Bitcode/thinlto-function-summary.ll b/test/Bitcode/thinlto-function-summary.ll
new file mode 100644
index 000000000000..9c2f2acd6c7a
--- /dev/null
+++ b/test/Bitcode/thinlto-function-summary.ll
@@ -0,0 +1,45 @@
+; RUN: llvm-as -function-summary < %s | llvm-bcanalyzer -dump | FileCheck %s -check-prefix=BC
+; Check for function summary block/records.
+
+; BC: [#uses=0]
+@h = external global void ()* ; [#uses=0]
+
+define internal void @0() nounwind {
+entry:
+  store void()* @0, void()** @h
+  br label %return
+
+return: ; preds = %entry
+  ret void
+}
diff --git a/test/Bitcode/upgrade-subprogram.ll b/test/Bitcode/upgrade-subprogram.ll
new file mode 100644
index 000000000000..8d5c000a1480
--- /dev/null
+++ b/test/Bitcode/upgrade-subprogram.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-dis < %s.bc | FileCheck %s
+; RUN: verify-uselistorder < %s.bc
+
+; CHECK: define void @foo() !dbg [[SP:![0-9]+]]
+define void @foo() {
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+
+!llvm.dbg.cu = !{!1}
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, subprograms: !{!3}, emissionKind: 1)
+!2 = !DIFile(filename: "foo.c", directory: "/path/to/dir")
+; CHECK: [[SP]] = distinct !DISubprogram
+!3 = distinct !DISubprogram(file: !2, scope: !2, line: 51, name: "foo", function: void ()* @foo, type: !4)
+!4 = !DISubroutineType(types: !{})
diff --git a/test/Bitcode/upgrade-subprogram.ll.bc b/test/Bitcode/upgrade-subprogram.ll.bc
new file mode 100644
index 0000000000000000000000000000000000000000..cfab5a2c76a9814c8324d6cece8bf7172fc1ea65
GIT binary patch
literal 784
zcmXX@ZAep57(Uy*+r72h-E0H5yc>6Zgn`kRXh$B7HzV`qA0wh4Y+qp7)&RJnwle%*uA1
z0ss*Jz)9#TZC&qo_kUhEeREwVgcQ&c04%{-GC_j^A%_F=RZN=`XscMJH_iwGM3o3v
zY0yVbFL6<5nUPp}WO{`GS6(@)*9WbqMB48kz%w
zf{Pje?%?6}YbzAoGy!l7fXAs+eye+HJF%Bq>rSSk8@sPjrnJ8PU45y3vG&>G(kU}i
z^wj~c)Z$($o}t_dL1U0H1Y#kJ7aJSJhQb5HE|Bia$X{Nvmau4|tLlvi<<^Oc$NXhU
zq%pxXhLFbOXlR(l#&K-S%CRhVpG8JSIhN*B$r3WplNp_~Unk8Tl80?%tt_tI)>0|0
zI_l84byKEgzv&ZWe1kMB`x^p?DS4atxd@dkV%kBiX{oqRz2*>q;TdCsF@}ryHfES*
z3_-*+8#@CZ{X1dWL`(}Y;$t7jc5;KPf4CPLwR6KtZg`GUZQPI^0O^l5GRKqICTWHy
z^RNoKTAg0iM9rGdS}N_+q<5%XM!hG{xRvdO1?Kz
/dev/null
+; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
+RUN-DISABLE: bugpoint -disable-namedmd-remove -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -bugpoint-crash-too-many-cus -silence-passes > /dev/null
+RUN-DISABLE: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
+; REQUIRES: loadable_module
+
+; CHECK: !llvm.dbg.cu =
!{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]]} +; CHECK-DISABLE: !llvm.dbg.cu = !{![[FIRST:[0-9]+]], ![[SECOND:[0-9]+]], +; CHECK-DISABLE-SAME: ![[THIRD:[0-9]+]], ![[FOURTH:[0-9]+]], ![[FIFTH:[0-9]+]]} +!llvm.dbg.cu = !{!0, !1, !2, !3, !4, !5} +; CHECK-NOT: !named +; CHECK-DISABLE: !named +!named = !{!0, !1, !2, !3, !4, !5} +; CHECK: !llvm.module.flags = !{![[DIVERSION:[0-9]+]]} +!llvm.module.flags = !{!6, !7} + +; CHECK-DAG: ![[FIRST]] = distinct !DICompileUnit(language: DW_LANG_Julia, +; CHECK-DAG: ![[SECOND]] = distinct !DICompileUnit(language: DW_LANG_Julia, +; CHECK-DAG: ![[DIVERSION]] = !{i32 2, !"Debug Info Version", i32 3} +; CHECK-DAG: !DIFile(filename: "a", directory: "b") + +; 4 nodes survive. Due to renumbering !4 should not exist +; CHECK-NOT: !4 + +!0 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!1 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!2 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!3 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!4 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!5 = distinct !DICompileUnit(language: DW_LANG_Julia, + file: !8) +!6 = !{i32 2, !"Dwarf Version", i32 2} +!7 = !{i32 2, !"Debug Info Version", i32 3} +!8 = !DIFile(filename: "a", directory: "b") diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll index 29a03b831077..72be4fe55936 100644 --- a/test/BugPoint/remove_arguments_test.ll +++ b/test/BugPoint/remove_arguments_test.ll @@ -2,10 +2,10 @@ ; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s ; REQUIRES: loadable_module -; Test to make sure that arguments are removed from the function if they are -; unnecessary. And clean up any types that that frees up too. +; Test to make sure that arguments are removed from the function if they are +; unnecessary. And clean up any types that frees up too. -; CHECK: target triple +; CHECK: ModuleID ; CHECK-NOT: struct.anon %struct.anon = type { i32 } diff --git a/test/BugPoint/replace-funcs-with-null.ll b/test/BugPoint/replace-funcs-with-null.ll index 3433c456e90f..622f9eb67a29 100644 --- a/test/BugPoint/replace-funcs-with-null.ll +++ b/test/BugPoint/replace-funcs-with-null.ll @@ -3,7 +3,7 @@ ; RUN: bugpoint -load %llvmshlibdir/BugpointPasses%shlibext %s -output-prefix %t -replace-funcs-with-null -bugpoint-crash-decl-funcs -silence-passes -safe-run-llc ; REQUIRES: loadable_module -@foo2 = alias i32 ()* @foo +@foo2 = alias i32 (), i32 ()* @foo define i32 @foo() { ret i32 1 } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f49df542f4e5..138450ba8e02 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,3 +1,7 @@ +if(LLVM_BUILD_EXAMPLES) + set(ENABLE_EXAMPLES 1) +endif() + configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg @@ -15,11 +19,12 @@ endif() # Set the depends list as a variable so that it can grow conditionally. # NOTE: Sync the substitutions in test/lit.cfg when adding to this list. 
set(LLVM_TEST_DEPENDS - llvm-config - UnitTests BugpointPasses + FileCheck LLVMHello + UnitTests bugpoint + count llc lli lli-child-target @@ -27,12 +32,14 @@ set(LLVM_TEST_DEPENDS llvm-as llvm-bcanalyzer llvm-c-test + llvm-config llvm-cov llvm-cxxdump llvm-diff llvm-dis llvm-dsymutil llvm-dwarfdump + llvm-dwp llvm-extract llvm-lib llvm-link @@ -41,22 +48,22 @@ set(LLVM_TEST_DEPENDS llvm-mcmarkup llvm-nm llvm-objdump + llvm-pdbdump llvm-profdata llvm-ranlib llvm-readobj llvm-rtdyld llvm-size + llvm-split llvm-symbolizer llvm-tblgen - macho-dump - opt - FileCheck - count not + obj2yaml + opt + sancov + verify-uselistorder yaml-bench yaml2obj - obj2yaml - verify-uselistorder ) # If Intel JIT events are supported, depend on a tool that tests the listener. @@ -95,6 +102,16 @@ if(TARGET ocaml_llvm) ) endif() +if(LLVM_BUILD_EXAMPLES) + list(APPEND LLVM_TEST_DEPENDS + Kaleidoscope-Ch3 + Kaleidoscope-Ch4 + Kaleidoscope-Ch5 + Kaleidoscope-Ch6 + Kaleidoscope-Ch7 + ) +endif() + add_lit_testsuite(check-llvm "Running the LLVM regression tests" ${CMAKE_CURRENT_BINARY_DIR} PARAMS llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg diff --git a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll index b075573cc674..5eb455f3a22c 100644 --- a/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll +++ b/test/CodeGen/AArch64/aarch64-2014-08-11-MachineCombinerCrash.ll @@ -3,7 +3,7 @@ ; Bug 20598 -define void @test() #0 { +define void @test() #0 !dbg !4 { entry: br label %for.body, !dbg !39 @@ -44,39 +44,39 @@ attributes #1 = { nounwind readnone } !llvm.module.flags = !{!36, !37} !llvm.ident = !{!38} -!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2) +!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2) !1 = !DIFile(filename: "test.c", directory: "") !2 = !{} !3 = !{!4} -!4 = !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 141, file: !1, scope: !1, type: !6, function: void ()* @test, variables: !12) +!4 = distinct !DISubprogram(name: "", line: 140, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 141, file: !1, scope: !1, type: !6, variables: !12) !6 = !DISubroutineType(types: !7) !7 = !{null, !8} !8 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !9) !9 = !DIDerivedType(tag: DW_TAG_typedef, line: 30, file: !1, baseType: !11) !11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !12 = !{!13, !14, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35} -!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "", line: 140, arg: 1, scope: !4, file: !1, type: !8) -!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) +!13 = !DILocalVariable(name: "", line: 140, arg: 1, scope: !4, file: !1, type: !8) +!14 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) !15 = !DIDerivedType(tag: DW_TAG_typedef, line: 183, file: !1, baseType: !17) !17 = !DIBasicType(tag: DW_TAG_base_type, size: 64, align: 64, encoding: 
DW_ATE_signed) -!18 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!21 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!23 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!24 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 142, scope: !4, file: !1, type: !15) -!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15) -!26 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15) -!27 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15) -!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 143, scope: !4, file: !1, type: !15) -!29 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!30 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!31 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!32 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!33 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 144, scope: !4, file: !1, type: !15) -!34 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 145, scope: !4, file: !1, type: !8) -!35 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "", line: 146, scope: !4, file: !1, type: !11) +!18 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!19 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!20 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!21 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!22 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!23 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!24 = !DILocalVariable(name: "", line: 142, scope: !4, file: !1, type: !15) +!25 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15) +!26 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15) +!27 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15) +!28 = !DILocalVariable(name: "", line: 143, scope: !4, file: !1, type: !15) +!29 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!30 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!31 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!32 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!33 = !DILocalVariable(name: "", line: 144, scope: !4, file: !1, type: !15) +!34 = !DILocalVariable(name: "", line: 145, scope: !4, file: !1, type: !8) +!35 = !DILocalVariable(name: "", line: 146, scope: !4, file: !1, type: !11) !36 = !{i32 2, !"Dwarf Version", i32 4} !37 = !{i32 2, !"Debug Info Version", i32 3} !38 = !{!"clang version 3.6.0 "} diff --git a/test/CodeGen/AArch64/aarch64-addv.ll b/test/CodeGen/AArch64/aarch64-addv.ll new file mode 100644 index 000000000000..ca374eea28e7 --- /dev/null +++ 
b/test/CodeGen/AArch64/aarch64-addv.ll @@ -0,0 +1,98 @@ +; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s | FileCheck %s + +define i8 @add_B(<16 x i8>* %arr) { +; CHECK-LABEL: add_B +; CHECK: addv {{b[0-9]+}}, {{v[0-9]+}}.16b + %bin.rdx = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf0 = shufflevector <16 x i8> %bin.rdx, <16 x i8> undef, <16 x i32> + %bin.rdx0 = add <16 x i8> %bin.rdx, %rdx.shuf0 + %rdx.shuf = shufflevector <16 x i8> %bin.rdx0, <16 x i8> undef, <16 x i32> + %bin.rdx11 = add <16 x i8> %bin.rdx0, %rdx.shuf + %rdx.shuf12 = shufflevector <16 x i8> %bin.rdx11, <16 x i8> undef, <16 x i32> + %bin.rdx13 = add <16 x i8> %bin.rdx11, %rdx.shuf12 + %rdx.shuf13 = shufflevector <16 x i8> %bin.rdx13, <16 x i8> undef, <16 x i32> + %bin.rdx14 = add <16 x i8> %bin.rdx13, %rdx.shuf13 + %r = extractelement <16 x i8> %bin.rdx14, i32 0 + ret i8 %r +} + +define i16 @add_H(<8 x i16>* %arr) { +; CHECK-LABEL: add_H +; CHECK: addv {{h[0-9]+}}, {{v[0-9]+}}.8h + %bin.rdx = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> + %bin.rdx11 = add <8 x i16> %bin.rdx, %rdx.shuf + %rdx.shuf12 = shufflevector <8 x i16> %bin.rdx11, <8 x i16> undef, <8 x i32> + %bin.rdx13 = add <8 x i16> %bin.rdx11, %rdx.shuf12 + %rdx.shuf13 = shufflevector <8 x i16> %bin.rdx13, <8 x i16> undef, <8 x i32> + %bin.rdx14 = add <8 x i16> %bin.rdx13, %rdx.shuf13 + %r = extractelement <8 x i16> %bin.rdx14, i32 0 + ret i16 %r +} + +define i32 @add_S( <4 x i32>* %arr) { +; CHECK-LABEL: add_S +; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s + %bin.rdx = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> + %bin.rdx11 = add <4 x i32> %bin.rdx, %rdx.shuf + %rdx.shuf12 = shufflevector <4 x i32> %bin.rdx11, <4 x i32> undef, <4 x i32> + %bin.rdx13 = add <4 x i32> %bin.rdx11, %rdx.shuf12 + %r = extractelement <4 x i32> %bin.rdx13, i32 0 + ret i32 %r +} + +define i64 @add_D(<2 x i64>* %arr) { +; CHECK-LABEL: add_D +; CHECK-NOT: addv + %bin.rdx = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf0 = shufflevector <2 x i64> %bin.rdx, <2 x i64> undef, <2 x i32> + %bin.rdx0 = add <2 x i64> %bin.rdx, %rdx.shuf0 + %r = extractelement <2 x i64> %bin.rdx0, i32 0 + ret i64 %r +} + +define i32 @oversized_ADDV_256(i8* noalias nocapture readonly %arg1, i8* noalias nocapture readonly %arg2) { +; CHECK-LABEL: oversized_ADDV_256 +; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s +entry: + %0 = bitcast i8* %arg1 to <8 x i8>* + %1 = load <8 x i8>, <8 x i8>* %0, align 1 + %2 = zext <8 x i8> %1 to <8 x i32> + %3 = bitcast i8* %arg2 to <8 x i8>* + %4 = load <8 x i8>, <8 x i8>* %3, align 1 + %5 = zext <8 x i8> %4 to <8 x i32> + %6 = sub nsw <8 x i32> %2, %5 + %7 = icmp slt <8 x i32> %6, zeroinitializer + %8 = sub nsw <8 x i32> zeroinitializer, %6 + %9 = select <8 x i1> %7, <8 x i32> %8, <8 x i32> %6 + %rdx.shuf = shufflevector <8 x i32> %9, <8 x i32> undef, <8 x i32> + %bin.rdx = add <8 x i32> %9, %rdx.shuf + %rdx.shuf1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> + %bin.rdx2 = add <8 x i32> %bin.rdx, %rdx.shuf1 + %rdx.shuf3 = shufflevector <8 x i32> %bin.rdx2, <8 x i32> undef, <8 x i32> + %bin.rdx4 = add <8 x i32> %bin.rdx2, %rdx.shuf3 + %10 = extractelement <8 x i32> %bin.rdx4, i32 0 + ret i32 %10 +} + +define i32 @oversized_ADDV_512(<16 x i32>* %arr) { +; CHECK-LABEL: oversized_ADDV_512 +; CHECK: addv {{s[0-9]+}}, {{v[0-9]+}}.4s + %bin.rdx = load <16 x i32>, <16 x i32>* %arr + + %rdx.shuf0 = shufflevector <16 x i32> %bin.rdx, <16 x i32> undef, <16 
x i32> + %bin.rdx0 = add <16 x i32> %bin.rdx, %rdx.shuf0 + + %rdx.shuf = shufflevector <16 x i32> %bin.rdx0, <16 x i32> undef, <16 x i32> + %bin.rdx11 = add <16 x i32> %bin.rdx0, %rdx.shuf + + %rdx.shuf12 = shufflevector <16 x i32> %bin.rdx11, <16 x i32> undef, <16 x i32> + %bin.rdx13 = add <16 x i32> %bin.rdx11, %rdx.shuf12 + + %rdx.shuf13 = shufflevector <16 x i32> %bin.rdx13, <16 x i32> undef, <16 x i32> + %bin.rdx14 = add <16 x i32> %bin.rdx13, %rdx.shuf13 + + %r = extractelement <16 x i32> %bin.rdx14, i32 0 + ret i32 %r +} diff --git a/test/CodeGen/AArch64/aarch64-deferred-spilling.ll b/test/CodeGen/AArch64/aarch64-deferred-spilling.ll new file mode 100644 index 000000000000..7accdced7d44 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-deferred-spilling.ll @@ -0,0 +1,514 @@ +;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=true -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=DEFERRED +;RUN: llc < %s -mtriple=aarch64--linux-android -regalloc=greedy -enable-deferred-spilling=false -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=REGULAR + +; Check that we do not end up with useless spill code. +; +; Move to the basic block we are interested in. +; +; CHECK: // %if.then.120 +; +; REGULAR: str w21, [sp, #[[OFFSET:[0-9]+]]] // 4-byte Folded Spill +; Check that w21 wouldn't need to be spilled since it is never reused. +; REGULAR-NOT: {{[wx]}}21{{,?}} +; +; Check that w22 is used to carry a value through the call. +; DEFERRED-NOT: str {{[wx]}}22, +; DEFERRED: mov {{[wx]}}22, +; DEFERRED-NOT: str {{[wx]}}22, +; +; CHECK: bl fprintf +; +; DEFERRED-NOT: ldr {{[wx]}}22, +; DEFERRED: mov {{[wx][0-9]+}}, {{[wx]}}22 +; DEFERRED-NOT: ldr {{[wx]}}22, +; +; REGULAR-NOT: {{[wx]}}21{{,?}} +; REGULAR: ldr w21, [sp, #[[OFFSET]]] // 4-byte Folded Reload +; +; End of the basic block we are interested in. +; CHECK: b +; CHECK: {{[^:]+}}: // %sw.bb.123 + +%struct.__sFILE = type { i8*, i32, i32, i32, i32, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } +%struct.__sbuf = type { i8*, i64 } +%struct.DState = type { %struct.bz_stream*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* } +%struct.bz_stream = type { i8*, i32, i32, i32, i8*, i32, i32, i32, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8* } + +@__sF = external global [0 x %struct.__sFILE], align 8 +@.str = private unnamed_addr constant [20 x i8] c"\0A [%d: stuff+mf \00", align 1 + +declare i32 @fprintf(%struct.__sFILE* nocapture, i8* nocapture readonly, ...) 
+ +declare void @bar(i32) + +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) + +define i32 @foo(%struct.DState* %s) { +entry: + %state = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 1 + %tmp = load i32, i32* %state, align 4 + %cmp = icmp eq i32 %tmp, 10 + %save_i = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 40 + br i1 %cmp, label %if.end.thread, label %if.end + +if.end.thread: ; preds = %entry + %save_j = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41 + %save_t = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42 + %save_alphaSize = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43 + %save_nGroups = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44 + %save_nSelectors = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45 + %save_EOB = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46 + %save_groupNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47 + %save_groupPos = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48 + %save_nextSym = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49 + %save_nblockMAX = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50 + %save_nblock = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51 + %save_es = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52 + %save_N = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53 + %save_curr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54 + %save_zt = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55 + %save_zn = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56 + %save_zvec = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57 + %save_zj = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58 + %tmp1 = bitcast i32* %save_i to i8* + call void @llvm.memset.p0i8.i64(i8* %tmp1, i8 0, i64 108, i32 4, i1 false) + br label %sw.default + +if.end: ; preds = %entry + %.pre = load i32, i32* %save_i, align 4 + %save_j3.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 41 + %.pre406 = load i32, i32* %save_j3.phi.trans.insert, align 4 + %save_t4.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 42 + %.pre407 = load i32, i32* %save_t4.phi.trans.insert, align 4 + %save_alphaSize5.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 43 + %.pre408 = load i32, i32* %save_alphaSize5.phi.trans.insert, align 4 + %save_nGroups6.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 44 + %.pre409 = load i32, i32* %save_nGroups6.phi.trans.insert, align 4 + %save_nSelectors7.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 45 + %.pre410 = load i32, i32* %save_nSelectors7.phi.trans.insert, align 4 + %save_EOB8.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 46 + %.pre411 = load i32, i32* %save_EOB8.phi.trans.insert, align 4 + %save_groupNo9.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 47 + %.pre412 = load i32, i32* %save_groupNo9.phi.trans.insert, align 4 + %save_groupPos10.phi.trans.insert = getelementptr 
inbounds %struct.DState, %struct.DState* %s, i64 0, i32 48 + %.pre413 = load i32, i32* %save_groupPos10.phi.trans.insert, align 4 + %save_nextSym11.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 49 + %.pre414 = load i32, i32* %save_nextSym11.phi.trans.insert, align 4 + %save_nblockMAX12.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 50 + %.pre415 = load i32, i32* %save_nblockMAX12.phi.trans.insert, align 4 + %save_nblock13.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 51 + %.pre416 = load i32, i32* %save_nblock13.phi.trans.insert, align 4 + %save_es14.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 52 + %.pre417 = load i32, i32* %save_es14.phi.trans.insert, align 4 + %save_N15.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 53 + %.pre418 = load i32, i32* %save_N15.phi.trans.insert, align 4 + %save_curr16.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 54 + %.pre419 = load i32, i32* %save_curr16.phi.trans.insert, align 4 + %save_zt17.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 55 + %.pre420 = load i32, i32* %save_zt17.phi.trans.insert, align 4 + %save_zn18.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 56 + %.pre421 = load i32, i32* %save_zn18.phi.trans.insert, align 4 + %save_zvec19.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 57 + %.pre422 = load i32, i32* %save_zvec19.phi.trans.insert, align 4 + %save_zj20.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 58 + %.pre423 = load i32, i32* %save_zj20.phi.trans.insert, align 4 + switch i32 %tmp, label %sw.default [ + i32 13, label %sw.bb + i32 14, label %if.end.sw.bb.65_crit_edge + i32 25, label %if.end.sw.bb.123_crit_edge + ] + +if.end.sw.bb.123_crit_edge: ; preds = %if.end + %.pre433 = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8 + br label %sw.bb.123 + +if.end.sw.bb.65_crit_edge: ; preds = %if.end + %bsLive69.phi.trans.insert = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8 + %.pre426 = load i32, i32* %bsLive69.phi.trans.insert, align 4 + br label %sw.bb.65 + +sw.bb: ; preds = %if.end + %sunkaddr = ptrtoint %struct.DState* %s to i64 + %sunkaddr485 = add i64 %sunkaddr, 8 + %sunkaddr486 = inttoptr i64 %sunkaddr485 to i32* + store i32 13, i32* %sunkaddr486, align 4 + %bsLive = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 8 + %tmp2 = load i32, i32* %bsLive, align 4 + %cmp28.400 = icmp sgt i32 %tmp2, 7 + br i1 %cmp28.400, label %sw.bb.if.then.29_crit_edge, label %if.end.33.lr.ph + +sw.bb.if.then.29_crit_edge: ; preds = %sw.bb + %sunkaddr487 = ptrtoint %struct.DState* %s to i64 + %sunkaddr488 = add i64 %sunkaddr487, 32 + %sunkaddr489 = inttoptr i64 %sunkaddr488 to i32* + %.pre425 = load i32, i32* %sunkaddr489, align 4 + br label %if.then.29 + +if.end.33.lr.ph: ; preds = %sw.bb + %tmp3 = bitcast %struct.DState* %s to %struct.bz_stream** + %.pre424 = load %struct.bz_stream*, %struct.bz_stream** %tmp3, align 8 + %avail_in.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre424, i64 0, i32 1 + %.pre430 = load i32, i32* %avail_in.phi.trans.insert, align 4 + %tmp4 = add i32 %.pre430, -1 + br label %if.end.33 + +if.then.29: ; preds 
= %while.body.backedge, %sw.bb.if.then.29_crit_edge + %tmp5 = phi i32 [ %.pre425, %sw.bb.if.then.29_crit_edge ], [ %or, %while.body.backedge ] + %.lcssa393 = phi i32 [ %tmp2, %sw.bb.if.then.29_crit_edge ], [ %add, %while.body.backedge ] + %sub = add nsw i32 %.lcssa393, -8 + %shr = lshr i32 %tmp5, %sub + %and = and i32 %shr, 255 + %sunkaddr491 = ptrtoint %struct.DState* %s to i64 + %sunkaddr492 = add i64 %sunkaddr491, 36 + %sunkaddr493 = inttoptr i64 %sunkaddr492 to i32* + store i32 %sub, i32* %sunkaddr493, align 4 + %blockSize100k = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 9 + store i32 %and, i32* %blockSize100k, align 4 + %and.off = add nsw i32 %and, -49 + %tmp6 = icmp ugt i32 %and.off, 8 + br i1 %tmp6, label %save_state_and_return, label %if.end.62 + +if.end.33: ; preds = %while.body.backedge, %if.end.33.lr.ph + %lsr.iv482 = phi i32 [ %tmp4, %if.end.33.lr.ph ], [ %lsr.iv.next483, %while.body.backedge ] + %tmp7 = phi i32 [ %tmp2, %if.end.33.lr.ph ], [ %add, %while.body.backedge ] + %cmp35 = icmp eq i32 %lsr.iv482, -1 + br i1 %cmp35, label %save_state_and_return, label %if.end.37 + +if.end.37: ; preds = %if.end.33 + %tmp8 = bitcast %struct.bz_stream* %.pre424 to i8** + %sunkaddr494 = ptrtoint %struct.DState* %s to i64 + %sunkaddr495 = add i64 %sunkaddr494, 32 + %sunkaddr496 = inttoptr i64 %sunkaddr495 to i32* + %tmp9 = load i32, i32* %sunkaddr496, align 4 + %shl = shl i32 %tmp9, 8 + %tmp10 = load i8*, i8** %tmp8, align 8 + %tmp11 = load i8, i8* %tmp10, align 1 + %conv = zext i8 %tmp11 to i32 + %or = or i32 %conv, %shl + store i32 %or, i32* %sunkaddr496, align 4 + %add = add nsw i32 %tmp7, 8 + %sunkaddr497 = ptrtoint %struct.DState* %s to i64 + %sunkaddr498 = add i64 %sunkaddr497, 36 + %sunkaddr499 = inttoptr i64 %sunkaddr498 to i32* + store i32 %add, i32* %sunkaddr499, align 4 + %incdec.ptr = getelementptr inbounds i8, i8* %tmp10, i64 1 + store i8* %incdec.ptr, i8** %tmp8, align 8 + %sunkaddr500 = ptrtoint %struct.bz_stream* %.pre424 to i64 + %sunkaddr501 = add i64 %sunkaddr500, 8 + %sunkaddr502 = inttoptr i64 %sunkaddr501 to i32* + store i32 %lsr.iv482, i32* %sunkaddr502, align 4 + %sunkaddr503 = ptrtoint %struct.bz_stream* %.pre424 to i64 + %sunkaddr504 = add i64 %sunkaddr503, 12 + %sunkaddr505 = inttoptr i64 %sunkaddr504 to i32* + %tmp12 = load i32, i32* %sunkaddr505, align 4 + %inc = add i32 %tmp12, 1 + store i32 %inc, i32* %sunkaddr505, align 4 + %cmp49 = icmp eq i32 %inc, 0 + br i1 %cmp49, label %if.then.51, label %while.body.backedge + +if.then.51: ; preds = %if.end.37 + %sunkaddr506 = ptrtoint %struct.bz_stream* %.pre424 to i64 + %sunkaddr507 = add i64 %sunkaddr506, 16 + %sunkaddr508 = inttoptr i64 %sunkaddr507 to i32* + %tmp13 = load i32, i32* %sunkaddr508, align 4 + %inc53 = add i32 %tmp13, 1 + store i32 %inc53, i32* %sunkaddr508, align 4 + br label %while.body.backedge + +while.body.backedge: ; preds = %if.then.51, %if.end.37 + %lsr.iv.next483 = add i32 %lsr.iv482, -1 + %cmp28 = icmp sgt i32 %add, 7 + br i1 %cmp28, label %if.then.29, label %if.end.33 + +if.end.62: ; preds = %if.then.29 + %sub64 = add nsw i32 %and, -48 + %sunkaddr509 = ptrtoint %struct.DState* %s to i64 + %sunkaddr510 = add i64 %sunkaddr509, 40 + %sunkaddr511 = inttoptr i64 %sunkaddr510 to i32* + store i32 %sub64, i32* %sunkaddr511, align 4 + br label %sw.bb.65 + +sw.bb.65: ; preds = %if.end.62, %if.end.sw.bb.65_crit_edge + %bsLive69.pre-phi = phi i32* [ %bsLive69.phi.trans.insert, %if.end.sw.bb.65_crit_edge ], [ %bsLive, %if.end.62 ] + %tmp14 = phi i32 [ %.pre426, 
%if.end.sw.bb.65_crit_edge ], [ %sub, %if.end.62 ] + %sunkaddr512 = ptrtoint %struct.DState* %s to i64 + %sunkaddr513 = add i64 %sunkaddr512, 8 + %sunkaddr514 = inttoptr i64 %sunkaddr513 to i32* + store i32 14, i32* %sunkaddr514, align 4 + %cmp70.397 = icmp sgt i32 %tmp14, 7 + br i1 %cmp70.397, label %if.then.72, label %if.end.82.lr.ph + +if.end.82.lr.ph: ; preds = %sw.bb.65 + %tmp15 = bitcast %struct.DState* %s to %struct.bz_stream** + %.pre427 = load %struct.bz_stream*, %struct.bz_stream** %tmp15, align 8 + %avail_in84.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre427, i64 0, i32 1 + %.pre431 = load i32, i32* %avail_in84.phi.trans.insert, align 4 + %tmp16 = add i32 %.pre431, -1 + br label %if.end.82 + +if.then.72: ; preds = %while.body.68.backedge, %sw.bb.65 + %.lcssa390 = phi i32 [ %tmp14, %sw.bb.65 ], [ %add97, %while.body.68.backedge ] + %sub76 = add nsw i32 %.lcssa390, -8 + %sunkaddr516 = ptrtoint %struct.DState* %s to i64 + %sunkaddr517 = add i64 %sunkaddr516, 36 + %sunkaddr518 = inttoptr i64 %sunkaddr517 to i32* + store i32 %sub76, i32* %sunkaddr518, align 4 + %currBlockNo = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 11 + %tmp17 = load i32, i32* %currBlockNo, align 4 + %inc117 = add nsw i32 %tmp17, 1 + store i32 %inc117, i32* %currBlockNo, align 4 + %verbosity = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 12 + %tmp18 = load i32, i32* %verbosity, align 4 + %cmp118 = icmp sgt i32 %tmp18, 1 + br i1 %cmp118, label %if.then.120, label %sw.bb.123, !prof !0 + +if.end.82: ; preds = %while.body.68.backedge, %if.end.82.lr.ph + %lsr.iv480 = phi i32 [ %tmp16, %if.end.82.lr.ph ], [ %lsr.iv.next481, %while.body.68.backedge ] + %tmp19 = phi i32 [ %tmp14, %if.end.82.lr.ph ], [ %add97, %while.body.68.backedge ] + %cmp85 = icmp eq i32 %lsr.iv480, -1 + br i1 %cmp85, label %save_state_and_return, label %if.end.88 + +if.end.88: ; preds = %if.end.82 + %tmp20 = bitcast %struct.bz_stream* %.pre427 to i8** + %sunkaddr519 = ptrtoint %struct.DState* %s to i64 + %sunkaddr520 = add i64 %sunkaddr519, 32 + %sunkaddr521 = inttoptr i64 %sunkaddr520 to i32* + %tmp21 = load i32, i32* %sunkaddr521, align 4 + %shl90 = shl i32 %tmp21, 8 + %tmp22 = load i8*, i8** %tmp20, align 8 + %tmp23 = load i8, i8* %tmp22, align 1 + %conv93 = zext i8 %tmp23 to i32 + %or94 = or i32 %conv93, %shl90 + store i32 %or94, i32* %sunkaddr521, align 4 + %add97 = add nsw i32 %tmp19, 8 + %sunkaddr522 = ptrtoint %struct.DState* %s to i64 + %sunkaddr523 = add i64 %sunkaddr522, 36 + %sunkaddr524 = inttoptr i64 %sunkaddr523 to i32* + store i32 %add97, i32* %sunkaddr524, align 4 + %incdec.ptr100 = getelementptr inbounds i8, i8* %tmp22, i64 1 + store i8* %incdec.ptr100, i8** %tmp20, align 8 + %sunkaddr525 = ptrtoint %struct.bz_stream* %.pre427 to i64 + %sunkaddr526 = add i64 %sunkaddr525, 8 + %sunkaddr527 = inttoptr i64 %sunkaddr526 to i32* + store i32 %lsr.iv480, i32* %sunkaddr527, align 4 + %sunkaddr528 = ptrtoint %struct.bz_stream* %.pre427 to i64 + %sunkaddr529 = add i64 %sunkaddr528, 12 + %sunkaddr530 = inttoptr i64 %sunkaddr529 to i32* + %tmp24 = load i32, i32* %sunkaddr530, align 4 + %inc106 = add i32 %tmp24, 1 + store i32 %inc106, i32* %sunkaddr530, align 4 + %cmp109 = icmp eq i32 %inc106, 0 + br i1 %cmp109, label %if.then.111, label %while.body.68.backedge + +if.then.111: ; preds = %if.end.88 + %sunkaddr531 = ptrtoint %struct.bz_stream* %.pre427 to i64 + %sunkaddr532 = add i64 %sunkaddr531, 16 + %sunkaddr533 = inttoptr i64 %sunkaddr532 to i32* + 
%tmp25 = load i32, i32* %sunkaddr533, align 4 + %inc114 = add i32 %tmp25, 1 + store i32 %inc114, i32* %sunkaddr533, align 4 + br label %while.body.68.backedge + +while.body.68.backedge: ; preds = %if.then.111, %if.end.88 + %lsr.iv.next481 = add i32 %lsr.iv480, -1 + %cmp70 = icmp sgt i32 %add97, 7 + br i1 %cmp70, label %if.then.72, label %if.end.82 + +if.then.120: ; preds = %if.then.72 + %call = tail call i32 (%struct.__sFILE*, i8*, ...) @fprintf(%struct.__sFILE* getelementptr inbounds ([0 x %struct.__sFILE], [0 x %struct.__sFILE]* @__sF, i64 0, i64 2), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str, i64 0, i64 0), i32 %inc117) + br label %sw.bb.123 + +sw.bb.123: ; preds = %if.then.120, %if.then.72, %if.end.sw.bb.123_crit_edge + %bsLive127.pre-phi = phi i32* [ %.pre433, %if.end.sw.bb.123_crit_edge ], [ %bsLive69.pre-phi, %if.then.72 ], [ %bsLive69.pre-phi, %if.then.120 ] + %sunkaddr534 = ptrtoint %struct.DState* %s to i64 + %sunkaddr535 = add i64 %sunkaddr534, 8 + %sunkaddr536 = inttoptr i64 %sunkaddr535 to i32* + store i32 25, i32* %sunkaddr536, align 4 + %tmp26 = load i32, i32* %bsLive127.pre-phi, align 4 + %cmp128.395 = icmp sgt i32 %tmp26, 7 + br i1 %cmp128.395, label %sw.bb.123.if.then.130_crit_edge, label %if.end.140.lr.ph + +sw.bb.123.if.then.130_crit_edge: ; preds = %sw.bb.123 + %sunkaddr537 = ptrtoint %struct.DState* %s to i64 + %sunkaddr538 = add i64 %sunkaddr537, 32 + %sunkaddr539 = inttoptr i64 %sunkaddr538 to i32* + %.pre429 = load i32, i32* %sunkaddr539, align 4 + br label %if.then.130 + +if.end.140.lr.ph: ; preds = %sw.bb.123 + %tmp27 = bitcast %struct.DState* %s to %struct.bz_stream** + %.pre428 = load %struct.bz_stream*, %struct.bz_stream** %tmp27, align 8 + %avail_in142.phi.trans.insert = getelementptr inbounds %struct.bz_stream, %struct.bz_stream* %.pre428, i64 0, i32 1 + %.pre432 = load i32, i32* %avail_in142.phi.trans.insert, align 4 + %tmp28 = add i32 %.pre432, -1 + br label %if.end.140 + +if.then.130: ; preds = %while.body.126.backedge, %sw.bb.123.if.then.130_crit_edge + %tmp29 = phi i32 [ %.pre429, %sw.bb.123.if.then.130_crit_edge ], [ %or152, %while.body.126.backedge ] + %.lcssa = phi i32 [ %tmp26, %sw.bb.123.if.then.130_crit_edge ], [ %add155, %while.body.126.backedge ] + %sub134 = add nsw i32 %.lcssa, -8 + %shr135 = lshr i32 %tmp29, %sub134 + store i32 %sub134, i32* %bsLive127.pre-phi, align 4 + %origPtr = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 13 + %tmp30 = load i32, i32* %origPtr, align 4 + %shl175 = shl i32 %tmp30, 8 + %conv176 = and i32 %shr135, 255 + %or177 = or i32 %shl175, %conv176 + store i32 %or177, i32* %origPtr, align 4 + %nInUse = getelementptr inbounds %struct.DState, %struct.DState* %s, i64 0, i32 27 + %tmp31 = load i32, i32* %nInUse, align 4 + %add179 = add nsw i32 %tmp31, 2 + br label %save_state_and_return + +if.end.140: ; preds = %while.body.126.backedge, %if.end.140.lr.ph + %lsr.iv = phi i32 [ %tmp28, %if.end.140.lr.ph ], [ %lsr.iv.next, %while.body.126.backedge ] + %tmp32 = phi i32 [ %tmp26, %if.end.140.lr.ph ], [ %add155, %while.body.126.backedge ] + %cmp143 = icmp eq i32 %lsr.iv, -1 + br i1 %cmp143, label %save_state_and_return, label %if.end.146 + +if.end.146: ; preds = %if.end.140 + %tmp33 = bitcast %struct.bz_stream* %.pre428 to i8** + %sunkaddr541 = ptrtoint %struct.DState* %s to i64 + %sunkaddr542 = add i64 %sunkaddr541, 32 + %sunkaddr543 = inttoptr i64 %sunkaddr542 to i32* + %tmp34 = load i32, i32* %sunkaddr543, align 4 + %shl148 = shl i32 %tmp34, 8 + %tmp35 = load i8*, i8** %tmp33, align 8 + 
%tmp36 = load i8, i8* %tmp35, align 1 + %conv151 = zext i8 %tmp36 to i32 + %or152 = or i32 %conv151, %shl148 + store i32 %or152, i32* %sunkaddr543, align 4 + %add155 = add nsw i32 %tmp32, 8 + store i32 %add155, i32* %bsLive127.pre-phi, align 4 + %incdec.ptr158 = getelementptr inbounds i8, i8* %tmp35, i64 1 + store i8* %incdec.ptr158, i8** %tmp33, align 8 + %sunkaddr544 = ptrtoint %struct.bz_stream* %.pre428 to i64 + %sunkaddr545 = add i64 %sunkaddr544, 8 + %sunkaddr546 = inttoptr i64 %sunkaddr545 to i32* + store i32 %lsr.iv, i32* %sunkaddr546, align 4 + %sunkaddr547 = ptrtoint %struct.bz_stream* %.pre428 to i64 + %sunkaddr548 = add i64 %sunkaddr547, 12 + %sunkaddr549 = inttoptr i64 %sunkaddr548 to i32* + %tmp37 = load i32, i32* %sunkaddr549, align 4 + %inc164 = add i32 %tmp37, 1 + store i32 %inc164, i32* %sunkaddr549, align 4 + %cmp167 = icmp eq i32 %inc164, 0 + br i1 %cmp167, label %if.then.169, label %while.body.126.backedge + +if.then.169: ; preds = %if.end.146 + %sunkaddr550 = ptrtoint %struct.bz_stream* %.pre428 to i64 + %sunkaddr551 = add i64 %sunkaddr550, 16 + %sunkaddr552 = inttoptr i64 %sunkaddr551 to i32* + %tmp38 = load i32, i32* %sunkaddr552, align 4 + %inc172 = add i32 %tmp38, 1 + store i32 %inc172, i32* %sunkaddr552, align 4 + br label %while.body.126.backedge + +while.body.126.backedge: ; preds = %if.then.169, %if.end.146 + %lsr.iv.next = add i32 %lsr.iv, -1 + %cmp128 = icmp sgt i32 %add155, 7 + br i1 %cmp128, label %if.then.130, label %if.end.140 + +sw.default: ; preds = %if.end, %if.end.thread + %tmp39 = phi i32 [ 0, %if.end.thread ], [ %.pre, %if.end ] + %tmp40 = phi i32 [ 0, %if.end.thread ], [ %.pre406, %if.end ] + %tmp41 = phi i32 [ 0, %if.end.thread ], [ %.pre407, %if.end ] + %tmp42 = phi i32 [ 0, %if.end.thread ], [ %.pre408, %if.end ] + %tmp43 = phi i32 [ 0, %if.end.thread ], [ %.pre409, %if.end ] + %tmp44 = phi i32 [ 0, %if.end.thread ], [ %.pre410, %if.end ] + %tmp45 = phi i32 [ 0, %if.end.thread ], [ %.pre411, %if.end ] + %tmp46 = phi i32 [ 0, %if.end.thread ], [ %.pre412, %if.end ] + %tmp47 = phi i32 [ 0, %if.end.thread ], [ %.pre413, %if.end ] + %tmp48 = phi i32 [ 0, %if.end.thread ], [ %.pre414, %if.end ] + %tmp49 = phi i32 [ 0, %if.end.thread ], [ %.pre415, %if.end ] + %tmp50 = phi i32 [ 0, %if.end.thread ], [ %.pre416, %if.end ] + %tmp51 = phi i32 [ 0, %if.end.thread ], [ %.pre417, %if.end ] + %tmp52 = phi i32 [ 0, %if.end.thread ], [ %.pre418, %if.end ] + %tmp53 = phi i32 [ 0, %if.end.thread ], [ %.pre419, %if.end ] + %tmp54 = phi i32 [ 0, %if.end.thread ], [ %.pre420, %if.end ] + %tmp55 = phi i32 [ 0, %if.end.thread ], [ %.pre421, %if.end ] + %tmp56 = phi i32 [ 0, %if.end.thread ], [ %.pre422, %if.end ] + %tmp57 = phi i32 [ 0, %if.end.thread ], [ %.pre423, %if.end ] + %save_j3.pre-phi469 = phi i32* [ %save_j, %if.end.thread ], [ %save_j3.phi.trans.insert, %if.end ] + %save_t4.pre-phi467 = phi i32* [ %save_t, %if.end.thread ], [ %save_t4.phi.trans.insert, %if.end ] + %save_alphaSize5.pre-phi465 = phi i32* [ %save_alphaSize, %if.end.thread ], [ %save_alphaSize5.phi.trans.insert, %if.end ] + %save_nGroups6.pre-phi463 = phi i32* [ %save_nGroups, %if.end.thread ], [ %save_nGroups6.phi.trans.insert, %if.end ] + %save_nSelectors7.pre-phi461 = phi i32* [ %save_nSelectors, %if.end.thread ], [ %save_nSelectors7.phi.trans.insert, %if.end ] + %save_EOB8.pre-phi459 = phi i32* [ %save_EOB, %if.end.thread ], [ %save_EOB8.phi.trans.insert, %if.end ] + %save_groupNo9.pre-phi457 = phi i32* [ %save_groupNo, %if.end.thread ], [ %save_groupNo9.phi.trans.insert, %if.end ] + 
%save_groupPos10.pre-phi455 = phi i32* [ %save_groupPos, %if.end.thread ], [ %save_groupPos10.phi.trans.insert, %if.end ] + %save_nextSym11.pre-phi453 = phi i32* [ %save_nextSym, %if.end.thread ], [ %save_nextSym11.phi.trans.insert, %if.end ] + %save_nblockMAX12.pre-phi451 = phi i32* [ %save_nblockMAX, %if.end.thread ], [ %save_nblockMAX12.phi.trans.insert, %if.end ] + %save_nblock13.pre-phi449 = phi i32* [ %save_nblock, %if.end.thread ], [ %save_nblock13.phi.trans.insert, %if.end ] + %save_es14.pre-phi447 = phi i32* [ %save_es, %if.end.thread ], [ %save_es14.phi.trans.insert, %if.end ] + %save_N15.pre-phi445 = phi i32* [ %save_N, %if.end.thread ], [ %save_N15.phi.trans.insert, %if.end ] + %save_curr16.pre-phi443 = phi i32* [ %save_curr, %if.end.thread ], [ %save_curr16.phi.trans.insert, %if.end ] + %save_zt17.pre-phi441 = phi i32* [ %save_zt, %if.end.thread ], [ %save_zt17.phi.trans.insert, %if.end ] + %save_zn18.pre-phi439 = phi i32* [ %save_zn, %if.end.thread ], [ %save_zn18.phi.trans.insert, %if.end ] + %save_zvec19.pre-phi437 = phi i32* [ %save_zvec, %if.end.thread ], [ %save_zvec19.phi.trans.insert, %if.end ] + %save_zj20.pre-phi435 = phi i32* [ %save_zj, %if.end.thread ], [ %save_zj20.phi.trans.insert, %if.end ] + tail call void @bar(i32 4001) + br label %save_state_and_return + +save_state_and_return: ; preds = %sw.default, %if.end.140, %if.then.130, %if.end.82, %if.end.33, %if.then.29 + %tmp58 = phi i32 [ %tmp39, %sw.default ], [ %.pre, %if.then.29 ], [ %.pre, %if.then.130 ], [ %.pre, %if.end.140 ], [ %.pre, %if.end.82 ], [ %.pre, %if.end.33 ] + %tmp59 = phi i32 [ %tmp40, %sw.default ], [ %.pre406, %if.then.29 ], [ %.pre406, %if.then.130 ], [ %.pre406, %if.end.140 ], [ %.pre406, %if.end.82 ], [ %.pre406, %if.end.33 ] + %tmp60 = phi i32 [ %tmp41, %sw.default ], [ %.pre407, %if.then.29 ], [ %.pre407, %if.then.130 ], [ %.pre407, %if.end.140 ], [ %.pre407, %if.end.82 ], [ %.pre407, %if.end.33 ] + %tmp61 = phi i32 [ %tmp43, %sw.default ], [ %.pre409, %if.then.29 ], [ %.pre409, %if.then.130 ], [ %.pre409, %if.end.140 ], [ %.pre409, %if.end.82 ], [ %.pre409, %if.end.33 ] + %tmp62 = phi i32 [ %tmp44, %sw.default ], [ %.pre410, %if.then.29 ], [ %.pre410, %if.then.130 ], [ %.pre410, %if.end.140 ], [ %.pre410, %if.end.82 ], [ %.pre410, %if.end.33 ] + %tmp63 = phi i32 [ %tmp45, %sw.default ], [ %.pre411, %if.then.29 ], [ %.pre411, %if.then.130 ], [ %.pre411, %if.end.140 ], [ %.pre411, %if.end.82 ], [ %.pre411, %if.end.33 ] + %tmp64 = phi i32 [ %tmp46, %sw.default ], [ %.pre412, %if.then.29 ], [ %.pre412, %if.then.130 ], [ %.pre412, %if.end.140 ], [ %.pre412, %if.end.82 ], [ %.pre412, %if.end.33 ] + %tmp65 = phi i32 [ %tmp47, %sw.default ], [ %.pre413, %if.then.29 ], [ %.pre413, %if.then.130 ], [ %.pre413, %if.end.140 ], [ %.pre413, %if.end.82 ], [ %.pre413, %if.end.33 ] + %tmp66 = phi i32 [ %tmp48, %sw.default ], [ %.pre414, %if.then.29 ], [ %.pre414, %if.then.130 ], [ %.pre414, %if.end.140 ], [ %.pre414, %if.end.82 ], [ %.pre414, %if.end.33 ] + %tmp67 = phi i32 [ %tmp49, %sw.default ], [ %.pre415, %if.then.29 ], [ %.pre415, %if.then.130 ], [ %.pre415, %if.end.140 ], [ %.pre415, %if.end.82 ], [ %.pre415, %if.end.33 ] + %tmp68 = phi i32 [ %tmp51, %sw.default ], [ %.pre417, %if.then.29 ], [ %.pre417, %if.then.130 ], [ %.pre417, %if.end.140 ], [ %.pre417, %if.end.82 ], [ %.pre417, %if.end.33 ] + %tmp69 = phi i32 [ %tmp52, %sw.default ], [ %.pre418, %if.then.29 ], [ %.pre418, %if.then.130 ], [ %.pre418, %if.end.140 ], [ %.pre418, %if.end.82 ], [ %.pre418, %if.end.33 ] + %tmp70 = phi i32 [ %tmp53, 
%sw.default ], [ %.pre419, %if.then.29 ], [ %.pre419, %if.then.130 ], [ %.pre419, %if.end.140 ], [ %.pre419, %if.end.82 ], [ %.pre419, %if.end.33 ] + %tmp71 = phi i32 [ %tmp54, %sw.default ], [ %.pre420, %if.then.29 ], [ %.pre420, %if.then.130 ], [ %.pre420, %if.end.140 ], [ %.pre420, %if.end.82 ], [ %.pre420, %if.end.33 ] + %tmp72 = phi i32 [ %tmp55, %sw.default ], [ %.pre421, %if.then.29 ], [ %.pre421, %if.then.130 ], [ %.pre421, %if.end.140 ], [ %.pre421, %if.end.82 ], [ %.pre421, %if.end.33 ] + %tmp73 = phi i32 [ %tmp56, %sw.default ], [ %.pre422, %if.then.29 ], [ %.pre422, %if.then.130 ], [ %.pre422, %if.end.140 ], [ %.pre422, %if.end.82 ], [ %.pre422, %if.end.33 ] + %tmp74 = phi i32 [ %tmp57, %sw.default ], [ %.pre423, %if.then.29 ], [ %.pre423, %if.then.130 ], [ %.pre423, %if.end.140 ], [ %.pre423, %if.end.82 ], [ %.pre423, %if.end.33 ] + %save_j3.pre-phi468 = phi i32* [ %save_j3.pre-phi469, %sw.default ], [ %save_j3.phi.trans.insert, %if.then.29 ], [ %save_j3.phi.trans.insert, %if.then.130 ], [ %save_j3.phi.trans.insert, %if.end.140 ], [ %save_j3.phi.trans.insert, %if.end.82 ], [ %save_j3.phi.trans.insert, %if.end.33 ] + %save_t4.pre-phi466 = phi i32* [ %save_t4.pre-phi467, %sw.default ], [ %save_t4.phi.trans.insert, %if.then.29 ], [ %save_t4.phi.trans.insert, %if.then.130 ], [ %save_t4.phi.trans.insert, %if.end.140 ], [ %save_t4.phi.trans.insert, %if.end.82 ], [ %save_t4.phi.trans.insert, %if.end.33 ] + %save_alphaSize5.pre-phi464 = phi i32* [ %save_alphaSize5.pre-phi465, %sw.default ], [ %save_alphaSize5.phi.trans.insert, %if.then.29 ], [ %save_alphaSize5.phi.trans.insert, %if.then.130 ], [ %save_alphaSize5.phi.trans.insert, %if.end.140 ], [ %save_alphaSize5.phi.trans.insert, %if.end.82 ], [ %save_alphaSize5.phi.trans.insert, %if.end.33 ] + %save_nGroups6.pre-phi462 = phi i32* [ %save_nGroups6.pre-phi463, %sw.default ], [ %save_nGroups6.phi.trans.insert, %if.then.29 ], [ %save_nGroups6.phi.trans.insert, %if.then.130 ], [ %save_nGroups6.phi.trans.insert, %if.end.140 ], [ %save_nGroups6.phi.trans.insert, %if.end.82 ], [ %save_nGroups6.phi.trans.insert, %if.end.33 ] + %save_nSelectors7.pre-phi460 = phi i32* [ %save_nSelectors7.pre-phi461, %sw.default ], [ %save_nSelectors7.phi.trans.insert, %if.then.29 ], [ %save_nSelectors7.phi.trans.insert, %if.then.130 ], [ %save_nSelectors7.phi.trans.insert, %if.end.140 ], [ %save_nSelectors7.phi.trans.insert, %if.end.82 ], [ %save_nSelectors7.phi.trans.insert, %if.end.33 ] + %save_EOB8.pre-phi458 = phi i32* [ %save_EOB8.pre-phi459, %sw.default ], [ %save_EOB8.phi.trans.insert, %if.then.29 ], [ %save_EOB8.phi.trans.insert, %if.then.130 ], [ %save_EOB8.phi.trans.insert, %if.end.140 ], [ %save_EOB8.phi.trans.insert, %if.end.82 ], [ %save_EOB8.phi.trans.insert, %if.end.33 ] + %save_groupNo9.pre-phi456 = phi i32* [ %save_groupNo9.pre-phi457, %sw.default ], [ %save_groupNo9.phi.trans.insert, %if.then.29 ], [ %save_groupNo9.phi.trans.insert, %if.then.130 ], [ %save_groupNo9.phi.trans.insert, %if.end.140 ], [ %save_groupNo9.phi.trans.insert, %if.end.82 ], [ %save_groupNo9.phi.trans.insert, %if.end.33 ] + %save_groupPos10.pre-phi454 = phi i32* [ %save_groupPos10.pre-phi455, %sw.default ], [ %save_groupPos10.phi.trans.insert, %if.then.29 ], [ %save_groupPos10.phi.trans.insert, %if.then.130 ], [ %save_groupPos10.phi.trans.insert, %if.end.140 ], [ %save_groupPos10.phi.trans.insert, %if.end.82 ], [ %save_groupPos10.phi.trans.insert, %if.end.33 ] + %save_nextSym11.pre-phi452 = phi i32* [ %save_nextSym11.pre-phi453, %sw.default ], [ 
%save_nextSym11.phi.trans.insert, %if.then.29 ], [ %save_nextSym11.phi.trans.insert, %if.then.130 ], [ %save_nextSym11.phi.trans.insert, %if.end.140 ], [ %save_nextSym11.phi.trans.insert, %if.end.82 ], [ %save_nextSym11.phi.trans.insert, %if.end.33 ] + %save_nblockMAX12.pre-phi450 = phi i32* [ %save_nblockMAX12.pre-phi451, %sw.default ], [ %save_nblockMAX12.phi.trans.insert, %if.then.29 ], [ %save_nblockMAX12.phi.trans.insert, %if.then.130 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.140 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.82 ], [ %save_nblockMAX12.phi.trans.insert, %if.end.33 ] + %save_nblock13.pre-phi448 = phi i32* [ %save_nblock13.pre-phi449, %sw.default ], [ %save_nblock13.phi.trans.insert, %if.then.29 ], [ %save_nblock13.phi.trans.insert, %if.then.130 ], [ %save_nblock13.phi.trans.insert, %if.end.140 ], [ %save_nblock13.phi.trans.insert, %if.end.82 ], [ %save_nblock13.phi.trans.insert, %if.end.33 ] + %save_es14.pre-phi446 = phi i32* [ %save_es14.pre-phi447, %sw.default ], [ %save_es14.phi.trans.insert, %if.then.29 ], [ %save_es14.phi.trans.insert, %if.then.130 ], [ %save_es14.phi.trans.insert, %if.end.140 ], [ %save_es14.phi.trans.insert, %if.end.82 ], [ %save_es14.phi.trans.insert, %if.end.33 ] + %save_N15.pre-phi444 = phi i32* [ %save_N15.pre-phi445, %sw.default ], [ %save_N15.phi.trans.insert, %if.then.29 ], [ %save_N15.phi.trans.insert, %if.then.130 ], [ %save_N15.phi.trans.insert, %if.end.140 ], [ %save_N15.phi.trans.insert, %if.end.82 ], [ %save_N15.phi.trans.insert, %if.end.33 ] + %save_curr16.pre-phi442 = phi i32* [ %save_curr16.pre-phi443, %sw.default ], [ %save_curr16.phi.trans.insert, %if.then.29 ], [ %save_curr16.phi.trans.insert, %if.then.130 ], [ %save_curr16.phi.trans.insert, %if.end.140 ], [ %save_curr16.phi.trans.insert, %if.end.82 ], [ %save_curr16.phi.trans.insert, %if.end.33 ] + %save_zt17.pre-phi440 = phi i32* [ %save_zt17.pre-phi441, %sw.default ], [ %save_zt17.phi.trans.insert, %if.then.29 ], [ %save_zt17.phi.trans.insert, %if.then.130 ], [ %save_zt17.phi.trans.insert, %if.end.140 ], [ %save_zt17.phi.trans.insert, %if.end.82 ], [ %save_zt17.phi.trans.insert, %if.end.33 ] + %save_zn18.pre-phi438 = phi i32* [ %save_zn18.pre-phi439, %sw.default ], [ %save_zn18.phi.trans.insert, %if.then.29 ], [ %save_zn18.phi.trans.insert, %if.then.130 ], [ %save_zn18.phi.trans.insert, %if.end.140 ], [ %save_zn18.phi.trans.insert, %if.end.82 ], [ %save_zn18.phi.trans.insert, %if.end.33 ] + %save_zvec19.pre-phi436 = phi i32* [ %save_zvec19.pre-phi437, %sw.default ], [ %save_zvec19.phi.trans.insert, %if.then.29 ], [ %save_zvec19.phi.trans.insert, %if.then.130 ], [ %save_zvec19.phi.trans.insert, %if.end.140 ], [ %save_zvec19.phi.trans.insert, %if.end.82 ], [ %save_zvec19.phi.trans.insert, %if.end.33 ] + %save_zj20.pre-phi434 = phi i32* [ %save_zj20.pre-phi435, %sw.default ], [ %save_zj20.phi.trans.insert, %if.then.29 ], [ %save_zj20.phi.trans.insert, %if.then.130 ], [ %save_zj20.phi.trans.insert, %if.end.140 ], [ %save_zj20.phi.trans.insert, %if.end.82 ], [ %save_zj20.phi.trans.insert, %if.end.33 ] + %nblock.1 = phi i32 [ %tmp50, %sw.default ], [ %.pre416, %if.then.29 ], [ 0, %if.then.130 ], [ %.pre416, %if.end.140 ], [ %.pre416, %if.end.82 ], [ %.pre416, %if.end.33 ] + %alphaSize.1 = phi i32 [ %tmp42, %sw.default ], [ %.pre408, %if.then.29 ], [ %add179, %if.then.130 ], [ %.pre408, %if.end.140 ], [ %.pre408, %if.end.82 ], [ %.pre408, %if.end.33 ] + %retVal.0 = phi i32 [ 0, %sw.default ], [ -5, %if.then.29 ], [ -4, %if.then.130 ], [ 0, %if.end.140 ], [ 0, %if.end.82 ], 
[ 0, %if.end.33 ] + store i32 %tmp58, i32* %save_i, align 4 + store i32 %tmp59, i32* %save_j3.pre-phi468, align 4 + store i32 %tmp60, i32* %save_t4.pre-phi466, align 4 + store i32 %alphaSize.1, i32* %save_alphaSize5.pre-phi464, align 4 + store i32 %tmp61, i32* %save_nGroups6.pre-phi462, align 4 + store i32 %tmp62, i32* %save_nSelectors7.pre-phi460, align 4 + store i32 %tmp63, i32* %save_EOB8.pre-phi458, align 4 + store i32 %tmp64, i32* %save_groupNo9.pre-phi456, align 4 + store i32 %tmp65, i32* %save_groupPos10.pre-phi454, align 4 + store i32 %tmp66, i32* %save_nextSym11.pre-phi452, align 4 + store i32 %tmp67, i32* %save_nblockMAX12.pre-phi450, align 4 + store i32 %nblock.1, i32* %save_nblock13.pre-phi448, align 4 + store i32 %tmp68, i32* %save_es14.pre-phi446, align 4 + store i32 %tmp69, i32* %save_N15.pre-phi444, align 4 + store i32 %tmp70, i32* %save_curr16.pre-phi442, align 4 + store i32 %tmp71, i32* %save_zt17.pre-phi440, align 4 + store i32 %tmp72, i32* %save_zn18.pre-phi438, align 4 + store i32 %tmp73, i32* %save_zvec19.pre-phi436, align 4 + store i32 %tmp74, i32* %save_zj20.pre-phi434, align 4 + ret i32 %retVal.0 +} + +!0 = !{!"branch_weights", i32 10, i32 1} diff --git a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll index 739570236da9..1820b8163a90 100644 --- a/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ b/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s ; This test aims to check basic correctness of frame layout & ; frame access code. There are 8 functions in this test file, @@ -252,11 +252,11 @@ entry: ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through frame pointer @@ -299,11 +299,11 @@ entry: ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through frame pointer @@ -361,11 +361,11 @@ entry: ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer @@ -414,11 +414,11 @@ entry: ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). 
-; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer @@ -465,11 +465,11 @@ entry: ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK: ubfx x9, x0, #0, #32 +; CHECK: mov w9, w0 +; CHECK: mov x10, sp ; CHECK: lsl x9, x9, #2 ; CHECK: add x9, x9, #15 ; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp ; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 ; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer @@ -522,10 +522,10 @@ bb1: ; CHECK-LABEL: realign_conditional2 ; Extra realignment in the prologue (performance issue). +; CHECK: tbz {{.*}} .[[LABEL:.*]] ; CHECK: sub x9, sp, #32 // =32 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp -; CHECK: tbz {{.*}} .[[LABEL:.*]] ; Stack is realigned in a non-entry BB. ; CHECK: sub [[REG:x[01-9]+]], sp, #64 ; CHECK: and sp, [[REG]], #0xffffffffffffffe0 diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll index ea3b8fa55732..1bc2a3ccb1ca 100644 --- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll +++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll @@ -1,7 +1,10 @@ -; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON +; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON -; CHECK-LABEL: load_factor2: -; CHECK: ld2 { v0.8b, v1.8b }, [x0] +; NEON-LABEL: load_factor2: +; NEON: ld2 { v0.8b, v1.8b }, [x0] +; NONEON-LABEL: load_factor2: +; NONEON-NOT: ld2 define <8 x i8> @load_factor2(<16 x i8>* %ptr) { %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4 %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> @@ -10,8 +13,10 @@ define <8 x i8> @load_factor2(<16 x i8>* %ptr) { ret <8 x i8> %add } -; CHECK-LABEL: load_factor3: -; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: load_factor3: +; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: load_factor3: +; NONEON-NOT: ld3 define <4 x i32> @load_factor3(i32* %ptr) { %base = bitcast i32* %ptr to <12 x i32>* %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4 @@ -21,8 +26,10 @@ define <4 x i32> @load_factor3(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_factor4: -; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: load_factor4: +; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: load_factor4: +; NONEON-NOT: ld4 define <4 x i32> @load_factor4(i32* %ptr) { %base = bitcast i32* %ptr to <16 x i32>* %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 @@ -32,16 +39,20 @@ define <4 x i32> @load_factor4(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: store_factor2: -; CHECK: st2 { v0.8b, v1.8b }, [x0] +; NEON-LABEL: store_factor2: +; NEON: st2 { v0.8b, v1.8b }, [x0] +; NONEON-LABEL: store_factor2: +; NONEON-NOT: st2 define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) { %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4 ret void } -; 
CHECK-LABEL: store_factor3: -; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: store_factor3: +; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: store_factor3: +; NONEON-NOT: st3 define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { %base = bitcast i32* %ptr to <12 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -51,8 +62,10 @@ define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v ret void } -; CHECK-LABEL: store_factor4: -; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: store_factor4: +; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: store_factor4: +; NONEON-NOT: st4 define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { %base = bitcast i32* %ptr to <16 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -65,8 +78,10 @@ define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v ; The following cases test that interleaved access of pointer vectors can be ; matched to ldN/stN instructions. -; CHECK-LABEL: load_ptrvec_factor2: -; CHECK: ld2 { v0.2d, v1.2d }, [x0] +; NEON-LABEL: load_ptrvec_factor2: +; NEON: ld2 { v0.2d, v1.2d }, [x0] +; NONEON-LABEL: load_ptrvec_factor2: +; NONEON-NOT: ld2 define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { %base = bitcast i32** %ptr to <4 x i32*>* %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4 @@ -74,8 +89,10 @@ define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) { ret <2 x i32*> %strided.v0 } -; CHECK-LABEL: load_ptrvec_factor3: -; CHECK: ld3 { v0.2d, v1.2d, v2.2d }, [x0] +; NEON-LABEL: load_ptrvec_factor3: +; NEON: ld3 { v0.2d, v1.2d, v2.2d }, [x0] +; NONEON-LABEL: load_ptrvec_factor3: +; NONEON-NOT: ld3 define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { %base = bitcast i32** %ptr to <6 x i32*>* %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4 @@ -86,8 +103,10 @@ define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr ret void } -; CHECK-LABEL: load_ptrvec_factor4: -; CHECK: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NEON-LABEL: load_ptrvec_factor4: +; NEON: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NONEON-LABEL: load_ptrvec_factor4: +; NONEON-NOT: ld4 define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) { %base = bitcast i32** %ptr to <8 x i32*>* %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4 @@ -98,8 +117,10 @@ define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr ret void } -; CHECK-LABEL: store_ptrvec_factor2: -; CHECK: st2 { v0.2d, v1.2d }, [x0] +; NEON-LABEL: store_ptrvec_factor2: +; NEON: st2 { v0.2d, v1.2d }, [x0] +; NONEON-LABEL: store_ptrvec_factor2: +; NONEON-NOT: st2 define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { %base = bitcast i32** %ptr to <4 x i32*>* %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -107,8 +128,10 @@ define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) { ret void } -; CHECK-LABEL: store_ptrvec_factor3: -; CHECK: st3 { v0.2d, v1.2d, v2.2d }, [x0] +; NEON-LABEL: store_ptrvec_factor3: +; NEON: st3 { v0.2d, v1.2d, v2.2d }, [x0] +; NONEON-LABEL: store_ptrvec_factor3: +; NONEON-NOT: st3 define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) { %base = bitcast i32** %ptr to <6 x i32*>* %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32>
@@ -118,8 +141,10 @@ define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 ret void } -; CHECK-LABEL: store_ptrvec_factor4: -; CHECK: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NEON-LABEL: store_ptrvec_factor4: +; NEON: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +; NONEON-LABEL: store_ptrvec_factor4: +; NONEON-NOT: st4 define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) { %base = bitcast i32* %ptr to <8 x i32*>* %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> @@ -132,8 +157,10 @@ define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 ; The following cases check that shuffle masks with undef indices can be matched ; into ldN/stN instructions. -; CHECK-LABEL: load_undef_mask_factor2: -; CHECK: ld2 { v0.4s, v1.4s }, [x0] +; NEON-LABEL: load_undef_mask_factor2: +; NEON: ld2 { v0.4s, v1.4s }, [x0] +; NONEON-LABEL: load_undef_mask_factor2: +; NONEON-NOT: ld2 define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { %base = bitcast i32* %ptr to <8 x i32>* %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4 @@ -143,8 +170,10 @@ define <4 x i32> @load_undef_mask_factor2(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_undef_mask_factor3: -; CHECK: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: load_undef_mask_factor3: +; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: load_undef_mask_factor3: +; NONEON-NOT: ld3 define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { %base = bitcast i32* %ptr to <12 x i32>* %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4 @@ -154,8 +183,10 @@ define <4 x i32> @load_undef_mask_factor3(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: load_undef_mask_factor4: -; CHECK: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: load_undef_mask_factor4: +; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: load_undef_mask_factor4: +; NONEON-NOT: ld4 define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { %base = bitcast i32* %ptr to <16 x i32>* %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4 @@ -165,8 +196,10 @@ define <4 x i32> @load_undef_mask_factor4(i32* %ptr) { ret <4 x i32> %add } -; CHECK-LABEL: store_undef_mask_factor2: -; CHECK: st2 { v0.4s, v1.4s }, [x0] +; NEON-LABEL: store_undef_mask_factor2: +; NEON: st2 { v0.4s, v1.4s }, [x0] +; NONEON-LABEL: store_undef_mask_factor2: +; NONEON-NOT: st2 define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { %base = bitcast i32* %ptr to <8 x i32>* %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -174,8 +207,10 @@ define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) { ret void } -; CHECK-LABEL: store_undef_mask_factor3: -; CHECK: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NEON-LABEL: store_undef_mask_factor3: +; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0] +; NONEON-LABEL: store_undef_mask_factor3: +; NONEON-NOT: st3 define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) { %base = bitcast i32* %ptr to <12 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -185,8 +220,10 @@ define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, < ret void } -; CHECK-LABEL: store_undef_mask_factor4: -; CHECK: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NEON-LABEL: store_undef_mask_factor4: +; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0] +; NONEON-LABEL: store_undef_mask_factor4: +; NONEON-NOT: st4 define void
@store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { %base = bitcast i32* %ptr to <16 x i32>* %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> @@ -195,3 +232,39 @@ define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, < store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4 ret void } + +; Check that we do something sane with illegal types. + +; NEON-LABEL: load_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: ldr q[[V:[0-9]+]], [x0] +; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s +; NEON-NEXT: ret +; NONEON-LABEL: load_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: ldr s0, [x0] +; NONEON-NEXT: ldr s1, [x0, #8] +; NONEON-NEXT: ret +define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind { + %tmp1 = load <3 x float>, <3 x float>* %p, align 16 + %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> + ret <3 x float> %tmp2 +} + +; NEON-LABEL: store_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s +; NEON-NEXT: st1 { v0.d }[0], [x0] +; NEON-NEXT: ret +; NONEON-LABEL: store_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2 +; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0 +; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32 +; NONEON-NEXT: str x[[RES]], [x0] +; NONEON-NEXT: ret +define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind { + %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> + store <3 x float> %tmp1, <3 x float>* %p, align 16 + ret void +} diff --git a/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll new file mode 100644 index 000000000000..84277995ce5b --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-loop-gep-opt.ll @@ -0,0 +1,50 @@ +; RUN: llc -O3 -aarch64-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck <%t %s +; REQUIRES: asserts +target triple = "aarch64--linux-android" + +%typeD = type { i32, i32, [256 x i32], [257 x i32] } + +; Function Attrs: noreturn nounwind uwtable +define i32 @test1(%typeD* nocapture %s) { +entry: +; CHECK-LABEL: entry: +; CHECK: %uglygep = getelementptr i8, i8* %0, i64 1032 +; CHECK: br label %do.body.i + + + %tPos = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 0 + %k0 = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 1 + %.pre = load i32, i32* %tPos, align 4 + br label %do.body.i + +do.body.i: +; CHECK-LABEL: do.body.i: +; CHECK: %uglygep2 = getelementptr i8, i8* %uglygep, i64 %3 +; CHECK-NEXT: %4 = bitcast i8* %uglygep2 to i32* +; CHECK-NOT: %uglygep2 = getelementptr i8, i8* %uglygep, i64 1032 + + + %0 = phi i32 [ 256, %entry ], [ %.be, %do.body.i.backedge ] + %1 = phi i32 [ 0, %entry ], [ %.be6, %do.body.i.backedge ] + %add.i = add nsw i32 %1, %0 + %shr.i = ashr i32 %add.i, 1 + %idxprom.i = sext i32 %shr.i to i64 + %arrayidx.i = getelementptr inbounds %typeD, %typeD* %s, i64 0, i32 3, i64 %idxprom.i + %2 = load i32, i32* %arrayidx.i, align 4 + %cmp.i = icmp sle i32 %2, %.pre + %na.1.i = select i1 %cmp.i, i32 %0, i32 %shr.i + %nb.1.i = select i1 %cmp.i, i32 %shr.i, i32 %1 + %sub.i = sub nsw i32 %na.1.i, %nb.1.i + %cmp1.i = icmp eq i32 %sub.i, 1 + br i1 %cmp1.i, label %fooo.exit, label %do.body.i.backedge + +do.body.i.backedge: + %.be = phi i32 [ %na.1.i, %do.body.i ], [ 256, %fooo.exit ] + %.be6 = phi i32 [ %nb.1.i, %do.body.i ], [ 0, %fooo.exit ] + br label %do.body.i + +fooo.exit: ; preds = %do.body.i + store i32 %nb.1.i, i32* %k0, align 4 + br label 
%do.body.i.backedge +} + diff --git a/test/CodeGen/AArch64/aarch64-minmaxv.ll b/test/CodeGen/AArch64/aarch64-minmaxv.ll new file mode 100644 index 000000000000..fb13b706cfaf --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-minmaxv.ll @@ -0,0 +1,511 @@ +; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-linux-gnu" + +; CHECK-LABEL: smax_B +; CHECK: smaxv {{b[0-9]+}}, {{v[0-9]+}}.16b +define i8 @smax_B(<16 x i8>* nocapture readonly %arr) { + %arr.load = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf = shufflevector <16 x i8> %arr.load, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp sgt <16 x i8> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %arr.load, <16 x i8> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp sgt <16 x i8> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp sgt <16 x i8> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp sgt <16 x i8> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + ret i8 %r +} + +; CHECK-LABEL: smax_H +; CHECK: smaxv {{h[0-9]+}}, {{v[0-9]+}}.8h +define i16 @smax_H(<8 x i16>* nocapture readonly %arr) { + %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp23 = icmp sgt <8 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf + %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp26 = icmp sgt <8 x i16> %rdx.minmax.select24, %rdx.shuf25 + %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 + %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp29 = icmp sgt <8 x i16> %rdx.minmax.select27, %rdx.shuf28 + %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 + %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 + %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 + %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + ret i16 %r +} + +; CHECK-LABEL: smax_S +; CHECK: smaxv {{s[0-9]+}}, {{v[0-9]+}}.4s +define i32 @smax_S(<4 x i32> * nocapture readonly %arr) { + %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp18 = icmp sgt <4 x i32> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4
x i32> %rdx.shuf + %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp21 = icmp sgt <4 x i32> %rdx.minmax.select19, %rdx.shuf20 + %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 + %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 + %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 + %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + ret i32 %r +} + +; CHECK-LABEL: smax_D +; CHECK-NOT: smaxv +define i64 @smax_D(<2 x i64>* nocapture readonly %arr) { + %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> + %rdx.minmax.cmp18 = icmp sgt <2 x i64> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 + %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 + %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 + %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt + ret i64 %r +} + + +; CHECK-LABEL: umax_B +; CHECK: umaxv {{b[0-9]+}}, {{v[0-9]+}}.16b +define i8 @umax_B(<16 x i8>* nocapture readonly %arr) { + %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ugt <16 x i8> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ugt <16 x i8> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ugt <16 x i8> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ugt <16 x i8> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + ret i8 %r +} + +; CHECK-LABEL: umax_H +; CHECK: umaxv {{h[0-9]+}}, {{v[0-9]+}}.8h +define i16 @umax_H(<8 x i16>* nocapture readonly %arr) { + %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp23 = icmp ugt <8 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf + %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp26 = icmp ugt <8 x i16> %rdx.minmax.select24, %rdx.shuf25 + %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 + %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp29 = icmp ugt <8 x i16> %rdx.minmax.select27, 
%rdx.shuf28 + %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 + %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 + %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 + %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + ret i16 %r +} + +; CHECK-LABEL: umax_S +; CHECK: umaxv {{s[0-9]+}}, {{v[0-9]+}}.4s +define i32 @umax_S(<4 x i32>* nocapture readonly %arr) { + %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp18 = icmp ugt <4 x i32> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf + %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp21 = icmp ugt <4 x i32> %rdx.minmax.select19, %rdx.shuf20 + %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 + %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 + %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 + %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + ret i32 %r +} + +; CHECK-LABEL: umax_D +; CHECK-NOT: umaxv +define i64 @umax_D(<2 x i64>* nocapture readonly %arr) { + %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> + %rdx.minmax.cmp18 = icmp ugt <2 x i64> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 + %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 + %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 + %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt + ret i64 %r +} + + +; CHECK-LABEL: smin_B +; CHECK: sminv {{b[0-9]+}}, {{v[0-9]+}}.16b +define i8 @smin_B(<16 x i8>* nocapture readonly %arr) { + %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp slt <16 x i8> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp slt <16 x i8> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp slt <16 x i8> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp slt <16 x i8> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + ret i8 %r +} + +; CHECK-LABEL: smin_H +; CHECK: sminv {{h[0-9]+}}, {{v[0-9]+}}.8h +define i16 @smin_H(<8 x i16>* nocapture 
readonly %arr) { + %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp23 = icmp slt <8 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf + %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp26 = icmp slt <8 x i16> %rdx.minmax.select24, %rdx.shuf25 + %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 + %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp29 = icmp slt <8 x i16> %rdx.minmax.select27, %rdx.shuf28 + %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 + %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 + %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 + %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + ret i16 %r +} + +; CHECK-LABEL: smin_S +; CHECK: sminv {{s[0-9]+}}, {{v[0-9]+}}.4s +define i32 @smin_S(<4 x i32>* nocapture readonly %arr) { + %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp18 = icmp slt <4 x i32> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf + %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp21 = icmp slt <4 x i32> %rdx.minmax.select19, %rdx.shuf20 + %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 + %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 + %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 + %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + ret i32 %r +} + +; CHECK-LABEL: smin_D +; CHECK-NOT: sminv +define i64 @smin_D(<2 x i64>* nocapture readonly %arr) { + %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> + %rdx.minmax.cmp18 = icmp slt <2 x i64> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 + %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 + %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 + %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt + ret i64 %r +} + + +; CHECK-LABEL: umin_B +; CHECK: uminv {{b[0-9]+}}, {{v[0-9]+}}.16b +define i8 @umin_B(<16 x i8>* nocapture readonly %arr) { + %rdx.minmax.select = load <16 x i8>, <16 x i8>* %arr + %rdx.shuf = shufflevector <16 x i8> %rdx.minmax.select, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ult <16 x i8> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i8> %rdx.minmax.select, <16 x i8> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i8> %rdx.minmax.select23, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ult <16 x i8> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i8> %rdx.minmax.select23, <16 x i8> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i8> %rdx.minmax.select26, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ult 
<16 x i8> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i8> %rdx.minmax.select26, <16 x i8> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i8> %rdx.minmax.select29, <16 x i8> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ult <16 x i8> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i8> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i8 %rdx.minmax.select29.elt, i8 %rdx.shuf30.elt + ret i8 %r +} + +; CHECK-LABEL: umin_H +; CHECK: uminv {{h[0-9]+}}, {{v[0-9]+}}.8h +define i16 @umin_H(<8 x i16>* nocapture readonly %arr) { + %rdx.minmax.select = load <8 x i16>, <8 x i16>* %arr + %rdx.shuf = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp23 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select24 = select <8 x i1> %rdx.minmax.cmp23, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf + %rdx.shuf25 = shufflevector <8 x i16> %rdx.minmax.select24, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp26 = icmp ult <8 x i16> %rdx.minmax.select24, %rdx.shuf25 + %rdx.minmax.select27 = select <8 x i1> %rdx.minmax.cmp26, <8 x i16> %rdx.minmax.select24, <8 x i16> %rdx.shuf25 + %rdx.shuf28 = shufflevector <8 x i16> %rdx.minmax.select27, <8 x i16> undef, <8 x i32> + %rdx.minmax.cmp29 = icmp ult <8 x i16> %rdx.minmax.select27, %rdx.shuf28 + %rdx.minmax.cmp29.elt = extractelement <8 x i1> %rdx.minmax.cmp29, i32 0 + %rdx.minmax.select27.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 0 + %rdx.shuf28.elt = extractelement <8 x i16> %rdx.minmax.select27, i32 1 + %r = select i1 %rdx.minmax.cmp29.elt, i16 %rdx.minmax.select27.elt, i16 %rdx.shuf28.elt + ret i16 %r +} + +; CHECK-LABEL: umin_S +; CHECK: uminv {{s[0-9]+}}, {{v[0-9]+}}.4s +define i32 @umin_S(<4 x i32>* nocapture readonly %arr) { + %rdx.minmax.select = load <4 x i32>, <4 x i32>* %arr + %rdx.shuf = shufflevector <4 x i32> %rdx.minmax.select, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp18 = icmp ult <4 x i32> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select19 = select <4 x i1> %rdx.minmax.cmp18, <4 x i32> %rdx.minmax.select, <4 x i32> %rdx.shuf + %rdx.shuf20 = shufflevector <4 x i32> %rdx.minmax.select19, <4 x i32> undef, <4 x i32> + %rdx.minmax.cmp21 = icmp ult <4 x i32> %rdx.minmax.select19, %rdx.shuf20 + %rdx.minmax.cmp21.elt = extractelement <4 x i1> %rdx.minmax.cmp21, i32 0 + %rdx.minmax.select19.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 0 + %rdx.shuf20.elt = extractelement <4 x i32> %rdx.minmax.select19, i32 1 + %r = select i1 %rdx.minmax.cmp21.elt, i32 %rdx.minmax.select19.elt, i32 %rdx.shuf20.elt + ret i32 %r +} + +; CHECK-LABEL: umin_D +; CHECK-NOT: uminv +define i64 @umin_D(<2 x i64>* nocapture readonly %arr) { + %rdx.minmax.select = load <2 x i64>, <2 x i64>* %arr + %rdx.shuf = shufflevector <2 x i64> %rdx.minmax.select, <2 x i64> undef, <2 x i32> + %rdx.minmax.cmp18 = icmp ult <2 x i64> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.cmp18.elt = extractelement <2 x i1> %rdx.minmax.cmp18, i32 0 + %rdx.minmax.select.elt = extractelement <2 x i64> %rdx.minmax.select, i32 0 + %rdx.shuf.elt = extractelement <2 x i64> %rdx.minmax.select, i32 1 + %r = select i1 %rdx.minmax.cmp18.elt, i64 %rdx.minmax.select.elt, i64 %rdx.shuf.elt + ret i64 %r +} + +; CHECK-LABEL: fmaxnm_S +; CHECK: fmaxnmv +define float @fmaxnm_S(<4 x float>* 
nocapture readonly %arr) { + %rdx.minmax.select = load <4 x float>, <4 x float>* %arr + %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> + %rdx.minmax.cmp = fcmp fast oge <4 x float> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf + %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> + %rdx.minmax.cmp1 = fcmp fast oge <4 x float> %rdx.minmax.select1, %rdx.shuf1 + %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0 + %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0 + %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1 + %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt + ret float %r +} + +; CHECK-LABEL: fminnm_S +; CHECK: fminnmv +define float @fminnm_S(<4 x float>* nocapture readonly %arr) { + %rdx.minmax.select = load <4 x float>, <4 x float>* %arr + %rdx.shuf = shufflevector <4 x float> %rdx.minmax.select, <4 x float> undef, <4 x i32> + %rdx.minmax.cmp = fcmp fast ole <4 x float> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select1 = select <4 x i1> %rdx.minmax.cmp, <4 x float> %rdx.minmax.select, <4 x float> %rdx.shuf + %rdx.shuf1 = shufflevector <4 x float> %rdx.minmax.select1, <4 x float> undef, <4 x i32> + %rdx.minmax.cmp1 = fcmp fast ole <4 x float> %rdx.minmax.select1, %rdx.shuf1 + %rdx.minmax.cmp1.elt = extractelement <4 x i1> %rdx.minmax.cmp1, i32 0 + %rdx.minmax.select1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 0 + %rdx.shuf1.elt = extractelement <4 x float> %rdx.minmax.select1, i32 1 + %r = select i1 %rdx.minmax.cmp1.elt, float %rdx.minmax.select1.elt, float %rdx.shuf1.elt + ret float %r +} + +define i16 @oversized_umax_256(<16 x i16>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_umax_256 +; CHECK: umax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: umaxv {{h[0-9]+}}, [[V0]] + %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr + %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ugt <16 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ugt <16 x i16> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ugt <16 x i16> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ugt <16 x i16> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + ret i16 %r +} + +define i32 @oversized_umax_512(<16 x i32>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_umax_512 +; CHECK: 
umax v +; CHECK-NEXT: umax v +; CHECK-NEXT: umax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: umaxv {{s[0-9]+}}, [[V0]] + %arr.load = load <16 x i32>, <16 x i32>* %arr + %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ugt <16 x i32> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ugt <16 x i32> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ugt <16 x i32> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ugt <16 x i32> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + ret i32 %r +} + +define i16 @oversized_umin_256(<16 x i16>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_umin_256 +; CHECK: umin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: uminv {{h[0-9]+}}, [[V0]] + %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr + %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ult <16 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ult <16 x i16> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ult <16 x i16> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ult <16 x i16> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + ret i16 %r +} + +define i32 @oversized_umin_512(<16 x i32>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_umin_512 +; CHECK: umin v +; CHECK-NEXT: umin v +; CHECK-NEXT: umin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: uminv {{s[0-9]+}}, [[V0]] + %arr.load = load <16 x i32>, <16 x i32>* %arr + %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp ult <16 x i32> 
%arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp ult <16 x i32> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp ult <16 x i32> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp ult <16 x i32> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + ret i32 %r +} + +define i16 @oversized_smax_256(<16 x i16>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_smax_256 +; CHECK: smax [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: smaxv {{h[0-9]+}}, [[V0]] + %arr.load = load <16 x i16>, <16 x i16>* %arr + %rdx.shuf = shufflevector <16 x i16> %arr.load, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp sgt <16 x i16> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %arr.load, <16 x i16> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp sgt <16 x i16> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp sgt <16 x i16> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp sgt <16 x i16> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + ret i16 %r +} + +define i32 @oversized_smax_512(<16 x i32>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_smax_512 +; CHECK: smax v +; CHECK-NEXT: smax v +; CHECK-NEXT: smax [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: smaxv {{s[0-9]+}}, [[V0]] + %arr.load = load <16 x i32>, <16 x i32>* %arr + %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp sgt <16 x i32> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp sgt <16 x i32> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> 
%rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp sgt <16 x i32> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp sgt <16 x i32> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + ret i32 %r +} + +define i16 @oversized_smin_256(<16 x i16>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_smin_256 +; CHECK: smin [[V0:v[0-9]+]].8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h +; CHECK: sminv {{h[0-9]+}}, [[V0]] + %rdx.minmax.select = load <16 x i16>, <16 x i16>* %arr + %rdx.shuf = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i16> %rdx.minmax.select23, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp slt <16 x i16> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i16> %rdx.minmax.select23, <16 x i16> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i16> %rdx.minmax.select26, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp slt <16 x i16> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x i16> %rdx.minmax.select26, <16 x i16> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i16> %rdx.minmax.select29, <16 x i16> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp slt <16 x i16> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i16> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i16 %rdx.minmax.select29.elt, i16 %rdx.shuf30.elt + ret i16 %r +} + +define i32 @oversized_smin_512(<16 x i32>* nocapture readonly %arr) { +; CHECK-LABEL: oversized_smin_512 +; CHECK: smin v +; CHECK-NEXT: smin v +; CHECK-NEXT: smin [[V0:v[0-9]+]].4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s +; CHECK-NEXT: sminv {{s[0-9]+}}, [[V0]] + %arr.load = load <16 x i32>, <16 x i32>* %arr + %rdx.shuf = shufflevector <16 x i32> %arr.load, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp22 = icmp slt <16 x i32> %arr.load, %rdx.shuf + %rdx.minmax.select23 = select <16 x i1> %rdx.minmax.cmp22, <16 x i32> %arr.load, <16 x i32> %rdx.shuf + %rdx.shuf24 = shufflevector <16 x i32> %rdx.minmax.select23, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp25 = icmp slt <16 x i32> %rdx.minmax.select23, %rdx.shuf24 + %rdx.minmax.select26 = select <16 x i1> %rdx.minmax.cmp25, <16 x i32> %rdx.minmax.select23, <16 x i32> %rdx.shuf24 + %rdx.shuf27 = shufflevector <16 x i32> %rdx.minmax.select26, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp28 = icmp slt <16 x i32> %rdx.minmax.select26, %rdx.shuf27 + %rdx.minmax.select29 = select <16 x i1> %rdx.minmax.cmp28, <16 x 
i32> %rdx.minmax.select26, <16 x i32> %rdx.shuf27 + %rdx.shuf30 = shufflevector <16 x i32> %rdx.minmax.select29, <16 x i32> undef, <16 x i32> + %rdx.minmax.cmp31 = icmp slt <16 x i32> %rdx.minmax.select29, %rdx.shuf30 + %rdx.minmax.cmp31.elt = extractelement <16 x i1> %rdx.minmax.cmp31, i32 0 + %rdx.minmax.select29.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 0 + %rdx.shuf30.elt = extractelement <16 x i32> %rdx.minmax.select29, i32 1 + %r = select i1 %rdx.minmax.cmp31.elt, i32 %rdx.minmax.select29.elt, i32 %rdx.shuf30.elt + ret i32 %r +} diff --git a/test/CodeGen/AArch64/aarch64-smax-constantfold.ll b/test/CodeGen/AArch64/aarch64-smax-constantfold.ll new file mode 100644 index 000000000000..0e5b59f95126 --- /dev/null +++ b/test/CodeGen/AArch64/aarch64-smax-constantfold.ll @@ -0,0 +1,12 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s + +; Function Attrs: nounwind readnone +declare <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16>, <4 x i16>) + +; CHECK-LABEL: test +define <4 x i16> @test() { +entry: +; CHECK: movi d{{[0-9]+}}, #0000000000000000 + %0 = tail call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> , <4 x i16> zeroinitializer) + ret <4 x i16> %0 +} diff --git a/test/CodeGen/AArch64/addsub_ext.ll b/test/CodeGen/AArch64/addsub_ext.ll index f0c7572ebf13..f30ab89f238b 100644 --- a/test/CodeGen/AArch64/addsub_ext.ll +++ b/test/CodeGen/AArch64/addsub_ext.ll @@ -80,6 +80,64 @@ end: ret void } +define void @sub_i8rhs() minsize { +; CHECK-LABEL: sub_i8rhs: + %val8_tmp = load i8, i8* @var8 + %lhs32 = load i32, i32* @var32 + %lhs64 = load i64, i64* @var64 + + ; Need this to prevent extension upon load and give a vanilla i8 operand. + %val8 = add i8 %val8_tmp, 123 + + +; Zero-extending to 32-bits + %rhs32_zext = zext i8 %val8 to i32 + %res32_zext = sub i32 %lhs32, %rhs32_zext + store volatile i32 %res32_zext, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb + + %rhs32_zext_shift = shl i32 %rhs32_zext, 3 + %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift + store volatile i32 %res32_zext_shift, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxtb #3 + + +; Zero-extending to 64-bits + %rhs64_zext = zext i8 %val8 to i64 + %res64_zext = sub i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb + + %rhs64_zext_shift = shl i64 %rhs64_zext, 1 + %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtb #1 + +; Sign-extending to 32-bits + %rhs32_sext = sext i8 %val8 to i32 + %res32_sext = sub i32 %lhs32, %rhs32_sext + store volatile i32 %res32_sext, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb + + %rhs32_sext_shift = shl i32 %rhs32_sext, 1 + %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift + store volatile i32 %res32_sext_shift, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxtb #1 + +; Sign-extending to 64-bits + %rhs64_sext = sext i8 %val8 to i64 + %res64_sext = sub i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb + + %rhs64_sext_shift = shl i64 %rhs64_sext, 4 + %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtb #4 + + ret void +} + define void @addsub_i16rhs() minsize { ; CHECK-LABEL: addsub_i16rhs: 
%val16_tmp = load i16, i16* @var16 @@ -155,6 +213,64 @@ end: ret void } +define void @sub_i16rhs() minsize { +; CHECK-LABEL: sub_i16rhs: + %val16_tmp = load i16, i16* @var16 + %lhs32 = load i32, i32* @var32 + %lhs64 = load i64, i64* @var64 + + ; Need this to prevent extension upon load and give a vanilla i16 operand. + %val16 = add i16 %val16_tmp, 123 + + +; Zero-extending to 32-bits + %rhs32_zext = zext i16 %val16 to i32 + %res32_zext = sub i32 %lhs32, %rhs32_zext + store volatile i32 %res32_zext, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth + + %rhs32_zext_shift = shl i32 %rhs32_zext, 3 + %res32_zext_shift = sub i32 %lhs32, %rhs32_zext_shift + store volatile i32 %res32_zext_shift, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, uxth #3 + + +; Zero-extending to 64-bits + %rhs64_zext = zext i16 %val16 to i64 + %res64_zext = sub i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth + + %rhs64_zext_shift = shl i64 %rhs64_zext, 1 + %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxth #1 + +; Sign-extending to 32-bits + %rhs32_sext = sext i16 %val16 to i32 + %res32_sext = sub i32 %lhs32, %rhs32_sext + store volatile i32 %res32_sext, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth + + %rhs32_sext_shift = shl i32 %rhs32_sext, 1 + %res32_sext_shift = sub i32 %lhs32, %rhs32_sext_shift + store volatile i32 %res32_sext_shift, i32* @var32 +; CHECK: sub {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}}, sxth #1 + +; Sign-extending to 64-bits + %rhs64_sext = sext i16 %val16 to i64 + %res64_sext = sub i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth + + %rhs64_sext_shift = shl i64 %rhs64_sext, 4 + %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxth #4 + + ret void +} + ; N.b. we could probably check more here ("add w2, w3, w1, uxtw" for ; example), but the remaining instructions are probably not idiomatic ; in the face of "add/sub (shifted register)" so I don't intend to. 
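One constraint the shift amounts above silently respect: the add/sub (extended register) forms can only fold a left shift of 0 to 4 into the {u,s}xt{b,h,w} operand, which is why no test here shifts by more than 4. A minimal sketch of the boundary case, using a hypothetical function that is not part of the imported test:

define i64 @sub_i16rhs_shift5(i16 %val16, i64 %lhs64) minsize {
  %rhs64_zext = zext i16 %val16 to i64
  ; A shift of 5 is outside the 0-4 range the extended-register form encodes,
  ; so this sub cannot use a uxth operand. (Hypothetical check below.)
  %rhs64_zext_shift = shl i64 %rhs64_zext, 5
  %res64 = sub i64 %lhs64, %rhs64_zext_shift
; CHECK-NOT: uxth #5
  ret i64 %res64
}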
@@ -187,3 +303,33 @@ define void @addsub_i32rhs() minsize { ret void } + +define void @sub_i32rhs() minsize { +; CHECK-LABEL: sub_i32rhs: + %val32_tmp = load i32, i32* @var32 + %lhs64 = load i64, i64* @var64 + + %val32 = add i32 %val32_tmp, 123 + + %rhs64_zext = zext i32 %val32 to i64 + %res64_zext = sub i64 %lhs64, %rhs64_zext + store volatile i64 %res64_zext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw + + %rhs64_zext_shift = shl i64 %rhs64_zext, 2 + %res64_zext_shift = sub i64 %lhs64, %rhs64_zext_shift + store volatile i64 %res64_zext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, uxtw #2 + + %rhs64_sext = sext i32 %val32 to i64 + %res64_sext = sub i64 %lhs64, %rhs64_sext + store volatile i64 %res64_sext, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw + + %rhs64_sext_shift = shl i64 %rhs64_sext, 2 + %res64_sext_shift = sub i64 %lhs64, %rhs64_sext_shift + store volatile i64 %res64_sext_shift, i64* @var64 +; CHECK: sub {{x[0-9]+}}, {{x[0-9]+}}, {{w[0-9]+}}, sxtw #2 + + ret void +} diff --git a/test/CodeGen/AArch64/alloca.ll b/test/CodeGen/AArch64/alloca.ll index 5b2278ce8a35..45754377b2d9 100644 --- a/test/CodeGen/AArch64/alloca.ll +++ b/test/CodeGen/AArch64/alloca.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s +; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s declare void @use_addr(i8*) diff --git a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll index 173a440326ac..a66ea0df2e98 100644 --- a/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll +++ b/test/CodeGen/AArch64/arm64-2011-03-17-AsmPrinterCrash.ll @@ -22,22 +22,22 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone !llvm.dbg.sp = !{!1, !7, !10, !11, !12} !0 = !DIGlobalVariable(name: "vsplive", line: 617, isLocal: true, isDefinition: true, scope: !1, file: !2, type: !6) -!1 = !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) +!1 = distinct !DISubprogram(name: "drt_vsprintf", line: 616, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) !2 = !DIFile(filename: "print.i", directory: "/Volumes/Ebi/echeng/radars/r9146594") -!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21) +!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (http://llvm.org/git/clang.git git:/git/puzzlebox/clang.git/ c4d1aea01c4444eb81bdbf391f1be309127c3cf1)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21) !4 = !DISubroutineType(types: !5) !5 = !{!6} !6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!7 = !DISubprogram(name: "putc_mem", line: 30, isLocal: 
true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8) +!7 = distinct !DISubprogram(name: "putc_mem", line: 30, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8) !8 = !DISubroutineType(types: !9) !9 = !{null} -!10 = !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) -!11 = !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) -!12 = !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8) +!10 = distinct !DISubprogram(name: "print_double", line: 203, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) +!11 = distinct !DISubprogram(name: "print_number", line: 75, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !4) +!12 = distinct !DISubprogram(name: "get_flags", line: 508, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !2, type: !8) !13 = !DILocation(line: 653, column: 5, scope: !14) !14 = distinct !DILexicalBlock(line: 652, column: 35, file: !20, scope: !15) !15 = distinct !DILexicalBlock(line: 616, column: 1, file: !20, scope: !1) -!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "do_tab_convert", line: 853, scope: !17, file: !2, type: !6) +!16 = !DILocalVariable(name: "do_tab_convert", line: 853, scope: !17, file: !2, type: !6) !17 = distinct !DILexicalBlock(line: 850, column: 12, file: !20, scope: !14) !18 = !DILocation(line: 853, column: 11, scope: !17) !19 = !DILocation(line: 853, column: 29, scope: !17) diff --git a/test/CodeGen/AArch64/arm64-aapcs-be.ll b/test/CodeGen/AArch64/arm64-aapcs-be.ll index f27570acc820..e77952e4b8a1 100644 --- a/test/CodeGen/AArch64/arm64-aapcs-be.ll +++ b/test/CodeGen/AArch64/arm64-aapcs-be.ll @@ -32,7 +32,7 @@ define float @test_block_addr([8 x float], [1 x float] %in) { define void @test_block_addr_callee() { ; CHECK-LABEL: test_block_addr_callee: -; CHECK: str {{[a-z0-9]+}}, [sp] +; CHECK: str {{[a-z0-9]+}}, [sp, #-16]! ; CHECK: bl test_block_addr %val = insertvalue [1 x float] undef, float 0.0, 0 call float @test_block_addr([8 x float] undef, [1 x float] %val) diff --git a/test/CodeGen/AArch64/arm64-aapcs.ll b/test/CodeGen/AArch64/arm64-aapcs.ll index d0880cd4f3eb..441f45bf90b3 100644 --- a/test/CodeGen/AArch64/arm64-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-aapcs.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false < %s | FileCheck %s +; RUN: llc -mtriple=arm64-linux-gnu -enable-misched=false -disable-post-ra < %s | FileCheck %s @var = global i32 0, align 4 @@ -27,12 +27,13 @@ define [2 x i64] @test_i64x2_align(i32, [2 x i64] %arg, i32 %after) { ; Check stack slots are 64-bit at all times. define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, i32 %int, i64 %long) { - ; Part of last store. Blasted scheduler. 
-; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32] - %ext_bool = zext i1 %bool to i64 store volatile i64 %ext_bool, i64* @var64, align 8 ; CHECK: ldrb w[[EXT:[0-9]+]], [sp] + + ; Part of last store. Blasted scheduler. +; CHECK: ldr [[LONG:x[0-9]+]], [sp, #32] + ; CHECK: and x[[EXTED:[0-9]+]], x[[EXT]], #0x1 ; CHECK: str x[[EXTED]], [{{x[0-9]+}}, :lo12:var64] @@ -63,8 +64,8 @@ define void @test_stack_slots([8 x i32], i1 %bool, i8 %char, i16 %short, define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) { %ext_bool = zext i1 %bool to i64 store volatile i64 %ext_bool, i64* @var64 -; CHECK: and [[EXT:x[0-9]+]], x0, #0x1 -; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: and w[[EXT:[0-9]+]], w0, #0x1 +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] %ext_char = sext i8 %char to i64 store volatile i64 %ext_char, i64* @var64 @@ -73,13 +74,13 @@ define void @test_extension(i1 %bool, i8 %char, i16 %short, i32 %int) { %ext_short = zext i16 %short to i64 store volatile i64 %ext_short, i64* @var64 -; CHECK: and [[EXT:x[0-9]+]], x2, #0xffff -; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: and w[[EXT:[0-9]+]], w2, #0xffff +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] %ext_int = zext i32 %int to i64 store volatile i64 %ext_int, i64* @var64 -; CHECK: ubfx [[EXT:x[0-9]+]], x3, #0, #32 -; CHECK: str [[EXT]], [{{x[0-9]+}}, :lo12:var64] +; CHECK: mov w[[EXT:[0-9]+]], w3 +; CHECK: str x[[EXT]], [{{x[0-9]+}}, :lo12:var64] ret void } diff --git a/test/CodeGen/AArch64/arm64-abi_align.ll b/test/CodeGen/AArch64/arm64-abi_align.ll index 1c1b58b8b140..dc9884f12f57 100644 --- a/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/test/CodeGen/AArch64/arm64-abi_align.ll @@ -508,7 +508,7 @@ entry: ; "i64 %0" should be in register x7. ; "i32 8" should be on stack at [sp]. ; CHECK: ldr x7, [{{x[0-9]+}}] -; CHECK: str {{w[0-9]+}}, [sp] +; CHECK: str {{w[0-9]+}}, [sp, #-16]! ; FAST-LABEL: i64_split ; FAST: ldr x7, [{{x[0-9]+}}] ; FAST: mov x[[R0:[0-9]+]], sp diff --git a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll index 4703d25a6016..d46800d34cac 100644 --- a/test/CodeGen/AArch64/arm64-addr-type-promotion.ll +++ b/test/CodeGen/AArch64/arm64-addr-type-promotion.ll @@ -1,6 +1,7 @@ -; RUN: llc -march arm64 < %s | FileCheck %s +; RUN: llc -march arm64 < %s -aarch64-collect-loh=false | FileCheck %s ; rdar://13452552 -; ModuleID = 'reduced_test.ll' +; Disable the collecting of LOH so that the labels do not get in the +; way of the NEXT patterns. 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128" target triple = "arm64-apple-ios3.0.0" @@ -13,8 +14,8 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) { ; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]] ; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], w0, sxtw] ; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw] -; CHECK-NEXT cmp [[BLOCKVAL1]], [[BLOCKVAL2]] -; CHECK-NEXT b.ne +; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]] +; CHECK-NEXT: b.ne ; Next BB ; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw ; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw diff --git a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll index eb0cd3547bda..36424506bee8 100644 --- a/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll +++ b/test/CodeGen/AArch64/arm64-alloca-frame-pointer-offset.ll @@ -1,9 +1,9 @@ ; RUN: llc -march=arm64 -mcpu=cyclone < %s | FileCheck %s ; CHECK: foo -; CHECK: ldr w[[REG:[0-9]+]], [x19, #264] -; CHECK: str w[[REG]], [x19, #132] -; CHECK: ldr w{{[0-9]+}}, [x19, #264] +; CHECK: str w[[REG0:[0-9]+]], [x19, #264] +; CHECK: mov w[[REG1:[0-9]+]], w[[REG0]] +; CHECK: str w[[REG1]], [x19, #132] define i32 @foo(i32 %a) nounwind { %retval = alloca i32, align 4 diff --git a/test/CodeGen/AArch64/arm64-arith.ll b/test/CodeGen/AArch64/arm64-arith.ll index f36e706b15dd..d5d9a1b98174 100644 --- a/test/CodeGen/AArch64/arm64-arith.ll +++ b/test/CodeGen/AArch64/arm64-arith.ll @@ -123,7 +123,8 @@ entry: define i64 @t14(i16 %a, i64 %x) nounwind ssp { entry: ; CHECK-LABEL: t14: -; CHECK: add x0, x1, w0, uxth #3 +; CHECK: and w8, w0, #0xffff +; CHECK: add x0, x1, w8, uxtw #3 ; CHECK: ret %c = zext i16 %a to i64 %d = shl i64 %c, 3 diff --git a/test/CodeGen/AArch64/arm64-atomic-128.ll b/test/CodeGen/AArch64/arm64-atomic-128.ll index a76cf74a6d0c..44c24c51f0df 100644 --- a/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -173,10 +173,13 @@ define i128 @atomic_load_seq_cst(i128* %p) { ret i128 %r } -define i128 @atomic_load_relaxed(i128* %p) { +define i128 @atomic_load_relaxed(i64, i64, i128* %p) { ; CHECK-LABEL: atomic_load_relaxed: ; CHECK-NOT: dmb -; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0] +; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x2] +; CHECK-NEXT: stxp [[SUCCESS:w[0-9]+]], [[LO]], [[HI]], [x2] +; CHECK: cbnz [[SUCCESS]], [[LABEL]] ; CHECK-NOT: dmb %r = load atomic i128, i128* %p monotonic, align 16 ret i128 %r diff --git a/test/CodeGen/AArch64/arm64-atomic.ll b/test/CodeGen/AArch64/arm64-atomic.ll index 0824bd881a95..5d8d60de5fc5 100644 --- a/test/CodeGen/AArch64/arm64-atomic.ll +++ b/test/CodeGen/AArch64/arm64-atomic.ll @@ -2,13 +2,17 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 { ; CHECK-LABEL: val_compare_and_swap: -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: -; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] +; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: +; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], w1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x0] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] +; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]] +; 
CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -17,13 +21,16 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 { define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { ; CHECK-LABEL: val_compare_and_swap_from_load: ; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2] -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] ; CHECK-NEXT: cmp [[RESULT]], w1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %new = load i32, i32* %pnew %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire %val = extractvalue { i32, i1 } %pair, 0 @@ -32,13 +39,17 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 { define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 { ; CHECK-LABEL: val_compare_and_swap_rel: -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: -; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0] +; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: +; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], w1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] -; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x0] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] +; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]] +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic %val = extractvalue { i32, i1 } %pair, 0 ret i32 %val @@ -47,13 +58,16 @@ define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 { define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 { ; CHECK-LABEL: val_compare_and_swap_64: ; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0 -; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK-NEXT: ldxr [[RESULT:x[0-9]+]], [x[[ADDR]]] ; CHECK-NEXT: cmp [[RESULT]], x1 -; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]] +; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]] ; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]] -; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]] -; CHECK-NEXT: [[LABEL2]]: +; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]] +; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]] +; CHECK-NEXT: [[FAILBB]]: +; CHECK-NEXT: clrex +; CHECK-NEXT: [[EXITBB]]: %pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic %val = extractvalue { i64, i1 } %pair, 0 ret i64 %val @@ -61,13 +75,13 @@ define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 { define i32 @fetch_and_nand(i32* %p) #0 { ; CHECK-LABEL: fetch_and_nand: -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0] ; CHECK: mvn [[TMP_REG:w[0-9]+]], w[[DEST_REG]] ; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], [[TMP_REG]], #0xfffffff8 ; CHECK-NOT: 
stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] ; CHECK: mov x0, x[[DEST_REG]] %val = atomicrmw nand i32* %p, i32 7 release ret i32 %val @@ -76,12 +90,12 @@ define i32 @fetch_and_nand(i32* %p) #0 { define i64 @fetch_and_nand_64(i64* %p) #0 { ; CHECK-LABEL: fetch_and_nand_64: ; CHECK: mov x[[ADDR:[0-9]+]], x0 -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x[[ADDR]]] ; CHECK: mvn w[[TMP_REG:[0-9]+]], w[[DEST_REG]] ; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8 ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] %val = atomicrmw nand i64* %p, i64 7 acq_rel ret i64 %val @@ -90,12 +104,12 @@ define i64 @fetch_and_nand_64(i64* %p) #0 { define i32 @fetch_and_or(i32* %p) #0 { ; CHECK-LABEL: fetch_and_or: ; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #0x5 -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0] ; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]] ; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]] ; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] ; CHECK: mov x0, x[[DEST_REG]] %val = atomicrmw or i32* %p, i32 5 seq_cst ret i32 %val @@ -104,11 +118,11 @@ define i32 @fetch_and_or(i32* %p) #0 { define i64 @fetch_and_or_64(i64* %p) #0 { ; CHECK: fetch_and_or_64: ; CHECK: mov x[[ADDR:[0-9]+]], x0 -; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]: +; CHECK: [[TRYBB:.?LBB[0-9_]+]]: ; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]] ; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7 ; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]] -; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]] +; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]] %val = atomicrmw or i64* %p, i64 7 monotonic ret i64 %val } diff --git a/test/CodeGen/AArch64/arm64-builtins-linux.ll b/test/CodeGen/AArch64/arm64-builtins-linux.ll new file mode 100644 index 000000000000..34fa1b471561 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-builtins-linux.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s + +; Function Attrs: nounwind readnone +declare i8* @llvm.aarch64.thread.pointer() #1 + +define i8* @thread_pointer() { +; CHECK: thread_pointer: +; CHECK: mrs {{x[0-9]+}}, TPIDR_EL0 + %1 = tail call i8* @llvm.aarch64.thread.pointer() + ret i8* %1 +} diff --git a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll index 4e47ab6c03f3..25d874e54cb7 100644 --- a/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll +++ b/test/CodeGen/AArch64/arm64-ccmp-heuristics.ll @@ -15,10 +15,10 @@ target triple = "arm64-apple-ios7.0.0" ; CHECK: Maze1 ; CHECK: %if.then ; CHECK: cmp x{{[0-9]+}}, #2 -; CHECK-NEXT b.cc +; CHECK-NEXT: b.lo ; CHECK: %if.then ; CHECK: cmp x{{[0-9]+}}, #2 -; CHECK-NEXT b.cc +; CHECK-NEXT: b.lo define i32 @Maze1() nounwind ssp { entry: %0 = load i64, i64* @channelColumns, align 8, !tbaa !0 diff --git a/test/CodeGen/AArch64/arm64-ccmp.ll b/test/CodeGen/AArch64/arm64-ccmp.ll index ff18f7364337..72d3b8331162 100644 --- a/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/test/CodeGen/AArch64/arm64-ccmp.ll @@ -104,11 +104,14 @@ if.end: ; preds = %if.then, %lor.lhs.f ; Speculatively 
execute division by zero. ; The sdiv/udiv instructions do not trap when the divisor is zero, so they are ; safe to speculate. -; CHECK: speculate_division -; CHECK-NOT: cmp -; CHECK: sdiv -; CHECK: cmp -; CHECK-NEXT: ccmp +; CHECK-LABEL: speculate_division: +; CHECK: cmp w0, #1 +; CHECK: sdiv [[DIVRES:w[0-9]+]], w1, w0 +; CHECK: ccmp [[DIVRES]], #16, #0, ge +; CHECK: b.gt [[BLOCK:LBB[0-9_]+]] +; CHECK: bl _foo +; CHECK: [[BLOCK]]: +; CHECK: orr w0, wzr, #0x7 define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp { entry: %cmp = icmp sgt i32 %a, 0 @@ -287,3 +290,156 @@ sw.bb.i.i: %code1.i.i.phi.trans.insert = getelementptr inbounds %str1, %str1* %0, i64 0, i32 0, i32 0, i64 16 br label %sw.bb.i.i } + +; CHECK-LABEL: select_and +define i64 @select_and(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { +; CHECK: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #0, ne +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret + %1 = icmp slt i32 %w0, %w1 + %2 = icmp ne i32 5, %w1 + %3 = and i1 %1, %2 + %sel = select i1 %3, i64 %x2, i64 %x3 + ret i64 %sel +} + +; CHECK-LABEL: select_or +define i64 @select_or(i32 %w0, i32 %w1, i64 %x2, i64 %x3) { +; CHECK: cmp w1, #5 +; CHECK-NEXT: ccmp w0, w1, #8, eq +; CHECK-NEXT: csel x0, x2, x3, lt +; CHECK-NEXT: ret + %1 = icmp slt i32 %w0, %w1 + %2 = icmp ne i32 5, %w1 + %3 = or i1 %1, %2 + %sel = select i1 %3, i64 %x2, i64 %x3 + ret i64 %sel +} + +; CHECK-LABEL: select_complicated +define i16 @select_complicated(double %v1, double %v2, i16 %a, i16 %b) { +; CHECK: ldr [[REG:d[0-9]+]], +; CHECK: fcmp d0, d2 +; CHECK-NEXT: fmov d2, #13.00000000 +; CHECK-NEXT: fccmp d1, d2, #4, ne +; CHECK-NEXT: fccmp d0, d1, #1, ne +; CHECK-NEXT: fccmp d0, d1, #4, vc +; CHECK-NEXT: csel w0, w0, w1, eq + %1 = fcmp one double %v1, %v2 + %2 = fcmp oeq double %v2, 13.0 + %3 = fcmp oeq double %v1, 42.0 + %or0 = or i1 %2, %3 + %or1 = or i1 %1, %or0 + %sel = select i1 %or1, i16 %a, i16 %b + ret i16 %sel +} + +; CHECK-LABEL: gccbug +define i64 @gccbug(i64 %x0, i64 %x1) { +; CHECK: cmp x0, #2 +; CHECK-NEXT: ccmp x0, #4, #4, ne +; CHECK-NEXT: ccmp x1, #0, #0, eq +; CHECK-NEXT: orr w[[REGNUM:[0-9]+]], wzr, #0x1 +; CHECK-NEXT: cinc x0, x[[REGNUM]], eq +; CHECK-NEXT: ret + %cmp0 = icmp eq i64 %x1, 0 + %cmp1 = icmp eq i64 %x0, 2 + %cmp2 = icmp eq i64 %x0, 4 + + %or = or i1 %cmp2, %cmp1 + %and = and i1 %or, %cmp0 + + %sel = select i1 %and, i64 2, i64 1 + ret i64 %sel +} + +; CHECK-LABEL: select_ororand +define i32 @select_ororand(i32 %w0, i32 %w1, i32 %w2, i32 %w3) { +; CHECK: cmp w3, #4 +; CHECK-NEXT: ccmp w2, #2, #0, gt +; CHECK-NEXT: ccmp w1, #13, #2, ge +; CHECK-NEXT: ccmp w0, #0, #4, ls +; CHECK-NEXT: csel w0, w3, wzr, eq +; CHECK-NEXT: ret + %c0 = icmp eq i32 %w0, 0 + %c1 = icmp ugt i32 %w1, 13 + %c2 = icmp slt i32 %w2, 2 + %c4 = icmp sgt i32 %w3, 4 + %or = or i1 %c0, %c1 + %and = and i1 %c2, %c4 + %or1 = or i1 %or, %and + %sel = select i1 %or1, i32 %w3, i32 0 + ret i32 %sel +} + +; CHECK-LABEL: select_andor +define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) { +; CHECK: cmp w1, w2 +; CHECK-NEXT: ccmp w0, #0, #4, lt +; CHECK-NEXT: ccmp w0, w1, #0, eq +; CHECK-NEXT: csel w0, w0, w1, eq +; CHECK-NEXT: ret + %c0 = icmp eq i32 %v1, %v2 + %c1 = icmp sge i32 %v2, %v3 + %c2 = icmp eq i32 %v1, 0 + %or = or i1 %c2, %c1 + %and = and i1 %or, %c0 + %sel = select i1 %and, i32 %v1, i32 %v2 + ret i32 %sel +} + +; CHECK-LABEL: select_noccmp1 +define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) { +; CHECK: cmp x0, #0 +; CHECK-NEXT: cset [[REG0:w[0-9]+]], lt +; CHECK-NEXT: cmp x0, #13 +; CHECK-NOT: ccmp +;
CHECK-NEXT: cset [[REG1:w[0-9]+]], gt +; CHECK-NEXT: cmp x2, #2 +; CHECK-NEXT: cset [[REG2:w[0-9]+]], lt +; CHECK-NEXT: cmp x2, #4 +; CHECK-NEXT: cset [[REG3:w[0-9]+]], gt +; CHECK-NEXT: and [[REG4:w[0-9]+]], [[REG0]], [[REG1]] +; CHECK-NEXT: and [[REG5:w[0-9]+]], [[REG2]], [[REG3]] +; CHECK-NEXT: orr [[REG6:w[0-9]+]], [[REG4]], [[REG5]] +; CHECK-NEXT: cmp [[REG6]], #0 +; CHECK-NEXT: csel x0, xzr, x3, ne +; CHECK-NEXT: ret + %c0 = icmp slt i64 %v1, 0 + %c1 = icmp sgt i64 %v1, 13 + %c2 = icmp slt i64 %v3, 2 + %c4 = icmp sgt i64 %v3, 4 + %and0 = and i1 %c0, %c1 + %and1 = and i1 %c2, %c4 + %or = or i1 %and0, %and1 + %sel = select i1 %or, i64 0, i64 %r + ret i64 %sel +} + +@g = global i32 0 + +; Should not use ccmp if we have to compute the or expression in an integer +; register anyway because of other users. +; CHECK-LABEL: select_noccmp2 +define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) { +; CHECK: cmp x0, #0 +; CHECK-NEXT: cset [[REG0:w[0-9]+]], lt +; CHECK-NOT: ccmp +; CHECK-NEXT: cmp x0, #13 +; CHECK-NEXT: cset [[REG1:w[0-9]+]], gt +; CHECK-NEXT: orr [[REG2:w[0-9]+]], [[REG0]], [[REG1]] +; CHECK-NEXT: cmp [[REG2]], #0 +; CHECK-NEXT: csel x0, xzr, x3, ne +; CHECK-NEXT: sbfx [[REG3:w[0-9]+]], [[REG2]], #0, #1 +; CHECK-NEXT: adrp x[[REGN4:[0-9]+]], _g@PAGE +; CHECK-NEXT: str [[REG3]], [x[[REGN4]], _g@PAGEOFF] +; CHECK-NEXT: ret + %c0 = icmp slt i64 %v1, 0 + %c1 = icmp sgt i64 %v1, 13 + %or = or i1 %c0, %c1 + %sel = select i1 %or, i64 0, i64 %r + %ext = sext i1 %or to i32 + store volatile i32 %ext, i32* @g + ret i64 %sel +} diff --git a/test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll b/test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll new file mode 100644 index 000000000000..528d2538bb4a --- /dev/null +++ b/test/CodeGen/AArch64/arm64-coalescing-MOVi32imm.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s | FileCheck %s + +; CHECK: orr w0, wzr, #0x1 +; CHECK-NEXT: bl foo +; CHECK-NEXT: orr w0, wzr, #0x1 +; CHECK-NEXT: bl foo + +target triple = "aarch64--linux-android" +declare i32 @foo(i32) + +; Function Attrs: nounwind uwtable +define i32 @main() { +entry: + %call = tail call i32 @foo(i32 1) + %call1 = tail call i32 @foo(i32 1) + ret i32 0 +} diff --git a/test/CodeGen/AArch64/arm64-collect-loh.ll b/test/CodeGen/AArch64/arm64-collect-loh.ll index c0aa63cc4331..59147d401a30 100644 --- a/test/CodeGen/AArch64/arm64-collect-loh.ll +++ b/test/CodeGen/AArch64/arm64-collect-loh.ll @@ -51,3 +51,607 @@ if.end4: ; preds = %if.then2, %if.then, %add6 = add nsw i32 %tmp3, %t.addr.0 ret i32 %add6 } + +@C = common global i32 0, align 4 + +; Check that we catch AdrpLdrGotLdr case when we have a simple chain: +; adrp -> ldrgot -> ldr. +; CHECK-LABEL: _getC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i32 @getC() { + %res = load i32, i32* @C, align 4 + ret i32 %res +} + +; LDRSW supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. 
+; CHECK-LABEL: _getSExtC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsw x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i64 @getSExtC() { + %res = load i32, i32* @C, align 4 + %sextres = sext i32 %res to i64 + ret i64 %sextres +} + +; It may not be safe to fold the literal in the load if the address is +; used several times. +; Make sure we emit AdrpLdrGot for those. +; CHECK-LABEL: _getSeveralC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0 +; CHECK-NEXT: str [[ADD]], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] +define void @getSeveralC(i32 %t) { +entry: + %tmp = load i32, i32* @C, align 4 + %add = add nsw i32 %tmp, %t + store i32 %add, i32* @C, align 4 + ret void +} + +; Make sure we catch that: +; adrp -> ldrgot -> str. +; CHECK-LABEL: _setC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _C@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _C@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define void @setC(i32 %t) { +entry: + store i32 %t, i32* @C, align 4 + ret void +} + +; Perform the same tests for an internal global and a displacement +; in the addressing mode. +; Indeed we will get an ADD for those instead of LOADGot. +@InternalC = internal global i32 0, align 4 + +; Check that we catch AdrpAddLdr case when we have a simple chain: +; adrp -> add -> ldr. +; CHECK-LABEL: _getInternalCPlus4 +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr w0, {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define i32 @getInternalCPlus4() { + %addr = getelementptr i32, i32* @InternalC, i32 4 + %res = load i32, i32* %addr, align 4 + ret i32 %res +} + +; LDRSW supports loading from a literal. +; Make sure we emit AdrpAddLdr for those. +; CHECK-LABEL: _getSExtInternalCPlus4 +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsw x0, {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddLdr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define i64 @getSExtInternalCPlus4() { + %addr = getelementptr i32, i32* @InternalC, i32 4 + %res = load i32, i32* %addr, align 4 + %sextres = sext i32 %res to i64 + ret i64 %sextres +} + +; It may not be safe to fold the literal in the load if the address is +; used several times.
+; Make sure we emit AdrpAdd for those. +; CHECK-LABEL: _getSeveralInternalCPlus4 +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0 +; CHECK-NEXT: str [[ADD]], {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAdd [[ADRP_LABEL]], [[ADDGOT_LABEL]] +define void @getSeveralInternalCPlus4(i32 %t) { +entry: + %addr = getelementptr i32, i32* @InternalC, i32 4 + %tmp = load i32, i32* %addr, align 4 + %add = add nsw i32 %tmp, %t + store i32 %add, i32* %addr, align 4 + ret void +} + +; Make sure we catch that: +; adrp -> add -> str. +; CHECK-LABEL: _setInternalCPlus4 +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[ADDGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: add [[ADDGOT_REG:x[0-9]+]], [[ADRP_REG]], _InternalC@PAGEOFF +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str w0, {{\[}}[[ADDGOT_REG]], #16] +; CHECK-NEXT: ret +; CHECK: .loh AdrpAddStr [[ADRP_LABEL]], [[ADDGOT_LABEL]], [[LDR_LABEL]] +define void @setInternalCPlus4(i32 %t) { +entry: + %addr = getelementptr i32, i32* @InternalC, i32 4 + store i32 %t, i32* %addr, align 4 + ret void +} + +; Check that we catch AdrpLdr case when we have a simple chain: +; adrp -> ldr. +; CHECK-LABEL: _getInternalC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr w0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdr [[ADRP_LABEL]], [[LDR_LABEL]] +define i32 @getInternalC() { + %res = load i32, i32* @InternalC, align 4 + ret i32 %res +} + +; LDRSW supports loading from a literal. +; Make sure we emit AdrpLdr for those. +; CHECK-LABEL: _getSExtInternalC +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsw x0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdr [[ADRP_LABEL]], [[LDR_LABEL]] +define i64 @getSExtInternalC() { + %res = load i32, i32* @InternalC, align 4 + %sextres = sext i32 %res to i64 + ret i64 %sextres +} + +; It may not be safe to fold the literal in the load if the address is +; used several times. +; Make sure we do not catch anything here. We have an adrp alone, +; there is not much we can do about it. +; CHECK-LABEL: _getSeveralInternalC +; CHECK: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: ldr [[LOAD:w[0-9]+]], {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: add [[ADD:w[0-9]+]], [[LOAD]], w0 +; CHECK-NEXT: str [[ADD]], {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: ret +define void @getSeveralInternalC(i32 %t) { +entry: + %tmp = load i32, i32* @InternalC, align 4 + %add = add nsw i32 %tmp, %t + store i32 %add, i32* @InternalC, align 4 + ret void +} + +; Make sure we do not catch anything when: +; adrp -> str. +; We cannot fold anything in the str at this point. +; Indeed, strs do not support literals.
+; CHECK-LABEL: _setInternalC +; CHECK: adrp [[ADRP_REG:x[0-9]+]], _InternalC@PAGE +; CHECK-NEXT: str w0, {{\[}}[[ADRP_REG]], _InternalC@PAGEOFF] +; CHECK-NEXT: ret +define void @setInternalC(i32 %t) { +entry: + store i32 %t, i32* @InternalC, align 4 + ret void +} + +; Now check other variant of loads/stores. + +@D = common global i8 0, align 4 + +; LDRB does not support loading from a literal. +; Make sure we emit AdrpLdrGot and not AdrpLdrGotLdr for those. +; CHECK-LABEL: _getD +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: ldrb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] +define i8 @getD() { + %res = load i8, i8* @D, align 4 + ret i8 %res +} + +; CHECK-LABEL: _setD +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: strb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setD(i8 %t) { + store i8 %t, i8* @D, align 4 + ret void +} + +; LDRSB supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getSExtD +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsb w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i32 @getSExtD() { + %res = load i8, i8* @D, align 4 + %sextres = sext i8 %res to i32 + ret i32 %sextres +} + +; LDRSB supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getSExt64D +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _D@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _D@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsb x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i64 @getSExt64D() { + %res = load i8, i8* @D, align 4 + %sextres = sext i8 %res to i64 + ret i64 %sextres +} + +@E = common global i16 0, align 4 + +; LDRH does not support loading from a literal. +; Make sure we emit AdrpLdrGot and not AdrpLdrGotLdr for those. +; CHECK-LABEL: _getE +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: ldrh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] +define i16 @getE() { + %res = load i16, i16* @E, align 4 + ret i16 %res +} + +; LDRSH supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. 
+; CHECK-LABEL: _getSExtE +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i32 @getSExtE() { + %res = load i16, i16* @E, align 4 + %sextres = sext i16 %res to i32 + ret i32 %sextres +} + +; CHECK-LABEL: _setE +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: strh w0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setE(i16 %t) { + store i16 %t, i16* @E, align 4 + ret void +} + +; LDRSH supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getSExt64E +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _E@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _E@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldrsh x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i64 @getSExt64E() { + %res = load i16, i16* @E, align 4 + %sextres = sext i16 %res to i64 + ret i64 %sextres +} + +@F = common global i64 0, align 4 + +; LDR supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getF +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define i64 @getF() { + %res = load i64, i64* @F, align 4 + ret i64 %res +} + +; CHECK-LABEL: _setF +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _F@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _F@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str x0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setF(i64 %t) { + store i64 %t, i64* @F, align 4 + ret void +} + +@G = common global float 0.0, align 4 + +; LDR float supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. 
+; CHECK-LABEL: _getG +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr s0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define float @getG() { + %res = load float, float* @G, align 4 + ret float %res +} + +; CHECK-LABEL: _setG +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _G@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _G@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str s0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setG(float %t) { + store float %t, float* @G, align 4 + ret void +} + +@H = common global half 0.0, align 4 + +; LDR half supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getH +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr h0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define half @getH() { + %res = load half, half* @H, align 4 + ret half %res +} + +; CHECK-LABEL: _setH +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _H@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _H@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str h0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setH(half %t) { + store half %t, half* @H, align 4 + ret void +} + +@I = common global double 0.0, align 4 + +; LDR double supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getI +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define double @getI() { + %res = load double, double* @I, align 4 + ret double %res +} + +; CHECK-LABEL: _setI +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _I@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _I@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setI(double %t) { + store double %t, double* @I, align 4 + ret void +} + +@J = common global <2 x i32> zeroinitializer, align 4 + +; LDR 64-bit vector supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those.
+; CHECK-LABEL: _getJ +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define <2 x i32> @getJ() { + %res = load <2 x i32>, <2 x i32>* @J, align 4 + ret <2 x i32> %res +} + +; CHECK-LABEL: _setJ +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _J@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _J@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str d0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setJ(<2 x i32> %t) { + store <2 x i32> %t, <2 x i32>* @J, align 4 + ret void +} + +@K = common global <4 x i32> zeroinitializer, align 4 + +; LDR 128-bit vector supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getK +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr q0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define <4 x i32> @getK() { + %res = load <4 x i32>, <4 x i32>* @K, align 4 + ret <4 x i32> %res +} + +; CHECK-LABEL: _setK +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _K@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _K@GOTPAGEOFF] +; CHECK-NEXT: [[STR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: str q0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotStr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[STR_LABEL]] +define void @setK(<4 x i32> %t) { + store <4 x i32> %t, <4 x i32>* @K, align 4 + ret void +} + +@L = common global <1 x i8> zeroinitializer, align 4 + +; LDR 8-bit vector supports loading from a literal. +; Make sure we emit AdrpLdrGotLdr for those. +; CHECK-LABEL: _getL +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] +; CHECK-NEXT: [[LDR_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr b0, {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGotLdr [[ADRP_LABEL]], [[LDRGOT_LABEL]], [[LDR_LABEL]] +define <1 x i8> @getL() { + %res = load <1 x i8>, <1 x i8>* @L, align 4 + ret <1 x i8> %res +} + +; CHECK-LABEL: _setL +; CHECK: [[ADRP_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: adrp [[ADRP_REG:x[0-9]+]], _L@GOTPAGE +; CHECK-NEXT: [[LDRGOT_LABEL:Lloh[0-9]+]]: +; CHECK-NEXT: ldr [[LDRGOT_REG:x[0-9]+]], {{\[}}[[ADRP_REG]], _L@GOTPAGEOFF] +; Ultimately we should generate str b0, but right now, we match the vector +; variant, which does not allow folding the immediate into the store. +; CHECK-NEXT: st1.b { v0 }[0], {{\[}}[[LDRGOT_REG]]] +; CHECK-NEXT: ret +; CHECK: .loh AdrpLdrGot [[ADRP_LABEL]], [[LDRGOT_LABEL]] +define void @setL(<1 x i8> %t) { + store <1 x i8> %t, <1 x i8>* @L, align 4 + ret void +} + +; Make sure we do not assert when we do not track +; all the aliases of a tuple register.
+; Indeed the tuple register can be tracked because of +; one of its elements, but the other elements of the tuple +; do not need to be tracked and we used to assert on that. +; Note: The test case is fragile in the sense that we need +; a tuple register to appear in the lowering. Thus, the target +; cpu is required to reproduce the problem. +; CHECK-LABEL: _uninterestingSub +; CHECK: adrp [[ADRP_REG:x[0-9]+]], [[CONSTPOOL:lCPI[0-9]+_[0-9]+]]@PAGE +; CHECK-NEXT: ldr q[[IDX:[0-9]+]], {{\[}}[[ADRP_REG]], [[CONSTPOOL]]@PAGEOFF] +; The tuple comes from the next instruction. +; CHECK-NEXT: tbl.16b v{{[0-9]+}}, { v{{[0-9]+}}, v{{[0-9]+}} }, v[[IDX]] +; CHECK: ret +define void @uninterestingSub(i8* nocapture %row) #0 { + %tmp = bitcast i8* %row to <16 x i8>* + %tmp1 = load <16 x i8>, <16 x i8>* %tmp, align 16 + %vext43 = shufflevector <16 x i8> , <16 x i8> %tmp1, <16 x i32> + %add.i.414 = add <16 x i8> zeroinitializer, %vext43 + store <16 x i8> %add.i.414, <16 x i8>* %tmp, align 16 + %add.ptr51 = getelementptr inbounds i8, i8* %row, i64 16 + %tmp2 = bitcast i8* %add.ptr51 to <16 x i8>* + %tmp3 = load <16 x i8>, <16 x i8>* %tmp2, align 16 + %tmp4 = bitcast i8* undef to <16 x i8>* + %tmp5 = load <16 x i8>, <16 x i8>* %tmp4, align 16 + %vext157 = shufflevector <16 x i8> %tmp3, <16 x i8> %tmp5, <16 x i32> + %add.i.402 = add <16 x i8> zeroinitializer, %vext157 + store <16 x i8> %add.i.402, <16 x i8>* %tmp4, align 16 + ret void +} + +attributes #0 = { "target-cpu"="cyclone" } diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll index 0ef7b143df80..55c9c6036ed5 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -94,9 +94,7 @@ entry: store i32 %c, i32* %c.addr, align 4 store i64 %d, i64* %d.addr, align 8 %0 = load i16, i16* %b.addr, align 2 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: b.eq LBB4_2 +; CHECK: tbz w0, #0, LBB4_2 %conv = trunc i16 %0 to i1 br i1 %conv, label %if.then, label %if.end @@ -106,9 +104,7 @@ if.then: ; preds = %entry if.end: ; preds = %if.then, %entry %1 = load i32, i32* %c.addr, align 4 -; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1 -; CHECK: cmp w[[REG]], #0 -; CHECK: b.eq LBB4_4 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_4 %conv1 = trunc i32 %1 to i1 br i1 %conv1, label %if.then3, label %if.end4 @@ -118,8 +114,7 @@ if.then3: ; preds = %if.end if.end4: ; preds = %if.then3, %if.end %2 = load i64, i64* %d.addr, align 8 -; CHECK: cmp w{{[0-9]+}}, #0 -; CHECK: b.eq LBB4_6 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_6 %conv5 = trunc i64 %2 to i1 br i1 %conv5, label %if.then7, label %if.end8 @@ -139,9 +134,7 @@ define i32 @trunc64(i64 %foo) nounwind { ; CHECK: trunc64 ; CHECK: and [[REG1:x[0-9]+]], x0, #0x1 ; CHECK: mov x[[REG2:[0-9]+]], [[REG1]] -; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1 -; CHECK: cmp [[REG3]], #0 -; CHECK: b.eq LBB5_2 +; CHECK: tbz w[[REG2]], #0, LBB5_2 %a = and i64 %foo, 1 %b = trunc i64 %a to i1 br i1 %b, label %if.then, label %if.else diff --git a/test/CodeGen/AArch64/arm64-fmax-safe.ll b/test/CodeGen/AArch64/arm64-fmax-safe.ll new file mode 100644 index 000000000000..8b7d66986e78 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-fmax-safe.ll @@ -0,0 +1,53 @@ +; RUN: llc -march=arm64 < %s | FileCheck %s + +define double @test_direct(float %in) { +; CHECK-LABEL: test_direct: + %cmp = fcmp olt float %in, 0.000000e+00 + %val = select i1 %cmp, float 0.000000e+00, float %in + %longer = fpext float %val to double + ret double %longer + +; CHECK: fmax s +} +
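; A minimal illustrative sketch of the idiom these tests exercise (the
; function below is hypothetical and not part of the vendored file, assuming
; the same `llc -march=arm64` RUN line): an fcmp whose result selects between
; the compare's own operands is the pattern the backend can fold to a
; floating-point min/max. Because the ordered ogt compare is false on a NaN
; input, the select returns the zero operand for NaNs, which matches the
; NaN-suppressing fmaxnm semantics rather than plain fmax; by symmetry with
; test_cross_fail_nan below, fmaxnm would be the expected fold.
define float @sketch_fmax_candidate(float %in) {
  %cmp = fcmp ogt float %in, 0.000000e+00   ; ordered: false when %in is NaN
  %val = select i1 %cmp, float %in, float 0.000000e+00
  ret float %val
}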
+define double @test_cross(float %in) { +; CHECK-LABEL: test_cross: + %cmp = fcmp ult float %in, 0.000000e+00 + %val = select i1 %cmp, float %in, float 0.000000e+00 + %longer = fpext float %val to double + ret double %longer + +; CHECK: fmin s +} + +; Same as previous, but with ordered comparison; +; must become fminnm, not fmin. +define double @test_cross_fail_nan(float %in) { +; CHECK-LABEL: test_cross_fail_nan: + %cmp = fcmp olt float %in, 0.000000e+00 + %val = select i1 %cmp, float %in, float 0.000000e+00 + %longer = fpext float %val to double + ret double %longer + +; CHECK: fminnm s +} + +; This isn't a min or a max, but passes the first condition for swapping the +; results. Make sure they're put back before we resort to the normal fcsel. +define float @test_cross_fail(float %lhs, float %rhs) { +; CHECK-LABEL: test_cross_fail: + %tst = fcmp une float %lhs, %rhs + %res = select i1 %tst, float %rhs, float %lhs + ret float %res + + ; The register allocator would have to decide to be deliberately obtuse before + ; other register were used. +; CHECK: fcsel s0, s1, s0, ne +} + +; Make sure the transformation isn't triggered for integers +define i64 @test_integer(i64 %in) { + %cmp = icmp slt i64 %in, 0 + %val = select i1 %cmp, i64 0, i64 %in + ret i64 %val +} diff --git a/test/CodeGen/AArch64/arm64-fmax.ll b/test/CodeGen/AArch64/arm64-fmax.ll index ea281528b84c..40cc36ea52fa 100644 --- a/test/CodeGen/AArch64/arm64-fmax.ll +++ b/test/CodeGen/AArch64/arm64-fmax.ll @@ -1,57 +1,48 @@ ; RUN: llc -march=arm64 -enable-no-nans-fp-math < %s | FileCheck %s -; RUN: llc -march=arm64 < %s | FileCheck %s --check-prefix=CHECK-SAFE define double @test_direct(float %in) { ; CHECK-LABEL: test_direct: -; CHECK-SAFE-LABEL: test_direct: - %cmp = fcmp olt float %in, 0.000000e+00 - %longer = fpext float %in to double - %val = select i1 %cmp, double 0.000000e+00, double %longer - ret double %val + %cmp = fcmp nnan olt float %in, 0.000000e+00 + %val = select i1 %cmp, float 0.000000e+00, float %in + %longer = fpext float %val to double + ret double %longer ; CHECK: fmax -; CHECK-SAFE: fmax } define double @test_cross(float %in) { ; CHECK-LABEL: test_cross: -; CHECK-SAFE-LABEL: test_cross: - %cmp = fcmp ult float %in, 0.000000e+00 - %longer = fpext float %in to double - %val = select i1 %cmp, double %longer, double 0.000000e+00 - ret double %val + %cmp = fcmp nnan ult float %in, 0.000000e+00 + %val = select i1 %cmp, float %in, float 0.000000e+00 + %longer = fpext float %val to double + ret double %longer ; CHECK: fmin -; CHECK-SAFE: fmin } ; Same as previous, but with ordered comparison; ; can't be converted in safe-math mode. define double @test_cross_fail_nan(float %in) { ; CHECK-LABEL: test_cross_fail_nan: -; CHECK-SAFE-LABEL: test_cross_fail_nan: - %cmp = fcmp olt float %in, 0.000000e+00 - %longer = fpext float %in to double - %val = select i1 %cmp, double %longer, double 0.000000e+00 - ret double %val + %cmp = fcmp nnan olt float %in, 0.000000e+00 + %val = select i1 %cmp, float %in, float 0.000000e+00 + %longer = fpext float %val to double + ret double %longer ; CHECK: fmin -; CHECK-SAFE: fcsel d0, d1, d0, mi } ; This isn't a min or a max, but passes the first condition for swapping the ; results. Make sure they're put back before we resort to the normal fcsel. 
define float @test_cross_fail(float %lhs, float %rhs) { ; CHECK-LABEL: test_cross_fail: -; CHECK-SAFE-LABEL: test_cross_fail: - %tst = fcmp une float %lhs, %rhs + %tst = fcmp nnan une float %lhs, %rhs %res = select i1 %tst, float %rhs, float %lhs ret float %res ; The register allocator would have to decide to be deliberately obtuse before ; other register were used. ; CHECK: fcsel s0, s1, s0, ne -; CHECK-SAFE: fcsel s0, s1, s0, ne } ; Make sure the transformation isn't triggered for integers @@ -60,3 +51,14 @@ define i64 @test_integer(i64 %in) { %val = select i1 %cmp, i64 0, i64 %in ret i64 %val } + +define float @test_f16(half %in) { +; CHECK-LABEL: test_f16: + %cmp = fcmp nnan ult half %in, 0.000000e+00 + %val = select i1 %cmp, half %in, half 0.000000e+00 + %longer = fpext half %val to float + ret float %longer +; FIXME: It'd be nice for this to create an fmin instruction! +; CHECK: fcvt +; CHECK: fcsel +} diff --git a/test/CodeGen/AArch64/arm64-fp128.ll b/test/CodeGen/AArch64/arm64-fp128.ll index aaef39fcf512..097fe2ca6ed9 100644 --- a/test/CodeGen/AArch64/arm64-fp128.ll +++ b/test/CodeGen/AArch64/arm64-fp128.ll @@ -148,14 +148,9 @@ define i1 @test_setcc2() { ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs] %val = fcmp ugt fp128 %lhs, %rhs -; CHECK: bl __gttf2 +; CHECK: bl __letf2 ; CHECK: cmp w0, #0 -; CHECK: cset [[GT:w[0-9]+]], gt - -; CHECK: bl __unordtf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[UNORDERED:w[0-9]+]], ne -; CHECK: orr w0, [[UNORDERED]], [[GT]] +; CHECK: cset w0, gt ret i1 %val ; CHECK: ret @@ -169,31 +164,21 @@ define i32 @test_br_cc() { ; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:lhs] ; CHECK: ldr q1, [{{x[0-9]+}}, :lo12:rhs] - ; olt == !uge, which LLVM unfortunately "optimizes" this to. + ; olt == !uge, which LLVM optimizes this to. %cond = fcmp olt fp128 %lhs, %rhs -; CHECK: bl __getf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[OGE:w[0-9]+]], ge - -; CHECK: bl __unordtf2 -; CHECK: cmp w0, #0 -; CHECK: cset [[UNORDERED:w[0-9]+]], ne - -; CHECK: orr [[UGE:w[0-9]+]], [[UNORDERED]], [[OGE]] -; CHECK: cbnz [[UGE]], [[RET29:.LBB[0-9]+_[0-9]+]] +; CHECK: bl __lttf2 +; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: b.ge {{.LBB[0-9]+_[0-9]+}} br i1 %cond, label %iftrue, label %iffalse iftrue: ret i32 42 ; CHECK-NEXT: BB# ; CHECK-NEXT: movz w0, #0x2a -; CHECK-NEXT: b [[REALRET:.LBB[0-9]+_[0-9]+]] - +; CHECK: ret iffalse: ret i32 29 -; CHECK: [[RET29]]: -; CHECK-NEXT: movz w0, #0x1d -; CHECK-NEXT: [[REALRET]]: +; CHECK: movz w0, #0x1d ; CHECK: ret } diff --git a/test/CodeGen/AArch64/arm64-hello.ll b/test/CodeGen/AArch64/arm64-hello.ll index f1c4e9bbaed9..895bfe4b3915 100644 --- a/test/CodeGen/AArch64/arm64-hello.ll +++ b/test/CodeGen/AArch64/arm64-hello.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s -; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s --check-prefix=CHECK-LINUX +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple=arm64-linux-gnu -disable-post-ra | FileCheck %s --check-prefix=CHECK-LINUX ; CHECK-LABEL: main: ; CHECK: stp x29, x30, [sp, #-16]! 
diff --git a/test/CodeGen/AArch64/arm64-indexed-memory.ll b/test/CodeGen/AArch64/arm64-indexed-memory.ll index b52cddf600ac..b6ab9934dbc3 100644 --- a/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -81,6 +81,17 @@ define void @truncst64to8(i8** nocapture %out, i8 %index, i64 %spacing) nounwind } +define void @storef16(half** %out, half %index, half %spacing) nounwind { +; CHECK-LABEL: storef16: +; CHECK: str h{{[0-9+]}}, [x{{[0-9+]}}], #2 +; CHECK: ret + %tmp = load half*, half** %out, align 2 + %incdec.ptr = getelementptr inbounds half, half* %tmp, i64 1 + store half %spacing, half* %tmp, align 2 + store half* %incdec.ptr, half** %out, align 2 + ret void +} + define void @storef32(float** nocapture %out, float %index, float %spacing) nounwind noinline ssp { ; CHECK-LABEL: storef32: ; CHECK: str s{{[0-9+]}}, [x{{[0-9+]}}], #4 @@ -125,6 +136,17 @@ define float * @pref32(float** nocapture %out, float %spacing) nounwind noinline ret float *%ptr } +define half* @pref16(half** %out, half %spacing) nounwind { +; CHECK-LABEL: pref16: +; CHECK: ldr x0, [x0] +; CHECK-NEXT: str h0, [x0, #6]! +; CHECK-NEXT: ret + %tmp = load half*, half** %out, align 2 + %ptr = getelementptr inbounds half, half* %tmp, i64 3 + store half %spacing, half* %ptr, align 2 + ret half *%ptr +} + define i64 * @pre64(i64** nocapture %out, i64 %spacing) nounwind noinline ssp { ; CHECK-LABEL: pre64: ; CHECK: ldr x0, [x0] @@ -230,6 +252,17 @@ define float* @preidxf32(float* %src, float* %out) { ret float* %ptr } +define half* @preidxf16(half* %src, half* %out) { +; CHECK-LABEL: preidxf16: +; CHECK: ldr h0, [x0, #2]! +; CHECK: str h0, [x1] +; CHECK: ret + %ptr = getelementptr inbounds half, half* %src, i64 1 + %tmp = load half, half* %ptr, align 2 + store half %tmp, half* %out, align 2 + ret half* %ptr +} + define i64* @preidx64(i64* %src, i64* %out) { ; CHECK-LABEL: preidx64: ; CHECK: ldr x[[REG:[0-9]+]], [x0, #8]! 
diff --git a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index ba31513172d5..98d4e3646f56 100644 --- a/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios7.0 -o - %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s @ptr = global i8* null @@ -6215,3 +6215,27 @@ define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %pt } declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) + +; CHECK-LABEL: test_ld1lane_build: +; CHECK-DAG: ld1.s { [[REG0:v[0-9]+]] }[0], [x0] +; CHECK-DAG: ld1.s { [[REG0:v[0-9]+]] }[1], [x1] +; CHECK-DAG: ld1.s { [[REG1:v[0-9]+]] }[0], [x2] +; CHECK-DAG: ld1.s { [[REG1:v[0-9]+]] }[1], [x3] +; CHECK: sub.2s v[[REGNUM2:[0-9]+]], [[REG0]], [[REG1]] +; CHECK-NEXT: str d[[REGNUM2]], [x4] +; CHECK-NEXT: ret +define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) { + %load0 = load i32, i32* %ptr0, align 4 + %load1 = load i32, i32* %ptr1, align 4 + %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0 + %vec0_1 = insertelement <2 x i32> %vec0_0, i32 %load1, i32 1 + + %load2 = load i32, i32* %ptr2, align 4 + %load3 = load i32, i32* %ptr3, align 4 + %vec1_0 = insertelement <2 x i32> undef, i32 %load2, i32 0 + %vec1_1 = insertelement <2 x i32> %vec1_0, i32 %load3, i32 1 + + %sub = sub nsw <2 x i32> %vec0_1, %vec1_1 + store <2 x i32> %sub, <2 x i32>* %out, align 16 + ret void +} diff --git a/test/CodeGen/AArch64/arm64-inline-asm.ll b/test/CodeGen/AArch64/arm64-inline-asm.ll index 802d95826ce4..ac6e8a7731c6 100644 --- a/test/CodeGen/AArch64/arm64-inline-asm.ll +++ b/test/CodeGen/AArch64/arm64-inline-asm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as | FileCheck %s +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s ; rdar://9167275 diff --git a/test/CodeGen/AArch64/arm64-join-reserved.ll b/test/CodeGen/AArch64/arm64-join-reserved.ll index dee034483541..c65cf95be2e5 100644 --- a/test/CodeGen/AArch64/arm64-join-reserved.ll +++ b/test/CodeGen/AArch64/arm64-join-reserved.ll @@ -5,7 +5,7 @@ target triple = "arm64-apple-macosx10" ; A move isn't necessary. ; ; CHECK-LABEL: g: -; CHECK: str xzr, [sp] +; CHECK: str xzr, [sp, #-16]! 
; CHECK: bl ; CHECK: ret define void @g() nounwind ssp { diff --git a/test/CodeGen/AArch64/arm64-large-frame.ll b/test/CodeGen/AArch64/arm64-large-frame.ll index c4cce36bcb74..d1244e73b0f3 100644 --- a/test/CodeGen/AArch64/arm64-large-frame.ll +++ b/test/CodeGen/AArch64/arm64-large-frame.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -disable-fp-elim < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=arm64-none-linux-gnu -disable-fp-elim -disable-post-ra < %s | FileCheck %s declare void @use_addr(i8*) @addr = global i8* null diff --git a/test/CodeGen/AArch64/arm64-ld-from-st.ll b/test/CodeGen/AArch64/arm64-ld-from-st.ll new file mode 100644 index 000000000000..dd8add70cdb7 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-ld-from-st.ll @@ -0,0 +1,666 @@ +; RUN: llc < %s -mtriple aarch64--none-eabi -verify-machineinstrs | FileCheck %s + +; CHECK-LABEL: Str64Ldr64 +; CHECK: mov x0, x1 +define i64 @Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i64* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 1 + %1 = load i64, i64* %arrayidx1 + ret i64 %1 +} + +; CHECK-LABEL: Str64Ldr32_0 +; CHECK: and x0, x1, #0xffffffff +define i32 @Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i32* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 2 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Str64Ldr32_1 +; CHECK: lsr x0, x1, #32 +define i32 @Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i32* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 3 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Str64Ldr16_0 +; CHECK: and x0, x1, #0xffff +define i16 @Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 4 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str64Ldr16_1 +; CHECK: ubfx x0, x1, #16, #16 +define i16 @Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 5 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str64Ldr16_2 +; CHECK: ubfx x0, x1, #32, #16 +define i16 @Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 6 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str64Ldr16_3 +; CHECK: lsr x0, x1, #48 +define i16 @Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 7 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str64Ldr8_0 +; CHECK: and x0, x1, #0xff +define i8 @Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* 
+ %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 8 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_1 +; CHECK: ubfx x0, x1, #8, #8 +define i8 @Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 9 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_2 +; CHECK: ubfx x0, x1, #16, #8 +define i8 @Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 10 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_3 +; CHECK: ubfx x0, x1, #24, #8 +define i8 @Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 11 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_4 +; CHECK: ubfx x0, x1, #32, #8 +define i8 @Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 12 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_5 +; CHECK: ubfx x0, x1, #40, #8 +define i8 @Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 13 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_6 +; CHECK: ubfx x0, x1, #48, #8 +define i8 @Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 14 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str64Ldr8_7 +; CHECK: lsr x0, x1, #56 +define i8 @Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 15 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str32Ldr32 +; CHECK: mov w0, w1 +define i32 @Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i32* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 1 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Str32Ldr16_0 +; CHECK: and w0, w1, #0xffff +define i16 @Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str32Ldr16_1 +; CHECK: lsr w0, w1, #16 +define i16 @Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr 
inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 3 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str32Ldr8_0 +; CHECK: and w0, w1, #0xff +define i8 @Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 4 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str32Ldr8_1 +; CHECK: ubfx w0, w1, #8, #8 +define i8 @Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 5 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str32Ldr8_2 +; CHECK: ubfx w0, w1, #16, #8 +define i8 @Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 6 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str32Ldr8_3 +; CHECK: lsr w0, w1, #24 +define i8 @Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 7 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str16Ldr16 +; CHECK: and w0, w1, #0xffff +define i16 @Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i16* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Str16Ldr8_0 +; CHECK: and w0, w1, #0xff +define i8 @Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i8* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 2 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Str16Ldr8_1 +; CHECK: ubfx w0, w1, #8, #8 +define i8 @Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i8* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 3 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + + +; CHECK-LABEL: Unscaled_Str64Ldr64 +; CHECK: mov x0, x1 +define i64 @Unscaled_Str64Ldr64(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i64* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i64, i64* %0, i64 -1 + %1 = load i64, i64* %arrayidx1 + ret i64 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr32_0 +; CHECK: and x0, x1, #0xffffffff +define i32 @Unscaled_Str64Ldr32_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i32* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -2 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr32_1 +; CHECK: lsr x0, x1, #32 +define i32 @Unscaled_Str64Ldr32_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i32* + 
%arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr16_0 +; CHECK: and x0, x1, #0xffff +define i16 @Unscaled_Str64Ldr16_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -4 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr16_1 +; CHECK: ubfx x0, x1, #16, #16 +define i16 @Unscaled_Str64Ldr16_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr16_2 +; CHECK: ubfx x0, x1, #32, #16 +define i16 @Unscaled_Str64Ldr16_2(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr16_3 +; CHECK: lsr x0, x1, #48 +define i16 @Unscaled_Str64Ldr16_3(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i16* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_0 +; CHECK: and x0, x1, #0xff +define i8 @Unscaled_Str64Ldr8_0(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -8 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_1 +; CHECK: ubfx x0, x1, #8, #8 +define i8 @Unscaled_Str64Ldr8_1(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -7 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_2 +; CHECK: ubfx x0, x1, #16, #8 +define i8 @Unscaled_Str64Ldr8_2(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -6 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_3 +; CHECK: ubfx x0, x1, #24, #8 +define i8 @Unscaled_Str64Ldr8_3(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -5 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_4 +; CHECK: ubfx x0, x1, #32, #8 +define i8 @Unscaled_Str64Ldr8_4(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4 + %1 = load i8, i8* 
%arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_5 +; CHECK: ubfx x0, x1, #40, #8 +define i8 @Unscaled_Str64Ldr8_5(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_6 +; CHECK: ubfx x0, x1, #48, #8 +define i8 @Unscaled_Str64Ldr8_6(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str64Ldr8_7 +; CHECK: lsr x0, x1, #56 +define i8 @Unscaled_Str64Ldr8_7(i64* nocapture %P, i64 %v, i64 %n) { +entry: + %0 = bitcast i64* %P to i8* + %arrayidx0 = getelementptr inbounds i64, i64* %P, i64 -1 + store i64 %v, i64* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr32 +; CHECK: mov w0, w1 +define i32 @Unscaled_Str32Ldr32(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i32* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, i32* %0, i64 -1 + %1 = load i32, i32* %arrayidx1 + ret i32 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr16_0 +; CHECK: and w0, w1, #0xffff +define i16 @Unscaled_Str32Ldr16_0(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -2 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr16_1 +; CHECK: lsr w0, w1, #16 +define i16 @Unscaled_Str32Ldr16_1(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr8_0 +; CHECK: and w0, w1, #0xff +define i8 @Unscaled_Str32Ldr8_0(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -4 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr8_1 +; CHECK: ubfx w0, w1, #8, #8 +define i8 @Unscaled_Str32Ldr8_1(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -3 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr8_2 +; CHECK: ubfx w0, w1, #16, #8 +define i8 @Unscaled_Str32Ldr8_2(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str32Ldr8_3 +; CHECK: lsr w0, w1, #24 +define i8 @Unscaled_Str32Ldr8_3(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i8* + 
%arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str16Ldr16 +; CHECK: and w0, w1, #0xffff +define i16 @Unscaled_Str16Ldr16(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i16* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_Str16Ldr8_0 +; CHECK: and w0, w1, #0xff +define i8 @Unscaled_Str16Ldr8_0(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i8* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -2 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: Unscaled_Str16Ldr8_1 +; CHECK: ubfx w0, w1, #8, #8 +define i8 @Unscaled_Str16Ldr8_1(i16* nocapture %P, i16 %v, i64 %n) { +entry: + %0 = bitcast i16* %P to i8* + %arrayidx0 = getelementptr inbounds i16, i16* %P, i64 -1 + store i16 %v, i16* %arrayidx0 + %arrayidx1 = getelementptr inbounds i8, i8* %0, i64 -1 + %1 = load i8, i8* %arrayidx1 + ret i8 %1 +} + +; CHECK-LABEL: StrVolatileLdr +; CHECK: ldrh +define i16 @StrVolatileLdr(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2 + %1 = load volatile i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: StrNotInRangeLdr +; CHECK: ldrh +define i16 @StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: Unscaled_StrNotInRangeLdr +; CHECK: ldurh +define i16 @Unscaled_StrNotInRangeLdr(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 -1 + store i32 %v, i32* %arrayidx0 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 -3 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +; CHECK-LABEL: StrCallLdr +; CHECK: ldrh +define i16 @StrCallLdr(i32* nocapture %P, i32 %v, i64 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + %c = call i1 @test_dummy() + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 1 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} + +declare i1 @test_dummy() + +; CHECK-LABEL: StrStrLdr +; CHECK: ldrh +define i16 @StrStrLdr(i32 %v, i32* %P, i32* %P2, i32 %n) { +entry: + %0 = bitcast i32* %P to i16* + %arrayidx0 = getelementptr inbounds i32, i32* %P, i64 1 + store i32 %v, i32* %arrayidx0 + store i32 %n, i32* %P2 + %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 2 + %1 = load i16, i16* %arrayidx1 + ret i16 %1 +} diff --git a/test/CodeGen/AArch64/arm64-ldp.ll b/test/CodeGen/AArch64/arm64-ldp.ll index a192eab112fa..6071d092f8b3 100644 --- a/test/CodeGen/AArch64/arm64-ldp.ll +++ b/test/CodeGen/AArch64/arm64-ldp.ll @@ -1,8 +1,6 @@ ; RUN: llc < %s -march=arm64 -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\ -; RUN: -verify-machineinstrs | FileCheck 
-check-prefix=LDUR_CHK %s -; CHECK: ldp_int +; CHECK-LABEL: ldp_int ; CHECK: ldp define i32 @ldp_int(i32* %p) nounwind { %tmp = load i32, i32* %p, align 4 @@ -12,7 +10,7 @@ define i32 @ldp_int(i32* %p) nounwind { ret i32 %add } -; CHECK: ldp_sext_int +; CHECK-LABEL: ldp_sext_int ; CHECK: ldpsw define i64 @ldp_sext_int(i32* %p) nounwind { %tmp = load i32, i32* %p, align 4 @@ -51,7 +49,7 @@ define i64 @ldp_half_sext_res1_int(i32* %p) nounwind { } -; CHECK: ldp_long +; CHECK-LABEL: ldp_long ; CHECK: ldp define i64 @ldp_long(i64* %p) nounwind { %tmp = load i64, i64* %p, align 8 @@ -61,7 +59,7 @@ define i64 @ldp_long(i64* %p) nounwind { ret i64 %add } -; CHECK: ldp_float +; CHECK-LABEL: ldp_float ; CHECK: ldp define float @ldp_float(float* %p) nounwind { %tmp = load float, float* %p, align 4 @@ -71,7 +69,7 @@ define float @ldp_float(float* %p) nounwind { ret float %add } -; CHECK: ldp_double +; CHECK-LABEL: ldp_double ; CHECK: ldp define double @ldp_double(double* %p) nounwind { %tmp = load double, double* %p, align 8 @@ -83,10 +81,10 @@ define double @ldp_double(double* %p) nounwind { ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate define i32 @ldur_int(i32* %a) nounwind { -; LDUR_CHK: ldur_int -; LDUR_CHK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8] -; LDUR_CHK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_int +; CHECK: ldp [[DST1:w[0-9]+]], [[DST2:w[0-9]+]], [x0, #-8] +; CHECK-NEXT: add w{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -96,10 +94,10 @@ define i32 @ldur_int(i32* %a) nounwind { } define i64 @ldur_sext_int(i32* %a) nounwind { -; LDUR_CHK: ldur_sext_int -; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_sext_int +; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-8] +; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -111,11 +109,11 @@ define i64 @ldur_sext_int(i32* %a) nounwind { } define i64 @ldur_half_sext_int_res0(i32* %a) nounwind { -; LDUR_CHK: ldur_half_sext_int_res0 -; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] -; LDUR_CHK: sxtw x[[DST1]], w[[DST1]] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_half_sext_int_res0 +; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] +; CHECK: sxtw x[[DST1]], w[[DST1]] +; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -127,11 +125,11 @@ define i64 @ldur_half_sext_int_res0(i32* %a) nounwind { } define i64 @ldur_half_sext_int_res1(i32* %a) nounwind { -; LDUR_CHK: ldur_half_sext_int_res1 -; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] -; LDUR_CHK: sxtw x[[DST2]], w[[DST2]] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_half_sext_int_res1 +; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-8] +; CHECK: sxtw x[[DST2]], w[[DST2]] +; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i32 -1 %tmp1 = load 
i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i32 -2 @@ -144,10 +142,10 @@ define i64 @ldur_half_sext_int_res1(i32* %a) nounwind { define i64 @ldur_long(i64* %a) nounwind ssp { -; LDUR_CHK: ldur_long -; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_long +; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-16] +; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -1 %tmp1 = load i64, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %a, i64 -2 @@ -157,10 +155,10 @@ define i64 @ldur_long(i64* %a) nounwind ssp { } define float @ldur_float(float* %a) { -; LDUR_CHK: ldur_float -; LDUR_CHK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8] -; LDUR_CHK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_float +; CHECK: ldp [[DST1:s[0-9]+]], [[DST2:s[0-9]+]], [x0, #-8] +; CHECK-NEXT: add s{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds float, float* %a, i64 -1 %tmp1 = load float, float* %p1, align 2 %p2 = getelementptr inbounds float, float* %a, i64 -2 @@ -170,10 +168,10 @@ define float @ldur_float(float* %a) { } define double @ldur_double(double* %a) { -; LDUR_CHK: ldur_double -; LDUR_CHK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16] -; LDUR_CHK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: ldur_double +; CHECK: ldp [[DST1:d[0-9]+]], [[DST2:d[0-9]+]], [x0, #-16] +; CHECK-NEXT: add d{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds double, double* %a, i64 -1 %tmp1 = load double, double* %p1, align 2 %p2 = getelementptr inbounds double, double* %a, i64 -2 @@ -184,11 +182,11 @@ define double @ldur_double(double* %a) { ; Now check some boundary conditions define i64 @pairUpBarelyIn(i64* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyIn -; LDUR_CHK-NOT: ldur -; LDUR_CHK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpBarelyIn +; CHECK-NOT: ldur +; CHECK: ldp [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] +; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -31 %tmp1 = load i64, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %a, i64 -32 @@ -198,11 +196,11 @@ define i64 @pairUpBarelyIn(i64* %a) nounwind ssp { } define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyInSext -; LDUR_CHK-NOT: ldur -; LDUR_CHK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpBarelyInSext +; CHECK-NOT: ldur +; CHECK: ldpsw [[DST1:x[0-9]+]], [[DST2:x[0-9]+]], [x0, #-256] +; CHECK-NEXT: add x{{[0-9]+}}, [[DST2]], [[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -63 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -64 @@ -214,12 +212,12 @@ define i64 @pairUpBarelyInSext(i32* %a) nounwind ssp { } define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyInHalfSextRes0 -; LDUR_CHK-NOT: ldur -; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] -; LDUR_CHK: sxtw x[[DST1]], w[[DST1]] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpBarelyInHalfSextRes0 
+; CHECK-NOT: ldur +; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] +; CHECK: sxtw x[[DST1]], w[[DST1]] +; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -63 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -64 @@ -231,12 +229,12 @@ define i64 @pairUpBarelyInHalfSextRes0(i32* %a) nounwind ssp { } define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyInHalfSextRes1 -; LDUR_CHK-NOT: ldur -; LDUR_CHK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] -; LDUR_CHK: sxtw x[[DST2]], w[[DST2]] -; LDUR_CHK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpBarelyInHalfSextRes1 +; CHECK-NOT: ldur +; CHECK: ldp w[[DST1:[0-9]+]], w[[DST2:[0-9]+]], [x0, #-256] +; CHECK: sxtw x[[DST2]], w[[DST2]] +; CHECK-NEXT: add x{{[0-9]+}}, x[[DST2]], x[[DST1]] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -63 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -64 @@ -248,12 +246,12 @@ define i64 @pairUpBarelyInHalfSextRes1(i32* %a) nounwind ssp { } define i64 @pairUpBarelyOut(i64* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyOut -; LDUR_CHK-NOT: ldp +; CHECK-LABEL: pairUpBarelyOut +; CHECK-NOT: ldp ; Don't be fragile about which loads or manipulations of the base register ; are used---just check that there isn't an ldp before the add -; LDUR_CHK: add -; LDUR_CHK-NEXT: ret +; CHECK: add +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -32 %tmp1 = load i64, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %a, i64 -33 @@ -263,12 +261,12 @@ define i64 @pairUpBarelyOut(i64* %a) nounwind ssp { } define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpBarelyOutSext -; LDUR_CHK-NOT: ldp +; CHECK-LABEL: pairUpBarelyOutSext +; CHECK-NOT: ldp ; Don't be fragile about which loads or manipulations of the base register ; are used---just check that there isn't an ldp before the add -; LDUR_CHK: add -; LDUR_CHK-NEXT: ret +; CHECK: add +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -64 %tmp1 = load i32, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %a, i64 -65 @@ -280,12 +278,12 @@ define i64 @pairUpBarelyOutSext(i32* %a) nounwind ssp { } define i64 @pairUpNotAligned(i64* %a) nounwind ssp { -; LDUR_CHK: pairUpNotAligned -; LDUR_CHK-NOT: ldp -; LDUR_CHK: ldur -; LDUR_CHK-NEXT: ldur -; LDUR_CHK-NEXT: add -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpNotAligned +; CHECK-NOT: ldp +; CHECK: ldur +; CHECK-NEXT: ldur +; CHECK-NEXT: add +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %a, i64 -18 %bp1 = bitcast i64* %p1 to i8* %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1 @@ -303,12 +301,12 @@ define i64 @pairUpNotAligned(i64* %a) nounwind ssp { } define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp { -; LDUR_CHK: pairUpNotAlignedSext -; LDUR_CHK-NOT: ldp -; LDUR_CHK: ldursw -; LDUR_CHK-NEXT: ldursw -; LDUR_CHK-NEXT: add -; LDUR_CHK-NEXT: ret +; CHECK-LABEL: pairUpNotAlignedSext +; CHECK-NOT: ldp +; CHECK: ldursw +; CHECK-NEXT: ldursw +; CHECK-NEXT: add +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %a, i64 -18 %bp1 = bitcast i32* %p1 to i8* %bp1p1 = getelementptr inbounds i8, i8* %bp1, i64 1 @@ -326,3 +324,35 @@ define i64 @pairUpNotAlignedSext(i32* %a) nounwind ssp { %tmp3 = add i64 %sexttmp1, %sexttmp2 ret i64 %tmp3 } + +declare void @use-ptr(i32*) + +; CHECK-LABEL: ldp_sext_int_pre +; CHECK: ldpsw 
x{{[0-9]+}}, x{{[0-9]+}}, [x{{[0-9]+}}, #8] +define i64 @ldp_sext_int_pre(i32* %p) nounwind { + %ptr = getelementptr inbounds i32, i32* %p, i64 2 + call void @use-ptr(i32* %ptr) + %add.ptr = getelementptr inbounds i32, i32* %ptr, i64 0 + %tmp = load i32, i32* %add.ptr, align 4 + %add.ptr1 = getelementptr inbounds i32, i32* %ptr, i64 1 + %tmp1 = load i32, i32* %add.ptr1, align 4 + %sexttmp = sext i32 %tmp to i64 + %sexttmp1 = sext i32 %tmp1 to i64 + %add = add nsw i64 %sexttmp1, %sexttmp + ret i64 %add +} + +; CHECK-LABEL: ldp_sext_int_post +; CHECK: ldpsw x{{[0-9]+}}, x{{[0-9]+}}, [x0], #8 +define i64 @ldp_sext_int_post(i32* %p) nounwind { + %tmp = load i32, i32* %p, align 4 + %add.ptr = getelementptr inbounds i32, i32* %p, i64 1 + %tmp1 = load i32, i32* %add.ptr, align 4 + %sexttmp = sext i32 %tmp to i64 + %sexttmp1 = sext i32 %tmp1 to i64 + %ptr = getelementptr inbounds i32, i32* %add.ptr, i64 1 + call void @use-ptr(i32* %ptr) + %add = add nsw i64 %sexttmp1, %sexttmp + ret i64 %add +} + diff --git a/test/CodeGen/AArch64/arm64-long-shift.ll b/test/CodeGen/AArch64/arm64-long-shift.ll index d5baf16bdd5c..ad89d3ff711b 100644 --- a/test/CodeGen/AArch64/arm64-long-shift.ll +++ b/test/CodeGen/AArch64/arm64-long-shift.ll @@ -2,18 +2,20 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: shl: -; CHECK: lsl [[XREG_0:x[0-9]+]], x1, x2 -; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40 -; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2 -; CHECK-NEXT: lsr [[XREG_3:x[0-9]+]], x0, [[XREG_2]] -; CHECK-NEXT: orr [[XREG_6:x[0-9]+]], [[XREG_3]], [[XREG_0]] -; CHECK-NEXT: sub [[XREG_4:x[0-9]+]], x2, #64 -; CHECK-NEXT: lsl [[XREG_5:x[0-9]+]], x0, [[XREG_4]] -; CHECK-NEXT: cmp [[XREG_4]], #0 -; CHECK-NEXT: csel x1, [[XREG_5]], [[XREG_6]], ge -; CHECK-NEXT: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2 -; CHECK-NEXT: csel x0, xzr, [[SMALLSHIFT_LO]], ge -; CHECK-NEXT: ret +; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40 +; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2 +; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]] +; CHECK: cmp x2, #0 +; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq +; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2 +; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]] +; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64 +; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]] +; CHECK: cmp [[EXTRA_SHIFT]], #0 +; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge +; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2 +; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge +; CHECK: ret %shl = shl i128 %r, %s ret i128 %shl @@ -21,19 +23,21 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone { define i128 @ashr(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: ashr: -; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2 -; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40 -; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2 -; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]] -; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]] -; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64 -; CHECK-NEXT: asr [[XREG_6:x[0-9]+]], x1, [[XREG_5]] -; CHECK-NEXT: cmp [[XREG_5]], #0 -; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge -; CHECK-NEXT: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 -; CHECK-NEXT: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63 -; CHECK-NEXT: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge -; CHECK-NEXT: ret +; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40 +; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2 +; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, 
[[REV_SHIFT]] +; CHECK: cmp x2, #0 +; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq +; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2 +; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]] +; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64 +; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]] +; CHECK: cmp [[EXTRA_SHIFT]], #0 +; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge +; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 +; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63 +; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge +; CHECK: ret %shr = ashr i128 %r, %s ret i128 %shr @@ -41,18 +45,20 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone { define i128 @lshr(i128 %r, i128 %s) nounwind readnone { ; CHECK-LABEL: lshr: -; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2 -; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40 -; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2 -; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]] -; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]] -; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64 -; CHECK-NEXT: lsr [[XREG_6:x[0-9]+]], x1, [[XREG_5]] -; CHECK-NEXT: cmp [[XREG_5]], #0 -; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge -; CHECK-NEXT: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 -; CHECK-NEXT: csel x1, xzr, [[SMALLSHIFT_HI]], ge -; CHECK-NEXT: ret +; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40 +; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2 +; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]] +; CHECK: cmp x2, #0 +; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq +; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2 +; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]] +; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64 +; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]] +; CHECK: cmp [[EXTRA_SHIFT]], #0 +; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge +; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2 +; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge +; CHECK: ret %shr = lshr i128 %r, %s ret i128 %shr diff --git a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll index 5bc4d71501ba..85572f2cf0f8 100644 --- a/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll +++ b/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm64-apple-ios -aarch64-strict-align < %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-ios -mattr=+strict-align < %s | FileCheck %s ; Small (16-bytes here) unaligned memcpys should stay memcpy calls if ; strict-alignment is turned on. 
diff --git a/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll b/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll new file mode 100644 index 000000000000..5276ac334a71 --- /dev/null +++ b/test/CodeGen/AArch64/arm64-narrow-ldst-merge.ll @@ -0,0 +1,406 @@ +; RUN: llc < %s -mtriple aarch64--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=LE +; RUN: llc < %s -mtriple aarch64_be--none-eabi -mcpu=cortex-a57 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=BE + +; CHECK-LABEL: Ldrh_merge +; CHECK-NOT: ldrh +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i16 @Ldrh_merge(i16* nocapture readonly %p) { + %1 = load i16, i16* %p, align 2 + %arrayidx2 = getelementptr inbounds i16, i16* %p, i64 1 + %2 = load i16, i16* %arrayidx2, align 2 + %add = sub nuw nsw i16 %1, %2 + ret i16 %add +} + +; CHECK-LABEL: Ldurh_merge +; CHECK-NOT: ldurh +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; CHECK-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i16 @Ldurh_merge(i16* nocapture readonly %p) { +entry: + %arrayidx = getelementptr inbounds i16, i16* %p, i64 -2 + %0 = load i16, i16* %arrayidx + %arrayidx3 = getelementptr inbounds i16, i16* %p, i64 -1 + %1 = load i16, i16* %arrayidx3 + %add = sub nuw nsw i16 %0, %1 + ret i16 %add +} + +; CHECK-LABEL: Ldrh_4_merge +; CHECK-NOT: ldrh +; CHECK: ldp [[WORD1:w[0-9]+]], [[WORD2:w[0-9]+]], [x0] +; CHECK-DAG: and [[WORD1LO:w[0-9]+]], [[WORD1]], #0xffff +; CHECK-DAG: lsr [[WORD1HI:w[0-9]+]], [[WORD1]], #16 +; CHECK-DAG: and [[WORD2LO:w[0-9]+]], [[WORD2]], #0xffff +; CHECK-DAG: lsr [[WORD2HI:w[0-9]+]], [[WORD2]], #16 +; LE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1HI]], [[WORD1LO]] +; BE-DAG: sub [[TEMP1:w[0-9]+]], [[WORD1LO]], [[WORD1HI]] +; LE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2LO]] +; BE: udiv [[TEMP2:w[0-9]+]], [[TEMP1]], [[WORD2HI]] +; LE: sub w0, [[TEMP2]], [[WORD2HI]] +; BE: sub w0, [[TEMP2]], [[WORD2LO]] +define i16 @Ldrh_4_merge(i16* nocapture readonly %P) { + %arrayidx = getelementptr inbounds i16, i16* %P, i64 0 + %l0 = load i16, i16* %arrayidx + %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 1 + %l1 = load i16, i16* %arrayidx2 + %arrayidx7 = getelementptr inbounds i16, i16* %P, i64 2 + %l2 = load i16, i16* %arrayidx7 + %arrayidx12 = getelementptr inbounds i16, i16* %P, i64 3 + %l3 = load i16, i16* %arrayidx12 + %add4 = sub nuw nsw i16 %l1, %l0 + %add9 = udiv i16 %add4, %l2 + %add14 = sub nuw nsw i16 %add9, %l3 + ret i16 %add14 +} + +; CHECK-LABEL: Ldrsh_merge +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] + +define i32 @Ldrsh_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 5 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp1, %sexttmp + ret i32 %add +} + +; CHECK-LABEL: Ldrsh_zsext_merge +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; LE-DAG: and 
[[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; LE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; BE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; BE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsh_zsext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 5 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = zext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrsh_szext_merge +; CHECK: ldr [[NEW_DEST:w[0-9]+]] +; LE-DAG: sxth [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; LE-DAG: lsr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; BE-DAG: asr [[HI_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsh_szext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 4 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 5 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = zext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrb_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; CHECK-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrsb_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; CHECK-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrsb_zsext_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; LE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; LE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; BE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; BE-DAG: ubfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsb_zsext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldrsb_szext_merge +; CHECK: ldrh [[NEW_DEST:w[0-9]+]] +; LE-DAG: sxtb [[LO_PART:w[0-9]+]], [[NEW_DEST]] +; LE-DAG: ubfx [[HI_PART:w[0-9]+]], 
[[NEW_DEST]], #8, #8 +; BE-DAG: and [[LO_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; BE-DAG: sbfx [[HI_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldrsb_szext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 2 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 3 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursh_merge +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; CHECK-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursh_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursh_zsext_merge +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; LE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; BE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursh_zsext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = zext i16 %tmp to i32 + %sexttmp1 = sext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursh_szext_merge +; CHECK: ldur [[NEW_DEST:w[0-9]+]] +; LE-DAG: asr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xffff +; BE-DAG: lsr [[LO_PART:w[0-9]+]], [[NEW_DEST]], #16 +; BE-DAG: sxth [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursh_szext_merge(i16* %p) nounwind { + %add.ptr0 = getelementptr inbounds i16, i16* %p, i64 -1 + %tmp = load i16, i16* %add.ptr0 + %add.ptr = getelementptr inbounds i16, i16* %p, i64 -2 + %tmp1 = load i16, i16* %add.ptr + %sexttmp = sext i16 %tmp to i32 + %sexttmp1 = zext i16 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldurb_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; CHECK-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldurb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursb_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; CHECK-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; CHECK-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, 
[[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursb_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursb_zsext_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; LE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; BE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; BE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursb_zsext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = zext i8 %tmp to i32 + %sexttmp1 = sext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Ldursb_szext_merge +; CHECK: ldurh [[NEW_DEST:w[0-9]+]] +; LE-DAG: sbfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; LE-DAG: and [[HI_PART:w[0-9]+]], [[NEW_DEST]], #0xff +; BE-DAG: ubfx [[LO_PART:w[0-9]+]], [[NEW_DEST]], #8, #8 +; BE-DAG: sxtb [[HI_PART:w[0-9]+]], [[NEW_DEST]] +; LE: sub {{w[0-9]+}}, [[LO_PART]], [[HI_PART]] +; BE: sub {{w[0-9]+}}, [[HI_PART]], [[LO_PART]] +define i32 @Ldursb_szext_merge(i8* %p) nounwind { + %add.ptr0 = getelementptr inbounds i8, i8* %p, i64 -1 + %tmp = load i8, i8* %add.ptr0 + %add.ptr = getelementptr inbounds i8, i8* %p, i64 -2 + %tmp1 = load i8, i8* %add.ptr + %sexttmp = sext i8 %tmp to i32 + %sexttmp1 = zext i8 %tmp1 to i32 + %add = sub nsw i32 %sexttmp, %sexttmp1 + ret i32 %add +} + +; CHECK-LABEL: Strh_zero +; CHECK: str wzr +define void @Strh_zero(i16* nocapture %P, i32 %n) { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %add = add nsw i32 %n, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1 + store i16 0, i16* %arrayidx2 + ret void +} + +; CHECK-LABEL: Strh_zero_4 +; CHECK: stp wzr, wzr +define void @Strh_zero_4(i16* nocapture %P, i32 %n) { +entry: + %idxprom = sext i32 %n to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %add = add nsw i32 %n, 1 + %idxprom1 = sext i32 %add to i64 + %arrayidx2 = getelementptr inbounds i16, i16* %P, i64 %idxprom1 + store i16 0, i16* %arrayidx2 + %add3 = add nsw i32 %n, 2 + %idxprom4 = sext i32 %add3 to i64 + %arrayidx5 = getelementptr inbounds i16, i16* %P, i64 %idxprom4 + store i16 0, i16* %arrayidx5 + %add6 = add nsw i32 %n, 3 + %idxprom7 = sext i32 %add6 to i64 + %arrayidx8 = getelementptr inbounds i16, i16* %P, i64 %idxprom7 + store i16 0, i16* %arrayidx8 + ret void +} + +; CHECK-LABEL: Sturb_zero +; CHECK: sturh wzr +define void @Sturb_zero(i8* nocapture %P, i32 %n) #0 { +entry: + %sub = add nsw i32 %n, -2 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i8, i8* %P, i64 %idxprom + store i8 0, i8* %arrayidx + %sub2= add nsw i32 %n, -1 + %idxprom1 = sext i32 %sub2 to i64 + %arrayidx2 = getelementptr inbounds i8, i8* %P, i64 %idxprom1 + store i8 0, i8* %arrayidx2 + ret void +} + +; CHECK-LABEL: Sturh_zero +; CHECK: stur wzr +define void 
@Sturh_zero(i16* nocapture %P, i32 %n) { +entry: + %sub = add nsw i32 %n, -2 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %sub1 = add nsw i32 %n, -3 + %idxprom2 = sext i32 %sub1 to i64 + %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2 + store i16 0, i16* %arrayidx3 + ret void +} + +; CHECK-LABEL: Sturh_zero_4 +; CHECK: stp wzr, wzr +define void @Sturh_zero_4(i16* nocapture %P, i32 %n) { +entry: + %sub = add nsw i32 %n, -3 + %idxprom = sext i32 %sub to i64 + %arrayidx = getelementptr inbounds i16, i16* %P, i64 %idxprom + store i16 0, i16* %arrayidx + %sub1 = add nsw i32 %n, -4 + %idxprom2 = sext i32 %sub1 to i64 + %arrayidx3 = getelementptr inbounds i16, i16* %P, i64 %idxprom2 + store i16 0, i16* %arrayidx3 + %sub4 = add nsw i32 %n, -2 + %idxprom5 = sext i32 %sub4 to i64 + %arrayidx6 = getelementptr inbounds i16, i16* %P, i64 %idxprom5 + store i16 0, i16* %arrayidx6 + %sub7 = add nsw i32 %n, -1 + %idxprom8 = sext i32 %sub7 to i64 + %arrayidx9 = getelementptr inbounds i16, i16* %P, i64 %idxprom8 + store i16 0, i16* %arrayidx9 + ret void +} diff --git a/test/CodeGen/AArch64/arm64-neon-2velem.ll b/test/CodeGen/AArch64/arm64-neon-2velem.ll index 869966caa3ae..985b5bf483ac 100644 --- a/test/CodeGen/AArch64/arm64-neon-2velem.ll +++ b/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -535,6 +535,17 @@ entry: declare double @llvm.fma.f64(double, double, double) +define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) { +; CHECK-LABEL: test_vfmss_lane_f32 +; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret +entry: + %extract.rhs = extractelement <2 x float> %v, i32 1 + %extract = fsub float -0.000000e+00, %extract.rhs + %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) + ret float %0 +} + define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmss_laneq_f32 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] @@ -557,6 +568,50 @@ entry: ret double %0 } +define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) { +; CHCK-LABEL: test_vfmsd_lane_f64_0 +; CHCK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +; CHCK-NEXT: ret +entry: + %tmp0 = fsub <1 x double> , %v + %tmp1 = extractelement <1 x double> %tmp0, i32 0 + %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a) + ret double %0 +} + +define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) { +; CHECK-LABEL: test_vfmss_lane_f32_0 +; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret +entry: + %tmp0 = fsub <2 x float> , %v + %tmp1 = extractelement <2 x float> %tmp0, i32 1 + %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a) + ret float %0 +} + +define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) { +; CHECK-LABEL: test_vfmss_laneq_f32_0 +; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret +entry: + %tmp0 = fsub <4 x float>, %v + %tmp1 = extractelement <4 x float> %tmp0, i32 3 + %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a) + ret float %0 +} + +define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) { +; CHECK-LABEL: test_vfmsd_laneq_f64_0 +; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; CHECK-NEXT: ret +entry: + %tmp0 = fsub <2 x double>, %v + %tmp1 = extractelement <2 x double> %tmp0, i32 1 + %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, 
double %a) + ret double %0 +} + define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) { ; CHECK-LABEL: test_vmlal_lane_s16: ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3] diff --git a/test/CodeGen/AArch64/arm64-neon-copy.ll b/test/CodeGen/AArch64/arm64-neon-copy.ll index b74a40626cee..83b1cac70f5c 100644 --- a/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -320,21 +320,20 @@ define i32 @smovw8h(<8 x i16> %tmp1) { ret i32 %tmp5 } -define i32 @smovx16b(<16 x i8> %tmp1) { +define i64 @smovx16b(<16 x i8> %tmp1) { ; CHECK-LABEL: smovx16b: -; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.b[8] +; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.b[8] %tmp3 = extractelement <16 x i8> %tmp1, i32 8 - %tmp4 = sext i8 %tmp3 to i32 - %tmp5 = add i32 %tmp4, %tmp4 - ret i32 %tmp5 + %tmp4 = sext i8 %tmp3 to i64 + ret i64 %tmp4 } -define i32 @smovx8h(<8 x i16> %tmp1) { +define i64 @smovx8h(<8 x i16> %tmp1) { ; CHECK-LABEL: smovx8h: -; CHECK: smov {{[xw][0-9]+}}, {{v[0-9]+}}.h[2] +; CHECK: smov {{x[0-9]+}}, {{v[0-9]+}}.h[2] %tmp3 = extractelement <8 x i16> %tmp1, i32 2 - %tmp4 = sext i16 %tmp3 to i32 - ret i32 %tmp4 + %tmp4 = sext i16 %tmp3 to i64 + ret i64 %tmp4 } define i64 @smovx4s(<4 x i32> %tmp1) { diff --git a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll index b8236c5b2479..c2006ccdd064 100644 --- a/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll +++ b/test/CodeGen/AArch64/arm64-patchpoint-webkit_jscc.ll @@ -7,7 +7,7 @@ define void @jscall_patchpoint_codegen(i64 %p1, i64 %p2, i64 %p3, i64 %p4) { entry: ; CHECK-LABEL: jscall_patchpoint_codegen: ; CHECK: Ltmp -; CHECK: str x{{.+}}, [sp] +; CHECK: str x{{.+}}, [sp, #-16]! ; CHECK-NEXT: mov x0, x{{.+}} ; CHECK: Ltmp ; CHECK-NEXT: movz x16, #0xffff, lsl #32 @@ -16,7 +16,7 @@ entry: ; CHECK-NEXT: blr x16 ; FAST-LABEL: jscall_patchpoint_codegen: ; FAST: Ltmp -; FAST: str x{{.+}}, [sp] +; FAST: str x{{.+}}, [sp, #-16]! ; FAST: Ltmp ; FAST-NEXT: movz x16, #0xffff, lsl #32 ; FAST-NEXT: movk x16, #0xdead, lsl #16 @@ -50,7 +50,7 @@ entry: ; FAST: orr [[REG1:x[0-9]+]], xzr, #0x2 ; FAST-NEXT: orr [[REG2:w[0-9]+]], wzr, #0x4 ; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 -; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: str [[REG1]], [sp, #-32]! ; FAST-NEXT: str [[REG2]], [sp, #16] ; FAST-NEXT: str [[REG3]], [sp, #24] ; FAST: Ltmp @@ -90,7 +90,7 @@ entry: ; FAST-NEXT: orr [[REG3:x[0-9]+]], xzr, #0x6 ; FAST-NEXT: orr [[REG4:w[0-9]+]], wzr, #0x8 ; FAST-NEXT: movz [[REG5:x[0-9]+]], #0xa -; FAST-NEXT: str [[REG1]], [sp] +; FAST-NEXT: str [[REG1]], [sp, #-64]! 
; FAST-NEXT: str [[REG2]], [sp, #16] ; FAST-NEXT: str [[REG3]], [sp, #24] ; FAST-NEXT: str [[REG4]], [sp, #36] diff --git a/test/CodeGen/AArch64/arm64-platform-reg.ll b/test/CodeGen/AArch64/arm64-platform-reg.ll index 60672aa38486..f3af01a73559 100644 --- a/test/CodeGen/AArch64/arm64-platform-reg.ll +++ b/test/CodeGen/AArch64/arm64-platform-reg.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 -; RUN: llc -mtriple=arm64-freebsd-gnu -aarch64-reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=arm64-apple-ios -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 +; RUN: llc -mtriple=arm64-freebsd-gnu -mattr=+reserve-x18 -o - %s | FileCheck %s --check-prefix=CHECK-RESERVE-X18 ; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s ; x18 is reserved as a platform register on Darwin but not on other diff --git a/test/CodeGen/AArch64/arm64-popcnt.ll b/test/CodeGen/AArch64/arm64-popcnt.ll index b0b529a13f41..9ee53a0f92e6 100644 --- a/test/CodeGen/AArch64/arm64-popcnt.ll +++ b/test/CodeGen/AArch64/arm64-popcnt.ll @@ -4,8 +4,8 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone { %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt -; CHECK: ubfx x{{[0-9]+}} -; CHECK: fmov d0, x{{[0-9]+}} +; CHECK: mov w[[IN64:[0-9]+]], w0 +; CHECK: fmov d0, x[[IN64]] ; CHECK: cnt.8b v0, v0 ; CHECK: uaddlv.8b h0, v0 ; CHECK: fmov w0, s0 @@ -59,7 +59,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat { %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) ret i32 %cnt ; CHECK-LABEL: cnt32: -; CHECK-NOT 16b +; CHECK-NOT: 16b ; CHECK: ret } @@ -67,7 +67,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat { %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) ret i64 %cnt ; CHECK-LABEL: cnt64: -; CHECK-NOT 16b +; CHECK-NOT: 16b ; CHECK: ret } diff --git a/test/CodeGen/AArch64/arm64-rounding.ll b/test/CodeGen/AArch64/arm64-rounding.ll index 931114447adf..d487aabccc4f 100644 --- a/test/CodeGen/AArch64/arm64-rounding.ll +++ b/test/CodeGen/AArch64/arm64-rounding.ll @@ -1,10 +1,8 @@ -; RUN: llc -O3 < %s -mcpu=cyclone | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64" -target triple = "arm64-apple-ios6.0.0" +; RUN: llc -O3 < %s -mtriple=arm64 | FileCheck %s -; CHECK: test1 -; CHECK: frintx +; CHECK-LABEL: test1: ; CHECK: frintm +; CHECK-NOT: frintx define float @test1(float %a) #0 { entry: %call = tail call float @floorf(float %a) nounwind readnone @@ -13,9 +11,9 @@ entry: declare float @floorf(float) nounwind readnone -; CHECK: test2 -; CHECK: frintx +; CHECK-LABEL: test2: ; CHECK: frintm +; CHECK-NOT: frintx define double @test2(double %a) #0 { entry: %call = tail call double @floor(double %a) nounwind readnone @@ -24,7 +22,7 @@ entry: declare double @floor(double) nounwind readnone -; CHECK: test3 +; CHECK-LABEL: test3: ; CHECK: frinti define float @test3(float %a) #0 { entry: @@ -34,7 +32,7 @@ entry: declare float @nearbyintf(float) nounwind readnone -; CHECK: test4 +; CHECK-LABEL: test4: ; CHECK: frinti define double @test4(double %a) #0 { entry: @@ -44,9 +42,9 @@ entry: declare double @nearbyint(double) nounwind readnone -; CHECK: test5 -; CHECK: frintx +; CHECK-LABEL: test5: ; CHECK: frintp +; CHECK-NOT: frintx define float @test5(float %a) #0 { entry: %call = tail call float @ceilf(float %a) nounwind readnone @@ -55,9 +53,9 @@ entry: declare float @ceilf(float) nounwind 
readnone -; CHECK: test6 -; CHECK: frintx +; CHECK-LABEL: test6: ; CHECK: frintp +; CHECK-NOT: frintx define double @test6(double %a) #0 { entry: %call = tail call double @ceil(double %a) nounwind readnone @@ -66,7 +64,7 @@ entry: declare double @ceil(double) nounwind readnone -; CHECK: test7 +; CHECK-LABEL: test7: ; CHECK: frintx define float @test7(float %a) #0 { entry: @@ -76,7 +74,7 @@ entry: declare float @rintf(float) nounwind readnone -; CHECK: test8 +; CHECK-LABEL: test8: ; CHECK: frintx define double @test8(double %a) #0 { entry: @@ -86,9 +84,9 @@ entry: declare double @rint(double) nounwind readnone -; CHECK: test9 -; CHECK: frintx +; CHECK-LABEL: test9: ; CHECK: frintz +; CHECK-NOT: frintx define float @test9(float %a) #0 { entry: %call = tail call float @truncf(float %a) nounwind readnone @@ -97,9 +95,9 @@ entry: declare float @truncf(float) nounwind readnone -; CHECK: test10 -; CHECK: frintx +; CHECK-LABEL: test10: ; CHECK: frintz +; CHECK-NOT: frintx define double @test10(double %a) #0 { entry: %call = tail call double @trunc(double %a) nounwind readnone @@ -108,9 +106,9 @@ entry: declare double @trunc(double) nounwind readnone -; CHECK: test11 -; CHECK: frintx +; CHECK-LABEL: test11: ; CHECK: frinta +; CHECK-NOT: frintx define float @test11(float %a) #0 { entry: %call = tail call float @roundf(float %a) nounwind readnone @@ -119,9 +117,9 @@ entry: declare float @roundf(float %a) nounwind readnone -; CHECK: test12 -; CHECK: frintx +; CHECK-LABEL: test12: ; CHECK: frinta +; CHECK-NOT: frintx define double @test12(double %a) #0 { entry: %call = tail call double @round(double %a) nounwind readnone @@ -130,7 +128,7 @@ entry: declare double @round(double %a) nounwind readnone -; CHECK: test13 +; CHECK-LABEL: test13: ; CHECK-NOT: frintx ; CHECK: frintm define float @test13(float %a) #1 { @@ -139,7 +137,7 @@ entry: ret float %call } -; CHECK: test14 +; CHECK-LABEL: test14: ; CHECK-NOT: frintx ; CHECK: frintm define double @test14(double %a) #1 { @@ -148,7 +146,7 @@ entry: ret double %call } -; CHECK: test15 +; CHECK-LABEL: test15: ; CHECK-NOT: frintx ; CHECK: frintp define float @test15(float %a) #1 { @@ -157,7 +155,7 @@ entry: ret float %call } -; CHECK: test16 +; CHECK-LABEL: test16: ; CHECK-NOT: frintx ; CHECK: frintp define double @test16(double %a) #1 { @@ -166,7 +164,7 @@ entry: ret double %call } -; CHECK: test17 +; CHECK-LABEL: test17: ; CHECK-NOT: frintx ; CHECK: frintz define float @test17(float %a) #1 { @@ -175,7 +173,7 @@ entry: ret float %call } -; CHECK: test18 +; CHECK-LABEL: test18: ; CHECK-NOT: frintx ; CHECK: frintz define double @test18(double %a) #1 { @@ -184,7 +182,7 @@ entry: ret double %call } -; CHECK: test19 +; CHECK-LABEL: test19: ; CHECK-NOT: frintx ; CHECK: frinta define float @test19(float %a) #1 { @@ -193,7 +191,7 @@ entry: ret float %call } -; CHECK: test20 +; CHECK-LABEL: test20: ; CHECK-NOT: frintx ; CHECK: frinta define double @test20(double %a) #1 { @@ -202,7 +200,5 @@ entry: ret double %call } - - attributes #0 = { nounwind } attributes #1 = { nounwind "unsafe-fp-math"="true" } diff --git a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index 599712be401c..2ecd66ddf5d4 100644 --- a/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -1,5 +1,5 @@ -; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE -; RUN: llc %s -o - -enable-shrink-wrap=false | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE +; 
RUN: llc %s -o - -enable-shrink-wrap=true -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=ENABLE +; RUN: llc %s -o - -enable-shrink-wrap=false -disable-post-ra | FileCheck %s --check-prefix=CHECK --check-prefix=DISABLE target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios" @@ -539,3 +539,94 @@ if.end: declare void @abort() #0 attributes #0 = { noreturn nounwind } + +; Make sure that we handle infinite loops properly When checking that the Save +; and Restore blocks are control flow equivalent, the loop searches for the +; immediate (post) dominator for the (restore) save blocks. When either the Save +; or Restore block is located in an infinite loop the only immediate (post) +; dominator is itself. In this case, we cannot perform shrink wrapping, but we +; should return gracefully and continue compilation. +; The only condition for this test is the compilation finishes correctly. +; +; CHECK-LABEL: infiniteloop +; CHECK: ret +define void @infiniteloop() { +entry: + br i1 undef, label %if.then, label %if.end + +if.then: + %ptr = alloca i32, i32 4 + br label %for.body + +for.body: ; preds = %for.body, %entry + %sum.03 = phi i32 [ 0, %if.then ], [ %add, %for.body ] + %call = tail call i32 bitcast (i32 (...)* @something to i32 ()*)() + %add = add nsw i32 %call, %sum.03 + store i32 %add, i32* %ptr + br label %for.body + +if.end: + ret void +} + +; Another infinite loop test this time with a body bigger than just one block. +; CHECK-LABEL: infiniteloop2 +; CHECK: ret +define void @infiniteloop2() { +entry: + br i1 undef, label %if.then, label %if.end + +if.then: + %ptr = alloca i32, i32 4 + br label %for.body + +for.body: ; preds = %for.body, %entry + %sum.03 = phi i32 [ 0, %if.then ], [ %add, %body1 ], [ 1, %body2] + %call = tail call i32 asm "mov $0, #0", "=r,~{x19}"() + %add = add nsw i32 %call, %sum.03 + store i32 %add, i32* %ptr + br i1 undef, label %body1, label %body2 + +body1: + tail call void asm sideeffect "nop", "~{x19}"() + br label %for.body + +body2: + tail call void asm sideeffect "nop", "~{x19}"() + br label %for.body + +if.end: + ret void +} + +; Another infinite loop test this time with two nested infinite loop. 
+; CHECK-LABEL: infiniteloop3 +; CHECK: ret +define void @infiniteloop3() { +entry: + br i1 undef, label %loop2a, label %body + +body: ; preds = %entry + br i1 undef, label %loop2a, label %end + +loop1: ; preds = %loop2a, %loop2b + %var.phi = phi i32* [ %next.phi, %loop2b ], [ %var, %loop2a ] + %next.phi = phi i32* [ %next.load, %loop2b ], [ %next.var, %loop2a ] + %0 = icmp eq i32* %var, null + %next.load = load i32*, i32** undef + br i1 %0, label %loop2a, label %loop2b + +loop2a: ; preds = %loop1, %body, %entry + %var = phi i32* [ null, %body ], [ null, %entry ], [ %next.phi, %loop1 ] + %next.var = phi i32* [ undef, %body ], [ null, %entry ], [ %next.load, %loop1 ] + br label %loop1 + +loop2b: ; preds = %loop1 + %gep1 = bitcast i32* %var.phi to i32* + %next.ptr = bitcast i32* %gep1 to i32** + store i32* %next.phi, i32** %next.ptr + br label %loop1 + +end: + ret void +} diff --git a/test/CodeGen/AArch64/arm64-spill-lr.ll b/test/CodeGen/AArch64/arm64-spill-lr.ll index 88109088a2ff..2ea5d7810a14 100644 --- a/test/CodeGen/AArch64/arm64-spill-lr.ll +++ b/test/CodeGen/AArch64/arm64-spill-lr.ll @@ -1,9 +1,9 @@ ; RUN: llc -mtriple=arm64-apple-ios < %s @bar = common global i32 0, align 4 -; Leaf function which uses all callee-saved registers and allocates >= 256 bytes on the stack -; this will cause processFunctionBeforeCalleeSavedScan() to spill LR as an additional scratch -; register. +; Leaf function which uses all callee-saved registers and allocates >= 256 bytes +; on the stack this will cause determineCalleeSaves() to spill LR as an +; additional scratch register. ; ; This is a crash-only regression test for rdar://15124582. define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind { diff --git a/test/CodeGen/AArch64/arm64-stackmap.ll b/test/CodeGen/AArch64/arm64-stackmap.ll index 1a4df7a6f2d6..3eb1d2753001 100644 --- a/test/CodeGen/AArch64/arm64-stackmap.ll +++ b/test/CodeGen/AArch64/arm64-stackmap.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=arm64-apple-darwin < %s | FileCheck %s -; RUN: llc -mtriple=arm64-apple-darwin -fast-isel -fast-isel-abort=1 < %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-darwin -mattr=+reserve-x18 < %s | FileCheck %s +; RUN: llc -mtriple=arm64-apple-darwin -mattr=+reserve-x18 -fast-isel -fast-isel-abort=1 < %s | FileCheck %s ; ; Note: Print verbose stackmaps using -debug-only=stackmaps. 
diff --git a/test/CodeGen/AArch64/arm64-stp.ll b/test/CodeGen/AArch64/arm64-stp.ll index 72561aac6e87..98242d0bb57e 100644 --- a/test/CodeGen/AArch64/arm64-stp.ll +++ b/test/CodeGen/AArch64/arm64-stp.ll @@ -1,8 +1,6 @@ ; RUN: llc < %s -march=arm64 -aarch64-stp-suppress=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s -; RUN: llc < %s -march=arm64 -aarch64-unscaled-mem-op=true\ -; RUN: -verify-machineinstrs -mcpu=cyclone | FileCheck -check-prefix=STUR_CHK %s -; CHECK: stp_int +; CHECK-LABEL: stp_int ; CHECK: stp w0, w1, [x2] define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind { store i32 %a, i32* %p, align 4 @@ -11,7 +9,7 @@ define void @stp_int(i32 %a, i32 %b, i32* nocapture %p) nounwind { ret void } -; CHECK: stp_long +; CHECK-LABEL: stp_long ; CHECK: stp x0, x1, [x2] define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind { store i64 %a, i64* %p, align 8 @@ -20,7 +18,7 @@ define void @stp_long(i64 %a, i64 %b, i64* nocapture %p) nounwind { ret void } -; CHECK: stp_float +; CHECK-LABEL: stp_float ; CHECK: stp s0, s1, [x0] define void @stp_float(float %a, float %b, float* nocapture %p) nounwind { store float %a, float* %p, align 4 @@ -29,7 +27,7 @@ define void @stp_float(float %a, float %b, float* nocapture %p) nounwind { ret void } -; CHECK: stp_double +; CHECK-LABEL: stp_double ; CHECK: stp d0, d1, [x0] define void @stp_double(double %a, double %b, double* nocapture %p) nounwind { store double %a, double* %p, align 8 @@ -40,9 +38,9 @@ define void @stp_double(double %a, double %b, double* nocapture %p) nounwind { ; Test the load/store optimizer---combine ldurs into a ldp, if appropriate define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind { -; STUR_CHK: stur_int -; STUR_CHK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8] -; STUR_CHK-NEXT: ret +; CHECK-LABEL: stur_int +; CHECK: stp w{{[0-9]+}}, {{w[0-9]+}}, [x{{[0-9]+}}, #-8] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i32, i32* %p, i32 -1 store i32 %a, i32* %p1, align 2 %p2 = getelementptr inbounds i32, i32* %p, i32 -2 @@ -51,9 +49,9 @@ define void @stur_int(i32 %a, i32 %b, i32* nocapture %p) nounwind { } define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind { -; STUR_CHK: stur_long -; STUR_CHK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16] -; STUR_CHK-NEXT: ret +; CHECK-LABEL: stur_long +; CHECK: stp x{{[0-9]+}}, {{x[0-9]+}}, [x{{[0-9]+}}, #-16] +; CHECK-NEXT: ret %p1 = getelementptr inbounds i64, i64* %p, i32 -1 store i64 %a, i64* %p1, align 2 %p2 = getelementptr inbounds i64, i64* %p, i32 -2 @@ -62,9 +60,9 @@ define void @stur_long(i64 %a, i64 %b, i64* nocapture %p) nounwind { } define void @stur_float(float %a, float %b, float* nocapture %p) nounwind { -; STUR_CHK: stur_float -; STUR_CHK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8] -; STUR_CHK-NEXT: ret +; CHECK-LABEL: stur_float +; CHECK: stp s{{[0-9]+}}, {{s[0-9]+}}, [x{{[0-9]+}}, #-8] +; CHECK-NEXT: ret %p1 = getelementptr inbounds float, float* %p, i32 -1 store float %a, float* %p1, align 2 %p2 = getelementptr inbounds float, float* %p, i32 -2 @@ -73,9 +71,9 @@ define void @stur_float(float %a, float %b, float* nocapture %p) nounwind { } define void @stur_double(double %a, double %b, double* nocapture %p) nounwind { -; STUR_CHK: stur_double -; STUR_CHK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16] -; STUR_CHK-NEXT: ret +; CHECK-LABEL: stur_double +; CHECK: stp d{{[0-9]+}}, {{d[0-9]+}}, [x{{[0-9]+}}, #-16] +; CHECK-NEXT: ret %p1 = getelementptr inbounds double, double* %p, i32 -1 store double %a, double* 
%p1, align 2 %p2 = getelementptr inbounds double, double* %p, i32 -2 diff --git a/test/CodeGen/AArch64/arm64-strict-align.ll b/test/CodeGen/AArch64/arm64-strict-align.ll index 109f4115d801..28c158f7a2eb 100644 --- a/test/CodeGen/AArch64/arm64-strict-align.ll +++ b/test/CodeGen/AArch64/arm64-strict-align.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s -; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-no-strict-align | FileCheck %s -; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align | FileCheck %s --check-prefix=CHECK-STRICT -; RUN: llc < %s -mtriple=arm64-apple-darwin -aarch64-strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT +; RUN: llc < %s -mtriple=arm64-apple-darwin -mattr=+strict-align | FileCheck %s --check-prefix=CHECK-STRICT +; RUN: llc < %s -mtriple=arm64-apple-darwin -mattr=+strict-align -fast-isel | FileCheck %s --check-prefix=CHECK-STRICT define i32 @f0(i32* nocapture %p) nounwind { ; CHECK-STRICT: ldrh [[HIGH:w[0-9]+]], [x0, #2] diff --git a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll index f94f88a1183f..c95eca062ff6 100644 --- a/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll +++ b/test/CodeGen/AArch64/arm64-tls-dynamic-together.ll @@ -1,4 +1,7 @@ -; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=NOEMU %s +; RUN: llc -emulated-tls -O0 -mtriple=arm64-none-linux-gnu -relocation-model=pic \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK -check-prefix=EMU %s ; If the .tlsdesccall and blr parts are emitted completely separately (even with ; glue) then LLVM will separate them quite happily (with a spill at O0, hence @@ -13,6 +16,40 @@ define i32 @test_generaldynamic() { %val = load i32, i32* @general_dynamic_var ret i32 %val -; CHECK: .tlsdesccall general_dynamic_var -; CHECK-NEXT: blr {{x[0-9]+}} +; NOEMU: .tlsdesccall general_dynamic_var +; NOEMU-NEXT: blr {{x[0-9]+}} +; NOEMU-NOT: __emutls_v.general_dynamic_var: + +; EMU: adrp{{.+}}__emutls_v.general_dynamic_var +; EMU: bl __emutls_get_address + +; EMU-NOT: __emutls_v.general_dynamic_var +; EMU-NOT: __emutls_t.general_dynamic_var } + +@emulated_init_var = thread_local global i32 37, align 8 + +define i32 @test_emulated_init() { +; COMMON-LABEL: test_emulated_init: + + %val = load i32, i32* @emulated_init_var + ret i32 %val + +; EMU: adrp{{.+}}__emutls_v.emulated_init_var +; EMU: bl __emutls_get_address + +; EMU-NOT: __emutls_v.general_dynamic_var: + +; EMU: .align 3 +; EMU-LABEL: __emutls_v.emulated_init_var: +; EMU-NEXT: .xword 4 +; EMU-NEXT: .xword 8 +; EMU-NEXT: .xword 0 +; EMU-NEXT: .xword __emutls_t.emulated_init_var + +; EMU-LABEL: __emutls_t.emulated_init_var: +; EMU-NEXT: .word 37 +} + +; CHECK-NOT: __emutls_v.general_dynamic_var: +; EMU-NOT: __emutls_t.general_dynamic_var diff --git a/test/CodeGen/AArch64/arm64-trunc-store.ll b/test/CodeGen/AArch64/arm64-trunc-store.ll index 7cde629b33ae..be0388284fb8 100644 --- a/test/CodeGen/AArch64/arm64-trunc-store.ll +++ b/test/CodeGen/AArch64/arm64-trunc-store.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=arm64-apple-ios7.0 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -disable-post-ra | FileCheck %s define void @bar(<8 x i16> %arg, <8 x i8>* %p) nounwind { ; CHECK-LABEL: bar: diff --git 
a/test/CodeGen/AArch64/arm64-vabs.ll b/test/CodeGen/AArch64/arm64-vabs.ll
index a52c4ebf13e7..c1800085884c 100644
--- a/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/test/CodeGen/AArch64/arm64-vabs.ll
@@ -134,6 +134,72 @@ define <2 x i64> @uabdl2_2d(<4 x i32>* %A, <4 x i32>* %B) nounwind {
   ret <2 x i64> %tmp4
 }
 
+define i16 @uabdl8h_log2_shuffle(<16 x i8>* %a, <16 x i8>* %b) {
+; CHECK-LABEL: uabdl8h_log2_shuffle
+; CHECK: uabdl2.8h
+; CHECK: uabdl.8h
+  %aload = load <16 x i8>, <16 x i8>* %a, align 1
+  %bload = load <16 x i8>, <16 x i8>* %b, align 1
+  %aext = zext <16 x i8> %aload to <16 x i16>
+  %bext = zext <16 x i8> %bload to <16 x i16>
+  %abdiff = sub nsw <16 x i16> %aext, %bext
+  %abcmp = icmp slt <16 x i16> %abdiff, zeroinitializer
+  %ababs = sub nsw <16 x i16> zeroinitializer, %abdiff
+  %absel = select <16 x i1> %abcmp, <16 x i16> %ababs, <16 x i16> %abdiff
+  %rdx.shuf = shufflevector <16 x i16> %absel, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin1.rdx = add <16 x i16> %absel, %rdx.shuf
+  %rdx.shufx = shufflevector <16 x i16> %bin1.rdx, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx = add <16 x i16> %bin1.rdx, %rdx.shufx
+  %rdx.shuf136 = shufflevector <16 x i16> %bin.rdx, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx137 = add <16 x i16> %bin.rdx, %rdx.shuf136
+  %rdx.shuf138 = shufflevector <16 x i16> %bin.rdx137, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx139 = add <16 x i16> %bin.rdx137, %rdx.shuf138
+  %reduced_v = extractelement <16 x i16> %bin.rdx139, i16 0
+  ret i16 %reduced_v
+}
+
+define i32 @uabdl4s_log2_shuffle(<8 x i16>* %a, <8 x i16>* %b) {
+; CHECK-LABEL: uabdl4s_log2_shuffle
+; CHECK: uabdl2.4s
+; CHECK: uabdl.4s
+  %aload = load <8 x i16>, <8 x i16>* %a, align 1
+  %bload = load <8 x i16>, <8 x i16>* %b, align 1
+  %aext = zext <8 x i16> %aload to <8 x i32>
+  %bext = zext <8 x i16> %bload to <8 x i32>
+  %abdiff = sub nsw <8 x i32> %aext, %bext
+  %abcmp = icmp slt <8 x i32> %abdiff, zeroinitializer
+  %ababs = sub nsw <8 x i32> zeroinitializer, %abdiff
+  %absel = select <8 x i1> %abcmp, <8 x i32> %ababs, <8 x i32> %abdiff
+  %rdx.shuf = shufflevector <8 x i32> %absel, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx = add <8 x i32> %absel, %rdx.shuf
+  %rdx.shuf136 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx137 = add <8 x i32> %bin.rdx, %rdx.shuf136
+  %rdx.shuf138 = shufflevector <8 x i32> %bin.rdx137, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %bin.rdx139 = add <8 x i32> %bin.rdx137, %rdx.shuf138
+  %reduced_v = extractelement <8 x i32> %bin.rdx139, i32 0
+  ret i32 %reduced_v
+}
+
+define i64 @uabdl2d_log2_shuffle(<4 x i32>* %a, <4 x i32>* %b, i32 %h) {
+; CHECK-LABEL: uabdl2d_log2_shuffle
+; CHECK: uabdl2.2d
+; CHECK: uabdl.2d
+  %aload = load <4 x i32>, <4 x i32>* %a, align 1
+  %bload = load <4 x i32>, <4 x i32>* %b, align 1
+  %aext = zext <4 x i32> %aload to <4 x i64>
+  %bext = zext <4 x i32> %bload to <4 x i64>
+  %abdiff = sub nsw <4 x i64> %aext, %bext
+  %abcmp = icmp slt <4 x i64> %abdiff, zeroinitializer
+  %ababs = sub nsw <4 x i64> zeroinitializer, %abdiff
+  %absel = select <4 x i1> %abcmp, <4 x i64> %ababs, <4 x i64> %abdiff
+  %rdx.shuf136 = shufflevector <4 x i64> %absel, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %bin.rdx137 = add <4 x i64> %absel, %rdx.shuf136
+  %rdx.shuf138 = shufflevector <4 x i64> %bin.rdx137, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %bin.rdx139 = add <4 x i64> %bin.rdx137, %rdx.shuf138
+  %reduced_v = extractelement <4 x i64> %bin.rdx139, i16 0
+  ret i64 %reduced_v
+}
+
 define <2 x float> @fabd_2s(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK-LABEL: fabd_2s:
 ;CHECK: fabd.2s
diff --git
a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll index 44f2af1c5e79..8702b41023d0 100644 --- a/test/CodeGen/AArch64/arm64-variadic-aapcs.ll +++ b/test/CodeGen/AArch64/arm64-variadic-aapcs.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs -mtriple=arm64-linux-gnu -pre-RA-sched=linearize -enable-misched=false < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=arm64-linux-gnu -pre-RA-sched=linearize -enable-misched=false -disable-post-ra < %s | FileCheck %s %va_list = type {i8*, i8*, i8*, i32, i32} diff --git a/test/CodeGen/AArch64/arm64-vector-ext.ll b/test/CodeGen/AArch64/arm64-vector-ext.ll index 5bee1611e6c6..994a9956cf7f 100644 --- a/test/CodeGen/AArch64/arm64-vector-ext.ll +++ b/test/CodeGen/AArch64/arm64-vector-ext.ll @@ -1,27 +1,27 @@ -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s - -;CHECK: @func30 -;CHECK: ushll.4s v0, v0, #0 -;CHECK: movi.4s v1, #0x1 -;CHECK: and.16b v0, v0, v1 -;CHECK: str q0, [x0] -;CHECK: ret - -%T0_30 = type <4 x i1> -%T1_30 = type <4 x i32> -define void @func30(%T0_30 %v0, %T1_30* %p1) { - %r = zext %T0_30 %v0 to %T1_30 - store %T1_30 %r, %T1_30* %p1 - ret void -} - -; Extend from v1i1 was crashing things (PR20791). Make sure we do something -; sensible instead. -define <1 x i32> @autogen_SD7918() { -; CHECK-LABEL: autogen_SD7918 -; CHECK: movi d0, #0000000000000000 -; CHECK-NEXT: ret - %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0 - %ZE = zext <1 x i1> %I29 to <1 x i32> - ret <1 x i32> %ZE -} +; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple | FileCheck %s + +;CHECK: @func30 +;CHECK: movi.4h v1, #0x1 +;CHECK: and.8b v0, v0, v1 +;CHECK: ushll.4s v0, v0, #0 +;CHECK: str q0, [x0] +;CHECK: ret + +%T0_30 = type <4 x i1> +%T1_30 = type <4 x i32> +define void @func30(%T0_30 %v0, %T1_30* %p1) { + %r = zext %T0_30 %v0 to %T1_30 + store %T1_30 %r, %T1_30* %p1 + ret void +} + +; Extend from v1i1 was crashing things (PR20791). Make sure we do something +; sensible instead. 
+define <1 x i32> @autogen_SD7918() { +; CHECK-LABEL: autogen_SD7918 +; CHECK: movi d0, #0000000000000000 +; CHECK-NEXT: ret + %I29 = insertelement <1 x i1> zeroinitializer, i1 false, i32 0 + %ZE = zext <1 x i1> %I29 to <1 x i32> + ret <1 x i32> %ZE +} diff --git a/test/CodeGen/AArch64/arm64-vminmaxnm.ll b/test/CodeGen/AArch64/arm64-vminmaxnm.ll index b5aca45cd479..302ba9d681c6 100644 --- a/test/CodeGen/AArch64/arm64-vminmaxnm.ll +++ b/test/CodeGen/AArch64/arm64-vminmaxnm.ll @@ -42,13 +42,28 @@ define <2 x double> @f6(<2 x double> %a, <2 x double> %b) nounwind readnone ssp ret <2 x double> %vminnm2.i } +define float @f7(float %a, float %b) nounwind readnone ssp { +; CHECK: fmaxnm s0, s0, s1 +; CHECK: ret + %vmaxnm2.i = tail call float @llvm.aarch64.neon.fmaxnm.f32(float %a, float %b) nounwind + ret float %vmaxnm2.i +} + +define double @f8(double %a, double %b) nounwind readnone ssp { +; CHECK: fminnm d0, d0, d1 +; CHECK: ret + %vmaxnm2.i = tail call double @llvm.aarch64.neon.fminnm.f64(double %a, double %b) nounwind + ret double %vmaxnm2.i +} + declare <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double>, <2 x double>) nounwind readnone declare <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float>, <4 x float>) nounwind readnone declare <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float>, <2 x float>) nounwind readnone declare <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double>, <2 x double>) nounwind readnone declare <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float>, <4 x float>) nounwind readnone declare <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float>, <2 x float>) nounwind readnone - +declare float @llvm.aarch64.neon.fmaxnm.f32(float, float) nounwind readnone +declare double @llvm.aarch64.neon.fminnm.f64(double, double) nounwind readnone define double @test_fmaxnmv(<2 x double> %in) { ; CHECK-LABEL: test_fmaxnmv: diff --git a/test/CodeGen/AArch64/arm64-xaluo.ll b/test/CodeGen/AArch64/arm64-xaluo.ll index ce9c0a64b587..ec49110d4052 100644 --- a/test/CodeGen/AArch64/arm64-xaluo.ll +++ b/test/CodeGen/AArch64/arm64-xaluo.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -disable-post-ra -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=arm64 -aarch64-atomic-cfg-tidy=0 -fast-isel -fast-isel-abort=1 -disable-post-ra -verify-machineinstrs < %s | FileCheck %s ; ; Get the actual value of the overflow bit. 
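
The "overflow bit" the arm64-xaluo.ll RUN lines refer to is the i1 member of the pair returned by the *.with.overflow intrinsics. A stripped-down sketch of the pattern that file exercises (the function name is hypothetical; the intrinsic signature is the real one):

declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)

define i32 @saddo_sketch(i32 %a, i32 %b) nounwind {
  %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
  %sum  = extractvalue { i32, i1 } %t, 0
  ; The i1 overflow flag is expected to lower to an adds followed by a
  ; cset on the vs condition.
  %obit = extractvalue { i32, i1 } %t, 1
  %res = select i1 %obit, i32 0, i32 %sum
  ret i32 %res
}
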
diff --git a/test/CodeGen/AArch64/atomic-ops.ll b/test/CodeGen/AArch64/atomic-ops.ll index cb90caeadc1f..900d2072925f 100644 --- a/test/CodeGen/AArch64/atomic-ops.ll +++ b/test/CodeGen/AArch64/atomic-ops.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK -; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-REG ; Point of CHECK-REG is to make sure UNPREDICTABLE instructions aren't created @@ -893,6 +893,8 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind { ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb ; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] @@ -916,6 +918,8 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind { ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb ; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] @@ -927,21 +931,21 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind { %pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic %old = extractvalue { i32, i1 } %pair, 0 +; CHECK: mov {{[xw]}}[[WANTED:[0-9]+]], {{[xw]}}0 + ; CHECK-NOT: dmb ; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 ; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 ; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]: ; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]] - ; w0 below is a reasonable guess but could change: it certainly comes into the - ; function there. -; CHECK-NEXT: cmp w[[OLD]], w0 +; CHECK-NEXT: cmp w[[OLD]], w[[WANTED]] ; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]] ; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb - -; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]] ret i32 %old } @@ -963,6 +967,8 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind { ; As above, w1 is a reasonable guess. ; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]] ; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]] +; CHECK: [[GET_OUT]]: +; CHECK: clrex ; CHECK-NOT: dmb ; CHECK: str x[[OLD]], diff --git a/test/CodeGen/AArch64/bitcast-v2i8.ll b/test/CodeGen/AArch64/bitcast-v2i8.ll index 4bdac641c5bc..aff3ffc70a71 100644 --- a/test/CodeGen/AArch64/bitcast-v2i8.ll +++ b/test/CodeGen/AArch64/bitcast-v2i8.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-apple-ios -disable-post-ra | FileCheck %s ; Part of PR21549: going through the stack isn't ideal but is correct. 
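
The clrex checks added to atomic-ops.ll above cover the failure path of the compare-exchange loop: when the comparison after the ldxr fails, control branches out before the store-exclusive runs, so the exclusive monitor must be cleared explicitly. The IR driving all of this is just a cmpxchg; a self-contained sketch with a hypothetical global, using the same release/monotonic orderings as the tests:

@var = global i32 0

define i32 @cmpxchg_sketch(i32 %wanted, i32 %new) nounwind {
  ; release on success, monotonic on failure; %pair is { old value, success }
  %pair = cmpxchg i32* @var, i32 %wanted, i32 %new release monotonic
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}
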
diff --git a/test/CodeGen/AArch64/bitfield-insert.ll b/test/CodeGen/AArch64/bitfield-insert.ll index 9b731fa72a47..509b547a5c82 100644 --- a/test/CodeGen/AArch64/bitfield-insert.ll +++ b/test/CodeGen/AArch64/bitfield-insert.ll @@ -196,3 +196,44 @@ define void @test_32bit_with_shr(i32* %existing, i32* %new) { ret void } + +; Bitfield insert where the second or operand is a better match to be folded into the BFM +define void @test_32bit_opnd1_better(i32* %existing, i32* %new) { +; CHECK-LABEL: test_32bit_opnd1_better: + + %oldval = load volatile i32, i32* %existing + %oldval_keep = and i32 %oldval, 65535 ; 0x0000ffff + + %newval = load i32, i32* %new + %newval_shifted = shl i32 %newval, 16 + %newval_masked = and i32 %newval_shifted, 16711680 ; 0x00ff0000 + + %combined = or i32 %oldval_keep, %newval_masked + store volatile i32 %combined, i32* %existing +; CHECK: and [[BIT:w[0-9]+]], {{w[0-9]+}}, #0xffff +; CHECK: bfi [[BIT]], {{w[0-9]+}}, #16, #8 + + ret void +} + +; Tests when all the bits from one operand are not useful +define i32 @test_nouseful_bits(i8 %a, i32 %b) { +; CHECK-LABEL: test_nouseful_bits: +; CHECK: bfi +; CHECK: bfi +; CHECK: bfi +; CHECK-NOT: bfi +; CHECK-NOT: or +; CHECK: lsl + %conv = zext i8 %a to i32 ; 0 0 0 A + %shl = shl i32 %b, 8 ; B2 B1 B0 0 + %or = or i32 %conv, %shl ; B2 B1 B0 A + %shl.1 = shl i32 %or, 8 ; B1 B0 A 0 + %or.1 = or i32 %conv, %shl.1 ; B1 B0 A A + %shl.2 = shl i32 %or.1, 8 ; B0 A A 0 + %or.2 = or i32 %conv, %shl.2 ; B0 A A A + %shl.3 = shl i32 %or.2, 8 ; A A A 0 + %or.3 = or i32 %conv, %shl.3 ; A A A A + %shl.4 = shl i32 %or.3, 8 ; A A A 0 + ret i32 %shl.4 +} diff --git a/test/CodeGen/AArch64/bitfield.ll b/test/CodeGen/AArch64/bitfield.ll index 78399c80b5de..5f19b6943b8e 100644 --- a/test/CodeGen/AArch64/bitfield.ll +++ b/test/CodeGen/AArch64/bitfield.ll @@ -3,13 +3,25 @@ @var32 = global i32 0 @var64 = global i64 0 -define void @test_extendb(i8 %var) { -; CHECK-LABEL: test_extendb: +define void @test_extendb32(i8 %var) { +; CHECK-LABEL: test_extendb32: %sxt32 = sext i8 %var to i32 store volatile i32 %sxt32, i32* @var32 ; CHECK: sxtb {{w[0-9]+}}, {{w[0-9]+}} +; N.b. this doesn't actually produce a bitfield instruction at the +; moment, but it's still a good test to have and the semantics are +; correct. + %uxt32 = zext i8 %var to i32 + store volatile i32 %uxt32, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff + ret void +} + +define void @test_extendb64(i8 %var) { +; CHECK-LABEL: test_extendb64: + %sxt64 = sext i8 %var to i64 store volatile i64 %sxt64, i64* @var64 ; CHECK: sxtb {{x[0-9]+}}, {{w[0-9]+}} @@ -17,23 +29,31 @@ define void @test_extendb(i8 %var) { ; N.b. this doesn't actually produce a bitfield instruction at the ; moment, but it's still a good test to have and the semantics are ; correct. - %uxt32 = zext i8 %var to i32 - store volatile i32 %uxt32, i32* @var32 -; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff - %uxt64 = zext i8 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xff +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xff ret void } -define void @test_extendh(i16 %var) { -; CHECK-LABEL: test_extendh: +define void @test_extendh32(i16 %var) { +; CHECK-LABEL: test_extendh32: %sxt32 = sext i16 %var to i32 store volatile i32 %sxt32, i32* @var32 ; CHECK: sxth {{w[0-9]+}}, {{w[0-9]+}} +; N.b. this doesn't actually produce a bitfield instruction at the +; moment, but it's still a good test to have and the semantics are +; correct. 
+ %uxt32 = zext i16 %var to i32 + store volatile i32 %uxt32, i32* @var32 +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff + ret void +} + +define void @test_extendh64(i16 %var) { +; CHECK-LABEL: test_extendh64: + %sxt64 = sext i16 %var to i64 store volatile i64 %sxt64, i64* @var64 ; CHECK: sxth {{x[0-9]+}}, {{w[0-9]+}} @@ -41,13 +61,9 @@ define void @test_extendh(i16 %var) { ; N.b. this doesn't actually produce a bitfield instruction at the ; moment, but it's still a good test to have and the semantics are ; correct. - %uxt32 = zext i16 %var to i32 - store volatile i32 %uxt32, i32* @var32 -; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff - %uxt64 = zext i16 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK: and {{x[0-9]+}}, {{x[0-9]+}}, #0xffff +; CHECK: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff ret void } @@ -60,7 +76,7 @@ define void @test_extendw(i32 %var) { %uxt64 = zext i32 %var to i64 store volatile i64 %uxt64, i64* @var64 -; CHECK: ubfx {{x[0-9]+}}, {{x[0-9]+}}, #0, #32 +; CHECK: mov {{w[0-9]+}}, w0 ret void } diff --git a/test/CodeGen/AArch64/bitreverse.ll b/test/CodeGen/AArch64/bitreverse.ll new file mode 100644 index 000000000000..936e3554b397 --- /dev/null +++ b/test/CodeGen/AArch64/bitreverse.ll @@ -0,0 +1,87 @@ +; RUN: llc -mtriple=aarch64-eabi %s -o - | FileCheck %s + +; These tests just check that the plumbing is in place for @llvm.bitreverse. The +; actual output is massive at the moment as llvm.bitreverse is not yet legal. + +declare <2 x i16> @llvm.bitreverse.v2i16(<2 x i16>) readnone + +define <2 x i16> @f(<2 x i16> %a) { +; CHECK-LABEL: f: +; CHECK: ushr + %b = call <2 x i16> @llvm.bitreverse.v2i16(<2 x i16> %a) + ret <2 x i16> %b +} + +declare i8 @llvm.bitreverse.i8(i8) readnone + +; Unfortunately some of the shift-and-inserts become BFIs, and some do not :( +define i8 @g(i8 %a) { +; CHECK-LABEL: g: +; CHECK-DAG: lsr [[S5:w.*]], w0, #5 +; CHECK-DAG: lsr [[S4:w.*]], w0, #4 +; CHECK-DAG: lsr [[S3:w.*]], w0, #3 +; CHECK-DAG: lsr [[S2:w.*]], w0, #2 +; CHECK-DAG: lsl [[L1:w.*]], w0, #29 +; CHECK-DAG: lsl [[L2:w.*]], w0, #19 +; CHECK-DAG: lsl [[L3:w.*]], w0, #17 + +; CHECK-DAG: and [[T1:w.*]], [[L1]], #0x40000000 +; CHECK-DAG: bfi [[T1]], w0, #31, #1 +; CHECK-DAG: bfi [[T1]], [[S2]], #29, #1 +; CHECK-DAG: bfi [[T1]], [[S3]], #28, #1 +; CHECK-DAG: bfi [[T1]], [[S4]], #27, #1 +; CHECK-DAG: bfi [[T1]], [[S5]], #26, #1 +; CHECK-DAG: and [[T2:w.*]], [[L2]], #0x2000000 +; CHECK-DAG: and [[T3:w.*]], [[L3]], #0x1000000 +; CHECK-DAG: orr [[T4:w.*]], [[T1]], [[T2]] +; CHECK-DAG: orr [[T5:w.*]], [[T4]], [[T3]] +; CHECK: lsr w0, [[T5]], #24 + + %b = call i8 @llvm.bitreverse.i8(i8 %a) + ret i8 %b +} + +declare <8 x i8> @llvm.bitreverse.v8i8(<8 x i8>) readnone + +define <8 x i8> @g_vec(<8 x i8> %a) { +; Try and match as much of the sequence as precisely as possible. 
+ +; CHECK-LABEL: g_vec: +; CHECK-DAG: movi [[M1:v.*]], #0x80 +; CHECK-DAG: movi [[M2:v.*]], #0x40 +; CHECK-DAG: movi [[M3:v.*]], #0x20 +; CHECK-DAG: movi [[M4:v.*]], #0x10 +; CHECK-DAG: movi [[M5:v.*]], #0x8 +; CHECK-DAG: movi [[M6:v.*]], #0x4{{$}} +; CHECK-DAG: movi [[M7:v.*]], #0x2{{$}} +; CHECK-DAG: movi [[M8:v.*]], #0x1{{$}} +; CHECK-DAG: shl [[S1:v.*]], v0.8b, #7 +; CHECK-DAG: shl [[S2:v.*]], v0.8b, #5 +; CHECK-DAG: shl [[S3:v.*]], v0.8b, #3 +; CHECK-DAG: shl [[S4:v.*]], v0.8b, #1 +; CHECK-DAG: ushr [[S5:v.*]], v0.8b, #1 +; CHECK-DAG: ushr [[S6:v.*]], v0.8b, #3 +; CHECK-DAG: ushr [[S7:v.*]], v0.8b, #5 +; CHECK-DAG: ushr [[S8:v.*]], v0.8b, #7 +; CHECK-DAG: and [[A1:v.*]], [[S1]], [[M1]] +; CHECK-DAG: and [[A2:v.*]], [[S2]], [[M2]] +; CHECK-DAG: and [[A3:v.*]], [[S3]], [[M3]] +; CHECK-DAG: and [[A4:v.*]], [[S4]], [[M4]] +; CHECK-DAG: and [[A5:v.*]], [[S5]], [[M5]] +; CHECK-DAG: and [[A6:v.*]], [[S6]], [[M6]] +; CHECK-DAG: and [[A7:v.*]], [[S7]], [[M7]] +; CHECK-DAG: and [[A8:v.*]], [[S8]], [[M8]] + +; The rest can be ORRed together in any order; it's not worth the test +; maintenance to match them precisely. +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK-DAG: orr +; CHECK: ret + %b = call <8 x i8> @llvm.bitreverse.v8i8(<8 x i8> %a) + ret <8 x i8> %b +} diff --git a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll index c78fabac6187..004267f4e4e0 100644 --- a/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -403,6 +403,32 @@ return: ; preds = %land.lhs.true, %con ret i32 %retval.0 } +define void @cmp_shifted(i32 %in, i32 %lhs, i32 %rhs) { +; CHECK-LABEL: cmp_shifted: +; CHECK: cmp w0, #1 +; [...] +; CHECK: cmp w0, #2, lsl #12 + + %tst_low = icmp sgt i32 %in, 0 + br i1 %tst_low, label %true, label %false + +true: + call i32 @zoo(i32 128) + ret void + +false: + %tst = icmp sgt i32 %in, 8191 + br i1 %tst, label %truer, label %falser + +truer: + call i32 @zoo(i32 42) + ret void + +falser: + call i32 @zoo(i32 1) + ret void +} + declare i32 @zoo(i32) declare double @yoo(i32) diff --git a/test/CodeGen/AArch64/cpus.ll b/test/CodeGen/AArch64/cpus.ll index 1266842fcc6d..a8399f92ebe4 100644 --- a/test/CodeGen/AArch64/cpus.ll +++ b/test/CodeGen/AArch64/cpus.ll @@ -2,6 +2,7 @@ ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a35 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a57 2>&1 | FileCheck %s ; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a72 2>&1 | FileCheck %s diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll new file mode 100644 index 000000000000..a9ae00c8d270 --- /dev/null +++ b/test/CodeGen/AArch64/cxx-tlscc.ll @@ -0,0 +1,76 @@ +; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s +; Shrink wrapping currently does not kick in because we have a TLS CALL +; in the entry block and it will clobber the link register. 
+ +%struct.S = type { i8 } + +@sg = internal thread_local global %struct.S zeroinitializer, align 1 +@__dso_handle = external global i8 +@__tls_guard = internal thread_local unnamed_addr global i1 false + +declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) +declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) +declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) + +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind { + %.b.i = load i1, i1* @__tls_guard, align 1 + br i1 %.b.i, label %__tls_init.exit, label %init.i + +init.i: + store i1 true, i1* @__tls_guard, align 1 + %call.i.i = tail call %struct.S* @_ZN1SC1Ev(%struct.S* nonnull @sg) + %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (%struct.S* (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) + br label %__tls_init.exit + +__tls_init.exit: + ret %struct.S* @sg +} + +; CHECK-LABEL: _ZTW2sg +; CHECK-NOT: stp d31, d30 +; CHECK-NOT: stp d29, d28 +; CHECK-NOT: stp d27, d26 +; CHECK-NOT: stp d25, d24 +; CHECK-NOT: stp d23, d22 +; CHECK-NOT: stp d21, d20 +; CHECK-NOT: stp d19, d18 +; CHECK-NOT: stp d17, d16 +; CHECK-NOT: stp d7, d6 +; CHECK-NOT: stp d5, d4 +; CHECK-NOT: stp d3, d2 +; CHECK-NOT: stp d1, d0 +; CHECK-NOT: stp x20, x19 +; CHECK-NOT: stp x14, x13 +; CHECK-NOT: stp x12, x11 +; CHECK-NOT: stp x10, x9 +; CHECK-NOT: stp x8, x7 +; CHECK-NOT: stp x6, x5 +; CHECK-NOT: stp x4, x3 +; CHECK-NOT: stp x2, x1 +; CHECK: blr +; CHECK: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]] +; CHECK: blr +; CHECK: tlv_atexit +; CHECK: [[BB_end]]: +; CHECK: blr +; CHECK-NOT: ldp x2, x1 +; CHECK-NOT: ldp x4, x3 +; CHECK-NOT: ldp x6, x5 +; CHECK-NOT: ldp x8, x7 +; CHECK-NOT: ldp x10, x9 +; CHECK-NOT: ldp x12, x11 +; CHECK-NOT: ldp x14, x13 +; CHECK-NOT: ldp x20, x19 +; CHECK-NOT: ldp d1, d0 +; CHECK-NOT: ldp d3, d2 +; CHECK-NOT: ldp d5, d4 +; CHECK-NOT: ldp d7, d6 +; CHECK-NOT: ldp d17, d16 +; CHECK-NOT: ldp d19, d18 +; CHECK-NOT: ldp d21, d20 +; CHECK-NOT: ldp d23, d22 +; CHECK-NOT: ldp d25, d24 +; CHECK-NOT: ldp d27, d26 +; CHECK-NOT: ldp d29, d28 +; CHECK-NOT: ldp d31, d30 diff --git a/test/CodeGen/AArch64/dag-combine-select.ll b/test/CodeGen/AArch64/dag-combine-select.ll new file mode 100644 index 000000000000..45b998d9136d --- /dev/null +++ b/test/CodeGen/AArch64/dag-combine-select.ll @@ -0,0 +1,47 @@ +; RUN: llc -disable-post-ra -o - %s | FileCheck %s +target triple = "arm64--" + +@out = internal global i32 0, align 4 + +; Ensure that we transform select(C0, x, select(C1, x, y)) towards +; select(C0 | C1, x, y) so we can use CMP;CCMP for the implementation. +; CHECK-LABEL: test0: +; CHECK: cmp w0, #7 +; CHECK: ccmp w1, #0, #0, ne +; CHECK: csel w0, w1, w2, gt +; CHECK: ret +define i32 @test0(i32 %v0, i32 %v1, i32 %v2) { + %cmp1 = icmp eq i32 %v0, 7 + %cmp2 = icmp sgt i32 %v1, 0 + %sel0 = select i1 %cmp1, i32 %v1, i32 %v2 + %sel1 = select i1 %cmp2, i32 %v1, i32 %sel0 + ret i32 %sel1 +} + +; Usually we keep select(C0 | C1, x, y) as is on aarch64 to create CMP;CCMP +; sequences. This case should be transformed to select(C0, select(C1, x, y), y) +; anyway to get CSE effects. 
+; CHECK-LABEL: test1: +; CHECK-NOT: ccmp +; CHECK: cmp w0, #7 +; CHECK: adrp x[[OUTNUM:[0-9]+]], out +; CHECK: csel w[[SEL0NUM:[0-9]+]], w1, w2, eq +; CHECK: cmp w[[SEL0NUM]], #13 +; CHECK: csel w[[SEL1NUM:[0-9]+]], w1, w2, lo +; CHECK: cmp w0, #42 +; CHECK: csel w[[SEL2NUM:[0-9]+]], w1, w[[SEL1NUM]], eq +; CHECK: str w[[SEL1NUM]], [x[[OUTNUM]], :lo12:out] +; CHECK: str w[[SEL2NUM]], [x[[OUTNUM]], :lo12:out] +; CHECK: ret +define void @test1(i32 %bitset, i32 %val0, i32 %val1) { + %cmp1 = icmp eq i32 %bitset, 7 + %cond = select i1 %cmp1, i32 %val0, i32 %val1 + %cmp5 = icmp ult i32 %cond, 13 + %cond11 = select i1 %cmp5, i32 %val0, i32 %val1 + %cmp3 = icmp eq i32 %bitset, 42 + %or.cond = or i1 %cmp3, %cmp5 + %cond17 = select i1 %or.cond, i32 %val0, i32 %val1 + store volatile i32 %cond11, i32* @out, align 4 + store volatile i32 %cond17, i32* @out, align 4 + ret void +} diff --git a/test/CodeGen/AArch64/divrem.ll b/test/CodeGen/AArch64/divrem.ll new file mode 100644 index 000000000000..9f648eb63eac --- /dev/null +++ b/test/CodeGen/AArch64/divrem.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s + +; SDIVREM/UDIVREM DAG nodes are generated but expanded when lowering and +; should not generate select error. +define <2 x i32> @test_udivrem(<2 x i32> %x, < 2 x i32> %y, < 2 x i32>* %z) { +; CHECK-LABEL: test_udivrem +; CHECK-DAG: udivrem +; CHECK-NOT: LLVM ERROR: Cannot select + %div = udiv <2 x i32> %x, %y + store <2 x i32> %div, <2 x i32>* %z + %1 = urem <2 x i32> %x, %y + ret <2 x i32> %1 +} + +define <4 x i32> @test_sdivrem(<4 x i32> %x, <4 x i32>* %y) { +; CHECK-LABEL: test_sdivrem +; CHECK-DAG: sdivrem + %div = sdiv <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 > + store <4 x i32> %div, <4 x i32>* %y + %1 = srem <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 > + ret <4 x i32> %1 +} diff --git a/test/CodeGen/AArch64/emutls.ll b/test/CodeGen/AArch64/emutls.ll new file mode 100644 index 000000000000..ac5762edba98 --- /dev/null +++ b/test/CodeGen/AArch64/emutls.ll @@ -0,0 +1,116 @@ +; RUN: llc -emulated-tls -mtriple=aarch64-linux-android \ +; RUN: -relocation-model=pic < %s | FileCheck -check-prefix=ARM64 %s + +; Copied from X86/emutls.ll + +; Use my_emutls_get_address like __emutls_get_address. 
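+;
+; Layout of each __emutls_v.* control variable checked below (one xword per
+; field, as the CHECK lines spell out): object size, alignment, index (0
+; until the runtime initializes it), and a pointer to the __emutls_t.*
+; initial-value template (or 0 for zero-initialized variables such as b1).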
+@my_emutls_v_xyz = external global i8*, align 4 +declare i8* @my_emutls_get_address(i8*) + +define i32 @my_get_xyz() { +; ARM64-LABEL: my_get_xyz: +; ARM64: adrp x0, :got:my_emutls_v_xyz +; ARM64-NEXT: ldr x0, [x0, :got_lo12:my_emutls_v_xyz] +; ARM64-NEXT: bl my_emutls_get_address +; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + %call = call i8* @my_emutls_get_address(i8* bitcast (i8** @my_emutls_v_xyz to i8*)) + %0 = bitcast i8* %call to i32* + %1 = load i32, i32* %0, align 4 + ret i32 %1 +} + +@i1 = thread_local global i32 15 +@i2 = external thread_local global i32 +@i3 = internal thread_local global i32 15 +@i4 = hidden thread_local global i32 15 +@i5 = external hidden thread_local global i32 +@s1 = thread_local global i16 15 +@b1 = thread_local global i8 0 + +define i32 @f1() { +; ARM64-LABEL: f1: +; ARM64: adrp x0, :got:__emutls_v.i1 +; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] +; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: ldr w0, [x0] +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + %tmp1 = load i32, i32* @i1 + ret i32 %tmp1 +} + +define i32* @f2() { +; ARM64-LABEL: f2: +; ARM64: adrp x0, :got:__emutls_v.i1 +; ARM64-NEXT: ldr x0, [x0, :got_lo12:__emutls_v.i1] +; ARM64-NEXT: bl __emutls_get_address +; ARM64-NEXT: ldp x29, x30, [sp] + +entry: + ret i32* @i1 +} + +;;;;;;;;;;;;;; 64-bit __emutls_v. and __emutls_t. + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i1: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i1 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i1: +; ARM64-NEXT: .word 15 + +; ARM64-NOT: __emutls_v.i2 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i3: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i3 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i3: +; ARM64-NEXT: .word 15 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.i4: +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 4 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.i4 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.i4: +; ARM64-NEXT: .word 15 + +; ARM64-NOT: __emutls_v.i5: +; ARM64 .hidden __emutls_v.i5 +; ARM64-NOT: __emutls_v.i5: + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.s1: +; ARM64-NEXT: .xword 2 +; ARM64-NEXT: .xword 2 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword __emutls_t.s1 + +; ARM64 .section .rodata, +; ARM64-LABEL: __emutls_t.s1: +; ARM64-NEXT: .hword 15 + +; ARM64 .section .data.rel.local, +; ARM64-LABEL: __emutls_v.b1: +; ARM64-NEXT: .xword 1 +; ARM64-NEXT: .xword 1 +; ARM64-NEXT: .xword 0 +; ARM64-NEXT: .xword 0 + +; ARM64-NOT: __emutls_t.b1 diff --git a/test/CodeGen/AArch64/emutls_generic.ll b/test/CodeGen/AArch64/emutls_generic.ll new file mode 100644 index 000000000000..7664db3df8d2 --- /dev/null +++ b/test/CodeGen/AArch64/emutls_generic.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -relocation-model=pic \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -relocation-model=pic -O3 \ +; RUN: | FileCheck -check-prefix=ARM_64 %s +; RUN: llc < %s -emulated-tls -mtriple=aarch64-linux-android -O3 \ +; RUN: | FileCheck -check-prefix=ARM_64 %s + +; Make sure that TLS symbols are emitted in expected order. 
+
+@external_x = external thread_local global i32, align 8
+@external_y = thread_local global i8 7, align 2
+@internal_y = internal thread_local global i64 9, align 16
+
+define i32* @get_external_x() {
+entry:
+  ret i32* @external_x
+}
+
+define i8* @get_external_y() {
+entry:
+  ret i8* @external_y
+}
+
+define i64* @get_internal_y() {
+entry:
+  ret i64* @internal_y
+}
+
+; ARM_64-LABEL: get_external_x:
+; ARM_64: __emutls_v.external_x
+; ARM_64: __emutls_get_address
+; ARM_64-LABEL: get_external_y:
+; ARM_64: __emutls_v.external_y
+; ARM_64: __emutls_get_address
+; ARM_64-LABEL: get_internal_y:
+; ARM_64: __emutls_v.internal_y
+; ARM_64: __emutls_get_address
+; ARM_64-NOT: __emutls_t.external_x
+; ARM_64-NOT: __emutls_v.external_x:
+; ARM_64: .align 3
+; ARM_64-LABEL: __emutls_v.external_y:
+; ARM_64-NEXT: .xword 1
+; ARM_64-NEXT: .xword 2
+; ARM_64-NEXT: .xword 0
+; ARM_64-NEXT: .xword __emutls_t.external_y
+; ARM_64-NOT: __emutls_v.external_x:
+; ARM_64: .section .rodata,
+; ARM_64-LABEL: __emutls_t.external_y:
+; ARM_64-NEXT: .byte 7
+; ARM_64: .data
+; ARM_64: .align 3
+; ARM_64-LABEL: __emutls_v.internal_y:
+; ARM_64-NEXT: .xword 8
+; ARM_64-NEXT: .xword 16
+; ARM_64-NEXT: .xword 0
+; ARM_64-NEXT: .xword __emutls_t.internal_y
+; ARM_64: .section .rodata,
+; ARM_64-LABEL: __emutls_t.internal_y:
+; ARM_64-NEXT: .xword 9
diff --git a/test/CodeGen/AArch64/eon.ll b/test/CodeGen/AArch64/eon.ll
new file mode 100644
index 000000000000..ea61ce34c050
--- /dev/null
+++ b/test/CodeGen/AArch64/eon.ll
@@ -0,0 +1,29 @@
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+
+; Check that the eon instruction is generated instead of eor, movn
+define i64 @test1(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test1:
+; CHECK: eon
+; CHECK: ret
+entry:
+  %shl = shl i64 %b, 4
+  %neg = xor i64 %a, -1
+  %xor = xor i64 %shl, %neg
+  ret i64 %xor
+}
+
+; Same check with multiple uses of %neg
+define i64 @test2(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: test2:
+; CHECK: eon
+; CHECK: eon
+; CHECK: lsl
+; CHECK: ret
+entry:
+  %shl = shl i64 %b, 4
+  %neg = xor i64 %shl, -1
+  %xor = xor i64 %neg, %a
+  %xor1 = xor i64 %c, %neg
+  %shl2 = shl i64 %xor, %xor1
+  ret i64 %shl2
+}
diff --git a/test/CodeGen/AArch64/f16-instructions.ll b/test/CodeGen/AArch64/f16-instructions.ll
index be5e2e51385d..e8ecb13b3564 100644
--- a/test/CodeGen/AArch64/f16-instructions.ll
+++ b/test/CodeGen/AArch64/f16-instructions.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra | FileCheck %s
 
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 
@@ -143,6 +143,33 @@ define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
   ret half %r
 }
 
+; CHECK-LABEL: test_select_cc_f32_f16:
+; CHECK-DAG: fcvt s2, h2
+; CHECK-DAG: fcvt s3, h3
+; CHECK-NEXT: fcmp s2, s3
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: ret
+define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
+  %cc = fcmp une half %c, %d
+  %r = select i1 %cc, float %a, float %b
+  ret float %r
+}
+
+; CHECK-LABEL: test_select_cc_f16_f32:
+; CHECK-DAG: fcvt s0, h0
+; CHECK-DAG: fcvt s1, h1
+; CHECK-DAG: fcmp s2, s3
+; CHECK-DAG: cset w8, ne
+; CHECK-NEXT: cmp w8, #0
+; CHECK-NEXT: fcsel s0, s0, s1, ne
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ret
+define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
+  %cc = fcmp une float %c, %d
+  %r =
select i1 %cc, half %a, half %b + ret half %r +} + ; CHECK-LABEL: test_fcmp_une: ; CHECK-NEXT: fcvt s1, h1 ; CHECK-NEXT: fcvt s0, h0 @@ -644,13 +671,10 @@ define half @test_fabs(half %a) #0 { } ; CHECK-LABEL: test_minnum: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: bl {{_?}}fminf +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fminnm s0, s0, s1 ; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret define half @test_minnum(half %a, half %b) #0 { %r = call half @llvm.minnum.f16(half %a, half %b) @@ -658,13 +682,10 @@ define half @test_minnum(half %a, half %b) #0 { } ; CHECK-LABEL: test_maxnum: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: fcvt s0, h0 ; CHECK-NEXT: fcvt s1, h1 -; CHECK-NEXT: bl {{_?}}fmaxf +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: fmaxnm s0, s0, s1 ; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: ldp x29, x30, [sp], #16 ; CHECK-NEXT: ret define half @test_maxnum(half %a, half %b) #0 { %r = call half @llvm.maxnum.f16(half %a, half %b) @@ -683,11 +704,50 @@ define half @test_copysign(half %a, half %b) #0 { ret half %r } -; CHECK-LABEL: test_floor: -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: frintm s0, s1 +; CHECK-LABEL: test_copysign_f32: +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 ; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: ret +define half @test_copysign_f32(half %a, float %b) #0 { + %tb = fptrunc float %b to half + %r = call half @llvm.copysign.f16(half %a, half %tb) + ret half %r +} + +; CHECK-LABEL: test_copysign_f64: +; CHECK-NEXT: fcvt s1, d1 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: fcvt h0, s0 +; CHECK-NEXT: ret +define half @test_copysign_f64(half %a, double %b) #0 { + %tb = fptrunc double %b to half + %r = call half @llvm.copysign.f16(half %a, half %tb) + ret half %r +} + +; Check that the FP promotion will use a truncating FP_ROUND, so we can fold +; away the (fpext (fp_round )) here. 
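+;
+; In DAG terms the fold is roughly (a sketch, not the verbatim combine):
+;   (fpext (fp_round %x, /*trunc=*/1)) --> %x
+; so the promoted f32 copysign below needs no round trip through f16.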
+ +; CHECK-LABEL: test_copysign_extended: +; CHECK-NEXT: fcvt s1, h1 +; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret +define float @test_copysign_extended(half %a, half %b) #0 { + %r = call half @llvm.copysign.f16(half %a, half %b) + %xr = fpext half %r to float + ret float %xr +} + +; CHECK-LABEL: test_floor: +; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 +; CHECK-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]] +; CHECK-NEXT: fcvt h0, [[INT32]] ; CHECK-NEXT: ret define half @test_floor(half %a) #0 { %r = call half @llvm.floor.f16(half %a) @@ -695,10 +755,9 @@ define half @test_floor(half %a) #0 { } ; CHECK-LABEL: test_ceil: -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: frintp s0, s1 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 +; CHECK-NEXT: frintp [[INT32:s[0-9]+]], [[FLOAT32]] +; CHECK-NEXT: fcvt h0, [[INT32]] ; CHECK-NEXT: ret define half @test_ceil(half %a) #0 { %r = call half @llvm.ceil.f16(half %a) @@ -706,10 +765,9 @@ define half @test_ceil(half %a) #0 { } ; CHECK-LABEL: test_trunc: -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: frintz s0, s1 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 +; CHECK-NEXT: frintz [[INT32:s[0-9]+]], [[FLOAT32]] +; CHECK-NEXT: fcvt h0, [[INT32]] ; CHECK-NEXT: ret define half @test_trunc(half %a) #0 { %r = call half @llvm.trunc.f16(half %a) @@ -737,10 +795,9 @@ define half @test_nearbyint(half %a) #0 { } ; CHECK-LABEL: test_round: -; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: frinta s0, s1 -; CHECK-NEXT: fcvt h0, s0 -; CHECK-NEXT: frintx s1, s1 +; CHECK-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 +; CHECK-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]] +; CHECK-NEXT: fcvt h0, [[INT32]] ; CHECK-NEXT: ret define half @test_round(half %a) #0 { %r = call half @llvm.round.f16(half %a) diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll new file mode 100644 index 000000000000..55fbf63319ee --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=aarch64-apple-darwin -O0 -fast-isel -fast-isel-abort=0 -verify-machineinstrs < %s | FileCheck %s + +define void @test(i64 %a, i64 %b, i2* %c) { +; CHECK-LABEL: test +; CHECK: and [[REG1:w[0-9]+]], w8, #0x3 +; CHECK-NEXT: strb [[REG1]], {{\[}}x2{{\]}} +; CHECK-NEXT: tbz w9, #0, + %1 = trunc i64 %a to i2 + %2 = trunc i64 %b to i1 +; Force fast-isel to fall back to SDAG. + store i2 %1, i2* %c, align 8 + br i1 %2, label %bb1, label %bb2 + +bb1: + ret void + +bb2: + ret void +} diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll index da6ddbf5101e..e04a62b85c8e 100644 --- a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s -; CHECK-label: test_or +; CHECK-LABEL: test_or ; CHECK: cbnz w0, {{LBB[0-9]+_2}} ; CHECK: cbz w1, {{LBB[0-9]+_1}} define i64 @test_or(i32 %a, i32 %b) { @@ -18,7 +18,7 @@ bb4: ret i64 %2 } -; CHECK-label: test_ans +; CHECK-LABEL: test_and ; CHECK: cbz w0, {{LBB[0-9]+_2}} ; CHECK: cbnz w1, {{LBB[0-9]+_3}} define i64 @test_and(i32 %a, i32 %b) { @@ -36,7 +36,55 @@ bb4: ret i64 %2 } +; If the branch is unpredictable, don't add another branch. 
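+;
+; The hint is the empty !unpredictable metadata attached to the branch (note
+; !2 = !{} at the end of this file), e.g.:
+;   br i1 %or.cond, label %bb3, label %bb4, !unpredictable !2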
+ +; CHECK-LABEL: test_or_unpredictable +; CHECK: cmp w0, #0 +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: cset w9, eq +; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: tbnz w8, #0, +define i64 @test_or_unpredictable(i32 %a, i32 %b) { +bb1: + %0 = icmp eq i32 %a, 0 + %1 = icmp eq i32 %b, 0 + %or.cond = or i1 %0, %1 + br i1 %or.cond, label %bb3, label %bb4, !unpredictable !2 + +bb3: + ret i64 0 + +bb4: + %2 = call i64 @bar() + ret i64 %2 +} + +; CHECK-LABEL: test_and_unpredictable +; CHECK: cmp w0, #0 +; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: cmp w1, #0 +; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: tbz w8, #0, +define i64 @test_and_unpredictable(i32 %a, i32 %b) { +bb1: + %0 = icmp ne i32 %a, 0 + %1 = icmp ne i32 %b, 0 + %or.cond = and i1 %0, %1 + br i1 %or.cond, label %bb4, label %bb3, !unpredictable !2 + +bb3: + ret i64 0 + +bb4: + %2 = call i64 @bar() + ret i64 %2 +} + declare i64 @bar() !0 = !{!"branch_weights", i32 5128, i32 32} !1 = !{!"branch_weights", i32 1024, i32 4136} +!2 = !{} + diff --git a/test/CodeGen/AArch64/fast-isel-cmp-vec.ll b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll new file mode 100644 index 000000000000..2855419a1ca0 --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-cmp-vec.ll @@ -0,0 +1,100 @@ +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -verify-machineinstrs \ +; RUN: -aarch64-atomic-cfg-tidy=0 -disable-cgp -disable-branch-fold \ +; RUN: < %s | FileCheck %s + +; +; Verify that we don't mess up vector comparisons in fast-isel. +; + +define <2 x i32> @icmp_v2i32(<2 x i32> %a) { +; CHECK-LABEL: icmp_v2i32: +; CHECK: ; BB#0: +; CHECK-NEXT: cmeq.2s [[CMP:v[0-9]+]], v0, #0 +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.8b v0, [[CMP]], [[MASK]] +; CHECK-NEXT: ret + %c = icmp eq <2 x i32> %a, zeroinitializer + br label %bb2 +bb2: + %z = zext <2 x i1> %c to <2 x i32> + ret <2 x i32> %z +} + +define <2 x i32> @icmp_constfold_v2i32(<2 x i32> %a) { +; CHECK-LABEL: icmp_constfold_v2i32: +; CHECK: ; BB#0: +; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.2s [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.8b v0, v[[CMP]], [[MASK]] +; CHECK-NEXT: ret + %1 = icmp eq <2 x i32> %a, %a + br label %bb2 +bb2: + %2 = zext <2 x i1> %1 to <2 x i32> + ret <2 x i32> %2 +} + +define <4 x i32> @icmp_v4i32(<4 x i32> %a) { +; CHECK-LABEL: icmp_v4i32: +; CHECK: ; BB#0: +; CHECK-NEXT: cmeq.4s [[CMP:v[0-9]+]], v0, #0 +; CHECK-NEXT: xtn.4h [[CMPV4I16:v[0-9]+]], [[CMP]] +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], [[CMPV4I16]], [[MASK]] +; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0 +; CHECK-NEXT: ret + %c = icmp eq <4 x i32> %a, zeroinitializer + br label %bb2 +bb2: + %z = zext <4 x i1> %c to <4 x i32> + ret <4 x i32> %z +} + +define <4 x i32> @icmp_constfold_v4i32(<4 x i32> %a) { +; CHECK-LABEL: icmp_constfold_v4i32: +; CHECK: ; BB#0: +; CHECK-NEXT: movi d[[CMP:[0-9]+]], #0xffffffffffffffff +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.4h [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.8b [[ZEXT:v[0-9]+]], v[[CMP]], [[MASK]] +; CHECK-NEXT: ushll.4s v0, [[ZEXT]], #0 +; CHECK-NEXT: ret + %1 = icmp eq <4 x i32> %a, %a + br label %bb2 +bb2: + %2 = zext <4 x i1> %1 to <4 x i32> + ret <4 x i32> %2 +} + +define <16 x i8> @icmp_v16i8(<16 x i8> %a) { +; CHECK-LABEL: icmp_v16i8: +; CHECK: ; BB#0: +; CHECK-NEXT: cmeq.16b [[CMP:v[0-9]+]], v0, #0 +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: 
and.16b v0, [[CMP]], [[MASK]] +; CHECK-NEXT: ret + %c = icmp eq <16 x i8> %a, zeroinitializer + br label %bb2 +bb2: + %z = zext <16 x i1> %c to <16 x i8> + ret <16 x i8> %z +} + +define <16 x i8> @icmp_constfold_v16i8(<16 x i8> %a) { +; CHECK-LABEL: icmp_constfold_v16i8: +; CHECK: ; BB#0: +; CHECK-NEXT: movi.2d [[CMP:v[0-9]+]], #0xffffffffffffffff +; CHECK-NEXT: ; BB#1: +; CHECK-NEXT: movi.16b [[MASK:v[0-9]+]], #0x1 +; CHECK-NEXT: and.16b v0, [[CMP]], [[MASK]] +; CHECK-NEXT: ret + %1 = icmp eq <16 x i8> %a, %a + br label %bb2 +bb2: + %2 = zext <16 x i1> %1 to <16 x i8> + ret <16 x i8> %2 +} diff --git a/test/CodeGen/AArch64/fast-isel-folded-shift.ll b/test/CodeGen/AArch64/fast-isel-folded-shift.ll new file mode 100644 index 000000000000..b881ef5c6d52 --- /dev/null +++ b/test/CodeGen/AArch64/fast-isel-folded-shift.ll @@ -0,0 +1,125 @@ +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -verify-machineinstrs < %s | FileCheck %s + +; Test invalid shift values. This will fall-back to SDAG. +; AND +define zeroext i8 @and_rs_i8(i8 signext %a, i8 signext %b) { +; CHECK-LABEL: and_rs_i8 +; CHECK: and [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xff + %1 = shl i8 %b, 8 + %2 = and i8 %a, %1 + ret i8 %2 +} + +define zeroext i16 @and_rs_i16(i16 signext %a, i16 signext %b) { +; CHECK-LABEL: and_rs_i16 +; CHECK: and [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xffff + %1 = shl i16 %b, 16 + %2 = and i16 %a, %1 + ret i16 %2 +} + +define i32 @and_rs_i32(i32 %a, i32 %b) { +; CHECK-LABEL: and_rs_i32 +; CHECK: and w0, w0, w8 + %1 = shl i32 %b, 32 + %2 = and i32 %a, %1 + ret i32 %2 +} + +define i64 @and_rs_i64(i64 %a, i64 %b) { +; CHECK-LABEL: and_rs_i64 +; CHECK: and x0, x0, x8 + %1 = shl i64 %b, 64 + %2 = and i64 %a, %1 + ret i64 %2 +} + +; OR +define zeroext i8 @or_rs_i8(i8 signext %a, i8 signext %b) { +; CHECK-LABEL: or_rs_i8 +; CHECK: orr [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xff + %1 = shl i8 %b, 8 + %2 = or i8 %a, %1 + ret i8 %2 +} + +define zeroext i16 @or_rs_i16(i16 signext %a, i16 signext %b) { +; CHECK-LABEL: or_rs_i16 +; CHECK: orr [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xffff + %1 = shl i16 %b, 16 + %2 = or i16 %a, %1 + ret i16 %2 +} + +define i32 @or_rs_i32(i32 %a, i32 %b) { +; CHECK-LABEL: or_rs_i32 +; CHECK: orr w0, w0, w8 + %1 = shl i32 %b, 32 + %2 = or i32 %a, %1 + ret i32 %2 +} + +define i64 @or_rs_i64(i64 %a, i64 %b) { +; CHECK-LABEL: or_rs_i64 +; CHECK: orr x0, x0, x8 + %1 = shl i64 %b, 64 + %2 = or i64 %a, %1 + ret i64 %2 +} + +; XOR +define zeroext i8 @xor_rs_i8(i8 %a, i8 %b) { +; CHECK-LABEL: xor_rs_i8 +; CHECK: eor [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xff + %1 = shl i8 %b, 8 + %2 = xor i8 %a, %1 + ret i8 %2 +} + +define zeroext i16 @xor_rs_i16(i16 %a, i16 %b) { +; CHECK-LABEL: xor_rs_i16 +; CHECK: eor [[REG:w[0-9]+]], w0, w8 +; CHECK-NEXT: and {{w[0-9]+}}, [[REG]], #0xffff + %1 = shl i16 %b, 16 + %2 = xor i16 %a, %1 + ret i16 %2 +} + +define i32 @xor_rs_i32(i32 %a, i32 %b) { +; CHECK-LABEL: xor_rs_i32 +; CHECK: eor w0, w0, w8 + %1 = shl i32 %b, 32 + %2 = xor i32 %a, %1 + ret i32 %2 +} + +define i64 @xor_rs_i64(i64 %a, i64 %b) { +; CHECK-LABEL: xor_rs_i64 +; CHECK: eor x0, x0, x8 + %1 = shl i64 %b, 64 + %2 = xor i64 %a, %1 + ret i64 %2 +} + +;ADD +define i32 @add_rs_i32(i32 %a, i32 %b) { +; CHECK-LABEL: add_rs_i32 +; CHECK: add w0, w0, w8 + %1 = shl i32 %b, 32 + %2 = add i32 %a, %1 + ret i32 %2 +} + +define i64 @add_rs_i64(i64 %a, i64 %b) { +; CHECK-LABEL: 
add_rs_i64 +; CHECK: add x0, x0, x8 + %1 = shl i64 %b, 64 + %2 = add i64 %a, %1 + ret i64 %2 +} + diff --git a/test/CodeGen/AArch64/fast-isel-logic-op.ll b/test/CodeGen/AArch64/fast-isel-logic-op.ll index 89c5f2c48024..16d0429fe98d 100644 --- a/test/CodeGen/AArch64/fast-isel-logic-op.ll +++ b/test/CodeGen/AArch64/fast-isel-logic-op.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=0 -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel=1 -fast-isel-abort=1 -verify-machineinstrs < %s | FileCheck %s ; AND diff --git a/test/CodeGen/AArch64/fastcc-reserved.ll b/test/CodeGen/AArch64/fastcc-reserved.ll index a392619a768d..b5e03f08280f 100644 --- a/test/CodeGen/AArch64/fastcc-reserved.ll +++ b/test/CodeGen/AArch64/fastcc-reserved.ll @@ -16,7 +16,7 @@ define fastcc void @foo(i32 %in) { ; CHECK: mov x29, sp ; Reserve space for call-frame: -; CHECK: sub sp, sp, #16 +; CHECK: str w{{[0-9]+}}, [sp, #-16]! call fastcc void @will_pop([8 x i32] undef, i32 42) ; CHECK: bl will_pop @@ -42,7 +42,7 @@ define void @foo1(i32 %in) { ; CHECK: mov x29, sp ; Reserve space for call-frame -; CHECK: sub sp, sp, #16 +; CHECK: str w{{[0-9]+}}, [sp, #-16]! call void @wont_pop([8 x i32] undef, i32 42) ; CHECK: bl wont_pop diff --git a/test/CodeGen/AArch64/fastcc.ll b/test/CodeGen/AArch64/fastcc.ll index 9917fcd044fd..f021eb232618 100644 --- a/test/CodeGen/AArch64/fastcc.ll +++ b/test/CodeGen/AArch64/fastcc.ll @@ -7,12 +7,12 @@ define fastcc void @func_stack0() { ; CHECK-LABEL: func_stack0: ; CHECK: mov x29, sp -; CHECK-NEXT: sub sp, sp, #32 +; CHECK: str w{{[0-9]+}}, [sp, #-32]! ; CHECK-TAIL-LABEL: func_stack0: ; CHECK-TAIL: stp x29, x30, [sp, #-16]! ; CHECK-TAIL-NEXT: mov x29, sp -; CHECK-TAIL-NEXT: sub sp, sp, #32 +; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]! call fastcc void @func_stack8([8 x i32] undef, i32 42) @@ -55,13 +55,13 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) { ; CHECK-LABEL: func_stack8: ; CHECK: stp x29, x30, [sp, #-16]! ; CHECK: mov x29, sp -; CHECK: sub sp, sp, #32 +; CHECK: str w{{[0-9]+}}, [sp, #-32]! ; CHECK-TAIL-LABEL: func_stack8: ; CHECK-TAIL: stp x29, x30, [sp, #-16]! ; CHECK-TAIL: mov x29, sp -; CHECK-TAIL: sub sp, sp, #32 +; CHECK-TAIL: str w{{[0-9]+}}, [sp, #-32]! 
 call fastcc void @func_stack8([8 x i32] undef, i32 42)
diff --git a/test/CodeGen/AArch64/fcvt_combine.ll b/test/CodeGen/AArch64/fcvt_combine.ll
new file mode 100644
index 000000000000..093ce4a4cd85
--- /dev/null
+++ b/test/CodeGen/AArch64/fcvt_combine.ll
@@ -0,0 +1,154 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
+
+; CHECK-LABEL: test1
+; CHECK-NOT: fmul.2s
+; CHECK: fcvtzs.2s v0, v0, #4
+; CHECK: ret
+define <2 x i32> @test1(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; CHECK-LABEL: test2
+; CHECK-NOT: fmul.4s
+; CHECK: fcvtzs.4s v0, v0, #3
+; CHECK: ret
+define <4 x i32> @test2(<4 x float> %f) {
+  %mul.i = fmul <4 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+  %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32>
+  ret <4 x i32> %vcvt.i
+}
+
+; CHECK-LABEL: test3
+; CHECK-NOT: fmul.2d
+; CHECK: fcvtzs.2d v0, v0, #5
+; CHECK: ret
+define <2 x i64> @test3(<2 x double> %d) {
+  %mul.i = fmul <2 x double> %d, <double 3.200000e+01, double 3.200000e+01>
+  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i64>
+  ret <2 x i64> %vcvt.i
+}
+
+; Truncate double to i32
+; CHECK-LABEL: test4
+; CHECK-NOT: fmul.2d v0, v0, #4
+; CHECK: fcvtzs.2d v0, v0
+; CHECK: xtn.2s
+; CHECK: ret
+define <2 x i32> @test4(<2 x double> %d) {
+  %mul.i = fmul <2 x double> %d, <double 1.600000e+01, double 1.600000e+01>
+  %vcvt.i = fptosi <2 x double> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Truncate float to i16
+; CHECK-LABEL: test5
+; CHECK-NOT: fmul.2s
+; CHECK: fcvtzs.2s v0, v0, #4
+; CHECK: ret
+define <2 x i16> @test5(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i16>
+  ret <2 x i16> %vcvt.i
+}
+
+; Don't convert float to i64
+; CHECK-LABEL: test6
+; CHECK: fmov.2s v1, #16.00000000
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtl v0.2d, v0.2s
+; CHECK: fcvtzs.2d v0, v0
+; CHECK: ret
+define <2 x i64> @test6(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i64>
+  ret <2 x i64> %vcvt.i
+}
+
+; Check unsigned conversion.
+; CHECK-LABEL: test7
+; CHECK-NOT: fmul.2s
+; CHECK: fcvtzu.2s v0, v0, #4
+; CHECK: ret
+define <2 x i32> @test7(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Test which should not fold due to non-power of 2.
+; CHECK-LABEL: test8
+; CHECK: fmov.2s v1, #17.00000000
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzu.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test8(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.700000e+01, float 1.700000e+01>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Test which should not fold due to non-matching power of 2.
+; CHECK-LABEL: test9
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzu.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test9(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 1.600000e+01, float 8.000000e+00>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Don't combine all undefs.
+; CHECK-LABEL: test10
+; CHECK: fmul.2s v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: fcvtzu.2s v{{[0-9]+}}, v{{[0-9]+}}
+; CHECK: ret
+define <2 x i32> @test10(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float undef, float undef>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Combine if mix of undef and pow2.
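+
+; The combine exercised by this file rewrites a multiply by a power of two
+; followed by a convert into one fixed-point convert, e.g. test1's pattern
+; (sketch):
+;   %m = fmul <2 x float> %f, <float 1.600000e+01, float 1.600000e+01>
+;   %i = fptosi <2 x float> %m to <2 x i32>
+; becomes "fcvtzs.2s v0, v0, #4", since 16.0 = 2^4. test11 below additionally
+; checks that undef lanes do not block the fold when the defined lanes agree.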
+; CHECK-LABEL: test11
+; CHECK: fcvtzu.2s v0, v0, #3
+; CHECK: ret
+define <2 x i32> @test11(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float undef, float 8.000000e+00>
+  %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Don't combine when multiplied by 0.0.
+; CHECK-LABEL: test12
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzs.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test12(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 0.000000e+00, float 0.000000e+00>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Test which should not fold due to power of 2 out of range (i.e., 2^33).
+; CHECK-LABEL: test13
+; CHECK: fmul.2s v0, v0, v1
+; CHECK: fcvtzs.2s v0, v0
+; CHECK: ret
+define <2 x i32> @test13(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 0x4200000000000000, float 0x4200000000000000>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK-LABEL: test14
+; CHECK: fcvtzs.2s v0, v0, #32
+; CHECK: ret
+define <2 x i32> @test14(<2 x float> %f) {
+  %mul.i = fmul <2 x float> %f, <float 0x41F0000000000000, float 0x41F0000000000000>
+  %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
diff --git a/test/CodeGen/AArch64/fdiv_combine.ll b/test/CodeGen/AArch64/fdiv_combine.ll
new file mode 100644
index 000000000000..6f38a267ec3f
--- /dev/null
+++ b/test/CodeGen/AArch64/fdiv_combine.ll
@@ -0,0 +1,115 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-neon-syntax=apple -verify-machineinstrs -o - %s | FileCheck %s
+
+; Test signed conversion.
+; CHECK-LABEL: @test1
+; CHECK: scvtf.2s v0, v0, #4
+; CHECK: ret
+define <2 x float> @test1(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 1.600000e+01, float 1.600000e+01>
+  ret <2 x float> %div.i
+}
+
+; Test unsigned conversion.
+; CHECK-LABEL: @test2
+; CHECK: ucvtf.2s v0, v0, #3
+; CHECK: ret
+define <2 x float> @test2(<2 x i32> %in) {
+entry:
+  %vcvt.i = uitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00>
+  ret <2 x float> %div.i
+}
+
+; Test which should not fold due to non-power of 2.
+; CHECK-LABEL: @test3
+; CHECK: scvtf.2s v0, v0
+; CHECK: fmov.2s v1, #9.00000000
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test3(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 9.000000e+00, float 9.000000e+00>
+  ret <2 x float> %div.i
+}
+
+; Test which should not fold due to power of 2 out of range.
+; CHECK-LABEL: @test4
+; CHECK: scvtf.2s v0, v0
+; CHECK: movi.2s v1, #0x50, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test4(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
+  ret <2 x float> %div.i
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK-LABEL: @test5
+; CHECK: scvtf.2s v0, v0, #32
+; CHECK: ret
+define <2 x float> @test5(<2 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <2 x i32> %in to <2 x float>
+  %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
+  ret <2 x float> %div.i
+}
+
+; Test quadword.
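+
+; fdiv_combine is the reciprocal case of fcvt_combine: a convert followed by
+; a divide by 2^N folds to a fixed-point convert, e.g. test6's pattern
+; (sketch):
+;   %c = sitofp <4 x i32> %in to <4 x float>
+;   %d = fdiv <4 x float> %c, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+; becomes "scvtf.4s v0, v0, #2", since 4.0 = 2^2.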
+; CHECK-LABEL: @test6
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test6(<4 x i32> %in) {
+entry:
+  %vcvt.i = sitofp <4 x i32> %in to <4 x float>
+  %div.i = fdiv <4 x float> %vcvt.i, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+  ret <4 x float> %div.i
+}
+
+; Test unsigned i16 to float
+; CHECK-LABEL: @test7
+; CHECK: ushll.4s v0, v0, #0
+; CHECK: ucvtf.4s v0, v0, #1
+; CHECK: ret
+define <4 x float> @test7(<4 x i16> %in) {
+  %conv = uitofp <4 x i16> %in to <4 x float>
+  %shift = fdiv <4 x float> %conv, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  ret <4 x float> %shift
+}
+
+; Test signed i16 to float
+; CHECK-LABEL: @test8
+; CHECK: sshll.4s v0, v0, #0
+; CHECK: scvtf.4s v0, v0, #2
+; CHECK: ret
+define <4 x float> @test8(<4 x i16> %in) {
+  %conv = sitofp <4 x i16> %in to <4 x float>
+  %shift = fdiv <4 x float> %conv, <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
+  ret <4 x float> %shift
+}
+
+; Can't convert i64 to float.
+; CHECK-LABEL: @test9
+; CHECK: ucvtf.2d v0, v0
+; CHECK: fcvtn v0.2s, v0.2d
+; CHECK: movi.2s v1, #0x40, lsl #24
+; CHECK: fdiv.2s v0, v0, v1
+; CHECK: ret
+define <2 x float> @test9(<2 x i64> %in) {
+  %conv = uitofp <2 x i64> %in to <2 x float>
+  %shift = fdiv <2 x float> %conv, <float 2.000000e+00, float 2.000000e+00>
+  ret <2 x float> %shift
+}
+
+; CHECK-LABEL: @test10
+; CHECK: ucvtf.2d v0, v0, #1
+; CHECK: ret
+define <2 x double> @test10(<2 x i64> %in) {
+  %conv = uitofp <2 x i64> %in to <2 x double>
+  %shift = fdiv <2 x double> %conv, <double 2.000000e+00, double 2.000000e+00>
+  ret <2 x double> %shift
+}
diff --git a/test/CodeGen/AArch64/fold-constants.ll b/test/CodeGen/AArch64/fold-constants.ll
index 2dd0d1245930..c0fec4d171cd 100644
--- a/test/CodeGen/AArch64/fold-constants.ll
+++ b/test/CodeGen/AArch64/fold-constants.ll
@@ -3,9 +3,6 @@ define i64 @dotests_616() {
 ; CHECK-LABEL: dotests_616
 ; CHECK: movi d0, #0000000000000000
-; CHECK-NEXT: umov w8, v0.b[2]
-; CHECK-NEXT: sbfx w8, w8, #0, #1
-; CHECK-NEXT: fmov s0, w8
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
 entry:
@@ -19,3 +16,19 @@ entry:
   %vget_lane = extractelement <1 x i64> %4, i32 0
   ret i64 %vget_lane
 }
+
+; PR25763 - folding constant vector comparisons with sign-extended result
+define <8 x i16> @dotests_458() {
+; CHECK-LABEL: dotests_458
+; CHECK: movi d0, #0x00000000ff0000
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: ret
+entry:
+  %vclz_v.i = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> , i1 false) #6
+  %vsra_n = lshr <8 x i8> %vclz_v.i,
+  %name_6 = or <8 x i8> %vsra_n,
+  %cmp.i603 = icmp slt <8 x i8> %name_6,
+  %vmovl.i4.i = sext <8 x i1> %cmp.i603 to <8 x i16>
+  ret <8 x i16> %vmovl.i4.i
+}
+declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1)
diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll
index 0dbda152fca9..f6e4bdf73459 100644
--- a/test/CodeGen/AArch64/fp16-v4-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v4-instructions.ll
@@ -130,7 +130,6 @@ define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) {
   ret <4 x i16> %2
 }
 
-
 define <4 x half> @sitofp_i8(<4 x i8> %a) #0 {
 ; CHECK-LABEL: sitofp_i8:
 ; CHECK-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8
@@ -218,4 +217,54 @@ define <4 x half> @uitofp_i64(<4 x i64> %a) #0 {
   ret <4 x half> %1
 }
 
+define void @test_insert_at_zero(half %a, <4 x half>* %b) #0 {
+; CHECK-LABEL: test_insert_at_zero:
+; CHECK-NEXT: str d0, [x0]
+; CHECK-NEXT: ret
+  %1 = insertelement <4 x half> undef, half %a, i64 0
+  store <4 x half> %1, <4 x half>* %b, align 4
+  ret void
+}
+
+define <4 x i8> @fptosi_i8(<4 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i8:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+  %1 = fptosi <4 x half> %a to <4 x i8>
+  ret <4 x i8> %1
+}
+
+define <4 x i16> @fptosi_i16(<4 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i16:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+  %1 = fptosi <4 x half> %a to <4 x i16>
+  ret <4 x i16> %1
+}
+
+define <4 x i8> @fptoui_i8(<4 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i8:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; NOTE: fcvtzs selected here because the xtn shaves the sign bit
+; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+  %1 = fptoui <4 x half> %a to <4 x i8>
+  ret <4 x i8> %1
+}
+
+define <4 x i16> @fptoui_i16(<4 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i16:
+; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h
+; CHECK-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]]
+; CHECK-NEXT: xtn v0.4h, [[REG2]]
+; CHECK-NEXT: ret
+  %1 = fptoui <4 x half> %a to <4 x i16>
+  ret <4 x i16> %1
+}
+
 attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll
index 10a8c22d6f7e..137d1f358a30 100644
--- a/test/CodeGen/AArch64/fp16-v8-instructions.ll
+++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll
@@ -358,4 +358,67 @@ define <8 x half> @uitofp_i64(<8 x i64> %a) #0 {
   ret <8 x half> %1
 }
 
+define void @test_insert_at_zero(half %a, <8 x half>* %b) #0 {
+; CHECK-LABEL: test_insert_at_zero:
+; CHECK-NEXT: str q0, [x0]
+; CHECK-NEXT: ret
+  %1 = insertelement <8 x half> undef, half %a, i64 0
+  store <8 x half> %1, <8 x half>* %b, align 4
+  ret void
+}
+
+define <8 x i8> @fptosi_i8(<8 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i8:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: xtn v0.8b, [[I16]].8h
+; CHECK-NEXT: ret
+  %1 = fptosi <8 x half> %a to <8 x i8>
+  ret <8 x i8> %1
+}
+
+define <8 x i16> @fptosi_i16(<8 x half> %a) #0 {
+; CHECK-LABEL: fptosi_i16:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: ret
+  %1 = fptosi <8 x half> %a to <8 x i16>
+  ret <8 x i16> %1
+}
+
+define <8 x i8> @fptoui_i8(<8 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i8:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: xtn v0.8b, [[I16]].8h
+; CHECK-NEXT: ret
+  %1 = fptoui <8 x half> %a to <8 x i8>
+  ret <8 x i8> %1
+}
+
+define <8 x i16> @fptoui_i16(<8 x half> %a) #0 {
+; CHECK-LABEL: fptoui_i16:
+; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h
+; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h
+; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]]
+; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]]
+; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]]
+; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]]
+; CHECK-NEXT: ret
+  %1 = fptoui <8 x half> %a to <8 x i16>
+  ret <8 x i16> %1
+}
+
 attributes #0 = { nounwind }
diff --git
a/test/CodeGen/AArch64/free-zext.ll b/test/CodeGen/AArch64/free-zext.ll index cff11f85bda4..ea4f1f4e10f3 100644 --- a/test/CodeGen/AArch64/free-zext.ll +++ b/test/CodeGen/AArch64/free-zext.ll @@ -1,7 +1,7 @@ ; RUN: llc -verify-machineinstrs -o - %s -mtriple=arm64-apple-ios7.0 | FileCheck %s define i64 @test_free_zext(i8* %a, i16* %b) { -; CHECK-LABEL: test_free_zext +; CHECK-LABEL: test_free_zext: ; CHECK-DAG: ldrb w[[A:[0-9]+]], [x0] ; CHECK: ldrh w[[B:[0-9]+]], [x1] ; CHECK: add x0, x[[B]], x[[A]] @@ -12,3 +12,60 @@ define i64 @test_free_zext(i8* %a, i16* %b) { %add = add nsw i64 %conv1, %conv ret i64 %add } + +define void @test_free_zext2(i32* %ptr, i32* %dst1, i64* %dst2) { +; CHECK-LABEL: test_free_zext2: +; CHECK: ldrh w[[A:[0-9]+]], [x0] +; CHECK-NOT: and x +; CHECK: str w[[A]], [x1] +; CHECK: str x[[A]], [x2] + %load = load i32, i32* %ptr, align 8 + %load16 = and i32 %load, 65535 + %load64 = zext i32 %load16 to i64 + store i32 %load16, i32* %dst1, align 4 + store i64 %load64, i64* %dst2, align 8 + ret void +} + +; Test for CodeGenPrepare::optimizeLoadExt(): simple case: two loads +; feeding a phi that zext's each loaded value. +define i32 @test_free_zext3(i32* %ptr, i32* %ptr2, i32* %dst, i32 %c) { +; CHECK-LABEL: test_free_zext3: +bb1: +; CHECK: ldrh [[REG:w[0-9]+]] +; CHECK-NOT: and {{w[0-9]+}}, [[REG]], #0xffff + %tmp1 = load i32, i32* %ptr, align 4 + %cmp = icmp ne i32 %c, 0 + br i1 %cmp, label %bb2, label %bb3 +bb2: +; CHECK: ldrh [[REG2:w[0-9]+]] +; CHECK-NOT: and {{w[0-9]+}}, [[REG2]], #0xffff + %tmp2 = load i32, i32* %ptr2, align 4 + br label %bb3 +bb3: + %tmp3 = phi i32 [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +; CHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff + %tmpand = and i32 %tmp3, 65535 + ret i32 %tmpand +} + +; Test for CodeGenPrepare::optimizeLoadExt(): check case of zext-able +; load feeding a phi in the same block. +define void @test_free_zext4(i32* %ptr, i32* %ptr2, i32* %dst) { +; CHECK-LABEL: test_free_zext4: +; CHECK: ldrh [[REG:w[0-9]+]] +; TODO: fix isel to remove final and XCHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xffff +; CHECK: ldrh [[REG:w[0-9]+]] +bb1: + %load1 = load i32, i32* %ptr, align 4 + br label %loop +loop: + %phi = phi i32 [ %load1, %bb1 ], [ %load2, %loop ] + %and = and i32 %phi, 65535 + store i32 %and, i32* %dst, align 4 + %load2 = load i32, i32* %ptr2, align 4 + %cmp = icmp ne i32 %and, 0 + br i1 %cmp, label %loop, label %end +end: + ret void +} diff --git a/test/CodeGen/AArch64/func-argpassing.ll b/test/CodeGen/AArch64/func-argpassing.ll index 9100ae39282b..2ea13e388867 100644 --- a/test/CodeGen/AArch64/func-argpassing.ll +++ b/test/CodeGen/AArch64/func-argpassing.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck --check-prefix=CHECK %s -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 | FileCheck --check-prefix=CHECK-NOFP %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -disable-post-ra | FileCheck --check-prefix=CHECK %s +; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=-fp-armv8 -disable-post-ra | FileCheck --check-prefix=CHECK-NOFP %s %myStruct = type { i64 , i8, i32 } diff --git a/test/CodeGen/AArch64/func-calls.ll b/test/CodeGen/AArch64/func-calls.ll index 22a33157fd55..2f45666ba13a 100644 --- a/test/CodeGen/AArch64/func-calls.ll +++ b/test/CodeGen/AArch64/func-calls.ll @@ -89,11 +89,11 @@ define void @check_stack_args() { ; that varstruct is passed on the stack. 
Rather dependent on how a ; memcpy gets created, but the following works for now. -; CHECK-DAG: str {{q[0-9]+}}, [sp] +; CHECK-DAG: str {{q[0-9]+}}, [sp, #-16] ; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 ; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b -; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp] +; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp, #-16]! ; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 ; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]] diff --git a/test/CodeGen/AArch64/global-alignment.ll b/test/CodeGen/AArch64/global-alignment.ll index 657778e34187..5e820b8bb303 100644 --- a/test/CodeGen/AArch64/global-alignment.ll +++ b/test/CodeGen/AArch64/global-alignment.ll @@ -3,7 +3,7 @@ @var32 = global [3 x i32] zeroinitializer @var64 = global [3 x i64] zeroinitializer @var32_align64 = global [3 x i32] zeroinitializer, align 8 -@alias = alias [3 x i32]* @var32_align64 +@alias = alias [3 x i32], [3 x i32]* @var32_align64 define i64 @test_align32() { ; CHECK-LABEL: test_align32: diff --git a/test/CodeGen/AArch64/global-merge-1.ll b/test/CodeGen/AArch64/global-merge-1.ll index 14b04303ffb3..b93f41c07df9 100644 --- a/test/CodeGen/AArch64/global-merge-1.ll +++ b/test/CodeGen/AArch64/global-merge-1.ll @@ -12,16 +12,20 @@ define void @f1(i32 %a1, i32 %a2) { ;CHECK-APPLE-IOS-NOT: adrp -;CHECK-APPLE-IOS: adrp x8, __MergedGlobals@PAGE +;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE ;CHECK-APPLE-IOS-NOT: adrp -;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals@PAGEOFF +;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF store i32 %a1, i32* @m, align 4 store i32 %a2, i32* @n, align 4 ret void } -;CHECK: .type _MergedGlobals,@object // @_MergedGlobals -;CHECK: .local _MergedGlobals -;CHECK: .comm _MergedGlobals,8,8 +;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals +;CHECK: .local .L_MergedGlobals +;CHECK: .comm .L_MergedGlobals,8,8 +;CHECK: m = .L_MergedGlobals +;CHECK: n = .L_MergedGlobals+4 -;CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3 ; @_MergedGlobals +;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals,8,3 ; @_MergedGlobals +;CHECK-APPLE-IOS-NOT: _m = l__MergedGlobals +;CHECK-APPLE-IOS-NOT: _n = l__MergedGlobals+4 diff --git a/test/CodeGen/AArch64/global-merge-2.ll b/test/CodeGen/AArch64/global-merge-2.ll index af684039bf10..53bed1d9bc09 100644 --- a/test/CodeGen/AArch64/global-merge-2.ll +++ b/test/CodeGen/AArch64/global-merge-2.ll @@ -9,8 +9,8 @@ define void @f1(i32 %a1, i32 %a2) { ;CHECK-APPLE-IOS-LABEL: _f1: ;CHECK-APPLE-IOS-NOT: adrp -;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE -;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF +;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE +;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF ;CHECK-APPLE-IOS-NOT: adrp store i32 %a1, i32* @x, align 4 store i32 %a2, i32* @y, align 4 @@ -19,34 +19,34 @@ define void @f1(i32 %a1, i32 %a2) { define void @g1(i32 %a1, i32 %a2) { ;CHECK-APPLE-IOS-LABEL: _g1: -;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE -;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF +;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE +;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF ;CHECK-APPLE-IOS-NOT: adrp store i32 %a1, i32* @y, align 4 store i32 %a2, i32* @z, align 4 ret void } -;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x -;CHECK: .globl _MergedGlobals_x -;CHECK: .align 3 -;CHECK: _MergedGlobals_x: -;CHECK: .size _MergedGlobals_x, 12 +;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals +;CHECK: .local .L_MergedGlobals +;CHECK: .comm .L_MergedGlobals,12,8 ;CHECK: 
.globl x -;CHECK: x = _MergedGlobals_x +;CHECK: x = .L_MergedGlobals +;CHECK: .size x, 4 ;CHECK: .globl y -;CHECK: y = _MergedGlobals_x+4 +;CHECK: y = .L_MergedGlobals+4 +;CHECK: .size y, 4 ;CHECK: .globl z -;CHECK: z = _MergedGlobals_x+8 +;CHECK: z = .L_MergedGlobals+8 +;CHECK: .size z, 4 -;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x -;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_x,12,3 +;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals,12,3 ;CHECK-APPLE-IOS: .globl _x -;CHECK-APPLE-IOS: _x = __MergedGlobals_x +;CHECK-APPLE-IOS: = l__MergedGlobals ;CHECK-APPLE-IOS: .globl _y -;CHECK-APPLE-IOS: _y = __MergedGlobals_x+4 +;CHECK-APPLE-IOS: _y = l__MergedGlobals+4 ;CHECK-APPLE-IOS: .globl _z -;CHECK-APPLE-IOS: _z = __MergedGlobals_x+8 +;CHECK-APPLE-IOS: _z = l__MergedGlobals+8 ;CHECK-APPLE-IOS: .subsections_via_symbols diff --git a/test/CodeGen/AArch64/global-merge-3.ll b/test/CodeGen/AArch64/global-merge-3.ll index 925108308e56..6895380ca63e 100644 --- a/test/CodeGen/AArch64/global-merge-3.ll +++ b/test/CodeGen/AArch64/global-merge-3.ll @@ -1,17 +1,17 @@ -; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s -; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s -; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS +; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s +; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS @x = global [1000 x i32] zeroinitializer, align 1 @y = global [1000 x i32] zeroinitializer, align 1 @z = internal global i32 1, align 4 define void @f1(i32 %a1, i32 %a2, i32 %a3) { -;CHECK-APPLE-IOS: adrp x8, __MergedGlobals_x@PAGE +;CHECK-APPLE-IOS: adrp x8, l__MergedGlobals@PAGE ;CHECK-APPLE-IOS-NOT: adrp -;CHECK-APPLE-IOS: add x8, x8, __MergedGlobals_x@PAGEOFF -;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_y@PAGE -;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_y@PAGEOFF +;CHECK-APPLE-IOS: add x8, x8, l__MergedGlobals@PAGEOFF +;CHECK-APPLE-IOS: adrp x9, l__MergedGlobals.1@PAGE +;CHECK-APPLE-IOS: add x9, x9, l__MergedGlobals.1@PAGEOFF %x3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @x, i32 0, i64 3 %y3 = getelementptr inbounds [1000 x i32], [1000 x i32]* @y, i32 0, i64 3 store i32 %a1, i32* %x3, align 4 @@ -20,32 +20,32 @@ define void @f1(i32 %a1, i32 %a2, i32 %a3) { ret void } -;CHECK: .type _MergedGlobals_x,@object // @_MergedGlobals_x -;CHECK: .globl _MergedGlobals_x +;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals ;CHECK: .align 4 -;CHECK: _MergedGlobals_x: -;CHECK: .size _MergedGlobals_x, 4004 +;CHECK: .L_MergedGlobals: +;CHECK: .size .L_MergedGlobals, 4004 -;CHECK: .type _MergedGlobals_y,@object // @_MergedGlobals_y -;CHECK: .globl _MergedGlobals_y -;CHECK: _MergedGlobals_y: -;CHECK: .size _MergedGlobals_y, 4000 +;CHECK: .type .L_MergedGlobals.1,@object // @_MergedGlobals.1 +;CHECK: .local .L_MergedGlobals.1 +;CHECK: .comm .L_MergedGlobals.1,4000,16 -;CHECK-APPLE-IOS: .globl __MergedGlobals_x ; @_MergedGlobals_x ;CHECK-APPLE-IOS: .align 4 -;CHECK-APPLE-IOS: __MergedGlobals_x: +;CHECK-APPLE-IOS: 
l__MergedGlobals: ;CHECK-APPLE-IOS: .long 1 ;CHECK-APPLE-IOS: .space 4000 -;CHECK-APPLE-IOS: .globl __MergedGlobals_y ; @_MergedGlobals_y -;CHECK-APPLE-IOS: .zerofill __DATA,__common,__MergedGlobals_y,4000,4 +;CHECK-APPLE-IOS: .zerofill __DATA,__bss,l__MergedGlobals.1,4000,4 +;CHECK: z = .L_MergedGlobals ;CHECK: .globl x -;CHECK: x = _MergedGlobals_x+4 +;CHECK: x = .L_MergedGlobals+4 +;CHECK: .size x, 4000 ;CHECK: .globl y -;CHECK: y = _MergedGlobals_y +;CHECK: y = .L_MergedGlobals.1 +;CHECK: .size y, 4000 +;CHECK-APPLE-IOS-NOT: _z = l__MergedGlobals ;CHECK-APPLE-IOS:.globl _x -;CHECK-APPLE-IOS: _x = __MergedGlobals_x+4 +;CHECK-APPLE-IOS: _x = l__MergedGlobals+4 ;CHECK-APPLE-IOS:.globl _y -;CHECK-APPLE-IOS: _y = __MergedGlobals_y +;CHECK-APPLE-IOS: _y = l__MergedGlobals.1 diff --git a/test/CodeGen/AArch64/global-merge-4.ll b/test/CodeGen/AArch64/global-merge-4.ll index bc6b68a9c046..a5109f6e8ea5 100644 --- a/test/CodeGen/AArch64/global-merge-4.ll +++ b/test/CodeGen/AArch64/global-merge-4.ll @@ -64,9 +64,9 @@ define internal i32* @returnFoo() #1 { ret i32* getelementptr inbounds ([5 x i32], [5 x i32]* @foo, i64 0, i64 0) } -;CHECK: .type _MergedGlobals,@object // @_MergedGlobals -;CHECK: .local _MergedGlobals -;CHECK: .comm _MergedGlobals,60,16 +;CHECK: .type .L_MergedGlobals,@object // @_MergedGlobals +;CHECK: .local .L_MergedGlobals +;CHECK: .comm .L_MergedGlobals,60,16 attributes #0 = { nounwind ssp } attributes #1 = { nounwind readnone ssp } diff --git a/test/CodeGen/AArch64/global-merge-group-by-use.ll b/test/CodeGen/AArch64/global-merge-group-by-use.ll index ddc044ed9e08..8b3fc97c9e2e 100644 --- a/test/CodeGen/AArch64/global-merge-group-by-use.ll +++ b/test/CodeGen/AArch64/global-merge-group-by-use.ll @@ -12,7 +12,7 @@ ; CHECK-LABEL: f1: define void @f1(i32 %a1, i32 %a2) #0 { -; CHECK-NEXT: adrp x8, [[SET1:__MergedGlobals.[0-9]*]]@PAGE +; CHECK-NEXT: adrp x8, [[SET1:l__MergedGlobals.[0-9]*]]@PAGE ; CHECK-NEXT: add x8, x8, [[SET1]]@PAGEOFF ; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: ret @@ -27,7 +27,7 @@ define void @f1(i32 %a1, i32 %a2) #0 { ; CHECK-LABEL: f2: define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 { -; CHECK-NEXT: adrp x8, [[SET2:__MergedGlobals.[0-9]*]]@PAGE +; CHECK-NEXT: adrp x8, [[SET2:l__MergedGlobals.[0-9]*]]@PAGE ; CHECK-NEXT: add x8, x8, [[SET2]]@PAGEOFF ; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: str w2, [x8, #8] @@ -48,7 +48,7 @@ define void @f2(i32 %a1, i32 %a2, i32 %a3) #0 { ; CHECK-LABEL: f3: define void @f3(i32 %a1, i32 %a2) #0 { ; CHECK-NEXT: adrp x8, _m3@PAGE -; CHECK-NEXT: adrp x9, [[SET3:__MergedGlobals[0-9]*]]@PAGE +; CHECK-NEXT: adrp x9, [[SET3:l__MergedGlobals[0-9]*]]@PAGE ; CHECK-NEXT: str w0, [x8, _m3@PAGEOFF] ; CHECK-NEXT: str w1, [x9, [[SET3]]@PAGEOFF] ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll index e83cbab140a7..399438925771 100644 --- a/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll +++ b/test/CodeGen/AArch64/global-merge-ignore-single-use-minsize.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: f1: define void @f1(i32 %a1, i32 %a2) minsize nounwind { -; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE +; CHECK-NEXT: adrp x8, [[SET:l__MergedGlobals]]@PAGE ; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF ; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll index e6de4699132a..c3756a85feff 100644 --- 
a/test/CodeGen/AArch64/global-merge-ignore-single-use.ll +++ b/test/CodeGen/AArch64/global-merge-ignore-single-use.ll @@ -10,7 +10,7 @@ ; CHECK-LABEL: f1: define void @f1(i32 %a1, i32 %a2) #0 { -; CHECK-NEXT: adrp x8, [[SET:__MergedGlobals]]@PAGE +; CHECK-NEXT: adrp x8, [[SET:l__MergedGlobals]]@PAGE ; CHECK-NEXT: add x8, x8, [[SET]]@PAGEOFF ; CHECK-NEXT: stp w0, w1, [x8] ; CHECK-NEXT: ret diff --git a/test/CodeGen/AArch64/ldst-opt.ll b/test/CodeGen/AArch64/ldst-opt.ll index b2c11c7517c0..d2133213f186 100644 --- a/test/CodeGen/AArch64/ldst-opt.ll +++ b/test/CodeGen/AArch64/ldst-opt.ll @@ -3,11 +3,15 @@ ; This file contains tests for the AArch64 load/store optimizer. %padding = type { i8*, i8*, i8*, i8* } +%s.byte = type { i8, i8 } +%s.halfword = type { i16, i16 } %s.word = type { i32, i32 } %s.doubleword = type { i64, i32 } %s.quadword = type { fp128, i32 } %s.float = type { float, i32 } %s.double = type { double, i32 } +%struct.byte = type { %padding, %s.byte } +%struct.halfword = type { %padding, %s.halfword } %struct.word = type { %padding, %s.word } %struct.doubleword = type { %padding, %s.doubleword } %struct.quadword = type { %padding, %s.quadword } @@ -24,6 +28,62 @@ ; ; with X being either w1, x1, s0, d0 or q0. +declare void @bar_byte(%s.byte*, i8) + +define void @load-pre-indexed-byte(%struct.byte* %ptr) nounwind { +; CHECK-LABEL: load-pre-indexed-byte +; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0 + %add = load i8, i8* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1 + tail call void @bar_byte(%s.byte* %c, i8 %add) + ret void +} + +define void @store-pre-indexed-byte(%struct.byte* %ptr, i8 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-byte +; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1, i32 0 + store i8 %val, i8* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.byte, %struct.byte* %ptr, i64 0, i32 1 + tail call void @bar_byte(%s.byte* %c, i8 %val) + ret void +} + +declare void @bar_halfword(%s.halfword*, i16) + +define void @load-pre-indexed-halfword(%struct.halfword* %ptr) nounwind { +; CHECK-LABEL: load-pre-indexed-halfword +; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0 + %add = load i16, i16* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1 + tail call void @bar_halfword(%s.halfword* %c, i16 %add) + ret void +} + +define void @store-pre-indexed-halfword(%struct.halfword* %ptr, i16 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-halfword +; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}, #32]! +entry: + %a = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1, i32 0 + store i16 %val, i16* %a, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.halfword, %struct.halfword* %ptr, i64 0, i32 1 + tail call void @bar_halfword(%s.halfword* %c, i16 %val) + ret void +} + declare void @bar_word(%s.word*, i32) define void @load-pre-indexed-word(%struct.word* %ptr) nounwind { @@ -164,6 +224,48 @@ bar: ret void } +; Check the following transform: +; +; (ldp|stp) w1, w2 [x0, #32] +; ... +; add x0, x0, #32 +; -> +; (ldp|stp) w1, w2, [x0, #32]! 
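+;
+; A minimal sketch of the shape this transform looks for (illustrative only,
+; not one of the tests; the names below are hypothetical). Note the folded
+; increment must be encodable as the LDP/STP writeback immediate, a signed
+; 7-bit value scaled by the register size (multiples of 4 in [-256, 252]
+; for w registers), which #32 satisfies:
+
+declare void @sketch_keep(i32*)
+
+define void @sketch_pair_writeback(i32* %base, i32 %val) nounwind {
+; Two adjacent word stores at base+32 are a candidate stp pair, and the
+; later use of the advanced address lets the add fold into a pre-index form.
+  %p0 = getelementptr i32, i32* %base, i64 8
+  %p1 = getelementptr i32, i32* %base, i64 9
+  store i32 %val, i32* %p0
+  store i32 %val, i32* %p1
+  call void @sketch_keep(i32* %p0)
+  ret void
+}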
+; + +define void @load-pair-pre-indexed-word(%struct.word* %ptr) nounwind { +; CHECK-LABEL: load-pair-pre-indexed-word +; CHECK: ldp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]! +; CHECK-NOT: add x0, x0, #32 +entry: + %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 + %a1 = load i32, i32* %a, align 4 + %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1 + %b1 = load i32, i32* %b, align 4 + %add = add i32 %a1, %b1 + br label %bar +bar: + %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 + tail call void @bar_word(%s.word* %c, i32 %add) + ret void +} + +define void @store-pair-pre-indexed-word(%struct.word* %ptr, i32 %val) nounwind { +; CHECK-LABEL: store-pair-pre-indexed-word +; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [x0, #32]! +; CHECK-NOT: add x0, x0, #32 +entry: + %a = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 0 + store i32 %val, i32* %a, align 4 + %b = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1, i32 1 + store i32 %val, i32* %b, align 4 + br label %bar +bar: + %c = getelementptr inbounds %struct.word, %struct.word* %ptr, i64 0, i32 1 + tail call void @bar_word(%s.word* %c, i32 %val) + ret void +} + ; Check the following transform: ; ; add x8, x8, #16 @@ -174,11 +276,11 @@ bar: ; ; with X being either w0, x0, s0, d0 or q0. -%pre.struct.i32 = type { i32, i32, i32} -%pre.struct.i64 = type { i32, i64, i64} -%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>} -%pre.struct.float = type { i32, float, float} -%pre.struct.double = type { i32, double, double} +%pre.struct.i32 = type { i32, i32, i32, i32, i32} +%pre.struct.i64 = type { i32, i64, i64, i64, i64} +%pre.struct.i128 = type { i32, <2 x i64>, <2 x i64>, <2 x i64>} +%pre.struct.float = type { i32, float, float, float} +%pre.struct.double = type { i32, double, double, double} define i32 @load-pre-indexed-word2(%pre.struct.i32** %this, i1 %cond, %pre.struct.i32* %load2) nounwind { @@ -270,6 +372,96 @@ return: ret double %ret } +define i32 @load-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, + %pre.struct.i32* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-word3 +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, #12]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i32*, %pre.struct.i32** %this + %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 + br label %return +return: + %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load i32, i32* %retptr + ret i32 %ret +} + +define i64 @load-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, + %pre.struct.i64* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-doubleword3 +; CHECK: ldr x{{[0-9]+}}, [x{{[0-9]+}}, #16]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i64*, %pre.struct.i64** %this + %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load i64, i64* %retptr + ret i64 %ret +} + +define <2 x i64> @load-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, + %pre.struct.i128* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-quadword3 +; CHECK: ldr q{{[0-9]+}}, [x{{[0-9]+}}, #32]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i128*, %pre.struct.i128** %this + %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load <2 x i64>, <2 x i64>* %retptr + ret <2 x i64> %ret +} + +define float @load-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, + %pre.struct.float* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-float3 +; CHECK: ldr s{{[0-9]+}}, [x{{[0-9]+}}, #8]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.float*, %pre.struct.float** %this + %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load float, float* %retptr + ret float %ret +} + +define double @load-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, + %pre.struct.double* %load2) nounwind { +; CHECK-LABEL: load-pre-indexed-double3 +; CHECK: ldr d{{[0-9]+}}, [x{{[0-9]+}}, #16]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.double*, %pre.struct.double** %this + %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] + %ret = load double, double* %retptr + ret double %ret +} + ; Check the following transform: ; ; add x8, x8, #16 @@ -375,6 +567,101 @@ return: ret void } +define void @store-pre-indexed-word3(%pre.struct.i32** %this, i1 %cond, + %pre.struct.i32* %load2, + i32 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-word3 +; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}, #12]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i32*, %pre.struct.i32** %this + %gep1 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load1, i64 0, i32 3 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i32, %pre.struct.i32* %load2, i64 0, i32 4 + br label %return +return: + %retptr = phi i32* [ %gep1, %if.then ], [ %gep2, %if.end ] + store i32 %val, i32* %retptr + ret void +} + +define void @store-pre-indexed-doubleword3(%pre.struct.i64** %this, i1 %cond, + %pre.struct.i64* %load2, + i64 %val) nounwind { +; CHECK-LABEL: store-pre-indexed-doubleword3 +; CHECK: str x{{[0-9]+}}, [x{{[0-9]+}}, #24]! 
+ br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i64*, %pre.struct.i64** %this + %gep1 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load1, i64 0, i32 3 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i64, %pre.struct.i64* %load2, i64 0, i32 4 + br label %return +return: + %retptr = phi i64* [ %gep1, %if.then ], [ %gep2, %if.end ] + store i64 %val, i64* %retptr + ret void +} + +define void @store-pre-indexed-quadword3(%pre.struct.i128** %this, i1 %cond, + %pre.struct.i128* %load2, + <2 x i64> %val) nounwind { +; CHECK-LABEL: store-pre-indexed-quadword3 +; CHECK: str q{{[0-9]+}}, [x{{[0-9]+}}, #32]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.i128*, %pre.struct.i128** %this + %gep1 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.i128, %pre.struct.i128* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi <2 x i64>* [ %gep1, %if.then ], [ %gep2, %if.end ] + store <2 x i64> %val, <2 x i64>* %retptr + ret void +} + +define void @store-pre-indexed-float3(%pre.struct.float** %this, i1 %cond, + %pre.struct.float* %load2, + float %val) nounwind { +; CHECK-LABEL: store-pre-indexed-float3 +; CHECK: str s{{[0-9]+}}, [x{{[0-9]+}}, #8]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.float*, %pre.struct.float** %this + %gep1 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.float, %pre.struct.float* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi float* [ %gep1, %if.then ], [ %gep2, %if.end ] + store float %val, float* %retptr + ret void +} + +define void @store-pre-indexed-double3(%pre.struct.double** %this, i1 %cond, + %pre.struct.double* %load2, + double %val) nounwind { +; CHECK-LABEL: store-pre-indexed-double3 +; CHECK: str d{{[0-9]+}}, [x{{[0-9]+}}, #16]! + br i1 %cond, label %if.then, label %if.end +if.then: + %load1 = load %pre.struct.double*, %pre.struct.double** %this + %gep1 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load1, i64 0, i32 2 + br label %return +if.end: + %gep2 = getelementptr inbounds %pre.struct.double, %pre.struct.double* %load2, i64 0, i32 3 + br label %return +return: + %retptr = phi double* [ %gep1, %if.then ], [ %gep2, %if.end ] + store double %val, double* %retptr + ret void +} + ; Check the following transform: ; ; ldr X, [x20] @@ -385,6 +672,54 @@ return: ; ; with X being either w0, x0, s0, d0 or q0. 
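+
+; A minimal sketch of that shape (illustrative only, not one of the tests;
+; the names below are hypothetical): a load from a pointer that is advanced
+; by a constant on each iteration. The folded increment becomes the
+; post-index writeback immediate, a signed 9-bit value in [-256, 255]:
+
+declare void @sketch_use(i32)
+
+define void @sketch_post_index(i32* %p, i64 %n) nounwind {
+entry:
+  br label %loop
+loop:
+  %ptr = phi i32* [ %p, %entry ], [ %ptr.next, %loop ]
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %v = load i32, i32* %ptr
+  call void @sketch_use(i32 %v)
+; The 16-byte advance below is what becomes, e.g., ldr w8, [x0], #16.
+  %ptr.next = getelementptr i32, i32* %ptr, i64 4
+  %i.next = add i64 %i, 1
+  %done = icmp eq i64 %i.next, %n
+  br i1 %done, label %exit, label %loop
+exit:
+  ret void
+}
+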
+define void @load-post-indexed-byte(i8* %array, i64 %count) nounwind { +; CHECK-LABEL: load-post-indexed-byte +; CHECK: ldrb w{{[0-9]+}}, [x{{[0-9]+}}], #4 +entry: + %gep1 = getelementptr i8, i8* %array, i64 2 + br label %body + +body: + %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i8, i8* %iv2, i64 -1 + %load = load i8, i8* %gep2 + call void @use-byte(i8 %load) + %load2 = load i8, i8* %iv2 + call void @use-byte(i8 %load2) + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i8, i8* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @load-post-indexed-halfword(i16* %array, i64 %count) nounwind { +; CHECK-LABEL: load-post-indexed-halfword +; CHECK: ldrh w{{[0-9]+}}, [x{{[0-9]+}}], #8 +entry: + %gep1 = getelementptr i16, i16* %array, i64 2 + br label %body + +body: + %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i16, i16* %iv2, i64 -1 + %load = load i16, i16* %gep2 + call void @use-halfword(i16 %load) + %load2 = load i16, i16* %iv2 + call void @use-halfword(i16 %load2) + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i16, i16* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + define void @load-post-indexed-word(i32* %array, i64 %count) nounwind { ; CHECK-LABEL: load-post-indexed-word ; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}], #16 @@ -515,6 +850,52 @@ exit: ; ; with X being either w0, x0, s0, d0 or q0. +define void @store-post-indexed-byte(i8* %array, i64 %count, i8 %val) nounwind { +; CHECK-LABEL: store-post-indexed-byte +; CHECK: strb w{{[0-9]+}}, [x{{[0-9]+}}], #4 +entry: + %gep1 = getelementptr i8, i8* %array, i64 2 + br label %body + +body: + %iv2 = phi i8* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i8, i8* %iv2, i64 -1 + %load = load i8, i8* %gep2 + call void @use-byte(i8 %load) + store i8 %val, i8* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i8, i8* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + +define void @store-post-indexed-halfword(i16* %array, i64 %count, i16 %val) nounwind { +; CHECK-LABEL: store-post-indexed-halfword +; CHECK: strh w{{[0-9]+}}, [x{{[0-9]+}}], #8 +entry: + %gep1 = getelementptr i16, i16* %array, i64 2 + br label %body + +body: + %iv2 = phi i16* [ %gep3, %body ], [ %gep1, %entry ] + %iv = phi i64 [ %iv.next, %body ], [ %count, %entry ] + %gep2 = getelementptr i16, i16* %iv2, i64 -1 + %load = load i16, i16* %gep2 + call void @use-halfword(i16 %load) + store i16 %val, i16* %iv2 + %iv.next = add i64 %iv, -4 + %gep3 = getelementptr i16, i16* %iv2, i64 4 + %cond = icmp eq i64 %iv.next, 0 + br i1 %cond, label %exit, label %body + +exit: + ret void +} + define void @store-post-indexed-word(i32* %array, i64 %count, i32 %val) nounwind { ; CHECK-LABEL: store-post-indexed-word ; CHECK: str w{{[0-9]+}}, [x{{[0-9]+}}], #16 @@ -630,12 +1011,98 @@ exit: ret void } +declare void @use-byte(i8) +declare void @use-halfword(i16) declare void @use-word(i32) declare void @use-doubleword(i64) declare void @use-quadword(<2 x i64>) declare void @use-float(float) declare void @use-double(double) +; Check the following transform: +; +; stp w0, [x20] +; ... 
+; add x20, x20, #32 +; -> +; stp w0, [x20], #32 + +define void @store-pair-post-indexed-word() nounwind { +; CHECK-LABEL: store-pair-post-indexed-word +; CHECK: stp w{{[0-9]+}}, w{{[0-9]+}}, [sp], #16 +; CHECK: ret + %src = alloca { i32, i32 }, align 8 + %dst = alloca { i32, i32 }, align 8 + + %src.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 0 + %src.real = load i32, i32* %src.realp + %src.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %src, i32 0, i32 1 + %src.imag = load i32, i32* %src.imagp + + %dst.realp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 0 + %dst.imagp = getelementptr inbounds { i32, i32 }, { i32, i32 }* %dst, i32 0, i32 1 + store i32 %src.real, i32* %dst.realp + store i32 %src.imag, i32* %dst.imagp + ret void +} + +define void @store-pair-post-indexed-doubleword() nounwind { +; CHECK-LABEL: store-pair-post-indexed-doubleword +; CHECK: stp x{{[0-9]+}}, x{{[0-9]+}}, [sp], #32 +; CHECK: ret + %src = alloca { i64, i64 }, align 8 + %dst = alloca { i64, i64 }, align 8 + + %src.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 0 + %src.real = load i64, i64* %src.realp + %src.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %src, i32 0, i32 1 + %src.imag = load i64, i64* %src.imagp + + %dst.realp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 0 + %dst.imagp = getelementptr inbounds { i64, i64 }, { i64, i64 }* %dst, i32 0, i32 1 + store i64 %src.real, i64* %dst.realp + store i64 %src.imag, i64* %dst.imagp + ret void +} + +define void @store-pair-post-indexed-float() nounwind { +; CHECK-LABEL: store-pair-post-indexed-float +; CHECK: stp s{{[0-9]+}}, s{{[0-9]+}}, [sp], #16 +; CHECK: ret + %src = alloca { float, float }, align 8 + %dst = alloca { float, float }, align 8 + + %src.realp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 0 + %src.real = load float, float* %src.realp + %src.imagp = getelementptr inbounds { float, float }, { float, float }* %src, i32 0, i32 1 + %src.imag = load float, float* %src.imagp + + %dst.realp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 0 + %dst.imagp = getelementptr inbounds { float, float }, { float, float }* %dst, i32 0, i32 1 + store float %src.real, float* %dst.realp + store float %src.imag, float* %dst.imagp + ret void +} + +define void @store-pair-post-indexed-double() nounwind { +; CHECK-LABEL: store-pair-post-indexed-double +; CHECK: stp d{{[0-9]+}}, d{{[0-9]+}}, [sp], #32 +; CHECK: ret + %src = alloca { double, double }, align 8 + %dst = alloca { double, double }, align 8 + + %src.realp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 0 + %src.real = load double, double* %src.realp + %src.imagp = getelementptr inbounds { double, double }, { double, double }* %src, i32 0, i32 1 + %src.imag = load double, double* %src.imagp + + %dst.realp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 0 + %dst.imagp = getelementptr inbounds { double, double }, { double, double }* %dst, i32 0, i32 1 + store double %src.real, double* %dst.realp + store double %src.imag, double* %dst.imagp + ret void +} + ; Check the following transform: ; ; (ldr|str) X, [x20] diff --git a/test/CodeGen/AArch64/merge-store.ll b/test/CodeGen/AArch64/merge-store.ll index 18dbad4ce25b..86f5edd5da1d 100644 --- a/test/CodeGen/AArch64/merge-store.ll +++ b/test/CodeGen/AArch64/merge-store.ll @@ -1,4 +1,5 @@ ; RUN: llc -march aarch64 
%s -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown -mcpu=cyclone | FileCheck %s --check-prefix=CYCLONE
 
 @g0 = external global <3 x float>, align 16
 @g1 = external global <3 x float>, align 4
@@ -18,3 +19,32 @@ define void @blam() {
   store float %tmp9, float* %tmp7
   ret void;
 }
+
+
+; PR21711 - Merge vector stores into wider vector stores.
+
+; On Cyclone, the stores should not get merged into a 16-byte store because
+; unaligned 16-byte stores are slow. This test would infinite loop when
+; the fastness of unaligned accesses was not specified correctly.
+
+define void @merge_vec_extract_stores(<4 x float> %v1, <2 x float>* %ptr) {
+  %idx0 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 3
+  %idx1 = getelementptr inbounds <2 x float>, <2 x float>* %ptr, i64 4
+
+  %shuffle0 = shufflevector <4 x float> %v1, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %shuffle1 = shufflevector <4 x float> %v1, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+
+  store <2 x float> %shuffle0, <2 x float>* %idx0, align 8
+  store <2 x float> %shuffle1, <2 x float>* %idx1, align 8
+  ret void
+
+; CHECK-LABEL: merge_vec_extract_stores
+; CHECK: stur q0, [x0, #24]
+; CHECK-NEXT: ret
+
+; CYCLONE-LABEL: merge_vec_extract_stores
+; CYCLONE: ext v1.16b, v0.16b, v0.16b, #8
+; CYCLONE-NEXT: str d0, [x0, #24]
+; CYCLONE-NEXT: str d1, [x0, #32]
+; CYCLONE-NEXT: ret
+}
diff --git a/test/CodeGen/AArch64/misched-fusion.ll b/test/CodeGen/AArch64/misched-fusion.ll
new file mode 100644
index 000000000000..d38869329034
--- /dev/null
+++ b/test/CodeGen/AArch64/misched-fusion.ll
@@ -0,0 +1,34 @@
+; RUN: llc -o - %s -mcpu=cyclone | FileCheck %s
+target triple = "arm64-apple-ios"
+
+declare void @foobar(i32 %v0, i32 %v1)
+
+; Make sure sub is scheduled in front of cbnz
+; CHECK-LABEL: test_sub_cbz:
+; CHECK: add w[[ADDRES:[0-9]+]], w1, #7
+; CHECK: sub w[[SUBRES:[0-9]+]], w0, #13
+; CHECK-NEXT: cbnz w[[SUBRES]], [[SKIPBLOCK:LBB[0-9_]+]]
+; CHECK: mov x0, x[[ADDRES]]
+; CHECK: mov x1, x[[SUBRES]]
+; CHECK: bl _foobar
+; CHECK: [[SKIPBLOCK]]:
+; CHECK: mov x0, x[[SUBRES]]
+; CHECK: mov x1, x[[ADDRES]]
+; CHECK: bl _foobar
+define void @test_sub_cbz(i32 %a0, i32 %a1) {
+entry:
+  ; except for the fusion opportunity the sub/add should be equal so the
+  ; scheduler would leave them in source order if it weren't for the scheduling
+  %v0 = sub i32 %a0, 13
+  %cond = icmp eq i32 %v0, 0
+  %v1 = add i32 %a1, 7
+  br i1 %cond, label %if, label %exit
+
+if:
+  call void @foobar(i32 %v1, i32 %v0)
+  br label %exit
+
+exit:
+  call void @foobar(i32 %v0, i32 %v1)
+  ret void
+}
diff --git a/test/CodeGen/AArch64/mul-lohi.ll b/test/CodeGen/AArch64/mul-lohi.ll
index 4515697b9991..e93521858a31 100644
--- a/test/CodeGen/AArch64/mul-lohi.ll
+++ b/test/CodeGen/AArch64/mul-lohi.ll
@@ -1,5 +1,6 @@
 ; RUN: llc -mtriple=arm64-apple-ios7.0 -mcpu=cyclone %s -o - | FileCheck %s
 ; RUN: llc -mtriple=aarch64_be-linux-gnu -mcpu=cyclone %s -o - | FileCheck --check-prefix=CHECK-BE %s
+
 define i128 @test_128bitmul(i128 %lhs, i128 %rhs) {
 ; CHECK-LABEL: test_128bitmul:
 ; CHECK-DAG: mul [[PART1:x[0-9]+]], x0, x3
 ; CHECK-DAG: umulh
@@ -16,3 +17,31 @@
   %prod = mul i128 %lhs, %rhs
   ret i128 %prod
 }
+
+; The machine combiner should create madd instructions when
+; optimizing for size because that's smaller than mul + add.
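+
+; For reference, the decomposition behind these checks, sketched here as
+; arithmetic rather than taken from the test: write each i128 operand as
+; 64-bit halves, lhs = l1:l0 and rhs = r1:r0. Then
+;
+;   lo(lhs*rhs) = low64(l0*r0)                    ->  mul  x0, x0, x2
+;   hi(lhs*rhs) = umulh(l0,r0) + l0*r1 + l1*r0    ->  umulh + two madds
+;
+; Each multiply-plus-accumulate pair folds into a single madd, which is why
+; the optsize/minsize checks below expect umulh, madd, madd, mul.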
+ +define i128 @test_128bitmul_optsize(i128 %lhs, i128 %rhs) optsize { +; CHECK-LABEL: test_128bitmul_optsize: +; CHECK: umulh [[HI:x[0-9]+]], x0, x2 +; CHECK-NEXT: madd [[TEMP1:x[0-9]+]], x0, x3, [[HI]] +; CHECK-NEXT: madd x1, x1, x2, [[TEMP1]] +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: ret + + %prod = mul i128 %lhs, %rhs + ret i128 %prod +} + +define i128 @test_128bitmul_minsize(i128 %lhs, i128 %rhs) minsize { +; CHECK-LABEL: test_128bitmul_minsize: +; CHECK: umulh [[HI:x[0-9]+]], x0, x2 +; CHECK-NEXT: madd [[TEMP1:x[0-9]+]], x0, x3, [[HI]] +; CHECK-NEXT: madd x1, x1, x2, [[TEMP1]] +; CHECK-NEXT: mul x0, x0, x2 +; CHECK-NEXT: ret + + %prod = mul i128 %lhs, %rhs + ret i128 %prod +} + diff --git a/test/CodeGen/AArch64/nest-register.ll b/test/CodeGen/AArch64/nest-register.ll index 9c659fb74ec4..cc42913e10a6 100644 --- a/test/CodeGen/AArch64/nest-register.ll +++ b/test/CodeGen/AArch64/nest-register.ll @@ -1,4 +1,4 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s +; RUN: llc -disable-post-ra -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; Tests that the 'nest' parameter attribute causes the relevant parameter to be ; passed in the right register. diff --git a/test/CodeGen/AArch64/nontemporal.ll b/test/CodeGen/AArch64/nontemporal.ll new file mode 100644 index 000000000000..db9779e03190 --- /dev/null +++ b/test/CodeGen/AArch64/nontemporal.ll @@ -0,0 +1,339 @@ +; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s + +define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 { +; CHECK-LABEL: test_stnp_v4i64: +; CHECK-NEXT: mov d[[HI1:[0-9]+]], v1[1] +; CHECK-NEXT: mov d[[HI0:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d1, d[[HI1]], [x0, #16] +; CHECK-NEXT: stnp d0, d[[HI0]], [x0] +; CHECK-NEXT: ret + store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 { +; CHECK-LABEL: test_stnp_v4i32: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <4 x i32> %v, <4 x i32>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 { +; CHECK-LABEL: test_stnp_v8i16: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <8 x i16> %v, <8 x i16>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 { +; CHECK-LABEL: test_stnp_v16i8: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <16 x i8> %v, <16 x i8>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2i32(<2 x i32>* %p, <2 x i32> %v) #0 { +; CHECK-LABEL: test_stnp_v2i32: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <2 x i32> %v, <2 x i32>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4i16(<4 x i16>* %p, <4 x i16> %v) #0 { +; CHECK-LABEL: test_stnp_v4i16: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <4 x i16> %v, <4 x i16>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v8i8(<8 x i8>* %p, <8 x i8> %v) #0 { +; CHECK-LABEL: test_stnp_v8i8: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <8 x i8> %v, <8 x i8>* %p, align 1, !nontemporal !0 + ret void +} + +define void 
@test_stnp_v2f64(<2 x double>* %p, <2 x double> %v) #0 { +; CHECK-LABEL: test_stnp_v2f64: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <2 x double> %v, <2 x double>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32(<4 x float>* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0] +; CHECK-NEXT: ret + store <4 x float> %v, <4 x float>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32(<2 x float>* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <2 x float> %v, <2 x float>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v1f64(<1 x double>* %p, <1 x double> %v) #0 { +; CHECK-LABEL: test_stnp_v1f64: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <1 x double> %v, <1 x double>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 { +; CHECK-LABEL: test_stnp_v1i64: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0] +; CHECK-NEXT: ret + store <1 x i64> %v, <1 x i64>* %p, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_i64(i64* %p, i64 %v) #0 { +; CHECK-LABEL: test_stnp_i64: +; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 +; CHECK-NEXT: stnp w1, w[[HI]], [x0] +; CHECK-NEXT: ret + store i64 %v, i64* %p, align 1, !nontemporal !0 + ret void +} + + +define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 { +; CHECK-LABEL: test_stnp_v2f64_offset: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #16] +; CHECK-NEXT: ret + %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 1 + store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 { +; CHECK-LABEL: test_stnp_v2f64_offset_neg: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-16] +; CHECK-NEXT: ret + %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 -1 + store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #8] +; CHECK-NEXT: ret + %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 1 + store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset_neg: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-8] +; CHECK-NEXT: ret + %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 -1 + store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 { +; CHECK-LABEL: test_stnp_i64_offset: +; CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 +; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8] +; CHECK-NEXT: ret + %tmp0 = getelementptr i64, i64* %p, i32 1 + store i64 %v, i64* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 { +; CHECK-LABEL: test_stnp_i64_offset_neg: +; 
CHECK-NEXT: ubfx x[[HI:[0-9]+]], x1, #0, #32 +; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8] +; CHECK-NEXT: ret + %tmp0 = getelementptr i64, i64* %p, i32 -1 + store i64 %v, i64* %tmp0, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #4 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 4 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4: +; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #4 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -4 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #512 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 512 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_504: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #504] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 504 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #508 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 508 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520: +; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #520 +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -520 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_neg_512: +; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-512] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -512 + %tmp1 = bitcast i8* %tmp0 to <4 x float>* + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + + +define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256: +; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #256 +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] +; CHECK-NEXT: 
ret + %tmp0 = getelementptr i8, i8* %p, i32 256 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset_252: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #252] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 252 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260: +; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #260 +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -260 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +define void @test_stnp_v2f32_offset_neg_256(i8* %p, <2 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v2f32_offset_neg_256: +; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1] +; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-256] +; CHECK-NEXT: ret + %tmp0 = getelementptr i8, i8* %p, i32 -256 + %tmp1 = bitcast i8* %tmp0 to <2 x float>* + store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0 + ret void +} + +declare void @dummy(<4 x float>*) + +define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_alloca: +; CHECK: stnp d0, d{{.*}}, [sp] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: bl _dummy + %tmp0 = alloca <4 x float> + store <4 x float> %v, <4 x float>* %tmp0, align 1, !nontemporal !0 + call void @dummy(<4 x float>* %tmp0) + ret void +} + +define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 { +; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2: +; CHECK: stnp d0, d{{.*}}, [sp, #16] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: bl _dummy + %tmp0 = alloca <4 x float>, i32 2 + %tmp1 = getelementptr <4 x float>, <4 x float>* %tmp0, i32 1 + store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0 + call void @dummy(<4 x float>* %tmp0) + ret void +} + +!0 = !{ i32 1 } + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/pic-eh-stubs.ll b/test/CodeGen/AArch64/pic-eh-stubs.ll index 143558f7b2c7..c59a5b6743d6 100644 --- a/test/CodeGen/AArch64/pic-eh-stubs.ll +++ b/test/CodeGen/AArch64/pic-eh-stubs.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: .xword .L_ZTIi.DW.stub-[[TYPEINFO_LBL]] ; .. and which is properly defined (in a writable section for the dynamic loader) later. 
-; CHECK: .section .data.rel,"aw"
+; CHECK: .data
; CHECK: .L_ZTIi.DW.stub:
; CHECK-NEXT: .xword _ZTIi
diff --git a/test/CodeGen/AArch64/readcyclecounter.ll b/test/CodeGen/AArch64/readcyclecounter.ll
new file mode 100644
index 000000000000..037f11809386
--- /dev/null
+++ b/test/CodeGen/AArch64/readcyclecounter.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=aarch64-unknown-unknown -asm-verbose=false < %s |\
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=PERFMON
+; RUN: llc -mtriple=aarch64-unknown-unknown -mattr=-perfmon -asm-verbose=false < %s |\
+; RUN: FileCheck %s --check-prefix=CHECK --check-prefix=NOPERFMON
+
+define i64 @test_readcyclecounter() nounwind {
+  ; CHECK-LABEL: test_readcyclecounter:
+  ; PERFMON-NEXT: mrs x0, PMCCNTR_EL0
+  ; NOPERFMON-NEXT: mov x0, xzr
+  ; CHECK-NEXT: ret
+  %tmp0 = call i64 @llvm.readcyclecounter()
+  ret i64 %tmp0
+}
+
+declare i64 @llvm.readcyclecounter()
diff --git a/test/CodeGen/AArch64/regress-tblgen-chains.ll b/test/CodeGen/AArch64/regress-tblgen-chains.ll
index 0d301bbd502a..ba34873eaa5b 100644
--- a/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -27,8 +27,8 @@ define i64 @test_chains() {
; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR:#-?[0-9]+]]]
; CHECK: add {{w[0-9]+}}, {{w[0-9]+}}, #1
-; CHECK: sturb {{w[0-9]+}}, [x29, [[LOCADDR]]]
-; CHECK: ldurb {{w[0-9]+}}, [x29, [[LOCADDR]]]
+; CHECK: sturb w[[STRVAL:[0-9]+]], [x29, [[LOCADDR]]]
+; CHECK: and w0, w[[STRVAL]], #0xff
  %ret.1 = load i8, i8* %locvar
  %ret.2 = zext i8 %ret.1 to i64
diff --git a/test/CodeGen/AArch64/remat.ll b/test/CodeGen/AArch64/remat.ll
index 8b3e6dd5ad92..a397c339a2d7 100644
--- a/test/CodeGen/AArch64/remat.ll
+++ b/test/CodeGen/AArch64/remat.ll
@@ -1,3 +1,4 @@
+; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a35 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a57 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a53 -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnuabi -mcpu=cortex-a72 -o - %s | FileCheck %s
diff --git a/test/CodeGen/AArch64/rotate.ll b/test/CodeGen/AArch64/rotate.ll
new file mode 100644
index 000000000000..5ac86d5f59c9
--- /dev/null
+++ b/test/CodeGen/AArch64/rotate.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=aarch64--linux-gnueabihf | FileCheck %s
+
+;; This used to cause a backend crash about not being able to
+;; select ROTL. Make sure it generates the basic ushr/shl.
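+;;
+;; or(lshr(x, N), shl(x, 64 - N)) is a rotate, and the DAG combiner
+;; folds such a shift pair into a single ISD::ROTL node; AArch64 has no
+;; vector rotate instruction, so the node must be expanded back into
+;; the two shifts instead of reaching instruction selection unhandled.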
+define <2 x i64> @testcase(<2 x i64>* %in) {
+; CHECK-LABEL: testcase
+; CHECK: ushr {{v[0-9]+}}.2d
+; CHECK: shl {{v[0-9]+}}.2d
+  %1 = load <2 x i64>, <2 x i64>* %in
+  %2 = lshr <2 x i64> %1, <i64 8, i64 8>
+  %3 = shl <2 x i64> %1, <i64 56, i64 56>
+  %4 = or <2 x i64> %2, %3
+  ret <2 x i64> %4
+}
diff --git a/test/CodeGen/AArch64/round-conv.ll b/test/CodeGen/AArch64/round-conv.ll
new file mode 100644
index 000000000000..5ed7d9409e3d
--- /dev/null
+++ b/test/CodeGen/AArch64/round-conv.ll
@@ -0,0 +1,330 @@
+; RUN: llc < %s -mtriple=arm64 | FileCheck %s
+
+; CHECK-LABEL: testmsws:
+; CHECK: fcvtms w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testmsws(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptosi float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxs:
+; CHECK: fcvtms x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testmsxs(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptosi float %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testmswd:
+; CHECK: fcvtms w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testmswd(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptosi double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmsxd:
+; CHECK: fcvtms x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testmsxd(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptosi double %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testmuws:
+; CHECK: fcvtmu w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testmuws(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptoui float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmuxs:
+; CHECK: fcvtmu x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testmuxs(float %a) {
+entry:
+  %call = call float @floorf(float %a) nounwind readnone
+  %conv = fptoui float %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testmuwd:
+; CHECK: fcvtmu w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testmuwd(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptoui double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testmuxd:
+; CHECK: fcvtmu x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testmuxd(double %a) {
+entry:
+  %call = call double @floor(double %a) nounwind readnone
+  %conv = fptoui double %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testpsws:
+; CHECK: fcvtps w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i32 @testpsws(float %a) {
+entry:
+  %call = call float @ceilf(float %a) nounwind readnone
+  %conv = fptosi float %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testpsxs:
+; CHECK: fcvtps x0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define i64 @testpsxs(float %a) {
+entry:
+  %call = call float @ceilf(float %a) nounwind readnone
+  %conv = fptosi float %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testpswd:
+; CHECK: fcvtps w0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i32 @testpswd(double %a) {
+entry:
+  %call = call double @ceil(double %a) nounwind readnone
+  %conv = fptosi double %call to i32
+  ret i32 %conv
+}
+
+; CHECK-LABEL: testpsxd:
+; CHECK: fcvtps x0, d0
+; CHECK-NOT: frintx {{d[0-9]+}}, d0
+define i64 @testpsxd(double %a) {
+entry:
+  %call = call double @ceil(double %a) nounwind readnone
+  %conv = fptosi double %call to i64
+  ret i64 %conv
+}
+
+; CHECK-LABEL: testpuws:
+; CHECK: fcvtpu w0, s0
+; CHECK-NOT: frintx {{s[0-9]+}}, s0
+define
i32 @testpuws(float %a) { +entry: + %call = call float @ceilf(float %a) nounwind readnone + %conv = fptoui float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testpuxs: +; CHECK: fcvtpu x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testpuxs(float %a) { +entry: + %call = call float @ceilf(float %a) nounwind readnone + %conv = fptoui float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testpuwd: +; CHECK: fcvtpu w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testpuwd(double %a) { +entry: + %call = call double @ceil(double %a) nounwind readnone + %conv = fptoui double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testpuxd: +; CHECK: fcvtpu x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testpuxd(double %a) { +entry: + %call = call double @ceil(double %a) nounwind readnone + %conv = fptoui double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testzsws: +; CHECK: fcvtzs w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testzsws(float %a) { +entry: + %call = call float @truncf(float %a) nounwind readnone + %conv = fptosi float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testzsxs: +; CHECK: fcvtzs x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testzsxs(float %a) { +entry: + %call = call float @truncf(float %a) nounwind readnone + %conv = fptosi float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testzswd: +; CHECK: fcvtzs w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testzswd(double %a) { +entry: + %call = call double @trunc(double %a) nounwind readnone + %conv = fptosi double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testzsxd: +; CHECK: fcvtzs x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testzsxd(double %a) { +entry: + %call = call double @trunc(double %a) nounwind readnone + %conv = fptosi double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testzuws: +; CHECK: fcvtzu w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testzuws(float %a) { +entry: + %call = call float @truncf(float %a) nounwind readnone + %conv = fptoui float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testzuxs: +; CHECK: fcvtzu x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testzuxs(float %a) { +entry: + %call = call float @truncf(float %a) nounwind readnone + %conv = fptoui float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testzuwd: +; CHECK: fcvtzu w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testzuwd(double %a) { +entry: + %call = call double @trunc(double %a) nounwind readnone + %conv = fptoui double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testzuxd: +; CHECK: fcvtzu x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testzuxd(double %a) { +entry: + %call = call double @trunc(double %a) nounwind readnone + %conv = fptoui double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testasws: +; CHECK: fcvtas w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testasws(float %a) { +entry: + %call = call float @roundf(float %a) nounwind readnone + %conv = fptosi float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testasxs: +; CHECK: fcvtas x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testasxs(float %a) { +entry: + %call = call float @roundf(float %a) nounwind readnone + %conv = fptosi float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testaswd: +; CHECK: fcvtas w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testaswd(double %a) { +entry: + %call = call double @round(double %a) nounwind readnone + %conv = fptosi double %call to i32 + ret 
i32 %conv +} + +; CHECK-LABEL: testasxd: +; CHECK: fcvtas x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testasxd(double %a) { +entry: + %call = call double @round(double %a) nounwind readnone + %conv = fptosi double %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testauws: +; CHECK: fcvtau w0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i32 @testauws(float %a) { +entry: + %call = call float @roundf(float %a) nounwind readnone + %conv = fptoui float %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testauxs: +; CHECK: fcvtau x0, s0 +; CHECK-NOT: frintx {{s[0-9]+}}, s0 +define i64 @testauxs(float %a) { +entry: + %call = call float @roundf(float %a) nounwind readnone + %conv = fptoui float %call to i64 + ret i64 %conv +} + +; CHECK-LABEL: testauwd: +; CHECK: fcvtau w0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i32 @testauwd(double %a) { +entry: + %call = call double @round(double %a) nounwind readnone + %conv = fptoui double %call to i32 + ret i32 %conv +} + +; CHECK-LABEL: testauxd: +; CHECK: fcvtau x0, d0 +; CHECK-NOT: frintx {{d[0-9]+}}, d0 +define i64 @testauxd(double %a) { +entry: + %call = call double @round(double %a) nounwind readnone + %conv = fptoui double %call to i64 + ret i64 %conv +} + +declare float @floorf(float) nounwind readnone +declare double @floor(double) nounwind readnone +declare float @ceilf(float) nounwind readnone +declare double @ceil(double) nounwind readnone +declare float @truncf(float) nounwind readnone +declare double @trunc(double) nounwind readnone +declare float @roundf(float) nounwind readnone +declare double @round(double) nounwind readnone diff --git a/test/CodeGen/AArch64/shrink-wrap.ll b/test/CodeGen/AArch64/shrink-wrap.ll new file mode 100755 index 000000000000..ea101a8da15d --- /dev/null +++ b/test/CodeGen/AArch64/shrink-wrap.ll @@ -0,0 +1,184 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s + +; Regression test for a crash in the ShrinkWrap pass not handling targets +; requiring a register scavenger. 
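+;
+; Shrink-wrapping sinks the prologue/epilogue past blocks that never
+; touch the stack (the early tail-call exits below), while the large
+; frame here needs the register scavenger to find an emergency spill
+; slot for out-of-range frame offsets; there are deliberately no CHECK
+; lines, so simply compiling the function without crashing is the test.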
+ +%type1 = type { i32, i32, i32 } + +@g1 = external unnamed_addr global i32, align 4 +@g2 = external unnamed_addr global i1 +@g3 = external unnamed_addr global [144 x i32], align 4 +@g4 = external unnamed_addr constant [144 x i32], align 4 +@g5 = external unnamed_addr constant [144 x i32], align 4 +@g6 = external unnamed_addr constant [144 x i32], align 4 +@g7 = external unnamed_addr constant [144 x i32], align 4 +@g8 = external unnamed_addr constant [144 x i32], align 4 +@g9 = external unnamed_addr constant [144 x i32], align 4 +@g10 = external unnamed_addr constant [144 x i32], align 4 +@g11 = external unnamed_addr global i32, align 4 +@g12 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g13 = external unnamed_addr global %type1*, align 8 +@g14 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g15 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g16 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g17 = external unnamed_addr global [62 x i32], align 4 +@g18 = external unnamed_addr global i32, align 4 +@g19 = external unnamed_addr constant [144 x i32], align 4 +@g20 = external unnamed_addr global [144 x [144 x i8]], align 1 +@g21 = external unnamed_addr global i32, align 4 + +declare fastcc i32 @foo() + +declare fastcc i32 @bar() + +define internal fastcc i32 @func(i32 %alpha, i32 %beta) { +entry: + %v1 = alloca [2 x [11 x i32]], align 4 + %v2 = alloca [11 x i32], align 16 + %v3 = alloca [11 x i32], align 16 + switch i32 undef, label %if.end.9 [ + i32 4, label %if.then.6 + i32 3, label %if.then.2 + ] + +if.then.2: + %call3 = tail call fastcc i32 @bar() + br label %cleanup + +if.then.6: + %call7 = tail call fastcc i32 @foo() + unreachable + +if.end.9: + %tmp = load i32, i32* @g1, align 4 + %rem.i = urem i32 %tmp, 1000000 + %idxprom.1.i = zext i32 %rem.i to i64 + %tmp1 = load %type1*, %type1** @g13, align 8 + %v4 = getelementptr inbounds %type1, %type1* %tmp1, i64 %idxprom.1.i, i32 0 + %.b = load i1, i1* @g2, align 1 + %v5 = select i1 %.b, i32 2, i32 0 + %tmp2 = load i32, i32* @g18, align 4 + %tmp3 = load i32, i32* @g11, align 4 + %idxprom58 = sext i32 %tmp3 to i64 + %tmp4 = load i32, i32* @g21, align 4 + %idxprom69 = sext i32 %tmp4 to i64 + br label %for.body + +for.body: + %v6 = phi i32 [ 0, %if.end.9 ], [ %v7, %for.inc ] + %a.0983 = phi i32 [ 1, %if.end.9 ], [ %a.1, %for.inc ] + %arrayidx = getelementptr inbounds [62 x i32], [62 x i32]* @g17, i64 0, i64 undef + %tmp5 = load i32, i32* %arrayidx, align 4 + br i1 undef, label %for.inc, label %if.else.51 + +if.else.51: + %idxprom53 = sext i32 %tmp5 to i64 + %arrayidx54 = getelementptr inbounds [144 x i32], [144 x i32]* @g3, i64 0, i64 %idxprom53 + %tmp6 = load i32, i32* %arrayidx54, align 4 + switch i32 %tmp6, label %for.inc [ + i32 1, label %block.bb + i32 10, label %block.bb.159 + i32 7, label %block.bb.75 + i32 8, label %block.bb.87 + i32 9, label %block.bb.147 + i32 12, label %block.bb.111 + i32 3, label %block.bb.123 + i32 4, label %block.bb.135 + ] + +block.bb: + %arrayidx56 = getelementptr inbounds [144 x i32], [144 x i32]* @g6, i64 0, i64 %idxprom53 + %tmp7 = load i32, i32* %arrayidx56, align 4 + %shr = ashr i32 %tmp7, %v5 + %add57 = add nsw i32 %shr, 0 + %arrayidx61 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g14, i64 0, i64 %idxprom53, i64 %idxprom58 + %tmp8 = load i8, i8* %arrayidx61, align 1 + %conv = zext i8 %tmp8 to i32 + %add62 = add nsw i32 %conv, %add57 + br label %for.inc + +block.bb.75: + %arrayidx78 = getelementptr inbounds [144 x i32], [144 x i32]* 
@g10, i64 0, i64 %idxprom53 + %tmp9 = load i32, i32* %arrayidx78, align 4 + %shr79 = ashr i32 %tmp9, %v5 + %add80 = add nsw i32 %shr79, 0 + %add86 = add nsw i32 0, %add80 + br label %for.inc + +block.bb.87: + %arrayidx90 = getelementptr inbounds [144 x i32], [144 x i32]* @g9, i64 0, i64 %idxprom53 + %tmp10 = load i32, i32* %arrayidx90, align 4 + %shr91 = ashr i32 %tmp10, 0 + %sub92 = sub nsw i32 0, %shr91 + %arrayidx96 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g15, i64 0, i64 %idxprom53, i64 %idxprom69 + %tmp11 = load i8, i8* %arrayidx96, align 1 + %conv97 = zext i8 %tmp11 to i32 + %sub98 = sub nsw i32 %sub92, %conv97 + br label %for.inc + +block.bb.111: + %arrayidx114 = getelementptr inbounds [144 x i32], [144 x i32]* @g19, i64 0, i64 %idxprom53 + %tmp12 = load i32, i32* %arrayidx114, align 4 + %shr115 = ashr i32 %tmp12, 0 + %sub116 = sub nsw i32 0, %shr115 + %arrayidx120 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g12, i64 0, i64 %idxprom53, i64 %idxprom69 + %tmp13 = load i8, i8* %arrayidx120, align 1 + %conv121 = zext i8 %tmp13 to i32 + %sub122 = sub nsw i32 %sub116, %conv121 + br label %for.inc + +block.bb.123: + %arrayidx126 = getelementptr inbounds [144 x i32], [144 x i32]* @g5, i64 0, i64 %idxprom53 + %tmp14 = load i32, i32* %arrayidx126, align 4 + %shr127 = ashr i32 %tmp14, %v5 + %add128 = add nsw i32 %shr127, 0 + %add134 = add nsw i32 0, %add128 + br label %for.inc + +block.bb.135: + %arrayidx138 = getelementptr inbounds [144 x i32], [144 x i32]* @g4, i64 0, i64 %idxprom53 + %tmp15 = load i32, i32* %arrayidx138, align 4 + %shr139 = ashr i32 %tmp15, 0 + %sub140 = sub nsw i32 0, %shr139 + %arrayidx144 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g20, i64 0, i64 %idxprom53, i64 %idxprom69 + %tmp16 = load i8, i8* %arrayidx144, align 1 + %conv145 = zext i8 %tmp16 to i32 + %sub146 = sub nsw i32 %sub140, %conv145 + br label %for.inc + +block.bb.147: + %arrayidx150 = getelementptr inbounds [144 x i32], [144 x i32]* @g8, i64 0, i64 %idxprom53 + %tmp17 = load i32, i32* %arrayidx150, align 4 + %shr151 = ashr i32 %tmp17, %v5 + %add152 = add nsw i32 %shr151, 0 + %arrayidx156 = getelementptr inbounds [144 x [144 x i8]], [144 x [144 x i8]]* @g16, i64 0, i64 %idxprom53, i64 %idxprom58 + %tmp18 = load i8, i8* %arrayidx156, align 1 + %conv157 = zext i8 %tmp18 to i32 + %add158 = add nsw i32 %conv157, %add152 + br label %for.inc + +block.bb.159: + %sub160 = add nsw i32 %v6, -450 + %arrayidx162 = getelementptr inbounds [144 x i32], [144 x i32]* @g7, i64 0, i64 %idxprom53 + %tmp19 = load i32, i32* %arrayidx162, align 4 + %shr163 = ashr i32 %tmp19, 0 + %sub164 = sub nsw i32 %sub160, %shr163 + %sub170 = sub nsw i32 %sub164, 0 + br label %for.inc + +for.inc: + %v7 = phi i32 [ %v6, %for.body ], [ %v6, %if.else.51 ], [ %sub170, %block.bb.159 ], [ %add158, %block.bb.147 ], [ %sub146, %block.bb.135 ], [ %add134, %block.bb.123 ], [ %sub122, %block.bb.111 ], [ %sub98, %block.bb.87 ], [ %add86, %block.bb.75 ], [ %add62, %block.bb ] + %a.1 = phi i32 [ %a.0983, %for.body ], [ undef, %if.else.51 ], [ undef, %block.bb.159 ], [ undef, %block.bb.147 ], [ undef, %block.bb.135 ], [ undef, %block.bb.123 ], [ undef, %block.bb.111 ], [ undef, %block.bb.87 ], [ undef, %block.bb.75 ], [ undef, %block.bb ] + %cmp48 = icmp sgt i32 %a.1, %tmp2 + br i1 %cmp48, label %for.end, label %for.body + +for.end: + store i32 %tmp, i32* %v4, align 4 + %hold_hash.i.7 = getelementptr inbounds %type1, %type1* %tmp1, i64 %idxprom.1.i, i32 1 + store i32 0, i32* %hold_hash.i.7, 
align 4 + br label %cleanup + +cleanup: + %retval.0 = phi i32 [ %call3, %if.then.2 ], [ undef, %for.end ] + ret i32 %retval.0 +} diff --git a/test/CodeGen/AArch64/stackmap-frame-setup.ll b/test/CodeGen/AArch64/stackmap-frame-setup.ll new file mode 100644 index 000000000000..4712012b0d25 --- /dev/null +++ b/test/CodeGen/AArch64/stackmap-frame-setup.ll @@ -0,0 +1,20 @@ +; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=aarch64-apple-darwin -stop-after machine-sink %s | FileCheck %s --check-prefix=ISEL +; RUN: llc -o /dev/null -verify-machineinstrs -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort=1 -stop-after machine-sink %s | FileCheck %s --check-prefix=FAST-ISEL + +define void @caller_meta_leaf() { +entry: + %metadata = alloca i64, i32 3, align 8 + store i64 11, i64* %metadata + store i64 12, i64* %metadata + store i64 13, i64* %metadata +; ISEL: ADJCALLSTACKDOWN 0, implicit-def +; ISEL-NEXT: STACKMAP +; ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def + call void (i64, i32, ...) @llvm.experimental.stackmap(i64 4, i32 0, i64* %metadata) +; FAST-ISEL: ADJCALLSTACKDOWN 0, implicit-def +; FAST-ISEL-NEXT: STACKMAP +; FAST-ISEL-NEXT: ADJCALLSTACKUP 0, 0, implicit-def + ret void +} + +declare void @llvm.experimental.stackmap(i64, i32, ...) diff --git a/test/CodeGen/AArch64/tail-call.ll b/test/CodeGen/AArch64/tail-call.ll index e5766154bb46..fa5d8b943b6b 100644 --- a/test/CodeGen/AArch64/tail-call.ll +++ b/test/CodeGen/AArch64/tail-call.ll @@ -59,8 +59,7 @@ define fastcc void @caller_to16_from8([8 x i32], i64 %a) { ; callee will not deallocate the space, even in fastcc. tail call fastcc void @callee_stack16([8 x i32] undef, i64 42, i64 2) -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-NEXT: add sp, sp, #16 +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack16 ret void } @@ -89,8 +88,7 @@ define fastcc void @caller_to16_from16([8 x i32], i64 %a, i64 %b) { ret void ; CHECK: ldp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16] -; CHECK-NEXT: add sp, sp, #16 +; CHECK: stp {{x[0-9]+}}, {{x[0-9]+}}, [sp, #16]! ; CHECK-NEXT: b callee_stack16 } diff --git a/test/CodeGen/AArch64/tailcall-explicit-sret.ll b/test/CodeGen/AArch64/tailcall-explicit-sret.ll index 4d80f2ac5c12..bcc8af8d0690 100644 --- a/test/CodeGen/AArch64/tailcall-explicit-sret.ll +++ b/test/CodeGen/AArch64/tailcall-explicit-sret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false | FileCheck %s +; RUN: llc < %s -mtriple arm64-apple-darwin -aarch64-load-store-opt=false -asm-verbose=false -disable-post-ra | FileCheck %s ; Disable the load/store optimizer to avoid having LDP/STPs and simplify checks. 
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" diff --git a/test/CodeGen/AArch64/tbi.ll b/test/CodeGen/AArch64/tbi.ll new file mode 100644 index 000000000000..ab2d31b7cacc --- /dev/null +++ b/test/CodeGen/AArch64/tbi.ll @@ -0,0 +1,102 @@ +; RUN: llc -aarch64-use-tbi -mtriple=arm64-apple-ios8.0.0 < %s \ +; RUN: | FileCheck --check-prefix=TBI --check-prefix=BOTH %s +; RUN: llc -aarch64-use-tbi -mtriple=arm64-apple-ios7.1.0 < %s \ +; RUN: | FileCheck --check-prefix=NO_TBI --check-prefix=BOTH %s + +; BOTH-LABEL:ld_and32: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_and32(i64 %p) { + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; load (r & MASK) + 4 +; BOTH-LABEL:ld_and_plus_offset: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_and_plus_offset(i64 %p) { + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i32* + %gep = getelementptr i32, i32* %cast, i64 4 + %load = load i32, i32* %gep + ret i32 %load +} + +; load (r & WIDER_MASK) +; BOTH-LABEL:ld_and32_wider: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_and32_wider(i64 %p) { + %and = and i64 %p, 1152921504606846975 + %cast = inttoptr i64 %and to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; BOTH-LABEL:ld_and64: +; TBI-NOT: and x +; NO_TBI: and x +define i64 @ld_and64(i64 %p) { + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i64* + %load = load i64, i64* %cast + ret i64 %load +} + +; BOTH-LABEL:st_and32: +; TBI-NOT: and x +; NO_TBI: and x +define void @st_and32(i64 %p, i32 %v) { + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i32* + store i32 %v, i32* %cast + ret void +} + +; load (x1 + x2) & MASK +; BOTH-LABEL:ld_ro: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_ro(i64 %a, i64 %b) { + %p = add i64 %a, %b + %and = and i64 %p, 72057594037927935 + %cast = inttoptr i64 %and to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; load (r1 & MASK) + r2 +; BOTH-LABEL:ld_ro2: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_ro2(i64 %a, i64 %b) { + %and = and i64 %a, 72057594037927935 + %p = add i64 %and, %b + %cast = inttoptr i64 %p to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; load (r1 & MASK) | r2 +; BOTH-LABEL:ld_indirect_and: +; TBI-NOT: and x +; NO_TBI: and x +define i32 @ld_indirect_and(i64 %r1, i64 %r2) { + %and = and i64 %r1, 72057594037927935 + %p = or i64 %and, %r2 + %cast = inttoptr i64 %p to i32* + %load = load i32, i32* %cast + ret i32 %load +} + +; BOTH-LABEL:ld_and32_narrower: +; BOTH: and x +define i32 @ld_and32_narrower(i64 %p) { + %and = and i64 %p, 36028797018963967 + %cast = inttoptr i64 %and to i32* + %load = load i32, i32* %cast + ret i32 %load +} diff --git a/test/CodeGen/AArch64/vector-fcopysign.ll b/test/CodeGen/AArch64/vector-fcopysign.ll new file mode 100644 index 000000000000..865a0a5b8580 --- /dev/null +++ b/test/CodeGen/AArch64/vector-fcopysign.ll @@ -0,0 +1,178 @@ +; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +;============ v1f32 + +; WidenVecRes same +define <1 x float> @test_copysign_v1f32_v1f32(<1 x float> %a, <1 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v1f32_v1f32: +; CHECK-NEXT: movi.2s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.8b v0, v1, v2 +; CHECK-NEXT: ret + %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) + ret <1 x float> %r +} + +; WidenVecRes mismatched +define 
<1 x float> @test_copysign_v1f32_v1f64(<1 x float> %a, <1 x double> %b) #0 { +; CHECK-LABEL: test_copysign_v1f32_v1f64: +; CHECK-NEXT: fcvt s1, d1 +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret + %tmp0 = fptrunc <1 x double> %b to <1 x float> + %r = call <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %tmp0) + ret <1 x float> %r +} + +declare <1 x float> @llvm.copysign.v1f32(<1 x float> %a, <1 x float> %b) #0 + +;============ v1f64 + +; WidenVecOp #1 +define <1 x double> @test_copysign_v1f64_v1f32(<1 x double> %a, <1 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v1f64_v1f32: +; CHECK-NEXT: fcvt d1, s1 +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: fneg.2d v2, v2 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret + %tmp0 = fpext <1 x float> %b to <1 x double> + %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %tmp0) + ret <1 x double> %r +} + +define <1 x double> @test_copysign_v1f64_v1f64(<1 x double> %a, <1 x double> %b) #0 { +; CHECK-LABEL: test_copysign_v1f64_v1f64: +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: fneg.2d v2, v2 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret + %r = call <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) + ret <1 x double> %r +} + +declare <1 x double> @llvm.copysign.v1f64(<1 x double> %a, <1 x double> %b) #0 + +;============ v2f32 + +define <2 x float> @test_copysign_v2f32_v2f32(<2 x float> %a, <2 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v2f32_v2f32: +; CHECK-NEXT: movi.2s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.8b v0, v1, v2 +; CHECK-NEXT: ret + %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) + ret <2 x float> %r +} + +define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 { +; CHECK-LABEL: test_copysign_v2f32_v2f64: +; CHECK-NEXT: fcvtn v1.2s, v1.2d +; CHECK-NEXT: movi.2s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.8b v0, v1, v2 +; CHECK-NEXT: ret + %tmp0 = fptrunc <2 x double> %b to <2 x float> + %r = call <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %tmp0) + ret <2 x float> %r +} + +declare <2 x float> @llvm.copysign.v2f32(<2 x float> %a, <2 x float> %b) #0 + +;============ v4f32 + +define <4 x float> @test_copysign_v4f32_v4f32(<4 x float> %a, <4 x float> %b) #0 { +; CHECK-LABEL: test_copysign_v4f32_v4f32: +; CHECK-NEXT: movi.4s v2, #0x80, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: ret + %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) + ret <4 x float> %r +} + +; SplitVecOp #1 +define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 { +; CHECK-LABEL: test_copysign_v4f32_v4f64: +; CHECK-NEXT: mov s3, v0[1] +; CHECK-NEXT: mov d4, v1[1] +; CHECK-NEXT: movi.4s v5, #0x80, lsl #24 +; CHECK-NEXT: fcvt s1, d1 +; CHECK-NEXT: mov s6, v0[2] +; CHECK-NEXT: mov s7, v0[3] +; CHECK-NEXT: fcvt s16, d2 +; CHECK-NEXT: bit.16b v0, v1, v5 +; CHECK-NEXT: bit.16b v6, v16, v5 +; CHECK-NEXT: fcvt s1, d4 +; CHECK-NEXT: bit.16b v3, v1, v5 +; CHECK-NEXT: mov d1, v2[1] +; CHECK-NEXT: fcvt s1, d1 +; CHECK-NEXT: ins.s v0[1], v3[0] +; CHECK-NEXT: ins.s v0[2], v6[0] +; CHECK-NEXT: bit.16b v7, v1, v5 +; CHECK-NEXT: ins.s v0[3], v7[0] +; CHECK-NEXT: ret + %tmp0 = fptrunc <4 x double> %b to <4 x float> + %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0) + ret <4 x float> %r +} + +declare <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %b) #0 + +;============ v2f64 + +define 
<2 x double> @test_copysign_v2f64_v2f32(<2 x double> %a, <2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f32:
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+  %tmp0 = fpext <2 x float> %b to <2 x double>
+  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %tmp0)
+  ret <2 x double> %r
+}
+
+define <2 x double> @test_copysign_v2f64_v2f64(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f64:
+; CHECK-NEXT: movi.2d v2, #0000000000000000
+; CHECK-NEXT: fneg.2d v2, v2
+; CHECK-NEXT: bit.16b v0, v1, v2
+; CHECK-NEXT: ret
+  %r = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b)
+  ret <2 x double> %r
+}
+
+declare <2 x double> @llvm.copysign.v2f64(<2 x double> %a, <2 x double> %b) #0
+
+;============ v4f64
+
+; SplitVecRes mismatched
+define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f32:
+; CHECK-NEXT: movi.2d v3, #0000000000000000
+; CHECK-NEXT: fcvtl2 v4.2d, v2.4s
+; CHECK-NEXT: fcvtl v2.2d, v2.2s
+; CHECK-NEXT: fneg.2d v3, v3
+; CHECK-NEXT: bit.16b v1, v4, v3
+; CHECK-NEXT: bit.16b v0, v2, v3
+; CHECK-NEXT: ret
+  %tmp0 = fpext <4 x float> %b to <4 x double>
+  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
+  ret <4 x double> %r
+}
+
+; SplitVecRes same
+define <4 x double> @test_copysign_v4f64_v4f64(<4 x double> %a, <4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f64:
+; CHECK-NEXT: movi.2d v4, #0000000000000000
+; CHECK-NEXT: fneg.2d v4, v4
+; CHECK-NEXT: bit.16b v0, v2, v4
+; CHECK-NEXT: bit.16b v1, v3, v4
+; CHECK-NEXT: ret
+  %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b)
+  ret <4 x double> %r
+}
+
+declare <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %b) #0
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/AArch64/xbfiz.ll b/test/CodeGen/AArch64/xbfiz.ll
index f763400d7f6a..3211cc3f2ced 100644
--- a/test/CodeGen/AArch64/xbfiz.ll
+++ b/test/CodeGen/AArch64/xbfiz.ll
@@ -31,3 +31,33 @@ define i32 @ubfiz32(i32 %v) {
  %shr = lshr i32 %shl, 2
  ret i32 %shr
}
+
+define i64 @ubfiz64and(i64 %v) {
+; CHECK-LABEL: ubfiz64and:
+; CHECK: ubfiz x0, x0, #36, #11
+  %shl = shl i64 %v, 36
+  %and = and i64 %shl, 140668768878592
+  ret i64 %and
+}
+
+define i32 @ubfiz32and(i32 %v) {
+; CHECK-LABEL: ubfiz32and:
+; CHECK: ubfiz w0, w0, #6, #24
+  %shl = shl i32 %v, 6
+  %and = and i32 %shl, 1073741760
+  ret i32 %and
+}
+
+; Check that we don't generate a ubfiz if the lsl has more than one
+; use, since we'd just be replacing an and with a ubfiz.
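+; (ubfiz Rd, Rn, #lsb, #width computes (Rn & ((1 << width) - 1)) << lsb,
+; so it is only profitable when it can replace both the shift and the
+; mask.)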
+define i32 @noubfiz32(i32 %v) { +; CHECK-LABEL: noubfiz32: +; CHECK: lsl w[[REG1:[0-9]+]], w0, #6 +; CHECK: and w[[REG2:[0-9]+]], w[[REG1]], #0x3fffffc0 +; CHECK: add w0, w[[REG1]], w[[REG2]] +; CHECK: ret + %shl = shl i32 %v, 6 + %and = and i32 %shl, 1073741760 + %add = add i32 %shl, %and + ret i32 %add +} diff --git a/test/CodeGen/AMDGPU/add.ll b/test/CodeGen/AMDGPU/add.ll index 655e75dbc1a4..2ddfa9649ac9 100644 --- a/test/CodeGen/AMDGPU/add.ll +++ b/test/CodeGen/AMDGPU/add.ll @@ -5,7 +5,7 @@ ;FUNC-LABEL: {{^}}test1: ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}} ;SI-NOT: [[REG]] ;SI: buffer_store_dword [[REG]], define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { @@ -21,8 +21,8 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 @@ -39,10 +39,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} -;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} +;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}} define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 diff --git a/test/CodeGen/AMDGPU/address-space.ll b/test/CodeGen/AMDGPU/address-space.ll index 4be8c5847529..3aa2f653bf9c 100644 --- a/test/CodeGen/AMDGPU/address-space.ll +++ b/test/CodeGen/AMDGPU/address-space.ll @@ -5,15 +5,11 @@ %struct.foo = type { [3 x float], [3 x float] } -; FIXME: Extra V_MOV from SGPR to VGPR for second read. The address is -; already in a VGPR after the first read. 
- ; CHECK-LABEL: {{^}}do_as_ptr_calcs: ; CHECK: s_load_dword [[SREG1:s[0-9]+]], -; CHECK: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG1]] ; CHECK: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]] ; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:12 -; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG2]] offset:20 +; CHECK-DAG: ds_read_b32 v{{[0-9]+}}, [[VREG1]] offset:20 define void @do_as_ptr_calcs(%struct.foo addrspace(3)* nocapture %ptr) nounwind { entry: %x = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0 diff --git a/test/CodeGen/AMDGPU/addrspacecast.ll b/test/CodeGen/AMDGPU/addrspacecast.ll new file mode 100644 index 000000000000..61bcd4b3c093 --- /dev/null +++ b/test/CodeGen/AMDGPU/addrspacecast.ll @@ -0,0 +1,66 @@ +; RUN: not llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; ERROR: unsupported addrspacecast not implemented + +; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s +; XUN: llc -O0 -march=amdgcn -mcpu=bonaire -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s +; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-NO-PROMOTE %s +; XUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=+promote-alloca < %s | FileCheck -check-prefix=CHECK -check-prefix=CHECK-PROMOTE %s + +; Disable optimizations in case there are optimizations added that +; specialize away generic pointer accesses. + +; CHECK-LABEL: {{^}}branch_use_flat_i32: +; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} +; CHECK: s_endpgm +define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 { +entry: + %cmp = icmp ne i32 %c, 0 + br i1 %cmp, label %local, label %global + +local: + %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)* + br label %end + +global: + %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* + br label %end + +end: + %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ] + store i32 %x, i32 addrspace(4)* %fptr, align 4 +; %val = load i32, i32 addrspace(4)* %fptr, align 4 +; store i32 %val, i32 addrspace(1)* %out, align 4 + ret void +} + +; TODO: This should not be zero when registers are used for small +; scratch allocations again. + +; Check for prologue initializing special SGPRs pointing to scratch. 
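+; (FLAT instructions reach scratch through the flat_scratch register
+; pair, so the prologue has to initialize it before the first flat
+; access to the alloca below.)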
+; CHECK-LABEL: {{^}}store_flat_scratch: +; CHECK: s_movk_i32 flat_scratch_lo, 0 +; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}} +; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}} +; CHECK: flat_store_dword +; CHECK: s_barrier +; CHECK: flat_load_dword +define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { + %alloca = alloca i32, i32 9, align 4 + %x = call i32 @llvm.r600.read.tidig.x() #3 + %pptr = getelementptr i32, i32* %alloca, i32 %x + %fptr = addrspacecast i32* %pptr to i32 addrspace(4)* + store i32 %x, i32 addrspace(4)* %fptr + ; Dummy call + call void @llvm.AMDGPU.barrier.local() #1 + %reload = load i32, i32 addrspace(4)* %fptr, align 4 + store i32 %reload, i32 addrspace(1)* %out, align 4 + ret void +} + +declare void @llvm.AMDGPU.barrier.local() #1 +declare i32 @llvm.r600.read.tidig.x() #3 + +attributes #0 = { nounwind } +attributes #1 = { nounwind convergent } +attributes #3 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/and.ll b/test/CodeGen/AMDGPU/and.ll index 5672d470bd7e..f83fb16101fb 100644 --- a/test/CodeGen/AMDGPU/and.ll +++ b/test/CodeGen/AMDGPU/and.ll @@ -2,6 +2,8 @@ ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +declare i32 @llvm.r600.read.tidig.x() #0 + ; FUNC-LABEL: {{^}}test2: ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} @@ -54,13 +56,80 @@ define void @s_and_constant_i32(i32 addrspace(1)* %out, i32 %a) { ret void } -; FUNC-LABEL: {{^}}v_and_i32: -; SI: v_and_b32 -define void @v_and_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) { - %a = load i32, i32 addrspace(1)* %aptr, align 4 - %b = load i32, i32 addrspace(1)* %bptr, align 4 +; FIXME: We should really duplicate the constant so that the SALU use +; can fold into the s_and_b32 and the VALU one is materialized +; directly without copying from the SGPR. + +; Second use is a VGPR use of the constant. +; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_0: +; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687 +; SI-DAG: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]] +; SI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]] +; SI: buffer_store_dword [[VK]] +define void @s_and_multi_use_constant_i32_0(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %and = and i32 %a, 1234567 + + ; Just to stop future replacement of copy to vgpr + store with VALU op. + %foo = add i32 %and, %b + store volatile i32 %foo, i32 addrspace(1)* %out + store volatile i32 1234567, i32 addrspace(1)* %out + ret void +} + +; Second use is another SGPR use of the constant. 
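+; (Each SALU instruction can encode one 32-bit literal operand directly,
+; so ideally 0x12d687 would be re-emitted inline for both uses instead
+; of being pinned in an SGPR; see the FIXME above.)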
+; FUNC-LABEL: {{^}}s_and_multi_use_constant_i32_1: +; SI: s_mov_b32 [[K:s[0-9]+]], 0x12d687 +; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, [[K]] +; SI: s_add_i32 +; SI: s_add_i32 [[ADD:s[0-9]+]], s{{[0-9]+}}, [[K]] +; SI: buffer_store_dword [[VK]] +define void @s_and_multi_use_constant_i32_1(i32 addrspace(1)* %out, i32 %a, i32 %b) { + %and = and i32 %a, 1234567 + %foo = add i32 %and, 1234567 + %bar = add i32 %foo, %b + store volatile i32 %bar, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_and_i32_vgpr_vgpr: +; SI: v_and_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +define void @v_and_i32_vgpr_vgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) { + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep.a + %b = load i32, i32 addrspace(1)* %gep.b %and = and i32 %a, %b - store i32 %and, i32 addrspace(1)* %out, align 4 + store i32 %and, i32 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}v_and_i32_sgpr_vgpr: +; SI-DAG: s_load_dword [[SA:s[0-9]+]] +; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]] +; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]] +define void @v_and_i32_sgpr_vgpr(i32 addrspace(1)* %out, i32 %a, i32 addrspace(1)* %bptr) { + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %gep.b = getelementptr i32, i32 addrspace(1)* %bptr, i32 %tid + %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %b = load i32, i32 addrspace(1)* %gep.b + %and = and i32 %a, %b + store i32 %and, i32 addrspace(1)* %gep.out + ret void +} + +; FUNC-LABEL: {{^}}v_and_i32_vgpr_sgpr: +; SI-DAG: s_load_dword [[SA:s[0-9]+]] +; SI-DAG: {{buffer|flat}}_load_dword [[VB:v[0-9]+]] +; SI: v_and_b32_e32 v{{[0-9]+}}, [[SA]], [[VB]] +define void @v_and_i32_vgpr_sgpr(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 %b) { + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %gep.a = getelementptr i32, i32 addrspace(1)* %aptr, i32 %tid + %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid + %a = load i32, i32 addrspace(1)* %gep.a + %and = and i32 %a, %b + store i32 %and, i32 addrspace(1)* %gep.out ret void } @@ -148,9 +217,23 @@ endif: } ; FUNC-LABEL: {{^}}v_and_constant_i64: -; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} -; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; SI-DAG: s_mov_b32 [[KLO:s[0-9]+]], 0xab19b207 +; SI-DAG: s_movk_i32 [[KHI:s[0-9]+]], 0x11e{{$}} +; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KLO]], {{v[0-9]+}} +; SI-DAG: v_and_b32_e32 {{v[0-9]+}}, [[KHI]], {{v[0-9]+}} +; SI: buffer_store_dwordx2 define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { + %a = load i64, i64 addrspace(1)* %aptr, align 8 + %and = and i64 %a, 1231231234567 + store i64 %and, i64 addrspace(1)* %out, align 8 + ret void +} + +; FIXME: Should replace and 0 +; FUNC-LABEL: {{^}}v_and_i64_32_bit_constant: +; SI: v_and_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; SI: v_and_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}} +define void @v_and_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { %a = load i64, i64 addrspace(1)* %aptr, align 8 %and = and i64 %a, 1234567 store i64 %and, i64 addrspace(1)* %out, align 8 @@ -294,3 +377,5 @@ define void @s_and_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 a store i64 %and, i64 addrspace(1)* %out, align 8 ret void } + +attributes #0 = { 
nounwind readnone } diff --git a/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/test/CodeGen/AMDGPU/annotate-kernel-features.ll new file mode 100644 index 000000000000..b116c72322bb --- /dev/null +++ b/test/CodeGen/AMDGPU/annotate-kernel-features.ll @@ -0,0 +1,193 @@ +; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA -check-prefix=ALL %s +; RUN: opt -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=NOHSA -check-prefix=ALL %s + +declare i32 @llvm.r600.read.tgid.x() #0 +declare i32 @llvm.r600.read.tgid.y() #0 +declare i32 @llvm.r600.read.tgid.z() #0 + +declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tidig.y() #0 +declare i32 @llvm.r600.read.tidig.z() #0 + +declare i32 @llvm.r600.read.local.size.x() #0 +declare i32 @llvm.r600.read.local.size.y() #0 +declare i32 @llvm.r600.read.local.size.z() #0 + +declare i32 @llvm.r600.read.global.size.x() #0 +declare i32 @llvm.r600.read.global.size.y() #0 +declare i32 @llvm.r600.read.global.size.z() #0 + + +; ALL: define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { +define void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tgid.x() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_y(i32 addrspace(1)* %ptr) #2 { +define void @use_tgid_y(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tgid.y() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 { +define void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.y() + store volatile i32 %val0, i32 addrspace(1)* %ptr + %val1 = call i32 @llvm.r600.read.tgid.y() + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 { +define void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.x() + %val1 = call i32 @llvm.r600.read.tgid.y() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_z(i32 addrspace(1)* %ptr) #3 { +define void @use_tgid_z(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tgid.z() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 { +define void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.x() + %val1 = call i32 @llvm.r600.read.tgid.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 { +define void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.y() + %val1 = call i32 @llvm.r600.read.tgid.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 { +define void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tgid.x() + %val1 = call i32 @llvm.r600.read.tgid.y() + %val2 = call i32 @llvm.r600.read.tgid.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + store volatile i32 %val2, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { +define void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { + 
%val = call i32 @llvm.r600.read.tidig.x() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_y(i32 addrspace(1)* %ptr) #5 { +define void @use_tidig_y(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tidig.y() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_z(i32 addrspace(1)* %ptr) #6 { +define void @use_tidig_z(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.tidig.z() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { +define void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tidig.x() + %val1 = call i32 @llvm.r600.read.tgid.x() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 { +define void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tidig.y() + %val1 = call i32 @llvm.r600.read.tgid.y() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 { +define void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tidig.x() + %val1 = call i32 @llvm.r600.read.tidig.y() + %val2 = call i32 @llvm.r600.read.tidig.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + store volatile i32 %val2, i32 addrspace(1)* %ptr + ret void +} + +; ALL: define void @use_all_workitems(i32 addrspace(1)* %ptr) #9 { +define void @use_all_workitems(i32 addrspace(1)* %ptr) #1 { + %val0 = call i32 @llvm.r600.read.tidig.x() + %val1 = call i32 @llvm.r600.read.tidig.y() + %val2 = call i32 @llvm.r600.read.tidig.z() + %val3 = call i32 @llvm.r600.read.tgid.x() + %val4 = call i32 @llvm.r600.read.tgid.y() + %val5 = call i32 @llvm.r600.read.tgid.z() + store volatile i32 %val0, i32 addrspace(1)* %ptr + store volatile i32 %val1, i32 addrspace(1)* %ptr + store volatile i32 %val2, i32 addrspace(1)* %ptr + store volatile i32 %val3, i32 addrspace(1)* %ptr + store volatile i32 %val4, i32 addrspace(1)* %ptr + store volatile i32 %val5, i32 addrspace(1)* %ptr + ret void +} + +; HSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #10 { +; NOHSA: define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 { +define void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.local.size.x() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; HSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #10 { +; NOHSA: define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 { +define void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.local.size.y() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +; HSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #10 { +; NOHSA: define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 { +define void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 { + %val = call i32 @llvm.r600.read.local.size.z() + store i32 %val, i32 addrspace(1)* %ptr + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +; HSA: attributes #0 = { nounwind readnone } +; HSA: attributes #1 = { nounwind } +; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" } +; HSA: attributes #3 = { nounwind 
"amdgpu-work-group-id-z" } +; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" } +; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" } +; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" } +; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" } +; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } +; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" } diff --git a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll index 8c2a0795860d..f8a74222d566 100644 --- a/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll +++ b/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll @@ -2,7 +2,7 @@ ; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=SI -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone -declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate +declare void @llvm.AMDGPU.barrier.local() nounwind convergent ; The required pointer calculations for the alloca'd actually requires ; an add and won't be folded into the addressing, which fails with a @@ -14,7 +14,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; FIXME: We end up with zero argument for ADD, because ; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index ; with the appropriate offset. We should fold this into the store. -; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}} +; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}} ; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}] ; ; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this @@ -22,7 +22,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate ; to interpret: ; getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b -; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16 +; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16 ; SI-PROMOTE: ds_write_b32 [[PTRREG]] define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) { %alloca = alloca [4 x i32], i32 4, align 16 @@ -35,7 +35,7 @@ define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 add %alloca_ptr = getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b store i32 %result, i32* %alloca_ptr, align 4 ; Dummy call - call void @llvm.AMDGPU.barrier.local() nounwind noduplicate + call void @llvm.AMDGPU.barrier.local() nounwind convergent %reload = load i32, i32* %alloca_ptr, align 4 %out_ptr = getelementptr i32, i32 addrspace(1)* %out, i32 %tid store i32 %reload, i32 addrspace(1)* %out_ptr, align 4 diff --git a/test/CodeGen/AMDGPU/bitreverse.ll b/test/CodeGen/AMDGPU/bitreverse.ll new file mode 100644 index 000000000000..0ef7d5184c1f --- /dev/null +++ b/test/CodeGen/AMDGPU/bitreverse.ll @@ -0,0 +1,115 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s + +declare i16 @llvm.bitreverse.i16(i16) #1 +declare i32 @llvm.bitreverse.i32(i32) #1 +declare i64 @llvm.bitreverse.i64(i64) #1 + +declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1 +declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1 + 
+declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1 +declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1 + +declare i32 @llvm.AMDGPU.brev(i32) #1 + +; FUNC-LABEL: {{^}}s_brev_i16: +; SI: s_brev_b32 +define void @s_brev_i16(i16 addrspace(1)* noalias %out, i16 %val) #0 { + %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1 + store i16 %brev, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_i16: +; SI: v_bfrev_b32_e32 +define void @v_brev_i16(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) #0 { + %val = load i16, i16 addrspace(1)* %valptr + %brev = call i16 @llvm.bitreverse.i16(i16 %val) #1 + store i16 %brev, i16 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_brev_i32: +; SI: s_load_dword [[VAL:s[0-9]+]], +; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]] +; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; SI: buffer_store_dword [[VRESULT]], +; SI: s_endpgm +define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) #0 { + %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1 + store i32 %brev, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_i32: +; SI: buffer_load_dword [[VAL:v[0-9]+]], +; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; SI: buffer_store_dword [[RESULT]], +; SI: s_endpgm +define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 { + %val = load i32, i32 addrspace(1)* %valptr + %brev = call i32 @llvm.bitreverse.i32(i32 %val) #1 + store i32 %brev, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_brev_v2i32: +; SI: s_brev_b32 +; SI: s_brev_b32 +define void @s_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> %val) #0 { + %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1 + store <2 x i32> %brev, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_v2i32: +; SI: v_bfrev_b32_e32 +; SI: v_bfrev_b32_e32 +define void @v_brev_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) #0 { + %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr + %brev = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %val) #1 + store <2 x i32> %brev, <2 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_brev_i64: +define void @s_brev_i64(i64 addrspace(1)* noalias %out, i64 %val) #0 { + %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1 + store i64 %brev, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_i64: +define void @v_brev_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %valptr) #0 { + %val = load i64, i64 addrspace(1)* %valptr + %brev = call i64 @llvm.bitreverse.i64(i64 %val) #1 + store i64 %brev, i64 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_brev_v2i64: +define void @s_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> %val) #0 { + %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1 + store <2 x i64> %brev, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}v_brev_v2i64: +define void @v_brev_v2i64(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %valptr) #0 { + %val = load <2 x i64>, <2 x i64> addrspace(1)* %valptr + %brev = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %val) #1 + store <2 x i64> %brev, <2 x i64> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}legacy_s_brev_i32: +; SI: s_brev_b32 +define void @legacy_s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %brev = call i32 @llvm.AMDGPU.brev(i32 %val) #1 + store i32 %brev, i32 addrspace(1)* %out + ret void 
+} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/calling-conventions.ll b/test/CodeGen/AMDGPU/calling-conventions.ll new file mode 100644 index 000000000000..57adc8be6a99 --- /dev/null +++ b/test/CodeGen/AMDGPU/calling-conventions.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI %s + +; Make sure we don't crash or assert on spir_kernel calling convention. + +; SI-LABEL: {{^}}kernel: +; SI: s_endpgm +define spir_kernel void @kernel(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; FIXME: This is treated like a kernel +; SI-LABEL: {{^}}func: +; SI: s_endpgm +define spir_func void @func(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll new file mode 100644 index 000000000000..1c5bed3b905f --- /dev/null +++ b/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -0,0 +1,98 @@ +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; OPT-LABEL: @test_no_sink_flat_small_offset_i32( +; OPT: getelementptr i32, i32 addrspace(4)* %in +; OPT: br i1 +; OPT-NOT: ptrtoint + +; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32: +; GCN: flat_load_dword +; GCN: {{^}}BB0_2: +define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(4)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( +; OPT: getelementptr i32, i32 addrspace(4)* %out, +; OPT-CI-NOT: getelementptr +; OPT: br i1 + +; OPT-CI: ptrtoint +; OPT-CI: add +; OPT-CI: inttoptr +; OPT: br label + +; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_global_i32: +; CI: buffer_load_dword {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:28 +define void @test_sink_noop_addrspacecast_flat_to_global_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(1)* + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(1)* %cast + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32( +; OPT: getelementptr i32, i32 addrspace(4)* %out, +; OPT-CI-NOT: getelementptr +; OPT: br i1 + +; OPT-CI: ptrtoint +; OPT-CI: add +; OPT-CI: 
inttoptr +; OPT: br label + +; GCN-LABEL: {{^}}test_sink_noop_addrspacecast_flat_to_constant_i32: +; CI: s_load_dword {{s[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0xd +define void @test_sink_noop_addrspacecast_flat_to_constant_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 + %cast = addrspacecast i32 addrspace(4)* %in.gep to i32 addrspace(2)* + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %cast + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(4)* %out.gep + br label %done + +done: + ret void +} diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index a68d110fdc96..698494265a7d 100644 --- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -1,5 +1,7 @@ +; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s ; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s @@ -115,35 +117,6 @@ done: ret void } -; OPT-LABEL: @test_no_sink_flat_small_offset_i32( -; OPT: getelementptr i32, i32 addrspace(4)* %in -; OPT: br i1 -; OPT-NOT: ptrtoint - -; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32: -; GCN: flat_load_dword -; GCN: {{^}}BB4_2: - -define void @test_no_sink_flat_small_offset_i32(i32 addrspace(4)* %out, i32 addrspace(4)* %in, i32 %cond) { -entry: - %out.gep = getelementptr i32, i32 addrspace(4)* %out, i64 999999 - %in.gep = getelementptr i32, i32 addrspace(4)* %in, i64 7 - %tmp0 = icmp eq i32 %cond, 0 - br i1 %tmp0, label %endif, label %if - -if: - %tmp1 = load i32, i32 addrspace(4)* %in.gep - br label %endif - -endif: - %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] - store i32 %x, i32 addrspace(4)* %out.gep - br label %done - -done: - ret void -} - ; OPT-LABEL: @test_sink_scratch_small_offset_i32( ; OPT-NOT: getelementptr [512 x i32] ; OPT: br i1 @@ -153,7 +126,7 @@ done: ; GCN: s_and_saveexec_b64 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:4092{{$}} -; GCN: {{^}}BB5_2: +; GCN: {{^}}BB4_2: define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, i32 %arg) { entry: %alloca = alloca [512 x i32], align 4 @@ -189,7 +162,7 @@ done: ; GCN: s_and_saveexec_b64 ; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} ; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} -; GCN: {{^}}BB6_2: +; GCN: {{^}}BB5_2: define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %cond, 
i32 %arg) { entry: %alloca = alloca [512 x i32], align 4 @@ -222,7 +195,7 @@ done: ; GCN: s_and_saveexec_b64 ; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] -; GCN: {{^}}BB7_2: +; GCN: {{^}}BB6_2: define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset, i32 %cond) { entry: %offset.ext = zext i32 %offset to i64 @@ -246,3 +219,220 @@ done: attributes #0 = { nounwind readnone } attributes #1 = { nounwind } + + + +; OPT-LABEL: @test_sink_constant_small_offset_i32 +; OPT-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32: +; GCN: s_and_saveexec_b64 +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32 +; OPT-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32: +; GCN: s_and_saveexec_b64 +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32 +; OPT-SI: getelementptr i32, i32 addrspace(2)* +; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32: +; GCN: s_and_saveexec_b64 +; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400 + +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32 +; OPT-SI: getelementptr i32, i32 addrspace(2)* +; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32: +; GCN: s_and_saveexec_b64 +; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}} +; GCN: s_addc_u32 
s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}} +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32 +; OPT: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32: +; GCN: s_and_saveexec_b64 +; GCN: s_add_u32 +; GCN: s_addc_u32 +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32: +; GCN: s_and_saveexec_b64 +; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}} +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} + +; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}} +; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}} + +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32 +; OPT-SI: getelementptr i32, i32 addrspace(2)* +; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)* +; OPT-VI: getelementptr i32, i32 addrspace(2)* +; OPT: br i1 + +; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32: +; GCN: s_and_saveexec_b64 +; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}} +; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} + +; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}} + +; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}} +; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}} + +; GCN: s_or_b64 exec, exec +define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %cond) { +entry: + %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999 + %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144 + %tmp0 = icmp eq i32 %cond, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = load i32, i32 addrspace(2)* %in.gep + br label 
%endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(1)* %out.gep + br label %done + +done: + ret void +} diff --git a/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll b/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll new file mode 100644 index 000000000000..1a37e3c75fa3 --- /dev/null +++ b/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck -check-prefix=HSA-DEFAULT %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck -check-prefix=HSA-NODEFAULT %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri | FileCheck -check-prefix=NOHSA-DEFAULT %s +; RUN: llc < %s -mtriple=amdgcn -mcpu=kaveri -mattr=+flat-for-global | FileCheck -check-prefix=NOHSA-NODEFAULT %s + + +; HSA-DEFAULT: flat_store_dword +; HSA-NODEFAULT: buffer_store_dword +; NOHSA-DEFAULT: buffer_store_dword +; NOHSA-NODEFAULT: flat_store_dword +define void @test(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/ctpop64.ll b/test/CodeGen/AMDGPU/ctpop64.ll index e1a0ee3ea217..ec2971e98032 100644 --- a/test/CodeGen/AMDGPU/ctpop64.ll +++ b/test/CodeGen/AMDGPU/ctpop64.ll @@ -36,6 +36,24 @@ define void @v_ctpop_i64(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noali ret void } +; FIXME: or 0 should be replaced with copy +; FUNC-LABEL: {{^}}v_ctpop_i64_user: +; GCN: buffer_load_dwordx2 v{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, +; GCN: v_bcnt_u32_b32_e64 [[MIDRESULT:v[0-9]+]], v[[LOVAL]], 0 +; SI-NEXT: v_bcnt_u32_b32_e32 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; VI-NEXT: v_bcnt_u32_b32_e64 [[RESULT:v[0-9]+]], v[[HIVAL]], [[MIDRESULT]] +; GCN-DAG: v_or_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, [[RESULT]] +; GCN-DAG: v_or_b32_e64 v[[RESULT_HI:[0-9]+]], 0, s{{[0-9]+}} +; GCN: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +; GCN: s_endpgm +define void @v_ctpop_i64_user(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in, i64 %s.val) nounwind { + %val = load i64, i64 addrspace(1)* %in, align 8 + %ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone + %or = or i64 %ctpop, %s.val + store i64 %or, i64 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}s_ctpop_v2i64: ; GCN: s_bcnt1_i32_b64 ; GCN: s_bcnt1_i32_b64 @@ -99,8 +117,8 @@ define void @v_ctpop_v4i64(<4 x i32> addrspace(1)* noalias %out, <4 x i64> addrs ; SI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0xd ; VI: s_load_dwordx2 s{{\[}}[[LOVAL:[0-9]+]]:[[HIVAL:[0-9]+]]{{\]}}, s[{{[0-9]+:[0-9]+}}], 0x34 ; GCN: s_bcnt1_i32_b64 [[RESULT:s[0-9]+]], {{s\[}}[[LOVAL]]:[[HIVAL]]{{\]}} -; GCN: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]] -; GCN: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]] +; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[RESULT]] +; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HIVAL]] ; GCN: buffer_store_dwordx2 {{v\[}}[[VLO]]:[[VHI]]{{\]}} ; GCN: s_endpgm define void @ctpop_i64_in_br(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %ctpop_arg, i32 %cond) { diff --git a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 3399d9da29e3..834922c62cbd 100644 --- a/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -137,14 +137,8 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]] ; SI-NOT: bfe ; SI-NOT: lshr -; SI: buffer_store_dword -; SI: 
buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { %load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> @@ -154,7 +148,7 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> ; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32: ; SI: buffer_load_dword [[LOADREG:v[0-9]+]], -; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]] +; SI: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]] ; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]] ; SI: buffer_store_dword [[CONV]], define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { diff --git a/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll b/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll new file mode 100644 index 000000000000..171883e4c74b --- /dev/null +++ b/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s + +; The memory operand was dropped from the buffer_load_dword_offset +; when it was replaced with the addr64 variant during operand legalization, +; resulting in the global loads not being scheduled together. + +; GCN-LABEL: {{^}}reschedule_global_load_lds_store: +; GCN: buffer_load_dword +; GCN: buffer_load_dword +; GCN: ds_write_b32 +; GCN: ds_write_b32 +; GCN: s_endpgm +define void @reschedule_global_load_lds_store(i32 addrspace(1)* noalias %gptr0, i32 addrspace(1)* noalias %gptr1, i32 addrspace(3)* noalias %lptr, i32 %c) #0 { +entry: + %tid = tail call i32 @llvm.r600.read.tidig.x() #1 + %idx = shl i32 %tid, 2 + %gep0 = getelementptr i32, i32 addrspace(1)* %gptr0, i32 %idx + %gep1 = getelementptr i32, i32 addrspace(1)* %gptr1, i32 %idx + %gep2 = getelementptr i32, i32 addrspace(3)* %lptr, i32 %tid + %cmp0 = icmp eq i32 %c, 0 + br i1 %cmp0, label %for.body, label %exit + +for.body: ; preds = %for.body, %entry + %i = phi i32 [ 0, %entry ], [ %i.inc, %for.body ] + %gptr0.phi = phi i32 addrspace(1)* [ %gep0, %entry ], [ %gep0.inc, %for.body ] + %gptr1.phi = phi i32 addrspace(1)* [ %gep1, %entry ], [ %gep1.inc, %for.body ] + %lptr0.phi = phi i32 addrspace(3)* [ %gep2, %entry ], [ %gep2.inc, %for.body ] + %lptr1 = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 1 + %val0 = load i32, i32 addrspace(1)* %gep0 + store i32 %val0, i32 addrspace(3)* %lptr0.phi + %val1 = load i32, i32 addrspace(1)* %gep1 + store i32 %val1, i32 addrspace(3)* %lptr1 + %gep0.inc = getelementptr i32, i32 addrspace(1)* %gptr0.phi, i32 4 + %gep1.inc = getelementptr i32, i32 addrspace(1)* %gptr1.phi, i32 4 + %gep2.inc = getelementptr i32, i32 addrspace(3)* %lptr0.phi, i32 4 + %i.inc = add nsw i32 %i, 1 + %cmp1 = icmp ne i32 %i, 256 + br i1 %cmp1, label %for.body, label %exit + +exit: ; preds = %for.body, %entry + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tidig.x() #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.r600.read.tgid.x() #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll 
b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll index 5e4654abd91b..e657991557e3 100644 --- a/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll +++ b/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll @@ -10,13 +10,13 @@ declare void @llvm.AMDGPU.barrier.local() #1 ; CHECK: BB0_1: ; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]], ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]] -; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], vcc, 4, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]] -; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]] -; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], vcc, 0x84, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]] -; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]] +; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]] ; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]] ; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1 @@ -66,5 +66,5 @@ for.end: ; preds = %for.body } attributes #0 = { nounwind readnone } -attributes #1 = { noduplicate nounwind } +attributes #1 = { convergent nounwind } attributes #2 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/CodeGen/AMDGPU/ds-sub-offset.ll b/test/CodeGen/AMDGPU/ds-sub-offset.ll new file mode 100644 index 000000000000..7d6eddb01993 --- /dev/null +++ b/test/CodeGen/AMDGPU/ds-sub-offset.ll @@ -0,0 +1,125 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s + +declare void @llvm.AMDGPU.barrier.local() #2 +declare i32 @llvm.r600.read.tidig.x() #0 + +@lds.obj = addrspace(3) global [256 x i32] undef, align 4 + +; GCN-LABEL: {{^}}write_ds_sub0_offset0_global: +; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v0 +; GCN: v_sub_i32_e32 [[BASEPTR:v[0-9]+]], vcc, 0, [[SHL]] +; GCN: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b +; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12 +define void @write_ds_sub0_offset0_global() #0 { +entry: + %x.i = call i32 @llvm.r600.read.tidig.x() #1 + %sub1 = sub i32 0, %x.i + %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1 + %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3 + store i32 123, i32 addrspace(3)* %arrayidx + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 +; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535 +define void @add_x_shl_neg_to_sub_max_offset() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 65535, %shl + %ptr = inttoptr i32 %add to i8 addrspace(3)* + store i8 13, i8 addrspace(3)* %ptr + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset_p1: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x10000, [[SCALED]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 +; GCN: ds_write_b8 [[NEG]], [[K]]{{$}} +define void 
@add_x_shl_neg_to_sub_max_offset_p1() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 65536, %shl + %ptr = inttoptr i32 %add to i8 addrspace(3)* + store i8 13, i8 addrspace(3)* %ptr + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 +; GCN-NOT: v_sub +; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}} +; GCN-NOT: v_sub +; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}} +; GCN: s_endpgm +define void @add_x_shl_neg_to_sub_multi_use() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add0 = add i32 123, %shl + %add1 = add i32 456, %shl + %ptr0 = inttoptr i32 %add0 to i32 addrspace(3)* + store volatile i32 13, i32 addrspace(3)* %ptr0 + %ptr1 = inttoptr i32 %add1 to i32 addrspace(3)* + store volatile i32 13, i32 addrspace(3)* %ptr1 + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use_same_offset: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13 +; GCN-NOT: v_sub +; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}} +; GCN-NOT: v_sub +; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}} +; GCN: s_endpgm +define void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 123, %shl + %ptr = inttoptr i32 %add to i32 addrspace(3)* + store volatile i32 13, i32 addrspace(3)* %ptr + store volatile i32 13, i32 addrspace(3)* %ptr + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]] +; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255 +define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 1019, %shl + %ptr = inttoptr i32 %add to i64 addrspace(3)* + store i64 123, i64 addrspace(3)* %ptr, align 4 + ret void +} + +; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1: +; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0 +; GCN-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]] +; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}} +define void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 { + %x.i = call i32 @llvm.r600.read.tidig.x() #0 + %neg = sub i32 0, %x.i + %shl = shl i32 %neg, 2 + %add = add i32 1020, %shl + %ptr = inttoptr i32 %add to i64 addrspace(3)* + store i64 123, i64 addrspace(3)* %ptr, align 4 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } +attributes #2 = { nounwind convergent } diff --git a/test/CodeGen/AMDGPU/ds_read2.ll b/test/CodeGen/AMDGPU/ds_read2.ll index ec04f8b1acd6..5170d9c82712 100644 --- a/test/CodeGen/AMDGPU/ds_read2.ll +++ b/test/CodeGen/AMDGPU/ds_read2.ll @@ -216,10 +216,8 @@ define void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x f ret void } -; We should be able to merge in this case, but probably not worth the effort. 
-; SI-NOT: ds_read2_b32 -; SI: ds_read_b32 -; SI: ds_read_b32 +; SI-LABEL: {{^}}read2_ptr_is_subreg_f32: +; SI: ds_read2_b32 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset1:8{{$}} ; SI: s_endpgm define void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -507,9 +505,9 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind +; Function Attrs: convergent nounwind declare void @llvm.AMDGPU.barrier.local() #2 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds_read2_superreg.ll b/test/CodeGen/AMDGPU/ds_read2_superreg.ll index 842c2d8bc339..0061aaf2cdbd 100644 --- a/test/CodeGen/AMDGPU/ds_read2_superreg.ll +++ b/test/CodeGen/AMDGPU/ds_read2_superreg.ll @@ -35,14 +35,11 @@ define void @simple_read2_v2f32_superreg(<2 x float> addrspace(1)* %out) #0 { ret void } -; FIXME: Shuffling to new superregister ; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align4: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_Y:[0-9]+]]:[[REG_X:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} -; CI-DAG: v_mov_b32_e32 v[[COPY_REG_Y:[0-9]+]], v[[REG_Y]] -; CI-DAG: v_mov_b32_e32 v[[COPY_REG_Z:[0-9]+]], v[[REG_Z]] -; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[COPY_REG_Z]], v[[REG_X]] -; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_W]], v[[COPY_REG_Y]] +; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}} +; CI-DAG: ds_read2_b32 v{{\[}}[[REG_Z:[0-9]+]]:[[REG_W:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} +; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_Z]], v[[REG_X]] +; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_W]], v[[REG_Y]] ; CI: v_add_f32_e32 v[[ADD2:[0-9]+]], v[[ADD1]], v[[ADD0]] ; CI: buffer_store_dword v[[ADD2]] ; CI: s_endpgm @@ -88,8 +85,13 @@ define void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 { } ; CI-LABEL: {{^}}simple_read2_v4f32_superreg_align8: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} +; CI-DAG: ds_read2_b64 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} + +; FIXME: These moves shouldn't be necessary, it should be able to +; store the same register if offset1 was the non-zero offset. 
+ +; CI: v_mov_b32 +; CI: v_mov_b32 ; CI: buffer_store_dwordx4 ; CI: s_endpgm define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out) #0 { @@ -102,8 +104,9 @@ define void @simple_read2_v4f32_superreg_align8(<4 x float> addrspace(1)* %out) } ; CI-LABEL: {{^}}simple_read2_v4f32_superreg: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} +; CI: ds_read2_b64 v{{\[}}[[REG_W:[0-9]+]]:[[REG_Z:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 ; CI: buffer_store_dwordx4 ; CI: s_endpgm define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 { @@ -115,19 +118,16 @@ define void @simple_read2_v4f32_superreg(<4 x float> addrspace(1)* %out) #0 { ret void } +; FIXME: Extra moves shuffling superregister ; CI-LABEL: {{^}}simple_read2_v8f32_superreg: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:7 offset1:6{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT5:[0-9]+]]:[[REG_ELT4:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:4{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT2:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT1:[0-9]+]]:[[REG_ELT0:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword +; CI: ds_read2_b64 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT7:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:3{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 +; CI: ds_read2_b64 v{{\[}}[[REG_ELT6:[0-9]+]]:[[REG_ELT5:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 +; CI: buffer_store_dwordx4 +; CI: buffer_store_dwordx4 ; CI: s_endpgm define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -138,33 +138,24 @@ define void @simple_read2_v8f32_superreg(<8 x float> addrspace(1)* %out) #0 { ret void } +; FIXME: Extra moves shuffling superregister ; CI-LABEL: {{^}}simple_read2_v16f32_superreg: -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:15 offset1:14{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:13 offset1:12{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:11 offset1:10{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:9 offset1:8{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT7:[0-9]+]]:[[REG_ELT6:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:7 offset1:6{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT5:[0-9]+]]:[[REG_ELT4:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:4{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT3:[0-9]+]]:[[REG_ELT2:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:3 offset1:2{{$}} -; CI-DAG: ds_read2_b32 v{{\[}}[[REG_ELT1:[0-9]+]]:[[REG_ELT0:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1{{$}} +; CI: ds_read2_b64 v{{\[}}[[REG_ELT11:[0-9]+]]:[[REG_ELT15:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:1 offset1:3{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 +; CI: ds_read2_b64 v{{\[}}[[REG_ELT14:[0-9]+]]:[[REG_ELT13:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:5 offset1:7{{$}} +; CI: ds_read2_b64 
v{{\[}}[[REG_ELT14:[0-9]+]]:[[REG_ELT13:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:6 offset1:4{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 +; CI: ds_read2_b64 v{{\[}}[[REG_ELT12:[0-9]+]]:[[REG_ELT10:[0-9]+]]{{\]}}, v{{[0-9]+}} offset0:2{{$}} +; CI: v_mov_b32 +; CI: v_mov_b32 ; CI: s_waitcnt lgkmcnt(0) -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword -; CI: buffer_store_dword +; CI: buffer_store_dwordx4 +; CI: buffer_store_dwordx4 +; CI: buffer_store_dwordx4 +; CI: buffer_store_dwordx4 ; CI: s_endpgm define void @simple_read2_v16f32_superreg(<16 x float> addrspace(1)* %out) #0 { %x.i = tail call i32 @llvm.r600.read.tidig.x() #1 @@ -238,9 +229,9 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind +; Function Attrs: convergent nounwind declare void @llvm.AMDGPU.barrier.local() #2 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds_read2st64.ll b/test/CodeGen/AMDGPU/ds_read2st64.ll index e2e441214b4a..4a0571ea16f2 100644 --- a/test/CodeGen/AMDGPU/ds_read2st64.ll +++ b/test/CodeGen/AMDGPU/ds_read2st64.ll @@ -65,7 +65,7 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add ; SI-LABEL: @simple_read2st64_f32_over_max_offset ; SI-NOT: ds_read2st64_b32 -; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}} ; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256 ; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]] ; SI: s_endpgm @@ -197,7 +197,7 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a ; SI-LABEL: @simple_read2st64_f64_over_max_offset ; SI-NOT: ds_read2st64_b64 -; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}} +; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}} ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512 ; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]] ; SI: s_endpgm @@ -264,9 +264,5 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind -declare void @llvm.AMDGPU.barrier.local() #2 - attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } diff --git a/test/CodeGen/AMDGPU/ds_write2.ll b/test/CodeGen/AMDGPU/ds_write2.ll index d4973e377b59..9d3a293f3b89 100644 --- a/test/CodeGen/AMDGPU/ds_write2.ll +++ b/test/CodeGen/AMDGPU/ds_write2.ll @@ -345,8 +345,9 @@ define void @store_constant_disjoint_offsets() { ; SI-LABEL: @store_misaligned64_constant_offsets ; SI: v_mov_b32_e32 
[[ZERO:v[0-9]+]], 0{{$}} -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 -; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3 +; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1 +; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3 +; SI: s_endpgm define void @store_misaligned64_constant_offsets() { store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4 store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4 @@ -430,9 +431,9 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind +; Function Attrs: convergent nounwind declare void @llvm.AMDGPU.barrier.local() #2 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/ds_write2st64.ll b/test/CodeGen/AMDGPU/ds_write2st64.ll index 358aa6a9e363..5a1024ccf6d7 100644 --- a/test/CodeGen/AMDGPU/ds_write2st64.ll +++ b/test/CodeGen/AMDGPU/ds_write2st64.ll @@ -109,9 +109,9 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; Function Attrs: nounwind readnone declare i32 @llvm.r600.read.tidig.y() #1 -; Function Attrs: noduplicate nounwind +; Function Attrs: convergent nounwind declare void @llvm.AMDGPU.barrier.local() #2 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readnone } -attributes #2 = { noduplicate nounwind } +attributes #2 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/dynamic_stackalloc.ll b/test/CodeGen/AMDGPU/dynamic_stackalloc.ll new file mode 100644 index 000000000000..f4409a0984a9 --- /dev/null +++ b/test/CodeGen/AMDGPU/dynamic_stackalloc.ll @@ -0,0 +1,11 @@ +; RUN: not llc -march=amdgcn -mcpu=tahiti -mattr=+promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -verify-machineinstrs < %s 2>&1 | FileCheck %s +; RUN: not llc -march=r600 -mcpu=cypress < %s 2>&1 | FileCheck %s + +; CHECK: error: unsupported dynamic alloca in test_dynamic_stackalloc + +define void @test_dynamic_stackalloc(i32 addrspace(1)* %out, i32 %n) { + %alloca = alloca i32, i32 %n + store volatile i32 0, i32* %alloca + ret void +} diff --git a/test/CodeGen/AMDGPU/extract-vector-elt-i64.ll b/test/CodeGen/AMDGPU/extract-vector-elt-i64.ll new file mode 100644 index 000000000000..e32559139623 --- /dev/null +++ b/test/CodeGen/AMDGPU/extract-vector-elt-i64.ll @@ -0,0 +1,43 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; How the replacement of i64 stores with v2i32 stores resulted in +; breaking other users of the bitcast if they already existed + +; GCN-LABEL: {{^}}extract_vector_elt_select_error: +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dwordx2 +define void @extract_vector_elt_select_error(i32 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %val) 
nounwind { + %vec = bitcast i64 %val to <2 x i32> + %elt0 = extractelement <2 x i32> %vec, i32 0 + %elt1 = extractelement <2 x i32> %vec, i32 1 + + store volatile i32 %elt0, i32 addrspace(1)* %out + store volatile i32 %elt1, i32 addrspace(1)* %out + store volatile i64 %val, i64 addrspace(1)* %in + ret void +} + + +define void @extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo) nounwind { + %p0 = extractelement <2 x i64> %foo, i32 0 + %p1 = extractelement <2 x i64> %foo, i32 1 + %out1 = getelementptr i64, i64 addrspace(1)* %out, i32 1 + store volatile i64 %p1, i64 addrspace(1)* %out + store volatile i64 %p0, i64 addrspace(1)* %out1 + ret void +} + +define void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) nounwind { + %dynelt = extractelement <2 x i64> %foo, i32 %elt + store volatile i64 %dynelt, i64 addrspace(1)* %out + ret void +} + +define void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %foo, i32 %elt, <2 x i64> %arst) nounwind { + %load = load volatile <2 x i64>, <2 x i64> addrspace(1)* %foo + %or = or <2 x i64> %load, %arst + %dynelt = extractelement <2 x i64> %or, i32 %elt + store volatile i64 %dynelt, i64 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/fadd64.ll b/test/CodeGen/AMDGPU/fadd64.ll index 485c55870c47..19c17289da3d 100644 --- a/test/CodeGen/AMDGPU/fadd64.ll +++ b/test/CodeGen/AMDGPU/fadd64.ll @@ -1,14 +1,44 @@ -; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s -; CHECK: {{^}}fadd_f64: +; CHECK-LABEL: {{^}}v_fadd_f64: ; CHECK: v_add_f64 {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}}, {{v[[0-9]+:[0-9]+]}} - -define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, - double addrspace(1)* %in2) { - %r0 = load double, double addrspace(1)* %in1 - %r1 = load double, double addrspace(1)* %in2 - %r2 = fadd double %r0, %r1 - store double %r2, double addrspace(1)* %out - ret void +define void @v_fadd_f64(double addrspace(1)* %out, double addrspace(1)* %in1, + double addrspace(1)* %in2) { + %r0 = load double, double addrspace(1)* %in1 + %r1 = load double, double addrspace(1)* %in2 + %r2 = fadd double %r0, %r1 + store double %r2, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}s_fadd_f64: +; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} +define void @s_fadd_f64(double addrspace(1)* %out, double %r0, double %r1) { + %r2 = fadd double %r0, %r1 + store double %r2, double addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}v_fadd_v2f64: +; CHECK: v_add_f64 +; CHECK: v_add_f64 +; CHECK: buffer_store_dwordx4 +define void @v_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, + <2 x double> addrspace(1)* %in2) { + %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1 + %r1 = load <2 x double>, <2 x double> addrspace(1)* %in2 + %r2 = fadd <2 x double> %r0, %r1 + store <2 x double> %r2, <2 x double> addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}s_fadd_v2f64: +; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} +; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} +; CHECK: buffer_store_dwordx4 +define void @s_fadd_v2f64(<2 x double> 
addrspace(1)* %out, <2 x double> %r0, <2 x double> %r1) { + %r2 = fadd <2 x double> %r0, %r1 + store <2 x double> %r2, <2 x double> addrspace(1)* %out + ret void } diff --git a/test/CodeGen/AMDGPU/fceil64.ll b/test/CodeGen/AMDGPU/fceil64.ll index e8c34f0141e4..c8ef5b101c4d 100644 --- a/test/CodeGen/AMDGPU/fceil64.ll +++ b/test/CodeGen/AMDGPU/fceil64.ll @@ -17,12 +17,12 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; SI: s_lshr_b64 ; SI: s_not_b64 ; SI: s_and_b64 -; SI: cmp_gt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 -; SI: cmp_lt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 +; SI-DAG: cmp_gt_i32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 +; SI-DAG: cmp_lt_i32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 ; SI-DAG: v_cmp_lt_f64 ; SI-DAG: v_cmp_lg_f64 ; SI: s_and_b64 diff --git a/test/CodeGen/AMDGPU/fcmp.ll b/test/CodeGen/AMDGPU/fcmp.ll index 5207ab57bade..97d954fcc3c2 100644 --- a/test/CodeGen/AMDGPU/fcmp.ll +++ b/test/CodeGen/AMDGPU/fcmp.ll @@ -20,7 +20,7 @@ entry: ; CHECK: {{^}}fcmp_br: ; CHECK: SET{{[N]*}}E_DX10 * T{{[0-9]+\.[XYZW],}} -; CHECK-NEXT {{[0-9]+(5.0}} +; CHECK-NEXT: {{[0-9]+\(5.0}} define void @fcmp_br(i32 addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/flat-address-space.ll b/test/CodeGen/AMDGPU/flat-address-space.ll index 8ceca078f2d6..86e0c07323bb 100644 --- a/test/CodeGen/AMDGPU/flat-address-space.ll +++ b/test/CodeGen/AMDGPU/flat-address-space.ll @@ -7,39 +7,16 @@ ; specialize away generic pointer accesses. -; CHECK-LABEL: {{^}}branch_use_flat_i32: -; CHECK: flat_store_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} -; CHECK: s_endpgm -define void @branch_use_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* %gptr, i32 addrspace(3)* %lptr, i32 %x, i32 %c) #0 { -entry: - %cmp = icmp ne i32 %c, 0 - br i1 %cmp, label %local, label %global - -local: - %flat_local = addrspacecast i32 addrspace(3)* %lptr to i32 addrspace(4)* - br label %end - -global: - %flat_global = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* - br label %end - -end: - %fptr = phi i32 addrspace(4)* [ %flat_local, %local ], [ %flat_global, %global ] - store i32 %x, i32 addrspace(4)* %fptr, align 4 -; %val = load i32, i32 addrspace(4)* %fptr, align 4 -; store i32 %val, i32 addrspace(1)* %out, align 4 - ret void -} - - - ; These testcases might become useless when there are optimizations to ; remove generic pointers. 
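+; Each test below exercises the same basic pattern: a pointer in some
+; specific address space is addrspacecast to the flat address space
+; (addrspace(4) on this target) and then accessed through the flat
+; pointer, e.g. as in @store_flat_i32:
+;   %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)*
+;   store i32 %x, i32 addrspace(4)* %fptr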
; CHECK-LABEL: {{^}}store_flat_i32: -; CHECK: v_mov_b32_e32 v[[DATA:[0-9]+]], {{s[0-9]+}} -; CHECK: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], {{s[0-9]+}} -; CHECK: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], {{s[0-9]+}} +; CHECK-DAG: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]], +; CHECK-DAG: s_load_dword s[[SDATA:[0-9]+]], +; CHECK: s_waitcnt lgkmcnt(0) +; CHECK-DAG: v_mov_b32_e32 v[[DATA:[0-9]+]], s[[SDATA]] +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] ; CHECK: flat_store_dword v[[DATA]], v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} define void @store_flat_i32(i32 addrspace(1)* %gptr, i32 %x) #0 { %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* @@ -83,7 +60,7 @@ define void @store_flat_trunc_i8(i8 addrspace(1)* %gptr, i32 %x) #0 { -; CHECK-LABEL @load_flat_i32: +; CHECK-LABEL: load_flat_i32: ; CHECK: flat_load_dword define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i32 addrspace(1)* %gptr to i32 addrspace(4)* @@ -92,7 +69,7 @@ define void @load_flat_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noa ret void } -; CHECK-LABEL @load_flat_i64: +; CHECK-LABEL: load_flat_i64: ; CHECK: flat_load_dwordx2 define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i64 addrspace(1)* %gptr to i64 addrspace(4)* @@ -101,7 +78,7 @@ define void @load_flat_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noa ret void } -; CHECK-LABEL @load_flat_v4i32: +; CHECK-LABEL: load_flat_v4i32: ; CHECK: flat_load_dwordx4 define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast <4 x i32> addrspace(1)* %gptr to <4 x i32> addrspace(4)* @@ -110,7 +87,7 @@ define void @load_flat_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> add ret void } -; CHECK-LABEL @sextload_flat_i8: +; CHECK-LABEL: sextload_flat_i8: ; CHECK: flat_load_sbyte define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* @@ -120,7 +97,7 @@ define void @sextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n ret void } -; CHECK-LABEL @zextload_flat_i8: +; CHECK-LABEL: zextload_flat_i8: ; CHECK: flat_load_ubyte define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i8 addrspace(1)* %gptr to i8 addrspace(4)* @@ -130,7 +107,7 @@ define void @zextload_flat_i8(i32 addrspace(1)* noalias %out, i8 addrspace(1)* n ret void } -; CHECK-LABEL @sextload_flat_i16: +; CHECK-LABEL: sextload_flat_i16: ; CHECK: flat_load_sshort define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* @@ -140,7 +117,7 @@ define void @sextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* ret void } -; CHECK-LABEL @zextload_flat_i16: +; CHECK-LABEL: zextload_flat_i16: ; CHECK: flat_load_ushort define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %gptr) #0 { %fptr = addrspacecast i16 addrspace(1)* %gptr to i16 addrspace(4)* @@ -150,35 +127,9 @@ define void @zextload_flat_i16(i32 addrspace(1)* noalias %out, i16 addrspace(1)* ret void } - - -; TODO: This should not be zero when registers are used for small -; scratch allocations again. 
- -; Check for prologue initializing special SGPRs pointing to scratch. -; CHECK-LABEL: {{^}}store_flat_scratch: -; CHECK: s_movk_i32 flat_scratch_lo, 0 -; CHECK-NO-PROMOTE: s_movk_i32 flat_scratch_hi, 0x28{{$}} -; CHECK-PROMOTE: s_movk_i32 flat_scratch_hi, 0x0{{$}} -; CHECK: flat_store_dword -; CHECK: s_barrier -; CHECK: flat_load_dword -define void @store_flat_scratch(i32 addrspace(1)* noalias %out, i32) #0 { - %alloca = alloca i32, i32 9, align 4 - %x = call i32 @llvm.r600.read.tidig.x() #3 - %pptr = getelementptr i32, i32* %alloca, i32 %x - %fptr = addrspacecast i32* %pptr to i32 addrspace(4)* - store i32 %x, i32 addrspace(4)* %fptr - ; Dummy call - call void @llvm.AMDGPU.barrier.local() #1 - %reload = load i32, i32 addrspace(4)* %fptr, align 4 - store i32 %reload, i32 addrspace(1)* %out, align 4 - ret void -} - declare void @llvm.AMDGPU.barrier.local() #1 declare i32 @llvm.r600.read.tidig.x() #3 attributes #0 = { nounwind } -attributes #1 = { nounwind noduplicate } +attributes #1 = { nounwind convergent } attributes #3 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll new file mode 100644 index 000000000000..e2ae3353ae1d --- /dev/null +++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI +; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI + +; GCN-LABEL: {{^}}no_vcc_no_flat: +; GCN: ; NumSgprs: 8 +define void @no_vcc_no_flat() { +entry: + call void asm sideeffect "", "~{SGPR7}"() + ret void +} + +; GCN-LABEL: {{^}}vcc_no_flat: +; GCN: ; NumSgprs: 10 +define void @vcc_no_flat() { +entry: + call void asm sideeffect "", "~{SGPR7},~{VCC}"() + ret void +} + +; GCN-LABEL: {{^}}no_vcc_flat: +; CI: ; NumSgprs: 12 +; VI: ; NumSgprs: 14 +define void @no_vcc_flat() { +entry: + call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"() + ret void +} + +; GCN-LABEL: {{^}}vcc_flat: +; CI: ; NumSgprs: 12 +; VI: ; NumSgprs: 14 +define void @vcc_flat() { +entry: + call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"() + ret void +} diff --git a/test/CodeGen/AMDGPU/fma-combine.ll b/test/CodeGen/AMDGPU/fma-combine.ll index bd574b877117..6f3437048ed8 100644 --- a/test/CodeGen/AMDGPU/fma-combine.ll +++ b/test/CodeGen/AMDGPU/fma-combine.ll @@ -364,5 +364,205 @@ define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias % ret void } +; +; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y) +; + +; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]] +define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, 1.0 + %m = fmul float %a, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY:v[0-9]]], [[VX:v[0-9]]] +define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, 1.0 + %m = fmul float %y, %a + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y: +; SI: 
v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, -1.0 + %m = fmul float %a, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %a = fadd float %x, -1.0 + %m = fmul float %y, %a + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]] +define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float 1.0, %x + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]] +define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float 1.0, %x + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float -1.0, %x + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x: +; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float -1.0, %x + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, 1.0 + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one: +; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]] +define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, 1.0 + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void 
@test_f32_mul_sub_x_negone_y(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, -1.0 + %m = fmul float %s, %y + store float %m, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone: +; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]] +define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %s = fsub float %x, -1.0 + %m = fmul float %y, %s + store float %m, float addrspace(1)* %out + ret void +} + +; +; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) +; + +; FUNC-LABEL: {{^}}test_f32_interp: +; SI: v_mad_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]] +; SI: v_mac_f32_e32 [[VR]], [[VT]], [[VX:v[0-9]]] +define void @test_f32_interp(float addrspace(1)* %out, + float addrspace(1)* %in1, + float addrspace(1)* %in2, + float addrspace(1)* %in3) { + %x = load float, float addrspace(1)* %in1 + %y = load float, float addrspace(1)* %in2 + %t = load float, float addrspace(1)* %in3 + %t1 = fsub float 1.0, %t + %tx = fmul float %x, %t + %ty = fmul float %y, %t1 + %r = fadd float %tx, %ty + store float %r, float addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_f64_interp: +; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]] +; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]] +define void @test_f64_interp(double addrspace(1)* %out, + double addrspace(1)* %in1, + double addrspace(1)* %in2, + double addrspace(1)* %in3) { + %x = load double, double addrspace(1)* %in1 + %y = load double, double addrspace(1)* %in2 + %t = load double, double addrspace(1)* %in3 + %t1 = fsub double 1.0, %t + %tx = fmul double %x, %t + %ty = fmul double %y, %t1 + %r = fadd double %tx, %ty + store double %r, double addrspace(1)* %out + ret void +} + attributes #0 = { nounwind readnone } attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/fmax_legacy.ll b/test/CodeGen/AMDGPU/fmax_legacy.ll index 413957d2982a..d374fb67350c 100644 --- a/test/CodeGen/AMDGPU/fmax_legacy.ll +++ b/test/CodeGen/AMDGPU/fmax_legacy.ll @@ -87,6 +87,46 @@ define void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace( ret void } +; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32: +; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; SI-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; EG: MAX +define void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1 + + %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0 + %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ogt <1 x float> %a, %b + %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b + store <1 x float> %val, <1 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32: +; SI-SAFE: v_max_legacy_f32_e32 +; SI-SAFE: 
v_max_legacy_f32_e32 +; SI-SAFE: v_max_legacy_f32_e32 +; SI-NONAN: v_max_f32_e32 +; SI-NONAN: v_max_f32_e32 +; SI-NONAN: v_max_f32_e32 +define void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1 + + %a = load <3 x float>, <3 x float> addrspace(1)* %gep.0 + %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ogt <3 x float> %a, %b + %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b + store <3 x float> %val, <3 x float> addrspace(1)* %out + ret void +} ; FUNC-LABEL: @test_fmax_legacy_ogt_f32_multi_use ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} diff --git a/test/CodeGen/AMDGPU/fmin_legacy.ll b/test/CodeGen/AMDGPU/fmin_legacy.ll index 6a625c239d76..52fc3d0d251a 100644 --- a/test/CodeGen/AMDGPU/fmin_legacy.ll +++ b/test/CodeGen/AMDGPU/fmin_legacy.ll @@ -96,6 +96,69 @@ define void @test_fmin_legacy_ult_f32(float addrspace(1)* %out, float addrspace( ret void } +; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v1f32: +; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} +; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; SI-SAFE: v_min_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; SI-NONAN: v_min_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +define void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1 + + %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0 + %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ult <1 x float> %a, %b + %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b + store <1 x float> %val, <1 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v2f32: +; SI: buffer_load_dwordx2 +; SI: buffer_load_dwordx2 +; SI-SAFE: v_min_legacy_f32_e32 +; SI-SAFE: v_min_legacy_f32_e32 + +; SI-NONAN: v_min_f32_e32 +; SI-NONAN: v_min_f32_e32 +define void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1 + + %a = load <2 x float>, <2 x float> addrspace(1)* %gep.0 + %b = load <2 x float>, <2 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ult <2 x float> %a, %b + %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b + store <2 x float> %val, <2 x float> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_fmin_legacy_ult_v3f32: +; SI-SAFE: v_min_legacy_f32_e32 +; SI-SAFE: v_min_legacy_f32_e32 +; SI-SAFE: v_min_legacy_f32_e32 + +; SI-NONAN: v_min_f32_e32 +; SI-NONAN: v_min_f32_e32 +; SI-NONAN: v_min_f32_e32 +define void @test_fmin_legacy_ult_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid + %gep.1 = getelementptr <3 x float>, <3 x float> addrspace(1)* %gep.0, i32 1 + + %a 
= load <3 x float>, <3 x float> addrspace(1)* %gep.0 + %b = load <3 x float>, <3 x float> addrspace(1)* %gep.1 + + %cmp = fcmp ult <3 x float> %a, %b + %val = select <3 x i1> %cmp, <3 x float> %a, <3 x float> %b + store <3 x float> %val, <3 x float> addrspace(1)* %out + ret void +} + ; FUNC-LABEL: @test_fmin_legacy_ole_f32_multi_use ; SI: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} ; SI: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 diff --git a/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll new file mode 100644 index 000000000000..1ee92b2f7c08 --- /dev/null +++ b/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll @@ -0,0 +1,102 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't +; make add an instruction if the fadd has more than one use. + +declare float @llvm.fabs.f32(float) #1 + +; GCN-LABEL: {{^}}multiple_fadd_use_test: +; GCN: v_max_legacy_f32_e64 [[A16:v[0-9]+]], +; GCN: v_add_f32_e32 [[A17:v[0-9]+]], [[A16]], [[A16]] +; GCN: v_mul_f32_e32 [[A18:v[0-9]+]], [[A17]], [[A17]] +; GCN: v_mad_f32 [[A20:v[0-9]+]], -[[A18]], [[A17]], 1.0 +; GCN: buffer_store_dword [[A20]] +define void @multiple_fadd_use_test(float addrspace(1)* %out, float %x, float %y, float %z) #0 { + %a11 = fadd fast float %y, -1.0 + %a12 = call float @llvm.fabs.f32(float %a11) + %a13 = fadd fast float %x, -1.0 + %a14 = call float @llvm.fabs.f32(float %a13) + %a15 = fcmp ogt float %a12, %a14 + %a16 = select i1 %a15, float %a12, float %a14 + %a17 = fmul fast float %a16, 2.0 + %a18 = fmul fast float %a17, %a17 + %a19 = fmul fast float %a18, %a17 + %a20 = fsub fast float 1.0, %a19 + store float %a20, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}multiple_use_fadd_fmac +; GCN-DAG: v_add_f32_e64 [[MUL2:v[0-9]+]], [[X:s[0-9]+]], s{{[0-9]+}} +; GCN-DAG: v_mac_f32_e64 [[MAD:v[0-9]+]], 2.0, [[X]] +; GCN-DAG: buffer_store_dword [[MUL2]] +; GCN-DAG: buffer_store_dword [[MAD]] +; GCN: s_endpgm +define void @multiple_use_fadd_fmac(float addrspace(1)* %out, float %x, float %y) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %mul2 = fmul fast float %x, 2.0 + %mad = fadd fast float %mul2, %y + store float %mul2, float addrspace(1)* %out + store float %mad, float addrspace(1)* %out.gep.1 + ret void +} + +; GCN-LABEL: {{^}}multiple_use_fadd_fmad: +; GCN-DAG: v_add_f32_e64 [[MUL2:v[0-9]+]], |[[X:s[0-9]+]]|, |s{{[0-9]+}}| +; GCN-DAG: v_mad_f32 [[MAD:v[0-9]+]], 2.0, |[[X]]|, v{{[0-9]+}} +; GCN-DAG: buffer_store_dword [[MUL2]] +; GCN-DAG: buffer_store_dword [[MAD]] +; GCN: s_endpgm +define void @multiple_use_fadd_fmad(float addrspace(1)* %out, float %x, float %y) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %x.abs = call float @llvm.fabs.f32(float %x) + %mul2 = fmul fast float %x.abs, 2.0 + %mad = fadd fast float %mul2, %y + store float %mul2, float addrspace(1)* %out + store float %mad, float addrspace(1)* %out.gep.1 + ret void +} + +; GCN-LABEL: {{^}}multiple_use_fadd_multi_fmad: +; GCN: v_mad_f32 {{v[0-9]+}}, 2.0, |[[X:s[0-9]+]]|, v{{[0-9]+}} +; GCN: v_mad_f32 {{v[0-9]+}}, 2.0, |[[X]]|, v{{[0-9]+}} +define void @multiple_use_fadd_multi_fmad(float addrspace(1)* %out, float %x, float %y, float %z) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %x.abs = call float @llvm.fabs.f32(float %x) 
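+  ; Illustrative note (a sketch of the combine this file guards, not an
+  ; added check): %mul2 below has two users, %mad0 and %mad1, and each
+  ; (fadd fast (fmul fast %a, 2.0), %b) can lower to a single
+  ; v_mad_f32 2.0, %a, %b, which is why the label checks above expect the
+  ; inline constant 2.0 in two separate mads.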
+ %mul2 = fmul fast float %x.abs, 2.0 + %mad0 = fadd fast float %mul2, %y + %mad1 = fadd fast float %mul2, %z + store float %mad0, float addrspace(1)* %out + store float %mad1, float addrspace(1)* %out.gep.1 + ret void +} + +; GCN-LABEL: {{^}}fmul_x2_xn2: +; GCN: v_mul_f32_e64 [[TMP0:v[0-9]+]], -4.0, [[X:s[0-9]+]] +; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]] +; GCN: buffer_store_dword [[RESULT]] +define void @fmul_x2_xn2(float addrspace(1)* %out, float %x, float %y) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %mul2 = fmul fast float %x, 2.0 + %muln2 = fmul fast float %x, -2.0 + %mul = fmul fast float %mul2, %muln2 + store float %mul, float addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}fmul_x2_xn3: +; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0xc0c00000 +; GCN: v_mul_f32_e32 [[TMP0:v[0-9]+]], [[X:s[0-9]+]], [[K]] +; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], [[X]], [[TMP0]] +; GCN: buffer_store_dword [[RESULT]] +define void @fmul_x2_xn3(float addrspace(1)* %out, float %x, float %y) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %mul2 = fmul fast float %x, 2.0 + %muln2 = fmul fast float %x, -3.0 + %mul = fmul fast float %mul2, %muln2 + store float %mul, float addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind "unsafe-fp-math"="true" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/fneg-fabs.ll b/test/CodeGen/AMDGPU/fneg-fabs.ll index 3b4930d9897d..b99d2712ed75 100644 --- a/test/CodeGen/AMDGPU/fneg-fabs.ll +++ b/test/CodeGen/AMDGPU/fneg-fabs.ll @@ -34,8 +34,7 @@ define void @fneg_fabs_fmul_f32(float addrspace(1)* %out, float %x, float %y) { ; R600: |PV.{{[XYZW]}}| ; R600: -PV -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]] +; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float %fabs = call float @llvm.fabs.f32(float %bc) @@ -49,8 +48,7 @@ define void @fneg_fabs_free_f32(float addrspace(1)* %out, i32 %in) { ; R600: |PV.{{[XYZW]}}| ; R600: -PV -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]] +; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) { %bc = bitcast i32 %in to float %fabs = call float @fabs(float %bc) @@ -60,8 +58,7 @@ define void @fneg_fabs_fn_free_f32(float addrspace(1)* %out, i32 %in) { } ; FUNC-LABEL: {{^}}fneg_fabs_f32: -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[IMMREG]] +; SI: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80000000 define void @fneg_fabs_f32(float addrspace(1)* %out, float %in) { %fabs = call float @llvm.fabs.f32(float %in) %fsub = fsub float -0.000000e+00, %fabs @@ -85,11 +82,8 @@ define void @v_fneg_fabs_f32(float addrspace(1)* %out, float addrspace(1)* %in) ; R600: |{{(PV|T[0-9])\.[XYZW]}}| ; R600: -PV -; FIXME: SGPR should be used directly for first src operand. 
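; Annotation (illustrative sketch, assuming IEEE-754 single precision): fabs
; clears the sign bit and fneg sets it, so fneg(fabs(x)) reduces to a single
; integer OR with the sign mask:
;   %i = bitcast float %x to i32
;   %s = or i32 %i, -2147483648   ; 0x80000000, the f32 sign-bit mask
;   %r = bitcast i32 %s to float
; The updated checks verify the mask is used as an inline literal operand
; rather than first being materialized into a VGPR with v_mov_b32.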
-; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI-NOT: 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in) %fsub = fsub <2 x float> , %fabs @@ -97,14 +91,11 @@ define void @fneg_fabs_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %in) { ret void } -; FIXME: SGPR should be used directly for first src operand. ; FUNC-LABEL: {{^}}fneg_fabs_v4f32: -; SI: v_mov_b32_e32 [[IMMREG:v[0-9]+]], 0x80000000 -; SI-NOT: 0x80000000 -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] -; SI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[IMMREG]] +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} +; SI: v_or_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}} define void @fneg_fabs_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %in) { %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in) %fsub = fsub <4 x float> , %fabs diff --git a/test/CodeGen/AMDGPU/ftrunc.f64.ll b/test/CodeGen/AMDGPU/ftrunc.f64.ll index 6618d8b5e57e..83a8ad8901d2 100644 --- a/test/CodeGen/AMDGPU/ftrunc.f64.ll +++ b/test/CodeGen/AMDGPU/ftrunc.f64.ll @@ -29,12 +29,12 @@ define void @v_ftrunc_f64(double addrspace(1)* %out, double addrspace(1)* %in) { ; SI: s_lshr_b64 ; SI: s_not_b64 ; SI: s_and_b64 -; SI: cmp_gt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 -; SI: cmp_lt_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 +; SI-DAG: cmp_gt_i32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 +; SI-DAG: cmp_lt_i32 +; SI-DAG: cndmask_b32 +; SI-DAG: cndmask_b32 ; SI: s_endpgm define void @ftrunc_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.trunc.f64(double %x) nounwind readnone diff --git a/test/CodeGen/AMDGPU/gep-address-space.ll b/test/CodeGen/AMDGPU/gep-address-space.ll index 471b0f6b13e7..f5ab390ce686 100644 --- a/test/CodeGen/AMDGPU/gep-address-space.ll +++ b/test/CodeGen/AMDGPU/gep-address-space.ll @@ -11,24 +11,35 @@ define void @use_gep_address_space([1024 x i32] addrspace(3)* %array) nounwind { ret void } -define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind { ; CHECK-LABEL: {{^}}use_gep_address_space_large_offset: ; The LDS offset will be 65536 bytes, which is larger than the size of LDS on ; SI, which is why it is being OR'd with the base pointer. 
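; Annotation, with the arithmetic spelled out: element 16384 of an i32 array
; is byte offset 16384 * 4 = 65536 = 0x10000. Because that exceeds the LDS
; size on SI, a base pointer that fits in LDS is below 0x10000 and shares no
; set bits with the offset, so base | 0x10000 == base + 0x10000 and the
; cheaper s_or_b32 checked below is correct; CI instead selects s_add_i32.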
; SI: s_or_b32 ; CI: s_add_i32 ; CHECK: ds_write_b32 +define void @use_gep_address_space_large_offset([1024 x i32] addrspace(3)* %array) nounwind { %p = getelementptr [1024 x i32], [1024 x i32] addrspace(3)* %array, i16 0, i16 16384 store i32 99, i32 addrspace(3)* %p ret void } -define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind { ; CHECK-LABEL: {{^}}gep_as_vector_v4: -; CHECK: s_add_i32 -; CHECK: s_add_i32 -; CHECK: s_add_i32 -; CHECK: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 + +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} + +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CHECK: s_endpgm +define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind { %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(3)*> %array, <4 x i16> zeroinitializer, <4 x i16> %p0 = extractelement <4 x i32 addrspace(3)*> %p, i32 0 %p1 = extractelement <4 x i32 addrspace(3)*> %p, i32 1 @@ -41,10 +52,15 @@ define void @gep_as_vector_v4(<4 x [1024 x i32] addrspace(3)*> %array) nounwind ret void } -define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind { ; CHECK-LABEL: {{^}}gep_as_vector_v2: -; CHECK: s_add_i32 -; CHECK: s_add_i32 +; SI: s_add_i32 +; SI: s_add_i32 +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 {{v[0-9]+}}, {{s[0-9]+}} +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CI-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:64 +; CHECK: s_endpgm +define void @gep_as_vector_v2(<2 x [1024 x i32] addrspace(3)*> %array) nounwind { %p = getelementptr [1024 x i32], <2 x [1024 x i32] addrspace(3)*> %array, <2 x i16> zeroinitializer, <2 x i16> %p0 = extractelement <2 x i32 addrspace(3)*> %p, i32 0 %p1 = extractelement <2 x i32 addrspace(3)*> %p, i32 1 diff --git a/test/CodeGen/AMDGPU/global-constant.ll b/test/CodeGen/AMDGPU/global-constant.ll new file mode 100644 index 000000000000..bc5f031cd4a2 --- /dev/null +++ b/test/CodeGen/AMDGPU/global-constant.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s + +@readonly = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0] +@readonly2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0] + +; GCN-LABEL: {{^}}main: +; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], readonly +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0 +; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], readonly +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0 +; NOHSA: .text +; HSA: .hsatext +; GCN: readonly: +; GCN: readonly2: +define void @main(i32 %index, float addrspace(1)* %out) { + %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly, i32 0, i32 %index + %val = load float, float addrspace(2)* %ptr + store float %val, float addrspace(1)* %out + 
%ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly2, i32 0, i32 %index + %val2 = load float, float addrspace(2)* %ptr2 + store float %val2, float addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/AMDGPU/global-extload-i32.ll b/test/CodeGen/AMDGPU/global-extload-i32.ll index 79b83452939e..e5e6be2199c3 100644 --- a/test/CodeGen/AMDGPU/global-extload-i32.ll +++ b/test/CodeGen/AMDGPU/global-extload-i32.ll @@ -49,8 +49,7 @@ define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i ; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64: ; SI: buffer_load_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <2 x i32>, <2 x i32> addrspace(1)* %in @@ -63,8 +62,7 @@ define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ; SI: buffer_load_dwordx2 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <2 x i32>, <2 x i32> addrspace(1)* %in @@ -75,10 +73,8 @@ define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i ; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64: ; SI: buffer_load_dwordx4 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <4 x i32>, <4 x i32> addrspace(1)* %in @@ -93,10 +89,8 @@ define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <4 x i32>, <4 x i32> addrspace(1)* %in @@ -106,22 +100,12 @@ define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i } ; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <8 x i32>, <8 x i32> addrspace(1)* %in @@ -131,14 +115,8 @@ define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i } ; FUNC-LABEL: 
{{^}}sextload_global_v8i32_to_v8i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 @@ -148,15 +126,10 @@ define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 - +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <8 x i32>, <8 x i32> addrspace(1)* %in @@ -166,50 +139,34 @@ define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i } ; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <16 x i32>, <16 x i32> addrspace(1)* %in @@ -219,40 +176,19 @@ define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 } ; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: 
buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind { %load = load <16 x i32>, <16 x i32> addrspace(1)* %in @@ -262,41 +198,15 @@ define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 } ; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 @@ -331,41 +241,25 @@ define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 ; SI-DAG: v_ashrrev_i32 ; SI-DAG: v_ashrrev_i32 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: 
buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { @@ -376,77 +270,34 @@ define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 } ; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 - -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 - -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 - -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 -; SI-DAG: buffer_store_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 ; SI: s_endpgm define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind { diff --git a/test/CodeGen/AMDGPU/global_atomics.ll b/test/CodeGen/AMDGPU/global_atomics.ll index 146f0a5fbf26..6786e4a2f375 100644 --- a/test/CodeGen/AMDGPU/global_atomics.ll +++ b/test/CodeGen/AMDGPU/global_atomics.ll @@ -12,7 +12,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_add_i32_ret_offset: -; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_add [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 
glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -105,7 +105,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_and_i32_ret_offset: -; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_and [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -197,7 +197,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_sub_i32_ret_offset: -; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_sub [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -289,7 +289,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_max_i32_ret_offset: -; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_smax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -381,7 +381,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umax_i32_ret_offset: -; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_umax [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -473,7 +473,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_min_i32_ret_offset: -; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_smin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -565,7 +565,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_umin_i32_ret_offset: -; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_umin [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -657,7 +657,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_or_i32_ret_offset: -; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_or [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -749,7 +749,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xchg_i32_ret_offset: -; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_swap [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: @@ -838,7 +838,7 @@ entry: } ; FUNC-LABEL: {{^}}atomic_xor_i32_ret_offset: -; GCN: buffer_atomic_xor [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc {{$}} +; GCN: buffer_atomic_xor [[RET:v[0-9]+]], 
s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}} ; GCN: buffer_store_dword [[RET]] define void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) { entry: diff --git a/test/CodeGen/AMDGPU/half.ll b/test/CodeGen/AMDGPU/half.ll index bf8f11860b50..a02cbf43c400 100644 --- a/test/CodeGen/AMDGPU/half.ll +++ b/test/CodeGen/AMDGPU/half.ll @@ -105,6 +105,26 @@ define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x hal } ; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg: +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort + +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 + +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 { %ext = fpext <8 x half> %arg to <8 x float> store <8 x float> %ext, <8 x float> addrspace(1)* %out @@ -112,12 +132,24 @@ define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x hal } ; GCN-LABEL: {{^}}extload_f16_to_f64_arg: +; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}} +; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}} +; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]] +; GCN: buffer_store_dwordx2 [[RESULT]] define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 { %ext = fpext half %arg to double store double %ext, double addrspace(1)* %out ret void } + ; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 { %ext = fpext <2 x half> %arg to <2 x double> store <2 x double> %ext, <2 x double> addrspace(1)* %out @@ -125,6 +157,16 @@ define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x ha } ; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 { %ext = fpext <3 x half> %arg to <3 x double> store <3 x double> %ext, <3 x double> addrspace(1)* %out @@ -132,6 +174,19 @@ define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x ha } ; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN: s_endpgm define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 { %ext = fpext <4 x half> %arg to <4 x double> store <4 x double> %ext, <4 
x double> addrspace(1)* %out @@ -139,6 +194,37 @@ define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x ha } ; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg: +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v + +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v +; GCN-DAG: buffer_load_ushort v + +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 + +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 +; GCN-DAG: v_cvt_f32_f16_e32 + +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 + +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 +; GCN-DAG: v_cvt_f64_f32_e32 + +; GCN: s_endpgm define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 { %ext = fpext <8 x half> %arg to <8 x double> store <8 x double> %ext, <8 x double> addrspace(1)* %out @@ -194,6 +280,12 @@ define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace( } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32: +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}} +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] +; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}} +; GCN: s_endpgm define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %val = load <2 x half>, <2 x half> addrspace(1)* %in %cvt = fpext <2 x half> %val to <2 x float> @@ -226,6 +318,46 @@ define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x } ; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32: +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort +; GCN: buffer_load_ushort + +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 + +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 + +; GCN: s_endpgm define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 { %val = load <16 x half>, <16 x half> addrspace(1)* %in %cvt = fpext <16 x half> %val to <16 x float> @@ -246,6 +378,14 @@ define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace } ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64: +; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 
offset:2{{$}} +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]] +; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]] +; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]] +; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]] +; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}} +; GCN: s_endpgm define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 { %val = load <2 x half>, <2 x half> addrspace(1)* %in %cvt = fpext <2 x half> %val to <2 x double> @@ -254,6 +394,25 @@ define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x } ; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64: + +; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]] +; SI: v_lshr_b64 v{{\[[0-9]+:[0-9]+\]}}, [[LOAD]], 32 +; VI: v_lshrrev_b64 v{{\[[0-9]+:[0-9]+\]}}, 32, [[LOAD]] +; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}} + +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN: v_cvt_f32_f16_e32 +; GCN-NOT: v_cvt_f32_f16_e32 + +; GCN: v_cvt_f64_f32_e32 +; GCN: v_cvt_f64_f32_e32 +; GCN: v_cvt_f64_f32_e32 +; GCN-NOT: v_cvt_f64_f32_e32 + +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 +; GCN: s_endpgm define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 { %val = load <3 x half>, <3 x half> addrspace(1)* %in %cvt = fpext <3 x half> %val to <3 x double> @@ -310,13 +469,12 @@ define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 ret void } -; FIXME: Shouldn't do 4th conversion ; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16: ; GCN: buffer_load_dwordx4 ; GCN: v_cvt_f16_f32_e32 ; GCN: v_cvt_f16_f32_e32 ; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 +; GCN-NOT: v_cvt_f16_f32_e32 ; GCN: buffer_store_short ; GCN: buffer_store_dword ; GCN: s_endpgm @@ -346,14 +504,8 @@ define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 } ; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16: -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 ; GCN: v_cvt_f16_f32_e32 ; GCN: v_cvt_f16_f32_e32 ; GCN: v_cvt_f16_f32_e32 @@ -379,54 +531,42 @@ define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 } ; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16: -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: buffer_load_dword -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: v_cvt_f16_f32_e32 -; GCN: 
buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short -; GCN: buffer_store_short +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: v_cvt_f16_f32_e32 +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short +; GCN-DAG: buffer_store_short ; GCN: s_endpgm define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 { %val = load <16 x float>, <16 x float> addrspace(1)* %in diff --git a/test/CodeGen/AMDGPU/hsa-globals.ll b/test/CodeGen/AMDGPU/hsa-globals.ll new file mode 100644 index 000000000000..1d76c40c042e --- /dev/null +++ b/test/CodeGen/AMDGPU/hsa-globals.ll @@ -0,0 +1,132 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=ASM %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -symbols -s | FileCheck %s --check-prefix=ELF + +@internal_global_program = internal addrspace(1) global i32 0 +@common_global_program = common addrspace(1) global i32 0 +@external_global_program = addrspace(1) global i32 0 + +@internal_global_agent = internal addrspace(1) global i32 0, section ".hsadata_global_agent" +@common_global_agent = common addrspace(1) global i32 0, section ".hsadata_global_agent" +@external_global_agent = addrspace(1) global i32 0, section ".hsadata_global_agent" + +@internal_readonly = internal unnamed_addr addrspace(2) constant i32 0 +@external_readonly = unnamed_addr addrspace(2) constant i32 0 + +define void @test() { + ret void +} + +; ASM: .amdgpu_hsa_module_global internal_global +; ASM: .hsadata_global_program +; ASM: internal_global_program: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_module_global common_global +; ASM: .hsadata_global_program +; ASM: common_global_program: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_program_global external_global +; ASM: .hsadata_global_program +; ASM: external_global_program: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_module_global internal_global +; ASM: .hsadata_global_agent +; ASM: internal_global_agent: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_module_global common_global +; ASM: .hsadata_global_agent +; ASM: common_global_agent: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_program_global external_global +; ASM: .hsadata_global_agent +; ASM: 
external_global_agent: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_module_global internal_readonly +; ASM: .hsatext +; ASM: internal_readonly: +; ASM: .long 0 + +; ASM: .amdgpu_hsa_program_global external_readonly +; ASM: .hsatext +; ASM: external_readonly: +; ASM: .long 0 + +; ELF: Section { +; ELF: Name: .hsadata_global_program +; ELF: Type: SHT_PROGBITS (0x1) +; ELF: Flags [ (0x100003) +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_AMDGPU_HSA_GLOBAL (0x100000) +; ELF: SHF_WRITE (0x1) +; ELF: ] +; ELF: } + +; ELF: Section { +; ELF: Name: .hsadata_global_agent +; ELF: Type: SHT_PROGBITS (0x1) +; ELF: Flags [ (0x900003) +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_AMDGPU_HSA_AGENT (0x800000) +; ELF: SHF_AMDGPU_HSA_GLOBAL (0x100000) +; ELF: SHF_WRITE (0x1) +; ELF: ] +; ELF: } + +; ELF: Symbol { +; ELF: Name: common_global_agent +; ELF: Binding: Local +; ELF: Section: .hsadata_global_agent +; ELF: } + +; ELF: Symbol { +; ELF: Name: common_global_program +; ELF: Binding: Local +; ELF: Section: .hsadata_global_program +; ELF: } + +; ELF: Symbol { +; ELF: Name: internal_global_agent +; ELF: Binding: Local +; ELF: Type: Object +; ELF: Section: .hsadata_global_agent +; ELF: } + +; ELF: Symbol { +; ELF: Name: internal_global_program +; ELF: Binding: Local +; ELF: Type: Object +; ELF: Section: .hsadata_global_program +; ELF: } + +; ELF: Symbol { +; ELF: Name: internal_readonly +; ELF: Binding: Local +; ELF: Type: Object +; ELF: Section: .hsatext +; ELF: } + +; ELF: Symbol { +; ELF: Name: external_global_agent +; ELF: Binding: Global +; ELF: Type: Object +; ELF: Section: .hsadata_global_agent +; ELF: } + +; ELF: Symbol { +; ELF: Name: external_global_program +; ELF: Binding: Global +; ELF: Type: Object +; ELF: Section: .hsadata_global_program +; ELF: } + +; ELF: Symbol { +; ELF: Name: external_readonly +; ELF: Binding: Global +; ELF: Type: Object +; ELF: Section: .hsatext +; ELF: } diff --git a/test/CodeGen/AMDGPU/hsa-group-segment.ll b/test/CodeGen/AMDGPU/hsa-group-segment.ll new file mode 100644 index 000000000000..1999dc38a6b0 --- /dev/null +++ b/test/CodeGen/AMDGPU/hsa-group-segment.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s + +@internal_group = internal addrspace(3) global i32 undef +@external_group = addrspace(3) global i32 undef + +define void @test() { +entry: + store i32 0, i32 addrspace(3)* @internal_group + store i32 0, i32 addrspace(3)* @external_group + ret void +} + +; HSA-NOT: internal_group: +; HSA-NOT: external_group: diff --git a/test/CodeGen/AMDGPU/hsa.ll b/test/CodeGen/AMDGPU/hsa.ll index 653a6bb1b609..abc89b7fd837 100644 --- a/test/CodeGen/AMDGPU/hsa.ll +++ b/test/CodeGen/AMDGPU/hsa.ll @@ -1,11 +1,24 @@ -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA-CI --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA-VI --check-prefix=HSA %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -s -sd | FileCheck --check-prefix=ELF %s -; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -s -sd | FileCheck %s --check-prefix=ELF +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global | FileCheck --check-prefix=HSA-CI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa 
-mcpu=carrizo -mattr=-flat-for-global | FileCheck --check-prefix=HSA-VI %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj | llvm-readobj -symbols -s -sd | FileCheck --check-prefix=ELF %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF ; The SHT_NOTE section contains the output from the .hsa_code_object_* ; directives. +; ELF: Section { +; ELF: Name: .hsatext +; ELF: Type: SHT_PROGBITS (0x1) +; ELF: Flags [ (0xC00007) +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_AMDGPU_HSA_AGENT (0x800000) +; ELF: SHF_AMDGPU_HSA_CODE (0x400000) +; ELF: SHF_EXECINSTR (0x4) +; ELF: SHF_WRITE (0x1) +; ELF: } + ; ELF: SHT_NOTE ; ELF: 0000: 04000000 08000000 01000000 414D4400 ; ELF: 0010: 01000000 00000000 04000000 1B000000 @@ -13,20 +26,31 @@ ; ELF: 0030: 00000000 00000000 414D4400 414D4447 ; ELF: 0040: 50550000 +; ELF: Symbol { +; ELF: Name: simple +; ELF: Type: AMDGPU_HSA_KERNEL (0xA) +; ELF: } + ; HSA: .hsa_code_object_version 1,0 ; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" +; HSA: .hsatext + +; HSA: .amdgpu_hsa_kernel simple ; HSA: {{^}}simple: ; HSA: .amd_kernel_code_t +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_kernarg_segment_ptr = 1 ; HSA: .end_amd_kernel_code_t -; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[0:1], 0x0 +; HSA: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x0 ; Make sure we are setting the ATC bit: ; HSA-CI: s_mov_b32 s[[HI:[0-9]]], 0x100f000 ; On VI+ we also need to set MTYPE = 2 ; HSA-VI: s_mov_b32 s[[HI:[0-9]]], 0x1100f000 -; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0 +; Make sure we generate flat store for HSA +; HSA: flat_store_dword v{{[0-9]+}} define void @simple(i32 addrspace(1)* %out) { entry: diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll new file mode 100644 index 000000000000..5906b2f15709 --- /dev/null +++ b/test/CodeGen/AMDGPU/image-attributes.ll @@ -0,0 +1,206 @@ +; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; === WIDTH ================================================================== +; 9 implicit args = 9 dwords to first image argument. 
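+; The KC0 mapping implied by the examples in this file: kernel-argument
+; dword d lands in constant register KC0[d/4], component X/Y/Z/W = d%4.
+; For instance, the first width below sits at d = 9+1 = 10, and
+; 10 = 4*2 + 2 selects KC0[2].Z; depth at d = 12 = 4*3 + 0 gives KC0[3].X.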
+; First width at dword index 9+1 -> KC0[2].Z + +; FUNC-LABEL: {{^}}width_2d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].Z +define void @width_2d (%opencl.image2d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}width_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].Z +define void @width_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === HEIGHT ================================================================= +; First height at dword index 9+2 -> KC0[2].W + +; FUNC-LABEL: {{^}}height_2d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].W +define void @height_2d (%opencl.image2d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}height_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].W +define void @height_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === DEPTH ================================================================== +; First depth at dword index 9+3 -> KC0[3].X + +; FUNC-LABEL: {{^}}depth_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].X +define void @depth_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [3 x i32] %0, 2 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === CHANNEL DATA TYPE ====================================================== +; First channel data type at dword index 9+4 -> KC0[3].Y + +; FUNC-LABEL: {{^}}data_type_2d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].Y +define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + %1 = extractvalue [2 x i32] %0, 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}data_type_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].Y +define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [2 x i32] %0, 0 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === CHANNEL ORDER ========================================================== +; First channel order at dword index 9+5 -> KC0[3].Z + +; FUNC-LABEL: {{^}}channel_order_2d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].Z +define void 
@channel_order_2d (%opencl.image2d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [2 x i32] @llvm.OpenCL.image.get.format.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + %1 = extractvalue [2 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}channel_order_3d: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[3].Z +define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in, + i32 addrspace(1)* %out) { +entry: + %0 = call [2 x i32] @llvm.OpenCL.image.get.format.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + %1 = extractvalue [2 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + + +; === 2ND IMAGE ============================================================== +; 9 implicit args + 2 explicit args + 5 implicit args for 1st image argument +; = 16 dwords to 2nd image argument. +; Height of the second image is at 16+2 -> KC0[4].Z +; +; FUNC-LABEL: {{^}}image_arg_2nd: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[4].Z +define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1, + i32 %x, + %opencl.image2d_t addrspace(1)* %in2, + i32 addrspace(1)* %out) { +entry: + %0 = call [3 x i32] @llvm.OpenCL.image.get.size.2d( + %opencl.image2d_t addrspace(1)* %in2) #0 + %1 = extractvalue [3 x i32] %0, 1 + store i32 %1, i32 addrspace(1)* %out + ret void +} + +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque + +declare [3 x i32] @llvm.OpenCL.image.get.size.2d(%opencl.image2d_t addrspace(1)*) #0 +declare [3 x i32] @llvm.OpenCL.image.get.size.3d(%opencl.image3d_t addrspace(1)*) #0 +declare [2 x i32] @llvm.OpenCL.image.get.format.2d(%opencl.image2d_t addrspace(1)*) #0 +declare [2 x i32] @llvm.OpenCL.image.get.format.3d(%opencl.image3d_t addrspace(1)*) #0 + +attributes #0 = { readnone } + +!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9} +!0 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @width_2d, + !10, !20, !30, !40, !50} +!1 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @width_3d, + !10, !21, !31, !41, !50} +!2 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @height_2d, + !10, !20, !30, !40, !50} +!3 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @height_3d, + !10, !21, !31, !41, !50} +!4 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @depth_3d, + !10, !21, !31, !41, !50} +!5 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @data_type_2d, + !10, !20, !30, !40, !50} +!6 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @data_type_3d, + !10, !21, !31, !41, !50} +!7 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @channel_order_2d, + !10, !20, !30, !40, !50} +!8 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @channel_order_3d, + !10, !21, !31, !41, !50} +!9 = !{void (%opencl.image3d_t addrspace(1)*, i32, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @image_arg_2nd, !12, !22, !32, !42, !52} + +!10 = !{!"kernel_arg_addr_space", i32 1, i32 1} +!20 = !{!"kernel_arg_access_qual", !"read_only", !"none"} +!21 = !{!"kernel_arg_access_qual", !"read_only", !"none"} +!30 = !{!"kernel_arg_type", !"image2d_t", !"int*"} +!31 = !{!"kernel_arg_type", !"image3d_t", !"int*"} +!40 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"} +!41 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"} +!50 = !{!"kernel_arg_type_qual", !"", !""} + +!12 = !{!"kernel_arg_addr_space", i32 1, i32 0, i32 1, i32 1} +!22 = 
!{!"kernel_arg_access_qual", !"read_only", !"none", !"write_only", !"none"} +!32 = !{!"kernel_arg_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"} +!42 = !{!"kernel_arg_base_type", !"image3d_t", !"sampler_t", !"image2d_t", !"int*"} +!52 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} diff --git a/test/CodeGen/AMDGPU/image-resource-id.ll b/test/CodeGen/AMDGPU/image-resource-id.ll new file mode 100644 index 000000000000..d4cf34944240 --- /dev/null +++ b/test/CodeGen/AMDGPU/image-resource-id.ll @@ -0,0 +1,409 @@ +; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; === 1 image arg, read_only =================================================== + +; FUNC-LABEL: {{^}}test_2d_rd_1_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_2d_rd_1_0(%opencl.image2d_t addrspace(1)* %in, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_rd_1_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_3d_rd_1_0(%opencl.image3d_t addrspace(1)* %in, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 1 image arg, write_only ================================================== + +; FUNC-LABEL: {{^}}test_2d_wr_1_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_2d_wr_1_0(%opencl.image2d_t addrspace(1)* %in, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_wr_1_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_3d_wr_1_0(%opencl.image3d_t addrspace(1)* %in, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 2 image args, read_only ================================================== + +; FUNC-LABEL: {{^}}test_2d_rd_2_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_2d_rd_2_0(%opencl.image2d_t addrspace(1)* %in1, ; read_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_2d_rd_2_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_2d_rd_2_1(%opencl.image2d_t addrspace(1)* %in1, ; read_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_rd_2_0: +; EG: MEM_RAT_CACHELESS 
STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_3d_rd_2_0(%opencl.image3d_t addrspace(1)* %in1, ; read_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_rd_2_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_3d_rd_2_1(%opencl.image3d_t addrspace(1)* %in1, ; read_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 2 image args, write_only ================================================= + +; FUNC-LABEL: {{^}}test_2d_wr_2_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_2d_wr_2_0(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_2d_wr_2_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_2d_wr_2_1(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_wr_2_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_3d_wr_2_0(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_wr_2_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_3d_wr_2_1(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 3 image args, read_only ================================================== + +; FUNC-LABEL: {{^}}test_2d_rd_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_2d_rd_3_0(%opencl.image2d_t addrspace(1)* %in1, ; read_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + %opencl.image2d_t addrspace(1)* %in3, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}test_3d_rd_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW 
[[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_3d_rd_3_0(%opencl.image3d_t addrspace(1)* %in1, ; read_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + %opencl.image3d_t addrspace(1)* %in3, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 3 image args, write_only ================================================= + +; FUNC-LABEL: {{^}}test_2d_wr_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_2d_wr_3_0(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; write_only + %opencl.image2d_t addrspace(1)* %in3, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + + +; FUNC-LABEL: {{^}}test_3d_wr_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_3d_wr_3_0(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; write_only + %opencl.image3d_t addrspace(1)* %in3, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; === 3 image args, mixed ====================================================== + +; FUNC-LABEL: {{^}}test_2d_mix_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_2d_mix_3_0(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + %opencl.image2d_t addrspace(1)* %in3, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_mix_3_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_3d_mix_3_0(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + %opencl.image3d_t addrspace(1)* %in3, ; read_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_2d_mix_3_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_2d_mix_3_1(%opencl.image2d_t addrspace(1)* %in1, ; write_only + %opencl.image3d_t addrspace(1)* %in2, ; read_only + %opencl.image2d_t addrspace(1)* %in3, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.2d( + %opencl.image2d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_3d_mix_3_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_3d_mix_3_1(%opencl.image3d_t addrspace(1)* %in1, ; write_only + %opencl.image2d_t addrspace(1)* %in2, ; read_only + 
%opencl.image3d_t addrspace(1)* %in3, ; write_only + i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.image.get.resource.id.3d( + %opencl.image3d_t addrspace(1)* %in3) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + + +%opencl.image2d_t = type opaque +%opencl.image3d_t = type opaque + +declare i32 @llvm.OpenCL.image.get.resource.id.2d(%opencl.image2d_t addrspace(1)*) #0 +declare i32 @llvm.OpenCL.image.get.resource.id.3d(%opencl.image3d_t addrspace(1)*) #0 + +attributes #0 = { readnone } + +!opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, + !14, !15, !16, !17, !18, !19} +!0 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_rd_1_0, + !110, !120, !130, !140, !150} +!1 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_rd_1_0, + !110, !120, !131, !141, !150} +!2 = !{void (%opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_wr_1_0, + !110, !121, !130, !140, !150} +!3 = !{void (%opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_wr_1_0, + !110, !121, !131, !141, !150} +!110 = !{!"kernel_arg_addr_space", i32 1, i32 1} +!120 = !{!"kernel_arg_access_qual", !"read_only", !"none"} +!121 = !{!"kernel_arg_access_qual", !"write_only", !"none"} +!130 = !{!"kernel_arg_type", !"image2d_t", !"int*"} +!131 = !{!"kernel_arg_type", !"image3d_t", !"int*"} +!140 = !{!"kernel_arg_base_type", !"image2d_t", !"int*"} +!141 = !{!"kernel_arg_base_type", !"image3d_t", !"int*"} +!150 = !{!"kernel_arg_type_qual", !"", !""} + +!4 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @test_2d_rd_2_0, !112, !122, !132, !142, !152} +!5 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @test_2d_rd_2_1, !112, !122, !132, !142, !152} +!6 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + i32 addrspace(1)*)* @test_3d_rd_2_0, !112, !122, !133, !143, !152} +!7 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + i32 addrspace(1)*)* @test_3d_rd_2_1, !112, !122, !133, !143, !152} +!8 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @test_2d_wr_2_0, !112, !123, !132, !142, !152} +!9 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + i32 addrspace(1)*)* @test_2d_wr_2_1, !112, !123, !132, !142, !152} +!10 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + i32 addrspace(1)*)* @test_3d_wr_2_0, !112, !123, !133, !143, !152} +!11 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + i32 addrspace(1)*)* @test_3d_wr_2_1, !112, !123, !133, !143, !152} +!112 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1} +!122 = !{!"kernel_arg_access_qual", !"read_only", !"read_only", !"none"} +!123 = !{!"kernel_arg_access_qual", !"write_only", !"write_only", !"none"} +!132 = !{!"kernel_arg_type", !"image2d_t", !"image2d_t", !"int*"} +!133 = !{!"kernel_arg_type", !"image3d_t", !"image3d_t", !"int*"} +!142 = !{!"kernel_arg_base_type", !"image2d_t", !"image2d_t", !"int*"} +!143 = !{!"kernel_arg_base_type", !"image3d_t", !"image3d_t", !"int*"} +!152 = !{!"kernel_arg_type_qual", !"", !"", !""} + +!12 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_rd_3_0, + !114, !124, !134, !144, !154} +!13 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + %opencl.image3d_t 
addrspace(1)*, i32 addrspace(1)*)* @test_3d_rd_3_0, + !114, !124, !135, !145, !154} +!14 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_wr_3_0, + !114, !125, !134, !144, !154} +!15 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_wr_3_0, + !114, !125, !135, !145, !154} +!16 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_mix_3_0, + !114, !126, !134, !144, !154} +!17 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_mix_3_0, + !114, !126, !135, !145, !154} +!18 = !{void (%opencl.image2d_t addrspace(1)*, %opencl.image3d_t addrspace(1)*, + %opencl.image2d_t addrspace(1)*, i32 addrspace(1)*)* @test_2d_mix_3_1, + !114, !127, !134, !144, !154} +!19 = !{void (%opencl.image3d_t addrspace(1)*, %opencl.image2d_t addrspace(1)*, + %opencl.image3d_t addrspace(1)*, i32 addrspace(1)*)* @test_3d_mix_3_1, + !114, !127, !135, !145, !154} +!114 = !{!"kernel_arg_addr_space", i32 1, i32 1, i32 1, i32 1} +!124 = !{!"kernel_arg_access_qual", !"read_only", !"read_only", !"read_only", !"none"} +!125 = !{!"kernel_arg_access_qual", !"write_only", !"write_only", !"write_only", !"none"} +!126 = !{!"kernel_arg_access_qual", !"write_only", !"read_only", !"read_only", !"none"} +!127 = !{!"kernel_arg_access_qual", !"write_only", !"read_only", !"write_only", !"none"} +!134 = !{!"kernel_arg_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"} +!135 = !{!"kernel_arg_type", !"image3d_t", !"image2d_t", !"image3d_t", !"int*"} +!144 = !{!"kernel_arg_base_type", !"image2d_t", !"image3d_t", !"image2d_t", !"int*"} +!145 = !{!"kernel_arg_base_type", !"image3d_t", !"image2d_t", !"image3d_t", !"int*"} +!154 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} diff --git a/test/CodeGen/AMDGPU/imm.ll b/test/CodeGen/AMDGPU/imm.ll index 12eed550eb1f..8db9ea4ccf31 100644 --- a/test/CodeGen/AMDGPU/imm.ll +++ b/test/CodeGen/AMDGPU/imm.ll @@ -3,8 +3,7 @@ ; Use a 64-bit value with lo bits that can be represented as an inline constant ; CHECK-LABEL: {{^}}i64_imm_inline_lo: -; CHECK: s_mov_b32 [[LO:s[0-9]+]], 5 -; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], [[LO]] +; CHECK: v_mov_b32_e32 v[[LO_VGPR:[0-9]+]], 5 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VGPR]]: define void @i64_imm_inline_lo(i64 addrspace(1) *%out) { entry: @@ -14,8 +13,7 @@ entry: ; Use a 64-bit value with hi bits that can be represented as an inline constant ; CHECK-LABEL: {{^}}i64_imm_inline_hi: -; CHECK: s_mov_b32 [[HI:s[0-9]+]], 5 -; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], [[HI]] +; CHECK: v_mov_b32_e32 v[[HI_VGPR:[0-9]+]], 5 ; CHECK: buffer_store_dwordx2 v{{\[[0-9]+:}}[[HI_VGPR]] define void @i64_imm_inline_hi(i64 addrspace(1) *%out) { entry: @@ -24,10 +22,8 @@ entry: } ; CHECK-LABEL: {{^}}store_imm_neg_0.0_i64: -; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000 -; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} -; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] -; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x80000000 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} define void @store_imm_neg_0.0_i64(i64 addrspace(1) *%out) { store i64 -9223372036854775808, i64 
addrspace(1) *%out @@ -523,10 +519,8 @@ define void @store_inline_imm_0.0_f64(double addrspace(1)* %out) { ; CHECK-LABEL: {{^}}store_literal_imm_neg_0.0_f64: -; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x80000000 -; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} -; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] -; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x80000000 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} define void @store_literal_imm_neg_0.0_f64(double addrspace(1)* %out) { store double -0.0, double addrspace(1)* %out @@ -606,10 +600,8 @@ define void @store_inline_imm_m_4.0_f64(double addrspace(1)* %out) { } ; CHECK-LABEL: {{^}}store_literal_imm_f64: -; CHECK-DAG: s_mov_b32 s[[HI_SREG:[0-9]+]], 0x40b00000 -; CHECK-DAG: s_mov_b32 s[[LO_SREG:[0-9]+]], 0{{$}} -; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]] -; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]] +; CHECK-DAG: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], 0{{$}} +; CHECK-DAG: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], 0x40b00000 ; CHECK: buffer_store_dwordx2 v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}} define void @store_literal_imm_f64(double addrspace(1)* %out) { store double 4096.0, double addrspace(1)* %out diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll index f551606d63a7..e40cac22725c 100644 --- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll +++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll @@ -5,23 +5,52 @@ ; indexing of vectors. ; CHECK-LABEL: {{^}}extract_w_offset: +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0 ; CHECK: s_mov_b32 m0 ; CHECK-NEXT: v_movrels_b32_e32 define void @extract_w_offset(float addrspace(1)* %out, i32 %in) { entry: - %0 = add i32 %in, 1 - %1 = extractelement <4 x float> , i32 %0 - store float %1, float addrspace(1)* %out + %idx = add i32 %in, 1 + %elt = extractelement <4 x float> , i32 %idx + store float %elt, float addrspace(1)* %out + ret void +} + +; XXX: Could do v_or_b32 directly +; CHECK-LABEL: {{^}}extract_w_offset_salu_use_vector: +; CHECK-DAG: s_or_b32 +; CHECK-DAG: s_or_b32 +; CHECK-DAG: s_or_b32 +; CHECK-DAG: s_or_b32 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; CHECK: s_mov_b32 m0 +; CHECK-NEXT: v_movrels_b32_e32 +define void @extract_w_offset_salu_use_vector(i32 addrspace(1)* %out, i32 %in, <4 x i32> %or.val) { +entry: + %idx = add i32 %in, 1 + %vec = or <4 x i32> %or.val, + %elt = extractelement <4 x i32> %vec, i32 %idx + store i32 %elt, i32 addrspace(1)* %out ret void } ; CHECK-LABEL: {{^}}extract_wo_offset: +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 4.0 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x40400000 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 2.0 +; CHECK-DAG: v_mov_b32_e32 v{{[0-9]+}}, 1.0 ; CHECK: s_mov_b32 m0 ; CHECK-NEXT: v_movrels_b32_e32 define void @extract_wo_offset(float addrspace(1)* %out, i32 %in) { entry: - %0 = extractelement <4 x float> , i32 %in - store float %0, float addrspace(1)* %out + %elt = extractelement <4 x float> , i32 %in + store float %elt, float addrspace(1)* %out ret void } @@ -37,6 +66,19 @@ entry: ret void } +; CHECK-LABEL: 
{{^}}extract_neg_offset_sgpr_loaded: +; The offset depends on the register that holds the first element of the vector. +; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} +; CHECK: v_movrels_b32_e32 v{{[0-9]}}, v0 +define void @extract_neg_offset_sgpr_loaded(i32 addrspace(1)* %out, <4 x i32> %vec0, <4 x i32> %vec1, i32 %offset) { +entry: + %index = add i32 %offset, -512 + %or = or <4 x i32> %vec0, %vec1 + %value = extractelement <4 x i32> %or, i32 %index + store i32 %value, i32 addrspace(1)* %out + ret void +} + ; CHECK-LABEL: {{^}}extract_neg_offset_vgpr: ; The offset depends on the register that holds the first element of the vector. ; CHECK: v_readfirstlane_b32 @@ -87,6 +129,21 @@ entry: ret void } +; The vector indexed into is originally loaded into an SGPR rather +; than built with a reg_sequence + +; CHECK-LABEL: {{^}}insert_neg_offset_sgpr_loadreg: +; The offset depends on the register that holds the first element of the vector. +; CHECK: s_add_i32 m0, s{{[0-9]+}}, 0xfffffe{{[0-9a-z]+}} +; CHECK: v_movreld_b32_e32 v0, v{{[0-9]}} +define void @insert_neg_offset_sgpr_loadreg(i32 addrspace(1)* %in, <4 x i32> addrspace(1)* %out, <4 x i32> %vec, i32 %offset) { +entry: + %index = add i32 %offset, -512 + %value = insertelement <4 x i32> %vec, i32 5, i32 %index + store <4 x i32> %value, <4 x i32> addrspace(1)* %out + ret void +} + ; CHECK-LABEL: {{^}}insert_neg_offset_vgpr: ; The offset depends on the register that holds the first element of the vector. ; CHECK: v_readfirstlane_b32 diff --git a/test/CodeGen/AMDGPU/indirect-private-64.ll b/test/CodeGen/AMDGPU/indirect-private-64.ll index d63e1b6c5212..2a3b29f54fa9 100644 --- a/test/CodeGen/AMDGPU/indirect-private-64.ll +++ b/test/CodeGen/AMDGPU/indirect-private-64.ll @@ -4,7 +4,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s -declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind +declare void @llvm.AMDGPU.barrier.local() convergent nounwind ; SI-LABEL: {{^}}private_access_f64_alloca: @@ -18,7 +18,7 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double %array = alloca double, i32 16, align 8 %ptr = getelementptr double, double* %array, i32 %b store double %val, double* %ptr, align 8 - call void @llvm.AMDGPU.barrier.local() noduplicate nounwind + call void @llvm.AMDGPU.barrier.local() convergent nounwind %result = load double, double* %ptr, align 8 store double %result, double addrspace(1)* %out, align 8 ret void @@ -29,20 +29,16 @@ define void @private_access_f64_alloca(double addrspace(1)* noalias %out, double ; SI-ALLOCA: buffer_store_dwordx4 ; SI-ALLOCA: buffer_load_dwordx4 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 +; SI-PROMOTE: ds_write_b64 +; SI-PROMOTE: ds_write_b64 +; SI-PROMOTE: ds_read_b64 +; SI-PROMOTE: ds_read_b64 define void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16 %array = alloca <2 x double>, i32 16, align 16 %ptr = getelementptr <2 x double>, <2 x double>* %array, i32 %b store <2 x double> %val, <2 x double>* %ptr, align 16 - call void @llvm.AMDGPU.barrier.local() noduplicate nounwind + call void @llvm.AMDGPU.barrier.local() convergent nounwind %result = 
load <2 x double>, <2 x double>* %ptr, align 16 store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16 ret void @@ -60,7 +56,7 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs %array = alloca i64, i32 16, align 8 %ptr = getelementptr i64, i64* %array, i32 %b store i64 %val, i64* %ptr, align 8 - call void @llvm.AMDGPU.barrier.local() noduplicate nounwind + call void @llvm.AMDGPU.barrier.local() convergent nounwind %result = load i64, i64* %ptr, align 8 store i64 %result, i64 addrspace(1)* %out, align 8 ret void @@ -71,20 +67,16 @@ define void @private_access_i64_alloca(i64 addrspace(1)* noalias %out, i64 addrs ; SI-ALLOCA: buffer_store_dwordx4 ; SI-ALLOCA: buffer_load_dwordx4 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_write_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 -; SI-PROMOTE: ds_read_b32 +; SI-PROMOTE: ds_write_b64 +; SI-PROMOTE: ds_write_b64 +; SI-PROMOTE: ds_read_b64 +; SI-PROMOTE: ds_read_b64 define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) nounwind { %val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16 %array = alloca <2 x i64>, i32 16, align 16 %ptr = getelementptr <2 x i64>, <2 x i64>* %array, i32 %b store <2 x i64> %val, <2 x i64>* %ptr, align 16 - call void @llvm.AMDGPU.barrier.local() noduplicate nounwind + call void @llvm.AMDGPU.barrier.local() convergent nounwind %result = load <2 x i64>, <2 x i64>* %ptr, align 16 store <2 x i64> %result, <2 x i64> addrspace(1)* %out, align 16 ret void diff --git a/test/CodeGen/AMDGPU/inline-constraints.ll b/test/CodeGen/AMDGPU/inline-constraints.ll new file mode 100644 index 000000000000..78868710c6a2 --- /dev/null +++ b/test/CodeGen/AMDGPU/inline-constraints.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s + +; GCN-LABEL: {{^}}inline_reg_constraints: +; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] +; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] + +define void @inline_reg_constraints(i32 addrspace(1)* %ptr) { +entry: + %v32 = tail call i32 asm sideeffect "flat_load_dword $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %v64 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %v128 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s64 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s128 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s256 = tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + ret void +} diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.ll b/test/CodeGen/AMDGPU/insert_vector_elt.ll index 6de3d408c486..7f9579e59782 100644 --- a/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ 
b/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -70,8 +70,9 @@ define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x fl } ; SI-LABEL: {{^}}dynamic_insertelement_v8f32: -; FIXMESI: buffer_store_dwordx4 -; FIXMESI: buffer_store_dwordx4 +; SI: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind { %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32 @@ -79,10 +80,11 @@ define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x fl } ; SI-LABEL: {{^}}dynamic_insertelement_v16f32: -; FIXMESI: buffer_store_dwordx4 -; FIXMESI: buffer_store_dwordx4 -; FIXMESI: buffer_store_dwordx4 -; FIXMESI: buffer_store_dwordx4 +; SI: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind { %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64 @@ -202,10 +204,28 @@ endif: } ; SI-LABEL: {{^}}dynamic_insertelement_v2f64: -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: s_load_dword [[IDX:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x11|0x44}}{{$}} +; SI-DAG: s_lshl_b32 [[SCALEDIDX:s[0-9]+]], [[IDX]], 1{{$}} +; SI-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0{{$}} + +; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} +; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}} + +; SI: s_mov_b32 m0, [[SCALEDIDX]] +; SI: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]] + +; Increment to next element. +; FIXME: Should be able to manipulate m0 directly instead of add and +; copy. + +; SI: s_or_b32 [[IDX1:s[0-9]+]], [[SCALEDIDX]], 1 +; SI-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0x40200000 +; SI-DAG: s_mov_b32 m0, [[IDX1]] +; SI: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]] + +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind { %vecins = insertelement <2 x double> %a, double 8.0, i32 %b @@ -213,9 +233,16 @@ define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x d ret void } +; FIXME: Inline immediate should be folded into v_movreld_b32. ; SI-LABEL: {{^}}dynamic_insertelement_v2i64: -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 + +; SI-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 5{{$}} +; SI-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0{{$}} + +; SI-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]] +; SI-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]] + +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind { %vecins = insertelement <2 x i64> %a, i64 5, i32 %b @@ -223,12 +250,29 @@ define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> ret void } +; FIXME: Should be able to do without stack access. The used stack +; space is also 2x what should be required. 
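(Worked numbers behind the FIXME above, a small sketch rather than part of the imported test: spilling one <N x double> needs only N * 8 bytes of scratch, yet the ScratchSize values checked below are twice that, matching the "2x what should be required" note:

for name, elems in (("v4f64", 4), ("v8f64", 8)):
    needed = elems * 8       # bytes for a single <N x double> stack copy
    asserted = 2 * needed    # doubled figure the ScratchSize checks expect
    print("%s: needs %d bytes, checks assert ScratchSize: %d"
          % (name, needed, asserted))
# v4f64: needs 32 bytes, checks assert ScratchSize: 64
# v8f64: needs 64 bytes, checks assert ScratchSize: 128
)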
+ ; SI-LABEL: {{^}}dynamic_insertelement_v4f64: -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: SCRATCH_RSRC_DWORD + +; Stack store +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} + +; Write element +; SI: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} + +; Stack reload +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} + +; Store result +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm +; SI: ScratchSize: 64 + define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind { %vecins = insertelement <4 x double> %a, double 8.0, i32 %b store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16 @@ -236,15 +280,26 @@ define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x d } ; SI-LABEL: {{^}}dynamic_insertelement_v8f64: -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: SCRATCH_RSRC_DWORD + +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:32{{$}} +; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:48{{$}} + +; SI: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} + +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}} +; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}} + +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm +; SI: ScratchSize: 128 define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind { %vecins = insertelement <8 x double> %a, double 8.0, i32 %b store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16 diff --git a/test/CodeGen/AMDGPU/kernel-args.ll b/test/CodeGen/AMDGPU/kernel-args.ll index 1dd7c2cb7995..e9d98ac89e72 100644 --- a/test/CodeGen/AMDGPU/kernel-args.ll +++ b/test/CodeGen/AMDGPU/kernel-args.ll @@ -4,8 +4,10 @@ ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC ; FUNC-LABEL: {{^}}i8_arg: -; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; GCN: buffer_load_ubyte +; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; 
SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb +; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { entry: @@ -39,8 +41,10 @@ entry: } ; FUNC-LABEL: {{^}}i16_arg: -; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z -; GCN: buffer_load_ushort +; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb +; VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c +; GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { entry: @@ -290,8 +294,8 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X -; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 -; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44 +; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11 +; VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44 define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { entry: store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 @@ -307,7 +311,7 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X -; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11 +; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11 define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { entry: store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 @@ -409,8 +413,8 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X -; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 -; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64 +; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19 +; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64 define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { entry: store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 @@ -434,8 +438,8 @@ entry: ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W ; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X -; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19 -; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64 +; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19 +; VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64 define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { entry: store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 diff --git a/test/CodeGen/AMDGPU/large-alloca-compute.ll b/test/CodeGen/AMDGPU/large-alloca-compute.ll new file mode 100644 index 000000000000..8347b8c96ec4 --- /dev/null +++ b/test/CodeGen/AMDGPU/large-alloca-compute.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=bonaire -mtriple=amdgcn-unknown-amdhsa < %s -mattr=-flat-for-global | FileCheck -check-prefix=GCNHSA -check-prefix=CIHSA -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=tonga -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCNHSA -check-prefix=VIHSA -check-prefix=ALL %s + +; 
FIXME: align on alloca seems to be ignored for private_segment_alignment + +; ALL-LABEL: {{^}}large_alloca_compute_shader: + +; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN: s_mov_b32 s10, -1 +; CI: s_mov_b32 s11, 0x80f000 +; VI: s_mov_b32 s11, 0x800000 + + +; GCNHSA: .amd_kernel_code_t + +; GCNHSA: compute_pgm_rsrc2_scratch_en = 1 +; GCNHSA: compute_pgm_rsrc2_user_sgpr = 6 +; GCNHSA: compute_pgm_rsrc2_tgid_x_en = 1 +; GCNHSA: compute_pgm_rsrc2_tgid_y_en = 0 +; GCNHSA: compute_pgm_rsrc2_tgid_z_en = 0 +; GCNHSA: compute_pgm_rsrc2_tg_size_en = 0 +; GCNHSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 + +; GCNHSA: enable_sgpr_private_segment_buffer = 1 +; GCNHSA: enable_sgpr_dispatch_ptr = 0 +; GCNHSA: enable_sgpr_queue_ptr = 0 +; GCNHSA: enable_sgpr_kernarg_segment_ptr = 1 +; GCNHSA: enable_sgpr_dispatch_id = 0 +; GCNHSA: enable_sgpr_flat_scratch_init = 0 +; GCNHSA: enable_sgpr_private_segment_size = 0 +; GCNHSA: enable_sgpr_grid_workgroup_count_x = 0 +; GCNHSA: enable_sgpr_grid_workgroup_count_y = 0 +; GCNHSA: enable_sgpr_grid_workgroup_count_z = 0 +; GCNHSA: workitem_private_segment_byte_size = 32772 +; GCNHSA: private_segment_alignment = 4 +; GCNHSA: .end_amd_kernel_code_t + + +; GCNHSA: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen +; GCNHSA: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[0:3], s7 offen + +; Scratch size = alloca size + emergency stack slot +; ALL: ; ScratchSize: 32772 +define void @large_alloca_compute_shader(i32 %x, i32 %y) #0 { + %large = alloca [8192 x i32], align 4 + %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 + store volatile i32 %x, i32* %gep + %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y + %val = load volatile i32, i32* %gep1 + store volatile i32 %val, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/large-alloca-graphics.ll b/test/CodeGen/AMDGPU/large-alloca-graphics.ll new file mode 100644 index 000000000000..141ee2560152 --- /dev/null +++ b/test/CodeGen/AMDGPU/large-alloca-graphics.ll @@ -0,0 +1,47 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=GCN -check-prefix=CI -check-prefix=ALL %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=ALL %s + +; ALL-LABEL: {{^}}large_alloca_pixel_shader: +; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN: s_mov_b32 s10, -1 +; CI: s_mov_b32 s11, 0x80f000 +; VI: s_mov_b32 s11, 0x800000 + +; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen + +; ALL: ; ScratchSize: 32772 +define void @large_alloca_pixel_shader(i32 %x, i32 %y) #1 { + %large = alloca [8192 x i32], align 4 + %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 + store volatile i32 %x, i32* %gep + %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y + %val = load volatile i32, i32* %gep1 + store volatile i32 %val, i32 addrspace(1)* undef + ret void +} + +; ALL-LABEL: {{^}}large_alloca_pixel_shader_inreg: +; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; GCN: s_mov_b32 s10, -1 +; CI: s_mov_b32 s11, 0x80f000 +; VI: s_mov_b32 s11, 0x800000 + +; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen +; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen + +; ALL: ; ScratchSize: 32772 +define void 
@large_alloca_pixel_shader_inreg(i32 inreg %x, i32 inreg %y) #1 { + %large = alloca [8192 x i32], align 4 + %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 + store volatile i32 %x, i32* %gep + %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y + %val = load volatile i32, i32* %gep1 + store volatile i32 %val, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "ShaderType"="0" } diff --git a/test/CodeGen/AMDGPU/large-alloca.ll b/test/CodeGen/AMDGPU/large-alloca.ll deleted file mode 100644 index 671833d1a33a..000000000000 --- a/test/CodeGen/AMDGPU/large-alloca.ll +++ /dev/null @@ -1,15 +0,0 @@ -; XFAIL: * -; REQUIRES: asserts -; RUN: llc -march=amdgcn -mcpu=SI < %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s - -define void @large_alloca(i32 addrspace(1)* %out, i32 %x, i32 %y) nounwind { - %large = alloca [8192 x i32], align 4 - %gep = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 8191 - store i32 %x, i32* %gep - %gep1 = getelementptr [8192 x i32], [8192 x i32]* %large, i32 0, i32 %y - %0 = load i32, i32* %gep1 - store i32 %0, i32 addrspace(1)* %out - ret void -} - diff --git a/test/CodeGen/AMDGPU/literals.ll b/test/CodeGen/AMDGPU/literals.ll index cff1c24f89d6..9d2320cb2d19 100644 --- a/test/CodeGen/AMDGPU/literals.ll +++ b/test/CodeGen/AMDGPU/literals.ll @@ -7,8 +7,8 @@ ; ADD_INT literal.x KC0[2].Z, 5 ; CHECK: {{^}}i32_literal: -; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: ADD_INT * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y ; CHECK-NEXT: 5 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { entry: @@ -24,8 +24,8 @@ entry: ; ADD literal.x KC0[2].Z, 5.0 ; CHECK: {{^}}float_literal: -; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: ADD * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y ; CHECK-NEXT: 1084227584(5.0 define void @float_literal(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll index 8bf094b8bc7b..ca8ddbae9fbc 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.abs.ll @@ -8,9 +8,7 @@ declare i32 @llvm.AMDGPU.abs(i32) nounwind readnone declare i32 @llvm.AMDIL.abs.i32(i32) nounwind readnone ; FUNC-LABEL: {{^}}s_abs_i32: -; SI: s_sub_i32 -; SI: s_max_i32 -; SI: s_endpgm +; SI: s_abs_i32 ; EG: SUB_INT ; EG: MAX_INT diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll index 1168713ca66e..d56b48457285 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll @@ -425,7 +425,7 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) ; SI: buffer_load_dword [[LOAD:v[0-9]+]] ; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16 ; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]] -; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]] +; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]] ; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]] ; SI: buffer_store_dword [[TMP2]] define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll deleted file mode 100644 index 301de4b1c82d..000000000000 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.brev.ll +++ /dev/null @@ -1,28 +0,0 
@@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s - -declare i32 @llvm.AMDGPU.brev(i32) nounwind readnone - -; FUNC-LABEL: {{^}}s_brev_i32: -; SI: s_load_dword [[VAL:s[0-9]+]], -; SI: s_brev_b32 [[SRESULT:s[0-9]+]], [[VAL]] -; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] -; SI: buffer_store_dword [[VRESULT]], -; SI: s_endpgm -define void @s_brev_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind { - %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone - store i32 %ctlz, i32 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}v_brev_i32: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_bfrev_b32_e32 [[RESULT:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm -define void @v_brev_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %val = load i32, i32 addrspace(1)* %valptr, align 4 - %ctlz = call i32 @llvm.AMDGPU.brev(i32 %val) nounwind readnone - store i32 %ctlz, i32 addrspace(1)* %out, align 4 - ret void -} diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll index 805a88b59c72..80eb3b93f8e5 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll @@ -271,7 +271,8 @@ define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} ; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NOT: vcc +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { @@ -285,7 +286,8 @@ define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]] ; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} ; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NOT: vcc +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 { diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll index f948c987b038..7dc094ed1b4b 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.div_fmas.ll @@ -4,7 +4,6 @@ ; FIXME: Enable for VI. 
declare i32 @llvm.r600.read.tidig.x() nounwind readnone -declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate declare float @llvm.AMDGPU.div.fmas.f32(float, float, float, i1) nounwind readnone declare double @llvm.AMDGPU.div.fmas.f64(double, double, double, i1) nounwind readnone diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll new file mode 100644 index 000000000000..2e299e30b8c7 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll @@ -0,0 +1,37 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}read_workdim: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].Z + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] +define void @read_workdim(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.AMDGPU.read.workdim() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}read_workdim_known_bits: +; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c +; GCN-NOT: 0xff +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] +define void @read_workdim_known_bits(i32 addrspace(1)* %out) { +entry: + %dim = call i32 @llvm.AMDGPU.read.workdim() #0 + %shl = shl i32 %dim, 24 + %shr = lshr i32 %shl, 24 + store i32 %shr, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.AMDGPU.read.workdim() #0 + +attributes #0 = { readnone } diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll index 74792e50017f..a30a8e083eb6 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s ; R600: {{^}}amdgpu_trunc: -; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; R600: TRUNC {{\*? 
*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: {{^}}amdgpu_trunc: ; SI: v_trunc_f32 diff --git a/test/CodeGen/AMDGPU/llvm.SI.packf16.ll b/test/CodeGen/AMDGPU/llvm.SI.packf16.ll new file mode 100644 index 000000000000..0155757632d4 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.SI.packf16.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}main: +; GCN: v_cvt_pkrtz_f16_f32 +; GCN: v_cvt_pkrtz_f16_f32 +; GCN-NOT: v_cvt_pkrtz_f16_f32 + +define void @main(float %src) #0 { +main_body: + %p1 = call i32 @llvm.SI.packf16(float undef, float %src) + %p2 = call i32 @llvm.SI.packf16(float %src, float undef) + %p3 = call i32 @llvm.SI.packf16(float undef, float undef) + %f1 = bitcast i32 %p1 to float + %f2 = bitcast i32 %p2 to float + %f3 = bitcast i32 %p3 to float + call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 1, float undef, float %f1, float undef, float %f1) + call void @llvm.SI.export(i32 15, i32 1, i32 0, i32 0, i32 1, float undef, float %f2, float undef, float %f2) + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %f3, float undef, float %f2) + ret void +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.SI.packf16(float, float) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll new file mode 100644 index 000000000000..6d9db65e7d93 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.buffer.wbinvl1() #0 + +; GCN-LABEL: {{^}}test_buffer_wbinvl1: +; GCN-NEXT: ; BB#0: +; SI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xc4,0xe1,0x00,0x00,0x00,0x00] +; VI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xf8,0xe0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_buffer_wbinvl1() #0 { + call void @llvm.amdgcn.buffer.wbinvl1() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll new file mode 100644 index 000000000000..746298465e58 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll @@ -0,0 +1,14 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=SI %s + +declare void @llvm.amdgcn.buffer.wbinvl1.sc() #0 + +; SI-LABEL: {{^}}test_buffer_wbinvl1_sc: +; SI-NEXT: ; BB#0: +; SI-NEXT: buffer_wbinvl1_sc ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] +; SI-NEXT: s_endpgm +define void @test_buffer_wbinvl1_sc() #0 { + call void @llvm.amdgcn.buffer.wbinvl1.sc() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll new file mode 100644 index 000000000000..cecfcb1bfe7c --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll @@ -0,0 +1,16 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc 
-march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0 + +; GCN-LABEL: {{^}}test_buffer_wbinvl1_vol: +; GCN-NEXT: ; BB#0: +; CI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00] +; VI-NEXT: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_buffer_wbinvl1_vol() #0 { + call void @llvm.amdgcn.buffer.wbinvl1.vol() + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll new file mode 100644 index 000000000000..dc95cd1ee012 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +; GCN-LABEL: {{^}}test: +; GCN: enable_sgpr_dispatch_ptr = 1 +; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 +define void @test(i32 addrspace(1)* %out) { + %dispatch_ptr = call noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 + %header_ptr = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)* + %value = load i32, i32 addrspace(2)* %header_ptr + store i32 %value, i32 addrspace(1)* %out + ret void +} + +declare noalias i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0 + +attributes #0 = { readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll new file mode 100644 index 000000000000..a28e1b1eb241 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll @@ -0,0 +1,30 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s + +;GCN-LABEL: {{^}}v_interp: +;GCN-NOT: s_wqm +;GCN: s_mov_b32 m0, s{{[0-9]+}} +;GCN: v_interp_p1_f32 +;GCN: v_interp_p2_f32 +define void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) #0 { +main_body: + %i = extractelement <2 x i32> %4, i32 0 + %j = extractelement <2 x i32> %4, i32 1 + %p0_0 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 0, i32 0, i32 %3) + %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, i32 %j, i32 0, i32 0, i32 %3) + %p0_1 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 1, i32 0, i32 %3) + %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, i32 %j, i32 1, i32 0, i32 %3) + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %p1_1) + ret void +} + +; Function Attrs: nounwind readnone +declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #1 + +; Function Attrs: nounwind readnone +declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="0" } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll new file mode 100644 index 000000000000..02ee2039542a --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll @@ -0,0 +1,24 @@ +;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s +;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s + +;GCN-LABEL: {{^}}mbcnt_intrinsics: +;GCN: 
v_mbcnt_lo_u32_b32_e64 [[LO:v[0-9]+]], -1, 0 +;SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]] +;VI: v_mbcnt_hi_u32_b32_e64 {{v[0-9]+}}, -1, [[LO]] + +define void @mbcnt_intrinsics(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg) "ShaderType"="0" { +main_body: + %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #1 + %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #1 + %4 = bitcast i32 %hi to float + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %4, float %4, float %4, float %4) + ret void +} + +declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1 + +declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll new file mode 100644 index 000000000000..f8af67c17ec2 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.inv() #0 + +; GCN-LABEL: {{^}}test_s_dcache_inv: +; GCN-NEXT: ; BB#0: +; SI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7] +; VI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0x80,0xc0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_s_dcache_inv() #0 { + call void @llvm.amdgcn.s.dcache.inv() + ret void +} + +; GCN-LABEL: {{^}}test_s_dcache_inv_insert_wait: +; GCN-NEXT: ; BB#0: +; GCN-NEXT: s_dcache_inv +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_inv_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.inv() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll new file mode 100644 index 000000000000..a8502a7c5033 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll @@ -0,0 +1,29 @@ +; RUN: llc -march=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s +; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.inv.vol() #0 + +; GCN-LABEL: {{^}}test_s_dcache_inv_vol: +; GCN-NEXT: ; BB#0: +; CI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7] +; VI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x88,0xc0,0x00,0x00,0x00,0x00] +; GCN-NEXT: s_endpgm +define void @test_s_dcache_inv_vol() #0 { + call void @llvm.amdgcn.s.dcache.inv.vol() + ret void +} + +; GCN-LABEL: {{^}}test_s_dcache_inv_vol_insert_wait: +; GCN-NEXT: ; BB#0: +; GCN-NEXT: s_dcache_inv_vol +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_inv_vol_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.inv.vol() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll new file mode 100644 index 000000000000..f9ae09b391aa --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | 
FileCheck -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.wb() #0 + +; VI-LABEL: {{^}}test_s_dcache_wb: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00] +; VI-NEXT: s_endpgm +define void @test_s_dcache_wb() #0 { + call void @llvm.amdgcn.s.dcache.wb() + ret void +} + +; VI-LABEL: {{^}}test_s_dcache_wb_insert_wait: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb +; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_wb_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.wb() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll new file mode 100644 index 000000000000..d9145458a1f6 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s + +declare void @llvm.amdgcn.s.dcache.wb.vol() #0 + +; VI-LABEL: {{^}}test_s_dcache_wb_vol: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00] +; VI-NEXT: s_endpgm +define void @test_s_dcache_wb_vol() #0 { + call void @llvm.amdgcn.s.dcache.wb.vol() + ret void +} + +; VI-LABEL: {{^}}test_s_dcache_wb_vol_insert_wait: +; VI-NEXT: ; BB#0: +; VI-NEXT: s_dcache_wb_vol +; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding +define void @test_s_dcache_wb_vol_insert_wait() #0 { + call void @llvm.amdgcn.s.dcache.wb.vol() + br label %end + +end: + store volatile i32 3, i32 addrspace(1)* undef + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll b/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll index a64dd0ebd2dd..0c3e4ecaa1a0 100644 --- a/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll +++ b/test/CodeGen/AMDGPU/llvm.amdgpu.lrp.ll @@ -4,7 +4,7 @@ declare float @llvm.AMDGPU.lrp(float, float, float) nounwind readnone ; FUNC-LABEL: {{^}}test_lrp: -; SI: v_sub_f32 +; SI: v_mad_f32 ; SI: v_mac_f32_e32 define void @test_lrp(float addrspace(1)* %out, float %src0, float %src1, float %src2) nounwind { %mad = call float @llvm.AMDGPU.lrp(float %src0, float %src1, float %src2) nounwind readnone diff --git a/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/test/CodeGen/AMDGPU/llvm.dbg.value.ll index d001bcb4db17..b01f8ab2bdf9 100644 --- a/test/CodeGen/AMDGPU/llvm.dbg.value.ll +++ b/test/CodeGen/AMDGPU/llvm.dbg.value.ll @@ -1,11 +1,11 @@ -; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -mattr=-flat-for-global < %s | FileCheck %s ; CHECK-LABEL: {{^}}test_debug_value: -; CHECK: s_load_dwordx2 -; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- SGPR0_SGPR1 +; CHECK: s_load_dwordx2 s[4:5] +; CHECK: DEBUG_VALUE: test_debug_value:globalptr_arg <- %SGPR4_SGPR5 ; CHECK: buffer_store_dword ; CHECK: s_endpgm -define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 { +define void @test_debug_value(i32 addrspace(1)* nocapture %globalptr_arg) #0 !dbg !4 { entry: tail call void @llvm.dbg.value(metadata i32 addrspace(1)* %globalptr_arg, i64 0, metadata !10, metadata !13), !dbg !14 store i32 123, i32 addrspace(1)* %globalptr_arg, align 4 @@ -24,13 +24,13 @@ attributes #1 = { nounwind readnone } !1 = !DIFile(filename: "/tmp/test_debug_value.cl", directory: "/Users/matt/src/llvm/build_debug") !2 = !{} !3 = !{!4} -!4 
= !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, function: void (i32 addrspace(1)*)* @test_debug_value, variables: !9) +!4 = distinct !DISubprogram(name: "test_debug_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !9) !5 = !DISubroutineType(types: !6) !6 = !{null, !7} !7 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64, align: 32) !8 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !9 = !{!10} -!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7) +!10 = !DILocalVariable(name: "globalptr_arg", arg: 1, scope: !4, file: !1, line: 1, type: !7) !11 = !{i32 2, !"Dwarf Version", i32 4} !12 = !{i32 2, !"Debug Info Version", i32 3} !13 = !DIExpression() diff --git a/test/CodeGen/AMDGPU/llvm.memcpy.ll b/test/CodeGen/AMDGPU/llvm.memcpy.ll index e491732cf9c5..d83ab562b718 100644 --- a/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -132,32 +132,15 @@ define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias % } ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4: -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 +; SI: ds_read2_b32 +; SI: ds_read2_b32 +; SI: ds_read2_b32 +; SI: ds_read2_b32 -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 +; SI: ds_write2_b32 +; SI: ds_write2_b32 +; SI: ds_write2_b32 +; SI: ds_write2_b32 ; SI: s_endpgm define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { @@ -170,32 +153,15 @@ define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias % ; FIXME: Use 64-bit ops ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8: -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 +; SI: ds_read_b64 +; SI: ds_read_b64 +; SI: ds_read_b64 +; SI: ds_read_b64 -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 - -; SI-DAG: ds_read_b32 -; SI-DAG: ds_write_b32 +; SI: ds_write_b64 +; SI: ds_write_b64 +; SI: ds_write_b64 +; SI: ds_write_b64 ; SI-DAG: s_endpgm define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind { diff --git a/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll new file mode 100644 index 000000000000..13ebee41e844 --- /dev/null +++ b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll @@ -0,0 +1,184 @@ +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck 
-check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + + +; FUNC-LABEL: {{^}}local_size_x: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[1].Z + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 +; CI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x1 +; VI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x4 + +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] +define void @local_size_x(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.x() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_y: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[1].W + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] +define void @local_size_y(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.y() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_z: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV * [[VAL]], KC0[2].X + +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[VVAL]] +define void @local_size_z(i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.r600.read.local.size.z() #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_xy: +; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6 +; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7 +; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18 +; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c +; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]] +; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VY]] +; GCN: buffer_store_dword [[VAL]] +define void @local_size_xy(i32 addrspace(1)* %out) { +entry: + %x = call i32 @llvm.r600.read.local.size.x() #0 + %y = call i32 @llvm.r600.read.local.size.y() #0 + %val = mul i32 %x, %y + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_xz: + +; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6 +; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18 +; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20 +; HSA-DAG: s_and_b32 [[X:s[0-9]+]], [[XY]], 0xffff +; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] +; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VZ]] +; GCN: buffer_store_dword [[VAL]] +define void @local_size_xz(i32 addrspace(1)* %out) { +entry: + %x = call i32 @llvm.r600.read.local.size.x() #0 + %z = call i32 @llvm.r600.read.local.size.z() #0 + %val = mul i32 %x, %z + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_yz: +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 1 + +; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7 +; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c +; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20 +; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] +; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[Y]], 
[[VZ]] +; GCN: buffer_store_dword [[VAL]] +define void @local_size_yz(i32 addrspace(1)* %out) { +entry: + %y = call i32 @llvm.r600.read.local.size.y() #0 + %z = call i32 @llvm.r600.read.local.size.z() #0 + %val = mul i32 %y, %z + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_xyz: +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 1 + +; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6 +; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7 +; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8 +; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18 +; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c +; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20 +; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]] +; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]] +; GCN: v_mad_u32_u24 [[VAL:v[0-9]+]], [[X]], [[VY]], [[VZ]] +; GCN: buffer_store_dword [[VAL]] +define void @local_size_xyz(i32 addrspace(1)* %out) { +entry: + %x = call i32 @llvm.r600.read.local.size.x() #0 + %y = call i32 @llvm.r600.read.local.size.y() #0 + %z = call i32 @llvm.r600.read.local.size.z() #0 + %xy = mul i32 %x, %y + %xyz = add i32 %xy, %z + store i32 %xyz, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_x_known_bits: +; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 +; GCN-NOT: 0xffff +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NEXT: buffer_store_dword [[VVAL]] +define void @local_size_x_known_bits(i32 addrspace(1)* %out) { +entry: + %size = call i32 @llvm.r600.read.local.size.x() #0 + %shl = shl i32 %size, 16 + %shr = lshr i32 %shl, 16 + store i32 %shr, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_y_known_bits: +; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c +; GCN-NOT: 0xffff +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NEXT: buffer_store_dword [[VVAL]] +define void @local_size_y_known_bits(i32 addrspace(1)* %out) { +entry: + %size = call i32 @llvm.r600.read.local.size.y() #0 + %shl = shl i32 %size, 16 + %shr = lshr i32 %shl, 16 + store i32 %shr, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}local_size_z_known_bits: +; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 +; GCN-NOT: 0xffff +; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NEXT: buffer_store_dword [[VVAL]] +define void @local_size_z_known_bits(i32 addrspace(1)* %out) { +entry: + %size = call i32 @llvm.r600.read.local.size.z() #0 + %shl = shl i32 %size, 16 + %shr = lshr i32 %shl, 16 + store i32 %shr, i32 addrspace(1)* %out + ret void +} + +declare i32 @llvm.r600.read.local.size.x() #0 +declare i32 @llvm.r600.read.local.size.y() #0 +declare i32 @llvm.r600.read.local.size.z() #0 + +attributes #0 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/llvm.round.f64.ll b/test/CodeGen/AMDGPU/llvm.round.f64.ll index 3d0f57e33280..6b365dc09e2a 100644 --- a/test/CodeGen/AMDGPU/llvm.round.f64.ll +++ b/test/CodeGen/AMDGPU/llvm.round.f64.ll @@ -21,12 +21,9 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 { ; SI-DAG: v_cmp_eq_i32 ; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff -; SI-DAG: v_cmp_gt_i32_e64 +; SI-DAG: v_cmp_gt_i32_e32 ; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]] -; SI-DAG: v_cmp_gt_i32_e64 - - ; SI: buffer_store_dwordx2 ; SI: s_endpgm define void @v_round_f64(double addrspace(1)* %out, double addrspace(1)* %in) #0 { 
diff --git a/test/CodeGen/AMDGPU/load.ll b/test/CodeGen/AMDGPU/load.ll index 93b1b51a0d07..6a04261fe47b 100644 --- a/test/CodeGen/AMDGPU/load.ll +++ b/test/CodeGen/AMDGPU/load.ll @@ -277,15 +277,9 @@ entry: ; FUNC-LABEL: {{^}}load_v8i32: ; R600: VTX_READ_128 ; R600: VTX_READ_128 -; XXX: We should be using DWORDX4 instructions on SI. -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword + +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) { entry: %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in @@ -298,23 +292,11 @@ entry: ; R600: VTX_READ_128 ; R600: VTX_READ_128 ; R600: VTX_READ_128 -; XXX: We should be using DWORDX4 instructions on SI. -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword + +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) { entry: %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in diff --git a/test/CodeGen/AMDGPU/local-memory-two-objects.ll b/test/CodeGen/AMDGPU/local-memory-two-objects.ll index f501a7ac6274..6b52b80ba082 100644 --- a/test/CodeGen/AMDGPU/local-memory-two-objects.ll +++ b/test/CodeGen/AMDGPU/local-memory-two-objects.ll @@ -10,7 +10,7 @@ ; EG: .long 166120 ; EG-NEXT: .long 8 ; GCN: .long 47180 -; GCN-NEXT: .long 38792 +; GCN-NEXT: .long 32900 ; EG: {{^}}local_memory_two_objects: @@ -30,7 +30,7 @@ ; constant offsets. 
; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]] ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]] -; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}} +; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], vcc, 16, v{{[0-9]+}} ; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]] ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16 ; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]] diff --git a/test/CodeGen/AMDGPU/local-memory.ll b/test/CodeGen/AMDGPU/local-memory.ll index 9494ed75bd0c..9ffb59e70920 100644 --- a/test/CodeGen/AMDGPU/local-memory.ll +++ b/test/CodeGen/AMDGPU/local-memory.ll @@ -9,9 +9,9 @@ ; EG: .long 166120 ; EG-NEXT: .long 128 ; SI: .long 47180 -; SI-NEXT: .long 71560 +; SI-NEXT: .long 65668 ; CI: .long 47180 -; CI-NEXT: .long 38792 +; CI-NEXT: .long 32900 ; FUNC-LABEL: {{^}}local_memory: diff --git a/test/CodeGen/AMDGPU/max.ll b/test/CodeGen/AMDGPU/max.ll index fef3e2f0a21c..eeb915c10a96 100644 --- a/test/CodeGen/AMDGPU/max.ll +++ b/test/CodeGen/AMDGPU/max.ll @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone -; FUNC-LABEL: @v_test_imax_sge_i32 +; FUNC-LABEL: {{^}}v_test_imax_sge_i32: ; SI: v_max_i32_e32 define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -17,6 +17,24 @@ define void @v_test_imax_sge_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } +; FUNC-LABEL: {{^}}v_test_imax_sge_v4i32: +; SI: v_max_i32_e32 +; SI: v_max_i32_e32 +; SI: v_max_i32_e32 +; SI: v_max_i32_e32 +define void @v_test_imax_sge_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %aptr, <4 x i32> addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %out, i32 %tid + %a = load <4 x i32>, <4 x i32> addrspace(1)* %gep0, align 4 + %b = load <4 x i32>, <4 x i32> addrspace(1)* %gep1, align 4 + %cmp = icmp sge <4 x i32> %a, %b + %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b + store <4 x i32> %val, <4 x i32> addrspace(1)* %outgep, align 4 + ret void +} + ; FUNC-LABEL: @s_test_imax_sge_i32 ; SI: s_max_i32 define void @s_test_imax_sge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -35,6 +53,23 @@ define void @s_test_imax_sge_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { ret void } +; FUNC-LABEL: {{^}}v_test_imax_sge_i8: +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: v_max_i32_e32 +define void @v_test_imax_sge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp sge i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: {{^}}s_test_imax_sgt_imm_i32: ; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9 define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { @@ -44,6 +79,15 @@ define void @s_test_imax_sgt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { ret void } +; FUNC-LABEL: 
{{^}}s_test_imax_sgt_imm_v2i32: +; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9 +; SI: s_max_i32 {{s[0-9]+}}, {{s[0-9]+}}, 9 +define void @s_test_imax_sgt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind { + %cmp = icmp sgt <2 x i32> %a, <i32 9, i32 9> + %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 9, i32 9> + store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4 + ret void +} ; FUNC-LABEL: @v_test_imax_sgt_i32 ; SI: v_max_i32_e32 define void @v_test_imax_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -92,6 +136,36 @@ define void @s_test_umax_uge_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin ret void } +; FUNC-LABEL: {{^}}s_test_umax_uge_v3i32: +; SI: s_max_u32 +; SI: s_max_u32 +; SI: s_max_u32 +; SI-NOT: s_max_u32 +; SI: s_endpgm +define void @s_test_umax_uge_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, <3 x i32> %b) nounwind { + %cmp = icmp uge <3 x i32> %a, %b + %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b + store <3 x i32> %val, <3 x i32> addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_test_umax_uge_i8: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: v_max_u32_e32 +define void @v_test_umax_uge_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp uge i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: @v_test_umax_ugt_i32 ; SI: v_max_u32_e32 define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -107,7 +181,7 @@ define void @v_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } -; FUNC-LABEL: @s_test_umax_ugt_i32 +; FUNC-LABEL: {{^}}s_test_umax_ugt_i32: ; SI: s_max_u32 define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp ugt i32 %a, %b @@ -116,13 +190,23 @@ define void @s_test_umax_ugt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin ret void } +; FUNC-LABEL: {{^}}s_test_umax_ugt_imm_v2i32: +; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 15 +; SI: s_max_u32 {{s[0-9]+}}, {{s[0-9]+}}, 23 +define void @s_test_umax_ugt_imm_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a) nounwind { + %cmp = icmp ugt <2 x i32> %a, <i32 15, i32 23> + %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> <i32 15, i32 23> + store <2 x i32> %val, <2 x i32> addrspace(1)* %out, align 4 + ret void +} + ; Make sure redundant and removed ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umax_ugt_i16: ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc -; SI: s_max_u32 [[MIN:s[0-9]+]], [[A]], [[B]] -; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]] -; SI-NEXT: buffer_store_dword [[VMIN]] +; SI: s_max_u32 [[MAX:s[0-9]+]], [[A]], [[B]] +; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]] +; SI-NEXT: buffer_store_dword [[VMAX]] define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)* %out, i16 zeroext %a, i16 zeroext %b) nounwind { %a.ext = zext i16 %a to i32 %b.ext = zext i16 %b to i32 @@ -135,13 +219,13 @@ define void @simplify_demanded_bits_test_umax_ugt_i16(i32 addrspace(1)*
%out, i1 ; Make sure redundant sign_extend_inreg removed. -; FUNC-LABEL: {{^}}simplify_demanded_bits_test_min_slt_i16: +; FUNC-LABEL: {{^}}simplify_demanded_bits_test_max_slt_i16: ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb ; SI-DAG: s_load_dword [[B:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xc -; SI: s_max_i32 [[MIN:s[0-9]+]], [[A]], [[B]] -; SI-NEXT: v_mov_b32_e32 [[VMIN:v[0-9]+]], [[MIN]] -; SI-NEXT: buffer_store_dword [[VMIN]] -define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind { +; SI: s_max_i32 [[MAX:s[0-9]+]], [[A]], [[B]] +; SI-NEXT: v_mov_b32_e32 [[VMAX:v[0-9]+]], [[MAX]] +; SI-NEXT: buffer_store_dword [[VMAX]] +define void @simplify_demanded_bits_test_max_slt_i16(i32 addrspace(1)* %out, i16 signext %a, i16 signext %b) nounwind { %a.ext = sext i16 %a to i32 %b.ext = sext i16 %b to i32 %cmp = icmp sgt i32 %a.ext, %b.ext @@ -152,15 +236,13 @@ define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 ret void } -; FIXME: Should get match min/max through extends inserted by -; legalization. - -; FUNC-LABEL: {{^}}s_test_imin_sge_i16: +; FUNC-LABEL: {{^}}s_test_imax_sge_i16: +; SI: s_load_dword +; SI: s_load_dword ; SI: s_sext_i32_i16 ; SI: s_sext_i32_i16 -; SI: v_cmp_ge_i32_e32 -; SI: v_cndmask_b32 -define void @s_test_imin_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { +; SI: s_max_i32 +define void @s_test_imax_sge_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { %cmp = icmp sge i16 %a, %b %val = select i1 %cmp, i16 %a, i16 %b store i16 %val, i16 addrspace(1)* %out diff --git a/test/CodeGen/AMDGPU/merge-stores.ll b/test/CodeGen/AMDGPU/merge-stores.ll index 34a2fc7ffa74..65b454b5d8cb 100644 --- a/test/CodeGen/AMDGPU/merge-stores.ll +++ b/test/CodeGen/AMDGPU/merge-stores.ll @@ -1,5 +1,8 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s + +; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s ; Run with devices with different unaligned load restrictions. 
@@ -65,10 +68,8 @@ define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* } ; GCN-LABEL: {{^}}merge_global_store_2_constants_i32: -; SI-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8 -; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b -; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]] -; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]] +; SI-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 +; SI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b ; GCN: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}} define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 @@ -89,10 +90,8 @@ define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_2_constants_f32_i32: -; SI-DAG: s_mov_b32 [[SLO:s[0-9]+]], 4.0 -; SI-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b{{$}} -; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[SLO]] -; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[SHI]] +; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 4.0 +; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0x7b ; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}} define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 @@ -121,10 +120,7 @@ define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_4_constants_f32_order: -; XGCN: buffer_store_dwordx4 -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dwordx2 v +; GCN: buffer_store_dwordx4 define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 @@ -137,17 +133,9 @@ define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) ret void } -; First store is out of order. Because of order of combines, the -; consecutive store fails because only some of the stores have been -; replaced with integer constant stores, and then won't merge because -; the types are different. - +; First store is out of order. 
; GCN-LABEL: {{^}}merge_global_store_4_constants_f32: -; XGCN: buffer_store_dwordx4 -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v +; GCN: buffer_store_dwordx4 define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 @@ -160,6 +148,33 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { ret void } +; FIXME: Should be able to merge this +; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32: +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v + +; GCN-AA: buffer_store_dwordx2 +; GCN-AA: buffer_store_dword v +; GCN-AA: buffer_store_dword v + +; GCN: s_endpgm +define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { + %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 + %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 + %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 + + %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)* + %out.gep.3.bc = bitcast float addrspace(1)* %out.gep.3 to i32 addrspace(1)* + + store i32 11, i32 addrspace(1)* %out.gep.1.bc + store float 2.0, float addrspace(1)* %out.gep.2 + store i32 17, i32 addrspace(1)* %out.gep.3.bc + store float 8.0, float addrspace(1)* %out + ret void +} + ; GCN-LABEL: {{^}}merge_global_store_3_constants_i32: ; SI-DAG: buffer_store_dwordx2 ; SI-DAG: buffer_store_dword @@ -176,9 +191,7 @@ define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_2_constants_i64: -; XGCN: buffer_store_dwordx4 -; GCN: buffer_store_dwordx2 -; GCN: buffer_store_dwordx2 +; GCN: buffer_store_dwordx4 define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 @@ -188,13 +201,8 @@ define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { } ; GCN-LABEL: {{^}}merge_global_store_4_constants_i64: -; XGCN: buffer_store_dwordx4 -; XGCN: buffer_store_dwordx4 - -; GCN: buffer_store_dwordx2 -; GCN: buffer_store_dwordx2 -; GCN: buffer_store_dwordx2 -; GCN: buffer_store_dwordx2 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2 @@ -472,11 +480,15 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1 ; This works once AA is enabled on the subtarget ; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32: ; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]] -; XGCN: buffer_store_dwordx4 [[LOAD]] -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v + +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v +; GCN-NOAA: buffer_store_dword v + +; GCN-AA: buffer_store_dwordx4 [[LOAD]] + +; GCN: s_endpgm define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 @@ 
-508,10 +520,8 @@ define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { } ; GCN-LABEL: {{^}}merge_local_store_2_constants_i32: -; GCN-DAG: s_movk_i32 [[SLO:s[0-9]+]], 0x1c8 -; GCN-DAG: s_movk_i32 [[SHI:s[0-9]+]], 0x7b -; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], [[SLO]] -; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[SHI]] +; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0x1c8 +; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0x7b ; GCN: ds_write2_b32 v{{[0-9]+}}, v[[LO]], v[[HI]] offset1:1{{$}} define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 @@ -522,10 +532,15 @@ define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { } ; GCN-LABEL: {{^}}merge_local_store_4_constants_i32: -; GCN: ds_write_b32 -; GCN: ds_write_b32 -; GCN: ds_write_b32 -; GCN: ds_write_b32 +; GCN-DAG: v_mov_b32_e32 [[K2:v[0-9]+]], 0x1c8 +; GCN-DAG: v_mov_b32_e32 [[K3:v[0-9]+]], 0x14d +; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K2]], [[K3]] offset0:2 offset1:3 + +; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0x4d2 +; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0x7b +; GCN-DAG: ds_write2_b32 v{{[0-9]+}}, [[K0]], [[K1]] offset1:1 + +; GCN: s_endpgm define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2 @@ -597,17 +612,9 @@ define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { } ; GCN-LABEL: {{^}}merge_global_store_8_constants_i32: -; XGCN: buffer_store_dwordx4 -; XGCN: buffer_store_dwordx4 - -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v -; GCN: buffer_store_dword v +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: s_endpgm define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 @@ -627,7 +634,78 @@ define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { ret void } +; This requires handling of scalar_to_vector for v2i64 to avoid +; scratch usage. 
+; FIXME: Should do single load and store + +; GCN-LABEL: {{^}}copy_v3i32_align4: +; GCN-NOT: SCRATCH_RSRC_DWORD +; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-NOT: offen +; GCN: s_waitcnt vmcnt +; GCN-NOT: offen +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 + +; GCN: ScratchSize: 0{{$}} +define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { + %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4 + store <3 x i32> %vec, <3 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}copy_v3i64_align4: +; GCN-NOT: SCRATCH_RSRC_DWORD +; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN-NOT: offen +; GCN: s_waitcnt vmcnt +; GCN-NOT: offen +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN: ScratchSize: 0{{$}} +define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { + %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4 + store <3 x i64> %vec, <3 x i64> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}copy_v3f32_align4: +; GCN-NOT: SCRATCH_RSRC_DWORD +; GCN-DAG: buffer_load_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-NOT: offen +; GCN: s_waitcnt vmcnt +; GCN-NOT: offen +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dword v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 +; GCN: ScratchSize: 0{{$}} +define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { + %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4 + %fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0> + store <3 x float> %fadd, <3 x float> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}copy_v3f64_align4: +; GCN-NOT: SCRATCH_RSRC_DWORD +; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN-NOT: offen +; GCN: s_waitcnt vmcnt +; GCN-NOT: offen +; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}} +; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}} +; GCN: ScratchSize: 0{{$}} +define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { + %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4 + %fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0> + store <3 x double> %fadd, <3 x double> addrspace(1)* %out + ret void +} + declare void @llvm.AMDGPU.barrier.local() #1 attributes #0 = { nounwind } -attributes #1 = { noduplicate nounwind } +attributes #1 = { convergent nounwind } diff --git a/test/CodeGen/AMDGPU/min.ll b/test/CodeGen/AMDGPU/min.ll index 0332d1a8e407..215dbeb4b2fd 100644 --- a/test/CodeGen/AMDGPU/min.ll +++ b/test/CodeGen/AMDGPU/min.ll @@ -2,7 +2,7 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-; FUNC-LABEL: @v_test_imin_sle_i32 +; FUNC-LABEL: {{^}}v_test_imin_sle_i32: ; SI: v_min_i32_e32 define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone @@ -17,7 +17,7 @@ define void @v_test_imin_sle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } -; FUNC-LABEL: @s_test_imin_sle_i32 +; FUNC-LABEL: {{^}}s_test_imin_sle_i32: ; SI: s_min_i32 define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { %cmp = icmp sle i32 %a, %b @@ -26,6 +26,78 @@ define void @s_test_imin_sle_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin ret void } +; FUNC-LABEL: {{^}}s_test_imin_sle_v1i32: +; SI: s_min_i32 +define void @s_test_imin_sle_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind { + %cmp = icmp sle <1 x i32> %a, %b + %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b + store <1 x i32> %val, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_v4i32: +; SI: s_min_i32 +; SI: s_min_i32 +; SI: s_min_i32 +; SI: s_min_i32 +define void @s_test_imin_sle_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, <4 x i32> %b) nounwind { + %cmp = icmp sle <4 x i32> %a, %b + %val = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b + store <4 x i32> %val, <4 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_i8: +; SI: s_load_dword +; SI: s_load_dword +; SI: s_sext_i32_i8 +; SI: s_sext_i32_i8 +; SI: s_min_i32 +define void @s_test_imin_sle_i8(i8 addrspace(1)* %out, i8 %a, i8 %b) nounwind { + %cmp = icmp sle i8 %a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %out + ret void +} + +; XXX - should be able to use s_min if we stop unnecessarily doing +; extloads with mubuf instructions. 
+ +; FUNC-LABEL: {{^}}s_test_imin_sle_v4i8: +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte +; SI: buffer_load_sbyte + +; SI: v_min_i32 +; SI: v_min_i32 +; SI: v_min_i32 +; SI: v_min_i32 + +; SI: s_endpgm +define void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, <4 x i8> %b) nounwind { + %cmp = icmp sle <4 x i8> %a, %b + %val = select <4 x i1> %cmp, <4 x i8> %a, <4 x i8> %b + store <4 x i8> %val, <4 x i8> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_imin_sle_v4i16: +; SI: v_min_i32 +; SI: v_min_i32 +; SI: v_min_i32 +; SI: v_min_i32 +define void @s_test_imin_sle_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, <4 x i16> %b) nounwind { + %cmp = icmp sle <4 x i16> %a, %b + %val = select <4 x i1> %cmp, <4 x i16> %a, <4 x i16> %b + store <4 x i16> %val, <4 x i16> addrspace(1)* %out + ret void +} + ; FUNC-LABEL: @v_test_imin_slt_i32 ; SI: v_min_i32_e32 define void @v_test_imin_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind { @@ -50,6 +122,16 @@ define void @s_test_imin_slt_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwin ret void } +; FUNC-LABEL: {{^}}s_test_imin_slt_v2i32: +; SI: s_min_i32 +; SI: s_min_i32 +define void @s_test_imin_slt_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind { + %cmp = icmp slt <2 x i32> %a, %b + %val = select <2 x i1> %cmp, <2 x i32> %a, <2 x i32> %b + store <2 x i32> %val, <2 x i32> addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}s_test_imin_slt_imm_i32: ; SI: s_min_i32 {{s[0-9]+}}, {{s[0-9]+}}, 8 define void @s_test_imin_slt_imm_i32(i32 addrspace(1)* %out, i32 %a) nounwind { @@ -83,6 +165,24 @@ define void @v_test_umin_ule_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } +; FUNC-LABEL: @v_test_umin_ule_v3i32 +; SI: v_min_u32_e32 +; SI: v_min_u32_e32 +; SI: v_min_u32_e32 +; SI-NOT: v_min_u32_e32 +; SI: s_endpgm +define void @v_test_umin_ule_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %aptr, <3 x i32> addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr <3 x i32>, <3 x i32> addrspace(1)* %out, i32 %tid + %a = load <3 x i32>, <3 x i32> addrspace(1)* %gep0 + %b = load <3 x i32>, <3 x i32> addrspace(1)* %gep1 + %cmp = icmp ule <3 x i32> %a, %b + %val = select <3 x i1> %cmp, <3 x i32> %a, <3 x i32> %b + store <3 x i32> %val, <3 x i32> addrspace(1)* %outgep + ret void +} ; FUNC-LABEL: @s_test_umin_ule_i32 ; SI: s_min_u32 define void @s_test_umin_ule_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -107,6 +207,23 @@ define void @v_test_umin_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr ret void } +; FUNC-LABEL: {{^}}v_test_umin_ult_i8: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: v_min_u32_e32 +define void @v_test_umin_ult_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %aptr, i8 addrspace(1)* %bptr) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %gep0 = getelementptr i8, i8 addrspace(1)* %aptr, i32 %tid + %gep1 = getelementptr i8, i8 addrspace(1)* %bptr, i32 %tid + %outgep = getelementptr i8, i8 addrspace(1)* %out, i32 %tid + %a = load i8, i8 addrspace(1)* %gep0, align 1 + %b = load i8, i8 addrspace(1)* %gep1, align 1 + %cmp = icmp ult i8 
%a, %b + %val = select i1 %cmp, i8 %a, i8 %b + store i8 %val, i8 addrspace(1)* %outgep, align 1 + ret void +} + ; FUNC-LABEL: @s_test_umin_ult_i32 ; SI: s_min_u32 define void @s_test_umin_ult_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind { @@ -137,6 +254,48 @@ define void @v_test_umin_ult_i32_multi_use(i32 addrspace(1)* %out0, i1 addrspace ret void } + +; FUNC-LABEL: @s_test_umin_ult_v1i32 +; SI: s_min_u32 +define void @s_test_umin_ult_v1i32(<1 x i32> addrspace(1)* %out, <1 x i32> %a, <1 x i32> %b) nounwind { + %cmp = icmp ult <1 x i32> %a, %b + %val = select <1 x i1> %cmp, <1 x i32> %a, <1 x i32> %b + store <1 x i32> %val, <1 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_umin_ult_v8i32: +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +; SI: s_min_u32 +define void @s_test_umin_ult_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) nounwind { + %cmp = icmp ult <8 x i32> %a, %b + %val = select <8 x i1> %cmp, <8 x i32> %a, <8 x i32> %b + store <8 x i32> %val, <8 x i32> addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}s_test_umin_ult_v8i16: +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +; SI: v_min_u32 +define void @s_test_umin_ult_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> %a, <8 x i16> %b) nounwind { + %cmp = icmp ult <8 x i16> %a, %b + %val = select <8 x i1> %cmp, <8 x i16> %a, <8 x i16> %b + store <8 x i16> %val, <8 x i16> addrspace(1)* %out + ret void +} + ; Make sure redundant and removed ; FUNC-LABEL: {{^}}simplify_demanded_bits_test_umin_ult_i16: ; SI-DAG: s_load_dword [[A:s[0-9]+]], {{s\[[0-9]+:[0-9]+\]}}, 0xb @@ -173,14 +332,8 @@ define void @simplify_demanded_bits_test_min_slt_i16(i32 addrspace(1)* %out, i16 ret void } -; FIXME: Should get match min/max through extends inserted by -; legalization. - ; FUNC-LABEL: {{^}}s_test_imin_sle_i16: -; SI: s_sext_i32_i16 -; SI: s_sext_i32_i16 -; SI: v_cmp_le_i32_e32 -; SI: v_cndmask_b32 +; SI: s_min_i32 define void @s_test_imin_sle_i16(i16 addrspace(1)* %out, i16 %a, i16 %b) nounwind { %cmp = icmp sle i16 %a, %b %val = select i1 %cmp, i16 %a, i16 %b diff --git a/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll b/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll new file mode 100644 index 000000000000..e9f641b736d5 --- /dev/null +++ b/test/CodeGen/AMDGPU/move-addr64-rsrc-dead-subreg-writes.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=amdgcn -mcpu=kaveri -mtriple=amdgcn-unknown-amdhsa -mattr=-flat-for-global < %s | FileCheck -check-prefix=GCN %s + +; Check that when mubuf addr64 instruction is handled in moveToVALU +; from the pointer, dead register writes are not emitted. 
+ +; FIXME: We should be able to use the SGPR directly as src0 to v_add_i32 + +; GCN-LABEL: {{^}}clobber_vgpr_pair_pointer_add: +; GCN: s_load_dwordx2 s{{\[}}[[ARG1LO:[0-9]+]]:[[ARG1HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}} +; GCN: buffer_load_dwordx2 v{{\[}}[[LDPTRLO:[0-9]+]]:[[LDPTRHI:[0-9]+]]{{\]}} + +; GCN-NOT: v_mov_b32 +; GCN: v_mov_b32_e32 v[[VARG1LO:[0-9]+]], s[[ARG1LO]] +; GCN-NEXT: v_mov_b32_e32 v[[VARG1HI:[0-9]+]], s[[ARG1HI]] +; GCN-NOT: v_mov_b32 + +; GCN: v_add_i32_e32 v[[PTRLO:[0-9]+]], vcc, v[[LDPTRLO]], v[[VARG1LO]] +; GCN: v_addc_u32_e32 v[[PTRHI:[0-9]+]], vcc, v[[LDPTRHI]], v[[VARG1HI]] +; GCN: buffer_load_ubyte v{{[0-9]+}}, v{{\[}}[[PTRLO]]:[[PTRHI]]{{\]}}, + +define void @clobber_vgpr_pair_pointer_add(i64 %arg1, i8 addrspace(1)* addrspace(1)* %ptrarg, i32 %arg3) #0 { +bb: + %tmp = icmp sgt i32 %arg3, 0 + br i1 %tmp, label %bb4, label %bb17 + +bb4: + %tmp14 = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %ptrarg + %tmp15 = getelementptr inbounds i8, i8 addrspace(1)* %tmp14, i64 %arg1 + %tmp16 = load volatile i8, i8 addrspace(1)* %tmp15 + br label %bb17 + +bb17: + ret void +} + +attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll b/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll new file mode 100644 index 000000000000..8bca0575ecd2 --- /dev/null +++ b/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll @@ -0,0 +1,52 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; XUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; FIXME: broken on VI because flat instructions need to be emitted +; instead of addr64 equivalent of the _OFFSET variants. + +; Check that moving the pointer out of the resource descriptor to +; vaddr works for atomics. 
+ +declare i32 @llvm.r600.read.tidig.x() #1 + +; GCN-LABEL: {{^}}atomic_max_i32: +; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400 glc{{$}} +define void @atomic_max_i32(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid + %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep + %xor = xor i32 %tid, 1 + %cmp = icmp ne i32 %xor, 0 + br i1 %cmp, label %atomic, label %exit + +atomic: + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100 + %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst + store i32 %ret, i32 addrspace(1)* %out + br label %exit + +exit: + ret void +} + +; GCN-LABEL: {{^}}atomic_max_i32_noret: +; GCN: buffer_atomic_smax v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:400{{$}} +define void @atomic_max_i32_noret(i32 addrspace(1)* %out, i32 addrspace(1)* addrspace(1)* %in, i32 addrspace(1)* %x, i32 %y) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() + %tid.gep = getelementptr i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %in, i32 %tid + %ptr = load volatile i32 addrspace(1)*, i32 addrspace(1)* addrspace(1)* %tid.gep + %xor = xor i32 %tid, 1 + %cmp = icmp ne i32 %xor, 0 + br i1 %cmp, label %atomic, label %exit + +atomic: + %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 100 + %ret = atomicrmw max i32 addrspace(1)* %gep, i32 %y seq_cst + br label %exit + +exit: + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll b/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll new file mode 100644 index 000000000000..73a146710a9f --- /dev/null +++ b/test/CodeGen/AMDGPU/no-hsa-graphics-shaders.ll @@ -0,0 +1,18 @@ +; RUN: not llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa < %s 2>&1 | FileCheck %s + +; CHECK: error: unsupported non-compute shaders with HSA in pixel_shader +define void @pixel_shader() #0 { + ret void +} + +define void @vertex_shader() #1 { + ret void +} + +define void @geometry_shader() #2 { + ret void +} + +attributes #0 = { nounwind "ShaderType"="0" } +attributes #1 = { nounwind "ShaderType"="1" } +attributes #2 = { nounwind "ShaderType"="2" } diff --git a/test/CodeGen/AMDGPU/no-shrink-extloads.ll b/test/CodeGen/AMDGPU/no-shrink-extloads.ll index e4328ecbaca8..f81911aafe22 100644 --- a/test/CodeGen/AMDGPU/no-shrink-extloads.ll +++ b/test/CodeGen/AMDGPU/no-shrink-extloads.ll @@ -189,3 +189,15 @@ define void @truncate_buffer_load_i64_to_i8(i8 addrspace(1)* %out, i64 addrspace store i8 %trunc, i8 addrspace(1)* %gep.out ret void } + +; FUNC-LABEL: {{^}}smrd_mask_i32_to_i16 +; SI: s_load_dword [[LOAD:s[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0x0 +; SI: s_waitcnt lgkmcnt(0) +; SI: s_and_b32 s{{[0-9]+}}, [[LOAD]], 0xffff +define void @smrd_mask_i32_to_i16(i32 addrspace(1)* %out, i32 addrspace(2)* %in) { +entry: + %val = load i32, i32 addrspace(2)* %in + %mask = and i32 %val, 65535 + store i32 %mask, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/opencl-image-metadata.ll b/test/CodeGen/AMDGPU/opencl-image-metadata.ll new file mode 100644 index 000000000000..bc467e47dc31 --- /dev/null +++ b/test/CodeGen/AMDGPU/opencl-image-metadata.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s +; RUN: llc < %s 
-march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s + +; Make sure the OpenCL Image lowering pass doesn't crash when argument metadata +; is not in expected order. + +; EG: CF_END +; SI: s_endpgm +define void @kernel(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} + +attributes #3 = { nounwind } + +!opencl.kernels = !{!0} + +!0 = !{void (i32 addrspace(1)*)* @kernel, !1, !2, !3, !4, !5} +!1 = !{!"kernel_arg_addr_space", i32 0} +!2 = !{!"kernel_arg_access_qual", !"none"} +!3 = !{!"kernel_arg_type", !"int*"} +!4 = !{!"kernel_arg_type_qual", !""} +!5 = !{!"kernel_arg_name", !""} diff --git a/test/CodeGen/AMDGPU/operand-folding.ll b/test/CodeGen/AMDGPU/operand-folding.ll index 816755efb07c..9e514ef9970a 100644 --- a/test/CodeGen/AMDGPU/operand-folding.ll +++ b/test/CodeGen/AMDGPU/operand-folding.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s ; CHECK-LABEL: {{^}}fold_sgpr: -; CHECK: v_add_i32_e32 v{{[0-9]+}}, s +; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) { entry: %tmp0 = icmp ne i32 %fold, 0 diff --git a/test/CodeGen/AMDGPU/or.ll b/test/CodeGen/AMDGPU/or.ll index 1c04090b407f..e40f18f040b7 100644 --- a/test/CodeGen/AMDGPU/or.ll +++ b/test/CodeGen/AMDGPU/or.ll @@ -153,7 +153,7 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { } ; FUNC-LABEL: {{^}}or_i1: -; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}} ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}] define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { diff --git a/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll b/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll new file mode 100644 index 000000000000..51985af42a29 --- /dev/null +++ b/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll @@ -0,0 +1,28 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs -verify-coalescing < %s + +; The original and requires materializing a 64-bit immediate for +; s_and_b64. This is split into 2 x v_and_i32, part of the immediate +; is folded through the reg_sequence into the v_and_i32 operand, and +; only half of the result is ever used. +; +; During live interval construction, the first sub register def is +; incorrectly marked as dead. 
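+;
+; For reference, a worked split of the constant used below (not
+; FileCheck-verified):
+;   (255 << 33) | 9 = 2190433320969 = 0x1FE00000009
+;   lo32 = 0x00000009, hi32 = 0x000001FE
+; Only the high half (element 1 of the bitcast <2 x i32>) is stored.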
+ +declare i32 @llvm.r600.read.tidig.x() #1 + +define void @dead_def_subregister(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) #0 { + %tid = call i32 @llvm.r600.read.tidig.x() #1 + %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid + %val = load i64, i64 addrspace(1)* %in.gep + + %lshr = shl i64 %val, 24 + %and1 = and i64 %lshr, 2190433320969 ; (255 << 33) | 9 + %vec = bitcast i64 %and1 to <2 x i32> + %elt1 = extractelement <2 x i32> %vec, i32 1 + + store i32 %elt1, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/private-memory.ll b/test/CodeGen/AMDGPU/private-memory.ll index 645dc04f4420..79778eebd802 100644 --- a/test/CodeGen/AMDGPU/private-memory.ll +++ b/test/CodeGen/AMDGPU/private-memory.ll @@ -1,6 +1,8 @@ ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-PROMOTE ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=SI < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC +; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -mtriple=amdgcn-amdhsa -mcpu=kaveri < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC -check-prefix=HSA-ALLOCA ; RUN: llc -show-mc-encoding -mattr=+promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC ; RUN: llc -show-mc-encoding -mattr=-promote-alloca -verify-machineinstrs -march=amdgcn -mcpu=tonga < %s | FileCheck %s -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC @@ -13,11 +15,21 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; R600: LDS_READ ; R600: LDS_READ +; HSA-PROMOTE: .amd_kernel_code_t +; HSA-PROMOTE: workgroup_group_segment_byte_size = 5120 +; HSA-PROMOTE: .end_amd_kernel_code_t + ; SI-PROMOTE: ds_write_b32 ; SI-PROMOTE: ds_write_b32 ; SI-PROMOTE: ds_read_b32 ; SI-PROMOTE: ds_read_b32 +; HSA-ALLOCA: .amd_kernel_code_t +; FIXME: Creating the emergency stack slots causes us to over-estimate scratch +; by 4 bytes. 
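+; (so the value checked below would ideally be 20, not 24)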
+; HSA-ALLOCA: workitem_private_segment_byte_size = 24 +; HSA-ALLOCA: .end_amd_kernel_code_t + ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 ; SI-ALLOCA: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; encoding: [0x00,0x10,0x70,0xe0 define void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) { diff --git a/test/CodeGen/AMDGPU/register-count-comments.ll b/test/CodeGen/AMDGPU/register-count-comments.ll index de6bfb310883..4bb315049be4 100644 --- a/test/CodeGen/AMDGPU/register-count-comments.ll +++ b/test/CodeGen/AMDGPU/register-count-comments.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -asm-verbose -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s declare i32 @llvm.SI.tid() nounwind readnone diff --git a/test/CodeGen/AMDGPU/reorder-stores.ll b/test/CodeGen/AMDGPU/reorder-stores.ll index 187650ff9a53..d5e10d0be883 100644 --- a/test/CodeGen/AMDGPU/reorder-stores.ll +++ b/test/CodeGen/AMDGPU/reorder-stores.ll @@ -2,14 +2,10 @@ ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s ; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store: -; SI: buffer_load_dwordx2 -; SI: buffer_load_dwordx2 -; SI: buffer_load_dwordx2 -; SI: buffer_load_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 -; SI: buffer_store_dwordx2 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind { %tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16 @@ -34,46 +30,16 @@ define void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace } ; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store: -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword - -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword -; SI: buffer_load_dword +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 +; SI: buffer_load_dwordx4 -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword - -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword - -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword - -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword -; SI: buffer_store_dword +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 +; SI: buffer_store_dwordx4 ; SI: s_endpgm define void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind { %tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32 diff --git a/test/CodeGen/AMDGPU/s_movk_i32.ll 
b/test/CodeGen/AMDGPU/s_movk_i32.ll index 6b1a36c979c2..47c7fbb6dd6a 100644 --- a/test/CodeGen/AMDGPU/s_movk_i32.ll +++ b/test/CodeGen/AMDGPU/s_movk_i32.ll @@ -3,10 +3,9 @@ ; SI-LABEL: {{^}}s_movk_i32_k0: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -17,10 +16,9 @@ define void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k1: ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -31,10 +29,9 @@ define void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k2: ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 64{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -45,10 +42,9 @@ define void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k3: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -59,10 +55,9 @@ define void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k4: ; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 1{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 @@ -87,10 +82,9 @@ define void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 add ; SI-LABEL: {{^}}s_movk_i32_k6: ; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 
0x41{{$}} -; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 63{{$}} ; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}}, ; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]] -; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]] +; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]] ; SI: s_endpgm define void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) { %loada = load i64, i64 addrspace(1)* %a, align 4 diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll index 0b9649576545..a30c25e700ab 100644 --- a/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -1,4 +1,8 @@ -; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s + +declare i32 @llvm.r600.read.tidig.x() #0 +declare i32 @llvm.r600.read.tidig.y() #0 ; In this test both the pointer and the offset operands to the ; BUFFER_LOAD instructions end up being stored in vgprs. This @@ -7,94 +11,267 @@ ; sgpr register pair and use that for the pointer operand ; (low 64-bits of srsrc). -; CHECK-LABEL: {{^}}mubuf: +; GCN-LABEL: {{^}}mubuf: ; Make sure we aren't using VGPRs for the source operand of s_mov_b64 -; CHECK-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v +; GCN-NOT: s_mov_b64 s[{{[0-9]+:[0-9]+}}], v ; Make sure we aren't using VGPR's for the srsrc operand of BUFFER_LOAD_* ; instructions -; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 -; CHECK: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 -define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) { +; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 +; GCN: buffer_load_ubyte v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}], 0 addr64 + +define void @mubuf(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #1 { entry: - %0 = call i32 @llvm.r600.read.tidig.x() #1 - %1 = call i32 @llvm.r600.read.tidig.y() #1 - %2 = sext i32 %0 to i64 - %3 = sext i32 %1 to i64 + %tmp = call i32 @llvm.r600.read.tidig.x() + %tmp1 = call i32 @llvm.r600.read.tidig.y() + %tmp2 = sext i32 %tmp to i64 + %tmp3 = sext i32 %tmp1 to i64 br label %loop -loop: - %4 = phi i64 [0, %entry], [%5, %loop] - %5 = add i64 %2, %4 - %6 = getelementptr i8, i8 addrspace(1)* %in, i64 %5 - %7 = load i8, i8 addrspace(1)* %6, align 1 - %8 = or i64 %5, 1 - %9 = getelementptr i8, i8 addrspace(1)* %in, i64 %8 - %10 = load i8, i8 addrspace(1)* %9, align 1 - %11 = add i8 %7, %10 - %12 = sext i8 %11 to i32 - store i32 %12, i32 addrspace(1)* %out - %13 = icmp slt i64 %5, 10 - br i1 %13, label %loop, label %done +loop: ; preds = %loop, %entry + %tmp4 = phi i64 [ 0, %entry ], [ %tmp5, %loop ] + %tmp5 = add i64 %tmp2, %tmp4 + %tmp6 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp5 + %tmp7 = load i8, i8 addrspace(1)* %tmp6, align 1 + %tmp8 = or i64 %tmp5, 1 + %tmp9 = getelementptr i8, i8 addrspace(1)* %in, i64 %tmp8 + %tmp10 = load i8, i8 addrspace(1)* %tmp9, align 1 + %tmp11 = add i8 %tmp7, %tmp10 + %tmp12 = sext i8 %tmp11 to i32 + store i32 %tmp12, i32 addrspace(1)* %out + %tmp13 = icmp slt i64 %tmp5, 10 + br i1 %tmp13, label %loop, label %done -done: +done: ; preds = %loop ret void } -declare i32 @llvm.r600.read.tidig.x() #1 -declare 
i32 @llvm.r600.read.tidig.y() #1 - -attributes #1 = { nounwind readnone } - ; Test moving an SMRD instruction to the VALU -; CHECK-LABEL: {{^}}smrd_valu: -; CHECK: buffer_load_dword [[OUT:v[0-9]+]] -; CHECK: buffer_store_dword [[OUT]] - -define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 addrspace(1)* %out) { +; GCN-LABEL: {{^}}smrd_valu: +; GCN: buffer_load_dword [[OUT:v[0-9]+]] +; GCN: buffer_store_dword [[OUT]] +define void @smrd_valu(i32 addrspace(2)* addrspace(1)* %in, i32 %a, i32 %b, i32 addrspace(1)* %out) #1 { entry: - %0 = icmp ne i32 %a, 0 - br i1 %0, label %if, label %else + %tmp = icmp ne i32 %a, 0 + br i1 %tmp, label %if, label %else -if: - %1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in +if: ; preds = %entry + %tmp1 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in br label %endif -else: - %2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in - %3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %2 +else: ; preds = %entry + %tmp2 = getelementptr i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %in + %tmp3 = load i32 addrspace(2)*, i32 addrspace(2)* addrspace(1)* %tmp2 br label %endif -endif: - %4 = phi i32 addrspace(2)* [%1, %if], [%3, %else] - %5 = getelementptr i32, i32 addrspace(2)* %4, i32 3000 - %6 = load i32, i32 addrspace(2)* %5 - store i32 %6, i32 addrspace(1)* %out +endif: ; preds = %else, %if + %tmp4 = phi i32 addrspace(2)* [ %tmp1, %if ], [ %tmp3, %else ] + %tmp5 = getelementptr i32, i32 addrspace(2)* %tmp4, i32 3000 + %tmp6 = load i32, i32 addrspace(2)* %tmp5 + store i32 %tmp6, i32 addrspace(1)* %out ret void } -; Test moving ann SMRD with an immediate offset to the VALU +; Test moving an SMRD with an immediate offset to the VALU -; CHECK-LABEL: {{^}}smrd_valu2: -; CHECK: buffer_load_dword -define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) { +; GCN-LABEL: {{^}}smrd_valu2: +; GCN-NOT: v_add +; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16{{$}} +define void @smrd_valu2(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in) #1 { entry: - %0 = call i32 @llvm.r600.read.tidig.x() nounwind readnone - %1 = add i32 %0, 4 - %2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %0, i32 4 - %3 = load i32, i32 addrspace(2)* %2 - store i32 %3, i32 addrspace(1)* %out + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = add i32 %tmp, 4 + %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4 + %tmp3 = load i32, i32 addrspace(2)* %tmp2 + store i32 %tmp3, i32 addrspace(1)* %out ret void } -; CHECK-LABEL: {{^}}s_load_imm_v8i32: -; CHECK: buffer_load_dwordx4 -; CHECK: buffer_load_dwordx4 -define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) { +; Use a big offset that will use the SMRD literal offset on CI +; GCN-LABEL: {{^}}smrd_valu_ci_offset: +; GCN-NOT: v_add +; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4e20{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN: v_add_i32_e32 +; GCN: buffer_store_dword +define void @smrd_valu_ci_offset(i32 addrspace(1)* %out, i32 addrspace(2)* %in, i32 %c) #1 { entry: - %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr i32, i32 addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr i32, i32 addrspace(2)* %tmp2, i32 5000 + %tmp4 = load i32, i32 addrspace(2)* 
%tmp3 + %tmp5 = add i32 %tmp4, %c + store i32 %tmp5, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x2: +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET:s[0-9]+]], 0x9c40{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx2 +define void @smrd_valu_ci_offset_x2(i64 addrspace(1)* %out, i64 addrspace(2)* %in, i64 %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr i64, i64 addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr i64, i64 addrspace(2)* %tmp2, i32 5000 + %tmp4 = load i64, i64 addrspace(2)* %tmp3 + %tmp5 = or i64 %tmp4, %c + store i64 %tmp5, i64 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x4: +; GCN-NOT: v_add +; GCN: s_movk_i32 [[OFFSET:s[0-9]+]], 0x4d20{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET]] addr64{{$}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx4 +define void @smrd_valu_ci_offset_x4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in, <4 x i32> %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr <4 x i32>, <4 x i32> addrspace(2)* %tmp2, i32 1234 + %tmp4 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp3 + %tmp5 = or <4 x i32> %tmp4, %c + store <4 x i32> %tmp5, <4 x i32> addrspace(1)* %out + ret void +} + +; Original scalar load uses SGPR offset on SI and 32-bit literal on +; CI. 
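+;
+; Offset arithmetic for the wide cases below (worked examples): the
+; <8 x i32> stride is 32 bytes, so index 1234 lands at
+;   1234 * 32 = 39488 = 0x9a40
+; with the second dwordx4 half 16 bytes later at 0x9a50; the <16 x i32>
+; stride of 64 bytes gives 1234 * 64 = 78976 = 0x13480 for the x16 case.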
+ +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x8: +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} + +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +define void @smrd_valu_ci_offset_x8(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in, <8 x i32> %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr <8 x i32>, <8 x i32> addrspace(2)* %tmp2, i32 1234 + %tmp4 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp3 + %tmp5 = or <8 x i32> %tmp4, %c + store <8 x i32> %tmp5, <8 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16: + +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}} +; GCN-NOT: v_add +; GCN: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}} +; GCN-NOT: v_add +; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}} + +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 +; GCN: buffer_store_dwordx4 + +; GCN: s_endpgm +define void @smrd_valu_ci_offset_x16(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in, <16 x i32> %c) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp2 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %in, i32 %tmp + %tmp3 = getelementptr <16 x i32>, <16 x i32> addrspace(2)* %tmp2, i32 1234 + %tmp4 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp3 + %tmp5 = or <16 x i32> %tmp4, %c + store <16 x i32> %tmp5, <16 x i32> addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu2_salu_user: +; GCN: buffer_load_dword [[MOVED:v[0-9]+]], 
v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}} +; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, s{{[0-9]+}}, [[MOVED]] +; GCN: buffer_store_dword [[ADD]] +define void @smrd_valu2_salu_user(i32 addrspace(1)* %out, [8 x i32] addrspace(2)* %in, i32 %a) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = add i32 %tmp, 4 + %tmp2 = getelementptr [8 x i32], [8 x i32] addrspace(2)* %in, i32 %tmp, i32 4 + %tmp3 = load i32, i32 addrspace(2)* %tmp2 + %tmp4 = add i32 %tmp3, %a + store i32 %tmp4, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu2_max_smrd_offset: +; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1020{{$}} +define void @smrd_valu2_max_smrd_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = add i32 %tmp, 4 + %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 255 + %tmp3 = load i32, i32 addrspace(2)* %tmp2 + store i32 %tmp3, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}smrd_valu2_mubuf_offset: +; GCN-NOT: v_add +; GCN: buffer_load_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:1024{{$}} +define void @smrd_valu2_mubuf_offset(i32 addrspace(1)* %out, [1024 x i32] addrspace(2)* %in) #1 { +entry: + %tmp = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = add i32 %tmp, 4 + %tmp2 = getelementptr [1024 x i32], [1024 x i32] addrspace(2)* %in, i32 %tmp, i32 256 + %tmp3 = load i32, i32 addrspace(2)* %tmp2 + store i32 %tmp3, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_load_imm_v8i32: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +define void @s_load_imm_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +entry: + %tmp0 = tail call i32 @llvm.r600.read.tidig.x() %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)* %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4 @@ -102,12 +279,51 @@ entry: ret void } -; CHECK-LABEL: {{^}}s_load_imm_v16i32: -; CHECK: buffer_load_dwordx4 -; CHECK: buffer_load_dwordx4 -; CHECK: buffer_load_dwordx4 -; CHECK: buffer_load_dwordx4 -define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) { +; GCN-LABEL: {{^}}s_load_imm_v8i32_salu_user: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: buffer_store_dword +define void @s_load_imm_v8i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +entry: + %tmp0 = tail call i32 @llvm.r600.read.tidig.x() + %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 + %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <8 x i32> addrspace(2)* + %tmp3 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp2, align 4 + + %elt0 = extractelement <8 x i32> %tmp3, i32 0 + %elt1 = extractelement <8 x i32> %tmp3, i32 1 + %elt2 = extractelement <8 x i32> %tmp3, i32 2 + %elt3 = extractelement <8 x i32> %tmp3, i32 3 + %elt4 = extractelement <8 x i32> %tmp3, i32 4 + %elt5 = extractelement <8 x i32> %tmp3, i32 5 + %elt6 = extractelement <8 x i32> %tmp3, i32 6 + %elt7 = extractelement <8 x i32> %tmp3, i32 7 + + %add0 = add i32 %elt0, %elt1 + %add1 = add i32 %add0, %elt2 + %add2 = add i32 %add1, %elt3 + 
%add3 = add i32 %add2, %elt4 + %add4 = add i32 %add3, %elt5 + %add5 = add i32 %add4, %elt6 + %add6 = add i32 %add5, %elt7 + + store i32 %add6, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}s_load_imm_v16i32: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +define void @s_load_imm_v16i32(<16 x i32> addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { entry: %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 @@ -116,3 +332,71 @@ entry: store <16 x i32> %tmp3, <16 x i32> addrspace(1)* %out, align 32 ret void } + +; GCN-LABEL: {{^}}s_load_imm_v16i32_salu_user: +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: buffer_load_dwordx4 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: v_add_i32_e32 +; GCN: buffer_store_dword +define void @s_load_imm_v16i32_salu_user(i32 addrspace(1)* %out, i32 addrspace(2)* nocapture readonly %in) #1 { +entry: + %tmp0 = tail call i32 @llvm.r600.read.tidig.x() #1 + %tmp1 = getelementptr inbounds i32, i32 addrspace(2)* %in, i32 %tmp0 + %tmp2 = bitcast i32 addrspace(2)* %tmp1 to <16 x i32> addrspace(2)* + %tmp3 = load <16 x i32>, <16 x i32> addrspace(2)* %tmp2, align 4 + + %elt0 = extractelement <16 x i32> %tmp3, i32 0 + %elt1 = extractelement <16 x i32> %tmp3, i32 1 + %elt2 = extractelement <16 x i32> %tmp3, i32 2 + %elt3 = extractelement <16 x i32> %tmp3, i32 3 + %elt4 = extractelement <16 x i32> %tmp3, i32 4 + %elt5 = extractelement <16 x i32> %tmp3, i32 5 + %elt6 = extractelement <16 x i32> %tmp3, i32 6 + %elt7 = extractelement <16 x i32> %tmp3, i32 7 + %elt8 = extractelement <16 x i32> %tmp3, i32 8 + %elt9 = extractelement <16 x i32> %tmp3, i32 9 + %elt10 = extractelement <16 x i32> %tmp3, i32 10 + %elt11 = extractelement <16 x i32> %tmp3, i32 11 + %elt12 = extractelement <16 x i32> %tmp3, i32 12 + %elt13 = extractelement <16 x i32> %tmp3, i32 13 + %elt14 = extractelement <16 x i32> %tmp3, i32 14 + %elt15 = extractelement <16 x i32> %tmp3, i32 15 + + %add0 = add i32 %elt0, %elt1 + %add1 = add i32 %add0, %elt2 + %add2 = add i32 %add1, %elt3 + %add3 = add i32 %add2, %elt4 + %add4 = add i32 %add3, %elt5 + %add5 = add i32 %add4, %elt6 + %add6 = add i32 %add5, %elt7 + %add7 = add i32 %add6, %elt8 + %add8 = add i32 %add7, %elt9 + %add9 = add i32 %add8, %elt10 + %add10 = add i32 %add9, %elt11 + %add11 = add i32 %add10, %elt12 + %add12 = add i32 %add11, %elt13 + %add13 = add i32 %add12, %elt14 + %add14 = add i32 %add13, %elt15 + + store i32 %add14, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/test/CodeGen/AMDGPU/sampler-resource-id.ll b/test/CodeGen/AMDGPU/sampler-resource-id.ll new file mode 100644 index 000000000000..c41d345369bf --- /dev/null +++ b/test/CodeGen/AMDGPU/sampler-resource-id.ll @@ -0,0 +1,65 @@ +; RUN: llc -march=r600 -mcpu=juniper < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s + +; FUNC-LABEL: {{^}}test_0: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 0( +define void @test_0(i32 %in0, i32 addrspace(1)* %out) { +entry: + %0 = call i32 
@llvm.OpenCL.sampler.get.resource.id(i32 %in0) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_1: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 1( +define void @test_1(i32 %in0, i32 %in1, i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + +; FUNC-LABEL: {{^}}test_2: +; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] +; EG: MOV [[VAL]], literal.x +; EG-NEXT: LSHR +; EG-NEXT: 2( +define void @test_2(i32 %in0, i32 %in1, i32 %in2, i32 addrspace(1)* %out) { +entry: + %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) #0 + store i32 %0, i32 addrspace(1)* %out + ret void +} + + +declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) #0 + +attributes #0 = { readnone } + +!opencl.kernels = !{!0, !1, !2} + +!0 = !{void (i32, i32 addrspace(1)*)* @test_0, !10, !20, !30, !40, !50} +!10 = !{!"kernel_arg_addr_space", i32 0, i32 1} +!20 = !{!"kernel_arg_access_qual", !"none", !"none"} +!30 = !{!"kernel_arg_type", !"sampler_t", !"int*"} +!40 = !{!"kernel_arg_base_type", !"sampler_t", !"int*"} +!50 = !{!"kernel_arg_type_qual", !"", !""} + +!1 = !{void (i32, i32, i32 addrspace(1)*)* @test_1, !11, !21, !31, !41, !51} +!11 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 1} +!21 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"} +!31 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"int*"} +!41 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"int*"} +!51 = !{!"kernel_arg_type_qual", !"", !"", !""} + +!2 = !{void (i32, i32, i32, i32 addrspace(1)*)* @test_2, !12, !22, !32, !42, !52} +!12 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 1} +!22 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"} +!32 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"} +!42 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"} +!52 = !{!"kernel_arg_type_qual", !"", !"", !"", !""} diff --git a/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll index 3863afda5dd3..e4b16c0a165f 100644 --- a/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll +++ b/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll @@ -3,7 +3,7 @@ ; RUN: llc -O0 -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI ; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck %s -check-prefix=SI -declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate +declare void @llvm.AMDGPU.barrier.local() nounwind convergent ; SI-LABEL: {{^}}main( diff --git a/test/CodeGen/AMDGPU/scratch-buffer.ll b/test/CodeGen/AMDGPU/scratch-buffer.ll index 268869daaa32..d43de4766057 100644 --- a/test/CodeGen/AMDGPU/scratch-buffer.ll +++ b/test/CodeGen/AMDGPU/scratch-buffer.ll @@ -51,7 +51,7 @@ done: ; GCN-LABEL: {{^}}legal_offset_fi_offset ; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen -; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000 +; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8000 ; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) { diff --git a/test/CodeGen/AMDGPU/select64.ll b/test/CodeGen/AMDGPU/select64.ll index 
13fb575b2b15..a68fdecb00af 100644 --- a/test/CodeGen/AMDGPU/select64.ll +++ b/test/CodeGen/AMDGPU/select64.ll @@ -51,12 +51,8 @@ define void @v_select_trunc_i64_2(i32 addrspace(1)* %out, i32 %cond, i64 addrspa } ; CHECK-LABEL: {{^}}v_select_i64_split_imm: -; CHECK: s_mov_b32 [[SHI:s[0-9]+]], 63 -; CHECK: s_mov_b32 [[SLO:s[0-9]+]], 0 -; CHECK-DAG: v_mov_b32_e32 [[VHI:v[0-9]+]], [[SHI]] -; CHECK-DAG: v_mov_b32_e32 [[VLO:v[0-9]+]], [[SLO]] -; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VLO]], {{v[0-9]+}} -; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, [[VHI]], {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}} +; CHECK-DAG: v_cndmask_b32_e32 {{v[0-9]+}}, 63, {{v[0-9]+}} ; CHECK: s_endpgm define void @v_select_i64_split_imm(i64 addrspace(1)* %out, i32 %cond, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %cmp = icmp ugt i32 %cond, 5 diff --git a/test/CodeGen/AMDGPU/set-dx10.ll b/test/CodeGen/AMDGPU/set-dx10.ll index 53694dcffa66..57365a6e1fc3 100644 --- a/test/CodeGen/AMDGPU/set-dx10.ll +++ b/test/CodeGen/AMDGPU/set-dx10.ll @@ -5,8 +5,8 @@ ; SET*DX10 instructions. ; CHECK: {{^}}fcmp_une_select_fptosi: -; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -19,8 +19,8 @@ entry: } ; CHECK: {{^}}fcmp_une_select_i32: -; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -31,8 +31,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_fptosi: -; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -45,8 +45,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_i32: -; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -57,8 +57,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_fptosi: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -71,8 +71,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_i32: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -83,8 +83,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_fptosi: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 
1084227584(5.000000e+00) define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -97,8 +97,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_i32: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -109,8 +109,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_fptosi: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -123,8 +123,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_i32: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -135,8 +135,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_fptosi: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -149,8 +149,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_i32: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/setcc-opt.ll b/test/CodeGen/AMDGPU/setcc-opt.ll index 4e6a10d6b78d..63d74820f961 100644 --- a/test/CodeGen/AMDGPU/setcc-opt.ll +++ b/test/CodeGen/AMDGPU/setcc-opt.ll @@ -142,11 +142,14 @@ define void @sext_bool_icmp_ne_k(i1 addrspace(1)* %out, i32 %a, i32 %b) nounwind } ; FUNC-LABEL: {{^}}cmp_zext_k_i8max: -; GCN: buffer_load_ubyte [[B:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44 -; GCN: v_mov_b32_e32 [[K255:v[0-9]+]], 0xff{{$}} -; GCN: v_cmp_ne_i32_e32 vcc, [[K255]], [[B]] +; SI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb +; VI: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN: s_movk_i32 [[K255:s[0-9]+]], 0xff +; GCN: s_and_b32 [[B:s[0-9]+]], [[VALUE]], [[K255]] +; GCN: v_mov_b32_e32 [[VK255:v[0-9]+]], [[K255]] +; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK255]] ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: buffer_store_byte [[RESULT]] ; GCN: s_endpgm define void @cmp_zext_k_i8max(i1 addrspace(1)* %out, i8 %b) nounwind { %b.ext = zext i8 %b to i32 @@ -187,11 +190,14 @@ define void @cmp_sext_k_neg1_i8_sext_arg(i1 addrspace(1)* %out, i8 signext %b) n ; Should do a buffer_load_sbyte and compare with -1 ; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg: -; GCN-DAG: buffer_load_ubyte [[B:v[0-9]+]] -; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xff{{$}} -; GCN: v_cmp_ne_i32_e32 vcc, [[K]], [[B]]{{$}} +; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb +; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c +; GCN: s_movk_i32 [[K:s[0-9]+]], 0xff +; 
GCN: s_and_b32 [[B:s[0-9]+]], [[VAL]], [[K]] +; GCN: v_mov_b32_e32 [[VK:v[0-9]+]], [[K]] +; GCN: v_cmp_ne_i32_e32 vcc, [[B]], [[VK]]{{$}} ; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc -; GCN-NEXT: buffer_store_byte [[RESULT]] +; GCN: buffer_store_byte [[RESULT]] ; GCN: s_endpgm define void @cmp_sext_k_neg1_i8_arg(i1 addrspace(1)* %out, i8 %b) nounwind { %b.ext = sext i8 %b to i32 diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll index 5aedda2ce1a9..23ae3b967971 100644 --- a/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -12,8 +12,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI: buffer_store_dword [[EXTRACT]], ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] -; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1 -; EG-NEXT: LSHR * [[ADDR]] +; EG: LSHR * [[ADDR]] +; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1 define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) { %shl = shl i32 %in, 31 %sext = ashr i32 %shl, 31 @@ -609,3 +609,53 @@ define void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1 store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void } + +; Make sure we propagate the VALUness to users of a moved scalar BFE. + +; FUNC-LABEL: {{^}}v_sext_in_reg_i1_to_i64_move_use: +; SI: buffer_load_dwordx2 +; SI: v_lshl_b64 v{{\[}}[[VAL_LO:[0-9]+]]:[[VAL_HI:[0-9]+]]{{\]}} +; SI-DAG: v_bfe_i32 v[[LO:[0-9]+]], v[[VAL_LO]], 0, 1 +; SI-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] +; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]] +; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[HI]] +; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +define void @v_sext_in_reg_i1_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %a.gep, align 8 + %b = load i64, i64 addrspace(1)* %b.gep, align 8 + + %c = shl i64 %a, %b + %shl = shl i64 %c, 63 + %ashr = ashr i64 %shl, 63 + + %and = and i64 %ashr, %s.val + store i64 %and, i64 addrspace(1)* %out.gep, align 8 + ret void +} + +; FUNC-LABEL: {{^}}v_sext_in_reg_i32_to_i64_move_use: +; SI: buffer_load_dwordx2 +; SI: v_lshl_b64 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, +; SI-DAG: v_ashrrev_i32_e32 v[[SHR:[0-9]+]], 31, v[[LO]] +; SI-DAG: v_and_b32_e32 v[[RESULT_LO:[0-9]+]], s{{[0-9]+}}, v[[LO]] +; SI-DAG: v_and_b32_e32 v[[RESULT_HI:[0-9]+]], s{{[0-9]+}}, v[[SHR]] +; SI: buffer_store_dwordx2 v{{\[}}[[RESULT_LO]]:[[RESULT_HI]]{{\]}} +define void @v_sext_in_reg_i32_to_i64_move_use(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr, i64 %s.val) nounwind { + %tid = call i32 @llvm.r600.read.tidig.x() + %a.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %b.gep = getelementptr i64, i64 addrspace(1)* %aptr, i32 %tid + %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid + %a = load i64, i64 addrspace(1)* %a.gep, align 8 + %b = load i64, i64 addrspace(1)* %b.gep, align 8 + + %c = shl i64 %a, %b + %shl = shl i64 %c, 32 + %ashr = ashr i64 %shl, 32 + %and = and i64 %ashr, %s.val + store i64 %and, i64 addrspace(1)* %out.gep, align 8 + ret void +} diff --git a/test/CodeGen/AMDGPU/shl.ll b/test/CodeGen/AMDGPU/shl.ll index 
6f81a39ed96a..55db80731c90 100644 --- a/test/CodeGen/AMDGPU/shl.ll +++ b/test/CodeGen/AMDGPU/shl.ll @@ -53,14 +53,14 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -;EG: {{^}}shl_i64: +;EG-LABEL: {{^}}shl_i64: ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 -;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} +;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}} ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal ;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} ;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 @@ -80,7 +80,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ret void } -;EG: {{^}}shl_v2i64: +;EG-LABEL: {{^}}shl_v2i64: ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] ;EG-DAG: LSHR {{\*? *}}[[COMPSHA]] @@ -185,8 +185,7 @@ define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in ; Make sure load width gets reduced to i32 load. 
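;
; A left shift by exactly 32 consumes only the low half of the source (a
; worked example of why the load can shrink):
;   shl i64 %a, 32  ==>  result.lo32 = 0, result.hi32 = lo32(%a)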
; GCN-LABEL: {{^}}s_shl_32_i64: ; GCN-DAG: s_load_dword [[LO_A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}} -; GCN-DAG: s_mov_b32 s[[SLO:[0-9]+]], 0{{$}} -; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[SLO]] +; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 0{{$}} ; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[LO_A]] ; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}} define void @s_shl_32_i64(i64 addrspace(1)* %out, i64 %a) { diff --git a/test/CodeGen/AMDGPU/shl_add_constant.ll b/test/CodeGen/AMDGPU/shl_add_constant.ll index b1485bfaaebb..dfb2bf3383fc 100644 --- a/test/CodeGen/AMDGPU/shl_add_constant.ll +++ b/test/CodeGen/AMDGPU/shl_add_constant.ll @@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() #1 ; FUNC-LABEL: {{^}}shl_2_add_9_i32: ; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}} -; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]] +; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]] ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { @@ -20,7 +20,7 @@ define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { } ; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses: -; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}} +; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], vcc, 9, {{v[0-9]+}} ; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}} ; SI-DAG: buffer_store_dword [[ADDREG]] ; SI-DAG: buffer_store_dword [[SHLREG]] @@ -40,7 +40,7 @@ define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1 ; FUNC-LABEL: {{^}}shl_2_add_999_i32: ; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}} -; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]] +; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]] ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { diff --git a/test/CodeGen/AMDGPU/shl_add_ptr.ll b/test/CodeGen/AMDGPU/shl_add_ptr.ll index 6671e909cd1d..ac94824bd61f 100644 --- a/test/CodeGen/AMDGPU/shl_add_ptr.ll +++ b/test/CodeGen/AMDGPU/shl_add_ptr.ll @@ -35,7 +35,7 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad ; SI-LABEL: {{^}}load_shl_base_lds_1: ; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} ; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 -; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}} +; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}} ; SI-DAG: buffer_store_dword [[RESULT]] ; SI-DAG: buffer_store_dword [[ADDUSE]] ; SI: s_endpgm diff --git a/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll b/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll new file mode 100644 index 000000000000..27a8e70aae13 --- /dev/null +++ b/test/CodeGen/AMDGPU/si-instr-info-correct-implicit-operands.ll @@ -0,0 +1,16 @@ +; RUN: llc -o /dev/null %s -march=amdgcn -mcpu=verde -verify-machineinstrs -stop-after expand-isel-pseudos 2>&1 | FileCheck %s +; This test verifies that the instruction selection will add the implicit +; register operands in the correct order when modifying the opcode of an +; instruction to V_ADD_I32_e32. 
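+;
+; v_add_i32_e32 writes its carry-out to vcc and, like all VALU instructions,
+; executes under exec, so the rewritten instruction must end with (sketch):
+;   ..., implicit-def %vcc, implicit %exec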
+
+; CHECK: %{{[0-9]+}} = V_ADD_I32_e32 %{{[0-9]+}}, %{{[0-9]+}}, implicit-def %vcc, implicit %exec
+
+define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+entry:
+  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+  %a = load i32, i32 addrspace(1)* %in
+  %b = load i32, i32 addrspace(1)* %b_ptr
+  %result = add i32 %a, %b
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/si-literal-folding.ll b/test/CodeGen/AMDGPU/si-literal-folding.ll
new file mode 100644
index 000000000000..901b3c3453fc
--- /dev/null
+++ b/test/CodeGen/AMDGPU/si-literal-folding.ll
@@ -0,0 +1,17 @@
+; XFAIL: *
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}main:
+; CHECK-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0xbf4353f8
+
+define void @main(float) #0 {
+main_body:
+  %1 = fmul float %0, 0x3FE86A7F00000000
+  %2 = fmul float %0, 0xBFE86A7F00000000
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %1, float %1, float %2, float %2)
+  ret void
+}
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="1" }
diff --git a/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
index 84652701f773..d7b35fc631eb 100644
--- a/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -6,6 +6,16 @@
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: s_wqm
+
+; Make sure we do not emit the unused scratch resource descriptor setup.
+; CHECK-NOT: s_mov_b32
+; CHECK-NOT: s_mov_b32
+; CHECK-NOT: s_mov_b32
+; CHECK-NOT: s_mov_b32
+
+; CHECK: s_mov_b32 m0
+
+
 ; Writing to M0 from an SMRD instruction will hang the GPU.
 ; CHECK-NOT: s_buffer_load_dword m0
 ; CHECK: s_endpgm
diff --git a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 5a6129aaa3fa..bc766dbcac67 100644
--- a/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -155,9 +155,9 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out,
 }
 
 ; FUNC-LABEL: @reorder_local_offsets
-; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
 ; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
 ; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
 ; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
 ; CI: buffer_store_dword
@@ -181,9 +181,10 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
 }
 
 ; FUNC-LABEL: @reorder_global_offsets
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
 ; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
 ; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
 ; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
 ; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
 ; CI: buffer_store_dword
@@ -233,4 +234,4 @@ define void @reorder_global_offsets(i32 addrspace(1)* nocapture %out, i32 addrsp
 
 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
 attributes #1 = { "ShaderType"="1" nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
-attributes #2 = { nounwind noduplicate }
+attributes #2 = { nounwind convergent }
diff --git a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index 0db7cdc171b5..a94ccc32e61c 100644
--- a/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -46,9 +46,9 @@ define void @s_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 %in) {
 
 ; SI-LABEL: @v_sint_to_fp_i64_to_f64
 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_cvt_f64_i32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @v_sint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
diff --git a/test/CodeGen/AMDGPU/sminmax.ll b/test/CodeGen/AMDGPU/sminmax.ll
new file mode 100644
index 000000000000..e646605f7da1
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sminmax.ll
@@ -0,0 +1,130 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+
+; FUNC-LABEL: {{^}}s_abs_i32:
+; GCN: s_abs_i32
+; GCN: s_add_i32
+define void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind {
+  %neg = sub i32 0, %val
+  %cond = icmp sgt i32 %val, %neg
+  %res = select i1 %cond, i32 %val, i32 %neg
+  %res2 = add i32 %res, 2
+  store i32 %res2, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_i32:
+; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG]], [[SRC]]
+; GCN: v_add_i32
+define void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind {
+  %val = load i32, i32 addrspace(1)* %src, align 4
+  %neg = sub i32 0, %val
+  %cond = icmp sgt i32 %val, %neg
+  %res = select i1 %cond, i32 %val, i32 %neg
+  %res2 = add i32 %res, 2
+  store i32 %res2, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_abs_v2i32:
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_add_i32
+; GCN: s_add_i32
+define void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %val) nounwind {
+  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
+  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
+  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
+  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
+  %neg = sub <2 x i32> %z1, %val
+  %cond = icmp sgt <2 x i32> %val, %neg
+  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
+  %res2 = add <2 x i32> %res, %t1
+  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_v2i32:
+; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
+
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+
+; GCN: v_add_i32
+; GCN: v_add_i32
+define void @v_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %src) nounwind {
+  %z0 = insertelement <2 x i32> undef, i32 0, i32 0
+  %z1 = insertelement <2 x i32> %z0, i32 0, i32 1
+  %t0 = insertelement <2 x i32> undef, i32 2, i32 0
+  %t1 = insertelement <2 x i32> %t0, i32 2, i32 1
+  %val = load <2 x i32>, <2 x i32> addrspace(1)* %src, align 4
+  %neg = sub <2 x i32> %z1, %val
+  %cond = icmp sgt <2 x i32> %val, %neg
+  %res = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> %neg
+  %res2 = add <2 x i32> %res, %t1
+  store <2 x i32> %res2, <2 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_abs_v4i32:
+; TODO: this should use s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+; GCN: s_abs_i32
+
+; GCN: s_add_i32
+; GCN: s_add_i32
+; GCN: s_add_i32
+; GCN: s_add_i32
+define void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %val) nounwind {
+  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
+  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
+  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
+  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
+  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
+  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
+  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
+  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
+  %neg = sub <4 x i32> %z3, %val
+  %cond = icmp sgt <4 x i32> %val, %neg
+  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
+  %res2 = add <4 x i32> %res, %t3
+  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
+  ret void
+}
+
+; FUNC-LABEL: {{^}}v_abs_v4i32:
+; GCN: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]]
+; GCN: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]]
+
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG0]], [[SRC0]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG1]], [[SRC1]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG2]], [[SRC2]]
+; GCN: v_max_i32_e32 {{v[0-9]+}}, [[NEG3]], [[SRC3]]
+
+; GCN: v_add_i32
+; GCN: v_add_i32
+; GCN: v_add_i32
+; GCN: v_add_i32
+define void @v_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %src) nounwind {
+  %z0 = insertelement <4 x i32> undef, i32 0, i32 0
+  %z1 = insertelement <4 x i32> %z0, i32 0, i32 1
+  %z2 = insertelement <4 x i32> %z1, i32 0, i32 2
+  %z3 = insertelement <4 x i32> %z2, i32 0, i32 3
+  %t0 = insertelement <4 x i32> undef, i32 2, i32 0
+  %t1 = insertelement <4 x i32> %t0, i32 2, i32 1
+  %t2 = insertelement <4 x i32> %t1, i32 2, i32 2
+  %t3 = insertelement <4 x i32> %t2, i32 2, i32 3
+  %val = load <4 x i32>, <4 x i32> addrspace(1)* %src, align 4
+  %neg = sub <4 x i32> %z3, %val
+  %cond = icmp sgt <4 x i32> %val, %neg
+  %res = select <4 x i1> %cond, <4 x i32> %val, <4 x i32> %neg
+  %res2 = add <4 x i32> %res, %t3
+  store <4 x i32> %res2, <4 x i32> addrspace(1)* %out, align 4
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/smrd.ll b/test/CodeGen/AMDGPU/smrd.ll
index 0598208e1317..1d6bb9ece8c6 100644
--- a/test/CodeGen/AMDGPU/smrd.ll
+++ b/test/CodeGen/AMDGPU/smrd.ll
@@ -1,9 +1,10 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=SI -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=GCN --check-prefix=SIVI %s
+; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=CI --check-prefix=GCN %s
+; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs | FileCheck --check-prefix=VI --check-prefix=GCN --check-prefix=SIVI %s
 
 ; SMRD load with an immediate offset.
 ; GCN-LABEL: {{^}}smrd0:
-; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
+; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01
 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4
 define void @smrd0(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
@@ -15,7 +16,7 @@ entry:
 
 ; SMRD load with the largest possible immediate offset.
 ; GCN-LABEL: {{^}}smrd1:
-; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}}
 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
 define void @smrd1(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
 entry:
@@ -29,6 +30,7 @@ entry:
 ; GCN-LABEL: {{^}}smrd2:
 ; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
 ; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
 ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
 ; GCN: s_endpgm
 define void @smrd2(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
@@ -54,9 +56,37 @@ entry:
   ret void
 }
 
+; SMRD load with the largest possible immediate offset on VI
+; GCN-LABEL: {{^}}smrd4:
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
+; SI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
+; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
+define void @smrd4(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262143
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; SMRD load with an offset greater than the largest possible immediate on VI
+; GCN-LABEL: {{^}}smrd5:
+; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
+; SIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
+; GCN: s_endpgm
+define void @smrd5(i32 addrspace(1)* %out, i32 addrspace(2)* %ptr) {
+entry:
+  %0 = getelementptr i32, i32 addrspace(2)* %ptr, i64 262144
+  %1 = load i32, i32 addrspace(2)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
 ; SMRD load using the load.const intrinsic with an immediate offset
 ; GCN-LABEL: {{^}}smrd_load_const0:
-; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
+; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 ; encoding: [0x04
 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x10
 define void @smrd_load_const0(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
@@ -70,7 +100,7 @@ main_body:
 
 ; SMRD load using the load.const intrinsic with the largest possible immediate
 ; offset.
 ; GCN-LABEL: {{^}}smrd_load_const1:
-; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
+; SICI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff
 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc
 define void @smrd_load_const1(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
@@ -86,6 +116,7 @@ main_body:
 
 ; GCN-LABEL: {{^}}smrd_load_const2:
 ; SI: s_movk_i32 s[[OFFSET:[0-9]]], 0x400
 ; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], s[[OFFSET]] ; encoding: [0x0[[OFFSET]]
+; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100
 ; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400
 define void @smrd_load_const2(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
 main_body:
@@ -96,6 +127,36 @@ main_body:
   ret void
 }
 
+; SMRD load with the largest possible immediate offset on VI
+; GCN-LABEL: {{^}}smrd_load_const3:
+; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc
+; SI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3ffff
+; VI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc
+define void @smrd_load_const3(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
+  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048572)
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+  ret void
+}
+
+; SMRD load with an offset greater than the largest possible immediate on VI
+; GCN-LABEL: {{^}}smrd_load_const4:
+; SIVI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000
+; SIVI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]]
+; CI: s_buffer_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000
+; GCN: s_endpgm
+define void @smrd_load_const4(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 {
+main_body:
+  %20 = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %0, i32 0
+  %21 = load <16 x i8>, <16 x i8> addrspace(2)* %20
+  %22 = call float @llvm.SI.load.const(<16 x i8> %21, i32 1048576)
+  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %22, float %22, float %22)
+  ret void
+}
+
 ; Function Attrs: nounwind readnone
 declare float @llvm.SI.load.const(<16 x i8>, i32) #1
diff --git a/test/CodeGen/AMDGPU/split-scalar-i64-add.ll b/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
index 46409cdfae1c..9e181bc14d9d 100644
--- a/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
+++ b/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
 declare i32 @llvm.r600.read.tidig.x() readnone
 
@@ -8,9 +8,22 @@ declare i32 @llvm.r600.read.tidig.x() readnone
 ; scc instead.
 
 ; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_0:
-; SI: v_add_i32
-; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
+; SI: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x18f, v{{[0-9]+}}
+; SI: v_addc_u32_e32 v{{[0-9]+}}, vcc, 0, v{{[0-9]+}}, vcc
+define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %s.val) {
+  %v.val = load volatile i32, i32 addrspace(1)* %in
+  %vec.0 = insertelement <2 x i32> undef, i32 %s.val, i32 0
+  %vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
+  %bc = bitcast <2 x i32> %vec.1 to i64
+  %add = add i64 %bc, 399
+  store i64 %add, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_0:
+; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0x18f
+; SI: s_addc_u32 {{s[0-9]+}}, 0xf423f, 0
+define void @s_imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
   %vec.0 = insertelement <2 x i32> undef, i32 %val, i32 0
   %vec.1 = insertelement <2 x i32> %vec.0, i32 999999, i32 1
   %bc = bitcast <2 x i32> %vec.1 to i64
@@ -22,7 +35,20 @@ define void @imp_def_vcc_split_i64_add_0(i64 addrspace(1)* %out, i32 %val) {
 
 ; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_1:
 ; SI: v_add_i32
 ; SI: v_addc_u32
-define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
+define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
+  %v.val = load volatile i32, i32 addrspace(1)* %in
+  %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
+  %vec.1 = insertelement <2 x i32> %vec.0, i32 %v.val, i32 1
+  %bc = bitcast <2 x i32> %vec.1 to i64
+  %add = add i64 %bc, %val1
+  store i64 %add, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; FUNC-LABEL: {{^}}s_imp_def_vcc_split_i64_add_1:
+; SI: s_add_u32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; SI: s_addc_u32 {{s[0-9]+}}, 0x1869f, {{s[0-9]+}}
+define void @s_imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64 %val1) {
   %vec.0 = insertelement <2 x i32> undef, i32 %val0, i32 0
   %vec.1 = insertelement <2 x i32> %vec.0, i32 99999, i32 1
   %bc = bitcast <2 x i32> %vec.1 to i64
@@ -32,9 +58,9 @@ define void @imp_def_vcc_split_i64_add_1(i64 addrspace(1)* %out, i32 %val0, i64
 }
 
 ; Doesn't use constants
-; FUNC-LABEL @imp_def_vcc_split_i64_add_2
-; SI: v_add_i32
-; SI: v_addc_u32
+; FUNC-LABEL: {{^}}imp_def_vcc_split_i64_add_2:
+; SI: v_add_i32_e32 {{v[0-9]+}}, vcc, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_addc_u32_e32 {{v[0-9]+}}, vcc, {{v[0-9]+}}, {{v[0-9]+}}, vcc
 define void @imp_def_vcc_split_i64_add_2(i64 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %val0, i64 %val1) {
   %tid = call i32 @llvm.r600.read.tidig.x() readnone
   %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
diff --git a/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll b/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
new file mode 100644
index 000000000000..4c82ed6affc2
--- /dev/null
+++ b/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
@@ -0,0 +1,104 @@
+; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -mattr=-promote-alloca < %s | FileCheck -check-prefix=GCN %s
+
+@sPrivateStorage = external addrspace(3) global [256 x [8 x <4 x i64>]]
+
+; GCN-LABEL: {{^}}ds_reorder_vector_split:
+
+; Write zeroinitializer
+; GCN-DAG: ds_write_b64 [[PTR:v[0-9]+]], [[VAL:v\[[0-9]+:[0-9]+\]]] offset:24
+; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:16
+; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]] offset:8
+; GCN-DAG: ds_write_b64 [[PTR]], [[VAL]]{{$}}
+
+; GCN: s_waitcnt vmcnt
+
+; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:24
+; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16
+; GCN-DAG: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:8
+
+; GCN: s_waitcnt lgkmcnt
+
+; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:8
+; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:16
+; GCN-DAG: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset:24
+
+; Appears to be a dead store of a vector component.
+; GCN: ds_write_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
+
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: buffer_store_dwordx2
+; GCN: s_endpgm
+define void @ds_reorder_vector_split(<4 x i64> addrspace(1)* nocapture readonly %srcValues, i32 addrspace(1)* nocapture readonly %offsets, <4 x i64> addrspace(1)* nocapture %destBuffer, i32 %alignmentOffset) #0 {
+entry:
+  %tmp = tail call i32 @llvm.r600.read.local.size.y()
+  %tmp1 = tail call i32 @llvm.r600.read.local.size.z()
+  %tmp2 = tail call i32 @llvm.r600.read.tidig.x()
+  %tmp3 = tail call i32 @llvm.r600.read.tidig.y()
+  %tmp4 = tail call i32 @llvm.r600.read.tidig.z()
+  %tmp6 = mul i32 %tmp2, %tmp
+  %tmp10 = add i32 %tmp3, %tmp6
+  %tmp11 = mul i32 %tmp10, %tmp1
+  %tmp9 = add i32 %tmp11, %tmp4
+  %x.i.i = tail call i32 @llvm.r600.read.tgid.x() #1
+  %x.i.12.i = tail call i32 @llvm.r600.read.local.size.x() #1
+  %mul.26.i = mul i32 %x.i.12.i, %x.i.i
+  %add.i = add i32 %tmp2, %mul.26.i
+  %arrayidx = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %add.i
+  store <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* %arrayidx
+  %tmp12 = sext i32 %add.i to i64
+  %arrayidx1 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %srcValues, i64 %tmp12
+  %tmp13 = load <4 x i64>, <4 x i64> addrspace(1)* %arrayidx1
+  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %offsets, i64 %tmp12
+  %tmp14 = load i32, i32 addrspace(1)* %arrayidx2
+  %add.ptr = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 0, i32 %alignmentOffset
+  %mul.i = shl i32 %tmp14, 2
+  %arrayidx.i = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr, i32 %mul.i
+  %tmp15 = bitcast i64 addrspace(3)* %arrayidx.i to <4 x i64> addrspace(3)*
+  store <4 x i64> %tmp13, <4 x i64> addrspace(3)* %tmp15
+  %add.ptr6 = getelementptr [256 x [8 x <4 x i64>]], [256 x [8 x <4 x i64>]] addrspace(3)* @sPrivateStorage, i32 0, i32 %tmp9, i32 %tmp14, i32 %alignmentOffset
+  %tmp16 = sext i32 %tmp14 to i64
+  %tmp17 = sext i32 %alignmentOffset to i64
+  %add.ptr9 = getelementptr inbounds <4 x i64>, <4 x i64> addrspace(1)* %destBuffer, i64 %tmp16, i64 %tmp17
+  %tmp18 = bitcast <4 x i64> %tmp13 to i256
+  %trunc = trunc i256 %tmp18 to i64
+  store i64 %trunc, i64 addrspace(1)* %add.ptr9
+  %arrayidx10.1 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 1
+  %tmp19 = load i64, i64 addrspace(3)* %arrayidx10.1
+  %arrayidx11.1 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 1
+  store i64 %tmp19, i64 addrspace(1)* %arrayidx11.1
+  %arrayidx10.2 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 2
+  %tmp20 = load i64, i64 addrspace(3)* %arrayidx10.2
+  %arrayidx11.2 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 2
+  store i64 %tmp20, i64 addrspace(1)* %arrayidx11.2
+  %arrayidx10.3 = getelementptr inbounds i64, i64 addrspace(3)* %add.ptr6, i32 3
+  %tmp21 = load i64, i64 addrspace(3)* %arrayidx10.3
+  %arrayidx11.3 = getelementptr inbounds i64, i64 addrspace(1)* %add.ptr9, i64 3
+  store i64 %tmp21, i64 addrspace(1)* %arrayidx11.3
+  ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tgid.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.x() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.local.size.z() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.y() #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.r600.read.tidig.z() #1
+
+attributes #0 = { norecurse nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/sra.ll b/test/CodeGen/AMDGPU/sra.ll
index bcbc32f4c053..3b59bbfb18c0 100644
--- a/test/CodeGen/AMDGPU/sra.ll
+++ b/test/CodeGen/AMDGPU/sra.ll
@@ -70,11 +70,11 @@ entry:
 ;EG-LABEL: {{^}}ashr_i64_2:
 ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 ;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
-;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+;EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 ;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
+;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|PV.[XYZW]|[[SHIFT]]}}
 ;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
 ;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
 ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
diff --git a/test/CodeGen/AMDGPU/srl.ll b/test/CodeGen/AMDGPU/srl.ll
index 0dad91e709d9..bbd954356322 100644
--- a/test/CodeGen/AMDGPU/srl.ll
+++ b/test/CodeGen/AMDGPU/srl.ll
@@ -65,14 +65,14 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
 
 ; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 ; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
-; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 ; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
+; EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 ; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
-; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
-; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
-; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
+; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
+; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]|PV\.[XYZW]}}
 ; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
-; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
+; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}}
+; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]]
 ; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
   %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
@@ -190,8 +190,7 @@ define void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %i
 ; Make sure load width gets reduced to i32 load.
 ; GCN-LABEL: {{^}}s_lshr_32_i64:
 ; GCN-DAG: s_load_dword [[HI_A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc{{$}}
-; GCN-DAG: s_mov_b32 s[[SHI:[0-9]+]], 0{{$}}
-; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[SHI]]
+; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], 0{{$}}
 ; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], [[HI_A]]
 ; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
 define void @s_lshr_32_i64(i64 addrspace(1)* %out, i64 %a) {
diff --git a/test/CodeGen/AMDGPU/store-barrier.ll b/test/CodeGen/AMDGPU/store-barrier.ll
index 4a72b4d090ad..ba4049f28a6e 100644
--- a/test/CodeGen/AMDGPU/store-barrier.ll
+++ b/test/CodeGen/AMDGPU/store-barrier.ll
@@ -36,7 +36,7 @@ bb:
   ret void
 }
 
-; Function Attrs: noduplicate nounwind
+; Function Attrs: convergent nounwind
 declare void @llvm.AMDGPU.barrier.local() #2
 
-attributes #2 = { noduplicate nounwind }
+attributes #2 = { convergent nounwind }
diff --git a/test/CodeGen/AMDGPU/store.ll b/test/CodeGen/AMDGPU/store.ll
index 0f89405e073b..d22f43fa05ef 100644
--- a/test/CodeGen/AMDGPU/store.ll
+++ b/test/CodeGen/AMDGPU/store.ll
@@ -287,16 +287,33 @@ entry:
 ; CM: LDS_WRITE
 ; CM: LDS_WRITE
 
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
-; SI: ds_write_b32
+; SI: ds_write_b64
+; SI: ds_write_b64
 define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
 entry:
   store <4 x i32> %in, <4 x i32> addrspace(3)* %out
   ret void
 }
 
+; FUNC-LABEL: {{^}}store_local_v4i32_align4:
+; EG: LDS_WRITE
+; EG: LDS_WRITE
+; EG: LDS_WRITE
+; EG: LDS_WRITE
+
+; CM: LDS_WRITE
+; CM: LDS_WRITE
+; CM: LDS_WRITE
+; CM: LDS_WRITE
+
+; SI: ds_write2_b32
+; SI: ds_write2_b32
+define void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
+entry:
+  store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}store_local_i64_i8:
 ; EG: LDS_BYTE_WRITE
 ; SI: ds_write_b8
diff --git a/test/CodeGen/AMDGPU/store_typed.ll b/test/CodeGen/AMDGPU/store_typed.ll
new file mode 100644
index 000000000000..515fcf04f406
--- /dev/null
+++ b/test/CodeGen/AMDGPU/store_typed.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck --check-prefix=CM --check-prefix=FUNC %s
+
+; store to rat 0
+; FUNC-LABEL: {{^}}store_typed_rat0:
+; EG: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}, 1
+; CM: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}
+
+define void @store_typed_rat0(<4 x i32> %data, <4 x i32> %index) {
+  call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 0)
+  ret void
+}
+
+; store to rat 11
+; FUNC-LABEL: {{^}}store_typed_rat11:
+; EG: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}, 1
+; CM: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}
+
+define void @store_typed_rat11(<4 x i32> %data, <4 x i32> %index) {
+  call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 11)
+  ret void
+}
+
+declare void @llvm.r600.rat.store.typed(<4 x i32>, <4 x i32>, i32)
diff --git a/test/CodeGen/AMDGPU/sub.ll b/test/CodeGen/AMDGPU/sub.ll
index b7fba0efa5b2..9f9446a4e608 100644
--- a/test/CodeGen/AMDGPU/sub.ll
+++ b/test/CodeGen/AMDGPU/sub.ll
@@ -7,7 +7,7 @@ declare i32 @llvm.r600.read.tidig.x() readnone
 ; FUNC-LABEL: {{^}}test_sub_i32:
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
   %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
   %a = load i32, i32 addrspace(1)* %in
@@ -22,8 +22,8 @@ define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 
 define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -40,10 +40,10 @@ define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
 
 define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
   %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
diff --git a/test/CodeGen/AMDGPU/trunc.ll b/test/CodeGen/AMDGPU/trunc.ll
index bf690ca4cb28..ad52d0f2e238 100644
--- a/test/CodeGen/AMDGPU/trunc.ll
+++ b/test/CodeGen/AMDGPU/trunc.ll
@@ -61,7 +61,7 @@ define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
 }
 
 ; SI-LABEL: {{^}}sgpr_trunc_i32_to_i1:
-; SI: v_and_b32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: s_and_b32 s{{[0-9]+}}, 1, s{{[0-9]+}}
 ; SI: v_cmp_eq_i32
 define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
   %trunc = trunc i32 %a to i1
@@ -72,9 +72,9 @@ define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
 
 ; SI-LABEL: {{^}}s_trunc_i64_to_i1:
 ; SI: s_load_dwordx2 s{{\[}}[[SLO:[0-9]+]]:{{[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0xb
-; SI: v_and_b32_e64 [[MASKED:v[0-9]+]], 1, s[[SLO]]
-; SI: v_cmp_eq_i32_e32 vcc, 1, [[MASKED]]
-; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, vcc
+; SI: s_and_b32 [[MASKED:s[0-9]+]], 1, s[[SLO]]
+; SI: v_cmp_eq_i32_e64 s{{\[}}[[VLO:[0-9]+]]:[[VHI:[0-9]+]]], 1, [[MASKED]]
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, -12, 63, s{{\[}}[[VLO]]:[[VHI]]]
 define void @s_trunc_i64_to_i1(i32 addrspace(1)* %out, i64 %x) {
   %trunc = trunc i64 %x to i1
   %sel = select i1 %trunc, i32 63, i32 -12
diff --git a/test/CodeGen/AMDGPU/udivrem.ll b/test/CodeGen/AMDGPU/udivrem.ll
index b3837f28209a..f692b7dfdc27 100644
--- a/test/CodeGen/AMDGPU/udivrem.ll
+++ b/test/CodeGen/AMDGPU/udivrem.ll
@@ -30,19 +30,19 @@
 ; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
 ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
 ; SI: v_cndmask_b32_e64
 ; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
+; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
 ; SI: v_cndmask_b32_e64
 ; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
 ; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
 ; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
@@ -110,15 +110,15 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
 ; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
 ; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
@@ -133,15 +133,15 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
 ; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
 ; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
 ; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
 ; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
@@ -257,83 +257,83 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
 ; EG-DAG: CNDE_INT
 ; EG-DAG: CNDE_INT
 
-; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[FIRST_Quotient_A_One:v[0-9]+]], {{.*}}, [[FIRST_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[FIRST_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[SECOND_Quotient_A_One:v[0-9]+]], {{.*}}, [[SECOND_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[SECOND_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
-; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[THIRD_Quotient_A_One:v[0-9]+]], {{.*}}, [[THIRD_Quotient]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Quotient_S_One:v[0-9]+]],
+; SI-DAG: v_and_b32_e32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_add_i32_e32 [[THIRD_Remainder_A_Den:v[0-9]+]],
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder_S_Den:v[0-9]+]],
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
-; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: v_rcp_iflag_f32_e32
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_sub_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_mul_hi_u32
+; SI-DAG: v_add_i32_e32
+; SI-DAG: v_subrev_i32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI: s_endpgm
 define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
diff --git a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index 6f608df5e9f5..65fe580792a5 100644
--- a/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -4,9 +4,9 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 
 ; SI-LABEL: {{^}}v_uint_to_fp_i64_to_f64
 ; SI: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
-; SI: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
-; SI: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
-; SI: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_cvt_f64_u32_e32 [[HI_CONV:v\[[0-9]+:[0-9]+\]]], v[[HI]]
+; SI-DAG: v_cvt_f64_u32_e32 [[LO_CONV:v\[[0-9]+:[0-9]+\]]], v[[LO]]
+; SI-DAG: v_ldexp_f64 [[LDEXP:v\[[0-9]+:[0-9]+\]]], [[HI_CONV]], 32
 ; SI: v_add_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[LDEXP]], [[LO_CONV]]
 ; SI: buffer_store_dwordx2 [[RESULT]]
 define void @v_uint_to_fp_i64_to_f64(double addrspace(1)* %out, i64 addrspace(1)* %in) {
diff --git a/test/CodeGen/AMDGPU/unsupported-cc.ll b/test/CodeGen/AMDGPU/unsupported-cc.ll
index 8ab4faf2f145..d120111a71fb 100644
--- a/test/CodeGen/AMDGPU/unsupported-cc.ll
+++ b/test/CodeGen/AMDGPU/unsupported-cc.ll
@@ -3,8 +3,8 @@
 ; These tests are for condition codes that are not supported by the hardware
 
 ; CHECK-LABEL: {{^}}slt:
-; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 5(7.006492e-45)
 define void @slt(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -15,8 +15,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ult_i32:
-; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 5(7.006492e-45)
 define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -40,8 +40,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ult_float_native:
-; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @ult_float_native(float addrspace(1)* %out, float %in) {
 entry:
@@ -52,8 +52,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}olt:
-; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @olt(float addrspace(1)* %out, float %in) {
 entry:
@@ -64,8 +64,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}sle:
-; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 6(8.407791e-45)
 define void @sle(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -76,8 +76,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ule_i32:
-; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK: LSHR
+; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT: 6(8.407791e-45)
 define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
 entry:
@@ -101,8 +101,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ule_float_native:
-; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
 ; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @ule_float_native(float addrspace(1)* %out, float %in) {
 entry:
@@ -113,8 +113,8 @@ entry:
 }
 
 ; CHECK-LABEL: {{^}}ole:
-; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR *
+; CHECK: LSHR
+; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 ; CHECK-NEXT:1084227584(5.000000e+00)
 define void @ole(float addrspace(1)* %out, float %in) {
 entry:
diff --git a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index f26f30022b4f..87b925a24a04 100644
--- a/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
 
 declare float @llvm.fma.f32(float, float, float) #1
+declare double @llvm.fma.f64(double, double, double) #1
 declare float @llvm.fmuladd.f32(float, float, float) #1
 declare i32 @llvm.AMDGPU.imad24(i32, i32, i32) #1
 
@@ -40,6 +41,32 @@ define void @test_sgpr_use_twice_ternary_op_a_a_b(float addrspace(1)* %out, floa
   ret void
 }
 
+; GCN-LABEL: {{^}}test_use_s_v_s:
+; GCN-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+
+; GCN: buffer_load_dword [[VA0:v[0-9]+]]
+; GCN-NOT: v_mov_b32
+; GCN: buffer_load_dword [[VA1:v[0-9]+]]
+
+; GCN-NOT: v_mov_b32
+; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
+; GCN-NOT: v_mov_b32
+
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VA0]], [[SA]], [[VB]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VA1]], [[SA]], [[VB]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+define void @test_use_s_v_s(float addrspace(1)* %out, float %a, float %b, float addrspace(1)* %in) #0 {
+  %va0 = load volatile float, float addrspace(1)* %in
+  %va1 = load volatile float, float addrspace(1)* %in
+  %fma0 = call float @llvm.fma.f32(float %a, float %va0, float %b) #1
+  %fma1 = call float @llvm.fma.f32(float %a, float %va1, float %b) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_b_a:
 ; SI: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
 ; SI: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
@@ -99,5 +126,145 @@ define void @test_sgpr_use_twice_ternary_op_imm_a_a(i32 addrspace(1)* %out, i32
   ret void
 }
 
+; GCN-LABEL: {{^}}test_sgpr_use_twice_ternary_op_a_a_kimm:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[SGPR]], [[VK]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_sgpr_use_twice_ternary_op_a_a_kimm(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float %a, float %a, float 1024.0) #1
+  store float %fma, float addrspace(1)* %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR]]
+; GCN: buffer_store_dword [[RESULT0]]
+define void @test_literal_use_twice_ternary_op_k_k_s(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
+  store float %fma, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_k_s_x2:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[VK]], [[VK]], [[SGPR0]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[VK]], [[VK]], [[SGPR1]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+; GCN: s_endpgm
+define void @test_literal_use_twice_ternary_op_k_k_s_x2(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma0 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %a) #1
+  %fma1 = call float @llvm.fma.f32(float 1024.0, float 1024.0, float %b) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_literal_use_twice_ternary_op_k_s_k(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
+  store float %fma, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_k_s_k_x2:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+; GCN: s_endpgm
+define void @test_literal_use_twice_ternary_op_k_s_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma0 = call float @llvm.fma.f32(float 1024.0, float %a, float 1024.0) #1
+  %fma1 = call float @llvm.fma.f32(float 1024.0, float %b, float 1024.0) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k:
+; GCN-DAG: s_load_dword [[SGPR:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN: v_fma_f32 [[RESULT:v[0-9]+]], [[SGPR]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT]]
+define void @test_literal_use_twice_ternary_op_s_k_k(float addrspace(1)* %out, float %a) #0 {
+  %fma = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
+  store float %fma, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_literal_use_twice_ternary_op_s_k_k_x2:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x44800000
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VK]], [[VK]]
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR1]], [[VK]], [[VK]]
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+; GCN: s_endpgm
+define void @test_literal_use_twice_ternary_op_s_k_k_x2(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma0 = call float @llvm.fma.f32(float %a, float 1024.0, float 1024.0) #1
+  %fma1 = call float @llvm.fma.f32(float %b, float 1024.0, float 1024.0) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_s0_s1_k_f32:
+; GCN-DAG: s_load_dword [[SGPR0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dword [[SGPR1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xc|0x30}}
+; GCN-DAG: v_mov_b32_e32 [[VK0:v[0-9]+]], 0x44800000
+; GCN-DAG: v_mov_b32_e32 [[VS1:v[0-9]+]], [[SGPR1]]
+
+; GCN-DAG: v_fma_f32 [[RESULT0:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK0]]
+; GCN-DAG: v_mov_b32_e32 [[VK1:v[0-9]+]], 0x45800000
+; GCN-DAG: v_fma_f32 [[RESULT1:v[0-9]+]], [[SGPR0]], [[VS1]], [[VK1]]
+
+; GCN: buffer_store_dword [[RESULT0]]
+; GCN: buffer_store_dword [[RESULT1]]
+define void @test_s0_s1_k_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 1024.0) #1
+  %fma1 = call float @llvm.fma.f32(float %a, float %b, float 4096.0) #1
+  store volatile float %fma0, float addrspace(1)* %out
+  store volatile float %fma1, float addrspace(1)* %out
+  ret void
+}
+
+; FIXME: Immediate in SGPRs just copied to VGPRs
+; GCN-LABEL: {{^}}test_s0_s1_k_f64:
+; GCN-DAG: s_load_dwordx2 [[SGPR0:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; GCN-DAG: s_load_dwordx2 s{{\[}}[[SGPR1_SUB0:[0-9]+]]:[[SGPR1_SUB1:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
+; GCN-DAG: v_mov_b32_e32 v[[VK0_SUB1:[0-9]+]], 0x40900000
+; GCN-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0{{$}}
+
+; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB0:[0-9]+]], s[[SGPR1_SUB0]]
+; GCN-DAG: v_mov_b32_e32 v[[VS1_SUB1:[0-9]+]], s[[SGPR1_SUB1]]
+; GCN: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK0_SUB1]]{{\]}}
+
+; Same zero component is re-used for half of each immediate.
+; GCN: v_mov_b32_e32 v[[VK1_SUB1:[0-9]+]], 0x40b00000
+; GCN: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[VS1_SUB0]]:[[VS1_SUB1]]{{\]}}, [[SGPR0]], v{{\[}}[[VZERO]]:[[VK1_SUB1]]{{\]}}
+
+; GCN: buffer_store_dwordx2 [[RESULT0]]
+; GCN: buffer_store_dwordx2 [[RESULT1]]
+define void @test_s0_s1_k_f64(double addrspace(1)* %out, double %a, double %b) #0 {
+  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 1024.0) #1
+  %fma1 = call double @llvm.fma.f64(double %a, double %b, double 4096.0) #1
+  store volatile double %fma0, double addrspace(1)* %out
+  store volatile double %fma1, double addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll
index 7d0ebd139f51..1cbefba60c95 100644
--- a/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/test/CodeGen/AMDGPU/valu-i1.ll
@@ -78,8 +78,8 @@ exit:
 
 ; SI: BB2_3:
 ; SI: buffer_load_dword
-; SI: buffer_store_dword
-; SI: v_cmp_eq_i32_e32 vcc,
+; SI-DAG: buffer_store_dword
+; SI-DAG: v_cmp_eq_i32_e32 vcc,
 ; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
 ; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
 ; SI: s_cbranch_execnz BB2_3
@@ -128,18 +128,18 @@ exit:
 
 ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
 ; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
 ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
-; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
-; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
+; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
+; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
 ; SI: s_cbranch_execz BB3_5
 
 ; SI: BB#4:
 ; SI: buffer_store_dword
-; SI: v_cmp_ge_i64_e32 vcc
-; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]]
+; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]]
+; SI: s_or_b64 [[COND_STATE]], [[CMP]], [[COND_STATE]]
 
 ; SI: BB3_5:
-; SI: s_or_b64 exec, exec, [[ORNEG1]]
-; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]]
+; SI: s_or_b64 exec, exec, [[ORNEG2]]
+; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[COND_STATE]]
 ; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
 ; SI: s_cbranch_execnz BB3_3
diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
new file mode 100644
index 000000000000..cd7c78f408dd
--- /dev/null
+++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -0,0 +1,585 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; XUN: llc -march=amdgcn -mcpu=hawaii -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CIHSA %s
+; XUN: llc -march=amdgcn -mcpu=fiji -mtriple=amdgcn-unknown-amdhsa -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VIHSA %s
+
+; This ends up using all 256 registers and requires register
+; scavenging which will fail to find an unused register.
+
+; Check the ScratchSize to avoid regressions from spilling
+; intermediate register class copies.
+
+; FIXME: The same register is initialized to 0 for every spill.
+
+declare i32 @llvm.r600.read.tgid.x() #1
+declare i32 @llvm.r600.read.tgid.y() #1
+declare i32 @llvm.r600.read.tgid.z() #1
+
+; GCN-LABEL: {{^}}spill_vgpr_compute:
+
+; GCN: s_mov_b32 s16, s3
+; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s14, -1
+; SI-NEXT: s_mov_b32 s15, 0x80f000
+; VI-NEXT: s_mov_b32 s15, 0x800000
+
+
+; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
+
+; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+; GCN: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[12:15], s16 offen offset:{{[0-9]+}}
+
+; GCN: NumVgprs: 256
+; GCN: ScratchSize: 1024
+
+; s[0:3] input user SGPRs. s4,s5,s6 = workgroup IDs. s8 scratch offset.
+define void @spill_vgpr_compute(<4 x float> %arg6, float addrspace(1)* %arg, i32 %arg1, i32 %arg2, float %arg3, float %arg4, float %arg5) #0 { +bb: + %tmp = add i32 %arg1, %arg2 + %tmp7 = extractelement <4 x float> %arg6, i32 0 + %tmp8 = extractelement <4 x float> %arg6, i32 1 + %tmp9 = extractelement <4 x float> %arg6, i32 2 + %tmp10 = extractelement <4 x float> %arg6, i32 3 + %tmp11 = bitcast float %arg5 to i32 + br label %bb12 + +bb12: ; preds = %bb145, %bb + %tmp13 = phi float [ 0.000000e+00, %bb ], [ %tmp338, %bb145 ] + %tmp14 = phi float [ 0.000000e+00, %bb ], [ %tmp337, %bb145 ] + %tmp15 = phi float [ 0.000000e+00, %bb ], [ %tmp336, %bb145 ] + %tmp16 = phi float [ 0.000000e+00, %bb ], [ %tmp339, %bb145 ] + %tmp17 = phi float [ 0.000000e+00, %bb ], [ %tmp335, %bb145 ] + %tmp18 = phi float [ 0.000000e+00, %bb ], [ %tmp334, %bb145 ] + %tmp19 = phi float [ 0.000000e+00, %bb ], [ %tmp333, %bb145 ] + %tmp20 = phi float [ 0.000000e+00, %bb ], [ %tmp340, %bb145 ] + %tmp21 = phi float [ 0.000000e+00, %bb ], [ %tmp332, %bb145 ] + %tmp22 = phi float [ 0.000000e+00, %bb ], [ %tmp331, %bb145 ] + %tmp23 = phi float [ 0.000000e+00, %bb ], [ %tmp330, %bb145 ] + %tmp24 = phi float [ 0.000000e+00, %bb ], [ %tmp341, %bb145 ] + %tmp25 = phi float [ 0.000000e+00, %bb ], [ %tmp329, %bb145 ] + %tmp26 = phi float [ 0.000000e+00, %bb ], [ %tmp328, %bb145 ] + %tmp27 = phi float [ 0.000000e+00, %bb ], [ %tmp327, %bb145 ] + %tmp28 = phi float [ 0.000000e+00, %bb ], [ %tmp342, %bb145 ] + %tmp29 = phi float [ 0.000000e+00, %bb ], [ %tmp326, %bb145 ] + %tmp30 = phi float [ 0.000000e+00, %bb ], [ %tmp325, %bb145 ] + %tmp31 = phi float [ 0.000000e+00, %bb ], [ %tmp324, %bb145 ] + %tmp32 = phi float [ 0.000000e+00, %bb ], [ %tmp343, %bb145 ] + %tmp33 = phi float [ 0.000000e+00, %bb ], [ %tmp323, %bb145 ] + %tmp34 = phi float [ 0.000000e+00, %bb ], [ %tmp322, %bb145 ] + %tmp35 = phi float [ 0.000000e+00, %bb ], [ %tmp321, %bb145 ] + %tmp36 = phi float [ 0.000000e+00, %bb ], [ %tmp344, %bb145 ] + %tmp37 = phi float [ 0.000000e+00, %bb ], [ %tmp320, %bb145 ] + %tmp38 = phi float [ 0.000000e+00, %bb ], [ %tmp319, %bb145 ] + %tmp39 = phi float [ 0.000000e+00, %bb ], [ %tmp318, %bb145 ] + %tmp40 = phi float [ 0.000000e+00, %bb ], [ %tmp345, %bb145 ] + %tmp41 = phi float [ 0.000000e+00, %bb ], [ %tmp317, %bb145 ] + %tmp42 = phi float [ 0.000000e+00, %bb ], [ %tmp316, %bb145 ] + %tmp43 = phi float [ 0.000000e+00, %bb ], [ %tmp315, %bb145 ] + %tmp44 = phi float [ 0.000000e+00, %bb ], [ %tmp346, %bb145 ] + %tmp45 = phi float [ 0.000000e+00, %bb ], [ %tmp314, %bb145 ] + %tmp46 = phi float [ 0.000000e+00, %bb ], [ %tmp313, %bb145 ] + %tmp47 = phi float [ 0.000000e+00, %bb ], [ %tmp312, %bb145 ] + %tmp48 = phi float [ 0.000000e+00, %bb ], [ %tmp347, %bb145 ] + %tmp49 = phi float [ 0.000000e+00, %bb ], [ %tmp311, %bb145 ] + %tmp50 = phi float [ 0.000000e+00, %bb ], [ %tmp310, %bb145 ] + %tmp51 = phi float [ 0.000000e+00, %bb ], [ %tmp309, %bb145 ] + %tmp52 = phi float [ 0.000000e+00, %bb ], [ %tmp348, %bb145 ] + %tmp53 = phi float [ 0.000000e+00, %bb ], [ %tmp308, %bb145 ] + %tmp54 = phi float [ 0.000000e+00, %bb ], [ %tmp307, %bb145 ] + %tmp55 = phi float [ 0.000000e+00, %bb ], [ %tmp306, %bb145 ] + %tmp56 = phi float [ 0.000000e+00, %bb ], [ %tmp349, %bb145 ] + %tmp57 = phi float [ 0.000000e+00, %bb ], [ %tmp305, %bb145 ] + %tmp58 = phi float [ 0.000000e+00, %bb ], [ %tmp304, %bb145 ] + %tmp59 = phi float [ 0.000000e+00, %bb ], [ %tmp303, %bb145 ] + %tmp60 = phi float [ 0.000000e+00, %bb ], [ %tmp350, %bb145 ] + %tmp61 = phi 
float [ 0.000000e+00, %bb ], [ %tmp302, %bb145 ] + %tmp62 = phi float [ 0.000000e+00, %bb ], [ %tmp301, %bb145 ] + %tmp63 = phi float [ 0.000000e+00, %bb ], [ %tmp300, %bb145 ] + %tmp64 = phi float [ 0.000000e+00, %bb ], [ %tmp351, %bb145 ] + %tmp65 = phi float [ 0.000000e+00, %bb ], [ %tmp299, %bb145 ] + %tmp66 = phi float [ 0.000000e+00, %bb ], [ %tmp298, %bb145 ] + %tmp67 = phi float [ 0.000000e+00, %bb ], [ %tmp297, %bb145 ] + %tmp68 = phi float [ 0.000000e+00, %bb ], [ %tmp352, %bb145 ] + %tmp69 = phi float [ 0.000000e+00, %bb ], [ %tmp296, %bb145 ] + %tmp70 = phi float [ 0.000000e+00, %bb ], [ %tmp295, %bb145 ] + %tmp71 = phi float [ 0.000000e+00, %bb ], [ %tmp294, %bb145 ] + %tmp72 = phi float [ 0.000000e+00, %bb ], [ %tmp353, %bb145 ] + %tmp73 = phi float [ 0.000000e+00, %bb ], [ %tmp293, %bb145 ] + %tmp74 = phi float [ 0.000000e+00, %bb ], [ %tmp292, %bb145 ] + %tmp75 = phi float [ 0.000000e+00, %bb ], [ %tmp291, %bb145 ] + %tmp76 = phi float [ 0.000000e+00, %bb ], [ %tmp354, %bb145 ] + %tmp77 = phi float [ 0.000000e+00, %bb ], [ %tmp290, %bb145 ] + %tmp78 = phi float [ 0.000000e+00, %bb ], [ %tmp289, %bb145 ] + %tmp79 = phi float [ 0.000000e+00, %bb ], [ %tmp288, %bb145 ] + %tmp80 = phi float [ 0.000000e+00, %bb ], [ %tmp355, %bb145 ] + %tmp81 = phi float [ 0.000000e+00, %bb ], [ %tmp287, %bb145 ] + %tmp82 = phi float [ 0.000000e+00, %bb ], [ %tmp286, %bb145 ] + %tmp83 = phi float [ 0.000000e+00, %bb ], [ %tmp285, %bb145 ] + %tmp84 = phi float [ 0.000000e+00, %bb ], [ %tmp356, %bb145 ] + %tmp85 = phi float [ 0.000000e+00, %bb ], [ %tmp284, %bb145 ] + %tmp86 = phi float [ 0.000000e+00, %bb ], [ %tmp283, %bb145 ] + %tmp87 = phi float [ 0.000000e+00, %bb ], [ %tmp282, %bb145 ] + %tmp88 = phi float [ 0.000000e+00, %bb ], [ %tmp357, %bb145 ] + %tmp89 = phi float [ 0.000000e+00, %bb ], [ %tmp281, %bb145 ] + %tmp90 = phi float [ 0.000000e+00, %bb ], [ %tmp280, %bb145 ] + %tmp91 = phi float [ 0.000000e+00, %bb ], [ %tmp279, %bb145 ] + %tmp92 = phi float [ 0.000000e+00, %bb ], [ %tmp358, %bb145 ] + %tmp93 = phi float [ 0.000000e+00, %bb ], [ %tmp359, %bb145 ] + %tmp94 = phi float [ 0.000000e+00, %bb ], [ %tmp360, %bb145 ] + %tmp95 = phi float [ 0.000000e+00, %bb ], [ %tmp409, %bb145 ] + %tmp96 = phi float [ 0.000000e+00, %bb ], [ %tmp361, %bb145 ] + %tmp97 = phi float [ 0.000000e+00, %bb ], [ %tmp362, %bb145 ] + %tmp98 = phi float [ 0.000000e+00, %bb ], [ %tmp363, %bb145 ] + %tmp99 = phi float [ 0.000000e+00, %bb ], [ %tmp364, %bb145 ] + %tmp100 = phi float [ 0.000000e+00, %bb ], [ %tmp365, %bb145 ] + %tmp101 = phi float [ 0.000000e+00, %bb ], [ %tmp366, %bb145 ] + %tmp102 = phi float [ 0.000000e+00, %bb ], [ %tmp367, %bb145 ] + %tmp103 = phi float [ 0.000000e+00, %bb ], [ %tmp368, %bb145 ] + %tmp104 = phi float [ 0.000000e+00, %bb ], [ %tmp369, %bb145 ] + %tmp105 = phi float [ 0.000000e+00, %bb ], [ %tmp370, %bb145 ] + %tmp106 = phi float [ 0.000000e+00, %bb ], [ %tmp371, %bb145 ] + %tmp107 = phi float [ 0.000000e+00, %bb ], [ %tmp372, %bb145 ] + %tmp108 = phi float [ 0.000000e+00, %bb ], [ %tmp373, %bb145 ] + %tmp109 = phi float [ 0.000000e+00, %bb ], [ %tmp374, %bb145 ] + %tmp110 = phi float [ 0.000000e+00, %bb ], [ %tmp375, %bb145 ] + %tmp111 = phi float [ 0.000000e+00, %bb ], [ %tmp376, %bb145 ] + %tmp112 = phi float [ 0.000000e+00, %bb ], [ %tmp377, %bb145 ] + %tmp113 = phi float [ 0.000000e+00, %bb ], [ %tmp378, %bb145 ] + %tmp114 = phi float [ 0.000000e+00, %bb ], [ %tmp379, %bb145 ] + %tmp115 = phi float [ 0.000000e+00, %bb ], [ %tmp380, %bb145 ] + %tmp116 = phi float [ 
0.000000e+00, %bb ], [ %tmp381, %bb145 ] + %tmp117 = phi float [ 0.000000e+00, %bb ], [ %tmp382, %bb145 ] + %tmp118 = phi float [ 0.000000e+00, %bb ], [ %tmp383, %bb145 ] + %tmp119 = phi float [ 0.000000e+00, %bb ], [ %tmp384, %bb145 ] + %tmp120 = phi float [ 0.000000e+00, %bb ], [ %tmp385, %bb145 ] + %tmp121 = phi float [ 0.000000e+00, %bb ], [ %tmp386, %bb145 ] + %tmp122 = phi float [ 0.000000e+00, %bb ], [ %tmp387, %bb145 ] + %tmp123 = phi float [ 0.000000e+00, %bb ], [ %tmp388, %bb145 ] + %tmp124 = phi float [ 0.000000e+00, %bb ], [ %tmp389, %bb145 ] + %tmp125 = phi float [ 0.000000e+00, %bb ], [ %tmp390, %bb145 ] + %tmp126 = phi float [ 0.000000e+00, %bb ], [ %tmp391, %bb145 ] + %tmp127 = phi float [ 0.000000e+00, %bb ], [ %tmp392, %bb145 ] + %tmp128 = phi float [ 0.000000e+00, %bb ], [ %tmp393, %bb145 ] + %tmp129 = phi float [ 0.000000e+00, %bb ], [ %tmp394, %bb145 ] + %tmp130 = phi float [ 0.000000e+00, %bb ], [ %tmp395, %bb145 ] + %tmp131 = phi float [ 0.000000e+00, %bb ], [ %tmp396, %bb145 ] + %tmp132 = phi float [ 0.000000e+00, %bb ], [ %tmp397, %bb145 ] + %tmp133 = phi float [ 0.000000e+00, %bb ], [ %tmp398, %bb145 ] + %tmp134 = phi float [ 0.000000e+00, %bb ], [ %tmp399, %bb145 ] + %tmp135 = phi float [ 0.000000e+00, %bb ], [ %tmp400, %bb145 ] + %tmp136 = phi float [ 0.000000e+00, %bb ], [ %tmp401, %bb145 ] + %tmp137 = phi float [ 0.000000e+00, %bb ], [ %tmp402, %bb145 ] + %tmp138 = phi float [ 0.000000e+00, %bb ], [ %tmp403, %bb145 ] + %tmp139 = phi float [ 0.000000e+00, %bb ], [ %tmp404, %bb145 ] + %tmp140 = phi float [ 0.000000e+00, %bb ], [ %tmp405, %bb145 ] + %tmp141 = phi float [ 0.000000e+00, %bb ], [ %tmp406, %bb145 ] + %tmp142 = bitcast float %tmp95 to i32 + %tmp143 = icmp sgt i32 %tmp142, 125 + br i1 %tmp143, label %bb144, label %bb145 + +bb144: ; preds = %bb12 + store volatile float %arg3, float addrspace(1)* %arg + store volatile float %tmp91, float addrspace(1)* %arg + store volatile float %tmp90, float addrspace(1)* %arg + store volatile float %tmp89, float addrspace(1)* %arg + store volatile float %tmp87, float addrspace(1)* %arg + store volatile float %tmp86, float addrspace(1)* %arg + store volatile float %tmp85, float addrspace(1)* %arg + store volatile float %tmp83, float addrspace(1)* %arg + store volatile float %tmp82, float addrspace(1)* %arg + store volatile float %tmp81, float addrspace(1)* %arg + store volatile float %tmp79, float addrspace(1)* %arg + store volatile float %tmp78, float addrspace(1)* %arg + store volatile float %tmp77, float addrspace(1)* %arg + store volatile float %tmp75, float addrspace(1)* %arg + store volatile float %tmp74, float addrspace(1)* %arg + store volatile float %tmp73, float addrspace(1)* %arg + store volatile float %tmp71, float addrspace(1)* %arg + store volatile float %tmp70, float addrspace(1)* %arg + store volatile float %tmp69, float addrspace(1)* %arg + store volatile float %tmp67, float addrspace(1)* %arg + store volatile float %tmp66, float addrspace(1)* %arg + store volatile float %tmp65, float addrspace(1)* %arg + store volatile float %tmp63, float addrspace(1)* %arg + store volatile float %tmp62, float addrspace(1)* %arg + store volatile float %tmp61, float addrspace(1)* %arg + store volatile float %tmp59, float addrspace(1)* %arg + store volatile float %tmp58, float addrspace(1)* %arg + store volatile float %tmp57, float addrspace(1)* %arg + store volatile float %tmp55, float addrspace(1)* %arg + store volatile float %tmp54, float addrspace(1)* %arg + store volatile float %tmp53, float addrspace(1)* %arg + 
store volatile float %tmp51, float addrspace(1)* %arg + store volatile float %tmp50, float addrspace(1)* %arg + store volatile float %tmp49, float addrspace(1)* %arg + store volatile float %tmp47, float addrspace(1)* %arg + store volatile float %tmp46, float addrspace(1)* %arg + store volatile float %tmp45, float addrspace(1)* %arg + store volatile float %tmp43, float addrspace(1)* %arg + store volatile float %tmp42, float addrspace(1)* %arg + store volatile float %tmp41, float addrspace(1)* %arg + store volatile float %tmp39, float addrspace(1)* %arg + store volatile float %tmp38, float addrspace(1)* %arg + store volatile float %tmp37, float addrspace(1)* %arg + store volatile float %tmp35, float addrspace(1)* %arg + store volatile float %tmp34, float addrspace(1)* %arg + store volatile float %tmp33, float addrspace(1)* %arg + store volatile float %tmp31, float addrspace(1)* %arg + store volatile float %tmp30, float addrspace(1)* %arg + store volatile float %tmp29, float addrspace(1)* %arg + store volatile float %tmp27, float addrspace(1)* %arg + store volatile float %tmp26, float addrspace(1)* %arg + store volatile float %tmp25, float addrspace(1)* %arg + store volatile float %tmp23, float addrspace(1)* %arg + store volatile float %tmp22, float addrspace(1)* %arg + store volatile float %tmp21, float addrspace(1)* %arg + store volatile float %tmp19, float addrspace(1)* %arg + store volatile float %tmp18, float addrspace(1)* %arg + store volatile float %tmp17, float addrspace(1)* %arg + store volatile float %tmp15, float addrspace(1)* %arg + store volatile float %tmp14, float addrspace(1)* %arg + store volatile float %tmp13, float addrspace(1)* %arg + store volatile float %tmp16, float addrspace(1)* %arg + store volatile float %tmp20, float addrspace(1)* %arg + store volatile float %tmp24, float addrspace(1)* %arg + store volatile float %tmp28, float addrspace(1)* %arg + store volatile float %tmp32, float addrspace(1)* %arg + store volatile float %tmp36, float addrspace(1)* %arg + store volatile float %tmp40, float addrspace(1)* %arg + store volatile float %tmp44, float addrspace(1)* %arg + store volatile float %tmp48, float addrspace(1)* %arg + store volatile float %tmp52, float addrspace(1)* %arg + store volatile float %tmp56, float addrspace(1)* %arg + store volatile float %tmp60, float addrspace(1)* %arg + store volatile float %tmp64, float addrspace(1)* %arg + store volatile float %tmp68, float addrspace(1)* %arg + store volatile float %tmp72, float addrspace(1)* %arg + store volatile float %tmp76, float addrspace(1)* %arg + store volatile float %tmp80, float addrspace(1)* %arg + store volatile float %tmp84, float addrspace(1)* %arg + store volatile float %tmp88, float addrspace(1)* %arg + store volatile float %tmp92, float addrspace(1)* %arg + store volatile float %tmp93, float addrspace(1)* %arg + store volatile float %tmp94, float addrspace(1)* %arg + store volatile float %tmp96, float addrspace(1)* %arg + store volatile float %tmp97, float addrspace(1)* %arg + store volatile float %tmp98, float addrspace(1)* %arg + store volatile float %tmp99, float addrspace(1)* %arg + store volatile float %tmp100, float addrspace(1)* %arg + store volatile float %tmp101, float addrspace(1)* %arg + store volatile float %tmp102, float addrspace(1)* %arg + store volatile float %tmp103, float addrspace(1)* %arg + store volatile float %tmp104, float addrspace(1)* %arg + store volatile float %tmp105, float addrspace(1)* %arg + store volatile float %tmp106, float addrspace(1)* %arg + store volatile 
float %tmp107, float addrspace(1)* %arg + store volatile float %tmp108, float addrspace(1)* %arg + store volatile float %tmp109, float addrspace(1)* %arg + store volatile float %tmp110, float addrspace(1)* %arg + store volatile float %tmp111, float addrspace(1)* %arg + store volatile float %tmp112, float addrspace(1)* %arg + store volatile float %tmp113, float addrspace(1)* %arg + store volatile float %tmp114, float addrspace(1)* %arg + store volatile float %tmp115, float addrspace(1)* %arg + store volatile float %tmp116, float addrspace(1)* %arg + store volatile float %tmp117, float addrspace(1)* %arg + store volatile float %tmp118, float addrspace(1)* %arg + store volatile float %tmp119, float addrspace(1)* %arg + store volatile float %tmp120, float addrspace(1)* %arg + store volatile float %tmp121, float addrspace(1)* %arg + store volatile float %tmp122, float addrspace(1)* %arg + store volatile float %tmp123, float addrspace(1)* %arg + store volatile float %tmp124, float addrspace(1)* %arg + store volatile float %tmp125, float addrspace(1)* %arg + store volatile float %tmp126, float addrspace(1)* %arg + store volatile float %tmp127, float addrspace(1)* %arg + store volatile float %tmp128, float addrspace(1)* %arg + store volatile float %tmp129, float addrspace(1)* %arg + store volatile float %tmp130, float addrspace(1)* %arg + store volatile float %tmp131, float addrspace(1)* %arg + store volatile float %tmp132, float addrspace(1)* %arg + store volatile float %tmp133, float addrspace(1)* %arg + store volatile float %tmp134, float addrspace(1)* %arg + store volatile float %tmp135, float addrspace(1)* %arg + store volatile float %tmp136, float addrspace(1)* %arg + store volatile float %tmp137, float addrspace(1)* %arg + store volatile float %tmp138, float addrspace(1)* %arg + store volatile float %tmp139, float addrspace(1)* %arg + store volatile float %arg4, float addrspace(1)* %arg + store volatile float %tmp7, float addrspace(1)* %arg + store volatile float %tmp8, float addrspace(1)* %arg + store volatile float %tmp9, float addrspace(1)* %arg + store volatile float %tmp10, float addrspace(1)* %arg + ret void + +bb145: ; preds = %bb12 + %tmp146 = bitcast float %tmp95 to i32 + %tmp147 = bitcast float %tmp95 to i32 + %tmp148 = add i32 %tmp11, %tmp147 + %tmp149 = bitcast i32 %tmp148 to float + %tmp150 = insertelement <128 x float> undef, float %tmp91, i32 0 + %tmp151 = insertelement <128 x float> %tmp150, float %tmp90, i32 1 + %tmp152 = insertelement <128 x float> %tmp151, float %tmp89, i32 2 + %tmp153 = insertelement <128 x float> %tmp152, float %tmp87, i32 3 + %tmp154 = insertelement <128 x float> %tmp153, float %tmp86, i32 4 + %tmp155 = insertelement <128 x float> %tmp154, float %tmp85, i32 5 + %tmp156 = insertelement <128 x float> %tmp155, float %tmp83, i32 6 + %tmp157 = insertelement <128 x float> %tmp156, float %tmp82, i32 7 + %tmp158 = insertelement <128 x float> %tmp157, float %tmp81, i32 8 + %tmp159 = insertelement <128 x float> %tmp158, float %tmp79, i32 9 + %tmp160 = insertelement <128 x float> %tmp159, float %tmp78, i32 10 + %tmp161 = insertelement <128 x float> %tmp160, float %tmp77, i32 11 + %tmp162 = insertelement <128 x float> %tmp161, float %tmp75, i32 12 + %tmp163 = insertelement <128 x float> %tmp162, float %tmp74, i32 13 + %tmp164 = insertelement <128 x float> %tmp163, float %tmp73, i32 14 + %tmp165 = insertelement <128 x float> %tmp164, float %tmp71, i32 15 + %tmp166 = insertelement <128 x float> %tmp165, float %tmp70, i32 16 + %tmp167 = insertelement <128 x float> 
%tmp166, float %tmp69, i32 17 + %tmp168 = insertelement <128 x float> %tmp167, float %tmp67, i32 18 + %tmp169 = insertelement <128 x float> %tmp168, float %tmp66, i32 19 + %tmp170 = insertelement <128 x float> %tmp169, float %tmp65, i32 20 + %tmp171 = insertelement <128 x float> %tmp170, float %tmp63, i32 21 + %tmp172 = insertelement <128 x float> %tmp171, float %tmp62, i32 22 + %tmp173 = insertelement <128 x float> %tmp172, float %tmp61, i32 23 + %tmp174 = insertelement <128 x float> %tmp173, float %tmp59, i32 24 + %tmp175 = insertelement <128 x float> %tmp174, float %tmp58, i32 25 + %tmp176 = insertelement <128 x float> %tmp175, float %tmp57, i32 26 + %tmp177 = insertelement <128 x float> %tmp176, float %tmp55, i32 27 + %tmp178 = insertelement <128 x float> %tmp177, float %tmp54, i32 28 + %tmp179 = insertelement <128 x float> %tmp178, float %tmp53, i32 29 + %tmp180 = insertelement <128 x float> %tmp179, float %tmp51, i32 30 + %tmp181 = insertelement <128 x float> %tmp180, float %tmp50, i32 31 + %tmp182 = insertelement <128 x float> %tmp181, float %tmp49, i32 32 + %tmp183 = insertelement <128 x float> %tmp182, float %tmp47, i32 33 + %tmp184 = insertelement <128 x float> %tmp183, float %tmp46, i32 34 + %tmp185 = insertelement <128 x float> %tmp184, float %tmp45, i32 35 + %tmp186 = insertelement <128 x float> %tmp185, float %tmp43, i32 36 + %tmp187 = insertelement <128 x float> %tmp186, float %tmp42, i32 37 + %tmp188 = insertelement <128 x float> %tmp187, float %tmp41, i32 38 + %tmp189 = insertelement <128 x float> %tmp188, float %tmp39, i32 39 + %tmp190 = insertelement <128 x float> %tmp189, float %tmp38, i32 40 + %tmp191 = insertelement <128 x float> %tmp190, float %tmp37, i32 41 + %tmp192 = insertelement <128 x float> %tmp191, float %tmp35, i32 42 + %tmp193 = insertelement <128 x float> %tmp192, float %tmp34, i32 43 + %tmp194 = insertelement <128 x float> %tmp193, float %tmp33, i32 44 + %tmp195 = insertelement <128 x float> %tmp194, float %tmp31, i32 45 + %tmp196 = insertelement <128 x float> %tmp195, float %tmp30, i32 46 + %tmp197 = insertelement <128 x float> %tmp196, float %tmp29, i32 47 + %tmp198 = insertelement <128 x float> %tmp197, float %tmp27, i32 48 + %tmp199 = insertelement <128 x float> %tmp198, float %tmp26, i32 49 + %tmp200 = insertelement <128 x float> %tmp199, float %tmp25, i32 50 + %tmp201 = insertelement <128 x float> %tmp200, float %tmp23, i32 51 + %tmp202 = insertelement <128 x float> %tmp201, float %tmp22, i32 52 + %tmp203 = insertelement <128 x float> %tmp202, float %tmp21, i32 53 + %tmp204 = insertelement <128 x float> %tmp203, float %tmp19, i32 54 + %tmp205 = insertelement <128 x float> %tmp204, float %tmp18, i32 55 + %tmp206 = insertelement <128 x float> %tmp205, float %tmp17, i32 56 + %tmp207 = insertelement <128 x float> %tmp206, float %tmp15, i32 57 + %tmp208 = insertelement <128 x float> %tmp207, float %tmp14, i32 58 + %tmp209 = insertelement <128 x float> %tmp208, float %tmp13, i32 59 + %tmp210 = insertelement <128 x float> %tmp209, float %tmp16, i32 60 + %tmp211 = insertelement <128 x float> %tmp210, float %tmp20, i32 61 + %tmp212 = insertelement <128 x float> %tmp211, float %tmp24, i32 62 + %tmp213 = insertelement <128 x float> %tmp212, float %tmp28, i32 63 + %tmp214 = insertelement <128 x float> %tmp213, float %tmp32, i32 64 + %tmp215 = insertelement <128 x float> %tmp214, float %tmp36, i32 65 + %tmp216 = insertelement <128 x float> %tmp215, float %tmp40, i32 66 + %tmp217 = insertelement <128 x float> %tmp216, float %tmp44, i32 67 + %tmp218 = 
insertelement <128 x float> %tmp217, float %tmp48, i32 68 + %tmp219 = insertelement <128 x float> %tmp218, float %tmp52, i32 69 + %tmp220 = insertelement <128 x float> %tmp219, float %tmp56, i32 70 + %tmp221 = insertelement <128 x float> %tmp220, float %tmp60, i32 71 + %tmp222 = insertelement <128 x float> %tmp221, float %tmp64, i32 72 + %tmp223 = insertelement <128 x float> %tmp222, float %tmp68, i32 73 + %tmp224 = insertelement <128 x float> %tmp223, float %tmp72, i32 74 + %tmp225 = insertelement <128 x float> %tmp224, float %tmp76, i32 75 + %tmp226 = insertelement <128 x float> %tmp225, float %tmp80, i32 76 + %tmp227 = insertelement <128 x float> %tmp226, float %tmp84, i32 77 + %tmp228 = insertelement <128 x float> %tmp227, float %tmp88, i32 78 + %tmp229 = insertelement <128 x float> %tmp228, float %tmp92, i32 79 + %tmp230 = insertelement <128 x float> %tmp229, float %tmp93, i32 80 + %tmp231 = insertelement <128 x float> %tmp230, float %tmp94, i32 81 + %tmp232 = insertelement <128 x float> %tmp231, float %tmp96, i32 82 + %tmp233 = insertelement <128 x float> %tmp232, float %tmp97, i32 83 + %tmp234 = insertelement <128 x float> %tmp233, float %tmp98, i32 84 + %tmp235 = insertelement <128 x float> %tmp234, float %tmp99, i32 85 + %tmp236 = insertelement <128 x float> %tmp235, float %tmp100, i32 86 + %tmp237 = insertelement <128 x float> %tmp236, float %tmp101, i32 87 + %tmp238 = insertelement <128 x float> %tmp237, float %tmp102, i32 88 + %tmp239 = insertelement <128 x float> %tmp238, float %tmp103, i32 89 + %tmp240 = insertelement <128 x float> %tmp239, float %tmp104, i32 90 + %tmp241 = insertelement <128 x float> %tmp240, float %tmp105, i32 91 + %tmp242 = insertelement <128 x float> %tmp241, float %tmp106, i32 92 + %tmp243 = insertelement <128 x float> %tmp242, float %tmp107, i32 93 + %tmp244 = insertelement <128 x float> %tmp243, float %tmp108, i32 94 + %tmp245 = insertelement <128 x float> %tmp244, float %tmp109, i32 95 + %tmp246 = insertelement <128 x float> %tmp245, float %tmp110, i32 96 + %tmp247 = insertelement <128 x float> %tmp246, float %tmp111, i32 97 + %tmp248 = insertelement <128 x float> %tmp247, float %tmp112, i32 98 + %tmp249 = insertelement <128 x float> %tmp248, float %tmp113, i32 99 + %tmp250 = insertelement <128 x float> %tmp249, float %tmp114, i32 100 + %tmp251 = insertelement <128 x float> %tmp250, float %tmp115, i32 101 + %tmp252 = insertelement <128 x float> %tmp251, float %tmp116, i32 102 + %tmp253 = insertelement <128 x float> %tmp252, float %tmp117, i32 103 + %tmp254 = insertelement <128 x float> %tmp253, float %tmp118, i32 104 + %tmp255 = insertelement <128 x float> %tmp254, float %tmp119, i32 105 + %tmp256 = insertelement <128 x float> %tmp255, float %tmp120, i32 106 + %tmp257 = insertelement <128 x float> %tmp256, float %tmp121, i32 107 + %tmp258 = insertelement <128 x float> %tmp257, float %tmp122, i32 108 + %tmp259 = insertelement <128 x float> %tmp258, float %tmp123, i32 109 + %tmp260 = insertelement <128 x float> %tmp259, float %tmp124, i32 110 + %tmp261 = insertelement <128 x float> %tmp260, float %tmp125, i32 111 + %tmp262 = insertelement <128 x float> %tmp261, float %tmp126, i32 112 + %tmp263 = insertelement <128 x float> %tmp262, float %tmp127, i32 113 + %tmp264 = insertelement <128 x float> %tmp263, float %tmp128, i32 114 + %tmp265 = insertelement <128 x float> %tmp264, float %tmp129, i32 115 + %tmp266 = insertelement <128 x float> %tmp265, float %tmp130, i32 116 + %tmp267 = insertelement <128 x float> %tmp266, float %tmp131, i32 117 + %tmp268 = 
insertelement <128 x float> %tmp267, float %tmp132, i32 118 + %tmp269 = insertelement <128 x float> %tmp268, float %tmp133, i32 119 + %tmp270 = insertelement <128 x float> %tmp269, float %tmp134, i32 120 + %tmp271 = insertelement <128 x float> %tmp270, float %tmp135, i32 121 + %tmp272 = insertelement <128 x float> %tmp271, float %tmp136, i32 122 + %tmp273 = insertelement <128 x float> %tmp272, float %tmp137, i32 123 + %tmp274 = insertelement <128 x float> %tmp273, float %tmp138, i32 124 + %tmp275 = insertelement <128 x float> %tmp274, float %tmp139, i32 125 + %tmp276 = insertelement <128 x float> %tmp275, float %tmp140, i32 126 + %tmp277 = insertelement <128 x float> %tmp276, float %tmp141, i32 127 + %tmp278 = insertelement <128 x float> %tmp277, float %tmp149, i32 %tmp146 + %tmp279 = extractelement <128 x float> %tmp278, i32 0 + %tmp280 = extractelement <128 x float> %tmp278, i32 1 + %tmp281 = extractelement <128 x float> %tmp278, i32 2 + %tmp282 = extractelement <128 x float> %tmp278, i32 3 + %tmp283 = extractelement <128 x float> %tmp278, i32 4 + %tmp284 = extractelement <128 x float> %tmp278, i32 5 + %tmp285 = extractelement <128 x float> %tmp278, i32 6 + %tmp286 = extractelement <128 x float> %tmp278, i32 7 + %tmp287 = extractelement <128 x float> %tmp278, i32 8 + %tmp288 = extractelement <128 x float> %tmp278, i32 9 + %tmp289 = extractelement <128 x float> %tmp278, i32 10 + %tmp290 = extractelement <128 x float> %tmp278, i32 11 + %tmp291 = extractelement <128 x float> %tmp278, i32 12 + %tmp292 = extractelement <128 x float> %tmp278, i32 13 + %tmp293 = extractelement <128 x float> %tmp278, i32 14 + %tmp294 = extractelement <128 x float> %tmp278, i32 15 + %tmp295 = extractelement <128 x float> %tmp278, i32 16 + %tmp296 = extractelement <128 x float> %tmp278, i32 17 + %tmp297 = extractelement <128 x float> %tmp278, i32 18 + %tmp298 = extractelement <128 x float> %tmp278, i32 19 + %tmp299 = extractelement <128 x float> %tmp278, i32 20 + %tmp300 = extractelement <128 x float> %tmp278, i32 21 + %tmp301 = extractelement <128 x float> %tmp278, i32 22 + %tmp302 = extractelement <128 x float> %tmp278, i32 23 + %tmp303 = extractelement <128 x float> %tmp278, i32 24 + %tmp304 = extractelement <128 x float> %tmp278, i32 25 + %tmp305 = extractelement <128 x float> %tmp278, i32 26 + %tmp306 = extractelement <128 x float> %tmp278, i32 27 + %tmp307 = extractelement <128 x float> %tmp278, i32 28 + %tmp308 = extractelement <128 x float> %tmp278, i32 29 + %tmp309 = extractelement <128 x float> %tmp278, i32 30 + %tmp310 = extractelement <128 x float> %tmp278, i32 31 + %tmp311 = extractelement <128 x float> %tmp278, i32 32 + %tmp312 = extractelement <128 x float> %tmp278, i32 33 + %tmp313 = extractelement <128 x float> %tmp278, i32 34 + %tmp314 = extractelement <128 x float> %tmp278, i32 35 + %tmp315 = extractelement <128 x float> %tmp278, i32 36 + %tmp316 = extractelement <128 x float> %tmp278, i32 37 + %tmp317 = extractelement <128 x float> %tmp278, i32 38 + %tmp318 = extractelement <128 x float> %tmp278, i32 39 + %tmp319 = extractelement <128 x float> %tmp278, i32 40 + %tmp320 = extractelement <128 x float> %tmp278, i32 41 + %tmp321 = extractelement <128 x float> %tmp278, i32 42 + %tmp322 = extractelement <128 x float> %tmp278, i32 43 + %tmp323 = extractelement <128 x float> %tmp278, i32 44 + %tmp324 = extractelement <128 x float> %tmp278, i32 45 + %tmp325 = extractelement <128 x float> %tmp278, i32 46 + %tmp326 = extractelement <128 x float> %tmp278, i32 47 + %tmp327 = extractelement <128 x float> 
%tmp278, i32 48 + %tmp328 = extractelement <128 x float> %tmp278, i32 49 + %tmp329 = extractelement <128 x float> %tmp278, i32 50 + %tmp330 = extractelement <128 x float> %tmp278, i32 51 + %tmp331 = extractelement <128 x float> %tmp278, i32 52 + %tmp332 = extractelement <128 x float> %tmp278, i32 53 + %tmp333 = extractelement <128 x float> %tmp278, i32 54 + %tmp334 = extractelement <128 x float> %tmp278, i32 55 + %tmp335 = extractelement <128 x float> %tmp278, i32 56 + %tmp336 = extractelement <128 x float> %tmp278, i32 57 + %tmp337 = extractelement <128 x float> %tmp278, i32 58 + %tmp338 = extractelement <128 x float> %tmp278, i32 59 + %tmp339 = extractelement <128 x float> %tmp278, i32 60 + %tmp340 = extractelement <128 x float> %tmp278, i32 61 + %tmp341 = extractelement <128 x float> %tmp278, i32 62 + %tmp342 = extractelement <128 x float> %tmp278, i32 63 + %tmp343 = extractelement <128 x float> %tmp278, i32 64 + %tmp344 = extractelement <128 x float> %tmp278, i32 65 + %tmp345 = extractelement <128 x float> %tmp278, i32 66 + %tmp346 = extractelement <128 x float> %tmp278, i32 67 + %tmp347 = extractelement <128 x float> %tmp278, i32 68 + %tmp348 = extractelement <128 x float> %tmp278, i32 69 + %tmp349 = extractelement <128 x float> %tmp278, i32 70 + %tmp350 = extractelement <128 x float> %tmp278, i32 71 + %tmp351 = extractelement <128 x float> %tmp278, i32 72 + %tmp352 = extractelement <128 x float> %tmp278, i32 73 + %tmp353 = extractelement <128 x float> %tmp278, i32 74 + %tmp354 = extractelement <128 x float> %tmp278, i32 75 + %tmp355 = extractelement <128 x float> %tmp278, i32 76 + %tmp356 = extractelement <128 x float> %tmp278, i32 77 + %tmp357 = extractelement <128 x float> %tmp278, i32 78 + %tmp358 = extractelement <128 x float> %tmp278, i32 79 + %tmp359 = extractelement <128 x float> %tmp278, i32 80 + %tmp360 = extractelement <128 x float> %tmp278, i32 81 + %tmp361 = extractelement <128 x float> %tmp278, i32 82 + %tmp362 = extractelement <128 x float> %tmp278, i32 83 + %tmp363 = extractelement <128 x float> %tmp278, i32 84 + %tmp364 = extractelement <128 x float> %tmp278, i32 85 + %tmp365 = extractelement <128 x float> %tmp278, i32 86 + %tmp366 = extractelement <128 x float> %tmp278, i32 87 + %tmp367 = extractelement <128 x float> %tmp278, i32 88 + %tmp368 = extractelement <128 x float> %tmp278, i32 89 + %tmp369 = extractelement <128 x float> %tmp278, i32 90 + %tmp370 = extractelement <128 x float> %tmp278, i32 91 + %tmp371 = extractelement <128 x float> %tmp278, i32 92 + %tmp372 = extractelement <128 x float> %tmp278, i32 93 + %tmp373 = extractelement <128 x float> %tmp278, i32 94 + %tmp374 = extractelement <128 x float> %tmp278, i32 95 + %tmp375 = extractelement <128 x float> %tmp278, i32 96 + %tmp376 = extractelement <128 x float> %tmp278, i32 97 + %tmp377 = extractelement <128 x float> %tmp278, i32 98 + %tmp378 = extractelement <128 x float> %tmp278, i32 99 + %tmp379 = extractelement <128 x float> %tmp278, i32 100 + %tmp380 = extractelement <128 x float> %tmp278, i32 101 + %tmp381 = extractelement <128 x float> %tmp278, i32 102 + %tmp382 = extractelement <128 x float> %tmp278, i32 103 + %tmp383 = extractelement <128 x float> %tmp278, i32 104 + %tmp384 = extractelement <128 x float> %tmp278, i32 105 + %tmp385 = extractelement <128 x float> %tmp278, i32 106 + %tmp386 = extractelement <128 x float> %tmp278, i32 107 + %tmp387 = extractelement <128 x float> %tmp278, i32 108 + %tmp388 = extractelement <128 x float> %tmp278, i32 109 + %tmp389 = extractelement <128 x float> %tmp278, 
i32 110 + %tmp390 = extractelement <128 x float> %tmp278, i32 111 + %tmp391 = extractelement <128 x float> %tmp278, i32 112 + %tmp392 = extractelement <128 x float> %tmp278, i32 113 + %tmp393 = extractelement <128 x float> %tmp278, i32 114 + %tmp394 = extractelement <128 x float> %tmp278, i32 115 + %tmp395 = extractelement <128 x float> %tmp278, i32 116 + %tmp396 = extractelement <128 x float> %tmp278, i32 117 + %tmp397 = extractelement <128 x float> %tmp278, i32 118 + %tmp398 = extractelement <128 x float> %tmp278, i32 119 + %tmp399 = extractelement <128 x float> %tmp278, i32 120 + %tmp400 = extractelement <128 x float> %tmp278, i32 121 + %tmp401 = extractelement <128 x float> %tmp278, i32 122 + %tmp402 = extractelement <128 x float> %tmp278, i32 123 + %tmp403 = extractelement <128 x float> %tmp278, i32 124 + %tmp404 = extractelement <128 x float> %tmp278, i32 125 + %tmp405 = extractelement <128 x float> %tmp278, i32 126 + %tmp406 = extractelement <128 x float> %tmp278, i32 127 + %tmp407 = bitcast float %tmp95 to i32 + %tmp408 = add i32 %tmp407, 1 + %tmp409 = bitcast i32 %tmp408 to float + br label %bb12 +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } diff --git a/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll new file mode 100644 index 000000000000..16abb89bb0b8 --- /dev/null +++ b/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll @@ -0,0 +1,494 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+vgpr-spilling -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; This ends up using all 255 registers and requires register +; scavenging which will fail to find an unsued register. + +; Check the ScratchSize to avoid regressions from spilling +; intermediate register class copies. + +; FIXME: The same register is initialized to 0 for every spill. 
+ +; GCN-LABEL: {{^}}main: + +; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s14, -1 +; SI-NEXT: s_mov_b32 s15, 0x80f000 +; VI-NEXT: s_mov_b32 s15, 0x800000 + +; s12 is offset user SGPR +; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Spill + +; GCN: NumVgprs: 256 +; GCN: ScratchSize: 1024 + +define void @main([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <4 x i32>] addrspace(2)* byval %arg2, [34 x <8 x i32>] addrspace(2)* byval %arg3, [16 x <16 x i8>] addrspace(2)* byval %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10) #0 { +bb: + %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg1, i64 0, i64 0 + %tmp11 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, align 16, !tbaa !0 + %tmp12 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 0) + %tmp13 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 16) + %tmp14 = call float @llvm.SI.load.const(<16 x i8> %tmp11, i32 32) + %tmp15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %arg4, i64 0, i64 0 + %tmp16 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp15, align 16, !tbaa !0 + %tmp17 = add i32 %arg5, %arg7 + %tmp18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %tmp16, i32 0, i32 %tmp17) + %tmp19 = extractelement <4 x float> %tmp18, i32 0 + %tmp20 = extractelement <4 x float> %tmp18, i32 1 + %tmp21 = extractelement <4 x float> %tmp18, i32 2 + %tmp22 = extractelement <4 x float> %tmp18, i32 3 + %tmp23 = bitcast float %tmp14 to i32 + br label %bb24 + +bb24: ; preds = %bb157, %bb + %tmp25 = phi float [ 0.000000e+00, %bb ], [ %tmp350, %bb157 ] + %tmp26 = phi float [ 0.000000e+00, %bb ], [ %tmp349, %bb157 ] + %tmp27 = phi float [ 0.000000e+00, %bb ], [ %tmp348, %bb157 ] + %tmp28 = phi float [ 0.000000e+00, %bb ], [ %tmp351, %bb157 ] + %tmp29 = phi float [ 0.000000e+00, %bb ], [ %tmp347, %bb157 ] + %tmp30 = phi float [ 0.000000e+00, %bb ], [ %tmp346, %bb157 ] + %tmp31 = phi float [ 0.000000e+00, %bb ], [ %tmp345, %bb157 ] + %tmp32 = phi float [ 0.000000e+00, %bb ], [ %tmp352, %bb157 ] + %tmp33 = phi float [ 0.000000e+00, %bb ], [ %tmp344, %bb157 ] + %tmp34 = phi float [ 0.000000e+00, %bb ], [ %tmp343, %bb157 ] + %tmp35 = phi float [ 0.000000e+00, %bb ], [ %tmp342, %bb157 ] + %tmp36 = phi float [ 0.000000e+00, %bb ], [ %tmp353, %bb157 ] + %tmp37 = phi float [ 0.000000e+00, %bb ], [ %tmp341, %bb157 ] + %tmp38 = phi float [ 0.000000e+00, %bb ], [ %tmp340, %bb157 ] + %tmp39 = phi float [ 0.000000e+00, %bb ], [ %tmp339, %bb157 ] + %tmp40 = phi float [ 0.000000e+00, %bb ], [ %tmp354, %bb157 ] + %tmp41 = phi float [ 0.000000e+00, %bb ], [ %tmp338, %bb157 ] + %tmp42 = phi float [ 0.000000e+00, %bb ], [ %tmp337, %bb157 ] + %tmp43 = phi float [ 0.000000e+00, %bb ], [ %tmp336, %bb157 ] + %tmp44 = phi float [ 0.000000e+00, %bb ], [ %tmp355, %bb157 ] + %tmp45 = phi float [ 0.000000e+00, %bb ], [ %tmp335, %bb157 ] + %tmp46 = phi float [ 0.000000e+00, %bb ], [ %tmp334, %bb157 ] + %tmp47 = phi float [ 0.000000e+00, %bb ], [ %tmp333, %bb157 ] + %tmp48 = phi float [ 0.000000e+00, %bb ], [ %tmp356, %bb157 ] + %tmp49 = phi float [ 0.000000e+00, %bb ], [ %tmp332, %bb157 ] + %tmp50 = phi float [ 0.000000e+00, %bb ], [ %tmp331, %bb157 ] + %tmp51 = phi float [ 0.000000e+00, %bb ], [ %tmp330, %bb157 ] + %tmp52 = phi float [ 0.000000e+00, %bb ], [ %tmp357, %bb157 ] + %tmp53 = phi float [ 0.000000e+00, %bb ], [ %tmp329, %bb157 ] + %tmp54 = phi 
float [ 0.000000e+00, %bb ], [ %tmp328, %bb157 ] + %tmp55 = phi float [ 0.000000e+00, %bb ], [ %tmp327, %bb157 ] + %tmp56 = phi float [ 0.000000e+00, %bb ], [ %tmp358, %bb157 ] + %tmp57 = phi float [ 0.000000e+00, %bb ], [ %tmp326, %bb157 ] + %tmp58 = phi float [ 0.000000e+00, %bb ], [ %tmp325, %bb157 ] + %tmp59 = phi float [ 0.000000e+00, %bb ], [ %tmp324, %bb157 ] + %tmp60 = phi float [ 0.000000e+00, %bb ], [ %tmp359, %bb157 ] + %tmp61 = phi float [ 0.000000e+00, %bb ], [ %tmp323, %bb157 ] + %tmp62 = phi float [ 0.000000e+00, %bb ], [ %tmp322, %bb157 ] + %tmp63 = phi float [ 0.000000e+00, %bb ], [ %tmp321, %bb157 ] + %tmp64 = phi float [ 0.000000e+00, %bb ], [ %tmp360, %bb157 ] + %tmp65 = phi float [ 0.000000e+00, %bb ], [ %tmp320, %bb157 ] + %tmp66 = phi float [ 0.000000e+00, %bb ], [ %tmp319, %bb157 ] + %tmp67 = phi float [ 0.000000e+00, %bb ], [ %tmp318, %bb157 ] + %tmp68 = phi float [ 0.000000e+00, %bb ], [ %tmp361, %bb157 ] + %tmp69 = phi float [ 0.000000e+00, %bb ], [ %tmp317, %bb157 ] + %tmp70 = phi float [ 0.000000e+00, %bb ], [ %tmp316, %bb157 ] + %tmp71 = phi float [ 0.000000e+00, %bb ], [ %tmp315, %bb157 ] + %tmp72 = phi float [ 0.000000e+00, %bb ], [ %tmp362, %bb157 ] + %tmp73 = phi float [ 0.000000e+00, %bb ], [ %tmp314, %bb157 ] + %tmp74 = phi float [ 0.000000e+00, %bb ], [ %tmp313, %bb157 ] + %tmp75 = phi float [ 0.000000e+00, %bb ], [ %tmp312, %bb157 ] + %tmp76 = phi float [ 0.000000e+00, %bb ], [ %tmp363, %bb157 ] + %tmp77 = phi float [ 0.000000e+00, %bb ], [ %tmp311, %bb157 ] + %tmp78 = phi float [ 0.000000e+00, %bb ], [ %tmp310, %bb157 ] + %tmp79 = phi float [ 0.000000e+00, %bb ], [ %tmp309, %bb157 ] + %tmp80 = phi float [ 0.000000e+00, %bb ], [ %tmp364, %bb157 ] + %tmp81 = phi float [ 0.000000e+00, %bb ], [ %tmp308, %bb157 ] + %tmp82 = phi float [ 0.000000e+00, %bb ], [ %tmp307, %bb157 ] + %tmp83 = phi float [ 0.000000e+00, %bb ], [ %tmp306, %bb157 ] + %tmp84 = phi float [ 0.000000e+00, %bb ], [ %tmp365, %bb157 ] + %tmp85 = phi float [ 0.000000e+00, %bb ], [ %tmp305, %bb157 ] + %tmp86 = phi float [ 0.000000e+00, %bb ], [ %tmp304, %bb157 ] + %tmp87 = phi float [ 0.000000e+00, %bb ], [ %tmp303, %bb157 ] + %tmp88 = phi float [ 0.000000e+00, %bb ], [ %tmp366, %bb157 ] + %tmp89 = phi float [ 0.000000e+00, %bb ], [ %tmp302, %bb157 ] + %tmp90 = phi float [ 0.000000e+00, %bb ], [ %tmp301, %bb157 ] + %tmp91 = phi float [ 0.000000e+00, %bb ], [ %tmp300, %bb157 ] + %tmp92 = phi float [ 0.000000e+00, %bb ], [ %tmp367, %bb157 ] + %tmp93 = phi float [ 0.000000e+00, %bb ], [ %tmp299, %bb157 ] + %tmp94 = phi float [ 0.000000e+00, %bb ], [ %tmp298, %bb157 ] + %tmp95 = phi float [ 0.000000e+00, %bb ], [ %tmp297, %bb157 ] + %tmp96 = phi float [ 0.000000e+00, %bb ], [ %tmp368, %bb157 ] + %tmp97 = phi float [ 0.000000e+00, %bb ], [ %tmp296, %bb157 ] + %tmp98 = phi float [ 0.000000e+00, %bb ], [ %tmp295, %bb157 ] + %tmp99 = phi float [ 0.000000e+00, %bb ], [ %tmp294, %bb157 ] + %tmp100 = phi float [ 0.000000e+00, %bb ], [ %tmp369, %bb157 ] + %tmp101 = phi float [ 0.000000e+00, %bb ], [ %tmp293, %bb157 ] + %tmp102 = phi float [ 0.000000e+00, %bb ], [ %tmp292, %bb157 ] + %tmp103 = phi float [ 0.000000e+00, %bb ], [ %tmp291, %bb157 ] + %tmp104 = phi float [ 0.000000e+00, %bb ], [ %tmp370, %bb157 ] + %tmp105 = phi float [ 0.000000e+00, %bb ], [ %tmp371, %bb157 ] + %tmp106 = phi float [ 0.000000e+00, %bb ], [ %tmp372, %bb157 ] + %tmp107 = phi float [ 0.000000e+00, %bb ], [ %tmp421, %bb157 ] + %tmp108 = phi float [ 0.000000e+00, %bb ], [ %tmp373, %bb157 ] + %tmp109 = phi float [ 0.000000e+00, 
%bb ], [ %tmp374, %bb157 ] + %tmp110 = phi float [ 0.000000e+00, %bb ], [ %tmp375, %bb157 ] + %tmp111 = phi float [ 0.000000e+00, %bb ], [ %tmp376, %bb157 ] + %tmp112 = phi float [ 0.000000e+00, %bb ], [ %tmp377, %bb157 ] + %tmp113 = phi float [ 0.000000e+00, %bb ], [ %tmp378, %bb157 ] + %tmp114 = phi float [ 0.000000e+00, %bb ], [ %tmp379, %bb157 ] + %tmp115 = phi float [ 0.000000e+00, %bb ], [ %tmp380, %bb157 ] + %tmp116 = phi float [ 0.000000e+00, %bb ], [ %tmp381, %bb157 ] + %tmp117 = phi float [ 0.000000e+00, %bb ], [ %tmp382, %bb157 ] + %tmp118 = phi float [ 0.000000e+00, %bb ], [ %tmp383, %bb157 ] + %tmp119 = phi float [ 0.000000e+00, %bb ], [ %tmp384, %bb157 ] + %tmp120 = phi float [ 0.000000e+00, %bb ], [ %tmp385, %bb157 ] + %tmp121 = phi float [ 0.000000e+00, %bb ], [ %tmp386, %bb157 ] + %tmp122 = phi float [ 0.000000e+00, %bb ], [ %tmp387, %bb157 ] + %tmp123 = phi float [ 0.000000e+00, %bb ], [ %tmp388, %bb157 ] + %tmp124 = phi float [ 0.000000e+00, %bb ], [ %tmp389, %bb157 ] + %tmp125 = phi float [ 0.000000e+00, %bb ], [ %tmp390, %bb157 ] + %tmp126 = phi float [ 0.000000e+00, %bb ], [ %tmp391, %bb157 ] + %tmp127 = phi float [ 0.000000e+00, %bb ], [ %tmp392, %bb157 ] + %tmp128 = phi float [ 0.000000e+00, %bb ], [ %tmp393, %bb157 ] + %tmp129 = phi float [ 0.000000e+00, %bb ], [ %tmp394, %bb157 ] + %tmp130 = phi float [ 0.000000e+00, %bb ], [ %tmp395, %bb157 ] + %tmp131 = phi float [ 0.000000e+00, %bb ], [ %tmp396, %bb157 ] + %tmp132 = phi float [ 0.000000e+00, %bb ], [ %tmp397, %bb157 ] + %tmp133 = phi float [ 0.000000e+00, %bb ], [ %tmp398, %bb157 ] + %tmp134 = phi float [ 0.000000e+00, %bb ], [ %tmp399, %bb157 ] + %tmp135 = phi float [ 0.000000e+00, %bb ], [ %tmp400, %bb157 ] + %tmp136 = phi float [ 0.000000e+00, %bb ], [ %tmp401, %bb157 ] + %tmp137 = phi float [ 0.000000e+00, %bb ], [ %tmp402, %bb157 ] + %tmp138 = phi float [ 0.000000e+00, %bb ], [ %tmp403, %bb157 ] + %tmp139 = phi float [ 0.000000e+00, %bb ], [ %tmp404, %bb157 ] + %tmp140 = phi float [ 0.000000e+00, %bb ], [ %tmp405, %bb157 ] + %tmp141 = phi float [ 0.000000e+00, %bb ], [ %tmp406, %bb157 ] + %tmp142 = phi float [ 0.000000e+00, %bb ], [ %tmp407, %bb157 ] + %tmp143 = phi float [ 0.000000e+00, %bb ], [ %tmp408, %bb157 ] + %tmp144 = phi float [ 0.000000e+00, %bb ], [ %tmp409, %bb157 ] + %tmp145 = phi float [ 0.000000e+00, %bb ], [ %tmp410, %bb157 ] + %tmp146 = phi float [ 0.000000e+00, %bb ], [ %tmp411, %bb157 ] + %tmp147 = phi float [ 0.000000e+00, %bb ], [ %tmp412, %bb157 ] + %tmp148 = phi float [ 0.000000e+00, %bb ], [ %tmp413, %bb157 ] + %tmp149 = phi float [ 0.000000e+00, %bb ], [ %tmp414, %bb157 ] + %tmp150 = phi float [ 0.000000e+00, %bb ], [ %tmp415, %bb157 ] + %tmp151 = phi float [ 0.000000e+00, %bb ], [ %tmp416, %bb157 ] + %tmp152 = phi float [ 0.000000e+00, %bb ], [ %tmp417, %bb157 ] + %tmp153 = phi float [ 0.000000e+00, %bb ], [ %tmp418, %bb157 ] + %tmp154 = bitcast float %tmp107 to i32 + %tmp155 = icmp sgt i32 %tmp154, 125 + br i1 %tmp155, label %bb156, label %bb157 + +bb156: ; preds = %bb24 + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %tmp12, float %tmp103, float %tmp102, float %tmp101) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %tmp99, float %tmp98, float %tmp97, float %tmp95) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %tmp94, float %tmp93, float %tmp91, float %tmp90) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %tmp89, float %tmp87, float %tmp86, float %tmp85) + call void @llvm.SI.export(i32 
15, i32 0, i32 0, i32 36, i32 0, float %tmp83, float %tmp82, float %tmp81, float %tmp79) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %tmp78, float %tmp77, float %tmp75, float %tmp74) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %tmp73, float %tmp71, float %tmp70, float %tmp69) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %tmp67, float %tmp66, float %tmp65, float %tmp63) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 40, i32 0, float %tmp62, float %tmp61, float %tmp59, float %tmp58) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 41, i32 0, float %tmp57, float %tmp55, float %tmp54, float %tmp53) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 42, i32 0, float %tmp51, float %tmp50, float %tmp49, float %tmp47) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 43, i32 0, float %tmp46, float %tmp45, float %tmp43, float %tmp42) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 44, i32 0, float %tmp41, float %tmp39, float %tmp38, float %tmp37) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 45, i32 0, float %tmp35, float %tmp34, float %tmp33, float %tmp31) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 46, i32 0, float %tmp30, float %tmp29, float %tmp27, float %tmp26) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 47, i32 0, float %tmp25, float %tmp28, float %tmp32, float %tmp36) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 48, i32 0, float %tmp40, float %tmp44, float %tmp48, float %tmp52) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 49, i32 0, float %tmp56, float %tmp60, float %tmp64, float %tmp68) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 50, i32 0, float %tmp72, float %tmp76, float %tmp80, float %tmp84) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 51, i32 0, float %tmp88, float %tmp92, float %tmp96, float %tmp100) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 52, i32 0, float %tmp104, float %tmp105, float %tmp106, float %tmp108) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 53, i32 0, float %tmp109, float %tmp110, float %tmp111, float %tmp112) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 54, i32 0, float %tmp113, float %tmp114, float %tmp115, float %tmp116) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 55, i32 0, float %tmp117, float %tmp118, float %tmp119, float %tmp120) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 56, i32 0, float %tmp121, float %tmp122, float %tmp123, float %tmp124) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 57, i32 0, float %tmp125, float %tmp126, float %tmp127, float %tmp128) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 58, i32 0, float %tmp129, float %tmp130, float %tmp131, float %tmp132) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 59, i32 0, float %tmp133, float %tmp134, float %tmp135, float %tmp136) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 60, i32 0, float %tmp137, float %tmp138, float %tmp139, float %tmp140) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 61, i32 0, float %tmp141, float %tmp142, float %tmp143, float %tmp144) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 62, i32 0, float %tmp145, float %tmp146, float %tmp147, float %tmp148) + call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 63, i32 0, float %tmp149, float %tmp150, float %tmp151, float %tmp13) + call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %tmp19, float %tmp20, float %tmp21, float %tmp22) + ret void + +bb157: ; preds 
= %bb24 + %tmp158 = bitcast float %tmp107 to i32 + %tmp159 = bitcast float %tmp107 to i32 + %tmp160 = add i32 %tmp23, %tmp159 + %tmp161 = bitcast i32 %tmp160 to float + %tmp162 = insertelement <128 x float> undef, float %tmp103, i32 0 + %tmp163 = insertelement <128 x float> %tmp162, float %tmp102, i32 1 + %tmp164 = insertelement <128 x float> %tmp163, float %tmp101, i32 2 + %tmp165 = insertelement <128 x float> %tmp164, float %tmp99, i32 3 + %tmp166 = insertelement <128 x float> %tmp165, float %tmp98, i32 4 + %tmp167 = insertelement <128 x float> %tmp166, float %tmp97, i32 5 + %tmp168 = insertelement <128 x float> %tmp167, float %tmp95, i32 6 + %tmp169 = insertelement <128 x float> %tmp168, float %tmp94, i32 7 + %tmp170 = insertelement <128 x float> %tmp169, float %tmp93, i32 8 + %tmp171 = insertelement <128 x float> %tmp170, float %tmp91, i32 9 + %tmp172 = insertelement <128 x float> %tmp171, float %tmp90, i32 10 + %tmp173 = insertelement <128 x float> %tmp172, float %tmp89, i32 11 + %tmp174 = insertelement <128 x float> %tmp173, float %tmp87, i32 12 + %tmp175 = insertelement <128 x float> %tmp174, float %tmp86, i32 13 + %tmp176 = insertelement <128 x float> %tmp175, float %tmp85, i32 14 + %tmp177 = insertelement <128 x float> %tmp176, float %tmp83, i32 15 + %tmp178 = insertelement <128 x float> %tmp177, float %tmp82, i32 16 + %tmp179 = insertelement <128 x float> %tmp178, float %tmp81, i32 17 + %tmp180 = insertelement <128 x float> %tmp179, float %tmp79, i32 18 + %tmp181 = insertelement <128 x float> %tmp180, float %tmp78, i32 19 + %tmp182 = insertelement <128 x float> %tmp181, float %tmp77, i32 20 + %tmp183 = insertelement <128 x float> %tmp182, float %tmp75, i32 21 + %tmp184 = insertelement <128 x float> %tmp183, float %tmp74, i32 22 + %tmp185 = insertelement <128 x float> %tmp184, float %tmp73, i32 23 + %tmp186 = insertelement <128 x float> %tmp185, float %tmp71, i32 24 + %tmp187 = insertelement <128 x float> %tmp186, float %tmp70, i32 25 + %tmp188 = insertelement <128 x float> %tmp187, float %tmp69, i32 26 + %tmp189 = insertelement <128 x float> %tmp188, float %tmp67, i32 27 + %tmp190 = insertelement <128 x float> %tmp189, float %tmp66, i32 28 + %tmp191 = insertelement <128 x float> %tmp190, float %tmp65, i32 29 + %tmp192 = insertelement <128 x float> %tmp191, float %tmp63, i32 30 + %tmp193 = insertelement <128 x float> %tmp192, float %tmp62, i32 31 + %tmp194 = insertelement <128 x float> %tmp193, float %tmp61, i32 32 + %tmp195 = insertelement <128 x float> %tmp194, float %tmp59, i32 33 + %tmp196 = insertelement <128 x float> %tmp195, float %tmp58, i32 34 + %tmp197 = insertelement <128 x float> %tmp196, float %tmp57, i32 35 + %tmp198 = insertelement <128 x float> %tmp197, float %tmp55, i32 36 + %tmp199 = insertelement <128 x float> %tmp198, float %tmp54, i32 37 + %tmp200 = insertelement <128 x float> %tmp199, float %tmp53, i32 38 + %tmp201 = insertelement <128 x float> %tmp200, float %tmp51, i32 39 + %tmp202 = insertelement <128 x float> %tmp201, float %tmp50, i32 40 + %tmp203 = insertelement <128 x float> %tmp202, float %tmp49, i32 41 + %tmp204 = insertelement <128 x float> %tmp203, float %tmp47, i32 42 + %tmp205 = insertelement <128 x float> %tmp204, float %tmp46, i32 43 + %tmp206 = insertelement <128 x float> %tmp205, float %tmp45, i32 44 + %tmp207 = insertelement <128 x float> %tmp206, float %tmp43, i32 45 + %tmp208 = insertelement <128 x float> %tmp207, float %tmp42, i32 46 + %tmp209 = insertelement <128 x float> %tmp208, float %tmp41, i32 47 + %tmp210 = insertelement <128 x 
float> %tmp209, float %tmp39, i32 48 + %tmp211 = insertelement <128 x float> %tmp210, float %tmp38, i32 49 + %tmp212 = insertelement <128 x float> %tmp211, float %tmp37, i32 50 + %tmp213 = insertelement <128 x float> %tmp212, float %tmp35, i32 51 + %tmp214 = insertelement <128 x float> %tmp213, float %tmp34, i32 52 + %tmp215 = insertelement <128 x float> %tmp214, float %tmp33, i32 53 + %tmp216 = insertelement <128 x float> %tmp215, float %tmp31, i32 54 + %tmp217 = insertelement <128 x float> %tmp216, float %tmp30, i32 55 + %tmp218 = insertelement <128 x float> %tmp217, float %tmp29, i32 56 + %tmp219 = insertelement <128 x float> %tmp218, float %tmp27, i32 57 + %tmp220 = insertelement <128 x float> %tmp219, float %tmp26, i32 58 + %tmp221 = insertelement <128 x float> %tmp220, float %tmp25, i32 59 + %tmp222 = insertelement <128 x float> %tmp221, float %tmp28, i32 60 + %tmp223 = insertelement <128 x float> %tmp222, float %tmp32, i32 61 + %tmp224 = insertelement <128 x float> %tmp223, float %tmp36, i32 62 + %tmp225 = insertelement <128 x float> %tmp224, float %tmp40, i32 63 + %tmp226 = insertelement <128 x float> %tmp225, float %tmp44, i32 64 + %tmp227 = insertelement <128 x float> %tmp226, float %tmp48, i32 65 + %tmp228 = insertelement <128 x float> %tmp227, float %tmp52, i32 66 + %tmp229 = insertelement <128 x float> %tmp228, float %tmp56, i32 67 + %tmp230 = insertelement <128 x float> %tmp229, float %tmp60, i32 68 + %tmp231 = insertelement <128 x float> %tmp230, float %tmp64, i32 69 + %tmp232 = insertelement <128 x float> %tmp231, float %tmp68, i32 70 + %tmp233 = insertelement <128 x float> %tmp232, float %tmp72, i32 71 + %tmp234 = insertelement <128 x float> %tmp233, float %tmp76, i32 72 + %tmp235 = insertelement <128 x float> %tmp234, float %tmp80, i32 73 + %tmp236 = insertelement <128 x float> %tmp235, float %tmp84, i32 74 + %tmp237 = insertelement <128 x float> %tmp236, float %tmp88, i32 75 + %tmp238 = insertelement <128 x float> %tmp237, float %tmp92, i32 76 + %tmp239 = insertelement <128 x float> %tmp238, float %tmp96, i32 77 + %tmp240 = insertelement <128 x float> %tmp239, float %tmp100, i32 78 + %tmp241 = insertelement <128 x float> %tmp240, float %tmp104, i32 79 + %tmp242 = insertelement <128 x float> %tmp241, float %tmp105, i32 80 + %tmp243 = insertelement <128 x float> %tmp242, float %tmp106, i32 81 + %tmp244 = insertelement <128 x float> %tmp243, float %tmp108, i32 82 + %tmp245 = insertelement <128 x float> %tmp244, float %tmp109, i32 83 + %tmp246 = insertelement <128 x float> %tmp245, float %tmp110, i32 84 + %tmp247 = insertelement <128 x float> %tmp246, float %tmp111, i32 85 + %tmp248 = insertelement <128 x float> %tmp247, float %tmp112, i32 86 + %tmp249 = insertelement <128 x float> %tmp248, float %tmp113, i32 87 + %tmp250 = insertelement <128 x float> %tmp249, float %tmp114, i32 88 + %tmp251 = insertelement <128 x float> %tmp250, float %tmp115, i32 89 + %tmp252 = insertelement <128 x float> %tmp251, float %tmp116, i32 90 + %tmp253 = insertelement <128 x float> %tmp252, float %tmp117, i32 91 + %tmp254 = insertelement <128 x float> %tmp253, float %tmp118, i32 92 + %tmp255 = insertelement <128 x float> %tmp254, float %tmp119, i32 93 + %tmp256 = insertelement <128 x float> %tmp255, float %tmp120, i32 94 + %tmp257 = insertelement <128 x float> %tmp256, float %tmp121, i32 95 + %tmp258 = insertelement <128 x float> %tmp257, float %tmp122, i32 96 + %tmp259 = insertelement <128 x float> %tmp258, float %tmp123, i32 97 + %tmp260 = insertelement <128 x float> %tmp259, float %tmp124, i32 
98 + %tmp261 = insertelement <128 x float> %tmp260, float %tmp125, i32 99 + %tmp262 = insertelement <128 x float> %tmp261, float %tmp126, i32 100 + %tmp263 = insertelement <128 x float> %tmp262, float %tmp127, i32 101 + %tmp264 = insertelement <128 x float> %tmp263, float %tmp128, i32 102 + %tmp265 = insertelement <128 x float> %tmp264, float %tmp129, i32 103 + %tmp266 = insertelement <128 x float> %tmp265, float %tmp130, i32 104 + %tmp267 = insertelement <128 x float> %tmp266, float %tmp131, i32 105 + %tmp268 = insertelement <128 x float> %tmp267, float %tmp132, i32 106 + %tmp269 = insertelement <128 x float> %tmp268, float %tmp133, i32 107 + %tmp270 = insertelement <128 x float> %tmp269, float %tmp134, i32 108 + %tmp271 = insertelement <128 x float> %tmp270, float %tmp135, i32 109 + %tmp272 = insertelement <128 x float> %tmp271, float %tmp136, i32 110 + %tmp273 = insertelement <128 x float> %tmp272, float %tmp137, i32 111 + %tmp274 = insertelement <128 x float> %tmp273, float %tmp138, i32 112 + %tmp275 = insertelement <128 x float> %tmp274, float %tmp139, i32 113 + %tmp276 = insertelement <128 x float> %tmp275, float %tmp140, i32 114 + %tmp277 = insertelement <128 x float> %tmp276, float %tmp141, i32 115 + %tmp278 = insertelement <128 x float> %tmp277, float %tmp142, i32 116 + %tmp279 = insertelement <128 x float> %tmp278, float %tmp143, i32 117 + %tmp280 = insertelement <128 x float> %tmp279, float %tmp144, i32 118 + %tmp281 = insertelement <128 x float> %tmp280, float %tmp145, i32 119 + %tmp282 = insertelement <128 x float> %tmp281, float %tmp146, i32 120 + %tmp283 = insertelement <128 x float> %tmp282, float %tmp147, i32 121 + %tmp284 = insertelement <128 x float> %tmp283, float %tmp148, i32 122 + %tmp285 = insertelement <128 x float> %tmp284, float %tmp149, i32 123 + %tmp286 = insertelement <128 x float> %tmp285, float %tmp150, i32 124 + %tmp287 = insertelement <128 x float> %tmp286, float %tmp151, i32 125 + %tmp288 = insertelement <128 x float> %tmp287, float %tmp152, i32 126 + %tmp289 = insertelement <128 x float> %tmp288, float %tmp153, i32 127 + %tmp290 = insertelement <128 x float> %tmp289, float %tmp161, i32 %tmp158 + %tmp291 = extractelement <128 x float> %tmp290, i32 0 + %tmp292 = extractelement <128 x float> %tmp290, i32 1 + %tmp293 = extractelement <128 x float> %tmp290, i32 2 + %tmp294 = extractelement <128 x float> %tmp290, i32 3 + %tmp295 = extractelement <128 x float> %tmp290, i32 4 + %tmp296 = extractelement <128 x float> %tmp290, i32 5 + %tmp297 = extractelement <128 x float> %tmp290, i32 6 + %tmp298 = extractelement <128 x float> %tmp290, i32 7 + %tmp299 = extractelement <128 x float> %tmp290, i32 8 + %tmp300 = extractelement <128 x float> %tmp290, i32 9 + %tmp301 = extractelement <128 x float> %tmp290, i32 10 + %tmp302 = extractelement <128 x float> %tmp290, i32 11 + %tmp303 = extractelement <128 x float> %tmp290, i32 12 + %tmp304 = extractelement <128 x float> %tmp290, i32 13 + %tmp305 = extractelement <128 x float> %tmp290, i32 14 + %tmp306 = extractelement <128 x float> %tmp290, i32 15 + %tmp307 = extractelement <128 x float> %tmp290, i32 16 + %tmp308 = extractelement <128 x float> %tmp290, i32 17 + %tmp309 = extractelement <128 x float> %tmp290, i32 18 + %tmp310 = extractelement <128 x float> %tmp290, i32 19 + %tmp311 = extractelement <128 x float> %tmp290, i32 20 + %tmp312 = extractelement <128 x float> %tmp290, i32 21 + %tmp313 = extractelement <128 x float> %tmp290, i32 22 + %tmp314 = extractelement <128 x float> %tmp290, i32 23 + %tmp315 = extractelement 
<128 x float> %tmp290, i32 24 + %tmp316 = extractelement <128 x float> %tmp290, i32 25 + %tmp317 = extractelement <128 x float> %tmp290, i32 26 + %tmp318 = extractelement <128 x float> %tmp290, i32 27 + %tmp319 = extractelement <128 x float> %tmp290, i32 28 + %tmp320 = extractelement <128 x float> %tmp290, i32 29 + %tmp321 = extractelement <128 x float> %tmp290, i32 30 + %tmp322 = extractelement <128 x float> %tmp290, i32 31 + %tmp323 = extractelement <128 x float> %tmp290, i32 32 + %tmp324 = extractelement <128 x float> %tmp290, i32 33 + %tmp325 = extractelement <128 x float> %tmp290, i32 34 + %tmp326 = extractelement <128 x float> %tmp290, i32 35 + %tmp327 = extractelement <128 x float> %tmp290, i32 36 + %tmp328 = extractelement <128 x float> %tmp290, i32 37 + %tmp329 = extractelement <128 x float> %tmp290, i32 38 + %tmp330 = extractelement <128 x float> %tmp290, i32 39 + %tmp331 = extractelement <128 x float> %tmp290, i32 40 + %tmp332 = extractelement <128 x float> %tmp290, i32 41 + %tmp333 = extractelement <128 x float> %tmp290, i32 42 + %tmp334 = extractelement <128 x float> %tmp290, i32 43 + %tmp335 = extractelement <128 x float> %tmp290, i32 44 + %tmp336 = extractelement <128 x float> %tmp290, i32 45 + %tmp337 = extractelement <128 x float> %tmp290, i32 46 + %tmp338 = extractelement <128 x float> %tmp290, i32 47 + %tmp339 = extractelement <128 x float> %tmp290, i32 48 + %tmp340 = extractelement <128 x float> %tmp290, i32 49 + %tmp341 = extractelement <128 x float> %tmp290, i32 50 + %tmp342 = extractelement <128 x float> %tmp290, i32 51 + %tmp343 = extractelement <128 x float> %tmp290, i32 52 + %tmp344 = extractelement <128 x float> %tmp290, i32 53 + %tmp345 = extractelement <128 x float> %tmp290, i32 54 + %tmp346 = extractelement <128 x float> %tmp290, i32 55 + %tmp347 = extractelement <128 x float> %tmp290, i32 56 + %tmp348 = extractelement <128 x float> %tmp290, i32 57 + %tmp349 = extractelement <128 x float> %tmp290, i32 58 + %tmp350 = extractelement <128 x float> %tmp290, i32 59 + %tmp351 = extractelement <128 x float> %tmp290, i32 60 + %tmp352 = extractelement <128 x float> %tmp290, i32 61 + %tmp353 = extractelement <128 x float> %tmp290, i32 62 + %tmp354 = extractelement <128 x float> %tmp290, i32 63 + %tmp355 = extractelement <128 x float> %tmp290, i32 64 + %tmp356 = extractelement <128 x float> %tmp290, i32 65 + %tmp357 = extractelement <128 x float> %tmp290, i32 66 + %tmp358 = extractelement <128 x float> %tmp290, i32 67 + %tmp359 = extractelement <128 x float> %tmp290, i32 68 + %tmp360 = extractelement <128 x float> %tmp290, i32 69 + %tmp361 = extractelement <128 x float> %tmp290, i32 70 + %tmp362 = extractelement <128 x float> %tmp290, i32 71 + %tmp363 = extractelement <128 x float> %tmp290, i32 72 + %tmp364 = extractelement <128 x float> %tmp290, i32 73 + %tmp365 = extractelement <128 x float> %tmp290, i32 74 + %tmp366 = extractelement <128 x float> %tmp290, i32 75 + %tmp367 = extractelement <128 x float> %tmp290, i32 76 + %tmp368 = extractelement <128 x float> %tmp290, i32 77 + %tmp369 = extractelement <128 x float> %tmp290, i32 78 + %tmp370 = extractelement <128 x float> %tmp290, i32 79 + %tmp371 = extractelement <128 x float> %tmp290, i32 80 + %tmp372 = extractelement <128 x float> %tmp290, i32 81 + %tmp373 = extractelement <128 x float> %tmp290, i32 82 + %tmp374 = extractelement <128 x float> %tmp290, i32 83 + %tmp375 = extractelement <128 x float> %tmp290, i32 84 + %tmp376 = extractelement <128 x float> %tmp290, i32 85 + %tmp377 = extractelement <128 x float> 
%tmp290, i32 86 + %tmp378 = extractelement <128 x float> %tmp290, i32 87 + %tmp379 = extractelement <128 x float> %tmp290, i32 88 + %tmp380 = extractelement <128 x float> %tmp290, i32 89 + %tmp381 = extractelement <128 x float> %tmp290, i32 90 + %tmp382 = extractelement <128 x float> %tmp290, i32 91 + %tmp383 = extractelement <128 x float> %tmp290, i32 92 + %tmp384 = extractelement <128 x float> %tmp290, i32 93 + %tmp385 = extractelement <128 x float> %tmp290, i32 94 + %tmp386 = extractelement <128 x float> %tmp290, i32 95 + %tmp387 = extractelement <128 x float> %tmp290, i32 96 + %tmp388 = extractelement <128 x float> %tmp290, i32 97 + %tmp389 = extractelement <128 x float> %tmp290, i32 98 + %tmp390 = extractelement <128 x float> %tmp290, i32 99 + %tmp391 = extractelement <128 x float> %tmp290, i32 100 + %tmp392 = extractelement <128 x float> %tmp290, i32 101 + %tmp393 = extractelement <128 x float> %tmp290, i32 102 + %tmp394 = extractelement <128 x float> %tmp290, i32 103 + %tmp395 = extractelement <128 x float> %tmp290, i32 104 + %tmp396 = extractelement <128 x float> %tmp290, i32 105 + %tmp397 = extractelement <128 x float> %tmp290, i32 106 + %tmp398 = extractelement <128 x float> %tmp290, i32 107 + %tmp399 = extractelement <128 x float> %tmp290, i32 108 + %tmp400 = extractelement <128 x float> %tmp290, i32 109 + %tmp401 = extractelement <128 x float> %tmp290, i32 110 + %tmp402 = extractelement <128 x float> %tmp290, i32 111 + %tmp403 = extractelement <128 x float> %tmp290, i32 112 + %tmp404 = extractelement <128 x float> %tmp290, i32 113 + %tmp405 = extractelement <128 x float> %tmp290, i32 114 + %tmp406 = extractelement <128 x float> %tmp290, i32 115 + %tmp407 = extractelement <128 x float> %tmp290, i32 116 + %tmp408 = extractelement <128 x float> %tmp290, i32 117 + %tmp409 = extractelement <128 x float> %tmp290, i32 118 + %tmp410 = extractelement <128 x float> %tmp290, i32 119 + %tmp411 = extractelement <128 x float> %tmp290, i32 120 + %tmp412 = extractelement <128 x float> %tmp290, i32 121 + %tmp413 = extractelement <128 x float> %tmp290, i32 122 + %tmp414 = extractelement <128 x float> %tmp290, i32 123 + %tmp415 = extractelement <128 x float> %tmp290, i32 124 + %tmp416 = extractelement <128 x float> %tmp290, i32 125 + %tmp417 = extractelement <128 x float> %tmp290, i32 126 + %tmp418 = extractelement <128 x float> %tmp290, i32 127 + %tmp419 = bitcast float %tmp107 to i32 + %tmp420 = add i32 %tmp419, 1 + %tmp421 = bitcast i32 %tmp420 to float + br label %bb24 +} + +; Function Attrs: nounwind readnone +declare float @llvm.SI.load.const(<16 x i8>, i32) #1 + +; Function Attrs: nounwind readnone +declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 + +declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + +attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } +attributes #1 = { nounwind readnone } + +!0 = !{!1, !1, i64 0, i32 1} +!1 = !{!"const", null} diff --git a/test/CodeGen/AMDGPU/vop-shrink.ll b/test/CodeGen/AMDGPU/vop-shrink.ll index 9b2f229c05af..2bfe1b2bd6ec 100644 --- a/test/CodeGen/AMDGPU/vop-shrink.ll +++ b/test/CodeGen/AMDGPU/vop-shrink.ll @@ -3,8 +3,8 @@ ; Test that we correctly commute a sub instruction ; FUNC-LABEL: {{^}}sub_rev: -; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s -; SI: v_subrev_i32_e32 v{{[0-9]+}}, s +; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s +; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s ; ModuleID = 'vop-shrink.ll' diff --git a/test/CodeGen/AMDGPU/wait.ll b/test/CodeGen/AMDGPU/wait.ll index 
5cc7577cad33..107e84b33be9 100644
--- a/test/CodeGen/AMDGPU/wait.ll
+++ b/test/CodeGen/AMDGPU/wait.ll
@@ -1,11 +1,16 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=DEFAULT
+; RUN: llc -march=amdgcn --misched=ilpmax -mcpu=SI -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX
+; RUN: llc -march=amdgcn --misched=ilpmax -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace %s --check-prefix=ILPMAX
+; The ilpmax scheduler is used for the second test to get the ordering we want for the test.
 
-; CHECK-LABEL: {{^}}main:
-; CHECK: s_load_dwordx4
-; CHECK: s_load_dwordx4
-; CHECK: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
-; CHECK: s_endpgm
+; DEFAULT-LABEL: {{^}}main:
+; DEFAULT: s_load_dwordx4
+; DEFAULT: s_load_dwordx4
+; DEFAULT: s_waitcnt vmcnt(0)
+; DEFAULT: exp
+; DEFAULT: s_waitcnt lgkmcnt(0)
+; DEFAULT: s_endpgm
 define void @main(<16 x i8> addrspace(2)* inreg %arg, <16 x i8> addrspace(2)* inreg %arg1, <32 x i8> addrspace(2)* inreg %arg2, <16 x i8> addrspace(2)* inreg %arg3, <16 x i8> addrspace(2)* inreg %arg4, i32 inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9, float addrspace(2)* inreg %constptr) #0 {
 main_body:
   %tmp = getelementptr <16 x i8>, <16 x i8> addrspace(2)* %arg3, i32 0
@@ -29,7 +34,43 @@ main_body:
   ret void
 }
 
-; Function Attrs: noduplicate nounwind
+; ILPMAX-LABEL: {{^}}main2:
+; ILPMAX: s_load_dwordx4
+; ILPMAX: s_waitcnt lgkmcnt(0)
+; ILPMAX: buffer_load
+; ILPMAX: s_load_dwordx4
+; ILPMAX: s_waitcnt lgkmcnt(0)
+; ILPMAX: buffer_load
+; ILPMAX: s_waitcnt vmcnt(1)
+; ILPMAX: s_waitcnt vmcnt(0)
+; ILPMAX: s_endpgm
+
+define void @main2([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)*
+byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
+main_body:
+  %11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
+  %12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
+  %13 = add i32 %5, %7
+  %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
+  %15 = extractelement <4 x float> %14, i32 0
+  %16 = extractelement <4 x float> %14, i32 1
+  %17 = extractelement <4 x float> %14, i32 2
+  %18 = extractelement <4 x float> %14, i32 3
+  %19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
+  %20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
+  %21 = add i32 %5, %7
+  %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
+  %23 = extractelement <4 x float> %22, i32 0
+  %24 = extractelement <4 x float> %22, i32 1
+  %25 = extractelement <4 x float> %22, i32 2
+  %26 = extractelement <4 x float> %22, i32 3
+  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18)
+  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26)
+  ret void
+}
+
+
+; Function Attrs: convergent nounwind
 declare void @llvm.AMDGPU.barrier.global() #1
 
 ; Function Attrs: nounwind readnone
@@ -38,7 +79,7 @@ declare <4 x float>
@llvm.SI.vs.load.input(<16 x i8>, i32, i32) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } -attributes #1 = { noduplicate nounwind } +attributes #1 = { convergent nounwind } attributes #2 = { nounwind readnone } !0 = !{!1, !1, i64 0, i32 1} diff --git a/test/CodeGen/AMDGPU/work-item-intrinsics.ll b/test/CodeGen/AMDGPU/work-item-intrinsics.ll index 4328e964c1bf..e7fcd1ff3650 100644 --- a/test/CodeGen/AMDGPU/work-item-intrinsics.ll +++ b/test/CodeGen/AMDGPU/work-item-intrinsics.ll @@ -1,15 +1,34 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=HSA -check-prefix=CI-HSA -check-prefix=FUNC %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=HSA -check-prefix=VI-HSA -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}ngroups_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].X +; EG: MOV {{\*? *}}[[VAL]], KC0[0].X + +; HSA: .amd_kernel_code_t + +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 0 +; HSA: enable_sgpr_queue_ptr = 0 +; HSA: enable_sgpr_kernarg_segment_ptr = 1 +; HSA: enable_sgpr_dispatch_id = 0 +; HSA: enable_sgpr_flat_scratch_init = 0 +; HSA: enable_sgpr_private_segment_size = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 + +; HSA: .end_amd_kernel_code_t + + +; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] -; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] define void @ngroups_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.x() #0 @@ -19,12 +38,12 @@ entry: ; FUNC-LABEL: {{^}}ngroups_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].Y +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @ngroups_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.y() #0 @@ -34,12 +53,12 @@ entry: ; FUNC-LABEL: {{^}}ngroups_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].Z +; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].Z -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @ngroups_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.ngroups.z() #0 @@ -49,12 +68,12 @@ entry: ; FUNC-LABEL: {{^}}global_size_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].W +; EG: MOV {{\*? *}}[[VAL]], KC0[0].W -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @global_size_x (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.x() #0 @@ -64,12 +83,12 @@ entry: ; FUNC-LABEL: {{^}}global_size_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].X +; EG: MOV {{\*? *}}[[VAL]], KC0[1].X -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @global_size_y (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.y() #0 @@ -79,12 +98,12 @@ entry: ; FUNC-LABEL: {{^}}global_size_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].Y +; EG: MOV {{\*? 
*}}[[VAL]], KC0[1].Y -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] +; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 +; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] +; GCN-NOHSA: buffer_store_dword [[VVAL]] define void @global_size_z (i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.global.size.z() #0 @@ -92,74 +111,34 @@ entry: ret void } -; FUNC-LABEL: {{^}}local_size_x: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].Z - -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] -define void @local_size_x (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.local.size.x() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}local_size_y: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].W - -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] -define void @local_size_y (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.local.size.y() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}local_size_z: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].X - -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] -define void @local_size_z (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.r600.read.local.size.z() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; FUNC-LABEL: {{^}}get_work_dim: -; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].Z - -; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb -; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]] -; GCN: buffer_store_dword [[VVAL]] -define void @get_work_dim (i32 addrspace(1)* %out) { -entry: - %0 = call i32 @llvm.AMDGPU.read.workdim() #0 - store i32 %0, i32 addrspace(1)* %out - ret void -} - -; The tgid values are stored in sgprs offset by the number of user sgprs. -; Currently we always use exactly 2 user sgprs for the pointer to the -; kernel arguments, but this may change in the future. +; The tgid values are stored in sgprs offset by the number of user +; sgprs. 
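; A minimal sketch of the SGPR layout the checks below rely on (the register
; numbers are assumptions read off the CHECK lines, not taken from any ABI
; document): with the non-HSA ABI only 2 user SGPRs are set up for the
; kernarg pointer in s[0:1], so the first workgroup id arrives in s2; the
; amdhsa runs enable 6 user SGPRs, so it arrives in s6 instead.
;
;   %id = call i32 @llvm.r600.read.tgid.x() #0   ; lands in s2 (non-HSA) or s6 (HSA)
;   store i32 %id, i32 addrspace(1)* %out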
; FUNC-LABEL: {{^}}tgid_x: -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4 -; GCN: buffer_store_dword [[VVAL]] -define void @tgid_x (i32 addrspace(1)* %out) { +; HSA: .amd_kernel_code_t +; HSA: compute_pgm_rsrc2_user_sgpr = 6 +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 0 +; HSA: compute_pgm_rsrc2_tgid_z_en = 0 +; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 +; HSA: .end_amd_kernel_code_t + +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}} +; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] +; HSA: flat_store_dword [[VVAL]] + +; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_x(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.x() #0 store i32 %0, i32 addrspace(1)* %out @@ -167,9 +146,26 @@ entry: } ; FUNC-LABEL: {{^}}tgid_y: -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5 -; GCN: buffer_store_dword [[VVAL]] -define void @tgid_y (i32 addrspace(1)* %out) { +; HSA: compute_pgm_rsrc2_user_sgpr = 6 +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 1 +; HSA: compute_pgm_rsrc2_tgid_z_en = 0 +; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3 +; GCN-HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7 +; GCN-NOHSA: buffer_store_dword [[VVAL]] +; HSA: flat_store_dword [[VVAL]] + +; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_y(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.y() #0 store i32 %0, i32 addrspace(1)* %out @@ -177,36 +173,81 @@ entry: } ; FUNC-LABEL: {{^}}tgid_z: -; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6 -; GCN: buffer_store_dword [[VVAL]] -define void @tgid_z (i32 addrspace(1)* %out) { +; HSA: compute_pgm_rsrc2_user_sgpr = 6 +; HSA: compute_pgm_rsrc2_tgid_x_en = 1 +; HSA: compute_pgm_rsrc2_tgid_y_en = 0 +; HSA: compute_pgm_rsrc2_tgid_z_en = 1 +; HSA: compute_pgm_rsrc2_tg_size_en = 0 +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; HSA: enable_sgpr_private_segment_buffer = 1 +; HSA: enable_sgpr_dispatch_ptr = 0 +; HSA: enable_sgpr_queue_ptr = 0 +; HSA: enable_sgpr_kernarg_segment_ptr = 1 +; HSA: enable_sgpr_dispatch_id = 0 +; HSA: enable_sgpr_flat_scratch_init = 0 +; HSA: enable_sgpr_private_segment_size = 0 +; HSA: enable_sgpr_grid_workgroup_count_x = 0 +; HSA: enable_sgpr_grid_workgroup_count_y = 0 +; HSA: enable_sgpr_grid_workgroup_count_z = 0 + +; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}} +; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}} +; GCN-NOHSA: buffer_store_dword [[VVAL]] +; HSA: flat_store_dword [[VVAL]] + +; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6 +; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2 +; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 +; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 +; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 +define void @tgid_z(i32 
addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tgid.z() #0 store i32 %0, i32 addrspace(1)* %out ret void } +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 132{{$}} + ; FUNC-LABEL: {{^}}tidig_x: -; GCN: buffer_store_dword v0 -define void @tidig_x (i32 addrspace(1)* %out) { +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0 +; GCN-NOHSA: buffer_store_dword v0 +; HSA: flat_store_dword v0 +define void @tidig_x(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.x() #0 store i32 %0, i32 addrspace(1)* %out ret void } +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 2180{{$}} + ; FUNC-LABEL: {{^}}tidig_y: -; GCN: buffer_store_dword v1 -define void @tidig_y (i32 addrspace(1)* %out) { + +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1 +; GCN-NOHSA: buffer_store_dword v1 +; HSA: flat_store_dword v1 +define void @tidig_y(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.y() #0 store i32 %0, i32 addrspace(1)* %out ret void } +; GCN-NOHSA: .section .AMDGPU.config +; GCN-NOHSA: .long 47180 +; GCN-NOHSA-NEXT: .long 4228{{$}} + ; FUNC-LABEL: {{^}}tidig_z: -; GCN: buffer_store_dword v2 -define void @tidig_z (i32 addrspace(1)* %out) { +; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2 +; GCN-NOHSA: buffer_store_dword v2 +; HSA: flat_store_dword v2 +define void @tidig_z(i32 addrspace(1)* %out) { entry: %0 = call i32 @llvm.r600.read.tidig.z() #0 store i32 %0, i32 addrspace(1)* %out @@ -221,10 +262,6 @@ declare i32 @llvm.r600.read.global.size.x() #0 declare i32 @llvm.r600.read.global.size.y() #0 declare i32 @llvm.r600.read.global.size.z() #0 -declare i32 @llvm.r600.read.local.size.x() #0 -declare i32 @llvm.r600.read.local.size.y() #0 -declare i32 @llvm.r600.read.local.size.z() #0 - declare i32 @llvm.r600.read.tgid.x() #0 declare i32 @llvm.r600.read.tgid.y() #0 declare i32 @llvm.r600.read.tgid.z() #0 diff --git a/test/CodeGen/AMDGPU/xor.ll b/test/CodeGen/AMDGPU/xor.ll index ddb920af29d8..655655d92f08 100644 --- a/test/CodeGen/AMDGPU/xor.ll +++ b/test/CodeGen/AMDGPU/xor.ll @@ -38,7 +38,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in } ; FUNC-LABEL: {{^}}xor_i1: -; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; EG: XOR_INT {{\** *}}{{T[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}} ; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}} ; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}} diff --git a/test/CodeGen/AMDGPU/zero_extend.ll b/test/CodeGen/AMDGPU/zero_extend.ll index 033055db185a..35ddf2b0a465 100644 --- a/test/CodeGen/AMDGPU/zero_extend.ll +++ b/test/CodeGen/AMDGPU/zero_extend.ll @@ -7,8 +7,7 @@ ; R600: MEM_RAT_CACHELESS STORE_RAW ; SI: {{^}}test: -; SI: s_mov_b32 [[ZERO:s[0-9]]], 0{{$}} -; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], [[ZERO]] +; SI: v_mov_b32_e32 v[[V_ZERO:[0-9]]], 0{{$}} ; SI: buffer_store_dwordx2 v[0:[[V_ZERO]]{{\]}} define void @test(i64 addrspace(1)* %out, i32 %a, i32 %b, i32 %c) { entry: diff --git a/test/CodeGen/ARM/2007-03-13-InstrSched.ll b/test/CodeGen/ARM/2007-03-13-InstrSched.ll index 9c0143be06c3..81a6bb64971d 100644 --- a/test/CodeGen/ARM/2007-03-13-InstrSched.ll +++ b/test/CodeGen/ARM/2007-03-13-InstrSched.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ ; RUN: -mattr=+v6 | grep r9 ; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic \ -; RUN: -mattr=+v6 -arm-reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer +; RUN: 
-mattr=+v6,+reserve-r9 -ifcvt-limit=0 -stats 2>&1 | grep asm-printer
 ; | grep 35
 
 define void @test(i32 %tmp56222, i32 %tmp36224, i32 %tmp46223, i32 %i.0196.0.ph, i32 %tmp8, i32* %tmp1011, i32** %tmp1, i32* %d2.1.out, i32* %d3.1.out, i32* %d0.1.out, i32* %d1.1.out) {
diff --git a/test/CodeGen/ARM/2009-10-16-Scope.ll b/test/CodeGen/ARM/2009-10-16-Scope.ll
index 3f47488372b8..613694f091d1 100644
--- a/test/CodeGen/ARM/2009-10-16-Scope.ll
+++ b/test/CodeGen/ARM/2009-10-16-Scope.ll
@@ -24,9 +24,9 @@ declare i32 @foo(i32) ssp
 !0 = !DILocation(line: 5, column: 2, scope: !1)
 !1 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !2)
-!2 = !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
-!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
-!4 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "count_", line: 5, scope: !5, file: !3, type: !6)
+!2 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !3)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang 1.1", isOptimized: true, emissionKind: 0, file: !8, retainedTypes: !9)
+!4 = !DILocalVariable(name: "count_", line: 5, scope: !5, file: !3, type: !6)
 !5 = distinct !DILexicalBlock(line: 1, column: 1, file: null, scope: !1)
 !6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
 !7 = !DILocation(line: 6, column: 1, scope: !2)
diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
index 638b26c73146..1341830b4a4b 100644
--- a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
+++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -14,11 +14,11 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
 !llvm.dbg.cu = !{!3}
 !llvm.module.flags = !{!15}
 
-!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", line: 93, arg: 0, scope: !1, file: !2, type: !6)
-!1 = !DISubprogram(name: "__addvsi3", linkageName: "__addvsi3", line: 94, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: null, type: !4)
+!0 = !DILocalVariable(name: "b", line: 93, arg: 2, scope: !1, file: !2, type: !6)
+!1 = distinct !DISubprogram(name: "__addvsi3", linkageName: "__addvsi3", line: 94, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !12, scope: null, type: !4)
 !2 = !DIFile(filename: "libgcc2.c", directory: "/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc")
 !12 = !DIFile(filename: "libgcc2.c", directory: "/Users/bwilson/local/nightly/test-2010-04-14/build/llvmgcc.roots/llvmgcc~obj/src/gcc")
-!3 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc.
build 5658) (LLVM build 00)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13, subprograms: !14) !4 = !DISubroutineType(types: !5) !5 = !{!6, !6, !6} !6 = !DIDerivedType(tag: DW_TAG_typedef, name: "SItype", line: 152, file: !12, baseType: !8) diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll index cfaffd8234ba..171b6d2bcc5c 100644 --- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll +++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll @@ -1,36 +1,36 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+neon -O0 -optimize-regalloc -regalloc=basic %s -o /dev/null ; This test would crash the rewriter when trying to handle a spill after one of -; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register. +; the @llvm.arm.neon.vld3.v8i8.p0i8 defined three parts of a register. %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } -declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8*, i32) nounwind readonly +declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst3.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A7, i8* %A8, i8* %B) nounwind { - %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1b = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A2, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 0 ; <<8 x i8>> [#uses=1] %tmp4b = extractvalue %struct.__neon_int8x8x3_t %tmp1b, 1 ; <<8 x i8>> [#uses=1] - %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1d = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A4, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 0 ; <<8 x i8>> [#uses=1] %tmp4d = extractvalue %struct.__neon_int8x8x3_t %tmp1d, 1 ; <<8 x i8>> [#uses=1] - %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1e = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A5, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2e = extractvalue %struct.__neon_int8x8x3_t %tmp1e, 0 ; <<8 x i8>> [#uses=1] - %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] + %tmp1f = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A6, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=1] %tmp2f = extractvalue %struct.__neon_int8x8x3_t %tmp1f, 0 ; <<8 x i8>> [#uses=1] - %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1g = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A7, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] %tmp2g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 0 ; <<8 x i8>> [#uses=1] %tmp4g = extractvalue %struct.__neon_int8x8x3_t %tmp1g, 1 ; <<8 x i8>> [#uses=1] - %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] + %tmp1h = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0i8(i8* %A8, i32 1) ; <%struct.__neon_int8x8x3_t> [#uses=2] 
%tmp2h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 0 ; <<8 x i8>> [#uses=1] %tmp3h = extractvalue %struct.__neon_int8x8x3_t %tmp1h, 2 ; <<8 x i8>> [#uses=1] %tmp2bd = add <8 x i8> %tmp2b, %tmp2d ; <<8 x i8>> [#uses=1] %tmp4bd = add <8 x i8> %tmp4b, %tmp4d ; <<8 x i8>> [#uses=1] %tmp2abcd = mul <8 x i8> undef, %tmp2bd ; <<8 x i8>> [#uses=1] %tmp4abcd = mul <8 x i8> undef, %tmp4bd ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1) + call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %A1, <8 x i8> %tmp4abcd, <8 x i8> zeroinitializer, <8 x i8> %tmp2abcd, i32 1) %tmp2ef = sub <8 x i8> %tmp2e, %tmp2f ; <<8 x i8>> [#uses=1] %tmp2gh = sub <8 x i8> %tmp2g, %tmp2h ; <<8 x i8>> [#uses=1] %tmp3gh = sub <8 x i8> zeroinitializer, %tmp3h ; <<8 x i8>> [#uses=1] @@ -38,8 +38,8 @@ define <8 x i8> @t3(i8* %A1, i8* %A2, i8* %A3, i8* %A4, i8* %A5, i8* %A6, i8* %A %tmp2efgh = mul <8 x i8> %tmp2ef, %tmp2gh ; <<8 x i8>> [#uses=1] %tmp3efgh = mul <8 x i8> undef, %tmp3gh ; <<8 x i8>> [#uses=1] %tmp4efgh = mul <8 x i8> %tmp4ef, undef ; <<8 x i8>> [#uses=2] - call void @llvm.arm.neon.vst3.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1) + call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %A2, <8 x i8> %tmp4efgh, <8 x i8> %tmp3efgh, <8 x i8> %tmp2efgh, i32 1) %tmp4 = sub <8 x i8> %tmp4efgh, %tmp4abcd ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst3.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1) + tail call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %B, <8 x i8> zeroinitializer, <8 x i8> undef, <8 x i8> undef, i32 1) ret <8 x i8> %tmp4 } diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll index 6a6ccf3d0a01..c6c0e2caee42 100644 --- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll +++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll @@ -36,8 +36,8 @@ entry: %tmp5 = insertelement <4 x float> %tmp7, float %18, i32 3 %19 = fmul <4 x float> %tmp5, %2 %20 = bitcast float* %fltp to i8* - tail call void @llvm.arm.neon.vst1.v4f32(i8* %20, <4 x float> %19, i32 1) + tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %20, <4 x float> %19, i32 1) ret void } -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll index f86c3ba9ef6e..1deb98631a4f 100644 --- a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll +++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll @@ -12,8 +12,8 @@ entry: %tmp9 = trunc i128 %tmp8 to i64 ; [#uses=1] %tmp16.i = bitcast i64 %tmp6 to <8 x i8> ; <<8 x i8>> [#uses=1] %tmp20.i = bitcast i64 %tmp9 to <8 x i8> ; <<8 x i8>> [#uses=1] - tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind + tail call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i, i32 1) nounwind ret void } -declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst2.p0i8.v8i8(i8*, <8 x i8>, <8 x i8>, i32) nounwind diff --git a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll index bead8d9781e8..47a5ef0bc544 100755 --- a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll +++ b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll @@ -20,7 +20,7 @@ @.str51 = external constant [45 x i8] ; <[45 x i8]*> [#uses=1] 
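; The alias update just below follows the newer IR syntax, in which an alias
; spells out both its own value type and the aliasee's pointer type. A
; stripped-down sketch of the same shape (the names @impl and @fwd are
; hypothetical, chosen only for illustration):
;
;   define void @impl(i32 %x) {
;     ret void
;   }
;   @fwd = alias void (i32), void (i32)* @impl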
@__PRETTY_FUNCTION__._ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs = external constant [116 x i8] ; <[116 x i8]*> [#uses=1] -@_ZN4llvm9RecordValC1ERKSsPNS_5RecTyEj = alias void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)* @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj ; [#uses=0] +@_ZN4llvm9RecordValC1ERKSsPNS_5RecTyEj = alias void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32), void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)* @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj ; [#uses=0] declare i8* @__dynamic_cast(i8*, i8*, i8*, i32) diff --git a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll index 95bb2769759e..38b352c473b1 100644 --- a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll +++ b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll @@ -47,19 +47,19 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon !llvm.dbg.lv.fn = !{!0, !8, !10, !12} !llvm.dbg.gv = !{!14} -!0 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "buf", line: 4, arg: 0, scope: !1, file: !2, type: !6) -!1 = !DISubprogram(name: "x0", linkageName: "x0", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !26, scope: null, type: !4) +!0 = !DILocalVariable(name: "buf", line: 4, arg: 1, scope: !1, file: !2, type: !6) +!1 = distinct !DISubprogram(name: "x0", linkageName: "x0", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !26, scope: null, type: !4) !2 = !DIFile(filename: "t.c", directory: "/private/tmp") -!3 = !DICompileUnit(language: DW_LANG_C99, producer: "clang 2.0", isOptimized: true, file: !26) +!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang 2.0", isOptimized: true, file: !26) !4 = !DISubroutineType(types: !5) !5 = !{null} !6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, file: !26, scope: !2, baseType: !7) !7 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char) -!8 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "nbytes", line: 4, arg: 0, scope: !1, file: !2, type: !9) +!8 = !DILocalVariable(name: "nbytes", line: 4, arg: 2, scope: !1, file: !2, type: !9) !9 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned long", size: 32, align: 32, encoding: DW_ATE_unsigned) -!10 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "nread", line: 6, scope: !11, file: !2, type: !9) +!10 = !DILocalVariable(name: "nread", line: 6, scope: !11, file: !2, type: !9) !11 = distinct !DILexicalBlock(line: 5, column: 1, file: !26, scope: !1) -!12 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "c", line: 7, scope: !11, file: !2, type: !13) +!12 = !DILocalVariable(name: "c", line: 7, scope: !11, file: !2, type: !13) !13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !14 = !DIGlobalVariable(name: "length", linkageName: "length", line: 1, isLocal: false, isDefinition: true, scope: !2, file: !2, type: !13, variable: i32* @length) !15 = !DILocation(line: 4, column: 24, scope: !1) diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll index 1aee5088eee4..130221d38c23 100644 --- a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll +++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll @@ -16,10 +16,10 @@ target triple = 
"thumbv7-apple-darwin10" define i32 @test(i8* %arg) nounwind { entry: - %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg, i32 1) + %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %arg, i32 1) %1 = shufflevector <2 x i64> undef, <2 x i64> %0, <2 x i32> store <2 x i64> %1, <2 x i64>* undef, align 16 ret i32 undef } -declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly +declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8*, i32) nounwind readonly diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll index 953e2bbf291c..14ddb59b5387 100644 --- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -3,7 +3,7 @@ %struct.SVal = type { i8*, i32 } -define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp { +define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp !dbg !17 { entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] call void @llvm.dbg.value(metadata i32 %i, i64 0, metadata !23, metadata !DIExpression()), !dbg !24 @@ -31,7 +31,7 @@ return: ; preds = %bb2 ret i32 %.0, !dbg !29 } -define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 { +define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 !dbg !16 { entry: %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] call void @llvm.dbg.value(metadata %struct.SVal* %this, i64 0, metadata !31, metadata !DIExpression()), !dbg !34 @@ -47,7 +47,7 @@ return: ; preds = %entry declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone -define i32 @main() nounwind ssp { +define i32 @main() nounwind ssp !dbg !20 { entry: %0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3] %v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4] @@ -80,7 +80,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon !0 = !DISubprogram(name: "SVal", line: 11, isLocal: false, isDefinition: false, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14) !1 = !DICompositeType(tag: DW_TAG_structure_type, name: "SVal", line: 1, size: 128, align: 64, file: !48, elements: !4) !2 = !DIFile(filename: "small.cc", directory: "/Users/manav/R8248330") -!3 = !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !48, enums: !47, retainedTypes: !47, subprograms: !46, globals: !47, imports: !47) +!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", isOptimized: false, emissionKind: 1, file: !48, enums: !47, retainedTypes: !47, subprograms: !46, globals: !47, imports: !47) !4 = !{!5, !7, !0, !9} !5 = !DIDerivedType(tag: DW_TAG_member, name: "Data", line: 7, size: 64, align: 64, file: !48, scope: !1, baseType: !6) !6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !48, baseType: null) @@ -93,35 +93,35 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon !13 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) !14 = !DISubroutineType(types: !15) !15 = !{null, !12} -!16 = !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14, function: void (%struct.SVal*)* @_ZN4SValC1Ev) -!17 = !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !18, function: i32 (i32, %struct.SVal*)* @_Z3fooi4SVal) +!16 = distinct !DISubprogram(name: "SVal", linkageName: "_ZN4SValC1Ev", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !1, type: !14) +!17 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi4SVal", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !18) !18 = !DISubroutineType(types: !19) !19 = !{!13, !13, !1} -!20 = !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !21, function: i32 ()* @main) +!20 = distinct !DISubprogram(name: "main", linkageName: "main", line: 23, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !48, scope: !2, type: !21) !21 = !DISubroutineType(types: !22) !22 = !{!13} -!23 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "i", line: 16, arg: 0, scope: !17, file: !2, type: !13) +!23 = !DILocalVariable(name: "i", line: 16, arg: 1, scope: !17, file: !2, type: !13) !24 = !DILocation(line: 16, scope: !17) -!25 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "location", line: 16, arg: 0, scope: !17, file: !2, type: !26) +!25 = !DILocalVariable(name: "location", line: 16, arg: 2, scope: !17, file: !2, type: !26) !26 = !DIDerivedType(tag: DW_TAG_reference_type, name: "SVal", size: 64, align: 64, file: !48, scope: !2, baseType: !1) !27 = !DILocation(line: 17, scope: !28) !28 = distinct !DILexicalBlock(line: 16, column: 0, file: !2, scope: !17) !29 = !DILocation(line: 18, scope: !28) !30 = !DILocation(line: 20, scope: !28) -!31 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "this", line: 11, arg: 0, scope: !16, file: !2, type: !32) +!31 = !DILocalVariable(name: "this", line: 11, arg: 1, scope: !16, file: !2, type: !32) !32 = !DIDerivedType(tag: DW_TAG_const_type, size: 64, align: 64, flags: DIFlagArtificial, file: !48, scope: !2, baseType: !33) !33 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !48, scope: !2, baseType: !1) !34 = !DILocation(line: 11, scope: !16) !35 = !DILocation(line: 11, scope: !36) !36 = distinct !DILexicalBlock(line: 11, column: 0, file: !48, scope: !37) !37 = distinct !DILexicalBlock(line: 11, column: 0, file: !48, scope: !16) -!38 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "v", line: 24, scope: !39, file: !2, type: !1) +!38 = !DILocalVariable(name: "v", line: 24, scope: 
!39, file: !2, type: !1) !39 = distinct !DILexicalBlock(line: 23, column: 0, file: !48, scope: !40) !40 = distinct !DILexicalBlock(line: 23, column: 0, file: !48, scope: !20) !41 = !DILocation(line: 24, scope: !39) !42 = !DILocation(line: 25, scope: !39) !43 = !DILocation(line: 26, scope: !39) -!44 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "k", line: 26, scope: !39, file: !2, type: !13) +!44 = !DILocalVariable(name: "k", line: 26, scope: !39, file: !2, type: !13) !45 = !DILocation(line: 27, scope: !39) !46 = !{!16, !17, !20} !47 = !{} diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index 9a5baf21b8fb..d5eed8b6a2c4 100644 --- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -28,7 +28,7 @@ target triple = "thumbv7-apple-darwin10" ; CHECK-NOT: {{DW_TAG|NULL}} ; CHECK: DW_AT_location [DW_FORM_exprloc] (<0x8> 03 [[ADDR]] 10 01 22 ) -define zeroext i8 @get1(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get1(i8 zeroext %a) nounwind optsize !dbg !0 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !30 %0 = load i8, i8* @x1, align 4, !dbg !30 @@ -39,7 +39,7 @@ entry: declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone -define zeroext i8 @get2(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get2(i8 zeroext %a) nounwind optsize !dbg !6 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !18, metadata !DIExpression()), !dbg !32 %0 = load i8, i8* @x2, align 4, !dbg !32 @@ -48,7 +48,7 @@ entry: ret i8 %0, !dbg !33 } -define zeroext i8 @get3(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get3(i8 zeroext %a) nounwind optsize !dbg !7 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !21, metadata !DIExpression()), !dbg !34 %0 = load i8, i8* @x3, align 4, !dbg !34 @@ -57,7 +57,7 @@ entry: ret i8 %0, !dbg !35 } -define zeroext i8 @get4(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get4(i8 zeroext %a) nounwind optsize !dbg !8 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !24, metadata !DIExpression()), !dbg !36 %0 = load i8, i8* @x4, align 4, !dbg !36 @@ -66,7 +66,7 @@ entry: ret i8 %0, !dbg !37 } -define zeroext i8 @get5(i8 zeroext %a) nounwind optsize { +define zeroext i8 @get5(i8 zeroext %a) nounwind optsize !dbg !9 { entry: tail call void @llvm.dbg.value(metadata i8 %a, i64 0, metadata !27, metadata !DIExpression()), !dbg !38 %0 = load i8, i8* @x5, align 4, !dbg !38 @@ -78,35 +78,35 @@ entry: !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!49} -!0 = !DISubprogram(name: "get1", linkageName: "get1", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !47, scope: !1, type: !3, function: i8 (i8)* @get1, variables: !42) +!0 = distinct !DISubprogram(name: "get1", linkageName: "get1", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !47, scope: !1, type: !3, variables: !42) !1 = !DIFile(filename: "foo.c", directory: "/tmp/") -!2 = !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2369.8)", isOptimized: true, emissionKind: 0, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48) +!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build 2369.8)", isOptimized: true, emissionKind: 0, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48) !3 = !DISubroutineType(types: !4) !4 = !{!5, !5} !5 = !DIBasicType(tag: DW_TAG_base_type, name: "_Bool", size: 8, align: 8, encoding: DW_ATE_boolean) -!6 = !DISubprogram(name: "get2", linkageName: "get2", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !47, scope: !1, type: !3, function: i8 (i8)* @get2, variables: !43) -!7 = !DISubprogram(name: "get3", linkageName: "get3", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !47, scope: !1, type: !3, function: i8 (i8)* @get3, variables: !44) -!8 = !DISubprogram(name: "get4", linkageName: "get4", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !47, scope: !1, type: !3, function: i8 (i8)* @get4, variables: !45) -!9 = !DISubprogram(name: "get5", linkageName: "get5", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !47, scope: !1, type: !3, function: i8 (i8)* @get5, variables: !46) -!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 4, arg: 0, scope: !0, file: !1, type: !5) -!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 4, scope: !12, file: !1, type: !5) +!6 = distinct !DISubprogram(name: "get2", linkageName: "get2", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 7, file: !47, scope: !1, type: !3, variables: !43) +!7 = distinct !DISubprogram(name: "get3", linkageName: "get3", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !47, scope: !1, type: !3, variables: !44) +!8 = distinct !DISubprogram(name: "get4", linkageName: "get4", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !47, scope: !1, type: !3, variables: !45) +!9 = distinct !DISubprogram(name: "get5", linkageName: "get5", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !47, scope: !1, type: !3, variables: !46) +!10 = !DILocalVariable(name: "a", line: 4, arg: 1, scope: !0, file: !1, type: !5) +!11 = !DILocalVariable(name: "b", line: 4, scope: !12, file: !1, type: !5) !12 = distinct !DILexicalBlock(line: 4, column: 0, file: !47, scope: !0) !13 = !DIGlobalVariable(name: "x1", line: 3, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x1) !14 = !DIGlobalVariable(name: "x2", line: 6, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x2) !15 = !DIGlobalVariable(name: "x3", line: 9, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x3) !16 = !DIGlobalVariable(name: "x4", line: 12, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x4) !17 = !DIGlobalVariable(name: "x5", line: 15, isLocal: false, isDefinition: true, scope: !1, file: !1, type: !5, variable: i8* @x5) -!18 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 7, arg: 0, scope: !6, file: !1, type: !5) -!19 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 7, scope: !20, file: !1, type: !5) 
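; The same debug-metadata rewrite repeats across these tests; a minimal
; before/after sketch of the pattern being applied (the !N numbers and @f are
; hypothetical):
;
;   ; old form: the subprogram points at the function, and local variables
;   ; carry an explicit tag with 0-based arg numbering
;   !4 = !DISubprogram(name: "f", isDefinition: true, function: i32 (i32)* @f)
;   !5 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", arg: 0, scope: !4)
;
;   ; new form: the define carries a !dbg attachment instead, the subprogram
;   ; (and compile unit) are 'distinct', the tag is dropped, args are 1-based
;   define i32 @f(i32 %a) !dbg !4 { ... }
;   !4 = distinct !DISubprogram(name: "f", isDefinition: true)
;   !5 = !DILocalVariable(name: "a", arg: 1, scope: !4)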
+!18 = !DILocalVariable(name: "a", line: 7, arg: 1, scope: !6, file: !1, type: !5) +!19 = !DILocalVariable(name: "b", line: 7, scope: !20, file: !1, type: !5) !20 = distinct !DILexicalBlock(line: 7, column: 0, file: !47, scope: !6) -!21 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 10, arg: 0, scope: !7, file: !1, type: !5) -!22 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 10, scope: !23, file: !1, type: !5) +!21 = !DILocalVariable(name: "a", line: 10, arg: 1, scope: !7, file: !1, type: !5) +!22 = !DILocalVariable(name: "b", line: 10, scope: !23, file: !1, type: !5) !23 = distinct !DILexicalBlock(line: 10, column: 0, file: !47, scope: !7) -!24 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 13, arg: 0, scope: !8, file: !1, type: !5) -!25 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 13, scope: !26, file: !1, type: !5) +!24 = !DILocalVariable(name: "a", line: 13, arg: 1, scope: !8, file: !1, type: !5) +!25 = !DILocalVariable(name: "b", line: 13, scope: !26, file: !1, type: !5) !26 = distinct !DILexicalBlock(line: 13, column: 0, file: !47, scope: !8) -!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 16, arg: 0, scope: !9, file: !1, type: !5) -!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 16, scope: !29, file: !1, type: !5) +!27 = !DILocalVariable(name: "a", line: 16, arg: 1, scope: !9, file: !1, type: !5) +!28 = !DILocalVariable(name: "b", line: 16, scope: !29, file: !1, type: !5) !29 = distinct !DILexicalBlock(line: 16, column: 0, file: !47, scope: !9) !30 = !DILocation(line: 4, scope: !0) !31 = !DILocation(line: 4, scope: !12) diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll index aac8f7b3a026..1097050df54b 100644 --- a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll +++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -arm-global-merge -global-merge-group-by-use=false | FileCheck %s -; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2 +; CHECK: .zerofill __DATA,__bss,l__MergedGlobals,16,2 @prev = external global [0 x i16] @max_lazy_match = internal unnamed_addr global i32 0, align 4 diff --git a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll index 067c719f491c..3d82e706862c 100644 --- a/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-08-02-MergedGlobalDbg.ll @@ -28,7 +28,7 @@ target triple = "thumbv7-apple-macosx10.7.0" @x4 = internal unnamed_addr global i32 4, align 4 @x5 = global i32 0, align 4 -define i32 @get1(i32 %a) nounwind optsize ssp { +define i32 @get1(i32 %a) nounwind optsize ssp !dbg !1 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !10, metadata !DIExpression()), !dbg !30 %1 = load i32, i32* @x1, align 4, !dbg !31 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !11, metadata !DIExpression()), !dbg !31 @@ -36,7 +36,7 @@ define i32 @get1(i32 %a) nounwind optsize ssp { ret i32 %1, !dbg !31 } -define i32 @get2(i32 %a) nounwind optsize ssp { +define i32 @get2(i32 %a) nounwind optsize ssp !dbg !6 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !13, metadata !DIExpression()), !dbg !32 %1 = load i32, i32* @x2, align 4, !dbg !33 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !14, metadata !DIExpression()), !dbg !33 @@ -44,7 +44,7 @@ define i32 @get2(i32 %a) nounwind optsize ssp { ret i32 %1, !dbg !33 } -define 
i32 @get3(i32 %a) nounwind optsize ssp { +define i32 @get3(i32 %a) nounwind optsize ssp !dbg !7 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !16, metadata !DIExpression()), !dbg !34 %1 = load i32, i32* @x3, align 4, !dbg !35 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !17, metadata !DIExpression()), !dbg !35 @@ -52,7 +52,7 @@ define i32 @get3(i32 %a) nounwind optsize ssp { ret i32 %1, !dbg !35 } -define i32 @get4(i32 %a) nounwind optsize ssp { +define i32 @get4(i32 %a) nounwind optsize ssp !dbg !8 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !19, metadata !DIExpression()), !dbg !36 %1 = load i32, i32* @x4, align 4, !dbg !37 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !20, metadata !DIExpression()), !dbg !37 @@ -60,7 +60,7 @@ define i32 @get4(i32 %a) nounwind optsize ssp { ret i32 %1, !dbg !37 } -define i32 @get5(i32 %a) nounwind optsize ssp { +define i32 @get5(i32 %a) nounwind optsize ssp !dbg !9 { tail call void @llvm.dbg.value(metadata i32 %a, i64 0, metadata !27, metadata !DIExpression()), !dbg !38 %1 = load i32, i32* @x5, align 4, !dbg !39 tail call void @llvm.dbg.value(metadata i32 %1, i64 0, metadata !28, metadata !DIExpression()), !dbg !39 @@ -73,32 +73,32 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!49} -!0 = !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48) -!1 = !DISubprogram(name: "get1", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !47, scope: !2, type: !3, function: i32 (i32)* @get1, variables: !42) +!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 1, file: !47, enums: !48, retainedTypes: !48, subprograms: !40, globals: !41, imports: !48) +!1 = distinct !DISubprogram(name: "get1", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !47, scope: !2, type: !3, variables: !42) !2 = !DIFile(filename: "ss3.c", directory: "/private/tmp") !3 = !DISubroutineType(types: !4) !4 = !{!5} !5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed) -!6 = !DISubprogram(name: "get2", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !47, scope: !2, type: !3, function: i32 (i32)* @get2, variables: !43) -!7 = !DISubprogram(name: "get3", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !47, scope: !2, type: !3, function: i32 (i32)* @get3, variables: !44) -!8 = !DISubprogram(name: "get4", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !47, scope: !2, type: !3, function: i32 (i32)* @get4, variables: !45) -!9 = !DISubprogram(name: "get5", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !47, scope: !2, type: !3, function: i32 (i32)* @get5, variables: !46) -!10 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 5, arg: 1, scope: !1, file: !2, type: !5) -!11 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 5, scope: !12, file: !2, 
type: !5) +!6 = distinct !DISubprogram(name: "get2", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !47, scope: !2, type: !3, variables: !43) +!7 = distinct !DISubprogram(name: "get3", line: 11, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !47, scope: !2, type: !3, variables: !44) +!8 = distinct !DISubprogram(name: "get4", line: 14, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 14, file: !47, scope: !2, type: !3, variables: !45) +!9 = distinct !DISubprogram(name: "get5", line: 17, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 17, file: !47, scope: !2, type: !3, variables: !46) +!10 = !DILocalVariable(name: "a", line: 5, arg: 1, scope: !1, file: !2, type: !5) +!11 = !DILocalVariable(name: "b", line: 5, scope: !12, file: !2, type: !5) !12 = distinct !DILexicalBlock(line: 5, column: 19, file: !47, scope: !1) -!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 8, arg: 1, scope: !6, file: !2, type: !5) -!14 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 8, scope: !15, file: !2, type: !5) +!13 = !DILocalVariable(name: "a", line: 8, arg: 1, scope: !6, file: !2, type: !5) +!14 = !DILocalVariable(name: "b", line: 8, scope: !15, file: !2, type: !5) !15 = distinct !DILexicalBlock(line: 8, column: 17, file: !47, scope: !6) -!16 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 11, arg: 1, scope: !7, file: !2, type: !5) -!17 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 11, scope: !18, file: !2, type: !5) +!16 = !DILocalVariable(name: "a", line: 11, arg: 1, scope: !7, file: !2, type: !5) +!17 = !DILocalVariable(name: "b", line: 11, scope: !18, file: !2, type: !5) !18 = distinct !DILexicalBlock(line: 11, column: 19, file: !47, scope: !7) -!19 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 14, arg: 1, scope: !8, file: !2, type: !5) -!20 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 14, scope: !21, file: !2, type: !5) +!19 = !DILocalVariable(name: "a", line: 14, arg: 1, scope: !8, file: !2, type: !5) +!20 = !DILocalVariable(name: "b", line: 14, scope: !21, file: !2, type: !5) !21 = distinct !DILexicalBlock(line: 14, column: 19, file: !47, scope: !8) !25 = !DIGlobalVariable(name: "x1", line: 4, isLocal: true, isDefinition: true, scope: !0, file: !2, type: !5, variable: i32* @x1) !26 = !DIGlobalVariable(name: "x2", line: 7, isLocal: true, isDefinition: true, scope: !0, file: !2, type: !5, variable: i32* @x2) -!27 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "a", line: 17, arg: 1, scope: !9, file: !2, type: !5) -!28 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "b", line: 17, scope: !29, file: !2, type: !5) +!27 = !DILocalVariable(name: "a", line: 17, arg: 1, scope: !9, file: !2, type: !5) +!28 = !DILocalVariable(name: "b", line: 17, scope: !29, file: !2, type: !5) !29 = distinct !DILexicalBlock(line: 17, column: 19, file: !47, scope: !9) !30 = !DILocation(line: 5, column: 16, scope: !1) !31 = !DILocation(line: 5, column: 32, scope: !12) diff --git a/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll b/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll index 3cbc4cdcd707..d702af7c0c70 100644 --- a/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll +++ b/test/CodeGen/ARM/2011-08-12-vmovqqqq-pseudo.ll @@ -4,9 +4,9 @@ define void 
@test_vmovqqqq_pseudo() nounwind ssp { entry: - %vld3_lane = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> zeroinitializer, i32 7, i32 2) + %vld3_lane = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> zeroinitializer, i32 7, i32 2) store { <8 x i16>, <8 x i16>, <8 x i16> } %vld3_lane, { <8 x i16>, <8 x i16>, <8 x i16> }* undef ret void } -declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly +declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.arm.neon.vld3lane.v8i16.p0i8(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind readonly diff --git a/test/CodeGen/ARM/2011-10-26-memset-inline.ll b/test/CodeGen/ARM/2011-10-26-memset-inline.ll index 17bd291a6b55..5df439389cdb 100644 --- a/test/CodeGen/ARM/2011-10-26-memset-inline.ll +++ b/test/CodeGen/ARM/2011-10-26-memset-inline.ll @@ -1,5 +1,5 @@ ; Make sure short memsets on ARM lower to stores, even when optimizing for size. -; RUN: llc -march=arm < %s | FileCheck %s -check-prefix=CHECK-GENERIC +; RUN: llc -march=arm -mattr=+strict-align < %s | FileCheck %s -check-prefix=CHECK-GENERIC ; RUN: llc -march=arm -mcpu=cortex-a8 < %s | FileCheck %s -check-prefix=CHECK-UNALIGNED target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" diff --git a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll index b70b7f6f3b2e..f622ceb584e6 100644 --- a/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll +++ b/test/CodeGen/ARM/2012-01-24-RegSequenceLiveRange.ll @@ -52,8 +52,8 @@ cond.end295: ; preds = %entry %shuffle.i35.i.i = shufflevector <2 x i64> undef, <2 x i64> undef, <1 x i32> zeroinitializer %shuffle.i34.i.i = shufflevector <1 x i64> %shuffle.i36.i.i, <1 x i64> %shuffle.i35.i.i, <2 x i32> %2 = bitcast <2 x i64> %shuffle.i34.i.i to <4 x float> - tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind - tail call void @llvm.arm.neon.vst1.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind + tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* undef, <4 x float> %0, i32 4) nounwind + tail call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* undef, <4 x float> %2, i32 4) nounwind unreachable for.end: ; preds = %entry @@ -63,10 +63,10 @@ for.end: ; preds = %entry ; Check that pseudo-expansion preserves flags. 
define void @foo3(i8* %p) nounwind ssp { entry: - tail call void @llvm.arm.neon.vst2.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4) + tail call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* %p, <4 x float> undef, <4 x float> undef, i32 4) ret void } declare arm_aapcs_vfpcc void @bar(i8*, float, float, float) -declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind -declare void @llvm.arm.neon.vst2.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind +declare void @llvm.arm.neon.vst2.p0i8.v4f32(i8*, <4 x float>, <4 x float>, i32) nounwind diff --git a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll index 7f30ae10e436..606af47a3d8e 100644 --- a/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll +++ b/test/CodeGen/ARM/2012-05-10-PreferVMOVtoVDUP32.ll @@ -7,8 +7,8 @@ entry: %vecinit.i = insertelement <2 x i32> undef, i32 %x, i32 0 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %x, i32 1 %0 = bitcast i32* %p to i8* - tail call void @llvm.arm.neon.vst1.v2i32(i8* %0, <2 x i32> %vecinit1.i, i32 4) + tail call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* %0, <2 x i32> %vecinit1.i, i32 4) ret void } -declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v2i32(i8*, <2 x i32>, i32) nounwind diff --git a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll index 545bfc73c590..6cff67614c64 100644 --- a/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll +++ b/test/CodeGen/ARM/2012-08-27-CopyPhysRegCrash.ll @@ -5,9 +5,9 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios5.1.0" -declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8(i8*, i32) nounwind readonly +declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.arm.neon.vld3.v16i8.p0i8(i8*, i32) nounwind readonly -declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind +declare void @llvm.arm.neon.vst1.p0i8.v16i8(i8*, <16 x i8>, i32) nounwind define void @findEdges(i8*) nounwind ssp { %2 = icmp sgt i32 undef, 0 @@ -19,16 +19,16 @@ define void @findEdges(i8*) nounwind ssp { ;
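Taken together, the ARM test updates above are one mechanical rename: the overloaded NEON load/store intrinsics now mangle the pointer type of their memory operand into the intrinsic name (p0i8 for an i8* in address space 0) in addition to the vector type, so llvm.arm.neon.vst1.v4f32 becomes llvm.arm.neon.vst1.p0i8.v4f32 and llvm.arm.neon.vld3lane.v8i16 becomes llvm.arm.neon.vld3lane.v8i16.p0i8 (for loads the result vector type is mangled first, then the pointer). A minimal sketch of the new spelling, distilled from those tests (the function name @store_example is illustrative, not part of the patch):

; New-style declaration: both the pointer type (p0i8) and the stored
; vector type (v4f32) appear in the mangled intrinsic name.
declare void @llvm.arm.neon.vst1.p0i8.v4f32(i8*, <4 x float>, i32) nounwind

define void @store_example(i8* %p, <4 x float> %v) nounwind {
  ; Store the 4 x float vector through %p with alignment 4.
  call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* %p, <4 x float> %v, i32 4)
  ret void
}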

[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/absolute_sym.macho.i386.o b/test/tools/dsymutil/Inputs/absolute_sym.macho.i386.o
new file mode 100644
index 0000000000000000000000000000000000000000..445e32271cf5a63da5f450bee66e0b6caec17cac
GIT binary patch
literal 2472
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/basic.macho.i386 b/test/tools/dsymutil/Inputs/basic.macho.i386
new file mode 100755
index 0000000000000000000000000000000000000000..ee6be096fe5a1e060efc9db2dc33dc57d4079f85
GIT binary patch
literal 9080
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.o b/test/tools/dsymutil/Inputs/basic2-custom-linetable.macho.x86_64.o
new file mode 100644
index 0000000000000000000000000000000000000000..07b36a20c1da565e05e356028842f8b686c8d9bc
GIT binary patch
literal 3144
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/basic2.c b/test/tools/dsymutil/Inputs/basic2.c
index 13c6d07c2a08..4524e9bc6cc3 100644
--- a/test/tools/dsymutil/Inputs/basic2.c
+++ b/test/tools/dsymutil/Inputs/basic2.c
@@ -20,3 +20,9 @@ int foo(int arg) {
   return bar(arg+val) + inc() + baz++;
 }
+/* This file was also used to create basic2-custom-linetable.macho.x86_64.o
+   with a custom clang that had different settings for the linetable
+   encoding constants: line_base == -1 and line_range == 4.
+
+   clang -c -g basic2.c -o basic2-custom-linetable.macho.x86_64.o
+*/
diff --git a/test/tools/dsymutil/Inputs/dead-stripped/1.o b/test/tools/dsymutil/Inputs/dead-stripped/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..fbdffbc61a77479281e8ea3ca1455cfa196661d0
GIT binary patch
literal 3200
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/fat-test.arm.dylib b/test/tools/dsymutil/Inputs/fat-test.arm.dylib
new file mode 100755
index 0000000000000000000000000000000000000000..a8d4f37be4dea45ec9e97186b332a5826a9b3880
GIT binary patch
literal 25180
[binary patch data omitted]
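The line_base and line_range constants called out in the basic2.c comment above feed the DWARF line-number program's special opcodes. Per the DWARF specification, a special opcode op advances the line register by line_base + ((op - opcode_base) mod line_range). With the custom values line_base == -1 and line_range == 4, each special opcode can therefore encode a line delta of only -1, 0, +1, or +2, instead of the -5 through +8 range produced by the commonly used line_base == -5, line_range == 14; a consumer that hard-codes the usual constants would decode this object file's line table incorrectly, which is presumably the hazard this input is meant to exercise.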
diff --git a/test/tools/dsymutil/Inputs/fat-test.arm.o b/test/tools/dsymutil/Inputs/fat-test.arm.o
new file mode 100644
index 0000000000000000000000000000000000000000..1cf16803e892f1f0492cff72b12b3bcb7c388f34
GIT binary patch
literal 50736
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/fat-test.c b/test/tools/dsymutil/Inputs/fat-test.c
new file mode 100644
index 000000000000..f6c4c76fe2f4
--- /dev/null
+++ b/test/tools/dsymutil/Inputs/fat-test.c
@@ -0,0 +1,28 @@
+/* Compile with:
+     clang -c -g -arch x86_64h -arch x86_64 -arch i386 fat-test.c
+     libtool -static -o libfat-test.a fat-test.o
+
+   to generate a dylib instead:
+     clang -arch ... -arch ... -arch ... -dynamiclib fat-test.o -o fat-test.dylib
+
+   To reduce the size of the fat binary:
+     lipo -thin i386 -o fat-test.i386.o fat-test.o
+     lipo -thin x86_64 -o fat-test.x86_64.o fat-test.o
+     lipo -thin x86_64h -o fat-test.x86_64h.o fat-test.o
+     lipo -create -arch x86_64h fat-test.x86_64h.o -arch x86_64 fat-test.x86_64.o -arch i386 fat-test.i386.o -o fat-test.o -segalign i386 8 -segalign x86_64 8 -segalign x86_64h 8
+ */
+#ifdef __x86_64h__
+int x86_64h_var;
+#elif defined(__x86_64__)
+int x86_64_var;
+#elif defined(__i386__)
+int i386_var;
+#elif defined(__ARM_ARCH_7S__)
+int armv7s_var;
+#elif defined(__ARM_ARCH_7A__)
+int armv7_var;
+#elif defined(__ARM64_ARCH_8__)
+int arm64_var;
+#else
+#error "Unknown architecture"
+#endif
diff --git a/test/tools/dsymutil/Inputs/fat-test.dylib b/test/tools/dsymutil/Inputs/fat-test.dylib
new file mode 100755
index 0000000000000000000000000000000000000000..4def340a9507e4e2a9587a555a17db8bcf4c5f81
GIT binary patch
literal 13012
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/frame-dw2.ll b/test/tools/dsymutil/Inputs/frame-dw2.ll
index 7ffc93397315..d07c529d3108 100644
--- a/test/tools/dsymutil/Inputs/frame-dw2.ll
+++ b/test/tools/dsymutil/Inputs/frame-dw2.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.11.0"
 
 ; Function Attrs: nounwind ssp
-define i32 @bar(i32 %b) #0 {
+define i32 @bar(i32 %b) #0 !dbg !4 {
 entry:
   %b.addr = alloca i32, align 4
   %var = alloca i32, align 4
@@ -24,7 +24,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 declare i32 @foo(i32*) #2
 
 ; Function Attrs: nounwind ssp
-define i32 @baz(i32 %b) #0 {
+define i32 @baz(i32 %b) #0 !dbg !8 {
 entry:
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
@@ -42,29 +42,29 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!9, !10, !11}
 !llvm.ident = !{!12}
 
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
 !1 = !DIFile(filename: "frame.c", directory: "/tmp")
 !2 = !{}
 !3 = !{!4, !8}
-!4 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @bar, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
 !5 = !DISubroutineType(types: !6)
 !6 = !{!7, !7}
 !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @baz, variables: !2)
+!8 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
 !9 = !{i32 2, !"Dwarf Version", i32 2}
 !10 = !{i32 2, !"Debug Info Version", i32 3}
 !11 = !{i32 1, !"PIC Level", i32 2}
 !12 = !{!"clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)"}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!13 = !DILocalVariable(name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
 !14 = !DIExpression()
 !15 = !DILocation(line: 3, column: 13, scope: !4)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var", scope: !4, file: !1, line: 4, type: !7)
+!16 = !DILocalVariable(name: "var", scope: !4, file: !1, line: 4, type: !7)
 !17 = !DILocation(line: 4, column: 6, scope: !4)
 !18 = !DILocation(line: 4, column: 12, scope: !4)
 !19 = !DILocation(line: 4, column: 14, scope: !4)
 !20 = !DILocation(line: 5, column: 9, scope: !4)
 !21 = !DILocation(line: 5, column: 2, scope: !4)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
+!22 = !DILocalVariable(name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
 !23 = !DILocation(line: 8, column: 13, scope: !8)
 !24 = !DILocation(line: 9, column: 13, scope: !8)
 !25 = !DILocation(line: 9, column: 9, scope: !8)
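The frame-dw2.ll hunks above show the IR-level shape this whole patch migrates debug-info tests to: the function definition now carries a !dbg attachment pointing at its subprogram, the subprogram no longer points back at the function through a function: field, and both !DICompileUnit and !DISubprogram become distinct nodes. A minimal self-contained sketch of the new form (the function and file names here are illustrative, not from the patch):

define i32 @f(i32 %a) !dbg !4 {
  ret i32 %a
}

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8}

; The compile unit and the subprogram must both be 'distinct' nodes now.
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "f.c", directory: "/tmp")
!2 = !{}
!3 = !{!4}
; No 'function:' field; the define's !dbg attachment makes the link instead.
!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isDefinition: true, scopeLine: 1)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!8 = !{i32 2, !"Debug Info Version", i32 3}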
diff --git a/test/tools/dsymutil/Inputs/frame-dw4.ll b/test/tools/dsymutil/Inputs/frame-dw4.ll
index c8674b13e585..f3df896a3bb5 100644
--- a/test/tools/dsymutil/Inputs/frame-dw4.ll
+++ b/test/tools/dsymutil/Inputs/frame-dw4.ll
@@ -4,7 +4,7 @@ target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
 target triple = "i386-apple-macosx10.11.0"
 
 ; Function Attrs: nounwind ssp
-define i32 @bar(i32 %b) #0 {
+define i32 @bar(i32 %b) #0 !dbg !4 {
 entry:
   %b.addr = alloca i32, align 4
   %var = alloca i32, align 4
@@ -24,7 +24,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
 declare i32 @foo(i32*) #2
 
 ; Function Attrs: nounwind ssp
-define i32 @baz(i32 %b) #0 {
+define i32 @baz(i32 %b) #0 !dbg !8 {
 entry:
   %b.addr = alloca i32, align 4
   store i32 %b, i32* %b.addr, align 4
@@ -42,29 +42,29 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
 !llvm.module.flags = !{!9, !10, !11}
 !llvm.ident = !{!12}
 
-!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
 !1 = !DIFile(filename: "frame.c", directory: "/tmp")
 !2 = !{}
 !3 = !{!4, !8}
-!4 = !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @bar, variables: !2)
+!4 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
 !5 = !DISubroutineType(types: !6)
 !6 = !{!7, !7}
 !7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, function: i32 (i32)* @baz, variables: !2)
+!8 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
 !9 = !{i32 2, !"Dwarf Version", i32 4}
 !10 = !{i32 2, !"Debug Info Version", i32 3}
 !11 = !{i32 1, !"PIC Level", i32 2}
 !12 = !{!"clang version 3.7.0 (trunk 239176) (llvm/trunk 239190)"}
-!13 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
+!13 = !DILocalVariable(name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
 !14 = !DIExpression()
 !15 = !DILocation(line: 3, column: 13, scope: !4)
-!16 = !DILocalVariable(tag: DW_TAG_auto_variable, name: "var", scope: !4, file: !1, line: 4, type: !7)
+!16 = !DILocalVariable(name: "var", scope: !4, file: !1, line: 4, type: !7)
 !17 = !DILocation(line: 4, column: 6, scope: !4)
 !18 = !DILocation(line: 4, column: 12, scope: !4)
 !19 = !DILocation(line: 4, column: 14, scope: !4)
 !20 = !DILocation(line: 5, column: 9, scope: !4)
 !21 = !DILocation(line: 5, column: 2, scope: !4)
-!22 = !DILocalVariable(tag: DW_TAG_arg_variable, name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
+!22 = !DILocalVariable(name: "b", arg: 1, scope: !8, file: !1, line: 8, type: !7)
 !23 = !DILocation(line: 8, column: 13, scope: !8)
 !24 = !DILocation(line: 9, column: 13, scope: !8)
 !25 = !DILocation(line: 9, column: 9, scope: !8)
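frame-dw4.ll gets the same treatment, and both files also show the companion change to local-variable metadata: !DILocalVariable loses its tag: field, since DW_TAG_arg_variable versus DW_TAG_auto_variable is now implied by whether an arg: index is present. Straight from the metadata above:

; A parameter: the arg: field alone marks it as an argument variable.
!13 = !DILocalVariable(name: "b", arg: 1, scope: !4, file: !1, line: 3, type: !7)
; A local: no arg: field, so it is an auto variable.
!16 = !DILocalVariable(name: "var", scope: !4, file: !1, line: 4, type: !7)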
diff --git a/test/tools/dsymutil/Inputs/inlined-low_pc/1.o b/test/tools/dsymutil/Inputs/inlined-low_pc/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..7ab4e9205bbda61e7873fad3ad8bba40a86cf5e6
GIT binary patch
literal 1960
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/libfat-test.a b/test/tools/dsymutil/Inputs/libfat-test.a
new file mode 100644
index 0000000000000000000000000000000000000000..6b34b0fd6f90004bcc4cda6295704ad477562ddb
GIT binary patch
literal 5136
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/modules/Foo.pcm b/test/tools/dsymutil/Inputs/modules/Foo.pcm
new file mode 100644
index 0000000000000000000000000000000000000000..4a39a06c24dd94b16cba5c21d94c484df03db7f4
GIT binary patch
literal 26060
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-anon-namespace/1.o b/test/tools/dsymutil/Inputs/odr-anon-namespace/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..20d4b7fd84ed29a70148b93034b2c440accf94ce
GIT binary patch
literal 2084
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-anon-namespace/2.o b/test/tools/dsymutil/Inputs/odr-anon-namespace/2.o
new file mode 100644
index 0000000000000000000000000000000000000000..df8e567bc3a58bcf26b9d45eade8b54b3b72c317
GIT binary patch
literal 2084
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-member-functions/1.o b/test/tools/dsymutil/Inputs/odr-member-functions/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..f957a9b9d4231eb0601998a84ceb8a6e6e769706
GIT binary patch
literal 2236
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-member-functions/2.o b/test/tools/dsymutil/Inputs/odr-member-functions/2.o
new file mode 100644
index 0000000000000000000000000000000000000000..c696866fcad6673668009c72209d0f1e96f117cd
GIT binary patch
literal 2660
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-uniquing/1.o b/test/tools/dsymutil/Inputs/odr-uniquing/1.o
new file mode 100644
index 0000000000000000000000000000000000000000..c520930300555c8166c93b145416c707e7ae9a6c
GIT binary patch
literal 2544
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/odr-uniquing/2.o b/test/tools/dsymutil/Inputs/odr-uniquing/2.o
new file mode 100644
index 0000000000000000000000000000000000000000..c520930300555c8166c93b145416c707e7ae9a6c
GIT binary patch
literal 2544
[binary patch data omitted]
diff --git a/test/tools/dsymutil/Inputs/submodules/Parent.pcm b/test/tools/dsymutil/Inputs/submodules/Parent.pcm
new file mode 100644
index 0000000000000000000000000000000000000000..e6909d7afa5e3c2801623df405f418a7ff572900
GIT binary patch
literal 25260
[binary patch data omitted]
z`i%a%ReyO|e=eafd2Z6&8Me5?CO7^K9)IqpyZpsS#R+ZItw`mWWd6@bqJ7rrz+lvO zTh~_*-D!&!J^POS^=AFm8U2N2!}*y04X^%Ov*C5C;mw4`J+NeMk6ZfVrekrlTVpvk zYx0H7ox!|~=iL=E$%;8`)y-u76GwHs+M~VgSWvg~whkX?*#~~3e!@q2%ST$yaLXo;5!nw2YLR+m_ZouAvW)&zkoI^IFfk z%MMPLsY`R^yqq}TTZ!EgKIT|>s>z|DJkLvo)MvI=; z7$C+3RAbj)up6!{>)%*5ye=80Vx+Pw3G2keEteMc39n&l+3<2qf7z?QvTS&1MvwWg zB!c%rGJ|o;$kN)bQZs~aSv7ZJlWAYra?EAwUoy7^%}%?f>Rl}qnwC#!aFx#%-9AW^ z^x8p~ZeYP$bg5Z?0Y(zjW0wn762i-a`YT@j6tn<=cg~u@HFoKAS&qSAuzuX+47HV= zNS2+6R3_ZjceG_^Apc0^dABX$F8`&wESB6HbC;dgLIwH1IIHW&rtMKL_MMFGo{0Lu zzQJLD*sI;b7^gR+=NYcm(uP`=ANmW?lQ>yw6^M6r1DO( z;%uZ0SLTzIp~15|!AmpQZD1nWcT~4?!C0S=4A<@Y*S&_}K+0LAh@CWHgY7`qt?NVC1)SFh4hH7cpNEmd zgJ!^W{JBAmrG05F7c$7`3|drV*uPpe@3z5=LB=fz8O+X=Lvg(@Et{@yUDOAb&qjUl zRzX1j(P-abwCK53!_{TORmpH3a`x)4gMbMVHH1*|k&vYm-pYjB2LEzdI>C4_@9Tf( zhFZ(-xT~f~r_iPi%tD*z@ikHb3Bc^3XsFdbKS=r_5AB9y#?6Cavo8qWw;Y4kg76g5 zCOk-}@A%ZS%p~qa;Hoei+i7@>yKI`=_T-2!H!u>SKW-UhXa?yyLw|2u)K=e#*iMkRHs93ZU)yA()P7EJN^gCXj(r6Es3hVX56gyZ$QnT{c{i3>WPBOAMxDlB|XcaFaxM zO`abPZjJh4Oc4dTUF&rI+q!`Q&VuzT;j)?J=9x$({AJ2*gWb+(tIj7YrlHE%+#T7-2jqdSEtl zMUT5o{-EhtkfKlLl8JCW4%_qmE*~qO10E_=Tw-rT$bePEz#0%+*Vh;gv=gr4qTe3% z!wbN2?}l3uuv`Y?oMteT}Ql&5%j?mFHI6O0oWB*t+|p11_{t?(SS|2dxCfba`xS( z5C#<8Js8P8Tgs%T~bmvi>?;@oiB(!xzSkVt8B4cV<|B6@tTEaT=@RloH!%Oq&oF^4_wfo|h zmbm%xu<1#-$1nfjA&ddIuzwK;vz^EYLVsxjGV0LYZn$dIzq+hH?=>LqU!TxlFygcRjYN231_2{W=0T0Q6EwtO+c$RHbXT2n zS6tJU-Qpp`U2%%=wgPwydd^3HIj47B)NN1acv1g|t{;(W5;4pA;S9zRG7`v42yBDy zjID>_raf_s&jk;%I7fb<=G|Skm_+!ff>#0oCJ|Oh8kM2xGwR)SJK@y*(ZFET4^aR- z=b}Y_<507YAihY+2>?(wl|*n)1~F~0gjo|T0aoE0`k|T_3fbig6v4~xXv@#IZJ=r{ zdGh89YIOi-W^@A=S)$S`^0x(J{*OLjwW>4P>Unq732lX9<;ctni>H$nQ|<~1n#oGI z52X;v^3cVJ?MI_KN0@`asbf*^bTqIo+BXt~zXQ{BvyP=w3gtURrj&N)?qUdm@mz7e(@(h#^o0s|V#!^eUi&%C>OhL}YTgKW8qB!5;vFh`kKYC+1BAbf_d zMPGBti(eLL(se5ZIG_^{;~cW4%g5!7hGvnZnV$sELG=tYki8(FIK2gO!ilgx6|rGc z6#qr$M{ya(#xO|)?oD84_kXj%&1CYj|MPK*G{JxC*&`xL! z8@s-!NA#hb9Mw%C`~|U{3T*~mmdC@E$Ag}{AIm~!Tr+16o!Th#$km55n`0w(Jl!Ji z$k_zomz;e@h!knel6fd>a@tI?3}HF9+!dk8a(Tg{y6xWR_S;cEgodbR2X-MritUnx zwyb|?QhyF{1W}t+XNY32Ly{1n)ll-{m*tIs63CZ0U(V)rVlQpQiHHsH8a3bHm-=iOkSt z?hH~`L<|Kb?x1C#h?~xq4Kh6h-3U5KU>9&3s?5K9j_Z+P;?4=(&H@%3xsY#D=s_?l z`sP+4VXyvG$#9wHy>>FHb0XSODzyxd5C1_XDQxP5`^=gl83ev#vu1Z36y8`rA*v@} z1zd^pVe;k8z!2m%fKvjKWF>}9yUXGJ=OfjS9?R=aRu$%6JP5R0OX&yl5SPV`AX~Xd zICTO=C`F&}hw{?mT^HTK?I#sm7o=weVc2dSHCM!jT9~x9?zlr1NOTtv9{Cb9wX(S2 z{xG2(3zKJ43sTlp5xV$aCuD0ukcD#{LXrb&QTEPKwl)yVJDx9V91>pEBWpUBh<6CG zt$?qnca92^qCfwT?2>@oNIB{k2rZU8*;$-QpAba2O=2S?V{+~4^yaP44tEQ6qZx{1537~>Rb9?Z{_-ZVIn(I1* z?4`Wu9L&H@MJRlvWWyFBo3$n`qN>vI=0MT*kH{-CQ#?)CMcKTz9N{%I`A-ujERGCN zPH17Q03vwcjta6#eg?%q4mX-uBDZfsTHx>1NG-F3-nI{tHW>E2^ zt$+Z3aKFsplDWl2`5V=+aZ@)i1V%OZrHri=IsBW;hD!`uOB__Q5X;d%qi)AKfPkYx5SPtA~?O@5?gQ{ zQSDwrS4W}<5}((XZ19W4Ibxt}Cz<=pWtk&eV_DNp@JMCjW|WXq$&;-F4y;Uuy3N#D z@n04jkre@BSwu#vHVt1uk(h`-DOW=j+!@DrZ!Ef_Ali!zgm;7%q`woIJ*}-? 
zOm0T20a?==oNHoCkkqDGJI2#&4Q1U<|C@pUxuoJ=Y&Bd(5J#q(2)`qCT&LST6AeHz zK*Csb08Wi8ccJ-x^`_Wy8VN(B62d{xFm&pu*cxJoBkv1ZP$i+nYYXQ6HzD~!J^Gf& zi_RzFzZKirb?9)S@S*sqK;5ESaw`mugQ#(nY`E$SJP0utACf0)q}4L4!5IsvL{dlF zC`x-Es-NIa_Zb8}*0(JBtg%=IG~lFBf5mQqj`yt8&-O({o9Dwy$!~_o%^}5FoEI7ggi;F4 zurvqpakI1QSAs(HEh(2khlB|qSr+=Gj|tki?HMUN`QVIPgXw<7Bf=$-oC{|J_@J){ ze4 zJW~4L7oI^ji`oiJB=m~?Xico68l?9qzyX|gr8$jM03MM}z(YUwuq-{73GnEq*1AK` z1`M3?8s1pcmz;S)aLV1esB4>84X{StNQ9E_i>2b`eE@S!jE~Wd@Nq=p%Q5N|quRQh2>grHelkk&;6@nnDK7d6F zW4~`HTcmKj`LwqD1h%GtLM_&WyPQ6_ku#k9{>RbPL>h1mQOreYwzK(DVrv2@ul_Z* zDeSPsmXP_44vdjK??z?<$B9wVK_%Ps{W9Z8B)!#Vk`-skgwWe5gM;H4Q0U^@f^oSd z1kRvc6STCUTGUuZg2&w!YG?q>kP*!gk7Zw$c|=J*LiV|9!TUn1%-1P006;`F^=uhO z@FoV1)b!@d7m|nlmuMlj%)SW1`2!;THtZ^Cl*3 z7P*3*f*78OwMVx@+P9Vc@qFnH_(|^X?!pw z#LjaS6lmw*5fFu!9xc5JML-4r>AOM&P`4j$f~XDcFL>V+G68U<9v7nMAR~mjlli}{ zl4U|zN>A>Py#P)j7BrUWp+LGt@5x^)YazzUG^9+B$+F?QusSc*`>;m%d&xJZh5h#( zWey8zEX>RaO3=@_1MMV{E&N=tk7t_gFgDbcE>kD$+%_(_=0I0+_IrZC?afgvi)V91 zfBqRESSlZokD+yzNap{lT4>qgW!+J9db(k)BoF(yxb9YTh&QP3RxlHl=m-m!;U zA>EA`@WcmVL|}rYm3(4e&?aa5K)-czF2Q_F>DtC1bIwj0q*+F<-hp8$irw+2#6-uP=7%tdizCkUcseo(1R z={s-Bu}o;whM)#K48;ca%CStQWLAEb-H%eJQDik_^3p7ogfgPtQL zvQ^mi(-Vg4C=llLF985ijKlX~r4Ro^Sn2i|9q@lU_+`}KW0Mo zd6(CxR8m!iE{>~-LHMTL0xW^$DhLEu)s!^r-(&>~rE>@kXbU7T&n z5tCtQI&JX(Oh64c7~Y)0`2hqG=F#zuYT9@|3J>gV%RR%Q?#~ObWmcrEnsk?=b?{%d z$mSY^|E@*u?M3Tg$>Q|?LcJrGBG;4jEnzLl7Dt9L}IY>#@8xI#hD7ln+83mFoi z#0ga}s|66yS%?oU+)@i+A2V7c5~Ggyj zM;A)!{-Ba7D~RBIXQPO%tQw+c2S`+keE&fug=y6fpo9fKpwJKZ~%l{YQMD%nI;I?dd4Q&%d z9R9#XSzn_J0p;^l494l8k>+<|M?0nEavCFA?xKgB_!mJl9>22WUv7z`ho(m9{EC*8 zr#~4s;Rwo3n8Yj&q%kC9VWM?ohIw3#>CfX-5_|GR(fQkJ%GhIZ1PB%;@XQXKy*|6B zju&RmcF3TImJ1$5#oV^ag0uInr=vS4ovtNqXt=?WQLVrS=iRpRP;mqW3wua6DfLeP zvZCn5Y>!3hakld5_JUW3lSMeglYVT?fm$T?C3X!RMApaNpZABvlB zr0-Gu^A~vESY%wcDnINRj9FcgH8{mPp}{%jXEe6)Lq`sL*6OHhs;zaFS#6=vQ~Nik zuQ?oLlJwAh?)9%e_LT9bVN0GjcxdX^oPd`6) zJoWjhg4IW*lhTO-984U*ya(THmCyICqRr{^s%E3)!aF-Njel_@5m)eo4=(v4JEv&G z+2hm?zn8u$jxb`EiGouT#)8P8w2>{InxEstgvPLBz5l(%;(r2v=Upf7KRUSj`rzvE z`TQm6{^gnC_vZ88H5UJN?EZgPEdHE#^#O_glca*TK6LZOmNTC3rdN&EG}7(al7-|W3D2Wbo~D~ON@DKDc~9dt%_E89k51hv zxf5x;<@rP+uW35`$ob=sT=XEPn^0@aysvk z6UQHU@kYtbaLHo0WX83a-)vl^F;35}!=_vu=L%MpuKDEy(&lPhiAd5K(6}c*R@&xm z_v)p=-v^R^rD@$iHvakQ5oyzW@>kbSJfG(o84V5Ld)@8u4SX-ye{kqXX!vk7f8QS* zlVupGvwGToY#;plZG5l6ZvMU~ztgl**xcuFKJ<7&Vd;R)rF#h1^6{c~I?qn;ut4?> z(_rtUUZPVcf2T96G!I8ir44w|8TdW#TF<$+Y^PhLU|f>+(zQaX$8+uep-rm_tecuA zB#m~Jb??wyMbFH|?lW5HLdz=agA-knV@TUx(ERq?sSUr4l|INuw5JN1AC_K_hUQkq zj1z_H9c^v??>sE6cVLkMqje@M4QXTdHJ`-DnoaF7%w6vonu|Sab!qWom1(~=wbBN$ zQmphLS7g>ZeG7;Q>@0J{c=Css2RI!TG|EAbI+9$J(_%?2OrjKnzKH1 zWA4=2+KJQbTa1mbnTtK}*?rP_wrJ_`E~8`5`-N7*Q%^&xyJ7 z?$A4?Rj&EJ(VtyUR=0P}M7|U$G}>0_ALRxCXFfZqks>C#)+(+Qyj3nq4YqgnAH|kSYWV7Fhoh0dFMbaYK-eybq_gt$_dmWFTOzQC!+fzeSPr+Wz2^2M=LHxd5NN zfAjzwY0O$@EI7V*BsBcga0oZrvxc70KRSAT1ZiV=Cp)6?C%V_U0gA9={-*}kL0*Qz2gyW{#v?SM8{-v3d^fhk{1;godpiQoHrv zqJ$oIr^k^Y;cjdn@a^dKcWi4_4RyGCj*N$f$6pkH`ipV=$FO^l zWTYT1(9+u5v9;Uf?rKe6#;+W~3jD5af56k$>-TkZZ|zlaPofHRxAq0Rz8-I@&)?D7 zt1_6autJ~T-=VtL<7;X41-d%?{w}DsyQRa`O^Z5emG;XgxjVL|XVld?lt#~Icw4;b z8MSpaN+Wmg^0x-~v;gFrbKjH*0@)O$$GW`EIu#FeK_v-@s@>%axI6r~tb~nTc72*|~hb#?ZJ+6H@5eSM9y zrhbQV;~lDn?>gP<_qBAiwcQ&XSQ_*1Z7$J~o_tq2h{c|^KugaKI8LVK`B+<5kBf{# zY2ET|*emw{(^!Tb@zvN!YP%YzLf7)5X_e<(z}tge;eR;?d)s?plj_hTFDoRTW#61G zniW`Kj2qPotoD%*Rv5!jjiV9tsQAQHGC%(=FG)Of5HLS_U|b;xc|zMZ{}3i@3=Jtm zYNKAeis|YyfUN`7!LKuJX=(LzY;$!5TG(h)#%Qazw*ygwZCumjus1oH z?88+Jx%TboMl#;v-$ipXRw1%b7qt2V-8`Yo7~e)|BqzY3gdL)qtft8S{ZJL{cw^^M3#>zws>pC-qzUEP+J38*48&b{SD4~yJNV57R+KO!@)5}rDeXX(hRU~ 
z?imCnwN6J3Jf^O;rUqD8TW@zd2!}H^7I}0|L`U7awYAqDaCM=k-qy;}e3kbh^fro+ zCnu15M}mhG;KpW%d}m}NGly-YT!^JRBO{q9Y-2!?{;8Kt$K%W}jQuc}+AwoJKE^l9 z+z(?bZkUej>VS^zFeJ9i+>H;h-7+`B(2AR(CRgSFxl02~65EEdN&`N~)5uWf))-W5 z-K8``itj0frc#{@mGVj0oTVNkBUv>8Q!2|mMnh8@ZvL${=DD8*=|G0D?XtY&e_&55&`#VH@;tY&e_2RW-*oMKR+ zA%*G71aX#}^}rOxAm@pXWp-ld#O?6Gq21ir9atddP&&b*4p*3=O(+3EEyAIE)v&&sR%CqWFmJs+T z6GAAIB?dmq#1P6ZcQL9IM3!4(oHLrcnWOY#&NQE$tGAUgH_(M%ciQb&#BniCCPU>= z*isA&RLTZ>#f)5#Ps$QgWc*9%DF+0Jsj~W2j;~xwm~#%G5=&spN^qr^!t|O0v=mdA zaxuuDO}1~&ZC^drL2OIlzf0EvXIr-rko&KOsqd8Eh+6v`<~5!ul~ zIfZFL9WqE}ZTAM-d>Xf}nk>}jqeARCKsz;IMa)*wk&<^V$X1f#$VTkcRx4$eGRvTL zMZ8X}!Q`3?u2VCZSaYB?O-MszCNc`iV+NOLF07W9NQpHIL(&sgLTWV?DY0gf!;Dg% z%jwC9)9L-VY_riLJ!2)5PS4=7%|?&(3@+O&^q>i8T+GG|d7LwnGo=&a=NQGFjN}w9 zA725V)nue(tEML2?YX7T*;omY(`zvC=K$pN3?_dy z9`ER-!vHQf4z+lFIO;)D0*ctd`pxth84s*_y1S_JtUPwZj=uEzTRee|?k+kfqHauY z^?1aS3_okgbhiY0(W3VIcC_wvb*Y-zin;Edo~}SwkLS~YHeajypo?M_dsco=vj`OL zZ9UlOJq zhx&L)`^+eRL&d^qItdT}-aj)><{cDvGf`G|7%-3}D} z$#C$qnMbAApEZ7BuZ_ma6i1^1^rwmGbbvw{&|YxgFqC*`csz(BgG0)}mR&euhEs7$ zf&w^Qw6nw4!@IcKT%JHrZ=hcst7FgWe68#_99d(Hy#@#E6#L-@dd{vC2Lr1dm)d)v zL$-+Gel|IM%ni>a(3DoTA=rc+{?q~lgZuXf4-8r958r-r zBhi|9!qPzpGH;#lES#i+)W(@#eEWy&FgU(QQe@n^t{{JhPWo}V>96)`q~als)Vxn4 z&Ag$NOjoqh&+a=J&i~oua+n{bess$9iRt4XZG75Z{Ezd+?;DGM-CR64Q;dV$?=7yr zx#iaJ#;F^R#5|8Ca31@3$yA=b(2aBGuavu_#JAnAbz;MCL0FP@NRsJ8tD2kN4_}rv zI)h8PmTX!jHUERhD$RU4h6%GcvPx%P*^dif#OKJ<58%wJ{KtGT`jUyhhu!P(hrlOj zHvPfszoZXz&#uqSeQ%v~{}lf&z4FI3JL#i1kZlyJ@-y-p>H0T+{RPaIr>EwN1Ko=G z^mn$K@uD-X{~kZ9{yjGNH|$Y|d-hZsZBwN{E9(uYZp6ZPFLx4 zu|{frM|Y-BGE0E~I~Y8AAh7>%;Hh9BG>X5JF8&xh9Z=UuZ+HErt`G;;(^JJfp&&ZI zu8~f^_S3dPY!?U|LE=Y8j>aYS`x6+YBk>yPYu*2ARiTtGu>+gz7(6bs-+<6aeOG_* zb|LX$I|~@qJ6!>nkB(-tKPHXegP=*xc(H@j z^hNPQcJL9rIuzHGA3vx3@&TW}im}%}VEq4OXgx+2(Ena&IwbnZPzQn`$94W%@R>m)sc4y^Zc zyabF8oz&KA8!;*gbAjVKkQVyRX&`4 z&xfL=Er-X4=-mBgNoJs$Q>-*u%>$Z`6+fuUD>kkzDSq!uhVQuHG1`x4@`@*(E{;46 z>E=Bs%u?nc(JPl`Ql5S7hmXmud5v* zgA?!{{5lK!;n(t|KjP>A*BqDw`{8Jm51-Y6DFfS+nA9@p&}ja5agn6K4=&-wDW!jl Qm4F|R|1S>#{=a$nzwf{jLI3~& literal 0 HcmV?d00001 diff --git a/test/tools/dsymutil/X86/basic-linking-bundle.test b/test/tools/dsymutil/X86/basic-linking-bundle.test new file mode 100644 index 000000000000..c07fa5894f36 --- /dev/null +++ b/test/tools/dsymutil/X86/basic-linking-bundle.test @@ -0,0 +1,38 @@ +RUN: rm -rf %T/basic-linking-bundle +RUN: mkdir -p %T/basic-linking-bundle/dsymdest +RUN: cat %p/../Inputs/basic.macho.x86_64 > %T/basic-linking-bundle/basic.macho.x86_64 + +RUN: llvm-dsymutil -oso-prepend-path=%p/.. %T/basic-linking-bundle/basic.macho.x86_64 + +Check that the object file in the bundle exists and is sane: +RUN: llvm-dwarfdump %T/basic-linking-bundle/basic.macho.x86_64.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 | FileCheck %S/basic-linking-x86.test + +Check that llvm-dwarfdump recognizes the bundle as a dSYM: +RUN: llvm-dwarfdump %T/basic-linking-bundle/basic.macho.x86_64.dSYM | FileCheck %S/basic-linking-x86.test + +RUN: FileCheck %s --input-file %T/basic-linking-bundle/basic.macho.x86_64.dSYM/Contents/Info.plist + +RUN: llvm-dsymutil -oso-prepend-path=%p/.. 
%T/basic-linking-bundle/basic.macho.x86_64 -o %T/basic-linking-bundle/dsymdest/basic.macho.x86_64.dSYM +RUN: llvm-dwarfdump %T/basic-linking-bundle/dsymdest/basic.macho.x86_64.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 | FileCheck %S/basic-linking-x86.test +RUN: FileCheck %s --input-file %T/basic-linking-bundle/dsymdest/basic.macho.x86_64.dSYM/Contents/Info.plist + +CHECK: <?xml version="1.0" encoding="UTF-8"?> +CHECK-NEXT: <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> +CHECK-NEXT: <plist version="1.0"> +CHECK-NEXT: <dict> +CHECK-NEXT: <key>CFBundleDevelopmentRegion</key> +CHECK-NEXT: <string>English</string> +CHECK-NEXT: <key>CFBundleIdentifier</key> +CHECK-NEXT: <string>com.apple.xcode.dsym.basic.macho.x86_64</string> +CHECK-NEXT: <key>CFBundleInfoDictionaryVersion</key> +CHECK-NEXT: <string>6.0</string> +CHECK-NEXT: <key>CFBundlePackageType</key> +CHECK-NEXT: <string>dSYM</string> +CHECK-NEXT: <key>CFBundleSignature</key> +CHECK-NEXT: <string>????</string> +CHECK-NEXT: <key>CFBundleShortVersionString</key> +CHECK-NEXT: <string>1.0</string> +CHECK-NEXT: <key>CFBundleVersion</key> +CHECK-NEXT: <string>1</string> +CHECK-NEXT: </dict> +CHECK-NEXT: </plist> diff --git a/test/tools/dsymutil/X86/basic-linking-x86.test b/test/tools/dsymutil/X86/basic-linking-x86.test index 19b4e3bef663..37797a323504 100644 --- a/test/tools/dsymutil/X86/basic-linking-x86.test +++ b/test/tools/dsymutil/X86/basic-linking-x86.test @@ -1,13 +1,12 @@ -REQUIRES: shell RUN: cat %p/../Inputs/basic.macho.x86_64 > %t1 -RUN: llvm-dsymutil -oso-prepend-path=%p/.. %t1 +RUN: llvm-dsymutil -f -oso-prepend-path=%p/.. %t1 RUN: llvm-dwarfdump %t1.dwarf | FileCheck %s -RUN: llvm-dsymutil -o %t2 -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 +RUN: llvm-dsymutil -f -o %t2 -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 RUN: llvm-dwarfdump %t2 | FileCheck %s -RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC -RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE -RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dsymutil -y -o - - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC -RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dsymutil -o - -y - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC +RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dsymutil -f -y -o - - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC +RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dsymutil -f -o - -y - | llvm-dwarfdump - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE CHECK: file format Mach-O 64-bit x86-64 diff --git a/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test b/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test index ad3ba5a15b6b..56b78588b758 100644 --- a/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test +++ b/test/tools/dsymutil/X86/basic-lto-dw4-linking-x86.test @@ -1,5 +1,4 @@ -REQUIRES: shell -RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto-dw4.macho.x86_64 | llvm-dwarfdump - | FileCheck %s +RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/..
%p/../Inputs/basic-lto-dw4.macho.x86_64 | llvm-dwarfdump - | FileCheck %s CHECK: file format Mach-O 64-bit x86-64 diff --git a/test/tools/dsymutil/X86/basic-lto-linking-x86.test b/test/tools/dsymutil/X86/basic-lto-linking-x86.test index 395234e96166..68103aa44ca9 100644 --- a/test/tools/dsymutil/X86/basic-lto-linking-x86.test +++ b/test/tools/dsymutil/X86/basic-lto-linking-x86.test @@ -1,6 +1,5 @@ -REQUIRES: shell -RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto.macho.x86_64 | llvm-dwarfdump - | FileCheck %s -RUN: llvm-dsymutil -oso-prepend-path=%p/.. -dump-debug-map %p/../Inputs/basic-lto.macho.x86_64 | llvm-dsymutil -o - -y - | llvm-dwarfdump - | FileCheck %s +RUN: llvm-dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-lto.macho.x86_64 | llvm-dwarfdump - | FileCheck %s +RUN: llvm-dsymutil -oso-prepend-path=%p/.. -dump-debug-map %p/../Inputs/basic-lto.macho.x86_64 | llvm-dsymutil -f -o - -y - | llvm-dwarfdump - | FileCheck %s CHECK: file format Mach-O 64-bit x86-64 @@ -117,7 +116,7 @@ CHECK: DW_AT_type [DW_FORM_ref_addr] (0x0000000000000063) CHECK: DW_AT_location [DW_FORM_data4] (0x00000025) CHECK: DW_TAG_lexical_block [14] * CHECK: DW_AT_low_pc [DW_FORM_addr] (0x0000000100000f94) -CHECK DW_AT_high_pc [DW_FORM_addr] (0x0000000100000fa7) +CHECK: DW_AT_high_pc [DW_FORM_addr] (0x0000000100000fa7) CHECK: DW_TAG_inlined_subroutine [15] CHECK: DW_AT_abstract_origin [DW_FORM_ref4] (cu + 0x009a => {0x000001d4} "inc") CHECK: DW_AT_ranges [DW_FORM_data4] (0x00000000 diff --git a/test/tools/dsymutil/X86/custom-line-table.test b/test/tools/dsymutil/X86/custom-line-table.test new file mode 100644 index 000000000000..86fd7e294686 --- /dev/null +++ b/test/tools/dsymutil/X86/custom-line-table.test @@ -0,0 +1,40 @@ +# RUN: llvm-dsymutil -oso-prepend-path %p/../Inputs -y %s -f -o - | llvm-dwarfdump - -debug-dump=line | FileCheck %s + +# This test runs dsymutil on an object file with non-standard (as far +# as llvm is concerned) line table settings. + +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: basic2-custom-linetable.macho.x86_64.o + symbols: + - { sym: _foo, objAddr: 0x0, binAddr: 0x1000, size: 0x12 } + +# CHECK: 0x0000000000001000 19 0 {{.*}} is_stmt +# CHECK: 0x0000000000001012 20 14 {{.*}} is_stmt prologue_end +# CHECK: 0x0000000000001015 20 18 {{.*}} +# CHECK: 0x0000000000001017 20 17 {{.*}} +# CHECK: 0x0000000000001019 20 10 {{.*}} +# CHECK: 0x000000000000101e 20 25 {{.*}} +# CHECK: 0x0000000000001026 20 23 {{.*}} +# CHECK: 0x000000000000102b 20 36 {{.*}} +# CHECK: 0x000000000000103c 20 31 {{.*}} +# CHECK: 0x000000000000103e 20 3 {{.*}} +# CHECK: 0x0000000000001046 20 3 {{.*}} end_sequence + + - { sym: _inc, objAddr: 0x0, binAddr: 0x2000, size: 0x12 } + +# CHECK: 0x0000000000002000 14 0 {{.*}} is_stmt +# CHECK: 0x0000000000002004 15 10 {{.*}} is_stmt prologue_end +# CHECK: 0x0000000000002013 15 3 {{.*}} +# CHECK: 0x0000000000002015 15 3 {{.*}} end_sequence + + - { sym: _unused1, objAddr: 0x0, binAddr: 0x3000, size: 0x12 } + +# CHECK: 0x0000000000003000 10 0 {{.*}} is_stmt +# CHECK: 0x0000000000003004 11 7 {{.*}} is_stmt prologue_end +# CHECK: 0x000000000000300e 11 3 {{.*}} +# CHECK: 0x0000000000003013 12 1 {{.*}} is_stmt +# CHECK: 0x000000000000301c 12 1 {{.*}} is_stmt end_sequence +... 
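The address arithmetic that custom-line-table.test exercises boils down to rebasing each line-table row: dsymutil keeps the row's offset inside its debug-map symbol and adds it to the symbol's binAddr. A minimal Python sketch of that arithmetic, illustrative only (the function name is made up; the numbers come from the _foo and _inc entries in the map above):

def relocate(addr, obj_addr, bin_addr):
    # Keep the address's offset within the symbol, rebased onto the
    # symbol's address in the linked binary.
    return bin_addr + (addr - obj_addr)

# _foo: { objAddr: 0x0, binAddr: 0x1000 }
assert relocate(0x0, 0x0, 0x1000) == 0x1000    # first row above
assert relocate(0x12, 0x0, 0x1000) == 0x1012   # the prologue_end row
# _inc: { objAddr: 0x0, binAddr: 0x2000 }
assert relocate(0x4, 0x0, 0x2000) == 0x2004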
+ diff --git a/test/tools/dsymutil/X86/dead-stripped.cpp b/test/tools/dsymutil/X86/dead-stripped.cpp new file mode 100644 index 000000000000..ecab7580ec0f --- /dev/null +++ b/test/tools/dsymutil/X86/dead-stripped.cpp @@ -0,0 +1,48 @@ +// RUN: llvm-dsymutil -f -y %p/dummy-debug-map.map -oso-prepend-path %p/../Inputs/dead-stripped -o - | llvm-dwarfdump - -debug-dump=info | FileCheck %s + +// The test was compiled with: +// clang++ -O2 -g -c dead-strip.cpp -o 1.o + +// The goal of the test is to exercise dsymutil's behavior in the presence of +// functions/variables that have been dead-stripped by the linker but +// that are still present in the linked debug info (in this case because +// they have been DW_TAG_import'd in another namespace). + +// Everything in the N namespace below doesn't have a debug map entry, and +// thus is considered dead (::foo() has a debug map entry, otherwise dsymutil +// would just drop the CU altogether). + +namespace N { +int blah = 42; +// This is actually a dsymutil-classic bug that we reproduced +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_location + +__attribute__((always_inline)) int foo() { return blah; } +// CHECK: DW_TAG_subprogram +// CHECK-NOT: DW_AT_low_pc +// CHECK-NOT: DW_AT_high_pc +// CHECK: DW_AT_frame_base + +// CHECK: DW_TAG_subprogram + +int bar(unsigned i) { + int val = foo(); + if (i) + return val + bar(i-1); + return foo(); +} +// CHECK: DW_TAG_subprogram +// CHECK-NOT: DW_AT_low_pc +// CHECK-NOT: DW_AT_high_pc +// CHECK: DW_AT_frame_base +// CHECK-NOT: DW_AT_location +// CHECK-NOT: DW_AT_low_pc +// CHECK-NOT: DW_AT_high_pc + +} +// CHECK: TAG_imported_module +using namespace N; + +void foo() {} diff --git a/test/tools/dsymutil/X86/dsym-companion.test b/test/tools/dsymutil/X86/dsym-companion.test new file mode 100644 index 000000000000..4327a2953d97 --- /dev/null +++ b/test/tools/dsymutil/X86/dsym-companion.test @@ -0,0 +1,339 @@ +RUN: llvm-dsymutil -o - %p/../Inputs/basic.macho.i386 -f | llvm-readobj -file-headers -program-headers -sections -symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK32 +RUN: llvm-dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 -f | llvm-readobj -file-headers -program-headers -sections -symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK64 + +This test checks that the dSYM companion binaries generated in 32 and 64 bits +are correct. The checks are pretty strict (we check even the offsets and sizes +of the sections) in order to test the VM address layout algorithm. As the +debug sections are generated, this is a bit risky, but I don't expect +llvm-dsymutil's output to change much for these tiny C programs so this should +be OK. +The 32-bit version doesn't have object files, thus it has basically no debug +sections.
+ +CHECK32: Format: Mach-O 32-bit i386 +CHECK32: Arch: i386 +CHECK32: AddressSize: 32bit +CHECK64: Format: Mach-O 64-bit x86-64 +CHECK64: Arch: x86_64 +CHECK64: AddressSize: 64bit +CHECK: MachHeader { +CHECK32: Magic: Magic (0xFEEDFACE) +CHECK32: CpuType: X86 (0x7) +CHECK32: CpuSubType: CPU_SUBTYPE_I386_ALL (0x3) +CHECK64: Magic: Magic64 (0xFEEDFACF) +CHECK64: CpuType: X86-64 (0x1000007) +CHECK64: CpuSubType: CPU_SUBTYPE_X86_64_ALL (0x3) +CHECK: FileType: DWARFSymbol (0xA) +CHECK: NumOfLoadCommands: 7 +CHECK: Flags [ (0x0) +CHECK: } +CHECK: Sections [ +CHECK: Section { +CHECK: Name: __text +CHECK: Segment: __TEXT +CHECK32: Address: 0x1E90 +CHECK32: Size: 0x11A +CHECK64: Address: 0x100000EA0 +CHECK64: Size: 0x109 +CHECK: Offset: 0 +CHECK: Alignment: 4 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x800004) +CHECK: PureInstructions (0x800000) +CHECK: SomeInstructions (0x4) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __unwind_info +CHECK: Segment: __TEXT +CHECK32: Address: 0x1FAC +CHECK64: Address: 0x100000FAC +CHECK: Size: 0x48 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK32: Section { +CHECK32: Name: __nl_symbol_ptr +CHECK32: Segment: __DATA +CHECK32: Address: 0x2000 +CHECK32: Size: 0x4 +CHECK32: Offset: 0 +CHECK32: Alignment: 2 +CHECK32: RelocationOffset: 0x0 +CHECK32: RelocationCount: 0 +CHECK32: Type: 0x6 +CHECK32: Attributes [ (0x0) +CHECK32: ] +CHECK32: Reserved1: 0x0 +CHECK32: Reserved2: 0x0 +CHECK32: } +CHECK: Section { +CHECK: Name: __data +CHECK: Segment: __DATA +CHECK32: Address: 0x2004 +CHECK64: Address: 0x100001000 +CHECK: Size: 0x4 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __common +CHECK: Segment: __DATA +CHECK32: Address: 0x2008 +CHECK64: Address: 0x100001004 +CHECK: Size: 0x4 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: LocReloc (0x1) +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __bss +CHECK: Segment: __DATA +CHECK32: Address: 0x200C +CHECK64: Address: 0x100001008 +CHECK: Size: 0x4 +CHECK: Offset: 0 +CHECK: Alignment: 2 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: LocReloc (0x1) +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK64: Section { +CHECK64: Name: __debug_line +CHECK64: Segment: __DWARF +CHECK64: Address: 0x100003000 +CHECK64: Size: 0xEA +CHECK64: Offset: 8192 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_pubnames +CHECK64: Segment: __DWARF +CHECK64: Address: 0x1000030EA +CHECK64: Size: 0x7F +CHECK64: Offset: 8426 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_pubtypes +CHECK64: Segment: 
__DWARF +CHECK64: Address: 0x100003169 +CHECK64: Size: 0x57 +CHECK64: Offset: 8553 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_aranges +CHECK64: Segment: __DWARF +CHECK64: Address: 0x1000031C0 +CHECK64: Size: 0xB0 +CHECK64: Offset: 8640 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK64: Section { +CHECK64: Name: __debug_info +CHECK64: Segment: __DWARF +CHECK64: Address: 0x100003270 +CHECK64: Size: 0x1BC +CHECK64: Offset: 8816 +CHECK64: Alignment: 0 +CHECK64: RelocationOffset: 0x0 +CHECK64: RelocationCount: 0 +CHECK64: Type: 0x0 +CHECK64: Attributes [ (0x0) +CHECK64: ] +CHECK64: Reserved1: 0x0 +CHECK64: Reserved2: 0x0 +CHECK64: } +CHECK: Section { +CHECK: Name: __debug_abbrev +CHECK: Segment: __DWARF +CHECK32: Address: 0x4000 +CHECK32: Size: 0x1 +CHECK32: Offset: 8192 +CHECK64: Address: 0x10000342C +CHECK64: Size: 0x8F +CHECK64: Offset: 9260 +CHECK: Alignment: 0 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: Section { +CHECK: Name: __debug_str +CHECK: Segment: __DWARF +CHECK32: Address: 0x4001 +CHECK32: Size: 0x1 +CHECK32: Offset: 8193 +CHECK64: Address: 0x1000034BB +CHECK64: Size: 0x9F +CHECK64: Offset: 9403 +CHECK: Alignment: 0 +CHECK: RelocationOffset: 0x0 +CHECK: RelocationCount: 0 +CHECK: Type: 0x0 +CHECK: Attributes [ (0x0) +CHECK: ] +CHECK: Reserved1: 0x0 +CHECK: Reserved2: 0x0 +CHECK: } +CHECK: ] +CHECK: Symbols [ +CHECK: Symbol { +CHECK: Name: _inc (2) +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1F20 +CHECK64: Value: 0x100000F20 +CHECK: } +CHECK: Symbol { +CHECK: Name: _inc (2) +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1F90 +CHECK64: Value: 0x100000F90 +CHECK: } +CHECK: Symbol { +CHECK: Name: _baz (7) +CHECK: Type: Section (0xE) +CHECK: Section: __data +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x2004 +CHECK64: Value: 0x100001000 +CHECK: } +CHECK: Symbol { +CHECK: Name: _private_int (12) +CHECK: Type: Section (0xE) +CHECK: Section: __bss +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x200C +CHECK64: Value: 0x100001008 +CHECK: } +CHECK: Symbol { +CHECK: Name: __mh_execute_header (25) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x10) +CHECK: ReferencedDynamically (0x10) +CHECK: ] +CHECK32: Value: 0x1000 +CHECK64: Value: 0x100000000 +CHECK: } +CHECK: Symbol { +CHECK: Name: _bar (45) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1F40 +CHECK64: Value: 0x100000F40 +CHECK: } +CHECK: Symbol { +CHECK: Name: _foo (50) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1EC0 +CHECK64: Value: 0x100000ED0 +CHECK: } +CHECK: 
Symbol { +CHECK: Name: _main (55) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __text +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x1E90 +CHECK64: Value: 0x100000EA0 +CHECK: } +CHECK: Symbol { +CHECK: Name: _val (61) +CHECK: Extern +CHECK: Type: Section (0xE) +CHECK: Section: __common +CHECK: RefType: UndefinedNonLazy (0x0) +CHECK: Flags [ (0x0) +CHECK: ] +CHECK32: Value: 0x2008 +CHECK64: Value: 0x100001004 +CHECK: } +CHECK: ] + diff --git a/test/tools/dsymutil/X86/dummy-debug-map.map b/test/tools/dsymutil/X86/dummy-debug-map.map new file mode 100644 index 000000000000..f9bc7b099858 --- /dev/null +++ b/test/tools/dsymutil/X86/dummy-debug-map.map @@ -0,0 +1,22 @@ +# This is a dummy debug map used for some tests where the contents of the +# map are just an implementation detail. The tests wanting to use that file +# should put all their object files in an explicitly named sub-directory +# of Inputs, and they should be named 1.o, 2.o, ... +# As not finding an object file or symbols isn't a fatal error for dsymutil, +# you can extend this file with as many object files and symbols as needed. + +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: 1.o + symbols: + - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x10000, size: 0x10 } + - filename: 2.o + symbols: + - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x20000, size: 0x10 } + - filename: 3.o + symbols: + - { sym: __Z3foov, objAddr: 0x0, binAddr: 0x30000, size: 0x10 } + - { sym: __ZN1S3bazIiEEvT_, objAddr: 0x0, binAddr: 0x30010, size: 0x10 } +... + diff --git a/test/tools/dsymutil/X86/empty_range.s b/test/tools/dsymutil/X86/empty_range.s new file mode 100644 index 000000000000..dfe734f1b2bb --- /dev/null +++ b/test/tools/dsymutil/X86/empty_range.s @@ -0,0 +1,61 @@ +# This test verifies that an empty range list in the .debug_ranges section +# doesn't crash llvm-dsymutil. As clang does not produce this kind of debug +# info anymore, we used this hand-crafted assembly file to produce a testcase. +# Compile with: +# llvm-mc -triple x86_64-apple-darwin -filetype=obj -o 1.o empty_range.s + +# RUN: llvm-dsymutil -f -y %p/dummy-debug-map.map -oso-prepend-path %p/../Inputs/empty_range -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + + .section __TEXT,__text,regular,pure_instructions + .macosx_version_min 10, 11 + .globl __Z3foov + .align 4, 0x90 +__Z3foov: ## @_Z3foov +Lfunc_begin0: + pushq %rbp + movq %rsp, %rbp + popq %rbp + retq +Lfunc_end0: + .section __DWARF,__debug_abbrev,regular,debug +Lsection_abbrev: + .byte 1 ## Abbreviation Code + .byte 17 ## DW_TAG_compile_unit + .byte 1 ## DW_CHILDREN_yes + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 2 ## Abbreviation Code + .byte 46 ## DW_TAG_subprogram + .byte 0 ## DW_CHILDREN_no + .byte 17 ## DW_AT_low_pc + .byte 1 ## DW_FORM_addr + .byte 0x55 ## DW_AT_ranges + .byte 6 ## DW_FORM_data4 + .byte 0 ## EOM(1) + .byte 0 ## EOM(2) + .byte 0 ## EOM(3) + .section __DWARF,__debug_info,regular,debug +Lsection_info: + .long 22 ## Length of Unit + .short 2 ## DWARF version number + .long 0 ## Offset Into Abbrev.
Section + .byte 8 ## Address Size (in bytes) + .byte 1 ## Abbrev [1] DW_TAG_compile_unit + .byte 2 ## Abbrev [2] DW_TAG_subprogram + .quad Lfunc_begin0 ## DW_AT_low_pc + .long 0 ## DW_AT_ranges (pointing at an empty entry) + .byte 0 ## End Of Children Mark + .section __DWARF,__debug_ranges,regular,debug +Ldebug_range: + .long 0 + .long 0 + +# CHECK: DW_TAG_compile_unit +# CHECK: DW_TAG_subprogram +# CHECK-NEXT: DW_AT_low_pc{{.*}}(0x0000000000010000) +# CHECK-NEXT: DW_AT_ranges{{.*}}(0x00000000) + +# There was a bug that would use the currently active object file when a +# debug map object isn't found. Check that we only linked one file. +# CHECK-NOT: DW_TAG_compile_unit + diff --git a/test/tools/dsymutil/X86/fat-archive-input-i386.test b/test/tools/dsymutil/X86/fat-archive-input-i386.test new file mode 100644 index 000000000000..f4ea288768c8 --- /dev/null +++ b/test/tools/dsymutil/X86/fat-archive-input-i386.test @@ -0,0 +1,16 @@ +# REQUIRES: object-emission +# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +--- +triple: 'i386-apple-darwin' +objects: + - filename: libfat-test.a(fat-test.o) + symbols: + - { sym: _i386_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 } +... + +# CHECK: .debug_info contents: +# CHECK: DW_TAG_variable +# CHECK-NOT: {{DW_TAG|NULL}} +# CHECK: DW_AT_name{{.*}}"i386_var" + diff --git a/test/tools/dsymutil/X86/fat-object-input-x86_64.test b/test/tools/dsymutil/X86/fat-object-input-x86_64.test new file mode 100644 index 000000000000..cdd5a4c08d9c --- /dev/null +++ b/test/tools/dsymutil/X86/fat-object-input-x86_64.test @@ -0,0 +1,16 @@ +# REQUIRES: object-emission +# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: fat-test.o + symbols: + - { sym: _x86_64_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 } +... + +# CHECK: .debug_info contents: +# CHECK: DW_TAG_variable +# CHECK-NOT: {{DW_TAG|NULL}} +# CHECK: DW_AT_name{{.*}}"x86_64_var" + diff --git a/test/tools/dsymutil/X86/fat-object-input-x86_64h.test b/test/tools/dsymutil/X86/fat-object-input-x86_64h.test new file mode 100644 index 000000000000..53aed1ec4443 --- /dev/null +++ b/test/tools/dsymutil/X86/fat-object-input-x86_64h.test @@ -0,0 +1,16 @@ +# REQUIRES: object-emission +# RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs -y %s -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +--- +triple: 'x86_64h-apple-darwin' +objects: + - filename: fat-test.o + symbols: + - { sym: _x86_64h_var, objAddr: 0x0, binAddr: 0x1000, size: 0x4 } +... + +# CHECK: .debug_info contents: +# CHECK: DW_TAG_variable +# CHECK-NOT: {{DW_TAG|NULL}} +# CHECK: DW_AT_name{{.*}}"x86_64h_var" + diff --git a/test/tools/dsymutil/X86/frame-1.test b/test/tools/dsymutil/X86/frame-1.test index 7852e68a142a..27bc17d75984 100644 --- a/test/tools/dsymutil/X86/frame-1.test +++ b/test/tools/dsymutil/X86/frame-1.test @@ -2,14 +2,14 @@ # RUN: rm -rf %t # RUN: mkdir -p %t # RUN: llc -filetype=obj %p/../Inputs/frame-dw2.ll -o %t/frame-dw2.o -# RUN: llvm-dsymutil -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s +# RUN: llvm-dsymutil -f -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s # This test is meant to verify that identical CIEs will get reused # in the same file but also in between files.
For this to happen, we # link the same file twice using this made-up debug map: --- -triple: 'i386-unknown-unknown-macho' +triple: 'i386-apple-darwin' objects: - filename: frame-dw2.o symbols: diff --git a/test/tools/dsymutil/X86/frame-2.test b/test/tools/dsymutil/X86/frame-2.test index 168e342a4f74..89a7670f86f0 100644 --- a/test/tools/dsymutil/X86/frame-2.test +++ b/test/tools/dsymutil/X86/frame-2.test @@ -3,7 +3,7 @@ # RUN: mkdir -p %t # RUN: llc -filetype=obj %p/../Inputs/frame-dw2.ll -o %t/frame-dw2.o # RUN: llc -filetype=obj %p/../Inputs/frame-dw4.ll -o %t/frame-dw4.o -# RUN: llvm-dsymutil -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s +# RUN: llvm-dsymutil -f -oso-prepend-path=%t -y %s -o - | llvm-dwarfdump -debug-dump=frames - | FileCheck %s # Check the handling of multiple different CIEs. To have CIEs that # appear to be different, use a dwarf2 version of the file along with @@ -12,7 +12,7 @@ # appears again. This is a behavior we inherited from dsymutil-classic # but this should be fixed (see comment in patchFrameInfoForObject()) --- -triple: 'i386-unknown-unknown-macho' +triple: 'i386-apple-darwin' objects: - filename: frame-dw2.o symbols: diff --git a/test/tools/dsymutil/X86/lit.local.cfg b/test/tools/dsymutil/X86/lit.local.cfg index c8625f4d9d24..05f8b38b3346 100644 --- a/test/tools/dsymutil/X86/lit.local.cfg +++ b/test/tools/dsymutil/X86/lit.local.cfg @@ -1,2 +1,4 @@ if not 'X86' in config.root.targets: config.unsupported = True + +config.suffixes = ['.test', '.cpp', '.m', '.s'] diff --git a/test/tools/dsymutil/X86/mismatch.m b/test/tools/dsymutil/X86/mismatch.m new file mode 100644 index 000000000000..33ae782df057 --- /dev/null +++ b/test/tools/dsymutil/X86/mismatch.m @@ -0,0 +1,23 @@ +/* Compile with: + cat >modules.modulemap <<EOF + module mismatch { + header "mismatch.h" + } + EOF + echo > mismatch.h + clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \ + -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=. \ + -fdisable-module-hash mismatch.m -o 1.o + echo > mismatch.h + clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \ + -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=.
\ + -fdisable-module-hash mismatch.m -o /dev/null +*/ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/mismatch \ +// RUN: -y %p/dummy-debug-map.map -o %t.bin 2>&1 | FileCheck %s + +@import mismatch; + +void f() {} +// CHECK: warning: hash mismatch diff --git a/test/tools/dsymutil/X86/modules.m b/test/tools/dsymutil/X86/modules.m new file mode 100644 index 000000000000..046a8c1304a0 --- /dev/null +++ b/test/tools/dsymutil/X86/modules.m @@ -0,0 +1,117 @@ +/* Compile with: + cat >modules.modulemap <<EOF + module Foo { + header "Foo.h" + export * + } + module Bar { + header "Bar.h" + export * + } + EOF + clang -D BAR_H -E -o Bar.h modules.m + clang -D FOO_H -E -o Foo.h modules.m + clang -cc1 -emit-obj -fmodules -fmodule-map-file=modules.modulemap \ + -fmodule-format=obj -g -dwarf-ext-refs -fmodules-cache-path=. \ + -fdisable-module-hash modules.m -o 1.o +*/ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/modules \ +// RUN: -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/modules \ +// RUN: -y %p/dummy-debug-map.map -o %t 2>&1 | FileCheck --check-prefix=WARN %s + +// WARN-NOT: warning: hash mismatch + +// --------------------------------------------------------------------- +#ifdef BAR_H +// --------------------------------------------------------------------- +// CHECK: DW_TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_module +// CHECK-NEXT: DW_AT_name{{.*}}"Bar" +// CHECK: 0x0[[BAR:.*]]: DW_TAG_structure_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"Bar" +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_member +// CHECK: DW_AT_name {{.*}}"value" +// CHECK: DW_TAG_structure_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"PruneMeNot" + +struct Bar { + int value; +}; + +struct PruneMeNot; + +#else +// --------------------------------------------------------------------- +#ifdef FOO_H +// --------------------------------------------------------------------- +// CHECK: DW_TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: 0x0[[FOO:.*]]: DW_TAG_module +// CHECK-NEXT: DW_AT_name{{.*}}"Foo" +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_typedef +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type [DW_FORM_ref_addr] (0x{{0*}}[[BAR]]) +// CHECK: DW_TAG_structure_type +// CHECK-NEXT: DW_AT_name{{.*}}"S" +// CHECK-NOT: DW_TAG +// CHECK: 0x0[[INTERFACE:.*]]: DW_TAG_structure_type +// CHECK-NEXT: DW_AT_name{{.*}}"Foo" + +@import Bar; +typedef struct Bar Bar; +struct S {}; + +@interface Foo { + int ivar; +} +@end + +// --------------------------------------------------------------------- +#else +// --------------------------------------------------------------------- + +// CHECK: DW_TAG_compile_unit +// CHECK: DW_AT_low_pc +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_module +// CHECK-NEXT: DW_AT_name{{.*}}"Foo" +// CHECK-NOT: DW_TAG +// CHECK: DW_TAG_typedef +// CHECK-NOT: DW_TAG +// CHECK: NULL +// +// CHECK: DW_TAG_imported_declaration +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_import [DW_FORM_ref_addr] (0x{{0*}}[[FOO]] +// +// CHECK: DW_TAG_subprogram +// CHECK: DW_AT_name {{.*}}"main" +// +// CHECK: DW_TAG_variable +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name{{.*}}"foo" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type {{.*}}{0x{{0*}}[[PTR:.*]]} +// +// CHECK: 0x{{0*}}[[PTR]]: DW_TAG_pointer_type +// CHECK-NEXT: DW_AT_type [DW_FORM_ref_addr] (0x{{0*}}[[INTERFACE]]) + +@import Foo; +int main(int argc, char **argv) { + Bar bar; + Foo *foo = 0; + bar.value = 42; + return bar.value; +} +#endif +#endif diff --git a/test/tools/dsymutil/X86/multiple-inputs.test b/test/tools/dsymutil/X86/multiple-inputs.test new file mode 100644 index 000000000000..01b4f1bf52b5 --- /dev/null +++ b/test/tools/dsymutil/X86/multiple-inputs.test @@ -0,0 +1,31 @@ +RUN: rm -rf %T/multiple-inputs +RUN: mkdir -p %T/multiple-inputs + +RUN: cat %p/../Inputs/basic.macho.x86_64 > %T/multiple-inputs/basic.macho.x86_64 +RUN: cat %p/../Inputs/basic-archive.macho.x86_64 > %T/multiple-inputs/basic-archive.macho.x86_64 +RUN: cat %p/../Inputs/basic-lto.macho.x86_64 > %T/multiple-inputs/basic-lto.macho.x86_64 +RUN: cat
%p/../Inputs/basic-lto-dw4.macho.x86_64 > %T/multiple-inputs/basic-lto-dw4.macho.x86_64 + +# Multiple inputs in flat mode +RUN: llvm-dsymutil -f -oso-prepend-path=%p/.. %T/multiple-inputs/basic.macho.x86_64 %T/multiple-inputs/basic-archive.macho.x86_64 %T/multiple-inputs/basic-lto.macho.x86_64 %T/multiple-inputs/basic-lto-dw4.macho.x86_64 +RUN: llvm-dwarfdump %T/multiple-inputs/basic.macho.x86_64.dwarf \ +RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=BASIC +RUN: llvm-dwarfdump %T/multiple-inputs/basic-archive.macho.x86_64.dwarf \ +RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: llvm-dwarfdump %T/multiple-inputs/basic-lto.macho.x86_64.dwarf | FileCheck %S/basic-lto-linking-x86.test +RUN: llvm-dwarfdump %T/multiple-inputs/basic-lto-dw4.macho.x86_64.dwarf | FileCheck %S/basic-lto-dw4-linking-x86.test + +# Multiple inputs that end up in the same named bundle +RUN: llvm-dsymutil -oso-prepend-path=%p/.. %T/multiple-inputs/basic.macho.x86_64 %T/multiple-inputs/basic-archive.macho.x86_64 %T/multiple-inputs/basic-lto.macho.x86_64 %T/multiple-inputs/basic-lto-dw4.macho.x86_64 -o %t.dSYM +RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 \ +RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=BASIC +RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic-archive.macho.x86_64 \ +RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic-lto.macho.x86_64 | FileCheck %S/basic-lto-linking-x86.test +RUN: llvm-dwarfdump %t.dSYM/Contents/Resources/DWARF/basic-lto-dw4.macho.x86_64 | FileCheck %S/basic-lto-dw4-linking-x86.test + +# Multiple inputs in a named bundle in flat mode... impossible. +RUN: not llvm-dsymutil -f -oso-prepend-path=%p/.. %T/multiple-inputs/basic.macho.x86_64 %T/multiple-inputs/basic-archive.macho.x86_64 %T/multiple-inputs/basic-lto.macho.x86_64 %T/multiple-inputs/basic-lto-dw4.macho.x86_64 -o %t.dSYM 2>&1 | FileCheck %s + +CHECK: error: cannot use -o with multiple inputs in flat mode + diff --git a/test/tools/dsymutil/X86/odr-anon-namespace.cpp b/test/tools/dsymutil/X86/odr-anon-namespace.cpp new file mode 100644 index 000000000000..a66fc830b67a --- /dev/null +++ b/test/tools/dsymutil/X86/odr-anon-namespace.cpp @@ -0,0 +1,65 @@ +/* Compile with: + for FILE in `seq 2`; do + clang -g -c odr-anon-namespace.cpp -DFILE$FILE -o odr-anon-namespace/$FILE.o + done + */ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-anon-namespace -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +#ifdef FILE1 +// Currently llvm-dsymutil will unique the contents of anonymous +// namespaces if they are from the same file/line. Force this +// namespace to appear different even though it's the same (this +// uniquing is actually a bug kept for backward compatibility, see the +// comments in DeclContextTree::getChildDeclContext()). +#line 42 +#endif +namespace { +class C {}; +} + +void foo() { + C c; +} + +// Keep the ifdef guards for FILE1 and FILE2 even if all the code is +// above to clearly show what the CHECK lines are testing.
+#ifdef FILE1 + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-anon-namespace.cpp" + +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"c" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type {{.*}}0x00000000[[C_FILE1:[0-9a-f]*]] + +// CHECK: DW_TAG_namespace +// CHECK-NOT: {{DW_AT_name|NULL|DW_TAG}} +// CHECK: 0x[[C_FILE1]]:{{.*}}DW_TAG_class_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name{{.*}}"C" + +#elif defined(FILE2) + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-anon-namespace.cpp" + +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"c" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type {{.*}}0x00000000[[C_FILE2:[0-9a-f]*]] + +// CHECK: DW_TAG_namespace +// CHECK-NOT: {{DW_AT_name|NULL|DW_TAG}} +// CHECK: 0x[[C_FILE2]]:{{.*}}DW_TAG_class_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name{{.*}}"C" + +#else +#error "You must define which file you generate" +#endif diff --git a/test/tools/dsymutil/X86/odr-member-functions.cpp b/test/tools/dsymutil/X86/odr-member-functions.cpp new file mode 100644 index 000000000000..737d5a7abf61 --- /dev/null +++ b/test/tools/dsymutil/X86/odr-member-functions.cpp @@ -0,0 +1,109 @@ +/* Compile with: + for FILE in `seq 3`; do + clang -g -c odr-member-functions.cpp -DFILE$FILE -o odr-member-functions/$FILE.o + done + */ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-member-functions -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck %s + +struct S { + __attribute__((always_inline)) void foo() { bar(); } + __attribute__((always_inline)) void foo(int i) { if (i) bar(); } + void bar(); + + template <typename T> void baz(T t) {} +}; + +#ifdef FILE1 +void foo() { + S s; +} + +// CHECK: TAG_compile_unit +// CHECK-NOT: {{DW_TAG|NULL}} +// CHECK: AT_name{{.*}}"odr-member-functions.cpp" + +// CHECK: 0x[[S:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// CHECK-NOT: {{DW_TAG|NULL}} +// CHECK: DW_AT_name{{.*}}"S" +// CHECK-NOT: NULL +// CHECK: 0x[[FOO:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +// CHECK-NEXT: DW_AT_MIPS_linkage_name{{.*}}"_ZN1S3fooEv" +// CHECK: NULL +// CHECK: 0x[[FOOI:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +// CHECK-NEXT: DW_AT_MIPS_linkage_name{{.*}}"_ZN1S3fooEi" + +#elif defined(FILE2) +void foo() { + S s; + // Check that the overloaded member functions are resolved correctly + s.foo(); + s.foo(1); +} + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-member-functions.cpp" + +// Normal member functions should be described by the type in the first +// CU, thus we should be able to reuse its definition and avoid +// re-emitting it.
+// CHECK-NOT: DW_TAG_structure_type + +// CHECK: 0x[[FOO_SUB:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +// CHECK-NEXT: DW_AT_specification{{.*}}[[FOO]] +// CHECK-NOT: DW_TAG_structure_type +// CHECK: 0x[[FOOI_SUB:[0-9a-f]*]]:{{.*}}DW_TAG_subprogram +// CHECK-NEXT: DW_AT_specification{{.*}}[[FOOI]] +// CHECK-NOT: DW_TAG_structure_type + +// CHECK: DW_TAG_variable +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"s" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_type {{.*}}[[S]]) +// CHECK: DW_TAG_inlined_subroutine +// CHECK-NEXT: DW_AT_abstract_origin{{.*}}[[FOO_SUB]] +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_call_line{{.*}}40 +// CHECK: DW_TAG_inlined_subroutine +// CHECK-NEXT: DW_AT_abstract_origin{{.*}}[[FOOI_SUB]] +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_call_line{{.*}}41 + +#elif defined(FILE3) +void foo() { + S s; + s.baz(42); +} + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-member-functions.cpp" + +// Template or other implicit members will be included in the type +// only if they are generated, thus actually creating a new type. +// CHECK: DW_TAG_structure_type + +// Skip 'normal' member functions +// CHECK: DW_TAG_subprogram +// CHECK: DW_TAG_subprogram +// CHECK: DW_TAG_subprogram + +// This is the 'baz' member +// CHECK: 0x[[BAZ:[0-9a-f]*]]: DW_TAG_subprogram +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_MIPS_linkage_name {{.*}}"_ZN1S3bazIiEEvT_" +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name {{.*}}"baz" + +// Skip foo3 +// CHECK: DW_TAG_subprogram + +// baz instantiation: +// CHECK: DW_TAG_subprogram +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_specification {{.*}}[[BAZ]] "_ZN1S3bazIiEEvT_" +#else +#error "You must define which file you generate" +#endif diff --git a/test/tools/dsymutil/X86/odr-uniquing.cpp b/test/tools/dsymutil/X86/odr-uniquing.cpp new file mode 100644 index 000000000000..bb7ae50a2c72 --- /dev/null +++ b/test/tools/dsymutil/X86/odr-uniquing.cpp @@ -0,0 +1,187 @@ +/* Compile with: + clang -g -c odr-uniquing.cpp -o odr-uniquing/1.o + cp odr-uniquing/1.o odr-uniquing/2.o + The aim of these tests is to check that all the 'type types' that + should be uniqued through the ODR really are. + + The resulting object file is linked against itself using a fake + debug map. The end result is: + - with ODR uniquing: all types (except for the union for now) in + the second CU should point back to the types of the first CU.
+ - without ODR uniquing: all types are re-emitted in the second CU + */ + +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -debug-dump=info - | FileCheck -check-prefix=ODR -check-prefix=CHECK %s +// RUN: llvm-dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -no-odr -o - | llvm-dwarfdump -debug-dump=info - | FileCheck -check-prefix=NOODR -check-prefix=CHECK %s + +// The first compile unit contains all the types: +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-uniquing.cpp" + +struct S { + struct Nested {}; +}; + +// CHECK: 0x[[S:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// CHECK-NEXT: DW_AT_name{{.*}}"S" +// CHECK-NOT: NULL +// CHECK: 0x[[NESTED:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// CHECK-NOT: DW_TAG +// CHECK: DW_AT_name{{.*}}"Nested" +// CHECK: NULL + +namespace N { +class C {}; +} + +// CHECK: DW_TAG_namespace +// CHECK-NEXT: DW_AT_name{{.*}}"N" +// CHECK-NOT: NULL +// CHECK: 0x[[NC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// CHECK-NEXT: DW_AT_name{{.*}}"C" +// CHECK: NULL + +union U { + class C {} C; + struct S {} S; +}; + +// CHECK: 0x[[U:[0-9a-f]*]]:{{.*}}DW_TAG_union_type +// CHECK-NEXT: DW_AT_name{{.*}}"U" +// CHECK-NOT: NULL +// CHECK: 0x[[UC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// CHECK-NOT: NULL +// CHECK: 0x[[US:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// CHECK: NULL + +typedef S AliasForS; + +// CHECK: 0x[[ALIASFORS:[0-9a-f]*]]:{{.*}}DW_TAG_typedef +// CHECK-NEXT: DW_AT_type{{.*}}[[S]] +// CHECK-NEXT: DW_AT_name{{.*}}"AliasForS" + +namespace { +class AnonC {}; +} + +// CHECK: DW_TAG_namespace +// CHECK-NOT: {{DW_AT_name|NULL|DW_TAG}} +// CHECK: 0x[[ANONC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// CHECK-NEXT: DW_AT_name{{.*}}"AnonC" + +// This function is only here to hold objects that refer to the above types. +void foo() { + AliasForS s; + S::Nested n; + N::C nc; + AnonC ac; + U u; +} + +// The second CU contents depend on whether we disabled ODR uniquing or +// not. + +// CHECK: TAG_compile_unit +// CHECK-NOT: DW_TAG +// CHECK: AT_name{{.*}}"odr-uniquing.cpp" + +// The union itself is not uniqued for now (for dsymutil-compatibility), +// but the types defined inside it should be.
+// ODR: DW_TAG_union_type +// ODR-NEXT: DW_AT_name{{.*}}"U" +// ODR: DW_TAG_member +// ODR-NEXT: DW_AT_name{{.*}}"C" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[UC]] +// ODR: DW_TAG_member +// ODR-NEXT: DW_AT_name{{.*}}"S" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[US]] + +// Check that the variables point to the right type +// ODR: DW_TAG_subprogram +// ODR-NOT: DW_TAG +// ODR: DW_AT_name{{.*}}"foo" +// ODR-NOT: NULL +// ODR: DW_TAG_variable +// ODR-NOT: DW_TAG +// ODR: DW_AT_name{{.*}}"s" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[ALIASFORS]] +// ODR: DW_AT_name{{.*}}"n" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[NESTED]] +// ODR: DW_TAG_variable +// ODR-NOT: DW_TAG +// ODR: DW_AT_name{{.*}}"nc" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[NC]] +// ODR: DW_TAG_variable +// ODR-NOT: DW_TAG +// ODR: DW_AT_name{{.*}}"ac" +// ODR-NOT: DW_TAG +// ODR: DW_AT_type{{.*}}[[ANONC]] + +// With no ODR uniquing, we should get copies of all the types: + +// This is "struct S" +// NOODR: 0x[[DUP_S:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// NOODR-NEXT: DW_AT_name{{.*}}"S" +// NOODR-NOT: NULL +// NOODR: 0x[[DUP_NESTED:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"Nested" + +// This is "class N::C" +// NOODR: DW_TAG_namespace +// NOODR-NEXT: DW_AT_name{{.*}}"N" +// NOODR: 0x[[DUP_NC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// NOODR-NEXT: DW_AT_name{{.*}}"C" + +// This is "union U" +// NOODR: 0x[[DUP_U:[0-9a-f]*]]:{{.*}}DW_TAG_union_type +// NOODR-NEXT: DW_AT_name{{.*}}"U" +// NOODR-NOT: NULL +// NOODR: 0x[[DUP_UC:[0-9a-f]*]]:{{.*}}DW_TAG_class_type +// NOODR-NOT: NULL +// NOODR: 0x[[DUP_US:[0-9a-f]*]]:{{.*}}DW_TAG_structure_type +// NOODR: NULL + +// Check that the variables point to the right type +// NOODR: DW_TAG_subprogram +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"foo" +// NOODR-NOT: NULL +// NOODR: DW_TAG_variable +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"s" +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_type{{.*}}0x[[DUP_ALIASFORS:[0-9a-f]*]] +// NOODR: DW_TAG_variable +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"n" +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_type{{.*}}[[DUP_NESTED]] +// NOODR: DW_TAG_variable +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"nc" +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_type{{.*}}[[DUP_NC]] +// NOODR: DW_TAG_variable +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"ac" +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_type{{.*}}0x[[DUP_ANONC:[0-9a-f]*]] + +// This is "AliasForS" +// NOODR: 0x[[DUP_ALIASFORS]]:{{.*}}DW_TAG_typedef +// NOODR-NOT: DW_TAG +// NOODR: DW_AT_name{{.*}}"AliasForS" + +// This is "(anonymous namespace)::AnonC" +// NOODR: DW_TAG_namespace +// NOODR-NOT: {{DW_AT_name|NULL|DW_TAG}} +// NOODR: 0x[[DUP_ANONC]]:{{.*}}DW_TAG_class_type +// NOODR-NEXT: DW_AT_name{{.*}}"AnonC" + diff --git a/test/tools/dsymutil/X86/submodules.m b/test/tools/dsymutil/X86/submodules.m new file mode 100644 index 000000000000..b2425a91cbda --- /dev/null +++ b/test/tools/dsymutil/X86/submodules.m @@ -0,0 +1,52 @@ +/* Compile with: + cat >modules.modulemap <&1 | FileCheck %s -check-prefix=BADARCH +RUN: not llvm-dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch i386 2>&1 | FileCheck %s -check-prefix=EMPTY + + +ARMV7: --- +ARMV7-NOT: ... +ARMV7: triple: 'thumbv7-apple-darwin' +ARMV7-NOT: ... +ARMV7: sym: _armv7_var +ARMV7-NOT: --- + +ARMV7S: --- +ARMV7S-NOT: ... +ARMV7S: triple: 'thumbv7s-apple-darwin' +ARMV7S-NOT: ... +ARMV7S: sym: _armv7s_var +ARMV7S-NOT: --- + +ARM64: --- +ARM64-NOT: ... 
+ARM64: triple: 'arm64-apple-darwin' +ARM64-NOT: ... +ARM64: sym: _arm64_var +ARM64-NOT: --- + +CHECK: ... + +BADARCH: error: Unsupported cpu architecture: 'arm42' +EMPTY: error: no architecture to link diff --git a/test/tools/dsymutil/archive-timestamp.test b/test/tools/dsymutil/archive-timestamp.test new file mode 100644 index 000000000000..f3f2162fa595 --- /dev/null +++ b/test/tools/dsymutil/archive-timestamp.test @@ -0,0 +1,24 @@ +# RUN: llvm-dsymutil -no-output -oso-prepend-path=%p -y %s 2>&1 | FileCheck %s + +# This is the archive member part of basic-archive.macho.x86_64 debug map with corrupted timestamps. + +# CHECK: warning: {{.*}}libbasic.a(basic2.macho.x86_64.o): {{[Nn]o}} such file +# CHECK: warning: {{.*}}libbasic.a(basic3.macho.x86_64.o): {{[Nn]o}} such file + +--- +triple: 'x86_64-apple-darwin' +objects: + - filename: '/Inputs/libbasic.a(basic2.macho.x86_64.o)' + timestamp: 141869239 + symbols: + - { sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050 } + - { sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0000000100001004, size: 0x00000000 } + - { sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F20, size: 0x00000017 } + - { sym: _baz, objAddr: 0x0000000000000310, binAddr: 0x0000000100001000, size: 0x00000000 } + - filename: '/Inputs/libbasic.a(basic3.macho.x86_64.o)' + timestamp: 418692393 + symbols: + - { sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000 } + - { sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050 } + - { sym: _inc, objAddr: 0x0000000000000070, binAddr: 0x0000000100000F90, size: 0x00000019 } +... diff --git a/test/tools/dsymutil/basic-linking.test b/test/tools/dsymutil/basic-linking.test index ec6a5b771461..bff5b5df9e66 100644 --- a/test/tools/dsymutil/basic-linking.test +++ b/test/tools/dsymutil/basic-linking.test @@ -1,6 +1,7 @@ -RUN: llvm-dsymutil -no-output -v -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s -RUN: llvm-dsymutil -no-output -v -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO -RUN: llvm-dsymutil -no-output -v -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE +RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s +RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO +RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE +RUN: llvm-dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 %p/Inputs/basic-lto.macho.x86_64 %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LTO --check-prefix=CHECK-ARCHIVE This test checks the basic Dwarf linking process through the debug dumps.
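For reference, the hand-written map fed to -y in the archive-timestamp test above uses the same YAML debug map format that -dump-debug-map prints. A minimal annotated sketch of one entry follows; the comments are an informal reading of how these tests exercise the fields, not normative documentation:

---                     # one debug map per linked architecture
triple: 'x86_64-apple-darwin'
objects:                # one entry per object file of the original link
  - filename: '/Inputs/libbasic.a(basic2.macho.x86_64.o)'  # archive(member) syntax
    timestamp: 141869239   # a stale value makes the member lookup fail, producing
                           # the "No such file" warnings checked above
    symbols:
      - { sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050 }
...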
diff --git a/test/tools/dsymutil/debug-map-parsing.test b/test/tools/dsymutil/debug-map-parsing.test index 5091dfbfc24d..2b9d0917609d 100644 --- a/test/tools/dsymutil/debug-map-parsing.test +++ b/test/tools/dsymutil/debug-map-parsing.test @@ -1,15 +1,16 @@ -RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s -RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO -RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE -RUN: llvm-dsymutil -v -dump-debug-map %p/Inputs/basic.macho.x86_64 2>&1 | FileCheck %s --check-prefix=NOT-FOUND -RUN: not llvm-dsymutil -v -dump-debug-map %p/Inputs/inexistant 2>&1 | FileCheck %s --check-prefix=NO-EXECUTABLE +RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s +RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO +RUN: llvm-dsymutil -verbose -dump-debug-map -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE +RUN: llvm-dsymutil -dump-debug-map %p/Inputs/basic.macho.x86_64 2>&1 | FileCheck %s --check-prefix=NOT-FOUND +RUN: not llvm-dsymutil -dump-debug-map %p/Inputs/inexistant 2>&1 | FileCheck %s --check-prefix=NO-EXECUTABLE Check that we can parse the debug map of the basic executable. CHECK-NOT: error CHECK: --- -CHECK: triple: 'x86_64-unknown-unknown-macho' +CHECK: triple: 'x86_64-apple-darwin' +CHECK: binary-path:{{.*}}/Inputs/basic.macho.x86_64 CHECK: filename:{{.*}}/Inputs/basic1.macho.x86_64.o CHECK-DAG: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024 CHECK: filename{{.*}}/Inputs/basic2.macho.x86_64.o @@ -28,7 +29,8 @@ Check that we can parse the debug-map of the basic-lto executable CHECK-LTO-NOT: error CHECK-LTO: --- -CHECK-LTO: triple: 'x86_64-unknown-unknown-macho' +CHECK-LTO: triple: 'x86_64-apple-darwin' +CHECK-LTO: binary-path:{{.*}}/Inputs/basic-lto.macho.x86_64 CHECK-LTO: /Inputs/basic-lto.macho.x86_64.o CHECK-LTO-DAG: sym: _bar, objAddr: 0x0000000000000050, binAddr: 0x0000000100000F90, size: 0x00000024 CHECK-LTO-DAG: sym: _baz, objAddr: 0x0000000000000658, binAddr: 0x0000000100001000, size: 0x00000000 @@ -51,7 +53,8 @@ CHECK-ARCHIVE-NEXT: found member in current archive. CHECK-ARCHIVE-NEXT: trying to open {{.*}}/libbasic.a(basic3.macho.x86_64.o)' CHECK-ARCHIVE-NEXT: found member in current archive. CHECK-ARCHIVE: --- -CHECK-ARCHIVE: triple: 'x86_64-unknown-unknown-macho' +CHECK-ARCHIVE: triple: 'x86_64-apple-darwin' +CHECK-ARCHIVE: binary-path:{{.*}}/Inputs/basic-archive.macho.x86_64 CHECK-ARCHIVE: /Inputs/basic1.macho.x86_64.o CHECK-ARCHIVE-DAG: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024 CHECK-ARCHIVE: /Inputs/./libbasic.a(basic2.macho.x86_64.o) @@ -72,7 +75,8 @@ NOT-FOUND: cannot open{{.*}}"/Inputs/basic1.macho.x86_64.o": {{[Nn]o}} such file NOT-FOUND: cannot open{{.*}}"/Inputs/basic2.macho.x86_64.o": {{[Nn]o}} such file NOT-FOUND: cannot open{{.*}}"/Inputs/basic3.macho.x86_64.o": {{[Nn]o}} such file NOT-FOUND: --- -NOT-FOUND-NEXT: triple: 'x86_64-unknown-unknown-macho' +NOT-FOUND-NEXT: triple: 'x86_64-apple-darwin' +NOT-FOUND-NEXT: binary-path:{{.*}}/Inputs/basic.macho.x86_64 NOT-FOUND-NEXT: ... Check that we correctly error out on an invalid executable.
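Putting the checks above together, the dumped debug map for the basic executable now has roughly the following shape; this is a sketch assembled from the CHECK lines above, with the {{.*}} path prefixes elided:

---
triple: 'x86_64-apple-darwin'
binary-path: /Inputs/basic.macho.x86_64
objects:
  - filename: /Inputs/basic1.macho.x86_64.o
    symbols:
      - { sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024 }
...

The binary-path line and the x86_64-apple-darwin triple are the two changes these updated expectations encode.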
diff --git a/test/tools/dsymutil/dump-symtab.test b/test/tools/dsymutil/dump-symtab.test new file mode 100644 index 000000000000..b83ac7f7ad17 --- /dev/null +++ b/test/tools/dsymutil/dump-symtab.test @@ -0,0 +1,44 @@ +RUN: llvm-dsymutil -s %p/Inputs/fat-test.dylib | FileCheck -check-prefix=ALL -check-prefix=I386 %s +RUN: llvm-dsymutil -arch i386 -s %p/Inputs/fat-test.dylib | FileCheck -check-prefix=I386 -check-prefix=ONE %s + + +ALL: ---------------------------------------------------------------------- +ALL-NEXT: Symbol table for: '{{.*}}fat-test.dylib' (x86_64) +ALL-NEXT: ---------------------------------------------------------------------- +ALL-NEXT: Index n_strx n_type n_sect n_desc n_value +ALL-NEXT: ======== -------- ------------------ ------ ------ ---------------- +ALL-NEXT: [ 0] 00000002 64 (N_SO ) 00 0000 0000000000000000 '/Inputs/' +ALL-NEXT: [ 1] 0000000b 64 (N_SO ) 00 0000 0000000000000000 'fat-test.c' +ALL-NEXT: [ 2] 00000016 66 (N_OSO ) 03 0001 0000000055b1d0b9 '/Inputs/fat-test.o' +ALL-NEXT: [ 3] 00000029 20 (N_GSYM ) 00 0000 0000000000000000 '_x86_64_var' +ALL-NEXT: [ 4] 00000001 64 (N_SO ) 01 0000 0000000000000000 +ALL-NEXT: [ 5] 00000035 0f ( SECT EXT) 02 0000 0000000000001000 '_x86_64_var' +ALL-NEXT: [ 6] 00000041 01 ( UNDF EXT) 00 0100 0000000000000000 'dyld_stub_binder' + +I386: ---------------------------------------------------------------------- +I386-NEXT: Symbol table for: '{{.*}}fat-test.dylib' (i386) +I386-NEXT: ---------------------------------------------------------------------- +I386-NEXT: Index n_strx n_type n_sect n_desc n_value +I386-NEXT: ======== -------- ------------------ ------ ------ ---------------- +I386-NEXT: [ 0] 00000002 64 (N_SO ) 00 0000 0000000000000000 '/Inputs/' +I386-NEXT: [ 1] 0000000b 64 (N_SO ) 00 0000 0000000000000000 'fat-test.c' +I386-NEXT: [ 2] 00000016 66 (N_OSO ) 03 0001 0000000055b1d0b9 '/Inputs/fat-test.o' +I386-NEXT: [ 3] 00000029 20 (N_GSYM ) 00 0000 0000000000000000 '_i386_var' +I386-NEXT: [ 4] 00000001 64 (N_SO ) 01 0000 0000000000000000 +I386-NEXT: [ 5] 00000033 0f ( SECT EXT) 02 0000 0000000000001000 '_i386_var' +I386-NEXT: [ 6] 0000003d 01 ( UNDF EXT) 00 0100 0000000000000000 'dyld_stub_binder' + +ONE-NOT: Symbol table + +ALL: ---------------------------------------------------------------------- +ALL-NEXT: Symbol table for: '{{.*}}fat-test.dylib' (x86_64h) +ALL-NEXT: ---------------------------------------------------------------------- +ALL-NEXT: Index n_strx n_type n_sect n_desc n_value +ALL-NEXT: ======== -------- ------------------ ------ ------ ---------------- +ALL-NEXT: [ 0] 00000002 64 (N_SO ) 00 0000 0000000000000000 '/Inputs/' +ALL-NEXT: [ 1] 0000000b 64 (N_SO ) 00 0000 0000000000000000 'fat-test.c' +ALL-NEXT: [ 2] 00000016 66 (N_OSO ) 08 0001 0000000055b1d0b9 '/Inputs/fat-test.o' +ALL-NEXT: [ 3] 00000029 20 (N_GSYM ) 00 0000 0000000000000000 '_x86_64h_var' +ALL-NEXT: [ 4] 00000001 64 (N_SO ) 01 0000 0000000000000000 +ALL-NEXT: [ 5] 00000036 0f ( SECT EXT) 02 0000 0000000000001000 '_x86_64h_var' +ALL-NEXT: [ 6] 00000043 01 ( UNDF EXT) 00 0100 0000000000000000 'dyld_stub_binder' \ No newline at end of file diff --git a/test/tools/dsymutil/fat-binary-output.test b/test/tools/dsymutil/fat-binary-output.test new file mode 100644 index 000000000000..fafef14ebe9b --- /dev/null +++ b/test/tools/dsymutil/fat-binary-output.test @@ -0,0 +1,32 @@ +RUN: llvm-dsymutil -f -verbose -no-output %p/Inputs/fat-test.dylib -oso-prepend-path %p | FileCheck %s + +This test doesn't produce any filesystem output; we just look at
the verbose +log output. + +For each arch in the binary, check that we emit the right triple with the right +file and the right symbol inside it (each slice has a different symbol, so that +means that the logic is looking at the right file slice too). + +After the link of each architecture, check that lipo is correctly invoked to +generate the fat output binary. + +CHECK: triple: 'x86_64-apple-darwin' +CHECK: - filename: {{'?}}[[INPUTS_PATH:.*]]fat-test.o +CHECK: DW_AT_name{{.*}} "x86_64_var" + +CHECK: triple: 'i386-apple-darwin' +CHECK: - filename: {{'?}}[[INPUTS_PATH]]fat-test.o +CHECK: DW_AT_name{{.*}} "i386_var" + +CHECK: triple: 'x86_64h-apple-darwin' +CHECK: - filename: {{'?}}[[INPUTS_PATH]]fat-test.o +CHECK: DW_AT_name{{.*}} "x86_64h_var" + +CHECK: Running lipo +CHECK-NEXT: lipo -create +CHECK-SAME: [[INPUTS_PATH]]fat-test.dylib.tmp{{......}}.dwarf +CHECK-SAME: [[INPUTS_PATH]]fat-test.dylib.tmp{{......}}.dwarf +CHECK-SAME: [[INPUTS_PATH]]fat-test.dylib.tmp{{......}}.dwarf +CHECK-SAME: -segalign x86_64 20 -segalign i386 20 -segalign x86_64h 20 +CHECK-SAME: -output [[INPUTS_PATH]]fat-test.dylib.dwarf + diff --git a/test/tools/dsymutil/yaml-object-address-rewrite.test b/test/tools/dsymutil/yaml-object-address-rewrite.test index dcb39be891cd..749719fc5bd9 100644 --- a/test/tools/dsymutil/yaml-object-address-rewrite.test +++ b/test/tools/dsymutil/yaml-object-address-rewrite.test @@ -1,16 +1,19 @@ -# RUN: llvm-dsymutil -v -dump-debug-map -oso-prepend-path=%p -y %s | FileCheck %s +# RUN: llvm-dsymutil -dump-debug-map -oso-prepend-path=%p -y %s | FileCheck %s # # The YAML debug map below is the one from basic-archive.macho.x86_64 with # the object addresses set to zero. Check that the YAML import is able to # rewrite these addresses to the right values. # # CHECK: --- -# CHECK-NEXT: triple:{{.*}}'x86_64-unknown-unknown-macho' +# CHECK-NEXT: triple:{{.*}}'x86_64-apple-darwin' +# CHECK-NEXT: binary-path:{{.*}}'' # CHECK-NEXT: objects: # CHECK-NEXT: filename:{{.*}}/Inputs/basic1.macho.x86_64.o +# CHECK-NEXT: timestamp: 0 # CHECK-NEXT: symbols: # CHECK-NEXT: sym: _main, objAddr: 0x0000000000000000, binAddr: 0x0000000100000EA0, size: 0x00000024 # CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic2.macho.x86_64.o)' +# CHECK-NEXT: timestamp: 0 # CHECK-NEXT: symbols: # CHECK-DAG: sym: _foo, objAddr: 0x0000000000000020, binAddr: 0x0000000100000ED0, size: 0x00000050 # CHECK-DAG: sym: _private_int, objAddr: 0x0000000000000560, binAddr: 0x0000000100001004, size: 0x00000000 @@ -18,6 +21,7 @@ # CHECK-DAG: sym: _baz, objAddr: 0x0000000000000310, binAddr: 0x0000000100001000, size: 0x00000000 # CHECK-NOT: { sym: # CHECK-NEXT: filename:{{.*}}/Inputs/./libbasic.a(basic3.macho.x86_64.o)' +# CHECK-NEXT: timestamp: 0 # CHECK-NEXT: symbols: # CHECK-DAG: sym: _val, objAddr: 0x0000000000000004, binAddr: 0x0000000100001008, size: 0x00000000 # CHECK-DAG: sym: _bar, objAddr: 0x0000000000000020, binAddr: 0x0000000100000F40, size: 0x00000050 @@ -25,7 +29,7 @@ # CHECK-NOT: { sym: # CHECK-NEXT: ...
--- -triple: 'x86_64-unknown-unknown-macho' +triple: 'x86_64-apple-darwin' objects: - filename: /Inputs/basic1.macho.x86_64.o symbols: diff --git a/test/tools/gold/Inputs/linkonce-weak.ll b/test/tools/gold/Inputs/linkonce-weak.ll deleted file mode 100644 index f42af8faa844..000000000000 --- a/test/tools/gold/Inputs/linkonce-weak.ll +++ /dev/null @@ -1,3 +0,0 @@ -define weak_odr void @f() { - ret void -} diff --git a/test/tools/gold/lit.local.cfg b/test/tools/gold/PowerPC/lit.local.cfg similarity index 75% rename from test/tools/gold/lit.local.cfg rename to test/tools/gold/PowerPC/lit.local.cfg index a59549d47abe..d968938d24ce 100644 --- a/test/tools/gold/lit.local.cfg +++ b/test/tools/gold/PowerPC/lit.local.cfg @@ -1,4 +1,3 @@ if (not 'ld_plugin' in config.available_features or - not 'X86' in config.root.targets or not 'PowerPC' in config.root.targets): config.unsupported = True diff --git a/test/tools/gold/mtriple.ll b/test/tools/gold/PowerPC/mtriple.ll similarity index 100% rename from test/tools/gold/mtriple.ll rename to test/tools/gold/PowerPC/mtriple.ll diff --git a/test/tools/gold/Inputs/alias-1.ll b/test/tools/gold/X86/Inputs/alias-1.ll similarity index 100% rename from test/tools/gold/Inputs/alias-1.ll rename to test/tools/gold/X86/Inputs/alias-1.ll diff --git a/test/tools/gold/X86/Inputs/available-externally.ll b/test/tools/gold/X86/Inputs/available-externally.ll new file mode 100644 index 000000000000..cbc5c12c65d5 --- /dev/null +++ b/test/tools/gold/X86/Inputs/available-externally.ll @@ -0,0 +1,3 @@ +define void @zed() { + ret void +} diff --git a/test/tools/gold/Inputs/bcsection.s b/test/tools/gold/X86/Inputs/bcsection.s similarity index 100% rename from test/tools/gold/Inputs/bcsection.s rename to test/tools/gold/X86/Inputs/bcsection.s diff --git a/test/tools/gold/Inputs/comdat.ll b/test/tools/gold/X86/Inputs/comdat.ll similarity index 69% rename from test/tools/gold/Inputs/comdat.ll rename to test/tools/gold/X86/Inputs/comdat.ll index 464aefa49dc1..734115180fff 100644 --- a/test/tools/gold/Inputs/comdat.ll +++ b/test/tools/gold/X86/Inputs/comdat.ll @@ -17,9 +17,9 @@ bb21: @r21 = global i32* @v1 @r22 = global i32(i8*)* @f1 -@a21 = alias i32* @v1 -@a22 = alias bitcast (i32* @v1 to i16*) +@a21 = alias i32, i32* @v1 +@a22 = alias i16, bitcast (i32* @v1 to i16*) -@a23 = alias i32(i8*)* @f1 -@a24 = alias bitcast (i32(i8*)* @f1 to i16*) -@a25 = alias i16* @a24 +@a23 = alias i32(i8*), i32(i8*)* @f1 +@a24 = alias i16, bitcast (i32(i8*)* @f1 to i16*) +@a25 = alias i16, i16* @a24 diff --git a/test/tools/gold/X86/Inputs/comdat2.ll b/test/tools/gold/X86/Inputs/comdat2.ll new file mode 100644 index 000000000000..5b7f74cf0b24 --- /dev/null +++ b/test/tools/gold/X86/Inputs/comdat2.ll @@ -0,0 +1,9 @@ +$foo = comdat any +@foo = global i8 1, comdat +define void @zed() { + call void @bar() + ret void +} +define void @bar() comdat($foo) { + ret void +} diff --git a/test/tools/gold/Inputs/common.ll b/test/tools/gold/X86/Inputs/common.ll similarity index 100% rename from test/tools/gold/Inputs/common.ll rename to test/tools/gold/X86/Inputs/common.ll diff --git a/test/tools/gold/X86/Inputs/ctors2.ll b/test/tools/gold/X86/Inputs/ctors2.ll new file mode 100644 index 000000000000..af1590eb277c --- /dev/null +++ b/test/tools/gold/X86/Inputs/ctors2.ll @@ -0,0 +1,5 @@ +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @bar, i8* null }] + +define void @bar() { + ret void +} diff --git a/test/tools/gold/Inputs/drop-debug.bc 
b/test/tools/gold/X86/Inputs/drop-debug.bc similarity index 100% rename from test/tools/gold/Inputs/drop-debug.bc rename to test/tools/gold/X86/Inputs/drop-debug.bc diff --git a/test/tools/gold/X86/Inputs/drop-linkage.ll b/test/tools/gold/X86/Inputs/drop-linkage.ll new file mode 100644 index 000000000000..075306114331 --- /dev/null +++ b/test/tools/gold/X86/Inputs/drop-linkage.ll @@ -0,0 +1,9 @@ +$foo = comdat any +define linkonce void @foo() comdat { + ret void +} + +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/gold/Inputs/invalid.bc b/test/tools/gold/X86/Inputs/invalid.bc similarity index 100% rename from test/tools/gold/Inputs/invalid.bc rename to test/tools/gold/X86/Inputs/invalid.bc diff --git a/test/tools/gold/Inputs/linker-script.export b/test/tools/gold/X86/Inputs/linker-script.export similarity index 100% rename from test/tools/gold/Inputs/linker-script.export rename to test/tools/gold/X86/Inputs/linker-script.export diff --git a/test/tools/gold/X86/Inputs/linkonce-weak.ll b/test/tools/gold/X86/Inputs/linkonce-weak.ll new file mode 100644 index 000000000000..3b7dad1b1eff --- /dev/null +++ b/test/tools/gold/X86/Inputs/linkonce-weak.ll @@ -0,0 +1,19 @@ +define weak_odr void @f() !dbg !4 { + ret void, !dbg !10 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3) +!1 = !DIFile(filename: "linkonce-weak.c", directory: ".") +!2 = !{} +!3 = !{!4} +!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, variables: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"} +!10 = !DILocation(line: 2, column: 1, scope: !4) diff --git a/test/tools/gold/Inputs/pr19901-1.ll b/test/tools/gold/X86/Inputs/pr19901-1.ll similarity index 100% rename from test/tools/gold/Inputs/pr19901-1.ll rename to test/tools/gold/X86/Inputs/pr19901-1.ll diff --git a/test/tools/gold/X86/Inputs/resolve-to-alias.ll b/test/tools/gold/X86/Inputs/resolve-to-alias.ll new file mode 100644 index 000000000000..eff02a6f4d1e --- /dev/null +++ b/test/tools/gold/X86/Inputs/resolve-to-alias.ll @@ -0,0 +1,4 @@ +@bar = alias void (), void ()* @zed +define void @zed() { + ret void +} diff --git a/test/tools/gold/X86/Inputs/thinlto.ll b/test/tools/gold/X86/Inputs/thinlto.ll new file mode 100644 index 000000000000..4e0840f3691e --- /dev/null +++ b/test/tools/gold/X86/Inputs/thinlto.ll @@ -0,0 +1,4 @@ +define void @g() { +entry: + ret void +} diff --git a/test/tools/gold/X86/Inputs/type-merge.ll b/test/tools/gold/X86/Inputs/type-merge.ll new file mode 100644 index 000000000000..4dc214922dc5 --- /dev/null +++ b/test/tools/gold/X86/Inputs/type-merge.ll @@ -0,0 +1,5 @@ +define void @zed() { + call void @bar() + ret void +} +declare void @bar() diff --git a/test/tools/gold/X86/Inputs/type-merge2.ll b/test/tools/gold/X86/Inputs/type-merge2.ll new file mode 100644 index 000000000000..a354757ee2e7 --- /dev/null +++ b/test/tools/gold/X86/Inputs/type-merge2.ll @@ -0,0 +1,5 @@ +%zed = type { i16 } +define void @bar(%zed* %this) { + store %zed* %this, %zed** null + ret void +} diff --git a/test/tools/gold/Inputs/weak.ll 
b/test/tools/gold/X86/Inputs/weak.ll similarity index 100% rename from test/tools/gold/Inputs/weak.ll rename to test/tools/gold/X86/Inputs/weak.ll diff --git a/test/tools/gold/alias.ll b/test/tools/gold/X86/alias.ll similarity index 92% rename from test/tools/gold/alias.ll rename to test/tools/gold/X86/alias.ll index b4edb05a4e46..c659f73d7e83 100644 --- a/test/tools/gold/alias.ll +++ b/test/tools/gold/X86/alias.ll @@ -9,5 +9,5 @@ ; CHECK-NEXT: @b = global i32 1 ; CHECK-NOT: alias -@a = weak alias i32* @b +@a = weak alias i32, i32* @b @b = global i32 1 diff --git a/test/tools/gold/X86/alias2.ll b/test/tools/gold/X86/alias2.ll new file mode 100644 index 000000000000..4727e0508fae --- /dev/null +++ b/test/tools/gold/X86/alias2.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as %s -o %t.o +; RUN: %gold -shared -o %t2.bc -plugin %llvmshlibdir/LLVMgold.so %t.o -plugin-opt=emit-llvm +; RUN: llvm-dis %t2.bc -o - | FileCheck %s + +@bar = alias void (), void ()* @zed +define void @foo() { + call void @bar() + ret void +} +define void @zed() { + ret void +} + +; CHECK: @bar = alias void (), void ()* @zed + +; CHECK: define void @foo() { +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; CHECK: define void @zed() { +; CHECK-NEXT: ret void +; CHECK-NEXT: } diff --git a/test/tools/gold/linkonce-weak.ll b/test/tools/gold/X86/available-externally.ll similarity index 54% rename from test/tools/gold/linkonce-weak.ll rename to test/tools/gold/X86/available-externally.ll index a0cccea56cfe..d47a536dc094 100644 --- a/test/tools/gold/linkonce-weak.ll +++ b/test/tools/gold/X86/available-externally.ll @@ -1,5 +1,5 @@ ; RUN: llvm-as %s -o %t.o -; RUN: llvm-as %p/Inputs/linkonce-weak.ll -o %t2.o +; RUN: llvm-as %p/Inputs/available-externally.ll -o %t2.o ; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ ; RUN: --plugin-opt=emit-llvm \ @@ -11,9 +11,17 @@ ; RUN: -shared %t2.o %t.o -o %t3.o ; RUN: llvm-dis %t3.o -o - | FileCheck %s -define linkonce_odr void @f() { +define void @foo() { + call void @bar() + call void @zed() + ret void +} +define available_externally void @bar() { + ret void +} +define available_externally void @zed() { ret void } -; Test that we get a weak_odr regardless of the order of the files -; CHECK: define weak_odr void @f() { +; CHECK-DAG: define available_externally void @bar() { +; CHECK-DAG: define void @zed() { diff --git a/test/tools/gold/bad-alias.ll b/test/tools/gold/X86/bad-alias.ll similarity index 64% rename from test/tools/gold/bad-alias.ll rename to test/tools/gold/X86/bad-alias.ll index a98bf710b454..c4e3c3fe82fc 100644 --- a/test/tools/gold/bad-alias.ll +++ b/test/tools/gold/X86/bad-alias.ll @@ -9,5 +9,5 @@ @g1 = global i32 1 @g2 = global i32 2 -@a = alias inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32), - i32 ptrtoint (i32* @g2 to i32)) to i32*) +@a = alias i32, inttoptr(i32 sub (i32 ptrtoint (i32* @g1 to i32), + i32 ptrtoint (i32* @g2 to i32)) to i32*) diff --git a/test/tools/gold/X86/bcsection.ll b/test/tools/gold/X86/bcsection.ll new file mode 100644 index 000000000000..f7ebe375770e --- /dev/null +++ b/test/tools/gold/X86/bcsection.ll @@ -0,0 +1,13 @@ +; RUN: llvm-as -o %T/bcsection.bc %s + +; RUN: llvm-mc -I=%T -filetype=obj -triple=x86_64-unknown-unknown -o %T/bcsection.bco %p/Inputs/bcsection.s +; RUN: llvm-nm -no-llvm-bc %T/bcsection.bco | count 0 +; RUN: %gold -r -o %T/bcsection.o -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so %T/bcsection.bco +; RUN: llvm-nm -no-llvm-bc %T/bcsection.o | FileCheck %s + +target triple = "x86_64-unknown-unknown" + +; CHECK: 
main +define i32 @main() { + ret i32 0 +} diff --git a/test/tools/gold/coff.ll b/test/tools/gold/X86/coff.ll similarity index 100% rename from test/tools/gold/coff.ll rename to test/tools/gold/X86/coff.ll diff --git a/test/tools/gold/X86/comdat.ll b/test/tools/gold/X86/comdat.ll new file mode 100644 index 000000000000..f65ca73c8063 --- /dev/null +++ b/test/tools/gold/X86/comdat.ll @@ -0,0 +1,65 @@ +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/comdat.ll -o %t2.o +; RUN: %gold -shared -o %t3.o -plugin %llvmshlibdir/LLVMgold.so %t.o %t2.o \ +; RUN: -plugin-opt=emit-llvm +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +$c1 = comdat any + +@v1 = weak_odr global i32 42, comdat($c1) +define weak_odr i32 @f1(i8*) comdat($c1) { +bb10: + br label %bb11 +bb11: + ret i32 42 +} + +@r11 = global i32* @v1 +@r12 = global i32 (i8*)* @f1 + +@a11 = alias i32, i32* @v1 +@a12 = alias i16, bitcast (i32* @v1 to i16*) + +@a13 = alias i32 (i8*), i32 (i8*)* @f1 +@a14 = alias i16, bitcast (i32 (i8*)* @f1 to i16*) +@a15 = alias i16, i16* @a14 + +; CHECK: $c1 = comdat any +; CHECK: $c2 = comdat any + +; CHECK-DAG: @v1 = weak_odr global i32 42, comdat($c1) + +; CHECK-DAG: @r11 = global i32* @v1{{$}} +; CHECK-DAG: @r12 = global i32 (i8*)* @f1{{$}} + +; CHECK-DAG: @r21 = global i32* @v1{{$}} +; CHECK-DAG: @r22 = global i32 (i8*)* @f1{{$}} + +; CHECK-DAG: @v1.1 = internal global i32 41, comdat($c2) + +; CHECK-DAG: @a11 = alias i32, i32* @v1{{$}} +; CHECK-DAG: @a12 = alias i16, bitcast (i32* @v1 to i16*) + +; CHECK-DAG: @a13 = alias i32 (i8*), i32 (i8*)* @f1{{$}} +; CHECK-DAG: @a14 = alias i16, bitcast (i32 (i8*)* @f1 to i16*) + +; CHECK-DAG: @a21 = alias i32, i32* @v1.1{{$}} +; CHECK-DAG: @a22 = alias i16, bitcast (i32* @v1.1 to i16*) + +; CHECK-DAG: @a23 = alias i32 (i8*), i32 (i8*)* @f1.2{{$}} +; CHECK-DAG: @a24 = alias i16, bitcast (i32 (i8*)* @f1.2 to i16*) + +; CHECK: define weak_odr protected i32 @f1(i8*) comdat($c1) { +; CHECK-NEXT: bb10: +; CHECK-NEXT: br label %bb11{{$}} +; CHECK: bb11: +; CHECK-NEXT: ret i32 42 +; CHECK-NEXT: } + +; CHECK: define internal i32 @f1.2(i8* %this) comdat($c2) { +; CHECK-NEXT: bb20: +; CHECK-NEXT: store i8* %this, i8** null +; CHECK-NEXT: br label %bb21 +; CHECK: bb21: +; CHECK-NEXT: ret i32 41 +; CHECK-NEXT: } diff --git a/test/tools/gold/X86/comdat2.ll b/test/tools/gold/X86/comdat2.ll new file mode 100644 index 000000000000..2156efd207bb --- /dev/null +++ b/test/tools/gold/X86/comdat2.ll @@ -0,0 +1,19 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-as %p/Inputs/comdat2.ll -o %t2.bc +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.bc %t2.bc -o %t3.bc +; RUN: llvm-dis %t3.bc -o - | FileCheck %s + + +$foo = comdat any +@foo = global i8 0, comdat + +; CHECK: @foo = global i8 0, comdat + +; CHECK: define void @zed() { +; CHECK: call void @bar() +; CHECK: ret void +; CHECK: } + +; CHECK: declare void @bar() diff --git a/test/tools/gold/common.ll b/test/tools/gold/X86/common.ll similarity index 100% rename from test/tools/gold/common.ll rename to test/tools/gold/X86/common.ll diff --git a/test/tools/gold/X86/ctors.ll b/test/tools/gold/X86/ctors.ll new file mode 100644 index 000000000000..24c8e342beb0 --- /dev/null +++ b/test/tools/gold/X86/ctors.ll @@ -0,0 +1,13 @@ +; RUN: llvm-as %s -o %t.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o -o %t2.o +; RUN: llvm-dis %t2.o -o - | FileCheck %s + +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } 
{ i32 65535, void ()* @foo, i8* null }] + +define internal void @foo() { + ret void +} + +; CHECK: @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }] diff --git a/test/tools/gold/X86/ctors2.ll b/test/tools/gold/X86/ctors2.ll new file mode 100644 index 000000000000..c39cb7132d93 --- /dev/null +++ b/test/tools/gold/X86/ctors2.ll @@ -0,0 +1,14 @@ +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/ctors2.ll -o %t2.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o %t2.o -o %t3.o +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }] + +define void @foo() { + ret void +} + +; CHECK: @llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @foo, i8* null }, { i32, void ()*, i8* } { i32 65535, void ()* @bar, i8* null }] diff --git a/test/tools/gold/X86/disable-verify.ll b/test/tools/gold/X86/disable-verify.ll new file mode 100644 index 000000000000..5b8dbb054478 --- /dev/null +++ b/test/tools/gold/X86/disable-verify.ll @@ -0,0 +1,25 @@ +; RUN: llvm-as %s -o %t.o +; REQUIRES: asserts + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=disable-verify \ +; RUN: --plugin-opt=-debug-pass=Arguments \ +; RUN: -shared %t.o -o %t2.o 2>&1 | FileCheck %s + +; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=-debug-pass=Arguments \ +; RUN: -shared %t.o -o %t2.o 2>&1 | FileCheck %s -check-prefix=VERIFY + +target triple = "x86_64-unknown-linux-gnu" + +; -disable-verify should disable output verification from the optimization +; pipeline. 
+; CHECK: Pass Arguments: {{.*}} -verify -forceattrs +; CHECK-NOT: -verify + +; VERIFY: Pass Arguments: {{.*}} -verify {{.*}} -verify + +define void @f() { +entry: + ret void +} diff --git a/test/tools/gold/drop-debug.ll b/test/tools/gold/X86/drop-debug.ll similarity index 100% rename from test/tools/gold/drop-debug.ll rename to test/tools/gold/X86/drop-debug.ll diff --git a/test/tools/gold/X86/drop-linkage.ll b/test/tools/gold/X86/drop-linkage.ll new file mode 100644 index 000000000000..d4c1dd052c79 --- /dev/null +++ b/test/tools/gold/X86/drop-linkage.ll @@ -0,0 +1,14 @@ +; RUN: llc %s -o %t.s +; RUN: llvm-mc %t.s -o %t.o -filetype=obj +; RUN: llvm-as %p/Inputs/drop-linkage.ll -o %t2.o + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o %t2.o -o %t3.o +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +define void @foo() { + ret void +} + +; CHECK: declare extern_weak void @foo(){{$}} diff --git a/test/tools/gold/emit-llvm.ll b/test/tools/gold/X86/emit-llvm.ll similarity index 78% rename from test/tools/gold/emit-llvm.ll rename to test/tools/gold/X86/emit-llvm.ll index bfb90c4bc28a..4a6d5963cff0 100644 --- a/test/tools/gold/emit-llvm.ll +++ b/test/tools/gold/X86/emit-llvm.ll @@ -12,6 +12,7 @@ ; RUN: -shared %t.o -o %t3.o ; RUN: llvm-dis %t3.o.bc -o - | FileCheck %s ; RUN: llvm-dis %t3.o.opt.bc -o - | FileCheck --check-prefix=OPT %s +; RUN: llvm-dis %t3.o.opt.bc -o - | FileCheck --check-prefix=OPT2 %s ; RUN: llvm-nm %t3.o.o | FileCheck --check-prefix=NM %s ; RUN: rm -f %t4.o @@ -29,42 +30,42 @@ target triple = "x86_64-unknown-linux-gnu" @g8 = external global i32 -; CHECK: define internal void @f1() -; OPT-NOT: @f1 +; CHECK-DAG: define internal void @f1() +; OPT2-NOT: @f1 define hidden void @f1() { ret void } -; CHECK: define hidden void @f2() -; OPT: define hidden void @f2() +; CHECK-DAG: define hidden void @f2() +; OPT-DAG: define hidden void @f2() define hidden void @f2() { ret void } @llvm.used = appending global [1 x i8*] [ i8* bitcast (void ()* @f2 to i8*)] -; CHECK: define void @f3() -; OPT: define void @f3() +; CHECK-DAG: define void @f3() +; OPT-DAG: define void @f3() define void @f3() { call void @f4() ret void } -; CHECK: define internal void @f4() -; OPT-NOT: @f4 +; CHECK-DAG: define internal void @f4() +; OPT2-NOT: @f4 define linkonce_odr void @f4() { ret void } -; CHECK: define linkonce_odr void @f5() -; OPT: define linkonce_odr void @f5() +; CHECK-DAG: define linkonce_odr void @f5() +; OPT-DAG: define linkonce_odr void @f5() define linkonce_odr void @f5() { ret void } @g5 = global void()* @f5 -; CHECK: define internal void @f6() unnamed_addr -; OPT: define internal void @f6() unnamed_addr +; CHECK-DAG: define internal void @f6() unnamed_addr +; OPT-DAG: define internal void @f6() unnamed_addr define linkonce_odr void @f6() unnamed_addr { ret void } diff --git a/test/tools/gold/invalid.ll b/test/tools/gold/X86/invalid.ll similarity index 100% rename from test/tools/gold/invalid.ll rename to test/tools/gold/X86/invalid.ll diff --git a/test/tools/gold/linker-script.ll b/test/tools/gold/X86/linker-script.ll similarity index 100% rename from test/tools/gold/linker-script.ll rename to test/tools/gold/X86/linker-script.ll diff --git a/test/tools/gold/X86/linkonce-weak.ll b/test/tools/gold/X86/linkonce-weak.ll new file mode 100644 index 000000000000..3397c3480a7c --- /dev/null +++ b/test/tools/gold/X86/linkonce-weak.ll @@ -0,0 +1,39 @@ +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/linkonce-weak.ll -o %t2.o + +; RUN: %gold -plugin 
%llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o %t2.o -o %t3.o +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t2.o %t.o -o %t3.o +; RUN: llvm-dis %t3.o -o - | FileCheck %s + +define linkonce_odr void @f() !dbg !4 { + ret void, !dbg !10 +} + +; Test that we get a weak_odr regardless of the order of the files +; CHECK: define weak_odr void @f() + +; Test that we only get a single DISubprogram for @f +; CHECK: !DISubprogram(name: "f" +; CHECK-NOT: !DISubprogram(name: "f" + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3) +!1 = !DIFile(filename: "linkonce-weak.c", directory: ".") +!2 = !{} +!3 = !{!4} +!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, variables: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{!"clang version 3.8.0 (trunk 251407) (llvm/trunk 251401)"} +!10 = !DILocation(line: 2, column: 1, scope: !4) diff --git a/test/tools/gold/X86/lit.local.cfg b/test/tools/gold/X86/lit.local.cfg new file mode 100644 index 000000000000..ddcd48ca470d --- /dev/null +++ b/test/tools/gold/X86/lit.local.cfg @@ -0,0 +1,3 @@ +if (not 'ld_plugin' in config.available_features or + not 'X86' in config.root.targets): + config.unsupported = True diff --git a/test/tools/gold/no-map-whole-file.ll b/test/tools/gold/X86/no-map-whole-file.ll similarity index 100% rename from test/tools/gold/no-map-whole-file.ll rename to test/tools/gold/X86/no-map-whole-file.ll diff --git a/test/tools/gold/opt-level.ll b/test/tools/gold/X86/opt-level.ll similarity index 100% rename from test/tools/gold/opt-level.ll rename to test/tools/gold/X86/opt-level.ll diff --git a/test/tools/gold/X86/parallel.ll b/test/tools/gold/X86/parallel.ll new file mode 100644 index 000000000000..00a0bafda251 --- /dev/null +++ b/test/tools/gold/X86/parallel.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as -o %t.bc %s +; RUN: env LD_PRELOAD=%llvmshlibdir/LLVMgold.so %gold -plugin %llvmshlibdir/LLVMgold.so -u foo -u bar -plugin-opt jobs=2 -plugin-opt save-temps -m elf_x86_64 -o %t %t.bc +; RUN: llvm-nm %t.o0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-nm %t.o1 | FileCheck --check-prefix=CHECK1 %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK0-NOT: bar +; CHECK0: T foo +; CHECK0-NOT: bar +define void @foo() { + call void @bar() + ret void +} + +; CHECK1-NOT: foo +; CHECK1: T bar +; CHECK1-NOT: foo +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/gold/pr19901.ll b/test/tools/gold/X86/pr19901.ll similarity index 100% rename from test/tools/gold/pr19901.ll rename to test/tools/gold/X86/pr19901.ll diff --git a/test/tools/gold/X86/pr25907.ll b/test/tools/gold/X86/pr25907.ll new file mode 100644 index 000000000000..502938cf8126 --- /dev/null +++ b/test/tools/gold/X86/pr25907.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as %s -o %t.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: -shared %t.o -o %t2 +; RUN: llvm-nm %t2 | FileCheck %s +; CHECK: T main + +@main.L = internal unnamed_addr constant [3 x i8*] [i8* blockaddress(@main, %L1), i8* blockaddress(@main, %L2), 
i8* null], align 16 + +define i32 @main() #0 { +entry: + br label %L1 + +L1: ; preds = %entry, %L1 + %i.0 = phi i32 [ 0, %entry ], [ %inc, %L1 ] + %inc = add i32 %i.0, 1 + %idxprom = zext i32 %i.0 to i64 + %arrayidx = getelementptr inbounds [3 x i8*], [3 x i8*]* @main.L, i64 0, i64 %idxprom + %0 = load i8*, i8** %arrayidx, align 8, !tbaa !1 + indirectbr i8* %0, [label %L1, label %L2] + +L2: ; preds = %L1 + ret i32 0 +} + +!1 = !{!2, !2, i64 0} +!2 = !{!"any pointer", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} diff --git a/test/tools/gold/X86/pr25915.ll b/test/tools/gold/X86/pr25915.ll new file mode 100644 index 000000000000..2d8807e04bb4 --- /dev/null +++ b/test/tools/gold/X86/pr25915.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as %s -o %t.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: -plugin-opt=emit-llvm \ +; RUN: -shared %t.o -o %t2 +; RUN: llvm-dis %t2 -o - | FileCheck %s +; CHECK-NOT: subprograms + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 256170) (llvm/trunk 256171)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2) +!1 = !DIFile(filename: "pr25915.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.8.0 (trunk 256170) (llvm/trunk 256171)"} diff --git a/test/tools/gold/remarks.ll b/test/tools/gold/X86/remarks.ll similarity index 69% rename from test/tools/gold/remarks.ll rename to test/tools/gold/X86/remarks.ll index c4fa7f787f26..51bd121cebce 100644 --- a/test/tools/gold/remarks.ll +++ b/test/tools/gold/X86/remarks.ll @@ -1,9 +1,9 @@ ; RUN: llvm-as %s -o %t.o -; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: not %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ ; RUN: -plugin-opt=-pass-remarks=inline %t.o -o %t2.o 2>&1 | FileCheck %s -; RUN: %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: not %gold -m elf_x86_64 -plugin %llvmshlibdir/LLVMgold.so \ ; RUN: %t.o -o %t2.o 2>&1 | FileCheck -allow-empty --check-prefix=NO-REMARK %s @@ -12,8 +12,11 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" +declare i32 @bar() + define i32 @f() { - ret i32 0 + %a = call i32 @bar() + ret i32 %a } define i32 @_start() { diff --git a/test/tools/gold/X86/resolve-to-alias.ll b/test/tools/gold/X86/resolve-to-alias.ll new file mode 100644 index 000000000000..102da6f80f4d --- /dev/null +++ b/test/tools/gold/X86/resolve-to-alias.ll @@ -0,0 +1,33 @@ +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/resolve-to-alias.ll -o %t2.o + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t.o %t2.o -o %t.bc +; RUN: llvm-dis %t.bc -o %t.ll +; RUN: FileCheck --check-prefix=PASS1 %s < %t.ll +; RUN: FileCheck --check-prefix=PASS2 %s < %t.ll + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=emit-llvm \ +; RUN: -shared %t2.o %t.o -o %t.bc +; RUN: llvm-dis %t.bc -o %t.ll +; RUN: FileCheck --check-prefix=PASS1 %s < %t.ll +; RUN: FileCheck --check-prefix=PASS2 %s < %t.ll + +define void @foo() { + call void @bar() + ret void +} +declare void @bar() + +; PASS1: @bar = alias void (), void ()* @zed + +; PASS1: define void @foo() { +; PASS1-NEXT: call void @bar() +; PASS1-NEXT: ret void +; PASS1-NEXT: } + +; PASS2: define void @zed() { +; PASS2-NEXT: ret void +; PASS2-NEXT: } 
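The alias rewrites running through these gold tests all apply one IR syntax change: an alias now spells its own value type explicitly, followed by the typed aliasee operand. A minimal before/after sketch, with names taken from the alias.ll test above:

; old syntax: the type is only implied by the aliasee operand
@a = weak alias i32* @b
; new syntax: explicit value type, then the aliasee
@a = weak alias i32, i32* @b
@b = global i32 1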
diff --git a/test/tools/gold/slp-vectorize.ll b/test/tools/gold/X86/slp-vectorize.ll similarity index 100% rename from test/tools/gold/slp-vectorize.ll rename to test/tools/gold/X86/slp-vectorize.ll diff --git a/test/tools/gold/stats.ll b/test/tools/gold/X86/stats.ll similarity index 100% rename from test/tools/gold/stats.ll rename to test/tools/gold/X86/stats.ll diff --git a/test/tools/gold/X86/thinlto.ll b/test/tools/gold/X86/thinlto.ll new file mode 100644 index 000000000000..97def3d7a14d --- /dev/null +++ b/test/tools/gold/X86/thinlto.ll @@ -0,0 +1,34 @@ +; First ensure that the ThinLTO handling in the gold plugin handles +; bitcode without function summary sections gracefully. +; RUN: llvm-as %s -o %t.o +; RUN: llvm-as %p/Inputs/thinlto.ll -o %t2.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: -shared %t.o %t2.o -o %t3 + +; RUN: llvm-as -function-summary %s -o %t.o +; RUN: llvm-as -function-summary %p/Inputs/thinlto.ll -o %t2.o + +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: -shared %t.o %t2.o -o %t3 +; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; RUN: not test -e %t3 + +; COMBINED: &1 | FileCheck --check-prefix=ENOENT %s +ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory diff --git a/test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo b/test/tools/llvm-dwp/Inputs/simple/notypes/a.dwo new file mode 100644 index 0000000000000000000000000000000000000000..7bdb2a7b9f826e0dfcbafb94b64ef9285b42f4c6 GIT binary patch literal 1193 zcmbtTT}s115T4ytTP=#EFN!Y#5n3N&wTf1;EsBC5;tgtAQ!S=RNqaHDd+~HrgfYZvW8JtxbWTT1RyNqS7Wk=hfB>29j%Xx`; ztjk<&eH3I?6(f32BQ!qp%mZ2`ghABrI>U;t@#a?^&{jhjM7`I?imvhIPce}cC_(S# zbta1nev7=PXgt;DIj34rK*K)(`l2?8$Uoe;;P`O|#`UQC`(i%oJ~8?yuwUDTBP@Dj W+#_lezMoh|yOF`TrpWUre(OK6DqH*j literal 0 HcmV?d00001 diff --git a/test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo b/test/tools/llvm-dwp/Inputs/simple/notypes/b.dwo new file mode 100644 index 0000000000000000000000000000000000000000..f41243dc722b011d346dccede7fd91268525d578 GIT binary patch literal 1241 zcmbtU%`O8`6h8M(>#rfHT2yRiBf(;{8ll7(f`nLDk=RtHrZmQkOsiOl+FI}gEG>Bo zPayFgmNw4my)(wmBo=OR?m6FgzVkC}r`PM-8=Nr+4+9f^P{;zj#7L`4TQC5#95{n^(LJ-yMq zI&D>L^sPYn<1jjO?DM0F>(`9K3m*&tF{!|cU#~ZvRx?gFyt*I9Rof1ncARk=p3}v+ z!@tiMc8GnIQ9mn+eg~>{3e&_De#^j;cw6Da`p}m#AI5R%odhFsGC{kOz){H)U3g}N zi*9w+0cKQ=#P4+BSrab0Rc(M7l_T+}B(qVa2vjZQHAO>kN(wtlPOT@*6%E0HcuC+{ z=K9gokGD@^@nQfFpEUnN)+f!AE&g;`;wMA%k~StjX%ph*GRi~^(h4J>_#04C-$$xq AOaK4? literal 0 HcmV?d00001 diff --git a/test/tools/llvm-dwp/Inputs/simple/types/a.dwo b/test/tools/llvm-dwp/Inputs/simple/types/a.dwo new file mode 100644 index 0000000000000000000000000000000000000000..1fc71ca8d17560f4ce433262fedc9bcb4fc14a2f GIT binary patch literal 1369 zcmbtU%}VP)6h1T4YOeuN0($*H0$0%r#@Z@ce`>v#D+n%Ji91bes>PV}wrRzc;C%!) 
zzJ_lg_zEt>rT7-Q@JupO(=p=01LyBM-*b=3%PtIpEjl(r*%5}j?HD$V43Aw4J@d5Ua;6A z+BMX5)HG@W6(B`0AgP`Er`_&5M(Hoa2&FH;UdxZ8Pq)n_Fc1=CiH%7p0ZB&iY^0w| zTo&{syDZ%N?gpUqfZ>+DKD4Z@X!Iul(_wX}ifbh{NZ+8I1W4+fqiLLh>kXh%=tE1$ zOD_@gV+vq4&QE=-gx;@*{#ZtbcBy%^?|5bN@XP}`(CBeu>D39uDTU3n)~N9 zyMthrN_D#xr8rgB4)S2EZ<-P8k6d|swPHK!3{>tkmN}N`q(M>Se-vEyOP|NOjHA^5 zBH>pN6_vY#MHS=xE73F~ymTt}01GO{xwqV)jEAmmRqj8ms2C?di;mCEFUr8r8P`84 zz+p)gUgh>9&J+$$Hw00B2T4);c}`_9;;+NU*8UDXSW5}zG<-~f{6HJG{~o?AE^+&X lm9H)P&>i-$*opZZag%Ud97fraff(`w-lZUNU;L@4@&*J$Z#Dn` literal 0 HcmV?d00001 diff --git a/test/tools/llvm-dwp/Inputs/simple/types/b.dwo b/test/tools/llvm-dwp/Inputs/simple/types/b.dwo new file mode 100644 index 0000000000000000000000000000000000000000..a69cd69eb995177ad6c1d2579310f49c55a8a2c3 GIT binary patch literal 1409 zcmbtU&2G~`5T0EpAz%qL7(ogG`2bSO!K95U2tOu80twUu5+VmKAx@nV;n=}(13dxZ z!hs9$0=xn*fGWY{*2Zlq1ZWZ9!aUO`dO45V zY1jr6s?ZO5;g&u1C{VTucNM`-bGzQEujKb1qR~%clqE;>Bk@Wrt`J&`KLT%Kn07UW0Zs} z$#>6%tfzDvt!mWNvj~-`Sb_?kS%9dC^E1~f8#DLQ%|m!b3$85=Z2-d&wO8)3O1mD; zT&!L(ubMX&v9S!YPRSh$N3Dy6lNJuFSb!YUqykPqfhu7Etq{MC3UyXY0i4OsDCv3V zlQ{E_b+qYw+gsfr>N}gyB5<-4i!AL%gTcg4CV6-m4x)UX?2P??2tCh>{b`;O3`0N7 z({25SIgPzV|4aLTnVfq;^*0z__ir#<_iNu${fq{06>yvtbf1RrVZbq}eI$cI^Yg;F zkFdxw;(sH9axQawmUEwCg=0kitvWi&Ysa~-vA{9n-dEr$-Kae0)(|%shtiQEeqc4; zPkHWX*kkWzcMo8)7pFr1DrR@iEH1svO(Ou$(M zFCLHM2%IiB!ywIq$jkESm(kup9iZMreTd3)4Hck8Fd*@-&4c;AHly?$VuaE;u)Qq& z{^r9MhiZHR(deA8y@O?UKTbY^g^(B^d1oc0fzAj%h&o5~)j2vL49FrmzuPx;w%h-D z>nH%d1Q9x4YF{`)B$vpa}0ws@XV8_6c*4DGCx7Y1Hlx) zPG{rHanWZ<;h)NA=({`HTVd?k!xu5wMT$iw-HJz})X&m797LnIo@cX(e~93?ZsJet zlrV_=JkQtqKXV4#oBmgWzl>G67es$Y;miKV3NHJl?}>he2JZ!c@9dDsJ_$a;K*f0N zvmk5;FP+MLf&~@h+@IW_+%H^dRqk`Fs2C@&3-ET?Sp+KgHNq;!nJ)#nT(p%|Bp4UDhCHqHlAK{xoj-zi0<&b{}Wy&WV;p*!g>;K z>P_(MK?VH-#Djl>XOABA50GAblguR9G2+1o@9Xz|@6Bt{$>S@xu2Mok3V{w3nM48V zw1~TD*aZV>;0FD0!}p<+q2j2`&8o-h(_mxwLXPUZ+pFWU?C(1NZv6CX`nNL52D18ePxbL2m`W6PV96{ zo$Yi!T-pmjR{_Ht=EbG6#yNu?24E(fU-EI5woVofnL2=Oa*kne2A+8kmBIpALgojE zcp#Vp*zs(X^d0ocB=h%WH1M3Qjb0GB_Fy{#J4>;sq`hc3jJ+f-!+to7%6T#yd%FmI z$C-H3G9~asFU`}n{?8o6_NxC?_b+2r?n%+#Q24U{u7b;c>D!`Tp}|W5;5*wTvQL6{ zFiageS)xxappq-9?9Br zt8y2y!t!bnJQu_l1(Zf=KhH~_kevNJyN~c$Ap6nRgmOr}mwgylo&N@}7MH5?vdX_L jUg$~^tK69HQMC!j#g9>@F%V1sf$vd}EM&1 | FileCheck --check-prefix=ENOENT %s + +# ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory diff --git a/test/tools/llvm-mc/fatal_warnings.test b/test/tools/llvm-mc/fatal_warnings.test new file mode 100644 index 000000000000..e9405ada7a04 --- /dev/null +++ b/test/tools/llvm-mc/fatal_warnings.test @@ -0,0 +1,4 @@ +# RUN: not llvm-mc --fatal-warnings %s 2>&1 | FileCheck %s + +# CHECK: error: .warning directive invoked in source file +.warning diff --git a/test/tools/llvm-mc/line_end_with_space.test b/test/tools/llvm-mc/line_end_with_space.test index 2ce313990af0..673d05a68067 100644 --- a/test/tools/llvm-mc/line_end_with_space.test +++ b/test/tools/llvm-mc/line_end_with_space.test @@ -1,2 +1 @@ RUN: llvm-mc -disassemble %s - \ No newline at end of file diff --git a/test/tools/llvm-mc/lit.local.cfg b/test/tools/llvm-mc/lit.local.cfg new file mode 100644 index 000000000000..a1bda0f1fdc8 --- /dev/null +++ b/test/tools/llvm-mc/lit.local.cfg @@ -0,0 +1,4 @@ +# Requires a non-empty default triple for these tests +if 'default_triple' not in config.available_features: + config.unsupported = True + diff --git a/test/tools/llvm-mc/no_warnings.test b/test/tools/llvm-mc/no_warnings.test new file mode 100644 index 000000000000..973dc271a581 --- /dev/null +++ b/test/tools/llvm-mc/no_warnings.test @@ -0,0 +1,4 @@ +# RUN: 
llvm-mc --no-warn %s 2>&1 | FileCheck %s + +# CHECK-NOT: warning: +.warning diff --git a/test/tools/llvm-nm/X86/IRobj.test b/test/tools/llvm-nm/X86/IRobj.test new file mode 100644 index 000000000000..e6fa517d00e8 --- /dev/null +++ b/test/tools/llvm-nm/X86/IRobj.test @@ -0,0 +1,11 @@ +# RUN: llvm-nm -format darwin %p/Inputs/test.IRobj-x86_64 | FileCheck %s + +# CHECK: ---------------- (LTO,RODATA) external _global_const +# CHECK: ---------------- (LTO,DATA) external _global_data +# CHECK: ---------------- (LTO,CODE) external _global_func +# CHECK: ---------------- (LTO,RODATA) private external _hidden_const +# CHECK: ---------------- (LTO,DATA) private external _hidden_data +# CHECK: ---------------- (LTO,CODE) private external _hidden_func +# CHECK: ---------------- (LTO,RODATA) non-external _static_const +# CHECK: ---------------- (LTO,DATA) non-external _static_data +# CHECK: ---------------- (LTO,CODE) non-external _static_func diff --git a/test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64 b/test/tools/llvm-nm/X86/Inputs/hello.obj.macho-x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..2b59a1cfc63b05797c8a012aed7da860c610a434 GIT binary patch literal 844 zcmaJY5S`QbfnT}8LP7Bg!A8a2M$v->EkqKqktHM-6G$!+FC+*;KoGAG{27*( zS}Vj(e}saCLP`-)-{fX7#*cxyH*aU=?cVI(zCOSA5n0`Y51g^dfdhw)!;hK&VP~># zcSV8W&?NSl<7`0CC1?9ZIBa5ASMrT;^ zYjQrJk)|O)=#`4QsdS(#el_oBv|e9rOOq1f2yQ`-XLSRV@qF7sYcHp>hRjxzEWPHP;#oJav?Losg}t-{5Z;}U_|3MZ@i2<1yia-;Z!O41Y~IV!x}L}TBUeS2yuiJm{_0#J literal 0 HcmV?d00001 diff --git a/test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64 b/test/tools/llvm-nm/X86/Inputs/test.IRobj-x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..8bfa857fa7d163f3b74daa2df6e179e34a5c44e0 GIT binary patch literal 1168 zcmZuxPfR0K82_d-%$x0>=|G!M+?jEPqQQj2SP09yAp*tNO<04*7;jDql6qJ&w4}7` z9;VQBYN)2H9Ehh*vIhpm4e%TR;98Twf!BI)Lz&>_WQDp;bD^tuiLCpD zE~ZbUHs*BhHOJIl*Ho4Z`Nx>Rb!TXLAmS{@!M(S1cb^;|KozEYZl}02v44&s;mrWR z4Ln|63_gB}jVOSJA=iXzRP30Z)}&aC@&{3|YIMn;3EHq6I z-f(Do8YW*Z@lY0yOwU0{s6|EmRm=2X(uafq2?iy%jW{qXDPvSKG|fn3cHJ0_8Cnr7 zLrvxzSSKNMGGaN(|9m0-2opv5hi&25^z4~pt;E~Bs(3K@e3h_BeFv><8=7M4|39cx z3}|{fZK0O2#WdPNIU_z(0@rcw>>T>m06vlcSnP-TnM>0A9m>~#FN?beS+n5pnaSl{ z+3XBAl%OI#@HI~a>Ur2IR%i#)VJ2({@QvKI$ZL1T#^%NTfgEysdUPQberUu?^Arrjk7JCBp=a%!Ux>HrX zCzAJ3f&IP3sw!Fpr0Vnoi*2{qU+Z)dkk+E{lPI7MfR0IQTk)PuuOCvaL4Q(3w_@l@1jPr@O3`=;M`ZK&V*X)9=l9zDf}a@Ag~zh@^HVxXr%+;h$=w0( zZyful&JMe$#Bxda!Q>Z|u8VE!muuvBB|M%d$7`uPuIXD0N=I~l+gJBEx0NdaI6ou_`ybNb Sn7=p)JJ_PqfB5$MzxFqnH)G=f literal 0 HcmV?d00001 diff --git a/test/tools/llvm-nm/X86/externalonly.test b/test/tools/llvm-nm/X86/externalonly.test new file mode 100644 index 000000000000..c37412987865 --- /dev/null +++ b/test/tools/llvm-nm/X86/externalonly.test @@ -0,0 +1,4 @@ +# RUN: llvm-nm -g %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s + +# CHECK-NOT: EH_frame0 +# CHECK: _main diff --git a/test/tools/llvm-nm/X86/groupingflags.test b/test/tools/llvm-nm/X86/groupingflags.test new file mode 100644 index 000000000000..a1a258506e97 --- /dev/null +++ b/test/tools/llvm-nm/X86/groupingflags.test @@ -0,0 +1,5 @@ +# RUN: llvm-nm -gjp %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s + +# CHECK: _main +# CHECK: _main.eh +# CHECK: _printf diff --git a/test/tools/llvm-nm/X86/posixMachO.test b/test/tools/llvm-nm/X86/posixMachO.test new file mode 100644 index 000000000000..a0d114237ce0 --- /dev/null +++ b/test/tools/llvm-nm/X86/posixMachO.test @@ -0,0 +1,7 @@ +# RUN: llvm-nm -P %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s + +# CHECK: EH_frame0 s 104 0 +# CHECK: 
L_.str s 59 0 +# CHECK: _main T 0 0 +# CHECK: _main.eh S 128 0 +# CHECK: _printf U 0 0 diff --git a/test/tools/llvm-nm/lit.local.cfg b/test/tools/llvm-nm/lit.local.cfg new file mode 100644 index 000000000000..c8625f4d9d24 --- /dev/null +++ b/test/tools/llvm-nm/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'X86' in config.root.targets: + config.unsupported = True diff --git a/test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test b/test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test new file mode 100644 index 000000000000..cb9560d74dfb --- /dev/null +++ b/test/tools/llvm-objdump/AArch64/elf-aarch64-mapping-symbols.test @@ -0,0 +1,30 @@ +# RUN: llvm-mc -filetype=obj -triple=aarch64-unknown-freebsd %s -o %t +# RUN: llvm-objdump -d %t | FileCheck %s + +.section .mysection,"ax",@progbits +.globl _start +_start: + adr x1,msg +msg: .asciz "Hello, world\n" +msgend: + +.section .myothersection,"ax",@progbits + adrp x1,mystr +mystr: + .asciz "blah" + .size mystr, 4 + +# CHECK: Disassembly of section .mysection: +# CHECK: _start: +# CHECK: 0: 21 00 00 10 adr x1, #4 +# CHECK: msg: +# CHECK: 4: 48 65 6c 6c .word +# CHECK: 8: 6f 2c 20 77 .word +# CHECK: c: 6f 72 6c 64 .word +# CHECK: 10: 0a 00 .short +# CHECK: Disassembly of section .myothersection: +# CHECK: $x.2: +# CHECK: 0: 01 00 00 90 adrp x1, #0 +# CHECK: mystr: +# CHECK: 4: 62 6c 61 68 .word +# CHECK: 8: 00 .byte diff --git a/test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64 b/test/tools/llvm-objdump/Inputs/eh_frame.macho-arm64 new file mode 100644 index 0000000000000000000000000000000000000000..1c5413230d7aa512792735533eb7d939fd9ce037 GIT binary patch literal 888 zcmb7CK}!Nr5S~r5BHKJgLD&)mT>`61m!Ly|2M;11qRT^cO<{ExT~YGl(PKf^I@_?KcH)WAh7!8?JTW!YGCG@?>*;pX7;^&etac~41#fpb210FI3e1A5B064f5K2s zEUtltd;vMT)&nZ#+dUtNm{P^F^J2(;(UO$)SaO6ARED* z*|eQVe?ql(m@`7pZFr@!uR6_cy;)HzcN)FV2(bk7($^R$>zVoorR+#~8J)SgGq&iy3>0spc||M6iG zqZ<@m!x?Xh28BugX*7P$u0fVc$W9? zz@!#KnW=l@jz`tAV}?Eco|q@M7~nDAfhRVBcx!1!%xvxgO&(l~cVt4z623jdxz3aw x@=w;7LonRs3ixV7kJAXn1M)h4n{o=T5|8QPu +hello.c 1444941273 124 0 100644 10% ` +#include +#include +int +main() +{ + printf("Hello World\n"); + return EXIT_SUCCESS; +} +foo.c 1444941645 124 0 100644 1% ` +void foo(void){} + diff --git a/test/tools/llvm-objdump/Inputs/libbogus2.a b/test/tools/llvm-objdump/Inputs/libbogus2.a new file mode 100644 index 000000000000..2ccb7f31c09d --- /dev/null +++ b/test/tools/llvm-objdump/Inputs/libbogus2.a @@ -0,0 +1,13 @@ +! +hello.c 1444941273 124 0 100644 102 ` +#include +#include +int +main() +{ + printf("Hello World\n"); + return EXIT_SUCCESS; +} +foo.c 1444941645 124 0 100644 1% ` +void foo(void){} + diff --git a/test/tools/llvm-objdump/Inputs/libbogus3.a b/test/tools/llvm-objdump/Inputs/libbogus3.a new file mode 100644 index 000000000000..f15a7329f9f4 --- /dev/null +++ b/test/tools/llvm-objdump/Inputs/libbogus3.a @@ -0,0 +1,16 @@ +! 
+hello.c 1444941273 124 0 100644 102 ` +#include +#include +int +main() +{ + printf("Hello World\n"); + return EXIT_SUCCESS; +} +foo.c 1444941645 124 0 100644 171 ` +void foo(void){} + +bar.c 1445026190 124 0 100644 17 ` +void foo(void){} + diff --git a/test/tools/llvm-objdump/Inputs/section-filter.obj b/test/tools/llvm-objdump/Inputs/section-filter.obj new file mode 100644 index 0000000000000000000000000000000000000000..7dc5dae26b797d196c5b64ff429346a300eeb988 GIT binary patch literal 441 zcma)0I}XAy6mtlo3=B++jEDhs;s7i#aR^d@#KJ-{l!?o52#$xHk6I8CSn`XlIM2Cn z_d8<@3Jf0T9B|-;xF&%mDhz=-oZ5i8@gB85`*01ep(^L?68+Hy7hnA|)G3Z~IQ4Ze z*&4CnRL&-w|CJ4olB=l4VFIplle`17kpG1qOozy_Cd~L?m%0ARAqs1B1Z literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 b/test/tools/llvm-objdump/X86/Inputs/disassemble.dll.coff-i386 new file mode 100755 index 0000000000000000000000000000000000000000..c0fbc88036305c35cd29378d1eece2698757a0bf GIT binary patch literal 1536 zcmeZ`n!v!!z`(!)#Q*;@Fzf)*Am9Kd@e?4meDb^a7`AYpRoubob5=1VBePhcpeR4R zC^1(dIWaFUzeFJ^RiUUfPa!i;!NosVAvZrIRgagKO9g7Qcg~A@{Jd?wFkDoU*2@Rv z7nMXo=?E|#l$o3XQUh~QfGY!o4#0AU3n z2C+bFpd1^Ri=Y_{AmSilCI$yquqKdNkOnXfr4$$#+(5zu1oTQ$D@uTNYl7?txeLj! zAR!P?U|>klD@sW$Nrbu;C;(Omiex0Nt4KnW-TC7=Y9 zKqrAxB!LUgm^`Bw@FSi#)%JqGdg9ES6i=?-!RoXB@!mAe%V71|U%#pSZvt)>hc}<( ziR9ON@yTnGEdp^KNr@xIOpG5qx*C2RP)3V|3$X+xOed*w_KTV@|D@t;CetYcXQ`B^ V8C7!^ai&Qnjl*GIr0X9L_y!zxswV&d literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.a b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/00000031.a new file mode 100644 index 0000000000000000000000000000000000000000..b784d8163f54f0d06633bf372aff80258e421507 GIT binary patch literal 2768 zcmd5-O-NKx6u$nYjSWHy^9e3m2oYfe zA+21htI&dWrHc?GqR48~&P4?SwUFGjs6hM9efN%&j;4mxfqTF6bMKkC=YIFRSIaZc z7-PvvjHLrpFf5FvjRBJ}gfHzKd@xyZL4{B2=&mvyww||gVGgh0!OmbnTOVr(`~1zmuJwV|fYu)jhg`ZA3?Z!c>po5A81BQ5 zKsG$5Bk?&M4p|kisl&)u5;jut`R$LzWfV1u3XPics(z-3LK9>AF-EzTcxn*tV%iLm zHeD1A;l>$sl!sOrqN>exZYm^6h5V}b_Bj`j8UwopF1XO+P@WPaSlhF5zaYhggI}|>M89jAFQ+O zbMcW|v)OAMO}{xJ)#moO5eU@==c+6KB5S2hV2!}MoE&Jz*$?4A=5foo7P2dZeHuvi z31Pd1EllQxf^bZF*4Px-UG8cD-;G{hFvQfJ-mqT{hC1W<2R>WnOQZ)Gd$$|#xztt^ zd$+{raGycvzvZ#4ujHMyx5eXFl~?ae=5yi__&p@xG4a)nAaR=bt^g&E7cx6$-hVHj zo}N!yynF83N6F(Rtc&L{?ca*Tvo*q=#CSZxS!wVRsVZeIdV9n#DOphBOzx&Wh5n?f zK6E^O)#A+}x@-Me{GmVfXYYUBpY0BOjJPE3&u@(W%s+%wcj`c|>FkPe4>w6qAXfM? 
zuz_$V9G3ejbp`Se58`_;ag})MMU49I1K=P~@;3VT>&xf%%E3DeUs@jq3g;znJtCLv zzqEfV(lQ(!r*$#HqOA@`pPJz^H2QFGZJeG3Z?g2CG0b~`sGGS!AoWVJb4@0GF5-7C zdjZhxMiEA;wkiNDTChCK~0OCW!rvLx| literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0001.macho new file mode 100644 index 0000000000000000000000000000000000000000..d81f9f00f4a7dd78ea805814837537a9548f54d8 GIT binary patch literal 9248 zcmeHNO>7%Q6dpG%4Ixmca0pPqK`9E*cvI9y0jW}cD+fZ7#tA~8YU-a&vh8};^{&+z zQQ@c*5k-}J;78(w5Vu}>;DP{A2@c#iz#%8p1E3%kDF-B~N)W!cGqYZ=Pba&ax6!a#(cO)PAh(uw5Lfok2+~doc z8OI^Quzl;2?>}`yfk0oD95)f|i zpCXny=lU{VERpLx3D0*^@^#DhgeitwNj;9oN9IQYtyKKB$SIDASNVZ+%j8a_khM)- zbW8YMcIH!bw2XpwQM?7?kaIJ$8(&-!sqqO1`ILQXM!`5S4aW`J*B#;`9OMI!Y#|sW zpHavx>Gacv`4S;M!n^;jB93j#@!keuzCOu!Lr#cr$UZeyFPHW?x>?c}@j3|eZAd=l zKf*yitUuYJVZKfouK{8V^W6&Z5f1W!WhcIT#?4538{=hDww@KkGr}Q0sSz2)F|8ra zrzWRgni!u{@8pEg`=d*o*e~iSPJ(fFKkyI|tshZO&luCnPDwNET*lN)BdfWFRq4-H zP)vGH_9SD~0kmWZG0`~kDxi{D!c(yu$jMHM1sY?Zhq1%R&w=13hNAbBnlqItQCqT0 zy%SMvl3DbZQ)LU}OU&SZ zl4p@Te*h17HPIvTK;(hQ1Ca+J4@4f=vj-MZYrmwf+>WQ#E^npQ54?vxO=@l8PHO$+ zDiyC?-rA(Se(K7%@w4A-AN>^NscVnDk8C8pee^2w+A<#Ow_mrnw++j18SS;W*kuhL z9~>Tdf%XmHVn?VQB$(4)g}#wbAg8n>zlW$y`^Qf7jC;KSK=Y87q435Wi><_C@y>(o zH6jnmo03QEOO<*Rg+7^79p&Ng%%E`Dd@c(+C zt0z7;UK~h|&5ftWiitCahxyFti(`CjY9yWJy?i=7lRdBJTz2N-R;(tKFHg*mG&QJp z*Ps{I)Ow-lR(LLxTh>{10|y+Q#6GJ%IE(|1ymxocv$!tCU3+!oFQu9JvbPt;rhsv&mR~0-Jd$?y6G4a z?pJ(KCnMgK5NiD^IjYF}&BPr_5TAl>u`R|vnfY;nnh{YF4|TqA6Sy~kN#Bd4{_lbC zG#(270GvQlavOz&0?4U-o*q)}Mfm|FHQo`F58=#0or9c|Se=K=0pm~2lNllWb_jnM z!q)}%{X@qi@~CB)$_b_|IJg_rW=poKXB=%@cP_YgQQRyR5FvKpl2HpBf+Y)k=sH?9 z%)B;fxq7Lf=Z%c3I|1fo)>F^vCPvl+<4A=FZKvO|O9fnbt_%(G!4qCHaSoTXS^w+x rC`!;V-C5SSK7%Q6dpG%4Ixmca0pPqK`9E*cvI9y0jW}cD+fZ7#tA~8YU-a&vh8};^{&+z zQQ@c*5k-}J;78(w5Vu}>;DP{A2@c#iz#%8p1E3%kDF-B~N)W!cGqYZ=Pba&ax6!a#(cO)PAh(uw5Lfok2+~doc z8OI^Quzl;2?>}`yfk0oD95)f|i zpCXny=lU{VERpLx3D0*^@^#DhgeitwNj;9oN9IQYtyKKB$SIDASNVZ+%j8a_khM)- zbW8YMcIH!bw2XpwQM?7?kaIJ$8(&-!sqqO1`ILQXM!`5S4aW`J*B#;`9OMI!Y#|sW zpHavx>Gacv`4S;M!n^;jB93j#@!keuzCOu!Lr#eJLiVYtdbzaE(an;+h}S`wZ$t7a z{}B%IVg1P#4fA!%cnuI^nD17Ik8qF=EIaY#Gj2xO+ZZpKvh}POo)Hf5NsY)Tj%f{X zJ~cW0(!}_rdM77@-XC4!#C}mvaT1KX`+6CLR{@pO5}u0PKu&g2EYKJOJ&YYjehvgTF%-S0)SRhIiQ1A~ z>Ya#Ulgy&OoKl1CQ6L)Ude>`zeEIe9Ki~Z#{n7jr<)L2G!KI9Kba?3*g{cnGiF6K$ z`qKC;{ubzu#k6$AarJ^WWn@d4QbjvWucAwK>4Kx3w#ycNPs6s7-V2IqwO90#gBnL0 z3+O>d{6k1SKl>R-`57LDYiUI!N2G(HC28*W73^y4CrnI^275k*zBJ!E3GvQ)T4DzO zlRS&$`2%>stBD?w2OA%V@8~#V%|3 z_~7us3$$+l7dt}jAiL?F?X9jgUm1!SD!>aFH^&x@<8N)$ODlFA`e6!h&&K^Ao4)uf&bS7 zT|M!+@!~*wY;HU~R!p2ZJj`cCUmW9OQzPj#@8#3!ne2Hz=dv>ww_-J^e0gGiq^Uu* zy9T|urq&BZx59Io+_KK98#v(bB=%YD!C@S5)s{!|;=?7_iE4cAAWeEzt|@BY+D*GKx>x#Ogd`4j6xGp3Dg0w?p{D z5WX(3?;koIkw-1VR8BB$!NJ{_He0e?J>zKOx^uy`i{fUnfC#YzmyBBA5G+~PL)X!= zVdk|-%hgK-J#S=O-3c%!vz~fRH!-pv7)L5ZXgmFuT`J(hb7g3d51#OviF3H5&H7)j rM^S=~>CUpo1yWZ_MT-dh&xjg__t&SW4h~*dH80AYmRQruq>%atMcx|< literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.macho b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0010.macho new file mode 100644 index 0000000000000000000000000000000000000000..4c7e78d93b62d51b57e665aa3b8ba6b224481077 GIT binary patch literal 9248 zcmeHNO>7%Q6dpG%4I$7?;SivJgHlwW@usMa0#cCfRnq>w4E} zjHqx_iio00KJX)PLWo;0J#ayQs00UY9N>@>>H$!Yij)HqRV4`D+nKS~>$v2=1!d*#a8=1kbLU*Q@A}}*}iDnd2ZT^j&E;synHPAQj&mh zlm8U4%sDrf`C^G&=Sz6Lo06|rwkJ$6+)f&CJf56S23o23ZIe?R6R+|E=eEV&N+Ii5 zyy%tiyX?%T=x7-Q?V@-K#v$ic=5BnNBvRuO4)Q7c)Qp00VwtWNwy!tDM>xm_9@#=L 
zNz|h>viP4=g+JYW@H`ugY+$M=eQij!d6-48sBMC(V?(=*2OvRl$ECzr8w%gpMYX;%jF z6%-TxlRe2;br3CCTuf9$UIkQAi~A~e9XZ)au|Q)C_Az!4`B@O$z)R-`57LDYiUI!lG4G@lC<{w3U)R36DB4`gFT-@Uz+djxOithEir@t zNuEXW{Q*4a*F=xV1Ca+J4@4e_JP>(c*B)3%t^Ja^d`nBMUD`^m?|Tn>n$+6F?bP~- zRVrS+w6#fl{nX`ewKLyrAN~~OsjH8@k1UzqK70jvZ5a>t+ppW(+oo-LjP_bw?6O9V z4UG)GK>G%8u|w1j63l6@Lf^>8kyBcd-$PWU{b)COYJP73&^+X4D7-PpVk=rq>)zj4 zBl3{EDS6bsRH;`{I3km(qdfeb8Px4mrhO0%tG;*DccuE?uR_-;6SBIfaf<%trC!~( zp!n51w4Sz2eMN1Ty!1U1N#udZ1Ca+J4@4e_JP>&x@<8N)$ODlFA`e6!_`i9er%#(3 zFAk>1=El=w#rWxiBYbA`#W6lMl}x93Kc7y|WX~BnkDb1-6{|_*%MJ`LT&wuk1F!@X5tPdh)+Vd*b!r&%>1}O&4?(0hdST50o)(Jr0+*k|Mx(68V`kk z0FEOmxrIVp0p!#^PY)^gqWl1o8t)Lw2XJPg&OuH{tjAQQRyR5Ft+Bl2H#Ff+Y&O=sH?9t-L;I zdq$~XqFN7DhG#<4A=FZFj(SN(Eeat_%7%Q6dpG%4Ixmca0pPqK`9E*cvI9y0jW}cD+fZ7#tA~8YU-a&vh8};^{&+z zQQ@c*5k-}J;78(w5Vu}>;DP{A2@c#iz#%8p1E3%kDF-B~N)W!cGqYZ=Pba&ax6!a#(cO)PAh(uw5Lfok2+~doc z8OI^Quzl;2?>}`yfk0oD95)f|i zpCXny=lU{VERpLx3D0*^@^#DhgeitwNj;9oN9IQYtyKKB$SIDASNVZ+%j8a_khM)- zbW8YMcIH!bw2XpwQM?7?kaIJ$8(&-!sqqO1`ILQXM!`5S4aW`J*WJJ;ED7>~N45}* zlFuk)mUQ}Q!+ePbKE?>|{=14ewk^kd8-)4#B;O4=A;Ka1)KtA(+UMwINngb4Ak4QR z`IP?%2l=r6WQ&IRI%T{Dh%wA}E5t`Q$Oo34`0^PyBkgUBmrdDvRt(PwhxnvMWE97= zhB%*^oPKFyd{VuW6GHEgE^%VNsHZpy#@+qELrAoKL_IxYOe;Gj&9rkFQ!|aM<{DO| zKVLyH={?z#j8zBFk|o4MbqymonOllJ5A*WzNA zHGF(~j9PJ0#lMm~X@(vtihqB89tJJB=l^#%aVLtci$8*?nS5|70@ z54P8cJS1;Q9Qxl_WKwmMhrcs}x}D0j529h!_pbV`RNwnm=-OpM=8GDq=xtv7 z>Xrq?ujUc&YT49N)b_~Rx5sw7j}~bo4@4e_JP>&x@<8N)$ODlFA`e6!h&&K^Ao9Tf z>w&JG_}qALAU!rWo*pYE&Kw@*Govq#@v*6qbei|_>GVwYyq|-5lpE4nEa55A$El_`|jcxYeV5ElYBTqhmT;z9u>ZI$Y zV@SAP@kO1CcvnKG^{?cpBJVd7cPK%83cAI%82e=A#|3IeL`gi<`NmD)-T)?jFOvGd z2g1{MDEtF(0!hhj6cP#`r}lYzNVyl~2awcwM^HY5GYfSNa#CV-9x?}vKQ&Kggz(!T z{9y=R7ufd?9goPPmSHLpn!tEHkv1pa44jl=uvQ&a~BudA9D2CHWx$WJb?!8O9 zVsS&Q7(!tm{DD4+_|}&`_#z?{^uaeD^r25u@P`N$f<9OZ8ox6?yR&yUX^YiJx`)iq zIcMfOXU?6uJ9j4ET>JH}O^nU8Fvjjc3L{;H;Ebp-_5k>=H!#Ne#PBNmFoPwC&Gk5^1{*?;V9lF(T5ut>g?E<%(EN4q$UncBtM~5%g{PmN=2ylmWKKAl6n8SyS>bQ(eECS>#S{SX zHSyEHs^r|B<a|MZB=dEz>rAM~&DH`!9-bq4UD zRTgDCh3BLbGd4ZiAYLTkAMvX{S22$T_yuW#?L;4QOe9`Sr@Dzk`RCbg-k!$mAn4yE zU=!)_5^vhV`co}rJB?S!DRJQWUJl?9Z{h)^5nd|cCxn5Ze~YU1L1~^5AHY)@S;g*iljZ`g(Vp#$ZAEzm02g73-cx$a zRHsC3$uIp*M753%qQ4qajqV|EWas>r*T|ti-u*m&ZsO5GZx>_;DP!&JB0Z&fl7Tvq zjv!HAvd_YA0e)A=iWfcKPFtf+GM~s7t^M>WI+4vE^Q`^ZLI%&%$!4PB1w~TpWjpUd z;#q@+9&m&&e*9Ei?nNxo&G9(_{-%TpP)W^_Tl$Y^v9QXoIzQh zMaF*l89ON_0D#4|Q9F=X(yoKPc6XzsG^4(3s7_C|13kl{ zHyFU&Ckj;F7-ylA;ZV3^Q(Kw91N5fC(K}47ok`$8uv39mWyI>rS^qQ_2 z@_j9AaXSt_%DC(i^w}e_y!L$uT@LL+GNY_dKg0zNbd}vV2~tSxRm$Xzhu|g4_Mx0^ z39*mIzMCWw0Y#DNGmMMiy9}Q6T}b-B6o@~7O!MD@k05Eff=Wa)D5-slGDUZyeh-qi zw+;2pIKR+m9eWh7&pgJ#<4=#1IRXCd5NkE{9|rX2C0~gT9hInGT%nJ zW!tc$6mn{N-I;7YjqAyiy?goI-J+R1Yl~)C_v_VJ6rrQI!>o3JRMkqc!VJ7+K(*#8 Y>r;wN<5gAtqHMIpurgCME0d;NN_0_9-ullQM z=*fGw_~!i@AquO6nEOHqQG_n67UH(B&=X=abQCJ(#PE-micgyh_(($hNG44H&2Ra_loI;$Bk$94p=lTjN<;d{laK?l4>p9o-d^6{Y zJJ*dZrRU0UwpsIC*@}YyjF*PtMdkWb$bXzq4@l`&KNhcu^_<_Q=M!h+@!nI><7D@>3-4K(6Rae6AM7S#EtZwMgJoF!#r)9U0_cYw+9||a z*sq}zP|8}Z1=w}akDXT;>+yOo6W$*S${cn(3~bk_*^ODZ0ZiEnGHFkl!k3>XFs1D~IPsnNTCkKX%% zAFr?YHGXK}(v!~M9D1kjZNC9Ce6BNCg`J;9BmVuT)9H*V*Yyur=l#HK?8KL{5Zma3 zWG%m?`K^5zmijaL6!!UhrXMSd@oJ7Pbz+3(zW)7}Vq*;hh5^HXVZbn87%&VN1`Gp+ z0mFb{z%XDK`2RC7u&a26PcbmWVqaVgmo?OfQM^7AhAW>6RSd?MS9A{?*U2{YEJNRnZkrOlyi9Ea&i1;47;N;?DT>ccg4#bW_^{lSI7|imawNn z6@>Og;76(&+9N8w9{DXha=h7t&1!wx&vr$#>UgF4CoDSAcHD+N=0z%Ks)kdIRG0}= ys>SZJ%0*7K+;O^gP1U3Dpyvn8s%y_2J0g!9w&Uc!khE#L!f7Vwz3e{35B&jAYWsQs literal 0 HcmV?d00001 diff --git a/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.macho 
b/test/tools/llvm-objdump/X86/Inputs/malformed-machos/mem-crup-0337.macho new file mode 100644 index 0000000000000000000000000000000000000000..cc438414f1121968f2508320693ca7aed318ce52 GIT binary patch literal 9248 zcmeHNO>7%Q6dpGX4I$7?;SivJgHjZr@usMa0#YSAKa~R^N#g_|P&M_>Cf;^`+}*Vr zBPv{#BBH305Bx}+5aQNL4_pu+D#3vp2RP(}dH@upBISTYRSC>{^J9 zkZw}1_Xr)Ofw2}e8;1FwKge|>qKU)^lla(As+dWk)DOxzU&1!~Rs~nMH^3*nhqLa0aHokWRU$m1cAi=!iV++l~TKX9|%7A`$^oHuk2hjodPwT6_?rD8ZRFUzO*1f z9G;`(#+*{4NLM^`9h2~Uw*=n+R};h-!=0oV$5W}Ll+sGZZ-*S?78c^tcr&)x=MzG1<4Lre&9$UZq$mJ9oQ!}5$3cpZfKHU*#b zA7Ygc)}Lt6GGD(8uK{EX^W6&ZAy)Z-WhcHuHpmKlTjOO*v|ixD6T~4tq2U?EF|Hv> zXJ_VKnx2}G@8mewH%1pYzMt1)oTzbkKj3jlxPEv&K4U~L`<`yO`K+Z|W=;=Gr!rEg zKrz9d~CFhJ~OxPCf zlJ7()w#fqY7gK7{JqifNx!(WUA76fb?9X?<$b7W)M0tD&>cFLh^!Bp!jKo+6(g*1* zB!dKBn*E{d)r`)mwzo+Rs3HE}*TKyHn^P$Gq zClx)=A^$j}20!^3kn}Tn7+i}hB9Rgf1}#c^zpucq)_&Z?#Dv=ODd>yyy%Xo}tfvJg z;D3~7Au)dd9%VJrBl1Axfye`q2O zapE!-uU*>S!o7a_%D38?Z|X-rh4SpR$KHo5m8l=O3VE#v5Axfu^?KcO%z)rti}GDo z^4M5%^ab2E02euo?LgEV_bT{}d>nF2tKxeI%ea5+gPt1e4FG5!Vi^?PSR}DEEvEGy z=&2!jNZu4Ya$hRdm!U8$lB~l#{GF-lb}HjO2#1y5yYjnIe(zU7*CP^S4UyxN*yg2C z-LW9~+y&}hc*P*i*pbSqsk^2z*<%h(Kw67Z1c8#e(DDH!!bkmUb8 zAfAJV#6JLzLy~eE3ULV_$Myw$2)P%^4?vRR9ftBjIJ1!FASVQt=OK%L;ZM$!m?8Xj z2!9yD*Ew$Z2aiX@QA-lbaiTB#a5ttecy3^1eSOOC&j)Ua-z?e?A+B=Cs4ItH340G+ zC(5Q(&}W>$@NA=CW&^`lm=igsUNkHi*-*w23Le`2h~s)TTzIaHkI}K?tQkLtOSoj@ t^=1^s@tE!mXb@;~m diff --git a/test/tools/llvm-objdump/X86/coff-disassemble-export.test b/test/tools/llvm-objdump/X86/coff-disassemble-export.test new file mode 100644 index 000000000000..2f0b211815cf --- /dev/null +++ b/test/tools/llvm-objdump/X86/coff-disassemble-export.test @@ -0,0 +1,8 @@ +// RUN: llvm-objdump -d %p/Inputs/disassemble.dll.coff-i386 | \ +// RUN: FileCheck %s + +// CHECK-LABEL: g: +// CHECK: calll 8 + +// CHECK-LABEL: f: +// CHECK: calll -24 diff --git a/test/tools/llvm-objdump/X86/disassemble-data.test b/test/tools/llvm-objdump/X86/disassemble-data.test new file mode 100644 index 000000000000..e9c4e7e1e5f2 --- /dev/null +++ b/test/tools/llvm-objdump/X86/disassemble-data.test @@ -0,0 +1,4 @@ +// This test checks that -D disassembles from a data section +// RUN: llvm-objdump -D %p/Inputs/disassemble-data.obj | FileCheck %s + +// CHECK: Disassembly of section .data: \ No newline at end of file diff --git a/test/tools/llvm-objdump/X86/macho-symbol-table.test b/test/tools/llvm-objdump/X86/macho-symbol-table.test index 826d78af68b1..19c619e73d07 100644 --- a/test/tools/llvm-objdump/X86/macho-symbol-table.test +++ b/test/tools/llvm-objdump/X86/macho-symbol-table.test @@ -1,8 +1,8 @@ RUN: llvm-objdump -macho -t %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s CHECK: SYMBOL TABLE: -CHECK: 000000000000003b l F __TEXT,__cstring L_.str -CHECK: 0000000000000068 l F __TEXT,__eh_frame EH_frame0 +CHECK: 000000000000003b l __TEXT,__cstring L_.str +CHECK: 0000000000000068 l __TEXT,__eh_frame EH_frame0 CHECK: 0000000000000000 g F __TEXT,__text _main -CHECK: 0000000000000080 g F __TEXT,__eh_frame _main.eh +CHECK: 0000000000000080 g __TEXT,__eh_frame _main.eh CHECK: 0000000000000000 *UND* _printf diff --git a/test/tools/llvm-objdump/X86/malformed-machos.test b/test/tools/llvm-objdump/X86/malformed-machos.test new file mode 100644 index 000000000000..a47e43443c5d --- /dev/null +++ b/test/tools/llvm-objdump/X86/malformed-machos.test @@ -0,0 +1,41 @@ +// These test checks that llvm-objdump will not crash with malformed Mach-O +// files. 
So the check line is not all that important; what matters is that the +// bug fixes keep llvm-objdump robust. +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0001.macho \ +# RUN: | FileCheck -check-prefix=m0001 %s + +# m0001: (method_t extends past the end of the section) + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0006.macho \ +# RUN: | FileCheck -check-prefix=m0006 %s + +# m0006: ivarLayout 0x8 + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0010.macho \ +# RUN: | FileCheck -check-prefix=m0010 %s + +# m0010: 00000000000010e0 0x10e8 _OBJC_CLASS_ + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0040.macho \ +# RUN: | FileCheck -check-prefix=m0040 %s + +# m0040: 00000000000010a0 0xf39 -[tiny_dylib init] + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0080.macho \ +# RUN: | FileCheck -check-prefix=m0080 %s + +# m0080: data 0xf960000 (struct class_ro_t *) + +# RUN: llvm-objdump -macho -objc-meta-data \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0261.macho + +# RUN: llvm-objdump -macho -disassemble \ +# RUN: %p/Inputs/malformed-machos/mem-crup-0337.macho \ +# RUN: | FileCheck -check-prefix=m0337 %s + +# m0337: subq $16, %rsp diff --git a/test/tools/llvm-objdump/eh_frame-arm64.test b/test/tools/llvm-objdump/eh_frame-arm64.test new file mode 100644 index 000000000000..f25e035a266e --- /dev/null +++ b/test/tools/llvm-objdump/eh_frame-arm64.test @@ -0,0 +1,23 @@ +# RUN: llvm-objdump -unwind-info %p/Inputs/eh_frame.macho-arm64 2>/dev/null | FileCheck %s + +# CHECK: Contents of __eh_frame section: +# CHECK: CIE: +# CHECK: Length: 16 +# CHECK: CIE ID: 0 +# CHECK: Version: 1 +# CHECK: Augmentation String: zR +# CHECK: Code Alignment Factor: 1 +# CHECK: Data Alignment Factor: -8 +# CHECK: Return Address Register: 30 +# CHECK: Augmentation Data Length: 1 +# CHECK: FDE Address Pointer Encoding: 16 +# CHECK: Instructions: +# CHECK: 0c 1f 00 +# CHECK: FDE: +# CHECK: Length: 32 +# CHECK: CIE Offset: 0 +# CHECK: PC Begin: ffffffffffffffe4 +# CHECK: PC Range: 0000000000000020 +# CHECK: Augmentation Data Length: 0 +# CHECK: Instructions: +# CHECK: 48 0e 10 9e 01 9d 02 00 00 00 00 diff --git a/test/tools/llvm-objdump/malformed-archives.test b/test/tools/llvm-objdump/malformed-archives.test new file mode 100644 index 000000000000..e0f165d37ed7 --- /dev/null +++ b/test/tools/llvm-objdump/malformed-archives.test @@ -0,0 +1,20 @@ +// These tests check that llvm-objdump will not crash with malformed Archive +// files. So the check line is not all that important; what matters is that +// the bug fixes keep llvm-objdump robust.
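For reference: the textual archive members earlier in this patch (hello.c, foo.c, bar.c) follow the classic ar(1) member-header layout, and the libbogus*.a inputs below are presumed to corrupt exactly these fields. A minimal C++ sketch of that standard layout with a hypothetical memberFits check -- illustrative only, not llvm-objdump's actual parser; note that foo.c declares 171 bytes for a one-line body, the kind of inconsistency a robust parser must survive:

  #include <cstdlib>
  #include <cstring>

  // Classic ar(1) member header: 60 bytes of fixed-width ASCII fields.
  struct ArMemberHeader {
    char Name[16]; // e.g. "hello.c"
    char Date[12]; // e.g. "1444941273"
    char UID[6];
    char GID[6];
    char Mode[8];  // e.g. "100644"
    char Size[10]; // decimal length of the member body
    char Magic[2]; // "`\n"
  };

  // Hypothetical check: a member whose declared size exceeds the bytes left
  // in the archive is malformed, and the parser must reject it gracefully.
  bool memberFits(const ArMemberHeader &H, unsigned long long BytesLeft) {
    char SizeField[11] = {};
    std::memcpy(SizeField, H.Size, 10);
    return std::strtoull(SizeField, nullptr, 10) <= BytesLeft;
  }

  int main() {
    ArMemberHeader H = {};
    std::memcpy(H.Size, "171       ", 10); // foo.c's declared size
    return memberFits(H, 17) ? 1 : 0;      // only 17 bytes actually remain
  }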
+# RUN: llvm-objdump -macho -archive-headers \ +# RUN: %p/Inputs/libbogus1.a \ +# RUN: 2>&1 | FileCheck -check-prefix=bogus1 %s + +# bogus1: Invalid data was encountered while parsing the file + +# RUN: not llvm-objdump -macho -archive-headers \ +# RUN: %p/Inputs/libbogus2.a \ +# RUN: 2>&1 | FileCheck -check-prefix=bogus2 %s + +# bogus2: LLVM ERROR: Invalid data was encountered while parsing the file + +# RUN: not llvm-objdump -macho -archive-headers \ +# RUN: %p/Inputs/libbogus3.a \ +# RUN: 2>&1 | FileCheck -check-prefix=bogus3 %s + +# bogus3: LLVM ERROR: Invalid data was encountered while parsing the file diff --git a/test/tools/llvm-objdump/section-filter.test b/test/tools/llvm-objdump/section-filter.test new file mode 100644 index 000000000000..9c7ab31b0d72 --- /dev/null +++ b/test/tools/llvm-objdump/section-filter.test @@ -0,0 +1,7 @@ +// This test checks that --section works correctly +// RUN: llvm-objdump -h %p/Inputs/section-filter.obj -j=.text \ +// RUN: --section=.bss | FileCheck %s + +# CHECK: .text +# CHECK-NOT: .data +# CHECK: .bss \ No newline at end of file diff --git a/test/tools/llvm-pdbdump/regex-filter.test b/test/tools/llvm-pdbdump/regex-filter.test index 8b9eca63f585..cfc910e07171 100644 --- a/test/tools/llvm-pdbdump/regex-filter.test +++ b/test/tools/llvm-pdbdump/regex-filter.test @@ -10,6 +10,10 @@ ; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_WHOLE_CLASS %s ; RUN: llvm-pdbdump -symbols -globals -exclude-compilands="FilterTest.obj" \ ; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=EXCLUDE_COMPILAND %s +; RUN: llvm-pdbdump -types -include-types="FilterTestClass" \ +; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_TYPES %s +; RUN: llvm-pdbdump -types -symbols -globals -include-symbols="[[:<:]](IntGlobalVar|DoubleGlobalVar)[[:>:]]" \ +; RUN: %p/Inputs/FilterTest.pdb | FileCheck --check-prefix=INCLUDE_ONLY_VARS %s ; NO_FILTER: ---TYPES--- ; NO_FILTER: Enums: @@ -73,3 +77,19 @@ ; EXCLUDE_COMPILAND-NOT: __cdecl main ; EXCLUDE_COMPILAND: * Linker * ; EXCLUDE_COMPILAND: ---GLOBALS--- + +; Everything but types is displayed normally. But FilterTestClass is +; the only type that should be displayed. +; INCLUDE_ONLY_TYPES: ---TYPES--- +; INCLUDE_ONLY_TYPES-NOT: GlobalTypedef +; INCLUDE_ONLY_TYPES: class FilterTestClass + +; We should only see DoubleGlobalVar and IntGlobalVar. This means that even +; variables printed in class definitions should be filtered out.
+; INCLUDE_ONLY_VARS: ---TYPES--- +; INCLUDE_ONLY_VARS: class FilterTestClass +; INCLUDE_ONLY_VARS-NOT: IntMemberVar +; INCLUDE_ONLY_VARS-NOT: IntDoubleVar +; INCLUDE_ONLY_VARS: ---GLOBALS--- +; INCLUDE_ONLY_VARS: DoubleGlobalVar +; INCLUDE_ONLY_VARS: IntGlobalVar diff --git a/test/tools/llvm-profdata/Inputs/basic.proftext b/test/tools/llvm-profdata/Inputs/basic.proftext new file mode 100644 index 000000000000..db934da7c07a --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/basic.proftext @@ -0,0 +1,19 @@ +foo +10 +2 +499500 +179900 + +main +16650 +4 +1 +1000 +1000000 +499500 + +foo2 +10 +2 +500500 +180100 diff --git a/test/tools/llvm-profdata/Inputs/c-general.profraw b/test/tools/llvm-profdata/Inputs/c-general.profraw index e8cef21de5f258c5e08a1089280f282be0a77455..a4d94858a9e7cecbec88d7bb7de76f51152a4587 100644 GIT binary patch delta 599 zcmaFC^?}#8u_!ISs37M*69WYBKLylAa7zClL*CqXj=i9S2a2Fc7cm)wBQfL_%!_RNV=v zx-EhbbzDF-fGc13&~29EYkafT@#$sN)05fx`Ke1CRiM z+fa2KP<1}i5Op9mj9?WI;w4nw3aGk2P<7%!X;6HF)PulVsJabMbty7n^ClNCaZEnI y$gz0^qX6^d049ma4_JT#EcGJE`FSasC7Jnoi8=B41*t`eCHX~@?O8R!1_A&O*>n2< literal 1384 zcmah|yGjE=6dg69QSk}BDx}*gw?JWHSOFzTX-o_8` z11tp#otrr)gaz@!aORwI@7h~{IiGyN+iD6(9dx*!@;bS$3KEH|BPYszmnioq-x|0|5vfBS)=}rXBT^@F zUqtFe?uSUdi4SfA>p*!=64Yy+D&9xdlk;AYZb>a57GaS@L93rezx<17yZ`_I diff --git a/test/tools/llvm-profdata/Inputs/compat.profdata.v2 b/test/tools/llvm-profdata/Inputs/compat.profdata.v2 new file mode 100644 index 0000000000000000000000000000000000000000..969867584a9934a408363896ef9ae250406b9e3f GIT binary patch literal 712 zcmeyLQ&5zjmf6U}00BDl*`X{5wFAmyWLUFT;fBWuhChXCFj_ zk>S_X-sfg1Q*X0CRT@C)+{Da0E=LFlCdUYsdjYk{!HFL%g5op>s3Mq^Q0^#2qY%)b Ok-4~xO~oaFLmU7ea2Txs literal 0 HcmV?d00001 diff --git a/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov b/test/tools/llvm-profdata/Inputs/gcc-sample-profile.gcov new file mode 100644 index 0000000000000000000000000000000000000000..0099280af2f9ebf9b78efcca8d015f59ce663147 GIT binary patch literal 1960 zcmYdHNlw=?H!xvJOZS$ivjzZHDlfaj1vLg3Lj6F9(V0f#$43GGToK zlml}=$evL)B(w>I11RWVYDU?RFe5b_K=}d|h9jMgC=(bM2(dwB5lqc+V8hB&kYC7U egX$KTUSip>au=kYo@`kC0J4`{HY<`|O4tB4*X#QL literal 0 HcmV?d00001 diff --git a/test/tools/llvm-profdata/Inputs/inline-samples.afdo b/test/tools/llvm-profdata/Inputs/inline-samples.afdo new file mode 100644 index 000000000000..f8680d86fcb4 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/inline-samples.afdo @@ -0,0 +1,20 @@ +main:366846:0 + 2.1: 60401 + 4: 0 + 3: 0 + 0: 0 + 2.3: 60401 + 1: 0 + 2.3: _Z3fool:246044 + 1.2: 39280 + 1.4: 46871 + 1: 60401 + 1.3: _Z3bari:0 + 1.2: 0 + 1.1: 0 + 1.8: _Z3bari:0 + 1.2: 0 + 1.1: 0 + 1.7: _Z3bari:99492 + 1.2: 46732 + 1.1: 52760 diff --git a/test/tools/llvm-profdata/Inputs/overflow-instr.proftext b/test/tools/llvm-profdata/Inputs/overflow-instr.proftext new file mode 100644 index 000000000000..48d1db88bcdf --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/overflow-instr.proftext @@ -0,0 +1,6 @@ +overflow +1 +3 +18446744073709551615 +9223372036854775808 +18446744073709551615 diff --git a/test/tools/llvm-profdata/Inputs/overflow-sample.proftext b/test/tools/llvm-profdata/Inputs/overflow-sample.proftext new file mode 100644 index 000000000000..a5486bbd819c --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/overflow-sample.proftext @@ -0,0 +1,7 @@ +_Z3bari:18446744073709551615:1000 + 1: 18446744073709551615 +_Z3fooi:18446744073709551615:1000 + 1: 18446744073709551615 +main:1000:0 + 1: 500 _Z3bari:18446744073709551615 + 2: 500 _Z3fooi:18446744073709551615 diff --git 
a/test/tools/llvm-profdata/Inputs/sample-profile.proftext b/test/tools/llvm-profdata/Inputs/sample-profile.proftext index 9dc6d4310da9..54c821243afa 100644 --- a/test/tools/llvm-profdata/Inputs/sample-profile.proftext +++ b/test/tools/llvm-profdata/Inputs/sample-profile.proftext @@ -1,12 +1,12 @@ _Z3bari:20301:1437 -1: 1437 + 1: 1437 _Z3fooi:7711:610 -1: 610 + 1: 610 main:184019:0 -4: 534 -4.2: 534 -5: 1075 -5.1: 1075 -6: 2080 -7: 534 -9: 2064 _Z3bari:1471 _Z3fooi:631 + 4: 534 + 4.2: 534 + 5: 1075 + 5.1: 1075 + 6: 2080 + 7: 534 + 9: 2064 _Z3bari:1471 _Z3fooi:631 diff --git a/test/tools/llvm-profdata/Inputs/text-format-errors.text.bin b/test/tools/llvm-profdata/Inputs/text-format-errors.text.bin new file mode 100644 index 000000000000..9e2e3f5c2b83 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/text-format-errors.text.bin @@ -0,0 +1 @@ +ÿåбôÉ”¨ \ No newline at end of file diff --git a/test/tools/llvm-profdata/Inputs/vp-malform.proftext b/test/tools/llvm-profdata/Inputs/vp-malform.proftext new file mode 100644 index 000000000000..2db3096cecf1 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/vp-malform.proftext @@ -0,0 +1,42 @@ +foo +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +999000 +359800 + +foo2 +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +1001000 +360200 + +main +# Func Hash: +16650 +# Num Counters: +4 +# Counter Values: +2 +2000 +2000000 +999000 +# NumValueKinds +1 +# Value Kind IPVK_IndirectCallTarget +0 +# NumSites +3 +# Values for each site +0 +2 +# !!!! Malformed Value/Count pair +foo+100 +foo2:1000 +1 +foo2:20000 diff --git a/test/tools/llvm-profdata/Inputs/vp-malform2.proftext b/test/tools/llvm-profdata/Inputs/vp-malform2.proftext new file mode 100644 index 000000000000..02ed5a968d80 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/vp-malform2.proftext @@ -0,0 +1,32 @@ +foo +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +999000 +359800 + +main +# Func Hash: +16650 +# Num Counters: +4 +# Counter Values: +2 +2000 +2000000 +999000 +# NumValueKinds +1 +# Value Kind IPVK_IndirectCallTarget +0 +# NumSites +3 +# Values for each site +0 +# !! Malformed value site, missing one value +2 +foo:100 +1 +foo2:20000 diff --git a/test/tools/llvm-profdata/Inputs/vp-truncate.proftext b/test/tools/llvm-profdata/Inputs/vp-truncate.proftext new file mode 100644 index 000000000000..98b4b572b65c --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/vp-truncate.proftext @@ -0,0 +1,36 @@ +foo +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +999000 +359800 + +foo2 +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +1001000 +360200 + +main +# Func Hash: +16650 +# Num Counters: +4 +# Counter Values: +2 +2000 +2000000 +999000 +# NumValueKinds +1 +# Value Kind IPVK_IndirectCallTarget +0 +# NumSites +3 +# Values for each site +0 diff --git a/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata b/test/tools/llvm-profdata/Inputs/weight-instr-bar.profdata new file mode 100644 index 0000000000000000000000000000000000000000..4ed07660f654090e750b19be4e0af609bc1c61db GIT binary patch literal 1320 zcmeyLQ&5zjmf6V600ExHYmK2yFeL$%U}Tt_rlBWzFff!ADy;yeON$fJQ=x1IMi>K1 zb3kcEhBbR7Zu=bi5d*Wx04kG~pWnpK1 zb3kcEhBbR7Zu=bi5d*Wx04kG~pWnpjI*ICuRlWs>LPl7M$03g{ F3;+VlD0u(? 
literal 0 HcmV?d00001 diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext new file mode 100644 index 000000000000..a910f745e6c7 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/weight-sample-bar.proftext @@ -0,0 +1,8 @@ +bar:1772037:35370 + 17: 35370 + 18: 35370 + 19: 7005 + 20: 29407 + 21: 12170 + 23: 18150 bar:19829 + 25: 36666 diff --git a/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext b/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext new file mode 100644 index 000000000000..155ec5d00315 --- /dev/null +++ b/test/tools/llvm-profdata/Inputs/weight-sample-foo.proftext @@ -0,0 +1,8 @@ +foo:1763288:35327 + 7: 35327 + 8: 35327 + 9: 6930 + 10: 29341 + 11: 11906 + 13: 18185 foo:19531 + 15: 36458 diff --git a/test/tools/llvm-profdata/c-general.test b/test/tools/llvm-profdata/c-general.test index 01435303d445..efa9bfa18d73 100644 --- a/test/tools/llvm-profdata/c-general.test +++ b/test/tools/llvm-profdata/c-general.test @@ -6,7 +6,7 @@ REGENERATE: $ SRC=path/to/llvm REGENERATE: $ CFE=$SRC/tools/clang REGENERATE: $ TESTDIR=$SRC/test/tools/llvm-profdata REGENERATE: $ CFE_TESTDIR=$CFE/test/Profile -REGENERATE: $ clang -o a.out -fprofile-instr-generate $CFE_TESTDIR/test/Profile/c-general.c +REGENERATE: $ clang -o a.out -fprofile-instr-generate $CFE_TESTDIR/c-general.c REGENERATE: $ LLVM_PROFILE_FILE=$TESTDIR/Inputs/c-general.profraw ./a.out RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - | FileCheck %s -check-prefix=CHECK @@ -14,11 +14,11 @@ RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - --function=switches | F SWITCHES-LABEL: Counters: SWITCHES-NEXT: switches: -SWITCHES-NEXT: Hash: 0x0000000000000013 +SWITCHES-NEXT: Hash: 0x2618e4f23f2e8daa SWITCHES-NEXT: Counters: 19 SWITCHES-NEXT: Function count: 1 SWITCHES-LABEL: Functions shown: 1 -CHECK-LABEL: Total functions: 11 +CHECK-LABEL: Total functions: 12 CHECK-NEXT: Maximum function count: 1 CHECK-NEXT: Maximum internal block count: 100 diff --git a/test/tools/llvm-profdata/compat.proftext b/test/tools/llvm-profdata/compat.proftext index 14da3374b5e9..139202d162e6 100644 --- a/test/tools/llvm-profdata/compat.proftext +++ b/test/tools/llvm-profdata/compat.proftext @@ -45,3 +45,23 @@ large_numbers # SUMMARY: Total functions: 3 # SUMMARY: Maximum function count: 2305843009213693952 # SUMMARY: Maximum internal block count: 1152921504606846976 + +# RUN: llvm-profdata show %S/Inputs/compat.profdata.v2 -all-functions --counts | FileCheck %s -check-prefix=FORMATV2 + +# FORMATV2: Counters: +# FORMATV2-NEXT: foo: +# FORMATV2-NEXT: Hash: 0x000000000000000a +# FORMATV2-NEXT: Counters: 2 +# FORMATV2-NEXT: Function count: 499500 +# FORMATV2-NEXT: Block counts: [179900] +# FORMATV2-NEXT: main: +# FORMATV2-NEXT: Hash: 0x000000000000410a +# FORMATV2-NEXT: Counters: 4 +# FORMATV2-NEXT: Function count: 1 +# FORMATV2-NEXT: Block counts: [1000, 1000000, 499500] +# FORMATV2-NEXT: Functions shown: 2 +# FORMATV2-NEXT: Total functions: 2 +# FORMATV2-NEXT: Maximum function count: 499500 +# FORMATV2-NEXT: Maximum internal block count: 1000000 + + diff --git a/test/tools/llvm-profdata/count-mismatch.proftext b/test/tools/llvm-profdata/count-mismatch.proftext index 1a2e73fbffdb..b42b11dbcf9d 100644 --- a/test/tools/llvm-profdata/count-mismatch.proftext +++ b/test/tools/llvm-profdata/count-mismatch.proftext @@ -14,7 +14,8 @@ foo # The hash matches, but we can't combine these because the number of # counters differs. 
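For reference, the replacement diagnostic below distinguishes a changed counter layout from a plain hash mismatch. A minimal C++ sketch of the compatibility check implied by the comment above -- hypothetical names, not LLVM's actual merge routine:

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  struct InstrRecord {
    uint64_t Hash;
    std::vector<uint64_t> Counts;
  };

  // Records merge only if both the structural hash and the number of basic
  // block counters agree; a counter-count change triggers the new message.
  bool canMerge(const InstrRecord &Dest, const InstrRecord &Src) {
    if (Dest.Hash != Src.Hash)
      return false; // plain hash mismatch
    if (Dest.Counts.size() != Src.Counts.size()) {
      std::puts("Function basic block count change detected (counter mismatch)");
      std::puts("Make sure that all profile data to be merged is generated "
                "from the same binary.");
      return false;
    }
    return true;
  }

  int main() {
    InstrRecord A{1024, {1, 2, 3}}, B{1024, {1, 2}}; // same hash, 3 vs 2 counters
    return canMerge(A, B) ? 1 : 0;
  }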
-# MERGE_ERRS: count-mismatch.proftext: foo: Function count mismatch +# MERGE_ERRS: count-mismatch.proftext: foo: Function basic block count change detected (counter mismatch) +# MERGE_ERRS: Make sure that all profile data to be merged is generated from the same binary. foo 1024 3 diff --git a/test/tools/llvm-profdata/gcc-gcov-sample-profile.test b/test/tools/llvm-profdata/gcc-gcov-sample-profile.test new file mode 100644 index 000000000000..dbcc74e1284f --- /dev/null +++ b/test/tools/llvm-profdata/gcc-gcov-sample-profile.test @@ -0,0 +1,29 @@ +The input gcov file has been generated on a little endian machine. Expect +failures on big endian systems. + +XFAIL: powerpc64-, s390x, mips-, mips64-, sparc + +Tests for sample profiles encoded in GCC's gcov format. + +1- Show all functions. This profile has a single main() function with several + inlined callees. +RUN: llvm-profdata show --sample %p/Inputs/gcc-sample-profile.gcov | FileCheck %s --check-prefix=SHOW1 +SHOW1: Function: main: 364084, 0, 6 sampled lines +SHOW1: 2.3: inlined callee: _Z3fool: 243786, 0, 3 sampled lines +SHOW1: 1.3: inlined callee: _Z3bari: 0, 0, 2 sampled lines +SHOW1: 1.7: inlined callee: _Z3bari: 98558, 0, 2 sampled lines +SHOW1: 1.8: inlined callee: _Z3bari: 0, 0, 2 sampled lines + +2- Convert the profile to text encoding and check that the two dumps are + identical. +RUN: llvm-profdata merge --sample %p/Inputs/gcc-sample-profile.gcov --text -o - | llvm-profdata show --sample - -o %t-text +RUN: llvm-profdata show --sample %p/Inputs/gcc-sample-profile.gcov -o %t-gcov +RUN: diff %t-text %t-gcov + +3- Merge the gcov and text encodings of the profile and check that the + counters have doubled. +RUN: llvm-profdata merge --sample --text %p/Inputs/gcc-sample-profile.gcov -o %t-gcov +RUN: llvm-profdata merge --sample --text %p/Inputs/gcc-sample-profile.gcov %t-gcov -o - | FileCheck %s --check-prefix=MERGE1 +MERGE1: main:728168:0 +MERGE1: 2.3: 120298 +MERGE1: 2.3: _Z3fool:487572 diff --git a/test/tools/llvm-profdata/inline-samples.test b/test/tools/llvm-profdata/inline-samples.test new file mode 100644 index 000000000000..421f002da9f9 --- /dev/null +++ b/test/tools/llvm-profdata/inline-samples.test @@ -0,0 +1,30 @@ +Tests for conversion between text and binary encoded sample profiles. + +1- Encode the original profile into binary form. All the tests below will use + the binary profile. +RUN: llvm-profdata merge --sample %p/Inputs/inline-samples.afdo -o %t.profbin + +2- Show all functions. This profile has a single main() function with several + inlined callees. +RUN: llvm-profdata show --sample %t.profbin | FileCheck %s --check-prefix=SHOW1 +SHOW1: Function: main: 366846, 0, 6 sampled lines +SHOW1: 2.3: inlined callee: _Z3fool: 246044, 0, 3 sampled lines +SHOW1: 1.3: inlined callee: _Z3bari: 0, 0, 2 sampled lines +SHOW1: 1.7: inlined callee: _Z3bari: 99492, 0, 2 sampled lines +SHOW1: 1.2: 46732 +SHOW1: 1.8: inlined callee: _Z3bari: 0, 0, 2 sampled lines + +3- Convert the binary profile to text encoding and check that the two dumps are + identical. +RUN: llvm-profdata merge --sample %t.profbin --text -o - | llvm-profdata show --sample - -o %t-bintext +RUN: llvm-profdata show --sample %p/Inputs/inline-samples.afdo -o %t-text +RUN: diff %t-bintext %t-text + +4- Merge the binary and text encodings of the profile and check that the counters have doubled.
+RUN: llvm-profdata merge --sample --text %t.profbin %p/Inputs/inline-samples.afdo -o - | FileCheck %s --check-prefix=MERGE1 +MERGE1: main:733692:0 +MERGE1: 2.3: 120802 +MERGE1: 2.3: _Z3fool:492088 +MERGE1: 1.7: _Z3bari:198984 +MERGE1: 1.1: 105520 diff --git a/test/tools/llvm-profdata/overflow-instr.test b/test/tools/llvm-profdata/overflow-instr.test new file mode 100644 index 000000000000..5b9a94af9b29 --- /dev/null +++ b/test/tools/llvm-profdata/overflow-instr.test @@ -0,0 +1,17 @@ +Tests for overflow when merging instrumented profiles. + +1- Merge profile having maximum counts with itself and verify overflow detected and saturation occurred +RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW +RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW +MERGE_OVERFLOW: {{.*}}: overflow: Counter overflow +SHOW_OVERFLOW: Total functions: 1 +SHOW_OVERFLOW-NEXT: Maximum function count: 18446744073709551615 +SHOW_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615 + +2- Merge profile having maximum counts by itself and verify no overflow +RUN: llvm-profdata merge -instr %p/Inputs/overflow-instr.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_NO_OVERFLOW -allow-empty +RUN: llvm-profdata show -instr %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW +MERGE_NO_OVERFLOW-NOT: {{.*}}: overflow: Counter overflow +SHOW_NO_OVERFLOW: Total functions: 1 +SHOW_NO_OVERFLOW-NEXT: Maximum function count: 18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: Maximum internal block count: 18446744073709551615 diff --git a/test/tools/llvm-profdata/overflow-sample.test b/test/tools/llvm-profdata/overflow-sample.test new file mode 100644 index 000000000000..cd6268db2ab9 --- /dev/null +++ b/test/tools/llvm-profdata/overflow-sample.test @@ -0,0 +1,43 @@ +Tests for overflow when merging sampled profiles. 
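For reference, both overflow tests (instrumented above, sampled below) assume that merge arithmetic saturates at the maximum 64-bit counter value instead of wrapping. A minimal C++ sketch of saturating addition under that assumption -- illustrative, not necessarily the helper LLVM uses:

  #include <cassert>
  #include <cstdint>

  // Add two counters; clamp to UINT64_MAX and report when the sum would wrap.
  uint64_t saturatingAdd(uint64_t A, uint64_t B, bool &Overflowed) {
    const uint64_t Max = UINT64_MAX;
    Overflowed = A > Max - B; // true exactly when A + B does not fit
    return Overflowed ? Max : A + B;
  }

  int main() {
    bool Ovf = false;
    // Merging a maximal count with itself saturates (the MERGE_OVERFLOW case):
    assert(saturatingAdd(18446744073709551615ULL, 18446744073709551615ULL, Ovf) ==
           18446744073709551615ULL && Ovf);
    // A single copy of the profile merges without overflow (MERGE_NO_OVERFLOW):
    assert(saturatingAdd(18446744073709551615ULL, 0, Ovf) ==
           18446744073709551615ULL && !Ovf);
    return 0;
  }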
+ +1- Merge profile having maximum counts with itself and verify overflow detected +RUN: llvm-profdata merge -sample %p/Inputs/overflow-sample.proftext %p/Inputs/overflow-sample.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=MERGE_OVERFLOW +RUN: llvm-profdata show -sample %t.out | FileCheck %s --check-prefix=SHOW_OVERFLOW +MERGE_OVERFLOW: {{.*}}: main: Counter overflow +SHOW_OVERFLOW: Function: main: 2000, 0, 2 sampled lines +SHOW_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_OVERFLOW-NEXT: 1: 1000, calls: _Z3bari:18446744073709551615 +SHOW_OVERFLOW-NEXT: 2: 1000, calls: _Z3fooi:18446744073709551615 +SHOW_OVERFLOW-NEXT: } +SHOW_OVERFLOW-NEXT: No inlined callsites in this function +SHOW_OVERFLOW-NEXT: Function: _Z3fooi: 18446744073709551615, 2000, 1 sampled lines +SHOW_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_OVERFLOW-NEXT: 1: 18446744073709551615 +SHOW_OVERFLOW-NEXT: } +SHOW_OVERFLOW-NEXT: No inlined callsites in this function +SHOW_OVERFLOW-NEXT: Function: _Z3bari: 18446744073709551615, 2000, 1 sampled lines +SHOW_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_OVERFLOW-NEXT: 1: 18446744073709551615 +SHOW_OVERFLOW-NEXT: } +SHOW_OVERFLOW-NEXT: No inlined callsites in this function + +2- Merge profile having maximum counts by itself and verify no overflow +RUN: llvm-profdata merge -sample %p/Inputs/overflow-sample.proftext -o %t.out 2>&1 | FileCheck %s -allow-empty -check-prefix=MERGE_NO_OVERFLOW +RUN: llvm-profdata show -sample %t.out | FileCheck %s --check-prefix=SHOW_NO_OVERFLOW +MERGE_NO_OVERFLOW-NOT: {{.*}}: main: Counter overflow +SHOW_NO_OVERFLOW: Function: main: 1000, 0, 2 sampled lines +SHOW_NO_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_NO_OVERFLOW-NEXT: 1: 500, calls: _Z3bari:18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: 2: 500, calls: _Z3fooi:18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: } +SHOW_NO_OVERFLOW-NEXT: No inlined callsites in this function +SHOW_NO_OVERFLOW-NEXT: Function: _Z3fooi: 18446744073709551615, 1000, 1 sampled lines +SHOW_NO_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_NO_OVERFLOW-NEXT: 1: 18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: } +SHOW_NO_OVERFLOW-NEXT: No inlined callsites in this function +SHOW_NO_OVERFLOW-NEXT: Function: _Z3bari: 18446744073709551615, 1000, 1 sampled lines +SHOW_NO_OVERFLOW-NEXT: Samples collected in the function's body { +SHOW_NO_OVERFLOW-NEXT: 1: 18446744073709551615 +SHOW_NO_OVERFLOW-NEXT: } +SHOW_NO_OVERFLOW-NEXT: No inlined callsites in this function diff --git a/test/tools/llvm-profdata/overflow.proftext b/test/tools/llvm-profdata/overflow.proftext deleted file mode 100644 index cbf3bf161823..000000000000 --- a/test/tools/llvm-profdata/overflow.proftext +++ /dev/null @@ -1,12 +0,0 @@ -# RUN: llvm-profdata merge %s -o %t.out 2>&1 | FileCheck %s -# CHECK: overflow.proftext: overflow: Counter overflow - -overflow -1 -1 -9223372036854775808 - -overflow -1 -1 -9223372036854775808 diff --git a/test/tools/llvm-profdata/raw-32-bits-be.test b/test/tools/llvm-profdata/raw-32-bits-be.test index 86ac56d39f26..d20c36022fa6 100644 --- a/test/tools/llvm-profdata/raw-32-bits-be.test +++ b/test/tools/llvm-profdata/raw-32-bits-be.test @@ -1,27 +1,36 @@ RUN: printf '\377lprofR\201' > %t -RUN: printf '\0\0\0\0\0\0\0\1' >> %t +RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\6' >> %t RUN: printf '\0\0\0\0\1\0\0\0' >> %t RUN: printf '\0\0\0\0\2\0\0\0' >> %t +RUN: 
printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\1' >> %t RUN: printf '\0\0\0\0\0\0\0\1' >> %t RUN: printf '\2\0\0\0' >> %t RUN: printf '\1\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\2\0\0\03' >> %t RUN: printf '\1\0\0\10' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\023' >> %t RUN: printf '\0\0\0\0\0\0\0\067' >> %t RUN: printf '\0\0\0\0\0\0\0\101' >> %t -RUN: printf 'foobar' >> %t +RUN: printf 'foobar\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s diff --git a/test/tools/llvm-profdata/raw-32-bits-le.test b/test/tools/llvm-profdata/raw-32-bits-le.test index 9325e7eb0f52..1bd81a87fbac 100644 --- a/test/tools/llvm-profdata/raw-32-bits-le.test +++ b/test/tools/llvm-profdata/raw-32-bits-le.test @@ -1,27 +1,36 @@ RUN: printf '\201Rforpl\377' > %t -RUN: printf '\1\0\0\0\0\0\0\0' >> %t +RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0\0\0\0\0' >> %t RUN: printf '\6\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\1\0\0\0\0' >> %t RUN: printf '\0\0\0\2\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0' >> %t RUN: printf '\1\0\0\0' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\2' >> %t RUN: printf '\0\0\0\1' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0' >> %t RUN: printf '\2\0\0\0' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t RUN: printf '\03\0\0\2' >> %t RUN: printf '\10\0\0\1' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\023\0\0\0\0\0\0\0' >> %t RUN: printf '\067\0\0\0\0\0\0\0' >> %t RUN: printf '\101\0\0\0\0\0\0\0' >> %t -RUN: printf 'foobar' >> %t +RUN: printf 'foobar\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s diff --git a/test/tools/llvm-profdata/raw-64-bits-be.test b/test/tools/llvm-profdata/raw-64-bits-be.test index b97d8b5dac6d..902cedd476ab 100644 --- a/test/tools/llvm-profdata/raw-64-bits-be.test +++ b/test/tools/llvm-profdata/raw-64-bits-be.test @@ -1,27 +1,36 @@ RUN: printf '\377lprofr\201' > %t -RUN: printf '\0\0\0\0\0\0\0\1' >> %t +RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\3' >> %t RUN: printf '\0\0\0\0\0\0\0\6' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t RUN: printf '\0\0\0\2\0\4\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\1' >> %t RUN: printf '\0\0\0\0\0\0\0\1' >> %t RUN: printf '\0\0\0\2\0\4\0\0' >> %t RUN: printf '\0\0\0\1\0\4\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\3' >> %t RUN: printf '\0\0\0\2' >> %t RUN: printf '\0\0\0\0\0\0\0\02' >> %t RUN: printf '\0\0\0\2\0\4\0\03' >> %t RUN: printf '\0\0\0\1\0\4\0\10' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\0\0\0\0\0\023' >> 
%t RUN: printf '\0\0\0\0\0\0\0\067' >> %t RUN: printf '\0\0\0\0\0\0\0\101' >> %t -RUN: printf 'foobar' >> %t +RUN: printf 'foobar\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s diff --git a/test/tools/llvm-profdata/raw-64-bits-le.test b/test/tools/llvm-profdata/raw-64-bits-le.test index 0e6853811ec4..d2f410a6bb95 100644 --- a/test/tools/llvm-profdata/raw-64-bits-le.test +++ b/test/tools/llvm-profdata/raw-64-bits-le.test @@ -1,27 +1,36 @@ RUN: printf '\201rforpl\377' > %t -RUN: printf '\1\0\0\0\0\0\0\0' >> %t +RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\2\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0\0\0\0\0' >> %t RUN: printf '\6\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\4\0\1\0\0\0' >> %t RUN: printf '\0\0\4\0\2\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\3\0\0\0' >> %t RUN: printf '\1\0\0\0' >> %t RUN: printf '\1\0\0\0\0\0\0\0' >> %t RUN: printf '\0\0\4\0\2\0\0\0' >> %t RUN: printf '\0\0\4\0\1\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\03\0\0\0' >> %t RUN: printf '\02\0\0\0' >> %t RUN: printf '\02\0\0\0\0\0\0\0' >> %t RUN: printf '\03\0\4\0\2\0\0\0' >> %t RUN: printf '\10\0\4\0\1\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t +RUN: printf '\0\0\0\0\0\0\0\0' >> %t RUN: printf '\023\0\0\0\0\0\0\0' >> %t RUN: printf '\067\0\0\0\0\0\0\0' >> %t RUN: printf '\101\0\0\0\0\0\0\0' >> %t -RUN: printf 'foobar' >> %t +RUN: printf 'foobar\0\0' >> %t RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s diff --git a/test/tools/llvm-profdata/raw-magic-but-no-header.test b/test/tools/llvm-profdata/raw-magic-but-no-header.test index b2a697042b0a..76894faa183c 100644 --- a/test/tools/llvm-profdata/raw-magic-but-no-header.test +++ b/test/tools/llvm-profdata/raw-magic-but-no-header.test @@ -3,4 +3,4 @@ RUN: not llvm-profdata show %t 2>&1 | FileCheck %s RUN: printf '\377lprofr\201' > %t RUN: not llvm-profdata show %t 2>&1 | FileCheck %s -CHECK: error: {{.+}}: Invalid profile data (file header is corrupt) +CHECK: error: {{.+}}: Invalid instrumentation profile data (file header is corrupt) diff --git a/test/tools/llvm-profdata/raw-two-profiles.test b/test/tools/llvm-profdata/raw-two-profiles.test index be78793215ed..09eb121adf3f 100644 --- a/test/tools/llvm-profdata/raw-two-profiles.test +++ b/test/tools/llvm-profdata/raw-two-profiles.test @@ -1,48 +1,51 @@ RUN: printf '\201rforpl\377' > %t-foo.profraw -RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\2\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\3\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\3\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0' >> %t-foo.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\2\0\0\0' >> %t-foo.profraw RUN: printf '\0\0\4\0\1\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\023\0\0\0\0\0\0\0' >> %t-foo.profraw -RUN: printf 'foo' >> 
%t-foo.profraw +RUN: printf 'foo\0\0\0\0\0' >> %t-foo.profraw RUN: printf '\201rforpl\377' > %t-bar.profraw -RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\1\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\3\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\3\0\0\0' >> %t-bar.profraw RUN: printf '\2\0\0\0' >> %t-bar.profraw RUN: printf '\2\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\2\0\0\0' >> %t-bar.profraw RUN: printf '\0\0\6\0\1\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw +RUN: printf '\0\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\067\0\0\0\0\0\0\0' >> %t-bar.profraw RUN: printf '\101\0\0\0\0\0\0\0' >> %t-bar.profraw -RUN: printf 'bar' >> %t-bar.profraw +RUN: printf 'bar\0\0\0\0\0' >> %t-bar.profraw -Versions of the profiles that are padded to eight byte alignment. -RUN: cat %t-foo.profraw > %t-foo-padded.profraw -RUN: printf '\0\0\0\0\0' >> %t-foo-padded.profraw -RUN: cat %t-bar.profraw > %t-bar-padded.profraw -RUN: printf '\0\0\0\0\0' >> %t-bar-padded.profraw - -RUN: cat %t-foo-padded.profraw %t-bar.profraw > %t-pad-between.profraw -RUN: cat %t-foo-padded.profraw %t-bar-padded.profraw > %t-pad.profraw - -RUN: llvm-profdata show %t-pad-between.profraw -all-functions -counts | FileCheck %s +RUN: cat %t-foo.profraw %t-bar.profraw > %t-pad.profraw RUN: llvm-profdata show %t-pad.profraw -all-functions -counts | FileCheck %s CHECK: Counters: diff --git a/test/tools/llvm-profdata/sample-profile-basic.test b/test/tools/llvm-profdata/sample-profile-basic.test index 0651c513e965..5116b98f3335 100644 --- a/test/tools/llvm-profdata/sample-profile-basic.test +++ b/test/tools/llvm-profdata/sample-profile-basic.test @@ -3,15 +3,15 @@ Basic tests for sample profiles. 1- Show all functions RUN: llvm-profdata show --sample %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=SHOW1 SHOW1: Function: main: 184019, 0, 7 sampled lines -SHOW1: line offset: 9, discriminator: 0, number of samples: 2064, calls: _Z3fooi:631 _Z3bari:1471 +SHOW1: 9: 2064, calls: _Z3fooi:631 _Z3bari:1471 SHOW1: Function: _Z3fooi: 7711, 610, 1 sampled lines SHOW1: Function: _Z3bari: 20301, 1437, 1 sampled lines -SHOW1: line offset: 1, discriminator: 0, number of samples: 1437 +SHOW1: 1: 1437 2- Show only bar RUN: llvm-profdata show --sample --function=_Z3bari %p/Inputs/sample-profile.proftext | FileCheck %s --check-prefix=SHOW2 SHOW2: Function: _Z3bari: 20301, 1437, 1 sampled lines -SHOW2: line offset: 1, discriminator: 0, number of samples: 1437 +SHOW2: 1: 1437 SHOW2-NOT: Function: main: 184019, 0, 7 sampled lines SHOW2-NOT: Function: _Z3fooi: 7711, 610, 1 sampled lines @@ -28,3 +28,7 @@ RUN: llvm-profdata merge --sample --text %p/Inputs/sample-profile.proftext %t-bi MERGE1: main:368038:0 MERGE1: 9: 4128 _Z3fooi:1262 _Z3bari:2942 MERGE1: _Z3fooi:15422:1220 + +5- Detect invalid text encoding (e.g. instrumentation profile text format). 
+RUN: not llvm-profdata show --sample %p/Inputs/foo3bar3-1.proftext 2>&1 | FileCheck %s --check-prefix=BADTEXT +BADTEXT: error: {{.+}}: Unrecognized sample profile encoding format diff --git a/test/tools/llvm-profdata/text-dump.test b/test/tools/llvm-profdata/text-dump.test new file mode 100644 index 000000000000..94a78d9dbac3 --- /dev/null +++ b/test/tools/llvm-profdata/text-dump.test @@ -0,0 +1,21 @@ +Basic tests for the text dump functions. + +RUN: llvm-profdata show --all-functions -counts --text %p/Inputs/basic.proftext > %t-basic.proftext1 +RUN: llvm-profdata merge -o %t-basic.proftext2 --text %p/Inputs/basic.proftext + +RUN: llvm-profdata merge -binary -o %t-basic.profdata1 %t-basic.proftext1 +RUN: llvm-profdata merge -o %t-basic.profdata2 %t-basic.proftext2 + +RUN: llvm-profdata show --all-functions -counts %t-basic.profdata1 > %t-basic.dump3 +RUN: llvm-profdata show --all-functions -counts %t-basic.profdata2 > %t-basic.dump4 + +RUN: llvm-profdata merge -text -o %t-basic.proftext5 %t-basic.profdata1 +RUN: llvm-profdata merge -text -o %t-basic.proftext6 %t-basic.profdata2 + +RUN: diff %t-basic.dump3 %t-basic.dump4 +RUN: diff %t-basic.proftext5 %t-basic.proftext6 + +RUN: not llvm-profdata merge -gcc -o %t-basic-profdata3 %t-basic.proftext2 2>&1 | FileCheck %s --check-prefix=UNKNOWN +UNKNOWN: Unknown + + diff --git a/test/tools/llvm-profdata/text-format-errors.test b/test/tools/llvm-profdata/text-format-errors.test index 01513e4fcb9e..05de2e38af1f 100644 --- a/test/tools/llvm-profdata/text-format-errors.test +++ b/test/tools/llvm-profdata/text-format-errors.test @@ -1,10 +1,29 @@ -RUN: not llvm-profdata show %p/Inputs/invalid-count-later.proftext 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER -RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.proftext %p/Inputs/invalid-count-later.profdata -o %t.out 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER -INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.proftext: Malformed profile data +Tests for detecting bad instrumentation profile encodings. +1- Detect invalid count +RUN: not llvm-profdata show %p/Inputs/invalid-count-later.proftext 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER +RUN: not llvm-profdata merge %p/Inputs/invalid-count-later.proftext %p/Inputs/invalid-count-later.proftext -o %t.out 2>&1 | FileCheck %s --check-prefix=INVALID-COUNT-LATER +INVALID-COUNT-LATER: error: {{.*}}invalid-count-later.proftext: Malformed instrumentation profile data + +2- Detect bad hash RUN: not llvm-profdata show %p/Inputs/bad-hash.proftext 2>&1 | FileCheck %s --check-prefix=BAD-HASH RUN: not llvm-profdata merge %p/Inputs/bad-hash.proftext %p/Inputs/bad-hash.proftext -o %t.out 2>&1 | FileCheck %s --check-prefix=BAD-HASH -BAD-HASH: error: {{.*}}bad-hash.proftext: Malformed profile data +BAD-HASH: error: {{.*}}bad-hash.proftext: Malformed instrumentation profile data +3- Detect no counts RUN: not llvm-profdata show %p/Inputs/no-counts.proftext 2>&1 | FileCheck %s --check-prefix=NO-COUNTS -NO-COUNTS: error: {{.*}}no-counts.proftext: Malformed profile data +NO-COUNTS: error: {{.*}}no-counts.proftext: Malformed instrumentation profile data + +4- Detect binary input +RUN: not llvm-profdata show %p/Inputs/text-format-errors.text.bin 2>&1 | FileCheck %s --check-prefix=BINARY +BINARY: error: {{.+}}: Unrecognized instrumentation profile encoding format +BINARY: Perhaps you forgot to use the -sample option?
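For reference, the "Unrecognized ... encoding format" errors follow from inspecting the leading magic bytes -- the same bytes the raw-32/64-bits tests above write with printf. A minimal C++ sketch of magic-based detection using only those byte sequences; hasRawInstrMagic is a hypothetical helper, not the real reader API:

  #include <cstddef>
  #include <cstring>

  // Raw-profile magics from the raw-* tests: "\377lprofr\201" / "\201rforpl\377"
  // for 64-bit counters and "\377lprofR\201" / "\201Rforpl\377" for 32-bit.
  static bool hasRawInstrMagic(const unsigned char *Buf, std::size_t Size) {
    static const unsigned char Magics[4][8] = {
        {0xff, 'l', 'p', 'r', 'o', 'f', 'r', 0x81},
        {0x81, 'r', 'f', 'o', 'r', 'p', 'l', 0xff},
        {0xff, 'l', 'p', 'r', 'o', 'f', 'R', 0x81},
        {0x81, 'R', 'f', 'o', 'r', 'p', 'l', 0xff},
    };
    if (Size < 8)
      return false;
    for (const auto &M : Magics)
      if (std::memcmp(Buf, M, 8) == 0)
        return true;
    return false; // neither raw form: try indexed, then text, then give up
  }

  int main() {
    const unsigned char Raw[8] = {0x81, 'r', 'f', 'o', 'r', 'p', 'l', 0xff};
    const unsigned char Text[8] = {'f', 'o', 'o', '\n', '1', '0', '\n', '2'};
    return (hasRawInstrMagic(Raw, 8) && !hasRawInstrMagic(Text, 8)) ? 0 : 1;
  }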
+ +5- Detect malformed value profile data +RUN: not llvm-profdata show %p/Inputs/vp-malform.proftext 2>&1 | FileCheck %s --check-prefix=VP +RUN: not llvm-profdata show %p/Inputs/vp-malform2.proftext 2>&1 | FileCheck %s --check-prefix=VP +VP: Malformed instrumentation profile data + +6- Detect truncated value profile data +RUN: not llvm-profdata show %p/Inputs/vp-truncate.proftext 2>&1 | FileCheck %s --check-prefix=VPTRUNC +VPTRUNC: Truncated profile data diff --git a/test/tools/llvm-profdata/value-prof.proftext b/test/tools/llvm-profdata/value-prof.proftext new file mode 100644 index 000000000000..ca2b1f822097 --- /dev/null +++ b/test/tools/llvm-profdata/value-prof.proftext @@ -0,0 +1,57 @@ +# RUN: llvm-profdata show -ic-targets -all-functions %s | FileCheck %s --check-prefix=IC +# RUN: llvm-profdata show -ic-targets -counts -text -all-functions %s | FileCheck %s --check-prefix=ICTEXT +# RUN: llvm-profdata merge -o %t.profdata %s +# RUN: llvm-profdata show -ic-targets -all-functions %t.profdata | FileCheck %s --check-prefix=IC + +foo +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +999000 +359800 + +foo2 +# Func Hash: +10 +# Num Counters: +2 +# Counter Values: +1001000 +360200 + +main +# Func Hash: +16650 +# Num Counters: +4 +# Counter Values: +2 +2000 +2000000 +999000 +# NumValueKinds +1 +# Value Kind IPVK_IndirectCallTarget +0 +# NumSites +3 +# Values for each site +0 +2 +foo:100 +foo2:1000 +1 +foo2:20000 + +#IC: Indirect Call Site Count: 3 +#IC-NEXT: Indirect Target Results: +#IC-NEXT: [ 1, foo, 100 ] +#IC-NEXT: [ 1, foo2, 1000 ] +#IC-NEXT: [ 2, foo2, 20000 ] + +#ICTEXT: foo:100 +#ICTEXT-NEXT: foo2:1000 +#ICTEXT-NEXT: 1 +#ICTEXT-NEXT: foo2:20000 diff --git a/test/tools/llvm-profdata/weight-instr.test b/test/tools/llvm-profdata/weight-instr.test new file mode 100644 index 000000000000..7294cf3b01f0 --- /dev/null +++ b/test/tools/llvm-profdata/weight-instr.test @@ -0,0 +1,69 @@ +Tests for weighted merge of instrumented profiles. 
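For reference, the 3x/5x expectations below are plain weighted sums of the 1x counts: each input's counters are scaled by its weight before merging. A small C++ check of that arithmetic against the FileCheck values (illustrative only):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Counts from the 1X_1X_WEIGHT output below:
    const uint64_t FnCount = 866988873;  // foo's and bar's function count
    const uint64_t MaxBlock = 267914296; // maximum internal block count (in foo)

    // Scaling bar by 3 and foo by 5 reproduces the 3X_5X_WEIGHT expectations:
    assert(3 * FnCount == 2600966619ULL);  // bar:  Function count
    assert(5 * FnCount == 4334944365ULL);  // foo:  Function count
    assert(5 * MaxBlock == 1339571480ULL); // Maximum internal block count
    return 0;
  }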
+ +1- Merge the foo and bar profiles with unity weight and verify the combined output +RUN: llvm-profdata merge -instr -weighted-input=1,%p/Inputs/weight-instr-bar.profdata -weighted-input=1,%p/Inputs/weight-instr-foo.profdata -o %t +RUN: llvm-profdata show -instr -all-functions %t | FileCheck %s -check-prefix=1X_1X_WEIGHT +RUN: llvm-profdata merge -instr -weighted-input=1,%p/Inputs/weight-instr-bar.profdata %p/Inputs/weight-instr-foo.profdata -o %t +RUN: llvm-profdata show -instr -all-functions %t | FileCheck %s -check-prefix=1X_1X_WEIGHT +1X_1X_WEIGHT: Counters: +1X_1X_WEIGHT-NEXT: usage: +1X_1X_WEIGHT-NEXT: Hash: 0x0000000000000000 +1X_1X_WEIGHT-NEXT: Counters: 1 +1X_1X_WEIGHT-NEXT: Function count: 0 +1X_1X_WEIGHT-NEXT: foo: +1X_1X_WEIGHT-NEXT: Hash: 0x000000000000028a +1X_1X_WEIGHT-NEXT: Counters: 3 +1X_1X_WEIGHT-NEXT: Function count: 866988873 +1X_1X_WEIGHT-NEXT: bar: +1X_1X_WEIGHT-NEXT: Hash: 0x000000000000028a +1X_1X_WEIGHT-NEXT: Counters: 3 +1X_1X_WEIGHT-NEXT: Function count: 866988873 +1X_1X_WEIGHT-NEXT: main: +1X_1X_WEIGHT-NEXT: Hash: 0x7d31c47ea98f8248 +1X_1X_WEIGHT-NEXT: Counters: 60 +1X_1X_WEIGHT-NEXT: Function count: 2 +1X_1X_WEIGHT-NEXT: Functions shown: 4 +1X_1X_WEIGHT-NEXT: Total functions: 4 +1X_1X_WEIGHT-NEXT: Maximum function count: 866988873 +1X_1X_WEIGHT-NEXT: Maximum internal block count: 267914296 + +2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output +RUN: llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=5,%p/Inputs/weight-instr-foo.profdata -o %t +RUN: llvm-profdata show -instr -all-functions %t | FileCheck %s -check-prefix=3X_5X_WEIGHT +3X_5X_WEIGHT: Counters: +3X_5X_WEIGHT-NEXT: usage: +3X_5X_WEIGHT-NEXT: Hash: 0x0000000000000000 +3X_5X_WEIGHT-NEXT: Counters: 1 +3X_5X_WEIGHT-NEXT: Function count: 0 +3X_5X_WEIGHT-NEXT: foo: +3X_5X_WEIGHT-NEXT: Hash: 0x000000000000028a +3X_5X_WEIGHT-NEXT: Counters: 3 +3X_5X_WEIGHT-NEXT: Function count: 4334944365 +3X_5X_WEIGHT-NEXT: bar: +3X_5X_WEIGHT-NEXT: Hash: 0x000000000000028a +3X_5X_WEIGHT-NEXT: Counters: 3 +3X_5X_WEIGHT-NEXT: Function count: 2600966619 +3X_5X_WEIGHT-NEXT: main: +3X_5X_WEIGHT-NEXT: Hash: 0x7d31c47ea98f8248 +3X_5X_WEIGHT-NEXT: Counters: 60 +3X_5X_WEIGHT-NEXT: Function count: 8 +3X_5X_WEIGHT-NEXT: Functions shown: 4 +3X_5X_WEIGHT-NEXT: Total functions: 4 +3X_5X_WEIGHT-NEXT: Maximum function count: 4334944365 +3X_5X_WEIGHT-NEXT: Maximum internal block count: 1339571480 + +3- Bad merge: invalid weight +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=0,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=0.75,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=-5,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=,%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=%p/Inputs/weight-instr-foo.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT 
+INVALID_WEIGHT: error: Input weight must be a positive integer. + +4- Bad merge: input path does not exist +RUN: not llvm-profdata merge -instr -weighted-input=3,%p/Inputs/does-not-exist.profdata -weighted-input=2,%p/Inputs/does-not-exist-either.profdata -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_INPUT +INVALID_INPUT: {{.*}}: {{.*}}does-not-exist.profdata: {{[Nn]}}o such file or directory + +5- No inputs +RUN: not llvm-profdata merge -instr -o %t.out 2>&1 | FileCheck %s -check-prefix=NO_INPUT +NO_INPUT: {{.*}}: No input files specified. See llvm-profdata{{(\.EXE|\.exe)?}} merge -help diff --git a/test/tools/llvm-profdata/weight-sample.test b/test/tools/llvm-profdata/weight-sample.test new file mode 100644 index 000000000000..7b22c5f88f1f --- /dev/null +++ b/test/tools/llvm-profdata/weight-sample.test @@ -0,0 +1,56 @@ +Tests for weighted merge of sample profiles. + +1- Merge the foo and bar profiles with unity weight and verify the combined output +RUN: llvm-profdata merge -sample -text -weighted-input=1,%p/Inputs/weight-sample-bar.proftext -weighted-input=1,%p/Inputs/weight-sample-foo.proftext -o - | FileCheck %s -check-prefix=1X_1X_WEIGHT +RUN: llvm-profdata merge -sample -text -weighted-input=1,%p/Inputs/weight-sample-bar.proftext %p/Inputs/weight-sample-foo.proftext -o - | FileCheck %s -check-prefix=1X_1X_WEIGHT +1X_1X_WEIGHT: foo:1763288:35327 +1X_1X_WEIGHT-NEXT: 7: 35327 +1X_1X_WEIGHT-NEXT: 8: 35327 +1X_1X_WEIGHT-NEXT: 9: 6930 +1X_1X_WEIGHT-NEXT: 10: 29341 +1X_1X_WEIGHT-NEXT: 11: 11906 +1X_1X_WEIGHT-NEXT: 13: 18185 foo:19531 +1X_1X_WEIGHT-NEXT: 15: 36458 +1X_1X_WEIGHT-NEXT: bar:1772037:35370 +1X_1X_WEIGHT-NEXT: 17: 35370 +1X_1X_WEIGHT-NEXT: 18: 35370 +1X_1X_WEIGHT-NEXT: 19: 7005 +1X_1X_WEIGHT-NEXT: 20: 29407 +1X_1X_WEIGHT-NEXT: 21: 12170 +1X_1X_WEIGHT-NEXT: 23: 18150 bar:19829 +1X_1X_WEIGHT-NEXT: 25: 36666 + +2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output +RUN: llvm-profdata merge -sample -text -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=5,%p/Inputs/weight-sample-foo.proftext -o - | FileCheck %s -check-prefix=3X_5X_WEIGHT +3X_5X_WEIGHT: foo:8816440:176635 +3X_5X_WEIGHT-NEXT: 7: 176635 +3X_5X_WEIGHT-NEXT: 8: 176635 +3X_5X_WEIGHT-NEXT: 9: 34650 +3X_5X_WEIGHT-NEXT: 10: 146705 +3X_5X_WEIGHT-NEXT: 11: 59530 +3X_5X_WEIGHT-NEXT: 13: 90925 foo:97655 +3X_5X_WEIGHT-NEXT: 15: 182290 +3X_5X_WEIGHT-NEXT: bar:5316111:106110 +3X_5X_WEIGHT-NEXT: 17: 106110 +3X_5X_WEIGHT-NEXT: 18: 106110 +3X_5X_WEIGHT-NEXT: 19: 21015 +3X_5X_WEIGHT-NEXT: 20: 88221 +3X_5X_WEIGHT-NEXT: 21: 36510 +3X_5X_WEIGHT-NEXT: 23: 54450 bar:59487 +3X_5X_WEIGHT-NEXT: 25: 109998 + +3- Bad merge: invalid weight +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=0,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=0.75,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=-5,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=,%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +RUN: not 
llvm-profdata merge -sample -weighted-input=3,%p/Inputs/weight-sample-bar.proftext -weighted-input=%p/Inputs/weight-sample-foo.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_WEIGHT +INVALID_WEIGHT: error: Input weight must be a positive integer. + +4- Bad merge: input path does not exist +RUN: not llvm-profdata merge -sample -weighted-input=3,%p/Inputs/does-not-exist.proftext -weighted-input=2,%p/Inputs/does-not-exist-either.proftext -o %t.out 2>&1 | FileCheck %s -check-prefix=INVALID_INPUT +INVALID_INPUT: {{.*}}: {{.*}}does-not-exist.proftext: {{[Nn]}}o such file or directory + +5- No inputs +RUN: not llvm-profdata merge -sample -o %t.out 2>&1 | FileCheck %s -check-prefix=NO_INPUT +NO_INPUT: {{.*}}: No input files specified. See llvm-profdata{{(\.EXE|\.exe)?}} merge -help diff --git a/test/tools/llvm-readobj/ARM/attribute-4.s b/test/tools/llvm-readobj/ARM/attribute-4.s index dd0a4a6d6a73..2c27785410ed 100644 --- a/test/tools/llvm-readobj/ARM/attribute-4.s +++ b/test/tools/llvm-readobj/ARM/attribute-4.s @@ -15,6 +15,13 @@ @CHECK-OBJ-NEXT: TagName: FP_arch @CHECK-OBJ-NEXT: Description: VFPv3-D16 +.eabi_attribute Tag_Advanced_SIMD_arch, 4 +@CHECK: .eabi_attribute 12, 4 +@CHECK-OBJ: Tag: 12 +@CHECK-OBJ-NEXT: Value: 4 +@CHECK-OBJ-NEXT: TagName: Advanced_SIMD_arch +@CHECK-OBJ-NEXT: Description: ARMv8.1-a NEON + .eabi_attribute Tag_PCS_config, 4 @CHECK: .eabi_attribute 13, 4 @CHECK-OBJ: Tag: 13 diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386 b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-i386 new file mode 100644 index 0000000000000000000000000000000000000000..c222a899ba7178b4d305949c330c4033f6ea1081 GIT binary patch literal 1080 zcmb7DF-t;G6h8JUG#d;Wf(l%MhI%v@5d@*76<7v2*mQZOW`jz;*B}I~4o=~}X=&&O zG}O@C+FE1weeb@DPYI$AKF|HmJ@?#q&bi)Y^SMYQA_kUNLY2Z#3if$I@1!J&omGj+ zw7+lSY5iElPf{dgczzChUTjdaqD7NI!%OW>$SzM)>W(H9rANJ1*y3RQg7OM998|Vl$>o)zQPKGr5y#^hg z$Hv(H)|eKxe}0laKH3v?kS}D?c~!{e&ay>SOlR_0H8PX%sDkkz+uvA3$k$-dV)g6| z$?poDyI(cm+X~2e($5)_r=WSGjNz=;10UnqOOkKFfITBNL9Q9co|wKs`d~g{7v#I* zJ?uT>_#e{K(f@@gG{<0kSV|9Zhg<~UT?DPEjrs+SM_A?{O&u|;!}v8Y}* zOhZwslO!rh!(j@$H|U~RJo@z$>a(sZUKyHq`3|u6XFv%+EB_~3r?P}TI71^ra@1JUMgb@LPEa(8vF!Y@oj-xU?{rvI5X%9%f_a!+;gmqr3$; zV;?hedHnBq28DUk0Ajix{#Wk;CNS~sGLM)6+`BOK-Zo)4asAoeoSWB{Yv7>;Yw^D1 OWA$>^;5ERBuJ;@8Tx)Uw literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64 b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-ppc64 new file mode 100644 index 0000000000000000000000000000000000000000..25ce5c71b38b682305e8c98fcb1f8c79a06a677d GIT binary patch literal 1784 zcmcgsJ#Q015S=|cB*YX@fGj>FQ$WX6Ai0Z42vQ`VpaH7W1v@bk_QBmDB@z-*D%Yv_ zDUoPW@&}NhrlFvP67yzfW_1o{3sErA?%R1Yv$Hod_Pfmo_oK*BkOp1ESqXQnd{401 z_1L2>HK<1|T7k~KO|j40?H)zBo%b^mVf-m1`lvSwN?i08aVHvQ$nR`m@eMw&e|Tqo zqPl;7Yf?q8u^&{tl0<^Ixr< z7otnx>+)Wn~`Jhp{q%mB;?W@k%zG{?s_Fd+3MteQW7ie+ zXSji7^Lwgqt^*qj*OPl^{kHfvxLaxiSPtv2A=&SQeOT-sKHtHz|2@mE{hsM>-TOKJ z8kXI+C?0M>EFTkFSo-_c`3!Rv@}Izmzu&~ZAFZdySe9RWT*UQ)7|zt24HnOM0jqj9 x+f||iJv=c+(ekm%=W1=#g(3PZit7UTKkKaMAwSoTxjFfp{CzAp1;QTk{{{~(Y}5b% literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64 b/test/tools/llvm-readobj/Inputs/gnuhash.so.elf-x86_64 new file mode 100644 index 0000000000000000000000000000000000000000..612e7b2112ff441571330bbfc3f63b5a346270b0 GIT binary patch literal 1616 zcmcgs&o2W(6n^a@_2W=C36j0Iv2h?AgtoLuiUS8ulci-H+D3OJBof!2o%|V&690sY zgR_f+D@XO-?0oE2S0WNG>3lQa``*m!n>Tw|ESJ)$6u~T7Bw5`WDrX$o9w>-%G*1?f z{nV$m5!aW<%&-rT$kra&zK?(4Z&Nm}WNi!3jUK-cacSm%6PGdjSd4Ne&TSQJnPy}_ 
z{w=>XSSUWc+&x`gJf7^1u`AIeF3XX~AHrVE_mY>kL1`RG;D-`;I&3sZRV!}bl2ZG= zul&Gm229l}vf=A{yCL{21Dl*Hb}fmqNsYwxzhoTHE243Og9at9e=)~DuLbTtIeyR# z(ZlShe(HFx?~zkEud^(~ph-^EQ-@8rR@IDKJ3ja^Rf}&I^@s{BN`KZN&Rd`Liu2#^ zRs0rL8T|>plsnahmwF4|*#|cB*)IH~=D(jiouHpf;7P6QZRR)Dmr1Rbw+i{P+A5W{ zi#uv3Unmz=z&6@g9Kaf3pqWQQT&EDIDVQzJYNye?U5jC0@(hlN`j*FNnu?4SvLDLpcpI t!e=NAo=IpeO7grj|7h+}>5Yj;9pzm7M2GtI^Ss;n_l8^$al|^${{W#QYM}rC literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipsel b/test/tools/llvm-readobj/Inputs/mips-rld-map-rel.elf-mipsel new file mode 100755 index 0000000000000000000000000000000000000000..a8e396c2ec36e25dd16cac0458cb291982ccc806 GIT binary patch literal 2484 zcmb7FON$g&6h3W@Nyj|w84U#Tum=Ulz}#tdRLG08^EN#Xd=M0|rh2-jr=TB2bqyIw zAPs@60&yu>xy&xVK*_=%ke@IM7g;JOxDYzuSGP`A(MCxQe084p+}Am`eq5+6777JX za-=BKHXM;z@JDZm94tvVD`j}5WI_%LOo@+vA{l@|2O*9D7%H>S74JeP=woj9!(|AH z4`3tcF9M)&6MjYBuQ_a({q~WN;QX9{b68seypZH(4z8R21NiPCNbx&#g1L#;5&bi* zpgz-Tm|oYM&SJ|4aEx{dnUVSSb7~^?e<~#Nl(7&Fcmd9}-2^`bz6`t#lz?+Mi+v=1 zA2Nw&dk1^M@ zo9CD}Y5W&rU6EL4$H93&q!MPJWHqk$$>1aC8ec*@_A-ngM?4Sz?r{7e`E*2|8_~ZU z(Z!v=zE)jbnR9C^Yu~J_Es5)1uFbov)eUjCV}CCOYk09Iu5X!J@AceX>_zlIhNII! z){LGH?w=f!2Y*aHzUNHh0w@3e(}DhLQSP5C%6_{0EdCWze`#*+v+~r^+O<>V^QK}d zlDYQXY}=p!C;{IcvqhQ$ABsepxN1#0&itZuL*KS z>{XJJ-T-^k>h^{=hq~PYd&l~D^CGNel9N^eYuW0yVp!u=x0Rvy$~quv9XLs)*&4U; ztiFt#q%-pRJDCE>NnV=FzLl`u8Jxe?*sd5cZ~{R%5#D zh0~GWGF!va7PlFSyHs1hSgpD1i;GtluDVyN7i$adu(n@r0UE*sFubk>xuo1)bQBLNTi=8&`~154KxgvnM9h1|C96$WgK(P z18iQ#vEPUuEf5≧O_iTN1Y0T`!q-9=pT;K4?skN7U-UUxAYT{~^eMfwF$nfg=2oT6968TeiTVD5 Zk>mOHaD7|{|FQY09^$fphD}NH{R8kf?yCR* literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri b/test/tools/llvm-readobj/Inputs/trivial.elf-amdhsa-kaveri new file mode 100755 index 0000000000000000000000000000000000000000..9566ed5c0f1481484b014ec74f281531f0d2ac3f GIT binary patch literal 13208 zcmeI3&uSA<6vj`+(b}rC8$nQE7P^v5gM<)VjS&Aqg)S7_+0+CQm?R}LVgi!l&V>)) zWB3N`BlrL=d<=Jb&b{ZGj6)kr5$WPw=>5()ch0>hzggYeJBLqpo(8oVp#}7l#A^pW zlIXzKh8iphwP~IE>)c;q|FVnu@x%*{`u0nwhF;b9CDu^vNIGtj>kGZ3E>x#+wdv4_ z@=@!C(w2Wr^4G*vzpD3i0Vu{LysFM`GAB1^IZxP7Lpp9%_0eU&c%cy6xrto9@yOSh2j@`$d00fPJV$1GoEIj)!6B zw!+KE1+r?%@$Y~Z-#1?*hxFx$f?!b#!UfU650B#0(FLm3N-+XPzz7%tBVYuKfDteP zM!*Od0V8lO0^8|uzt!AMd-ZfUem|?v+O2x48E2FDp_@ z5Eb|B2mLgiy!|klroC19XC1@2g?5 zdDrNeEp8}SwRmrSbUCj616H~8f%Q-OjsfRGs2|HTIag|u-{4xKqUu7x4gMoGid+Om rb)Hw{7~9p~-{BZ)FF*Q5^HC1o5$Xs39ZouF{m+_sqdY0w)%E`XtEE!O literal 0 HcmV?d00001 diff --git a/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64 b/test/tools/llvm-readobj/Inputs/verdef.elf-x86-64 new file mode 100755 index 0000000000000000000000000000000000000000..4b907694e800a622f28357c2cbe4a4129316febf GIT binary patch literal 2256 zcmcIl%Wl(95FIBah4LzeMHe8o5(^e!DToCNgo2<&l}bRMA|W9x2Zy>+6QkHbvVmVf ztoj?U%90P@16XFq7WSYp$DZT3ZlbCZOy$g-$GLOI_l)~tZF9ZiI48uUD!0X}4hTN1njhvCwQd9SwJ`D?(^~n6x$IV8K!j|Tps9_~rQ_Esz1BaUQ50@;@^e8ih znmBBC6dccK+Z|BL>RFUus(wHuiXvwC-4(rwxdWquNB8$ocO&&Ua4Kx{qPP@Z(bkVaZ$(- zyjR>K@i!qn`Zwu`?=YF%+C6>P5z_8`7o5w_nTXK zN4vpF-L2OTsfl)-%DiwcMwXBMr||~#o#w|~P2>0K41dPsgM7U@BOmuLjo;$kd)B=3 zVB_wDsyTTN=4iSpvHFzLLBRJo-oT}ofYCaPt^7q&+*KoUROM3jW*{m z*C?)nIQnE=_#ikJ9)2JA{iH@tQOfu&4FbzK=Nu6~GBD4_{@GVw&1 | FileCheck --check-prefix=ENOENT %s +ENOENT: {{.*}}.blah: {{[Nn]}}o such file or directory diff --git a/test/tools/llvm-readobj/codeview-linetables.test b/test/tools/llvm-readobj/codeview-linetables.test index d124e6e2d454..880b6d52f095 100644 --- a/test/tools/llvm-readobj/codeview-linetables.test +++ 
b/test/tools/llvm-readobj/codeview-linetables.test @@ -50,7 +50,7 @@ MFUN32: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF2 MFUN32-NEXT: PayloadSize: 0x30 -MFUN32: FunctionName: _x +MFUN32: LinkageName: _x MFUN32-NEXT: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF5 @@ -69,7 +69,7 @@ MFUN32: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF2 MFUN32-NEXT: PayloadSize: 0x30 -MFUN32: FunctionName: _y +MFUN32: LinkageName: _y MFUN32-NEXT: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF5 @@ -88,7 +88,7 @@ MFUN32: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF2 MFUN32-NEXT: PayloadSize: 0x40 -MFUN32: FunctionName: _f +MFUN32: LinkageName: _f MFUN32-NEXT: ] MFUN32-NEXT: Subsection [ MFUN32-NEXT: Type: 0xF4 @@ -103,7 +103,7 @@ MFUN32-NEXT: Type: 0xF1 MFUN32-NEXT: PayloadSize: 0x8 MFUN32: ] MFUN32-NEXT: FunctionLineTable [ -MFUN32-NEXT: FunctionName: _x +MFUN32-NEXT: LinkageName: _x MFUN32-NEXT: Flags: 0x0 MFUN32-NEXT: CodeSize: 0xA MFUN32-NEXT: FilenameSegment [ @@ -114,7 +114,7 @@ MFUN32-NEXT: +0x8: 5 MFUN32-NEXT: ] MFUN32-NEXT: ] MFUN32-NEXT: FunctionLineTable [ -MFUN32-NEXT: FunctionName: _y +MFUN32-NEXT: LinkageName: _y MFUN32-NEXT: Flags: 0x0 MFUN32-NEXT: CodeSize: 0xA MFUN32-NEXT: FilenameSegment [ @@ -125,7 +125,7 @@ MFUN32-NEXT: +0x8: 9 MFUN32-NEXT: ] MFUN32-NEXT: ] MFUN32-NEXT: FunctionLineTable [ -MFUN32-NEXT: FunctionName: _f +MFUN32-NEXT: LinkageName: _f MFUN32-NEXT: Flags: 0x0 MFUN32-NEXT: CodeSize: 0x14 MFUN32-NEXT: FilenameSegment [ @@ -158,7 +158,7 @@ MFUN64: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF2 MFUN64-NEXT: PayloadSize: 0x30 -MFUN64: FunctionName: x +MFUN64: LinkageName: x MFUN64-NEXT: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF1 @@ -173,7 +173,7 @@ MFUN64: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF2 MFUN64-NEXT: PayloadSize: 0x30 -MFUN64: FunctionName: y +MFUN64: LinkageName: y MFUN64-NEXT: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF1 @@ -188,7 +188,7 @@ MFUN64: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF2 MFUN64-NEXT: PayloadSize: 0x40 -MFUN64: FunctionName: f +MFUN64: LinkageName: f MFUN64-NEXT: ] MFUN64-NEXT: Subsection [ MFUN64-NEXT: Type: 0xF4 @@ -203,7 +203,7 @@ MFUN64-NEXT: Type: 0xF1 MFUN64-NEXT: PayloadSize: 0x8 MFUN64: ] MFUN64-NEXT: FunctionLineTable [ -MFUN64-NEXT: FunctionName: x +MFUN64-NEXT: LinkageName: x MFUN64-NEXT: Flags: 0x0 MFUN64-NEXT: CodeSize: 0xE MFUN64-NEXT: FilenameSegment [ @@ -214,7 +214,7 @@ MFUN64-NEXT: +0x9: 5 MFUN64-NEXT: ] MFUN64-NEXT: ] MFUN64-NEXT: FunctionLineTable [ -MFUN64-NEXT: FunctionName: y +MFUN64-NEXT: LinkageName: y MFUN64-NEXT: Flags: 0x0 MFUN64-NEXT: CodeSize: 0xE MFUN64-NEXT: FilenameSegment [ @@ -225,7 +225,7 @@ MFUN64-NEXT: +0x9: 9 MFUN64-NEXT: ] MFUN64-NEXT: ] MFUN64-NEXT: FunctionLineTable [ -MFUN64-NEXT: FunctionName: f +MFUN64-NEXT: LinkageName: f MFUN64-NEXT: Flags: 0x0 MFUN64-NEXT: CodeSize: 0x18 MFUN64-NEXT: FilenameSegment [ @@ -286,7 +286,7 @@ MFILE32: ] MFILE32-NEXT: Subsection [ MFILE32-NEXT: Type: 0xF2 MFILE32-NEXT: PayloadSize: 0x64 -MFILE32: FunctionName: _f +MFILE32: LinkageName: _f MFILE32-NEXT: ] MFILE32-NEXT: Subsection [ MFILE32-NEXT: Type: 0xF4 @@ -301,7 +301,7 @@ MFILE32-NEXT: Type: 0xF1 MFILE32-NEXT: PayloadSize: 0x8 MFILE32: ] MFILE32-NEXT: FunctionLineTable [ -MFILE32-NEXT: FunctionName: _f +MFILE32-NEXT: LinkageName: _f MFILE32-NEXT: Flags: 0x0 MFILE32-NEXT: CodeSize: 0x14 MFILE32-NEXT: FilenameSegment [ @@ -343,7 +343,7 @@ MFILE64: ] MFILE64-NEXT: Subsection [ MFILE64-NEXT: Type: 0xF2 MFILE64-NEXT: PayloadSize: 0x64 -MFILE64: FunctionName: f 
+MFILE64: LinkageName: f MFILE64-NEXT: ] MFILE64-NEXT: Subsection [ MFILE64-NEXT: Type: 0xF4 @@ -358,7 +358,7 @@ MFILE64-NEXT: Type: 0xF1 MFILE64-NEXT: PayloadSize: 0x8 MFILE64: ] MFILE64-NEXT: FunctionLineTable [ -MFILE64-NEXT: FunctionName: f +MFILE64-NEXT: LinkageName: f MFILE64-NEXT: Flags: 0x0 MFILE64-NEXT: CodeSize: 0x18 MFILE64-NEXT: FilenameSegment [ @@ -406,7 +406,7 @@ MCOMDAT-NEXT: Section: ?f@@YAHXZ MCOMDAT-NEXT: CodeSize: 0x7 MCOMDAT-NEXT: } MCOMDAT: FunctionLineTable [ -MCOMDAT-NEXT: FunctionName: ?f@@YAHXZ +MCOMDAT-NEXT: LinkageName: ?f@@YAHXZ MCOMDAT-NEXT: Flags: 0x0 MCOMDAT-NEXT: CodeSize: 0x7 MCOMDAT-NEXT: FilenameSegment [ @@ -422,7 +422,7 @@ MCOMDAT-NEXT: Section: ?g@@YAHXZ MCOMDAT-NEXT: CodeSize: 0x7 MCOMDAT-NEXT: } MCOMDAT: FunctionLineTable [ -MCOMDAT-NEXT: FunctionName: ?g@@YAHXZ +MCOMDAT-NEXT: LinkageName: ?g@@YAHXZ MCOMDAT-NEXT: Flags: 0x0 MCOMDAT-NEXT: CodeSize: 0x7 MCOMDAT-NEXT: FilenameSegment [ diff --git a/test/tools/llvm-readobj/elf-gnuhash.test b/test/tools/llvm-readobj/elf-gnuhash.test new file mode 100644 index 000000000000..8642a4dc9d7b --- /dev/null +++ b/test/tools/llvm-readobj/elf-gnuhash.test @@ -0,0 +1,63 @@ +// Check dumping of the GNU Hash section +// The input was generated using the following: +// $ llvm-mc -filetype=obj -triple=i386-pc-linux -o example-i386.o example.s +// $ llvm-mc -filetype=obj -triple=x86_64-pc-linux -o example-x86_64.o example.s +// $ llvm-mc -filetype=obj -triple=powerpc-pc-linux -o example-ppc.o example.s +// $ llvm-mc -filetype=obj -triple=powerpc64-pc-linux -o example-ppc64.o example.s +// $ ld -shared -m elf_i386 -hash-style=gnu -o gnuhash.so.elf-i386 example-i386.o +// $ ld -shared -m elf_x86_64 -hash-style=gnu -o gnuhash.so.elf-x86_64 example-x86_64.o +// $ ld -shared -m elf32ppc -hash-style=gnu -o gnuhash.so.elf-ppc example-ppc.o +// $ ld -shared -m elf64ppc -hash-style=gnu -o gnuhash.so.elf-ppc64 example-ppc64.o +// $ cat example.s +// .globl foo +// foo: + +RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-i386 | FileCheck %s -check-prefix I386 +RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-x86_64 | FileCheck %s -check-prefix X86_64 +RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-ppc | FileCheck %s -check-prefix PPC +RUN: llvm-readobj -gnu-hash-table %p/Inputs/gnuhash.so.elf-ppc64 | FileCheck %s -check-prefix PPC64 + +I386: Arch: i386 +I386: GnuHashTable { +I386-NEXT: Num Buckets: 3 +I386-NEXT: First Hashed Symbol Index: 1 +I386-NEXT: Num Mask Words: 1 +I386-NEXT: Shift Count: 5 +I386-NEXT: Bloom Filter: [0x39004608] +I386-NEXT: Buckets: [1, 4, 0] +I386-NEXT: Values: [0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9] +I386-NEXT: } + +X86_64: Arch: x86_64 +X86_64: GnuHashTable { +X86_64-NEXT: Num Buckets: 3 +X86_64-NEXT: First Hashed Symbol Index: 1 +X86_64-NEXT: Num Mask Words: 1 +X86_64-NEXT: Shift Count: 6 +X86_64-NEXT: Bloom Filter: [0x800000001204288] +X86_64-NEXT: Buckets: [1, 4, 0] +X86_64-NEXT: Values: [0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9] +X86_64-NEXT: } + +PPC: Arch: powerpc +PPC: GnuHashTable { +PPC-NEXT: Num Buckets: 3 +PPC-NEXT: First Hashed Symbol Index: 1 +PPC-NEXT: Num Mask Words: 1 +PPC-NEXT: Shift Count: 5 +PPC-NEXT: Bloom Filter: [0x3D00460A] +PPC-NEXT: Buckets: [1, 5, 0] +PPC-NEXT: Values: [0xEEBEC3A, 0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9] +PPC-NEXT: } + +PPC64: Arch: powerpc64 +PPC64: GnuHashTable { +PPC64-NEXT: Num Buckets: 3 +PPC64-NEXT: First Hashed Symbol Index: 1 +PPC64-NEXT: Num Mask Words: 1 +PPC64-NEXT: Shift Count: 6 +PPC64-NEXT: Bloom 
Filter: [0x800000001204288] +PPC64-NEXT: Buckets: [1, 4, 0] +PPC64-NEXT: Values: [0xB887388, 0xECD54542, 0x7C92E3BB, 0x1C5871D9] +PPC64-NEXT: } + diff --git a/test/tools/llvm-readobj/elf-versioninfo.test b/test/tools/llvm-readobj/elf-versioninfo.test new file mode 100644 index 000000000000..e8113e4b2fed --- /dev/null +++ b/test/tools/llvm-readobj/elf-versioninfo.test @@ -0,0 +1,81 @@ +// Test that llvm-readobj dumps version info tags correctly. + +RUN: llvm-readobj -dynamic-table -V %p/Inputs/verdef.elf-x86-64 | FileCheck %s + +CHECK: 0x000000006FFFFFF0 VERSYM 0x24C +CHECK: 0x000000006FFFFFFC VERDEF 0x25C +CHECK: 0x000000006FFFFFFD VERDEFNUM 3 + +CHECK: Version symbols { +CHECK-NEXT: Section Name: .gnu.version (20) +CHECK-NEXT: Address: 0x24C +CHECK-NEXT: Offset: 0x24C +CHECK-NEXT: Link: 1 +CHECK-NEXT: Symbols [ +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 0 +CHECK-NEXT: Name: @ +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 1 +CHECK-NEXT: Name: _end@ +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 1 +CHECK-NEXT: Name: _edata@ +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 3 +CHECK-NEXT: Name: goo@@VERSION2 +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 1 +CHECK-NEXT: Name: __bss_start@ +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 2 +CHECK-NEXT: Name: foo@@VERSION1 +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 2 +CHECK-NEXT: Name: VERSION1@@VERSION1 +CHECK-NEXT: } +CHECK-NEXT: Symbol { +CHECK-NEXT: Version: 3 +CHECK-NEXT: Name: VERSION2@@VERSION2 +CHECK-NEXT: } +CHECK-NEXT: ] +CHECK-NEXT: } + +CHECK: Version definition { +CHECK-NEXT: Section Name: .gnu.version_d (70) +CHECK-NEXT: Address: 0x25C +CHECK-NEXT: Offset: 0x25C +CHECK-NEXT: Link: 2 +CHECK-NEXT: Entries [ +CHECK-NEXT: Entry { +CHECK-NEXT: Offset: 0x0 +CHECK-NEXT: Rev: 1 +CHECK-NEXT: Flags: 1 +CHECK-NEXT: Index: 1 +CHECK-NEXT: Cnt: 1 +CHECK-NEXT: Name: blah +CHECK-NEXT: } +CHECK-NEXT: Entry { +CHECK-NEXT: Offset: 0x1C +CHECK-NEXT: Rev: 1 +CHECK-NEXT: Flags: 0 +CHECK-NEXT: Index: 2 +CHECK-NEXT: Cnt: 1 +CHECK-NEXT: Name: VERSION1 +CHECK-NEXT: } +CHECK-NEXT: Entry { +CHECK-NEXT: Offset: 0x38 +CHECK-NEXT: Rev: 1 +CHECK-NEXT: Flags: 0 +CHECK-NEXT: Index: 3 +CHECK-NEXT: Cnt: 2 +CHECK-NEXT: Name: VERSION2 +CHECK-NEXT: } +CHECK-NEXT: ] +CHECK-NEXT: } diff --git a/test/tools/llvm-readobj/file-headers.test b/test/tools/llvm-readobj/file-headers.test index fd030ef0b56e..2d67089d6118 100644 --- a/test/tools/llvm-readobj/file-headers.test +++ b/test/tools/llvm-readobj/file-headers.test @@ -330,16 +330,8 @@ COFF-UNKNOWN-NEXT: Characteristics [ (0x0) COFF-UNKNOWN-NEXT: ] COFF-UNKNOWN-NEXT: } -COFF-IMPORTLIB: Format: COFF- -COFF-IMPORTLIB-NEXT: Arch: unknown -COFF-IMPORTLIB-NEXT: AddressSize: 32bit -COFF-IMPORTLIB-NEXT: ImageFileHeader { -COFF-IMPORTLIB-NEXT: Machine: IMAGE_FILE_MACHINE_UNKNOWN (0x0) -COFF-IMPORTLIB-NEXT: SectionCount: 0 -COFF-IMPORTLIB-NEXT: TimeDateStamp: 1970-09-09 19:52:32 (0x14C0000) -COFF-IMPORTLIB-NEXT: PointerToSymbolTable: 0x0 -COFF-IMPORTLIB-NEXT: SymbolCount: 0 -COFF-IMPORTLIB-NEXT: OptionalHeaderSize: 0 -COFF-IMPORTLIB-NEXT: Characteristics [ (0x0) -COFF-IMPORTLIB-NEXT: ] -COFF-IMPORTLIB-NEXT: } +COFF-IMPORTLIB: Format: COFF-import-file +COFF-IMPORTLIB-NEXT: Type: code +COFF-IMPORTLIB-NEXT: Name type: noprefix +COFF-IMPORTLIB-NEXT: Symbol: __imp__func +COFF-IMPORTLIB-NEXT: Symbol: _func diff --git a/test/tools/llvm-readobj/mips-rld-map-rel.test b/test/tools/llvm-readobj/mips-rld-map-rel.test new file mode 100644 index 000000000000..adde78784d63 
--- /dev/null +++ b/test/tools/llvm-readobj/mips-rld-map-rel.test @@ -0,0 +1,24 @@ +# Check DT_MIPS_RLD_MAP_REL .dynamic section tag reading + +RUN: llvm-readobj -dynamic-table %p/Inputs/mips-rld-map-rel.elf-mipsel | \ +RUN: FileCheck %s + +CHECK: DynamicSection [ (16 entries) +CHECK-NEXT: Tag Type Name/Value +CHECK-NEXT: 0x00000004 HASH 0x220 +CHECK-NEXT: 0x00000005 STRTAB 0x2FC +CHECK-NEXT: 0x00000006 SYMTAB 0x25C +CHECK-NEXT: 0x0000000A STRSZ 72 (bytes) +CHECK-NEXT: 0x0000000B SYMENT 16 (bytes) +CHECK-NEXT: 0x70000035 MIPS_RLD_MAP_REL 0x101E0 +CHECK-NEXT: 0x00000015 DEBUG 0x0 +CHECK-NEXT: 0x00000003 PLTGOT 0x10390 +CHECK-NEXT: 0x70000001 MIPS_RLD_VERSION 1 +CHECK-NEXT: 0x70000005 MIPS_FLAGS NOTPOT +CHECK-NEXT: 0x70000006 MIPS_BASE_ADDRESS 0x0 +CHECK-NEXT: 0x7000000A MIPS_LOCAL_GOTNO 2 +CHECK-NEXT: 0x70000011 MIPS_SYMTABNO 10 +CHECK-NEXT: 0x70000012 MIPS_UNREFEXTNO 15 +CHECK-NEXT: 0x70000013 MIPS_GOTSYM 0xA +CHECK-NEXT: 0x00000000 NULL 0x0 +CHECK-NEXT: ] diff --git a/test/tools/llvm-readobj/sections-ext.test b/test/tools/llvm-readobj/sections-ext.test index 19b7aa0516d1..70ae0f22a0e4 100644 --- a/test/tools/llvm-readobj/sections-ext.test +++ b/test/tools/llvm-readobj/sections-ext.test @@ -223,6 +223,7 @@ MACHO-X86-64-NEXT: SomeInstructions (0x4) MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Reserved1: 0x0 MACHO-X86-64-NEXT: Reserved2: 0x0 +MACHO-X86-64-NEXT: Reserved3: 0x0 MACHO-X86-64-NEXT: Relocations [ MACHO-X86-64-NEXT: 0xE 1 2 1 X86_64_RELOC_BRANCH 0 _SomeOtherFunction MACHO-X86-64-NEXT: 0x9 1 2 1 X86_64_RELOC_BRANCH 0 _puts @@ -260,6 +261,7 @@ MACHO-X86-64-NEXT: Attributes [ (0x0) MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Reserved1: 0x0 MACHO-X86-64-NEXT: Reserved2: 0x0 +MACHO-X86-64-NEXT: Reserved3: 0x0 MACHO-X86-64-NEXT: Relocations [ MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Symbols [ @@ -514,6 +516,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: Relocation { MACHO-PPC64-NEXT: Offset: 0x24 @@ -587,6 +590,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x20 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: Relocation { MACHO-PPC64-NEXT: Offset: 0x14 @@ -639,6 +643,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Symbols [ @@ -672,6 +677,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x1 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Symbols [ @@ -695,6 +701,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x2 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: Relocations [ MACHO-PPC64-NEXT: Relocation { MACHO-PPC64-NEXT: Offset: 0x0 diff --git a/test/tools/llvm-readobj/sections.test b/test/tools/llvm-readobj/sections.test index fe734d77e34b..54654e7070ef 100644 --- a/test/tools/llvm-readobj/sections.test +++ b/test/tools/llvm-readobj/sections.test @@ -172,6 +172,7 @@ MACHO-X86-64-NEXT: SomeInstructions (0x4) MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Reserved1: 0x0 MACHO-X86-64-NEXT: Reserved2: 0x0 +MACHO-X86-64-NEXT: Reserved3: 
0x0 MACHO-X86-64-NEXT: } MACHO-X86-64-NEXT: Section { MACHO-X86-64-NEXT: Index: 1 @@ -188,6 +189,7 @@ MACHO-X86-64-NEXT: Attributes [ (0x0) MACHO-X86-64-NEXT: ] MACHO-X86-64-NEXT: Reserved1: 0x0 MACHO-X86-64-NEXT: Reserved2: 0x0 +MACHO-X86-64-NEXT: Reserved3: 0x0 MACHO-X86-64-NEXT: } MACHO-X86-64-NEXT:] @@ -296,6 +298,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: Section { MACHO-PPC64-NEXT: Index: 1 @@ -314,6 +317,7 @@ MACHO-PPC64-NEXT: SomeInstructions (0x4) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x20 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: Section { MACHO-PPC64-NEXT: Index: 2 @@ -330,6 +334,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x0 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: Section { MACHO-PPC64-NEXT: Index: 3 @@ -346,6 +351,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x1 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: Section { MACHO-PPC64-NEXT: Index: 4 @@ -362,6 +368,7 @@ MACHO-PPC64-NEXT: Attributes [ (0x0) MACHO-PPC64-NEXT: ] MACHO-PPC64-NEXT: Reserved1: 0x2 MACHO-PPC64-NEXT: Reserved2: 0x0 +MACHO-PPC64-NEXT: Reserved3: 0x0 MACHO-PPC64-NEXT: } MACHO-PPC64-NEXT: ] diff --git a/test/tools/llvm-size/basic.test b/test/tools/llvm-size/basic.test new file mode 100644 index 000000000000..8b2d66eef3ce --- /dev/null +++ b/test/tools/llvm-size/basic.test @@ -0,0 +1,2 @@ +RUN: llvm-size %t.blah 2>&1 | FileCheck --check-prefix=ENOENT %s +ENOENT: {{.*}}llvm-size{{(\.EXE|\.exe)?}}: {{.*}}.blah: {{[Nn]}}o such file or directory diff --git a/test/tools/llvm-split/alias.ll b/test/tools/llvm-split/alias.ll new file mode 100644 index 000000000000..18e0e7f12d69 --- /dev/null +++ b/test/tools/llvm-split/alias.ll @@ -0,0 +1,19 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0-DAG: @afoo = alias [2 x i8*], [2 x i8*]* @foo +; CHECK1-DAG: @afoo = external global [2 x i8*] +@afoo = alias [2 x i8*], [2 x i8*]* @foo + +; CHECK0-DAG: declare void @abar() +; CHECK1-DAG: @abar = alias void (), void ()* @bar +@abar = alias void (), void ()* @bar + +@foo = global [2 x i8*] [i8* bitcast (void ()* @bar to i8*), i8* bitcast (void ()* @abar to i8*)] + +define void @bar() { + store [2 x i8*] zeroinitializer, [2 x i8*]* @foo + store [2 x i8*] zeroinitializer, [2 x i8*]* @afoo + ret void +} diff --git a/test/tools/llvm-split/comdat.ll b/test/tools/llvm-split/comdat.ll new file mode 100644 index 000000000000..45faf4bfe26e --- /dev/null +++ b/test/tools/llvm-split/comdat.ll @@ -0,0 +1,19 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +$foo = comdat any + +; CHECK0: define void @foo() +; CHECK1: declare void @foo() +define void @foo() comdat { + call void @bar() + ret void +} + +; CHECK0: define void @bar() +; CHECK1: declare void @bar() +define void @bar() comdat($foo) { + call void @foo() + ret void +} diff --git a/test/tools/llvm-split/function.ll b/test/tools/llvm-split/function.ll new file mode 100644 index 000000000000..37272dbbcee2 --- /dev/null 
+++ b/test/tools/llvm-split/function.ll @@ -0,0 +1,17 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: define void @foo() +; CHECK1: declare void @foo() +define void @foo() { + call void @bar() + ret void +} + +; CHECK0: declare void @bar() +; CHECK1: define void @bar() +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/llvm-split/global.ll b/test/tools/llvm-split/global.ll new file mode 100644 index 000000000000..6d2425691e10 --- /dev/null +++ b/test/tools/llvm-split/global.ll @@ -0,0 +1,11 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: @foo = global i8* bitcast +; CHECK1: @foo = external global i8* +@foo = global i8* bitcast (i8** @bar to i8*) + +; CHECK0: @bar = external global i8* +; CHECK1: @bar = global i8* bitcast +@bar = global i8* bitcast (i8** @foo to i8*) diff --git a/test/tools/llvm-split/internal.ll b/test/tools/llvm-split/internal.ll new file mode 100644 index 000000000000..ce4272c5f0dd --- /dev/null +++ b/test/tools/llvm-split/internal.ll @@ -0,0 +1,17 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: define hidden void @foo() +; CHECK1: declare hidden void @foo() +define internal void @foo() { + call void @bar() + ret void +} + +; CHECK0: declare void @bar() +; CHECK1: define void @bar() +define void @bar() { + call void @foo() + ret void +} diff --git a/test/tools/llvm-split/unnamed.ll b/test/tools/llvm-split/unnamed.ll new file mode 100644 index 000000000000..fd24b4ca92bb --- /dev/null +++ b/test/tools/llvm-split/unnamed.ll @@ -0,0 +1,31 @@ +; RUN: llvm-split -o %t %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s + +; CHECK0: declare hidden void @__llvmsplit_unnamed() +; CHECK1: define hidden void @__llvmsplit_unnamed() +define internal void @0() { + ; CHECK1: call void @foo() + call void @foo() + ret void +} + +; CHECK0: declare hidden void @__llvmsplit_unnamed.1() +; CHECK1: define hidden void @__llvmsplit_unnamed.1() +define internal void @1() { + ; CHECK1: call void @foo() + ; CHECK1: call void @foo() + call void @foo() + call void @foo() + ret void +} + +; CHECK0: define void @foo() +; CHECK1: declare void @foo() +define void @foo() { + ; CHECK0: call void @__llvmsplit_unnamed.1() + ; CHECK0: call void @__llvmsplit_unnamed() + call void @1() + call void @0() + ret void +} diff --git a/test/tools/llvm-symbolizer/Inputs/addr.exe b/test/tools/llvm-symbolizer/Inputs/addr.exe new file mode 100755 index 0000000000000000000000000000000000000000..38d88b65741e9bc98b5b692fc7fe5f649839a2d5 GIT binary patch literal 10109 zcmeHNU2I&%6`p&qf3dOGb|7{V@B(W{0N>b&u?&>tI*yawXcJ6u+JXqzYwyP1cD-xu z-c4-Ek1IpPDuL#ql~5%_UQp8q+CEfOgeq{FinNr6(3eInm0SeW;YW2vN-ZjIzcY7^ zefN4@T8X|?8ENOt`R06Q&di;inLE2r^hfvTnkG2uVuv8Dqs)?!n+q|wPO1v)5!J#H zH;bD@1(0%lELlU+EwNlwu31(oI|H%|pB~)>dUQ(`SZ;D*!IC2+N$ss9OBG#NR_UrN zg$PQ3coggW)l!8Vk_DC=ho$0S9;w6=bqH6d6s?>_JpMOplN!Eqi`?X5K~%_@GDs=H!qDmmQU zwKWxQPbJg&$@a;f?)L63GnX;9$$kgWJlXC&a7YHi<5&-kVy5XtbAaj_ANyfM-_&~c z#S<^*_O%Y){>p8SzQgl_ByqW26pIq!{3)vV0%Zg08%x;Sum(0@MjX>JLr?QPP@^`#3?J3opP-h4P*cso4v&ceW8TT;B0RHL7cRVeOh0_?3%ZFd3 zs-?O5)@j1dkjD0B7Y*2l3%?AXeSc?IyAXbJF%R;~WEg(xcOZuguetoU6@U7kEeJ>C 
z8xDav|2P6Y{95_7fV7X#QUAj;_smtnz`1@ogms5rUBqeu`=twqFMSJ?`c`o%2Z@dh zyt)bB&{t@DI%nr!`Fv@~1!&9xyaeD3J+BAPxxN1Q+!0D#oxtZ5@B*Z`??m}`D(pII&rIca^=FM|s6$uMl@Yiy0#`=h z$_QK;f&cRn2;f{sX`#%OQbERNMm~4es)_}}wNtN4qPD`-TU0&2H*HipzfaJ)n$m)L zzvA~duIFv{Io%>N@%PV{G9+hp3;D{sou%66cbYlfLah*2$y|t?O4caZtmHLHvf#7w zeEpDw_>3UWAVB!MQK<;)m7G=WGk#j>+z)QATG_LXyczSmf9kGe!I-=?Om?lJ6w-I z1ReC;2lhHjq$SfM&asR*f}(tvI^bf+g?J4gcY0OH(XJKk630Otl3q+GNuS*)X!#nD|%IdoyC@e}^gVt8pRBqG-)^Du)^Ajx%T4MzwGBu;_u9~}RZfS`$0S#jA z8|A{qB25TC1Z77}4e;`MK}dxOSB({ad7{N?X1kp&;3C{Ep|58QAzi0+Hj(YuYaRf< zx*WBg@F2c|k1ptIt!e+PyBfK?Ey4(EV$vJ|j#RC4Z;TqP+_=g(rd7T1qOQaGznT@~ zaH}&u!Ot0Q&>}{P`?M#~rknWH-4G4>3#`znQUYka;1*Z%)yzOCX5h{vD%n{pXk{>BK*;5{~;96{3=I7DX3R$Pz6xtuUZGUMY3jD;Cb4ChDf ziTp4a2~@axGJPb&<=F6WHgS|S+%_e+i~-|fDudRReK}JT@b#r6+TUpExQ~(UD=la% z_m;fBNe~%NGuCfHfmR0Q59$`Qm3zUU)K)Uy9~q&g%gc9kOIA6`VMuB#nV%m<72TI| z`_JhXG$Eez@v*+h#QPij*$wm)>{-m8)h!^jUrVukKkMhawL;J;NP9H%=X4925OWF` zlA7B`8ar;E_tUQeqdlDYmSI5?!Xglf&jGHsdi)7#pm`oezs%zP4HOiqGd5;?r5p&wmye z#l`;lT(_Y3b*S?ue(sDFQ1J3andMd~LvEk#`5ejedp&h7I&cn_GjeZB{uqfQ~Qy{oP0jDLU%FQ4B7<`sW46P08h z%ga7~ATT5?11dm{Q_)w&^C>FG7x$m8uPFr;k=v0VGXMNp0!Hz7uz`}yf6Rxg;glSz zqN6ZlUeo7Q`=%E}*>Bst2LHE;PX`-s^7BJ$@W0>SvQBu(DEs-_*5Kcu4t>@a@OLP_ zfBa&Ke_#zh#}Qeh{huiQkJsQcJh%q`FrBTD=wR(l+%97@OgwiOBY8~m<355s{#+(2|ay0xm(z-l{K5;ufX z5--Df#uENy;N&x_TYeb*slfT86kdsQMJc>WOqax~1)m3mCmFb<;CdKCsJGI%fBlZx68`jKMpjoM`3|TY zc&YeXfK&W_KW%H+@09*atj{B{RLb+$4>6K;a!wwyB$Y|yYN7;W+wqJ&n#v5vQg+VJGBmk;`%aN@H2xf6lD>UyZ-o>&)|@WDn!Hj6D`h$I+%8xfe~w zlW9AjOQ3@*;Zg??M;f5&ppCmLRhGBU_P(R`LG^U|Qn6ePkJh%)B_2xZD?jU^p32b^ F{{q#g%t8PF literal 0 HcmV?d00001 diff --git a/test/tools/llvm-symbolizer/Inputs/addr.inp b/test/tools/llvm-symbolizer/Inputs/addr.inp new file mode 100644 index 000000000000..4de096479dae --- /dev/null +++ b/test/tools/llvm-symbolizer/Inputs/addr.inp @@ -0,0 +1 @@ +0x40054d diff --git a/test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp new file mode 100644 index 000000000000..3a832a9fcff2 --- /dev/null +++ b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.cpp @@ -0,0 +1,19 @@ +// To generate the corresponding EXE, run: +// clang-cl -MD -O2 -gdwarf -c coff-dwarf.cpp && lld-link -debug coff-dwarf.obj + +extern "C" int puts(const char *str); + +void __declspec(noinline) foo() { + puts("foo1"); + puts("foo2"); +} + +// LLVM should inline this into main. 
+static void bar() { + foo(); +} + +int main() { + bar(); + return 0; +} diff --git a/test/tools/llvm-symbolizer/Inputs/coff-dwarf.exe b/test/tools/llvm-symbolizer/Inputs/coff-dwarf.exe new file mode 100644 index 0000000000000000000000000000000000000000..018053b979b9179a38870212ac33289972d96159 GIT binary patch literal 18944 zcmeHP3v^t?d7hQDvi#74V1anpSwJQZ1ff?Ff$iwZvg|7=o5hwv2+`}+>dI`g+THHn zwPnLGHnQ7_MLj+ZaBRv+wmE4a9MYOopN2;uD@?H*3@DVOPH~HPG!Zzo4&i_kAlvW1 zGjnI}O15(jr>Ed*j^_UJfAh^hGyk2rul?=S-E7*VO^dog;`rMb>-+O^UhyTyydK7m zvy0i>0zh++7xJn%*FyU3vIYuc70BQe+mMp%Jpe@J~;v33Zeq!F)yoWWQR9n#3e^C9OXjd+dqHgY)~_aPJSVp3Gni1%gQ@A(90 zkcn4~oRc)-eU(qd?tL7U#JeC~J6PR0%+--R-+Cv4$*#>NPT z97j$Q4*4tOJt$8?`mP5?9LO%@qbQTygsh>w9Wsr)6J?T5AZJjXNq?LN^r2rb)<;Ue zoXKV<_u(Fnat+G*&@+qmx4sSFusacd@*u7kbs6FuApOzRc!=!3%De9$vRSLB;n1-r zJ?MQE3XI)PqHuIa6Fn#<-*}9k3;VCqtCB|=4h=m@cl)wUv#EX7d%y^yTl4&KnR5}?`SY}@)hKpOaL4j;yRn!xpvfqg|}@@kk|kt)5C zEPH?R0?o?%HblpW_g`(Ta?BQ`Pm&m3QSHlC7Qp1NHv?c-X3Lz%=$UIW4wGZzhdwW!fkdPzgJA#+?Wf28y%+E``!lF{U>ACnu76&(A|l=ms1Lnl1RIV1YQ z$uGjtvF4Qb`?mSy{5ASSnixGQ7Lf0VA`Zym?sptnR&Q{Jx& zn+wpYn78`xC()`Ktqyv1)WUyk7rX@L{Drp_fIhVB!{^~n>tb~AE=rR(qg!@kl8%)30UM~?1Cc8ID;y_(@G*JyRuqSp4dTd> zC8@D&m>8HyUN|&ZRg^p@rTa#9O#D9l%b^nmo62|mj%j;6 z)-16$CIeUwv_5FRz){)B4FKfAX~-O~H%dpaWocW(p^nr`9fhMU7x@qz!iD} z$_;Ihqxxf*2om_0Kxq&UB-u!`^0aAaoMDUP*dKTC0xXR9(W)s_!PuU)Y21 zQ>7K)WhMpjg7`bcMW)8sX1k3s6Gj_RgtWtYwN-h-xuXxYk@9Qh^?-r?_PWv8&ppTUS} zeNL`I&-w>PF35avBs)fn{(yZR#X1^U@Z#gBA6YgAajZ;F71FXPz?L04P@*q+HU3V{ zlF38YQlN+x@8B6iatZ{|CNFYsJm!=W8bxcFqs?;cHI zL)c8Gp3k)ub9EWXE?Rg&?oBGYs2{I#V)H;`7u}nu=hx@!&*t&B<>}X-MqhZE`mg2H zKXsmayx+>xzn7<%<+ab(|2~f&$;2QVhuzt8irAdSVm7C~*s<{g zybnwu)3ZBC$3|%pD{Y+3O6!XqduMU`wX**k@J{d~GCk{4a@q=>C#S#MGH1^m^F6RM zrjS_{WAvT#qa5t_3C7+7HeLEsoTHkMYlQ3<@=hW52>GCp`-OZ~$bS=ZQpop(WayjH zJRvU@@-iVCh17&xBjoi$ZV>Y8LdJ#URMqjgK%4D^8<2%D^{fSVXBF#0*~wE}x6= z4-+8!kkEBo5|%m885%HK5^$6&6LXUSSSCT-g=E6A^;AcaPI2R#%ZlKq>2$hNp*FyX$73X=1 z@D14}RgiuI-N-!^x8dk*#X(liCipB?yqPL^?zd zJuDdQ@4upNb0FSd6O4&x!r9I;IM-V^=j`8>papKO^2`>NdjOZ;Q!tD0ne?gmz!xyW z0(+D@aikxh@nbHd&DtA?vuh#g5e#A`osk|m2eVS=pRUw+8a*RtIy0$hJs@xDLdpxd zjQg0j%?x>;V}UN>lJ67cv=nN(dq* z2oW)>gwV4B5D`sE2yaEDARUmr%m<$2)Y=HOl24Kxu{k1YRs)A#7pOnRqR1LDA+RGj zZ35w(JE2L0KfnAP5K3jpoU_l?8Kch)9C<<@^rJ9G4he)_NjdT}fs{JP8v;>wek%~# z74U|LnI#YrGdT$LBM=d%OB{ORMJs`j4USM!M))FLL81vJ%gxXO`Hj9?~)U_v%mjwdviz9=q3vSOv z1|n}_3TixC(1N#;<4FPG9G~#veL%P!;#7C6sjtIW9uanm327iiTDFmT@_In@&XIZ` z(OjevKu#;Ueky?S)k5|Eh~5<uQ@_9l^nmrkw`iZ8D!frDYsXTI~)vyy$y{c(yJc> z;g%&b50mDWCGtfe+_Ko+O=N3AShoHARYHbE@9JFa%|+_y3LwW^?A!}B09EH=HTqJB z^kEnrbuQv4Dny=(oi01~f};>=XV7KmX>b%Gz3Kz1*5NzQI6~gQs28oSlXIz_s&`#lq?8^rff3U-mL6)Af4#yo1eiEghEg45> zJn|PnZif&`x1t5N{y$J9C9 z1D;x$!{|pL@|^AlN1f9>;3!0{%(TnS_h?=YBJJD{MA`W#a1vh_ticWoH)X3aN=(!Rq#w7V4d^9~9UEa3n&{qQvR$Cd-QR z1#l!HIGrsm<1S7eI1-VZ?(QQl&Ju7WqSpK*W`aLw9S%f%;Bf>nYlFr)a*eGA!a0Os zjny@PC+*m4!$zbXwKiykXpFun^`WQ`>B9_a)Va6~9EHepaTFY7=bI=hMA|v*vhxr) z3X#_h*$*F21{{gNKjN*V3wCI46*BotzC;wK$KjyBeg-=dQJf79$B3Al>6dXr)v*I*?BN1@K zdz|0t3k9|w9EotRZUpisrU`+W@>mJ1G5DI+Jm*Wrk;6y|erW@GW(r8pu+ zrPjqc2b}vI4$Zw5i00y~077p!0ENeH0^)aZ!a#oH;%o!5!Ns`~$bJ{+J|Hm{=es~w zxHvxsvfagb8AzXt^CpmoUC3{M-0eabUdkI?NEMLXF3Xnz>2Ptb1Cn-e;y|8pI6OiJ z|0vYO*#^$biX&onGA_5s~CI218uIhO2=1P07Rj!w~CjvmLCYNaJC z_owCNW}{UzY8zKyvtsqNtNbf9d~2?2u5M}3n5??Cb@}p+l`B?JRXwi~10-Xjh*&LI zJo&KQLpj~R?MWiI&Fl{(!w6~P0crrYL<}pOu!G6$=(OGdV#jU?B#dCtipKH1EE3pg z_94uT#p0$Bw28ck$T$0YiM&~~h}%YMOA9j$J8TZOk#%_KvXU|T!&{Czg;3p(@V)9< zR-KG&4n_K^b9$?`zf!r7T#_6|F3rINaY}bgu&1dwSrcc3d^l5DI`zT;eQL~^vFI=l zO^|!+)!2-%#lgXo#U|3P7uWdPTGktFU9D~Y&h~5982)S9*L1e6U8iYUH$0BkrwVSA 
zj{SuFh-rLMD~^*-H)`qZ)9{Sz17Z5_0L=CQtk>m%gvpGRSV#W0b!L!4Jekp+SQCwG zj)$y3Z`f>)h_FVRf6B`I^Lk1{a#@|KGkGRXWytAN%>NYjB$WbEgGtP9i`dQ4=r=;9 zv!PB_Egd{<<7uuK0gH!~+I!y+Z^Q?&JxKYF(hoilA%rL@$ajmeD31;*~H;2M~!9W~dHV_CM+GHB` zbtD-hX<>-@upRyDpoq4m%aDNx;{@#twQz!b%}hb)wI&ue2M`C$@bD7o6H&?r!kuXp zp@EoTKPK~HZalWDY!h>0qH~M9&J}U(j=vVR-Er46)SNJE&at0<1wFUSA7(|xG;!9` z#a`$1nj70~_v;483HH{bsBA=HyUKiav}x>^ZKqB+Z`zpklYn+VwgHMn7vtmaaL&|t zI5}6RM$At|M8nfX$e%WZUf2=q%^synW`?T)GmKaqLa22*w)$$B1!pC%BGDLtz;A?w_#N@O@nqFZ{Kzl4_eU!sk*l2dopbJY&%O8Dd!MhJwk_TT{IC?LBJ{1TpuIb2Zu#>maxu2od}!wS}~NoZBz{~)o;yDkA=ByW(-uzfBJ( zJw90m7)#gc;dDwJI213JETf4?yr@pp<_L$zvl;{g;1+9Tsb7X2q&hEz)`p~@I2r4# z?bqW?lfM%}{T3xmqjT3Zb#hhZGc%c#_WRBL4t{;@tr$C?fGxo7wN@ z(Z|gg(}YQ1RrNyiuYdmB^8L#gJF(4qI`~Iby%}?IQ_b1S*jFI*uCD{GhFGDCK%9M6 z1)cD&^^7^7<2}I?h!grAAbqaaKqstd$43kFDj?nytOGis57G%e4YX}!YzK4^cnPu( zI^HeptD6`*MSkEkWQ6>{Ihz?9hHeFlkTb*q_Cqc}C;TylLmvjFA$y<`o`sATcKo`iwJBR4L7;NN@#2ME_ z^5El$R?%11_!M2aDu0w2vTc1`RS(h2O11yWNyC^>ReRHyp((Zt<7pP=sgk z95QjmegKt<&`YK{J72&kL+C2mY{p<&!iJ$-ETO2|!Amwzzl|v*%CzX;fybrK=J;{U zrvY3W1vrR1k0LBa$X3PnMT#uEd6H%&oCbJO!mFBPe@O$|3j~x$)&AZhWKMKVlF_d_kX9g`ZFNgg;rxi9cqz3wi{VOa-I~H!NHTw@1Q-C7o&@}pk*a= zi!N?;yV5Nwuh})h6(NHP^%T}?7z*r*xZd$0t49&OuQ}`k0L9jaXZD5vP*wv_lg*uY zvV>I`SM5uYzhv`T%`sQCA43F1Xofkpd`P%QW7lhyWyC&$>f^~~M~?oiMz4U0d^>%k zv`4c|7%A=8cX#1qP+R93u9yZJ0`cC-#2v|ghb1vz;m*|Hpth`Y1&h<2%j+JBiOJ4Idy%DQuQ8OUaxSNi}&KCvbLtCq@+P>k+n>^Kg_s=P} zuV+`%8l|xy%g%A-59IkShQ{HlRK)5`cG2Zi$w||c8RJy8R@dnX!9cl^Iiw$3t6YM< zc!00sCF8F2V54?Y=N{Hi>Uu}zKAIZ!@7%0wGkb0ZX2c4iS61zlg;-Xqg{3HVLF4U2Qjujd@JF~ zI9JZ7_Vc6<35&o1)DY}Gn-EfK98*}TJN4CdK8*Pr@sNAPHvJlgWq(d<+gjbglULNl zEnGRK+PmPJ+NGcOEjT`V!T!C*j^^2lEv>gp7I|u8bwl<+w8tR1 z;;C*>?F|}RNT4ZsMAd$m?uo;rclZJAa0R>l2u7^!`L!4)zkKFP>C0!-aoYT&+BzEh zD7E0gVbrIZM*ve5Tq&V#W5&@MXLLKPgCLY`}4umLZjbmhK`f&-y3^Y!{%ZJ6tfAnKBk*iRey z69Zo|@Th^42Ikt`tn+r6R?AYXHx%~Ek|=(?rK4jD>oEB`0#W(?P!FouR+G=S#utxD zflx3M@a^!2yL;ef`{577rON2F_qg3zlbZImpV@Ekmtx^E}dLyAQlZeHnu#s8?!!)<1sE%%CHVf(nX=iV2E}cIN&4EkUu+#vs6a2@`BY6vUaLQU@9>XvDCw zu$%W@2`ek>Qd&@vVS0TjGAc7lGgPW$Qqhg8ROI}>>&$?3?|q)<|9d{a&yVgt-?i3$ zTYK-d_g;ISGv2c5@UpXmt4%Dw-8|dTX+1n+@3NOluZMl57r|#viz3&Q^c4Y1 zN}m&Oe(49mPp77rJ}=;U0dHHi!bxRppA@sbeNeiDdBpqf?M&#eoe{A^SsB9=unhB; zpwKiD7>Q5@8Dk65B8C|Rj{f?P;rt4AR#?hMER*D~6~)OSMNBjD7MHV(Zu_Khqa#~b z%6|P11#;~6NwFhmx$;Y0kpH?0RZvw0{_ofccaF>fNwwYqTuZfI|Fc1rS$WG{%OOg+ z5s!AnlcvZ2EnV}hlKjG=6^x(_1Fe`Sr2B&1|EOpk>#rBZ`#DLp?b|>gP&Gfm--1 zn7Z~ZyMJt$Hm0Ww;+UW8j3c1#0v%9y;>p+eone{mRR7yyw~BLqvHK$i<;$KJ!!Rw* zIU{Whe_?;wpx4>4?RMsHAW?>r1BsD%7D%1U2gvR4-ztsq{5qat7I`-%Gp-?ldy@If zK~^QOD*kMcH3_VV{~*ZP1lGnM46-Q#o5KGs$fgNw8o!&^@=Z&c%k6?>3BQdb1;hAn z$5Ea6y^L>x{~PHD&(}yWcp<-nqK~EaUPJHl+34B=8DCFHA)|rvZ7SRna81tU--(CN zXzsXxd{N~Ao>1nsGVa0I{Je<+3YmahS^Q8cmwy-o{;q`axuwwHVjM(=iI(x7%@i06 zR4s$P^7aH=Q?n4Y<%)Z_gHIccio1lEMFot5&-T|1nc<<%R8wkS56=iG79sguXOozw z1HTU?`nncD=2B(_6Y@P6P)M3xF$K^m5t=Pp6o{q)@h*@ADhFL2lEs@y6XlicK(rL{ zJAFr&9mZ%S(P%9jfg;_~V{@oK{qw^o4@a+gpKU~i3>uW`-xMD2Nr*-M%8k9gR9WSm z!Y$2iX)SdU=)gUNym4UvLK}vUMxlq5fw{p>DW<+Yj2^jLAoxclNOA*cDu~CNfUO^P zW=6rZp1`cAqxLVN?z)&2Y`BNz??>`b%V+kr{3sF&EgwfM{~5#k$CfLbZOabxs0`&V zJKR4A===r9=^aM(1QuP|HI`v8NDDKRXR`E5rS8%EYtzv7(aS11zBdWg9=+{SB?|Gm zW$5>d{3n;HHCKdwJZa{_q^_bOvjWjk(6Nq$wcWqckh+FB_;nPA z+z{0E^h*~9r6j05^{t)hia5kASnM7rFbC@~cZ?B7DZ5fgs74yUZuo3Z`}=l=agB8n z^50BF(WoUH_7aFD(IQ?vonf2~ENM<19=sk46WO+Vw)4sqOpFg<4#y-3DoVgnf(tMQ zuQ?eyBb{rUlPJC_RIsdoDG<>hkyP2d^sxWe;j_k!oziF4xmYHAM=uZe4P$hc% zIp0ANGA8O>>Xh;|6ichN*UV#t*oe(1(6(b?k*J=Q z=SNZ_pO%64Zj><*TXsPbG%cT+Oe=8O*H*do)Rr?YS3u37xe)YJF72q?*j~ButoKJr zIb{NGyYaGH=sqDuD;e(4i23DV}X^aZB&&1c;uv*JnHaF>hQOO^~G68 
z0%y6O9WmY`s0)42h{?1HV>tPXzXbx`jggE?3h6MC9_IfXj^z*&2&00TBIVyjQLxCR zhil`|uET+76|_^Hs3UGmbHw9^e1{Hu6NzIE7vwpMsk5R`M!`^CJ_;pbb}a2Ju;~io z@66_}Qui(L9j}~!cFPI(xPog{sa+0ddnfXg3&SAu9uHhv43DEO9wkQ+Y}It4Q~cBFYy68djre1R>S*9|N+V#^M+(l-n_OGm0iNc{517%>X)pBNd( z2K?WQOpK)BSd@JeoQh=X$^GF!zQN#%svvf=)rQRAcD8cba;@d-b}wxZLh9yK{mm1fF~BmkCL;W`G-bNk#* zZ}dlwiSoE3(9=DYW?Ij%00dm`1@mN-VcoQ3@xNkB%Pi(aRXMEdh5|N&PeF#t!NNXh z%gS)@?%<$f=}7Y)lZbytKKJop8Cm>Sn4aE5(t+_##maCj%!IblEL*e~{S;775Z2K* zgk(fAQ1WsI?QZzvfA=36Gf|xMv)zAj%#GGSN1hLVhhz16utzZZIQ>@W4cEjl#~eNNcOY5^t6x9X zgJt|~1uOtNZU+7F7vXx5vrqO*y(si9m@Unvle|5mO~brBV)q!|F!<@r<)J~WBgS`V z>0y2v+@@pXNHTm83c_Gg{>V;mXOuU;oAKuN1jon{J673=0#;U~c4KxfA+t~IQKojo zi3vKBaw2jd2T_&UiA0%b%p=mC`mVakgFt(cL< zt0h0d$`Id1@$~pl@#Jd2Rn0~DBgZL*p-#b-a%mm>cK^b0V;4b!!5@;r|A0jit05BS zk;aEF4+)XV=EckIBwhNBPXam}Nj4@Aelz}jDG8C4g$a>W2s7HT00_2vXxvT_Om=Zx z^`rz^A6yfiguII!q9YQtWOxB9?Ai$te%bi(lz7H?8^N0K1=01iQaAsObB1~eh9Kzt z_~R#x8wE3FTHte6Q;G_t;2-nHPME4LxEu)hY#V}Ibmnn5b2nJr1UB>=QGt(SBJ$%*3dG zJ2K#w2~FJ9pRBVlncqG!n$}(graOkIwz)5Ye7gReP=(e5O3?a0CD?sE!QfCcmIe|x zqYlcxr#}{PE(tX<;EAs8S8VTph>x!5kKQ*Rn%Wd9A$qHjqV{ZmGz_#|(6{}%NE+$~ z)LGkcT`U?IXvcYlw2Q-_1P7#Zatw7#U#t1+$>HM(y@^r^N`vRGP%3X{N>yr8aQ9`d zr1Gq5P((v&=Vft0cn+CvZA@SB45=50-a$2dEYw_oy@I}eR|IXKZ}5r*b!@^YEX2O! zXs6xlzC^*2lvjdk-#BTs2XbZemMhC5{wiEJU?=R{N~XIL1^CqP>9HL-rB&Fq0;hxa z=8Ph)jz+Fu6eoyixcWs&f`~?_U!)R5G%Ecfk3*1AgZgE;f{0AEU*r-*WUl?9t%9g& zplFXEBBSh2)hLLL3>38qqT@l4N0>s+EhKJd?Gns>KZ8O6hu7(MZJQkd!R`6 zD7C0}peW8G$QYPffBDIRNHS2ACWxd1MN0&cY@n!65Je6Yl?ftRz50r;BeQl^lbl4! zw1PS7g=93J`jXvQ7Y|Nw-D-*YkMoQMKWLM#zQv<&3et#&qJ{_D1C}*RM4G!ah@tqQ ze^|K?DXe~luzVq@z?JrM9y+sa0E?Mh)+eRI1Z5D?I{T!xWMiR92COEif$o94rhn$N z+u?(b%Hp?Ud!W;vv|t3EaS0b00hJILh~ACDaQNcK^acV*Ff`zCB}h>9UdWvRWaC*L z>1k7?(tOu4d=qgP8sqgySl0+#Mc9xqgHOT+1dnf#_n5RiPZCd0WK{%IC3qGJ&b;VL2e`^4nIh<+y*my7XJCf`E(e6DQ16ZbXf z9C$YY)!(T#f#^NZUKj=MMk(W(iT*`bt4WnW^ghI)OJcJ4CittqF5w=rp zjl?aVCMVc2?Nx%b>9f^cGyt-n(0lnE3_tQKxC>prVcnQCTNV_%Zt!j@WL$Cluje2y zF2t!qS`g3Q>u;VuPISTU|8jaRfpx~{QNiQR$b#SbF}M+1bSz94;djo6vrVO(J(ZiJ zzU;!v(Y{-ql?x=sXyFQUM6FQG++z1&{&RS$m4ke9BzGFa=-$H>?OPzFc>j?Zcg3ju z)Ye)q2+2W6^4tBEa|Qg6>S_sC;m#KIUcevKHv%gNrJ-~T8+K;md`Pp>=D zm)a#{@IRTnQ1rFk-u0WEV}2l<<4 zMYGHOr)R~_B864L<WEDK2GGx%K07MU&1yfq_@f(Ldk}@`?qI~Z~yMG%uHo3#j%(?uvpoI4XX()A%5;${EO9%hN zKcQvic)Cvdgi9t>Sot(kC0VW(LAXM$BCigolFfV3xmBq=T|+j(h2AHx5~I)kCWV3$ z7Aq17Y83MbE+{gn5UsMEPW59|x+$$tHt4s8reYmQf(xO#Khl4%>b~TDj+Q-|eo@ds zNRA6;ELr}NIZSO-QX=*icjrHsSdDeZILt2GyIBpv7~uZ}K*Fd@PUVse%p#+?9N z0vE`f#Wb*zj?jc!;eT2?bAqKU5TNER%z&p9gA)2~FL5)3i=gkdMe%*b3M)J{wCWZ~ zrFdbe(MYsGHzVOY>N|?8d`JfxM;`CBq>^vwV125uU;Xdv#$M+Jc3pv_j#BaSzQkJm959I$KZL{rSfDDHI=23s2!1w?K3``dF%zas<@Elf?cPjWiwo zM~!xIJ#vPdbn)dM2&>A*wk-koXu9XP3s!$DwS&s4#lwG>$ux&5t)<81SVU2Iwe+|w z14p{l(gSzvl}p>P;nA>Erk?fo;2z-&T8@=}aDkW~;qNe|Dsph|;QOfZ_LkZupu~ra zts4_ozbEs}UE#|^Zq7sm16P=vq|WXf`FR3B6OGDmnO3> z4aHraGjOE)%i;Rc2;F?cz`^WKBa`(dqJinL-Z`cV1qwEX`G6OLeO3F{+HM?;#yIK9 zE#oaGmiY|aq?#od{QWk0_+|(&P&hL9f3PJ_L`p;#;P?HVuaF@yge)3j1TQh%>KPE0uNCPPH~g8pMX+5i5$ zo1`rJkblkWDW1*nxUveOmXUQdGFX%^4>BIzIM{czj$#TV4*tT0K%lM+jBvd^p+J&V zsirvo#2+HU3SzNrC_jE97ZbTsCh{JNuewcA{tLOQ8wcZl@+RgoPfH5Fgu{)TnQo{V zmpY995Z5;WHO2Ebl;}+)5|@{aR1nQP@&|-cjX#y#P=2i$>+jr@1e!r>Qf?$G7rGuB zW-!sAl8I(Yb0FmgO4F7ygXVfz>Y6YQR8jsNcOyuf;Tuo(l9i2nR^=7Er^~8LZ4Z2c zoqZ$E-*Xz>nA)jVGr!q!gx@5Z#KLcsUbrPYJ$?^7dp$Y@xUBS^t_M zVsi$>&U$3a#z7oIEo?bf2pxe>oJ}OJqgn(r5M(WaO!5a=yC9Pel+{Io!!<~hvsWlw zC>v$g$sqS_<3@9r&@CCBP`Y_uyTu;t- zMF-vMc2wkR+j7ReEEv-}UmXjPgTIU1eY{w{zz4p$zLMRt;MGt@($IGtDo^*CsmPJH zT0P&Bv*q9ie@0u|oJeC>WJvN)*d`=V#;~Z61pd`f@mJ*U=1SM(4 
zu(XiG9+adT!_wiq&u|#w!tZxF-V>24ugduT=1@BA$3o*I#xO}pk{FaojbYM|WK>YX z8N;}cgbhkm#xNC%Tb-$+(x1uv8#@UJDhJh4S(S4nwNygo9BD1(tDM8tBK3ZER+MBI zj#Ive=|!2j?7V&d6L&;PmxeuukxDBx4ih~4$j}KQtr#yr4j+Ss^uS~f^ItuSRIR?7 z3XNfZNOv=(dkX1%Wz{J5aBU8%6db07f&(I6v4Ykn=ZZ}P{EF`hKFd2zzUANYjl^~4 zduV~ee8Bx;3@*KKEf(E@2+vBlI2~4Ub>$@Lz1X{R#JOPIcPB*#?T|+{xFsU~5&uhf z*He*qI^UooV^^&g$M3THt5>PTwmQ52_^K(RQ|jmzda*D;YU%PrIDY$YE|d>_2%0a# z1nNdsTY*`efUIQ&cY06+$`ARNp;vT)=r}A0OYvO?%~HBo2_54j=nY=q4Job?u=HZ1 z|K%PVQ%5;)!H4&Qcy|XnI7@5kx+RnuUuXo}acKU)3kmzsMQ;!;R-+}*-N9>+ z0Y~*<7t;NXkk&u3aO_CL2gP*96BOTEXrQrpsPNCiC{9^3g=RJz@!%IYW=6c3h@suT zWXgOnW2T0`YQ{tDP5;I5%9_f*S5?A+tjsub6|GGGy z?eGs;Hw{m2oqP~QSIzSmt&1Mg4DvX_8UL1b^F{?L=?346AlqbP;`g6l_Y6CRUYD+q z^BwY@pLnPz?9ll^KFJn(!};g+pNhNSl)7D6;>Zu}{*$iB!^lU8s3Ie#m!sEQ@ghLH zdm+Jk_dK@E|F%0XqP_eNu92U+$32B)I7=^-`EHZx7fRh>jo7Z4!x;e3E)^b3E*b?vn)fC!nh5Wq*TlQL@GN-WZ37 z0=CX{4UY0>*za&s;(y);TQWoBfA(IrC%zX8)v!2vM#s=|d=x#^k@Pgn@LahyS|`A; zWC6xY1qhP}kjMxyvlp{#*P~$^IA1DLmmr{|E(utL*JLWTY1TSE4IobXpL>G4l0te|+J-u4kv!>%iv!9=bS<>sW zEGNnMsrBF&)q(bZd;j<$OJJgh8*^Zl!yc$hkQ_v{qTqWI{sDY&;5`ymehfCltki2= zqqCjTgLeO^2ObjlzHRp}-EbTyjVT-F(>~9ZF`1UY^BWb!MQ^%+b@_Fh9uW5+ z{p*{SN6_|BwS~j7sx-rl{8K#dBmMrLJrS(93==M_Tuv7{`0j@;>mDe#49%g2oqG~* z3Hkj;%41lPpL=k(crQ|yd#ACi|3&XTqp{6HYyRz<@Wz$~>V=YrO2s_H-#=s$ zBcDI{VZ|s!Gn+o~y{4lL=BS9W?_c+D1xx9Fs~F4H?2oSeo)tThEzBn;XF%tBR=gLv z&->=neJHnebv%aizi+`?Tdyh8Fu;trGY+5h%^oto4DBlj=Qn&smj)4=(=oEj6P4pB zk+M1BXm6BVE(IC#At8w7~_F?2UJg@}7(jio^QE{>hIfi~6&uISo=(roOH859F#JJc`%I2451l<`KHf0h0?U_WA7_M+6IHC1nwt6;O_M`?ZhFl#w)`VsQ&m zjyuE0;zWRLj}=j_$NXfz{tx`g*HfiK5Vj-ZkAl1%U#O{^ij;7bPx5CXkEMuo=ziwX z7(9cAO<&KYa4^0tNnc}b%Y^-J{p0Wap{V_qCx)f z4nfqDDxsmbJo%E?hHm-oNw>#X^xrtSS06>BO>&rz{_j$KB&3oa=6_uK-<0(tQibhC zYY+3suBRJ1z_a%M`lnem9@C%t8rtq3v8z-xCgyQ;(?dIuZTFKz?F1G7lzsp1za$P~ zzubRx&)|`sZx7fQ@f$WK?QI*g=@52&@P&;w=9MNJ^KG+@`Cz||In`ie#)5Z%SA#c$ zfAB3E)A){!`OB*|=GoV6%;h(2%p~xG!AF441pmlE8&mv>jVXQ2#=MO>enuUi*V~w% zP{$PTi{G~GPj8TivG?uY)O2}-=S+)@8TFBk+4iZ8nf`^1Ne5rmYGZypWn-TD+{V20 zzK!|eL+Am%x6Q^32Y)O0S3a>ZOTibtXJhJ5+L)2x4}NB2T2I)R5#ZNXR3vcHt|`EdIG)X`#d+#pc}ezTKaGs*pZE zfv4^5j1f*ce?Rpjd)5Eir%G}2al8NWr*d&5c87_l*OIk8db)E=U)e&n+599045W>Z z^*{ERo|1g%*@uajx;OV?pB5i)&v=3J=j6#ocy zol)qzLuV&ZF&$@@(9o$(yy6B9h(kqs*5})L8BFpmFgt8#*&*-qH z1Z7gmwJe@|j77qGhWXNr>@6osml*N+C{8<>zR!KX9QrNH`|ZR--{QO@Yztl!Uf&yn zs1H2 zLkB4);$TXQ#M{ogit(W=z33Bu9fD-RbJl}=!ZAl2nhd`GEEaVA5r z2GcIT^c-14_;&JXySMk$wcP@v>^H&_56O&Y^dF*9P{F|E|ly?YrXgy~6m5 zI*p-iKRh@%^E%@wV!yNVU9lLRh<|36Ukqn(|KYAeRv;Ar_=e4gY7q^Rw99eyB|lV3 zmEtK}!!TFd#x+k)?Tu(?_RW_?G@O`xNcaXR)EfK@rso>nxke{If4JZHp@A6xbE4~Q z&SuOk{2Br=bx6N|WA~x3;jkUyW-<8t2Y>pn$suzZ6V!O~uXl`wf|zvIFI3!voxAZy zwENeaLs6KIp}Uq~)c(xhz7^4iX}-3doUztx4-hv$KtAW=gV>Vo?%2R z#ofD{&V44_z}FcN=;hq8FdLpKBT*@!wh2@ks8J>)Nf(kh@GKYdW`ar=lH`CofKrZy zEdljc$b#?F@X6?=p$=%w_twl3vGk(2AzQ>|i;1WYV^@WNsHuu%UE=2N?G2I<>_m2G z6Nbe1?u;;!%o@o)!wx+HiQRYTN$-V;H8)1H(KX|Tu%l~+M6pXE8)^KuK27F6jFQzn zHH3|;nHa$qC4y6)5vwW1>`RYR3Uv$U^87CBdmiymLWY{eXtq*}oHegRvr*#XAdW|~ zi8T*IvXLR(+~`(PEuIZWcG9dZsM)iE;6Ez1Gy+Nc^g0W=U$3=OI>3@*t-wCTNH#z* zJ@Qt6vKO2a-b^M03GxwMrQ z+8DU8_l3Wi_M^QaE|%R!{a>?hJiCRR+Mt-g=8DH72U62ZzG3}InXBg@(=UyxDOa=O z8s3}4&KJ*2osPyJ{Dg4B4CF!>lx$}vB0PwYhj0Yp4TNnCDT(ZmFmcEG_8P}@HdCB= z&0h1|bka`|guIlRcaqsJLm1QW`b_p9nKLynq2^BZz!j zQX4y7AB-I_uy}8Kd|%;FIK2N$;eAPKP9pp8f2r^nRQR8Z6!jJP;lCHz*SMO6c6MT@ z$o^_J+54;Mmpu>Jf3Ak@tELFY-+$4(ubPH{on0d=u{A}rG3s*kIMR<;9@T?qRSnj6Z~ST{=> zSi>*#*q3Q;G(4ZqKFp55WC|{`^QAbi)6}vrV8;t{Z21D#`LE;r*#fqW=F%&-vTuny zF(Zo{?C6o**rzTJGrsFi#LKBk&R)oC9P9+Kq{Uv-=wLOp@^m@a#UiX6^B1x|gbSKj zmatbO0;OHfszd^{b_FXHHrj^!Ro4w}aUG^tdvz(~Q 
zUZYseT7<&ZuV$A_!EZFqER{ZW<&e}B`P5hKXJ*zz^5xQ=r@&k}6zK{-GqVYq{<)gf z&=<;qMBEbKDHWcyB?o`J8%U(LGl4{UNg7C`tK2|hyzopAo=L(pS$J~7Qzblg!qX%? z>1H^PNbY_hF->@;3r~me%q;UzM2^5L5uUliGf#Lrg=eAgEEb+F;aMs?Hwe!%;kj9O zRtV3n!n0a>gCVPhjeW=K3jXc-m}Oo@IEZi(;Q~Srg7g!Xi9wixU_w}ckc)5+!q!h% z5AzogFC#P|oI>~pp%+1Vie)As=nxhlI1wI1*oDxDa2nwn!thU7W;DWd1Pj8=2ssEV z5sDGY5OyH!MQBE7L*Nm*5eA+1uuSA>mWe}1LNFoZAQU5PLa0XAh43oEL4+2Ba|qud z^db!ZjAh~xIE0%JZbNV)Y(jVp;c0{e2uBdw5O{pMa5Vb;!8->|6=IL8)%#kN; z%)=vX{b>$8WMdXFb|%_!UDg-b_m33NEnfygP750FX0U?3I2q(o2>TGXFBC^9C{N9f zZ`d}E&1oj~{~OfeAe{+LoAU29eV z2LY>rxLs#<0)-9$QU~k>QU}xn@rzKV5lG$G3>*eL4jc|_1>##`rVU6P)eb~Qc^Do9 z^;8#-da4IVW5B$KaRf+#qk)k?>cJQw^Wz9 zbAaecCKq@E>SvrFCPPpROaPVw6M<#GDZmQgRA4o58gM6YI&e1--;pqTfyuyn;7nj6 z5I-klnt>YNaiA913e*AHfO=p%(1Q9I9)uOx1+)QsfOa6W4_-Gw3QPe;0_Ol@fOCQI zzf0AW2$7;rcELBM^$!N5kK6nF%PH*`!K(hUW+f|mi$0*3*4;Ba8~KCJ2yAefh6 z6hIkp1dvR1B#=yX6fg$zC}09G8pr`h15LmfU>a~NFcTOH%mt1E76RjdrNHsP&A)j!6e`T;0?fL;ACJ6Fag*GOayiSrvSTvQ-Qs}X+Y`AunAxka0W0Q zm<&t?&IG0aZv;Ak3g8l;3h4BJP=jy*HNY~U7Pu9t1MURsfqQ@kU_HHJAviE9v~})xk>6_R9I1HX&MZS z1V02A2Mh-$0cAiHFcN43jsm6wqkuWU(ZD=lEU*|j9#{cP2JQeFf%||KU?Xrg@Cb0V z6y|;w1pR|Goj?h&2RIlgc?F69BZ1+-IG_xe1dIi$fJR_Ca5XRo$YN=80wq8fa4@h8 zI0U#A7!KSClmYhuV?7}1K^TEeKo(113s3@V0}cju0EYm(fZ@PipbRK|6$JpJfJR_4 zki`;d0uBMD0mFfrKp8L>7z-=}8iAXE9u{ln4iH0tyMf`reZW{?BhUzJ1+wulF5nPg zComk?L-9b#e#9q05#SJDJTM%X3^W3D6rTizC?1$e@xa{unEz}tG@=M#DMbJ`16dC9 zl;pr&BnR##xeC*i>%ER9wQ#uOS}y|b^tsuiZBI=9>Dw$PlKi; zNQb6`4j3a)vH|lI7z@k=(yDk96I{=sBjbc@9gucV`g4JA1C{~50B!}o0^AAQ4cr5K z6j%@Z5ZDC#l$P7$AU+130zMCH2fhL91ReqQ0M7y?uLbvmNMH;2IN&?LB;au%Z9ud^ zslY!4Ob4z87DG-uULN=rKqnMtNeB{_c^L2rcyyxR8<4Ni3jS%}S>Sdc52TH%8@M0Hyl!V+1Yp1M2s@n&0v`msQQ%-; z9C+H%_JWrJlfc&jRlpB`HsC&BI`9NA2lyB;5BMgq82ESK2H*i;1@JU*2e1{m8+Z$F z-|HCvW)O`Kd;+XTg+qWxz<&g61%3%U3;Ybo1K$I7178C&4R+=bKn6Soi~;TeCIDXo za=@>ECZHdf25bjr0^b7WdO(~6Q3yN_ECrqcZU&wMRs%l=?gAbKl2bAnxEFjrkem&2 zMh<{41nvYs1=tMUN%3e1wU4xYRm9eBa0QLYSp3V3ox$lD>W$N}C3OahNJ$ipmw;BOE( zf#i&|BI7Wi3;eA>a<7trW#9{dx!@-Pw}M{|B!_Z1a3^@oT*0x70PX=#SE=>Dg+TH% zBY{oeZv&D$Hj`964nYY7faEq=ft`?70(*fOKyup@ zKj z;BoMqfKtRq0Z)O?0!D(52DXFG26h5>0(*d4fzmf|vkr^_?j=O}(ZD3|Ily@EV}PnR zG5;5VAP?09v?0M3U^?(QU=HveU>*jz~jL8fo;HJK;4U&|FIx=2$le8oxcd|2LCQl_LiM_8<+rm7H9%K3ACZ&allOQ ze*)$LmjW3yL{3F05@H$`x_%a~pfj|#p2L$E7G!z^M+zoyia363j z&;j{)U?ccDfsx3k1|9*w0q8`0Jg^n~UBDdh6M$#I=K`fjuLAPm{{m$2HO~WpjNM2$ z6+{dKtAGi>A|MA818u+yKnJiIxCB@WbOK)mx_~bL%Ybu$Tbt|+#t4yl5;zK2(x#w(5LWoXVVq;7px+2%cWQOQD4S$~`n!$SF4$+RZ zKrm}=0G&C2E)CICYc=QvSom%Ny*nhI4f+7+JFsV^fo=-X^FSW~JsVP*&!+~Y=?Tg` zLPY$#!OWB)1vF)#`BVtHud)izeTwc2(WKCk5KU`wM~EhcdP6iR8h<#Lm&(_L=$k?3 z9EP=0p?82O4n>fryFxT6(Hf%Xg62asEdtDu>k7qyo`Xe@D$|8%QX(frlX4qEG?l*- zbP8(5Y=s4e7|PHdqA7#q@4-ZrAs#d-N-Z#fo`?P+c@F3WSk-R>T{wWQ4$-tx902V= zdQz?(bSiREUdd6cR>V+50vMvHVq1u&%_1*ElX6=_bPDJrA(||-BSfcymK_V0L%qd? 
zXtIgS5KTLuD?~#%53?i0P|F%aG>vOph^C?G4bjxfnB&0;Xb5Z}npV8r5KYVJX3!Z} zSV_4(psA~=#Wbg>7fHE#A$>&9@*4#Pbd7w9XjFr2ifG4Ro%+Ghyl z8#Gxc^)2%r(RZL!G#zwbnz#Y94m4RYbz2(fGXMr>h0@9Z-{+%Y(ipWDv~z*X41b`{XLnnfQ;!m%5}+=^V)W=+gIx zU2i@5s^P_1qG%Djr{)kRDwKA@$!o!KRwK8I#)J(!NK8K6VTsJ+m>|R`cUr(b!?@#5O6E`~; zb*AP%M6nZVo}?68=hei{6HO3x%@txr@0Hcu4?g0havNzT{M}n_tC=`YG?uNXvEK~U z@7Q)-%T?Qgsz9Q#PkD+FKGd_>S%^AkZ!&4_f-xN#Hig>!{a_z~M* zy`&9v8M}Q_wC8_$NB)27irn+Ryd!7-FI|y){?G5oFR*j-U9*eUtXaM`FMZY8eEKRH zlJw>4U8yA{MI{U~CM@F*{8Wmc?Jg$FJ_o8Ns{5p3)YH%-HUjtAoITBd}cz;x&qM{&u76z%S&AD;(2RV7G*OV zND|bS5-(W32EE7p6qa4G$|dN>Z2b2WXpu>>(4Aj$pCi9yWl;(GeMSE4BDc^#36wt6 zwV_@?H+GYBbjE$_UHNM=SD}*if^MJ~Tg~{@qOpS-*f}Nn`8AVPi=x=-nuV*exHxLo ztQL)DSJwDei>7+sV`gKZT=}W@V9=n#onaY;`T51n# zR!07s<;BjTl6=T)HA@T8-EWK2^OxU~KO23yYQ^%xjQka@RYhyDtFm+OPcIkRi;Ky0 za6}W+7z$M~RpNQ;p~tGc*<^Fe@6Klql6Bi>JK)7p^|z+ZC;9`y<{dPOj2C%Q^6zrr zeRqC|1NMdfVJgJ)YJM)ns`z2eur<)7QRv?ERdFe|4{j(`V0LUfj>GrDD>H(2w~+(`6p* z>#CrXiKS9+U!Px+lD~2}=2H6XdDzI8WB$6=c!H)EOt~H^yu{8abgy@k72f1txiY^5 zlk!S%9a+1+s4$=USV~pSE)sam5wzt-T1N)VvLVD%8KKo7wG?ON;N%JA%VxGQvkQyD z*N4^v=5)i0YehpuuRBC#tf%EsT*@GvLTE!ci_nhHfxshlA@m?H)woeWh(U-)NJiig zR0ui*6M_vP1tAR~9l?Q+iI9V^1R)Qh7@-t_(w9}kr>y`{jj$7855hi#MuZ~>#}Qf( z+7LPs`gW@x(9Cw+P9a1h#2~~YBq4AJHUtMkE`kf80%0e@K7<1ZO$bL2TDGr7%R!t) z=s@U1=tk&8kUWMKBg7$a2quIU$kTwC2)PKw2%8ahAnZkGLO6oZg3yj2^&>w5{l|m-8|NaT8oZawUhUl^w zzmjSAEL*f(qvej& zey)DLezm?-U#8!s->ZLF|AzjUzE%H){yY7T`fh!gA>1&|5O0`iP#9(#Dh=BWdklXw z955U)v>HA&v>U!NFvgpW_Zh2Yji6Q)z9tEOI))I7|b zY+h@2n>UyrG5gI&&1cN#J?62NL`%M9v*iuTDa%;vMC%mm1J?c4qgEy!XDsy07;Y|i zH&?~|nR|hIhdaT2$bHJ4;jHo$dACBWG$`jNA5lJ`d|LU6vPt=aa*S%a%AmSWwO#d; zszG&7bwu^Cs!jF1>Y6HCJwhF$R;tbF)#^>^57gc2NKK5!Gaen5rzzHWG!>f1HAgfp zn$I=oG{0!BX~t_8YVXzhv`=VX(Vo;gb-(Gv`dIYV{rYe9|Iqj7CmQk$1%@KSeFm@L z8N-W)Cd0diPYjn z%tKQnrNMF<*YiZ**e>Li`8MxvM#f( zu&%PMv)*IfVBKu3v_5LBvF@@yZGFM|H|uNGx2^A5k6Bx-pIXmYFIq2If42VOvC`e+ zSxk{p+(a&sGjKNU4lb8_fUCre_zU+UcYr&HIdP8rjbr68@)>fwJQK5Gz1$;zME(k9 z#;@`u#Z1L*in|qS6kbK8;wi;@imw#EDdLoimBq^El?Rolm0u};Q!Y~-P_0$hs&Cgk zrFlj3ohDtIuf0d>c>$w)O#6{mqciJLbhqeM>fAbyZmaGI-Ltx1bTjmG^sgH}FoYRn zjY&p1EIi6&Hf5XcG!>Z2O;4F#Hyt#!m_9PKn}(b9=3?`+<~PjqEE_F*EI(ROtZ0=D zTOW*g3b!0ayq?>{ZRQyHAo&RSX!$hxQh9?%{;m8Wg-`Kk#aoIG6`v{|R6eZSs;pCX zDu0H~t5wCYVw=>gL| zQ-i75)NT6BG|j9v&oN&#|7;#&Ny2nqXW3w_!+5`E{Tw6xPwELc0s8O3~OV-WRE!1V}{5qw6v3`|4+Nd<98#9cH(TY5y)41K(VEh!r({AiA z^2SayrQ6tJyvdYiN;f%7nWi^Qe}{p7Yw9$0VX&gjR`WV@DHh=}^JcTB!o1a7ZQfx% zXYMfb=1y~$`3cNcG@}d`@qh=h*s^k&e3rai-XfPP%!=1wsBMa072}jQDCa4&FwIse zpH%KfD=#a5QeIX5t`w_=s3KKkR1;K*s$`WC3-V3avTjo?Q{Am9RF$YUs2);1p?VfZ z{fV@h>>OA!-b&>i$wO9Rw`YH9h>f`E@ z>Qm}3)a~kT)jz2JsqR&aHR0&gv6@MmX&RMAud!lDU$1#k^8)7i5zR@>DGjgb)MWVb3GYsaQ@=m@iwlQOm1wmj>Xt6z(3boZH4d$?f6x zac22!`R($R^5^A8hV)Ffj}eO z42_1*44sB6h6v+W;}j!jywmuoai8%$<5!qXX&#dY>-{^Xo6N79H5M0^j;AcctY27} z-MBV`5<|F=oReF}Es!sg7s|`zHz{sWWGi!(>FB_}s*b59VFi$Cd>TL6`iAB+%~zU$ zW-u&jg6>H}o#7<|y|<8|oKHDCn(;~m@TKCs;%mhv#rKMz6g3;}7bQo{S1wfEu3V(7 zqL&J-xVHqU4%ny6e5-l6`5o+IzneLW#$&NqN-fQpX_qWRvFj|emS8tIWj$wQx`ZW#nTPqA zi}5+keU2q094qSrdA59vDg#UBqpEr84E0*|7|b}o=Dem$6QR99y9f)%@Pw-Mm|}tkROzPDwiqFC^Slu zCK9IphTd+dHSag$|GG#hd@44_8|6y5R&JDAVXJfHx5#gmXULao4rs1wT68b!y{7Y~ z$IOf^*wPaDX8BdsZ>oSwtRAc$s*X^P!UP(Com^?$c*W{nbJLNyif0N@=C52QW zS7a;PiVcb~#b)@hTNTxc9g3Zb3kqJ*spwL4EACL{E6ar4uUff7`5N}@5vmxEDoz!z zN>C+X9pO|e)h5+uRfTG+s#-Nj^SaJvSYTKSkE_FQ(73_$xM>d-y5CKAn3-JMph1^g zbSdTsNU36c?Fx8EF;QKwUWTQi30~emG{0+xVe2f@p3`<|MLL7drkew+xJ{R%Tc*py zwppz6+@sr|dr()Q+lF1`PrBXMS@!8()ivr4>W*M9ZPlH|ZhAq->weI6>weWS`ayb` zexyD|KS7_MpRVWh8ofz>lRizqK%a@7C|AEyU#KsEqrC~U;1PYb{s}nTPYW*h0sWg; 
zmyYUN^dIZn^yl;)aK5`1>#yi>mnkd-Cb$YU<-GOJIi%&om@B9%So^~ zMakoEUP_j$}er zN@-e$HbawE3M;SBR%>^{z1XX*hr86QJ+5ulwrSgOFznLyXc?VU7paTU#p{xE zoKAe_VeF#axG4@_UGkA&gJ>yu#i z9-ZC>t9R&gVDnCWv2e7kfW7b3?}oM4>l@@8%?KbU&(^+piU}`iq!Sy_1Iu7@{6$i{V(^>d99i~W292`)U z$6|v$=fIkaVapY;vbE?}1%L%A0j9=%Nl? z&9(d;k}mQP*ZA6!sK#T zi!q^gVk!ld>LG<%SvnvyF;FE3?&)T3H`k1+8F@VBfK5*SFLZRTPzL+2zznFC?`ZhY ID@qmpf3^A9pa1{> delta 56093 zcmeHweOQ#$*7tqSfQ&i{Dj+Hyhx3ovbJ=$fz*PkFuh&qB6s@x(zyq%9M(n_qXmDkUHmip7-VAdjEoR*8c6a_TFo+ zz4rIMr^~Xler44osts)W{l-}P<(>s|>K}{V(o5@C-LvHo&s(q=Wg(UvfOdB(c8C$oCU&=}l^91nfvw%rKv@;~1vZ zBc4AN7y#;WO=PFC?JgZV-*@*50w&T`E(jGj3z##=k=(;Fn)|0s3hr7cU@V@Jp0HlP z>}KzuHqq72id1oDSdX$#fIj`RF-#c4WX&(i$+l%POrQ;EUC6T;__=F}VB*RsHKK(A zh6x5H1N;417^ZptT2IlXBanbf8MK30^Kbbq^NVux3sy7C%Rxwo^}N0!!Jz`dMpkl~ z{w}2<<l806Btb+U8!y0(jz`f&PEj(-CnmlYA&&F}jde}ssP2?UYwtQRW zzMg1alF98PN!}>#n@Ln>ZV%&J;<_vv6IvX@*!-4q*_3=DzULY|#-+o>d1CH4*RaUnw4)%q_$?a(3}4%kr3H?iTQ_!^0yLR)_QR1*wY@ZE@ybYIj_A}un*#93*_>&Qm ze>dVD=prLxk~xd(IdSM;O-MmHOxWc*FP=2n@<%k2$GC&Scsp(5tr-xwi+kKb;q@`D z38TX7NI0FA?p6k3>2L>yfORS}u|zn#;i(XZiLs4>w$X6#GRJYB;(vNlQ_{HKL*P#( ztuOK~swPtr8o}eWQ}_a3B9$-jFHfE{?j1* zaZFq_nHDSkHJiV7GFfKBbaj%-GTmhw_Gz7I7ebQwfDG6%N z+!4cc`5%=mxvh8-&%9obS)%tpPQ|5sMkVsN7LJa!H!qA~7~2FZA$M^WRHK#=7#Z#$ z5-sD{ISgYB|0SBSihhb_I4$-S8pXx2R&FMyz`N-96F%WSKOzIug$!S#;~ho@HY1;H zl9ghd)2mpV$K(m9lSnFWUf$vo`OhCeerE3gUJ*e9xMQ?)R2CNX#r`1~93-B>y`V2G z2macA%66ejgqgCuC5;r4r<`4B+>@{e9S}}o*<1>r zzkM$xo}uMb($I2$;%uv0erEd@Hk(@+faXHbUA6pTRcU)w=~>6We5y)aj|7a6A*yvAEmRm8)=Upa5BSj8zDb+B7yKj4xh%)+wkO=8SK?Dl#MAU46?o;Db?2iuwju(VJ>6#EY zIq(%fYUW68BDTn7EotemQ}G)Dr_eg$9PKz^DL=}HP-((t{n4WHSUG;cKH`|kuLb4T z%67ysTN_udApbh!MRi#)XgM)UmpY6gG(C!axo0r$Qqt0`J{V7LPzm`L=_!?yEAFNR zDv|0@(mAKfNTspl_-LjS3tZm?O4*NH6M|-nXyvHGDaVx%l*iuXdNJrj_JC`9u+@GR zo80N^^<-lxr#Y1tWKWm7u_KSC&Ugqz#9*|s32EFsDZ?yWChWRrSVePj$F2l6&nKDN z1eapW8fgi`oOj#a_NYiH!%}HVfWxR4oOCby#6~z zFxoT8YC!%pD`ntvdYkFLoz!}S&p>Fw!N9Ot=@C2vL7vc>k8-=j@kuD?JUWlcf6zU* zD{U#~;Nodk-&c!_|@@W%)?9?^YPvC_{%ttcuq%Biex~J z$gcQKWJ*P2_Dq~dYs=LSszzM|u3SNeV~!qM7Q?)8?A939`H3N-)3<>+of+d|Crzf+ zBW#jv6cq7C9h*9tyU(?9(u8zK(4oUqxK1o(Si^Aa%po&cI);0r^5)xCto+mE>kwm`Y9-`sIAA!A`C#Q4 zoyYP!E%&4=bn;}%te@nj)uhATFaEmB_*E zcFmbGOPSx{b~`Owp;+KX%Gy@@fd$$>*ru+LsLPxAXRvT^EVGen6eI71zarvF>)o8zX{<%=pgmCFRN@?t{lv;all5|xBI>R_kw`?jsE7JQ zfxL)%rB7s+@G@#ppUlFGXvp@7Qh5;#*FI4`FUssMs^CR5DEo5l;YB(9MUA}3>Jiy# zLNV4>UPg1OuO!EdXaM(#dU%nopQw)Jn>Ao~fA>TYS4V@ZuV4}{qQTWCvhtz|dw*Fa zFQd`Zmu)XEq6ye1YUD-L{Y9sF(VqUIv%F|`e^Dnd+S^~$W9Mc2`pd*4$o~EPMUlMd zK!1^e7ai;`O5#QJ{Y6>4=x{$#9Sv)1A<3ylQM~)B`D`?wdRw`&t{EJE6sx6}XTbht zo>Av}*TCMzqjw5Yk9(6wyNmlRYu{rdSUqxQQ z9mB}qA&Nt0aOR)f?z}17g8+{OfBTLeMx3Q0`4`&IKVXLIaKwU%u%4dh_Ho?ip;f<;OW35SQtdyika1yyKd? 
z_$}v&m2fL+!`AOKBF%Me<`f~})LDj*UIi2XqQJFjmZ~rDG`gg}cGuBaL6KC_xmZ*+ ziXwak)Yoa;eSgh-ZC18mV~i_1GF4C-<7$X(ByQtuDZ&2PFA`+TiB+~!2Z(=!_i`^X zT;NwYE_69ZUB;TVeMzBhx?@{DV+-Ya&Ou(7k6VH$FO1vbYMnDlP!!|(VNMpoO>=|C zd9E*G^Zww*X`D(ebNqX}}t1+>PQjRX&SDA&&0g_>}TrSv0II`E}0Nzi(wXae8U^zI7JqUipTMs(PL#Y_s}n}vT8Ct0bP(pktt)Ov&j^LR4IUP zlT<;cI@~iV4#2s);yHRGY{CJ$NxDl2pSzM|GRjyciy(MPwus;-SqdphkhjxWd$vL| zQvzc>=T`4jtRsnMAyoDyy6#qNiR$BM*`w(f2n+b^c>Tqa_0k!B$r{$ExDzjq%1Ci} zuRsDpmNG+765~3eoIkt7TLE|RBff-ho;U;LLvb3Wse~sZzJBg4*BDjtL<@SxbCRz` zOPORxr>OV%zE5>w0og#$Z*}y5g9+iA7m~4}1$TUUqRkUqv#4WzE_x?auIokhyeXD8 zx0{-~GzBN65NPOqal~s59&i3o7tjkOua;losiC`W^r;dq^)?!rc53E^-$}ls#Ht+{ z#PK5OxE5LTEnSQiy?%9lp_y=<>+iZepE}CLE&2g{&ASjL^)ESFJ9Ql8;Js1axf`wL zZ!NcJZ>9OrpuK}l(D3P8<$Gvga}Fww>#dk+;fi=^R&UQ5kW^=(fHKu3B3Ry%7hr8( z8RII}hs~sPw~`O}r`J3|N6?9Q5tVQ&`H;k}SM@X_wrXJ} zO|uH5d&E|J);MHRleSKiGAapHGsO$?SnJ9Jy2+h{;jp^cuZ~hWg1OHymwR`s#b$}U z&y(20u!cE)7uZTLdHLDIJ0#DoBv*LfzkBnf_U56qj)nbux=)X!HxKXT>HRyiFOOK< zn~6F`WLY^r84CF^hWUW+a=lf%wpkVgqcK)`LQ8oy5zBlEUO$aK==>IobVLpW=qOwn zT-Pj7Q;`$NdAL5fKY4|e+^yAj<6|5>@1I7VMbVGU&V=~tLa`XWEKYS8UP}cJ)%5+f zI3k7n0n@a~homlXOsJ4>Zj8>qbxGefp2q(nTa1WhH@iw>XWGMX;!4X4TtTCwk-?(e zF-(8F)X#amj#B7_?CY=H?z&Jg{FC^UJfE~GC8cqv{*>UGHxbK*{KHqWFp;ao0>{y? zT{rub|4PTzQomYK(81A?!e!zOsLV*O%8X6v%e{{m8Ml(sISVv8A}ET-$VO5`Gmp*# zyjJ~>Mbpc#HDmovj|-<6R2sK{Mmg_#Y?z)*M~kK!DbKOE>6C|yn@e*&D1N=K9j3_N z!pjKqrZ^|lcu7mgBdPofzM`d7#kadZ!p>gGahpGZ8{<2*$}G6o`%*=Hm}V_Om$`#l zQ4$|i+{H6JABr5^>HgdE&?jks&Ea2>0f08xxGHZWXT_HF;g9KX7^2&ML&{>xcxwDkRw++5B+azcFMdb5Kb}tNSgGgK4 zX`7tqlX2-aEDFQ~xcPQ^W=ozB`zo(uGPzcN!giC^NLASqpxmuQ)OGwu%OtW7iQy1J z`DAyivkT{ipjz1InwAtSM4Mc?q=aa=yjbva%cVGcp+H|cZxPpaL3Z^+Y*aH^XDk+v z{Gcj`B-p=p{*!hyJl+R$8UMnGE`Zp7=Jc@4g3OT#m)IAkXo-9Thp-wtrE)!jmFiqMQg(GPjD3`}pen zHRt~J`bu`&k{7)N$wKEzm^{Z}B$d-|GTTe(u$9dDeM(!~!T`N*fLHPh#wH|zdfz~= zMqW2B)NygCiedD~66ptiP@0;kAM0+Gjdfz0R-Sx?O9N`a@B)=`7qkLS- z4>x%8sXyc!=cD)a@k$~*5|Q3lv?H;BYSe9&k)ff3S;A=|>8e-gP`oiX`(-#_KMf$}|ye9kg@#crw1K$ZM~X(@jI z8Mk^htxYAXw-I!&{(<1ToKNU%`9`juxSm`)El`*bcwY>`J2oB}gDxWBzBR?dgBT@# zw@)MQ&AB~8cpB{X+am)!no&k&e}~vW_*lm-&&cPOYU%NVzka)J&6kec0n3+R0$oPY&{j+DG#=yrqS#yLYuwzZ zP*-oET6)ImE#Y|;oz6e32Km7DWKM+w{1-)PI=%1kd0h+gCyYh9$Fuat;}PGQucN+r zDE~3O7gg(L(#&T4|Moeqnf^y2(6xv1*FVjsYPRAq)PD5@{p_PS=;$SZ<7=VgS5eWN zpa1R-e~?nEohOb^s^gMRk=W5L6rOa4Z%?`l#lm+bQabQ7*y>CKm7s&JwFQx^#`Qo! z1j5FG$>F4C*CJ*CAQTh`5`%m|Jt%M))am-QV9_|r=ya3TWKbrk&~-~;uKr9Z$*+Ef zswxnLu3n>l^%ymmjA}!E9f5IpAMhh z^s%rVht}=3G$E4}<2q-XF^bMG{<~<9<8a~{T^vT=L?y*b2^__X*psezi*x*2%l~wA za#=PP&miw! 
[... remainder of base85-encoded GIT binary patch data omitted: not human-readable ...]

diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input b/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
deleted file mode 100644
index affda60449b2..000000000000
--- a/test/tools/llvm-symbolizer/pdb/Inputs/test.exe.input
+++ /dev/null
@@ -1,4 +0,0 @@
-0x401030
-0x401040
-0x401060
-0x500000
diff --git a/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb b/test/tools/llvm-symbolizer/pdb/Inputs/test.pdb
index 974e565e87f4224cbee60144db23681bf03c0bc2..d26d33a862d8f79c2143dff97d7178ef4ca57b6d 100644
GIT binary patch
delta 443545
[... base85-encoded binary delta payload omitted: not human-readable ...]
zU5YSTA_l3`=S-U|cdrz_0%6%;q;Pl5t3HKO3j$Jv2!D|ciC@eU{GHw_jky>(?A^e# z!B!mjmo?nBn$pr~;|p4%97Lg=l9|MqQ+@F*pJO=0X}{#x&ibOoGB5VG)u4tY2zZ?n zd~16~?cooEIZ|3fh$EGmdZWTpFZTrJdD1pQ?az-|l5TEmojSZ)4aC|r5eG2+@mE7e zi}F*OzC!+1s&c<=yK)lrSB}^|21?e2D3Pi>q7lGEo30GPJI_#t(fY`s8J1< zMm6rrCP^=G=A*XXhif?bAxHsHS6az67g35Y}NS0V@g`ZVbi z1~yGeLFMR#i1-fr$?;jPMj&*W(n;A5&Wpx~($I+$Rm4m~he)?$9K%!_ry+U-Y;|UvV-eG9v1~O?Wx4bFX8TooJ&0QK2J0r|XqI3z6q#Uijkpm)a$!Kb4 z7-b|(BlbFsyC|M9jn1c9bGqfgwANtEJW7Rfj@c9fOus;me;#wF@$;&uIbK>6BqsP} zO%;O*U7ikDR+R@hvbXvXOsA5xTr!Z%B)UPslt(#!5m&AP1xmD+r%}BRabzC%A#p)6 zvhhA7F?}E?#4aq>-V>?XA}tFWEE}IlWf3Mw0Jq^ND;#|$PoyZ*Tns;D((ScvL|M8K zvGx@saL^GhK9`v=YM9JDA531Tk^%&8DE%j%?kMMJ#zRQUvdmKpjqqmjKx!%){RSrr z=R}QFi{$lGty8EL%j;{mVh{JL1F60|kp2f!T{y%#kRl-6L2~#&Xd!I`<$+X|-+|Oa zoJSr=xi`xX?i^Yx@)^FN){RN=&dl}I|=ciX=F@e+`!UtYb{D?g*0Zav{kR~BSl%}x2 zsV{0Q0rcYWBlSj&2{B5VP?3dd6OdOT+j9U1_n>^sCRK(=J;4Sm zc{>BA(@V4PxypS^@2@AAQMeK_?LE}qsDxzl&Aet0g30G$qDu8cVPiSZLE9Z_50GOD zuO>tgV#?l^by$l?G4ygG( z7*&?B)a->+r8`6)aoQ9JEl9?aqxHjIQpVzeTw|=Cj3tB)Rs;2FSAcF0gqXzzDM*mZ z60^{^fo~gxA9jdabckE*pawxCZZW*#7PoGXVsT59<|l3;n&HseLh6?86}F_Zu;n%g zTT%hm17S-V1S|vaU$vCB^n%2t2l6>xsBnPRoP{zr%(plPiCyAS(Bj6}0a`sCF5Y@z9J4~K>NRu9>$;r1jLTWqIxR_atxb=wUX2PuGwsV2(aG?Z}4VhhZ%_&v5@ zF6XPy7R=!=dJ)ND3tnfOt-~4-ffZ$tvPSXgm`JwZNzT*_t7&~DlM9@27TAK;*w)rp zGTF&_^3gpBT-YsW2(8Aia-K9b27<~yXw+l4LT^fvsN0TvD=mdGUbbKl2hu^V3+tOD zlv%>zi%8glspRyw3MJupiVKQj)?gxZ9CkGj%9y=45<;1$)hs2_ma9Atp$yFdVhyqY zBV-7sFb#4;rIIIkAn&wo@eVGq9oC*`nBq_g$?BN}*pwz}f;vK3sg|l=+B_l;D|vI3 zi4$GqA`J(+q(hnM!7sW?C0k$uuyj$%5TS9q9FYC2;eJTK3Qm<~fFKgzNOZqgtp{ESxjCXOB>6kz!mTS6weiOCsS{Hpn z5MNDdUxjG6$#6Ar>)^J-?T0%BcNUyYV>>n4q0#Q_kL=1=0TF4NLMS*F@~1|+DLzaM*>xJE_E!Cb%>bj9d!K1L-Ez&IO+I@{bhTQatIRXb9UOT znY}4x_kb@ae}Zi>f@kD&PT&Ojwj&Bc~vAb83O zTr57TmMEia0rol0raX$YUGp7qg-*gE?Vkaq_dqNSpHJc&=p;28Gw$#cBWbxkeK@*YJ4HHC%oR?*Fsr8UZ|5*15)a+)g>k-)U0k#mikIAi1I8kwBw*XJ5} zoag%I8htsRxwDoV>y{kW$hpRk46N~UjR72Y{c{Zyhi5C@)PJkZlmeVENg?B0`TJZEC+qe`Vlr+DX#i<@lR6g!DJ;Log+vzwgE0 zbOcR*qtkt6z2jL5A$74ru^wZC4V7Q{D}*d69e;L)UktwpehEG4&=r3SLqIq9BjERh z|0npp;C~6f1c0Pm4Z=N)6%~p&r8~muSA+ja=!Z(xbb-GDJ^=I652N6(!uL4%tKpMO zVJ-ao;p3TznvE!5c?f^}4FM0se-i#i_%FcU3V$E`N8!H%|1tOfPWhPF7NkY_!R#R{Il>= zPv8%R3J8);Z9x*tH^e9E$BB29MLLi>tg~%wm^&zh`-? 
z@mKG+S?sF~;*SXTcoh4UC?2nOfVW^g4ntzG6;;9b6OTjN*fjCXcztS$jf1fn0V_jI zitv6%dz+XdiYEY8rl^{LGsH}ewayST#q$*0k%RfVK7csg9fdj(&1p+{qeRDvIGO1r zhE3G-T!T5K_@&4hE4FuYI-JqyuAySvM16Rt?>Ua{A<0v42ckl-5t4V4tgTKEhmi?O z8Aj=nMDZj&GoXN@nuw25^$d3phNmbg*l$wPE{Ls@(3HO7*d)D`yDulTn$lOqOxC-& zCvdRUlnG+WWSsC!#8dBjsX9lvr@&hzs-~c47m00C^qjC4Wfou0x-{|G6n%z!Gl%2A z#oM*Nh$*+=cqmX4f4xO-M*hS5aOQR5799J;i^8cmL24!*nu;psiyhPSC|ffP+I#_t z!ZA&!x6@Omq3DsKb{bNY$`m8LDK1co1(YIBw4Sc#!y@&B=}5L(+&^6(r2a*mqpw}U zGeghobO|PA=ynAyH6(<6oxe%@fS&D;!kg0^7LE|hR4DcHE>SxJIJ`@2o1xzf^}(PD z!2MPf!t?yhc}P%V9s7Mt>2b7Gf#be!HHq;vzIFsVfy&A?ail^YYSVPlv{G+@n`^@= z(FZBwdGeZxgepD9-c0wtRWi0p596;voA6-py;V4(ZYH)?q4?Xyu_}Eirg-*Dy+hD5 zIHIIITC7+(6D8No)bri%b67mLj&OgK*gX>`j3+r*N_I&4x=N_C^uO6k28btS>9L(g z8U%?^p$%$_+)nop4&yUJlJRg@cPk95g*Qy7v-R+7zo>S;C=0AFdAjH{TYov_Hm;C` zu}d3#ZxWs6=uw3xLf18je%bro4tqgD_=64N-xJg4pua|nhvw*+wvh(o-``Hm(Sy-> zC#!Md4N6r5?<>)%7HE?xme+!AbrSn(^-t)zy@jBp6UC*42%JM+oG7{zh_^_LzEiIz zg#PeOy)AIrvj{1gi2;iM1f`L;R9spF)rMVS*kYW7e#@vr2ISRHTe#S@Snr78|9LU8 zrH~gVN|pd{v%2X^P+@{`7uX(BcwCo;OQCb5sSi4bufgBz;I_l0Ez1{QYnjQvd_L<2L9U-sIh%=kw_*?m_uoyeev1 z|MHdj>;&EJD}$rWyJ;l(_xLgM0@g;mswAV}#y9j>^BZpHv*^~I3u1jNMBU+h3IZwC zWvqMvq|JSQ_}{~Cc>B-)=|S4)Pe!^ma694NqdaiYb8(8&7XS2z8xJ=hZUx+9aC_lS z!6{hGqv2Y^^@p1bhXtN5AY?!MElnnA4~yp78vf41uAR2v=H(Dv)0Oi0-n5LG=cjYf zLL9iVsujuGdn1lmBFN=^<|=7}Z<>U95ZFb($KkMyL5c@yN;7O)8tEV){ksARoW~TsF9Kx7WS^i&Rmk>()R&QQ3F4!#4Ub0Tv^$qFP7M0Y zNQtB3hjKuwHwXHLzXPxX2=VAs0lpmv1Sw+URU^V67AcWK?o+bG=~QS2|>LmY+ z8>Y5#K^^K|e_Yqxi2KIUTXa@S%I&;RE8vh1L50@*!a-wgxGvu1mKf!{@jpb9O zQFLSdL){_Mx}Dsj!3cPUGxrf6y#-w@YO+j$Jx%g)BRc93M@tJgH1;R>J6LS^#qi|N zLhvyM$g3-KB(R6S=}A$^tH=yCap;l}BTjv2I0E0{6j|I6V|>U0K1lKJ2tpvgju;EW zZ`>IsqQr+F1Ab^f|A-l>rH}6=qLjk0-ZZ+q6+Vt+)-pJT(O?cfO%$OPn{ig3&6+bE z!PJBjG|7x_c%(+7>fqu$bbJ;Q=;DuXzaggKA@b5<}!F3EF1B)z9ju+T{3EfapsZK30&aX(WcvH~F= zJOD{RoD>i(WT2g$#7KZVd-HB_zN*wXNOH3p1^ZiJrF>ba-0hsNhm-n?#%wJ)PZuiKgwqtnynYj4vf}KvH)(#gO^mTv(Z^;kcZn?sTR}Lvosdeqeb#OGmnuYv|7WgHPl(O95n@tkh$oo3C>e*)Ko2Z{G_MICqlMa|wOp-@){oSP zT^b&-+n5>>tpNv=#XnJ{S~C4kd99D(K>Rzo7< z=oF!XLoRZI>G1z%9^ad}X_KgFOF2OfSDo(4QPnTTL%zUGTQow4@Z7&y7ZbeA(slcy z6RvQGmD5RYKFA)7cDY=kI9#mPE{7slCTf|&rOTEjQ_DhR0PtTbRR-|)C`|3{axqGx zM!d$zzu6vRpWthVoH@g}As!mI|8PU(g+e-4uWWFxUWVe#bM@``n+OdnVe#>{+x;BE z=&~dXaG;~z?p^pMT0!^UOk>+b=t>YyQ`O`L{0ZIOJVfI6EC^gK{Ck{WnchU=UEHo3 z)~A|XLBz_ZOy#9?lF-R?pKvRQ(1m~>qunPFy8VD-61scJSk)h!g|P7I|z7v}*ESG=C%a3$FDT*-E*y4wS_$+)E$Xb;87Dh>v;349hj zUZZ2!n-J6AUg{d4PqpbSXWHA+=`etV;ZPw>yTz4-GhxWXUC{s=+uP&X?+{la5GD!;6Nv+fD0?y=|E6ngPy@Rw2P0$oR4~rt$s2vu&H|hTz5&dUXiNOxVBG@y=A{Qeg(ZUMuV24_(l<_eFsRk^& zMx7pmllov~xYES7-8L5aySh6f-`_d?>kbk6t`V&fTX8pssG`YWTTCWNoECGy3g5Yo zhrx};t)MvNuo7&$WD68u{Dix-<2fRQ5rH<0vfL(O(9cFp=0b)Nh?Qu7yJIS}Xwz}Z zMuUudTRm~BWeV;x1iBPvZ}O-fZ*T@_p@QbhYW}7-nk0L3n!o2l;i-aHoP^$cIDn4J z{1e!j643o+DmA^SqAZ|NYg2GtBG&G~nG(OA_kqq9q;YG7>>bI!x(O_Tx_ZDuW%N!r6enm!SSs@suOWyKfoQ0CYNP+^^k1# z+FqyvuGt2Xfhh~Wtj=8rzyAX<2bUQ@)=mkm!R^MtLDkx+^5Q6E!b z$!IN!dhN9{fd1)x^VunOB!oouYr(KVa~i2@LvD-O)`ru|vn9eQ*|r>i)Z%Tmo_xDw7IM$fHN+$h8`T@R=| zLwP!T!=KFb()2_<=*~^9?ipC4aLnaO)Lnrf>_I#giLf{KU$8*qsCBdtGPff%l5`vm z+uTg_<7jhn7Y9gl*u=I3Gff-H%Ue2O!#Yl2EpIFM`wpD2c2i1m-U^ElArU&^?L3b0 zU*BZfQXW@Y-z4zyyexE~_3akULZQ|2@}vFRr!`np(fnSK%0vu?;0^WHF^psz)Ol(euDjn8#vmUyfV3q zCa-4%VZc~UzSv7?pkvM%`T(=TP=@43m+}m_?4bMU+c7}hv*jdrg>tps_S)@ovTJRD z^q?CXe=#C>wm-_y2M(&%|t>B4_)99AM3MnZTOuRD2owjA#48|M<=J zc<*cng@-w4Iosc5Xgbe!jmGyF2Q*`H#FPCynS#5MhV?c6wkl}n?q~-7CY~pIZ!~>l zGw=MnGX;u3>D1(4n(Qu~>>WO05Ydd0v@M}ln>_6&97hiu5RZ!&MHxK3>>i0jlQ0$! 
zbpDtVCG(IFfslzZ0Wg+vhv)#+V-)&<>A;=+s2-9I*u=w$M&uxk0b5c&|8_J+2;7_= z)>nR`V$}vGqQ`RNmZ4$AOaegH90|FXHkXrGw=Z;p_p}A*s>>CoyBy%vg0&f#)WHr{ zk`4u}NUXlhvgrYFnxC}u@@(6rI!67S;J|F`PG$+5y)q9`z!EwLvDW^!ib?JZr`7S zk1s|9eijFhlPRQ|%ZrxDz)!?zO9tM{X3`pX%ZH z^f7op)J56BDV}wZ$)F$9pVdN#R2!q)cze8wpZ?hJu+5|hWY`=)5Oyd|=*1INB<)z4 za5%nA6{@j(ZJjitXpQ=j3ptMUr@J~^ZE4$Pe?l1v2?}X}kHG$8ZYvn-qeV2Z0<3o` zvz&w##B5OfidHxymfS{-pa|kND59pFTz!GrHBW*vIqa?kFdAAA-O0u_Aq-(fc9*VC z?TXcSrj{#LW3DJM8;Vk&P0xf(9OCkNY2Fe3>Kf>kH0NGeLa8s@#%l&0Bhxz39OZy zN}Z4X{$qUlGYn1g=`N!1|8$6f=Zr{;O+UgZEH?cy{6K{lb8UZ8+( zk-ho5F)qD;WBk`hnbzXcC2&73J(aUqTspP)1?;J7{u?em0Tw*`x%6-b>c^$0Z~~J{ z|B}C~sVkHFap|#~z~a(B;_v!gdJ!Xlm^> zaid(i;4F=E=^HrC;?gDf23-0BoXFzRWup3AI@Mz*`r&$9`YYao%V2HZ;?f&d%%4jqsx-5g+X>Hovu4Y>4&IMAO455w77I?2`Q#sk4vZc57`fBm@hA2`4wL09>^5ib1|j_{{IM>)je z()}qAbLp2jfyJd$)jOi!g+qunbN|mh-y0X}^&n)B^xMTI3#J-~$fmnYjZv(c`uj;H_^}M*0AR zQU*TzjBO$T5d;zJIm879k;QO6IUA2LMHYkFpZe9$Hdy6#+S+73SDbtuvh{fJ1H71c?)Y-3=mz7_Njh@x>OKn~r6TtY zJ*`-0AOXC;qhblb>s+0ItRp~u2yqBXhdL*RZ|5`3NHl`__1|LU8yKt~#Ev)gIL}Yq z%%y5FZfubdg(T&R>Q27_%`l)~r#BfBr@x6=P@7>Q@Imh0=8`spB~b4 zvY+85+<*(UgMDL4_1wVWa-&9j*bSo7VLiv~A57KWAm$xL6<-(Y59@LDVhM*fh<%4) zM8!W7wQGa;0hxNlHYG%mOaXPzk6a|Fic4LO1sqT3#TKghgQBtDAVwc)P(nNhuRH=R zO#jr$9DLx2-pf6c0pIMyz{4WtEdW`{!SZ2k67g&l!w~F|Pi7Qj#0nXKq5vpCEPo6A zK3RPFmOjh#1!tr~;noF+1YB6m6IE}6zFZLlj$&Z0hy_RW%t&c*f3vSe)Uj8Z(qLsJ z$?4rOllr6zC_zuN%63yfUlHmt*oO(`=)e1-sFSaVPRB6lfb;$Vkew@H{V~+~Zcdqo zlp)-qge}{|kz;xn*m|NelEhz5>IrqN-+{WQ`jQy%F38|ZnPLXfC-_XS;1j*Fzm%zp zyw5?WlHb$U!9vdS@9BB=;|NUJ4Lh=TfY{=$5U$XPgyX1WLKeYa?W-i!|F{^5C>097 z$1(Ama|*h!EL%v;I4-swM>CFR%4QrFLGSB>QZ8{_r5-Keh_s=W#wP_Nx$G_Gy|2e5 zT;*g(St8od+6<--lBg}}4!#fiqV5w{Kfs9W6DcQ9*Hc`RzxrxI&DbYKpMZj8NH#TN zvM-97u}^HGT!kFf(HBMa-zQEY7vLHvQOqz=a1umqj_7d`T72)-Ej|h2t1b~cK17-& z;`E33Tq2B*V1{UkDE^_TxJH^$rsLC56;~X?4Z8_M7>;F(&3-vsDskO*Gk2I~tXz49K zuWwNMi`-A3QrbUW2Pe`X?BkDI6@#89-zQ`c6@l-;Nxj$)2EGKnS`3=o9PO zT+jnl^j=(!8xz-O>56E2p0j$*@`qu3+%HsZ?XdUG{bBwR$2j-vP?RMT^ti!nR! 
zI>n5OdWfer>zB~mu5xBj)pP4sUewzKgYh}=ExNZ%T>TcBo3$eQJN>pGun_0~=&*_6 zgrIQo!go;Q%&0s5oetq>tyuOWQf{c*{Uc~JD8uRhU_4$DgMUKk@w#O{QOoym>Co!9M=GEr%1V`--jXCaSciOi%+jXxAflo2OEHW|1#q) zw9>5_z0j_as$nU?kcG^>Q0B~ne-CIoS)1`!j4|JbBi&8Yc)n>Jvwb_LLIOnsaqt!m z*U&m9naAmkH0CAD74uc4$mKPlPgXKFB47piaQ-@*Z~g6j@93T`gk z8o2Fn`|nW2(?pHW+KlfF^!Le7BhOPffQD9zjfwWrZ<sC7!7z#@opXsra;`0K8#&p-@T9$}G_~d9L@Ys-mug9%8w>4R`j5CWg4S0Y zaU0>;3pn|6IG%ixSreL0MD2F=lgWwWp(TB%Fv5%+w|_7}*(v&j8A*Ml#q`I5Xl19u zlDK}vuu5&Q?p#Vc+Z5$dltLm~FhpF!lR;VP!a!noXxWVtJHm{p!fl-VE(pUG@(p+5 zfeX0)ux#ROGMDaGN#Y823N_qF^K9p&q`=yZp&|yS9);oP^6nfpTxp9ak_l4B%@hm5 zjdlqo9Q829V>V>HWbQ;YJjQzB9tegV!d)$Wgb6@(7HW?8slx zk^LaTC%zAw(jcXC#L5Vx6%50Oe@7TSJ-2Wvv@%+^GS-O{j}h*^pQEUSUEO<;bAjmL zF*4P2V!Fpj$~eevO;tDGg?N*JnmmXGX%u%et^Y}42LY1UVP|HB_?*4s7y7){JwV(xKztXP z+133ZqPL2wC}WaaTEdq|MK$|ad>Uo+av$ZXLa(Apmic2*9Br(Q8NhA%CZN(@W4qH; z&S*!d*-KoCHZn6*j$Vmv)-l#!qjLpESNC2@*H7_2F%T{qUiB-E`DN8*g|*v~n-Ihaa`w$x5S+kiphmpCIf zCKf8QFd+q&Q|?u0`ceE?Q4kO4-*I-BP)E-bv7w_-`t29xh{jEVdGXw|hw#-&wjl-8 zkb4`#aT(SYnJKK6@jje^j#1~}E+KUTZSv4N@kVA$9rw;tyrHEwnIktq6eJkTQzc+t z%@I`zz_544`UE2`X68U@g3Xo+MWt2HgCLMQ8RWqPRBnQ}DziEhjU@NIoOn2 z64CvCeOzG?3h3usvu*B^>8On_=DA6c0>6UF0^hzu18!F}HBZkE7g5 z7(mP9(zgTNjp8%_xrcLf6_-xy$uME0qx4xE+=+vUR^B2yr5jz;Tg3vz!hGOE@H|U6 zQ6vvJRg;L)FmWW^=x4hySlp3hBtjXlQwA_#qga-Ke4EAN8Nh(2#Z~&+A>2(7cSVeD zim!`eK~tkwz*BM|;Kvix$Hlp(Mny&ihjqdNETt>7LOsc0k|;fgaucDi@ft zc)G5$AAUoF=rv&b{GSK*WAWJd;dMB?_SUr=FPnEgTO0nY-Fr3a>Oqg;9hO%?&-fPf z;ghcDwE?!oUp<*1E?-q+`L?x4KA=Xg{TeySI^f}i6|q6lt7}H)uPz!Lz4lF9E37$$ zsC@b>@Ignkn4cVsxIg8K@daA+>Rlu7MjW1vTYY6j^r|t@q9{J7x7ZjTlwUmn9UH1H zT=DtnQ6ooe+Ses2;`zg4a5?!)g!hJ{_n~NtwTD}Z`^_{ZaRk6WQudDhbM-X8SZKV0-y{8LT?6A20AXCKzVokdtWS~G%P!P<{M z-h;aUM=uzZq$3g|+8*u%0QZJJ3T_(Qe7Jkyz9yjXpM-k>jx4U8fKS#|_hM_@9sYQ@ z8n_j3WN5WDDtaRv>H_fru0rs_R5!4rvC4r7e!2*(+z8a9O!1GF&9fy?|v~@ zhNg5kk`jpNqGe$f@8C!;%97s97n_O!X1+L5j1}TOP7&fuLCfuY;qHn>ZM*2y)##x9 zkC@lh_yB9$ux>_Pk1L$MU7*Q5U*d1#epXwg=l%UKHo zA923!<9NQ}u;zBBk_u}d*0%Fqfp+Df5?uFp#R7cU-fb>^*#iOlp6*5%qK{)IC3e= zJcUlYdZH~vw-yMar!iEm5#>D%Jn|xT^u&^~Kpg04bntlcDg8PukoO`-SNA$5e=)s` zG@F(yI`=XnL1>5d0(TvdEAH-Pq-Jc(qZ|h?V_x)9bSPdi6q52vt$dF-(hCbqM{%ha z>eQBFsrudAl$UlPT|{^}XBq?jID{aN-NN|b%9J;2*M^r5KjhuKglc2HvOZ;v?B z&xrNx;121E@oDFm+TGu{GgsoxF#9#^T+Qv^0Z#5vZOY3qeV*VASC!>rcYh${t>RLD zV9RFV8~`@u6|T*0+j-#D2(^z&27PF#HjCl`NW580M=JLgP9-l+68>$j+cbbGd5Xig zz{KiNTuGAiY_m8r(8zO7XhnSi-AatYJ?M+g!aa!iKn|AN7j0_j7cK@5LT`U4ssiEyYem6e?(aT>!4%!YsqO>#K|C6FIH)rw<-v6Xs;gPU+LQH#yO7-IAa+snZ@JdbG{A8#ox?>1sZ z%@D(p@(xGJw{EFm3EE=FH;b)9j6t^SRw85txU+QO847-Effzm1xVg`FOg|-avzBR+ zv|0_70}L`m`&L*!t-%yFoF@dOD{O}#4tr3jHyYuc0(crxR6DG?GU{GmSla?K>d=k1~+UckAviGahj;N4B?v zv|0UoT-hR^ohguOc#(P|BtImK^XJSOzGT@5msaW%#9aUYe_@>O0yWlX63G38_)u&@ zXw7cy^0Yt4Mwh2cYH^TQTNgRe_;)alHcwU>nQiB>vgLBd3);_A^0(ZH)2e-nznh|a zE!o)UU&Q4hT8yYJHUgsL5#BwV?9X2G!i(&ps>%qrA>|!+Ls$#xZ+~sIEr5`fT0fGqK4;?JvzXRu89P zIt52evj0#Jb4=ZDb~jlAr>-U@n0obS7d$oT*MrnkB(kJlT2ZTtE?bRFb#reuW(UJ8 z`lT936|RQSEGFTO)elESK`ltvC60>mMG+M`DynLY%*f9;sy=sfG)xTaZnPkK>y-Nw zPEwz}IVutsf_n%I7oEErH>hD^@j~!7d7}JI?7PQ@Q%wX2lwAJ$h==Y3mCO?_-U({c zgJavN$6&M3tSeP0PyCGNs1lBT%DbGa${QX6mdCcOJ+zp%<$!KT(A9kyanONvZCr1t$IBVrgadMFn)ngq;9|e#20_tGNSNb#TKwlQH zsfvxm9>!_#a;#yNjBvgvTnx^niARiCY(&Pq%^*L34s@M&gD!W?hs5&5X#7gC4XNQB zS&Zf9khruM9W|WWy3LD|XsgIx0y167!8UH;duU;W7`X&_VU~Xh_PqgI&?hjh`2X?t z<$+OD$^Sh=0twJP(~yKaB-}R%aw=#Jjs!^v5Fj8BCdmMST;$+TG2nWD0!t7mR8UZK z!4(BhT+sy=Tu{)Upx}ant1BpIaB;mrf1j$?Jv|fM@9+Ej=NBz{Ue!D5)vH(au6{jE zE7lgs%E(2;!gVT$)oYfJ)!o5tDOL1bT2txG~mEwx4 zy*89TwGmB8_v&HIx=XBv5{tKt8b#5hd-W8vui$$J+N&+?xL0rAAz9GQ`7~9CTI~Ex 
zCI9kXeYAU1l$uXJL=%@|&Wtb5E!Y2u5l-R?y>DV{47XxJ16Cv+RC^>mu)z{737-ub zskL{rmv+?sXR28N4QnaUesW(2cblv|AXj6vKf0NYt$;uKnJyr6JX+nS_iP&_k*{xX zvui!pp4Ce1&4R;c54{m(~I|^(K1v%;71K~ zZ+`jn`%svD($Tx-@IWwNe9&(?Zo(i-;2OX!SP{ zMX~!)-2X5PIjdox3(JzltMwKU0TJ+PNahEH3*HQsx|tb`BtGCy<^NIsdlIf%4|i?Y&Kd^X?r~~ob6UVEvrEWQ)z3B zehU_Et|2Al7QYr=sM?A=cCD`CGMVXX5wV7rtVP6U>C{@x84pPjf5a7+EDr2WOW2t+ zhQ+hEoH=8RghS_y&r-$%de0&6NYWLJ{uB0&?6z=rZwpRcNx|dPX6-mTUa@!PkI&L> zBzHgCO1uL;3rIcm#HtC(^-nn7JT0X@s8HdzoaL*I*RN20E}}zh0BULL2F!0ip%WYQjw3s?n$8bD-MtYy@-N!KkGJUPMyN#$T>`D$`%=xj8eKy*BXK`# zA5@e36K~tsQsRSp$94zenD_B`BP9%7){7;h2&ie@p&{P?^8eiiaSE7<%F%u<;QcL{{!5?da==Fzzs^-NP_iknO;S zp+ne~co-?)p{j@V>fTCKCZpQ@ECZrxXQU-#F>m|`*W9U7xIBT`OYxf^`X?x3lis39 zP)hP-=&f2^7FK^dgM^xJvRXR23DcGLB$hLStbI9rGgyx{Vp+VQGss#R@QB{G?E~$Z zq^uLK@&}cy@sfTVW{g`OL7kH6#YZq&X)2-}hW6p(q0Rs^i!ME)x9Gh?vI`xRL`jn0 zPA*d^@ljav!Ib_eGLDm2znxoNrez$fmL7am@7qeT7doNjMSw@?+@oN!mcDxwCj(6C zy#;=*xm4RxgIb!t1wMDMpnp4uJV2YbNJ%apNxZd1Z`}JY5;b%pnJ!h_4iDQGzP&&U zIM2On3F+F$#|YV(S$-&4sF@|(idstEil+Lfpw-M$p%-t(+WEJFc4n3z(lO37Ofq%m zlqyp-MBz=CS~4HQivQ&HYEC(xmOQ49a{swK&6uE1jqfT3R!!4Jh*jX}T%vrWmM4%z z$&c&Jt1kyg&OEY%bQz95n5UblY`FP{0M40IHWanyc+RAwsfX;y@_v*H^I$Q7Ya@;n(R}+l#GoFPp?fdfPXZ6``42!~_)1QsZ6#IfVx#?VN zqvllmoIVsjOMf0QpM81y^LoYq+bs0I%|QRB+2`-fJOdL?OgjJjcfJ0%v&}=xFxLX$ z)PWWU7!1e)Oj*pApnq7XG#)w3Ozu{q(HuT*%I z9FfAC-`x~GJwj#F*`=Nj%El;OGX)b|z>0?IdCJ0{0W*xh_Jv9xE$0&PPI3wGC|^5> zHSkt=4vSR7!NnatHOyxLrL@s5KR>_h8hY3+io&sPlML(78AjaXNMafYLf!BJG^4 z@MkcmE(YEw$16}XtAG)hf-=7I0u?V3zKjpsb^cN@Uf5Y(Y)p@@EDPVq(wF zSFn!4&tjSQKzS_2{5PruPu!-S%Pi4PnV{l_D+7TSCvpEA((i`ox!_CLDt@um_Ag>9 zgcI)BSx61ket}#PGj?g(yMc66Ff6*Jzri|Zj z)##ihktv{aZwa^Bc0QIpINk}X*Tl=j8xlcNFh1O3{0T^69^P%xYAkI%*F{Cx-Bvw! z*xkjlVVf;cr(2>PmZcb4Z!uT{hMAE|9aPQl&QbYXuq5lZbb~LvgwV7p(v)cbRaSo- zU=?u9(qf-g@aHm-*EB36Ne%o~+wZpQ1!K8p*DBTXMN7FZTN2x9b+^}kwkn39I>#^O z-ogEUgk_l`@oqm9TYJr7XosmXOz)?jmqFu%h11nDysN_RY^Tl#EVCqc3dV#bQKi-* zwY$ez@y1wBIn<7FAc1PmaD>%1-&y^*w`Jj8n}N$lK>u8|m3Gu>su6`MqvQtaIo;}( zJ}|=htr)MGBL2K$XmxZ7f2$2ApRW+_=b`XV#5tdn}4vzljs}{v5$&iRc8{hF6NyEe5Vx zy<(?T)9{XJV|OQ%6_wDsUsM?n`BgnHSd1LCo|i_d{5L}>*#w`kn)a~O1Usv(HaceM z0Dqi~87hW}2M^>P|`uGZ3kG&w92b8@S%@zf#=Yc{M=z>{EDRmpIf$l!#@2qE=fJJ5A%WE z<>dkA;H70i1Fc#>0Fmn==FLO5^tHslEFPSsmQrC9Yx#H^h9!{!Eg(dyye=3Ak9C318t8>zVBhzSILJ>_k)7tWq=g9xbZ=H4N+T#OD-GHYWINT9E41# zYasW2N>aV+ir4T?6;=cY-<(p*3@+_$q4B_5y-o8hjXS8%?)-+JuXi=Ychc>t@;icS zYE1+&U!*e!G4ek|-yPIDcHSvG80dp)a?N5rBRI9vQilQe+cfM=9A*NW9twR(l9q<;kp$#5-P4l)i%I zuM82G1ik~)ZPd<(?nmk+*q`$9sZ8S?r;d1p4WxcP5H7XFyV7~jORip6|7(>s78?Dfh z*T0YUFA*HS(WDkB7t^`-(a`e+ZNKA(BM@XBhknizG+*4K7Lyj!h~t=^E)}%;gfuIW zmC4^6MuyEpU^ut zc~VGx%&YRKILboB@5>h~1+AsXj>>ir>R*j>RkkLPU@aYpjKP6WWKXU(bjLu_%M74&Jo zXfB$ze5gl6x0Qrjuz*x4vV{xN4%2JM)9fiEb-68fqU-4rO2n&v>~x$vZ(CFRN%V-f zDETDBbd)BZ)NhLRN?Kl-QU@Ru4?9jmbWMbG0}gF;7k0TJTGNG-*i_EbDD!J=_{T{{$4gvB$KFldERg7MZgiGgq=cJSr-o)aC|0A^I9Q1W4Z9Ru`30uNB!xyoMbWcFa?=@sx!$A2g z>6hrtIV{-T+kk%Z82uvcjouq5`BRt>m4a!}!QI z@ASBFi`hT1cKiVy{U_@49zJ4?74#Tt^%=$o@6mwI^g(7zNvDq5a{oR-OFq-9J1-P8 ze=$#KTR$1Gcfs722Qw)q@0cc1{O72`6w3G(pQC#x2}v0PW@aMA|4Z-K>@LaL8GsFFK(UwD`20%` zbB(2{f9W^HY_=E<3o>I{-Sql3y@yFcQv(&wwjYImfwR9y1l^2%TdVkE5>TtDn7?Ts zOPDt`Qp`QV;rcJIn)*4S&K~xJNE_!=M)>rDgJ`qw^bPDu+$xje<)RHRa991WNxBHf(G- zmAH)9#CAefZ!@!{uaIyL5eNI@Dk+=4a+hKr5vd(Sa`Og>mL~?5rxc0g`bzKEy`P{* zqGz$SU~aM8Be-;^8Gj8^)gX~FzJkUkQsGz7*gT2BpQ=<^o=CgD((?z5l-V}#&Z&$t zCA=GJ=f%Pv_mw6=NtFhtU&KJE3oX8gH+2#v#X%IzQvx+yo+mhdDMS^hly(0_^rQdK zg^SSHaS}sapu}x_iP~L)o9`@WmlVwnxKP6UMU)itkVt0wC4Fr4Y(eW74>;a;9__yb zq4|X5oNoV;t|5hsDC29me049BnuKseZKWlQR`*TKl44muzoeZ&yY|r0uc4;vN&g0F 
z`e(`6QLe`&?5OEqC7b{?Wly1`{3K2P2DN=eNY1S&d#L6cJ+;x_1>Kb1{kt9=|Gosw zK-G^(*wNj;l3531r%Jh1TyV3)5+}-PJ$w&4j7G;oYs>2Nf+3HKRz-{Y@GPZ3!b9CA z<~<$pX;F5GEaTS&ufAQ(l}{x7GWN_?!CbUAdSFLw+-1mO6V1OY+O+C2#yEQ{ zb+&}H_AnhqjKnzMN$vk~-?(T+wLNg8(AA8Fw;11}R^P%Se<9HvYxSOlS-MJ+uaf^; zj4u967ruqrd`DDrtkz35mX%5P2etb*)Do}N{~MZgP-3WwH}^S1d=HP-m1R0atNx9a z`9h*iv!u@4?Eg2~I#FV)J#el>n1ovf1g)-EdKD&Mt0d)$d`adNROG7UsOra>{iTH2 z>8Xl*PSdYooIRdSUBTg&!g>YWCSi5C1uN*XhV*dC}$uLJ9ND9m;gV#{~f_i*9;8_^8Ih(U14_mJ%9M(pg<10#wg+WkEg zDE{T6-@|ZY@aFnazoB!7rX0&Jr!y2cIEpVg<@CAJA0^+9aD*FZ)sOn9=uwhyvv800 zYZx=K7cnyE!jEv(S)~7j$19Zh6G}>@?IG0E|$JLLb`9JH`=1D<2uKodXUB!kNN;8_H*ZhT& zucFtyNj_ksF`7j!y|7l1V#*GH=MA*tDtyGv5%f%Ru$L!M9RJRSb_1Qe3P--xO3m_N zjd}?mEQ>NaH2YjwaJtNfh#&|53>993ga5^1GuTq%j$Co@KM2in@DI?B*WlpQ!mH)r z`$^f3gTG0_Zk&Tq1{t;Z1*Z?%3)*q;576OX;FpJjQ@kHHKUtxpi($QbhFBeNf#|D>6*fEm98QBhg!}@kcU$aPJI{rOx{dD7ToC9~c zjP~*OHRkqFiq75rF$p{Vd>tjbjGnINsTi>nmkHHz=O0Sgap$L`DCN$1(7J(MLu&I; ziRQTT4fLbS=or00(4p>p4<)({QKlPP8LgrDKDRN*#6ebY>A3I>w8L%W4|pn)(T)p$ zPQsxsyfi2&JL3(M+`#C|_X#yH+QY%sG%%V+zbgqHudP(@l?Fyv95p)E!06cQMTy~f z?QxA+<6=9k5WM5I{~_THfdLJAMP?f#hKj&-Fw}y>u{jJ_3{${~D zwr8M(9e3TH{E^1}Sf0Kd2}MqALchcreK09An;Pvq^lrl9a(wdw3HKBi$xbVw36;bf z9n7(UpBZq@!)Zrjqs_nv1+5m(xwjTc*m2HxOZX_9^PdCGd3+PacY)9x?>t_@j(5(W z5lxKh4*w9e9WdQdmoxMw$|XF3_2Fj^o~Wf&dJy;6qbyq}{ThSA3Lz{>{> z1I~Lo{pc}n=(0}s21A|qOM-Kp_f001e2TMu5jw5@5w{q;A~}b;@4wPHuhG%{hN-6R zJ*m41E?#ec!5#J1E z^BXDAap12|VKbw;%V^2Yap2(wXYb6_*-WBn_U7(hI^X{q+hee#37xH1TKNR~cBH%S zs@}u(8trUuw1_z_Q9^w?qI}dGI^JB0btdoKNRNW0oh(V5{uN;a`Sv*68)4k!DxHdx{~?j882+?dSmZDphlY%5GUPQH_bL!JCN!8tQ-WN>|JqmTQlLAPFn z_BIlF=uACXaG^dQ>HgBd=;A)&p)W7$9q4Qu1Gi9%Ddpkk-#p6c|15HGoPLI6Wg*z-2s^j7p(tJiw6}02w_t93M zaVOLMw&)1+$kh&Bc$q|U^l7n#9o1Yf;ZXm6JB@1xdsitW$GxwnyW1J5jW!EfP0(-H zApyt9KPF*EU4yf?oxH-S*}D0tBye2)dPIoq3-?wU5-Z;J`j@Q?GH%U z@%DeEVI7R>_zWq*@%FhAcD%ix&UApt%T49%hYQ(p_NyiAID6#+9cMp^rgVg|PNc;h z;hkVlpK5CK>+pi4a=iU55_Y`(7L$HyYV_%Fhv1$5GFQTmyPpECbfYchbuwBsCI`pqduLD9x)x|P>FM!81FZLR?^w%(mhUSFUWELuk5#!@Gfix zJOG@t5fIKVe&IxCXCo?pgTzsOj8{XRlrYOQ#XJv{NTcvBM#olt1zm|HsJWO4JFiO( zq2w+YemqH2x?rN!i|+1Xw2w}d$mLjLQpf6ft@KHHrHio;JMoEKA=BA3uB(ySW{N~~ z-c%}*@J(2XYfqoNskbt35rjHXz-wHy=~P!^l&g~ByFtU2TFH6P%LB^A5_WoaPOZ#&F==ov_PV&O)$>5=;f{5(8$)$xt)-gY#$>$nccHiOy$g#d=lUYy1Jo+P7{rY>KEb%Py6g}5=pEo` zoDQ>J2ekL#w*znQ!|%oK?_O*-zNTq20owrQ z0co#e?;cPKi25sTUid58eiMG21$5en4SvAW^kP46*Xn71!$v*eGeEZk7?A^Z0j>ZB zAH;?}U>_jtP2>w$38)1`{T=xNHUZ88Ivql`fTsaiMtPUA=15v*|nwgWB#5{_bD6tD+y6_E4}jtT${0h+#xH$(yJ04D)$Yf*W? 
zHo$p6!ZD-;>;YT_B)x~UfJ1<$?;|Z>9pEIO?eS_PK90nI^MGC_aEt-)0^kS0&<`|i zDc}IW`ysBT0Bi?b1SFh9L4ZAgtAM0aCA43X&?SPAbgilZaU=QFbAn7a$0DJ}*eh%UU909ca6zKrl0T%%Y|3o^# z9>5Vy?s$&P)7mwF&j4wdb!-Rlz@$0{|EvV;1JnU}!Ol(t@W+bw0=zhbu@Vr`3+)To z1K=wFhXZa0908>DLCpZ!eW440tOVcz35k#_Afg}g0Mr5ITn7mOru7GY0NQ`f03ZOp z20{@5EeAoV0AYhs0D$j=UkSJZD7YR6LIAyPKy3lNhJs1J&|zpYK&Km#2cX+ckSHK4 z32z+(W+vka$WB37fHA31YQWIp)!=YAI30lu0lh{d3}`zFi2=(2+W=Pp!_yE3909aV z2UCF4fWf1|FrXID>1Jp>;54Aq7{me80dg|HC}4OdBnap>7EAz=s>dM#APFZ9k9S1E zVJ=OZkM_A0AC3m}zXKN#0@eWb0n$1{bob(|V8BYiHo(S0G+P@mR)7M1kiZP+L05ET z{Jv`;q?U#Y3;|Dr-oG!*4^Bl!;I)TED5`pBHfoMcBf3HK(;33sO2I+5-Je^d=8%di}6A?p34DO zQ1BK!4*=4#aenpo=3t-|#L^yJusgcl-B`|xf|7ybZh$nvM!?k!m$s*yOFMfD#2t@< zkx>qq+y+>OJk~+W`R3*#FxL_cx5V6$=hr#Nb52_{Lo5mgocG@pI0-e$0d$Kj^ti1+Mdv~M2t0HBql#(>jbL-X-G3&*`C9%4uQG&DmI zU?pG=psolt2Q6zBnhSaC0Yv1$rr>qMrvYWv;3VxLj1FKE-~iwXAPoiI1=tQa0JsQ< zLV^7O*?>yG8o&zx?Hfog7IaAP07N|oj110#o&feha$}}|HW9V`Bbc}q6^TYy2b{)9 z=<03|U0F*+hF|K2XRpt|Eua#x5wI6<77+G1Xn;Mf0jSYwzyTC`x*am?gzkp48=)VE zmP0uKTL5KVqy@|btN|vw^S8DiS7by z0`S-EcR^YBqwoh%D!)8m|MsdQpxZZC@mnhO?@0K)H&pnNQug3UcxC}`KWX)Uy`Z%I zoe2M%w^R0j`Qv~(z_R`7IPLz2)pK(yPn+@C@q1J_(a+&(9O#ASGT{Qo2K?LsBf`&v z`&GoMjp|wRfO`6OsAq4qF~{q@M#UdtonpVxQ-zOO&*B3r-r>j8Gx;6$^b?*aUpw{> zg}79q68e+W^ELEoE}-T%^$ee)@JpbfjGvyR@aJw(&vXm_-4zx75emlf9vrUXAB3iJ zcti*FJoPTD86zUCn$^Us1QXFk7{BD8dR84%&qx>r#-CcHGRS}p;P6%$34SJ8@oM_1 z@EO(%qbtLoYLZuPt%t6I2j{CgF4b+F>i zRT-VIBy$1Xi}M@MOX1TkNfhe}fA=5M^NEq_IsX;NpA#(0R|(E+P~pQ7DjeNkJ=lA$)M9TP6->Ro;g?gSTRq=~qLYbZcPsYzB*DbRURugoFbz{Oo zSTBCID^bt&H(87v$0?#Lqt-P!;vRHRfITF(hrq}#{M?Gl@~ZqwP3r*s%ovUKKi&|BKW>k1M0ZT|HV%j6wZ^n+qPNA6 zb^KGyEhw!h_Lcd|%ejOYtuu@Rj28F?4cL-W4!5@O1r0d)86=ON6DCn+`zxJj9js_` z!a3TbB{40i*CcO5L^oImWLH6FCwil*`HC|R1*gO@b}6_POoeN?xrN1LxoLi1S#l97 zg%kIfLgN#uu%M|dz}$RaQBjG1;T$lg@Hx{0cnAThi?mWs!_04Q!><73=9gE_L5^B= zu}!$Ew-As~acO~H170`z^U?JZKq|U?BDG$8%%>tG{+O&uW4VRwa9VQa@G&DCsqF(3 zO%&5$jwVg^8nbv<0j7$5ix&C{(Oz~1hfn2-6r+AzZwG%>@a2A+V8^c&Ymr;-XOSRD z7fT>pP&eE@^6q4BtLlK14q%J}>9Hs#6#4yS$iLi{;4VqP%;frV^Oz-v*0z_VQ(STC zAlfF3jja|zoP+(grFu=JGS<7Y} zZd_H$18P7aNBJwq_$n$#l$V!EZ?Tn@$C1HGJCbvNh?-g5VGjbj$`_l@N?~7slZDv0 z`>LW5TGEtv{(nr4{}( zUkU4i-N*Ny76Iim30YLwI+S)yH3<6syxdvPAWnooo4)M>p;wit2=woK2yp2cC$rp= zQYHlw+;v=Ntdyk{K^fXD%u5DnDefT>p`<|b+s1U4va-2lg{6fhvr2_|X6E)3Wn?Y( znus-B4$1FoLz&aPF)iyw05cKh{h-tC>E5WuoCW)`6*2V8bZ>_+p(M`qHcvrDw&r4Z z4?{N)u~ zy4?yd$UreSzqGiRds;}8 zTld(2SYxs#WG9c$o|u)MIdUu}q-+b~;P=O5rA%SV#AB+BG4u%HOP`H#Y8PnT?eH)G zyw$q4Mw~`oLbL3yEvdLwN+K1$%F4yyi)*QRLj76>6uTPduX79S1nk_APRj~X*$Mti zZv8BXAH|JW#B{sB>{C);QH9?>uW(@hKtCV)Q3&2P1C?CyKS4lQW0J>@8j+ixnN5%R zyj?^HEujuhKB|Z91s_3pyOW_DRe_V}WkGbi9CT<^kF?NwglM|0$us%g5UV>-LY_C8 z=U8Djbxx3ZALI8e@TZm+Ru*C`G{K)=Sy+nUwiafa!?+$&>*7jQR95YhD)x##+q#`q z=2Q(;*#5!1*|aMsBrT6wt1XgaXMa#;v$V`#5^OgmV-%q6!#swy+gDbWTgkl?Yb30( z=qXMJuF(s_`2(()6>gCu64Zj_V6LGt=nWd4x<7)r&ajZ(43ffC}4ZfbOw~mwF z4bfk$n?fbb+dwJi4~W~E!t;#wLk0`FOml1Txcw#GijFT03m&auodw)3Vduq_GMbL$ z<}bqF9Erx;65DANqkk6kB0=*e&31YXXg5BU^l_ds7sn{l^HIrXX+9p^-;}HuX#9ON zd)J36`>f!3-&eie_$=+uH#(-jCFpXCGkj@U!s>;#7D)CV36I2UJbX!!+C*X#9-JLZ z|5@r?fTLi+XpD(~E-WzqfX~TXDljIPL$S<;1C_XvWDp7rH;2AF&TrsM(igP3#P|d? 
zSW;?C2rT5(U#PJcX@ajA=UWtmzbdmd9NQ3peSqItr1{^h(F~A_D}Qg1CJ7a<{{@*W zOkC#x=xl{I%bip}{VKhA?wkVJQ0XmqFDjtMRS2&sppq)@o$fsa^eu;L3uw{;?*eyS z0ll-pJHs8}r#=h4E1HBqWtoS;etKh}cWV>(3=2`_r;JMhPNWeAN+L3ttcsO7P)Wp&T=QsqPe#LKXVp+dYkt?_wrd(ay#g|X3@8|gI+s} zrY`Z8xa(%oTm1CSrt9zUPI7mfO>6J)-s;YpO-421FG9H5yV|{dHg#C)y~ABQn;u)r zHJDAUmvOo|bl)<>A3TQ|-s!!^J#!ANzSH|e6MThG8N7{i=*B;IV_#Zo%yBhXdXJ0# zewTN#JE4$L?nX9Qh4h!ZQH`=f>T{3x53WLb?jCOi9$oH5re_Q3v3tQRF6(c+9F^!c zmo_X%sx$?;enu42$E!h)E2fk`dPlkw zis`;TqE1=G^z9#kpHobe)*!sRm|kCl@RnlgPy<08DyFAuAer;U6uZ`2(j^Rr-Ks;8 zuVi+WZ?<1mM$?jQ=Mqsund>q8O}6d-%wb}~3riN1&hzIk^i@`fe}LChgPtwtn7%N- zB7Y7~?|e2s?JqXIxGWey2208kzsS#iCJ_IC9lsCIaY!VYmC#b<#c8ZR&!UA zS%dQq%O}$6nno?@#u~$Lt)Yw>Y!DL_*BJBg63nR@{?-SgvVF9p^7{j9AK#$Y))`%+ zmrUlUF=# zMBoB_x8ArHoA66EAoL79xWTB1ep`|>YB0?GcM*jSeTSS|SPblmonr~7C642rPty`4jecmVBQb7e_KP|6 z%0oD4uuDkQsD>e;VK?^zT>{DVZ_*zIhu_n(hY|XPwmu9m^c{8HgwSCcvB~(WE0@AI zV|RZZ6>m0rM$Z?HxTcCD*0FiCbF>a8F34Zd(>!-(6}u|d#YV;goX2vW;(5U6iw!*9gl*O zD`ai~CqGM0q1t|h6LxKOtzIFGr_+5~jHx)U`#DmY|Cr7^uxm{=v0$N1p!lsgF7XHz zZp9ge6xzQPJp7qXY&G)Di4s?xz2MVCb13~WM5?2$kD+VCQ267;LX^r4Z557 zfjCav#8B1aXq$b4=C^s&$Vk1VzWZ?q`Aa(XI7&N3pCcl!GBw{{5)1O4Mh`&3BQww{IMLT+kQqncA(q# z6Vmd4ZuX&bJJ4_p4{Y;=toCd>G$kJ`4a zL4njCua=+rGj!?^ihLS%-9k&AM(zGYo1aEq=TQ7kgi0xSr}0O8p6J+4#Oz7ec4D7w z3N3jCjh0)$)vXE9^eOZTNG>0pdj=<6>L~eHoWQMf?qiN|bBhHmaTR~`$=XcTAu??f?$S$J{R%O_3 zYy-}x;@#+1uhZ7uu-#RXmJcK;Lv=4jK4(mbt`PLyT8`FJNPNQaZJPfal5=h2|osOov6XETo^yg^!xBQ=>t&pmHsnx^3S zmKNn|VqZ32Fy4=3{}sF6XuaWe<2W4FiPv#BsGN%bio=nx`F}N@g$*p;XDp1&6*Iys zuD-bqns3-|w1w?yx8L{%mPCI8+nEz7;|+wa(6GN56gf~jDE1?0*@5upRQESyIJ)J4 z1IWH59X((SitH#t=Wid6`_yPoaR-etXyf98$OB&YSK{Uai@%gW1Xd2GD~5Inw4!hD}MKxYE52^zSb?*M+E1>l-Kttr-? zhx2t!|J#kwZ4mnl_u>!);3|N>JTn-O16TxD2Y3Nc3%CdfTjA2;07C)UfFi(3z&1c` zYEo|EfZ;a}A5CArgpLL_?V}AZHikcn46O*iw z{ESGWTlwitqi6WpGL1gvXIvV^9L2L+8ja>>LK@w1)H~2UIE`N7XHpt{&Cj$n>hg}a zhdV2c#=pbE$u!!)&$2Z7h@Z7_6#K3>)s;qh?;^M+jh^J^mNfd3pS#kiPc5GN(x{xD zwaA{IXVd5>e%7VYjmPlR(&|FOD+b!|1!#2y@}9P~peW(TZun zF=)pUT7L|ldI=ppX7p^ZWR{D5O>KbhCcS5Pv9%um9?S_|qn*p-Tw0YBXDQw-(y~7;|#&M%{_vVISE7Y zIlXui{jclG8K;b&BOu50Pr=<1+Wjdy8{S*^Cp!K6xS{}$Z)pEN5!FvqK7$LsgSLJK z?}}ITIE3%_evT-}^mFvpuC(l5hWf_pzWcE258xikz4gD%fPb%ta_{`To*CRLgL-5A z&N%tMexiYU<9RR@hCS~%z)-+6!0mudfIWa(z@>q)f8tFG8-PgcxM>>A)K{|dOLb`A8Jc)GQ zAbuUn`^Gz(a+Y{?O04q^a(lCAZXII8WzjSIOvs{d>ku<7i^g9@&N*51$YtQ?WYMSm zT$Dv!zV)UwSe~WPuiqGFTnp(;ov5dI87E{HQuoV7-v$eD{|I_l3s14HVjeG6vQxw6 zIW0q^=xw994>@;kvnO5n^3KafMIsy;>`U&f2m^0ibC#-)Rq4N`P zUj?2&OrVkcjL4?@_}Ma>-r;ArY-;81)>`1SIZZA{~bz`v$Crr4z z*8lwR?>SWa^T(D`9rqir8r**i;8pqiApDgTd$FozFB}?1)rSLu8z})yOao9akJ;ay zHktA~kZsOndXk@WCexSvTr`>bc=235nacUOaWcKc&+U`xCw}gkOgEZ%9-2&d^7G_m zdXt~$CsX5Qc>XY%GMkyPFL^x>%x5&j^xR-xmCet_fiYSP-EDfhx_4#JjbAirozbQ> zHyO8$;|_vw;tsgYno2x&8-v7UGzo+g6VTFvi=c7MJ>Jalk!hxp6p`&p) ztTl~p=IMjCOcyuvTxTXobceyw^jb4dPxl&&!LdRPWiSXAU@$3As$%e_K-gh$s4!>_ z4UiOrgQ=>yr!U@uf1)|~$q}lWPVu0223=_GxdE=Ga}*-w)5Ivxb?*GuWcb1c(!MB< z*EQ$mV^N-OA|Ubo?L4zlukNvUWKm75XA}~giuGJq9kv>yB5U~fKYg@&J2bWa2fI5V z?QlQ@91Nd@tN-EddRiMY473zBgU5ebu*S|rhW|IMO~ZB!AK1GcaW?^80Mr6#=^1Yn z<#aHkUW!BgapR8}@9Bwag@*CtODc?qr2az>atI@QJlu_2y!>sv=Yt5uJ>0`H7SvWf zJ?hBdOq`5d1Xu&u4q!3u!!x*J+Z~!a^8d8jY;|1ibl1?)@PFyIeg3Z^I0;660K`S( zUVYly$82A{1*`JcVGR?DbtQ#ZWy|$d&EyWhu zqDs*0?W41y@y3*v&Z{aLSydu?Mj}(Sn`sY%{8|5g3p@xueUNsZwSPprRT$2 z(dxctQ{EG^bBPFwk&5#OmHrV6{3Vsx=hX6fI}1;IhttW$ItjGEt|uS#kf_x1fmGMHTAU1TJ{A=?S59lsU;)ROQDu zm`Xo5ln%)#x?PzhKKE&Ji~{8NjV+C|pv1^IyD)!VDzAEjuL`>+S)p+(O+ba{qnp1H*eb}Qt859ZpGm=2l zpDQg~9<7*e^Ui7cuRldIPSxAOy+xa+A??%54u7|m|K6c?(>$drKmJ}T|IMWxw;*lN 
zaKt-l;YQN1TRgW=-5B7~Fe>4{@pN>mC)bjIg&Ry0r`r7Ww^qMv>D(0Lw{)~8GQvWA zK*fv__TuD#41dwGD_dueeFq!q=hp!ge2E{-C}R$90$#bgILV{p&6$HQOn74G;sj4l$ITWrJPRxq@WDF##nA4tb{;1!+!h+fI4L?SO2vDf4rba_*=OPIq=}iV z6kNRocsu^Po<7g8tGCUpU<6RB@8&2m&fK%yI77FW=K$!zA8TP`F z4gULxE)4fnsP4Sc!r@cq!?`=NS@gD7@qVE2;V4BoTx#Kdq4lYDYt~sfFLh=di?VS+ zoJt#peclvLo1Yz` z8|%>?3-=UFPx7?rt|H(C57?%Ga5Y8wJcz0+Ez0K+ggUm91M4u5mSvmm!+W8#52U^; zV`AvlY%?-E0X=&l9nLn}R4+xpMGpQ&Wqk0k6lZSyI4+}d*aJlZO%4iLB;>9R4Mjm0 zLTPetD3Lj{bO>{l~*@3w}TIyZ3>*u*~fI6J`tiwh?A(ib&OqQ=aeD02eMd*V?(wSkX(tO zpmRN%#js7^)8+T*34!!YF$S>mZmlwx4EhCRn2oj`2z!iHY}u24p5UJw`|{6sSH?t> zG0AM)aOeQehT7*flyl;`IS##f-!!$o4&^|F{Ig4OM!+78?SV(P2SGfL7*m*6?gQN( zB<%~Lk16#PSo9H#F#6#*?CsaxEJGDOhgFtaTv(BxTMiu}&$5Q$xZTf+v`Hc{A(zi# zR?GsP+y87F{cDnma}e>AIoV90M<$!GDY05pGaH=m!la{tHaoX$Rtbjaqx>b~k-oGT zbK?l1XN?+>IUdLJ$7VR{ej1q$!h+S(hS9xIy3*h>RAZ19C_OM^*oVk=^=a!QPm8F) zS+qkAiL!FcmWCwa`r=kajP-gxAJYzIfKMC7V+1iKYjSEDXg#bF_u}_pi3Bybr5RUZ zA9Ubz4uu7orIo||vwT%W7{KxovRu@8=?uDqyfF>=_pZ4VJH^ZlXI_TT{3&Le@a^DW z2-QqM%o062ao16w^9_F#xM z#yV&*0>|cf=*#Edlq!D)BLRATVZq4q(&9A#q6wAdg(b5C^ouYWLo{n`gAkaUDT33o^Hl;FBE#_a1<#z zC1E8 zuGvy8HJ^o-1Nb$AN){H^_;iZY_>;PZt@wTh-CP1*cHt_V8D>n(+ZdE&jhuwJx5L8? z8gb<6Kxh78rOFv6RppH$yPGk?L*XbKyz1m}a0lQruL|e3ZSI+q>`cigmrX}SWkF#n zx`#cd$b z?E>14wF)oI%bkTq0^}MfeCZ!m1?3bvj*v1frrMHU+G<7Q{Fr8E*hysw^NsWS5J7N; z`xMPSywB7?VJqvU>leg`H%rQrbj?(@{E0wS+m&tTm%g4p4IASi&ulR)hkvRQ<91a7(V}2>f@xqkK>LAdbQxKMZ-Yr1 zMtYvvqTSGsmB8R%paQ%{;ON$kl$?i`DfXyFLQ6}GfTnq2*)fN%s8r|x{^S^yWqtgS zRSK`#nv2rScc9Y0&EH|+CM}oV&oj;H7hpvY_)TGFpXW?x1-+8sBu;|52Wd&bN)Cnu zgK74N!=9%XNS;<&b(Fe#9&1c+=TEjX51h4jgtraMCWkO(nQs(qjM~k5y=vb-n4S0<(3*KKxF#sdTo$j0lfIL!?R` z)%K`r7hD?55Fho=^)2uf7Wwkn97vBx2HFcjbbkpG3q-S-woJ^MZR#Vf1k$)N*%dM5M#em#YgV%!5;q@z+ zOCMeG7~$KJSf-a>HJgXCKN#Ws97(1h*n|nF2_^GSjxb7ow%doFX&MZANq*U4j1g@e z;wb4fAF^cUF}^IXoIC4C%MiEJ>B~9huqFXSTb=q91|#Bhx)q3)ff${1mg-H}bKt-V zhWjb0(CmwW*JvxpZaOudYbIY8SOZ@q2J)udvXVLc)?iZ7by)+G({gW4nmQsGoD|uM zi7iuU(p(eYTL&d)DkcNU=O?7n$8*hgv5e+~2}x6vrXpRXtzIvra-;mZCnmb7mStzw zO{E&ksnQTXLEhxSXhCsfGowZ@>wwSA%1lG9GNRdt@TiIEU1wWw!q8X7YveH_qbYYD zW}uafpyGKL*|vnS7*DJ4vt3iD+<5h(sJs_!CujFEUYNwtCIAC#{h zdZ2zrD#5`!Bmt(0*}gm}SaljK;Aj)M08oGqFw|{r{d`n>uD~Sj5lTflPg|qqPGQ2jUm|H&3N+X2OG*4U1EmDcB zG?0J75G#?QaH_zIV$N%)5rUP*9t*Cs z#GQu;9V|kiMzyxMg%DcMI$@k!LS9)4q=~T82q84h9!uI~n2IrT)pm`Zw#6-!&_sJJ zanHv1A0yvDLFa983n4Vk0jom!*e-)a18I^5Ske|kZgq&#ZM#m0?3-lg&K_@Qsz$k$ zrJ?wff%y68v9r(PI6>&A4iy|4Sx_9O!jXw$OxS5gKl$ zQ8{f12nW>x$sT$nG*#n(vdZGTe0k+j8^f+R7L&aIE!rW9BYzZjb4@t?QfkJA_{ggV_XLG;TwgC#~_5+AvegUs!;p z(`9E(+-v1unfCQj34_OZ5RTWf>Np9*R2d{6u8r8uRUqiIR_zfZZk z@Pf$+s2FhjH2mS4?}be1gmo;YVJs-|ISy#4bpkoeA8q7(Dt{b3Lr&|fV zR;K7u0b)AbUvPnAf@xDiX>6BmX42~wm>opRN>=GGHswlGC*=eaA&KAuXNJ&{LuduT zw0Z@yQ?Z%BQbS0(qF~xom`pD4{Al4OsZ0^AhTt*eAQMJH#GNxnG8I~<0!-RwD>~X( z==*}{72(%DCL(2{%w9@gE4>c`ErrCD70w@UQ zc~;SclTM3NMUhNs!^cTFe=F;b72z~W5%&p^=CAX#n46>GDgC=n&EDo9URdM=QWye6sN4)gQ1D=@v1z2%j3cWuIeQcoh$*ZVP zX~U!|xwR|JByCEcazE@F*9Hqr(<(jNtFL~*=8&fs7>cs}1Ov7|IR2r~Y!ch4fLE*y zO$Sbu`Qj@8#m`H2V%h@vb+c(j7^mhfuEcNyt7T&rnBCg3amsPnKz3@}a)j|zvjB?# zMQ;AN6F-M-gAqb(Fy^>7JH;Y2qfNuGML4i&V<6@naxFw|ahL+;P&@n_c7=aRZY%*w z5GbEFY8~3Fklgm7Gv?5I&TS2rM002r=T?9{R+YjTwAkhIa;c&@9@eJeE@EVHj&nOJ zKd*6aFJNe|QaIx#JH>Ltra7Y)n>NkExs6+7wy4gAPUK(|r)1?Ad)5^TFT*=~?owph zcy)5d#$UAY*7(H6FM@7KK9-xe@nOvcUtO`#8lu^RHHuKN&>BnF__pXsl94qGwDH?* zyft94@%^zDBk`?KmW|(M<7Hq5Q(+VM_BTmj4Nh(RNgHpC3j?nBvgop}va}!%x*r%? 
z-1V)EFA{|P8$Y%OX9OLxJIl&I0?tU3E6~Ai573YqNcLh+Z5;}y_}k3r z3985KyC&Qe6(sfpquMN+n?(&|Af@oL9DG7#5Wc9u$rz5;Mf3NY(e%cx=2RIkOyPps zgzrOtn?lLAVd$_F-D`@Lt7HPUY#H*Q&nYyYqpU%nQBf4fFH>kMqEu(0zo;mI ziU)*}^|{oCt&Tk<2?!-eXoVHnnUvS%?adIF$H3kTpO2AUhHOk|CbKnDri4w*RMpL6 z?2H5K_lI$rO5<(^Z$I!)?k}o{K&vlOi#gUz$7So%|{`oq7a+I)&> zOlpR&OtsVGfVE{&&R}+&w7gNEc;a@M5mcb9=ZCmM@Ine}zuoKb-1vKhEa}IGW7P_HZ8I5Qr%fC+LY6u>udEW74}P6^QE_q7;Dpe>`e9DMO!4!O$_!;x zRbW9bp&3T%$-oB}(imGg7cf8{IVL+$NtQsarZ(OSm}%1S*_7%nX#U*asbjbEfx1_* zg|gc`O7JS1s(@ipi=SM?OqJInW!02?Yf;8bt7uMQ>v7v~m0YQk9H92mZ{nrUmZhF< zVyAtybr~%0UEsuLH6e3tcO4B)#zlb#KmYxZ#9H8O;IVRe)S`2G^ z6n`fVPcVw{(J=fRmZdu>s`wrm{Mjj{VR&Na#-x!{jIAM7fOfGyEht@7kQA)3 zVjx`t)2i89LiF}}mR;b}Adcx>8%`m~?|w*@n!7PSHe?yXNly1;^0RuHGwU^I|$ zPJLQXfs2AN<+%(itIg=jdNd>p(84B2Qw)^aV<){3v;eWCB04lF(6QNh6n_u;3%kKQ z8it?4*1%%tS+cYzLU!-tX^=yc0X|AnmfnkLP#)DtZt`=dl@w?`%H2e6mOW%6ZHa(sak z@!=kY_f_zbbWX#}GHqs8`uaXICa@p527bss=7gd4*by12=$dNe1J%@44wTAaUjoIH zX=81Vnx*iwZM;2n-<1-mtc=QeZ?Mo;gv%KDjxDvF2!T+~`^;98{(J3pb0F-#Rr}z| z!~W~$AkgYnUNRpHyf=a#y3cIw-j#w|aLm}&yib{co@MV-@`fm%s8NE5bVSPGv!Td2_aWwn%hVk#9EkHJh3ZK}%{v4Qm?5c8|dCrExNhEY9P@gg6eFTjDGB zR|K8#f>85m=ly0+ekJLCvuo#R;2>X1&8jR_%hZ`jfI0-u(p+IWbzX^6T5HhN^C@E` zJm_|G=X?#apOBIYbAmc5`;mseolnaUC7e(Hm$+3Na7 zd`qUEHN#36hU(JR8ndfmZ@g||ltz8~G>JD#Y~-4u9M4r_b}&k8ac&V@VvQMNl-s!N zfs2=@xW4FQ5gdtY8bz;nZ4yaq*5LdEn8H_Zm|fbn#%vn?G%V6=+P?W zJ@6h9%D4X_hW4;#4GI&{wqIz1rh`Hm%7E;G8T=8=5*&_9)in4e`!C`j2nRD*Hww)M zH)#Jw@E1Bz3HvXi{g}`iWMyLG$f7pT8Z2yupzHkCKy$m_TFN{Bfxn1_%h;L){vviF94zwA zcj8@Ni|hu&I}q*|h>jhTD?q?z2L6`+AykBo?+q2%Gpj=Bf2kLqx;m7BLWF~xT@rH-*-?;pWhA$&;bsYZpTcJb6h?D6I@5p8PtPUadvkSEu1R zn807enJZ|>Kv0XQ{ZDWLE#k=cp#`o%?^=)e+3}| zS(|NKh|6mXFZi>c_XwRpmf@_IQQII~+HZnQG=nm*eI9B^EHV5FFw-FEgW? zW$1c)QUveIFD@u50W-F{K7!vxV7{_m#cgkO2<`Vfbh!1RO1@cH^1-M>`wUzb(Xb5| z#V>-pEu!iCvpzdKOHiMqjoSu6G;a$z? z&PO?36pi0v-dc^zN`Gw~+4u6EcpN{5PF2lNx_Y7)H$4vB%u>$_s7o%we#B$_3tz#j zvYPgJZ$%%_Nrkt<1u}kIYZWfOTRnHTcS*V-T~v6LPvTW;-SboeeUi#J5uJcD_&{Gk%}Kc>^$B;%(2nH9X9WV*=7TA@-$vcFWzO%)z_qs0k)^2IB= zW28R*p3MsNs)BuE&3q&`?M{~TX5`|d0lZUVoFoh^7WP-U!v5R}8qg6$-y`|mzGBMC zO(MG^@Nq}&7zOh%cJn}R7JhGnW%Dz_M?Hia76|W!#miXZ$YM!Ys~va4bX>7OJ1}37 zhF3qS6?Lu$)gsmQM^DhPJ*sV|d+@_Exvo;d_wT|taBE<%rxv@3_xiBhP#|2Y^;f5gd{x`8gZ;T&mZPMf zf3Zw1661v{)HEkl^CWCoT)`7jjCrjplI$eqTbBvju&-e8m`;>SIbVBVg%7ForXmD! zeScNVTNOzt+bUka7x5w=1&WfSK{MvX-eJ7RfqB9us=4daA{Xf#7ho=NMmPE1CFzw~ z-ZN0-TAiA1ED4OobQ212e`UQol5X|NEX`P{t6>oFfDbw=e=H3}t1nbMD{_Z=x8rh= z+tnw|!yGVz=_)GoKyCuewrVMtBlXKnt2N*h1WSQob;8q=#mh@{q=&WR=uy&u|9IgH zHF2MIP<^kYUsb29#HwH?FAbOUrtj6V_T`QB=NTXwCw?TnT02@9BeGf7yQi9ZznVA$ zah%VbiR$gA)CqIc2@j`BN0m6I@W@lu>xX?sF5zX3{!IlBnC%)m=1YgE>h&jdB)%F4 zn*Aj|P8Caq0tMW#hSWUV$Z%83X#c9Ww+6g8FDM~vmJ13m>y!2eM}mt6-oDRzo( zUH83eDziFW2|_Q&{`Sja(!2*{BndkA)!MPUpQK;ZIB^&Xa6cK7%>`qJvhrQYy#Q8?}~k=t{H zTZ?r4r|O)QY9wr5Dg~~NEF+vRSkXW@C36KVz{PD|PDX)KYa=mh61BSPIPh z7i-7rOCe?Ir5QR>=dY!qEOokMbsoo!Qt_0U)~s_lfJM)W_&P~G!iQb1)^dS5=Y7g? 
zP=w3JJt*bNcIyaFL`y=JM!ZwnaI#wDur5nq-SzhBT%W+O*k_t`3L0<^z#Vw%fYxfv zzxjl;-=*XlHLcbVpWuRYUjJEIFj|jJ+jY5x>%L#4KC)lmK9=Jl z9aLa7V3thXx(WF%l=N^NK-NT&OUlIpHf^_5`)T^SuK!XU*?>m2wdta9DfDK|lQh2@ z>zCz0m3!4fD|MY$`btN;!CWpQTR2tQanvbu)LpP?lNHmodU>w0)ERTsIk#go ziA8#aI#1tdX}GpR6zr-J-mi1s4$H6ty=vK`x(prFg0=Bd{?uK%{s*3ygkB9iH`Nys z5H~r**6N%lt75w=rF@>oiM%M0D-oTzV{aYEMQ2G5)IfAf<4m3|&+sy6#v@*3qNqQbwy+9NzGwX7= zsG;*T7=t#|s^)RqrQl87X4mTeU8)xAR>dPV6xV5o)mm==KNZLWa@BK1N2YRO^lAfXQA_T2*gT1(s;Ps!Z0m^rz1C6Bq zI-m?_$ML04ozRz0K)n9-KBbkQ=Ga;;F~)!#d(&T?OZLQ(CF=DLoG) z`$~I>8V91`-0TC+s+c==Tll}pbQsRUeA-_|*s9)Iqzdr&4mtn4T4or#8_RJPG4Wwd z&(af6`wnSmt!^@Ab=vJ}={$~nsDEOLuK&Gy=VHcZdYsXbCL98Zp^C}&iSDFSTzR7K9Lb3?c2VD{W+n-R~lZaPSvF&$WToql|$4SZ>ot4 zG+wOK0CQ@Av=itcb6o_lFGl;0Ns@lJO1M`o+)xGuctj=2&AJK>e=q4buxX$z({_z) zN7b7XHH7-=GIP}Y!%t{O8Yz#e$&(!<{k+BpXANj&c`y}QVEY5&guUg`v8%Um#ovWj z>KsPv3FojXkcwr;16Zj}+g&LgrD{Y>_=)5<|A4HQ&J zg}pk$Zar-7Rd4Rrz|yN0JDsN^RA=6c5X_3@sKQqfFx=O;KFz=$fx9VzB zUnmXH^xk>7NVy`+IiHXsHGmDn z$$1K{YQ55A#6M0LQ12ZK5;;k?(^@nL8NxjvoaI=Qu zSa60a=5}}x<>M)q)=AUwVSbvdFpA#C?B5166;Fl5P}LNv&R43d<*>#TSB)zdy`+4#M%Z?&jxt^zCA$7| zwBTO#@&PEq9URr=c+*Fw#-^5PRo@xL?#JaKwcb^Wj(nN$VQ1ky4N!-}MGp6c{n^Ae z_42@A$#7NgJ**>aR_}M*A^ADzjXT^6CuELO?@mx}Pr}GB%_etU=W&0Ra%)GHsUBcz z12O;HfVY}78FSCxz4mG8Xf^g;&R?mU%u2M1;Al$1RQQ6U;Tv3jb%m50UMKAPnA8tZ z=dM&IJykC09lC6j$|g&Nn^R@88P?@dqR!H*bJ?vXUX6T?{aqR{z4h++qITRmUD^xR z4#RcXHK@s5bv1RN4IWt8jS&+s7tL-yA{l#Ui=3fzcNCiQNOLsC*I~KhU~I}5CFO3Y z1&4JioORpH!6w6dOgNb97r~-z!AdwD6eu(0A(z+Xc7#lxGjBa!U)6p7Jn~u7JoV-z zEGHB-wWcu4qpHSypi>YNxnAvCGf2Pywoq;;i z)fzw&VP78ksp--{r5-$v21^6oYWhG8xkW#ga@F?82JhFY3sl7pYks9#axEgM{rn$- zBNJ;h=r0ZU>xsw{%Mts9sdS1cQa2J(m21&4FW+d~a+|3vMu?*Va=Qx#I60il%@D^La3>T-?LRgoGi<=Qo5H>lijpZx77P$S(% zbMN}p`~BfroSvfxk#^mr z!VxE6eN&T0)?`dEFWLTezGxnqs6=6QNDao2S}s4+iIohVTc9Ibaq zLv^0Q`_(yf)Hz*o9`HbWseYnJ9Ck^J$<%?k!j|1%F6oI98Ou!j10=y!z1bVJu_K!T zbr0}`rmXpX4NTrTHAgYWoZh4(D~cC666JZ_uhvbiPA#-s15&M8@;s)-zWyt9B)&n? 
z(Fq^n^J=O(4Y4=Xw5QaxHr;LuRKcsh;;gmUj(MbAI+aDRGb_4QEt{yma>Bq50`vNJ z)&SzI>vRASjVt6#6Ry>cYvV-jRfU>$&aZ0xaMYn3D=|5Q5}g5TC$>f8QP$h>+{vn>m%vm*pAsE1G@ZH zFBb*lo=5!S2Da-#Az9}N@1M7)SBz}C+oeLg`bft**)1zH6gxg9`TiOQyzdb?NjII; zFGYbKxMx!2<2559;zj%$i@S zo0zvccenaVGunwpxj^lwvP`|a8BW8Y^y&;T-Co@Tnze&!gj1?M*4^%=`b_k_Qm!3Sz*AM^%lC$u|7IL0IKf>tY)1nu zx5HG-U3yqd#8TjqZ`TG7!-;sx>MT;fR5z8>XG9*}A$(QY@p+N=s=St$E&4a*lt_l} z$cYErDtEkk~Mz%4=c#8VKy42eVCq5q_7>EKE0x3W?P)cxEYf}uHo_);Dj~e2JoZw!N5Ww1y}=Q z1BF06+1!nZwBnRC*qNXC;^*mGD8tOcp+Yqj6fL*{Lpc6O;Tmtwd4o+|b zH|jfO4RYRt%Y(oX8bqU>0oYIDfu+DIAQRXMRFUsEV>Xq4X!WDwafXX?C-&ZRz$L03 z2cvd=X!UR|#|dx;P*11E855lioPxc802&-;1Vy&t@;Jc1S=i4F1<`QNw$s*FTdW_n1y%sLKruOfY@JEBqTy3f{+KKv8z>}O3~UoI!7#k9 zKpq0{W(1P~1OO30Jb;(o#h~rRkF7pb9)qEJ(DoPuPl8Q{C1~Jd{Ccs=Ji{f~2!ufa ze$jjc5DzQ`GJ$+xD^LaO1X_SL;5cvr=m)L?E;C?tz>ivgZyoELi_2nQ8=d~WHHh}1 z8#+D_CUQGt4U3LI1da!m0+~QQuob8R8i5v|4LA;50Qvz_2s)tzFXNtMCj@xB!NKGM z1OrjPLSPM$4HN?9Ks~SrI0zg8P6B6vE5Hz7%*42L)U%;vLDfZOVOk?aEXgu;&rZM(i`a!%monMf#Wm7J`PCfDn<@@$3_B?*c z{5xfzShu_S=c6$^7rriIbNr%Bo8p%(kB?7HNLoc>~=R!;!dG<2?hdlhU<9hshRy-G9HIkoV&yVN28#1;Ojug0^ zpKj0R?8or}_)Sim?Iq&FXnBt{ux#X;k`ZRs;0FapzA}X^MANl%v^J7=>2}^o$NbEr zd;=gB=9TvBz1llP)!TI&?fJ{RmdC^_+O#Pq2A2Eb9VCA60X(Ql2fnaQaKYCAMt)}Y zX*%6woiggSTuwK8@G_SLyj|}45~1X_$>?aW z^-GK2%y-`W!uqVE`Ed&Gw@$>1N0a-llRjsfdLJR~4f*h$F{&E1s4bp4&m0qa;ZUboY58P%q}(|+0dnA!Uvf-UAbZF%D2 zwEWHZ!ulf-w|(FSC0wyC$4ltyu2`p97EhvguUO}MF2#|VAE4ErGRzn;#lvK~Y7H8P zQy|=1epgU`z|e%M_g}TfSe8zr7p__#!rSQvu3CR>zC{gRS?7|+i#8v+_LbEIx($D2 z9dCc|^9kp0Gd6A$u*1nzTH=gruOck_Hmqe_`}E*H{fF(u{<=p54s3Ub-1s^9bS<9`uQbZ33OSY;(Z$^m18KhO32)FKp}nRoN6e`h_jXu^u{vlI;t;S}7uQ z)4E6h>2Ei27oK$N&M^+=gS7k4sJ7~7zqhQR@Lt<;^0|aV&A(9UICVTbW<2e@WOH-q zxpTUG6f1r5i_$W4w&efIE+?B$(%L>Ad>RIX!LOj6KAZKQcBbmxqPV}HLeK3A$NuuK zwVO-ce?^6Jlc8Ao#4%Zu^9wiOck=$FUK8o}E5_VOIPL^F45f~_gAPtZ#ccclLCz!p zKNWv{1&^IPhj(hjB5nh)h|dG79k_lb)eP8N{^`&rlD*b(T3b72{41AH$ykm`*S@r^ zr>8F4M*q`Flg*Q<`!W~bB4&aZs9yX zN$=|r*&jQz{Z=heZ?lj7UNXAlgbQX$g(MuF*)qNQ_k+5hN&3-Og&Y1|@_nBdKJkjA z52|ebPf4%PkN28C6}bTA>>t&1*Gq-rLCJ`FS~wENJ??n_d|_9FWS0BX)H$Q2US)!$ zJH9P!t`Uy>sqp?^33qRn@+;8>w=>wLAJvAdDSdUsT~Lb)RN^GT-2IwVaLyOL`kZjq zZsFbfQO(-#MJ{m z+&CoV{gKD{r?AN}H(eL**1s1F28o>YiE#MK!g(u%2g|fV(_~3F-7nniBORai6*)Cj zX2C{H+CrzCNXluLR^cv#!p{hY`v)p;FpDq@Apl0zgT@-azhP&pO`OvUO!3cwNEwHf3?p2 z>2*>dPZbGQZ}-Qcm>cpuC>*KFC`XrV`+rG#xCR=>%Oc19PS{)HgQqH%_o1X$vooT9 z)4-p!pf7g#M^q^qS8 z%Mtz$6X7#g=3(3!Ghe(Q+@+rhwLT^J+xJMizfM_(I$5$h+4i4HzI(E8lR78=k`1?e z`U&Bi`WaH*0$u<9Yb3*Wt8l}8!X>(H2Xzi3H;E$2*Cf9hpDS|1eQKHBcO>0;s&I$K zlhr!n(!-LTdQ>>)SFpc*#OgfT*NS{tS3%P+MDBfAI_er(xA`J>>qtvfZrCN|eG4SN zS<{;kD7c>vm1~vHm+3`Eh!k+vin}j~y!SojDB%-6(y()-$ko*%n|BJA=qImNHQ)^B zKwR-*3oFp@XW`UJDPPuPzjwl;lxo_6Vkyu!MJueAg5kX)m;6$=xkA#bpBB0Hq;Taq z;Z(JltxM$O{lZ16XmyyqJ|5N3ZSki5A)+K(3fk0^?SByYv_`%JjKXe8?Z90}QgoLn zbY4xpT_a+*`bzIUsW+?^j8116^WUr?)3H_>@Ye0Mp;jt1tP&p3J;1y`$`xt(-9aMP z>In0;N%`6mtv4hbtU_ z!@W9?u1%7@TFbYqkCa*^y?}%RF$MPZ-=y2@(dQ+@Q=P_Hx6$hRHN8d@9R7{S?HUIz z>KxZSBKfItn!iPOzwRyjKNh)3Ep$2rC3aPr^7Y&GXk`k#qELdNW;Q&jZ30YT483oY7_4P>F{WaD7o4PW)EnBHf(wbeTo# z_G;cE`5hY3DuPtrF7>CU>`OEq+tb?69_s-$3uE~{qUE^Bovnl-ST zcuUG%oFIHdEmqwra>IIIN0kRvp|u*ytCvcCyKG`*rmS?)tfWOUoYlK;c!|7nBx1cH z<#H}cdhdS=uT=|$=xMlFSA(za4bFPxEm#^{ttXih4J3j1 z+?9Q#Pp4o&G&dEfNgXv_TvSDx6Q#UOm*Y{5m^X7IeZK~}8*3r+`Y+;x061Ld#{5$$ z*raE?6ZIlrTqB$`C_MaQt>+_rRpUm3Ze}66{9HBiwQrDmt&a+y{zCs$TXakk64X>h zOGJ*FFYKtxt4QZ|w+0s1XC*&cBVeV5@=|Tb_a({SuTFkKIZ6K(z55&8|J@^{V4W6l zo~|9MGhY06k-hcaux`1QKQ8P!M!5HevPL@3)nb{}65)~}$-klJf!&LA{pa13jO{Q9 zPsRR!6Hc8YZ2nw$_?B>7rEt>UH2r1a-D)ZCS43{OAndwK6mZu=tE)!D$g+o}fWMmT 
zCeCE6fU8EVq%k5FjhAwPdN#CaIp2wrep5$ieplq)?}S@F6wc5+A?~NrUY`!AtU$fp zUqkQB52V6?nl4W}>Ru`7j=C&QjVOX0gcbJPE%_I9Q!2pb!s&y$dp7B+aaCvUk{g;b z)9Dx~Sg}zW@YY@MhR*d>HL<6z|6cXtXg!MU*KN6brqoX|q`edN^F4M~b=ulll3(p3 z`DVL!7oSyzGoe`!0< z3+J&VdHv_KNy6#^;SimI!6cEJI)qPrEnIi6uCfzs`Bq zeUiRXBWG%~uK(09$!PMH5kzZ9cAh43QM}~4ZxOj&ms=~W$4=(IL)cfHG*M%`?^}n)95~2?ax2wsTblJ4)+}{{Ea{X1xUDXjK>UNxEM7*`(Ig3x>aEqjl3qGl(rY&hx9Tc6t=nD|Oxaua$}++Caq{B5$_|cd7Gv>Ie^O;JTS6`R-%1VRg#ZB1x}M zXHHe;%hBEPD6cB?Z%Wm`(C~s(DA2&NUlqEjtK#Nb?LgB{os;qzQ-z2BAl%e1+?*rj zou3fdQTK}Tbs}Hgt^0q&fFu;@vNG%boUH3USvL#MA4|DvXW^y;!X>{K&QQgMbuZcb zx}?{sh23X}Lhe)KCG53 z8kTf_Z{f%aDc^fZN%{wvb#*Z67}sP?TQsmLo@&-y{!#z6ZU>iI9#VFNfph}*x!sf z<%pQ7@ni2;k$cr?`_yuM6_Q?-s?M<5A_<<_@b*86eAFrmmAoo)#ysKh6iHv{D%_;Y zYOkJ*e04cCsD%UdAkwG4mh5A1r_6LBTpH}sWp+`$xm!1r6S`a3^oArsk65jTq~1kc zeytj6lkj&zwm@>QYP)^*h>G>+xmq$Z^ju!6pN2R0XajF)zM8OB6&cp^ zz2gJYVV4G|WL5a0-gYN+Xvew=^3<98f>96sn=U>n4bp>r0g6)P7@`hJao zwNRQ(JfIP<=r@|Ln@dus$b)wahvW#G*Gb2xej@V96}tc91%UPj3qBC;$`p?N58+aE z!iHx=&eGfH?l~fN?-#zR0p=pKV+Czd!Y9;{yXDYn%1}!b+ynb_!Hns`ks8UI-9=ul zBfdIK_Tkpp#42z*gG&eK(NLw%}C_msqUrM&k7DQAljxhz$;U2ko;$VUpK zz9|JVbe(5-id?Cd$y9dGds#jr`#0OU%v7Ss?B+1(_^^Fn z#xFAX9DYjjJN~BY zKU_OJugff1cP+EV_L3y2kbF{jzs_Moq?-Jcq}S;bB>!6Ez-5wOu}tf!1rm0O>^%eD z_Tl9b*&+$sE9ebZ<3qDA6^u68%=>7D)rj&<@w((>>2dN{;?`k0IsJ}WpN{kn-@1DO zt?}jrzfmsJ9Nzh<)oA#UBR@8HhJ42xlg+WT@*_N-QuC43ZRC-_){m?c=^#HI*hB3o zL;W9FJsjI`LO4#Yop_?n#{*A@?VH3;r^b00Q_WFS(P_PpT09Id>glvPI5uL~E!5>< zjCVHq;@LPLj0U;rK63XoeoW=3tq#6Nrr@zOpa&RvB+x)0^#THb2r9+nfv#1!Yy?_> zHfoB-Lwp{7c)XE1qK&C!d~9_H>4ba^xCC4S+;F(?0fGU3crXR{;Q>P538sS1RMi8flI(Oz=?LxGiI9i(CK-`M6&}8%rmCC z^Mipqf&cMfAo=2{J*bN30jHaH(b`xH;p7>s;b?ybkea;k(CQJ|AB)EyjV_Grte4?o z-b(&mR+!7%@NrBJ;D`I%!eKEW3Rp;MGEvmzRKRz%9THf!_xR2BLt4 zKnk!1$Oa059W*@INONtQhqVEm0s1M&#|TO)j)m?3|MD&Ws_h}*7|;ov1FiwL0H-+g z1o!|^Knk!1$Oa05a=Peaj4wNj%O2ngFa#L$F+3muhya!XtAI=(AJ_`)1R8-B;5cvw zxB&D6*8#jR+T;g>0kOaeARWjBih&wn7qIVc{6OC!TpR;Bfpfqms`NFcmaU10sz4!7 z4%7pCfP=sh;3RMs=mCZRhXm*i1OV~CDj*Zc2daQZpba<wwDw%oX4VgaM0z z75qohTwD|b+khJ25O5B-1Y850?ty)QU|=Dz1}F!10QJBg;2>}WI1BUuSAZeFVId3+ zcmWYWJg^j41!Mw#yeVQ(c=sws0Mq~n0siNJ!|tjbX49D!cx}yHX44A54`-D&+=1-^ z&H>k%5t|mfV}!_Cg&ln%_!ET= z`Owd;r!a~Qa3oAqJWw?mCJllTKsHb~9{J$2YY`OEQSWK^7dp=c?*VxK*a6%yd1zdI#|Ls7beSjMf2Baf%D{vgwJ%H0RuodgqKtq3paw{+@pbF>8 zeZWCjWGO}z0XTTU#F(aHpoRfU48a2DfRh=BML_yGOe@g89`k<&_mbCu0Gyy)0OMiI zDL4Xr9gQ6efwO`0Vd9}QI1U;LyNr&2-~miUKb#a8f7qiI+I0EbR0Oc6jRyg1;U=^?w4wwy`9gQ^)*$vmR;33H8%5V{Zgd^Z=jCu$+ z##>?PVyx9%m^y4SmXAM{B;W=`d!Y5MH84F!(*mRe7a-%6Tc+zk=V&O#&J0C-V3Eb( zGvFQIs3);HaCevPjR=a>)&nj(hX0O0Xave}-Gke_eSpVYcylB)j(~rF*C6c{a2#ku zS^#((_&V}d0e(OVE!V4nd|+P%mNEty z4-6sS0Poz80|Jn?5J(4#ft|oXppzNS>V{6P zm!ljSkHGbcndlhU0ptU_Lg6>T7(F$;rbFP zrht7=u^eevaJ>}V2KEB_aqR&d0t{Fw02qSr3}FSG!4q8pFnQFoa5f$!tXd3Y`(r@B zPAH!q2dz({lda$txC`Hj!cpKBumkucxF7Jsb^3EC2lxSnTo%}c>v-VUc1#Q4gzIbI zt@wzb=Vd5{JMv&~3J_e20o7pS;KH3y3h*dHLMNfKS79b3#MhxQ6m~!%FI@W}Z7K4W za$z*)fV^!;uLs{knR5*=Khk%+0t=xnKgd1kJQkb|)B|n6R^TA&?Ls@pP;M#emZRUY zHe5uYvKNHKsCWzo(!s}(b{#<|8;u7*-iLfQM8WG|59Fl)>1Z$?TnJ^(;Cc-jPDj28 zZN!7GBdw5?1RA+*;4Ip!0fI58*c!}#BPxZhL`B~7uVHOI2DASJYZ%*W4-l~h9u3?Y zMBE0a8&C+bfxj`gm7^nY9JmfdY=ni7=Y-QxF51`yTnC0cP{$L4`WiB@6#eV~4qYq5 zsRS1$G#&sf26BNK;2>}o@cV`X4;lwtHo^SpxC%k!Ah2@@MzI$A1NMP_SRxjCdi;1Z zz99%F!zkBy!BW@(+pvLMiG@<&Z3ycvz**o{8UFLb?zI#s1R4P+jAR8yQ4Cx{G;{-U z15geT&}TZrHB7fJALay#fg0c(a1C%Oz-j?H;puKOpa8H3xB{Gna+d%H4B8J^2$Xde z!u&;WkOwjUt1!2#;qkzchu~B{f$7)4bok_L2srsLygm)1ONY;7V4l~ZBcOggmQ^NJ zP8Q6+0c`>Oz?w%;9$10taL9quz$L)zQ7Db`;n26(|NZ_8TP?5+XajBmVM7=(&;s-W zem5Zl`+!S;*LOG>0rfx+;Pc-&83T<#58!nR76f(x$AMcw>@X}0>;o}TaCrE4@E4%b8|(i9 
zF1#ioF$lp1s0R)KRTb#`H&6g9U(XKpSuc@Od873RD3{fNOx?4onxY12|TO|E>c8)o>)B1~?830l_a|u7G;r z4B+%4Mhp}KM}UPeYZcH57`5mKs0S_pJ}`eKun#E26tn@ifcTwI5I70A)M5Tp@ZV10 z9N_f|It2Cr{Xj6mY``I42#Br6h=JpPQv)mj)Bx9jh*zN)a0D>@9LfPzKqp}Af^tAT zZ~^ey4dsA+yRrVS;9}`(Xb3n9c>DsE02+ZyKtLnr6F3N52cnuV6~HmT<#m`6*aKVx zB7O-a0Mi~Q08{~;fUy?}0QJBHz~>DN1UUXi8Rp~P;6y+R;Ia=50=s}7z^@s3KnrjU zNO=?10(tcNE$AFL0yyo*pnw|SEZ}hfg8~`>)31>S)BtD8@Sn#)WCBM3 zr&eSFHNaUQ;w@+j90#2K9eF@KZ~^c+1Tz8$f$KojVORz@1~|M8rv-KZX8^Z%Ffd>j z&;$6jL3yC81^-D3-bY0z*!*dH*g}L4Y&owAAu7ACjpnEunMpfI0tyW z2Ok3V0R2GlZ{b70A;9ez+5y^tTR?mV=D)E6^9%&Mk9h_T0@s14|G;Q~V}QePbOP)I z&H-L0&1q=?k!eo4JTi~Z43@FKrwI#xWov>RL;cI0cU}f zFibaK3dj7f2*>;a#{sula1?+)DCbYe10tX);1-E_1+D=6V#2cke|mio;184|Rbvz!e~NDFzIj0m|HxQ2;m# z_$|Y-0WJX^KZXLpF~Bbc*T7jIDiuoyaJV0h0H)!6`w|1K8nQ z24{1H`Jin%G`)q}?rd;9G(7}dUk;6Nb7Eiw@ee>DAQ#sckiHGP6*nA>;5J}~FHSe$ zA;1siTE;^GZ+Q7s1SmM*j%q09fhCy)hXfp;_=+<8ckahH8V91$G?;8EOoI^=Ld#>o zaWrxW-n|P-UI5%G(Kzm2tB{t>=^>bBlyC8YWx%U0!w;ad$0V#CC{ptXrU$r$eq2^y zrC=~E&Ioj{x^ofcA7xE_3G&CwCS}l$^V>0gz8HW>n6v&{xb!Zq5GwimahJsT zgC9$JVvTU?3gPC7Dh~=*JSyy|BR!ue^&@>Gz33a^%CZ}B5sBL=?x6ab$f?>ujv?}4 z+)Q%*aIJ8TDpG`b<@EEK9==J+cVTUF`i-xJtKSy3{Z-gqr?jlKN)o~sNkeO=h}<_U ze4|m=R~5}!rsd`cx8_TJDwY#>cvvmkd{g9vBf@bdT24pahZTVGWv1N-vs^JpO`Y|r z$c{6mgMvz}s8e(kUeD$GMikfdB1BM5_y3jf?w<%3>D0wprJg(X2rie5aE#AC;1pg` zF!Gnei3mVkAS+qqCahnU^Zq7WizUf&IPPYc>*_>4e_Yt}7s3PEl{ZMaqJEL1cZi&U zhzS43?@yhOjBcHq(?1t^FhUCMeOeoOMfhloaJYK0qppquRiW@9t%!R!PVd$Z!v7%Y8LC*S`bPIjNk4s7IQ4F+ z=R8W}9QBpcBVUTh(Tv1gDcFGc&K)&0h@7b2zg-*Lu0Al}C*^MH+@4Pt`C_EhGpkP} z;=@XAujFTvUw2+OxdHa)jF8_7m#S0Ud|E1aYs0I*lloY^xTp3_vmPfG2ZicKV);Y+u8p~SCKv8n}5hg{_X5Luh67aV_fFT zKji1kiQwPuk)OHwxBrnY8ja))e5)GqFzmH{7|B?>1F;Px4>c`RpKA1!*)B`1oWe~{ zq;2_Od@-7y`@ho5G`&bOMtbkDdr(>$I+hy$V8_gnIzMEN*x21vW8bRqj>@9k+|0)} z=h>&mWw~*@%w?9ShW9n6Wo_QP5qr`|HPiNR$-~Q^|DU|cnwOTjY0F5xk>4J=s(IYz z57{QmNdAT&+P(ju7Wkv%?T+&cGt)M1l6QIDR&16wfX4Agb9B!?AgZ(1LQN}-NoLDC z$5t2z&1MS~Jb)*!CQ|JKc+%$++7DXdWs_eVTEDq4I}5KwXTO3;Oz}90@h=f=%z7dZ zZ)ZoktL+X49Q~#INO5~QyLbE~8uOrWzvsaTQZFx~_=yZS{)nFjQ|W`o_{jH??8Did zg&(%sgvCBGojFTnn47cKYmUh>GK^2?)Ps29sDcI`gx-DRx(f6+3IsX-%D(U8)zcKd z3J)j6PzGmiqtaEzWXr(`^x7)pL(eTzh~GbL8hIZ(h60zYSdC}HEVOsEG25I+=S3P` zZA=(-+rV4M_aQV{MR5-qKQm9K6A!_Lf1o~)cbJ=Ky)1e8y=f2LpR^*;bpObRz9!$F z7(Yh6f}fzeg{psI+y^@U6JxIV8rjyMM++scF(!LXw(nOMLoWQsWZE?18&6RwvJqBx zGkr%#nWE?-(_1vW#+YdFvC)LJ#)Jt0Gj+DIGd5=C%4|)?Od)^ST4S6gU?y!?YrKyh z10TkS*HH4q(9J@-S+Y=vP~XGGl+kvxvj;4+TgW}lnCf}fN5+XMdkm?CBa`|trKTbH z)VNA^&L(~%><`)ajVEN|r?nc=jG315Q2H>OR<45!ZKcw6@C)aQ^x8TsjKg$v9cui7&U0$OSo&_AF@bs9dgCi!mp|L} z%Gr{em65rwq!3eMe}v0rdo`p6q9XC;pB(2?a3M0cvx*$wqbDI zfo#~Ii%w@76U-Z^Kif$0bh=CCLV}EG-H4_wl$rxgt0^Z3ql=^J9OxE9`&n|Mt{kJx z%>!$Jr-~!S`ofZYHWy_+YWP?@#?!Np8lh!Ap8rrH7tsnLPaYez5n}aQP8v~) zwHBQ?D7o66hgyvjXmqaerX?M}&X#LTG2@xITx05;>Gse_zs)t=X=0ucVp-)$OY&gE ztCW=oyL?BLAWnTF&vY(CjVgkCYXNO6LIXbc&^Ihc-9ryNZai*Tbq{^~I5f<^heDq)Ub5`E zhu(hz_BwM9Z7en(rv9-uS2|y8EazW+3oS9$(U2Qbc9$5BSfUovolkNb3+d@6(e~Db zWO>TiV5wh7k3NO;wuSV?Q^s?ao`tk=EAkx@>6@*{_erFON}+RXB7MlbDv@HIHXg9# zCelk!V+=KkG`bAs4?!+7Ua<5e(v))JX^X=mdc7R&`Yoaf6~;z#@_{CG6~-n@<|4ZD z8RJ>Yjz#psGsfP#ERU;sjxVD5KSh;Gi|Fm2LKT;LX~8z*89L{Os$JVqbp=u?QKIl( zdbbh=+j%dAK8w!V?xnY$g?#Q_n)Wj&b?aVw{bx|#V=;}YLQC6I$fOJ7W*x1;>F z#q`p4$a|3goUy=?9Yo8XgOeUyOwTa4EvC1h!&DzzOkX{R5Op?*Mm>)yzm!A|F<(!j zH=jp6(-QiM*<}gM+JSVhCG^Y=7%6HAeZ%tNCA6U0c*wG53B6X0i7Z?~mKV_e)+IFO z1-Mqt5_*n#&l39l1>_$=`4^2vmQJL<2>tuf&WlJl-A519K=!(i-m8HeaUV^og$Y;P zM{m|*+{O3Nq?gd|&im+vmk@yJ@1wWC=HFBIONfX3nmj~&91hoK=Bu=<|+=o@B? 
zW=5TTH18~{GeRvEV?I4im<~+KtAx+m&XH?9>g_9{(e;Km{b{y?qw~lwICfG-J({*q zEsEf2>vpCWsh4RdIX9r5&qR7R%wdWpeK6f+X}e+c{%Lciey zh{pW?9bP%Fal$(Xb{k8~(dYEcn3kVXBnQAB4&LwE?+DZIQ-!x@`?ju2DEt@3)E`Z^ z-$bTSeET?e%KwGoWH~p1p8SRJr0-(v(|oi@<8w(`-lmONc$t044?D+Fa&I(#?OT=d zkHt0?J<`?8!+RY=47;h0Jdl6Q3FUwggP1$Emvz)Wi66On5+RWTzrB|6RS?PV|ek)zxXZYS_;g!eTU$fG< z=6_5WveJTPV}=DkIl8eK>)6Gh*O)yF`k2#w4Em1cV1s77i59~Qx}P}$zy8e}Wzg%) zu?Bs}9Bg@GJ0QgBCC^HRw_16oX!3USZI?%&QFgf_aTWqgv2kra^OB&|khm z8(1zzdFFD1-eayZ=rVJSLF4v=>(Tyxl;2~}W|mu^Kg(^i*0NiQN zvI9szXVB9JjL5t02vy5mK?j^MWYAX!5a3KUn)Yk3lZ_r`Hf;0?vxkj7X2!|=&V%3p z8_j19v(e)Ru|34w=so5X`a98H8|v>HLk}?@8AAu!ux}nmd(53nqxdGoG5cytOIx)*zc68a;fjoPrYA;LUJ4>QZxBD^-Lt7^ zV%S&sG1@CO(0?Uf> zI2$Kt7uz;uU6?@cmOD_jCGqf7p7&bFU;A5eSM@X|9<=(Cfw2G z-Oj@InOtoAslt^B^woQC@ndG1@LPn36bsGyEkel(3q8QR%0iDbud%%I%5RO)<{3Wt zR2&K|U7wS=V$)K4j9iwHmj$cft{7RfMm%!{eSFLaylcZqLiP-@bo^sN{tTL9PgtT= zif7QH9Y(@kx7(rk; z2GQ%xXM*Tc=5s+b>O*i(5Y1=q52DS?*MjH`=Aj_^j2RF1jQ$Ah6if@4jbM75*&~?V zWcCTB&zS>)X-p?LESMHCM+MW9%<;i=fO&B+eZia(Oyf?2R|V4&=Ja5C`ZS_Ob}+ro zoF7bIF&78Zw2$G0TZ8HTj}bMhg6S3Jox$`O^O~_V>G$BqU|PrA5=?I}w*}K>=HtN> zdIo$Zm^L$CK>N)7!So&T^USK{KLZ2{S3ZZd-1YZlGhnR1L&}+<2q4Wi_TPRIC2lff2 zN11~|>A*SMCN2!6(dQwjgwp-YYeMN2=Il`VjJYtBCVd9R2Se+acZAX#%=MvknR!nr zh5je_U?^>7J`zfAGoK8l@0ibq()>FmBPv(3Mj-N?SGB2G;?=i2MNtQo>GiTBw=KPuT9P`$h^f7bQOd8Vz-Z_&VU~ZgA zuQIpHq|bYBq&qT`X7qwPXVT-$7iQA?%vWa8=r2%iXeQmy><~t;FfSZSpD`O@H0jS^ zuP|E291uotFh_*ZW#;%W3cUng8b+I$SB25r%$Z^I9dmvd&F{l_tHS8DKFB-6=nIxx z!f3%?Ah(6lv&_fC=)=Dt{X!Vc_$%ubMvwm$=|f@kF0(^8ea~!!(}I4mS2#V(91u<) zGDn2d=mFGU8crJrAg>Ciw+7HoHtMllhW%X3=|Bp-1p6n)DUw$IhZh zze4)rS#*GT#Vq=UIeiw*`5K&ydS9dbwpsMq*C@Yp7EQZ`^hVTUZb3cfHq^U@a%X1I z{evjiGmBmuMEaFk^ab+}>U{%tm`#srv*|nL;Mp|)Z{Voe^vvIozj!u% z%<_uaH0C-ueKtM7oI9IdWiFmgpEGZpO;c`wYi831=3TStP3C>G=_}?#vuW11;A6At zapun1^e*$c+4Md0rP;LLzrfdK)3eOCX48kvP7ySE2<#R?_cQxM&@0Tr5%d{zR0K`B z30@dM>zGp_=#87uJ3WHFxoIGNPNiwz;XIigLCd~FeqjW?#9SUhpEB==pb7sCu8*LH znfFA{>&you=r7DiB51}f@W}|;$b2?}-eT^Fpl_M4L{Q8ycqoFNW_F0A_nD1Iy7POm zS0pWA4v3@|m?I+TlkeI8B5Bs&!I_ct(%+2*)ZbyaQt#i5xt6VwbrWn@sF8IK+a9(w zM%KLtsU?yuW^h|1En+?%NzZ|4)p1liZ??^~T!^GG7Uc9t(gV!bBk5IU(;WJo*<}t* zxx==^;x&gh-T^sa4!y-3F^9foj-Nv@4&bG8=xH$3{|+smc0kLSbLdV-<8x@*kC1a_4n4|zVGbQ&?w>>7 zFkhcTbDY4Yx%4En%UpU7OvRmO!POZp_|2t7&d3RyOV2UK&ZUo;7tf_JqrfZX(gV!t zbLmy)+`05QbMai7au;~pT-v}~Gnd|E-ZhuLV%|5GX1Rb5&85egkIkibnLFpw_sr+! z(gIiTrMdJh^R>D3A@i-dGMbSFug;Dedb4nCl zW?mCTp$0fRiZ(MBM$y~MwWpW!Wj3E;GADQ>Yu*Cz>`h2S?M}%u&(w9rMCyn(q!y ziKb_m*F@6?%-PX2Y8<#QnwBw_N7GBpJEG}R=K5%wFdn=onjU687)`G;ABm>FFrSR3 z86M!X(X^4dCz{@3z7kE}G7m*lj3>O)DTbb9_K2ZRm;+*H60e4c7+S|1A46|2FO8wg z%&TH3bRsx2hBh4%+(HqQu^XSgW;MjSzfqBI|`d~83rO%^LKH%JWw2ZlU9=+s){;KBD zXDsiWN0WTPjq_+7a|_yIZktD!nUBw-&?(?E^Jp{kg?aQgbN@X0ZVLLrFE>0i73>j9 z?@mR!A3k1V4vVD)e$Y2QmR?{^iKWl{kiI6CCi#Q2V`&|8VJyADTpml8nRmoe=rr2` z^Sx9s%{Cr?6xlw_#((JBI}I4Z5Y233@}R6hoApPH zq4vK9(t#N^&ry7vRfk(LuZi!x7HAt~wls#)n=@?FEIwZJ$qd^Rg!Om6oniax4rjiR zv;U!u-?7S9~W)l`%k|4bK?6cN}35(ER;9XHf!`9{P81a@vWPM z_RqABxqBwGv=BbD^rX>YwlTQ&4YTJYh1qj*!t6~{3$;pOmq<6Hq<6TzLL!qN<%Ofd zZ>W}~Ez}+kyZxF5!qM1ia-M}rxI+H3Y?Cc_dC{U-wn?yU)-2mp%czO;+$`HP`~kdm z7A%%WwX-qIGoLv+y)xV8WS+yxZsa}NHrD(uh0nIlc4@+`sr~aq{JEQ&qHJ#XL#OHX zOL<=427=5FNWCu9`w{=Z2`bRnpE!Erm5B0TYzVnVK*czUi?Gc$KS%{E)ln0qA1$;$ zftNazG8m#6Unr0TFR36_7cZ1e|g>(x|1T zSv^@<+Nd_D4Gy<5?WS32X}7vsS=zmo4VqfFvdq5!v({ew?1OOL`@Y}z`THFg`O-B& zI?y}({X_?9JqY0r$QgvfcLaF;KGT8raTw5%oQZmZ^RkXKI1%4{JJRe#@Mm_UZHb_- z>_`{+dr3zcIvDykbflXHgYMOiv}-WnM}X(=AHaVtrL)``C4@aj$! 
[GIT binary patch data omitted: base85-encoded binary payload, not representable as text]